Diffstat (file mode, path, lines changed):

-rw-r--r--  .gitignore | 2
-rw-r--r--  CMakeLists.txt | 267
-rw-r--r--  CREDITS.TXT | 29
-rw-r--r--  LICENSE.TXT | 3
-rw-r--r--  LLVMBuild.txt | 24
-rw-r--r--  Makefile | 31
-rw-r--r--  Makefile.config.in | 38
-rw-r--r--  Makefile.rules | 192
-rw-r--r--  ModuleInfo.txt | 4
-rw-r--r--  README.txt | 3
-rwxr-xr-x  autoconf/AutoRegen.sh | 4
-rwxr-xr-x  autoconf/config.sub | 4
-rw-r--r--  autoconf/configure.ac | 309
-rw-r--r--  autoconf/ltmain.sh | 2
-rw-r--r--  autoconf/m4/cxx_flag_check.m4 | 2
-rw-r--r--  autoconf/m4/func_isinf.m4 | 2
-rw-r--r--  autoconf/m4/huge_val.m4 | 2
-rw-r--r--  autoconf/m4/libtool.m4 | 12
-rw-r--r--  autoconf/m4/link_options.m4 | 2
-rw-r--r--  autoconf/m4/path_perl.m4 | 16
-rw-r--r--  autoconf/m4/visibility_inlines_hidden.m4 | 6
-rw-r--r--  bindings/LLVMBuild.txt | 21
-rw-r--r--  bindings/ocaml/Makefile.ocaml | 8
-rw-r--r--  bindings/ocaml/llvm/META.llvm.in | 2
-rw-r--r--  bindings/ocaml/llvm/Makefile | 6
-rw-r--r--  bindings/ocaml/llvm/llvm.ml | 2
-rw-r--r--  bindings/ocaml/llvm/llvm.mli | 1
-rw-r--r--  bindings/ocaml/llvm/llvm_ocaml.c | 3
-rw-r--r--  bindings/python/README.txt | 67
-rw-r--r--  bindings/python/llvm/__init__.py | 0
-rw-r--r--  bindings/python/llvm/common.py | 106
-rw-r--r--  bindings/python/llvm/core.py | 98
-rw-r--r--  bindings/python/llvm/disassembler.py | 134
-rw-r--r--  bindings/python/llvm/enumerations.py | 211
-rw-r--r--  bindings/python/llvm/object.py | 523
-rw-r--r--  bindings/python/llvm/tests/__init__.py | 0
-rw-r--r--  bindings/python/llvm/tests/base.py | 32
-rw-r--r--  bindings/python/llvm/tests/test_core.py | 23
-rw-r--r--  bindings/python/llvm/tests/test_disassembler.py | 28
-rw-r--r--  bindings/python/llvm/tests/test_object.py | 67
-rwxr-xr-x  build-for-llvm-top.sh | 68
-rwxr-xr-x  cmake/config-ix.cmake | 37
-rwxr-xr-x  cmake/modules/AddLLVM.cmake | 17
-rw-r--r--  cmake/modules/CMakeLists.txt | 6
-rw-r--r--  cmake/modules/ChooseMSVCCRT.cmake | 4
-rw-r--r--  cmake/modules/GetHostTriple.cmake | 30
-rw-r--r--  cmake/modules/GetTargetTriple.cmake | 30
-rw-r--r--  cmake/modules/HandleLLVMOptions.cmake | 36
-rwxr-xr-x  cmake/modules/LLVM-Config.cmake | 2
-rw-r--r--  cmake/modules/LLVMConfig.cmake.in | 2
-rw-r--r--  cmake/modules/TableGen.cmake | 5
-rw-r--r--  cmake/modules/VersionFromVCS.cmake | 40
-rwxr-xr-x  configure | 4711
-rw-r--r--  docs/AliasAnalysis.html | 7
-rw-r--r--  docs/Bugpoint.html | 2
-rw-r--r--  docs/CFEBuildInstrs.html | 29
-rw-r--r--  docs/CMake.html | 22
-rw-r--r--  docs/CodeGenerator.html | 244
-rw-r--r--  docs/CodingStandards.html | 132
-rw-r--r--  docs/CommandGuide/Makefile | 2
-rw-r--r--  docs/CommandGuide/index.html | 8
-rw-r--r--  docs/CommandGuide/lit.pod | 70
-rw-r--r--  docs/CommandGuide/llc.pod | 2
-rw-r--r--  docs/CommandGuide/llvm-build.pod | 86
-rw-r--r--  docs/CommandGuide/llvm-cov.pod | 45
-rw-r--r--  docs/CommandGuide/llvm-stress.pod | 42
-rw-r--r--  docs/CommandGuide/tblgen.pod | 52
-rw-r--r--  docs/CompilerWriterInfo.html | 14
-rw-r--r--  docs/DebuggingJITedCode.html | 2
-rw-r--r--  docs/DeveloperPolicy.html | 52
-rw-r--r--  docs/ExceptionHandling.html | 23
-rw-r--r--  docs/ExtendingLLVM.html | 15
-rw-r--r--  docs/FAQ.html | 26
-rw-r--r--  docs/GarbageCollection.html | 9
-rw-r--r--  docs/GetElementPtr.html | 10
-rw-r--r--  docs/GettingStarted.html | 311
-rw-r--r--  docs/GettingStartedVS.html | 15
-rw-r--r--  docs/GoldPlugin.html | 27
-rw-r--r--  docs/HowToAddABuilder.html | 142
-rw-r--r--  docs/HowToReleaseLLVM.html | 2
-rw-r--r--  docs/HowToSubmitABug.html | 2
-rw-r--r--  docs/LLVMBuild.html | 363
-rw-r--r--  docs/LLVMBuild.txt | 21
-rw-r--r--  docs/LangRef.html | 921
-rw-r--r--  docs/Lexicon.html | 4
-rw-r--r--  docs/LinkTimeOptimization.html | 2
-rw-r--r--  docs/Packaging.html | 2
-rw-r--r--  docs/Passes.html | 23
-rw-r--r--  docs/ProgrammersManual.html | 95
-rw-r--r--  docs/Projects.html | 2
-rw-r--r--  docs/ReleaseNotes.html | 1195
-rw-r--r--  docs/SegmentedStacks.html | 8
-rw-r--r--  docs/SourceLevelDebugging.html | 1219
-rw-r--r--  docs/SystemLibrary.html | 2
-rw-r--r--  docs/TableGenFundamentals.html | 56
-rw-r--r--  docs/TestSuiteMakefileGuide.html | 351
-rw-r--r--  docs/TestingGuide.html | 415
-rw-r--r--  docs/UsingLibraries.html | 448
-rw-r--r--  docs/WritingAnLLVMBackend.html | 4
-rw-r--r--  docs/WritingAnLLVMPass.html | 140
-rw-r--r--  docs/doxygen.cfg.in | 2
-rw-r--r--  docs/doxygen.footer | 2
-rw-r--r--  docs/doxygen.header | 2
-rw-r--r--  docs/doxygen.intro | 4
-rw-r--r--  docs/index.html | 76
-rw-r--r--  docs/tutorial/LangImpl2.html | 2
-rw-r--r--  docs/tutorial/LangImpl3.html | 2
-rw-r--r--  docs/tutorial/LangImpl4.html | 2
-rw-r--r--  docs/tutorial/LangImpl5.html | 2
-rw-r--r--  docs/tutorial/LangImpl6.html | 2
-rw-r--r--  docs/tutorial/LangImpl7.html | 2
-rw-r--r--  examples/BrainF/BrainF.cpp | 3
-rw-r--r--  examples/ExceptionDemo/ExceptionDemo.cpp | 101
-rw-r--r--  examples/LLVMBuild.txt | 21
-rw-r--r--  examples/ParallelJIT/ParallelJIT.cpp | 3
-rw-r--r--  include/llvm-c/Analysis.h | 10
-rw-r--r--  include/llvm-c/BitReader.h | 10
-rw-r--r--  include/llvm-c/BitWriter.h | 10
-rw-r--r--  include/llvm-c/Core.h | 1746
-rw-r--r--  include/llvm-c/Disassembler.h | 11
-rw-r--r--  include/llvm-c/EnhancedDisassembly.h | 17
-rw-r--r--  include/llvm-c/ExecutionEngine.h | 11
-rw-r--r--  include/llvm-c/Initialization.h | 14
-rw-r--r--  include/llvm-c/LinkTimeOptimizer.h | 11
-rw-r--r--  include/llvm-c/Object.h | 78
-rw-r--r--  include/llvm-c/Target.h | 65
-rw-r--r--  include/llvm-c/TargetMachine.h | 142
-rw-r--r--  include/llvm-c/Transforms/IPO.h | 11
-rw-r--r--  include/llvm-c/Transforms/PassManagerBuilder.h | 11
-rw-r--r--  include/llvm-c/Transforms/Scalar.h | 10
-rw-r--r--  include/llvm-c/Transforms/Vectorize.h | 48
-rw-r--r--  include/llvm-c/lto.h | 51
-rw-r--r--  include/llvm/ADT/APFloat.h | 13
-rw-r--r--  include/llvm/ADT/APInt.h | 41
-rw-r--r--  include/llvm/ADT/ArrayRef.h | 129
-rw-r--r--  include/llvm/ADT/BitVector.h | 92
-rw-r--r--  include/llvm/ADT/DAGDeltaAlgorithm.h | 1
-rw-r--r--  include/llvm/ADT/DenseMap.h | 70
-rw-r--r--  include/llvm/ADT/DenseMapInfo.h | 2
-rw-r--r--  include/llvm/ADT/FoldingSet.h | 28
-rw-r--r--  include/llvm/ADT/GraphTraits.h | 5
-rw-r--r--  include/llvm/ADT/Hashing.h | 770
-rw-r--r--  include/llvm/ADT/ImmutableSet.h | 16
-rw-r--r--  include/llvm/ADT/IntervalMap.h | 4
-rw-r--r--  include/llvm/ADT/IntrusiveRefCntPtr.h | 25
-rw-r--r--  include/llvm/ADT/PointerIntPair.h | 6
-rw-r--r--  include/llvm/ADT/PointerUnion.h | 21
-rw-r--r--  include/llvm/ADT/SetVector.h | 6
-rw-r--r--  include/llvm/ADT/SmallBitVector.h | 6
-rw-r--r--  include/llvm/ADT/SmallPtrSet.h | 16
-rw-r--r--  include/llvm/ADT/SmallSet.h | 4
-rw-r--r--  include/llvm/ADT/SmallString.h | 233
-rw-r--r--  include/llvm/ADT/SmallVector.h | 108
-rw-r--r--  include/llvm/ADT/SparseBitVector.h | 43
-rw-r--r--  include/llvm/ADT/SparseSet.h | 268
-rw-r--r--  include/llvm/ADT/Statistic.h | 2
-rw-r--r--  include/llvm/ADT/StringExtras.h | 37
-rw-r--r--  include/llvm/ADT/StringMap.h | 67
-rw-r--r--  include/llvm/ADT/StringRef.h | 67
-rw-r--r--  include/llvm/ADT/TinyPtrVector.h | 42
-rw-r--r--  include/llvm/ADT/Trie.h | 6
-rw-r--r--  include/llvm/ADT/Triple.h | 196
-rw-r--r--  include/llvm/ADT/Twine.h | 3
-rw-r--r--  include/llvm/ADT/ValueMap.h | 24
-rw-r--r--  include/llvm/ADT/VariadicFunction.h | 331
-rw-r--r--  include/llvm/ADT/VectorExtras.h | 41
-rw-r--r--  include/llvm/ADT/edit_distance.h | 102
-rw-r--r--  include/llvm/ADT/ilist.h | 4
-rw-r--r--  include/llvm/Analysis/AliasAnalysis.h | 7
-rw-r--r--  include/llvm/Analysis/AliasSetTracker.h | 1
-rw-r--r--  include/llvm/Analysis/BlockFrequencyImpl.h | 15
-rw-r--r--  include/llvm/Analysis/BlockFrequencyInfo.h | 2
-rw-r--r--  include/llvm/Analysis/BranchProbabilityInfo.h | 116
-rw-r--r--  include/llvm/Analysis/CFGPrinter.h | 12
-rw-r--r--  include/llvm/Analysis/CaptureTracking.h | 34
-rw-r--r--  include/llvm/Analysis/CodeMetrics.h | 87
-rw-r--r--  include/llvm/Analysis/ConstantFolding.h | 26
-rw-r--r--  include/llvm/Analysis/DIBuilder.h | 42
-rw-r--r--  include/llvm/Analysis/DOTGraphTraitsPass.h | 6
-rw-r--r--  include/llvm/Analysis/DebugInfo.h | 92
-rw-r--r--  include/llvm/Analysis/DominanceFrontier.h | 1
-rw-r--r--  include/llvm/Analysis/DominatorInternals.h | 2
-rw-r--r--  include/llvm/Analysis/Dominators.h | 132
-rw-r--r--  include/llvm/Analysis/IVUsers.h | 6
-rw-r--r--  include/llvm/Analysis/InlineCost.h | 193
-rw-r--r--  include/llvm/Analysis/InstructionSimplify.h | 97
-rw-r--r--  include/llvm/Analysis/IntervalIterator.h | 4
-rw-r--r--  include/llvm/Analysis/LazyValueInfo.h | 6
-rw-r--r--  include/llvm/Analysis/Loads.h | 8
-rw-r--r--  include/llvm/Analysis/LoopInfo.h | 74
-rw-r--r--  include/llvm/Analysis/MemoryDependenceAnalysis.h | 4
-rw-r--r--  include/llvm/Analysis/PHITransAddr.h | 8
-rw-r--r--  include/llvm/Analysis/ProfileInfo.h | 9
-rw-r--r--  include/llvm/Analysis/RegionInfo.h | 2
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h | 26
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 37
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpressions.h | 1
-rw-r--r--  include/llvm/Analysis/ValueTracking.h | 58
-rw-r--r--  include/llvm/Argument.h | 1
-rw-r--r--  include/llvm/Assembly/AssemblyAnnotationWriter.h | 20
-rw-r--r--  include/llvm/Assembly/Parser.h | 1
-rw-r--r--  include/llvm/Assembly/Writer.h | 1
-rw-r--r--  include/llvm/Attributes.h | 185
-rw-r--r--  include/llvm/AutoUpgrade.h | 8
-rw-r--r--  include/llvm/BasicBlock.h | 7
-rw-r--r--  include/llvm/Bitcode/Archive.h | 2
-rw-r--r--  include/llvm/Bitcode/BitCodes.h | 13
-rw-r--r--  include/llvm/Bitcode/BitstreamReader.h | 99
-rw-r--r--  include/llvm/Bitcode/BitstreamWriter.h | 83
-rw-r--r--  include/llvm/Bitcode/LLVMBitCodes.h | 40
-rw-r--r--  include/llvm/Bitcode/ReaderWriter.h | 49
-rw-r--r--  include/llvm/CMakeLists.txt | 2
-rw-r--r--  include/llvm/CodeGen/Analysis.h | 7
-rw-r--r--  include/llvm/CodeGen/AsmPrinter.h | 32
-rw-r--r--  include/llvm/CodeGen/BinaryObject.h | 353
-rw-r--r--  include/llvm/CodeGen/CallingConvLower.h | 12
-rw-r--r--  include/llvm/CodeGen/DFAPacketizer.h | 167
-rw-r--r--  include/llvm/CodeGen/EdgeBundles.h | 3
-rw-r--r--  include/llvm/CodeGen/FastISel.h | 11
-rw-r--r--  include/llvm/CodeGen/FunctionLoweringInfo.h | 24
-rw-r--r--  include/llvm/CodeGen/GCStrategy.h | 13
-rw-r--r--  include/llvm/CodeGen/ISDOpcodes.h | 15
-rw-r--r--  include/llvm/CodeGen/JITCodeEmitter.h | 1
-rw-r--r--  include/llvm/CodeGen/LatencyPriorityQueue.h | 4
-rw-r--r--  include/llvm/CodeGen/LexicalScopes.h | 3
-rw-r--r--  include/llvm/CodeGen/LinkAllCodegenComponents.h | 8
-rw-r--r--  include/llvm/CodeGen/LiveInterval.h | 54
-rw-r--r--  include/llvm/CodeGen/LiveIntervalAnalysis.h | 256
-rw-r--r--  include/llvm/CodeGen/LiveRangeEdit.h | 207
-rw-r--r--  include/llvm/CodeGen/LiveVariables.h | 9
-rw-r--r--  include/llvm/CodeGen/MachineBasicBlock.h | 247
-rw-r--r--  include/llvm/CodeGen/MachineBlockFrequencyInfo.h | 6
-rw-r--r--  include/llvm/CodeGen/MachineBranchProbabilityInfo.h | 16
-rw-r--r--  include/llvm/CodeGen/MachineCodeEmitter.h | 3
-rw-r--r--  include/llvm/CodeGen/MachineConstantPool.h | 1
-rw-r--r--  include/llvm/CodeGen/MachineDominators.h | 3
-rw-r--r--  include/llvm/CodeGen/MachineFrameInfo.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineFunction.h | 35
-rw-r--r--  include/llvm/CodeGen/MachineFunctionAnalysis.h | 5
-rw-r--r--  include/llvm/CodeGen/MachineInstr.h | 376
-rw-r--r--  include/llvm/CodeGen/MachineInstrBuilder.h | 52
-rw-r--r--  include/llvm/CodeGen/MachineInstrBundle.h | 203
-rw-r--r--  include/llvm/CodeGen/MachineJumpTableInfo.h | 7
-rw-r--r--  include/llvm/CodeGen/MachineMemOperand.h | 13
-rw-r--r--  include/llvm/CodeGen/MachineModuleInfo.h | 16
-rw-r--r--  include/llvm/CodeGen/MachineOperand.h | 72
-rw-r--r--  include/llvm/CodeGen/MachinePassRegistry.h | 1
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 180
-rw-r--r--  include/llvm/CodeGen/MachineScheduler.h | 91
-rw-r--r--  include/llvm/CodeGen/ObjectCodeEmitter.h | 171
-rw-r--r--  include/llvm/CodeGen/PBQP/Graph.h | 37
-rw-r--r--  include/llvm/CodeGen/PBQP/HeuristicBase.h | 14
-rw-r--r--  include/llvm/CodeGen/PBQP/Heuristics/Briggs.h | 6
-rw-r--r--  include/llvm/CodeGen/Passes.h | 369
-rw-r--r--  include/llvm/CodeGen/RegisterScavenging.h | 14
-rw-r--r--  include/llvm/CodeGen/ResourcePriorityQueue.h | 142
-rw-r--r--  include/llvm/CodeGen/ScheduleDAG.h | 73
-rw-r--r--  include/llvm/CodeGen/ScheduleDAGInstrs.h | 344
-rw-r--r--  include/llvm/CodeGen/SchedulerRegistry.h | 16
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h | 27
-rw-r--r--  include/llvm/CodeGen/SelectionDAGISel.h | 17
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h | 37
-rw-r--r--  include/llvm/CodeGen/SlotIndexes.h | 126
-rw-r--r--  include/llvm/CodeGen/TargetLoweringObjectFileImpl.h | 13
-rw-r--r--  include/llvm/CodeGen/ValueTypes.h | 117
-rw-r--r--  include/llvm/CodeGen/ValueTypes.td | 62
-rw-r--r--  include/llvm/Config/Disassemblers.def.in | 2
-rw-r--r--  include/llvm/Config/config.h.cmake | 49
-rw-r--r--  include/llvm/Config/config.h.in | 51
-rw-r--r--  include/llvm/Config/llvm-config.h.cmake | 18
-rw-r--r--  include/llvm/Config/llvm-config.h.in | 18
-rw-r--r--  include/llvm/Constant.h | 23
-rw-r--r--  include/llvm/Constants.h | 326
-rw-r--r--  include/llvm/DebugInfoProbe.h | 67
-rw-r--r--  include/llvm/DefaultPasses.h | 2
-rw-r--r--  include/llvm/DerivedTypes.h | 10
-rw-r--r--  include/llvm/ExecutionEngine/ExecutionEngine.h | 61
-rw-r--r--  include/llvm/ExecutionEngine/IntelJITEventsWrapper.h | 102
-rw-r--r--  include/llvm/ExecutionEngine/JITEventListener.h | 50
-rw-r--r--  include/llvm/ExecutionEngine/JITMemoryManager.h | 27
-rw-r--r--  include/llvm/ExecutionEngine/OProfileWrapper.h | 124
-rw-r--r--  include/llvm/ExecutionEngine/RuntimeDyld.h | 33
-rw-r--r--  include/llvm/Function.h | 8
-rw-r--r--  include/llvm/GlobalValue.h | 7
-rw-r--r--  include/llvm/InitializePasses.h | 27
-rw-r--r--  include/llvm/InlineAsm.h | 1
-rw-r--r--  include/llvm/InstrTypes.h | 1
-rw-r--r--  include/llvm/Instruction.def | 125
-rw-r--r--  include/llvm/Instruction.h | 26
-rw-r--r--  include/llvm/Instructions.h | 326
-rw-r--r--  include/llvm/IntrinsicInst.h | 28
-rw-r--r--  include/llvm/Intrinsics.td | 11
-rw-r--r--  include/llvm/IntrinsicsAlpha.td | 18
-rw-r--r--  include/llvm/IntrinsicsHexagon.td | 3671
-rw-r--r--  include/llvm/IntrinsicsX86.td | 1176
-rw-r--r--  include/llvm/LLVMContext.h | 11
-rw-r--r--  include/llvm/LinkAllPasses.h | 5
-rw-r--r--  include/llvm/Linker.h | 3
-rw-r--r--  include/llvm/MC/MCAsmBackend.h | 49
-rw-r--r--  include/llvm/MC/MCAsmInfo.h | 44
-rw-r--r--  include/llvm/MC/MCAsmInfoCOFF.h | 14
-rw-r--r--  include/llvm/MC/MCAsmInfoDarwin.h | 4
-rw-r--r--  include/llvm/MC/MCAsmLayout.h | 1
-rw-r--r--  include/llvm/MC/MCAssembler.h | 47
-rw-r--r--  include/llvm/MC/MCCodeEmitter.h | 6
-rw-r--r--  include/llvm/MC/MCCodeGenInfo.h | 11
-rw-r--r--  include/llvm/MC/MCContext.h | 62
-rw-r--r--  include/llvm/MC/MCDisassembler.h | 2
-rw-r--r--  include/llvm/MC/MCDwarf.h | 67
-rw-r--r--  include/llvm/MC/MCELFObjectWriter.h | 75
-rw-r--r--  include/llvm/MC/MCExpr.h | 28
-rw-r--r--  include/llvm/MC/MCFixup.h | 19
-rw-r--r--  include/llvm/MC/MCInst.h | 28
-rw-r--r--  include/llvm/MC/MCInstPrinter.h | 11
-rw-r--r--  include/llvm/MC/MCInstrAnalysis.h | 2
-rw-r--r--  include/llvm/MC/MCInstrDesc.h | 208
-rw-r--r--  include/llvm/MC/MCInstrInfo.h | 17
-rw-r--r--  include/llvm/MC/MCObjectFileInfo.h | 44
-rw-r--r--  include/llvm/MC/MCObjectStreamer.h | 7
-rw-r--r--  include/llvm/MC/MCObjectWriter.h | 8
-rw-r--r--  include/llvm/MC/MCParser/MCAsmLexer.h | 1
-rw-r--r--  include/llvm/MC/MCParser/MCAsmParser.h | 14
-rw-r--r--  include/llvm/MC/MCRegisterInfo.h | 184
-rw-r--r--  include/llvm/MC/MCSection.h | 2
-rw-r--r--  include/llvm/MC/MCSectionCOFF.h | 2
-rw-r--r--  include/llvm/MC/MCSectionELF.h | 1
-rw-r--r--  include/llvm/MC/MCSectionMachO.h | 1
-rw-r--r--  include/llvm/MC/MCStreamer.h | 49
-rw-r--r--  include/llvm/MC/MCWinCOFFObjectWriter.h | 36
-rw-r--r--  include/llvm/Metadata.h | 25
-rw-r--r--  include/llvm/Module.h | 64
-rw-r--r--  include/llvm/Object/Archive.h | 59
-rw-r--r--  include/llvm/Object/Binary.h | 51
-rw-r--r--  include/llvm/Object/COFF.h | 77
-rw-r--r--  include/llvm/Object/ELF.h | 2209
-rw-r--r--  include/llvm/Object/MachO.h | 37
-rw-r--r--  include/llvm/Object/MachOObject.h | 6
-rw-r--r--  include/llvm/Object/ObjectFile.h | 275
-rw-r--r--  include/llvm/Operator.h | 4
-rw-r--r--  include/llvm/Pass.h | 38
-rw-r--r--  include/llvm/PassAnalysisSupport.h | 1
-rw-r--r--  include/llvm/PassManager.h | 22
-rw-r--r--  include/llvm/PassManagers.h | 75
-rw-r--r--  include/llvm/PassSupport.h | 7
-rw-r--r--  include/llvm/Support/BlockFrequency.h | 2
-rw-r--r--  include/llvm/Support/BranchProbability.h | 31
-rw-r--r--  include/llvm/Support/CFG.h | 8
-rw-r--r--  include/llvm/Support/COFF.h | 294
-rw-r--r--  include/llvm/Support/CallSite.h | 10
-rw-r--r--  include/llvm/Support/Capacity.h | 2
-rw-r--r--  include/llvm/Support/CodeGen.h | 20
-rw-r--r--  include/llvm/Support/CommandLine.h | 154
-rw-r--r--  include/llvm/Support/Compiler.h | 27
-rw-r--r--  include/llvm/Support/DOTGraphTraits.h | 30
-rw-r--r--  include/llvm/Support/DataStream.h | 38
-rw-r--r--  include/llvm/Support/DataTypes.h.cmake | 18
-rw-r--r--  include/llvm/Support/DataTypes.h.in | 15
-rw-r--r--  include/llvm/Support/Debug.h | 6
-rw-r--r--  include/llvm/Support/Dwarf.h | 6
-rw-r--r--  include/llvm/Support/DynamicLibrary.h | 3
-rw-r--r--  include/llvm/Support/ELF.h | 50
-rw-r--r--  include/llvm/Support/Endian.h | 12
-rw-r--r--  include/llvm/Support/FileSystem.h | 228
-rw-r--r--  include/llvm/Support/GraphWriter.h | 14
-rw-r--r--  include/llvm/Support/Host.h | 6
-rw-r--r--  include/llvm/Support/IRReader.h | 10
-rw-r--r--  include/llvm/Support/InstVisitor.h | 105
-rw-r--r--  include/llvm/Support/JSONParser.h | 448
-rw-r--r--  include/llvm/Support/LockFileManager.h | 74
-rw-r--r--  include/llvm/Support/MachO.h | 7
-rw-r--r--  include/llvm/Support/ManagedStatic.h | 5
-rw-r--r--  include/llvm/Support/MathExtras.h | 14
-rw-r--r--  include/llvm/Support/MemoryObject.h | 11
-rw-r--r--  include/llvm/Support/PathV1.h | 14
-rw-r--r--  include/llvm/Support/PatternMatch.h | 91
-rw-r--r--  include/llvm/Support/Process.h | 3
-rw-r--r--  include/llvm/Support/Program.h | 13
-rw-r--r--  include/llvm/Support/Recycler.h | 3
-rw-r--r--  include/llvm/Support/SMLoc.h | 22
-rw-r--r--  include/llvm/Support/SaveAndRestore.h | 47
-rw-r--r--  include/llvm/Support/SourceMgr.h | 52
-rw-r--r--  include/llvm/Support/StreamableMemoryObject.h | 181
-rw-r--r--  include/llvm/Support/TargetRegistry.h | 50
-rw-r--r--  include/llvm/Support/TargetSelect.h | 12
-rw-r--r--  include/llvm/Support/Valgrind.h | 43
-rw-r--r--  include/llvm/Support/ValueHandle.h | 43
-rw-r--r--  include/llvm/Support/YAMLParser.h | 549
-rw-r--r--  include/llvm/Support/system_error.h | 15
-rw-r--r--  include/llvm/Support/type_traits.h | 95
-rw-r--r--  include/llvm/TableGen/Record.h | 222
-rw-r--r--  include/llvm/TableGen/TableGenAction.h | 1
-rw-r--r--  include/llvm/TableGen/TableGenBackend.h | 4
-rw-r--r--  include/llvm/Target/Mangler.h | 4
-rw-r--r--  include/llvm/Target/Target.td | 62
-rw-r--r--  include/llvm/Target/TargetCallingConv.h | 4
-rw-r--r--  include/llvm/Target/TargetCallingConv.td | 11
-rw-r--r--  include/llvm/Target/TargetData.h | 34
-rw-r--r--  include/llvm/Target/TargetELFWriterInfo.h | 3
-rw-r--r--  include/llvm/Target/TargetFrameLowering.h | 4
-rw-r--r--  include/llvm/Target/TargetInstrInfo.h | 128
-rw-r--r--  include/llvm/Target/TargetJITInfo.h | 21
-rw-r--r--  include/llvm/Target/TargetLibraryInfo.h | 248
-rw-r--r--  include/llvm/Target/TargetLowering.h | 138
-rw-r--r--  include/llvm/Target/TargetLoweringObjectFile.h | 30
-rw-r--r--  include/llvm/Target/TargetMachine.h | 138
-rw-r--r--  include/llvm/Target/TargetOpcodes.h | 7
-rw-r--r--  include/llvm/Target/TargetOptions.h | 294
-rw-r--r--  include/llvm/Target/TargetRegisterInfo.h | 124
-rw-r--r--  include/llvm/Target/TargetSelectionDAG.td | 47
-rw-r--r--  include/llvm/Target/TargetSubtargetInfo.h | 4
-rw-r--r--  include/llvm/Transforms/IPO.h | 1
-rw-r--r--  include/llvm/Transforms/IPO/InlinerPass.h | 27
-rw-r--r--  include/llvm/Transforms/IPO/PassManagerBuilder.h | 20
-rw-r--r--  include/llvm/Transforms/Instrumentation.h | 9
-rw-r--r--  include/llvm/Transforms/Scalar.h | 14
-rw-r--r--  include/llvm/Transforms/Utils/BasicBlockUtils.h | 5
-rw-r--r--  include/llvm/Transforms/Utils/BasicInliner.h | 55
-rw-r--r--  include/llvm/Transforms/Utils/BuildLibCalls.h | 8
-rw-r--r--  include/llvm/Transforms/Utils/Cloning.h | 23
-rw-r--r--  include/llvm/Transforms/Utils/CmpInstAnalysis.h | 66
-rw-r--r--  include/llvm/Transforms/Utils/ModuleUtils.h | 33
-rw-r--r--  include/llvm/Transforms/Utils/SSAUpdater.h | 21
-rw-r--r--  include/llvm/Transforms/Utils/SSAUpdaterImpl.h | 51
-rw-r--r--  include/llvm/Transforms/Utils/SimplifyIndVar.h | 16
-rw-r--r--  include/llvm/Transforms/Utils/UnrollLoop.h | 5
-rw-r--r--  include/llvm/Transforms/Utils/ValueMapper.h | 2
-rw-r--r--  include/llvm/Transforms/Vectorize.h | 106
-rw-r--r--  include/llvm/Type.h | 150
-rw-r--r--  include/llvm/User.h | 6
-rw-r--r--  include/llvm/Value.h | 47
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 16
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 6
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 4
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 4
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 59
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 2
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 401
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 8
-rw-r--r--  lib/Analysis/CMakeLists.txt | 7
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 95
-rw-r--r--  lib/Analysis/CodeMetrics.cpp | 184
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 373
-rw-r--r--  lib/Analysis/DIBuilder.cpp | 112
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 110
-rw-r--r--  lib/Analysis/DominanceFrontier.cpp | 2
-rw-r--r--  lib/Analysis/IPA/CMakeLists.txt | 6
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp | 13
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 6
-rw-r--r--  lib/Analysis/IPA/LLVMBuild.txt | 23
-rw-r--r--  lib/Analysis/IVUsers.cpp | 67
-rw-r--r--  lib/Analysis/InlineCost.cpp | 1439
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 1167
-rw-r--r--  lib/Analysis/LLVMBuild.txt | 25
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 123
-rw-r--r--  lib/Analysis/Lint.cpp | 19
-rw-r--r--  lib/Analysis/Loads.cpp | 16
-rw-r--r--  lib/Analysis/LoopDependenceAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 127
-rw-r--r--  lib/Analysis/LoopPass.cpp | 23
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp | 2
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 8
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 94
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 13
-rw-r--r--  lib/Analysis/PathNumbering.cpp | 4
-rw-r--r--  lib/Analysis/PathProfileVerifier.cpp | 16
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp | 2
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp | 4
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp | 18
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 8
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 466
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 497
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp | 2
-rw-r--r--  lib/Analysis/SparsePropagation.cpp | 8
-rw-r--r--  lib/Analysis/Trace.cpp | 2
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 989
-rw-r--r--  lib/Archive/ArchiveReader.cpp | 33
-rw-r--r--  lib/Archive/ArchiveWriter.cpp | 6
-rw-r--r--  lib/Archive/CMakeLists.txt | 6
-rw-r--r--  lib/Archive/LLVMBuild.txt | 22
-rw-r--r--  lib/AsmParser/CMakeLists.txt | 5
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 62
-rw-r--r--  lib/AsmParser/LLLexer.h | 1
-rw-r--r--  lib/AsmParser/LLParser.cpp | 127
-rw-r--r--  lib/AsmParser/LLParser.h | 15
-rw-r--r--  lib/AsmParser/LLToken.h | 4
-rw-r--r--  lib/AsmParser/LLVMBuild.txt | 22
-rw-r--r--  lib/AsmParser/Parser.cpp | 2
-rw-r--r--  lib/Bitcode/LLVMBuild.txt | 24
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 680
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 40
-rw-r--r--  lib/Bitcode/Reader/CMakeLists.txt | 5
-rw-r--r--  lib/Bitcode/Reader/LLVMBuild.txt | 22
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 318
-rw-r--r--  lib/Bitcode/Writer/CMakeLists.txt | 5
-rw-r--r--  lib/Bitcode/Writer/LLVMBuild.txt | 22
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 44
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 4
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 29
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 2
-rw-r--r--  lib/CodeGen/AllocationOrder.h | 3
-rw-r--r--  lib/CodeGen/Analysis.cpp | 73
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 19
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 302
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 42
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 87
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 287
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 290
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 299
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 68
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 427
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 56
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 46
-rw-r--r--  lib/CodeGen/AsmPrinter/LLVMBuild.txt | 22
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 129
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 27
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 2
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 17
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 9
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 72
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 3
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 223
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 31
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 703
-rw-r--r--  lib/CodeGen/ELF.h | 227
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 205
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.h | 78
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 1105
-rw-r--r--  lib/CodeGen/ELFWriter.h | 251
-rw-r--r--  lib/CodeGen/EdgeBundles.cpp | 2
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 526
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 14
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 12
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 6
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 160
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 132
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 191
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 35
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 15
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 5
-rw-r--r--  lib/CodeGen/JITCodeEmitter.cpp | 14
-rw-r--r--  lib/CodeGen/LLVMBuild.txt | 25
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 414
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 6
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 2
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 23
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 36
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 2012
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 2
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.h | 2
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 4
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 110
-rw-r--r--  lib/CodeGen/LiveRangeEdit.h | 206
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 114
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 10
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 219
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 1001
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 65
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 108
-rw-r--r--  lib/CodeGen/MachineCodeEmitter.cpp | 14
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 340
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 93
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 5
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 254
-rw-r--r--  lib/CodeGen/MachineInstrBundle.cpp | 278
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 608
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 18
-rw-r--r--  lib/CodeGen/MachinePassRegistry.cpp | 1
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 65
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 7
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 614
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 182
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 462
-rw-r--r--  lib/CodeGen/ObjectCodeEmitter.cpp | 141
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 9
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 13
-rw-r--r--  lib/CodeGen/Passes.cpp | 607
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 35
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 256
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 36
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 41
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 4
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 280
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 36
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 316
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 321
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 110
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 1543
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 173
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 16
-rw-r--r--  lib/CodeGen/RegisterClassInfo.h | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 252
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h | 6
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 67
-rw-r--r--  lib/CodeGen/RenderMachineFunction.cpp | 19
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 62
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 68
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 654
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 212
-rw-r--r--  lib/CodeGen/ScheduleDAGPrinter.cpp | 24
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1120
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 215
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 117
-rw-r--r--  lib/CodeGen/SelectionDAG/LLVMBuild.txt | 22
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1251
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 134
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 20
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 140
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 163
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 657
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 30
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 265
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 650
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 130
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 276
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1050
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 629
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 631
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 268
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 215
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 3
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 7
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 775
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 12
-rw-r--r--  lib/CodeGen/Spiller.cpp | 81
-rw-r--r--  lib/CodeGen/Spiller.h | 1
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 85
-rw-r--r--  lib/CodeGen/SplitKit.h | 15
-rw-r--r--  lib/CodeGen/Splitter.cpp | 827
-rw-r--r--  lib/CodeGen/Splitter.h | 101
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 7
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 358
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 18
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 39
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 45
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 106
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 123
-rw-r--r--  lib/CodeGen/TargetOptionsImpl.cpp | 52
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 525
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 168
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 335
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 2633
-rw-r--r--  lib/CodeGen/VirtRegRewriter.h | 32
-rw-r--r--  lib/DebugInfo/CMakeLists.txt | 4
-rw-r--r--  lib/DebugInfo/DWARFContext.cpp | 2
-rw-r--r--  lib/DebugInfo/DWARFContext.h | 1
-rw-r--r--  lib/DebugInfo/DWARFDebugAbbrev.cpp | 2
-rw-r--r--  lib/DebugInfo/DWARFDebugAbbrev.h | 6
-rw-r--r--  lib/DebugInfo/DWARFDebugArangeSet.cpp | 5
-rw-r--r--  lib/DebugInfo/DWARFDebugAranges.cpp | 7
-rw-r--r--  lib/DebugInfo/DWARFDebugInfoEntry.cpp | 4
-rw-r--r--  lib/DebugInfo/DWARFDebugInfoEntry.h | 4
-rw-r--r--  lib/DebugInfo/DWARFDebugLine.cpp | 7
-rw-r--r--  lib/DebugInfo/DWARFFormValue.cpp | 20
-rw-r--r--  lib/DebugInfo/LLVMBuild.txt | 22
-rw-r--r--  lib/ExecutionEngine/CMakeLists.txt | 17
-rw-r--r--  lib/ExecutionEngine/EventListenerCommon.h | 67
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 110
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 2
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt | 11
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp | 183
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt | 23
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/Makefile | 17
-rw-r--r--  lib/ExecutionEngine/Interpreter/CMakeLists.txt | 8
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 29
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 31
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.h | 7
-rw-r--r--  lib/ExecutionEngine/Interpreter/LLVMBuild.txt | 22
-rw-r--r--  lib/ExecutionEngine/JIT/CMakeLists.txt | 11
-rw-r--r--  lib/ExecutionEngine/JIT/Intercept.cpp | 162
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 45
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 11
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp | 211
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.h | 116
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 2
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 43
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 194
-rw-r--r--  lib/ExecutionEngine/JIT/LLVMBuild.txt | 22
-rw-r--r--  lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp | 192
-rw-r--r--  lib/ExecutionEngine/LLVMBuild.txt | 25
-rw-r--r--  lib/ExecutionEngine/MCJIT/CMakeLists.txt | 10
-rw-r--r--  lib/ExecutionEngine/MCJIT/Intercept.cpp | 162
-rw-r--r--  lib/ExecutionEngine/MCJIT/LLVMBuild.txt | 22
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.cpp | 41
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.h | 17
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp | 14
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h | 47
-rw-r--r--  lib/ExecutionEngine/Makefile | 13
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/CMakeLists.txt | 7
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt | 23
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/Makefile | 18
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp | 177
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp | 263
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt | 6
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt | 22
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 446
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 262
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 62
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 236
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 577
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 70
-rw-r--r--  lib/ExecutionEngine/TargetSelect.cpp | 41
-rw-r--r--  lib/LLVMBuild.txt | 24
-rw-r--r--  lib/Linker/CMakeLists.txt | 8
-rw-r--r--  lib/Linker/LLVMBuild.txt | 22
-rw-r--r--  lib/Linker/LinkArchives.cpp | 5
-rw-r--r--  lib/Linker/LinkModules.cpp | 513
-rw-r--r--  lib/Linker/Linker.cpp | 1
-rw-r--r--  lib/MC/CMakeLists.txt | 6
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 935
-rw-r--r--  lib/MC/ELFObjectWriter.h | 446
-rw-r--r--  lib/MC/LLVMBuild.txt | 25
-rw-r--r--  lib/MC/MCAsmBackend.cpp | 21
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 4
-rw-r--r--  lib/MC/MCAsmInfoCOFF.cpp | 15
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 7
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 106
-rw-r--r--  lib/MC/MCAssembler.cpp | 124
-rw-r--r--  lib/MC/MCCodeGenInfo.cpp | 4
-rw-r--r--  lib/MC/MCContext.cpp | 60
-rw-r--r--  lib/MC/MCDisassembler/CMakeLists.txt | 24
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.cpp | 18
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.h | 10
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.cpp | 110
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.h | 23
-rw-r--r--  lib/MC/MCDisassembler/EDMain.cpp | 280
-rw-r--r--  lib/MC/MCDisassembler/EDOperand.cpp | 20
-rw-r--r--  lib/MC/MCDisassembler/LLVMBuild.txt | 22
-rw-r--r--  lib/MC/MCDwarf.cpp | 459
-rw-r--r--  lib/MC/MCELF.cpp | 4
-rw-r--r--  lib/MC/MCELFObjectTargetWriter.cpp | 29
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 158
-rw-r--r--  lib/MC/MCELFStreamer.h | 141
-rw-r--r--  lib/MC/MCExpr.cpp | 42
-rw-r--r--  lib/MC/MCInst.cpp | 2
-rw-r--r--  lib/MC/MCInstPrinter.cpp | 6
-rw-r--r--  lib/MC/MCLoggingStreamer.cpp | 250
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 27
-rw-r--r--  lib/MC/MCModule.cpp | 2
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 14
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 101
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 52
-rw-r--r--  lib/MC/MCObjectWriter.cpp | 14
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 277
-rw-r--r--  lib/MC/MCParser/CMakeLists.txt | 5
-rw-r--r--  lib/MC/MCParser/COFFAsmParser.cpp | 17
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 1
-rw-r--r--  lib/MC/MCParser/LLVMBuild.txt | 22
-rw-r--r--  lib/MC/MCParser/MCAsmLexer.cpp | 4
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 4
-rw-r--r--  lib/MC/MCPureStreamer.cpp | 15
-rw-r--r--  lib/MC/MCStreamer.cpp | 109
-rw-r--r--  lib/MC/MCSymbol.cpp | 7
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 15
-rw-r--r--  lib/MC/SubtargetFeature.cpp | 24
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 89
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 20
-rw-r--r--  lib/Object/Archive.cpp | 142
-rw-r--r--  lib/Object/CMakeLists.txt | 5
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 295
-rw-r--r--  lib/Object/ELFObjectFile.cpp | 1411
-rw-r--r--  lib/Object/LLVMBuild.txt | 22
-rw-r--r--  lib/Object/MachOObject.cpp | 35
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 694
-rw-r--r--  lib/Object/Object.cpp | 150
-rw-r--r--  lib/Object/ObjectFile.cpp | 4
-rw-r--r--  lib/Support/APFloat.cpp | 123
-rw-r--r--  lib/Support/APInt.cpp | 219
-rw-r--r--  lib/Support/Allocator.cpp | 4
-rw-r--r--  lib/Support/Atomic.cpp | 2
-rw-r--r--  lib/Support/BlockFrequency.cpp | 12
-rw-r--r--  lib/Support/BranchProbability.cpp | 13
-rw-r--r--  lib/Support/CMakeLists.txt | 7
-rw-r--r--  lib/Support/CommandLine.cpp | 26
-rw-r--r--  lib/Support/ConstantRange.cpp | 100
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp | 1
-rw-r--r--  lib/Support/DAGDeltaAlgorithm.cpp | 3
-rw-r--r--  lib/Support/DataExtractor.cpp | 2
-rw-r--r--  lib/Support/DataStream.cpp | 98
-rw-r--r--  lib/Support/Dwarf.cpp | 2
-rw-r--r--  lib/Support/FileUtilities.cpp | 1
-rw-r--r--  lib/Support/FoldingSet.cpp | 26
-rw-r--r--  lib/Support/GraphWriter.cpp | 77
-rw-r--r--  lib/Support/Hashing.cpp | 29
-rw-r--r--  lib/Support/Host.cpp | 13
-rw-r--r--  lib/Support/IntrusiveRefCntPtr.cpp | 14
-rw-r--r--  lib/Support/JSONParser.cpp | 302
-rw-r--r--  lib/Support/LLVMBuild.txt | 21
-rw-r--r--  lib/Support/LockFileManager.cpp | 216
-rw-r--r--  lib/Support/ManagedStatic.cpp | 8
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 38
-rw-r--r--  lib/Support/Mutex.cpp | 119
-rw-r--r--  lib/Support/Path.cpp | 36
-rw-r--r--  lib/Support/PathV2.cpp | 179
-rw-r--r--  lib/Support/Program.cpp | 1
-rw-r--r--  lib/Support/RWMutex.cpp | 104
-rw-r--r--  lib/Support/SmallPtrSet.cpp | 51
-rw-r--r--  lib/Support/SourceMgr.cpp | 141
-rw-r--r--  lib/Support/Statistic.cpp | 16
-rw-r--r--  lib/Support/StreamableMemoryObject.cpp | 140
-rw-r--r--  lib/Support/StringExtras.cpp | 21
-rw-r--r--  lib/Support/StringMap.cpp | 61
-rw-r--r--  lib/Support/StringRef.cpp | 168
-rw-r--r--  lib/Support/TargetRegistry.cpp | 2
-rw-r--r--  lib/Support/ThreadLocal.cpp | 2
-rw-r--r--  lib/Support/Threading.cpp | 8
-rw-r--r--  lib/Support/Timer.cpp | 16
-rw-r--r--  lib/Support/Triple.cpp | 642
-rw-r--r--  lib/Support/Unix/Host.inc | 13
-rw-r--r--  lib/Support/Unix/Path.inc | 16
-rw-r--r--  lib/Support/Unix/PathV2.inc | 85
-rw-r--r--  lib/Support/Unix/Process.inc | 8
-rw-r--r--  lib/Support/Unix/Program.inc | 12
-rw-r--r--  lib/Support/Unix/Signals.inc | 20
-rw-r--r--  lib/Support/Valgrind.cpp | 13
-rw-r--r--  lib/Support/Windows/Host.inc | 5
-rw-r--r--  lib/Support/Windows/Path.inc | 35
-rw-r--r--  lib/Support/Windows/PathV2.inc | 170
-rw-r--r--  lib/Support/Windows/Process.inc | 4
-rw-r--r--  lib/Support/Windows/Program.inc | 36
-rw-r--r--  lib/Support/Windows/Signals.inc | 4
-rw-r--r--  lib/Support/Windows/Windows.h | 106
-rw-r--r--  lib/Support/YAMLParser.cpp | 2117
-rw-r--r--  lib/Support/raw_ostream.cpp | 1
-rw-r--r--  lib/TableGen/CMakeLists.txt | 5
-rw-r--r--  lib/TableGen/Error.cpp | 4
-rw-r--r--  lib/TableGen/LLVMBuild.txt | 22
-rw-r--r--  lib/TableGen/Record.cpp | 173
-rw-r--r--  lib/TableGen/TGLexer.cpp | 57
-rw-r--r--  lib/TableGen/TGLexer.h | 6
-rw-r--r--  lib/TableGen/TGParser.cpp | 554
-rw-r--r--  lib/TableGen/TGParser.h | 73
-rw-r--r--  lib/TableGen/TableGenAction.cpp | 15
-rw-r--r--  lib/TableGen/TableGenBackend.cpp | 4
-rw-r--r--  lib/Target/ARM/ARM.h | 4
-rw-r--r--  lib/Target/ARM/ARM.td | 20
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 165
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 5
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 665
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 18
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 220
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 28
-rw-r--r--  lib/Target/ARM/ARMBuildAttrs.h | 2
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 21
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 38
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 25
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 1323
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 2
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.cpp | 33
-rw-r--r--  lib/Target/ARM/ARMELFWriterInfo.h | 1
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 581
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 1317
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 454
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.h | 5
-rw-r--r--  lib/Target/ARM/ARMGlobalMerge.cpp | 219
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 28
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.h | 8
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 284
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 1150
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 29
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 112
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 19
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 8
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 753
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 3103
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 127
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 405
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 494
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 214
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 19
-rw-r--r--  lib/Target/ARM/ARMPerfectShuffle.h | 2
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 188
-rw-r--r--  lib/Target/ARM/ARMRelocations.h | 2
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 10
-rw-r--r--  lib/Target/ARM/ARMScheduleA8.td | 19
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 36
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 12
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 18
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 108
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 24
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 4
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 9
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 3562
-rw-r--r--  lib/Target/ARM/AsmParser/CMakeLists.txt | 8
-rw-r--r--  lib/Target/ARM/AsmParser/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 46
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 1359
-rw-r--r--  lib/Target/ARM/Disassembler/CMakeLists.txt | 8
-rw-r--r--  lib/Target/ARM/Disassembler/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 192
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 34
-rw-r--r--  lib/Target/ARM/InstPrinter/CMakeLists.txt | 5
-rw-r--r--  lib/Target/ARM/InstPrinter/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/ARM/LLVMBuild.txt | 35
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h | 11
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 180
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 36
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 283
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h | 24
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 10
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 76
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 5
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 2
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 24
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 116
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 9
-rw-r--r--  lib/Target/ARM/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 10
-rw-r--r--  lib/Target/ARM/README.txt | 21
-rw-r--r--  lib/Target/ARM/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/ARM/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 19
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 22
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 8
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 35
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 21
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 31
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 8
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 3
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 141
-rw-r--r--  lib/Target/Alpha/Alpha.h | 43
-rw-r--r--  lib/Target/Alpha/Alpha.td | 68
-rw-r--r--  lib/Target/Alpha/AlphaAsmPrinter.cpp | 166
-rw-r--r--  lib/Target/Alpha/AlphaBranchSelector.cpp | 66
-rw-r--r--  lib/Target/Alpha/AlphaCallingConv.td | 38
-rw-r--r--  lib/Target/Alpha/AlphaFrameLowering.cpp | 143
-rw-r--r--  lib/Target/Alpha/AlphaFrameLowering.h | 43
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 425
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 962
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 142
-rw-r--r--  lib/Target/Alpha/AlphaInstrFormats.td | 268
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 382
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 85
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 1159
-rw-r--r--  lib/Target/Alpha/AlphaLLRP.cpp | 158
-rw-r--r--  lib/Target/Alpha/AlphaMachineFunctionInfo.h | 62
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 199
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 56
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.td | 133
-rw-r--r--  lib/Target/Alpha/AlphaRelocations.h | 31
-rw-r--r--  lib/Target/Alpha/AlphaSchedule.td | 85
-rw-r--r--  lib/Target/Alpha/AlphaSelectionDAGInfo.cpp | 23
-rw-r--r--  lib/Target/Alpha/AlphaSelectionDAGInfo.h | 31
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.cpp | 35
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.h | 49
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 51
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 66
-rw-r--r--  lib/Target/Alpha/CMakeLists.txt | 38
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp | 23
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h | 29
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp | 78
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h | 40
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/CMakeLists.txt | 11
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Alpha/Makefile | 21
-rw-r--r--  lib/Target/Alpha/README.txt | 42
-rw-r--r--  lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp | 20
-rw-r--r--  lib/Target/Alpha/TargetInfo/CMakeLists.txt | 13
-rw-r--r--  lib/Target/Alpha/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/Blackfin/Blackfin.h | 31
-rw-r--r--  lib/Target/Blackfin/Blackfin.td | 202
-rw-r--r--  lib/Target/Blackfin/BlackfinAsmPrinter.cpp | 156
-rw-r--r--  lib/Target/Blackfin/BlackfinCallingConv.td | 30
-rw-r--r--  lib/Target/Blackfin/BlackfinFrameLowering.cpp | 130
-rw-r--r--  lib/Target/Blackfin/BlackfinFrameLowering.h | 47
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 180
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 645
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.h | 83
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrFormats.td | 34
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 256
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 81
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 862
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp | 104
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsicInfo.h | 32
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsics.td | 34
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 344
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 77
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 277
-rw-r--r--  lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp | 24
-rw-r--r--  lib/Target/Blackfin/BlackfinSelectionDAGInfo.h | 31
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.cpp | 44
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.h | 49
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 43
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.h | 68
-rw-r--r--  lib/Target/Blackfin/CMakeLists.txt | 38
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp | 22
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h | 29
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp | 81
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h | 38
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt | 11
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Blackfin/Makefile | 23
-rw-r--r--  lib/Target/Blackfin/README.txt | 244
-rw-r--r--  lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp | 21
-rw-r--r--  lib/Target/Blackfin/TargetInfo/CMakeLists.txt | 13
-rw-r--r--  lib/Target/Blackfin/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 3617
-rw-r--r--  lib/Target/CBackend/CMakeLists.txt | 17
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 42
-rw-r--r--  lib/Target/CBackend/Makefile | 16
-rw-r--r--  lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp | 21
-rw-r--r--  lib/Target/CBackend/TargetInfo/CMakeLists.txt | 11
-rw-r--r--  lib/Target/CBackend/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/CMakeLists.txt | 46
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt | 27
-rw-r--r--  lib/Target/CellSPU/CellSDKIntrinsics.td | 2
-rw-r--r--  lib/Target/CellSPU/LLVMBuild.txt | 32
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt | 5
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 8
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 2
-rw-r--r--  lib/Target/CellSPU/SPU.h | 2
-rw-r--r--  lib/Target/CellSPU/SPU.td | 4
-rw-r--r--  lib/Target/CellSPU/SPU128InstrInfo.td | 4
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td | 2
-rw-r--r--  lib/Target/CellSPU/SPUAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td | 4
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.h | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 18
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 142
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 5
-rw-r--r--  lib/Target/CellSPU/SPUInstrBuilder.h | 2
-rw-r--r--  lib/Target/CellSPU/SPUInstrFormats.td | 6
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUMachineFunction.cpp | 14
-rw-r--r--  lib/Target/CellSPU/SPUMachineFunction.h | 3
-rw-r--r--  lib/Target/CellSPU/SPUMathInstr.td | 2
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td | 2
-rw-r--r--  lib/Target/CellSPU/SPUNopFiller.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td | 6
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 8
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.td | 6
-rw-r--r--  lib/Target/CellSPU/SPUSchedule.td | 6
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.cpp | 3
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 41
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 20
-rw-r--r--  lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/CellSPU/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/CppBackend/CMakeLists.txt | 9
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 124
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 8
-rw-r--r--  lib/Target/CppBackend/LLVMBuild.txt | 31
-rw-r--r--  lib/Target/CppBackend/Makefile | 2
-rw-r--r--  lib/Target/CppBackend/TargetInfo/CMakeLists.txt | 5
-rw-r--r--  lib/Target/CppBackend/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Hexagon/CMakeLists.txt | 37
-rw-r--r--  lib/Target/Hexagon/Hexagon.h | 74
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 72
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 313
-rwxr-xr-x  lib/Target/Hexagon/HexagonAsmPrinter.h | 165
-rw-r--r--  lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 235
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConv.td | 35
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.cpp | 207
-rw-r--r--  lib/Target/Hexagon/HexagonCallingConvLower.h | 189
-rw-r--r--  lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 177
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 332
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 50
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 644
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 1485
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 1496
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 162
-rw-r--r--  lib/Target/Hexagon/HexagonImmediates.td | 508
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 308
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 67
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 2732
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 185
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 3052
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV3.td | 137
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 5746
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsics.td | 3462
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsDerived.td | 29
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV3.td | 50
-rw-r--r--  lib/Target/Hexagon/HexagonIntrinsicsV4.td | 369
-rw-r--r--  lib/Target/Hexagon/HexagonMCInst.h | 41
-rw-r--r--  lib/Target/Hexagon/HexagonMCInstLower.cpp | 93
-rw-r--r--  lib/Target/Hexagon/HexagonMachineFunctionInfo.h | 75
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 288
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 315
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 90
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.td | 167
-rw-r--r--  lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 82
-rw-r--r--  lib/Target/Hexagon/HexagonSchedule.td | 54
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV4.td | 59
-rw-r--r--  lib/Target/Hexagon/HexagonSelectCCInfo.td | 121
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 46
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 40
-rw-r--r--  lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 129
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 62
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.h | 74
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 145
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 83
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 94
-rw-r--r--  lib/Target/Hexagon/HexagonTargetObjectFile.h | 40
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 3642
-rw-r--r--  lib/Target/Hexagon/HexagonVarargsCallingConvention.h | 141
-rw-r--r--  lib/Target/Hexagon/InstPrinter/CMakeLists.txt | 5
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 198
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h | 75
-rw-r--r--  lib/Target/Hexagon/InstPrinter/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Hexagon/InstPrinter/Makefile | 15
-rw-r--r--  lib/Target/Hexagon/LLVMBuild.txt | 32
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 6
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 70
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 36
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 30
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 95
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h | 39
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Hexagon/Makefile | 23
-rw-r--r--  lib/Target/Hexagon/TargetInfo/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp | 19
-rw-r--r--  lib/Target/Hexagon/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Hexagon/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/LLVMBuild.txt | 56
-rw-r--r--  lib/Target/MBlaze/AsmParser/CMakeLists.txt | 7
-rw-r--r--  lib/Target/MBlaze/AsmParser/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 10
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 3
-rw-r--r--  lib/Target/MBlaze/CMakeLists.txt | 34
-rw-r--r--  lib/Target/MBlaze/Disassembler/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MBlaze/Disassembler/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 31
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h | 6
-rw-r--r--  lib/Target/MBlaze/InstPrinter/CMakeLists.txt | 5
-rw-r--r--  lib/Target/MBlaze/InstPrinter/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h | 8
-rw-r--r--  lib/Target/MBlaze/LLVMBuild.txt | 34
-rw-r--r--  lib/Target/MBlaze/MBlaze.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 8
-rw-r--r--  lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 22
-rw-r--r--  lib/Target/MBlaze/MBlazeELFWriterInfo.cpp | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeELFWriterInfo.h | 1
-rw-r--r--  lib/Target/MBlaze/MBlazeFrameLowering.cpp | 39
-rw-r--r--  lib/Target/MBlaze/MBlazeFrameLowering.h | 3
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 24
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 6
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFPU.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFSL.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFormats.td | 26
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.td | 171
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 16
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsics.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeMCInstLower.cpp | 11
-rw-r--r--  lib/Target/MBlaze/MBlazeMCInstLower.h | 3
-rw-r--r--  lib/Target/MBlaze/MBlazeMachineFunction.cpp | 14
-rw-r--r--  lib/Target/MBlaze/MBlazeMachineFunction.h | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeRelocations.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule3.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule5.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 51
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 9
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt | 8
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp | 53
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h | 5
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp | 77
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp | 19
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 13
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h | 8
-rw-r--r--  lib/Target/MBlaze/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MBlaze/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MSP430/CMakeLists.txt | 26
-rw-r--r--  lib/Target/MSP430/InstPrinter/CMakeLists.txt | 5
-rw-r--r--  lib/Target/MSP430/InstPrinter/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp | 1
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 7
-rw-r--r--  lib/Target/MSP430/LLVMBuild.txt | 32
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 8
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 11
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430.td | 2
-rw-r--r--  lib/Target/MSP430/MSP430AsmPrinter.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430BranchSelector.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 40
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430InstrFormats.td | 2
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 32
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 2
-rw-r--r--  lib/Target/MSP430/MSP430MCInstLower.cpp | 11
-rw-r--r--  lib/Target/MSP430/MSP430MCInstLower.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430MachineFunctionInfo.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430MachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 11
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.td | 2
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 35
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 10
-rw-r--r--  lib/Target/MSP430/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MSP430/TargetInfo/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Mangler.cpp | 11
-rw-r--r--  lib/Target/Mips/AsmParser/CMakeLists.txt | 6
-rw-r--r--  lib/Target/Mips/AsmParser/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Mips/AsmParser/Makefile | 15
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 66
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 32
-rw-r--r--  lib/Target/Mips/InstPrinter/CMakeLists.txt | 5
-rw-r--r--  lib/Target/Mips/InstPrinter/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Mips/InstPrinter/Makefile | 2
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 70
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 16
-rw-r--r--  lib/Target/Mips/LLVMBuild.txt | 34
-rw-r--r--  lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 8
-rw-r--r--  lib/Target/Mips/MCTargetDesc/LLVMBuild.txt | 23
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 260
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 126
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 249
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 122
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 5
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 256
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 45
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 25
-rw-r--r--  lib/Target/Mips/Makefile | 4
-rw-r--r--  lib/Target/Mips/Mips.h | 2
-rw-r--r--  lib/Target/Mips/Mips.td | 10
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 237
-rw-r--r--  lib/Target/Mips/MipsAnalyzeImmediate.cpp | 153
-rw-r--r--  lib/Target/Mips/MipsAnalyzeImmediate.h | 63
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 230
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 20
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 56
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 223
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 194
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 33
-rw-r--r--  lib/Target/Mips/MipsEmitGPRestore.cpp | 13
-rw-r--r--  lib/Target/Mips/MipsExpandPseudo.cpp | 20
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 204
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 2
-rw-r--r--lib/Target/Mips/MipsISelDAGToDAG.cpp460
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp1297
-rw-r--r--lib/Target/Mips/MipsISelLowering.h25
-rw-r--r--lib/Target/Mips/MipsInstrFPU.td268
-rw-r--r--lib/Target/Mips/MipsInstrFormats.td41
-rw-r--r--lib/Target/Mips/MipsInstrInfo.cpp107
-rw-r--r--lib/Target/Mips/MipsInstrInfo.h91
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td654
-rw-r--r--lib/Target/Mips/MipsJITInfo.cpp17
-rw-r--r--lib/Target/Mips/MipsJITInfo.h4
-rw-r--r--lib/Target/Mips/MipsMCInstLower.cpp337
-rw-r--r--lib/Target/Mips/MipsMCInstLower.h20
-rw-r--r--lib/Target/Mips/MipsMCSymbolRefExpr.cpp70
-rw-r--r--lib/Target/Mips/MipsMCSymbolRefExpr.h67
-rw-r--r--lib/Target/Mips/MipsMachineFunction.cpp50
-rw-r--r--lib/Target/Mips/MipsMachineFunction.h26
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.cpp256
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.h7
-rw-r--r--lib/Target/Mips/MipsRegisterInfo.td136
-rw-r--r--lib/Target/Mips/MipsRelocations.h10
-rw-r--r--lib/Target/Mips/MipsSchedule.td2
-rw-r--r--lib/Target/Mips/MipsSubtarget.cpp22
-rw-r--r--lib/Target/Mips/MipsSubtarget.h8
-rw-r--r--lib/Target/Mips/MipsTargetMachine.cpp120
-rw-r--r--lib/Target/Mips/MipsTargetMachine.h42
-rw-r--r--lib/Target/Mips/MipsTargetObjectFile.cpp2
-rw-r--r--lib/Target/Mips/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Mips/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/PTX/CMakeLists.txt24
-rw-r--r--lib/Target/PTX/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PTX/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp77
-rw-r--r--lib/Target/PTX/InstPrinter/PTXInstPrinter.h8
-rw-r--r--lib/Target/PTX/LLVMBuild.txt32
-rw-r--r--lib/Target/PTX/MCTargetDesc/CMakeLists.txt7
-rw-r--r--lib/Target/PTX/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h71
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp2
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h6
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp11
-rw-r--r--lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h2
-rw-r--r--lib/Target/PTX/PTX.h1
-rw-r--r--lib/Target/PTX/PTX.td2
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.cpp344
-rw-r--r--lib/Target/PTX/PTXAsmPrinter.h4
-rw-r--r--lib/Target/PTX/PTXFPRoundingModePass.cpp6
-rw-r--r--lib/Target/PTX/PTXFrameLowering.cpp2
-rw-r--r--lib/Target/PTX/PTXFrameLowering.h2
-rw-r--r--lib/Target/PTX/PTXISelLowering.cpp168
-rw-r--r--lib/Target/PTX/PTXISelLowering.h9
-rw-r--r--lib/Target/PTX/PTXInstrFormats.td2
-rw-r--r--lib/Target/PTX/PTXInstrInfo.cpp25
-rw-r--r--lib/Target/PTX/PTXInstrInfo.h2
-rw-r--r--lib/Target/PTX/PTXInstrInfo.td46
-rw-r--r--lib/Target/PTX/PTXInstrLoadStore.td2
-rw-r--r--lib/Target/PTX/PTXIntrinsicInstrInfo.td2
-rw-r--r--lib/Target/PTX/PTXMCAsmStreamer.cpp30
-rw-r--r--lib/Target/PTX/PTXMFInfoExtract.cpp23
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/PTX/PTXMachineFunctionInfo.h158
-rw-r--r--lib/Target/PTX/PTXParamManager.cpp4
-rw-r--r--lib/Target/PTX/PTXParamManager.h3
-rw-r--r--lib/Target/PTX/PTXRegAlloc.cpp7
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.cpp46
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.h7
-rw-r--r--lib/Target/PTX/PTXRegisterInfo.td3
-rw-r--r--lib/Target/PTX/PTXSelectionDAGInfo.cpp5
-rw-r--r--lib/Target/PTX/PTXSubtarget.cpp6
-rw-r--r--lib/Target/PTX/PTXSubtarget.h3
-rw-r--r--lib/Target/PTX/PTXTargetMachine.cpp338
-rw-r--r--lib/Target/PTX/PTXTargetMachine.h35
-rw-r--r--lib/Target/PTX/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PTX/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/CMakeLists.txt31
-rw-r--r--lib/Target/PowerPC/InstPrinter/CMakeLists.txt5
-rw-r--r--lib/Target/PowerPC/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp9
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h10
-rw-r--r--lib/Target/PowerPC/LLVMBuild.txt33
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt8
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp75
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp103
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp6
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h10
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp2
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp12
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h10
-rw-r--r--lib/Target/PowerPC/PPC.h5
-rw-r--r--lib/Target/PowerPC/PPC.td16
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp52
-rw-r--r--lib/Target/PowerPC/PPCBranchSelector.cpp2
-rw-r--r--lib/Target/PowerPC/PPCCallingConv.td35
-rw-r--r--lib/Target/PowerPC/PPCCodeEmitter.cpp10
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.cpp38
-rw-r--r--lib/Target/PowerPC/PPCFrameLowering.h2
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.cpp189
-rw-r--r--lib/Target/PowerPC/PPCHazardRecognizers.h28
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp12
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.cpp252
-rw-r--r--lib/Target/PowerPC/PPCISelLowering.h36
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td102
-rw-r--r--lib/Target/PowerPC/PPCInstrAltivec.td54
-rw-r--r--lib/Target/PowerPC/PPCInstrFormats.td63
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp129
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.h13
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td97
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.cpp8
-rw-r--r--lib/Target/PowerPC/PPCJITInfo.h2
-rw-r--r--lib/Target/PowerPC/PPCMCInstLower.cpp4
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp15
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h3
-rw-r--r--lib/Target/PowerPC/PPCPerfectShuffle.h2
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.cpp282
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.h10
-rw-r--r--lib/Target/PowerPC/PPCRegisterInfo.td6
-rw-r--r--lib/Target/PowerPC/PPCRelocations.h6
-rw-r--r--lib/Target/PowerPC/PPCSchedule.td71
-rw-r--r--lib/Target/PowerPC/PPCSchedule440.td616
-rw-r--r--lib/Target/PowerPC/PPCScheduleA2.td652
-rw-r--r--lib/Target/PowerPC/PPCScheduleG3.td9
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4.td9
-rw-r--r--lib/Target/PowerPC/PPCScheduleG4Plus.td9
-rw-r--r--lib/Target/PowerPC/PPCScheduleG5.td9
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp24
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.h12
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.cpp75
-rw-r--r--lib/Target/PowerPC/PPCTargetMachine.h33
-rw-r--r--lib/Target/PowerPC/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/PowerPC/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/README.txt40
-rw-r--r--lib/Target/Sparc/CMakeLists.txt25
-rw-r--r--lib/Target/Sparc/DelaySlotFiller.cpp18
-rw-r--r--lib/Target/Sparc/FPMover.cpp8
-rw-r--r--lib/Target/Sparc/LLVMBuild.txt32
-rw-r--r--lib/Target/Sparc/MCTargetDesc/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp2
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp8
-rw-r--r--lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h2
-rw-r--r--lib/Target/Sparc/Sparc.h5
-rw-r--r--lib/Target/Sparc/Sparc.td6
-rw-r--r--lib/Target/Sparc/SparcAsmPrinter.cpp22
-rw-r--r--lib/Target/Sparc/SparcCallingConv.td6
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.cpp2
-rw-r--r--lib/Target/Sparc/SparcFrameLowering.h2
-rw-r--r--lib/Target/Sparc/SparcISelDAGToDAG.cpp1
-rw-r--r--lib/Target/Sparc/SparcISelLowering.cpp50
-rw-r--r--lib/Target/Sparc/SparcISelLowering.h8
-rw-r--r--lib/Target/Sparc/SparcInstrFormats.td6
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp19
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h11
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.td6
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/Sparc/SparcMachineFunctionInfo.h1
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp12
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.h4
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.td7
-rw-r--r--lib/Target/Sparc/SparcSubtarget.cpp4
-rw-r--r--lib/Target/Sparc/SparcSubtarget.h3
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.cpp65
-rw-r--r--lib/Target/Sparc/SparcTargetMachine.h18
-rw-r--r--lib/Target/Sparc/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/Sparc/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/SystemZ/CMakeLists.txt36
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt14
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/Makefile16
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp32
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h30
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp81
-rw-r--r--lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h38
-rw-r--r--lib/Target/SystemZ/Makefile22
-rw-r--r--lib/Target/SystemZ/SystemZ.h52
-rw-r--r--lib/Target/SystemZ/SystemZ.td61
-rw-r--r--lib/Target/SystemZ/SystemZAsmPrinter.cpp221
-rw-r--r--lib/Target/SystemZ/SystemZCallingConv.td46
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.cpp386
-rw-r--r--lib/Target/SystemZ/SystemZFrameLowering.h57
-rw-r--r--lib/Target/SystemZ/SystemZISelDAGToDAG.cpp779
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.cpp868
-rw-r--r--lib/Target/SystemZ/SystemZISelLowering.h145
-rw-r--r--lib/Target/SystemZ/SystemZInstrBuilder.h128
-rw-r--r--lib/Target/SystemZ/SystemZInstrFP.td340
-rw-r--r--lib/Target/SystemZ/SystemZInstrFormats.td133
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.cpp439
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.h113
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td1147
-rw-r--r--lib/Target/SystemZ/SystemZMachineFunctionInfo.h51
-rw-r--r--lib/Target/SystemZ/SystemZOperands.td325
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.cpp143
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.h60
-rw-r--r--lib/Target/SystemZ/SystemZRegisterInfo.td205
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp23
-rw-r--r--lib/Target/SystemZ/SystemZSelectionDAGInfo.h31
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.cpp54
-rw-r--r--lib/Target/SystemZ/SystemZSubtarget.h48
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.cpp40
-rw-r--r--lib/Target/SystemZ/SystemZTargetMachine.h68
-rw-r--r--lib/Target/SystemZ/TargetInfo/CMakeLists.txt13
-rw-r--r--lib/Target/SystemZ/TargetInfo/Makefile15
-rw-r--r--lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp19
-rw-r--r--lib/Target/TargetData.cpp123
-rw-r--r--lib/Target/TargetFrameLowering.cpp45
-rw-r--r--lib/Target/TargetInstrInfo.cpp42
-rw-r--r--lib/Target/TargetJITInfo.cpp14
-rw-r--r--lib/Target/TargetLibraryInfo.cpp112
-rw-r--r--lib/Target/TargetLoweringObjectFile.cpp38
-rw-r--r--lib/Target/TargetMachine.cpp228
-rw-r--r--lib/Target/TargetMachineC.cpp197
-rw-r--r--lib/Target/TargetRegisterInfo.cpp4
-rw-r--r--lib/Target/X86/AsmParser/CMakeLists.txt8
-rw-r--r--lib/Target/X86/AsmParser/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/AsmParser/X86AsmLexer.cpp7
-rw-r--r--lib/Target/X86/AsmParser/X86AsmParser.cpp730
-rw-r--r--lib/Target/X86/CMakeLists.txt36
-rw-r--r--lib/Target/X86/Disassembler/CMakeLists.txt6
-rw-r--r--lib/Target/X86/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.cpp235
-rw-r--r--lib/Target/X86/Disassembler/X86Disassembler.h46
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.c80
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoder.h15
-rw-r--r--lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h23
-rw-r--r--lib/Target/X86/InstPrinter/CMakeLists.txt6
-rw-r--r--lib/Target/X86/InstPrinter/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp78
-rw-r--r--lib/Target/X86/InstPrinter/X86ATTInstPrinter.h10
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.cpp329
-rw-r--r--lib/Target/X86/InstPrinter/X86InstComments.h2
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp72
-rw-r--r--lib/Target/X86/InstPrinter/X86IntelInstPrinter.h9
-rw-r--r--lib/Target/X86/LLVMBuild.txt35
-rw-r--r--lib/Target/X86/MCTargetDesc/CMakeLists.txt12
-rw-r--r--lib/Target/X86/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp80
-rw-r--r--lib/Target/X86/MCTargetDesc/X86BaseInfo.h111
-rw-r--r--lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp224
-rw-r--r--lib/Target/X86/MCTargetDesc/X86FixupKinds.h2
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp25
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h22
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp377
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp105
-rw-r--r--lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h11
-rw-r--r--lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp65
-rw-r--r--lib/Target/X86/README-SSE.txt20
-rw-r--r--lib/Target/X86/README.txt42
-rw-r--r--lib/Target/X86/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/X86/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/Utils/CMakeLists.txt5
-rw-r--r--lib/Target/X86/Utils/LLVMBuild.txt23
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.cpp193
-rw-r--r--lib/Target/X86/Utils/X86ShuffleDecode.h84
-rw-r--r--lib/Target/X86/X86.h2
-rw-r--r--lib/Target/X86/X86.td114
-rw-r--r--lib/Target/X86/X86AsmPrinter.cpp48
-rw-r--r--lib/Target/X86/X86AsmPrinter.h5
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.cpp2
-rw-r--r--lib/Target/X86/X86COFFMachineModuleInfo.h4
-rw-r--r--lib/Target/X86/X86CallingConv.td58
-rw-r--r--lib/Target/X86/X86CodeEmitter.cpp18
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp8
-rw-r--r--lib/Target/X86/X86FastISel.cpp75
-rw-r--r--lib/Target/X86/X86FloatingPoint.cpp33
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp382
-rw-r--r--lib/Target/X86/X86FrameLowering.h2
-rw-r--r--lib/Target/X86/X86ISelDAGToDAG.cpp548
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp5371
-rw-r--r--lib/Target/X86/X86ISelLowering.h202
-rw-r--r--lib/Target/X86/X86Instr3DNow.td2
-rw-r--r--lib/Target/X86/X86InstrArithmetic.td319
-rw-r--r--lib/Target/X86/X86InstrBuilder.h1
-rw-r--r--lib/Target/X86/X86InstrCMovSetCC.td28
-rw-r--r--lib/Target/X86/X86InstrCompiler.td290
-rw-r--r--lib/Target/X86/X86InstrControl.td196
-rw-r--r--lib/Target/X86/X86InstrExtension.td84
-rw-r--r--lib/Target/X86/X86InstrFMA.td204
-rw-r--r--lib/Target/X86/X86InstrFPStack.td63
-rw-r--r--lib/Target/X86/X86InstrFormats.td340
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td215
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp441
-rw-r--r--lib/Target/X86/X86InstrInfo.h9
-rw-r--r--lib/Target/X86/X86InstrInfo.td223
-rw-r--r--lib/Target/X86/X86InstrMMX.td30
-rw-r--r--lib/Target/X86/X86InstrSSE.td6021
-rw-r--r--lib/Target/X86/X86InstrSVM.td62
-rw-r--r--lib/Target/X86/X86InstrShiftRotate.td548
-rw-r--r--lib/Target/X86/X86InstrSystem.td72
-rw-r--r--lib/Target/X86/X86InstrVMX.td38
-rw-r--r--lib/Target/X86/X86InstrXOP.td307
-rw-r--r--lib/Target/X86/X86JITInfo.cpp8
-rw-r--r--lib/Target/X86/X86JITInfo.h2
-rw-r--r--lib/Target/X86/X86MCInstLower.cpp45
-rw-r--r--lib/Target/X86/X86MCInstLower.h2
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.cpp14
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h8
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp216
-rw-r--r--lib/Target/X86/X86RegisterInfo.h5
-rw-r--r--lib/Target/X86/X86RegisterInfo.td2
-rw-r--r--lib/Target/X86/X86Relocations.h2
-rw-r--r--lib/Target/X86/X86Schedule.td273
-rw-r--r--lib/Target/X86/X86ScheduleAtom.td305
-rw-r--r--lib/Target/X86/X86SelectionDAGInfo.cpp3
-rw-r--r--lib/Target/X86/X86Subtarget.cpp131
-rw-r--r--lib/Target/X86/X86Subtarget.h82
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp77
-rw-r--r--lib/Target/X86/X86TargetMachine.h44
-rw-r--r--lib/Target/X86/X86TargetObjectFile.cpp3
-rw-r--r--lib/Target/X86/X86TargetObjectFile.h3
-rw-r--r--lib/Target/X86/X86VZeroUpper.cpp234
-rw-r--r--lib/Target/XCore/CMakeLists.txt25
-rw-r--r--lib/Target/XCore/LLVMBuild.txt32
-rw-r--r--lib/Target/XCore/MCTargetDesc/CMakeLists.txt5
-rw-r--r--lib/Target/XCore/MCTargetDesc/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp3
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h5
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp8
-rw-r--r--lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h2
-rw-r--r--lib/Target/XCore/TargetInfo/CMakeLists.txt6
-rw-r--r--lib/Target/XCore/TargetInfo/LLVMBuild.txt23
-rw-r--r--lib/Target/XCore/XCore.h3
-rw-r--r--lib/Target/XCore/XCore.td3
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.cpp11
-rw-r--r--lib/Target/XCore/XCoreFrameLowering.h2
-rw-r--r--lib/Target/XCore/XCoreISelDAGToDAG.cpp13
-rw-r--r--lib/Target/XCore/XCoreISelLowering.cpp69
-rw-r--r--lib/Target/XCore/XCoreISelLowering.h8
-rw-r--r--lib/Target/XCore/XCoreInstrFormats.td2
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.cpp4
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.h4
-rw-r--r--lib/Target/XCore/XCoreInstrInfo.td2
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.cpp14
-rw-r--r--lib/Target/XCore/XCoreMachineFunctionInfo.h4
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.cpp24
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.h13
-rw-r--r--lib/Target/XCore/XCoreRegisterInfo.td2
-rw-r--r--lib/Target/XCore/XCoreSubtarget.cpp4
-rw-r--r--lib/Target/XCore/XCoreSubtarget.h3
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.cpp31
-rw-r--r--lib/Target/XCore/XCoreTargetMachine.h11
-rw-r--r--lib/Target/XCore/XCoreTargetObjectFile.h2
-rw-r--r--lib/Transforms/CMakeLists.txt1
-rw-r--r--lib/Transforms/IPO/CMakeLists.txt10
-rw-r--r--lib/Transforms/IPO/ConstantMerge.cpp14
-rw-r--r--lib/Transforms/IPO/DeadArgumentElimination.cpp2
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp227
-rw-r--r--lib/Transforms/IPO/GlobalOpt.cpp715
-rw-r--r--lib/Transforms/IPO/InlineAlways.cpp102
-rw-r--r--lib/Transforms/IPO/InlineSimple.cpp58
-rw-r--r--lib/Transforms/IPO/Inliner.cpp149
-rw-r--r--lib/Transforms/IPO/Internalize.cpp7
-rw-r--r--lib/Transforms/IPO/LLVMBuild.txt23
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp27
-rw-r--r--lib/Transforms/IPO/PruneEH.cpp3
-rw-r--r--lib/Transforms/InstCombine/CMakeLists.txt8
-rw-r--r--lib/Transforms/InstCombine/InstCombine.h10
-rw-r--r--lib/Transforms/InstCombine/InstCombineAddSub.cpp74
-rw-r--r--lib/Transforms/InstCombine/InstCombineAndOrXor.cpp243
-rw-r--r--lib/Transforms/InstCombine/InstCombineCalls.cpp160
-rw-r--r--lib/Transforms/InstCombine/InstCombineCasts.cpp44
-rw-r--r--lib/Transforms/InstCombine/InstCombineCompares.cpp89
-rw-r--r--lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp71
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp58
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp39
-rw-r--r--lib/Transforms/InstCombine/InstCombineShifts.cpp83
-rw-r--r--lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp97
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp375
-rw-r--r--lib/Transforms/InstCombine/InstCombineWorklist.h4
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp85
-rw-r--r--lib/Transforms/InstCombine/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp937
-rw-r--r--lib/Transforms/Instrumentation/CMakeLists.txt10
-rw-r--r--lib/Transforms/Instrumentation/FunctionBlackList.cpp79
-rw-r--r--lib/Transforms/Instrumentation/FunctionBlackList.h37
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp139
-rw-r--r--lib/Transforms/Instrumentation/Instrumentation.cpp2
-rw-r--r--lib/Transforms/Instrumentation/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp4
-rw-r--r--lib/Transforms/Instrumentation/PathProfiling.cpp13
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp311
-rw-r--r--lib/Transforms/LLVMBuild.txt24
-rw-r--r--lib/Transforms/Makefile2
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt10
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp47
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp13
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp92
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp289
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp146
-rw-r--r--lib/Transforms/Scalar/GVN.cpp339
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp226
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp553
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp35
-rw-r--r--lib/Transforms/Scalar/LICM.cpp17
-rw-r--r--lib/Transforms/Scalar/LLVMBuild.txt23
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp6
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp166
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp985
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp58
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp467
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp36
-rw-r--r--lib/Transforms/Scalar/ObjCARC.cpp1228
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp10
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp500
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp1
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp70
-rw-r--r--lib/Transforms/Scalar/SimplifyLibCalls.cpp182
-rw-r--r--lib/Transforms/Scalar/Sink.cpp3
-rw-r--r--lib/Transforms/Utils/AddrModeMatcher.cpp9
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp16
-rw-r--r--lib/Transforms/Utils/BasicInliner.cpp182
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp3
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp34
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt12
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp208
-rw-r--r--lib/Transforms/Utils/CmpInstAnalysis.cpp96
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp7
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp57
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp609
-rw-r--r--lib/Transforms/Utils/LLVMBuild.txt22
-rw-r--r--lib/Transforms/Utils/Local.cpp109
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp61
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp40
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp372
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp22
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp28
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp10
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp64
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp9
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp7
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp619
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp45
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp12
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp20
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp1907
-rw-r--r--lib/Transforms/Vectorize/CMakeLists.txt4
-rw-r--r--lib/Transforms/Vectorize/LLVMBuild.txt24
-rw-r--r--lib/Transforms/Vectorize/Makefile15
-rw-r--r--lib/Transforms/Vectorize/Vectorize.cpp39
-rw-r--r--lib/VMCore/AsmWriter.cpp135
-rw-r--r--lib/VMCore/Attributes.cpp8
-rw-r--r--lib/VMCore/AutoUpgrade.cpp598
-rw-r--r--lib/VMCore/BasicBlock.cpp3
-rw-r--r--lib/VMCore/CMakeLists.txt3
-rw-r--r--lib/VMCore/ConstantFold.cpp546
-rw-r--r--lib/VMCore/Constants.cpp1223
-rw-r--r--lib/VMCore/ConstantsContext.h234
-rw-r--r--lib/VMCore/Core.cpp73
-rw-r--r--lib/VMCore/DebugInfoProbe.cpp225
-rw-r--r--lib/VMCore/DebugLoc.cpp5
-rw-r--r--lib/VMCore/Dominators.cpp202
-rw-r--r--lib/VMCore/Function.cpp52
-rw-r--r--lib/VMCore/GCOV.cpp2
-rw-r--r--lib/VMCore/IRBuilder.cpp4
-rw-r--r--lib/VMCore/Instruction.cpp56
-rw-r--r--lib/VMCore/Instructions.cpp257
-rw-r--r--lib/VMCore/LLVMBuild.txt22
-rw-r--r--lib/VMCore/LLVMContext.cpp18
-rw-r--r--lib/VMCore/LLVMContextImpl.cpp51
-rw-r--r--lib/VMCore/LLVMContextImpl.h162
-rw-r--r--lib/VMCore/Metadata.cpp89
-rw-r--r--lib/VMCore/Module.cpp56
-rw-r--r--lib/VMCore/Pass.cpp24
-rw-r--r--lib/VMCore/PassManager.cpp161
-rw-r--r--lib/VMCore/Type.cpp185
-rw-r--r--lib/VMCore/Use.cpp1
-rw-r--r--lib/VMCore/User.cpp2
-rw-r--r--lib/VMCore/Value.cpp127
-rw-r--r--lib/VMCore/ValueTypes.cpp12
-rw-r--r--lib/VMCore/Verifier.cpp284
-rw-r--r--llvm.spec.in2
-rw-r--r--projects/LLVMBuild.txt21
-rw-r--r--projects/sample/Makefile.common.in12
-rw-r--r--projects/sample/Makefile.llvm.config.in313
-rw-r--r--projects/sample/Makefile.llvm.rules2250
-rwxr-xr-xprojects/sample/autoconf/AutoRegen.sh13
-rw-r--r--projects/sample/autoconf/ExportMap.map7
-rw-r--r--projects/sample/autoconf/configure.ac1470
-rwxr-xr-xprojects/sample/autoconf/install-sh322
-rw-r--r--projects/sample/autoconf/ltmain.sh6863
-rw-r--r--projects/sample/autoconf/m4/build_exeext.m442
-rw-r--r--projects/sample/autoconf/m4/c_printf_a.m431
-rw-r--r--projects/sample/autoconf/m4/check_gnu_make.m426
-rw-r--r--projects/sample/autoconf/m4/config_makefile.m49
-rw-r--r--projects/sample/autoconf/m4/config_project.m414
-rw-r--r--projects/sample/autoconf/m4/cxx_flag_check.m42
-rw-r--r--projects/sample/autoconf/m4/find_std_program.m4118
-rw-r--r--projects/sample/autoconf/m4/func_isinf.m436
-rw-r--r--projects/sample/autoconf/m4/func_isnan.m427
-rw-r--r--projects/sample/autoconf/m4/func_mmap_file.m426
-rw-r--r--projects/sample/autoconf/m4/header_mmap_anonymous.m421
-rw-r--r--projects/sample/autoconf/m4/huge_val.m420
-rw-r--r--projects/sample/autoconf/m4/libtool.m46389
-rw-r--r--projects/sample/autoconf/m4/link_options.m4108
-rw-r--r--projects/sample/autoconf/m4/linux_mixed_64_32.m417
-rw-r--r--projects/sample/autoconf/m4/ltdl.m4418
-rw-r--r--projects/sample/autoconf/m4/need_dev_zero_for_mmap.m417
-rw-r--r--projects/sample/autoconf/m4/path_tclsh.m439
-rw-r--r--projects/sample/autoconf/m4/rand48.m412
-rw-r--r--projects/sample/autoconf/m4/sanity_check.m431
-rw-r--r--projects/sample/autoconf/m4/single_cxx_check.m410
-rw-r--r--projects/sample/autoconf/m4/visibility_inlines_hidden.m422
-rwxr-xr-xprojects/sample/autoconf/mkinstalldirs150
-rwxr-xr-xprojects/sample/configure19637
-rw-r--r--runtime/LLVMBuild.txt21
-rw-r--r--runtime/libprofile/CommonProfiling.c4
-rw-r--r--runtime/libprofile/GCDAProfiling.c12
-rw-r--r--runtime/libprofile/PathProfiling.c9
-rw-r--r--test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll4
-rw-r--r--test/Analysis/BasicAA/aligned-overread.ll25
-rw-r--r--test/Analysis/BasicAA/constant-over-index.ll4
-rw-r--r--test/Analysis/BasicAA/dg.exp3
-rw-r--r--test/Analysis/BasicAA/lit.local.cfg1
-rw-r--r--test/Analysis/BasicAA/phi-and-select.ll16
-rw-r--r--test/Analysis/BlockFrequencyInfo/dg.exp3
-rw-r--r--test/Analysis/BlockFrequencyInfo/lit.local.cfg1
-rw-r--r--test/Analysis/BranchProbabilityInfo/basic.ll90
-rw-r--r--test/Analysis/BranchProbabilityInfo/lit.local.cfg1
-rw-r--r--test/Analysis/BranchProbabilityInfo/loop.ll365
-rw-r--r--test/Analysis/BranchProbabilityInfo/noreturn.ll79
-rw-r--r--test/Analysis/CallGraph/dg.exp3
-rw-r--r--test/Analysis/CallGraph/lit.local.cfg1
-rw-r--r--test/Analysis/Dominators/dg.exp3
-rw-r--r--test/Analysis/Dominators/invoke.ll19
-rw-r--r--test/Analysis/Dominators/lit.local.cfg1
-rw-r--r--test/Analysis/GlobalsModRef/dg.exp3
-rw-r--r--test/Analysis/GlobalsModRef/lit.local.cfg1
-rw-r--r--test/Analysis/GlobalsModRef/pr12351.ll33
-rw-r--r--test/Analysis/LoopDependenceAnalysis/dg.exp3
-rw-r--r--test/Analysis/LoopDependenceAnalysis/lit.local.cfg1
-rw-r--r--test/Analysis/LoopInfo/dg.exp3
-rw-r--r--test/Analysis/LoopInfo/lit.local.cfg1
-rw-r--r--test/Analysis/PostDominators/dg.exp3
-rw-r--r--test/Analysis/PostDominators/lit.local.cfg1
-rw-r--r--test/Analysis/Profiling/dg.exp4
-rw-r--r--test/Analysis/Profiling/lit.local.cfg1
-rw-r--r--test/Analysis/RegionInfo/dg.exp3
-rw-r--r--test/Analysis/RegionInfo/lit.local.cfg1
-rw-r--r--test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll3
-rw-r--r--test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll47
-rw-r--r--test/Analysis/ScalarEvolution/avoid-smax-1.ll20
-rw-r--r--test/Analysis/ScalarEvolution/dg.exp3
-rw-r--r--test/Analysis/ScalarEvolution/lit.local.cfg1
-rw-r--r--test/Analysis/ScalarEvolution/load.ll65
-rw-r--r--test/Analysis/ScalarEvolution/nsw-offset.ll6
-rw-r--r--test/Analysis/ScalarEvolution/nsw.ll24
-rw-r--r--test/Analysis/ScalarEvolution/trip-count11.ll29
-rw-r--r--test/Analysis/ScalarEvolution/trip-count12.ll35
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/dg.exp3
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll2
-rw-r--r--test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg1
-rw-r--r--test/Archive/dg.exp3
-rw-r--r--test/Archive/lit.local.cfg1
-rw-r--r--test/Assembler/AutoUpgradeIntrinsics.ll55
-rw-r--r--test/Assembler/aggregate-constant-values.ll25
-rw-r--r--test/Assembler/auto_upgrade_intrinsics.ll44
-rw-r--r--test/Assembler/bcwrap.ll6
-rw-r--r--test/Assembler/dg.exp3
-rw-r--r--test/Assembler/extractvalue-invalid-idx.ll4
-rw-r--r--test/Assembler/getelementptr_struct.ll4
-rw-r--r--test/Assembler/huge-array.ll4
-rw-r--r--test/Assembler/insertextractvalue.ll25
-rw-r--r--test/Assembler/insertvalue-invalid-idx.ll8
-rw-r--r--test/Assembler/invalid_cast.ll4
-rw-r--r--test/Assembler/invalid_cast2.ll4
-rw-r--r--test/Assembler/lit.local.cfg1
-rw-r--r--test/Assembler/metadata.ll11
-rw-r--r--test/Assembler/vbool-cmp.ll17
-rw-r--r--test/Assembler/vector-cmp.ll10
-rw-r--r--test/Assembler/vector-select.ll12
-rw-r--r--test/Assembler/vector-shift.ll31
-rw-r--r--test/Bindings/Ocaml/dg.exp5
-rw-r--r--test/Bindings/Ocaml/lit.local.cfg6
-rw-r--r--test/Bitcode/AutoUpgradeGlobals.ll4
-rw-r--r--test/Bitcode/AutoUpgradeGlobals.ll.bc bin 312 -> 0 bytes
-rw-r--r--test/Bitcode/dg.exp3
-rw-r--r--test/Bitcode/lit.local.cfg1
-rw-r--r--test/Bitcode/null-type.ll4
-rw-r--r--test/Bitcode/shuffle.ll31
-rw-r--r--test/Bitcode/sse42_crc32.ll28
-rw-r--r--test/Bitcode/sse42_crc32.ll.bc bin 480 -> 0 bytes
-rw-r--r--test/Bitcode/ssse3_palignr.ll82
-rw-r--r--test/Bitcode/ssse3_palignr.ll.bc bin 1504 -> 0 bytes
-rw-r--r--test/BugPoint/dg.exp3
-rw-r--r--test/BugPoint/lit.local.cfg1
-rw-r--r--test/CMakeLists.txt138
-rw-r--r--test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll2
-rw-r--r--test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/ARM/2009-08-31-LSDA-Name.ll11
-rw-r--r--test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll2
-rw-r--r--test/CodeGen/ARM/2009-09-24-spill-align.ll2
-rw-r--r--test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll2
-rw-r--r--test/CodeGen/ARM/2010-05-18-PostIndexBug.ll2
-rw-r--r--test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll2
-rw-r--r--test/CodeGen/ARM/2010-05-21-BuildVector.ll8
-rw-r--r--test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll2
-rw-r--r--test/CodeGen/ARM/2010-07-26-GlobalMerge.ll11
-rw-r--r--test/CodeGen/ARM/2010-08-04-EHCrash.ll12
-rw-r--r--test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll2
-rw-r--r--test/CodeGen/ARM/2010-11-29-PrologueBug.ll4
-rw-r--r--test/CodeGen/ARM/2010-12-07-PEIBug.ll30
-rw-r--r--test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll13
-rw-r--r--test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll2
-rw-r--r--test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll25
-rw-r--r--test/CodeGen/ARM/2011-06-16-TailCallByVal.ll2
-rw-r--r--test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll4
-rw-r--r--test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll3
-rw-r--r--test/CodeGen/ARM/2011-08-25-ldmia_ret.ll2
-rw-r--r--test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll22
-rw-r--r--test/CodeGen/ARM/2011-10-26-memset-inline.ll21
-rw-r--r--test/CodeGen/ARM/2011-10-26-memset-with-neon.ll20
-rw-r--r--test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll24
-rw-r--r--test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll15
-rw-r--r--test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll37
-rw-r--r--test/CodeGen/ARM/2011-11-14-EarlyClobber.ll62
-rw-r--r--test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll38
-rw-r--r--test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll302
-rw-r--r--test/CodeGen/ARM/2011-11-30-MergeAlignment.ll24
-rw-r--r--test/CodeGen/ARM/2011-12-14-machine-sink.ll48
-rw-r--r--test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll55
-rw-r--r--test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll105
-rw-r--r--test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll67
-rw-r--r--test/CodeGen/ARM/2012-01-26-CoalescerBug.ll21
-rw-r--r--test/CodeGen/ARM/2012-01-26-CopyPropKills.ll121
-rw-r--r--test/CodeGen/ARM/2012-02-01-CoalescerBug.ll26
-rw-r--r--test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll36
-rw-r--r--test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll15
-rw-r--r--test/CodeGen/ARM/2012-03-26-FoldImmBug.ll33
-rw-r--r--test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll24
-rw-r--r--test/CodeGen/ARM/2012-04-10-DAGCombine.ll31
-rw-r--r--test/CodeGen/ARM/arm-returnaddr.ll8
-rw-r--r--test/CodeGen/ARM/atomic-op.ll88
-rw-r--r--test/CodeGen/ARM/avoid-cpsr-rmw.ll44
-rw-r--r--test/CodeGen/ARM/call-tc.ll71
-rw-r--r--test/CodeGen/ARM/call.ll2
-rw-r--r--test/CodeGen/ARM/clz.ll4
-rw-r--r--test/CodeGen/ARM/code-placement.ll6
-rw-r--r--test/CodeGen/ARM/commute-movcc.ll67
-rw-r--r--test/CodeGen/ARM/cse-call.ll31
-rw-r--r--test/CodeGen/ARM/cse-libcalls.ll4
-rw-r--r--test/CodeGen/ARM/ctor_order.ll30
-rw-r--r--test/CodeGen/ARM/ctz.ll4
-rw-r--r--test/CodeGen/ARM/dagcombine-anyexttozeroext.ll30
-rw-r--r--test/CodeGen/ARM/debug-info-arg.ll2
-rw-r--r--test/CodeGen/ARM/debug-info-blocks.ll4
-rw-r--r--test/CodeGen/ARM/debug-info-d16-reg.ll2
-rw-r--r--test/CodeGen/ARM/debug-info-qreg.ll6
-rw-r--r--test/CodeGen/ARM/debug-info-s16-reg.ll3
-rw-r--r--test/CodeGen/ARM/debug-info-sreg2.ll8
-rw-r--r--test/CodeGen/ARM/dg.exp5
-rw-r--r--test/CodeGen/ARM/eh-resume-darwin.ll13
-rw-r--r--test/CodeGen/ARM/ehabi-unwind.ll16
-rw-r--r--test/CodeGen/ARM/fast-isel-GEP-coalesce.ll65
-rw-r--r--test/CodeGen/ARM/fast-isel-binary.ll116
-rw-r--r--test/CodeGen/ARM/fast-isel-br-const.ll48
-rw-r--r--test/CodeGen/ARM/fast-isel-br-phi.ll44
-rw-r--r--test/CodeGen/ARM/fast-isel-call.ll128
-rw-r--r--test/CodeGen/ARM/fast-isel-cmp-imm.ll250
-rw-r--r--test/CodeGen/ARM/fast-isel-conversion.ll242
-rw-r--r--test/CodeGen/ARM/fast-isel-deadcode.ll22
-rw-r--r--test/CodeGen/ARM/fast-isel-fold.ll80
-rw-r--r--test/CodeGen/ARM/fast-isel-icmp.ll47
-rw-r--r--test/CodeGen/ARM/fast-isel-indirectbr.ll17
-rw-r--r--test/CodeGen/ARM/fast-isel-intrinsic.ll110
-rw-r--r--test/CodeGen/ARM/fast-isel-ldr-str-arm.ll55
-rw-r--r--test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll168
-rw-r--r--test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll149
-rw-r--r--test/CodeGen/ARM/fast-isel-mvn.ll107
-rw-r--r--test/CodeGen/ARM/fast-isel-redefinition.ll2
-rw-r--r--test/CodeGen/ARM/fast-isel-ret.ll57
-rw-r--r--test/CodeGen/ARM/fast-isel-select.ll99
-rw-r--r--test/CodeGen/ARM/fast-isel.ll96
-rw-r--r--test/CodeGen/ARM/fcopysign.ll26
-rw-r--r--test/CodeGen/ARM/fold-const.ll4
-rw-r--r--test/CodeGen/ARM/fp.ll2
-rw-r--r--test/CodeGen/ARM/fpcmp-opt.ll63
-rw-r--r--test/CodeGen/ARM/fpcmp_ueq.ll2
-rw-r--r--test/CodeGen/ARM/fpmem.ll8
-rw-r--r--test/CodeGen/ARM/fusedMAC.ll185
-rw-r--r--test/CodeGen/ARM/global-merge.ll2
-rw-r--r--test/CodeGen/ARM/globals.ll1
-rw-r--r--test/CodeGen/ARM/hello.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt1.ll6
-rw-r--r--test/CodeGen/ARM/ifcvt10.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt11.ll4
-rw-r--r--test/CodeGen/ARM/ifcvt3.ll7
-rw-r--r--test/CodeGen/ARM/ifcvt5.ll2
-rw-r--r--test/CodeGen/ARM/ifcvt6.ll2
-rw-r--r--test/CodeGen/ARM/inlineasm3.ll10
-rw-r--r--test/CodeGen/ARM/insn-sched1.ll2
-rw-r--r--test/CodeGen/ARM/ldrd-memoper.ll15
-rw-r--r--test/CodeGen/ARM/ldrd.ll68
-rw-r--r--test/CodeGen/ARM/lit.local.cfg6
-rw-r--r--test/CodeGen/ARM/load_i1_select.ll19
-rw-r--r--test/CodeGen/ARM/log2_not_readnone.ll15
-rw-r--r--test/CodeGen/ARM/long_shift.ll4
-rw-r--r--test/CodeGen/ARM/lsr-icmp-imm.ll33
-rw-r--r--test/CodeGen/ARM/lsr-on-unrolled-loops.ll640
-rw-r--r--test/CodeGen/ARM/lsr-unfolded-offset.ll2
-rw-r--r--test/CodeGen/ARM/machine-cse-cmp.ll33
-rw-r--r--test/CodeGen/ARM/memcpy-inline.ll3
-rw-r--r--test/CodeGen/ARM/memfunc.ll3
-rw-r--r--test/CodeGen/ARM/mul_const.ll42
-rw-r--r--test/CodeGen/ARM/neon_ld1.ll14
-rw-r--r--test/CodeGen/ARM/neon_ld2.ll15
-rw-r--r--test/CodeGen/ARM/neon_spill.ll54
-rw-r--r--test/CodeGen/ARM/odr_comdat.ll16
-rw-r--r--test/CodeGen/ARM/opt-shuff-tstore.ll19
-rw-r--r--test/CodeGen/ARM/peephole-bitcast.ll4
-rw-r--r--test/CodeGen/ARM/reg_asc_order.ll16
-rw-r--r--test/CodeGen/ARM/reg_sequence.ll8
-rw-r--r--test/CodeGen/ARM/rev.ll4
-rw-r--r--test/CodeGen/ARM/select-imm.ll4
-rw-r--r--test/CodeGen/ARM/select.ll10
-rw-r--r--test/CodeGen/ARM/select_xform.ll46
-rw-r--r--test/CodeGen/ARM/shifter_operand.ll4
-rw-r--r--test/CodeGen/ARM/spill-q.ll2
-rw-r--r--test/CodeGen/ARM/str_pre-2.ll1
-rw-r--r--test/CodeGen/ARM/subreg-remat.ll16
-rw-r--r--test/CodeGen/ARM/tail-dup.ll44
-rw-r--r--test/CodeGen/ARM/test-sharedidx.ll96
-rw-r--r--test/CodeGen/ARM/vbsl-constant.ll18
-rw-r--r--test/CodeGen/ARM/vdiv_combine.ll12
-rw-r--r--test/CodeGen/ARM/vdup.ll2
-rw-r--r--test/CodeGen/ARM/vector-DAGCombine.ll16
-rw-r--r--test/CodeGen/ARM/vector-extend-narrow.ll46
-rw-r--r--test/CodeGen/ARM/vext.ll2
-rw-r--r--test/CodeGen/ARM/vlddup.ll4
-rw-r--r--test/CodeGen/ARM/vldlane.ll14
-rw-r--r--test/CodeGen/ARM/vmov.ll63
-rw-r--r--test/CodeGen/ARM/vmul.ll11
-rw-r--r--test/CodeGen/ARM/vrev.ll6
-rw-r--r--test/CodeGen/ARM/vst2.ll18
-rw-r--r--test/CodeGen/ARM/vstlane.ll9
-rw-r--r--test/CodeGen/ARM/widen-vmovs.ll8
-rw-r--r--test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll40
-rw-r--r--test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll27
-rw-r--r--test/CodeGen/Alpha/2006-01-26-VaargBreak.ll14
-rw-r--r--test/CodeGen/Alpha/2006-04-04-zextload.ll30
-rw-r--r--test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll18
-rw-r--r--test/CodeGen/Alpha/2006-11-01-vastart.ll15
-rw-r--r--test/CodeGen/Alpha/2007-11-27-mulneg3.ll13
-rw-r--r--test/CodeGen/Alpha/2008-11-10-smul_lohi.ll22
-rw-r--r--test/CodeGen/Alpha/2008-11-12-Add128.ll14
-rw-r--r--test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll6
-rw-r--r--test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll28
-rw-r--r--test/CodeGen/Alpha/2010-08-01-mulreduce64.ll11
-rw-r--r--test/CodeGen/Alpha/add.ll178
-rw-r--r--test/CodeGen/Alpha/add128.ll9
-rw-r--r--test/CodeGen/Alpha/bic.ll9
-rw-r--r--test/CodeGen/Alpha/bsr.ll12
-rw-r--r--test/CodeGen/Alpha/call_adj.ll13
-rw-r--r--test/CodeGen/Alpha/cmov.ll23
-rw-r--r--test/CodeGen/Alpha/cmpbge.ll16
-rw-r--r--test/CodeGen/Alpha/ctlz.ll14
-rw-r--r--test/CodeGen/Alpha/ctlz_e.ll11
-rw-r--r--test/CodeGen/Alpha/ctpop.ll17
-rw-r--r--test/CodeGen/Alpha/dg.exp5
-rw-r--r--test/CodeGen/Alpha/eqv.ll10
-rw-r--r--test/CodeGen/Alpha/i32_sub_1.ll9
-rw-r--r--test/CodeGen/Alpha/illegal-element-type.ll23
-rw-r--r--test/CodeGen/Alpha/jmp_table.ll99
-rw-r--r--test/CodeGen/Alpha/mb.ll6
-rw-r--r--test/CodeGen/Alpha/mul128.ll7
-rw-r--r--test/CodeGen/Alpha/mul5.ll33
-rw-r--r--test/CodeGen/Alpha/neg1.ll7
-rw-r--r--test/CodeGen/Alpha/not.ll8
-rw-r--r--test/CodeGen/Alpha/ornot.ll10
-rw-r--r--test/CodeGen/Alpha/private.ll19
-rw-r--r--test/CodeGen/Alpha/rpcc.ll9
-rw-r--r--test/CodeGen/Alpha/srl_and.ll10
-rw-r--r--test/CodeGen/Alpha/sub128.ll9
-rw-r--r--test/CodeGen/Alpha/weak.ll16
-rw-r--r--test/CodeGen/Alpha/zapnot.ll9
-rw-r--r--test/CodeGen/Alpha/zapnot2.ll9
-rw-r--r--test/CodeGen/Alpha/zapnot3.ll15
-rw-r--r--test/CodeGen/Alpha/zapnot4.ll7
-rw-r--r--test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll16
-rw-r--r--test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll17
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll19
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-MissingDead.ll25
-rw-r--r--test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll16
-rw-r--r--test/CodeGen/Blackfin/add-overflow.ll18
-rw-r--r--test/CodeGen/Blackfin/add.ll5
-rw-r--r--test/CodeGen/Blackfin/addsub-i128.ll42
-rw-r--r--test/CodeGen/Blackfin/basic-i1.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i16.ll36
-rw-r--r--test/CodeGen/Blackfin/basic-i32.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i64.ll51
-rw-r--r--test/CodeGen/Blackfin/basic-i8.ll51
-rw-r--r--test/CodeGen/Blackfin/basictest.ll19
-rw-r--r--test/CodeGen/Blackfin/cmp-small-imm.ll6
-rw-r--r--test/CodeGen/Blackfin/cmp64.ll17
-rw-r--r--test/CodeGen/Blackfin/ct32.ll20
-rw-r--r--test/CodeGen/Blackfin/ct64.ll20
-rw-r--r--test/CodeGen/Blackfin/ctlz16.ll18
-rw-r--r--test/CodeGen/Blackfin/ctlz64.ll15
-rw-r--r--test/CodeGen/Blackfin/ctpop16.ll18
-rw-r--r--test/CodeGen/Blackfin/cttz16.ll18
-rw-r--r--test/CodeGen/Blackfin/cycles.ll17
-rw-r--r--test/CodeGen/Blackfin/dg.exp5
-rw-r--r--test/CodeGen/Blackfin/double-cast.ll8
-rw-r--r--test/CodeGen/Blackfin/frameindex.ll10
-rw-r--r--test/CodeGen/Blackfin/i17mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i1mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i1ops.ll10
-rw-r--r--test/CodeGen/Blackfin/i216mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i248mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i256mem.ll9
-rw-r--r--test/CodeGen/Blackfin/i256param.ll7
-rw-r--r--test/CodeGen/Blackfin/i56param.ll8
-rw-r--r--test/CodeGen/Blackfin/i8mem.ll10
-rw-r--r--test/CodeGen/Blackfin/inline-asm.ll38
-rw-r--r--test/CodeGen/Blackfin/int-setcc.ll80
-rw-r--r--test/CodeGen/Blackfin/invalid-apint.ll15
-rw-r--r--test/CodeGen/Blackfin/jumptable.ll53
-rw-r--r--test/CodeGen/Blackfin/large-switch.ll187
-rw-r--r--test/CodeGen/Blackfin/load-i16.ll13
-rw-r--r--test/CodeGen/Blackfin/logic-i16.ll16
-rw-r--r--test/CodeGen/Blackfin/many-args.ll23
-rw-r--r--test/CodeGen/Blackfin/mulhu.ll106
-rw-r--r--test/CodeGen/Blackfin/printf.ll10
-rw-r--r--test/CodeGen/Blackfin/printf2.ll8
-rw-r--r--test/CodeGen/Blackfin/promote-logic.ll42
-rw-r--r--test/CodeGen/Blackfin/promote-setcc.ll37
-rw-r--r--test/CodeGen/Blackfin/sdiv.ll5
-rw-r--r--test/CodeGen/Blackfin/simple-select.ll11
-rw-r--r--test/CodeGen/Blackfin/switch.ll18
-rw-r--r--test/CodeGen/Blackfin/switch2.ll16
-rw-r--r--test/CodeGen/Blackfin/sync-intr.ll16
-rw-r--r--test/CodeGen/CBackend/2002-05-16-NameCollide.ll8
-rw-r--r--test/CodeGen/CBackend/2002-05-21-MissingReturn.ll20
-rw-r--r--test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll7
-rw-r--r--test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll8
-rw-r--r--test/CodeGen/CBackend/2002-08-19-DataPointer.ll4
-rw-r--r--test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll5
-rw-r--r--test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll5
-rw-r--r--test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll10
-rw-r--r--test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll17
-rw-r--r--test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll8
-rw-r--r--test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll7
-rw-r--r--test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll6
-rw-r--r--test/CodeGen/CBackend/2002-10-16-External.ll4
-rw-r--r--test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll11
-rw-r--r--test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll8
-rw-r--r--test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll11
-rw-r--r--test/CodeGen/CBackend/2003-05-31-MissingStructName.ll5
-rw-r--r--test/CodeGen/CBackend/2003-06-01-NullPointerType.ll9
-rw-r--r--test/CodeGen/CBackend/2003-06-11-HexConstant.ll4
-rw-r--r--test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll3
-rw-r--r--test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll17
-rw-r--r--test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll3
-rw-r--r--test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll5
-rw-r--r--test/CodeGen/CBackend/2003-10-23-UnusedType.ll8
-rw-r--r--test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll12
-rw-r--r--test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll13
-rw-r--r--test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll16
-rw-r--r--test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll18
-rw-r--r--test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll11
-rw-r--r--test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll6
-rw-r--r--test/CodeGen/CBackend/2004-08-09-va-end-null.ll10
-rw-r--r--test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll12
-rw-r--r--test/CodeGen/CBackend/2004-12-03-ExternStatics.ll10
-rw-r--r--test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll5
-rw-r--r--test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll8
-rw-r--r--test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll18
-rw-r--r--test/CodeGen/CBackend/2005-08-23-Fmod.ll7
-rw-r--r--test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll10
-rw-r--r--test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll49
-rw-r--r--test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll26
-rw-r--r--test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll12
-rw-r--r--test/CodeGen/CBackend/2007-02-05-memset.ll13
-rw-r--r--test/CodeGen/CBackend/2007-02-23-NameConflicts.ll14
-rw-r--r--test/CodeGen/CBackend/2007-07-11-PackedStruct.ll9
-rw-r--r--test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll15
-rw-r--r--test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll14
-rw-r--r--test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll29
-rw-r--r--test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll35
-rw-r--r--test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll12
-rw-r--r--test/CodeGen/CBackend/X86/dg.exp5
-rw-r--r--test/CodeGen/CBackend/dg.exp5
-rw-r--r--test/CodeGen/CBackend/fneg.ll7
-rw-r--r--test/CodeGen/CBackend/pr2408.ll12
-rw-r--r--test/CodeGen/CBackend/vectors.ll37
-rw-r--r--test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll6
-rw-r--r--test/CodeGen/CPP/dg.exp5
-rw-r--r--test/CodeGen/CPP/lit.local.cfg6
-rw-r--r--test/CodeGen/CellSPU/and_ops.ll3
-rw-r--r--test/CodeGen/CellSPU/call.ll4
-rw-r--r--test/CodeGen/CellSPU/call_indirect.ll49
-rw-r--r--test/CodeGen/CellSPU/dg.exp5
-rw-r--r--test/CodeGen/CellSPU/lit.local.cfg6
-rw-r--r--test/CodeGen/CellSPU/nand.ll4
-rw-r--r--test/CodeGen/CellSPU/or_ops.ll3
-rw-r--r--test/CodeGen/CellSPU/rotate_ops.ll2
-rw-r--r--test/CodeGen/CellSPU/select_bits.ll3
-rw-r--r--test/CodeGen/CellSPU/shift_ops.ll12
-rw-r--r--test/CodeGen/CellSPU/shuffles.ll12
-rw-r--r--test/CodeGen/CellSPU/struct_1.ll3
-rw-r--r--test/CodeGen/CellSPU/v2i32.ll20
-rw-r--r--test/CodeGen/Generic/2007-12-31-UnusedSelector.ll2
-rw-r--r--test/CodeGen/Generic/2008-02-04-Ctlz.ll8
-rw-r--r--test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll2
-rw-r--r--test/CodeGen/Generic/bool-vector.ll11
-rw-r--r--test/CodeGen/Generic/dbg-declare.ll59
-rw-r--r--test/CodeGen/Generic/dg.exp3
-rw-r--r--test/CodeGen/Generic/lit.local.cfg1
-rw-r--r--test/CodeGen/Generic/llvm-ct-intrinsics.ll32
-rw-r--r--test/CodeGen/Generic/pr12507.ll18
-rw-r--r--test/CodeGen/Generic/select.ll8
-rw-r--r--test/CodeGen/Hexagon/args.ll19
-rw-r--r--test/CodeGen/Hexagon/combine.ll18
-rw-r--r--test/CodeGen/Hexagon/double.ll23
-rw-r--r--test/CodeGen/Hexagon/float.ll23
-rw-r--r--test/CodeGen/Hexagon/frame.ll24
-rw-r--r--test/CodeGen/Hexagon/lit.local.cfg6
-rw-r--r--test/CodeGen/Hexagon/mpy.ll20
-rw-r--r--test/CodeGen/Hexagon/static.ll21
-rw-r--r--test/CodeGen/Hexagon/struct_args.ll16
-rw-r--r--test/CodeGen/Hexagon/struct_args_large.ll17
-rw-r--r--test/CodeGen/Hexagon/vaddh.ll17
-rw-r--r--test/CodeGen/MBlaze/cc.ll12
-rw-r--r--test/CodeGen/MBlaze/dg.exp5
-rw-r--r--test/CodeGen/MBlaze/div.ll18
-rw-r--r--test/CodeGen/MBlaze/lit.local.cfg6
-rw-r--r--test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll4
-rw-r--r--test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll8
-rw-r--r--test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll4
-rw-r--r--test/CodeGen/MSP430/2009-10-10-OrImpDef.ll4
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-rx.ll2
-rw-r--r--test/CodeGen/MSP430/AddrMode-bis-xr.ll4
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-rx.ll2
-rw-r--r--test/CodeGen/MSP430/AddrMode-mov-xr.ll2
-rw-r--r--test/CodeGen/MSP430/Inst16mm.ll2
-rw-r--r--test/CodeGen/MSP430/dg.exp5
-rw-r--r--test/CodeGen/MSP430/indirectbr2.ll2
-rw-r--r--test/CodeGen/MSP430/lit.local.cfg6
-rw-r--r--test/CodeGen/Mips/2008-06-05-Carry.ll19
-rw-r--r--test/CodeGen/Mips/2008-07-03-SRet.ll23
-rw-r--r--test/CodeGen/Mips/2008-07-07-Float2Int.ll17
-rw-r--r--test/CodeGen/Mips/2008-07-16-SignExtInReg.ll12
-rw-r--r--test/CodeGen/Mips/2008-07-22-Cstpool.ll17
-rw-r--r--test/CodeGen/Mips/2008-08-01-AsmInline.ll58
-rw-r--r--test/CodeGen/Mips/2008-08-04-Bitconvert.ll17
-rw-r--r--test/CodeGen/Mips/2008-08-06-Alloca.ll20
-rw-r--r--test/CodeGen/Mips/2008-08-08-ctlz.ll10
-rw-r--r--test/CodeGen/Mips/2008-11-10-xint_to_fp.ll3
-rw-r--r--test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll23
-rw-r--r--test/CodeGen/Mips/2010-07-20-Switch.ll46
-rw-r--r--test/CodeGen/Mips/2010-11-09-CountLeading.ll10
-rw-r--r--test/CodeGen/Mips/blockaddr.ll40
-rw-r--r--test/CodeGen/Mips/br-jmp.ll13
-rw-r--r--test/CodeGen/Mips/bswap.ll25
-rwxr-xr-xtest/CodeGen/Mips/cmov.ll23
-rw-r--r--test/CodeGen/Mips/cprestore.ll8
-rw-r--r--test/CodeGen/Mips/dg.exp5
-rw-r--r--test/CodeGen/Mips/eh.ll10
-rw-r--r--test/CodeGen/Mips/extins.ll2
-rw-r--r--test/CodeGen/Mips/fabs.ll52
-rw-r--r--test/CodeGen/Mips/fcopysign-f32-f64.ll50
-rw-r--r--test/CodeGen/Mips/fcopysign.ll80
-rw-r--r--test/CodeGen/Mips/fmadd1.ll88
-rw-r--r--test/CodeGen/Mips/fneg.ll17
-rw-r--r--test/CodeGen/Mips/fp-indexed-ls.ll98
-rw-r--r--test/CodeGen/Mips/fpcmp.ll18
-rw-r--r--test/CodeGen/Mips/frem.ll13
-rw-r--r--test/CodeGen/Mips/global-address.ll46
-rw-r--r--test/CodeGen/Mips/global-pointer-reg.ll22
-rw-r--r--test/CodeGen/Mips/i64arg.ll8
-rw-r--r--test/CodeGen/Mips/imm.ll38
-rw-r--r--test/CodeGen/Mips/indirectcall.ll8
-rw-r--r--test/CodeGen/Mips/inlineasm64.ll17
-rw-r--r--test/CodeGen/Mips/inlineasmmemop.ll2
-rw-r--r--test/CodeGen/Mips/largeimmprinting.ll10
-rw-r--r--test/CodeGen/Mips/lit.local.cfg6
-rw-r--r--test/CodeGen/Mips/mips64-fp-indexed-ls.ll110
-rw-r--r--test/CodeGen/Mips/mips64countleading.ll19
-rw-r--r--test/CodeGen/Mips/mips64directive.ll11
-rw-r--r--test/CodeGen/Mips/mips64ext.ll26
-rw-r--r--test/CodeGen/Mips/mips64extins.ll55
-rw-r--r--test/CodeGen/Mips/mips64fpimm0.ll7
-rw-r--r--test/CodeGen/Mips/mips64fpldst.ll12
-rw-r--r--test/CodeGen/Mips/mips64imm.ll52
-rw-r--r--test/CodeGen/Mips/mips64instrs.ll8
-rw-r--r--test/CodeGen/Mips/mips64intldst.ll26
-rw-r--r--test/CodeGen/Mips/mips64lea.ll12
-rw-r--r--test/CodeGen/Mips/mips64muldiv.ll49
-rw-r--r--test/CodeGen/Mips/mips64shift.ll8
-rw-r--r--test/CodeGen/Mips/mipslopat.ll2
-rw-r--r--test/CodeGen/Mips/o32_cc_byval.ll54
-rw-r--r--test/CodeGen/Mips/private.ll19
-rw-r--r--test/CodeGen/Mips/rotate.ll2
-rw-r--r--test/CodeGen/Mips/swzero.ll19
-rw-r--r--test/CodeGen/Mips/tls.ll27
-rw-r--r--test/CodeGen/Mips/unalignedload.ll14
-rw-r--r--test/CodeGen/Mips/zeroreg.ll27
-rw-r--r--test/CodeGen/PTX/cvt.ll12
-rw-r--r--test/CodeGen/PTX/dg.exp5
-rw-r--r--test/CodeGen/PTX/ld.ll30
-rw-r--r--test/CodeGen/PTX/lit.local.cfg6
-rw-r--r--test/CodeGen/PTX/mad-disabling.ll12
-rw-r--r--test/CodeGen/PTX/mov.ll10
-rw-r--r--test/CodeGen/PTX/parameter-order.ll2
-rw-r--r--test/CodeGen/PTX/printf.ll25
-rw-r--r--test/CodeGen/PTX/st.ll30
-rw-r--r--test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll23
-rw-r--r--test/CodeGen/PowerPC/2007-03-24-cntlzd.ll4
-rw-r--r--test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll2
-rw-r--r--test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll6
-rw-r--r--test/CodeGen/PowerPC/2010-02-12-saveCR.ll22
-rw-r--r--test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll191
-rw-r--r--test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll225
-rw-r--r--test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll16
-rw-r--r--test/CodeGen/PowerPC/Frames-alloca.ll6
-rw-r--r--test/CodeGen/PowerPC/Frames-large.ll11
-rw-r--r--test/CodeGen/PowerPC/LargeAbsoluteAddr.ll6
-rw-r--r--test/CodeGen/PowerPC/a2-fp-basic.ll33
-rw-r--r--test/CodeGen/PowerPC/big-endian-formal-args.ll2
-rw-r--r--test/CodeGen/PowerPC/bl8_elf_nop.ll16
-rw-r--r--test/CodeGen/PowerPC/can-lower-ret.ll19
-rw-r--r--test/CodeGen/PowerPC/cttz.ll4
-rw-r--r--test/CodeGen/PowerPC/dbg.ll40
-rw-r--r--test/CodeGen/PowerPC/dg.exp5
-rw-r--r--test/CodeGen/PowerPC/indirectbr.ll14
-rw-r--r--test/CodeGen/PowerPC/lit.local.cfg6
-rw-r--r--test/CodeGen/PowerPC/ppc32-vaarg.ll167
-rw-r--r--test/CodeGen/PowerPC/ppc440-fp-basic.ll33
-rw-r--r--test/CodeGen/PowerPC/ppc440-msync.ll27
-rw-r--r--test/CodeGen/PowerPC/ppc64-ind-call.ll16
-rw-r--r--test/CodeGen/PowerPC/ppc64-linux-func-size.ll21
-rw-r--r--test/CodeGen/PowerPC/ppc64-prefetch.ll15
-rw-r--r--test/CodeGen/PowerPC/ppc64-vaarg-int.ll20
-rw-r--r--test/CodeGen/SPARC/2011-12-03-TailDuplication.ll25
-rw-r--r--test/CodeGen/SPARC/dg.exp5
-rw-r--r--test/CodeGen/SPARC/lit.local.cfg6
-rw-r--r--test/CodeGen/SystemZ/00-RetVoid.ll6
-rw-r--r--test/CodeGen/SystemZ/01-RetArg.ll6
-rw-r--r--test/CodeGen/SystemZ/01-RetImm.ll49
-rw-r--r--test/CodeGen/SystemZ/02-MemArith.ll133
-rw-r--r--test/CodeGen/SystemZ/02-RetAdd.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetAddImm.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetAnd.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetAndImm.ll28
-rw-r--r--test/CodeGen/SystemZ/02-RetNeg.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetOr.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetOrImm.ll28
-rw-r--r--test/CodeGen/SystemZ/02-RetSub.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetSubImm.ll7
-rw-r--r--test/CodeGen/SystemZ/02-RetXor.ll6
-rw-r--r--test/CodeGen/SystemZ/02-RetXorImm.ll6
-rw-r--r--test/CodeGen/SystemZ/03-RetAddImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetAddSubreg.ll22
-rw-r--r--test/CodeGen/SystemZ/03-RetAndImmSubreg.ll38
-rw-r--r--test/CodeGen/SystemZ/03-RetAndSubreg.ll21
-rw-r--r--test/CodeGen/SystemZ/03-RetArgSubreg.ll19
-rw-r--r--test/CodeGen/SystemZ/03-RetImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetNegImmSubreg.ll8
-rw-r--r--test/CodeGen/SystemZ/03-RetOrImmSubreg.ll60
-rw-r--r--test/CodeGen/SystemZ/03-RetOrSubreg.ll23
-rw-r--r--test/CodeGen/SystemZ/03-RetSubImmSubreg.ll42
-rw-r--r--test/CodeGen/SystemZ/03-RetSubSubreg.ll22
-rw-r--r--test/CodeGen/SystemZ/03-RetXorImmSubreg.ll58
-rw-r--r--test/CodeGen/SystemZ/03-RetXorSubreg.ll23
-rw-r--r--test/CodeGen/SystemZ/04-RetShifts.ll121
-rw-r--r--test/CodeGen/SystemZ/05-LoadAddr.ll11
-rw-r--r--test/CodeGen/SystemZ/05-MemImmStores.ll50
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores.ll44
-rw-r--r--test/CodeGen/SystemZ/05-MemLoadsStores16.ll85
-rw-r--r--test/CodeGen/SystemZ/05-MemRegLoads.ll75
-rw-r--r--test/CodeGen/SystemZ/05-MemRegStores.ll79
-rw-r--r--test/CodeGen/SystemZ/06-CallViaStack.ll17
-rw-r--r--test/CodeGen/SystemZ/06-FrameIdxLoad.ll16
-rw-r--r--test/CodeGen/SystemZ/06-LocalFrame.ll13
-rw-r--r--test/CodeGen/SystemZ/06-SimpleCall.ll12
-rw-r--r--test/CodeGen/SystemZ/07-BrCond.ll141
-rw-r--r--test/CodeGen/SystemZ/07-BrCond32.ll142
-rw-r--r--test/CodeGen/SystemZ/07-BrUnCond.ll18
-rw-r--r--test/CodeGen/SystemZ/07-CmpImm.ll137
-rw-r--r--test/CodeGen/SystemZ/07-CmpImm32.ll139
-rw-r--r--test/CodeGen/SystemZ/07-SelectCC.ll11
-rw-r--r--test/CodeGen/SystemZ/08-DivRem.ll55
-rw-r--r--test/CodeGen/SystemZ/08-DivRemMemOp.ll64
-rw-r--r--test/CodeGen/SystemZ/08-SimpleMuls.ll29
-rw-r--r--test/CodeGen/SystemZ/09-DynamicAlloca.ll14
-rw-r--r--test/CodeGen/SystemZ/09-Globals.ll23
-rw-r--r--test/CodeGen/SystemZ/09-Switches.ll39
-rw-r--r--test/CodeGen/SystemZ/10-FuncsPic.ll27
-rw-r--r--test/CodeGen/SystemZ/10-GlobalsPic.ll29
-rw-r--r--test/CodeGen/SystemZ/11-BSwap.ll74
-rw-r--r--test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll12
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-And32Imm.ll14
-rw-r--r--test/CodeGen/SystemZ/2009-06-02-Rotate.ll13
-rw-r--r--test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll19
-rw-r--r--test/CodeGen/SystemZ/2009-07-04-Shl32.ll27
-rw-r--r--test/CodeGen/SystemZ/2009-07-05-Shifts.ll25
-rw-r--r--test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll25
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll16
-rw-r--r--test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll32
-rw-r--r--test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll21
-rw-r--r--test/CodeGen/SystemZ/2009-08-22-FCopySign.ll22
-rw-r--r--test/CodeGen/SystemZ/2010-01-04-DivMem.ll50
-rw-r--r--test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll28
-rw-r--r--test/CodeGen/SystemZ/dg.exp5
-rw-r--r--test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll6
-rw-r--r--test/CodeGen/Thumb/dg.exp5
-rw-r--r--test/CodeGen/Thumb/large-stack.ll2
-rw-r--r--test/CodeGen/Thumb/lit.local.cfg6
-rw-r--r--test/CodeGen/Thumb/vargs.ll4
-rw-r--r--test/CodeGen/Thumb2/2009-07-21-ISelBug.ll2
-rw-r--r--test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll2
-rw-r--r--test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll4
-rw-r--r--test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll2
-rw-r--r--test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll2
-rw-r--r--test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll28
-rw-r--r--test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll103
-rw-r--r--test/CodeGen/Thumb2/aligned-constants.ll26
-rw-r--r--test/CodeGen/Thumb2/aligned-spill.ll95
-rw-r--r--test/CodeGen/Thumb2/constant-islands.ll1400
-rw-r--r--test/CodeGen/Thumb2/crash.ll31
-rw-r--r--test/CodeGen/Thumb2/dg.exp5
-rw-r--r--test/CodeGen/Thumb2/large-call.ll29
-rw-r--r--test/CodeGen/Thumb2/ldr-str-imm12.ll10
-rw-r--r--test/CodeGen/Thumb2/lit.local.cfg6
-rw-r--r--test/CodeGen/Thumb2/lsr-deficiency.ll11
-rw-r--r--test/CodeGen/Thumb2/machine-licm.ll7
-rw-r--r--test/CodeGen/Thumb2/thumb2-cbnz.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-clz.ll4
-rw-r--r--test/CodeGen/Thumb2/thumb2-ifcvt2.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldm.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-ldrd.ll5
-rw-r--r--test/CodeGen/Thumb2/thumb2-mls.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-mul.ll2
-rw-r--r--test/CodeGen/Thumb2/thumb2-select_xform.ll2
-rw-r--r--test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll13
-rw-r--r--test/CodeGen/X86/2006-05-11-InstrSched.ll6
-rw-r--r--test/CodeGen/X86/2007-01-08-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2007-05-05-Personality.ll14
-rw-r--r--test/CodeGen/X86/2007-11-06-InstrSched.ll2
-rw-r--r--test/CodeGen/X86/2007-12-18-LoadCSEBug.ll2
-rw-r--r--test/CodeGen/X86/2008-01-16-Trampoline.ll14
-rw-r--r--test/CodeGen/X86/2008-02-22-ReMatBug.ll49
-rw-r--r--test/CodeGen/X86/2008-03-18-CoalescerBug.ll51
-rw-r--r--test/CodeGen/X86/2008-05-21-CoalescerBug.ll2
-rw-r--r--test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll2
-rw-r--r--test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll10
-rw-r--r--test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll4
-rw-r--r--test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll4
-rw-r--r--test/CodeGen/X86/2008-09-18-inline-asm-2.ll1
-rw-r--r--test/CodeGen/X86/2008-09-29-VolatileBug.ll2
-rw-r--r--test/CodeGen/X86/2008-12-16-BadShift.ll19
-rw-r--r--test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll2
-rw-r--r--test/CodeGen/X86/2009-01-31-BigShift2.ll2
-rw-r--r--test/CodeGen/X86/2009-02-05-CoalescerBug.ll14
-rw-r--r--test/CodeGen/X86/2009-03-16-SpillerBug.ll167
-rw-r--r--test/CodeGen/X86/2009-03-23-MultiUseSched.ll50
-rw-r--r--test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll3
-rw-r--r--test/CodeGen/X86/2009-05-11-tailmerge-crash.ll2
-rw-r--r--test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll2
-rw-r--r--test/CodeGen/X86/2009-06-05-VZextByteShort.ll2
-rw-r--r--test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll2
-rw-r--r--test/CodeGen/X86/2009-07-17-StackColoringBug.ll55
-rw-r--r--test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll6
-rw-r--r--test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll2
-rw-r--r--test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll12
-rw-r--r--test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll21
-rw-r--r--test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll2
-rw-r--r--test/CodeGen/X86/2010-06-28-DbgEntryPC.ll108
-rw-r--r--test/CodeGen/X86/2010-08-04-MingWCrash.ll13
-rw-r--r--test/CodeGen/X86/2010-08-10-DbgConstant.ll4
-rw-r--r--test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll3
-rw-r--r--test/CodeGen/X86/2011-08-29-InitOrder.ll30
-rw-r--r--test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll29
-rw-r--r--test/CodeGen/X86/2011-10-19-LegelizeLoad.ll28
-rw-r--r--test/CodeGen/X86/2011-10-19-widen_vselect.ll68
-rw-r--r--test/CodeGen/X86/2011-10-21-widen-cmp.ll45
-rw-r--r--test/CodeGen/X86/2011-10-27-tstore.ll16
-rw-r--r--test/CodeGen/X86/2011-10-30-padd.ll20
-rw-r--r--test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll14
-rw-r--r--test/CodeGen/X86/2011-11-22-AVX2-Domains.ll99
-rw-r--r--test/CodeGen/X86/2011-11-30-or.ll25
-rw-r--r--test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll18
-rw-r--r--test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll5
-rw-r--r--test/CodeGen/X86/2011-12-08-AVXISelBugs.ll80
-rw-r--r--test/CodeGen/X86/2011-12-15-vec_shift.ll19
-rw-r--r--test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll16
-rw-r--r--test/CodeGen/X86/2011-12-28-vselecti8.ll20
-rw-r--r--test/CodeGen/X86/2011-12-8-bitcastintprom.ll15
-rw-r--r--test/CodeGen/X86/2011-20-21-zext-ui2fp.ll19
-rw-r--r--test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll155
-rw-r--r--test/CodeGen/X86/2012-01-11-split-cv.ll12
-rw-r--r--test/CodeGen/X86/2012-01-12-extract-sv.ll12
-rw-r--r--test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll34
-rw-r--r--test/CodeGen/X86/2012-01-18-vbitcast.ll14
-rw-r--r--test/CodeGen/X86/2012-02-12-dagco.ll16
-rw-r--r--test/CodeGen/X86/2012-02-14-scalar.ll13
-rw-r--r--test/CodeGen/X86/2012-02-20-MachineCPBug.ll78
-rw-r--r--test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll12
-rw-r--r--test/CodeGen/X86/2012-02-29-CoalescerBug.ll58
-rw-r--r--test/CodeGen/X86/2012-03-15-build_vector_wl.ll10
-rw-r--r--test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll17
-rw-r--r--test/CodeGen/X86/2012-03-26-PostRALICMBug.ll59
-rw-r--r--test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll34
-rw-r--r--test/CodeGen/X86/2012-1-10-buildvector.ll26
-rw-r--r--test/CodeGen/X86/GC/dg.exp5
-rw-r--r--test/CodeGen/X86/GC/lit.local.cfg6
-rw-r--r--test/CodeGen/X86/SwizzleShuff.ll68
-rw-r--r--test/CodeGen/X86/abi-isel.ll20
-rw-r--r--test/CodeGen/X86/add.ll6
-rw-r--r--test/CodeGen/X86/apm.ll4
-rw-r--r--test/CodeGen/X86/atom-lea-sp.ll48
-rw-r--r--test/CodeGen/X86/atom-sched.ll28
-rw-r--r--test/CodeGen/X86/avx-arith.ll11
-rw-r--r--test/CodeGen/X86/avx-basic.ll18
-rw-r--r--test/CodeGen/X86/avx-cast.ll2
-rw-r--r--test/CodeGen/X86/avx-cvt.ll2
-rwxr-xr-xtest/CodeGen/X86/avx-fp2int.ll19
-rw-r--r--test/CodeGen/X86/avx-intrinsics-x86.ll389
-rw-r--r--test/CodeGen/X86/avx-load-store.ll12
-rw-r--r--test/CodeGen/X86/avx-logic.ll40
-rw-r--r--test/CodeGen/X86/avx-minmax.ll8
-rwxr-xr-xtest/CodeGen/X86/avx-sext.ll17
-rw-r--r--test/CodeGen/X86/avx-shift.ll63
-rwxr-xr-xtest/CodeGen/X86/avx-shuffle-x86_32.ll8
-rw-r--r--test/CodeGen/X86/avx-shuffle.ll196
-rw-r--r--test/CodeGen/X86/avx-splat.ll4
-rwxr-xr-xtest/CodeGen/X86/avx-trunc.ll15
-rw-r--r--test/CodeGen/X86/avx-unpack.ll72
-rw-r--r--test/CodeGen/X86/avx-varargs-x86_64.ll15
-rw-r--r--test/CodeGen/X86/avx-vbroadcast.ll32
-rw-r--r--test/CodeGen/X86/avx-vextractf128.ll88
-rw-r--r--test/CodeGen/X86/avx-vinsertf128.ll73
-rw-r--r--test/CodeGen/X86/avx-vperm2f128.ll9
-rw-r--r--test/CodeGen/X86/avx-vpermil.ll11
-rw-r--r--test/CodeGen/X86/avx-vshufp.ll128
-rw-r--r--test/CodeGen/X86/avx-vzeroupper.ll83
-rwxr-xr-xtest/CodeGen/X86/avx-win64-args.ll18
-rw-r--r--test/CodeGen/X86/avx-win64.ll47
-rwxr-xr-xtest/CodeGen/X86/avx-zext.ll30
-rw-r--r--test/CodeGen/X86/avx2-arith.ll76
-rw-r--r--test/CodeGen/X86/avx2-cmp.ll58
-rw-r--r--test/CodeGen/X86/avx2-intrinsics-x86.ll994
-rw-r--r--test/CodeGen/X86/avx2-logic.ll96
-rw-r--r--test/CodeGen/X86/avx2-nontemporal.ll22
-rw-r--r--test/CodeGen/X86/avx2-palignr.ll57
-rw-r--r--test/CodeGen/X86/avx2-phaddsub.ll73
-rw-r--r--test/CodeGen/X86/avx2-shift.ll268
-rw-r--r--test/CodeGen/X86/avx2-unpack.ll86
-rw-r--r--test/CodeGen/X86/avx2-vbroadcast.ll187
-rw-r--r--test/CodeGen/X86/avx2-vperm2i128.ll47
-rw-r--r--test/CodeGen/X86/bc-extract.ll2
-rw-r--r--test/CodeGen/X86/blend-msb.ll37
-rw-r--r--test/CodeGen/X86/block-placement.ll930
-rw-r--r--test/CodeGen/X86/bmi.ll184
-rw-r--r--test/CodeGen/X86/brcond.ll4
-rw-r--r--test/CodeGen/X86/btq.ll35
-rw-r--r--test/CodeGen/X86/byval6.ll2
-rw-r--r--test/CodeGen/X86/cfstring.ll36
-rw-r--r--test/CodeGen/X86/change-compare-stride-0.ll83
-rw-r--r--test/CodeGen/X86/change-compare-stride-1.ll4
-rw-r--r--test/CodeGen/X86/clz.ll145
-rw-r--r--test/CodeGen/X86/cmov.ll4
-rw-r--r--test/CodeGen/X86/cmpxchg16b.ll2
-rw-r--r--test/CodeGen/X86/coalescer-commute1.ll2
-rw-r--r--test/CodeGen/X86/crash.ll6
-rw-r--r--test/CodeGen/X86/dbg-file-name.ll4
-rw-r--r--test/CodeGen/X86/dbg-inline.ll140
-rw-r--r--test/CodeGen/X86/dbg-merge-loc-entry.ll2
-rw-r--r--test/CodeGen/X86/dbg-subrange.ll37
-rw-r--r--test/CodeGen/X86/dbg-value-inlined-parameter.ll2
-rw-r--r--test/CodeGen/X86/dbg-value-location.ll3
-rw-r--r--test/CodeGen/X86/dg.exp5
-rw-r--r--test/CodeGen/X86/divide-by-constant.ll2
-rw-r--r--test/CodeGen/X86/dwarf-comp-dir.ll16
-rw-r--r--test/CodeGen/X86/empty-functions.ll4
-rw-r--r--test/CodeGen/X86/epilogue.ll4
-rw-r--r--test/CodeGen/X86/f16c-intrinsics.ll32
-rw-r--r--test/CodeGen/X86/fast-cc-merge-stack-adj.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-bc.ll2
-rw-r--r--test/CodeGen/X86/fast-isel-gep.ll5
-rw-r--r--test/CodeGen/X86/fast-isel-x86-64.ll16
-rw-r--r--test/CodeGen/X86/fast-isel-x86.ll2
-rw-r--r--test/CodeGen/X86/fast-isel.ll11
-rw-r--r--test/CodeGen/X86/fdiv.ll41
-rw-r--r--test/CodeGen/X86/fltused.ll2
-rw-r--r--test/CodeGen/X86/fltused_function_pointer.ll19
-rw-r--r--test/CodeGen/X86/fma4-intrinsics-x86_64.ll295
-rw-r--r--test/CodeGen/X86/fold-and-shift.ll82
-rw-r--r--test/CodeGen/X86/fold-load.ll2
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-0.ll4
-rw-r--r--test/CodeGen/X86/fold-pcmpeqd-2.ll11
-rw-r--r--test/CodeGen/X86/fp-stack-O0.ll2
-rw-r--r--test/CodeGen/X86/fp-stack-ret-conv.ll2
-rw-r--r--test/CodeGen/X86/fsgsbase.ll57
-rw-r--r--test/CodeGen/X86/gcc_except_table.ll27
-rw-r--r--test/CodeGen/X86/haddsub.ll91
-rw-r--r--test/CodeGen/X86/hoist-invariant-load.ll29
-rw-r--r--test/CodeGen/X86/i128-sdiv.ll24
-rw-r--r--test/CodeGen/X86/inline-asm-fpstack.ll2
-rw-r--r--test/CodeGen/X86/inline-asm-q-regs.ll17
-rw-r--r--test/CodeGen/X86/inline-asm-tied.ll4
-rw-r--r--test/CodeGen/X86/iv-users-in-other-loops.ll300
-rw-r--r--test/CodeGen/X86/jump_sign.ll16
-rw-r--r--test/CodeGen/X86/legalize-libcalls.ll35
-rw-r--r--test/CodeGen/X86/legalize-shift-64.ll56
-rw-r--r--test/CodeGen/X86/lit.local.cfg6
-rw-r--r--test/CodeGen/X86/log2_not_readnone.ll15
-rw-r--r--test/CodeGen/X86/loop-strength-reduce3.ll37
-rw-r--r--test/CodeGen/X86/loop-strength-reduce5.ll4
-rw-r--r--test/CodeGen/X86/lsr-loop-exit-cond.ll45
-rw-r--r--test/CodeGen/X86/lsr-nonaffine.ll2
-rw-r--r--test/CodeGen/X86/lsr-reuse.ll1
-rw-r--r--test/CodeGen/X86/lsr-sort.ll2
-rw-r--r--test/CodeGen/X86/lzcnt.ll56
-rw-r--r--test/CodeGen/X86/machine-cp.ll36
-rw-r--r--test/CodeGen/X86/machine-cse.ll24
-rw-r--r--test/CodeGen/X86/masked-iv-safe.ll6
-rw-r--r--test/CodeGen/X86/mcinst-avx-lowering.ll19
-rw-r--r--test/CodeGen/X86/memcpy.ll13
-rw-r--r--test/CodeGen/X86/misched-new.ll27
-rw-r--r--test/CodeGen/X86/mmx-builtins.ll1
-rw-r--r--test/CodeGen/X86/mmx-pinsrw.ll2
-rw-r--r--test/CodeGen/X86/mmx-vzmovl-2.ll29
-rw-r--r--test/CodeGen/X86/mmx-vzmovl.ll15
-rw-r--r--test/CodeGen/X86/movmsk.ll16
-rw-r--r--test/CodeGen/X86/multiple-loop-post-inc.ll4
-rw-r--r--test/CodeGen/X86/nancvt.ll12
-rw-r--r--test/CodeGen/X86/narrow-shl-load.ll2
-rw-r--r--test/CodeGen/X86/negate-add-zero.ll4
-rw-r--r--test/CodeGen/X86/no-cfi.ll8
-rw-r--r--test/CodeGen/X86/nontemporal.ll9
-rw-r--r--test/CodeGen/X86/null-streamer.ll11
-rw-r--r--test/CodeGen/X86/objc-gc-module-flags.ll13
-rw-r--r--test/CodeGen/X86/object-size.ll6
-rw-r--r--test/CodeGen/X86/odr_comdat.ll16
-rw-r--r--test/CodeGen/X86/optimize-max-3.ll4
-rw-r--r--test/CodeGen/X86/overlap-shift.ll2
-rw-r--r--test/CodeGen/X86/peep-test-3.ll2
-rw-r--r--test/CodeGen/X86/peep-vector-extract-insert.ll2
-rw-r--r--test/CodeGen/X86/personality_size.ll28
-rw-r--r--test/CodeGen/X86/phaddsub.ll170
-rw-r--r--test/CodeGen/X86/pic.ll2
-rw-r--r--test/CodeGen/X86/pointer-vector.ll138
-rw-r--r--test/CodeGen/X86/pr11202.ll19
-rw-r--r--test/CodeGen/X86/pr11415.ll23
-rw-r--r--test/CodeGen/X86/pr12360.ll46
-rw-r--r--test/CodeGen/X86/pr1505b.ll4
-rw-r--r--test/CodeGen/X86/pr2182.ll16
-rw-r--r--test/CodeGen/X86/pr3495-2.ll54
-rw-r--r--test/CodeGen/X86/pr3495.ll81
-rw-r--r--test/CodeGen/X86/prefetch.ll3
-rw-r--r--test/CodeGen/X86/promote.ll42
-rw-r--r--test/CodeGen/X86/rd-mod-wr-eflags.ll179
-rw-r--r--test/CodeGen/X86/red-zone.ll2
-rw-r--r--test/CodeGen/X86/red-zone2.ll2
-rw-r--r--test/CodeGen/X86/reghinting.ll2
-rw-r--r--test/CodeGen/X86/remat-scalar-zero.ll1
-rw-r--r--test/CodeGen/X86/rounding-ops.ll132
-rw-r--r--test/CodeGen/X86/scalar_widen_div.ll38
-rw-r--r--test/CodeGen/X86/segmented-stacks-dynamic.ll64
-rw-r--r--test/CodeGen/X86/segmented-stacks.ll410
-rw-r--r--test/CodeGen/X86/sext-subreg.ll6
-rw-r--r--test/CodeGen/X86/shift-and.ll18
-rw-r--r--test/CodeGen/X86/shift-combine.ll24
-rw-r--r--test/CodeGen/X86/shift-folding.ll78
-rw-r--r--test/CodeGen/X86/shl-i64.ll20
-rw-r--r--test/CodeGen/X86/sibcall-5.ll29
-rw-r--r--test/CodeGen/X86/splat-scalar-load.ll2
-rw-r--r--test/CodeGen/X86/sret.ll23
-rw-r--r--test/CodeGen/X86/sse-align-3.ll4
-rw-r--r--test/CodeGen/X86/sse-domains.ll87
-rw-r--r--test/CodeGen/X86/sse-minmax.ll80
-rw-r--r--test/CodeGen/X86/sse2-blend.ll28
-rw-r--r--test/CodeGen/X86/sse2.ll8
-rw-r--r--test/CodeGen/X86/sse3.ll33
-rw-r--r--test/CodeGen/X86/sse41.ll18
-rw-r--r--test/CodeGen/X86/stack-align.ll6
-rw-r--r--test/CodeGen/X86/stack-align2.ll25
-rw-r--r--test/CodeGen/X86/store-empty-member.ll2
-rw-r--r--test/CodeGen/X86/store_op_load_fold2.ll14
-rw-r--r--test/CodeGen/X86/stride-reuse.ll4
-rw-r--r--test/CodeGen/X86/sub-with-overflow.ll2
-rw-r--r--test/CodeGen/X86/tail-dup-addr.ll2
-rw-r--r--test/CodeGen/X86/tail-opts.ll20
-rw-r--r--test/CodeGen/X86/tailcall-disable.ll40
-rw-r--r--test/CodeGen/X86/tailcallbyval64.ll2
-rw-r--r--test/CodeGen/X86/tailcallstack64.ll4
-rw-r--r--test/CodeGen/X86/thiscall-struct-return.ll47
-rw-r--r--test/CodeGen/X86/tls-pie.ll63
-rw-r--r--test/CodeGen/X86/tls.ll329
-rw-r--r--test/CodeGen/X86/tls1.ll12
-rw-r--r--test/CodeGen/X86/tls10.ll13
-rw-r--r--test/CodeGen/X86/tls11.ll12
-rw-r--r--test/CodeGen/X86/tls12.ll12
-rw-r--r--test/CodeGen/X86/tls13.ll24
-rw-r--r--test/CodeGen/X86/tls14.ll24
-rw-r--r--test/CodeGen/X86/tls15.ll18
-rw-r--r--test/CodeGen/X86/tls2.ll13
-rw-r--r--test/CodeGen/X86/tls3.ll14
-rw-r--r--test/CodeGen/X86/tls4.ll13
-rw-r--r--test/CodeGen/X86/tls5.ll12
-rw-r--r--test/CodeGen/X86/tls6.ll13
-rw-r--r--test/CodeGen/X86/tls7.ll12
-rw-r--r--test/CodeGen/X86/tls8.ll13
-rw-r--r--test/CodeGen/X86/tls9.ll12
-rw-r--r--test/CodeGen/X86/twoaddr-lea.ll4
-rw-r--r--test/CodeGen/X86/uint64-to-float.ll35
-rw-r--r--test/CodeGen/X86/unreachable-stack-protector.ll2
-rw-r--r--test/CodeGen/X86/utf16-cfstrings.ll35
-rw-r--r--test/CodeGen/X86/utf8.ll4
-rw-r--r--test/CodeGen/X86/v-binop-widen.ll2
-rw-r--r--test/CodeGen/X86/vec_call.ll4
-rw-r--r--test/CodeGen/X86/vec_compare-2.ll9
-rw-r--r--test/CodeGen/X86/vec_ctbits.ll8
-rw-r--r--test/CodeGen/X86/vec_extract-sse4.ll2
-rw-r--r--test/CodeGen/X86/vec_fpext.ll14
-rw-r--r--test/CodeGen/X86/vec_return.ll12
-rw-r--r--test/CodeGen/X86/vec_shuffle-20.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-23.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-24.ll2
-rw-r--r--test/CodeGen/X86/vec_shuffle-37.ll18
-rw-r--r--test/CodeGen/X86/vec_shuffle-38.ll24
-rw-r--r--test/CodeGen/X86/vec_shuffle-39.ll86
-rw-r--r--test/CodeGen/X86/vec_shuffle.ll15
-rw-r--r--test/CodeGen/X86/vec_udiv_to_shift.ll15
-rw-r--r--test/CodeGen/X86/vec_zero.ll12
-rw-r--r--test/CodeGen/X86/vec_zero_cse.ll2
-rw-r--r--test/CodeGen/X86/vector-gep.ll88
-rw-r--r--test/CodeGen/X86/vector-variable-idx2.ll26
-rw-r--r--test/CodeGen/X86/volatile.ll10
-rw-r--r--test/CodeGen/X86/vsplit-and.ll13
-rw-r--r--test/CodeGen/X86/widen_arith-1.ll8
-rw-r--r--test/CodeGen/X86/widen_arith-2.ll2
-rw-r--r--test/CodeGen/X86/widen_arith-3.ll9
-rw-r--r--test/CodeGen/X86/widen_cast-1.ll2
-rw-r--r--test/CodeGen/X86/widen_cast-4.ll14
-rw-r--r--test/CodeGen/X86/widen_conv-1.ll3
-rw-r--r--test/CodeGen/X86/widen_conv-4.ll2
-rw-r--r--test/CodeGen/X86/widen_load-0.ll20
-rw-r--r--test/CodeGen/X86/widen_load-1.ll1
-rw-r--r--test/CodeGen/X86/widen_load-2.ll37
-rw-r--r--test/CodeGen/X86/widen_shuffle-1.ll17
-rw-r--r--test/CodeGen/X86/win32_sret.ll28
-rw-r--r--test/CodeGen/X86/win64_alloca_dynalloca.ll6
-rw-r--r--test/CodeGen/X86/win64_vararg.ll2
-rw-r--r--test/CodeGen/X86/win_ftol2.ll144
-rw-r--r--test/CodeGen/X86/x86-shifts.ll58
-rw-r--r--test/CodeGen/X86/xop-intrinsics-x86_64.ll969
-rw-r--r--test/CodeGen/X86/xor.ll2
-rw-r--r--test/CodeGen/X86/zero-remat.ll8
-rw-r--r--test/CodeGen/X86/zext-fold.ll4
-rw-r--r--test/CodeGen/X86/zext-sext.ll1
-rw-r--r--test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll4
-rw-r--r--test/CodeGen/XCore/cos.ll16
-rw-r--r--test/CodeGen/XCore/dg.exp5
-rw-r--r--test/CodeGen/XCore/exp.ll16
-rw-r--r--test/CodeGen/XCore/exp2.ll16
-rw-r--r--test/CodeGen/XCore/float-intrinsics.ll171
-rw-r--r--test/CodeGen/XCore/fneg.ll4
-rw-r--r--test/CodeGen/XCore/getid.ll6
-rw-r--r--test/CodeGen/XCore/global_negative_offset.ll25
-rw-r--r--test/CodeGen/XCore/ladd_lsub_combine.ll2
-rw-r--r--test/CodeGen/XCore/licm-ldwcp.ll2
-rw-r--r--test/CodeGen/XCore/lit.local.cfg6
-rw-r--r--test/CodeGen/XCore/load.ll20
-rw-r--r--test/CodeGen/XCore/log.ll16
-rw-r--r--test/CodeGen/XCore/log10.ll16
-rw-r--r--test/CodeGen/XCore/log2.ll16
-rw-r--r--test/CodeGen/XCore/pow.ll16
-rw-r--r--test/CodeGen/XCore/powi.ll16
-rw-r--r--test/CodeGen/XCore/private.ll12
-rw-r--r--test/CodeGen/XCore/scavenging.ll48
-rw-r--r--test/CodeGen/XCore/sin.ll16
-rw-r--r--test/CodeGen/XCore/sqrt.ll16
-rw-r--r--test/CodeGen/XCore/store.ll16
-rw-r--r--test/CodeGen/XCore/trap.ll7
-rw-r--r--test/CodeGen/XCore/unaligned_store_combine.ll7
-rw-r--r--test/DebugInfo/2009-01-15-dbg_declare.ll16
-rw-r--r--test/DebugInfo/2010-04-25-CU-entry_pc.ll9
-rw-r--r--test/DebugInfo/2010-05-03-OriginDIE.ll4
-rw-r--r--test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll1
-rw-r--r--test/DebugInfo/2011-09-26-GlobalVarContext.ll47
-rw-r--r--test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll47
-rw-r--r--test/DebugInfo/X86/2011-12-16-BadStructRef.ll172
-rw-r--r--test/DebugInfo/X86/DW_AT_byte_size.ll45
-rw-r--r--test/DebugInfo/X86/DW_AT_specification.ll43
-rw-r--r--test/DebugInfo/X86/DW_TAG_friend.ll47
-rw-r--r--test/DebugInfo/X86/block-capture.ll127
-rw-r--r--test/DebugInfo/X86/concrete_out_of_line.ll96
-rw-r--r--test/DebugInfo/X86/dg.exp5
-rw-r--r--test/DebugInfo/X86/ending-run.ll49
-rw-r--r--test/DebugInfo/X86/lit.local.cfg5
-rw-r--r--test/DebugInfo/X86/low-pc-cu.ll31
-rw-r--r--test/DebugInfo/X86/objc-fwd-decl.ll27
-rw-r--r--test/DebugInfo/X86/pointer-type-size.ll25
-rw-r--r--test/DebugInfo/X86/pr11300.ll65
-rw-r--r--test/DebugInfo/X86/stringpool.ll44
-rw-r--r--test/DebugInfo/X86/struct-loc.ll26
-rw-r--r--test/DebugInfo/bug_null_debuginfo.ll6
-rw-r--r--test/DebugInfo/dg.exp3
-rw-r--r--test/DebugInfo/lit.local.cfg1
-rw-r--r--test/ExecutionEngine/2002-12-16-ArgTest.ll2
-rw-r--r--test/ExecutionEngine/2003-01-04-ArgumentBug.ll2
-rw-r--r--test/ExecutionEngine/2003-01-04-LoopTest.ll2
-rw-r--r--test/ExecutionEngine/2003-01-04-PhiTest.ll2
-rw-r--r--test/ExecutionEngine/2003-01-09-SARTest.ll2
-rw-r--r--test/ExecutionEngine/2003-01-10-FUCOM.ll2
-rw-r--r--test/ExecutionEngine/2003-01-15-AlignmentTest.ll2
-rw-r--r--test/ExecutionEngine/2003-05-06-LivenessClobber.ll2
-rw-r--r--test/ExecutionEngine/2003-05-07-ArgumentTest.ll2
-rw-r--r--test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll2
-rw-r--r--test/ExecutionEngine/2003-06-04-bzip2-bug.ll2
-rw-r--r--test/ExecutionEngine/2003-06-05-PHIBug.ll2
-rw-r--r--test/ExecutionEngine/2003-08-15-AllocaAssertion.ll2
-rw-r--r--test/ExecutionEngine/2003-08-21-EnvironmentTest.ll2
-rw-r--r--test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll2
-rw-r--r--test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll2
-rw-r--r--test/ExecutionEngine/2005-12-02-TailCallBug.ll2
-rw-r--r--test/ExecutionEngine/2007-12-10-APIntLoadStore.ll2
-rw-r--r--test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll2
-rw-r--r--test/ExecutionEngine/2010-01-15-UndefValue.ll2
-rw-r--r--test/ExecutionEngine/dg.exp3
-rw-r--r--test/ExecutionEngine/fpbitcast.ll2
-rw-r--r--test/ExecutionEngine/hello.ll2
-rw-r--r--test/ExecutionEngine/hello2.ll2
-rw-r--r--test/ExecutionEngine/lit.local.cfg1
-rw-r--r--test/ExecutionEngine/simplesttest.ll2
-rw-r--r--test/ExecutionEngine/simpletest.ll2
-rw-r--r--test/ExecutionEngine/stubs.ll2
-rw-r--r--test/ExecutionEngine/test-arith.ll2
-rw-r--r--test/ExecutionEngine/test-branch.ll2
-rw-r--r--test/ExecutionEngine/test-call-no-external-funcs.ll15
-rw-r--r--test/ExecutionEngine/test-call.ll2
-rw-r--r--test/ExecutionEngine/test-cast.ll2
-rw-r--r--test/ExecutionEngine/test-common-symbols.ll89
-rw-r--r--test/ExecutionEngine/test-constantexpr.ll2
-rw-r--r--test/ExecutionEngine/test-fp-no-external-funcs.ll21
-rw-r--r--test/ExecutionEngine/test-fp.ll2
-rw-r--r--test/ExecutionEngine/test-global-init-nonzero.ll35
-rw-r--r--test/ExecutionEngine/test-global.ll35
-rw-r--r--test/ExecutionEngine/test-loadstore.ll2
-rw-r--r--test/ExecutionEngine/test-local.ll35
-rw-r--r--test/ExecutionEngine/test-logical.ll2
-rw-r--r--test/ExecutionEngine/test-loop.ll2
-rw-r--r--test/ExecutionEngine/test-phi.ll2
-rw-r--r--test/ExecutionEngine/test-ret.ll2
-rw-r--r--test/ExecutionEngine/test-return.ll8
-rw-r--r--test/ExecutionEngine/test-setcond-fp.ll2
-rw-r--r--test/ExecutionEngine/test-setcond-int.ll2
-rw-r--r--test/ExecutionEngine/test-shift.ll2
-rw-r--r--test/Feature/const_pv.ll8
-rw-r--r--test/Feature/dg.exp3
-rw-r--r--test/Feature/float.ll1
-rw-r--r--test/Feature/global_pv.ll14
-rw-r--r--test/Feature/intrinsics.ll34
-rw-r--r--test/Feature/lit.local.cfg1
-rw-r--r--test/Feature/llvm2cpp.exp3
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/bug_11395.ll71
-rw-r--r--test/Instrumentation/AddressSanitizer/X86/lit.local.cfg13
-rw-r--r--test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll56
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll6
-rw-r--r--test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll6
-rw-r--r--test/Instrumentation/AddressSanitizer/instrument-no-return.ll17
-rw-r--r--test/Instrumentation/AddressSanitizer/instrument_global.ll20
-rw-r--r--test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll25
-rw-r--r--test/Instrumentation/AddressSanitizer/lit.local.cfg1
-rw-r--r--test/Instrumentation/AddressSanitizer/test64.ll14
-rw-r--r--test/Instrumentation/ThreadSanitizer/lit.local.cfg1
-rw-r--r--test/Instrumentation/ThreadSanitizer/read_before_write.ll32
-rw-r--r--test/Instrumentation/ThreadSanitizer/read_from_global.ll61
-rw-r--r--test/Instrumentation/ThreadSanitizer/tsan_basic.ll22
-rw-r--r--test/Instrumentation/ThreadSanitizer/vptr_update.ll13
-rw-r--r--test/Integer/dg.exp3
-rw-r--r--test/Integer/lit.local.cfg1
-rw-r--r--test/Linker/2004-05-07-TypeResolution1.ll2
-rw-r--r--test/Linker/2004-05-07-TypeResolution2.ll2
-rw-r--r--test/Linker/2011-08-18-unique-debug-type.ll2
-rw-r--r--test/Linker/dg.exp3
-rw-r--r--test/Linker/link-type-names.ll10
-rw-r--r--test/Linker/lit.local.cfg1
-rw-r--r--test/Linker/module-flags-1-a.ll16
-rw-r--r--test/Linker/module-flags-1-b.ll8
-rw-r--r--test/Linker/module-flags-2-a.ll10
-rw-r--r--test/Linker/module-flags-2-b.ll6
-rw-r--r--test/Linker/module-flags-3-a.ll14
-rw-r--r--test/Linker/module-flags-3-b.ll8
-rw-r--r--test/Linker/module-flags-4-a.ll10
-rw-r--r--test/Linker/module-flags-4-b.ll8
-rw-r--r--test/Linker/module-flags-5-a.ll9
-rw-r--r--test/Linker/module-flags-5-b.ll6
-rw-r--r--test/Linker/module-flags-6-a.ll9
-rw-r--r--test/Linker/module-flags-6-b.ll6
-rw-r--r--test/Linker/multiple-merged-structs.ll19
-rw-r--r--test/Linker/visibility1.ll46
-rw-r--r--test/Linker/visibility2.ll27
-rw-r--r--test/MC/ARM/arm-aliases.s17
-rw-r--r--test/MC/ARM/arm-it-block.s11
-rw-r--r--test/MC/ARM/arm-memory-instructions.s7
-rw-r--r--test/MC/ARM/arm_fixups.s2
-rw-r--r--test/MC/ARM/basic-arm-instructions.s110
-rw-r--r--test/MC/ARM/basic-thumb-instructions.s24
-rw-r--r--test/MC/ARM/basic-thumb2-instructions.s195
-rw-r--r--test/MC/ARM/cxx-global-constructor.ll12
-rw-r--r--test/MC/ARM/darwin-ARM-reloc.s171
-rw-r--r--test/MC/ARM/dg.exp5
-rw-r--r--test/MC/ARM/diagnostics.s2
-rw-r--r--test/MC/ARM/dot-req.s11
-rw-r--r--test/MC/ARM/elf-reloc-01.ll12
-rw-r--r--test/MC/ARM/elf-reloc-condcall.s23
-rw-r--r--test/MC/ARM/elf-thumbfunc-reloc.s23
-rw-r--r--test/MC/ARM/lit.local.cfg6
-rw-r--r--test/MC/ARM/mode-switch.s11
-rw-r--r--test/MC/ARM/neon-add-encoding.s114
-rw-r--r--test/MC/ARM/neon-bitwise-encoding.s218
-rw-r--r--test/MC/ARM/neon-cmp-encoding.s63
-rw-r--r--test/MC/ARM/neon-minmax-encoding.s176
-rw-r--r--test/MC/ARM/neon-mov-encoding.s17
-rw-r--r--test/MC/ARM/neon-mul-accum-encoding.s92
-rw-r--r--test/MC/ARM/neon-mul-encoding.s246
-rw-r--r--test/MC/ARM/neon-pairwise-encoding.s10
-rw-r--r--test/MC/ARM/neon-shift-encoding.s444
-rw-r--r--test/MC/ARM/neon-shiftaccum-encoding.s98
-rw-r--r--test/MC/ARM/neon-shuffle-encoding.s134
-rw-r--r--test/MC/ARM/neon-sub-encoding.s46
-rw-r--r--test/MC/ARM/neon-table-encoding.s21
-rw-r--r--test/MC/ARM/neon-vld-encoding.s510
-rw-r--r--test/MC/ARM/neon-vst-encoding.s359
-rw-r--r--test/MC/ARM/neon-vswp.s7
-rw-r--r--test/MC/ARM/neont2-minmax-encoding.s176
-rw-r--r--test/MC/ARM/neont2-mov-encoding.s128
-rw-r--r--test/MC/ARM/neont2-mul-accum-encoding.s16
-rw-r--r--test/MC/ARM/neont2-mul-encoding.s74
-rw-r--r--test/MC/ARM/neont2-table-encoding.s21
-rw-r--r--test/MC/ARM/neont2-vst-encoding.s4
-rw-r--r--test/MC/ARM/nop-armv6t2-padding.s10
-rw-r--r--test/MC/ARM/pr11877.s6
-rw-r--r--test/MC/ARM/prefetch.ll58
-rw-r--r--test/MC/ARM/simple-fp-encoding.s158
-rw-r--r--test/MC/ARM/thumb-diagnostics.s8
-rw-r--r--test/MC/ARM/vfp4.s50
-rw-r--r--test/MC/ARM/vpush-vpop.s15
-rw-r--r--test/MC/AsmParser/2011-09-06-NoNewline.s7
-rw-r--r--test/MC/AsmParser/cfi-unfinished-frame.s5
-rw-r--r--test/MC/AsmParser/dg.exp5
-rw-r--r--test/MC/AsmParser/directive_file-errors.s9
-rw-r--r--test/MC/AsmParser/directive_file.s3
-rw-r--r--test/MC/AsmParser/directive_incbin.s6
-rw-r--r--test/MC/AsmParser/incbin_abcd1
-rw-r--r--test/MC/AsmParser/lit.local.cfg6
-rw-r--r--test/MC/AsmParser/macro-args.s10
-rw-r--r--test/MC/AsmParser/pr11865.s6
-rw-r--r--test/MC/AsmParser/variables-invalid.s13
-rw-r--r--test/MC/COFF/dg.exp5
-rw-r--r--test/MC/COFF/global_ctors.ll28
-rw-r--r--test/MC/COFF/lit.local.cfg6
-rw-r--r--test/MC/COFF/secrel32.s14
-rw-r--r--test/MC/COFF/symbol-mangling.ll17
-rw-r--r--test/MC/Disassembler/ARM/arm-tests.txt8
-rw-r--r--test/MC/Disassembler/ARM/dg.exp6
-rw-r--r--test/MC/Disassembler/ARM/fp-encoding.txt32
-rw-r--r--test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt2
-rw-r--r--test/MC/Disassembler/ARM/invalid-IT-CC15.txt18
-rw-r--r--test/MC/Disassembler/ARM/invalid-LDRD-arm.txt10
-rw-r--r--test/MC/Disassembler/ARM/invalid-LDRT-arm.txt12
-rw-r--r--test/MC/Disassembler/ARM/invalid-LSL-regform.txt11
-rw-r--r--test/MC/Disassembler/ARM/invalid-RSC-arm.txt9
-rw-r--r--test/MC/Disassembler/ARM/invalid-SSAT-arm.txt11
-rw-r--r--test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt10
-rw-r--r--test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt12
-rw-r--r--test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt13
-rw-r--r--test/MC/Disassembler/ARM/ldrd-armv4.txt15
-rw-r--r--test/MC/Disassembler/ARM/lit.local.cfg6
-rw-r--r--test/MC/Disassembler/ARM/neon-tests.txt6
-rw-r--r--test/MC/Disassembler/ARM/neon.txt407
-rw-r--r--test/MC/Disassembler/ARM/neont2.txt396
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt17
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt16
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt22
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt13
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt13
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt17
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt11
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt7
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt13
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt12
-rw-r--r--test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt16
-rw-r--r--test/MC/Disassembler/ARM/unpredictables-thumb.txt5
-rw-r--r--test/MC/Disassembler/ARM/vfp4.txt37
-rw-r--r--test/MC/Disassembler/MBlaze/dg.exp6
-rw-r--r--test/MC/Disassembler/MBlaze/lit.local.cfg6
-rw-r--r--test/MC/Disassembler/MBlaze/mblaze_mbar.txt14
-rw-r--r--test/MC/Disassembler/MBlaze/mblaze_pattern.txt3
-rw-r--r--test/MC/Disassembler/X86/dg.exp6
-rw-r--r--test/MC/Disassembler/X86/intel-syntax.txt28
-rw-r--r--test/MC/Disassembler/X86/invalid-cmp-imm.txt10
-rw-r--r--test/MC/Disassembler/X86/lit.local.cfg6
-rw-r--r--test/MC/Disassembler/X86/simple-tests.txt211
-rw-r--r--test/MC/Disassembler/X86/x86-32.txt117
-rw-r--r--test/MC/Disassembler/X86/x86-64.txt63
-rw-r--r--test/MC/ELF/cfi-escape.s42
-rw-r--r--test/MC/ELF/cfi-restore.s42
-rw-r--r--test/MC/ELF/cfi-signal-frame.s23
-rw-r--r--test/MC/ELF/dg.exp5
-rw-r--r--test/MC/ELF/gen-dwarf.s70
-rw-r--r--test/MC/ELF/global-offset.s6
-rw-r--r--test/MC/ELF/lit.local.cfg6
-rw-r--r--test/MC/ELF/many-section.s2
-rw-r--r--test/MC/ELF/relocation-386.s14
-rw-r--r--test/MC/ELF/tls-i386.s71
-rw-r--r--test/MC/ELF/tls.s33
-rw-r--r--test/MC/ELF/type.s14
-rw-r--r--test/MC/MBlaze/dg.exp5
-rw-r--r--test/MC/MBlaze/lit.local.cfg6
-rw-r--r--test/MC/MachO/ARM/darwin-ARM-reloc.s173
-rw-r--r--test/MC/MachO/ARM/darwin-Thumb-reloc.s (renamed from test/MC/ARM/darwin-Thumb-reloc.s) 0
-rw-r--r--test/MC/MachO/ARM/empty-function-nop.ll15
-rw-r--r--test/MC/MachO/ARM/lit.local.cfg6
-rw-r--r--test/MC/MachO/ARM/no-subsections-reloc.s18
-rw-r--r--test/MC/MachO/ARM/nop-armv4-padding.s (renamed from test/MC/ARM/nop-armv4-padding.s) 0
-rw-r--r--test/MC/MachO/ARM/nop-armv6t2-padding.s10
-rw-r--r--test/MC/MachO/ARM/nop-thumb-padding.s (renamed from test/MC/ARM/nop-thumb-padding.s) 0
-rw-r--r--test/MC/MachO/ARM/nop-thumb2-padding.s (renamed from test/MC/ARM/nop-thumb2-padding.s) 0
-rw-r--r--test/MC/MachO/ARM/relax-thumb-ldr-literal.s13
-rw-r--r--test/MC/MachO/ARM/relax-thumb2-branches.s14
-rw-r--r--test/MC/MachO/ARM/static-movt-relocs.s23
-rw-r--r--test/MC/MachO/ARM/thumb2-function-relative-load.s13
-rw-r--r--test/MC/MachO/ARM/thumb2-movt-fixup.s (renamed from test/MC/ARM/thumb2-movt-fixup.s) 0
-rw-r--r--test/MC/MachO/darwin-x86_64-diff-reloc-assign.s27
-rw-r--r--test/MC/MachO/dg.exp6
-rw-r--r--test/MC/MachO/file.s22
-rw-r--r--test/MC/MachO/gen-dwarf.s122
-rw-r--r--test/MC/MachO/lit.local.cfg6
-rw-r--r--test/MC/MachO/reloc-pcrel-offset.s3
-rw-r--r--test/MC/MachO/reloc-pcrel.s13
-rw-r--r--test/MC/Mips/elf-bigendian.ll45
-rw-r--r--test/MC/Mips/elf-relsym.ll29
-rw-r--r--test/MC/Mips/elf-tls.ll36
-rw-r--r--test/MC/Mips/elf_basic.s32
-rw-r--r--test/MC/Mips/lit.local.cfg6
-rw-r--r--test/MC/Mips/pr11877.s6
-rw-r--r--test/MC/X86/2011-09-06-NoNewline.s3
-rw-r--r--test/MC/X86/address-size.s13
-rw-r--r--test/MC/X86/dg.exp5
-rw-r--r--test/MC/X86/intel-syntax-2.s7
-rw-r--r--test/MC/X86/intel-syntax-encoding.s57
-rw-r--r--test/MC/X86/intel-syntax.s66
-rw-r--r--test/MC/X86/lit.local.cfg12
-rw-r--r--test/MC/X86/x86-32-coverage.s34
-rw-r--r--test/MC/X86/x86-32.s44
-rw-r--r--test/MC/X86/x86-64.s51
-rw-r--r--test/MC/X86/x86_64-avx-encoding.s775
-rw-r--r--test/MC/X86/x86_64-bmi-encoding.s202
-rw-r--r--test/MC/X86/x86_64-fma4-encoding.s391
-rw-r--r--test/MC/X86/x86_64-xop-encoding.s584
-rw-r--r--test/MC/X86/x86_errors.s13
-rw-r--r--test/Makefile21
-rw-r--r--test/Object/Inputs/archive-test.a-coff-i386 (renamed from test/Object/TestObjectFiles/archive-test.a-coff-i386) bin 658 -> 658 bytes
-rwxr-xr-xtest/Object/Inputs/elf-versioning-test.i386 bin 0 -> 4832 bytes
-rwxr-xr-xtest/Object/Inputs/elf-versioning-test.x86_64 bin 0 -> 5200 bytes
-rw-r--r--test/Object/Inputs/elfver.S31
-rw-r--r--test/Object/Inputs/elfver.script10
-rw-r--r--test/Object/Inputs/shared-object-test.elf-i386 bin 0 -> 1848 bytes
-rw-r--r--test/Object/Inputs/shared-object-test.elf-x86-64 bin 0 -> 2760 bytes
-rw-r--r--test/Object/Inputs/shared.ll33
-rw-r--r--test/Object/Inputs/trivial-object-test.coff-i386 (renamed from test/Object/TestObjectFiles/trivial-object-test.coff-i386) bin 346 -> 346 bytes
-rw-r--r--test/Object/Inputs/trivial-object-test.coff-x86-64 (renamed from test/Object/TestObjectFiles/trivial-object-test.coff-x86-64) bin 347 -> 347 bytes
-rw-r--r--test/Object/Inputs/trivial-object-test.elf-i386 (renamed from test/Object/TestObjectFiles/trivial-object-test.elf-i386) bin 716 -> 716 bytes
-rw-r--r--test/Object/Inputs/trivial-object-test.elf-x86-64 (renamed from test/Object/TestObjectFiles/trivial-object-test.elf-x86-64) bin 1024 -> 1024 bytes
-rw-r--r--test/Object/Inputs/trivial-object-test.macho-i386 (renamed from test/Object/TestObjectFiles/trivial-object-test.macho-i386) bin 552 -> 552 bytes
-rw-r--r--test/Object/Inputs/trivial-object-test.macho-x86-64 (renamed from test/Object/TestObjectFiles/trivial-object-test.macho-x86-64) bin 552 -> 552 bytes
-rw-r--r--test/Object/Inputs/trivial.ll12
-rw-r--r--test/Object/TestObjectFiles/archive-test.a-bitcode bin 790 -> 0 bytes
-rw-r--r--test/Object/X86/lit.local.cfg6
-rw-r--r--test/Object/X86/objdump-disassembly-inline-relocations.test32
-rw-r--r--test/Object/X86/objdump-trivial-object.test54
-rw-r--r--test/Object/dg.exp3
-rw-r--r--test/Object/lit.local.cfg1
-rw-r--r--test/Object/nm-archive.test7
-rw-r--r--test/Object/nm-shared-object.test15
-rw-r--r--test/Object/nm-trivial-object.test22
-rw-r--r--test/Object/objdump-disassembly-inline-relocations.test32
-rw-r--r--test/Object/objdump-file-header.test18
-rw-r--r--test/Object/objdump-relocations.test8
-rw-r--r--test/Object/objdump-section-content.test20
-rw-r--r--test/Object/objdump-sectionheaders.test2
-rw-r--r--test/Object/objdump-symbol-table.test33
-rw-r--r--test/Object/objdump-trivial-object.test54
-rw-r--r--test/Object/readobj-elf-versioning.test15
-rw-r--r--test/Object/readobj-shared-object.test59
-rw-r--r--test/Other/2009-03-31-CallGraph.ll3
-rw-r--r--test/Other/X86/dg.exp5
-rw-r--r--test/Other/X86/lit.local.cfg6
-rw-r--r--test/Other/constant-fold-gep.ll7
-rw-r--r--test/Other/dg.exp3
-rw-r--r--test/Other/lint.ll2
-rw-r--r--test/Other/lit.local.cfg1
-rw-r--r--test/TableGen/Dag.td2
-rw-r--r--test/TableGen/ForeachList.td76
-rw-r--r--test/TableGen/ForeachLoop.td43
-rw-r--r--test/TableGen/NestedForeach.td74
-rw-r--r--test/TableGen/Paste.td36
-rw-r--r--test/TableGen/SetTheory.td7
-rw-r--r--test/TableGen/SiblingForeach.td277
-rw-r--r--test/TableGen/TwoLevelName.td46
-rw-r--r--test/TableGen/dg.exp3
-rw-r--r--test/TableGen/foreach.td2
-rw-r--r--test/TableGen/lit.local.cfg1
-rw-r--r--test/Transforms/ADCE/dg.exp3
-rw-r--r--test/Transforms/ADCE/lit.local.cfg1
-rw-r--r--test/Transforms/ArgumentPromotion/dg.exp3
-rw-r--r--test/Transforms/ArgumentPromotion/lit.local.cfg1
-rw-r--r--test/Transforms/BBVectorize/cycle.ll112
-rw-r--r--test/Transforms/BBVectorize/func-alias.ll244
-rw-r--r--test/Transforms/BBVectorize/ld1.ll41
-rw-r--r--test/Transforms/BBVectorize/lit.local.cfg1
-rw-r--r--test/Transforms/BBVectorize/loop1.ll93
-rw-r--r--test/Transforms/BBVectorize/mem-op-depth.ll22
-rw-r--r--test/Transforms/BBVectorize/req-depth.ll17
-rw-r--r--test/Transforms/BBVectorize/search-limit.ll46
-rw-r--r--test/Transforms/BBVectorize/simple-int.ll103
-rw-r--r--test/Transforms/BBVectorize/simple-ldstr.ll110
-rw-r--r--test/Transforms/BBVectorize/simple.ll152
-rw-r--r--test/Transforms/BlockPlacement/dg.exp3
-rw-r--r--test/Transforms/BlockPlacement/lit.local.cfg1
-rw-r--r--test/Transforms/CodeExtractor/dg.exp3
-rw-r--r--test/Transforms/CodeExtractor/lit.local.cfg1
-rw-r--r--test/Transforms/CodeGenPrepare/dg.exp3
-rw-r--r--test/Transforms/CodeGenPrepare/lit.local.cfg1
-rw-r--r--test/Transforms/ConstProp/2007-11-23-cttz.ll4
-rw-r--r--test/Transforms/ConstProp/basictest.ll11
-rw-r--r--test/Transforms/ConstProp/bswap.ll18
-rw-r--r--test/Transforms/ConstProp/calls.ll88
-rw-r--r--test/Transforms/ConstProp/dg.exp3
-rw-r--r--test/Transforms/ConstProp/lit.local.cfg1
-rw-r--r--test/Transforms/ConstantMerge/dg.exp3
-rw-r--r--test/Transforms/ConstantMerge/linker-private.ll23
-rw-r--r--test/Transforms/ConstantMerge/lit.local.cfg1
-rw-r--r--test/Transforms/CorrelatedValuePropagation/basic.ll101
-rw-r--r--test/Transforms/CorrelatedValuePropagation/dg.exp3
-rw-r--r--test/Transforms/CorrelatedValuePropagation/lit.local.cfg1
-rw-r--r--test/Transforms/CorrelatedValuePropagation/range.ll43
-rw-r--r--test/Transforms/DeadArgElim/deadexternal.ll4
-rw-r--r--test/Transforms/DeadArgElim/dg.exp3
-rw-r--r--test/Transforms/DeadArgElim/lit.local.cfg1
-rw-r--r--test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll95
-rw-r--r--test/Transforms/DeadStoreElimination/dg.exp3
-rw-r--r--test/Transforms/DeadStoreElimination/dominate.ll25
-rw-r--r--test/Transforms/DeadStoreElimination/free.ll36
-rw-r--r--test/Transforms/DeadStoreElimination/lit.local.cfg1
-rw-r--r--test/Transforms/DeadStoreElimination/pr11390.ll38
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll7
-rw-r--r--test/Transforms/EarlyCSE/basic.ll10
-rw-r--r--test/Transforms/EarlyCSE/dg.exp3
-rw-r--r--test/Transforms/EarlyCSE/instsimplify-dom.ll19
-rw-r--r--test/Transforms/EarlyCSE/lit.local.cfg1
-rw-r--r--test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll2
-rw-r--r--test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll105
-rw-r--r--test/Transforms/FunctionAttrs/2010-10-30-volatile.ll2
-rw-r--r--test/Transforms/FunctionAttrs/dg.exp3
-rw-r--r--test/Transforms/FunctionAttrs/lit.local.cfg1
-rw-r--r--test/Transforms/FunctionAttrs/nocapture.ll178
-rw-r--r--test/Transforms/GVN/commute.ll23
-rw-r--r--test/Transforms/GVN/condprop.ll157
-rw-r--r--test/Transforms/GVN/dg.exp3
-rw-r--r--test/Transforms/GVN/lit.local.cfg1
-rw-r--r--test/Transforms/GVN/pre-compare.ll68
-rw-r--r--test/Transforms/GVN/rle.ll34
-rw-r--r--test/Transforms/GlobalDCE/2009-09-03-MDNode.ll264
-rw-r--r--test/Transforms/GlobalDCE/dg.exp3
-rw-r--r--test/Transforms/GlobalDCE/lit.local.cfg1
-rw-r--r--test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll2
-rw-r--r--test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll57
-rw-r--r--test/Transforms/GlobalOpt/atomic.ll10
-rw-r--r--test/Transforms/GlobalOpt/constantfold-initializers.ll43
-rw-r--r--test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll19
-rw-r--r--test/Transforms/GlobalOpt/cxx-dtor.ll6
-rw-r--r--test/Transforms/GlobalOpt/deadfunction.ll27
-rw-r--r--test/Transforms/GlobalOpt/dg.exp3
-rw-r--r--test/Transforms/GlobalOpt/invariant.ll59
-rw-r--r--test/Transforms/GlobalOpt/invoke.ll27
-rw-r--r--test/Transforms/GlobalOpt/lit.local.cfg1
-rw-r--r--test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll11
-rw-r--r--test/Transforms/IPConstantProp/dangling-block-address.ll2
-rw-r--r--test/Transforms/IPConstantProp/dg.exp3
-rw-r--r--test/Transforms/IPConstantProp/lit.local.cfg1
-rw-r--r--test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll4
-rw-r--r--test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll59
-rw-r--r--test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll140
-rw-r--r--test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll40
-rw-r--r--test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll29
-rw-r--r--test/Transforms/IndVarSimplify/ada-loops.ll1
-rw-r--r--test/Transforms/IndVarSimplify/addrec-gep.ll78
-rw-r--r--test/Transforms/IndVarSimplify/avoid-i0.ll2
-rw-r--r--test/Transforms/IndVarSimplify/complex-scev.ll31
-rw-r--r--test/Transforms/IndVarSimplify/dg.exp3
-rw-r--r--test/Transforms/IndVarSimplify/elim-extend.ll2
-rw-r--r--test/Transforms/IndVarSimplify/gep-with-mul-base.ll68
-rw-r--r--test/Transforms/IndVarSimplify/iv-fold.ll2
-rw-r--r--test/Transforms/IndVarSimplify/iv-zext.ll1
-rw-r--r--test/Transforms/IndVarSimplify/lftr-reuse.ll2
-rw-r--r--test/Transforms/IndVarSimplify/lit.local.cfg1
-rw-r--r--test/Transforms/IndVarSimplify/loop_evaluate10.ll8
-rw-r--r--test/Transforms/IndVarSimplify/loop_evaluate9.ll7
-rw-r--r--test/Transforms/IndVarSimplify/no-iv-rewrite.ll6
-rw-r--r--test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll42
-rw-r--r--test/Transforms/IndVarSimplify/preserve-gep-nested.ll76
-rw-r--r--test/Transforms/IndVarSimplify/preserve-gep-remainder.ll20
-rw-r--r--test/Transforms/IndVarSimplify/preserve-gep.ll39
-rw-r--r--test/Transforms/IndVarSimplify/preserve-signed-wrap.ll1
-rw-r--r--test/Transforms/IndVarSimplify/sink-alloca.ll47
-rw-r--r--test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll1
-rw-r--r--test/Transforms/Inline/2007-06-06-NoInline.ll46
-rw-r--r--test/Transforms/Inline/2008-09-02-AlwaysInline.ll10
-rw-r--r--test/Transforms/Inline/2008-10-30-AlwaysInline.ll14
-rw-r--r--test/Transforms/Inline/2008-11-04-AlwaysInline.ll7
-rw-r--r--test/Transforms/Inline/alloca-bonus.ll155
-rw-r--r--test/Transforms/Inline/always-inline.ll125
-rw-r--r--test/Transforms/Inline/always_inline_dyn_alloca.ll15
-rw-r--r--test/Transforms/Inline/blockaddress.ll27
-rw-r--r--test/Transforms/Inline/dg.exp3
-rw-r--r--test/Transforms/Inline/dynamic_alloca_test.ll52
-rw-r--r--test/Transforms/Inline/inline-invoke-tail.ll8
-rw-r--r--test/Transforms/Inline/inline_cleanup.ll158
-rw-r--r--test/Transforms/Inline/inline_constprop.ll114
-rw-r--r--test/Transforms/Inline/inline_returns_twice.ll41
-rw-r--r--test/Transforms/Inline/lit.local.cfg1
-rw-r--r--test/Transforms/Inline/noinline-recursive-fn.ll43
-rw-r--r--test/Transforms/Inline/ptr-diff.ll58
-rw-r--r--test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll2
-rw-r--r--test/Transforms/InstCombine/2007-09-11-Trampoline.ll24
-rw-r--r--test/Transforms/InstCombine/2007-10-28-stacksave.ll2
-rw-r--r--test/Transforms/InstCombine/2008-01-14-DoubleNest.ll24
-rw-r--r--test/Transforms/InstCombine/2008-04-28-VolatileStore.ll2
-rw-r--r--test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll6
-rw-r--r--test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll4
-rw-r--r--test/Transforms/InstCombine/2008-06-24-StackRestore.ll4
-rw-r--r--test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll6
-rw-r--r--test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll12
-rw-r--r--test/Transforms/InstCombine/2012-02-13-FCmp.ll35
-rw-r--r--test/Transforms/InstCombine/2012-02-28-ICmp.ll19
-rw-r--r--test/Transforms/InstCombine/2012-03-10-InstCombine.ll35
-rw-r--r--test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll12
-rw-r--r--test/Transforms/InstCombine/LandingPadClauses.ll52
-rw-r--r--test/Transforms/InstCombine/align-external.ll13
-rw-r--r--test/Transforms/InstCombine/alloca.ll44
-rw-r--r--test/Transforms/InstCombine/apint-shl-trunc.ll15
-rw-r--r--test/Transforms/InstCombine/bitcount.ll8
-rw-r--r--test/Transforms/InstCombine/constant-fold-gep.ll6
-rw-r--r--test/Transforms/InstCombine/crash.ll15
-rw-r--r--test/Transforms/InstCombine/dg.exp3
-rw-r--r--test/Transforms/InstCombine/extractvalue.ll6
-rw-r--r--test/Transforms/InstCombine/fold-sqrt-sqrtf.ll17
-rw-r--r--test/Transforms/InstCombine/icmp.ll84
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll46
-rw-r--r--test/Transforms/InstCombine/lit.local.cfg1
-rw-r--r--test/Transforms/InstCombine/mul.ll80
-rw-r--r--test/Transforms/InstCombine/overflow.ll22
-rw-r--r--test/Transforms/InstCombine/pr12251.ll15
-rw-r--r--test/Transforms/InstCombine/select.ll20
-rw-r--r--test/Transforms/InstCombine/sext.ll8
-rw-r--r--test/Transforms/InstCombine/shift.ll72
-rw-r--r--test/Transforms/InstCombine/sign-test-and-or.ll100
-rw-r--r--test/Transforms/InstCombine/sub-xor.ll37
-rw-r--r--test/Transforms/InstCombine/sub.ll26
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll46
-rw-r--r--test/Transforms/InstCombine/vector_gep1.ll37
-rw-r--r--test/Transforms/InstCombine/volatile_store.ll4
-rw-r--r--test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll12
-rw-r--r--test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll17
-rw-r--r--test/Transforms/InstSimplify/AndOrXor.ll22
-rw-r--r--test/Transforms/InstSimplify/compare.ll275
-rw-r--r--test/Transforms/InstSimplify/dg.exp3
-rw-r--r--test/Transforms/InstSimplify/lit.local.cfg1
-rw-r--r--test/Transforms/InstSimplify/phi.ll22
-rw-r--r--test/Transforms/InstSimplify/ptr_diff.ll48
-rw-r--r--test/Transforms/InstSimplify/reassociate.ll9
-rw-r--r--test/Transforms/InstSimplify/undef.ll28
-rw-r--r--test/Transforms/InstSimplify/vector_gep.ll8
-rw-r--r--test/Transforms/Internalize/dg.exp3
-rw-r--r--test/Transforms/Internalize/lit.local.cfg1
-rw-r--r--test/Transforms/JumpThreading/2011-04-14-InfLoop.ll2
-rw-r--r--test/Transforms/JumpThreading/crash.ll2
-rw-r--r--test/Transforms/JumpThreading/dg.exp3
-rw-r--r--test/Transforms/JumpThreading/lit.local.cfg1
-rw-r--r--test/Transforms/JumpThreading/no-irreducible-loops.ll4
-rw-r--r--test/Transforms/JumpThreading/thread-loads.ll48
-rw-r--r--test/Transforms/LCSSA/dg.exp3
-rw-r--r--test/Transforms/LCSSA/lit.local.cfg1
-rw-r--r--test/Transforms/LICM/2007-05-22-VolatileSink.ll4
-rw-r--r--test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll2
-rw-r--r--test/Transforms/LICM/crash.ll2
-rw-r--r--test/Transforms/LICM/dg.exp3
-rw-r--r--test/Transforms/LICM/hoist-invariant-load.ll39
-rw-r--r--test/Transforms/LICM/lit.local.cfg1
-rw-r--r--test/Transforms/LICM/scalar_promote.ll4
-rw-r--r--test/Transforms/LICM/speculate.ll167
-rw-r--r--test/Transforms/LoopDeletion/dg.exp3
-rw-r--r--test/Transforms/LoopDeletion/lit.local.cfg1
-rw-r--r--test/Transforms/LoopIdiom/dg.exp3
-rw-r--r--test/Transforms/LoopIdiom/lit.local.cfg1
-rw-r--r--test/Transforms/LoopRotate/alloca.ll33
-rw-r--r--test/Transforms/LoopRotate/dbgvalue.ll48
-rw-r--r--test/Transforms/LoopRotate/dg.exp3
-rw-r--r--test/Transforms/LoopRotate/lit.local.cfg1
-rw-r--r--test/Transforms/LoopRotate/simplifylatch.ll39
-rw-r--r--test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll45
-rw-r--r--test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll41
-rw-r--r--test/Transforms/LoopSimplify/dg.exp3
-rw-r--r--test/Transforms/LoopSimplify/lit.local.cfg1
-rw-r--r--test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll4
-rw-r--r--test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll39
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll88
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll113
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll155
-rw-r--r--test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll49
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll292
-rw-r--r--test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg6
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll36
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll96
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll92
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/dg.exp5
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll300
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll96
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/lit.local.cfg6
-rw-r--r--test/Transforms/LoopStrengthReduce/addrec-gep.ll82
-rw-r--r--test/Transforms/LoopStrengthReduce/dg.exp3
-rw-r--r--test/Transforms/LoopStrengthReduce/dominate-assert.ll70
-rw-r--r--test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/ivchain.ll43
-rw-r--r--test/Transforms/LoopStrengthReduce/lit.local.cfg1
-rw-r--r--test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll10
-rw-r--r--test/Transforms/LoopStrengthReduce/pr12018.ll38
-rw-r--r--test/Transforms/LoopStrengthReduce/pr12048.ll38
-rw-r--r--test/Transforms/LoopStrengthReduce/pr3399.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll42
-rw-r--r--test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll2
-rw-r--r--test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll40
-rw-r--r--test/Transforms/LoopUnroll/dg.exp3
-rw-r--r--test/Transforms/LoopUnroll/lit.local.cfg1
-rw-r--r--test/Transforms/LoopUnroll/partial-unroll-optsize.ll35
-rw-r--r--test/Transforms/LoopUnroll/pr11361.ll42
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop.ll109
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll30
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop2.ll31
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop3.ll44
-rw-r--r--test/Transforms/LoopUnroll/unloop.ll41
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll91
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll84
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll138
-rw-r--r--test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll41
-rw-r--r--test/Transforms/LoopUnswitch/dg.exp3
-rw-r--r--test/Transforms/LoopUnswitch/lit.local.cfg1
-rw-r--r--test/Transforms/LowerAtomic/dg.exp3
-rw-r--r--test/Transforms/LowerAtomic/lit.local.cfg1
-rw-r--r--test/Transforms/LowerExpectIntrinsic/dg.exp3
-rw-r--r--test/Transforms/LowerExpectIntrinsic/lit.local.cfg1
-rw-r--r--test/Transforms/LowerInvoke/dg.exp3
-rw-r--r--test/Transforms/LowerInvoke/lit.local.cfg1
-rw-r--r--test/Transforms/LowerSwitch/dg.exp3
-rw-r--r--test/Transforms/LowerSwitch/lit.local.cfg1
-rw-r--r--test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll8
-rw-r--r--test/Transforms/Mem2Reg/dg.exp3
-rw-r--r--test/Transforms/Mem2Reg/lit.local.cfg1
-rw-r--r--test/Transforms/MemCpyOpt/dg.exp3
-rw-r--r--test/Transforms/MemCpyOpt/form-memset.ll30
-rw-r--r--test/Transforms/MemCpyOpt/lit.local.cfg1
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll20
-rw-r--r--test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll20
-rw-r--r--test/Transforms/MergeFunc/dg.exp3
-rw-r--r--test/Transforms/MergeFunc/lit.local.cfg1
-rw-r--r--test/Transforms/ObjCARC/apelim.ll53
-rw-r--r--test/Transforms/ObjCARC/basic.ll216
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong-ivar.ll2
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong.ll6
-rw-r--r--test/Transforms/ObjCARC/contract.ll18
-rw-r--r--test/Transforms/ObjCARC/dg.exp3
-rw-r--r--test/Transforms/ObjCARC/invoke.ll145
-rw-r--r--test/Transforms/ObjCARC/lit.local.cfg1
-rw-r--r--test/Transforms/ObjCARC/nested.ll141
-rw-r--r--test/Transforms/ObjCARC/no-objc-arc-exceptions.ll122
-rw-r--r--test/Transforms/ObjCARC/pointer-types.ll31
-rw-r--r--test/Transforms/ObjCARC/pr12270.ll21
-rw-r--r--test/Transforms/ObjCARC/retain-block-alloca.ll46
-rw-r--r--test/Transforms/ObjCARC/retain-block-load.ll51
-rw-r--r--test/Transforms/ObjCARC/retain-block.ll138
-rw-r--r--test/Transforms/ObjCARC/retain-not-declared.ll2
-rw-r--r--test/Transforms/PhaseOrdering/dg.exp3
-rw-r--r--test/Transforms/PhaseOrdering/lit.local.cfg1
-rw-r--r--test/Transforms/PruneEH/dg.exp3
-rw-r--r--test/Transforms/PruneEH/lit.local.cfg1
-rw-r--r--test/Transforms/Reassociate/dg.exp3
-rw-r--r--test/Transforms/Reassociate/lit.local.cfg1
-rw-r--r--test/Transforms/SCCP/2008-05-23-UndefCallFold.ll4
-rw-r--r--test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll4
-rw-r--r--test/Transforms/SCCP/dg.exp3
-rw-r--r--test/Transforms/SCCP/lit.local.cfg1
-rw-r--r--test/Transforms/SCCP/phitest.ll20
-rw-r--r--test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll184
-rw-r--r--test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll19
-rw-r--r--test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll26
-rw-r--r--test/Transforms/ScalarRepl/debuginfo.ll107
-rw-r--r--test/Transforms/ScalarRepl/dg.exp3
-rw-r--r--test/Transforms/ScalarRepl/lit.local.cfg1
-rw-r--r--test/Transforms/ScalarRepl/negative-memset.ll20
-rw-r--r--test/Transforms/ScalarRepl/phi-cycle.ll77
-rw-r--r--test/Transforms/ScalarRepl/volatile.ll4
-rw-r--r--test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll12
-rw-r--r--test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll6
-rw-r--r--test/Transforms/SimplifyCFG/SpeculativeExec.ll31
-rw-r--r--test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll70
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold.ll16
-rw-r--r--test/Transforms/SimplifyCFG/branch_fold_dbg.ll122
-rw-r--r--test/Transforms/SimplifyCFG/dg.exp3
-rw-r--r--test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll33
-rw-r--r--test/Transforms/SimplifyCFG/lit.local.cfg1
-rw-r--r--test/Transforms/SimplifyCFG/multiple-phis.ll39
-rw-r--r--test/Transforms/SimplifyCFG/preserve-branchweights.ll88
-rw-r--r--test/Transforms/SimplifyCFG/select-gep.ll4
-rw-r--r--test/Transforms/SimplifyCFG/switch-masked-bits.ll4
-rw-r--r--test/Transforms/SimplifyCFG/switch-on-const-select.ll2
-rw-r--r--test/Transforms/SimplifyCFG/switch_formation.dbg.ll50
-rw-r--r--test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll116
-rw-r--r--test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll28
-rw-r--r--test/Transforms/SimplifyCFG/unreachable-blocks.ll28
-rw-r--r--test/Transforms/SimplifyLibCalls/PR7357.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/Printf.ll5
-rw-r--r--test/Transforms/SimplifyLibCalls/StrLen.ll6
-rw-r--r--test/Transforms/SimplifyLibCalls/cos.ll14
-rw-r--r--test/Transforms/SimplifyLibCalls/dg.exp3
-rw-r--r--test/Transforms/SimplifyLibCalls/fwrite.ll13
-rw-r--r--test/Transforms/SimplifyLibCalls/lit.local.cfg1
-rw-r--r--test/Transforms/SimplifyLibCalls/osx-names.ll30
-rw-r--r--test/Transforms/Sink/basic.ll2
-rw-r--r--test/Transforms/Sink/dg.exp3
-rw-r--r--test/Transforms/Sink/lit.local.cfg1
-rw-r--r--test/Transforms/StripSymbols/dg.exp3
-rw-r--r--test/Transforms/StripSymbols/lit.local.cfg1
-rw-r--r--test/Transforms/TailCallElim/dg.exp3
-rw-r--r--test/Transforms/TailCallElim/dont_reorder_load.ll4
-rw-r--r--test/Transforms/TailCallElim/lit.local.cfg1
-rw-r--r--test/Transforms/TailCallElim/setjmp.ll2
-rw-r--r--test/Transforms/TailDup/X86/dg.exp5
-rw-r--r--test/Transforms/TailDup/X86/lit.local.cfg6
-rw-r--r--test/Transforms/TailDup/dg.exp3
-rw-r--r--test/Transforms/TailDup/lit.local.cfg5
-rw-r--r--test/Unit/lit.site.cfg.in1
-rw-r--r--test/Verifier/cttz-undef-arg.ll16
-rw-r--r--test/Verifier/dg.exp3
-rw-r--r--test/Verifier/fpaccuracy.ll31
-rw-r--r--test/Verifier/lit.local.cfg1
-rw-r--r--test/Verifier/range-1.ll78
-rw-r--r--test/Verifier/range-2.ll22
-rw-r--r--test/YAMLParser/LICENSE.txt19
-rw-r--r--test/YAMLParser/bool.data6
-rw-r--r--test/YAMLParser/construct-bool.data11
-rw-r--r--test/YAMLParser/construct-custom.data28
-rw-r--r--test/YAMLParser/construct-float.data8
-rw-r--r--test/YAMLParser/construct-int.data8
-rw-r--r--test/YAMLParser/construct-map.data8
-rw-r--r--test/YAMLParser/construct-merge.data29
-rw-r--r--test/YAMLParser/construct-null.data20
-rw-r--r--test/YAMLParser/construct-omap.data10
-rw-r--r--test/YAMLParser/construct-pairs.data9
-rw-r--r--test/YAMLParser/construct-seq.data17
-rw-r--r--test/YAMLParser/construct-set.data9
-rw-r--r--test/YAMLParser/construct-str-ascii.data3
-rw-r--r--test/YAMLParser/construct-str.data3
-rw-r--r--test/YAMLParser/construct-timestamp.data7
-rw-r--r--test/YAMLParser/construct-value.data12
-rw-r--r--test/YAMLParser/duplicate-key.former-loader-error.data5
-rw-r--r--test/YAMLParser/duplicate-mapping-key.former-loader-error.data8
-rw-r--r--test/YAMLParser/duplicate-merge-key.former-loader-error.data6
-rw-r--r--test/YAMLParser/duplicate-value-key.former-loader-error.data6
-rw-r--r--test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data6
-rw-r--r--test/YAMLParser/empty-document-bug.data2
-rw-r--r--test/YAMLParser/float.data8
-rw-r--r--test/YAMLParser/int.data8
-rw-r--r--test/YAMLParser/invalid-single-quote-bug.data4
-rw-r--r--test/YAMLParser/merge.data3
-rw-r--r--test/YAMLParser/more-floats.data3
-rw-r--r--test/YAMLParser/negative-float-bug.data3
-rw-r--r--test/YAMLParser/null.data5
-rw-r--r--test/YAMLParser/resolver.data32
-rw-r--r--test/YAMLParser/run-parser-crash-bug.data10
-rw-r--r--test/YAMLParser/scan-document-end-bug.data5
-rw-r--r--test/YAMLParser/scan-line-break-bug.data5
-rw-r--r--test/YAMLParser/single-dot-is-not-float-bug.data3
-rw-r--r--test/YAMLParser/sloppy-indentation.data19
-rw-r--r--test/YAMLParser/spec-02-01.data5
-rw-r--r--test/YAMLParser/spec-02-02.data5
-rw-r--r--test/YAMLParser/spec-02-03.data10
-rw-r--r--test/YAMLParser/spec-02-04.data10
-rw-r--r--test/YAMLParser/spec-02-05.data5
-rw-r--r--test/YAMLParser/spec-02-06.data7
-rw-r--r--test/YAMLParser/spec-02-07.data12
-rw-r--r--test/YAMLParser/spec-02-08.data12
-rw-r--r--test/YAMLParser/spec-02-09.data10
-rw-r--r--test/YAMLParser/spec-02-10.data10
-rw-r--r--test/YAMLParser/spec-02-11.data11
-rw-r--r--test/YAMLParser/spec-02-12.data10
-rw-r--r--test/YAMLParser/spec-02-13.data6
-rw-r--r--test/YAMLParser/spec-02-14.data6
-rw-r--r--test/YAMLParser/spec-02-15.data10
-rw-r--r--test/YAMLParser/spec-02-16.data9
-rw-r--r--test/YAMLParser/spec-02-17.data16
-rw-r--r--test/YAMLParser/spec-02-18.data8
-rw-r--r--test/YAMLParser/spec-02-19.data7
-rw-r--r--test/YAMLParser/spec-02-20.data8
-rw-r--r--test/YAMLParser/spec-02-21.data6
-rw-r--r--test/YAMLParser/spec-02-22.data6
-rw-r--r--test/YAMLParser/spec-02-23.data15
-rw-r--r--test/YAMLParser/spec-02-24.data16
-rw-r--r--test/YAMLParser/spec-02-25.data9
-rw-r--r--test/YAMLParser/spec-02-26.data9
-rw-r--r--test/YAMLParser/spec-02-27.data31
-rw-r--r--test/YAMLParser/spec-02-28.data28
-rw-r--r--test/YAMLParser/spec-05-01-utf8.data3
-rw-r--r--test/YAMLParser/spec-05-02-utf8.data7
-rw-r--r--test/YAMLParser/spec-05-03.data9
-rw-r--r--test/YAMLParser/spec-05-04.data4
-rw-r--r--test/YAMLParser/spec-05-05.data3
-rw-r--r--test/YAMLParser/spec-05-06.data4
-rw-r--r--test/YAMLParser/spec-05-07.data6
-rw-r--r--test/YAMLParser/spec-05-08.data4
-rw-r--r--test/YAMLParser/spec-05-09.data4
-rw-r--r--test/YAMLParser/spec-05-10.data6
-rw-r--r--test/YAMLParser/spec-05-11.data5
-rw-r--r--test/YAMLParser/spec-05-12.data16
-rw-r--r--test/YAMLParser/spec-05-13.data5
-rw-r--r--test/YAMLParser/spec-05-14.data9
-rw-r--r--test/YAMLParser/spec-05-15.data7
-rw-r--r--test/YAMLParser/spec-06-01.data16
-rw-r--r--test/YAMLParser/spec-06-02.data5
-rw-r--r--test/YAMLParser/spec-06-03.data4
-rw-r--r--test/YAMLParser/spec-06-04.data6
-rw-r--r--test/YAMLParser/spec-06-05.data8
-rw-r--r--test/YAMLParser/spec-06-06.data9
-rw-r--r--test/YAMLParser/spec-06-07.data10
-rw-r--r--test/YAMLParser/spec-06-08.data4
-rw-r--r--test/YAMLParser/spec-07-01.data5
-rw-r--r--test/YAMLParser/spec-07-02.data6
-rw-r--r--test/YAMLParser/spec-07-03.data7
-rw-r--r--test/YAMLParser/spec-07-04.data5
-rw-r--r--test/YAMLParser/spec-07-05.data10
-rw-r--r--test/YAMLParser/spec-07-06.data7
-rw-r--r--test/YAMLParser/spec-07-07a.data4
-rw-r--r--test/YAMLParser/spec-07-07b.data6
-rw-r--r--test/YAMLParser/spec-07-08.data11
-rw-r--r--test/YAMLParser/spec-07-09.data13
-rw-r--r--test/YAMLParser/spec-07-10.data13
-rw-r--r--test/YAMLParser/spec-07-11.data4
-rw-r--r--test/YAMLParser/spec-07-12a.data5
-rw-r--r--test/YAMLParser/spec-07-12b.data6
-rw-r--r--test/YAMLParser/spec-07-13.data11
-rw-r--r--test/YAMLParser/spec-08-01.data4
-rw-r--r--test/YAMLParser/spec-08-02.data4
-rw-r--r--test/YAMLParser/spec-08-03.data4
-rw-r--r--test/YAMLParser/spec-08-04.data9
-rw-r--r--test/YAMLParser/spec-08-05.data7
-rw-r--r--test/YAMLParser/spec-08-06.data12
-rw-r--r--test/YAMLParser/spec-08-07.data6
-rw-r--r--test/YAMLParser/spec-08-08.data15
-rw-r--r--test/YAMLParser/spec-08-09.data13
-rw-r--r--test/YAMLParser/spec-08-10.data17
-rw-r--r--test/YAMLParser/spec-08-11.data4
-rw-r--r--test/YAMLParser/spec-08-12.data10
-rw-r--r--test/YAMLParser/spec-08-13.data6
-rw-r--r--test/YAMLParser/spec-08-14.data7
-rw-r--r--test/YAMLParser/spec-08-15.data7
-rw-r--r--test/YAMLParser/spec-09-01.data8
-rw-r--r--test/YAMLParser/spec-09-02.data14
-rw-r--r--test/YAMLParser/spec-09-03.data8
-rw-r--r--test/YAMLParser/spec-09-04.data6
-rw-r--r--test/YAMLParser/spec-09-05.data10
-rw-r--r--test/YAMLParser/spec-09-06.data3
-rw-r--r--test/YAMLParser/spec-09-07.data8
-rw-r--r--test/YAMLParser/spec-09-08.data3
-rw-r--r--test/YAMLParser/spec-09-09.data8
-rw-r--r--test/YAMLParser/spec-09-10.data5
-rw-r--r--test/YAMLParser/spec-09-11.data7
-rw-r--r--test/YAMLParser/spec-09-12.data10
-rw-r--r--test/YAMLParser/spec-09-13.data8
-rw-r--r--test/YAMLParser/spec-09-14.data21
-rw-r--r--test/YAMLParser/spec-09-15.data15
-rw-r--r--test/YAMLParser/spec-09-16.data5
-rw-r--r--test/YAMLParser/spec-09-17.data5
-rw-r--r--test/YAMLParser/spec-09-18.data11
-rw-r--r--test/YAMLParser/spec-09-19.data6
-rw-r--r--test/YAMLParser/spec-09-20.data13
-rw-r--r--test/YAMLParser/spec-09-21.data12
-rw-r--r--test/YAMLParser/spec-09-22.data6
-rw-r--r--test/YAMLParser/spec-09-23.data13
-rw-r--r--test/YAMLParser/spec-09-24.data8
-rw-r--r--test/YAMLParser/spec-09-25.data5
-rw-r--r--test/YAMLParser/spec-09-26.data10
-rw-r--r--test/YAMLParser/spec-09-27.data10
-rw-r--r--test/YAMLParser/spec-09-28.data10
-rw-r--r--test/YAMLParser/spec-09-29.data6
-rw-r--r--test/YAMLParser/spec-09-30.data16
-rw-r--r--test/YAMLParser/spec-09-31.data16
-rw-r--r--test/YAMLParser/spec-09-32.data16
-rw-r--r--test/YAMLParser/spec-09-33.data16
-rw-r--r--test/YAMLParser/spec-10-01.data4
-rw-r--r--test/YAMLParser/spec-10-02.data10
-rw-r--r--test/YAMLParser/spec-10-03.data6
-rw-r--r--test/YAMLParser/spec-10-04.data6
-rw-r--r--test/YAMLParser/spec-10-05.data9
-rw-r--r--test/YAMLParser/spec-10-06.data4
-rw-r--r--test/YAMLParser/spec-10-07.data9
-rw-r--r--test/YAMLParser/spec-10-08.data13
-rw-r--r--test/YAMLParser/spec-10-09.data6
-rw-r--r--test/YAMLParser/spec-10-10.data10
-rw-r--r--test/YAMLParser/spec-10-11.data9
-rw-r--r--test/YAMLParser/spec-10-12.data5
-rw-r--r--test/YAMLParser/spec-10-13.data7
-rw-r--r--test/YAMLParser/spec-10-14.data6
-rw-r--r--test/YAMLParser/spec-10-15.data5
-rw-r--r--test/YAMLParser/str.data3
-rw-r--r--test/YAMLParser/timestamp-bugs.data8
-rw-r--r--test/YAMLParser/timestamp.data7
-rw-r--r--test/YAMLParser/utf8-implicit.data3
-rw-r--r--test/YAMLParser/utf8.data3
-rw-r--r--test/YAMLParser/value.data3
-rw-r--r--test/YAMLParser/yaml.data5
-rw-r--r--test/lib/llvm.exp285
-rw-r--r--test/lib/llvm2cpp.exp100
-rw-r--r--test/lit.cfg141
-rw-r--r--test/lit.site.cfg.in3
-rw-r--r--test/site.exp.in12
-rw-r--r--tools/CMakeLists.txt20
-rw-r--r--tools/LLVMBuild.txt24
-rw-r--r--tools/Makefile24
-rw-r--r--tools/bugpoint-passes/Makefile8
-rw-r--r--tools/bugpoint/BugDriver.cpp4
-rw-r--r--tools/bugpoint/CMakeLists.txt2
-rw-r--r--tools/bugpoint/CrashDebugger.cpp11
-rw-r--r--tools/bugpoint/ExecutionDriver.cpp50
-rw-r--r--tools/bugpoint/ExtractFunction.cpp62
-rw-r--r--tools/bugpoint/LLVMBuild.txt22
-rw-r--r--tools/bugpoint/Makefile9
-rw-r--r--tools/bugpoint/Miscompilation.cpp3
-rw-r--r--tools/bugpoint/OptimizerDriver.cpp12
-rw-r--r--tools/bugpoint/ToolRunner.cpp93
-rw-r--r--tools/bugpoint/ToolRunner.h1
-rw-r--r--tools/bugpoint/bugpoint.cpp1
-rw-r--r--tools/edis/CMakeLists.txt21
-rw-r--r--tools/edis/EDMain.cpp284
-rw-r--r--tools/edis/Makefile53
-rw-r--r--tools/gold/CMakeLists.txt3
-rw-r--r--tools/gold/Makefile18
-rw-r--r--tools/gold/gold-plugin.cpp2
-rw-r--r--tools/llc/LLVMBuild.txt22
-rw-r--r--tools/llc/Makefile14
-rw-r--r--tools/llc/llc.cpp206
-rw-r--r--tools/lli/CMakeLists.txt17
-rw-r--r--tools/lli/LLVMBuild.txt22
-rw-r--r--tools/lli/Makefile20
-rw-r--r--tools/lli/lli.cpp38
-rw-r--r--tools/llvm-ar/LLVMBuild.txt22
-rw-r--r--tools/llvm-ar/Makefile15
-rw-r--r--tools/llvm-as/LLVMBuild.txt22
-rw-r--r--tools/llvm-as/Makefile6
-rw-r--r--tools/llvm-as/llvm-as.cpp2
-rw-r--r--tools/llvm-bcanalyzer/LLVMBuild.txt22
-rw-r--r--tools/llvm-bcanalyzer/Makefile6
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp123
-rw-r--r--tools/llvm-config/BuildVariables.inc.in27
-rw-r--r--tools/llvm-config/CMakeLists.txt154
-rw-r--r--tools/llvm-config/Makefile134
-rwxr-xr-xtools/llvm-config/find-cycles.pl170
-rw-r--r--tools/llvm-config/llvm-config.cpp342
-rw-r--r--tools/llvm-config/llvm-config.in.in463
-rw-r--r--tools/llvm-cov/LLVMBuild.txt22
-rw-r--r--tools/llvm-cov/Makefile7
-rw-r--r--tools/llvm-diff/DiffConsumer.cpp6
-rw-r--r--tools/llvm-diff/DiffConsumer.h1
-rw-r--r--tools/llvm-diff/DifferenceEngine.cpp18
-rw-r--r--tools/llvm-diff/DifferenceEngine.h4
-rw-r--r--tools/llvm-diff/LLVMBuild.txt22
-rw-r--r--tools/llvm-diff/Makefile6
-rw-r--r--tools/llvm-diff/llvm-diff.cpp2
-rw-r--r--tools/llvm-dis/LLVMBuild.txt22
-rw-r--r--tools/llvm-dis/Makefile6
-rw-r--r--tools/llvm-dis/llvm-dis.cpp19
-rw-r--r--tools/llvm-dwarfdump/LLVMBuild.txt22
-rw-r--r--tools/llvm-dwarfdump/Makefile8
-rw-r--r--tools/llvm-extract/LLVMBuild.txt22
-rw-r--r--tools/llvm-extract/Makefile7
-rw-r--r--tools/llvm-extract/llvm-extract.cpp12
-rw-r--r--tools/llvm-ld/CMakeLists.txt2
-rw-r--r--tools/llvm-ld/LLVMBuild.txt22
-rw-r--r--tools/llvm-ld/Makefile7
-rw-r--r--tools/llvm-ld/llvm-ld.cpp4
-rw-r--r--tools/llvm-link/LLVMBuild.txt22
-rw-r--r--tools/llvm-link/Makefile8
-rw-r--r--tools/llvm-link/llvm-link.cpp2
-rw-r--r--tools/llvm-mc/Disassembler.cpp36
-rw-r--r--tools/llvm-mc/LLVMBuild.txt22
-rw-r--r--tools/llvm-mc/Makefile17
-rw-r--r--tools/llvm-mc/llvm-mc.cpp49
-rw-r--r--tools/llvm-nm/LLVMBuild.txt22
-rw-r--r--tools/llvm-nm/Makefile8
-rw-r--r--tools/llvm-nm/llvm-nm.cpp102
-rw-r--r--tools/llvm-objdump/LLVMBuild.txt22
-rw-r--r--tools/llvm-objdump/MachODump.cpp404
-rw-r--r--tools/llvm-objdump/Makefile9
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp228
-rw-r--r--tools/llvm-objdump/llvm-objdump.h2
-rw-r--r--tools/llvm-prof/LLVMBuild.txt22
-rw-r--r--tools/llvm-prof/Makefile6
-rw-r--r--tools/llvm-prof/llvm-prof.cpp18
-rw-r--r--tools/llvm-ranlib/LLVMBuild.txt22
-rw-r--r--tools/llvm-ranlib/Makefile8
-rw-r--r--tools/llvm-readobj/CMakeLists.txt5
-rw-r--r--tools/llvm-readobj/LLVMBuild.txt22
-rw-r--r--tools/llvm-readobj/Makefile18
-rw-r--r--tools/llvm-readobj/llvm-readobj.cpp218
-rw-r--r--tools/llvm-rtdyld/LLVMBuild.txt22
-rw-r--r--tools/llvm-rtdyld/Makefile16
-rw-r--r--tools/llvm-rtdyld/llvm-rtdyld.cpp31
-rw-r--r--tools/llvm-shlib/Makefile14
-rw-r--r--tools/llvm-size/LLVMBuild.txt22
-rw-r--r--tools/llvm-size/Makefile6
-rw-r--r--tools/llvm-size/llvm-size.cpp10
-rw-r--r--tools/llvm-stress/CMakeLists.txt5
-rw-r--r--tools/llvm-stress/LLVMBuild.txt22
-rw-r--r--tools/llvm-stress/Makefile18
-rw-r--r--tools/llvm-stress/llvm-stress.cpp702
-rw-r--r--tools/llvm-stub/LLVMBuild.txt22
-rw-r--r--tools/llvm-stub/Makefile6
-rw-r--r--tools/lto/CMakeLists.txt2
-rw-r--r--tools/lto/LTOCodeGenerator.cpp340
-rw-r--r--tools/lto/LTOCodeGenerator.h105
-rw-r--r--tools/lto/LTOModule.cpp437
-rw-r--r--tools/lto/LTOModule.h250
-rw-r--r--tools/lto/Makefile19
-rw-r--r--tools/lto/lto.cpp348
-rw-r--r--tools/lto/lto.exports1
-rw-r--r--tools/macho-dump/LLVMBuild.txt22
-rw-r--r--tools/macho-dump/Makefile16
-rw-r--r--tools/opt/CMakeLists.txt2
-rw-r--r--tools/opt/LLVMBuild.txt22
-rw-r--r--tools/opt/Makefile6
-rw-r--r--tools/opt/PrintSCC.cpp4
-rw-r--r--tools/opt/opt.cpp9
-rw-r--r--unittests/ADT/APFloatTest.cpp24
-rw-r--r--unittests/ADT/APIntTest.cpp42
-rw-r--r--unittests/ADT/BitVectorTest.cpp46
-rw-r--r--unittests/ADT/DenseMapTest.cpp41
-rw-r--r--unittests/ADT/HashingTest.cpp424
-rw-r--r--unittests/ADT/IntrusiveRefCntPtrTest.cpp64
-rw-r--r--unittests/ADT/SmallPtrSetTest.cpp72
-rw-r--r--unittests/ADT/SmallStringTest.cpp148
-rw-r--r--unittests/ADT/SparseSetTest.cpp186
-rw-r--r--unittests/ADT/StringRefTest.cpp143
-rw-r--r--unittests/ADT/TripleTest.cpp143
-rw-r--r--unittests/ADT/VariadicFunctionTest.cpp110
-rw-r--r--unittests/Bitcode/BitReaderTest.cpp65
-rw-r--r--unittests/Bitcode/Makefile15
-rw-r--r--unittests/CMakeLists.txt37
-rw-r--r--unittests/ExecutionEngine/ExecutionEngineTest.cpp8
-rw-r--r--unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp110
-rw-r--r--unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h209
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp14
-rw-r--r--unittests/ExecutionEngine/JIT/Makefile24
-rw-r--r--unittests/ExecutionEngine/JIT/MultiJITTest.cpp19
-rw-r--r--unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp166
-rw-r--r--unittests/ExecutionEngine/Makefile4
-rw-r--r--unittests/Makefile2
-rw-r--r--unittests/Makefile.unittest2
-rw-r--r--unittests/Support/AllocatorTest.cpp8
-rw-r--r--unittests/Support/BlockFrequencyTest.cpp29
-rw-r--r--unittests/Support/Casting.cpp5
-rw-r--r--unittests/Support/IRBuilderTest.cpp2
-rw-r--r--unittests/Support/JSONParserTest.cpp191
-rw-r--r--unittests/Support/ManagedStatic.cpp44
-rw-r--r--unittests/Support/Path.cpp62
-rw-r--r--unittests/Support/YAMLParserTest.cpp179
-rw-r--r--unittests/Transforms/Utils/Cloning.cpp2
-rw-r--r--unittests/VMCore/DominatorTreeTest.cpp195
-rw-r--r--unittests/VMCore/InstructionsTest.cpp97
-rw-r--r--unittests/VMCore/Makefile2
-rw-r--r--unittests/VMCore/MetadataTest.cpp7
-rw-r--r--unittests/VMCore/ValueMapTest.cpp4
-rw-r--r--utils/FileCheck/FileCheck.cpp73
-rwxr-xr-xutils/GenLibDeps.pl2
-rw-r--r--utils/KillTheDoctor/KillTheDoctor.cpp46
-rw-r--r--utils/LLVMBuild.txt29
-rw-r--r--utils/Makefile11
-rw-r--r--utils/NLT.schema8
-rwxr-xr-xutils/NewNightlyTest.pl836
-rw-r--r--utils/NightlyTest.gnuplot214
-rw-r--r--utils/NightlyTestTemplate.html244
-rw-r--r--utils/TableGen/ARMDecoderEmitter.cpp1790
-rw-r--r--utils/TableGen/ARMDecoderEmitter.h49
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp439
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp185
-rw-r--r--utils/TableGen/AsmWriterEmitter.h2
-rw-r--r--utils/TableGen/CMakeLists.txt5
-rw-r--r--utils/TableGen/CallingConvEmitter.cpp6
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp26
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp22
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h10
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp45
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp1028
-rw-r--r--utils/TableGen/CodeGenRegisters.h247
-rw-r--r--utils/TableGen/CodeGenTarget.cpp29
-rw-r--r--utils/TableGen/CodeGenTarget.h10
-rw-r--r--utils/TableGen/DAGISelMatcher.cpp6
-rw-r--r--utils/TableGen/DAGISelMatcher.h3
-rw-r--r--utils/TableGen/DAGISelMatcherEmitter.cpp11
-rw-r--r--utils/TableGen/DAGISelMatcherGen.cpp2
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.cpp512
-rw-r--r--utils/TableGen/DFAPacketizerEmitter.h52
-rw-r--r--utils/TableGen/DisassemblerEmitter.cpp1
-rw-r--r--utils/TableGen/EDEmitter.cpp36
-rw-r--r--utils/TableGen/FastISelEmitter.cpp3
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.cpp334
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.h18
-rw-r--r--utils/TableGen/InstrEnumEmitter.cpp48
-rw-r--r--utils/TableGen/InstrEnumEmitter.h33
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp49
-rw-r--r--utils/TableGen/InstrInfoEmitter.h10
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp139
-rw-r--r--utils/TableGen/IntrinsicEmitter.h2
-rw-r--r--utils/TableGen/LLVMBuild.txt22
-rw-r--r--utils/TableGen/PseudoLoweringEmitter.cpp9
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp821
-rw-r--r--utils/TableGen/RegisterInfoEmitter.h6
-rw-r--r--utils/TableGen/SequenceToOffsetTable.h139
-rw-r--r--utils/TableGen/SetTheory.cpp23
-rw-r--r--utils/TableGen/SetTheory.h8
-rw-r--r--utils/TableGen/StringToOffsetTable.h13
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp82
-rw-r--r--utils/TableGen/TableGen.cpp173
-rw-r--r--utils/TableGen/X86DisassemblerTables.cpp148
-rw-r--r--utils/TableGen/X86ModRMFilters.cpp26
-rw-r--r--utils/TableGen/X86ModRMFilters.h15
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp236
-rw-r--r--utils/TableGen/X86RecognizableInstr.h10
-rw-r--r--utils/buildit/GNUmakefile9
-rwxr-xr-xutils/buildit/build_llvm54
-rwxr-xr-xutils/cgiplotNLT.pl68
-rwxr-xr-xutils/clang-parse-diagnostics-file78
-rw-r--r--utils/emacs/tablegen-mode.el2
-rw-r--r--utils/importNLT.pl86
-rw-r--r--utils/json-bench/CMakeLists.txt5
-rw-r--r--utils/json-bench/JSONBench.cpp85
-rw-r--r--utils/json-bench/Makefile21
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg14
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp20
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp20
-rw-r--r--utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg14
-rw-r--r--utils/lit/lit/LitConfig.py2
-rw-r--r--utils/lit/lit/TestRunner.py78
-rw-r--r--utils/lit/lit/TestingConfig.py21
-rwxr-xr-xutils/lit/lit/main.py21
-rw-r--r--utils/lldbDataFormatters.py53
-rw-r--r--utils/llvm-build/README.txt5
-rwxr-xr-xutils/llvm-build/llvm-build6
-rw-r--r--utils/llvm-build/llvmbuild/__init__.py1
-rw-r--r--utils/llvm-build/llvmbuild/componentinfo.py428
-rw-r--r--utils/llvm-build/llvmbuild/configutil.py66
-rw-r--r--utils/llvm-build/llvmbuild/main.py868
-rw-r--r--utils/llvm-build/llvmbuild/util.py13
-rwxr-xr-xutils/llvm-compilers-check577
-rw-r--r--utils/llvm.grm1
-rwxr-xr-xutils/llvmbuild778
-rwxr-xr-xutils/llvmgrep2
-rw-r--r--utils/parseNLT.pl34
-rw-r--r--utils/plotNLT.pl53
-rwxr-xr-xutils/release/findRegressions-nightly.py130
-rwxr-xr-xutils/release/findRegressions-simple.py158
-rwxr-xr-xutils/release/findRegressions.py123
-rwxr-xr-xutils/release/merge.sh74
-rwxr-xr-xutils/release/tag.sh99
-rwxr-xr-xutils/release/test-release.sh260
-rwxr-xr-xutils/show-diagnostics52
-rw-r--r--utils/unittest/LLVMBuild.txt28
-rw-r--r--utils/unittest/UnitTestMain/Makefile2
-rw-r--r--utils/unittest/googletest/Makefile2
-rw-r--r--utils/unittest/googletest/gtest-death-test.cc1
-rw-r--r--utils/unittest/googletest/gtest.cc5
-rw-r--r--utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h2
-rw-r--r--utils/vim/tablegen.vim4
-rwxr-xr-xutils/webNLT.pl83
-rw-r--r--utils/yaml-bench/CMakeLists.txt5
-rw-r--r--utils/yaml-bench/Makefile20
-rw-r--r--utils/yaml-bench/YAMLBench.cpp203
3870 files changed, 237477 insertions, 103774 deletions
diff --git a/.gitignore b/.gitignore
index 5dae4342984f..b3d030e51782 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,3 +38,5 @@ projects/*
!projects/Makefile
# Clang, which is tracked independently.
tools/clang
+# LLDB, which is tracked independently.
+tools/lldb
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 039f619ff971..8336bc975e3a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,7 +10,10 @@ set(CMAKE_MODULE_PATH
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
)
-set(PACKAGE_VERSION "3.0")
+set(LLVM_VERSION_MAJOR 3)
+set(LLVM_VERSION_MINOR 1)
+
+set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}svn")
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
@@ -23,10 +26,15 @@ if( LLVM_APPEND_VC_REV )
add_version_info_from_vcs(PACKAGE_VERSION)
endif()
-set(PACKAGE_NAME llvm)
+set(PACKAGE_NAME LLVM)
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
-set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu")
+set(PACKAGE_BUGREPORT "http://llvm.org/bugs/")
+# Sanity check our source directory to make sure that we are not trying to
+# generate an in-tree build (unless on MSVC_IDE, where it is ok), and to make
+# sure that we don't have any stray generated files lying around in the tree
+# (which would end up getting picked up by header search, instead of the correct
+# versions).
if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE )
message(FATAL_ERROR "In-source builds are not allowed.
CMake would overwrite the makefiles distributed with LLVM.
@@ -35,27 +43,17 @@ to this source directory as the last argument.
This process created the file `CMakeCache.txt' and the directory `CMakeFiles'.
Please delete them.")
endif()
-
-string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
-
-set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include)
-set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
-set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin)
-set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
-set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
-
if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR )
file(GLOB_RECURSE
tablegenned_files_on_include_dir
- "${LLVM_MAIN_SRC_DIR}/include/llvm/*.gen")
+ "${CMAKE_CURRENT_SOURCE_DIR}/include/llvm/*.gen")
file(GLOB_RECURSE
tablegenned_files_on_lib_dir
- "${LLVM_MAIN_SRC_DIR}/lib/Target/*.inc")
+ "${CMAKE_CURRENT_SOURCE_DIR}/lib/Target/*.inc")
if( tablegenned_files_on_include_dir OR tablegenned_files_on_lib_dir)
message(FATAL_ERROR "Apparently there is a previous in-source build,
probably as the result of running `configure' and `make' on
-${LLVM_MAIN_SRC_DIR}.
+${CMAKE_CURRENT_SOURCE_DIR}.
This may cause problems. The suspicious files are:
${tablegenned_files_on_lib_dir}
${tablegenned_files_on_include_dir}
@@ -63,20 +61,26 @@ Please clean the source directory.")
endif()
endif()
+string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
+
+set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include)
+set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin)
+set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples)
+set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
+
set(LLVM_ALL_TARGETS
- Alpha
ARM
- Blackfin
- CBackend
CellSPU
CppBackend
+ Hexagon
Mips
MBlaze
MSP430
PowerPC
PTX
Sparc
- SystemZ
X86
XCore
)
@@ -88,10 +92,13 @@ if( MSVC )
set(LLVM_TARGETS_TO_BUILD X86
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
else( MSVC )
- set(LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS}
+ set(LLVM_TARGETS_TO_BUILD "all"
CACHE STRING "Semicolon-separated list of targets to build, or \"all\".")
endif( MSVC )
+option(BUILD_SHARED_LIBS
+ "Build all libraries as shared libraries instead of static" OFF)
+
option(LLVM_ENABLE_CBE_PRINTF_A "Set to ON if CBE is enabled for printf %a output" ON)
if(LLVM_ENABLE_CBE_PRINTF_A)
set(ENABLE_CBE_PRINTF_A 1)
@@ -126,12 +133,6 @@ foreach(c ${LLVM_TARGETS_TO_BUILD})
endif()
endforeach(c)
-# Produce llvm/Config/Targets.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Targets.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/Targets.def
- )
-
set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm)
include(AddLLVMDefinitions)
@@ -154,20 +155,207 @@ else()
option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON)
endif()
+option(LLVM_USE_INTEL_JITEVENTS
+ "Use Intel JIT API to inform Intel(R) VTune(TM) Amplifier XE 2011 about JIT code"
+ OFF)
+
+if( LLVM_USE_INTEL_JITEVENTS )
+ # Verify we are on a supported platform
+ if( CMAKE_SYSTEM_NAME MATCHES "Windows" OR CMAKE_SYSTEM_NAME MATCHES "Linux" )
+ # Directory where Intel Parallel Amplifier XE 2011 is installed.
+ if ( WIN32 )
+ set(LLVM_INTEL_JITEVENTS_DIR $ENV{VTUNE_AMPLIFIER_XE_2011_DIR})
+ else ( WIN32 )
+ set(LLVM_INTEL_JITEVENTS_DIR "/opt/intel/vtune_amplifier_xe_2011")
+ endif ( WIN32 )
+
+ # Set include and library search paths for Intel JIT Events API
+ set(LLVM_INTEL_JITEVENTS_INCDIR "${LLVM_INTEL_JITEVENTS_DIR}/include")
+
+ if ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+ set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib64")
+ else ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+ set(LLVM_INTEL_JITEVENTS_LIBDIR "${LLVM_INTEL_JITEVENTS_DIR}/lib32")
+ endif ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+ else()
+ message(FATAL_ERROR
+ "Intel JIT API support is available on Linux and Windows only.")
+ endif()
+endif( LLVM_USE_INTEL_JITEVENTS )
+
+option(LLVM_USE_OPROFILE
+ "Use opagent JIT interface to inform OProfile about JIT code" OFF)
+
+# If enabled, verify we are on a platform that supports OProfile.
+if( LLVM_USE_OPROFILE )
+ if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+ message(FATAL_ERROR "OProfile support is available on Linux only.")
+ endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
+endif( LLVM_USE_OPROFILE )
+
+# Define an option controlling whether we should build for 32-bit on 64-bit
+# platforms, where supported.
+if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
+ # TODO: support other platforms and toolchains.
+ option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF)
+endif()
+
+# Define the default arguments to use with 'lit', and an option for the user to
+# override.
+set(LIT_ARGS_DEFAULT "-sv")
+if (MSVC OR XCODE)
+ set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
+endif()
+set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
+
+# On Win32 hosts, provide an option to specify the path to the GnuWin32 tools.
+if( WIN32 AND NOT CYGWIN )
+ set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
+endif()
+
+# On Win32 using MS tools, provide an option to set the number of parallel jobs
+# to use.
+if( MSVC_IDE AND ( MSVC90 OR MSVC10 ) )
+  # Only Visual Studio 2008 and 2010 officially support /MP. Visual Studio
+ # 2005 supports it but it is experimental.
+ set(LLVM_COMPILER_JOBS "0" CACHE STRING
+ "Number of parallel compiler jobs. 0 means use all processors. Default is 0.")
+endif()
+
+# Define options to control the inclusion and default build behavior for
+# components which may not strictly be necessary (tools, runtime, examples, and
+# tests).
+#
+# This is primarily to support building smaller or faster project files.
+option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON)
+option(LLVM_BUILD_TOOLS
+ "Build the LLVM tools. If OFF, just generate build targets." ON)
+
+option(LLVM_INCLUDE_RUNTIME "Generate build targets for the LLVM runtimes" ON)
+option(LLVM_BUILD_RUNTIME
+ "Build the LLVM runtime libraries. If OFF, just generate build targets." ON)
+
+option(LLVM_BUILD_EXAMPLES
+ "Build the LLVM example programs. If OFF, just generate build targets." OFF)
+option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)
+
+option(LLVM_BUILD_TESTS
+ "Build LLVM unit tests. If OFF, just generate build targets." OFF)
+option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
+
# All options referred to from HandleLLVMOptions have to be specified
# BEFORE this include, otherwise options will not be correctly set on
# first cmake run
include(config-ix)
include(HandleLLVMOptions)
+# Verify that we can find a Python interpreter.
+include(FindPythonInterp)
+if( NOT PYTHONINTERP_FOUND )
+ message(FATAL_ERROR
+"Unable to find Python interpreter, required for builds and testing.
+
+Please install Python or specify the PYTHON_EXECUTABLE CMake variable.")
+endif()
+
+######
+# LLVMBuild Integration
+#
+# We use llvm-build to generate all the data required by the CMake-based
+# build system in one swoop:
+#
+# - We generate a file (a CMake fragment) in the object root which contains
+# all the definitions that are required by CMake.
+#
+# - We generate the library table used by llvm-config.
+#
+# - We generate the dependencies for the CMake fragment, so that we will
+#    automatically reconfigure ourselves.
+
+set(LLVMBUILDTOOL "${LLVM_MAIN_SRC_DIR}/utils/llvm-build/llvm-build")
+set(LLVMCONFIGLIBRARYDEPENDENCIESINC
+ "${LLVM_BINARY_DIR}/tools/llvm-config/LibraryDependencies.inc")
+set(LLVMBUILDCMAKEFRAG
+ "${LLVM_BINARY_DIR}/LLVMBuild.cmake")
+message(STATUS "Constructing LLVMBuild project information")
+execute_process(
+ COMMAND ${PYTHON_EXECUTABLE} ${LLVMBUILDTOOL}
+ --native-target "${LLVM_NATIVE_ARCH}"
+ --enable-targets "${LLVM_TARGETS_TO_BUILD}"
+ --write-library-table ${LLVMCONFIGLIBRARYDEPENDENCIESINC}
+ --write-cmake-fragment ${LLVMBUILDCMAKEFRAG}
+  OUTPUT_VARIABLE LLVMBUILDOUTPUT
+ ERROR_VARIABLE LLVMBUILDERRORS
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ ERROR_STRIP_TRAILING_WHITESPACE
+ RESULT_VARIABLE LLVMBUILDRESULT)
+
+# On Win32, CMake doesn't properly handle piping the default output/error
+# streams into the GUI console. So, we explicitly catch and report them.
+if( NOT "${LLVMBUILDOUTPUT}" STREQUAL "")
+ message(STATUS "llvm-build output: ${LLVMBUILDOUTPUT}")
+endif()
+if( NOT "${LLVMBUILDRESULT}" STREQUAL "0" )
+ message(FATAL_ERROR
+ "Unexpected failure executing llvm-build: ${LLVMBUILDERRORS}")
+endif()
+
+# Include the generated CMake fragment. This will define properties from the
+# LLVMBuild files in a format which is easy to consume from CMake, and will add
+# the dependencies so that CMake will reconfigure properly when the LLVMBuild
+# files change.
+include(${LLVMBUILDCMAKEFRAG})
+
+######
+
+# Configure all of the various header file fragments LLVM uses which depend on
+# configuration variables.
+set(LLVM_ENUM_ASM_PRINTERS "")
+set(LLVM_ENUM_ASM_PARSERS "")
+set(LLVM_ENUM_DISASSEMBLERS "")
+foreach(t ${LLVM_TARGETS_TO_BUILD})
+ set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} )
+ file(GLOB asmp_file "${td}/*AsmPrinter.cpp")
+ if( asmp_file )
+ set(LLVM_ENUM_ASM_PRINTERS
+ "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
+ endif()
+ if( EXISTS ${td}/AsmParser/CMakeLists.txt )
+ set(LLVM_ENUM_ASM_PARSERS
+ "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n")
+ endif()
+ if( EXISTS ${td}/Disassembler/CMakeLists.txt )
+ set(LLVM_ENUM_DISASSEMBLERS
+ "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n")
+ endif()
+endforeach(t)
+
+# Produce the target definition files, which provide a way for clients to easily
+# include various classes of targets.
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
+ )
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def
+ )
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def
+ )
+configure_file(
+ ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Targets.def.in
+ ${LLVM_BINARY_DIR}/include/llvm/Config/Targets.def
+ )
+
+# Configure the three LLVM configuration header files.
configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake
${LLVM_BINARY_DIR}/include/llvm/Config/config.h)
-
configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake
${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h)
-
configure_file(
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
${LLVM_BINARY_DIR}/include/llvm/Support/DataTypes.h)
@@ -187,10 +375,6 @@ endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
include(AddLLVM)
include(TableGen)
-macro(llvm_tablegen)
- tablegen(LLVM ${ARGN})
-endmacro()
-
if( MINGW )
# People report that -O3 is unreliable on MinGW. The traditional
# build also uses -O2 for that reason:
@@ -212,32 +396,23 @@ add_subdirectory(utils/FileUpdate)
add_subdirectory(utils/count)
add_subdirectory(utils/not)
add_subdirectory(utils/llvm-lit)
+add_subdirectory(utils/json-bench)
+add_subdirectory(utils/yaml-bench)
add_subdirectory(projects)
-option(LLVM_BUILD_TOOLS
- "Build the LLVM tools. If OFF, just generate build targets." ON)
-option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON)
if( LLVM_INCLUDE_TOOLS )
add_subdirectory(tools)
endif()
-option(LLVM_BUILD_RUNTIME
- "Build the LLVM runtime libraries. If OFF, just generate build targets." ON)
-option(LLVM_INCLUDE_RUNTIME "Generate build targets for the LLVM runtimes" ON)
if( LLVM_INCLUDE_RUNTIME )
add_subdirectory(runtime)
endif()
-option(LLVM_BUILD_EXAMPLES
- "Build the LLVM example programs. If OFF, just generate build targets." OFF)
-option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)
if( LLVM_INCLUDE_EXAMPLES )
add_subdirectory(examples)
endif()
-option(LLVM_BUILD_TESTS
- "Build LLVM unit tests. If OFF, just generate build targets." OFF)
if( LLVM_INCLUDE_TESTS )
add_subdirectory(test)
add_subdirectory(utils/unittest)
@@ -277,8 +452,8 @@ install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/
# TODO: make and install documentation.
set(CPACK_PACKAGE_VENDOR "LLVM")
-set(CPACK_PACKAGE_VERSION_MAJOR 2)
-set(CPACK_PACKAGE_VERSION_MINOR 9)
+set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR})
+set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR})
add_version_info_from_vcs(CPACK_PACKAGE_VERSION_PATCH)
include(CPack)
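
The CMakeLists.txt hunks above add several new user-facing cache options (BUILD_SHARED_LIBS, LLVM_USE_INTEL_JITEVENTS, LLVM_USE_OPROFILE, LLVM_BUILD_32_BITS, LLVM_LIT_ARGS) and default LLVM_TARGETS_TO_BUILD to "all" on non-MSVC hosts. A minimal configure sketch exercising a few of them (the directory names and target list are illustrative, not part of the patch):

    # Out-of-source build, as required by the in-source check above.
    mkdir build && cd build
    cmake -DLLVM_TARGETS_TO_BUILD="X86;ARM" \
          -DBUILD_SHARED_LIBS=ON \
          -DLLVM_LIT_ARGS="-sv --no-progress-bar" \
          -DLLVM_USE_OPROFILE=ON \
          ../llvm
    # Note: LLVM_USE_OPROFILE=ON hits a FATAL_ERROR on non-Linux hosts,
    # and LLVM_USE_INTEL_JITEVENTS is Linux/Windows only.
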
diff --git a/CREDITS.TXT b/CREDITS.TXT
index f20152796ae1..ef471b0887ee 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -83,6 +83,10 @@ N: John T. Criswell
E: criswell@uiuc.edu
D: Original Autoconf support, documentation improvements, bug fixes
+N: Anshuman Dasgupta
+E: adasgupt@codeaurora.org
+D: Deterministic finite automaton based infrastructure for VLIW packetization
+
N: Stefanus Du Toit
E: stefanus.dutoit@rapidmind.com
D: Bug fixes and minor improvements
@@ -95,6 +99,10 @@ N: Alkis Evlogimenos
E: alkis@evlogimenos.com
D: Linear scan register allocator, many codegen improvements, Java frontend
+N: Hal Finkel
+E: hfinkel@anl.gov
+D: Basic-block autovectorization, PowerPC backend improvements
+
N: Ryan Flynn
E: pizza@parseerror.com
D: Miscellaneous bug fixes
@@ -143,6 +151,8 @@ N: James Grosbach
E: grosbach@apple.com
D: SjLj exception handling support
D: General fixes and improvements for the ARM back-end
+D: MCJIT
+D: ARM integrated assembler and assembly parser
N: Lang Hames
E: lhames@gmail.com
@@ -265,6 +275,7 @@ N: Takumi Nakamura
E: geek4civic@gmail.com
E: chapuni@hf.rim.or.jp
D: Cygwin and MinGW support.
+D: Win32 tweaks.
S: Yokohama, Japan
N: Edward O'Callaghan
@@ -313,6 +324,19 @@ W: http://vladimir_prus.blogspot.com
E: ghost@cs.msu.su
D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass
+N: Xerxes Ranby
+E: xerxes@zafena.se
+D: Cmake dependency chain and various bug fixes
+
+N: Chad Rosier
+E: mcrosier@apple.com
+D: ARM fast-isel improvements
+D: Performance monitoring
+
+N: Nadav Rotem
+E: nadav.rotem@intel.com
+D: Vector code generation improvements.
+
N: Roman Samoilov
E: roman@codedgers.com
D: MSIL backend
@@ -363,12 +387,9 @@ E: lauro.venancio@indt.org.br
D: ARM backend improvements
D: Thread Local Storage implementation
-N: Xerxes Ranby
-E: xerxes@zafena.se
-D: Cmake dependency chain and various bug fixes
-
N: Bill Wendling
E: wendling@apple.com
+D: Exception handling
D: Bunches of stuff
N: Bob Wilson
diff --git a/LICENSE.TXT b/LICENSE.TXT
index 1b1047ca37db..00cf60116941 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
-Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
@@ -67,3 +67,4 @@ Autoconf llvm/autoconf
CellSPU backend llvm/lib/Target/CellSPU/README.txt
Google Test llvm/utils/unittest/googletest
OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex}
+pyyaml tests llvm/test/YAMLParser/{*.data, LICENSE.TXT}
diff --git a/LLVMBuild.txt b/LLVMBuild.txt
new file mode 100644
index 000000000000..e763fd2afee2
--- /dev/null
+++ b/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./LLVMBuild.txt ------------------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = bindings docs examples lib projects runtime tools utils
+
+[component_0]
+type = Group
+name = Miscellaneous
+parent = $ROOT
diff --git a/Makefile b/Makefile
index a350cb19d2a9..ec24862ad6fc 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ LEVEL := .
ifneq ($(findstring llvmCore, $(RC_ProjectName)),llvmCore) # Normal build (not "Apple-style").
ifeq ($(BUILD_DIRS_ONLY),1)
- DIRS := lib/Support lib/TableGen utils
+ DIRS := lib/Support lib/TableGen utils tools/llvm-config
OPTIONAL_DIRS := tools/clang/utils/TableGen
else
DIRS := lib/Support lib/TableGen utils lib/VMCore lib tools/llvm-shlib \
@@ -68,20 +68,14 @@ endif
ifeq ($(MAKECMDGOALS),install-clang)
DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
+ tools/clang/tools/libclang tools/clang/tools/c-index-test \
+ tools/clang/include/clang-c \
tools/clang/runtime tools/clang/docs \
tools/lto runtime
OPTIONAL_DIRS :=
NO_INSTALL = 1
endif
-ifeq ($(MAKECMDGOALS),install-clang-c)
- DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
- tools/clang/tools/libclang tools/clang/tools/c-index-test \
- tools/clang/include/clang-c
- OPTIONAL_DIRS :=
- NO_INSTALL = 1
-endif
-
ifeq ($(MAKECMDGOALS),clang-only)
DIRS := $(filter-out tools docs unittests, $(DIRS)) \
tools/clang tools/lto
@@ -126,11 +120,14 @@ cross-compile-build-tools:
$(MAKE) -C BuildTools \
BUILD_DIRS_ONLY=1 \
UNIVERSAL= \
+ TARGET_NATIVE_ARCH="$(TARGET_NATIVE_ARCH)" \
+ TARGETS_TO_BUILD="$(TARGETS_TO_BUILD)" \
ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \
ENABLE_PROFILING=$(ENABLE_PROFILING) \
ENABLE_COVERAGE=$(ENABLE_COVERAGE) \
DISABLE_ASSERTIONS=$(DISABLE_ASSERTIONS) \
ENABLE_EXPENSIVE_CHECKS=$(ENABLE_EXPENSIVE_CHECKS) \
+ ENABLE_LIBCPP=$(ENABLE_LIBCPP) \
CFLAGS= \
CXXFLAGS= \
) || exit 1;
@@ -166,7 +163,6 @@ clang-only: all
tools-only: all
libs-only: all
install-clang: install
-install-clang-c: install
install-libs: install
# If SHOW_DIAGNOSTICS is enabled, clear the diagnostics file first.
@@ -179,11 +175,18 @@ all-local:: clean-diagnostics
endif
#------------------------------------------------------------------------
-# Make sure the generated headers are up-to-date. This must be kept in
-# sync with the AC_CONFIG_HEADER invocations in autoconf/configure.ac
+# Make sure the generated files are up-to-date. This must be kept in
+# sync with the AC_CONFIG_HEADER and AC_CONFIG_FILE invocations in
+# autoconf/configure.ac.
+# Note that Makefile.config is covered by its own separate rule
+# in Makefile.rules where it can be reused by sub-projects.
#------------------------------------------------------------------------
FilesToConfig := \
+ bindings/ocaml/llvm/META.llvm \
+ docs/doxygen.cfg \
+ llvm.spec \
include/llvm/Config/config.h \
+ include/llvm/Config/llvm-config.h \
include/llvm/Config/Targets.def \
include/llvm/Config/AsmPrinters.def \
include/llvm/Config/AsmParsers.def \
@@ -209,7 +212,7 @@ ifneq ($(ENABLE_OPTIMIZED),1)
$(Echo) '*****' configure with --enable-optimized.
ifeq ($(SHOW_DIAGNOSTICS),1)
$(Verb) if test -s $(LLVM_OBJ_ROOT)/$(BuildMode)/diags; then \
- $(LLVM_SRC_ROOT)/utils/show-diagnostics \
+ $(LLVM_SRC_ROOT)/utils/clang-parse-diagnostics-file -a \
$(LLVM_OBJ_ROOT)/$(BuildMode)/diags; \
fi
endif
@@ -243,7 +246,7 @@ SVN-UPDATE-OPTIONS =
AWK = awk
SUB-SVN-DIRS = $(AWK) '/\?\ \ \ \ \ \ / {print $$2}' \
| LC_ALL=C xargs $(SVN) info 2>/dev/null \
- | $(AWK) '/Path:\ / {print $$2}'
+ | $(AWK) '/^Path:\ / {print $$2}'
update:
$(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT)
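
The Makefile hunks above fold the former install-clang-c behavior into install-clang: libclang, c-index-test, and the clang-c headers are now part of its DIRS list, and the separate target is removed. A usage sketch, assuming clang is checked out under tools/clang:

    # The separate install-clang-c target no longer exists.
    make install-clang
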
diff --git a/Makefile.config.in b/Makefile.config.in
index fff482e77748..33fbb2ad4ca9 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -13,7 +13,7 @@
#===------------------------------------------------------------------------===#
# Define LLVM specific info and directories based on the autoconf variables
-LLVMPackageName := @PACKAGE_NAME@
+LLVMPackageName := @PACKAGE_TARNAME@
LLVMVersion := @PACKAGE_VERSION@
LLVM_CONFIGTIME := @LLVM_CONFIGTIME@
@@ -46,11 +46,19 @@ realpath = $(shell cd $(1); $(PWD))
PROJ_OBJ_DIR := $(call realpath, .)
PROJ_OBJ_ROOT := $(call realpath, $(PROJ_OBJ_DIR)/$(LEVEL))
-ifeq ($(PROJECT_NAME),llvm)
+CLANG_SRC_ROOT := @CLANG_SRC_ROOT@
+
+ifeq ($(PROJECT_NAME),$(LLVMPackageName))
LLVM_SRC_ROOT := $(call realpath, @abs_top_srcdir@)
LLVM_OBJ_ROOT := $(call realpath, @abs_top_builddir@)
PROJ_SRC_ROOT := $(LLVM_SRC_ROOT)
-PROJ_SRC_DIR := $(call realpath, $(LLVM_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR)))
+PROJ_SRC_DIR := $(LLVM_SRC_ROOT)$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR))
+
+ifneq ($(CLANG_SRC_ROOT),)
+ CLANG_SRC_ROOT:= $(call realpath, $(CLANG_SRC_ROOT))
+ PROJ_SRC_DIR := $(patsubst $(LLVM_SRC_ROOT)/tools/clang%,$(CLANG_SRC_ROOT)%,$(PROJ_SRC_DIR))
+endif
+
prefix := @prefix@
PROJ_prefix := $(prefix)
PROJ_VERSION := $(LLVMVersion)
@@ -78,7 +86,12 @@ PROJ_VERSION := 1.0
endif
endif
-LLVMMAKE := $(LLVM_SRC_ROOT)/make
+INTERNAL_PREFIX := @INTERNAL_PREFIX@
+ifneq ($(INTERNAL_PREFIX),)
+PROJ_internal_prefix := $(INTERNAL_PREFIX)
+else
+PROJ_internal_prefix := $(prefix)
+endif
PROJ_bindir := $(PROJ_prefix)/bin
PROJ_libdir := $(PROJ_prefix)/lib
@@ -101,6 +114,7 @@ TARGET_OS=@TARGET_OS@
# Target hardware architecture
ARCH=@ARCH@
+TARGET_NATIVE_ARCH := $(ARCH)
# Indicates, whether we're cross-compiling LLVM or not
LLVM_CROSS_COMPILING=@LLVM_CROSS_COMPILING@
@@ -161,7 +175,6 @@ SED := @SED@
TAR := @TAR@
# Paths to miscellaneous programs we hope are present but might not be
-PERL := @PERL@
BZIP2 := @BZIP2@
CAT := @CAT@
DOT := @DOT@
@@ -180,7 +193,6 @@ RUNTEST := @RUNTEST@
TCLSH := @TCLSH@
ZIP := @ZIP@
-HAVE_PERL := @HAVE_PERL@
HAVE_PTHREAD := @HAVE_PTHREAD@
LIBS := @LIBS@
@@ -202,6 +214,10 @@ RDYNAMIC := @RDYNAMIC@
# These are options that can either be enabled here, or can be enabled on the
# make command line (ie, make ENABLE_PROFILING=1):
+# When ENABLE_LIBCPP is enabled, LLVM uses libc++ by default to build.
+#ENABLE_LIBCPP = 0
+ENABLE_LIBCPP = @ENABLE_LIBCPP@
+
# When ENABLE_OPTIMIZED is enabled, LLVM code is optimized and output is put
# into the "Release" directories. Otherwise, LLVM code is not optimized and
# output is put in the "Debug" directories.
@@ -319,9 +335,19 @@ BINUTILS_INCDIR := @BINUTILS_INCDIR@
NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
# -Wno-variadic-macros
NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
+# -Wcovered-switch-default
+COVERED_SWITCH_DEFAULT = @COVERED_SWITCH_DEFAULT@
# Was polly found in tools/polly?
LLVM_HAS_POLLY = @LLVM_HAS_POLLY@
# Flags supported by the linker.
# bfd ld / gold --version-script=file
HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@
+
+# Flags to control building support for Intel JIT Events API
+USE_INTEL_JITEVENTS := @USE_INTEL_JITEVENTS@
+INTEL_JITEVENTS_INCDIR := @INTEL_JITEVENTS_INCDIR@
+INTEL_JITEVENTS_LIBDIR := @INTEL_JITEVENTS_LIBDIR@
+
+# Flags to control building support for OProfile JIT API
+USE_OPROFILE := @USE_OPROFILE@
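
As the comment in the ENABLE_LIBCPP hunk notes, these Makefile.config settings can also be enabled on the make command line rather than at configure time. A sketch (the job count is illustrative):

    # Build with libc++ and optimization, overriding the configured defaults.
    make ENABLE_LIBCPP=1 ENABLE_OPTIMIZED=1 -j4
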
diff --git a/Makefile.rules b/Makefile.rules
index d057f043ff64..0984dc072300 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -57,6 +57,72 @@ VPATH=$(PROJ_SRC_DIR)
$(UserTargets)::
+#------------------------------------------------------------------------
+# LLVMBuild Integration
+#------------------------------------------------------------------------
+#
+# We use llvm-build to generate all the data required by the Makefile-based
+# build system in one swoop:
+#
+# - We generate a file (a Makefile fragment) in the object root which contains
+# all the definitions that are required by Makefiles across the entire
+# project.
+#
+# - We generate the library table used by llvm-config.
+#
+# - We generate the dependencies for the Makefile fragment, so that we will
+#    automatically reconfigure ourselves.
+
+# The path to the llvm-build tool itself.
+LLVMBuildTool := $(PROJ_SRC_ROOT)/utils/llvm-build/llvm-build
+
+# The files we are going to generate using llvm-build.
+LLVMBuildMakeFrag := $(PROJ_OBJ_ROOT)/Makefile.llvmbuild
+LLVMConfigLibraryDependenciesInc := \
+ $(PROJ_OBJ_ROOT)/tools/llvm-config/LibraryDependencies.inc
+
+# This is for temporary backwards compatibility.
+ifndef TARGET_NATIVE_ARCH
+TARGET_NATIVE_ARCH := $(ARCH)
+endif
+
+# The rule to create the LLVMBuild Makefile fragment as well as the llvm-config
+# library table.
+#
+# Note that this target gets its real dependencies generated for us by
+# llvm-build.
+#
+# We include a dependency on this Makefile to ensure that changes to the
+# generation command get picked up.
+$(LLVMBuildMakeFrag): $(PROJ_SRC_ROOT)/Makefile.rules \
+ $(PROJ_OBJ_ROOT)/Makefile.config
+ $(Echo) Constructing LLVMBuild project information.
+ $(Verb) $(LLVMBuildTool) \
+ --native-target "$(TARGET_NATIVE_ARCH)" \
+ --enable-targets "$(TARGETS_TO_BUILD)" \
+ --write-library-table $(LLVMConfigLibraryDependenciesInc) \
+ --write-make-fragment $(LLVMBuildMakeFrag)
+
+# For completeness, let Make know how the extra files are generated.
+$(LLVMConfigLibraryDependenciesInc): $(LLVMBuildMakeFrag)
+
+# Include the generated Makefile fragment.
+#
+# We currently only include the dependencies for the fragment itself if we are
+# at the top-level. Otherwise, recursive invocations would end up doing
+# substantially more redundant stat'ing.
+#
+# This means that we won't properly regenerate things for developers used to
+# building from a subdirectory, but that is always somewhat unreliable.
+ifeq ($(LEVEL),.)
+LLVMBUILD_INCLUDE_DEPENDENCIES := 1
+
+# Clean the generated makefile fragment at the top-level.
+clean-local::
+ -$(Verb) $(RM) -f $(LLVMBuildMakeFrag)
+endif
+-include $(LLVMBuildMakeFrag)
+
################################################################################
# PRECONDITIONS: that which must be built/checked first
################################################################################
@@ -245,6 +311,11 @@ else
endif
endif
+ifeq ($(ENABLE_LIBCPP),1)
+ CXX.Flags += -stdlib=libc++
+ LD.Flags += -stdlib=libc++
+endif
+
ifeq ($(ENABLE_PROFILING),1)
BuildMode := $(BuildMode)+Profile
CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) -pg -g
@@ -253,9 +324,9 @@ ifeq ($(ENABLE_PROFILING),1)
KEEP_SYMBOLS := 1
endif
-#ifeq ($(ENABLE_VISIBILITY_INLINES_HIDDEN),1)
-# CXX.Flags += -fvisibility-inlines-hidden
-#endif
+ifeq ($(ENABLE_VISIBILITY_INLINES_HIDDEN),1)
+ CXX.Flags += -fvisibility-inlines-hidden
+endif
ifdef ENABLE_EXPENSIVE_CHECKS
# GNU libstdc++ uses RTTI if you define _GLIBCXX_DEBUG, which we did above.
@@ -445,8 +516,12 @@ endif
#--------------------------------------------------------------------
# Full Paths To Compiled Tools and Utilities
#--------------------------------------------------------------------
-EchoCmd = $(ECHO) llvm[$(MAKELEVEL)]:
-Echo = @$(EchoCmd)
+EchoCmd := $(ECHO) llvm[$(MAKELEVEL)]:
+ifdef BUILD_DIRS_ONLY
+EchoCmd := $(EchoCmd) "(build tools)":
+endif
+
+Echo := @$(EchoCmd)
ifndef LLVMAS
LLVMAS := $(LLVMToolDir)/llvm-as$(EXEEXT)
endif
@@ -457,7 +532,11 @@ ifndef LLVM_TBLGEN
LLVM_TBLGEN := $(LLVMToolDir)/llvm-tblgen$(EXEEXT)
endif
endif
-LLVM_CONFIG := $(LLVMToolDir)/llvm-config
+ifeq ($(LLVM_CROSS_COMPILING),1)
+ LLVM_CONFIG := $(BuildLLVMToolDir)/llvm-config$(BUILD_EXEEXT)
+else
+ LLVM_CONFIG := $(LLVMToolDir)/llvm-config$(EXEEXT)
+endif
ifndef LLVMLD
LLVMLD := $(LLVMToolDir)/llvm-ld$(EXEEXT)
endif
@@ -571,7 +650,7 @@ ifndef NO_PEDANTIC
CompileCommonOpts += -pedantic -Wno-long-long
endif
CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
- $(EXTRA_OPTIONS)
+ $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT)
# Enable cast-qual for C++; the workaround is to use const_cast.
CXX.Flags += -Wcast-qual
@@ -622,7 +701,13 @@ ifeq ($(HOST_OS),AuroraUX)
CPP.BaseFlags += -include llvm/Support/Solaris.h
endif # !HOST_OS - AuroraUX.
-LD.Flags += -L$(LibDir) -L$(LLVMLibDir)
+# On Windows, SharedLibDir != LibDir. The order is important.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ LD.Flags += -L$(SharedLibDir) -L$(LibDir) -L$(LLVMToolDir) -L$(LLVMLibDir)
+else
+ LD.Flags += -L$(LibDir) -L$(LLVMLibDir)
+endif
+
CPP.BaseFlags += -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
# All -I flags should go here, so that they don't confuse llvm-config.
CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
@@ -631,6 +716,10 @@ CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
$(LLVM_OBJ_ROOT) $(LLVM_SRC_ROOT))) \
$(CPP.BaseFlags)
+ifeq ($(INCLUDE_BUILD_DIR),1)
+ CPP.Flags += -I$(ObjDir)
+endif
+
# SHOW_DIAGNOSTICS support.
ifeq ($(SHOW_DIAGNOSTICS),1)
Compile.Wrapper := env CC_LOG_DIAGNOSTICS=1 \
@@ -639,35 +728,18 @@ else
Compile.Wrapper :=
endif
-ifeq ($(BUILD_COMPONENT), 1)
- Compile.C = $(Compile.Wrapper) \
- $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
- $(TargetCommonOpts) $(CompileCommonOpts) -c
- Compile.CXX = $(Compile.Wrapper) \
- $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
- $(CPPFLAGS) \
- $(TargetCommonOpts) $(CompileCommonOpts) -c
- Preprocess.CXX= $(Compile.Wrapper) \
- $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \
- $(CompileCommonOpts) $(CXX.Flags) -E
- Link = $(Compile.Wrapper) \
- $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
- $(LD.Flags) $(LDFLAGS) \
- $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
-else
- Compile.C = $(Compile.Wrapper) \
+Compile.C = $(Compile.Wrapper) \
$(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
- $(TargetCommonOpts) $(CompileCommonOpts) -c
- Compile.CXX = $(Compile.Wrapper) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -c
+Compile.CXX = $(Compile.Wrapper) \
$(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
- $(TargetCommonOpts) $(CompileCommonOpts) -c
- Preprocess.CXX= $(Compile.Wrapper) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -c
+Preprocess.CXX= $(Compile.Wrapper) \
$(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \
- $(CompileCommonOpts) $(CXX.Flags) -E
- Link = $(Compile.Wrapper) \
+ $(CompileCommonOpts) $(CXX.Flags) -E
+Link = $(Compile.Wrapper) \
$(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LD.Flags) \
- $(LDFLAGS) $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
-endif
+ $(LDFLAGS) $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
BCCompile.C = $(LLVMCC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
$(TargetCommonOpts) $(CompileCommonOpts)
@@ -806,7 +878,7 @@ endif
# Handle the OPTIONAL_PARALLEL_DIRS options for optional parallel construction
#-----------------------------------------------------------
ifdef OPTIONAL_PARALLEL_DIRS
- PARALLEL_DIRS += $(foreach T,$(OPTIONAL_PARALLEL_DIRS),$(shell test -d $(PROJ_SRC_DIR)/$(T) && echo "$(T)"))
+ PARALLEL_DIRS += $(foreach T,$(OPTIONAL_PARALLEL_DIRS),$(shell test -d $(PROJ_SRC_DIR)/$(T) -o -f $(T)/Makefile && echo "$(T)"))
endif
#-----------------------------------------------------------
@@ -828,13 +900,20 @@ unitcheck:: $(addsuffix /.makeunitcheck,$(PARALLEL_DIRS))
ParallelTargets := $(foreach T,$(RecursiveTargets),%/.make$(T))
$(ParallelTargets) :
- $(Verb) if ([ ! -f $(@D)/Makefile ] || \
- command test $(@D)/Makefile -ot \
- $(PROJ_SRC_DIR)/$(@D)/Makefile ); then \
- $(MKDIR) $(@D); \
- $(CP) $(PROJ_SRC_DIR)/$(@D)/Makefile $(@D)/Makefile; \
+ $(Verb) \
+ SD=$(PROJ_SRC_DIR)/$(@D); \
+ DD=$(@D); \
+ if [ ! -f $$SD/Makefile ]; then \
+ SD=$(@D); \
+ DD=$(notdir $(@D)); \
+ fi; \
+ if ([ ! -f $$DD/Makefile ] || \
+ command test $$DD/Makefile -ot \
+ $$SD/Makefile ); then \
+ $(MKDIR) $$DD; \
+ $(CP) $$SD/Makefile $$DD/Makefile; \
fi; \
- $(MAKE) -C $(@D) $(subst $(@D)/.make,,$@)
+ $(MAKE) -C $$DD $(subst $(@D)/.make,,$@)
endif
#---------------------------------------------------------
@@ -991,7 +1070,7 @@ ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).map
$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
$(Verb) echo "{" > $@
- $(Verb) grep -q "\<" $< && echo " global:" >> $@ || :
+ $(Verb) grep -q '[[:alnum:]_]' $< && echo " global:" >> $@ || :
$(Verb) sed -e 's/$$/;/' -e 's/^/ /' < $< >> $@
ifneq ($(HOST_OS),OpenBSD)
$(Verb) echo " local: *;" >> $@
@@ -1353,7 +1432,7 @@ LD.Flags += -Wl,-exported_symbol,_main
endif
endif
-ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD))
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD GNU))
ifneq ($(ARCH), Mips)
LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map
endif
@@ -1413,12 +1492,23 @@ else
$(ToolBuildPath): $(ToolDir)/.dir
endif
+ifdef CODESIGN_TOOLS
+$(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
+ $(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
+ $(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
+ $(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
+ $(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
+ $(StripWarnMsg)
+ $(Echo) ======= Code-Signing $(BuildMode) Executable $(TOOLNAME)
+ $(Verb) codesign -s - $@
+else
$(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
$(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
$(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
$(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
$(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
$(StripWarnMsg)
+endif
ifneq ($(strip $(ToolAliasBuildPath)),)
$(ToolAliasBuildPath): $(ToolBuildPath)
@@ -1435,12 +1525,19 @@ install-local::
uninstall-local::
$(Echo) Uninstall circumvented with NO_INSTALL
else
-DestTool = $(DESTDIR)$(PROJ_bindir)/$(TOOLEXENAME)
+
+ifdef INTERNAL_TOOL
+ToolBinDir = $(DESTDIR)$(PROJ_internal_prefix)/bin
+else
+ToolBinDir = $(DESTDIR)$(PROJ_bindir)
+endif
+DestTool = $(ToolBinDir)/$(TOOLEXENAME)
install-local:: $(DestTool)
-$(DestTool): $(ToolBuildPath) $(DESTDIR)$(PROJ_bindir)
+$(DestTool): $(ToolBuildPath)
$(Echo) Installing $(BuildMode) $(DestTool)
+ $(Verb) $(MKDIR) $(ToolBinDir)
$(Verb) $(ProgInstall) $(ToolBuildPath) $(DestTool)
uninstall-local::
@@ -1449,7 +1546,7 @@ uninstall-local::
# TOOLALIAS install.
ifdef TOOLALIAS
-DestToolAlias = $(DESTDIR)$(PROJ_bindir)/$(TOOLALIAS)$(EXEEXT)
+DestToolAlias = $(ToolBinDir)/$(TOOLALIAS)$(EXEEXT)
install-local:: $(DestToolAlias)
@@ -1783,6 +1880,9 @@ $(ObjDir)/ARMGenDecoderTables.inc.tmp : ARM.td $(ObjDir)/.dir $(LLVM_TBLGEN)
$(Echo) "Building $(<F) decoder tables with tblgen"
$(Verb) $(LLVMTableGen) -gen-arm-decoder -o $(call SYSPATH, $@) $<
+$(ObjDir)/%GenDFAPacketizer.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) DFA packetizer tables with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-dfa-packetizer -o $(call SYSPATH, $@) $<
clean-local::
-$(Verb) $(RM) -f $(INCFiles)
@@ -1815,7 +1915,6 @@ clean-local::
ifneq ($(strip $(ObjRootDir)),)
-$(Verb) $(RM) -rf $(ObjRootDir)
endif
- -$(Verb) $(RM) -f core core.[0-9][0-9]* *.o *.d *~ *.flc
ifneq ($(strip $(SHLIBEXT)),) # Extra paranoia - make real sure SHLIBEXT is set
-$(Verb) $(RM) -f *$(SHLIBEXT)
endif
@@ -2209,6 +2308,7 @@ printvars::
$(Echo) "LLVM_SRC_ROOT: " '$(LLVM_SRC_ROOT)'
$(Echo) "LLVM_OBJ_ROOT: " '$(LLVM_OBJ_ROOT)'
$(Echo) "PROJ_prefix : " '$(PROJ_prefix)'
+ $(Echo) "PROJ_internal_prefix : " '$(PROJ_internal_prefix)'
$(Echo) "PROJ_bindir : " '$(PROJ_bindir)'
$(Echo) "PROJ_libdir : " '$(PROJ_libdir)'
$(Echo) "PROJ_etcdir : " '$(PROJ_etcdir)'
diff --git a/ModuleInfo.txt b/ModuleInfo.txt
deleted file mode 100644
index 40607c71a944..000000000000
--- a/ModuleInfo.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-DepModule:
-BuildCmd: ./build-for-llvm-top.sh
-CleanCmd: make clean -C ../build.llvm
-InstallCmd: make install -C ../build.llvm
diff --git a/README.txt b/README.txt
index 2ebe271b8e51..34a568fb7845 100644
--- a/README.txt
+++ b/README.txt
@@ -1,3 +1,4 @@
+
Low Level Virtual Machine (LLVM)
================================
@@ -13,3 +14,5 @@ assistance with LLVM.
If you're writing a package for LLVM, see docs/Packaging.html for our
suggestions.
+
+
diff --git a/autoconf/AutoRegen.sh b/autoconf/AutoRegen.sh
index 5102aebc9701..7809667ac5f1 100755
--- a/autoconf/AutoRegen.sh
+++ b/autoconf/AutoRegen.sh
@@ -14,9 +14,9 @@ clean() {
### Periods should be escaped with backslash for use by grep.
###
### If you update these, please also update docs/GettingStarted.html
-want_autoconf_version='2\.61'
+want_autoconf_version='2\.60'
want_autoheader_version=$want_autoconf_version
-want_aclocal_version='1\.10'
+want_aclocal_version='1\.9\.6'
want_libtool_version='1\.5\.22'
### END NOTE #########################################################
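
Since AutoRegen.sh now pins autoconf 2.60 and aclocal 1.9.6 (previously 2.61 and 1.10), it is worth confirming the installed tool versions before regenerating configure:

    # AutoRegen.sh greps for these exact version strings and aborts on
    # a mismatch.
    autoconf --version | head -n1
    aclocal --version | head -n1
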
diff --git a/autoconf/config.sub b/autoconf/config.sub
index da19a880e5f1..9942491533e8 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -4,7 +4,7 @@
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011 Free Software Foundation, Inc.
-timestamp='2011-08-23'
+timestamp='2011-11-02'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
@@ -256,6 +256,7 @@ case $basic_machine in
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
| fido | fr30 | frv \
+ | hexagon \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
@@ -367,6 +368,7 @@ case $basic_machine in
| elxsi-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
+ | hexagon-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
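
With hexagon added to both the bare-machine and vendor-qualified patterns in config.sub, Hexagon triples should now canonicalize rather than be rejected; a sketch (the exact triple is illustrative):

    # config.sub prints the canonical form of a recognized configuration.
    ./autoconf/config.sub hexagon-unknown-linux-gnu
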
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index a4ccfcd64315..0a2c8b69ddd5 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -30,13 +30,15 @@ dnl=== SECTION 1: Initialization & Setup
dnl===
dnl===-----------------------------------------------------------------------===
dnl Initialize autoconf and define the package name, version number and
-dnl email address for reporting bugs.
-AC_INIT([[llvm]],[[3.0]],[llvmbugs@cs.uiuc.edu])
+dnl address for reporting bugs.
+AC_INIT([LLVM],[3.1svn],[http://llvm.org/bugs/])
+AC_DEFINE([LLVM_VERSION_MAJOR], [3], [Major version of the LLVM API])
+AC_DEFINE([LLVM_VERSION_MINOR], [1], [Minor version of the LLVM API])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
-AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign."])
-AC_COPYRIGHT([Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.])
+AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign."])
+AC_COPYRIGHT([Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.])
dnl Indicate that we require autoconf 2.60 or later.
AC_PREREQ(2.60)
@@ -114,6 +116,7 @@ do
llvm-tv) AC_CONFIG_SUBDIRS([projects/llvm-tv]) ;;
safecode) AC_CONFIG_SUBDIRS([projects/safecode]) ;;
llvm-kernel) AC_CONFIG_SUBDIRS([projects/llvm-kernel]) ;;
+ compiler-rt) ;;
llvm-gcc) ;;
test-suite) ;;
llvm-test) ;;
@@ -188,7 +191,7 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
llvm_cv_no_link_all_option="-Wl,-noall_load"
llvm_cv_os_type="Minix"
llvm_cv_platform_type="Unix" ;;
- *-*-freebsd*)
+ *-*-freebsd* | *-*-kfreebsd-gnu)
llvm_cv_link_all_option="-Wl,--whole-archive"
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
llvm_cv_os_type="FreeBSD"
@@ -223,6 +226,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
llvm_cv_os_type="Linux"
llvm_cv_platform_type="Unix" ;;
+ *-*-gnu*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="GNU"
+ llvm_cv_platform_type="Unix" ;;
*-*-solaris*)
llvm_cv_link_all_option="-Wl,-z,allextract"
llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
@@ -278,7 +286,7 @@ AC_CACHE_CHECK([type of operating system we're going to target],
llvm_cv_target_os_type="Darwin" ;;
*-*-minix*)
llvm_cv_target_os_type="Minix" ;;
- *-*-freebsd*)
+ *-*-freebsd* | *-*-kfreebsd-gnu)
llvm_cv_target_os_type="FreeBSD" ;;
*-*-openbsd*)
llvm_cv_target_os_type="OpenBSD" ;;
@@ -292,6 +300,8 @@ AC_CACHE_CHECK([type of operating system we're going to target],
llvm_cv_target_os_type="Interix" ;;
*-*-linux*)
llvm_cv_target_os_type="Linux" ;;
+ *-*-gnu*)
+ llvm_cv_target_os_type="GNU" ;;
*-*-solaris*)
llvm_cv_target_os_type="SunOS" ;;
*-*-auroraux*)
@@ -352,13 +362,12 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
- alpha*-*) llvm_cv_target_arch="Alpha" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
mips-*) llvm_cv_target_arch="Mips" ;;
+ mipsel-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
- s390x-*) llvm_cv_target_arch="SystemZ" ;;
- bfin-*) llvm_cv_target_arch="Blackfin" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
@@ -368,7 +377,7 @@ if test "$llvm_cv_target_arch" = "Unknown" ; then
AC_MSG_WARN([Configuring LLVM for an unknown target architecture])
fi
-# Determine the LLVM native architecture for the target
+dnl Determine the LLVM native architecture for the target
case "$llvm_cv_target_arch" in
x86) LLVM_NATIVE_ARCH="X86" ;;
x86_64) LLVM_NATIVE_ARCH="X86" ;;
@@ -381,7 +390,7 @@ AC_SUBST(ARCH,$llvm_cv_target_arch)
dnl Check for the endianness of the target
AC_C_BIGENDIAN(AC_SUBST([ENDIAN],[big]),AC_SUBST([ENDIAN],[little]))
-dnl Check for build platform executable suffix if we're crosscompiling
+dnl Check for build platform executable suffix if we're cross-compiling
if test "$cross_compiling" = yes; then
AC_SUBST(LLVM_CROSS_COMPILING, [1])
AC_BUILD_EXEEXT
@@ -418,9 +427,21 @@ dnl=== SECTION 3: Command line arguments for the configure script.
dnl===
dnl===-----------------------------------------------------------------------===
+dnl --enable-libcpp : check whether or not to use libc++ on the command line
+AC_ARG_ENABLE(libcpp,
+ AS_HELP_STRING([--enable-libcpp],
+ [Use libc++ if available (default is NO)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_LIBCPP,[1]) ;;
+ no) AC_SUBST(ENABLE_LIBCPP,[0]) ;;
+  default) AC_SUBST(ENABLE_LIBCPP,[0]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-libcpp. Use "yes" or "no"]) ;;
+esac
+
dnl --enable-optimized : check whether they want to do an optimized build:
AC_ARG_ENABLE(optimized, AS_HELP_STRING(
- --enable-optimized,[Compile with optimizations enabled (default is YES)]),,enableval=$optimize)
+ --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
if test ${enableval} = "no" ; then
AC_SUBST(ENABLE_OPTIMIZED,[[]])
else
@@ -438,7 +459,7 @@ fi
dnl --enable-assertions : check whether they want to turn on assertions or not:
AC_ARG_ENABLE(assertions,AS_HELP_STRING(
- --enable-assertions,[Compile with assertion checks enabled (default is NO)]),, enableval="no")
+ --enable-assertions,[Compile with assertion checks enabled (default is YES)]),, enableval="yes")
if test ${enableval} = "yes" ; then
AC_SUBST(DISABLE_ASSERTIONS,[[]])
else
@@ -489,13 +510,11 @@ else
Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;;
PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
- Alpha) AC_SUBST(TARGET_HAS_JIT,0) ;;
ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
- SystemZ) AC_SUBST(TARGET_HAS_JIT,0) ;;
- Blackfin) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -532,12 +551,13 @@ AC_ARG_ENABLE(threads,
[Use threads if available (default is YES)]),,
enableval=default)
case "$enableval" in
- yes) AC_SUBST(ENABLE_THREADS,[1]) ;;
- no) AC_SUBST(ENABLE_THREADS,[0]) ;;
- default) AC_SUBST(ENABLE_THREADS,[1]) ;;
+ yes) AC_SUBST(LLVM_ENABLE_THREADS,[1]) ;;
+ no) AC_SUBST(LLVM_ENABLE_THREADS,[0]) ;;
+ default) AC_SUBST(LLVM_ENABLE_THREADS,[1]) ;;
*) AC_MSG_ERROR([Invalid setting for --enable-threads. Use "yes" or "no"]) ;;
esac
-AC_DEFINE_UNQUOTED([ENABLE_THREADS],$ENABLE_THREADS,[Define if threads enabled])
+AC_DEFINE_UNQUOTED([LLVM_ENABLE_THREADS],$LLVM_ENABLE_THREADS,
+ [Define if threads enabled])
dnl Allow disablement of pthread.h
AC_ARG_ENABLE(pthreads,
@@ -607,30 +627,28 @@ dnl Allow specific targets to be specified for building (or not)
TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
- host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu,
- xcore, msp430, systemz, blackfin, ptx, cbe, and cpp (default=all)]),,
+ host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ xcore, msp430, ptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze PTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
- alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
- blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
- cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
@@ -638,15 +656,13 @@ case "$enableval" in
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
- Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
- Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;
@@ -657,8 +673,8 @@ case "$enableval" in
esac
AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD)
-# Determine whether we are building LLVM support for the native architecture.
-# If so, define LLVM_NATIVE_ARCH to that LLVM target.
+dnl Determine whether we are building LLVM support for the native architecture.
+dnl If so, define LLVM_NATIVE_ARCH to that LLVM target.
for a_target in $TARGETS_TO_BUILD; do
if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH, $LLVM_NATIVE_ARCH,
@@ -670,6 +686,9 @@ for a_target in $TARGETS_TO_BUILD; do
if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+ LLVM_NATIVE_DISASSEMBLER="LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler"
+ fi
AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGET, $LLVM_NATIVE_TARGET,
[LLVM name for the native Target init function, if available])
AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO,
@@ -682,11 +701,15 @@ for a_target in $TARGETS_TO_BUILD; do
AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPARSER, $LLVM_NATIVE_ASMPARSER,
[LLVM name for the native AsmParser init function, if available])
fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_DISASSEMBLER, $LLVM_NATIVE_DISASSEMBLER,
+ [LLVM name for the native Disassembler init function, if available])
+ fi
fi
done
-# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual
-# target feature def files.
+dnl Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual
+dnl target feature def files.
LLVM_ENUM_TARGETS=""
LLVM_ENUM_ASM_PRINTERS=""
LLVM_ENUM_ASM_PARSERS=""
@@ -708,21 +731,6 @@ AC_SUBST(LLVM_ENUM_ASM_PRINTERS)
AC_SUBST(LLVM_ENUM_ASM_PARSERS)
AC_SUBST(LLVM_ENUM_DISASSEMBLERS)
-dnl Prevent the CBackend from using printf("%a") for floating point so older
-dnl C compilers that cannot deal with the 0x0p+0 hex floating point format
-dnl can still compile the CBE's output
-AC_ARG_ENABLE([cbe-printf-a],AS_HELP_STRING([--enable-cbe-printf-a],
- [Enable C Backend output with hex floating point via %a (default is YES)]),,
- enableval=default)
-case "$enableval" in
- yes) AC_SUBST(ENABLE_CBE_PRINTF_A,[1]) ;;
- no) AC_SUBST(ENABLE_CBE_PRINTF_A,[0]) ;;
- default) AC_SUBST(ENABLE_CBE_PRINTF_A,[1]) ;;
- *) AC_MSG_ERROR([Invalid setting for --enable-cbe-printf-a. Use "yes" or "no"]) ;;
-esac
-AC_DEFINE_UNQUOTED([ENABLE_CBE_PRINTF_A],$ENABLE_CBE_PRINTF_A,
- [Define if CBE is enabled for printf %a output])
-
dnl Override the option to use for optimized builds.
AC_ARG_WITH(optimize-option,
AS_HELP_STRING([--with-optimize-option],
@@ -795,6 +803,17 @@ case "$withval" in
*) AC_MSG_ERROR([Invalid path for --with-ocaml-libdir. Provide full path]) ;;
esac
+AC_ARG_WITH(clang-srcdir,
+ AS_HELP_STRING([--with-clang-srcdir],
+ [Directory to the out-of-tree Clang source]),,
+ withval="-")
+case "$withval" in
+ -) clang_src_root="" ;;
+ /* | [[A-Za-z]]:[[\\/]]*) clang_src_root="$withval" ;;
+ *) clang_src_root="$ac_pwd/$withval" ;;
+esac
+AC_SUBST(CLANG_SRC_ROOT,[$clang_src_root])
+
AC_ARG_WITH(clang-resource-dir,
AS_HELP_STRING([--with-clang-resource-dir],
[Relative directory from the Clang binary for resource files]),,
@@ -809,33 +828,15 @@ AC_ARG_WITH(c-include-dirs,
AC_DEFINE_UNQUOTED(C_INCLUDE_DIRS,"$withval",
[Directories clang will search for headers])
-AC_ARG_WITH(cxx-include-root,
- AS_HELP_STRING([--with-cxx-include-root],
- [Directory with the libstdc++ headers.]),,
- withval="")
-AC_DEFINE_UNQUOTED(CXX_INCLUDE_ROOT,"$withval",
- [Directory with the libstdc++ headers.])
-
-AC_ARG_WITH(cxx-include-arch,
- AS_HELP_STRING([--with-cxx-include-arch],
- [Architecture of the libstdc++ headers.]),,
- withval="")
-AC_DEFINE_UNQUOTED(CXX_INCLUDE_ARCH,"$withval",
- [Arch the libstdc++ headers.])
-
-AC_ARG_WITH(cxx-include-32bit-dir,
- AS_HELP_STRING([--with-cxx-include-32bit-dir],
- [32 bit multilib dir.]),,
+# Clang normally uses the system c++ headers and libraries. With this option,
+# clang will use the ones provided by a gcc installation instead. This option should
+# be passed the same value that was used with --prefix when configuring gcc.
+AC_ARG_WITH(gcc-toolchain,
+ AS_HELP_STRING([--with-gcc-toolchain],
+ [Directory where gcc is installed.]),,
withval="")
-AC_DEFINE_UNQUOTED(CXX_INCLUDE_32BIT_DIR,"$withval",
- [32 bit multilib directory.])
-
-AC_ARG_WITH(cxx-include-64bit-dir,
- AS_HELP_STRING([--with-cxx-include-64bit-dir],
- [64 bit multilib directory.]),,
- withval="")
-AC_DEFINE_UNQUOTED(CXX_INCLUDE_64BIT_DIR,"$withval",
- [64 bit multilib directory.])
+AC_DEFINE_UNQUOTED(GCC_INSTALL_PREFIX,"$withval",
+ [Directory where gcc is installed.])
dnl Allow linking of LLVM with GPLv3 binutils code.
AC_ARG_WITH(binutils-include,
@@ -873,6 +874,12 @@ AC_ARG_ENABLE(libffi,AS_HELP_STRING(
esac],
llvm_cv_enable_libffi=no)
+AC_ARG_WITH(internal-prefix,
+ AS_HELP_STRING([--with-internal-prefix],
+ [Installation directory for internal files]),,
+ withval="")
+AC_SUBST(INTERNAL_PREFIX,[$withval])
+
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 4: Check for programs we need and that they are the right version
@@ -991,16 +998,6 @@ if test "$XDOT_PY" != "echo xdot.py" ; then
[Define to path to xdot.py program if found or 'echo xdot.py' otherwise])
fi
-dnl Look for a sufficiently recent version of Perl.
-LLVM_PROG_PERL([5.006])
-AC_SUBST(PERL)
-if test x"$PERL" = xnone; then
- AC_SUBST(HAVE_PERL,0)
- AC_MSG_ERROR([perl is required but was not found, please install it])
-else
- AC_SUBST(HAVE_PERL,1)
-fi
-
dnl Find the install program
AC_PROG_INSTALL
dnl Prepend src dir to install path dir if it's a relative path
@@ -1101,7 +1098,8 @@ dnl Check optional compiler flags.
AC_MSG_CHECKING([optional compiler flags])
CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
-AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS])
+CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
+AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT])
dnl===-----------------------------------------------------------------------===
dnl===
@@ -1133,7 +1131,7 @@ AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
dnl pthread locking functions are optional - but llvm will not be thread-safe
dnl without locks.
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
AC_CHECK_LIB(pthread, pthread_mutex_init)
AC_SEARCH_LIBS(pthread_mutex_lock,pthread,
AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1],
@@ -1178,29 +1176,83 @@ AC_ARG_WITH(oprofile,
*) llvm_cv_oppath="${withval}/lib/oprofile"
CPPFLAGS="-I${withval}/include";;
esac
- if test -n "$llvm_cv_oppath" ; then
- LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
- dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744:
- dnl libbfd is not included properly in libopagent in some Debian
- dnl versions. If libbfd isn't found at all, we assume opagent works
- dnl anyway.
- AC_SEARCH_LIBS(bfd_init, bfd, [], [])
- AC_SEARCH_LIBS(op_open_agent, opagent, [], [
- echo "Error! You need to have libopagent around."
- exit -1
- ])
- AC_CHECK_HEADER([opagent.h], [], [
- echo "Error! You need to have opagent.h around."
- exit -1
- ])
- fi
+ case $llvm_cv_os_type in
+ Linux)
+ if test -n "$llvm_cv_oppath" ; then
+ LIBS="$LIBS -lopagent -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+ dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744:
+ dnl libbfd is not included properly in libopagent in some Debian
+ dnl versions. If libbfd isn't found at all, we assume opagent works
+ dnl anyway.
+ AC_SEARCH_LIBS(bfd_init, bfd, [], [])
+ AC_SEARCH_LIBS(op_open_agent, opagent, [], [
+ echo "Error! You need to have libopagent around."
+ exit -1
+ ])
+ AC_CHECK_HEADER([opagent.h], [], [
+ echo "Error! You need to have opagent.h around."
+ exit -1
+ ])
+ fi ;;
+ *)
+ AC_MSG_ERROR([OProfile support is available on Linux only.]) ;;
+ esac
],
[
AC_SUBST(USE_OPROFILE, [0])
])
-AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE,
+AC_DEFINE_UNQUOTED([LLVM_USE_OPROFILE],$USE_OPROFILE,
[Define if we have the oprofile JIT-support library])
+dnl Enable support for Intel JIT Events API.
+AC_ARG_WITH(intel-jitevents,
+ AS_HELP_STRING([--with-intel-jitevents=<vtune-amplifier-dir>],
+ [Specify location of run-time support library for Intel JIT API (default=/opt/intel/vtune_amplifier_xe_2011)]),
+ [
+ case $llvm_cv_os_type in
+ Linux|Win32|Cygwin|MingW) ;;
+ *)
+ AC_MSG_ERROR([
+          Intel JIT API support is available on Linux and Windows only.]) ;;
+ esac
+
+ AC_SUBST(USE_INTEL_JITEVENTS, [1])
+ case "$llvm_cv_target_arch" in
+ x86) llvm_intel_jitevents_archdir="lib32";;
+ x86_64) llvm_intel_jitevents_archdir="lib64";;
+ *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
+ exit -1;;
+ esac
+ INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
+ INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
+ case "$withval" in
+ /* | [[A-Za-z]]:[[\\/]]*) INTEL_JITEVENTS_INCDIR=$withval/include
+ INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
+ *) ;;
+ esac
+
+ AC_SUBST(INTEL_JITEVENTS_INCDIR)
+ AC_SUBST(INTEL_JITEVENTS_LIBDIR)
+
+ LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
+ CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
+
+ AC_SEARCH_LIBS(iJIT_IsProfilingActive, jitprofiling, [], [
+ echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
+ exit -1
+ ])
+ AC_CHECK_HEADER([jitprofiling.h], [], [
+ echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
+ exit -1
+ ])
+
+ ],
+ [
+ AC_SUBST(USE_INTEL_JITEVENTS, [0])
+ ])
+AC_DEFINE_UNQUOTED([LLVM_USE_INTEL_JITEVENTS],$USE_INTEL_JITEVENTS,
+ [Define if we have the Intel JIT API runtime support library])
+
dnl===-----------------------------------------------------------------------===
dnl===
dnl=== SECTION 6: Check for header files
@@ -1224,7 +1276,7 @@ AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
AC_CHECK_HEADERS([valgrind/valgrind.h])
AC_CHECK_HEADERS([fenv.h])
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
AC_CHECK_HEADERS(pthread.h,
AC_SUBST(HAVE_PTHREAD, 1),
AC_SUBST(HAVE_PTHREAD, 0))
@@ -1282,7 +1334,7 @@ AC_CHECK_FUNCS([backtrace ceilf floorf roundf rintf nearbyintf getcwd ])
AC_CHECK_FUNCS([powf fmodf strtof round ])
AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ])
AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
-AC_CHECK_FUNCS([mktemp posix_spawn realpath sbrk setrlimit strdup ])
+AC_CHECK_FUNCS([mktemp posix_spawn pread realpath sbrk setrlimit strdup ])
AC_CHECK_FUNCS([strerror strerror_r setenv ])
AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev])
@@ -1405,9 +1457,9 @@ dnl Propagate the run-time library path variable that the libltdl
dnl checks found to the Makefiles so we can use it there too
AC_SUBST(SHLIBPATH_VAR,$libltdl_cv_shlibpath_var)
-# Translate the various configuration directories and other basic
-# information into substitutions that will end up in Makefile.config.in
-# that these configured values can be used by the makefiles
+dnl Translate the various configuration directories and other basic
+dnl information into substitutions that will end up in Makefile.config.in
+dnl that these configured values can be used by the makefiles
if test "${prefix}" = "NONE" ; then
prefix="/usr/local"
fi
@@ -1432,8 +1484,13 @@ AC_SUBST(LLVM_INFODIR)
AC_SUBST(LLVM_MANDIR)
AC_SUBST(LLVM_CONFIGTIME)
-# Place the various directores into the config.h file as #defines so that we
-# can know about the installation paths within LLVM.
+dnl Disable embedding timestamps in the build directory, with ENABLE_TIMESTAMPS.
+if test "${ENABLE_TIMESTAMPS}" = "0"; then
+ LLVM_CONFIGTIME="(timestamp not enabled)"
+fi
+
+dnl Place the various directories into the config.h file as #defines so that we
+dnl can know about the installation paths within LLVM.
AC_DEFINE_UNQUOTED(LLVM_PREFIX,"$LLVM_PREFIX",
[Installation prefix directory])
AC_DEFINE_UNQUOTED(LLVM_BINDIR, "$LLVM_BINDIR",
@@ -1454,10 +1511,10 @@ AC_DEFINE_UNQUOTED(LLVM_MANDIR, "$LLVM_MANDIR",
[Installation directory for man pages])
AC_DEFINE_UNQUOTED(LLVM_CONFIGTIME, "$LLVM_CONFIGTIME",
[Time at which LLVM was configured])
-AC_DEFINE_UNQUOTED(LLVM_HOSTTRIPLE, "$host",
- [Host triple we were built on])
+AC_DEFINE_UNQUOTED(LLVM_DEFAULT_TARGET_TRIPLE, "$target",
+ [Target triple LLVM will generate code for by default])
-# Determine which bindings to build.
+dnl Determine which bindings to build.
if test "$BINDINGS_TO_BUILD" = auto ; then
BINDINGS_TO_BUILD=""
if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then
@@ -1466,11 +1523,11 @@ if test "$BINDINGS_TO_BUILD" = auto ; then
fi
AC_SUBST(BINDINGS_TO_BUILD,$BINDINGS_TO_BUILD)
-# This isn't really configurey, but it avoids having to repeat the list in
-# other files.
+dnl This isn't really configurey, but it avoids having to repeat the list in
+dnl other files.
AC_SUBST(ALL_BINDINGS,ocaml)
-# Do any work necessary to ensure that bindings have what they need.
+dnl Do any work necessary to ensure that bindings have what they need.
binding_prereqs_failed=0
for a_binding in $BINDINGS_TO_BUILD ; do
case "$a_binding" in
@@ -1534,7 +1591,7 @@ dnl===-----------------------------------------------------------------------===
dnl Configure header files
dnl WARNING: dnl If you add or remove any of the following config headers, then
-dnl you MUST also update Makefile.rules so that the variable FilesToConfig
+dnl you MUST also update Makefile so that the variable FilesToConfig
dnl contains the same list of files as AC_CONFIG_HEADERS below. This ensures the
dnl files can be updated automatically when their *.in sources change.
AC_CONFIG_HEADERS([include/llvm/Config/config.h include/llvm/Config/llvm-config.h])
@@ -1556,12 +1613,18 @@ AC_CONFIG_FILES([llvm.spec])
dnl Configure doxygen's configuration file
AC_CONFIG_FILES([docs/doxygen.cfg])
-if test -f ${srcdir}/tools/clang/README.txt; then
- AC_CONFIG_FILES([tools/clang/docs/doxygen.cfg])
-fi
-dnl Do the first stage of configuration for llvm-config.in.
-AC_CONFIG_FILES([tools/llvm-config/llvm-config.in])
+dnl Configure clang, if present
+if test "${clang_src_root}" = ""; then
+ clang_src_root="$srcdir/tools/clang"
+fi
+if test -f ${clang_src_root}/README.txt; then
+ dnl Use variables to stay under 80 columns.
+ configh="include/clang/Config/config.h"
+ doxy="docs/doxygen.cfg"
+ AC_CONFIG_HEADERS([tools/clang/${configh}:${clang_src_root}/${configh}.in])
+ AC_CONFIG_FILES([tools/clang/${doxy}:${clang_src_root}/${doxy}.in])
+fi
dnl OCaml findlib META file
AC_CONFIG_FILES([bindings/ocaml/llvm/META.llvm])
diff --git a/autoconf/ltmain.sh b/autoconf/ltmain.sh
index 06823e057a57..2455278a4d75 100644
--- a/autoconf/ltmain.sh
+++ b/autoconf/ltmain.sh
@@ -4668,7 +4668,7 @@ static const void *lt_preloaded_setup() {
# linked before any other PIC object. But we must not use
# pic_flag when linking with -static. The problem exists in
# FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
- *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+ *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
case "$compile_command " in
*" -static "*) ;;
*) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND";;
diff --git a/autoconf/m4/cxx_flag_check.m4 b/autoconf/m4/cxx_flag_check.m4
index ab09f2af5cf0..62454b7147f9 100644
--- a/autoconf/m4/cxx_flag_check.m4
+++ b/autoconf/m4/cxx_flag_check.m4
@@ -1,2 +1,2 @@
AC_DEFUN([CXX_FLAG_CHECK],
- [AC_SUBST($1, `$CXX $2 -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)])
+ [AC_SUBST($1, `$CXX -Werror $2 -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)])
diff --git a/autoconf/m4/func_isinf.m4 b/autoconf/m4/func_isinf.m4
index c936bf920d02..5c000f8fad91 100644
--- a/autoconf/m4/func_isinf.m4
+++ b/autoconf/m4/func_isinf.m4
@@ -19,7 +19,7 @@ fi
AC_SINGLE_CXX_CHECK([ac_cv_func_std_isinf_in_cmath],
[std::isinf], [<cmath>],
- [float f; std::isinf(f)}])
+ [float f; std::isinf(f);])
if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then
AC_DEFINE([HAVE_STD_ISINF_IN_CMATH],1,[Set to 1 if the std::isinf function is found in <cmath>])
fi
diff --git a/autoconf/m4/huge_val.m4 b/autoconf/m4/huge_val.m4
index 5fffbfc8d37c..9dc76f22350c 100644
--- a/autoconf/m4/huge_val.m4
+++ b/autoconf/m4/huge_val.m4
@@ -6,7 +6,7 @@ AC_DEFUN([AC_HUGE_VAL_CHECK],[
AC_CACHE_CHECK([for HUGE_VAL sanity], [ac_cv_huge_val_sanity],[
AC_LANG_PUSH([C++])
ac_save_CXXFLAGS=$CXXFLAGS
- CXXFLAGS=-pedantic
+ CXXFLAGS="$CXXFLAGS -pedantic"
AC_RUN_IFELSE(
AC_LANG_PROGRAM(
[#include <math.h>],
diff --git a/autoconf/m4/libtool.m4 b/autoconf/m4/libtool.m4
index e89738cc9129..36ac3d15def6 100644
--- a/autoconf/m4/libtool.m4
+++ b/autoconf/m4/libtool.m4
@@ -1384,7 +1384,7 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
+freebsd1.*)
dynamic_linker=no
;;
@@ -1407,7 +1407,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[[123]]*) objformat=aout ;;
+ freebsd[[123]].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -1425,7 +1425,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[[01]]* | freebsdelf3.[[01]]*)
@@ -3099,7 +3099,7 @@ case $host_os in
;;
esac
;;
- freebsd[[12]]*)
+ freebsd[[12]].*)
# C++ shared libraries reported to be fairly broken before switch to ELF
_LT_AC_TAGVAR(ld_shlibs, $1)=no
;;
@@ -5858,7 +5858,7 @@ _LT_EOF
_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
;;
- freebsd1*)
+ freebsd1.*)
_LT_AC_TAGVAR(ld_shlibs, $1)=no
;;
@@ -5874,7 +5874,7 @@ _LT_EOF
;;
# Unfortunately, older versions of FreeBSD 2 do not have this feature.
- freebsd2*)
+ freebsd2.*)
_LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
_LT_AC_TAGVAR(hardcode_direct, $1)=yes
_LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
diff --git a/autoconf/m4/link_options.m4 b/autoconf/m4/link_options.m4
index 4c5f2f435d04..57da4a0d9269 100644
--- a/autoconf/m4/link_options.m4
+++ b/autoconf/m4/link_options.m4
@@ -10,7 +10,7 @@ AC_DEFUN([AC_LINK_GET_VERSION],
# Check for ld64.
if (echo "$version_string" | grep -q "ld64"); then
- llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#")
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)\( (.*)\)\{0,1\}#\1#")
else
llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#")
fi
diff --git a/autoconf/m4/path_perl.m4 b/autoconf/m4/path_perl.m4
deleted file mode 100644
index 406656cb0322..000000000000
--- a/autoconf/m4/path_perl.m4
+++ /dev/null
@@ -1,16 +0,0 @@
-dnl Check for a reasonable version of Perl.
-dnl $1 - Minimum Perl version. Typically 5.006.
-dnl
-AC_DEFUN([LLVM_PROG_PERL], [
-AC_PATH_PROG(PERL, [perl], [none])
-if test "$PERL" != "none"; then
- AC_MSG_CHECKING(for Perl $1 or newer)
- if $PERL -e 'use $1;' 2>&1 > /dev/null; then
- AC_MSG_RESULT(yes)
- else
- PERL=none
- AC_MSG_RESULT(not found)
- fi
-fi
-])
-
diff --git a/autoconf/m4/visibility_inlines_hidden.m4 b/autoconf/m4/visibility_inlines_hidden.m4
index 42ddbe9128b3..b1cc42aa5f53 100644
--- a/autoconf/m4/visibility_inlines_hidden.m4
+++ b/autoconf/m4/visibility_inlines_hidden.m4
@@ -8,8 +8,10 @@ AC_DEFUN([AC_CXX_USE_VISIBILITY_INLINES_HIDDEN],
[llvm_cv_cxx_visibility_inlines_hidden],
[ AC_LANG_PUSH([C++])
oldcxxflags="$CXXFLAGS"
- CXXFLAGS="$CXXFLAGS -fvisibility-inlines-hidden"
- AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+ CXXFLAGS="$CXXFLAGS -O0 -fvisibility-inlines-hidden -Werror"
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [template <typename T> struct X { void __attribute__((noinline)) f() {} };],
+ [X<int>().f();])],
[llvm_cv_cxx_visibility_inlines_hidden=yes],[llvm_cv_cxx_visibility_inlines_hidden=no])
CXXFLAGS="$oldcxxflags"
AC_LANG_POP([C++])
diff --git a/bindings/LLVMBuild.txt b/bindings/LLVMBuild.txt
new file mode 100644
index 000000000000..241ac0964f36
--- /dev/null
+++ b/bindings/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./bindings/LLVMBuild.txt ---------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Group
+name = Bindings
+parent = $ROOT
diff --git a/bindings/ocaml/Makefile.ocaml b/bindings/ocaml/Makefile.ocaml
index 40ecc9c08e09..a2a8b02eac7d 100644
--- a/bindings/ocaml/Makefile.ocaml
+++ b/bindings/ocaml/Makefile.ocaml
@@ -238,14 +238,14 @@ clean-cmis::
# Also install the .mli's (headers) as documentation.
install-cmis: $(OutputsCMI) $(OcamlHeaders)
$(Verb) $(MKDIR) $(PROJ_libocamldir)
- $(Verb) for i in $(OutputsCMI:$(OcamlDir)/%=%); do \
- $(EchoCmd) "Installing $(BuildMode) $(PROJ_libocamldir)/$$i"; \
- $(DataInstall) $(OcamlDir)/$$i "$(PROJ_libocamldir)/$$i"; \
- done
$(Verb) for i in $(OcamlHeaders:$(ObjDir)/%=%); do \
$(EchoCmd) "Installing $(BuildMode) $(PROJ_libocamldir)/$$i"; \
$(DataInstall) $(ObjDir)/$$i "$(PROJ_libocamldir)/$$i"; \
done
+ $(Verb) for i in $(OutputsCMI:$(OcamlDir)/%=%); do \
+ $(EchoCmd) "Installing $(BuildMode) $(PROJ_libocamldir)/$$i"; \
+ $(DataInstall) $(OcamlDir)/$$i "$(PROJ_libocamldir)/$$i"; \
+ done
uninstall-cmis::
$(Verb) for i in $(OutputsCMI:$(OcamlDir)/%=%); do \
diff --git a/bindings/ocaml/llvm/META.llvm.in b/bindings/ocaml/llvm/META.llvm.in
index 29e7eb418efc..fdb325382373 100644
--- a/bindings/ocaml/llvm/META.llvm.in
+++ b/bindings/ocaml/llvm/META.llvm.in
@@ -1,6 +1,6 @@
name = "llvm"
version = "@PACKAGE_VERSION@"
-description = "Low Level Virtual Machine OCaml bindings"
+description = "LLVM OCaml bindings"
archive(byte) = "llvm.cma"
archive(native) = "llvm.cmxa"
directory = "."
diff --git a/bindings/ocaml/llvm/Makefile b/bindings/ocaml/llvm/Makefile
index 673eaa2e35a9..203075a9bdde 100644
--- a/bindings/ocaml/llvm/Makefile
+++ b/bindings/ocaml/llvm/Makefile
@@ -30,11 +30,13 @@ copy-meta: $(OcamlDir)/META.llvm
$(OcamlDir)/META.llvm: META.llvm
$(Verb) $(CP) -f $< $@
-install-meta:: $(ObjDir)/META.llvm
+install-meta:: $(OcamlDir)/META.llvm
$(Echo) "Install $(BuildMode) $(DestMETA)"
$(Verb) $(MKDIR) $(PROJ_libocamldir)
- $(Verb) $(DataInstall) META.llvm "$(DestMETA)"
+ $(Verb) $(DataInstall) $< "$(DestMETA)"
uninstall-meta::
$(Echo) "Uninstalling $(DestMETA)"
-$(Verb) $(RM) -f "$(DestMETA)"
+
+.PHONY: copy-meta install-meta uninstall-meta
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index 40b013863667..b169b85bc99f 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -20,6 +20,7 @@ type llmemorybuffer
module TypeKind = struct
type t =
| Void
+ | Half
| Float
| Double
| X86fp80
@@ -1234,5 +1235,6 @@ let rec string_of_lltype ty =
| TypeKind.X86fp80 -> "x86_fp80"
| TypeKind.Double -> "double"
| TypeKind.Float -> "float"
+ | TypeKind.Half -> "half"
| TypeKind.Void -> "void"
| TypeKind.Metadata -> "metadata"
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index 33bbc74deb1b..96448ccd960d 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -53,6 +53,7 @@ type llmemorybuffer
module TypeKind : sig
type t =
Void
+ | Half
| Float
| Double
| X86fp80
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index 86cc4bd01436..a5985d9d2b04 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -21,7 +21,6 @@
#include "caml/memory.h"
#include "caml/fail.h"
#include "caml/callback.h"
-#include "llvm/Config/config.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
@@ -1164,7 +1163,7 @@ CAMLprim value llvm_instr_get_opcode(LLVMValueRef Inst) {
if (!LLVMIsAInstruction(Inst))
failwith("Not an instruction");
o = LLVMGetInstructionOpcode(Inst);
- assert (o <= LLVMUnwind );
+ assert (o <= LLVMLandingPad);
return Val_int(o);
}
diff --git a/bindings/python/README.txt b/bindings/python/README.txt
new file mode 100644
index 000000000000..96e334319bde
--- /dev/null
+++ b/bindings/python/README.txt
@@ -0,0 +1,67 @@
+This directory contains Python bindings for LLVM's C library.
+
+The bindings are currently a work in progress and are far from complete.
+Use at your own risk.
+
+Developer Info
+==============
+
+The single Python package is "llvm." Modules inside this package roughly
+follow the names of the modules/headers defined by LLVM's C API.
+
+Testing
+-------
+
+All test code is located in llvm/tests. Tests are written as classes
+that inherit from llvm.tests.base.TestBase, a convenience base class
+providing common functionality.
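+
+A minimal test might look like the following (an illustrative sketch; it
+assumes TestBase behaves like a standard unittest.TestCase subclass):
+
+    from .base import TestBase
+
+    class TestExample(TestBase):
+        def test_sanity(self):
+            # Ordinary unittest-style assertions work; TestBase only adds
+            # shared convenience helpers on top.
+            self.assertTrue(True)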
+
+Tests can be executed by installing nose:
+
+    pip install nose
+
+Then run nosetests:
+
+ nosetests
+
+To see more output:
+
+ nosetests -v
+
+To step into the Python debugger while running a test, add the following
+to your test at the point you wish to enter the debugger:
+
+ import pdb; pdb.set_trace()
+
+Then run nosetests:
+
+ nosetests -s -v
+
+You should strive for high code coverage. To see current coverage:
+
+ pip install coverage
+ nosetests --with-coverage --cover-html
+
+Then open cover/index.html in your browser of choice to see the code coverage.
+
+Style Convention
+----------------
+
+All code should pass PyFlakes. First, install PyFlakes:
+
+ pip install pyflakes
+
+Then at any time run it to see a report:
+
+ pyflakes .
+
+Eventually we'll provide a Pylint config file. In the meantime, install
+Pylint:
+
+ pip install pylint
+
+And run:
+
+ pylint llvm
+
+And try to keep the number of violations to a minimum.
diff --git a/bindings/python/llvm/__init__.py b/bindings/python/llvm/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/bindings/python/llvm/__init__.py
diff --git a/bindings/python/llvm/common.py b/bindings/python/llvm/common.py
new file mode 100644
index 000000000000..0c5fcd03d844
--- /dev/null
+++ b/bindings/python/llvm/common.py
@@ -0,0 +1,106 @@
+#===- common.py - Python LLVM Bindings -----------------------*- python -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from ctypes import POINTER
+from ctypes import c_void_p
+from ctypes import cdll
+
+import ctypes.util
+
+__all__ = [
+ 'c_object_p',
+ 'find_library',
+ 'get_library',
+]
+
+c_object_p = POINTER(c_void_p)
+
+class LLVMObject(object):
+ """Base class for objects that are backed by an LLVM data structure.
+
+ This class should never be instantiated outside of this package.
+ """
+ def __init__(self, ptr, ownable=True, disposer=None):
+ assert isinstance(ptr, c_object_p)
+
+ self._ptr = self._as_parameter_ = ptr
+
+ self._self_owned = True
+ self._ownable = ownable
+ self._disposer = disposer
+
+ self._owned_objects = []
+
+ def take_ownership(self, obj):
+ """Take ownership of another object.
+
+ When you take ownership of another object, you are responsible for
+ destroying that object. In addition, a reference to that object is
+ placed inside this object so the Python garbage collector will not
+ collect the object while it is still alive in libLLVM.
+
+ This method should likely only be called from within modules inside
+ this package.
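+
+        For example, llvm.object.ObjectFile keeps its backing MemoryBuffer
+        alive this way:
+
+            self.take_ownership(contents)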
+ """
+ assert isinstance(obj, LLVMObject)
+
+ self._owned_objects.append(obj)
+ obj._self_owned = False
+
+ def from_param(self):
+ """ctypes function that converts this object to a function parameter."""
+ return self._as_parameter_
+
+ def __del__(self):
+ if not hasattr(self, '_self_owned') or not hasattr(self, '_disposer'):
+ return
+
+ if self._self_owned and self._disposer:
+ self._disposer(self)
+
+class CachedProperty(object):
+ """Decorator that caches the result of a property lookup.
+
+ This is a useful replacement for @property. It is recommended to use this
+ decorator on properties that invoke C API calls for which the result of the
+ call will be idempotent.
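+
+    Illustrative usage (Widget and expensive_c_api_lookup are hypothetical
+    names):
+
+        class Widget(object):
+            @CachedProperty
+            def name(self):
+                # Evaluated on first access only; the result is then stored
+                # on the instance and returned directly afterwards.
+                # expensive_c_api_lookup is a hypothetical helper.
+                return expensive_c_api_lookup(self)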
+ """
+ def __init__(self, wrapped):
+ self.wrapped = wrapped
+ try:
+ self.__doc__ = wrapped.__doc__
+ except: # pragma: no cover
+ pass
+
+ def __get__(self, instance, instance_type=None):
+ if instance is None:
+ return self
+
+ value = self.wrapped(instance)
+ setattr(instance, self.wrapped.__name__, value)
+
+ return value
+
+def find_library():
+ # FIXME should probably have build system define absolute path of shared
+ # library at install time.
+ for lib in ['LLVM-3.1svn', 'libLLVM-3.1svn', 'LLVM', 'libLLVM']:
+ result = ctypes.util.find_library(lib)
+ if result:
+ return result
+
+ return None
+
+def get_library():
+ """Obtain a reference to the llvm library."""
+ lib = find_library()
+ if not lib:
+ raise Exception('LLVM shared library not found!')
+
+ return cdll.LoadLibrary(lib)
diff --git a/bindings/python/llvm/core.py b/bindings/python/llvm/core.py
new file mode 100644
index 000000000000..67566374256e
--- /dev/null
+++ b/bindings/python/llvm/core.py
@@ -0,0 +1,98 @@
+#===- core.py - Python LLVM Bindings -------------------------*- python -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+
+from . import enumerations
+
+from ctypes import POINTER
+from ctypes import byref
+from ctypes import c_char_p
+
+__all__ = [
+ "lib",
+ "MemoryBuffer",
+]
+
+lib = get_library()
+
+class OpCode(object):
+ """Represents an individual OpCode enumeration."""
+
+ _value_map = {}
+
+ def __init__(self, name, value):
+ self.name = name
+ self.value = value
+
+ def __repr__(self):
+ return 'OpCode.%s' % self.name
+
+ @staticmethod
+ def from_value(value):
+ """Obtain an OpCode instance from a numeric value."""
+ result = OpCode._value_map.get(value, None)
+
+ if result is None:
+ raise ValueError('Unknown OpCode: %d' % value)
+
+ return result
+
+ @staticmethod
+ def register(name, value):
+ """Registers a new OpCode enumeration.
+
+ This is called by this module for each enumeration defined in
+ enumerations. You should not need to call this outside this module.
+ """
+ if value in OpCode._value_map:
+ raise ValueError('OpCode value already registered: %d' % value)
+
+ opcode = OpCode(name, value)
+ OpCode._value_map[value] = opcode
+ setattr(OpCode, name, opcode)
+
+class MemoryBuffer(LLVMObject):
+ """Represents an opaque memory buffer."""
+
+ def __init__(self, filename=None):
+ """Create a new memory buffer.
+
+ Currently, we support creating from the contents of a file at the
+ specified filename.
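+
+        Example (illustrative; any readable file path works):
+
+            mb = MemoryBuffer(filename='/bin/ls')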
+ """
+ if filename is None:
+ raise Exception("filename argument must be defined")
+
+ memory = c_object_p()
+ out = c_char_p(None)
+
+ result = lib.LLVMCreateMemoryBufferWithContentsOfFile(filename,
+ byref(memory), byref(out))
+
+ if result:
+ raise Exception("Could not create memory buffer: %s" % out.value)
+
+ LLVMObject.__init__(self, memory, disposer=lib.LLVMDisposeMemoryBuffer)
+
+def register_library(library):
+ library.LLVMCreateMemoryBufferWithContentsOfFile.argtypes = [c_char_p,
+ POINTER(c_object_p), POINTER(c_char_p)]
+ library.LLVMCreateMemoryBufferWithContentsOfFile.restype = bool
+
+ library.LLVMDisposeMemoryBuffer.argtypes = [MemoryBuffer]
+
+def register_enumerations():
+ for name, value in enumerations.OpCodes:
+ OpCode.register(name, value)
+
+register_library(lib)
+register_enumerations()
diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py
new file mode 100644
index 000000000000..5030b989a944
--- /dev/null
+++ b/bindings/python/llvm/disassembler.py
@@ -0,0 +1,134 @@
+#===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+from ctypes import CFUNCTYPE
+from ctypes import POINTER
+from ctypes import addressof
+from ctypes import byref
+from ctypes import c_byte
+from ctypes import c_char_p
+from ctypes import c_int
+from ctypes import c_size_t
+from ctypes import c_ubyte
+from ctypes import c_uint64
+from ctypes import c_void_p
+from ctypes import cast
+
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+
+__all__ = [
+ 'Disassembler',
+]
+
+lib = get_library()
+callbacks = {}
+
+class Disassembler(LLVMObject):
+ """Represents a disassembler instance.
+
+    Disassembler instances are tied to a specific "triple," which must be
+    defined at creation time.
+
+ Disassembler instances can disassemble instructions from multiple sources.
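+
+    Example (an illustrative sketch; the exact text produced depends on the
+    configured LLVM targets):
+
+        disassembler = Disassembler('i386-apple-darwin9')
+        # '\xc3' is a single x86 'ret' instruction.
+        for address, size, text in disassembler.get_instructions('\xc3'):
+            print '0x%x (%d bytes): %s' % (address, size, text)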
+ """
+ def __init__(self, triple):
+ """Create a new disassembler instance.
+
+ The triple argument is the triple to create the disassembler for. This
+ is something like 'i386-apple-darwin9'.
+ """
+ ptr = lib.LLVMCreateDisasm(c_char_p(triple), c_void_p(None), c_int(0),
+ callbacks['op_info'](0), callbacks['symbol_lookup'](0))
+ if not ptr.contents:
+ raise Exception('Could not obtain disassembler for triple: %s' %
+ triple)
+
+ LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)
+
+ def get_instruction(self, source, pc=0):
+ """Obtain the next instruction from an input source.
+
+ The input source should be a str or bytearray or something that
+ represents a sequence of bytes.
+
+ This function will start reading bytes from the beginning of the
+ source.
+
+ The pc argument specifies the address that the first byte is at.
+
+ This returns a 2-tuple of:
+
+ long number of bytes read. 0 if no instruction was read.
+ str representation of instruction. This will be the assembly that
+ represents the instruction.
+ """
+ buf = cast(c_char_p(source), POINTER(c_ubyte))
+ out_str = cast((c_byte * 255)(), c_char_p)
+
+ result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
+ c_uint64(pc), out_str, 255)
+
+ return (result, out_str.value)
+
+ def get_instructions(self, source, pc=0):
+ """Obtain multiple instructions from an input source.
+
+ This is like get_instruction() except it is a generator for all
+ instructions within the source. It starts at the beginning of the
+ source and reads instructions until no more can be read.
+
+ This generator returns 3-tuple of:
+
+ long address of instruction.
+ long size of instruction, in bytes.
+ str representation of instruction.
+ """
+ source_bytes = c_char_p(source)
+ out_str = cast((c_byte * 255)(), c_char_p)
+
+ # This could probably be written cleaner. But, it does work.
+ buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
+ offset = 0
+ address = pc
+ end_address = pc + len(source)
+ while address < end_address:
+ b = cast(addressof(buf) + offset, POINTER(c_ubyte))
+ result = lib.LLVMDisasmInstruction(self, b,
+ c_uint64(len(source) - offset), c_uint64(address),
+ out_str, 255)
+
+ if result == 0:
+ break
+
+ yield (address, result, out_str.value)
+
+ address += result
+ offset += result
+
+
+def register_library(library):
+ library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
+ callbacks['op_info'], callbacks['symbol_lookup']]
+ library.LLVMCreateDisasm.restype = c_object_p
+
+ library.LLVMDisasmDispose.argtypes = [Disassembler]
+
+ library.LLVMDisasmInstruction.argtypes = [Disassembler, POINTER(c_ubyte),
+ c_uint64, c_uint64, c_char_p, c_size_t]
+ library.LLVMDisasmInstruction.restype = c_size_t
+
+callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
+ c_int, c_void_p)
+callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
+ POINTER(c_uint64), c_uint64,
+ POINTER(c_char_p))
+
+register_library(lib)
diff --git a/bindings/python/llvm/enumerations.py b/bindings/python/llvm/enumerations.py
new file mode 100644
index 000000000000..f49d2faad351
--- /dev/null
+++ b/bindings/python/llvm/enumerations.py
@@ -0,0 +1,211 @@
+#===- enumerations.py - Python LLVM Enumerations -------------*- python -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""
+LLVM Enumerations
+=================
+
+This file defines enumerations from LLVM.
+
+Each enumeration is exposed as a list of 2-tuples. These lists are consumed by
+dedicated types elsewhere in the package. The enumerations are centrally
+defined in this file so they are easier to locate and maintain.
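+
+For example (mirroring what llvm.core does at import time), a consumer
+registers each entry roughly like:
+
+    for name, value in OpCodes:
+        OpCode.register(name, value)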
+"""
+
+__all__ = [
+ 'Attributes',
+ 'OpCodes',
+ 'TypeKinds',
+ 'Linkages',
+ 'Visibility',
+ 'CallConv',
+ 'IntPredicate',
+ 'RealPredicate',
+ 'LandingPadClauseTy',
+]
+
+Attributes = [
+ ('ZExt', 1 << 0),
+    ('SExt', 1 << 1),
+ ('NoReturn', 1 << 2),
+ ('InReg', 1 << 3),
+ ('StructRet', 1 << 4),
+ ('NoUnwind', 1 << 5),
+ ('NoAlias', 1 << 6),
+ ('ByVal', 1 << 7),
+ ('Nest', 1 << 8),
+ ('ReadNone', 1 << 9),
+ ('ReadOnly', 1 << 10),
+ ('NoInline', 1 << 11),
+ ('AlwaysInline', 1 << 12),
+ ('OptimizeForSize', 1 << 13),
+ ('StackProtect', 1 << 14),
+ ('StackProtectReq', 1 << 15),
+ ('Alignment', 31 << 16),
+ ('NoCapture', 1 << 21),
+ ('NoRedZone', 1 << 22),
+ ('ImplicitFloat', 1 << 23),
+ ('Naked', 1 << 24),
+ ('InlineHint', 1 << 25),
+ ('StackAlignment', 7 << 26),
+ ('ReturnsTwice', 1 << 29),
+ ('UWTable', 1 << 30),
+ ('NonLazyBind', 1 << 31),
+]
+
+OpCodes = [
+ ('Ret', 1),
+ ('Br', 2),
+ ('Switch', 3),
+ ('IndirectBr', 4),
+ ('Invoke', 5),
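+    # Value 6 is intentionally skipped; the C API removed the opcode that
+    # held it.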
+ ('Unreachable', 7),
+ ('Add', 8),
+ ('FAdd', 9),
+ ('Sub', 10),
+ ('FSub', 11),
+ ('Mul', 12),
+ ('FMul', 13),
+ ('UDiv', 14),
+ ('SDiv', 15),
+ ('FDiv', 16),
+ ('URem', 17),
+ ('SRem', 18),
+ ('FRem', 19),
+ ('Shl', 20),
+ ('LShr', 21),
+ ('AShr', 22),
+ ('And', 23),
+ ('Or', 24),
+ ('Xor', 25),
+ ('Alloca', 26),
+ ('Load', 27),
+ ('Store', 28),
+ ('GetElementPtr', 29),
+ ('Trunc', 30),
+ ('ZExt', 31),
+ ('SExt', 32),
+ ('FPToUI', 33),
+ ('FPToSI', 34),
+ ('UIToFP', 35),
+ ('SIToFP', 36),
+ ('FPTrunc', 37),
+ ('FPExt', 38),
+ ('PtrToInt', 39),
+ ('IntToPtr', 40),
+ ('BitCast', 41),
+ ('ICmp', 42),
+    ('FCmp', 43),
+ ('PHI', 44),
+ ('Call', 45),
+ ('Select', 46),
+ ('UserOp1', 47),
+ ('UserOp2', 48),
+    ('VAArg', 49),
+ ('ExtractElement', 50),
+ ('InsertElement', 51),
+ ('ShuffleVector', 52),
+ ('ExtractValue', 53),
+ ('InsertValue', 54),
+ ('Fence', 55),
+ ('AtomicCmpXchg', 56),
+ ('AtomicRMW', 57),
+ ('Resume', 58),
+ ('LandingPad', 59),
+]
+
+TypeKinds = [
+ ('Void', 0),
+ ('Half', 1),
+ ('Float', 2),
+ ('Double', 3),
+ ('X86_FP80', 4),
+ ('FP128', 5),
+ ('PPC_FP128', 6),
+ ('Label', 7),
+ ('Integer', 8),
+ ('Function', 9),
+ ('Struct', 10),
+ ('Array', 11),
+ ('Pointer', 12),
+ ('Vector', 13),
+ ('Metadata', 14),
+ ('X86_MMX', 15),
+]
+
+Linkages = [
+ ('External', 0),
+ ('AvailableExternally', 1),
+ ('LinkOnceAny', 2),
+ ('LinkOnceODR', 3),
+ ('WeakAny', 4),
+ ('WeakODR', 5),
+ ('Appending', 6),
+ ('Internal', 7),
+ ('Private', 8),
+ ('DLLImport', 9),
+ ('DLLExport', 10),
+ ('ExternalWeak', 11),
+ ('Ghost', 12),
+ ('Common', 13),
+ ('LinkerPrivate', 14),
+ ('LinkerPrivateWeak', 15),
+ ('LinkerPrivateWeakDefAuto', 16),
+]
+
+Visibility = [
+ ('Default', 0),
+ ('Hidden', 1),
+ ('Protected', 2),
+]
+
+CallConv = [
+ ('CCall', 0),
+ ('FastCall', 8),
+ ('ColdCall', 9),
+ ('X86StdcallCall', 64),
+ ('X86FastcallCall', 65),
+]
+
+IntPredicate = [
+ ('EQ', 32),
+ ('NE', 33),
+ ('UGT', 34),
+ ('UGE', 35),
+ ('ULT', 36),
+ ('ULE', 37),
+ ('SGT', 38),
+ ('SGE', 39),
+ ('SLT', 40),
+ ('SLE', 41),
+]
+
+RealPredicate = [
+ ('PredicateFalse', 0),
+ ('OEQ', 1),
+ ('OGT', 2),
+ ('OGE', 3),
+ ('OLT', 4),
+ ('OLE', 5),
+ ('ONE', 6),
+ ('ORD', 7),
+ ('UNO', 8),
+ ('UEQ', 9),
+ ('UGT', 10),
+ ('UGE', 11),
+ ('ULT', 12),
+ ('ULE', 13),
+ ('UNE', 14),
+ ('PredicateTrue', 15),
+]
+
+LandingPadClauseTy = [
+ ('Catch', 0),
+ ('Filter', 1),
+]
diff --git a/bindings/python/llvm/object.py b/bindings/python/llvm/object.py
new file mode 100644
index 000000000000..473aa3a1089e
--- /dev/null
+++ b/bindings/python/llvm/object.py
@@ -0,0 +1,523 @@
+#===- object.py - Python Object Bindings --------------------*- python -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+r"""
+Object File Interface
+=====================
+
+This module provides an interface for reading information from object files
+(e.g. binary executables and libraries).
+
+Using this module, you can obtain information about an object file's sections,
+symbols, and relocations. These are represented by the classes ObjectFile,
+Section, Symbol, and Relocation, respectively.
+
+Usage
+-----
+
+The only way to use this module is to start by creating an ObjectFile. You can
+create an ObjectFile by loading a file (specified by its path) or by creating a
+llvm.core.MemoryBuffer and loading that.
+
+Once you have an object file, you can inspect its sections and symbols directly
+by calling get_sections() and get_symbols() respectively. To inspect
+relocations, call get_relocations() on a Section instance.
+
+Iterator Interface
+------------------
+
+The LLVM bindings expose iteration over sections, symbols, and relocations in a
+way that only allows one instance to be operated on at a time. This is
+slightly annoying from a Python perspective, as it isn't very Pythonic to have
+objects that "expire" while the dynamic language may still hold references to
+them.
+
+To aid working around this limitation, each Section, Symbol, and Relocation
+instance caches its properties after first access. So, if the underlying
+iterator is advanced, the properties can still be obtained provided they have
+already been retrieved.
+
+In addition, we provide a "cache" method on each class to cache all
+available data. You can call this on each obtained instance. Or, you can pass
+cache=True to the appropriate get_XXX() method to have this done for you.
+
+Here are some examples on how to perform iteration:
+
+ obj = ObjectFile(filename='/bin/ls')
+
+ # This is OK. Each Section is only accessed inside its own iteration slot.
+ section_names = []
+ for section in obj.get_sections():
+ section_names.append(section.name)
+
+ # This is NOT OK. You perform a lookup after the object has expired.
+ symbols = list(obj.get_symbols())
+ for symbol in symbols:
+ print symbol.name # This raises because the object has expired.
+
+ # In this example, we mix a working and failing scenario.
+ symbols = []
+ for symbol in obj.get_symbols():
+ symbols.append(symbol)
+ print symbol.name
+
+ for symbol in symbols:
+ print symbol.name # OK
+ print symbol.address # NOT OK. We didn't look up this property before.
+
+ # Cache everything up front.
+ symbols = list(obj.get_symbols(cache=True))
+ for symbol in symbols:
+ print symbol.name # OK
+
+"""
+
+from ctypes import c_char_p
+from ctypes import c_uint64
+
+from .common import CachedProperty
+from .common import LLVMObject
+from .common import c_object_p
+from .common import get_library
+from .core import MemoryBuffer
+
+__all__ = [
+ "lib",
+ "ObjectFile",
+ "Relocation",
+ "Section",
+ "Symbol",
+]
+
+class ObjectFile(LLVMObject):
+ """Represents an object/binary file."""
+
+ def __init__(self, filename=None, contents=None):
+ """Construct an instance from a filename or binary data.
+
+ filename must be a path to a file that can be opened with open().
+ contents can be either a native Python buffer type (like str) or a
+ llvm.core.MemoryBuffer instance.
+ """
+ if contents:
+ assert isinstance(contents, MemoryBuffer)
+
+ if filename is not None:
+ contents = MemoryBuffer(filename=filename)
+
+ if contents is None:
+ raise Exception('No input found.')
+
+ ptr = lib.LLVMCreateObjectFile(contents)
+ LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
+ self.take_ownership(contents)
+
+ def get_sections(self, cache=False):
+ """Obtain the sections in this object file.
+
+ This is a generator for llvm.object.Section instances.
+
+ Sections are exposed as limited-use objects. See the module's
+ documentation on iterators for more.
+ """
+ sections = lib.LLVMGetSections(self)
+ last = None
+ while True:
+ if lib.LLVMIsSectionIteratorAtEnd(self, sections):
+ break
+
+ last = Section(sections)
+ if cache:
+ last.cache()
+
+ yield last
+
+ lib.LLVMMoveToNextSection(sections)
+ last.expire()
+
+ if last is not None:
+ last.expire()
+
+ lib.LLVMDisposeSectionIterator(sections)
+
+ def get_symbols(self, cache=False):
+ """Obtain the symbols in this object file.
+
+ This is a generator for llvm.object.Symbol instances.
+
+ Each Symbol instance is a limited-use object. See this module's
+ documentation on iterators for more.
+ """
+ symbols = lib.LLVMGetSymbols(self)
+ last = None
+ while True:
+ if lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
+ break
+
+ last = Symbol(symbols, self)
+ if cache:
+ last.cache()
+
+ yield last
+
+ lib.LLVMMoveToNextSymbol(symbols)
+ last.expire()
+
+ if last is not None:
+ last.expire()
+
+ lib.LLVMDisposeSymbolIterator(symbols)
+
+class Section(LLVMObject):
+ """Represents a section in an object file."""
+
+ def __init__(self, ptr):
+ """Construct a new section instance.
+
+ Section instances can currently only be created from an ObjectFile
+ instance. Therefore, this constructor should not be used outside of
+ this module.
+ """
+ LLVMObject.__init__(self, ptr)
+
+ self.expired = False
+
+ @CachedProperty
+ def name(self):
+ """Obtain the string name of the section.
+
+ This is typically something like '.dynsym' or '.rodata'.
+ """
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionName(self)
+
+ @CachedProperty
+ def size(self):
+ """The size of the section, in long bytes."""
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionSize(self)
+
+ @CachedProperty
+ def contents(self):
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionContents(self)
+
+ @CachedProperty
+ def address(self):
+ """The address of this section, in long bytes."""
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ return lib.LLVMGetSectionAddress(self)
+
+ def has_symbol(self, symbol):
+ """Returns whether a Symbol instance is present in this Section."""
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ assert isinstance(symbol, Symbol)
+ return lib.LLVMGetSectionContainsSymbol(self, symbol)
+
+ def get_relocations(self, cache=False):
+ """Obtain the relocations in this Section.
+
+ This is a generator for llvm.object.Relocation instances.
+
+ Each instance is a limited-use object. See this module's documentation
+ on iterators for more.
+ """
+ if self.expired:
+ raise Exception('Section instance has expired.')
+
+ relocations = lib.LLVMGetRelocations(self)
+ last = None
+ while True:
+ if lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
+ break
+
+ last = Relocation(relocations, self._object_file)
+ if cache:
+ last.cache()
+
+ yield last
+
+ lib.LLVMMoveToNextRelocation(relocations)
+ last.expire()
+
+ if last is not None:
+ last.expire()
+
+ lib.LLVMDisposeRelocationIterator(relocations)
+
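+ # Sketch (assumes 'section' is an active Section from get_sections()):
+ #
+ #   for reloc in section.get_relocations(cache=True):
+ #       print reloc.type_name, reloc.value_string
+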
+ def cache(self):
+ """Cache properties of this Section.
+
+ This can be called as a workaround to the single active Section
+ limitation. When called, the properties of the Section are fetched so
+ they are still available after the Section has been marked inactive.
+ """
+ getattr(self, 'name')
+ getattr(self, 'size')
+ getattr(self, 'contents')
+ getattr(self, 'address')
+
+ def expire(self):
+ """Expire the section.
+
+ This is called internally by the section iterator.
+ """
+ self.expired = True
+
+class Symbol(LLVMObject):
+ """Represents a symbol in an object file."""
+ def __init__(self, ptr, object_file):
+ assert isinstance(ptr, c_object_p)
+ assert isinstance(object_file, ObjectFile)
+
+ LLVMObject.__init__(self, ptr)
+
+ self.expired = False
+ self._object_file = object_file
+
+ @CachedProperty
+ def name(self):
+ """The str name of the symbol.
+
+ This is often a function or variable name. Keep in mind that name
+ mangling could be in effect.
+ """
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolName(self)
+
+ @CachedProperty
+ def address(self):
+ """The address of this symbol, in long bytes."""
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolAddress(self)
+
+ @CachedProperty
+ def file_offset(self):
+ """The offset of this symbol in the file, in long bytes."""
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolFileOffset(self)
+
+ @CachedProperty
+ def size(self):
+ """The size of the symbol, in long bytes."""
+ if self.expired:
+ raise Exception('Symbol instance has expired.')
+
+ return lib.LLVMGetSymbolSize(self)
+
+ @CachedProperty
+ def section(self):
+ """The Section to which this Symbol belongs.
+
+ The returned Section instance does not expire, unlike Sections that are
+ commonly obtained through iteration.
+
+ Because this obtains a new section iterator each time it is accessed,
+ calling this on a number of Symbol instances could be expensive.
+ """
+ sections = lib.LLVMGetSections(self._object_file)
+ lib.LLVMMoveToContainingSection(sections, self)
+
+ return Section(sections, self._object_file)
+
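+ # Sketch (assumes 'obj' is an ObjectFile): the property is cached per
+ # Symbol, but touching it across many symbols builds one section
+ # iterator each, so the cost can add up:
+ #
+ #   for symbol in obj.get_symbols():
+ #       print symbol.name, symbol.section.name
+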
+ def cache(self):
+ """Cache all cacheable properties."""
+ getattr(self, 'name')
+ getattr(self, 'address')
+ getattr(self, 'file_offset')
+ getattr(self, 'size')
+
+ def expire(self):
+ """Mark the object as expired to prevent future API accesses.
+
+ This is called internally by this module and it is unlikely that
+ external callers have a legitimate reason for using it.
+ """
+ self.expired = True
+
+class Relocation(LLVMObject):
+ """Represents a relocation definition."""
+ def __init__(self, ptr, object_file):
+ """Create a new relocation instance.
+
+ Relocations are created from Section instances via get_relocations().
+ Therefore, this constructor should not be called outside of this
+ module. object_file is the owning ObjectFile; it is required so the
+ symbol property can construct Symbol instances.
+ """
+ assert isinstance(ptr, c_object_p)
+ assert isinstance(object_file, ObjectFile)
+
+ LLVMObject.__init__(self, ptr)
+
+ self.expired = False
+ self._object_file = object_file
+
+ @CachedProperty
+ def address(self):
+ """The address of this relocation, in long bytes."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationAddress(self)
+
+ @CachedProperty
+ def offset(self):
+ """The offset of this relocation, in long bytes."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationOffset(self)
+
+ @CachedProperty
+ def symbol(self):
+ """The Symbol corresponding to this Relocation."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ ptr = lib.LLVMGetRelocationSymbol(self)
+ return Symbol(ptr, self._object_file)
+
+ @CachedProperty
+ def type_number(self):
+ """The relocation type, as a long."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationType(self)
+
+ @CachedProperty
+ def type_name(self):
+ """The relocation type's name, as a str."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationTypeName(self)
+
+ @CachedProperty
+ def value_string(self):
+ """The value of this relocation, as a str."""
+ if self.expired:
+ raise Exception('Relocation instance has expired.')
+
+ return lib.LLVMGetRelocationValueString(self)
+
+ def expire(self):
+ """Expire this instance, making future API accesses fail."""
+ self.expired = True
+
+ def cache(self):
+ """Cache all cacheable properties on this instance."""
+ getattr(self, 'address')
+ getattr(self, 'offset')
+ getattr(self, 'symbol')
+ getattr(self, 'type_number')
+ getattr(self, 'type_name')
+ getattr(self, 'value_string')
+
+def register_library(library):
+ """Register function prototypes with LLVM library instance."""
+
+ # Object.h functions
+ library.LLVMCreateObjectFile.argtypes = [MemoryBuffer]
+ library.LLVMCreateObjectFile.restype = c_object_p
+
+ library.LLVMDisposeObjectFile.argtypes = [ObjectFile]
+
+ library.LLVMGetSections.argtypes = [ObjectFile]
+ library.LLVMGetSections.restype = c_object_p
+
+ library.LLVMDisposeSectionIterator.argtypes = [c_object_p]
+
+ library.LLVMIsSectionIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
+ library.LLVMIsSectionIteratorAtEnd.restype = bool
+
+ library.LLVMMoveToNextSection.argtypes = [c_object_p]
+
+ library.LLVMMoveToContainingSection.argtypes = [c_object_p, c_object_p]
+
+ library.LLVMGetSymbols.argtypes = [ObjectFile]
+ library.LLVMGetSymbols.restype = c_object_p
+
+ library.LLVMDisposeSymbolIterator.argtypes = [c_object_p]
+
+ library.LLVMIsSymbolIteratorAtEnd.argtypes = [ObjectFile, c_object_p]
+ library.LLVMIsSymbolIteratorAtEnd.restype = bool
+
+ library.LLVMMoveToNextSymbol.argtypes = [c_object_p]
+
+ library.LLVMGetSectionName.argtypes = [c_object_p]
+ library.LLVMGetSectionName.restype = c_char_p
+
+ library.LLVMGetSectionSize.argtypes = [c_object_p]
+ library.LLVMGetSectionSize.restype = c_uint64
+
+ library.LLVMGetSectionContents.argtypes = [c_object_p]
+ library.LLVMGetSectionContents.restype = c_char_p
+
+ library.LLVMGetSectionAddress.argtypes = [c_object_p]
+ library.LLVMGetSectionAddress.restype = c_uint64
+
+ library.LLVMGetSectionContainsSymbol.argtypes = [c_object_p, c_object_p]
+ library.LLVMGetSectionContainsSymbol.restype = bool
+
+ library.LLVMGetRelocations.argtypes = [c_object_p]
+ library.LLVMGetRelocations.restype = c_object_p
+
+ library.LLVMDisposeRelocationIterator.argtypes = [c_object_p]
+
+ library.LLVMIsRelocationIteratorAtEnd.argtypes = [c_object_p, c_object_p]
+ library.LLVMIsRelocationIteratorAtEnd.restype = bool
+
+ library.LLVMMoveToNextRelocation.argtypes = [c_object_p]
+
+ library.LLVMGetSymbolName.argtypes = [Symbol]
+ library.LLVMGetSymbolName.restype = c_char_p
+
+ library.LLVMGetSymbolAddress.argtypes = [Symbol]
+ library.LLVMGetSymbolAddress.restype = c_uint64
+
+ library.LLVMGetSymbolFileOffset.argtypes = [Symbol]
+ library.LLVMGetSymbolFileOffset.restype = c_uint64
+
+ library.LLVMGetSymbolSize.argtypes = [Symbol]
+ library.LLVMGetSymbolSize.restype = c_uint64
+
+ library.LLVMGetRelocationAddress.argtypes = [c_object_p]
+ library.LLVMGetRelocationAddress.restype = c_uint64
+
+ library.LLVMGetRelocationOffset.argtypes = [c_object_p]
+ library.LLVMGetRelocationOffset.restype = c_uint64
+
+ library.LLVMGetRelocationSymbol.argtypes = [c_object_p]
+ library.LLVMGetRelocationSymbol.restype = c_object_p
+
+ library.LLVMGetRelocationType.argtypes = [c_object_p]
+ library.LLVMGetRelocationType.restype = c_uint64
+
+ library.LLVMGetRelocationTypeName.argtypes = [c_object_p]
+ library.LLVMGetRelocationTypeName.restype = c_char_p
+
+ library.LLVMGetRelocationValueString.argtypes = [c_object_p]
+ library.LLVMGetRelocationValueString.restype = c_char_p
+
+lib = get_library()
+register_library(lib)
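+
+# The registrations above follow the standard ctypes prototype pattern. A
+# self-contained analogue (illustrative only; it uses libc rather than the
+# LLVM shared library) looks like this:
+#
+#   import ctypes
+#   libc = ctypes.CDLL('libc.so.6')
+#   libc.strlen.argtypes = [ctypes.c_char_p]
+#   libc.strlen.restype = ctypes.c_size_t
+#   assert libc.strlen('hello') == 5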
diff --git a/bindings/python/llvm/tests/__init__.py b/bindings/python/llvm/tests/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/bindings/python/llvm/tests/__init__.py
diff --git a/bindings/python/llvm/tests/base.py b/bindings/python/llvm/tests/base.py
new file mode 100644
index 000000000000..ff9eb2fc1438
--- /dev/null
+++ b/bindings/python/llvm/tests/base.py
@@ -0,0 +1,32 @@
+import os.path
+import unittest
+
+POSSIBLE_TEST_BINARIES = [
+ 'libreadline.so.5',
+ 'libreadline.so.6',
+]
+
+POSSIBLE_TEST_BINARY_PATHS = [
+ '/usr/lib/debug',
+ '/lib',
+ '/usr/lib',
+ '/usr/local/lib',
+ '/lib/i386-linux-gnu',
+]
+
+class TestBase(unittest.TestCase):
+ def get_test_binary(self):
+ """Helper to obtain a test binary for object file testing.
+
+ FIXME Support additional, highly-likely targets or create one
+ ourselves.
+ """
+ for d in POSSIBLE_TEST_BINARY_PATHS:
+ for lib in POSSIBLE_TEST_BINARIES:
+ path = os.path.join(d, lib)
+
+ if os.path.exists(path):
+ return path
+
+ raise Exception('No suitable test binaries available!')
+ get_test_binary.__test__ = False
diff --git a/bindings/python/llvm/tests/test_core.py b/bindings/python/llvm/tests/test_core.py
new file mode 100644
index 000000000000..545abc826ea7
--- /dev/null
+++ b/bindings/python/llvm/tests/test_core.py
@@ -0,0 +1,23 @@
+from .base import TestBase
+from ..core import OpCode
+from ..core import MemoryBuffer
+
+class TestCore(TestBase):
+ def test_opcode(self):
+ self.assertTrue(hasattr(OpCode, 'Ret'))
+ self.assertTrue(isinstance(OpCode.Ret, OpCode))
+ self.assertEqual(OpCode.Ret.value, 1)
+
+ op = OpCode.from_value(1)
+ self.assertTrue(isinstance(op, OpCode))
+ self.assertEqual(op, OpCode.Ret)
+
+ def test_memory_buffer_create_from_file(self):
+ source = self.get_test_binary()
+
+ MemoryBuffer(filename=source)
+
+ def test_memory_buffer_failing(self):
+ with self.assertRaises(Exception):
+ MemoryBuffer(filename="/hopefully/this/path/doesnt/exist")
+
diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py
new file mode 100644
index 000000000000..545e8668b6c9
--- /dev/null
+++ b/bindings/python/llvm/tests/test_disassembler.py
@@ -0,0 +1,28 @@
+from .base import TestBase
+
+from ..disassembler import Disassembler
+
+class TestDisassembler(TestBase):
+ def test_instantiate(self):
+ Disassembler('i686-apple-darwin9')
+
+ def test_basic(self):
+ sequence = '\x67\xe3\x81' # jcxz -127
+ triple = 'i686-apple-darwin9'
+
+ disassembler = Disassembler(triple)
+
+ count, s = disassembler.get_instruction(sequence)
+ self.assertEqual(count, 3)
+ self.assertEqual(s, '\tjcxz\t-127')
+
+ def test_get_instructions(self):
+ sequence = '\x67\xe3\x81\x01\xc7' # jcxz -127; addl %eax, %edi
+
+ disassembler = Disassembler('i686-apple-darwin9')
+
+ instructions = list(disassembler.get_instructions(sequence))
+ self.assertEqual(len(instructions), 2)
+
+ self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127'))
+ self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi'))
diff --git a/bindings/python/llvm/tests/test_object.py b/bindings/python/llvm/tests/test_object.py
new file mode 100644
index 000000000000..7ff981b6a2f7
--- /dev/null
+++ b/bindings/python/llvm/tests/test_object.py
@@ -0,0 +1,67 @@
+from .base import TestBase
+from ..object import ObjectFile
+from ..object import Relocation
+from ..object import Section
+from ..object import Symbol
+
+class TestObjectFile(TestBase):
+ def get_object_file(self):
+ source = self.get_test_binary()
+ return ObjectFile(filename=source)
+
+ def test_create_from_file(self):
+ self.get_object_file()
+
+ def test_get_sections(self):
+ o = self.get_object_file()
+
+ count = 0
+ for section in o.get_sections():
+ count += 1
+ assert isinstance(section, Section)
+ assert isinstance(section.name, str)
+ assert isinstance(section.size, long)
+ assert isinstance(section.contents, str)
+ assert isinstance(section.address, long)
+
+ self.assertGreater(count, 0)
+
+ for section in o.get_sections():
+ section.cache()
+
+ def test_get_symbols(self):
+ o = self.get_object_file()
+
+ count = 0
+ for symbol in o.get_symbols():
+ count += 1
+ assert isinstance(symbol, Symbol)
+ assert isinstance(symbol.name, str)
+ assert isinstance(symbol.address, long)
+ assert isinstance(symbol.size, long)
+ assert isinstance(symbol.file_offset, long)
+
+ self.assertGreater(count, 0)
+
+ for symbol in o.get_symbols():
+ symbol.cache()
+
+ def test_symbol_section_accessor(self):
+ o = self.get_object_file()
+
+ for symbol in o.get_symbols():
+ section = symbol.section
+ assert isinstance(section, Section)
+
+ break
+
+ def test_get_relocations(self):
+ o = self.get_object_file()
+ for section in o.get_sections():
+ for relocation in section.get_relocations():
+ assert isinstance(relocation, Relocation)
+ assert isinstance(relocation.address, long)
+ assert isinstance(relocation.offset, long)
+ assert isinstance(relocation.type_number, long)
+ assert isinstance(relocation.type_name, str)
+ assert isinstance(relocation.value_string, str)
diff --git a/build-for-llvm-top.sh b/build-for-llvm-top.sh
deleted file mode 100755
index 78e3ed87f092..000000000000
--- a/build-for-llvm-top.sh
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/bin/sh
-
-# This includes the Bourne shell library from llvm-top. Since this file is
-# generally only used when building from llvm-top, it is safe to assume that
-# llvm is checked out into llvm-top in which case .. just works.
-. ../library.sh
-
-# Process the options passed in to us by the build script into standard
-# variables.
-process_arguments "$@"
-
-# First, see if the build directory is there. If not, create it.
-build_dir="$LLVM_TOP/build.llvm"
-if test ! -d "$build_dir" ; then
- mkdir -p "$build_dir"
-fi
-
-# See if we have previously been configured by sensing the presence
-# of the config.status scripts
-config_status="$build_dir/config.status"
-if test ! -f "$config_status" -o "$config_status" -ot "$0" ; then
- # We must configure so build a list of configure options
- config_options="--prefix=$PREFIX --with-llvmgccdir=$PREFIX"
- if test "$OPTIMIZED" -eq 1 ; then
- config_options="$config_options --enable-optimized"
- else
- config_options="$config_options --disable-optimized"
- fi
- if test "$DEBUG" -eq 1 ; then
- config_options="$config_options --enable-debug"
- else
- config_options="$config_options --disable-debug"
- fi
- if test "$ASSERTIONS" -eq 1 ; then
- config_options="$config_options --enable-assertions"
- else
- config_options="$config_options --disable-assertions"
- fi
- if test "$CHECKING" -eq 1 ; then
- config_options="$config_options --enable-expensive-checks"
- else
- config_options="$config_options --disable-expensive-checks"
- fi
- if test "$DOXYGEN" -eq 1 ; then
- config_options="$config_options --enable-doxygen"
- else
- config_options="$config_options --disable-doxygen"
- fi
- if test "$THREADS" -eq 1 ; then
- config_options="$config_options --enable-threads"
- else
- config_options="$config_options --disable-threads"
- fi
- config_options="$config_options $OPTIONS_DASH $OPTIONS_DASH_DASH"
- src_dir=`pwd`
- cd "$build_dir"
- msg 0 Configuring $module with:
- msg 0 " $src_dir/configure" $config_options
- $src_dir/configure $config_options || \
- die $? "Configuring $module module failed"
-else
- msg 0 Module $module already configured, ignoring configure options.
- cd "$build_dir"
-fi
-
-msg 0 Building $module with:
-msg 0 " make" $OPTIONS_ASSIGN tools-only
-make $OPTIONS_ASSIGN tools-only
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 0381dbf49635..57ae79a92fdd 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -94,7 +94,7 @@ endif()
check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE)
check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT)
-check_function_exists(isatty HAVE_ISATTY)
+check_symbol_exists(isatty unistd.h HAVE_ISATTY)
check_symbol_exists(index strings.h HAVE_INDEX)
check_symbol_exists(isinf cmath HAVE_ISINF_IN_CMATH)
check_symbol_exists(isinf math.h HAVE_ISINF_IN_MATH_H)
@@ -126,6 +126,8 @@ check_symbol_exists(readdir "sys/types.h;dirent.h" HAVE_READDIR)
check_symbol_exists(getcwd unistd.h HAVE_GETCWD)
check_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY)
check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT)
+check_symbol_exists(posix_spawn spawn.h HAVE_POSIX_SPAWN)
+check_symbol_exists(pread unistd.h HAVE_PREAD)
check_symbol_exists(rindex strings.h HAVE_RINDEX)
check_symbol_exists(strchr string.h HAVE_STRCHR)
check_symbol_exists(strcmp string.h HAVE_STRCMP)
@@ -285,16 +287,18 @@ include(CheckCXXCompilerFlag)
check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG)
-include(GetTargetTriple)
-get_target_triple(LLVM_HOSTTRIPLE)
+include(GetHostTriple)
+get_host_triple(LLVM_HOST_TRIPLE)
-# FIXME: We don't distinguish the target and the host. :(
-set(TARGET_TRIPLE "${LLVM_HOSTTRIPLE}")
+# By default, we target the host, but this can be overridden at CMake
+# invocation time.
+set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}")
+set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}")
# Determine the native architecture.
string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH)
if( LLVM_NATIVE_ARCH STREQUAL "host" )
- string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE})
+ string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOST_TRIPLE})
endif ()
if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86")
@@ -309,8 +313,6 @@ elseif (LLVM_NATIVE_ARCH MATCHES "sparc")
set(LLVM_NATIVE_ARCH Sparc)
elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
set(LLVM_NATIVE_ARCH PowerPC)
-elseif (LLVM_NATIVE_ARCH MATCHES "alpha")
- set(LLVM_NATIVE_ARCH Alpha)
elseif (LLVM_NATIVE_ARCH MATCHES "arm")
set(LLVM_NATIVE_ARCH ARM)
elseif (LLVM_NATIVE_ARCH MATCHES "mips")
@@ -333,6 +335,16 @@ else ()
set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo)
set(LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC)
set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter)
+
+ # We don't have an ASM parser for all architectures yet.
+ if (EXISTS ${CMAKE_SOURCE_DIR}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/CMakeLists.txt)
+ set(LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser)
+ endif ()
+
+ # We don't have a disassembler for all architectures yet.
+ if (EXISTS ${CMAKE_SOURCE_DIR}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/CMakeLists.txt)
+ set(LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler)
+ endif ()
endif ()
if( MINGW )
@@ -380,14 +392,15 @@ endif( PURE_WINDOWS )
set(RETSIGTYPE void)
if( LLVM_ENABLE_THREADS )
- if( HAVE_PTHREAD_H OR WIN32 )
- set(ENABLE_THREADS 1)
+ # Disable threads if the platform lacks supported threading primitives.
+ if( NOT HAVE_PTHREAD_H AND NOT WIN32 )
+ set(LLVM_ENABLE_THREADS 0)
endif()
endif()
-if( ENABLE_THREADS )
+if( LLVM_ENABLE_THREADS )
message(STATUS "Threads enabled.")
-else( ENABLE_THREADS )
+else( LLVM_ENABLE_THREADS )
message(STATUS "Threads disabled.")
endif()
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index b486fe4d8214..388208b6a8af 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -25,16 +25,15 @@ macro(add_llvm_library name)
ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
endif()
set_target_properties(${name} PROPERTIES FOLDER "Libraries")
-endmacro(add_llvm_library name)
-
-macro(add_llvm_library_dependencies name)
- # Save the dependencies of the LLVM library in a variable so that we can
- # query it when resolve llvm-config-style component -> library mappings.
- set_property(GLOBAL PROPERTY LLVM_LIB_DEPS_${name} ${ARGN})
- # Then add the actual dependencies to the library target.
- target_link_libraries(${name} ${ARGN})
-endmacro(add_llvm_library_dependencies name)
+ # Add the explicit dependency information for this library.
+ #
+ # It would be nice to verify that we have the dependencies for this library
+ # name, but using get_property(... SET) doesn't suffice to determine if a
+ # property has been set to an empty value.
+ get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name})
+ target_link_libraries(${name} ${lib_deps})
+endmacro(add_llvm_library name)
macro(add_llvm_loadable_module name)
if( NOT LLVM_ON_UNIX OR CYGWIN )
diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt
index 2dcfa141026e..f51e9af8db0d 100644
--- a/cmake/modules/CMakeLists.txt
+++ b/cmake/modules/CMakeLists.txt
@@ -4,9 +4,9 @@ set(LLVM_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX})
get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
foreach(lib ${llvm_libs})
- get_property(llvm_lib_deps GLOBAL PROPERTY LLVM_LIB_DEPS_${lib})
+ get_property(llvm_lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${lib})
set(all_llvm_lib_deps
- "${all_llvm_lib_deps}\nset_property(GLOBAL PROPERTY LLVM_LIB_DEPS_${lib} ${llvm_lib_deps})")
+ "${all_llvm_lib_deps}\nset_property(GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${lib} ${llvm_lib_deps})")
endforeach(lib)
configure_file(
@@ -32,6 +32,6 @@ install(DIRECTORY .
PATTERN LLVMConfig.cmake EXCLUDE
PATTERN LLVMConfigVersion.cmake EXCLUDE
PATTERN LLVM-Config.cmake EXCLUDE
- PATTERN GetTargetTriple.cmake EXCLUDE
+ PATTERN GetHostTriple.cmake EXCLUDE
PATTERN VersionFromVCS.cmake EXCLUDE
PATTERN CheckAtomic.cmake EXCLUDE)
diff --git a/cmake/modules/ChooseMSVCCRT.cmake b/cmake/modules/ChooseMSVCCRT.cmake
index eb78f45c885a..6a2f426b2694 100644
--- a/cmake/modules/ChooseMSVCCRT.cmake
+++ b/cmake/modules/ChooseMSVCCRT.cmake
@@ -60,7 +60,7 @@ variables (LLVM_USE_CRT_DEBUG, etc) instead.")
make_crt_regex(MSVC_CRT_REGEX ${MSVC_CRT})
- foreach(build_type ${CMAKE_CONFIGURATION_TYPES})
+ foreach(build_type ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE})
string(TOUPPER "${build_type}" build)
if (NOT LLVM_USE_CRT_${build})
get_current_crt(LLVM_USE_CRT_${build}
@@ -75,7 +75,7 @@ variables (LLVM_USE_CRT_DEBUG, etc) instead.")
endif(NOT LLVM_USE_CRT_${build})
endforeach(build_type)
- foreach(build_type ${CMAKE_CONFIGURATION_TYPES})
+ foreach(build_type ${CMAKE_CONFIGURATION_TYPES} ${CMAKE_BUILD_TYPE})
string(TOUPPER "${build_type}" build)
if ("${LLVM_USE_CRT_${build}}" STREQUAL "")
set(flag_string " ")
diff --git a/cmake/modules/GetHostTriple.cmake b/cmake/modules/GetHostTriple.cmake
new file mode 100644
index 000000000000..671a8ce7d7ce
--- /dev/null
+++ b/cmake/modules/GetHostTriple.cmake
@@ -0,0 +1,30 @@
+# Returns the host triple.
+# Invokes config.guess
+
+function( get_host_triple var )
+ if( MSVC )
+ if( CMAKE_CL_64 )
+ set( value "x86_64-pc-win32" )
+ else()
+ set( value "i686-pc-win32" )
+ endif()
+ elseif( MINGW AND NOT MSYS )
+ if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+ set( value "x86_64-w64-mingw32" )
+ else()
+ set( value "i686-pc-mingw32" )
+ endif()
+ else( MSVC )
+ set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess)
+ execute_process(COMMAND sh ${config_guess}
+ RESULT_VARIABLE TT_RV
+ OUTPUT_VARIABLE TT_OUT
+ OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if( NOT TT_RV EQUAL 0 )
+ message(FATAL_ERROR "Failed to execute ${config_guess}")
+ endif( NOT TT_RV EQUAL 0 )
+ set( value ${TT_OUT} )
+ endif( MSVC )
+ set( ${var} ${value} PARENT_SCOPE )
+ message(STATUS "Target triple: ${value}")
+endfunction( get_host_triple var )
diff --git a/cmake/modules/GetTargetTriple.cmake b/cmake/modules/GetTargetTriple.cmake
deleted file mode 100644
index f4321c9b67ec..000000000000
--- a/cmake/modules/GetTargetTriple.cmake
+++ /dev/null
@@ -1,30 +0,0 @@
-# Returns the host triple.
-# Invokes config.guess
-
-function( get_target_triple var )
- if( MSVC )
- if( CMAKE_CL_64 )
- set( value "x86_64-pc-win32" )
- else()
- set( value "i686-pc-win32" )
- endif()
- elseif( MINGW AND NOT MSYS )
- if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
- set( value "x86_64-w64-mingw32" )
- else()
- set( value "i686-pc-mingw32" )
- endif()
- else( MSVC )
- set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess)
- execute_process(COMMAND sh ${config_guess}
- RESULT_VARIABLE TT_RV
- OUTPUT_VARIABLE TT_OUT
- OUTPUT_STRIP_TRAILING_WHITESPACE)
- if( NOT TT_RV EQUAL 0 )
- message(FATAL_ERROR "Failed to execute ${config_guess}")
- endif( NOT TT_RV EQUAL 0 )
- set( value ${TT_OUT} )
- endif( MSVC )
- set( ${var} ${value} PARENT_SCOPE )
- message(STATUS "Target triple: ${value}")
-endfunction( get_target_triple var )
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 9dc1624f446c..3a10a861d67d 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -1,3 +1,7 @@
+# This CMake module is responsible for interpreting the user-defined LLVM_
+# options and executing the appropriate CMake commands to realize the user's
+# selections.
+
include(AddLLVMDefinitions)
if( CMAKE_COMPILER_IS_GNUCXX )
@@ -20,13 +24,6 @@ else()
set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
endif()
-set(LIT_ARGS_DEFAULT "-sv")
-if (MSVC OR XCODE)
- set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
-endif()
-set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}"
- CACHE STRING "Default options for lit")
-
if( LLVM_ENABLE_ASSERTIONS )
# MSVC doesn't like _DEBUG on release builds. See PR 4379.
if( NOT MSVC )
@@ -52,9 +49,6 @@ if(WIN32)
else(CYGWIN)
set(LLVM_ON_WIN32 1)
set(LLVM_ON_UNIX 0)
-
- # This is effective only on Win32 hosts to use gnuwin32 tools.
- set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
endif(CYGWIN)
set(LTDL_SHLIB_EXT ".dll")
set(EXEEXT ".exe")
@@ -82,7 +76,7 @@ if( LLVM_ENABLE_PIC )
# Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
# know how to disable this, so just force ENABLE_PIC off for now.
message(WARNING "-fPIC not supported with Xcode.")
- elseif( WIN32 )
+ elseif( WIN32 OR CYGWIN)
# On Windows all code is PIC. MinGW warns if -fPIC is used.
else()
include(CheckCXXCompilerFlag)
@@ -94,12 +88,20 @@ if( LLVM_ENABLE_PIC )
else( SUPPORTS_FPIC_FLAG )
message(WARNING "-fPIC not supported.")
endif()
+
+ if( WIN32 OR CYGWIN)
+ # MinGW warns if -fvisibility-inlines-hidden is used.
+ else()
+ check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG)
+ if( SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG )
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden")
+ endif()
+ endif()
endif()
endif()
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
# TODO: support other platforms and toolchains.
- option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF)
if( LLVM_BUILD_32_BITS )
message(STATUS "Building 32 bits executables and libraries.")
add_llvm_definitions( -m32 )
@@ -134,6 +136,10 @@ endif()
if( MSVC )
include(ChooseMSVCCRT)
+ if( MSVC11 )
+ add_llvm_definitions(-D_VARIADIC_MAX=10)
+ endif()
+
# Add definitions that make MSVC much less annoying.
add_llvm_definitions(
# For some reason MS wants to deprecate a bunch of standard functions...
@@ -180,6 +186,10 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
if (LLVM_ENABLE_PEDANTIC)
add_llvm_definitions( -pedantic -Wno-long-long )
endif (LLVM_ENABLE_PEDANTIC)
+ check_cxx_compiler_flag("-Werror -Wcovered-switch-default" SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG)
+ if( SUPPORTS_COVERED_SWITCH_DEFAULT_FLAG )
+ add_llvm_definitions( -Wcovered-switch-default )
+ endif()
endif (LLVM_ENABLE_WARNINGS)
if (LLVM_ENABLE_WERROR)
add_llvm_definitions( -Werror )
@@ -189,5 +199,3 @@ endif( MSVC )
add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
add_llvm_definitions( -D__STDC_FORMAT_MACROS )
add_llvm_definitions( -D__STDC_LIMIT_MACROS )
-
-option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake
index b5f262a24da8..574335c49d0e 100755
--- a/cmake/modules/LLVM-Config.cmake
+++ b/cmake/modules/LLVM-Config.cmake
@@ -152,7 +152,7 @@ function(explicit_map_components_to_libraries out_libs)
set(processed)
while( cursor LESS lst_size )
list(GET expanded_components ${cursor} lib)
- get_property(lib_deps GLOBAL PROPERTY LLVM_LIB_DEPS_${lib})
+ get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${lib})
list(APPEND expanded_components ${lib_deps})
# Remove duplicates at the front:
list(REVERSE expanded_components)
diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in
index 6b202b2e7e87..443ec4153280 100644
--- a/cmake/modules/LLVMConfig.cmake.in
+++ b/cmake/modules/LLVMConfig.cmake.in
@@ -1,5 +1,7 @@
# This file provides information and services to the final user.
+set(LLVM_VERSION_MAJOR @LLVM_VERSION_MAJOR@)
+set(LLVM_VERSION_MINOR @LLVM_VERSION_MINOR@)
set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@)
set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@)
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index 3dc820b4abfa..1b1b1728d744 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -51,6 +51,7 @@ function(add_public_tablegen_target target)
add_custom_target(${target}
DEPENDS ${TABLEGEN_OUTPUT})
add_dependencies(${target} ${LLVM_COMMON_DEPENDS})
+ set_target_properties(${target} PROPERTIES FOLDER "Tablegenning")
endif( TABLEGEN_OUTPUT )
endfunction()
@@ -76,7 +77,10 @@ endif()
macro(add_tablegen target project)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR})
+ set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS})
+ set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen)
add_llvm_utility(${target} ${ARGN})
+ set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS})
set(${project}_TABLEGEN "${target}" CACHE
STRING "Native TableGen executable. Saves building one when cross-compiling.")
@@ -110,7 +114,6 @@ macro(add_tablegen target project)
endif()
endif()
- target_link_libraries(${target} LLVMSupport LLVMTableGen)
if( MINGW )
target_link_libraries(${target} imagehlp psapi)
if(CMAKE_SIZEOF_VOID_P MATCHES "8")
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index 81739be927a4..d6a2ae5f45f5 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -3,7 +3,7 @@
# existence of certain subdirectories under CMAKE_CURRENT_SOURCE_DIR.
function(add_version_info_from_vcs VERS)
- set(result ${${VERS}})
+ string(REPLACE "svn" "" result "${${VERS}}")
if( EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.svn" )
set(result "${result}svn")
# FindSubversion does not work with symlinks. See PR 8437
@@ -13,6 +13,7 @@ function(add_version_info_from_vcs VERS)
if( Subversion_FOUND )
subversion_wc_info( ${CMAKE_CURRENT_SOURCE_DIR} Project )
if( Project_WC_REVISION )
+ set(SVN_REVISION ${Project_WC_REVISION} PARENT_SCOPE)
set(result "${result}-r${Project_WC_REVISION}")
endif()
endif()
@@ -21,24 +22,47 @@ function(add_version_info_from_vcs VERS)
# Try to get a ref-id
find_program(git_executable NAMES git git.exe git.cmd)
if( git_executable )
- execute_process(COMMAND ${git_executable} show-ref HEAD
+ set(is_git_svn_rev_exact false)
+ execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
- string(SUBSTRING ${git_output} 0 7 git_ref_id)
- set(result "${result}-${git_ref_id}")
- else()
- execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
+ string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
+ string(LENGTH "${git_svn_rev}" rev_length)
+ math(EXPR rev_length "${rev_length}-1")
+ string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number)
+ set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
+ set(git_svn_rev "-svn-${git_svn_rev}")
+
+ # Determine if the HEAD points directly at a subversion revision.
+ execute_process(COMMAND ${git_executable} svn find-rev HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
TIMEOUT 5
RESULT_VARIABLE git_result
OUTPUT_VARIABLE git_output)
if( git_result EQUAL 0 )
- string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
- set(result "${result}-svn-${git_svn_rev}")
+ string(STRIP "${git_output}" git_head_svn_rev_number)
+ if( git_head_svn_rev_number EQUAL git_svn_rev_number )
+ set(is_git_svn_rev_exact true)
+ endif()
endif()
+ else()
+ set(git_svn_rev "")
+ endif()
+ execute_process(COMMAND
+ ${git_executable} rev-parse --short HEAD
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ TIMEOUT 5
+ RESULT_VARIABLE git_result
+ OUTPUT_VARIABLE git_output)
+ if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
+ string(STRIP "${git_output}" git_ref_id)
+ set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
+ set(result "${result}${git_svn_rev}-${git_ref_id}")
+ else()
+ set(result "${result}${git_svn_rev}")
endif()
endif()
endif()
diff --git a/configure b/configure
index fbd95ca78915..e87160d75c4c 100755
--- a/configure
+++ b/configure
@@ -1,21 +1,20 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.61 for llvm 3.0.
+# Generated by GNU Autoconf 2.60 for LLVM 3.1svn.
#
-# Report bugs to <llvmbugs@cs.uiuc.edu>.
+# Report bugs to <http://llvm.org/bugs/>.
#
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
#
-# Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
+# Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
## --------------------- ##
## M4sh Initialization. ##
## --------------------- ##
-# Be more Bourne compatible
-DUALCASE=1; export DUALCASE # for MKS sh
+# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
@@ -24,13 +23,10 @@ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in
- *posix*) set -o posix ;;
-esac
-
+ case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
-
-
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
# PATH needs CR
@@ -223,7 +219,7 @@ test \$exitcode = 0) || { (exit 1); exit 1; }
else
as_candidate_shells=
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+for as_dir in /usr/bin/posix$PATH_SEPARATOR/bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
@@ -241,6 +237,7 @@ IFS=$as_save_IFS
# Try only shells that exist, to save several forks.
if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
{ ("$as_shell") 2> /dev/null <<\_ASEOF
+# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
@@ -249,12 +246,10 @@ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in
- *posix*) set -o posix ;;
-esac
-
+ case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
-
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
:
_ASEOF
@@ -262,6 +257,7 @@ _ASEOF
CONFIG_SHELL=$as_shell
as_have_required=yes
if { "$as_shell" 2> /dev/null <<\_ASEOF
+# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
@@ -270,12 +266,10 @@ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in
- *posix*) set -o posix ;;
-esac
-
+ case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
-
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
:
(as_func_return () {
@@ -522,28 +516,19 @@ else
as_mkdir_p=false
fi
-if test -x / >/dev/null 2>&1; then
- as_test_x='test -x'
+# Find out whether ``test -x'' works. Don't use a zero-byte file, as
+# systems may use methods other than mode bits to determine executability.
+cat >conf$$.file <<_ASEOF
+#! /bin/sh
+exit 0
+_ASEOF
+chmod +x conf$$.file
+if test -x conf$$.file >/dev/null 2>&1; then
+ as_executable_p="test -x"
else
- if ls -dL / >/dev/null 2>&1; then
- as_ls_L_option=L
- else
- as_ls_L_option=
- fi
- as_test_x='
- eval sh -c '\''
- if test -d "$1"; then
- test -d "$1/.";
- else
- case $1 in
- -*)set "./$1";;
- esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
- ???[sx]*):;;*)false;;esac;fi
- '\'' sh
- '
+ as_executable_p=:
fi
-as_executable_p=$as_test_x
+rm -f conf$$.file
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -574,46 +559,46 @@ MAKEFLAGS=
SHELL=${CONFIG_SHELL-/bin/sh}
# Identity of this package.
-PACKAGE_NAME='llvm'
-PACKAGE_TARNAME='-llvm-'
-PACKAGE_VERSION='3.0'
-PACKAGE_STRING='llvm 3.0'
-PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu'
+PACKAGE_NAME='LLVM'
+PACKAGE_TARNAME='llvm'
+PACKAGE_VERSION='3.1svn'
+PACKAGE_STRING='LLVM 3.1svn'
+PACKAGE_BUGREPORT='http://llvm.org/bugs/'
ac_unique_file="lib/VMCore/Module.cpp"
# Factoring default headers for most tests.
ac_includes_default="\
#include <stdio.h>
-#ifdef HAVE_SYS_TYPES_H
+#if HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
-#ifdef HAVE_SYS_STAT_H
+#if HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
-#ifdef STDC_HEADERS
+#if STDC_HEADERS
# include <stdlib.h>
# include <stddef.h>
#else
-# ifdef HAVE_STDLIB_H
+# if HAVE_STDLIB_H
# include <stdlib.h>
# endif
#endif
-#ifdef HAVE_STRING_H
-# if !defined STDC_HEADERS && defined HAVE_MEMORY_H
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
# include <memory.h>
# endif
# include <string.h>
#endif
-#ifdef HAVE_STRINGS_H
+#if HAVE_STRINGS_H
# include <strings.h>
#endif
-#ifdef HAVE_INTTYPES_H
+#if HAVE_INTTYPES_H
# include <inttypes.h>
#endif
-#ifdef HAVE_STDINT_H
+#if HAVE_STDINT_H
# include <stdint.h>
#endif
-#ifdef HAVE_UNISTD_H
+#if HAVE_UNISTD_H
# include <unistd.h>
#endif"
@@ -697,6 +682,7 @@ BUILD_CC
BUILD_EXEEXT
BUILD_CXX
CVSBUILD
+ENABLE_LIBCPP
ENABLE_OPTIMIZED
ENABLE_PROFILING
DISABLE_ASSERTIONS
@@ -708,7 +694,7 @@ JIT
TARGET_HAS_JIT
ENABLE_DOCS
ENABLE_DOXYGEN
-ENABLE_THREADS
+LLVM_ENABLE_THREADS
ENABLE_PTHREADS
ENABLE_PIC
ENABLE_SHARED
@@ -719,11 +705,12 @@ LLVM_ENUM_TARGETS
LLVM_ENUM_ASM_PRINTERS
LLVM_ENUM_ASM_PARSERS
LLVM_ENUM_DISASSEMBLERS
-ENABLE_CBE_PRINTF_A
OPTIMIZE_OPTION
EXTRA_OPTIONS
EXTRA_LD_OPTIONS
+CLANG_SRC_ROOT
BINUTILS_INCDIR
+INTERNAL_PREFIX
NM
ifGNUmake
LN_S
@@ -748,8 +735,6 @@ CIRCO
GV
DOTTY
XDOT_PY
-PERL
-HAVE_PERL
INSTALL_PROGRAM
INSTALL_SCRIPT
INSTALL_DATA
@@ -777,8 +762,12 @@ CONVENIENCE_LTDL_FALSE
LIBADD_DL
NO_VARIADIC_MACROS
NO_MISSING_FIELD_INITIALIZERS
+COVERED_SWITCH_DEFAULT
USE_UDIS86
USE_OPROFILE
+USE_INTEL_JITEVENTS
+INTEL_JITEVENTS_INCDIR
+INTEL_JITEVENTS_LIBDIR
HAVE_PTHREAD
HUGE_VAL_SANITY
MMAP_FILE
@@ -809,7 +798,6 @@ target_alias
CC
CFLAGS
LDFLAGS
-LIBS
CPPFLAGS
CXX
CXXFLAGS
@@ -933,10 +921,10 @@ do
-disable-* | --disable-*)
ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
# Reject names that are not valid shell variable names.
- expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
{ echo "$as_me: error: invalid feature name: $ac_feature" >&2
{ (exit 1); exit 1; }; }
- ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'`
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
eval enable_$ac_feature=no ;;
-docdir | --docdir | --docdi | --doc | --do)
@@ -952,10 +940,10 @@ do
-enable-* | --enable-*)
ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
# Reject names that are not valid shell variable names.
- expr "x$ac_feature" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
{ echo "$as_me: error: invalid feature name: $ac_feature" >&2
{ (exit 1); exit 1; }; }
- ac_feature=`echo $ac_feature | sed 's/[-.]/_/g'`
+ ac_feature=`echo $ac_feature | sed 's/-/_/g'`
eval enable_$ac_feature=\$ac_optarg ;;
-exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
@@ -1149,19 +1137,19 @@ do
-with-* | --with-*)
ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
# Reject names that are not valid shell variable names.
- expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
{ echo "$as_me: error: invalid package name: $ac_package" >&2
{ (exit 1); exit 1; }; }
- ac_package=`echo $ac_package | sed 's/[-.]/_/g'`
+ ac_package=`echo $ac_package| sed 's/-/_/g'`
eval with_$ac_package=\$ac_optarg ;;
-without-* | --without-*)
ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
# Reject names that are not valid shell variable names.
- expr "x$ac_package" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+ expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
{ echo "$as_me: error: invalid package name: $ac_package" >&2
{ (exit 1); exit 1; }; }
- ac_package=`echo $ac_package | sed 's/[-.]/_/g'`
+ ac_package=`echo $ac_package | sed 's/-/_/g'`
eval with_$ac_package=no ;;
--x)
@@ -1330,7 +1318,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures llvm 3.0 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.1svn to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1378,7 +1366,7 @@ Fine tuning of the installation directories:
--infodir=DIR info documentation [DATAROOTDIR/info]
--localedir=DIR locale-dependent data [DATAROOTDIR/locale]
--mandir=DIR man documentation [DATAROOTDIR/man]
- --docdir=DIR documentation root [DATAROOTDIR/doc/-llvm-]
+ --docdir=DIR documentation root [DATAROOTDIR/doc/llvm]
--htmldir=DIR html documentation [DOCDIR]
--dvidir=DIR dvi documentation [DOCDIR]
--pdfdir=DIR pdf documentation [DOCDIR]
@@ -1396,7 +1384,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of llvm 3.0:";;
+ short | recursive ) echo "Configuration of LLVM 3.1svn:";;
esac
cat <<\_ACEOF
@@ -1404,10 +1392,11 @@ Optional Features:
--disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
--enable-FEATURE[=ARG] include FEATURE [ARG=yes]
--enable-polly Use polly if available (default is YES)
- --enable-optimized Compile with optimizations enabled (default is YES)
+ --enable-libcpp Use libc++ if available (default is NO)
+ --enable-optimized Compile with optimizations enabled (default is NO)
--enable-profiling Compile with profiling enabled (default is NO)
--enable-assertions Compile with assertion checks enabled (default is
- NO)
+ YES)
--enable-expensive-checks
Compile with expensive debug checks enabled (default
is NO)
@@ -1430,11 +1419,8 @@ Optional Features:
(default is YES)
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
- x86_64, sparc, powerpc, alpha, arm, mips, spu,
- xcore, msp430, systemz, blackfin, ptx, cbe, and cpp
- (default=all)
- --enable-cbe-printf-a Enable C Backend output with hex floating point via
- %a (default is YES)
+ x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ xcore, msp430, ptx, and cpp (default=all)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
@@ -1449,32 +1435,32 @@ Optional Packages:
--with-extra-ld-options Specify additional options to link LLVM with
--with-ocaml-libdir Specify install location for ocaml bindings (default
is stdlib)
+ --with-clang-srcdir Directory to the out-of-tree Clang source
--with-clang-resource-dir
Relative directory from the Clang binary for
resource files
--with-c-include-dirs Colon separated list of directories clang will
search for headers
- --with-cxx-include-root Directory with the libstdc++ headers.
- --with-cxx-include-arch Architecture of the libstdc++ headers.
- --with-cxx-include-32bit-dir
- 32 bit multilib dir.
- --with-cxx-include-64bit-dir
- 64 bit multilib directory.
+ --with-gcc-toolchain Directory where gcc is installed.
--with-binutils-include Specify path to binutils/include/ containing
plugin-api.h file for gold plugin.
--with-bug-report-url Specify the URL where bug reports should be
submitted (default=http://llvm.org/bugs/)
+ --with-internal-prefix Installation directory for internal files
--with-tclinclude directory where tcl headers are
--with-udis86=<path> Use udis86 external x86 disassembler library
--with-oprofile=<prefix>
Tell OProfile >= 0.9.4 how to symbolize JIT output
+ --with-intel-jitevents=<vtune-amplifier-dir>
+ Specify location of run-time support library for
+ Intel JIT API
+ (default=/opt/intel/vtune_amplifier_xe_2011)
Some influential environment variables:
CC C compiler command
CFLAGS C compiler flags
LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
nonstandard directory <lib dir>
- LIBS libraries to pass to the linker, e.g. -l<library>
CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
you have headers in a nonstandard directory <include dir>
CXX C++ compiler command
@@ -1484,7 +1470,7 @@ Some influential environment variables:
Use these variables to override the choices made by `configure' or to help
it to find libraries and programs with nonstandard names/locations.
-Report bugs to <llvmbugs@cs.uiuc.edu>.
+Report bugs to <http://llvm.org/bugs/>.
_ACEOF
ac_status=$?
fi
@@ -1545,15 +1531,15 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-llvm configure 3.0
-generated by GNU Autoconf 2.61
+LLVM configure 3.1svn
+generated by GNU Autoconf 2.60
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
-Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign.
_ACEOF
exit
fi
@@ -1561,8 +1547,8 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by llvm $as_me 3.0, which was
-generated by GNU Autoconf 2.61. Invocation command line was
+It was created by LLVM $as_me 3.1svn, which was
+generated by GNU Autoconf 2.60. Invocation command line was
$ $0 $@
@@ -1915,7 +1901,17 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-LLVM_COPYRIGHT="Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign."
+cat >>confdefs.h <<\_ACEOF
+#define LLVM_VERSION_MAJOR 3
+_ACEOF
+
+
+cat >>confdefs.h <<\_ACEOF
+#define LLVM_VERSION_MINOR 1
+_ACEOF
+
+
+LLVM_COPYRIGHT="Copyright (c) 2003-2012 University of Illinois at Urbana-Champaign."
@@ -1987,7 +1983,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -2031,7 +2027,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CC="$ac_prog"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -2170,7 +2166,7 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
# in a Makefile. We should not override ac_cv_exeext if it was cached,
# so that the user can short-circuit this test for compilers unknown to
# Autoconf.
-for ac_file in $ac_files ''
+for ac_file in $ac_files
do
test -f "$ac_file" || continue
case $ac_file in
@@ -2198,12 +2194,6 @@ done
test "$ac_cv_exeext" = no && ac_cv_exeext=
else
- ac_file=''
-fi
-
-{ echo "$as_me:$LINENO: result: $ac_file" >&5
-echo "${ECHO_T}$ac_file" >&6; }
-if test -z "$ac_file"; then
echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
@@ -2215,6 +2205,8 @@ See \`config.log' for more details." >&2;}
fi
ac_exeext=$ac_cv_exeext
+{ echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6; }
# Check that the compiler produces executables we can run. If not, either
# the compiler is broken, or we cross compile.
@@ -2392,10 +2384,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_compiler_gnu=yes
else
echo "$as_me: failed program was:" >&5
@@ -2450,10 +2459,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_prog_cc_g=yes
else
echo "$as_me: failed program was:" >&5
@@ -2488,10 +2514,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
:
else
echo "$as_me: failed program was:" >&5
@@ -2527,10 +2570,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_prog_cc_g=yes
else
echo "$as_me: failed program was:" >&5
@@ -2646,10 +2706,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_prog_cc_c89=$ac_arg
else
echo "$as_me: failed program was:" >&5
@@ -2714,7 +2791,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -2758,7 +2835,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_CXX="$ac_prog"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -2871,10 +2948,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_compiler_gnu=yes
else
echo "$as_me: failed program was:" >&5
@@ -2929,10 +3023,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_prog_cxx_g=yes
else
echo "$as_me: failed program was:" >&5
@@ -2967,10 +3078,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
:
else
echo "$as_me: failed program was:" >&5
@@ -3006,10 +3134,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_prog_cxx_g=yes
else
echo "$as_me: failed program was:" >&5
@@ -3102,10 +3247,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
:
else
echo "$as_me: failed program was:" >&5
@@ -3139,10 +3291,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
# Broken: success on invalid input.
continue
else
@@ -3207,10 +3366,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
:
else
echo "$as_me: failed program was:" >&5
@@ -3244,10 +3410,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
# Broken: success on invalid input.
continue
else
@@ -3328,6 +3501,7 @@ do
;;
llvm-kernel) subdirs="$subdirs projects/llvm-kernel"
;;
+ compiler-rt) ;;
llvm-gcc) ;;
test-suite) ;;
llvm-test) ;;
@@ -3527,7 +3701,7 @@ else
llvm_cv_no_link_all_option="-Wl,-noall_load"
llvm_cv_os_type="Minix"
llvm_cv_platform_type="Unix" ;;
- *-*-freebsd*)
+ *-*-freebsd* | *-*-kfreebsd-gnu)
llvm_cv_link_all_option="-Wl,--whole-archive"
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
llvm_cv_os_type="FreeBSD"
@@ -3562,6 +3736,11 @@ else
llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
llvm_cv_os_type="Linux"
llvm_cv_platform_type="Unix" ;;
+ *-*-gnu*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="GNU"
+ llvm_cv_platform_type="Unix" ;;
*-*-solaris*)
llvm_cv_link_all_option="-Wl,-z,allextract"
llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
@@ -3623,7 +3802,7 @@ else
llvm_cv_target_os_type="Darwin" ;;
*-*-minix*)
llvm_cv_target_os_type="Minix" ;;
- *-*-freebsd*)
+ *-*-freebsd*| *-*-kfreebsd-gnu)
llvm_cv_target_os_type="FreeBSD" ;;
*-*-openbsd*)
llvm_cv_target_os_type="OpenBSD" ;;
@@ -3637,6 +3816,8 @@ else
llvm_cv_target_os_type="Interix" ;;
*-*-linux*)
llvm_cv_target_os_type="Linux" ;;
+ *-*-gnu*)
+ llvm_cv_target_os_type="GNU" ;;
*-*-solaris*)
llvm_cv_target_os_type="SunOS" ;;
*-*-auroraux*)
@@ -3713,13 +3894,12 @@ else
amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
sparc*-*) llvm_cv_target_arch="Sparc" ;;
powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
- alpha*-*) llvm_cv_target_arch="Alpha" ;;
arm*-*) llvm_cv_target_arch="ARM" ;;
mips-*) llvm_cv_target_arch="Mips" ;;
+ mipsel-*) llvm_cv_target_arch="Mips" ;;
xcore-*) llvm_cv_target_arch="XCore" ;;
msp430-*) llvm_cv_target_arch="MSP430" ;;
- s390x-*) llvm_cv_target_arch="SystemZ" ;;
- bfin-*) llvm_cv_target_arch="Blackfin" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
@@ -3733,7 +3913,6 @@ if test "$llvm_cv_target_arch" = "Unknown" ; then
echo "$as_me: WARNING: Configuring LLVM for an unknown target archicture" >&2;}
fi
-# Determine the LLVM native architecture for the target
case "$llvm_cv_target_arch" in
x86) LLVM_NATIVE_ARCH="X86" ;;
x86_64) LLVM_NATIVE_ARCH="X86" ;;
@@ -3766,7 +3945,7 @@ do
for ac_prog in grep ggrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
- { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+ { test -f "$ac_path_GREP" && $as_executable_p "$ac_path_GREP"; } || continue
# Check for GNU ac_path_GREP and select it if it is found.
# Check for GNU $ac_path_GREP
case `"$ac_path_GREP" --version 2>&1` in
@@ -3848,7 +4027,7 @@ do
for ac_prog in egrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
- { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+ { test -f "$ac_path_EGREP" && $as_executable_p "$ac_path_EGREP"; } || continue
# Check for GNU ac_path_EGREP and select it if it is found.
# Check for GNU $ac_path_EGREP
case `"$ac_path_EGREP" --version 2>&1` in
@@ -3944,10 +4123,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_header_stdc=yes
else
echo "$as_me: failed program was:" >&5
@@ -3975,7 +4171,7 @@ if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
else
ac_cv_header_stdc=no
fi
-rm -f -r conftest*
+rm -f conftest*
fi
@@ -3996,7 +4192,7 @@ if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
else
ac_cv_header_stdc=no
fi
-rm -f -r conftest*
+rm -f conftest*
fi
@@ -4123,10 +4319,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_Header=yes"
else
echo "$as_me: failed program was:" >&5
@@ -4168,8 +4381,7 @@ cat >>conftest.$ac_ext <<_ACEOF
int
main ()
{
-#if ! (defined BYTE_ORDER && defined BIG_ENDIAN && defined LITTLE_ENDIAN \
- && BYTE_ORDER && BIG_ENDIAN && LITTLE_ENDIAN)
+#if !BYTE_ORDER || !BIG_ENDIAN || !LITTLE_ENDIAN
bogus endian macros
#endif
@@ -4190,10 +4402,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
# It does; now see whether it defined to BIG_ENDIAN or not.
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
@@ -4228,10 +4457,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_c_bigendian=yes
else
echo "$as_me: failed program was:" >&5
@@ -4282,10 +4528,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
if grep BIGenDianSyS conftest.$ac_objext >/dev/null ; then
ac_cv_c_bigendian=yes
fi
@@ -4415,7 +4678,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_BUILD_CC="${ac_build_prefix}gcc"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -4453,7 +4716,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_BUILD_CC="gcc"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -4492,7 +4755,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
@@ -4582,7 +4845,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_BUILD_CXX="${ac_build_prefix}g++"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -4620,7 +4883,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_BUILD_CXX="g++"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -4659,7 +4922,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/c++"; then
ac_prog_rejected=yes
continue
@@ -4714,6 +4977,25 @@ else
fi
+# Check whether --enable-libcpp was given.
+if test "${enable_libcpp+set}" = set; then
+ enableval=$enable_libcpp;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_LIBCPP=1
+ ;;
+ no) ENABLE_LIBCPP=0
+ ;;
+ default) ENABLE_LIBCPP=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
# Check whether --enable-optimized was given.
if test "${enable_optimized+set}" = set; then
enableval=$enable_optimized;
@@ -4748,7 +5030,7 @@ fi
if test "${enable_assertions+set}" = set; then
enableval=$enable_assertions;
else
- enableval="no"
+ enableval="yes"
fi
if test ${enableval} = "yes" ; then
@@ -4829,8 +5111,6 @@ else
;;
x86_64) TARGET_HAS_JIT=1
;;
- Alpha) TARGET_HAS_JIT=0
- ;;
ARM) TARGET_HAS_JIT=1
;;
Mips) TARGET_HAS_JIT=1
@@ -4839,9 +5119,7 @@ else
;;
MSP430) TARGET_HAS_JIT=0
;;
- SystemZ) TARGET_HAS_JIT=0
- ;;
- Blackfin) TARGET_HAS_JIT=0
+ Hexagon) TARGET_HAS_JIT=0
;;
MBlaze) TARGET_HAS_JIT=0
;;
@@ -4898,11 +5176,11 @@ else
fi
case "$enableval" in
- yes) ENABLE_THREADS=1
+ yes) LLVM_ENABLE_THREADS=1
;;
- no) ENABLE_THREADS=0
+ no) LLVM_ENABLE_THREADS=0
;;
- default) ENABLE_THREADS=1
+ default) LLVM_ENABLE_THREADS=1
;;
*) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&5
echo "$as_me: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&2;}
@@ -4910,7 +5188,7 @@ echo "$as_me: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"
esac
cat >>confdefs.h <<_ACEOF
-#define ENABLE_THREADS $ENABLE_THREADS
+#define LLVM_ENABLE_THREADS $LLVM_ENABLE_THREADS
_ACEOF
@@ -5031,23 +5309,21 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
- all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze PTX" ;;
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
- alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ mipsel) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
- blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
- cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
@@ -5055,15 +5331,13 @@ case "$enableval" in
x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
- Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
- s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
- Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
@@ -5079,8 +5353,6 @@ esac
TARGETS_TO_BUILD=$TARGETS_TO_BUILD
-# Determine whether we are building LLVM support for the native architecture.
-# If so, define LLVM_NATIVE_ARCH to that LLVM target.
for a_target in $TARGETS_TO_BUILD; do
if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
@@ -5095,6 +5367,9 @@ _ACEOF
if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+ LLVM_NATIVE_DISASSEMBLER="LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler"
+ fi
cat >>confdefs.h <<_ACEOF
#define LLVM_NATIVE_TARGET $LLVM_NATIVE_TARGET
@@ -5122,11 +5397,16 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_DISASSEMBLER $LLVM_NATIVE_DISASSEMBLER
+_ACEOF
+
+ fi
fi
done
-# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual
-# target feature def files.
LLVM_ENUM_TARGETS=""
LLVM_ENUM_ASM_PRINTERS=""
LLVM_ENUM_ASM_PARSERS=""
@@ -5148,30 +5428,6 @@ done
-# Check whether --enable-cbe-printf-a was given.
-if test "${enable_cbe_printf_a+set}" = set; then
- enableval=$enable_cbe_printf_a;
-else
- enableval=default
-fi
-
-case "$enableval" in
- yes) ENABLE_CBE_PRINTF_A=1
- ;;
- no) ENABLE_CBE_PRINTF_A=0
- ;;
- default) ENABLE_CBE_PRINTF_A=1
- ;;
- *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-cbe-printf-a. Use \"yes\" or \"no\"" >&5
-echo "$as_me: error: Invalid setting for --enable-cbe-printf-a. Use \"yes\" or \"no\"" >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-
-cat >>confdefs.h <<_ACEOF
-#define ENABLE_CBE_PRINTF_A $ENABLE_CBE_PRINTF_A
-_ACEOF
-
-
# Check whether --with-optimize-option was given.
if test "${with_optimize_option+set}" = set; then
@@ -5266,6 +5522,22 @@ echo "$as_me: error: Invalid path for --with-ocaml-libdir. Provide full path" >&
esac
+# Check whether --with-clang-srcdir was given.
+if test "${with_clang_srcdir+set}" = set; then
+ withval=$with_clang_srcdir;
+else
+ withval="-"
+fi
+
+case "$withval" in
+ -) clang_src_root="" ;;
+ /* | [A-Za-z]:[\\/]*) clang_src_root="$withval" ;;
+ *) clang_src_root="$ac_pwd/$withval" ;;
+esac
+CLANG_SRC_ROOT=$clang_src_root
+
+
+
# Check whether --with-clang-resource-dir was given.
if test "${with_clang_resource_dir+set}" = set; then
withval=$with_clang_resource_dir;
@@ -5293,59 +5565,20 @@ cat >>confdefs.h <<_ACEOF
_ACEOF
+# Clang normally uses the system c++ headers and libraries. With this option,
+# clang will use the ones provided by a gcc installation instead. This option should
+# be passed the same value that was used with --prefix when configuring gcc.
-# Check whether --with-cxx-include-root was given.
-if test "${with_cxx_include_root+set}" = set; then
- withval=$with_cxx_include_root;
-else
- withval=""
-fi
-
-
-cat >>confdefs.h <<_ACEOF
-#define CXX_INCLUDE_ROOT "$withval"
-_ACEOF
-
-
-
-# Check whether --with-cxx-include-arch was given.
-if test "${with_cxx_include_arch+set}" = set; then
- withval=$with_cxx_include_arch;
-else
- withval=""
-fi
-
-
-cat >>confdefs.h <<_ACEOF
-#define CXX_INCLUDE_ARCH "$withval"
-_ACEOF
-
-
-
-# Check whether --with-cxx-include-32bit-dir was given.
-if test "${with_cxx_include_32bit_dir+set}" = set; then
- withval=$with_cxx_include_32bit_dir;
-else
- withval=""
-fi
-
-
-cat >>confdefs.h <<_ACEOF
-#define CXX_INCLUDE_32BIT_DIR "$withval"
-_ACEOF
-
-
-
-# Check whether --with-cxx-include-64bit-dir was given.
-if test "${with_cxx_include_64bit_dir+set}" = set; then
- withval=$with_cxx_include_64bit_dir;
+# Check whether --with-gcc-toolchain was given.
+if test "${with_gcc_toolchain+set}" = set; then
+ withval=$with_gcc_toolchain;
else
withval=""
fi
cat >>confdefs.h <<_ACEOF
-#define CXX_INCLUDE_64BIT_DIR "$withval"
+#define GCC_INSTALL_PREFIX "$withval"
_ACEOF
@@ -5404,6 +5637,17 @@ fi
+# Check whether --with-internal-prefix was given.
+if test "${with_internal_prefix+set}" = set; then
+ withval=$with_internal_prefix;
+else
+ withval=""
+fi
+
+INTERNAL_PREFIX=$withval
+
+
+
{ echo "$as_me:$LINENO: checking for BSD-compatible nm" >&5
echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" >&6; }
if test "${lt_cv_path_NM+set}" = set; then
@@ -5514,7 +5758,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_CMP="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5555,7 +5799,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_CP="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5596,7 +5840,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_DATE="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5637,7 +5881,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_FIND="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5678,7 +5922,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_GREP="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5719,7 +5963,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_MKDIR="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5760,7 +6004,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_MV="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5800,7 +6044,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5840,7 +6084,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_RANLIB="ranlib"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5896,7 +6140,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_AR="${ac_tool_prefix}ar"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5936,7 +6180,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_prog_ac_ct_AR="ar"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -5993,7 +6237,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_RM="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6034,7 +6278,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_SED="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6075,7 +6319,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_TAR="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6116,7 +6360,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_BINPWD="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6158,7 +6402,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_GRAPHVIZ="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6214,7 +6458,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_DOT="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6270,7 +6514,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_FDP="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6326,7 +6570,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_NEATO="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6382,7 +6626,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_TWOPI="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6438,7 +6682,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_CIRCO="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6496,7 +6740,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_GV="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6555,7 +6799,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_DOTTY="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6611,7 +6855,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_XDOT_PY="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6650,73 +6894,6 @@ _ACEOF
fi
-
-# Extract the first word of "perl", so it can be a program name with args.
-set dummy perl; ac_word=$2
-{ echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
-if test "${ac_cv_path_PERL+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- case $PERL in
- [\\/]* | ?:[\\/]*)
- ac_cv_path_PERL="$PERL" # Let the user override the test with a path.
- ;;
- *)
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
- ac_cv_path_PERL="$as_dir/$ac_word$ac_exec_ext"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-IFS=$as_save_IFS
-
- test -z "$ac_cv_path_PERL" && ac_cv_path_PERL="none"
- ;;
-esac
-fi
-PERL=$ac_cv_path_PERL
-if test -n "$PERL"; then
- { echo "$as_me:$LINENO: result: $PERL" >&5
-echo "${ECHO_T}$PERL" >&6; }
-else
- { echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6; }
-fi
-
-
-if test "$PERL" != "none"; then
- { echo "$as_me:$LINENO: checking for Perl 5.006 or newer" >&5
-echo $ECHO_N "checking for Perl 5.006 or newer... $ECHO_C" >&6; }
- if $PERL -e 'use 5.006;' 2>&1 > /dev/null; then
- { echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6; }
- else
- PERL=none
- { echo "$as_me:$LINENO: result: not found" >&5
-echo "${ECHO_T}not found" >&6; }
- fi
-fi
-
-
-if test x"$PERL" = xnone; then
- HAVE_PERL=0
-
- { { echo "$as_me:$LINENO: error: perl is required but was not found, please install it" >&5
-echo "$as_me: error: perl is required but was not found, please install it" >&2;}
- { (exit 1); exit 1; }; }
-else
- HAVE_PERL=1
-
-fi
-
# Find a good install program. We prefer a C program (faster),
# so one script is as good as another. But avoid the broken or
# incompatible versions:
@@ -6753,7 +6930,7 @@ case $as_dir/ in
# by default.
for ac_prog in ginstall scoinst install; do
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; }; then
if test $ac_prog = install &&
grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
# AIX install. It has an incompatible calling convention.
@@ -6820,7 +6997,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_BZIP2="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6860,7 +7037,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_CAT="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6900,7 +7077,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_DOXYGEN="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6940,7 +7117,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_GROFF="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -6980,7 +7157,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_GZIPBIN="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7020,7 +7197,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_POD2HTML="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7060,7 +7237,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_POD2MAN="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7100,7 +7277,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_PDFROFF="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7140,7 +7317,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_RUNTEST="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7215,7 +7392,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_TCLSH="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7270,7 +7447,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_ZIP="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7312,7 +7489,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_OCAMLC="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7357,7 +7534,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_OCAMLOPT="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7402,7 +7579,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_OCAMLDEP="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7447,7 +7624,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_OCAMLDOC="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7492,7 +7669,7 @@ do
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
ac_cv_path_GAS="$as_dir/$ac_word$ac_exec_ext"
echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
@@ -7528,7 +7705,7 @@ else
# Check for ld64.
if (echo "$version_string" | grep -q "ld64"); then
- llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#")
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)\( (.*)\)\{0,1\}#\1#")
else
llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#")
fi
@@ -7584,11 +7761,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
llvm_cv_link_use_r=yes
else
echo "$as_me: failed program was:" >&5
@@ -7597,7 +7790,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
llvm_cv_link_use_r=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$oldcflags"
ac_ext=c
@@ -7660,11 +7853,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
llvm_cv_link_use_export_dynamic=yes
else
echo "$as_me: failed program was:" >&5
@@ -7673,7 +7882,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
llvm_cv_link_use_export_dynamic=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
CFLAGS="$oldcflags"
ac_ext=c
@@ -7758,11 +7967,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
llvm_cv_link_use_version_script=yes
else
echo "$as_me: failed program was:" >&5
@@ -7771,7 +7996,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
llvm_cv_link_use_version_script=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
rm "$tmp/export.map"
rmdir "$tmp"
@@ -7813,10 +8038,10 @@ main ()
#ifndef __cplusplus
/* Ultrix mips cc rejects this. */
typedef int charset[2];
- const charset cs;
+ const charset x;
/* SunOS 4.1.1 cc rejects this. */
- char const *const *pcpcc;
- char **ppc;
+ char const *const *ccp;
+ char **p;
/* NEC SVR4.0.2 mips cc rejects this. */
struct point {int x, y;};
static struct point const zero = {0,0};
@@ -7825,11 +8050,11 @@ main ()
an arm of an if-expression whose if-part is not a constant
expression */
const char *g = "string";
- pcpcc = &g + (g ? g-g : 0);
+ ccp = &g + (g ? g-g : 0);
/* HPUX 7.0 cc rejects these. */
- ++pcpcc;
- ppc = (char**) pcpcc;
- pcpcc = (char const *const *) ppc;
+ ++ccp;
+ p = (char**) ccp;
+ ccp = (char const *const *) p;
{ /* SCO 3.2v4 cc rejects this. */
char *t;
char const *s = 0 ? (char *) 0 : (char const *) 0;
@@ -7856,7 +8081,7 @@ main ()
const int foo = 10;
if (!foo) return 0;
}
- return !cs[0] && !zero.x;
+ return !x[0] && !zero.x;
#endif
;
@@ -7876,10 +8101,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_c_const=yes
else
echo "$as_me: failed program was:" >&5
@@ -7944,10 +8186,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_Header=yes"
else
echo "$as_me: failed program was:" >&5
@@ -8020,11 +8279,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_opendir=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -8033,7 +8308,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_opendir+set}" = set; then
break
@@ -8104,11 +8379,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_opendir=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -8117,7 +8408,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_opendir+set}" = set; then
break
@@ -8180,10 +8471,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -8219,10 +8527,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -8258,9 +8573,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
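
Every header-check warning box in this patch is likewise retargeted from the old llvmbugs@cs.uiuc.edu list to the llvm.org bug tracker. The box is produced by a quoted here-doc piped through sed, which prefixes each line; this mirrors the source and is runnable flush-left (the here-doc terminator must start its line):

# Emit the report-to box on stderr with the warning prefix.
as_me=configure
( cat <<\_ASBOX
## ------------------------------------ ##
## Report this to http://llvm.org/bugs/ ##
## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
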
@@ -8292,7 +8607,9 @@ if test "${enable_ltdl_install+set}" = set; then
fi
- if test x"${enable_ltdl_install-no}" != xno; then
+
+
+if test x"${enable_ltdl_install-no}" != xno; then
INSTALL_LTDL_TRUE=
INSTALL_LTDL_FALSE='#'
else
@@ -8300,7 +8617,9 @@ else
INSTALL_LTDL_FALSE=
fi
- if test x"${enable_ltdl_convenience-no}" != xno; then
+
+
+if test x"${enable_ltdl_convenience-no}" != xno; then
CONVENIENCE_LTDL_TRUE=
CONVENIENCE_LTDL_FALSE='#'
else
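
These two hunks only reflow the `if` lines of the libltdl conditionals, but the TRUE/FALSE pair they touch is the usual automake-conditional idiom, sketched here:

    # Exactly one of the pair expands to '#'; substituted into generated
    # Makefiles, it comments out whichever branch is disabled.
    if test x"${enable_ltdl_install-no}" != xno; then
      INSTALL_LTDL_TRUE= INSTALL_LTDL_FALSE='#'
    else
      INSTALL_LTDL_TRUE='#' INSTALL_LTDL_FALSE=
    fi
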
@@ -8512,7 +8831,7 @@ dgux*)
shlibpath_var=LD_LIBRARY_PATH
;;
-freebsd1*)
+freebsd1.*)
dynamic_linker=no
;;
@@ -8535,7 +8854,7 @@ freebsd* | dragonfly*)
objformat=`/usr/bin/objformat`
else
case $host_os in
- freebsd[123]*) objformat=aout ;;
+ freebsd[123].*) objformat=aout ;;
*) objformat=elf ;;
esac
fi
@@ -8553,7 +8872,7 @@ freebsd* | dragonfly*)
esac
shlibpath_var=LD_LIBRARY_PATH
case $host_os in
- freebsd2*)
+ freebsd2.*)
shlibpath_overrides_runpath=yes
;;
freebsd3.[01]* | freebsdelf3.[01]*)
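
The libtool hunks here tighten the FreeBSD version globs: `freebsd1*` also matches `freebsd10.0` and would force `dynamic_linker=no` on a modern host, so the patterns gain an explicit dot. A quick demonstration:

    host_os=freebsd10.0
    case $host_os in
      freebsd1*)  echo "old glob: 10.x misclassified as FreeBSD 1.x" ;;
    esac
    case $host_os in
      freebsd1.*) echo "tightened glob: FreeBSD 1.x only" ;;
      *)          echo "tightened glob: 10.x correctly skipped" ;;
    esac
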
@@ -9312,11 +9631,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_shl_load=yes
else
echo "$as_me: failed program was:" >&5
@@ -9325,7 +9660,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_func_shl_load=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func_shl_load" >&5
@@ -9379,11 +9714,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_dld_shl_load=yes
else
echo "$as_me: failed program was:" >&5
@@ -9392,7 +9743,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_dld_shl_load=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -9448,11 +9799,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_dl_dlopen=yes
else
echo "$as_me: failed program was:" >&5
@@ -9461,7 +9828,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_dl_dlopen=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -9506,11 +9873,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
cat >>confdefs.h <<\_ACEOF
#define HAVE_LIBDL 1
@@ -9562,11 +9945,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_svld_dlopen=yes
else
echo "$as_me: failed program was:" >&5
@@ -9575,7 +9974,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_svld_dlopen=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -9631,11 +10030,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_dld_dld_link=yes
else
echo "$as_me: failed program was:" >&5
@@ -9644,7 +10059,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_dld_dld_link=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -9721,11 +10136,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func__dyld_func_lookup=yes
else
echo "$as_me: failed program was:" >&5
@@ -9734,7 +10165,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_func__dyld_func_lookup=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_func__dyld_func_lookup" >&5
@@ -9756,7 +10187,7 @@ fi
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
@@ -9839,11 +10270,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -9852,7 +10299,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -9939,7 +10386,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 9942 "configure"
+#line 10387 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
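
Only the embedded `#line` directive changes here: regeneration shifted the script, so diagnostics from the dlopen self-test program must point at the new location inside `configure`. Illustratively (the value is simply the one from the hunk):

    # The directive makes the compiler attribute errors in the generated
    # test program to the named file and line of configure itself.
    lineno=10387
    printf '#line %s "configure"\nint main(void) { return 0; }\n' "$lineno" >conftest.c
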
@@ -10168,10 +10615,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -10207,10 +10671,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -10246,9 +10717,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -10315,10 +10786,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_type_error_t=yes
else
echo "$as_me: failed program was:" >&5
@@ -10418,11 +10906,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -10431,7 +10935,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -10511,10 +11015,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -10550,10 +11071,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -10589,9 +11117,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -10659,10 +11187,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -10698,10 +11243,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -10737,9 +11289,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -10805,10 +11357,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -10844,10 +11413,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -10883,9 +11459,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -10980,11 +11556,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -10993,7 +11585,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -11075,11 +11667,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -11088,7 +11696,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -11170,11 +11778,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -11183,7 +11807,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -11265,11 +11889,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -11278,7 +11918,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -11361,11 +12001,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -11374,7 +12030,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -11439,10 +12095,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
:
else
echo "$as_me: failed program was:" >&5
@@ -11468,12 +12141,14 @@ echo "${ECHO_T}ok" >&6; }
{ echo "$as_me:$LINENO: checking optional compiler flags" >&5
echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; }
-NO_VARIADIC_MACROS=`$CXX -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros`
+NO_VARIADIC_MACROS=`$CXX -Werror -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros`
+
+NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers`
-NO_MISSING_FIELD_INITIALIZERS=`$CXX -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers`
+COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default`
-{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS" >&5
-echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS" >&6; }
+{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&5
+echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&6; }
@@ -11519,11 +12194,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_m_sin=yes
else
echo "$as_me: failed program was:" >&5
@@ -11532,7 +12223,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_m_sin=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -11585,11 +12276,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_imagehlp_main=yes
else
echo "$as_me: failed program was:" >&5
@@ -11598,7 +12305,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_imagehlp_main=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -11650,11 +12357,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_psapi_main=yes
else
echo "$as_me: failed program was:" >&5
@@ -11663,7 +12386,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_psapi_main=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -11728,11 +12451,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_dlopen=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -11741,7 +12480,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_dlopen+set}" = set; then
break
@@ -11820,11 +12559,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_ffi_call=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -11833,7 +12588,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_ffi_call+set}" = set; then
break
@@ -11913,11 +12668,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_mallinfo=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -11926,7 +12697,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_mallinfo+set}" = set; then
break
@@ -11953,7 +12724,7 @@ _ACEOF
fi
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
{ echo "$as_me:$LINENO: checking for pthread_mutex_init in -lpthread" >&5
echo $ECHO_N "checking for pthread_mutex_init in -lpthread... $ECHO_C" >&6; }
@@ -11997,11 +12768,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_pthread_pthread_mutex_init=yes
else
echo "$as_me: failed program was:" >&5
@@ -12010,7 +12797,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_pthread_pthread_mutex_init=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -12073,11 +12860,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_pthread_mutex_lock=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12086,7 +12889,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then
break
@@ -12160,11 +12963,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_pthread_rwlock_init=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12173,7 +12992,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
break
@@ -12247,11 +13066,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_pthread_getspecific=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12260,7 +13095,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_pthread_getspecific+set}" = set; then
break
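
The pthread hunks rewrite instances of the `AC_SEARCH_LIBS` expansion, whose loop tries linking with no extra library first and then each candidate, keeping the first that works. A minimal sketch of that search order, assuming `cc`:

    printf 'char pthread_getspecific ();\nint main () { return pthread_getspecific (); }\n' >conftest.c
    for lib in '' pthread; do
      if cc -o conftest conftest.c ${lib:+-l$lib} 2>/dev/null; then
        echo "found: ${lib:-none required}"; break
      fi
    done
    rm -f conftest conftest.c
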
@@ -12341,11 +13176,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_udis86_ud_init=yes
else
echo "$as_me: failed program was:" >&5
@@ -12354,7 +13205,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_udis86_ud_init=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -12400,9 +13251,11 @@ if test "${with_oprofile+set}" = set; then
*) llvm_cv_oppath="${withval}/lib/oprofile"
CPPFLAGS="-I${withval}/include";;
esac
- if test -n "$llvm_cv_oppath" ; then
- LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
- { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5
+ case $llvm_cv_os_type in
+ Linux)
+ if test -n "$llvm_cv_oppath" ; then
+ LIBS="$LIBS -lopagent -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+ { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5
echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; }
if test "${ac_cv_search_bfd_init+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
@@ -12450,11 +13303,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_bfd_init=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12463,7 +13332,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_bfd_init+set}" = set; then
break
@@ -12485,7 +13354,7 @@ if test "$ac_res" != no; then
fi
- { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5
+ { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5
echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; }
if test "${ac_cv_search_op_open_agent+set}" = set; then
echo $ECHO_N "(cached) $ECHO_C" >&6
@@ -12533,11 +13402,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_op_open_agent=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12546,7 +13431,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_op_open_agent+set}" = set; then
break
@@ -12568,12 +13453,12 @@ if test "$ac_res" != no; then
else
- echo "Error! You need to have libopagent around."
- exit -1
+ echo "Error! You need to have libopagent around."
+ exit -1
fi
- if test "${ac_cv_header_opagent_h+set}" = set; then
+ if test "${ac_cv_header_opagent_h+set}" = set; then
{ echo "$as_me:$LINENO: checking for opagent.h" >&5
echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
if test "${ac_cv_header_opagent_h+set}" = set; then
@@ -12607,10 +13492,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -12646,10 +13548,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -12685,9 +13594,9 @@ echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2
{ echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -12707,13 +13616,18 @@ if test $ac_cv_header_opagent_h = yes; then
:
else
- echo "Error! You need to have opagent.h around."
- exit -1
+ echo "Error! You need to have opagent.h around."
+ exit -1
fi
- fi
+ fi ;;
+ *)
+ { { echo "$as_me:$LINENO: error: OProfile support is available on Linux only." >&5
+echo "$as_me: error: OProfile support is available on Linux only." >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac
else
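
The hunk that follows is entirely new: a `--with-intel-jitevents` option wiring in Intel's VTune JIT profiling API, defaulting to the Amplifier XE 2011 install tree, selecting lib32/lib64 by target architecture, probing for `iJIT_IsProfilingActive`, and checking `jitprofiling.h`. A condensed sketch of its shape (paths as they appear in the hunk; the real code discovers `-ljitprofiling` through a link probe rather than hard-coding it):

    case "$llvm_cv_target_arch" in
      x86)    llvm_intel_jitevents_archdir="lib32" ;;
      x86_64) llvm_intel_jitevents_archdir="lib64" ;;
    esac
    INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
    INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
    LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR} -ljitprofiling"
    CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
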
@@ -12724,7 +13638,326 @@ fi
cat >>confdefs.h <<_ACEOF
-#define USE_OPROFILE $USE_OPROFILE
+#define LLVM_USE_OPROFILE $USE_OPROFILE
+_ACEOF
+
+
+
+# Check whether --with-intel-jitevents was given.
+if test "${with_intel_jitevents+set}" = set; then
+ withval=$with_intel_jitevents;
+ case $llvm_cv_os_type in
+ Linux|Win32|Cygwin|MingW) ;;
+ *)
+ { { echo "$as_me:$LINENO: error:
+ Intel JIT API support is available on Linux and Windows only.\"" >&5
+echo "$as_me: error:
+ Intel JIT API support is available on Linux and Windows only.\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac
+
+ USE_INTEL_JITEVENTS=1
+
+ case "$llvm_cv_target_arch" in
+ x86) llvm_intel_jitevents_archdir="lib32";;
+ x86_64) llvm_intel_jitevents_archdir="lib64";;
+ *) echo "Target architecture $llvm_cv_target_arch does not support Intel JIT Events API"
+ exit -1;;
+ esac
+ INTEL_JITEVENTS_INCDIR="/opt/intel/vtune_amplifier_xe_2011/include"
+ INTEL_JITEVENTS_LIBDIR="/opt/intel/vtune_amplifier_xe_2011/$llvm_intel_jitevents_archdir"
+ case "$withval" in
+ /* | [A-Za-z]:[\\/]*) INTEL_JITEVENTS_INCDIR=$withval/include
+ INTEL_JITEVENTS_LIBDIR=$withval/$llvm_intel_jitevents_archdir ;;
+ *) ;;
+ esac
+
+
+
+
+ LIBS="$LIBS -L${INTEL_JITEVENTS_LIBDIR}"
+ CPPFLAGS="$CPPFLAGS -I$INTEL_JITEVENTS_INCDIR"
+
+ { echo "$as_me:$LINENO: checking for library containing iJIT_IsProfilingActive" >&5
+echo $ECHO_N "checking for library containing iJIT_IsProfilingActive... $ECHO_C" >&6; }
+if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char iJIT_IsProfilingActive ();
+int
+main ()
+{
+return iJIT_IsProfilingActive ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' jitprofiling; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_iJIT_IsProfilingActive=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_iJIT_IsProfilingActive+set}" = set; then
+ :
+else
+ ac_cv_search_iJIT_IsProfilingActive=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_iJIT_IsProfilingActive" >&5
+echo "${ECHO_T}$ac_cv_search_iJIT_IsProfilingActive" >&6; }
+ac_res=$ac_cv_search_iJIT_IsProfilingActive
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ echo "Error! Cannot find libjitprofiling.a. Please check path specified in flag --with-intel-jitevents"
+ exit -1
+
+fi
+
+ if test "${ac_cv_header_jitprofiling_h+set}" = set; then
+ { echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
+echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_jitprofiling_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
+echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking jitprofiling.h usability" >&5
+echo $ECHO_N "checking jitprofiling.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <jitprofiling.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking jitprofiling.h presence" >&5
+echo $ECHO_N "checking jitprofiling.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <jitprofiling.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: jitprofiling.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: jitprofiling.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: jitprofiling.h: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: jitprofiling.h: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: jitprofiling.h: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: jitprofiling.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: jitprofiling.h: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: jitprofiling.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for jitprofiling.h" >&5
+echo $ECHO_N "checking for jitprofiling.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_jitprofiling_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_header_jitprofiling_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_jitprofiling_h" >&5
+echo "${ECHO_T}$ac_cv_header_jitprofiling_h" >&6; }
+
+fi
+if test $ac_cv_header_jitprofiling_h = yes; then
+ :
+else
+
+ echo "Error! Cannot find jitprofiling.h. Please check path specified in flag --with-intel-jitevents"
+ exit -1
+
+fi
+
+
+
+
+else
+
+ USE_INTEL_JITEVENTS=0
+
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_USE_INTEL_JITEVENTS $USE_INTEL_JITEVENTS
_ACEOF
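
The hunk above introduces a new --with-intel-jitevents configure option: it is accepted only on Linux and Windows hosts, picks lib32 or lib64 under the VTune Amplifier XE install root according to the target architecture, probes for iJIT_IsProfilingActive in -ljitprofiling and for jitprofiling.h, and records the result as LLVM_USE_INTEL_JITEVENTS (note the parallel USE_OPROFILE -> LLVM_USE_OPROFILE rename at the top of the hunk). A minimal invocation sketch, using the default VTune layout shown in the hunk:

    # the default install root is probed automatically:
    ./configure --with-intel-jitevents
    # or point configure at a non-default VTune installation; the
    # include/ and lib32/ or lib64/ subdirectories are assumed to exist there:
    ./configure --with-intel-jitevents=/opt/intel/vtune_amplifier_xe_2011
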
@@ -12773,10 +14006,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_Header=yes"
else
echo "$as_me: failed program was:" >&5
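
This hunk is the first of many below that make the same mechanical change: the compact success test emitted by newer Autoconf (a single brace group combining the werror check with a probe of the output file) is replaced by the older expanded form that re-echoes and re-evaluates each probe through ac_try. Both spellings test the same condition; a sketch of that logic, with illustrative variable handling:

    # succeed only if: the tool exited 0, AND
    #   (werror is off OR nothing was written to stderr), AND
    #   the expected output file exists and is non-empty
    if test $ac_status = 0 &&
       { test -z "$ac_c_werror_flag" || test ! -s conftest.err; } &&
       test -s conftest.$ac_objext; then
      result=yes
    fi
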
@@ -12849,11 +14099,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_opendir=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12862,7 +14128,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_opendir+set}" = set; then
break
@@ -12933,11 +14199,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_search_opendir=$ac_res
else
echo "$as_me: failed program was:" >&5
@@ -12946,7 +14228,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext
if test "${ac_cv_search_opendir+set}" = set; then
break
@@ -13011,10 +14293,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_header_mmap_anon=yes
else
echo "$as_me: failed program was:" >&5
@@ -13057,48 +14356,38 @@ cat >>conftest.$ac_ext <<_ACEOF
#include <sys/stat.h>
#if defined S_ISBLK && defined S_IFDIR
-extern char c1[S_ISBLK (S_IFDIR) ? -1 : 1];
+# if S_ISBLK (S_IFDIR)
+You lose.
+# endif
#endif
#if defined S_ISBLK && defined S_IFCHR
-extern char c2[S_ISBLK (S_IFCHR) ? -1 : 1];
+# if S_ISBLK (S_IFCHR)
+You lose.
+# endif
#endif
#if defined S_ISLNK && defined S_IFREG
-extern char c3[S_ISLNK (S_IFREG) ? -1 : 1];
+# if S_ISLNK (S_IFREG)
+You lose.
+# endif
#endif
#if defined S_ISSOCK && defined S_IFREG
-extern char c4[S_ISSOCK (S_IFREG) ? -1 : 1];
+# if S_ISSOCK (S_IFREG)
+You lose.
+# endif
#endif
_ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
-case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
- (eval "$ac_compile") 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
- ac_cv_header_stat_broken=no
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "You lose" >/dev/null 2>&1; then
+ ac_cv_header_stat_broken=yes
else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- ac_cv_header_stat_broken=yes
+ ac_cv_header_stat_broken=no
fi
+rm -f conftest*
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ echo "$as_me:$LINENO: result: $ac_cv_header_stat_broken" >&5
echo "${ECHO_T}$ac_cv_header_stat_broken" >&6; }
@@ -13153,10 +14442,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_header_sys_wait_h=yes
else
echo "$as_me: failed program was:" >&5
@@ -13214,10 +14520,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_header_time=yes
else
echo "$as_me: failed program was:" >&5
@@ -13283,10 +14606,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -13322,10 +14662,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -13361,9 +14708,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -13433,10 +14780,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -13472,10 +14836,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -13511,9 +14882,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -13579,10 +14950,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -13618,10 +15006,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -13657,9 +15052,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -13728,10 +15123,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -13767,10 +15179,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -13806,9 +15225,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -13876,10 +15295,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -13915,10 +15351,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -13954,9 +15397,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -14021,10 +15464,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -14060,10 +15520,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -14099,9 +15566,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -14166,10 +15633,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -14205,10 +15689,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -14244,9 +15735,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -14272,7 +15763,7 @@ fi
done
-if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
for ac_header in pthread.h
do
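
The guard for the pthread.h probe now reads LLVM_ENABLE_THREADS rather than ENABLE_THREADS, matching the LLVM_USE_OPROFILE and LLVM_USE_INTEL_JITEVENTS renames earlier in this diff: the macros written into confdefs.h gain an LLVM_ prefix, presumably to keep them out of the way of client projects' own configuration macros. The renamed guard as it now evaluates (names taken verbatim from the hunk above):

    if test "$LLVM_ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1; then
      : # probe for pthread.h only when both switches are on
    fi
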
@@ -14312,10 +15803,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -14351,10 +15859,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -14390,9 +15905,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -14468,10 +15983,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -14507,10 +16039,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -14546,9 +16085,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -14615,10 +16154,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -14654,10 +16210,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -14693,9 +16256,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -14750,11 +16313,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
{ echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6; }
@@ -14775,7 +16354,7 @@ _ACEOF
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
@@ -14794,7 +16373,7 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
ac_save_CXXFLAGS=$CXXFLAGS
- CXXFLAGS=-pedantic
+ CXXFLAGS="$CXXFLAGS -pedantic"
if test "$cross_compiling" = yes; then
ac_cv_huge_val_sanity=yes
else
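
A small but real fix rides along in the hunk above: the HUGE_VAL sanity check used to overwrite the user's CXXFLAGS with a bare -pedantic, and now appends to them instead. The surrounding save/restore pattern keeps the change contained; a sketch (the restore line is assumed from the standard pattern, not shown in this hunk):

    ac_save_CXXFLAGS=$CXXFLAGS
    CXXFLAGS="$CXXFLAGS -pedantic"   # append; do not clobber user-supplied flags
    # ... compile and run the HUGE_VAL test program here ...
    CXXFLAGS=$ac_save_CXXFLAGS       # restore afterwards
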
@@ -14897,10 +16476,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_type_pid_t=yes
else
echo "$as_me: failed program was:" >&5
@@ -14960,10 +16556,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_type_size_t=yes
else
echo "$as_me: failed program was:" >&5
@@ -15008,9 +16621,7 @@ cat >>conftest.$ac_ext <<_ACEOF
int
main ()
{
-struct tm tm;
- int *p = &tm.tm_sec;
- return !p;
+struct tm *tp; tp->tm_sec;
;
return 0;
}
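
The AC_STRUCT_TM probe body also changes shape: rather than declaring a struct tm object and taking the address of its tm_sec member, the older form merely mentions tm_sec through an uninitialized pointer. That is enough for the compiler to confirm time.h declares the member, and since the program is only compiled, never executed, the dangling pointer is harmless. A standalone sketch, assuming the usual AC_STRUCT_TM includes:

    cat > conftest.c <<'EOF'
    #include <sys/types.h>
    #include <time.h>
    int main () { struct tm *tp; tp->tm_sec; return 0; }
    EOF
    cc -c conftest.c 2>/dev/null && echo "struct tm is declared in time.h"
    rm -f conftest.c conftest.o
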
@@ -15028,10 +16639,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_struct_tm=time.h
else
echo "$as_me: failed program was:" >&5
@@ -15089,10 +16717,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_type_int64_t=yes
else
echo "$as_me: failed program was:" >&5
@@ -15155,10 +16800,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_type_uint64_t=yes
else
echo "$as_me: failed program was:" >&5
@@ -15216,10 +16878,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_type_u_int64_t=yes
else
echo "$as_me: failed program was:" >&5
@@ -15322,11 +17001,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15335,7 +17030,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -15419,11 +17114,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15432,7 +17143,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -15517,11 +17228,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15530,7 +17257,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -15613,11 +17340,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15626,7 +17369,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -15646,7 +17389,8 @@ done
-for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup
+
+for ac_func in mktemp posix_spawn pread realpath sbrk setrlimit strdup
do
as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
{ echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -15712,11 +17456,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15725,7 +17485,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -15808,11 +17568,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15821,7 +17597,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -15905,11 +17681,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -15918,7 +17710,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -16003,11 +17795,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -16016,7 +17824,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -16164,10 +17972,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_rand48=yes
else
echo "$as_me: failed program was:" >&5
@@ -16212,7 +18037,8 @@ int
main ()
{
#ifndef strerror_s
- (void) strerror_s;
+ char *p = (char *) strerror_s;
+ return !p;
#endif
;
@@ -16232,10 +18058,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_have_decl_strerror_s=yes
else
echo "$as_me: failed program was:" >&5
@@ -16308,11 +18151,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc__alloca=yes
else
echo "$as_me: failed program was:" >&5
@@ -16321,7 +18180,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc__alloca=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16377,11 +18236,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___alloca=yes
else
echo "$as_me: failed program was:" >&5
@@ -16390,7 +18265,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___alloca=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16446,11 +18321,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___chkstk=yes
else
echo "$as_me: failed program was:" >&5
@@ -16459,7 +18350,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___chkstk=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16515,11 +18406,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc____chkstk=yes
else
echo "$as_me: failed program was:" >&5
@@ -16528,7 +18435,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc____chkstk=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16585,11 +18492,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___ashldi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -16598,7 +18521,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___ashldi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16654,11 +18577,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___ashrdi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -16667,7 +18606,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___ashrdi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16723,11 +18662,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___divdi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -16736,7 +18691,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___divdi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16792,11 +18747,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___fixdfdi=yes
else
echo "$as_me: failed program was:" >&5
@@ -16805,7 +18776,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___fixdfdi=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16861,11 +18832,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___fixsfdi=yes
else
echo "$as_me: failed program was:" >&5
@@ -16874,7 +18861,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___fixsfdi=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16930,11 +18917,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___floatdidf=yes
else
echo "$as_me: failed program was:" >&5
@@ -16943,7 +18946,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___floatdidf=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -16999,11 +19002,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___lshrdi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -17012,7 +19031,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___lshrdi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -17068,11 +19087,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___moddi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -17081,7 +19116,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___moddi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -17137,11 +19172,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___udivdi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -17150,7 +19201,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___udivdi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -17206,11 +19257,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___umoddi3=yes
else
echo "$as_me: failed program was:" >&5
@@ -17219,7 +19286,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___umoddi3=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -17276,11 +19343,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___main=yes
else
echo "$as_me: failed program was:" >&5
@@ -17289,7 +19372,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___main=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -17345,11 +19428,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_lib_gcc___cmpdi2=yes
else
echo "$as_me: failed program was:" >&5
@@ -17358,7 +19457,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
ac_cv_lib_gcc___cmpdi2=no
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
LIBS=$ac_check_lib_save_LIBS
fi
@@ -17396,10 +19495,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
{ echo "$as_me:$LINENO: result: yes" >&5
echo "${ECHO_T}yes" >&6; }
@@ -17464,10 +19580,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_isnan_in_math_h=yes
else
echo "$as_me: failed program was:" >&5
@@ -17535,10 +19668,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_isnan_in_cmath=yes
else
echo "$as_me: failed program was:" >&5
@@ -17605,10 +19755,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_std_isnan_in_cmath=yes
else
echo "$as_me: failed program was:" >&5
@@ -17676,10 +19843,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_isinf_in_math_h=yes
else
echo "$as_me: failed program was:" >&5
@@ -17746,10 +19930,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_isinf_in_cmath=yes
else
echo "$as_me: failed program was:" >&5
@@ -17798,7 +19999,7 @@ cat >>conftest.$ac_ext <<_ACEOF
int
main ()
{
-float f; std::isinf(f)}
+float f; std::isinf(f);
;
return 0;
}
@@ -17816,10 +20017,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_std_isinf_in_cmath=yes
else
echo "$as_me: failed program was:" >&5
@@ -17886,10 +20104,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_cv_func_finite_in_ieeefp_h=yes
else
echo "$as_me: failed program was:" >&5
@@ -17960,10 +20195,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_header_compiler=yes
else
echo "$as_me: failed program was:" >&5
@@ -17999,10 +20251,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null && {
- test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
- test ! -s conftest.err
- }; then
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
ac_header_preproc=yes
else
echo "$as_me: failed program was:" >&5
@@ -18038,9 +20297,9 @@ echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&
{ echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
( cat <<\_ASBOX
-## ----------------------------------- ##
-## Report this to llvmbugs@cs.uiuc.edu ##
-## ----------------------------------- ##
+## ------------------------------------ ##
+## Report this to http://llvm.org/bugs/ ##
+## ------------------------------------ ##
_ASBOX
) | sed "s/^/$as_me: WARNING: /" >&2
;;
@@ -18133,11 +20392,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -18146,7 +20421,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -18203,21 +20478,21 @@ $ac_includes_default
#include <fcntl.h>
#include <sys/mman.h>
-#if !defined STDC_HEADERS && !defined HAVE_STDLIB_H
+#if !STDC_HEADERS && !HAVE_STDLIB_H
char *malloc ();
#endif
/* This mess was copied from the GNU getpagesize.h. */
-#ifndef HAVE_GETPAGESIZE
+#if !HAVE_GETPAGESIZE
/* Assume that all systems that can run configure have sys/param.h. */
-# ifndef HAVE_SYS_PARAM_H
+# if !HAVE_SYS_PARAM_H
# define HAVE_SYS_PARAM_H 1
# endif
# ifdef _SC_PAGESIZE
# define getpagesize() sysconf(_SC_PAGESIZE)
# else /* no _SC_PAGESIZE */
-# ifdef HAVE_SYS_PARAM_H
+# if HAVE_SYS_PARAM_H
# include <sys/param.h>
# ifdef EXEC_PAGESIZE
# define getpagesize() EXEC_PAGESIZE
@@ -18509,11 +20784,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
@@ -18542,7 +20833,7 @@ _ACEOF
echo "$as_me: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&2;}
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
@@ -18589,10 +20880,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
llvm_cv_linux_mixed=no
else
echo "$as_me: failed program was:" >&5
@@ -18686,11 +20994,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext &&
- $as_test_x conftest$ac_exeext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
eval "$as_ac_var=yes"
else
echo "$as_me: failed program was:" >&5
@@ -18699,7 +21023,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
eval "$as_ac_var=no"
fi
-rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
fi
ac_res=`eval echo '${'$as_ac_var'}'`
@@ -18720,9 +21044,6 @@ SHLIBEXT=$libltdl_cv_shlibext
SHLIBPATH_VAR=$libltdl_cv_shlibpath_var
-# Translate the various configuration directories and other basic
-# information into substitutions that will end up in Makefile.config.in
-# that these configured values can be used by the makefiles
if test "${prefix}" = "NONE" ; then
prefix="/usr/local"
fi
@@ -18747,8 +21068,10 @@ LLVM_CONFIGTIME=`date`
-# Place the various directores into the config.h file as #defines so that we
-# can know about the installation paths within LLVM.
+if test "${ENABLE_TIMESTAMPS}" = "0"; then
+ LLVM_CONFIGTIME="(timestamp not enabled)"
+fi
+
cat >>confdefs.h <<_ACEOF
#define LLVM_PREFIX "$LLVM_PREFIX"
@@ -18801,11 +21124,10 @@ _ACEOF
cat >>confdefs.h <<_ACEOF
-#define LLVM_HOSTTRIPLE "$host"
+#define LLVM_DEFAULT_TARGET_TRIPLE "$target"
_ACEOF
-# Determine which bindings to build.
if test "$BINDINGS_TO_BUILD" = auto ; then
BINDINGS_TO_BUILD=""
if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then
@@ -18815,12 +21137,9 @@ fi
BINDINGS_TO_BUILD=$BINDINGS_TO_BUILD
-# This isn't really configurey, but it avoids having to repeat the list in
-# other files.
ALL_BINDINGS=ocaml
-# Do any work necessary to ensure that bindings have what they need.
binding_prereqs_failed=0
for a_binding in $BINDINGS_TO_BUILD ; do
case "$a_binding" in
@@ -18876,18 +21195,18 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
oldcxxflags="$CXXFLAGS"
- CXXFLAGS="$CXXFLAGS -fvisibility-inlines-hidden"
+ CXXFLAGS="$CXXFLAGS -O0 -fvisibility-inlines-hidden -Werror"
cat >conftest.$ac_ext <<_ACEOF
/* confdefs.h. */
_ACEOF
cat confdefs.h >>conftest.$ac_ext
cat >>conftest.$ac_ext <<_ACEOF
/* end confdefs.h. */
-
+template <typename T> struct X { void __attribute__((noinline)) f() {} };
int
main ()
{
-
+X<int>().f();
;
return 0;
}
@@ -18905,10 +21224,27 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
rm -f conftest.er1
cat conftest.err >&5
echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && {
- test -z "$ac_cxx_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest.$ac_objext; then
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
llvm_cv_cxx_visibility_inlines_hidden=yes
else
echo "$as_me: failed program was:" >&5
@@ -18979,13 +21315,18 @@ ac_config_files="$ac_config_files llvm.spec"
ac_config_files="$ac_config_files docs/doxygen.cfg"
-if test -f ${srcdir}/tools/clang/README.txt; then
- ac_config_files="$ac_config_files tools/clang/docs/doxygen.cfg"
+if test "${clang_src_root}" = ""; then
+ clang_src_root="$srcdir/tools/clang"
fi
+if test -f ${clang_src_root}/README.txt; then
+ configh="include/clang/Config/config.h"
+ doxy="docs/doxygen.cfg"
+ ac_config_headers="$ac_config_headers tools/clang/${configh}:${clang_src_root}/${configh}.in"
-ac_config_files="$ac_config_files tools/llvm-config/llvm-config.in"
+ ac_config_files="$ac_config_files tools/clang/${doxy}:${clang_src_root}/${doxy}.in"
+fi
ac_config_files="$ac_config_files bindings/ocaml/llvm/META.llvm"
@@ -19166,8 +21507,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF
## M4sh Initialization. ##
## --------------------- ##
-# Be more Bourne compatible
-DUALCASE=1; export DUALCASE # for MKS sh
+# Be Bourne compatible
if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
emulate sh
NULLCMD=:
@@ -19176,13 +21516,10 @@ if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
alias -g '${1+"$@"}'='"$@"'
setopt NO_GLOB_SUBST
else
- case `(set -o) 2>/dev/null` in
- *posix*) set -o posix ;;
-esac
-
+ case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac
fi
-
-
+BIN_SH=xpg4; export BIN_SH # for Tru64
+DUALCASE=1; export DUALCASE # for MKS sh
# PATH needs CR
@@ -19406,28 +21743,19 @@ else
as_mkdir_p=false
fi
-if test -x / >/dev/null 2>&1; then
- as_test_x='test -x'
+# Find out whether ``test -x'' works. Don't use a zero-byte file, as
+# systems may use methods other than mode bits to determine executability.
+cat >conf$$.file <<_ASEOF
+#! /bin/sh
+exit 0
+_ASEOF
+chmod +x conf$$.file
+if test -x conf$$.file >/dev/null 2>&1; then
+ as_executable_p="test -x"
else
- if ls -dL / >/dev/null 2>&1; then
- as_ls_L_option=L
- else
- as_ls_L_option=
- fi
- as_test_x='
- eval sh -c '\''
- if test -d "$1"; then
- test -d "$1/.";
- else
- case $1 in
- -*)set "./$1";;
- esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in
- ???[sx]*):;;*)false;;esac;fi
- '\'' sh
- '
+ as_executable_p=:
fi
-as_executable_p=$as_test_x
+rm -f conf$$.file
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
@@ -19442,8 +21770,8 @@ exec 6>&1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by llvm $as_me 3.0, which was
-generated by GNU Autoconf 2.61. Invocation command line was
+This file was extended by LLVM $as_me 3.1svn, which was
+generated by GNU Autoconf 2.60. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
CONFIG_HEADERS = $CONFIG_HEADERS
@@ -19472,7 +21800,7 @@ current configuration.
Usage: $0 [OPTIONS] [FILE]...
-h, --help print this help, then exit
- -V, --version print version number and configuration settings, then exit
+ -V, --version print version number, then exit
-q, --quiet do not print progress messages
-d, --debug don't remove temporary files
--recheck update $as_me by reconfiguring in the same conditions
@@ -19495,8 +21823,8 @@ Report bugs to <bug-autoconf@gnu.org>."
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF
ac_cs_version="\\
-llvm config.status 3.0
-configured by $0, generated by GNU Autoconf 2.61,
+LLVM config.status 3.1svn
+configured by $0, generated by GNU Autoconf 2.60,
with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\"
Copyright (C) 2006 Free Software Foundation, Inc.
@@ -19618,8 +21946,8 @@ do
"Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;;
"llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;;
"docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES docs/doxygen.cfg" ;;
- "tools/clang/docs/doxygen.cfg") CONFIG_FILES="$CONFIG_FILES tools/clang/docs/doxygen.cfg" ;;
- "tools/llvm-config/llvm-config.in") CONFIG_FILES="$CONFIG_FILES tools/llvm-config/llvm-config.in" ;;
+ "tools/clang/${configh}") CONFIG_HEADERS="$CONFIG_HEADERS tools/clang/${configh}:${clang_src_root}/${configh}.in" ;;
+ "tools/clang/${doxy}") CONFIG_FILES="$CONFIG_FILES tools/clang/${doxy}:${clang_src_root}/${doxy}.in" ;;
"bindings/ocaml/llvm/META.llvm") CONFIG_FILES="$CONFIG_FILES bindings/ocaml/llvm/META.llvm" ;;
"setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
"Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;;
@@ -19777,6 +22105,7 @@ BUILD_CC!$BUILD_CC$ac_delim
BUILD_EXEEXT!$BUILD_EXEEXT$ac_delim
BUILD_CXX!$BUILD_CXX$ac_delim
CVSBUILD!$CVSBUILD$ac_delim
+ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim
ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim
ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim
DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
@@ -19788,12 +22117,11 @@ JIT!$JIT$ac_delim
TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
ENABLE_DOCS!$ENABLE_DOCS$ac_delim
ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
-ENABLE_THREADS!$ENABLE_THREADS$ac_delim
+LLVM_ENABLE_THREADS!$LLVM_ENABLE_THREADS$ac_delim
ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
ENABLE_PIC!$ENABLE_PIC$ac_delim
ENABLE_SHARED!$ENABLE_SHARED$ac_delim
ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
-ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -19835,16 +22163,18 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim
LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim
-ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim
OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
EXTRA_LD_OPTIONS!$EXTRA_LD_OPTIONS$ac_delim
+CLANG_SRC_ROOT!$CLANG_SRC_ROOT$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
+INTERNAL_PREFIX!$INTERNAL_PREFIX$ac_delim
NM!$NM$ac_delim
ifGNUmake!$ifGNUmake$ac_delim
LN_S!$LN_S$ac_delim
@@ -19869,8 +22199,6 @@ CIRCO!$CIRCO$ac_delim
GV!$GV$ac_delim
DOTTY!$DOTTY$ac_delim
XDOT_PY!$XDOT_PY$ac_delim
-PERL!$PERL$ac_delim
-HAVE_PERL!$HAVE_PERL$ac_delim
INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim
INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim
INSTALL_DATA!$INSTALL_DATA$ac_delim
@@ -19898,8 +22226,12 @@ CONVENIENCE_LTDL_FALSE!$CONVENIENCE_LTDL_FALSE$ac_delim
LIBADD_DL!$LIBADD_DL$ac_delim
NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim
NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim
+COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim
USE_UDIS86!$USE_UDIS86$ac_delim
USE_OPROFILE!$USE_OPROFILE$ac_delim
+USE_INTEL_JITEVENTS!$USE_INTEL_JITEVENTS$ac_delim
+INTEL_JITEVENTS_INCDIR!$INTEL_JITEVENTS_INCDIR$ac_delim
+INTEL_JITEVENTS_LIBDIR!$INTEL_JITEVENTS_LIBDIR$ac_delim
HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
MMAP_FILE!$MMAP_FILE$ac_delim
@@ -19925,7 +22257,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 88; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 92; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -20286,7 +22618,7 @@ do
cat >>$CONFIG_STATUS <<_ACEOF
# First, check the format of the line:
cat >"\$tmp/defines.sed" <<\\CEOF
-/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*/b def
+/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def
/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def
b
:def
@@ -20434,12 +22766,7 @@ if test "$no_recursion" != yes; then
case $ac_arg in
*\'*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
esac
- ac_sub_configure_args="'$ac_arg' $ac_sub_configure_args"
-
- # Pass --silent
- if test "$silent" = yes; then
- ac_sub_configure_args="--silent $ac_sub_configure_args"
- fi
+ ac_sub_configure_args="$ac_arg $ac_sub_configure_args"
ac_popdir=`pwd`
for ac_dir in : $subdirs; do test "x$ac_dir" = x: && continue
diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html
index e65279c1deb1..c59f60df6d99 100644
--- a/docs/AliasAnalysis.html
+++ b/docs/AliasAnalysis.html
@@ -418,9 +418,8 @@ implementing, you just override the interfaces you can improve.</p>
<div>
-<p>With only two special exceptions (the <tt><a
-href="#basic-aa">basicaa</a></tt> and <a href="#no-aa"><tt>no-aa</tt></a>
-passes) every alias analysis pass chains to another alias analysis
+<p>With only one special exception (the <a href="#no-aa"><tt>no-aa</tt></a>
+pass), every alias analysis pass chains to another alias analysis
implementation (for example, the user can specify "<tt>-basicaa -ds-aa
-licm</tt>" to get the maximum benefit from both alias
analyses). The alias analysis class automatically takes care of most of this
@@ -1061,7 +1060,7 @@ analysis directly.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-01-31 00:05:41 +0100 (Tue, 31 Jan 2012) $
</address>
</body>
diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html
index a1de242f6c7f..d9cce0be6b80 100644
--- a/docs/Bugpoint.html
+++ b/docs/Bugpoint.html
@@ -232,7 +232,7 @@ non-obvious ways. Here are some hints and tips:<p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
diff --git a/docs/CFEBuildInstrs.html b/docs/CFEBuildInstrs.html
deleted file mode 100644
index ab10844a8e0e..000000000000
--- a/docs/CFEBuildInstrs.html
+++ /dev/null
@@ -1,29 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
- "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
- <link rel="stylesheet" href="llvm.css" type="text/css" media="screen">
- <title>Building the LLVM C/C++ Front-End</title>
- <meta HTTP-EQUIV="REFRESH" CONTENT="3; URL=GCCFEBuildInstrs.html">
-</head>
-<body>
-<div class="doc_title">
-This page has moved <a href="GCCFEBuildInstrs.html">here</A>.
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
- <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2008-02-13 17:46:10 +0100 (Wed, 13 Feb 2008) $
-</address>
-
-</body>
-</html>
diff --git a/docs/CMake.html b/docs/CMake.html
index feb1db05cbaa..acc7fe9e8083 100644
--- a/docs/CMake.html
+++ b/docs/CMake.html
@@ -145,7 +145,7 @@
text. Generator's names are case-sensitive. Example:</p>
<div class="doc_code">
- <p><tt>cmake -G "Visual Studio 8 2005" path/to/llvm/source/root</tt></p>
+ <p><tt>cmake -G "Visual Studio 9 2008" path/to/llvm/source/root</tt></p>
</div>
<p>For a given development platform there can be more than one
@@ -250,7 +250,7 @@
<dd>Semicolon-separated list of targets to build, or <i>all</i> for
building all targets. Case-sensitive. For Visual C++ defaults
to <i>X86</i>. On the other cases defaults to <i>all</i>. Example:
- <i>-DLLVM_TARGETS_TO_BUILD="X86;PowerPC;Alpha"</i>.</dd>
+ <i>-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"</i>.</dd>
<dt><b>LLVM_BUILD_TOOLS</b>:BOOL</dt>
<dd>Build LLVM tools. Defaults to ON. Targets for building each tool
@@ -352,6 +352,24 @@
Function Interface library. If the library or its headers are
installed on a custom location, you can set the variables
FFI_INCLUDE_DIR and FFI_LIBRARY_DIR. Defaults to OFF.</dd>
+
+ <dt><b>LLVM_CLANG_SOURCE_DIR</b>:PATH</dt>
+ <dd>Path to Clang's source directory. Defaults to tools/clang.
+ Clang will not be built when it is empty or does not point to a valid
+ path.</dd>
+
+ <dt><b>LLVM_USE_OPROFILE</b>:BOOL</dt>
+ <dd>Enable building OProfile JIT support. Defaults to OFF.</dd>
+
+ <dt><b>LLVM_USE_INTEL_JITEVENTS</b>:BOOL</dt>
+ <dd>Enable building support for the Intel JIT Events API. Defaults to OFF.
+ See the example below.</dd>
+
+ <dt><b>LLVM_INTEL_JITEVENTS_DIR</b>:PATH</dt>
+ <dd> Path to installation of Intel(R) VTune(TM) Amplifier XE 2011,
+ used to locate the <tt>jitprofiling</tt> library. Default =
+ <tt>%VTUNE_AMPLIFIER_XE_2011_DIR%</tt> (Windows)
+ | <tt>/opt/intel/vtune_amplifier_xe_2011</tt> (Linux) </dd>
+
</dl>
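+
+ <p>For illustration, a configuration that enables the Intel JIT Events
+ support might be invoked as follows (a sketch; the path shown is just the
+ documented Linux default, so adjust it for your installation):</p>
+
+ <div class="doc_code">
+ <p><tt>cmake -DLLVM_USE_INTEL_JITEVENTS=ON
+ -DLLVM_INTEL_JITEVENTS_DIR=/opt/intel/vtune_amplifier_xe_2011
+ path/to/llvm/source/root</tt></p>
+ </div>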
</div>
diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html
index 9d0370fd3cc8..a37c3dca0692 100644
--- a/docs/CodeGenerator.html
+++ b/docs/CodeGenerator.html
@@ -50,6 +50,7 @@
<li><a href="#machinebasicblock">The <tt>MachineBasicBlock</tt>
class</a></li>
<li><a href="#machinefunction">The <tt>MachineFunction</tt> class</a></li>
+ <li><a href="#machineinstrbundle"><tt>MachineInstr Bundles</tt></a></li>
</ul>
</li>
<li><a href="#mc">The "MC" Layer</a>
@@ -97,6 +98,14 @@
<li><a href="#regAlloc_builtIn">Built in register allocators</a></li>
</ul></li>
<li><a href="#codeemit">Code Emission</a></li>
+ <li><a href="#vliw_packetizer">VLIW Packetizer</a>
+ <ul>
+ <li><a href="#vliw_mapping">Mapping from instructions to functional
+ units</a></li>
+ <li><a href="#vliw_repr">How the packetization tables are
+ generated and used</a></li>
+ </ul>
+ </li>
</ul>
</li>
<li><a href="#nativeassembler">Implementing a Native Assembler</a></li>
@@ -700,6 +709,21 @@ ret
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="callclobber">Call-clobbered registers</a>
+</h4>
+
+<div>
+
+<p>Some machine instructions, like calls, clobber a large number of physical
+ registers. Rather than adding <code>&lt;def,dead&gt;</code> operands for
+ all of them, it is possible to use an <code>MO_RegisterMask</code> operand
+ instead. The register mask operand holds a bit mask of preserved registers,
+  and everything else is considered to be clobbered by the instruction.</p>
+
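+<p>A minimal sketch (not code from the tree) of how a pass might test whether
+  a particular physical register is preserved across such an instruction,
+  assuming <tt>MO</tt> is an operand of the instruction and <tt>Reg</tt> is
+  the register of interest:</p>
+
+<div class="doc_code">
+<pre>
+if (MO.isRegMask() &amp;&amp;
+    MachineOperand::clobbersPhysReg(MO.getRegMask(), Reg)) {
+  // Reg is not in the preserved set; treat it as clobbered here.
+}
+</pre>
+</div>
+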
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="ssa">Machine code in SSA form</a>
</h4>
@@ -753,6 +777,90 @@ ret
</div>
+<!-- ======================================================================= -->
+<h3>
+ <a name="machineinstrbundle"><tt>MachineInstr Bundles</tt></a>
+</h3>
+
+<div>
+
+<p>The LLVM code generator can model sequences of instructions as MachineInstr
+ bundles. A MI bundle can model a VLIW group / pack which contains an
+ arbitrary number of parallel instructions. It can also be used to model
+ a sequential list of instructions (potentially with data dependencies) that
+ cannot be legally separated (e.g. ARM Thumb2 IT blocks).</p>
+
+<p>Conceptually a MI bundle is a MI with a number of other MIs nested within:
+</p>
+
+<div class="doc_code">
+<pre>
+--------------
+| Bundle | ---------
+-------------- \
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ----------------
+ | | MI |
+ | ----------------
+ |
+--------------
+| Bundle | --------
+-------------- \
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ----------------
+ | | MI |
+ | ----------------
+ | |
+ | ...
+ |
+--------------
+| Bundle | --------
+-------------- \
+ |
+ ...
+</pre>
+</div>
+
+<p> MI bundle support does not change the physical representations of
+  MachineBasicBlock and MachineInstr. All the MIs (including top level and
+  nested ones) are stored as a sequential list of MIs. The "bundled" MIs are
+  marked with the 'InsideBundle' flag. A top level MI with the special BUNDLE
+  opcode is used to represent the start of a bundle. It's legal to mix BUNDLE
+  MIs with individual MIs that are not inside bundles and do not represent
+  bundles.
+</p>
+
+<p> MachineInstr passes should operate on a MI bundle as a single unit. Member
+ methods have been taught to correctly handle bundles and MIs inside bundles.
+ The MachineBasicBlock iterator has been modified to skip over bundled MIs to
+ enforce the bundle-as-a-single-unit concept. An alternative iterator
+ instr_iterator has been added to MachineBasicBlock to allow passes to
+ iterate over all of the MIs in a MachineBasicBlock, including those which
+ are nested inside bundles. The top level BUNDLE instruction must have the
+  correct set of register MachineOperands that represent the cumulative
+ inputs and outputs of the bundled MIs.</p>
+
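+<p>As a minimal sketch, a pass can choose between the two views as follows,
+  assuming <tt>MBB</tt> is a MachineBasicBlock and <tt>visit()</tt> is a
+  hypothetical per-instruction helper:</p>
+
+<div class="doc_code">
+<pre>
+// Bundle view: a BUNDLE MI (or an unbundled MI) is seen as a single unit.
+for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
+  visit(I);
+
+// Instruction view: every MI is seen, including those nested inside bundles.
+for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
+       E = MBB.instr_end(); I != E; ++I)
+  visit(I);
+</pre>
+</div>
+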
+<p> Packing / bundling of MachineInstrs should be done as part of the register
+  allocation super-pass. More specifically, the pass which determines what
+  MIs should be bundled together must run after the code generator exits SSA
+  form (i.e. after the two-address pass, PHI elimination, and copy coalescing).
+ Bundles should only be finalized (i.e. adding BUNDLE MIs and input and
+ output register MachineOperands) after virtual registers have been
+ rewritten into physical registers. This requirement eliminates the need to
+ add virtual register operands to BUNDLE instructions which would effectively
+ double the virtual register def and use lists.</p>
+
+</div>
+
</div>
<!-- *********************************************************************** -->
@@ -2001,6 +2109,73 @@ to implement an assembler for your target.</p>
</div>
+<!-- ======================================================================= -->
+<h3>
+ <a name="vliw_packetizer">VLIW Packetizer</a>
+</h3>
+
+<div>
+
+<p>In a Very Long Instruction Word (VLIW) architecture, the compiler is
+   responsible for mapping instructions to the functional units available on
+ the architecture. To that end, the compiler creates groups of instructions
+ called <i>packets</i> or <i>bundles</i>. The VLIW packetizer in LLVM is
+ a target-independent mechanism to enable the packetization of machine
+ instructions.</p>
+
+<!-- _______________________________________________________________________ -->
+
+<h4>
+ <a name="vliw_mapping">Mapping from instructions to functional units</a>
+</h4>
+
+<div>
+
+<p>Instructions in a VLIW target can typically be mapped to multiple functional
+units. During the process of packetizing, the compiler must be able to reason
+about whether an instruction can be added to a packet. This decision can be
+complex since the compiler has to examine all possible mappings of instructions
+to functional units. Therefore, to alleviate compilation-time complexity, the
+VLIW packetizer parses the instruction classes of a target and generates tables
+at compiler build time. These tables can then be queried by the provided
+machine-independent API to determine if an instruction can be accommodated in a
+packet.</p>
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+ <a name="vliw_repr">
+ How the packetization tables are generated and used
+ </a>
+</h4>
+
+<div>
+
+<p>The packetizer reads instruction classes from a target's itineraries and
+creates a deterministic finite automaton (DFA) to represent the state of a
+packet. A DFA consists of three major elements: inputs, states, and
+transitions. The set of inputs for the generated DFA represents the instruction
+being added to a packet. The states represent the possible consumption
+of functional units by instructions in a packet. In the DFA, transitions from
+one state to another occur on the addition of an instruction to an existing
+packet. If there is a legal mapping of functional units to instructions, then
+the DFA contains a corresponding transition. The absence of a transition
+indicates that a legal mapping does not exist and that the instruction cannot
+be added to the packet.</p>
+
+<p>To generate tables for a VLIW target, add <i>Target</i>GenDFAPacketizer.inc
+as a target to the Makefile in the target directory. The exported API provides
+three functions: <tt>DFAPacketizer::clearResources()</tt>,
+<tt>DFAPacketizer::reserveResources(MachineInstr *MI)</tt>, and
+<tt>DFAPacketizer::canReserveResources(MachineInstr *MI)</tt>. These functions
+allow a target packetizer to add an instruction to an existing packet and to
+check whether an instruction can be added to a packet. See
+<tt>llvm/CodeGen/DFAPacketizer.h</tt> for more information.</p>
+
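+<p>As an illustration, a target packetizer built on this API might proceed
+  roughly as in the following sketch, where <tt>MI</tt> is the candidate
+  instruction, <tt>RT</tt> is a <tt>DFAPacketizer</tt>, and <tt>CurPacket</tt>
+  and <tt>endPacket()</tt> are hypothetical bookkeeping for the packet under
+  construction:</p>
+
+<div class="doc_code">
+<pre>
+if (RT-&gt;canReserveResources(MI)) {
+  // A legal mapping of functional units exists; add MI to the packet.
+  RT-&gt;reserveResources(MI);
+  CurPacket.push_back(MI);
+} else {
+  // No legal mapping exists; close the current packet and start a new one.
+  endPacket(CurPacket);
+  RT-&gt;clearResources();
+  RT-&gt;reserveResources(MI);
+  CurPacket.push_back(MI);
+}
+</pre>
+</div>
+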
+</div>
+
+</div>
+
</div>
<!-- *********************************************************************** -->
@@ -2212,16 +2387,14 @@ is the key:</p>
<tr>
<th>Feature</th>
<th>ARM</th>
- <th>Alpha</th>
- <th>Blackfin</th>
<th>CellSPU</th>
+ <th>Hexagon</th>
<th>MBlaze</th>
<th>MSP430</th>
<th>Mips</th>
<th>PTX</th>
<th>PowerPC</th>
<th>Sparc</th>
- <th>SystemZ</th>
<th>X86</th>
<th>XCore</th>
</tr>
@@ -2229,16 +2402,14 @@ is the key:</p>
<tr>
<td><a href="#feat_reliable">is generally reliable</a></td>
<td class="yes"></td> <!-- ARM -->
- <td class="unknown"></td> <!-- Alpha -->
- <td class="no"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="yes"></td> <!-- Hexagon -->
<td class="no"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
- <td class="no"></td> <!-- Mips -->
+ <td class="yes"></td> <!-- Mips -->
<td class="no"></td> <!-- PTX -->
<td class="yes"></td> <!-- PowerPC -->
<td class="yes"></td> <!-- Sparc -->
- <td class="unknown"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="unknown"></td> <!-- XCore -->
</tr>
@@ -2246,16 +2417,14 @@ is the key:</p>
<tr>
<td><a href="#feat_asmparser">assembly parser</a></td>
<td class="no"></td> <!-- ARM -->
- <td class="no"></td> <!-- Alpha -->
- <td class="no"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="no"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
<td class="no"></td> <!-- PTX -->
<td class="no"></td> <!-- PowerPC -->
<td class="no"></td> <!-- Sparc -->
- <td class="no"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="no"></td> <!-- XCore -->
</tr>
@@ -2263,16 +2432,14 @@ is the key:</p>
<tr>
<td><a href="#feat_disassembler">disassembler</a></td>
<td class="yes"></td> <!-- ARM -->
- <td class="no"></td> <!-- Alpha -->
- <td class="no"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="no"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
<td class="no"></td> <!-- PTX -->
<td class="no"></td> <!-- PowerPC -->
<td class="no"></td> <!-- Sparc -->
- <td class="no"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="no"></td> <!-- XCore -->
</tr>
@@ -2280,16 +2447,14 @@ is the key:</p>
<tr>
<td><a href="#feat_inlineasm">inline asm</a></td>
<td class="yes"></td> <!-- ARM -->
- <td class="unknown"></td> <!-- Alpha -->
- <td class="yes"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="yes"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
<td class="unknown"></td> <!-- PTX -->
<td class="yes"></td> <!-- PowerPC -->
<td class="unknown"></td> <!-- Sparc -->
- <td class="unknown"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="unknown"></td> <!-- XCore -->
</tr>
@@ -2297,16 +2462,14 @@ is the key:</p>
<tr>
<td><a href="#feat_jit">jit</a></td>
<td class="partial"><a href="#feat_jit_arm">*</a></td> <!-- ARM -->
- <td class="no"></td> <!-- Alpha -->
- <td class="no"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="no"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
- <td class="no"></td> <!-- Mips -->
+ <td class="yes"></td> <!-- Mips -->
<td class="unknown"></td> <!-- PTX -->
<td class="yes"></td> <!-- PowerPC -->
<td class="unknown"></td> <!-- Sparc -->
- <td class="unknown"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="unknown"></td> <!-- XCore -->
</tr>
@@ -2314,16 +2477,14 @@ is the key:</p>
<tr>
<td><a href="#feat_objectwrite">.o&nbsp;file writing</a></td>
<td class="no"></td> <!-- ARM -->
- <td class="no"></td> <!-- Alpha -->
- <td class="no"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
<td class="yes"></td> <!-- MBlaze -->
<td class="no"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
<td class="no"></td> <!-- PTX -->
<td class="no"></td> <!-- PowerPC -->
<td class="no"></td> <!-- Sparc -->
- <td class="no"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="no"></td> <!-- XCore -->
</tr>
@@ -2331,20 +2492,33 @@ is the key:</p>
<tr>
<td><a href="#feat_tailcall">tail calls</a></td>
<td class="yes"></td> <!-- ARM -->
- <td class="unknown"></td> <!-- Alpha -->
- <td class="no"></td> <!-- Blackfin -->
<td class="no"></td> <!-- CellSPU -->
+ <td class="yes"></td> <!-- Hexagon -->
<td class="no"></td> <!-- MBlaze -->
<td class="unknown"></td> <!-- MSP430 -->
<td class="no"></td> <!-- Mips -->
<td class="unknown"></td> <!-- PTX -->
<td class="yes"></td> <!-- PowerPC -->
<td class="unknown"></td> <!-- Sparc -->
- <td class="unknown"></td> <!-- SystemZ -->
<td class="yes"></td> <!-- X86 -->
<td class="unknown"></td> <!-- XCore -->
</tr>
+<tr>
+ <td><a href="#feat_segstacks">segmented stacks</a></td>
+ <td class="no"></td> <!-- ARM -->
+ <td class="no"></td> <!-- CellSPU -->
+ <td class="no"></td> <!-- Hexagon -->
+ <td class="no"></td> <!-- MBlaze -->
+ <td class="no"></td> <!-- MSP430 -->
+ <td class="no"></td> <!-- Mips -->
+ <td class="no"></td> <!-- PTX -->
+ <td class="no"></td> <!-- PowerPC -->
+ <td class="no"></td> <!-- Sparc -->
+ <td class="partial"><a href="#feat_segstacks_x86">*</a></td> <!-- X86 -->
+ <td class="no"></td> <!-- XCore -->
+</tr>
+
</table>
@@ -2428,6 +2602,22 @@ more more details</a>.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4 id="feat_segstacks">Segmented Stacks</h4>
+
+<div>
+
+<p>This box indicates whether the target supports segmented stacks. This
+replaces the traditional large C stack with many linked segments. It
+is compatible with the <a href="http://gcc.gnu.org/wiki/SplitStacks">gcc
+implementation</a> used by the Go front end.</p>
+
+<p id="feat_segstacks_x86">Basic support exists on the X86 backend. Currently
+vararg doesn't work and the object files are not marked the way the gold
+linker expects, but simple Go programs can be built by dragonegg.</p>
+
+</div>
+
</div>
<!-- ======================================================================= -->
@@ -2992,7 +3182,7 @@ MOVSX32rm16 -&gt; movsx, 32-bit register, 16-bit memory
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:54 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html
index 3ccbfc9c6235..847ac4c96b78 100644
--- a/docs/CodingStandards.html
+++ b/docs/CodingStandards.html
@@ -31,6 +31,7 @@
Errors</a></li>
<li><a href="#ci_portable_code">Write Portable Code</a></li>
<li><a href="#ci_rtti_exceptions">Do not use RTTI or Exceptions</a></li>
+ <li><a href="#ci_static_ctors">Do not use Static Constructors</a></li>
<li><a href="#ci_class_struct">Use of <tt>class</tt>/<tt>struct</tt> Keywords</a></li>
</ol></li>
</ol></li>
@@ -84,17 +85,16 @@
<!-- *********************************************************************** -->
-<h2>
- <a name="introduction">Introduction</a>
-</h2>
+<h2><a name="introduction">Introduction</a></h2>
<!-- *********************************************************************** -->
<div>
<p>This document attempts to describe a few coding standards that are being used
in the LLVM source tree. Although no coding standards should be regarded as
-absolute requirements to be followed in all instances, coding standards can be
-useful.</p>
+absolute requirements to be followed in all instances, coding standards are
+particularly important for large-scale code bases that follow a library-based
+design (like LLVM).</p>
<p>This document intentionally does not prescribe fixed standards for religious
issues such as brace placement and space usage. For issues like this, follow
@@ -102,14 +102,27 @@ the golden rule:</p>
<blockquote>
-<p><b><a name="goldenrule">If you are adding a significant body of source to a
-project, feel free to use whatever style you are most comfortable with. If you
-are extending, enhancing, or bug fixing already implemented code, use the style
-that is already being used so that the source is uniform and easy to
-follow.</a></b></p>
+<p><b><a name="goldenrule">If you are extending, enhancing, or bug fixing
+already implemented code, use the style that is already being used so that the
+source is uniform and easy to follow.</a></b></p>
</blockquote>
-
+
+<p>Note that some code bases (e.g. libc++) have really good reasons to deviate
+from the coding standards. In the case of libc++, this is because the naming
+and other conventions are dictated by the C++ standard. If you think there is
+a specific good reason to deviate from the standards here, please bring it up
+on the LLVMdev mailing list.</p>
+
+<p>There are some conventions that are not uniformly followed in the code base
+(e.g. the naming convention). This is because they are relatively new, and a
+lot of code was written before they were put in place. Our long term goal is
+for the entire codebase to follow the convention, but we explicitly <em>do
+not</em> want patches that do large-scale reformatting of existing code. On
+the other hand, it is reasonable to rename the methods of a class if you're
+about to change it in some other way. Just do the reformatting as a separate
+commit from the functionality change.</p>
+
<p>The ultimate goal of these guidelines is the increase readability and
maintainability of our common source base. If you have suggestions for topics to
be included, please mail them to <a
@@ -140,11 +153,11 @@ href="mailto:sabre@nondot.org">Chris</a>.</p>
<div>
<p>Comments are one critical part of readability and maintainability. Everyone
-knows they should comment, so should you. When writing comments, write them as
-English prose, which means they should use proper capitalization, punctuation,
-etc. Although we all should probably
-comment our code more than we do, there are a few very critical places that
-documentation is very useful:</p>
+knows they should comment their code, and so should you. When writing comments,
+write them as English prose, which means they should use proper capitalization,
+punctuation, etc. Aim to describe what the code is trying to do and why, not
+"how" it does it at a micro level. Here are a few critical things to
+document:</p>
<h5>File Headers</h5>
@@ -152,9 +165,7 @@ documentation is very useful:</p>
<p>Every source file should have a header on it that describes the basic
purpose of the file. If a file does not have a header, it should not be
-checked into Subversion. Most source trees will probably have a standard
-file header format. The standard format for the LLVM source tree looks like
-this:</p>
+checked into the tree. The standard header looks like this:</p>
<div class="doc_code">
<pre>
@@ -197,9 +208,8 @@ included, as well as any notes or "gotchas" in the code to watch out for.</p>
<p>Classes are one fundamental part of a good object oriented design. As such,
a class definition should have a comment block that explains what the class is
-used for... if it's not obvious. If it's so completely obvious your grandma
-could figure it out, it's probably safe to leave it out. Naming classes
-something sane goes a long ways towards avoiding writing documentation.</p>
+used for and how it works. Every non-trivial class is expected to have a
+doxygen comment block.</p>
<h5>Method information</h5>
@@ -210,8 +220,7 @@ something sane goes a long ways towards avoiding writing documentation.</p>
documented properly. A quick note about what it does and a description of the
borderline behaviour is all that is necessary here (unless something
particularly tricky or insidious is going on). The hope is that people can
-figure out how to use your interfaces without reading the code itself... that is
-the goal metric.</p>
+figure out how to use your interfaces without reading the code itself.</p>
<p>Good things to talk about here are what happens when something unexpected
happens: does the method return null? Abort? Format your hard disk?</p>
@@ -397,14 +406,6 @@ if ((V = getValue())) {
<p>which shuts <tt>gcc</tt> up. Any <tt>gcc</tt> warning that annoys you can
be fixed by massaging the code appropriately.</p>
-<p>These are the <tt>gcc</tt> warnings that I prefer to enable:</p>
-
-<div class="doc_code">
-<pre>
--Wall -Winline -W -Wwrite-strings -Wno-unused
-</pre>
-</div>
-
</div>
<!-- _______________________________________________________________________ -->
@@ -449,6 +450,51 @@ than <tt>dynamic_cast&lt;&gt;</tt>.</p>
<!-- _______________________________________________________________________ -->
<h4>
+<a name="ci_static_ctors">Do not use Static Constructors</a>
+</h4>
+<div>
+
+<p>Static constructors and destructors (e.g. global variables whose types have
+a constructor or destructor) should not be added to the code base, and should be
+removed wherever possible. Besides <a
+href="http://yosefk.com/c++fqa/ctors.html#fqa-10.12">well known problems</a>
+where the order of initialization is undefined between globals in different
+source files, the entire concept of static constructors is at odds with the
+common use case of LLVM as a library linked into a larger application.</p>
+
+<p>Consider the use of LLVM as a JIT linked into another application (perhaps
+for <a href="http://llvm.org/Users.html">OpenGL, custom languages</a>,
+<a href="http://llvm.org/devmtg/2010-11/Gritz-OpenShadingLang.pdf">shaders in
+movies</a>, etc). Due to the design of static constructors, they must be
+executed at startup time of the entire application, regardless of whether or
+how LLVM is used in that larger application. There are two problems with
+this:</p>
+
+<ol>
+ <li>The time to run the static constructors impacts startup time of
+ applications &mdash; a critical time for GUI apps, among others.</li>
+
+ <li>The static constructors cause the app to pull many extra pages of memory
+ off the disk: both the code for the constructor in each <tt>.o</tt> file and
+ the small amount of data that gets touched. In addition, touched/dirty pages
+ put more pressure on the VM system on low-memory machines.</li>
+</ol>
+
+<p>We would really like for there to be zero cost for linking in an additional
+LLVM target or other library into an application, but static constructors
+violate this goal.</p>
+
+<p>That said, LLVM unfortunately does contain static constructors. It would be
+a <a href="http://llvm.org/PR11944">great project</a> for someone to purge all
+static constructors from LLVM, and then enable the
+<tt>-Wglobal-constructors</tt> warning flag (when building with Clang) to ensure
+we do not regress in the future.
+</p>
+
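+<p>As a contrived sketch (not code from the tree), instead of a global with a
+dynamic initializer, defer construction to the first use, e.g. with LLVM's
+<tt>ManagedStatic</tt>:</p>
+
+<div class="doc_code">
+<pre>
+// Bad: the std::vector constructor runs at application startup, whether or
+// not this code is ever used.
+static std::vector&lt;unsigned&gt; SomeTable;
+
+// Better: construction happens on the first dereference, and llvm_shutdown()
+// takes care of destruction.
+static llvm::ManagedStatic&lt;std::vector&lt;unsigned&gt; &gt; SomeTable;
+</pre>
+</div>
+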
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="ci_class_struct">Use of <tt>class</tt> and <tt>struct</tt> Keywords</a>
</h4>
<div>
@@ -1151,22 +1197,10 @@ prefer it.</p>
<div>
<p>The use of <tt>#include &lt;iostream&gt;</tt> in library files is
-hereby <b><em>forbidden</em></b>. The primary reason for doing this is to
-support clients using LLVM libraries as part of larger systems. In particular,
-we statically link LLVM into some dynamic libraries. Even if LLVM isn't used,
-the static constructors are run whenever an application starts up that uses the
-dynamic library. There are two problems with this:</p>
-
-<ol>
- <li>The time to run the static c'tors impacts startup time of applications
- &mdash; a critical time for GUI apps.</li>
-
- <li>The static c'tors cause the app to pull many extra pages of memory off the
- disk: both the code for the static c'tors in each <tt>.o</tt> file and the
- small amount of data that gets touched. In addition, touched/dirty pages
- put more pressure on the VM system on low-memory machines.</li>
-</ol>
-
+hereby <b><em>forbidden</em></b>, because many common implementations
+transparently inject a <a href="#ci_static_ctors">static constructor</a> into
+every translation unit that includes it.</p>
+
<p>Note that using the other stream headers (<tt>&lt;sstream&gt;</tt> for
example) is not problematic in this regard &mdash;
just <tt>&lt;iostream&gt;</tt>. However, <tt>raw_ostream</tt> provides various
@@ -1527,7 +1561,7 @@ something.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
diff --git a/docs/CommandGuide/Makefile b/docs/CommandGuide/Makefile
index 2c2d0760e799..3f9f60b8e7fb 100644
--- a/docs/CommandGuide/Makefile
+++ b/docs/CommandGuide/Makefile
@@ -49,7 +49,7 @@ MAN := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_MAN_DIR)%.1, $(POD))
PS := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_PS_DIR)%.ps, $(POD))
# The set of man pages we will not install
-NO_INSTALL_MANS = $(DST_MAN_DIR)FileCheck.1
+NO_INSTALL_MANS = $(DST_MAN_DIR)FileCheck.1 $(DST_MAN_DIR)llvm-build.1
# The set of man pages that we will install
INSTALL_MANS = $(filter-out $(NO_INSTALL_MANS), $(MAN))
diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html
index 3e4e2200c95b..74ac0048d3f5 100644
--- a/docs/CommandGuide/index.html
+++ b/docs/CommandGuide/index.html
@@ -72,6 +72,12 @@ options) arguments to the tool you are interested in.</p>
<li><a href="/cmds/llvm-diff.html"><b>llvm-diff</b></a> -
structurally compare two modules</li>
+<li><a href="/cmds/llvm-cov.html"><b>llvm-cov</b></a> -
+ emit coverage information</li>
+
+<li><a href="/cmds/llvm-stress.html"><b>llvm-stress</b></a> -
+  generate random .ll files to fuzz different LLVM components</li>
+
</ul>
</div>
@@ -129,7 +135,7 @@ options) arguments to the tool you are interested in.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-09-20 20:24:04 +0200 (Tue, 20 Sep 2011) $
+ Last modified: $Date: 2012-02-26 09:35:53 +0100 (Sun, 26 Feb 2012) $
</address>
</body>
diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod
index faf4811dd18c..81fc2c918046 100644
--- a/docs/CommandGuide/lit.pod
+++ b/docs/CommandGuide/lit.pod
@@ -28,7 +28,7 @@ By default B<lit> will use a succinct progress display and will only print
summary information for test failures. See L<"OUTPUT OPTIONS"> for options
controlling the B<lit> progress display and output.
-B<lit> also includes a number of options for controlling how tests are exected
+B<lit> also includes a number of options for controlling how tests are executed
(specific features may depend on the particular test format). See L<"EXECUTION
OPTIONS"> for more information.
@@ -37,7 +37,7 @@ the options specified on the command line, see L<"SELECTION OPTIONS"> for
more information.
Users interested in the B<lit> architecture or designing a B<lit> testing
-implementation should see L<"LIT ARCHITECTURE">
+implementation should see L<"LIT INFRASTRUCTURE">
=head1 GENERAL OPTIONS
@@ -159,8 +159,8 @@ other results are not collated in any reasonable fashion.
=head1 EXIT STATUS
B<lit> will exit with an exit code of 1 if there are any FAIL or XPASS
-results. Otherwise, it will exit with the status 0. Other exit codes used for
-non-test related failures (for example a user error or an internal program
+results. Otherwise, it will exit with the status 0. Other exit codes are used
+for non-test related failures (for example a user error or an internal program
error).
=head1 TEST DISCOVERY
@@ -208,7 +208,7 @@ suite.
The test succeeded, but it was expected to fail. This is used for tests which
were specified as expected to fail, but are now succeeding (generally because
-the feautre they test was broken and has been fixed).
+the feature they test was broken and has been fixed).
=item B<FAIL>
@@ -227,7 +227,7 @@ which can report unsupported tests.
=back
Depending on the test format tests may produce additional information about
-their status (generally only for failures). See the L<Output|"LIT OUTPUT">
+their status (generally only for failures). See the L<Output|"OUTPUT OPTIONS">
section for more information.
=head1 LIT INFRASTRUCTURE
@@ -247,7 +247,7 @@ suite>. Test suites serve to define the format of the tests they contain, the
logic for finding those tests, and any additional information to run the tests.
B<lit> identifies test suites as directories containing I<lit.cfg> or
-I<lit.site.cfg> files (see also B<--config-prefix>. Test suites are initially
+I<lit.site.cfg> files (see also B<--config-prefix>). Test suites are initially
discovered by recursively searching up the directory hierarchy for all the input
files passed on the command line. You can use B<--show-suites> to display the
discovered test suites at startup.
@@ -283,13 +283,13 @@ builds this is the directory that will be scanned for tests.
B<test_exec_root> For out-of-dir builds, the path to the test suite root inside
the object directory. This is where tests will be run and temporary output files
-places.
+placed.
B<environment> A dictionary representing the environment to use when executing
tests in the suite.
B<suffixes> For B<lit> test formats which scan directories for tests, this
-variable as a list of suffixes to identify test files. Used by: I<ShTest>,
+variable is a list of suffixes to identify test files. Used by: I<ShTest>,
I<TclTest>.
B<substitutions> For B<lit> test formats which substitute variables into a test
@@ -301,6 +301,9 @@ reported as unsupported. Used by: I<ShTest>, I<TclTest>.
B<parent> The parent configuration, this is the config object for the directory
containing the test suite, or None.
+B<root> The root configuration. This is the top-most B<lit> configuration in
+the project.
+
B<on_clone> The config is actually cloned for every subdirectory inside a test
suite, to allow local configuration on a per-directory basis. The I<on_clone>
variable can be set to a Python function which will be called whenever a
@@ -315,7 +318,7 @@ directory being scanned.
Once test suites are located, B<lit> recursively traverses the source directory
(following I<test_src_root>) looking for tests. When B<lit> enters a
-sub-directory, it first checks to see if a nest test suite is defined in that
+sub-directory, it first checks to see if a nested test suite is defined in that
directory. If so, it loads that test suite recursively, otherwise it
instantiates a local test config for the directory (see L<"LOCAL CONFIGURATION
FILES">).
@@ -338,6 +341,53 @@ define subdirectories of optional tests, or to change other configuration
parameters -- for example, to change the test format, or the suffixes which
identify test files.
+=head2 TEST RUN OUTPUT FORMAT
+
+The B<lit> output for a test run conforms to the following schema, in both short
+and verbose modes (although in short mode no PASS lines will be shown). This
+schema has been chosen to be relatively easy for a machine to reliably parse (for
+example in buildbot log scraping), and for other tools to generate.
+
+Each test result is expected to appear on a line that matches:
+
+<result code>: <test name> (<progress info>)
+
+where <result code> is a standard test result such as PASS, FAIL, XFAIL, XPASS,
+UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and
+REGRESSED are also allowed.
+
+The <test name> field can consist of an arbitrary string containing no newline.
+
+The <progress info> field can be used to report progress information such as
+(1/300) or can be empty, but even when empty the parentheses are required.
+
+Each test result may include additional (multiline) log information in the
+following format.
+
+<log delineator> TEST '(<test name>)' <trailing delineator>
+... log message ...
+<log delineator>
+
+where <test name> should be the name of a preceding reported test, <log
+delineator> is a string of '*' characters I<at least> four characters long (the
+recommended length is 20), and <trailing delineator> is an arbitrary (unparsed)
+string.
+
+The following is an example of a test run output which consists of four tests A,
+B, C, and D, and a log message for the failing test C.
+
+=head3 Example Test Run Output Listing
+
+PASS: A (1 of 4)
+PASS: B (2 of 4)
+FAIL: C (3 of 4)
+******************** TEST 'C' FAILED ********************
+Test 'C' failed as a result of exit code 1.
+********************
+PASS: D (4 of 4)
+
=head2 LIT EXAMPLE TESTS
The B<lit> distribution contains several example implementations of test suites
diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod
index 50b45c8d5a2a..35abdaeb2b15 100644
--- a/docs/CommandGuide/llc.pod
+++ b/docs/CommandGuide/llc.pod
@@ -45,7 +45,7 @@ Print a summary of command line options.
=item B<-O>=I<uint>
Generate code at different optimization levels. These correspond to the I<-O0>,
-I<-O1>, I<-O2>, I<-O3>, and I<-O4> optimization levels used by B<llvm-gcc> and
+I<-O1>, I<-O2>, and I<-O3> optimization levels used by B<llvm-gcc> and
B<clang>.
=item B<-mtriple>=I<target triple>
diff --git a/docs/CommandGuide/llvm-build.pod b/docs/CommandGuide/llvm-build.pod
new file mode 100644
index 000000000000..14e08cb6299b
--- /dev/null
+++ b/docs/CommandGuide/llvm-build.pod
@@ -0,0 +1,86 @@
+=pod
+
+=head1 NAME
+
+llvm-build - LLVM Project Build Utility
+
+=head1 SYNOPSIS
+
+B<llvm-build> [I<options>]
+
+=head1 DESCRIPTION
+
+B<llvm-build> is a tool for working with LLVM projects that use the LLVMBuild
+system for describing their components.
+
+At heart, B<llvm-build> is responsible for loading, verifying, and manipulating
+the project's component data. The tool is primarily designed for use in
+implementing build systems and tools which need access to the project structure
+information.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-h>, B<--help>
+
+Print the builtin program help.
+
+=item B<--source-root>=I<PATH>
+
+If given, load the project at the given source root path. If this option is not
+given, the location of the project sources will be inferred from the location of
+the B<llvm-build> script itself.
+
+=item B<--print-tree>
+
+Print the component tree for the project.
+
+=item B<--write-library-table>
+
+Write out the C++ fragment which defines the components, library names, and
+required libraries. This C++ fragment is built into L<llvm-config|llvm-config>
+in order to provide clients with the list of required libraries for arbitrary
+component combinations.
+
+=item B<--write-llvmbuild>
+
+Write out new I<LLVMBuild.txt> files based on the loaded components. This is
+useful for auto-upgrading the schema of the files. B<llvm-build> will try to a
+limited extent to preserve the comments which were written in the original
+source file, although at this time it only preserves block comments that precede
+the section names in the I<LLVMBuild> files.
+
+=item B<--write-cmake-fragment>
+
+Write out the LLVMBuild in the form of a CMake fragment, so it can easily be
+consumed by the CMake based build system. The exact contents and format of this
+file are closely tied to how LLVMBuild is integrated with CMake, see LLVM's
+top-level CMakeLists.txt.
+
+=item B<--write-make-fragment>
+
+Write out the LLVMBuild in the form of a Makefile fragment, so it can easily be
+consumed by a Make based build system. The exact contents and format of this
+file are closely tied to how LLVMBuild is integrated with the Makefiles, see
+LLVM's Makefile.rules.
+
+=item B<--llvmbuild-source-root>=I<PATH>
+
+If given, expect the I<LLVMBuild> files for the project to be rooted at the
+given path, instead of inside the source tree itself. This option is primarily
+designed for use in conjunction with B<--write-llvmbuild> to test changes to
+the I<LLVMBuild> schema.
+
+=back
+
+=head1 EXIT STATUS
+
+B<llvm-build> exits with 0 if the operation was successful. Otherwise, it will
+exit with a non-zero value.
+
+=head1 AUTHOR
+
+Maintained by the LLVM Team (L<http://llvm.org/>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-cov.pod b/docs/CommandGuide/llvm-cov.pod
new file mode 100644
index 000000000000..e8ff68311750
--- /dev/null
+++ b/docs/CommandGuide/llvm-cov.pod
@@ -0,0 +1,45 @@
+=pod
+
+=head1 NAME
+
+llvm-cov - emit coverage information
+
+=head1 SYNOPSIS
+
+B<llvm-cov> [-gcno=filename] [-gcda=filename] [-dump]
+
+=head1 DESCRIPTION
+
+The experimental B<llvm-cov> tool reads in a description file generated by the
+compiler and a coverage data file generated by an instrumented program. It
+assumes that the description and data files use the same format as gcov files.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-gcno=filename>
+
+This option selects the input description file generated by the compiler while
+instrumenting the program.
+
+=item B<-gcda=filename>
+
+This option selects the coverage data file generated by the instrumented
+program.
+
+=item B<-dump>
+
+This option enables an output dump that is suitable to help a developer debug
+B<llvm-cov> itself.
+
+=back
+
+=head1 EXIT STATUS
+
+B<llvm-cov> returns 1 if it cannot read input files. Otherwise, it exits with zero.
+
+=head1 AUTHOR
+
+B<llvm-cov> is maintained by the LLVM Team (L<http://llvm.org/>).
+
+=cut
diff --git a/docs/CommandGuide/llvm-stress.pod b/docs/CommandGuide/llvm-stress.pod
new file mode 100644
index 000000000000..92083d2d2bc7
--- /dev/null
+++ b/docs/CommandGuide/llvm-stress.pod
@@ -0,0 +1,42 @@
+=pod
+
+=head1 NAME
+
+llvm-stress - generate random .ll files
+
+=head1 SYNOPSIS
+
+B<llvm-stress> [-o=filename] [-size=size] [-seed=seed]
+
+=head1 DESCRIPTION
+
+The B<llvm-stress> tool is used to generate random .ll files that can be used to
+test different components of LLVM.
+
+=head1 OPTIONS
+
+=over
+
+=item B<-o> I<filename>
+
+Specify the output filename.
+
+=item B<-size> I<size>
+
+Specify the size of the generated .ll file.
+
+=item B<-seed> I<seed>
+
+Specify the seed to be used for the randomly generated instructions.
+
+=back
+
+=head1 EXIT STATUS
+
+B<llvm-stress> returns 0.
+
+=head1 AUTHOR
+
+B<llvm-stress> is maintained by the LLVM Team (L<http://llvm.org/>).
+
+=cut
diff --git a/docs/CommandGuide/tblgen.pod b/docs/CommandGuide/tblgen.pod
index fe1be5ecfa3c..180bcc1769e6 100644
--- a/docs/CommandGuide/tblgen.pod
+++ b/docs/CommandGuide/tblgen.pod
@@ -41,6 +41,10 @@ Specify where to find other target description files for inclusion. The
F<directory> value should be a full or partial path to a directory that contains
target description files.
+=item B<-asmparsernum> F<N>
+
+Make -gen-asm-parser emit assembly parser number F<N>.
+
=item B<-asmwriternum> F<N>
Make -gen-asm-writer emit assembly writer number F<N>.
@@ -57,38 +61,50 @@ Print all records to standard output (default).
Print enumeration values for a class
-=item B<-gen-emitter>
+=item B<-print-sets>
-Generate machine code emitter.
+Print expanded sets for testing DAG exprs.
-=item B<-gen-register-enums>
+=item B<-gen-emitter>
-Generate the enumeration values for all registers.
+Generate machine code emitter.
-=item B<-gen-register-desc>
+=item B<-gen-register-info>
-Generate a register info description for each register.
+Generate registers and register classes info.
-=item B<-gen-register-desc-header>
+=item B<-gen-instr-info>
-Generate a register info description header for each register.
+Generate instruction descriptions.
-=item B<-gen-instr-enums>
+=item B<-gen-asm-writer>
-Generate enumeration values for instructions.
+Generate the assembly writer.
-=item B<-gen-instr-desc>
+=item B<-gen-disassembler>
-Generate instruction descriptions.
+Generate disassembler.
-=item B<-gen-asm-writer>
+=item B<-gen-pseudo-lowering>
-Generate the assembly writer.
+Generate pseudo instruction lowering.
=item B<-gen-dag-isel>
Generate a DAG (Directed Acycle Graph) instruction selector.
+=item B<-gen-asm-matcher>
+
+Generate assembly instruction matcher.
+
+=item B<-gen-dfa-packetizer>
+
+Generate DFA Packetizer for VLIW targets.
+
+=item B<-gen-fast-isel>
+
+Generate a "fast" instruction selector.
+
=item B<-gen-subtarget>
Generate subtarget enumerations.
@@ -97,6 +113,14 @@ Generate subtarget enumerations.
Generate intrinsic information.
+=item B<-gen-tgt-intrinsic>
+
+Generate target intrinsic information.
+
+=item B<-gen-enhanced-disassembly-info>
+
+Generate enhanced disassembly info.
+
=item B<-version>
Show the version number of this program.
diff --git a/docs/CompilerWriterInfo.html b/docs/CompilerWriterInfo.html
index ed326b30eb6d..5fdb4fc6e37c 100644
--- a/docs/CompilerWriterInfo.html
+++ b/docs/CompilerWriterInfo.html
@@ -21,7 +21,6 @@
<ol>
<li><a href="#hw">Hardware</a>
<ol>
- <li><a href="#alpha">Alpha</a></li>
<li><a href="#arm">ARM</a></li>
<li><a href="#ia64">Itanium</a></li>
<li><a href="#mips">MIPS</a></li>
@@ -49,17 +48,6 @@
<div>
<!-- ======================================================================= -->
-<h3><a name="alpha">Alpha</a></h3>
-
-<div>
-<ul>
-<li><a
-href="http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html">Alpha manuals</a>
-</li>
-</ul>
-</div>
-
-<!-- ======================================================================= -->
<h3><a name="arm">ARM</a></h3>
<div>
@@ -272,7 +260,7 @@ processors.</li>
<a href="http://misha.brukman.net">Misha Brukman</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
+ Last modified: $Date: 2011-10-28 00:56:32 +0200 (Fri, 28 Oct 2011) $
</address>
</body>
diff --git a/docs/DebuggingJITedCode.html b/docs/DebuggingJITedCode.html
index a6883adc531f..1946fdd9e08b 100644
--- a/docs/DebuggingJITedCode.html
+++ b/docs/DebuggingJITedCode.html
@@ -147,7 +147,7 @@ coordinate with GDB to get better debug information.
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="mailto:reid.kleckner@gmail.com">Reid Kleckner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html
index 7c78016693f7..264975e31985 100644
--- a/docs/DeveloperPolicy.html
+++ b/docs/DeveloperPolicy.html
@@ -43,7 +43,7 @@
the distributed nature of LLVM's development. By stating the policy in clear
terms, we hope each developer can know ahead of time what to expect when
making LLVM contributions. This policy covers all llvm.org subprojects,
- including Clang, LLDB, etc.</p>
+ including Clang, LLDB, libc++, etc.</p>
<p>This policy is also designed to accomplish the following objectives:</p>
<ol>
@@ -52,6 +52,9 @@
<li>Make life as simple and easy for contributors as possible.</li>
<li>Keep the top of Subversion trees as stable as possible.</li>
+
+ <li>Establish awareness of the project's <a href="#clp">copyright,
+ license, and patent policies</a> with contributors to the project.</li>
</ol>
<p>This policy is aimed at frequent contributors to LLVM. People interested in
@@ -212,6 +215,10 @@
<li><b>Jakob Olesen</b>: Register allocators and TableGen.</li>
<li><b>Duncan Sands</b>: dragonegg and llvm-gcc 4.2.</li>
+
+ <li><b>Peter Collingbourne</b>: libclc.</li>
+
+ <li><b>Tobias Grosser</b>: polly.</li>
</ol>
<p>Note that code ownership is completely different than reviewers: anyone can
@@ -495,18 +502,24 @@
<!--=========================================================================-->
<div>
+
+<div class="doc_notes">
+<p style="text-align:center;font-weight:bold">NOTE: This section deals with
+ legal matters but does not provide legal advice. We are not lawyers &mdash;
+ please seek legal counsel from an attorney.</p>
+</div>
+
+<div>
<p>This section addresses the issues of copyright, license and patents for the
- LLVM project. The copyright holder for the code is held by the individual
+ LLVM project. The copyright for the code is held by the individual
contributors of the code and the terms of its license to LLVM users and
developers is the
<a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of
- Illinois/NCSA Open Source License</a>.</p>
+ Illinois/NCSA Open Source License</a> (with portions dual licensed under the
+ <a href="http://www.opensource.org/licenses/mit-license.php">MIT License</a>,
+  see below). As a contributor to the LLVM project, you agree to allow any
+  contributions to the project to be licensed under these terms.</p>
-<div class="doc_notes">
-<p style="text-align:center;font-weight:bold">NOTE: This section deals with
- legal matters but does not provide legal advice. We are not lawyers, please
- seek legal counsel from an attorney.</p>
-</div>
<!-- _______________________________________________________________________ -->
<h3><a name="copyright">Copyright</a></h3>
@@ -535,7 +548,10 @@
<h3><a name="license">License</a></h3>
<div>
<p>We intend to keep LLVM perpetually open source and to use a liberal open
- source license. All of the code in LLVM is available under the
+ source license. <b>As a contributor to the project, you agree that any
+ contributions be licensed under the terms of the corresponding
+ subproject.</b>
+ All of the code in LLVM is available under the
<a href="http://www.opensource.org/licenses/UoI-NCSA.php">University of
Illinois/NCSA Open Source License</a>, which boils down to this:</p>
@@ -556,7 +572,7 @@
if further clarification is needed.</p>
<p>In addition to the UIUC license, the runtime library components of LLVM
- (<b>compiler_rt and libc++</b>) are also licensed under the <a
+ (<b>compiler_rt, libc++, and libclc</b>) are also licensed under the <a
href="http://www.opensource.org/licenses/mit-license.php">MIT license</a>,
which does not contain the binary redistribution clause. As a user of these
runtime libraries, it means that you can choose to use the code under either
@@ -570,16 +586,17 @@
the LLVM core to libc++ without the copyright owner's permission.
</p>
-<p>Note that the LLVM Project does distribute llvm-gcc, <b>which is GPL.</b>
+<p>Note that the LLVM Project does distribute llvm-gcc and dragonegg, <b>which
+ are GPL.</b>
This means that anything "linked" into llvm-gcc must itself be compatible
with the GPL, and must be releasable under the terms of the GPL. This
implies that <b>any code linked into llvm-gcc and distributed to others may
be subject to the viral aspects of the GPL</b> (for example, a proprietary
code generator linked into llvm-gcc must be made available under the GPL).
This is not a problem for code already distributed under a more liberal
- license (like the UIUC license), and does not affect code generated by
- llvm-gcc. It may be a problem if you intend to base commercial development
- on llvm-gcc without redistributing your source code.</p>
+ license (like the UIUC license), and GPL-containing subprojects are kept
+ in separate SVN repositories whose LICENSE.txt files specifically indicate
+ that they contain GPL code.</p>
<p>We have no plans to change the license of LLVM. If you have questions or
comments about the license, please contact the
@@ -596,7 +613,8 @@
arbitrary purposes (including commercial use).</p>
<p>When contributing code, we expect contributors to notify us of any potential
- for patent-related trouble with their changes. If you or your employer own
+ for patent-related trouble with their changes (including from third parties).
+ If you or your employer own
the rights to a patent and would like to contribute code to LLVM that relies
on it, we require that the copyright owner sign an agreement that allows any
other user of LLVM to freely use your patent. Please contact
@@ -606,6 +624,8 @@
</div>
+</div>
+
<!-- *********************************************************************** -->
<hr>
<address>
@@ -616,7 +636,7 @@
Written by the
<a href="mailto:llvm-oversight@cs.uiuc.edu">LLVM Oversight Group</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-07 19:26:38 +0200 (Fri, 07 Oct 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
</html>
diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html
index 85ab796938e7..49e6b010030c 100644
--- a/docs/ExceptionHandling.html
+++ b/docs/ExceptionHandling.html
@@ -38,7 +38,6 @@
<li><a href="#llvm_eh_sjlj_longjmp"><tt>llvm.eh.sjlj.longjmp</tt></a></li>
<li><a href="#llvm_eh_sjlj_lsda"><tt>llvm.eh.sjlj.lsda</tt></a></li>
<li><a href="#llvm_eh_sjlj_callsite"><tt>llvm.eh.sjlj.callsite</tt></a></li>
- <li><a href="#llvm_eh_sjlj_dispatchsetup"><tt>llvm.eh.sjlj.dispatchsetup</tt></a></li>
</ol></li>
<li><a href="#asm">Asm Table Formats</a>
<ol>
@@ -50,7 +49,7 @@
</tr></table>
<div class="doc_author">
- <p>Written by <a href="mailto:jlaskey@mac.com">Jim Laskey</a></p>
+ <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
</div>
@@ -498,23 +497,6 @@
</div>
-<!-- ======================================================================= -->
-<h4>
- <a name="llvm_eh_sjlj_dispatchsetup">llvm.eh.sjlj.dispatchsetup</a>
-</h4>
-
-<div>
-
-<pre>
- void @llvm.eh.sjlj.dispatchsetup(i32 %dispatch_value)
-</pre>
-
-<p>For SJLJ based exception handling, the <tt>llvm.eh.sjlj.dispatchsetup</tt>
- intrinsic is used by targets to do any unwind edge setup they need. By
- default, no action is taken.</p>
-
-</div>
-
</div>
<!-- ======================================================================= -->
@@ -573,9 +555,8 @@
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
- <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-09-27 22:16:57 +0200 (Tue, 27 Sep 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
diff --git a/docs/ExtendingLLVM.html b/docs/ExtendingLLVM.html
index 15e698421066..a0cc4ead35d5 100644
--- a/docs/ExtendingLLVM.html
+++ b/docs/ExtendingLLVM.html
@@ -105,19 +105,6 @@ function and then be turned into an instruction if warranted.</p>
support for it. Generally you must do the following steps:</p>
<dl>
-<dt>Add support to the C backend in <tt>lib/Target/CBackend/</tt></dt>
-
-<dd>Depending on the intrinsic, there are a few ways to implement this. For
- most intrinsics, it makes sense to add code to lower your intrinsic in
- <tt>LowerIntrinsicCall</tt> in <tt>lib/CodeGen/IntrinsicLowering.cpp</tt>.
- Second, if it makes sense to lower the intrinsic to an expanded sequence of
- C code in all cases, just emit the expansion in <tt>visitCallInst</tt> in
- <tt>Writer.cpp</tt>. If the intrinsic has some way to express it with GCC
- (or any other compiler) extensions, it can be conditionally supported based
- on the compiler compiling the CBE output (see <tt>llvm.prefetch</tt> for an
- example). Third, if the intrinsic really has no way to be lowered, just
- have the code generator emit code that prints an error message and calls
- abort if executed.</dd>
<dt>Add support to the .td file for the target(s) of your choice in
<tt>lib/Target/*/*.td</tt>.</dt>
@@ -385,7 +372,7 @@ void calcTypeName(const Type *Ty,
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-03-23 06:50:46 +0100 (Fri, 23 Mar 2012) $
</address>
</body>
diff --git a/docs/FAQ.html b/docs/FAQ.html
index 341f1c977a9d..78c02684b51d 100644
--- a/docs/FAQ.html
+++ b/docs/FAQ.html
@@ -140,6 +140,8 @@
</h2>
<!-- *********************************************************************** -->
+<div>
+
<div class="question">
<p>Why are the LLVM source code and the front-end distributed under different
licenses?</p>
@@ -185,12 +187,16 @@
GPL, as explained in the first question above.</p>
</div>
+</div>
+
<!-- *********************************************************************** -->
<h2>
<a name="source">Source Code</a>
</h2>
<!-- *********************************************************************** -->
+<div>
+
<div class="question">
<p>In what language is LLVM written?</p>
</div>
@@ -223,12 +229,16 @@ LLVM have been ported to a plethora of platforms.</p>
</div>
+</div>
+
<!-- *********************************************************************** -->
<h2>
<a name="build">Build Problems</a>
</h2>
<!-- *********************************************************************** -->
+<div>
+
<div class="question">
<p>When I run configure, it finds the wrong C compiler.</p>
</div>
@@ -435,11 +445,15 @@ Stop.
<p>We regret the inconvenience.</p>
</div>
+</div>
+
<!-- *********************************************************************** -->
<h2>
<a name="felangs">Source Languages</a>
</h2>
+<div>
+
<div class="question">
<p><a name="langs">What source languages are supported?</a></p>
</div>
@@ -540,11 +554,15 @@ Stop.
Instruction</a>.</p>
</div>
+</div>
+
<!-- *********************************************************************** -->
<h2>
<a name="cfe">Using the GCC Front End</a>
</h2>
+<div>
+
<div class="question">
<p>When I compile software that uses a configure script, the configure script
thinks my system has all of the header files and libraries it is testing for.
@@ -697,11 +715,15 @@ Stop.
order to have the result conform to the platform ABI.</p>
</div>
+</div>
+
<!-- *********************************************************************** -->
<h2>
<a name="cfe_code">Questions about code generated by the GCC front-end</a>
</h2>
+<div>
+
<div class="question">
<p><a name="iosinit">What is this <tt>llvm.global_ctors</tt> and
<tt>_GLOBAL__I__tmp_webcompile...</tt> stuff that happens when I <tt>#include
@@ -907,6 +929,8 @@ F.i:
</div>
+</div>
+
<!-- *********************************************************************** -->
<hr>
@@ -917,7 +941,7 @@ F.i:
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-09-20 02:42:28 +0200 (Tue, 20 Sep 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
index 10bc6632c59f..9463eaa64f25 100644
--- a/docs/GarbageCollection.html
+++ b/docs/GarbageCollection.html
@@ -429,7 +429,8 @@ programs that use different garbage collection algorithms (or none at all).</p>
<p>The <tt>llvm.gcroot</tt> intrinsic is used to inform LLVM that a stack
variable references an object on the heap and is to be tracked for garbage
collection. The exact impact on generated code is specified by a <a
-href="#plugin">compiler plugin</a>.</p>
+href="#plugin">compiler plugin</a>. All calls to <tt>llvm.gcroot</tt> <b>must</b> reside
+ inside the first basic block.</p>
<p>A compiler which uses mem2reg to raise imperative code using <tt>alloca</tt>
into SSA form need only add a call to <tt>@llvm.gcroot</tt> for those variables
@@ -437,7 +438,9 @@ which a pointers into the GC heap.</p>
<p>It is also important to mark intermediate values with <tt>llvm.gcroot</tt>.
For example, consider <tt>h(f(), g())</tt>. Beware leaking the result of
-<tt>f()</tt> in the case that <tt>g()</tt> triggers a collection.</p>
+<tt>f()</tt> in the case that <tt>g()</tt> triggers a collection. Note that
+stack variables must be initialized and marked with <tt>llvm.gcroot</tt> in
+the function's prologue.</p>
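+
+<p>A hypothetical frontend sketch that satisfies both rules &mdash; the
+<tt>llvm.gcroot</tt> call is in the first block and the root is initialized
+there &mdash; where <tt>F</tt> is the function, <tt>M</tt> its module,
+<tt>PtrTy</tt> is the pointer type being rooted, and <tt>Meta</tt> is the
+metadata pointer (possibly null):</p>
+
+<div class="doc_code">
+<pre>
+IRBuilder&lt;&gt; B(&amp;F.getEntryBlock(), F.getEntryBlock().begin());
+AllocaInst *Root = B.CreateAlloca(PtrTy, 0, "gc.root");
+B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::gcroot),
+              B.CreateBitCast(Root, B.getInt8PtrTy()-&gt;getPointerTo()),
+              Meta);
+B.CreateStore(Constant::getNullValue(PtrTy), Root); // initialize in prologue
+</pre>
+</div>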
<p>The first argument <b>must</b> be a value referring to an alloca instruction
or a bitcast of an alloca. The second contains a pointer to metadata that
@@ -1379,7 +1382,7 @@ Fergus Henderson. International Symposium on Memory Management 2002.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-08-12 08:17:17 +0200 (Fri, 12 Aug 2011) $
+ Last modified: $Date: 2012-03-03 05:32:33 +0100 (Sat, 03 Mar 2012) $
</address>
</body>
diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html
index f678e2723634..17a93f5245ba 100644
--- a/docs/GetElementPtr.html
+++ b/docs/GetElementPtr.html
@@ -594,10 +594,10 @@ idx3 = (char*) &amp;MyVar + 8
because LLVM has no restrictions on mixing types in addressing, loads or
stores.</p>
- <p>It would be possible to add special annotations to the IR, probably using
- metadata, to describe a different type system (such as the C type system),
- and do type-based aliasing on top of that. This is a much bigger
- undertaking though.</p>
+ <p>LLVM's type-based alias analysis pass uses metadata to describe a different
+ type system (such as the C type system), and performs type-based aliasing
+ on top of that. Further details are in the
+ <a href="LangRef.html#tbaa">language reference</a>.</p>
</div>
@@ -747,7 +747,7 @@ idx3 = (char*) &amp;MyVar + 8
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:54 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 14:04:26 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index e198e0277196..52baf90aa71a 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -29,7 +29,6 @@
<li><a href="#unpack">Unpacking the LLVM Archives</a></li>
<li><a href="#checkout">Checkout LLVM from Subversion</a></li>
<li><a href="#git_mirror">LLVM GIT mirror</a></li>
- <li><a href="#installcf">Install the GCC Front End</a></li>
<li><a href="#config">Local LLVM Configuration</a></li>
<li><a href="#compile">Compiling the LLVM Suite Source Code</a></li>
<li><a href="#cross-compile">Cross-Compiling LLVM</a></li>
@@ -52,7 +51,7 @@
<li><a href="#tutorial">An Example Using the LLVM Tool Chain</a>
<ol>
- <li><a href="#tutorial4">Example with llvm-gcc4</a></li>
+ <li><a href="#tutorial4">Example with Clang</a></li>
</ol>
<li><a href="#problems">Common Problems</a>
<li><a href="#links">Links</a>
@@ -82,16 +81,15 @@ basic information.</p>
<p>First, LLVM comes in three pieces. The first piece is the LLVM
suite. This contains all of the tools, libraries, and header files
-needed to use the low level virtual machine. It contains an
-assembler, disassembler, bitcode analyzer and bitcode optimizer. It
-also contains basic regression tests that can be used to test the LLVM
-tools and the GCC front end.</p>
-
-<p>The second piece is the GCC front end. This component provides a version of
-GCC that compiles C and C++ code into LLVM bitcode. Currently, the GCC front
-end uses the GCC parser to convert code to LLVM. Once
-compiled into LLVM bitcode, a program can be manipulated with the LLVM tools
-from the LLVM suite.</p>
+needed to use LLVM. It contains an assembler, disassembler, bitcode
+analyzer and bitcode optimizer. It also contains basic regression tests that
+can be used to test the LLVM tools and the Clang front end.</p>
+
+<p>The second piece is the <a href="http://clang.llvm.org/">Clang</a> front end.
+This component compiles C, C++, Objective C, and Objective C++ code into LLVM
+bitcode. Once compiled into LLVM bitcode, a program can be manipulated with the
+LLVM tools from the LLVM suite.
+</p>
<p>
There is a third, optional piece called Test Suite. It is a suite of programs
@@ -109,83 +107,98 @@ and performance.
<div>
+<p>The LLVM Getting Started documentation may be out of date, so the Clang
+<a href="http://clang.llvm.org/get_started.html">Getting Started</a> page might
+also be a good place to start.</p>
+
<p>Here's the short story for getting up and running quickly with LLVM:</p>
<ol>
<li>Read the documentation.</li>
<li>Read the documentation.</li>
<li>Remember that you were warned twice about reading the documentation.</li>
- <li>Install the llvm-gcc-4.2 front end if you intend to compile C or C++
- (see <a href="#installcf">Install the GCC Front End</a> for details):
- <ol>
- <li><tt>cd <i>where-you-want-the-C-front-end-to-live</i></tt></li>
- <li><tt>gunzip --stdout llvm-gcc-4.2-<i>version</i>-<i>platform</i>.tar.gz | tar -xvf -</tt></li>
- <li><tt><i>install-binutils-binary-from-MinGW</i></tt> (Windows only)</li>
- <li>Note: If the binary extension is "<tt>.bz</tt>" use <tt>bunzip2</tt> instead of <tt>gunzip</tt>.</li>
- <li>Note: On Windows, use <a href="http://www.7-zip.org/">7-Zip</a> or a similar archiving tool.</li>
- <li>Add <tt>llvm-gcc</tt>'s "<tt>bin</tt>" directory to your <tt>PATH</tt> environment variable.</li>
- </ol></li>
- <li>Get the LLVM Source Code
+ <li>Checkout LLVM:
<ul>
- <li>With the distributed files (or use <a href="#checkout">SVN</a>):
- <ol>
- <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
- <li><tt>gunzip --stdout llvm-<i>version</i>.tar.gz | tar -xvf -</tt>
- </ol></li>
-
- </ul></li>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm</tt></li>
+ </ul>
+ </li>
- <li><b>[Optional]</b> Get the Test Suite Source Code
+ <li>Checkout Clang:
<ul>
- <li>With the distributed files (or use <a href="#checkout">SVN</a>):
- <ol>
- <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
- <li><tt>cd llvm/projects</tt>
- <li><tt>gunzip --stdout llvm-test-<i>version</i>.tar.gz | tar -xvf -</tt>
- <li><tt>mv llvm-test-<i>version</i> test-suite</tt>
- </ol></li>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>cd llvm/tools</tt>
+ <li><tt>svn co http://llvm.org/svn/llvm-project/cfe/trunk clang</tt></li>
+ </ul>
+ </li>
- </ul></li>
+ <li>Checkout Compiler-RT:
+ <ul>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>cd llvm/projects</tt>
+ <li><tt>svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk
+ compiler-rt</tt></li>
+ </ul>
+ </li>
+ <li>Get the Test Suite Source Code <b>[Optional]</b>
+ <ul>
+ <li><tt>cd <i>where-you-want-llvm-to-live</i></tt>
+ <li><tt>cd llvm/projects</tt>
+ <li><tt>svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite</tt></li>
+ </ul>
+ </li>
- <li>Configure the LLVM Build Environment
- <ol>
+ <li>Configure and build LLVM and Clang:
+ <ul>
<li><tt>cd <i>where-you-want-to-build-llvm</i></tt></li>
- <li><tt><i>/path/to/llvm/</i>configure [options]</tt><br>
- Some common options:
+ <li><tt>mkdir build</tt> (for building without polluting the source dir)</li>
+ <li><tt>cd build</tt></li>
+ <li><tt>../llvm/configure [options]</tt>
+ <br>Some common options:
<ul>
- <li><tt>--prefix=<i>directory</i></tt>
- <p>Specify for <i>directory</i> the full pathname of where you
+ <li><tt>--prefix=<i>directory</i></tt> -
+ Specify for <i>directory</i> the full pathname of where you
want the LLVM tools and libraries to be installed (default
- <tt>/usr/local</tt>).</p></li>
- <li><tt>--with-llvmgccdir=<i>directory</i></tt>
- <p>Optionally, specify for <i>directory</i> the full pathname of the
- C/C++ front end installation to use with this LLVM configuration. If
- not specified, the PATH will be searched. This is only needed if you
- want to run test-suite or do some special kinds of LLVM builds.</p></li>
- <li><tt>--enable-spec2000=<i>directory</i></tt>
- <p>Enable the SPEC2000 benchmarks for testing. The SPEC2000
- benchmarks should be available in
- <tt><i>directory</i></tt>.</p></li>
+ <tt>/usr/local</tt>).</li>
</ul>
- </ol></li>
- <li>Build the LLVM Suite:
- <ol>
- <li><tt>gmake -k |&amp; tee gnumake.out
- &nbsp;&nbsp;&nbsp;# this is csh or tcsh syntax</tt></li>
- <li>If you get an "internal compiler error (ICE)" or test failures, see
- <a href="#brokengcc">below</a>.</li>
- </ol>
+ <ul>
+ <li><tt>--enable-optimized</tt> -
+ Compile with optimizations enabled (default is NO).</li>
+ </ul>
+
+ <ul>
+ <li><tt>--enable-assertions</tt> -
+ Compile with assertion checks enabled (default is YES).</li>
+ </ul>
+ </li>
+ <li><tt>make [-j]</tt> - The -j option specifies the number of jobs
+ (commands) to run simultaneously. This builds both LLVM and Clang in
+ Debug+Asserts mode. Use the --enable-optimized configure option for a
+ Release build.</li>
+ <li><tt>make check-all</tt> -
+ This runs the regression tests to ensure everything is in working order.</li>
+ <li><tt>make update</tt> -
+ This command updates all the svn repositories at once, rather than
+ requiring you to <tt>cd</tt> into each repository and run
+ <tt>svn update</tt>.</li>
+ <li>It is also possible to use CMake instead of the makefiles. CMake can
+ also generate project files for several IDEs: Eclipse CDT4, CodeBlocks,
+ Qt-Creator (use the CodeBlocks generator), and KDevelop3.</li>
+ <li>If you get an "internal compiler error (ICE)" or test failures, see
+ <a href="#brokengcc">below</a>.</li>
+
+ </ul>
+ </li>
</ol>
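+<p>As a compact illustration of the steps above, a from-scratch checkout and
+build on a Unix-like system might look like the following (the <tt>-j4</tt>
+value is illustrative; pick whatever degree of parallelism suits your
+machine):</p>
+
+<div class="doc_code">
+<pre>
+% svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
+% svn co http://llvm.org/svn/llvm-project/cfe/trunk llvm/tools/clang
+% mkdir build &amp;&amp; cd build
+% ../llvm/configure --enable-optimized
+% make -j4
+% make check-all
+</pre>
+</div>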
<p>Consult the <a href="#starting">Getting Started with LLVM</a> section for
detailed information on configuring and compiling LLVM. See <a
href="#environment">Setting Up Your Environment</a> for tips that simplify
-working with the GCC front end and LLVM tools. Go to <a href="#layout">Program
+working with the Clang front end and LLVM tools. Go to <a href="#layout">Program
Layout</a> to learn about the layout of the source code tree.</p>
</div>
@@ -283,7 +296,7 @@ software you will need.</p>
<tr>
<td>Windows</td>
<td>x86<sup><a href="#pf_1">1</a></sup></td>
- <td>Visual Studio 2005 SP1 or higher<sup><a href="#pf_4">4</a>,<a href="#pf_5">5</a></sup></td>
+ <td>Visual Studio 2008 or higher<sup><a href="#pf_4">4</a>,<a href="#pf_5">5</a></sup></td>
<tr>
<td>AIX<sup><a href="#pf_3">3</a>,<a href="#pf_4">4</a></sup></td>
<td>PowerPC</td>
@@ -361,10 +374,6 @@ able to assemble, disassemble, analyze, and optimize LLVM bitcode. Code
generation should work as well, although the generated native code may not work
on your platform.</p>
-<p>The GCC front end is not very portable at the moment. If you want to get it
-to work on another platform, you can download a copy of the source and <a
-href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
-
</div>
<!-- ======================================================================= -->
@@ -430,7 +439,7 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
<tr>
<td><a href="http://www.perl.com/download.csp">perl</a></td>
<td>&ge;5.6.0</td>
- <td>Nightly tester, utilities</td>
+ <td>Utilities</td>
</tr>
<tr>
@@ -441,13 +450,13 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
<tr>
<td><a href="http://www.gnu.org/software/autoconf/">GNU Autoconf</a></td>
- <td>2.61</td>
+ <td>2.60</td>
<td>Configuration script builder<sup><a href="#sf4">4</a></sup></td>
</tr>
<tr>
<td><a href="http://www.gnu.org/software/automake/">GNU Automake</a></td>
- <td>1.10</td>
+ <td>1.9.6</td>
<td>aclocal macro generator<sup><a href="#sf4">4</a></sup></td>
</tr>
@@ -471,8 +480,8 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
<li><a name="sf3">Only needed if you want to run the automated test
suite in the <tt>llvm/test</tt> directory.</a></li>
<li><a name="sf4">If you want to make changes to the configure scripts,
- you will need GNU autoconf (2.61), and consequently, GNU M4 (version 1.4
- or higher). You will also need automake (1.10). We only use aclocal
+ you will need GNU autoconf (2.60), and consequently, GNU M4 (version 1.4
+ or higher). You will also need automake (1.9.6). We only use aclocal
from that package.</a></li>
</ol>
</div>
@@ -516,9 +525,8 @@ href="GCCFEBuildInstrs.html">try to compile it</a> on your platform.</p>
<p>LLVM is very demanding of the host C++ compiler, and as such tends to expose
bugs in the compiler. In particular, several versions of GCC crash when trying
-to compile LLVM. We routinely use GCC 3.3.3, 3.4.0, and Apple 4.0.1
-successfully with them (however, see important notes below). Other versions
-of GCC will probably work as well. GCC versions listed
+to compile LLVM. We routinely use GCC 4.2 (and higher) or Clang.
+Other versions of GCC will probably work as well. GCC versions listed
here are known to not work. If you are using one of these versions, please try
to upgrade your GCC to something more recent. If you run into a problem with a
version of GCC not listed here, please <a href="mailto:llvmdev@cs.uiuc.edu">let
@@ -538,8 +546,7 @@ href="http://gcc.gnu.org/PR13392">serious bug</a> which causes it to crash in
the "<tt>convert_from_eh_region_ranges_1</tt>" GCC function.</p>
<p><b>Cygwin GCC 3.3.3</b>: The version of GCC 3.3.3 commonly shipped with
- Cygwin does not work. Please <a href="GCCFEBuildInstrs.html#cygwin">upgrade
- to a newer version</a> if possible.</p>
+ Cygwin does not work.</p>
<p><b>SuSE GCC 3.3.3</b>: The version of GCC 3.3.3 shipped with SuSE 9.1 (and
possibly others) does not compile LLVM correctly (it appears that exception
handling is broken in some cases). Please download the FSF 3.3.3 or upgrade
@@ -651,12 +658,6 @@ All these paths are absolute:</p>
can be the same as SRC_ROOT).
<br><br>
- <dt>LLVMGCCDIR
- <dd>
- This is where the LLVM GCC Front End is installed.
- <p>
- For the pre-built GCC front end binaries, the LLVMGCCDIR is
- <tt>llvm-gcc/<i>platform</i>/llvm-gcc</tt>.
</dl>
</div>
@@ -747,7 +748,6 @@ revision), you can checkout it from the '<tt>tags</tt>' directory (instead of
subdirectories of the '<tt>tags</tt>' directory:</p>
<ul>
-<li>Release 3.0: <b>RELEASE_30/final</b></li>
<li>Release 2.9: <b>RELEASE_29/final</b></li>
<li>Release 2.8: <b>RELEASE_28</b></li>
<li>Release 2.7: <b>RELEASE_27</b></li>
@@ -784,10 +784,6 @@ you get it from the Subversion repository:</p>
configured by the LLVM configure script as well as automatically updated when
you run <tt>svn update</tt>.</p>
-<p>If you would like to get the GCC front end source code, you can also get it
-and build it yourself. Please follow <a href="GCCFEBuildInstrs.html">these
-instructions</a> to successfully get and build the LLVM GCC front-end.</p>
-
</div>
<!-- ======================================================================= -->
@@ -891,6 +887,8 @@ Then, your .git/config should have [imap] sections.
folder = "[Gmail]/Drafts"
; example for Japanese, "Modified UTF-7" encoded.
folder = "[Gmail]/&amp;Tgtm+DBN-"
+; example for Traditional Chinese
+ folder = "[Gmail]/&amp;g0l6Pw-"
</pre>
</div>
@@ -951,76 +949,6 @@ git svn rebase -l
<!-- ======================================================================= -->
<h3>
- <a name="installcf">Install the GCC Front End</a>
-</h3>
-
-<div>
-
-<p>Before configuring and compiling the LLVM suite (or if you want to use just the LLVM
-GCC front end) you can optionally extract the front end from the binary distribution.
-It is used for running the LLVM test-suite and for compiling C/C++ programs. Note that
-you can optionally <a href="GCCFEBuildInstrs.html">build llvm-gcc yourself</a> after building the
-main LLVM repository.</p>
-
-<p>To install the GCC front end, do the following (on Windows, use an archival tool
-like <a href="http://www.7-zip.org/">7-zip</a> that understands gzipped tars):</p>
-
-<ol>
- <li><tt>cd <i>where-you-want-the-front-end-to-live</i></tt></li>
- <li><tt>gunzip --stdout llvm-gcc-4.2-<i>version</i>-<i>platform</i>.tar.gz | tar -xvf
- -</tt></li>
-</ol>
-
-<p>Once the binary is uncompressed, if you're using a *nix-based system, add a symlink for
-<tt>llvm-gcc</tt> and <tt>llvm-g++</tt> to some directory in your path. If you're using a
-Windows-based system, add the <tt>bin</tt> subdirectory of your front end installation directory
-to your <tt>PATH</tt> environment variable. For example, if you uncompressed the binary to
-<tt>c:\llvm-gcc</tt>, add <tt>c:\llvm-gcc\bin</tt> to your <tt>PATH</tt>.</p>
-
-<p>If you now want to build LLVM from source, when you configure LLVM, it will
-automatically detect <tt>llvm-gcc</tt>'s presence (if it is in your path) enabling its
-use in test-suite. Note that you can always build or install <tt>llvm-gcc</tt> at any
-point after building the main LLVM repository: just reconfigure llvm and
-test-suite will pick it up.
-</p>
-
-<p>As a convenience for Windows users, the front end binaries for MinGW/x86 include
-versions of the required w32api and mingw-runtime binaries. The last remaining step for
-Windows users is to simply uncompress the binary binutils package from
-<a href="http://mingw.org/">MinGW</a> into your front end installation directory. While the
-front end installation steps are not quite the same as a typical manual MinGW installation,
-they should be similar enough to those who have previously installed MinGW on Windows systems.</p>
-
-<p>To install binutils on Windows:</p>
-
-<ol>
- <li><tt><i>download GNU Binutils from <a href="http://sourceforge.net/projects/mingw/files/">MinGW Downloads</a></i></tt></li>
- <li><tt>cd <i>where-you-uncompressed-the-front-end</i></tt></li>
- <li><tt><i>uncompress archived binutils directories (not the tar file) into the current directory</i></tt></li>
-</ol>
-
-<p>The binary versions of the LLVM GCC front end may not suit all of your needs. For
-example, the binary distribution may include an old version of a system header
-file, not "fix" a header file that needs to be fixed for GCC, or it may be linked with
-libraries not available on your system. In cases like these, you may want to try
-<a href="GCCFEBuildInstrs.html">building the GCC front end from source</a>. Thankfully,
-this is much easier now than it was in the past.</p>
-
-<p>We also do not currently support updating of the GCC front end by manually overlaying
-newer versions of the w32api and mingw-runtime binary packages that may become available
-from MinGW. At this time, it's best to think of the MinGW LLVM GCC front end binary as
-a self-contained convenience package that requires Windows users to simply download and
-uncompress the GNU Binutils binary package from the MinGW project.</p>
-
-<p>Regardless of your platform, if you discover that installing the LLVM GCC front end
-binaries is not as easy as previously described, or you would like to suggest improvements,
-please let us know how you would like to see things improved by dropping us a note on our
-<a href="http://llvm.org/docs/#maillist">mailing list</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
<a name="config">Local LLVM Configuration</a>
</h3>
@@ -1057,29 +985,6 @@ script to configure the build system:</p>
<p>The following options can be used to set or enable LLVM specific options:</p>
<dl>
- <dt><i>--with-llvmgccdir</i></dt>
- <dd>Path to the LLVM C/C++ FrontEnd to be used with this LLVM configuration.
- The value of this option should specify the full pathname of the C/C++ Front
- End to be used. If this option is not provided, the PATH will be searched for
- a program named <i>llvm-gcc</i> and the C/C++ FrontEnd install directory will
- be inferred from the path found. If the option is not given, and no llvm-gcc
- can be found in the path then a warning will be produced by
- <tt>configure</tt> indicating this situation. LLVM may still be built with
- the <tt>tools-only</tt> target but attempting to build the runtime libraries
- will fail as these libraries require llvm-gcc and llvm-g++. See
- <a href="#installcf">Install the GCC Front End</a> for details on installing
- the C/C++ Front End. See
- <a href="GCCFEBuildInstrs.html">Bootstrapping the LLVM C/C++ Front-End</a>
- for details on building the C/C++ Front End.</dd>
- <dt><i>--with-tclinclude</i></dt>
- <dd>Path to the tcl include directory under which <tt>tclsh</tt> can be
- found. Use this if you have multiple tcl installations on your machine and you
- want to use a specific one (8.x) for LLVM. LLVM only uses tcl for running the
- dejagnu based test suite in <tt>llvm/test</tt>. If you don't specify this
- option, the LLVM configure script will search for the tcl 8.4 and 8.3
- releases.
- <br><br>
- </dd>
<dt><i>--enable-optimized</i></dt>
<dd>
Enables optimized compilation (debugging symbols are removed
@@ -1110,7 +1015,7 @@ script to configure the build system:</p>
selected as the target of the build host. You can also specify a comma
separated list of target names that you want available in llc. The target
names use all lower case. The current set of targets is: <br>
- <tt>alpha, ia64, powerpc, skeleton, sparc, x86</tt>.
+ <tt>arm, cbe, cpp, hexagon, mblaze, mips, mipsel, msp430, powerpc, ptx, sparc, spu, x86, x86_64, xcore</tt>.
<br><br></dd>
<dt><i>--enable-doxygen</i></dt>
<dd>Look for the doxygen program and enable construction of doxygen based
@@ -1483,7 +1388,7 @@ different <a href="#tools">tools</a>.</p>
<dd> This directory contains files that describe various target architectures
for code generation. For example, the <tt>llvm/lib/Target/X86</tt>
directory holds the X86 machine description while
- <tt>llvm/lib/Target/CBackend</tt> implements the LLVM-to-C converter.</dd>
+ <tt>llvm/lib/Target/ARM</tt> implements the ARM backend.</dd>
<dt><tt><b>llvm/lib/CodeGen/</b></tt></dt>
<dd> This directory contains the major parts of the code generator: Instruction
@@ -1530,7 +1435,7 @@ different <a href="#tools">tools</a>.</p>
<div>
<p>This directory contains libraries which are compiled into LLVM bitcode and
-used when linking programs with the GCC front end. Most of these libraries are
+used when linking programs with the Clang front end. Most of these libraries are
skeleton versions of real libraries; for example, libc is a stripped down
version of glibc.</p>
@@ -1692,12 +1597,6 @@ are code generators for parts of LLVM infrastructure.</p>
directory, switch to directory <tt>llvm/tools/llc</tt> and build it,
causing a re-linking of LLC.<br><br>
- <dt><tt><b>NewNightlyTest.pl</b></tt> and
- <tt><b>NightlyTestTemplate.html</b></tt> <dd>These files are used in a
- cron script to generate nightly status reports of the functionality of
- tools, and the results can be seen by following the appropriate link on
- the <a href="http://llvm.org/">LLVM homepage</a>.<br><br>
-
<dt><tt><b>TableGen/</b></tt> <dd>The <tt>TableGen</tt> directory contains
the tool used to generate register descriptions, instruction set
descriptions, and even assemblers from common TableGen description
@@ -1722,20 +1621,11 @@ are code generators for parts of LLVM infrastructure.</p>
<!-- *********************************************************************** -->
<div>
-<p>This section gives an example of using LLVM. llvm-gcc3 is now obsolete,
-so we only include instructions for llvm-gcc4.
-</p>
-
-<p><b>Note:</b> The <i>gcc4</i> frontend's invocation is <b><i>considerably different</i></b>
-from the previous <i>gcc3</i> frontend. In particular, the <i>gcc4</i> frontend <b><i>does not</i></b>
-create bitcode by default: <i>gcc4</i> produces native code. As the example below illustrates,
-the '--emit-llvm' flag is needed to produce LLVM bitcode output. For <i>makefiles</i> and
-<i>configure</i> scripts, the CFLAGS variable needs '--emit-llvm' to produce bitcode
-output.</p>
+<p>This section gives an example of using LLVM with the Clang front end.</p>
<!-- ======================================================================= -->
<h3>
- <a name="tutorial4">Example with llvm-gcc4</a>
+ <a name="tutorial4">Example with clang</a>
</h3>
<div>
@@ -1755,24 +1645,21 @@ int main() {
<li><p>Next, compile the C file into a native executable:</p>
- <div class="doc_code"><pre>% llvm-gcc hello.c -o hello</pre></div>
+ <div class="doc_code"><pre>% clang hello.c -o hello</pre></div>
- <p>Note that llvm-gcc works just like GCC by default. The standard -S and
+ <p>Note that clang works just like GCC by default. The standard -S and
-c arguments work as usual (producing a native .s or .o file,
respectively).</p></li>
<li><p>Next, compile the C file into a LLVM bitcode file:</p>
<div class="doc_code">
- <pre>% llvm-gcc -O3 -emit-llvm hello.c -c -o hello.bc</pre></div>
+ <pre>% clang -O3 -emit-llvm hello.c -c -o hello.bc</pre></div>
<p>The -emit-llvm option can be used with the -S or -c options to emit an
LLVM ".ll" or ".bc" file (respectively) for the code. This allows you
to use the <a href="CommandGuide/index.html">standard LLVM tools</a> on
- the bitcode file.</p>
-
- <p>Unlike llvm-gcc3, llvm-gcc4 correctly responds to -O[0123] arguments.
- </p></li>
+ the bitcode file.</p></li>
<li><p>Run the program in both forms. To run the program, use:</p>
@@ -1811,7 +1698,7 @@ int main() {
<div class="doc_code"><pre>% ./hello.native</pre></div>
- <p>Note that using llvm-gcc to compile directly to native code (i.e. when
+ <p>Note that using clang to compile directly to native code (i.e. when
the -emit-llvm option is not present) does steps 6/7/8 for you.</p>
</li>
@@ -1870,7 +1757,7 @@ out:</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-17 08:31:32 +0200 (Mon, 17 Oct 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
</html>
diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html
index 6a604333c8f1..beadd0bcd943 100644
--- a/docs/GettingStartedVS.html
+++ b/docs/GettingStartedVS.html
@@ -44,7 +44,7 @@
<p>There are many different projects that compose LLVM. The first is the LLVM
suite. This contains all of the tools, libraries, and header files needed to
- use the low level virtual machine. It contains an assembler, disassembler,
+ use LLVM. It contains an assembler, disassembler,
bitcode analyzer and bitcode optimizer. It also contains a test suite that can
be used to test the LLVM tools.</p>
@@ -88,8 +88,8 @@
<div>
- <p>Any system that can adequately run Visual Studio .NET 2005 SP1 is fine.
- The LLVM source tree and object files, libraries and executables will consume
+ <p>Any system that can adequately run Visual Studio 2008 is fine. The LLVM
+ source tree and object files, libraries and executables will consume
approximately 3GB.</p>
</div>
@@ -98,10 +98,9 @@
<h3><a name="software"><b>Software</b></a></h3>
<div>
- <p>You will need Visual Studio .NET 2005 SP1 or higher. The VS2005 SP1
- beta and the normal VS2005 still have bugs that are not completely
- compatible. Earlier versions of Visual Studio do not support the C++ standard
- well enough and will not work.</p>
+ <p>You will need Visual Studio 2008 or higher. Earlier versions of Visual
+ Studio have bugs, are not completely compatible, or do not support the C++
+ standard well enough.</p>
<p>You will also need the <a href="http://www.cmake.org/">CMake</a> build
system since it generates the project files you will use to build with.</p>
@@ -363,7 +362,7 @@ out:</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
+ Last modified: $Date: 2012-01-25 23:00:23 +0100 (Wed, 25 Jan 2012) $
</address>
</body>
</html>
diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html
index 375dd3c8ca78..2c08bd031b7f 100644
--- a/docs/GoldPlugin.html
+++ b/docs/GoldPlugin.html
@@ -89,11 +89,11 @@ placed.
<tt>-emit-llvm</tt> or <tt>-flto</tt>, or the <tt>-O4</tt> flag which is
synonymous with <tt>-O3 -flto</tt>.</p>
- <p><tt>Clang</tt> has a <tt>-use-gold-plugin</tt> option which looks for the
- gold plugin in the same directories as it looks for <tt>cc1</tt> and passes
- the <tt>-plugin</tt> option to <tt>ld</tt>. It will not look for an alternate
- linker, which is why you need gold to be the installed system linker in your
- path.</p>
+ <p>Any of these flags will also cause <tt>clang</tt> to look for the
+ gold plugin in the <tt>lib</tt> directory under its prefix and pass the
+ <tt>-plugin</tt> option to <tt>ld</tt>. It will not look for an alternate
+ linker, which is why you need gold to be the installed system linker in
+ your path.</p>
<p>If you want <tt>ar</tt> and <tt>nm</tt> to work seamlessly as well, install
<tt>LLVMgold.so</tt> to <tt>/usr/lib/bfd-plugins</tt>. If you built your
@@ -141,10 +141,10 @@ void foo4(void) {
}
--- command lines ---
-$ clang -flto a.c -c -o a.o # &lt;-- a.o is LLVM bitcode file
-$ ar q a.a a.o # &lt;-- a.a is an archive with LLVM bitcode
-$ clang b.c -c -o b.o # &lt;-- b.o is native object file
-$ clang -use-gold-plugin a.a b.o -o main # &lt;-- link with LLVMgold plugin
+$ clang -flto a.c -c -o a.o # &lt;-- a.o is LLVM bitcode file
+$ ar q a.a a.o # &lt;-- a.a is an archive with LLVM bitcode
+$ clang b.c -c -o b.o # &lt;-- b.o is native object file
+$ clang -flto a.a b.o -o main # &lt;-- link with LLVMgold plugin
</pre>
<p>Gold informs the plugin that foo3 is never referenced outside the IR,
@@ -171,13 +171,12 @@ $ clang -use-gold-plugin a.a b.o -o main # &lt;-- link with LLVMgold plugin
<li>Follow the instructions <a href="#build">on how to build LLVMgold.so</a>.</li>
<li>Install the newly built binutils to <tt>$PREFIX</tt></li>
<li>Copy <tt>Release/lib/LLVMgold.so</tt> to
- <tt>$PREFIX/libexec/gcc/x86_64-unknown-linux-gnu/4.2.1/</tt> and
<tt>$PREFIX/lib/bfd-plugins/</tt></li>
<li>Set environment variables (<tt>$PREFIX</tt> is where you installed clang and
binutils):
<pre class="doc_code">
-export CC="$PREFIX/bin/clang -use-gold-plugin"
-export CXX="$PREFIX/bin/clang++ -use-gold-plugin"
+export CC="$PREFIX/bin/clang -flto"
+export CXX="$PREFIX/bin/clang++ -flto"
export AR="$PREFIX/bin/ar"
export NM="$PREFIX/bin/nm"
export RANLIB=/bin/true #ranlib is not needed, and doesn't support .bc files in .a
@@ -187,8 +186,8 @@ export CFLAGS="-O4"
<li>Or you can just set your path:
<pre class="doc_code">
export PATH="$PREFIX/bin:$PATH"
-export CC="clang -use-gold-plugin"
-export CXX="clang++ -use-gold-plugin"
+export CC="clang -flto"
+export CXX="clang++ -flto"
export RANLIB=/bin/true
export CFLAGS="-O4"
</pre></li>
diff --git a/docs/HowToAddABuilder.html b/docs/HowToAddABuilder.html
new file mode 100644
index 000000000000..0de2dacebe2b
--- /dev/null
+++ b/docs/HowToAddABuilder.html
@@ -0,0 +1,142 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>
+ How To Add Your Build Configuration To LLVM Buildbot Infrastructure
+ </title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<h1>How To Add Your Build Configuration To LLVM Buildbot Infrastructure</h1>
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#steps">Steps To Add Builder To LLVM Buildbot</a></li>
+</ol>
+<div class="doc_author">
+ <p>Written by <a href="mailto:gkistanova@gmail.com">Galina Kistanova</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<h2><a name="introduction">Introduction</a></h2>
+<!-- *********************************************************************** -->
+
+<div>
+
+<p>This document contains information about adding a build configuration and
+  buildslave to the LLVM Buildbot infrastructure at
+  <a href="http://lab.llvm.org:8011">http://lab.llvm.org:8011</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<h2><a name="steps">Steps To Add Builder To LLVM Buildbot</a></h2>
+<!-- *********************************************************************** -->
+
+<div>
+
+<p>Volunteers can provide their build machines to work as build slaves for
+   the public LLVM Buildbot.</p>
+
+<p>Here are the steps you can follow to do so:</p>
+
+<ol>
+ <li><p>Check the existing build configurations to make sure the one you are
+ interested in is not covered yet or gets built on your computer much
+ faster than on the existing one. We prefer faster builds so developers
+ will get feedback sooner after changes get committed.</p></li>
+
+  <li><p>The computer you will be registering with the LLVM buildbot
+      infrastructure should have all dependencies installed, and you should
+      be able to build your configuration successfully. Please check what
+      degree of parallelism (-j param) gives the fastest build.
+      You can build multiple configurations on one computer.</p></li>
+
+  <li><p>Install buildslave (currently we are using buildbot version 0.8.5).
+      Depending on the platform, buildslave may be available to download and
+      install with your package manager, or you can download it directly from
+      <a href="http://trac.buildbot.net">http://trac.buildbot.net</a> and
+      install it manually (see the example after this list).</p></li>
+
+  <li><p>Create a designated user account that your buildslave will run
+      under, and set appropriate permissions.</p></li>
+
+  <li><p>Choose the buildslave root directory (all builds will be placed under
+      it), the buildslave access name, and the password the build master will
+      use to authenticate your buildslave.</p></li>
+
+  <li><p>Create a buildslave in the context of that buildslave account.
+ Point it to the <b>lab.llvm.org</b> port <b>9990</b> (see
+ <a href="http://buildbot.net/buildbot/docs/current/full.html#creating-a-slave">
+ Buildbot documentation, Creating a slave</a>
+ for more details) by running the following command:</p>
+
+<div class="doc_code">
+<pre>
+$ buildslave create-slave <i>buildslave-root-directory</i> \
+ lab.llvm.org:9990 \
+ <i>buildslave-access-name buildslave-access-password</i>
+</pre>
+</div></li>
+
+  <li><p>Fill in the buildslave description and admin name/e-mail.
+ Here is an example of the buildslave description:</p>
+
+<div class="doc_code">
+<pre>
+Windows 7 x64
+Core i7 (2.66GHz), 16GB of RAM
+
+g++.exe (TDM-1 mingw32) 4.4.0
+GNU Binutils 2.19.1
+cmake version 2.8.4
+Microsoft(R) 32-bit C/C++ Optimizing Compiler Version 16.00.40219.01 for 80x86
+</pre>
+</div></li>
+
+  <li><p>Make sure you can actually start the buildslave successfully. Then
+      set up your buildslave to start automatically at startup time.
+      See the buildbot documentation for help.
+      You may want to restart your computer to see if it works.</p></li>
+
+ <li><p>Send a patch which adds your build slave and your builder to zorg.</p>
+ <ul>
+ <li>slaves are added to
+ <tt>buildbot/osuosl/master/config/slaves.py</tt></li>
+ <li>builders are added to
+ <tt>buildbot/osuosl/master/config/builders.py</tt></li>
+ </ul></li>
+
+  <li><p>Send the buildslave access name and the access password directly
+      to <a href="mailto:gkistanova@gmail.com">Galina Kistanova</a>, and wait
+      until she lets you know that your changes have been applied and the
+      buildmaster has been reconfigured.</p>
+
+  <li><p>Check the status of your buildslave on the
+      <a href="http://lab.llvm.org:8011/waterfall">Waterfall Display</a>
+      to make sure it is connected, and check
+      <a href="http://lab.llvm.org:8011/buildslaves/your-buildslave-name">
+      http://lab.llvm.org:8011/buildslaves/&lt;your-buildslave-name&gt;</a>
+      to see if the administrator contact and slave information are correct.</p>
+ </li>
+
+ <li><p>Wait for the first build to succeed and enjoy.</p></li>
+</ol>
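+
+<p>As an example of the manual installation mentioned in the steps above, the
+slave side of buildbot 0.8.x is packaged separately and can typically be
+installed with <tt>pip</tt> (the package name below is an assumption based on
+the standard buildbot packaging; prefer your platform's package manager when
+it carries the slave package):</p>
+
+<div class="doc_code">
+<pre>
+$ pip install buildbot-slave==0.8.5
+</pre>
+</div>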
+
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+ <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
+ <br>
+  Last modified: $Date: 2011-10-31 12:50:00 -0700 (Mon, 31 Oct 2011) $
+</address>
+</body>
+</html>
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
index 8588f3fa7c00..30c4f0cb5df6 100644
--- a/docs/HowToReleaseLLVM.html
+++ b/docs/HowToReleaseLLVM.html
@@ -575,7 +575,7 @@ $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html
index a6e5a70ac769..0071ec665487 100644
--- a/docs/HowToSubmitABug.html
+++ b/docs/HowToSubmitABug.html
@@ -341,7 +341,7 @@ the following:</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
diff --git a/docs/LLVMBuild.html b/docs/LLVMBuild.html
new file mode 100644
index 000000000000..cce607d611fc
--- /dev/null
+++ b/docs/LLVMBuild.html
@@ -0,0 +1,363 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>LLVMBuild Documentation</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<h1>LLVMBuild Guide</h1>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#projectorg">Project Organization</a></li>
+ <li><a href="#buildintegration">Build Integration</a></li>
+ <li><a href="#componentoverview">Component Overview</a></li>
+ <li><a href="#formatreference">Format Reference</a></li>
+</ol>
+
+<!-- *********************************************************************** -->
+<h2><a name="introduction">Introduction</a></h2>
+<!-- *********************************************************************** -->
+
+<div>
+  <p>This document describes the <tt>LLVMBuild</tt> organization and files
+  which we use to describe parts of the LLVM ecosystem. For a description of
+  the specific LLVMBuild-related tools, please see the command guide.</p>
+
+ <p>LLVM is designed to be a modular set of libraries which can be flexibly
+ mixed together in order to build a variety of tools, like compilers, JITs,
+ custom code generators, optimization passes, interpreters, and so on. Related
+ projects in the LLVM system like Clang and LLDB also tend to follow this
+ philosophy.</p>
+
+ <p>In order to support this usage style, LLVM has a fairly strict structure as
+ to how the source code and various components are organized. The
+ <tt>LLVMBuild.txt</tt> files are the explicit specification of that structure,
+ and are used by the build systems and other tools in order to develop the LLVM
+ project.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<h2><a name="projectorg">Project Organization</a></h2>
+<!-- *********************************************************************** -->
+
+<!-- FIXME: We should probably have an explicit top level project object. Good
+place to hang project level data, name, etc. Also useful for serving as the
+$ROOT of project trees for things which can be checked out separately. -->
+
+<div>
+ <p>The source code for LLVM projects using the LLVMBuild system (LLVM, Clang,
+ and LLDB) is organized into <em>components</em>, which define the separate
+ pieces of functionality that make up the project. These projects may consist
+ of many libraries, associated tools, build tools, or other utility tools (for
+ example, testing tools).</p>
+
+  <p>For the most part, the project contents are organized around defining one
+  main component per subdirectory. Each such directory contains
+  an <tt>LLVMBuild.txt</tt> which contains the component definitions.</p>
+
+ <p>The component descriptions for the project as a whole are automatically
+ gathered by the LLVMBuild tools. The tools automatically traverse the source
+ directory structure to find all of the component description files. NOTE: For
+ performance/sanity reasons, we only traverse into subdirectories when the
+ parent itself contains an <tt>LLVMBuild.txt</tt> description file.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<h2><a name="buildintegration">Build Integration</a></h2>
+<!-- *********************************************************************** -->
+
+<div>
+  <p>The LLVMBuild files themselves are just a declarative way to describe the
+  project structure. The actual building of the LLVM project is handled by
+  another build system (currently we support
+  both <a href="MakefileGuide.html">Makefiles</a>
+  and <a href="CMake.html">CMake</a>).</p>
+
+  <p>The build system implementation will load the relevant contents of the
+  LLVMBuild files and use that to drive the actual project build. Typically,
+  the build system will only need to load this information at "configure"
+  time, and use it to generate native build information. Build systems will
+  also handle automatically reconfiguring their information when the contents
+  of the <i>LLVMBuild.txt</i> files change.</p>
+
+  <p>Developers generally should not need to be aware of the details of how
+  the LLVMBuild system is integrated into their build. Ideally, LLVM
+  developers who are not working on the build system would only ever need to
+  modify the contents of the <i>LLVMBuild.txt</i> description files (although
+  we have not reached this goal yet).</p>
+
+  <p>For more information on the utility tool we provide to help interface
+  with the build system, please see
+ the <a href="CommandGuide/html/llvm-build.html">llvm-build</a>
+ documentation.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<h2><a name="componentoverview">Component Overview</a></h2>
+<!-- *********************************************************************** -->
+
+<div>
+  <p>As mentioned earlier, LLVM projects are organized into
+  logical <em>components</em>. Every component is typically grouped into its
+  own subdirectory. Generally, a component is organized around a coherent
+  group of sources which have some kind of clear API separation from other
+  parts of the code.</p>
+
+ <p>LLVM primarily uses the following types of components:</p>
+ <ul>
+    <li><em>Libraries</em> - Library components define a distinct API which can
+    be independently linked into LLVM client applications. Libraries typically
+    have private and public header files, and may specify a list of required
+    libraries that they build on top of.</li>
+
+ <li><em>Build Tools</em> - Build tools are applications which are designed
+ to be run as part of the build process (typically to generate other source
+ files). Currently, LLVM uses one main build tool
+ called <a href="TableGenFundamentals.html">TableGen</a> to generate a
+ variety of source files.</li>
+
+ <li><em>Tools</em> - Command line applications which are built using the
+ LLVM component libraries. Most LLVM tools are small and are primarily
+ frontends to the library interfaces.</li>
+
+<!-- FIXME: We also need shared libraries as a first class component, but this
+ is not yet implemented. -->
+ </ul>
+
+ <p>Components are described using <em>LLVMBuild.txt</em> files in the
+ directories that define the component. See
+ the <a href="#formatreference">Format Reference</a> section for information on
+ the exact format of these files.</p>
+</div>
+
+<!-- *********************************************************************** -->
+<h2><a name="formatreference">LLVMBuild Format Reference</a></h2>
+<!-- *********************************************************************** -->
+
+<div>
+ <p>LLVMBuild files are written in a simple variant of the INI or configuration
+ file format (<a href="http://en.wikipedia.org/wiki/INI_file">Wikipedia
+ entry</a>). The format defines a list of sections each of which may contain
+ some number of properties. A simple example of the file format is below:</p>
+ <div class="doc_code">
+ <pre>
+<i>; Comments start with a semi-colon.</i>
+
+<i>; Sections are declared using square brackets.</i>
+[component_0]
+
+<i>; Properties are declared using '=' and are contained in the previous section.
+;
+; We support simple string and boolean scalar values and list values, where
+; items are separated by spaces. There is no support for quoting, and so
+; property values may not contain spaces.</i>
+property_name = property_value
+list_property_name = value_1 value_2 <em>...</em> value_n
+boolean_property_name = 1 <em>(or 0)</em>
+</pre>
+ </div>
+
+  <p>LLVMBuild files are expected to define a strict set of sections and
+  properties. A typical component description file for a library
+  component would look like the following example:</p>
+ <div class="doc_code">
+ <pre>
+[component_0]
+type = Library
+name = Linker
+parent = Libraries
+required_libraries = Archive BitReader Core Support TransformUtils
+</pre>
+ </div>
+
+ <p>A full description of the exact sections and properties which are allowed
+ follows.</p>
+
+ <p>Each file may define exactly one common component, named "common". The
+ common component may define the following properties:</p>
+ <ul>
+ <li><i>subdirectories</i> <b>[optional]</b>
+ <p>If given, a list of the names of the subdirectories from the current
+ subpath to search for additional LLVMBuild files.</p></li>
+ </ul>
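+
+  <p>For example, a directory whose LLVMBuild files live in several
+  subdirectories might declare (the names are illustrative only):</p>
+  <div class="doc_code">
+  <pre>
+[common]
+subdirectories = IPO Scalar Utils
+</pre>
+  </div>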
+
+  <p>Each file may define multiple components. Each component is described by
+  a section whose name starts with "component". The remainder of the section
+  name is ignored, but each section name must be unique. Typically components
+  are just numbered in order for files with multiple components
+  ("component_0", "component_1", and so on).</p>
+
+  <p><b>Section names not matching this format (or the "common" section) are
+  currently unused and are disallowed.</b></p>
+
+ <p>Every component is defined by the properties in the section. The exact list
+ of properties that are allowed depends on the component
+ type. Components <b>may not</b> define any properties other than those
+ expected by the component type.</p>
+
+ <p>Every component must define the following properties:</p>
+ <ul>
+ <li><i>type</i> <b>[required]</b>
+ <p>The type of the component. Supported component types are
+ detailed below. Most components will define additional properties which
+ may be required or optional.</p></li>
+
+ <li><i>name</i> <b>[required]</b>
+ <p>The name of the component. Names are required to be unique
+ across the entire project.</p></li>
+
+ <li><i>parent</i> <b>[required]</b>
+      <p>The name of the logical parent of the component. Components are
+      organized into a logical tree to make it easier to navigate and organize
+      groups of components. The parent has no semantics as far as the project
+      build is concerned, however. Typically, the parent will be the main
+      component of the parent directory.</p>
+
+ <!-- FIXME: Should we make the parent optional, and default to parent
+ directories component? -->
+
+ <p>Components may reference the root pseudo component using '$ROOT' to
+ indicate they should logically be grouped at the top-level.</p>
+ </li>
+ </ul>
+
+ <p>Components may define the following properties:</p>
+ <ul>
+ <li><i>dependencies</i> <b>[optional]</b>
+ <p>If specified, a list of names of components which <i>must</i> be built
+ prior to this one. This should only be exactly those components which
+ produce some tool or source code required for building the
+ component.</p>
+
+ <p><em>NOTE:</em> Group and LibraryGroup components have no semantics for
+ the actual build, and are not allowed to specify dependencies.</p></li>
+ </ul>
+
+ <p>The following section lists the available component types, as well as the
+ properties which are associated with that component.</p>
+
+ <ul>
+ <li><i>type = Group</i>
+ <p>Group components exist purely to allow additional arbitrary structuring
+ of the logical components tree. For example, one might define a
+ "Libraries" group to hold all of the root library components.</p>
+
+      <p>Group components have no additional properties.</p>
+ </li>
+
+ <li><i>type = Library</i>
+ <p>Library components define an individual library which should be built
+ from the source code in the component directory.</p>
+
+ <p>Components with this type use the following properties:</p>
+ <ul>
+ <li><i>library_name</i> <b>[optional]</b>
+ <p>If given, the name to use for the actual library file on disk. If
+ not given, the name is derived from the component name
+ itself.</p></li>
+
+ <li><i>required_libraries</i> <b>[optional]</b>
+ <p>If given, a list of the names of Library or LibraryGroup components
+ which must also be linked in whenever this library is used. That is,
+ the link time dependencies for this component. When tools are built,
+        the build system will include the transitive closure of
+ all <i>required_libraries</i> for the components the tool needs.</p></li>
+
+ <li><i>add_to_library_groups</i> <b>[optional]</b>
+ <p>If given, a list of the names of LibraryGroup components which this
+ component is also part of. This allows nesting groups of
+ components. For example, the <i>X86</i> target might define a library
+ group for all of the <i>X86</i> components. That library group might
+ then be included in the <i>all-targets</i> library group.</p></li>
+ </ul>
+ </li>
+
+ <li><i>type = LibraryGroup</i>
+ <p>LibraryGroup components are a mechanism to allow easy definition of
+ useful sets of related components. In particular, we use them to easily
+ specify things like "all targets", or "all assembly printers".</p>
+
+ <p>Components with this type use the following properties:</p>
+ <ul>
+ <li><i>required_libraries</i> <b>[optional]</b>
+ <p>See the Library type for a description of this property.</p></li>
+
+ <li><i>add_to_library_groups</i> <b>[optional]</b>
+ <p>See the Library type for a description of this property.</p></li>
+ </ul>
+ </li>
+
+ <li><i>type = TargetGroup</i>
+ <p>TargetGroup components are an extension of LibraryGroups, specifically
+ for defining LLVM targets (which are handled specially in a few
+ places).</p>
+
+ <p>The name of the component should always be the name of the target.</p>
+
+ <p>Components with this type use the LibraryGroup properties in addition
+ to:</p>
+ <ul>
+ <li><i>has_asmparser</i> <b>[optional]</b> <b>[boolean]</b>
+ <p>Whether this target defines an assembly parser.</p></li>
+ <li><i>has_asmprinter</i> <b>[optional]</b> <b>[boolean]</b>
+ <p>Whether this target defines an assembly printer.</p></li>
+ <li><i>has_disassembler</i> <b>[optional]</b> <b>[boolean]</b>
+ <p>Whether this target defines a disassembler.</p></li>
+ <li><i>has_jit</i> <b>[optional]</b> <b>[boolean]</b>
+ <p>Whether this target supports JIT compilation.</p></li>
+ </ul>
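+
+      <p>For example, a target's group definition might look like the
+      following sketch (modeled loosely on the <i>X86</i> target; consult the
+      target's actual <tt>LLVMBuild.txt</tt> for authoritative contents):</p>
+      <div class="doc_code">
+      <pre>
+[component_0]
+type = TargetGroup
+name = X86
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+</pre>
+      </div>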
+ </li>
+
+ <li><i>type = Tool</i>
+ <p>Tool components define standalone command line tools which should be
+ built from the source code in the component directory and linked.</p>
+
+ <p>Components with this type use the following properties:</p>
+ <ul>
+ <li><i>required_libraries</i> <b>[optional]</b>
+
+ <p>If given, a list of the names of Library or LibraryGroup components
+ which this tool is required to be linked with. <b>NOTE:</b> The values
+ should be the component names, which may not always match up with the
+ actual library names on disk.</p>
+
+ <p>Build systems are expected to properly include all of the libraries
+        required by the linked components (i.e., the transitive closure
+ of <em>required_libraries</em>).</p>
+
+ <p>Build systems are also expected to understand that those library
+ components must be built prior to linking -- they do not also need to
+ be listed under <i>dependencies</i>.</p></li>
+ </ul>
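+
+      <p>For example, a small tool component might look like the following
+      sketch (the tool name and library list are illustrative; a real tool's
+      <tt>LLVMBuild.txt</tt> is the authoritative reference):</p>
+      <div class="doc_code">
+      <pre>
+[component_0]
+type = Tool
+name = llvm-dis
+parent = Tools
+required_libraries = BitReader
+</pre>
+      </div>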
+ </li>
+
+ <li><i>type = BuildTool</i>
+ <p>BuildTool components are like Tool components, except that the tool is
+ supposed to be built for the platform where the build is running (instead
+      of the platform being targeted). Build systems are expected to handle
+ the fact that required libraries may need to be built for multiple
+ platforms in order to be able to link this tool.</p>
+
+      <p>BuildTool components currently use the exact same properties as Tool
+      components; the type distinction is only used to differentiate what the
+      tool is built for.</p>
+ </li>
+ </ul>
+</div>
+
+<!-- *********************************************************************** -->
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+
+ <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/LLVMBuild.txt b/docs/LLVMBuild.txt
new file mode 100644
index 000000000000..d5aea864ecdd
--- /dev/null
+++ b/docs/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./docs/LLVMBuild.txt -------------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Group
+name = Docs
+parent = $ROOT
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 3d01b60a8b47..d1c6e58c31c4 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -92,7 +92,7 @@
<li><a href="#complexconstants">Complex Constants</a></li>
<li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
<li><a href="#undefvalues">Undefined Values</a></li>
- <li><a href="#trapvalues">Trap Values</a></li>
+ <li><a href="#poisonvalues">Poison Values</a></li>
<li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
<li><a href="#constantexprs">Constant Expressions</a></li>
</ol>
@@ -100,7 +100,18 @@
<li><a href="#othervalues">Other Values</a>
<ol>
<li><a href="#inlineasm">Inline Assembler Expressions</a></li>
- <li><a href="#metadata">Metadata Nodes and Metadata Strings</a></li>
+ <li><a href="#metadata">Metadata Nodes and Metadata Strings</a>
+ <ol>
+ <li><a href="#tbaa">'<tt>tbaa</tt>' Metadata</a></li>
+ <li><a href="#fpaccuracy">'<tt>fpaccuracy</tt>' Metadata</a></li>
+ <li><a href="#range">'<tt>range</tt>' Metadata</a></li>
+ </ol>
+ </li>
+ </ol>
+ </li>
+ <li><a href="#module_flags">Module Flags Metadata</a>
+ <ol>
+ <li><a href="#objc_gc_flags">Objective-C Garbage Collection Module Flags Metadata</a></li>
</ol>
</li>
<li><a href="#intrinsic_globals">Intrinsic Global Variables</a>
@@ -123,7 +134,6 @@
<li><a href="#i_switch">'<tt>switch</tt>' Instruction</a></li>
<li><a href="#i_indirectbr">'<tt>indirectbr</tt>' Instruction</a></li>
<li><a href="#i_invoke">'<tt>invoke</tt>' Instruction</a></li>
- <li><a href="#i_unwind">'<tt>unwind</tt>' Instruction</a></li>
<li><a href="#i_resume">'<tt>resume</tt>' Instruction</a></li>
<li><a href="#i_unreachable">'<tt>unreachable</tt>' Instruction</a></li>
</ol>
@@ -283,10 +293,10 @@
</li>
<li><a href="#int_memorymarkers">Memory Use Markers</a>
<ol>
- <li><a href="#int_lifetime_start"><tt>llvm.lifetime.start</tt></a></li>
- <li><a href="#int_lifetime_end"><tt>llvm.lifetime.end</tt></a></li>
- <li><a href="#int_invariant_start"><tt>llvm.invariant.start</tt></a></li>
- <li><a href="#int_invariant_end"><tt>llvm.invariant.end</tt></a></li>
+ <li><a href="#int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a></li>
+ <li><a href="#int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a></li>
+ <li><a href="#int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a></li>
+ <li><a href="#int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_general">General intrinsics</a>
@@ -301,6 +311,8 @@
'<tt>llvm.stackprotector</tt>' Intrinsic</a></li>
<li><a href="#int_objectsize">
'<tt>llvm.objectsize</tt>' Intrinsic</a></li>
+ <li><a href="#int_expect">
+ '<tt>llvm.expect</tt>' Intrinsic</a></li>
</ol>
</li>
</ol>
@@ -479,43 +491,43 @@
<div>
-<p>LLVM programs are composed of "Module"s, each of which is a translation unit
- of the input programs. Each module consists of functions, global variables,
- and symbol table entries. Modules may be combined together with the LLVM
- linker, which merges function (and global variable) definitions, resolves
- forward declarations, and merges symbol table entries. Here is an example of
- the "hello world" module:</p>
+<p>LLVM programs are composed of <tt>Module</tt>s, each of which is a
+ translation unit of the input programs. Each module consists of functions,
+ global variables, and symbol table entries. Modules may be combined together
+ with the LLVM linker, which merges function (and global variable)
+ definitions, resolves forward declarations, and merges symbol table
+ entries. Here is an example of the "hello world" module:</p>
<pre class="doc_code">
<i>; Declare the string constant as a global constant.</i>&nbsp;
-<a href="#identifiers">@.LC0</a> = <a href="#linkage_internal">internal</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00" <i>; [13 x i8]*</i>&nbsp;
+<a href="#identifiers">@.str</a> = <a href="#linkage_private">private</a>&nbsp;<a href="#globalvars">unnamed_addr</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00"&nbsp;
<i>; External declaration of the puts function</i>&nbsp;
-<a href="#functionstructure">declare</a> i32 @puts(i8*) <i>; i32 (i8*)* </i>&nbsp;
+<a href="#functionstructure">declare</a> i32 @puts(i8* <a href="#nocapture">nocapture</a>) <a href="#fnattrs">nounwind</a>&nbsp;
<i>; Definition of main function</i>
define i32 @main() { <i>; i32()* </i>&nbsp;
<i>; Convert [13 x i8]* to i8 *...</i>&nbsp;
- %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.LC0, i64 0, i64 0 <i>; i8*</i>&nbsp;
+ %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.str, i64 0, i64 0
<i>; Call puts function to write out the string to stdout.</i>&nbsp;
- <a href="#i_call">call</a> i32 @puts(i8* %cast210) <i>; i32</i>&nbsp;
+ <a href="#i_call">call</a> i32 @puts(i8* %cast210)
<a href="#i_ret">ret</a> i32 0&nbsp;
}
<i>; Named metadata</i>
-!1 = metadata !{i32 41}
+!1 = metadata !{i32 42}
!foo = !{!1, null}
</pre>
<p>This example is made up of a <a href="#globalvars">global variable</a> named
- "<tt>.LC0</tt>", an external declaration of the "<tt>puts</tt>" function,
+ "<tt>.str</tt>", an external declaration of the "<tt>puts</tt>" function,
a <a href="#functionstructure">function definition</a> for
"<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a>
- "<tt>foo"</tt>.</p>
+ "<tt>foo</tt>".</p>
-<p>In general, a module is made up of a list of global values, where both
- functions and global variables are global values. Global values are
+<p>In general, a module is made up of a list of global values (where both
+ functions and global variables are global values). Global values are
represented by a pointer to a memory location (in this case, a pointer to an
array of char, and a pointer to a function), and have one of the
following <a href="#linkage">linkage types</a>.</p>
@@ -630,7 +642,7 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
be merged with equivalent globals. These linkage types are otherwise the
same as their non-<tt>odr</tt> versions.</dd>
- <dt><tt><b><a name="linkage_external">external</a></b></tt>:</dt>
+ <dt><tt><b><a name="linkage_external">external</a></b></tt></dt>
<dd>If none of the above identifiers are used, the global is externally
visible, meaning that it participates in linkage and can be used to
resolve external symbol references.</dd>
@@ -927,7 +939,7 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
alignments must be a power of 2.</p>
<p>If the <tt>unnamed_addr</tt> attribute is given, the address is known to not
- be significant and two identical functions can be merged</p>.
+ be significant and two identical functions can be merged.</p>
<h5>Syntax:</h5>
<pre class="doc_code">
@@ -1136,6 +1148,10 @@ define void @f() optsize { ... }
</pre>
<dl>
+ <dt><tt><b>address_safety</b></tt></dt>
+  <dd>This attribute indicates that the address safety analysis
+  is enabled for this function.</dd>
+
<dt><tt><b>alignstack(&lt;<em>n</em>&gt;)</b></tt></dt>
<dd>This attribute indicates that, when emitting the prologue and epilogue,
the backend should forcibly align the stack pointer. Specify the
@@ -1195,8 +1211,7 @@ define void @f() optsize { ... }
It does not write through any pointer arguments
(including <tt><a href="#byval">byval</a></tt> arguments) and never
changes any state visible to callers. This means that it cannot unwind
- exceptions by calling the <tt>C++</tt> exception throwing methods, but
- could use the <tt>unwind</tt> instruction.</dd>
+ exceptions by calling the <tt>C++</tt> exception throwing methods.</dd>
<dt><tt><b><a name="readonly">readonly</a></b></tt></dt>
<dd>This attribute indicates that the function does not write through any
@@ -1206,8 +1221,13 @@ define void @f() optsize { ... }
and read state that may be set in the caller. A readonly function always
returns the same value (or unwinds an exception identically) when called
with the same set of arguments and global state. It cannot unwind an
- exception by calling the <tt>C++</tt> exception throwing methods, but may
- use the <tt>unwind</tt> instruction.</dd>
+ exception by calling the <tt>C++</tt> exception throwing methods.</dd>
+
+ <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
+  <dd>This attribute indicates that this function can return twice. The
+  C <code>setjmp</code> is an example of such a function. The compiler
+  disables some optimizations (like tail calls) in the caller of these
+  functions; a sketch of such a declaration follows this list.</dd>
<dt><tt><b><a name="ssp">ssp</a></b></tt></dt>
<dd>This attribute indicates that the function should emit a stack smashing
@@ -1236,12 +1256,6 @@ define void @f() optsize { ... }
show that no exceptions passes by it. This is normally the case for
the ELF x86-64 abi, but it can be disabled for some compilation
units.</dd>
-
- <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
- <dd>This attribute indicates that this function can return
- twice. The C <code>setjmp</code> is an example of such a function.
- The compiler disables some optimizations (like tail calls) in the caller of
- these functions.</dd>
</dl>
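+
+<p>For example, the <tt>returns_twice</tt> attribute described above might
+   appear on a declaration of the C library function <tt>setjmp</tt> (a
+   minimal sketch; the <tt>i8*</tt> parameter stands in for the platform's
+   <tt>jmp_buf</tt>):</p>
+
+<pre class="doc_code">
+declare i32 @setjmp(i8*) returns_twice
+</pre>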
</div>
@@ -1603,7 +1617,7 @@ that determines which other atomic instructions on the same address they
<i>synchronize with</i>. These semantics are borrowed from Java and C++0x,
but are somewhat more colloquial. If these descriptions aren't precise enough,
check those specs (see spec references in the
-<a href="Atomic.html#introduction">atomics guide</a>).
+<a href="Atomics.html#introduction">atomics guide</a>).
<a href="#i_fence"><code>fence</code></a> instructions
treat these orderings somewhat differently since they don't take an address.
See that instruction's documentation for details.</p>
@@ -1702,7 +1716,7 @@ in signal handlers).</p>
</tr>
<tr>
<td><a href="#t_floating">floating point</a></td>
- <td><tt>float, double, x86_fp80, fp128, ppc_fp128</tt></td>
+ <td><tt>half, float, double, x86_fp80, fp128, ppc_fp128</tt></td>
</tr>
<tr>
<td><a name="t_firstclass">first class</a></td>
@@ -1802,6 +1816,7 @@ in signal handlers).</p>
<table>
<tbody>
<tr><th>Type</th><th>Description</th></tr>
+ <tr><td><tt>half</tt></td><td>16-bit floating point value</td></tr>
<tr><td><tt>float</tt></td><td>32-bit floating point value</td></tr>
<tr><td><tt>double</tt></td><td>64-bit floating point value</td></tr>
<tr><td><tt>fp128</tt></td><td>128-bit floating point value (112-bit mantissa)</td></tr>
@@ -1906,9 +1921,9 @@ in signal handlers).</p>
<div>
<p>Aggregate Types are a subset of derived types that can contain multiple
- member types. <a href="#t_array">Arrays</a>,
- <a href="#t_struct">structs</a>, and <a href="#t_vector">vectors</a> are
- aggregate types.</p>
+ member types. <a href="#t_array">Arrays</a> and
+ <a href="#t_struct">structs</a> are aggregate types.
+ <a href="#t_vector">Vectors</a> are not considered to be aggregate types.</p>
</div>
@@ -2182,8 +2197,8 @@ in signal handlers).</p>
</pre>
<p>The number of elements is a constant integer value larger than 0; elementtype
- may be any integer or floating point type. Vectors of size zero are not
- allowed, and pointers are not allowed as the element type.</p>
+ may be any integer or floating point type, or a pointer to these types.
+   Vectors of size zero are not allowed.</p>
<h5>Examples:</h5>
<table class="layout">
@@ -2199,6 +2214,10 @@ in signal handlers).</p>
<td class="left"><tt>&lt;2 x i64&gt;</tt></td>
<td class="left">Vector of 2 64-bit integer values.</td>
</tr>
+ <tr class="layout">
+ <td class="left"><tt>&lt;4 x i64*&gt;</tt></td>
+ <td class="left">Vector of 4 pointers to 64-bit integer values.</td>
+ </tr>
</table>
</div>
@@ -2257,10 +2276,11 @@ in signal handlers).</p>
represented in their IEEE hexadecimal format so that assembly and disassembly
do not cause any bits to change in the constants.</p>
-<p>When using the hexadecimal form, constants of types float and double are
+<p>When using the hexadecimal form, constants of types half, float, and double are
represented using the 16-digit form shown above (which matches the IEEE754
- representation for double); float values must, however, be exactly
- representable as IEE754 single precision. Hexadecimal format is always used
+ representation for double); half and float values must, however, be exactly
+   representable as IEEE754 half and single precision, respectively.
+ Hexadecimal format is always used
for long double, and there are three forms of long double. The 80-bit format
used by x86 is represented as <tt>0xK</tt> followed by 20 hexadecimal digits.
The 128-bit format used by PowerPC (two adjacent doubles) is represented
@@ -2495,22 +2515,21 @@ b: unreachable
<!-- ======================================================================= -->
<h3>
- <a name="trapvalues">Trap Values</a>
+ <a name="poisonvalues">Poison Values</a>
</h3>
<div>
-<p>Trap values are similar to <a href="#undefvalues">undef values</a>, however
- instead of representing an unspecified bit pattern, they represent the
- fact that an instruction or constant expression which cannot evoke side
- effects has nevertheless detected a condition which results in undefined
- behavior.</p>
+<p>Poison values are similar to <a href="#undefvalues">undef values</a>;
+   however, they also represent the fact that an instruction or constant
+   expression which
+ cannot evoke side effects has nevertheless detected a condition which results
+ in undefined behavior.</p>
-<p>There is currently no way of representing a trap value in the IR; they
+<p>There is currently no way of representing a poison value in the IR; they
only exist when produced by operations such as
<a href="#i_add"><tt>add</tt></a> with the <tt>nsw</tt> flag.</p>
-<p>Trap value behavior is defined in terms of value <i>dependence</i>:</p>
+<p>Poison value behavior is defined in terms of value <i>dependence</i>:</p>
<ul>
<li>Values other than <a href="#i_phi"><tt>phi</tt></a> nodes depend on
@@ -2527,7 +2546,7 @@ b: unreachable
control back to them.</li>
<li><a href="#i_invoke"><tt>Invoke</tt></a> instructions depend on the
- <a href="#i_ret"><tt>ret</tt></a>, <a href="#i_unwind"><tt>unwind</tt></a>,
+ <a href="#i_ret"><tt>ret</tt></a>, <a href="#i_resume"><tt>resume</tt></a>,
or exception-throwing call instructions that dynamically transfer control
back to them.</li>
@@ -2561,62 +2580,61 @@ b: unreachable
</ul>
-<p>Whenever a trap value is generated, all values which depend on it evaluate
- to trap. If they have side effects, they evoke their side effects as if each
- operand with a trap value were undef. If they have externally-visible side
- effects, the behavior is undefined.</p>
+<p>Poison values have the same behavior as <a href="#undefvalues">undef values</a>,
+   with the additional effect that any instruction which has a <i>dependence</i>
+   on a poison value has undefined behavior.</p>
<p>Here are some examples:</p>
<pre class="doc_code">
entry:
- %trap = sub nuw i32 0, 1 ; Results in a trap value.
- %still_trap = and i32 %trap, 0 ; Whereas (and i32 undef, 0) would return 0.
- %trap_yet_again = getelementptr i32* @h, i32 %still_trap
- store i32 0, i32* %trap_yet_again ; undefined behavior
+ %poison = sub nuw i32 0, 1 ; Results in a poison value.
+ %still_poison = and i32 %poison, 0 ; 0, but also poison.
+ %poison_yet_again = getelementptr i32* @h, i32 %still_poison
+ store i32 0, i32* %poison_yet_again ; memory at @h[0] is poisoned
- store i32 %trap, i32* @g ; Trap value conceptually stored to memory.
- %trap2 = load i32* @g ; Returns a trap value, not just undef.
+ store i32 %poison, i32* @g ; Poison value stored to memory.
+ %poison2 = load i32* @g ; Poison value loaded back from memory.
- volatile store i32 %trap, i32* @g ; External observation; undefined behavior.
+ store volatile i32 %poison, i32* @g ; External observation; undefined behavior.
%narrowaddr = bitcast i32* @g to i16*
%wideaddr = bitcast i32* @g to i64*
- %trap3 = load i16* %narrowaddr ; Returns a trap value.
- %trap4 = load i64* %wideaddr ; Returns a trap value.
+ %poison3 = load i16* %narrowaddr ; Returns a poison value.
+ %poison4 = load i64* %wideaddr ; Returns a poison value.
- %cmp = icmp slt i32 %trap, 0 ; Returns a trap value.
- br i1 %cmp, label %true, label %end ; Branch to either destination.
+ %cmp = icmp slt i32 %poison, 0 ; Returns a poison value.
+ br i1 %cmp, label %true, label %end ; Branch to either destination.
true:
- volatile store i32 0, i32* @g ; This is control-dependent on %cmp, so
- ; it has undefined behavior.
+ store volatile i32 0, i32* @g ; This is control-dependent on %cmp, so
+ ; it has undefined behavior.
br label %end
end:
%p = phi i32 [ 0, %entry ], [ 1, %true ]
- ; Both edges into this PHI are
- ; control-dependent on %cmp, so this
- ; always results in a trap value.
+ ; Both edges into this PHI are
+ ; control-dependent on %cmp, so this
+ ; always results in a poison value.
- volatile store i32 0, i32* @g ; This would depend on the store in %true
- ; if %cmp is true, or the store in %entry
- ; otherwise, so this is undefined behavior.
+ store volatile i32 0, i32* @g ; This would depend on the store in %true
+ ; if %cmp is true, or the store in %entry
+ ; otherwise, so this is undefined behavior.
br i1 %cmp, label %second_true, label %second_end
- ; The same branch again, but this time the
- ; true block doesn't have side effects.
+ ; The same branch again, but this time the
+ ; true block doesn't have side effects.
second_true:
; No side effects!
ret void
second_end:
- volatile store i32 0, i32* @g ; This time, the instruction always depends
- ; on the store in %end. Also, it is
- ; control-equivalent to %end, so this is
- ; well-defined (again, ignoring earlier
- ; undefined behavior in this example).
+ store volatile i32 0, i32* @g ; This time, the instruction always depends
+ ; on the store in %end. Also, it is
+ ; control-equivalent to %end, so this is
+ ; well-defined (ignoring earlier undefined
+ ; behavior in this example).
</pre>
</div>
@@ -2795,7 +2813,7 @@ second_end:
<div>
<p>LLVM supports inline assembler expressions (as opposed
- to <a href="#moduleasm"> Module-Level Inline Assembly</a>) through the use of
+ to <a href="#moduleasm">Module-Level Inline Assembly</a>) through the use of
a special value. This value represents the inline assembler as a string
(containing the instructions to emit), a list of operand constraints (stored
as a string), a flag that indicates whether or not the inline asm
@@ -2837,23 +2855,27 @@ call void asm alignstack "eieio", ""()
<p>If both keywords appear, the '<tt>sideeffect</tt>' keyword must come
first.</p>
+<!--
<p>TODO: The format of the asm and constraints string still need to be
documented here. Constraints on what can be done (e.g. duplication, moving,
etc need to be documented). This is probably best done by reference to
another document that covers inline asm from a holistic perspective.</p>
+ -->
+<!-- _______________________________________________________________________ -->
<h4>
-<a name="inlineasm_md">Inline Asm Metadata</a>
+ <a name="inlineasm_md">Inline Asm Metadata</a>
</h4>
<div>
-<p>The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode
- attached to it that contains a list of constant integers. If present, the
- code generator will use the integer as the location cookie value when report
- errors through the LLVMContext error reporting mechanisms. This allows a
- front-end to correlate backend errors that occur with inline asm back to the
- source code that produced it. For example:</p>
+<p>The call instructions that wrap inline asm nodes may have a
+ "<tt>!srcloc</tt>" MDNode attached to it that contains a list of constant
+ integers. If present, the code generator will use the integer as the
+   location cookie value when reporting errors through the <tt>LLVMContext</tt>
+ error reporting mechanisms. This allows a front-end to correlate backend
+ errors that occur with inline asm back to the source code that produced it.
+ For example:</p>
<pre class="doc_code">
call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
@@ -2862,7 +2884,7 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
</pre>
<p>It is up to the front-end to make sense of the magic numbers it places in the
- IR. If the MDNode contains multiple constants, the code generator will use
+ IR. If the MDNode contains multiple constants, the code generator will use
the one that corresponds to the line of the asm that the error occurs on.</p>
</div>
@@ -2884,20 +2906,33 @@ call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
preceding exclamation point ('<tt>!</tt>').</p>
<p>A metadata string is a string surrounded by double quotes. It can contain
- any character by escaping non-printable characters with "\xx" where "xx" is
- the two digit hex code. For example: "<tt>!"test\00"</tt>".</p>
+ any character by escaping non-printable characters with "<tt>\xx</tt>" where
+ "<tt>xx</tt>" is the two digit hex code. For example:
+ "<tt>!"test\00"</tt>".</p>
<p>Metadata nodes are represented with notation similar to structure constants
(a comma separated list of elements, surrounded by braces and preceded by an
- exclamation point). For example: "<tt>!{ metadata !"test\00", i32
- 10}</tt>". Metadata nodes can have any values as their operand.</p>
+ exclamation point). Metadata nodes can have any values as their operand. For
+ example:</p>
+
+<div class="doc_code">
+<pre>
+!{ metadata !"test\00", i32 10}
+</pre>
+</div>
<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
metadata nodes, which can be looked up in the module symbol table. For
- example: "<tt>!foo = metadata !{!4, !3}</tt>".
+ example:</p>
+
+<div class="doc_code">
+<pre>
+!foo = metadata !{!4, !3}
+</pre>
+</div>
<p>Metadata can be used as function arguments. Here the <tt>llvm.dbg.value</tt>
- function is using two metadata arguments.</p>
+ function is using two metadata arguments:</p>
<div class="doc_code">
<pre>
@@ -2906,7 +2941,8 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
</div>
<p>Metadata can be attached to an instruction. Here metadata <tt>!21</tt> is
- attached with <tt>add</tt> instruction using <tt>!dbg</tt> identifier.</p>
+ attached to the <tt>add</tt> instruction using the <tt>!dbg</tt>
+ identifier:</p>
<div class="doc_code">
<pre>
@@ -2914,6 +2950,325 @@ call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
</pre>
</div>
+<p>More information about specific metadata nodes recognized by the optimizers
+ and code generator is found below.</p>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="tbaa">'<tt>tbaa</tt>' Metadata</a>
+</h4>
+
+<div>
+
+<p>In LLVM IR, memory does not have types, so LLVM's own type system is not
+ suitable for doing TBAA. Instead, metadata is added to the IR to describe
+ a type system of a higher level language. This can be used to implement
+ typical C/C++ TBAA, but it can also be used to implement custom alias
+ analysis behavior for other languages.</p>
+
+<p>The current metadata format is very simple. TBAA metadata nodes have up to
+ three fields, e.g.:</p>
+
+<div class="doc_code">
+<pre>
+!0 = metadata !{ metadata !"an example type tree" }
+!1 = metadata !{ metadata !"int", metadata !0 }
+!2 = metadata !{ metadata !"float", metadata !0 }
+!3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+</pre>
+</div>
+
+<p>The first field is an identity field. It can be any value, usually
+ a metadata string, which uniquely identifies the type. The most important
+ name in the tree is the name of the root node. Two trees with
+ different root node names are entirely disjoint, even if they
+ have leaves with common names.</p>
+
+<p>The second field identifies the type's parent node in the tree, or
+ is null or omitted for a root node. A type is considered to alias
+ all of its descendants and all of its ancestors in the tree. Also,
+ a type is considered to alias all types in other trees, so that
+ bitcode produced from multiple front-ends is handled conservatively.</p>
+
+<p>If the third field is present, it is an integer which, if equal to 1,
+   indicates that the type is "constant" (meaning
+ <tt>pointsToConstantMemory</tt> should return true; see
+ <a href="AliasAnalysis.html#OtherItfs">other useful
+ <tt>AliasAnalysis</tt> methods</a>).</p>
+
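+<p>For example, given the nodes above, a load of an <tt>int</tt> might be
+   annotated as follows (a minimal sketch; <tt>%ptr</tt> is a hypothetical
+   value):</p>
+
+<div class="doc_code">
+<pre>
+%val = load i32* %ptr, align 4, !tbaa !1
+</pre>
+</div>
+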
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="fpaccuracy">'<tt>fpaccuracy</tt>' Metadata</a>
+</h4>
+
+<div>
+
+<p><tt>fpaccuracy</tt> metadata may be attached to any instruction of floating
+ point type. It expresses the maximum relative error allowed in the result
+ of that instruction, in ULPs, thus potentially allowing the compiler to use
+ a more efficient but less accurate method of computing it.
+ ULP is defined as follows:</p>
+
+<blockquote>
+
+<p>If <tt>x</tt> is a real number that lies between two finite consecutive
+ floating-point numbers <tt>a</tt> and <tt>b</tt>, without being equal to one
+ of them, then <tt>ulp(x) = |b - a|</tt>, otherwise <tt>ulp(x)</tt> is the
+ distance between the two non-equal finite floating-point numbers nearest
+ <tt>x</tt>. Moreover, <tt>ulp(NaN)</tt> is <tt>NaN</tt>.</p>
+
+</blockquote>
+
+<p>The metadata node shall consist of a single non-negative floating
+ point number representing the maximum relative error. For example,
+ 2.5 ULP:</p>
+
+<div class="doc_code">
+<pre>
+!0 = metadata !{ float 2.5 }
+</pre>
+</div>
+
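+<p>Such a node might then be attached to a floating point operation (a minimal
+   sketch; <tt>%x</tt> and <tt>%y</tt> are hypothetical values):</p>
+
+<div class="doc_code">
+<pre>
+%z = fdiv float %x, %y, !fpaccuracy !0
+</pre>
+</div>
+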
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="range">'<tt>range</tt>' Metadata</a>
+</h4>
+
+<div>
+<p><tt>range</tt> metadata may be attached only to loads of integer types. It
+ expresses the possible ranges the loaded value is in. The ranges are
+ represented with a flattened list of integers. The loaded value is known to
+ be in the union of the ranges defined by each consecutive pair. Each pair
+ has the following properties:</p>
+<ul>
+ <li>The type must match the type loaded by the instruction.</li>
+ <li>The pair <tt>a,b</tt> represents the range <tt>[a,b)</tt>.</li>
+ <li>Both <tt>a</tt> and <tt>b</tt> are constants.</li>
+ <li>The range is allowed to wrap.</li>
+  <li>The range should not represent the full or empty set. That is,
+      <tt>a!=b</tt>.</li>
+</ul>
+
+<p>Examples:</p>
+<div class="doc_code">
+<pre>
+ %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1
+ %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
+ %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5
+...
+!0 = metadata !{ i8 0, i8 2 }
+!1 = metadata !{ i8 255, i8 2 }
+!2 = metadata !{ i8 0, i8 2, i8 3, i8 6 }
+</pre>
+</div>
+</div>
+</div>
+
+</div>
+
+<!-- *********************************************************************** -->
+<h2>
+ <a name="module_flags">Module Flags Metadata</a>
+</h2>
+<!-- *********************************************************************** -->
+
+<div>
+
+<p>Information about the module as a whole is difficult to convey to LLVM's
+ subsystems. The LLVM IR isn't sufficient to transmit this
+ information. The <tt>llvm.module.flags</tt> named metadata exists in order to
+ facilitate this. These flags are in the form of key / value pairs &mdash;
+   facilitate this. These flags are in the form of key / value pairs &mdash;
+   much like a dictionary &mdash; making it easy for any subsystem that cares
+
+<p>The <tt>llvm.module.flags</tt> metadata contains a list of metadata
+ triplets. Each triplet has the following form:</p>
+
+<ul>
+  <li>The first element is a <i>behavior</i> flag, which specifies the behavior
+      when two (or more) modules are merged together and two (or more) metadata
+      entries with the same ID are encountered. The supported behaviors are
+      described below.</li>
+
+ <li>The second element is a metadata string that is a unique ID for the
+ metadata. How each ID is interpreted is documented below.</li>
+
+ <li>The third element is the value of the flag.</li>
+</ul>
+
+<p>When two (or more) modules are merged together, the resulting
+ <tt>llvm.module.flags</tt> metadata is the union of the
+   modules' <tt>llvm.module.flags</tt> metadata. The only exception is a flag
+   with the <i>Override</i> behavior, which may override another flag's value
+   (see below).</p>
+
+<p>The following behaviors are supported:</p>
+
+<table border="1" cellspacing="0" cellpadding="4">
+ <tbody>
+ <tr>
+ <th>Value</th>
+ <th>Behavior</th>
+ </tr>
+ <tr>
+ <td>1</td>
+ <td align="left">
+ <dl>
+ <dt><b>Error</b></dt>
+ <dd>Emits an error if two values disagree. It is an error to have an ID
+ with both an Error and a Warning behavior.</dd>
+ </dl>
+ </td>
+ </tr>
+ <tr>
+ <td>2</td>
+ <td align="left">
+ <dl>
+ <dt><b>Warning</b></dt>
+ <dd>Emits a warning if two values disagree.</dd>
+ </dl>
+ </td>
+ </tr>
+ <tr>
+ <td>3</td>
+ <td align="left">
+ <dl>
+ <dt><b>Require</b></dt>
+          <dd>Emits an error when the specified flag is not present or doesn't
+              have the specified value. It is an error for two (or more)
+ <tt>llvm.module.flags</tt> with the same ID to have the Require
+ behavior but different values. There may be multiple Require flags
+ per ID.</dd>
+ </dl>
+ </td>
+ </tr>
+ <tr>
+ <td>4</td>
+ <td align="left">
+ <dl>
+ <dt><b>Override</b></dt>
+ <dd>Uses the specified value if the two values disagree. It is an
+ error for two (or more) <tt>llvm.module.flags</tt> with the same
+ ID to have the Override behavior but different values.</dd>
+ </dl>
+ </td>
+ </tr>
+ </tbody>
+</table>
+
+<p>An example of module flags:</p>
+
+<pre class="doc_code">
+!0 = metadata !{ i32 1, metadata !"foo", i32 1 }
+!1 = metadata !{ i32 4, metadata !"bar", i32 37 }
+!2 = metadata !{ i32 2, metadata !"qux", i32 42 }
+!3 = metadata !{ i32 3, metadata !"qux",
+ metadata !{
+ metadata !"foo", i32 1
+ }
+}
+!llvm.module.flags = !{ !0, !1, !2, !3 }
+</pre>
+
+<ul>
+ <li><p>Metadata <tt>!0</tt> has the ID <tt>!"foo"</tt> and the value '1'. The
+ behavior if two or more <tt>!"foo"</tt> flags are seen is to emit an
+ error if their values are not equal.</p></li>
+
+ <li><p>Metadata <tt>!1</tt> has the ID <tt>!"bar"</tt> and the value '37'. The
+ behavior if two or more <tt>!"bar"</tt> flags are seen is to use the
+ value '37' if their values are not equal.</p></li>
+
+ <li><p>Metadata <tt>!2</tt> has the ID <tt>!"qux"</tt> and the value '42'. The
+ behavior if two or more <tt>!"qux"</tt> flags are seen is to emit a
+ warning if their values are not equal.</p></li>
+
+ <li><p>Metadata <tt>!3</tt> has the ID <tt>!"qux"</tt> and the value:</p>
+
+<pre class="doc_code">
+metadata !{ metadata !"foo", i32 1 }
+</pre>
+
+ <p>The behavior is to emit an error if the <tt>llvm.module.flags</tt> does
+ not contain a flag with the ID <tt>!"foo"</tt> that has the value
+ '1'. If two or more <tt>!"qux"</tt> flags exist, then they must have
+ the same value or an error will be issued.</p></li>
+</ul>
+
+
+<!-- ======================================================================= -->
+<h3>
+<a name="objc_gc_flags">Objective-C Garbage Collection Module Flags Metadata</a>
+</h3>
+
+<div>
+
+<p>On the Mach-O platform, Objective-C stores metadata about garbage collection
+ in a special section called "image info". The metadata consists of a version
+ number and a bitmask specifying what types of garbage collection are
+   supported (if any) by the file. If two or more modules are linked together,
+   their garbage collection metadata needs to be merged rather than appended
+ together.</p>
+
+<p>The Objective-C garbage collection module flags metadata consists of the
+ following key-value pairs:</p>
+
+<table border="1" cellspacing="0" cellpadding="4">
+ <col width="30%">
+ <tbody>
+ <tr>
+ <th>Key</th>
+ <th>Value</th>
+ </tr>
+ <tr>
+ <td><tt>Objective-C&nbsp;Version</tt></td>
+ <td align="left"><b>[Required]</b> &mdash; The Objective-C ABI
+ version. Valid values are 1 and 2.</td>
+ </tr>
+ <tr>
+ <td><tt>Objective-C&nbsp;Image&nbsp;Info&nbsp;Version</tt></td>
+ <td align="left"><b>[Required]</b> &mdash; The version of the image info
+ section. Currently always 0.</td>
+ </tr>
+ <tr>
+ <td><tt>Objective-C&nbsp;Image&nbsp;Info&nbsp;Section</tt></td>
+ <td align="left"><b>[Required]</b> &mdash; The section to place the
+ metadata. Valid values are <tt>"__OBJC, __image_info, regular"</tt> for
+ Objective-C ABI version 1, and <tt>"__DATA,__objc_imageinfo, regular,
+ no_dead_strip"</tt> for Objective-C ABI version 2.</td>
+ </tr>
+ <tr>
+ <td><tt>Objective-C&nbsp;Garbage&nbsp;Collection</tt></td>
+ <td align="left"><b>[Required]</b> &mdash; Specifies whether garbage
+ collection is supported or not. Valid values are 0, for no garbage
+ collection, and 2, for garbage collection supported.</td>
+ </tr>
+ <tr>
+ <td><tt>Objective-C&nbsp;GC&nbsp;Only</tt></td>
+ <td align="left"><b>[Optional]</b> &mdash; Specifies that only garbage
+ collection is supported. If present, its value must be 6. This flag
+ requires that the <tt>Objective-C Garbage Collection</tt> flag have the
+ value 2.</td>
+ </tr>
+ </tbody>
+</table>
+
+<p>Some important flag interactions:</p>
+
+<ul>
+ <li>If a module with <tt>Objective-C Garbage Collection</tt> set to 0 is
+ merged with a module with <tt>Objective-C Garbage Collection</tt> set to
+ 2, then the resulting module has the <tt>Objective-C Garbage
+ Collection</tt> flag set to 0.</li>
+
+ <li>A module with <tt>Objective-C Garbage Collection</tt> set to 0 cannot be
+ merged with a module with <tt>Objective-C GC Only</tt> set to 6.</li>
+</ul>
+
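+<p>For example, a module built for Objective-C ABI version 2 with garbage
+   collection supported might carry flags along these lines (a sketch; the
+   choice of the <i>Error</i> behavior value 1 for each key is an assumption,
+   since the table above specifies the keys but not a per-key merge
+   behavior):</p>
+
+<pre class="doc_code">
+!0 = metadata !{ i32 1, metadata !"Objective-C Version", i32 2 }
+!1 = metadata !{ i32 1, metadata !"Objective-C Image Info Version", i32 0 }
+!2 = metadata !{ i32 1, metadata !"Objective-C Image Info Section",
+                 metadata !"__DATA,__objc_imageinfo, regular, no_dead_strip" }
+!3 = metadata !{ i32 1, metadata !"Objective-C Garbage Collection", i32 2 }
+!llvm.module.flags = !{ !0, !1, !2, !3 }
+</pre>
+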
</div>
</div>
@@ -2942,26 +3297,29 @@ href="#linkage_appending">appending linkage</a>. This array contains a list of
pointers to global variables and functions which may optionally have a pointer
cast formed of bitcast or getelementptr. For example, a legal use of it is:</p>
+<div class="doc_code">
<pre>
- @X = global i8 4
- @Y = global i32 123
+@X = global i8 4
+@Y = global i32 123
- @llvm.used = appending global [2 x i8*] [
- i8* @X,
- i8* bitcast (i32* @Y to i8*)
- ], section "llvm.metadata"
+@llvm.used = appending global [2 x i8*] [
+ i8* @X,
+ i8* bitcast (i32* @Y to i8*)
+], section "llvm.metadata"
</pre>
+</div>
<p>If a global variable appears in the <tt>@llvm.used</tt> list, then the
-compiler, assembler, and linker are required to treat the symbol as if there is
-a reference to the global that it cannot see. For example, if a variable has
-internal linkage and no references other than that from the <tt>@llvm.used</tt>
-list, it cannot be deleted. This is commonly used to represent references from
-inline asms and other things the compiler cannot "see", and corresponds to
-"attribute((used))" in GNU C.</p>
+ compiler, assembler, and linker are required to treat the symbol as if there
+ is a reference to the global that it cannot see. For example, if a variable
+ has internal linkage and no references other than that from
+ the <tt>@llvm.used</tt> list, it cannot be deleted. This is commonly used to
+ represent references from inline asms and other things the compiler cannot
+ "see", and corresponds to "<tt>attribute((used))</tt>" in GNU C.</p>
<p>On some targets, the code generator must emit a directive to the assembler or
-object file to prevent the assembler and linker from molesting the symbol.</p>
+ object file to prevent the assembler and linker from molesting the
+ symbol.</p>
</div>
@@ -2975,13 +3333,13 @@ object file to prevent the assembler and linker from molesting the symbol.</p>
<div>
<p>The <tt>@llvm.compiler.used</tt> directive is the same as the
-<tt>@llvm.used</tt> directive, except that it only prevents the compiler from
-touching the symbol. On targets that support it, this allows an intelligent
-linker to optimize references to the symbol without being impeded as it would be
-by <tt>@llvm.used</tt>.</p>
+ <tt>@llvm.used</tt> directive, except that it only prevents the compiler from
+ touching the symbol. On targets that support it, this allows an intelligent
+ linker to optimize references to the symbol without being impeded as it would
+ be by <tt>@llvm.used</tt>.</p>
<p>This is a rare construct that should only be used in rare circumstances, and
-should not be exposed to source languages.</p>
+ should not be exposed to source languages.</p>
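+
+<p>For example, mirroring the <tt>@llvm.used</tt> example above (a minimal
+   sketch):</p>
+
+<div class="doc_code">
+<pre>
+@X = internal global i8 4
+
+@llvm.compiler.used = appending global [1 x i8*] [ i8* @X ], section "llvm.metadata"
+</pre>
+</div>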
</div>
@@ -2991,12 +3349,19 @@ should not be exposed to source languages.</p>
</h3>
<div>
+
+<div class="doc_code">
<pre>
%0 = type { i32, void ()* }
@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
</pre>
-<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor functions and associated priorities. The functions referenced by this array will be called in ascending order of priority (i.e. lowest first) when the module is loaded. The order of functions with the same priority is not defined.
-</p>
+</div>
+
+<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor
+ functions and associated priorities. The functions referenced by this array
+ will be called in ascending order of priority (i.e. lowest first) when the
+ module is loaded. The order of functions with the same priority is not
+ defined.</p>
</div>
@@ -3006,13 +3371,18 @@ should not be exposed to source languages.</p>
</h3>
<div>
+
+<div class="doc_code">
<pre>
%0 = type { i32, void ()* }
@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
</pre>
+</div>
-<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions and associated priorities. The functions referenced by this array will be called in descending order of priority (i.e. highest first) when the module is loaded. The order of functions with the same priority is not defined.
-</p>
+<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions
+ and associated priorities. The functions referenced by this array will be
+ called in descending order of priority (i.e. highest first) when the module
+ is loaded. The order of functions with the same priority is not defined.</p>
</div>
@@ -3051,7 +3421,6 @@ should not be exposed to source languages.</p>
'<a href="#i_switch"><tt>switch</tt></a>',
'<a href="#i_indirectbr"><tt>indirectbr</tt></a>',
'<a href="#i_invoke"><tt>invoke</tt></a>',
- '<a href="#i_unwind"><tt>unwind</tt></a>',
'<a href="#i_resume"><tt>resume</tt></a>', and
'<a href="#i_unreachable"><tt>unreachable</tt></a>'.</p>
@@ -3271,15 +3640,15 @@ IfUnequal:
'<tt>normal</tt>' label or the '<tt>exception</tt>' label. If the callee
function returns with the "<tt><a href="#i_ret">ret</a></tt>" instruction,
control flow will return to the "normal" label. If the callee (or any
- indirect callees) returns with the "<a href="#i_unwind"><tt>unwind</tt></a>"
- instruction, control is interrupted and continued at the dynamically nearest
- "exception" label.</p>
+ indirect callees) returns via the "<a href="#i_resume"><tt>resume</tt></a>"
+ instruction or other exception handling mechanism, control is interrupted and
+ continued at the dynamically nearest "exception" label.</p>
<p>The '<tt>exception</tt>' label is a
<i><a href="ExceptionHandling.html#overview">landing pad</a></i> for the
exception. As such, '<tt>exception</tt>' label is required to have the
"<a href="#i_landingpad"><tt>landingpad</tt></a>" instruction, which contains
- the information about about the behavior of the program after unwinding
+ the information about the behavior of the program after unwinding
happens, as its first non-PHI instruction. The restrictions on the
"<tt>landingpad</tt>" instruction's tightly couples it to the
"<tt>invoke</tt>" instruction, so that the important information contained
@@ -3315,8 +3684,9 @@ IfUnequal:
<li>'<tt>normal label</tt>': the label reached when the called function
executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
- <li>'<tt>exception label</tt>': the label reached when a callee returns with
- the <a href="#i_unwind"><tt>unwind</tt></a> instruction. </li>
+ <li>'<tt>exception label</tt>': the label reached when a callee returns via
+ the <a href="#i_resume"><tt>resume</tt></a> instruction or other exception
+ handling mechanism.</li>
<li>The optional <a href="#fnattrs">function attributes</a> list. Only
'<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
@@ -3339,9 +3709,6 @@ IfUnequal:
block to the "normal" label. If the callee unwinds then no return value is
available.</p>
-<p>Note that the code generator does not yet completely support unwind, and
-that the invoke/unwind semantics are likely to change in future versions.</p>
-
<h5>Example:</h5>
<pre>
%retval = invoke i32 @Test(i32 15) to label %Continue
@@ -3352,38 +3719,6 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
</div>
-<!-- _______________________________________________________________________ -->
-
-<h4>
- <a name="i_unwind">'<tt>unwind</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
- unwind
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>unwind</tt>' instruction unwinds the stack, continuing control flow
- at the first callee in the dynamic call stack which used
- an <a href="#i_invoke"><tt>invoke</tt></a> instruction to perform the call.
- This is primarily used to implement exception handling.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>unwind</tt>' instruction causes execution of the current function to
- immediately halt. The dynamic call stack is then searched for the
- first <a href="#i_invoke"><tt>invoke</tt></a> instruction on the call stack.
- Once found, execution continues at the "exceptional" destination block
- specified by the <tt>invoke</tt> instruction. If there is no <tt>invoke</tt>
- instruction in the dynamic call chain, undefined behavior results.</p>
-
-<p>Note that the code generator does not yet completely support unwind, and
-that the invoke/unwind semantics are likely to change in future versions.</p>
-
-</div>
-
<!-- _______________________________________________________________________ -->
<h4>
@@ -3494,7 +3829,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
- is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
respectively, occurs.</p>
<h5>Example:</h5>
@@ -3575,7 +3910,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
- is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
respectively, occurs.</p>
<h5>Example:</h5>
@@ -3662,7 +3997,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
<tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
- is a <a href="#trapvalues">trap value</a> if unsigned and/or signed overflow,
+ is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
respectively, occurs.</p>
<h5>Example:</h5>
@@ -3732,7 +4067,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<p>Division by zero leads to undefined behavior.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>udiv</tt> is a <a href="#trapvalues">trap value</a> if %op1 is not a
+ <tt>udiv</tt> is a <a href="#poisonvalues">poison value</a> if %op1 is not a
multiple of %op2 (as such, "((a udiv exact b) mul b) == a").</p>
@@ -3776,7 +4111,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
a 32-bit division of -2147483648 by -1.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>sdiv</tt> is a <a href="#trapvalues">trap value</a> if the result would
+ <tt>sdiv</tt> is a <a href="#poisonvalues">poison value</a> if the result would
be rounded.</p>
<h5>Example:</h5>
@@ -3985,9 +4320,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
shift amount in <tt>op2</tt>.</p>
<p>If the <tt>nuw</tt> keyword is present, then the shift produces a
- <a href="#trapvalues">trap value</a> if it shifts out any non-zero bits. If
+ <a href="#poisonvalues">poison value</a> if it shifts out any non-zero bits. If
the <tt>nsw</tt> keyword is present, then the shift produces a
- <a href="#trapvalues">trap value</a> if it shifts out any bits that disagree
+ <a href="#poisonvalues">poison value</a> if it shifts out any bits that disagree
with the resultant sign bit. As such, NUW/NSW have the same semantics as
they would if the shift were expressed as a mul instruction with the same
nsw/nuw bits in (mul %op1, (shl 1, %op2)).</p>
@@ -4034,7 +4369,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
shift amount in <tt>op2</tt>.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>lshr</tt> is a <a href="#trapvalues">trap value</a> if any of the bits
+ <tt>lshr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
shifted out are non-zero.</p>
@@ -4082,7 +4417,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
the corresponding shift amount in <tt>op2</tt>.</p>
<p>If the <tt>exact</tt> keyword is present, the result value of the
- <tt>ashr</tt> is a <a href="#trapvalues">trap value</a> if any of the bits
+ <tt>ashr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
shifted out are non-zero.</p>
<h5>Example:</h5>
@@ -4124,9 +4459,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
- <td>In0</td>
- <td>In1</td>
- <td>Out</td>
+ <th>In0</th>
+ <th>In1</th>
+ <th>Out</th>
</tr>
<tr>
<td>0</td>
@@ -4185,9 +4520,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
- <td>In0</td>
- <td>In1</td>
- <td>Out</td>
+ <th>In0</th>
+ <th>In1</th>
+ <th>Out</th>
</tr>
<tr>
<td>0</td>
@@ -4249,9 +4584,9 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<table border="1" cellspacing="0" cellpadding="4">
<tbody>
<tr>
- <td>In0</td>
- <td>In1</td>
- <td>Out</td>
+ <th>In0</th>
+ <th>In1</th>
+ <th>Out</th>
</tr>
<tr>
<td>0</td>
@@ -4568,8 +4903,12 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
'<tt>alloca</tt>' instruction is commonly used to represent automatic
variables that must have an address available. When the function returns
(either with the <tt><a href="#i_ret">ret</a></tt>
- or <tt><a href="#i_unwind">unwind</a></tt> instructions), the memory is
- reclaimed. Allocating zero bytes is legal, but the result is undefined.</p>
+ or <tt><a href="#i_resume">resume</a></tt> instructions), the memory is
+ reclaimed. Allocating zero bytes is legal, but the result is undefined.
+   The order in which memory is allocated (i.e., which way the stack grows) is
+ not specified.</p>
+
<h5>Example:</h5>
<pre>
@@ -4590,7 +4929,7 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<h5>Syntax:</h5>
<pre>
- &lt;result&gt; = load [volatile] &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]
+ &lt;result&gt; = load [volatile] &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;][, !invariant.load !&lt;index&gt;]
&lt;result&gt; = load atomic [volatile] &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt;
!&lt;index&gt; = !{ i32 1 }
</pre>
@@ -4635,6 +4974,14 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
The code generator may select special instructions to save cache bandwidth,
such as the <tt>MOVNT</tt> instruction on x86.</p>
+<p>The optional <tt>!invariant.load</tt> metadata must reference a single
+   metadata name &lt;index&gt; corresponding to a metadata node with no
+   entries. The existence of the <tt>!invariant.load</tt> metadata on the
+ instruction tells the optimizer and code generator that this load address
+ points to memory which does not change value during program execution.
+ The optimizer may then move this load around, for example, by hoisting it
+ out of loops using loop invariant code motion.</p>
+
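+<p>For example (a minimal sketch; <tt>%p</tt> is a hypothetical pointer to
+   memory that never changes during execution):</p>
+
+<div class="doc_code">
+<pre>
+%v = load i32* %p, align 4, !invariant.load !0
+...
+!0 = metadata !{}
+</pre>
+</div>
+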
<h5>Semantics:</h5>
<p>The location of memory pointed to is loaded. If the value being loaded is of
scalar type then the number of bytes read does not exceed the minimum number
@@ -4662,8 +5009,8 @@ that the invoke/unwind semantics are likely to change in future versions.</p>
<h5>Syntax:</h5>
<pre>
- store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;] <i>; yields {void}</i>
- store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt; <i>; yields {void}</i>
+ store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;] <i>; yields {void}</i>
+ store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt; <i>; yields {void}</i>
</pre>
<h5>Overview:</h5>
@@ -4792,7 +5139,7 @@ thread. (This is useful for interacting with signal handlers.)</p>
<h5>Syntax:</h5>
<pre>
- cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt; <i>; yields {ty}</i>
+ cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt; <i>; yields {ty}</i>
</pre>
<h5>Overview:</h5>
@@ -4850,13 +5197,13 @@ FIXME: Is a weaker ordering constraint on failure helpful in practice?
<h5>Example:</h5>
<pre>
entry:
- %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered <i>; yields {i32}</i>
+ %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered <i>; yields {i32}</i>
<a href="#i_br">br</a> label %loop
loop:
%cmp = <a href="#i_phi">phi</a> i32 [ %orig, %entry ], [%old, %loop]
%squared = <a href="#i_mul">mul</a> i32 %cmp, %cmp
- %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared <i>; yields {i32}</i>
+ %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared <i>; yields {i32}</i>
%success = <a href="#i_icmp">icmp</a> eq i32 %cmp, %old
<a href="#i_br">br</a> i1 %success, label %done, label %loop
@@ -4948,6 +5295,7 @@ specified by the <var>operation</var> argument:</p>
<pre>
&lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
&lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
+ &lt;result&gt; = getelementptr &lt;ptr vector&gt; ptrval, &lt;vector index type&gt; idx
</pre>
<h5>Overview:</h5>
@@ -4956,7 +5304,8 @@ specified by the <var>operation</var> argument:</p>
It performs address calculation only and does not access memory.</p>
<h5>Arguments:</h5>
-<p>The first argument is always a pointer, and forms the basis of the
+<p>The first argument is always a pointer or a vector of pointers,
+ and forms the basis of the
calculation. The remaining arguments are indices that indicate which of the
elements of the aggregate object are indexed. The interpretation of each
index is dependent on the type being indexed into. The first index always
@@ -4994,54 +5343,57 @@ int *foo(struct ST *s) {
}
</pre>
-<p>The LLVM code generated by the GCC frontend is:</p>
+<p>The LLVM code generated by Clang is:</p>
<pre class="doc_code">
-%RT = <a href="#namedtypes">type</a> { i8 , [10 x [20 x i32]], i8 }
-%ST = <a href="#namedtypes">type</a> { i32, double, %RT }
+%struct.RT = <a href="#namedtypes">type</a> { i8, [10 x [20 x i32]], i8 }
+%struct.ST = <a href="#namedtypes">type</a> { i32, double, %struct.RT }
-define i32* @foo(%ST* %s) {
+define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
entry:
- %reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
- ret i32* %reg
+ %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
+ ret i32* %arrayidx
}
</pre>
<h5>Semantics:</h5>
-<p>In the example above, the first index is indexing into the '<tt>%ST*</tt>'
- type, which is a pointer, yielding a '<tt>%ST</tt>' = '<tt>{ i32, double, %RT
- }</tt>' type, a structure. The second index indexes into the third element
- of the structure, yielding a '<tt>%RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]],
- i8 }</tt>' type, another structure. The third index indexes into the second
- element of the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an
- array. The two dimensions of the array are subscripted into, yielding an
- '<tt>i32</tt>' type. The '<tt>getelementptr</tt>' instruction returns a
- pointer to this element, thus computing a value of '<tt>i32*</tt>' type.</p>
+<p>In the example above, the first index is indexing into the
+ '<tt>%struct.ST*</tt>' type, which is a pointer, yielding a
+ '<tt>%struct.ST</tt>' = '<tt>{ i32, double, %struct.RT }</tt>' type, a
+ structure. The second index indexes into the third element of the structure,
+ yielding a '<tt>%struct.RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]], i8 }</tt>'
+ type, another structure. The third index indexes into the second element of
+ the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an array. The
+ two dimensions of the array are subscripted into, yielding an '<tt>i32</tt>'
+ type. The '<tt>getelementptr</tt>' instruction returns a pointer to this
+ element, thus computing a value of '<tt>i32*</tt>' type.</p>
<p>Note that it is perfectly legal to index partially through a structure,
returning a pointer to an inner element. Because of this, the LLVM code for
the given testcase is equivalent to:</p>
-<pre>
- define i32* @foo(%ST* %s) {
- %t1 = getelementptr %ST* %s, i32 1 <i>; yields %ST*:%t1</i>
- %t2 = getelementptr %ST* %t1, i32 0, i32 2 <i>; yields %RT*:%t2</i>
- %t3 = getelementptr %RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
- %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 <i>; yields [20 x i32]*:%t4</i>
- %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 <i>; yields i32*:%t5</i>
- ret i32* %t5
- }
+<pre class="doc_code">
+define i32* @foo(%struct.ST* %s) {
+ %t1 = getelementptr %struct.ST* %s, i32 1 <i>; yields %struct.ST*:%t1</i>
+ %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2 <i>; yields %struct.RT*:%t2</i>
+ %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1 <i>; yields [10 x [20 x i32]]*:%t3</i>
+ %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5 <i>; yields [20 x i32]*:%t4</i>
+ %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13 <i>; yields i32*:%t5</i>
+ ret i32* %t5
+}
</pre>
<p>If the <tt>inbounds</tt> keyword is present, the result value of the
- <tt>getelementptr</tt> is a <a href="#trapvalues">trap value</a> if the
+ <tt>getelementptr</tt> is a <a href="#poisonvalues">poison value</a> if the
base pointer is not an <i>in bounds</i> address of an allocated object,
or if any of the addresses that would be formed by successive addition of
the offsets implied by the indices to the base address with infinitely
precise signed arithmetic are not an <i>in bounds</i> address of that
allocated object. The <i>in bounds</i> addresses for an allocated object
are all the addresses that point into the object, plus the address one
- byte past the end.</p>
+   byte past the end.
+   In cases where the base is a vector of pointers, the <tt>inbounds</tt> keyword
+   applies to each of the computations element-wise.</p>
<p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
the base address with silently-wrapping two's complement arithmetic. If the
@@ -5068,6 +5420,13 @@ entry:
%iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
</pre>
+<p>In cases where the pointer argument is a vector of pointers, only a
+  single index may be used, and the number of elements in the index vector
+  must match the number of elements in the pointer vector. For example:</p>
+<pre class="doc_code">
+ %A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets
+</pre>
+
</div>
</div>
@@ -5440,13 +5799,16 @@ entry:
</pre>
<h5>Overview:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts the pointer <tt>value</tt> to
- the integer type <tt>ty2</tt>.</p>
+<p>The '<tt>ptrtoint</tt>' instruction converts the pointer or a vector of
+ pointers <tt>value</tt> to
+ the integer (or vector of integers) type <tt>ty2</tt>.</p>
<h5>Arguments:</h5>
<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
- must be a <a href="#t_pointer">pointer</a> value, and a type to cast it to
- <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type.</p>
+   must be a value of type <a href="#t_pointer">pointer</a> or a vector of
+   pointers, and a type to cast it to,
+   <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> type or a
+   vector of integers.</p>
<h5>Semantics:</h5>
<p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
@@ -5459,8 +5821,9 @@ entry:
<h5>Example:</h5>
<pre>
- %X = ptrtoint i32* %X to i8 <i>; yields truncation on 32-bit architecture</i>
- %Y = ptrtoint i32* %x to i64 <i>; yields zero extension on 32-bit architecture</i>
+ %X = ptrtoint i32* %P to i8 <i>; yields truncation on 32-bit architecture</i>
+ %Y = ptrtoint i32* %P to i64 <i>; yields zero extension on 32-bit architecture</i>
+  %Z = ptrtoint &lt;4 x i32*&gt; %P to &lt;4 x i64&gt; <i>; yields vector zero extension for a vector of addresses on 32-bit architecture</i>
</pre>
</div>
@@ -5499,6 +5862,7 @@ entry:
%X = inttoptr i32 255 to i32* <i>; yields zero extension on 64-bit architecture</i>
%Y = inttoptr i32 255 to i32* <i>; yields no-op on 32-bit architecture</i>
%Z = inttoptr i64 0 to i32* <i>; yields truncation on 32-bit architecture</i>
+  %W = inttoptr &lt;4 x i32&gt; %G to &lt;4 x i8*&gt; <i>; yields truncation of vector %G to four pointers</i>
</pre>
</div>
@@ -5533,8 +5897,9 @@ entry:
<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
<tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
this conversion. The conversion is done as if the <tt>value</tt> had been
- stored to memory and read back as type <tt>ty2</tt>. Pointer types may only
- be converted to other pointer types with this instruction. To convert
+ stored to memory and read back as type <tt>ty2</tt>.
+ Pointer (or vector of pointers) types may only be converted to other pointer
+ (or vector of pointers) types with this instruction. To convert
pointers to other types, use the <a href="#i_inttoptr">inttoptr</a> or
<a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
@@ -5542,7 +5907,8 @@ entry:
<pre>
%X = bitcast i8 255 to i8 <i>; yields i8 :-1</i>
%Y = bitcast i32* %x to sint* <i>; yields sint*:%x</i>
- %Z = bitcast &lt;2 x int&gt; %V to i64; <i>; yields i64: %V</i>
+  %Z = bitcast &lt;2 x i32&gt; %V to i64          <i>; yields i64: %V</i>
+  %W = bitcast &lt;2 x i32*&gt; %V to &lt;2 x i64*&gt; <i>; yields &lt;2 x i64*&gt;</i>
</pre>
</div>
@@ -5573,8 +5939,8 @@ entry:
<h5>Overview:</h5>
<p>The '<tt>icmp</tt>' instruction returns a boolean value or a vector of
- boolean values based on comparison of its two integer, integer vector, or
- pointer operands.</p>
+ boolean values based on comparison of its two integer, integer vector,
+ pointer, or pointer vector operands.</p>
<h5>Arguments:</h5>
<p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
@@ -5869,9 +6235,6 @@ Loop: ; Infinite loop that counts from 0 on up...
%X = select i1 true, i8 17, i8 42 <i>; yields i8:17</i>
</pre>
-<p>Note that the code generator does not yet support conditions
- with vector type.</p>
-
</div>
<!-- _______________________________________________________________________ -->
@@ -6038,8 +6401,8 @@ freestanding environments and non-C-based languages.</p>
<h5>Syntax:</h5>
<pre>
- &lt;resultval&gt; = landingpad &lt;somety&gt; personality &lt;type&gt; &lt;pers_fn&gt; &lt;clause&gt;+
- &lt;resultval&gt; = landingpad &lt;somety&gt; personality &lt;type&gt; &lt;pers_fn&gt; cleanup &lt;clause&gt;*
+ &lt;resultval&gt; = landingpad &lt;resultty&gt; personality &lt;type&gt; &lt;pers_fn&gt; &lt;clause&gt;+
+ &lt;resultval&gt; = landingpad &lt;resultty&gt; personality &lt;type&gt; &lt;pers_fn&gt; cleanup &lt;clause&gt;*
&lt;clause&gt; := catch &lt;type&gt; &lt;value&gt;
&lt;clause&gt; := filter &lt;array constant type&gt; &lt;array constant&gt;
@@ -6053,7 +6416,7 @@ freestanding environments and non-C-based languages.</p>
<i><tt>catch</tt></i> portion of a <i><tt>try/catch</tt></i> sequence. It
defines values supplied by the personality function (<tt>pers_fn</tt>) upon
re-entry to the function. The <tt>resultval</tt> has the
- type <tt>somety</tt>.</p>
+ type <tt>resultty</tt>.</p>
<h5>Arguments:</h5>
<p>This instruction takes a <tt>pers_fn</tt> value. This is the personality
@@ -6077,7 +6440,11 @@ freestanding environments and non-C-based languages.</p>
<p>The clauses are applied in order from top to bottom. If two
<tt>landingpad</tt> instructions are merged together through inlining, the
- clauses from the calling function are appended to the list of clauses.</p>
+ clauses from the calling function are appended to the list of clauses.
+ When the call stack is being unwound due to an exception being thrown, the
+ exception is compared against each <tt>clause</tt> in turn. If it doesn't
+ match any of the clauses, and the <tt>cleanup</tt> flag is not set, then
+ unwinding continues further up the call stack.</p>
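+
+<p>For illustration, a front end might build such a landing pad through the
+   C++ API; this is only a sketch, and <tt>Personality</tt> and
+   <tt>TypeInfo</tt> are assumed to have been created elsewhere:</p>
+
+<div class="doc_code">
+<pre>
+<i>// Landing pad returning the usual { exception pointer, selector } pair.</i>
+LandingPadInst *LPad =
+  Builder.CreateLandingPad(StructType::get(Int8PtrTy, Int32Ty, NULL),
+                           Personality, /*NumReservedClauses=*/1);
+LPad-&gt;addClause(TypeInfo);  <i>// checked first when unwinding reaches here</i>
+LPad-&gt;setCleanup(true);     <i>// still run cleanups if no clause matches</i>
+</pre>
+</div>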
<p>The <tt>landingpad</tt> instruction has several restrictions:</p>
@@ -7194,12 +7561,12 @@ LLVM</a>.</p>
targets support all bit widths or vector types, however.</p>
<pre>
- declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;)
- declare i16 @llvm.ctlz.i16(i16 &lt;src&gt;)
- declare i32 @llvm.ctlz.i32(i32 &lt;src&gt;)
- declare i64 @llvm.ctlz.i64(i64 &lt;src&gt;)
- declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;)
- declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src;gt)
+ declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i16 @llvm.ctlz.i16 (i16 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i32 @llvm.ctlz.i32 (i32 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i64 @llvm.ctlz.i64 (i64 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+  declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
</pre>
<h5>Overview:</h5>
@@ -7207,15 +7574,22 @@ LLVM</a>.</p>
leading zeros in a variable.</p>
<h5>Arguments:</h5>
-<p>The only argument is the value to be counted. The argument may be of any
- integer type, or any vector type with integer element type.
- The return type must match the argument type.</p>
+<p>The first argument is the value to be counted. This argument may be of any
+   integer type, or a vector with an integer element type. The return type
+   must match the first argument type.</p>
+
+<p>The second argument must be a constant and is a flag indicating whether the
+   intrinsic must produce a defined result when the first argument is zero.
+   Historically, some architectures could not efficiently produce a defined
+   result for zero inputs, and many algorithms are predicated on avoiding
+   zero-value inputs anyway.</p>
<h5>Semantics:</h5>
<p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
- zeros in a variable, or within each element of the vector if the operation
- is of vector type. If the src == 0 then the result is the size in bits of
- the type of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
+ zeros in a variable, or within each element of the vector.
+ If <tt>src == 0</tt> then the result is the size in bits of the type of
+ <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
+ For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
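+
+<p>As a sketch of emitting this intrinsic through the C++ API (assuming an
+   <tt>IRBuilder</tt> named <tt>Builder</tt> and a <tt>Module *M</tt>; the
+   surrounding names are illustrative only), a pass that knows <tt>src</tt> is
+   never zero can request the cheaper form:</p>
+
+<div class="doc_code">
+<pre>
+Value *Ctlz = Intrinsic::getDeclaration(M, Intrinsic::ctlz,
+                                        Builder.getInt32Ty());
+<i>// Src is known non-zero here, so an undefined result for zero is fine.</i>
+Value *LeadingZeros = Builder.CreateCall2(Ctlz, Src, Builder.getInt1(true));
+</pre>
+</div>
+
+<p>The same pattern applies to the '<tt>llvm.cttz</tt>' intrinsic below.</p>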
</div>
@@ -7232,12 +7606,12 @@ LLVM</a>.</p>
support all bit widths or vector types, however.</p>
<pre>
- declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;)
- declare i16 @llvm.cttz.i16(i16 &lt;src&gt;)
- declare i32 @llvm.cttz.i32(i32 &lt;src&gt;)
- declare i64 @llvm.cttz.i64(i64 &lt;src&gt;)
- declare i256 @llvm.cttz.i256(i256 &lt;src&gt;)
- declase &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
+ declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i16 @llvm.cttz.i16 (i16 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i32 @llvm.cttz.i32 (i32 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i64 @llvm.cttz.i64 (i64 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+ declare i256 @llvm.cttz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
+  declare &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
</pre>
<h5>Overview:</h5>
@@ -7245,15 +7619,22 @@ LLVM</a>.</p>
trailing zeros.</p>
<h5>Arguments:</h5>
-<p>The only argument is the value to be counted. The argument may be of any
- integer type, or a vectory with integer element type.. The return type
- must match the argument type.</p>
+<p>The first argument is the value to be counted. This argument may be of any
+   integer type, or a vector with an integer element type. The return type
+   must match the first argument type.</p>
+
+<p>The second argument must be a constant and is a flag indicating whether the
+   intrinsic must produce a defined result when the first argument is zero.
+   Historically, some architectures could not efficiently produce a defined
+   result for zero inputs, and many algorithms are predicated on avoiding
+   zero-value inputs anyway.</p>
<h5>Semantics:</h5>
<p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
zeros in a variable, or within each element of a vector.
- If the src == 0 then the result is the size in bits of
- the type of src. For example, <tt>llvm.cttz(2) = 1</tt>.</p>
+ If <tt>src == 0</tt> then the result is the size in bits of the type of
+ <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
+ For example, <tt>llvm.cttz(2) = 1</tt>.</p>
</div>
@@ -8086,11 +8467,35 @@ LLVM</a>.</p>
compile time.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="int_expect">'<tt>llvm.expect</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<pre>
+ declare i32 @llvm.expect.i32(i32 &lt;val&gt;, i32 &lt;expected_val&gt;)
+ declare i64 @llvm.expect.i64(i64 &lt;val&gt;, i64 &lt;expected_val&gt;)
+</pre>
+
+<h5>Overview:</h5>
+<p>The <tt>llvm.expect</tt> intrinsic provides information about the expected
+   (most probable) value of <tt>val</tt>, which can be used by optimizers.</p>
+
+<h5>Arguments:</h5>
+<p>The <tt>llvm.expect</tt> intrinsic takes two arguments. The first
+   argument is a value. The second argument is the expected value; it must be
+   a constant, variables are not allowed.</p>
+
+<h5>Semantics:</h5>
+<p>This intrinsic is lowered to <tt>val</tt>.</p>
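+
+<p>For example (a hedged sketch using the same illustrative
+   <tt>Builder</tt>/<tt>M</tt> names as above), a front end lowering something
+   like C's <tt>__builtin_expect(x, 1)</tt> might emit:</p>
+
+<div class="doc_code">
+<pre>
+<i>// Tell the optimizers that X most probably has the value 1.</i>
+Value *Expect = Intrinsic::getDeclaration(M, Intrinsic::expect,
+                                          Builder.getInt64Ty());
+Value *Hinted = Builder.CreateCall2(Expect, X, Builder.getInt64(1));
+</pre>
+</div>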
</div>
</div>
+</div>
<!-- *********************************************************************** -->
<hr>
<address>
@@ -8101,7 +8506,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:54 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-04-10 10:22:43 +0200 (Tue, 10 Apr 2012) $
</address>
</body>
diff --git a/docs/Lexicon.html b/docs/Lexicon.html
index e12041d0a89e..dbb7f9b15b6b 100644
--- a/docs/Lexicon.html
+++ b/docs/Lexicon.html
@@ -275,7 +275,7 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
</dl>
</div>
-</div>
+</div>
<!-- *********************************************************************** -->
<hr>
<address> <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
@@ -284,7 +284,7 @@ href="http://www.program-transformation.org/Transform/BURG">BURG</a> tool.</dd>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a><a
href="http://llvm.org/">The LLVM Team</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-Last modified: $Date: 2011-09-27 20:44:01 +0200 (Tue, 27 Sep 2011) $
+Last modified: $Date: 2012-01-05 09:18:41 +0100 (Thu, 05 Jan 2012) $
</address>
<!-- vim: sw=2
-->
diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html
index 63403ca5f636..56525554c9da 100644
--- a/docs/LinkTimeOptimization.html
+++ b/docs/LinkTimeOptimization.html
@@ -393,7 +393,7 @@ of the native object files.</p>
Devang Patel and Nick Kledzik<br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
diff --git a/docs/Packaging.html b/docs/Packaging.html
index e3cdf7911aec..ac4dcf066acf 100644
--- a/docs/Packaging.html
+++ b/docs/Packaging.html
@@ -113,7 +113,7 @@ line numbers.</dd>
<a href="http://validator.w3.org/check/referer"><img
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/Passes.html b/docs/Passes.html
index 96d1aeefd4d3..37a304d81de4 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -126,6 +126,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<tr><td><a href="#adce">-adce</a></td><td>Aggressive Dead Code Elimination</td></tr>
<tr><td><a href="#always-inline">-always-inline</a></td><td>Inliner for always_inline functions</td></tr>
<tr><td><a href="#argpromotion">-argpromotion</a></td><td>Promote 'by reference' arguments to scalars</td></tr>
+<tr><td><a href="#bb-vectorize">-bb-vectorize</a></td><td>Combine instructions to form vector instructions within basic blocks</td></tr>
<tr><td><a href="#block-placement">-block-placement</a></td><td>Profile Guided Basic Block Placement</td></tr>
<tr><td><a href="#break-crit-edges">-break-crit-edges</a></td><td>Break critical edges in CFG</td></tr>
<tr><td><a href="#codegenprepare">-codegenprepare</a></td><td>Optimize for code generation</td></tr>
@@ -817,6 +818,26 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print " <p>\n" if !
<!-------------------------------------------------------------------------- -->
<h3>
+ <a name="bb-vectorize">-bb-vectorize: Basic-Block Vectorization</a>
+</h3>
+<div>
+ <p>This pass combines instructions inside basic blocks to form vector
+ instructions. It iterates over each basic block, attempting to pair
+ compatible instructions, repeating this process until no additional
+ pairs are selected for vectorization. When the outputs of some pair
+ of compatible instructions are used as inputs by some other pair of
+ compatible instructions, those pairs are part of a potential
+ vectorization chain. Instruction pairs are only fused into vector
+ instructions when they are part of a chain longer than some
+ threshold length. Moreover, the pass attempts to find the best
+  possible chain for each pair of compatible instructions. These
+  heuristics are intended to prevent vectorization in cases where
+  it would not improve the performance of the resulting code.
+  </p>
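+  <p>As a sketch, the pass can be added to a custom pipeline through the C++
+  API (assuming the usual pass-manager setup; <tt>M</tt> is an
+  <tt>llvm::Module</tt>):</p>
+<div class="doc_code">
+<pre>
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/Vectorize.h"
+
+<i>// Run basic-block vectorization over every function in the module.</i>
+PassManager PM;
+PM.add(createBBVectorizePass());
+PM.run(*M);
+</pre>
+</div>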
+</div>
+
+<!-------------------------------------------------------------------------- -->
+<h3>
<a name="block-placement">-block-placement: Profile Guided Basic Block Placement</a>
</h3>
<div>
@@ -2039,7 +2060,7 @@ if (X &lt; 3) {</pre>
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-04 07:30:50 +0100 (Fri, 04 Nov 2011) $
+ Last modified: $Date: 2012-02-01 04:51:43 +0100 (Wed, 01 Feb 2012) $
</address>
</body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index 47fc6e99b276..951e2d5766d5 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -81,11 +81,13 @@ option</a></li>
<li><a href="#dss_smallset">"llvm/ADT/SmallSet.h"</a></li>
<li><a href="#dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a></li>
<li><a href="#dss_denseset">"llvm/ADT/DenseSet.h"</a></li>
+ <li><a href="#dss_sparseset">"llvm/ADT/SparseSet.h"</a></li>
<li><a href="#dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a></li>
<li><a href="#dss_set">&lt;set&gt;</a></li>
<li><a href="#dss_setvector">"llvm/ADT/SetVector.h"</a></li>
<li><a href="#dss_uniquevector">"llvm/ADT/UniqueVector.h"</a></li>
- <li><a href="#dss_otherset">Other Set-Like ContainerOptions</a></li>
+ <li><a href="#dss_immutableset">"llvm/ADT/ImmutableSet.h"</a></li>
+ <li><a href="#dss_otherset">Other Set-Like Container Options</a></li>
</ul></li>
<li><a href="#ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
<ul>
@@ -97,6 +99,7 @@ option</a></li>
<li><a href="#dss_intervalmap">"llvm/ADT/IntervalMap.h"</a></li>
<li><a href="#dss_map">&lt;map&gt;</a></li>
<li><a href="#dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a></li>
+ <li><a href="#dss_immutablemap">"llvm/ADT/ImmutableMap.h"</a></li>
<li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
</ul></li>
<li><a href="#ds_bit">BitVector-like containers</a>
@@ -995,7 +998,7 @@ vector is also useful when interfacing with code that expects vectors :).
<pre>
for ( ... ) {
std::vector&lt;foo&gt; V;
- use V;
+ // make use of V.
}
</pre>
</div>
@@ -1006,7 +1009,7 @@ for ( ... ) {
<pre>
std::vector&lt;foo&gt; V;
for ( ... ) {
- use V;
+ // make use of V.
V.clear();
}
</pre>
@@ -1488,6 +1491,24 @@ href="#dss_densemap">DenseMap</a> has.
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="dss_sparseset">"llvm/ADT/SparseSet.h"</a>
+</h4>
+
+<div>
+
+<p>SparseSet holds a small number of objects identified by unsigned keys of
+moderate size. It uses a lot of memory, but provides operations that are
+almost as fast as a vector. Typical keys are physical registers, virtual
+registers, or numbered basic blocks.</p>
+
+<p>SparseSet is useful for algorithms that need very fast clear/find/insert/erase
+and fast iteration over small sets. It is not intended for building composite
+data structures.</p>
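+
+<p>A minimal usage sketch (assuming the default key functor; the names here
+are illustrative):</p>
+
+<div class="doc_code">
+<pre>
+#include "llvm/ADT/SparseSet.h"
+
+llvm::SparseSet&lt;unsigned&gt; Live;
+Live.setUniverse(NumRegs);  <i>// keys must be smaller than the universe</i>
+Live.insert(Reg);
+if (Live.count(Reg)) {
+  <i>// ...</i>
+}
+Live.clear();               <i>// very fast, regardless of contents</i>
+</pre>
+</div>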
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a>
</h4>
@@ -1608,6 +1629,29 @@ factors, and produces a lot of malloc traffic. It should be avoided.</p>
</div>
+<!-- _______________________________________________________________________ -->
+<h4>
+ <a name="dss_immutableset">"llvm/ADT/ImmutableSet.h"</a>
+</h4>
+
+<div>
+
+<p>
+ImmutableSet is an immutable (functional) set implementation based on an AVL
+tree.
+Adding or removing elements is done through a Factory object and results in the
+creation of a new ImmutableSet object.
+If an ImmutableSet already exists with the given contents, then the existing one
+is returned; equality is compared with a FoldingSetNodeID.
+The time and space complexity of add or remove operations is logarithmic in the
+size of the original set.
+</p>
+
+<p>
+There is no method for returning an element of the set; you can only check for
+membership.
+</p>
+
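+<p>
+A brief sketch of the Factory-based interface:
+</p>
+
+<div class="doc_code">
+<pre>
+#include "llvm/ADT/ImmutableSet.h"
+
+llvm::ImmutableSet&lt;int&gt;::Factory F;
+llvm::ImmutableSet&lt;int&gt; Empty = F.getEmptySet();
+llvm::ImmutableSet&lt;int&gt; S1 = F.add(Empty, 42);  <i>// Empty is unchanged</i>
+llvm::ImmutableSet&lt;int&gt; S2 = F.remove(S1, 42);
+bool HasIt = S1.contains(42);  <i>// membership is the only query</i>
+</pre>
+</div>
+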
+</div>
+
<!-- _______________________________________________________________________ -->
<h4>
@@ -1728,7 +1772,7 @@ pointers, or map other small types to each other.
<p>
There are several aspects of DenseMap that you should be aware of, however. The
-iterators in a densemap are invalidated whenever an insertion occurs, unlike
+iterators in a DenseMap are invalidated whenever an insertion occurs, unlike
map. Also, because DenseMap allocates space for a large number of key/value
pairs (it starts with 64 by default), it will waste a lot of space if your keys
or values are large. Finally, you must implement a partial specialization of
@@ -1736,6 +1780,14 @@ DenseMapInfo for the key that you want, if it isn't already supported. This
is required to tell DenseMap about two special marker values (which can never be
inserted into the map) that it needs internally.</p>
+<p>
+DenseMap's find_as() method supports lookup operations using an alternate key
+type. This is useful in cases where the normal key type is expensive to
+construct, but cheap to compare against. The DenseMapInfo is responsible for
+defining the appropriate comparison and hashing methods for each alternate
+key type used.
+</p>
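+
+<p>
+A rough sketch, assuming a hypothetical <tt>MyKey</tt> type whose
+<tt>DenseMapInfo</tt> also provides <tt>getHashValue</tt> and <tt>isEqual</tt>
+overloads for <tt>StringRef</tt>:
+</p>
+
+<div class="doc_code">
+<pre>
+<i>// MyKey is expensive to construct (it owns a std::string), but can be</i>
+<i>// hashed and compared against a StringRef cheaply.</i>
+llvm::DenseMap&lt;MyKey, unsigned&gt; Map;
+
+unsigned lookup(llvm::StringRef Name) {
+  llvm::DenseMap&lt;MyKey, unsigned&gt;::iterator I = Map.find_as(Name);
+  return I == Map.end() ? 0 : I-&gt;second;
+}
+</pre>
+</div>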
+
</div>
<!-- _______________________________________________________________________ -->
@@ -1814,6 +1866,25 @@ it can be edited again.</p>
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="dss_immutablemap">"llvm/ADT/ImmutableMap.h"</a>
+</h4>
+
+<div>
+
+<p>
+ImmutableMap is an immutable (functional) map implementation based on an AVL
+tree.
+Adding or removing elements is done through a Factory object and results in the
+creation of a new ImmutableMap object.
+If an ImmutableMap already exists with the given key set, then the existing one
+is returned; equality is compared with a FoldingSetNodeID.
+The time and space complexity of add or remove operations is logarithmic in the
+size of the original map.
+</p>
+
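+<p>
+The interface mirrors ImmutableSet; a brief sketch:
+</p>
+
+<div class="doc_code">
+<pre>
+#include "llvm/ADT/ImmutableMap.h"
+
+llvm::ImmutableMap&lt;int, int&gt;::Factory F;
+llvm::ImmutableMap&lt;int, int&gt; M = F.add(F.getEmptyMap(), 1, 10);
+if (const int *V = M.lookup(1)) {
+  <i>// *V == 10; M itself is never mutated.</i>
+}
+</pre>
+</div>
+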
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="dss_othermap">Other Map-Like Container Options</a>
</h4>
@@ -2496,7 +2567,7 @@ block but not delete it, you can use the <tt>removeFromParent()</tt> method.</p>
<div>
-<p><i>Replacing individual instructions</i></p>
+<h5><i>Replacing individual instructions</i></h5>
<p>Including "<a href="/doxygen/BasicBlockUtils_8h-source.html">llvm/Transforms/Utils/BasicBlockUtils.h</a>"
permits use of two very useful replace functions: <tt>ReplaceInstWithValue</tt>
@@ -2504,6 +2575,7 @@ and <tt>ReplaceInstWithInst</tt>.</p>
<h5><a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a></h5>
+<div>
<ul>
<li><tt>ReplaceInstWithValue</tt>
@@ -2540,7 +2612,9 @@ ReplaceInstWithInst(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
</pre></div></li>
</ul>
-<p><i>Replacing multiple uses of <tt>User</tt>s and <tt>Value</tt>s</i></p>
+</div>
+
+<h5><i>Replacing multiple uses of <tt>User</tt>s and <tt>Value</tt>s</i></h5>
<p>You can use <tt>Value::replaceAllUsesWith</tt> and
<tt>User::replaceUsesOfWith</tt> to change more than one use at a time. See the
@@ -3234,13 +3308,12 @@ helpful member functions that try to make common operations easy.</p>
<div>
<ul>
- <li><tt>Module::Module(std::string name = "")</tt></li>
-</ul>
+ <li><tt>Module::Module(std::string name = "")</tt>
-<p>Constructing a <a href="#Module">Module</a> is easy. You can optionally
+ <p>Constructing a <a href="#Module">Module</a> is easy. You can optionally
provide a name for it (probably based on the name of the translation unit).</p>
+ </li>
-<ul>
<li><tt>Module::iterator</tt> - Typedef for function list iterator<br>
<tt>Module::const_iterator</tt> - Typedef for const_iterator.<br>
@@ -4052,7 +4125,7 @@ arguments. An argument has a pointer to the parent Function.</p>
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:54 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
diff --git a/docs/Projects.html b/docs/Projects.html
index a3d68911d934..ebd72031790b 100644
--- a/docs/Projects.html
+++ b/docs/Projects.html
@@ -482,7 +482,7 @@ Mailing List</a>.</p>
<a href="mailto:criswell@uiuc.edu">John Criswell</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 56d0eb914839..bcac29395316 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -4,20 +4,22 @@
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="stylesheet" href="llvm.css" type="text/css">
- <title>LLVM 3.0 Release Notes</title>
+ <title>LLVM 3.1 Release Notes</title>
</head>
<body>
-<h1>LLVM 3.0 Release Notes</h1>
+<h1>LLVM 3.1 Release Notes</h1>
-<img align=right src="http://llvm.org/img/DragonSmall.png"
- width="136" height="136" alt="LLVM Dragon Logo">
+<div>
+<img style="float:right" src="http://llvm.org/img/DragonSmall.png"
+ width="136" height="136" alt="LLVM Dragon Logo">
+</div>
<ol>
<li><a href="#intro">Introduction</a></li>
<li><a href="#subproj">Sub-project Status Update</a></li>
- <li><a href="#externalproj">External Projects Using LLVM 3.0</a></li>
- <li><a href="#whatsnew">What's New in LLVM 3.0?</a></li>
+ <li><a href="#externalproj">External Projects Using LLVM 3.1</a></li>
+ <li><a href="#whatsnew">What's New in LLVM?</a></li>
<li><a href="GettingStarted.html">Installation Instructions</a></li>
<li><a href="#knownproblems">Known Problems</a></li>
<li><a href="#additionalinfo">Additional Information</a></li>
@@ -27,13 +29,11 @@
<p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
</div>
-<!--
-<h1 style="color:red">These are in-progress notes for the upcoming LLVM 3.0
+<h1 style="color:red">These are in-progress notes for the upcoming LLVM 3.1
release.<br>
You may prefer the
-<a href="http://llvm.org/releases/2.9/docs/ReleaseNotes.html">LLVM 2.9
+<a href="http://llvm.org/releases/3.0/docs/ReleaseNotes.html">LLVM 3.0
Release Notes</a>.</h1>
- -->
<!-- *********************************************************************** -->
<h2>
@@ -44,8 +44,9 @@ Release Notes</a>.</h1>
<div>
<p>This document contains the release notes for the LLVM Compiler
- Infrastructure, release 3.0. Here we describe the status of LLVM, including
- major improvements from the previous release and significant known problems.
+ Infrastructure, release 3.1. Here we describe the status of LLVM, including
+ major improvements from the previous release, improvements in various
+ subprojects of LLVM, and some of the current users of the code.
All LLVM releases may be downloaded from
the <a href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
@@ -61,16 +62,8 @@ Release Notes</a>.</h1>
<a href="http://llvm.org/releases/">releases page</a>.</p>
</div>
-
-<!-- Features that need text if they're finished for 3.1:
- ARM EHABI
- combiner-aa?
- strong phi elim
- loop dependence analysis
- CorrelatedValuePropagation
- lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.1.
- -->
-
+
+
<!-- *********************************************************************** -->
<h2>
<a name="subproj">Sub-project Status Update</a>
@@ -79,9 +72,9 @@ Release Notes</a>.</h1>
<div>
-<p>The LLVM 3.0 distribution currently consists of code from the core LLVM
+<p>The LLVM 3.1 distribution currently consists of code from the core LLVM
repository (which roughly includes the LLVM optimizers, code generators and
- supporting tools), the Clang repository and the llvm-gcc repository. In
+ supporting tools), and the Clang repository. In
addition to this code, the LLVM Project includes other sub-projects that are
in development. Here we include updates on these subprojects.</p>
@@ -99,37 +92,18 @@ Release Notes</a>.</h1>
provides a modular, library-based architecture that makes it suitable for
creating or integrating with other development tools. Clang is considered a
production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86
- (32- and 64-bit), and for darwin/arm targets.</p>
-
-<p>In the LLVM 3.0 time-frame, the Clang team has made many improvements:</p>
+ (32- and 64-bit), and for Darwin/ARM targets.</p>
+<p>In the LLVM 3.1 time-frame, the Clang team has made many improvements:</p>
<ul>
- <li>Greatly improved support for building C++ applications, with greater
- stability and better diagnostics.</li>
-
- <li><a href="http://clang.llvm.org/cxx_status.html">Improved support</a> for
- the <a href="http://www.iso.org/iso/iso_catalogue/catalogue_tc/catalogue_detail.htm?csnumber=50372">C++
- 2011</a> standard, including implementations of non-static data member
- initializers, alias templates, delegating constructors, the range-based
- for loop, and implicitly-generated move constructors and move assignment
- operators, among others.</li>
-
- <li>Implemented support for some features of the upcoming C1x standard,
- including static assertions and generic selections.</li>
-
- <li>Better detection of include and linking paths for system headers and
- libraries, especially for Linux distributions.</li>
-
- <li>Implemented support
- for <a href="http://clang.llvm.org/docs/AutomaticReferenceCounting.html">Automatic
- Reference Counting</a> for Objective-C.</li>
-
- <li>Implemented a number of optimizations in <tt>libclang</tt>, the Clang C
- interface, to improve the performance of code completion and the mapping
- from source locations to abstract syntax tree nodes.</li>
+ <li>...</li>
</ul>
-
+  <p>For more details about the changes to Clang since the 3.0 release, see the
+  <a href="http://clang.llvm.org/docs/ReleaseNotes.html">Clang release notes</a>.
+  </p>
+
+
<p>If Clang rejects your code but another compiler accepts it, please take a
look at the <a href="http://clang.llvm.org/compatibility.html">language
compatibility</a> guide to make sure this is not intentional or a known
@@ -145,19 +119,18 @@ Release Notes</a>.</h1>
<div>
<p><a href="http://dragonegg.llvm.org/">DragonEgg</a> is a
<a href="http://gcc.gnu.org/wiki/plugins">gcc plugin</a> that replaces GCC's
- optimizers and code generators with LLVM's. Currently it requires a patched
- version of gcc-4.5. The plugin can target the x86-32 and x86-64 processor
- families and has been used successfully on the Darwin, FreeBSD and Linux
- platforms. The Ada, C, C++ and Fortran languages work well. The plugin is
- capable of compiling plenty of Obj-C, Obj-C++ and Java but it is not known
- whether the compiled code actually works or not!</p>
+ optimizers and code generators with LLVM's. It works with gcc-4.5 or gcc-4.6,
+ targets the x86-32 and x86-64 processor families, and has been successfully
+ used on the Darwin, FreeBSD, KFreeBSD, Linux and OpenBSD platforms. It fully
+ supports Ada, C, C++ and Fortran. It has partial support for Go, Java, Obj-C
+ and Obj-C++.</p>
-<p>The 3.0 release has the following notable changes:</p>
+<p>The 3.1 release has the following notable changes:</p>
+
+ <ul>
+
+ <li>...</li>
-<ul>
-<!--
-<li></li>
--->
</ul>
</div>
@@ -178,7 +151,7 @@ Release Notes</a>.</h1>
implementations of this and other low-level routines (some are 3x faster than
the equivalent libgcc routines).</p>
-<p>In the LLVM 3.0 timeframe,</p>
+<p>....</p>
</div>
@@ -189,11 +162,12 @@ Release Notes</a>.</h1>
<div>
-<p>LLDB has advanced by leaps and bounds in the 3.0 timeframe. It is
- dramatically more stable and useful, and includes both a
- new <a href="http://lldb.llvm.org/tutorial.html">tutorial</a> and
- a <a href="http://lldb.llvm.org/lldb-gdb.html">side-by-side comparison with
- GDB</a>.</p>
+<p>LLDB is a ground-up implementation of a command line debugger, as well as a
+ debugger API that can be used from other applications. LLDB makes use of the
+ Clang parser to provide high-fidelity expression parsing (particularly for
+ C++) and uses the LLVM JIT for target support.</p>
+
+<p>...</p>
</div>
@@ -208,22 +182,7 @@ Release Notes</a>.</h1>
licensed</a> under the MIT and UIUC license, allowing it to be used more
permissively.</p>
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="LLBrowse">LLBrowse: IR Browser</a>
-</h3>
-
-<div>
-
-<p><a href="http://llvm.org/svn/llvm-project/llbrowse/trunk/doc/LLBrowse.html">
- LLBrowse</a> is an interactive viewer for LLVM modules. It can load any LLVM
- module and displays its contents as an expandable tree view, facilitating an
- easy way to inspect types, functions, global variables, or metadata nodes. It
- is fully cross-platform, being based on the popular wxWidgets GUI
- toolkit.</p>
+<p>...</p>
</div>
@@ -234,39 +193,24 @@ Release Notes</a>.</h1>
<div>
-<p>The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation
- of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and
- just-in-time compilation. As of LLVM 3.0, VMKit now supports generational
- garbage collectors. The garbage collectors are provided by the MMTk
- framework, and VMKit can be configured to use one of the numerous implemented
- collectors of MMTk.</p>
+ <p>The <a href="http://vmkit.llvm.org/">VMKit project</a> is an
+ implementation of a Java Virtual Machine (Java VM or JVM) that uses LLVM for
+  static and just-in-time compilation.</p>
-</div>
-
-
-<!--=========================================================================-->
-<!--
-<h3>
-<a name="klee">KLEE: A Symbolic Execution Virtual Machine</a>
-</h3>
+  <p>In the LLVM 3.1 time-frame, VMKit has seen significant improvements in
+  both runtime and startup performance:</p>
-<div>
-<p>
-<a href="http://klee.llvm.org/">KLEE</a> is a symbolic execution framework for
-programs in LLVM bitcode form. KLEE tries to symbolically evaluate "all" paths
-through the application and records state transitions that lead to fault
-states. This allows it to construct testcases that lead to faults and can even
-be used to verify some algorithms.
-</p>
+ <ul>
+ <li>...</li>
+ </ul>
-<p>UPDATE!</p>
-</div>-->
+</div>
</div>
<!-- *********************************************************************** -->
<h2>
- <a name="externalproj">External Open Source Projects Using LLVM 3.0</a>
+ <a name="externalproj">External Open Source Projects Using LLVM 3.1</a>
</h2>
<!-- *********************************************************************** -->
@@ -274,415 +218,15 @@ be used to verify some algorithms.
<p>An exciting aspect of LLVM is that it is used as an enabling technology for
a lot of other language and tools projects. This section lists some of the
- projects that have already been updated to work with LLVM 3.0.</p>
-
-<!--=========================================================================-->
-<h3>AddressSanitizer</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/address-sanitizer/">AddressSanitizer</a>
- uses compiler instrumentation and a specialized malloc library to find C/C++
- bugs such as use-after-free and out-of-bound accesses to heap, stack, and
- globals. The key feature of the tool is speed: the average slowdown
- introduced by AddressSanitizer is less than 2x.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>ClamAV</h3>
-
-<div>
-
-<p><a href="http://www.clamav.net">Clam AntiVirus</a> is an open source (GPL)
- anti-virus toolkit for UNIX, designed especially for e-mail scanning on mail
- gateways.</p>
-
-<p>Since version 0.96 it
- has <a href="http://vrt-sourcefire.blogspot.com/2010/09/introduction-to-clamavs-low-level.html">bytecode
- signatures</a> that allow writing detections for complex malware.</p>
-
-<p>It uses LLVM's JIT to speed up the execution of bytecode on X86, X86-64,
- PPC32/64, falling back to its own interpreter otherwise. The git version was
- updated to work with LLVM 3.0.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>clReflect</h3>
-
-<div>
-
-<p><a href="https://bitbucket.org/dwilliamson/clreflect">clReflect</a> is a C++
- parser that uses clang/LLVM to derive a light-weight reflection database
- suitable for use in game development. It comes with a very simple runtime
- library for loading and querying the database, requiring no external
- dependencies (including CRT), and an additional utility library for object
- management and serialisation.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Cling C++ Interpreter</h3>
-
-<div>
-
-<p><a href="http://cern.ch/cling">Cling</a> is an interactive compiler interface
- (aka C++ interpreter). It uses LLVM's JIT and clang; it currently supports
- C++ and C. It has a prompt interface, runs source files, calls into shared
- libraries, prints the value of expressions, even does runtime lookup of
- identifiers (dynamic scopes). And it just behaves like one would expect from
- an interpreter.</p>
-
-</div>
-
-<!--=========================================================================-->
-<!-- FIXME: Comment out
-<h3>Crack Programming Language</h3>
-
-<div>
-<p>
-<a href="http://code.google.com/p/crack-language/">Crack</a> aims to provide the
-ease of development of a scripting language with the performance of a compiled
-language. The language derives concepts from C++, Java and Python, incorporating
-object-oriented programming, operator overloading and strong typing.</p>
-</div>
--->
-
-<!--=========================================================================-->
-<h3>Glasgow Haskell Compiler (GHC)</h3>
-
-<div>
-
-<p>GHC is an open source, state-of-the-art programming suite for Haskell, a
- standard lazy functional programming language. It includes an optimizing
- static compiler generating good code for a variety of platforms, together
- with an interactive system for convenient, quick development.</p>
-
-<p>GHC 7.0 and onwards include an LLVM code generator, supporting LLVM 2.8 and
- later. Since LLVM 2.9, GHC now includes experimental support for the ARM
- platform with LLVM 3.0.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>gwXscript</h3>
-
-<div>
-
-<p><a href="http://botwars.tk/gwscript/">gwXscript</a> is an object oriented,
- aspect oriented programming language which can create both executables (ELF,
- EXE) and shared libraries (DLL, SO, DYNLIB). The compiler is implemented in
- its own language and translates scripts into LLVM-IR which can be optimized
- and translated into native code by the LLVM framework. Source code in
- gwScript contains definitions that expand the namespaces. So you can build
- your project and simply 'plug out' features by removing a file. The remaining
- project does not leave scars since you directly separate concerns by the
- 'template' feature of gwX. It is also possible to add new features to a
- project by just adding files and without editing the original project. This
- language is used for example to create games or content management systems
- that should be extendable.</p>
-
-<p>gwXscript is strongly typed and offers comfort with its native types string,
- hash and array. You can easily write new libraries in gwXscript or native
- code. gwXscript is type safe and users should not be able to crash your
- program or execute malicious code except code that is eating CPU time.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>include-what-you-use</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/include-what-you-use">include-what-you-use</a>
- is a tool to ensure that a file directly <code>#include</code>s
- all <code>.h</code> files that provide a symbol that the file uses. It also
- removes superfluous <code>#include</code>s from source files.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>LanguageKit and Pragmatic Smalltalk</h3>
-
-<div>
-
-<p><a href="http://etoileos.com/etoile/features/languagekit/">LanguageKit</a> is
- a framework for implementing dynamic languages sharing an object model with
- Objective-C. It provides static and JIT compilation using LLVM along with
- its own interpreter. Pragmatic Smalltalk is a dialect of Smalltalk, built on
- top of LanguageKit, that interfaces directly with Objective-C, sharing the
- same object representation and message sending behaviour. These projects are
- developed as part of the &Eacute;toi&eacute; desktop environment.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>LuaAV</h3>
-
-<div>
-
-<p><a href="http://lua-av.mat.ucsb.edu/blog/">LuaAV</a> is a real-time
- audiovisual scripting environment based around the Lua language and a
- collection of libraries for sound, graphics, and other media protocols. LuaAV
- uses LLVM and Clang to JIT compile efficient user-defined audio synthesis
- routines specified in a declarative syntax.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Mono</h3>
-
-<div>
-
-<p>An open source, cross-platform implementation of C# and the CLR that is
- binary compatible with Microsoft.NET. Has an optional, dynamically-loaded
- LLVM code generation backend in Mini, the JIT compiler.</p>
-
-<p>Note that we use a Git mirror of LLVM with some patches. See:
- https://github.com/mono/llvm</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Portable OpenCL (pocl)</h3>
-
-<div>
-
-<p>Portable OpenCL is an open source implementation of the OpenCL standard which
- can be easily adapted for new targets. One of the goals of the project is
- improving performance portability of OpenCL programs, avoiding the need for
- target-dependent manual optimizations. A "native" target is included, which
- allows running OpenCL kernels on the host (CPU).</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Pure</h3>
-
-<div>
-<p><a href="http://pure-lang.googlecode.com/">Pure</a> is an
- algebraic/functional programming language based on term rewriting. Programs
- are collections of equations which are used to evaluate expressions in a
- symbolic fashion. The interpreter uses LLVM as a backend to JIT-compile Pure
- programs to fast native code. Pure offers dynamic typing, eager and lazy
- evaluation, lexical closures, a hygienic macro system (also based on term
- rewriting), built-in list and matrix support (including list and matrix
- comprehensions) and an easy-to-use interface to C and other programming
- languages (including the ability to load LLVM bitcode modules, and inline C,
- C++, Fortran and Faust code in Pure programs if the corresponding LLVM-enabled
- compilers are installed).</p>
-
-<p>Pure version 0.48 has been tested and is known to work with LLVM 3.0
- (and continues to work with older LLVM releases &gt;= 2.5).</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Renderscript</h3>
-
-<div>
-
-<p><a href="http://developer.android.com/guide/topics/renderscript/index.html">Renderscript</a>
- is Android's advanced 3D graphics rendering and compute API. It provides a
- portable C99-based language with extensions to facilitate common use cases
- for enhancing graphics and thread level parallelism. The Renderscript
- compiler frontend is based on Clang/LLVM. It emits a portable bitcode format
- for the actual compiled script code, as well as reflects a Java interface for
- developers to control the execution of the compiled bitcode. Executable
- machine code is then generated from this bitcode by an LLVM backend on the
- device. Renderscript is thus able to provide a mechanism by which Android
- developers can improve performance of their applications while retaining
- portability.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>SAFECode</h3>
-
-<div>
-
-<p><a href="http://safecode.cs.illinois.edu">SAFECode</a> is a memory safe C/C++
- compiler built using LLVM. It takes standard, unannotated C/C++ code,
- analyzes the code to ensure that memory accesses and array indexing
- operations are safe, and instruments the code with run-time checks when
- safety cannot be proven statically. SAFECode can be used as a debugging aid
- (like Valgrind) to find and repair memory safety bugs. It can also be used
- to protect code from security attacks at run-time.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>The Stupid D Compiler (SDC)</h3>
-
-<div>
-
-<p><a href="https://github.com/bhelyer/SDC">The Stupid D Compiler</a> is a
- project seeking to write a self-hosting compiler for the D programming
- language without using the frontend of the reference compiler (DMD).</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>TTA-based Co-design Environment (TCE)</h3>
-
-<div>
-
-<p>TCE is a toolset for designing application-specific processors (ASP) based on
- the Transport triggered architecture (TTA). The toolset provides a complete
- co-design flow from C/C++ programs down to synthesizable VHDL and parallel
- program binaries. Processor customization points include the register files,
- function units, supported operations, and the interconnection network.</p>
-
-<p>TCE uses Clang and LLVM for C/C++ language support, target independent
- optimizations and also for parts of code generation. It generates new
- LLVM-based code generators "on the fly" for the designed TTA processors and
- loads them in to the compiler backend as runtime libraries to avoid
- per-target recompilation of larger parts of the compiler chain.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>Tart Programming Language</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/tart/">Tart</a> is a general-purpose,
- strongly typed programming language designed for application
- developers. Strongly inspired by Python and C#, Tart focuses on practical
- solutions for the professional software developer, while avoiding the clutter
- and boilerplate of legacy languages like Java and C++. Although Tart is still
- in development, the current implementation supports many features expected of
- a modern programming language, such as garbage collection, powerful
- bidirectional type inference, a greatly simplified syntax for template
- metaprogramming, closures and function literals, reflection, operator
- overloading, explicit mutability and immutability, and much more. Tart is
- flexible enough to accommodate a broad range of programming styles and
- philosophies, while maintaining a strong commitment to simplicity, minimalism
- and elegance in design.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>ThreadSanitizer</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/data-race-test/">ThreadSanitizer</a> is a
- data race detector for (mostly) C and C++ code, available for Linux, Mac OS
- and Windows. On different systems, we use binary instrumentation frameworks
- (Valgrind and Pin) as frontends that generate the program events for the race
- detection algorithm. On Linux, there's an option of using LLVM-based
- compile-time instrumentation.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>The ZooLib C++ Cross-Platform Application Framework</h3>
-
-<div>
-
-<p><a href="http://www.zoolib.org/">ZooLib</a> is Open Source under the MIT
- License. It provides GUI, filesystem access, TCP networking, thread-safe
- memory management, threading and locking for Mac OS X, Classic Mac OS,
- Microsoft Windows, POSIX operating systems with X11, BeOS, Haiku, Apple's iOS
- and Research in Motion's BlackBerry.</p>
-
-<p>My current work is to use CLang's static analyzer to improve ZooLib's code
- quality. I also plan to set up LLVM compiles of the demo programs and test
- programs using CLang and LLVM on all the platforms that CLang, LLVM and
- ZooLib all support.</p>
-
-</div>
-
-<!--=========================================================================-->
-<!--
-<h3>PinaVM</h3>
-
-<div>
-<p><a href="http://gitorious.org/pinavm/pages/Home">PinaVM</a> is an open
-source, <a href="http://www.systemc.org/">SystemC</a> front-end. Unlike many
-other front-ends, PinaVM actually executes the elaboration of the
-program analyzed using LLVM's JIT infrastructure. It later enriches the
-bitcode with SystemC-specific information.</p>
-</div>
--->
-
-
-<!--=========================================================================-->
-<!--
-<h3 id="icedtea">IcedTea Java Virtual Machine Implementation</h3>
-
-<div>
-<p>
-<a href="http://icedtea.classpath.org/wiki/Main_Page">IcedTea</a> provides a
-harness to build OpenJDK using only free software build tools and to provide
-replacements for the not-yet free parts of OpenJDK. One of the extensions that
-IcedTea provides is a new JIT compiler named <a
-href="http://icedtea.classpath.org/wiki/ZeroSharkFaq">Shark</a> which uses LLVM
-to provide native code generation without introducing processor-dependent
-code.
-</p>
+ projects that have already been updated to work with LLVM 3.1.</p>
-<p> OpenJDK 7 b112, IcedTea6 1.9 and IcedTea7 1.13 and later have been tested
-and are known to work with LLVM 3.0 (and continue to work with older LLVM
-releases &gt;= 2.6 as well).</p>
-</div>
--->
-
-<!--=========================================================================-->
-<!--
-<h3>Polly - Polyhedral optimizations for LLVM</h3>
-
-<div>
-<p>Polly is a project that aims to provide advanced memory access optimizations
-to better take advantage of SIMD units, cache hierarchies, multiple cores or
-even vector accelerators for LLVM. Built around an abstract mathematical
-description based on Z-polyhedra, it provides the infrastructure to develop
-advanced optimizations in LLVM and to connect complex external optimizers. In
-its first year of existence Polly already provides an exact value-based
-dependency analysis as well as basic SIMD and OpenMP code generation support.
-Furthermore, Polly can use PoCC(Pluto) an advanced optimizer for data-locality
-and parallelism.</p>
-</div>
--->
-
-<!--=========================================================================-->
-<!--
-<h3>Rubinius</h3>
-
-<div>
- <p><a href="http://github.com/evanphx/rubinius">Rubinius</a> is an environment
- for running Ruby code which strives to write as much of the implementation in
- Ruby as possible. Combined with a bytecode interpreting VM, it uses LLVM to
- optimize and compile ruby code down to machine code. Techniques such as type
- feedback, method inlining, and deoptimization are all used to remove dynamism
- from ruby execution and increase performance.</p>
-</div>
--->
-
-<!--=========================================================================-->
-<!--
-<h3>
-<a name="FAUST">FAUST Real-Time Audio Signal Processing Language</a>
-</h3>
+ ... to be filled in right before the release ...
-<div>
-<p>
-<a href="http://faust.grame.fr">FAUST</a> is a compiled language for real-time
-audio signal processing. The name FAUST stands for Functional AUdio STream. Its
-programming model combines two approaches: functional programming and block
-diagram composition. In addition with the C, C++, JAVA output formats, the
-Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7-3.0.</p>
-
-</div>
--->
-
</div>
<!-- *********************************************************************** -->
<h2>
- <a name="whatsnew">What's New in LLVM 3.0?</a>
+ <a name="whatsnew">What's New in LLVM 3.1?</a>
</h2>
<!-- *********************************************************************** -->
@@ -699,18 +243,38 @@ Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7-3.0.</p>
<div>
-<p>LLVM 3.0 includes several major new capabilities:</p>
+ <!-- Features that need text if they're finished for 3.1:
+ ARM EHABI
+ combiner-aa?
+ strong phi elim
+ loop dependence analysis
+ CorrelatedValuePropagation
+ lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.1.
+ Integrated assembler on by default for arm/thumb?
-<ul>
+ -->
+
+ <!-- Near dead:
+ Analysis/RegionInfo.h + Dom Frontiers
+ SparseBitVector: used in LiveVar.
+ llvm/lib/Archive - replace with lib object?
+ -->
+
+<p>LLVM 3.1 includes several major changes and big features:</p>
-<!--
-<li></li>
--->
-
+<ul>
+ <li><a href="../tools/clang/docs/AddressSanitizer.html">AddressSanitizer</a>,
+ a fast memory error detector.</li>
+  <li><a href="CodeGenerator.html#machineinstrbundle">MachineInstr Bundles</a>,
+      support for modeling instruction bundling / packing.</li>
+  <li><a href="#armintegratedassembler">ARM Integrated Assembler</a>,
+      a full-featured assembler with direct-to-object support for ARM.</li>
+ <li>....</li>
</ul>
-
+
</div>
+
<!--=========================================================================-->
<h3>
<a name="coreimprovements">LLVM IR and Core Improvements</a>
@@ -721,117 +285,15 @@ Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7-3.0.</p>
<p>LLVM IR has several new features for better support of new targets and that
expose new optimization opportunities:</p>
-<p>One of the biggest changes is that 3.0 has a new exception handling
- system. The old system used LLVM intrinsics to convey the exception handling
- information to the code generator. It worked in most cases, but not
- all. Inlining was especially difficult to get right. Also, the intrinsics
- could be moved away from the <code>invoke</code> instruction, making it hard
- to recover that information.</p>
-
-<p>The new EH system makes exception handling a first-class member of the IR. It
- adds two new instructions:</p>
-
-<ul>
- <li><a href="LangRef.html#i_landingpad"><code>landingpad</code></a> &mdash;
- this instruction defines a landing pad basic block. It contains all of the
- information that's needed by the code generator. It's also required to be
- the first non-PHI instruction in the landing pad. In addition, a landing
- pad may be jumped to only by the unwind edge of an <code>invoke</code>
- instruction.</li>
-
- <li><a href="LangRef.html#i_resume"><code>resume</code></a> &mdash; this
- instruction causes the current exception to resume traveling up the
- stack. It replaces the <code>@llvm.eh.resume</code> intrinsic.</li>
-</ul>
-
-<p>Converting from the old EH API to the new EH API is rather simple, because a
- lot of complexity has been removed. The two intrinsics,
- <code>@llvm.eh.exception</code> and <code>@llvm.eh.selector</code> have been
- superceded by the <code>landingpad</code> instruction. Instead of generating
- a call to <code>@llvm.eh.exception</code> and <code>@llvm.eh.selector</code>:
-
-<div class="doc_code">
-<pre>
-Function *ExcIntr = Intrinsic::getDeclaration(TheModule,
- Intrinsic::eh_exception);
-Function *SlctrIntr = Intrinsic::getDeclaration(TheModule,
- Intrinsic::eh_selector);
-
-// The exception pointer.
-Value *ExnPtr = Builder.CreateCall(ExcIntr, "exc_ptr");
-
-std::vector&lt;Value*&gt; Args;
-Args.push_back(ExnPtr);
-Args.push_back(Builder.CreateBitCast(Personality,
- Type::getInt8PtrTy(Context)));
-
-<i>// Add selector clauses to Args.</i>
-
-// The selector call.
-Builder.CreateCall(SlctrIntr, Args, "exc_sel");
-</pre>
-</div>
-
-<p>You should instead generate a <code>landingpad</code> instruction, that
- returns an exception object and selector value:</p>
-
-<div class="doc_code">
-<pre>
-LandingPadInst *LPadInst =
- Builder.CreateLandingPad(StructType::get(Int8PtrTy, Int32Ty, NULL),
- Personality, 0);
-
-Value *LPadExn = Builder.CreateExtractValue(LPadInst, 0);
-Builder.CreateStore(LPadExn, getExceptionSlot());
-
-Value *LPadSel = Builder.CreateExtractValue(LPadInst, 1);
-Builder.CreateStore(LPadSel, getEHSelectorSlot());
-</pre>
-</div>
-
-<p>It's now trivial to add the individual clauses to the <code>landingpad</code>
- instruction.</p>
-
-<div class="doc_code">
-<pre>
-<i><b>// Adding a catch clause</b></i>
-Constant *TypeInfo = getTypeInfo();
-LPadInst-&gt;addClause(TypeInfo);
-
-<i><b>// Adding a C++ catch-all</b></i>
-LPadInst-&gt;addClause(Constant::getNullValue(Builder.getInt8PtrTy()));
-
-<i><b>// Adding a cleanup</b></i>
-LPadInst-&gt;setCleanup(true);
-
-<i><b>// Adding a filter clause</b></i>
-std::vector&lt;Constant*&gt; TypeInfos;
-Constant *TypeInfo = getFilterTypeInfo();
-TypeInfos.push_back(Builder.CreateBitCast(TypeInfo, Builder.getInt8PtrTy()));
-
-ArrayType *FilterTy = ArrayType::get(Int8PtrTy, TypeInfos.size());
-LPadInst-&gt;addClause(ConstantArray::get(FilterTy, TypeInfos));
-</pre>
-</div>
-
-<p>Converting from using the <code>@llvm.eh.resume</code> intrinsic to
- the <code>resume</code> instruction is trivial. It takes the exception
- pointer and exception selector values returned by
- the <code>landingpad</code> instruction:</p>
-
-<div class="doc_code">
-<pre>
-Type *UnwindDataTy = StructType::get(Builder.getInt8PtrTy(),
- Builder.getInt32Ty(), NULL);
-Value *UnwindData = UndefValue::get(UnwindDataTy);
-Value *ExcPtr = Builder.CreateLoad(getExceptionObjSlot());
-Value *ExcSel = Builder.CreateLoad(getExceptionSelSlot());
-UnwindData = Builder.CreateInsertValue(UnwindData, ExcPtr, 0, "exc_ptr");
-UnwindData = Builder.CreateInsertValue(UnwindData, ExcSel, 1, "exc_sel");
-Builder.CreateResume(UnwindData);
-</pre>
-</div>
-
+ <ul>
+ <li>IR support for half float</li>
+ <li>IR support for vectors of pointers, including vector GEPs.</li>
+ <li>Module flags have been introduced. They convey information about the
+ module as a whole to LLVM subsystems.</li>
+ <li>Loads can now have range metadata attached to them to describe the
+ possible values being loaded.</li>
+ <li>....</li>
+ </ul>
</div>
<!--=========================================================================-->
@@ -841,16 +303,12 @@ Builder.CreateResume(UnwindData);
<div>
-<p>In addition to a large array of minor performance tweaks and bug fixes, this
+<p>In addition to many minor performance tweaks and bug fixes, this
release includes a few major enhancements and additions to the
optimizers:</p>
<ul>
-<!--
-<li></li>
--->
-</li>
-
+ <li>....</li>
</ul>
</div>
@@ -865,18 +323,14 @@ Builder.CreateResume(UnwindData);
<p>The LLVM Machine Code (aka MC) subsystem was created to solve a number of
problems in the realm of assembly, disassembly, object file format handling,
and a number of other related areas that CPU instruction-set level tools work
- in.</p>
+ in. For more information, please see
+ the <a href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro
+ to the LLVM MC Project Blog Post</a>.</p>
<ul>
-<!--
-<li></li>
--->
+ <li>....</li>
</ul>
-<p>For more information, please see
- the <a href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro
- to the LLVM MC Project Blog Post</a>.</p>
-
</div>
<!--=========================================================================-->
@@ -886,15 +340,39 @@ Builder.CreateResume(UnwindData);
<div>
+<p>We have changed the way that the Type Legalizer legalizes vectors. The type
+ legalizer now attempts to promote integer elements. This enabled the
+  implementation of vector-select. Additionally, we see a performance boost on
+  workloads that use vectors of chars and shorts, since they are now promoted
+ to 32-bit types, which are better supported by the SIMD instruction set.
+ Floating point types are still widened as before.</p>
+
+
<p>We have put a significant amount of work into the code generator
infrastructure, which allows us to implement more aggressive algorithms and
make it run faster:</p>
<ul>
-<!--
-<li></li>
--->
+ <li>TableGen can now synthesize register classes that are only needed to
+ represent combinations of constraints from instructions and sub-registers.
+      The synthetic register classes inherit most of their properties from their
+ closest user-defined super-class.</li>
+ <li><code>MachineRegisterInfo</code> now allows the reserved registers to be
+ frozen when register allocation starts. Target hooks should use the
+ <code>MRI-&gt;canReserveReg(FramePtr)</code> method to avoid accidentally
+ disabling frame pointer elimination during register allocation.</li>
+ <li>A new kind of <code>MachineOperand</code> provides a compact
+ representation of large clobber lists on call instructions. The register
+ mask operand references a bit mask of preserved registers. Everything else
+ is clobbered.</li>
</ul>
+
+<p> We added new TableGen infrastructure to support bundling for
+ Very Long Instruction Word (VLIW) architectures. TableGen can now
+ automatically generate a deterministic finite automaton from a VLIW
+ target's schedule description which can be queried to determine
+ legal groupings of instructions in a bundle.</p>
+
</div>
<!--=========================================================================-->
@@ -907,13 +385,12 @@ Builder.CreateResume(UnwindData);
<p>New features and major changes in the X86 target include:</p>
<ul>
-
- <li>The CRC32 intrinsics have been renamed. The intrinsics were previously
- <code>@llvm.x86.sse42.crc32.[8|16|32]</code>
- and <code>@llvm.x86.sse42.crc64.[8|64]</code>. They have been renamed to
- <code>@llvm.x86.sse42.crc32.32.[8|16|32]</code> and
- <code>@llvm.x86.sse42.crc32.64.[8|64]</code>.</li>
-
+ <li>Bug fixes and improved support for AVX1</li>
+ <li>Support for AVX2 (still incomplete at this point)</li>
+ <li>Call instructions use the new register mask operands for faster compile
+ times and better support for different calling conventions. The old WINCALL
+ instructions are no longer needed.</li>
+ <li>DW2 Exception Handling is enabled on Cygwin and MinGW.</li>
</ul>
</div>
@@ -928,386 +405,188 @@ Builder.CreateResume(UnwindData);
<p>New features of the ARM target include:</p>
<ul>
-<!--
-<li></li>
--->
+ <li>The constant island pass now supports basic block and constant pool entry
+ alignments greater than 4 bytes.</li>
+ <li>On Darwin, the ARM target now has a full-featured integrated assembler.
+ </li>
</ul>
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="OtherTS">Other Target Specific Improvements</a>
-</h3>
-
-<p>PPC32/ELF va_arg was implemented.</p>
-<p>PPC32 initial support for .o file writing was implemented.</p>
+<h4>
+<a name="armintegratedassembler">ARM Integrated Assembler</a>
+</h4>
<div>
+<p>The ARM target now includes a full-featured macro assembler, including
+direct-to-object module support for clang. The assembler is currently enabled
+by default only for Darwin, pending testing and any additional
+platform-specific support needed for Linux.</p>
-<ul>
-<!--
-<li></li>
--->
-</ul>
+<p>Full support is included for Thumb1, Thumb2 and ARM modes, along with
+subtarget and CPU specific extensions for VFP2, VFP3 and NEON.</p>
+<p>The assembler supports Unified Syntax only (see the ARM Architecture
+Reference Manual for details). While there is some, and growing, support for
+pre-unified (divided) syntax, there are still significant gaps in that
+support.</p>
</div>
+</div>
<!--=========================================================================-->
<h3>
-<a name="changes">Major Changes and Removed Features</a>
+<a name="MIPS">MIPS Target Improvements</a>
</h3>
<div>
-<p>If you're already an LLVM user or developer with out-of-tree changes based on
- LLVM 2.9, this section lists some "gotchas" that you may run into upgrading
- from the previous release.</p>
+<p>This release has seen major new work on just about every aspect of the MIPS
+ backend. Some of the major new features include:</p>
<ul>
- <li>The <code>LLVMC</code> front end code was removed while separating
- out language independence.</li>
- <li>The <code>LowerSetJmp</code> pass wasn't used effectively by any
- target and has been removed.</li>
- <li>The old <code>TailDup</code> pass was not used in the standard pipeline
- and was unable to update ssa form, so it has been removed.
- <li>The syntax of volatile loads and stores in IR has been changed to
- "<code>load volatile</code>"/"<code>store volatile</code>". The old
- syntax ("<code>volatile load</code>"/"<code>volatile store</code>")
- is still accepted, but is now considered deprecated.</li>
- <li>The old atomic intrinscs (<code>llvm.memory.barrier</code> and
- <code>llvm.atomic.*</code>) are now gone. Please use the new atomic
- instructions, described in the <a href="Atomics.html">atomics guide</a>.
+ <li>....</li>
</ul>
-
-<h4>Windows (32-bit)</h4>
-<div>
-
-<ul>
- <li>On Win32(MinGW32 and MSVC), Windows 2000 will not be supported.
- Windows XP or higher is required.</li>
-</ul>
-
-</div>
-
</div>
<!--=========================================================================-->
<h3>
-<a name="api_changes">Internal API Changes</a>
+<a name="OtherTS">Other Target Specific Improvements</a>
</h3>
<div>
-<p>In addition, many APIs have changed in this release. Some of the major
- LLVM API changes are:</p>
-
<ul>
- <li>The biggest and most pervasive change is that llvm::Type's are no longer
- returned or accepted as 'const' values. Instead, just pass around
- non-const Type's.</li>
-
- <li><code>PHINode::reserveOperandSpace</code> has been removed. Instead, you
- must specify how many operands to reserve space for when you create the
- PHINode, by passing an extra argument
- into <code>PHINode::Create</code>.</li>
-
- <li>PHINodes no longer store their incoming BasicBlocks as operands. Instead,
- the list of incoming BasicBlocks is stored separately, and can be accessed
- with new functions <code>PHINode::block_begin</code>
- and <code>PHINode::block_end</code>.</li>
-
- <li>Various functions now take an <code>ArrayRef</code> instead of either a
- pair of pointers (or iterators) to the beginning and end of a range, or a
- pointer and a length. Others now return an <code>ArrayRef</code> instead
- of a reference to a <code>SmallVector</code>
- or <code>std::vector</code>. These include:
-<ul>
-<!-- Please keep this list sorted. -->
-<li><code>CallInst::Create</code></li>
-<li><code>ComputeLinearIndex</code> (in <code>llvm/CodeGen/Analysis.h</code>)</li>
-<li><code>ConstantArray::get</code></li>
-<li><code>ConstantExpr::getExtractElement</code></li>
-<li><code>ConstantExpr::getGetElementPtr</code></li>
-<li><code>ConstantExpr::getInBoundsGetElementPtr</code></li>
-<li><code>ConstantExpr::getIndices</code></li>
-<li><code>ConstantExpr::getInsertElement</code></li>
-<li><code>ConstantExpr::getWithOperands</code></li>
-<li><code>ConstantFoldCall</code> (in <code>llvm/Analysis/ConstantFolding.h</code>)</li>
-<li><code>ConstantFoldInstOperands</code> (in <code>llvm/Analysis/ConstantFolding.h</code>)</li>
-<li><code>ConstantVector::get</code></li>
-<li><code>DIBuilder::createComplexVariable</code></li>
-<li><code>DIBuilder::getOrCreateArray</code></li>
-<li><code>ExtractValueInst::Create</code></li>
-<li><code>ExtractValueInst::getIndexedType</code></li>
-<li><code>ExtractValueInst::getIndices</code></li>
-<li><code>FindInsertedValue</code> (in <code>llvm/Analysis/ValueTracking.h</code>)</li>
-<li><code>gep_type_begin</code> (in <code>llvm/Support/GetElementPtrTypeIterator.h</code>)</li>
-<li><code>gep_type_end</code> (in <code>llvm/Support/GetElementPtrTypeIterator.h</code>)</li>
-<li><code>GetElementPtrInst::Create</code></li>
-<li><code>GetElementPtrInst::CreateInBounds</code></li>
-<li><code>GetElementPtrInst::getIndexedType</code></li>
-<li><code>InsertValueInst::Create</code></li>
-<li><code>InsertValueInst::getIndices</code></li>
-<li><code>InvokeInst::Create</code></li>
-<li><code>IRBuilder::CreateCall</code></li>
-<li><code>IRBuilder::CreateExtractValue</code></li>
-<li><code>IRBuilder::CreateGEP</code></li>
-<li><code>IRBuilder::CreateInBoundsGEP</code></li>
-<li><code>IRBuilder::CreateInsertValue</code></li>
-<li><code>IRBuilder::CreateInvoke</code></li>
-<li><code>MDNode::get</code></li>
-<li><code>MDNode::getIfExists</code></li>
-<li><code>MDNode::getTemporary</code></li>
-<li><code>MDNode::getWhenValsUnresolved</code></li>
-<li><code>SimplifyGEPInst</code> (in <code>llvm/Analysis/InstructionSimplify.h</code>)</li>
-<li><code>TargetData::getIndexedOffset</code></li>
-</ul></li>
-
- <li>All forms of <code>StringMap::getOrCreateValue</code> have been remove
- except for the one which takes a <code>StringRef</code>.</li>
-
- <li>The <code>LLVMBuildUnwind</code> function from the C API was removed. The
- LLVM <code>unwind</code> instruction has been deprecated for a long time
- and isn't used by the current front-ends. So this was removed during the
- exception handling rewrite.</li>
-
- <li>The <code>LLVMAddLowerSetJmpPass</code> function from the C API was
- removed because the <code>LowerSetJmp</code> pass was removed.</li>
+ <li>....</li>
- <li>The <code>DIBuilder</code> interface used by front ends to encode
- debugging information in the LLVM IR now expects clients to
- use <code>DIBuilder::finalize()</code> at the end of translation unit to
- complete debugging information encoding.</li>
- <li>The way the type system works has been
- rewritten: <code>PATypeHolder</code> and <code>OpaqueType</code> are gone,
- and all APIs deal with <code>Type*</code> instead of <code>const
- Type*</code>. If you need to create recursive structures, then create a
- named structure, and use <code>setBody()</code> when all its elements are
- built. Type merging and refining is gone too: named structures are not
- merged with other structures, even if their layout is identical. (of
- course anonymous structures are still uniqued by layout).</li>
-
- <li>TargetSelect.h moved to Support/ from Target/</li>
-
- <li>UpgradeIntrinsicCall no longer upgrades pre-2.9 intrinsic calls (for
- example <code>llvm.memset.i32</code>).</li>
-
- <li>It is mandatory to initialize all out-of-tree passes too and their dependencies now with
- <code>INITIALIZE_PASS{BEGIN,END,}</code>
- and <code>INITIALIZE_{PASS,AG}_DEPENDENCY</code>.</li>
-
- <li>The interface for MemDepResult in MemoryDependenceAnalysis has been
- enhanced with new return types Unknown and NonFuncLocal, in addition to
- the existing types Clobber, Def, and NonLocal.</li>
</ul>
</div>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
- <a name="knownproblems">Known Problems</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section contains significant known problems with the LLVM system, listed
- by component. If you run into a problem, please check
- the <a href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
- there isn't already one.</p>
-
-<!-- ======================================================================= -->
+<!--=========================================================================-->
<h3>
- <a name="experimental">Experimental features included with this release</a>
+<a name="changes">Major Changes and Removed Features</a>
</h3>
<div>
-<p>The following components of this LLVM release are either untested, known to
- be broken or unreliable, or are in early development. These components
- should not be relied on, and bugs should not be filed against them, but they
- may be useful to some people. In particular, if you would like to work on
- one of these components, please contact us on
- the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev
- list</a>.</p>
+<p>If you're already an LLVM user or developer with out-of-tree changes based on
+ LLVM 3.0, this section lists some "gotchas" that you may run into upgrading
+ from the previous release.</p>
<ul>
- <li>The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, PTX, SystemZ and
- XCore backends are experimental.</li>
-
- <li><tt>llc</tt> "<tt>-filetype=obj</tt>" is experimental on all targets other
- than darwin and ELF X86 systems.</li>
+ <li>LLVM 3.1 removes support for reading LLVM 2.9 bitcode files. Going
+ forward, we aim for all future versions of LLVM to read bitcode files and
+ <tt>.ll</tt> files produced by LLVM 3.0 and later.</li>
+ <li>The <tt>unwind</tt> instruction is now gone. With the introduction of the
+ new exception handling system in LLVM 3.0, the <tt>unwind</tt> instruction
+ became obsolete.</li>
+ <li>....</li>
</ul>
</div>
-<!-- ======================================================================= -->
+<!--=========================================================================-->
<h3>
- <a name="x86-be">Known problems with the X86 back-end</a>
+<a name="api_changes">Internal API Changes</a>
</h3>
<div>
-<ul>
- <li>The X86 backend does not yet support
- all <a href="http://llvm.org/PR879">inline assembly that uses the X86
- floating point stack</a>. It supports the 'f' and 't' constraints, but
- not 'u'.</li>
-
- <li>The X86-64 backend does not yet support the LLVM IR instruction
- <tt>va_arg</tt>. Currently, front-ends support variadic argument
- constructs on X86-64 by lowering them manually.</li>
-
- <li>Windows x64 (aka Win64) code generator has a few issues.
- <ul>
- <li>llvm-gcc cannot build the mingw-w64 runtime currently due to lack of
- support for the 'u' inline assembly constraint and for X87 floating
- point inline assembly.</li>
-
- <li>On mingw-w64, you will see unresolved symbol <tt>__chkstk</tt> due
- to <a href="http://llvm.org/bugs/show_bug.cgi?id=8919">Bug 8919</a>.
- It is fixed
- in <a href="http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20110321/118499.html">r128206</a>.</li>
-
- <li>Miss-aligned MOVDQA might crash your program. It is due to
- <a href="http://llvm.org/bugs/show_bug.cgi?id=9483">Bug 9483</a>, lack
- of handling aligned internal globals.</li>
- </ul>
- </li>
-
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="ppc-be">Known problems with the PowerPC back-end</a>
-</h3>
-
-<div>
+<p>In addition, many APIs have changed in this release. Some of the major
+ LLVM API changes are:</p>
<ul>
- <li>The PPC32/ELF support lacks PIC support.</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="arm-be">Known problems with the ARM back-end</a>
-</h3>
-
-<div>
-
+ <li>Target specific options have been moved from global variables to members
+ of the new <code>TargetOptions</code> class, which is local to each
+ <code>TargetMachine</code>. As a consequence, the associated flags are
+ no longer accepted by <tt>clang -mllvm</tt>. This includes the following
+ (a migration sketch follows this list):
<ul>
- <li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
- processors, thumb programs can crash or produce wrong results
- (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
-
- <li>Compilation for ARM Linux OABI (old ABI) is supported but not fully
- tested.</li>
+<li><code>llvm::PrintMachineCode</code></li>
+<li><code>llvm::NoFramePointerElim</code></li>
+<li><code>llvm::NoFramePointerElimNonLeaf</code></li>
+<li><code>llvm::DisableFramePointerElim(const MachineFunction &amp;)</code></li>
+<li><code>llvm::LessPreciseFPMADOption</code></li>
+<li><code>llvm::LessPreciseFPMAD()</code></li>
+<li><code>llvm::NoExcessFPPrecision</code></li>
+<li><code>llvm::UnsafeFPMath</code></li>
+<li><code>llvm::NoInfsFPMath</code></li>
+<li><code>llvm::NoNaNsFPMath</code></li>
+<li><code>llvm::HonorSignDependentRoundingFPMathOption</code></li>
+<li><code>llvm::HonorSignDependentRoundingFPMath()</code></li>
+<li><code>llvm::UseSoftFloat</code></li>
+<li><code>llvm::FloatABIType</code></li>
+<li><code>llvm::NoZerosInBSS</code></li>
+<li><code>llvm::JITExceptionHandling</code></li>
+<li><code>llvm::JITEmitDebugInfo</code></li>
+<li><code>llvm::JITEmitDebugInfoToDisk</code></li>
+<li><code>llvm::GuaranteedTailCallOpt</code></li>
+<li><code>llvm::StackAlignmentOverride</code></li>
+<li><code>llvm::RealignStack</code></li>
+<li><code>llvm::DisableJumpTables</code></li>
+<li><code>llvm::EnableFastISel</code></li>
+<li><code>llvm::getTrapFunctionName()</code></li>
+<li><code>llvm::EnableSegmentedStacks</code></li>
+</ul></li>
+ <li>....</li>
</ul>
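+
+<p>A minimal migration sketch for the options listed above (the triple, CPU
+ and feature strings are placeholders): code that previously toggled one of
+ the removed globals now fills in a <code>TargetOptions</code> and passes it
+ when constructing the <code>TargetMachine</code>:</p>
+
+<div class="doc_code">
+<pre>
+// Previously: llvm::UnsafeFPMath = true; (global flag, now removed)
+TargetOptions Options;
+Options.UnsafeFPMath = true;          // now a per-TargetMachine member
+Options.NoFramePointerElim = true;
+
+TargetMachine *TM = TheTarget-&gt;createTargetMachine(
+    TripleStr, CPUStr, FeatureStr, Options,
+    Reloc::Default, CodeModel::Default, CodeGenOpt::Default);
+</pre>
+</div>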
</div>
-<!-- ======================================================================= -->
+<!--=========================================================================-->
<h3>
- <a name="sparc-be">Known problems with the SPARC back-end</a>
+<a name="tools_changes">Tools Changes</a>
</h3>
<div>
-<ul>
- <li>The SPARC backend only supports the 32-bit SPARC ABI (-m32); it does not
- support the 64-bit SPARC ABI (-m64).</li>
-</ul>
+<p>In addition, some tools have changed in this release. Some of the changes
+ are:</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="mips-be">Known problems with the MIPS back-end</a>
-</h3>
-
-<div>
<ul>
- <li>64-bit MIPS targets are not supported yet.</li>
+ <li><tt>llvm-stress</tt> is a command line tool for generating random
+ <tt>.ll</tt> files to fuzz different LLVM components.</li>
+ <li>....</li>
</ul>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="alpha-be">Known problems with the Alpha back-end</a>
-</h3>
-
-<div>
-
<ul>
- <li>On 21164s, some rare FP arithmetic sequences which may trap do not have
- the appropriate nops inserted to ensure restartability.</li>
+ <li>....</li>
</ul>
</div>
-<!-- ======================================================================= -->
-<h3>
- <a name="c-be">Known problems with the C back-end</a>
-</h3>
-
-<div>
-
-<p>The C backend has numerous problems and is not being actively maintained.
- Depending on it for anything serious is not advised.</p>
-
-<ul>
- <li><a href="http://llvm.org/PR802">The C backend has only basic support for
- inline assembly code</a>.</li>
-
- <li><a href="http://llvm.org/PR1658">The C backend violates the ABI of common
- C++ programs</a>, preventing intermixing between C++ compiled by the CBE
- and C++ code compiled with <tt>llc</tt> or native compilers.</li>
-
- <li>The C backend does not support all exception handling constructs.</li>
-
- <li>The C backend does not support arbitrary precision integers.</li>
-</ul>
-
</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="llvm-gcc">Known problems with the llvm-gcc front-end</a>
-</h3>
+<!-- *********************************************************************** -->
+<h2>
+ <a name="knownproblems">Known Problems</a>
+</h2>
+<!-- *********************************************************************** -->
<div>
-<p><b>LLVM 2.9 was the last release of llvm-gcc.</b></p>
+<p>LLVM is generally a production-quality compiler, and is used by a broad range
+ of applications and ships in many products. That said, not every
+ subsystem is as mature as the aggregate, particularly the more obscure
+ targets. If you run into a problem, please check the <a
+ href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
+ there isn't already one, or ask on the <a
+ href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev
+ list</a>.</p>
-<p>llvm-gcc is generally very stable for the C family of languages. The only
- major language feature of GCC not supported by llvm-gcc is the
- <tt>__builtin_apply</tt> family of builtins. However, some extensions
- are only supported on some targets. For example, trampolines are only
- supported on some targets (these are used when you take the address of a
- nested function).</p>
+ <p>Known problem areas include:</p>
-<p>Fortran support generally works, but there are still several unresolved bugs
- in <a href="http://llvm.org/bugs/">Bugzilla</a>. Please see the
- tools/gfortran component for details. Note that llvm-gcc is missing major
- Fortran performance work in the frontend and library that went into GCC after
- 4.2. If you are interested in Fortran, we recommend that you consider using
- <a href="#dragonegg">dragonegg</a> instead.</p>
-
-<p>The llvm-gcc 4.2 Ada compiler has basic functionality, but is no longer being
- actively maintained. If you are interested in Ada, we recommend that you
- consider using <a href="#dragonegg">dragonegg</a> instead.</p>
+<ul>
+ <li>The Alpha, Blackfin, CellSPU, MSP430, PTX, SystemZ and
+ XCore backends are experimental, and the Alpha, Blackfin and SystemZ
+ targets have already been removed from mainline.</li>
+
+ <li>The integrated assembler, disassembler, and JIT are not supported by
+ several targets. If an integrated assembler is not supported, then a
+ system assembler is required. For more details, see the <a
+ href="CodeGenerator.html#targetfeatures">Target Features Matrix</a>.
+ </li>
-</div>
+ <li>The C backend has numerous problems and is not being actively maintained.
+ Depending on it for anything serious is not advised.</li>
+</ul>
</div>
@@ -1342,7 +621,7 @@ Builder.CreateResume(UnwindData);
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-01 05:51:35 +0100 (Tue, 01 Nov 2011) $
+ Last modified: $Date: 2012-04-12 17:17:35 +0200 (Thu, 12 Apr 2012) $
</address>
</body>
diff --git a/docs/SegmentedStacks.html b/docs/SegmentedStacks.html
index a91b109308a2..16f55074732c 100644
--- a/docs/SegmentedStacks.html
+++ b/docs/SegmentedStacks.html
@@ -20,18 +20,12 @@
<li><a href="#alloca">Variable Sized Allocas</a></li>
</ol>
</li>
- <li><a href="#results">Results</a>
- <ol>
- <li><a href="#go">Go on LLVM</a></li>
- <li><a href="#abi">Runtime ABI</a></li>
- </ol>
- </li>
</ol>
<h2><a name="intro">Introduction</a></h2>
<div>
<p>
- Segmented stack allows stack space to be allocated incrementally than as a monolithic chunk (of some worst case size) at thread initialization. This is done by allocating stack blocks (henceforth called <em>stacklets</em>) and linking them into a doubly linked list. The function prologue is responsible for checking if the current stacklet has enough space for the function to execute; and if not, call into the libgcc runtime to allocate more stack space. Support for segmented stacks on x86 / Linux is currently being worked on.
+ Segmented stacks allow stack space to be allocated incrementally, rather than as a monolithic chunk (of some worst case size) at thread initialization. This is done by allocating stack blocks (henceforth called <em>stacklets</em>) and linking them into a doubly linked list. The function prologue is responsible for checking if the current stacklet has enough space for the function to execute, and, if not, calling into the libgcc runtime to allocate more stack space. When using <tt>llc</tt>, segmented stacks can be enabled by adding <tt>-segmented-stacks</tt> to the command line.
</p>
<p>
The runtime functionality is <a href="http://gcc.gnu.org/wiki/SplitStacks">already there in libgcc</a>.
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 75fae6e89c1f..259a2597ded6 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -53,6 +53,28 @@
<li><a href="#ccxx_composite_types">C/C++ struct/union types</a></li>
<li><a href="#ccxx_enumeration_types">C/C++ enumeration types</a></li>
</ol></li>
+ <li><a href="#llvmdwarfextension">LLVM Dwarf Extensions</a>
+ <ol>
+ <li><a href="#objcproperty">Debugging Information Extension
+ for Objective C Properties</a>
+ <ul>
+ <li><a href="#objcpropertyintroduction">Introduction</a></li>
+ <li><a href="#objcpropertyproposal">Proposal</a></li>
+ <li><a href="#objcpropertynewattributes">New DWARF Attributes</a></li>
+ <li><a href="#objcpropertynewconstants">New DWARF Constants</a></li>
+ </ul>
+ </li>
+ <li><a href="#acceltable">Name Accelerator Tables</a>
+ <ul>
+ <li><a href="#acceltableintroduction">Introduction</a></li>
+ <li><a href="#acceltablehashes">Hash Tables</a></li>
+ <li><a href="#acceltabledetails">Details</a></li>
+ <li><a href="#acceltablecontents">Contents</a></li>
+ <li><a href="#acceltableextensions">Language Extensions and File Format Changes</a></li>
+ </ul>
+ </li>
+ </ol>
+ </li>
</ul>
</td>
<td class="right">
@@ -231,8 +253,8 @@ height="369">
for the optimizer to optimize the program and debugging information without
necessarily having to know anything about debugging information. In
particular, the use of metadata avoids duplicated debugging information from
- the beginning, and the global dead code elimination pass automatically
- deletes debugging information for a function if it decides to delete the
+ the beginning, and the global dead code elimination pass automatically
+ deletes debugging information for a function if it decides to delete the
function. </p>
<p>To do this, most of the debugging information (descriptors for types,
@@ -241,9 +263,9 @@ height="369">
<p>Debug information is designed to be agnostic about the target debugger and
debugging information representation (e.g. DWARF/Stabs/etc). It uses a
- generic pass to decode the information that represents variables, types,
- functions, namespaces, etc: this allows for arbitrary source-language
- semantics and type-systems to be used, as long as there is a module
+ generic pass to decode the information that represents variables, types,
+ functions, namespaces, etc: this allows for arbitrary source-language
+ semantics and type-systems to be used, as long as there is a module
written for the target debugger to interpret the information. </p>
<p>To provide basic functionality, the LLVM debugger does have to make some
@@ -279,7 +301,7 @@ height="369">
the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base =
0x1000.)</p>
-<p>The fields of debug descriptors used internally by LLVM
+<p>The fields of debug descriptors used internally by LLVM
are restricted to only the simple data types <tt>i32</tt>, <tt>i1</tt>,
<tt>float</tt>, <tt>double</tt>, <tt>mdstring</tt> and <tt>mdnode</tt>. </p>
@@ -301,7 +323,7 @@ height="369">
with the current debug version (LLVMDebugVersion = 8 &lt;&lt; 16 or
0x80000 or 524288.)</a></p>
-<p>The details of the various descriptors follow.</p>
+<p>The details of the various descriptors follow.</p>
<!-- ======================================================================= -->
<h4>
@@ -313,14 +335,14 @@ height="369">
<div class="doc_code">
<pre>
!0 = metadata !{
- i32, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ i32, ;; Tag = 17 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
;; (DW_TAG_compile_unit)
- i32, ;; Unused field.
- i32, ;; DWARF language identifier (ex. DW_LANG_C89)
+ i32, ;; Unused field.
+ i32, ;; DWARF language identifier (ex. DW_LANG_C89)
metadata, ;; Source file name
metadata, ;; Source file directory (includes trailing slash)
metadata ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
- i1, ;; True if this is a main compile unit.
+ i1, ;; True if this is a main compile unit.
i1, ;; True if this is optimized.
metadata, ;; Flags
i32 ;; Runtime version
@@ -340,7 +362,7 @@ height="369">
<p>Compile unit descriptors provide the root context for objects declared in a
specific compilation unit. File descriptors are defined using this context.
- These descriptors are collected by a named metadata
+ These descriptors are collected by a named metadata
<tt>!llvm.dbg.cu</tt>. Compile unit descriptor keeps track of subprograms,
global variables and type information.
@@ -356,7 +378,7 @@ height="369">
<div class="doc_code">
<pre>
!0 = metadata !{
- i32, ;; Tag = 41 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ i32, ;; Tag = 41 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
;; (DW_TAG_file_type)
metadata, ;; Source file name
metadata, ;; Source file directory (includes trailing slash)
@@ -384,7 +406,7 @@ height="369">
<div class="doc_code">
<pre>
!1 = metadata !{
- i32, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ i32, ;; Tag = 52 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
;; (DW_TAG_variable)
i32, ;; Unused field.
metadata, ;; Reference to context descriptor
@@ -403,7 +425,8 @@ height="369">
<p>These descriptors provide debug information about global variables. They
provide details such as name, type and where the variable is defined. All
-global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
+global variables are collected inside the named metadata
+<tt>!llvm.dbg.cu</tt>.</p>
</div>
@@ -429,11 +452,12 @@ global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
metadata, ;; Reference to type descriptor
i1, ;; True if the global is local to compile unit (static)
i1, ;; True if the global is defined in the compile unit (not extern)
+ i32, ;; Line number where the scope of the subprogram begins
i32, ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
i32, ;; Index into a virtual function
- metadata, ;; indicates which base type contains the vtable pointer for the
+ metadata, ;; indicates which base type contains the vtable pointer for the
;; derived class
- i1, ;; isArtificial
+ i32, ;; Flags - Artificial, Private, Protected, Explicit, Prototyped.
i1, ;; isOptimized
Function *,;; Pointer to LLVM function
metadata, ;; Lists function template parameters
@@ -446,8 +470,6 @@ global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
<p>These descriptors provide debug information about functions, methods and
subprograms. They provide details such as name, return types and the source
location where the subprogram is defined.
- All subprogram descriptors are collected by a named metadata
- <tt>!llvm.dbg.sp</tt>.
</p>
</div>
@@ -501,9 +523,9 @@ global variables are collected by named metadata <tt>!llvm.dbg.gv</tt>.</p>
<div class="doc_code">
<pre>
!4 = metadata !{
- i32, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ i32, ;; Tag = 36 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
;; (DW_TAG_base_type)
- metadata, ;; Reference to context
+ metadata, ;; Reference to context
metadata, ;; Name (may be "" for anonymous types)
metadata, ;; Reference to file where defined (may be NULL)
i32, ;; Line number where defined (may be 0)
@@ -561,9 +583,10 @@ DW_ATE_unsigned_char = 8
i64, ;; Size in bits
i64, ;; Alignment in bits
i64, ;; Offset in bits
+ i32, ;; Flags to encode attributes, e.g. private
metadata, ;; Reference to type derived from
- metadata, ;; (optional) Name of the Objective C property assoicated with
- ;; Objective-C an ivar
+ metadata, ;; (optional) Name of the Objective C property associated with
+ ;; an Objective-C ivar
metadata, ;; (optional) Name of the Objective C property getter selector.
metadata, ;; (optional) Name of the Objective C property setter selector.
i32 ;; (optional) Objective C property attributes.
@@ -597,9 +620,9 @@ DW_TAG_restrict_type = 55
<p><tt>DW_TAG_typedef</tt> is used to provide a name for the derived type.</p>
-<p><tt>DW_TAG_pointer_type</tt>,<tt>DW_TAG_reference_type</tt>,
- <tt>DW_TAG_const_type</tt>, <tt>DW_TAG_volatile_type</tt>
- and <tt>DW_TAG_restrict_type</tt> are used to qualify
+<p><tt>DW_TAG_pointer_type</tt>, <tt>DW_TAG_reference_type</tt>,
+ <tt>DW_TAG_const_type</tt>, <tt>DW_TAG_volatile_type</tt> and
+ <tt>DW_TAG_restrict_type</tt> are used to qualify
the <a href="#format_derived_type">derived type</a>. </p>
<p><a href="#format_derived_type">Derived type</a> location can be determined
@@ -667,7 +690,8 @@ DW_TAG_inheritance = 28
<p>The members of enumeration types (tag = <tt>DW_TAG_enumeration_type</tt>) are
<a href="#format_enumeration">enumerator descriptors</a>, each representing
the definition of enumeration value for the set. All enumeration type
- descriptors are collected by named metadata <tt>!llvm.dbg.enum</tt>.</p>
+ descriptors are collected inside the named metadata
+ <tt>!llvm.dbg.cu</tt>.</p>
<p>The members of structure (tag = <tt>DW_TAG_structure_type</tt>) or union (tag
= <tt>DW_TAG_union_type</tt>) types are any one of
@@ -738,7 +762,7 @@ DW_TAG_inheritance = 28
<div class="doc_code">
<pre>
!6 = metadata !{
- i32, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
+ i32, ;; Tag = 40 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
;; (DW_TAG_enumerator)
metadata, ;; Name
i64 ;; Value
@@ -820,9 +844,9 @@ DW_TAG_return_variable = 258
void %<a href="#format_common_declare">llvm.dbg.declare</a>(metadata, metadata)
</pre>
-<p>This intrinsic provides information about a local element (ex. variable.) The
- first argument is metadata holding alloca for the variable. The
- second argument is metadata containing description of the variable. </p>
+<p>This intrinsic provides information about a local element (e.g., variable). The
+ first argument is metadata holding the alloca for the variable. The
+ second argument is metadata containing a description of the variable.</p>
</div>
<!-- ======================================================================= -->
@@ -838,8 +862,8 @@ DW_TAG_return_variable = 258
<p>This intrinsic provides information when a user source variable is set to a
new value. The first argument is the new value (wrapped as metadata). The
second argument is the offset in the user source variable where the new value
- is written. The third argument is metadata containing description of the
- user source variable. </p>
+ is written. The third argument is metadata containing a description of the
+ user source variable.</p>
</div>
</div>
@@ -906,27 +930,27 @@ entry:
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-!0 = metadata !{i32 459008, metadata !1, metadata !"X",
+!0 = metadata !{i32 459008, metadata !1, metadata !"X",
metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
- metadata !"foo", metadata !3, i32 1, metadata !4,
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo",
+ metadata !"foo", metadata !3, i32 1, metadata !4,
i1 false, i1 true}; [DW_TAG_subprogram ]
-!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
- metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
+!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c",
+ metadata !"/private/tmp", metadata !"clang 1.1", i1 true,
i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
-!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
+!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0,
i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
!5 = metadata !{null}
-!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
+!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0,
i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
!7 = metadata !{i32 2, i32 7, metadata !1, null}
!8 = metadata !{i32 2, i32 3, metadata !1, null}
-!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
+!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3,
metadata !6}; [ DW_TAG_auto_variable ]
!10 = metadata !{i32 3, i32 7, metadata !1, null}
!11 = metadata !{i32 3, i32 3, metadata !1, null}
-!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
+!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5,
metadata !6}; [ DW_TAG_auto_variable ]
!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
!14 = metadata !{i32 5, i32 9, metadata !13, null}
@@ -946,7 +970,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
<div class="doc_code">
<pre>
-call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
+call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
</pre>
</div>
@@ -960,9 +984,9 @@ call void @llvm.dbg.declare(metadata, metadata !0), !dbg !7
<pre>
!7 = metadata !{i32 2, i32 7, metadata !1, null}
!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
-!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
- metadata !"foo", metadata !"foo", metadata !3, i32 1,
- metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo",
+ metadata !"foo", metadata !"foo", metadata !3, i32 1,
+ metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]
</pre>
</div>
@@ -987,7 +1011,7 @@ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14
<p>The second intrinsic
<tt>%<a href="#format_common_declare">llvm.dbg.declare</a></tt>
- encodes debugging information for variable <tt>Z</tt>. The metadata
+ encodes debugging information for variable <tt>Z</tt>. The metadata
<tt>!dbg !14</tt> attached to the intrinsic provides scope information for
the variable <tt>Z</tt>.</p>
@@ -1068,9 +1092,9 @@ int main(int argc, char *argv[]) {
i32 524305, ;; Tag
i32 0, ;; Unused
i32 4, ;; Language Id
- metadata !"MySource.cpp",
- metadata !"/Users/mine/sources",
- metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
+ metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)",
i1 true, ;; Main Compile Unit
i1 false, ;; Optimized compile unit
metadata !"", ;; Compiler flags
@@ -1081,8 +1105,8 @@ int main(int argc, char *argv[]) {
;;
!1 = metadata !{
i32 524329, ;; Tag
- metadata !"MySource.cpp",
- metadata !"/Users/mine/sources",
+ metadata !"MySource.cpp",
+ metadata !"/Users/mine/sources",
metadata !2 ;; Compile unit
}
@@ -1092,7 +1116,7 @@ int main(int argc, char *argv[]) {
!3 = metadata !{
i32 524329, ;; Tag
metadata !"Myheader.h"
- metadata !"/Users/mine/sources",
+ metadata !"/Users/mine/sources",
metadata !2 ;; Compile unit
}
@@ -1100,9 +1124,9 @@ int main(int argc, char *argv[]) {
</pre>
</div>
-<p>llvm::Instruction provides easy access to metadata attached with an
+<p>llvm::Instruction provides easy access to metadata attached to an
instruction. One can extract line number information encoded in LLVM IR
-using <tt>Instruction::getMetadata()</tt> and
+using <tt>Instruction::getMetadata()</tt> and
<tt>DILocation::getLineNumber()</tt>.
<pre>
if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction
@@ -1141,44 +1165,79 @@ int MyGlobal = 100;
;;
;; List of debug info of globals
;;
-!llvm.dbg.gv = !{!0}
+!llvm.dbg.cu = !{!0}
-;;
-;; Define the global variable descriptor. Note the reference to the global
-;; variable anchor and the global variable itself.
-;;
+;; Define the compile unit.
!0 = metadata !{
- i32 524340, ;; Tag
- i32 0, ;; Unused
- metadata !1, ;; Context
- metadata !"MyGlobal", ;; Name
- metadata !"MyGlobal", ;; Display Name
- metadata !"MyGlobal", ;; Linkage Name
- metadata !3, ;; Compile Unit
- i32 1, ;; Line Number
- metadata !4, ;; Type
- i1 false, ;; Is a local variable
- i1 true, ;; Is this a definition
- i32* @MyGlobal ;; The global variable
+ i32 786449, ;; Tag
+ i32 0, ;; Context
+ i32 4, ;; Language
+ metadata !"foo.cpp", ;; File
+ metadata !"/Volumes/Data/tmp", ;; Directory
+ metadata !"clang version 3.1 ", ;; Producer
+ i1 true, ;; Deprecated field
+ i1 false, ;; "isOptimized"?
+ metadata !"", ;; Flags
+ i32 0, ;; Runtime Version
+ metadata !1, ;; Enum Types
+ metadata !1, ;; Retained Types
+ metadata !1, ;; Subprograms
+ metadata !3 ;; Global Variables
+} ; [ DW_TAG_compile_unit ]
+
+;; The Array of Global Variables
+!3 = metadata !{
+ metadata !4
}
-;;
-;; Define the basic type of 32 bit signed integer. Note that since int is an
-;; intrinsic type the source file is NULL and line 0.
-;;
!4 = metadata !{
- i32 524324, ;; Tag
- metadata !1, ;; Context
- metadata !"int", ;; Name
- metadata !1, ;; File
- i32 0, ;; Line number
- i64 32, ;; Size in Bits
- i64 32, ;; Align in Bits
- i64 0, ;; Offset in Bits
- i32 0, ;; Flags
- i32 5 ;; Encoding
+ metadata !5
}
+;;
+;; Define the global variable itself.
+;;
+!5 = metadata !{
+ i32 786484, ;; Tag
+ i32 0, ;; Unused
+ null, ;; Unused
+ metadata !"MyGlobal", ;; Name
+ metadata !"MyGlobal", ;; Display Name
+ metadata !"", ;; Linkage Name
+ metadata !6, ;; File
+ i32 1, ;; Line
+ metadata !7, ;; Type
+ i32 0, ;; IsLocalToUnit
+ i32 1, ;; IsDefinition
+ i32* @MyGlobal ;; LLVM-IR Value
+} ; [ DW_TAG_variable ]
+
+;;
+;; Define the file
+;;
+!6 = metadata !{
+ i32 786473, ;; Tag
+ metadata !"foo.cpp", ;; File
+ metadata !"/Volumes/Data/tmp", ;; Directory
+ null ;; Unused
+} ; [ DW_TAG_file_type ]
+
+;;
+;; Define the type
+;;
+!7 = metadata !{
+ i32 786468, ;; Tag
+ null, ;; Unused
+ metadata !"int", ;; Name
+ null, ;; Unused
+ i32 0, ;; Line
+ i64 32, ;; Size in Bits
+ i64 32, ;; Align in Bits
+ i64 0, ;; Offset
+ i32 0, ;; Flags
+ i32 5 ;; Encoding
+} ; [ DW_TAG_base_type ]
+
</pre>
</div>
@@ -1220,7 +1279,7 @@ int main(int argc, char *argv[]) {
metadata !1, ;; File
i32 1, ;; Line number
metadata !4, ;; Type
- i1 false, ;; Is local
+ i1 false, ;; Is local
i1 true, ;; Is definition
i32 0, ;; Virtuality attribute, e.g. pure virtual function
i32 0, ;; Index into virtual table for C++ methods
@@ -1314,7 +1373,7 @@ define i32 @main(i32 %argc, i8** %argv) {
!2 = metadata !{
i32 524324, ;; Tag
metadata !1, ;; Context
- metadata !"unsigned char",
+ metadata !"unsigned char",
metadata !1, ;; File
i32 0, ;; Line number
i64 8, ;; Size in Bits
@@ -1803,6 +1862,988 @@ enum Trees {
</div>
+
+<!-- *********************************************************************** -->
+<h2>
+ <a name="llvmdwarfextension">Debugging information format</a>
+</h2>
+<!-- *********************************************************************** -->
+<div>
+<!-- ======================================================================= -->
+<h3>
+ <a name="objcproperty">Debugging Information Extension for Objective C Properties</a>
+</h3>
+<div>
+<!-- *********************************************************************** -->
+<h4>
+ <a name="objcpropertyintroduction">Introduction</a>
+</h4>
+<!-- *********************************************************************** -->
+
+<div>
+<p>Objective C provides a simpler way to declare and define accessor methods
+using declared properties. The language provides features to declare a
+property and to let the compiler synthesize accessor methods.
+</p>
+
+<p>The debugger lets developers inspect Objective C interfaces and their
+instance variables and class variables. However, the debugger does not know
+anything about the properties defined in Objective C interfaces. The debugger
+consumes information generated by the compiler in DWARF format. The format does
+not support encoding of Objective C properties. This proposal describes DWARF
+extensions to encode Objective C properties, which the debugger can use to let
+developers inspect Objective C properties.
+</p>
+
+</div>
+
+
+<!-- *********************************************************************** -->
+<h4>
+ <a name="objcpropertyproposal">Proposal</a>
+</h4>
+<!-- *********************************************************************** -->
+
+<div>
+<p>Objective C properties exist separately from class members. A property
+can be defined only by &quot;setter&quot; and &quot;getter&quot; selectors, and
+be calculated anew on each access. Or a property can just be a direct access
+to some declared ivar. Finally, it can have an ivar &quot;automatically
+synthesized&quot; for it by the compiler, in which case the property can be
+referred to in user code directly using the standard C dereference syntax as
+well as through the property &quot;dot&quot; syntax, but there is no entry in
+the @interface declaration corresponding to this ivar.
+</p>
+<p>
+To facilitate debugging of these properties, we will add a new DWARF TAG into
+the DW_TAG_structure_type definition for the class to hold the description of a
+given property, and a set of DWARF attributes that provide said description.
+The property tag will also contain the name and declared type of the property.
+</p>
+<p>
+If there is a related ivar, there will also be a DWARF property attribute placed
+in the DW_TAG_member DIE for that ivar referring back to the property TAG for
+that property. And in the case where the compiler synthesizes the ivar directly,
+the compiler is expected to generate a DW_TAG_member for that ivar (with
+DW_AT_artificial set to 1), whose name will be the name used to access this
+ivar directly in code, and with the property attribute pointing back to the
+property it is backing.
+</p>
+<p>
+The following examples will serve as illustration for our discussion:
+</p>
+
+<div class="doc_code">
+<pre>
+@interface I1 {
+ int n2;
+}
+
+@property int p1;
+@property int p2;
+@end
+
+@implementation I1
+@synthesize p1;
+@synthesize p2 = n2;
+@end
+</pre>
+</div>
+
+<p>
+This produces the following DWARF (this is a &quot;pseudo dwarfdump&quot; output):
+</p>
+<div class="doc_code">
+<pre>
+0x00000100: TAG_structure_type [7] *
+ AT_APPLE_runtime_class( 0x10 )
+ AT_name( "I1" )
+ AT_decl_file( "Objc_Property.m" )
+ AT_decl_line( 3 )
+
+0x00000110 TAG_APPLE_property
+ AT_name ( "p1" )
+ AT_type ( {0x00000150} ( int ) )
+
+0x00000120: TAG_APPLE_property
+ AT_name ( "p2" )
+ AT_type ( {0x00000150} ( int ) )
+
+0x00000130: TAG_member [8]
+ AT_name( "_p1" )
+ AT_APPLE_property ( {0x00000110} "p1" )
+ AT_type( {0x00000150} ( int ) )
+ AT_artificial ( 0x1 )
+
+0x00000140: TAG_member [8]
+ AT_name( "n2" )
+ AT_APPLE_property ( {0x00000120} "p2" )
+ AT_type( {0x00000150} ( int ) )
+
+0x00000150: AT_type( ( int ) )
+</pre>
+</div>
+
+<p>Note that the current convention is that the name of the ivar for an
+auto-synthesized property is the name of the property from which it derives with
+an underscore prepended, as is shown in the example.
+But we actually don't need to know this convention, since we are given the name
+of the ivar directly.
+</p>
+
+<p>
+Also, it is common practice in ObjC to have different property declarations in
+the @interface and @implementation - e.g. to provide a read-only property in
+the interface, and a read-write one in the implementation. In that case,
+the compiler should emit whichever property declaration will be in force in the
+current translation unit.
+</p>
+
+<p>Developers can decorate a property with attributes, which are encoded using
+DW_AT_APPLE_property_attribute.
+</p>
+
+<div class="doc_code">
+<pre>
+@property (readonly, nonatomic) int pr;
+</pre>
+</div>
+<p>
+This produces a property tag:
+</p>
+<div class="doc_code">
+<pre>
+TAG_APPLE_property [8]
+ AT_name( "pr" )
+ AT_type ( {0x00000147} (int) )
+ AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic)
+</pre>
+</div>
+
+<p>The setter and getter method names are attached to the property using
+DW_AT_APPLE_property_setter and DW_AT_APPLE_property_getter attributes.
+</p>
+<div class="doc_code">
+<pre>
+@interface I1
+@property (setter=myOwnP3Setter:) int p3;
+-(void)myOwnP3Setter:(int)a;
+@end
+
+@implementation I1
+@synthesize p3;
+-(void)myOwnP3Setter:(int)a{ }
+@end
+</pre>
+</div>
+
+<p>
+The DWARF for this would be:
+</p>
+<div class="doc_code">
+<pre>
+0x000003bd: TAG_structure_type [7] *
+ AT_APPLE_runtime_class( 0x10 )
+ AT_name( "I1" )
+ AT_decl_file( "Objc_Property.m" )
+ AT_decl_line( 3 )
+
+0x000003cd TAG_APPLE_property
+ AT_name ( "p3" )
+ AT_APPLE_property_setter ( "myOwnP3Setter:" )
+ AT_type( {0x00000147} ( int ) )
+
+0x000003f3: TAG_member [8]
+ AT_name( "_p3" )
+ AT_type ( {0x00000147} ( int ) )
+ AT_APPLE_property ( {0x000003cd} )
+ AT_artificial ( 0x1 )
+</pre>
+</div>
+
+</div>
+
+<!-- *********************************************************************** -->
+<h4>
+ <a name="objcpropertynewtags">New DWARF Tags</a>
+</h4>
+<!-- *********************************************************************** -->
+
+<div>
+<table border="1" cellspacing="0">
+ <col width="200">
+ <col width="200">
+ <tr>
+ <th>TAG</th>
+ <th>Value</th>
+ </tr>
+ <tr>
+ <td>DW_TAG_APPLE_property</td>
+ <td>0x4200</td>
+ </tr>
+</table>
+
+</div>
+
+<!-- *********************************************************************** -->
+<h4>
+ <a name="objcpropertynewattributes">New DWARF Attributes</a>
+</h4>
+<!-- *********************************************************************** -->
+
+<div>
+<table border="1" cellspacing="0">
+ <col width="200">
+ <col width="200">
+ <col width="200">
+ <tr>
+ <th>Attribute</th>
+ <th>Value</th>
+ <th>Classes</th>
+ </tr>
+ <tr>
+ <td>DW_AT_APPLE_property</td>
+ <td>0x3fed</td>
+ <td>Reference</td>
+ </tr>
+ <tr>
+ <td>DW_AT_APPLE_property_getter</td>
+ <td>0x3fe9</td>
+ <td>String</td>
+ </tr>
+ <tr>
+ <td>DW_AT_APPLE_property_setter</td>
+ <td>0x3fea</td>
+ <td>String</td>
+ </tr>
+ <tr>
+ <td>DW_AT_APPLE_property_attribute</td>
+ <td>0x3feb</td>
+ <td>Constant</td>
+ </tr>
+</table>
+
+</div>
+
+<!-- *********************************************************************** -->
+<h4>
+ <a name="objcpropertynewconstants">New DWARF Constants</a>
+</h4>
+<!-- *********************************************************************** -->
+
+<div>
+<table border="1" cellspacing="0">
+ <col width="200">
+ <col width="200">
+ <tr>
+ <th>Name</th>
+ <th>Value</th>
+ </tr>
+ <tr>
+ <td>DW_APPLE_PROPERTY_readonly</td>
+ <td>0x1</td>
+ </tr>
+ <tr>
+ <td>DW_APPLE_PROPERTY_readwrite</td>
+ <td>0x2</td>
+ </tr>
+ <tr>
+ <td>DW_APPLE_PROPERTY_assign</td>
+ <td>0x4</td>
+ </tr>
+ <tr>
+ <td>DW_APPLE_PROPERTY_retain</td>
+ <td>0x8</td>
+ </tr>
+ <tr>
+ <td>DW_APPLE_PROPERTY_copy</td>
+ <td>0x10</td>
+ </tr>
+ <tr>
+ <td>DW_APPLE_PROPERTY_nonatomic</td>
+ <td>0x20</td>
+ </tr>
+</table>
+
+</div>
+</div>
+
+<!-- ======================================================================= -->
+<h3>
+ <a name="acceltable">Name Accelerator Tables</a>
+</h3>
+<!-- ======================================================================= -->
+<div>
+<!-- ======================================================================= -->
+<h4>
+ <a name="acceltableintroduction">Introduction</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<p>The .debug_pubnames and .debug_pubtypes formats are not what a debugger
+ needs. The "pub" in the section name indicates that the entries in the
+ table are publicly visible names only. This means no static or hidden
+ functions show up in the .debug_pubnames. No static variables or private class
+ variables are in the .debug_pubtypes. Many compilers add different things to
+ these tables, so their contents are not consistent across gcc, icc, and clang.</p>
+
+<p>The typical query given by users tends not to match up with the contents of
+ these tables. For example, the DWARF spec states that "In the case of the
+ name of a function member or static data member of a C++ structure, class or
+ union, the name presented in the .debug_pubnames section is not the simple
+ name given by the DW_AT_name attribute of the referenced debugging information
+ entry, but rather the fully qualified name of the data or function member."
+ So the only names in these tables for complex C++ entries is a fully
+ qualified name. Debugger users tend not to enter their search strings as
+ "a::b::c(int,const Foo&) const", but rather as "c", "b::c" , or "a::b::c". So
+ the name entered in the name table must be demangled in order to chop it up
+ appropriately and additional names must be manually entered into the table
+ to make it effective as a name lookup table for debuggers to use.</p>
+
+<p>All debuggers currently ignore the .debug_pubnames table as a result of
+ its inconsistent and useless public-only name content, which makes it a waste
+ of space in the object file. These tables, when they are written to disk, are
+ not sorted in any way, leaving every debugger to do its own parsing
+ and sorting. These tables also include an inlined copy of the string values
+ in the table itself making the tables much larger than they need to be on
+ disk, especially for large C++ programs.</p>
+
+<p>Can't we just fix the sections by adding all of the names we need to this
+ table? No, because that is not what the tables are defined to contain and we
+ won't know the difference between the old bad tables and the new good tables.
+ At best we could make our own renamed sections that contain all of the data
+ we need.</p>
+
+<p>These tables are also insufficient for what a debugger like LLDB needs.
+ LLDB uses clang for its expression parsing where LLDB acts as a PCH. LLDB is
+ then often asked to look for type "foo" or namespace "bar", or list items in
+ namespace "baz". Namespaces are not included in the pubnames or pubtypes
+ tables. Since clang asks a lot of questions when it is parsing an expression,
+ we need to be very fast when looking up names, as it happens a lot. Having new
+ accelerator tables that are optimized for very quick lookups will benefit
+ this type of debugging experience greatly.</p>
+
+<p>We would like to generate name lookup tables that can be mapped into
+ memory from disk, and used as is, with little or no up-front parsing. We
+ would also like to control the exact content of these different tables so they
+ contain exactly what we need. The Name Accelerator Tables were designed
+ to fix these issues. In order to solve these issues we need to:</p>
+
+<ul>
+ <li>Have a format that can be mapped into memory from disk and used as is</li>
+ <li>Make lookups very fast</li>
+ <li>Use an extensible table format so these tables can be made by many producers</li>
+ <li>Contain all of the names needed for typical lookups out of the box</li>
+ <li>Follow strict rules for the contents of tables</li>
+</ul>
+
+<p>Table size is important and the accelerator table format should allow the
+ reuse of strings from common string tables so the strings for the names are
+ not duplicated. We also want to make sure the table is ready to be used as-is
+ by simply mapping the table into memory with minimal header parsing.</p>
+
+<p>The name lookups need to be fast and optimized for the kinds of lookups
+ that debuggers tend to do. Optimally we would like to touch as few parts of
+ the mapped table as possible when doing a name lookup and be able to quickly
+ find the name entry we are looking for, or discover there are no matches. In
+ the case of debuggers, we optimize for lookups that fail most of the time.</p>
+
+<p>Each table that is defined should have strict rules on exactly what is in
+ the accelerator tables, and those rules should be documented so clients can
+ rely on the content.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+ <a name="acceltablehashes">Hash Tables</a>
+</h4>
+<!-- ======================================================================= -->
+
+<div>
+<h5>Standard Hash Tables</h5>
+
+<p>Typical hash tables have a header, buckets, and each bucket points to the
+bucket contents:
+</p>
+
+<div class="doc_code">
+<pre>
+.------------.
+| HEADER |
+|------------|
+| BUCKETS |
+|------------|
+| DATA |
+`------------'
+</pre>
+</div>
+
+<p>The BUCKETS are an array of offsets to DATA for each hash:</p>
+
+<div class="doc_code">
+<pre>
+.------------.
+| 0x00001000 | BUCKETS[0]
+| 0x00002000 | BUCKETS[1]
+| 0x00002200 | BUCKETS[2]
+| 0x000034f0 | BUCKETS[3]
+| | ...
+| 0xXXXXXXXX | BUCKETS[n_buckets]
+'------------'
+</pre>
+</div>
+
+<p>So for bucket[3] in the example above, we have an offset into the table
+ 0x000034f0 which points to a chain of entries for the bucket. Each entry
+ in the chain must contain a next pointer, the full 32 bit hash value, the
+ string itself, and the data for the current string value.</p>
+
+<div class="doc_code">
+<pre>
+ .------------.
+0x000034f0: | 0x00003500 | next pointer
+ | 0x12345678 | 32 bit hash
+ | "erase" | string value
+ | data[n] | HashData for this bucket
+ |------------|
+0x00003500: | 0x00003550 | next pointer
+ | 0x29273623 | 32 bit hash
+ | "dump" | string value
+ | data[n] | HashData for this bucket
+ |------------|
+0x00003550: | 0x00000000 | next pointer
+ | 0x82638293 | 32 bit hash
+ | "main" | string value
+ | data[n] | HashData for this bucket
+ `------------'
+</pre>
+</div>
+
+<p>The problem with this layout for debuggers is that we need to optimize for
+ the negative lookup case where the symbol we're searching for is not present.
+ So if we were to look up "printf" in the table above, we would compute a 32
+ bit hash for "printf", which might match bucket[3]. We would need to go to the offset
+ 0x000034f0 and start looking to see if our 32 bit hash matches. To do so, we
+ need to read the next pointer, then read the hash, compare it, and skip to
+ the next entry in the chain. Each time we are skipping many bytes in memory and touching
+ new cache pages just to do the compare on the full 32 bit hash. All of these
+ accesses then tell us that we didn't have a match.</p>
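+
+<p>A small sketch of that chained probe (with the entry layout simplified for
+ illustration) makes the cost visible: every iteration chases a pointer into
+ a different part of the table just to compare one hash.</p>
+
+<div class="doc_code">
+<pre>
+#include &lt;stdint.h&gt;
+
+struct Entry {
+  const Entry *next; // chain pointer: often a new cache line per hop
+  uint32_t hash;     // full 32 bit hash for this string
+  // string value and HashData follow in memory...
+};
+
+const Entry *findInChain(const Entry *head, uint32_t hash) {
+  for (const Entry *E = head; E; E = E-&gt;next)
+    if (E-&gt;hash == hash)
+      return E; // candidate found; the string still has to be compared
+  return 0;     // negative lookup: the entire chain was walked
+}
+</pre>
+</div>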
+
+<h5>Name Hash Tables</h5>
+
+<p>To solve the issues mentioned above we have structured the hash tables
+ a bit differently: a header, buckets, an array of all unique 32 bit hash
+ values, followed by an array of hash value data offsets, one for each hash
+ value, then the data for all hash values:</p>
+
+<div class="doc_code">
+<pre>
+.-------------.
+| HEADER |
+|-------------|
+| BUCKETS |
+|-------------|
+| HASHES |
+|-------------|
+| OFFSETS |
+|-------------|
+| DATA |
+`-------------'
+</pre>
+</div>
+
+<p>The BUCKETS in the name tables are an index into the HASHES array. By
+ making all of the full 32 bit hash values contiguous in memory, we allow
+ ourselves to efficiently check for a match while touching as little
+ memory as possible. Most often checking the 32 bit hash values is as far as
+ the lookup goes. If it does match, it usually is a match with no collisions.
+ So for a table with "n_buckets" buckets, and "n_hashes" unique 32 bit hash
+ values, we can clarify the contents of the BUCKETS, HASHES and OFFSETS as:</p>
+
+<div class="doc_code">
+<pre>
+.-------------------------.
+| HEADER.magic | uint32_t
+| HEADER.version | uint16_t
+| HEADER.hash_function | uint16_t
+| HEADER.bucket_count | uint32_t
+| HEADER.hashes_count | uint32_t
+| HEADER.header_data_len | uint32_t
+| HEADER_DATA | HeaderData
+|-------------------------|
+| BUCKETS | uint32_t[n_buckets] // 32 bit hash indexes
+|-------------------------|
+| HASHES | uint32_t[n_hashes] // 32 bit hash values
+|-------------------------|
+| OFFSETS | uint32_t[n_hashes] // 32 bit offsets to hash value data
+|-------------------------|
+| ALL HASH DATA |
+`-------------------------'
+</pre>
+</div>
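+
+<p>Read as a C struct, the fixed part of that header could look roughly like
+ the following sketch (field widths are taken from the diagram above; the
+ magic-number semantics are an assumption, and the specialized HeaderData
+ follows the struct in the file):</p>
+
+<div class="doc_code">
+<pre>
+#include &lt;stdint.h&gt;
+
+struct Header {
+  uint32_t magic;            // assumed: marks a valid table (and byte order)
+  uint16_t version;
+  uint16_t hash_function;    // identifies the hash used, e.g. DJB
+  uint32_t bucket_count;     // n_buckets
+  uint32_t hashes_count;     // n_hashes
+  uint32_t header_data_len;  // size of the HeaderData that follows
+};
+</pre>
+</div>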
+
+<p>So taking the exact same data from the standard hash example above we end up
+ with:</p>
+
+<div class="doc_code">
+<pre>
+ .------------.
+ | HEADER |
+ |------------|
+ | 0 | BUCKETS[0]
+ | 2 | BUCKETS[1]
+ | 5 | BUCKETS[2]
+ | 6 | BUCKETS[3]
+ | | ...
+ | ... | BUCKETS[n_buckets]
+ |------------|
+ | 0x........ | HASHES[0]
+ | 0x........ | HASHES[1]
+ | 0x........ | HASHES[2]
+ | 0x........ | HASHES[3]
+ | 0x........ | HASHES[4]
+ | 0x........ | HASHES[5]
+ | 0x12345678 | HASHES[6] hash for BUCKETS[3]
+ | 0x29273623 | HASHES[7] hash for BUCKETS[3]
+ | 0x82638293 | HASHES[8] hash for BUCKETS[3]
+ | 0x........ | HASHES[9]
+ | 0x........ | HASHES[10]
+ | 0x........ | HASHES[11]
+ | 0x........ | HASHES[12]
+ | 0x........ | HASHES[13]
+ | 0x........ | HASHES[n_hashes]
+ |------------|
+ | 0x........ | OFFSETS[0]
+ | 0x........ | OFFSETS[1]
+ | 0x........ | OFFSETS[2]
+ | 0x........ | OFFSETS[3]
+ | 0x........ | OFFSETS[4]
+ | 0x........ | OFFSETS[5]
+ | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3]
+ | 0x00003500 | OFFSETS[7] offset for BUCKETS[3]
+ | 0x00003550 | OFFSETS[8] offset for BUCKETS[3]
+ | 0x........ | OFFSETS[9]
+ | 0x........ | OFFSETS[10]
+ | 0x........ | OFFSETS[11]
+ | 0x........ | OFFSETS[12]
+ | 0x........ | OFFSETS[13]
+ | 0x........ | OFFSETS[n_hashes]
+ |------------|
+ | |
+ | |
+ | |
+ | |
+ | |
+ |------------|
+0x000034f0: | 0x........ | String offset into .debug_str ("erase")
+ | 0x00000004 | A 32 bit array count - number of HashData with name "erase"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x........ | HashData[3]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ |------------|
+0x00003500: | 0x........ | String offset into .debug_str ("collision")
+ | 0x00000002 | A 32 bit array count - number of HashData with name "collision"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | String offset into .debug_str ("dump")
+ | 0x00000003 | A 32 bit array count - number of HashData with name "dump"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ |------------|
+0x00003550: | 0x........ | String offset into .debug_str ("main")
+ | 0x00000009 | A 32 bit array count - number of HashData with name "main"
+ | 0x........ | HashData[0]
+ | 0x........ | HashData[1]
+ | 0x........ | HashData[2]
+ | 0x........ | HashData[3]
+ | 0x........ | HashData[4]
+ | 0x........ | HashData[5]
+ | 0x........ | HashData[6]
+ | 0x........ | HashData[7]
+ | 0x........ | HashData[8]
+ | 0x00000000 | String offset into .debug_str (terminate data for hash)
+ `------------'
+</pre>
+</div>
+
+<p>So we still have all of the same data, we just organize it more efficiently
+ for debugger lookup. If we repeat the same "printf" lookup from above, we
+ would hash "printf" and find that it matches BUCKETS[3] by taking the 32 bit
+ hash value modulo n_buckets. BUCKETS[3] contains "6", which is the index
+ into the HASHES table. We would then compare consecutive 32 bit hash values
+ in the HASHES array for as long as those hashes still belong to BUCKETS[3],
+ which we verify by checking that each subsequent hash value modulo n_buckets
+ is still 3. In the case of a failed lookup, we access the memory for
+ BUCKETS[3] and then compare a few consecutive 32 bit hashes before we know
+ that we have no match. We don't end up marching through many words of
+ memory, and we keep the number of processor data cache lines accessed as
+ small as possible.</p>
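+
+<p>As an illustration, here is a minimal sketch of this lookup in C-style
+ code. The function and variable names are ours and not part of the format,
+ and error and bounds checking is elided:</p>
+<div class="doc_code">
+<pre>
+// Sketch of the bucket scan described above. Returns a pointer to the
+// matching entry in OFFSETS, or NULL for a negative lookup.
+static const uint32_t *LookupHash(const uint32_t *buckets,
+                                  const uint32_t *hashes,
+                                  const uint32_t *offsets,
+                                  uint32_t n_buckets, uint32_t n_hashes,
+                                  uint32_t hash) {
+  uint32_t bucket = hash % n_buckets;
+  uint32_t idx = buckets[bucket];
+  if (idx == UINT32_MAX)   // empty bucket
+    return NULL;
+  // Scan the contiguous run of hashes that belong to this bucket.
+  while (idx &lt; n_hashes &amp;&amp; hashes[idx] % n_buckets == bucket) {
+    if (hashes[idx] == hash)
+      return &amp;offsets[idx];   // offset of the hash data for this hash
+    ++idx;
+  }
+  return NULL;   // negative lookup: only a few cache lines were touched
+}
+</pre>
+</div>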
+
+<p>The string hash that is used for these lookup tables is the Daniel J.
+ Bernstein hash, which is also used in the ELF GNU_HASH sections. It is a very
+ good hash for all kinds of names in programs, producing very few hash
+ collisions.</p>
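+
+<p>For reference, a sketch of the Bernstein hash as commonly defined (start at
+ 5381, multiply by 33 and add each byte); this is an illustration, not a
+ quotation of any particular implementation:</p>
+<div class="doc_code">
+<pre>
+static uint32_t DJBHash(const char *str) {
+  uint32_t h = 5381;
+  for (const unsigned char *c = (const unsigned char *)str; *c; ++c)
+    h = h * 33 + *c;   // equivalently (h &lt;&lt; 5) + h + *c
+  return h;
+}
+</pre>
+</div>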
+
+<p>Empty buckets are designated by using an invalid hash index of UINT32_MAX.</p>
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+ <a name="acceltabledetails">Details</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<p>These name hash tables are designed to be generic, with specializations of
+ the table defining additional data that goes into the header
+ ("HeaderData"), how the string value is stored ("KeyType"), and the content
+ of the data for each hash value.</p>
+
+<h5>Header Layout</h5>
+<p>The header has a fixed part and a specialized part. The exact format of
+ the header is:</p>
+<div class="doc_code">
+<pre>
+struct Header
+{
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number
+ uint16_t hash_function; // The hash function enumeration that was used
+ uint32_t bucket_count; // The number of buckets in this hash table
+ uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table
+ uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment
+ // Specifically the length of the following HeaderData field - this does not
+ // include the size of the preceding fields
+ HeaderData header_data; // Implementation specific header data
+};
+</pre>
+</div>
+<p>The header starts with a 32 bit "magic" value which must be 'HASH' encoded as
+ an ASCII integer. This allows the detection of the start of the hash table and
+ also allows the table's byte order to be determined so the table can be
+ correctly extracted. The "magic" value is followed by a 16 bit version number
+ which allows the table to be revised and modified in the future. The current
+ version number is 1. "hash_function" is a uint16_t enumeration that specifies
+ which hash function was used to produce this table. The current values for the
+ hash function enumerations include:</p>
+<div class="doc_code">
+<pre>
+enum HashFunctionType
+{
+ eHashFunctionDJB = 0u, // Daniel J Bernstein hash function
+};
+</pre>
+</div>
+<p>"bucket_count" is a 32 bit unsigned integer that represents how many buckets
+ are in the BUCKETS array. "hashes_count" is the number of unique 32 bit hash
+ values that are in the HASHES array, and is the same as the number of offsets
+ contained in the OFFSETS array. "header_data_len" specifies the size in
+ bytes of the HeaderData that is filled in by specialized versions of this
+ table.</p>
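+
+<p>For example, a reader might detect the table and its byte order from the
+ "magic" field like this (a hypothetical sketch; the constants are simply
+ 'HASH' as a big endian ASCII integer and its byte swapped value):</p>
+<div class="doc_code">
+<pre>
+static bool ValidateMagic(uint32_t magic, bool *needs_byte_swap) {
+  const uint32_t kMagic        = 0x48415348;   // 'HASH'
+  const uint32_t kMagicSwapped = 0x48534148;   // 'HASH' byte swapped
+  if (magic == kMagic)        { *needs_byte_swap = false; return true; }
+  if (magic == kMagicSwapped) { *needs_byte_swap = true;  return true; }
+  return false;   // not a name hash table
+}
+</pre>
+</div>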
+
+<h5>Fixed Lookup</h5>
+<p>The header is followed by the buckets, hashes, offsets, and hash value
+ data:</p>
+<div class="doc_code">
+<pre>
+struct FixedTable
+{
+ uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below
+ uint32_t hashes [Header.hashes_count]; // Every unique 32 bit hash for the entire table is in this table
+ uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above
+};
+</pre>
+</div>
+<p>"buckets" is an array of 32 bit indexes into the "hashes" array. The
+ "hashes" array contains all of the 32 bit hash values for all names in the
+ hash table. Each hash in the "hashes" table has an offset in the "offsets"
+ array that points to the data for the hash value.</p>
+
+<p>This table setup makes it very easy to repurpose these tables to contain
+ different data, while keeping the lookup mechanism the same for all tables.
+ This layout also makes it possible to save the table to disk and map it in
+ later and do very efficient name lookups with little or no parsing.</p>
+
+<p>DWARF lookup tables can be implemented in a variety of ways and can store
+ a lot of information for each name. We want to make the DWARF tables
+ extensible and able to store the data efficiently so we have used some of the
+ DWARF features that enable efficient data storage to define exactly what kind
+ of data we store for each name.</p>
+
+<p>The "HeaderData" contains a definition of the contents of each HashData
+ chunk. We might want to store an offset to all of the debug information
+ entries (DIEs) for each name. To keep things extensible, we create a list of
+ items, or Atoms, that are contained in the data for each name. First comes the
+ type of the data in each atom:</p>
+<div class="doc_code">
+<pre>
+enum AtomType
+{
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compile unit header that contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags
+};
+</pre>
+</div>
+<p>The enumeration values and their meanings are:</p>
+<div class="doc_code">
+<pre>
+ eAtomTypeNULL - a termination atom that specifies the end of the atom list
+ eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name
+ eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE
+ eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is
+ eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...)
+ eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...)
+</pre>
+</div>
+<p>We then let each atom specify its type and how the data for that atom is
+ encoded:</p>
+<div class="doc_code">
+<pre>
+struct Atom
+{
+ uint16_t type; // AtomType enum value
+ uint16_t form; // DWARF DW_FORM_XXX defines
+};
+</pre>
+</div>
+<p>The "form" type above is from the DWARF specification and defines the
+ exact encoding of the data for the Atom type. See the DWARF specification for
+ the DW_FORM_ definitions.</p>
+<div class="doc_code">
+<pre>
+struct HeaderData
+{
+ uint32_t die_offset_base;
+ uint32_t atom_count;
+ Atom atoms[atom_count];
+};
+</pre>
+</div>
+<p>"HeaderData" defines the base DIE offset that should be added to any atoms
+ that are encoded using the DW_FORM_ref1, DW_FORM_ref2, DW_FORM_ref4,
+ DW_FORM_ref8 or DW_FORM_ref_udata forms. It also defines what is contained in
+ each "HashData" object -- Atom.form tells us how large each field will be in
+ the HashData and Atom.type tells us how this data should be interpreted.</p>
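+
+<p>For fixed-size forms, a reader can compute the byte size of each HashData
+ entry directly from the Atom list. A sketch, assuming only fixed-size
+ DW_FORM values are in use (as in the DW_FORM_data4 configuration shown
+ below):</p>
+<div class="doc_code">
+<pre>
+static unsigned FormByteSize(uint16_t form) {
+  switch (form) {
+  case 0x0b: return 1;   // DW_FORM_data1
+  case 0x05: return 2;   // DW_FORM_data2
+  case 0x06: return 4;   // DW_FORM_data4
+  case 0x07: return 8;   // DW_FORM_data8
+  default:   return 0;   // variable-size forms require real DWARF decoding
+  }
+}
+// The size of one HashData entry is the sum of FormByteSize(atom.form)
+// over all atoms in HeaderData.atoms.
+</pre>
+</div>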
+
+<p>For the current implementations of the ".apple_names" (all functions +
+ globals), ".apple_types" (names of all types that are defined), and
+ ".apple_namespaces" (all namespaces) sections, we set the Atom array to be:</p>
+<div class="doc_code">
+<pre>
+HeaderData.atom_count = 1;
+HeaderData.atoms[0].type = eAtomTypeDIEOffset;
+HeaderData.atoms[0].form = DW_FORM_data4;
+</pre>
+</div>
+<p>This defines the contents to be the DIE offset (eAtomTypeDIEOffset) that is
+ encoded as a 32 bit value (DW_FORM_data4). This allows a single name to have
+ multiple matching DIEs in a single file, which could come up with an inlined
+ function for instance. Future tables could include more information about the
+ DIE such as flags indicating if the DIE is a function, method, block,
+ or inlined.</p>
+
+<p>The KeyType for the DWARF table is a 32 bit string table offset into the
+ ".debug_str" table. The ".debug_str" is the string table for the DWARF, which
+ may already contain copies of all of the strings. This helps make sure, with
+ help from the compiler, that we reuse the strings between all of the DWARF
+ sections and keeps the hash table size down. Another benefit of having the
+ compiler generate all strings as DW_FORM_strp in the debug info is that
+ DWARF parsing can be made much faster.</p>
+
+<p>After a lookup is made, we get an offset into the hash data. The hash data
+ needs to be able to deal with 32 bit hash collisions, so the chunk of data
+ at the offset in the hash data consists of a triple:</p>
+<div class="doc_code">
+<pre>
+uint32_t str_offset
+uint32_t hash_data_count
+HashData[hash_data_count]
+</pre>
+</div>
+<p>If "str_offset" is zero, then the bucket contents are done. 99.9% of the
+ hash data chunks contain a single item (no 32 bit hash collision):</p>
+<div class="doc_code">
+<pre>
+.------------.
+| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+| 0x00000004 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x........ | uint32_t HashData[2] DIE offset
+| 0x........ | uint32_t HashData[3] DIE offset
+| 0x00000000 | uint32_t KeyType (end of hash chain)
+`------------'
+</pre>
+</div>
+<p>If there are collisions, you will have multiple valid string offsets:</p>
+<div class="doc_code">
+<pre>
+.------------.
+| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main")
+| 0x00000004 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x........ | uint32_t HashData[2] DIE offset
+| 0x........ | uint32_t HashData[3] DIE offset
+| 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print")
+| 0x00000002 | uint32_t HashData count
+| 0x........ | uint32_t HashData[0] DIE offset
+| 0x........ | uint32_t HashData[1] DIE offset
+| 0x00000000 | uint32_t KeyType (end of hash chain)
+`------------'
+</pre>
+</div>
+<p>Current testing with real world C++ binaries has shown that there is around
+ one 32 bit hash collision per 100,000 name entries.</p>
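+
+<p>A sketch of walking these chunks after a lookup, assuming the one-atom
+ DW_FORM_data4 configuration above so that each HashData entry is a single
+ uint32_t (the names here are illustrative, not part of the format):</p>
+<div class="doc_code">
+<pre>
+// "chunk" points at the hash data for the looked-up hash value. Each chunk
+// is: str_offset, count, then count uint32_t values; a zero str_offset
+// terminates the chain.
+static const uint32_t *FindDIEOffsets(const uint32_t *chunk,
+                                      uint32_t name_str_offset,
+                                      uint32_t *count_out) {
+  uint32_t str_offset;
+  while ((str_offset = *chunk++) != 0) {
+    uint32_t count = *chunk++;
+    if (str_offset == name_str_offset) {   // same .debug_str offset, same name
+      *count_out = count;
+      return chunk;   // "count" DIE offsets follow
+    }
+    chunk += count;   // skip a colliding name's data
+  }
+  return NULL;   // a 32 bit hash collision with a different name
+}
+</pre>
+</div>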
+</div>
+<!-- ======================================================================= -->
+<h4>
+ <a name="acceltablecontents">Contents</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<p>As stated above, we want to define exactly what is included in the
+ different tables. For DWARF, we have three tables: ".apple_names",
+ ".apple_types", and ".apple_namespaces".</p>
+
+<p>".apple_names" sections should contain an entry for each DWARF DIE whose
+ DW_TAG is a DW_TAG_label, DW_TAG_inlined_subroutine, or DW_TAG_subprogram that
+ has address attributes: DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges or
+ DW_AT_entry_pc. It also contains DW_TAG_variable DIEs that have a DW_OP_addr
+ in the location (global and static variables). All global and static variables
+ should be included, including those scoped within functions and classes. For
+ example, using the following code:</p>
+<div class="doc_code">
+<pre>
+static int var = 0;
+
+void f ()
+{
+ static int var = 0;
+}
+</pre>
+</div>
+<p>Both of the static "var" variables would be included in the table. All
+ functions should emit both their full names and their basenames. For C or C++,
+ the full name is the mangled name (if available) which is usually in the
+ DW_AT_MIPS_linkage_name attribute, and the DW_AT_name contains the function
+ basename. If global or static variables have a mangled name in a
+ DW_AT_MIPS_linkage_name attribute, this should be emitted along with the
+ simple name found in the DW_AT_name attribute.</p>
+
+<p>".apple_types" sections should contain an entry for each DWARF DIE whose
+ tag is one of:</p>
+<ul>
+ <li>DW_TAG_array_type</li>
+ <li>DW_TAG_class_type</li>
+ <li>DW_TAG_enumeration_type</li>
+ <li>DW_TAG_pointer_type</li>
+ <li>DW_TAG_reference_type</li>
+ <li>DW_TAG_string_type</li>
+ <li>DW_TAG_structure_type</li>
+ <li>DW_TAG_subroutine_type</li>
+ <li>DW_TAG_typedef</li>
+ <li>DW_TAG_union_type</li>
+ <li>DW_TAG_ptr_to_member_type</li>
+ <li>DW_TAG_set_type</li>
+ <li>DW_TAG_subrange_type</li>
+ <li>DW_TAG_base_type</li>
+ <li>DW_TAG_const_type</li>
+ <li>DW_TAG_constant</li>
+ <li>DW_TAG_file_type</li>
+ <li>DW_TAG_namelist</li>
+ <li>DW_TAG_packed_type</li>
+ <li>DW_TAG_volatile_type</li>
+ <li>DW_TAG_restrict_type</li>
+ <li>DW_TAG_interface_type</li>
+ <li>DW_TAG_unspecified_type</li>
+ <li>DW_TAG_shared_type</li>
+</ul>
+<p>Only entries with a DW_AT_name attribute are included, and the entry must
+ not be a forward declaration (DW_AT_declaration attribute with a non-zero value).
+ For example, using the following code:</p>
+<div class="doc_code">
+<pre>
+int main ()
+{
+ int *b = 0;
+ return *b;
+}
+</pre>
+</div>
+<p>We get a few type DIEs:</p>
+<div class="doc_code">
+<pre>
+0x00000067: TAG_base_type [5]
+ AT_encoding( DW_ATE_signed )
+ AT_name( "int" )
+ AT_byte_size( 0x04 )
+
+0x0000006e: TAG_pointer_type [6]
+ AT_type( {0x00000067} ( int ) )
+ AT_byte_size( 0x08 )
+</pre>
+</div>
+<p>The DW_TAG_pointer_type is not included because it does not have a DW_AT_name.</p>
+
+<p>".apple_namespaces" sections should contain all DW_TAG_namespace DIEs. If
+ we run into a namespace that has no name, it is an anonymous namespace, and
+ the name should be output as "(anonymous namespace)" (without the quotes).
+ Why? This matches the output of abi::__cxa_demangle(), the function in the
+ standard C++ library that demangles mangled names.</p>
+</div>
+
+<!-- ======================================================================= -->
+<h4>
+ <a name="acceltableextensions">Language Extensions and File Format Changes</a>
+</h4>
+<!-- ======================================================================= -->
+<div>
+<h5>Objective-C Extensions</h5>
+<p>".apple_objc" sections should contain all DW_TAG_subprogram DIEs for an
+ Objective-C class. The name used in the hash table is the name of the
+ Objective-C class itself. If the Objective-C class has a category, then an
+ entry is made for both the class name without the category, and for the class
+ name with the category. So if we have a DIE at offset 0x1234 with the method
+ name "-[NSString(my_additions) stringWithSpecialString:]", we would add
+ an entry for "NSString" that points to DIE 0x1234, and an entry for
+ "NSString(my_additions)" that points to 0x1234. This allows us to quickly
+ track down all Objective-C methods for an Objective-C class when doing
+ expressions. It is needed because of the dynamic nature of Objective-C, where
+ anyone can add methods to a class. The DWARF for Objective-C methods is also
+ emitted differently from C++ classes: the methods are not usually contained
+ in the class definition, but are scattered across one or more compile units,
+ and categories can also be defined in different shared libraries. So we need
+ to be able to quickly find all of the methods and class functions given the
+ Objective-C class name, or all methods and class functions for a class +
+ category name. This table does not contain any selector names; it just maps
+ Objective-C class names (or class names + category) to all of the methods
+ and class functions. The selectors are added as function basenames in the
+ ".apple_names" section.</p>
+
+<p>In the ".apple_names" section for Objective-C functions, the full name is the
+ entire function name with the brackets ("-[NSString stringWithCString:]") and the
+ basename is the selector only ("stringWithCString:").</p>
+
+<h5>Mach-O Changes</h5>
+<p>The section names given above for the Apple hash tables apply to non-Mach-O
+ files. For Mach-O files, the sections should be contained in the "__DWARF"
+ segment with names as follows:</p>
+<ul>
+ <li>".apple_names" -> "__apple_names"</li>
+ <li>".apple_types" -> "__apple_types"</li>
+ <li>".apple_namespaces" -> "__apple_namespac" (16 character limit)</li>
+ <li>".apple_objc" -> "__apple_objc"</li>
+</ul>
+</div>
+</div>
+</div>
+
<!-- *********************************************************************** -->
<hr>
@@ -1814,7 +2855,7 @@ enum Trees {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-12 00:59:11 +0200 (Wed, 12 Oct 2011) $
+ Last modified: $Date: 2012-04-03 02:43:49 +0200 (Tue, 03 Apr 2012) $
</address>
</body>
diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html
index 24c4dc533182..7cafedfe77cd 100644
--- a/docs/SystemLibrary.html
+++ b/docs/SystemLibrary.html
@@ -310,7 +310,7 @@
<a href="mailto:rspencer@x10sys.com">Reid Spencer</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2011-10-31 12:21:59 +0100 (Mon, 31 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 92b90e687bbf..a2113895fe3d 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -37,6 +37,7 @@
<ol>
<li><a href="#include">File inclusion</a></li>
<li><a href="#globallet">'let' expressions</a></li>
+ <li><a href="#foreach">'foreach' blocks</a></li>
</ol></li>
</ol></li>
<li><a href="#backends">TableGen backends</a>
@@ -208,6 +209,14 @@ file, to factor out the common features that instructions of its class share. A
key feature of TableGen is that it allows the end-user to define the
abstractions they prefer to use when describing their information.</p>
+<p>Each def record has a special entry called "NAME". This is the
+name of the def ("ADD32rr" above). In the general case def names can
+be formed from various kinds of string processing expressions and NAME
+resolves to the final value obtained after resolving all of those
+expressions. The user may refer to NAME anywhere the ultimate name of
+the def is needed. To avoid conflicts, NAME should not be defined
+anywhere else in user code.</p>
+
</div>
<!-- ======================================================================= -->
@@ -393,6 +402,14 @@ which case the user must specify it explicitly.</dd>
<dt><tt>list[4-7,17,2-3]</tt></dt>
<dd>A slice of the 'list' list, including elements 4,5,6,7,17,2, and 3 from
it. Elements may be included multiple times.</dd>
+<dt><tt>foreach &lt;var&gt; = &lt;list&gt; in { &lt;body&gt; }</tt></dt>
+<dt><tt>foreach &lt;var&gt; = &lt;list&gt; in &lt;def&gt;</tt></dt>
+ <dd> Replicate &lt;body&gt; or &lt;def&gt;, replacing instances of
+ &lt;var&gt; with each value in &lt;list&gt;. &lt;var&gt; is scoped at the
+ level of the <tt>foreach</tt> loop and must not conflict with any other object
+ introduced in &lt;body&gt; or &lt;def&gt;. Currently only <tt>def</tt>s are
+ expanded within &lt;body&gt;.
+ </dd>
<dt><tt>(DEF a, b)</tt></dt>
<dd>a dag value. The first element is required to be a record definition, the
remaining elements in the list may be arbitrary other values, including nested
@@ -400,6 +417,10 @@ which case the user must specify it explicitly.</dd>
<dt><tt>!strconcat(a, b)</tt></dt>
<dd>A string value that is the result of concatenating the 'a' and 'b'
strings.</dd>
+<dt><tt>str1#str2</tt></dt>
+ <dd>"#" (paste) is a shorthand for !strconcat. It may concatenate
+ things that are not quoted strings, in which case an implicit
+ !cast&lt;string&gt; is done on the operand of the paste.</dd>
<dt><tt>!cast&lt;type&gt;(a)</tt></dt>
<dd>A symbol of type <em>type</em> obtained by looking up the string 'a' in
the symbol table. If the type of 'a' does not match <em>type</em>, TableGen
@@ -868,6 +889,39 @@ several levels of multiclass instanciations. This also avoids the need of using
</pre>
</div>
+<!-- -------------------------------------------------------------------------->
+<h4>
+ <a name="foreach">Looping</a>
+</h4>
+
+<div>
+<p>TableGen supports the '<tt>foreach</tt>' block, which textually replicates
+the loop body, substituting iterator values for iterator references in the
+body. Example:</p>
+
+<div class="doc_code">
+<pre>
+<b>foreach</b> i = [0, 1, 2, 3] in {
+ <b>def</b> R#i : Register&lt;...&gt;;
+ <b>def</b> F#i : Register&lt;...&gt;;
+}
+</pre>
+</div>
+
+<p>This will create objects <tt>R0</tt>, <tt>R1</tt>, <tt>R2</tt> and
+<tt>R3</tt>. <tt>foreach</tt> blocks may be nested. If there is only
+one item in the body the braces may be elided:</p>
+
+<div class="doc_code">
+<pre>
+<b>foreach</b> i = [0, 1, 2, 3] in
+ <b>def</b> R#i : Register&lt;...&gt;;
+
+</pre>
+</div>
+
+</div>
+
</div>
</div>
@@ -912,7 +966,7 @@ This should highlight the APIs in <tt>TableGen/Record.h</tt>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
diff --git a/docs/TestSuiteMakefileGuide.html b/docs/TestSuiteMakefileGuide.html
new file mode 100644
index 000000000000..876fe426cf5e
--- /dev/null
+++ b/docs/TestSuiteMakefileGuide.html
@@ -0,0 +1,351 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+ <title>LLVM test-suite Makefile Guide</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<h1>
+ LLVM test-suite Makefile Guide
+</h1>
+
+<ol>
+ <li><a href="#overview">Overview</a></li>
+ <li><a href="#testsuitestructure">Test suite structure</a></li>
+ <li><a href="#testsuiterun">Running the test suite</a>
+ <ul>
+ <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
+ <li><a href="#testsuitetests">Running different tests</a></li>
+ <li><a href="#testsuiteoutput">Generating test output</a></li>
+ <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
+ </ul>
+ </li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner</p>
+</div>
+
+<!--=========================================================================-->
+<h2><a name="overview">Overview</a></h2>
+<!--=========================================================================-->
+
+<div>
+
+<p>This document describes the features of the Makefile-based LLVM
+test-suite. This way of interacting with the test-suite is deprecated in favor
+of running the test-suite using LNT, but may continue to prove useful for some
+users. See the Testing
+Guide's <a href="TestingGuide.html#testsuitequickstart">test-suite
+Quickstart</a> section for more information.</p>
+
+</div>
+
+<!--=========================================================================-->
+<h2><a name="testsuitestructure">Test suite Structure</a></h2>
+<!--=========================================================================-->
+
+<div>
+
+<p>The <tt>test-suite</tt> module contains a number of programs that can be compiled
+with LLVM and executed. These programs are compiled using the native compiler
+and various LLVM backends. The output from the program compiled with the
+native compiler is assumed correct; the results from the other programs are
+compared to the native program output and pass if they match.</p>
+
+<p>When executing tests, it is usually a good idea to start out with a subset of
+the available tests or programs. This keeps test run times small at first and
+is useful later when investigating individual test failures. To run some
+test only on a subset of programs, simply change directory to the programs you
+want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
+test using the <tt>TEST</tt> variable to change what tests are run on the
+selected programs (see below for more info).</p>
+
+<p>In addition to testing correctness, the <tt>test-suite</tt> directory also
+performs timing tests of various LLVM optimizations. It also records
+compilation times for the compilers and the JIT. This information can be
+used to compare the effectiveness of LLVM's optimizations and code
+generation.</p>
+
+<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
+SingleSource, and External.</p>
+
+<ul>
+<li><tt>test-suite/SingleSource</tt>
+<p>The SingleSource directory contains test programs that are only a single
+source file in size. These are usually small benchmark programs or small
+programs that calculate a particular value. Several such programs are grouped
+together in each directory.</p></li>
+
+<li><tt>test-suite/MultiSource</tt>
+<p>The MultiSource directory contains subdirectories which contain entire
+programs with multiple source files. Large benchmarks and whole applications
+go here.</p></li>
+
+<li><tt>test-suite/External</tt>
+<p>The External directory contains Makefiles for building code that is external
+to (i.e., not distributed with) LLVM. The most prominent members of this
+directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
+directory does not contain these actual tests, but only the Makefiles that know
+how to properly compile these programs from somewhere else. The presence and
+location of these external programs is configured by the test-suite
+<tt>configure</tt> script.</p></li>
+</ul>
+
+<p>Each tree is then subdivided into several categories, including applications,
+benchmarks, regression tests, code that is strange grammatically, etc. These
+organizations should be relatively self-explanatory.</p>
+
+<p>Some tests are known to fail. Some are bugs that we have not fixed yet;
+others are features that we haven't added yet (or may never add). In the
+regression tests, the result for such tests will be XFAIL (eXpected FAILure).
+In this way, you can tell the difference between an expected and unexpected
+failure.</p>
+
+<p>The tests in the test suite have no such feature at this time. If the
+test passes, only warnings and other miscellaneous output will be generated. If
+a test fails, a large &lt;program&gt; FAILED message will be displayed. This
+will help you separate benign warnings from actual test failures.</p>
+
+</div>
+
+<!--=========================================================================-->
+<h2><a name="testsuiterun">Running the test suite</a></h2>
+<!--=========================================================================-->
+
+<div>
+
+<p>First, all tests are executed within the LLVM object directory tree. They
+<i>are not</i> executed inside of the LLVM source tree. This is because the
+test suite creates temporary files during execution.</p>
+
+<p>To run the test suite, you need to use the following steps:</p>
+
+<ol>
+ <li><tt>cd</tt> into the <tt>llvm/projects</tt> directory in your source tree.
+ </li>
+
+ <li><p>Check out the <tt>test-suite</tt> module with:</p>
+
+<div class="doc_code">
+<pre>
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+</pre>
+</div>
+ <p>This will get the test suite into <tt>llvm/projects/test-suite</tt>.</p>
+ </li>
+ <li><p>Configure and build <tt>llvm</tt>.</p></li>
+ <li><p>Configure and build <tt>llvm-gcc</tt>.</p></li>
+ <li><p>Install <tt>llvm-gcc</tt> somewhere.</p></li>
+ <li><p><em>Re-configure</em> <tt>llvm</tt> from the top level of
+ each build tree (LLVM object directory tree) in which you want
+ to run the test suite, just as you do before building LLVM.</p>
+ <p>During the <em>re-configuration</em>, you must either: (1)
+ have <tt>llvm-gcc</tt> you just built in your path, or (2)
+ specify the directory where your just-built <tt>llvm-gcc</tt> is
+ installed using <tt>--with-llvmgccdir=$LLVM_GCC_DIR</tt>.</p>
+ <p>You must also tell the configure machinery that the test suite
+ is available so it can be configured for your build tree:</p>
+<div class="doc_code">
+<pre>
+% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
+</pre>
+</div>
+ <p>[Remember that <tt>$LLVM_GCC_DIR</tt> is the directory where you
+ <em>installed</em> llvm-gcc, not its src or obj directory.]</p>
+ </li>
+
+ <li><p>You can now run the test suite from your build tree as follows:</p>
+<div class="doc_code">
+<pre>
+% cd $LLVM_OBJ_ROOT/projects/test-suite
+% make
+</pre>
+</div>
+ </li>
+</ol>
+<p>Note that the second and third steps only need to be done once. After you
+have the suite checked out and configured, you don't need to do it again (unless
+the test code or configure script changes).</p>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+ <a name="testsuiteexternal">Configuring External Tests</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+
+<div>
+<p>In order to run the External tests in the <tt>test-suite</tt>
+ module, you must specify <i>--with-externals</i>. This
+ must be done during the <em>re-configuration</em> step (see above),
+ and the <tt>llvm</tt> re-configuration must recognize the
+ previously-built <tt>llvm-gcc</tt>. If any of these is missing or
+ neglected, the External tests won't work.</p>
+<dl>
+<dt><i>--with-externals</i></dt>
+<dt><i>--with-externals=&lt;<tt>directory</tt>&gt;</i></dt>
+</dl>
+ This tells LLVM where to find any external tests. They are expected to be
+ in specifically named subdirectories of &lt;<tt>directory</tt>&gt;.
+ If <tt>directory</tt> is left unspecified,
+ <tt>configure</tt> uses the default value
+ <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
+ Subdirectory names known to LLVM include:
+ <dl>
+ <dt>spec95</dt>
+ <dt>speccpu2000</dt>
+ <dt>speccpu2006</dt>
+ <dt>povray31</dt>
+ </dl>
+ Others are added from time to time, and can be determined from
+ <tt>configure</tt>.
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+ <a name="testsuitetests">Running different tests</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+<div>
+<p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
+module also provides a mechanism for compiling the programs in different ways.
+If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
+include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
+This Makefile can modify build rules to yield different results.</p>
+
+<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
+create the nightly test reports. To run the nightly tests, run <tt>gmake
+TEST=nightly</tt>.</p>
+
+<p>There are several TEST Makefiles available in the tree. Some of them are
+designed for internal LLVM research and will not work outside of the LLVM
+research group. They may still be valuable, however, as a guide to writing your
+own TEST Makefile for any optimization or analysis passes that you develop with
+LLVM.</p>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+ <a name="testsuiteoutput">Generating test output</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+<div>
+ <p>There are a number of ways to run the tests and generate output. The
+ simplest is to run <tt>gmake</tt> with no arguments. This will
+ compile and run all programs in the tree using a number of different methods
+ and compare results. Any failures are reported in the output, but are likely
+ drowned in the other output. Passes are not reported explicitly.</p>
+
+ <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs
+ the specified test and usually adds per-program summaries to the output
+ (depending on which sometest you use). For example, the <tt>nightly</tt> test
+ explicitly outputs TEST-PASS or TEST-FAIL for every test after each program.
+ Though these lines are still drowned in the output, it's easy to grep the
+ output logs in the Output directories.</p>
+
+ <p>Even better are the <tt>report</tt> and <tt>report.format</tt> targets
+ (where <tt>format</tt> is one of <tt>html</tt>, <tt>csv</tt>, <tt>text</tt> or
+ <tt>graphs</tt>). The exact contents of the report depend on which
+ <tt>TEST</tt> you are running, but the text results are always shown at the
+ end of the run and the results are always stored in the
+ <tt>report.&lt;type&gt;.format</tt> file (when running with
+ <tt>TEST=&lt;type&gt;</tt>). The <tt>report</tt> targets also generate a file
+ called <tt>report.&lt;type&gt;.raw.out</tt> containing the output of the
+ entire test run.</p>
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h3>
+ <a name="testsuitecustom">Writing custom tests for the test suite</a>
+</h3>
+<!-- _______________________________________________________________________ -->
+
+<div>
+
+<p>Assuming you can run the test suite (e.g. "<tt>gmake TEST=nightly report</tt>"
+should work), it is really easy to run optimizations or code generator
+components against every program in the tree, collecting statistics or running
+custom checks for correctness. At base, this is how the nightly tester works;
+it's just one example of a general framework.</p>
+
+<p>Let's say that you have an LLVM optimization pass, and you want to see how
+many times it triggers. The first thing you should do is add an LLVM
+<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
+will tally counts of things you care about.</p>
+
+<p>Following this, you can set up a test and a report that collects these and
+formats them for easy viewing. This consists of two files, a
+"<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
+test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
+format the output into a table. There are many example reports of various
+levels of sophistication included with the test suite, and the framework is very
+general.</p>
+
+<p>If you are interested in testing an optimization pass, check out the
+"libcalls" test as an example. It can be run like this:</p>
+
+<div class="doc_code">
+<pre>
+% cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
+% make TEST=libcalls report
+</pre>
+</div>
+
+<p>This will do a bunch of stuff, then eventually print a table like this:</p>
+
+<div class="doc_code">
+<pre>
+Name | total | #exit |
+...
+FreeBench/analyzer/analyzer | 51 | 6 |
+FreeBench/fourinarow/fourinarow | 1 | 1 |
+FreeBench/neural/neural | 19 | 9 |
+FreeBench/pifft/pifft | 5 | 3 |
+MallocBench/cfrac/cfrac | 1 | * |
+MallocBench/espresso/espresso | 52 | 12 |
+MallocBench/gs/gs | 4 | * |
+Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
+Prolangs-C/agrep/agrep | 33 | 12 |
+Prolangs-C/allroots/allroots | * | * |
+Prolangs-C/assembler/assembler | 47 | * |
+Prolangs-C/bison/mybison | 74 | * |
+...
+</pre>
+</div>
+
+<p>This basically greps the -stats output and displays it in a table.
+You can also use the "TEST=libcalls report.html" target to get the table in
+HTML form; report.csv and report.tex work similarly.</p>
+
+<p>The source for this is in test-suite/TEST.libcalls.*. The format is pretty
+simple: the Makefile indicates how to run the test (in this case,
+"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
+each column of the output. The first value is the header for the column and the
+second is the regex used to grep the output of the command. There are lots of
+example reports that can do fancy stuff.</p>
+
+</div>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+
+ John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
+ <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
+ Last modified: $Date$
+</address>
+</body>
+</html>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index eb3714272d33..805ae776a8db 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -18,14 +18,13 @@
<li><a href="#org">LLVM testing infrastructure organization</a>
<ul>
<li><a href="#regressiontests">Regression tests</a></li>
- <li><a href="#testsuite">Test suite</a></li>
+ <li><a href="#testsuite"><tt>test-suite</tt></a></li>
<li><a href="#debuginfotests">Debugging Information tests</a></li>
</ul>
</li>
<li><a href="#quick">Quick start</a>
<ul>
<li><a href="#quickregressiontests">Regression tests</a></li>
- <li><a href="#quicktestsuite">Test suite</a></li>
<li><a href="#quickdebuginfotests">Debugging Information tests</a></li>
</ul>
</li>
@@ -37,13 +36,10 @@
<li><a href="#rtfeatures">Other features</a></li>
</ul>
</li>
- <li><a href="#testsuitestructure">Test suite structure</a></li>
- <li><a href="#testsuiterun">Running the test suite</a>
+ <li><a href="#testsuiteoverview"><tt>test-suite</tt> Overview</a>
<ul>
- <li><a href="#testsuiteexternal">Configuring External Tests</a></li>
- <li><a href="#testsuitetests">Running different tests</a></li>
- <li><a href="#testsuiteoutput">Generating test output</a></li>
- <li><a href="#testsuitecustom">Writing custom tests for test-suite</a></li>
+ <li><a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a></li>
+ <li><a href="#testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></li>
</ul>
</li>
</ol>
@@ -85,10 +81,13 @@ as <a href="http://python.org">Python</a> 2.4 or later.</p>
<p>The LLVM testing infrastructure contains two major categories of tests:
regression tests and whole programs. The regression tests are contained inside
the LLVM repository itself under <tt>llvm/test</tt> and are expected to always
-pass -- they should be run before every commit. The whole programs tests are
-referred to as the "LLVM test suite" and are in the <tt>test-suite</tt> module
-in subversion.
-</p>
+pass -- they should be run before every commit.</p>
+
+<p>The whole programs tests are referred to as the "LLVM test suite" (or
+"test-suite") and are in the <tt>test-suite</tt> module in subversion. For
+historical reasons, these tests are also referred to as the "nightly tests" in
+places, which is less ambiguous than "test-suite" and remains in use although we
+run them much more often than nightly.</p>
<!-- _______________________________________________________________________ -->
<h3><a name="regressiontests">Regression tests</a></h3>
@@ -118,20 +117,19 @@ application or benchmark.</p>
</div>
<!-- _______________________________________________________________________ -->
-<h3><a name="testsuite">Test suite</a></h3>
+<h3><a name="testsuite"><tt>test-suite</tt></a></h3>
<!-- _______________________________________________________________________ -->
<div>
-<p>The test suite contains whole programs, which are pieces of
-code which can be compiled and linked into a stand-alone program that can be
-executed. These programs are generally written in high level languages such as
-C or C++, but sometimes they are written straight in LLVM assembly.</p>
+<p>The test suite contains whole programs, which are pieces of code which can be
+compiled and linked into a stand-alone program that can be executed. These
+programs are generally written in high level languages such as C or C++.</p>
-<p>These programs are compiled and then executed using several different
-methods (native compiler, LLVM C backend, LLVM JIT, LLVM native code generation,
-etc). The output of these programs is compared to ensure that LLVM is compiling
-the program correctly.</p>
+<p>These programs are compiled using a user specified compiler and set of flags,
+and then executed to capture the program output and timing information. The
+output of these programs is compared to a reference output to ensure that the
+program is being compiled correctly.</p>
<p>In addition to compiling and executing programs, whole program tests serve as
a way of benchmarking LLVM performance, both in terms of the efficiency of the
@@ -168,15 +166,14 @@ test suite for more information . This test suite is located in the
<p>The tests are located in two separate Subversion modules. The regressions
tests are in the main "llvm" module under the directory
- <tt>llvm/test</tt> (so you get these tests for free with the main llvm tree).
- The more comprehensive test suite that includes whole
-programs in C and C++ is in the <tt>test-suite</tt> module. This module should
-be checked out to the <tt>llvm/projects</tt> directory (don't use another name
-than the default "test-suite", for then the test suite will be run every time
-you run <tt>make</tt> in the main <tt>llvm</tt> directory).
-When you <tt>configure</tt> the <tt>llvm</tt> module,
-the <tt>test-suite</tt> directory will be automatically configured.
-Alternatively, you can configure the <tt>test-suite</tt> module manually.</p>
+ <tt>llvm/test</tt> (so you get these tests for free with the main llvm
+ tree). Use "make check-all" to run the regression tests after building
+ LLVM.</p>
+
+ <p>The more comprehensive test suite that includes whole programs in C and C++
+ is in the <tt>test-suite</tt>
+ module. See <a href="#testsuitequickstart"><tt>test-suite</tt> Quickstart</a>
+ for more information on running these tests.</p>
<!-- _______________________________________________________________________ -->
<h3><a name="quickregressiontests">Regression tests</a></h3>
@@ -243,60 +240,6 @@ script which is built as part of LLVM. For example, to run the
</div>
<!-- _______________________________________________________________________ -->
-<h3><a name="quicktestsuite">Test suite</a></h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-
-<p>To run the comprehensive test suite (tests that compile and execute whole
-programs), first checkout and setup the <tt>test-suite</tt> module:</p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
-% cd ..
-% ./configure --with-llvmgccdir=$LLVM_GCC_DIR
-</pre>
-</div>
-
-<p>where <tt>$LLVM_GCC_DIR</tt> is the directory where
-you <em>installed</em> llvm-gcc, not its src or obj
-dir. The <tt>--with-llvmgccdir</tt> option assumes that
-the <tt>llvm-gcc-4.2</tt> module was configured with
-<tt>--program-prefix=llvm-</tt>, and therefore that the C and C++
-compiler drivers are called <tt>llvm-gcc</tt> and <tt>llvm-g++</tt>
-respectively. If this is not the case,
-use <tt>--with-llvmgcc</tt>/<tt>--with-llvmgxx</tt> to specify each
-executable's location.</p>
-
-<p>Then, run the entire test suite by running make in the <tt>test-suite</tt>
-directory:</p>
-
-<div class="doc_code">
-<pre>
-% cd projects/test-suite
-% gmake
-</pre>
-</div>
-
-<p>Usually, running the "nightly" set of tests is a good idea, and you can also
-let it generate a report by running:</p>
-
-<div class="doc_code">
-<pre>
-% cd projects/test-suite
-% gmake TEST=nightly report report.html
-</pre>
-</div>
-
-<p>Any of the above commands can also be run in a subdirectory of
-<tt>projects/test-suite</tt> to run the specified test only on the programs in
-that subdirectory.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
<h3><a name="quickdebuginfotests">Debugging Information tests</a></h3>
<div>
<!-- _______________________________________________________________________ -->
@@ -799,37 +742,10 @@ define two separate CHECK lines that match on the same line.
you need multiple temporaries. This is useful as the destination of some
redirected output.</dd>
- <dt><b>llvmlibsdir</b> (%llvmlibsdir)</dt>
- <dd>The directory where the LLVM libraries are located.</dd>
-
<dt><b>target_triplet</b> (%target_triplet)</dt>
<dd>The target triplet that corresponds to the current host machine (the one
running the test cases). This should probably be called "host".<dd>
- <dt><b>llvmgcc</b> (%llvmgcc)</dt>
- <dd>The full path to the <tt>llvm-gcc</tt> executable as specified in the
- configured LLVM environment</dd>
-
- <dt><b>llvmgxx</b> (%llvmgxx)</dt>
- <dd>The full path to the <tt>llvm-gxx</tt> executable as specified in the
- configured LLVM environment</dd>
-
- <dt><b>gccpath</b></dt>
- <dd>The full path to the C compiler used to <i>build </i> LLVM. Note that
- this might not be gcc.</dd>
-
- <dt><b>gxxpath</b></dt>
- <dd>The full path to the C++ compiler used to <i>build </i> LLVM. Note that
- this might not be g++.</dd>
-
- <dt><b>compile_c</b> (%compile_c)</dt>
- <dd>The full command line used to compile LLVM C source code. This has all
- the configured -I, -D and optimization options.</dd>
-
- <dt><b>compile_cxx</b> (%compile_cxx)</dt>
- <dd>The full command used to compile LLVM C++ source code. This has
- all the configured -I, -D and optimization options.</dd>
-
<dt><b>link</b> (%link)</dt>
<dd>This full link command used to link LLVM executables. This has all the
configured -I, -L and -l options.</dd>
@@ -907,30 +823,15 @@ define two separate CHECK lines that match on the same line.
</div>
<!--=========================================================================-->
-<h2><a name="testsuitestructure">Test suite Structure</a></h2>
+<h2><a name="testsuiteoverview"><tt>test-suite</tt> Overview</a></h2>
<!--=========================================================================-->
<div>
-<p>The <tt>test-suite</tt> module contains a number of programs that can be compiled
-with LLVM and executed. These programs are compiled using the native compiler
-and various LLVM backends. The output from the program compiled with the
-native compiler is assumed correct; the results from the other programs are
-compared to the native program output and pass if they match.</p>
-
-<p>When executing tests, it is usually a good idea to start out with a subset of
-the available tests or programs. This makes test run times smaller at first and
-later on this is useful to investigate individual test failures. To run some
-test only on a subset of programs, simply change directory to the programs you
-want tested and run <tt>gmake</tt> there. Alternatively, you can run a different
-test using the <tt>TEST</tt> variable to change what tests or run on the
-selected programs (see below for more info).</p>
-
-<p>In addition for testing correctness, the <tt>test-suite</tt> directory also
-performs timing tests of various LLVM optimizations. It also records
-compilation times for the compilers and the JIT. This information can be
-used to compare the effectiveness of LLVM's optimizations and code
-generation.</p>
+<p>The <tt>test-suite</tt> module contains a number of programs that can be
+compiled and executed. The <tt>test-suite</tt> includes reference outputs for
+all of the programs, so that the output of the executed program can be checked
+for correctness.</p>
<p><tt>test-suite</tt> tests are divided into three types of tests: MultiSource,
SingleSource, and External.</p>
@@ -952,248 +853,40 @@ go here.</p></li>
to (i.e., not distributed with) LLVM. The most prominent members of this
directory are the SPEC 95 and SPEC 2000 benchmark suites. The <tt>External</tt>
directory does not contain these actual tests, but only the Makefiles that know
-how to properly compile these programs from somewhere else. The presence and
-location of these external programs is configured by the test-suite
-<tt>configure</tt> script.</p></li>
+how to properly compile these programs from somewhere else. When
+using <tt>LNT</tt>, use the <tt>--test-externals</tt> option to include these
+tests in the results.</p></li>
</ul>
-
-<p>Each tree is then subdivided into several categories, including applications,
-benchmarks, regression tests, code that is strange grammatically, etc. These
-organizations should be relatively self explanatory.</p>
-
-<p>Some tests are known to fail. Some are bugs that we have not fixed yet;
-others are features that we haven't added yet (or may never add). In the
-regression tests, the result for such tests will be XFAIL (eXpected FAILure).
-In this way, you can tell the difference between an expected and unexpected
-failure.</p>
-
-<p>The tests in the test suite have no such feature at this time. If the
-test passes, only warnings and other miscellaneous output will be generated. If
-a test fails, a large &lt;program&gt; FAILED message will be displayed. This
-will help you separate benign warnings from actual test failures.</p>
-
</div>
<!--=========================================================================-->
-<h2><a name="testsuiterun">Running the test suite</a></h2>
+<h2><a name="testsuitequickstart"><tt>test-suite</tt> Quickstart</a></h2>
<!--=========================================================================-->
<div>
+<p>The modern way of running the <tt>test-suite</tt> is focused on testing and
+benchmarking complete compilers using
+the <a href="http://llvm.org/docs/lnt">LNT</a> testing infrastructure.</p>
-<p>First, all tests are executed within the LLVM object directory tree. They
-<i>are not</i> executed inside of the LLVM source tree. This is because the
-test suite creates temporary files during execution.</p>
-
-<p>To run the test suite, you need to use the following steps:</p>
-
-<ol>
- <li><tt>cd</tt> into the <tt>llvm/projects</tt> directory in your source tree.
- </li>
-
- <li><p>Check out the <tt>test-suite</tt> module with:</p>
-
-<div class="doc_code">
-<pre>
-% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
-</pre>
-</div>
- <p>This will get the test suite into <tt>llvm/projects/test-suite</tt>.</p>
- </li>
- <li><p>Configure and build <tt>llvm</tt>.</p></li>
- <li><p>Configure and build <tt>llvm-gcc</tt>.</p></li>
- <li><p>Install <tt>llvm-gcc</tt> somewhere.</p></li>
- <li><p><em>Re-configure</em> <tt>llvm</tt> from the top level of
- each build tree (LLVM object directory tree) in which you want
- to run the test suite, just as you do before building LLVM.</p>
- <p>During the <em>re-configuration</em>, you must either: (1)
- have <tt>llvm-gcc</tt> you just built in your path, or (2)
- specify the directory where your just-built <tt>llvm-gcc</tt> is
- installed using <tt>--with-llvmgccdir=$LLVM_GCC_DIR</tt>.</p>
- <p>You must also tell the configure machinery that the test suite
- is available so it can be configured for your build tree:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
-</pre>
-</div>
- <p>[Remember that <tt>$LLVM_GCC_DIR</tt> is the directory where you
- <em>installed</em> llvm-gcc, not its src or obj directory.]</p>
- </li>
-
- <li><p>You can now run the test suite from your build tree as follows:</p>
-<div class="doc_code">
-<pre>
-% cd $LLVM_OBJ_ROOT/projects/test-suite
-% make
-</pre>
-</div>
- </li>
-</ol>
-<p>Note that the second and third steps only need to be done once. After you
-have the suite checked out and configured, you don't need to do it again (unless
-the test code or configure script changes).</p>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuiteexternal">Configuring External Tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-
-<div>
-<p>In order to run the External tests in the <tt>test-suite</tt>
- module, you must specify <i>--with-externals</i>. This
- must be done during the <em>re-configuration</em> step (see above),
- and the <tt>llvm</tt> re-configuration must recognize the
- previously-built <tt>llvm-gcc</tt>. If any of these is missing or
- neglected, the External tests won't work.</p>
-<dl>
-<dt><i>--with-externals</i></dt>
-<dt><i>--with-externals=&lt;<tt>directory</tt>&gt;</i></dt>
-</dl>
- This tells LLVM where to find any external tests. They are expected to be
- in specifically named subdirectories of &lt;<tt>directory</tt>&gt;.
- If <tt>directory</tt> is left unspecified,
- <tt>configure</tt> uses the default value
- <tt>/home/vadve/shared/benchmarks/speccpu2000/benchspec</tt>.
- Subdirectory names known to LLVM include:
- <dl>
- <dt>spec95</dt>
- <dt>speccpu2000</dt>
- <dt>speccpu2006</dt>
- <dt>povray31</dt>
- </dl>
- Others are added from time to time, and can be determined from
- <tt>configure</tt>.
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuitetests">Running different tests</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
-<p>In addition to the regular "whole program" tests, the <tt>test-suite</tt>
-module also provides a mechanism for compiling the programs in different ways.
-If the variable TEST is defined on the <tt>gmake</tt> command line, the test system will
-include a Makefile named <tt>TEST.&lt;value of TEST variable&gt;.Makefile</tt>.
-This Makefile can modify build rules to yield different results.</p>
-
-<p>For example, the LLVM nightly tester uses <tt>TEST.nightly.Makefile</tt> to
-create the nightly test reports. To run the nightly tests, run <tt>gmake
-TEST=nightly</tt>.</p>
-
-<p>There are several TEST Makefiles available in the tree. Some of them are
-designed for internal LLVM research and will not work outside of the LLVM
-research group. They may still be valuable, however, as a guide to writing your
-own TEST Makefile for any optimization or analysis passes that you develop with
-LLVM.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuiteoutput">Generating test output</a>
-</h3>
-<!-- _______________________________________________________________________ -->
-<div>
- <p>There are a number of ways to run the tests and generate output. The most
- simple one is simply running <tt>gmake</tt> with no arguments. This will
- compile and run all programs in the tree using a number of different methods
- and compare results. Any failures are reported in the output, but are likely
- drowned in the other output. Passes are not reported explicitely.</p>
-
- <p>Somewhat better is running <tt>gmake TEST=sometest test</tt>, which runs
- the specified test and usually adds per-program summaries to the output
- (depending on which sometest you use). For example, the <tt>nightly</tt> test
- explicitly outputs TEST-PASS or TEST-FAIL for every test after each program.
- Though these lines are still drowned in the output, it's easy to grep the
- output logs in the Output directories.</p>
-
- <p>Even better are the <tt>report</tt> and <tt>report.format</tt> targets
- (where <tt>format</tt> is one of <tt>html</tt>, <tt>csv</tt>, <tt>text</tt> or
- <tt>graphs</tt>). The exact contents of the report are dependent on which
- <tt>TEST</tt> you are running, but the text results are always shown at the
- end of the run and the results are always stored in the
- <tt>report.&lt;type&gt;.format</tt> file (when running with
- <tt>TEST=&lt;type&gt;</tt>).
-
- The <tt>report</tt> targets also generate a file called
- <tt>report.&lt;type&gt;.raw.out</tt> containing the output of the entire test
- run.
+<p>For more information on using LNT to execute the <tt>test-suite</tt>, please
+see the <a href="http://llvm.org/docs/lnt/quickstart.html">LNT Quickstart</a>
+documentation.</p>
</div>
-<!-- _______________________________________________________________________ -->
-<h3>
- <a name="testsuitecustom">Writing custom tests for the test suite</a>
-</h3>
-<!-- _______________________________________________________________________ -->
+<!--=========================================================================-->
+<h2><a name="testsuitemakefiles"><tt>test-suite</tt> Makefiles</a></h2>
+<!--=========================================================================-->
<div>
+<p>Historically, the <tt>test-suite</tt> was executed using a complicated setup
+of Makefiles. The LNT-based approach above is recommended for most users, but
+there are some testing scenarios which it does not support. In addition, LNT
+currently drives the Makefile setup under the covers, so developers interested
+in how LNT works may want to understand the Makefile-based setup.</p>
-<p>Assuming you can run the test suite (e.g. "<tt>gmake TEST=nightly report</tt>"
-should work), it is really easy to run optimizations or code generator
-components against every program in the tree, collecting statistics or running
-custom checks for correctness. At base, this is how the nightly tester works;
-it's just one example of a general framework.</p>
-
-<p>Let's say that you have an LLVM optimization pass, and you want to see how
-many times it triggers. The first thing you should do is add an LLVM
-<a href="ProgrammersManual.html#Statistic">statistic</a> to your pass, which
-will tally counts of things you care about.</p>
-
-<p>Following this, you can set up a test and a report that collects these and
-formats them for easy viewing. This consists of two files, a
-"<tt>test-suite/TEST.XXX.Makefile</tt>" fragment (where XXX is the name of your
-test) and a "<tt>test-suite/TEST.XXX.report</tt>" file that indicates how to
-format the output into a table. There are many example reports of various
-levels of sophistication included with the test suite, and the framework is very
-general.</p>
-
-<p>If you are interested in testing an optimization pass, check out the
-"libcalls" test as an example. It can be run like this:<p>
-
-<div class="doc_code">
-<pre>
-% cd llvm/projects/test-suite/MultiSource/Benchmarks # or some other level
-% make TEST=libcalls report
-</pre>
-</div>
-
-<p>This will do a bunch of stuff, then eventually print a table like this:</p>
-
-<div class="doc_code">
-<pre>
-Name | total | #exit |
-...
-FreeBench/analyzer/analyzer | 51 | 6 |
-FreeBench/fourinarow/fourinarow | 1 | 1 |
-FreeBench/neural/neural | 19 | 9 |
-FreeBench/pifft/pifft | 5 | 3 |
-MallocBench/cfrac/cfrac | 1 | * |
-MallocBench/espresso/espresso | 52 | 12 |
-MallocBench/gs/gs | 4 | * |
-Prolangs-C/TimberWolfMC/timberwolfmc | 302 | * |
-Prolangs-C/agrep/agrep | 33 | 12 |
-Prolangs-C/allroots/allroots | * | * |
-Prolangs-C/assembler/assembler | 47 | * |
-Prolangs-C/bison/mybison | 74 | * |
-...
-</pre>
-</div>
-
-<p>This is basically grepping the -stats output and displaying it in a table.
-You can also use the "TEST=libcalls report.html" target to get the table in HTML
-form, and similarly for report.csv and report.tex.</p>
-
-<p>The source for this is in test-suite/TEST.libcalls.*. The format is pretty
-simple: the Makefile indicates how to run the test (in this case,
-"<tt>opt -simplify-libcalls -stats</tt>"), and the report contains one line for
-each column of the output. The first value is the header for the column and the
-second is the regex to grep the output of the command for. There are lots of
-example reports that can do fancy stuff.</p>
-
-</div>
-
+<p>For more information on the <tt>test-suite</tt> Makefile setup, please see
+the <a href="TestSuiteMakefileGuide.html">Test Suite Makefile Guide</a>.</p>
</div>
<!-- *********************************************************************** -->
@@ -1207,7 +900,7 @@ example reports that can do fancy stuff.</p>
John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner<br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-03-27 13:25:16 +0200 (Tue, 27 Mar 2012) $
</address>
</body>
</html>
diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html
deleted file mode 100644
index 6c1dd18aac92..000000000000
--- a/docs/UsingLibraries.html
+++ /dev/null
@@ -1,448 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
- <title>Using The LLVM Libraries</title>
- <link rel="stylesheet" href="llvm.css" type="text/css">
-</head>
-<body>
-<h1>Using The LLVM Libraries</h1>
-<ol>
- <li><a href="#abstract">Abstract</a></li>
- <li><a href="#introduction">Introduction</a></li>
- <li><a href="#descriptions">Library Descriptions</a></li>
- <li><a href="#dependencies">Library Dependencies</a></li>
- <li><a href="#rot">Linkage Rules Of Thumb</a>
- <ol>
- <li><a href="#always">Always link LLVMCore, LLVMSupport, LLVMSystem</a>
- <li><a href="#onlyone">Never link both archive and re-linked</a>
- </ol>
- </li>
-</ol>
-
-<div class="doc_author">
- <p>Written by <a href="mailto:rspencer@x10sys.com">Reid Spencer</a></p>
-</div>
-
-<p class="doc_warning">Warning: This document is out of date, for more
- information please
- see <a href="CommandGuide/html/llvm-config.html">llvm-config</a> or,
- if you use CMake, <a href="CMake.html#embedding">the CMake LLVM
- guide</a>.</p>
-
-<!-- ======================================================================= -->
-<h2><a name="abstract">Abstract</a></h2>
-<div>
- <p>Amongst other things, LLVM is a toolkit for building compilers, linkers,
- runtime executives, virtual machines, and other program execution related
- tools. In addition to the LLVM tool set, the functionality of LLVM is
- available through a set of libraries. To use LLVM as a toolkit for
- constructing tools, a developer needs to understand what is contained in the
- various libraries, what they depend on, and how to use them. Fortunately,
- there is a tool, <tt>llvm-config</tt>, to aid with this. This document
- describes the contents of the libraries and how to use <tt>llvm-config</tt>
- to generate command line options.
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="introduction">Introduction</a></h2>
-<div>
- <p>If you're writing a compiler, virtual machine, or any other utility based
- on LLVM, you'll need to figure out which of the many library files you will
- need to link with to be successful. An understanding of the contents of these
- libraries will be useful in coming up with an optimal specification for the
- libraries to link with. The purpose of this document is to reduce some of
- the trial and error that the author experienced in using LLVM.</p>
- <p>LLVM produces two types of libraries: archives (ending in <tt>.a</tt>) and
- objects (ending in <tt>.o</tt>). However, both are libraries. Libraries ending
- in <tt>.o</tt> are known as re-linked libraries because they contain all the
- compilation units of the library linked together as a single <tt>.o</tt> file.
- Furthermore, several of the libraries have <em>both</em> forms of library. The
- re-linked libraries are used whenever you want to include all symbols from the
- library. The archive libraries are used whenever you want to only resolve
- outstanding symbols at that point in the link without including everything in
- the library. </p>
- <p>If you're using the LLVM Makefile system to link your tools, you will use
- the <tt>LLVMLIBS</tt> make variable.
- (see the <a href="MakefileGuide.html#LLVMLIBS">Makefile Guide</a> for
- details). This variable specifies which LLVM libraries to link into your tool
- and the order in which they will be linked. You specify re-linked libraries by
- naming the library without a suffix. You specify archive libraries by naming
- the library with a <tt>.a</tt> suffix but without the <tt>lib</tt> prefix. The
- order in which the libraries appear in the <tt>LLVMLIBS</tt> variable
- definition is the order in which they will be linked. Getting this order
- correct for your tool can sometimes be challenging.</p>
-</div>
-<!-- ======================================================================= -->
-<h2><a name="descriptions">Library Descriptions</a></h2>
-<div>
- <p>The table below categorizes each library.</p>
-<table style="text-align:left">
- <tr><th>Library</th><th>Forms</th><th>Description</th></tr>
- <tr><th colspan="3">Core Libraries</th></tr>
- <tr><td>LLVMArchive</td><td><tt>.a</tt></td>
- <td>LLVM archive reading and writing</td></tr>
- <tr><td>LLVMAsmParser</td><td><tt>.a</tt></td>
- <td>LLVM assembly parsing</td></tr>
- <tr><td>LLVMBCReader</td><td><tt>.a</tt></td>
- <td>LLVM bitcode reading</td></tr>
- <tr><td>LLVMBCWriter</td><td><tt>.a</tt></td>
- <td>LLVM bitcode writing</td></tr>
- <tr><td>LLVMCore</td><td><tt>.a</tt></td>
- <td>LLVM core intermediate representation</td></tr>
- <tr><td>LLVMDebugger</td><td><tt>.a</tt></td>
- <td>Source level debugging support</td></tr>
- <tr><td>LLVMLinker</td><td><tt>.a</tt></td>
- <td>Bitcode and archive linking interface</td></tr>
- <tr><td>LLVMSupport</td><td><tt>.a</tt></td>
- <td>General support utilities</td></tr>
- <tr><td>LLVMSystem</td><td><tt>.a</tt></td>
- <td>Operating system abstraction layer</td></tr>
- <tr><td>LLVMbzip2</td><td><tt>.a</tt></td>
- <td>BZip2 compression library</td></tr>
-
- <tr><th colspan="3">Analysis Libraries</th></tr>
- <tr><td>LLVMAnalysis</td><td><tt>.a</tt></td>
- <td>Various analysis passes.</td></tr>
- <tr><td>LLVMDataStructure</td><td><tt>.o</tt></td>
- <td>Data structure analysis passes.</td></tr>
- <tr><td>LLVMipa</td><td><tt>.a</tt></td>
- <td>Inter-procedural analysis passes.</td></tr>
-
- <tr><th colspan="3">Transformation Libraries</th></tr>
- <tr><td>LLVMInstrumentation</td><td><tt>.a</tt></td>
- <td>Instrumentation passes.</td></tr>
- <tr><td>LLVMipo</td><td><tt>.a</tt></td>
- <td>All inter-procedural optimization passes.</td></tr>
- <tr><td>LLVMScalarOpts</td><td><tt>.a</tt></td>
- <td>All scalar optimization passes.</td></tr>
- <tr><td>LLVMTransformUtils</td><td><tt>.a</tt></td>
- <td>Transformation utilities used by many passes.</td></tr>
-
- <tr><th colspan="3">Code Generation Libraries </th></tr>
- <tr><td>LLVMCodeGen</td><td><tt>.o</tt></td>
- <td>Native code generation infrastructure</td></tr>
- <tr><td>LLVMSelectionDAG</td><td><tt>.o</tt></td>
- <td>Aggressive instruction selector for directed acyclic graphs</td></tr>
-
- <tr><th colspan="3">Target Libraries</th></tr>
- <tr><td>LLVMAlpha</td><td><tt>.o</tt></td>
- <td>Code generation for Alpha architecture</td></tr>
- <tr><td>LLVMARM</td><td><tt>.o</tt></td>
- <td>Code generation for ARM architecture</td></tr>
- <tr><td>LLVMCBackend</td><td><tt>.o</tt></td>
- <td>'C' language code generator.</td></tr>
- <tr><td>LLVMPowerPC</td><td><tt>.o</tt></td>
- <td>Code generation for PowerPC architecture</td></tr>
- <tr><td>LLVMSparc</td><td><tt>.o</tt></td>
- <td>Code generation for Sparc architecture</td></tr>
- <tr><td>LLVMTarget</td><td><tt>.a</tt></td>
- <td>Generic code generation utilities.</td></tr>
- <tr><td>LLVMX86</td><td><tt>.o</tt></td>
- <td>Code generation for Intel x86 architecture</td></tr>
-
- <tr><th colspan="3">Runtime Libraries</th></tr>
- <tr><td>LLVMInterpreter</td><td><tt>.o</tt></td>
- <td>Bitcode Interpreter</td></tr>
- <tr><td>LLVMJIT</td><td><tt>.o</tt></td>
- <td>Bitcode JIT Compiler</td></tr>
- <tr><td>LLVMExecutionEngine</td><td><tt>.o</tt></td>
- <td>Virtual machine engine</td></tr>
-</table>
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="dependencies">Using llvm-config</a></h2>
-<div>
- <p>The <tt>llvm-config</tt> tool is a Perl script that produces various kinds
- of information on its output. For example, the source or object directories
- used to build LLVM can be accessed by passing options to <tt>llvm-config</tt>.
- For complete details on this tool, please see the
- <a href="CommandGuide/html/llvm-config.html">manual page</a>.</p>
- <p>To understand the relationships between libraries, the <tt>llvm-config</tt>
- tool can be very useful. If all you know is that you want certain libraries to
- be available, you can generate the complete set of libraries to link with
- using one of four options, as below:</p>
- <ol>
- <li><tt>--ldflags</tt>. This generates the command line options necessary to
- be passed to the <tt>ld</tt> tool in order to link with LLVM. Most notably,
- the <tt>-L</tt> option is provided to specify a library search directory
- that contains the LLVM libraries.</li>
- <li><tt>--libs</tt>. This generates command line options suitable for
- use with a gcc-style linker. That is, libraries are given with a -l option
- and object files are given with a full path.</li>
- <li><tt>--libnames</tt>. This generates a list of just the library file
- names. If you know the directory in which these files reside (see --ldflags)
- then you can find the libraries there.</li>
- <li><tt>--libfiles</tt>. This generates the full path names of the
- LLVM library files.</li>
- </ol>
- <p>If you wish to delve further into how <tt>llvm-config</tt> generates the
- correct order (based on library dependencies), please see the tool named
- <tt>GenLibDeps.pl</tt> in the <tt>utils</tt> source directory of LLVM.</p>
-
- <!-- =======NOTE: =========================================================-->
- <!-- === The following graphs and <dl> list are generated automatically ===-->
- <!-- === by the util named GenLibDeps.pl in the llvm/utils directory. ===-->
- <!-- === This should be updated whenever new libraries are added, ===-->
- <!-- === removed, or changed ===-->
- <!-- =======NOTE: =========================================================-->
- <h3>Dependency Relationships Of Libraries</h3>
- <p>This graph shows the dependency of archive libraries on other archive
- libraries or objects. Where a library has both archive and object forms, only
- the archive form is shown.</p>
- <img src="img/libdeps.gif" alt="Library Dependencies">
- <h3>Dependency Relationships Of Object Files</h3>
- <p>This graph shows the dependency of object files on archive libraries or
- other objects. Where a library has both object and archive forms, only the
- dependency to the archive form is shown.</p>
- <img src="img/objdeps.gif" alt="Object File Dependencies">
- <p>The following list shows the dependency relationships between libraries in
- textual form. The information is the same as shown on the graphs but arranged
- alphabetically.</p>
-<dl>
- <dt><b>libLLVMAnalysis.a</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>libLLVMArchive.a</b></dt><dd><ul>
- <li>libLLVMBCReader.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMAsmParser.a</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMBCReader.a</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMBCWriter.a</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMCodeGen.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMScalarOpts.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMTransformUtils.a</li>
- </ul></dd>
- <dt><b>libLLVMCore.a</b></dt><dd><ul>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMDebugger.a</b></dt><dd><ul>
- <li>libLLVMBCReader.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMInstrumentation.a</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMScalarOpts.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMTransformUtils.a</li>
- </ul></dd>
- <dt><b>libLLVMLinker.a</b></dt><dd><ul>
- <li>libLLVMArchive.a</li>
- <li>libLLVMBCReader.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMScalarOpts.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMTransformUtils.a</li>
- </ul></dd>
- <dt><b>libLLVMSelectionDAG.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMTransformUtils.a</li>
- </ul></dd>
- <dt><b>libLLVMSupport.a</b></dt><dd><ul>
- <li>libLLVMSystem.a</li>
- <li>libLLVMbzip2.a</li>
- </ul></dd>
- <dt><b>libLLVMSystem.a</b></dt><dd>
- </dd>
- <dt><b>libLLVMTarget.a</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMTransformUtils.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMipa.a</li>
- </ul></dd>
- <dt><b>libLLVMbzip2.a</b></dt><dd>
- </dd>
- <dt><b>libLLVMipa.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- </ul></dd>
- <dt><b>libLLVMipo.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMTransformUtils.a</li>
- <li>libLLVMipa.a</li>
- </ul></dd>
- <dt><b>libLLVMlto.a</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMBCReader.a</li>
- <li>libLLVMBCWriter.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMLinker.a</li>
- <li>libLLVMScalarOpts.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMipa.a</li>
- <li>libLLVMipo.a</li>
- </ul></dd>
- <dt><b>LLVMARM.o</b></dt><dd><ul>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSelectionDAG.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMAlpha.o</b></dt><dd><ul>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSelectionDAG.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMCBackend.o</b></dt><dd><ul>
- <li>libLLVMAnalysis.a</li>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMScalarOpts.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- <li>libLLVMTransformUtils.a</li>
- <li>libLLVMipa.a</li>
- </ul></dd>
- <dt><b>LLVMExecutionEngine.o</b></dt><dd><ul>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMInterpreter.o</b></dt><dd><ul>
- <li>LLVMExecutionEngine.o</li>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMJIT.o</b></dt><dd><ul>
- <li>LLVMExecutionEngine.o</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMPowerPC.o</b></dt><dd><ul>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSelectionDAG.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMSparc.o</b></dt><dd><ul>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSelectionDAG.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
- <dt><b>LLVMX86.o</b></dt><dd><ul>
- <li>libLLVMCodeGen.a</li>
- <li>libLLVMCore.a</li>
- <li>libLLVMSelectionDAG.a</li>
- <li>libLLVMSupport.a</li>
- <li>libLLVMSystem.a</li>
- <li>libLLVMTarget.a</li>
- </ul></dd>
-</dl>
-</div>
-
-<!-- ======================================================================= -->
-<h2><a name="rot">Linkage Rules Of Thumb</a></h2>
-<div>
- <p>This section contains various "rules of thumb" about what files you
- should link into your programs.</p>
-<!-- ======================================================================= -->
-<h3>
- <a name="always">Always Link LLVMCore, LLVMSupport, and LLVMSystem</a>
-</h3>
-<div>
- <p>No matter what you do with LLVM, the last three entries in the value of
- your LLVMLIBS make variable should always be:
- <tt>LLVMCore.a LLVMSupport.a LLVMSystem.a</tt>. There are no <tt>LLVM</tt>
- programs that don't depend on these three.</p>
-</div>
-<!-- ======================================================================= -->
-<h3>
- <a name="onlyone">Never link both archive and re-linked library</a>
-</h3>
-<div>
- <p>There is never any point to linking both the re-linked (<tt>.o</tt>) and
- the archive (<tt>.a</tt>) versions of a library. Since the re-linked version
- includes the entire library, the archive version will not resolve any symbols.
- You could even end up with a link error if you place the archive version before
- the re-linked version on the linker's command line.</p>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<hr>
-<div class="doc_footer">
-<address>
- <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
- src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
- <a href="http://validator.w3.org/check/referer"><img
- src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
- <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>
-</address>
-<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
-<br>Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $ </div>
-</body>
-</html>
-<!-- vim: sw=2 ts=2 ai
--->
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index dd6f1ec8d6d1..85548ea999e3 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -77,7 +77,7 @@ either assembly code or binary code (usable for a JIT compiler).
<p>
The backend of LLVM features a target-independent code generator that may create
-output for several types of target CPUs &mdash; including X86, PowerPC, Alpha,
+output for several types of target CPUs &mdash; including X86, PowerPC, ARM,
and SPARC. The backend may also be used to generate code targeted at SPUs of the
Cell processor or GPUs to support the execution of compute kernels.
</p>
@@ -2526,7 +2526,7 @@ with assembler.
<a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-06-16 01:28:14 +0200 (Thu, 16 Jun 2011) $
+ Last modified: $Date: 2012-03-01 16:14:19 +0100 (Thu, 01 Mar 2012) $
</address>
</body>
diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index adbd691b870b..5dc67ae27b8d 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -417,17 +417,17 @@ USAGE: opt [options] &lt;input bitcode&gt;
OPTIONS:
Optimizations available:
...
- -funcresolve - Resolve Functions
- -gcse - Global Common Subexpression Elimination
- -globaldce - Dead Global Elimination
- <b>-hello - Hello World Pass</b>
- -indvars - Canonicalize Induction Variables
- -inline - Function Integration/Inlining
- -instcombine - Combine redundant instructions
+ -globalopt - Global Variable Optimizer
+ -globalsmodref-aa - Simple mod/ref analysis for globals
+ -gvn - Global Value Numbering
+ <b>-hello - Hello World Pass</b>
+ -indvars - Induction Variable Simplification
+ -inline - Function Integration/Inlining
+ -insert-edge-profiling - Insert instrumentation for edge profiling
...
</pre></div>
-<p>The pass name get added as the information string for your pass, giving some
+<p>The pass name gets added as the information string for your pass, giving some
documentation to users of <tt>opt</tt>. Now that you have a working pass, you
would go ahead and make it do the cool transformations you want. Once you get
it all working and tested, it may become useful to find out how fast your pass
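<p>For orientation, here is a minimal sketch of such a pass, written against the
legacy <tt>RegisterPass</tt> interface used throughout this document (the header
paths are those of this era and may differ in later releases):</p>

<div class="doc_code"><pre>
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Support/raw_ostream.h"

<b>using namespace</b> llvm;

<b>namespace</b> {
  <i>// Prints the name of every function it visits; it never mutates the IR,</i>
  <i>// so runOnFunction always returns false.</i>
  <b>struct</b> Hello : <b>public</b> FunctionPass {
    <b>static char</b> ID;
    Hello() : FunctionPass(ID) {}

    <b>virtual bool</b> runOnFunction(Function &amp;F) {
      errs() &lt;&lt; "Hello: " &lt;&lt; F.getName() &lt;&lt; "\n";
      <b>return false</b>;
    }
  };
}

<b>char</b> Hello::ID = 0;
<b>static</b> RegisterPass&lt;Hello&gt; X("<i>hello</i>", "<i>Hello World Pass</i>");
</pre></div>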
@@ -545,7 +545,7 @@ following signature:</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnModule(Module &amp;M) = 0;
+<b>virtual bool</b> runOnModule(Module &amp;M) = 0;
</pre></div>
<p>The <tt>runOnModule</tt> method performs the interesting work of the pass.
@@ -612,7 +612,7 @@ false if they didn't.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doInitialization(CallGraph &amp;CG);
+<b>virtual bool</b> doInitialization(CallGraph &amp;CG);
</pre></div>
<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
@@ -633,7 +633,7 @@ fast).</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnSCC(CallGraphSCC &amp;SCC) = 0;
+<b>virtual bool</b> runOnSCC(CallGraphSCC &amp;SCC) = 0;
</pre></div>
<p>The <tt>runOnSCC</tt> method performs the interesting work of the pass, and
@@ -652,7 +652,7 @@ otherwise.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doFinalization(CallGraph &amp;CG);
+<b>virtual bool</b> doFinalization(CallGraph &amp;CG);
</pre></div>
<p>The <tt>doFinalization</tt> method is an infrequently used method that is
@@ -704,7 +704,7 @@ should return true if they modified the program, or false if they didn't.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doInitialization(Module &amp;M);
+<b>virtual bool</b> doInitialization(Module &amp;M);
</pre></div>
<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
@@ -732,7 +732,7 @@ free functions that it needs, adding prototypes to the module if necessary.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnFunction(Function &amp;F) = 0;
+<b>virtual bool</b> runOnFunction(Function &amp;F) = 0;
</pre></div>
<p>The <tt>runOnFunction</tt> method must be implemented by your subclass to do
@@ -751,7 +751,7 @@ be returned if the function is modified.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doFinalization(Module &amp;M);
+<b>virtual bool</b> doFinalization(Module &amp;M);
</pre></div>
<p>The <tt>doFinalization</tt> method is an infrequently used method that is
@@ -790,7 +790,7 @@ program, or false if they didn't. </p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doInitialization(Loop *, LPPassManager &amp;LPM);
+<b>virtual bool</b> doInitialization(Loop *, LPPassManager &amp;LPM);
</pre></div>
<p>The <tt>doInitialization</tt> method is designed to do simple initialization
@@ -811,7 +811,7 @@ information.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnLoop(Loop *, LPPassManager &amp;LPM) = 0;
+<b>virtual bool</b> runOnLoop(Loop *, LPPassManager &amp;LPM) = 0;
</pre></div>
<p>The <tt>runOnLoop</tt> method must be implemented by your subclass to do
@@ -829,7 +829,7 @@ should be used to update loop nest.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doFinalization();
+<b>virtual bool</b> doFinalization();
</pre></div>
<p>The <tt>doFinalization</tt> method is an infrequently used method that is
@@ -869,7 +869,7 @@ methods should return true if they modified the program, or false if they didn't
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doInitialization(Region *, RGPassManager &amp;RGM);
+<b>virtual bool</b> doInitialization(Region *, RGPassManager &amp;RGM);
</pre></div>
<p>The <tt>doInitialization</tt> method is designed to do simple initialization
@@ -890,7 +890,7 @@ information.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnRegion(Region *, RGPassManager &amp;RGM) = 0;
+<b>virtual bool</b> runOnRegion(Region *, RGPassManager &amp;RGM) = 0;
</pre></div>
<p>The <tt>runOnRegion</tt> method must be implemented by your subclass to do
@@ -908,7 +908,7 @@ should be used to update region tree.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doFinalization();
+<b>virtual bool</b> doFinalization();
</pre></div>
<p>The <tt>doFinalization</tt> method is an infrequently used method that is
@@ -957,7 +957,7 @@ href="#FunctionPass"><tt>FunctionPass</tt></a>'s have, but also have the followi
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doInitialization(Function &amp;F);
+<b>virtual bool</b> doInitialization(Function &amp;F);
</pre></div>
<p>The <tt>doInitialization</tt> method is allowed to do most of the things that
@@ -978,7 +978,7 @@ fast).</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnBasicBlock(BasicBlock &amp;BB) = 0;
+<b>virtual bool</b> runOnBasicBlock(BasicBlock &amp;BB) = 0;
</pre></div>
<p>Override this function to do the work of the <tt>BasicBlockPass</tt>. This
@@ -998,7 +998,7 @@ if the basic block is modified.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> doFinalization(Function &amp;F);
+<b>virtual bool</b> doFinalization(Function &amp;F);
</pre></div>
<p>The <tt>doFinalization</tt> method is an infrequently used method that is
@@ -1051,7 +1051,7 @@ data)</li>
<div>
<div class="doc_code"><pre>
- <b>virtual bool</b> runOnMachineFunction(MachineFunction &amp;MF) = 0;
+<b>virtual bool</b> runOnMachineFunction(MachineFunction &amp;MF) = 0;
</pre></div>
<p><tt>runOnMachineFunction</tt> can be considered the main entry point of a
@@ -1104,7 +1104,7 @@ implement the virtual <tt>print</tt> method:</p>
<div>
<div class="doc_code"><pre>
- <b>virtual void</b> print(std::ostream &amp;O, <b>const</b> Module *M) <b>const</b>;
+<b>virtual void</b> print(std::ostream &amp;O, <b>const</b> Module *M) <b>const</b>;
</pre></div>
<p>The <tt>print</tt> method must be implemented by "analyses" in order to print
@@ -1154,7 +1154,7 @@ having any prerequisite passes, and invalidating <b>all</b> other passes.</p>
<div>
<div class="doc_code"><pre>
- <b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;Info) <b>const</b>;
+<b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;Info) <b>const</b>;
</pre></div>
<p>By implementing the <tt>getAnalysisUsage</tt> method, the required and
@@ -1242,11 +1242,11 @@ the fact that it hacks on the CFG.
<div>
<div class="doc_code"><pre>
- <i>// This example modifies the program, but does not modify the CFG</i>
- <b>void</b> <a href="http://llvm.org/doxygen/structLICM.html">LICM</a>::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
- AU.setPreservesCFG();
- AU.addRequired&lt;<a href="http://llvm.org/doxygen/classllvm_1_1LoopInfo.html">LoopInfo</a>&gt;();
- }
+<i>// This example modifies the program, but does not modify the CFG</i>
+<b>void</b> <a href="http://llvm.org/doxygen/structLICM.html">LICM</a>::getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
+ AU.setPreservesCFG();
+ AU.addRequired&lt;<a href="http://llvm.org/doxygen/classllvm_1_1LoopInfo.html">LoopInfo</a>&gt;();
+}
</pre></div>
</div>
@@ -1268,10 +1268,10 @@ method. It takes a single template argument that specifies which pass class you
want, and returns a reference to that pass. For example:</p>
<div class="doc_code"><pre>
- bool LICM::runOnFunction(Function &amp;F) {
- LoopInfo &amp;LI = getAnalysis&lt;LoopInfo&gt;();
- ...
- }
+bool LICM::runOnFunction(Function &amp;F) {
+ LoopInfo &amp;LI = getAnalysis&lt;LoopInfo&gt;();
+ ...
+}
</pre></div>
<p>This method call returns a reference to the pass desired. You may get a
@@ -1285,11 +1285,11 @@ A module level pass can use function level analysis info using this interface.
For example:</p>
<div class="doc_code"><pre>
- bool ModuleLevelPass::runOnModule(Module &amp;M) {
- ...
- DominatorTree &amp;DT = getAnalysis&lt;DominatorTree&gt;(Func);
- ...
- }
+bool ModuleLevelPass::runOnModule(Module &amp;M) {
+ ...
+ DominatorTree &amp;DT = getAnalysis&lt;DominatorTree&gt;(Func);
+ ...
+}
</pre></div>
<p>In the above example, runOnFunction for DominatorTree is called by the pass manager
@@ -1302,11 +1302,11 @@ If your pass is capable of updating analyses if they exist (e.g.,
if it is active. For example:</p>
<div class="doc_code"><pre>
- ...
- if (DominatorSet *DS = getAnalysisIfAvailable&lt;DominatorSet&gt;()) {
- <i>// A DominatorSet is active. This code will update it.</i>
- }
- ...
+...
+if (DominatorSet *DS = getAnalysisIfAvailable&lt;DominatorSet&gt;()) {
+ <i>// A DominatorSet is active. This code will update it.</i>
+}
+...
</pre></div>
</div>
@@ -1405,7 +1405,7 @@ Unlike registration of passes, there is no command line argument to be specified
for the Analysis Group Interface itself, because it is "abstract":</p>
<div class="doc_code"><pre>
- <b>static</b> RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>&gt; A("<i>Alias Analysis</i>");
+<b>static</b> RegisterAnalysisGroup&lt;<a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>&gt; A("<i>Alias Analysis</i>");
</pre></div>
<p>Once the analysis is registered, passes can declare that they are valid
@@ -1416,10 +1416,9 @@ implementations of the interface by using the following code:</p>
//<i> Declare that we implement the AliasAnalysis interface</i>
INITIALIZE_AG_PASS(FancyAA, <a href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>, "<i>somefancyaa</i>",
"<i>A more complex alias analysis implementation</i>",
- false, // <i>Is CFG Only?</i>
- true, // <i>Is Analysis?</i>
- false, // <i>Is default Analysis Group implementation?</i>
- );
+ false, // <i>Is CFG Only?</i>
+ true, // <i>Is Analysis?</i>
+ false); // <i>Is default Analysis Group implementation?</i>
}
</pre></div>
@@ -1436,8 +1435,7 @@ this macro.</p>
"<i>Basic Alias Analysis (default AA impl)</i>",
false, // <i>Is CFG Only?</i>
true, // <i>Is Analysis?</i>
- true, // <i>Is default Analysis Group implementation?</i>
- );
+ true); // <i>Is default Analysis Group implementation?</i>
}
</pre></div>
@@ -1606,10 +1604,10 @@ we need to add the following <a
href="#getAnalysisUsage"><tt>getAnalysisUsage</tt></a> method to our pass:</p>
<div class="doc_code"><pre>
- <i>// We don't modify the program, so we preserve all analyses</i>
- <b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
- AU.setPreservesAll();
- }
+<i>// We don't modify the program, so we preserve all analyses</i>
+<b>virtual void</b> getAnalysisUsage(AnalysisUsage &amp;AU) <b>const</b> {
+ AU.setPreservesAll();
+}
</pre></div>
<p>Now when we run our pass, we get this output:</p>
@@ -1717,19 +1715,19 @@ machine passes. Here we will describe how to <i>register</i> a register
allocator machine pass.</p>
<p>Implement your register allocator machine pass. In your register allocator
-.cpp file add the following include;</p>
+<tt>.cpp</tt> file, add the following include:</p>
<div class="doc_code"><pre>
- #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
</pre></div>
<p>Also in your register allocator .cpp file, define a creator function of the
form:</p>
<div class="doc_code"><pre>
- FunctionPass *createMyRegisterAllocator() {
- return new MyRegisterAllocator();
- }
+FunctionPass *createMyRegisterAllocator() {
+ return new MyRegisterAllocator();
+}
</pre></div>
<p>Note that the signature of this function should match the type of
@@ -1737,9 +1735,9 @@ form:</p>
"installing" declaration, in the form;</p>
<div class="doc_code"><pre>
- static RegisterRegAlloc myRegAlloc("myregalloc",
- " my register allocator help string",
- createMyRegisterAllocator);
+static RegisterRegAlloc myRegAlloc("myregalloc",
+ "my register allocator help string",
+ createMyRegisterAllocator);
</pre></div>
<p>Note that the two spaces prior to the help string produce a tidy result on the
@@ -1790,11 +1788,11 @@ MachinePassRegistry RegisterMyPasses::Registry;
<p>And finally, declare the command line option for your passes. Example:</p>
<div class="doc_code"><pre>
- cl::opt&lt;RegisterMyPasses::FunctionPassCtor, false,
- RegisterPassParser&lt;RegisterMyPasses&gt; &gt;
- MyPassOpt("mypass",
- cl::init(&amp;createDefaultMyPass),
- cl::desc("my pass option help"));
+cl::opt&lt;RegisterMyPasses::FunctionPassCtor, false,
+ RegisterPassParser&lt;RegisterMyPasses&gt; &gt;
+MyPassOpt("mypass",
+ cl::init(&amp;createDefaultMyPass),
+ cl::desc("my pass option help"));
</pre></div>
<p>Here the command option is "mypass", with createDefaultMyPass as the default
@@ -1949,7 +1947,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-11 09:03:52 +0200 (Tue, 11 Oct 2011) $
+ Last modified: $Date: 2012-04-08 13:52:52 +0200 (Sun, 08 Apr 2012) $
</address>
</body>
diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in
index bc4ab9ff7513..20de0773f403 100644
--- a/docs/doxygen.cfg.in
+++ b/docs/doxygen.cfg.in
@@ -47,7 +47,7 @@ OUTPUT_DIRECTORY = @abs_top_builddir@/docs/doxygen
# source files, where putting all generated files in the same directory would
# otherwise cause performance problems for the file system.
-CREATE_SUBDIRS = YES
+CREATE_SUBDIRS = NO
# The OUTPUT_LANGUAGE tag is used to specify the language in which all
# documentation generated by doxygen is written. Doxygen will use this
diff --git a/docs/doxygen.footer b/docs/doxygen.footer
index 15585b8da733..c492e7df6cba 100644
--- a/docs/doxygen.footer
+++ b/docs/doxygen.footer
@@ -3,7 +3,7 @@
Generated on $datetime for <a href="http://llvm.org/">$projectname</a> by
<a href="http://www.doxygen.org"><img src="doxygen.png" alt="Doxygen"
align="middle" border="0"/>$doxygenversion</a><br>
-Copyright &copy; 2003-2009 University of Illinois at Urbana-Champaign.
+Copyright &copy; 2003-2012 University of Illinois at Urbana-Champaign.
All Rights Reserved.</p>
<hr>
diff --git a/docs/doxygen.header b/docs/doxygen.header
index a520434d6c25..56fb77fafdd5 100644
--- a/docs/doxygen.header
+++ b/docs/doxygen.header
@@ -2,7 +2,7 @@
<html><head>
<meta http-equiv="Content-Type" content="text/html;charset=iso-8859-1"/>
<meta name="keywords" content="LLVM,Low Level Virtual Machine,C++,doxygen,API,documentation"/>
-<meta name="description" content="C++ source code API documentation for the Low Level Virtual Machine (LLVM)."/>
+<meta name="description" content="C++ source code API documentation for LLVM."/>
<title>LLVM: $title</title>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head><body>
diff --git a/docs/doxygen.intro b/docs/doxygen.intro
index 547730cba79c..699dadc27e85 100644
--- a/docs/doxygen.intro
+++ b/docs/doxygen.intro
@@ -1,7 +1,7 @@
-/// @mainpage Low Level Virtual Machine
+/// @mainpage LLVM
///
/// @section main_intro Introduction
-/// Welcome to the Low Level Virtual Machine (LLVM).
+/// Welcome to LLVM.
///
/// This documentation describes the @b internal software that makes
/// up LLVM, not the @b external use of LLVM. There are no instructions
diff --git a/docs/index.html b/docs/index.html
index b17ca0388bed..edd476d02133 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -19,8 +19,9 @@ your documentation.</p>
<li><a href="#llvmdesign">LLVM Design</a></li>
<li><a href="/pubs/">LLVM Publications</a></li>
<li><a href="#userguide">LLVM User Guides</a></li>
- <li><a href="#llvmprog">General LLVM Programming Documentation</a></li>
+ <li><a href="#llvmprog">LLVM Programming Documentation</a></li>
<li><a href="#subsystems">LLVM Subsystem Documentation</a></li>
+ <li><a href="#develprocess">LLVM Development Process Documentation</a></li>
<li><a href="#maillist">LLVM Mailing Lists</a></li>
</ul>
</td><td class="right">
@@ -79,25 +80,7 @@ LLVM for a custom language, and the facilities LLVM offers in tutorial form.</li
policy towards developers and their contributions.</li>
<li><a href="CommandGuide/index.html">LLVM Command Guide</a> - A reference
-manual for the LLVM command line utilities ("man" pages for LLVM tools).<br>
-Current tools:
- <a href="/cmds/llvm-ar.html">llvm-ar</a>,
- <a href="/cmds/llvm-as.html">llvm-as</a>,
- <a href="/cmds/llvm-dis.html">llvm-dis</a>,
- <a href="/cmds/llvm-extract.html">llvm-extract</a>,
- <a href="/cmds/llvm-ld.html">llvm-ld</a>,
- <a href="/cmds/llvm-link.html">llvm-link</a>,
- <a href="/cmds/llvm-nm.html">llvm-nm</a>,
- <a href="/cmds/llvm-prof.html">llvm-prof</a>,
- <a href="/cmds/llvm-ranlib.html">llvm-ranlib</a>,
- <a href="/cmds/opt.html">opt</a>,
- <a href="/cmds/llc.html">llc</a>,
- <a href="/cmds/lli.html">lli</a>,
- <a href="/cmds/llvmgcc.html">llvm-gcc</a>,
- <a href="/cmds/llvmgxx.html">llvm-g++</a>,
- <a href="/cmds/bugpoint.html">bugpoint</a>,
- <a href="/cmds/llvm-bcanalyzer.html">llvm-bcanalyzer</a>,
-</li>
+manual for the LLVM command line utilities ("man" pages for LLVM tools).</li>
<li><a href="Passes.html">LLVM's Analysis and Transform Passes</a> - A list of
optimizations and analyses implemented in LLVM.</li>
@@ -115,8 +98,9 @@ the LLVM system.</li>
<li><a href="TestingGuide.html">LLVM Testing Infrastructure Guide</a> - A reference
manual for using the LLVM testing infrastructure.</li>
-<li><a href="GCCFEBuildInstrs.html">How to build the Ada/C/C++/Fortran front-ends</a> -
-Instructions for building gcc front-ends from source.</li>
+<li><a href="http://clang.llvm.org/get_started.html">How to build the C, C++, ObjC,
+and ObjC++ front end</a> - Instructions for building the clang front-end from
+source.</li>
<li><a href="Packaging.html">Packaging guide</a> - Advice on packaging
LLVM into a distribution.</li>
@@ -129,11 +113,15 @@ channel</a>. We often are on irc.oftc.net in the #llvm channel. If you are
using the mozilla browser, and have chatzilla installed, you can <a
href="irc://irc.oftc.net/llvm">join #llvm on irc.oftc.net</a> directly.</li>
+<li><a href="HowToAddABuilder.html">How To Add Your Build Configuration
+To LLVM Buildbot Infrastructure</a> - Instructions for adding a new builder to
+the LLVM buildbot master.</li>
+
</ul>
<!--=======================================================================-->
-<h2><a name="llvmprog">General LLVM Programming Documentation</a></h2>
+<h2><a name="llvmprog">LLVM Programming Documentation</a></h2>
<!--=======================================================================-->
<ul>
@@ -144,15 +132,6 @@ intermediate representation and the assembly form of the different nodes.</li>
Introduction to the general layout of the LLVM sourcebase, important classes
and APIs, and some tips &amp; tricks.</li>
-<li><a href="Projects.html">LLVM Project Guide</a> - How-to guide and
-templates for new projects that <em>use</em> the LLVM infrastructure. The
-templates (directory organization, Makefiles, and test tree) allow the project
-code to be located outside (or inside) the <tt>llvm/</tt> tree, while using LLVM
-header files and libraries.</li>
-
-<li><a href="MakefileGuide.html">LLVM Makefile Guide</a> - Describes how the
-LLVM makefiles work and how to use them.</li>
-
<li><a href="CommandLine.html">CommandLine library Reference Manual</a> -
Provides information on using the command line parsing library.</li>
@@ -163,13 +142,6 @@ efficient C++ code.</li>
<li><a href="ExtendingLLVM.html">Extending LLVM</a> - Look here to see how
to add instructions and intrinsics to LLVM.</li>
-<li><a href="UsingLibraries.html">Using LLVM Libraries</a> - Look here to
-understand how to use the libraries produced when LLVM is compiled.</li>
-
-<li><a href="HowToReleaseLLVM.html">How To Release LLVM To The Public</a> - This
-is a guide to preparing LLVM releases. Most developers can ignore it.</li>
-
-
<li><a href="http://llvm.org/doxygen/">Doxygen generated
documentation</a> (<a
href="http://llvm.org/doxygen/inherits.html">classes</a>)
@@ -243,6 +215,29 @@ information about Branch Prediction Information.</li>
</ul>
+<!--=======================================================================-->
+<h2><a name="develprocess">LLVM Development Process Documentation</a></h2>
+<!--=======================================================================-->
+
+<ul>
+
+<li><a href="Projects.html">LLVM Project Guide</a> - How-to guide and
+templates for new projects that <em>use</em> the LLVM infrastructure. The
+templates (directory organization, Makefiles, and test tree) allow the project
+code to be located outside (or inside) the <tt>llvm/</tt> tree, while using LLVM
+header files and libraries.</li>
+
+<li><a href="LLVMBuild.html">LLVMBuild Documentation</a> - Describes the
+LLVMBuild organization and files used by LLVM to specify component
+descriptions.</li>
+
+<li><a href="MakefileGuide.html">LLVM Makefile Guide</a> - Describes how the
+LLVM makefiles work and how to use them.</li>
+
+<li><a href="HowToReleaseLLVM.html">How To Release LLVM To The Public</a> - This
+is a guide to preparing LLVM releases. Most developers can ignore it.</li>
+
+</ul>
<!--=======================================================================-->
<h2><a name="maillist">LLVM Mailing Lists</a></h2>
@@ -286,7 +281,6 @@ times each day, making it a high volume list.</li>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-11-03 07:43:23 +0100 (Thu, 03 Nov 2011) $
+ Last modified: $Date: 2012-02-26 23:26:37 +0100 (Sun, 26 Feb 2012) $
</address>
</body></html>
-
diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html
index 2696d86d0a8d..e4707b3c11dc 100644
--- a/docs/tutorial/LangImpl2.html
+++ b/docs/tutorial/LangImpl2.html
@@ -1225,7 +1225,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-16 10:07:38 +0200 (Sun, 16 Oct 2011) $
+ Last modified: $Date: 2011-10-16 10:06:54 +0200 (Sun, 16 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index c9517a0b7cbd..9647b439f0bc 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -1262,7 +1262,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-16 10:07:38 +0200 (Sun, 16 Oct 2011) $
+ Last modified: $Date: 2011-10-16 10:06:54 +0200 (Sun, 16 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index e910cc1fbdda..06a8a13cedb4 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -1147,7 +1147,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-16 10:07:38 +0200 (Sun, 16 Oct 2011) $
+ Last modified: $Date: 2011-10-16 10:06:54 +0200 (Sun, 16 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index 95144dcc4167..0164ca3b98d2 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -1766,7 +1766,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-16 10:07:38 +0200 (Sun, 16 Oct 2011) $
+ Last modified: $Date: 2011-10-16 10:06:54 +0200 (Sun, 16 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 8876e8317983..4fcf10924363 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -1823,7 +1823,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-16 10:07:38 +0200 (Sun, 16 Oct 2011) $
+ Last modified: $Date: 2011-10-16 10:06:54 +0200 (Sun, 16 Oct 2011) $
</address>
</body>
</html>
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index 939987b2df60..ebf6514ae398 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -2158,7 +2158,7 @@ int main() {
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-10-16 10:07:38 +0200 (Sun, 16 Oct 2011) $
+ Last modified: $Date: 2011-10-16 10:06:54 +0200 (Sun, 16 Oct 2011) $
</address>
</body>
</html>
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index df6687f2805e..b002d1f496d2 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -134,7 +134,8 @@ void BrainF::header(LLVMContext& C) {
{
//@aberrormsg = internal constant [%d x i8] c"\00"
Constant *msg_0 =
- ConstantArray::get(C, "Error: The head has left the tape.", true);
+ ConstantDataArray::getString(C, "Error: The head has left the tape.",
+ true);
GlobalVariable *aberrormsg = new GlobalVariable(
*module,
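(The hunk above moves string-constant creation to the ConstantDataArray API
introduced in this release. A hedged sketch of the replacement call, with
Context and the message text as placeholders:)

  // ConstantDataArray::getString builds a compact i8 array constant from a
  // StringRef; the final argument appends a trailing NUL byte when true.
  llvm::Constant *Msg =
      llvm::ConstantDataArray::getString(Context, "some text", /*AddNull=*/true);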
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 20516a783b9d..cf078bb3f543 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -63,11 +63,6 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/TargetSelect.h"
-#ifdef OLD_EXC_SYSTEM
-// See use of UpgradeExceptionHandling(...) below
-#include "llvm/AutoUpgrade.h"
-#endif
-
// FIXME: Although all systems tested with (Linux, OS X) do not need this
// header file included, a user on Ubuntu reported undefined symbols
// for stderr and fprintf, and the addition of this include fixed the
@@ -188,9 +183,7 @@ static std::vector<std::string> ourTypeInfoNames;
static std::map<int, std::string> ourTypeInfoNamesIndex;
static llvm::StructType *ourTypeInfoType;
-#ifndef OLD_EXC_SYSTEM
static llvm::StructType *ourCaughtResultType;
-#endif
static llvm::StructType *ourExceptionType;
static llvm::StructType *ourUnwindExceptionType;
@@ -885,7 +878,7 @@ void generateStringPrint(llvm::LLVMContext &context,
llvm::Value *stringVar;
llvm::Constant *stringConstant =
- llvm::ConstantArray::get(context, toPrint);
+ llvm::ConstantDataArray::getString(context, toPrint);
if (useGlobal) {
// Note: Does not work without allocation
@@ -927,7 +920,8 @@ void generateIntegerPrint(llvm::LLVMContext &context,
llvm::Value &toPrint,
std::string format,
bool useGlobal = true) {
- llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format);
+ llvm::Constant *stringConstant =
+ llvm::ConstantDataArray::getString(context, format);
llvm::Value *stringVar;
if (useGlobal) {
@@ -969,9 +963,7 @@ void generateIntegerPrint(llvm::LLVMContext &context,
/// @param unwindResumeBlock unwind resume block
/// @param exceptionCaughtFlag reference exception caught/thrown status storage
/// @param exceptionStorage reference to exception pointer storage
-#ifndef OLD_EXC_SYSTEM
/// @param caughtResultStorage reference to landingpad result storage
-#endif
/// @returns newly created block
static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
llvm::Module &module,
@@ -982,23 +974,17 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
llvm::BasicBlock &terminatorBlock,
llvm::BasicBlock &unwindResumeBlock,
llvm::Value **exceptionCaughtFlag,
- llvm::Value **exceptionStorage
-#ifndef OLD_EXC_SYSTEM
- ,llvm::Value **caughtResultStorage
-#endif
- ) {
+ llvm::Value **exceptionStorage,
+ llvm::Value **caughtResultStorage) {
assert(exceptionCaughtFlag &&
"ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag "
"is NULL");
assert(exceptionStorage &&
"ExceptionDemo::createFinallyBlock(...):exceptionStorage "
"is NULL");
-
-#ifndef OLD_EXC_SYSTEM
assert(caughtResultStorage &&
"ExceptionDemo::createFinallyBlock(...):caughtResultStorage "
"is NULL");
-#endif
*exceptionCaughtFlag = createEntryBlockAlloca(toAddTo,
"exceptionCaught",
@@ -1011,13 +997,11 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
exceptionStorageType,
llvm::ConstantPointerNull::get(
exceptionStorageType));
-#ifndef OLD_EXC_SYSTEM
*caughtResultStorage = createEntryBlockAlloca(toAddTo,
"caughtResultStorage",
ourCaughtResultType,
llvm::ConstantAggregateZero::get(
ourCaughtResultType));
-#endif
llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
blockName,
@@ -1171,9 +1155,7 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch);
llvm::Value *exceptionCaughtFlag = NULL;
llvm::Value *exceptionStorage = NULL;
-#ifndef OLD_EXC_SYSTEM
llvm::Value *caughtResultStorage = NULL;
-#endif
// Finally block which will branch to unwindResumeBlock if
// exception is not caught. Initializes/allocates stack locations.
@@ -1186,10 +1168,8 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
*endBlock,
*unwindResumeBlock,
&exceptionCaughtFlag,
- &exceptionStorage
-#ifndef OLD_EXC_SYSTEM
- ,&caughtResultStorage
-#endif
+ &exceptionStorage,
+ &caughtResultStorage
);
for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
@@ -1250,15 +1230,7 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
builder.SetInsertPoint(unwindResumeBlock);
-
-#ifndef OLD_EXC_SYSTEM
builder.CreateResume(builder.CreateLoad(caughtResultStorage));
-#else
- llvm::Function *resumeOurException = module.getFunction("_Unwind_Resume");
- builder.CreateCall(resumeOurException,
- builder.CreateLoad(exceptionStorage));
- builder.CreateUnreachable();
-#endif
// Exception Block
@@ -1266,7 +1238,6 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
llvm::Function *personality = module.getFunction("ourPersonality");
-#ifndef OLD_EXC_SYSTEM
llvm::LandingPadInst *caughtResult =
builder.CreateLandingPad(ourCaughtResultType,
personality,
@@ -1290,42 +1261,6 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
builder.CreateStore(caughtResult, caughtResultStorage);
builder.CreateStore(unwindException, exceptionStorage);
builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
-#else
- llvm::Function *ehException = module.getFunction("llvm.eh.exception");
-
- // Retrieve thrown exception
- llvm::Value *unwindException = builder.CreateCall(ehException);
-
- // Store exception and flag
- builder.CreateStore(unwindException, exceptionStorage);
- builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
- llvm::Value *functPtr = builder.CreatePointerCast(personality,
- builder.getInt8PtrTy());
-
- args.clear();
- args.push_back(unwindException);
- args.push_back(functPtr);
-
- // Note: Skipping index 0
- for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
- // Set up type infos to be caught
- args.push_back(module.getGlobalVariable(
- ourTypeInfoNames[exceptionTypesToCatch[i]]));
- }
-
- args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0));
-
- llvm::Function *ehSelector = module.getFunction("llvm.eh.selector");
-
- // Set up this exeption block as the landing pad which will handle
- // given type infos. See case Intrinsic::eh_selector in
- // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...)
- // implemented in FunctionLoweringInfo.cpp to see how the implementation
- // handles this call. This landing pad (this exception block), will be
- // called either because it nees to cleanup (call finally) or a type
- // info was found which matched the thrown exception.
- llvm::Value *retTypeInfoIndex = builder.CreateCall(ehSelector, args);
-#endif
// Retrieve exception_class member from thrown exception
// (_Unwind_Exception instance). This member tells us whether or not
@@ -1404,12 +1339,6 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
catchBlocks[nextTypeToCatch]);
}
-#ifdef OLD_EXC_SYSTEM
- // Must be run before verifier
- UpgradeExceptionHandling(&module);
-#endif
-
-
llvm::verifyFunction(*ret);
fpm.run(*ret);
@@ -1709,8 +1638,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
ourTypeInfoType = llvm::StructType::get(context,
TypeArray(builder.getInt32Ty()));
-#ifndef OLD_EXC_SYSTEM
-
llvm::Type *caughtResultFieldTypes[] = {
builder.getInt8PtrTy(),
builder.getInt32Ty()
@@ -1720,8 +1647,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
ourCaughtResultType = llvm::StructType::get(context,
TypeArray(caughtResultFieldTypes));
-#endif
-
// Create OurException type
ourExceptionType = llvm::StructType::get(context,
TypeArray(ourTypeInfoType));
@@ -1965,14 +1890,6 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
true,
false);
- // llvm.eh.selector intrinsic
-
- getDeclaration(&module, llvm::Intrinsic::eh_selector);
-
- // llvm.eh.exception intrinsic
-
- getDeclaration(&module, llvm::Intrinsic::eh_exception);
-
// llvm.eh.typeid.for intrinsic
getDeclaration(&module, llvm::Intrinsic::eh_typeid_for);
@@ -2005,7 +1922,8 @@ int main(int argc, char *argv[]) {
}
// If not set, exception handling will not be turned on
- llvm::JITExceptionHandling = true;
+ llvm::TargetOptions Opts;
+ Opts.JITExceptionHandling = true;
llvm::InitializeNativeTarget();
llvm::LLVMContext &context = llvm::getGlobalContext();
@@ -2018,6 +1936,7 @@ int main(int argc, char *argv[]) {
llvm::EngineBuilder factory(module);
factory.setEngineKind(llvm::EngineKind::JIT);
factory.setAllocateGVsWithCode(false);
+ factory.setTargetOptions(Opts);
llvm::ExecutionEngine *executionEngine = factory.create();
{
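Pieced together, the hunks above replace the old global llvm::JITExceptionHandling flag with a per-engine llvm::TargetOptions handed to the EngineBuilder. A minimal sketch of the new setup, assuming module is the llvm::Module* built earlier in this example:

    // TargetOptions now carries the JIT exception-handling switch that
    // used to be the global llvm::JITExceptionHandling flag.
    llvm::TargetOptions Opts;
    Opts.JITExceptionHandling = true;   // stays off unless explicitly set

    llvm::EngineBuilder factory(module);
    factory.setEngineKind(llvm::EngineKind::JIT);
    factory.setAllocateGVsWithCode(false);
    factory.setTargetOptions(Opts);     // options travel with the builder
    llvm::ExecutionEngine *executionEngine = factory.create();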
diff --git a/examples/LLVMBuild.txt b/examples/LLVMBuild.txt
new file mode 100644
index 000000000000..4d06ffb995f2
--- /dev/null
+++ b/examples/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./examples/LLVMBuild.txt ---------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Group
+name = Examples
+parent = $ROOT
diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp
index 3e483275edbb..305cf1dde06f 100644
--- a/examples/ParallelJIT/ParallelJIT.cpp
+++ b/examples/ParallelJIT/ParallelJIT.cpp
@@ -209,7 +209,8 @@ private:
waitFor = 0;
int result = pthread_cond_broadcast( &condition );
- assert(result == 0); result=result;
+ (void)result;
+ assert(result == 0);
}
size_t n;
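The reordering above is the usual idiom for a variable that only an assert reads: in NDEBUG builds assert() compiles away, so the (void) cast keeps the variable referenced and warning-free. A minimal sketch of the pattern:

    #include <cassert>
    #include <pthread.h>

    static void broadcast(pthread_cond_t *condition) {
      int result = pthread_cond_broadcast(condition);
      (void)result;         // referenced even when assert() is compiled out
      assert(result == 0);  // checked only in debug builds
    }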
diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h
index e1e44872b162..f0bdddc50ab7 100644
--- a/include/llvm-c/Analysis.h
+++ b/include/llvm-c/Analysis.h
@@ -25,6 +25,12 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCAnalysis Analysis
+ * @ingroup LLVMC
+ *
+ * @{
+ */
typedef enum {
LLVMAbortProcessAction, /* verifier will print to stderr and abort() */
@@ -48,6 +54,10 @@ LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action);
void LLVMViewFunctionCFG(LLVMValueRef Fn);
void LLVMViewFunctionCFGOnly(LLVMValueRef Fn);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h
index 6db66074b31a..522803518398 100644
--- a/include/llvm-c/BitReader.h
+++ b/include/llvm-c/BitReader.h
@@ -25,6 +25,12 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCBitReader Bit Reader
+ * @ingroup LLVMC
+ *
+ * @{
+ */
/* Builds a module from the bitcode in the specified memory buffer, returning a
reference to the module via the OutModule parameter. Returns 0 on success.
@@ -59,6 +65,10 @@ LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
LLVMModuleProviderRef *OutMP,
char **OutMessage);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h
index bcbfb111492a..ba5a6778c942 100644
--- a/include/llvm-c/BitWriter.h
+++ b/include/llvm-c/BitWriter.h
@@ -25,6 +25,12 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCBitWriter Bit Writer
+ * @ingroup LLVMC
+ *
+ * @{
+ */
/*===-- Operations on modules ---------------------------------------------===*/
@@ -39,6 +45,10 @@ int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
descriptor. Returns 0 on success. Closes the Handle. */
int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int Handle);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index d23b91c4e0de..77746069a25c 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -10,24 +10,6 @@
|* This header declares the C interface to libLLVMCore.a, which implements *|
|* the LLVM intermediate representation. *|
|* *|
-|* LLVM uses a polymorphic type hierarchy which C cannot represent, therefore *|
-|* parameters must be passed as base types. Despite the declared types, most *|
-|* of the functions provided operate only on branches of the type hierarchy. *|
-|* The declared parameter names are descriptive and specify which type is *|
-|* required. Additionally, each type hierarchy is documented along with the *|
-|* functions that operate upon it. For more detail, refer to LLVM's C++ code. *|
-|* If in doubt, refer to Core.cpp, which performs paramter downcasts in the *|
-|* form unwrap<RequiredType>(Param). *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-|* When included into a C++ source file, also declares 'wrap' and 'unwrap' *|
-|* helpers to perform opaque reference<-->pointer conversions. These helpers *|
-|* are shorter and more tightly typed than writing the casts by hand when *|
-|* authoring bindings. In assert builds, they will do runtime type checking. *|
-|* *|
\*===----------------------------------------------------------------------===*/
#ifndef LLVM_C_CORE_H
@@ -46,50 +28,121 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMC LLVM-C: C interface to LLVM
+ *
+ * This module exposes parts of the LLVM library as a C API.
+ *
+ * @{
+ */
+
+/**
+ * @defgroup LLVMCTransforms Transforms
+ */
+
+/**
+ * @defgroup LLVMCCore Core
+ *
+ * This module provides an interface to libLLVMCore, which implements
+ * the LLVM intermediate representation as well as other related types
+ * and utilities.
+ *
+ * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore
+ * parameters must be passed as base types. Despite the declared types, most
+ * of the functions provided operate only on branches of the type hierarchy.
+ * The declared parameter names are descriptive and specify which type is
+ * required. Additionally, each type hierarchy is documented along with the
+ * functions that operate upon it. For more detail, refer to LLVM's C++ code.
+ * If in doubt, refer to Core.cpp, which performs parameter downcasts in the
+ * form unwrap<RequiredType>(Param).
+ *
+ * Many exotic languages can interoperate with C code but have a harder time
+ * with C++ due to name mangling. So in addition to C, this interface enables
+ * tools written in such languages.
+ *
+ * When included into a C++ source file, also declares 'wrap' and 'unwrap'
+ * helpers to perform opaque reference<-->pointer conversions. These helpers
+ * are shorter and more tightly typed than writing the casts by hand when
+ * authoring bindings. In assert builds, they will do runtime type checking.
+ *
+ * @{
+ */
+
+/**
+ * @defgroup LLVMCCoreTypes Types and Enumerations
+ *
+ * @{
+ */
typedef int LLVMBool;
/* Opaque types. */
/**
- * The top-level container for all LLVM global data. See the LLVMContext class.
+ * The top-level container for all LLVM global data. See the LLVMContext class.
*/
typedef struct LLVMOpaqueContext *LLVMContextRef;
/**
* The top-level container for all other LLVM Intermediate Representation (IR)
- * objects. See the llvm::Module class.
+ * objects.
+ *
+ * @see llvm::Module
*/
typedef struct LLVMOpaqueModule *LLVMModuleRef;
/**
- * Each value in the LLVM IR has a type, an LLVMTypeRef. See the llvm::Type
- * class.
+ * Each value in the LLVM IR has a type, an LLVMTypeRef.
+ *
+ * @see llvm::Type
*/
typedef struct LLVMOpaqueType *LLVMTypeRef;
+/**
+ * Represents an individual value in LLVM IR.
+ *
+ * This models llvm::Value.
+ */
typedef struct LLVMOpaqueValue *LLVMValueRef;
+
+/**
+ * Represents a basic block of instructions in LLVM IR.
+ *
+ * This models llvm::BasicBlock.
+ */
typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
+
+/**
+ * Represents an LLVM basic block builder.
+ *
+ * This models llvm::IRBuilder.
+ */
typedef struct LLVMOpaqueBuilder *LLVMBuilderRef;
-/* Interface used to provide a module to JIT or interpreter. This is now just a
- * synonym for llvm::Module, but we have to keep using the different type to
- * keep binary compatibility.
+/**
+ * Interface used to provide a module to JIT or interpreter.
+ * This is now just a synonym for llvm::Module, but we have to keep using the
+ * different type to keep binary compatibility.
*/
typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef;
-/* Used to provide a module to JIT or interpreter.
- * See the llvm::MemoryBuffer class.
+/**
+ * Used to provide a module to JIT or interpreter.
+ *
+ * @see llvm::MemoryBuffer
*/
typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef;
-/** See the llvm::PassManagerBase class. */
+/** @see llvm::PassManagerBase */
typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
-/** See the llvm::PassRegistry class. */
+/** @see llvm::PassRegistry */
typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef;
-/** Used to get the users and usees of a Value. See the llvm::Use class. */
+/**
+ * Used to get the users and usees of a Value.
+ *
+ * @see llvm::Use */
typedef struct LLVMOpaqueUse *LLVMUseRef;
typedef enum {
@@ -119,6 +172,11 @@ typedef enum {
LLVMReturnsTwice = 1 << 29,
LLVMUWTable = 1 << 30,
LLVMNonLazyBind = 1 << 31
+
+ // FIXME: This attribute is currently not included in the C API as
+ // a temporary measure until the API/ABI impact to the C API is understood
+ // and the path forward agreed upon.
+ //LLVMAddressSafety = 1ULL << 32
} LLVMAttribute;
typedef enum {
@@ -195,14 +253,13 @@ typedef enum {
/* Exception Handling Operators */
LLVMResume = 58,
- LLVMLandingPad = 59,
- LLVMUnwind = 60
-
+ LLVMLandingPad = 59
} LLVMOpcode;
typedef enum {
LLVMVoidTypeKind, /**< type with no size */
+ LLVMHalfTypeKind, /**< 16 bit floating point type */
LLVMFloatTypeKind, /**< 32 bit floating point type */
LLVMDoubleTypeKind, /**< 64 bit floating point type */
LLVMX86_FP80TypeKind, /**< 80 bit floating point type (X87) */
@@ -294,6 +351,10 @@ typedef enum {
LLVMLandingPadFilter /**< A filter clause */
} LLVMLandingPadClauseTy;
+/**
+ * @}
+ */
+
void LLVMInitializeCore(LLVMPassRegistryRef R);
@@ -302,49 +363,233 @@ void LLVMInitializeCore(LLVMPassRegistryRef R);
void LLVMDisposeMessage(char *Message);
-/*===-- Contexts ----------------------------------------------------------===*/
+/**
+ * @defgroup LLVMCCoreContext Contexts
+ *
+ * Contexts are execution states for the core LLVM IR system.
+ *
+ * Most types are tied to a context instance. Multiple contexts can
+ * exist simultaneously. A single context is not thread safe. However,
+ * different contexts can execute on different threads simultaneously.
+ *
+ * @{
+ */
-/* Create and destroy contexts. */
+/**
+ * Create a new context.
+ *
+ * Every call to this function should be paired with a call to
+ * LLVMContextDispose() or the context will leak memory.
+ */
LLVMContextRef LLVMContextCreate(void);
+
+/**
+ * Obtain the global context instance.
+ */
LLVMContextRef LLVMGetGlobalContext(void);
+
+/**
+ * Destroy a context instance.
+ *
+ * This should be called for every call to LLVMContextCreate() or memory
+ * will be leaked.
+ */
void LLVMContextDispose(LLVMContextRef C);
unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
unsigned SLen);
unsigned LLVMGetMDKindID(const char* Name, unsigned SLen);
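A minimal sketch of the create/dispose pairing described above:

    static void contextDemo(void) {
      LLVMContextRef ctx = LLVMContextCreate();
      unsigned kind = LLVMGetMDKindIDInContext(ctx, "dbg", 3);
      (void)kind;              /* ... build IR in ctx ... */
      LLVMContextDispose(ctx); /* required for every LLVMContextCreate() */
    }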
-/*===-- Modules -----------------------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreModule Modules
+ *
+ * Modules represent the top-level structure in an LLVM program. An LLVM
+ * module is effectively a translation unit or a collection of
+ * translation units merged together.
+ *
+ * @{
+ */
-/* Create and destroy modules. */
-/** See llvm::Module::Module. */
+/**
+ * Create a new, empty module in the global context.
+ *
+ * This is equivalent to calling LLVMModuleCreateWithNameInContext with
+ * LLVMGetGlobalContext() as the context parameter.
+ *
+ * Every invocation should be paired with LLVMDisposeModule() or memory
+ * will be leaked.
+ */
LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID);
+
+/**
+ * Create a new, empty module in a specific context.
+ *
+ * Every invocation should be paired with LLVMDisposeModule() or memory
+ * will be leaked.
+ */
LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID,
LLVMContextRef C);
-/** See llvm::Module::~Module. */
+/**
+ * Destroy a module instance.
+ *
+ * This must be called for every created module or memory will be
+ * leaked.
+ */
void LLVMDisposeModule(LLVMModuleRef M);
-/** Data layout. See Module::getDataLayout. */
+/**
+ * Obtain the data layout for a module.
+ *
+ * @see Module::getDataLayout()
+ */
const char *LLVMGetDataLayout(LLVMModuleRef M);
+
+/**
+ * Set the data layout for a module.
+ *
+ * @see Module::setDataLayout()
+ */
void LLVMSetDataLayout(LLVMModuleRef M, const char *Triple);
-/** Target triple. See Module::getTargetTriple. */
+/**
+ * Obtain the target triple for a module.
+ *
+ * @see Module::getTargetTriple()
+ */
const char *LLVMGetTarget(LLVMModuleRef M);
+
+/**
+ * Set the target triple for a module.
+ *
+ * @see Module::setTargetTriple()
+ */
void LLVMSetTarget(LLVMModuleRef M, const char *Triple);
-/** See Module::dump. */
+/**
+ * Dump a representation of a module to stderr.
+ *
+ * @see Module::dump()
+ */
void LLVMDumpModule(LLVMModuleRef M);
-/** See Module::setModuleInlineAsm. */
+/**
+ * Set inline assembly for a module.
+ *
+ * @see Module::setModuleInlineAsm()
+ */
void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm);
-/** See Module::getContext. */
+/**
+ * Obtain the context to which this module is associated.
+ *
+ * @see Module::getContext()
+ */
LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
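A minimal sketch of the module lifecycle in the global context; the data layout and triple strings are illustrative values, not requirements:

    static void moduleDemo(void) {
      LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
      LLVMSetDataLayout(mod, "e-p:64:64");            /* example layout */
      LLVMSetTarget(mod, "x86_64-unknown-linux-gnu"); /* example triple */
      LLVMDumpModule(mod);     /* writes textual IR to stderr */
      LLVMDisposeModule(mod);  /* pair every creation with a dispose */
    }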
-/*===-- Types -------------------------------------------------------------===*/
+/**
+ * Obtain a Type from a module by its registered name.
+ */
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
+
+/**
+ * Obtain the number of operands for named metadata in a module.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ */
+unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name);
+
+/**
+ * Obtain the named metadata operands for a module.
+ *
+ * The passed LLVMValueRef pointer should refer to an array of
+ * LLVMValueRef at least LLVMGetNamedMetadataNumOperands long. This
+ * array will be populated with the LLVMValueRef instances. Each
+ * instance corresponds to a llvm::MDNode.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ * @see llvm::MDNode::getOperand()
+ */
+void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest);
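The named-metadata accessors follow a count-then-fill convention; a sketch, with "llvm.module.flags" standing in for any named-metadata name:

    #include <stdlib.h>

    static void readNamedMD(LLVMModuleRef mod) {
      unsigned n = LLVMGetNamedMetadataNumOperands(mod, "llvm.module.flags");
      LLVMValueRef *ops = (LLVMValueRef *)malloc(n * sizeof(LLVMValueRef));
      LLVMGetNamedMetadataOperands(mod, "llvm.module.flags", ops);
      /* ops[0..n-1] each wrap an llvm::MDNode */
      free(ops);
    }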
-/* LLVM types conform to the following hierarchy:
- *
+/**
+ * Add an operand to named metadata.
+ *
+ * @see llvm::Module::getNamedMetadata()
+ * @see llvm::MDNode::addOperand()
+ */
+void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
+ LLVMValueRef Val);
+
+/**
+ * Add a function to a module under a specified name.
+ *
+ * @see llvm::Function::Create()
+ */
+LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
+ LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain a Function value from a Module by its name.
+ *
+ * The returned value corresponds to a llvm::Function value.
+ *
+ * @see llvm::Module::getFunction()
+ */
+LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name);
+
+/**
+ * Obtain an iterator to the first Function in a Module.
+ *
+ * @see llvm::Module::begin()
+ */
+LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M);
+
+/**
+ * Obtain an iterator to the last Function in a Module.
+ *
+ * @see llvm::Module::end()
+ */
+LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M);
+
+/**
+ * Advance a Function iterator to the next Function.
+ *
+ * Returns NULL if the iterator was already at the end and there are no more
+ * functions.
+ */
+LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn);
+
+/**
+ * Decrement a Function iterator to the previous Function.
+ *
+ * Returns NULL if the iterator was already at the beginning and there are
+ * no previous functions.
+ */
+LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn);
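The first/next pairs above form the usual C API iterator pattern; a sketch over the functions of a module (LLVMGetValueName is declared further down in this header):

    #include <stdio.h>

    static void listFunctions(LLVMModuleRef mod) {
      for (LLVMValueRef fn = LLVMGetFirstFunction(mod); fn != NULL;
           fn = LLVMGetNextFunction(fn)) {
        printf("function: %s\n", LLVMGetValueName(fn));
      }
    }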
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreType Types
+ *
+ * Types represent the type of a value.
+ *
+ * Types are associated with a context instance. The context internally
+ * deduplicates types so there is only one instance of a specific type
+ * alive at a time. In other words, a unique type is shared among all
+ * consumers within a context.
+ *
+ * A Type in the C API corresponds to llvm::Type.
+ *
+ * Types have the following hierarchy:
+ *
* types:
* integer type
* real type
@@ -356,16 +601,44 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M);
* void type
* label type
* opaque type
+ *
+ * @{
*/
-/** See llvm::LLVMTypeKind::getTypeID. */
+/**
+ * Obtain the enumerated type of a Type instance.
+ *
+ * @see llvm::Type::getTypeID()
+ */
LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty);
+
+/**
+ * Whether the type has a known size.
+ *
+ * Things that don't have a size are abstract types, labels, and void.
+ *
+ * @see llvm::Type::isSized()
+ */
LLVMBool LLVMTypeIsSized(LLVMTypeRef Ty);
-/** See llvm::LLVMType::getContext. */
+/**
+ * Obtain the context to which this type instance is associated.
+ *
+ * @see llvm::Type::getContext()
+ */
LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty);
-/* Operations on integer types */
+/**
+ * @defgroup LLVMCCoreTypeInt Integer Types
+ *
+ * Functions in this section operate on integer types.
+ *
+ * @{
+ */
+
+/**
+ * Obtain an integer type from a context with specified bit width.
+ */
LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C);
@@ -373,6 +646,10 @@ LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits);
+/**
+ * Obtain an integer type from the global context with a specified bit
+ * width.
+ */
LLVMTypeRef LLVMInt1Type(void);
LLVMTypeRef LLVMInt8Type(void);
LLVMTypeRef LLVMInt16Type(void);
@@ -381,68 +658,336 @@ LLVMTypeRef LLVMInt64Type(void);
LLVMTypeRef LLVMIntType(unsigned NumBits);
unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy);
-/* Operations on real types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeFloat Floating Point Types
+ *
+ * @{
+ */
+
+/**
+ * Obtain a 16-bit floating point type from a context.
+ */
+LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 32-bit floating point type from a context.
+ */
LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 64-bit floating point type from a context.
+ */
LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 80-bit floating point type (X87) from a context.
+ */
LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 128-bit floating point type (112-bit mantissa) from a
+ * context.
+ */
LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C);
+
+/**
+ * Obtain a 128-bit floating point type (two 64-bits) from a context.
+ */
LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C);
+/**
+ * Obtain a floating point type from the global context.
+ *
+ * These map to the functions in this group of the same name.
+ */
+LLVMTypeRef LLVMHalfType(void);
LLVMTypeRef LLVMFloatType(void);
LLVMTypeRef LLVMDoubleType(void);
LLVMTypeRef LLVMX86FP80Type(void);
LLVMTypeRef LLVMFP128Type(void);
LLVMTypeRef LLVMPPCFP128Type(void);
-/* Operations on function types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeFunction Function Types
+ *
+ * @{
+ */
+
+/**
+ * Obtain a function type consisting of a specified signature.
+ *
+ * The function is defined as a tuple of a return Type, a list of
+ * parameter types, and whether the function is variadic.
+ */
LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
LLVMTypeRef *ParamTypes, unsigned ParamCount,
LLVMBool IsVarArg);
+
+/**
+ * Returns whether a function type is variadic.
+ */
LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain the Type this function Type returns.
+ */
LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain the number of parameters this function accepts.
+ */
unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy);
+
+/**
+ * Obtain the types of a function's parameters.
+ *
+ * The Dest parameter should point to a pre-allocated array of
+ * LLVMTypeRef at least LLVMCountParamTypes() large. On return, the
+ * first LLVMCountParamTypes() entries in the array will be populated
+ * with LLVMTypeRef instances.
+ *
+ * @param FunctionTy The function type to operate on.
+ * @param Dest Memory address of an array to be filled with result.
+ */
void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest);
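A sketch of building a signature and reading it back through the pre-allocated array convention described above:

    static void functionTypeDemo(void) {
      /* i32 (i32, i32), not variadic */
      LLVMTypeRef params[2] = { LLVMInt32Type(), LLVMInt32Type() };
      LLVMTypeRef fnTy = LLVMFunctionType(LLVMInt32Type(), params, 2, 0);

      LLVMTypeRef dest[2];           /* >= LLVMCountParamTypes(fnTy) */
      LLVMGetParamTypes(fnTy, dest); /* fills dest[0] and dest[1] */
    }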
-/* Operations on struct types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeStruct Structure Types
+ *
+ * These functions relate to LLVMTypeRef instances.
+ *
+ * @see llvm::StructType
+ *
+ * @{
+ */
+
+/**
+ * Create a new structure type in a context.
+ *
+ * A structure is specified by a list of inner elements/types and
+ * whether these can be packed together.
+ *
+ * @see llvm::StructType::create()
+ */
LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
unsigned ElementCount, LLVMBool Packed);
+
+/**
+ * Create a new structure type in the global context.
+ *
+ * @see llvm::StructType::create()
+ */
LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount,
LLVMBool Packed);
+
+/**
+ * Create an empty structure in a context having a specified name.
+ *
+ * @see llvm::StructType::create()
+ */
LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name);
+
+/**
+ * Obtain the name of a structure.
+ *
+ * @see llvm::StructType::getName()
+ */
const char *LLVMGetStructName(LLVMTypeRef Ty);
+
+/**
+ * Set the contents of a structure type.
+ *
+ * @see llvm::StructType::setBody()
+ */
void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
unsigned ElementCount, LLVMBool Packed);
+/**
+ * Get the number of elements defined inside the structure.
+ *
+ * @see llvm::StructType::getNumElements()
+ */
unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
+
+/**
+ * Get the elements within a structure.
+ *
+ * The function is passed the address of a pre-allocated array of
+ * LLVMTypeRef at least LLVMCountStructElementTypes() long. After
+ * invocation, this array will be populated with the structure's
+ * elements. The objects in the destination array will have a lifetime
+ * of the structure type itself, which is the lifetime of the context it
+ * is contained in.
+ */
void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest);
+
+/**
+ * Determine whether a structure is packed.
+ *
+ * @see llvm::StructType::isPacked()
+ */
LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
+
+/**
+ * Determine whether a structure is opaque.
+ *
+ * @see llvm::StructType::isOpaque()
+ */
LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
-LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
+/**
+ * @}
+ */
-/* Operations on array, pointer, and vector types (sequence types) */
-LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
-LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace);
-LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount);
+/**
+ * @defgroup LLVMCCoreTypeSequential Sequential Types
+ *
+ * Sequential types represent "arrays" of types. This is a superclass
+ * for array, vector, and pointer types.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the type of elements within a sequential type.
+ *
+ * This works on array, vector, and pointer types.
+ *
+ * @see llvm::SequentialType::getElementType()
+ */
LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty);
+
+/**
+ * Create a fixed size array type that refers to a specific type.
+ *
+ * The created type will exist in the context that its element type
+ * exists in.
+ *
+ * @see llvm::ArrayType::get()
+ */
+LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
+
+/**
+ * Obtain the length of an array type.
+ *
+ * This only works on types that represent arrays.
+ *
+ * @see llvm::ArrayType::getNumElements()
+ */
unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy);
+
+/**
+ * Create a pointer type that points to a defined type.
+ *
+ * The created type will exist in the context that its pointee type
+ * exists in.
+ *
+ * @see llvm::PointerType::get()
+ */
+LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace);
+
+/**
+ * Obtain the address space of a pointer type.
+ *
+ * This only works on types that represent pointers.
+ *
+ * @see llvm::PointerType::getAddressSpace()
+ */
unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy);
+
+/**
+ * Create a vector type that contains a defined type and has a specific
+ * number of elements.
+ *
+ * The created type will exist in the context that its element type
+ * exists in.
+ *
+ * @see llvm::VectorType::get()
+ */
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount);
+
+/**
+ * Obtain the number of elements in a vector type.
+ *
+ * This only works on types that represent vectors.
+ *
+ * @see llvm::VectorType::getNumElements()
+ */
unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy);
-/* Operations on other types */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreTypeOther Other Types
+ *
+ * @{
+ */
+
+/**
+ * Create a void type in a context.
+ */
LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C);
+
+/**
+ * Create a label type in a context.
+ */
LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C);
+
+/**
+ * Create a X86 MMX type in a context.
+ */
LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C);
+/**
+ * These are similar to the above functions except they operate on the
+ * global context.
+ */
LLVMTypeRef LLVMVoidType(void);
LLVMTypeRef LLVMLabelType(void);
LLVMTypeRef LLVMX86MMXType(void);
-/*===-- Values ------------------------------------------------------------===*/
+/**
+ * @}
+ */
-/* The bulk of LLVM's object model consists of values, which comprise a very
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValues Values
+ *
+ * The bulk of LLVM's object model consists of values, which comprise a very
* rich type hierarchy.
+ *
+ * LLVMValueRef essentially represents llvm::Value. There is a rich
+ * hierarchy of classes within this type. Depending on the instance
+ * obtained, not all APIs are available.
+ *
+ * Callers can determine the type of a LLVMValueRef by calling the
+ * LLVMIsA* family of functions (e.g. LLVMIsAArgument()). These
+ * functions are defined by a macro, so it isn't obvious which are
+ * available by looking at the Doxygen source code. Instead, look at the
+ * source definition of LLVM_FOR_EACH_VALUE_SUBCLASS and note the list
+ * of value names given. These value names also correspond to classes in
+ * the llvm::Value hierarchy.
+ *
+ * @{
*/
#define LLVM_FOR_EACH_VALUE_SUBCLASS(macro) \
@@ -473,8 +1018,6 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(IntrinsicInst) \
macro(DbgInfoIntrinsic) \
macro(DbgDeclareInst) \
- macro(EHExceptionInst) \
- macro(EHSelectorInst) \
macro(MemIntrinsic) \
macro(MemCpyInst) \
macro(MemMoveInst) \
@@ -518,92 +1061,399 @@ LLVMTypeRef LLVMX86MMXType(void);
macro(LoadInst) \
macro(VAArgInst)
-/* Operations on all values */
+/**
+ * @defgroup LLVMCCoreValueGeneral General APIs
+ *
+ * Functions in this section work on all LLVMValueRef instances,
+ * regardless of their sub-type. They correspond to functions available
+ * on llvm::Value.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the type of a value.
+ *
+ * @see llvm::Value::getType()
+ */
LLVMTypeRef LLVMTypeOf(LLVMValueRef Val);
+
+/**
+ * Obtain the string name of a value.
+ *
+ * @see llvm::Value::getName()
+ */
const char *LLVMGetValueName(LLVMValueRef Val);
+
+/**
+ * Set the string name of a value.
+ *
+ * @see llvm::Value::setName()
+ */
void LLVMSetValueName(LLVMValueRef Val, const char *Name);
+
+/**
+ * Dump a representation of a value to stderr.
+ *
+ * @see llvm::Value::dump()
+ */
void LLVMDumpValue(LLVMValueRef Val);
+
+/**
+ * Replace all uses of a value with another one.
+ *
+ * @see llvm::Value::replaceAllUsesWith()
+ */
void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal);
-int LLVMHasMetadata(LLVMValueRef Val);
-LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID);
-void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node);
-/* Conversion functions. Return the input value if it is an instance of the
- specified class, otherwise NULL. See llvm::dyn_cast_or_null<>. */
+/**
+ * Determine whether the specified constant instance is constant.
+ */
+LLVMBool LLVMIsConstant(LLVMValueRef Val);
+
+/**
+ * Determine whether a value instance is undefined.
+ */
+LLVMBool LLVMIsUndef(LLVMValueRef Val);
+
+/**
+ * Convert value instances between types.
+ *
+ * Internally, a LLVMValueRef is "pinned" to a specific type. This
+ * series of functions allows you to cast an instance to a specific
+ * type.
+ *
+ * If the cast is not valid for the specified type, NULL is returned.
+ *
+ * @see llvm::dyn_cast_or_null<>
+ */
#define LLVM_DECLARE_VALUE_CAST(name) \
LLVMValueRef LLVMIsA##name(LLVMValueRef Val);
LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST)
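A sketch of the dyn_cast-style narrowing these declarations expand to; CallInst is assumed to be among the subclasses in the full LLVM_FOR_EACH_VALUE_SUBCLASS list:

    static void makeTailCallIfCall(LLVMValueRef val) {
      /* NULL unless val is actually a CallInst */
      LLVMValueRef call = LLVMIsACallInst(val);
      if (call != NULL)
        LLVMSetTailCall(call, 1);  /* declared further down in this header */
    }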
-/* Operations on Uses */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueUses Usage
+ *
+ * This module defines functions that allow you to inspect the uses of a
+ * LLVMValueRef.
+ *
+ * It is possible to obtain a LLVMUseRef for any LLVMValueRef instance.
+ * Each LLVMUseRef (which corresponds to a llvm::Use instance) holds a
+ * llvm::User and llvm::Value.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the first use of a value.
+ *
+ * Uses are obtained in an iterator fashion. First, call this function
+ * to obtain a reference to the first use. Then, call LLVMGetNextUse()
+ * on that instance and all subsequently obtained instances until
+ * LLVMGetNextUse() returns NULL.
+ *
+ * @see llvm::Value::use_begin()
+ */
LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val);
+
+/**
+ * Obtain the next use of a value.
+ *
+ * This effectively advances the iterator. It returns NULL if you are on
+ * the final use and no more are available.
+ */
LLVMUseRef LLVMGetNextUse(LLVMUseRef U);
+
+/**
+ * Obtain the user value for a user.
+ *
+ * The returned value corresponds to a llvm::User type.
+ *
+ * @see llvm::Use::getUser()
+ */
LLVMValueRef LLVMGetUser(LLVMUseRef U);
+
+/**
+ * Obtain the value this use corresponds to.
+ *
+ * @see llvm::Use::get().
+ */
LLVMValueRef LLVMGetUsedValue(LLVMUseRef U);
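A sketch of the use-iteration protocol just described, for some value val:

    static void walkUses(LLVMValueRef val) {
      for (LLVMUseRef use = LLVMGetFirstUse(val); use != NULL;
           use = LLVMGetNextUse(use)) {
        LLVMValueRef user = LLVMGetUser(use);      /* the llvm::User side */
        LLVMValueRef used = LLVMGetUsedValue(use); /* val itself */
        (void)user; (void)used;
      }
    }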
-/* Operations on Users */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueUser User value
+ *
+ * Functions in this group pertain to LLVMValueRef instances that descend
+ * from llvm::User. This includes constants, instructions, and
+ * operators.
+ *
+ * @{
+ */
+
+/**
+ * Obtain an operand at a specific index in a llvm::User value.
+ *
+ * @see llvm::User::getOperand()
+ */
LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index);
+
+/**
+ * Set an operand at a specific index in a llvm::User value.
+ *
+ * @see llvm::User::setOperand()
+ */
void LLVMSetOperand(LLVMValueRef User, unsigned Index, LLVMValueRef Val);
+
+/**
+ * Obtain the number of operands in a llvm::User value.
+ *
+ * @see llvm::User::getNumOperands()
+ */
int LLVMGetNumOperands(LLVMValueRef Val);
-/* Operations on constants of any type */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstant Constants
+ *
+ * This section contains APIs for interacting with LLVMValueRef that
+ * correspond to llvm::Constant instances.
+ *
+ * These functions will work for any LLVMValueRef in the llvm::Constant
+ * class hierarchy.
+ *
+ * @{
+ */
+
+/**
+ * Obtain a constant value referring to the null instance of a type.
+ *
+ * @see llvm::Constant::getNullValue()
+ */
LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */
-LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */
+
+/**
+ * Obtain a constant value referring to the instance of a type
+ * consisting of all ones.
+ *
+ * This is only valid for integer and vector-of-integer types.
+ *
+ * @see llvm::Constant::getAllOnesValue()
+ */
+LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty);
+
+/**
+ * Obtain a constant value referring to an undefined value of a type.
+ *
+ * @see llvm::UndefValue::get()
+ */
LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty);
-LLVMBool LLVMIsConstant(LLVMValueRef Val);
+
+/**
+ * Determine whether a value instance is null.
+ *
+ * @see llvm::Constant::isNullValue()
+ */
LLVMBool LLVMIsNull(LLVMValueRef Val);
-LLVMBool LLVMIsUndef(LLVMValueRef Val);
+
+/**
+ * Obtain a constant that is a constant pointer pointing to NULL for a
+ * specified type.
+ */
LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty);
-/* Operations on metadata */
-LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
- unsigned SLen);
-LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
-LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
- unsigned Count);
-LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
-const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len);
-int LLVMGetMDNodeNumOperands(LLVMValueRef V);
-LLVMValueRef *LLVMGetMDNodeOperand(LLVMValueRef V, unsigned i);
-unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char* name);
-void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRef *Dest);
+/**
+ * @defgroup LLVMCCoreValueConstantScalar Scalar constants
+ *
+ * Functions in this group model LLVMValueRef instances that correspond
+ * to constants referring to scalar types.
+ *
+ * For integer types, the LLVMTypeRef parameter should correspond to a
+ * llvm::IntegerType instance and the returned LLVMValueRef will
+ * correspond to a llvm::ConstantInt.
+ *
+ * For floating point types, the LLVMValueRef returned corresponds to a
+ * llvm::ConstantFP.
+ *
+ * @{
+ */
-/* Operations on scalar constants */
+/**
+ * Obtain a constant value for an integer type.
+ *
+ * The returned value corresponds to a llvm::ConstantInt.
+ *
+ * @see llvm::ConstantInt::get()
+ *
+ * @param IntTy Integer type to obtain value of.
+ * @param N The value the returned instance should refer to.
+ * @param SignExtend Whether to sign extend the produced value.
+ */
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
LLVMBool SignExtend);
+
+/**
+ * Obtain a constant value for an integer of arbitrary precision.
+ *
+ * @see llvm::ConstantInt::get()
+ */
LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy,
unsigned NumWords,
const uint64_t Words[]);
+
+/**
+ * Obtain a constant value for an integer parsed from a string.
+ *
+ * A similar API, LLVMConstIntOfStringAndSize is also available. If the
+ * string's length is available, it is preferred to call that function
+ * instead.
+ *
+ * @see llvm::ConstantInt::get()
+ */
LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text,
uint8_t Radix);
+
+/**
+ * Obtain a constant value for an integer parsed from a string with
+ * specified length.
+ *
+ * @see llvm::ConstantInt::get()
+ */
LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text,
unsigned SLen, uint8_t Radix);
+
+/**
+ * Obtain a constant value referring to a double floating point value.
+ */
LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N);
+
+/**
+ * Obtain a constant for a floating point value parsed from a string.
+ *
+ * A similar API, LLVMConstRealOfStringAndSize is also available. It
+ * should be used if the input string's length is known.
+ */
LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text);
+
+/**
+ * Obtain a constant for a floating point value parsed from a string.
+ */
LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text,
unsigned SLen);
+
+/**
+ * Obtain the zero extended value for an integer constant value.
+ *
+ * @see llvm::ConstantInt::getZExtValue()
+ */
unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal);
+
+/**
+ * Obtain the sign extended value for an integer constant value.
+ *
+ * @see llvm::ConstantInt::getSExtValue()
+ */
long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal);
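A sketch of round-tripping a scalar constant through these APIs:

    static void constIntDemo(void) {
      LLVMValueRef c = LLVMConstInt(LLVMInt32Type(), 42, 0 /* SignExtend */);
      long long v = LLVMConstIntGetSExtValue(c);  /* v == 42 */
      (void)v;
    }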
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstantComposite Composite Constants
+ *
+ * Functions in this group operate on composite constants.
+ *
+ * @{
+ */
-/* Operations on composite constants */
+/**
+ * Create a ConstantDataSequential and initialize it with a string.
+ *
+ * @see llvm::ConstantDataArray::getString()
+ */
LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
unsigned Length, LLVMBool DontNullTerminate);
-LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
- LLVMValueRef *ConstantVals,
- unsigned Count, LLVMBool Packed);
+/**
+ * Create a ConstantDataSequential with string content in the global context.
+ *
+ * This is the same as LLVMConstStringInContext except it operates on the
+ * global context.
+ *
+ * @see LLVMConstStringInContext()
+ * @see llvm::ConstantDataArray::getString()
+ */
LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
LLVMBool DontNullTerminate);
-LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
- LLVMValueRef *ConstantVals, unsigned Length);
+
+/**
+ * Create an anonymous ConstantStruct with the specified values.
+ *
+ * @see llvm::ConstantStruct::getAnon()
+ */
+LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
+ LLVMValueRef *ConstantVals,
+ unsigned Count, LLVMBool Packed);
+
+/**
+ * Create a ConstantStruct in the global Context.
+ *
+ * This is the same as LLVMConstStructInContext except it operates on the
+ * global Context.
+ *
+ * @see LLVMConstStructInContext()
+ */
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
LLVMBool Packed);
+
+/**
+ * Create a ConstantArray from values.
+ *
+ * @see llvm::ConstantArray::get()
+ */
+LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
+ LLVMValueRef *ConstantVals, unsigned Length);
+
+/**
+ * Create a non-anonymous ConstantStruct from values.
+ *
+ * @see llvm::ConstantStruct::get()
+ */
LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
LLVMValueRef *ConstantVals,
unsigned Count);
+
+/**
+ * Create a ConstantVector from values.
+ *
+ * @see llvm::ConstantVector::get()
+ */
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size);
-/* Constant expressions */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstantExpressions Constant Expressions
+ *
+ * Functions in this group correspond to APIs on llvm::ConstantExpr.
+ *
+ * @see llvm::ConstantExpr.
+ *
+ * @{
+ */
LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal);
LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty);
LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty);
@@ -690,7 +1540,21 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty,
LLVMBool HasSideEffects, LLVMBool IsAlignStack);
LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB);
-/* Operations on global variables, functions, and aliases (globals) */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueConstantGlobals Global Values
+ *
+ * This group contains functions that operate on global values. Functions in
+ * this group relate to functions in the llvm::GlobalValue class tree.
+ *
+ * @see llvm::GlobalValue
+ *
+ * @{
+ */
+
LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global);
LLVMBool LLVMIsDeclaration(LLVMValueRef Global);
LLVMLinkage LLVMGetLinkage(LLVMValueRef Global);
@@ -702,7 +1566,15 @@ void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz);
unsigned LLVMGetAlignment(LLVMValueRef Global);
void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes);
-/* Operations on global variables */
+/**
+ * @defgroup LLVMCoreValueConstantGlobalVariable Global Variables
+ *
+ * This group contains functions that operate on global variable values.
+ *
+ * @see llvm::GlobalVariable
+ *
+ * @{
+ */
LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name);
LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
const char *Name,
@@ -720,110 +1592,672 @@ void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal);
LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar);
void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant);
-/* Operations on aliases */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCoreValueConstantGlobalAlias Global Aliases
+ *
+ * This group contains function that operate on global alias values.
+ *
+ * @see llvm::GlobalAlias
+ *
+ * @{
+ */
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
const char *Name);
-/* Operations on functions */
-LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name,
- LLVMTypeRef FunctionTy);
-LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name);
-LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M);
-LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M);
-LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn);
-LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn);
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueFunction Function values
+ *
+ * Functions in this group operate on LLVMValueRef instances that
+ * correspond to llvm::Function instances.
+ *
+ * @see llvm::Function
+ *
+ * @{
+ */
+
+/**
+ * Remove a function from its containing module and delete it.
+ *
+ * @see llvm::Function::eraseFromParent()
+ */
void LLVMDeleteFunction(LLVMValueRef Fn);
+
+/**
+ * Obtain the ID number from a function instance.
+ *
+ * @see llvm::Function::getIntrinsicID()
+ */
unsigned LLVMGetIntrinsicID(LLVMValueRef Fn);
+
+/**
+ * Obtain the calling convention of a function.
+ *
+ * The returned value corresponds to the LLVMCallConv enumeration.
+ *
+ * @see llvm::Function::getCallingConv()
+ */
unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn);
+
+/**
+ * Set the calling convention of a function.
+ *
+ * @see llvm::Function::setCallingConv()
+ *
+ * @param Fn Function to operate on
+ * @param CC LLVMCallConv to set calling convention to
+ */
void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC);
+
+/**
+ * Obtain the name of the garbage collector to use during code
+ * generation.
+ *
+ * @see llvm::Function::getGC()
+ */
const char *LLVMGetGC(LLVMValueRef Fn);
+
+/**
+ * Define the garbage collector to use during code generation.
+ *
+ * @see llvm::Function::setGC()
+ */
void LLVMSetGC(LLVMValueRef Fn, const char *Name);
+
+/**
+ * Add an attribute to a function.
+ *
+ * @see llvm::Function::addAttribute()
+ */
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
+
+/**
+ * Obtain an attribute from a function.
+ *
+ * @see llvm::Function::getAttributes()
+ */
LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn);
+
+/**
+ * Remove an attribute from a function.
+ */
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
-/* Operations on parameters */
+/**
+ * @defgroup LLVMCCoreValueFunctionParameters Function Parameters
+ *
+ * Functions in this group relate to arguments/parameters on functions.
+ *
+ * Functions in this group expect LLVMValueRef instances that correspond
+ * to llvm::Function instances.
+ *
+ * @{
+ */
+
+/**
+ * Obtain the number of parameters in a function.
+ *
+ * @see llvm::Function::arg_size()
+ */
unsigned LLVMCountParams(LLVMValueRef Fn);
+
+/**
+ * Obtain the parameters in a function.
+ *
+ * This takes a pointer to a pre-allocated array of LLVMValueRef that is
+ * at least LLVMCountParams() long. This array will be filled with
+ * LLVMValueRef instances which correspond to the parameters the
+ * function receives. Each LLVMValueRef corresponds to a llvm::Argument
+ * instance.
+ *
+ * @see llvm::Function::arg_begin()
+ */
void LLVMGetParams(LLVMValueRef Fn, LLVMValueRef *Params);
+
+/**
+ * Obtain the parameter at the specified index.
+ *
+ * Parameters are indexed from 0.
+ *
+ * @see llvm::Function::arg_begin()
+ */
LLVMValueRef LLVMGetParam(LLVMValueRef Fn, unsigned Index);
+
+/**
+ * Obtain the function to which this argument belongs.
+ *
+ * Unlike other functions in this group, this one takes a LLVMValueRef
+ * that corresponds to a llvm::Argument.
+ *
+ * The returned LLVMValueRef is the llvm::Function to which this
+ * argument belongs.
+ */
LLVMValueRef LLVMGetParamParent(LLVMValueRef Inst);
+
+/**
+ * Obtain the first parameter to a function.
+ *
+ * @see llvm::Function::arg_begin()
+ */
LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn);
+
+/**
+ * Obtain the last parameter to a function.
+ *
+ * @see llvm::Function::arg_end()
+ */
LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn);
+
+/**
+ * Obtain the next parameter to a function.
+ *
+ * This takes a LLVMValueRef obtained from LLVMGetFirstParam() (which is
+ * actually a wrapped iterator) and obtains the next parameter from the
+ * underlying iterator.
+ */
LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg);
+
+/**
+ * Obtain the previous parameter to a function.
+ *
+ * This is the opposite of LLVMGetNextParam().
+ */
LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg);
+
+/**
+ * Add an attribute to a function argument.
+ *
+ * @see llvm::Argument::addAttr()
+ */
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA);
+
+/**
+ * Remove an attribute from a function argument.
+ *
+ * @see llvm::Argument::removeAttr()
+ */
void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA);
+
+/**
+ * Get an attribute from a function argument.
+ */
LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg);
+
+/**
+ * Set the alignment for a function parameter.
+ *
+ * @see llvm::Argument::addAttr()
+ * @see llvm::Attribute::constructAlignmentFromInt()
+ */
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align);
-/* Operations on basic blocks */
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueMetadata Metadata
+ *
+ * @{
+ */
+
+/**
+ * Obtain a MDString value from a context.
+ *
+ * The returned instance corresponds to the llvm::MDString class.
+ *
+ * The instance is specified by string data of a specified length. The
+ * string content is copied, so the backing memory can be freed after
+ * this function returns.
+ */
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen);
+
+/**
+ * Obtain a MDString value from the global context.
+ */
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen);
+
+/**
+ * Obtain a MDNode value from a context.
+ *
+ * The returned value corresponds to the llvm::MDNode class.
+ */
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count);
+
+/**
+ * Obtain a MDNode value from the global context.
+ */
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count);
+
+/**
+ * Obtain the underlying string from a MDString value.
+ *
+ * @param V Instance to obtain string from.
+ * @param Len Memory address which will hold length of returned string.
+ * @return String data in MDString.
+ */
+const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len);
+
+/**
+ * @}
+ */
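A sketch of building and inspecting metadata values in the global context:

    static void metadataDemo(void) {
      LLVMValueRef str  = LLVMMDString("answer", 6);
      LLVMValueRef node = LLVMMDNode(&str, 1);  /* MDNode wrapping the string */
      unsigned len = 0;
      const char *data = LLVMGetMDString(str, &len); /* "answer", len == 6 */
      (void)node; (void)data;
    }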
+
+/**
+ * @defgroup LLVMCCoreValueBasicBlock Basic Block
+ *
+ * A basic block represents a single entry single exit section of code.
+ * Basic blocks contain a list of instructions which form the body of
+ * the block.
+ *
+ * Basic blocks belong to functions. They have the type label.
+ *
+ * Basic blocks are themselves values. However, the C API models them as
+ * LLVMBasicBlockRef.
+ *
+ * @see llvm::BasicBlock
+ *
+ * @{
+ */
+
+/**
+ * Convert a basic block instance to a value type.
+ */
LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB);
+
+/**
+ * Determine whether a LLVMValueRef is itself a basic block.
+ */
LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val);
+
+/**
+ * Convert a LLVMValueRef to a LLVMBasicBlockRef instance.
+ */
LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val);
+
+/**
+ * Obtain the function to which a basic block belongs.
+ *
+ * @see llvm::BasicBlock::getParent()
+ */
LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the terminator instruction for a basic block.
+ *
+ * If the basic block does not have a terminator (it is not well-formed
+ * if it doesn't), then NULL is returned.
+ *
+ * The returned LLVMValueRef corresponds to a llvm::TerminatorInst.
+ *
+ * @see llvm::BasicBlock::getTerminator()
+ */
LLVMValueRef LLVMGetBasicBlockTerminator(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the number of basic blocks in a function.
+ *
+ * @param Fn Function value to operate on.
+ */
unsigned LLVMCountBasicBlocks(LLVMValueRef Fn);
+
+/**
+ * Obtain all of the basic blocks in a function.
+ *
+ * This operates on a function value. The BasicBlocks parameter is a
+ * pointer to a pre-allocated array of LLVMBasicBlockRef of at least
+ * LLVMCountBasicBlocks() in length. This array is populated with
+ * LLVMBasicBlockRef instances.
+ */
void LLVMGetBasicBlocks(LLVMValueRef Fn, LLVMBasicBlockRef *BasicBlocks);
+
+/**
+ * Obtain the first basic block in a function.
+ *
+ * The returned basic block can be used as an iterator. You will likely
+ * eventually call into LLVMGetNextBasicBlock() with it.
+ *
+ * @see llvm::Function::begin()
+ */
LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn);
+
+/**
+ * Obtain the last basic block in a function.
+ *
+ * @see llvm::Function::end()
+ */
LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn);
+
+/**
+ * Advance a basic block iterator.
+ */
LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB);
+
+/**
+ * Go backwards in a basic block iterator.
+ */
LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the basic block that corresponds to the entry point of a
+ * function.
+ *
+ * @see llvm::Function::getEntryBlock()
+ */
LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn);
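A sketch of block iteration over a function value fn:

    static void walkBlocks(LLVMValueRef fn) {
      for (LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(fn); bb != NULL;
           bb = LLVMGetNextBasicBlock(bb)) {
        LLVMValueRef term = LLVMGetBasicBlockTerminator(bb);
        (void)term;  /* NULL only if the block is not yet well-formed */
      }
    }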
+/**
+ * Append a basic block to the end of a function.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C,
LLVMValueRef Fn,
const char *Name);
+
+/**
+ * Append a basic block to the end of a function using the global
+ * context.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
+LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name);
+
+/**
+ * Insert a basic block in a function before another basic block.
+ *
+ * The function to add to is determined by the function of the
+ * passed basic block.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C,
LLVMBasicBlockRef BB,
const char *Name);
-LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name);
+/**
+ * Insert a basic block in a function using the global context.
+ *
+ * @see llvm::BasicBlock::Create()
+ */
LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB,
const char *Name);
+
+/**
+ * Remove a basic block from a function and delete it.
+ *
+ * This deletes the basic block from its containing function and deletes
+ * the basic block itself.
+ *
+ * @see llvm::BasicBlock::eraseFromParent()
+ */
void LLVMDeleteBasicBlock(LLVMBasicBlockRef BB);
+
+/**
+ * Remove a basic block from a function.
+ *
+ * This deletes the basic block from its containing function but keeps
+ * the basic block alive.
+ *
+ * @see llvm::BasicBlock::removeFromParent()
+ */
void LLVMRemoveBasicBlockFromParent(LLVMBasicBlockRef BB);
+/**
+ * Move a basic block to before another one.
+ *
+ * @see llvm::BasicBlock::moveBefore()
+ */
void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos);
+
+/**
+ * Move a basic block to after another one.
+ *
+ * @see llvm::BasicBlock::moveAfter()
+ */
void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos);
+/**
+ * Obtain the first instruction in a basic block.
+ *
+ * The returned LLVMValueRef corresponds to a llvm::Instruction
+ * instance.
+ */
LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB);
+
+/**
+ * Obtain the last instruction in a basic block.
+ *
+ * The returned LLVMValueRef corresponds to an llvm::Instruction.
+ */
LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB);
-/* Operations on instructions */
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCoreValueInstruction Instructions
+ *
+ * Functions in this group relate to the inspection and manipulation of
+ * individual instructions.
+ *
+ * In the C++ API, an instruction is modeled by llvm::Instruction. This
+ * class has a large number of descendants. llvm::Instruction is a
+ * llvm::Value and in the C API, instructions are modeled by
+ * LLVMValueRef.
+ *
+ * This group also contains sub-groups which operate on specific
+ * llvm::Instruction types, e.g. llvm::CallInst.
+ *
+ * @{
+ */
+
+/**
+ * Determine whether an instruction has any metadata attached.
+ */
+int LLVMHasMetadata(LLVMValueRef Val);
+
+/**
+ * Return metadata associated with an instruction value.
+ */
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID);
+
+/**
+ * Set metadata associated with an instruction value.
+ */
+void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node);
+
+/**
+ * Obtain the basic block to which an instruction belongs.
+ *
+ * @see llvm::Instruction::getParent()
+ */
LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst);
+
+/**
+ * Obtain the instruction that occurs after the one specified.
+ *
+ * The next instruction will be from the same basic block.
+ *
+ * If this is the last instruction in a basic block, NULL will be
+ * returned.
+ */
LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst);
+
+/**
+ * Obtain the instruction that occurred before this one.
+ *
+ * If the instruction is the first instruction in a basic block, NULL
+ * will be returned.
+ */
LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst);
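The same iterator shape applies within a block; a sketch for some basic block bb:

    static void walkInstructions(LLVMBasicBlockRef bb) {
      for (LLVMValueRef inst = LLVMGetFirstInstruction(bb); inst != NULL;
           inst = LLVMGetNextInstruction(inst)) {
        LLVMOpcode op = LLVMGetInstructionOpcode(inst);
        (void)op;
      }
    }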
+
+/**
+ * Remove and delete an instruction.
+ *
+ * The instruction specified is removed from its containing basic
+ * block and then deleted.
+ *
+ * @see llvm::Instruction::eraseFromParent()
+ */
void LLVMInstructionEraseFromParent(LLVMValueRef Inst);
+
+/**
+ * Obtain the code opcode for an individual instruction.
+ *
+ * @see llvm::Instruction::getOpcode()
+ */
LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst);
+
+/**
+ * Obtain the predicate of an instruction.
+ *
+ * This is only valid for instructions that correspond to llvm::ICmpInst
+ * or llvm::ConstantExpr whose opcode is llvm::Instruction::ICmp.
+ *
+ * @see llvm::ICmpInst::getPredicate()
+ */
LLVMIntPredicate LLVMGetICmpPredicate(LLVMValueRef Inst);
-/* Operations on call sites */
+/**
+ * @defgroup LLVMCCoreValueInstructionCall Call Sites and Invocations
+ *
+ * Functions in this group apply to instructions that refer to call
+ * sites and invocations. These correspond to C++ types in the
+ * llvm::CallInst class tree.
+ *
+ * @{
+ */
+
+/**
+ * Set the calling convention for a call instruction.
+ *
+ * This expects an LLVMValueRef that corresponds to a llvm::CallInst or
+ * llvm::InvokeInst.
+ *
+ * @see llvm::CallInst::setCallingConv()
+ * @see llvm::InvokeInst::setCallingConv()
+ */
void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC);
+
+/**
+ * Obtain the calling convention for a call instruction.
+ *
+ * This is the opposite of LLVMSetInstructionCallConv(). See its
+ * documentation for usage details.
+ *
+ * @see LLVMSetInstructionCallConv()
+ */
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr);
+
+
void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute);
-void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
+void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute);
-void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
+void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align);
-/* Operations on call instructions (only) */
+/**
+ * Obtain whether a call instruction is a tail call.
+ *
+ * This only works on llvm::CallInst instructions.
+ *
+ * @see llvm::CallInst::isTailCall()
+ */
LLVMBool LLVMIsTailCall(LLVMValueRef CallInst);
+
+/**
+ * Set whether a call instruction is a tail call.
+ *
+ * This only works on llvm::CallInst instructions.
+ *
+ * @see llvm::CallInst::setTailCall()
+ */
void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall);
-/* Operations on switch instructions (only) */
+/**
+ * @}
+ */
+
+/**
+ * Obtain the default destination basic block of a switch instruction.
+ *
+ * This only works on llvm::SwitchInst instructions.
+ *
+ * @see llvm::SwitchInst::getDefaultDest()
+ */
LLVMBasicBlockRef LLVMGetSwitchDefaultDest(LLVMValueRef SwitchInstr);
-/* Operations on phi nodes */
+/**
+ * @defgroup LLVMCCoreValueInstructionPHINode PHI Nodes
+ *
+ * Functions in this group only apply to instructions that map to
+ * llvm::PHINode instances.
+ *
+ * @{
+ */
+
+/**
+ * Add an incoming value to the end of a PHI list.
+ */
void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues,
LLVMBasicBlockRef *IncomingBlocks, unsigned Count);
+
+/**
+ * Obtain the number of incoming basic blocks to a PHI node.
+ */
unsigned LLVMCountIncoming(LLVMValueRef PhiNode);
+
+/**
+ * Obtain an incoming value to a PHI node as a LLVMValueRef.
+ */
LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index);
+
+/**
+ * Obtain an incoming value to a PHI node as a LLVMBasicBlockRef.
+ */
LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index);
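A short sketch of the PHI workflow (the phi itself is assumed to come from
LLVMBuildPhi() on an existing builder; all handles here are hypothetical and
assumed valid):

    #include "llvm-c/Core.h"

    /* Wire two predecessor values into Phi, then read the edges back. */
    static void wirePhi(LLVMValueRef Phi,
                        LLVMValueRef ThenVal, LLVMBasicBlockRef ThenBB,
                        LLVMValueRef ElseVal, LLVMBasicBlockRef ElseBB) {
      LLVMValueRef Vals[] = { ThenVal, ElseVal };
      LLVMBasicBlockRef BBs[] = { ThenBB, ElseBB };
      LLVMAddIncoming(Phi, Vals, BBs, 2);

      for (unsigned i = 0, e = LLVMCountIncoming(Phi); i != e; ++i) {
        LLVMValueRef V = LLVMGetIncomingValue(Phi, i);      /* value edge */
        LLVMBasicBlockRef B = LLVMGetIncomingBlock(Phi, i); /* block edge */
        (void)V; (void)B;
      }
    }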
-/*===-- Instruction builders ----------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
-/* An instruction builder represents a point within a basic block, and is the
- * exclusive means of building instructions using the C interface.
+/**
+ * @defgroup LLVMCCoreInstructionBuilder Instruction Builders
+ *
+ * An instruction builder represents a point within a basic block and is
+ * the exclusive means of building instructions using the C interface.
+ *
+ * @{
*/
LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C);
@@ -964,6 +2398,8 @@ LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str,
const char *Name);
LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
const char *Name);
+LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst);
+void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile);
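A minimal sketch of the volatile accessors (B and Ptr are assumed to be a
valid builder and a pointer-typed value; LLVMBuildLoad is declared earlier in
this header):

    #include "llvm-c/Core.h"

    static LLVMValueRef buildVolatileLoad(LLVMBuilderRef B, LLVMValueRef Ptr) {
      LLVMValueRef Load = LLVMBuildLoad(B, Ptr, "v"); /* ordinary load */
      LLVMSetVolatile(Load, 1);  /* optimizers may no longer elide it */
      return Load;               /* LLVMGetVolatile(Load) is now nonzero */
    }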
/* Casts */
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val,
@@ -1044,21 +2480,37 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
LLVMValueRef RHS, const char *Name);
+/**
+ * @}
+ */
-/*===-- Module providers --------------------------------------------------===*/
+/**
+ * @defgroup LLVMCCoreModuleProvider Module Providers
+ *
+ * @{
+ */
-/* Changes the type of M so it can be passed to FunctionPassManagers and the
+/**
+ * Changes the type of M so it can be passed to FunctionPassManagers and the
* JIT. They take ModuleProviders for historical reasons.
*/
LLVMModuleProviderRef
LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M);
-/* Destroys the module M.
+/**
+ * Destroys the module provider M.
*/
void LLVMDisposeModuleProvider(LLVMModuleProviderRef M);
+/**
+ * @}
+ */
-/*===-- Memory buffers ----------------------------------------------------===*/
+/**
+ * @defgroup LLVMCCoreMemoryBuffers Memory Buffers
+ *
+ * @{
+ */
LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
LLVMMemoryBufferRef *OutMemBuf,
@@ -1067,23 +2519,39 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage);
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
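A sketch of the out-parameter convention these functions follow (a nonzero
return signals failure; the error string must be released with
LLVMDisposeMessage, declared earlier in this header):

    #include "llvm-c/Core.h"
    #include <stdio.h>

    static LLVMMemoryBufferRef readFile(const char *Path) {
      LLVMMemoryBufferRef Buf;
      char *Err = NULL;
      if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Err)) {
        fprintf(stderr, "cannot read %s: %s\n", Path, Err);
        LLVMDisposeMessage(Err);
        return NULL;
      }
      return Buf; /* caller releases with LLVMDisposeMemoryBuffer() */
    }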
-/*===-- Pass Registry -----------------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCorePassRegistry Pass Registry
+ *
+ * @{
+ */
/** Return the global pass registry, for use with initialization functions.
- See llvm::PassRegistry::getPassRegistry. */
+ @see llvm::PassRegistry::getPassRegistry */
LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
-/*===-- Pass Managers -----------------------------------------------------===*/
+/**
+ * @}
+ */
+
+/**
+ * @defgroup LLVMCCorePassManagers Pass Managers
+ *
+ * @{
+ */
/** Constructs a new whole-module pass pipeline. This type of pipeline is
suitable for link-time optimization and whole-module transformations.
- See llvm::PassManager::PassManager. */
+ @see llvm::PassManager::PassManager */
LLVMPassManagerRef LLVMCreatePassManager(void);
/** Constructs a new function-by-function pass pipeline over the module
provider. It does not take ownership of the module provider. This type of
pipeline is suitable for code generation and JIT compilation tasks.
- See llvm::FunctionPassManager::FunctionPassManager. */
+ @see llvm::FunctionPassManager::FunctionPassManager */
LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M);
/** Deprecated: Use LLVMCreateFunctionPassManagerForModule instead. */
@@ -1091,30 +2559,42 @@ LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP);
/** Initializes, executes on the provided module, and finalizes all of the
passes scheduled in the pass manager. Returns 1 if any of the passes
- modified the module, 0 otherwise. See llvm::PassManager::run(Module&). */
+ modified the module, 0 otherwise.
+ @see llvm::PassManager::run(Module&) */
LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M);
/** Initializes all of the function passes scheduled in the function pass
manager. Returns 1 if any of the passes modified the module, 0 otherwise.
- See llvm::FunctionPassManager::doInitialization. */
+ @see llvm::FunctionPassManager::doInitialization */
LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM);
/** Executes all of the function passes scheduled in the function pass manager
on the provided function. Returns 1 if any of the passes modified the
    function, 0 otherwise.
- See llvm::FunctionPassManager::run(Function&). */
+ @see llvm::FunctionPassManager::run(Function&) */
LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F);
/** Finalizes all of the function passes scheduled in the function pass
manager. Returns 1 if any of the passes modified the module, 0 otherwise.
- See llvm::FunctionPassManager::doFinalization. */
+ @see llvm::FunctionPassManager::doFinalization */
LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM);
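The three functions above form an initialize/run/finalize cycle. A minimal
sketch over every function in a module (LLVMGetFirstFunction and
LLVMGetNextFunction are declared earlier in this header):

    #include "llvm-c/Core.h"

    static LLVMBool runFunctionPasses(LLVMPassManagerRef FPM, LLVMModuleRef M) {
      LLVMBool Changed = LLVMInitializeFunctionPassManager(FPM);
      for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
           F = LLVMGetNextFunction(F))
        Changed |= LLVMRunFunctionPassManager(FPM, F);
      Changed |= LLVMFinalizeFunctionPassManager(FPM);
      return Changed; /* nonzero if any pass modified the module */
    }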
/** Frees the memory of a pass pipeline. For function pipelines, does not free
the module provider.
- See llvm::PassManagerBase::~PassManagerBase. */
+    @see llvm::PassManagerBase::~PassManagerBase */
void LLVMDisposePassManager(LLVMPassManagerRef PM);
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
+
+/**
+ * @}
+ */
#ifdef __cplusplus
}
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
index bf2f2767cd34..a676e37768e4 100644
--- a/include/llvm-c/Disassembler.h
+++ b/include/llvm-c/Disassembler.h
@@ -19,6 +19,13 @@
#include <stddef.h>
/**
+ * @defgroup LLVMCDisassembler Disassembler
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+/**
* An opaque reference to a disassembler context.
*/
typedef void *LLVMDisasmContextRef;
@@ -157,6 +164,10 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DC, uint8_t *Bytes,
uint64_t BytesSize, uint64_t PC,
char *OutString, size_t OutStringSize);
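A minimal sketch of decoding a single instruction (the triple is an example;
it assumes the matching target, MC layer, and disassembler have been
initialized, and passes NULL callbacks since no symbolic annotation is
wanted):

    #include "llvm-c/Disassembler.h"
    #include <stdio.h>

    static void disasmOne(uint8_t *Bytes, uint64_t Size, uint64_t PC) {
      LLVMDisasmContextRef DC =
          LLVMCreateDisasm("x86_64-unknown-linux-gnu", NULL, 0, NULL, NULL);
      if (!DC) return; /* unknown or uninitialized target */
      char Text[128];
      /* Returns the number of bytes decoded, or 0 on invalid input. */
      if (LLVMDisasmInstruction(DC, Bytes, Size, PC, Text, sizeof(Text)))
        printf("%s\n", Text);
      LLVMDisasmDispose(DC);
    }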
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif /* !defined(__cplusplus) */
diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h
index 0c173c2b1999..71a0d496c028 100644
--- a/include/llvm-c/EnhancedDisassembly.h
+++ b/include/llvm-c/EnhancedDisassembly.h
@@ -25,6 +25,19 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCEnhancedDisassembly Enhanced Disassembly
+ * @ingroup LLVMC
+ * @deprecated
+ *
+ * This module contains an interface to the Enhanced Disassembly (edis)
+ * library. The edis library is deprecated and will likely disappear in
+ * the near future. You should use the @ref LLVMCDisassembler interface
+ * instead.
+ *
+ * @{
+ */
+
/*!
@typedef EDByteReaderCallback
Interface to memory from which instructions may be read.
@@ -504,6 +517,10 @@ int EDBlockEvaluateOperand(uint64_t *result,
int EDBlockVisitTokens(EDInstRef inst,
EDTokenVisitor_t visitor);
+/**
+ * @}
+ */
+
#endif
#ifdef __cplusplus
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index f5f40619ef0d..cb77bb2e2e23 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -26,6 +26,13 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCExecutionEngine Execution Engine
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
void LLVMLinkInJIT(void);
void LLVMLinkInInterpreter(void);
@@ -125,6 +132,10 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h
index 3b59abbec03c..cb3ab9e3f393 100644
--- a/include/llvm-c/Initialization.h
+++ b/include/llvm-c/Initialization.h
@@ -22,9 +22,19 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCInitialization Initialization Routines
+ * @ingroup LLVMC
+ *
+ * This module contains routines used to initialize the LLVM system.
+ *
+ * @{
+ */
+
void LLVMInitializeCore(LLVMPassRegistryRef R);
void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
+void LLVMInitializeVectorization(LLVMPassRegistryRef R);
void LLVMInitializeInstCombine(LLVMPassRegistryRef R);
void LLVMInitializeIPO(LLVMPassRegistryRef R);
void LLVMInitializeInstrumentation(LLVMPassRegistryRef R);
@@ -33,6 +43,10 @@ void LLVMInitializeIPA(LLVMPassRegistryRef R);
void LLVMInitializeCodeGen(LLVMPassRegistryRef R);
void LLVMInitializeTarget(LLVMPassRegistryRef R);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm-c/LinkTimeOptimizer.h b/include/llvm-c/LinkTimeOptimizer.h
index fca394681c76..5338d3fc4c85 100644
--- a/include/llvm-c/LinkTimeOptimizer.h
+++ b/include/llvm-c/LinkTimeOptimizer.h
@@ -20,6 +20,13 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCLinkTimeOptimizer Link Time Optimization
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
/// This provides a dummy type for pointers to the LTO object.
typedef void* llvm_lto_t;
@@ -51,6 +58,10 @@ extern "C" {
extern llvm_lto_status_t llvm_optimize_modules
(llvm_lto_t lto, const char* output_filename);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif
diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h
index 7b1cf717f777..e2dad62b4e07 100644
--- a/include/llvm-c/Object.h
+++ b/include/llvm-c/Object.h
@@ -28,23 +28,74 @@
extern "C" {
#endif
-
+/**
+ * @defgroup LLVMCObject Object file reading and writing
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
+// Opaque type wrappers
typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef;
-
typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef;
+typedef struct LLVMOpaqueSymbolIterator *LLVMSymbolIteratorRef;
+typedef struct LLVMOpaqueRelocationIterator *LLVMRelocationIteratorRef;
+// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf);
void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile);
+// ObjectFile Section iterators
LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile);
void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI);
LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
LLVMSectionIteratorRef SI);
void LLVMMoveToNextSection(LLVMSectionIteratorRef SI);
+void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
+ LLVMSymbolIteratorRef Sym);
+
+// ObjectFile Symbol iterators
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile);
+void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI);
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSymbolIteratorRef SI);
+void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI);
+
+// SectionRef accessors
const char *LLVMGetSectionName(LLVMSectionIteratorRef SI);
uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI);
const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI);
-
+uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI);
+LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI,
+ LLVMSymbolIteratorRef Sym);
+
+// Section Relocation iterators
+LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section);
+void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef RI);
+LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section,
+ LLVMRelocationIteratorRef RI);
+void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef RI);
+
+
+// SymbolRef accessors
+const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI);
+uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI);
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI);
+uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI);
+
+// RelocationRef accessors
+uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI);
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI);
+LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI);
+uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI);
+// NOTE: The caller takes ownership of the strings returned by the
+// following two functions.
+const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI);
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI);
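A sketch of the iterator protocol shared by sections, symbols, and
relocations: create, test against end, advance, dispose. Shown here over
symbols (Buf is assumed to hold the file's contents, and the object file is
assumed to take ownership of it):

    #include "llvm-c/Object.h"
    #include <stdio.h>

    static void listSymbols(LLVMMemoryBufferRef Buf) {
      LLVMObjectFileRef Obj = LLVMCreateObjectFile(Buf);
      if (!Obj) return;
      LLVMSymbolIteratorRef Sym = LLVMGetSymbols(Obj);
      while (!LLVMIsSymbolIteratorAtEnd(Obj, Sym)) {
        printf("%s @ 0x%llx\n", LLVMGetSymbolName(Sym),
               (unsigned long long)LLVMGetSymbolAddress(Sym));
        LLVMMoveToNextSymbol(Sym);
      }
      LLVMDisposeSymbolIterator(Sym);
      LLVMDisposeObjectFile(Obj);
    }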
+
+/**
+ * @}
+ */
#ifdef __cplusplus
}
@@ -68,6 +119,27 @@ namespace llvm {
return reinterpret_cast<LLVMSectionIteratorRef>
(const_cast<section_iterator*>(SI));
}
+
+ inline symbol_iterator *unwrap(LLVMSymbolIteratorRef SI) {
+ return reinterpret_cast<symbol_iterator*>(SI);
+ }
+
+ inline LLVMSymbolIteratorRef
+ wrap(const symbol_iterator *SI) {
+ return reinterpret_cast<LLVMSymbolIteratorRef>
+ (const_cast<symbol_iterator*>(SI));
+ }
+
+ inline relocation_iterator *unwrap(LLVMRelocationIteratorRef SI) {
+ return reinterpret_cast<relocation_iterator*>(SI);
+ }
+
+ inline LLVMRelocationIteratorRef
+ wrap(const relocation_iterator *SI) {
+ return reinterpret_cast<LLVMRelocationIteratorRef>
+ (const_cast<relocation_iterator*>(SI));
+ }
+
}
}
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 7afaef15c419..568e60dfb43e 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -26,6 +26,13 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCTarget Target information
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
enum LLVMByteOrdering { LLVMBigEndian, LLVMLittleEndian };
typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
@@ -47,6 +54,24 @@ typedef struct LLVMStructLayout *LLVMStructLayoutRef;
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
+/* Declare all of the available assembly printer initialization functions. */
+#define LLVM_ASM_PRINTER(TargetName) \
+ void LLVMInitialize##TargetName##AsmPrinter();
+#include "llvm/Config/AsmPrinters.def"
+#undef LLVM_ASM_PRINTER /* Explicit undef to make SWIG happier */
+
+/* Declare all of the available assembly parser initialization functions. */
+#define LLVM_ASM_PARSER(TargetName) \
+ void LLVMInitialize##TargetName##AsmParser();
+#include "llvm/Config/AsmParsers.def"
+#undef LLVM_ASM_PARSER /* Explicit undef to make SWIG happier */
+
+/* Declare all of the available disassembler initialization functions. */
+#define LLVM_DISASSEMBLER(TargetName) \
+ void LLVMInitialize##TargetName##Disassembler();
+#include "llvm/Config/Disassemblers.def"
+#undef LLVM_DISASSEMBLER /* Explicit undef to make SWIG happier */
+
/** LLVMInitializeAllTargetInfos - The main program should call this function if
it wants access to all available targets that LLVM is configured to
support. */
@@ -64,6 +89,43 @@ static inline void LLVMInitializeAllTargets(void) {
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
}
+
+/** LLVMInitializeAllTargetMCs - The main program should call this function if
+    it wants access to all available target MC implementations that LLVM is
+    configured to support. */
+static inline void LLVMInitializeAllTargetMCs(void) {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetMC();
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllAsmPrinters - The main program should call this function if
+ it wants all asm printers that LLVM is configured to support, to make them
+ available via the TargetRegistry. */
+static inline void LLVMInitializeAllAsmPrinters() {
+#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter();
+#include "llvm/Config/AsmPrinters.def"
+#undef LLVM_ASM_PRINTER /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllAsmParsers - The main program should call this function if
+ it wants all asm parsers that LLVM is configured to support, to make them
+ available via the TargetRegistry. */
+static inline void LLVMInitializeAllAsmParsers() {
+#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
+#include "llvm/Config/AsmParsers.def"
+#undef LLVM_ASM_PARSER /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllDisassemblers - The main program should call this function
+ if it wants all disassemblers that LLVM is configured to support, to make
+ them available via the TargetRegistry. */
+static inline void LLVMInitializeAllDisassemblers() {
+#define LLVM_DISASSEMBLER(TargetName) \
+ LLVMInitialize##TargetName##Disassembler();
+#include "llvm/Config/Disassemblers.def"
+#undef LLVM_DISASSEMBLER /* Explicit undef to make SWIG happier */
+}
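A typical tool that must cope with any configured target calls the "all"
initializers once at startup, before constructing target machines or
disassemblers; a sketch:

    #include "llvm-c/Target.h"

    static void initAllTargets(void) {
      LLVMInitializeAllTargetInfos();   /* names and triples */
      LLVMInitializeAllTargets();       /* code generators */
      LLVMInitializeAllTargetMCs();     /* MC layers */
      LLVMInitializeAllAsmPrinters();   /* assembly output */
      LLVMInitializeAllAsmParsers();    /* inline asm parsing */
      LLVMInitializeAllDisassemblers(); /* byte decoding */
    }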
/** LLVMInitializeNativeTarget - The main program should call this function to
initialize the native target corresponding to the host. This is useful
@@ -157,6 +219,9 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef, LLVMTypeRef StructTy,
See the destructor llvm::TargetData::~TargetData. */
void LLVMDisposeTargetData(LLVMTargetDataRef);
+/**
+ * @}
+ */
#ifdef __cplusplus
}
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
new file mode 100644
index 000000000000..0d35d73a11df
--- /dev/null
+++ b/include/llvm-c/TargetMachine.h
@@ -0,0 +1,142 @@
+/*===-- llvm-c/TargetMachine.h - Target Machine Library C Interface - C++ -*-=*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to the Target and TargetMachine *|
+|* classes, which can be used to generate assembly or object files. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TARGETMACHINE_H
+#define LLVM_C_TARGETMACHINE_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef struct LLVMTargetMachine *LLVMTargetMachineRef;
+typedef struct LLVMTarget *LLVMTargetRef;
+
+typedef enum {
+ LLVMCodeGenLevelNone,
+ LLVMCodeGenLevelLess,
+ LLVMCodeGenLevelDefault,
+ LLVMCodeGenLevelAggressive
+} LLVMCodeGenOptLevel;
+
+typedef enum {
+ LLVMRelocDefault,
+ LLVMRelocStatic,
+ LLVMRelocPIC,
+ LLVMRelocDynamicNoPic
+} LLVMRelocMode;
+
+typedef enum {
+ LLVMCodeModelDefault,
+ LLVMCodeModelJITDefault,
+ LLVMCodeModelSmall,
+ LLVMCodeModelKernel,
+ LLVMCodeModelMedium,
+ LLVMCodeModelLarge
+} LLVMCodeModel;
+
+typedef enum {
+ LLVMAssemblyFile,
+ LLVMObjectFile
+} LLVMCodeGenFileType;
+
+/** Returns the first llvm::Target in the registered targets list. */
+LLVMTargetRef LLVMGetFirstTarget();
+/** Returns the next llvm::Target given a previous one (or null if there's none). */
+LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T);
+
+/*===-- Target ------------------------------------------------------------===*/
+/** Returns the name of a target. See llvm::Target::getName */
+const char *LLVMGetTargetName(LLVMTargetRef T);
+
+/** Returns the description of a target. See llvm::Target::getDescription */
+const char *LLVMGetTargetDescription(LLVMTargetRef T);
+
+/** Returns whether the target has a JIT. */
+LLVMBool LLVMTargetHasJIT(LLVMTargetRef T);
+
+/** Returns whether the target has a TargetMachine associated with it. */
+LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T);
+
+/** Returns whether the target has an ASM backend (required for emitting output). */
+LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T);
+
+/*===-- Target Machine ----------------------------------------------------===*/
+/** Creates a new llvm::TargetMachine. See llvm::Target::createTargetMachine */
+LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char *Triple,
+ char *CPU, char *Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
+ LLVMCodeModel CodeModel);
+
+/** Dispose the LLVMTargetMachineRef instance generated by
+ LLVMCreateTargetMachine. */
+void LLVMDisposeTargetMachine(LLVMTargetMachineRef T);
+
+/** Returns the Target used in a TargetMachine */
+LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T);
+
+/** Returns the triple used to create this target machine. See
+    llvm::TargetMachine::getTriple. The result needs to be disposed with
+    LLVMDisposeMessage. */
+char *LLVMGetTargetMachineTriple(LLVMTargetMachineRef T);
+
+/** Returns the CPU used to create this target machine. See
+    llvm::TargetMachine::getCPU. The result needs to be disposed with
+    LLVMDisposeMessage. */
+char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T);
+
+/** Returns the feature string used to create this target machine. See
+    llvm::TargetMachine::getFeatureString. The result needs to be disposed
+    with LLVMDisposeMessage. */
+char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T);
+
+/** Returns the llvm::TargetData used for this llvm::TargetMachine. */
+LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T);
+
+/** Emits an asm or object file for the given module to the filename. This
+    wraps several C++-only classes (among them a file stream). Returns any
+    error in ErrorMessage. Use LLVMDisposeMessage to dispose the message. */
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+ char *Filename, LLVMCodeGenFileType codegen, char **ErrorMessage);
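A condensed sketch of driving a module to an object file with this interface
(the triple is an example, error handling is abbreviated, and picking the
first registered target assumes it matches the triple):

    #include "llvm-c/TargetMachine.h"
    #include <stdio.h>

    static int emitObject(LLVMModuleRef M) {
      char Triple[] = "x86_64-unknown-linux-gnu";
      char CPU[] = "", Features[] = "", Out[] = "out.o";
      LLVMTargetMachineRef TM = LLVMCreateTargetMachine(
          LLVMGetFirstTarget(), Triple, CPU, Features,
          LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
      char *Err = NULL;
      if (LLVMTargetMachineEmitToFile(TM, M, Out, LLVMObjectFile, &Err)) {
        fprintf(stderr, "emit failed: %s\n", Err);
        LLVMDisposeMessage(Err);
        LLVMDisposeTargetMachine(TM);
        return 1;
      }
      LLVMDisposeTargetMachine(TM);
      return 0;
    }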
+
+
+
+
+#ifdef __cplusplus
+}
+
+namespace llvm {
+ class TargetMachine;
+ class Target;
+
+ inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
+ return reinterpret_cast<TargetMachine*>(P);
+ }
+ inline Target *unwrap(LLVMTargetRef P) {
+ return reinterpret_cast<Target*>(P);
+ }
+ inline LLVMTargetMachineRef wrap(const TargetMachine *P) {
+ return reinterpret_cast<LLVMTargetMachineRef>(
+ const_cast<TargetMachine*>(P));
+ }
+ inline LLVMTargetRef wrap(const Target * P) {
+ return reinterpret_cast<LLVMTargetRef>(const_cast<Target*>(P));
+ }
+}
+#endif
+
+#endif
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index 710bebe598be..448078012eac 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -21,6 +21,13 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCTransformsIPO Interprocedural transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
/** See llvm::createArgumentPromotionPass function. */
void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM);
@@ -63,6 +70,10 @@ void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM);
/** See llvm::createStripSymbolsPass function. */
void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
#endif /* defined(__cplusplus) */
diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h
index fa722c95874d..cee6e5a0ee08 100644
--- a/include/llvm-c/Transforms/PassManagerBuilder.h
+++ b/include/llvm-c/Transforms/PassManagerBuilder.h
@@ -23,6 +23,13 @@ typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef;
extern "C" {
#endif
+/**
+ * @defgroup LLVMCTransformsPassManagerBuilder Pass manager builder
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
/** See llvm::PassManagerBuilder. */
LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void);
void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB);
@@ -73,6 +80,10 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
bool Internalize,
bool RunInliner);
+/**
+ * @}
+ */
+
#ifdef __cplusplus
}
diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h
index 6015ef90eed2..a2c4d6116f03 100644
--- a/include/llvm-c/Transforms/Scalar.h
+++ b/include/llvm-c/Transforms/Scalar.h
@@ -25,6 +25,13 @@
extern "C" {
#endif
+/**
+ * @defgroup LLVMCTransformsScalar Scalar transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
/** See llvm::createAggressiveDCEPass function. */
void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
@@ -116,6 +123,9 @@ void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM);
/** See llvm::createBasicAliasAnalysisPass function */
void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM);
+/**
+ * @}
+ */
#ifdef __cplusplus
}
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
new file mode 100644
index 000000000000..9e7c7540d766
--- /dev/null
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -0,0 +1,48 @@
+/*===---------------------------Vectorize.h --------------------- -*- C -*-===*\
+|*===----------- Vectorization Transformation Library C Interface ---------===*|
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMVectorize.a, which *|
+|* implements various vectorization transformations of the LLVM IR. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_VECTORIZE_H
+#define LLVM_C_TRANSFORMS_VECTORIZE_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @defgroup LLVMCTransformsVectorize Vectorization transformations
+ * @ingroup LLVMCTransforms
+ *
+ * @{
+ */
+
+/** See llvm::createBBVectorizePass function. */
+void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
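A minimal sketch of scheduling the pass from C (the pass manager functions
come from llvm-c/Core.h):

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Vectorize.h"

    static LLVMBool vectorize(LLVMModuleRef M) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddBBVectorizePass(PM);
      LLVMBool Changed = LLVMRunPassManager(PM, M);
      LLVMDisposePassManager(PM);
      return Changed;
    }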
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
+
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 7ea7ad01a211..5d9cecbc5153 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -20,24 +20,31 @@
#include <stddef.h>
#include <unistd.h>
+/**
+ * @defgroup LLVMCLTO LTO
+ * @ingroup LLVMC
+ *
+ * @{
+ */
+
#define LTO_API_VERSION 4
typedef enum {
LTO_SYMBOL_ALIGNMENT_MASK = 0x0000001F, /* log2 of alignment */
- LTO_SYMBOL_PERMISSIONS_MASK = 0x000000E0,
- LTO_SYMBOL_PERMISSIONS_CODE = 0x000000A0,
- LTO_SYMBOL_PERMISSIONS_DATA = 0x000000C0,
- LTO_SYMBOL_PERMISSIONS_RODATA = 0x00000080,
- LTO_SYMBOL_DEFINITION_MASK = 0x00000700,
- LTO_SYMBOL_DEFINITION_REGULAR = 0x00000100,
- LTO_SYMBOL_DEFINITION_TENTATIVE = 0x00000200,
- LTO_SYMBOL_DEFINITION_WEAK = 0x00000300,
- LTO_SYMBOL_DEFINITION_UNDEFINED = 0x00000400,
+ LTO_SYMBOL_PERMISSIONS_MASK = 0x000000E0,
+ LTO_SYMBOL_PERMISSIONS_CODE = 0x000000A0,
+ LTO_SYMBOL_PERMISSIONS_DATA = 0x000000C0,
+ LTO_SYMBOL_PERMISSIONS_RODATA = 0x00000080,
+ LTO_SYMBOL_DEFINITION_MASK = 0x00000700,
+ LTO_SYMBOL_DEFINITION_REGULAR = 0x00000100,
+ LTO_SYMBOL_DEFINITION_TENTATIVE = 0x00000200,
+ LTO_SYMBOL_DEFINITION_WEAK = 0x00000300,
+ LTO_SYMBOL_DEFINITION_UNDEFINED = 0x00000400,
LTO_SYMBOL_DEFINITION_WEAKUNDEF = 0x00000500,
- LTO_SYMBOL_SCOPE_MASK = 0x00003800,
- LTO_SYMBOL_SCOPE_INTERNAL = 0x00000800,
- LTO_SYMBOL_SCOPE_HIDDEN = 0x00001000,
- LTO_SYMBOL_SCOPE_PROTECTED = 0x00002000,
+ LTO_SYMBOL_SCOPE_MASK = 0x00003800,
+ LTO_SYMBOL_SCOPE_INTERNAL = 0x00000800,
+ LTO_SYMBOL_SCOPE_HIDDEN = 0x00001000,
+ LTO_SYMBOL_SCOPE_PROTECTED = 0x00002000,
LTO_SYMBOL_SCOPE_DEFAULT = 0x00001800,
LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN = 0x00002800
} lto_symbol_attributes;
@@ -88,7 +95,7 @@ lto_module_is_object_file(const char* path);
* Checks if a file is a loadable object compiled for requested target.
*/
extern bool
-lto_module_is_object_file_for_target(const char* path,
+lto_module_is_object_file_for_target(const char* path,
const char* target_triple_prefix);
@@ -103,7 +110,7 @@ lto_module_is_object_file_in_memory(const void* mem, size_t length);
* Checks if a buffer is a loadable object compiled for requested target.
*/
extern bool
-lto_module_is_object_file_in_memory_for_target(const void* mem, size_t length,
+lto_module_is_object_file_in_memory_for_target(const void* mem, size_t length,
const char* target_triple_prefix);
@@ -244,6 +251,12 @@ lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args,
int nargs);
/**
+ * Enables the internalize pass during LTO optimizations.
+ */
+extern void
+lto_codegen_set_whole_program_optimization(lto_code_gen_t cg);
+
+/**
* Adds to a list of all global symbols that must exist in the final
* generated code. If a function is not listed, it might be
* inlined into every usage and optimized away.
@@ -251,7 +264,6 @@ lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args,
extern void
lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol);
-
/**
* Writes a new object file at the specified path that contains the
* merged contents of all modules added so far.
@@ -260,11 +272,10 @@ lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol);
extern bool
lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path);
-
/**
* Generates code for all added modules into one native object file.
* On success returns a pointer to a generated mach-o/ELF buffer and
- * length set to the buffer size. The buffer is owned by the
+ * length set to the buffer size. The buffer is owned by the
* lto_code_gen_t and will be freed when lto_codegen_dispose()
* is called, or lto_codegen_compile() is called again.
* On failure, returns NULL (check lto_get_error_message() for details).
@@ -285,9 +296,13 @@ lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name);
*/
extern void
lto_codegen_debug_options(lto_code_gen_t cg, const char *);
+
#ifdef __cplusplus
}
#endif
+/**
+ * @}
+ */
#endif
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index d2566a44bac9..2b466f900c81 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -320,6 +320,7 @@ namespace llvm {
const fltSemantics &getSemantics() const { return *semantics; }
bool isZero() const { return category == fcZero; }
bool isNonZero() const { return category != fcZero; }
+ bool isNormal() const { return category == fcNormal; }
bool isNaN() const { return category == fcNaN; }
bool isInfinity() const { return category == fcInfinity; }
bool isNegative() const { return sign; }
@@ -328,8 +329,16 @@ namespace llvm {
APFloat& operator=(const APFloat &);
- /* Return an arbitrary integer value usable for hashing. */
- uint32_t getHashValue() const;
+ /// \brief Overload to compute a hash code for an APFloat value.
+ ///
+ /// Note that the use of hash codes for floating point values is in general
+  /// fraught with peril. Equality is hard to define for these values. For
+ /// example, should negative and positive zero hash to different codes? Are
+ /// they equal or not? This hash value implementation specifically
+ /// emphasizes producing different codes for different inputs in order to
+ /// be used in canonicalization and memoization. As such, equality is
+ /// bitwiseIsEqual, and 0 != -0.
+ friend hash_code hash_value(const APFloat &Arg);
/// Converts this value into a decimal string.
///
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 707e0dbb6b91..41019899766b 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -23,11 +23,12 @@
#include <string>
namespace llvm {
- class Serializer;
class Deserializer;
class FoldingSetNodeID;
- class raw_ostream;
+ class Serializer;
class StringRef;
+ class hash_code;
+ class raw_ostream;
template<typename T>
class SmallVectorImpl;
@@ -497,15 +498,13 @@ public:
if (loBitsSet == APINT_BITS_PER_WORD)
return APInt(numBits, -1ULL);
// For small values, return quickly.
- if (numBits < APINT_BITS_PER_WORD)
- return APInt(numBits, (1ULL << loBitsSet) - 1);
+ if (loBitsSet <= APINT_BITS_PER_WORD)
+ return APInt(numBits, -1ULL >> (APINT_BITS_PER_WORD - loBitsSet));
return getAllOnesValue(numBits).lshr(numBits - loBitsSet);
}
- /// The hash value is computed as the sum of the words and the bit width.
- /// @returns A hash value computed from the sum of the APInt words.
- /// @brief Get a hash value based on this APInt
- uint64_t getHashValue() const;
+ /// \brief Overload to compute a hash_code for an APInt value.
+ friend hash_code hash_value(const APInt &Arg);
/// This function returns a pointer to the internal storage of the APInt.
/// This is useful for writing out the APInt in binary form without any
@@ -562,7 +561,15 @@ public:
/// Performs logical negation operation on this APInt.
/// @returns true if *this is zero, false otherwise.
/// @brief Logical negation operator.
- bool operator!() const;
+ bool operator!() const {
+ if (isSingleWord())
+ return !VAL;
+
+ for (unsigned i = 0; i != getNumWords(); ++i)
+ if (pVal[i])
+ return false;
+ return true;
+ }
/// @}
/// @name Assignment Operators
@@ -835,7 +842,11 @@ public:
/// @returns the bit value at bitPosition
/// @brief Array-indexing support.
- bool operator[](unsigned bitPosition) const;
+ bool operator[](unsigned bitPosition) const {
+ assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
+ return (maskBit(bitPosition) &
+ (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
+ }
/// @}
/// @name Comparison Operators
@@ -1056,6 +1067,16 @@ public:
/// @brief Zero extend or truncate to width
APInt zextOrTrunc(unsigned width) const;
+ /// Make this APInt have the bit width given by \p width. The value is sign
+ /// extended, or left alone to make it that width.
+  /// @brief Sign extend to width, or leave unchanged
+ APInt sextOrSelf(unsigned width) const;
+
+ /// Make this APInt have the bit width given by \p width. The value is zero
+ /// extended, or left alone to make it that width.
+  /// @brief Zero extend to width, or leave unchanged
+ APInt zextOrSelf(unsigned width) const;
+
/// @}
/// @name Bit Manipulation Operators
/// @{
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 33a8c651b23a..f4c8e5586213 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -14,8 +14,7 @@
#include <vector>
namespace llvm {
- class APInt;
-
+
/// ArrayRef - Represent a constant reference to an array (0 or more elements
/// consecutively in memory), i.e. a start pointer and a length. It allows
/// various APIs to take consecutive elements easily and conveniently.
@@ -33,33 +32,33 @@ namespace llvm {
typedef const T *iterator;
typedef const T *const_iterator;
typedef size_t size_type;
-
+
private:
/// The start of the array, in an external buffer.
const T *Data;
-
+
/// The number of elements.
size_type Length;
-
+
public:
/// @name Constructors
/// @{
-
+
/// Construct an empty ArrayRef.
/*implicit*/ ArrayRef() : Data(0), Length(0) {}
-
+
/// Construct an ArrayRef from a single element.
/*implicit*/ ArrayRef(const T &OneElt)
: Data(&OneElt), Length(1) {}
-
+
/// Construct an ArrayRef from a pointer and length.
/*implicit*/ ArrayRef(const T *data, size_t length)
: Data(data), Length(length) {}
-
+
/// Construct an ArrayRef from a range.
ArrayRef(const T *begin, const T *end)
: Data(begin), Length(end - begin) {}
-
+
/// Construct an ArrayRef from a SmallVector.
/*implicit*/ ArrayRef(const SmallVectorImpl<T> &Vec)
: Data(Vec.data()), Length(Vec.size()) {}
@@ -67,39 +66,39 @@ namespace llvm {
/// Construct an ArrayRef from a std::vector.
/*implicit*/ ArrayRef(const std::vector<T> &Vec)
: Data(Vec.empty() ? (T*)0 : &Vec[0]), Length(Vec.size()) {}
-
+
/// Construct an ArrayRef from a C array.
template <size_t N>
/*implicit*/ ArrayRef(const T (&Arr)[N])
: Data(Arr), Length(N) {}
-
+
/// @}
/// @name Simple Operations
/// @{
iterator begin() const { return Data; }
iterator end() const { return Data + Length; }
-
+
/// empty - Check if the array is empty.
bool empty() const { return Length == 0; }
-
+
const T *data() const { return Data; }
-
+
/// size - Get the array size.
size_t size() const { return Length; }
-
+
/// front - Get the first element.
const T &front() const {
assert(!empty());
return Data[0];
}
-
+
/// back - Get the last element.
const T &back() const {
assert(!empty());
return Data[Length-1];
}
-
+
/// equals - Check for element-wise equality.
bool equals(ArrayRef RHS) const {
if (Length != RHS.Length)
@@ -111,18 +110,18 @@ namespace llvm {
}
/// slice(n) - Chop off the first N elements of the array.
- ArrayRef<T> slice(unsigned N) {
+ ArrayRef<T> slice(unsigned N) const {
assert(N <= size() && "Invalid specifier");
return ArrayRef<T>(data()+N, size()-N);
}
/// slice(n, m) - Chop off the first N elements of the array, and keep M
/// elements in the array.
- ArrayRef<T> slice(unsigned N, unsigned M) {
+ ArrayRef<T> slice(unsigned N, unsigned M) const {
assert(N+M <= size() && "Invalid specifier");
return ArrayRef<T>(data()+N, M);
}
-
+
/// @}
/// @name Operator Overloads
/// @{
@@ -130,22 +129,104 @@ namespace llvm {
assert(Index < Length && "Invalid index!");
return Data[Index];
}
-
+
/// @}
/// @name Expensive Operations
/// @{
std::vector<T> vec() const {
return std::vector<T>(Data, Data+Length);
}
-
+
/// @}
/// @name Conversion operators
/// @{
operator std::vector<T>() const {
return std::vector<T>(Data, Data+Length);
}
+
+ /// @}
+ };
+
+ /// MutableArrayRef - Represent a mutable reference to an array (0 or more
+ /// elements consecutively in memory), i.e. a start pointer and a length. It
+ /// allows various APIs to take and modify consecutive elements easily and
+ /// conveniently.
+ ///
+ /// This class does not own the underlying data, it is expected to be used in
+ /// situations where the data resides in some other buffer, whose lifetime
+ /// extends past that of the MutableArrayRef. For this reason, it is not in
+ /// general safe to store a MutableArrayRef.
+ ///
+ /// This is intended to be trivially copyable, so it should be passed by
+ /// value.
+ template<typename T>
+ class MutableArrayRef : public ArrayRef<T> {
+ public:
+ typedef T *iterator;
+
+    /// Construct an empty MutableArrayRef.
+ /*implicit*/ MutableArrayRef() : ArrayRef<T>() {}
+
+    /// Construct a MutableArrayRef from a single element.
+ /*implicit*/ MutableArrayRef(T &OneElt) : ArrayRef<T>(OneElt) {}
+
+    /// Construct a MutableArrayRef from a pointer and length.
+ /*implicit*/ MutableArrayRef(T *data, size_t length)
+ : ArrayRef<T>(data, length) {}
+
+    /// Construct a MutableArrayRef from a range.
+ MutableArrayRef(T *begin, T *end) : ArrayRef<T>(begin, end) {}
+
+    /// Construct a MutableArrayRef from a SmallVector.
+ /*implicit*/ MutableArrayRef(SmallVectorImpl<T> &Vec)
+ : ArrayRef<T>(Vec) {}
+
+ /// Construct a MutableArrayRef from a std::vector.
+ /*implicit*/ MutableArrayRef(std::vector<T> &Vec)
+ : ArrayRef<T>(Vec) {}
+
+    /// Construct a MutableArrayRef from a C array.
+ template <size_t N>
+ /*implicit*/ MutableArrayRef(T (&Arr)[N])
+ : ArrayRef<T>(Arr) {}
+
+ T *data() const { return const_cast<T*>(ArrayRef<T>::data()); }
+
+ iterator begin() const { return data(); }
+ iterator end() const { return data() + this->size(); }
+
+ /// front - Get the first element.
+ T &front() const {
+ assert(!this->empty());
+ return data()[0];
+ }
+
+ /// back - Get the last element.
+ T &back() const {
+ assert(!this->empty());
+ return data()[this->size()-1];
+ }
+
+ /// slice(n) - Chop off the first N elements of the array.
+ MutableArrayRef<T> slice(unsigned N) const {
+ assert(N <= this->size() && "Invalid specifier");
+ return MutableArrayRef<T>(data()+N, this->size()-N);
+ }
+
+ /// slice(n, m) - Chop off the first N elements of the array, and keep M
+ /// elements in the array.
+ MutableArrayRef<T> slice(unsigned N, unsigned M) const {
+ assert(N+M <= this->size() && "Invalid specifier");
+ return MutableArrayRef<T>(data()+N, M);
+ }
/// @}
+ /// @name Operator Overloads
+ /// @{
+ T &operator[](size_t Index) const {
+ assert(Index < this->size() && "Invalid index!");
+ return data()[Index];
+ }
};
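A short usage sketch: MutableArrayRef adds writable element access and
mutable slices on top of ArrayRef's non-owning view, so callers can hand out
sub-ranges of their own buffers for in-place updates.

    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    static void scale(MutableArrayRef<int> Vals, int K) {
      for (size_t i = 0, e = Vals.size(); i != e; ++i)
        Vals[i] *= K;                     // non-const operator[]
    }

    void demo() {
      int Buf[4] = {1, 2, 3, 4};
      scale(Buf, 10);                     // implicit from a C array
      scale(MutableArrayRef<int>(Buf, 4).slice(1, 2), -1); // sub-range only
    }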
/// @name ArrayRef Convenience constructors
@@ -215,5 +296,5 @@ namespace llvm {
static const bool value = true;
};
}
-
+
#endif
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index ac1cf0c79a8f..7e0b5ba37196 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -14,12 +14,12 @@
#ifndef LLVM_ADT_BITVECTOR_H
#define LLVM_ADT_BITVECTOR_H
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdlib>
-#include <cstring>
namespace llvm {
@@ -116,7 +116,7 @@ public:
else if (sizeof(BitWord) == 8)
NumBits += CountPopulation_64(Bits[i]);
else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
return NumBits;
}
@@ -146,10 +146,9 @@ public:
if (Bits[i] != 0) {
if (sizeof(BitWord) == 4)
return i * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Bits[i]);
- else if (sizeof(BitWord) == 8)
+ if (sizeof(BitWord) == 8)
return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]);
- else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
return -1;
}
@@ -170,10 +169,9 @@ public:
if (Copy != 0) {
if (sizeof(BitWord) == 4)
return WordPos * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Copy);
- else if (sizeof(BitWord) == 8)
+ if (sizeof(BitWord) == 8)
return WordPos * BITWORD_SIZE + CountTrailingZeros_64(Copy);
- else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
// Check subsequent words.
@@ -181,10 +179,9 @@ public:
if (Bits[i] != 0) {
if (sizeof(BitWord) == 4)
return i * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Bits[i]);
- else if (sizeof(BitWord) == 8)
+ if (sizeof(BitWord) == 8)
return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]);
- else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
return -1;
}
@@ -318,6 +315,16 @@ public:
return *this;
}
+ // reset - Reset bits that are set in RHS. Same as *this &= ~RHS.
+ BitVector &reset(const BitVector &RHS) {
+ unsigned ThisWords = NumBitWords(size());
+ unsigned RHSWords = NumBitWords(RHS.size());
+ unsigned i;
+ for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+ Bits[i] &= ~RHS.Bits[i];
+ return *this;
+ }
+
BitVector &operator|=(const BitVector &RHS) {
if (size() < RHS.size())
resize(RHS.size());
@@ -365,6 +372,42 @@ public:
std::swap(Capacity, RHS.Capacity);
}
+ //===--------------------------------------------------------------------===//
+ // Portable bit mask operations.
+ //===--------------------------------------------------------------------===//
+ //
+ // These methods all operate on arrays of uint32_t, each holding 32 bits. The
+ // fixed word size makes it easier to work with literal bit vector constants
+ // in portable code.
+ //
+ // The LSB in each word is the lowest numbered bit. The size of a portable
+ // bit mask is always a whole multiple of 32 bits. If no bit mask size is
+ // given, the bit mask is assumed to cover the entire BitVector.
+
+ /// setBitsInMask - Add '1' bits from Mask to this vector. Don't resize.
+ /// This computes "*this |= Mask".
+ void setBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+ applyMask<true, false>(Mask, MaskWords);
+ }
+
+ /// clearBitsInMask - Clear any bits in this vector that are set in Mask.
+ /// Don't resize. This computes "*this &= ~Mask".
+ void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+ applyMask<false, false>(Mask, MaskWords);
+ }
+
+ /// setBitsNotInMask - Add a bit to this vector for every '0' bit in Mask.
+ /// Don't resize. This computes "*this |= ~Mask".
+ void setBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+ applyMask<true, true>(Mask, MaskWords);
+ }
+
+ /// clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
+ /// Don't resize. This computes "*this &= Mask".
+ void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords = ~0u) {
+ applyMask<false, true>(Mask, MaskWords);
+ }
+
private:
unsigned NumBitWords(unsigned S) const {
return (S + BITWORD_SIZE-1) / BITWORD_SIZE;
@@ -400,6 +443,33 @@ private:
void init_words(BitWord *B, unsigned NumWords, bool t) {
memset(B, 0 - (int)t, NumWords*sizeof(BitWord));
}
+
+ template<bool AddBits, bool InvertMask>
+ void applyMask(const uint32_t *Mask, unsigned MaskWords) {
+ assert(BITWORD_SIZE % 32 == 0 && "Unsupported BitWord size.");
+ MaskWords = std::min(MaskWords, (size() + 31) / 32);
+ const unsigned Scale = BITWORD_SIZE / 32;
+ unsigned i;
+ for (i = 0; MaskWords >= Scale; ++i, MaskWords -= Scale) {
+ BitWord BW = Bits[i];
+ // This inner loop should unroll completely when BITWORD_SIZE > 32.
+ for (unsigned b = 0; b != BITWORD_SIZE; b += 32) {
+ uint32_t M = *Mask++;
+ if (InvertMask) M = ~M;
+ if (AddBits) BW |= BitWord(M) << b;
+ else BW &= ~(BitWord(M) << b);
+ }
+ Bits[i] = BW;
+ }
+ for (unsigned b = 0; MaskWords; b += 32, --MaskWords) {
+ uint32_t M = *Mask++;
+ if (InvertMask) M = ~M;
+ if (AddBits) Bits[i] |= BitWord(M) << b;
+ else Bits[i] &= ~(BitWord(M) << b);
+ }
+ if (AddBits)
+ clear_unused_bits();
+ }
};
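A small sketch of the portable mask layout described above: word 0 covers
bits 0-31, word 1 covers bits 32-63, least significant bit first within each
word.

    #include "llvm/ADT/BitVector.h"
    #include <stdint.h>
    using namespace llvm;

    void maskDemo() {
      BitVector BV(64);
      static const uint32_t Mask[2] = { 0x0000000F, 0x80000000 };
      BV.setBitsInMask(Mask, 2);   // sets bits 0-3 and bit 63
      BV.clearBitsInMask(Mask, 2); // clears the same bits
    }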
inline BitVector operator&(const BitVector &LHS, const BitVector &RHS) {
diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
index 99ed15c0d60f..e502ac4348d0 100644
--- a/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -36,6 +36,7 @@ namespace llvm {
/// for more information on the properties which the predicate function itself
/// should satisfy.
class DAGDeltaAlgorithm {
+ virtual void anchor();
public:
typedef unsigned change_ty;
typedef std::pair<change_ty, change_ty> edge_ty;
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index e70cacf3ca5b..8d4a19d0919c 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -30,12 +30,11 @@ namespace llvm {
template<typename KeyT, typename ValueT,
typename KeyInfoT = DenseMapInfo<KeyT>,
- typename ValueInfoT = DenseMapInfo<ValueT>, bool IsConst = false>
+ bool IsConst = false>
class DenseMapIterator;
template<typename KeyT, typename ValueT,
- typename KeyInfoT = DenseMapInfo<KeyT>,
- typename ValueInfoT = DenseMapInfo<ValueT> >
+ typename KeyInfoT = DenseMapInfo<KeyT> >
class DenseMap {
typedef std::pair<KeyT, ValueT> BucketT;
unsigned NumBuckets;
@@ -80,19 +79,19 @@ public:
typedef DenseMapIterator<KeyT, ValueT, KeyInfoT> iterator;
typedef DenseMapIterator<KeyT, ValueT,
- KeyInfoT, ValueInfoT, true> const_iterator;
+ KeyInfoT, true> const_iterator;
inline iterator begin() {
// When the map is empty, avoid the overhead of AdvancePastEmptyBuckets().
return empty() ? end() : iterator(Buckets, Buckets+NumBuckets);
}
inline iterator end() {
- return iterator(Buckets+NumBuckets, Buckets+NumBuckets);
+ return iterator(Buckets+NumBuckets, Buckets+NumBuckets, true);
}
inline const_iterator begin() const {
return empty() ? end() : const_iterator(Buckets, Buckets+NumBuckets);
}
inline const_iterator end() const {
- return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets);
+ return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets, true);
}
bool empty() const { return NumEntries == 0; }
@@ -137,13 +136,33 @@ public:
iterator find(const KeyT &Val) {
BucketT *TheBucket;
if (LookupBucketFor(Val, TheBucket))
- return iterator(TheBucket, Buckets+NumBuckets);
+ return iterator(TheBucket, Buckets+NumBuckets, true);
return end();
}
const_iterator find(const KeyT &Val) const {
BucketT *TheBucket;
if (LookupBucketFor(Val, TheBucket))
- return const_iterator(TheBucket, Buckets+NumBuckets);
+ return const_iterator(TheBucket, Buckets+NumBuckets, true);
+ return end();
+ }
+
+ /// Alternate version of find() which allows a different, and possibly
+ /// less expensive, key type.
+ /// The DenseMapInfo is responsible for supplying methods
+ /// getHashValue(LookupKeyT) and isEqual(LookupKeyT, KeyT) for each key
+ /// type used.
+ template<class LookupKeyT>
+ iterator find_as(const LookupKeyT &Val) {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Val, TheBucket))
+ return iterator(TheBucket, Buckets+NumBuckets, true);
+ return end();
+ }
+ template<class LookupKeyT>
+ const_iterator find_as(const LookupKeyT &Val) const {
+ BucketT *TheBucket;
+ if (LookupBucketFor(Val, TheBucket))
+ return const_iterator(TheBucket, Buckets+NumBuckets, true);
return end();
}
@@ -162,13 +181,12 @@ public:
std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) {
BucketT *TheBucket;
if (LookupBucketFor(KV.first, TheBucket))
- return std::make_pair(iterator(TheBucket, Buckets+NumBuckets),
+ return std::make_pair(iterator(TheBucket, Buckets+NumBuckets, true),
false); // Already in map.
// Otherwise, insert the new element.
TheBucket = InsertIntoBucket(KV.first, KV.second, TheBucket);
- return std::make_pair(iterator(TheBucket, Buckets+NumBuckets),
- true);
+ return std::make_pair(iterator(TheBucket, Buckets+NumBuckets, true), true);
}
/// insert - Range insertion of pairs.
@@ -237,7 +255,7 @@ public:
private:
void CopyFrom(const DenseMap& other) {
if (NumBuckets != 0 &&
- (!isPodLike<KeyInfoT>::value || !isPodLike<ValueInfoT>::value)) {
+ (!isPodLike<KeyT>::value || !isPodLike<ValueT>::value)) {
const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) {
if (!KeyInfoT::isEqual(P->first, EmptyKey) &&
@@ -266,7 +284,7 @@ private:
Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) * NumBuckets));
- if (isPodLike<KeyInfoT>::value && isPodLike<ValueInfoT>::value)
+ if (isPodLike<KeyT>::value && isPodLike<ValueT>::value)
memcpy(Buckets, other.Buckets, NumBuckets * sizeof(BucketT));
else
for (size_t i = 0; i < NumBuckets; ++i) {
@@ -310,6 +328,10 @@ private:
static unsigned getHashValue(const KeyT &Val) {
return KeyInfoT::getHashValue(Val);
}
+ template<typename LookupKeyT>
+ static unsigned getHashValue(const LookupKeyT &Val) {
+ return KeyInfoT::getHashValue(Val);
+ }
static const KeyT getEmptyKey() {
return KeyInfoT::getEmptyKey();
}
@@ -321,7 +343,8 @@ private:
/// FoundBucket. If the bucket contains the key and a value, this returns
/// true, otherwise it returns a bucket with an empty marker or tombstone and
/// returns false.
- bool LookupBucketFor(const KeyT &Val, BucketT *&FoundBucket) const {
+ template<typename LookupKeyT>
+ bool LookupBucketFor(const LookupKeyT &Val, BucketT *&FoundBucket) const {
unsigned BucketNo = getHashValue(Val);
unsigned ProbeAmt = 1;
BucketT *BucketsPtr = Buckets;
@@ -342,7 +365,7 @@ private:
while (1) {
BucketT *ThisBucket = BucketsPtr + (BucketNo & (NumBuckets-1));
// Found Val's bucket? If so, return it.
- if (KeyInfoT::isEqual(ThisBucket->first, Val)) {
+ if (KeyInfoT::isEqual(Val, ThisBucket->first)) {
FoundBucket = ThisBucket;
return true;
}
@@ -478,12 +501,12 @@ public:
};
template<typename KeyT, typename ValueT,
- typename KeyInfoT, typename ValueInfoT, bool IsConst>
+ typename KeyInfoT, bool IsConst>
class DenseMapIterator {
typedef std::pair<KeyT, ValueT> Bucket;
typedef DenseMapIterator<KeyT, ValueT,
- KeyInfoT, ValueInfoT, true> ConstIterator;
- friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, ValueInfoT, true>;
+ KeyInfoT, true> ConstIterator;
+ friend class DenseMapIterator<KeyT, ValueT, KeyInfoT, true>;
public:
typedef ptrdiff_t difference_type;
typedef typename conditional<IsConst, const Bucket, Bucket>::type value_type;
@@ -495,15 +518,16 @@ private:
public:
DenseMapIterator() : Ptr(0), End(0) {}
- DenseMapIterator(pointer Pos, pointer E) : Ptr(Pos), End(E) {
- AdvancePastEmptyBuckets();
+ DenseMapIterator(pointer Pos, pointer E, bool NoAdvance = false)
+ : Ptr(Pos), End(E) {
+ if (!NoAdvance) AdvancePastEmptyBuckets();
}
// If IsConst is true this is a converting constructor from iterator to
// const_iterator and the default copy constructor is used.
// Otherwise this is a copy constructor for iterator.
DenseMapIterator(const DenseMapIterator<KeyT, ValueT,
- KeyInfoT, ValueInfoT, false>& I)
+ KeyInfoT, false>& I)
: Ptr(I.Ptr), End(I.End) {}
reference operator*() const {
@@ -541,9 +565,9 @@ private:
}
};
-template<typename KeyT, typename ValueT, typename KeyInfoT, typename ValueInfoT>
+template<typename KeyT, typename ValueT, typename KeyInfoT>
static inline size_t
-capacity_in_bytes(const DenseMap<KeyT, ValueT, KeyInfoT, ValueInfoT> &X) {
+capacity_in_bytes(const DenseMap<KeyT, ValueT, KeyInfoT> &X) {
return X.getMemorySize();
}
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index df4084e6f411..1559a35c39f9 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -59,7 +59,7 @@ template<> struct DenseMapInfo<char> {
// Provide DenseMapInfo for unsigned ints.
template<> struct DenseMapInfo<unsigned> {
- static inline unsigned getEmptyKey() { return ~0; }
+ static inline unsigned getEmptyKey() { return ~0U; }
static inline unsigned getTombstoneKey() { return ~0U - 1; }
static unsigned getHashValue(const unsigned& Val) { return Val * 37U; }
static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index d2e0b8f91b2c..7d7c77770020 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -193,12 +193,11 @@ protected:
virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0;
/// NodeEquals - Instantiations of the FoldingSet template implement
/// this function to compare the given node with the given ID.
- virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID,
+ virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID) const=0;
- /// NodeEquals - Instantiations of the FoldingSet template implement
+ /// ComputeNodeHash - Instantiations of the FoldingSet template implement
/// this function to compute a hash value for the given node.
- virtual unsigned ComputeNodeHash(Node *N,
- FoldingSetNodeID &TempID) const = 0;
+ virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const = 0;
};
//===----------------------------------------------------------------------===//
@@ -220,7 +219,7 @@ template<typename T> struct DefaultFoldingSetTrait {
// to compute a temporary ID if necessary. The default implementation
// just calls Profile and does a regular comparison. Implementations
// can override this to provide more efficient implementations.
- static inline bool Equals(T &X, const FoldingSetNodeID &ID,
+ static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID);
// ComputeHash - Compute a hash value for X, using TempID to
@@ -249,7 +248,7 @@ struct DefaultContextualFoldingSetTrait {
static void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) {
X.Profile(ID, Context);
}
- static inline bool Equals(T &X, const FoldingSetNodeID &ID,
+ static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID, Ctx Context);
static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID,
Ctx Context);
@@ -344,7 +343,7 @@ template<class T> class FoldingSetBucketIterator;
template<typename T>
inline bool
DefaultFoldingSetTrait<T>::Equals(T &X, const FoldingSetNodeID &ID,
- FoldingSetNodeID &TempID) {
+ unsigned IDHash, FoldingSetNodeID &TempID) {
FoldingSetTrait<T>::Profile(X, TempID);
return TempID == ID;
}
@@ -358,6 +357,7 @@ template<typename T, typename Ctx>
inline bool
DefaultContextualFoldingSetTrait<T, Ctx>::Equals(T &X,
const FoldingSetNodeID &ID,
+ unsigned IDHash,
FoldingSetNodeID &TempID,
Ctx Context) {
ContextualFoldingSetTrait<T, Ctx>::Profile(X, TempID, Context);
@@ -387,15 +387,14 @@ private:
}
/// NodeEquals - Instantiations may optionally provide a way to compare a
/// node with a specified ID.
- virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID,
+ virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID) const {
T *TN = static_cast<T *>(N);
- return FoldingSetTrait<T>::Equals(*TN, ID, TempID);
+ return FoldingSetTrait<T>::Equals(*TN, ID, IDHash, TempID);
}
- /// NodeEquals - Instantiations may optionally provide a way to compute a
+ /// ComputeNodeHash - Instantiations may optionally provide a way to compute a
/// hash value directly from a node.
- virtual unsigned ComputeNodeHash(Node *N,
- FoldingSetNodeID &TempID) const {
+ virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const {
T *TN = static_cast<T *>(N);
return FoldingSetTrait<T>::ComputeHash(*TN, TempID);
}
@@ -465,10 +464,11 @@ private:
ContextualFoldingSetTrait<T, Ctx>::Profile(*TN, ID, Context);
}
virtual bool NodeEquals(FoldingSetImpl::Node *N,
- const FoldingSetNodeID &ID,
+ const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID) const {
T *TN = static_cast<T *>(N);
- return ContextualFoldingSetTrait<T, Ctx>::Equals(*TN, ID, TempID, Context);
+ return ContextualFoldingSetTrait<T, Ctx>::Equals(*TN, ID, IDHash, TempID,
+ Context);
}
virtual unsigned ComputeNodeHash(FoldingSetImpl::Node *N,
FoldingSetNodeID &TempID) const {
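
The IDHash parameter newly threaded through NodeEquals/Equals above lets a
trait reject mismatches using a precomputed hash before re-profiling a node.
A minimal sketch of such a specialization, using a hypothetical node type
that caches its own hash (not part of this patch):

    #include "llvm/ADT/FoldingSet.h"

    // Hypothetical node type that caches its hash alongside its payload.
    struct CachedNode : llvm::FoldingSetNode {
      unsigned CachedHash; // assumed kept in sync with Payload
      int Payload;
      void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Payload); }
    };

    namespace llvm {
    template <>
    struct FoldingSetTrait<CachedNode> : DefaultFoldingSetTrait<CachedNode> {
      static bool Equals(CachedNode &X, const FoldingSetNodeID &ID,
                         unsigned IDHash, FoldingSetNodeID &TempID) {
        // Cheap rejection: if the cached hash differs, the nodes cannot be
        // equal, and we avoid re-profiling X entirely.
        if (X.CachedHash != IDHash)
          return false;
        return DefaultFoldingSetTrait<CachedNode>::Equals(X, ID, IDHash, TempID);
      }
    };
    }
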
diff --git a/include/llvm/ADT/GraphTraits.h b/include/llvm/ADT/GraphTraits.h
index 0fd1f5022af7..823caef7647e 100644
--- a/include/llvm/ADT/GraphTraits.h
+++ b/include/llvm/ADT/GraphTraits.h
@@ -43,9 +43,12 @@ struct GraphTraits {
// typedef ...iterator nodes_iterator;
// static nodes_iterator nodes_begin(GraphType *G)
// static nodes_iterator nodes_end (GraphType *G)
- //
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+ // static unsigned size (GraphType *G)
+ // Return total number of nodes in the graph
+ //
+
// If anyone tries to use this class without having an appropriate
// specialization, make an error. If you get this error, it's because you
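
For reference, a sketch of a complete GraphTraits specialization satisfying
this contract, including the newly documented size hook; ToyGraph is a
hypothetical type used only for illustration:

    #include "llvm/ADT/GraphTraits.h"
    #include <vector>

    struct ToyGraph {
      struct Node { std::vector<Node *> Succs; };
      std::vector<Node *> Nodes;
    };

    namespace llvm {
    template <> struct GraphTraits<ToyGraph *> {
      typedef ToyGraph::Node NodeType;
      typedef std::vector<NodeType *>::iterator ChildIteratorType;
      typedef std::vector<NodeType *>::iterator nodes_iterator;

      static NodeType *getEntryNode(ToyGraph *G) { return G->Nodes.front(); }
      static ChildIteratorType child_begin(NodeType *N) { return N->Succs.begin(); }
      static ChildIteratorType child_end(NodeType *N) { return N->Succs.end(); }
      static nodes_iterator nodes_begin(ToyGraph *G) { return G->Nodes.begin(); }
      static nodes_iterator nodes_end(ToyGraph *G) { return G->Nodes.end(); }
      // The hook documented above: total number of nodes in the graph.
      static unsigned size(ToyGraph *G) { return G->Nodes.size(); }
    };
    }
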
diff --git a/include/llvm/ADT/Hashing.h b/include/llvm/ADT/Hashing.h
new file mode 100644
index 000000000000..53032ee538d2
--- /dev/null
+++ b/include/llvm/ADT/Hashing.h
@@ -0,0 +1,770 @@
+//===-- llvm/ADT/Hashing.h - Utilities for hashing --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the newly proposed standard C++ interfaces for hashing
+// arbitrary data and building hash functions for user-defined types. This
+// interface was originally proposed in N3333[1] and is currently under review
+// for inclusion in a future TR and/or standard.
+//
+// The primary interfaces provided comprise one type and three functions:
+//
+// -- 'hash_code' class is an opaque type representing the hash code for some
+// data. It is the intended product of hashing, and can be used to implement
+// hash tables, checksumming, and other common uses of hashes. It is not an
+// integer type (although it can be converted to one) because it is risky
+// to assume much about the internals of a hash_code. In particular, each
+// execution of the program has a high probability of producing a different
+// hash_code for a given input. Thus hash_code values are not safe to save
+// or persist, and should only be used within a single execution to build
+// hashing data structures.
+//
+// -- 'hash_value' is a function designed to be overloaded for each
+// user-defined type which wishes to be used within a hashing context. It
+// should be overloaded within the user-defined type's namespace and found
+// via ADL. Overloads for primitive types are provided by this library.
+//
+// -- 'hash_combine' and 'hash_combine_range' are functions designed to aid
+// programmers in easily and intuitively combining a set of data into
+// a single hash_code for their object. They should only logically be used
+// within the implementation of a 'hash_value' routine or similar context.
+//
+// Note that 'hash_combine_range' contains very special logic for hashing
+// a contiguous array of integers or pointers. This logic is *extremely*
+// fast: on a modern Intel "Gainestown" Xeon (Nehalem uarch) @ 2.2 GHz, it
+// benchmarked at over 6.5 GiB/s for large keys and under 20 cycles/hash for
+// keys under 32 bytes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_HASHING_H
+#define LLVM_ADT_HASHING_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/type_traits.h"
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <iterator>
+#include <utility>
+
+// Allow detecting C++11 feature availability when building with Clang without
+// breaking other compilers.
+#ifndef __has_feature
+# define __has_feature(x) 0
+#endif
+
+namespace llvm {
+
+/// \brief An opaque object representing a hash code.
+///
+/// This object represents the result of hashing some entity. It is intended to
+/// be used to implement hashtables or other hashing-based data structures.
+/// While it wraps and exposes a numeric value, this value should not be
+/// trusted to be stable or predictable across processes or executions.
+///
+/// In order to obtain the hash_code for an object 'x':
+/// \code
+/// using llvm::hash_value;
+/// llvm::hash_code code = hash_value(x);
+/// \endcode
+///
+/// Also note that two numerical values are reserved: the implementation
+/// guarantees they will never be produced for real hash_codes, so they can
+/// be used as sentinels within hashing data structures.
+class hash_code {
+ size_t value;
+
+public:
+ /// \brief Default construct a hash_code.
+ /// Note that this leaves the value uninitialized.
+ hash_code() {}
+
+ /// \brief Form a hash code directly from a numerical value.
+ hash_code(size_t value) : value(value) {}
+
+ /// \brief Convert the hash code to its numerical value for use.
+ /*explicit*/ operator size_t() const { return value; }
+
+ friend bool operator==(const hash_code &lhs, const hash_code &rhs) {
+ return lhs.value == rhs.value;
+ }
+ friend bool operator!=(const hash_code &lhs, const hash_code &rhs) {
+ return lhs.value != rhs.value;
+ }
+
+ /// \brief Allow a hash_code to be directly run through hash_value.
+ friend size_t hash_value(const hash_code &code) { return code.value; }
+};
+
+/// \brief Compute a hash_code for any integer value.
+///
+/// Note that this function is intended to compute the same hash_code for
+/// a particular value without regard to the pre-promotion type. This is in
+/// contrast to hash_combine which may produce different hash_codes for
+/// differing argument types even if they would implicitly promote to a common
+/// type without changing the value.
+template <typename T>
+typename enable_if<is_integral_or_enum<T>, hash_code>::type hash_value(T value);
+
+/// \brief Compute a hash_code for a pointer's address.
+///
+/// N.B.: This hashes the *address*. Not the value and not the type.
+template <typename T> hash_code hash_value(const T *ptr);
+
+/// \brief Compute a hash_code for a pair of objects.
+template <typename T, typename U>
+hash_code hash_value(const std::pair<T, U> &arg);
+
+/// \brief Compute a hash_code for a standard string.
+template <typename T>
+hash_code hash_value(const std::basic_string<T> &arg);
+
+
+/// \brief Override the execution seed with a fixed value.
+///
+/// This hashing library uses a per-execution seed designed to change on each
+/// run with high probability in order to ensure that the hash codes are not
+/// attackable and to ensure that output which is intended to be stable does
+/// not rely on the particulars of the hash codes produced.
+///
+/// That said, there are use cases where it is important to be able to
+/// reproduce *exactly* a specific behavior. To that end, we provide a function
+/// which will forcibly set the seed to a fixed value. This must be done at the
+/// start of the program, before any hashes are computed. Also, it cannot be
+/// undone. This makes it thread-hostile and very hard to use safely anywhere
+/// other than at the very start of a simple program designed for
+/// reproducible behavior.
+void set_fixed_execution_hash_seed(size_t fixed_value);
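
Taken together, the intended usage pattern for a user-defined type is small.
A sketch assuming only the declarations above; Point and its namespace are
hypothetical:

    #include "llvm/ADT/Hashing.h"

    namespace geometry {
    struct Point { int x, y; };

    // Found via ADL; composed from the members with hash_combine.
    inline llvm::hash_code hash_value(const Point &P) {
      return llvm::hash_combine(P.x, P.y);
    }
    }

    bool sameHash(const geometry::Point &A, const geometry::Point &B) {
      using llvm::hash_value;
      return hash_value(A) == hash_value(B); // equal points hash equally
    }
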
+
+
+// All of the implementation details of actually computing the various hash
+// code values are held within this namespace. These routines are included in
+// the header file mainly to allow inlining and constant propagation.
+namespace hashing {
+namespace detail {
+
+inline uint64_t fetch64(const char *p) {
+ uint64_t result;
+ memcpy(&result, p, sizeof(result));
+ if (sys::isBigEndianHost())
+ return sys::SwapByteOrder(result);
+ return result;
+}
+
+inline uint32_t fetch32(const char *p) {
+ uint32_t result;
+ memcpy(&result, p, sizeof(result));
+ if (sys::isBigEndianHost())
+ return sys::SwapByteOrder(result);
+ return result;
+}
+
+/// Some primes between 2^63 and 2^64 for various uses.
+static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
+static const uint64_t k1 = 0xb492b66fbe98f273ULL;
+static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
+static const uint64_t k3 = 0xc949d7c7509e6557ULL;
+
+/// \brief Bitwise right rotate.
+/// Normally this will compile to a single instruction, especially if the
+/// shift is a manifest constant.
+inline uint64_t rotate(uint64_t val, size_t shift) {
+ // Avoid shifting by 64: doing so yields an undefined result.
+ return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
+}
+
+inline uint64_t shift_mix(uint64_t val) {
+ return val ^ (val >> 47);
+}
+
+inline uint64_t hash_16_bytes(uint64_t low, uint64_t high) {
+ // Murmur-inspired hashing.
+ const uint64_t kMul = 0x9ddfea08eb382d69ULL;
+ uint64_t a = (low ^ high) * kMul;
+ a ^= (a >> 47);
+ uint64_t b = (high ^ a) * kMul;
+ b ^= (b >> 47);
+ b *= kMul;
+ return b;
+}
+
+inline uint64_t hash_1to3_bytes(const char *s, size_t len, uint64_t seed) {
+ uint8_t a = s[0];
+ uint8_t b = s[len >> 1];
+ uint8_t c = s[len - 1];
+ uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8);
+ uint32_t z = len + (static_cast<uint32_t>(c) << 2);
+ return shift_mix(y * k2 ^ z * k3 ^ seed) * k2;
+}
+
+inline uint64_t hash_4to8_bytes(const char *s, size_t len, uint64_t seed) {
+ uint64_t a = fetch32(s);
+ return hash_16_bytes(len + (a << 3), seed ^ fetch32(s + len - 4));
+}
+
+inline uint64_t hash_9to16_bytes(const char *s, size_t len, uint64_t seed) {
+ uint64_t a = fetch64(s);
+ uint64_t b = fetch64(s + len - 8);
+ return hash_16_bytes(seed ^ a, rotate(b + len, len)) ^ b;
+}
+
+inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) {
+ uint64_t a = fetch64(s) * k1;
+ uint64_t b = fetch64(s + 8);
+ uint64_t c = fetch64(s + len - 8) * k2;
+ uint64_t d = fetch64(s + len - 16) * k0;
+ return hash_16_bytes(rotate(a - b, 43) + rotate(c ^ seed, 30) + d,
+ a + rotate(b ^ k3, 20) - c + len + seed);
+}
+
+inline uint64_t hash_33to64_bytes(const char *s, size_t len, uint64_t seed) {
+ uint64_t z = fetch64(s + 24);
+ uint64_t a = fetch64(s) + (len + fetch64(s + len - 16)) * k0;
+ uint64_t b = rotate(a + z, 52);
+ uint64_t c = rotate(a, 37);
+ a += fetch64(s + 8);
+ c += rotate(a, 7);
+ a += fetch64(s + 16);
+ uint64_t vf = a + z;
+ uint64_t vs = b + rotate(a, 31) + c;
+ a = fetch64(s + 16) + fetch64(s + len - 32);
+ z = fetch64(s + len - 8);
+ b = rotate(a + z, 52);
+ c = rotate(a, 37);
+ a += fetch64(s + len - 24);
+ c += rotate(a, 7);
+ a += fetch64(s + len - 16);
+ uint64_t wf = a + z;
+ uint64_t ws = b + rotate(a, 31) + c;
+ uint64_t r = shift_mix((vf + ws) * k2 + (wf + vs) * k0);
+ return shift_mix((seed ^ (r * k0)) + vs) * k2;
+}
+
+inline uint64_t hash_short(const char *s, size_t length, uint64_t seed) {
+ if (length >= 4 && length <= 8)
+ return hash_4to8_bytes(s, length, seed);
+ if (length > 8 && length <= 16)
+ return hash_9to16_bytes(s, length, seed);
+ if (length > 16 && length <= 32)
+ return hash_17to32_bytes(s, length, seed);
+ if (length > 32)
+ return hash_33to64_bytes(s, length, seed);
+ if (length != 0)
+ return hash_1to3_bytes(s, length, seed);
+
+ return k2 ^ seed;
+}
+
+/// \brief The intermediate state used during hashing.
+/// Currently, the algorithm for computing hash codes is based on CityHash and
+/// keeps 56 bytes of arbitrary state.
+struct hash_state {
+ uint64_t h0, h1, h2, h3, h4, h5, h6;
+ uint64_t seed;
+
+ /// \brief Create a new hash_state structure and initialize it based on the
+ /// seed and the first 64-byte chunk.
+ /// This effectively performs the initial mix.
+ static hash_state create(const char *s, uint64_t seed) {
+ hash_state state = {
+ 0, seed, hash_16_bytes(seed, k1), rotate(seed ^ k1, 49),
+ seed * k1, shift_mix(seed), 0, seed };
+ state.h6 = hash_16_bytes(state.h4, state.h5);
+ state.mix(s);
+ return state;
+ }
+
+  /// \brief Mix 32 bytes from the input sequence into the 16 bytes of 'a'
+ /// and 'b', including whatever is already in 'a' and 'b'.
+ static void mix_32_bytes(const char *s, uint64_t &a, uint64_t &b) {
+ a += fetch64(s);
+ uint64_t c = fetch64(s + 24);
+ b = rotate(b + a + c, 21);
+ uint64_t d = a;
+ a += fetch64(s + 8) + fetch64(s + 16);
+ b += rotate(a, 44) + d;
+ a += c;
+ }
+
+ /// \brief Mix in a 64-byte buffer of data.
+ /// We mix all 64 bytes even when the chunk length is smaller, but we
+ /// record the actual length.
+ void mix(const char *s) {
+ h0 = rotate(h0 + h1 + h3 + fetch64(s + 8), 37) * k1;
+ h1 = rotate(h1 + h4 + fetch64(s + 48), 42) * k1;
+ h0 ^= h6;
+ h1 += h3 + fetch64(s + 40);
+ h2 = rotate(h2 + h5, 33) * k1;
+ h3 = h4 * k1;
+ h4 = h0 + h5;
+ mix_32_bytes(s, h3, h4);
+ h5 = h2 + h6;
+ h6 = h1 + fetch64(s + 16);
+ mix_32_bytes(s + 32, h5, h6);
+ std::swap(h2, h0);
+ }
+
+ /// \brief Compute the final 64-bit hash code value based on the current
+ /// state and the length of bytes hashed.
+ uint64_t finalize(size_t length) {
+ return hash_16_bytes(hash_16_bytes(h3, h5) + shift_mix(h1) * k1 + h2,
+ hash_16_bytes(h4, h6) + shift_mix(length) * k1 + h0);
+ }
+};
+
+
+/// \brief A global, fixed seed-override variable.
+///
+/// This variable can be set using the \see llvm::set_fixed_execution_hash_seed
+/// function. See that function for details. Do not, under any circumstances,
+/// set or read this variable.
+extern size_t fixed_seed_override;
+
+inline size_t get_execution_seed() {
+ // FIXME: This needs to be a per-execution seed. This is just a placeholder
+ // implementation. Switching to a per-execution seed is likely to flush out
+ // instability bugs and so will happen as its own commit.
+ //
+ // However, if there is a fixed seed override set the first time this is
+ // called, return that instead of the per-execution seed.
+ const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
+ static size_t seed = fixed_seed_override ? fixed_seed_override
+ : (size_t)seed_prime;
+ return seed;
+}
+
+
+/// \brief Trait to indicate whether a type's bits can be hashed directly.
+///
+/// A type trait which is true if we want to combine values for hashing by
+/// reading the underlying data. It is false if values of this type must
+/// first be passed to hash_value, and the resulting hash_codes combined.
+//
+// FIXME: We want to replace is_integral_or_enum and is_pointer here with
+// a predicate which asserts that comparing the underlying storage of two
+// values of the type for equality is equivalent to comparing the two values
+// for equality. For all the platforms we care about, this holds for integers
+// and pointers, but there are platforms where it doesn't and we would like to
+// support user-defined types which happen to satisfy this property.
+template <typename T> struct is_hashable_data
+ : integral_constant<bool, ((is_integral_or_enum<T>::value ||
+ is_pointer<T>::value) &&
+ 64 % sizeof(T) == 0)> {};
+
+// Special case std::pair to detect when both types are viable and when there
+// is no alignment-derived padding in the pair. This is a bit of a lie because
+// std::pair isn't truly POD, but it's close enough in all reasonable
+// implementations for our use case of hashing the underlying data.
+template <typename T, typename U> struct is_hashable_data<std::pair<T, U> >
+ : integral_constant<bool, (is_hashable_data<T>::value &&
+ is_hashable_data<U>::value &&
+ (sizeof(T) + sizeof(U)) ==
+ sizeof(std::pair<T, U>))> {};
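
A quick compile-time sanity sketch of what these traits accept, using
C++03-style negative-array-size asserts (illustrative only):

    #include "llvm/ADT/Hashing.h"
    #include <utility>

    using namespace llvm::hashing::detail;

    // int: integral, and its size evenly divides the 64-byte buffer.
    typedef char AssertIntOK[is_hashable_data<int>::value ? 1 : -1];
    // pair<int, int>: both halves hashable and no padding (4 + 4 == 8).
    typedef char AssertPairOK[
        is_hashable_data<std::pair<int, int> >::value ? 1 : -1];
    // pair<char, unsigned long long>: alignment padding forces the fallback
    // to element-wise hash_value composition.
    typedef char AssertPadded[
        !is_hashable_data<std::pair<char, unsigned long long> >::value ? 1 : -1];
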
+
+/// \brief Helper to get the hashable data representation for a type.
+/// This variant is enabled when the type itself can be used.
+template <typename T>
+typename enable_if<is_hashable_data<T>, T>::type
+get_hashable_data(const T &value) {
+ return value;
+}
+/// \brief Helper to get the hashable data representation for a type.
+/// This variant is enabled when we must first call hash_value and use the
+/// result as our data.
+template <typename T>
+typename enable_if_c<!is_hashable_data<T>::value, size_t>::type
+get_hashable_data(const T &value) {
+ using ::llvm::hash_value;
+ return hash_value(value);
+}
+
+/// \brief Helper to store data from a value into a buffer and advance the
+/// pointer into that buffer.
+///
+/// This routine first checks whether there is enough space in the provided
+/// buffer, and if not immediately returns false. If there is space, it
+/// copies the underlying bytes of value into the buffer, advances the
+/// buffer_ptr past the copied bytes, and returns true.
+template <typename T>
+bool store_and_advance(char *&buffer_ptr, char *buffer_end, const T& value,
+ size_t offset = 0) {
+ size_t store_size = sizeof(value) - offset;
+ if (buffer_ptr + store_size > buffer_end)
+ return false;
+ const char *value_data = reinterpret_cast<const char *>(&value);
+ memcpy(buffer_ptr, value_data + offset, store_size);
+ buffer_ptr += store_size;
+ return true;
+}
+
+/// \brief Implement the combining of a range of values into a hash_code.
+///
+/// This overload is selected for arbitrary input iterators. Each value is
+/// reduced to its hashable data (directly when possible, via hash_value
+/// otherwise), buffered into 64-byte chunks, and mixed into the hash state.
+template <typename InputIteratorT>
+hash_code hash_combine_range_impl(InputIteratorT first, InputIteratorT last) {
+ typedef typename std::iterator_traits<InputIteratorT>::value_type ValueT;
+ const size_t seed = get_execution_seed();
+ char buffer[64], *buffer_ptr = buffer;
+ char *const buffer_end = buffer_ptr + array_lengthof(buffer);
+ while (first != last && store_and_advance(buffer_ptr, buffer_end,
+ get_hashable_data(*first)))
+ ++first;
+ if (first == last)
+ return hash_short(buffer, buffer_ptr - buffer, seed);
+ assert(buffer_ptr == buffer_end);
+
+ hash_state state = state.create(buffer, seed);
+ size_t length = 64;
+ while (first != last) {
+ // Fill up the buffer. We don't clear it, which re-mixes the last round
+ // when only a partial 64-byte chunk is left.
+ buffer_ptr = buffer;
+ while (first != last && store_and_advance(buffer_ptr, buffer_end,
+ get_hashable_data(*first)))
+ ++first;
+
+ // Rotate the buffer if we did a partial fill in order to simulate doing
+    // a mix of the last 64 bytes. That is how the algorithm works when we
+ // have a contiguous byte sequence, and we want to emulate that here.
+ std::rotate(buffer, buffer_ptr, buffer_end);
+
+ // Mix this chunk into the current state.
+ state.mix(buffer);
+ length += buffer_ptr - buffer;
+  }
+
+ return state.finalize(length);
+}
+
+/// \brief Implement the combining of integral values into a hash_code.
+///
+/// This overload is selected when the value type of the iterator is integral
+/// and when the input iterator is actually a pointer. Rather than computing
+/// a hash_code for each object and then combining them, this (as an
+/// optimization) directly combines the integers. Also, because the integers
+/// are stored in contiguous memory, this routine avoids copying each value
+/// and directly reads from the underlying memory.
+template <typename ValueT>
+typename enable_if<is_hashable_data<ValueT>, hash_code>::type
+hash_combine_range_impl(ValueT *first, ValueT *last) {
+ const size_t seed = get_execution_seed();
+ const char *s_begin = reinterpret_cast<const char *>(first);
+ const char *s_end = reinterpret_cast<const char *>(last);
+ const size_t length = std::distance(s_begin, s_end);
+ if (length <= 64)
+ return hash_short(s_begin, length, seed);
+
+ const char *s_aligned_end = s_begin + (length & ~63);
+ hash_state state = state.create(s_begin, seed);
+ s_begin += 64;
+ while (s_begin != s_aligned_end) {
+ state.mix(s_begin);
+ s_begin += 64;
+ }
+ if (length & 63)
+ state.mix(s_end - 64);
+
+ return state.finalize(length);
+}
+
+} // namespace detail
+} // namespace hashing
+
+
+/// \brief Compute a hash_code for a sequence of values.
+///
+/// This hashes a sequence of values. It produces the same hash_code as
+/// 'hash_combine(a, b, c, ...)', but can run over arbitrary sized sequences
+/// and is significantly faster given pointers and types which can be hashed as
+/// a sequence of bytes.
+template <typename InputIteratorT>
+hash_code hash_combine_range(InputIteratorT first, InputIteratorT last) {
+ return ::llvm::hashing::detail::hash_combine_range_impl(first, last);
+}
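
A usage sketch: hashing a vector of integers takes the fast pointer-based
path, since vector storage is contiguous (the empty check only avoids
dereferencing an empty vector):

    #include "llvm/ADT/Hashing.h"
    #include <vector>

    llvm::hash_code hashInts(const std::vector<int> &V) {
      if (V.empty()) {
        const int *None = 0;
        return llvm::hash_combine_range(None, None);
      }
      return llvm::hash_combine_range(&V[0], &V[0] + V.size());
    }
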
+
+
+// Implementation details for hash_combine.
+namespace hashing {
+namespace detail {
+
+/// \brief Helper class to manage the recursive combining of hash_combine
+/// arguments.
+///
+/// This class exists to manage the state and various calls involved in the
+/// recursive combining of arguments used in hash_combine. It is particularly
+/// useful at minimizing the code in the recursive calls to ease the pain
+/// caused by a lack of variadic functions.
+struct hash_combine_recursive_helper {
+ char buffer[64];
+ hash_state state;
+ const size_t seed;
+
+public:
+ /// \brief Construct a recursive hash combining helper.
+ ///
+ /// This sets up the state for a recursive hash combine, including getting
+ /// the seed and buffer setup.
+ hash_combine_recursive_helper()
+ : seed(get_execution_seed()) {}
+
+ /// \brief Combine one chunk of data into the current in-flight hash.
+ ///
+ /// This merges one chunk of data into the hash. First it tries to buffer
+ /// the data. If the buffer is full, it hashes the buffer into its
+ /// hash_state, empties it, and then merges the new chunk in. This also
+ /// handles cases where the data straddles the end of the buffer.
+ template <typename T>
+ char *combine_data(size_t &length, char *buffer_ptr, char *buffer_end, T data) {
+ if (!store_and_advance(buffer_ptr, buffer_end, data)) {
+ // Check for skew which prevents the buffer from being packed, and do
+ // a partial store into the buffer to fill it. This is only a concern
+ // with the variadic combine because that formation can have varying
+ // argument types.
+ size_t partial_store_size = buffer_end - buffer_ptr;
+ memcpy(buffer_ptr, &data, partial_store_size);
+
+ // If the store fails, our buffer is full and ready to hash. We have to
+ // either initialize the hash state (on the first full buffer) or mix
+ // this buffer into the existing hash state. Length tracks the *hashed*
+ // length, not the buffered length.
+ if (length == 0) {
+ state = state.create(buffer, seed);
+ length = 64;
+ } else {
+ // Mix this chunk into the current state and bump length up by 64.
+ state.mix(buffer);
+ length += 64;
+ }
+ // Reset the buffer_ptr to the head of the buffer for the next chunk of
+ // data.
+ buffer_ptr = buffer;
+
+ // Try again to store into the buffer -- this cannot fail as we only
+ // store types smaller than the buffer.
+ if (!store_and_advance(buffer_ptr, buffer_end, data,
+ partial_store_size))
+ abort();
+ }
+ return buffer_ptr;
+ }
+
+#if defined(__has_feature) && __has_feature(__cxx_variadic_templates__)
+
+ /// \brief Recursive, variadic combining method.
+ ///
+ /// This function recurses through each argument, combining that argument
+ /// into a single hash.
+ template <typename T, typename ...Ts>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T &arg, const Ts &...args) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg));
+
+ // Recurse to the next argument.
+ return combine(length, buffer_ptr, buffer_end, args...);
+ }
+
+#else
+ // Manually expanded recursive combining methods. See variadic above for
+ // documentation.
+
+ template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T1 &arg1, const T2 &arg2, const T3 &arg3,
+ const T4 &arg4, const T5 &arg5, const T6 &arg6) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1));
+ return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4, arg5, arg6);
+ }
+ template <typename T1, typename T2, typename T3, typename T4, typename T5>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T1 &arg1, const T2 &arg2, const T3 &arg3,
+ const T4 &arg4, const T5 &arg5) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1));
+ return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4, arg5);
+ }
+ template <typename T1, typename T2, typename T3, typename T4>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T1 &arg1, const T2 &arg2, const T3 &arg3,
+ const T4 &arg4) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1));
+ return combine(length, buffer_ptr, buffer_end, arg2, arg3, arg4);
+ }
+ template <typename T1, typename T2, typename T3>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T1 &arg1, const T2 &arg2, const T3 &arg3) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1));
+ return combine(length, buffer_ptr, buffer_end, arg2, arg3);
+ }
+ template <typename T1, typename T2>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T1 &arg1, const T2 &arg2) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1));
+ return combine(length, buffer_ptr, buffer_end, arg2);
+ }
+ template <typename T1>
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end,
+ const T1 &arg1) {
+ buffer_ptr = combine_data(length, buffer_ptr, buffer_end, get_hashable_data(arg1));
+ return combine(length, buffer_ptr, buffer_end);
+ }
+
+#endif
+
+ /// \brief Base case for recursive, variadic combining.
+ ///
+ /// The base case when combining arguments recursively is reached when all
+ /// arguments have been handled. It flushes the remaining buffer and
+ /// constructs a hash_code.
+ hash_code combine(size_t length, char *buffer_ptr, char *buffer_end) {
+    // Check whether the entire set of values fits in the buffer. If so, we'll
+ // use the optimized short hashing routine and skip state entirely.
+ if (length == 0)
+ return hash_short(buffer, buffer_ptr - buffer, seed);
+
+ // Mix the final buffer, rotating it if we did a partial fill in order to
+    // simulate doing a mix of the last 64 bytes. That is how the algorithm
+ // works when we have a contiguous byte sequence, and we want to emulate
+ // that here.
+ std::rotate(buffer, buffer_ptr, buffer_end);
+
+ // Mix this chunk into the current state.
+ state.mix(buffer);
+ length += buffer_ptr - buffer;
+
+ return state.finalize(length);
+ }
+};
+
+} // namespace detail
+} // namespace hashing
+
+
+#if __has_feature(__cxx_variadic_templates__)
+
+/// \brief Combine values into a single hash_code.
+///
+/// This routine accepts a varying number of arguments of any type. It will
+/// attempt to combine them into a single hash_code. For user-defined types it
+/// attempts to call a \see hash_value overload (via ADL) for the type. For
+/// integer and pointer types it directly combines their data into the
+/// resulting hash_code.
+///
+/// The result is suitable for returning from a user's hash_value
+/// *implementation* for their user-defined type. Consumers of a type should
+/// *not* call this routine, they should instead call 'hash_value'.
+template <typename ...Ts> hash_code hash_combine(const Ts &...args) {
+ // Recursively hash each argument using a helper class.
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64, args...);
+}
+
+#else
+
+// What follows are manually exploded overloads for each argument width. See
+// the above variadic definition for documentation and specification.
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5,
+ typename T6>
+hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
+ const T4 &arg4, const T5 &arg5, const T6 &arg6) {
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64,
+ arg1, arg2, arg3, arg4, arg5, arg6);
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
+ const T4 &arg4, const T5 &arg5) {
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64,
+ arg1, arg2, arg3, arg4, arg5);
+}
+template <typename T1, typename T2, typename T3, typename T4>
+hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3,
+ const T4 &arg4) {
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64,
+ arg1, arg2, arg3, arg4);
+}
+template <typename T1, typename T2, typename T3>
+hash_code hash_combine(const T1 &arg1, const T2 &arg2, const T3 &arg3) {
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64, arg1, arg2, arg3);
+}
+template <typename T1, typename T2>
+hash_code hash_combine(const T1 &arg1, const T2 &arg2) {
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64, arg1, arg2);
+}
+template <typename T1>
+hash_code hash_combine(const T1 &arg1) {
+ ::llvm::hashing::detail::hash_combine_recursive_helper helper;
+ return helper.combine(0, helper.buffer, helper.buffer + 64, arg1);
+}
+
+#endif
+
+
+// Implementation details for the implementations of hash_value overloads
+// provided here.
+namespace hashing {
+namespace detail {
+
+/// \brief Helper to hash the value of a single integer.
+///
+/// Overloads for smaller integer types are not provided to ensure consistent
+/// behavior in the presence of integral promotions. Essentially,
+/// "hash_value('4')" and "hash_value('0' + 4)" should be the same.
+inline hash_code hash_integer_value(uint64_t value) {
+ // Similar to hash_4to8_bytes but using a seed instead of length.
+ const uint64_t seed = get_execution_seed();
+ const char *s = reinterpret_cast<const char *>(&value);
+ const uint64_t a = fetch32(s);
+ return hash_16_bytes(seed + (a << 3), fetch32(s + 4));
+}
+
+} // namespace detail
+} // namespace hashing
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T>
+typename enable_if<is_integral_or_enum<T>, hash_code>::type
+hash_value(T value) {
+ return ::llvm::hashing::detail::hash_integer_value(value);
+}
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T> hash_code hash_value(const T *ptr) {
+ return ::llvm::hashing::detail::hash_integer_value(
+ reinterpret_cast<uintptr_t>(ptr));
+}
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T, typename U>
+hash_code hash_value(const std::pair<T, U> &arg) {
+ return hash_combine(arg.first, arg.second);
+}
+
+// Declared and documented above, but defined here so that any of the hashing
+// infrastructure is available.
+template <typename T>
+hash_code hash_value(const std::basic_string<T> &arg) {
+ return hash_combine_range(arg.begin(), arg.end());
+}
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index d597a7c9be72..89b164819d37 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <functional>
#include <vector>
@@ -346,7 +347,7 @@ public:
if (prev)
prev->next = next;
else
- factory->Cache[computeDigest()] = next;
+ factory->Cache[factory->maskCacheIndex(computeDigest())] = next;
}
// We need to clear the mutability bit in case we are
@@ -428,6 +429,11 @@ protected:
TreeTy* getRight(TreeTy* T) const { return T->getRight(); }
value_type_ref getValue(TreeTy* T) const { return T->value; }
+  // Make sure the index is not the Tombstone or Empty key of the DenseMap.
+ static inline unsigned maskCacheIndex(unsigned I) {
+ return (I & ~0x02);
+ }
+
unsigned incrementHeight(TreeTy* L, TreeTy* R) const {
unsigned hl = getHeight(L);
unsigned hr = getHeight(R);
@@ -610,7 +616,7 @@ public:
// Search the hashtable for another tree with the same digest, and
// if find a collision compare those trees by their contents.
unsigned digest = TNew->computeDigest();
- TreeTy *&entry = Cache[digest];
+ TreeTy *&entry = Cache[maskCacheIndex(digest)];
do {
if (!entry)
break;
@@ -686,7 +692,7 @@ public:
stack.back() |= VisitedRight;
break;
default:
- assert(false && "Unreachable.");
+ llvm_unreachable("Unreachable.");
}
}
@@ -722,7 +728,7 @@ public:
skipToParent();
break;
default:
- assert(false && "Unreachable.");
+ llvm_unreachable("Unreachable.");
}
return *this;
}
@@ -747,7 +753,7 @@ public:
stack.push_back(reinterpret_cast<uintptr_t>(R) | VisitedRight);
break;
default:
- assert(false && "Unreachable.");
+ llvm_unreachable("Unreachable.");
}
return *this;
}
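
Why clearing bit 1 suffices here (a standalone sketch, not part of the
patch): DenseMapInfo<unsigned> reserves ~0U as the empty key and ~0U - 1 as
the tombstone, and both have bit 1 set, so any masked digest differs from
them:

    #include <cassert>

    int main() {
      const unsigned Empty = ~0U;         // 0xFFFFFFFF
      const unsigned Tombstone = ~0U - 1; // 0xFFFFFFFE
      const unsigned Digests[] = { 0u, 1u, ~0U - 2, Tombstone, Empty };
      for (unsigned i = 0; i != 5; ++i) {
        unsigned Masked = Digests[i] & ~0x02u; // maskCacheIndex
        assert(Masked != Empty && Masked != Tombstone);
      }
      return 0;
    }
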
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 1230e8f5fb43..931b67e40911 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -739,7 +739,7 @@ public:
// A Path is used by iterators to represent a position in a B+-tree, and the
// path to get there from the root.
//
-// The Path class also constains the tree navigation code that doesn't have to
+// The Path class also contains the tree navigation code that doesn't have to
// be templatized.
//
//===----------------------------------------------------------------------===//
@@ -1977,7 +1977,7 @@ iterator::overflow(unsigned Level) {
CurSize[Nodes] = CurSize[NewNode];
Node[Nodes] = Node[NewNode];
CurSize[NewNode] = 0;
- Node[NewNode] = this->map->newNode<NodeT>();
+ Node[NewNode] = this->map->template newNode<NodeT>();
++Nodes;
}
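
For context, the added 'template' keyword fixes a dependent-name parse:
inside a template, a member template called through a dependent object must
be disambiguated, or the '<' is parsed as less-than. A standalone sketch
with a hypothetical pool type:

    struct NodePool {
      template <typename NodeT> NodeT *newNode() { return new NodeT(); }
    };

    template <typename MapT>
    int *growNode(MapT *Map) {
      // return Map->newNode<int>();       // ill-formed: '<' is less-than
      return Map->template newNode<int>(); // OK: names the member template
    }

    void callSite() {
      NodePool Pool;
      delete growNode(&Pool); // instantiation exercising the call
    }
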
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index 2f6fd2bd5590..3a1a3f4634cf 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -46,6 +46,7 @@ namespace llvm {
public:
RefCountedBase() : ref_cnt(0) {}
+ RefCountedBase(const RefCountedBase &) : ref_cnt(0) {}
void Retain() const { ++ref_cnt; }
void Release() const {
@@ -64,9 +65,12 @@ namespace llvm {
//===----------------------------------------------------------------------===//
class RefCountedBaseVPTR {
mutable unsigned ref_cnt;
+ virtual void anchor();
protected:
RefCountedBaseVPTR() : ref_cnt(0) {}
+ RefCountedBaseVPTR(const RefCountedBaseVPTR &) : ref_cnt(0) {}
+
virtual ~RefCountedBaseVPTR() {}
void Retain() const { ++ref_cnt; }
@@ -76,9 +80,15 @@ namespace llvm {
}
template <typename T>
- friend class IntrusiveRefCntPtr;
+ friend struct IntrusiveRefCntPtrInfo;
};
+
+ template <typename T> struct IntrusiveRefCntPtrInfo {
+ static void retain(T *obj) { obj->Retain(); }
+ static void release(T *obj) { obj->Release(); }
+ };
+
//===----------------------------------------------------------------------===//
/// IntrusiveRefCntPtr - A template class that implements a "smart pointer"
/// that assumes the wrapped object has a reference count associated
@@ -105,7 +115,7 @@ namespace llvm {
explicit IntrusiveRefCntPtr() : Obj(0) {}
- explicit IntrusiveRefCntPtr(T* obj) : Obj(obj) {
+ IntrusiveRefCntPtr(T* obj) : Obj(obj) {
retain();
}
@@ -153,14 +163,19 @@ namespace llvm {
other.Obj = Obj;
Obj = tmp;
}
-
+
+ void reset() {
+ release();
+ Obj = 0;
+ }
+
void resetWithoutRelease() {
Obj = 0;
}
private:
- void retain() { if (Obj) Obj->Retain(); }
- void release() { if (Obj) Obj->Release(); }
+ void retain() { if (Obj) IntrusiveRefCntPtrInfo<T>::retain(Obj); }
+ void release() { if (Obj) IntrusiveRefCntPtrInfo<T>::release(Obj); }
void replace(T* S) {
this_type(S).swap(*this);
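
A small lifetime sketch with the new reset() and the IntrusiveRefCntPtrInfo
indirection; Widget is a hypothetical type:

    #include "llvm/ADT/IntrusiveRefCntPtr.h"

    class Widget : public llvm::RefCountedBase<Widget> {};

    void demo() {
      llvm::IntrusiveRefCntPtr<Widget> P(new Widget()); // refcount -> 1
      llvm::IntrusiveRefCntPtr<Widget> Q = P;           // refcount -> 2
      P.reset();                                        // refcount -> 1, P null
    } // Q destroyed: refcount -> 0, the Widget is deleted
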
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 85dbba2b4a4a..ccdcd1a8d1b9 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -92,10 +92,14 @@ public:
}
PointerTy const *getAddrOfPointer() const {
+ return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
+ }
+
+ PointerTy *getAddrOfPointer() {
assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
"Can only return the address if IntBits is cleared and "
"PtrTraits doesn't change the pointer");
- return reinterpret_cast<PointerTy const *>(&Value);
+ return reinterpret_cast<PointerTy *>(&Value);
}
void *getOpaqueValue() const { return reinterpret_cast<void*>(Value); }
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index 487096a17105..614b59c844e3 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -142,16 +142,19 @@ namespace llvm {
return T();
}
- /// \brief If the union is set to the first pointer type we can get an
- /// address pointing to it.
- template <typename T>
- PT1 const *getAddrOf() const {
+ /// \brief If the union is set to the first pointer type get an address
+ /// pointing to it.
+ PT1 const *getAddrOfPtr1() const {
+ return const_cast<PointerUnion *>(this)->getAddrOfPtr1();
+ }
+
+ /// \brief If the union is set to the first pointer type get an address
+ /// pointing to it.
+ PT1 *getAddrOfPtr1() {
assert(is<PT1>() && "Val is not the first pointer");
assert(get<PT1>() == Val.getPointer() &&
"Can't get the address because PointerLikeTypeTraits changes the ptr");
- T const *can_only_get_address_of_first_pointer_type
- = reinterpret_cast<PT1 const *>(Val.getAddrOfPointer());
- return can_only_get_address_of_first_pointer_type;
+ return (PT1 *)Val.getAddrOfPointer();
}
/// Assignment operators - Allow assigning into this union from either
@@ -263,7 +266,7 @@ namespace llvm {
::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
>::Return Ty;
- return Ty(Val).is<T>();
+ return Ty(Val).template is<T>();
}
/// get<T>() - Return the value of the specified pointer type. If the
@@ -276,7 +279,7 @@ namespace llvm {
::llvm::PointerUnionTypeSelector<PT1, T, IsInnerUnion,
::llvm::PointerUnionTypeSelector<PT2, T, IsInnerUnion, IsPT3 >
>::Return Ty;
- return Ty(Val).get<T>();
+ return Ty(Val).template get<T>();
}
/// dyn_cast<T>() - If the current value is of the specified pointer type,
diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index abe20676d54d..965f0deacaa2 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -144,6 +144,12 @@ public:
set_.erase(back());
vector_.pop_back();
}
+
+ T pop_back_val() {
+ T Ret = back();
+ pop_back();
+ return Ret;
+ }
bool operator==(const SetVector &that) const {
return vector_ == that.vector_;
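
The new pop_back_val() supports the common worklist idiom in a single call;
a sketch:

    #include "llvm/ADT/SetVector.h"

    void drain(llvm::SetVector<int> &Worklist) {
      while (!Worklist.empty()) {
        int Item = Worklist.pop_back_val(); // read and remove in one step
        (void)Item; // ... process Item, possibly inserting more work ...
      }
    }
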
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index b15b3ee0418f..a3469a1c6226 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -175,7 +175,7 @@ public:
return CountPopulation_32(Bits);
if (sizeof(uintptr_t) * CHAR_BIT == 64)
return CountPopulation_64(Bits);
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
return getPointer()->count();
}
@@ -212,7 +212,7 @@ public:
return CountTrailingZeros_32(Bits);
if (sizeof(uintptr_t) * CHAR_BIT == 64)
return CountTrailingZeros_64(Bits);
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
return getPointer()->find_first();
}
@@ -230,7 +230,7 @@ public:
return CountTrailingZeros_32(Bits);
if (sizeof(uintptr_t) * CHAR_BIT == 64)
return CountTrailingZeros_64(Bits);
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
return getPointer()->find_next(Prev);
}
diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index 9992858d67b0..70693d5b9aa2 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -137,6 +137,10 @@ private:
void operator=(const SmallPtrSetImpl &RHS); // DO NOT IMPLEMENT.
protected:
+ /// swap - Swaps the elements of two sets.
+ /// Note: This method assumes that both sets have the same small size.
+ void swap(SmallPtrSetImpl &RHS);
+
void CopyFrom(const SmallPtrSetImpl &RHS);
};
@@ -287,8 +291,20 @@ public:
return *this;
}
+ /// swap - Swaps the elements of two sets.
+ void swap(SmallPtrSet<PtrType, SmallSize> &RHS) {
+ SmallPtrSetImpl::swap(RHS);
+ }
};
}
+namespace std {
+ /// Implement std::swap in terms of SmallPtrSet swap.
+ template<class T, unsigned N>
+ inline void swap(llvm::SmallPtrSet<T, N> &LHS, llvm::SmallPtrSet<T, N> &RHS) {
+ LHS.swap(RHS);
+ }
+}
+
#endif
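
With the std::swap specialization in place, generic code that swaps sets
unqualified now resolves to the member swap rather than the generic
copy-based version; a sketch:

    #include "llvm/ADT/SmallPtrSet.h"

    void swapSets(llvm::SmallPtrSet<int *, 4> &A,
                  llvm::SmallPtrSet<int *, 4> &B) {
      using std::swap;
      swap(A, B); // picks the SmallPtrSet overload
    }
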
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index d03f1bef15b1..cd117f59ba76 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -27,13 +27,13 @@ namespace llvm {
///
/// Note that this set does not provide a way to iterate over members in the
/// set.
-template <typename T, unsigned N>
+template <typename T, unsigned N, typename C = std::less<T> >
class SmallSet {
/// Use a SmallVector to hold the elements here (even though it will never
/// reach its 'large' stage) to avoid calling the default ctors of elements
/// we will never use.
SmallVector<T, N> Vector;
- std::set<T> Set;
+ std::set<T, C> Set;
typedef typename SmallVector<T, N>::const_iterator VIterator;
typedef typename SmallVector<T, N>::iterator mutable_iterator;
public:
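
The new comparator parameter only affects ordering once the set spills out
of its small-vector stage into the backing std::set; a sketch with a
non-default ordering:

    #include "llvm/ADT/SmallSet.h"
    #include <functional>

    // Descending order applies in the std::set stage; membership semantics
    // are unchanged while the set is still small.
    llvm::SmallSet<int, 4, std::greater<int> > Priorities;
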
diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h
index da264164821f..199783ba3899 100644
--- a/include/llvm/ADT/SmallString.h
+++ b/include/llvm/ADT/SmallString.h
@@ -24,21 +24,244 @@ namespace llvm {
template<unsigned InternalLen>
class SmallString : public SmallVector<char, InternalLen> {
public:
- // Default ctor - Initialize to empty.
+ /// Default ctor - Initialize to empty.
SmallString() {}
- // Initialize from a StringRef.
+ /// Initialize from a StringRef.
SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {}
- // Initialize with a range.
+ /// Initialize with a range.
template<typename ItTy>
SmallString(ItTy S, ItTy E) : SmallVector<char, InternalLen>(S, E) {}
- // Copy ctor.
+ /// Copy ctor.
SmallString(const SmallString &RHS) : SmallVector<char, InternalLen>(RHS) {}
+ // Note that in order to add new overloads for append & assign, we have to
+ // duplicate the inherited versions so as not to inadvertently hide them.
+
+ /// @}
+ /// @name String Assignment
+ /// @{
+
+ /// Assign from a repeated element
+ void assign(unsigned NumElts, char Elt) {
+ this->SmallVectorImpl<char>::assign(NumElts, Elt);
+ }
+
+ /// Assign from an iterator pair
+ template<typename in_iter>
+ void assign(in_iter S, in_iter E) {
+ this->clear();
+ SmallVectorImpl<char>::append(S, E);
+ }
+
+ /// Assign from a StringRef
+ void assign(StringRef RHS) {
+ this->clear();
+ SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ }
+
+ /// Assign from a SmallVector
+ void assign(const SmallVectorImpl<char> &RHS) {
+ this->clear();
+ SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ }
+
+ /// @}
+ /// @name String Concatenation
+ /// @{
+
+ /// Append from an iterator pair
+ template<typename in_iter>
+ void append(in_iter S, in_iter E) {
+ SmallVectorImpl<char>::append(S, E);
+ }
+
+ /// Append from a StringRef
+ void append(StringRef RHS) {
+ SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ }
+
+ /// Append from a SmallVector
+ void append(const SmallVectorImpl<char> &RHS) {
+ SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
+ }
+
+ /// @}
+ /// @name String Comparison
+ /// @{
+
+  /// equals - Check for string equality; this is more efficient than
+  /// compare() when the relative ordering of unequal strings isn't needed.
+ bool equals(StringRef RHS) const {
+ return str().equals(RHS);
+ }
+
+ /// equals_lower - Check for string equality, ignoring case.
+ bool equals_lower(StringRef RHS) const {
+ return str().equals_lower(RHS);
+ }
+
+ /// compare - Compare two strings; the result is -1, 0, or 1 if this string
+ /// is lexicographically less than, equal to, or greater than the \arg RHS.
+ int compare(StringRef RHS) const {
+ return str().compare(RHS);
+ }
+
+ /// compare_lower - Compare two strings, ignoring case.
+ int compare_lower(StringRef RHS) const {
+ return str().compare_lower(RHS);
+ }
+
+ /// compare_numeric - Compare two strings, treating sequences of digits as
+ /// numbers.
+ int compare_numeric(StringRef RHS) const {
+ return str().compare_numeric(RHS);
+ }
+
+ /// @}
+ /// @name String Predicates
+ /// @{
+
+ /// startswith - Check if this string starts with the given \arg Prefix.
+ bool startswith(StringRef Prefix) const {
+ return str().startswith(Prefix);
+ }
+
+ /// endswith - Check if this string ends with the given \arg Suffix.
+ bool endswith(StringRef Suffix) const {
+ return str().endswith(Suffix);
+ }
+
+ /// @}
+ /// @name String Searching
+ /// @{
+
+ /// find - Search for the first character \arg C in the string.
+ ///
+ /// \return - The index of the first occurrence of \arg C, or npos if not
+ /// found.
+ size_t find(char C, size_t From = 0) const {
+ return str().find(C, From);
+ }
+
+ /// find - Search for the first string \arg Str in the string.
+ ///
+ /// \return - The index of the first occurrence of \arg Str, or npos if not
+ /// found.
+ size_t find(StringRef Str, size_t From = 0) const {
+ return str().find(Str, From);
+ }
+
+ /// rfind - Search for the last character \arg C in the string.
+ ///
+ /// \return - The index of the last occurrence of \arg C, or npos if not
+ /// found.
+ size_t rfind(char C, size_t From = StringRef::npos) const {
+ return str().rfind(C, From);
+ }
+
+ /// rfind - Search for the last string \arg Str in the string.
+ ///
+ /// \return - The index of the last occurrence of \arg Str, or npos if not
+ /// found.
+ size_t rfind(StringRef Str) const {
+ return str().rfind(Str);
+ }
+
+ /// find_first_of - Find the first character in the string that is \arg C,
+ /// or npos if not found. Same as find.
+ size_t find_first_of(char C, size_t From = 0) const {
+ return str().find_first_of(C, From);
+ }
+
+ /// find_first_of - Find the first character in the string that is in \arg
+ /// Chars, or npos if not found.
+ ///
+ /// Note: O(size() + Chars.size())
+ size_t find_first_of(StringRef Chars, size_t From = 0) const {
+ return str().find_first_of(Chars, From);
+ }
+
+ /// find_first_not_of - Find the first character in the string that is not
+ /// \arg C or npos if not found.
+ size_t find_first_not_of(char C, size_t From = 0) const {
+ return str().find_first_not_of(C, From);
+ }
+
+ /// find_first_not_of - Find the first character in the string that is not
+ /// in the string \arg Chars, or npos if not found.
+ ///
+ /// Note: O(size() + Chars.size())
+ size_t find_first_not_of(StringRef Chars, size_t From = 0) const {
+ return str().find_first_not_of(Chars, From);
+ }
+
+ /// find_last_of - Find the last character in the string that is \arg C, or
+ /// npos if not found.
+ size_t find_last_of(char C, size_t From = StringRef::npos) const {
+ return str().find_last_of(C, From);
+ }
+
+  /// find_last_of - Find the last character in the string that is in \arg Chars,
+ /// or npos if not found.
+ ///
+ /// Note: O(size() + Chars.size())
+ size_t find_last_of(
+ StringRef Chars, size_t From = StringRef::npos) const {
+ return str().find_last_of(Chars, From);
+ }
+
+ /// @}
+ /// @name Helpful Algorithms
+ /// @{
+
+ /// count - Return the number of occurrences of \arg C in the string.
+ size_t count(char C) const {
+ return str().count(C);
+ }
+
+ /// count - Return the number of non-overlapped occurrences of \arg Str in
+ /// the string.
+ size_t count(StringRef Str) const {
+ return str().count(Str);
+ }
+
+ /// @}
+ /// @name Substring Operations
+ /// @{
+
+ /// substr - Return a reference to the substring from [Start, Start + N).
+ ///
+ /// \param Start - The index of the starting character in the substring; if
+ /// the index is npos or greater than the length of the string then the
+ /// empty substring will be returned.
+ ///
+  /// \param N - The number of characters to include in the substring. If N
+ /// exceeds the number of characters remaining in the string, the string
+ /// suffix (starting with \arg Start) will be returned.
+ StringRef substr(size_t Start, size_t N = StringRef::npos) const {
+ return str().substr(Start, N);
+ }
+
+ /// slice - Return a reference to the substring from [Start, End).
+ ///
+ /// \param Start - The index of the starting character in the substring; if
+ /// the index is npos or greater than the length of the string then the
+ /// empty substring will be returned.
+ ///
+ /// \param End - The index following the last character to include in the
+ /// substring. If this is npos, or less than \arg Start, or exceeds the
+ /// number of characters remaining in the string, the string suffix
+ /// (starting with \arg Start) will be returned.
+ StringRef slice(size_t Start, size_t End) const {
+ return str().slice(Start, End);
+ }
// Extra methods.
+
+ /// Explicit conversion to StringRef
StringRef str() const { return StringRef(this->begin(), this->size()); }
// TODO: Make this const, if it's safe...
@@ -48,7 +271,7 @@ public:
return this->data();
}
- // Implicit conversion to StringRef.
+ /// Implicit conversion to StringRef.
operator StringRef() const { return str(); }
// Extra operators.
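
With the forwarded StringRef API above, a SmallString can now be queried
directly, without an explicit .str() conversion; a sketch:

    #include "llvm/ADT/SmallString.h"

    bool isHidden(const llvm::SmallString<64> &Filename) {
      return Filename.startswith("."); // forwarded to StringRef
    }

    size_t extensionStart(const llvm::SmallString<64> &Filename) {
      return Filename.rfind('.'); // npos if there is no extension
    }
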
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 1c42f29771b3..0d9d0d12e868 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -23,30 +23,6 @@
#include <iterator>
#include <memory>
-#ifdef _MSC_VER
-namespace std {
-#if _MSC_VER <= 1310
- // Work around flawed VC++ implementation of std::uninitialized_copy. Define
- // additional overloads so that elements with pointer types are recognized as
- // scalars and not objects, causing bizarre type conversion errors.
- template<class T1, class T2>
- inline _Scalar_ptr_iterator_tag _Ptr_cat(T1 **, T2 **) {
- _Scalar_ptr_iterator_tag _Cat;
- return _Cat;
- }
-
- template<class T1, class T2>
- inline _Scalar_ptr_iterator_tag _Ptr_cat(T1* const *, T2 **) {
- _Scalar_ptr_iterator_tag _Cat;
- return _Cat;
- }
-#else
-// FIXME: It is not clear if the problem is fixed in VS 2005. What is clear
-// is that the above hack won't work if it wasn't fixed.
-#endif
-}
-#endif
-
namespace llvm {
/// SmallVectorBase - This is all the non-templated stuff common to all
@@ -100,10 +76,10 @@ public:
template <typename T>
class SmallVectorTemplateCommon : public SmallVectorBase {
protected:
- void setEnd(T *P) { this->EndX = P; }
-public:
SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {}
+ void setEnd(T *P) { this->EndX = P; }
+public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
typedef T value_type;
@@ -174,7 +150,7 @@ public:
/// implementations that are designed to work with non-POD-like T's.
template <typename T, bool isPodLike>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
-public:
+protected:
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
static void destroy_range(T *S, T *E) {
@@ -194,6 +170,23 @@ public:
/// grow - double the size of the allocated memory, guaranteeing space for at
/// least one more element or MinSize if specified.
void grow(size_t MinSize = 0);
+
+public:
+ void push_back(const T &Elt) {
+ if (this->EndX < this->CapacityX) {
+ Retry:
+ new (this->end()) T(Elt);
+ this->setEnd(this->end()+1);
+ return;
+ }
+ this->grow();
+ goto Retry;
+ }
+
+ void pop_back() {
+ this->setEnd(this->end()-1);
+ this->end()->~T();
+ }
};
// Define this out-of-line to dissuade the C++ compiler from inlining it.
@@ -226,7 +219,7 @@ void SmallVectorTemplateBase<T, isPodLike>::grow(size_t MinSize) {
/// implementations that are designed to work with POD-like T's.
template <typename T>
class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
-public:
+protected:
SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
// No need to do a destroy loop for POD's.
@@ -255,6 +248,21 @@ public:
void grow(size_t MinSize = 0) {
this->grow_pod(MinSize*sizeof(T), sizeof(T));
}
+public:
+ void push_back(const T &Elt) {
+ if (this->EndX < this->CapacityX) {
+ Retry:
+ *this->end() = Elt;
+ this->setEnd(this->end()+1);
+ return;
+ }
+ this->grow();
+ goto Retry;
+ }
+
+ void pop_back() {
+ this->setEnd(this->end()-1);
+ }
};
@@ -270,11 +278,13 @@ public:
typedef typename SuperClass::iterator iterator;
typedef typename SuperClass::size_type size_type;
+protected:
// Default ctor - Initialize to empty.
explicit SmallVectorImpl(unsigned N)
: SmallVectorTemplateBase<T, isPodLike<T>::value>(N*sizeof(T)) {
}
+public:
~SmallVectorImpl() {
// Destroy the constructed elements in the vector.
this->destroy_range(this->begin(), this->end());
@@ -297,7 +307,7 @@ public:
} else if (N > this->size()) {
if (this->capacity() < N)
this->grow(N);
- this->construct_range(this->end(), this->begin()+N, T());
+ std::uninitialized_fill(this->end(), this->begin()+N, T());
this->setEnd(this->begin()+N);
}
}
@@ -309,7 +319,7 @@ public:
} else if (N > this->size()) {
if (this->capacity() < N)
this->grow(N);
- construct_range(this->end(), this->begin()+N, NV);
+ std::uninitialized_fill(this->end(), this->begin()+N, NV);
this->setEnd(this->begin()+N);
}
}
@@ -319,25 +329,9 @@ public:
this->grow(N);
}
- void push_back(const T &Elt) {
- if (this->EndX < this->CapacityX) {
- Retry:
- new (this->end()) T(Elt);
- this->setEnd(this->end()+1);
- return;
- }
- this->grow();
- goto Retry;
- }
-
- void pop_back() {
- this->setEnd(this->end()-1);
- this->end()->~T();
- }
-
T pop_back_val() {
T Result = this->back();
- pop_back();
+ this->pop_back();
return Result;
}
@@ -376,7 +370,7 @@ public:
if (this->capacity() < NumElts)
this->grow(NumElts);
this->setEnd(this->begin()+NumElts);
- construct_range(this->begin(), this->end(), Elt);
+ std::uninitialized_fill(this->begin(), this->end(), Elt);
}
iterator erase(iterator I) {
@@ -384,7 +378,7 @@ public:
// Shift all elts down one.
std::copy(I+1, this->end(), I);
// Drop the last elt.
- pop_back();
+ this->pop_back();
return(N);
}
@@ -400,7 +394,7 @@ public:
iterator insert(iterator I, const T &Elt) {
if (I == this->end()) { // Important special case for empty vector.
- push_back(Elt);
+ this->push_back(Elt);
return this->end()-1;
}
@@ -554,12 +548,6 @@ public:
assert(N <= this->capacity());
this->setEnd(this->begin() + N);
}
-
-private:
- static void construct_range(T *S, T *E, const T &Elt) {
- for (; S != E; ++S)
- new (S) T(Elt);
- }
};
@@ -686,9 +674,7 @@ public:
explicit SmallVector(unsigned Size, const T &Value = T())
: SmallVectorImpl<T>(NumTsAvailable) {
- this->reserve(Size);
- while (Size--)
- this->push_back(Value);
+ this->assign(Size, Value);
}
template<typename ItTy>
@@ -718,9 +704,7 @@ public:
explicit SmallVector(unsigned Size, const T &Value = T())
: SmallVectorImpl<T>(0) {
- this->reserve(Size);
- while (Size--)
- this->push_back(Value);
+ this->assign(Size, Value);
}
template<typename ItTy>
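
With SmallVectorImpl's constructor now protected, it can no longer be
instantiated directly; the intended idiom is to pass SmallVectorImpl<T>&
across interfaces and pick the inline capacity only at the allocation site.
A sketch:

    #include "llvm/ADT/SmallVector.h"

    // The callee is agnostic to the caller's chosen inline capacity.
    void collectSquares(unsigned N, llvm::SmallVectorImpl<unsigned> &Out) {
      for (unsigned i = 0; i != N; ++i)
        Out.push_back(i * i);
    }

    void caller() {
      llvm::SmallVector<unsigned, 8> Storage; // concrete buffer lives here
      collectSquares(5, Storage);
    }
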
diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h
index d977136b2fc1..89774c3f5628 100644
--- a/include/llvm/ADT/SparseBitVector.h
+++ b/include/llvm/ADT/SparseBitVector.h
@@ -18,11 +18,11 @@
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <climits>
-#include <cstring>
namespace llvm {
@@ -128,7 +128,7 @@ public:
else if (sizeof(BitWord) == 8)
NumBits += CountPopulation_64(Bits[i]);
else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
return NumBits;
}
@@ -138,13 +138,11 @@ public:
if (Bits[i] != 0) {
if (sizeof(BitWord) == 4)
return i * BITWORD_SIZE + CountTrailingZeros_32(Bits[i]);
- else if (sizeof(BitWord) == 8)
+ if (sizeof(BitWord) == 8)
return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]);
- else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
- assert(0 && "Illegal empty element");
- return 0; // Not reached
+ llvm_unreachable("Illegal empty element");
}
/// find_next - Returns the index of the next set bit starting from the
@@ -165,10 +163,9 @@ public:
if (Copy != 0) {
if (sizeof(BitWord) == 4)
return WordPos * BITWORD_SIZE + CountTrailingZeros_32(Copy);
- else if (sizeof(BitWord) == 8)
+ if (sizeof(BitWord) == 8)
return WordPos * BITWORD_SIZE + CountTrailingZeros_64(Copy);
- else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
// Check subsequent words.
@@ -176,10 +173,9 @@ public:
if (Bits[i] != 0) {
if (sizeof(BitWord) == 4)
return i * BITWORD_SIZE + CountTrailingZeros_32(Bits[i]);
- else if (sizeof(BitWord) == 8)
+ if (sizeof(BitWord) == 8)
return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]);
- else
- assert(0 && "Unsupported!");
+ llvm_unreachable("Unsupported!");
}
return -1;
}
@@ -264,15 +260,6 @@ public:
}
BecameZero = allzero;
}
-
- // Get a hash value for this element;
- uint64_t getHashValue() const {
- uint64_t HashVal = 0;
- for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) {
- HashVal ^= Bits[i];
- }
- return HashVal;
- }
};
template <unsigned ElementSize = 128>
@@ -813,18 +800,6 @@ public:
iterator end() const {
return iterator(this, true);
}
-
- // Get a hash value for this bitmap.
- uint64_t getHashValue() const {
- uint64_t HashVal = 0;
- for (ElementListConstIter Iter = Elements.begin();
- Iter != Elements.end();
- ++Iter) {
- HashVal ^= Iter->index();
- HashVal ^= Iter->getHashValue();
- }
- return HashVal;
- }
};
// Convenience functions to allow Or and And without dereferencing in the user
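The assert(0)-to-llvm_unreachable conversions above are part of a wider cleanup: llvm_unreachable is marked noreturn (it reports and aborts in asserts builds, and becomes an optimizer hint otherwise), so the dead fallback returns can be deleted. A sketch of the idiom, with a hypothetical helper name:

    #include "llvm/Support/ErrorHandling.h"

    static unsigned bitsForWordSize(unsigned Bytes) {
      if (Bytes == 4)
        return 32;
      if (Bytes == 8)
        return 64;
      // noreturn: no unreachable 'return 0' is needed after this point.
      llvm_unreachable("Unsupported word size!");
    }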
diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h
new file mode 100644
index 000000000000..923c6a5954d0
--- /dev/null
+++ b/include/llvm/ADT/SparseSet.h
@@ -0,0 +1,268 @@
+//===--- llvm/ADT/SparseSet.h - Sparse set ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SparseSet class derived from the version described in
+// Briggs, Torczon, "An efficient representation for sparse sets", ACM Letters
+// on Programming Languages and Systems, Volume 2 Issue 1-4, March-Dec. 1993.
+//
+// A sparse set holds a small number of objects identified by integer keys from
+// a moderately sized universe. The sparse set uses more memory than other
+// containers in order to provide faster operations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SPARSESET_H
+#define LLVM_ADT_SPARSESET_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include <limits>
+
+namespace llvm {
+
+/// SparseSetFunctor - Objects in a SparseSet are identified by small integer
+/// keys. A functor object is used to compute the key of an object. The
+/// functor's operator() must return an unsigned smaller than the universe.
+///
+/// The default functor implementation forwards to a getSparseSetKey() method
+/// on the object. It is intended for sparse sets holding ad-hoc structs.
+///
+template<typename ValueT>
+struct SparseSetFunctor {
+ unsigned operator()(const ValueT &Val) {
+ return Val.getSparseSetKey();
+ }
+};
+
+/// SparseSetFunctor<unsigned> - Provide a trivial identity functor for
+/// SparseSet<unsigned>.
+///
+template<> struct SparseSetFunctor<unsigned> {
+ unsigned operator()(unsigned Val) { return Val; }
+};
+
+/// SparseSet - Fast set implementation for objects that can be identified by
+/// small unsigned keys.
+///
+/// SparseSet allocates memory proportional to the size of the key universe, so
+/// it is not recommended for building composite data structures. It is useful
+/// for algorithms that require a single set with fast operations.
+///
+/// Compared to DenseSet and DenseMap, SparseSet provides constant-time fast
+/// clear() and iteration as fast as a vector. The find(), insert(), and
+/// erase() operations are all constant time, and typically faster than a hash
+/// table. The iteration order doesn't depend on numerical key values, it only
+/// depends on the order of insert() and erase() operations. When no elements
+/// have been erased, the iteration order is the insertion order.
+///
+/// Compared to BitVector, SparseSet<unsigned> uses 8x-40x more memory, but
+/// offers constant-time clear() and size() operations as well as fast
+/// iteration independent of the size of the universe.
+///
+/// SparseSet contains a dense vector holding all the objects and a sparse
+/// array holding indexes into the dense vector. Most of the memory is used by
+/// the sparse array which is the size of the key universe. The SparseT
+/// template parameter provides a space/speed tradeoff for sets holding many
+/// elements.
+///
+/// When SparseT is uint32_t, find() only touches 2 cache lines, but the sparse
+/// array uses 4 x Universe bytes.
+///
+/// When SparseT is uint8_t (the default), find() touches up to 2+[N/256] cache
+/// lines, but the sparse array is 4x smaller. N is the number of elements in
+/// the set.
+///
+/// For sets that may grow to thousands of elements, SparseT should be set to
+/// uint16_t or uint32_t.
+///
+/// @param ValueT The type of objects in the set.
+/// @param SparseT An unsigned integer type. See above.
+/// @param KeyFunctorT A functor that computes the unsigned key of a ValueT.
+///
+template<typename ValueT,
+ typename SparseT = uint8_t,
+ typename KeyFunctorT = SparseSetFunctor<ValueT> >
+class SparseSet {
+ typedef SmallVector<ValueT, 8> DenseT;
+ DenseT Dense;
+ SparseT *Sparse;
+ unsigned Universe;
+ KeyFunctorT KeyOf;
+
+ // Disable copy construction and assignment.
+ // This data structure is not meant to be used that way.
+ SparseSet(const SparseSet&); // DO NOT IMPLEMENT.
+ SparseSet &operator=(const SparseSet&); // DO NOT IMPLEMENT.
+
+public:
+ typedef ValueT value_type;
+ typedef ValueT &reference;
+ typedef const ValueT &const_reference;
+ typedef ValueT *pointer;
+ typedef const ValueT *const_pointer;
+
+ SparseSet() : Sparse(0), Universe(0) {}
+ ~SparseSet() { free(Sparse); }
+
+ /// setUniverse - Set the universe size which determines the largest key the
+ /// set can hold. The universe must be sized before any elements can be
+ /// added.
+ ///
+ /// @param U Universe size. All object keys must be less than U.
+ ///
+ void setUniverse(unsigned U) {
+ // It's not hard to resize the universe on a non-empty set, but it doesn't
+ // seem like a likely use case, so we can add that code when we need it.
+ assert(empty() && "Can only resize universe on an empty map");
+ // Hysteresis prevents needless reallocations.
+ if (U >= Universe/4 && U <= Universe)
+ return;
+ free(Sparse);
+ // The Sparse array doesn't actually need to be initialized, so malloc
+ // would be enough here, but that will cause tools like valgrind to
+ // complain about branching on uninitialized data.
+ Sparse = reinterpret_cast<SparseT*>(calloc(U, sizeof(SparseT)));
+ Universe = U;
+ }
+
+ // Import trivial vector stuff from DenseT.
+ typedef typename DenseT::iterator iterator;
+ typedef typename DenseT::const_iterator const_iterator;
+
+ const_iterator begin() const { return Dense.begin(); }
+ const_iterator end() const { return Dense.end(); }
+ iterator begin() { return Dense.begin(); }
+ iterator end() { return Dense.end(); }
+
+ /// empty - Returns true if the set is empty.
+ ///
+ /// This is not the same as BitVector::empty().
+ ///
+ bool empty() const { return Dense.empty(); }
+
+ /// size - Returns the number of elements in the set.
+ ///
+ /// This is not the same as BitVector::size() which returns the size of the
+ /// universe.
+ ///
+ unsigned size() const { return Dense.size(); }
+
+ /// clear - Clears the set. This is a very fast constant time operation.
+ ///
+ void clear() {
+ // Sparse does not need to be cleared, see find().
+ Dense.clear();
+ }
+
+ /// find - Find an element by its key.
+ ///
+ /// @param Key A valid key to find.
+ /// @returns An iterator to the element identified by key, or end().
+ ///
+ iterator find(unsigned Key) {
+ assert(Key < Universe && "Key out of range");
+ assert(std::numeric_limits<SparseT>::is_integer &&
+ !std::numeric_limits<SparseT>::is_signed &&
+ "SparseT must be an unsigned integer type");
+ const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
+ for (unsigned i = Sparse[Key], e = size(); i < e; i += Stride) {
+ const unsigned FoundKey = KeyOf(Dense[i]);
+ assert(FoundKey < Universe && "Invalid key in set. Did object mutate?");
+ if (Key == FoundKey)
+ return begin() + i;
+ // Stride is 0 when SparseT >= unsigned. We don't need to loop.
+ if (!Stride)
+ break;
+ }
+ return end();
+ }
+
+ const_iterator find(unsigned Key) const {
+ return const_cast<SparseSet*>(this)->find(Key);
+ }
+
+ /// count - Returns true if this set contains an element identified by Key.
+ ///
+ bool count(unsigned Key) const {
+ return find(Key) != end();
+ }
+
+ /// insert - Attempts to insert a new element.
+ ///
+ /// If Val is successfully inserted, return (I, true), where I is an iterator
+ /// pointing to the newly inserted element.
+ ///
+ /// If the set already contains an element with the same key as Val, return
+ /// (I, false), where I is an iterator pointing to the existing element.
+ ///
+ /// Insertion invalidates all iterators.
+ ///
+ std::pair<iterator, bool> insert(const ValueT &Val) {
+ unsigned Key = KeyOf(Val);
+ iterator I = find(Key);
+ if (I != end())
+ return std::make_pair(I, false);
+ Sparse[Key] = size();
+ Dense.push_back(Val);
+ return std::make_pair(end() - 1, true);
+ }
+
+ /// array subscript - If an element already exists with this key, return it.
+ /// Otherwise, automatically construct a new value from Key, insert it,
+ /// and return the newly inserted element.
+ ValueT &operator[](unsigned Key) {
+ return *insert(ValueT(Key)).first;
+ }
+
+ /// erase - Erases an existing element identified by a valid iterator.
+ ///
+ /// This invalidates all iterators, but erase() returns an iterator pointing
+ /// to the next element. This makes it possible to erase selected elements
+ /// while iterating over the set:
+ ///
+ /// for (SparseSet::iterator I = Set.begin(); I != Set.end();)
+ /// if (test(*I))
+ /// I = Set.erase(I);
+ /// else
+ /// ++I;
+ ///
+ /// Note that end() changes when elements are erased, unlike std::list.
+ ///
+ iterator erase(iterator I) {
+ assert(unsigned(I - begin()) < size() && "Invalid iterator");
+ if (I != end() - 1) {
+ *I = Dense.back();
+ unsigned BackKey = KeyOf(Dense.back());
+ assert(BackKey < Universe && "Invalid key in set. Did object mutate?");
+ Sparse[BackKey] = I - begin();
+ }
+ // This depends on SmallVector::pop_back() not invalidating iterators.
+ // std::vector::pop_back() doesn't give that guarantee.
+ Dense.pop_back();
+ return I;
+ }
+
+ /// erase - Erases an element identified by Key, if it exists.
+ ///
+ /// @param Key The key identifying the element to erase.
+ /// @returns True when an element was erased, false if no element was found.
+ ///
+ bool erase(unsigned Key) {
+ iterator I = find(Key);
+ if (I == end())
+ return false;
+ erase(I);
+ return true;
+ }
+
+};
+
+} // end namespace llvm
+
+#endif
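A usage sketch for the new container follows; the register-tracking scenario and names are hypothetical, not taken from the patch:

    #include "llvm/ADT/SparseSet.h"

    void trackLiveRegs(unsigned NumRegs) {
      llvm::SparseSet<unsigned> Live;  // identity key functor for unsigned
      Live.setUniverse(NumRegs);       // must size the universe before insert()
      Live.insert(5);
      Live.insert(17);
      if (Live.count(5))
        Live.erase(5);                 // constant time; back element swapped in
      Live.clear();                    // constant time; only Dense is cleared
    }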
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index b8a1a2f5c4e8..b54d10b9dd33 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -27,6 +27,7 @@
#define LLVM_ADT_STATISTIC_H
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Valgrind.h"
namespace llvm {
class raw_ostream;
@@ -110,6 +111,7 @@ protected:
bool tmp = Initialized;
sys::MemoryFence();
if (!tmp) RegisterStatistic();
+ TsanHappensAfter(this);
return *this;
}
void RegisterStatistic();
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index d01d3e1d6b10..655d884e7baa 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -15,12 +15,7 @@
#define LLVM_ADT_STRINGEXTRAS_H
#include "llvm/Support/DataTypes.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/StringRef.h"
-#include <cctype>
-#include <cstdio>
-#include <string>
namespace llvm {
template<typename T> class SmallVectorImpl;
@@ -101,38 +96,6 @@ static inline std::string itostr(int64_t X) {
return utostr(static_cast<uint64_t>(X));
}
-static inline std::string ftostr(double V) {
- char Buffer[200];
- sprintf(Buffer, "%20.6e", V);
- char *B = Buffer;
- while (*B == ' ') ++B;
- return B;
-}
-
-static inline std::string ftostr(const APFloat& V) {
- if (&V.getSemantics() == &APFloat::IEEEdouble)
- return ftostr(V.convertToDouble());
- else if (&V.getSemantics() == &APFloat::IEEEsingle)
- return ftostr((double)V.convertToFloat());
- return "<unknown format in ftostr>"; // error
-}
-
-static inline std::string LowercaseString(const std::string &S) {
- std::string result(S);
- for (unsigned i = 0; i < S.length(); ++i)
- if (isupper(result[i]))
- result[i] = char(tolower(result[i]));
- return result;
-}
-
-static inline std::string UppercaseString(const std::string &S) {
- std::string result(S);
- for (unsigned i = 0; i < S.length(); ++i)
- if (islower(result[i]))
- result[i] = char(toupper(result[i]));
- return result;
-}
-
/// StrInStrNoCase - Portable version of strcasestr. Locates the first
/// occurrence of string 's1' in string 's2', ignoring case. Returns
/// the offset of s2 in s1 or npos if s2 cannot be found.
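The deleted LowercaseString()/UppercaseString() helpers are superseded by the StringRef::lower()/upper() members added in the StringRef.h hunk further down. A one-line illustration, hypothetical usage:

    #include "llvm/ADT/StringRef.h"

    std::string Lower = llvm::StringRef("MixedCase").lower();  // "mixedcase"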
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 35077879351b..097418efc817 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -51,20 +51,11 @@ public:
/// StringMapImpl - This is the base class of StringMap that is shared among
/// all of its instantiations.
class StringMapImpl {
-public:
- /// ItemBucket - The hash table consists of an array of these. If Item is
- /// non-null, this is an extant entry, otherwise, it is a hole.
- struct ItemBucket {
- /// FullHashValue - This remembers the full hash value of the key for
- /// easy scanning.
- unsigned FullHashValue;
-
- /// Item - This is a pointer to the actual item object.
- StringMapEntryBase *Item;
- };
-
protected:
- ItemBucket *TheTable;
+ // Array of NumBuckets pointers to entries; null pointers are holes.
+ // TheTable[NumBuckets] contains a sentinel value for easy iteration. It is
+ // followed by an array of the actual hash values as unsigned integers.
+ StringMapEntryBase **TheTable;
unsigned NumBuckets;
unsigned NumItems;
unsigned NumTombstones;
@@ -238,8 +229,9 @@ public:
template<typename ValueTy, typename AllocatorTy = MallocAllocator>
class StringMap : public StringMapImpl {
AllocatorTy Allocator;
- typedef StringMapEntry<ValueTy> MapEntryTy;
public:
+ typedef StringMapEntry<ValueTy> MapEntryTy;
+
StringMap() : StringMapImpl(static_cast<unsigned>(sizeof(MapEntryTy))) {}
explicit StringMap(unsigned InitialSize)
: StringMapImpl(InitialSize, static_cast<unsigned>(sizeof(MapEntryTy))) {}
@@ -289,13 +281,13 @@ public:
iterator find(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return end();
- return iterator(TheTable+Bucket);
+ return iterator(TheTable+Bucket, true);
}
const_iterator find(StringRef Key) const {
int Bucket = FindKey(Key);
if (Bucket == -1) return end();
- return const_iterator(TheTable+Bucket);
+ return const_iterator(TheTable+Bucket, true);
}
/// lookup - Return the entry for the specified key, or a default
@@ -320,13 +312,13 @@ public:
/// insert it and return true.
bool insert(MapEntryTy *KeyValue) {
unsigned BucketNo = LookupBucketFor(KeyValue->getKey());
- ItemBucket &Bucket = TheTable[BucketNo];
- if (Bucket.Item && Bucket.Item != getTombstoneVal())
+ StringMapEntryBase *&Bucket = TheTable[BucketNo];
+ if (Bucket && Bucket != getTombstoneVal())
return false; // Already exists in map.
- if (Bucket.Item == getTombstoneVal())
+ if (Bucket == getTombstoneVal())
--NumTombstones;
- Bucket.Item = KeyValue;
+ Bucket = KeyValue;
++NumItems;
assert(NumItems + NumTombstones <= NumBuckets);
@@ -340,10 +332,11 @@ public:
// Zap all values, resetting the keys back to non-present (not tombstone),
// which is safe because we're removing all elements.
- for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) {
- if (I->Item && I->Item != getTombstoneVal()) {
- static_cast<MapEntryTy*>(I->Item)->Destroy(Allocator);
- I->Item = 0;
+ for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+ StringMapEntryBase *&Bucket = TheTable[I];
+ if (Bucket && Bucket != getTombstoneVal()) {
+ static_cast<MapEntryTy*>(Bucket)->Destroy(Allocator);
+ Bucket = 0;
}
}
@@ -357,21 +350,21 @@ public:
template <typename InitTy>
MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) {
unsigned BucketNo = LookupBucketFor(Key);
- ItemBucket &Bucket = TheTable[BucketNo];
- if (Bucket.Item && Bucket.Item != getTombstoneVal())
- return *static_cast<MapEntryTy*>(Bucket.Item);
+ StringMapEntryBase *&Bucket = TheTable[BucketNo];
+ if (Bucket && Bucket != getTombstoneVal())
+ return *static_cast<MapEntryTy*>(Bucket);
MapEntryTy *NewItem =
MapEntryTy::Create(Key.begin(), Key.end(), Allocator, Val);
- if (Bucket.Item == getTombstoneVal())
+ if (Bucket == getTombstoneVal())
--NumTombstones;
++NumItems;
assert(NumItems + NumTombstones <= NumBuckets);
// Fill in the bucket for the hash table. The FullHashValue was already
// filled in by LookupBucketFor.
- Bucket.Item = NewItem;
+ Bucket = NewItem;
RehashTable();
return *NewItem;
@@ -410,21 +403,21 @@ public:
template<typename ValueTy>
class StringMapConstIterator {
protected:
- StringMapImpl::ItemBucket *Ptr;
+ StringMapEntryBase **Ptr;
public:
typedef StringMapEntry<ValueTy> value_type;
- explicit StringMapConstIterator(StringMapImpl::ItemBucket *Bucket,
+ explicit StringMapConstIterator(StringMapEntryBase **Bucket,
bool NoAdvance = false)
: Ptr(Bucket) {
if (!NoAdvance) AdvancePastEmptyBuckets();
}
const value_type &operator*() const {
- return *static_cast<StringMapEntry<ValueTy>*>(Ptr->Item);
+ return *static_cast<StringMapEntry<ValueTy>*>(*Ptr);
}
const value_type *operator->() const {
- return static_cast<StringMapEntry<ValueTy>*>(Ptr->Item);
+ return static_cast<StringMapEntry<ValueTy>*>(*Ptr);
}
bool operator==(const StringMapConstIterator &RHS) const {
@@ -445,7 +438,7 @@ public:
private:
void AdvancePastEmptyBuckets() {
- while (Ptr->Item == 0 || Ptr->Item == StringMapImpl::getTombstoneVal())
+ while (*Ptr == 0 || *Ptr == StringMapImpl::getTombstoneVal())
++Ptr;
}
};
@@ -453,15 +446,15 @@ private:
template<typename ValueTy>
class StringMapIterator : public StringMapConstIterator<ValueTy> {
public:
- explicit StringMapIterator(StringMapImpl::ItemBucket *Bucket,
+ explicit StringMapIterator(StringMapEntryBase **Bucket,
bool NoAdvance = false)
: StringMapConstIterator<ValueTy>(Bucket, NoAdvance) {
}
StringMapEntry<ValueTy> &operator*() const {
- return *static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item);
+ return *static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
}
StringMapEntry<ValueTy> *operator->() const {
- return static_cast<StringMapEntry<ValueTy>*>(this->Ptr->Item);
+ return static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
}
};
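The bucket-layout change above is internal; client code is unaffected. A brief sketch of typical use against the new representation, illustrative only:

    #include "llvm/ADT/StringMap.h"

    unsigned sumValues(llvm::StringMap<unsigned> &M) {
      M.GetOrCreateValue("counter", 1u);  // writes the entry pointer directly
      unsigned Sum = 0;
      // Iteration walks the entry-pointer array, skipping null holes and
      // tombstones; the sentinel at TheTable[NumBuckets] keeps the scan bounded.
      for (llvm::StringMap<unsigned>::iterator I = M.begin(), E = M.end();
           I != E; ++I)
        Sum += I->getValue();
      return Sum;
    }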
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 8396921744ad..76ba66e746ce 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -10,15 +10,26 @@
#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H
+#include "llvm/Support/type_traits.h"
+
#include <cassert>
#include <cstring>
-#include <utility>
+#include <limits>
#include <string>
+#include <utility>
namespace llvm {
template<typename T>
class SmallVectorImpl;
class APInt;
+ class hash_code;
+ class StringRef;
+
+ /// Helper functions for StringRef::getAsInteger.
+ bool getAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result);
+
+ bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
/// StringRef - Represent a constant reference to a string, i.e. a character
/// array and a length, which need not be null terminated.
@@ -304,14 +315,29 @@ namespace llvm {
///
/// If the string is invalid or if only a subset of the string is valid,
/// this returns true to signify the error. The string is considered
- /// erroneous if empty.
+ /// erroneous if empty or if it overflows T.
///
- bool getAsInteger(unsigned Radix, long long &Result) const;
- bool getAsInteger(unsigned Radix, unsigned long long &Result) const;
- bool getAsInteger(unsigned Radix, int &Result) const;
- bool getAsInteger(unsigned Radix, unsigned &Result) const;
+ template <typename T>
+ typename enable_if_c<std::numeric_limits<T>::is_signed, bool>::type
+ getAsInteger(unsigned Radix, T &Result) const {
+ long long LLVal;
+ if (getAsSignedInteger(*this, Radix, LLVal) ||
+ static_cast<T>(LLVal) != LLVal)
+ return true;
+ Result = LLVal;
+ return false;
+ }
- // TODO: Provide overloads for int/unsigned that check for overflow.
+ template <typename T>
+ typename enable_if_c<!std::numeric_limits<T>::is_signed, bool>::type
+ getAsInteger(unsigned Radix, T &Result) const {
+ unsigned long long ULLVal;
+ if (getAsUnsignedInteger(*this, Radix, ULLVal) ||
+ static_cast<T>(ULLVal) != ULLVal)
+ return true;
+ Result = ULLVal;
+ return false;
+ }
/// getAsInteger - Parse the current string as an integer of the
/// specified radix, or of an autosensed radix if the radix given
@@ -327,6 +353,16 @@ namespace llvm {
bool getAsInteger(unsigned Radix, APInt &Result) const;
/// @}
+ /// @name String Operations
+ /// @{
+
+ /// lower - Convert the given ASCII string to lowercase.
+ std::string lower() const;
+
+ /// upper - Convert the given ASCII string to uppercase.
+ std::string upper() const;
+
+ /// @}
/// @name Substring Operations
/// @{
@@ -343,6 +379,20 @@ namespace llvm {
Start = min(Start, Length);
return StringRef(Data + Start, min(N, Length - Start));
}
+
+ /// drop_front - Return a StringRef equal to 'this' but with the first
+ /// N elements dropped.
+ StringRef drop_front(unsigned N = 1) const {
+ assert(size() >= N && "Dropping more elements than exist");
+ return substr(N);
+ }
+
+ /// drop_back - Return a StringRef equal to 'this' but with the last
+ /// N elements dropped.
+ StringRef drop_back(unsigned N = 1) const {
+ assert(size() >= N && "Dropping more elements than exist");
+ return substr(0, size()-N);
+ }
/// slice - Return a reference to the substring from [Start, End).
///
@@ -466,6 +516,9 @@ namespace llvm {
/// @}
+ /// \brief Compute a hash_code for a StringRef.
+ hash_code hash_value(StringRef S);
+
// StringRefs can be treated like a POD type.
template <typename T> struct isPodLike;
template <> struct isPodLike<StringRef> { static const bool value = true; };
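The templated getAsInteger overloads above now reject values that overflow the destination type, and drop_front/drop_back make prefix and suffix trimming explicit. A hedged sketch combining both, with a hypothetical helper:

    #include "llvm/ADT/StringRef.h"

    bool parsePort(llvm::StringRef Spec, unsigned short &Port) {
      // Expects "port=NNNN". Returns true on error, including values that
      // do not fit in the 16-bit result type.
      if (!Spec.startswith("port="))
        return true;
      return Spec.drop_front(5).getAsInteger(10, Port);
    }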
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index ee86d8bdf70f..5014517c9e05 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -37,6 +37,15 @@ public:
delete V;
}
+ // implicit conversion operator to ArrayRef.
+ operator ArrayRef<EltTy>() const {
+ if (Val.isNull())
+ return ArrayRef<EltTy>();
+ if (Val.template is<EltTy>())
+ return *Val.getAddrOfPtr1();
+ return *Val.template get<VecTy*>();
+ }
+
bool empty() const {
// This vector can be empty if it contains no element, or if it
// contains a pointer to an empty vector.
@@ -54,18 +63,20 @@ public:
return Val.template get<VecTy*>()->size();
}
- typedef const EltTy *iterator;
- iterator begin() const {
+ typedef const EltTy *const_iterator;
+ typedef EltTy *iterator;
+
+ iterator begin() {
if (empty())
return 0;
if (Val.template is<EltTy>())
- return Val.template getAddrOf<EltTy>();
+ return Val.getAddrOfPtr1();
return Val.template get<VecTy *>()->begin();
}
- iterator end() const {
+ iterator end() {
if (empty())
return 0;
@@ -75,7 +86,14 @@ public:
return Val.template get<VecTy *>()->end();
}
-
+ const_iterator begin() const {
+ return (const_iterator)const_cast<TinyPtrVector*>(this)->begin();
+ }
+
+ const_iterator end() const {
+ return (const_iterator)const_cast<TinyPtrVector*>(this)->end();
+ }
+
EltTy operator[](unsigned i) const {
assert(!Val.isNull() && "can't index into an empty vector");
if (EltTy V = Val.template dyn_cast<EltTy>()) {
@@ -124,6 +142,20 @@ public:
}
// Otherwise, we're already empty.
}
+
+ iterator erase(iterator I) {
+ // If we have a single value, convert to empty.
+ if (Val.template is<EltTy>()) {
+ if (I == begin())
+ Val = (EltTy)0;
+ } else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
+ // Multiple items in a vector; just do the erase. There is no
+ // benefit to collapsing back to a pointer.
+ return Vec->erase(I);
+ }
+
+ return 0;
+ }
private:
void operator=(const TinyPtrVector&); // NOT IMPLEMENTED YET.
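A sketch of the new TinyPtrVector surface, illustrative only: the ArrayRef conversion works in both the inline single-pointer state and the heap-vector state, and erase() collapses a singleton back to empty:

    #include "llvm/ADT/TinyPtrVector.h"
    #include "llvm/ADT/ArrayRef.h"

    static unsigned countAll(llvm::ArrayRef<int *> A) { return A.size(); }

    void demo(llvm::TinyPtrVector<int *> &V) {
      (void)countAll(V);      // implicit conversion to ArrayRef<int *>
      if (!V.empty())
        V.erase(V.begin());   // singleton -> empty, or plain vector erase
    }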
diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h
index 6b150c8fffa0..845af015b052 100644
--- a/include/llvm/ADT/Trie.h
+++ b/include/llvm/ADT/Trie.h
@@ -220,8 +220,7 @@ bool Trie<Payload>::addString(const std::string& s, const Payload& data) {
assert(0 && "FIXME!");
return false;
case Node::DontMatch:
- assert(0 && "Impossible!");
- return false;
+ llvm_unreachable("Impossible!");
case Node::LabelIsPrefix:
s1 = s1.substr(nNode->label().length());
cNode = nNode;
@@ -258,8 +257,7 @@ const Payload& Trie<Payload>::lookup(const std::string& s) const {
case Node::StringIsPrefix:
return Empty;
case Node::DontMatch:
- assert(0 && "Impossible!");
- return Empty;
+ llvm_unreachable("Impossible!");
case Node::LabelIsPrefix:
s1 = s1.substr(nNode->label().length());
cNode = nNode;
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 3503c0f22145..f5f99d0f1b82 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -43,20 +43,19 @@ public:
enum ArchType {
UnknownArch,
- alpha, // Alpha: alpha
arm, // ARM; arm, armv.*, xscale
- bfin, // Blackfin: bfin
cellspu, // CellSPU: spu, cellspu
+ hexagon, // Hexagon: hexagon
mips, // MIPS: mips, mipsallegrex
- mipsel, // MIPSEL: mipsel, mipsallegrexel, psp
+ mipsel, // MIPSEL: mipsel, mipsallegrexel
mips64, // MIPS64: mips64
mips64el,// MIPS64EL: mips64el
msp430, // MSP430: msp430
ppc, // PPC: powerpc
ppc64, // PPC64: powerpc64, ppu
+ r600, // R600: AMD GPUs HD2XXX - HD6XXX
sparc, // Sparc: sparc
sparcv9, // Sparcv9: Sparcv9
- systemz, // SystemZ: s390x
tce, // TCE (http://tce.cs.tut.fi/): tce
thumb, // Thumb: thumb, thumbv.*
x86, // X86: i[3-9]86
@@ -66,16 +65,16 @@ public:
ptx32, // PTX: ptx (32-bit)
ptx64, // PTX: ptx (64-bit)
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
- amdil, // amdil: amd IL
-
- InvalidArch
+ amdil // amdil: amd IL
};
enum VendorType {
UnknownVendor,
Apple,
PC,
- SCEI
+ SCEI,
+ BGP,
+ BGQ
};
enum OSType {
UnknownOS,
@@ -93,61 +92,52 @@ public:
MinGW32, // i*86-pc-mingw32, *-w64-mingw32
NetBSD,
OpenBSD,
- Psp,
Solaris,
Win32,
Haiku,
Minix,
RTEMS,
- NativeClient
+ NativeClient,
+ CNK // BG/P Compute-Node Kernel
};
enum EnvironmentType {
UnknownEnvironment,
GNU,
GNUEABI,
+ GNUEABIHF,
EABI,
- MachO
+ MachO,
+ ANDROIDEABI
};
private:
std::string Data;
- /// The parsed arch type (or InvalidArch if uninitialized).
- mutable ArchType Arch;
+ /// The parsed arch type.
+ ArchType Arch;
/// The parsed vendor type.
- mutable VendorType Vendor;
+ VendorType Vendor;
/// The parsed OS type.
- mutable OSType OS;
+ OSType OS;
/// The parsed Environment type.
- mutable EnvironmentType Environment;
-
- bool isInitialized() const { return Arch != InvalidArch; }
- static ArchType ParseArch(StringRef ArchName);
- static VendorType ParseVendor(StringRef VendorName);
- static OSType ParseOS(StringRef OSName);
- static EnvironmentType ParseEnvironment(StringRef EnvironmentName);
- void Parse() const;
+ EnvironmentType Environment;
public:
/// @name Constructors
/// @{
- Triple() : Data(), Arch(InvalidArch) {}
- explicit Triple(const Twine &Str) : Data(Str.str()), Arch(InvalidArch) {}
- Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr)
- : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()),
- Arch(InvalidArch) {
- }
+ /// \brief Default constructor is the same as an empty string and leaves all
+ /// triple fields unknown.
+ Triple() : Data(), Arch(), Vendor(), OS(), Environment() {}
+ explicit Triple(const Twine &Str);
+ Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr);
Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr,
- const Twine &EnvironmentStr)
- : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') +
- EnvironmentStr).str()), Arch(InvalidArch) {
- }
+ const Twine &EnvironmentStr);
/// @}
/// @name Normalization
@@ -164,22 +154,13 @@ public:
/// @{
/// getArch - Get the parsed architecture type of this triple.
- ArchType getArch() const {
- if (!isInitialized()) Parse();
- return Arch;
- }
+ ArchType getArch() const { return Arch; }
/// getVendor - Get the parsed vendor type of this triple.
- VendorType getVendor() const {
- if (!isInitialized()) Parse();
- return Vendor;
- }
+ VendorType getVendor() const { return Vendor; }
/// getOS - Get the parsed operating system type of this triple.
- OSType getOS() const {
- if (!isInitialized()) Parse();
- return OS;
- }
+ OSType getOS() const { return OS; }
/// hasEnvironment - Does this triple have the optional environment
/// (fourth) component?
@@ -188,11 +169,31 @@ public:
}
/// getEnvironment - Get the parsed environment type of this triple.
- EnvironmentType getEnvironment() const {
- if (!isInitialized()) Parse();
- return Environment;
+ EnvironmentType getEnvironment() const { return Environment; }
+
+ /// getOSVersion - Parse the version number from the OS name component of the
+ /// triple, if present.
+ ///
+ /// For example, "fooos1.2.3" would return (1, 2, 3).
+ ///
+ /// If an entry is not defined, it will be returned as 0.
+ void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const;
+
+ /// getOSMajorVersion - Return just the major version number, this is
+ /// specialized because it is a common query.
+ unsigned getOSMajorVersion() const {
+ unsigned Maj, Min, Micro;
+ getOSVersion(Maj, Min, Micro);
+ return Maj;
}
+ /// getMacOSXVersion - Parse the version number as with getOSVersion and then
+ /// translate generic "darwin" versions to the corresponding OS X versions.
+ /// This may also be called with IOS triples but the OS X version number is
+ /// just set to a constant 10.4.0 in that case. Returns true if successful.
+ bool getMacOSXVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const;
+
/// @}
/// @name Direct Component Access
/// @{
@@ -221,21 +222,28 @@ public:
/// if the environment component is present).
StringRef getOSAndEnvironmentName() const;
- /// getOSVersion - Parse the version number from the OS name component of the
- /// triple, if present.
+ /// @}
+ /// @name Convenience Predicates
+ /// @{
+
+ /// \brief Test whether the architecture is 64-bit
///
- /// For example, "fooos1.2.3" would return (1, 2, 3).
+ /// Note that this tests for 64-bit pointer width, and nothing else. Note
+ /// that we intentionally expose only three predicates, 64-bit, 32-bit, and
+ /// 16-bit. The inner details of pointer width for particular architectures
+ /// are not summed up in the triple, and so only a coarse-grained predicate
+ /// system is provided.
+ bool isArch64Bit() const;
+
+ /// \brief Test whether the architecture is 32-bit
///
- /// If an entry is not defined, it will be returned as 0.
- void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const;
+ /// Note that this tests for 32-bit pointer width, and nothing else.
+ bool isArch32Bit() const;
- /// getOSMajorVersion - Return just the major version number, this is
- /// specialized because it is a common query.
- unsigned getOSMajorVersion() const {
- unsigned Maj, Min, Micro;
- getOSVersion(Maj, Min, Micro);
- return Maj;
- }
+ /// \brief Test whether the architecture is 16-bit
+ ///
+ /// Note that this tests for 16-bit pointer width, and nothing else.
+ bool isArch16Bit() const;
/// isOSVersionLT - Helper function for doing comparisons against version
/// numbers included in the target triple.
@@ -254,6 +262,22 @@ public:
return false;
}
+ /// isMacOSXVersionLT - Comparison function for checking OS X version
+ /// compatibility, which handles supporting skewed version numbering schemes
+ /// used by the "darwin" triples.
+ unsigned isMacOSXVersionLT(unsigned Major, unsigned Minor = 0,
+ unsigned Micro = 0) const {
+ assert(isMacOSX() && "Not an OS X triple!");
+
+ // If this is OS X, expect a sane version number.
+ if (getOS() == Triple::MacOSX)
+ return isOSVersionLT(Major, Minor, Micro);
+
+ // Otherwise, compare to the "Darwin" number.
+ assert(Major == 10 && "Unexpected major version");
+ return isOSVersionLT(Minor + 4, Micro, 0);
+ }
+
/// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both
/// "darwin" and "osx" as OS X triples.
bool isMacOSX() const {
@@ -265,26 +289,30 @@ public:
return isMacOSX() || getOS() == Triple::IOS;
}
+ /// \brief Tests for either Cygwin or MinGW OS
+ bool isOSCygMing() const {
+ return getOS() == Triple::Cygwin || getOS() == Triple::MinGW32;
+ }
+
/// isOSWindows - Is this a "Windows" OS.
bool isOSWindows() const {
- return getOS() == Triple::Win32 || getOS() == Triple::Cygwin ||
- getOS() == Triple::MinGW32;
+ return getOS() == Triple::Win32 || isOSCygMing();
}
- /// isMacOSXVersionLT - Comparison function for checking OS X version
- /// compatibility, which handles supporting skewed version numbering schemes
- /// used by the "darwin" triples.
- unsigned isMacOSXVersionLT(unsigned Major, unsigned Minor = 0,
- unsigned Micro = 0) const {
- assert(isMacOSX() && "Not an OS X triple!");
+ /// \brief Tests whether the OS uses the ELF binary format.
+ bool isOSBinFormatELF() const {
+ return !isOSDarwin() && !isOSWindows();
+ }
- // If this is OS X, expect a sane version number.
- if (getOS() == Triple::MacOSX)
- return isOSVersionLT(Major, Minor, Micro);
+ /// \brief Tests whether the OS uses the COFF binary format.
+ bool isOSBinFormatCOFF() const {
+ return isOSWindows();
+ }
- // Otherwise, compare to the "Darwin" number.
- assert(Major == 10 && "Unexpected major version");
- return isOSVersionLT(Minor + 4, Micro, 0);
+ /// \brief Tests whether the environment is MachO.
+ // FIXME: Should this be an OSBinFormat predicate?
+ bool isEnvironmentMachO() const {
+ return getEnvironment() == Triple::MachO || isOSDarwin();
}
/// @}
@@ -335,6 +363,26 @@ public:
const char *getArchNameForAssembler();
/// @}
+ /// @name Helpers to build variants of a particular triple.
+ /// @{
+
+ /// \brief Form a triple with a 32-bit variant of the current architecture.
+ ///
+ /// This can be used to move across "families" of architectures where useful.
+ ///
+ /// \returns A new triple with a 32-bit architecture or an unknown
+ /// architecture if no such variant can be found.
+ llvm::Triple get32BitArchVariant() const;
+
+ /// \brief Form a triple with a 64-bit variant of the current architecture.
+ ///
+ /// This can be used to move across "families" of architectures where useful.
+ ///
+ /// \returns A new triple with a 64-bit architecture or an unknown
+ /// architecture if no such variant can be found.
+ llvm::Triple get64BitArchVariant() const;
+
+ /// @}
/// @name Static helpers for IDs.
/// @{
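Triples are now parsed eagerly in the constructor, so the accessors above are trivial reads, and the new predicates and variant builders compose directly. An illustrative sketch; the commented results are expectations, not asserted by the patch:

    #include "llvm/ADT/Triple.h"

    void inspect() {
      llvm::Triple T("x86_64-pc-linux-gnu");      // parsed here, eagerly
      bool Is64 = T.isArch64Bit();                // pointer-width predicate
      bool ELF  = T.isOSBinFormatELF();           // neither Darwin nor Windows
      llvm::Triple T32 = T.get32BitArchVariant(); // expected i386-pc-linux-gnu
      (void)Is64; (void)ELF; (void)T32;
    }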
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index 3a60cab77935..9101df8cee37 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <string>
@@ -425,7 +426,7 @@ namespace llvm {
StringRef getSingleStringRef() const {
assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
switch (getLHSKind()) {
- default: assert(0 && "Out of sync with isSingleStringRef");
+ default: llvm_unreachable("Out of sync with isSingleStringRef");
case EmptyKind: return StringRef();
case CStringKind: return StringRef(LHS.cString);
case StdStringKind: return StringRef(*LHS.stdString);
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index d1f4e5a0dacd..707d07d32cbc 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -35,7 +35,7 @@
namespace llvm {
-template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
+template<typename KeyT, typename ValueT, typename Config>
class ValueMapCallbackVH;
template<typename DenseMapT, typename KeyT>
@@ -72,13 +72,11 @@ struct ValueMapConfig {
};
/// See the file comment.
-template<typename KeyT, typename ValueT, typename Config = ValueMapConfig<KeyT>,
- typename ValueInfoT = DenseMapInfo<ValueT> >
+template<typename KeyT, typename ValueT, typename Config =ValueMapConfig<KeyT> >
class ValueMap {
- friend class ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT>;
- typedef ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> ValueMapCVH;
- typedef DenseMap<ValueMapCVH, ValueT, DenseMapInfo<ValueMapCVH>,
- ValueInfoT> MapT;
+ friend class ValueMapCallbackVH<KeyT, ValueT, Config>;
+ typedef ValueMapCallbackVH<KeyT, ValueT, Config> ValueMapCVH;
+ typedef DenseMap<ValueMapCVH, ValueT, DenseMapInfo<ValueMapCVH> > MapT;
typedef typename Config::ExtraData ExtraData;
MapT Map;
ExtraData Data;
@@ -190,11 +188,11 @@ private:
// This CallbackVH updates its ValueMap when the contained Value changes,
// according to the user's preferences expressed through the Config object.
-template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
+template<typename KeyT, typename ValueT, typename Config>
class ValueMapCallbackVH : public CallbackVH {
- friend class ValueMap<KeyT, ValueT, Config, ValueInfoT>;
+ friend class ValueMap<KeyT, ValueT, Config>;
friend struct DenseMapInfo<ValueMapCallbackVH>;
- typedef ValueMap<KeyT, ValueT, Config, ValueInfoT> ValueMapT;
+ typedef ValueMap<KeyT, ValueT, Config> ValueMapT;
typedef typename llvm::remove_pointer<KeyT>::type KeySansPointerT;
ValueMapT *Map;
@@ -244,9 +242,9 @@ public:
}
};
-template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
-struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> > {
- typedef ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> VH;
+template<typename KeyT, typename ValueT, typename Config>
+struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config> > {
+ typedef ValueMapCallbackVH<KeyT, ValueT, Config> VH;
typedef DenseMapInfo<KeyT> PointerInfo;
static inline VH getEmptyKey() {
diff --git a/include/llvm/ADT/VariadicFunction.h b/include/llvm/ADT/VariadicFunction.h
new file mode 100644
index 000000000000..a9a0dc6b6e20
--- /dev/null
+++ b/include/llvm/ADT/VariadicFunction.h
@@ -0,0 +1,331 @@
+//===--- VariadicFunction.h - Variadic Functions ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements compile-time type-safe variadic functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_VARIADIC_FUNCTION_H
+#define LLVM_ADT_VARIADIC_FUNCTION_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+
+// Define macros to aid in expanding a comma separated series with the index of
+// the series pasted onto the last token.
+#define LLVM_COMMA_JOIN1(x) x ## 0
+#define LLVM_COMMA_JOIN2(x) LLVM_COMMA_JOIN1(x), x ## 1
+#define LLVM_COMMA_JOIN3(x) LLVM_COMMA_JOIN2(x), x ## 2
+#define LLVM_COMMA_JOIN4(x) LLVM_COMMA_JOIN3(x), x ## 3
+#define LLVM_COMMA_JOIN5(x) LLVM_COMMA_JOIN4(x), x ## 4
+#define LLVM_COMMA_JOIN6(x) LLVM_COMMA_JOIN5(x), x ## 5
+#define LLVM_COMMA_JOIN7(x) LLVM_COMMA_JOIN6(x), x ## 6
+#define LLVM_COMMA_JOIN8(x) LLVM_COMMA_JOIN7(x), x ## 7
+#define LLVM_COMMA_JOIN9(x) LLVM_COMMA_JOIN8(x), x ## 8
+#define LLVM_COMMA_JOIN10(x) LLVM_COMMA_JOIN9(x), x ## 9
+#define LLVM_COMMA_JOIN11(x) LLVM_COMMA_JOIN10(x), x ## 10
+#define LLVM_COMMA_JOIN12(x) LLVM_COMMA_JOIN11(x), x ## 11
+#define LLVM_COMMA_JOIN13(x) LLVM_COMMA_JOIN12(x), x ## 12
+#define LLVM_COMMA_JOIN14(x) LLVM_COMMA_JOIN13(x), x ## 13
+#define LLVM_COMMA_JOIN15(x) LLVM_COMMA_JOIN14(x), x ## 14
+#define LLVM_COMMA_JOIN16(x) LLVM_COMMA_JOIN15(x), x ## 15
+#define LLVM_COMMA_JOIN17(x) LLVM_COMMA_JOIN16(x), x ## 16
+#define LLVM_COMMA_JOIN18(x) LLVM_COMMA_JOIN17(x), x ## 17
+#define LLVM_COMMA_JOIN19(x) LLVM_COMMA_JOIN18(x), x ## 18
+#define LLVM_COMMA_JOIN20(x) LLVM_COMMA_JOIN19(x), x ## 19
+#define LLVM_COMMA_JOIN21(x) LLVM_COMMA_JOIN20(x), x ## 20
+#define LLVM_COMMA_JOIN22(x) LLVM_COMMA_JOIN21(x), x ## 21
+#define LLVM_COMMA_JOIN23(x) LLVM_COMMA_JOIN22(x), x ## 22
+#define LLVM_COMMA_JOIN24(x) LLVM_COMMA_JOIN23(x), x ## 23
+#define LLVM_COMMA_JOIN25(x) LLVM_COMMA_JOIN24(x), x ## 24
+#define LLVM_COMMA_JOIN26(x) LLVM_COMMA_JOIN25(x), x ## 25
+#define LLVM_COMMA_JOIN27(x) LLVM_COMMA_JOIN26(x), x ## 26
+#define LLVM_COMMA_JOIN28(x) LLVM_COMMA_JOIN27(x), x ## 27
+#define LLVM_COMMA_JOIN29(x) LLVM_COMMA_JOIN28(x), x ## 28
+#define LLVM_COMMA_JOIN30(x) LLVM_COMMA_JOIN29(x), x ## 29
+#define LLVM_COMMA_JOIN31(x) LLVM_COMMA_JOIN30(x), x ## 30
+#define LLVM_COMMA_JOIN32(x) LLVM_COMMA_JOIN31(x), x ## 31
+
+/// \brief Class which can simulate a type-safe variadic function.
+///
+/// The VariadicFunction class template makes it easy to define
+/// type-safe variadic functions where all arguments have the same
+/// type.
+///
+/// Suppose we need a variadic function like this:
+///
+/// ResultT Foo(const ArgT &A_0, const ArgT &A_1, ..., const ArgT &A_N);
+///
+/// Instead of many overloads of Foo(), we only need to define a helper
+/// function that takes an array of arguments:
+///
+/// ResultT FooImpl(ArrayRef<const ArgT *> Args) {
+/// // 'Args[i]' is a pointer to the i-th argument passed to Foo().
+/// ...
+/// }
+///
+/// and then define Foo() like this:
+///
+/// const VariadicFunction<ResultT, ArgT, FooImpl> Foo;
+///
+/// VariadicFunction takes care of defining the overloads of Foo().
+///
+/// Actually, Foo is a function object (i.e. functor) instead of a plain
+/// function. This object is stateless and its constructor/destructor
+/// does nothing, so it's safe to create global objects and call Foo(...) at
+/// any time.
+///
+/// Sometimes we need a variadic function to have some fixed leading
+/// arguments whose types may be different from that of the optional
+/// arguments. For example:
+///
+/// bool FullMatch(const StringRef &S, const RE &Regex,
+/// const ArgT &A_0, ..., const ArgT &A_N);
+///
+/// VariadicFunctionN is for such cases, where N is the number of fixed
+/// arguments. It is like VariadicFunction, except that it takes N more
+/// template arguments for the types of the fixed arguments:
+///
+/// bool FullMatchImpl(const StringRef &S, const RE &Regex,
+/// ArrayRef<const ArgT *> Args) { ... }
+/// const VariadicFunction2<bool, const StringRef&,
+/// const RE&, ArgT, FullMatchImpl>
+/// FullMatch;
+///
+/// Currently VariadicFunction and friends support up to 3
+/// fixed leading arguments and up to 32 optional arguments.
+template <typename ResultT, typename ArgT,
+ ResultT (*Func)(ArrayRef<const ArgT *>)>
+struct VariadicFunction {
+ ResultT operator()() const {
+ return Func(ArrayRef<const ArgT *>());
+ }
+
+#define LLVM_DEFINE_OVERLOAD(N) \
+ ResultT operator()(LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
+ const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
+ return Func(makeArrayRef(Args)); \
+ }
+ LLVM_DEFINE_OVERLOAD(1)
+ LLVM_DEFINE_OVERLOAD(2)
+ LLVM_DEFINE_OVERLOAD(3)
+ LLVM_DEFINE_OVERLOAD(4)
+ LLVM_DEFINE_OVERLOAD(5)
+ LLVM_DEFINE_OVERLOAD(6)
+ LLVM_DEFINE_OVERLOAD(7)
+ LLVM_DEFINE_OVERLOAD(8)
+ LLVM_DEFINE_OVERLOAD(9)
+ LLVM_DEFINE_OVERLOAD(10)
+ LLVM_DEFINE_OVERLOAD(11)
+ LLVM_DEFINE_OVERLOAD(12)
+ LLVM_DEFINE_OVERLOAD(13)
+ LLVM_DEFINE_OVERLOAD(14)
+ LLVM_DEFINE_OVERLOAD(15)
+ LLVM_DEFINE_OVERLOAD(16)
+ LLVM_DEFINE_OVERLOAD(17)
+ LLVM_DEFINE_OVERLOAD(18)
+ LLVM_DEFINE_OVERLOAD(19)
+ LLVM_DEFINE_OVERLOAD(20)
+ LLVM_DEFINE_OVERLOAD(21)
+ LLVM_DEFINE_OVERLOAD(22)
+ LLVM_DEFINE_OVERLOAD(23)
+ LLVM_DEFINE_OVERLOAD(24)
+ LLVM_DEFINE_OVERLOAD(25)
+ LLVM_DEFINE_OVERLOAD(26)
+ LLVM_DEFINE_OVERLOAD(27)
+ LLVM_DEFINE_OVERLOAD(28)
+ LLVM_DEFINE_OVERLOAD(29)
+ LLVM_DEFINE_OVERLOAD(30)
+ LLVM_DEFINE_OVERLOAD(31)
+ LLVM_DEFINE_OVERLOAD(32)
+#undef LLVM_DEFINE_OVERLOAD
+};
+
+template <typename ResultT, typename Param0T, typename ArgT,
+ ResultT (*Func)(Param0T, ArrayRef<const ArgT *>)>
+struct VariadicFunction1 {
+ ResultT operator()(Param0T P0) const {
+ return Func(P0, ArrayRef<const ArgT *>());
+ }
+
+#define LLVM_DEFINE_OVERLOAD(N) \
+ ResultT operator()(Param0T P0, LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
+ const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
+ return Func(P0, makeArrayRef(Args)); \
+ }
+ LLVM_DEFINE_OVERLOAD(1)
+ LLVM_DEFINE_OVERLOAD(2)
+ LLVM_DEFINE_OVERLOAD(3)
+ LLVM_DEFINE_OVERLOAD(4)
+ LLVM_DEFINE_OVERLOAD(5)
+ LLVM_DEFINE_OVERLOAD(6)
+ LLVM_DEFINE_OVERLOAD(7)
+ LLVM_DEFINE_OVERLOAD(8)
+ LLVM_DEFINE_OVERLOAD(9)
+ LLVM_DEFINE_OVERLOAD(10)
+ LLVM_DEFINE_OVERLOAD(11)
+ LLVM_DEFINE_OVERLOAD(12)
+ LLVM_DEFINE_OVERLOAD(13)
+ LLVM_DEFINE_OVERLOAD(14)
+ LLVM_DEFINE_OVERLOAD(15)
+ LLVM_DEFINE_OVERLOAD(16)
+ LLVM_DEFINE_OVERLOAD(17)
+ LLVM_DEFINE_OVERLOAD(18)
+ LLVM_DEFINE_OVERLOAD(19)
+ LLVM_DEFINE_OVERLOAD(20)
+ LLVM_DEFINE_OVERLOAD(21)
+ LLVM_DEFINE_OVERLOAD(22)
+ LLVM_DEFINE_OVERLOAD(23)
+ LLVM_DEFINE_OVERLOAD(24)
+ LLVM_DEFINE_OVERLOAD(25)
+ LLVM_DEFINE_OVERLOAD(26)
+ LLVM_DEFINE_OVERLOAD(27)
+ LLVM_DEFINE_OVERLOAD(28)
+ LLVM_DEFINE_OVERLOAD(29)
+ LLVM_DEFINE_OVERLOAD(30)
+ LLVM_DEFINE_OVERLOAD(31)
+ LLVM_DEFINE_OVERLOAD(32)
+#undef LLVM_DEFINE_OVERLOAD
+};
+
+template <typename ResultT, typename Param0T, typename Param1T, typename ArgT,
+ ResultT (*Func)(Param0T, Param1T, ArrayRef<const ArgT *>)>
+struct VariadicFunction2 {
+ ResultT operator()(Param0T P0, Param1T P1) const {
+ return Func(P0, P1, ArrayRef<const ArgT *>());
+ }
+
+#define LLVM_DEFINE_OVERLOAD(N) \
+ ResultT operator()(Param0T P0, Param1T P1, \
+ LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
+ const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
+ return Func(P0, P1, makeArrayRef(Args)); \
+ }
+ LLVM_DEFINE_OVERLOAD(1)
+ LLVM_DEFINE_OVERLOAD(2)
+ LLVM_DEFINE_OVERLOAD(3)
+ LLVM_DEFINE_OVERLOAD(4)
+ LLVM_DEFINE_OVERLOAD(5)
+ LLVM_DEFINE_OVERLOAD(6)
+ LLVM_DEFINE_OVERLOAD(7)
+ LLVM_DEFINE_OVERLOAD(8)
+ LLVM_DEFINE_OVERLOAD(9)
+ LLVM_DEFINE_OVERLOAD(10)
+ LLVM_DEFINE_OVERLOAD(11)
+ LLVM_DEFINE_OVERLOAD(12)
+ LLVM_DEFINE_OVERLOAD(13)
+ LLVM_DEFINE_OVERLOAD(14)
+ LLVM_DEFINE_OVERLOAD(15)
+ LLVM_DEFINE_OVERLOAD(16)
+ LLVM_DEFINE_OVERLOAD(17)
+ LLVM_DEFINE_OVERLOAD(18)
+ LLVM_DEFINE_OVERLOAD(19)
+ LLVM_DEFINE_OVERLOAD(20)
+ LLVM_DEFINE_OVERLOAD(21)
+ LLVM_DEFINE_OVERLOAD(22)
+ LLVM_DEFINE_OVERLOAD(23)
+ LLVM_DEFINE_OVERLOAD(24)
+ LLVM_DEFINE_OVERLOAD(25)
+ LLVM_DEFINE_OVERLOAD(26)
+ LLVM_DEFINE_OVERLOAD(27)
+ LLVM_DEFINE_OVERLOAD(28)
+ LLVM_DEFINE_OVERLOAD(29)
+ LLVM_DEFINE_OVERLOAD(30)
+ LLVM_DEFINE_OVERLOAD(31)
+ LLVM_DEFINE_OVERLOAD(32)
+#undef LLVM_DEFINE_OVERLOAD
+};
+
+template <typename ResultT, typename Param0T, typename Param1T,
+ typename Param2T, typename ArgT,
+ ResultT (*Func)(Param0T, Param1T, Param2T, ArrayRef<const ArgT *>)>
+struct VariadicFunction3 {
+ ResultT operator()(Param0T P0, Param1T P1, Param2T P2) const {
+ return Func(P0, P1, P2, ArrayRef<const ArgT *>());
+ }
+
+#define LLVM_DEFINE_OVERLOAD(N) \
+ ResultT operator()(Param0T P0, Param1T P1, Param2T P2, \
+ LLVM_COMMA_JOIN ## N(const ArgT &A)) const { \
+ const ArgT *const Args[] = { LLVM_COMMA_JOIN ## N(&A) }; \
+ return Func(P0, P1, P2, makeArrayRef(Args)); \
+ }
+ LLVM_DEFINE_OVERLOAD(1)
+ LLVM_DEFINE_OVERLOAD(2)
+ LLVM_DEFINE_OVERLOAD(3)
+ LLVM_DEFINE_OVERLOAD(4)
+ LLVM_DEFINE_OVERLOAD(5)
+ LLVM_DEFINE_OVERLOAD(6)
+ LLVM_DEFINE_OVERLOAD(7)
+ LLVM_DEFINE_OVERLOAD(8)
+ LLVM_DEFINE_OVERLOAD(9)
+ LLVM_DEFINE_OVERLOAD(10)
+ LLVM_DEFINE_OVERLOAD(11)
+ LLVM_DEFINE_OVERLOAD(12)
+ LLVM_DEFINE_OVERLOAD(13)
+ LLVM_DEFINE_OVERLOAD(14)
+ LLVM_DEFINE_OVERLOAD(15)
+ LLVM_DEFINE_OVERLOAD(16)
+ LLVM_DEFINE_OVERLOAD(17)
+ LLVM_DEFINE_OVERLOAD(18)
+ LLVM_DEFINE_OVERLOAD(19)
+ LLVM_DEFINE_OVERLOAD(20)
+ LLVM_DEFINE_OVERLOAD(21)
+ LLVM_DEFINE_OVERLOAD(22)
+ LLVM_DEFINE_OVERLOAD(23)
+ LLVM_DEFINE_OVERLOAD(24)
+ LLVM_DEFINE_OVERLOAD(25)
+ LLVM_DEFINE_OVERLOAD(26)
+ LLVM_DEFINE_OVERLOAD(27)
+ LLVM_DEFINE_OVERLOAD(28)
+ LLVM_DEFINE_OVERLOAD(29)
+ LLVM_DEFINE_OVERLOAD(30)
+ LLVM_DEFINE_OVERLOAD(31)
+ LLVM_DEFINE_OVERLOAD(32)
+#undef LLVM_DEFINE_OVERLOAD
+};
+
+// Cleanup the macro namespace.
+#undef LLVM_COMMA_JOIN1
+#undef LLVM_COMMA_JOIN2
+#undef LLVM_COMMA_JOIN3
+#undef LLVM_COMMA_JOIN4
+#undef LLVM_COMMA_JOIN5
+#undef LLVM_COMMA_JOIN6
+#undef LLVM_COMMA_JOIN7
+#undef LLVM_COMMA_JOIN8
+#undef LLVM_COMMA_JOIN9
+#undef LLVM_COMMA_JOIN10
+#undef LLVM_COMMA_JOIN11
+#undef LLVM_COMMA_JOIN12
+#undef LLVM_COMMA_JOIN13
+#undef LLVM_COMMA_JOIN14
+#undef LLVM_COMMA_JOIN15
+#undef LLVM_COMMA_JOIN16
+#undef LLVM_COMMA_JOIN17
+#undef LLVM_COMMA_JOIN18
+#undef LLVM_COMMA_JOIN19
+#undef LLVM_COMMA_JOIN20
+#undef LLVM_COMMA_JOIN21
+#undef LLVM_COMMA_JOIN22
+#undef LLVM_COMMA_JOIN23
+#undef LLVM_COMMA_JOIN24
+#undef LLVM_COMMA_JOIN25
+#undef LLVM_COMMA_JOIN26
+#undef LLVM_COMMA_JOIN27
+#undef LLVM_COMMA_JOIN28
+#undef LLVM_COMMA_JOIN29
+#undef LLVM_COMMA_JOIN30
+#undef LLVM_COMMA_JOIN31
+#undef LLVM_COMMA_JOIN32
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_VARIADIC_FUNCTION_H
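Following the file's own doc comment, a hypothetical Sum defined through VariadicFunction; all names are illustrative:

    #include "llvm/ADT/VariadicFunction.h"

    static int SumImpl(llvm::ArrayRef<const int *> Args) {
      int S = 0;
      for (unsigned i = 0, e = Args.size(); i != e; ++i)
        S += *Args[i];  // Args[i] points at the i-th caller argument
      return S;
    }

    // Callable with 0 to 32 int arguments: Sum() == 0, Sum(1, 2, 3) == 6.
    const llvm::VariadicFunction<int, int, SumImpl> Sum = {};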
diff --git a/include/llvm/ADT/VectorExtras.h b/include/llvm/ADT/VectorExtras.h
deleted file mode 100644
index e05f585996f9..000000000000
--- a/include/llvm/ADT/VectorExtras.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//===-- llvm/ADT/VectorExtras.h - Helpers for std::vector -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains helper functions which are useful for working with the
-// std::vector class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_VECTOREXTRAS_H
-#define LLVM_ADT_VECTOREXTRAS_H
-
-#include <cstdarg>
-#include <vector>
-
-namespace llvm {
-
-/// make_vector - Helper function which is useful for building temporary vectors
-/// to pass into type construction of CallInst ctors. This turns a null
-/// terminated list of pointers (or other value types) into a real live vector.
-///
-template<typename T>
-inline std::vector<T> make_vector(T A, ...) {
- va_list Args;
- va_start(Args, A);
- std::vector<T> Result;
- Result.push_back(A);
- while (T Val = va_arg(Args, T))
- Result.push_back(Val);
- va_end(Args);
- return Result;
-}
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/ADT/edit_distance.h b/include/llvm/ADT/edit_distance.h
new file mode 100644
index 000000000000..f77ef13fef2b
--- /dev/null
+++ b/include/llvm/ADT/edit_distance.h
@@ -0,0 +1,102 @@
+//===-- llvm/ADT/edit_distance.h - Array edit distance function -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a Levenshtein distance function that works for any two
+// sequences, with each element of each sequence being analogous to a character
+// in a string.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_EDIT_DISTANCE_H
+#define LLVM_ADT_EDIT_DISTANCE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
+#include <algorithm>
+
+namespace llvm {
+
+/// \brief Determine the edit distance between two sequences.
+///
+/// \param FromArray the first sequence to compare.
+///
+/// \param ToArray the second sequence to compare.
+///
+/// \param AllowReplacements whether to allow element replacements (change one
+/// element into another) as a single operation, rather than as two operations
+/// (an insertion and a removal).
+///
+/// \param MaxEditDistance If non-zero, the maximum edit distance that this
+/// routine is allowed to compute. If the edit distance will exceed that
+/// maximum, returns \c MaxEditDistance+1.
+///
+/// \returns the minimum number of element insertions, removals, or (if
+/// \p AllowReplacements is \c true) replacements needed to transform one of
+/// the given sequences into the other. If zero, the sequences are identical.
+template<typename T>
+unsigned ComputeEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
+ bool AllowReplacements = true,
+ unsigned MaxEditDistance = 0) {
+ // The algorithm implemented below is the "classic"
+ // dynamic-programming algorithm for computing the Levenshtein
+ // distance, which is described here:
+ //
+ // http://en.wikipedia.org/wiki/Levenshtein_distance
+ //
+ // Although the algorithm is typically described using an m x n
+ // array, only two rows are used at a time, so this implementation
+ // just keeps two separate vectors for those two rows.
+ typename ArrayRef<T>::size_type m = FromArray.size();
+ typename ArrayRef<T>::size_type n = ToArray.size();
+
+ const unsigned SmallBufferSize = 64;
+ unsigned SmallBuffer[SmallBufferSize];
+ llvm::OwningArrayPtr<unsigned> Allocated;
+ unsigned *Previous = SmallBuffer;
+ if (2*(n + 1) > SmallBufferSize) {
+ Previous = new unsigned [2*(n+1)];
+ Allocated.reset(Previous);
+ }
+ unsigned *Current = Previous + (n + 1);
+
+ for (unsigned i = 0; i <= n; ++i)
+ Previous[i] = i;
+
+ for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) {
+ Current[0] = y;
+ unsigned BestThisRow = Current[0];
+
+ for (typename ArrayRef<T>::size_type x = 1; x <= n; ++x) {
+ if (AllowReplacements) {
+ Current[x] = std::min(
+ Previous[x-1] + (FromArray[y-1] == ToArray[x-1] ? 0u : 1u),
+ std::min(Current[x-1], Previous[x])+1);
+ }
+ else {
+ if (FromArray[y-1] == ToArray[x-1]) Current[x] = Previous[x-1];
+ else Current[x] = std::min(Current[x-1], Previous[x]) + 1;
+ }
+ BestThisRow = std::min(BestThisRow, Current[x]);
+ }
+
+ if (MaxEditDistance && BestThisRow > MaxEditDistance)
+ return MaxEditDistance + 1;
+
+ unsigned *tmp = Current;
+ Current = Previous;
+ Previous = tmp;
+ }
+
+ unsigned Result = Previous[n];
+ return Result;
+}
+
+} // End llvm namespace
+
+#endif
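A quick check of the routine above on the textbook pair, illustrative only: transforming "kitten" into "sitting" takes three edits.

    #include "llvm/ADT/edit_distance.h"
    #include "llvm/ADT/ArrayRef.h"

    unsigned kittenToSitting() {
      char From[] = {'k', 'i', 't', 't', 'e', 'n'};
      char To[]   = {'s', 'i', 't', 't', 'i', 'n', 'g'};
      // Replacements allowed (the default), so this returns 3.
      return llvm::ComputeEditDistance(llvm::makeArrayRef(From),
                                       llvm::makeArrayRef(To));
    }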
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index bcacfd9df426..ba9864a98a7e 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -652,10 +652,6 @@ struct ilist : public iplist<NodeTy> {
void push_front(const NodeTy &val) { insert(this->begin(), val); }
void push_back(const NodeTy &val) { insert(this->end(), val); }
- // Special forms of insert...
- template<class InIt> void insert(iterator where, InIt first, InIt last) {
- for (; first != last; ++first) insert(where, *first);
- }
void insert(iterator where, size_type count, const NodeTy &val) {
for (; count != 0; --count) insert(where, val);
}
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index d71ba208a129..b823f71a2217 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -327,7 +327,7 @@ public:
}
/// doesAccessArgPointees - Return true if functions with the specified
- /// behavior are known to potentially read or write from objects pointed
+ /// behavior are known to potentially read or write from objects pointed
/// to by their pointer-typed arguments (with arbitrary offsets).
///
static bool doesAccessArgPointees(ModRefBehavior MRB) {
@@ -568,6 +568,11 @@ bool isNoAliasCall(const Value *V);
///
bool isIdentifiedObject(const Value *V);
+/// isKnownNonNull - Return true if this pointer couldn't possibly be null by
+/// its definition. This returns true for allocas, non-extern-weak globals and
+/// byval arguments.
+bool isKnownNonNull(const Value *V);
+
} // End llvm namespace
#endif
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index c4ebe4015bf8..95626d624a13 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -264,6 +264,7 @@ private:
}
void setVolatile() { Volatile = true; }
+public:
/// aliasesPointer - Return true if the specified pointer "may" (or must)
/// alias one of the members in the set.
///
diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
index 0fb2bd7db50e..6f2ccfb19901 100644
--- a/include/llvm/Analysis/BlockFrequencyImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -24,7 +24,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
-#include <sstream>
#include <string>
namespace llvm {
@@ -41,7 +40,7 @@ class MachineBlockFrequencyInfo;
template<class BlockT, class FunctionT, class BlockProbInfoT>
class BlockFrequencyImpl {
- DenseMap<BlockT *, BlockFrequency> Freqs;
+ DenseMap<const BlockT *, BlockFrequency> Freqs;
BlockProbInfoT *BPI;
@@ -52,15 +51,16 @@ class BlockFrequencyImpl {
const uint32_t EntryFreq;
std::string getBlockName(BasicBlock *BB) const {
- return BB->getNameStr();
+ return BB->getName().str();
}
std::string getBlockName(MachineBasicBlock *MBB) const {
- std::stringstream ss;
+ std::string str;
+ raw_string_ostream ss(str);
ss << "BB#" << MBB->getNumber();
if (const BasicBlock *BB = MBB->getBasicBlock())
- ss << " derived from LLVM BB " << BB->getNameStr();
+ ss << " derived from LLVM BB " << BB->getName();
return ss.str();
}
@@ -308,8 +308,9 @@ class BlockFrequencyImpl {
public:
/// getBlockFreq - Return block frequency. Return 0 if we don't have it.
- BlockFrequency getBlockFreq(BlockT *BB) const {
- typename DenseMap<BlockT *, BlockFrequency>::const_iterator I = Freqs.find(BB);
+ BlockFrequency getBlockFreq(const BlockT *BB) const {
+ typename DenseMap<const BlockT *, BlockFrequency>::const_iterator
+ I = Freqs.find(BB);
if (I != Freqs.end())
return I->second;
return 0;
diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h
index 9e698a9f4bb1..fcab90677a48 100644
--- a/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -47,7 +47,7 @@ public:
/// that we should not rely on the value itself, but only on the comparison to
 /// the other block frequencies. We do this to avoid using floating point.
///
- BlockFrequency getBlockFreq(BasicBlock *BB) const;
+ BlockFrequency getBlockFreq(const BasicBlock *BB) const;
};
}
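A minimal sketch of consuming these frequencies from a pass that required BlockFrequencyInfo (F is the current Function; only the comparison between frequencies is meaningful):

    llvm::BlockFrequencyInfo &BFI = getAnalysis<llvm::BlockFrequencyInfo>();
    uint64_t EntryFreq = BFI.getBlockFreq(&F.getEntryBlock()).getFrequency();
    for (llvm::Function::const_iterator I = F.begin(), E = F.end(); I != E; ++I)
      if (BFI.getBlockFreq(&*I).getFrequency() >= EntryFreq) {
        // This block runs at least as often as the entry block, relative
        // to the other blocks; the raw number means nothing on its own.
      }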
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index a2c12ab9e824..2ced7967ed5b 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -17,29 +17,23 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/BranchProbability.h"
namespace llvm {
-
+class LoopInfo;
class raw_ostream;
+/// \brief Analysis pass providing branch probability information.
+///
+/// This is a function analysis pass which provides information on the relative
+/// probabilities of each "edge" in the function's CFG where such an edge is
+/// defined by a pair of basic blocks. The probability for a given block and
+/// a successor block are always relative to the probabilities of the other
+/// successor blocks. Another way of looking at it is that the probabilities
+/// for a given block B and each of its successors should sum to exactly
+/// one (100%).
class BranchProbabilityInfo : public FunctionPass {
-
- // Default weight value. Used when we don't have information about the edge.
- // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
- // the successors have a weight yet. But it doesn't make sense when providing
- // weight to an edge that may have siblings with non-zero weights. This can
- // be handled various ways, but it's probably fine for an edge with unknown
- // weight to just "inherit" the non-zero weight of an adjacent successor.
- static const uint32_t DEFAULT_WEIGHT = 16;
-
- typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
-
- DenseMap<Edge, uint32_t> Weights;
-
- // Get sum of the block successors' weights.
- uint32_t getSumForBlock(const BasicBlock *BB) const;
-
public:
static char ID;
@@ -48,34 +42,86 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const;
-
bool runOnFunction(Function &F);
+ void print(raw_ostream &OS, const Module *M = 0) const;
+
+ /// \brief Get an edge's probability, relative to other out-edges of the Src.
+ ///
+ /// This routine provides access to the fractional probability between zero
+ /// (0%) and one (100%) of this edge executing, relative to other edges
+ /// leaving the 'Src' block. The returned probability is never zero, and can
+ /// only be one if the source block has only one successor.
+ BranchProbability getEdgeProbability(const BasicBlock *Src,
+ const BasicBlock *Dst) const;
+
+ /// \brief Test if an edge is hot relative to other out-edges of the Src.
+ ///
+ /// Check whether this edge out of the source block is 'hot'. We define hot
+ /// as having a relative probability >= 80%.
+ bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const;
- // Returned value is between 1 and UINT32_MAX. Look at
- // BranchProbabilityInfo.cpp for details.
+ /// \brief Retrieve the hot successor of a block if one exists.
+ ///
+ /// Given a basic block, look through its successors and if one exists for
+ /// which \see isEdgeHot would return true, return that successor block.
+ BasicBlock *getHotSucc(BasicBlock *BB) const;
+
+ /// \brief Print an edge's probability.
+ ///
+ /// Retrieves an edge's probability similarly to \see getEdgeProbability, but
+ /// then prints that probability to the provided stream. That stream is then
+ /// returned.
+ raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src,
+ const BasicBlock *Dst) const;
+
+ /// \brief Get the raw edge weight calculated for the block pair.
+ ///
+ /// This returns the raw edge weight. It is guaranteed to fall between 1 and
+ /// UINT32_MAX. Note that the raw edge weight is not meaningful in isolation.
+ /// This interface should be used very carefully, and primarily by routines
+ /// that are updating the analysis by later calling setEdgeWeight.
uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const;
- // Look at BranchProbabilityInfo.cpp for details. Use it with caution!
+ /// \brief Set the raw edge weight for the block pair.
+ ///
+ /// This allows a pass to explicitly set the edge weight for a block. It can
+ /// be used when updating the CFG to update and preserve the branch
+ /// probability information. Read the implementation of how these edge
+ /// weights are calculated carefully before using!
void setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst,
uint32_t Weight);
- // A 'Hot' edge is an edge which probability is >= 80%.
- bool isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const;
+private:
+ typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
- // Return a hot successor for the block BB or null if there isn't one.
- BasicBlock *getHotSucc(BasicBlock *BB) const;
+ // Default weight value. Used when we don't have information about the edge.
+ // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
+ // the successors have a weight yet. But it doesn't make sense when providing
+ // weight to an edge that may have siblings with non-zero weights. This can
+ // be handled various ways, but it's probably fine for an edge with unknown
+ // weight to just "inherit" the non-zero weight of an adjacent successor.
+ static const uint32_t DEFAULT_WEIGHT = 16;
- // Return a probability as a fraction between 0 (0% probability) and
- // 1 (100% probability), however the value is never equal to 0, and can be 1
- // only iff SRC block has only one successor.
- BranchProbability getEdgeProbability(const BasicBlock *Src,
- const BasicBlock *Dst) const;
+ DenseMap<Edge, uint32_t> Weights;
+
+ /// \brief Handle to the LoopInfo analysis.
+ LoopInfo *LI;
+
+ /// \brief Track the last function we run over for printing.
+ Function *LastF;
+
+ /// \brief Track the set of blocks known to be post-dominated by unreachable.
+ SmallPtrSet<BasicBlock *, 16> PostDominatedByUnreachable;
+
+ /// \brief Get sum of the block successors' weights.
+ uint32_t getSumForBlock(const BasicBlock *BB) const;
- // Print value between 0 (0% probability) and 1 (100% probability),
- // however the value is never equal to 0, and can be 1 only iff SRC block
- // has only one successor.
- raw_ostream &printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
- BasicBlock *Dst) const;
+ bool calcUnreachableHeuristics(BasicBlock *BB);
+ bool calcMetadataWeights(BasicBlock *BB);
+ bool calcPointerHeuristics(BasicBlock *BB);
+ bool calcLoopBranchHeuristics(BasicBlock *BB);
+ bool calcZeroHeuristics(BasicBlock *BB);
+ bool calcFloatingPointHeuristics(BasicBlock *BB);
};
}
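A minimal sketch of querying the analysis (the helper name is hypothetical; BPI would come from getAnalysis<BranchProbabilityInfo>() in the using pass):

    // Decide whether to lay out Dst directly after Src.
    static bool shouldFallThrough(llvm::BranchProbabilityInfo &BPI,
                                  const llvm::BasicBlock *Src,
                                  const llvm::BasicBlock *Dst) {
      // A "hot" edge has relative probability >= 80% (see isEdgeHot).
      if (BPI.isEdgeHot(Src, Dst))
        return true;
      // Otherwise compare the exact fraction for this edge against 1/2,
      // without assuming comparison operators on BranchProbability.
      llvm::BranchProbability P = BPI.getEdgeProbability(Src, Dst);
      return (uint64_t)P.getNumerator() * 2 > P.getDenominator();
    }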
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 61614e34dacc..4704a929acf6 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -29,13 +29,13 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const Function *F) {
- return "CFG for '" + F->getNameStr() + "' function";
+ return "CFG for '" + F->getName().str() + "' function";
}
static std::string getSimpleNodeLabel(const BasicBlock *Node,
- const Function *Graph) {
+ const Function *) {
if (!Node->getName().empty())
- return Node->getNameStr();
+ return Node->getName().str();
std::string Str;
raw_string_ostream OS(Str);
@@ -45,7 +45,7 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
}
static std::string getCompleteNodeLabel(const BasicBlock *Node,
- const Function *Graph) {
+ const Function *) {
std::string Str;
raw_string_ostream OS(Str);
@@ -95,7 +95,9 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
std::string Str;
raw_string_ostream OS(Str);
- OS << SI->getCaseValue(SuccNo)->getValue();
+ SwitchInst::ConstCaseIt Case =
+ SwitchInst::ConstCaseIt::fromSuccessorIndex(SI, SuccNo);
+ OS << Case.getCaseValue()->getValue();
return OS.str();
}
return "";
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index b3390f47d2f3..9b5e8425ad29 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -14,9 +14,12 @@
#ifndef LLVM_ANALYSIS_CAPTURETRACKING_H
#define LLVM_ANALYSIS_CAPTURETRACKING_H
-namespace llvm {
- class Value;
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Support/CallSite.h"
+namespace llvm {
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
@@ -28,6 +31,33 @@ namespace llvm {
bool ReturnCaptures,
bool StoreCaptures);
+ /// This callback is used in conjunction with PointerMayBeCaptured. In
+ /// addition to the interface here, you'll need to provide your own getters
+ /// to see whether anything was captured.
+ struct CaptureTracker {
+ virtual ~CaptureTracker();
+
+ /// tooManyUses - The depth of traversal has breached a limit. There may be
+ /// capturing instructions that will not be passed into captured().
+ virtual void tooManyUses() = 0;
+
+ /// shouldExplore - This is the use of a value derived from the pointer.
+ /// To prune the search (i.e., assume that none of its users could possibly
+ /// capture) return false. To search it, return true.
+ ///
+ /// U->getUser() is always an Instruction.
+ virtual bool shouldExplore(Use *U) = 0;
+
+ /// captured - Information about the pointer was captured by the user of
+ /// use U. Return true to stop the traversal or false to continue looking
+ /// for more capturing instructions.
+ virtual bool captured(Use *U) = 0;
+ };
+
+ /// PointerMayBeCaptured - Visit the value and the values derived from it and
+ /// find values which appear to be capturing the pointer value. This feeds
+ /// results into and is controlled by the CaptureTracker object.
+ void PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker);
} // end namespace llvm
#endif
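A minimal sketch of a CaptureTracker implementation that merely records whether any capturing use was found, roughly what a caller of the simple boolean overload above gets (the class name is hypothetical):

    namespace {
    struct SimpleCaptureTracker : public llvm::CaptureTracker {
      bool Captured;
      SimpleCaptureTracker() : Captured(false) {}

      // The traversal gave up before visiting every use; be conservative.
      void tooManyUses() { Captured = true; }

      // Explore every use we are offered.
      bool shouldExplore(llvm::Use *) { return true; }

      // A capturing use was found; remember it and stop the traversal.
      bool captured(llvm::Use *) {
        Captured = true;
        return true;
      }
    };
    }

    // Usage:
    //   SimpleCaptureTracker CT;
    //   llvm::PointerMayBeCaptured(V, &CT);
    //   bool Escaped = CT.Captured;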
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index d96dd82b3591..711607834921 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -1,4 +1,4 @@
-//===- CodeMetrics.h - Measures the weight of a function---------*- C++ -*-===//
+//===- CodeMetrics.h - Code cost measurements -------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,80 +18,75 @@
#include "llvm/ADT/DenseMap.h"
namespace llvm {
-
+ class BasicBlock;
+ class Function;
+ class Instruction;
class TargetData;
+ class Value;
- // CodeMetrics - Calculate size and a few similar metrics for a set of
- // basic blocks.
- struct CodeMetrics {
- /// NeverInline - True if this callee should never be inlined into a
- /// caller.
- // bool NeverInline;
+ /// \brief Check whether an instruction is likely to be "free" when lowered.
+ bool isInstructionFree(const Instruction *I, const TargetData *TD = 0);
+
+ /// \brief Check whether a call will lower to something small.
+ ///
+ /// This test checks whether calls to this function will lower to something
+ /// significantly cheaper than a traditional call, often a single
+ /// instruction.
+ bool callIsSmall(const Function *F);
- // True if this function contains a call to setjmp or _setjmp
- bool callsSetJmp;
+ /// \brief Utility to calculate the size and a few similar metrics for a set
+ /// of basic blocks.
+ struct CodeMetrics {
+ /// \brief True if this function contains a call to setjmp or other functions
+ /// with attribute "returns twice" without having the attribute itself.
+ bool exposesReturnsTwice;
- // True if this function calls itself
+ /// \brief True if this function calls itself.
bool isRecursive;
- // True if this function contains one or more indirect branches
+ /// \brief True if this function contains one or more indirect branches.
bool containsIndirectBr;
- /// usesDynamicAlloca - True if this function calls alloca (in the C sense).
+ /// \brief True if this function calls alloca (in the C sense).
bool usesDynamicAlloca;
- /// NumInsts, NumBlocks - Keep track of how large each function is, which
- /// is used to estimate the code size cost of inlining it.
- unsigned NumInsts, NumBlocks;
+ /// \brief Number of instructions in the analyzed blocks.
+ unsigned NumInsts;
+
+ /// \brief Number of analyzed blocks.
+ unsigned NumBlocks;
- /// NumBBInsts - Keeps track of basic block code size estimates.
+ /// \brief Keeps track of basic block code size estimates.
DenseMap<const BasicBlock *, unsigned> NumBBInsts;
- /// NumCalls - Keep track of the number of calls to 'big' functions.
+ /// \brief Keep track of the number of calls to 'big' functions.
unsigned NumCalls;
- /// NumInlineCandidates - Keep track of the number of calls to internal
- /// functions with only a single caller. These are likely targets for
- /// future inlining, likely exposed by interleaved devirtualization.
+ /// \brief The number of calls to internal functions with a single caller.
+ ///
+ /// These are likely targets for future inlining, likely exposed by
+ /// interleaved devirtualization.
unsigned NumInlineCandidates;
- /// NumVectorInsts - Keep track of how many instructions produce vector
- /// values. The inliner is being more aggressive with inlining vector
- /// kernels.
+ /// \brief How many instructions produce vector values.
+ ///
+ /// The inliner is more aggressive with inlining vector kernels.
unsigned NumVectorInsts;
- /// NumRets - Keep track of how many Ret instructions the block contains.
+ /// \brief How many 'ret' instructions the blocks contain.
unsigned NumRets;
- CodeMetrics() : callsSetJmp(false), isRecursive(false),
+ CodeMetrics() : exposesReturnsTwice(false), isRecursive(false),
containsIndirectBr(false), usesDynamicAlloca(false),
NumInsts(0), NumBlocks(0), NumCalls(0),
NumInlineCandidates(0), NumVectorInsts(0),
NumRets(0) {}
- /// analyzeBasicBlock - Add information about the specified basic block
- /// to the current structure.
+ /// \brief Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetData *TD = 0);
- /// analyzeFunction - Add information about the specified function
- /// to the current structure.
+ /// \brief Add information about a function to the current state.
void analyzeFunction(Function *F, const TargetData *TD = 0);
-
- /// CountCodeReductionForConstant - Figure out an approximation for how
- /// many instructions will be constant folded if the specified value is
- /// constant.
- unsigned CountCodeReductionForConstant(Value *V);
-
- /// CountBonusForConstant - Figure out an approximation for how much
- /// per-call performance boost we can expect if the specified value is
- /// constant.
- unsigned CountBonusForConstant(Value *V);
-
- /// CountCodeReductionForAlloca - Figure out an approximation of how much
- /// smaller the function will be if it is inlined into a context where an
- /// argument becomes an alloca.
- ///
- unsigned CountCodeReductionForAlloca(Value *V);
};
}
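A minimal sketch of driving CodeMetrics over a whole function (the helper name and the size cap are illustrative, not part of this interface):

    // Reject functions that are unsafe or too large to duplicate.
    static bool isDuplicatable(llvm::Function *F, const llvm::TargetData *TD) {
      llvm::CodeMetrics Metrics;
      Metrics.analyzeFunction(F, TD);
      if (Metrics.exposesReturnsTwice || Metrics.containsIndirectBr ||
          Metrics.usesDynamicAlloca)
        return false;
      return Metrics.NumInsts < 100; // illustrative size cap
    }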
diff --git a/include/llvm/Analysis/ConstantFolding.h b/include/llvm/Analysis/ConstantFolding.h
index 05018fa1617a..2fdef5f0836e 100644
--- a/include/llvm/Analysis/ConstantFolding.h
+++ b/include/llvm/Analysis/ConstantFolding.h
@@ -25,6 +25,7 @@ namespace llvm {
class ConstantExpr;
class Instruction;
class TargetData;
+ class TargetLibraryInfo;
class Function;
class Type;
template<typename T>
@@ -35,13 +36,15 @@ namespace llvm {
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0);
+Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
 /// using the specified TargetData. If successful, the constant result is
 /// returned; if not, null is returned.
Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
/// specified operands. If successful, the constant result is returned, if not,
@@ -51,7 +54,8 @@ Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
/// instruction (icmp/fcmp) with the specified operands. If it fails, it
@@ -59,7 +63,8 @@ Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
///
Constant *ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *LHS, Constant *RHS,
- const TargetData *TD = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0);
/// ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue
/// instruction with the specified operands and indices. The constant result is
@@ -76,15 +81,22 @@ Constant *ConstantFoldLoadFromConstPtr(Constant *C, const TargetData *TD = 0);
/// getelementptr constantexpr, return the constant value being addressed by the
/// constant expression, or null if something is funny and we can't decide.
Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE);
-
+
+/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
+/// indices (with an *implied* zero pointer index that is not in the list),
+/// return the constant value being addressed by a virtual load, or null if
+/// something is funny and we can't decide.
+Constant *ConstantFoldLoadThroughGEPIndices(Constant *C,
+ ArrayRef<Constant*> Indices);
+
 /// canConstantFoldCallTo - Return true if it's even possible to fold a call to
/// the specified function.
bool canConstantFoldCallTo(const Function *F);
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
-Constant *
-ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands);
+Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI = 0);
}
#endif
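A minimal sketch of a caller now that TargetLibraryInfo is threaded through (I, TD and TLI are assumed to be owned by the enclosing pass; TD and TLI may be null):

    // Try to fold I; on success, replace all uses and delete it.
    if (llvm::Constant *C = llvm::ConstantFoldInstruction(I, TD, TLI)) {
      I->replaceAllUsesWith(C);
      I->eraseFromParent();
    }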
diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/Analysis/DIBuilder.h
index ee24226c748f..2d109cdbf08f 100644
--- a/include/llvm/Analysis/DIBuilder.h
+++ b/include/llvm/Analysis/DIBuilder.h
@@ -42,6 +42,7 @@ namespace llvm {
class DISubprogram;
class DITemplateTypeParameter;
class DITemplateValueParameter;
+ class DIObjCProperty;
class DIBuilder {
private:
@@ -190,6 +191,39 @@ namespace llvm {
StringRef PropertySetterName = StringRef(),
unsigned PropertyAttributes = 0);
+ /// createObjCIVar - Create debugging information entry for Objective-C
+ /// instance variable.
+ /// @param Name Member name.
+ /// @param File File where this member is defined.
+ /// @param LineNo Line number.
+ /// @param SizeInBits Member size.
+ /// @param AlignInBits Member alignment.
+ /// @param OffsetInBits Member offset.
+ /// @param Flags Flags to encode member attribute, e.g. private
+ /// @param Ty Parent type.
+ /// @param Property Property associated with this ivar.
+ DIType createObjCIVar(StringRef Name, DIFile File,
+ unsigned LineNo, uint64_t SizeInBits,
+ uint64_t AlignInBits, uint64_t OffsetInBits,
+ unsigned Flags, DIType Ty,
+ MDNode *PropertyNode);
+
+ /// createObjCProperty - Create debugging information entry for Objective-C
+ /// property.
+ /// @param Name Property name.
+ /// @param File File where this property is defined.
+ /// @param LineNumber Line number.
+ /// @param GetterName Name of the Objective C property getter selector.
+ /// @param SetterName Name of the Objective C property setter selector.
+ /// @param PropertyAttributes Objective C property attributes.
+ /// @param Ty Type.
+ DIObjCProperty createObjCProperty(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ StringRef GetterName,
+ StringRef SetterName,
+ unsigned PropertyAttributes,
+ DIType Ty);
+
/// createClassType - Create debugging information entry for a class.
/// @param Scope Scope in which this class is defined.
/// @param Name class name.
@@ -313,6 +347,10 @@ namespace llvm {
DIType createTemporaryType();
DIType createTemporaryType(DIFile F);
+ /// createForwardDecl - Create a temporary forward-declared type.
+ DIType createForwardDecl(unsigned Tag, StringRef Name, DIFile F,
+ unsigned Line, unsigned RuntimeLang = 0);
+
/// retainType - Retain DIType in a module even if it is not referenced
/// through debug info anchors.
void retainType(DIType T);
@@ -407,6 +445,7 @@ namespace llvm {
/// @param Ty Function type.
 /// @param isLocalToUnit True if this function is not externally visible.
/// @param isDefinition True if this is a function definition.
+ /// @param ScopeLine Set to the line where the function's scope begins.
/// @param Flags e.g. is this function prototyped or not.
 /// These flags are used to emit DWARF attributes.
/// @param isOptimized True if optimization is ON.
@@ -417,6 +456,7 @@ namespace llvm {
DIFile File, unsigned LineNo,
DIType Ty, bool isLocalToUnit,
bool isDefinition,
+ unsigned ScopeLine,
unsigned Flags = 0,
bool isOptimized = false,
Function *Fn = 0,
@@ -470,7 +510,7 @@ namespace llvm {
/// @param Scope Lexical block.
/// @param File Source file.
DILexicalBlockFile createLexicalBlockFile(DIDescriptor Scope,
- DIFile File);
+ DIFile File);
/// createLexicalBlock - This creates a descriptor for a lexical block
/// with the specified parent context.
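A minimal sketch of the updated createFunction call with the new ScopeLine argument (DBuilder, CUFile and FnTy are assumed to exist; the leading scope and name parameters follow the full declaration in this header):

    // LineNo is where the function's name appears; ScopeLine is where its
    // lexical scope (the '{') begins, and the two can differ.
    llvm::DISubprogram SP = DBuilder.createFunction(
        CUFile, "foo", "foo", CUFile,
        /*LineNo=*/10, FnTy,
        /*isLocalToUnit=*/false, /*isDefinition=*/true,
        /*ScopeLine=*/11);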
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index 30741c4970ab..b701b8fca5d4 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -31,7 +31,7 @@ struct DOTGraphTraitsViewer : public FunctionPass {
std::string Title, GraphName;
Graph = &getAnalysis<Analysis>();
GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
- Title = GraphName + " for '" + F.getNameStr() + "' function";
+ Title = GraphName + " for '" + F.getName().str() + "' function";
ViewGraph(Graph, Name, Simple, Title);
return false;
@@ -55,7 +55,7 @@ struct DOTGraphTraitsPrinter : public FunctionPass {
virtual bool runOnFunction(Function &F) {
Analysis *Graph;
- std::string Filename = Name + "." + F.getNameStr() + ".dot";
+ std::string Filename = Name + "." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
std::string ErrorInfo;
@@ -64,7 +64,7 @@ struct DOTGraphTraitsPrinter : public FunctionPass {
std::string Title, GraphName;
GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
- Title = GraphName + " for '" + F.getNameStr() + "' function";
+ Title = GraphName + " for '" + F.getName().str() + "' function";
if (ErrorInfo.empty())
WriteGraph(File, Graph, Simple, Title);
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 9a53c4dadba0..894c5428b988 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -43,6 +43,7 @@ namespace llvm {
class DILexicalBlockFile;
class DIVariable;
class DIType;
+ class DIObjCProperty;
 /// DIDescriptor - A thin wrapper around MDNode to access encoded debug info.
 /// This should not be stored in a container, because the underlying MDNode may
@@ -128,6 +129,7 @@ namespace llvm {
bool isUnspecifiedParameter() const;
bool isTemplateTypeParameter() const;
bool isTemplateValueParameter() const;
+ bool isObjCProperty() const;
};
/// DISubrange - This is used to represent ranges, for array bounds.
@@ -135,8 +137,8 @@ namespace llvm {
public:
explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {}
- int64_t getLo() const { return (int64_t)getUInt64Field(1); }
- int64_t getHi() const { return (int64_t)getUInt64Field(2); }
+ uint64_t getLo() const { return getUInt64Field(1); }
+ uint64_t getHi() const { return getUInt64Field(2); }
};
/// DIArray - This descriptor holds an array of descriptors.
@@ -153,6 +155,7 @@ namespace llvm {
/// DIScope - A base class for various scopes.
class DIScope : public DIDescriptor {
+ virtual void anchor();
public:
explicit DIScope(const MDNode *N = 0) : DIDescriptor (N) {}
virtual ~DIScope() {}
@@ -163,6 +166,7 @@ namespace llvm {
/// DICompileUnit - A wrapper for a compile unit.
class DICompileUnit : public DIScope {
+ virtual void anchor();
public:
explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {}
@@ -202,6 +206,7 @@ namespace llvm {
/// DIFile - This is a wrapper for a file.
class DIFile : public DIScope {
+ virtual void anchor();
public:
explicit DIFile(const MDNode *N = 0) : DIScope(N) {
if (DbgNode && !isFile())
@@ -230,7 +235,7 @@ namespace llvm {
/// FIXME: Types should be factored much better so that CV qualifiers and
/// others do not require a huge and empty descriptor full of zeros.
class DIType : public DIScope {
- public:
+ virtual void anchor();
protected:
// This ctor is used when the Tag has already been validated by a derived
// ctor.
@@ -240,7 +245,6 @@ namespace llvm {
/// Verify - Verify that a type descriptor is well formed.
bool Verify() const;
- public:
explicit DIType(const MDNode *N);
explicit DIType() {}
virtual ~DIType() {}
@@ -320,6 +324,7 @@ namespace llvm {
/// DIBasicType - A basic type, like 'int' or 'float'.
class DIBasicType : public DIType {
+ virtual void anchor();
public:
explicit DIBasicType(const MDNode *N = 0) : DIType(N) {}
@@ -338,6 +343,7 @@ namespace llvm {
/// DIDerivedType - A simple derived type, like a const qualified type,
/// a typedef, a pointer or reference, etc.
class DIDerivedType : public DIType {
+ virtual void anchor();
protected:
explicit DIDerivedType(const MDNode *N, bool, bool)
: DIType(N, true, true) {}
@@ -351,29 +357,45 @@ namespace llvm {
/// return base type size.
uint64_t getOriginalTypeSize() const;
- StringRef getObjCPropertyName() const { return getStringField(10); }
+ /// getObjCProperty - Return property node, if this ivar is
+ /// associated with one.
+ MDNode *getObjCProperty() const;
+
+ StringRef getObjCPropertyName() const {
+ if (getVersion() > LLVMDebugVersion11)
+ return StringRef();
+ return getStringField(10);
+ }
StringRef getObjCPropertyGetterName() const {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return getStringField(11);
}
StringRef getObjCPropertySetterName() const {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return getStringField(12);
}
bool isReadOnlyObjCProperty() {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readonly) != 0;
}
bool isReadWriteObjCProperty() {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0;
}
bool isAssignObjCProperty() {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_assign) != 0;
}
bool isRetainObjCProperty() {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_retain) != 0;
}
bool isCopyObjCProperty() {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_copy) != 0;
}
bool isNonAtomicObjCProperty() {
+ assert (getVersion() <= LLVMDebugVersion11 && "Invalid Request");
return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0;
}
@@ -391,6 +413,7 @@ namespace llvm {
/// other types, like a function or struct.
/// FIXME: Why is this a DIDerivedType??
class DICompositeType : public DIDerivedType {
+ virtual void anchor();
public:
explicit DICompositeType(const MDNode *N = 0)
: DIDerivedType(N, true, true) {
@@ -454,6 +477,7 @@ namespace llvm {
/// DISubprogram - This is a wrapper for a subprogram (e.g. a function).
class DISubprogram : public DIScope {
+ virtual void anchor();
public:
explicit DISubprogram(const MDNode *N = 0) : DIScope(N) {}
@@ -495,6 +519,7 @@ namespace llvm {
DICompositeType getContainingType() const {
return getFieldAs<DICompositeType>(13);
}
+
unsigned isArtificial() const {
if (getVersion() <= llvm::LLVMDebugVersion8)
return getUnsignedField(14);
@@ -543,6 +568,11 @@ namespace llvm {
return getFieldAs<DIFile>(6).getDirectory();
}
+ /// getScopeLineNumber - Get the line at which the scope of the
+ /// function begins, which is not necessarily the line where the
+ /// function's name appears.
+ unsigned getScopeLineNumber() const { return getUnsignedField(20); }
+
/// Verify - Verify that a subprogram descriptor is well formed.
bool Verify() const;
@@ -621,7 +651,7 @@ namespace llvm {
DIScope getContext() const { return getFieldAs<DIScope>(1); }
StringRef getName() const { return getStringField(2); }
- DICompileUnit getCompileUnit() const{
+ DICompileUnit getCompileUnit() const {
assert (getVersion() <= LLVMDebugVersion10 && "Invalid getCompileUnit!");
if (getVersion() == llvm::LLVMDebugVersion7)
return getFieldAs<DICompileUnit>(3);
@@ -687,6 +717,7 @@ namespace llvm {
/// DILexicalBlock - This is a wrapper for a lexical block.
class DILexicalBlock : public DIScope {
+ virtual void anchor();
public:
explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {}
DIScope getContext() const { return getFieldAs<DIScope>(1); }
@@ -705,6 +736,7 @@ namespace llvm {
/// DILexicalBlockFile - This is a wrapper for a lexical block with
/// a filename change.
class DILexicalBlockFile : public DIScope {
+ virtual void anchor();
public:
explicit DILexicalBlockFile(const MDNode *N = 0) : DIScope(N) {}
DIScope getContext() const { return getScope().getContext(); }
@@ -724,6 +756,7 @@ namespace llvm {
/// DINameSpace - A wrapper for a C++ style name space.
class DINameSpace : public DIScope {
+ virtual void anchor();
public:
explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
DIScope getContext() const { return getFieldAs<DIScope>(1); }
@@ -760,6 +793,51 @@ namespace llvm {
bool Verify() const;
};
+ class DIObjCProperty : public DIDescriptor {
+ public:
+ explicit DIObjCProperty(const MDNode *N) : DIDescriptor(N) { }
+
+ StringRef getObjCPropertyName() const { return getStringField(1); }
+ DIFile getFile() const { return getFieldAs<DIFile>(2); }
+ unsigned getLineNumber() const { return getUnsignedField(3); }
+
+ StringRef getObjCPropertyGetterName() const {
+ return getStringField(4);
+ }
+ StringRef getObjCPropertySetterName() const {
+ return getStringField(5);
+ }
+ bool isReadOnlyObjCProperty() {
+ return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_readonly) != 0;
+ }
+ bool isReadWriteObjCProperty() {
+ return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0;
+ }
+ bool isAssignObjCProperty() {
+ return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_assign) != 0;
+ }
+ bool isRetainObjCProperty() {
+ return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_retain) != 0;
+ }
+ bool isCopyObjCProperty() {
+ return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_copy) != 0;
+ }
+ bool isNonAtomicObjCProperty() {
+ return (getUnsignedField(6) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0;
+ }
+
+ DIType getType() const { return getFieldAs<DIType>(7); }
+
+ /// Verify - Verify that a derived type descriptor is well formed.
+ bool Verify() const;
+
+ /// print - print derived type.
+ void print(raw_ostream &OS) const;
+
+ /// dump - print derived type to dbgs() with a newline.
+ void dump() const;
+ };
+
/// getDISubprogram - Find subprogram that is enclosing this scope.
DISubprogram getDISubprogram(const MDNode *Scope);
@@ -816,7 +894,7 @@ namespace llvm {
/// addGlobalVariable - Add global variable into GVs.
bool addGlobalVariable(DIGlobalVariable DIG);
- // addSubprogram - Add subprgoram into SPs.
+ // addSubprogram - Add subprogram into SPs.
bool addSubprogram(DISubprogram SP);
/// addType - Add type into Tys.
diff --git a/include/llvm/Analysis/DominanceFrontier.h b/include/llvm/Analysis/DominanceFrontier.h
index d7f74af1c65c..a2e0675e92b7 100644
--- a/include/llvm/Analysis/DominanceFrontier.h
+++ b/include/llvm/Analysis/DominanceFrontier.h
@@ -154,6 +154,7 @@ public:
 /// used to compute forward dominator frontiers.
///
class DominanceFrontier : public DominanceFrontierBase {
+ virtual void anchor();
public:
static char ID; // Pass ID, replacement for typeid
DominanceFrontier() :
diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h
index ae552b05abff..0c29236dde96 100644
--- a/include/llvm/Analysis/DominatorInternals.h
+++ b/include/llvm/Analysis/DominatorInternals.h
@@ -171,7 +171,7 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
// it might be that some blocks did not get a DFS number (e.g., blocks of
// infinite loops). In these cases an artificial exit node is required.
- MultipleRoots |= (DT.isPostDominator() && N != F.size());
+ MultipleRoots |= (DT.isPostDominator() && N != GraphTraits<FuncT*>::size(&F));
// When naively implemented, the Lengauer-Tarjan algorithm requires a separate
// bucket for each vertex. However, this is unnecessary, because each vertex
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 230e83d30121..6e8e4246367e 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -185,6 +185,18 @@ void Calculate(DominatorTreeBase<typename GraphTraits<N>::NodeType>& DT,
template<class NodeT>
class DominatorTreeBase : public DominatorBase<NodeT> {
+ bool dominatedBySlowTreeWalk(const DomTreeNodeBase<NodeT> *A,
+ const DomTreeNodeBase<NodeT> *B) const {
+ assert(A != B);
+ assert(isReachableFromEntry(B));
+ assert(isReachableFromEntry(A));
+
+ const DomTreeNodeBase<NodeT> *IDom;
+ while ((IDom = B->getIDom()) != 0 && IDom != A && IDom != B)
+ B = IDom; // Walk up the tree
+ return IDom != 0;
+ }
+
protected:
typedef DenseMap<NodeT*, DomTreeNodeBase<NodeT>*> DomTreeNodeMapType;
DomTreeNodeMapType DomTreeNodes;
@@ -321,8 +333,7 @@ public:
/// block. This is the same as using operator[] on this class.
///
inline DomTreeNodeBase<NodeT> *getNode(NodeT *BB) const {
- typename DomTreeNodeMapType::const_iterator I = DomTreeNodes.find(BB);
- return I != DomTreeNodes.end() ? I->second : 0;
+ return DomTreeNodes.lookup(BB);
}
/// getRootNode - This returns the entry node for the CFG of the function. If
@@ -339,38 +350,26 @@ public:
/// Note that this is not a constant time operation!
///
bool properlyDominates(const DomTreeNodeBase<NodeT> *A,
- const DomTreeNodeBase<NodeT> *B) const {
- if (A == 0 || B == 0) return false;
- return dominatedBySlowTreeWalk(A, B);
- }
-
- inline bool properlyDominates(const NodeT *A, const NodeT *B) {
+ const DomTreeNodeBase<NodeT> *B) {
+ if (A == 0 || B == 0)
+ return false;
if (A == B)
return false;
-
- // Cast away the const qualifiers here. This is ok since
- // this function doesn't actually return the values returned
- // from getNode.
- return properlyDominates(getNode(const_cast<NodeT *>(A)),
- getNode(const_cast<NodeT *>(B)));
- }
-
- bool dominatedBySlowTreeWalk(const DomTreeNodeBase<NodeT> *A,
- const DomTreeNodeBase<NodeT> *B) const {
- const DomTreeNodeBase<NodeT> *IDom;
- if (A == 0 || B == 0) return false;
- while ((IDom = B->getIDom()) != 0 && IDom != A && IDom != B)
- B = IDom; // Walk up the tree
- return IDom != 0;
+ return dominates(A, B);
}
+ bool properlyDominates(const NodeT *A, const NodeT *B);
/// isReachableFromEntry - Return true if A is dominated by the entry
/// block of the function containing it.
- bool isReachableFromEntry(const NodeT* A) {
+ bool isReachableFromEntry(const NodeT* A) const {
assert(!this->isPostDominator() &&
"This is not implemented for post dominators");
- return dominates(&A->getParent()->front(), A);
+ return isReachableFromEntry(getNode(const_cast<NodeT *>(A)));
+ }
+
+ inline bool isReachableFromEntry(const DomTreeNodeBase<NodeT> *A) const {
+ return A;
}
/// dominates - Returns true iff A dominates B. Note that this is not a
@@ -378,10 +377,16 @@ public:
///
inline bool dominates(const DomTreeNodeBase<NodeT> *A,
const DomTreeNodeBase<NodeT> *B) {
+ // A node trivially dominates itself.
if (B == A)
- return true; // A node trivially dominates itself.
+ return true;
- if (A == 0 || B == 0)
+ // An unreachable node is dominated by anything.
+ if (!isReachableFromEntry(B))
+ return true;
+
+ // And dominates nothing.
+ if (!isReachableFromEntry(A))
return false;
// Compare the result of the tree walk and the dfs numbers, if expensive
@@ -406,16 +411,7 @@ public:
return dominatedBySlowTreeWalk(A, B);
}
- inline bool dominates(const NodeT *A, const NodeT *B) {
- if (A == B)
- return true;
-
- // Cast away the const qualifiers here. This is ok since
- // this function doesn't actually return the values returned
- // from getNode.
- return dominates(getNode(const_cast<NodeT *>(A)),
- getNode(const_cast<NodeT *>(B)));
- }
+ bool dominates(const NodeT *A, const NodeT *B);
NodeT *getRoot() const {
assert(this->Roots.size() == 1 && "Should always have entry node!");
@@ -623,9 +619,8 @@ protected:
}
DomTreeNodeBase<NodeT> *getNodeForBlock(NodeT *BB) {
- typename DomTreeNodeMapType::iterator I = this->DomTreeNodes.find(BB);
- if (I != this->DomTreeNodes.end() && I->second)
- return I->second;
+ if (DomTreeNodeBase<NodeT> *Node = getNode(BB))
+ return Node;
// Haven't calculated this node yet? Get or calculate the node for the
// immediate dominator.
@@ -641,8 +636,7 @@ protected:
}
inline NodeT *getIDom(NodeT *BB) const {
- typename DenseMap<NodeT*, NodeT*>::const_iterator I = IDoms.find(BB);
- return I != IDoms.end() ? I->second : 0;
+ return IDoms.lookup(BB);
}
inline void addRoot(NodeT* BB) {
@@ -653,21 +647,24 @@ public:
/// recalculate - compute a dominator tree for the given function
template<class FT>
void recalculate(FT& F) {
+ typedef GraphTraits<FT*> TraitsTy;
reset();
this->Vertex.push_back(0);
if (!this->IsPostDominators) {
// Initialize root
- this->Roots.push_back(&F.front());
- this->IDoms[&F.front()] = 0;
- this->DomTreeNodes[&F.front()] = 0;
+ NodeT *entry = TraitsTy::getEntryNode(&F);
+ this->Roots.push_back(entry);
+ this->IDoms[entry] = 0;
+ this->DomTreeNodes[entry] = 0;
Calculate<FT, NodeT*>(*this, F);
} else {
// Initialize the roots list
- for (typename FT::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- if (std::distance(GraphTraits<FT*>::child_begin(I),
- GraphTraits<FT*>::child_end(I)) == 0)
+ for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F),
+ E = TraitsTy::nodes_end(&F); I != E; ++I) {
+ if (std::distance(TraitsTy::child_begin(I),
+ TraitsTy::child_end(I)) == 0)
addRoot(I);
// Prepopulate maps so that we don't get iterator invalidation issues later.
@@ -680,6 +677,32 @@ public:
}
};
+// These two functions are declared out of line as a workaround for building
+// with old (< r147295) versions of clang because of pr11642.
+template<class NodeT>
+bool DominatorTreeBase<NodeT>::dominates(const NodeT *A, const NodeT *B) {
+ if (A == B)
+ return true;
+
+ // Cast away the const qualifiers here. This is ok since
+ // this function doesn't actually return the values returned
+ // from getNode.
+ return dominates(getNode(const_cast<NodeT *>(A)),
+ getNode(const_cast<NodeT *>(B)));
+}
+template<class NodeT>
+bool
+DominatorTreeBase<NodeT>::properlyDominates(const NodeT *A, const NodeT *B) {
+ if (A == B)
+ return false;
+
+ // Cast away the const qualifiers here. This is ok since
+ // this function doesn't actually return the values returned
+ // from getNode.
+ return dominates(getNode(const_cast<NodeT *>(A)),
+ getNode(const_cast<NodeT *>(B)));
+}
+
EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase<BasicBlock>);
//===-------------------------------------
@@ -749,9 +772,12 @@ public:
return DT->dominates(A, B);
}
- // dominates - Return true if A dominates B. This performs the
- // special checks necessary if A and B are in the same basic block.
- bool dominates(const Instruction *A, const Instruction *B) const;
+ // dominates - Return true if Def dominates a use in User. This performs
+ // the special checks necessary if Def and User are in the same basic block.
+ // Note that Def doesn't dominate a use in Def itself!
+ bool dominates(const Instruction *Def, const Use &U) const;
+ bool dominates(const Instruction *Def, const Instruction *User) const;
+ bool dominates(const Instruction *Def, const BasicBlock *BB) const;
bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const {
return DT->properlyDominates(A, B);
@@ -814,10 +840,12 @@ public:
DT->splitBlock(NewBB);
}
- bool isReachableFromEntry(const BasicBlock* A) {
+ bool isReachableFromEntry(const BasicBlock* A) const {
return DT->isReachableFromEntry(A);
}
+ bool isReachableFromEntry(const Use &U) const;
+
virtual void releaseMemory() {
DT->releaseMemory();
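A minimal sketch of the new instruction-level queries (the helper is hypothetical; DT would come from getAnalysis<DominatorTree>()):

    // Is Def's value available at the use U? This performs the special
    // same-block checks; note that Def does not dominate a use in Def
    // itself.
    static bool valueAvailableAt(const llvm::DominatorTree &DT,
                                 const llvm::Instruction *Def,
                                 const llvm::Use &U) {
      return DT.dominates(Def, U);
    }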
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 2fb607cc5c37..2bf79b9c932b 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -166,10 +166,16 @@ public:
const_iterator end() const { return IVUses.end(); }
bool empty() const { return IVUses.empty(); }
+ bool isIVUserOrOperand(Instruction *Inst) const {
+ return Processed.count(Inst);
+ }
+
void print(raw_ostream &OS, const Module* = 0) const;
/// dump - This method is used for debugging.
void dump() const;
+protected:
+ bool AddUsersImpl(Instruction *I, SmallPtrSet<Loop*,16> &SimpleLoopNests);
};
Pass *createIVUsersPass();
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 36a16e68df07..691c2d19be9a 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -14,171 +14,118 @@
#ifndef LLVM_ANALYSIS_INLINECOST_H
#define LLVM_ANALYSIS_INLINECOST_H
-#include <cassert>
-#include <climits>
-#include <vector>
+#include "llvm/Function.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include <cassert>
+#include <climits>
+#include <vector>
namespace llvm {
- class Value;
- class Function;
- class BasicBlock;
class CallSite;
- template<class PtrType, unsigned SmallSize>
- class SmallPtrSet;
class TargetData;
namespace InlineConstants {
// Various magic constants used to adjust heuristics.
const int InstrCost = 5;
- const int IndirectCallBonus = -100;
+ const int IndirectCallThreshold = 100;
const int CallPenalty = 25;
const int LastCallToStaticBonus = -15000;
const int ColdccPenalty = 2000;
const int NoreturnPenalty = 10000;
}
- /// InlineCost - Represent the cost of inlining a function. This
- /// supports special values for functions which should "always" or
- /// "never" be inlined. Otherwise, the cost represents a unitless
- /// amount; smaller values increase the likelihood of the function
- /// being inlined.
+ /// \brief Represents the cost of inlining a function.
+ ///
+ /// This supports special values for functions which should "always" or
+ /// "never" be inlined. Otherwise, the cost represents a unitless amount;
+ /// smaller values increase the likelihood of the function being inlined.
+ ///
+ /// Objects of this type also provide the adjusted threshold for inlining
+ /// based on the information available for a particular callsite. They can be
+ /// directly tested to determine if inlining should occur given the cost and
+ /// threshold for this cost metric.
class InlineCost {
- enum Kind {
- Value,
- Always,
- Never
+ enum SentinelValues {
+ AlwaysInlineCost = INT_MIN,
+ NeverInlineCost = INT_MAX
};
- // This is a do-it-yourself implementation of
- // int Cost : 30;
- // unsigned Type : 2;
- // We used to use bitfields, but they were sometimes miscompiled (PR3822).
- enum { TYPE_BITS = 2 };
- enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS };
- unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS;
+ /// \brief The estimated cost of inlining this callsite.
+ const int Cost;
- Kind getType() const {
- return Kind(TypedCost >> COST_BITS);
- }
+ /// \brief The adjusted threshold against which this cost was computed.
+ const int Threshold;
- int getCost() const {
- // Sign-extend the bottom COST_BITS bits.
- return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS;
- }
+ // Trivial constructor, interesting logic in the factory functions below.
+ InlineCost(int Cost, int Threshold)
+ : Cost(Cost), Threshold(Threshold) {}
- InlineCost(int C, int T) {
- TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS);
- assert(getCost() == C && "Cost exceeds InlineCost precision");
- }
public:
- static InlineCost get(int Cost) { return InlineCost(Cost, Value); }
- static InlineCost getAlways() { return InlineCost(0, Always); }
- static InlineCost getNever() { return InlineCost(0, Never); }
-
- bool isVariable() const { return getType() == Value; }
- bool isAlways() const { return getType() == Always; }
- bool isNever() const { return getType() == Never; }
-
- /// getValue() - Return a "variable" inline cost's amount. It is
- /// an error to call this on an "always" or "never" InlineCost.
- int getValue() const {
- assert(getType() == Value && "Invalid access of InlineCost");
- return getCost();
+ static InlineCost get(int Cost, int Threshold) {
+ assert(Cost > AlwaysInlineCost && "Cost crosses sentinel value");
+ assert(Cost < NeverInlineCost && "Cost crosses sentinel value");
+ return InlineCost(Cost, Threshold);
+ }
+ static InlineCost getAlways() {
+ return InlineCost(AlwaysInlineCost, 0);
+ }
+ static InlineCost getNever() {
+ return InlineCost(NeverInlineCost, 0);
}
- };
-
- /// InlineCostAnalyzer - Cost analyzer used by inliner.
- class InlineCostAnalyzer {
- struct ArgInfo {
- public:
- unsigned ConstantWeight;
- unsigned AllocaWeight;
-
- ArgInfo(unsigned CWeight, unsigned AWeight)
- : ConstantWeight(CWeight), AllocaWeight(AWeight)
- {}
- };
-
- struct FunctionInfo {
- CodeMetrics Metrics;
- /// ArgumentWeights - Each formal argument of the function is inspected to
- /// see if it is used in any contexts where making it a constant or alloca
- /// would reduce the code size. If so, we add some value to the argument
- /// entry here.
- std::vector<ArgInfo> ArgumentWeights;
+ /// \brief Test whether the inline cost is low enough for inlining.
+ operator bool() const {
+ return Cost < Threshold;
+ }
- /// analyzeFunction - Add information about the specified function
- /// to the current structure.
- void analyzeFunction(Function *F, const TargetData *TD);
+ bool isAlways() const { return Cost == AlwaysInlineCost; }
+ bool isNever() const { return Cost == NeverInlineCost; }
+ bool isVariable() const { return !isAlways() && !isNever(); }
- /// NeverInline - Returns true if the function should never be
- /// inlined into any caller.
- bool NeverInline();
- };
+ /// \brief Get the inline cost estimate.
+ /// It is an error to call this on an "always" or "never" InlineCost.
+ int getCost() const {
+ assert(isVariable() && "Invalid access of InlineCost");
+ return Cost;
+ }
- // The Function* for a function can be changed (by ArgumentPromotion);
- // the ValueMap will update itself when this happens.
- ValueMap<const Function *, FunctionInfo> CachedFunctionInfo;
+ /// \brief Get the cost delta from the threshold for inlining.
+ /// Only valid if the cost is of the variable kind. Returns a negative
+ /// value if the cost is too high to inline.
+ int getCostDelta() const { return Threshold - getCost(); }
+ };
+ /// InlineCostAnalyzer - Cost analyzer used by inliner.
+ class InlineCostAnalyzer {
// TargetData if available, or null.
const TargetData *TD;
- int CountBonusForConstant(Value *V, Constant *C = NULL);
- int ConstantFunctionBonus(CallSite CS, Constant *C);
- int getInlineSize(CallSite CS, Function *Callee);
- int getInlineBonuses(CallSite CS, Function *Callee);
public:
InlineCostAnalyzer(): TD(0) {}
void setTargetData(const TargetData *TData) { TD = TData; }
- /// getInlineCost - The heuristic used to determine if we should inline the
- /// function call or not.
+ /// \brief Get an InlineCost object representing the cost of inlining this
+ /// callsite.
///
- InlineCost getInlineCost(CallSite CS,
- SmallPtrSet<const Function *, 16> &NeverInline);
+ /// Note that threshold is passed into this function. Only costs below the
+ /// threshold are computed with any accuracy. The threshold can be used to
+ /// bound the computation necessary to determine whether the cost is
+ /// sufficiently low to warrant inlining.
+ InlineCost getInlineCost(CallSite CS, int Threshold);
 /// getInlineCost - The heuristic used to determine if we should inline
/// the function call or not. The callee is explicitly specified, to allow
- /// you to calculate the cost of inlining a function via a pointer. The
- /// result assumes that the inlined version will always be used. You should
- /// weight it yourself in cases where this callee will not always be called.
- InlineCost getInlineCost(CallSite CS,
- Function *Callee,
- SmallPtrSet<const Function *, 16> &NeverInline);
-
- /// getSpecializationBonus - The heuristic used to determine the per-call
- /// performance boost for using a specialization of Callee with argument
- /// SpecializedArgNos replaced by a constant.
- int getSpecializationBonus(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNo);
-
- /// getSpecializationCost - The heuristic used to determine the code-size
- /// impact of creating a specialized version of Callee with argument
- /// SpecializedArgNo replaced by a constant.
- InlineCost getSpecializationCost(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNo);
-
- /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
- /// higher threshold to determine if the function call should be inlined.
- float getInlineFudgeFactor(CallSite CS);
-
- /// resetCachedFunctionInfo - erase any cached cost info for this function.
- void resetCachedCostInfo(Function* Caller) {
- CachedFunctionInfo[Caller] = FunctionInfo();
- }
-
- /// growCachedCostInfo - update the cached cost info for Caller after Callee
- /// has been inlined. If Callee is NULL it means a dead call has been
- /// eliminated.
- void growCachedCostInfo(Function* Caller, Function* Callee);
-
- /// clear - empty the cache of inline costs
- void clear();
+ /// you to calculate the cost of inlining a function via a pointer. This
+ /// behaves exactly as the version with no explicit callee parameter in all
+ /// other respects.
+ //
+ // Note: This is used by out-of-tree passes, please do not remove without
+ // adding a replacement API.
+ InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold);
};
/// callIsSmall - If a call is likely to lower to a single target instruction,
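A minimal sketch of how an inliner consumes the new cost object via its boolean conversion (CA is an InlineCostAnalyzer, CS the candidate CallSite; 225 is an illustrative threshold, not a mandated default):

    llvm::InlineCost IC = CA.getInlineCost(CS, /*Threshold=*/225);
    if (IC.isAlways()) {
      // Must inline regardless of size.
    } else if (IC.isNever()) {
      // Must never inline.
    } else if (IC) {
      // Variable cost below the adjusted threshold: profitable to inline.
      int Headroom = IC.getCostDelta(); // how far under the threshold we are
      (void)Headroom;
    }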
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index c1d87d3f7712..152e885bf667 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -20,147 +20,198 @@
#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
namespace llvm {
+ template<typename T>
+ class ArrayRef;
class DominatorTree;
class Instruction;
- class Value;
class TargetData;
- template<typename T>
- class ArrayRef;
+ class TargetLibraryInfo;
+ class Type;
+ class Value;
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyMulInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
Value *SimplifySDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFDivInst - Given operands for an FDiv, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyURemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFRemInst - Given operands for an FRem, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyShlInst - Given operands for a Shl, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyLShrInst - Given operands for a LShr, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD = 0, const DominatorTree *DT=0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyAShrInst - Given operands for an AShr, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyAndInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyOrInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyXorInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD = 0,
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD = 0,
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
/// the result. If not, this returns null.
Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
- Value *SimplifyGEPInst(ArrayRef<Value *> Ops,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
/// can fold the result. If not, this returns null.
Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs,
const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
+ /// SimplifyTruncInst - Given operands for a TruncInst, see if we can fold
+ /// the result. If not, this returns null.
+ Value *SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
//=== Helper functions for higher up the class hierarchy.
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD = 0, const DominatorTree *DT = 0);
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
const DominatorTree *DT = 0);
- /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
- /// delete the From instruction. In addition to a basic RAUW, this does a
- /// recursive simplification of the updated instructions. This catches
- /// things where one simplification exposes other opportunities. This only
- /// simplifies and deletes scalar operations, it does not change the CFG.
+ /// \brief Replace all uses of 'I' with 'SimpleV' and simplify the uses
+ /// recursively.
///
- void ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
- const TargetData *TD = 0,
- const DominatorTree *DT = 0);
+ /// This first performs a normal RAUW of I with SimpleV. It then recursively
+ /// attempts to simplify those users updated by the operation. The 'I'
+ /// instruction must not be equal to the simplified value 'SimpleV'.
+ ///
+ /// The function returns true if any simplifications were performed.
+ bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
+
+ /// \brief Recursively attempt to simplify an instruction.
+ ///
+ /// This routine uses SimplifyInstruction to simplify 'I', and if successful
+ /// replaces uses of 'I' with the simplified value. It then recurses on each
+ /// of the users impacted. It returns true if any simplifications were
+ /// performed.
+ bool recursivelySimplifyInstruction(Instruction *I,
+ const TargetData *TD = 0,
+ const TargetLibraryInfo *TLI = 0,
+ const DominatorTree *DT = 0);
} // end namespace llvm
#endif
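For callers, the practical upshot of this hunk is one extra TargetLibraryInfo argument threaded through every simplify call. A minimal sketch, assuming only the signatures declared above (the helper name and include set are illustrative):

    #include "llvm/Instruction.h"
    #include "llvm/Analysis/InstructionSimplify.h"

    using namespace llvm;

    // Hypothetical helper: fold one instruction with the post-patch API.
    static bool foldInstruction(Instruction *I, const TargetData *TD,
                                const TargetLibraryInfo *TLI,
                                const DominatorTree *DT) {
      Value *V = SimplifyInstruction(I, TD, TLI, DT);
      if (!V)
        return false;
      // RAUW I with V, then chase any follow-on simplifications among I's
      // users; I itself is left for the caller to erase if it is now dead.
      replaceAndRecursivelySimplify(I, V, TD, TLI, DT);
      return true;
    }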
diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h
index 82b3294cc504..0968c7468e68 100644
--- a/include/llvm/Analysis/IntervalIterator.h
+++ b/include/llvm/Analysis/IntervalIterator.h
@@ -101,14 +101,14 @@ public:
IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) {
OrigContainer = M;
if (!ProcessInterval(&M->front())) {
- assert(0 && "ProcessInterval should never fail for first interval!");
+ llvm_unreachable("ProcessInterval should never fail for first interval!");
}
}
IntervalIterator(IntervalPartition &IP, bool OwnMemory) : IOwnMem(OwnMemory) {
OrigContainer = &IP;
if (!ProcessInterval(IP.getRootInterval())) {
- assert(0 && "ProcessInterval should never fail for first interval!");
+ llvm_unreachable("ProcessInterval should never fail for first interval!");
}
}
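This assert(0 && ...) to llvm_unreachable(...) conversion recurs below (ProfileInfo.h, BitCodes.h); besides firing in all build modes, it removes the need for dummy returns after the impossible path. An illustrative sketch of the idiom, not taken from the patch:

    #include "llvm/Support/ErrorHandling.h"

    // Hypothetical helper: the switch covers every legal input, so the
    // fall-through is dead; llvm_unreachable documents and enforces that
    // without a placeholder "return 0;".
    static unsigned operandCount(unsigned Kind) {
      switch (Kind) {
      case 0: return 1; // hypothetical unary kind
      case 1: return 2; // hypothetical binary kind
      }
      llvm_unreachable("operandCount: unknown kind");
    }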
diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h
index fc4d0af920e9..065c230fb2fd 100644
--- a/include/llvm/Analysis/LazyValueInfo.h
+++ b/include/llvm/Analysis/LazyValueInfo.h
@@ -20,12 +20,14 @@
namespace llvm {
class Constant;
class TargetData;
+ class TargetLibraryInfo;
class Value;
/// LazyValueInfo - This pass computes, caches, and vends lazy value constraint
/// information.
class LazyValueInfo : public FunctionPass {
class TargetData *TD;
+ class TargetLibraryInfo *TLI;
void *PImpl;
LazyValueInfo(const LazyValueInfo&); // DO NOT IMPLEMENT.
void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT.
@@ -68,9 +70,7 @@ public:
// Implementation boilerplate.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual void releaseMemory();
virtual bool runOnFunction(Function &F);
};
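Moving getAnalysisUsage out of line lets the .cpp file name the pass's new TargetLibraryInfo dependency. The body is not part of this header diff; a plausible definition, offered purely as an assumption, would be:

    // Assumed shape of the out-of-line definition in LazyValueInfo.cpp:
    void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>(); // new dependency (assumed)
      AU.setPreservesAll();                // kept from the old inline body
    }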
diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h
index 1574262dd6d3..5f0aefbeb015 100644
--- a/include/llvm/Analysis/Loads.h
+++ b/include/llvm/Analysis/Loads.h
@@ -20,6 +20,7 @@ namespace llvm {
class AliasAnalysis;
class TargetData;
+class MDNode;
/// isSafeToLoadUnconditionally - Return true if we know that executing a load
/// from this value cannot trap. If it is not obviously safe to load from the
@@ -41,10 +42,15 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
/// MaxInstsToScan specifies the maximum instructions to scan in the block.
/// If it is set to 0, it will scan the whole block. You can also optionally
/// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there. If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = 6,
- AliasAnalysis *AA = 0);
+ AliasAnalysis *AA = 0,
+ MDNode **TBAATag = 0);
}
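A hedged usage sketch for the extended FindAvailableLoadedValue; only its signature comes from the header above, the surrounding scaffolding is assumed:

    #include "llvm/Instructions.h"
    #include "llvm/Analysis/Loads.h"

    using namespace llvm;

    // Hypothetical caller: forward a value from earlier in the block and
    // capture the TBAA tag of the access that supplied it.
    static Value *tryForwardLoad(LoadInst *Load, AliasAnalysis *AA) {
      BasicBlock::iterator ScanFrom(Load);
      MDNode *TBAATag = 0;
      Value *Avail = FindAvailableLoadedValue(Load->getPointerOperand(),
                                              Load->getParent(), ScanFrom,
                                              /*MaxInstsToScan=*/6, AA,
                                              &TBAATag);
      // If Avail is non-null and TBAATag was set, the caller can reattach
      // that metadata to whatever replaces the load.
      return Avail;
    }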
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 12cb6c5cc480..91feaaac038d 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -23,7 +23,6 @@
// * whether or not a particular block branches out of the loop
// * the successor blocks of the loop
// * the loop depth
-// * the trip count
// * etc...
//
//===----------------------------------------------------------------------===//
@@ -416,14 +415,26 @@ public:
#ifndef NDEBUG
assert(!Blocks.empty() && "Loop header is missing");
+ // Setup for using a depth-first iterator to visit every block in the loop.
+ SmallVector<BlockT*, 8> ExitBBs;
+ getExitBlocks(ExitBBs);
+ llvm::SmallPtrSet<BlockT*, 8> VisitSet;
+ VisitSet.insert(ExitBBs.begin(), ExitBBs.end());
+ df_ext_iterator<BlockT*, llvm::SmallPtrSet<BlockT*, 8> >
+ BI = df_ext_begin(getHeader(), VisitSet),
+ BE = df_ext_end(getHeader(), VisitSet);
+
+ // Keep track of the number of BBs visited.
+ unsigned NumVisited = 0;
+
// Sort the blocks vector so that we can use binary search to do quick
// lookups.
SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
std::sort(LoopBBs.begin(), LoopBBs.end());
// Check the individual blocks.
- for (block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
- BlockT *BB = *I;
+ for ( ; BI != BE; ++BI) {
+ BlockT *BB = *BI;
bool HasInsideLoopSuccs = false;
bool HasInsideLoopPreds = false;
SmallVector<BlockT *, 2> OutsideLoopPreds;
@@ -440,7 +451,7 @@ public:
for (typename InvBlockTraits::ChildIteratorType PI =
InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB);
PI != PE; ++PI) {
- typename InvBlockTraits::NodeType *N = *PI;
+ BlockT *N = *PI;
if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), N))
HasInsideLoopPreds = true;
else
@@ -464,8 +475,12 @@ public:
assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!");
assert(BB != getHeader()->getParent()->begin() &&
"Loop contains function entry block!");
+
+ NumVisited++;
}
+ assert(NumVisited == getNumBlocks() && "Unreachable block in loop");
+
// Check the subloops.
for (iterator I = begin(), E = end(); I != E; ++I)
// Each block in each subloop should be contained within this loop.
@@ -571,37 +586,6 @@ public:
///
PHINode *getCanonicalInductionVariable() const;
- /// getTripCount - Return a loop-invariant LLVM value indicating the number of
- /// times the loop will be executed. Note that this means that the backedge
- /// of the loop executes N-1 times. If the trip-count cannot be determined,
- /// this returns null.
- ///
- /// The IndVarSimplify pass transforms loops to have a form that this
- /// function easily understands.
- ///
- Value *getTripCount() const;
-
- /// getSmallConstantTripCount - Returns the trip count of this loop as a
- /// normal unsigned value, if possible. Returns 0 if the trip count is unknown
- /// or not constant. Will also return 0 if the trip count is very large
- /// (>= 2^32)
- ///
- /// The IndVarSimplify pass transforms loops to have a form that this
- /// function easily understands.
- ///
- unsigned getSmallConstantTripCount() const;
-
- /// getSmallConstantTripMultiple - Returns the largest constant divisor of the
- /// trip count of this loop as a normal unsigned value, if possible. This
- /// means that the actual trip count is always a multiple of the returned
- /// value (don't forget the trip count could very well be zero as well!).
- ///
- /// Returns 1 if the trip count is unknown or not guaranteed to be the
- /// multiple of a constant (which is also the case if the trip count is simply
- /// constant, use getSmallConstantTripCount for that case). Will also return 1
- /// if the trip count is very large (>= 2^32).
- unsigned getSmallConstantTripMultiple() const;
-
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool isLCSSAForm(DominatorTree &DT) const;
@@ -610,6 +594,9 @@ public:
/// normal form.
bool isLoopSimplifyForm() const;
+ /// isSafeToClone - Return true if the loop body is safe to clone in practice.
+ bool isSafeToClone() const;
+
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool hasDedicatedExits() const;
@@ -671,9 +658,7 @@ public:
/// block is in no loop (for example the entry node), null is returned.
///
LoopT *getLoopFor(const BlockT *BB) const {
- typename DenseMap<BlockT *, LoopT *>::const_iterator I=
- BBMap.find(const_cast<BlockT*>(BB));
- return I != BBMap.end() ? I->second : 0;
+ return BBMap.lookup(const_cast<BlockT*>(BB));
}
/// operator[] - same as getLoopFor...
@@ -712,9 +697,7 @@ public:
/// the loop hierarchy tree.
void changeLoopFor(BlockT *BB, LoopT *L) {
if (!L) {
- typename DenseMap<BlockT *, LoopT *>::iterator I = BBMap.find(BB);
- if (I != BBMap.end())
- BBMap.erase(I);
+ BBMap.erase(BB);
return;
}
BBMap[BB] = L;
@@ -771,7 +754,7 @@ public:
}
LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT) {
- if (BBMap.find(BB) != BBMap.end()) return 0;// Haven't processed this node?
+ if (BBMap.count(BB)) return 0; // Already processed this node.
std::vector<BlockT *> TodoStack;
@@ -782,7 +765,8 @@ public:
InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB);
I != E; ++I) {
typename InvBlockTraits::NodeType *N = *I;
- if (DT.dominates(BB, N)) // If BB dominates its predecessor...
+ // If BB dominates its predecessor...
+ if (DT.dominates(BB, N) && DT.isReachableFromEntry(N))
TodoStack.push_back(N);
}
@@ -792,14 +776,12 @@ public:
LoopT *L = new LoopT(BB);
BBMap[BB] = L;
- BlockT *EntryBlock = BB->getParent()->begin();
-
while (!TodoStack.empty()) { // Process all the nodes in the loop
BlockT *X = TodoStack.back();
TodoStack.pop_back();
if (!L->contains(X) && // As of yet unprocessed??
- DT.dominates(EntryBlock, X)) { // X is reachable from entry block?
+ DT.isReachableFromEntry(X)) {
// Check to see if this block already belongs to a loop. If this occurs
// then we have a case where a loop that is supposed to be a child of
// the current loop was processed before the current loop. When this
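Several hunks here replace hand-rolled find()/end() iterator checks with DenseMap's lookup, erase, and count. The idiom in isolation (illustrative only):

    #include "llvm/ADT/DenseMap.h"

    // DenseMap::lookup returns the mapped value, or a value-initialized
    // default (null for pointers) when the key is absent, so the old
    // three-line find/compare/deref pattern collapses to one call.
    template <typename K, typename V>
    static V *mapLookup(const llvm::DenseMap<K *, V *> &M, K *Key) {
      return M.lookup(Key); // was: find(Key), compare to end(), deref
    }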
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index e18d937f6916..68ce364f4413 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -324,6 +324,7 @@ namespace llvm {
/// Current AA implementation, just a cache.
AliasAnalysis *AA;
TargetData *TD;
+ DominatorTree *DT;
OwningPtr<PredIteratorCache> PredCache;
public:
MemoryDependenceAnalysis();
@@ -430,6 +431,9 @@ namespace llvm {
void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P);
+ AliasAnalysis::ModRefResult
+ getModRefInfo(const Instruction *Inst, const AliasAnalysis::Location &Loc);
+
/// verifyRemoved - Verify that the specified instruction does not occur
/// in our internal data structures.
void verifyRemoved(Instruction *Inst) const;
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 033efba3e742..ff9a24790a99 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -20,7 +20,8 @@
namespace llvm {
class DominatorTree;
class TargetData;
-
+ class TargetLibraryInfo;
+
/// PHITransAddr - An address value which tracks and handles phi translation.
/// As we walk "up" the CFG through predecessors, we need to ensure that the
/// address we're tracking is kept up to date. For example, if we're analyzing
@@ -37,11 +38,14 @@ class PHITransAddr {
/// TD - The target data we are playing with if known, otherwise null.
const TargetData *TD;
+
+ /// TLI - The target library info if known, otherwise null.
+ const TargetLibraryInfo *TLI;
/// InstInputs - The inputs for our symbolic address.
SmallVector<Instruction*, 4> InstInputs;
public:
- PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td) {
+ PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td), TLI(0) {
// If the address is an instruction, the whole thing is considered an input.
if (Instruction *I = dyn_cast<Instruction>(Addr))
InstInputs.push_back(I);
diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h
index 300a0279042c..6c2e2732d344 100644
--- a/include/llvm/Analysis/ProfileInfo.h
+++ b/include/llvm/Analysis/ProfileInfo.h
@@ -22,6 +22,7 @@
#define LLVM_ANALYSIS_PROFILEINFO_H
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -85,13 +86,11 @@ namespace llvm {
// getFunction() - Returns the Function for an Edge, checking for validity.
static const FType* getFunction(Edge e) {
- if (e.first) {
+ if (e.first)
return e.first->getParent();
- } else if (e.second) {
+ if (e.second)
return e.second->getParent();
- }
- assert(0 && "Invalid ProfileInfo::Edge");
- return (const FType*)0;
+ llvm_unreachable("Invalid ProfileInfo::Edge");
}
// getEdge() - Creates an Edge from two BasicBlocks.
diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h
index 9d8954595d61..b098eeaa3db8 100644
--- a/include/llvm/Analysis/RegionInfo.h
+++ b/include/llvm/Analysis/RegionInfo.h
@@ -681,7 +681,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node) {
if (Node.isSubRegion())
return OS << Node.getNodeAs<Region>()->getNameStr();
else
- return OS << Node.getNodeAs<BasicBlock>()->getNameStr();
+ return OS << Node.getNodeAs<BasicBlock>()->getName();
}
} // End llvm namespace
#endif
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 10d933e68f5b..72408f773840 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -41,6 +41,7 @@ namespace llvm {
class Type;
class ScalarEvolution;
class TargetData;
+ class TargetLibraryInfo;
class LLVMContext;
class Loop;
class LoopInfo;
@@ -118,6 +119,10 @@ namespace llvm {
///
bool isAllOnesValue() const;
+ /// isNonConstantNegative - Return true if the specified scev is negated,
+ /// but not a constant.
+ bool isNonConstantNegative() const;
+
/// print - Print out the internal representation of this scalar to the
/// specified stream. This should really only be used for debugging
/// purposes.
@@ -135,7 +140,7 @@ namespace llvm {
ID = X.FastID;
}
static bool Equals(const SCEV &X, const FoldingSetNodeID &ID,
- FoldingSetNodeID &TempID) {
+ unsigned IDHash, FoldingSetNodeID &TempID) {
return ID == X.FastID;
}
static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) {
@@ -224,6 +229,10 @@ namespace llvm {
///
TargetData *TD;
+ /// TLI - The target library information for the target we are targeting.
+ ///
+ TargetLibraryInfo *TLI;
+
/// DT - The dominator tree.
///
DominatorTree *DT;
@@ -721,16 +730,21 @@ namespace llvm {
const SCEV *LHS, const SCEV *RHS);
/// getSmallConstantTripCount - Returns the maximum trip count of this loop
- /// as a normal unsigned value, if possible. Returns 0 if the trip count is
- /// unknown or not constant.
- unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitBlock);
+ /// as a normal unsigned value. Returns 0 if the trip count is unknown or
+ /// not constant. This "trip count" assumes that control exits via
+ /// ExitingBlock. More precisely, it is the number of times that control may
+ /// reach ExitingBlock before taking the branch. For loops with multiple
+ /// exits, it may not be the number of times that the loop header executes if
+ /// the loop exits prematurely via another branch.
+ unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
/// getSmallConstantTripMultiple - Returns the largest constant divisor of
/// the trip count of this loop as a normal unsigned value, if
/// possible. This means that the actual trip count is always a multiple of
/// the returned value (don't forget the trip count could very well be zero
- /// as well!).
- unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitBlock);
+ /// as well!). As explained in the comments for getSmallConstantTripCount,
+ /// this assumes that control exits the loop via ExitingBlock.
+ unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock);
// getExitCount - Get the expression for the number of loop iterations for
// which this loop is guaranteed not to exit via ExitingBlock. Otherwise
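Because the trip-count queries are now documented per exiting block, a caller must pick which exit it is asking about. A hedged sketch; Loop::getExitingBlocks is assumed to exist in this era:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"

    static void inspectTripCounts(llvm::ScalarEvolution &SE, llvm::Loop *L) {
      llvm::SmallVector<llvm::BasicBlock *, 4> ExitingBlocks;
      L->getExitingBlocks(ExitingBlocks);
      for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
        unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlocks[i]);
        // TC bounds how often control reaches this exit's branch; for a
        // multi-exit loop it need not equal the header's execution count.
        (void)TC;
      }
    }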
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index a4ad1451d412..c22fc3ab74b7 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -22,6 +22,8 @@
#include <set>
namespace llvm {
+ class TargetLowering;
+
/// SCEVExpander - This class uses information about analyzed scalars to
/// rewrite expressions in canonical form.
///
@@ -58,6 +60,9 @@ namespace llvm {
/// insert the IV increment at this position.
Instruction *IVIncInsertPos;
+ /// Phis that complete an IV chain. Reuse
+ std::set<AssertingVH<PHINode> > ChainedPhis;
+
/// CanonicalMode - When true, expressions are expanded in "canonical"
/// form. In particular, addrecs are expanded as arithmetic based on
/// a canonical induction variable. When false, expression are expanded
@@ -100,6 +105,7 @@ namespace llvm {
InsertedExpressions.clear();
InsertedValues.clear();
InsertedPostIncValues.clear();
+ ChainedPhis.clear();
}
/// getOrInsertCanonicalInductionVariable - This method returns the
@@ -108,14 +114,18 @@ namespace llvm {
/// starts at zero and steps by one on each iteration.
PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty);
- /// hoistStep - Utility for hoisting an IV increment.
- static bool hoistStep(Instruction *IncV, Instruction *InsertPos,
- const DominatorTree *DT);
+ /// getIVIncOperand - Return the induction variable increment's IV operand.
+ Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos,
+ bool allowScale);
+
+ /// hoistIVInc - Utility for hoisting an IV increment.
+ bool hoistIVInc(Instruction *IncV, Instruction *InsertPos);
/// replaceCongruentIVs - replace congruent phis with their most canonical
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts);
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetLowering *TLI = NULL);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
@@ -161,6 +171,16 @@ namespace llvm {
void clearInsertPoint() {
Builder.ClearInsertionPoint();
}
+
+ /// isInsertedInstruction - Return true if the specified instruction was
+ /// inserted by the code rewriter. If so, the client should not modify the
+ /// instruction.
+ bool isInsertedInstruction(Instruction *I) const {
+ return InsertedValues.count(I) || InsertedPostIncValues.count(I);
+ }
+
+ void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); }
+
private:
LLVMContext &getContext() const { return SE.getContext(); }
@@ -195,13 +215,6 @@ namespace llvm {
/// result will be expanded to have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = 0);
- /// isInsertedInstruction - Return true if the specified instruction was
- /// inserted by the code rewriter. If so, the client should not modify the
- /// instruction.
- bool isInsertedInstruction(Instruction *I) const {
- return InsertedValues.count(I) || InsertedPostIncValues.count(I);
- }
-
/// getRelevantLoop - Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);
@@ -244,6 +257,8 @@ namespace llvm {
const Loop *L,
Type *ExpandTy,
Type *IntTy);
+ Value *expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy, bool useSubtract);
};
}
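With isInsertedInstruction now public, clients that post-process expanded code can ask the expander which instructions it owns before touching them. A small sketch (expander construction elided):

    #include "llvm/Analysis/ScalarEvolutionExpander.h"

    // Mutating expander-owned instructions risks invalidating its internal
    // value maps, so a rewriter should skip them.
    static bool clientMayRewrite(llvm::SCEVExpander &Exp, llvm::Instruction *I) {
      return !Exp.isInsertedInstruction(I);
    }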
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index b6f0ae54cfa0..47b371029186 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -491,7 +491,6 @@ namespace llvm {
RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) {
llvm_unreachable("Invalid use of SCEVCouldNotCompute!");
- return RetVal();
}
};
}
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 68263300c726..f2f9db4ce4e8 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -17,15 +17,15 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataTypes.h"
-#include <string>
namespace llvm {
- template <typename T> class SmallVectorImpl;
class Value;
class Instruction;
class APInt;
class TargetData;
-
+ class StringRef;
+ class MDNode;
+
/// ComputeMaskedBits - Determine which of the bits specified in Mask are
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
@@ -36,10 +36,10 @@ namespace llvm {
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
- void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
- APInt &KnownOne, const TargetData *TD = 0,
- unsigned Depth = 0);
-
+ void ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const TargetData *TD = 0, unsigned Depth = 0);
+ void computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero);
+
/// ComputeSignBit - Determine whether the sign bit is known to be zero or
/// one. Convenience wrapper around ComputeMaskedBits.
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
@@ -48,8 +48,10 @@ namespace llvm {
/// isPowerOfTwo - Return true if the given value is known to have exactly one
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
- /// type and vectors of integers.
- bool isPowerOfTwo(Value *V, const TargetData *TD = 0, unsigned Depth = 0);
+ /// type and vectors of integers. If 'OrZero' is set then returns true if the
+ /// given value is either a power of two or zero.
+ bool isPowerOfTwo(Value *V, const TargetData *TD = 0, bool OrZero = false,
+ unsigned Depth = 0);
/// isKnownNonZero - Return true if the given value is known to be non-zero
/// when defined. For vectors return true if every element is known to be
@@ -123,16 +125,15 @@ namespace llvm {
return GetPointerBaseWithConstantOffset(const_cast<Value*>(Ptr), Offset,TD);
}
- /// GetConstantStringInfo - This function computes the length of a
+ /// getConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
- /// and returns the string in Str. If unsuccessful, it returns false. If
- /// StopAtNul is set to true (the default), the returned string is truncated
- /// by a nul character in the global. If StopAtNul is false, the nul
- /// character is included in the result string.
- bool GetConstantStringInfo(const Value *V, std::string &Str,
- uint64_t Offset = 0,
- bool StopAtNul = true);
-
+ /// and returns the string in Str. If unsuccessful, it returns false. This
+ /// does not include the trailing nul character by default. If TrimAtNul is
+ /// set to false, then this returns any trailing nul characters as well as any
+ /// other characters that come after it.
+ bool getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset = 0, bool TrimAtNul = true);
+
/// GetStringLength - If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
uint64_t GetStringLength(Value *V);
@@ -154,6 +155,27 @@ namespace llvm {
/// are lifetime markers.
bool onlyUsedByLifetimeMarkers(const Value *V);
+ /// isSafeToSpeculativelyExecute - Return true if the instruction does not
+ /// have any effects besides calculating the result and does not have
+ /// undefined behavior.
+ ///
+ /// This method never returns true for an instruction that returns true for
+ /// mayHaveSideEffects; however, this method also does some other checks in
+ /// addition. It checks for undefined behavior, like dividing by zero or
+ /// loading from an invalid pointer (but not for undefined results, like a
+ /// shift with a shift amount larger than the width of the result). It checks
+ /// for malloc and alloca because speculatively executing them might cause a
+ /// memory leak. It also returns false for instructions related to control
+ /// flow, specifically terminators and PHI nodes.
+ ///
+ /// This method only looks at the instruction itself and its operands, so if
+ /// this method returns true, it is safe to move the instruction as long as
+ /// the correct dominance relationships for the operands and users hold.
+ /// However, this method can return true for instructions that read memory;
+ /// for such instructions, moving them may change the resulting value.
+ bool isSafeToSpeculativelyExecute(const Value *V,
+ const TargetData *TD = 0);
+
} // end namespace llvm
#endif
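A hedged sketch of the renamed, StringRef-based API. The returned StringRef views the constant's storage rather than owning a copy, which is presumably the motivation for dropping the std::string overload:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Analysis/ValueTracking.h"

    // Hypothetical helper: does V point at the given C string literal?
    static bool isStringLiteral(const llvm::Value *V, llvm::StringRef Expected) {
      llvm::StringRef S;
      if (!llvm::getConstantStringInfo(V, S)) // trims at the first nul by default
        return false;
      return S == Expected;
    }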
diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h
index cd7488266231..e66075c1f235 100644
--- a/include/llvm/Argument.h
+++ b/include/llvm/Argument.h
@@ -30,6 +30,7 @@ template<typename ValueSubClass, typename ItemParentClass>
/// the function was called with.
/// @brief LLVM Argument representation
class Argument : public Value, public ilist_node<Argument> {
+ virtual void anchor();
Function *Parent;
friend class SymbolTableListTraits<Argument, Function>;
diff --git a/include/llvm/Assembly/AssemblyAnnotationWriter.h b/include/llvm/Assembly/AssemblyAnnotationWriter.h
index 3a65f97a5b50..37b47c31e8c7 100644
--- a/include/llvm/Assembly/AssemblyAnnotationWriter.h
+++ b/include/llvm/Assembly/AssemblyAnnotationWriter.h
@@ -22,7 +22,7 @@ namespace llvm {
class Function;
class BasicBlock;
class Instruction;
-class raw_ostream;
+class Value;
class formatted_raw_ostream;
class AssemblyAnnotationWriter {
@@ -32,30 +32,30 @@ public:
/// emitFunctionAnnot - This may be implemented to emit a string right before
/// the start of a function.
- virtual void emitFunctionAnnot(const Function *F,
- formatted_raw_ostream &OS) {}
+ virtual void emitFunctionAnnot(const Function *,
+ formatted_raw_ostream &) {}
/// emitBasicBlockStartAnnot - This may be implemented to emit a string right
/// after the basic block label, but before the first instruction in the
/// block.
- virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
- formatted_raw_ostream &OS) {
+ virtual void emitBasicBlockStartAnnot(const BasicBlock *,
+ formatted_raw_ostream &) {
}
/// emitBasicBlockEndAnnot - This may be implemented to emit a string right
/// after the basic block.
- virtual void emitBasicBlockEndAnnot(const BasicBlock *BB,
- formatted_raw_ostream &OS) {
+ virtual void emitBasicBlockEndAnnot(const BasicBlock *,
+ formatted_raw_ostream &) {
}
/// emitInstructionAnnot - This may be implemented to emit a string right
/// before an instruction is emitted.
- virtual void emitInstructionAnnot(const Instruction *I,
- formatted_raw_ostream &OS) {}
+ virtual void emitInstructionAnnot(const Instruction *,
+ formatted_raw_ostream &) {}
/// printInfoComment - This may be implemented to emit a comment to the
/// right of an instruction or global value.
- virtual void printInfoComment(const Value &V, formatted_raw_ostream &OS) {}
+ virtual void printInfoComment(const Value &, formatted_raw_ostream &) {}
};
} // End llvm namespace
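Dropping the parameter names from the empty default bodies is a common way to silence unused-parameter warnings (our inference; the patch does not say). Subclasses are unaffected and still name what they use:

    #include "llvm/BasicBlock.h"
    #include "llvm/Assembly/AssemblyAnnotationWriter.h"
    #include "llvm/Support/FormattedStream.h"

    // Illustrative subclass: overrides keep their parameter names.
    struct BlockAnnotator : public llvm::AssemblyAnnotationWriter {
      virtual void emitBasicBlockStartAnnot(const llvm::BasicBlock *BB,
                                            llvm::formatted_raw_ostream &OS) {
        OS << "; block: " << BB->getName() << "\n";
      }
    };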
diff --git a/include/llvm/Assembly/Parser.h b/include/llvm/Assembly/Parser.h
index 82ec6d81367b..b971c531ae05 100644
--- a/include/llvm/Assembly/Parser.h
+++ b/include/llvm/Assembly/Parser.h
@@ -21,7 +21,6 @@ namespace llvm {
class Module;
class MemoryBuffer;
class SMDiagnostic;
-class raw_ostream;
class LLVMContext;
/// This function is the main interface to the LLVM Assembly Parser. It parses
diff --git a/include/llvm/Assembly/Writer.h b/include/llvm/Assembly/Writer.h
index 8d8befd472d8..6b89ae022da3 100644
--- a/include/llvm/Assembly/Writer.h
+++ b/include/llvm/Assembly/Writer.h
@@ -19,7 +19,6 @@
namespace llvm {
-class Type;
class Module;
class Value;
class raw_ostream;
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 2d7b33b29bcf..0099f173b626 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -22,8 +22,66 @@
namespace llvm {
class Type;
+namespace Attribute {
+/// We use this proxy POD type to allow constructing Attributes constants
+/// using initializer lists. Do not use this class directly.
+struct AttrConst {
+ uint64_t v;
+ AttrConst operator | (const AttrConst Attrs) const {
+ AttrConst Res = {v | Attrs.v};
+ return Res;
+ }
+ AttrConst operator ~ () const {
+ AttrConst Res = {~v};
+ return Res;
+ }
+};
+} // namespace Attribute
+
+
/// Attributes - A bitset of attributes.
-typedef unsigned Attributes;
+class Attributes {
+ public:
+ Attributes() : Bits(0) { }
+ explicit Attributes(uint64_t Val) : Bits(Val) { }
+ /*implicit*/ Attributes(Attribute::AttrConst Val) : Bits(Val.v) { }
+ Attributes(const Attributes &Attrs) : Bits(Attrs.Bits) { }
+ // This is a "safe bool() operator".
+ operator const void *() const { return Bits ? this : 0; }
+ bool isEmptyOrSingleton() const { return (Bits & (Bits - 1)) == 0; }
+ Attributes &operator = (const Attributes &Attrs) {
+ Bits = Attrs.Bits;
+ return *this;
+ }
+ bool operator == (const Attributes &Attrs) const {
+ return Bits == Attrs.Bits;
+ }
+ bool operator != (const Attributes &Attrs) const {
+ return Bits != Attrs.Bits;
+ }
+ Attributes operator | (const Attributes &Attrs) const {
+ return Attributes(Bits | Attrs.Bits);
+ }
+ Attributes operator & (const Attributes &Attrs) const {
+ return Attributes(Bits & Attrs.Bits);
+ }
+ Attributes operator ^ (const Attributes &Attrs) const {
+ return Attributes(Bits ^ Attrs.Bits);
+ }
+ Attributes &operator |= (const Attributes &Attrs) {
+ Bits |= Attrs.Bits;
+ return *this;
+ }
+ Attributes &operator &= (const Attributes &Attrs) {
+ Bits &= Attrs.Bits;
+ return *this;
+ }
+ Attributes operator ~ () const { return Attributes(~Bits); }
+ uint64_t Raw() const { return Bits; }
+ private:
+ // Currently, we need less than 64 bits.
+ uint64_t Bits;
+};
namespace Attribute {
@@ -33,44 +91,55 @@ namespace Attribute {
/// results or the function itself.
/// @brief Function attributes.
-const Attributes None = 0; ///< No attributes have been set
-const Attributes ZExt = 1<<0; ///< Zero extended before/after call
-const Attributes SExt = 1<<1; ///< Sign extended before/after call
-const Attributes NoReturn = 1<<2; ///< Mark the function as not returning
-const Attributes InReg = 1<<3; ///< Force argument to be passed in register
-const Attributes StructRet = 1<<4; ///< Hidden pointer to structure to return
-const Attributes NoUnwind = 1<<5; ///< Function doesn't unwind stack
-const Attributes NoAlias = 1<<6; ///< Considered to not alias after call
-const Attributes ByVal = 1<<7; ///< Pass structure by value
-const Attributes Nest = 1<<8; ///< Nested function static chain
-const Attributes ReadNone = 1<<9; ///< Function does not access memory
-const Attributes ReadOnly = 1<<10; ///< Function only reads from memory
-const Attributes NoInline = 1<<11; ///< inline=never
-const Attributes AlwaysInline = 1<<12; ///< inline=always
-const Attributes OptimizeForSize = 1<<13; ///< opt_size
-const Attributes StackProtect = 1<<14; ///< Stack protection.
-const Attributes StackProtectReq = 1<<15; ///< Stack protection required.
-const Attributes Alignment = 31<<16; ///< Alignment of parameter (5 bits)
+// We declare AttrConst objects that will be used throughout the code
+// and also raw uint64_t objects with _i suffix to be used below for other
+// constant declarations. This is done to avoid static CTORs and at the same
+// time to keep type-safety of Attributes.
+#define DECLARE_LLVM_ATTRIBUTE(name, value) \
+ const uint64_t name##_i = value; \
+ const AttrConst name = {value};
+
+DECLARE_LLVM_ATTRIBUTE(None,0) ///< No attributes have been set
+DECLARE_LLVM_ATTRIBUTE(ZExt,1<<0) ///< Zero extended before/after call
+DECLARE_LLVM_ATTRIBUTE(SExt,1<<1) ///< Sign extended before/after call
+DECLARE_LLVM_ATTRIBUTE(NoReturn,1<<2) ///< Mark the function as not returning
+DECLARE_LLVM_ATTRIBUTE(InReg,1<<3) ///< Force argument to be passed in register
+DECLARE_LLVM_ATTRIBUTE(StructRet,1<<4) ///< Hidden pointer to structure to return
+DECLARE_LLVM_ATTRIBUTE(NoUnwind,1<<5) ///< Function doesn't unwind stack
+DECLARE_LLVM_ATTRIBUTE(NoAlias,1<<6) ///< Considered to not alias after call
+DECLARE_LLVM_ATTRIBUTE(ByVal,1<<7) ///< Pass structure by value
+DECLARE_LLVM_ATTRIBUTE(Nest,1<<8) ///< Nested function static chain
+DECLARE_LLVM_ATTRIBUTE(ReadNone,1<<9) ///< Function does not access memory
+DECLARE_LLVM_ATTRIBUTE(ReadOnly,1<<10) ///< Function only reads from memory
+DECLARE_LLVM_ATTRIBUTE(NoInline,1<<11) ///< inline=never
+DECLARE_LLVM_ATTRIBUTE(AlwaysInline,1<<12) ///< inline=always
+DECLARE_LLVM_ATTRIBUTE(OptimizeForSize,1<<13) ///< opt_size
+DECLARE_LLVM_ATTRIBUTE(StackProtect,1<<14) ///< Stack protection.
+DECLARE_LLVM_ATTRIBUTE(StackProtectReq,1<<15) ///< Stack protection required.
+DECLARE_LLVM_ATTRIBUTE(Alignment,31<<16) ///< Alignment of parameter (5 bits)
// stored as log2 of alignment with +1 bias
// 0 means unaligned different from align 1
-const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer
-const Attributes NoRedZone = 1<<22; /// disable redzone
-const Attributes NoImplicitFloat = 1<<23; /// disable implicit floating point
- /// instructions.
-const Attributes Naked = 1<<24; ///< Naked function
-const Attributes InlineHint = 1<<25; ///< source said inlining was
- ///desirable
-const Attributes StackAlignment = 7<<26; ///< Alignment of stack for
- ///function (3 bits) stored as log2
- ///of alignment with +1 bias
- ///0 means unaligned (different from
- ///alignstack(1))
-const Attributes ReturnsTwice = 1<<29; ///< Function can return twice
-const Attributes UWTable = 1<<30; ///< Function must be in a unwind
- ///table
-const Attributes NonLazyBind = 1U<<31; ///< Function is called early and/or
- /// often, so lazy binding isn't
- /// worthwhile.
+DECLARE_LLVM_ATTRIBUTE(NoCapture,1<<21) ///< Function creates no aliases of pointer
+DECLARE_LLVM_ATTRIBUTE(NoRedZone,1<<22) /// disable redzone
+DECLARE_LLVM_ATTRIBUTE(NoImplicitFloat,1<<23) /// disable implicit floating point
+ /// instructions.
+DECLARE_LLVM_ATTRIBUTE(Naked,1<<24) ///< Naked function
+DECLARE_LLVM_ATTRIBUTE(InlineHint,1<<25) ///< source said inlining was
+ ///desirable
+DECLARE_LLVM_ATTRIBUTE(StackAlignment,7<<26) ///< Alignment of stack for
+ ///function (3 bits) stored as log2
+ ///of alignment with +1 bias
+ ///0 means unaligned (different from
+ ///alignstack(1))
+DECLARE_LLVM_ATTRIBUTE(ReturnsTwice,1<<29) ///< Function can return twice
+DECLARE_LLVM_ATTRIBUTE(UWTable,1<<30) ///< Function must be in an unwind
+ ///table
+DECLARE_LLVM_ATTRIBUTE(NonLazyBind,1U<<31) ///< Function is called early and/or
+ /// often, so lazy binding isn't
+ /// worthwhile.
+DECLARE_LLVM_ATTRIBUTE(AddressSafety,1ULL<<32) ///< Address safety checking is on.
+
+#undef DECLARE_LLVM_ATTRIBUTE
/// Note that uwtable is about the ABI or the user mandating an entry in the
/// unwind table. The nounwind attribute is about an exception passing by the
@@ -85,24 +154,26 @@ const Attributes NonLazyBind = 1U<<31; ///< Function is called early and/or
/// uwtable + nounwind = Needs an entry because the ABI says so.
/// @brief Attributes that only apply to function parameters.
-const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
+const AttrConst ParameterOnly = {ByVal_i | Nest_i |
+ StructRet_i | NoCapture_i};
/// @brief Attributes that may be applied to the function itself. These cannot
/// be used on return values or function parameters.
-const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
- NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
- NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment |
- UWTable | NonLazyBind | ReturnsTwice;
+const AttrConst FunctionOnly = {NoReturn_i | NoUnwind_i | ReadNone_i |
+ ReadOnly_i | NoInline_i | AlwaysInline_i | OptimizeForSize_i |
+ StackProtect_i | StackProtectReq_i | NoRedZone_i | NoImplicitFloat_i |
+ Naked_i | InlineHint_i | StackAlignment_i |
+ UWTable_i | NonLazyBind_i | ReturnsTwice_i | AddressSafety_i};
/// @brief Parameter attributes that do not apply to vararg call arguments.
-const Attributes VarArgsIncompatible = StructRet;
+const AttrConst VarArgsIncompatible = {StructRet_i};
/// @brief Attributes that are mutually incompatible.
-const Attributes MutuallyIncompatible[4] = {
- ByVal | InReg | Nest | StructRet,
- ZExt | SExt,
- ReadNone | ReadOnly,
- NoInline | AlwaysInline
+const AttrConst MutuallyIncompatible[4] = {
+ {ByVal_i | InReg_i | Nest_i | StructRet_i},
+ {ZExt_i | SExt_i},
+ {ReadNone_i | ReadOnly_i},
+ {NoInline_i | AlwaysInline_i}
};
/// @brief Which attributes cannot be applied to a type.
@@ -113,20 +184,20 @@ Attributes typeIncompatible(Type *Ty);
inline Attributes constructAlignmentFromInt(unsigned i) {
// Default alignment, allow the target to define how to align it.
if (i == 0)
- return 0;
+ return None;
assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
assert(i <= 0x40000000 && "Alignment too large.");
- return (Log2_32(i)+1) << 16;
+ return Attributes((Log2_32(i)+1) << 16);
}
/// This returns the alignment field of an attribute as a byte alignment value.
inline unsigned getAlignmentFromAttrs(Attributes A) {
Attributes Align = A & Attribute::Alignment;
- if (Align == 0)
+ if (!Align)
return 0;
- return 1U << ((Align >> 16) - 1);
+ return 1U << ((Align.Raw() >> 16) - 1);
}
/// This turns an int stack alignment (which must be a power of 2) into
@@ -134,21 +205,21 @@ inline unsigned getAlignmentFromAttrs(Attributes A) {
inline Attributes constructStackAlignmentFromInt(unsigned i) {
// Default alignment, allow the target to define how to align it.
if (i == 0)
- return 0;
+ return None;
assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
assert(i <= 0x100 && "Alignment too large.");
- return (Log2_32(i)+1) << 26;
+ return Attributes((Log2_32(i)+1) << 26);
}
/// This returns the stack alignment field of an attribute as a byte alignment
/// value.
inline unsigned getStackAlignmentFromAttrs(Attributes A) {
Attributes StackAlign = A & Attribute::StackAlignment;
- if (StackAlign == 0)
+ if (!StackAlign)
return 0;
- return 1U << ((StackAlign >> 26) - 1);
+ return 1U << ((StackAlign.Raw() >> 26) - 1);
}
@@ -242,7 +313,7 @@ public:
/// paramHasAttr - Return true if the specified parameter index has the
/// specified attribute set.
bool paramHasAttr(unsigned Idx, Attributes Attr) const {
- return (getAttributes(Idx) & Attr) != 0;
+ return getAttributes(Idx) & Attr;
}
/// getParamAlignment - Return the alignment for the specified function
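The class keeps the old bitwise surface, so most call sites read as before; the visible differences are the "safe bool" test and the Raw() escape hatch. A sketch assuming only what the class above declares:

    #include "llvm/Attributes.h"

    static uint64_t demoAttributes() {
      using namespace llvm;
      Attributes A = Attribute::NoUnwind | Attribute::ReadOnly;
      A |= Attribute::NoInline;
      if (A & Attribute::ReadOnly)      // tested via operator const void*
        A = A & ~Attribute::NoInline;   // AttrConst supports ~ and | too
      return A.Raw();                   // raw bits, e.g. for serialization
    }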
diff --git a/include/llvm/AutoUpgrade.h b/include/llvm/AutoUpgrade.h
index 8ca3548f533e..e13c4c12b0f4 100644
--- a/include/llvm/AutoUpgrade.h
+++ b/include/llvm/AutoUpgrade.h
@@ -39,14 +39,6 @@ namespace llvm {
/// This checks for global variables which should be upgraded. It returns true
/// if it requires upgrading.
bool UpgradeGlobalVariable(GlobalVariable *GV);
-
- /// This function checks debug info intrinsics. If an intrinsic is invalid
- /// then this function simply removes the intrinsic.
- void CheckDebugInfoIntrinsics(Module *M);
-
- /// This function upgrades the old pre-3.0 exception handling system to the
- /// new one. N.B. This will be removed in 3.1.
- void UpgradeExceptionHandling(Module *M);
} // End llvm namespace
#endif
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index 1cd8dc55ab58..d2aa1673d921 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -110,12 +110,6 @@ public:
const Function *getParent() const { return Parent; }
Function *getParent() { return Parent; }
- /// use_back - Specialize the methods defined in Value, as we know that a
- /// BasicBlock can only be used by Users (specifically terminators
- /// and BlockAddress's).
- User *use_back() { return cast<User>(*use_begin());}
- const User *use_back() const { return cast<User>(*use_begin());}
-
/// getTerminator() - If this is a well formed basic block, then this returns
/// a pointer to the terminator instruction. If it is not, then you get a
/// null pointer back.
@@ -274,6 +268,7 @@ public:
/// getLandingPadInst() - Return the landingpad instruction associated with
/// the landing pad.
LandingPadInst *getLandingPadInst();
+ const LandingPadInst *getLandingPadInst() const;
private:
/// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h
index f89a86cb0f77..86c44c7f150b 100644
--- a/include/llvm/Bitcode/Archive.h
+++ b/include/llvm/Bitcode/Archive.h
@@ -394,7 +394,7 @@ class Archive {
/// @brief Look up multiple symbols in the archive.
bool findModulesDefiningSymbols(
std::set<std::string>& symbols, ///< Symbols to be sought
- std::set<Module*>& modules, ///< The modules matching \p symbols
+ SmallVectorImpl<Module*>& modules, ///< The modules matching \p symbols
std::string* ErrMessage ///< Error msg storage, if non-zero
);
diff --git a/include/llvm/Bitcode/BitCodes.h b/include/llvm/Bitcode/BitCodes.h
index 449dc35d7de2..28e1ab1c8711 100644
--- a/include/llvm/Bitcode/BitCodes.h
+++ b/include/llvm/Bitcode/BitCodes.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
namespace llvm {
@@ -114,7 +115,6 @@ public:
bool hasEncodingData() const { return hasEncodingData(getEncoding()); }
static bool hasEncodingData(Encoding E) {
switch (E) {
- default: assert(0 && "Unknown encoding");
case Fixed:
case VBR:
return true;
@@ -123,6 +123,7 @@ public:
case Blob:
return false;
}
+ llvm_unreachable("Invalid encoding");
}
/// isChar6 - Return true if this character is legal in the Char6 encoding.
@@ -139,8 +140,7 @@ public:
if (C >= '0' && C <= '9') return C-'0'+26+26;
if (C == '.') return 62;
if (C == '_') return 63;
- assert(0 && "Not a value Char6 character!");
- return 0;
+ llvm_unreachable("Not a value Char6 character!");
}
static char DecodeChar6(unsigned V) {
@@ -150,17 +150,18 @@ public:
if (V < 26+26+10) return V-26-26+'0';
if (V == 62) return '.';
if (V == 63) return '_';
- assert(0 && "Not a value Char6 character!");
- return ' ';
+ llvm_unreachable("Not a value Char6 character!");
}
};
+template <> struct isPodLike<BitCodeAbbrevOp> { static const bool value=true; };
+
/// BitCodeAbbrev - This class represents an abbreviation record. An
/// abbreviation allows a complex record that has redundancy to be stored in a
/// specialized format instead of the fully-general, fully-vbr, format.
class BitCodeAbbrev {
- SmallVector<BitCodeAbbrevOp, 8> OperandList;
+ SmallVector<BitCodeAbbrevOp, 32> OperandList;
unsigned char RefCount; // Number of things using this.
~BitCodeAbbrev() {}
public:
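A quick round trip through the Char6 helpers, whose failure paths now end in llvm_unreachable rather than asserting and returning a dummy value:

    #include <cassert>
    #include "llvm/Bitcode/BitCodes.h"

    static void char6RoundTrip() {
      using llvm::BitCodeAbbrevOp;
      assert(BitCodeAbbrevOp::isChar6('x'));
      unsigned V = BitCodeAbbrevOp::EncodeChar6('x'); // 'x' - 'a' == 23
      char C = BitCodeAbbrevOp::DecodeChar6(V);
      assert(C == 'x');
      (void)C;
    }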
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 0437f53134dc..65868294403c 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -15,7 +15,10 @@
#ifndef BITSTREAM_READER_H
#define BITSTREAM_READER_H
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/BitCodes.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/StreamableMemoryObject.h"
#include <climits>
#include <string>
#include <vector>
@@ -36,9 +39,7 @@ public:
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
private:
- /// FirstChar/LastChar - This remembers the first and last bytes of the
- /// stream.
- const unsigned char *FirstChar, *LastChar;
+ OwningPtr<StreamableMemoryObject> BitcodeBytes;
std::vector<BlockInfo> BlockInfoRecords;
@@ -47,10 +48,10 @@ private:
/// uses this.
bool IgnoreBlockInfoNames;
- BitstreamReader(const BitstreamReader&); // NOT IMPLEMENTED
- void operator=(const BitstreamReader&); // NOT IMPLEMENTED
+ BitstreamReader(const BitstreamReader&); // DO NOT IMPLEMENT
+ void operator=(const BitstreamReader&); // DO NOT IMPLEMENT
public:
- BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
+ BitstreamReader() : IgnoreBlockInfoNames(true) {
}
BitstreamReader(const unsigned char *Start, const unsigned char *End) {
@@ -58,12 +59,17 @@ public:
init(Start, End);
}
+ BitstreamReader(StreamableMemoryObject *bytes) {
+ BitcodeBytes.reset(bytes);
+ }
+
void init(const unsigned char *Start, const unsigned char *End) {
- FirstChar = Start;
- LastChar = End;
assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
+ BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
}
+ StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
+
~BitstreamReader() {
// Free the BlockInfoRecords.
while (!BlockInfoRecords.empty()) {
@@ -75,9 +81,6 @@ public:
BlockInfoRecords.pop_back();
}
}
-
- const unsigned char *getFirstChar() const { return FirstChar; }
- const unsigned char *getLastChar() const { return LastChar; }
/// CollectBlockInfoNames - This is called by clients that want block/record
/// name information.
@@ -122,7 +125,7 @@ public:
class BitstreamCursor {
friend class Deserializer;
BitstreamReader *BitStream;
- const unsigned char *NextChar;
+ size_t NextChar;
/// CurWord - This is the current data we have pulled from the stream but have
/// not returned to the client.
@@ -156,8 +159,7 @@ public:
}
explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
- NextChar = R.getFirstChar();
- assert(NextChar && "Bitstream not initialized yet");
+ NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
@@ -167,8 +169,7 @@ public:
freeState();
BitStream = &R;
- NextChar = R.getFirstChar();
- assert(NextChar && "Bitstream not initialized yet");
+ NextChar = 0;
CurWord = 0;
BitsInCurWord = 0;
CurCodeSize = 2;
@@ -225,13 +226,39 @@ public:
/// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
- bool AtEndOfStream() const {
- return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
+ bool isEndPos(size_t pos) {
+ return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
+ }
+
+ bool canSkipToPos(size_t pos) const {
+ // pos can be skipped to if it is a valid address or one byte past the end.
+ return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
+ static_cast<uint64_t>(pos - 1));
+ }
+
+ unsigned char getByte(size_t pos) {
+ uint8_t byte = -1;
+ BitStream->getBitcodeBytes().readByte(pos, &byte);
+ return byte;
+ }
+
+ uint32_t getWord(size_t pos) {
+ uint8_t buf[sizeof(uint32_t)];
+ memset(buf, 0xFF, sizeof(buf));
+ BitStream->getBitcodeBytes().readBytes(pos,
+ sizeof(buf),
+ buf,
+ NULL);
+ return *reinterpret_cast<support::ulittle32_t *>(buf);
+ }
+
+ bool AtEndOfStream() {
+ return isEndPos(NextChar) && BitsInCurWord == 0;
}
/// GetCurrentBitNo - Return the bit # of the bit we are reading.
uint64_t GetCurrentBitNo() const {
- return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
+ return NextChar*CHAR_BIT - BitsInCurWord;
}
BitstreamReader *getBitStreamReader() {
@@ -246,12 +273,10 @@ public:
void JumpToBit(uint64_t BitNo) {
uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
- assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
- BitStream->getFirstChar()) &&
- "Invalid location");
+ assert(canSkipToPos(ByteNo) && "Invalid location");
// Move the cursor to the right word.
- NextChar = BitStream->getFirstChar()+ByteNo;
+ NextChar = ByteNo;
BitsInCurWord = 0;
CurWord = 0;
@@ -272,7 +297,7 @@ public:
}
// If we run out of data, stop at the end of the stream.
- if (NextChar == BitStream->getLastChar()) {
+ if (isEndPos(NextChar)) {
CurWord = 0;
BitsInCurWord = 0;
return 0;
@@ -281,8 +306,7 @@ public:
unsigned R = CurWord;
// Read the next word from the stream.
- CurWord = (NextChar[0] << 0) | (NextChar[1] << 8) |
- (NextChar[2] << 16) | (NextChar[3] << 24);
+ CurWord = getWord(NextChar);
NextChar += 4;
// Extract NumBits-BitsInCurWord from what we just read.
@@ -376,9 +400,8 @@ public:
// Check that the block wasn't partially defined, and that the offset isn't
// bogus.
- const unsigned char *const SkipTo = NextChar + NumWords*4;
- if (AtEndOfStream() || SkipTo > BitStream->getLastChar() ||
- SkipTo < BitStream->getFirstChar())
+ size_t SkipTo = NextChar + NumWords*4;
+ if (AtEndOfStream() || !canSkipToPos(SkipTo))
return true;
NextChar = SkipTo;
@@ -409,8 +432,7 @@ public:
if (NumWordsP) *NumWordsP = NumWords;
// Validate that this block is sane.
- if (CurCodeSize == 0 || AtEndOfStream() ||
- NextChar+NumWords*4 > BitStream->getLastChar())
+ if (CurCodeSize == 0 || AtEndOfStream())
return true;
return false;
@@ -455,10 +477,10 @@ private:
void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
SmallVectorImpl<uint64_t> &Vals) {
assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
-
+
// Decode the value as we are commanded.
switch (Op.getEncoding()) {
- default: assert(0 && "Unknown encoding!");
+ default: llvm_unreachable("Unknown encoding!");
case BitCodeAbbrevOp::Fixed:
Vals.push_back(Read((unsigned)Op.getEncodingData()));
break;
@@ -512,24 +534,25 @@ public:
SkipToWord(); // 32-bit alignment
// Figure out where the end of this blob will be including tail padding.
- const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+ size_t NewEnd = NextChar+((NumElts+3)&~3);
// If this would read off the end of the bitcode file, just set the
// record to empty and return.
- if (NewEnd > BitStream->getLastChar()) {
+ if (!canSkipToPos(NewEnd)) {
Vals.append(NumElts, 0);
- NextChar = BitStream->getLastChar();
+ NextChar = BitStream->getBitcodeBytes().getExtent();
break;
}
// Otherwise, read the number of bytes. If we can return a reference to
// the data, do so to avoid copying it.
if (BlobStart) {
- *BlobStart = (const char*)NextChar;
+ *BlobStart = (const char*)BitStream->getBitcodeBytes().getPointer(
+ NextChar, NumElts);
*BlobLen = NumElts;
} else {
for (; NumElts; ++NextChar, --NumElts)
- Vals.push_back(*NextChar);
+ Vals.push_back(getByte(NextChar));
}
// Skip over tail padding.
NextChar = NewEnd;
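The client-visible upshot of the NextChar rework: the pointer-based constructor still works (the buffer is wrapped in a non-streamed memory object internally), and a StreamableMemoryObject can be handed straight to the reader, which takes ownership of it. A hedged sketch:

    #include "llvm/Bitcode/BitstreamReader.h"

    static void openBitstream(const unsigned char *Start,
                              const unsigned char *End) {
      using namespace llvm;
      // In-memory path: unchanged client code, now backed internally by
      // getNonStreamedMemoryObject(Start, End).
      BitstreamReader Reader(Start, End);
      BitstreamCursor Cursor(Reader);
      if (!Cursor.AtEndOfStream())
        (void)Cursor.Read(8); // e.g. begin consuming the wrapper magic
    }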
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index bfb3a4e49c51..475da133f8a8 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -16,13 +16,14 @@
#define BITSTREAM_WRITER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitCodes.h"
#include <vector>
namespace llvm {
class BitstreamWriter {
- std::vector<unsigned char> &Out;
+ SmallVectorImpl<char> &Out;
/// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use.
unsigned CurBit;
@@ -59,8 +60,40 @@ class BitstreamWriter {
};
std::vector<BlockInfo> BlockInfoRecords;
+ // BackpatchWord - Backpatch a 32-bit word in the output with the specified
+ // value.
+ void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
+ Out[ByteNo++] = (unsigned char)(NewWord >> 0);
+ Out[ByteNo++] = (unsigned char)(NewWord >> 8);
+ Out[ByteNo++] = (unsigned char)(NewWord >> 16);
+ Out[ByteNo ] = (unsigned char)(NewWord >> 24);
+ }
+
+ void WriteByte(unsigned char Value) {
+ Out.push_back(Value);
+ }
+
+ void WriteWord(unsigned Value) {
+ unsigned char Bytes[4] = {
+ (unsigned char)(Value >> 0),
+ (unsigned char)(Value >> 8),
+ (unsigned char)(Value >> 16),
+ (unsigned char)(Value >> 24) };
+ Out.append(&Bytes[0], &Bytes[4]);
+ }
+
+ unsigned GetBufferOffset() const {
+ return Out.size();
+ }
+
+ unsigned GetWordIndex() const {
+ unsigned Offset = GetBufferOffset();
+ assert((Offset & 3) == 0 && "Not 32-bit aligned");
+ return Offset / 4;
+ }
+
public:
- explicit BitstreamWriter(std::vector<unsigned char> &O)
+ explicit BitstreamWriter(SmallVectorImpl<char> &O)
: Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
~BitstreamWriter() {
@@ -78,10 +111,8 @@ public:
}
}
- std::vector<unsigned char> &getBuffer() { return Out; }
-
/// \brief Retrieve the current position in the stream, in bits.
- uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; }
+ uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; }
//===--------------------------------------------------------------------===//
// Basic Primitives for emitting bits to the stream.
@@ -97,11 +128,7 @@ public:
}
// Add the current word.
- unsigned V = CurValue;
- Out.push_back((unsigned char)(V >> 0));
- Out.push_back((unsigned char)(V >> 8));
- Out.push_back((unsigned char)(V >> 16));
- Out.push_back((unsigned char)(V >> 24));
+ WriteWord(CurValue);
if (CurBit)
CurValue = Val >> (32-CurBit);
@@ -121,11 +148,7 @@ public:
void FlushToWord() {
if (CurBit) {
- unsigned V = CurValue;
- Out.push_back((unsigned char)(V >> 0));
- Out.push_back((unsigned char)(V >> 8));
- Out.push_back((unsigned char)(V >> 16));
- Out.push_back((unsigned char)(V >> 24));
+ WriteWord(CurValue);
CurBit = 0;
CurValue = 0;
}
@@ -164,15 +187,6 @@ public:
Emit(Val, CurCodeSize);
}
- // BackpatchWord - Backpatch a 32-bit word in the output with the specified
- // value.
- void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
- Out[ByteNo++] = (unsigned char)(NewWord >> 0);
- Out[ByteNo++] = (unsigned char)(NewWord >> 8);
- Out[ByteNo++] = (unsigned char)(NewWord >> 16);
- Out[ByteNo ] = (unsigned char)(NewWord >> 24);
- }
-
//===--------------------------------------------------------------------===//
// Block Manipulation
//===--------------------------------------------------------------------===//
@@ -199,7 +213,7 @@ public:
EmitVBR(CodeLen, bitc::CodeLenWidth);
FlushToWord();
- unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size());
+ unsigned BlockSizeWordIndex = GetWordIndex();
unsigned OldCodeSize = CurCodeSize;
// Emit a placeholder, which will be replaced when the block is popped.
@@ -209,7 +223,7 @@ public:
// Push the outer block's abbrev set onto the stack, start out with an
// empty abbrev set.
- BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4));
+ BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex));
BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
// If there is a blockinfo for this BlockID, add all the predefined abbrevs
@@ -239,7 +253,7 @@ public:
FlushToWord();
// Compute the size of the block, in words, not counting the size field.
- unsigned SizeInWords= static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1;
+ unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1;
unsigned ByteNo = B.StartSizeWord*4;
// Update the block size field in the header of this sub-block.
@@ -275,7 +289,7 @@ private:
// Encode the value as we are commanded.
switch (Op.getEncoding()) {
- default: assert(0 && "Unknown encoding!");
+ default: llvm_unreachable("Unknown encoding!");
case BitCodeAbbrevOp::Fixed:
if (Op.getEncodingData())
Emit((unsigned)V, (unsigned)Op.getEncodingData());
@@ -355,25 +369,24 @@ private:
// Flush to a 32-bit alignment boundary.
FlushToWord();
- assert((Out.size() & 3) == 0 && "Not 32-bit aligned");
// Emit each field as a literal byte.
if (BlobData) {
for (unsigned i = 0; i != BlobLen; ++i)
- Out.push_back((unsigned char)BlobData[i]);
+ WriteByte((unsigned char)BlobData[i]);
// Record that the blob data was consumed, for the assertion below.
BlobData = 0;
} else {
for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob");
- Out.push_back((unsigned char)Vals[RecordIdx]);
+ WriteByte((unsigned char)Vals[RecordIdx]);
}
}
+
// Align end to 32-bits.
- while (Out.size() & 3)
- Out.push_back(0);
-
+ while (GetBufferOffset() & 3)
+ WriteByte(0);
} else { // Single scalar field.
assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
EmitAbbreviatedField(Op, Vals[RecordIdx]);
@@ -488,7 +501,7 @@ public:
/// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
void EnterBlockInfoBlock(unsigned CodeWidth) {
EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
- BlockInfoCurBID = -1U;
+ BlockInfoCurBID = ~0U;
}
private:
/// SwitchToBlockID - If we aren't already talking about the specified block
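
EnterSubblock and ExitBlock above cooperate through the BackpatchWord helper this patch moves into the private section: the block header reserves a 32-bit length word, and the length is patched in once the block is closed. A minimal sketch of that bookkeeping over a std::vector (the patched writer uses a SmallVectorImpl<char>; demoBackpatch and the elided body emission are illustrative, not part of the patch):

    #include <cstddef>
    #include <vector>

    static void demoBackpatch(std::vector<char> &Out) {
      // Reserve a 32-bit length field and remember its byte offset.
      size_t SizeFieldByte = Out.size();
      Out.insert(Out.end(), 4, (char)0);    // placeholder word

      // ... emit the 32-bit-aligned block body here ...

      // Body size in words, excluding the length field itself.
      unsigned SizeInWords = (unsigned)((Out.size() - SizeFieldByte) / 4 - 1);
      for (unsigned i = 0; i != 4; ++i)     // little-endian backpatch
        Out[SizeFieldByte + i] = (char)(SizeInWords >> (8 * i));
    }
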
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 4b0dcc36232f..a8c34cb82995 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -29,23 +29,21 @@ namespace bitc {
// Module sub-block id's.
PARAMATTR_BLOCK_ID,
-
- /// TYPE_BLOCK_ID_OLD - This is the type descriptor block in LLVM 2.9 and
- /// earlier, replaced with TYPE_BLOCK_ID2. FIXME: Remove in LLVM 3.1.
- TYPE_BLOCK_ID_OLD,
+
+ UNUSED_ID1,
CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,
- /// TYPE_SYMTAB_BLOCK_ID_OLD - This type descriptor is from LLVM 2.9 and
- /// earlier bitcode files. FIXME: Remove in LLVM 3.1
- TYPE_SYMTAB_BLOCK_ID_OLD,
+ UNUSED_ID2,
VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
METADATA_ATTACHMENT_ID,
- TYPE_BLOCK_ID_NEW
+ TYPE_BLOCK_ID_NEW,
+
+ USELIST_BLOCK_ID
};
@@ -63,10 +61,10 @@ namespace bitc {
MODULE_CODE_GLOBALVAR = 7,
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
- // section, visibility]
+ // section, visibility, gc, unnamed_addr]
MODULE_CODE_FUNCTION = 8,
- // ALIAS: [alias type, aliasee val#, linkage]
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
MODULE_CODE_ALIAS = 9,
/// MODULE_CODE_PURGEVALS: [numvals]
@@ -92,11 +90,12 @@ namespace bitc {
TYPE_CODE_OPAQUE = 6, // OPAQUE
TYPE_CODE_INTEGER = 7, // INTEGER: [width]
TYPE_CODE_POINTER = 8, // POINTER: [pointee type]
- TYPE_CODE_FUNCTION = 9, // FUNCTION: [vararg, retty, paramty x N]
+
+ TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty,
+ // paramty x N]
+
+ TYPE_CODE_HALF = 10, // HALF
- // FIXME: This is the encoding used for structs in LLVM 2.9 and earlier.
- // REMOVE this in LLVM 3.1
- TYPE_CODE_STRUCT_OLD = 10, // STRUCT: [ispacked, eltty x N]
TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty]
TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty]
@@ -113,7 +112,9 @@ namespace bitc {
TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N]
TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N]
- TYPE_CODE_STRUCT_NAMED = 20 // STRUCT_NAMED: [ispacked, eltty x N]
+ TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N]
+
+ TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N]
};
// The type symbol table only has one code (TST_ENTRY_CODE).
@@ -163,7 +164,8 @@ namespace bitc {
CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr]
CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval]
CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP: [n x operands]
- CST_CODE_BLOCKADDRESS = 21 // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#]
+ CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#]
+ CST_CODE_DATA = 22 // DATA: [n x elements]
};
/// CastOpcodes - These are values used in the bitcode files to encode which
@@ -270,7 +272,7 @@ namespace bitc {
FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#]
FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...]
FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...]
- FUNC_CODE_INST_UNWIND = 14, // UNWIND
+ // 14 is unused.
FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE
FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...]
@@ -314,6 +316,10 @@ namespace bitc {
FUNC_CODE_INST_STOREATOMIC = 42 // STORE: [ptrty,ptr,val, align, vol
// ordering, synchscope]
};
+
+ enum UseListCodes {
+ USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD.
+ };
} // End bitc namespace
} // End llvm namespace
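
Note the pattern in the block-ID hunk above: obsolete entries become UNUSED_ID1/UNUSED_ID2 rather than being deleted, and TYPE_CODE_FUNCTION is re-added at the end as value 21. These bitcode enums take implicit consecutive values, so removing an enumerator would renumber everything after it and silently change the meaning of already-written files. A toy illustration (names invented):

    enum OnDiskTag {
      TAG_A,       // 0
      TAG_UNUSED,  // 1 -- retired, but kept so TAG_B stays 2 forever
      TAG_B        // 2
    };
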
diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h
index fa754c014621..cc2b473f2c57 100644
--- a/include/llvm/Bitcode/ReaderWriter.h
+++ b/include/llvm/Bitcode/ReaderWriter.h
@@ -17,35 +17,45 @@
#include <string>
namespace llvm {
- class Module;
- class MemoryBuffer;
- class ModulePass;
class BitstreamWriter;
+ class MemoryBuffer;
+ class DataStreamer;
class LLVMContext;
+ class Module;
+ class ModulePass;
class raw_ostream;
-
+
/// getLazyBitcodeModule - Read the header of the specified bitcode buffer
/// and prepare for lazy deserialization of function bodies. If successful,
/// this takes ownership of 'buffer' and returns a non-null pointer. On
/// error, this returns null, *does not* take ownership of Buffer, and fills
/// in *ErrMsg with an error description if ErrMsg is non-null.
Module *getLazyBitcodeModule(MemoryBuffer *Buffer,
- LLVMContext& Context,
+ LLVMContext &Context,
std::string *ErrMsg = 0);
+ /// getStreamedBitcodeModule - Read the header of the specified stream
+ /// and prepare for lazy deserialization and streaming of function bodies.
+ /// On error, this returns null, and fills in *ErrMsg with an error
+ /// description if ErrMsg is non-null.
+ Module *getStreamedBitcodeModule(const std::string &name,
+ DataStreamer *streamer,
+ LLVMContext &Context,
+ std::string *ErrMsg = 0);
+
/// getBitcodeTargetTriple - Read the header of the specified bitcode
/// buffer and extract just the triple information. If successful,
/// this returns a string and *does not* take ownership
/// of 'buffer'. On error, this returns "", and fills in *ErrMsg
/// if ErrMsg is non-null.
std::string getBitcodeTargetTriple(MemoryBuffer *Buffer,
- LLVMContext& Context,
+ LLVMContext &Context,
std::string *ErrMsg = 0);
/// ParseBitcodeFile - Read the specified bitcode file, returning the module.
/// If an error occurs, this returns null and fills in *ErrMsg if it is
/// non-null. This method *never* takes ownership of Buffer.
- Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
+ Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context,
std::string *ErrMsg = 0);
/// WriteBitcodeToFile - Write the specified module to the specified
@@ -53,15 +63,11 @@ namespace llvm {
/// should be in "binary" mode.
void WriteBitcodeToFile(const Module *M, raw_ostream &Out);
- /// WriteBitcodeToStream - Write the specified module to the specified
- /// raw output stream.
- void WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream);
-
/// createBitcodeWriterPass - Create and return a pass that writes the module
/// to the specified ostream.
ModulePass *createBitcodeWriterPass(raw_ostream &Str);
-
-
+
+
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
///
@@ -109,21 +115,24 @@ namespace llvm {
/// uint32_t BitcodeSize; // Size of traditional bitcode file.
/// ... potentially other gunk ...
/// };
- ///
+ ///
/// This function is called when we find a file with a matching magic number.
/// In this case, skip down to the subsection of the file that is actually a
/// BC file.
- static inline bool SkipBitcodeWrapperHeader(unsigned char *&BufPtr,
- unsigned char *&BufEnd) {
+ /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
+ /// contain the whole bitcode file.
+ static inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
+ const unsigned char *&BufEnd,
+ bool VerifyBufferSize) {
enum {
KnownHeaderSize = 4*4, // Size of header we read.
OffsetField = 2*4, // Offset in bytes to Offset field.
SizeField = 3*4 // Offset in bytes to Size field.
};
-
+
// Must contain the header!
if (BufEnd-BufPtr < KnownHeaderSize) return true;
-
+
unsigned Offset = ( BufPtr[OffsetField ] |
(BufPtr[OffsetField+1] << 8) |
(BufPtr[OffsetField+2] << 16) |
@@ -132,9 +141,9 @@ namespace llvm {
(BufPtr[SizeField +1] << 8) |
(BufPtr[SizeField +2] << 16) |
(BufPtr[SizeField +3] << 24));
-
+
// Verify that Offset+Size fits in the file.
- if (Offset+Size > unsigned(BufEnd-BufPtr))
+ if (VerifyBufferSize && Offset+Size > unsigned(BufEnd-BufPtr))
return true;
BufPtr += Offset;
BufEnd = BufPtr+Size;
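
SkipBitcodeWrapperHeader above reads the Offset and Size fields of the wrapper struct as 32-bit little-endian words at byte offsets 8 and 12. A self-contained sketch of that decoding, assuming a buffer of at least 16 bytes (readLE32 and parseWrapper are illustrative names, not LLVM API):

    #include <cstdint>

    static uint32_t readLE32(const unsigned char *P) {
      return (uint32_t)P[0]         | ((uint32_t)P[1] << 8) |
             ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
    }

    struct WrapperHeader {
      uint32_t Magic, Version, Offset, Size;
    };

    static WrapperHeader parseWrapper(const unsigned char *Buf) {
      WrapperHeader H;
      H.Magic   = readLE32(Buf + 0);
      H.Version = readLE32(Buf + 4);
      H.Offset  = readLE32(Buf + 8);   // where the embedded bitcode starts
      H.Size    = readLE32(Buf + 12);  // size of the embedded bitcode
      return H;
    }
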
diff --git a/include/llvm/CMakeLists.txt b/include/llvm/CMakeLists.txt
index 7956f8cafcfa..de3ff8623cff 100644
--- a/include/llvm/CMakeLists.txt
+++ b/include/llvm/CMakeLists.txt
@@ -1,6 +1,6 @@
set(LLVM_TARGET_DEFINITIONS Intrinsics.td)
-llvm_tablegen(Intrinsics.gen -gen-intrinsic)
+tablegen(LLVM Intrinsics.gen -gen-intrinsic)
add_custom_target(intrinsics_gen ALL
DEPENDS ${llvm_builded_incs_dir}/Intrinsics.gen)
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index d8e64071a1d9..0b609ed6586e 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -27,6 +27,7 @@ namespace llvm {
class GlobalVariable;
class TargetLowering;
class SDNode;
+class SDValue;
class SelectionDAG;
/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
@@ -70,6 +71,10 @@ bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
///
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred);
+/// getFCmpCodeWithoutNaN - Given an ISD condition code comparing floats,
+/// return the equivalent code if we're allowed to assume that NaNs won't occur.
+ISD::CondCode getFCmpCodeWithoutNaN(ISD::CondCode CC);
+
/// getICmpCondCode - Return the ISD condition code corresponding to
/// the given LLVM IR integer condition code.
///
@@ -85,7 +90,7 @@ bool isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
const TargetLowering &TLI);
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- const TargetLowering &TLI);
+ SDValue &Chain, const TargetLowering &TLI);
} // End llvm namespace
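
The new getFCmpCodeWithoutNaN declared above relies on the fact that once NaNs are excluded, the ordered and unordered variants of a float comparison agree. A toy sketch of that collapse over an invented enum (the real mapping lives in the implementation file, which is not part of this hunk):

    enum CondCode { SETOLT, SETULT, SETLT };

    static CondCode withoutNaN(CondCode CC) {
      switch (CC) {
      case SETOLT:                // ordered: false if either input is NaN
      case SETULT: return SETLT;  // unordered: true if either input is NaN
      default:     return CC;     // no NaNs: both mean plain "less than"
      }
    }
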
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 06c5c83c95ec..56a87f139a21 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -18,16 +18,12 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class BlockAddress;
class GCStrategy;
class Constant;
- class ConstantArray;
- class ConstantFP;
- class ConstantInt;
- class ConstantStruct;
- class ConstantVector;
class GCMetadataPrinter;
class GlobalValue;
class GlobalVariable;
@@ -37,14 +33,11 @@ namespace llvm {
class MachineLocation;
class MachineLoopInfo;
class MachineLoop;
- class MachineConstantPool;
- class MachineConstantPoolEntry;
class MachineConstantPoolValue;
class MachineJumpTableInfo;
class MachineModuleInfo;
class MachineMove;
class MCAsmInfo;
- class MCInst;
class MCContext;
class MCSection;
class MCStreamer;
@@ -56,8 +49,6 @@ namespace llvm {
class TargetLoweringObjectFile;
class TargetData;
class TargetMachine;
- class Twine;
- class Type;
/// AsmPrinter - This class is intended to be used as a driving class for all
/// asm writers.
@@ -97,6 +88,11 @@ namespace llvm {
///
MCSymbol *CurrentFnSym;
+ /// The symbol used to represent the start of the current function for the
+ /// purpose of calculating its size (e.g. using the .size directive). By
+ /// default, this is equal to CurrentFnSym.
+ MCSymbol *CurrentFnSymForSize;
+
private:
// GCMetadataPrinters - The garbage collection metadata printer table.
void *GCMetadataPrinters; // Really a DenseMap.
@@ -194,6 +190,11 @@ namespace llvm {
bool needsSEHMoves();
+ /// needsRelocationsForDwarfStringPool - Specifies whether the object format
+ /// expects to use relocations to refer to debug entries. Alternatively we
+ /// emit section offsets in bytes from the start of the string pool.
+ bool needsRelocationsForDwarfStringPool() const;
+
/// EmitConstantPool - Print to the current output stream assembly
/// representations of the constants in the constant pool MCP. This is
/// used to print out constants which have been "spilled to memory" by
@@ -256,13 +257,20 @@ namespace llvm {
/// EmitInstruction - Targets should implement this to emit instructions.
virtual void EmitInstruction(const MachineInstr *) {
- assert(0 && "EmitInstruction not implemented");
+ llvm_unreachable("EmitInstruction not implemented");
}
virtual void EmitFunctionEntryLabel();
virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+ /// EmitXXStructor - Targets can override this to change how global
+ /// constants that are part of a C++ static/global constructor list are
+ /// emitted.
+ virtual void EmitXXStructor(const Constant *CV) {
+ EmitGlobalConstant(CV);
+ }
+
/// isBlockOnlyReachableByFallthough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
/// the predecessor and this block is a fall-through.
@@ -466,7 +474,7 @@ namespace llvm {
const MachineBasicBlock *MBB,
unsigned uid) const;
void EmitLLVMUsedList(const Constant *List);
- void EmitXXStructorList(const Constant *List);
+ void EmitXXStructorList(const Constant *List, bool isCtor);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C);
};
}
diff --git a/include/llvm/CodeGen/BinaryObject.h b/include/llvm/CodeGen/BinaryObject.h
deleted file mode 100644
index 8c1431ffbeed..000000000000
--- a/include/llvm/CodeGen/BinaryObject.h
+++ /dev/null
@@ -1,353 +0,0 @@
-//===-- llvm/CodeGen/BinaryObject.h - Binary Object. -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a BinaryObject, aka a "blob", for holding data from code
-// generators, ready to be written out by the object-module code writers.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_BINARYOBJECT_H
-#define LLVM_CODEGEN_BINARYOBJECT_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Support/DataTypes.h"
-
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-typedef std::vector<uint8_t> BinaryData;
-
-class BinaryObject {
-protected:
- std::string Name;
- bool IsLittleEndian;
- bool Is64Bit;
- BinaryData Data;
- std::vector<MachineRelocation> Relocations;
-
-public:
- /// Constructors and destructor
- BinaryObject() {}
-
- BinaryObject(bool isLittleEndian, bool is64Bit)
- : IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
-
- BinaryObject(const std::string &name, bool isLittleEndian, bool is64Bit)
- : Name(name), IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
-
- ~BinaryObject() {}
-
- /// getName - get name of BinaryObject
- inline std::string getName() const { return Name; }
-
- /// get size of binary data
- size_t size() const {
- return Data.size();
- }
-
- /// get binary data
- BinaryData& getData() {
- return Data;
- }
-
- /// get machine relocations
- const std::vector<MachineRelocation>& getRelocations() const {
- return Relocations;
- }
-
- /// hasRelocations - Return true if 'Relocations' is not empty
- bool hasRelocations() const {
- return !Relocations.empty();
- }
-
- /// emitZeros - This callback is invoked to emit an arbitrary number
- /// of zero bytes to the data stream.
- inline void emitZeros(unsigned Size) {
- for (unsigned i=0; i < Size; ++i)
- emitByte(0);
- }
-
- /// emitByte - This callback is invoked when a byte needs to be
- /// written to the data stream.
- inline void emitByte(uint8_t B) {
- Data.push_back(B);
- }
-
- /// emitWord16 - This callback is invoked when a 16-bit word needs to be
- /// written to the data stream in correct endian format and correct size.
- inline void emitWord16(uint16_t W) {
- if (IsLittleEndian)
- emitWord16LE(W);
- else
- emitWord16BE(W);
- }
-
- /// emitWord16LE - This callback is invoked when a 16-bit word needs to be
- /// written to the data stream in correct endian format and correct size.
- inline void emitWord16LE(uint16_t W) {
- Data.push_back((uint8_t)(W >> 0));
- Data.push_back((uint8_t)(W >> 8));
- }
-
- /// emitWord16BE - This callback is invoked when a 16-bit word needs to be
- /// written to the data stream in correct endian format and correct size.
- inline void emitWord16BE(uint16_t W) {
- Data.push_back((uint8_t)(W >> 8));
- Data.push_back((uint8_t)(W >> 0));
- }
-
- /// emitWord - This callback is invoked when a word needs to be
- /// written to the data stream in correct endian format and correct size.
- inline void emitWord(uint64_t W) {
- if (!Is64Bit)
- emitWord32(W);
- else
- emitWord64(W);
- }
-
- /// emitWord32 - This callback is invoked when a 32-bit word needs to be
- /// written to the data stream in correct endian format.
- inline void emitWord32(uint32_t W) {
- if (IsLittleEndian)
- emitWordLE(W);
- else
- emitWordBE(W);
- }
-
- /// emitWord64 - This callback is invoked when a 64-bit word needs to be
- /// written to the data stream in correct endian format.
- inline void emitWord64(uint64_t W) {
- if (IsLittleEndian)
- emitDWordLE(W);
- else
- emitDWordBE(W);
- }
-
- /// emitWordFP80 - This callback is invoked when an x86_fp80 needs to be
- /// written to the data stream in correct endian format.
- inline void emitWordFP80(const uint64_t *W, unsigned PadSize) {
- if (IsLittleEndian) {
- emitWord64(W[0]);
- emitWord16(W[1]);
- } else {
- emitWord16(W[1]);
- emitWord64(W[0]);
- }
- emitZeros(PadSize);
- }
-
- /// emitWordLE - This callback is invoked when a 32-bit word needs to be
- /// written to the data stream in little-endian format.
- inline void emitWordLE(uint32_t W) {
- Data.push_back((uint8_t)(W >> 0));
- Data.push_back((uint8_t)(W >> 8));
- Data.push_back((uint8_t)(W >> 16));
- Data.push_back((uint8_t)(W >> 24));
- }
-
- /// emitWordBE - This callback is invoked when a 32-bit word needs to be
- /// written to the data stream in big-endian format.
- ///
- inline void emitWordBE(uint32_t W) {
- Data.push_back((uint8_t)(W >> 24));
- Data.push_back((uint8_t)(W >> 16));
- Data.push_back((uint8_t)(W >> 8));
- Data.push_back((uint8_t)(W >> 0));
- }
-
- /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
- /// written to the data stream in little-endian format.
- inline void emitDWordLE(uint64_t W) {
- Data.push_back((uint8_t)(W >> 0));
- Data.push_back((uint8_t)(W >> 8));
- Data.push_back((uint8_t)(W >> 16));
- Data.push_back((uint8_t)(W >> 24));
- Data.push_back((uint8_t)(W >> 32));
- Data.push_back((uint8_t)(W >> 40));
- Data.push_back((uint8_t)(W >> 48));
- Data.push_back((uint8_t)(W >> 56));
- }
-
- /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
- /// written to the data stream in big-endian format.
- inline void emitDWordBE(uint64_t W) {
- Data.push_back((uint8_t)(W >> 56));
- Data.push_back((uint8_t)(W >> 48));
- Data.push_back((uint8_t)(W >> 40));
- Data.push_back((uint8_t)(W >> 32));
- Data.push_back((uint8_t)(W >> 24));
- Data.push_back((uint8_t)(W >> 16));
- Data.push_back((uint8_t)(W >> 8));
- Data.push_back((uint8_t)(W >> 0));
- }
-
- /// fixByte - This callback is invoked when a byte needs to be
- /// fixed up in the buffer.
- inline void fixByte(uint8_t B, uint32_t offset) {
- Data[offset] = B;
- }
-
- /// fixWord16 - This callback is invoked when a 16-bit word needs to be
- /// fixed up in the data stream in the correct endian format.
- inline void fixWord16(uint16_t W, uint32_t offset) {
- if (IsLittleEndian)
- fixWord16LE(W, offset);
- else
- fixWord16BE(W, offset);
- }
-
- /// fixWord16LE - This callback is invoked when a 16-bit word needs to be
- /// fixed up in the data stream in little-endian format.
- inline void fixWord16LE(uint16_t W, uint32_t offset) {
- Data[offset] = (uint8_t)(W >> 0);
- Data[++offset] = (uint8_t)(W >> 8);
- }
-
- /// fixWord16BE - This callback is invoked when a 16-bit word needs to be
- /// fixed up in the data stream in big-endian format.
- inline void fixWord16BE(uint16_t W, uint32_t offset) {
- Data[offset] = (uint8_t)(W >> 8);
- Data[++offset] = (uint8_t)(W >> 0);
- }
-
- /// fixWord - This callback is invoked when a word needs to be
- /// fixed up in the data in the correct endian format and size.
- inline void fixWord(uint64_t W, uint32_t offset) {
- if (!Is64Bit)
- fixWord32(W, offset);
- else
- fixWord64(W, offset);
- }
-
- /// fixWord32 - This callback is invoked when a 32-bit word needs to be
- /// fixed up in the data in the correct endian format.
- inline void fixWord32(uint32_t W, uint32_t offset) {
- if (IsLittleEndian)
- fixWord32LE(W, offset);
- else
- fixWord32BE(W, offset);
- }
-
- /// fixWord32LE - This callback is invoked when a 32-bit word needs to be
- /// fixed up in the data in little-endian format.
- inline void fixWord32LE(uint32_t W, uint32_t offset) {
- Data[offset] = (uint8_t)(W >> 0);
- Data[++offset] = (uint8_t)(W >> 8);
- Data[++offset] = (uint8_t)(W >> 16);
- Data[++offset] = (uint8_t)(W >> 24);
- }
-
- /// fixWord32BE - This callback is invoked when a 32-bit word needs to be
- /// fixed up in the data in big-endian format.
- inline void fixWord32BE(uint32_t W, uint32_t offset) {
- Data[offset] = (uint8_t)(W >> 24);
- Data[++offset] = (uint8_t)(W >> 16);
- Data[++offset] = (uint8_t)(W >> 8);
- Data[++offset] = (uint8_t)(W >> 0);
- }
-
- /// fixWord64 - This callback is invoked when a 64-bit word needs to be
- /// fixed up in the data in the correct endian format.
- inline void fixWord64(uint64_t W, uint32_t offset) {
- if (IsLittleEndian)
- fixWord64LE(W, offset);
- else
- fixWord64BE(W, offset);
- }
-
- /// fixWord64LE - This callback is invoked when a 64-bit word needs to be
- /// fixed up in the data in little-endian format.
- inline void fixWord64LE(uint64_t W, uint32_t offset) {
- Data[offset] = (uint8_t)(W >> 0);
- Data[++offset] = (uint8_t)(W >> 8);
- Data[++offset] = (uint8_t)(W >> 16);
- Data[++offset] = (uint8_t)(W >> 24);
- Data[++offset] = (uint8_t)(W >> 32);
- Data[++offset] = (uint8_t)(W >> 40);
- Data[++offset] = (uint8_t)(W >> 48);
- Data[++offset] = (uint8_t)(W >> 56);
- }
-
- /// fixWord64BE - This callback is invoked when a 64-bit word needs to be
- /// fixed up in the data in big-endian format.
- inline void fixWord64BE(uint64_t W, uint32_t offset) {
- Data[offset] = (uint8_t)(W >> 56);
- Data[++offset] = (uint8_t)(W >> 48);
- Data[++offset] = (uint8_t)(W >> 40);
- Data[++offset] = (uint8_t)(W >> 32);
- Data[++offset] = (uint8_t)(W >> 24);
- Data[++offset] = (uint8_t)(W >> 16);
- Data[++offset] = (uint8_t)(W >> 8);
- Data[++offset] = (uint8_t)(W >> 0);
- }
-
- /// emitAlignment - Pad the data to the specified alignment.
- void emitAlignment(unsigned Alignment, uint8_t fill = 0) {
- if (Alignment <= 1) return;
- unsigned PadSize = -Data.size() & (Alignment-1);
- for (unsigned i = 0; i<PadSize; ++i)
- Data.push_back(fill);
- }
-
- /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
- /// written to the data stream.
- void emitULEB128Bytes(uint64_t Value) {
- do {
- uint8_t Byte = (uint8_t)(Value & 0x7f);
- Value >>= 7;
- if (Value) Byte |= 0x80;
- emitByte(Byte);
- } while (Value);
- }
-
- /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
- /// written to the data stream.
- void emitSLEB128Bytes(int64_t Value) {
- int Sign = Value >> (8 * sizeof(Value) - 1);
- bool IsMore;
-
- do {
- uint8_t Byte = (uint8_t)(Value & 0x7f);
- Value >>= 7;
- IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
- if (IsMore) Byte |= 0x80;
- emitByte(Byte);
- } while (IsMore);
- }
-
- /// emitString - This callback is invoked when a String needs to be
- /// written to the data stream.
- void emitString(const std::string &String) {
- for (unsigned i = 0, N = static_cast<unsigned>(String.size()); i<N; ++i) {
- unsigned char C = String[i];
- emitByte(C);
- }
- emitByte(0);
- }
-
- /// getCurrentPCOffset - Return the offset from the start of the emitted
- /// buffer that we are currently writing to.
- uintptr_t getCurrentPCOffset() const {
- return Data.size();
- }
-
- /// addRelocation - Whenever a relocatable address is needed, it should be
- /// noted with this interface.
- void addRelocation(const MachineRelocation& relocation) {
- Relocations.push_back(relocation);
- }
-
-};
-
-} // end namespace llvm
-
-#endif
-
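
The deleted file's emitULEB128Bytes/emitSLEB128Bytes show the encoding side of LEB128: seven payload bits per byte, with the high bit set while more bytes follow. For reference, the matching decode loop looks roughly like this (a sketch, not part of the patch; decodeULEB128 is an illustrative name):

    #include <cstdint>

    static uint64_t decodeULEB128(const uint8_t *P, unsigned *Len = 0) {
      uint64_t Value = 0;
      unsigned Shift = 0, Count = 0;
      uint8_t Byte;
      do {
        Byte = P[Count++];
        Value |= (uint64_t)(Byte & 0x7f) << Shift;  // splice in 7 bits
        Shift += 7;
      } while (Byte & 0x80);                        // high bit = continue
      if (Len) *Len = Count;
      return Value;
    }
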
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index 77dc644f761e..3afe3095d4f6 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -229,7 +229,7 @@ public:
/// getFirstUnallocated - Return the first unallocated register in the set, or
/// NumRegs if they are all allocated.
- unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+ unsigned getFirstUnallocated(const uint16_t *Regs, unsigned NumRegs) const {
for (unsigned i = 0; i != NumRegs; ++i)
if (!isAllocated(Regs[i]))
return i;
@@ -256,7 +256,7 @@ public:
/// AllocateReg - Attempt to allocate one of the specified registers. If none
/// are available, return zero. Otherwise, return the first one available,
/// marking it and any aliases as allocated.
- unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+ unsigned AllocateReg(const uint16_t *Regs, unsigned NumRegs) {
unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
if (FirstUnalloc == NumRegs)
return 0; // Didn't find the reg.
@@ -268,7 +268,7 @@ public:
}
/// Version of AllocateReg with list of registers to be shadowed.
- unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+ unsigned AllocateReg(const uint16_t *Regs, const uint16_t *ShadowRegs,
unsigned NumRegs) {
unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
if (FirstUnalloc == NumRegs)
@@ -306,12 +306,12 @@ public:
// First GPR that carries part of a byval aggregate that's split
// between registers and memory.
- unsigned getFirstByValReg() { return FirstByValRegValid ? FirstByValReg : 0; }
+ unsigned getFirstByValReg() const { return FirstByValRegValid ? FirstByValReg : 0; }
void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; }
void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; }
- bool isFirstByValRegValid() { return FirstByValRegValid; }
+ bool isFirstByValRegValid() const { return FirstByValRegValid; }
- ParmContext getCallOrPrologue() { return CallOrPrologue; }
+ ParmContext getCallOrPrologue() const { return CallOrPrologue; }
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
new file mode 100644
index 000000000000..2d2db78144a4
--- /dev/null
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -0,0 +1,167 @@
+//=- llvm/CodeGen/DFAPacketizer.h - DFA Packetizer for VLIW ---*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DFAPACKETIZER_H
+#define LLVM_CODEGEN_DFAPACKETIZER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+#include <map>
+
+namespace llvm {
+
+class MCInstrDesc;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineDominatorTree;
+class InstrItineraryData;
+class DefaultVLIWScheduler;
+class SUnit;
+
+class DFAPacketizer {
+private:
+ typedef std::pair<unsigned, unsigned> UnsignPair;
+ const InstrItineraryData *InstrItins;
+ int CurrentState;
+ const int (*DFAStateInputTable)[2];
+ const unsigned *DFAStateEntryTable;
+
+ // CachedTable is a map from <FromState, Input> to ToState.
+ DenseMap<UnsignPair, unsigned> CachedTable;
+
+ // ReadTable - Read the DFA transition table and update CachedTable.
+ void ReadTable(unsigned int state);
+
+public:
+ DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET);
+
+ // Reset the current state to make all resources available.
+ void clearResources() {
+ CurrentState = 0;
+ }
+
+ // canReserveResources - Check if the resources occupied by a MCInstrDesc
+ // are available in the current state.
+ bool canReserveResources(const llvm::MCInstrDesc *MID);
+
+ // reserveResources - Reserve the resources occupied by a MCInstrDesc and
+ // change the current state to reflect that change.
+ void reserveResources(const llvm::MCInstrDesc *MID);
+
+ // canReserveResources - Check if the resources occupied by a machine
+ // instruction are available in the current state.
+ bool canReserveResources(llvm::MachineInstr *MI);
+
+ // reserveResources - Reserve the resources occupied by a machine
+ // instruction and change the current state to reflect that change.
+ void reserveResources(llvm::MachineInstr *MI);
+
+ const InstrItineraryData *getInstrItins() const { return InstrItins; }
+};
+
+// VLIWPacketizerList - Implements a simple VLIW packetizer using DFA. The
+// packetizer works on machine basic blocks. For each instruction I in BB, the
+// packetizer consults the DFA to see if machine resources are available to
+// execute I. If so, the packetizer checks if I depends on any instruction J in
+// the current packet. If no dependency is found, I is added to the current
+// packet and the machine resources are marked as taken. If any dependency is
+// found, a target API call is made to prune the dependence.
+class VLIWPacketizerList {
+protected:
+ const TargetMachine &TM;
+ const MachineFunction &MF;
+ const TargetInstrInfo *TII;
+
+ // The VLIW Scheduler.
+ DefaultVLIWScheduler *VLIWScheduler;
+
+ // Vector of instructions assigned to the current packet.
+ std::vector<MachineInstr*> CurrentPacketMIs;
+ // DFA resource tracker.
+ DFAPacketizer *ResourceTracker;
+
+ // Generate MI -> SU map.
+ std::map<MachineInstr*, SUnit*> MIToSUnit;
+
+public:
+ VLIWPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA);
+
+ virtual ~VLIWPacketizerList();
+
+ // PacketizeMIs - Implement this API in the backend to bundle instructions.
+ void PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr);
+
+ // getResourceTracker - return ResourceTracker
+ DFAPacketizer *getResourceTracker() {return ResourceTracker;}
+
+ // addToPacket - Add MI to the current packet.
+ virtual MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
+ MachineBasicBlock::iterator MII = MI;
+ CurrentPacketMIs.push_back(MI);
+ ResourceTracker->reserveResources(MI);
+ return MII;
+ }
+
+ // endPacket - End the current packet.
+ void endPacket(MachineBasicBlock *MBB, MachineInstr *MI);
+
+ // initPacketizerState - perform initialization before packetizing
+  // an instruction. This function is supposed to be overridden by
+  // the target-dependent packetizer.
+  virtual void initPacketizerState() {}
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ virtual bool ignorePseudoInstruction(MachineInstr *I,
+ MachineBasicBlock *MBB) {
+ return false;
+ }
+
+  // isSoloInstruction - return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ virtual bool isSoloInstruction(MachineInstr *MI) {
+ return true;
+ }
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together?
+ virtual bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ return false;
+ }
+
+  // isLegalToPruneDependencies - Is it legal to prune the dependence between
+  // SUI and SUJ?
+ virtual bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+ return false;
+ }
+
+};
+}
+
+#endif
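
The header comment above describes the packetizer's core operation: a DFA state encodes the functional units consumed so far, and adding an instruction is a transition keyed on <FromState, Input>, cached in a DenseMap. A minimal sketch of that lookup using std::map (the table contents and tryAddToPacket are invented for illustration):

    #include <map>
    #include <utility>

    typedef std::pair<unsigned, unsigned> StateInput;

    bool tryAddToPacket(const std::map<StateInput, unsigned> &Transitions,
                        unsigned &CurrentState, unsigned InsnClass) {
      std::map<StateInput, unsigned>::const_iterator I =
          Transitions.find(StateInput(CurrentState, InsnClass));
      if (I == Transitions.end())
        return false;             // no legal FU assignment; start a new packet
      CurrentState = I->second;   // consume the functional units
      return true;
    }
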
diff --git a/include/llvm/CodeGen/EdgeBundles.h b/include/llvm/CodeGen/EdgeBundles.h
index 8aab3c64f170..a1d29b1f02c5 100644
--- a/include/llvm/CodeGen/EdgeBundles.h
+++ b/include/llvm/CodeGen/EdgeBundles.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
namespace llvm {
@@ -61,7 +62,7 @@ private:
/// Specialize WriteGraph, the standard implementation won't work.
raw_ostream &WriteGraph(raw_ostream &O, const EdgeBundles &G,
bool ShortNames = false,
- const std::string &Title = "");
+ const Twine &Title = "");
} // end namespace llvm
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 18202d93b460..e57c8b18c63e 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -21,9 +21,11 @@
namespace llvm {
class AllocaInst;
+class Constant;
class ConstantFP;
class FunctionLoweringInfo;
class Instruction;
+class LoadInst;
class MachineBasicBlock;
class MachineConstantPool;
class MachineFunction;
@@ -36,7 +38,8 @@ class TargetLowering;
class TargetMachine;
class TargetRegisterClass;
class TargetRegisterInfo;
-class LoadInst;
+class User;
+class Value;
/// FastISel - This is a fast-path instruction selection class that
/// generates poor code and doesn't support illegal types or non-trivial
@@ -358,6 +361,8 @@ private:
bool SelectExtractValue(const User *I);
+ bool SelectInsertValue(const User *I);
+
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
/// Emit code to ensure constants are copied into registers when needed.
/// Remember the virtual registers that need to be added to the Machine PHI
@@ -378,6 +383,10 @@ private:
/// hasTrivialKill - Test whether the given value has exactly one use.
bool hasTrivialKill(const Value *V) const;
+
+  /// removeDeadCode - Remove all dead instructions between I and E.
+ void removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E);
};
}
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 09dac8547a62..8cf22eca4fa6 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -21,10 +21,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -98,8 +96,8 @@ public:
MachineBasicBlock::iterator InsertPt;
#ifndef NDEBUG
- SmallSet<const Instruction *, 8> CatchInfoLost;
- SmallSet<const Instruction *, 8> CatchInfoFound;
+ SmallPtrSet<const Instruction *, 8> CatchInfoLost;
+ SmallPtrSet<const Instruction *, 8> CatchInfoFound;
#endif
struct LiveOutInfo {
@@ -112,7 +110,7 @@ public:
/// VisitedBBs - The set of basic blocks visited thus far by instruction
/// selection.
- DenseSet<const BasicBlock*> VisitedBBs;
+ SmallPtrSet<const BasicBlock*, 4> VisitedBBs;
/// PHINodesToUpdate - A list of phi instructions whose operand list will
/// be updated after processing the current basic block.
@@ -202,7 +200,7 @@ public:
/// setArgumentFrameIndex - Record frame index for the byval
/// argument.
void setArgumentFrameIndex(const Argument *A, int FI);
-
+
/// getArgumentFrameIndex - Get frame index for the byval argument.
int getArgumentFrameIndex(const Argument *A);
@@ -211,16 +209,18 @@ private:
IndexedMap<LiveOutInfo, VirtReg2IndexFunctor> LiveOutRegInfo;
};
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void ComputeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo *MMI);
+
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void AddCatchInfo(const CallInst &I,
MachineModuleInfo *MMI, MachineBasicBlock *MBB);
-/// CopyCatchInfo - Copy catch information from SuccBB (or one of its
-/// successors) to LPad.
-void CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h
index cd760dba92aa..1cbd36abfbf8 100644
--- a/include/llvm/CodeGen/GCStrategy.h
+++ b/include/llvm/CodeGen/GCStrategy.h
@@ -37,6 +37,7 @@
#define LLVM_CODEGEN_GCSTRATEGY_H
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Registry.h"
#include <string>
@@ -68,6 +69,8 @@ namespace llvm {
bool CustomReadBarriers; //< Default is to insert loads.
bool CustomWriteBarriers; //< Default is to insert stores.
bool CustomRoots; //< Default is to pass through to backend.
+ bool CustomSafePoints; //< Default is to use NeededSafePoints
+ // to find safe points.
bool InitRoots; //< If set, roots are nulled during lowering.
bool UsesMetadata; //< If set, backend must emit metadata tables.
@@ -87,7 +90,9 @@ namespace llvm {
/// needsSafePoints - True if safe points of any kind are required. By
// default, none are recorded.
- bool needsSafePoints() const { return NeededSafePoints != 0; }
+ bool needsSafePoints() const {
+ return CustomSafePoints || NeededSafePoints != 0;
+ }
/// needsSafePoint(Kind) - True if the given kind of safe point is
// required. By default, none are recorded.
@@ -109,6 +114,11 @@ namespace llvm {
/// can generate a stack map. If true, then
// performCustomLowering must delete them.
bool customRoots() const { return CustomRoots; }
+
+ /// customSafePoints - By default, the GC analysis will find safe
+ /// points according to NeededSafePoints. If true,
+ /// then findCustomSafePoints must create them.
+ bool customSafePoints() const { return CustomSafePoints; }
/// initializeRoots - If set, gcroot intrinsics should initialize their
// allocas to null before the first use. This is
@@ -135,6 +145,7 @@ namespace llvm {
/// which the LLVM IR can be modified.
virtual bool initializeCustomLowering(Module &F);
virtual bool performCustomLowering(Function &F);
+ virtual bool findCustomSafePoints(GCFunctionInfo& FI, MachineFunction& MF);
};
}
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index 184e96dc4766..ab8ab5dd7b4e 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -57,7 +57,7 @@ namespace ISD {
AssertSext, AssertZext,
// Various leaf nodes.
- BasicBlock, VALUETYPE, CONDCODE, Register,
+ BasicBlock, VALUETYPE, CONDCODE, Register, RegisterMask,
Constant, ConstantFP,
GlobalAddress, GlobalTLSAddress, FrameIndex,
JumpTable, ConstantPool, ExternalSymbol, BlockAddress,
@@ -107,13 +107,6 @@ namespace ISD {
// and returns an outchain.
EH_SJLJ_LONGJMP,
- // OUTCHAIN = EH_SJLJ_DISPATCHSETUP(INCHAIN, setjmpval)
- // This corresponds to the eh.sjlj.dispatchsetup intrinsic. It takes an
- // input chain and the value returning from setjmp as inputs and returns an
- // outchain. By default, this does nothing. Targets can lower this to unwind
- // setup code if needed.
- EH_SJLJ_DISPATCHSETUP,
-
// TargetConstant* - Like Constant*, but the DAG does not do any folding,
// simplification, or lowering of the constant. They are used for constants
// which are known to fit in the immediate fields of their users, or for
@@ -319,6 +312,9 @@ namespace ISD {
/// Byte Swap and Counting operators.
BSWAP, CTTZ, CTLZ, CTPOP,
+ /// Bit counting operators with an undefined result for zero inputs.
+ CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF,
+
// Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not
// i1 then the high bits must conform to getBooleanContents.
SELECT,
@@ -327,6 +323,9 @@ namespace ISD {
// and #2), returning a vector result. All vectors have the same length.
// Much like the scalar select and setcc, each bit in the condition selects
// whether the corresponding result element is taken from op #1 or op #2.
+ // At first, the VSELECT condition is of vXi1 type. Later, targets may change
+  // the condition type in order to match the VSELECT node using a pattern.
+ // The condition follows the BooleanContent format of the target.
VSELECT,
// Select with condition operator - This selects between a true value and
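
The CTTZ_ZERO_UNDEF/CTLZ_ZERO_UNDEF opcodes added above exist because many hardware bit-count instructions leave a zero input undefined, while the plain CTTZ/CTLZ nodes promise a defined result. A sketch of the strict form built on a zero-undef primitive (modeled here with GCC/Clang's __builtin_ctz, whose own behavior on zero is undefined):

    #include <cstdint>

    static unsigned cttz32(uint32_t V) {
      if (V == 0)
        return 32;               // CTTZ: defined result for a zero input
      return __builtin_ctz(V);   // CTTZ_ZERO_UNDEF path: V != 0 guaranteed
    }
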
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 88e22d6a24ce..89f00e91f78e 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -51,6 +51,7 @@ class Function;
/// occurred, more memory is allocated, and we reemit the code into it.
///
class JITCodeEmitter : public MachineCodeEmitter {
+ virtual void anchor();
public:
virtual ~JITCodeEmitter() {}
diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h
index 1ed2547ca6cf..8fb31aa8a6d1 100644
--- a/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -85,11 +85,11 @@ namespace llvm {
virtual void dump(ScheduleDAG* DAG) const;
- // ScheduledNode - As nodes are scheduled, we look to see if there are any
+ // scheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor. If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
- void ScheduledNode(SUnit *Node);
+ void scheduledNode(SUnit *Node);
private:
void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index 0271c5d85222..eb01f66c3129 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -153,6 +153,7 @@ private:
/// LexicalScope - This class is used to track scope information.
///
class LexicalScope {
+ virtual void anchor();
public:
LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
@@ -208,7 +209,7 @@ public:
Parent->closeInsnRange(NewScope);
}
- /// dominates - Return true if current scope dominsates given lexical scope.
+ /// dominates - Return true if current scope dominates given lexical scope.
bool dominates(const LexicalScope *S) const {
if (S == this)
return true;
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 098dd0b3bf73..46dd004609f5 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -31,24 +31,20 @@ namespace {
if (std::getenv("bar") != (char*) -1)
return;
- (void) llvm::createDeadMachineInstructionElimPass();
-
(void) llvm::createFastRegisterAllocator();
(void) llvm::createBasicRegisterAllocator();
- (void) llvm::createLinearScanRegisterAllocator();
(void) llvm::createGreedyRegisterAllocator();
(void) llvm::createDefaultPBQPRegisterAllocator();
llvm::linkOcamlGC();
llvm::linkShadowStackGC();
-
+
(void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
- (void) llvm::createTDRRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
(void) llvm::createSourceListDAGScheduler(NULL,llvm::CodeGenOpt::Default);
(void) llvm::createHybridListDAGScheduler(NULL,llvm::CodeGenOpt::Default);
- (void) llvm::createTDListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
(void) llvm::createFastDAGScheduler(NULL, llvm::CodeGenOpt::Default);
(void) llvm::createDefaultScheduler(NULL, llvm::CodeGenOpt::Default);
+ (void) llvm::createVLIWDAGScheduler(NULL, llvm::CodeGenOpt::Default);
}
} ForceCodegenLinking; // Force link by creating a global definition.
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 2288c1a98b2d..a6008ab33761 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -43,12 +43,10 @@ namespace llvm {
private:
enum {
HAS_PHI_KILL = 1,
- REDEF_BY_EC = 1 << 1,
- IS_PHI_DEF = 1 << 2,
- IS_UNUSED = 1 << 3
+ IS_PHI_DEF = 1 << 1,
+ IS_UNUSED = 1 << 2
};
- MachineInstr *copy;
unsigned char flags;
public:
@@ -57,23 +55,22 @@ namespace llvm {
/// The ID number of this value.
unsigned id;
- /// The index of the defining instruction (if isDefAccurate() returns true).
+ /// The index of the defining instruction.
SlotIndex def;
/// VNInfo constructor.
- VNInfo(unsigned i, SlotIndex d, MachineInstr *c)
- : copy(c), flags(0), id(i), def(d)
+ VNInfo(unsigned i, SlotIndex d)
+ : flags(0), id(i), def(d)
{ }
/// VNInfo constructor, copies values from orig, except for the value number.
VNInfo(unsigned i, const VNInfo &orig)
- : copy(orig.copy), flags(orig.flags), id(i), def(orig.def)
+ : flags(orig.flags), id(i), def(orig.def)
{ }
/// Copy from the parameter into this VNInfo.
void copyFrom(VNInfo &src) {
flags = src.flags;
- copy = src.copy;
def = src.def;
}
@@ -86,19 +83,6 @@ namespace llvm {
flags = (flags | VNI->flags) & ~IS_UNUSED;
}
-    /// For a register interval, if this VN was defined by a copy instr
- /// getCopy() returns a pointer to it, otherwise returns 0.
- /// For a stack interval the behaviour of this method is undefined.
- MachineInstr* getCopy() const { return copy; }
- /// For a register interval, set the copy member.
- /// This method should not be called on stack intervals as it may lead to
- /// undefined behavior.
- void setCopy(MachineInstr *c) { copy = c; }
-
- /// isDefByCopy - Return true when this value was defined by a copy-like
- /// instruction as determined by MachineInstr::isCopyLike.
- bool isDefByCopy() const { return copy != 0; }
-
/// Returns true if one or more kills are PHI nodes.
/// Obsolete, do not use!
bool hasPHIKill() const { return flags & HAS_PHI_KILL; }
@@ -110,17 +94,6 @@ namespace llvm {
flags &= ~HAS_PHI_KILL;
}
- /// Returns true if this value is re-defined by an early clobber somewhere
- /// during the live range.
- bool hasRedefByEC() const { return flags & REDEF_BY_EC; }
- /// Set the "redef by early clobber" flag on this value.
- void setHasRedefByEC(bool hasRedef) {
- if (hasRedef)
- flags |= REDEF_BY_EC;
- else
- flags &= ~REDEF_BY_EC;
- }
-
/// Returns true if this value is defined by a PHI instruction (or was,
/// PHI instructions may have been eliminated).
bool isPHIDef() const { return flags & IS_PHI_DEF; }
@@ -294,10 +267,9 @@ namespace llvm {
/// getNextValue - Create a new value number and return it. MIIdx specifies
/// the instruction that defines the value number.
- VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI,
- VNInfo::Allocator &VNInfoAllocator) {
+ VNInfo *getNextValue(SlotIndex def, VNInfo::Allocator &VNInfoAllocator) {
VNInfo *VNI =
- new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def, CopyMI);
+ new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def);
valnos.push_back(VNI);
return VNI;
}
@@ -381,7 +353,7 @@ namespace llvm {
/// point is not contained in the half-open live range. It is usually the
/// getDefIndex() slot following its last use.
bool killedAt(SlotIndex index) const {
- const_iterator r = find(index.getUseIndex());
+ const_iterator r = find(index.getRegSlot(true));
return r != end() && r->end == index;
}
@@ -405,6 +377,14 @@ namespace llvm {
return I == end() ? 0 : &*I;
}
+ const LiveRange *getLiveRangeBefore(SlotIndex Idx) const {
+ return getLiveRangeContaining(Idx.getPrevSlot());
+ }
+
+ LiveRange *getLiveRangeBefore(SlotIndex Idx) {
+ return getLiveRangeContaining(Idx.getPrevSlot());
+ }
+
/// getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
VNInfo *getVNInfoAt(SlotIndex Idx) const {
const_iterator I = FindLiveRangeContaining(Idx);
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 8ca58b82c8bb..76201c96f915 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -63,8 +63,34 @@ namespace llvm {
/// allocatableRegs_ - A bit vector of allocatable registers.
BitVector allocatableRegs_;
- /// CloneMIs - A list of clones as result of re-materialization.
- std::vector<MachineInstr*> CloneMIs;
+ /// reservedRegs_ - A bit vector of reserved registers.
+ BitVector reservedRegs_;
+
+ /// RegMaskSlots - Sorted list of instructions with register mask operands.
+ /// Always use the 'r' slot, RegMasks are normal clobbers, not early
+ /// clobbers.
+ SmallVector<SlotIndex, 8> RegMaskSlots;
+
+ /// RegMaskBits - This vector is parallel to RegMaskSlots, it holds a
+ /// pointer to the corresponding register mask. This pointer can be
+ /// recomputed as:
+ ///
+ /// MI = Indexes->getInstructionFromIndex(RegMaskSlot[N]);
+ /// unsigned OpNum = findRegMaskOperand(MI);
+ /// RegMaskBits[N] = MI->getOperand(OpNum).getRegMask();
+ ///
+ /// This is kept in a separate vector partly because some standard
+ /// libraries don't support lower_bound() with mixed objects, partly to
+ /// improve locality when searching in RegMaskSlots.
+ /// Also see the comment in LiveInterval::find().
+ SmallVector<const uint32_t*, 8> RegMaskBits;
+
+ /// For each basic block number, keep (begin, size) pairs indexing into the
+ /// RegMaskSlots and RegMaskBits arrays.
+ /// Note that basic block numbers may not be layout contiguous, that's why
+ /// we can't just keep track of the first register mask in each basic
+ /// block.
+ SmallVector<std::pair<unsigned, unsigned>, 8> RegMaskBlocks;
public:
static char ID; // Pass identification, replacement for typeid
@@ -105,6 +131,12 @@ namespace llvm {
return allocatableRegs_.test(reg);
}
+    /// isReserved - Is the physical register reg reserved in the current
+    /// function?
+ bool isReserved(unsigned reg) const {
+ return reservedRegs_.test(reg);
+ }
+
/// getScaledIntervalSize - get the size of an interval in "units,"
/// where every function is composed of one thousand units. This
/// measure scales properly with empty index slots in the function.
@@ -125,19 +157,6 @@ namespace llvm {
return (unsigned)(IntervalPercentage * indexes_->getFunctionSize());
}
- /// conflictsWithPhysReg - Returns true if the specified register is used or
- /// defined during the duration of the specified interval. Copies to and
- /// from li.reg are allowed. This method is only able to analyze simple
- /// ranges that stay within a single basic block. Anything else is
- /// considered a conflict.
- bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm,
- unsigned reg);
-
- /// conflictsWithAliasRef - Similar to conflictsWithPhysRegRef except
- /// it checks for alias uses and defs.
- bool conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
- SmallPtrSet<MachineInstr*,32> &JoinedCopies);
-
// Interval creation
LiveInterval &getOrCreateInterval(unsigned reg) {
Reg2IntervalMap::iterator I = r2iMap_.find(reg);
@@ -177,14 +196,6 @@ namespace llvm {
return indexes_;
}
- SlotIndex getZeroIndex() const {
- return indexes_->getZeroIndex();
- }
-
- SlotIndex getInvalidIndex() const {
- return indexes_->getInvalidIndex();
- }
-
/// isNotInMIMap - returns true if the specified machine instr has been
/// removed or was never entered in the map.
bool isNotInMIMap(const MachineInstr* Instr) const {
@@ -216,21 +227,11 @@ namespace llvm {
return li.liveAt(getMBBStartIdx(mbb));
}
- LiveRange* findEnteringRange(LiveInterval &li,
- const MachineBasicBlock *mbb) {
- return li.getLiveRangeContaining(getMBBStartIdx(mbb));
- }
-
bool isLiveOutOfMBB(const LiveInterval &li,
const MachineBasicBlock *mbb) const {
return li.liveAt(getMBBEndIdx(mbb).getPrevSlot());
}
- LiveRange* findExitingRange(LiveInterval &li,
- const MachineBasicBlock *mbb) {
- return li.getLiveRangeContaining(getMBBEndIdx(mbb).getPrevSlot());
- }
-
MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
return indexes_->getMBBFromIndex(index);
}
@@ -247,19 +248,11 @@ namespace llvm {
indexes_->replaceMachineInstrInMaps(MI, NewMI);
}
- void InsertMBBInMaps(MachineBasicBlock *MBB) {
- indexes_->insertMBBInMaps(MBB);
- }
-
bool findLiveInMBBs(SlotIndex Start, SlotIndex End,
SmallVectorImpl<MachineBasicBlock*> &MBBs) const {
return indexes_->findLiveInMBBs(Start, End, MBBs);
}
- void renumber() {
- indexes_->renumberIndexes();
- }
-
VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -271,20 +264,6 @@ namespace llvm {
/// print - Implement the dump method.
virtual void print(raw_ostream &O, const Module* = 0) const;
- /// addIntervalsForSpills - Create new intervals for spilled defs / uses of
- /// the given interval. FIXME: It also returns the weight of the spill slot
- /// (if any is created) by reference. This is temporary.
- std::vector<LiveInterval*>
- addIntervalsForSpills(const LiveInterval& i,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- const MachineLoopInfo *loopInfo, VirtRegMap& vrm);
-
- /// spillPhysRegAroundRegDefsUses - Spill the specified physical register
- /// around all defs and uses of the specified interval. Return true if it
- /// was able to cut its interval.
- bool spillPhysRegAroundRegDefsUses(const LiveInterval &li,
- unsigned PhysReg, VirtRegMap &vrm);
-
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable. Also returns true
/// by reference if all of the defs are load instructions.
@@ -292,33 +271,71 @@ namespace llvm {
const SmallVectorImpl<LiveInterval*> *SpillIs,
bool &isLoad);
- /// isReMaterializable - Returns true if the definition MI of the specified
- /// val# of the specified interval is re-materializable.
- bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo,
- MachineInstr *MI);
+ /// intervalIsInOneMBB - If LI is confined to a single basic block, return
+ /// a pointer to that block. If LI is live into or out of any block,
+ /// return NULL.
+ MachineBasicBlock *intervalIsInOneMBB(const LiveInterval &LI) const;
+
+ /// addKillFlags - Add kill flags to any instruction that kills a virtual
+ /// register.
+ void addKillFlags();
- /// getRepresentativeReg - Find the largest super register of the specified
- /// physical register.
- unsigned getRepresentativeReg(unsigned Reg) const;
+ /// handleMove - call this method to notify LiveIntervals that
+ /// instruction 'mi' has been moved within a basic block. This will update
+ /// the live intervals for all operands of mi. Moves between basic blocks
+ /// are not supported.
+ void handleMove(MachineInstr* MI);
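+
+ /// A sketch of the expected use from a client pass (MBB, InsertPt, MI,
+ /// and LIS are illustrative assumptions, not part of this interface):
+ ///
+ ///   MBB->splice(InsertPt, MBB, MI); // move MI within its block
+ ///   LIS->handleMove(MI);            // then update the live intervals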
- /// getNumConflictsWithPhysReg - Return the number of uses and defs of the
- /// specified interval that conflicts with the specified physical register.
- unsigned getNumConflictsWithPhysReg(const LiveInterval &li,
- unsigned PhysReg) const;
+ /// handleMoveIntoBundle - Update intervals for operands of MI so that they
+ /// begin/end on the SlotIndex for BundleStart.
+ ///
+ /// Requires MI and BundleStart to have SlotIndexes, and assumes
+ /// existing liveness is accurate. BundleStart should be the first
+ /// instruction in the Bundle.
+ void handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart);
+
+ // Register mask functions.
+ //
+ // Machine instructions may use a register mask operand to indicate that a
+ // large number of registers are clobbered by the instruction. This is
+ // typically used for calls.
+ //
+ // For compile time performance reasons, these clobbers are not recorded in
+ // the live intervals for individual physical registers. Instead,
+ // LiveIntervalAnalysis maintains a sorted list of instructions with
+ // register mask operands.
+
+ /// getRegMaskSlots - Returns a sorted array of slot indices of all
+ /// instructions with register mask operands.
+ ArrayRef<SlotIndex> getRegMaskSlots() const { return RegMaskSlots; }
+
+ /// getRegMaskSlotsInBlock - Returns a sorted array of slot indices of all
+ /// instructions with register mask operands in the basic block numbered
+ /// MBBNum.
+ ArrayRef<SlotIndex> getRegMaskSlotsInBlock(unsigned MBBNum) const {
+ std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
+ return getRegMaskSlots().slice(P.first, P.second);
+ }
- /// intervalIsInOneMBB - Returns true if the specified interval is entirely
- /// within a single basic block.
- bool intervalIsInOneMBB(const LiveInterval &li) const;
+ /// getRegMaskBits - Returns an array of register mask pointers
+ /// corresponding to getRegMaskSlots().
+ ArrayRef<const uint32_t*> getRegMaskBits() const { return RegMaskBits; }
- /// getLastSplitPoint - Return the last possible insertion point in mbb for
- /// spilling and splitting code. This is the first terminator, or the call
- /// instruction if li is live into a landing pad successor.
- MachineBasicBlock::iterator getLastSplitPoint(const LiveInterval &li,
- MachineBasicBlock *mbb) const;
+ /// getRegMaskBitsInBlock - Returns an array of mask pointers corresponding
+ /// to getRegMaskSlotsInBlock(MBBNum).
+ ArrayRef<const uint32_t*> getRegMaskBitsInBlock(unsigned MBBNum) const {
+ std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
+ return getRegMaskBits().slice(P.first, P.second);
+ }
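+
+ /// The two per-block arrays pair up index-wise (a sketch; B is an assumed
+ /// block number):
+ ///
+ ///   ArrayRef<SlotIndex> Slots = getRegMaskSlotsInBlock(B);
+ ///   ArrayRef<const uint32_t*> Bits = getRegMaskBitsInBlock(B);
+ ///   // Slots[i] is the instruction whose mask is Bits[i].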
- /// addKillFlags - Add kill flags to any instruction that kills a virtual
- /// register.
- void addKillFlags();
+ /// checkRegMaskInterference - Test if LI is live across any register mask
+ /// instructions, and compute a bit mask of physical registers that are not
+ /// clobbered by any of them.
+ ///
+ /// Returns false if LI doesn't cross any register mask instructions. In
+ /// that case, the bit vector is not filled in.
+ bool checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs);
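+ ///
+ /// A usage sketch (VirtReg and PhysReg are illustrative assumptions):
+ ///
+ ///   BitVector UsableRegs;
+ ///   if (checkRegMaskInterference(VirtReg, UsableRegs) &&
+ ///       !UsableRegs.test(PhysReg))
+ ///     ; // PhysReg is clobbered by a register mask across VirtReg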
private:
/// computeIntervals - Compute live intervals.
@@ -351,13 +368,12 @@ namespace llvm {
void handlePhysicalRegisterDef(MachineBasicBlock* mbb,
MachineBasicBlock::iterator mi,
SlotIndex MIIdx, MachineOperand& MO,
- LiveInterval &interval,
- MachineInstr *CopyMI);
+ LiveInterval &interval);
/// handleLiveInRegister - Create interval for a livein register.
void handleLiveInRegister(MachineBasicBlock* mbb,
SlotIndex MIIdx,
- LiveInterval &interval, bool isAlias = false);
+ LiveInterval &interval);
/// getReMatImplicitUse - If the remat definition MI has one (for now, we
/// only allow one) virtual register operand, then its uses are implicitly
@@ -379,88 +395,12 @@ namespace llvm {
const SmallVectorImpl<LiveInterval*> *SpillIs,
bool &isLoad);
- /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
- /// slot / to reg or any rematerialized load into ith operand of specified
- /// MI. If it is successul, MI is updated with the newly created MI and
- /// returns true.
- bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
- MachineInstr *DefMI, SlotIndex InstrIdx,
- SmallVector<unsigned, 2> &Ops,
- bool isSS, int FrameIndex, unsigned Reg);
-
- /// canFoldMemoryOperand - Return true if the specified load / store
- /// folding is possible.
- bool canFoldMemoryOperand(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- bool ReMatLoadSS) const;
-
- /// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified
- /// VNInfo that's after the specified index but is within the basic block.
- bool anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI,
- MachineBasicBlock *MBB,
- SlotIndex Idx) const;
-
- /// hasAllocatableSuperReg - Return true if the specified physical register
- /// has any super register that's allocatable.
- bool hasAllocatableSuperReg(unsigned Reg) const;
-
- /// SRInfo - Spill / restore info.
- struct SRInfo {
- SlotIndex index;
- unsigned vreg;
- bool canFold;
- SRInfo(SlotIndex i, unsigned vr, bool f)
- : index(i), vreg(vr), canFold(f) {}
- };
-
- bool alsoFoldARestore(int Id, SlotIndex index, unsigned vr,
- BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> >&RestoreIdxes);
- void eraseRestoreInfo(int Id, SlotIndex index, unsigned vr,
- BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> >&RestoreIdxes);
-
- /// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
- /// spilled and create empty intervals for their uses.
- void handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- std::vector<LiveInterval*> &NewLIs);
-
- /// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
- /// interval on to-be re-materialized operands of MI) with new register.
- void rewriteImplicitOps(const LiveInterval &li,
- MachineInstr *MI, unsigned NewVReg, VirtRegMap &vrm);
-
- /// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper
- /// functions for addIntervalsForSpills to rewrite uses / defs for the given
- /// live range.
- bool rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
- MachineInstr *MI, MachineInstr *OrigDefMI, MachineInstr *DefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm, const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds, const MachineLoopInfo *loopInfo,
- unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs);
- void rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
- LiveInterval::Ranges::const_iterator &I,
- MachineInstr *OrigDefMI, MachineInstr *DefMI, unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm, const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds, const MachineLoopInfo *loopInfo,
- BitVector &SpillMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &SpillIdxes,
- BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs);
-
static LiveInterval* createInterval(unsigned Reg);
void printInstrs(raw_ostream &O) const;
void dumpInstrs() const;
+
+ class HMEditor;
};
} // End llvm namespace
diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h
new file mode 100644
index 000000000000..57a619389fa5
--- /dev/null
+++ b/include/llvm/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,207 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+public:
+ /// Callback methods for LiveRangeEdit owners.
+ class Delegate {
+ virtual void anchor();
+ public:
+ /// Called immediately before erasing a dead machine instruction.
+ virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
+
+ /// Called when a virtual register is no longer used. Return false to defer
+ /// its deletion from LiveIntervals.
+ virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
+
+ /// Called before shrinking the live range of a virtual register.
+ virtual void LRE_WillShrinkVirtReg(unsigned) {}
+
+ /// Called after cloning a virtual register.
+ /// This is used for new registers representing connected components of Old.
+ virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
+
+ virtual ~Delegate() {}
+ };
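+
+ /// An owner implements just the callbacks it needs, e.g. (a sketch only;
+ /// the class name is an illustrative assumption):
+ ///
+ ///   struct MySpiller : LiveRangeEdit::Delegate {
+ ///     void LRE_WillEraseInstruction(MachineInstr *MI) { /* unhook MI */ }
+ ///   };
+ ///
+ /// and passes itself as the 'delegate' constructor argument below.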
+
+private:
+ LiveInterval &parent_;
+ SmallVectorImpl<LiveInterval*> &newRegs_;
+ MachineRegisterInfo &MRI;
+ LiveIntervals &LIS;
+ VirtRegMap *VRM;
+ const TargetInstrInfo &TII;
+ Delegate *const delegate_;
+
+ /// firstNew_ - Index of the first register added to newRegs_.
+ const unsigned firstNew_;
+
+ /// scannedRemattable_ - true when remattable values have been identified.
+ bool scannedRemattable_;
+
+ /// remattable_ - Values defined by remattable instructions as identified by
+ /// tii.isTriviallyReMaterializable().
+ SmallPtrSet<const VNInfo*,4> remattable_;
+
+ /// rematted_ - Values that were actually rematted, and so need to have their
+ /// live range trimmed or entirely removed.
+ SmallPtrSet<const VNInfo*,4> rematted_;
+
+ /// scanRemattable - Identify the parent_ values that may rematerialize.
+ void scanRemattable(AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx);
+
+ /// foldAsLoad - If LI has a single use and a single def that can be folded as
+ /// a load, eliminate the register by folding the def into the use.
+ bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+ /// empty initially; any existing registers are ignored.
+ /// @param MF The MachineFunction the live range edit is taking place in.
+ /// @param lis The collection of all live intervals in this function.
+ /// @param vrm Map of virtual registers to physical registers for this
+ /// function. If NULL, no virtual register map updates will
+ /// be done. This could be the case if called before Regalloc.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ MachineFunction &MF,
+ LiveIntervals &lis,
+ VirtRegMap *vrm,
+ Delegate *delegate = 0)
+ : parent_(parent), newRegs_(newRegs),
+ MRI(MF.getRegInfo()), LIS(lis), VRM(vrm),
+ TII(*MF.getTarget().getInstrInfo()),
+ delegate_(delegate),
+ firstNew_(newRegs.size()),
+ scannedRemattable_(false) {}
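+
+ /// A construction sketch (ParentLI, MF, LIS, and VRM are illustrative
+ /// assumptions standing for the caller's objects):
+ ///
+ ///   SmallVector<LiveInterval*, 8> NewRegs;
+ ///   LiveRangeEdit LRE(ParentLI, NewRegs, MF, LIS, &VRM);
+ ///   LiveInterval &New = LRE.create(); // new vreg, same class as parent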
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ ArrayRef<LiveInterval*> regs() const {
+ return makeArrayRef(newRegs_).slice(firstNew_);
+ }
+
+ /// createFrom - Create a new virtual register based on OldReg.
+ LiveInterval &createFrom(unsigned OldReg);
+
+ /// create - Create a new register with the same class and original slot as
+ /// parent.
+ LiveInterval &create() {
+ return createFrom(getReg());
+ }
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(AliasAnalysis*);
+
+ /// checkRematerializable - Manually add VNI to the list of rematerializable
+ /// values if DefMI may be rematerializable.
+ bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
+ AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+ /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ const TargetRegisterInfo&,
+ bool Late = false);
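+
+ /// The intended remat sequence, sketched with illustrative names and
+ /// assuming anyRematerializable() has already returned true:
+ ///
+ ///   LiveRangeEdit::Remat RM(ParentVNI);
+ ///   if (canRematerializeAt(RM, UseIdx, false))
+ ///     rematerializeAt(MBB, MI, NewReg, RM, TRI); // returns the new def's
+ ///                                                // SlotIndex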
+
+ /// markRematerialized - Explicitly mark a value as rematerialized after
+ /// doing it manually.
+ void markRematerialized(const VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(const VNInfo *ParentVNI) const {
+ return rematted_.count(ParentVNI);
+ }
+
+ /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
+ /// to erase it from LIS.
+ void eraseVirtReg(unsigned Reg);
+
+ /// eliminateDeadDefs - Try to delete machine instructions that are now dead
+ /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
+ /// and further dead defs to be eliminated.
+ /// RegsBeingSpilled lists registers currently being spilled by the register
+ /// allocator. These registers should not be split into new intervals
+ /// as currently those new intervals are not guaranteed to spill.
+ void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
+ ArrayRef<unsigned> RegsBeingSpilled
+ = ArrayRef<unsigned>());
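+
+ /// A sketch of the cleanup step (Dead is an assumed worklist collected by
+ /// the caller):
+ ///
+ ///   SmallVector<MachineInstr*, 8> Dead;
+ ///   // ... record instructions whose defs became dead in Dead ...
+ ///   LRE.eliminateDeadDefs(Dead);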
+
+ /// calculateRegClassAndHint - Recompute register class and hint for each new
+ /// register.
+ void calculateRegClassAndHint(MachineFunction&,
+ const MachineLoopInfo&);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 7ba901fc28a4..d4bb409e0605 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -85,17 +85,11 @@ public:
///
SparseBitVector<> AliveBlocks;
- /// NumUses - Number of uses of this register across the entire function.
- ///
- unsigned NumUses;
-
/// Kills - List of MachineInstruction's which are the last use of this
/// virtual register (kill it) in their basic block.
///
std::vector<MachineInstr*> Kills;
- VarInfo() : NumUses(0) {}
-
/// removeKill - Delete a kill corresponding to the specified
/// machine instruction. Returns true if there was a kill
/// corresponding to this instruction, false otherwise.
@@ -166,6 +160,9 @@ private: // Intermediate data structures
/// the last use of the whole register.
bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI);
+ /// HandleRegMask - Call HandlePhysRegKill for all registers clobbered by Mask.
+ void HandleRegMask(const MachineOperand&);
+
void HandlePhysRegUse(unsigned Reg, MachineInstr *MI);
void HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
SmallVector<unsigned, 4> &Defs);
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 5a20e952b9cc..ef9c0c200584 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -77,6 +77,7 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
/// (disable optimization).
std::vector<uint32_t> Weights;
typedef std::vector<uint32_t>::iterator weight_iterator;
+ typedef std::vector<uint32_t>::const_iterator const_weight_iterator;
/// LiveIns - Keep track of the physical registers that are livein of
/// the basicblock.
@@ -84,8 +85,9 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
/// Alignment - Alignment of the basic block. Zero if the basic block does
/// not need to be aligned.
+ /// The alignment is specified as log2(bytes).
unsigned Alignment;
-
+
/// IsLandingPad - Indicate that this basic block is entered via an
/// exception handler.
bool IsLandingPad;
@@ -115,6 +117,10 @@ public:
/// "(null)".
StringRef getName() const;
+ /// getFullName - Return a formatted string to identify this block and its
+ /// parent function.
+ std::string getFullName() const;
+
/// hasAddressTaken - Test whether this block is potentially the target
/// of an indirect branch.
bool hasAddressTaken() const { return AddressTaken; }
@@ -128,10 +134,89 @@ public:
const MachineFunction *getParent() const { return xParent; }
MachineFunction *getParent() { return xParent; }
- typedef Instructions::iterator iterator;
- typedef Instructions::const_iterator const_iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef std::reverse_iterator<iterator> reverse_iterator;
+
+ /// bundle_iterator - MachineBasicBlock iterator that automatically skips over
+ /// MIs that are inside bundles (i.e., it walks top-level MIs only).
+ template<typename Ty, typename IterTy>
+ class bundle_iterator
+ : public std::iterator<std::bidirectional_iterator_tag, Ty, ptrdiff_t> {
+ IterTy MII;
+
+ public:
+ bundle_iterator(IterTy mii) : MII(mii) {
+ assert(!MII->isInsideBundle() &&
+ "It's not legal to initialize bundle_iterator with a bundled MI");
+ }
+
+ bundle_iterator(Ty &mi) : MII(mi) {
+ assert(!mi.isInsideBundle() &&
+ "It's not legal to initialize bundle_iterator with a bundled MI");
+ }
+ bundle_iterator(Ty *mi) : MII(mi) {
+ assert((!mi || !mi->isInsideBundle()) &&
+ "It's not legal to initialize bundle_iterator with a bundled MI");
+ }
+ bundle_iterator(const bundle_iterator &I) : MII(I.MII) {}
+ bundle_iterator() : MII(0) {}
+
+ Ty &operator*() const { return *MII; }
+ Ty *operator->() const { return &operator*(); }
+
+ operator Ty*() const { return MII; }
+
+ bool operator==(const bundle_iterator &x) const {
+ return MII == x.MII;
+ }
+ bool operator!=(const bundle_iterator &x) const {
+ return !operator==(x);
+ }
+
+ // Increment and decrement operators...
+ bundle_iterator &operator--() { // predecrement - Back up
+ do {
+ --MII;
+ } while (MII->isInsideBundle());
+ return *this;
+ }
+ bundle_iterator &operator++() { // preincrement - Advance
+ do {
+ ++MII;
+ } while (MII->isInsideBundle());
+ return *this;
+ }
+ bundle_iterator operator--(int) { // postdecrement operators...
+ bundle_iterator tmp = *this;
+ do {
+ --MII;
+ } while (MII->isInsideBundle());
+ return tmp;
+ }
+ bundle_iterator operator++(int) { // postincrement operators...
+ bundle_iterator tmp = *this;
+ do {
+ ++MII;
+ } while (MII->isInsideBundle());
+ return tmp;
+ }
+
+ IterTy getInstrIterator() const {
+ return MII;
+ }
+ };
+
+ typedef Instructions::iterator instr_iterator;
+ typedef Instructions::const_iterator const_instr_iterator;
+ typedef std::reverse_iterator<instr_iterator> reverse_instr_iterator;
+ typedef
+ std::reverse_iterator<const_instr_iterator> const_reverse_instr_iterator;
+
+ typedef
+ bundle_iterator<MachineInstr,instr_iterator> iterator;
+ typedef
+ bundle_iterator<const MachineInstr,const_instr_iterator> const_iterator;
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+ typedef std::reverse_iterator<iterator> reverse_iterator;
+
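+ /// The two iterator flavors differ only in bundle handling (a sketch,
+ /// assuming an MBB reference):
+ ///
+ ///   for (iterator I = MBB.begin(), E = MBB.end(); I != E; ++I)
+ ///     ; // visits top-level MIs and bundle headers only
+ ///   for (instr_iterator I = MBB.instr_begin(), E = MBB.instr_end();
+ ///        I != E; ++I)
+ ///     ; // visits every MI, including those inside bundles
+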
unsigned size() const { return (unsigned)Insts.size(); }
bool empty() const { return Insts.empty(); }
@@ -141,15 +226,53 @@ public:
const MachineInstr& front() const { return Insts.front(); }
const MachineInstr& back() const { return Insts.back(); }
+ instr_iterator instr_begin() { return Insts.begin(); }
+ const_instr_iterator instr_begin() const { return Insts.begin(); }
+ instr_iterator instr_end() { return Insts.end(); }
+ const_instr_iterator instr_end() const { return Insts.end(); }
+ reverse_instr_iterator instr_rbegin() { return Insts.rbegin(); }
+ const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
+ reverse_instr_iterator instr_rend () { return Insts.rend(); }
+ const_reverse_instr_iterator instr_rend () const { return Insts.rend(); }
+
iterator begin() { return Insts.begin(); }
const_iterator begin() const { return Insts.begin(); }
- iterator end() { return Insts.end(); }
- const_iterator end() const { return Insts.end(); }
- reverse_iterator rbegin() { return Insts.rbegin(); }
- const_reverse_iterator rbegin() const { return Insts.rbegin(); }
+ iterator end() {
+ instr_iterator II = instr_end();
+ if (II != instr_begin()) {
+ while (II->isInsideBundle())
+ --II;
+ }
+ return II;
+ }
+ const_iterator end() const {
+ const_instr_iterator II = instr_end();
+ if (II != instr_begin()) {
+ while (II->isInsideBundle())
+ --II;
+ }
+ return II;
+ }
+ reverse_iterator rbegin() {
+ reverse_instr_iterator II = instr_rbegin();
+ if (II != instr_rend()) {
+ while (II->isInsideBundle())
+ ++II;
+ }
+ return II;
+ }
+ const_reverse_iterator rbegin() const {
+ const_reverse_instr_iterator II = instr_rbegin();
+ if (II != instr_rend()) {
+ while (II->isInsideBundle())
+ ++II;
+ }
+ return II;
+ }
reverse_iterator rend () { return Insts.rend(); }
const_reverse_iterator rend () const { return Insts.rend(); }
+
// Machine-CFG iterators
typedef std::vector<MachineBasicBlock *>::iterator pred_iterator;
typedef std::vector<MachineBasicBlock *>::const_iterator const_pred_iterator;
@@ -219,10 +342,12 @@ public:
bool livein_empty() const { return LiveIns.empty(); }
/// getAlignment - Return alignment of the basic block.
+ /// The alignment is specified as log2(bytes).
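+ /// For example, an alignment of 4 means the block is 1 << 4 == 16 byte
+ /// aligned.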
///
unsigned getAlignment() const { return Alignment; }
/// setAlignment - Set alignment of the basic block.
+ /// The alignment is specified as log2(bytes).
///
void setAlignment(unsigned Align) { Alignment = Align; }
@@ -239,7 +364,7 @@ public:
const MachineBasicBlock *getLandingPadSuccessor() const;
// Code Layout methods.
-
+
/// moveBefore/moveAfter - move 'this' block before or after the specified
/// block. This only moves the block, it does not modify the CFG or adjust
/// potential fall-throughs at the end of the block.
@@ -286,7 +411,7 @@ public:
/// in transferSuccessors, and update PHI operands in the successor blocks
/// which refer to fromMBB to refer to this.
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB);
-
+
/// isSuccessor - Return true if the specified MBB is a successor of this
/// block.
bool isSuccessor(const MachineBasicBlock *MBB) const;
@@ -304,7 +429,7 @@ public:
/// branch to do so (e.g., a table jump). True is a conservative answer.
bool canFallThrough();
- /// Returns a pointer to the first instructon in this block that is not a
+ /// Returns a pointer to the first instruction in this block that is not a
/// PHINode instruction. When adding instructions to the beginning of the
/// basic block, they should be added before the returned value, not before
/// the first instruction, which might be PHI.
@@ -320,18 +445,16 @@ public:
/// instruction of this basic block. If a terminator does not exist,
/// it returns end()
iterator getFirstTerminator();
+ const_iterator getFirstTerminator() const;
- const_iterator getFirstTerminator() const {
- return const_cast<MachineBasicBlock*>(this)->getFirstTerminator();
- }
+ /// getFirstInstrTerminator - Same as getFirstTerminator, but ignores bundles
+ /// and returns an instr_iterator instead.
+ instr_iterator getFirstInstrTerminator();
/// getLastNonDebugInstr - returns an iterator to the last non-debug
/// instruction in the basic block, or end()
iterator getLastNonDebugInstr();
-
- const_iterator getLastNonDebugInstr() const {
- return const_cast<MachineBasicBlock*>(this)->getLastNonDebugInstr();
- }
+ const_iterator getLastNonDebugInstr() const;
/// SplitCriticalEdge - Split the critical edge from this block to the
/// given successor block, and return the newly created block, or null
@@ -344,38 +467,88 @@ public:
void pop_front() { Insts.pop_front(); }
void pop_back() { Insts.pop_back(); }
void push_back(MachineInstr *MI) { Insts.push_back(MI); }
+
template<typename IT>
- void insert(iterator I, IT S, IT E) { Insts.insert(I, S, E); }
- iterator insert(iterator I, MachineInstr *M) { return Insts.insert(I, M); }
- iterator insertAfter(iterator I, MachineInstr *M) {
- return Insts.insertAfter(I, M);
+ void insert(instr_iterator I, IT S, IT E) {
+ Insts.insert(I, S, E);
+ }
+ instr_iterator insert(instr_iterator I, MachineInstr *M) {
+ return Insts.insert(I, M);
+ }
+ instr_iterator insertAfter(instr_iterator I, MachineInstr *M) {
+ return Insts.insertAfter(I, M);
}
- // erase - Remove the specified element or range from the instruction list.
- // These functions delete any instructions removed.
- //
- iterator erase(iterator I) { return Insts.erase(I); }
- iterator erase(iterator I, iterator E) { return Insts.erase(I, E); }
- MachineInstr *remove(MachineInstr *I) { return Insts.remove(I); }
- void clear() { Insts.clear(); }
+ template<typename IT>
+ void insert(iterator I, IT S, IT E) {
+ Insts.insert(I.getInstrIterator(), S, E);
+ }
+ iterator insert(iterator I, MachineInstr *M) {
+ return Insts.insert(I.getInstrIterator(), M);
+ }
+ iterator insertAfter(iterator I, MachineInstr *M) {
+ return Insts.insertAfter(I.getInstrIterator(), M);
+ }
+
+ /// erase - Remove the specified element or range from the instruction list.
+ /// These functions delete any instructions removed.
+ ///
+ instr_iterator erase(instr_iterator I) {
+ return Insts.erase(I);
+ }
+ instr_iterator erase(instr_iterator I, instr_iterator E) {
+ return Insts.erase(I, E);
+ }
+ instr_iterator erase_instr(MachineInstr *I) {
+ instr_iterator MII(I);
+ return erase(MII);
+ }
+
+ iterator erase(iterator I);
+ iterator erase(iterator I, iterator E) {
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+ iterator erase(MachineInstr *I) {
+ iterator MII(I);
+ return erase(MII);
+ }
+
+ /// remove - Remove the instruction from the instruction list. This function
+ /// does not delete the instruction. WARNING: If the specified
+ /// instruction is a bundle, this function will remove all the bundled
+ /// instructions as well. It is up to the caller to keep a list of the
+ /// bundled instructions and re-insert them if desired. This function is
+ /// *not recommended* for manipulating instructions with bundles. Use
+ /// splice instead.
+ MachineInstr *remove(MachineInstr *I);
+ void clear() {
+ Insts.clear();
+ }
/// splice - Take an instruction from MBB 'Other' at the position From,
/// and insert it into this MBB right before 'where'.
- void splice(iterator where, MachineBasicBlock *Other, iterator From) {
+ void splice(instr_iterator where, MachineBasicBlock *Other,
+ instr_iterator From) {
Insts.splice(where, Other->Insts, From);
}
+ void splice(iterator where, MachineBasicBlock *Other, iterator From);
/// splice - Take a block of instructions from MBB 'Other' in the range [From,
/// To), and insert them into this MBB right before 'where'.
+ void splice(instr_iterator where, MachineBasicBlock *Other, instr_iterator From,
+ instr_iterator To) {
+ Insts.splice(where, Other->Insts, From, To);
+ }
void splice(iterator where, MachineBasicBlock *Other, iterator From,
iterator To) {
- Insts.splice(where, Other->Insts, From, To);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
}
/// removeFromParent - This method unlinks 'this' from the containing
/// function, and returns it, but does not delete it.
MachineBasicBlock *removeFromParent();
-
+
/// eraseFromParent - This method unlinks 'this' from the containing
/// function and deletes it.
void eraseFromParent();
@@ -396,7 +569,10 @@ public:
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
- DebugLoc findDebugLoc(MachineBasicBlock::iterator &MBBI);
+ DebugLoc findDebugLoc(instr_iterator MBBI);
+ DebugLoc findDebugLoc(iterator MBBI) {
+ return findDebugLoc(MBBI.getInstrIterator());
+ }
// Debugging methods.
void dump() const;
@@ -418,13 +594,14 @@ private:
/// getWeightIterator - Return weight iterator corresponding to the I
/// successor iterator.
weight_iterator getWeightIterator(succ_iterator I);
+ const_weight_iterator getWeightIterator(const_succ_iterator I) const;
friend class MachineBranchProbabilityInfo;
/// getSuccWeight - Return weight of the edge from this block to MBB. This
/// method should NOT be called directly, but by using getEdgeWeight method
/// from MachineBranchProbabilityInfo class.
- uint32_t getSuccWeight(MachineBasicBlock *succ);
+ uint32_t getSuccWeight(const MachineBasicBlock *succ) const;
// Methods used to maintain doubly linked list of blocks...
diff --git a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 416d40bf3098..a9c7bf7dbc60 100644
--- a/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -20,6 +20,7 @@
namespace llvm {
+class MachineBasicBlock;
class MachineBranchProbabilityInfo;
template<class BlockT, class FunctionT, class BranchProbInfoT>
class BlockFrequencyImpl;
@@ -28,7 +29,8 @@ class BlockFrequencyImpl;
/// machine basic block frequencies.
class MachineBlockFrequencyInfo : public MachineFunctionPass {
- BlockFrequencyImpl<MachineBasicBlock, MachineFunction, MachineBranchProbabilityInfo> *MBFI;
+ BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo> *MBFI;
public:
static char ID;
@@ -46,7 +48,7 @@ public:
/// that we should not rely on the value itself, but only on the comparison to
/// the other block frequencies. We do this to avoid using of floating points.
///
- BlockFrequency getBlockFreq(MachineBasicBlock *MBB) const;
+ BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
};
}
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index d9673e2197c8..af4db7d6bde6 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -25,6 +25,7 @@ class raw_ostream;
class MachineBasicBlock;
class MachineBranchProbabilityInfo : public ImmutablePass {
+ virtual void anchor();
// Default weight value. Used when we don't have information about the edge.
// TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
@@ -34,9 +35,6 @@ class MachineBranchProbabilityInfo : public ImmutablePass {
// weight to just "inherit" the non-zero weight of an adjacent successor.
static const uint32_t DEFAULT_WEIGHT = 16;
- // Get sum of the block successors' weights.
- uint32_t getSumForBlock(MachineBasicBlock *MBB) const;
-
public:
static char ID;
@@ -51,17 +49,27 @@ public:
// Return edge weight. If we don't have any information about it, return
// DEFAULT_WEIGHT.
- uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst) const;
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
+
+ // Get sum of the block successors' weights, potentially scaling them to fit
+ // within 32-bits. If scaling is required, sets Scale based on the necessary
+ // adjustment. Any edge weights used with the sum should be divided by Scale.
+ uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const;
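+ //
+ // An illustrative use (MBB and Succ are assumed names):
+ //
+ //   uint32_t Scale;
+ //   uint32_t Sum = getSumForBlock(MBB, Scale);
+ //   uint32_t W = getEdgeWeight(MBB, Succ) / Scale; // comparable to Sum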
// A 'Hot' edge is an edge whose probability is >= 80%.
bool isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const;
// Return a hot successor for the block BB or null if there isn't one.
+ // NB: This routine's complexity is linear in the number of successors.
MachineBasicBlock *getHotSucc(MachineBasicBlock *MBB) const;
// Return a probability as a fraction between 0 (0% probability) and
// 1 (100% probability); however, the value is never equal to 0, and it can
// be 1 only if the Src block has a single successor.
+ // NB: This routine's complexity is linear in the number of successors of
+ // Src. Querying sequentially for each successor's probability is a quadratic
+ // query pattern.
BranchProbability getEdgeProbability(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const;
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 428aada7ba13..86e8f27877e2 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -20,6 +20,8 @@
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
+#include <string>
+
namespace llvm {
class MachineBasicBlock;
@@ -49,6 +51,7 @@ class MCSymbol;
/// occurred, more memory is allocated, and we reemit the code into it.
///
class MachineCodeEmitter {
+ virtual void anchor();
protected:
/// BufferBegin/BufferEnd - Pointers to the start and end of the memory
/// allocated for this code buffer.
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index 29f4f443bf7b..d6d65a24defb 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -34,6 +34,7 @@ class raw_ostream;
/// Abstract base class for all machine specific constantpool value subclasses.
///
class MachineConstantPoolValue {
+ virtual void anchor();
Type *Ty;
public:
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index ab944a2335f7..82a4ac821b69 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -84,7 +84,8 @@ public:
// Loop through the basic block until we find A or B.
MachineBasicBlock::iterator I = BBA->begin();
- for (; &*I != A && &*I != B; ++I) /*empty*/;
+ for (; &*I != A && &*I != B; ++I)
+ /*empty*/ ;
//if(!DT.IsPostDominators) {
// A dominates B if it is found first in the basic block.
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index b347ca8e680a..44402a9e68fb 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -465,7 +465,7 @@ public:
bool isSpillSlotObjectIndex(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
- return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;;
+ return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;
}
/// isDeadObjectIndex - Returns true if the specified index corresponds to
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 6e08f7b0503d..dda2dc708769 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -120,10 +120,12 @@ class MachineFunction {
/// Alignment - The alignment of the function.
unsigned Alignment;
- /// CallsSetJmp - True if the function calls setjmp or sigsetjmp. This is used
- /// to limit optimizations which cannot reason about the control flow of
- /// setjmp.
- bool CallsSetJmp;
+ /// ExposesReturnsTwice - True if the function calls setjmp or related
+ /// functions with attribute "returns twice", but doesn't have
+ /// the attribute itself.
+ /// This is used to limit optimizations which cannot reason
+ /// about the control flow of such functions.
+ bool ExposesReturnsTwice;
MachineFunction(const MachineFunction &); // DO NOT IMPLEMENT
void operator=(const MachineFunction&); // DO NOT IMPLEMENT
@@ -187,20 +189,22 @@ public:
///
void setAlignment(unsigned A) { Alignment = A; }
- /// EnsureAlignment - Make sure the function is at least 'A' bits aligned.
+ /// EnsureAlignment - Make sure the function is at least 1 << A bytes aligned.
void EnsureAlignment(unsigned A) {
if (Alignment < A) Alignment = A;
}
- /// callsSetJmp - Returns true if the function calls setjmp or sigsetjmp.
- bool callsSetJmp() const {
- return CallsSetJmp;
+ /// exposesReturnsTwice - Returns true if the function calls setjmp or
+ /// any other similar functions with attribute "returns twice" without
+ /// having the attribute itself.
+ bool exposesReturnsTwice() const {
+ return ExposesReturnsTwice;
}
- /// setCallsSetJmp - Set a flag that indicates if there's a call to setjmp or
- /// sigsetjmp.
- void setCallsSetJmp(bool B) {
- CallsSetJmp = B;
+ /// setExposesReturnsTwice - Set a flag that indicates if there's a call to
+ /// a "returns twice" function.
+ void setExposesReturnsTwice(bool B) {
+ ExposesReturnsTwice = B;
}
/// getInfo - Keep track of various per-function pieces of information for
@@ -376,7 +380,8 @@ public:
MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo,
unsigned f, uint64_t s,
unsigned base_alignment,
- const MDNode *TBAAInfo = 0);
+ const MDNode *TBAAInfo = 0,
+ const MDNode *Ranges = 0);
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, adjusting by an offset and using the given size.
@@ -437,6 +442,7 @@ template <> struct GraphTraits<MachineFunction*> :
typedef MachineFunction::iterator nodes_iterator;
static nodes_iterator nodes_begin(MachineFunction *F) { return F->begin(); }
static nodes_iterator nodes_end (MachineFunction *F) { return F->end(); }
+ static unsigned size (MachineFunction *F) { return F->size(); }
};
template <> struct GraphTraits<const MachineFunction*> :
public GraphTraits<const MachineBasicBlock*> {
@@ -452,6 +458,9 @@ template <> struct GraphTraits<const MachineFunction*> :
static nodes_iterator nodes_end (const MachineFunction *F) {
return F->end();
}
+ static unsigned size (const MachineFunction *F) {
+ return F->size();
+ }
};
diff --git a/include/llvm/CodeGen/MachineFunctionAnalysis.h b/include/llvm/CodeGen/MachineFunctionAnalysis.h
index 50676ad4ad49..50ea2062f30c 100644
--- a/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ b/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -26,17 +26,14 @@ class MachineFunction;
struct MachineFunctionAnalysis : public FunctionPass {
private:
const TargetMachine &TM;
- CodeGenOpt::Level OptLevel;
MachineFunction *MF;
unsigned NextFnNum;
public:
static char ID;
- explicit MachineFunctionAnalysis(const TargetMachine &tm,
- CodeGenOpt::Level OL = CodeGenOpt::Default);
+ explicit MachineFunctionAnalysis(const TargetMachine &tm);
~MachineFunctionAnalysis();
MachineFunction &getMF() const { return *MF; }
- CodeGenOpt::Level getOptLevel() const { return OptLevel; }
virtual const char* getPassName() const {
return "Machine Function Analysis";
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index cae38f34709d..65093d7e7ad6 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/STLExtras.h"
@@ -53,9 +54,11 @@ public:
};
enum MIFlag {
- NoFlags = 0,
- FrameSetup = 1 << 0 // Instruction is used as a part of
+ NoFlags = 0,
+ FrameSetup = 1 << 0, // Instruction is used as a part of
// function frame setup code.
+ InsideBundle = 1 << 1 // Instruction is inside a bundle (not
+ // the first MI in a bundle)
};
private:
const MCInstrDesc *MCID; // Instruction descriptor.
@@ -71,9 +74,10 @@ private:
// anything other than to convey comment
// information to AsmPrinter.
+ uint16_t NumMemRefs; // information on memory references
+ mmo_iterator MemRefs;
+
std::vector<MachineOperand> Operands; // the operands
- mmo_iterator MemRefs; // information on memory references
- mmo_iterator MemRefsEnd;
MachineBasicBlock *Parent; // Pointer to the owning basic block.
DebugLoc debugLoc; // Source line information.
@@ -148,6 +152,12 @@ public:
AsmPrinterFlags |= (uint8_t)Flag;
}
+ /// clearAsmPrinterFlag - clear specific AsmPrinter flags
+ ///
+ void clearAsmPrinterFlag(CommentFlag Flag) {
+ AsmPrinterFlags &= ~Flag;
+ }
+
/// getFlags - Return the MI flags bitvector.
uint8_t getFlags() const {
return Flags;
@@ -167,12 +177,64 @@ public:
Flags = flags;
}
- /// clearAsmPrinterFlag - clear specific AsmPrinter flags
+ /// clearFlag - Clear a MI flag.
+ void clearFlag(MIFlag Flag) {
+ Flags &= ~((uint8_t)Flag);
+ }
+
+ /// isInsideBundle - Return true if MI is in a bundle (but not the first MI
+ /// in a bundle).
///
- void clearAsmPrinterFlag(CommentFlag Flag) {
- AsmPrinterFlags &= ~Flag;
+ /// A bundle looks like this before it's finalized:
+ /// ----------------
+ /// | MI |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// In this case, the first MI starts a bundle but is not inside a bundle; the
+ /// next 2 MIs are considered "inside" the bundle.
+ ///
+ /// After a bundle is finalized, it looks like this:
+ /// ----------------
+ /// | Bundle |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// |
+ /// ----------------
+ /// | MI * |
+ /// ----------------
+ /// The first instruction has the special opcode "BUNDLE". It's not "inside"
+ /// a bundle, but the next three MIs are.
+ bool isInsideBundle() const {
+ return getFlag(InsideBundle);
}
+ /// setIsInsideBundle - Set InsideBundle bit.
+ ///
+ void setIsInsideBundle(bool Val = true) {
+ if (Val)
+ setFlag(InsideBundle);
+ else
+ clearFlag(InsideBundle);
+ }
+
+ /// isBundled - Return true if this instruction is part of a bundle. This is
+ /// true if either it or the following instruction is marked "InsideBundle".
+ bool isBundled() const;
+
/// getDebugLoc - Returns the debug location id of this MachineInstr.
///
DebugLoc getDebugLoc() const { return debugLoc; }
@@ -223,15 +285,285 @@ public:
/// Access to memory operands of the instruction
mmo_iterator memoperands_begin() const { return MemRefs; }
- mmo_iterator memoperands_end() const { return MemRefsEnd; }
- bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+ mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; }
+ bool memoperands_empty() const { return NumMemRefs == 0; }
/// hasOneMemOperand - Return true if this instruction has exactly one
/// MachineMemOperand.
bool hasOneMemOperand() const {
- return MemRefsEnd - MemRefs == 1;
+ return NumMemRefs == 1;
+ }
+
+ /// API for querying MachineInstr properties. They are the same as MCInstrDesc
+ /// queries but they are bundle aware.
+
+ enum QueryType {
+ IgnoreBundle, // Ignore bundles
+ AnyInBundle, // Return true if any instruction in bundle has property
+ AllInBundle // Return true if all instructions in bundle have property
+ };
+
+ /// hasProperty - Return true if the instruction (or in the case of a bundle,
+ /// the instructions inside the bundle) has the specified property.
+ /// The first argument is the property being queried.
+ /// The second argument indicates whether the query should look inside
+ /// instruction bundles.
+ bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const {
+ // Inline the fast path.
+ if (Type == IgnoreBundle || !isBundle())
+ return getDesc().getFlags() & (1 << MCFlag);
+
+ // If we have a bundle, take the slow path.
+ return hasPropertyInBundle(1 << MCFlag, Type);
+ }
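+
+ /// For illustration, the bundle-aware queries defined below can be asked
+ /// per bundle (a sketch; MI is an assumed MachineInstr pointer):
+ ///
+ ///   MI->isCall(MachineInstr::AnyInBundle);       // any bundled MI a call?
+ ///   MI->isPredicable(MachineInstr::AllInBundle); // all must be predicable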
+
+ /// isVariadic - Return true if this instruction can have a variable number of
+ /// operands. In this case, the variable operands will be after the normal
+ /// operands but before the implicit definitions and uses (if any are
+ /// present).
+ bool isVariadic(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Variadic, Type);
+ }
+
+ /// hasOptionalDef - Return true if this instruction has an optional
+ /// definition, e.g. ARM instructions which can set the condition code if the
+ /// 's' bit is set.
+ bool hasOptionalDef(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::HasOptionalDef, Type);
+ }
+
+ /// isPseudo - Return true if this is a pseudo instruction that doesn't
+ /// correspond to a real machine instruction.
+ ///
+ bool isPseudo(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Pseudo, Type);
+ }
+
+ bool isReturn(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Return, Type);
+ }
+
+ bool isCall(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Call, Type);
+ }
+
+ /// isBarrier - Returns true if the specified instruction stops control flow
+ /// from executing the instruction immediately following it. Examples include
+ /// unconditional branches and return instructions.
+ bool isBarrier(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Barrier, Type);
+ }
+
+ /// isTerminator - Returns true if this instruction is part of the terminator
+ /// for a basic block. Typically these are return and branch instructions.
+ ///
+ /// Various passes use this to insert code into the bottom of a basic block,
+ /// but before control flow occurs.
+ bool isTerminator(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Terminator, Type);
+ }
+
+ /// isBranch - Returns true if this is a conditional, unconditional, or
+ /// indirect branch. Predicates below can be used to discriminate between
+ /// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
+ /// get more information.
+ bool isBranch(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::Branch, Type);
+ }
+
+ /// isIndirectBranch - Return true if this is an indirect branch, such as a
+ /// branch through a register.
+ bool isIndirectBranch(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::IndirectBranch, Type);
+ }
+
+ /// isConditionalBranch - Return true if this is a branch which may fall
+ /// through to the next instruction or may transfer control flow to some other
+ /// block. The TargetInstrInfo::AnalyzeBranch method can be used to get more
+ /// information about this branch.
+ bool isConditionalBranch(QueryType Type = AnyInBundle) const {
+ return isBranch(Type) && !isBarrier(Type) && !isIndirectBranch(Type);
+ }
+
+ /// isUnconditionalBranch - Return true if this is a branch which always
+ /// transfers control flow to some other block. The
+ /// TargetInstrInfo::AnalyzeBranch method can be used to get more information
+ /// about this branch.
+ bool isUnconditionalBranch(QueryType Type = AnyInBundle) const {
+ return isBranch(Type) && isBarrier(Type) && !isIndirectBranch(Type);
+ }
+
+ /// isPredicable - Return true if this instruction has a predicate operand
+ /// that controls execution. It may be set to 'always', or may be set to other
+ /// values. There are various methods in TargetInstrInfo that can be used to
+ /// control and modify the predicate in this instruction.
+ bool isPredicable(QueryType Type = AllInBundle) const {
+ // If it's a bundle then all bundled instructions must be predicable for
+ // this to return true.
+ return hasProperty(MCID::Predicable, Type);
+ }
+
+ /// isCompare - Return true if this instruction is a comparison.
+ bool isCompare(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Compare, Type);
+ }
+
+ /// isMoveImmediate - Return true if this instruction is a move-immediate
+ /// instruction (including conditional moves).
+ bool isMoveImmediate(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::MoveImm, Type);
}
+ /// isBitcast - Return true if this instruction is a bitcast instruction.
+ ///
+ bool isBitcast(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Bitcast, Type);
+ }
+
+ /// isNotDuplicable - Return true if this instruction cannot be safely
+ /// duplicated. For example, if the instruction has unique labels attached
+ /// to it, duplicating it would cause multiple definition errors.
+ bool isNotDuplicable(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::NotDuplicable, Type);
+ }
+
+ /// hasDelaySlot - Returns true if the specified instruction has a delay slot
+ /// which must be filled by the code generator.
+ bool hasDelaySlot(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::DelaySlot, Type);
+ }
+
+ /// canFoldAsLoad - Return true for instructions that can be folded as
+ /// memory operands in other instructions. The most common use for this
+ /// is instructions that are simple loads from memory that don't modify
+ /// the loaded value in any way, but it can also be used for instructions
+ /// that can be expressed as constant-pool loads, such as V_SETALLONES
+ /// on x86, to allow them to be folded when it is beneficial.
+ /// This should only be set on instructions that return a value in their
+ /// only virtual register definition.
+ bool canFoldAsLoad(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::FoldableAsLoad, Type);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Side Effect Analysis
+ //===--------------------------------------------------------------------===//
+
+ /// mayLoad - Return true if this instruction could possibly read memory.
+ /// Instructions with this flag set are not necessarily simple load
+ /// instructions; they may load a value and modify it, for example.
+ bool mayLoad(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::MayLoad, Type);
+ }
+
+
+ /// mayStore - Return true if this instruction could possibly modify memory.
+ /// Instructions with this flag set are not necessarily simple store
+ /// instructions; they may store a modified value based on their operands, or
+ /// may not actually modify anything, for example.
+ bool mayStore(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::MayStore, Type);
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Flags that indicate whether an instruction can be modified by a method.
+ //===--------------------------------------------------------------------===//
+
+ /// isCommutable - Return true if this may be a 2- or 3-address
+ /// instruction (of the form "X = op Y, Z, ..."), which produces the same
+ /// result if Y and Z are exchanged. If this flag is set, then the
+ /// TargetInstrInfo::commuteInstruction method may be used to hack on the
+ /// instruction.
+ ///
+ /// Note that this flag may be set on instructions that are only commutable
+ /// sometimes. In these cases, the call to commuteInstruction will fail.
+ /// Also note that some instructions require non-trivial modification to
+ /// commute them.
+ bool isCommutable(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::Commutable, Type);
+ }
+
+ /// isConvertibleTo3Addr - Return true if this is a 2-address instruction
+ /// which can be changed into a 3-address instruction if needed. Doing this
+ /// transformation can be profitable in the register allocator, because it
+ /// means that the instruction can use a 2-address form if possible, but
+ /// degrade into a less efficient form if the source and dest register cannot
+ /// be assigned to the same register. For example, this allows the x86
+ /// backend to turn a "shl reg, 3" instruction into an LEA instruction, which
+ /// is the same speed as the shift but has bigger code size.
+ ///
+ /// If this returns true, then the target must implement the
+ /// TargetInstrInfo::convertToThreeAddress method for this instruction, which
+ /// is allowed to fail if the transformation isn't valid for this specific
+ /// instruction (e.g. shl reg, 4 on x86).
+ ///
+ bool isConvertibleTo3Addr(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::ConvertibleTo3Addr, Type);
+ }
+
+ /// usesCustomInsertionHook - Return true if this instruction requires
+ /// custom insertion support when the DAG scheduler is inserting it into a
+ /// machine basic block. If this is true for the instruction, it basically
+ /// means that it is a pseudo instruction used at SelectionDAG time that is
+ /// expanded out into magic code by the target when MachineInstrs are formed.
+ ///
+ /// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method
+ /// is used to insert this into the MachineBasicBlock.
+ bool usesCustomInsertionHook(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::UsesCustomInserter, Type);
+ }
+
+ /// hasPostISelHook - Return true if this instruction requires *adjustment*
+ /// after instruction selection by calling a target hook. For example, this
+ /// can be used to fill in ARM 's' optional operand depending on whether
+ /// the conditional flag register is used.
+ bool hasPostISelHook(QueryType Type = IgnoreBundle) const {
+ return hasProperty(MCID::HasPostISelHook, Type);
+ }
+
+ /// isRematerializable - Returns true if this instruction is a candidate for
+ /// remat. This flag is deprecated; please don't use it anymore. If this
+ /// flag is set, the isReallyTriviallyReMaterializable() method is called to
+ /// verify the instruction is really rematable.
+ bool isRematerializable(QueryType Type = AllInBundle) const {
+ // It's only possible to re-mat a bundle if all bundled instructions are
+ // re-materializable.
+ return hasProperty(MCID::Rematerializable, Type);
+ }
+
+ /// isAsCheapAsAMove - Returns true if this instruction costs the same as (or
+ /// less than) a move instruction. This is useful during certain types of
+ /// optimizations (e.g., remat during two-address conversion or machine licm)
+ /// where we would like to remat or hoist the instruction, but not if it costs
+ /// more than moving the instruction into the appropriate register. Note, we
+ /// are not marking copies from and to the same register class with this flag.
+ bool isAsCheapAsAMove(QueryType Type = AllInBundle) const {
+ // Only returns true for a bundle if all bundled instructions are cheap.
+ // FIXME: This probably requires a target hook.
+ return hasProperty(MCID::CheapAsAMove, Type);
+ }
+
+ /// hasExtraSrcRegAllocReq - Returns true if this instruction's source operands
+ /// have special register allocation requirements that are not captured by the
+ /// operand register classes. e.g. ARM::STRD's two source registers must be an
+ /// even / odd pair, ARM::STM registers have to be in ascending order.
+ /// Post-register allocation passes should not attempt to change allocations
+ /// for sources of instructions with this flag.
+ bool hasExtraSrcRegAllocReq(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::ExtraSrcRegAllocReq, Type);
+ }
+
+ /// hasExtraDefRegAllocReq - Returns true if this instruction's def operands
+ /// have special register allocation requirements that are not captured by the
+ /// operand register classes. e.g. ARM::LDRD's two def registers must be an
+ /// even / odd pair, ARM::LDM registers have to be in ascending order.
+ /// Post-register allocation passes should not attempt to change allocations
+ /// for definitions of instructions with this flag.
+ bool hasExtraDefRegAllocReq(QueryType Type = AnyInBundle) const {
+ return hasProperty(MCID::ExtraDefRegAllocReq, Type);
+ }
+
+
enum MICheckType {
CheckDefs, // Check all operands for equality
CheckKillDead, // Check all operands including kill / dead markers
@@ -281,6 +613,9 @@ public:
bool isRegSequence() const {
return getOpcode() == TargetOpcode::REG_SEQUENCE;
}
+ bool isBundle() const {
+ return getOpcode() == TargetOpcode::BUNDLE;
+ }
bool isCopy() const {
return getOpcode() == TargetOpcode::COPY;
}
@@ -300,6 +635,9 @@ public:
getOperand(0).getSubReg() == getOperand(1).getSubReg();
}
+ /// getBundleSize - Return the number of instructions inside the MI bundle.
+ unsigned getBundleSize() const;
+
/// readsRegister - Return true if the MachineInstr reads the specified
/// register. If TargetRegisterInfo is passed, then it also checks if there
/// is a read of a super-register.
@@ -372,6 +710,7 @@ public:
/// that are not dead are skipped. If Overlap is true, then it also looks for
/// defs that merely overlap the specified register. If TargetRegisterInfo is
/// non-null, then it also checks if there is a def of a super-register.
+ /// This may also return a register mask operand when Overlap is true.
int findRegisterDefOperandIdx(unsigned Reg,
bool isDead = false, bool Overlap = false,
const TargetRegisterInfo *TRI = NULL) const;
@@ -416,7 +755,7 @@ public:
/// isRegTiedToUseOperand - Given the index of a register def operand,
/// check if the register def is tied to a source operand, due to either
/// two-address elimination or inline assembly constraints. Returns the
- /// first tied use operand index by reference is UseOpIdx is not null.
+ /// first tied use operand index by reference if UseOpIdx is not null.
bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx = 0) const;
/// isRegTiedToDefOperand - Return true if the use operand of the specified
@@ -448,6 +787,10 @@ public:
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound = false);
+ /// clearRegisterKills - Clear all kill flags affecting Reg. If RegInfo is
+ /// provided, this includes super-register kills.
+ void clearRegisterKills(unsigned Reg, const TargetRegisterInfo *RegInfo);
+
/// addRegisterDead - We have determined MI defined a register without a use.
/// Look for the operand that defines it and mark it as IsDead. If
/// AddIfNotFound is true, add an implicit operand if it's not found. Returns
@@ -462,7 +805,10 @@ public:
/// setPhysRegsDeadExcept - Mark every physreg used by this instruction as
/// dead except those in the UsedRegs list.
- void setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+ ///
+ /// On instructions with register mask operands, also add implicit-def
+ /// operands for all registers in UsedRegs.
+ void setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
const TargetRegisterInfo &TRI);
/// isSafeToMove - Return true if it is safe to move this instruction. If
@@ -550,7 +896,7 @@ public:
/// list. This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ NumMemRefs = NewMemRefsEnd - NewMemRefs;
}
private:
@@ -572,6 +918,10 @@ private:
/// this instruction from their respective use lists. This requires that the
/// operands not be on their use lists yet.
void AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo);
+
+ /// hasPropertyInBundle - Slow path for hasProperty when we're dealing with a
+ /// bundle.
+ bool hasPropertyInBundle(unsigned Mask, QueryType Type) const;
};
/// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare
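
To make the new bundle-aware queries concrete, here is a minimal sketch (not part of the patch) of how the QueryType argument changes what a query means when MI heads a bundle; it assumes the IgnoreBundle/AnyInBundle/AllInBundle enumerators this patch introduces on MachineInstr:

  #include "llvm/CodeGen/MachineInstr.h"

  // Sketch: query properties of MI, which may be a BUNDLE header.
  void queryBundle(const llvm::MachineInstr *MI) {
    using llvm::MachineInstr;
    // True if any instruction in the bundle may store (the default for
    // mayStore), so the bundle as a whole must be treated as a store.
    bool Stores = MI->mayStore(MachineInstr::AnyInBundle);
    // Commuting applies to a single instruction, so ignore the bundle.
    bool Commutable = MI->isCommutable(MachineInstr::IgnoreBundle);
    // Remat is only safe if every bundled instruction is rematerializable.
    bool Remat = MI->isRematerializable(MachineInstr::AllInBundle);
    (void)Stores; (void)Commutable; (void)Remat;
  }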
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index b98902724690..99849a64c56a 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -34,6 +34,7 @@ namespace RegState {
Undef = 0x20,
EarlyClobber = 0x40,
Debug = 0x80,
+ DefineNoRead = Define | Undef,
ImplicitDefine = Implicit | Define,
ImplicitKill = Implicit | Kill
};
@@ -124,6 +125,11 @@ public:
return *this;
}
+ const MachineInstrBuilder &addRegMask(const uint32_t *Mask) const {
+ MI->addOperand(MachineOperand::CreateRegMask(Mask));
+ return *this;
+ }
+
const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const {
MI->addMemOperand(*MI->getParent()->getParent(), MMO);
return *this;
@@ -209,6 +215,30 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
}
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::instr_iterator I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID,
+ unsigned DestReg) {
+ MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
+}
+
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineInstr *I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID,
+ unsigned DestReg) {
+ if (I->isInsideBundle()) {
+ MachineBasicBlock::instr_iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID, DestReg);
+ }
+
+ MachineBasicBlock::iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID, DestReg);
+}
+
/// BuildMI - This version of the builder inserts the newly-built
/// instruction before the given position in the given MachineBasicBlock, and
/// does NOT take a destination register.
@@ -222,6 +252,28 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
return MachineInstrBuilder(MI);
}
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineBasicBlock::instr_iterator I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID) {
+ MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
+ BB.insert(I, MI);
+ return MachineInstrBuilder(MI);
+}
+
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+ MachineInstr *I,
+ DebugLoc DL,
+ const MCInstrDesc &MCID) {
+ if (I->isInsideBundle()) {
+ MachineBasicBlock::instr_iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID);
+ }
+
+ MachineBasicBlock::iterator MII = I;
+ return BuildMI(BB, MII, DL, MCID);
+}
+
/// BuildMI - This version of the builder inserts the newly-built
/// instruction at the end of the given MachineBasicBlock, and does NOT take a
/// destination register.
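
As a rough usage sketch (not part of the patch), the new addRegMask() method composes with BuildMI when emitting a call-like instruction; CallDesc and PreservedMask are placeholders the target would supply, e.g. via TargetRegisterInfo::getCallPreservedMask():

  #include "llvm/CodeGen/MachineInstrBuilder.h"
  using namespace llvm;

  // Sketch: emit a call that preserves exactly the registers whose bit is
  // set in PreservedMask; every other physical register is clobbered.
  MachineInstrBuilder emitCall(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, DebugLoc DL,
                               const MCInstrDesc &CallDesc,
                               const uint32_t *PreservedMask) {
    return BuildMI(MBB, I, DL, CallDesc).addRegMask(PreservedMask);
  }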
diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h
new file mode 100644
index 000000000000..0fb496982276
--- /dev/null
+++ b/include/llvm/CodeGen/MachineInstrBundle.h
@@ -0,0 +1,203 @@
+//===-- CodeGen/MachineInstrBundle.h - MI bundle utilities ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility functions to manipulate machine instruction
+// bundles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEINSTRBUNDLE_H
+#define LLVM_CODEGEN_MACHINEINSTRBUNDLE_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// a sequence of instructions starting from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI);
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with the 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI);
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool finalizeBundles(MachineFunction &MF);
+
+/// getBundleStart - Returns the first instruction in the bundle containing MI.
+///
+static inline MachineInstr *getBundleStart(MachineInstr *MI) {
+ MachineBasicBlock::instr_iterator I = MI;
+ while (I->isInsideBundle())
+ --I;
+ return I;
+}
+
+static inline const MachineInstr *getBundleStart(const MachineInstr *MI) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ while (I->isInsideBundle())
+ --I;
+ return I;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//
+
+/// MachineOperandIteratorBase - Iterator that can visit all operands on a
+/// MachineInstr, or all operands on a bundle of MachineInstrs. This class is
+/// not intended to be used directly, use one of the sub-classes instead.
+///
+/// Intended use:
+///
+/// for (MIBundleOperands MIO(MI); MIO.isValid(); ++MIO) {
+/// if (!MIO->isReg())
+/// continue;
+/// ...
+/// }
+///
+class MachineOperandIteratorBase {
+ MachineBasicBlock::instr_iterator InstrI, InstrE;
+ MachineInstr::mop_iterator OpI, OpE;
+
+ // If the operands on InstrI are exhausted, advance InstrI to the next
+ // bundled instruction with operands.
+ void advance() {
+ while (OpI == OpE) {
+ // Don't advance off the basic block, or into a new bundle.
+ if (++InstrI == InstrE || !InstrI->isInsideBundle())
+ break;
+ OpI = InstrI->operands_begin();
+ OpE = InstrI->operands_end();
+ }
+ }
+
+protected:
+ /// MachineOperandIteratorBase - Create an iterator that visits all operands
+ /// on MI, or all operands on every instruction in the bundle containing MI.
+ ///
+ /// @param MI The instruction to examine.
+ /// @param WholeBundle When true, visit all operands on the entire bundle.
+ ///
+ explicit MachineOperandIteratorBase(MachineInstr *MI, bool WholeBundle) {
+ if (WholeBundle) {
+ InstrI = getBundleStart(MI);
+ InstrE = MI->getParent()->instr_end();
+ } else {
+ InstrI = InstrE = MI;
+ ++InstrE;
+ }
+ OpI = InstrI->operands_begin();
+ OpE = InstrI->operands_end();
+ if (WholeBundle)
+ advance();
+ }
+
+ MachineOperand &deref() const { return *OpI; }
+
+public:
+ /// isValid - Returns true until all the operands have been visited.
+ bool isValid() const { return OpI != OpE; }
+
+ /// Preincrement. Move to the next operand.
+ void operator++() {
+ assert(isValid() && "Cannot advance MIOperands beyond the last operand");
+ ++OpI;
+ advance();
+ }
+
+ /// getOperandNo - Returns the number of the current operand relative to its
+ /// instruction.
+ ///
+ unsigned getOperandNo() const {
+ return OpI - InstrI->operands_begin();
+ }
+
+ /// RegInfo - Information about a virtual register used by a set of operands.
+ ///
+ struct RegInfo {
+ /// Reads - One of the operands reads the virtual register. This does not
+ /// include <undef> or <internal> use operands; see MO::readsReg().
+ bool Reads;
+
+ /// Writes - One of the operands writes the virtual register.
+ bool Writes;
+
+ /// Tied - Uses and defs must use the same register. This can be because of
+ /// a two-address constraint, or there may be a partial redefinition of a
+ /// sub-register.
+ bool Tied;
+ };
+
+ /// analyzeVirtReg - Analyze how the current instruction or bundle uses a
+ /// virtual register. This function should not be called after operator++(),
+ /// it expects a fresh iterator.
+ ///
+ /// @param Reg The virtual register to analyze.
+ /// @param Ops When set, this vector will receive an (MI, OpNum) entry for
+ /// each operand referring to Reg.
+ /// @returns A filled-in RegInfo struct.
+ RegInfo analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops = 0);
+};
+
+/// MIOperands - Iterate over operands of a single instruction.
+///
+class MIOperands : public MachineOperandIteratorBase {
+public:
+ MIOperands(MachineInstr *MI) : MachineOperandIteratorBase(MI, false) {}
+ MachineOperand &operator* () const { return deref(); }
+ MachineOperand *operator->() const { return &deref(); }
+};
+
+/// ConstMIOperands - Iterate over operands of a single const instruction.
+///
+class ConstMIOperands : public MachineOperandIteratorBase {
+public:
+ ConstMIOperands(const MachineInstr *MI)
+ : MachineOperandIteratorBase(const_cast<MachineInstr*>(MI), false) {}
+ const MachineOperand &operator* () const { return deref(); }
+ const MachineOperand *operator->() const { return &deref(); }
+};
+
+/// MIBundleOperands - Iterate over all operands in a bundle of machine
+/// instructions.
+///
+class MIBundleOperands : public MachineOperandIteratorBase {
+public:
+ MIBundleOperands(MachineInstr *MI) : MachineOperandIteratorBase(MI, true) {}
+ MachineOperand &operator* () const { return deref(); }
+ MachineOperand *operator->() const { return &deref(); }
+};
+
+/// ConstMIBundleOperands - Iterate over all operands in a const bundle of
+/// machine instructions.
+///
+class ConstMIBundleOperands : public MachineOperandIteratorBase {
+public:
+ ConstMIBundleOperands(const MachineInstr *MI)
+ : MachineOperandIteratorBase(const_cast<MachineInstr*>(MI), true) {}
+ const MachineOperand &operator* () const { return deref(); }
+ const MachineOperand *operator->() const { return &deref(); }
+};
+
+} // End llvm namespace
+
+#endif
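
A small usage sketch (not part of the patch) of the iterators declared above; countRegOperands and bundleReadsVirtReg are hypothetical helpers:

  #include "llvm/CodeGen/MachineInstrBundle.h"
  using namespace llvm;

  // Count the register operands across MI's entire bundle.
  static unsigned countRegOperands(MachineInstr *MI) {
    unsigned N = 0;
    for (MIBundleOperands MIO(MI); MIO.isValid(); ++MIO)
      if (MIO->isReg())
        ++N;
    return N;
  }

  // Ask whether the bundle reads virtual register Reg. analyzeVirtReg
  // requires a fresh iterator, so construct one just for the query.
  static bool bundleReadsVirtReg(MachineInstr *MI, unsigned Reg) {
    return MIBundleOperands(MI).analyzeVirtReg(Reg).Reads;
  }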
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 62643497655a..6bd6682dd39c 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -47,7 +47,12 @@ public:
/// EK_BlockAddress - Each entry is a plain address of block, e.g.:
/// .word LBB123
EK_BlockAddress,
-
+
+ /// EK_GPRel64BlockAddress - Each entry is an address of block, encoded
+ /// with a relocation as gp-relative, e.g.:
+ /// .gpdword LBB123
+ EK_GPRel64BlockAddress,
+
/// EK_GPRel32BlockAddress - Each entry is an address of block, encoded
/// with a relocation as gp-relative, e.g.:
/// .gprel32 LBB123
diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h
index 768ce47f8b39..1ac9080b75d5 100644
--- a/include/llvm/CodeGen/MachineMemOperand.h
+++ b/include/llvm/CodeGen/MachineMemOperand.h
@@ -22,6 +22,7 @@ namespace llvm {
class Value;
class FoldingSetNodeID;
+class MDNode;
class raw_ostream;
/// MachinePointerInfo - This class contains a discriminated union of
@@ -83,6 +84,7 @@ class MachineMemOperand {
uint64_t Size;
unsigned Flags;
const MDNode *TBAAInfo;
+ const MDNode *Ranges;
public:
/// Flags values. These may be or'd together.
@@ -95,14 +97,17 @@ public:
MOVolatile = 4,
/// The memory access is non-temporal.
MONonTemporal = 8,
+ /// The memory access is invariant.
+ MOInvariant = 16,
// This is the number of bits we need to represent flags.
- MOMaxBits = 4
+ MOMaxBits = 5
};
/// MachineMemOperand - Construct an MachineMemOperand object with the
/// specified PtrInfo, flags, size, and base alignment.
MachineMemOperand(MachinePointerInfo PtrInfo, unsigned flags, uint64_t s,
- unsigned base_alignment, const MDNode *TBAAInfo = 0);
+ unsigned base_alignment, const MDNode *TBAAInfo = 0,
+ const MDNode *Ranges = 0);
const MachinePointerInfo &getPointerInfo() const { return PtrInfo; }
@@ -137,10 +142,14 @@ public:
/// getTBAAInfo - Return the TBAA tag for the memory reference.
const MDNode *getTBAAInfo() const { return TBAAInfo; }
+ /// getRanges - Return the range tag for the memory reference.
+ const MDNode *getRanges() const { return Ranges; }
+
bool isLoad() const { return Flags & MOLoad; }
bool isStore() const { return Flags & MOStore; }
bool isVolatile() const { return Flags & MOVolatile; }
bool isNonTemporal() const { return Flags & MONonTemporal; }
+ bool isInvariant() const { return Flags & MOInvariant; }
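
A sketch (not part of the patch) of describing an invariant load with the extended constructor; GV, the size, and the alignment are hypothetical:

  #include "llvm/CodeGen/MachineMemOperand.h"
  using namespace llvm;

  // Describe an 8-byte, 8-aligned load from GV whose value never changes,
  // with no TBAA or range metadata attached.
  MachineMemOperand describeInvariantLoad(const Value *GV) {
    return MachineMemOperand(MachinePointerInfo(GV),
                             MachineMemOperand::MOLoad |
                                 MachineMemOperand::MOInvariant,
                             /*s=*/8, /*base_alignment=*/8,
                             /*TBAAInfo=*/0, /*Ranges=*/0);
  }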
/// refineAlignment - Update this MachineMemOperand to reflect the alignment
/// of MMO, if it has a greater alignment. This must only be used when the
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index 2bf7f1788f8a..6b88d4a9499b 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -161,10 +161,10 @@ class MachineModuleInfo : public ImmutablePass {
/// in this module.
bool DbgInfoAvailable;
- /// CallsExternalVAFunctionWithFloatingPointArguments - True if this module
- /// calls VarArg function with floating point arguments. This is used to emit
- /// an undefined reference to fltused on Windows targets.
- bool CallsExternalVAFunctionWithFloatingPointArguments;
+ /// UsesVAFloatArgument - True if this module calls a VarArg function with
+ /// floating-point arguments. This is used to emit an undefined reference
+ /// to _fltused on Windows targets.
+ bool UsesVAFloatArgument;
public:
static char ID; // Pass identification, replacement for typeid
@@ -223,12 +223,12 @@ public:
bool callsUnwindInit() const { return CallsUnwindInit; }
void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
- bool callsExternalVAFunctionWithFloatingPointArguments() const {
- return CallsExternalVAFunctionWithFloatingPointArguments;
+ bool usesVAFloatArgument() const {
+ return UsesVAFloatArgument;
}
- void setCallsExternalVAFunctionWithFloatingPointArguments(bool b) {
- CallsExternalVAFunctionWithFloatingPointArguments = b;
+ void setUsesVAFloatArgument(bool b) {
+ UsesVAFloatArgument = b;
}
/// getFrameMoves - Returns a reference to a list of moves done in the current
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 5440a636a4a5..d244dd92103d 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -48,6 +48,7 @@ public:
MO_ExternalSymbol, ///< Name of external global symbol
MO_GlobalAddress, ///< Address of a global value
MO_BlockAddress, ///< Address of a basic block
+ MO_RegisterMask, ///< Mask of preserved registers.
MO_Metadata, ///< Metadata reference (for debug info)
MO_MCSymbol ///< MCSymbol reference (for debug/eh info)
};
@@ -102,6 +103,17 @@ private:
///
bool IsUndef : 1;
+ /// IsInternalRead - True if this operand reads a value that was defined
+ /// inside the same instruction or bundle. This flag can be set on both use
+ /// and def operands. On a sub-register def operand, it refers to the part
+ /// of the register that isn't written. On a full-register def operand, it
+ /// is a noop.
+ ///
+ /// When this flag is set, the instruction bundle must contain at least one
+ /// other def of the register. If multiple instructions in the bundle define
+ /// the register, the meaning is target-defined.
+ bool IsInternalRead : 1;
+
/// IsEarlyClobber - True if this MO_Register 'def' operand is written to
/// by the MachineInstr before all input registers are read. This is used to
/// model the GCC inline asm '&' constraint modifier.
@@ -130,6 +142,7 @@ private:
const ConstantFP *CFP; // For MO_FPImmediate.
const ConstantInt *CI; // For MO_CImmediate. Integers > 64bit.
int64_t ImmVal; // For MO_Immediate.
+ const uint32_t *RegMask; // For MO_RegisterMask.
const MDNode *MD; // For MO_Metadata.
MCSymbol *Sym; // For MO_MCSymbol
@@ -209,10 +222,13 @@ public:
bool isSymbol() const { return OpKind == MO_ExternalSymbol; }
/// isBlockAddress - Tests if this is a MO_BlockAddress operand.
bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
+ /// isRegMask - Tests if this is a MO_RegisterMask operand.
+ bool isRegMask() const { return OpKind == MO_RegisterMask; }
/// isMetadata - Tests if this is a MO_Metadata operand.
bool isMetadata() const { return OpKind == MO_Metadata; }
bool isMCSymbol() const { return OpKind == MO_MCSymbol; }
+
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
//===--------------------------------------------------------------------===//
@@ -258,6 +274,11 @@ public:
return IsUndef;
}
+ bool isInternalRead() const {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ return IsInternalRead;
+ }
+
bool isEarlyClobber() const {
assert(isReg() && "Wrong MachineOperand accessor");
return IsEarlyClobber;
@@ -272,9 +293,12 @@ public:
/// register. A use operand with the <undef> flag set doesn't read its
/// register. A sub-register def implicitly reads the other parts of the
/// register being redefined unless the <undef> flag is set.
+ ///
+ /// This refers to reading the register value from before the current
+ /// instruction or bundle. Internal bundle reads are not included.
bool readsReg() const {
assert(isReg() && "Wrong MachineOperand accessor");
- return !isUndef() && (isUse() || getSubReg());
+ return !isUndef() && !isInternalRead() && (isUse() || getSubReg());
}
/// getNextOperandForReg - Return the next MachineOperand in the function that
@@ -343,6 +367,11 @@ public:
IsUndef = Val;
}
+ void setIsInternalRead(bool Val = true) {
+ assert(isReg() && "Wrong MachineOperand accessor");
+ IsInternalRead = Val;
+ }
+
void setIsEarlyClobber(bool Val = true) {
assert(isReg() && IsDef && "Wrong MachineOperand accessor");
IsEarlyClobber = Val;
@@ -412,6 +441,28 @@ public:
return Contents.OffsetedInfo.Val.SymbolName;
}
+ /// clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
+ /// It is sometimes necessary to detach the register mask pointer from its
+ /// machine operand. This static method can be used for such detached bit
+ /// mask pointers.
+ static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
+ // See TargetRegisterInfo.h.
+ assert(PhysReg < (1u << 30) && "Not a physical register");
+ return !(RegMask[PhysReg / 32] & (1u << PhysReg % 32));
+ }
+
+ /// clobbersPhysReg - Returns true if this RegMask operand clobbers PhysReg.
+ bool clobbersPhysReg(unsigned PhysReg) const {
+ return clobbersPhysReg(getRegMask(), PhysReg);
+ }
+
+ /// getRegMask - Returns a bit mask of registers preserved by this RegMask
+ /// operand.
+ const uint32_t *getRegMask() const {
+ assert(isRegMask() && "Wrong MachineOperand accessor");
+ return Contents.RegMask;
+ }
+
const MDNode *getMetadata() const {
assert(isMetadata() && "Wrong MachineOperand accessor");
return Contents.MD;
@@ -498,6 +549,7 @@ public:
Op.IsKill = isKill;
Op.IsDead = isDead;
Op.IsUndef = isUndef;
+ Op.IsInternalRead = false;
Op.IsEarlyClobber = isEarlyClobber;
Op.IsDebug = isDebug;
Op.SmallContents.RegNo = Reg;
@@ -557,6 +609,24 @@ public:
Op.setTargetFlags(TargetFlags);
return Op;
}
+ /// CreateRegMask - Creates a register mask operand referencing Mask. The
+ /// operand does not take ownership of the memory referenced by Mask; it must
+ /// remain valid for the lifetime of the operand.
+ ///
+ /// A RegMask operand represents a set of non-clobbered physical registers on
+ /// an instruction that clobbers many registers, typically a call. The bit
+ /// mask has a bit set for each physreg that is preserved by this
+ /// instruction, as described in the documentation for
+ /// TargetRegisterInfo::getCallPreservedMask().
+ ///
+ /// Any physreg with a 0 bit in the mask is clobbered by the instruction.
+ ///
+ static MachineOperand CreateRegMask(const uint32_t *Mask) {
+ assert(Mask && "Missing register mask");
+ MachineOperand Op(MachineOperand::MO_RegisterMask);
+ Op.Contents.RegMask = Mask;
+ return Op;
+ }
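
To make the encoding concrete, a small sketch (not part of the patch): one bit per physical register, 32 bits per array element, a set bit meaning "preserved":

  #include <cstdint>
  #include <vector>

  // Build a mask over NumRegs physregs that preserves only R1 and R2
  // (hypothetical register numbers); all other registers are clobbered.
  std::vector<uint32_t> makePreservedMask(unsigned NumRegs,
                                          unsigned R1, unsigned R2) {
    std::vector<uint32_t> Mask((NumRegs + 31) / 32, 0); // all bits 0
    Mask[R1 / 32] |= 1u << (R1 % 32);
    Mask[R2 / 32] |= 1u << (R2 % 32);
    return Mask;
  }
  // MachineOperand::clobbersPhysReg(&Mask[0], Reg) then returns true
  // exactly for the registers whose bit was left at 0.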
static MachineOperand CreateMetadata(const MDNode *Meta) {
MachineOperand Op(MachineOperand::MO_Metadata);
Op.Contents.MD = Meta;
diff --git a/include/llvm/CodeGen/MachinePassRegistry.h b/include/llvm/CodeGen/MachinePassRegistry.h
index 6ee2e90a9f56..c41e8e26d66a 100644
--- a/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/include/llvm/CodeGen/MachinePassRegistry.h
@@ -33,6 +33,7 @@ typedef void *(*MachinePassCtor)();
///
//===----------------------------------------------------------------------===//
class MachinePassRegistryListener {
+ virtual void anchor();
public:
MachinePassRegistryListener() {}
virtual ~MachinePassRegistryListener() {}
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 3866b2650d0b..3272fbd78ff5 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -15,12 +15,13 @@
#define LLVM_CODEGEN_MACHINEREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include <vector>
namespace llvm {
-
+
/// MachineRegisterInfo - Keep track of information for virtual and physical
/// registers, including vreg register classes, use/def chains for registers,
/// etc.
@@ -31,6 +32,11 @@ class MachineRegisterInfo {
/// registers have a single def.
bool IsSSA;
+ /// TracksLiveness - True while register liveness is being tracked accurately.
+ /// Basic block live-in lists, kill flags, and implicit defs may not be
+ /// accurate after this flag is cleared.
+ bool TracksLiveness;
+
/// VRegInfo - Information we keep for each virtual register.
///
/// Each element in this list contains the register class of the vreg and the
@@ -46,18 +52,32 @@ class MachineRegisterInfo {
/// the allocator should prefer the physical register allocated to the virtual
/// register of the hint.
IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints;
-
+
/// PhysRegUseDefLists - This is an array of the head of the use/def list for
/// physical registers.
- MachineOperand **PhysRegUseDefLists;
-
+ MachineOperand **PhysRegUseDefLists;
+
/// UsedPhysRegs - This is a bit vector that is computed and set by the
/// register allocator, and must be kept up to date by passes that run after
/// register allocation (though most don't modify this). This is used
/// so that the code generator knows which callee save registers to save and
/// for other target specific uses.
+ /// This vector only has bits set for registers explicitly used, not their
+ /// aliases.
BitVector UsedPhysRegs;
-
+
+ /// UsedPhysRegMask - Additional used physregs, but including aliases.
+ BitVector UsedPhysRegMask;
+
+ /// ReservedRegs - This is a bit vector of reserved registers. The target
+ /// may change its mind about which registers should be reserved. This
+ /// vector is the frozen set of reserved registers when register allocation
+ /// started.
+ BitVector ReservedRegs;
+
+ /// AllocatableRegs - From TRI->getAllocatableSet.
+ mutable BitVector AllocatableRegs;
+
/// LiveIns/LiveOuts - Keep track of the physical registers that are
/// livein/liveout of the function. Live in values are typically arguments in
/// registers, live out values are typically return values in registers.
@@ -65,7 +85,7 @@ class MachineRegisterInfo {
/// stored in the second element.
std::vector<std::pair<unsigned, unsigned> > LiveIns;
std::vector<unsigned> LiveOuts;
-
+
MachineRegisterInfo(const MachineRegisterInfo&); // DO NOT IMPLEMENT
void operator=(const MachineRegisterInfo&); // DO NOT IMPLEMENT
public:
@@ -88,6 +108,23 @@ public:
// leaveSSA - Indicates that the machine function is no longer in SSA form.
void leaveSSA() { IsSSA = false; }
+ /// tracksLiveness - Returns true when tracking register liveness accurately.
+ ///
+ /// While this flag is true, register liveness information in basic block
+ /// live-in lists and machine instruction operands is accurate. This means it
+ /// can be used to change the code in ways that affect the values in
+ /// registers, for example by the register scavenger.
+ ///
+ /// When this flag is false, liveness is no longer reliable.
+ bool tracksLiveness() const { return TracksLiveness; }
+
+ /// invalidateLiveness - Indicates that register liveness is no longer being
+ /// tracked accurately.
+ ///
+ /// This should be called by late passes that invalidate the liveness
+ /// information.
+ void invalidateLiveness() { TracksLiveness = false; }
+
//===--------------------------------------------------------------------===//
// Register Info
//===--------------------------------------------------------------------===//
@@ -141,7 +178,7 @@ public:
return use_iterator(getRegUseDefListHead(RegNo));
}
static use_iterator use_end() { return use_iterator(0); }
-
+
/// use_empty - Return true if there are no instructions using the specified
/// register.
bool use_empty(unsigned RegNo) const { return use_begin(RegNo) == use_end(); }
@@ -157,7 +194,7 @@ public:
return use_nodbg_iterator(getRegUseDefListHead(RegNo));
}
static use_nodbg_iterator use_nodbg_end() { return use_nodbg_iterator(0); }
-
+
/// use_nodbg_empty - Return true if there are no non-Debug instructions
/// using the specified register.
bool use_nodbg_empty(unsigned RegNo) const {
@@ -171,8 +208,16 @@ public:
/// replaceRegWith - Replace all instances of FromReg with ToReg in the
/// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
/// except that it also changes any definitions of the register as well.
+ ///
+ /// Note that it is usually necessary to first constrain ToReg's register
+ /// class to match the FromReg constraints using:
+ ///
+ /// constrainRegClass(ToReg, getRegClass(FromReg))
+ ///
+ /// That function will return NULL if the virtual registers have incompatible
+ /// constraints.
void replaceRegWith(unsigned FromReg, unsigned ToReg);
-
+
/// getRegUseDefListHead - Return the head pointer for the register use/def
/// list for the specified virtual or physical register.
MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
@@ -180,7 +225,7 @@ public:
return VRegInfo[RegNo].second;
return PhysRegUseDefLists[RegNo];
}
-
+
MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
if (TargetRegisterInfo::isVirtualRegister(RegNo))
return VRegInfo[RegNo].second;
@@ -197,15 +242,20 @@ public:
/// optimization passes which extend register lifetimes and need only
/// preserve conservative kill flag information.
void clearKillFlags(unsigned Reg) const;
-
+
#ifndef NDEBUG
void dumpUses(unsigned RegNo) const;
#endif
-
+
+ /// isConstantPhysReg - Returns true if PhysReg is unallocatable and constant
+ /// throughout the function. It is safe to move instructions that read such
+ /// a physreg.
+ bool isConstantPhysReg(unsigned PhysReg, const MachineFunction &MF) const;
+
//===--------------------------------------------------------------------===//
// Virtual Register Info
//===--------------------------------------------------------------------===//
-
+
/// getRegClass - Return the register class of the specified virtual register.
///
const TargetRegisterClass *getRegClass(unsigned Reg) const {
@@ -246,6 +296,9 @@ public:
///
unsigned getNumVirtRegs() const { return VRegInfo.size(); }
+ /// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+ void clearVirtRegs();
+
/// setRegAllocationHint - Specify a register allocation hint for the
/// specified virtual register.
void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) {
@@ -271,38 +324,87 @@ public:
//===--------------------------------------------------------------------===//
// Physical Register Use Info
//===--------------------------------------------------------------------===//
-
+
/// isPhysRegUsed - Return true if the specified register is used in this
/// function. This only works after register allocation.
- bool isPhysRegUsed(unsigned Reg) const { return UsedPhysRegs[Reg]; }
-
+ bool isPhysRegUsed(unsigned Reg) const {
+ return UsedPhysRegs.test(Reg) || UsedPhysRegMask.test(Reg);
+ }
+
+ /// isPhysRegOrOverlapUsed - Return true if Reg or any overlapping register
+ /// is used in this function.
+ bool isPhysRegOrOverlapUsed(unsigned Reg) const {
+ if (UsedPhysRegMask.test(Reg))
+ return true;
+ for (const uint16_t *AI = TRI->getOverlaps(Reg); *AI; ++AI)
+ if (UsedPhysRegs.test(*AI))
+ return true;
+ return false;
+ }
+
/// setPhysRegUsed - Mark the specified register used in this function.
/// This should only be called during and after register allocation.
- void setPhysRegUsed(unsigned Reg) { UsedPhysRegs[Reg] = true; }
+ void setPhysRegUsed(unsigned Reg) { UsedPhysRegs.set(Reg); }
/// addPhysRegsUsed - Mark the specified registers used in this function.
/// This should only be called during and after register allocation.
void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; }
+ /// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
+ /// This corresponds to the bit mask attached to register mask operands.
+ void addPhysRegsUsedFromRegMask(const uint32_t *RegMask) {
+ UsedPhysRegMask.setBitsNotInMask(RegMask);
+ }
+
/// setPhysRegUnused - Mark the specified register unused in this function.
/// This should only be called during and after register allocation.
- void setPhysRegUnused(unsigned Reg) { UsedPhysRegs[Reg] = false; }
+ void setPhysRegUnused(unsigned Reg) {
+ UsedPhysRegs.reset(Reg);
+ UsedPhysRegMask.reset(Reg);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Reserved Register Info
+ //===--------------------------------------------------------------------===//
+ //
+ // The set of reserved registers must be invariant during register
+ // allocation. For example, the target cannot suddenly decide it needs a
+ // frame pointer when the register allocator has already used the frame
+ // pointer register for something else.
+ //
+ // These methods can be used by target hooks like hasFP() to avoid changing
+ // the reserved register set during register allocation.
+
+ /// freezeReservedRegs - Called by the register allocator to freeze the set
+ /// of reserved registers before allocation begins.
+ void freezeReservedRegs(const MachineFunction&);
+
+ /// reservedRegsFrozen - Returns true after freezeReservedRegs() was called
+ /// to ensure the set of reserved registers stays constant.
+ bool reservedRegsFrozen() const {
+ return !ReservedRegs.empty();
+ }
+
+ /// canReserveReg - Returns true if PhysReg can be used as a reserved
+ /// register. Any register can be reserved before freezeReservedRegs() is
+ /// called.
+ bool canReserveReg(unsigned PhysReg) const {
+ return !reservedRegsFrozen() || ReservedRegs.test(PhysReg);
+ }
- /// closePhysRegsUsed - Expand UsedPhysRegs to its transitive closure over
- /// subregisters. That means that if R is used, so are all subregisters.
- void closePhysRegsUsed(const TargetRegisterInfo&);
//===--------------------------------------------------------------------===//
// LiveIn/LiveOut Management
//===--------------------------------------------------------------------===//
-
+
/// addLiveIn/Out - Add the specified register as a live in/out. Note that it
/// is an error to add the same register to the same set more than once.
void addLiveIn(unsigned Reg, unsigned vreg = 0) {
LiveIns.push_back(std::make_pair(Reg, vreg));
}
void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); }
-
+
// Iteration support for live in/out sets. These sets are kept in sorted
// order by their register number.
typedef std::vector<std::pair<unsigned,unsigned> >::const_iterator
@@ -334,7 +436,7 @@ public:
private:
void HandleVRegListReallocation();
-
+
public:
/// defusechain_iterator - This class provides iterator support for machine
/// operands in the function that use or define a specific register. If
@@ -362,31 +464,31 @@ public:
MachineInstr, ptrdiff_t>::reference reference;
typedef std::iterator<std::forward_iterator_tag,
MachineInstr, ptrdiff_t>::pointer pointer;
-
+
defusechain_iterator(const defusechain_iterator &I) : Op(I.Op) {}
defusechain_iterator() : Op(0) {}
-
+
bool operator==(const defusechain_iterator &x) const {
return Op == x.Op;
}
bool operator!=(const defusechain_iterator &x) const {
return !operator==(x);
}
-
+
/// atEnd - return true if this iterator is equal to reg_end() on the value.
bool atEnd() const { return Op == 0; }
-
+
// Iterator traversal: forward iteration only
defusechain_iterator &operator++() { // Preincrement
assert(Op && "Cannot increment end iterator!");
Op = Op->getNextOperandForReg();
-
+
// If this is an operand we don't care about, skip it.
- while (Op && ((!ReturnUses && Op->isUse()) ||
+ while (Op && ((!ReturnUses && Op->isUse()) ||
(!ReturnDefs && Op->isDef()) ||
(SkipDebug && Op->isDebug())))
Op = Op->getNextOperandForReg();
-
+
return *this;
}
defusechain_iterator operator++(int) { // Postincrement
@@ -404,30 +506,38 @@ public:
return MI;
}
+ MachineInstr *skipBundle() {
+ if (!Op) return 0;
+ MachineInstr *MI = getBundleStart(Op->getParent());
+ do ++*this;
+ while (Op && getBundleStart(Op->getParent()) == MI);
+ return MI;
+ }
+
MachineOperand &getOperand() const {
assert(Op && "Cannot dereference end iterator!");
return *Op;
}
-
+
/// getOperandNo - Return the operand # of this MachineOperand in its
/// MachineInstr.
unsigned getOperandNo() const {
assert(Op && "Cannot dereference end iterator!");
return Op - &Op->getParent()->getOperand(0);
}
-
+
// Retrieve a reference to the current operand.
MachineInstr &operator*() const {
assert(Op && "Cannot dereference end iterator!");
return *Op->getParent();
}
-
+
MachineInstr *operator->() const {
assert(Op && "Cannot dereference end iterator!");
return Op->getParent();
}
};
-
+
};
} // End llvm namespace
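
A sketch (not part of the patch) of the new skipBundle() in a use-chain walk; visitUsers is a hypothetical helper:

  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  // Visit each instruction that uses Reg once, via its bundle head, even
  // when several bundled instructions have operands on the use chain.
  void visitUsers(MachineRegisterInfo &MRI, unsigned Reg) {
    for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg);
         UI != MRI.use_end();) {
      MachineInstr *BundleHead = UI.skipBundle(); // advances UI
      (void)BundleHead; // ... inspect the bundle here ...
    }
  }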
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
new file mode 100644
index 000000000000..e852009f7e8b
--- /dev/null
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -0,0 +1,91 @@
+//==- MachineScheduler.h - MachineInstr Scheduling Pass ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a MachineSchedRegistry for registering alternative machine
+// schedulers. A Target may provide an alternative scheduler implementation by
+// implementing the following boilerplate:
+//
+// static ScheduleDAGInstrs *createCustomMachineSched(MachineSchedContext *C) {
+// return new CustomMachineScheduler(C);
+// }
+// static MachineSchedRegistry
+// SchedCustomRegistry("custom", "Run my target's custom scheduler",
+// createCustomMachineSched);
+//
+// Inside <Target>PassConfig:
+// enablePass(MachineSchedulerID);
+// MachineSchedRegistry::setDefault(createCustomMachineSched);
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MACHINESCHEDULER_H
+#define MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineDominatorTree;
+class MachineLoopInfo;
+class ScheduleDAGInstrs;
+
+/// MachineSchedContext provides enough context from the MachineScheduler pass
+/// for the target to instantiate a scheduler.
+struct MachineSchedContext {
+ MachineFunction *MF;
+ const MachineLoopInfo *MLI;
+ const MachineDominatorTree *MDT;
+ const TargetPassConfig *PassConfig;
+ AliasAnalysis *AA;
+ LiveIntervals *LIS;
+
+ MachineSchedContext(): MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {}
+};
+
+/// MachineSchedRegistry provides a selection of available machine instruction
+/// schedulers.
+class MachineSchedRegistry : public MachinePassRegistryNode {
+public:
+ typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *);
+
+ // RegisterPassParser requires a (misnamed) FunctionPassCtor type.
+ typedef ScheduleDAGCtor FunctionPassCtor;
+
+ static MachinePassRegistry Registry;
+
+ MachineSchedRegistry(const char *N, const char *D, ScheduleDAGCtor C)
+ : MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
+ Registry.Add(this);
+ }
+ ~MachineSchedRegistry() { Registry.Remove(this); }
+
+ // Accessors.
+ //
+ MachineSchedRegistry *getNext() const {
+ return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
+ }
+ static MachineSchedRegistry *getList() {
+ return (MachineSchedRegistry *)Registry.getList();
+ }
+ static ScheduleDAGCtor getDefault() {
+ return (ScheduleDAGCtor)Registry.getDefault();
+ }
+ static void setDefault(ScheduleDAGCtor C) {
+ Registry.setDefault((MachinePassCtor)C);
+ }
+ static void setListener(MachinePassRegistryListener *L) {
+ Registry.setListener(L);
+ }
+};
+
+} // namespace llvm
+
+#endif
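
Beyond the registration boilerplate shown in the header comment, the registry can also be enumerated; a sketch (not part of the patch), assuming the getName()/getPassDescription() accessors inherited from MachinePassRegistryNode:

  #include "llvm/CodeGen/MachineScheduler.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Print every registered machine scheduler, e.g. for a -help listing.
  void listSchedulers() {
    for (MachineSchedRegistry *R = MachineSchedRegistry::getList(); R;
         R = R->getNext())
      errs() << R->getName() << ": " << R->getPassDescription() << "\n";
  }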
diff --git a/include/llvm/CodeGen/ObjectCodeEmitter.h b/include/llvm/CodeGen/ObjectCodeEmitter.h
deleted file mode 100644
index d46628caae79..000000000000
--- a/include/llvm/CodeGen/ObjectCodeEmitter.h
+++ /dev/null
@@ -1,171 +0,0 @@
-//===-- llvm/CodeGen/ObjectCodeEmitter.h - Object Code Emitter -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Generalized Object Code Emitter, works with ObjectModule and BinaryObject.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_OBJECTCODEEMITTER_H
-#define LLVM_CODEGEN_OBJECTCODEEMITTER_H
-
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-
-namespace llvm {
-
-class BinaryObject;
-class MachineBasicBlock;
-class MachineCodeEmitter;
-class MachineFunction;
-class MachineConstantPool;
-class MachineJumpTableInfo;
-class MachineModuleInfo;
-
-class ObjectCodeEmitter : public MachineCodeEmitter {
-protected:
-
- /// Binary Object (Section or Segment) we are emitting to.
- BinaryObject *BO;
-
- /// MBBLocations - This vector is a mapping from MBB ID's to their address.
- /// It is filled in by the StartMachineBasicBlock callback and queried by
- /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations;
-
- /// LabelLocations - This vector is a mapping from Label ID's to their
- /// address.
- std::vector<uintptr_t> LabelLocations;
-
- /// CPLocations - This is a map of constant pool indices to offsets from the
- /// start of the section for that constant pool index.
- std::vector<uintptr_t> CPLocations;
-
- /// CPSections - This is a map of constant pool indices to the Section
- /// containing the constant pool entry for that index.
- std::vector<uintptr_t> CPSections;
-
- /// JTLocations - This is a map of jump table indices to offsets from the
- /// start of the section for that jump table index.
- std::vector<uintptr_t> JTLocations;
-
-public:
- ObjectCodeEmitter();
- ObjectCodeEmitter(BinaryObject *bo);
- virtual ~ObjectCodeEmitter();
-
- /// setBinaryObject - set the BinaryObject we are writting to
- void setBinaryObject(BinaryObject *bo);
-
- /// emitByte - This callback is invoked when a byte needs to be
- /// written to the data stream, without buffer overflow testing.
- void emitByte(uint8_t B);
-
- /// emitWordLE - This callback is invoked when a 32-bit word needs to be
- /// written to the data stream in little-endian format.
- void emitWordLE(uint32_t W);
-
- /// emitWordBE - This callback is invoked when a 32-bit word needs to be
- /// written to the data stream in big-endian format.
- void emitWordBE(uint32_t W);
-
- /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
- /// written to the data stream in little-endian format.
- void emitDWordLE(uint64_t W);
-
- /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
- /// written to the data stream in big-endian format.
- void emitDWordBE(uint64_t W);
-
- /// emitAlignment - Move the CurBufferPtr pointer up to the specified
- /// alignment (saturated to BufferEnd of course).
- void emitAlignment(unsigned Alignment = 0, uint8_t fill = 0);
-
- /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
- /// written to the data stream.
- void emitULEB128Bytes(uint64_t Value);
-
- /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
- /// written to the data stream.
- void emitSLEB128Bytes(uint64_t Value);
-
- /// emitString - This callback is invoked when a String needs to be
- /// written to the data stream.
- void emitString(const std::string &String);
-
- /// getCurrentPCValue - This returns the address that the next emitted byte
- /// will be output to.
- uintptr_t getCurrentPCValue() const;
-
- /// getCurrentPCOffset - Return the offset from the start of the emitted
- /// buffer that we are currently writing to.
- uintptr_t getCurrentPCOffset() const;
-
- /// addRelocation - Whenever a relocatable address is needed, it should be
- /// noted with this interface.
- void addRelocation(const MachineRelocation& relocation);
-
- /// earlyResolveAddresses - True if the code emitter can use symbol addresses
- /// during code emission time. The JIT is capable of doing this because it
- /// creates jump tables or constant pools in memory on the fly while the
- /// object code emitters rely on a linker to have real addresses and should
- /// use relocations instead.
- bool earlyResolveAddresses() const { return false; }
-
- /// startFunction - This callback is invoked when the specified function is
- /// about to be code generated. This initializes the BufferBegin/End/Ptr
- /// fields.
- virtual void startFunction(MachineFunction &F) = 0;
-
- /// finishFunction - This callback is invoked when the specified function has
- /// finished code generation. If a buffer overflow has occurred, this method
- /// returns true (the callee is required to try again), otherwise it returns
- /// false.
- virtual bool finishFunction(MachineFunction &F) = 0;
-
- /// StartMachineBasicBlock - This should be called by the target when a new
- /// basic block is about to be emitted. This way the MCE knows where the
- /// start of the block is, and can implement getMachineBasicBlockAddress.
- virtual void StartMachineBasicBlock(MachineBasicBlock *MBB);
-
- /// getMachineBasicBlockAddress - Return the address of the specified
- /// MachineBasicBlock, only usable after the label for the MBB has been
- /// emitted.
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const;
-
- /// emitJumpTables - Emit all the jump tables for a given jump table info
- /// record to the appropriate section.
- virtual void emitJumpTables(MachineJumpTableInfo *MJTI) = 0;
-
- /// getJumpTableEntryAddress - Return the address of the jump table with index
- /// 'Index' in the function that last called initJumpTableInfo.
- virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const;
-
- /// emitConstantPool - For each constant pool entry, figure out which section
- /// the constant should live in, allocate space for it, and emit it to the
- /// Section data buffer.
- virtual void emitConstantPool(MachineConstantPool *MCP) = 0;
-
- /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
- /// the constant pool that was last emitted with the emitConstantPool method.
- virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const;
-
- /// getConstantPoolEntrySection - Return the section of the 'Index' entry in
- /// the constant pool that was last emitted with the emitConstantPool method.
- virtual uintptr_t getConstantPoolEntrySection(unsigned Index) const;
-
- /// Specifies the MachineModuleInfo object. This is used for exception handling
- /// purposes.
- virtual void setModuleInfo(MachineModuleInfo* Info) = 0;
- // to be implemented or depreciated with MachineModuleInfo
-
-}; // end class ObjectCodeEmitter
-
-} // end namespace llvm
-
-#endif
-
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index 5240729f52d0..a5d8b0dbd6a7 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -350,6 +350,43 @@ namespace PBQP {
numNodes = numEdges = 0;
}
+ /// \brief Dump a graph to an output stream.
+ template <typename OStream>
+ void dump(OStream &os) {
+ os << getNumNodes() << " " << getNumEdges() << "\n";
+
+ for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+ nodeItr != nodeEnd; ++nodeItr) {
+ const Vector& v = getNodeCosts(nodeItr);
+ os << "\n" << v.getLength() << "\n";
+ assert(v.getLength() != 0 && "Empty vector in graph.");
+ os << v[0];
+ for (unsigned i = 1; i < v.getLength(); ++i) {
+ os << " " << v[i];
+ }
+ os << "\n";
+ }
+
+ for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+ edgeItr != edgeEnd; ++edgeItr) {
+ unsigned n1 = std::distance(nodesBegin(), getEdgeNode1(edgeItr));
+ unsigned n2 = std::distance(nodesBegin(), getEdgeNode2(edgeItr));
+ assert(n1 != n2 && "PBQP graphs should not have self-edges.");
+ const Matrix& m = getEdgeCosts(edgeItr);
+ os << "\n" << n1 << " " << n2 << "\n"
+ << m.getRows() << " " << m.getCols() << "\n";
+ assert(m.getRows() != 0 && "No rows in matrix.");
+ assert(m.getCols() != 0 && "No cols in matrix.");
+ for (unsigned i = 0; i < m.getRows(); ++i) {
+ os << m[i][0];
+ for (unsigned j = 1; j < m.getCols(); ++j) {
+ os << " " << m[i][j];
+ }
+ os << "\n";
+ }
+ }
+ }
+
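
A usage sketch (not part of the patch); PBQP is a top-level namespace here:

  #include "llvm/CodeGen/PBQP/Graph.h"
  #include "llvm/Support/raw_ostream.h"

  // Write the textual cost listing: node/edge counts, then one cost vector
  // per node and one cost matrix per edge, as produced by Graph::dump().
  void dumpGraph(PBQP::Graph &G) {
    G.dump(llvm::errs());
  }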
/// \brief Print a representation of this graph in DOT format.
/// @param os Output stream to print on.
template <typename OStream>
diff --git a/include/llvm/CodeGen/PBQP/HeuristicBase.h b/include/llvm/CodeGen/PBQP/HeuristicBase.h
index 791c227f0d07..3fee18cc42d9 100644
--- a/include/llvm/CodeGen/PBQP/HeuristicBase.h
+++ b/include/llvm/CodeGen/PBQP/HeuristicBase.h
@@ -157,7 +157,7 @@ namespace PBQP {
case 0: s.applyR0(nItr); break;
case 1: s.applyR1(nItr); break;
case 2: s.applyR2(nItr); break;
- default: assert(false &&
+ default: llvm_unreachable(
"Optimal reductions of degree > 2 nodes is invalid.");
}
@@ -186,7 +186,7 @@ namespace PBQP {
/// \brief Add a node to the heuristic reduce list.
/// @param nItr Node iterator to add to the heuristic reduce list.
void addToHeuristicList(Graph::NodeItr nItr) {
- assert(false && "Must be implemented in derived class.");
+ llvm_unreachable("Must be implemented in derived class.");
}
/// \brief Heuristically reduce one of the nodes in the heuristic
@@ -194,25 +194,25 @@ namespace PBQP {
/// @return True if a reduction takes place, false if the heuristic reduce
/// list is empty.
void heuristicReduce() {
- assert(false && "Must be implemented in derived class.");
+ llvm_unreachable("Must be implemented in derived class.");
}
/// \brief Prepare a change in the costs on the given edge.
/// @param eItr Edge iterator.
void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
+ llvm_unreachable("Must be implemented in derived class.");
}
/// \brief Handle the change in the costs on the given edge.
/// @param eItr Edge iterator.
void postUpdateEdgeCostts(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
+ llvm_unreachable("Must be implemented in derived class.");
}
/// \brief Handle the addition of a new edge into the PBQP graph.
/// @param eItr Edge iterator for the added edge.
void handleAddEdge(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
+ llvm_unreachable("Must be implemented in derived class.");
}
/// \brief Handle disconnection of an edge from a node.
@@ -223,7 +223,7 @@ namespace PBQP {
/// method allows for the effect to be computed only for the remaining
/// node in the graph.
void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- assert(false && "Must be implemented in derived class.");
+ llvm_unreachable("Must be implemented in derived class.");
}
/// \brief Clean up any structures used by HeuristicBase.
diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
index e96c4cb1e0c1..a859e5899f06 100644
--- a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
@@ -418,6 +418,12 @@ namespace PBQP {
unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
nd.numDenied = 0;
+ const Vector& nCosts = getGraph().getNodeCosts(nItr);
+ for (unsigned i = 1; i < nCosts.getLength(); ++i) {
+ if (nCosts[i] == std::numeric_limits<PBQPNum>::infinity())
+ ++nd.numDenied;
+ }
+
nd.numSafe = numRegs;
nd.unsafeDegrees.resize(numRegs, 0);
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 7a03ce905d89..3b3819985880 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CODEGEN_PASSES_H
#define LLVM_CODEGEN_PASSES_H
+#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
@@ -26,7 +27,211 @@ namespace llvm {
class TargetLowering;
class TargetRegisterClass;
class raw_ostream;
+}
+namespace llvm {
+
+extern char &NoPassID; // Allow targets to choose not to run a pass.
+
+class PassConfigImpl;
+
+/// Target-Independent Code Generator Pass Configuration Options.
+///
+/// This is an ImmutablePass solely for the purpose of exposing CodeGen options
+/// to the internals of other CodeGen passes.
+class TargetPassConfig : public ImmutablePass {
+public:
+ /// Pseudo Pass IDs. These are defined within TargetPassConfig because they
+ /// are unregistered pass IDs. They are only useful for use with
+ /// TargetPassConfig APIs to identify multiple occurrences of the same pass.
+ ///
+
+ /// EarlyTailDuplicate - A clone of the TailDuplicate pass that runs early
+ /// during codegen, on SSA form.
+ static char EarlyTailDuplicateID;
+
+ /// PostRAMachineLICM - A clone of the LICM pass that runs during late machine
+ /// optimization after regalloc.
+ static char PostRAMachineLICMID;
+
+protected:
+ TargetMachine *TM;
+ PassManagerBase &PM;
+ PassConfigImpl *Impl; // Internal data structures
+ bool Initialized; // Flagged after all passes are configured.
+
+ // Target Pass Options
+ // Targets provide a default setting, user flags override.
+ //
+ bool DisableVerify;
+
+ /// Default setting for -enable-tail-merge on this target.
+ bool EnableTailMerge;
+
+public:
+ TargetPassConfig(TargetMachine *tm, PassManagerBase &pm);
+ // Dummy constructor.
+ TargetPassConfig();
+
+ virtual ~TargetPassConfig();
+
+ static char ID;
+
+ /// Get the right type of TargetMachine for this target.
+ template<typename TMC> TMC &getTM() const {
+ return *static_cast<TMC*>(TM);
+ }
+
+ const TargetLowering *getTargetLowering() const {
+ return TM->getTargetLowering();
+ }
+
+ //
+ void setInitialized() { Initialized = true; }
+
+ CodeGenOpt::Level getOptLevel() const { return TM->getOptLevel(); }
+
+ void setDisableVerify(bool Disable) { setOpt(DisableVerify, Disable); }
+
+ bool getEnableTailMerge() const { return EnableTailMerge; }
+ void setEnableTailMerge(bool Enable) { setOpt(EnableTailMerge, Enable); }
+
+ /// Allow the target to override a specific pass without overriding the pass
+ /// pipeline. When passes are added to the standard pipeline at the
+ /// point where StandardID is expected, add TargetID in its place.
+ void substitutePass(char &StandardID, char &TargetID);
+
+ /// Allow the target to enable a specific standard pass by default.
+ void enablePass(char &ID) { substitutePass(ID, ID); }
+
+ /// Allow the target to disable a specific standard pass by default.
+ void disablePass(char &ID) { substitutePass(ID, NoPassID); }
+
+ /// Return the pass substituted for StandardID by the target.
+ /// If no substitution exists, return StandardID.
+ AnalysisID getPassSubstitution(AnalysisID StandardID) const;
+
+ /// Return true if the optimized regalloc pipeline is enabled.
+ bool getOptimizeRegAlloc() const;
+
+ /// Add common target configurable passes that perform LLVM IR to IR
+ /// transforms following machine independent optimization.
+ virtual void addIRPasses();
+
+ /// Add common passes that perform LLVM IR to IR transforms in preparation for
+ /// instruction selection.
+ virtual void addISelPrepare();
+
+ /// addInstSelector - This method should install an instruction selector pass,
+ /// which converts from LLVM code to machine instructions.
+ virtual bool addInstSelector() {
+ return true;
+ }
+
+ /// Add the complete, standard set of LLVM CodeGen passes.
+ /// Fully developed targets will not generally override this.
+ virtual void addMachinePasses();
+
+protected:
+ // Helper to verify the analysis is really immutable.
+ void setOpt(bool &Opt, bool Val);
+
+ /// Methods with trivial inline returns are convenient points in the common
+ /// codegen pass pipeline where targets may insert passes. Methods with
+ /// out-of-line standard implementations are major CodeGen stages called by
+ /// addMachinePasses. Some targets may override major stages when inserting
+ /// passes is insufficient, but maintaining overridden stages is more work.
+ ///
+
+ /// addPreISel - This method should add any "last minute" LLVM->LLVM
+ /// passes (which are run just before the instruction selector).
+ virtual bool addPreISel() {
+ return true;
+ }
+
+ /// addMachineSSAOptimization - Add standard passes that optimize machine
+ /// instructions in SSA form.
+ virtual void addMachineSSAOptimization();
+
+ /// addPreRegAlloc - This method may be implemented by targets that want to
+ /// run passes immediately before register allocation. This should return
+ /// true if -print-machineinstrs should print after these passes.
+ virtual bool addPreRegAlloc() {
+ return false;
+ }
+
+ /// createTargetRegisterAllocator - Create the register allocator pass for
+ /// this target at the current optimization level.
+ virtual FunctionPass *createTargetRegisterAllocator(bool Optimized);
+
+ /// addFastRegAlloc - Add the minimum set of target-independent passes that
+ /// are required for fast register allocation.
+ virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
+
+ /// addOptimizedRegAlloc - Add passes related to register allocation.
+ /// LLVMTargetMachine provides standard regalloc passes for most targets.
+ virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+
+ /// addFinalizeRegAlloc - This method may be implemented by targets that want
+ /// to run passes within the regalloc pipeline, immediately after the register
+ /// allocation pass itself. These passes run as soon as virtual registers
+ /// have been rewritten to physical registers but before any other post-RA
+ /// optimization happens. Targets that have marked instructions for bundling
+ /// must have finalized those bundles by the time these passes have run,
+ /// because subsequent passes are not guaranteed to be bundle-aware.
+ virtual bool addFinalizeRegAlloc() {
+ return false;
+ }
+
+ /// addPostRegAlloc - This method may be implemented by targets that want to
+ /// run passes after the register allocation pass pipeline but before
+ /// prolog-epilog insertion. This should return true if -print-machineinstrs
+ /// should print after these passes.
+ virtual bool addPostRegAlloc() {
+ return false;
+ }
+
+ /// Add passes that optimize machine instructions after register allocation.
+ virtual void addMachineLateOptimization();
+
+ /// addPreSched2 - This method may be implemented by targets that want to
+ /// run passes after prolog-epilog insertion and before the second instruction
+ /// scheduling pass. This should return true if -print-machineinstrs should
+ /// print after these passes.
+ virtual bool addPreSched2() {
+ return false;
+ }
+
+ /// Add standard basic block placement passes.
+ virtual void addBlockPlacement();
+
+ /// addPreEmitPass - This method may be implemented by targets that want to
+ /// run passes immediately before machine code is emitted. This should return
+ /// true if -print-machineinstrs should print out the code after the passes.
+ virtual bool addPreEmitPass() {
+ return false;
+ }
+
+ /// Utilities for targets to add passes to the pass manager.
+ ///
+
+ /// Add a CodeGen pass at this point in the pipeline after checking overrides.
+ /// Return the pass that was added, or NoPassID.
+ AnalysisID addPass(char &ID);
+
+ /// addMachinePasses helper to create the target-selected or overridden
+ /// regalloc pass.
+ FunctionPass *createRegAllocPass(bool Optimized);
+
+ /// printAndVerify - Add a pass to dump then verify the machine function, if
+ /// those steps are enabled.
+ ///
+ void printAndVerify(const char *Banner) const;
+};
+} // namespace llvm
+
+/// List of target independent CodeGen pass IDs.
+namespace llvm {
/// createUnreachableBlockEliminationPass - The LLVM code generator does not
/// work well with unreachable basic blocks (what live ranges make sense for a
/// block that cannot be reached?). As such, a code generator should either
@@ -41,31 +246,29 @@ namespace llvm {
createMachineFunctionPrinterPass(raw_ostream &OS,
const std::string &Banner ="");
- /// MachineLoopInfo pass - This pass is a loop analysis pass.
- ///
+ /// MachineLoopInfo - This pass is a loop analysis pass.
extern char &MachineLoopInfoID;
- /// MachineLoopRanges pass - This pass is an on-demand loop coverage
- /// analysis pass.
- ///
+ /// MachineLoopRanges - This pass is an on-demand loop coverage analysis.
extern char &MachineLoopRangesID;
- /// MachineDominators pass - This pass is a machine dominators analysis pass.
- ///
+ /// MachineDominators - This pass is a machine dominators analysis pass.
extern char &MachineDominatorsID;
/// EdgeBundles analysis - Bundle machine CFG edges.
- ///
extern char &EdgeBundlesID;
- /// PHIElimination pass - This pass eliminates machine instruction PHI nodes
+ /// LiveVariables pass - This pass computes the set of blocks in which each
+ /// variable is live and sets machine operand kill flags.
+ extern char &LiveVariablesID;
+
+ /// PHIElimination - This pass eliminates machine instruction PHI nodes
/// by inserting copy instructions. This destroys SSA information, but is the
/// desired input for some register allocators. This pass is "required" by
/// these register allocators like this: AU.addRequiredID(PHIEliminationID);
- ///
extern char &PHIEliminationID;
- /// StrongPHIElimination pass - This pass eliminates machine instruction PHI
+ /// StrongPHIElimination - This pass eliminates machine instruction PHI
/// nodes by inserting copy instructions. This destroys SSA information, but
/// is the desired input for some register allocators. This pass is
/// "required" by these register allocator like this:
@@ -76,32 +279,30 @@ namespace llvm {
/// LiveStacks pass. An analysis keeping track of the liveness of stack slots.
extern char &LiveStacksID;
- /// TwoAddressInstruction pass - This pass reduces two-address instructions to
+ /// TwoAddressInstruction - This pass reduces two-address instructions to
/// use two operands. This destroys SSA information but it is desired by
/// register allocators.
extern char &TwoAddressInstructionPassID;
- /// RegisteCoalescer pass - This pass merges live ranges to eliminate copies.
- extern char &RegisterCoalescerPassID;
+ /// ProcessImplicitDefs pass - This pass removes IMPLICIT_DEFs.
+ extern char &ProcessImplicitDefsID;
+
+ /// RegisterCoalescer - This pass merges live ranges to eliminate copies.
+ extern char &RegisterCoalescerID;
+
+ /// MachineScheduler - This pass schedules machine instructions.
+ extern char &MachineSchedulerID;
/// SpillPlacement analysis. Suggest optimal placement of spill code between
/// basic blocks.
- ///
extern char &SpillPlacementID;
- /// UnreachableMachineBlockElimination pass - This pass removes unreachable
+ /// UnreachableMachineBlockElimination - This pass removes unreachable
/// machine basic blocks.
extern char &UnreachableMachineBlockElimID;
- /// DeadMachineInstructionElim pass - This pass removes dead machine
- /// instructions.
- ///
- FunctionPass *createDeadMachineInstructionElimPass();
-
- /// Creates a register allocator as the user specified on the command line, or
- /// picks one that matches OptLevel.
- ///
- FunctionPass *createRegisterAllocator(CodeGenOpt::Level OptLevel);
+ /// DeadMachineInstructionElim - This pass removes dead machine instructions.
+ extern char &DeadMachineInstructionElimID;
/// FastRegisterAllocation Pass - This pass register allocates as fast as
/// possible. It is best suited for debug code where live ranges are short.
@@ -118,56 +319,59 @@ namespace llvm {
///
FunctionPass *createGreedyRegisterAllocator();
- /// LinearScanRegisterAllocation Pass - This pass implements the linear scan
- /// register allocation algorithm, a global register allocator.
- ///
- FunctionPass *createLinearScanRegisterAllocator();
-
/// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean
/// Quadratic Programming (PBQP) based register allocator.
///
FunctionPass *createDefaultPBQPRegisterAllocator();
- /// PrologEpilogCodeInserter Pass - This pass inserts prolog and epilog code,
+ /// PrologEpilogCodeInserter - This pass inserts prolog and epilog code,
/// and eliminates abstract frame references.
- ///
- FunctionPass *createPrologEpilogCodeInserter();
+ extern char &PrologEpilogCodeInserterID;
- /// ExpandPostRAPseudos Pass - This pass expands pseudo instructions after
+ /// ExpandPostRAPseudos - This pass expands pseudo instructions after
/// register allocation.
- ///
- FunctionPass *createExpandPostRAPseudosPass();
+ extern char &ExpandPostRAPseudosID;
/// createPostRAScheduler - This pass performs post register allocation
/// scheduling.
- FunctionPass *createPostRAScheduler(CodeGenOpt::Level OptLevel);
+ extern char &PostRASchedulerID;
- /// BranchFolding Pass - This pass performs machine code CFG based
+ /// BranchFolding - This pass performs machine code CFG based
/// optimizations to delete branches to branches, eliminate branches to
/// successor blocks (creating fall throughs), and eliminate branches over
/// branches.
- FunctionPass *createBranchFoldingPass(bool DefaultEnableTailMerge);
+ extern char &BranchFolderPassID;
- /// TailDuplicate Pass - Duplicate blocks with unconditional branches
+ /// TailDuplicate - Duplicate blocks with unconditional branches
/// into tails of their predecessors.
- FunctionPass *createTailDuplicatePass(bool PreRegAlloc = false);
+ extern char &TailDuplicateID;
+
+ /// IfConverter - This pass performs machine code if conversion.
+ extern char &IfConverterID;
- /// IfConverter Pass - This pass performs machine code if conversion.
- FunctionPass *createIfConverterPass();
+ /// MachineBlockPlacement - This pass places basic blocks based on branch
+ /// probabilities.
+ extern char &MachineBlockPlacementID;
- /// Code Placement Pass - This pass optimize code placement and aligns loop
+ /// MachineBlockPlacementStats - This pass collects statistics about the
+ /// basic block placement using branch probabilities and block frequency
+ /// information.
+ extern char &MachineBlockPlacementStatsID;
+
+ /// Code Placement - This pass optimizes code placement and aligns loop
/// headers to a target-specific alignment boundary.
- FunctionPass *createCodePlacementOptPass();
+ extern char &CodePlacementOptID;
- /// IntrinsicLowering Pass - Performs target-independent LLVM IR
- /// transformations for highly portable strategies.
+ /// GCLowering Pass - Performs target-independent LLVM IR transformations for
+ /// highly portable strategies.
+ ///
FunctionPass *createGCLoweringPass();
- /// MachineCodeAnalysis Pass - Target-independent pass to mark safe points in
- /// machine code. Must be added very late during code generation, just prior
- /// to output, and importantly after all CFG transformations (such as branch
- /// folding).
- FunctionPass *createGCMachineCodeAnalysisPass();
+ /// GCMachineCodeAnalysis - Target-independent pass to mark safe points
+ /// in machine code. Must be added very late during code generation, just
+ /// prior to output, and importantly after all CFG transformations (such as
+ /// branch folding).
+ extern char &GCMachineCodeAnalysisID;
/// Deleter Pass - Releases GC metadata.
///
@@ -177,54 +381,56 @@ namespace llvm {
///
FunctionPass *createGCInfoPrinter(raw_ostream &OS);
- /// createMachineCSEPass - This pass performs global CSE on machine
- /// instructions.
- FunctionPass *createMachineCSEPass();
+ /// MachineCSE - This pass performs global CSE on machine instructions.
+ extern char &MachineCSEID;
- /// createMachineLICMPass - This pass performs LICM on machine instructions.
- ///
- FunctionPass *createMachineLICMPass(bool PreRegAlloc = true);
+ /// MachineLICM - This pass performs LICM on machine instructions.
+ extern char &MachineLICMID;
+
+ /// MachineSinking - This pass performs sinking on machine instructions.
+ extern char &MachineSinkingID;
- /// createMachineSinkingPass - This pass performs sinking on machine
- /// instructions.
- FunctionPass *createMachineSinkingPass();
+ /// MachineCopyPropagation - This pass performs copy propagation on
+ /// machine instructions.
+ extern char &MachineCopyPropagationID;
- /// createPeepholeOptimizerPass - This pass performs peephole optimizations -
+ /// PeepholeOptimizer - This pass performs peephole optimizations -
/// like extension and comparison eliminations.
- FunctionPass *createPeepholeOptimizerPass();
+ extern char &PeepholeOptimizerID;
- /// createOptimizePHIsPass - This pass optimizes machine instruction PHIs
+ /// OptimizePHIs - This pass optimizes machine instruction PHIs
/// to take advantage of opportunities created during DAG legalization.
- FunctionPass *createOptimizePHIsPass();
+ extern char &OptimizePHIsID;
- /// createStackSlotColoringPass - This pass performs stack slot coloring.
- FunctionPass *createStackSlotColoringPass(bool);
+ /// StackSlotColoring - This pass performs stack slot coloring.
+ extern char &StackSlotColoringID;
/// createStackProtectorPass - This pass adds stack protectors to functions.
+ ///
FunctionPass *createStackProtectorPass(const TargetLowering *tli);
/// createMachineVerifierPass - This pass verifies generated machine code
/// instructions for correctness.
+ ///
FunctionPass *createMachineVerifierPass(const char *Banner = 0);
/// createDwarfEHPass - This pass mulches exception handling code into a form
/// adapted to code generation. Required if using dwarf exception handling.
FunctionPass *createDwarfEHPass(const TargetMachine *tm);
- /// createSjLjEHPass - This pass adapts exception handling code to use
+ /// createSjLjEHPreparePass - This pass adapts exception handling code to use
/// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow.
- FunctionPass *createSjLjEHPass(const TargetLowering *tli);
+ ///
+ FunctionPass *createSjLjEHPreparePass(const TargetLowering *tli);
- /// createLocalStackSlotAllocationPass - This pass assigns local frame
- /// indices to stack slots relative to one another and allocates
- /// base registers to access them when it is estimated by the target to
- /// be out of range of normal frame pointer or stack pointer index
- /// addressing.
- FunctionPass *createLocalStackSlotAllocationPass();
+ /// LocalStackSlotAllocation - This pass assigns local frame indices to stack
+ /// slots relative to one another and allocates base registers to access them
+ /// when it is estimated by the target to be out of range of normal frame
+ /// pointer or stack pointer index addressing.
+ extern char &LocalStackSlotAllocationID;
- /// createExpandISelPseudosPass - This pass expands pseudo-instructions.
- ///
- FunctionPass *createExpandISelPseudosPass();
+ /// ExpandISelPseudos - This pass expands pseudo-instructions.
+ extern char &ExpandISelPseudosID;
/// createExecutionDependencyFixPass - This pass fixes execution time
/// problems with dependent instructions, such as switching execution
@@ -234,6 +440,13 @@ namespace llvm {
///
FunctionPass *createExecutionDependencyFixPass(const TargetRegisterClass *RC);
+ /// UnpackMachineBundles - This pass unpacks machine instruction bundles.
+ extern char &UnpackMachineBundlesID;
+
+ /// FinalizeMachineBundles - This pass finalizes machine instruction
+ /// bundles (created earlier, e.g. during pre-RA scheduling).
+ extern char &FinalizeMachineBundlesID;
+
} // End llvm namespace
#endif
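
The substitutePass/enablePass/disablePass trio introduced above boils down to one substitution table: each standard pass ID maps to the ID that should actually run, with NoPassID meaning "run nothing" and enablePass(ID) being the identity substitution. A standalone model of that mechanism, a sketch of the idea rather than the LLVM implementation (MyTailDuplicateID is a hypothetical target pass ID):

    #include <cassert>
    #include <map>

    char NoPassID = 0;             // sentinel: the pass was disabled
    char EarlyTailDuplicateID = 0; // stand-ins for unregistered pass IDs
    char MyTailDuplicateID = 0;

    std::map<char*, char*> Substitutions;

    void substitutePass(char &StandardID, char &TargetID) {
      Substitutions[&StandardID] = &TargetID;
    }
    void disablePass(char &ID) { substitutePass(ID, NoPassID); }

    char *getPassSubstitution(char *StandardID) {
      std::map<char*, char*>::iterator I = Substitutions.find(StandardID);
      return I == Substitutions.end() ? StandardID : I->second;
    }

    int main() {
      // A target swaps in its own tail duplicator, then disables it outright.
      substitutePass(EarlyTailDuplicateID, MyTailDuplicateID);
      assert(getPassSubstitution(&EarlyTailDuplicateID) == &MyTailDuplicateID);

      disablePass(EarlyTailDuplicateID);
      assert(getPassSubstitution(&EarlyTailDuplicateID) == &NoPassID);
      return 0;
    }

Using the address of a static char as a pass identity is the same trick the real pass IDs rely on: the char's value is irrelevant, only its unique address matters.
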
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 26b6773c0530..3986a8dd7da1 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -68,6 +68,10 @@ class RegScavenger {
/// available, unset means the register is currently being used.
BitVector RegsAvailable;
+ // These BitVectors are only used internally to forward(). They are members
+ // to avoid frequent reallocations.
+ BitVector KillRegs, DefRegs;
+
public:
RegScavenger()
: MBB(NULL), NumPhysRegs(0), Tracking(false),
@@ -130,8 +134,9 @@ private:
/// isUsed - Test if a register is currently being used.
///
- bool isUsed(unsigned Reg) const { return !RegsAvailable.test(Reg); }
- bool isUnused(unsigned Reg) const { return RegsAvailable.test(Reg); }
+ bool isUsed(unsigned Reg) const {
+ return !RegsAvailable.test(Reg) || ReservedRegs.test(Reg);
+ }
/// isAliasUsed - Is Reg or an alias currently in use?
bool isAliasUsed(unsigned Reg) const;
@@ -139,7 +144,7 @@ private:
/// setUsed / setUnused - Mark the state of one or a number of registers.
///
void setUsed(BitVector &Regs) {
- RegsAvailable &= ~Regs;
+ RegsAvailable.reset(Regs);
}
void setUnused(BitVector &Regs) {
RegsAvailable |= Regs;
@@ -148,9 +153,6 @@ private:
/// Add Reg and all its sub-registers to BV.
void addRegWithSubRegs(BitVector &BV, unsigned Reg);
- /// Add Reg and its aliases to BV.
- void addRegWithAliases(BitVector &BV, unsigned Reg);
-
/// findSurvivorReg - Return the candidate register that is unused for the
/// longest after StartMI. UseMI is set to the instruction where the search
/// stopped.
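
The isUsed() change above folds ReservedRegs into the query, so a reserved register is reported as used even when its RegsAvailable bit is set and can never be handed out by the scavenger; the dropped isUnused() had no such guard. A standalone model of the new predicate (the register numbering is hypothetical):

    #include <bitset>
    #include <cassert>

    static bool isUsed(const std::bitset<8> &RegsAvailable,
                       const std::bitset<8> &ReservedRegs, unsigned Reg) {
      // Mirrors the patched predicate: in use, or reserved.
      return !RegsAvailable.test(Reg) || ReservedRegs.test(Reg);
    }

    int main() {
      const unsigned SP = 3; // hypothetical reserved stack pointer
      std::bitset<8> RegsAvailable;
      RegsAvailable.set();   // all registers nominally available
      std::bitset<8> ReservedRegs;
      ReservedRegs.set(SP);

      assert(isUsed(RegsAvailable, ReservedRegs, SP)); // reserved => used
      assert(!isUsed(RegsAvailable, ReservedRegs, 0)); // genuinely free
      return 0;
    }
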
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
new file mode 100644
index 000000000000..56b5855c01c9
--- /dev/null
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -0,0 +1,142 @@
+//===----- ResourcePriorityQueue.h - A DFA-oriented priority queue -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef RESOURCE_PRIORITY_QUEUE_H
+#define RESOURCE_PRIORITY_QUEUE_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+ class ResourcePriorityQueue;
+
+ /// Sorting functions for the Available queue.
+ struct resource_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ ResourcePriorityQueue *PQ;
+ explicit resource_sort(ResourcePriorityQueue *pq) : PQ(pq) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ class ResourcePriorityQueue : public SchedulingPriorityQueue {
+ /// SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ /// NumNodesSolelyBlocking - This vector contains, for every node in the
+ /// Queue, the number of nodes that the node is the sole unscheduled
+ /// predecessor for. This is used as a tie-breaker heuristic for better
+ /// mobility.
+ std::vector<unsigned> NumNodesSolelyBlocking;
+
+ /// Queue - The queue.
+ std::vector<SUnit*> Queue;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+ resource_sort Picker;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData* InstrItins;
+ /// ResourcesModel - Represents VLIW state.
+ /// Not limited to VLIW targets per se, but assumes
+ /// definition of DFA by a target.
+ DFAPacketizer *ResourcesModel;
+
+ /// Resource model - packet/bundle model. Purely
+ /// internal for now.
+ std::vector<SUnit*> Packet;
+
+ /// Heuristics for estimating register pressure.
+ unsigned ParallelLiveRanges;
+ signed HorizontalVerticalBalance;
+
+ public:
+ ResourcePriorityQueue(SelectionDAGISel *IS);
+
+ ~ResourcePriorityQueue() {
+ delete ResourcesModel;
+ }
+
+ bool isBottomUp() const { return false; }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU) {
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+ }
+
+ void updateNode(const SUnit *SU) {}
+
+ void releaseState() {
+ SUnits = 0;
+ }
+
+ unsigned getLatency(unsigned NodeNum) const {
+ assert(NodeNum < (*SUnits).size());
+ return (*SUnits)[NodeNum].getHeight();
+ }
+
+ unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
+ assert(NodeNum < NumNodesSolelyBlocking.size());
+ return NumNodesSolelyBlocking[NodeNum];
+ }
+
+ /// Single cost function reflecting benefit of scheduling SU
+ /// in the current cycle.
+ signed SUSchedulingCost (SUnit *SU);
+
+ /// initNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void initNumRegDefsLeft(SUnit *SU);
+ void updateNumRegDefsLeft(SUnit *SU);
+ signed regPressureDelta(SUnit *SU, bool RawPressure = false);
+ signed rawRegPressureDelta (SUnit *SU, unsigned RCId);
+
+ bool empty() const { return Queue.empty(); }
+
+ virtual void push(SUnit *U);
+
+ virtual SUnit *pop();
+
+ virtual void remove(SUnit *SU);
+
+ virtual void dump(ScheduleDAG* DAG) const;
+
+ /// scheduledNode - Main resource tracking point.
+ void scheduledNode(SUnit *Node);
+ bool isResourceAvailable(SUnit *SU);
+ void reserveResources(SUnit *SU);
+
+private:
+ void adjustPriorityOfUnscheduledPreds(SUnit *SU);
+ SUnit *getSingleUnscheduledPred(SUnit *SU);
+ unsigned numberRCValPredInSU (SUnit *SU, unsigned RCId);
+ unsigned numberRCValSuccInSU (SUnit *SU, unsigned RCId);
+ };
+}
+
+#endif
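
resource_sort above is an external comparator over SUnit pointers, and Queue is a plain vector rather than a binary heap, so the scheduler can pop the best candidate by scanning for the maximum under the comparator. A toy version of that vector-plus-comparator pattern (Unit and Priority are illustrative stand-ins, not LLVM types):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct Unit { int Priority; };

    struct resource_sort {
      bool operator()(const Unit *L, const Unit *R) const {
        return L->Priority < R->Priority; // "less than" means R is preferred
      }
    };

    int main() {
      Unit A = {1}, B = {7}, C = {3};
      std::vector<Unit*> Queue;
      Queue.push_back(&A);
      Queue.push_back(&B);
      Queue.push_back(&C);

      // Pop the best unit: scan for the max, then swap-and-pop to remove it.
      std::vector<Unit*>::iterator Best =
          std::max_element(Queue.begin(), Queue.end(), resource_sort());
      Unit *Popped = *Best;
      *Best = Queue.back();
      Queue.pop_back();

      assert(Popped == &B && Queue.size() == 2);
      return 0;
    }

A linear scan costs O(n) per pop, but it lets the comparator consult mutable resource state, which a heap's ordering invariant would not tolerate.
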
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 1bbc6c54d7fa..f4de6933b317 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -8,7 +8,8 @@
//===----------------------------------------------------------------------===//
//
// This file implements the ScheduleDAG class, which is used as the common
-// base class for instruction schedulers.
+// base class for instruction schedulers. This encapsulates the scheduling DAG,
+// which is shared between SelectionDAG and MachineInstr scheduling.
//
//===----------------------------------------------------------------------===//
@@ -16,7 +17,7 @@
#define LLVM_CODEGEN_SCHEDULEDAG_H
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/GraphTraits.h"
@@ -129,8 +130,7 @@ namespace llvm {
Contents.Order.isMustAlias == Other.Contents.Order.isMustAlias &&
Contents.Order.isArtificial == Other.Contents.Order.isArtificial;
}
- assert(0 && "Invalid dependency kind!");
- return false;
+ llvm_unreachable("Invalid dependency kind!");
}
bool operator!=(const SDep &Other) const {
@@ -232,6 +232,7 @@ namespace llvm {
public:
SUnit *OrigNode; // If not this, the node from which
// this node was cloned.
+ // (SD scheduling only)
// Preds/Succs - The SUnits before/after us in the graph.
SmallVector<SDep, 4> Preds; // All sunit predecessors.
@@ -409,6 +410,13 @@ namespace llvm {
return false;
}
+ bool isTopReady() const {
+ return NumPredsLeft == 0;
+ }
+ bool isBottomReady() const {
+ return NumSuccsLeft == 0;
+ }
+
void dump(const ScheduleDAG *G) const;
void dumpAll(const ScheduleDAG *G) const;
void print(raw_ostream &O, const ScheduleDAG *G) const;
@@ -427,6 +435,7 @@ namespace llvm {
/// implementation to decide.
///
class SchedulingPriorityQueue {
+ virtual void anchor();
unsigned CurCycle;
bool HasReadyFilter;
public:
@@ -465,13 +474,13 @@ namespace llvm {
virtual void dump(ScheduleDAG *) const {}
- /// ScheduledNode - As each node is scheduled, this method is invoked. This
+ /// scheduledNode - As each node is scheduled, this method is invoked. This
/// allows the priority function to adjust the priority of related
/// unscheduled nodes, for example.
///
- virtual void ScheduledNode(SUnit *) {}
+ virtual void scheduledNode(SUnit *) {}
- virtual void UnscheduledNode(SUnit *) {}
+ virtual void unscheduledNode(SUnit *) {}
void setCurCycle(unsigned Cycle) {
CurCycle = Cycle;
@@ -484,15 +493,11 @@ namespace llvm {
class ScheduleDAG {
public:
- MachineBasicBlock *BB; // The block in which to insert instructions
- MachineBasicBlock::iterator InsertPos;// The position to insert instructions
const TargetMachine &TM; // Target processor
const TargetInstrInfo *TII; // Target instruction information
const TargetRegisterInfo *TRI; // Target processor register info
MachineFunction &MF; // Machine function
MachineRegisterInfo &MRI; // Virtual/real register map
- std::vector<SUnit*> Sequence; // The schedule. Null SUnit*'s
- // represent noop instructions.
std::vector<SUnit> SUnits; // The scheduling units.
SUnit EntrySU; // Special node for the region entry.
SUnit ExitSU; // Special node for the region exit.
@@ -507,6 +512,9 @@ namespace llvm {
virtual ~ScheduleDAG();
+ /// clearDAG - clear the DAG state (between regions).
+ void clearDAG();
+
/// getInstrDesc - Return the MCInstrDesc of this SUnit.
/// Return NULL for SDNodes without a machine opcode.
const MCInstrDesc *getInstrDesc(const SUnit *SU) const {
@@ -517,66 +525,43 @@ namespace llvm {
/// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered
/// using 'dot'.
///
+ void viewGraph(const Twine &Name, const Twine &Title);
void viewGraph();
- /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
- /// according to the order specified in Sequence.
- ///
- virtual MachineBasicBlock *EmitSchedule() = 0;
-
- void dumpSchedule() const;
-
virtual void dumpNode(const SUnit *SU) const = 0;
/// getGraphNodeLabel - Return a label for an SUnit node in a visualization
/// of the ScheduleDAG.
virtual std::string getGraphNodeLabel(const SUnit *SU) const = 0;
+ /// getDAGName - Return a label for the region of code covered by the DAG.
+ virtual std::string getDAGName() const = 0;
+
/// addCustomGraphFeatures - Add custom features for a visualization of
/// the ScheduleDAG.
virtual void addCustomGraphFeatures(GraphWriter<ScheduleDAG*> &) const {}
#ifndef NDEBUG
- /// VerifySchedule - Verify that all SUnits were scheduled and that
- /// their state is consistent.
- void VerifySchedule(bool isBottomUp);
+ /// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+ /// their state is consistent. Return the number of scheduled SUnits.
+ unsigned VerifyScheduledDAG(bool isBottomUp);
#endif
protected:
- /// Run - perform scheduling.
- ///
- void Run(MachineBasicBlock *bb, MachineBasicBlock::iterator insertPos);
-
- /// BuildSchedGraph - Build SUnits and set up their Preds and Succs
- /// to form the scheduling dependency graph.
- ///
- virtual void BuildSchedGraph(AliasAnalysis *AA) = 0;
-
/// ComputeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU) = 0;
+ virtual void computeLatency(SUnit *SU) = 0;
/// ComputeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *, SUnit *,
+ virtual void computeOperandLatency(SUnit *, SUnit *,
SDep&) const { }
- /// Schedule - Order nodes according to selected style, filling
- /// in the Sequence member.
- ///
- virtual void Schedule() = 0;
-
/// ForceUnitLatencies - Return true if all scheduling edges should be given
/// a latency value of one. The default is to return false; schedulers may
/// override this as needed.
- virtual bool ForceUnitLatencies() const { return false; }
-
- /// EmitNoop - Emit a noop instruction.
- ///
- void EmitNoop();
-
- void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+ virtual bool forceUnitLatencies() const { return false; }
private:
// Return the MCInstrDesc of this SDNode or NULL.
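
The new isTopReady()/isBottomReady() predicates added above are plain zero-tests on the remaining unscheduled predecessor and successor counts. A toy illustration with a two-node chain (ToySUnit is a stand-in, not LLVM's SUnit):

    #include <cassert>

    struct ToySUnit {
      unsigned NumPredsLeft;
      unsigned NumSuccsLeft;
      ToySUnit() : NumPredsLeft(0), NumSuccsLeft(0) {}
      bool isTopReady() const { return NumPredsLeft == 0; }
      bool isBottomReady() const { return NumSuccsLeft == 0; }
    };

    int main() {
      ToySUnit Def, Use; // single dependency edge: Def -> Use
      Def.NumSuccsLeft = 1;
      Use.NumPredsLeft = 1;

      assert(Def.isTopReady() && !Def.isBottomReady());
      assert(!Use.isTopReady() && Use.isBottomReady());

      --Use.NumPredsLeft; // scheduling Def top-down releases Use
      assert(Use.isTopReady());
      return 0;
    }
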
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
new file mode 100644
index 000000000000..c8de7bc8f892
--- /dev/null
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -0,0 +1,344 @@
+//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleDAGInstrs class, which implements
+// scheduling for a MachineInstr-based dependency graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCHEDULEDAGINSTRS_H
+#define SCHEDULEDAGINSTRS_H
+
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include <map>
+
+namespace llvm {
+ class MachineLoopInfo;
+ class MachineDominatorTree;
+ class LiveIntervals;
+
+ /// LoopDependencies - This class analyzes loop-oriented register
+ /// dependencies, which are used to guide scheduling decisions.
+ /// For example, loop induction variable increments should be
+ /// scheduled as soon as possible after the variable's last use.
+ ///
+ class LoopDependencies {
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+
+ public:
+ typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+ LoopDeps;
+ LoopDeps Deps;
+
+ LoopDependencies(const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt) :
+ MLI(mli), MDT(mdt) {}
+
+ /// VisitLoop - Clear out any previous state and analyze the given loop.
+ ///
+ void VisitLoop(const MachineLoop *Loop) {
+ assert(Deps.empty() && "stale loop dependencies");
+
+ MachineBasicBlock *Header = Loop->getHeader();
+ SmallSet<unsigned, 8> LoopLiveIns;
+ for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+ LE = Header->livein_end(); LI != LE; ++LI)
+ LoopLiveIns.insert(*LI);
+
+ const MachineDomTreeNode *Node = MDT.getNode(Header);
+ const MachineBasicBlock *MBB = Node->getBlock();
+ assert(Loop->contains(MBB) &&
+ "Loop does not contain header!");
+ VisitRegion(Node, MBB, Loop, LoopLiveIns);
+ }
+
+ private:
+ void VisitRegion(const MachineDomTreeNode *Node,
+ const MachineBasicBlock *MBB,
+ const MachineLoop *Loop,
+ const SmallSet<unsigned, 8> &LoopLiveIns) {
+ unsigned Count = 0;
+ for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ const MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (LoopLiveIns.count(MOReg))
+ Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+ }
+ ++Count; // Not every iteration due to dbg_value above.
+ }
+
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (std::vector<MachineDomTreeNode*>::const_iterator I =
+ Children.begin(), E = Children.end(); I != E; ++I) {
+ const MachineDomTreeNode *ChildNode = *I;
+ MachineBasicBlock *ChildBlock = ChildNode->getBlock();
+ if (Loop->contains(ChildBlock))
+ VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
+ }
+ }
+ };
+
+ /// An individual mapping from virtual register number to SUnit.
+ struct VReg2SUnit {
+ unsigned VirtReg;
+ SUnit *SU;
+
+ VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
+
+ unsigned getSparseSetKey() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
+ };
+
+ /// Combine a SparseSet with a 1x1 vector to track physical registers.
+ /// The SparseSet allows iterating over the (few) live registers for quickly
+ /// comparing against a regmask or clearing the set.
+ ///
+ /// Storage for the map is allocated once for the pass. The map can be
+ /// cleared between scheduling regions without freeing unused entries.
+ class Reg2SUnitsMap {
+ SparseSet<unsigned> PhysRegSet;
+ std::vector<std::vector<SUnit*> > SUnits;
+ public:
+ typedef SparseSet<unsigned>::const_iterator const_iterator;
+
+ // Allow iteration over register numbers (keys) in the map. If needed, we
+ // can provide an iterator over SUnits (values) as well.
+ const_iterator reg_begin() const { return PhysRegSet.begin(); }
+ const_iterator reg_end() const { return PhysRegSet.end(); }
+
+ /// Initialize the map with the number of registers.
+ /// If the map is already large enough, no allocation occurs.
+ /// For simplicity we expect the map to be empty().
+ void setRegLimit(unsigned Limit);
+
+ /// Returns true if the map is empty.
+ bool empty() const { return PhysRegSet.empty(); }
+
+ /// Clear the map without deallocating storage.
+ void clear();
+
+ bool contains(unsigned Reg) const { return PhysRegSet.count(Reg); }
+
+ /// If this register is mapped, return its existing SUnits vector.
+ /// Otherwise map the register and return an empty SUnits vector.
+ std::vector<SUnit *> &operator[](unsigned Reg) {
+ bool New = PhysRegSet.insert(Reg).second;
+ assert((!New || SUnits[Reg].empty()) && "stale SUnits vector");
+ (void)New;
+ return SUnits[Reg];
+ }
+
+ /// Erase an existing element without freeing memory.
+ void erase(unsigned Reg) {
+ PhysRegSet.erase(Reg);
+ SUnits[Reg].clear();
+ }
+ };
+
+ /// Use SparseSet as a SparseMap by relying on the fact that it never
+ /// compares ValueT's, only unsigned keys. This allows the set to be cleared
+ /// between scheduling regions in constant time as long as ValueT does not
+ /// require a destructor.
+ typedef SparseSet<VReg2SUnit> VReg2SUnitMap;
+
+ /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
+ /// MachineInstrs.
+ class ScheduleDAGInstrs : public ScheduleDAG {
+ protected:
+ const MachineLoopInfo &MLI;
+ const MachineDominatorTree &MDT;
+ const MachineFrameInfo *MFI;
+ const InstrItineraryData *InstrItins;
+
+ /// Live Intervals provides reaching defs in preRA scheduling.
+ LiveIntervals *LIS;
+
+ /// isPostRA flag indicates vregs cannot be present.
+ bool IsPostRA;
+
+ /// UnitLatencies (misnamed) flag avoids computing def-use latencies, using
+ /// the def-side latency only.
+ bool UnitLatencies;
+
+ /// State specific to the current scheduling region.
+ /// ------------------------------------------------
+
+ /// The block in which to insert instructions
+ MachineBasicBlock *BB;
+
+ /// The beginning of the range to be scheduled.
+ MachineBasicBlock::iterator RegionBegin;
+
+ /// The end of the range to be scheduled.
+ MachineBasicBlock::iterator RegionEnd;
+
+ /// The index in BB of RegionEnd.
+ unsigned EndIndex;
+
+ /// After calling BuildSchedGraph, each machine instruction in the current
+ /// scheduling region is mapped to an SUnit.
+ DenseMap<MachineInstr*, SUnit*> MISUnitMap;
+
+ /// State internal to DAG building.
+ /// -------------------------------
+
+ /// Defs, Uses - Remember where defs and uses of each register are as we
+ /// iterate upward through the instructions. This is allocated here instead
+ /// of inside BuildSchedGraph to avoid the need for it to be initialized and
+ /// destructed for each block.
+ Reg2SUnitsMap Defs;
+ Reg2SUnitsMap Uses;
+
+ /// Track the last instruction in this region defining each virtual register.
+ VReg2SUnitMap VRegDefs;
+
+ /// PendingLoads - Remember where unknown loads are after the most recent
+ /// unknown store, as we iterate. As with Defs and Uses, this is here
+ /// to minimize construction/destruction.
+ std::vector<SUnit *> PendingLoads;
+
+ /// LoopRegs - Track which registers are used for loop-carried dependencies.
+ ///
+ LoopDependencies LoopRegs;
+
+ /// DbgValues - Remember the instruction that precedes each DBG_VALUE.
+ /// These are generated by buildSchedGraph but persist so they can be
+ /// referenced when emitting the final schedule.
+ typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
+ DbgValueVector;
+ DbgValueVector DbgValues;
+ MachineInstr *FirstDbgValue;
+
+ public:
+ explicit ScheduleDAGInstrs(MachineFunction &mf,
+ const MachineLoopInfo &mli,
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *LIS = 0);
+
+ virtual ~ScheduleDAGInstrs() {}
+
+ /// begin - Return an iterator to the top of the current scheduling region.
+ MachineBasicBlock::iterator begin() const { return RegionBegin; }
+
+ /// end - Return an iterator to the bottom of the current scheduling region.
+ MachineBasicBlock::iterator end() const { return RegionEnd; }
+
+ /// newSUnit - Creates a new SUnit and returns a pointer to it.
+ SUnit *newSUnit(MachineInstr *MI);
+
+ /// getSUnit - Return an existing SUnit for this MI, or NULL.
+ SUnit *getSUnit(MachineInstr *MI) const;
+
+ /// startBlock - Prepare to perform scheduling in the given block.
+ virtual void startBlock(MachineBasicBlock *BB);
+
+ /// finishBlock - Clean up after scheduling in the given block.
+ virtual void finishBlock();
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
+
+ /// buildSchedGraph - Build SUnits and set up their Preds and Succs to form
+ /// the scheduling dependency graph for the current region.
+ void buildSchedGraph(AliasAnalysis *AA);
+
+ /// addSchedBarrierDeps - Add dependencies from instructions in the current
+ /// list of instructions being scheduled to the scheduling barrier. We want to
+ /// make sure instructions which define registers that are either used by
+ /// the terminator or are live-out are properly scheduled. This is
+ /// especially important when the definition latency of the return value(s)
+ /// is too high to be hidden by the branch or when live-out registers are
+ /// used by instructions in the fall-through block.
+ void addSchedBarrierDeps();
+
+ /// computeLatency - Compute node latency.
+ ///
+ virtual void computeLatency(SUnit *SU);
+
+ /// computeOperandLatency - Override dependence edge latency using
+ /// operand use/def information
+ ///
+ virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const;
+
+ /// schedule - Order nodes according to selected style, filling
+ /// in the Sequence member.
+ ///
+ /// Typically, a scheduling algorithm will implement schedule() without
+ /// overriding enterRegion() or exitRegion().
+ virtual void schedule() = 0;
+
+ /// finalizeSchedule - Allow targets to perform final scheduling actions at
+ /// the level of the whole MachineFunction. By default does nothing.
+ virtual void finalizeSchedule() {}
+
+ virtual void dumpNode(const SUnit *SU) const;
+
+ /// Return a label for a DAG node that points to an instruction.
+ virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+
+ /// Return a label for the region of code covered by the DAG.
+ virtual std::string getDAGName() const;
+
+ protected:
+ void initSUnits();
+ void addPhysRegDataDeps(SUnit *SU, const MachineOperand &MO);
+ void addPhysRegDeps(SUnit *SU, unsigned OperIdx);
+ void addVRegDefDeps(SUnit *SU, unsigned OperIdx);
+ void addVRegUseDeps(SUnit *SU, unsigned OperIdx);
+
+ VReg2SUnitMap::iterator findVRegDef(unsigned VirtReg) {
+ return VRegDefs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ };
+
+ /// newSUnit - Creates a new SUnit and returns a pointer to it.
+ inline SUnit *ScheduleDAGInstrs::newSUnit(MachineInstr *MI) {
+#ifndef NDEBUG
+ const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ return &SUnits.back();
+ }
+
+ /// getSUnit - Return an existing SUnit for this MI, or NULL.
+ inline SUnit *ScheduleDAGInstrs::getSUnit(MachineInstr *MI) const {
+ DenseMap<MachineInstr*, SUnit*>::const_iterator I = MISUnitMap.find(MI);
+ if (I == MISUnitMap.end())
+ return 0;
+ return I->second;
+ }
+} // namespace llvm
+
+#endif
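
The VReg2SUnitMap typedef above leans on the sparse-set representation: a dense vector of elements plus a sparse array mapping keys to dense slots. clear() only resets the dense vector, so it is constant time, and stale sparse entries are harmless because lookups validate them against the dense side. A standalone sketch of the technique (this models the idea, not LLVM's SparseSet interface):

    #include <cassert>
    #include <utility>
    #include <vector>

    struct SparseMap {
      std::vector<unsigned> Sparse;                 // key -> dense slot (may be stale)
      std::vector<std::pair<unsigned, int> > Dense; // packed (key, value) pairs

      explicit SparseMap(unsigned Universe) : Sparse(Universe, 0) {}

      void insert(unsigned Key, int Val) {
        Sparse[Key] = Dense.size();
        Dense.push_back(std::make_pair(Key, Val));
      }
      int *find(unsigned Key) {
        unsigned Slot = Sparse[Key];
        if (Slot < Dense.size() && Dense[Slot].first == Key)
          return &Dense[Slot].second;
        return 0; // stale or never-written sparse entry: not present
      }
      void clear() { Dense.clear(); } // O(1): no per-entry teardown
    };

    int main() {
      SparseMap VRegDefs(16); // hypothetical universe of 16 virtual registers
      VRegDefs.insert(5, 42);
      assert(VRegDefs.find(5) && *VRegDefs.find(5) == 42);
      VRegDefs.clear();       // between scheduling regions
      assert(VRegDefs.find(5) == 0);
      return 0;
    }

This is also why the comment above requires that ValueT not need a destructor: a constant-time clear never runs one.
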
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index 96573dd5d8b1..a582b0c40c8b 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -42,7 +42,7 @@ public:
: MachinePassRegistryNode(N, D, (MachinePassCtor)C)
{ Registry.Add(this); }
~RegisterScheduler() { Registry.Remove(this); }
-
+
// Accessors.
//
@@ -68,11 +68,6 @@ public:
ScheduleDAGSDNodes *createBURRListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel);
-/// createTDRRListDAGScheduler - This creates a top down register usage
-/// reduction list scheduler.
-ScheduleDAGSDNodes *createTDRRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel);
-
/// createSourceListDAGScheduler - This creates a bottom up list scheduler that
/// schedules nodes in source code order when possible.
ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS,
@@ -91,16 +86,17 @@ ScheduleDAGSDNodes *createHybridListDAGScheduler(SelectionDAGISel *IS,
/// to reduce register pressure.
ScheduleDAGSDNodes *createILPListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level);
-/// createTDListDAGScheduler - This creates a top-down list scheduler with
-/// a hazard recognizer.
-ScheduleDAGSDNodes *createTDListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel);
/// createFastDAGScheduler - This creates a "fast" scheduler.
///
ScheduleDAGSDNodes *createFastDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel);
+/// createVLIWDAGScheduler - Scheduler for VLIW targets. This creates a
+/// top-down, DFA-driven list scheduler with a clustering heuristic to control
+/// register pressure.
+ScheduleDAGSDNodes *createVLIWDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel);
/// createDefaultScheduler - This creates an instruction scheduler appropriate
/// for the target.
ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 132983c504e7..6a7a87e86636 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -51,7 +51,7 @@ public:
static void noteHead(SDNode*, SDNode*) {}
static void deleteNode(SDNode *) {
- assert(0 && "ilist_traits<SDNode> shouldn't see a deleteNode call!");
+ llvm_unreachable("ilist_traits<SDNode> shouldn't see a deleteNode call!");
}
private:
static void createNode(const SDNode &);
@@ -112,9 +112,10 @@ public:
};
enum CombineLevel {
- Unrestricted, // Combine may create illegal operations and illegal types.
- NoIllegalTypes, // Combine may create illegal operations but no illegal types.
- NoIllegalOperations // Combine may only create legal operations and types.
+ BeforeLegalizeTypes,
+ AfterLegalizeTypes,
+ AfterLegalizeVectorOps,
+ AfterLegalizeDAG
};
class SelectionDAG;
@@ -138,6 +139,7 @@ class SelectionDAG {
const TargetSelectionDAGInfo &TSI;
MachineFunction *MF;
LLVMContext *Context;
+ CodeGenOpt::Level OptLevel;
/// EntryNode - The starting token.
SDNode EntryNode;
@@ -186,7 +188,7 @@ class SelectionDAG {
SelectionDAG(const SelectionDAG&); // Do not implement.
public:
- explicit SelectionDAG(const TargetMachine &TM);
+ explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level);
~SelectionDAG();
/// init - Prepare this SelectionDAG to process code in the given
@@ -392,6 +394,7 @@ public:
unsigned char TargetFlags = 0);
SDValue getValueType(EVT);
SDValue getRegister(unsigned Reg, EVT VT);
+ SDValue getRegisterMask(const uint32_t *RegMask);
SDValue getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label);
SDValue getBlockAddress(const BlockAddress *BA, EVT VT,
bool isTarget = false, unsigned char TargetFlags = 0);
@@ -650,8 +653,8 @@ public:
///
SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo, bool isVolatile,
- bool isNonTemporal, unsigned Alignment,
- const MDNode *TBAAInfo = 0);
+ bool isNonTemporal, bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo = 0, const MDNode *Ranges = 0);
SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo,
EVT MemVT, bool isVolatile,
@@ -663,8 +666,9 @@ public:
EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal, unsigned Alignment,
- const MDNode *TBAAInfo = 0);
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo = 0,
+ const MDNode *Ranges = 0);
SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr, SDValue Offset,
@@ -976,8 +980,8 @@ public:
/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
/// processing. Targets can implement the computeMaskedBitsForTargetNode
/// method in the TargetLowering class to allow target nodes to be understood.
- void ComputeMaskedBits(SDValue Op, const APInt &Mask, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth = 0) const;
+ void ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth = 0) const;
/// ComputeNumSignBits - Return the number of times the sign bit of the
/// register is replicated into the other bits. We know that at least 1 bit
@@ -1033,6 +1037,7 @@ private:
void *&InsertPos);
SDNode *FindModifiedNodeSlot(SDNode *N, const SDValue *Ops, unsigned NumOps,
void *&InsertPos);
+ SDNode *UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc loc);
void DeleteNodeNotInCSEMaps(SDNode *N);
void DeallocateNode(SDNode *N);
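
Dropping the Mask argument from ComputeMaskedBits above means the analysis now reports KnownZero/KnownOne across the full value width, and a caller interested in particular bits masks the result itself. A toy known-bits computation for (x & C) on 8-bit values showing that caller-side masking (this illustrates the interface shape, not the SelectionDAG implementation):

    #include <cassert>

    typedef unsigned char uint8;

    // Known bits of (V & C) where V is wholly unknown and C is constant:
    // every zero bit of C is known zero in the result; nothing is known one.
    static void computeKnownBitsForAnd(uint8 C, uint8 &KnownZero, uint8 &KnownOne) {
      KnownZero = static_cast<uint8>(~C);
      KnownOne = 0;
    }

    int main() {
      uint8 KnownZero, KnownOne;
      computeKnownBitsForAnd(0xF0, KnownZero, KnownOne);
      assert(KnownZero == 0x0F && KnownOne == 0);

      // A caller that only cares about the low nibble masks afterwards:
      assert((KnownZero & 0x0F) == 0x0F); // the low nibble is all known zero
      return 0;
    }
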
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index ecf394701053..ee3f2319c0b3 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -29,6 +29,7 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
class TargetLowering;
+ class TargetLibraryInfo;
class TargetInstrInfo;
class FunctionLoweringInfo;
class ScheduleHazardRecognizer;
@@ -42,6 +43,7 @@ class SelectionDAGISel : public MachineFunctionPass {
public:
const TargetMachine &TM;
const TargetLowering &TLI;
+ const TargetLibraryInfo *LibInfo;
FunctionLoweringInfo *FuncInfo;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
@@ -92,7 +94,7 @@ public:
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
- /// FIXME: This is a static member function because the MSP430/SystemZ/X86
+ /// FIXME: This is a static member function because the MSP430 and X86
/// targets use it during isel. This could become a proper member.
static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
CodeGenOpt::Level OptLevel,
@@ -179,6 +181,7 @@ protected:
/// ISelUpdater - helper class to handle updates of the
/// instruction selection graph.
class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+ virtual void anchor();
SelectionDAG::allnodes_iterator &ISelPosition;
public:
explicit ISelUpdater(SelectionDAG::allnodes_iterator &isp)
@@ -237,8 +240,7 @@ public:
/// succeeds or false if it fails. The number is a private implementation
/// detail to the code tblgen produces.
virtual bool CheckPatternPredicate(unsigned PredNo) const {
- assert(0 && "Tblgen should generate the implementation of this!");
- return 0;
+ llvm_unreachable("Tblgen should generate the implementation of this!");
}
/// CheckNodePredicate - This function is generated by tblgen in the target.
@@ -246,20 +248,17 @@ public:
/// false if it fails. The number is a private implementation
/// detail to the code tblgen produces.
virtual bool CheckNodePredicate(SDNode *N, unsigned PredNo) const {
- assert(0 && "Tblgen should generate the implementation of this!");
- return 0;
+ llvm_unreachable("Tblgen should generate the implementation of this!");
}
virtual bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N,
unsigned PatternNo,
SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {
- assert(0 && "Tblgen should generate the implementation of this!");
- return false;
+ llvm_unreachable("Tblgen should generate the implementation of this!");
}
virtual SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) {
- assert(0 && "Tblgen should generate this!");
- return SDValue();
+ llvm_unreachable("Tblgen should generate this!");
}
SDNode *SelectCodeCommon(SDNode *NodeToMatch,
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 6c7be69b4d2f..f8248b845337 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -917,12 +917,13 @@ public:
// with MachineMemOperand information.
bool isVolatile() const { return (SubclassData >> 5) & 1; }
bool isNonTemporal() const { return (SubclassData >> 6) & 1; }
+ bool isInvariant() const { return (SubclassData >> 7) & 1; }
AtomicOrdering getOrdering() const {
- return AtomicOrdering((SubclassData >> 7) & 15);
+ return AtomicOrdering((SubclassData >> 8) & 15);
}
SynchronizationScope getSynchScope() const {
- return SynchronizationScope((SubclassData >> 11) & 1);
+ return SynchronizationScope((SubclassData >> 12) & 1);
}
/// Returns the SrcValue and offset that describes the location of the access
@@ -932,6 +933,9 @@ public:
/// Returns the TBAAInfo that describes the dereference.
const MDNode *getTBAAInfo() const { return MMO->getTBAAInfo(); }
+ /// Returns the Ranges that describes the dereference.
+ const MDNode *getRanges() const { return MMO->getRanges(); }
+
/// getMemoryVT - Return the type of the in-memory value.
EVT getMemoryVT() const { return MemoryVT; }
@@ -993,8 +997,8 @@ class AtomicSDNode : public MemSDNode {
"Ordering may not require more than 4 bits!");
assert((SynchScope & 1) == SynchScope &&
"SynchScope may not require more than 1 bit!");
- SubclassData |= Ordering << 7;
- SubclassData |= SynchScope << 11;
+ SubclassData |= Ordering << 8;
+ SubclassData |= SynchScope << 12;
assert(getOrdering() == Ordering && "Ordering encoding error!");
assert(getSynchScope() == SynchScope && "Synch-scope encoding error!");
@@ -1113,11 +1117,9 @@ protected:
}
public:
- void getMask(SmallVectorImpl<int> &M) const {
+ ArrayRef<int> getMask() const {
EVT VT = getValueType(0);
- M.clear();
- for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- M.push_back(Mask[i]);
+ return makeArrayRef(Mask, VT.getVectorNumElements());
}
int getMaskElt(unsigned Idx) const {
assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
@@ -1434,6 +1436,23 @@ public:
}
};
+class RegisterMaskSDNode : public SDNode {
+ // The memory for RegMask is not owned by the node.
+ const uint32_t *RegMask;
+ friend class SelectionDAG;
+ RegisterMaskSDNode(const uint32_t *mask)
+ : SDNode(ISD::RegisterMask, DebugLoc(), getSDVTList(MVT::Untyped)),
+ RegMask(mask) {}
+public:
+
+ const uint32_t *getRegMask() const { return RegMask; }
+
+ static bool classof(const RegisterMaskSDNode *) { return true; }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::RegisterMask;
+ }
+};
+
class BlockAddressSDNode : public SDNode {
const BlockAddress *BA;
unsigned char TargetFlags;
@@ -1684,6 +1703,8 @@ public:
/// setMemRefs - Assign this MachineSDNodes's memory reference descriptor
/// list. This does not transfer ownership.
void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
+ for (mmo_iterator MMI = NewMemRefs, MME = NewMemRefsEnd; MMI != MME; ++MMI)
+ assert(*MMI && "Null mem ref detected!");
MemRefs = NewMemRefs;
MemRefsEnd = NewMemRefsEnd;
}
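
The SubclassData repacking above makes room for the new invariant-load flag: bit 7 now holds isInvariant, the 4-bit atomic ordering moves up to bits 8-11, and the synchronization scope to bit 12, which is why the shift constants in AtomicSDNode change in step with the accessors. A standalone check of the new layout with plain integer arithmetic:

    #include <cassert>

    int main() {
      const unsigned Ordering = 5;   // any 4-bit ordering value
      const unsigned SynchScope = 1; // 1-bit scope

      unsigned SubclassData = 0;
      SubclassData |= 1u << 7;        // isInvariant
      SubclassData |= Ordering << 8;
      SubclassData |= SynchScope << 12;

      assert(((SubclassData >> 7) & 1) == 1);           // isInvariant()
      assert(((SubclassData >> 8) & 15) == Ordering);   // getOrdering()
      assert(((SubclassData >> 12) & 1) == SynchScope); // getSynchScope()
      return 0;
    }
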
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 2d98864dc9ed..d868cb8dade8 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -19,7 +19,7 @@
#ifndef LLVM_CODEGEN_SLOTINDEXES_H
#define LLVM_CODEGEN_SLOTINDEXES_H
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/PointerIntPair.h"
@@ -83,7 +83,29 @@ namespace llvm {
friend class SlotIndexes;
friend struct DenseMapInfo<SlotIndex>;
- enum Slot { LOAD, USE, DEF, STORE, NUM };
+ enum Slot {
+ /// Basic block boundary. Used for live ranges entering and leaving a
+ /// block without being live in the layout neighbor. Also used as the
+ /// def slot of PHI-defs.
+ Slot_Block,
+
+ /// Early-clobber register use/def slot. A live range defined at
+ /// Slot_EarlyClobber interferes with normal live ranges killed at
+ /// Slot_Register. Also used as the kill slot for live ranges tied to an
+ /// early-clobber def.
+ Slot_EarlyClobber,
+
+ /// Normal register use/def slot. Normal instructions kill and define
+ /// register live ranges at this slot.
+ Slot_Register,
+
+ /// Dead def kill point. Kill slot for a live range that is defined by
+ /// the same instruction (Slot_Register or Slot_EarlyClobber), but isn't
+ /// used anywhere.
+ Slot_Dead,
+
+ Slot_Count
+ };
PointerIntPair<IndexListEntry*, 2, unsigned> lie;
@@ -113,7 +135,7 @@ namespace llvm {
enum {
/// The default distance between instructions as returned by distance().
/// This may vary as instructions are inserted and removed.
- InstrDist = 4*NUM
+ InstrDist = 4 * Slot_Count
};
static inline SlotIndex getEmptyKey() {
@@ -186,69 +208,55 @@ namespace llvm {
return A.lie.getPointer() == B.lie.getPointer();
}
+ /// isEarlierInstr - Return true if A refers to an instruction earlier than
+ /// B. This is equivalent to A < B && !isSameInstr(A, B).
+ static bool isEarlierInstr(SlotIndex A, SlotIndex B) {
+ return A.entry().getIndex() < B.entry().getIndex();
+ }
+
/// Return the distance from this index to the given one.
int distance(SlotIndex other) const {
return other.getIndex() - getIndex();
}
- /// isLoad - Return true if this is a LOAD slot.
- bool isLoad() const {
- return getSlot() == LOAD;
- }
+ /// isBlock - Returns true if this is a block boundary slot.
+ bool isBlock() const { return getSlot() == Slot_Block; }
- /// isDef - Return true if this is a DEF slot.
- bool isDef() const {
- return getSlot() == DEF;
- }
+ /// isEarlyClobber - Returns true if this is an early-clobber slot.
+ bool isEarlyClobber() const { return getSlot() == Slot_EarlyClobber; }
- /// isUse - Return true if this is a USE slot.
- bool isUse() const {
- return getSlot() == USE;
- }
+ /// isRegister - Returns true if this is a normal register use/def slot.
+ /// Note that early-clobber slots may also be used for uses and defs.
+ bool isRegister() const { return getSlot() == Slot_Register; }
- /// isStore - Return true if this is a STORE slot.
- bool isStore() const {
- return getSlot() == STORE;
- }
+ /// isDead - Returns true if this is a dead def kill slot.
+ bool isDead() const { return getSlot() == Slot_Dead; }
/// Returns the base index associated with this index. The base index
- /// is the one associated with the LOAD slot for the instruction pointed to
- /// by this index.
+ /// is the one associated with the Slot_Block slot for the instruction
+ /// pointed to by this index.
SlotIndex getBaseIndex() const {
- return getLoadIndex();
+ return SlotIndex(&entry(), Slot_Block);
}
/// Returns the boundary index associated with this index. The boundary
- /// index is the one associated with the LOAD slot for the instruction
+ /// index is the one associated with the Slot_Block slot for the instruction
/// pointed to by this index.
SlotIndex getBoundaryIndex() const {
- return getStoreIndex();
+ return SlotIndex(&entry(), Slot_Dead);
}
- /// Returns the index of the LOAD slot for the instruction pointed to by
- /// this index.
- SlotIndex getLoadIndex() const {
- return SlotIndex(&entry(), SlotIndex::LOAD);
- }
-
- /// Returns the index of the USE slot for the instruction pointed to by
- /// this index.
- SlotIndex getUseIndex() const {
- return SlotIndex(&entry(), SlotIndex::USE);
+ /// Returns the register use/def slot in the current instruction for a
+ /// normal or early-clobber def.
+ SlotIndex getRegSlot(bool EC = false) const {
+ return SlotIndex(&entry(), EC ? Slot_EarlyClobber : Slot_Register);
}
- /// Returns the index of the DEF slot for the instruction pointed to by
- /// this index.
- SlotIndex getDefIndex() const {
- return SlotIndex(&entry(), SlotIndex::DEF);
+ /// Returns the dead def kill slot for the current instruction.
+ SlotIndex getDeadSlot() const {
+ return SlotIndex(&entry(), Slot_Dead);
}
- /// Returns the index of the STORE slot for the instruction pointed to by
- /// this index.
- SlotIndex getStoreIndex() const {
- return SlotIndex(&entry(), SlotIndex::STORE);
- }
-
/// Returns the next slot in the index list. This could be either the
/// next slot for the instruction pointed to by this index or, if this
/// index is a Slot_Dead, the first slot for the next instruction.
@@ -257,8 +265,8 @@ namespace llvm {
/// use one of those methods.
SlotIndex getNextSlot() const {
Slot s = getSlot();
- if (s == SlotIndex::STORE) {
- return SlotIndex(entry().getNext(), SlotIndex::LOAD);
+ if (s == Slot_Dead) {
+ return SlotIndex(entry().getNext(), Slot_Block);
}
return SlotIndex(&entry(), s + 1);
}
@@ -271,14 +279,14 @@ namespace llvm {
/// Returns the previous slot in the index list. This could be either the
/// previous slot for the instruction pointed to by this index or, if this
- /// index is a LOAD, the last slot for the previous instruction.
+ /// index is a Slot_Block, the last slot for the previous instruction.
/// WARNING: This method is considerably more expensive than the methods
/// that return specific slots (getRegSlot(), etc). If you can, please
/// use one of those methods.
SlotIndex getPrevSlot() const {
Slot s = getSlot();
- if (s == SlotIndex::LOAD) {
- return SlotIndex(entry().getPrev(), SlotIndex::STORE);
+ if (s == Slot_Block) {
+ return SlotIndex(entry().getPrev(), Slot_Dead);
}
return SlotIndex(&entry(), s - 1);
}
@@ -464,11 +472,6 @@ namespace llvm {
return SlotIndex(back(), 0);
}
- /// Returns the invalid index marker for this analysis.
- SlotIndex getInvalidIndex() {
- return getZeroIndex();
- }
-
/// Returns the distance between the highest and lowest indexes allocated
/// so far.
unsigned getIndexesLength() const {
@@ -486,12 +489,13 @@ namespace llvm {
/// Returns true if the given machine instr is mapped to an index,
/// otherwise returns false.
bool hasIndex(const MachineInstr *instr) const {
- return (mi2iMap.find(instr) != mi2iMap.end());
+ return mi2iMap.count(instr);
}
/// Returns the base index for the given instruction.
- SlotIndex getInstructionIndex(const MachineInstr *instr) const {
- Mi2IndexMap::const_iterator itr = mi2iMap.find(instr);
+ SlotIndex getInstructionIndex(const MachineInstr *MI) const {
+ // Instructions inside a bundle have the same number as the bundle itself.
+ Mi2IndexMap::const_iterator itr = mi2iMap.find(getBundleStart(MI));
assert(itr != mi2iMap.end() && "Instruction not found in maps.");
return itr->second;
}
@@ -645,6 +649,8 @@ namespace llvm {
/// instructions, create the new index after the null indexes instead of
/// before them.
SlotIndex insertMachineInstrInMaps(MachineInstr *mi, bool Late = false) {
+ assert(!mi->isInsideBundle() &&
+ "Instructions inside bundles should use bundle start's slot.");
assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed.");
// Numbering DBG_VALUE instructions could cause code generation to be
// affected by debug information.
@@ -677,7 +683,7 @@ namespace llvm {
if (dist == 0)
renumberIndexes(newEntry);
- SlotIndex newIndex(newEntry, SlotIndex::LOAD);
+ SlotIndex newIndex(newEntry, SlotIndex::Slot_Block);
mi2iMap.insert(std::make_pair(mi, newIndex));
return newIndex;
}
@@ -728,8 +734,8 @@ namespace llvm {
insert(nextEntry, startEntry);
insert(nextEntry, stopEntry);
- SlotIndex startIdx(startEntry, SlotIndex::LOAD);
- SlotIndex endIdx(nextEntry, SlotIndex::LOAD);
+ SlotIndex startIdx(startEntry, SlotIndex::Slot_Block);
+ SlotIndex endIdx(nextEntry, SlotIndex::Slot_Block);
assert(unsigned(mbb->getNumber()) == MBBRanges.size() &&
"Blocks must be added in order");
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index ca40ccf85378..5a4213625bae 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -15,9 +15,9 @@
#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
class MachineModuleInfo;
@@ -65,6 +65,11 @@ public:
virtual MCSymbol *
getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI) const;
+
+ virtual const MCSection *
+ getStaticCtorSection(unsigned Priority = 65535) const;
+ virtual const MCSection *
+ getStaticDtorSection(unsigned Priority = 65535) const;
};
@@ -73,6 +78,12 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
public:
virtual ~TargetLoweringObjectFileMachO() {}
+ /// emitModuleFlags - Emit the module flags that specify the garbage
+ /// collection information.
+ virtual void emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const;
+
virtual const MCSection *
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const;
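A sketch of the new static-constructor hook (the wrapper function is illustrative; 65535 is the default bucket for unprioritized constructors):

    #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
    using namespace llvm;

    // Ask the ELF lowering object for the ctor section matching a priority.
    static const MCSection *
    ctorSectionFor(const TargetLoweringObjectFileELF &TLOF, unsigned Priority) {
      return TLOF.getStaticCtorSection(Priority);
    }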
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index cae0bcb165c1..76c2357a552f 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -16,10 +16,11 @@
#ifndef LLVM_CODEGEN_VALUETYPES_H
#define LLVM_CODEGEN_VALUETYPES_H
-#include <cassert>
-#include <string>
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <string>
namespace llvm {
class Type;
@@ -45,49 +46,56 @@ namespace llvm {
FIRST_INTEGER_VALUETYPE = i1,
LAST_INTEGER_VALUETYPE = i128,
- f32 = 7, // This is a 32 bit floating point value
- f64 = 8, // This is a 64 bit floating point value
- f80 = 9, // This is a 80 bit floating point value
- f128 = 10, // This is a 128 bit floating point value
- ppcf128 = 11, // This is a PPC 128-bit floating point value
-
- v2i8 = 12, // 2 x i8
- v4i8 = 13, // 4 x i8
- v8i8 = 14, // 8 x i8
- v16i8 = 15, // 16 x i8
- v32i8 = 16, // 32 x i8
- v2i16 = 17, // 2 x i16
- v4i16 = 18, // 4 x i16
- v8i16 = 19, // 8 x i16
- v16i16 = 20, // 16 x i16
- v2i32 = 21, // 2 x i32
- v4i32 = 22, // 4 x i32
- v8i32 = 23, // 8 x i32
- v1i64 = 24, // 1 x i64
- v2i64 = 25, // 2 x i64
- v4i64 = 26, // 4 x i64
- v8i64 = 27, // 8 x i64
-
- v2f32 = 28, // 2 x f32
- v4f32 = 29, // 4 x f32
- v8f32 = 30, // 8 x f32
- v2f64 = 31, // 2 x f64
- v4f64 = 32, // 4 x f64
+ f16 = 7, // This is a 16 bit floating point value
+ f32 = 8, // This is a 32 bit floating point value
+ f64 = 9, // This is a 64 bit floating point value
+ f80 = 10, // This is an 80 bit floating point value
+ f128 = 11, // This is a 128 bit floating point value
+ ppcf128 = 12, // This is a PPC 128-bit floating point value
+
+ FIRST_FP_VALUETYPE = f16,
+ LAST_FP_VALUETYPE = ppcf128,
+
+ v2i8 = 13, // 2 x i8
+ v4i8 = 14, // 4 x i8
+ v8i8 = 15, // 8 x i8
+ v16i8 = 16, // 16 x i8
+ v32i8 = 17, // 32 x i8
+ v2i16 = 18, // 2 x i16
+ v4i16 = 19, // 4 x i16
+ v8i16 = 20, // 8 x i16
+ v16i16 = 21, // 16 x i16
+ v2i32 = 22, // 2 x i32
+ v4i32 = 23, // 4 x i32
+ v8i32 = 24, // 8 x i32
+ v1i64 = 25, // 1 x i64
+ v2i64 = 26, // 2 x i64
+ v4i64 = 27, // 4 x i64
+ v8i64 = 28, // 8 x i64
+
+ v2f16 = 29, // 2 x f16
+ v2f32 = 30, // 2 x f32
+ v4f32 = 31, // 4 x f32
+ v8f32 = 32, // 8 x f32
+ v2f64 = 33, // 2 x f64
+ v4f64 = 34, // 4 x f64
FIRST_VECTOR_VALUETYPE = v2i8,
LAST_VECTOR_VALUETYPE = v4f64,
+ FIRST_FP_VECTOR_VALUETYPE = v2f16,
+ LAST_FP_VECTOR_VALUETYPE = v4f64,
- x86mmx = 33, // This is an X86 MMX value
+ x86mmx = 35, // This is an X86 MMX value
- Glue = 34, // This glues nodes together during pre-RA sched
+ Glue = 36, // This glues nodes together during pre-RA sched
- isVoid = 35, // This has no value
+ isVoid = 37, // This has no value
- untyped = 36, // This value takes a register, but has
+ Untyped = 38, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- LAST_VALUETYPE = 37, // This always remains at the end of the list.
+ LAST_VALUETYPE = 39, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -143,8 +151,10 @@ namespace llvm {
/// isFloatingPoint - Return true if this is a FP, or a vector FP type.
bool isFloatingPoint() const {
- return ((SimpleTy >= MVT::f32 && SimpleTy <= MVT::ppcf128) ||
- (SimpleTy >= MVT::v2f32 && SimpleTy <= MVT::v4f64));
+ return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_FP_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_VECTOR_VALUETYPE));
}
/// isInteger - Return true if this is an integer, or a vector integer type.
@@ -203,6 +213,7 @@ namespace llvm {
case v2i64:
case v4i64:
case v8i64: return i64;
+ case v2f16: return f16;
case v2f32:
case v4f32:
case v8f32: return f32;
@@ -233,6 +244,7 @@ namespace llvm {
case v2i16:
case v2i32:
case v2i64:
+ case v2f16:
case v2f32:
case v2f64: return 2;
case v1i64: return 1;
@@ -242,21 +254,23 @@ namespace llvm {
unsigned getSizeInBits() const {
switch (SimpleTy) {
case iPTR:
- assert(0 && "Value type size is target-dependent. Ask TLI.");
+ llvm_unreachable("Value type size is target-dependent. Ask TLI.");
case iPTRAny:
case iAny:
case fAny:
- assert(0 && "Value type is overloaded.");
+ llvm_unreachable("Value type is overloaded.");
default:
- assert(0 && "getSizeInBits called on extended MVT.");
+ llvm_unreachable("getSizeInBits called on extended MVT.");
case i1 : return 1;
case i8 : return 8;
case i16 :
+ case f16:
case v2i8: return 16;
case f32 :
case i32 :
case v4i8:
- case v2i16: return 32;
+ case v2i16:
+ case v2f16: return 32;
case x86mmx:
case f64 :
case i64 :
@@ -300,7 +314,9 @@ namespace llvm {
static MVT getFloatingPointVT(unsigned BitWidth) {
switch (BitWidth) {
default:
- assert(false && "Bad bit width!");
+ llvm_unreachable("Bad bit width!");
+ case 16:
+ return MVT::f16;
case 32:
return MVT::f32;
case 64:
@@ -359,6 +375,9 @@ namespace llvm {
if (NumElements == 4) return MVT::v4i64;
if (NumElements == 8) return MVT::v8i64;
break;
+ case MVT::f16:
+ if (NumElements == 2) return MVT::v2f16;
+ break;
case MVT::f32:
if (NumElements == 2) return MVT::v2f32;
if (NumElements == 4) return MVT::v4f32;
@@ -424,20 +443,6 @@ namespace llvm {
return getExtendedVectorVT(Context, VT, NumElements);
}
- /// getIntVectorWithNumElements - Return any integer vector type that has
- /// the specified number of elements.
- static EVT getIntVectorWithNumElements(LLVMContext &C, unsigned NumElts) {
- switch (NumElts) {
- default: return getVectorVT(C, MVT::i8, NumElts);
- case 1: return MVT::v1i64;
- case 2: return MVT::v2i32;
- case 4: return MVT::v4i16;
- case 8: return MVT::v8i8;
- case 16: return MVT::v16i8;
- }
- return MVT::INVALID_SIMPLE_VALUE_TYPE;
- }
-
/// changeVectorElementTypeToInteger - Return a vector with the same number
/// of elements as this vector, but with the element type converted to an
/// integer type with the same bitwidth.
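To make the new half-precision support concrete, a small sketch (illustrative only; assumes the enum values introduced above):

    #include "llvm/CodeGen/ValueTypes.h"
    #include <cassert>
    using namespace llvm;

    static void checkHalfTypes() {
      MVT Half = MVT::getFloatingPointVT(16);    // now yields MVT::f16
      MVT V2   = MVT::getVectorVT(MVT::f16, 2);  // now yields MVT::v2f16
      assert(Half == MVT::f16 && "16-bit FP maps to f16");
      assert(V2 == MVT::v2f16 && V2.getSizeInBits() == 32);
      assert(V2.isFloatingPoint() && "v2f16 falls in the FP vector range");
    }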
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 0cfb634ead79..6c2269052a11 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -26,39 +26,41 @@ def i16 : ValueType<16 , 3>; // 16-bit integer value
def i32 : ValueType<32 , 4>; // 32-bit integer value
def i64 : ValueType<64 , 5>; // 64-bit integer value
def i128 : ValueType<128, 6>; // 128-bit integer value
-def f32 : ValueType<32 , 7>; // 32-bit floating point value
-def f64 : ValueType<64 , 8>; // 64-bit floating point value
-def f80 : ValueType<80 , 9>; // 80-bit floating point value
-def f128 : ValueType<128, 10>; // 128-bit floating point value
-def ppcf128: ValueType<128, 11>; // PPC 128-bit floating point value
+def f16 : ValueType<16 , 7>; // 16-bit floating point value
+def f32 : ValueType<32 , 8>; // 32-bit floating point value
+def f64 : ValueType<64 , 9>; // 64-bit floating point value
+def f80 : ValueType<80 , 10>; // 80-bit floating point value
+def f128 : ValueType<128, 11>; // 128-bit floating point value
+def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value
-def v2i8 : ValueType<16 , 12>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 13>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 14>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 15>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 16>; // 32 x i8 vector value
-def v2i16 : ValueType<32 , 17>; // 2 x i16 vector value
-def v4i16 : ValueType<64 , 18>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 19>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 20>; // 16 x i16 vector value
-def v2i32 : ValueType<64 , 21>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 22>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 23>; // 8 x i32 vector value
-def v1i64 : ValueType<64 , 24>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 25>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 26>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 27>; // 8 x i64 vector value
+def v2i8 : ValueType<16 , 13>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 14>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 15>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 16>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 17>; // 32 x i8 vector value
+def v2i16 : ValueType<32 , 18>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 19>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 20>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 21>; // 16 x i16 vector value
+def v2i32 : ValueType<64 , 22>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 23>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 24>; // 8 x i32 vector value
+def v1i64 : ValueType<64 , 25>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 26>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 27>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 28>; // 8 x i64 vector value
-def v2f32 : ValueType<64 , 28>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 29>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 30>; // 8 x f32 vector value
-def v2f64 : ValueType<128, 31>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 32>; // 4 x f64 vector value
+def v2f16 : ValueType<32 , 29>; // 2 x f16 vector value
+def v2f32 : ValueType<64 , 30>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 31>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 32>; // 8 x f32 vector value
+def v2f64 : ValueType<128, 33>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 34>; // 4 x f64 vector value
-def x86mmx : ValueType<64 , 33>; // X86 MMX value
-def FlagVT : ValueType<0 , 34>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 35>; // Produces no value
-def untyped: ValueType<8 , 36>; // Produces an untyped value
+def x86mmx : ValueType<64 , 35>; // X86 MMX value
+def FlagVT : ValueType<0 , 36>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 37>; // Produces no value
+def untyped: ValueType<8 , 38>; // Produces an untyped value
def MetadataVT: ValueType<0, 250>; // Metadata
diff --git a/include/llvm/Config/Disassemblers.def.in b/include/llvm/Config/Disassemblers.def.in
index 1b136570933b..1e6281de9989 100644
--- a/include/llvm/Config/Disassemblers.def.in
+++ b/include/llvm/Config/Disassemblers.def.in
@@ -9,7 +9,7 @@
//
// This file enumerates all of the assembly-language parsers
// supported by this build of LLVM. Clients of this file should define
-// the LLVM_ASM_PARSER macro to be a function-like macro with a
+// the LLVM_DISASSEMBLER macro to be a function-like macro with a
// single parameter (the name of the target whose assembly can be
// generated); including this file will then enumerate all of the
// targets with assembly parsers.
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index e44d429dfcf1..69e358031ebc 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -11,17 +11,8 @@
/* Relative directory for resource files */
#define CLANG_RESOURCE_DIR "${CLANG_RESOURCE_DIR}"
-/* 32 bit multilib directory. */
-#define CXX_INCLUDE_32BIT_DIR "${CXX_INCLUDE_32BIT_DIR}"
-
-/* 64 bit multilib directory. */
-#define CXX_INCLUDE_64BIT_DIR "${CXX_INCLUDE_64BIT_DIR}"
-
-/* Arch the libstdc++ headers. */
-#define CXX_INCLUDE_ARCH "${CXX_INCLUDE_ARCH}"
-
-/* Directory with the libstdc++ headers. */
-#define CXX_INCLUDE_ROOT "${CXX_INCLUDE_ROOT}"
+/* Directory where libstdc++ is installed. */
+#define GCC_INSTALL_PREFIX "${GCC_INSTALL_PREFIX}"
/* Directories clang will search for headers */
#define C_INCLUDE_DIRS "${C_INCLUDE_DIRS}"
@@ -32,9 +23,6 @@
/* Define if position independent code is enabled */
#cmakedefine ENABLE_PIC
-/* Define if threads enabled */
-#cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
-
/* Define if timestamp information (e.g., __DATE___) is allowed */
#cmakedefine ENABLE_TIMESTAMPS ${ENABLE_TIMESTAMPS}
@@ -297,6 +285,9 @@
/* Define to 1 if you have the `powf' function. */
#cmakedefine HAVE_POWF ${HAVE_POWF}
+/* Define to 1 if you have the `pread' function. */
+#cmakedefine HAVE_PREAD ${HAVE_PREAD}
+
/* Define if libtool can extract symbol lists from object files. */
#undef HAVE_PRELOADED_SYMBOLS
@@ -545,18 +536,21 @@
/* Installation directory for data files */
#cmakedefine LLVM_DATADIR "${LLVM_DATADIR}"
+/* Target triple LLVM will generate code for by default */
+#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}"
+
/* Installation directory for documentation */
#cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}"
+/* Define if threads enabled */
+#cmakedefine01 LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
/* Has gcc/MSVC atomic intrinsics */
#cmakedefine01 LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
-
/* Installation directory for include files */
#cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
@@ -578,6 +572,9 @@
/* LLVM name for the native AsmPrinter init function, if available */
#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+/* LLVM name for the native Disassembler init function, if available */
+#cmakedefine LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler
+
/* LLVM name for the native Target init function, if available */
#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
@@ -623,6 +620,12 @@
/* Installation prefix directory */
#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
+/* Major version of the LLVM API */
+#cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
+
+/* Minor version of the LLVM API */
+#cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
+
/* Define if the OS needs help to load dependent libraries for dlopen(). */
#cmakedefine LTDL_DLOPEN_DEPLIBS ${LTDL_DLOPEN_DEPLIBS}
@@ -668,15 +671,15 @@
/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
#undef STAT_MACROS_BROKEN
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
#undef TIME_WITH_SYS_TIME
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
#undef TM_IN_SYS_TIME
-/* Define if we have the oprofile JIT-support library */
-#undef USE_OPROFILE
-
/* Define if use udis86 library */
#undef USE_UDIS86
@@ -713,4 +716,10 @@
/* Added by Kevin -- Maximum path length */
#cmakedefine MAXPATHLEN ${MAXPATHLEN}
+/* Support for Intel JIT Events API is enabled */
+#cmakedefine LLVM_USE_INTEL_JITEVENTS 1
+
+/* Support for OProfile JIT API is enabled */
+#cmakedefine LLVM_USE_OPROFILE 1
+
#endif
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 3670de557f4f..ccff7da96718 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -9,33 +9,18 @@
/* Relative directory for resource files */
#undef CLANG_RESOURCE_DIR
-/* 32 bit multilib directory. */
-#undef CXX_INCLUDE_32BIT_DIR
-
-/* 64 bit multilib directory. */
-#undef CXX_INCLUDE_64BIT_DIR
-
-/* Arch the libstdc++ headers. */
-#undef CXX_INCLUDE_ARCH
-
-/* Directory with the libstdc++ headers. */
-#undef CXX_INCLUDE_ROOT
-
/* Directories clang will search for headers */
#undef C_INCLUDE_DIRS
-/* Define if CBE is enabled for printf %a output */
-#undef ENABLE_CBE_PRINTF_A
-
/* Define if position independent code is enabled */
#undef ENABLE_PIC
-/* Define if threads enabled */
-#undef ENABLE_THREADS
-
/* Define if timestamp information (e.g., __DATE___) is allowed */
#undef ENABLE_TIMESTAMPS
+/* Directory where gcc is installed. */
+#undef GCC_INSTALL_PREFIX
+
/* Define to 1 if you have the `argz_append' function. */
#undef HAVE_ARGZ_APPEND
@@ -295,6 +280,9 @@
/* Define to 1 if you have the `powf' function. */
#undef HAVE_POWF
+/* Define to 1 if you have the `pread' function. */
+#undef HAVE_PREAD
+
/* Define if libtool can extract symbol lists from object files. */
#undef HAVE_PRELOADED_SYMBOLS
@@ -543,18 +531,21 @@
/* Installation directory for data files */
#undef LLVM_DATADIR
+/* Target triple LLVM will generate code for by default */
+#undef LLVM_DEFAULT_TARGET_TRIPLE
+
/* Installation directory for documentation */
#undef LLVM_DOCSDIR
+/* Define if threads enabled */
+#undef LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#undef LLVM_ETCDIR
/* Has gcc/MSVC atomic intrinsics */
#undef LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#undef LLVM_HOSTTRIPLE
-
/* Installation directory for include files */
#undef LLVM_INCLUDEDIR
@@ -576,6 +567,9 @@
/* LLVM name for the native AsmPrinter init function, if available */
#undef LLVM_NATIVE_ASMPRINTER
+/* LLVM name for the native Disassembler init function, if available */
+#undef LLVM_NATIVE_DISASSEMBLER
+
/* LLVM name for the native Target init function, if available */
#undef LLVM_NATIVE_TARGET
@@ -621,6 +615,18 @@
/* Installation prefix directory */
#undef LLVM_PREFIX
+/* Define if we have the Intel JIT API runtime support library */
+#undef LLVM_USE_INTEL_JITEVENTS
+
+/* Define if we have the oprofile JIT-support library */
+#undef LLVM_USE_OPROFILE
+
+/* Major version of the LLVM API */
+#undef LLVM_VERSION_MAJOR
+
+/* Minor version of the LLVM API */
+#undef LLVM_VERSION_MINOR
+
/* Define if the OS needs help to load dependent libraries for dlopen(). */
#undef LTDL_DLOPEN_DEPLIBS
@@ -675,9 +681,6 @@
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
#undef TM_IN_SYS_TIME
-/* Define if we have the oprofile JIT-support library */
-#undef USE_OPROFILE
-
/* Define if use udis86 library */
#undef USE_UDIS86
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index 4147fd1ff66d..85d28fe134f2 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -25,18 +25,21 @@
/* Installation directory for data files */
#cmakedefine LLVM_DATADIR "${LLVM_DATADIR}"
+/* Target triple LLVM will generate code for by default */
+#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}"
+
/* Installation directory for documentation */
#cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}"
+/* Define if threads enabled */
+#cmakedefine01 LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
/* Has gcc/MSVC atomic intrinsics */
#cmakedefine01 LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
-
/* Installation directory for include files */
#cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
@@ -58,6 +61,9 @@
/* LLVM name for the native AsmPrinter init function, if available */
#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+/* LLVM name for the native Disassembler init function, if available */
+#cmakedefine LLVM_NATIVE_DISASSEMBLER LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler
+
/* LLVM name for the native Target init function, if available */
#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
@@ -103,4 +109,10 @@
/* Installation prefix directory */
#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
+/* Major version of the LLVM API */
+#cmakedefine LLVM_VERSION_MAJOR ${LLVM_VERSION_MAJOR}
+
+/* Minor version of the LLVM API */
+#cmakedefine LLVM_VERSION_MINOR ${LLVM_VERSION_MINOR}
+
#endif
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
index b2257f37bbc7..973652ff2dfb 100644
--- a/include/llvm/Config/llvm-config.h.in
+++ b/include/llvm/Config/llvm-config.h.in
@@ -25,18 +25,21 @@
/* Installation directory for data files */
#undef LLVM_DATADIR
+/* Target triple LLVM will generate code for by default */
+#undef LLVM_DEFAULT_TARGET_TRIPLE
+
/* Installation directory for documentation */
#undef LLVM_DOCSDIR
+/* Define if threads enabled */
+#undef LLVM_ENABLE_THREADS
+
/* Installation directory for config files */
#undef LLVM_ETCDIR
/* Has gcc/MSVC atomic intrinsics */
#undef LLVM_HAS_ATOMICS
-/* Host triple we were built on */
-#undef LLVM_HOSTTRIPLE
-
/* Installation directory for include files */
#undef LLVM_INCLUDEDIR
@@ -58,6 +61,9 @@
/* LLVM name for the native AsmPrinter init function, if available */
#undef LLVM_NATIVE_ASMPRINTER
+/* LLVM name for the native Disassembler init function, if available */
+#undef LLVM_NATIVE_DISASSEMBLER
+
/* LLVM name for the native Target init function, if available */
#undef LLVM_NATIVE_TARGET
@@ -103,4 +109,10 @@
/* Installation prefix directory */
#undef LLVM_PREFIX
+/* Major version of the LLVM API */
+#undef LLVM_VERSION_MAJOR
+
+/* Minor version of the LLVM API */
+#undef LLVM_VERSION_MINOR
+
#endif
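A hedged example of what the new version macros enable in client code (the particular version test is illustrative):

    #include "llvm/Config/llvm-config.h"

    /* Guard code that needs the 3.1 API surface or newer. */
    #if LLVM_VERSION_MAJOR > 3 || \
        (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 1)
    /* ... use post-3.0 interfaces here ... */
    #endif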
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index ecc1fe70cc5d..13acdc66b892 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -41,6 +41,7 @@ namespace llvm {
class Constant : public User {
void operator=(const Constant &); // Do not implement
Constant(const Constant &); // Do not implement
+ virtual void anchor();
protected:
Constant(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
@@ -90,12 +91,13 @@ public:
/// FIXME: This really should not be in VMCore.
PossibleRelocationsTy getRelocationInfo() const;
- /// getVectorElements - This method, which is only valid on constant of vector
- /// type, returns the elements of the vector in the specified smallvector.
- /// This handles breaking down a vector undef into undef elements, etc. For
- /// constant exprs and other cases we can't handle, we return an empty vector.
- void getVectorElements(SmallVectorImpl<Constant*> &Elts) const;
-
+ /// getAggregateElement - For aggregates (struct/array/vector) return the
+ /// constant that corresponds to the specified element if possible, or null if
+ /// not. This can return null if the element index is a ConstantExpr, or if
+ /// 'this' is a constant expr.
+ Constant *getAggregateElement(unsigned Elt) const;
+ Constant *getAggregateElement(Constant *Elt) const;
+
/// destroyConstant - Called if some element of this constant is no longer
/// valid. At this point only other constants may be on the use_list for this
/// constant. Any constants on our Use list must also be destroy'd. The
@@ -103,7 +105,7 @@ public:
/// available cached constants. Implementations should call
/// destroyConstantImpl as the last thing they do, to destroy all users and
/// delete this.
- virtual void destroyConstant() { assert(0 && "Not reached!"); }
+ virtual void destroyConstant() { llvm_unreachable("Not reached!"); }
//// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Constant *) { return true; }
@@ -129,11 +131,12 @@ public:
// to be here to avoid link errors.
assert(getNumOperands() == 0 && "replaceUsesOfWithOnConstant must be "
"implemented for all constants that have operands!");
- assert(0 && "Constants that do not have operands cannot be using 'From'!");
+ llvm_unreachable("Constants that do not have operands cannot be using "
+ "'From'!");
}
-
+
static Constant *getNullValue(Type* Ty);
-
+
/// @returns the value for an integer constant of the given type that has all
/// its bits set to true.
/// @brief Get the all ones value
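A brief sketch of the replacement idiom for the removed getVectorElements() (the helper is hypothetical):

    #include "llvm/Constants.h"
    using namespace llvm;

    // Walk any constant aggregate (array/struct/vector) element by element.
    static bool allElementsNull(const Constant *C, unsigned NumElts) {
      for (unsigned i = 0; i != NumElts; ++i) {
        Constant *Elt = C->getAggregateElement(i);
        if (!Elt || !Elt->isNullValue())  // null means "can't decompose"
          return false;
      }
      return true;
    }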
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 6545a3fedb92..0abe17d365d4 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -34,10 +34,13 @@ class IntegerType;
class StructType;
class PointerType;
class VectorType;
+class SequentialType;
template<class ConstantClass, class TypeClass, class ValType>
struct ConstantCreator;
template<class ConstantClass, class TypeClass>
+struct ConstantArrayCreator;
+template<class ConstantClass, class TypeClass>
struct ConvertConstantType;
//===----------------------------------------------------------------------===//
@@ -45,6 +48,7 @@ struct ConvertConstantType;
/// represents both boolean and integral constants.
/// @brief Class for constant integers.
class ConstantInt : public Constant {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
ConstantInt(const ConstantInt &); // DO NOT IMPLEMENT
ConstantInt(IntegerType *Ty, const APInt& V);
@@ -229,6 +233,7 @@ public:
///
class ConstantFP : public Constant {
APFloat Val;
+ virtual void anchor();
void *operator new(size_t, unsigned);// DO NOT IMPLEMENT
ConstantFP(const ConstantFP &); // DO NOT IMPLEMENT
friend class LLVMContextImpl;
@@ -296,7 +301,6 @@ public:
/// ConstantAggregateZero - All zero aggregate value
///
class ConstantAggregateZero : public Constant {
- friend struct ConstantCreator<ConstantAggregateZero, Type, char>;
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
ConstantAggregateZero(const ConstantAggregateZero &); // DO NOT IMPLEMENT
protected:
@@ -308,10 +312,26 @@ protected:
return User::operator new(s, 0);
}
public:
- static ConstantAggregateZero* get(Type *Ty);
+ static ConstantAggregateZero *get(Type *Ty);
virtual void destroyConstant();
+ /// getSequentialElement - If this CAZ has array or vector type, return a zero
+ /// with the right element type.
+ Constant *getSequentialElement() const;
+
+ /// getStructElement - If this CAZ has struct type, return a zero with the
+ /// right element type for the specified element.
+ Constant *getStructElement(unsigned Elt) const;
+
+ /// getElementValue - Return a zero of the right value for the specified GEP
+ /// index.
+ Constant *getElementValue(Constant *C) const;
+
+ /// getElementValue - Return a zero of the right value for the specified GEP
+ /// index.
+ Constant *getElementValue(unsigned Idx) const;
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
///
static bool classof(const ConstantAggregateZero *) { return true; }
@@ -325,8 +345,7 @@ public:
/// ConstantArray - Constant Array Declarations
///
class ConstantArray : public Constant {
- friend struct ConstantCreator<ConstantArray, ArrayType,
- std::vector<Constant*> >;
+ friend struct ConstantArrayCreator<ConstantArray, ArrayType>;
ConstantArray(const ConstantArray &); // DO NOT IMPLEMENT
protected:
ConstantArray(ArrayType *T, ArrayRef<Constant *> Val);
@@ -334,15 +353,6 @@ public:
// ConstantArray accessors
static Constant *get(ArrayType *T, ArrayRef<Constant*> V);
- /// This method constructs a ConstantArray and initializes it with a text
- /// string. The default behavior (AddNull==true) causes a null terminator to
- /// be placed at the end of the array. This effectively increases the length
- /// of the array by one (you've been warned). However, in some situations
- /// this is not desired so if AddNull==false then the string is copied without
- /// null termination.
- static Constant *get(LLVMContext &Context, StringRef Initializer,
- bool AddNull = true);
-
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -353,28 +363,6 @@ public:
return reinterpret_cast<ArrayType*>(Value::getType());
}
- /// isString - This method returns true if the array is an array of i8 and
- /// the elements of the array are all ConstantInt's.
- bool isString() const;
-
- /// isCString - This method returns true if the array is a string (see
- /// @verbatim
- /// isString) and it ends in a null byte \0 and does not contains any other
- /// @endverbatim
- /// null bytes except its terminator.
- bool isCString() const;
-
- /// getAsString - If this array is isString(), then this method converts the
- /// array to an std::string and returns it. Otherwise, it asserts out.
- ///
- std::string getAsString() const;
-
- /// getAsCString - If this array is isCString(), then this method converts the
- /// array (without the trailing null byte) to an std::string and returns it.
- /// Otherwise, it asserts out.
- ///
- std::string getAsCString() const;
-
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -396,8 +384,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantArray, Constant)
// ConstantStruct - Constant Struct Declarations
//
class ConstantStruct : public Constant {
- friend struct ConstantCreator<ConstantStruct, StructType,
- std::vector<Constant*> >;
+ friend struct ConstantArrayCreator<ConstantStruct, StructType>;
ConstantStruct(const ConstantStruct &); // DO NOT IMPLEMENT
protected:
ConstantStruct(StructType *T, ArrayRef<Constant *> Val);
@@ -457,8 +444,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantStruct, Constant)
/// ConstantVector - Constant Vector Declarations
///
class ConstantVector : public Constant {
- friend struct ConstantCreator<ConstantVector, VectorType,
- std::vector<Constant*> >;
+ friend struct ConstantArrayCreator<ConstantVector, VectorType>;
ConstantVector(const ConstantVector &); // DO NOT IMPLEMENT
protected:
ConstantVector(VectorType *T, ArrayRef<Constant *> Val);
@@ -466,6 +452,10 @@ public:
// ConstantVector accessors
static Constant *get(ArrayRef<Constant*> V);
+ /// getSplat - Return a ConstantVector with the specified constant in each
+ /// element.
+ static Constant *getSplat(unsigned NumElts, Constant *Elt);
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -475,12 +465,6 @@ public:
inline VectorType *getType() const {
return reinterpret_cast<VectorType*>(Value::getType());
}
-
- /// This function will return true iff every element in this vector constant
- /// is set to all ones.
- /// @returns true iff this constant's emements are all set to all ones.
- /// @brief Determine if the value is all ones.
- bool isAllOnesValue() const;
/// getSplatValue - If this is a splat constant, meaning that all of the
/// elements have the same value, return that value. Otherwise return NULL.
@@ -507,7 +491,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantVector, Constant)
/// ConstantPointerNull - a constant pointer value that points to null
///
class ConstantPointerNull : public Constant {
- friend struct ConstantCreator<ConstantPointerNull, PointerType, char>;
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
ConstantPointerNull(const ConstantPointerNull &); // DO NOT IMPLEMENT
protected:
@@ -539,6 +522,240 @@ public:
return V->getValueID() == ConstantPointerNullVal;
}
};
+
+//===----------------------------------------------------------------------===//
+/// ConstantDataSequential - A vector or array constant whose element type is a
+/// simple 1/2/4/8-byte integer or float/double, and whose elements are just
+/// simple data values (i.e. ConstantInt/ConstantFP). This Constant node has no
+/// operands because it stores all of the elements of the constant as densely
+/// packed data, instead of as Value*'s.
+///
+/// This is the common base class of ConstantDataArray and ConstantDataVector.
+///
+class ConstantDataSequential : public Constant {
+ friend class LLVMContextImpl;
+ /// DataElements - A pointer to the bytes underlying this constant (which is
+ /// owned by the uniquing StringMap).
+ const char *DataElements;
+
+ /// Next - This forms a linked list of ConstantDataSequential nodes that have
+ /// the same value but different type. For example, 0,0,0,1 could be a 4
+ /// element array of i8, or a 1-element array of i32. They'll both end up in
+ /// the same StringMap bucket, linked up.
+ ConstantDataSequential *Next;
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ ConstantDataSequential(const ConstantDataSequential &); // DO NOT IMPLEMENT
+protected:
+ explicit ConstantDataSequential(Type *ty, ValueTy VT, const char *Data)
+ : Constant(ty, VT, 0, 0), DataElements(Data), Next(0) {}
+ ~ConstantDataSequential() { delete Next; }
+
+ static Constant *getImpl(StringRef Bytes, Type *Ty);
+
+protected:
+ // allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+
+ /// isElementTypeCompatible - Return true if a ConstantDataSequential can be
+ /// formed with a vector or array of the specified element type.
+ /// ConstantDataArray only works with normal float and int types that are
+ /// stored densely in memory, not with things like i42 or x86_f80.
+ static bool isElementTypeCompatible(const Type *Ty);
+
+ /// getElementAsInteger - If this is a sequential container of integers (of
+ /// any size), return the specified element in the low bits of a uint64_t.
+ uint64_t getElementAsInteger(unsigned i) const;
+
+ /// getElementAsAPFloat - If this is a sequential container of floating point
+ /// type, return the specified element as an APFloat.
+ APFloat getElementAsAPFloat(unsigned i) const;
+
+ /// getElementAsFloat - If this is a sequential container of floats, return
+ /// the specified element as a float.
+ float getElementAsFloat(unsigned i) const;
+
+ /// getElementAsDouble - If this is a sequential container of doubles, return
+ /// the specified element as a double.
+ double getElementAsDouble(unsigned i) const;
+
+ /// getElementAsConstant - Return a Constant for a specified index's element.
+ /// Note that this has to compute a new constant to return, so it isn't as
+ /// efficient as getElementAsInteger/Float/Double.
+ Constant *getElementAsConstant(unsigned i) const;
+
+ /// getType - Specialize the getType() method to always return a
+ /// SequentialType, which reduces the amount of casting needed in parts of the
+ /// compiler.
+ inline SequentialType *getType() const {
+ return reinterpret_cast<SequentialType*>(Value::getType());
+ }
+
+ /// getElementType - Return the element type of the array/vector.
+ Type *getElementType() const;
+
+ /// getNumElements - Return the number of elements in the array or vector.
+ unsigned getNumElements() const;
+
+ /// getElementByteSize - Return the size (in bytes) of each element in the
+ /// array/vector. The size of the elements is known to be a multiple of one
+ /// byte.
+ uint64_t getElementByteSize() const;
+
+
+ /// isString - This method returns true if this is an array of i8.
+ bool isString() const;
+
+ /// isCString - This method returns true if the array "isString", ends with a
+ /// nul byte, and does not contain any other nul bytes.
+ bool isCString() const;
+
+ /// getAsString - If this array is isString(), then this method returns the
+ /// array as a StringRef. Otherwise, it asserts out.
+ ///
+ StringRef getAsString() const {
+ assert(isString() && "Not a string");
+ return getRawDataValues();
+ }
+
+ /// getAsCString - If this array is isCString(), then this method returns the
+ /// array (without the trailing null byte) as a StringRef. Otherwise, it
+ /// asserts out.
+ ///
+ StringRef getAsCString() const {
+ assert(isCString() && "Isn't a C string");
+ StringRef Str = getAsString();
+ return Str.substr(0, Str.size()-1);
+ }
+
+ /// getRawDataValues - Return the raw, underlying, bytes of this data. Note
+ /// that this is an extremely tricky thing to work with, as it exposes the
+ /// host endianness of the data elements.
+ StringRef getRawDataValues() const;
+
+ virtual void destroyConstant();
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const ConstantDataSequential *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantDataArrayVal ||
+ V->getValueID() == ConstantDataVectorVal;
+ }
+private:
+ const char *getElementPointer(unsigned Elt) const;
+};
+
+//===----------------------------------------------------------------------===//
+/// ConstantDataArray - An array constant whose element type is a simple
+/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
+/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no
+/// operands because it stores all of the elements of the constant as densely
+/// packed data, instead of as Value*'s.
+class ConstantDataArray : public ConstantDataSequential {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ ConstantDataArray(const ConstantDataArray &); // DO NOT IMPLEMENT
+ virtual void anchor();
+ friend class ConstantDataSequential;
+ explicit ConstantDataArray(Type *ty, const char *Data)
+ : ConstantDataSequential(ty, ConstantDataArrayVal, Data) {}
+protected:
+ // allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+
+ /// get() constructors - Return a constant with array type with an element
+ /// count and element type matching the ArrayRef passed in. Note that this
+ /// can return a ConstantAggregateZero object.
+ static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
+
+ /// getString - This method constructs a CDS and initializes it with a text
+ /// string. The default behavior (AddNull==true) causes a null terminator to
+ /// be placed at the end of the array (increasing the length of the string by
+ /// one more than the StringRef would normally indicate). Pass AddNull=false
+ /// to disable this behavior.
+ static Constant *getString(LLVMContext &Context, StringRef Initializer,
+ bool AddNull = true);
+
+ /// getType - Specialize the getType() method to always return an ArrayType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline ArrayType *getType() const {
+ return reinterpret_cast<ArrayType*>(Value::getType());
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const ConstantDataArray *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantDataArrayVal;
+ }
+};
+
+//===----------------------------------------------------------------------===//
+/// ConstantDataVector - A vector constant whose element type is a simple
+/// 1/2/4/8-byte integer or float/double, and whose elements are just simple
+/// data values (i.e. ConstantInt/ConstantFP). This Constant node has no
+/// operands because it stores all of the elements of the constant as densely
+/// packed data, instead of as Value*'s.
+class ConstantDataVector : public ConstantDataSequential {
+ void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+ ConstantDataVector(const ConstantDataVector &); // DO NOT IMPLEMENT
+ virtual void anchor();
+ friend class ConstantDataSequential;
+ explicit ConstantDataVector(Type *ty, const char *Data)
+ : ConstantDataSequential(ty, ConstantDataVectorVal, Data) {}
+protected:
+ // allocate space for exactly zero operands.
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+public:
+
+ /// get() constructors - Return a constant with vector type with an element
+ /// count and element type matching the ArrayRef passed in. Note that this
+ /// can return a ConstantAggregateZero object.
+ static Constant *get(LLVMContext &Context, ArrayRef<uint8_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint16_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint32_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<uint64_t> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<float> Elts);
+ static Constant *get(LLVMContext &Context, ArrayRef<double> Elts);
+
+ /// getSplat - Return a ConstantVector with the specified constant in each
+ /// element. The specified constant has to be of a compatible type (i8/i16/
+ /// i32/i64/float/double) and must be a ConstantFP or ConstantInt.
+ static Constant *getSplat(unsigned NumElts, Constant *Elt);
+
+ /// getSplatValue - If this is a splat constant, meaning that all of the
+ /// elements have the same value, return that value. Otherwise return NULL.
+ Constant *getSplatValue() const;
+
+ /// getType - Specialize the getType() method to always return a VectorType,
+ /// which reduces the amount of casting needed in parts of the compiler.
+ ///
+ inline VectorType *getType() const {
+ return reinterpret_cast<VectorType*>(Value::getType());
+ }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ ///
+ static bool classof(const ConstantDataVector *) { return true; }
+ static bool classof(const Value *V) {
+ return V->getValueID() == ConstantDataVectorVal;
+ }
+};
+
+
/// BlockAddress - The address of a basic block.
///
@@ -897,7 +1114,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant)
/// LangRef.html#undefvalues for details.
///
class UndefValue : public Constant {
- friend struct ConstantCreator<UndefValue, Type, char>;
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
UndefValue(const UndefValue &); // DO NOT IMPLEMENT
protected:
@@ -913,6 +1129,22 @@ public:
///
static UndefValue *get(Type *T);
+ /// getSequentialElement - If this Undef has array or vector type, return an
+ /// undef with the right element type.
+ UndefValue *getSequentialElement() const;
+
+ /// getStructElement - If this undef has struct type, return an undef with the
+ /// right element type for the specified element.
+ UndefValue *getStructElement(unsigned Elt) const;
+
+ /// getElementValue - Return an undef of the right value for the specified GEP
+ /// index.
+ UndefValue *getElementValue(Constant *C) const;
+
+ /// getElementValue - Return an undef of the right value for the specified GEP
+ /// index.
+ UndefValue *getElementValue(unsigned Idx) const;
+
virtual void destroyConstant();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
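To illustrate the new densely packed constants, a sketch (not from the patch; the demo function and values are made up):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static void buildDataConstants(LLVMContext &Ctx) {
      // [6 x i8] c"hello\00": replaces ConstantArray::get(Ctx, "hello").
      Constant *Str = ConstantDataArray::getString(Ctx, "hello");
      // Elements live as raw bytes; no per-element Value* is allocated.
      uint32_t Raw[] = { 1, 2, 3, 4 };
      Constant *Arr = ConstantDataArray::get(Ctx, Raw);
      // <4 x i32> splat of 7 via the new vector counterpart.
      Constant *Splat = ConstantDataVector::getSplat(
          4, ConstantInt::get(Type::getInt32Ty(Ctx), 7));
      (void)Str; (void)Arr; (void)Splat;
    }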
diff --git a/include/llvm/DebugInfoProbe.h b/include/llvm/DebugInfoProbe.h
deleted file mode 100644
index 78d00dfeeddf..000000000000
--- a/include/llvm/DebugInfoProbe.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- DebugInfoProbe.h - DebugInfo Probe ----------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a probe, DebugInfoProbe, that can be used by pass
-// manager to analyze how optimizer is treating debugging information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H
-#define LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H
-
-#include "llvm/ADT/StringMap.h"
-
-namespace llvm {
- class Function;
- class Pass;
- class DebugInfoProbeImpl;
-
- /// DebugInfoProbe - This class provides a interface to monitor
- /// how an optimization pass is preserving debugging information.
- class DebugInfoProbe {
- public:
- DebugInfoProbe();
- ~DebugInfoProbe();
-
- /// initialize - Collect information before running an optimization pass.
- void initialize(StringRef PName, Function &F);
-
- /// finalize - Collect information after running an optimization pass. This
- /// must be used after initialization.
- void finalize(Function &F);
-
- /// report - Report findings. This should be invoked after finalize.
- void report();
-
- private:
- DebugInfoProbeImpl *pImpl;
- };
-
- /// DebugInfoProbeInfo - This class provides an interface that a pass manager
- /// can use to manage debug info probes.
- class DebugInfoProbeInfo {
- StringMap<DebugInfoProbe *> Probes;
- public:
- DebugInfoProbeInfo() {}
-
- /// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
- /// them.
- ~DebugInfoProbeInfo();
-
- /// initialize - Collect information before running an optimization pass.
- void initialize(Pass *P, Function &F);
-
- /// finalize - Collect information after running an optimization pass. This
- /// must be used after initialization.
- void finalize(Pass *P, Function &F);
- };
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/DefaultPasses.h b/include/llvm/DefaultPasses.h
index 2e4145b5ebbe..929569d543d9 100644
--- a/include/llvm/DefaultPasses.h
+++ b/include/llvm/DefaultPasses.h
@@ -14,6 +14,8 @@
#ifndef LLVM_DEFAULT_PASS_SUPPORT_H
#define LLVM_DEFAULT_PASS_SUPPORT_H
+#include <llvm/PassSupport.h>
+
namespace llvm {
class PassManagerBase;
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index 445c3deb7ce2..da5ad27b1f1c 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -195,9 +195,10 @@ class StructType : public CompositeType {
// This is the contents of the SubClassData field.
SCDB_HasBody = 1,
SCDB_Packed = 2,
- SCDB_IsLiteral = 4
+ SCDB_IsLiteral = 4,
+ SCDB_IsSized = 8
};
-
+
/// SymbolTableEntry - For a named struct that actually has a name, this is a
/// pointer to the symbol table entry (maintained by LLVMContext) for the
/// struct. This is null if the type is a literal struct or if it is
@@ -248,6 +249,9 @@ public:
/// isOpaque - Return true if this is a type with an identity that has no body
/// specified yet. These prints as 'opaque' in .ll files.
bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
+
+ /// isSized - Return true if this is a sized type.
+ bool isSized() const;
/// hasName - Return true if this is a named struct that has a non-empty name.
bool hasName() const { return SymbolTableEntry != 0; }
@@ -374,6 +378,7 @@ public:
///
static VectorType *getInteger(VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ assert(EltBits && "Element size must be of a non-zero size");
Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -408,6 +413,7 @@ public:
unsigned getNumElements() const { return NumElements; }
/// @brief Return the number of bits in the Vector type.
+ /// Returns zero when the vector is a vector of pointers.
unsigned getBitWidth() const {
return NumElements * getElementType()->getPrimitiveSizeInBits();
}
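Two small sketches of the DerivedTypes.h additions (helper names are illustrative):

    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // New: opaque or recursively-unsized structs report false here.
    static bool canComputeLayout(StructType *ST) {
      return ST->isSized();
    }

    // getInteger() now asserts on zero-sized elements (e.g. pointer vectors).
    static VectorType *asIntegerVector(VectorType *VTy) {
      return VectorType::getInteger(VTy);
    }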
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index cf85671eb414..e920e98a0bf6 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -15,17 +15,19 @@
#ifndef LLVM_EXECUTION_ENGINE_H
#define LLVM_EXECUTION_ENGINE_H
-#include <vector>
-#include <map>
-#include <string>
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <vector>
+#include <map>
+#include <string>
namespace llvm {
@@ -41,6 +43,7 @@ class MachineCodeInfo;
class Module;
class MutexGuard;
class TargetData;
+class Triple;
class Type;
/// \brief Helper class for helping synchronize access to the global address map
@@ -132,14 +135,12 @@ protected:
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
static ExecutionEngine *(*MCJITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
static ExecutionEngine *(*InterpCtor)(Module *M, std::string *ErrorStr);
@@ -228,6 +229,26 @@ public:
virtual GenericValue runFunction(Function *F,
const std::vector<GenericValue> &ArgValues) = 0;
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) = 0;
+
+ /// mapSectionAddress - map a section to its target address space value.
+ /// Map the address of a JIT section as returned from the memory manager
+ /// to the address in the target process as the running code will see it.
+ /// This is the address which will be used for relocation resolution.
+ virtual void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) {
+ llvm_unreachable("Re-mapping of section addresses not supported with this "
+ "EE!");
+ }
+
/// runStaticConstructorsDestructors - This method is used to execute all of
/// the static constructors or destructors for a program.
///
@@ -462,6 +483,7 @@ private:
CodeGenOpt::Level OptLevel;
JITMemoryManager *JMM;
bool AllocateGVsWithCode;
+ TargetOptions Options;
Reloc::Model RelocModel;
CodeModel::Model CMModel;
std::string MArch;
@@ -475,6 +497,7 @@ private:
ErrorStr = NULL;
OptLevel = CodeGenOpt::Default;
JMM = NULL;
+ Options = TargetOptions();
AllocateGVsWithCode = false;
RelocModel = Reloc::Default;
CMModel = CodeModel::JITDefault;
@@ -518,6 +541,13 @@ public:
return *this;
}
+ /// setTargetOptions - Set the target options that the ExecutionEngine
+ /// target is using. Defaults to TargetOptions().
+ EngineBuilder &setTargetOptions(const TargetOptions &Opts) {
+ Options = Opts;
+ return *this;
+ }
+
/// setRelocationModel - Set the relocation model that the ExecutionEngine
/// target is using. Defaults to target specific default "Reloc::Default".
EngineBuilder &setRelocationModel(Reloc::Model RM) {
@@ -572,17 +602,20 @@ public:
return *this;
}
+ TargetMachine *selectTarget();
+
/// selectTarget - Pick a target either via -march or by guessing the native
/// arch. Add any CPU features specified via -mcpu or -mattr.
- static TargetMachine *selectTarget(Module *M,
- StringRef MArch,
- StringRef MCPU,
- const SmallVectorImpl<std::string>& MAttrs,
- Reloc::Model RM,
- CodeModel::Model CM,
- std::string *Err);
-
- ExecutionEngine *create();
+ TargetMachine *selectTarget(const Triple &TargetTriple,
+ StringRef MArch,
+ StringRef MCPU,
+ const SmallVectorImpl<std::string>& MAttrs);
+
+ ExecutionEngine *create() {
+ return create(selectTarget());
+ }
+
+ ExecutionEngine *create(TargetMachine *TM);
};
} // End llvm namespace
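A sketch of the reworked builder flow (error handling trimmed; the wrapper function is hypothetical):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Target/TargetOptions.h"
    using namespace llvm;

    static ExecutionEngine *buildJIT(Module *M, std::string &Err) {
      EngineBuilder EB(M);
      EB.setErrorStr(&Err)
        .setEngineKind(EngineKind::JIT)
        .setTargetOptions(TargetOptions());  // new in this revision
      // selectTarget() is now an instance method; create(TM) is the new
      // overload that consumes the machine it returns.
      TargetMachine *TM = EB.selectTarget();
      return TM ? EB.create(TM) : 0;
    }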
diff --git a/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h b/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
new file mode 100644
index 000000000000..ca873420299c
--- /dev/null
+++ b/include/llvm/ExecutionEngine/IntelJITEventsWrapper.h
@@ -0,0 +1,102 @@
+//===-- IntelJITEventsWrapper.h - Intel JIT Events API Wrapper --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a wrapper for the Intel JIT Events API. It allows for the
+// implementation of the jitprofiling library to be swapped with an alternative
+// implementation (for testing). To include this file, you must have the
+// jitprofiling.h header available; it ships with Intel(R) VTune(TM)
+// Amplifier XE 2011.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INTEL_JIT_EVENTS_WRAPPER_H
+#define INTEL_JIT_EVENTS_WRAPPER_H
+
+#include <jitprofiling.h>
+
+namespace llvm {
+
+class IntelJITEventsWrapper {
+ // Function pointer types for testing implementation of Intel jitprofiling
+ // library
+ typedef int (*NotifyEventPtr)(iJIT_JVM_EVENT, void*);
+ typedef void (*RegisterCallbackExPtr)(void *, iJIT_ModeChangedEx );
+ typedef iJIT_IsProfilingActiveFlags (*IsProfilingActivePtr)(void);
+ typedef void (*FinalizeThreadPtr)(void);
+ typedef void (*FinalizeProcessPtr)(void);
+ typedef unsigned int (*GetNewMethodIDPtr)(void);
+
+ NotifyEventPtr NotifyEventFunc;
+ RegisterCallbackExPtr RegisterCallbackExFunc;
+ IsProfilingActivePtr IsProfilingActiveFunc;
+ FinalizeThreadPtr FinalizeThreadFunc;
+ FinalizeProcessPtr FinalizeProcessFunc;
+ GetNewMethodIDPtr GetNewMethodIDFunc;
+
+public:
+ bool isAmplifierRunning() {
+ return iJIT_IsProfilingActive() == iJIT_SAMPLING_ON;
+ }
+
+ IntelJITEventsWrapper()
+ : NotifyEventFunc(::iJIT_NotifyEvent),
+ RegisterCallbackExFunc(::iJIT_RegisterCallbackEx),
+ IsProfilingActiveFunc(::iJIT_IsProfilingActive),
+ FinalizeThreadFunc(::FinalizeThread),
+ FinalizeProcessFunc(::FinalizeProcess),
+ GetNewMethodIDFunc(::iJIT_GetNewMethodID) {
+ }
+
+ IntelJITEventsWrapper(NotifyEventPtr NotifyEventImpl,
+ RegisterCallbackExPtr RegisterCallbackExImpl,
+ IsProfilingActivePtr IsProfilingActiveImpl,
+ FinalizeThreadPtr FinalizeThreadImpl,
+ FinalizeProcessPtr FinalizeProcessImpl,
+ GetNewMethodIDPtr GetNewMethodIDImpl)
+ : NotifyEventFunc(NotifyEventImpl),
+ RegisterCallbackExFunc(RegisterCallbackExImpl),
+ IsProfilingActiveFunc(IsProfilingActiveImpl),
+ FinalizeThreadFunc(FinalizeThreadImpl),
+ FinalizeProcessFunc(FinalizeProcessImpl),
+ GetNewMethodIDFunc(GetNewMethodIDImpl) {
+ }
+
+  // Sends an event announcing that a function has been emitted.
+  // Return values are event-specific. See Intel documentation for details.
+ int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+ if (!NotifyEventFunc)
+ return -1;
+ return NotifyEventFunc(EventType, EventSpecificData);
+ }
+
+ // Registers a callback function to receive notice of profiling state changes
+ void iJIT_RegisterCallbackEx(void *UserData,
+ iJIT_ModeChangedEx NewModeCallBackFuncEx) {
+ if (RegisterCallbackExFunc)
+ RegisterCallbackExFunc(UserData, NewModeCallBackFuncEx);
+ }
+
+ // Returns the current profiler mode
+ iJIT_IsProfilingActiveFlags iJIT_IsProfilingActive(void) {
+ if (!IsProfilingActiveFunc)
+ return iJIT_NOTHING_RUNNING;
+ return IsProfilingActiveFunc();
+ }
+
+ // Generates a locally unique method ID for use in code registration
+ unsigned int iJIT_GetNewMethodID(void) {
+ if (!GetNewMethodIDFunc)
+ return -1;
+ return GetNewMethodIDFunc();
+ }
+};
+
+} //namespace llvm
+
+#endif //INTEL_JIT_EVENTS_WRAPPER_H
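A test-oriented sketch of the secondary constructor above, assuming jitprofiling.h is on the include path; the stub functions are illustrative placeholders, not part of any API.

#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"

static int StubNotifyEvent(iJIT_JVM_EVENT, void*) { return 0; }
static void StubRegisterCallbackEx(void*, iJIT_ModeChangedEx) {}
static iJIT_IsProfilingActiveFlags StubIsProfilingActive(void) {
  return iJIT_SAMPLING_ON;
}
static void StubFinalizeThread(void) {}
static void StubFinalizeProcess(void) {}
static unsigned int StubGetNewMethodID(void) {
  static unsigned int NextID = 0;
  return ++NextID;
}

// Swap the real jitprofiling entry points for the stubs above.
llvm::IntelJITEventsWrapper MockWrapper(StubNotifyEvent,
                                        StubRegisterCallbackEx,
                                        StubIsProfilingActive,
                                        StubFinalizeThread,
                                        StubFinalizeProcess,
                                        StubGetNewMethodID);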
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index abc063b07038..eea603fcee2c 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -15,6 +15,7 @@
#ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+#include "llvm/Config/config.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/DebugLoc.h"
@@ -23,6 +24,8 @@
namespace llvm {
class Function;
class MachineFunction;
+class OProfileWrapper;
+class IntelJITEventsWrapper;
/// JITEvent_EmittedFunctionDetails - Helper struct for containing information
/// about a generated machine code function.
@@ -59,9 +62,9 @@ public:
/// NotifyFunctionEmitted - Called after a function has been successfully
/// emitted to memory. The function still has its MachineFunction attached,
/// if you should happen to need that.
- virtual void NotifyFunctionEmitted(const Function &F,
- void *Code, size_t Size,
- const EmittedFunctionDetails &Details) {}
+ virtual void NotifyFunctionEmitted(const Function &,
+ void *, size_t,
+ const EmittedFunctionDetails &) {}
/// NotifyFreeingMachineCode - Called from freeMachineCodeForFunction(), after
/// the global mapping is removed, but before the machine code is returned to
@@ -71,12 +74,43 @@ public:
/// parameter to a previous NotifyFunctionEmitted call. The Function passed
/// to NotifyFunctionEmitted may have been destroyed by the time of the
/// matching NotifyFreeingMachineCode call.
- virtual void NotifyFreeingMachineCode(void *OldPtr) {}
-};
+ virtual void NotifyFreeingMachineCode(void *) {}
+
+#if LLVM_USE_INTEL_JITEVENTS
+ // Construct an IntelJITEventListener
+ static JITEventListener *createIntelJITEventListener();
+
+ // Construct an IntelJITEventListener with a test Intel JIT API implementation
+ static JITEventListener *createIntelJITEventListener(
+ IntelJITEventsWrapper* AlternativeImpl);
+#else
+ static JITEventListener *createIntelJITEventListener() { return 0; }
+
+ static JITEventListener *createIntelJITEventListener(
+ IntelJITEventsWrapper* AlternativeImpl) {
+ return 0;
+ }
+#endif // LLVM_USE_INTEL_JITEVENTS
+
+#if LLVM_USE_OPROFILE
+ // Construct an OProfileJITEventListener
+ static JITEventListener *createOProfileJITEventListener();
-// This returns NULL if support isn't available.
-JITEventListener *createOProfileJITEventListener();
+ // Construct an OProfileJITEventListener with a test opagent implementation
+ static JITEventListener *createOProfileJITEventListener(
+ OProfileWrapper* AlternativeImpl);
+#else
+
+ static JITEventListener *createOProfileJITEventListener() { return 0; }
+
+ static JITEventListener *createOProfileJITEventListener(
+ OProfileWrapper* AlternativeImpl) {
+ return 0;
+ }
+#endif // LLVM_USE_OPROFILE
+
+};
} // end namespace llvm.
-#endif
+#endif // defined LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
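A wiring sketch for the new factories, assuming 'EE' is a live ExecutionEngine*: the factory returns 0 when LLVM was configured without Intel JIT events support, so the null check is load-bearing.

llvm::JITEventListener *L =
    llvm::JITEventListener::createIntelJITEventListener();
if (L)
  EE->RegisterJITEventListener(L);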
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index a63f0da773a2..4c75b6ab970e 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -47,6 +47,17 @@ public:
/// debugging, and may be turned on by default in debug mode.
virtual void setPoisonMemory(bool poison) = 0;
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function. As such it is only useful for resolving library
+ /// symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) = 0;
+
//===--------------------------------------------------------------------===//
// Global Offset Table Management
//===--------------------------------------------------------------------===//
@@ -101,6 +112,22 @@ public:
virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
uint8_t *FunctionEnd) = 0;
+ /// allocateCodeSection - Allocate a memory block of (at least) the given
+ /// size suitable for executable code. The SectionID is a unique identifier
+ /// assigned by the JIT and passed through to the memory manager for
+ /// the instance class to use if it needs to communicate to the JIT about
+ /// a given section after the fact.
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) = 0;
+
+ /// allocateDataSection - Allocate a memory block of (at least) the given
+ /// size suitable for data. The SectionID is a unique identifier
+ /// assigned by the JIT and passed through to the memory manager for
+ /// the instance class to use if it needs to communicate to the JIT about
+ /// a given section after the fact.
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) = 0;
+
/// allocateSpace - Allocate a memory block of the given size. This method
/// cannot be called between calls to startFunctionBody and endFunctionBody.
virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
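A caller-side sketch of the new section hooks, assuming 'JMM' is a live JITMemoryManager*: the JIT assigns SectionIDs and passes them back whenever it refers to the section later (e.g. for address remapping).

unsigned NextSectionID = 0;
uint8_t *Code = JMM->allocateCodeSection(4096, 16, NextSectionID++);
uint8_t *Data = JMM->allocateDataSection(4096, 8, NextSectionID++);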
diff --git a/include/llvm/ExecutionEngine/OProfileWrapper.h b/include/llvm/ExecutionEngine/OProfileWrapper.h
new file mode 100644
index 000000000000..ab7f25e9d03d
--- /dev/null
+++ b/include/llvm/ExecutionEngine/OProfileWrapper.h
@@ -0,0 +1,124 @@
+//===-- OProfileWrapper.h - OProfile JIT API Wrapper ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines an OProfileWrapper object that detects if the oprofile
+// daemon is running, and provides wrappers for opagent functions used to
+// communicate with the oprofile JIT interface. The dynamic library libopagent
+// does not need to be linked directly as this object lazily loads the library
+// when the first op_ function is called.
+//
+// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
+// definition of the interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OPROFILE_WRAPPER_H
+#define OPROFILE_WRAPPER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <opagent.h>
+
+namespace llvm {
+
+
+class OProfileWrapper {
+ typedef op_agent_t (*op_open_agent_ptr_t)();
+ typedef int (*op_close_agent_ptr_t)(op_agent_t);
+ typedef int (*op_write_native_code_ptr_t)(op_agent_t,
+ const char*,
+ uint64_t,
+ void const*,
+ const unsigned int);
+ typedef int (*op_write_debug_line_info_ptr_t)(op_agent_t,
+ void const*,
+ size_t,
+ struct debug_line_info const*);
+ typedef int (*op_unload_native_code_ptr_t)(op_agent_t, uint64_t);
+
+ // Also used for op_minor_version function which has the same signature
+ typedef int (*op_major_version_ptr_t)(void);
+
+ // This is not a part of the opagent API, but is useful nonetheless
+ typedef bool (*IsOProfileRunningPtrT)(void);
+
+
+ op_agent_t Agent;
+ op_open_agent_ptr_t OpenAgentFunc;
+ op_close_agent_ptr_t CloseAgentFunc;
+ op_write_native_code_ptr_t WriteNativeCodeFunc;
+ op_write_debug_line_info_ptr_t WriteDebugLineInfoFunc;
+ op_unload_native_code_ptr_t UnloadNativeCodeFunc;
+ op_major_version_ptr_t MajorVersionFunc;
+ op_major_version_ptr_t MinorVersionFunc;
+ IsOProfileRunningPtrT IsOProfileRunningFunc;
+
+ bool Initialized;
+
+public:
+ OProfileWrapper();
+
+ // For testing with a mock opagent implementation, skips the dynamic load and
+ // the function resolution.
+ OProfileWrapper(op_open_agent_ptr_t OpenAgentImpl,
+ op_close_agent_ptr_t CloseAgentImpl,
+ op_write_native_code_ptr_t WriteNativeCodeImpl,
+ op_write_debug_line_info_ptr_t WriteDebugLineInfoImpl,
+ op_unload_native_code_ptr_t UnloadNativeCodeImpl,
+ op_major_version_ptr_t MajorVersionImpl,
+ op_major_version_ptr_t MinorVersionImpl,
+ IsOProfileRunningPtrT MockIsOProfileRunningImpl = 0)
+ : OpenAgentFunc(OpenAgentImpl),
+ CloseAgentFunc(CloseAgentImpl),
+ WriteNativeCodeFunc(WriteNativeCodeImpl),
+ WriteDebugLineInfoFunc(WriteDebugLineInfoImpl),
+ UnloadNativeCodeFunc(UnloadNativeCodeImpl),
+ MajorVersionFunc(MajorVersionImpl),
+ MinorVersionFunc(MinorVersionImpl),
+ IsOProfileRunningFunc(MockIsOProfileRunningImpl),
+ Initialized(true)
+ {
+ }
+
+ // Calls op_open_agent in the oprofile JIT library and saves the returned
+ // op_agent_t handle internally so it can be used when calling all the other
+ // op_* functions. Callers of this class do not need to keep track of
+ // op_agent_t objects.
+ bool op_open_agent();
+
+ int op_close_agent();
+ int op_write_native_code(const char* name,
+ uint64_t addr,
+ void const* code,
+ const unsigned int size);
+ int op_write_debug_line_info(void const* code,
+ size_t num_entries,
+ struct debug_line_info const* info);
+ int op_unload_native_code(uint64_t addr);
+ int op_major_version(void);
+ int op_minor_version(void);
+
+ // Returns true if the oprofiled process is running, the opagent library is
+ // loaded and a connection to the agent has been established, and false
+ // otherwise.
+ bool isAgentAvailable();
+
+private:
+ // Loads the libopagent library and initializes this wrapper if the oprofile
+ // daemon is running
+ bool initialize();
+
+  // Searches /proc for the oprofile daemon and returns true if the process
+  // is found, false otherwise.
+ bool checkForOProfileProcEntry();
+
+ bool isOProfileRunning();
+};
+
+} // namespace llvm
+
+#endif //OPROFILE_WRAPPER_H
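A usage sketch for the wrapper, assuming 'FnName', 'FnAddr', 'FnCode', and 'FnSize' describe a freshly JITed function; all opagent traffic goes through the wrapper, which lazily loads libopagent on first use.

llvm::OProfileWrapper OP;
if (OP.op_open_agent()) {
  OP.op_write_native_code(FnName, FnAddr, FnCode, FnSize);
  // Later, when the code is freed:
  OP.op_unload_native_code(FnAddr);
  OP.op_close_agent();
}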
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 724b9f09e0e5..54c28f3ec142 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -35,15 +35,18 @@ public:
RTDyldMemoryManager() {}
virtual ~RTDyldMemoryManager();
- // Allocate ActualSize bytes, or more, for the named function. Return
- // a pointer to the allocated memory and update Size to reflect how much
- // memory was acutally allocated.
- virtual uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) = 0;
+ /// allocateCodeSection - Allocate a memory block of (at least) the given
+ /// size suitable for executable code.
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) = 0;
- // Mark the end of the function, including how much of the allocated
- // memory was actually used.
- virtual void endFunctionBody(const char *Name, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) = 0;
+ /// allocateDataSection - Allocate a memory block of (at least) the given
+ /// size suitable for data.
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) = 0;
+
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) = 0;
};
class RuntimeDyld {
@@ -54,6 +57,10 @@ class RuntimeDyld {
// interface.
RuntimeDyldImpl *Dyld;
RTDyldMemoryManager *MM;
+protected:
+ // Change the address associated with a section when resolving relocations.
+ // Any relocations already associated with the symbol will be re-resolved.
+ void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
public:
RuntimeDyld(RTDyldMemoryManager*);
~RuntimeDyld();
@@ -65,9 +72,13 @@ public:
void *getSymbolAddress(StringRef Name);
// Resolve the relocations for all symbols we currently know about.
void resolveRelocations();
- // Change the address associated with a symbol when resolving relocations.
- // Any relocations already associated with the symbol will be re-resolved.
- void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+ /// mapSectionAddress - map a section to its target address space value.
+ /// Map the address of a JIT section as returned from the memory manager
+ /// to the address in the target process as the running code will see it.
+ /// This is the address which will be used for relocation resolution.
+ void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress);
+
StringRef getErrorString();
};
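A minimal sketch of the slimmed-down RTDyldMemoryManager interface, assuming sys::Memory backs the allocations and dlsym-style lookup goes through sys::DynamicLibrary (alignment handling and error checks elided).

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Memory.h"

class TrivialMemoryManager : public llvm::RTDyldMemoryManager {
public:
  virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                       unsigned SectionID) {
    llvm::sys::MemoryBlock MB = llvm::sys::Memory::AllocateRWX(Size, 0, 0);
    return (uint8_t*)MB.base();
  }
  virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                       unsigned SectionID) {
    llvm::sys::MemoryBlock MB = llvm::sys::Memory::AllocateRWX(Size, 0, 0);
    return (uint8_t*)MB.base();
  }
  virtual void *getPointerToNamedFunction(const std::string &Name,
                                          bool AbortOnFailure = true) {
    return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(Name.c_str());
  }
};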
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 678651bbf1f8..e17cd87fe348 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -146,7 +146,7 @@ public:
/// The particular intrinsic functions which correspond to this value are
/// defined in llvm/Intrinsics.h.
///
- unsigned getIntrinsicID() const LLVM_ATTRIBUTE_READONLY;
+ unsigned getIntrinsicID() const LLVM_READONLY;
bool isIntrinsic() const { return getIntrinsicID() != 0; }
/// getCallingConv()/setCallingConv(CC) - These method get and set the
@@ -425,6 +425,12 @@ public:
///
bool hasAddressTaken(const User** = 0) const;
+ /// isDefTriviallyDead - Return true if it is trivially safe to remove
+ /// this function definition from the module (because it isn't externally
+ /// visible, does not have its address taken, and has no callers). To make
+ /// this more accurate, call removeDeadConstantUsers first.
+ bool isDefTriviallyDead() const;
+
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
bool callsFunctionThatReturnsTwice() const;
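A cleanup sketch for the new predicate, assuming 'M' is a Module being scanned: per the doc comment above, dead constant users are dropped before the triviality test.

for (llvm::Module::iterator I = M.begin(), E = M.end(); I != E; ) {
  llvm::Function *F = I++;
  F->removeDeadConstantUsers();
  if (!F->isDeclaration() && F->isDefTriviallyDead())
    F->eraseFromParent();
}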
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 63dc4ab6bae0..81a11a4c9258 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -59,19 +59,18 @@ public:
protected:
GlobalValue(Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
LinkageTypes linkage, const Twine &Name)
- : Constant(ty, vty, Ops, NumOps), Parent(0),
- Linkage(linkage), Visibility(DefaultVisibility), Alignment(0),
- UnnamedAddr(0) {
+ : Constant(ty, vty, Ops, NumOps), Linkage(linkage),
+ Visibility(DefaultVisibility), Alignment(0), UnnamedAddr(0), Parent(0) {
setName(Name);
}
- Module *Parent;
// Note: VC++ treats enums as signed, so an extra bit is required to prevent
// Linkage and Visibility from turning into negative values.
LinkageTypes Linkage : 5; // The linkage of this global
unsigned Visibility : 2; // The visibility style of this global
unsigned Alignment : 16; // Alignment of this symbol, must be power of two
unsigned UnnamedAddr : 1; // This value's address is not significant
+ Module *Parent; // The containing module.
 std::string Section; // Section to emit this into, empty means default
public:
~GlobalValue() {
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index c91fbf8de812..33d20435de1d 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -31,6 +31,10 @@ void initializeTransformUtils(PassRegistry&);
/// ScalarOpts library.
void initializeScalarOpts(PassRegistry&);
+/// initializeVectorization - Initialize all passes linked into the
+/// Vectorize library.
+void initializeVectorization(PassRegistry&);
+
/// initializeInstCombine - Initialize all passes linked into the
/// ScalarOpts library.
void initializeInstCombine(PassRegistry&);
@@ -67,6 +71,7 @@ void initializeBasicCallGraphPass(PassRegistry&);
void initializeBlockExtractorPassPass(PassRegistry&);
void initializeBlockFrequencyInfoPass(PassRegistry&);
void initializeBlockPlacementPass(PassRegistry&);
+void initializeBranchFolderPassPass(PassRegistry&);
void initializeBranchProbabilityInfoPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
void initializeCFGOnlyPrinterPass(PassRegistry&);
@@ -77,8 +82,10 @@ void initializeCFGViewerPass(PassRegistry&);
void initializeCalculateSpillWeightsPass(PassRegistry&);
void initializeCallGraphAnalysisGroup(PassRegistry&);
void initializeCodeGenPreparePass(PassRegistry&);
+void initializeCodePlacementOptPass(PassRegistry&);
void initializeConstantMergePass(PassRegistry&);
void initializeConstantPropagationPass(PassRegistry&);
+void initializeMachineCopyPropagationPass(PassRegistry&);
void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
@@ -94,12 +101,17 @@ void initializeDominanceFrontierPass(PassRegistry&);
void initializeDominatorTreePass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEdgeProfilerPass(PassRegistry&);
+void initializeExpandPostRAPass(PassRegistry&);
void initializePathProfilerPass(PassRegistry&);
void initializeGCOVProfilerPass(PassRegistry&);
+void initializeAddressSanitizerPass(PassRegistry&);
+void initializeThreadSanitizerPass(PassRegistry&);
void initializeEarlyCSEPass(PassRegistry&);
void initializeExpandISelPseudosPass(PassRegistry&);
void initializeFindUsedTypesPass(PassRegistry&);
void initializeFunctionAttrsPass(PassRegistry&);
+void initializeGCInfoDeleterPass(PassRegistry&);
+void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGVNPass(PassRegistry&);
void initializeGlobalDCEPass(PassRegistry&);
@@ -127,6 +139,7 @@ void initializeLiveStacksPass(PassRegistry&);
void initializeLiveVariablesPass(PassRegistry&);
void initializeLoaderPassPass(PassRegistry&);
void initializePathProfileLoaderPassPass(PassRegistry&);
+void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLoopDeletionPass(PassRegistry&);
void initializeLoopDependenceAnalysisPass(PassRegistry&);
void initializeLoopExtractorPass(PassRegistry&);
@@ -134,8 +147,8 @@ void initializeLoopInfoPass(PassRegistry&);
void initializeLoopInstSimplifyPass(PassRegistry&);
void initializeLoopRotatePass(PassRegistry&);
void initializeLoopSimplifyPass(PassRegistry&);
-void initializeLoopSplitterPass(PassRegistry&);
void initializeLoopStrengthReducePass(PassRegistry&);
+void initializeGlobalMergePass(PassRegistry&);
void initializeLoopUnrollPass(PassRegistry&);
void initializeLoopUnswitchPass(PassRegistry&);
void initializeLoopIdiomRecognizePass(PassRegistry&);
@@ -145,6 +158,8 @@ void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokePass(PassRegistry&);
void initializeLowerSwitchPass(PassRegistry&);
void initializeMachineBlockFrequencyInfoPass(PassRegistry&);
+void initializeMachineBlockPlacementPass(PassRegistry&);
+void initializeMachineBlockPlacementStatsPass(PassRegistry&);
void initializeMachineBranchProbabilityInfoPass(PassRegistry&);
void initializeMachineCSEPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
@@ -152,6 +167,7 @@ void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineLoopRangesPass(PassRegistry&);
void initializeMachineModuleInfoPass(PassRegistry&);
+void initializeMachineSchedulerPass(PassRegistry&);
void initializeMachineSinkingPass(PassRegistry&);
void initializeMachineVerifierPassPass(PassRegistry&);
void initializeMemCpyOptPass(PassRegistry&);
@@ -163,6 +179,7 @@ void initializeNoAAPass(PassRegistry&);
void initializeNoProfileInfoPass(PassRegistry&);
void initializeNoPathProfileInfoPass(PassRegistry&);
void initializeObjCARCAliasAnalysisPass(PassRegistry&);
+void initializeObjCARCAPElimPass(PassRegistry&);
void initializeObjCARCExpandPass(PassRegistry&);
void initializeObjCARCContractPass(PassRegistry&);
void initializeObjCARCOptPass(PassRegistry&);
@@ -177,6 +194,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&);
void initializePostDomPrinterPass(PassRegistry&);
void initializePostDomViewerPass(PassRegistry&);
void initializePostDominatorTreePass(PassRegistry&);
+void initializePostRASchedulerPass(PassRegistry&);
void initializePreVerifierPass(PassRegistry&);
void initializePrintDbgInfoPass(PassRegistry&);
void initializePrintFunctionPassPass(PassRegistry&);
@@ -189,7 +207,6 @@ void initializePathProfileVerifierPass(PassRegistry&);
void initializeProfileVerifierPassPass(PassRegistry&);
void initializePromotePassPass(PassRegistry&);
void initializePruneEHPass(PassRegistry&);
-void initializeRALinScanPass(PassRegistry&);
void initializeReassociatePass(PassRegistry&);
void initializeRegToMemPass(PassRegistry&);
void initializeRegionInfoPass(PassRegistry&);
@@ -219,6 +236,8 @@ void initializeStripNonDebugSymbolsPass(PassRegistry&);
void initializeStripSymbolsPass(PassRegistry&);
void initializeStrongPHIEliminationPass(PassRegistry&);
void initializeTailCallElimPass(PassRegistry&);
+void initializeTailDuplicatePassPass(PassRegistry&);
+void initializeTargetPassConfigPass(PassRegistry&);
void initializeTargetDataPass(PassRegistry&);
void initializeTargetLibraryInfoPass(PassRegistry&);
void initializeTwoAddressInstructionPassPass(PassRegistry&);
@@ -229,7 +248,9 @@ void initializeUnreachableMachineBlockElimPass(PassRegistry&);
void initializeVerifierPass(PassRegistry&);
void initializeVirtRegMapPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
-
+void initializeUnpackMachineBundlesPass(PassRegistry&);
+void initializeFinalizeMachineBundlesPass(PassRegistry&);
+void initializeBBVectorizePass(PassRegistry&);
}
#endif
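A startup sketch: the new Vectorize group is initialized alongside the existing library groups before pass names are parsed from the command line (the particular set of calls below is illustrative).

llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry();
llvm::initializeCore(Registry);
llvm::initializeScalarOpts(Registry);
llvm::initializeVectorization(Registry); // new in this patch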
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index de5ce4ecafc7..37aa18bfff73 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -17,6 +17,7 @@
#define LLVM_INLINEASM_H
#include "llvm/Value.h"
+#include "llvm/ADT/StringRef.h"
#include <vector>
namespace llvm {
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index a1492f3c141a..2529f24fe991 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -388,6 +388,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
/// if (isa<CastInst>(Instr)) { ... }
/// @brief Base class of casting instructions.
class CastInst : public UnaryInstruction {
+ virtual void anchor();
protected:
/// @brief Constructor with insert-before-instruction semantics for subclasses
CastInst(Type *Ty, unsigned iType, Value *S,
diff --git a/include/llvm/Instruction.def b/include/llvm/Instruction.def
index d36e4be1d912..e59a0528e90f 100644
--- a/include/llvm/Instruction.def
+++ b/include/llvm/Instruction.def
@@ -99,81 +99,80 @@ HANDLE_TERM_INST ( 2, Br , BranchInst)
HANDLE_TERM_INST ( 3, Switch , SwitchInst)
HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst)
HANDLE_TERM_INST ( 5, Invoke , InvokeInst)
-HANDLE_TERM_INST ( 6, Unwind , UnwindInst)
-HANDLE_TERM_INST ( 7, Resume , ResumeInst)
-HANDLE_TERM_INST ( 8, Unreachable, UnreachableInst)
- LAST_TERM_INST ( 8)
+HANDLE_TERM_INST ( 6, Resume , ResumeInst)
+HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst)
+ LAST_TERM_INST ( 7)
// Standard binary operators...
- FIRST_BINARY_INST( 9)
-HANDLE_BINARY_INST( 9, Add , BinaryOperator)
-HANDLE_BINARY_INST(10, FAdd , BinaryOperator)
-HANDLE_BINARY_INST(11, Sub , BinaryOperator)
-HANDLE_BINARY_INST(12, FSub , BinaryOperator)
-HANDLE_BINARY_INST(13, Mul , BinaryOperator)
-HANDLE_BINARY_INST(14, FMul , BinaryOperator)
-HANDLE_BINARY_INST(15, UDiv , BinaryOperator)
-HANDLE_BINARY_INST(16, SDiv , BinaryOperator)
-HANDLE_BINARY_INST(17, FDiv , BinaryOperator)
-HANDLE_BINARY_INST(18, URem , BinaryOperator)
-HANDLE_BINARY_INST(19, SRem , BinaryOperator)
-HANDLE_BINARY_INST(20, FRem , BinaryOperator)
+ FIRST_BINARY_INST( 8)
+HANDLE_BINARY_INST( 8, Add , BinaryOperator)
+HANDLE_BINARY_INST( 9, FAdd , BinaryOperator)
+HANDLE_BINARY_INST(10, Sub , BinaryOperator)
+HANDLE_BINARY_INST(11, FSub , BinaryOperator)
+HANDLE_BINARY_INST(12, Mul , BinaryOperator)
+HANDLE_BINARY_INST(13, FMul , BinaryOperator)
+HANDLE_BINARY_INST(14, UDiv , BinaryOperator)
+HANDLE_BINARY_INST(15, SDiv , BinaryOperator)
+HANDLE_BINARY_INST(16, FDiv , BinaryOperator)
+HANDLE_BINARY_INST(17, URem , BinaryOperator)
+HANDLE_BINARY_INST(18, SRem , BinaryOperator)
+HANDLE_BINARY_INST(19, FRem , BinaryOperator)
// Logical operators (integer operands)
-HANDLE_BINARY_INST(21, Shl , BinaryOperator) // Shift left (logical)
-HANDLE_BINARY_INST(22, LShr , BinaryOperator) // Shift right (logical)
-HANDLE_BINARY_INST(23, AShr , BinaryOperator) // Shift right (arithmetic)
-HANDLE_BINARY_INST(24, And , BinaryOperator)
-HANDLE_BINARY_INST(25, Or , BinaryOperator)
-HANDLE_BINARY_INST(26, Xor , BinaryOperator)
- LAST_BINARY_INST(26)
+HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical)
+HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical)
+HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic)
+HANDLE_BINARY_INST(23, And , BinaryOperator)
+HANDLE_BINARY_INST(24, Or , BinaryOperator)
+HANDLE_BINARY_INST(25, Xor , BinaryOperator)
+ LAST_BINARY_INST(25)
// Memory operators...
- FIRST_MEMORY_INST(27)
-HANDLE_MEMORY_INST(27, Alloca, AllocaInst) // Stack management
-HANDLE_MEMORY_INST(28, Load , LoadInst ) // Memory manipulation instrs
-HANDLE_MEMORY_INST(29, Store , StoreInst )
-HANDLE_MEMORY_INST(30, GetElementPtr, GetElementPtrInst)
-HANDLE_MEMORY_INST(31, Fence , FenceInst )
-HANDLE_MEMORY_INST(32, AtomicCmpXchg , AtomicCmpXchgInst )
-HANDLE_MEMORY_INST(33, AtomicRMW , AtomicRMWInst )
- LAST_MEMORY_INST(33)
+ FIRST_MEMORY_INST(26)
+HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management
+HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs
+HANDLE_MEMORY_INST(28, Store , StoreInst )
+HANDLE_MEMORY_INST(29, GetElementPtr, GetElementPtrInst)
+HANDLE_MEMORY_INST(30, Fence , FenceInst )
+HANDLE_MEMORY_INST(31, AtomicCmpXchg , AtomicCmpXchgInst )
+HANDLE_MEMORY_INST(32, AtomicRMW , AtomicRMWInst )
+ LAST_MEMORY_INST(32)
// Cast operators ...
// NOTE: The order matters here because CastInst::isEliminableCastPair
// NOTE: (see Instructions.cpp) encodes a table based on this ordering.
- FIRST_CAST_INST(34)
-HANDLE_CAST_INST(34, Trunc , TruncInst ) // Truncate integers
-HANDLE_CAST_INST(35, ZExt , ZExtInst ) // Zero extend integers
-HANDLE_CAST_INST(36, SExt , SExtInst ) // Sign extend integers
-HANDLE_CAST_INST(37, FPToUI , FPToUIInst ) // floating point -> UInt
-HANDLE_CAST_INST(38, FPToSI , FPToSIInst ) // floating point -> SInt
-HANDLE_CAST_INST(39, UIToFP , UIToFPInst ) // UInt -> floating point
-HANDLE_CAST_INST(40, SIToFP , SIToFPInst ) // SInt -> floating point
-HANDLE_CAST_INST(41, FPTrunc , FPTruncInst ) // Truncate floating point
-HANDLE_CAST_INST(42, FPExt , FPExtInst ) // Extend floating point
-HANDLE_CAST_INST(43, PtrToInt, PtrToIntInst) // Pointer -> Integer
-HANDLE_CAST_INST(44, IntToPtr, IntToPtrInst) // Integer -> Pointer
-HANDLE_CAST_INST(45, BitCast , BitCastInst ) // Type cast
- LAST_CAST_INST(45)
+ FIRST_CAST_INST(33)
+HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers
+HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers
+HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers
+HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt
+HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt
+HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point
+HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point
+HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point
+HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point
+HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer
+HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer
+HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast
+ LAST_CAST_INST(44)
// Other operators...
- FIRST_OTHER_INST(46)
-HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction
-HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr.
-HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction
-HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function
-HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction
-HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass
-HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only
-HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction
-HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector
-HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector
-HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
-HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate
-HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate
-HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction.
- LAST_OTHER_INST(59)
+ FIRST_OTHER_INST(45)
+HANDLE_OTHER_INST(45, ICmp , ICmpInst ) // Integer comparison instruction
+HANDLE_OTHER_INST(46, FCmp , FCmpInst ) // Floating point comparison instr.
+HANDLE_OTHER_INST(47, PHI , PHINode ) // PHI node instruction
+HANDLE_OTHER_INST(48, Call , CallInst ) // Call a function
+HANDLE_OTHER_INST(49, Select , SelectInst ) // select instruction
+HANDLE_OTHER_INST(50, UserOp1, Instruction) // May be used internally in a pass
+HANDLE_OTHER_INST(51, UserOp2, Instruction) // Internal to passes only
+HANDLE_OTHER_INST(52, VAArg , VAArgInst ) // vaarg instruction
+HANDLE_OTHER_INST(53, ExtractElement, ExtractElementInst)// extract from vector
+HANDLE_OTHER_INST(54, InsertElement, InsertElementInst) // insert into vector
+HANDLE_OTHER_INST(55, ShuffleVector, ShuffleVectorInst) // shuffle two vectors.
+HANDLE_OTHER_INST(56, ExtractValue, ExtractValueInst)// extract from aggregate
+HANDLE_OTHER_INST(57, InsertValue, InsertValueInst) // insert into aggregate
+HANDLE_OTHER_INST(58, LandingPad, LandingPadInst) // Landing pad instruction.
+ LAST_OTHER_INST(58)
#undef FIRST_TERM_INST
#undef HANDLE_TERM_INST
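A consumer sketch: Instruction.def is an X-macro file, so any table keyed by opcode number picks up the renumbering above automatically on rebuild. This relies on the file's standard default expansion of the per-group macros to HANDLE_INST.

static const char *opcodeName(unsigned Opcode) {
  switch (Opcode) {
#define HANDLE_INST(NUM, OPCODE, CLASS) case NUM: return #OPCODE;
#include "llvm/Instruction.def"
  default: return "<unknown opcode>";
  }
}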
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 934e890151f0..9c5ac4430f89 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -143,7 +143,7 @@ public:
/// getMetadata - Get the metadata of given kind attached to this Instruction.
/// If the metadata is not found then return null.
- MDNode *getMetadata(const char *Kind) const {
+ MDNode *getMetadata(StringRef Kind) const {
if (!hasMetadata()) return 0;
return getMetadataImpl(Kind);
}
@@ -168,7 +168,7 @@ public:
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
void setMetadata(unsigned KindID, MDNode *Node);
- void setMetadata(const char *Kind, MDNode *Node);
+ void setMetadata(StringRef Kind, MDNode *Node);
/// setDebugLoc - Set the debug location information for this instruction.
void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
@@ -185,7 +185,7 @@ private:
// These are all implemented in Metadata.cpp.
MDNode *getMetadataImpl(unsigned KindID) const;
- MDNode *getMetadataImpl(const char *Kind) const;
+ MDNode *getMetadataImpl(StringRef Kind) const;
void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,MDNode*> > &)const;
void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
MDNode*> > &) const;
@@ -244,26 +244,6 @@ public:
return mayWriteToMemory() || mayThrow();
}
- /// isSafeToSpeculativelyExecute - Return true if the instruction does not
- /// have any effects besides calculating the result and does not have
- /// undefined behavior.
- ///
- /// This method never returns true for an instruction that returns true for
- /// mayHaveSideEffects; however, this method also does some other checks in
- /// addition. It checks for undefined behavior, like dividing by zero or
- /// loading from an invalid pointer (but not for undefined results, like a
- /// shift with a shift amount larger than the width of the result). It checks
- /// for malloc and alloca because speculatively executing them might cause a
- /// memory leak. It also returns false for instructions related to control
- /// flow, specifically terminators and PHI nodes.
- ///
- /// This method only looks at the instruction itself and its operands, so if
- /// this method returns true, it is safe to move the instruction as long as
- /// the correct dominance relationships for the operands and users hold.
- /// However, this method can return true for instructions that read memory;
- /// for such instructions, moving them may change the resulting value.
- bool isSafeToSpeculativelyExecute() const;
-
/// clone() - Create a copy of 'this' instruction that is identical in all
/// ways except the following:
/// * The instruction has no parent
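A migration sketch for the StringRef overloads, assuming 'I' is an Instruction* and 'Ctx' its LLVMContext; the "my.marker" kind name is purely illustrative.

llvm::Value *Elts[] = { llvm::MDString::get(Ctx, "payload") };
I->setMetadata("my.marker", llvm::MDNode::get(Ctx, Elts));
llvm::MDNode *N = I->getMetadata("my.marker");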
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 3faab35bf687..f6eaf04fd0d9 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -776,6 +776,10 @@ public:
static Type *getIndexedType(Type *Ptr, ArrayRef<Constant *> IdxList);
static Type *getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList);
+  /// getAddressSpace - Returns the address space used by the GEP pointer.
+ ///
+ static unsigned getAddressSpace(Value *Ptr);
+
inline op_iterator idx_begin() { return op_begin()+1; }
inline const_op_iterator idx_begin() const { return op_begin()+1; }
inline op_iterator idx_end() { return op_end(); }
@@ -788,7 +792,7 @@ public:
return getOperand(0);
}
static unsigned getPointerOperandIndex() {
- return 0U; // get index for modifying correct operand
+ return 0U; // get index for modifying correct operand.
}
unsigned getPointerAddressSpace() const {
@@ -797,10 +801,25 @@ public:
/// getPointerOperandType - Method to return the pointer operand as a
/// PointerType.
- PointerType *getPointerOperandType() const {
- return reinterpret_cast<PointerType*>(getPointerOperand()->getType());
+ Type *getPointerOperandType() const {
+ return getPointerOperand()->getType();
}
+  /// getGEPReturnType - Returns the pointer type returned by the GEP
+ /// instruction, which may be a vector of pointers.
+ static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
+ Type *PtrTy = PointerType::get(checkGEPType(
+ getIndexedType(Ptr->getType(), IdxList)),
+ getAddressSpace(Ptr));
+ // Vector GEP
+ if (Ptr->getType()->isVectorTy()) {
+ unsigned NumElem = cast<VectorType>(Ptr->getType())->getNumElements();
+ return VectorType::get(PtrTy, NumElem);
+ }
+
+ // Scalar GEP
+ return PtrTy;
+ }
unsigned getNumIndices() const { // Note: always non-negative
return getNumOperands() - 1;
@@ -847,10 +866,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
unsigned Values,
const Twine &NameStr,
Instruction *InsertBefore)
- : Instruction(PointerType::get(checkGEPType(
- getIndexedType(Ptr->getType(), IdxList)),
- cast<PointerType>(Ptr->getType())
- ->getAddressSpace()),
+ : Instruction(getGEPReturnType(Ptr, IdxList),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertBefore) {
@@ -861,10 +877,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
unsigned Values,
const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : Instruction(PointerType::get(checkGEPType(
- getIndexedType(Ptr->getType(), IdxList)),
- cast<PointerType>(Ptr->getType())
- ->getAddressSpace()),
+ : Instruction(getGEPReturnType(Ptr, IdxList),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertAtEnd) {
@@ -905,7 +918,7 @@ public:
"Both operands to ICmp instruction are not of the same type!");
// Check that the operands are the right type
assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->isPointerTy()) &&
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
"Invalid operand types for ICmp instruction");
}
@@ -945,7 +958,7 @@ public:
"Both operands to ICmp instruction are not of the same type!");
// Check that the operands are the right type
assert((getOperand(0)->getType()->isIntOrIntVectorTy() ||
- getOperand(0)->getType()->isPointerTy()) &&
+ getOperand(0)->getType()->getScalarType()->isPointerTy()) &&
"Invalid operand types for ICmp instruction");
}
@@ -1657,10 +1670,33 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ Constant *getMask() const {
+ return reinterpret_cast<Constant*>(getOperand(2));
+ }
+
/// getMaskValue - Return the index from the shuffle mask for the specified
/// output result. This is either -1 if the element is undef or a number less
/// than 2*numelements.
- int getMaskValue(unsigned i) const;
+ static int getMaskValue(Constant *Mask, unsigned i);
+
+ int getMaskValue(unsigned i) const {
+ return getMaskValue(getMask(), i);
+ }
+
+ /// getShuffleMask - Return the full mask for this instruction, where each
+ /// element is the element number and undef's are returned as -1.
+ static void getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result);
+
+ void getShuffleMask(SmallVectorImpl<int> &Result) const {
+ return getShuffleMask(getMask(), Result);
+ }
+
+ SmallVector<int, 16> getShuffleMask() const {
+ SmallVector<int, 16> Mask;
+ getShuffleMask(Mask);
+ return Mask;
+ }
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const ShuffleVectorInst *) { return true; }
@@ -2431,6 +2467,122 @@ class SwitchInst : public TerminatorInst {
protected:
virtual SwitchInst *clone_impl() const;
public:
+
+  // Sentinel case index (numerically -2) used to denote the default case.
+ static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
+
+ template <class SwitchInstTy, class ConstantIntTy, class BasicBlockTy>
+ class CaseIteratorT {
+ protected:
+
+ SwitchInstTy *SI;
+ unsigned Index;
+
+ public:
+
+ typedef CaseIteratorT<SwitchInstTy, ConstantIntTy, BasicBlockTy> Self;
+
+ /// Initializes case iterator for given SwitchInst and for given
+ /// case number.
+ CaseIteratorT(SwitchInstTy *SI, unsigned CaseNum) {
+ this->SI = SI;
+ Index = CaseNum;
+ }
+
+ /// Initializes case iterator for given SwitchInst and for given
+ /// TerminatorInst's successor index.
+ static Self fromSuccessorIndex(SwitchInstTy *SI, unsigned SuccessorIndex) {
+ assert(SuccessorIndex < SI->getNumSuccessors() &&
+ "Successor index # out of range!");
+ return SuccessorIndex != 0 ?
+ Self(SI, SuccessorIndex - 1) :
+ Self(SI, DefaultPseudoIndex);
+ }
+
+ /// Resolves case value for current case.
+ ConstantIntTy *getCaseValue() {
+      assert(Index < SI->getNumCases() && "Index out of the number of cases.");
+ return reinterpret_cast<ConstantIntTy*>(SI->getOperand(2 + Index*2));
+ }
+
+ /// Resolves successor for current case.
+ BasicBlockTy *getCaseSuccessor() {
+ assert((Index < SI->getNumCases() ||
+ Index == DefaultPseudoIndex) &&
+ "Index out the number of cases.");
+ return SI->getSuccessor(getSuccessorIndex());
+ }
+
+ /// Returns number of current case.
+ unsigned getCaseIndex() const { return Index; }
+
+ /// Returns TerminatorInst's successor index for current case successor.
+ unsigned getSuccessorIndex() const {
+ assert((Index == DefaultPseudoIndex || Index < SI->getNumCases()) &&
+ "Index out the number of cases.");
+ return Index != DefaultPseudoIndex ? Index + 1 : 0;
+ }
+
+ Self operator++() {
+ // Check index correctness after increment.
+ // Note: Index == getNumCases() means end().
+      assert(Index+1 <= SI->getNumCases() &&
+             "Index out of the number of cases.");
+ ++Index;
+ return *this;
+ }
+ Self operator++(int) {
+ Self tmp = *this;
+ ++(*this);
+ return tmp;
+ }
+ Self operator--() {
+ // Check index correctness after decrement.
+ // Note: Index == getNumCases() means end().
+ // Also allow "-1" iterator here. That will became valid after ++.
+ assert((Index == 0 || Index-1 <= SI->getNumCases()) &&
+ "Index out the number of cases.");
+ --Index;
+ return *this;
+ }
+ Self operator--(int) {
+ Self tmp = *this;
+ --(*this);
+ return tmp;
+ }
+ bool operator==(const Self& RHS) const {
+ assert(RHS.SI == SI && "Incompatible operators.");
+ return RHS.Index == Index;
+ }
+ bool operator!=(const Self& RHS) const {
+ assert(RHS.SI == SI && "Incompatible operators.");
+ return RHS.Index != Index;
+ }
+ };
+
+ typedef CaseIteratorT<const SwitchInst, const ConstantInt, const BasicBlock>
+ ConstCaseIt;
+
+ class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> {
+
+ typedef CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> ParentTy;
+
+ public:
+
+ CaseIt(const ParentTy& Src) : ParentTy(Src) {}
+ CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {}
+
+ /// Sets the new value for current case.
+ void setValue(ConstantInt *V) {
+      assert(Index < SI->getNumCases() && "Index out of the number of cases.");
+ SI->setOperand(2 + Index*2, reinterpret_cast<Value*>(V));
+ }
+
+ /// Sets the new successor for current case.
+ void setSuccessor(BasicBlock *S) {
+ SI->setSuccessor(getSuccessorIndex(), S);
+ }
+ };
+
static SwitchInst *Create(Value *Value, BasicBlock *Default,
unsigned NumCases, Instruction *InsertBefore = 0) {
return new SwitchInst(Value, Default, NumCases, InsertBefore);
@@ -2439,6 +2591,7 @@ public:
unsigned NumCases, BasicBlock *InsertAtEnd) {
return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
}
+
~SwitchInst();
/// Provide fast operand accessors
@@ -2452,61 +2605,94 @@ public:
return cast<BasicBlock>(getOperand(1));
}
- /// getNumCases - return the number of 'cases' in this switch instruction.
- /// Note that case #0 is always the default case.
- unsigned getNumCases() const {
- return getNumOperands()/2;
+ void setDefaultDest(BasicBlock *DefaultCase) {
+ setOperand(1, reinterpret_cast<Value*>(DefaultCase));
}
- /// getCaseValue - Return the specified case value. Note that case #0, the
- /// default destination, does not have a case value.
- ConstantInt *getCaseValue(unsigned i) {
- assert(i && i < getNumCases() && "Illegal case value to get!");
- return getSuccessorValue(i);
+ /// getNumCases - return the number of 'cases' in this switch instruction,
+  /// excluding the default case.
+ unsigned getNumCases() const {
+ return getNumOperands()/2 - 1;
}
- /// getCaseValue - Return the specified case value. Note that case #0, the
- /// default destination, does not have a case value.
- const ConstantInt *getCaseValue(unsigned i) const {
- assert(i && i < getNumCases() && "Illegal case value to get!");
- return getSuccessorValue(i);
+ /// Returns a read/write iterator that points to the first
+ /// case in SwitchInst.
+ CaseIt case_begin() {
+ return CaseIt(this, 0);
}
-
+ /// Returns a read-only iterator that points to the first
+ /// case in the SwitchInst.
+ ConstCaseIt case_begin() const {
+ return ConstCaseIt(this, 0);
+ }
+
+ /// Returns a read/write iterator that points one past the last
+  /// case in the SwitchInst.
+ CaseIt case_end() {
+ return CaseIt(this, getNumCases());
+ }
+ /// Returns a read-only iterator that points one past the last
+  /// case in the SwitchInst.
+ ConstCaseIt case_end() const {
+ return ConstCaseIt(this, getNumCases());
+ }
+ /// Returns an iterator that points to the default case.
+  /// Note: this iterator can only resolve the successor; attempting to
+  /// resolve the case value triggers an assertion.
+  /// Also note that incrementing or decrementing this iterator triggers an
+  /// assertion and leaves it invalid.
+ CaseIt case_default() {
+ return CaseIt(this, DefaultPseudoIndex);
+ }
+ ConstCaseIt case_default() const {
+ return ConstCaseIt(this, DefaultPseudoIndex);
+ }
+
/// findCaseValue - Search all of the case values for the specified constant.
- /// If it is explicitly handled, return the case number of it, otherwise
- /// return 0 to indicate that it is handled by the default handler.
- unsigned findCaseValue(const ConstantInt *C) const {
- for (unsigned i = 1, e = getNumCases(); i != e; ++i)
- if (getCaseValue(i) == C)
+  /// If it is explicitly handled, return the case iterator for it; otherwise
+  /// return the default case iterator to indicate that it is handled by the
+  /// default handler.
+ CaseIt findCaseValue(const ConstantInt *C) {
+ for (CaseIt i = case_begin(), e = case_end(); i != e; ++i)
+ if (i.getCaseValue() == C)
return i;
- return 0;
+ return case_default();
}
-
+ ConstCaseIt findCaseValue(const ConstantInt *C) const {
+ for (ConstCaseIt i = case_begin(), e = case_end(); i != e; ++i)
+ if (i.getCaseValue() == C)
+ return i;
+ return case_default();
+ }
+
/// findCaseDest - Finds the unique case value for a given successor. Returns
/// null if the successor is not found, not unique, or is the default case.
ConstantInt *findCaseDest(BasicBlock *BB) {
if (BB == getDefaultDest()) return NULL;
ConstantInt *CI = NULL;
- for (unsigned i = 1, e = getNumCases(); i != e; ++i) {
- if (getSuccessor(i) == BB) {
+ for (CaseIt i = case_begin(), e = case_end(); i != e; ++i) {
+ if (i.getCaseSuccessor() == BB) {
if (CI) return NULL; // Multiple cases lead to BB.
- else CI = getCaseValue(i);
+ else CI = i.getCaseValue();
}
}
return CI;
}
/// addCase - Add an entry to the switch instruction...
- ///
+ /// Note:
+  /// This action invalidates case_end(); the old case_end() iterator will
+  /// point to the added case.
void addCase(ConstantInt *OnVal, BasicBlock *Dest);
- /// removeCase - This method removes the specified successor from the switch
- /// instruction. Note that this cannot be used to remove the default
- /// destination (successor #0). Also note that this operation may reorder the
+ /// removeCase - This method removes the specified case and its successor
+ /// from the switch instruction. Note that this operation may reorder the
/// remaining cases at index idx and above.
- ///
- void removeCase(unsigned idx);
+ /// Note:
+ /// This action invalidates iterators for all cases following the one removed,
+ /// including the case_end() iterator.
+ void removeCase(CaseIt i);
unsigned getNumSuccessors() const { return getNumOperands()/2; }
BasicBlock *getSuccessor(unsigned idx) const {
@@ -2518,20 +2704,6 @@ public:
setOperand(idx*2+1, (Value*)NewSucc);
}
- // getSuccessorValue - Return the value associated with the specified
- // successor.
- ConstantInt *getSuccessorValue(unsigned idx) const {
- assert(idx < getNumSuccessors() && "Successor # out of range!");
- return reinterpret_cast<ConstantInt*>(getOperand(idx*2));
- }
-
- // setSuccessorValue - Updates the value associated with the specified
- // successor.
- void setSuccessorValue(unsigned idx, ConstantInt* SuccessorValue) {
- assert(idx < getNumSuccessors() && "Successor # out of range!");
- setOperand(idx*2, reinterpret_cast<Value*>(SuccessorValue));
- }
-
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const SwitchInst *) { return true; }
static inline bool classof(const Instruction *I) {
@@ -2890,42 +3062,6 @@ InvokeInst::InvokeInst(Value *Func,
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
//===----------------------------------------------------------------------===//
-// UnwindInst Class
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------------------------------------------
-/// UnwindInst - Immediately exit the current function, unwinding the stack
-/// until an invoke instruction is found.
-///
-class UnwindInst : public TerminatorInst {
- void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
-protected:
- virtual UnwindInst *clone_impl() const;
-public:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
- explicit UnwindInst(LLVMContext &C, Instruction *InsertBefore = 0);
- explicit UnwindInst(LLVMContext &C, BasicBlock *InsertAtEnd);
-
- unsigned getNumSuccessors() const { return 0; }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const UnwindInst *) { return true; }
- static inline bool classof(const Instruction *I) {
- return I->getOpcode() == Instruction::Unwind;
- }
- static inline bool classof(const Value *V) {
- return isa<Instruction>(V) && classof(cast<Instruction>(V));
- }
-private:
- virtual BasicBlock *getSuccessorV(unsigned idx) const;
- virtual unsigned getNumSuccessorsV() const;
- virtual void setSuccessorV(unsigned idx, BasicBlock *B);
-};
-
-//===----------------------------------------------------------------------===//
// ResumeInst Class
//===----------------------------------------------------------------------===//
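A migration sketch for the new case iterators, assuming 'SI' is a SwitchInst*: the index-based accessors removed above give way to the CaseIt API.

for (llvm::SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
     i != e; ++i) {
  llvm::ConstantInt *Val = i.getCaseValue();     // was getCaseValue(idx)
  llvm::BasicBlock *Dest = i.getCaseSuccessor(); // was getSuccessor(idx)
  (void)Val; (void)Dest;
}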
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index 42862011ac7a..1cebdd2ee642 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -277,34 +277,6 @@ namespace llvm {
}
};
- /// EHExceptionInst - This represents the llvm.eh.exception instruction.
- ///
- class EHExceptionInst : public IntrinsicInst {
- public:
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const EHExceptionInst *) { return true; }
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::eh_exception;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
- /// EHSelectorInst - This represents the llvm.eh.selector instruction.
- ///
- class EHSelectorInst : public IntrinsicInst {
- public:
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const EHSelectorInst *) { return true; }
- static inline bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::eh_selector;
- }
- static inline bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
- };
-
}
#endif
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index d70f9153fd8a..069f907d4ff2 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -284,8 +284,8 @@ def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
let Properties = [IntrNoMem] in {
def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
- def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+ def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+ def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
}
//===------------------------ Debugger Intrinsics -------------------------===//
@@ -304,10 +304,6 @@ let Properties = [IntrNoMem] in {
//===------------------ Exception Handling Intrinsics----------------------===//
//
-def int_eh_exception : Intrinsic<[llvm_ptr_ty], [], [IntrReadMem]>;
-def int_eh_selector : Intrinsic<[llvm_i32_ty],
- [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>;
-def int_eh_resume : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [Throws]>;
// The result of eh.typeid.for depends on the enclosing function, but inside a
// given function it is 'const' and may be CSE'd etc.
@@ -326,7 +322,6 @@ let Properties = [IntrNoMem] in {
def int_eh_sjlj_callsite : Intrinsic<[], [llvm_i32_ty]>;
}
def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
-def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
@@ -443,6 +438,6 @@ include "llvm/IntrinsicsPowerPC.td"
include "llvm/IntrinsicsX86.td"
include "llvm/IntrinsicsARM.td"
include "llvm/IntrinsicsCellSPU.td"
-include "llvm/IntrinsicsAlpha.td"
include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsPTX.td"
+include "llvm/IntrinsicsHexagon.td"
diff --git a/include/llvm/IntrinsicsAlpha.td b/include/llvm/IntrinsicsAlpha.td
deleted file mode 100644
index 59865cf8a3e6..000000000000
--- a/include/llvm/IntrinsicsAlpha.td
+++ /dev/null
@@ -1,18 +0,0 @@
-//===- IntrinsicsAlpha.td - Defines Alpha intrinsics -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the Alpha-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-
-let TargetPrefix = "alpha" in { // All intrinsics start with "llvm.alpha.".
- def int_alpha_umulh : GCCBuiltin<"__builtin_alpha_umulh">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
-}
diff --git a/include/llvm/IntrinsicsHexagon.td b/include/llvm/IntrinsicsHexagon.td
new file mode 100644
index 000000000000..eb5dc8fb1e7f
--- /dev/null
+++ b/include/llvm/IntrinsicsHexagon.td
@@ -0,0 +1,3671 @@
+//===- IntrinsicsHexagon.td - Defines Hexagon intrinsics ---*- tablegen -*-===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Hexagon-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all Hexagon intrinsics.
+//
+// All Hexagon intrinsics start with "llvm.hexagon.".
+let TargetPrefix = "hexagon" in {
+  /// Hexagon_Intrinsic - Base class for all Hexagon intrinsics.
+ class Hexagon_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+ list<LLVMType> param_types,
+ list<IntrinsicProperty> properties>
+ : GCCBuiltin<!strconcat("__builtin_", GCCIntSuffix)>,
+ Intrinsic<ret_types, param_types, properties>;
+}
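+//
+// Each subclass below fixes a return/parameter signature, so an individual
+// def supplies only the GCC builtin suffix. As a sketch, a def such as
+//   def int_hexagon_C2_cmpeq : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeq">;
+// expands through the !strconcat above to
+//   GCCBuiltin<"__builtin_HEXAGON.C2.cmpeq">,
+//   Intrinsic<[llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>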
+
+//===----------------------------------------------------------------------===//
+//
+// DEF_FUNCTION_TYPE_1(QI_ftype_MEM,BT_BOOL,BT_PTR) ->
+// Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_mem_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_ptr_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) ->
+// Hexagon_void_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_void_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_void_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) ->
+// Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_hi_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i16_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(SI_ftype_SI,BT_INT,BT_INT) ->
+// Hexagon_si_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(DI_ftype_SI,BT_LONGLONG,BT_INT) ->
+// Hexagon_di_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(SI_ftype_DI,BT_INT,BT_LONGLONG) ->
+// Hexagon_si_di_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_di_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(DI_ftype_DI,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_di_di_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_di_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(QI_ftype_QI,BT_BOOL,BT_BOOL) ->
+// Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_qi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(QI_ftype_SI,BT_BOOL,BT_INT) ->
+// Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_si_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(DI_ftype_QI,BT_LONGLONG,BT_BOOL) ->
+// Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_qi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_1(SI_ftype_QI,BT_INT,BT_BOOL) ->
+// Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty],
+ [IntrNoMem]>;
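+//
+// Note on the mappings above: BT_BOOL (QI) lowers to llvm_i1_ty in return
+// position but is widened to llvm_i32_ty in most parameter positions (e.g.
+// Hexagon_qi_qi_Intrinsic takes llvm_i32_ty and returns llvm_i1_ty).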
+//
+// DEF_FUNCTION_TYPE_2(QI_ftype_SISI,BT_BOOL,BT_INT,BT_INT) ->
+// Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(void_ftype_SISI,BT_VOID,BT_INT,BT_INT) ->
+// Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_void_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_SISI,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(USI_ftype_SISI,BT_UINT,BT_INT,BT_INT) ->
+// Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_usi_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_SISI,BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(UDI_ftype_SISI,BT_ULONGLONG,BT_INT,BT_INT) ->
+// Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_udi_sisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_SIDI,BT_LONGLONG,BT_INT,BT_LONGLONG) ->
+// Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_sidi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_DISI,BT_LONGLONG,BT_LONGLONG,BT_INT) ->
+// Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_disi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_SIDI,BT_INT,BT_INT,BT_LONGLONG) ->
+// Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sidi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_DIDI,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(DI_ftype_DIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(UDI_ftype_DIDI,BT_ULONGLONG,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_udi_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_DISI,BT_INT,BT_LONGLONG,BT_INT) ->
+// Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_disi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(QI_ftype_DIDI,BT_BOOL,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_didi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(QI_ftype_QIQI,BT_BOOL,BT_BOOL,BT_BOOL) ->
+// Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_qiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(QI_ftype_QIQIQI,BT_BOOL,BT_BOOL,BT_BOOL,BT_BOOL) ->
+// Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_qi_qiqiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i1_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_QIQI,BT_INT,BT_BOOL,BT_BOOL) ->
+// Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_2(SI_ftype_QISI,BT_INT,BT_BOOL,BT_INT) ->
+// Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i1_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(void_ftype_SISISI,BT_VOID,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_void_sisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_void_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SISISI,BT_INT,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_SISISI,BT_LONGLONG,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_sisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_DISISI,BT_INT,BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_disisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i64_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DISISI,BT_LONGLONG,BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_disisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SIDISI,BT_INT,BT_INT,BT_LONGLONG,BT_INT) ->
+// Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sidisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DIDISI,BT_LONGLONG,BT_LONGLONG,
+// BT_LONGLONG,BT_INT) ->
+// Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SIDIDI,BT_INT,BT_INT,BT_LONGLONG,BT_LONGLONG) ->
+// Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sididi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIDI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
+// BT_LONGLONG) ->
+// Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_dididi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_SISIDI,BT_INT,BT_INT,BT_INT,BT_LONGLONG) ->
+// Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisidi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(SI_ftype_QISISI,BT_INT,BT_BOOL,BT_INT,BT_INT) ->
+// Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_qisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_QISISI,BT_LONGLONG,BT_BOOL,BT_INT,BT_INT) ->
+// Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_qisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i1_ty, llvm_i32_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_QIDIDI,BT_LONGLONG,BT_BOOL,BT_LONGLONG,
+// BT_LONGLONG) ->
+// Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_qididi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty,
+ llvm_i64_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_3(DI_ftype_DIDIQI,BT_LONGLONG,BT_LONGLONG,BT_LONGLONG,
+// BT_BOOL) ->
+// Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didiqi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_4(SI_ftype_SISISISI,BT_INT,BT_INT,BT_INT,BT_INT,BT_INT) ->
+// Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_si_sisisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+//
+// DEF_FUNCTION_TYPE_4(DI_ftype_DIDISISI,BT_LONGLONG,BT_LONGLONG,
+// BT_LONGLONG,BT_INT,BT_INT) ->
+// Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
+//
+class Hexagon_di_didisisi_Intrinsic<string GCCIntSuffix>
+ : Hexagon_Intrinsic<GCCIntSuffix,
+ [llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpeq : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeq">;
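+//
+// In LLVM IR the def above is callable as (underscores in the def name become
+// dots in the intrinsic name; %a and %b are placeholders):
+//
+//   %p = call i1 @llvm.hexagon.C2.cmpeq(i32 %a, i32 %b)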
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgt,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgt : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgt">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtu,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgtu : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtu">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpeqp,QI_ftype_DIDI,2)
+//
+def int_hexagon_C2_cmpeqp : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpeqp">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtp,QI_ftype_DIDI,2)
+//
+def int_hexagon_C2_cmpgtp : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtp">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtup,QI_ftype_DIDI,2)
+//
+def int_hexagon_C2_cmpgtup : Hexagon_qi_didi_Intrinsic<"HEXAGON.C2.cmpgtup">;
+//
+// BUILTIN_INFO(HEXAGON.C2_bitsset,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_bitsset : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsset">;
+//
+// BUILTIN_INFO(HEXAGON.C2_bitsclr,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_bitsclr : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclr">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpeqi,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpeqi : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpeqi">;
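+//
+// The trailing "i" in the cmpeqi def above and in cmpgti/cmpgtui/cmpgei/
+// cmpgeui/bitsclri below marks an immediate second operand; at the builtin
+// level it is still typed BT_INT, so the class is unchanged.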
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgti,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgti : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgti">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgtui,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgtui : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgtui">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgei,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgei : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgei">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpgeui,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpgeui : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpgeui">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmplt,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmplt : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmplt">;
+//
+// BUILTIN_INFO(HEXAGON.C2_cmpltu,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_cmpltu : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.cmpltu">;
+//
+// BUILTIN_INFO(HEXAGON.C2_bitsclri,QI_ftype_SISI,2)
+//
+def int_hexagon_C2_bitsclri : Hexagon_qi_sisi_Intrinsic<"HEXAGON.C2.bitsclri">;
+//
+// BUILTIN_INFO(HEXAGON.C2_and,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_and : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.and">;
+//
+// BUILTIN_INFO(HEXAGON.C2_or,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_or : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.or">;
+//
+// BUILTIN_INFO(HEXAGON.C2_xor,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_xor : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.xor">;
+//
+// BUILTIN_INFO(HEXAGON.C2_andn,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_andn : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.andn">;
+//
+// BUILTIN_INFO(HEXAGON.C2_not,QI_ftype_QI,1)
+//
+def int_hexagon_C2_not : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.not">;
+//
+// BUILTIN_INFO(HEXAGON.C2_orn,QI_ftype_QIQI,2)
+//
+def int_hexagon_C2_orn : Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C2.orn">;
+//
+// BUILTIN_INFO(HEXAGON.C2_pxfer_map,QI_ftype_QI,1)
+//
+def int_hexagon_C2_pxfer_map : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.pxfer.map">;
+//
+// BUILTIN_INFO(HEXAGON.C2_any8,QI_ftype_QI,1)
+//
+def int_hexagon_C2_any8 : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.any8">;
+//
+// BUILTIN_INFO(HEXAGON.C2_all8,QI_ftype_QI,1)
+//
+def int_hexagon_C2_all8 : Hexagon_qi_qi_Intrinsic<"HEXAGON.C2.all8">;
+//
+// BUILTIN_INFO(HEXAGON.C2_vitpack,SI_ftype_QIQI,2)
+//
+def int_hexagon_C2_vitpack : Hexagon_si_qiqi_Intrinsic<"HEXAGON.C2.vitpack">;
+//
+// BUILTIN_INFO(HEXAGON.C2_mux,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_mux : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.mux">;
+//
+// BUILTIN_INFO(HEXAGON.C2_muxii,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_muxii : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxii">;
+//
+// BUILTIN_INFO(HEXAGON.C2_muxir,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_muxir : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxir">;
+//
+// BUILTIN_INFO(HEXAGON.C2_muxri,SI_ftype_QISISI,3)
+//
+def int_hexagon_C2_muxri : Hexagon_si_qisisi_Intrinsic<"HEXAGON.C2.muxri">;
+//
+// BUILTIN_INFO(HEXAGON.C2_vmux,DI_ftype_QIDIDI,3)
+//
+def int_hexagon_C2_vmux : Hexagon_di_qididi_Intrinsic<"HEXAGON.C2.vmux">;
+//
+// BUILTIN_INFO(HEXAGON.C2_mask,DI_ftype_QI,1)
+//
+def int_hexagon_C2_mask : Hexagon_di_qi_Intrinsic<"HEXAGON.C2.mask">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpbeq,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpbeq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbeq">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpbgtu,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpbgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpbgtu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpheq,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpheq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpheq">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmphgt,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmphgt : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgt">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmphgtu,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmphgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmphgtu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpweq,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpweq : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpweq">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpwgt,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpwgt : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgt">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vcmpwgtu,QI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vcmpwgtu : Hexagon_qi_didi_Intrinsic<"HEXAGON.A2.vcmpwgtu">;
+//
+// BUILTIN_INFO(HEXAGON.C2_tfrpr,SI_ftype_QI,1)
+//
+def int_hexagon_C2_tfrpr : Hexagon_si_qi_Intrinsic<"HEXAGON.C2.tfrpr">;
+//
+// BUILTIN_INFO(HEXAGON.C2_tfrrp,QI_ftype_SI,1)
+//
+def int_hexagon_C2_tfrrp : Hexagon_qi_si_Intrinsic<"HEXAGON.C2.tfrrp">;
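+//
+// Naming sketch for the M2_mpy* families that follow (a hedged summary of the
+// Hexagon manual): hh/hl/lh/ll select the 16-bit half of each source operand
+// (first letter for the first operand, second for the second); s0/s1 is a
+// post-multiply left shift of 0 or 1 bit; acc/nac add/subtract the product
+// into the accumulator; sat saturates and rnd rounds. Roughly:
+//
+//   mpy_acc_hl_s1: Rx += ((Rs.h * Rt.l) << 1)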
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_acc_sat_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_acc_sat_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.acc.sat.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_nac_sat_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpy_nac_sat_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpy.nac.sat.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_rnd_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_rnd_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.rnd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hl_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_hl_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_hl_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_lh_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_lh_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_lh_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_ll_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_sat_rnd_ll_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_sat_rnd_ll_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.sat.rnd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_acc_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_acc_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_nac_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyd_nac_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyd.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hl_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_hl_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_hl_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_lh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_lh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_lh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_ll_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_ll_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_ll_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hl_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_hl_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_hl_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_lh_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_lh_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_lh_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_ll_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyd_rnd_ll_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyd_rnd_ll_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.mpyd.rnd.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_acc_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_acc_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hl_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_hl_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_hl_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_lh_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_lh_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_lh_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s0,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_ll_s0 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_nac_ll_s1,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_mpyu_nac_ll_s1 :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.mpyu.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hh_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hh_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hh_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hl_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_hl_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_hl_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_lh_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_lh_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_lh_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s0,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_ll_s0 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_ll_s1,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_ll_s1 :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_acc_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_acc_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.acc.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hl_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_hl_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_hl_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_lh_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_lh_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_lh_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_ll_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_nac_ll_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_mpyud_nac_ll_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.mpyud.nac.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hh_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hh_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hh_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hl_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_hl_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_hl_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.hl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_lh_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_lh_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_lh_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.lh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_ll_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyud_ll_s1,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyud_ll_s1 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.mpyud.ll.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpysmi,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpysmi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpysmi">;
+//
+// BUILTIN_INFO(HEXAGON.M2_macsip,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_macsip :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsip">;
+//
+// BUILTIN_INFO(HEXAGON.M2_macsin,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_macsin :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.macsin">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_dpmpyss_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_acc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyss_acc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.acc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_nac_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyss_nac_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyss.nac.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_s0,UDI_ftype_SISI,2)
+//
+def int_hexagon_M2_dpmpyuu_s0 :
+Hexagon_udi_sisi_Intrinsic<"HEXAGON.M2.dpmpyuu.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_acc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyuu_acc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.acc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyuu_nac_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_dpmpyuu_nac_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.dpmpyuu.nac.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpy_up,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpy_up :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpy.up">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyu_up,USI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyu_up :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.M2.mpyu.up">;
+//
+// BUILTIN_INFO(HEXAGON.M2_dpmpyss_rnd_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_dpmpyss_rnd_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.dpmpyss.rnd.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyi,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyi">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mpyui,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_mpyui :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.mpyui">;
+//
+// BUILTIN_INFO(HEXAGON.M2_maci,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_maci :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.maci">;
+//
+// BUILTIN_INFO(HEXAGON.M2_acci,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_acci :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.acci">;
+//
+// BUILTIN_INFO(HEXAGON.M2_accii,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_accii :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.accii">;
+//
+// BUILTIN_INFO(HEXAGON.M2_nacci,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_nacci :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.nacci">;
+//
+// BUILTIN_INFO(HEXAGON.M2_naccii,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_naccii :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.naccii">;
+//
+// BUILTIN_INFO(HEXAGON.M2_subacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_subacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.subacc">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2s_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_vmac2s_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2s_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_vmac2s_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2s.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s0pack,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s0pack :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s0pack">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2s_s1pack,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_vmpy2s_s1pack :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.vmpy2s.s1pack">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_vmac2 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.vmac2">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vmpy2es_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmpy2es_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vmpy2es_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vmpy2es.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2es_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vmac2es_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2es_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vmac2es_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vmac2es,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vmac2es :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vmac2es">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrmac_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrmac_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrmac.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrmpy_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrmpy_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrmpy.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s0,SI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpyrs_s0 :
+Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpyrs_s1,SI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpyrs_s1 :
+Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vdmpyrs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmacs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vdmacs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmacs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vdmacs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vdmacs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpys_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpys_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vdmpys_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vdmpys_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vdmpys.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrs_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrs_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrs_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s0,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrsc_s0 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyrsc_s1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyrsc_s1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.cmpyrsc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacs_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacs_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacs_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacs_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacsc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacsc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacsc_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacsc_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacsc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpys_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpys_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpys_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpys_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpys.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpysc_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpysc_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpysc_s1,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpysc_s1 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpysc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacs_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacs_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacs_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacs_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacsc_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacsc_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cnacsc_s1,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cnacsc_s1 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cnacsc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1,DI_ftype_DISI,2)
+//
+def int_hexagon_M2_vrcmpys_s1 :
+Hexagon_di_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpys_acc_s1,DI_ftype_DIDISI,3)
+//
+def int_hexagon_M2_vrcmpys_acc_s1 :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.M2.vrcmpys.acc.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpys_s1rp,SI_ftype_DISI,2)
+//
+def int_hexagon_M2_vrcmpys_s1rp :
+Hexagon_si_disi_Intrinsic<"HEXAGON.M2.vrcmpys.s1rp">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacls_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacls_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacls.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmachs_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmachs_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmachs.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyl_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyl_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyl.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyh_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyh_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyh.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_hmmpyl_rs1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_hmmpyl_rs1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyl.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_hmmpyh_rs1,SI_ftype_SISI,2)
+//
+def int_hexagon_M2_hmmpyh_rs1 :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.M2.hmmpyh.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_s1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_s1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_s1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_s1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.s1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmaculs_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmaculs_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmaculs.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_rs0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmacuhs_rs1,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_mmacuhs_rs1 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.mmacuhs.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyul_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyul_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyul.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_rs0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_mmpyuh_rs1,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_mmpyuh_rs1 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.mmpyuh.rs1">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmaci_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmacr_s0 :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmaci_s0c,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmaci_s0c :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmaci.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmacr_s0c,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vrcmacr_s0c :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vrcmacr.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmaci_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmaci_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmaci.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmacr_s0,DI_ftype_DISISI,3)
+//
+def int_hexagon_M2_cmacr_s0 :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.M2.cmacr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyi_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyr_s0 :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyi_s0c,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyi_s0c :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyi.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vrcmpyr_s0c,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vrcmpyr_s0c :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vrcmpyr.s0c">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyi_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyi_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyi.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_cmpyr_s0,DI_ftype_SISI,2)
+//
+def int_hexagon_M2_cmpyr_s0 :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.M2.cmpyr.s0">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_i,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s0_sat_i :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.i">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s0_sat_r,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s0_sat_r :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s0.sat.r">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_i,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s1_sat_i :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.i">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmpy_s1_sat_r,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vcmpy_s1_sat_r :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vcmpy.s1.sat.r">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_i,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vcmac_s0_sat_i :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.i">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vcmac_s0_sat_r,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M2_vcmac_s0_sat_r :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M2.vcmac.s0.sat.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vcrotate,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_vcrotate :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.vcrotate">;
+//
+// BUILTIN_INFO(HEXAGON.A2_add,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_add :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.add">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sub,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_sub :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.sub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addsat,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addsat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subsat,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subsat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addi,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addi">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hl">;
+def int_hexagon_A2_addh_l16_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.lh">;
+def int_hexagon_A2_addh_l16_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_l16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_l16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hl">;
+def int_hexagon_A2_addh_l16_sat_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.lh">;
+def int_hexagon_A2_addh_l16_sat_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.l16.sat.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_l16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_l16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.l16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addh_h16_sat_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_addh_h16_sat_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.addh.h16.sat.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subh_h16_sat_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subh_h16_sat_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subh.h16.sat.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_aslh,SI_ftype_SI,1)
+//
+def int_hexagon_A2_aslh :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.aslh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_asrh,SI_ftype_SI,1)
+//
+def int_hexagon_A2_asrh :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.asrh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_addp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addpsat,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_addpsat :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.addpsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_addsp,DI_ftype_SIDI,2)
+//
+def int_hexagon_A2_addsp :
+Hexagon_di_sidi_Intrinsic<"HEXAGON.A2.addsp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_subp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.subp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_neg,SI_ftype_SI,1)
+//
+def int_hexagon_A2_neg :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.neg">;
+//
+// BUILTIN_INFO(HEXAGON.A2_negsat,SI_ftype_SI,1)
+//
+def int_hexagon_A2_negsat :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.negsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_abs,SI_ftype_SI,1)
+//
+def int_hexagon_A2_abs :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.abs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_abssat,SI_ftype_SI,1)
+//
+def int_hexagon_A2_abssat :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.abssat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vconj,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vconj :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vconj">;
+//
+// BUILTIN_INFO(HEXAGON.A2_negp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_negp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.negp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_absp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_absp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.absp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_max,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_max :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.max">;
+//
+// BUILTIN_INFO(HEXAGON.A2_maxu,USI_ftype_SISI,2)
+//
+def int_hexagon_A2_maxu :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.A2.maxu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_min,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_min :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.min">;
+//
+// BUILTIN_INFO(HEXAGON.A2_minu,USI_ftype_SISI,2)
+//
+def int_hexagon_A2_minu :
+Hexagon_usi_sisi_Intrinsic<"HEXAGON.A2.minu">;
+//
+// BUILTIN_INFO(HEXAGON.A2_maxp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_maxp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.maxp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_maxup,UDI_ftype_DIDI,2)
+//
+def int_hexagon_A2_maxup :
+Hexagon_udi_didi_Intrinsic<"HEXAGON.A2.maxup">;
+//
+// BUILTIN_INFO(HEXAGON.A2_minp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_minp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.minp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_minup,UDI_ftype_DIDI,2)
+//
+def int_hexagon_A2_minup :
+Hexagon_udi_didi_Intrinsic<"HEXAGON.A2.minup">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfr,SI_ftype_SI,1)
+//
+def int_hexagon_A2_tfr :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrsi,SI_ftype_SI,1)
+//
+def int_hexagon_A2_tfrsi :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.tfrsi">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_tfrp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.tfrp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrpi,DI_ftype_SI,1)
+//
+def int_hexagon_A2_tfrpi :
+Hexagon_di_si_Intrinsic<"HEXAGON.A2.tfrpi">;
+//
+// BUILTIN_INFO(HEXAGON.A2_zxtb,SI_ftype_SI,1)
+//
+def int_hexagon_A2_zxtb :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxtb">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sxtb,SI_ftype_SI,1)
+//
+def int_hexagon_A2_sxtb :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxtb">;
+//
+// BUILTIN_INFO(HEXAGON.A2_zxth,SI_ftype_SI,1)
+//
+def int_hexagon_A2_zxth :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.zxth">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sxth,SI_ftype_SI,1)
+//
+def int_hexagon_A2_sxth :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.sxth">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combinew,DI_ftype_SISI,2)
+//
+def int_hexagon_A2_combinew :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combinew">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combineii,DI_ftype_SISI,2)
+//
+def int_hexagon_A2_combineii :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A2.combineii">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_hh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_hh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_hl,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_hl :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.hl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_lh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_lh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.lh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_combine_ll,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_combine_ll :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.combine.ll">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfril,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_tfril :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfril">;
+//
+// BUILTIN_INFO(HEXAGON.A2_tfrih,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_tfrih :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.tfrih">;
+//
+// BUILTIN_INFO(HEXAGON.A2_and,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_and :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.and">;
+//
+// BUILTIN_INFO(HEXAGON.A2_or,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_or :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.or">;
+//
+// BUILTIN_INFO(HEXAGON.A2_xor,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_xor :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.xor">;
+//
+// BUILTIN_INFO(HEXAGON.A2_not,SI_ftype_SI,1)
+//
+def int_hexagon_A2_not :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.not">;
+//
+// BUILTIN_INFO(HEXAGON.M2_xor_xacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_M2_xor_xacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M2.xor.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.A2_subri,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_subri :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.subri">;
+//
+// BUILTIN_INFO(HEXAGON.A2_andir,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_andir :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.andir">;
+//
+// BUILTIN_INFO(HEXAGON.A2_orir,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_orir :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.orir">;
+//
+// BUILTIN_INFO(HEXAGON.A2_andp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_andp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.andp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_orp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_orp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.orp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_xorp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_xorp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.xorp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_notp,DI_ftype_DI,1)
+//
+def int_hexagon_A2_notp :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.notp">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sxtw,DI_ftype_SI,1)
+//
+def int_hexagon_A2_sxtw :
+Hexagon_di_si_Intrinsic<"HEXAGON.A2.sxtw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sat,SI_ftype_DI,1)
+//
+def int_hexagon_A2_sat :
+Hexagon_si_di_Intrinsic<"HEXAGON.A2.sat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_sath,SI_ftype_SI,1)
+//
+def int_hexagon_A2_sath :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.sath">;
+//
+// BUILTIN_INFO(HEXAGON.A2_satuh,SI_ftype_SI,1)
+//
+def int_hexagon_A2_satuh :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.satuh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_satub,SI_ftype_SI,1)
+//
+def int_hexagon_A2_satub :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.satub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_satb,SI_ftype_SI,1)
+//
+def int_hexagon_A2_satb :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.satb">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddubs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddubs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddubs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vadduhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vadduhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vadduhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vaddws,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vaddws :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vaddws">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svavgh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svavgh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svavghs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svavghs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svavghs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svnavgh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svnavgh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svnavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svaddh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svaddh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svaddhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svaddhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svaddhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svadduhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svadduhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svadduhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svsubh,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svsubh :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svsubhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svsubhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_svsubuhs,SI_ftype_SISI,2)
+//
+def int_hexagon_A2_svsubuhs :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A2.svsubuhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vraddub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vraddub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vraddub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vraddub_acc,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_A2_vraddub_acc :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vraddub.acc">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vradduh,SI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vradduh :
+Hexagon_si_didi_Intrinsic<"HEXAGON.M2.vradduh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsububs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsububs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsububs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubuhs,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubuhs :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubuhs">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vsubws,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vsubws :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vsubws">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabsh,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabsh :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabshsat,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabshsat :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabshsat">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabsw,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabsw :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabsw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vabswsat,DI_ftype_DI,1)
+//
+def int_hexagon_A2_vabswsat :
+Hexagon_di_di_Intrinsic<"HEXAGON.A2.vabswsat">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vabsdiffw,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vabsdiffw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffw">;
+//
+// BUILTIN_INFO(HEXAGON.M2_vabsdiffh,DI_ftype_DIDI,2)
+//
+def int_hexagon_M2_vabsdiffh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.M2.vabsdiffh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vrsadub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vrsadub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vrsadub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vrsadub_acc,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_A2_vrsadub_acc :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.A2.vrsadub.acc">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgwr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgwr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgwr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgwr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgwcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgwcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgwcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavgwcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavgwcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavgwcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavghcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavghcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavghcr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavghcr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghcr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguwr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguwr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguwr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavgubr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavgubr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavgubr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavguhr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavguhr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavguhr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vavghr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vavghr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vavghr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vnavghr,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vnavghr :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vnavghr">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxub,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxub :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxub">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminuh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminuh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxuh,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxuh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuh">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vminuw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vminuw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vminuw">;
+//
+// BUILTIN_INFO(HEXAGON.A2_vmaxuw,DI_ftype_DIDI,2)
+//
+def int_hexagon_A2_vmaxuw :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A2.vmaxuw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_lsr_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_lsl_r_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsl.r.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsl_r_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsl_r_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsl.r.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsl_r_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsl.r.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_r_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_r_r_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.r.r.sat">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_r_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_r_r_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.r.r.sat">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_i_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_lsr_i_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.lsr.i.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_i_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_i_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_i_p :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_acc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_acc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_acc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_acc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.acc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_nac,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_nac :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_nac,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_nac :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.nac">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_xacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_xacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_xacc,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_xacc :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_xacc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_xacc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_xacc,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_xacc :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.xacc">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asr_i_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asr.i.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_lsr_i_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.lsr.i.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_asl_i_r_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.asl.i.r.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_and,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_and :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.and">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asr_i_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asr.i.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_lsr_i_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.lsr.i.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_p_or,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_asl_i_p_or :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.asl.i.p.or">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_r_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asl_i_r_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asl.i.r.sat">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_i_r_rnd :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_r_rnd_goodsyntax,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_asr_i_r_rnd_goodsyntax :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.asr.i.r.rnd.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_addasl_rrri,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_addasl_rrri :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.addasl.rrri">;
+//
+// BUILTIN_INFO(HEXAGON.S2_valignib,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_valignib :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.valignib">;
+//
+// BUILTIN_INFO(HEXAGON.S2_valignrb,DI_ftype_DIDIQI,3)
+//
+def int_hexagon_S2_valignrb :
+Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.valignrb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vspliceib,DI_ftype_DIDISI,3)
+//
+def int_hexagon_S2_vspliceib :
+Hexagon_di_didisi_Intrinsic<"HEXAGON.S2.vspliceib">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsplicerb,DI_ftype_DIDIQI,3)
+//
+def int_hexagon_S2_vsplicerb :
+Hexagon_di_didiqi_Intrinsic<"HEXAGON.S2.vsplicerb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsplatrh,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vsplatrh :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsplatrh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsplatrb,SI_ftype_SI,1)
+//
+def int_hexagon_S2_vsplatrb :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.vsplatrb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insert,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_insert :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.insert">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxb_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxb_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxb.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxh_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxh_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxh.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxw_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxw_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxw.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tableidxd_goodsyntax,SI_ftype_SISISISI,4)
+//
+def int_hexagon_S2_tableidxd_goodsyntax :
+Hexagon_si_sisisisi_Intrinsic<"HEXAGON.S2.tableidxd.goodsyntax">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractu,SI_ftype_SISISI,3)
+//
+def int_hexagon_S2_extractu :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S2.extractu">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insertp,DI_ftype_DIDISISI,4)
+//
+def int_hexagon_S2_insertp :
+Hexagon_di_didisisi_Intrinsic<"HEXAGON.S2.insertp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractup,DI_ftype_DISISI,3)
+//
+def int_hexagon_S2_extractup :
+Hexagon_di_disisi_Intrinsic<"HEXAGON.S2.extractup">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insert_rp,SI_ftype_SISIDI,3)
+//
+def int_hexagon_S2_insert_rp :
+Hexagon_si_sisidi_Intrinsic<"HEXAGON.S2.insert.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractu_rp,SI_ftype_SIDI,2)
+//
+def int_hexagon_S2_extractu_rp :
+Hexagon_si_sidi_Intrinsic<"HEXAGON.S2.extractu.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_insertp_rp,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_S2_insertp_rp :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.S2.insertp.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_extractup_rp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_extractup_rp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.extractup.rp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tstbit_i,QI_ftype_SISI,2)
+//
+def int_hexagon_S2_tstbit_i :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_setbit_i,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_setbit_i :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_togglebit_i,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_togglebit_i :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clrbit_i,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_clrbit_i :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.i">;
+//
+// BUILTIN_INFO(HEXAGON.S2_tstbit_r,QI_ftype_SISI,2)
+//
+def int_hexagon_S2_tstbit_r :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.S2.tstbit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_setbit_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_setbit_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.setbit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_togglebit_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_togglebit_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.togglebit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clrbit_r,SI_ftype_SISI,2)
+//
+def int_hexagon_S2_clrbit_r :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.S2.clrbit.r">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_i_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_i_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_vh,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsl_r_vh :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.i.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_i_svw_trun,SI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_i_svw_trun :
+Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.i.svw.trun">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_svw_trun,SI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_svw_trun :
+Hexagon_si_disi_Intrinsic<"HEXAGON.S2.asr.r.svw.trun">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_i_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_i_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.i.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_i_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_i_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.i.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asr_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asr_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asr.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_asl_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_asl_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.asl.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsr_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsr_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsr.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lsl_r_vw,DI_ftype_DISI,2)
+//
+def int_hexagon_S2_lsl_r_vw :
+Hexagon_di_disi_Intrinsic<"HEXAGON.S2.lsl.r.vw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vrndpackwh,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vrndpackwh :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vrndpackwhs,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vrndpackwhs :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vrndpackwhs">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsxtbh,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vsxtbh :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxtbh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vzxtbh,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vzxtbh :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxtbh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathub,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathub :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathub">;
+//
+// BUILTIN_INFO(HEXAGON.S2_svsathub,SI_ftype_SI,1)
+//
+def int_hexagon_S2_svsathub :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathub">;
+//
+// BUILTIN_INFO(HEXAGON.S2_svsathb,SI_ftype_SI,1)
+//
+def int_hexagon_S2_svsathb :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.svsathb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathb,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathb :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsathb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunohb,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vtrunohb :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunohb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunewh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_vtrunewh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunewh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunowh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_vtrunowh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.vtrunowh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vtrunehb,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vtrunehb :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vtrunehb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsxthw,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vsxthw :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vsxthw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vzxthw,DI_ftype_SI,1)
+//
+def int_hexagon_S2_vzxthw :
+Hexagon_di_si_Intrinsic<"HEXAGON.S2.vzxthw">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwh,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwh :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwuh,SI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwuh :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.vsatwuh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_packhl,DI_ftype_SISI,2)
+//
+def int_hexagon_S2_packhl :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.S2.packhl">;
+//
+// BUILTIN_INFO(HEXAGON.A2_swiz,SI_ftype_SI,1)
+//
+def int_hexagon_A2_swiz :
+Hexagon_si_si_Intrinsic<"HEXAGON.A2.swiz">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathub_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathub_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathub.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsathb_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsathb_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsathb.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwh_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwh_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwh.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_vsatwuh_nopack,DI_ftype_DI,1)
+//
+def int_hexagon_S2_vsatwuh_nopack :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.vsatwuh.nopack">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffob,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffob :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffob">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffeb,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffeb :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffoh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffoh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffoh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_shuffeh,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_shuffeh :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.shuffeh">;
+//
+// BUILTIN_INFO(HEXAGON.S2_parityp,SI_ftype_DIDI,2)
+//
+def int_hexagon_S2_parityp :
+Hexagon_si_didi_Intrinsic<"HEXAGON.S2.parityp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_lfsp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S2_lfsp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S2.lfsp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clbnorm,SI_ftype_SI,1)
+//
+def int_hexagon_S2_clbnorm :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.clbnorm">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clb,SI_ftype_SI,1)
+//
+def int_hexagon_S2_clb :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.clb">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl0,SI_ftype_SI,1)
+//
+def int_hexagon_S2_cl0 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl0">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl1,SI_ftype_SI,1)
+//
+def int_hexagon_S2_cl1 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.cl1">;
+//
+// BUILTIN_INFO(HEXAGON.S2_clbp,SI_ftype_DI,1)
+//
+def int_hexagon_S2_clbp :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.clbp">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl0p,SI_ftype_DI,1)
+//
+def int_hexagon_S2_cl0p :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl0p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_cl1p,SI_ftype_DI,1)
+//
+def int_hexagon_S2_cl1p :
+Hexagon_si_di_Intrinsic<"HEXAGON.S2.cl1p">;
+//
+// BUILTIN_INFO(HEXAGON.S2_brev,SI_ftype_SI,1)
+//
+def int_hexagon_S2_brev :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.brev">;
+//
+// BUILTIN_INFO(HEXAGON.S2_ct0,SI_ftype_SI,1)
+//
+def int_hexagon_S2_ct0 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct0">;
+//
+// BUILTIN_INFO(HEXAGON.S2_ct1,SI_ftype_SI,1)
+//
+def int_hexagon_S2_ct1 :
+Hexagon_si_si_Intrinsic<"HEXAGON.S2.ct1">;
+//
+// BUILTIN_INFO(HEXAGON.S2_interleave,DI_ftype_DI,1)
+//
+def int_hexagon_S2_interleave :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.interleave">;
+//
+// BUILTIN_INFO(HEXAGON.S2_deinterleave,DI_ftype_DI,1)
+//
+def int_hexagon_S2_deinterleave :
+Hexagon_di_di_Intrinsic<"HEXAGON.S2.deinterleave">;
+
+//
+// BUILTIN_INFO(SI_to_SXTHI_asrh,SI_ftype_SI,1)
+//
+def int_hexagon_SI_to_SXTHI_asrh :
+Hexagon_si_si_Intrinsic<"SI.to.SXTHI.asrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.A4_orn,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_orn :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.orn">;
+//
+// BUILTIN_INFO(HEXAGON.A4_andn,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_andn :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.andn">;
+//
+// BUILTIN_INFO(HEXAGON.A4_ornp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A4_ornp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A4.ornp">;
+//
+// BUILTIN_INFO(HEXAGON.A4_andnp,DI_ftype_DIDI,2)
+//
+def int_hexagon_A4_andnp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.A4.andnp">;
+//
+// BUILTIN_INFO(HEXAGON.A4_combineir,DI_ftype_SISI,2)
+//
+def int_hexagon_A4_combineir :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineir">;
+//
+// BUILTIN_INFO(HEXAGON.A4_combineri,DI_ftype_SISI,2)
+//
+def int_hexagon_A4_combineri :
+Hexagon_di_sisi_Intrinsic<"HEXAGON.A4.combineri">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmpneq,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmpneq :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneq">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmpneqi,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmpneqi :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpneqi">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmplte,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmplte :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplte">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmpltei,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmpltei :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmpltei">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmplteu,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmplteu :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteu">;
+//
+// BUILTIN_INFO(HEXAGON.C4_cmplteui,QI_ftype_SISI,2)
+//
+def int_hexagon_C4_cmplteui :
+Hexagon_qi_sisi_Intrinsic<"HEXAGON.C4.cmplteui">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpneq,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpneq :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneq">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpneqi,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpneqi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpneqi">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpeq,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpeq :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeq">;
+//
+// BUILTIN_INFO(HEXAGON.A4_rcmpeqi,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_rcmpeqi :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.rcmpeqi">;
+//
+// BUILTIN_INFO(HEXAGON.C4_fastcorner9,QI_ftype_QIQI,2)
+//
+def int_hexagon_C4_fastcorner9 :
+Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9">;
+//
+// BUILTIN_INFO(HEXAGON.C4_fastcorner9_not,QI_ftype_QIQI,2)
+//
+def int_hexagon_C4_fastcorner9_not :
+Hexagon_qi_qiqi_Intrinsic<"HEXAGON.C4.fastcorner9_not">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_andn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_andn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_andn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_and,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_and :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_and">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_orn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_orn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_orn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_and_or,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_and_or :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.and_or">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_andn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_andn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_andn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_and,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_and :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_and">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_orn,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_orn :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_orn">;
+//
+// BUILTIN_INFO(HEXAGON.C4_or_or,QI_ftype_QIQIQI,3)
+//
+def int_hexagon_C4_or_or :
+Hexagon_qi_qiqiqi_Intrinsic<"HEXAGON.C4.or_or">;
+//
+// BUILTIN_INFO(HEXAGON.S4_addaddi,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_addaddi :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.addaddi">;
+//
+// BUILTIN_INFO(HEXAGON.S4_subaddi,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_subaddi :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.subaddi">;
+//
+// BUILTIN_INFO(HEXAGON.S4_andnp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S4_andnp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S4.andnp">;
+//
+// BUILTIN_INFO(HEXAGON.S4_ornp,DI_ftype_DIDI,2)
+//
+def int_hexagon_S4_ornp :
+Hexagon_di_didi_Intrinsic<"HEXAGON.S4.ornp">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_xacc,DI_ftype_DIDIDI,3)
+//
+def int_hexagon_M4_xor_xacc :
+Hexagon_di_dididi_Intrinsic<"HEXAGON.M4.xor_xacc">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_and">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_andn,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_andn :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_andn">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_or">;
+//
+// BUILTIN_INFO(HEXAGON.M4_and_xor,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_and_xor :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.and_xor">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_xor_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_or">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_xor_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_and">;
+//
+// BUILTIN_INFO(HEXAGON.M4_xor_andn,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_xor_andn :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.xor_andn">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_and,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_and :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_and">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_or,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_or :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_or">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_xor,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_xor :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_xor">;
+//
+// BUILTIN_INFO(HEXAGON.M4_or_andn,SI_ftype_SISISI,3)
+//
+def int_hexagon_M4_or_andn :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.M4.or_andn">;
+//
+// BUILTIN_INFO(HEXAGON.S4_or_andix,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_or_andix :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_andix">;
+//
+// BUILTIN_INFO(HEXAGON.S4_or_andi,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_or_andi :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_andi">;
+//
+// BUILTIN_INFO(HEXAGON.S4_or_ori,SI_ftype_SISISI,3)
+//
+def int_hexagon_S4_or_ori :
+Hexagon_si_sisisi_Intrinsic<"HEXAGON.S4.or_ori">;
+//
+// BUILTIN_INFO(HEXAGON.A4_modwrapu,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_modwrapu :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.modwrapu">;
+//
+// BUILTIN_INFO(HEXAGON.A4_cround_ri,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_cround_ri :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround_ri">;
+//
+// BUILTIN_INFO(HEXAGON.A4_cround_rr,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_cround_rr :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.cround_rr">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_ri,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_ri :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_ri">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_rr,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_rr :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_rr">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_ri_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_ri_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_ri_sat">;
+//
+// BUILTIN_INFO(HEXAGON.A4_round_rr_sat,SI_ftype_SISI,2)
+//
+def int_hexagon_A4_round_rr_sat :
+Hexagon_si_sisi_Intrinsic<"HEXAGON.A4.round_rr_sat">;
diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index d445a010cefd..a6fda4a3afc7 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -145,10 +145,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_cmp_ss :
+ def int_x86_sse_cmp_ss : GCCBuiltin<"__builtin_ia32_cmpss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse_cmp_ps :
+ def int_x86_sse_cmp_ps : GCCBuiltin<"__builtin_ia32_cmpps">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">,
@@ -281,10 +281,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FP comparison ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_cmp_sd :
+ def int_x86_sse2_cmp_sd : GCCBuiltin<"__builtin_ia32_cmpsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_sse2_cmp_pd :
+ def int_x86_sse2_cmp_pd : GCCBuiltin<"__builtin_ia32_cmppd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">,
@@ -452,28 +452,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_i32_ty], [IntrNoMem]>;
}
-// Integer comparison ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
- def int_x86_sse2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty], [IntrNoMem]>;
- def int_x86_sse2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
- def int_x86_sse2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
-}
-
// Conversion ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">,
@@ -627,8 +605,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
@@ -655,8 +633,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
+ llvm_v16i8_ty], [IntrNoMem]>;
}
// Packed multiply high with round and scale
@@ -792,12 +770,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector compare, min, max
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem, Commutative]>;
- def int_x86_sse42_pcmpgtq : GCCBuiltin<"__builtin_ia32_pcmpgtq">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
- [IntrNoMem]>;
def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem, Commutative]>;
@@ -919,7 +891,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector sum of absolute differences
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty],
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty],
[IntrNoMem, Commutative]>;
}
@@ -932,13 +904,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Test instruction with bitwise comparison.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">,
- Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ Intrinsic<[llvm_i32_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
}
@@ -1120,17 +1092,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vpermil_pd : GCCBuiltin<"__builtin_ia32_vpermilpd">,
+ def int_x86_avx_vpermil_pd :
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vpermil_ps : GCCBuiltin<"__builtin_ia32_vpermilps">,
+ def int_x86_avx_vpermil_ps :
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vpermil_pd_256 : GCCBuiltin<"__builtin_ia32_vpermilpd256">,
+ def int_x86_avx_vpermil_pd_256 :
Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty,
llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx_vpermil_ps_256 : GCCBuiltin<"__builtin_ia32_vpermilps256">,
+ def int_x86_avx_vpermil_ps_256 :
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
llvm_i8_ty], [IntrNoMem]>;
}
@@ -1281,13 +1253,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector load with broadcast
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vbroadcastss :
+ def int_x86_avx_vbroadcast_ss :
GCCBuiltin<"__builtin_ia32_vbroadcastss">,
Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcast_sd_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastsd256">,
Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_x86_avx_vbroadcastss_256 :
+ def int_x86_avx_vbroadcast_ss_256 :
GCCBuiltin<"__builtin_ia32_vbroadcastss256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_vbroadcastf128_pd_256 :
@@ -1300,12 +1272,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// SIMD load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_loadu_pd_256 : GCCBuiltin<"__builtin_ia32_loadupd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_x86_avx_loadu_ps_256 : GCCBuiltin<"__builtin_ia32_loadups256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>;
- def int_x86_avx_loadu_dq_256 : GCCBuiltin<"__builtin_ia32_loaddqu256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>;
}
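The three loadu intrinsics are dropped because a plain IR load with alignment 1 expresses the same unaligned access. A hedged sketch of the replacement pattern (illustrative helper, era IRBuilder API):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Builds the unaligned <8 x float> load formerly spelled
    // @llvm.x86.avx.loadu.ps.256.
    static Value *loadUnalignedPS256(IRBuilder<> &B, Value *Ptr) {
      Type *VecTy = VectorType::get(B.getFloatTy(), 8);
      Value *Cast = B.CreateBitCast(Ptr, VecTy->getPointerTo());
      LoadInst *LI = B.CreateLoad(Cast, "loadu");
      LI->setAlignment(1);  // alignment 1: may be unaligned
      return LI;
    }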
@@ -1361,6 +1327,1046 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
}
//===----------------------------------------------------------------------===//
+// AVX2
+
+// Integer arithmetic ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmul_dq : GCCBuiltin<"__builtin_ia32_pmuldq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+}
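As a reading aid: the padds/paddus/psubs/psubus forms clamp to the element range instead of wrapping. A scalar reference model of one padds.b lane (plain C++, not part of the patch):

    #include <cstdint>

    // One lane of padds.b: signed 8-bit add with saturation.
    static inline int8_t padds_b_lane(int8_t a, int8_t b) {
      int sum = int(a) + int(b);
      if (sum > 127)  sum = 127;    // clamp high
      if (sum < -128) sum = -128;   // clamp low
      return static_cast<int8_t>(sum);
    }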
+
+// Vector min, max
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxu_w : GCCBuiltin<"__builtin_ia32_pmaxuw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxu_d : GCCBuiltin<"__builtin_ia32_pmaxud256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxs_b : GCCBuiltin<"__builtin_ia32_pmaxsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmaxs_d : GCCBuiltin<"__builtin_ia32_pmaxsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pminu_w : GCCBuiltin<"__builtin_ia32_pminuw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pminu_d : GCCBuiltin<"__builtin_ia32_pminud256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmins_b : GCCBuiltin<"__builtin_ia32_pminsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_pmins_d : GCCBuiltin<"__builtin_ia32_pminsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem, Commutative]>;
+}
+
+// Integer shift ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psll_d : GCCBuiltin<"__builtin_ia32_pslld256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psll_q : GCCBuiltin<"__builtin_ia32_psllq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx2_psra_w : GCCBuiltin<"__builtin_ia32_psraw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psra_d : GCCBuiltin<"__builtin_ia32_psrad256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v4i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi256_byteshift">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi256_byteshift">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Pack ops.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_packsswb : GCCBuiltin<"__builtin_ia32_packsswb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_packssdw : GCCBuiltin<"__builtin_ia32_packssdw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_packuswb : GCCBuiltin<"__builtin_ia32_packuswb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_packusdw : GCCBuiltin<"__builtin_ia32_packusdw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+}
+
+// Absolute value ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
+}
+
+// Horizontal arithmetic ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+}
+
+// Sign ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty], [IntrNoMem]>;
+}
+
+// Packed multiply high with round and scale
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty], [IntrNoMem, Commutative]>;
+}
+
+// Vector sign and zero extend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty],
+ [IntrNoMem]>;
+}
+
+// Vector blend
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+}
+
+// Vector load with broadcast
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_vbroadcast_ss_ps :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx2_vbroadcast_sd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_avx2_vbroadcast_ss_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_avx2_vbroadcasti128 :
+ GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+ def int_x86_avx2_pbroadcastb_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastb_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastw_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastw_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastd_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastd_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastq_128 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx2_pbroadcastq_256 :
+ GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
+}
+
+// Vector permutation
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_permd : GCCBuiltin<"__builtin_ia32_permvarsi256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_permq : GCCBuiltin<"__builtin_ia32_permdi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_permpd : GCCBuiltin<"__builtin_ia32_permdf256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Vector extract and insert
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_vextracti128 : GCCBuiltin<"__builtin_ia32_extract128i256">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_vinserti128 : GCCBuiltin<"__builtin_ia32_insert128i256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+}
+
+// Conditional load ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem]>;
+ def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem]>;
+ def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem]>;
+ def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem]>;
+}
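Maskload semantics, for readers of this block: lane i is loaded only when the sign bit of mask element i is set, and it reads as zero otherwise; the hardware additionally suppresses faults on masked-out lanes, which this scalar model does not capture.

    #include <cstdint>

    // Scalar model of maskload.d (four dword lanes).
    static void maskload_d(const int32_t *p, const int32_t mask[4],
                           int32_t out[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = (mask[i] < 0) ? p[i] : 0;  // sign bit selects the lane
    }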
+
+// Conditional store ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], []>;
+ def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+ def int_x86_avx2_maskstore_d_256 :
+ GCCBuiltin<"__builtin_ia32_maskstored256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], []>;
+ def int_x86_avx2_maskstore_q_256 :
+ GCCBuiltin<"__builtin_ia32_maskstoreq256">,
+ Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>;
+}
+
+// Variable bit shift ops
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_psllv_d : GCCBuiltin<"__builtin_ia32_psllv4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psllv_d_256 : GCCBuiltin<"__builtin_ia32_psllv8si">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psllv_q : GCCBuiltin<"__builtin_ia32_psllv2di">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psllv_q_256 : GCCBuiltin<"__builtin_ia32_psllv4di">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrlv_d_256 : GCCBuiltin<"__builtin_ia32_psrlv8si">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv2di">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrlv_q_256 : GCCBuiltin<"__builtin_ia32_psrlv4di">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_psrav_d_256 : GCCBuiltin<"__builtin_ia32_psrav8si">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+}
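Unlike the earlier shifts, these take a per-lane count, and a count of the element width or more zeroes the lane (in C that would be undefined). Scalar model of psllv.d:

    #include <cstdint>

    // Scalar model of psllv.d: each lane shifts by its own count.
    static void psllv_d(const uint32_t a[4], const uint32_t cnt[4],
                        uint32_t out[4]) {
      for (int i = 0; i < 4; ++i)
        out[i] = (cnt[i] < 32) ? (a[i] << cnt[i]) : 0;  // >= 32 zeroes it
    }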
+
+// Misc.
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
+ Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
+ llvm_v32i8_ty], [IntrNoMem]>;
+ def int_x86_avx2_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+ llvm_i32_ty], [IntrNoMem, Commutative]>;
+ def int_x86_avx2_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// FMA4
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_pd : GCCBuiltin<"__builtin_ia32_vfmaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfmaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfmaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_ss : GCCBuiltin<"__builtin_ia32_vfmsubss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_sd : GCCBuiltin<"__builtin_ia32_vfmsubsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_ps : GCCBuiltin<"__builtin_ia32_vfmsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_pd : GCCBuiltin<"__builtin_ia32_vfmsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfmsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfmsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_ss : GCCBuiltin<"__builtin_ia32_vfnmaddss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_sd : GCCBuiltin<"__builtin_ia32_vfnmaddsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_ps : GCCBuiltin<"__builtin_ia32_vfnmaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_pd : GCCBuiltin<"__builtin_ia32_vfnmaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmadd_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_ss : GCCBuiltin<"__builtin_ia32_vfnmsubss">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_sd : GCCBuiltin<"__builtin_ia32_vfnmsubsd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_ps : GCCBuiltin<"__builtin_ia32_vfnmsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_pd : GCCBuiltin<"__builtin_ia32_vfnmsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_ps_256 : GCCBuiltin<"__builtin_ia32_vfnmsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfnmsub_pd_256 : GCCBuiltin<"__builtin_ia32_vfnmsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_ps : GCCBuiltin<"__builtin_ia32_vfmaddsubps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_pd : GCCBuiltin<"__builtin_ia32_vfmaddsubpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmaddsub_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmaddsubpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_ps : GCCBuiltin<"__builtin_ia32_vfmsubaddps">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_pd : GCCBuiltin<"__builtin_ia32_vfmsubaddpd">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddps256">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty],
+ [IntrNoMem]>;
+ def int_x86_fma4_vfmsubadd_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfmsubaddpd256">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty],
+ [IntrNoMem]>;
+}
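All the FMA4 forms compute a fused a*b +/- c with a single rounding; a separate multiply and add rounds twice and can differ in the last bit. One vfmadd lane, modeled with the standard fused primitive (requires C99/C++11 fmaf):

    #include <cmath>

    // One lane of vfmadd.ss/ps: multiply-add rounded once.
    static float vfmadd_lane(float a, float b, float c) {
      return std::fmaf(a, b, c);
    }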
+
+//===----------------------------------------------------------------------===//
+// XOP
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_xop_vpermil2pd : GCCBuiltin<"__builtin_ia32_vpermil2pd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
+ llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpermil2pd_256 :
+ GCCBuiltin<"__builtin_ia32_vpermil2pd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
+ llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vpermil2ps : GCCBuiltin<"__builtin_ia32_vpermil2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
+ llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpermil2ps_256 :
+ GCCBuiltin<"__builtin_ia32_vpermil2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
+ llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_xop_vfrcz_pd :
+ GCCBuiltin<"__builtin_ia32_vfrczpd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ps :
+ GCCBuiltin<"__builtin_ia32_vfrczps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_sd :
+ GCCBuiltin<"__builtin_ia32_vfrczsd">,
+ Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ss :
+ GCCBuiltin<"__builtin_ia32_vfrczss">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vfrcz_pd_256 :
+ GCCBuiltin<"__builtin_ia32_vfrczpd256">,
+ Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>;
+ def int_x86_xop_vfrcz_ps_256 :
+ GCCBuiltin<"__builtin_ia32_vfrczps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>;
+ def int_x86_xop_vpcmov :
+ GCCBuiltin<"__builtin_ia32_vpcmov">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcmov_256 :
+ GCCBuiltin<"__builtin_ia32_vpcmov_256">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqb :
+ GCCBuiltin<"__builtin_ia32_vpcomeqb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqw :
+ GCCBuiltin<"__builtin_ia32_vpcomeqw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqd :
+ GCCBuiltin<"__builtin_ia32_vpcomeqd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomeqq :
+ GCCBuiltin<"__builtin_ia32_vpcomeqq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequb :
+ GCCBuiltin<"__builtin_ia32_vpcomequb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequd :
+ GCCBuiltin<"__builtin_ia32_vpcomequd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequq :
+ GCCBuiltin<"__builtin_ia32_vpcomequq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomequw :
+ GCCBuiltin<"__builtin_ia32_vpcomequw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseb :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalsed :
+ GCCBuiltin<"__builtin_ia32_vpcomfalsed">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseq :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseub :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseud :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseuq :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalseuw :
+ GCCBuiltin<"__builtin_ia32_vpcomfalseuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomfalsew :
+ GCCBuiltin<"__builtin_ia32_vpcomfalsew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeb :
+ GCCBuiltin<"__builtin_ia32_vpcomgeb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomged :
+ GCCBuiltin<"__builtin_ia32_vpcomged">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeq :
+ GCCBuiltin<"__builtin_ia32_vpcomgeq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeub :
+ GCCBuiltin<"__builtin_ia32_vpcomgeub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeud :
+ GCCBuiltin<"__builtin_ia32_vpcomgeud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeuq :
+ GCCBuiltin<"__builtin_ia32_vpcomgeuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgeuw :
+ GCCBuiltin<"__builtin_ia32_vpcomgeuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgew :
+ GCCBuiltin<"__builtin_ia32_vpcomgew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtb :
+ GCCBuiltin<"__builtin_ia32_vpcomgtb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtd :
+ GCCBuiltin<"__builtin_ia32_vpcomgtd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtq :
+ GCCBuiltin<"__builtin_ia32_vpcomgtq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtub :
+ GCCBuiltin<"__builtin_ia32_vpcomgtub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtud :
+ GCCBuiltin<"__builtin_ia32_vpcomgtud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtuq :
+ GCCBuiltin<"__builtin_ia32_vpcomgtuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtuw :
+ GCCBuiltin<"__builtin_ia32_vpcomgtuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomgtw :
+ GCCBuiltin<"__builtin_ia32_vpcomgtw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleb :
+ GCCBuiltin<"__builtin_ia32_vpcomleb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomled :
+ GCCBuiltin<"__builtin_ia32_vpcomled">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleq :
+ GCCBuiltin<"__builtin_ia32_vpcomleq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleub :
+ GCCBuiltin<"__builtin_ia32_vpcomleub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleud :
+ GCCBuiltin<"__builtin_ia32_vpcomleud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleuq :
+ GCCBuiltin<"__builtin_ia32_vpcomleuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomleuw :
+ GCCBuiltin<"__builtin_ia32_vpcomleuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomlew :
+ GCCBuiltin<"__builtin_ia32_vpcomlew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltb :
+ GCCBuiltin<"__builtin_ia32_vpcomltb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltd :
+ GCCBuiltin<"__builtin_ia32_vpcomltd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltq :
+ GCCBuiltin<"__builtin_ia32_vpcomltq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltub :
+ GCCBuiltin<"__builtin_ia32_vpcomltub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltud :
+ GCCBuiltin<"__builtin_ia32_vpcomltud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltuq :
+ GCCBuiltin<"__builtin_ia32_vpcomltuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltuw :
+ GCCBuiltin<"__builtin_ia32_vpcomltuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomltw :
+ GCCBuiltin<"__builtin_ia32_vpcomltw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneb :
+ GCCBuiltin<"__builtin_ia32_vpcomneb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomned :
+ GCCBuiltin<"__builtin_ia32_vpcomned">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneq :
+ GCCBuiltin<"__builtin_ia32_vpcomneq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneub :
+ GCCBuiltin<"__builtin_ia32_vpcomneub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneud :
+ GCCBuiltin<"__builtin_ia32_vpcomneud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneuq :
+ GCCBuiltin<"__builtin_ia32_vpcomneuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomneuw :
+ GCCBuiltin<"__builtin_ia32_vpcomneuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomnew :
+ GCCBuiltin<"__builtin_ia32_vpcomnew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueb :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrued :
+ GCCBuiltin<"__builtin_ia32_vpcomtrued">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueq :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueub :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueub">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueud :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueud">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueuq :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueuq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtrueuw :
+ GCCBuiltin<"__builtin_ia32_vpcomtrueuw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpcomtruew :
+ GCCBuiltin<"__builtin_ia32_vpcomtruew">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vphaddbd :
+ GCCBuiltin<"__builtin_ia32_vphaddbd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddbq :
+ GCCBuiltin<"__builtin_ia32_vphaddbq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddbw :
+ GCCBuiltin<"__builtin_ia32_vphaddbw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadddq :
+ GCCBuiltin<"__builtin_ia32_vphadddq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubd :
+ GCCBuiltin<"__builtin_ia32_vphaddubd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubq :
+ GCCBuiltin<"__builtin_ia32_vphaddubq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddubw :
+ GCCBuiltin<"__builtin_ia32_vphaddubw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddudq :
+ GCCBuiltin<"__builtin_ia32_vphaddudq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadduwd :
+ GCCBuiltin<"__builtin_ia32_vphadduwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphadduwq :
+ GCCBuiltin<"__builtin_ia32_vphadduwq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddwd :
+ GCCBuiltin<"__builtin_ia32_vphaddwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphaddwq :
+ GCCBuiltin<"__builtin_ia32_vphaddwq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubbw :
+ GCCBuiltin<"__builtin_ia32_vphsubbw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubdq :
+ GCCBuiltin<"__builtin_ia32_vphsubdq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_xop_vphsubwd :
+ GCCBuiltin<"__builtin_ia32_vphsubwd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_xop_vpmacsdd :
+ GCCBuiltin<"__builtin_ia32_vpmacsdd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsdqh :
+ GCCBuiltin<"__builtin_ia32_vpmacsdqh">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsdql :
+ GCCBuiltin<"__builtin_ia32_vpmacsdql">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdd :
+ GCCBuiltin<"__builtin_ia32_vpmacssdd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdqh :
+ GCCBuiltin<"__builtin_ia32_vpmacssdqh">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssdql :
+ GCCBuiltin<"__builtin_ia32_vpmacssdql">,
+ Intrinsic<[llvm_v2i64_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsswd :
+ GCCBuiltin<"__builtin_ia32_vpmacsswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacssww :
+ GCCBuiltin<"__builtin_ia32_vpmacssww">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacswd :
+ GCCBuiltin<"__builtin_ia32_vpmacswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmacsww :
+ GCCBuiltin<"__builtin_ia32_vpmacsww">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmadcsswd :
+ GCCBuiltin<"__builtin_ia32_vpmadcsswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpmadcswd :
+ GCCBuiltin<"__builtin_ia32_vpmadcswd">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpperm :
+ GCCBuiltin<"__builtin_ia32_vpperm">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotb :
+ GCCBuiltin<"__builtin_ia32_vprotb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotd :
+ GCCBuiltin<"__builtin_ia32_vprotd">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotq :
+ GCCBuiltin<"__builtin_ia32_vprotq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vprotw :
+ GCCBuiltin<"__builtin_ia32_vprotw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshab :
+ GCCBuiltin<"__builtin_ia32_vpshab">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshad :
+ GCCBuiltin<"__builtin_ia32_vpshad">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshaq :
+ GCCBuiltin<"__builtin_ia32_vpshaq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshaw :
+ GCCBuiltin<"__builtin_ia32_vpshaw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlb :
+ GCCBuiltin<"__builtin_ia32_vpshlb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshld :
+ GCCBuiltin<"__builtin_ia32_vpshld">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlq :
+ GCCBuiltin<"__builtin_ia32_vpshlq">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+ def int_x86_xop_vpshlw :
+ GCCBuiltin<"__builtin_ia32_vpshlw">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+}
+
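The vprot group rotates each lane by a signed per-lane count, negative meaning a right rotate; reducing the count modulo the lane width folds both directions into one left rotate. Scalar model of a vprotd lane:

    #include <cstdint>

    // One lane of vprotd: rotate left; a negative count rotates right.
    static uint32_t vprotd_lane(uint32_t a, int cnt) {
      unsigned n = static_cast<unsigned>(cnt) & 31;
      return n ? ((a << n) | (a >> (32 - n))) : a;
    }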
+//===----------------------------------------------------------------------===//
// MMX
// Empty MMX state op.
@@ -1587,13 +2593,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
- llvm_x86mmx_ty], [IntrNoMem]>;
+ llvm_x86mmx_ty], [IntrNoMem, Commutative]>;
def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
@@ -1629,3 +2635,63 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
+
+//===----------------------------------------------------------------------===//
+// BMI
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_bmi_bextr_32 : GCCBuiltin<"__builtin_ia32_bextr_u32">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_bextr_64 : GCCBuiltin<"__builtin_ia32_bextr_u64">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_bmi_bzhi_32 : GCCBuiltin<"__builtin_ia32_bzhi_si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_bzhi_64 : GCCBuiltin<"__builtin_ia32_bzhi_di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_bmi_pdep_32 : GCCBuiltin<"__builtin_ia32_pdep_si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_pdep_64 : GCCBuiltin<"__builtin_ia32_pdep_di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+ def int_x86_bmi_pext_32 : GCCBuiltin<"__builtin_ia32_pext_si">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_bmi_pext_64 : GCCBuiltin<"__builtin_ia32_pext_di">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+}
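pdep and pext are bit scatter/gather: pdep deposits the low bits of the source at the positions of the set mask bits, from least significant upward, and pext is the inverse. A scalar model of pdep.32 (loop form; the instruction itself is constant time):

    #include <cstdint>

    // Scalar model of pdep.32.
    static uint32_t pdep32(uint32_t src, uint32_t mask) {
      uint32_t out = 0;
      for (uint32_t bit = 1; mask != 0; bit <<= 1) {
        uint32_t lowest = mask & -mask;  // lowest set bit of mask
        if (src & bit)
          out |= lowest;
        mask ^= lowest;                  // consume that mask bit
      }
      return out;
    }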
+
+//===----------------------------------------------------------------------===//
+// FS/GS Base
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
+ Intrinsic<[llvm_i32_ty], []>;
+ def int_x86_rdgsbase_32 : GCCBuiltin<"__builtin_ia32_rdgsbase32">,
+ Intrinsic<[llvm_i32_ty], []>;
+ def int_x86_rdfsbase_64 : GCCBuiltin<"__builtin_ia32_rdfsbase64">,
+ Intrinsic<[llvm_i64_ty], []>;
+ def int_x86_rdgsbase_64 : GCCBuiltin<"__builtin_ia32_rdgsbase64">,
+ Intrinsic<[llvm_i64_ty], []>;
+ def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
+ Intrinsic<[], [llvm_i32_ty]>;
+ def int_x86_wrgsbase_32 : GCCBuiltin<"__builtin_ia32_wrgsbase32">,
+ Intrinsic<[], [llvm_i32_ty]>;
+ def int_x86_wrfsbase_64 : GCCBuiltin<"__builtin_ia32_wrfsbase64">,
+ Intrinsic<[], [llvm_i64_ty]>;
+ def int_x86_wrgsbase_64 : GCCBuiltin<"__builtin_ia32_wrgsbase64">,
+ Intrinsic<[], [llvm_i64_ty]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Half float conversion
+
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+ def int_x86_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps">,
+ Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256">,
+ Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+}
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
index 65146c31aaa3..18adcd1e3c23 100644
--- a/include/llvm/LLVMContext.h
+++ b/include/llvm/LLVMContext.h
@@ -19,6 +19,7 @@ namespace llvm {
class LLVMContextImpl;
class StringRef;
+class Twine;
class Instruction;
class Module;
class SMDiagnostic;
@@ -40,7 +41,9 @@ public:
enum {
MD_dbg = 0, // "dbg"
MD_tbaa = 1, // "tbaa"
- MD_prof = 2 // "prof"
+ MD_prof = 2, // "prof"
+ MD_fpaccuracy = 3, // "fpaccuracy"
+ MD_range = 4 // "range"
};
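The two new kinds get fixed IDs so in-tree code can attach them without a string lookup. A sketch of tagging a load with the new range metadata, asserting the loaded value lies in [0, 256); era API, illustrative helper name:

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    using namespace llvm;

    // Attaches !range metadata for the half-open interval [0, 256) to LI.
    static void addByteRange(LoadInst *LI) {
      LLVMContext &Ctx = LI->getContext();
      Value *Range[] = {
        ConstantInt::get(Type::getInt32Ty(Ctx), 0),    // inclusive low
        ConstantInt::get(Type::getInt32Ty(Ctx), 256)   // exclusive high
      };
      LI->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, Range));
    }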
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -79,9 +82,9 @@ public:
/// be prepared to drop the erroneous construct on the floor and "not crash".
/// The generated code need not be correct. The error message will be
/// implicitly prefixed with "error: " and should not end with a ".".
- void emitError(unsigned LocCookie, StringRef ErrorStr);
- void emitError(const Instruction *I, StringRef ErrorStr);
- void emitError(StringRef ErrorStr);
+ void emitError(unsigned LocCookie, const Twine &ErrorStr);
+ void emitError(const Instruction *I, const Twine &ErrorStr);
+ void emitError(const Twine &ErrorStr);
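Taking a Twine lets call sites splice values into the message without first materializing a std::string. A hedged sketch (illustrative function name):

    #include "llvm/ADT/Twine.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    static void reportBadAlign(LLVMContext &Ctx, const Instruction *I,
                               unsigned Align) {
      // Twine concatenation is lazy; nothing is allocated until emitError
      // renders the message.
      Ctx.emitError(I, "unsupported alignment: " + Twine(Align));
    }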
private:
// DO NOT IMPLEMENT
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index f690d045d172..2258d45ce90a 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -31,6 +31,7 @@
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
#include <cstdlib>
@@ -69,7 +70,7 @@ namespace {
(void) llvm::createEdgeProfilerPass();
(void) llvm::createOptimalEdgeProfilerPass();
(void) llvm::createPathProfilerPass();
- (void) llvm::createGCOVProfilerPass(true, true, false);
+ (void) llvm::createGCOVProfilerPass();
(void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerPass();
(void) llvm::createGlobalDCEPass();
@@ -97,6 +98,7 @@ namespace {
(void) llvm::createNoAAPass();
(void) llvm::createNoProfileInfoPass();
(void) llvm::createObjCARCAliasAnalysisPass();
+ (void) llvm::createObjCARCAPElimPass();
(void) llvm::createObjCARCExpandPass();
(void) llvm::createObjCARCContractPass();
(void) llvm::createObjCARCOptPass();
@@ -150,6 +152,7 @@ namespace {
(void) llvm::createCorrelatedValuePropagationPass();
(void) llvm::createMemDepPrinter();
(void) llvm::createInstructionSimplifierPass();
+ (void) llvm::createBBVectorizePass();
(void)new llvm::IntervalPartition();
(void)new llvm::FindUsedTypes();
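These no-op references only keep the linker from stripping the new passes out of static builds; actually scheduling the vectorizer looks like this with the old PassManager (sketch, era API):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Vectorize.h"

    static void runBBVectorize(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createBBVectorizePass());
      PM.run(M);
    }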
diff --git a/include/llvm/Linker.h b/include/llvm/Linker.h
index 88908fbd72a7..1ebcd6b53863 100644
--- a/include/llvm/Linker.h
+++ b/include/llvm/Linker.h
@@ -15,14 +15,15 @@
#define LLVM_LINKER_H
#include <memory>
+#include <string>
#include <vector>
-#include "llvm/ADT/StringRef.h"
namespace llvm {
namespace sys { class Path; }
class Module;
class LLVMContext;
+class StringRef;
/// This class provides the core functionality of linking in LLVM. It retains a
/// Module object which is the composite of the modules and libraries linked
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 4a0cf37a6eb2..05e6286b7cc5 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -12,17 +12,20 @@
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCFixup.h"
-#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
+class MCAsmLayout;
+class MCAssembler;
class MCELFObjectTargetWriter;
-class MCFixup;
+struct MCFixupKindInfo;
+class MCFragment;
class MCInst;
+class MCInstFragment;
class MCObjectWriter;
class MCSection;
-template<typename T>
-class SmallVectorImpl;
+class MCValue;
class raw_ostream;
/// MCAsmBackend - Generic interface to target-specific assembler backends.
@@ -44,8 +47,8 @@ public:
/// createELFObjectTargetWriter - Create a new ELFObjectTargetWriter to enable
/// non-standard ELFObjectWriters.
virtual MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- assert(0 && "createELFObjectTargetWriter is not supported by asm backend");
- return 0;
+ llvm_unreachable("createELFObjectTargetWriter is not supported by asm "
+ "backend");
}
/// hasReliableSymbolDifference - Check whether this target implements
@@ -85,12 +88,21 @@ public:
/// getFixupKindInfo - Get information on a fixup kind.
virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
+ /// processFixupValue - Target hook to adjust the literal value of a fixup
+ /// if necessary. IsResolved signals whether the caller believes a relocation
+ /// is needed; the target can modify the value. The default does nothing.
+ virtual void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {}
+
/// @}
- /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
+ /// applyFixup - Apply the \arg Value for given \arg Fixup into the provided
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
- virtual void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const = 0;
/// @}
@@ -98,11 +110,18 @@ public:
/// @name Target Relaxation Interfaces
/// @{
- /// MayNeedRelaxation - Check whether the given instruction may need
+ /// mayNeedRelaxation - Check whether the given instruction may need
/// relaxation.
///
/// \param Inst - The instruction to test.
- virtual bool MayNeedRelaxation(const MCInst &Inst) const = 0;
+ virtual bool mayNeedRelaxation(const MCInst &Inst) const = 0;
+
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ virtual bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const = 0;
/// RelaxInstruction - Relax the instruction in the given fragment to the next
/// wider instruction.
@@ -110,20 +129,20 @@ public:
/// \param Inst - The instruction to relax, which may be the same as the
/// output.
/// \param Res [output] - On return, the relaxed instruction.
- virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const = 0;
+ virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const = 0;
/// @}
- /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
+ /// writeNopData - Write an (optimal) nop sequence of Count bytes to the given
/// output. If the target cannot generate such a sequence, it should return an
/// error.
///
/// \return - True on success.
- virtual bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const = 0;
+ virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const = 0;
- /// HandleAssemblerFlag - Handle any target-specific assembler flags.
+ /// handleAssemblerFlag - Handle any target-specific assembler flags.
/// By default, do nothing.
- virtual void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
};
} // End llvm namespace
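Taken together, these hunks rename the backend hooks to lowercase and split relaxation into a per-instruction query (mayNeedRelaxation) and a per-fixup decision (fixupNeedsRelaxation). A hedged sketch of a backend against the interface as shown above; ToyAsmBackend and its 8-bit displacement policy are invented, and the last two overrides belong to parts of MCAsmBackend not shown in this hunk:

    #include "llvm/MC/MCAsmBackend.h"
    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    class ToyAsmBackend : public MCAsmBackend {
    public:
      void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
                      uint64_t Value) const {
        // A real backend patches the encoded bytes at the fixup offset.
      }
      bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
      bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
                                const MCInstFragment *DF,
                                const MCAsmLayout &Layout) const {
        // Invented policy: relax once the resolved value no longer fits
        // in a signed 8-bit branch displacement.
        return int64_t(Value) < -128 || int64_t(Value) > 127;
      }
      void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
        Res = Inst; // a real backend rewrites to the wider encoding
      }
      bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
        return true; // pretend any count of nop bytes can be emitted
      }
      // Remaining pure virtuals from parts of the header not in this hunk.
      unsigned getNumFixupKinds() const { return 0; }
      MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return 0; }
    };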
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index c3c296e23dc1..0f67c993714c 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -36,10 +36,6 @@ namespace llvm {
enum LCOMMType { None, NoAlignment, ByteAlignment };
}
- namespace Structors {
- enum OutputOrder { None, PriorityOrder, ReversePriorityOrder };
- }
-
/// MCAsmInfo - This class is intended to be used as a base class for asm
/// properties and features specific to the target.
class MCAsmInfo {
@@ -47,7 +43,7 @@ namespace llvm {
//===------------------------------------------------------------------===//
// Properties to be set by the target writer, used to configure asm printer.
//
-
+
/// PointerSize - Pointer size in bytes.
/// Default is 4.
unsigned PointerSize;
@@ -72,11 +68,6 @@ namespace llvm {
/// the macho-specific .tbss directive for emitting thread local BSS Symbols
bool HasMachoTBSSDirective; // Default is false.
- /// StructorOutputOrder - Whether the static ctor/dtor list should be output
- /// in no particular order, in order of increasing priority or the reverse:
- /// in order of decreasing priority (the default).
- Structors::OutputOrder StructorOutputOrder; // Default is reverse order.
-
/// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should
/// emit a ".reference .constructors_used" or ".reference .destructors_used"
/// directive after the static ctor/dtor list. This directive is only
@@ -152,6 +143,10 @@ namespace llvm {
/// symbol names. This defaults to true.
bool AllowPeriodsInName;
+ /// AllowUTF8 - This is true if the assembler accepts UTF-8 input.
+ // FIXME: Make this a more general encoding setting?
+ bool AllowUTF8;
+
//===--- Data Emission Directives -------------------------------------===//
/// ZeroDirective - this should be set to the directive used to get some
@@ -189,6 +184,11 @@ namespace llvm {
const char *JT32Begin; // Defaults to "$a."
bool SupportsDataRegions;
+ /// GPRel64Directive - if non-null, a directive that is used to emit a word
+ /// which should be relocated as a 64-bit GP-relative offset, e.g. .gpdword
+ /// on Mips.
+ const char *GPRel64Directive; // Defaults to NULL.
+
/// GPRel32Directive - if non-null, a directive that is used to emit a word
/// which should be relocated as a 32-bit GP-relative offset, e.g. .gpword
/// on Mips or .gprel32 on Alpha.
@@ -323,13 +323,17 @@ namespace llvm {
const char* DwarfSectionOffsetDirective; // Defaults to NULL
/// DwarfRequiresRelocationForSectionOffset - True if we need to produce a
- // relocation when we want a section offset in dwarf.
+ /// relocation when we want a section offset in dwarf.
bool DwarfRequiresRelocationForSectionOffset; // Defaults to true;
- // DwarfUsesLabelOffsetDifference - True if Dwarf2 output can
- // use EmitLabelOffsetDifference.
+ /// DwarfUsesLabelOffsetDifference - True if Dwarf2 output can
+ /// use EmitLabelOffsetDifference.
bool DwarfUsesLabelOffsetForRanges;
+ /// DwarfUsesRelocationsForStringPool - True if this Dwarf output must use
+ /// relocations to refer to entries in the string pool.
+ bool DwarfUsesRelocationsForStringPool;
+
/// DwarfRegNumForCFI - True if dwarf register numbers are printed
/// instead of symbolic register names in .cfi_* directives.
bool DwarfRegNumForCFI; // Defaults to false;
@@ -381,6 +385,7 @@ namespace llvm {
const char *getData64bitsDirective(unsigned AS = 0) const {
return AS == 0 ? Data64bitsDirective : getDataASDirective(64, AS);
}
+ const char *getGPRel64Directive() const { return GPRel64Directive; }
const char *getGPRel32Directive() const { return GPRel32Directive; }
/// [Code|Data]Begin label name accessors.
@@ -424,9 +429,6 @@ namespace llvm {
//
bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
- Structors::OutputOrder getStructorOutputOrder() const {
- return StructorOutputOrder;
- }
bool hasStaticCtorDtorReferenceInStaticMode() const {
return HasStaticCtorDtorReferenceInStaticMode;
}
@@ -487,6 +489,9 @@ namespace llvm {
bool doesAllowPeriodsInName() const {
return AllowPeriodsInName;
}
+ bool doesAllowUTF8() const {
+ return AllowUTF8;
+ }
const char *getZeroDirective() const {
return ZeroDirective;
}
@@ -554,7 +559,7 @@ namespace llvm {
ExceptionsType == ExceptionHandling::ARM ||
ExceptionsType == ExceptionHandling::Win64);
}
- bool doesDwarfUsesInlineInfoSection() const {
+ bool doesDwarfUseInlineInfoSection() const {
return DwarfUsesInlineInfoSection;
}
const char *getDwarfSectionOffsetDirective() const {
@@ -563,9 +568,12 @@ namespace llvm {
bool doesDwarfRequireRelocationForSectionOffset() const {
return DwarfRequiresRelocationForSectionOffset;
}
- bool doesDwarfUsesLabelOffsetForRanges() const {
+ bool doesDwarfUseLabelOffsetForRanges() const {
return DwarfUsesLabelOffsetForRanges;
}
+ bool doesDwarfUseRelocationsForStringPool() const {
+ return DwarfUsesRelocationsForStringPool;
+ }
bool useDwarfRegNumForCFI() const {
return DwarfRegNumForCFI;
}
diff --git a/include/llvm/MC/MCAsmInfoCOFF.h b/include/llvm/MC/MCAsmInfoCOFF.h
index a3ee1593c3ac..0ff3e127ed0e 100644
--- a/include/llvm/MC/MCAsmInfoCOFF.h
+++ b/include/llvm/MC/MCAsmInfoCOFF.h
@@ -14,9 +14,21 @@
namespace llvm {
class MCAsmInfoCOFF : public MCAsmInfo {
+ virtual void anchor();
protected:
explicit MCAsmInfoCOFF();
-
+ };
+
+ class MCAsmInfoMicrosoft : public MCAsmInfoCOFF {
+ virtual void anchor();
+ protected:
+ explicit MCAsmInfoMicrosoft();
+ };
+
+ class MCAsmInfoGNUCOFF : public MCAsmInfoCOFF {
+ virtual void anchor();
+ protected:
+ explicit MCAsmInfoGNUCOFF();
};
}
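The anchor() declarations added here (and to the MCFragment subclasses further down) follow the usual key-function idiom: one out-of-line virtual method pins the class's vtable and RTTI to a single object file, instead of every translation unit emitting weak copies. A standalone illustration with an invented class:

    // Widget.h
    class Widget {
      virtual void anchor(); // declared, deliberately not defined inline
    public:
      virtual ~Widget() {}
      virtual int value() const { return 0; }
    };

    // Widget.cpp
    void Widget::anchor() {} // defined out of line exactly once, so the
                             // vtable is emitted only in this object file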
diff --git a/include/llvm/MC/MCAsmInfoDarwin.h b/include/llvm/MC/MCAsmInfoDarwin.h
index 1f6c49938c9c..af552de6e690 100644
--- a/include/llvm/MC/MCAsmInfoDarwin.h
+++ b/include/llvm/MC/MCAsmInfoDarwin.h
@@ -18,7 +18,9 @@
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
- struct MCAsmInfoDarwin : public MCAsmInfo {
+ class MCAsmInfoDarwin : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit MCAsmInfoDarwin();
};
}
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index a4585d1f1953..cf79216d076a 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCASMLAYOUT_H
#define LLVM_MC_MCASMLAYOUT_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
namespace llvm {
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index b8f8cc4cec90..d139173c3e13 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -25,7 +25,6 @@ namespace llvm {
class raw_ostream;
class MCAsmLayout;
class MCAssembler;
-class MCBinaryExpr;
class MCContext;
class MCCodeEmitter;
class MCExpr;
@@ -106,6 +105,7 @@ public:
};
class MCDataFragment : public MCFragment {
+ virtual void anchor();
SmallString<32> Contents;
/// Fixups - The list of fixups in this fragment.
@@ -160,6 +160,8 @@ public:
// object with just the MCInst and a code size, then we should just change
// MCDataFragment to have an optional MCInst at its end.
class MCInstFragment : public MCFragment {
+ virtual void anchor();
+
/// Inst - The instruction this is a fragment for.
MCInst Inst;
@@ -215,6 +217,8 @@ public:
};
class MCAlignFragment : public MCFragment {
+ virtual void anchor();
+
/// Alignment - The alignment to ensure, in bytes.
unsigned Alignment;
@@ -263,6 +267,8 @@ public:
};
class MCFillFragment : public MCFragment {
+ virtual void anchor();
+
/// Value - Value to use for filling bytes.
int64_t Value;
@@ -300,6 +306,8 @@ public:
};
class MCOrgFragment : public MCFragment {
+ virtual void anchor();
+
/// Offset - The offset this fragment should start at.
const MCExpr *Offset;
@@ -327,6 +335,8 @@ public:
};
class MCLEBFragment : public MCFragment {
+ virtual void anchor();
+
/// Value - The value this fragment should contain.
const MCExpr *Value;
@@ -358,6 +368,8 @@ public:
};
class MCDwarfLineAddrFragment : public MCFragment {
+ virtual void anchor();
+
/// LineDelta - the value of the difference between the two line numbers
/// between two .loc dwarf directives.
int64_t LineDelta;
@@ -393,6 +405,8 @@ public:
};
class MCDwarfCallFrameFragment : public MCFragment {
+ virtual void anchor();
+
/// AddrDelta - The expression for the difference of the two symbols that
/// make up the address delta between two .cfi_* dwarf directives.
const MCExpr *AddrDelta;
@@ -711,43 +725,44 @@ private:
/// \return Whether the fixup value was fully resolved. This is true if the
/// \arg Value result is fixed, otherwise the value may change due to
/// relocation.
- bool EvaluateFixup(const MCAsmLayout &Layout,
+ bool evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const;
/// Check whether a fixup can be satisfied, or whether it needs to be relaxed
/// (increased in size, in order to hold its value correctly).
- bool FixupNeedsRelaxation(const MCFixup &Fixup, const MCFragment *DF,
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, const MCInstFragment *DF,
const MCAsmLayout &Layout) const;
/// Check whether the given fragment needs relaxation.
- bool FragmentNeedsRelaxation(const MCInstFragment *IF,
+ bool fragmentNeedsRelaxation(const MCInstFragment *IF,
const MCAsmLayout &Layout) const;
- /// LayoutOnce - Perform one layout iteration and return true if any offsets
+ /// layoutOnce - Perform one layout iteration and return true if any offsets
/// were adjusted.
- bool LayoutOnce(MCAsmLayout &Layout);
+ bool layoutOnce(MCAsmLayout &Layout);
- bool LayoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
+ bool layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
- bool RelaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
+ bool relaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
- bool RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
+ bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
- bool RelaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
- bool RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+ bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
+ bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF);
- /// FinishLayout - Finalize a layout, including fragment lowering.
- void FinishLayout(MCAsmLayout &Layout);
+ /// finishLayout - Finalize a layout, including fragment lowering.
+ void finishLayout(MCAsmLayout &Layout);
- uint64_t HandleFixup(const MCAsmLayout &Layout,
+ uint64_t handleFixup(const MCAsmLayout &Layout,
MCFragment &F, const MCFixup &Fixup);
public:
/// Compute the effective fragment size assuming it is laid out at the given
/// \arg SectionAddress and \arg FragmentOffset.
- uint64_t ComputeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const;
+ uint64_t computeFragmentSize(const MCAsmLayout &Layout,
+ const MCFragment &F) const;
/// Find the symbol which defines the atom containing the given symbol, or
/// null if there is no such symbol.
@@ -760,7 +775,7 @@ public:
bool isSymbolLinkerVisible(const MCSymbol &SD) const;
/// Emit the section contents using the given object writer.
- void WriteSectionData(const MCSectionData *Section,
+ void writeSectionData(const MCSectionData *Section,
const MCAsmLayout &Layout) const;
/// Check whether a given symbol has been flagged with .thumb_func.
diff --git a/include/llvm/MC/MCCodeEmitter.h b/include/llvm/MC/MCCodeEmitter.h
index bc63241bece9..934ef69ce3fe 100644
--- a/include/llvm/MC/MCCodeEmitter.h
+++ b/include/llvm/MC/MCCodeEmitter.h
@@ -10,12 +10,8 @@
#ifndef LLVM_MC_MCCODEEMITTER_H
#define LLVM_MC_MCCODEEMITTER_H
-#include "llvm/MC/MCFixup.h"
-
-#include <cassert>
-
namespace llvm {
-class MCExpr;
+class MCFixup;
class MCInst;
class raw_ostream;
template<typename T> class SmallVectorImpl;
diff --git a/include/llvm/MC/MCCodeGenInfo.h b/include/llvm/MC/MCCodeGenInfo.h
index 1c54c47e2d95..d1765e1240a4 100644
--- a/include/llvm/MC/MCCodeGenInfo.h
+++ b/include/llvm/MC/MCCodeGenInfo.h
@@ -20,7 +20,7 @@
namespace llvm {
class MCCodeGenInfo {
- /// RelocationModel - Relocation model: statcic, pic, etc.
+ /// RelocationModel - Relocation model: static, pic, etc.
///
Reloc::Model RelocationModel;
@@ -28,13 +28,20 @@ namespace llvm {
///
CodeModel::Model CMModel;
+ /// OptLevel - Optimization level.
+ ///
+ CodeGenOpt::Level OptLevel;
+
public:
void InitMCCodeGenInfo(Reloc::Model RM = Reloc::Default,
- CodeModel::Model CM = CodeModel::Default);
+ CodeModel::Model CM = CodeModel::Default,
+ CodeGenOpt::Level OL = CodeGenOpt::Default);
Reloc::Model getRelocationModel() const { return RelocationModel; }
CodeModel::Model getCodeModel() const { return CMModel; }
+
+ CodeGenOpt::Level getOptLevel() const { return OptLevel; }
};
} // namespace llvm
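With OptLevel stored beside the relocation and code models, initializing the struct under the new signature looks as follows; the particular values are chosen arbitrarily for this sketch:

    #include "llvm/MC/MCCodeGenInfo.h"
    using namespace llvm;

    MCCodeGenInfo makeInfoForJIT() {
      MCCodeGenInfo Info;
      // PIC relocations, default code model, aggressive optimization.
      Info.InitMCCodeGenInfo(Reloc::PIC_, CodeModel::Default,
                             CodeGenOpt::Aggressive);
      return Info;
    }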
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index a49a35c8d5ba..b58631919330 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/raw_ostream.h"
#include <vector> // FIXME: Shouldn't be needed.
@@ -29,6 +30,7 @@ namespace llvm {
class MCObjectFileInfo;
class MCRegisterInfo;
class MCLineSection;
+ class SMLoc;
class StringRef;
class Twine;
class MCSectionMachO;
@@ -43,6 +45,8 @@ namespace llvm {
public:
typedef StringMap<MCSymbol*, BumpPtrAllocator&> SymbolTable;
private:
+ /// The SourceMgr for this object, if any.
+ const SourceMgr *SrcMgr;
/// The MCAsmInfo for this target.
const MCAsmInfo &MAI;
@@ -98,6 +102,27 @@ namespace llvm {
MCDwarfLoc CurrentDwarfLoc;
bool DwarfLocSeen;
+ /// Generate dwarf debugging info for assembly source files.
+ bool GenDwarfForAssembly;
+
+ /// The current dwarf file number when generating dwarf debugging info for
+ /// assembly source files.
+ unsigned GenDwarfFileNumber;
+
+ /// The default initial text section that we generate dwarf debugging line
+ /// info for when producing dwarf for assembly source files.
+ const MCSection *GenDwarfSection;
+ /// Symbols created for the start and end of this section.
+ MCSymbol *GenDwarfSectionStartSym, *GenDwarfSectionEndSym;
+
+ /// The information gathered from labels that will have dwarf label
+ /// entries when producing dwarf for assembly source files.
+ std::vector<const MCGenDwarfLabelEntry *> MCGenDwarfLabelEntries;
+
+ /// The string to embed in the debug information for the compile unit, if
+ /// non-empty.
+ StringRef DwarfDebugFlags;
+
/// Honor temporary labels, this is useful for debugging semantic
/// differences between temporary and non-temporary labels (primarily on
/// Darwin).
@@ -116,9 +141,11 @@ namespace llvm {
public:
explicit MCContext(const MCAsmInfo &MAI, const MCRegisterInfo &MRI,
- const MCObjectFileInfo *MOFI);
+ const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0);
~MCContext();
+ const SourceMgr *getSourceManager() const { return SrcMgr; }
+
const MCAsmInfo &getAsmInfo() const { return MAI; }
const MCRegisterInfo &getRegisterInfo() const { return MRI; }
@@ -204,7 +231,8 @@ namespace llvm {
/// @{
/// GetDwarfFile - creates an entry in the dwarf file and directory tables.
- unsigned GetDwarfFile(StringRef FileName, unsigned FileNumber);
+ unsigned GetDwarfFile(StringRef Directory, StringRef FileName,
+ unsigned FileNumber);
bool isValidDwarfFileNumber(unsigned FileNumber);
@@ -251,6 +279,31 @@ namespace llvm {
bool getDwarfLocSeen() { return DwarfLocSeen; }
const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; }
+ bool getGenDwarfForAssembly() { return GenDwarfForAssembly; }
+ void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; }
+ unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; }
+ unsigned nextGenDwarfFileNumber() { return ++GenDwarfFileNumber; }
+ const MCSection *getGenDwarfSection() { return GenDwarfSection; }
+ void setGenDwarfSection(const MCSection *Sec) { GenDwarfSection = Sec; }
+ MCSymbol *getGenDwarfSectionStartSym() { return GenDwarfSectionStartSym; }
+ void setGenDwarfSectionStartSym(MCSymbol *Sym) {
+ GenDwarfSectionStartSym = Sym;
+ }
+ MCSymbol *getGenDwarfSectionEndSym() { return GenDwarfSectionEndSym; }
+ void setGenDwarfSectionEndSym(MCSymbol *Sym) {
+ GenDwarfSectionEndSym = Sym;
+ }
+ const std::vector<const MCGenDwarfLabelEntry *>
+ &getMCGenDwarfLabelEntries() const {
+ return MCGenDwarfLabelEntries;
+ }
+ void addMCGenDwarfLabelEntry(const MCGenDwarfLabelEntry *E) {
+ MCGenDwarfLabelEntries.push_back(E);
+ }
+
+ void setDwarfDebugFlags(StringRef S) { DwarfDebugFlags = S; }
+ StringRef getDwarfDebugFlags() { return DwarfDebugFlags; }
+
/// @}
char *getSecureLogFile() { return SecureLogFile; }
@@ -268,6 +321,11 @@ namespace llvm {
}
void Deallocate(void *Ptr) {
}
+
+ // Unrecoverable error has occurred. Display the best diagnostic we can
+ // and bail via exit(1). For now, most MC backend errors are unrecoverable.
+ // FIXME: We should really do something about that.
+ LLVM_ATTRIBUTE_NORETURN void FatalError(SMLoc L, const Twine &Msg);
};
} // end namespace llvm
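The new SourceMgr parameter and the GenDwarf* state let a driver that assembles .s files request debug line tables; GetDwarfFile, per the hunk above, now takes the directory separately from the file name. A hedged sketch of wiring this up, assuming the MCAsmInfo/MCRegisterInfo/MCObjectFileInfo objects already exist; the path and file number are placeholders:

    #include "llvm/MC/MCContext.h"
    #include "llvm/Support/SourceMgr.h"
    using namespace llvm;

    void configureForAsmDebugInfo(const MCAsmInfo &MAI,
                                  const MCRegisterInfo &MRI,
                                  const MCObjectFileInfo *MOFI,
                                  SourceMgr &SrcMgr) {
      // The context can now carry the SourceMgr, so MC errors and the
      // dwarf line table can point back at the .s source.
      MCContext Ctx(MAI, MRI, MOFI, &SrcMgr);
      Ctx.setGenDwarfForAssembly(true);
      // Directory and file name are registered separately now; file
      // number 1 is the conventional first entry for a .file directive.
      unsigned FileNo = Ctx.GetDwarfFile("/tmp", "input.s", 1);
      (void) FileNo;
    }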
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index 454277d6852c..4b5fbec47dce 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -90,7 +90,7 @@ public:
/// @return - An array of instruction information, with one entry for
/// each MCInst opcode this disassembler returns.
/// NULL if there is no info for this target.
- virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; }
+ virtual const EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; }
private:
//
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 431e3c4da86a..fdb7ab23c09f 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -17,20 +17,18 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCObjectWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Dwarf.h"
#include <vector>
namespace llvm {
class MCContext;
- class MCExpr;
+ class MCObjectWriter;
class MCSection;
- class MCSectionData;
class MCStreamer;
class MCSymbol;
- class MCObjectStreamer;
- class raw_ostream;
+ class SourceMgr;
+ class SMLoc;
/// MCDwarfFile - Instances of this class represent the name of the dwarf
/// .file directive and its associated dwarf file number in the MC file,
@@ -210,7 +208,7 @@ namespace llvm {
//
// This emits the Dwarf file and the line tables.
//
- static void Emit(MCStreamer *MCOS);
+ static const MCSymbol *Emit(MCStreamer *MCOS);
};
class MCDwarfLineAddr {
@@ -227,23 +225,63 @@ namespace llvm {
int64_t LineDelta, uint64_t AddrDelta);
};
+ class MCGenDwarfInfo {
+ public:
+ //
+ // When generating dwarf for assembly source files this emits the Dwarf
+ // sections.
+ //
+ static void Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol);
+ };
+
+ // When generating dwarf for assembly source files, this is the info that
+ // needs to be gathered for each symbol that will have a dwarf label.
+ class MCGenDwarfLabelEntry {
+ private:
+ // Name of the symbol without a leading underbar, if any.
+ StringRef Name;
+ // The dwarf file number this symbol is in.
+ unsigned FileNumber;
+ // The line number this symbol is at.
+ unsigned LineNumber;
+ // The low_pc for the dwarf label is taken from this symbol.
+ MCSymbol *Label;
+
+ public:
+ MCGenDwarfLabelEntry(StringRef name, unsigned fileNumber,
+ unsigned lineNumber, MCSymbol *label) :
+ Name(name), FileNumber(fileNumber), LineNumber(lineNumber), Label(label){}
+
+ StringRef getName() const { return Name; }
+ unsigned getFileNumber() const { return FileNumber; }
+ unsigned getLineNumber() const { return LineNumber; }
+ MCSymbol *getLabel() const { return Label; }
+
+ // This is called when a label is created while generating dwarf for
+ // assembly source files.
+ static void Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr,
+ SMLoc &Loc);
+ };
+
class MCCFIInstruction {
public:
- enum OpType { SameValue, Remember, Restore, Move, RelMove };
+ enum OpType { SameValue, RememberState, RestoreState, Move, RelMove, Escape,
+ Restore};
private:
OpType Operation;
MCSymbol *Label;
// Move to & from location.
MachineLocation Destination;
MachineLocation Source;
+ std::vector<char> Values;
public:
MCCFIInstruction(OpType Op, MCSymbol *L)
: Operation(Op), Label(L) {
- assert(Op == Remember || Op == Restore);
+ assert(Op == RememberState || Op == RestoreState);
}
MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register)
: Operation(Op), Label(L), Destination(Register) {
- assert(Op == SameValue);
+ assert(Op == SameValue || Op == Restore);
}
MCCFIInstruction(MCSymbol *L, const MachineLocation &D,
const MachineLocation &S)
@@ -254,16 +292,24 @@ namespace llvm {
: Operation(Op), Label(L), Destination(D), Source(S) {
assert(Op == RelMove);
}
+ MCCFIInstruction(OpType Op, MCSymbol *L, StringRef Vals)
+ : Operation(Op), Label(L), Values(Vals.begin(), Vals.end()) {
+ assert(Op == Escape);
+ }
OpType getOperation() const { return Operation; }
MCSymbol *getLabel() const { return Label; }
const MachineLocation &getDestination() const { return Destination; }
const MachineLocation &getSource() const { return Source; }
+ const StringRef getValues() const {
+ return StringRef(&Values[0], Values.size());
+ }
};
struct MCDwarfFrameInfo {
MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0),
Function(0), Instructions(), PersonalityEncoding(),
- LsdaEncoding(0), CompactUnwindEncoding(0) {}
+ LsdaEncoding(0), CompactUnwindEncoding(0),
+ IsSignalFrame(false) {}
MCSymbol *Begin;
MCSymbol *End;
const MCSymbol *Personality;
@@ -273,6 +319,7 @@ namespace llvm {
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
uint32_t CompactUnwindEncoding;
+ bool IsSignalFrame;
};
class MCDwarfFrameEmitter {
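The renamed RememberState/RestoreState enumerators and the new Restore and Escape forms can be seen together in a hedged sketch; the label, register, and the two-byte Escape payload below are placeholders:

    #include "llvm/MC/MCDwarf.h"
    #include <vector>
    using namespace llvm;

    void recordEpilogueCFI(std::vector<MCCFIInstruction> &Instrs,
                           MCSymbol *Label, unsigned FPReg) {
      // State save/restore use the renamed enumerators.
      Instrs.push_back(MCCFIInstruction(MCCFIInstruction::RememberState,
                                        Label));
      // Restore reuses the one-register constructor, per the widened
      // assert above.
      Instrs.push_back(MCCFIInstruction(MCCFIInstruction::Restore,
                                        Label, FPReg));
      // Escape embeds raw DW_CFA opcode bytes verbatim; this payload is
      // an arbitrary placeholder.
      Instrs.push_back(MCCFIInstruction(MCCFIInstruction::Escape, Label,
                                        StringRef("\x2e\x10", 2)));
      Instrs.push_back(MCCFIInstruction(MCCFIInstruction::RestoreState,
                                        Label));
    }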
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index 3c150dca9e62..f153cb0c1af0 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -10,28 +10,91 @@
#ifndef LLVM_MC_MCELFOBJECTWRITER_H
#define LLVM_MC_MCELFOBJECTWRITER_H
-#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ELF.h"
+#include <vector>
namespace llvm {
+class MCAssembler;
+class MCFixup;
+class MCFragment;
+class MCObjectWriter;
+class MCSymbol;
+class MCValue;
+
+/// @name Relocation Data
+/// @{
+
+struct ELFRelocationEntry {
+ // Make these big enough for both 32-bit and 64-bit
+ uint64_t r_offset;
+ int Index;
+ unsigned Type;
+ const MCSymbol *Symbol;
+ uint64_t r_addend;
+ const MCFixup *Fixup;
+
+ ELFRelocationEntry()
+ : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0), Fixup(0) {}
+
+ ELFRelocationEntry(uint64_t RelocOffset, int Idx, unsigned RelType,
+ const MCSymbol *Sym, uint64_t Addend, const MCFixup &Fixup)
+ : r_offset(RelocOffset), Index(Idx), Type(RelType), Symbol(Sym),
+ r_addend(Addend), Fixup(&Fixup) {}
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFRelocationEntry &RE) const {
+ return RE.r_offset < r_offset;
+ }
+};
+
class MCELFObjectTargetWriter {
- const Triple::OSType OSType;
+ const uint8_t OSABI;
const uint16_t EMachine;
const unsigned HasRelocationAddend : 1;
const unsigned Is64Bit : 1;
+
protected:
- MCELFObjectTargetWriter(bool Is64Bit_, Triple::OSType OSType_,
+
+ MCELFObjectTargetWriter(bool Is64Bit_, uint8_t OSABI_,
uint16_t EMachine_, bool HasRelocationAddend_);
public:
- virtual ~MCELFObjectTargetWriter();
+ static uint8_t getOSABI(Triple::OSType OSType) {
+ switch (OSType) {
+ case Triple::FreeBSD:
+ return ELF::ELFOSABI_FREEBSD;
+ case Triple::Linux:
+ return ELF::ELFOSABI_LINUX;
+ default:
+ return ELF::ELFOSABI_NONE;
+ }
+ }
+
+ virtual ~MCELFObjectTargetWriter() {}
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const = 0;
+ virtual unsigned getEFlags() const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ virtual void adjustFixupOffset(const MCFixup &Fixup,
+ uint64_t &RelocOffset);
+
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
/// @name Accessors
/// @{
- Triple::OSType getOSType() { return OSType; }
+ uint8_t getOSABI() { return OSABI; }
uint16_t getEMachine() { return EMachine; }
bool hasRelocationAddend() { return HasRelocationAddend; }
- bool is64Bit() { return Is64Bit; }
+ bool is64Bit() const { return Is64Bit; }
/// @}
};
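Since target writers now store the ELF OSABI byte instead of a Triple::OSType, the mapping happens once through the static helper shown above. A small usage sketch; osabiForTriple is an invented wrapper:

    #include "llvm/MC/MCELFObjectWriter.h"
    #include "llvm/ADT/Triple.h"
    using namespace llvm;

    uint8_t osabiForTriple(StringRef TT) {
      Triple T(TT);
      // FreeBSD and Linux get dedicated OSABI bytes; everything else
      // falls back to ELFOSABI_NONE, per getOSABI above.
      return MCELFObjectTargetWriter::getOSABI(T.getOS());
    }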
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 0f285999ad62..ff33641dba7f 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -15,7 +15,6 @@
#include "llvm/Support/DataTypes.h"
namespace llvm {
-class MCAsmInfo;
class MCAsmLayout;
class MCAssembler;
class MCContext;
@@ -162,6 +161,7 @@ public:
VK_TPOFF,
VK_DTPOFF,
VK_TLVP, // Mach-O thread local variable relocation
+ VK_SECREL,
// FIXME: We'd really like to use the generic Kinds listed above for these.
VK_ARM_PLT, // ARM-style PLT references. i.e., (PLT) instead of @PLT
VK_ARM_TLSGD, // ditto for TLSGD, GOT, GOTOFF, TPOFF and GOTTPOFF
@@ -169,12 +169,32 @@ public:
VK_ARM_GOTOFF,
VK_ARM_TPOFF,
VK_ARM_GOTTPOFF,
+ VK_ARM_TARGET1,
VK_PPC_TOC,
VK_PPC_DARWIN_HA16, // ha16(symbol)
VK_PPC_DARWIN_LO16, // lo16(symbol)
VK_PPC_GAS_HA16, // symbol@ha
- VK_PPC_GAS_LO16 // symbol@l
+ VK_PPC_GAS_LO16, // symbol@l
+
+ VK_Mips_GPREL,
+ VK_Mips_GOT_CALL,
+ VK_Mips_GOT16,
+ VK_Mips_GOT,
+ VK_Mips_ABS_HI,
+ VK_Mips_ABS_LO,
+ VK_Mips_TLSGD,
+ VK_Mips_TLSLDM,
+ VK_Mips_DTPREL_HI,
+ VK_Mips_DTPREL_LO,
+ VK_Mips_GOTTPREL,
+ VK_Mips_TPREL_HI,
+ VK_Mips_TPREL_LO,
+ VK_Mips_GPOFF_HI,
+ VK_Mips_GPOFF_LO,
+ VK_Mips_GOT_DISP,
+ VK_Mips_GOT_PAGE,
+ VK_Mips_GOT_OFST
};
private:
@@ -185,7 +205,9 @@ private:
const VariantKind Kind;
explicit MCSymbolRefExpr(const MCSymbol *_Symbol, VariantKind _Kind)
- : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol), Kind(_Kind) {}
+ : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol), Kind(_Kind) {
+ assert(Symbol);
+ }
public:
/// @name Construction
diff --git a/include/llvm/MC/MCFixup.h b/include/llvm/MC/MCFixup.h
index 6fde797e40fd..16e9eb730b4e 100644
--- a/include/llvm/MC/MCFixup.h
+++ b/include/llvm/MC/MCFixup.h
@@ -11,6 +11,8 @@
#define LLVM_MC_MCFIXUP_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SMLoc.h"
#include <cassert>
namespace llvm {
@@ -26,6 +28,14 @@ enum MCFixupKind {
FK_PCRel_2, ///< A two-byte pc relative fixup.
FK_PCRel_4, ///< A four-byte pc relative fixup.
FK_PCRel_8, ///< An eight-byte pc relative fixup.
+ FK_GPRel_1, ///< A one-byte gp relative fixup.
+ FK_GPRel_2, ///< A two-byte gp relative fixup.
+ FK_GPRel_4, ///< A four-byte gp relative fixup.
+ FK_GPRel_8, ///< An eight-byte gp relative fixup.
+ FK_SecRel_1, ///< A one-byte section relative fixup.
+ FK_SecRel_2, ///< A two-byte section relative fixup.
+ FK_SecRel_4, ///< A four-byte section relative fixup.
+ FK_SecRel_8, ///< An eight-byte section relative fixup.
FirstTargetFixupKind = 128,
@@ -61,14 +71,17 @@ class MCFixup {
/// determine how the operand value should be encoded into the instruction.
unsigned Kind;
+ /// The source location which gave rise to the fixup, if any.
+ SMLoc Loc;
public:
static MCFixup Create(uint32_t Offset, const MCExpr *Value,
- MCFixupKind Kind) {
+ MCFixupKind Kind, SMLoc Loc = SMLoc()) {
assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!");
MCFixup FI;
FI.Value = Value;
FI.Offset = Offset;
FI.Kind = unsigned(Kind);
+ FI.Loc = Loc;
return FI;
}
@@ -83,13 +96,15 @@ public:
/// size. It is an error to pass an unsupported size.
static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) {
switch (Size) {
- default: assert(0 && "Invalid generic fixup size!");
+ default: llvm_unreachable("Invalid generic fixup size!");
case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1;
case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2;
case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4;
case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8;
}
}
+
+ SMLoc getLoc() const { return Loc; }
};
} // End llvm namespace
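Fixups now remember where in the source they came from, so a relocation that later fails to resolve can be reported at the offending line. A hedged sketch of creating one; the offset, expression, and location are assumed to come from the caller:

    #include "llvm/MC/MCFixup.h"
    using namespace llvm;

    MCFixup makePCRelWordFixup(uint32_t Offset, const MCExpr *Expr,
                               SMLoc Loc) {
      // getKindForSize picks FK_PCRel_4 for (4, /*isPCRel=*/true); the
      // new SMLoc parameter is threaded through so a later relocation
      // failure can point back at the source line.
      return MCFixup::Create(Offset, Expr,
                             MCFixup::getKindForSize(4, true), Loc);
    }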
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index d38476477495..397a37d3ce48 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -19,12 +19,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/SMLoc.h"
namespace llvm {
class raw_ostream;
class MCAsmInfo;
class MCInstPrinter;
class MCExpr;
+class MCInst;
/// MCOperand - Instances of this class represent operands of the MCInst class.
/// This is a simple discriminated union.
@@ -34,7 +36,8 @@ class MCOperand {
kRegister, ///< Register operand.
kImmediate, ///< Immediate operand.
kFPImmediate, ///< Floating-point immediate operand.
- kExpr ///< Relocatable immediate operand.
+ kExpr, ///< Relocatable immediate operand.
+ kInst ///< Sub-instruction operand.
};
unsigned char Kind;
@@ -43,6 +46,7 @@ class MCOperand {
int64_t ImmVal;
double FPImmVal;
const MCExpr *ExprVal;
+ const MCInst *InstVal;
};
public:
@@ -53,6 +57,7 @@ public:
bool isImm() const { return Kind == kImmediate; }
bool isFPImm() const { return Kind == kFPImmediate; }
bool isExpr() const { return Kind == kExpr; }
+ bool isInst() const { return Kind == kInst; }
/// getReg - Returns the register number.
unsigned getReg() const {
@@ -94,6 +99,15 @@ public:
ExprVal = Val;
}
+ const MCInst *getInst() const {
+ assert(isInst() && "This is not a sub-instruction");
+ return InstVal;
+ }
+ void setInst(const MCInst *Val) {
+ assert(isInst() && "This is not a sub-instruction");
+ InstVal = Val;
+ }
+
static MCOperand CreateReg(unsigned Reg) {
MCOperand Op;
Op.Kind = kRegister;
@@ -118,24 +132,34 @@ public:
Op.ExprVal = Val;
return Op;
}
+ static MCOperand CreateInst(const MCInst *Val) {
+ MCOperand Op;
+ Op.Kind = kInst;
+ Op.InstVal = Val;
+ return Op;
+ }
void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
void dump() const;
};
+template <> struct isPodLike<MCOperand> { static const bool value = true; };
/// MCInst - Instances of this class represent a single low-level machine
/// instruction.
class MCInst {
unsigned Opcode;
+ SMLoc Loc;
SmallVector<MCOperand, 8> Operands;
public:
MCInst() : Opcode(0) {}
void setOpcode(unsigned Op) { Opcode = Op; }
-
unsigned getOpcode() const { return Opcode; }
+ void setLoc(SMLoc loc) { Loc = loc; }
+ SMLoc getLoc() const { return Loc; }
+
const MCOperand &getOperand(unsigned i) const { return Operands[i]; }
MCOperand &getOperand(unsigned i) { return Operands[i]; }
unsigned getNumOperands() const { return Operands.size(); }
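The new kInst operand kind lets an MCInst carry a sub-instruction, and setLoc/getLoc thread the parser's location through to later diagnostics. A hedged sketch; the opcode number is a placeholder:

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    MCInst makeBundle(const MCInst *Inner, SMLoc Loc) {
      MCInst Outer;
      Outer.setOpcode(1234); // placeholder opcode for this sketch
      Outer.setLoc(Loc);     // instructions now carry their source loc
      // A kInst operand nests one MCInst inside another, e.g. for
      // bundle-style encodings.
      Outer.addOperand(MCOperand::CreateInst(Inner));
      return Outer;
    }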
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 01ad2d3f8088..3c4f28be7ca6 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -14,6 +14,8 @@ namespace llvm {
class MCInst;
class raw_ostream;
class MCAsmInfo;
+class MCInstrInfo;
+class MCRegisterInfo;
class StringRef;
/// MCInstPrinter - This is an instance of a target assembly language printer
@@ -25,6 +27,8 @@ protected:
/// assembly emission is disable.
raw_ostream *CommentStream;
const MCAsmInfo &MAI;
+ const MCInstrInfo &MII;
+ const MCRegisterInfo &MRI;
/// The current set of available features.
unsigned AvailableFeatures;
@@ -32,8 +36,9 @@ protected:
/// Utility function for printing annotations.
void printAnnotation(raw_ostream &OS, StringRef Annot);
public:
- MCInstPrinter(const MCAsmInfo &mai)
- : CommentStream(0), MAI(mai), AvailableFeatures(0) {}
+ MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
+ const MCRegisterInfo &mri)
+ : CommentStream(0), MAI(mai), MII(mii), MRI(mri), AvailableFeatures(0) {}
virtual ~MCInstPrinter();
@@ -47,7 +52,7 @@ public:
/// getOpcodeName - Return the name of the specified opcode enum (e.g.
/// "MOV32ri") or empty if we can't resolve it.
- virtual StringRef getOpcodeName(unsigned Opcode) const;
+ StringRef getOpcodeName(unsigned Opcode) const;
/// printRegName - Print the assembler register name.
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
diff --git a/include/llvm/MC/MCInstrAnalysis.h b/include/llvm/MC/MCInstrAnalysis.h
index 8f3c499b1c73..acad6336aca7 100644
--- a/include/llvm/MC/MCInstrAnalysis.h
+++ b/include/llvm/MC/MCInstrAnalysis.h
@@ -33,7 +33,7 @@ public:
}
virtual bool isConditionalBranch(const MCInst &Inst) const {
- return Info->get(Inst.getOpcode()).isBranch();
+ return Info->get(Inst.getOpcode()).isConditionalBranch();
}
virtual bool isUnconditionalBranch(const MCInst &Inst) const {
diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index aafa800c1ac8..186612d904d8 100644
--- a/include/llvm/MC/MCInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -58,17 +58,17 @@ public:
/// if the operand is a register. If isLookupPtrRegClass is set, then this is
/// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to
/// get a dynamic register class.
- short RegClass;
+ int16_t RegClass;
/// Flags - These are flags from the MCOI::OperandFlags enum.
- unsigned short Flags;
+ uint8_t Flags;
+
+ /// OperandType - Information about the type of the operand.
+ uint8_t OperandType;
/// Lower 16 bits are used to specify which constraints are set. The higher 16
/// bits are used to specify the value of constraints (4 bits each).
- unsigned Constraints;
-
- /// OperandType - Information about the type of the operand.
- MCOI::OperandType OperandType;
+ uint32_t Constraints;
/// Currently no other information.
/// isLookupPtrRegClass - Set if this operand is a pointer value and it
@@ -137,11 +137,10 @@ public:
unsigned short NumDefs; // Num of args that are definitions
unsigned short SchedClass; // enum identifying instr sched class
unsigned short Size; // Number of bytes in encoding.
- const char * Name; // Name of the instruction record in td file
unsigned Flags; // Flags identifying machine instr class
uint64_t TSFlags; // Target Specific Flag values
- const unsigned *ImplicitUses; // Registers implicitly read by this instr
- const unsigned *ImplicitDefs; // Registers implicitly defined by this instr
+ const uint16_t *ImplicitUses; // Registers implicitly read by this instr
+ const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr
const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
/// getOperandConstraint - Returns the value of the specific constraint if
@@ -161,12 +160,6 @@ public:
return Opcode;
}
- /// getName - Return the name of the record in the .td file for this
- /// instruction, for example "ADD8ri".
- const char *getName() const {
- return Name;
- }
-
/// getNumOperands - Return the number of declared MachineOperands for this
/// MachineInstruction. Note that variadic (isVariadic() returns true)
/// instructions may have additional operands at the end of the list, and note
@@ -184,6 +177,10 @@ public:
return NumDefs;
}
+ /// getFlags - Return flags of this instruction.
+ ///
+ unsigned getFlags() const { return Flags; }
+
/// isVariadic - Return true if this instruction can have a variable number of
/// operands. In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
@@ -198,84 +195,6 @@ public:
return Flags & (1 << MCID::HasOptionalDef);
}
- /// getImplicitUses - Return a list of registers that are potentially
- /// read by any instance of this machine instruction. For example, on X86,
- /// the "adc" instruction adds two register operands and adds the carry bit in
- /// from the flags register. In this case, the instruction is marked as
- /// implicitly reading the flags. Likewise, the variable shift instruction on
- /// X86 is marked as implicitly reading the 'CL' register, which it always
- /// does.
- ///
- /// This method returns null if the instruction has no implicit uses.
- const unsigned *getImplicitUses() const {
- return ImplicitUses;
- }
-
- /// getNumImplicitUses - Return the number of implicit uses this instruction
- /// has.
- unsigned getNumImplicitUses() const {
- if (ImplicitUses == 0) return 0;
- unsigned i = 0;
- for (; ImplicitUses[i]; ++i) /*empty*/;
- return i;
- }
-
- /// getImplicitDefs - Return a list of registers that are potentially
- /// written by any instance of this machine instruction. For example, on X86,
- /// many instructions implicitly set the flags register. In this case, they
- /// are marked as setting the FLAGS. Likewise, many instructions always
- /// deposit their result in a physical register. For example, the X86 divide
- /// instruction always deposits the quotient and remainder in the EAX/EDX
- /// registers. For that instruction, this will return a list containing the
- /// EAX/EDX/EFLAGS registers.
- ///
- /// This method returns null if the instruction has no implicit defs.
- const unsigned *getImplicitDefs() const {
- return ImplicitDefs;
- }
-
- /// getNumImplicitDefs - Return the number of implicit defs this instruction
- /// has.
- unsigned getNumImplicitDefs() const {
- if (ImplicitDefs == 0) return 0;
- unsigned i = 0;
- for (; ImplicitDefs[i]; ++i) /*empty*/;
- return i;
- }
-
- /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
- /// uses the specified physical register.
- bool hasImplicitUseOfPhysReg(unsigned Reg) const {
- if (const unsigned *ImpUses = ImplicitUses)
- for (; *ImpUses; ++ImpUses)
- if (*ImpUses == Reg) return true;
- return false;
- }
-
- /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
- /// defines the specified physical register.
- bool hasImplicitDefOfPhysReg(unsigned Reg) const {
- if (const unsigned *ImpDefs = ImplicitDefs)
- for (; *ImpDefs; ++ImpDefs)
- if (*ImpDefs == Reg) return true;
- return false;
- }
-
- /// getSchedClass - Return the scheduling class for this instruction. The
- /// scheduling class is an index into the InstrItineraryData table. This
- /// returns zero if there is no known scheduling information for the
- /// instruction.
- ///
- unsigned getSchedClass() const {
- return SchedClass;
- }
-
- /// getSize - Return the number of bytes in the encoding of this instruction,
- /// or zero if the encoding size cannot be known from the opcode.
- unsigned getSize() const {
- return Size;
- }
-
/// isPseudo - Return true if this is a pseudo instruction that doesn't
/// correspond to a real machine instruction.
///
@@ -298,18 +217,6 @@ public:
return Flags & (1 << MCID::Barrier);
}
- /// findFirstPredOperandIdx() - Find the index of the first operand in the
- /// operand list that is used to represent the predicate. It returns -1 if
- /// none is found.
- int findFirstPredOperandIdx() const {
- if (isPredicable()) {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (OpInfo[i].isPredicate())
- return i;
- }
- return -1;
- }
-
/// isTerminator - Returns true if this instruction is part of the terminator for
/// a basic block. Typically this is things like return and branch
/// instructions.
@@ -530,6 +437,97 @@ public:
bool hasExtraDefRegAllocReq() const {
return Flags & (1 << MCID::ExtraDefRegAllocReq);
}
+
+
+ /// getImplicitUses - Return a list of registers that are potentially
+ /// read by any instance of this machine instruction. For example, on X86,
+ /// the "adc" instruction adds two register operands and adds the carry bit in
+ /// from the flags register. In this case, the instruction is marked as
+ /// implicitly reading the flags. Likewise, the variable shift instruction on
+ /// X86 is marked as implicitly reading the 'CL' register, which it always
+ /// does.
+ ///
+ /// This method returns null if the instruction has no implicit uses.
+ const uint16_t *getImplicitUses() const {
+ return ImplicitUses;
+ }
+
+ /// getNumImplicitUses - Return the number of implicit uses this instruction
+ /// has.
+ unsigned getNumImplicitUses() const {
+ if (ImplicitUses == 0) return 0;
+ unsigned i = 0;
+ for (; ImplicitUses[i]; ++i) /*empty*/;
+ return i;
+ }
+
+ /// getImplicitDefs - Return a list of registers that are potentially
+ /// written by any instance of this machine instruction. For example, on X86,
+ /// many instructions implicitly set the flags register. In this case, they
+ /// are marked as setting the FLAGS. Likewise, many instructions always
+ /// deposit their result in a physical register. For example, the X86 divide
+ /// instruction always deposits the quotient and remainder in the EAX/EDX
+ /// registers. For that instruction, this will return a list containing the
+ /// EAX/EDX/EFLAGS registers.
+ ///
+ /// This method returns null if the instruction has no implicit defs.
+ const uint16_t *getImplicitDefs() const {
+ return ImplicitDefs;
+ }
+
+ /// getNumImplicitDefs - Return the number of implicit defs this instruction
+ /// has.
+ unsigned getNumImplicitDefs() const {
+ if (ImplicitDefs == 0) return 0;
+ unsigned i = 0;
+ for (; ImplicitDefs[i]; ++i) /*empty*/;
+ return i;
+ }
+
+ /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
+ /// uses the specified physical register.
+ bool hasImplicitUseOfPhysReg(unsigned Reg) const {
+ if (const uint16_t *ImpUses = ImplicitUses)
+ for (; *ImpUses; ++ImpUses)
+ if (*ImpUses == Reg) return true;
+ return false;
+ }
+
+ /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
+ /// defines the specified physical register.
+ bool hasImplicitDefOfPhysReg(unsigned Reg) const {
+ if (const uint16_t *ImpDefs = ImplicitDefs)
+ for (; *ImpDefs; ++ImpDefs)
+ if (*ImpDefs == Reg) return true;
+ return false;
+ }
+
+ /// getSchedClass - Return the scheduling class for this instruction. The
+ /// scheduling class is an index into the InstrItineraryData table. This
+ /// returns zero if there is no known scheduling information for the
+ /// instruction.
+ ///
+ unsigned getSchedClass() const {
+ return SchedClass;
+ }
+
+ /// getSize - Return the number of bytes in the encoding of this instruction,
+ /// or zero if the encoding size cannot be known from the opcode.
+ unsigned getSize() const {
+ return Size;
+ }
+
+ /// findFirstPredOperandIdx() - Find the index of the first operand in the
+ /// operand list that is used to represent the predicate. It returns -1 if
+ /// none is found.
+ int findFirstPredOperandIdx() const {
+ if (isPredicable()) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (OpInfo[i].isPredicate())
+ return i;
+ }
+ return -1;
+ }
};
} // end namespace llvm
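The implicit use/def lists shrink from unsigned to uint16_t but keep their 0-terminated layout, so the counting loops above are unchanged. A hedged sketch of walking the def list; writesAnyOf is an invented helper:

    #include "llvm/MC/MCInstrDesc.h"
    using namespace llvm;

    bool writesAnyOf(const MCInstrDesc &Desc, const uint16_t *Regs,
                     unsigned N) {
      // The implicit-def list is a 0-terminated array of 16-bit register
      // numbers; register 0 is never a real register, so 0 terminates.
      if (const uint16_t *ImpDefs = Desc.getImplicitDefs())
        for (; *ImpDefs; ++ImpDefs)
          for (unsigned i = 0; i != N; ++i)
            if (*ImpDefs == Regs[i])
              return true;
      return false;
    }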
diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h
index a63e5faf8f93..1d3a36ca7c73 100644
--- a/include/llvm/MC/MCInstrInfo.h
+++ b/include/llvm/MC/MCInstrInfo.h
@@ -24,14 +24,19 @@ namespace llvm {
/// MCInstrInfo - Interface to description of machine instruction set
///
class MCInstrInfo {
- const MCInstrDesc *Desc; // Raw array to allow static init'n
- unsigned NumOpcodes; // Number of entries in the desc array
+ const MCInstrDesc *Desc; // Raw array to allow static init'n
+ const unsigned *InstrNameIndices; // Array for name indices in InstrNameData
+ const char *InstrNameData; // Instruction name string pool
+ unsigned NumOpcodes; // Number of entries in the desc array
public:
/// InitMCInstrInfo - Initialize MCInstrInfo, called by TableGen
/// auto-generated routines. *DO NOT USE*.
- void InitMCInstrInfo(const MCInstrDesc *D, unsigned NO) {
+ void InitMCInstrInfo(const MCInstrDesc *D, const unsigned *NI, const char *ND,
+ unsigned NO) {
Desc = D;
+ InstrNameIndices = NI;
+ InstrNameData = ND;
NumOpcodes = NO;
}
@@ -44,6 +49,12 @@ public:
assert(Opcode < NumOpcodes && "Invalid opcode!");
return Desc[Opcode];
}
+
+ /// getName - Returns the name for the instruction with the given opcode.
+ const char *getName(unsigned Opcode) const {
+ assert(Opcode < NumOpcodes && "Invalid opcode!");
+ return &InstrNameData[InstrNameIndices[Opcode]];
+ }
};
} // End llvm namespace
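Moving names out of MCInstrDesc into a shared pool means getName is an offset lookup into one string blob. A standalone illustration of the layout, with invented data:

    #include <cassert>

    static const char InstrNameData[] = "ADD8ri\0MOV32ri\0RET\0";
    static const unsigned InstrNameIndices[] = {0, 7, 15};

    const char *getName(unsigned Opcode) {
      assert(Opcode < 3 && "Invalid opcode!");
      // Each entry is the offset of a NUL-terminated name inside the
      // pool, so one contiguous blob replaces a per-descriptor char*.
      return &InstrNameData[InstrNameIndices[Opcode]];
    }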
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index 060d5085d0c3..aea4b410fea2 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -14,15 +14,14 @@
#ifndef LLVM_MC_MCOBJECTFILEINFO_H
#define LLVM_MC_MCOBJECTFILEINFO_H
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
-class MCContext;
-class MCSection;
-class Triple;
-
+ class MCContext;
+ class MCSection;
+ class StringRef;
+ class Triple;
+
class MCObjectFileInfo {
protected:
/// CommDirectiveSupportsAlignment - True if .comm supports alignment. This
@@ -47,6 +46,9 @@ protected:
unsigned FDEEncoding;
unsigned FDECFIEncoding;
unsigned TTypeEncoding;
+ // Section flags for eh_frame
+ unsigned EHSectionType;
+ unsigned EHSectionFlags;
/// TextSection - Section directive for standard text.
///
@@ -82,13 +84,20 @@ protected:
/// this is the section to emit them into.
const MCSection *CompactUnwindSection;
+ /// DwarfAccelNamesSection, DwarfAccelObjCSection
+ /// If we use the DWARF accelerated hash tables, then we want to emit these
+ /// sections.
+ const MCSection *DwarfAccelNamesSection;
+ const MCSection *DwarfAccelObjCSection;
+ const MCSection *DwarfAccelNamespaceSection;
+ const MCSection *DwarfAccelTypesSection;
+
// Dwarf sections for debug info. If a target supports debug info, these must
// be set.
const MCSection *DwarfAbbrevSection;
const MCSection *DwarfInfoSection;
const MCSection *DwarfLineSection;
const MCSection *DwarfFrameSection;
- const MCSection *DwarfPubNamesSection;
const MCSection *DwarfPubTypesSection;
const MCSection *DwarfDebugInlineSection;
const MCSection *DwarfStrSection;
@@ -102,7 +111,7 @@ protected:
const MCSection *TLSExtraDataSection;
/// TLSDataSection - Section directive for Thread Local data.
- /// ELF and MachO only.
+ /// ELF, MachO and COFF.
const MCSection *TLSDataSection; // Defaults to ".tdata".
/// TLSBSSSection - Section directive for Thread Local uninitialized data.
@@ -156,7 +165,7 @@ protected:
const MCSection *DrectveSection;
const MCSection *PDataSection;
const MCSection *XDataSection;
-
+
public:
void InitMCObjectFileInfo(StringRef TT, Reloc::Model RM, CodeModel::Model CM,
MCContext &ctx);
@@ -181,17 +190,26 @@ public:
const MCSection *getTextSection() const { return TextSection; }
const MCSection *getDataSection() const { return DataSection; }
const MCSection *getBSSSection() const { return BSSSection; }
- const MCSection *getStaticCtorSection() const { return StaticCtorSection; }
- const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
const MCSection *getLSDASection() const { return LSDASection; }
const MCSection *getCompactUnwindSection() const{
return CompactUnwindSection;
}
+ const MCSection *getDwarfAccelNamesSection() const {
+ return DwarfAccelNamesSection;
+ }
+ const MCSection *getDwarfAccelObjCSection() const {
+ return DwarfAccelObjCSection;
+ }
+ const MCSection *getDwarfAccelNamespaceSection() const {
+ return DwarfAccelNamespaceSection;
+ }
+ const MCSection *getDwarfAccelTypesSection() const {
+ return DwarfAccelTypesSection;
+ }
const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; }
- const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;}
const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;}
const MCSection *getDwarfDebugInlineSection() const {
return DwarfDebugInlineSection;
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index f897e64f4456..a69075ddd002 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -34,6 +34,8 @@ class MCObjectStreamer : public MCStreamer {
MCSectionData *CurSectionData;
virtual void EmitInstToData(const MCInst &Inst) = 0;
+ virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
protected:
MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
@@ -70,14 +72,15 @@ public:
virtual void ChangeSection(const MCSection *Section);
virtual void EmitInstruction(const MCInst &Inst);
virtual void EmitInstToFragment(const MCInst &Inst);
- virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
+ virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
const MCSymbol *Label,
unsigned PointerSize);
virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label);
- virtual void Finish();
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+ virtual void FinishImpl();
/// @}
};
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 782d844598b4..6e44e6ceffa3 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -10,7 +10,6 @@
#ifndef LLVM_MC_MCOBJECTWRITER_H
#define LLVM_MC_MCOBJECTWRITER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DataTypes.h"
#include <cassert>
@@ -20,11 +19,9 @@ class MCAsmLayout;
class MCAssembler;
class MCFixup;
class MCFragment;
-class MCSymbol;
class MCSymbolData;
class MCSymbolRefExpr;
class MCValue;
-class raw_ostream;
/// MCObjectWriter - Defines the object file and target independent interfaces
/// used by the assembler backend to write native file format object files.
@@ -188,11 +185,10 @@ public:
/// Utility function to encode a SLEB128 value.
static void EncodeSLEB128(int64_t Value, raw_ostream &OS);
/// Utility function to encode a ULEB128 value.
- static void EncodeULEB128(uint64_t Value, raw_ostream &OS);
+ static void EncodeULEB128(uint64_t Value, raw_ostream &OS,
+ unsigned Padding = 0);
};
-MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
-
} // End llvm namespace
#endif
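The new Padding parameter suggests fixed-width ULEB128 output: pad with 0x80 continuation bytes and close with a zero byte, so a value can later be patched in place without resizing. A standalone sketch under that assumption:

    #include <stdint.h>
    #include <vector>

    void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out,
                       unsigned Padding = 0) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0 || Padding != 0)
          Byte |= 0x80; // mark that more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      // Pad with 0x80 bytes (all value bits zero, continuation set) and
      // finish with a plain zero byte.
      if (Padding != 0) {
        for (; Padding != 1; --Padding)
          Out.push_back(0x80);
        Out.push_back(0x00);
      }
    }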
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 9bbb75581c25..ac04483ccf16 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -71,6 +71,7 @@ public:
bool isNot(TokenKind K) const { return Kind != K; }
SMLoc getLoc() const;
+ SMLoc getEndLoc() const;
/// getStringContents - Get the contents of a string token (without quotes).
StringRef getStringContents() const {
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index 6ff175349e43..793c7097ba14 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -11,6 +11,7 @@
#define LLVM_MC_MCASMPARSER_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/ArrayRef.h"
namespace llvm {
class AsmToken;
@@ -22,6 +23,7 @@ class MCExpr;
class MCStreamer;
class MCTargetAsmParser;
class SMLoc;
+class SMRange;
class SourceMgr;
class StringRef;
class Twine;
@@ -62,6 +64,9 @@ public:
MCTargetAsmParser &getTargetParser() const { return *TargetParser; }
void setTargetParser(MCTargetAsmParser &P);
+ virtual unsigned getAssemblerDialect() { return 0; }
+ virtual void setAssemblerDialect(unsigned i) { }
+
bool getShowParsedOperands() const { return ShowParsedOperands; }
void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
@@ -72,14 +77,16 @@ public:
/// Msg.
///
/// \return The return value is true, if warnings are fatal.
- virtual bool Warning(SMLoc L, const Twine &Msg) = 0;
+ virtual bool Warning(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0;
/// Error - Emit an error at the location \arg L, with the message \arg
/// Msg.
///
/// \return The return value is always true, as an idiomatic convenience to
/// clients.
- virtual bool Error(SMLoc L, const Twine &Msg) = 0;
+ virtual bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) = 0;
/// Lex - Get the next AsmToken in the stream, possibly handling file
/// inclusion first.
@@ -89,7 +96,8 @@ public:
const AsmToken &getTok();
/// \brief Report an error at the current lexer location.
- bool TokError(const Twine &Msg);
+ bool TokError(const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
/// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
/// and set \arg Res to the identifier contents.
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index ada5ae80af0c..27acf2f2cc21 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -17,6 +17,7 @@
#define LLVM_MC_MCREGISTERINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
namespace llvm {
@@ -24,28 +25,18 @@ namespace llvm {
/// MCRegisterClass - Base class of TargetRegisterClass.
class MCRegisterClass {
public:
- typedef const unsigned* iterator;
- typedef const unsigned* const_iterator;
-private:
- unsigned ID;
+ typedef const uint16_t* iterator;
+ typedef const uint16_t* const_iterator;
+
const char *Name;
- const unsigned RegSize, Alignment; // Size & Alignment of register in bytes
- const int CopyCost;
+ const iterator RegsBegin;
+ const uint8_t *const RegSet;
+ const uint16_t RegsSize;
+ const uint16_t RegSetSize;
+ const uint16_t ID;
+ const uint16_t RegSize, Alignment; // Size & Alignment of register in bytes
+ const int8_t CopyCost;
const bool Allocatable;
- const iterator RegsBegin, RegsEnd;
- const unsigned char *const RegSet;
- const unsigned RegSetSize;
-public:
- MCRegisterClass(unsigned id, const char *name,
- unsigned RS, unsigned Al, int CC, bool Allocable,
- iterator RB, iterator RE, const unsigned char *Bits,
- unsigned NumBytes)
- : ID(id), Name(name), RegSize(RS), Alignment(Al), CopyCost(CC),
- Allocatable(Allocable), RegsBegin(RB), RegsEnd(RE), RegSet(Bits),
- RegSetSize(NumBytes) {
- for (iterator i = RegsBegin; i != RegsEnd; ++i)
- assert(contains(*i) && "Bit field corrupted.");
- }
/// getID() - Return the register class ID number.
///
@@ -58,11 +49,11 @@ public:
/// begin/end - Return all of the registers in this class.
///
iterator begin() const { return RegsBegin; }
- iterator end() const { return RegsEnd; }
+ iterator end() const { return RegsBegin + RegsSize; }
/// getNumRegs - Return the number of registers in this class.
///
- unsigned getNumRegs() const { return (unsigned)(RegsEnd-RegsBegin); }
+ unsigned getNumRegs() const { return RegsSize; }
/// getRegister - Return the specified register in the class.
///
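
Register lists are now arrays of uint16_t located by RegsBegin and RegsSize
rather than a begin/end pointer pair, but enumeration still goes through
begin()/end(). A small sketch, assuming only the accessors shown above:

    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/raw_ostream.h"

    // Print the 16-bit register numbers contained in a register class.
    void printClass(const llvm::MCRegisterClass &RC) {
      llvm::outs() << RC.getName() << ":";
      for (llvm::MCRegisterClass::iterator I = RC.begin(), E = RC.end();
           I != E; ++I)
        llvm::outs() << " " << unsigned(*I);
      llvm::outs() << "\n";
    }
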
@@ -115,10 +106,10 @@ public:
/// of AX.
///
struct MCRegisterDesc {
- const char *Name; // Printable name for the reg (for debugging)
- const unsigned *Overlaps; // Overlapping registers, described above
- const unsigned *SubRegs; // Sub-register set, described above
- const unsigned *SuperRegs; // Super-register set, described above
+ const char *Name; // Printable name for the reg (for debugging)
+ uint32_t Overlaps; // Overlapping registers, described above
+ uint32_t SubRegs; // Sub-register set, described above
+ uint32_t SuperRegs; // Super-register set, described above
};
/// MCRegisterInfo base class - We assume that the target defines a static
@@ -136,50 +127,82 @@ struct MCRegisterDesc {
class MCRegisterInfo {
public:
typedef const MCRegisterClass *regclass_iterator;
+
+ /// DwarfLLVMRegPair - Emitted by tablegen so Dwarf<->LLVM reg mappings can be
+ /// performed with a binary search.
+ struct DwarfLLVMRegPair {
+ unsigned FromReg;
+ unsigned ToReg;
+
+ bool operator<(DwarfLLVMRegPair RHS) const { return FromReg < RHS.FromReg; }
+ };
private:
const MCRegisterDesc *Desc; // Pointer to the descriptor array
unsigned NumRegs; // Number of entries in the array
unsigned RAReg; // Return address register
const MCRegisterClass *Classes; // Pointer to the regclass array
unsigned NumClasses; // Number of entries in the array
- DenseMap<unsigned, int> L2DwarfRegs; // LLVM to Dwarf regs mapping
- DenseMap<unsigned, int> EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH
- DenseMap<unsigned, unsigned> Dwarf2LRegs; // Dwarf to LLVM regs mapping
- DenseMap<unsigned, unsigned> EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH
+ const uint16_t *RegLists; // Pointer to the reglists array
+ const uint16_t *SubRegIndices; // Pointer to the subreg lookup
+ // array.
+ unsigned NumSubRegIndices; // Number of subreg indices.
+
+ unsigned L2DwarfRegsSize;
+ unsigned EHL2DwarfRegsSize;
+ unsigned Dwarf2LRegsSize;
+ unsigned EHDwarf2LRegsSize;
+ const DwarfLLVMRegPair *L2DwarfRegs; // LLVM to Dwarf regs mapping
+ const DwarfLLVMRegPair *EHL2DwarfRegs; // LLVM to Dwarf regs mapping EH
+ const DwarfLLVMRegPair *Dwarf2LRegs; // Dwarf to LLVM regs mapping
+ const DwarfLLVMRegPair *EHDwarf2LRegs; // Dwarf to LLVM regs mapping EH
DenseMap<unsigned, int> L2SEHRegs; // LLVM to SEH regs mapping
public:
/// InitMCRegisterInfo - Initialize MCRegisterInfo, called by TableGen
/// auto-generated routines. *DO NOT USE*.
void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
- const MCRegisterClass *C, unsigned NC) {
+ const MCRegisterClass *C, unsigned NC,
+ const uint16_t *RL,
+ const uint16_t *SubIndices,
+ unsigned NumIndices) {
Desc = D;
NumRegs = NR;
RAReg = RA;
Classes = C;
+ RegLists = RL;
NumClasses = NC;
+ SubRegIndices = SubIndices;
+ NumSubRegIndices = NumIndices;
}
- /// mapLLVMRegToDwarfReg - Used to initialize LLVM register to Dwarf
+ /// mapLLVMRegsToDwarfRegs - Used to initialize LLVM register to Dwarf
/// register number mapping. Called by TableGen auto-generated routines.
/// *DO NOT USE*.
- void mapLLVMRegToDwarfReg(unsigned LLVMReg, int DwarfReg, bool isEH) {
- if (isEH)
- EHL2DwarfRegs[LLVMReg] = DwarfReg;
- else
- L2DwarfRegs[LLVMReg] = DwarfReg;
+ void mapLLVMRegsToDwarfRegs(const DwarfLLVMRegPair *Map, unsigned Size,
+ bool isEH) {
+ if (isEH) {
+ EHL2DwarfRegs = Map;
+ EHL2DwarfRegsSize = Size;
+ } else {
+ L2DwarfRegs = Map;
+ L2DwarfRegsSize = Size;
+ }
}
-
- /// mapDwarfRegToLLVMReg - Used to initialize Dwarf register to LLVM
+
+ /// mapDwarfRegsToLLVMRegs - Used to initialize Dwarf register to LLVM
/// register number mapping. Called by TableGen auto-generated routines.
/// *DO NOT USE*.
- void mapDwarfRegToLLVMReg(unsigned DwarfReg, unsigned LLVMReg, bool isEH) {
- if (isEH)
- EHDwarf2LRegs[DwarfReg] = LLVMReg;
- else
- Dwarf2LRegs[DwarfReg] = LLVMReg;
+ void mapDwarfRegsToLLVMRegs(const DwarfLLVMRegPair *Map, unsigned Size,
+ bool isEH) {
+ if (isEH) {
+ EHDwarf2LRegs = Map;
+ EHDwarf2LRegsSize = Size;
+ } else {
+ Dwarf2LRegs = Map;
+ Dwarf2LRegsSize = Size;
+ }
}
-
+
/// mapLLVMRegToSEHReg - Used to initialize LLVM register to SEH register
/// number mapping. By default the SEH register number is just the same
/// as the LLVM register number.
@@ -212,9 +235,9 @@ public:
/// register, or a null list if there are none. The list returned is zero
/// terminated.
///
- const unsigned *getAliasSet(unsigned RegNo) const {
+ const uint16_t *getAliasSet(unsigned RegNo) const {
// The Overlaps set always begins with Reg itself.
- return get(RegNo).Overlaps + 1;
+ return RegLists + get(RegNo).Overlaps + 1;
}
/// getOverlaps - Return a list of registers that overlap Reg, including
@@ -222,8 +245,8 @@ public:
/// list.
/// These are exactly the registers in { x | regsOverlap(x, Reg) }.
///
- const unsigned *getOverlaps(unsigned RegNo) const {
- return get(RegNo).Overlaps;
+ const uint16_t *getOverlaps(unsigned RegNo) const {
+ return RegLists + get(RegNo).Overlaps;
}
/// getSubRegisters - Return the list of registers that are sub-registers of
@@ -231,8 +254,35 @@ public:
/// returned is zero terminated and sorted according to super-sub register
/// relations. e.g. X86::RAX's sub-register list is EAX, AX, AL, AH.
///
- const unsigned *getSubRegisters(unsigned RegNo) const {
- return get(RegNo).SubRegs;
+ const uint16_t *getSubRegisters(unsigned RegNo) const {
+ return RegLists + get(RegNo).SubRegs;
+ }
+
+ /// getSubReg - Returns the physical register number of sub-register Idx
+ /// for physical register Reg. Return zero if the sub-register does not
+ /// exist.
+ unsigned getSubReg(unsigned Reg, unsigned Idx) const {
+ return *(SubRegIndices + (Reg - 1) * NumSubRegIndices + Idx - 1);
+ }
+
+ /// getMatchingSuperReg - Return a super-register of the specified register
+ /// Reg so its sub-register of index SubIdx is Reg.
+ unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
+ const MCRegisterClass *RC) const {
+ for (const uint16_t *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs)
+ if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR))
+ return SR;
+ return 0;
+ }
+
+ /// getSubRegIndex - For a given register pair, return the sub-register index
+ /// if the second register is a sub-register of the first. Return zero
+ /// otherwise.
+ unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const {
+ for (unsigned I = 1; I <= NumSubRegIndices; ++I)
+ if (getSubReg(RegNo, I) == SubRegNo)
+ return I;
+ return 0;
}
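
getSubReg treats SubRegIndices as a dense NumRegs x NumSubRegIndices table,
stored row-major and biased by one because register number 0 and sub-register
index 0 are both reserved. A self-contained model of that lookup (names
invented for illustration):

    #include <cassert>
    #include <stdint.h>

    // Row (Reg - 1) holds the sub-registers of Reg; column (Idx - 1) selects
    // the sub-register index. A stored 0 means "no such sub-register".
    uint16_t subRegLookup(const uint16_t *SubRegIndices,
                          unsigned NumSubRegIndices,
                          unsigned Reg, unsigned Idx) {
      assert(Reg >= 1 && Idx >= 1 && Idx <= NumSubRegIndices);
      return SubRegIndices[(Reg - 1) * NumSubRegIndices + (Idx - 1)];
    }
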
/// getSuperRegisters - Return the list of registers that are super-registers
@@ -240,8 +290,8 @@ public:
/// returned is zero terminated and sorted according to super-sub register
/// relations. e.g. X86::AL's super-register list is AX, EAX, RAX.
///
- const unsigned *getSuperRegisters(unsigned RegNo) const {
- return get(RegNo).SuperRegs;
+ const uint16_t *getSuperRegisters(unsigned RegNo) const {
+ return RegLists + get(RegNo).SuperRegs;
}
/// getName - Return the human-readable symbolic target-specific name for the
@@ -261,22 +311,26 @@ public:
/// parameter allows targets to use different numberings for EH info and
/// debugging info.
int getDwarfRegNum(unsigned RegNum, bool isEH) const {
- const DenseMap<unsigned, int> &M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
- const DenseMap<unsigned, int>::const_iterator I = M.find(RegNum);
- if (I == M.end()) return -1;
- return I->second;
+ const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
+ unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize;
+
+ DwarfLLVMRegPair Key = { RegNum, 0 };
+ const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
+ if (I == M+Size || I->FromReg != RegNum)
+ return -1;
+ return I->ToReg;
}
/// getLLVMRegNum - Map a dwarf register back to a target register.
///
int getLLVMRegNum(unsigned RegNum, bool isEH) const {
- const DenseMap<unsigned, unsigned> &M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
- const DenseMap<unsigned, unsigned>::const_iterator I = M.find(RegNum);
- if (I == M.end()) {
- assert(0 && "Invalid RegNum");
- return -1;
- }
- return I->second;
+ const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
+ unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize;
+
+ DwarfLLVMRegPair Key = { RegNum, 0 };
+ const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
+ assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum");
+ return I->ToReg;
}
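
The DenseMap-based mappings give way to DwarfLLVMRegPair arrays that TableGen
emits pre-sorted by FromReg, so a lookup is one std::lower_bound over static
data instead of a hash probe. A standalone model of the getDwarfRegNum lookup
above, with the pair type reduced to its essentials:

    #include <algorithm>

    struct Pair {
      unsigned FromReg, ToReg;
      bool operator<(Pair RHS) const { return FromReg < RHS.FromReg; }
    };

    // Binary-search M[0..Size) for Reg; -1 mirrors the "not found" result.
    int dwarfRegNum(const Pair *M, unsigned Size, unsigned Reg) {
      Pair Key = { Reg, 0 };
      const Pair *I = std::lower_bound(M, M + Size, Key);
      if (I == M + Size || I->FromReg != Reg)
        return -1;
      return I->ToReg;
    }
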
/// getSEHRegNum - Map a target register to an equivalent SEH register
@@ -301,7 +355,7 @@ public:
return Classes[i];
}
};
-
+
} // End llvm namespace
#endif
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
index 57008177b6d3..7da6534b6e88 100644
--- a/include/llvm/MC/MCSection.h
+++ b/include/llvm/MC/MCSection.h
@@ -14,12 +14,10 @@
#ifndef LLVM_MC_MCSECTION_H
#define LLVM_MC_MCSECTION_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Casting.h"
namespace llvm {
- class MCContext;
class MCAsmInfo;
class raw_ostream;
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index b154cf59d106..7eacde57f48f 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -15,8 +15,8 @@
#define LLVM_MC_MCSECTIONCOFF_H
#include "llvm/MC/MCSection.h"
-
#include "llvm/Support/COFF.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index c82de7128202..7321ca83e897 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -16,6 +16,7 @@
#include "llvm/MC/MCSection.h"
#include "llvm/Support/ELF.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index bdb17e9008b2..15eb4f4a7685 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -15,6 +15,7 @@
#define LLVM_MC_MCSECTIONMACHO_H
#include "llvm/MC/MCSection.h"
+#include "llvm/ADT/StringRef.h"
namespace llvm {
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 451efbff6e3a..25956008e021 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -23,7 +23,6 @@
namespace llvm {
class MCAsmBackend;
- class MCAsmInfo;
class MCCodeEmitter;
class MCContext;
class MCExpr;
@@ -32,7 +31,6 @@ namespace llvm {
class MCSection;
class MCSymbol;
class StringRef;
- class TargetLoweringObjectFile;
class Twine;
class raw_ostream;
class formatted_raw_ostream;
@@ -94,6 +92,10 @@ namespace llvm {
const MCExpr *ForceExpAbs(const MCExpr* Expr);
+ void RecordProcStart(MCDwarfFrameInfo &Frame);
+ virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
+ void RecordProcEnd(MCDwarfFrameInfo &Frame);
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame);
void EmitFrames(bool usingCFI);
MCWin64EHUnwindInfo *getCurrentW64UnwindInfo(){return CurrentW64UnwindInfo;}
@@ -334,6 +336,11 @@ namespace llvm {
/// EndCOFFSymbolDef - Marks the end of the symbol definition.
virtual void EndCOFFSymbolDef() = 0;
+ /// EmitCOFFSecRel32 - Emits a COFF section relative relocation.
+ ///
+ /// @param Symbol - Symbol the section relative relocation should point to.
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
+
/// EmitELFSize - Emit an ELF .size directive.
///
/// This corresponds to an assembler statement such as:
@@ -420,7 +427,8 @@ namespace llvm {
/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
- void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0);
+ void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0,
+ unsigned Padding = 0);
/// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
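
The new Padding parameter reserves a fixed-width ULEB128 encoding: after the
value's own groups, the encoder emits 0x80 continuation bytes and closes with
a 0x00 group. A standalone sketch of that scheme, assuming it matches the
EncodeULEB128 declaration earlier in this patch:

    #include <stdint.h>
    #include <vector>

    void encodeULEB128(uint64_t Value, unsigned Padding,
                       std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0 || Padding != 0)
          Byte |= 0x80;              // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      if (Padding != 0) {            // pad with 0x80, end with a 0x00 group
        for (; Padding != 1; --Padding)
          Out.push_back(0x80);
        Out.push_back(0x00);
      }
    }
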
@@ -431,6 +439,13 @@ namespace llvm {
void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
unsigned AddrSpace = 0);
+ /// EmitGPRel64Value - Emit the expression @p Value into the output as a
+ /// gprel64 (64-bit GP relative) value.
+ ///
+ /// This is used to implement assembler directives such as .gpdword on
+ /// targets that support them.
+ virtual void EmitGPRel64Value(const MCExpr *Value);
+
/// EmitGPRel32Value - Emit the expression @p Value into the output as a
/// gprel32 (32-bit GP relative) value.
///
@@ -493,7 +508,8 @@ namespace llvm {
/// @param Offset - The offset to reach. This may be an expression, but the
/// expression must be associated with the current section.
/// @param Value - The value to use when filling bytes.
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ /// @return false on success, true if the offset was invalid.
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0) = 0;
/// @}
@@ -505,7 +521,8 @@ namespace llvm {
/// EmitDwarfFileDirective - Associate a filename with a specified logical
/// file number. This implements the DWARF2 '.file 4 "foo.c"' assembler
/// directive.
- virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename);
/// EmitDwarfLocDirective - This implements the DWARF2
// '.loc fileno lineno ...' assembler directive.
@@ -529,8 +546,8 @@ namespace llvm {
virtual void EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding);
virtual void EmitCFISections(bool EH, bool Debug);
- virtual void EmitCFIStartProc();
- virtual void EmitCFIEndProc();
+ void EmitCFIStartProc();
+ void EmitCFIEndProc();
virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
virtual void EmitCFIDefCfaOffset(int64_t Offset);
virtual void EmitCFIDefCfaRegister(int64_t Register);
@@ -540,8 +557,11 @@ namespace llvm {
virtual void EmitCFIRememberState();
virtual void EmitCFIRestoreState();
virtual void EmitCFISameValue(int64_t Register);
+ virtual void EmitCFIRestore(int64_t Register);
virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+ virtual void EmitCFIEscape(StringRef Values);
+ virtual void EmitCFISignalFrame();
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -581,8 +601,10 @@ namespace llvm {
virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
bool isVector);
+ /// FinishImpl - Streamer specific finalization.
+ virtual void FinishImpl() = 0;
/// Finish - Finish emission of machine code.
- virtual void Finish() = 0;
+ void Finish();
};
/// createNullStreamer - Create a dummy machine code streamer, which does
@@ -613,6 +635,7 @@ namespace llvm {
bool isVerboseAsm,
bool useLoc,
bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *InstPrint = 0,
MCCodeEmitter *CE = 0,
MCAsmBackend *TAB = 0,
@@ -638,14 +661,8 @@ namespace llvm {
/// createELFStreamer - Create a machine code streamer which will generate
/// ELF format object files.
MCStreamer *createELFStreamer(MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *CE,
- bool RelaxAll, bool NoExecStack);
-
- /// createLoggingStreamer - Create a machine code streamer which just logs the
- /// API calls and then dispatches to another streamer.
- ///
- /// The new streamer takes ownership of the \arg Child.
- MCStreamer *createLoggingStreamer(MCStreamer *Child, raw_ostream &OS);
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll, bool NoExecStack);
/// createPureStreamer - Create a machine code streamer which will generate
/// "pure" MC object files, for use with MC-JIT and testing tools.
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
new file mode 100644
index 000000000000..7a0b1ffaf0a0
--- /dev/null
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -0,0 +1,36 @@
+//===-- llvm/MC/MCWinCOFFObjectWriter.h - Win COFF Object Writer -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCWINCOFFOBJECTWRITER_H
+#define LLVM_MC_MCWINCOFFOBJECTWRITER_H
+
+namespace llvm {
+ class MCWinCOFFObjectTargetWriter {
+ const unsigned Machine;
+
+ protected:
+ MCWinCOFFObjectTargetWriter(unsigned Machine_);
+
+ public:
+ virtual ~MCWinCOFFObjectTargetWriter() {}
+
+ unsigned getMachine() const { return Machine; }
+ virtual unsigned getRelocType(unsigned FixupKind) const = 0;
+ };
+
+ /// \brief Construct a new Win COFF writer instance.
+ ///
+ /// \param MOTW - The target specific WinCOFF writer subclass.
+ /// \param OS - The stream to write to.
+ /// \returns The constructed object writer.
+ MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS);
+} // End llvm namespace
+
+#endif
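
A hedged sketch of how a target plugs into this new interface: subclass
MCWinCOFFObjectTargetWriter, translate fixup kinds into COFF relocation
types, and hand the result to the factory above. The i386 machine and
relocation constants are illustrative stand-ins for a real target's choices:

    #include "llvm/MC/MCWinCOFFObjectWriter.h"
    #include "llvm/Support/COFF.h"

    namespace {
    class MyCOFFWriter : public llvm::MCWinCOFFObjectTargetWriter {
    public:
      MyCOFFWriter()
        : MCWinCOFFObjectTargetWriter(llvm::COFF::IMAGE_FILE_MACHINE_I386) {}
      // A real target switches over its fixup kinds here.
      virtual unsigned getRelocType(unsigned FixupKind) const {
        return llvm::COFF::IMAGE_REL_I386_DIR32;
      }
    };
    } // end anonymous namespace
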
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index 887e33c7a181..73579861ec41 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -36,30 +36,27 @@ template<typename ValueSubClass, typename ItemParentClass>
/// These are used to efficiently contain a byte sequence for metadata.
/// MDString is always unnamed.
class MDString : public Value {
+ virtual void anchor();
MDString(const MDString &); // DO NOT IMPLEMENT
- StringRef Str;
- explicit MDString(LLVMContext &C, StringRef S);
-
+ explicit MDString(LLVMContext &C);
public:
static MDString *get(LLVMContext &Context, StringRef Str);
static MDString *get(LLVMContext &Context, const char *Str) {
return get(Context, Str ? StringRef(Str) : StringRef());
}
- StringRef getString() const { return Str; }
+ StringRef getString() const { return getName(); }
- unsigned getLength() const { return (unsigned)Str.size(); }
+ unsigned getLength() const { return (unsigned)getName().size(); }
typedef StringRef::iterator iterator;
/// begin() - Pointer to the first byte of the string.
- ///
- iterator begin() const { return Str.begin(); }
+ iterator begin() const { return getName().begin(); }
/// end() - Pointer to one byte past the end of the string.
- ///
- iterator end() const { return Str.end(); }
+ iterator end() const { return getName().end(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const MDString *) { return true; }
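
MDString now keeps its bytes in the Value name instead of a separate
StringRef member, so getString() simply forwards to getName(). The public
API is unchanged; a minimal usage sketch:

    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"

    // Strings are uniqued per context, so repeated get() calls with the
    // same bytes return the same MDString.
    llvm::MDString *makeTag(llvm::LLVMContext &Ctx) {
      llvm::MDString *S = llvm::MDString::get(Ctx, "my.tag");
      return S;  // S->getString() == "my.tag"
    }
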
@@ -78,6 +75,10 @@ class MDNode : public Value, public FoldingSetNode {
void operator=(const MDNode &); // DO NOT IMPLEMENT
friend class MDNodeOperand;
friend class LLVMContextImpl;
+ friend struct FoldingSetTrait<MDNode>;
+
+ /// Hash - If the MDNode is uniqued, cache the hash to speed up lookup.
+ unsigned Hash;
/// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the
/// end of this MDNode.
@@ -134,6 +135,9 @@ public:
/// deleteTemporary - Deallocate a node created by getTemporary. The
/// node must not have any users.
static void deleteTemporary(MDNode *N);
+
+ /// replaceOperandWith - Replace a specific operand.
+ void replaceOperandWith(unsigned i, Value *NewVal);
/// getOperand - Return specified operand.
Value *getOperand(unsigned i) const;
@@ -225,6 +229,9 @@ public:
/// print - Implement operator<< on NamedMDNode.
void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const;
+
+ /// dump() - Allow printing of NamedMDNodes from the debugger.
+ void dump() const;
};
} // end llvm namespace
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 8ce5ec4f1d14..b9c98814f159 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -30,8 +30,7 @@ class GVMaterializer;
class LLVMContext;
class StructType;
template<typename T> struct DenseMapInfo;
-template<typename KeyT, typename ValueT,
- typename KeyInfoT, typename ValueInfoT> class DenseMap;
+template<typename KeyT, typename ValueT, typename KeyInfoT> class DenseMap;
template<> struct ilist_traits<Function>
: public SymbolTableListTraits<Function, Module> {
@@ -154,6 +153,39 @@ public:
/// An enumeration for describing the size of a pointer on the target machine.
enum PointerSize { AnyPointerSize, Pointer32, Pointer64 };
+ /// An enumeration for the supported behaviors of module flags. The following
+ /// module flags behavior values are supported:
+ ///
+ /// Value Behavior
+ /// ----- --------
+ /// 1 Error
+ /// Emits an error if two values disagree.
+ ///
+ /// 2 Warning
+ /// Emits a warning if two values disagree.
+ ///
+ /// 3 Require
+ /// Emits an error when the specified value is not present
+ /// or doesn't have the specified value. It is an error for
+ /// two (or more) llvm.module.flags with the same ID to have
+ /// the Require behavior but different values. There may be
+ /// multiple Require flags per ID.
+ ///
+ /// 4 Override
+ /// Uses the specified value if the two values disagree. It
+ /// is an error for two (or more) llvm.module.flags with the
+ /// same ID to have the Override behavior but different
+ /// values.
+ enum ModFlagBehavior { Error = 1, Warning = 2, Require = 3, Override = 4 };
+
+ struct ModuleFlagEntry {
+ ModFlagBehavior Behavior;
+ MDString *Key;
+ Value *Val;
+ ModuleFlagEntry(ModFlagBehavior B, MDString *K, Value *V)
+ : Behavior(B), Key(K), Val(V) {}
+ };
+
/// @}
/// @name Member Variables
/// @{
@@ -266,8 +298,8 @@ public:
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
- typedef DenseMap<StructType*, unsigned, DenseMapInfo<StructType*>,
- DenseMapInfo<unsigned> > NumeredTypesMapTy;
+ typedef DenseMap<StructType*, unsigned, DenseMapInfo<StructType*> >
+ NumeredTypesMapTy;
/// findUsedStructTypes - Walk the entire module and find all of the
/// struct types that are in use, returning them in a vector.
@@ -373,6 +405,30 @@ public:
void eraseNamedMetadata(NamedMDNode *NMD);
/// @}
+/// @name Module Flags Accessors
+/// @{
+
+ /// getModuleFlagsMetadata - Returns the module flags in the provided vector.
+ void getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const;
+
+ /// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
+ /// represents module-level flags. This method returns null if there are no
+ /// module-level flags.
+ NamedMDNode *getModuleFlagsMetadata() const;
+
+ /// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module
+ /// that represents module-level flags. If module-level flags aren't found,
+ /// it creates the named metadata that contains them.
+ NamedMDNode *getOrInsertModuleFlagsMetadata();
+
+ /// addModuleFlag - Add a module-level flag to the module-level flags
+ /// metadata. It will create the module-level flags named metadata if it
+ /// doesn't already exist.
+ void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, Value *Val);
+ void addModuleFlag(ModFlagBehavior Behavior, StringRef Key, uint32_t Val);
+ void addModuleFlag(MDNode *Node);
+
+/// @}
/// @name Materialization
/// @{
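
A usage sketch for the module-flags accessors above (the flag key is
invented for illustration): addModuleFlag creates the llvm.module.flags
named metadata on first use and appends one (behavior, key, value) entry:

    #include "llvm/Module.h"

    void tagModule(llvm::Module &M) {
      // Appends the equivalent of !{ i32 1, !"my-flag", i32 7 }; the Error
      // behavior makes merging fail if two modules disagree on the value.
      M.addModuleFlag(llvm::Module::Error, "my-flag", 7);
    }
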
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 4f081206c5bc..358b27a416cd 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -22,6 +22,7 @@ namespace llvm {
namespace object {
class Archive : public Binary {
+ virtual void anchor();
public:
class Child {
const Archive *Parent;
@@ -34,6 +35,10 @@ public:
return (Parent == other.Parent) && (Data.begin() == other.Data.begin());
}
+ bool operator <(const Child &other) const {
+ return Data.begin() < other.Data.begin();
+ }
+
Child getNext() const;
error_code getName(StringRef &Result) const;
int getLastModified() const;
@@ -50,6 +55,7 @@ public:
class child_iterator {
Child child;
public:
+ child_iterator() : child(Child(0, StringRef())) {}
child_iterator(const Child &c) : child(c) {}
const Child* operator->() const {
return &child;
@@ -63,24 +69,73 @@ public:
return !(*this == other);
}
+ bool operator <(const child_iterator &other) const {
+ return child < other.child;
+ }
+
child_iterator& operator++() { // Preincrement
child = child.getNext();
return *this;
}
};
+ class Symbol {
+ const Archive *Parent;
+ uint32_t SymbolIndex;
+ uint32_t StringIndex; // Extra index to the string.
+
+ public:
+ bool operator ==(const Symbol &other) const {
+ return (Parent == other.Parent) && (SymbolIndex == other.SymbolIndex);
+ }
+
+ Symbol(const Archive *p, uint32_t symi, uint32_t stri)
+ : Parent(p)
+ , SymbolIndex(symi)
+ , StringIndex(stri) {}
+ error_code getName(StringRef &Result) const;
+ error_code getMember(child_iterator &Result) const;
+ Symbol getNext() const;
+ };
+
+ class symbol_iterator {
+ Symbol symbol;
+ public:
+ symbol_iterator(const Symbol &s) : symbol(s) {}
+ const Symbol *operator->() const {
+ return &symbol;
+ }
+
+ bool operator==(const symbol_iterator &other) const {
+ return symbol == other.symbol;
+ }
+
+ bool operator!=(const symbol_iterator &other) const {
+ return !(*this == other);
+ }
+
+ symbol_iterator& operator++() { // Preincrement
+ symbol = symbol.getNext();
+ return *this;
+ }
+ };
+
Archive(MemoryBuffer *source, error_code &ec);
- child_iterator begin_children() const;
+ child_iterator begin_children(bool skip_internal = true) const;
child_iterator end_children() const;
+ symbol_iterator begin_symbols() const;
+ symbol_iterator end_symbols() const;
+
// Cast methods.
static inline bool classof(Archive const *v) { return true; }
static inline bool classof(Binary const *v) {
- return v->getType() == Binary::isArchive;
+ return v->isArchive();
}
private:
+ child_iterator SymbolTable;
child_iterator StringTable;
};
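
A sketch of walking the new archive symbol table, using only the iterators
and accessors declared above (error handling abbreviated):

    #include "llvm/Object/Archive.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    // Print each symbol in the archive index and the member defining it.
    void listSymbols(Archive &A) {
      for (Archive::symbol_iterator I = A.begin_symbols(),
                                    E = A.end_symbols(); I != E; ++I) {
        StringRef Name;
        Archive::child_iterator Member;
        if (I->getName(Name) || I->getMember(Member))
          continue;                     // skip entries that fail to resolve
        StringRef MemberName;
        if (!Member->getName(MemberName))
          outs() << Name << " defined in " << MemberName << "\n";
      }
    }
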
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
index cd092fd8e485..77a08d597c4d 100644
--- a/include/llvm/Object/Binary.h
+++ b/include/llvm/Object/Binary.h
@@ -37,16 +37,25 @@ protected:
Binary(unsigned int Type, MemoryBuffer *Source);
enum {
- isArchive,
-
+ ID_Archive,
// Object and children.
- isObject,
- isCOFF,
- isELF,
- isMachO,
- lastObject
+ ID_StartObjects,
+ ID_COFF,
+ ID_ELF32L, // ELF 32-bit, little endian
+ ID_ELF32B, // ELF 32-bit, big endian
+ ID_ELF64L, // ELF 64-bit, little endian
+ ID_ELF64B, // ELF 64-bit, big endian
+ ID_MachO,
+ ID_EndObjects
};
+ static inline unsigned int getELFType(bool isLittleEndian, bool is64Bits) {
+ if (isLittleEndian)
+ return is64Bits ? ID_ELF64L : ID_ELF32L;
+ else
+ return is64Bits ? ID_ELF64B : ID_ELF32B;
+ }
+
public:
virtual ~Binary();
@@ -56,9 +65,37 @@ public:
// Cast methods.
unsigned int getType() const { return TypeID; }
static inline bool classof(const Binary *v) { return true; }
+
+ // Convenience methods
+ bool isObject() const {
+ return TypeID > ID_StartObjects && TypeID < ID_EndObjects;
+ }
+
+ bool isArchive() const {
+ return TypeID == ID_Archive;
+ }
+
+ bool isELF() const {
+ return TypeID >= ID_ELF32L && TypeID <= ID_ELF64B;
+ }
+
+ bool isMachO() const {
+ return TypeID == ID_MachO;
+ }
+
+ bool isCOFF() const {
+ return TypeID == ID_COFF;
+ }
};
+/// @brief Create a Binary from Source, autodetecting the file type.
+///
+/// @param Source The data to create the Binary from. Ownership is transferred
+/// to Result if successful. If an error is returned, Source is destroyed
+/// by createBinary before returning.
+/// @param Result A pointer to the resulting Binary if no error occurred.
error_code createBinary(MemoryBuffer *Source, OwningPtr<Binary> &Result);
+
error_code createBinary(StringRef Path, OwningPtr<Binary> &Result);
}
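
The finer-grained type IDs come with convenience predicates, so callers can
dispatch on the detected container kind without knowing the ID layout. A
sketch built on the createBinary overloads above:

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Object/Binary.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    void describe(StringRef Path) {
      OwningPtr<Binary> Bin;
      if (error_code ec = createBinary(Path, Bin)) {
        errs() << Path << ": " << ec.message() << "\n";
        return;
      }
      if (Bin->isArchive())     outs() << "archive\n";
      else if (Bin->isCOFF())   outs() << "COFF object\n";
      else if (Bin->isELF())    outs() << "ELF object\n";
      else if (Bin->isMachO())  outs() << "Mach-O object\n";
    }
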
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index 067bcd471ae9..68b5ca1bc781 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -19,6 +19,9 @@
#include "llvm/Support/Endian.h"
namespace llvm {
+ template <typename T>
+ class ArrayRef;
+
namespace object {
struct coff_file_header {
@@ -45,13 +48,18 @@ struct coff_symbol {
support::ulittle32_t Value;
support::little16_t SectionNumber;
- struct {
- support::ulittle8_t BaseType;
- support::ulittle8_t ComplexType;
- } Type;
+ support::ulittle16_t Type;
support::ulittle8_t StorageClass;
support::ulittle8_t NumberOfAuxSymbols;
+
+ uint8_t getBaseType() const {
+ return Type & 0x0F;
+ }
+
+ uint8_t getComplexType() const {
+ return (Type & 0xF0) >> 4;
+ }
};
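
The symbol type collapses into a single 16-bit field, with the base type in
the low nibble and the complex (derived) type in the next nibble, exactly as
the accessors above decode it. A standalone model:

    #include <stdint.h>

    uint8_t baseType(uint16_t Type)    { return Type & 0x0F; }
    uint8_t complexType(uint16_t Type) { return (Type & 0xF0) >> 4; }
    // e.g. Type = 0x20: baseType() == 0 (IMAGE_SYM_TYPE_NULL) and
    // complexType() == 2 (IMAGE_SYM_DTYPE_FUNCTION).
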
struct coff_section {
@@ -73,6 +81,16 @@ struct coff_relocation {
support::ulittle16_t Type;
};
+struct coff_aux_section_definition {
+ support::ulittle32_t Length;
+ support::ulittle16_t NumberOfRelocations;
+ support::ulittle16_t NumberOfLinenumbers;
+ support::ulittle32_t CheckSum;
+ support::ulittle16_t Number;
+ support::ulittle8_t Selection;
+ char Unused[3];
+};
+
class COFFObjectFile : public ObjectFile {
private:
const coff_file_header *Header;
@@ -81,11 +99,7 @@ private:
const char *StringTable;
uint32_t StringTableSize;
- error_code getSection(int32_t index,
- const coff_section *&Res) const;
error_code getString(uint32_t offset, StringRef &Res) const;
- error_code getSymbol(uint32_t index,
- const coff_symbol *&Res) const;
const coff_symbol *toSymb(DataRefImpl Symb) const;
const coff_section *toSec(DataRefImpl Sec) const;
@@ -94,13 +108,14 @@ private:
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
- virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
- virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const;
- virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const;
+ virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const;
+ virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const;
+ virtual error_code getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const;
virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
@@ -111,6 +126,10 @@ protected:
virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const;
virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Res) const;
virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
bool &Result) const;
virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
@@ -120,10 +139,12 @@ protected:
RelocationRef &Res) const;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const;
virtual error_code getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const;
+ uint64_t &Res) const;
virtual error_code getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const;
virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
@@ -131,16 +152,46 @@ protected:
virtual error_code getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const;
+ virtual error_code getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Result) const;
+ virtual error_code getLibraryPath(DataRefImpl LibData,
+ StringRef &Result) const;
+
public:
COFFObjectFile(MemoryBuffer *Object, error_code &ec);
virtual symbol_iterator begin_symbols() const;
virtual symbol_iterator end_symbols() const;
+ virtual symbol_iterator begin_dynamic_symbols() const;
+ virtual symbol_iterator end_dynamic_symbols() const;
+ virtual library_iterator begin_libraries_needed() const;
+ virtual library_iterator end_libraries_needed() const;
virtual section_iterator begin_sections() const;
virtual section_iterator end_sections() const;
virtual uint8_t getBytesInAddress() const;
virtual StringRef getFileFormatName() const;
virtual unsigned getArch() const;
+ virtual StringRef getLoadName() const;
+
+ error_code getHeader(const coff_file_header *&Res) const;
+ error_code getSection(int32_t index, const coff_section *&Res) const;
+ error_code getSymbol(uint32_t index, const coff_symbol *&Res) const;
+ template <typename T>
+ error_code getAuxSymbol(uint32_t index, const T *&Res) const {
+ const coff_symbol *s;
+ error_code ec = getSymbol(index, s);
+ Res = reinterpret_cast<const T*>(s);
+ return ec;
+ }
+ error_code getSymbolName(const coff_symbol *symbol, StringRef &Res) const;
+ error_code getSectionName(const coff_section *Sec, StringRef &Res) const;
+ error_code getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const;
+
+ static inline bool classof(const Binary *v) {
+ return v->isCOFF();
+ }
+ static inline bool classof(const COFFObjectFile *v) { return true; }
};
}
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
new file mode 100644
index 000000000000..0828985f2e9b
--- /dev/null
+++ b/include/llvm/Object/ELF.h
@@ -0,0 +1,2209 @@
+//===- ELF.h - ELF object file implementation -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ELFObjectFile template class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_ELF_H
+#define LLVM_OBJECT_ELF_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <limits>
+#include <utility>
+
+namespace llvm {
+namespace object {
+
+// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelperCommon {
+ typedef support::detail::packed_endian_specific_integral
+ <uint16_t, target_endianness, support::aligned> Elf_Half;
+ typedef support::detail::packed_endian_specific_integral
+ <uint32_t, target_endianness, support::aligned> Elf_Word;
+ typedef support::detail::packed_endian_specific_integral
+ <int32_t, target_endianness, support::aligned> Elf_Sword;
+ typedef support::detail::packed_endian_specific_integral
+ <uint64_t, target_endianness, support::aligned> Elf_Xword;
+ typedef support::detail::packed_endian_specific_integral
+ <int64_t, target_endianness, support::aligned> Elf_Sxword;
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct ELFDataTypeTypedefHelper;
+
+/// ELF 32bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, false>
+ : ELFDataTypeTypedefHelperCommon<target_endianness> {
+ typedef uint32_t value_type;
+ typedef support::detail::packed_endian_specific_integral
+ <value_type, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <value_type, target_endianness, support::aligned> Elf_Off;
+};
+
+/// ELF 64bit types.
+template<support::endianness target_endianness>
+struct ELFDataTypeTypedefHelper<target_endianness, true>
+ : ELFDataTypeTypedefHelperCommon<target_endianness>{
+ typedef uint64_t value_type;
+ typedef support::detail::packed_endian_specific_integral
+ <value_type, target_endianness, support::aligned> Elf_Addr;
+ typedef support::detail::packed_endian_specific_integral
+ <value_type, target_endianness, support::aligned> Elf_Off;
+};
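
These helpers select fixed-endianness integer types so the ELF structs below
can be overlaid directly on file bytes and still read in host order. The same
machinery backs the support::ulittle* fields used in the COFF structs earlier
in this patch; a small standalone sketch:

    #include "llvm/Support/Endian.h"
    #include <stdio.h>

    int main() {
      // Four bytes as they would appear in a little-endian object file.
      unsigned char Bytes[4] = { 0x78, 0x56, 0x34, 0x12 };
      const llvm::support::ulittle32_t *V =
          reinterpret_cast<const llvm::support::ulittle32_t *>(Bytes);
      printf("0x%08x\n", (unsigned)*V);  // prints 0x12345678 on any host
      return 0;
    }
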
+
+// I really don't like doing this, but the alternative is copypasta.
+#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
+typedef typename \
+ ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
+
+ // Section header.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Word sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Word sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Word sh_addralign;// Section address alignment
+ Elf_Word sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness>
+struct Elf_Shdr_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word sh_name; // Section name (index into string table)
+ Elf_Word sh_type; // Section type (SHT_*)
+ Elf_Xword sh_flags; // Section flags (SHF_*)
+ Elf_Addr sh_addr; // Address where section is to be loaded
+ Elf_Off sh_offset; // File offset of section data, in bytes
+ Elf_Xword sh_size; // Size of section, in bytes
+ Elf_Word sh_link; // Section type-specific header table index link
+ Elf_Word sh_info; // Section type-specific extra information
+ Elf_Xword sh_addralign;// Section address alignment
+ Elf_Xword sh_entsize; // Size of records contained within the section
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
+ using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
+
+ /// @brief Get the number of entities this section contains if it has any.
+ unsigned getEntityCount() const {
+ if (sh_entsize == 0)
+ return 0;
+ return sh_size / sh_entsize;
+ }
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Word st_name; // Symbol name (index into string table)
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Word st_size; // Size of the symbol
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+};
+
+template<support::endianness target_endianness>
+struct Elf_Sym_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Word st_name; // Symbol name (index into string table)
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf_Half st_shndx; // Which section (header table index) it's defined in
+ Elf_Addr st_value; // Value or address associated with the symbol
+ Elf_Xword st_size; // Size of the symbol
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
+ using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
+
+ // These accessors and mutators correspond to the ELF32_ST_BIND,
+ // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
+ unsigned char getBinding() const { return st_info >> 4; }
+ unsigned char getType() const { return st_info & 0x0f; }
+ void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
+ void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
+ void setBindingAndType(unsigned char b, unsigned char t) {
+ st_info = (b << 4) + (t & 0x0f);
+ }
+};
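
These accessors implement the ELF ST_BIND/ST_TYPE/ST_INFO macros: binding in
the high nibble of st_info, type in the low nibble. A standalone check:

    #include <stdio.h>

    int main() {
      // STB_GLOBAL == 1, STT_FUNC == 2 in the ELF specification.
      unsigned char st_info = (1 << 4) + (2 & 0x0f);
      printf("binding=%d type=%d\n", st_info >> 4, st_info & 0x0f);
      return 0;  // prints: binding=1 type=2
    }
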
+
+/// Elf_Versym: This is the structure of entries in the SHT_GNU_versym section
+/// (.gnu.version). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Versym_Impl {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ Elf_Half vs_index; // Version index with flags (e.g. VERSYM_HIDDEN)
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verdaux_Impl;
+
+/// Elf_Verdef: This is the structure of entries in the SHT_GNU_verdef section
+/// (.gnu.version_d). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verdef_Impl {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ typedef Elf_Verdaux_Impl<target_endianness, is64Bits> Elf_Verdaux;
+ Elf_Half vd_version; // Version of this structure (e.g. VER_DEF_CURRENT)
+ Elf_Half vd_flags; // Bitwise flags (VER_DEF_*)
+ Elf_Half vd_ndx; // Version index, used in .gnu.version entries
+ Elf_Half vd_cnt; // Number of Verdaux entries
+ Elf_Word vd_hash; // Hash of name
+ Elf_Word vd_aux; // Offset to the first Verdaux entry (in bytes)
+ Elf_Word vd_next; // Offset to the next Verdef entry (in bytes)
+
+ /// Get the first Verdaux entry for this Verdef.
+ const Elf_Verdaux *getAux() const {
+ return reinterpret_cast<const Elf_Verdaux*>((const char*)this + vd_aux);
+ }
+};
+
+/// Elf_Verdaux: This is the structure of auxiliary data in the SHT_GNU_verdef
+/// section (.gnu.version_d). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verdaux_Impl {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ Elf_Word vda_name; // Version name (offset in string table)
+ Elf_Word vda_next; // Offset to next Verdaux entry (in bytes)
+};
+
+/// Elf_Verneed: This is the structure of entries in the SHT_GNU_verneed
+/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Verneed_Impl {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ Elf_Half vn_version; // Version of this structure (e.g. VER_NEED_CURRENT)
+ Elf_Half vn_cnt; // Number of associated Vernaux entries
+ Elf_Word vn_file; // Library name (string table offset)
+ Elf_Word vn_aux; // Offset to first Vernaux entry (in bytes)
+ Elf_Word vn_next; // Offset to next Verneed entry (in bytes)
+};
+
+/// Elf_Vernaux: This is the structure of auxiliary data in the SHT_GNU_verneed
+/// section (.gnu.version_r). This structure is identical for ELF32 and ELF64.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Vernaux_Impl {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+ Elf_Word vna_hash; // Hash of dependency name
+ Elf_Half vna_flags; // Bitwise Flags (VER_FLAG_*)
+ Elf_Half vna_other; // Version index, used in .gnu.version entries
+ Elf_Word vna_name; // Dependency name
+ Elf_Word vna_next; // Offset to next Vernaux entry (in bytes)
+};
+
+/// Elf_Dyn_Base: This structure matches the form of entries in the dynamic
+/// table section (.dynamic).
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Dyn_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Dyn_Base<target_endianness, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Sword d_tag;
+ union {
+ Elf_Word d_val;
+ Elf_Addr d_ptr;
+ } d_un;
+};
+
+template<support::endianness target_endianness>
+struct Elf_Dyn_Base<target_endianness, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Sxword d_tag;
+ union {
+ Elf_Xword d_val;
+ Elf_Addr d_ptr;
+ } d_un;
+};
+
+/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters.
+template<support::endianness target_endianness, bool is64Bits>
+struct Elf_Dyn_Impl : Elf_Dyn_Base<target_endianness, is64Bits> {
+ using Elf_Dyn_Base<target_endianness, is64Bits>::d_tag;
+ using Elf_Dyn_Base<target_endianness, is64Bits>::d_un;
+ int64_t getTag() const { return d_tag; }
+ uint64_t getVal() const { return d_un.d_val; }
+ uint64_t getPtr() const { return d_un.d_ptr; }
+};
+
+template<support::endianness target_endianness, bool is64Bits>
+class ELFObjectFile;
+
+// DynRefImpl: Reference to an entry in the dynamic table
+// This is an ELF-specific interface.
+template<support::endianness target_endianness, bool is64Bits>
+class DynRefImpl {
+ typedef Elf_Dyn_Impl<target_endianness, is64Bits> Elf_Dyn;
+ typedef ELFObjectFile<target_endianness, is64Bits> OwningType;
+
+ DataRefImpl DynPimpl;
+ const OwningType *OwningObject;
+
+public:
+ DynRefImpl() : OwningObject(NULL) { }
+
+ DynRefImpl(DataRefImpl DynP, const OwningType *Owner);
+
+ bool operator==(const DynRefImpl &Other) const;
+ bool operator <(const DynRefImpl &Other) const;
+
+ error_code getNext(DynRefImpl &Result) const;
+ int64_t getTag() const;
+ uint64_t getVal() const;
+ uint64_t getPtr() const;
+
+ DataRefImpl getRawDataRefImpl() const;
+};
+
+// Elf_Rel: Elf Relocation
+template<support::endianness target_endianness, bool is64Bits, bool isRela>
+struct Elf_Rel_Base;
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, false, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Word r_info; // Symbol table index and type of relocation to apply
+};
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, true, false> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Xword r_info; // Symbol table index and type of relocation to apply
+};
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, false, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Word r_info; // Symbol table index and type of relocation to apply
+ Elf_Sword r_addend; // Compute value for relocatable field by adding this
+};
+
+template<support::endianness target_endianness>
+struct Elf_Rel_Base<target_endianness, true, true> {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+ Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf_Xword r_info; // Symbol table index and type of relocation to apply
+ Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
+};
+
+template<support::endianness target_endianness, bool is64Bits, bool isRela>
+struct Elf_Rel_Impl;
+
+template<support::endianness target_endianness, bool isRela>
+struct Elf_Rel_Impl<target_endianness, true, isRela>
+ : Elf_Rel_Base<target_endianness, true, isRela> {
+ using Elf_Rel_Base<target_endianness, true, isRela>::r_info;
+ LLVM_ELF_IMPORT_TYPES(target_endianness, true)
+
+ // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
+ // and ELF64_R_INFO macros defined in the ELF specification:
+ uint64_t getSymbol() const { return (r_info >> 32); }
+ unsigned char getType() const {
+ return (unsigned char) (r_info & 0xffffffffL);
+ }
+ void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); }
+ void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+ void setSymbolAndType(uint64_t s, unsigned char t) {
+ r_info = (s << 32) + (t&0xffffffffL);
+ }
+};
+
+template<support::endianness target_endianness, bool isRela>
+struct Elf_Rel_Impl<target_endianness, false, isRela>
+ : Elf_Rel_Base<target_endianness, false, isRela> {
+ using Elf_Rel_Base<target_endianness, false, isRela>::r_info;
+ LLVM_ELF_IMPORT_TYPES(target_endianness, false)
+
+ // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+ // and ELF32_R_INFO macros defined in the ELF specification:
+ uint32_t getSymbol() const { return (r_info >> 8); }
+ unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+ void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
+ void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+ void setSymbolAndType(uint32_t s, unsigned char t) {
+ r_info = (s << 8) + t;
+ }
+};
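
The two specializations mirror the spec's R_INFO macros: ELF64 packs r_info
as (sym << 32) | type, ELF32 as (sym << 8) | type. A standalone check of both
layouts:

    #include <assert.h>
    #include <stdint.h>

    int main() {
      uint64_t r64 = (uint64_t(7) << 32) + 0x2a;   // ELF64_R_INFO(7, 42)
      assert((r64 >> 32) == 7 && (r64 & 0xffffffffUL) == 42);

      uint32_t r32 = (9u << 8) + 3u;               // ELF32_R_INFO(9, 3)
      assert((r32 >> 8) == 9 && (r32 & 0xff) == 3);
      return 0;
    }
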
+
+
+template<support::endianness target_endianness, bool is64Bits>
+class ELFObjectFile : public ObjectFile {
+ LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
+
+ typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
+ typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
+ typedef Elf_Dyn_Impl<target_endianness, is64Bits> Elf_Dyn;
+ typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
+ typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
+ typedef Elf_Verdef_Impl<target_endianness, is64Bits> Elf_Verdef;
+ typedef Elf_Verdaux_Impl<target_endianness, is64Bits> Elf_Verdaux;
+ typedef Elf_Verneed_Impl<target_endianness, is64Bits> Elf_Verneed;
+ typedef Elf_Vernaux_Impl<target_endianness, is64Bits> Elf_Vernaux;
+ typedef Elf_Versym_Impl<target_endianness, is64Bits> Elf_Versym;
+ typedef DynRefImpl<target_endianness, is64Bits> DynRef;
+ typedef content_iterator<DynRef> dyn_iterator;
+
+protected:
+ struct Elf_Ehdr {
+ unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
+ Elf_Half e_type; // Type of file (see ET_*)
+ Elf_Half e_machine; // Required architecture for this file (see EM_*)
+ Elf_Word e_version; // Must be equal to 1
+ Elf_Addr e_entry; // Address to jump to in order to start program
+ Elf_Off e_phoff; // Program header table's file offset, in bytes
+ Elf_Off e_shoff; // Section header table's file offset, in bytes
+ Elf_Word e_flags; // Processor-specific flags
+ Elf_Half e_ehsize; // Size of ELF header, in bytes
+ Elf_Half e_phentsize;// Size of an entry in the program header table
+ Elf_Half e_phnum; // Number of entries in the program header table
+ Elf_Half e_shentsize;// Size of an entry in the section header table
+ Elf_Half e_shnum; // Number of entries in the section header table
+ Elf_Half e_shstrndx; // Section header table index of section name
+ // string table
+ bool checkMagic() const {
+ return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+ }
+ unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
+ unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+ };
+ // This flag is used for classof, to distinguish ELFObjectFile from
+ // its subclass. If more subclasses are created, this flag will
+ // have to become an enum.
+ bool isDyldELFObject;
+
+private:
+ typedef SmallVector<const Elf_Shdr*, 1> Sections_t;
+ typedef DenseMap<unsigned, unsigned> IndexMap_t;
+ typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t;
+
+ const Elf_Ehdr *Header;
+ const Elf_Shdr *SectionHeaderTable;
+ const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
+ const Elf_Shdr *dot_strtab_sec; // Symbol string table (.strtab).
+ const Elf_Shdr *dot_dynstr_sec; // Dynamic symbol string table.
+
+ // SymbolTableSections[0] always points to the dynamic symbol table section
+ // header, or NULL if there is no dynamic symbol table.
+ Sections_t SymbolTableSections;
+ IndexMap_t SymbolTableSectionsIndexMap;
+ DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
+
+ const Elf_Shdr *dot_dynamic_sec; // .dynamic
+ const Elf_Shdr *dot_gnu_version_sec; // .gnu.version
+ const Elf_Shdr *dot_gnu_version_r_sec; // .gnu.version_r
+ const Elf_Shdr *dot_gnu_version_d_sec; // .gnu.version_d
+
+ // Pointer to SONAME entry in dynamic string table
+ // This is set the first time getLoadName is called.
+ mutable const char *dt_soname;
+
+ // Records for each version index the corresponding Verdef or Vernaux entry.
+ // This is filled the first time LoadVersionMap() is called.
+ class VersionMapEntry : public PointerIntPair<const void*, 1> {
+ public:
+ // If the integer is 0, this is an Elf_Verdef*.
+ // If the integer is 1, this is an Elf_Vernaux*.
+ VersionMapEntry() : PointerIntPair<const void*, 1>(NULL, 0) { }
+ VersionMapEntry(const Elf_Verdef *verdef)
+ : PointerIntPair<const void*, 1>(verdef, 0) { }
+ VersionMapEntry(const Elf_Vernaux *vernaux)
+ : PointerIntPair<const void*, 1>(vernaux, 1) { }
+ bool isNull() const { return getPointer() == NULL; }
+ bool isVerdef() const { return !isNull() && getInt() == 0; }
+ bool isVernaux() const { return !isNull() && getInt() == 1; }
+ const Elf_Verdef *getVerdef() const {
+ return isVerdef() ? (const Elf_Verdef*)getPointer() : NULL;
+ }
+ const Elf_Vernaux *getVernaux() const {
+ return isVernaux() ? (const Elf_Vernaux*)getPointer() : NULL;
+ }
+ };
+ mutable SmallVector<VersionMapEntry, 16> VersionMap;
+ void LoadVersionDefs(const Elf_Shdr *sec) const;
+ void LoadVersionNeeds(const Elf_Shdr *ec) const;
+ void LoadVersionMap() const;
+
+ /// @brief Map sections to an array of relocation sections that reference
+ /// them sorted by section index.
+ RelocMap_t SectionRelocMap;
+
+ /// @brief Get the relocation section that contains \a Rel.
+ const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
+ return getSection(Rel.w.b);
+ }
+
+ bool isRelocationHasAddend(DataRefImpl Rel) const;
+ template<typename T>
+ const T *getEntry(uint16_t Section, uint32_t Entry) const;
+ template<typename T>
+ const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
+ const Elf_Shdr *getSection(DataRefImpl index) const;
+ const Elf_Shdr *getSection(uint32_t index) const;
+ const Elf_Rel *getRel(DataRefImpl Rel) const;
+ const Elf_Rela *getRela(DataRefImpl Rela) const;
+ const char *getString(uint32_t section, uint32_t offset) const;
+ const char *getString(const Elf_Shdr *section, uint32_t offset) const;
+ error_code getSymbolName(const Elf_Shdr *section,
+ const Elf_Sym *Symb,
+ StringRef &Res) const;
+ error_code getSymbolVersion(const Elf_Shdr *section,
+ const Elf_Sym *Symb,
+ StringRef &Version,
+ bool &IsDefault) const;
+ void VerifyStrTab(const Elf_Shdr *sh) const;
+
+protected:
+ const Elf_Sym *getSymbol(DataRefImpl Symb) const; // FIXME: Should be private?
+ void validateSymbol(DataRefImpl Symb) const;
+
+public:
+ const Elf_Dyn *getDyn(DataRefImpl DynData) const;
+ error_code getSymbolVersion(SymbolRef Symb, StringRef &Version,
+ bool &IsDefault) const;
+protected:
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const;
+ virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const;
+ virtual error_code getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const;
+
+ friend class DynRefImpl<target_endianness, is64Bits>;
+ virtual error_code getDynNext(DataRefImpl DynData, DynRef &Result) const;
+
+ virtual error_code getLibraryNext(DataRefImpl Data, LibraryRef &Result) const;
+ virtual error_code getLibraryPath(DataRefImpl Data, StringRef &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Res) const;
+ virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const;
+ virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
+ virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const;
+
+ virtual error_code getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Res) const;
+ virtual error_code getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
+ virtual error_code getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Res) const;
+ virtual error_code getRelocationType(DataRefImpl Rel,
+ uint64_t &Res) const;
+ virtual error_code getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const;
+ virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Res) const;
+ virtual error_code getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const;
+
+public:
+ ELFObjectFile(MemoryBuffer *Object, error_code &ec);
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+
+ virtual symbol_iterator begin_dynamic_symbols() const;
+ virtual symbol_iterator end_dynamic_symbols() const;
+
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual library_iterator begin_libraries_needed() const;
+ virtual library_iterator end_libraries_needed() const;
+
+ virtual dyn_iterator begin_dynamic_table() const;
+ virtual dyn_iterator end_dynamic_table() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual StringRef getObjectType() const { return "ELF"; }
+ virtual unsigned getArch() const;
+ virtual StringRef getLoadName() const;
+
+ uint64_t getNumSections() const;
+ uint64_t getStringTableIndex() const;
+ ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
+ const Elf_Shdr *getSection(const Elf_Sym *symb) const;
+
+ // Methods for type inquiry through isa, cast, and dyn_cast
+ bool isDyldType() const { return isDyldELFObject; }
+ static inline bool classof(const Binary *v) {
+ return v->getType() == getELFType(target_endianness == support::little,
+ is64Bits);
+ }
+ static inline bool classof(const ELFObjectFile *v) { return true; }
+};
+
+// Iterate through the version definitions, and place each Elf_Verdef
+// in the VersionMap according to its index.
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>::
+ LoadVersionDefs(const Elf_Shdr *sec) const {
+ unsigned vd_size = sec->sh_size; // Size of section in bytes
+ unsigned vd_count = sec->sh_info; // Number of Verdef entries
+ const char *sec_start = (const char*)base() + sec->sh_offset;
+ const char *sec_end = sec_start + vd_size;
+ // The first Verdef entry is at the start of the section.
+ const char *p = sec_start;
+ for (unsigned i = 0; i < vd_count; i++) {
+ if (p + sizeof(Elf_Verdef) > sec_end)
+ report_fatal_error("Section ended unexpectedly while scanning "
+ "version definitions.");
+ const Elf_Verdef *vd = reinterpret_cast<const Elf_Verdef *>(p);
+ if (vd->vd_version != ELF::VER_DEF_CURRENT)
+ report_fatal_error("Unexpected verdef version");
+ size_t index = vd->vd_ndx & ELF::VERSYM_VERSION;
+ if (index >= VersionMap.size())
+ VersionMap.resize(index+1);
+ VersionMap[index] = VersionMapEntry(vd);
+ p += vd->vd_next;
+ }
+}
+
+// Iterate through the versions needed section, and place each Elf_Vernaux
+// in the VersionMap according to its index.
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>::
+ LoadVersionNeeds(const Elf_Shdr *sec) const {
+ unsigned vn_size = sec->sh_size; // Size of section in bytes
+ unsigned vn_count = sec->sh_info; // Number of Verneed entries
+ const char *sec_start = (const char*)base() + sec->sh_offset;
+ const char *sec_end = sec_start + vn_size;
+ // The first Verneed entry is at the start of the section.
+ const char *p = sec_start;
+ for (unsigned i = 0; i < vn_count; i++) {
+ if (p + sizeof(Elf_Verneed) > sec_end)
+ report_fatal_error("Section ended unexpectedly while scanning "
+ "version needed records.");
+ const Elf_Verneed *vn = reinterpret_cast<const Elf_Verneed *>(p);
+ if (vn->vn_version != ELF::VER_NEED_CURRENT)
+ report_fatal_error("Unexpected verneed version");
+ // Iterate through the Vernaux entries
+ const char *paux = p + vn->vn_aux;
+ for (unsigned j = 0; j < vn->vn_cnt; j++) {
+ if (paux + sizeof(Elf_Vernaux) > sec_end)
+ report_fatal_error("Section ended unexpected while scanning auxiliary "
+ "version needed records.");
+ const Elf_Vernaux *vna = reinterpret_cast<const Elf_Vernaux *>(paux);
+ size_t index = vna->vna_other & ELF::VERSYM_VERSION;
+ if (index >= VersionMap.size())
+ VersionMap.resize(index+1);
+ VersionMap[index] = VersionMapEntry(vna);
+ paux += vna->vna_next;
+ }
+ p += vn->vn_next;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>::LoadVersionMap() const {
+ // If there is no dynamic symtab or version table, there is nothing to do.
+ if (SymbolTableSections[0] == NULL || dot_gnu_version_sec == NULL)
+ return;
+
+ // Has the VersionMap already been loaded?
+ if (VersionMap.size() > 0)
+ return;
+
+ // The first two version indexes are reserved.
+ // Index 0 is LOCAL, index 1 is GLOBAL.
+ VersionMap.push_back(VersionMapEntry());
+ VersionMap.push_back(VersionMapEntry());
+
+ if (dot_gnu_version_d_sec)
+ LoadVersionDefs(dot_gnu_version_d_sec);
+
+ if (dot_gnu_version_r_sec)
+ LoadVersionNeeds(dot_gnu_version_r_sec);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>
+ ::validateSymbol(DataRefImpl Symb) const {
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+ // FIXME: We really need to do proper error handling in the case of an invalid
+ // input file. Because we don't use exceptions, I think we'll just pass
+ // an error object around.
+ if (!( symb
+ && SymbolTableSection
+ && symb >= (const Elf_Sym*)(base()
+ + SymbolTableSection->sh_offset)
+ && symb < (const Elf_Sym*)(base()
+ + SymbolTableSection->sh_offset
+ + SymbolTableSection->sh_size)))
+ // FIXME: Proper error handling.
+ report_fatal_error("Symb must point to a valid symbol!");
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ validateSymbol(Symb);
+ const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
+
+ ++Symb.d.a;
+ // Check to see if we are at the end of this symbol table.
+ if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
+    // We are at the end. If there are other symbol tables, jump to them.
+    // If the symbol table is .dynsym, we are iterating dynamic symbols,
+    // and there is only one such table.
+ if (Symb.d.b != 0) {
+ ++Symb.d.b;
+ Symb.d.a = 1; // The 0th symbol in ELF is fake.
+ }
+ // Otherwise return the terminator.
+ if (Symb.d.b == 0 || Symb.d.b >= SymbolTableSections.size()) {
+ Symb.d.a = std::numeric_limits<uint32_t>::max();
+ Symb.d.b = std::numeric_limits<uint32_t>::max();
+ }
+ }
+
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
+}
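+
+// A typical traversal over these entries (a sketch only; 'Obj' is an assumed
+// ELFObjectFile instance and getName() comes from the SymbolRef interface):
+//   error_code ec;
+//   for (symbol_iterator si = Obj.begin_symbols(), se = Obj.end_symbols();
+//        si != se; si.increment(ec)) {
+//     if (ec) break;
+//     StringRef Name;
+//     si->getName(Name);
+//   }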
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ return getSymbolName(SymbolTableSections[Symb.d.b], symb, Result);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolVersion(SymbolRef SymRef,
+ StringRef &Version,
+ bool &IsDefault) const {
+ DataRefImpl Symb = SymRef.getRawDataRefImpl();
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ return getSymbolVersion(SymbolTableSections[Symb.d.b], symb,
+ Version, IsDefault);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolTableIndex(const Elf_Sym *symb) const {
+ if (symb->st_shndx == ELF::SHN_XINDEX)
+ return ExtendedSymbolTable.lookup(symb);
+ return symb->st_shndx;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>
+ ::getSection(const Elf_Sym *symb) const {
+ if (symb->st_shndx == ELF::SHN_XINDEX)
+ return getSection(ExtendedSymbolTable.lookup(symb));
+ if (symb->st_shndx >= ELF::SHN_LORESERVE)
+ return 0;
+ return getSection(symb->st_shndx);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolFileOffset(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section;
+ switch (getSymbolTableIndex(symb)) {
+  case ELF::SHN_COMMON:
+   // Uninitialized symbols have no offset in the object file.
+  case ELF::SHN_UNDEF:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::SHN_ABS:
+ Result = symb->st_value;
+ return object_error::success;
+ default: Section = getSection(symb);
+ }
+
+ switch (symb->getType()) {
+ case ELF::STT_SECTION:
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
+ case ELF::STT_NOTYPE:
+ Result = symb->st_value +
+ (Section ? Section->sh_offset : 0);
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section;
+ switch (getSymbolTableIndex(symb)) {
+ case ELF::SHN_COMMON:
+ case ELF::SHN_UNDEF:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::SHN_ABS:
+ Result = symb->st_value;
+ return object_error::success;
+ default: Section = getSection(symb);
+ }
+
+ switch (symb->getType()) {
+ case ELF::STT_SECTION:
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::STT_FUNC:
+ case ELF::STT_OBJECT:
+ case ELF::STT_NOTYPE:
+ Result = symb->st_value + (Section ? Section->sh_addr : 0);
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+  if (symb->st_size == 0) {
+    // A zero st_size means the symbol's size is not known.
+    Result = UnknownAddressOrSize;
+    return object_error::success;
+  }
+  Result = symb->st_size;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *Section = getSection(symb);
+
+ char ret = '?';
+
+ if (Section) {
+ switch (Section->sh_type) {
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_DYNAMIC:
+ switch (Section->sh_flags) {
+ case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
+ ret = 't'; break;
+ case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
+ ret = 'd'; break;
+ case ELF::SHF_ALLOC:
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
+ case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
+ ret = 'r'; break;
+ }
+ break;
+ case ELF::SHT_NOBITS: ret = 'b';
+ }
+ }
+
+ switch (getSymbolTableIndex(symb)) {
+ case ELF::SHN_UNDEF:
+ if (ret == '?')
+ ret = 'U';
+ break;
+ case ELF::SHN_ABS: ret = 'a'; break;
+ case ELF::SHN_COMMON: ret = 'c'; break;
+ }
+
+ switch (symb->getBinding()) {
+ case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
+ case ELF::STB_WEAK:
+ if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF)
+ ret = 'w';
+ else
+ if (symb->getType() == ELF::STT_OBJECT)
+ ret = 'V';
+ else
+ ret = 'W';
+ }
+
+ if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ Result = StringSwitch<char>(name)
+ .StartsWith(".debug", 'N')
+ .StartsWith(".note", 'n')
+ .Default('?');
+ return object_error::success;
+ }
+
+ Result = ret;
+ return object_error::success;
+}
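+
+// For example, a global function defined in a PROGBITS section flagged
+// SHF_ALLOC | SHF_EXECINSTR maps to 't' above and is then upcased to 'T' by
+// the STB_GLOBAL case, matching the letters printed by nm(1).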
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ switch (symb->getType()) {
+ case ELF::STT_NOTYPE:
+ Result = SymbolRef::ST_Unknown;
+ break;
+ case ELF::STT_SECTION:
+ Result = SymbolRef::ST_Debug;
+ break;
+ case ELF::STT_FILE:
+ Result = SymbolRef::ST_File;
+ break;
+ case ELF::STT_FUNC:
+ Result = SymbolRef::ST_Function;
+ break;
+ case ELF::STT_OBJECT:
+ case ELF::STT_COMMON:
+ case ELF::STT_TLS:
+ Result = SymbolRef::ST_Data;
+ break;
+ default:
+ Result = SymbolRef::ST_Other;
+ break;
+ }
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolFlags(DataRefImpl Symb,
+ uint32_t &Result) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+
+ Result = SymbolRef::SF_None;
+
+ if (symb->getBinding() != ELF::STB_LOCAL)
+ Result |= SymbolRef::SF_Global;
+
+ if (symb->getBinding() == ELF::STB_WEAK)
+ Result |= SymbolRef::SF_Weak;
+
+ if (symb->st_shndx == ELF::SHN_ABS)
+ Result |= SymbolRef::SF_Absolute;
+
+ if (symb->getType() == ELF::STT_FILE ||
+ symb->getType() == ELF::STT_SECTION)
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF)
+ Result |= SymbolRef::SF_Undefined;
+
+ if (symb->getType() == ELF::STT_COMMON ||
+ getSymbolTableIndex(symb) == ELF::SHN_COMMON)
+ Result |= SymbolRef::SF_Common;
+
+ if (symb->getType() == ELF::STT_TLS)
+ Result |= SymbolRef::SF_ThreadLocal;
+
+ return object_error::success;
+}
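+
+// Sketch of a caller-side check: after a successful call, a weak undefined
+// symbol satisfies (Res & SymbolRef::SF_Global), (Res & SymbolRef::SF_Weak),
+// and (Res & SymbolRef::SF_Undefined) simultaneously.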
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
+ validateSymbol(Symb);
+ const Elf_Sym *symb = getSymbol(Symb);
+ const Elf_Shdr *sec = getSection(symb);
+ if (!sec)
+ Res = end_sections();
+ else {
+ DataRefImpl Sec;
+ Sec.p = reinterpret_cast<intptr_t>(sec);
+ Res = section_iterator(SectionRef(Sec, this));
+ }
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
+ const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
+ sec += Header->e_shentsize;
+ Sec.p = reinterpret_cast<intptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_addr;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_size;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ const char *start = (const char*)base() + sec->sh_offset;
+ Result = StringRef(start, sec->sh_size);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAlignment(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ Result = sec->sh_addralign;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & ELF::SHF_EXECINSTR)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionData(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  if ((sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE))
+      && sec->sh_type == ELF::SHT_PROGBITS)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionBSS(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+  if ((sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE))
+      && sec->sh_type == ELF::SHT_NOBITS)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_flags & ELF::SHF_ALLOC)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ if (sec->sh_type == ELF::SHT_NOBITS)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+                        ::isSectionZeroInit(DataRefImpl Sec,
+                                            bool &Result) const {
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ // For ELF, all zero-init sections are virtual (that is, they occupy no space
+ // in the object image) and vice versa.
+  if (sec->sh_type == ELF::SHT_NOBITS)
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+relocation_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionRelBegin(DataRefImpl Sec) const {
+ DataRefImpl RelData;
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
+ if (sec != 0 && ittr != SectionRelocMap.end()) {
+ RelData.w.a = getSection(ittr->second[0])->sh_info;
+ RelData.w.b = ittr->second[0];
+ RelData.w.c = 0;
+ }
+ return relocation_iterator(RelocationRef(RelData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+relocation_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionRelEnd(DataRefImpl Sec) const {
+ DataRefImpl RelData;
+ const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
+ typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
+ if (sec != 0 && ittr != SectionRelocMap.end()) {
+ // Get the index of the last relocation section for this section.
+ std::size_t relocsecindex = ittr->second[ittr->second.size() - 1];
+ const Elf_Shdr *relocsec = getSection(relocsecindex);
+ RelData.w.a = relocsec->sh_info;
+ RelData.w.b = relocsecindex;
+ RelData.w.c = relocsec->sh_size / relocsec->sh_entsize;
+ }
+ return relocation_iterator(RelocationRef(RelData, this));
+}
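+
+// DataRefImpl encoding used by the relocation iterators above (an internal
+// convention of this file): w.a holds the index of the section the
+// relocations apply to (sh_info), w.b the relocation section's own index,
+// and w.c the entry index within that relocation section.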
+
+// Relocations
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationNext(DataRefImpl Rel,
+ RelocationRef &Result) const {
+ ++Rel.w.c;
+ const Elf_Shdr *relocsec = getSection(Rel.w.b);
+ if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) {
+ // We have reached the end of the relocations for this section. See if there
+ // is another relocation section.
+ typename RelocMap_t::mapped_type relocseclist =
+ SectionRelocMap.lookup(getSection(Rel.w.a));
+
+ // Do a binary search for the current reloc section index (which must be
+ // present). Then get the next one.
+ typename RelocMap_t::mapped_type::const_iterator loc =
+ std::lower_bound(relocseclist.begin(), relocseclist.end(), Rel.w.b);
+ ++loc;
+
+    // If there is no next relocation section, do nothing: the ++Rel.w.c
+    // above already leaves Rel at the end-iterator position.
+ if (loc != relocseclist.end()) {
+ Rel.w.b = *loc;
+ Rel.w.a = 0;
+ }
+ }
+ Result = RelocationRef(Rel, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationSymbol(DataRefImpl Rel,
+ SymbolRef &Result) const {
+ uint32_t symbolIdx;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ symbolIdx = getRel(Rel)->getSymbol();
+ break;
+ }
+ case ELF::SHT_RELA : {
+ symbolIdx = getRela(Rel)->getSymbol();
+ break;
+ }
+ }
+ DataRefImpl SymbolData;
+ IndexMap_t::const_iterator it = SymbolTableSectionsIndexMap.find(sec->sh_link);
+ if (it == SymbolTableSectionsIndexMap.end())
+ report_fatal_error("Relocation symbol table not found!");
+ SymbolData.d.a = symbolIdx;
+ SymbolData.d.b = it->second;
+ Result = SymbolRef(SymbolData, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationAddress(DataRefImpl Rel,
+ uint64_t &Result) const {
+ uint64_t offset;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ offset = getRel(Rel)->r_offset;
+ break;
+ }
+ case ELF::SHT_RELA : {
+ offset = getRela(Rel)->r_offset;
+ break;
+ }
+ }
+
+ Result = offset;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Result) const {
+ uint64_t offset;
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ offset = getRel(Rel)->r_offset;
+ break;
+ }
+ case ELF::SHT_RELA : {
+ offset = getRela(Rel)->r_offset;
+ break;
+ }
+ }
+
+ Result = offset - sec->sh_addr;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationType(DataRefImpl Rel,
+ uint64_t &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ Result = getRel(Rel)->getType();
+ break;
+ }
+ case ELF::SHT_RELA : {
+ Result = getRela(Rel)->getType();
+ break;
+ }
+ }
+ return object_error::success;
+}
+
+#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
+ case ELF::enum: res = #enum; break;
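+
+// The '#enum' above stringizes the enumerator, so for example
+//   LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32)
+// expands to
+//   case ELF::R_X86_64_PC32: res = "R_X86_64_PC32"; break;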
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ uint8_t type;
+ StringRef res;
+ switch (sec->sh_type) {
+ default :
+ return object_error::parse_failed;
+ case ELF::SHT_REL : {
+ type = getRel(Rel)->getType();
+ break;
+ }
+ case ELF::SHT_RELA : {
+ type = getRela(Rel)->getType();
+ break;
+ }
+ }
+ switch (Header->e_machine) {
+ case ELF::EM_X86_64:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
+ default:
+ res = "Unknown";
+ }
+ break;
+ case ELF::EM_386:
+ switch (type) {
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
+ default:
+ res = "Unknown";
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
+
+#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationAdditionalInfo(DataRefImpl Rel,
+ int64_t &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ switch (sec->sh_type) {
+ default :
+ report_fatal_error("Invalid section type in Rel!");
+ case ELF::SHT_REL : {
+ Result = 0;
+ return object_error::success;
+ }
+ case ELF::SHT_RELA : {
+ Result = getRela(Rel)->r_addend;
+ return object_error::success;
+ }
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getRelocationValueString(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const {
+ const Elf_Shdr *sec = getSection(Rel.w.b);
+ uint8_t type;
+ StringRef res;
+ int64_t addend = 0;
+ uint16_t symbol_index = 0;
+ switch (sec->sh_type) {
+ default :
+ return object_error::parse_failed;
+ case ELF::SHT_REL : {
+ type = getRel(Rel)->getType();
+ symbol_index = getRel(Rel)->getSymbol();
+ // TODO: Read implicit addend from section data.
+ break;
+ }
+ case ELF::SHT_RELA : {
+ type = getRela(Rel)->getType();
+ symbol_index = getRela(Rel)->getSymbol();
+ addend = getRela(Rel)->r_addend;
+ break;
+ }
+ }
+ const Elf_Sym *symb = getEntry<Elf_Sym>(sec->sh_link, symbol_index);
+ StringRef symname;
+ if (error_code ec = getSymbolName(getSection(sec->sh_link), symb, symname))
+ return ec;
+ switch (Header->e_machine) {
+ case ELF::EM_X86_64:
+ switch (type) {
+ case ELF::R_X86_64_32S:
+ res = symname;
+ break;
+ case ELF::R_X86_64_PC32: {
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << symname << (addend < 0 ? "" : "+") << addend << "-P";
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ break;
+ default:
+ res = "Unknown";
+ }
+ if (Result.empty())
+ Result.append(res.begin(), res.end());
+ return object_error::success;
+}
+
+// Verify that the last byte in the string table is a null terminator.
+template<support::endianness target_endianness, bool is64Bits>
+void ELFObjectFile<target_endianness, is64Bits>
+ ::VerifyStrTab(const Elf_Shdr *sh) const {
+ const char *strtab = (const char*)base() + sh->sh_offset;
+ if (strtab[sh->sh_size - 1] != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("String table must end with a null terminator!");
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
+ , error_code &ec)
+ : ObjectFile(getELFType(target_endianness == support::little, is64Bits),
+ Object, ec)
+ , isDyldELFObject(false)
+ , SectionHeaderTable(0)
+ , dot_shstrtab_sec(0)
+ , dot_strtab_sec(0)
+ , dot_dynstr_sec(0)
+ , dot_dynamic_sec(0)
+ , dot_gnu_version_sec(0)
+ , dot_gnu_version_r_sec(0)
+ , dot_gnu_version_d_sec(0)
+ , dt_soname(0)
+ {
+
+ const uint64_t FileSize = Data->getBufferSize();
+
+ if (sizeof(Elf_Ehdr) > FileSize)
+ // FIXME: Proper error handling.
+ report_fatal_error("File too short!");
+
+ Header = reinterpret_cast<const Elf_Ehdr *>(base());
+
+ if (Header->e_shoff == 0)
+ return;
+
+ const uint64_t SectionTableOffset = Header->e_shoff;
+
+ if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize)
+ // FIXME: Proper error handling.
+ report_fatal_error("Section header table goes past end of file!");
+
+ // The getNumSections() call below depends on SectionHeaderTable being set.
+ SectionHeaderTable =
+ reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
+ const uint64_t SectionTableSize = getNumSections() * Header->e_shentsize;
+
+ if (SectionTableOffset + SectionTableSize > FileSize)
+ // FIXME: Proper error handling.
+ report_fatal_error("Section table goes past end of file!");
+
+  // Walk the section table, recording the symbol tables and the other
+  // special sections we need (dynamic, relocation, and GNU version sections).
+ const Elf_Shdr* SymbolTableSectionHeaderIndex = 0;
+ const Elf_Shdr* sh = SectionHeaderTable;
+
+ // Reserve SymbolTableSections[0] for .dynsym
+ SymbolTableSections.push_back(NULL);
+
+ for (uint64_t i = 0, e = getNumSections(); i != e; ++i) {
+ switch (sh->sh_type) {
+ case ELF::SHT_SYMTAB_SHNDX: {
+ if (SymbolTableSectionHeaderIndex)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .symtab_shndx!");
+ SymbolTableSectionHeaderIndex = sh;
+ break;
+ }
+ case ELF::SHT_SYMTAB: {
+ SymbolTableSectionsIndexMap[i] = SymbolTableSections.size();
+ SymbolTableSections.push_back(sh);
+ break;
+ }
+ case ELF::SHT_DYNSYM: {
+ if (SymbolTableSections[0] != NULL)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .dynsym!");
+ SymbolTableSectionsIndexMap[i] = 0;
+ SymbolTableSections[0] = sh;
+ break;
+ }
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ SectionRelocMap[getSection(sh->sh_info)].push_back(i);
+ break;
+ }
+ case ELF::SHT_DYNAMIC: {
+ if (dot_dynamic_sec != NULL)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .dynamic!");
+ dot_dynamic_sec = sh;
+ break;
+ }
+ case ELF::SHT_GNU_versym: {
+ if (dot_gnu_version_sec != NULL)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .gnu.version section!");
+ dot_gnu_version_sec = sh;
+ break;
+ }
+ case ELF::SHT_GNU_verdef: {
+ if (dot_gnu_version_d_sec != NULL)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .gnu.version_d section!");
+ dot_gnu_version_d_sec = sh;
+ break;
+ }
+ case ELF::SHT_GNU_verneed: {
+ if (dot_gnu_version_r_sec != NULL)
+ // FIXME: Proper error handling.
+ report_fatal_error("More than one .gnu.version_r section!");
+ dot_gnu_version_r_sec = sh;
+ break;
+ }
+ }
+ ++sh;
+ }
+
+ // Sort section relocation lists by index.
+ for (typename RelocMap_t::iterator i = SectionRelocMap.begin(),
+ e = SectionRelocMap.end(); i != e; ++i) {
+ std::sort(i->second.begin(), i->second.end());
+ }
+
+ // Get string table sections.
+ dot_shstrtab_sec = getSection(getStringTableIndex());
+ if (dot_shstrtab_sec) {
+    // Verify that the last byte in the string table is a null terminator.
+ VerifyStrTab(dot_shstrtab_sec);
+ }
+
+  // TODO: Merge this walk into the section table loop above.
+ for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
+ *e = i + getNumSections() * Header->e_shentsize;
+ i != e; i += Header->e_shentsize) {
+ const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
+ if (sh->sh_type == ELF::SHT_STRTAB) {
+ StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
+ if (SectionName == ".strtab") {
+ if (dot_strtab_sec != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("Already found section named .strtab!");
+ dot_strtab_sec = sh;
+ VerifyStrTab(dot_strtab_sec);
+ } else if (SectionName == ".dynstr") {
+ if (dot_dynstr_sec != 0)
+ // FIXME: Proper error handling.
+ report_fatal_error("Already found section named .dynstr!");
+ dot_dynstr_sec = sh;
+ VerifyStrTab(dot_dynstr_sec);
+ }
+ }
+ }
+
+  // Build the symbol-to-extended-section-index mapping, if a
+  // SHT_SYMTAB_SHNDX section is present.
+ if (SymbolTableSectionHeaderIndex) {
+ const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() +
+ SymbolTableSectionHeaderIndex->sh_offset);
+ error_code ec;
+ for (symbol_iterator si = begin_symbols(),
+ se = end_symbols(); si != se; si.increment(ec)) {
+ if (ec)
+ report_fatal_error("Fewer extended symbol table entries than symbols!");
+ if (*ShndxTable != ELF::SHN_UNDEF)
+ ExtendedSymbolTable[getSymbol(si->getRawDataRefImpl())] = *ShndxTable;
+ ++ShndxTable;
+ }
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_symbols() const {
+ DataRefImpl SymbolData;
+ if (SymbolTableSections.size() <= 1) {
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ } else {
+ SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
+    SymbolData.d.b = 1; // Skip table 0, which is reserved for .dynsym.
+ }
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_symbols() const {
+ DataRefImpl SymbolData;
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_dynamic_symbols() const {
+ DataRefImpl SymbolData;
+ if (SymbolTableSections[0] == NULL) {
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ } else {
+ SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
+ SymbolData.d.b = 0; // The 0th table is .dynsym
+ }
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+symbol_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_dynamic_symbols() const {
+ DataRefImpl SymbolData;
+ SymbolData.d.a = std::numeric_limits<uint32_t>::max();
+ SymbolData.d.b = std::numeric_limits<uint32_t>::max();
+ return symbol_iterator(SymbolRef(SymbolData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+section_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_sections() const {
+ DataRefImpl ret;
+ ret.p = reinterpret_cast<intptr_t>(base()
+ + Header->e_shoff
+ + (Header->e_shentsize*getNumSections()));
+ return section_iterator(SectionRef(ret, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+typename ELFObjectFile<target_endianness, is64Bits>::dyn_iterator
+ELFObjectFile<target_endianness, is64Bits>::begin_dynamic_table() const {
+ DataRefImpl DynData;
+ if (dot_dynamic_sec == NULL || dot_dynamic_sec->sh_size == 0) {
+ DynData.d.a = std::numeric_limits<uint32_t>::max();
+ } else {
+ DynData.d.a = 0;
+ }
+ return dyn_iterator(DynRef(DynData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+typename ELFObjectFile<target_endianness, is64Bits>::dyn_iterator
+ELFObjectFile<target_endianness, is64Bits>
+ ::end_dynamic_table() const {
+ DataRefImpl DynData;
+ DynData.d.a = std::numeric_limits<uint32_t>::max();
+ return dyn_iterator(DynRef(DynData, this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getDynNext(DataRefImpl DynData,
+ DynRef &Result) const {
+ ++DynData.d.a;
+
+ // Check to see if we are at the end of .dynamic
+ if (DynData.d.a >= dot_dynamic_sec->getEntityCount()) {
+ // We are at the end. Return the terminator.
+ DynData.d.a = std::numeric_limits<uint32_t>::max();
+ }
+
+ Result = DynRef(DynData, this);
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef
+ELFObjectFile<target_endianness, is64Bits>::getLoadName() const {
+ if (!dt_soname) {
+ // Find the DT_SONAME entry
+ dyn_iterator it = begin_dynamic_table();
+ dyn_iterator ie = end_dynamic_table();
+ error_code ec;
+ while (it != ie) {
+ if (it->getTag() == ELF::DT_SONAME)
+ break;
+ it.increment(ec);
+ if (ec)
+ report_fatal_error("dynamic table iteration failed");
+ }
+ if (it != ie) {
+ if (dot_dynstr_sec == NULL)
+ report_fatal_error("Dynamic string table is missing");
+ dt_soname = getString(dot_dynstr_sec, it->getVal());
+ } else {
+ dt_soname = "";
+ }
+ }
+ return dt_soname;
+}
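+
+// Usage note: for a shared object this returns the DT_SONAME string, e.g.
+// "libfoo.so.1"; if no DT_SONAME entry exists, the empty string is returned.
+// Either way the result is cached in dt_soname for subsequent calls.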
+
+template<support::endianness target_endianness, bool is64Bits>
+library_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::begin_libraries_needed() const {
+ // Find the first DT_NEEDED entry
+ dyn_iterator i = begin_dynamic_table();
+ dyn_iterator e = end_dynamic_table();
+ error_code ec;
+ while (i != e) {
+ if (i->getTag() == ELF::DT_NEEDED)
+ break;
+ i.increment(ec);
+ if (ec)
+ report_fatal_error("dynamic table iteration failed");
+ }
+ // Use the same DataRefImpl format as DynRef.
+ return library_iterator(LibraryRef(i->getRawDataRefImpl(), this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getLibraryNext(DataRefImpl Data,
+ LibraryRef &Result) const {
+ // Use the same DataRefImpl format as DynRef.
+ dyn_iterator i = dyn_iterator(DynRef(Data, this));
+ dyn_iterator e = end_dynamic_table();
+
+ // Skip the current dynamic table entry.
+ error_code ec;
+ if (i != e) {
+ i.increment(ec);
+ // TODO: proper error handling
+ if (ec)
+ report_fatal_error("dynamic table iteration failed");
+ }
+
+ // Find the next DT_NEEDED entry.
+ while (i != e) {
+ if (i->getTag() == ELF::DT_NEEDED)
+ break;
+ i.increment(ec);
+ if (ec)
+ report_fatal_error("dynamic table iteration failed");
+ }
+ Result = LibraryRef(i->getRawDataRefImpl(), this);
+ return object_error::success;
+}
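+
+// Iteration sketch (assumes an ELFObjectFile instance named Obj; getPath()
+// is the LibraryRef accessor backed by getLibraryPath() above):
+//   error_code ec;
+//   for (library_iterator i = Obj.begin_libraries_needed(),
+//                         e = Obj.end_libraries_needed();
+//        i != e; i.increment(ec)) {
+//     StringRef Path;
+//     if (!i->getPath(Path)) {
+//       // Path names a DT_NEEDED dependency, e.g. "libc.so.6".
+//     }
+//   }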
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getLibraryPath(DataRefImpl Data, StringRef &Res) const {
+ dyn_iterator i = dyn_iterator(DynRef(Data, this));
+ if (i == end_dynamic_table())
+ report_fatal_error("getLibraryPath() called on iterator end");
+
+ if (i->getTag() != ELF::DT_NEEDED)
+ report_fatal_error("Invalid library_iterator");
+
+  // This uses .dynstr to look up the name of the DT_NEEDED entry. This
+  // works as long as DT_STRTAB == .dynstr, which is true most of the
+  // time but is not guaranteed by the specification.
+ // TODO: This should really use DT_STRTAB instead. Doing this requires
+ // reading the program headers.
+ if (dot_dynstr_sec == NULL)
+ report_fatal_error("Dynamic string table is missing");
+ Res = getString(dot_dynstr_sec, i->getVal());
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+library_iterator ELFObjectFile<target_endianness, is64Bits>
+ ::end_libraries_needed() const {
+ dyn_iterator e = end_dynamic_table();
+ // Use the same DataRefImpl format as DynRef.
+ return library_iterator(LibraryRef(e->getRawDataRefImpl(), this));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
+ return is64Bits ? 8 : 4;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+StringRef ELFObjectFile<target_endianness, is64Bits>
+ ::getFileFormatName() const {
+ switch(Header->e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF32-i386";
+ case ELF::EM_X86_64:
+ return "ELF32-x86-64";
+ case ELF::EM_ARM:
+ return "ELF32-arm";
+ default:
+ return "ELF32-unknown";
+ }
+ case ELF::ELFCLASS64:
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return "ELF64-i386";
+ case ELF::EM_X86_64:
+ return "ELF64-x86-64";
+ default:
+ return "ELF64-unknown";
+ }
+ default:
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid ELFCLASS!");
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
+ switch(Header->e_machine) {
+ case ELF::EM_386:
+ return Triple::x86;
+ case ELF::EM_X86_64:
+ return Triple::x86_64;
+ case ELF::EM_ARM:
+ return Triple::arm;
+ default:
+ return Triple::UnknownArch;
+ }
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const {
+ assert(Header && "Header not initialized!");
+ if (Header->e_shnum == ELF::SHN_UNDEF) {
+ assert(SectionHeaderTable && "SectionHeaderTable not initialized!");
+ return SectionHeaderTable->sh_size;
+ }
+ return Header->e_shnum;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+uint64_t
+ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const {
+ if (Header->e_shnum == ELF::SHN_UNDEF) {
+ if (Header->e_shstrndx == ELF::SHN_HIRESERVE)
+ return SectionHeaderTable->sh_link;
+ if (Header->e_shstrndx >= getNumSections())
+ return 0;
+ }
+ return Header->e_shstrndx;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+template<typename T>
+inline const T *
+ELFObjectFile<target_endianness, is64Bits>::getEntry(uint16_t Section,
+ uint32_t Entry) const {
+ return getEntry<T>(getSection(Section), Entry);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+template<typename T>
+inline const T *
+ELFObjectFile<target_endianness, is64Bits>::getEntry(const Elf_Shdr * Section,
+ uint32_t Entry) const {
+ return reinterpret_cast<const T *>(
+ base()
+ + Section->sh_offset
+ + (Entry * Section->sh_entsize));
+}
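+
+// For example, getEntry<Elf_Sym>(SymTab, 3) returns a pointer to
+// base() + SymTab->sh_offset + 3 * SymTab->sh_entsize, i.e. the fourth
+// fixed-size record in that section.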
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
+ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
+ return getEntry<Elf_Sym>(SymbolTableSections[Symb.d.b], Symb.d.a);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Dyn *
+ELFObjectFile<target_endianness, is64Bits>::getDyn(DataRefImpl DynData) const {
+ return getEntry<Elf_Dyn>(dot_dynamic_sec, DynData.d.a);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rel *
+ELFObjectFile<target_endianness, is64Bits>::getRel(DataRefImpl Rel) const {
+ return getEntry<Elf_Rel>(Rel.w.b, Rel.w.c);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rela *
+ELFObjectFile<target_endianness, is64Bits>::getRela(DataRefImpl Rela) const {
+ return getEntry<Elf_Rela>(Rela.w.b, Rela.w.c);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
+ const Elf_Shdr *sec = getSection(Symb.d.b);
+  if (sec->sh_type != ELF::SHT_SYMTAB && sec->sh_type != ELF::SHT_DYNSYM)
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid symbol table section!");
+ return sec;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
+ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const {
+ if (index == 0)
+ return 0;
+ if (!SectionHeaderTable || index >= getNumSections())
+ // FIXME: Proper error handling.
+ report_fatal_error("Invalid section index!");
+
+ return reinterpret_cast<const Elf_Shdr *>(
+ reinterpret_cast<const char *>(SectionHeaderTable)
+ + (index * Header->e_shentsize));
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(uint32_t section,
+ ELF::Elf32_Word offset) const {
+ return getString(getSection(section), offset);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+const char *ELFObjectFile<target_endianness, is64Bits>
+ ::getString(const Elf_Shdr *section,
+ ELF::Elf32_Word offset) const {
+ assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
+ if (offset >= section->sh_size)
+ // FIXME: Proper error handling.
+ report_fatal_error("Symbol name offset outside of string table!");
+ return (const char *)base() + section->sh_offset + offset;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(const Elf_Shdr *section,
+ const Elf_Sym *symb,
+ StringRef &Result) const {
+  if (symb->st_name == 0) {
+    // An unnamed symbol (st_name == 0) is named after the section it is
+    // defined in, if any.
+    const Elf_Shdr *defsec = getSection(symb);
+    if (!defsec)
+      Result = "";
+    else
+      Result = getString(dot_shstrtab_sec, defsec->sh_name);
+    return object_error::success;
+  }
+
+ if (section == SymbolTableSections[0]) {
+ // Symbol is in .dynsym, use .dynstr string table
+ Result = getString(dot_dynstr_sec, symb->st_name);
+ } else {
+ // Use the default symbol table name section.
+ Result = getString(dot_strtab_sec, symb->st_name);
+ }
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolVersion(const Elf_Shdr *section,
+ const Elf_Sym *symb,
+ StringRef &Version,
+ bool &IsDefault) const {
+ // Handle non-dynamic symbols.
+ if (section != SymbolTableSections[0]) {
+ // Non-dynamic symbols can have versions in their names
+ // A name of the form 'foo@V1' indicates version 'V1', non-default.
+ // A name of the form 'foo@@V2' indicates version 'V2', default version.
+ StringRef Name;
+ error_code ec = getSymbolName(section, symb, Name);
+ if (ec != object_error::success)
+ return ec;
+ size_t atpos = Name.find('@');
+ if (atpos == StringRef::npos) {
+ Version = "";
+ IsDefault = false;
+ return object_error::success;
+ }
+ ++atpos;
+ if (atpos < Name.size() && Name[atpos] == '@') {
+ IsDefault = true;
+ ++atpos;
+ } else {
+ IsDefault = false;
+ }
+ Version = Name.substr(atpos);
+ return object_error::success;
+ }
+
+ // This is a dynamic symbol. Look in the GNU symbol version table.
+ if (dot_gnu_version_sec == NULL) {
+ // No version table.
+ Version = "";
+ IsDefault = false;
+ return object_error::success;
+ }
+
+ // Determine the position in the symbol table of this entry.
+ const char *sec_start = (const char*)base() + section->sh_offset;
+ size_t entry_index = ((const char*)symb - sec_start)/section->sh_entsize;
+
+ // Get the corresponding version index entry
+ const Elf_Versym *vs = getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index);
+ size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
+
+ // Special markers for unversioned symbols.
+ if (version_index == ELF::VER_NDX_LOCAL ||
+ version_index == ELF::VER_NDX_GLOBAL) {
+ Version = "";
+ IsDefault = false;
+ return object_error::success;
+ }
+
+ // Lookup this symbol in the version table
+ LoadVersionMap();
+ if (version_index >= VersionMap.size() || VersionMap[version_index].isNull())
+ report_fatal_error("Symbol has version index without corresponding "
+ "define or reference entry");
+ const VersionMapEntry &entry = VersionMap[version_index];
+
+ // Get the version name string
+ size_t name_offset;
+ if (entry.isVerdef()) {
+ // The first Verdaux entry holds the name.
+ name_offset = entry.getVerdef()->getAux()->vda_name;
+ } else {
+ name_offset = entry.getVernaux()->vna_name;
+ }
+ Version = getString(dot_dynstr_sec, name_offset);
+
+ // Set IsDefault
+ if (entry.isVerdef()) {
+ IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN);
+ } else {
+ IsDefault = false;
+ }
+
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline DynRefImpl<target_endianness, is64Bits>
+ ::DynRefImpl(DataRefImpl DynP, const OwningType *Owner)
+ : DynPimpl(DynP)
+ , OwningObject(Owner) {}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline bool DynRefImpl<target_endianness, is64Bits>
+ ::operator==(const DynRefImpl &Other) const {
+ return DynPimpl == Other.DynPimpl;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline bool DynRefImpl<target_endianness, is64Bits>
+ ::operator <(const DynRefImpl &Other) const {
+ return DynPimpl < Other.DynPimpl;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline error_code DynRefImpl<target_endianness, is64Bits>
+ ::getNext(DynRefImpl &Result) const {
+ return OwningObject->getDynNext(DynPimpl, Result);
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline int64_t DynRefImpl<target_endianness, is64Bits>
+ ::getTag() const {
+ return OwningObject->getDyn(DynPimpl)->d_tag;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline uint64_t DynRefImpl<target_endianness, is64Bits>
+ ::getVal() const {
+ return OwningObject->getDyn(DynPimpl)->d_un.d_val;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline uint64_t DynRefImpl<target_endianness, is64Bits>
+ ::getPtr() const {
+ return OwningObject->getDyn(DynPimpl)->d_un.d_ptr;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+inline DataRefImpl DynRefImpl<target_endianness, is64Bits>
+ ::getRawDataRefImpl() const {
+ return DynPimpl;
+}
+
+/// This is a generic interface for retrieving GNU symbol version
+/// information from an ELFObjectFile.
+static inline error_code GetELFSymbolVersion(const ObjectFile *Obj,
+ const SymbolRef &Sym,
+ StringRef &Version,
+ bool &IsDefault) {
+ // Little-endian 32-bit
+ if (const ELFObjectFile<support::little, false> *ELFObj =
+ dyn_cast<ELFObjectFile<support::little, false> >(Obj))
+ return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+ // Big-endian 32-bit
+ if (const ELFObjectFile<support::big, false> *ELFObj =
+ dyn_cast<ELFObjectFile<support::big, false> >(Obj))
+ return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+ // Little-endian 64-bit
+ if (const ELFObjectFile<support::little, true> *ELFObj =
+ dyn_cast<ELFObjectFile<support::little, true> >(Obj))
+ return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+ // Big-endian 64-bit
+ if (const ELFObjectFile<support::big, true> *ELFObj =
+ dyn_cast<ELFObjectFile<support::big, true> >(Obj))
+ return ELFObj->getSymbolVersion(Sym, Version, IsDefault);
+
+ llvm_unreachable("Object passed to GetELFSymbolVersion() is not ELF");
+}
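+
+// Example call site (a sketch; Obj and Sym are assumed to be in scope):
+//   StringRef Version;
+//   bool IsDefault;
+//   if (error_code ec = GetELFSymbolVersion(Obj, Sym, Version, IsDefault))
+//     report_fatal_error(ec.message());
+//   // Version is then e.g. "GLIBC_2.2.5" for a versioned dynamic symbol.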
+
+}
+}
+
+#endif
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index f5e7461a488a..0b73f9483164 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -18,6 +18,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/MachOObject.h"
#include "llvm/Support/MachO.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
namespace llvm {
@@ -31,23 +32,36 @@ public:
virtual symbol_iterator begin_symbols() const;
virtual symbol_iterator end_symbols() const;
+ virtual symbol_iterator begin_dynamic_symbols() const;
+ virtual symbol_iterator end_dynamic_symbols() const;
+ virtual library_iterator begin_libraries_needed() const;
+ virtual library_iterator end_libraries_needed() const;
virtual section_iterator begin_sections() const;
virtual section_iterator end_sections() const;
virtual uint8_t getBytesInAddress() const;
virtual StringRef getFileFormatName() const;
virtual unsigned getArch() const;
+ virtual StringRef getLoadName() const;
+
+ MachOObject *getObject() { return MachOObj; }
+
+ static inline bool classof(const Binary *v) {
+ return v->isMachO();
+ }
+ static inline bool classof(const MachOObjectFile *v) { return true; }
protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
- virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
- virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const;
- virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const;
+ virtual error_code getSymbolFlags(DataRefImpl Symb, uint32_t &Res) const;
+ virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::Type &Res) const;
+ virtual error_code getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const;
virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
@@ -58,6 +72,10 @@ protected:
virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const;
virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Res) const;
+ virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const;
+ virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const;
virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
bool &Result) const;
virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
@@ -67,16 +85,22 @@ protected:
RelocationRef &Res) const;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const;
virtual error_code getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const;
+ uint64_t &Res) const;
virtual error_code getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const;
virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
int64_t &Res) const;
virtual error_code getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const;
+ virtual error_code getRelocationHidden(DataRefImpl Rel, bool &Result) const;
+
+ virtual error_code getLibraryNext(DataRefImpl LibData, LibraryRef &Res) const;
+ virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const;
private:
MachOObject *MachOObj;
@@ -97,6 +121,9 @@ private:
void getRelocation(DataRefImpl Rel,
InMemoryStruct<macho::RelocationEntry> &Res) const;
std::size_t getSectionIndex(DataRefImpl Sec) const;
+
+ void printRelocationTargetName(InMemoryStruct<macho::RelocationEntry>& RE,
+ raw_string_ostream &fmt) const;
};
}
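The new classof overloads hook MachOObjectFile into LLVM-style RTTI, so a generic Binary handle can be narrowed with dyn_cast. A minimal sketch, assuming Bin came from the usual createBinary/createObjectFile path:

    // Recover the raw Mach-O reader from a generic Binary handle.
    if (MachOObjectFile *MF = dyn_cast<MachOObjectFile>(Bin)) {
      MachOObject *MO = MF->getObject(); // low-level reader from MachOObject.h
      MO->dumpHeader();                  // GDB-callable debugging helper
    }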
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
index 51be847858a1..056040274319 100644
--- a/include/llvm/Object/MachOObject.h
+++ b/include/llvm/Object/MachOObject.h
@@ -177,14 +177,14 @@ public:
void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
/// @}
-
+
/// @name Object Dump Facilities
/// @{
/// dump - Support for debugging, callable in GDB: V->dump()
//
void dump() const;
void dumpHeader() const;
-
+
/// print - Implement operator<< on Value.
///
void print(raw_ostream &O) const;
@@ -192,7 +192,7 @@ public:
/// @}
};
-
+
inline raw_ostream &operator<<(raw_ostream &OS, const MachOObject &V) {
V.print(OS);
return OS;
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 83854a0d6c28..4dd7fb581308 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -20,6 +20,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
+#include <vector>
namespace llvm {
namespace object {
@@ -37,6 +38,9 @@ union DataRefImpl {
uint32_t a, b;
} d;
uintptr_t p;
+ DataRefImpl() {
+ std::memset(this, 0, sizeof(DataRefImpl));
+ }
};
template<class content_type>
@@ -78,52 +82,13 @@ static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) {
return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0;
}
-/// SymbolRef - This is a value type class that represents a single symbol in
-/// the list of symbols in the object file.
-class SymbolRef {
- friend class SectionRef;
- DataRefImpl SymbolPimpl;
- const ObjectFile *OwningObject;
-
-public:
- SymbolRef() : OwningObject(NULL) {
- std::memset(&SymbolPimpl, 0, sizeof(SymbolPimpl));
- }
-
- enum SymbolType {
- ST_Function,
- ST_Data,
- ST_External, // Defined in another object file
- ST_Other
- };
-
- SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
-
- bool operator==(const SymbolRef &Other) const;
-
- error_code getNext(SymbolRef &Result) const;
-
- error_code getName(StringRef &Result) const;
- error_code getAddress(uint64_t &Result) const;
- error_code getOffset(uint64_t &Result) const;
- error_code getSize(uint64_t &Result) const;
- error_code getSymbolType(SymbolRef::SymbolType &Result) const;
-
- /// Returns the ascii char that should be displayed in a symbol table dump via
- /// nm for this symbol.
- error_code getNMTypeChar(char &Result) const;
-
- /// Returns true for symbols that are internal to the object file format such
- /// as section symbols.
- error_code isInternal(bool &Result) const;
-
- /// Returns true for symbols that can be used in another objects,
- /// such as library functions
- error_code isGlobal(bool &Result) const;
+static bool operator <(const DataRefImpl &a, const DataRefImpl &b) {
+ // Check for bitwise identity. This is the only legal way to compare a union w/o
+ // knowing which member is in use.
+ return std::memcmp(&a, &b, sizeof(DataRefImpl)) < 0;
+}
- DataRefImpl getRawDataRefImpl() const;
-};
-typedef content_iterator<SymbolRef> symbol_iterator;
+class SymbolRef;
/// RelocationRef - This is a value type class that represents a single
/// relocation in the list of relocations in the object file.
@@ -132,9 +97,7 @@ class RelocationRef {
const ObjectFile *OwningObject;
public:
- RelocationRef() : OwningObject(NULL) {
- std::memset(&RelocationPimpl, 0, sizeof(RelocationPimpl));
- }
+ RelocationRef() : OwningObject(NULL) { }
RelocationRef(DataRefImpl RelocationP, const ObjectFile *Owner);
@@ -143,8 +106,14 @@ public:
error_code getNext(RelocationRef &Result) const;
error_code getAddress(uint64_t &Result) const;
+ error_code getOffset(uint64_t &Result) const;
error_code getSymbol(SymbolRef &Result) const;
- error_code getType(uint32_t &Result) const;
+ error_code getType(uint64_t &Result) const;
+
+ /// @brief Indicates whether this relocation should be hidden when listing
+ /// relocations, usually because it is the trailing part of a multipart
+ /// relocation that will be printed as part of the leading relocation.
+ error_code getHidden(bool &Result) const;
/// @brief Get a string that represents the type of this relocation.
///
@@ -168,13 +137,12 @@ class SectionRef {
const ObjectFile *OwningObject;
public:
- SectionRef() : OwningObject(NULL) {
- std::memset(&SectionPimpl, 0, sizeof(SectionPimpl));
- }
+ SectionRef() : OwningObject(NULL) { }
SectionRef(DataRefImpl SectionP, const ObjectFile *Owner);
bool operator==(const SectionRef &Other) const;
+ bool operator <(const SectionRef &Other) const;
error_code getNext(SectionRef &Result) const;
@@ -190,21 +158,112 @@ public:
error_code isText(bool &Result) const;
error_code isData(bool &Result) const;
error_code isBSS(bool &Result) const;
+ error_code isRequiredForExecution(bool &Result) const;
+ error_code isVirtual(bool &Result) const;
+ error_code isZeroInit(bool &Result) const;
error_code containsSymbol(SymbolRef S, bool &Result) const;
relocation_iterator begin_relocations() const;
relocation_iterator end_relocations() const;
+
+ DataRefImpl getRawDataRefImpl() const;
};
typedef content_iterator<SectionRef> section_iterator;
+/// SymbolRef - This is a value type class that represents a single symbol in
+/// the list of symbols in the object file.
+class SymbolRef {
+ friend class SectionRef;
+ DataRefImpl SymbolPimpl;
+ const ObjectFile *OwningObject;
+
+public:
+ SymbolRef() : OwningObject(NULL) { }
+
+ enum Type {
+ ST_Unknown, // Type not specified
+ ST_Data,
+ ST_Debug,
+ ST_File,
+ ST_Function,
+ ST_Other
+ };
+
+ enum Flags {
+ SF_None = 0,
+ SF_Undefined = 1U << 0, // Symbol is defined in another object file
+ SF_Global = 1U << 1, // Global symbol
+ SF_Weak = 1U << 2, // Weak symbol
+ SF_Absolute = 1U << 3, // Absolute symbol
+ SF_ThreadLocal = 1U << 4, // Thread local symbol
+ SF_Common = 1U << 5, // Symbol has common linkage
+ SF_FormatSpecific = 1U << 31 // Specific to the object file format
+ // (e.g. section symbols)
+ };
+
+ SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
+
+ bool operator==(const SymbolRef &Other) const;
+ bool operator <(const SymbolRef &Other) const;
+
+ error_code getNext(SymbolRef &Result) const;
+
+ error_code getName(StringRef &Result) const;
+ error_code getAddress(uint64_t &Result) const;
+ error_code getFileOffset(uint64_t &Result) const;
+ error_code getSize(uint64_t &Result) const;
+ error_code getType(SymbolRef::Type &Result) const;
+
+ /// Returns the ascii char that should be displayed in a symbol table dump via
+ /// nm for this symbol.
+ error_code getNMTypeChar(char &Result) const;
+
+ /// Get symbol flags (bitwise OR of SymbolRef::Flags)
+ error_code getFlags(uint32_t &Result) const;
+
+ /// @brief Return true for common symbols such as uninitialized globals
+ error_code isCommon(bool &Result) const;
+
+ /// @brief Get the section that this symbol is defined in. The result is
+ /// end_sections() if the symbol is undefined or absolute.
+ error_code getSection(section_iterator &Result) const;
+
+ DataRefImpl getRawDataRefImpl() const;
+};
+typedef content_iterator<SymbolRef> symbol_iterator;
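The single getFlags() bitmask replaces the old isInternal()/isGlobal() predicates; callers now test SF_* bits directly. A hedged sketch of the new idiom (Sym is assumed to be a valid SymbolRef; the error code is checked only at the call):

    uint32_t Flags;
    if (!Sym.getFlags(Flags)) {
      bool IsGlobal     = Flags & SymbolRef::SF_Global;
      bool IsFormatOnly = Flags & SymbolRef::SF_FormatSpecific; // section syms
      bool IsWeakUndef  = (Flags & SymbolRef::SF_Weak) &&
                          (Flags & SymbolRef::SF_Undefined);
    }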
+
+/// LibraryRef - This is a value type class that represents a single library in
+/// the list of libraries needed by a shared or dynamic object.
+class LibraryRef {
+ friend class SectionRef;
+ DataRefImpl LibraryPimpl;
+ const ObjectFile *OwningObject;
+
+public:
+ LibraryRef() : OwningObject(NULL) { }
+
+ LibraryRef(DataRefImpl LibraryP, const ObjectFile *Owner);
+
+ bool operator==(const LibraryRef &Other) const;
+ bool operator <(const LibraryRef &Other) const;
+
+ error_code getNext(LibraryRef &Result) const;
+
+ // Get the path to this library, as stored in the object file.
+ error_code getPath(StringRef &Result) const;
+
+ DataRefImpl getRawDataRefImpl() const;
+};
+typedef content_iterator<LibraryRef> library_iterator;
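LibraryRef gives object tools an ldd-style view of a binary's dependencies. A minimal sketch of walking the needed-library list (Obj assumed valid; errors mostly ignored):

    error_code ec;
    for (library_iterator I = Obj->begin_libraries_needed(),
                          E = Obj->end_libraries_needed();
         I != E; I.increment(ec)) {
      if (ec) break;
      StringRef Path;
      if (!I->getPath(Path))
        outs() << "needed: " << Path << "\n"; // DT_NEEDED / LC_LOAD_DYLIB name
    }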
+
const uint64_t UnknownAddressOrSize = ~0ULL;
/// ObjectFile - This class is the base class for all object file types.
/// Concrete instances of this object are created by createObjectFile, which
/// figures out which type to create.
class ObjectFile : public Binary {
-private:
+ virtual void anchor();
ObjectFile(); // = delete
ObjectFile(const ObjectFile &other); // = delete
@@ -227,12 +286,15 @@ protected:
virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const = 0;
virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const = 0;
virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const =0;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const =0;
+ virtual error_code getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const =0;
virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const = 0;
+ virtual error_code getSymbolType(DataRefImpl Symb,
+ SymbolRef::Type &Res) const = 0;
virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const = 0;
- virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const = 0;
- virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const = 0;
- virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const = 0;
+ virtual error_code getSymbolFlags(DataRefImpl Symb,
+ uint32_t &Res) const = 0;
+ virtual error_code getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const = 0;
// Same as above for SectionRef.
friend class SectionRef;
@@ -245,6 +307,11 @@ protected:
virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const = 0;
virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const = 0;
virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const = 0;
+ virtual error_code isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Res) const = 0;
+ // A section is 'virtual' if its contents aren't present in the object image.
+ virtual error_code isSectionVirtual(DataRefImpl Sec, bool &Res) const = 0;
+ virtual error_code isSectionZeroInit(DataRefImpl Sec, bool &Res) const = 0;
virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
bool &Result) const = 0;
virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const = 0;
@@ -257,25 +324,42 @@ protected:
RelocationRef &Res) const = 0;
virtual error_code getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const =0;
+ virtual error_code getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const =0;
virtual error_code getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const = 0;
virtual error_code getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const = 0;
+ uint64_t &Res) const = 0;
virtual error_code getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const = 0;
virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
int64_t &Res) const = 0;
virtual error_code getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const = 0;
+ virtual error_code getRelocationHidden(DataRefImpl Rel, bool &Result) const {
+ Result = false;
+ return object_error::success;
+ }
+
+ // Same for LibraryRef
+ friend class LibraryRef;
+ virtual error_code getLibraryNext(DataRefImpl Lib, LibraryRef &Res) const = 0;
+ virtual error_code getLibraryPath(DataRefImpl Lib, StringRef &Res) const = 0;
public:
virtual symbol_iterator begin_symbols() const = 0;
virtual symbol_iterator end_symbols() const = 0;
+ virtual symbol_iterator begin_dynamic_symbols() const = 0;
+ virtual symbol_iterator end_dynamic_symbols() const = 0;
+
virtual section_iterator begin_sections() const = 0;
virtual section_iterator end_sections() const = 0;
+ virtual library_iterator begin_libraries_needed() const = 0;
+ virtual library_iterator end_libraries_needed() const = 0;
+
/// @brief The number of bytes used to represent an address in this object
/// file format.
virtual uint8_t getBytesInAddress() const = 0;
@@ -283,6 +367,11 @@ public:
virtual StringRef getFileFormatName() const = 0;
virtual /* Triple::ArchType */ unsigned getArch() const = 0;
+ /// For shared objects, returns the name which this object should be
+ /// loaded from at runtime. This corresponds to DT_SONAME on ELF and
+ /// LC_ID_DYLIB (install name) on MachO.
+ virtual StringRef getLoadName() const = 0;
+
/// @returns Pointer to ObjectFile subclass to handle this type of object.
/// @param ObjectPath The path to the object file. ObjectPath.isObject must
/// return true.
@@ -291,8 +380,7 @@ public:
static ObjectFile *createObjectFile(MemoryBuffer *Object);
static inline bool classof(const Binary *v) {
- return v->getType() >= isObject &&
- v->getType() < lastObject;
+ return v->isObject();
}
static inline bool classof(const ObjectFile *v) { return true; }
@@ -311,6 +399,10 @@ inline bool SymbolRef::operator==(const SymbolRef &Other) const {
return SymbolPimpl == Other.SymbolPimpl;
}
+inline bool SymbolRef::operator <(const SymbolRef &Other) const {
+ return SymbolPimpl < Other.SymbolPimpl;
+}
+
inline error_code SymbolRef::getNext(SymbolRef &Result) const {
return OwningObject->getSymbolNext(SymbolPimpl, Result);
}
@@ -323,8 +415,8 @@ inline error_code SymbolRef::getAddress(uint64_t &Result) const {
return OwningObject->getSymbolAddress(SymbolPimpl, Result);
}
-inline error_code SymbolRef::getOffset(uint64_t &Result) const {
- return OwningObject->getSymbolOffset(SymbolPimpl, Result);
+inline error_code SymbolRef::getFileOffset(uint64_t &Result) const {
+ return OwningObject->getSymbolFileOffset(SymbolPimpl, Result);
}
inline error_code SymbolRef::getSize(uint64_t &Result) const {
@@ -335,15 +427,15 @@ inline error_code SymbolRef::getNMTypeChar(char &Result) const {
return OwningObject->getSymbolNMTypeChar(SymbolPimpl, Result);
}
-inline error_code SymbolRef::isInternal(bool &Result) const {
- return OwningObject->isSymbolInternal(SymbolPimpl, Result);
+inline error_code SymbolRef::getFlags(uint32_t &Result) const {
+ return OwningObject->getSymbolFlags(SymbolPimpl, Result);
}
-inline error_code SymbolRef::isGlobal(bool &Result) const {
- return OwningObject->isSymbolGlobal(SymbolPimpl, Result);
+inline error_code SymbolRef::getSection(section_iterator &Result) const {
+ return OwningObject->getSymbolSection(SymbolPimpl, Result);
}
-inline error_code SymbolRef::getSymbolType(SymbolRef::SymbolType &Result) const {
+inline error_code SymbolRef::getType(SymbolRef::Type &Result) const {
return OwningObject->getSymbolType(SymbolPimpl, Result);
}
@@ -362,6 +454,10 @@ inline bool SectionRef::operator==(const SectionRef &Other) const {
return SectionPimpl == Other.SectionPimpl;
}
+inline bool SectionRef::operator <(const SectionRef &Other) const {
+ return SectionPimpl < Other.SectionPimpl;
+}
+
inline error_code SectionRef::getNext(SectionRef &Result) const {
return OwningObject->getSectionNext(SectionPimpl, Result);
}
@@ -398,6 +494,18 @@ inline error_code SectionRef::isBSS(bool &Result) const {
return OwningObject->isSectionBSS(SectionPimpl, Result);
}
+inline error_code SectionRef::isRequiredForExecution(bool &Result) const {
+ return OwningObject->isSectionRequiredForExecution(SectionPimpl, Result);
+}
+
+inline error_code SectionRef::isVirtual(bool &Result) const {
+ return OwningObject->isSectionVirtual(SectionPimpl, Result);
+}
+
+inline error_code SectionRef::isZeroInit(bool &Result) const {
+ return OwningObject->isSectionZeroInit(SectionPimpl, Result);
+}
+
inline error_code SectionRef::containsSymbol(SymbolRef S, bool &Result) const {
return OwningObject->sectionContainsSymbol(SectionPimpl, S.SymbolPimpl,
Result);
@@ -411,6 +519,9 @@ inline relocation_iterator SectionRef::end_relocations() const {
return OwningObject->getSectionRelEnd(SectionPimpl);
}
+inline DataRefImpl SectionRef::getRawDataRefImpl() const {
+ return SectionPimpl;
+}
/// RelocationRef
inline RelocationRef::RelocationRef(DataRefImpl RelocationP,
@@ -430,11 +541,15 @@ inline error_code RelocationRef::getAddress(uint64_t &Result) const {
return OwningObject->getRelocationAddress(RelocationPimpl, Result);
}
+inline error_code RelocationRef::getOffset(uint64_t &Result) const {
+ return OwningObject->getRelocationOffset(RelocationPimpl, Result);
+}
+
inline error_code RelocationRef::getSymbol(SymbolRef &Result) const {
return OwningObject->getRelocationSymbol(RelocationPimpl, Result);
}
-inline error_code RelocationRef::getType(uint32_t &Result) const {
+inline error_code RelocationRef::getType(uint64_t &Result) const {
return OwningObject->getRelocationType(RelocationPimpl, Result);
}
@@ -452,6 +567,30 @@ inline error_code RelocationRef::getValueString(SmallVectorImpl<char> &Result)
return OwningObject->getRelocationValueString(RelocationPimpl, Result);
}
+inline error_code RelocationRef::getHidden(bool &Result) const {
+ return OwningObject->getRelocationHidden(RelocationPimpl, Result);
+}
+/// LibraryRef
+inline LibraryRef::LibraryRef(DataRefImpl LibraryP, const ObjectFile *Owner)
+ : LibraryPimpl(LibraryP)
+ , OwningObject(Owner) {}
+
+inline bool LibraryRef::operator==(const LibraryRef &Other) const {
+ return LibraryPimpl == Other.LibraryPimpl;
+}
+
+inline bool LibraryRef::operator <(const LibraryRef &Other) const {
+ return LibraryPimpl < Other.LibraryPimpl;
+}
+
+inline error_code LibraryRef::getNext(LibraryRef &Result) const {
+ return OwningObject->getLibraryNext(LibraryPimpl, Result);
+}
+
+inline error_code LibraryRef::getPath(StringRef &Result) const {
+ return OwningObject->getLibraryPath(LibraryPimpl, Result);
+}
+
} // end namespace object
} // end namespace llvm
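Tying the relocation additions together: getHidden() lets dumpers collapse multi-part relocations, and the base class defaults it to false so only formats that need it override. A sketch of the consuming loop (Sec assumed to be a valid SectionRef; most error codes dropped for brevity):

    error_code ec;
    for (relocation_iterator I = Sec.begin_relocations(),
                             E = Sec.end_relocations();
         I != E; I.increment(ec)) {
      if (ec) break;
      bool Hidden = false;
      if (!I->getHidden(Hidden) && Hidden)
        continue;                 // folded into the preceding relocation
      uint64_t Offset = 0, Type = 0;
      I->getOffset(Offset);
      I->getType(Type);
      SmallString<32> TypeName;   // needs llvm/ADT/SmallString.h
      I->getTypeName(TypeName);
      outs() << format("0x%llx ", (unsigned long long)Offset)
             << TypeName << "\n"; // format() from llvm/Support/Format.h
    }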
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 48a5796383b4..abd6a1939d71 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -261,8 +261,8 @@ public:
/// getPointerOperandType - Method to return the pointer operand as a
/// PointerType.
- PointerType *getPointerOperandType() const {
- return reinterpret_cast<PointerType*>(getPointerOperand()->getType());
+ Type *getPointerOperandType() const {
+ return getPointerOperand()->getType();
}
unsigned getNumIndices() const { // Note: always non-negative
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 04dd8b60547a..888537daa425 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -53,7 +53,7 @@ typedef const void* AnalysisID;
/// Ordering of pass manager types is important here.
enum PassManagerType {
PMT_Unknown = 0,
- PMT_ModulePassManager = 1, ///< MPPassManager
+ PMT_ModulePassManager = 1, ///< MPPassManager
PMT_CallGraphPassManager, ///< CGPassManager
PMT_FunctionPassManager, ///< FPPassManager
PMT_LoopPassManager, ///< LPPassManager
@@ -84,14 +84,14 @@ class Pass {
PassKind Kind;
void operator=(const Pass&); // DO NOT IMPLEMENT
Pass(const Pass &); // DO NOT IMPLEMENT
-
+
public:
- explicit Pass(PassKind K, char &pid);
+ explicit Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
virtual ~Pass();
-
+
PassKind getPassKind() const { return Kind; }
-
+
/// getPassName - Return a nice clean name for a pass. This usually
/// implemented in terms of the name that is registered by one of the
/// Registration templates, but can be overloaded directly.
@@ -99,7 +99,7 @@ public:
virtual const char *getPassName() const;
/// getPassID - Return the PassID number that corresponds to this pass.
- virtual AnalysisID getPassID() const {
+ AnalysisID getPassID() const {
return PassID;
}
@@ -119,12 +119,12 @@ public:
const std::string &Banner) const = 0;
/// Each pass is responsible for assigning a pass manager to itself.
- /// PMS is the stack of available pass manager.
- virtual void assignPassManager(PMStack &,
+ /// PMS is the stack of available pass manager.
+ virtual void assignPassManager(PMStack &,
PassManagerType) {}
/// Check if available pass managers are suitable for this pass or not.
virtual void preparePassManager(PMStack &);
-
+
/// Return what kind of Pass Manager can manage this pass.
virtual PassManagerType getPotentialPassManagerType() const;
@@ -159,9 +159,9 @@ public:
virtual void *getAdjustedAnalysisPointer(AnalysisID ID);
virtual ImmutablePass *getAsImmutablePass();
virtual PMDataManager *getAsPMDataManager();
-
+
/// verifyAnalysis() - This member can be implemented by an analysis pass to
- /// check state of analysis information.
+ /// check state of analysis information.
virtual void verifyAnalysis() const;
// dumpPassStructure - Implement the -debug-passes=PassStructure option
@@ -175,6 +175,10 @@ public:
// argument string, or null if it is not known.
static const PassInfo *lookupPassInfo(StringRef Arg);
+ // createPass - Create an object for the specified pass class,
+ // or null if it is not known.
+ static Pass *createPass(AnalysisID ID);
+
/// getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to
/// get analysis information that might be around, for example to update it.
/// This is different than getAnalysis in that it can fail (if the analysis
@@ -226,7 +230,7 @@ public:
/// being operated on.
virtual bool runOnModule(Module &M) = 0;
- virtual void assignPassManager(PMStack &PMS,
+ virtual void assignPassManager(PMStack &PMS,
PassManagerType T);
/// Return what kind of Pass Manager can manage this pass.
@@ -259,9 +263,9 @@ public:
///
bool runOnModule(Module &) { return false; }
- explicit ImmutablePass(char &pid)
+ explicit ImmutablePass(char &pid)
: ModulePass(pid) {}
-
+
// Force out-of-line virtual method.
virtual ~ImmutablePass();
};
@@ -286,7 +290,7 @@ public:
/// any necessary per-module initialization.
///
virtual bool doInitialization(Module &);
-
+
/// runOnFunction - Virtual method overridden by subclasses to do the
/// per-function processing of the pass.
///
@@ -297,7 +301,7 @@ public:
///
virtual bool doFinalization(Module &);
- virtual void assignPassManager(PMStack &PMS,
+ virtual void assignPassManager(PMStack &PMS,
PassManagerType T);
/// Return what kind of Pass Manager can manage this pass.
@@ -348,7 +352,7 @@ public:
///
virtual bool doFinalization(Module &);
- virtual void assignPassManager(PMStack &PMS,
+ virtual void assignPassManager(PMStack &PMS,
PassManagerType T);
/// Return what kind of Pass Manager can manage this pass.
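The new static createPass() pairs with lookupPassInfo() to instantiate a registered pass from its command-line name. A hedged sketch, assuming the pass was registered through the INITIALIZE_PASS machinery and that PassInfo::getTypeInfo() is the registry key:

    // Returns a freshly allocated pass, or 0 if Arg names no registered pass.
    static Pass *createByArg(StringRef Arg) {
      if (const PassInfo *PI = Pass::lookupPassInfo(Arg))
        return Pass::createPass(PI->getTypeInfo());
      return 0;
    }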
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index fede1216c3c4..5c6a2d7a92f9 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -19,6 +19,7 @@
#ifndef LLVM_PASS_ANALYSIS_SUPPORT_H
#define LLVM_PASS_ANALYSIS_SUPPORT_H
+#include "llvm/Pass.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <vector>
diff --git a/include/llvm/PassManager.h b/include/llvm/PassManager.h
index c8b5dcaf0f2d..ce5fda79f9c7 100644
--- a/include/llvm/PassManager.h
+++ b/include/llvm/PassManager.h
@@ -53,17 +53,13 @@ public:
/// will be destroyed as well, so there is no need to delete the pass. This
/// implies that all passes MUST be allocated with 'new'.
void add(Pass *P);
-
+
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool run(Module &M);
private:
- /// addImpl - Add a pass to the queue of passes to run, without
- /// checking whether to add a printer pass.
- void addImpl(Pass *P);
-
- /// PassManagerImpl_New is the actual class. PassManager is just the
+ /// PassManagerImpl_New is the actual class. PassManager is just the
/// wrapper to publish the simple pass manager interface
PassManagerImpl *PM;
};
@@ -75,11 +71,11 @@ public:
/// but does not take ownership of, the specified Module.
explicit FunctionPassManager(Module *M);
~FunctionPassManager();
-
+
/// add - Add a pass to the queue of passes to run. This passes
/// ownership of the Pass to the PassManager. When the
/// PassManager_X is destroyed, the pass will be destroyed as well, so
- /// there is no need to delete the pass. (TODO delete passes.)
+ /// there is no need to delete the pass.
/// This implies that all passes MUST be allocated with 'new'.
void add(Pass *P);
@@ -88,20 +84,16 @@ public:
/// so, return true.
///
bool run(Function &F);
-
+
/// doInitialization - Run all of the initializers for the function passes.
///
bool doInitialization();
-
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization();
-
-private:
- /// addImpl - Add a pass to the queue of passes to run, without
- /// checking whether to add a printer pass.
- void addImpl(Pass *P);
+private:
FunctionPassManagerImpl *FPM;
Module *M;
};
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index c05347da7934..fa29f50ccf77 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the LLVM Pass Manager infrastructure.
+// This file declares the LLVM Pass Manager infrastructure.
//
//===----------------------------------------------------------------------===//
@@ -24,11 +24,11 @@
//===----------------------------------------------------------------------===//
// Overview:
// The Pass Manager Infrastructure manages passes. Its responsibilities are:
-//
+//
// o Manage optimization pass execution order
// o Make required Analysis information available before pass P is run
// o Release memory occupied by dead passes
-// o If Analysis information is dirtied by a pass then regenerate Analysis
+// o If Analysis information is dirtied by a pass then regenerate Analysis
// information before it is consumed by another pass.
//
// Pass Manager Infrastructure uses multiple pass managers. They are
@@ -43,13 +43,13 @@
//
// [o] class PMTopLevelManager;
//
-// Two top level managers, PassManager and FunctionPassManager, derive from
-// PMTopLevelManager. PMTopLevelManager manages information used by top level
+// Two top level managers, PassManager and FunctionPassManager, derive from
+// PMTopLevelManager. PMTopLevelManager manages information used by top level
// managers such as last user info.
//
// [o] class PMDataManager;
//
-// PMDataManager manages information, e.g. list of available analysis info,
+// PMDataManager manages information, e.g. list of available analysis info,
// used by a pass manager to manage execution order of passes. It also provides
// a place to implement common pass manager APIs. All pass managers derive from
// PMDataManager.
@@ -82,7 +82,7 @@
// relies on PassManagerImpl to do all the tasks.
//
// [o] class PassManagerImpl : public Pass, public PMDataManager,
-// public PMDTopLevelManager
+// public PMTopLevelManager
//
// PassManagerImpl is a top level pass manager responsible for managing
// MPPassManagers.
@@ -109,7 +109,7 @@ enum PassDebuggingString {
ON_REGION_MSG, // " 'on Region ...\n'"
ON_LOOP_MSG, // " 'on Loop ...\n'"
ON_CG_MSG // "' on Call Graph ...\n'"
-};
+};
/// PassManagerPrettyStackEntry - This is used to print informative information
/// about what pass is running when/if a stack trace is generated.
@@ -124,19 +124,19 @@ public:
: P(p), V(&v), M(0) {} // When P is run on V
PassManagerPrettyStackEntry(Pass *p, Module &m)
: P(p), V(0), M(&m) {} // When P is run on M
-
+
/// print - Emit information about this stack frame to OS.
virtual void print(raw_ostream &OS) const;
};
-
-
+
+
//===----------------------------------------------------------------------===//
// PMStack
//
/// PMStack - This class implements a stack data structure of PMDataManager
/// pointers.
///
-/// Top level pass managers (see PassManager.cpp) maintain active Pass Managers
+/// Top level pass managers (see PassManager.cpp) maintain active Pass Managers
/// using PMStack. Each Pass implements assignPassManager() to connect itself
/// with appropriate manager. assignPassManager() walks PMStack to find
/// suitable manager.
@@ -174,9 +174,8 @@ protected:
void initializeAllAnalysisInfo();
private:
- /// This is implemented by top level pass manager and used by
- /// schedulePass() to add analysis info passes that are not available.
- virtual void addTopLevelPass(Pass *P) = 0;
+ virtual PMDataManager *getAsPMDataManager() = 0;
+ virtual PassManagerType getTopLevelPassManagerType() = 0;
public:
/// Schedule pass P for execution. Make sure that passes required by
@@ -198,7 +197,7 @@ public:
/// Find analysis usage information for the pass P.
AnalysisUsage *findAnalysisUsage(Pass *P);
- virtual ~PMTopLevelManager();
+ virtual ~PMTopLevelManager();
/// Add immutable pass and initialize it.
inline void addImmutablePass(ImmutablePass *P) {
@@ -228,7 +227,7 @@ public:
PMStack activeStack;
protected:
-
+
/// Collection of pass managers
SmallVector<PMDataManager *, 8> PassManagers;
@@ -254,7 +253,7 @@ private:
};
-
+
//===----------------------------------------------------------------------===//
// PMDataManager
@@ -268,7 +267,7 @@ public:
}
virtual ~PMDataManager();
-
+
virtual Pass *getAsPass() = 0;
/// Augment AvailableAnalysis by adding analysis made available by pass P.
@@ -279,16 +278,16 @@ public:
/// Remove Analysis that is not preserved by the pass
void removeNotPreservedAnalysis(Pass *P);
-
+
/// Remove dead passes used by P.
- void removeDeadPasses(Pass *P, StringRef Msg,
+ void removeDeadPasses(Pass *P, StringRef Msg,
enum PassDebuggingString);
/// Remove P.
- void freePass(Pass *P, StringRef Msg,
+ void freePass(Pass *P, StringRef Msg,
enum PassDebuggingString);
- /// Add pass P into the PassVector. Update
+ /// Add pass P into the PassVector. Update
/// AvailableAnalysis appropriately if ProcessAnalysis is true.
void add(Pass *P, bool ProcessAnalysis = true);
@@ -300,7 +299,7 @@ public:
virtual Pass *getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F);
/// Initialize available analysis information.
- void initializeAnalysisInfo() {
+ void initializeAnalysisInfo() {
AvailableAnalysis.clear();
for (unsigned i = 0; i < PMT_Last; ++i)
InheritedAnalysis[i] = NULL;
@@ -347,9 +346,9 @@ public:
return (unsigned)PassVector.size();
}
- virtual PassManagerType getPassManagerType() const {
+ virtual PassManagerType getPassManagerType() const {
assert ( 0 && "Invalid use of getPassManagerType");
- return PMT_Unknown;
+ return PMT_Unknown;
}
std::map<AnalysisID, Pass*> *getAvailableAnalysis() {
@@ -377,17 +376,17 @@ protected:
// then PMT_Last active pass mangers.
std::map<AnalysisID, Pass *> *InheritedAnalysis[PMT_Last];
-
+
/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
/// or higher is specified.
bool isPassDebuggingExecutionsOrMore() const;
-
+
private:
void dumpAnalysisUsage(StringRef Msg, const Pass *P,
const AnalysisUsage::VectorType &Set) const;
- // Set of available Analysis. This information is used while scheduling
- // pass. If a pass requires an analysis which is not available then
+ // Set of available Analysis. This information is used while scheduling
+ // pass. If a pass requires an analysis which is not available then
// the required analysis pass is scheduled to run before the pass itself is
// scheduled to run.
std::map<AnalysisID, Pass*> AvailableAnalysis;
@@ -403,27 +402,27 @@ private:
// FPPassManager
//
/// FPPassManager manages BBPassManagers and FunctionPasses.
-/// It batches all function passes and basic block pass managers together and
-/// sequence them to process one function at a time before processing next
+/// It batches all function passes and basic block pass managers together and
+/// sequence them to process one function at a time before processing next
/// function.
class FPPassManager : public ModulePass, public PMDataManager {
public:
static char ID;
- explicit FPPassManager()
+ explicit FPPassManager()
: ModulePass(ID), PMDataManager() { }
-
+
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
bool runOnFunction(Function &F);
bool runOnModule(Module &M);
-
+
/// cleanup - After running all passes, clean up pass manager cache.
void cleanup();
/// doInitialization - Run all of the initializers for the function passes.
///
bool doInitialization(Module &M);
-
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization(Module &M);
@@ -449,8 +448,8 @@ public:
return FP;
}
- virtual PassManagerType getPassManagerType() const {
- return PMT_FunctionPassManager;
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_FunctionPassManager;
}
};
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index 082790956c46..c50c2cc184e3 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -25,6 +25,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Valgrind.h"
#include <vector>
namespace llvm {
@@ -135,7 +136,10 @@ private:
if (old_val == 0) { \
function(Registry); \
sys::MemoryFence(); \
+ TsanIgnoreWritesBegin(); \
+ TsanHappensBefore(&initialized); \
initialized = 2; \
+ TsanIgnoreWritesEnd(); \
} else { \
sys::cas_flag tmp = initialized; \
sys::MemoryFence(); \
@@ -143,7 +147,8 @@ private:
tmp = initialized; \
sys::MemoryFence(); \
} \
- }
+ } \
+ TsanHappensAfter(&initialized);
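The Tsan* annotations teach ThreadSanitizer about the happens-before edge that the fence-plus-flag protocol already establishes. Stripped of macro plumbing, the pattern is roughly this ('initialized' is 0 = untouched, 1 = in progress, 2 = done):

    sys::cas_flag old_val = sys::CompareAndSwap(&initialized, 1, 0);
    if (old_val == 0) {
      doInitialization();               // runs exactly once
      sys::MemoryFence();
      TsanIgnoreWritesBegin();          // the plain store below is deliberate
      TsanHappensBefore(&initialized);  // pairs with TsanHappensAfter below
      initialized = 2;
      TsanIgnoreWritesEnd();
    } else {
      while (initialized != 2)
        sys::MemoryFence();             // spin until the writer publishes
    }
    TsanHappensAfter(&initialized);     // readers now see initialized state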
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h
index 554b7845696d..839cf9371247 100644
--- a/include/llvm/Support/BlockFrequency.h
+++ b/include/llvm/Support/BlockFrequency.h
@@ -14,6 +14,8 @@
#ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H
#define LLVM_SUPPORT_BLOCKFREQUENCY_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class raw_ostream;
diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h
index 05c24d4fcfcb..eedf69247ef5 100644
--- a/include/llvm/Support/BranchProbability.h
+++ b/include/llvm/Support/BranchProbability.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_BRANCHPROBABILITY_H
#include "llvm/Support/DataTypes.h"
+#include <cassert>
namespace llvm {
@@ -22,7 +23,6 @@ class raw_ostream;
// This class represents Branch Probability as a non-negative fraction.
class BranchProbability {
-
// Numerator
uint32_t N;
@@ -30,19 +30,44 @@ class BranchProbability {
uint32_t D;
public:
- BranchProbability(uint32_t n, uint32_t d);
+ BranchProbability(uint32_t n, uint32_t d) : N(n), D(d) {
+ assert(d > 0 && "Denomiator cannot be 0!");
+ assert(n <= d && "Probability cannot be bigger than 1!");
+ }
+
+ static BranchProbability getZero() { return BranchProbability(0, 1); }
+ static BranchProbability getOne() { return BranchProbability(1, 1); }
uint32_t getNumerator() const { return N; }
uint32_t getDenominator() const { return D; }
// Return (1 - Probability).
- BranchProbability getCompl() {
+ BranchProbability getCompl() const {
return BranchProbability(D - N, D);
}
void print(raw_ostream &OS) const;
void dump() const;
+
+ bool operator==(BranchProbability RHS) const {
+ return (uint64_t)N * RHS.D == (uint64_t)D * RHS.N;
+ }
+ bool operator!=(BranchProbability RHS) const {
+ return !(*this == RHS);
+ }
+ bool operator<(BranchProbability RHS) const {
+ return (uint64_t)N * RHS.D < (uint64_t)D * RHS.N;
+ }
+ bool operator>(BranchProbability RHS) const {
+ return RHS < *this;
+ }
+ bool operator<=(BranchProbability RHS) const {
+ return (uint64_t)N * RHS.D <= (uint64_t)D * RHS.N;
+ }
+ bool operator>=(BranchProbability RHS) const {
+ return RHS <= *this;
+ }
};
raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob);
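The comparison operators cross-multiply in 64 bits instead of dividing, which is exact because numerator and denominator are 32-bit, and which compares values rather than encodings. A small worked example:

    BranchProbability OneThird(1, 3), Half(1, 2);
    assert(OneThird < Half);                     // 1*2 < 3*1
    assert(BranchProbability(2, 4) == Half);     // 2*2 == 4*1, despite 2/4
    assert(OneThird.getCompl() == BranchProbability(2, 3));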
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index 29313ef90099..f5dc8ea055a3 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -71,6 +71,12 @@ public:
unsigned getOperandNo() const {
return It.getOperandNo();
}
+
+ /// getUse - Return the operand Use in the predecessor's terminator
+ /// of the successor.
+ Use &getUse() const {
+ return It.getUse();
+ }
};
typedef PredIterator<BasicBlock, Value::use_iterator> pred_iterator;
@@ -314,6 +320,7 @@ template <> struct GraphTraits<Function*> : public GraphTraits<BasicBlock*> {
typedef Function::iterator nodes_iterator;
static nodes_iterator nodes_begin(Function *F) { return F->begin(); }
static nodes_iterator nodes_end (Function *F) { return F->end(); }
+ static unsigned size (Function *F) { return F->size(); }
};
template <> struct GraphTraits<const Function*> :
public GraphTraits<const BasicBlock*> {
@@ -323,6 +330,7 @@ template <> struct GraphTraits<const Function*> :
typedef Function::const_iterator nodes_iterator;
static nodes_iterator nodes_begin(const Function *F) { return F->begin(); }
static nodes_iterator nodes_end (const Function *F) { return F->end(); }
+ static unsigned size (const Function *F) { return F->size(); }
};
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 673925593e6a..88c60bac7402 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -24,6 +24,7 @@
#define LLVM_SUPPORT_WIN_COFF_H
#include "llvm/Support/DataTypes.h"
+#include <cassert>
#include <cstring>
namespace llvm {
@@ -49,8 +50,65 @@ namespace COFF {
};
enum MachineTypes {
- IMAGE_FILE_MACHINE_I386 = 0x14C,
- IMAGE_FILE_MACHINE_AMD64 = 0x8664
+ IMAGE_FILE_MACHINE_UNKNOWN = 0x0,
+ IMAGE_FILE_MACHINE_AM33 = 0x13,
+ IMAGE_FILE_MACHINE_AMD64 = 0x8664,
+ IMAGE_FILE_MACHINE_ARM = 0x1C0,
+ IMAGE_FILE_MACHINE_ARMV7 = 0x1C4,
+ IMAGE_FILE_MACHINE_EBC = 0xEBC,
+ IMAGE_FILE_MACHINE_I386 = 0x14C,
+ IMAGE_FILE_MACHINE_IA64 = 0x200,
+ IMAGE_FILE_MACHINE_M32R = 0x9041,
+ IMAGE_FILE_MACHINE_MIPS16 = 0x266,
+ IMAGE_FILE_MACHINE_MIPSFPU = 0x366,
+ IMAGE_FILE_MACHINE_MIPSFPU16 = 0x466,
+ IMAGE_FILE_MACHINE_POWERPC = 0x1F0,
+ IMAGE_FILE_MACHINE_POWERPCFP = 0x1F1,
+ IMAGE_FILE_MACHINE_R4000 = 0x166,
+ IMAGE_FILE_MACHINE_SH3 = 0x1A2,
+ IMAGE_FILE_MACHINE_SH3DSP = 0x1A3,
+ IMAGE_FILE_MACHINE_SH4 = 0x1A6,
+ IMAGE_FILE_MACHINE_SH5 = 0x1A8,
+ IMAGE_FILE_MACHINE_THUMB = 0x1C2,
+ IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169
+ };
+
+ enum Characteristics {
+ /// The file does not contain base relocations and must be loaded at its
+ /// preferred base. If this cannot be done, the loader will error.
+ IMAGE_FILE_RELOCS_STRIPPED = 0x0001,
+ /// The file is valid and can be run.
+ IMAGE_FILE_EXECUTABLE_IMAGE = 0x0002,
+ /// COFF line numbers have been stripped. This is deprecated and should be
+ /// 0.
+ IMAGE_FILE_LINE_NUMS_STRIPPED = 0x0004,
+ /// COFF symbol table entries for local symbols have been removed. This is
+ /// deprecated and should be 0.
+ IMAGE_FILE_LOCAL_SYMS_STRIPPED = 0x0008,
+ /// Aggressively trim working set. This is deprecated and must be 0.
+ IMAGE_FILE_AGGRESSIVE_WS_TRIM = 0x0010,
+ /// Image can handle > 2GiB addresses.
+ IMAGE_FILE_LARGE_ADDRESS_AWARE = 0x0020,
+ /// Little endian: the LSB precedes the MSB in memory. This is deprecated
+ /// and should be 0.
+ IMAGE_FILE_BYTES_REVERSED_LO = 0x0080,
+ /// Machine is based on a 32-bit word architecture.
+ IMAGE_FILE_32BIT_MACHINE = 0x0100,
+ /// Debugging info has been removed.
+ IMAGE_FILE_DEBUG_STRIPPED = 0x0200,
+ /// If the image is on removable media, fully load it and copy it to swap.
+ IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP = 0x0400,
+ /// If the image is on network media, fully load it and copy it to swap.
+ IMAGE_FILE_NET_RUN_FROM_SWAP = 0x0800,
+ /// The image file is a system file, not a user program.
+ IMAGE_FILE_SYSTEM = 0x1000,
+ /// The image file is a DLL.
+ IMAGE_FILE_DLL = 0x2000,
+ /// This file should only be run on a uniprocessor machine.
+ IMAGE_FILE_UP_SYSTEM_ONLY = 0x4000,
+ /// Big endian: the MSB precedes the LSB in memory. This is deprecated
+ /// and should be 0.
+ IMAGE_FILE_BYTES_REVERSED_HI = 0x8000
};
struct symbol {
@@ -231,6 +289,24 @@ namespace COFF {
IMAGE_REL_AMD64_SSPAN32 = 0x0010
};
+ enum RelocationTypesARM {
+ IMAGE_REL_ARM_ABSOLUTE = 0x0000,
+ IMAGE_REL_ARM_ADDR32 = 0x0001,
+ IMAGE_REL_ARM_ADDR32NB = 0x0002,
+ IMAGE_REL_ARM_BRANCH24 = 0x0003,
+ IMAGE_REL_ARM_BRANCH11 = 0x0004,
+ IMAGE_REL_ARM_TOKEN = 0x0005,
+ IMAGE_REL_ARM_BLX24 = 0x0008,
+ IMAGE_REL_ARM_BLX11 = 0x0009,
+ IMAGE_REL_ARM_SECTION = 0x000E,
+ IMAGE_REL_ARM_SECREL = 0x000F,
+ IMAGE_REL_ARM_MOV32A = 0x0010,
+ IMAGE_REL_ARM_MOV32T = 0x0011,
+ IMAGE_REL_ARM_BRANCH20T = 0x0012,
+ IMAGE_REL_ARM_BRANCH24T = 0x0014,
+ IMAGE_REL_ARM_BLX23T = 0x0015
+ };
+
enum COMDATType {
IMAGE_COMDAT_SELECT_NODUPLICATES = 1,
IMAGE_COMDAT_SELECT_ANY,
@@ -292,7 +368,219 @@ namespace COFF {
AuxiliarySectionDefinition SectionDefinition;
};
-} // End namespace llvm.
+ /// @brief The Import Directory Table.
+ ///
+ /// There is a single array of these and one entry per imported DLL.
+ struct ImportDirectoryTableEntry {
+ uint32_t ImportLookupTableRVA;
+ uint32_t TimeDateStamp;
+ uint32_t ForwarderChain;
+ uint32_t NameRVA;
+ uint32_t ImportAddressTableRVA;
+ };
+
+ /// @brief The PE32 Import Lookup Table.
+ ///
+ /// There is an array of these for each imported DLL. It represents either
+ /// the ordinal to import from the target DLL, or a name to lookup and import
+ /// from the target DLL.
+ ///
+ /// This also happens to be the same format used by the Import Address Table
+ /// when it is initially written out to the image.
+ struct ImportLookupTableEntry32 {
+ uint32_t data;
+
+ /// @brief Is this entry specified by ordinal, or name?
+ bool isOrdinal() const { return data & 0x80000000; }
+
+ /// @brief Get the ordinal value of this entry. isOrdinal must be true.
+ uint16_t getOrdinal() const {
+ assert(isOrdinal() && "ILT entry is not an ordinal!");
+ return data & 0xFFFF;
+ }
+
+ /// @brief Set the ordinal value and set isOrdinal to true.
+ void setOrdinal(uint16_t o) {
+ data = o;
+ data |= 0x80000000;
+ }
+
+ /// @brief Get the Hint/Name entry RVA. isOrdinal must be false.
+ uint32_t getHintNameRVA() const {
+ assert(!isOrdinal() && "ILT entry is not a Hint/Name RVA!");
+ return data;
+ }
+
+ /// @brief Set the Hint/Name entry RVA and set isOrdinal to false.
+ void setHintNameRVA(uint32_t rva) { data = rva; }
+ };
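A tiny worked example of the tag-bit encoding (bit 31 distinguishes ordinal entries from Hint/Name entries):

    ImportLookupTableEntry32 E;
    E.setOrdinal(42);              // data == 0x8000002A
    assert(E.isOrdinal() && E.getOrdinal() == 42);
    E.setHintNameRVA(0x3000);      // data == 0x00003000, tag bit cleared
    assert(!E.isOrdinal() && E.getHintNameRVA() == 0x3000);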
+
+ /// @brief The DOS compatible header at the front of all PEs.
+ struct DOSHeader {
+ uint16_t Magic;
+ uint16_t UsedBytesInTheLastPage;
+ uint16_t FileSizeInPages;
+ uint16_t NumberOfRelocationItems;
+ uint16_t HeaderSizeInParagraphs;
+ uint16_t MinimumExtraParagraphs;
+ uint16_t MaximumExtraParagraphs;
+ uint16_t InitialRelativeSS;
+ uint16_t InitialSP;
+ uint16_t Checksum;
+ uint16_t InitialIP;
+ uint16_t InitialRelativeCS;
+ uint16_t AddressOfRelocationTable;
+ uint16_t OverlayNumber;
+ uint16_t Reserved[4];
+ uint16_t OEMid;
+ uint16_t OEMinfo;
+ uint16_t Reserved2[10];
+ uint32_t AddressOfNewExeHeader;
+ };
+
+ struct PEHeader {
+ uint32_t Signature;
+ header COFFHeader;
+ uint16_t Magic;
+ uint8_t MajorLinkerVersion;
+ uint8_t MinorLinkerVersion;
+ uint32_t SizeOfCode;
+ uint32_t SizeOfInitializedData;
+ uint32_t SizeOfUninitializedData;
+ uint32_t AddressOfEntryPoint; // RVA
+ uint32_t BaseOfCode; // RVA
+ uint32_t BaseOfData; // RVA
+ uint64_t ImageBase;
+ uint32_t SectionAlignment;
+ uint32_t FileAlignment;
+ uint16_t MajorOperatingSystemVersion;
+ uint16_t MinorOperatingSystemVersion;
+ uint16_t MajorImageVersion;
+ uint16_t MinorImageVersion;
+ uint16_t MajorSubsystemVersion;
+ uint16_t MinorSubsystemVersion;
+ uint32_t Win32VersionValue;
+ uint32_t SizeOfImage;
+ uint32_t SizeOfHeaders;
+ uint32_t CheckSum;
+ uint16_t Subsystem;
+ uint16_t DLLCharacteristics;
+ uint64_t SizeOfStackReserve;
+ uint64_t SizeOfStackCommit;
+ uint64_t SizeOfHeapReserve;
+ uint64_t SizeOfHeapCommit;
+ uint32_t LoaderFlags;
+ uint32_t NumberOfRvaAndSize;
+ };
+
+ struct DataDirectory {
+ uint32_t RelativeVirtualAddress;
+ uint32_t Size;
+ };
+
+ enum WindowsSubsystem {
+ IMAGE_SUBSYSTEM_UNKNOWN = 0, ///< An unknown subsystem.
+ IMAGE_SUBSYSTEM_NATIVE = 1, ///< Device drivers and native Windows processes
+ IMAGE_SUBSYSTEM_WINDOWS_GUI = 2, ///< The Windows GUI subsystem.
+ IMAGE_SUBSYSTEM_WINDOWS_CUI = 3, ///< The Windows character subsystem.
+ IMAGE_SUBSYSTEM_POSIX_CUI = 7, ///< The POSIX character subsystem.
+ IMAGE_SUBSYSTEM_WINDOWS_CE_GUI = 9, ///< Windows CE.
+ IMAGE_SUBSYSTEM_EFI_APPLICATION = 10, ///< An EFI application.
+ IMAGE_SUBSYSTEM_EFI_BOOT_SERVICE_DRIVER = 11, ///< An EFI driver with boot
+ /// services.
+ IMAGE_SUBSYSTEM_EFI_RUNTIME_DRIVER = 12, ///< An EFI driver with run-time
+ /// services.
+ IMAGE_SUBSYSTEM_EFI_ROM = 13, ///< An EFI ROM image.
+ IMAGE_SUBSYSTEM_XBOX = 14 ///< XBOX.
+ };
+
+ enum DLLCharacteristics {
+ /// DLL can be relocated at load time.
+ IMAGE_DLL_CHARACTERISTICS_DYNAMIC_BASE = 0x0040,
+ /// Code integrity checks are enforced.
+ IMAGE_DLL_CHARACTERISTICS_FORCE_INTEGRITY = 0x0080,
+ IMAGE_DLL_CHARACTERISTICS_NX_COMPAT = 0x0100, ///< Image is NX compatible.
+ /// Isolation aware, but do not isolate the image.
+ IMAGE_DLL_CHARACTERISTICS_NO_ISOLATION = 0x0200,
+ /// Does not use structured exception handling (SEH). No SEH handler may be
+ /// called in this image.
+ IMAGE_DLL_CHARACTERISTICS_NO_SEH = 0x0400,
+ /// Do not bind the image.
+ IMAGE_DLL_CHARACTERISTICS_NO_BIND = 0x0800,
+ IMAGE_DLL_CHARACTERISTICS_WDM_DRIVER = 0x2000, ///< A WDM driver.
+ /// Terminal Server aware.
+ IMAGE_DLL_CHARACTERISTICS_TERMINAL_SERVER_AWARE = 0x8000
+ };
+
+ enum DebugType {
+ IMAGE_DEBUG_TYPE_UNKNOWN = 0,
+ IMAGE_DEBUG_TYPE_COFF = 1,
+ IMAGE_DEBUG_TYPE_CODEVIEW = 2,
+ IMAGE_DEBUG_TYPE_FPO = 3,
+ IMAGE_DEBUG_TYPE_MISC = 4,
+ IMAGE_DEBUG_TYPE_EXCEPTION = 5,
+ IMAGE_DEBUG_TYPE_FIXUP = 6,
+ IMAGE_DEBUG_TYPE_OMAP_TO_SRC = 7,
+ IMAGE_DEBUG_TYPE_OMAP_FROM_SRC = 8,
+ IMAGE_DEBUG_TYPE_BORLAND = 9,
+ IMAGE_DEBUG_TYPE_CLSID = 11
+ };
+
+ enum BaseRelocationType {
+ IMAGE_REL_BASED_ABSOLUTE = 0,
+ IMAGE_REL_BASED_HIGH = 1,
+ IMAGE_REL_BASED_LOW = 2,
+ IMAGE_REL_BASED_HIGHLOW = 3,
+ IMAGE_REL_BASED_HIGHADJ = 4,
+ IMAGE_REL_BASED_MIPS_JMPADDR = 5,
+ IMAGE_REL_BASED_ARM_MOV32A = 5,
+ IMAGE_REL_BASED_ARM_MOV32T = 7,
+ IMAGE_REL_BASED_MIPS_JMPADDR16 = 9,
+ IMAGE_REL_BASED_DIR64 = 10
+ };
+
+ enum ImportType {
+ IMPORT_CODE = 0,
+ IMPORT_DATA = 1,
+ IMPORT_CONST = 2
+ };
+
+ enum ImportNameType {
+ /// Import is by ordinal. This indicates that the value in the Ordinal/Hint
+ /// field of the import header is the import's ordinal. If this constant is
+ /// not specified, then the Ordinal/Hint field should always be interpreted
+ /// as the import's hint.
+ IMPORT_ORDINAL = 0,
+ /// The import name is identical to the public symbol name
+ IMPORT_NAME = 1,
+ /// The import name is the public symbol name, but skipping the leading ?,
+ /// @, or optionally _.
+ IMPORT_NAME_NOPREFIX = 2,
+ /// The import name is the public symbol name, but skipping the leading ?,
+ /// @, or optionally _, and truncating at the first @.
+ IMPORT_NAME_UNDECORATE = 3
+ };
+
+ struct ImportHeader {
+ uint16_t Sig1; ///< Must be IMAGE_FILE_MACHINE_UNKNOWN (0).
+ uint16_t Sig2; ///< Must be 0xFFFF.
+ uint16_t Version;
+ uint16_t Machine;
+ uint32_t TimeDateStamp;
+ uint32_t SizeOfData;
+ uint16_t OrdinalHint;
+ uint16_t TypeInfo;
+
+ ImportType getType() const {
+ return static_cast<ImportType>(TypeInfo & 0x3);
+ }
+
+ ImportNameType getNameType() const {
+ return static_cast<ImportNameType>((TypeInfo & 0x1C) >> 2);
+ }
+ };
+
} // End namespace COFF.
+} // End namespace llvm.
#endif
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index 04b8c4e69c52..20634ede7644 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -237,6 +237,16 @@ public:
#undef CALLSITE_DELEGATE_GETTER
#undef CALLSITE_DELEGATE_SETTER
+ /// @brief Determine whether this argument is not captured.
+ bool doesNotCapture(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo + 1, Attribute::NoCapture);
+ }
+
+ /// @brief Determine whether this argument is passed by value.
+ bool isByValArgument(unsigned ArgNo) const {
+ return paramHasAttr(ArgNo + 1, Attribute::ByVal);
+ }
+
/// hasArgument - Returns true if this CallSite passes the given Value* as an
/// argument to the called function.
bool hasArgument(const Value *Arg) const {
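Both helpers add one to ArgNo because attribute slot 0 describes the return value; callers keep using zero-based argument indices. A hedged sketch (Call is assumed to be a CallInst* or InvokeInst*):

    CallSite CS(Call);
    bool FirstArgByVal    = CS.isByValArgument(0); // callee gets a private copy
    bool FirstArgNoEscape = CS.doesNotCapture(0);  // pointer can't escape here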
diff --git a/include/llvm/Support/Capacity.h b/include/llvm/Support/Capacity.h
index d8cda43b3576..7460f9825bd3 100644
--- a/include/llvm/Support/Capacity.h
+++ b/include/llvm/Support/Capacity.h
@@ -15,6 +15,8 @@
#ifndef LLVM_SUPPORT_CAPACITY_H
#define LLVM_SUPPORT_CAPACITY_H
+#include <cstddef>
+
namespace llvm {
template <typename T>
diff --git a/include/llvm/Support/CodeGen.h b/include/llvm/Support/CodeGen.h
index 41351dc73f3b..1b66c943895e 100644
--- a/include/llvm/Support/CodeGen.h
+++ b/include/llvm/Support/CodeGen.h
@@ -27,6 +27,26 @@ namespace llvm {
enum Model { Default, JITDefault, Small, Kernel, Medium, Large };
}
+ // TLS models.
+ namespace TLSModel {
+ enum Model {
+ GeneralDynamic,
+ LocalDynamic,
+ InitialExec,
+ LocalExec
+ };
+ }
+
+ // Code generation optimization level.
+ namespace CodeGenOpt {
+ enum Level {
+ None, // -O0
+ Less, // -O1
+ Default, // -O2, -Os
+ Aggressive // -O3
+ };
+ }
+
} // end llvm namespace
#endif
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index c6b62a8df9a4..c212d2d59f64 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -40,7 +40,7 @@ namespace cl {
//===----------------------------------------------------------------------===//
// ParseCommandLineOptions - Command line option processing entry point.
//
-void ParseCommandLineOptions(int argc, char **argv,
+void ParseCommandLineOptions(int argc, const char * const *argv,
const char *Overview = 0,
bool ReadResponseFiles = false);
@@ -83,10 +83,10 @@ void MarkOptionsChanged();
//
enum NumOccurrencesFlag { // Flags for the number of occurrences allowed
- Optional = 0x01, // Zero or One occurrence
- ZeroOrMore = 0x02, // Zero or more occurrences allowed
- Required = 0x03, // One occurrence required
- OneOrMore = 0x04, // One or more occurrences required
+ Optional = 0x00, // Zero or One occurrence
+ ZeroOrMore = 0x01, // Zero or more occurrences allowed
+ Required = 0x02, // One occurrence required
+ OneOrMore = 0x03, // One or more occurrences required
// ConsumeAfter - Indicates that this option is fed anything that follows the
// last positional argument required by the application (it is an error if
@@ -95,23 +95,20 @@ enum NumOccurrencesFlag { // Flags for the number of occurrences allowed
// found. Once a filename is found, all of the succeeding arguments are
// passed, unprocessed, to the ConsumeAfter option.
//
- ConsumeAfter = 0x05,
-
- OccurrencesMask = 0x07
+ ConsumeAfter = 0x04
};
enum ValueExpected { // Is a value required for the option?
- ValueOptional = 0x08, // The value can appear... or not
- ValueRequired = 0x10, // The value is required to appear!
- ValueDisallowed = 0x18, // A value may not be specified (for flags)
- ValueMask = 0x18
+ // zero reserved for the unspecified value
+ ValueOptional = 0x01, // The value can appear... or not
+ ValueRequired = 0x02, // The value is required to appear!
+ ValueDisallowed = 0x03 // A value may not be specified (for flags)
};
enum OptionHidden { // Control whether -help shows this option
- NotHidden = 0x20, // Option included in -help & -help-hidden
- Hidden = 0x40, // -help doesn't, but -help-hidden does
- ReallyHidden = 0x60, // Neither -help nor -help-hidden show this arg
- HiddenMask = 0x60
+ NotHidden = 0x00, // Option included in -help & -help-hidden
+ Hidden = 0x01, // -help doesn't, but -help-hidden does
+ ReallyHidden = 0x02 // Neither -help nor -help-hidden show this arg
};
// Formatting flags - This controls special features that the option might have
@@ -130,18 +127,16 @@ enum OptionHidden { // Control whether -help shows this option
//
enum FormattingFlags {
- NormalFormatting = 0x000, // Nothing special
- Positional = 0x080, // Is a positional argument, no '-' required
- Prefix = 0x100, // Can this option directly prefix its value?
- Grouping = 0x180, // Can this option group with other options?
- FormattingMask = 0x180 // Union of the above flags.
+ NormalFormatting = 0x00, // Nothing special
+ Positional = 0x01, // Is a positional argument, no '-' required
+ Prefix = 0x02, // Can this option directly prefix its value?
+ Grouping = 0x03 // Can this option group with other options?
};
enum MiscFlags { // Miscellaneous flags to adjust argument
- CommaSeparated = 0x200, // Should this cl::list split between commas?
- PositionalEatsArgs = 0x400, // Should this positional cl::list eat -args?
- Sink = 0x800, // Should this cl::list eat all unknown options?
- MiscMask = 0xE00 // Union of the above flags.
+ CommaSeparated = 0x01, // Should this cl::list split between commas?
+ PositionalEatsArgs = 0x02, // Should this positional cl::list eat -args?
+ Sink = 0x04 // Should this cl::list eat all unknown options?
};
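Since each property now lives in its own bitfield of Option, the enumerators above no longer need to be disjoint bit masks; user code is unaffected and keeps combining modifiers exactly as before. For reference (the flag name is hypothetical):

    static cl::opt<bool>
    EnableFoo("enable-foo", cl::desc("Turn on foo (hypothetical)"),
              cl::Hidden, cl::ZeroOrMore, cl::init(false));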
@@ -168,7 +163,15 @@ class Option {
virtual void anchor();
int NumOccurrences; // The number of times specified
- int Flags; // Flags for the argument
+ // Occurrences, HiddenFlag, and Formatting are all enum types, but they are
+ // stored as unsigned to avoid problems with signed enums in bitfields.
+ unsigned Occurrences : 3; // enum NumOccurrencesFlag
+ // not using the enum type for 'Value' because zero is an implementation
+ // detail representing the non-value
+ unsigned Value : 2;
+ unsigned HiddenFlag : 2; // enum OptionHidden
+ unsigned Formatting : 2; // enum FormattingFlags
+ unsigned Misc : 3;
unsigned Position; // Position of last occurrence of the option
unsigned AdditionalVals;// Greater than 0 for multi-valued option.
Option *NextRegistered; // Singly linked list of registered options.
@@ -178,21 +181,20 @@ public:
const char *ValueStr; // String describing what the value of this option is
inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
- return static_cast<enum NumOccurrencesFlag>(Flags & OccurrencesMask);
+ return (enum NumOccurrencesFlag)Occurrences;
}
inline enum ValueExpected getValueExpectedFlag() const {
- int VE = Flags & ValueMask;
- return VE ? static_cast<enum ValueExpected>(VE)
+ return Value ? ((enum ValueExpected)Value)
: getValueExpectedFlagDefault();
}
inline enum OptionHidden getOptionHiddenFlag() const {
- return static_cast<enum OptionHidden>(Flags & HiddenMask);
+ return (enum OptionHidden)HiddenFlag;
}
inline enum FormattingFlags getFormattingFlag() const {
- return static_cast<enum FormattingFlags>(Flags & FormattingMask);
+ return (enum FormattingFlags)Formatting;
}
inline unsigned getMiscFlags() const {
- return Flags & MiscMask;
+ return Misc;
}
inline unsigned getPosition() const { return Position; }
inline unsigned getNumAdditionalVals() const { return AdditionalVals; }
@@ -206,27 +208,21 @@ public:
void setArgStr(const char *S) { ArgStr = S; }
void setDescription(const char *S) { HelpStr = S; }
void setValueStr(const char *S) { ValueStr = S; }
-
- void setFlag(unsigned Flag, unsigned FlagMask) {
- Flags &= ~FlagMask;
- Flags |= Flag;
- }
-
void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) {
- setFlag(Val, OccurrencesMask);
+ Occurrences = Val;
}
- void setValueExpectedFlag(enum ValueExpected Val) { setFlag(Val, ValueMask); }
- void setHiddenFlag(enum OptionHidden Val) { setFlag(Val, HiddenMask); }
- void setFormattingFlag(enum FormattingFlags V) { setFlag(V, FormattingMask); }
- void setMiscFlag(enum MiscFlags M) { setFlag(M, M); }
+ void setValueExpectedFlag(enum ValueExpected Val) { Value = Val; }
+ void setHiddenFlag(enum OptionHidden Val) { HiddenFlag = Val; }
+ void setFormattingFlag(enum FormattingFlags V) { Formatting = V; }
+ void setMiscFlag(enum MiscFlags M) { Misc |= M; }
void setPosition(unsigned pos) { Position = pos; }
protected:
- explicit Option(unsigned DefaultFlags)
- : NumOccurrences(0), Flags(DefaultFlags | NormalFormatting), Position(0),
+  explicit Option(enum NumOccurrencesFlag OccurrencesFlag,
+                  enum OptionHidden Hidden)
+    : NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0),
+      HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0), Position(0),
AdditionalVals(0), NextRegistered(0),
ArgStr(""), HelpStr(""), ValueStr("") {
- assert(getNumOccurrencesFlag() != 0 &&
- getOptionHiddenFlag() != 0 && "Not all default flags specified!");
}
inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
@@ -326,6 +322,8 @@ LocationClass<Ty> location(Ty &L) { return LocationClass<Ty>(L); }
struct GenericOptionValue {
virtual ~GenericOptionValue() {}
virtual bool compare(const GenericOptionValue &V) const = 0;
+private:
+ virtual void anchor();
};
template<class DataType> struct OptionValue;
@@ -339,7 +337,7 @@ struct OptionValueBase : public GenericOptionValue {
bool hasValue() const { return false; }
- const DataType &getValue() const { assert(false && "no default value"); }
+ const DataType &getValue() const { llvm_unreachable("no default value"); }
// Some options may take their value from a different data type.
template<class DT>
@@ -416,6 +414,8 @@ struct OptionValue<cl::boolOrDefault> : OptionValueCopy<cl::boolOrDefault> {
setValue(V);
return *this;
}
+private:
+ virtual void anchor();
};
template<>
@@ -431,6 +431,8 @@ struct OptionValue<std::string> : OptionValueCopy<std::string> {
setValue(V);
return *this;
}
+private:
+ virtual void anchor();
};
//===----------------------------------------------------------------------===//
@@ -1171,14 +1173,14 @@ public:
// One option...
template<class M0t>
- explicit opt(const M0t &M0) : Option(Optional | NotHidden) {
+ explicit opt(const M0t &M0) : Option(Optional, NotHidden) {
apply(M0, this);
done();
}
// Two options...
template<class M0t, class M1t>
- opt(const M0t &M0, const M1t &M1) : Option(Optional | NotHidden) {
+ opt(const M0t &M0, const M1t &M1) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this);
done();
}
@@ -1186,21 +1188,21 @@ public:
// Three options...
template<class M0t, class M1t, class M2t>
opt(const M0t &M0, const M1t &M1,
- const M2t &M2) : Option(Optional | NotHidden) {
+ const M2t &M2) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this);
done();
}
// Four options...
template<class M0t, class M1t, class M2t, class M3t>
opt(const M0t &M0, const M1t &M1, const M2t &M2,
- const M3t &M3) : Option(Optional | NotHidden) {
+ const M3t &M3) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
done();
}
// Five options...
template<class M0t, class M1t, class M2t, class M3t, class M4t>
opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
- const M4t &M4) : Option(Optional | NotHidden) {
+ const M4t &M4) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this);
done();
@@ -1209,7 +1211,7 @@ public:
template<class M0t, class M1t, class M2t, class M3t,
class M4t, class M5t>
opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
- const M4t &M4, const M5t &M5) : Option(Optional | NotHidden) {
+ const M4t &M4, const M5t &M5) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this);
done();
@@ -1219,7 +1221,7 @@ public:
class M4t, class M5t, class M6t>
opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
const M4t &M4, const M5t &M5,
- const M6t &M6) : Option(Optional | NotHidden) {
+ const M6t &M6) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this); apply(M6, this);
done();
@@ -1229,7 +1231,7 @@ public:
class M4t, class M5t, class M6t, class M7t>
opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
const M4t &M4, const M5t &M5, const M6t &M6,
- const M7t &M7) : Option(Optional | NotHidden) {
+ const M7t &M7) : Option(Optional, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this);
done();
@@ -1338,34 +1340,34 @@ public:
// One option...
template<class M0t>
- explicit list(const M0t &M0) : Option(ZeroOrMore | NotHidden) {
+ explicit list(const M0t &M0) : Option(ZeroOrMore, NotHidden) {
apply(M0, this);
done();
}
// Two options...
template<class M0t, class M1t>
- list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) {
+ list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this);
done();
}
// Three options...
template<class M0t, class M1t, class M2t>
list(const M0t &M0, const M1t &M1, const M2t &M2)
- : Option(ZeroOrMore | NotHidden) {
+ : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this);
done();
}
// Four options...
template<class M0t, class M1t, class M2t, class M3t>
list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3)
- : Option(ZeroOrMore | NotHidden) {
+ : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
done();
}
// Five options...
template<class M0t, class M1t, class M2t, class M3t, class M4t>
list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
- const M4t &M4) : Option(ZeroOrMore | NotHidden) {
+ const M4t &M4) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this);
done();
@@ -1374,7 +1376,7 @@ public:
template<class M0t, class M1t, class M2t, class M3t,
class M4t, class M5t>
list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
- const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) {
+ const M4t &M4, const M5t &M5) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this);
done();
@@ -1384,7 +1386,7 @@ public:
class M4t, class M5t, class M6t>
list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
const M4t &M4, const M5t &M5, const M6t &M6)
- : Option(ZeroOrMore | NotHidden) {
+ : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this); apply(M6, this);
done();
@@ -1394,7 +1396,7 @@ public:
class M4t, class M5t, class M6t, class M7t>
list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
const M4t &M4, const M5t &M5, const M6t &M6,
- const M7t &M7) : Option(ZeroOrMore | NotHidden) {
+ const M7t &M7) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this);
done();
@@ -1536,34 +1538,34 @@ public:
// One option...
template<class M0t>
- explicit bits(const M0t &M0) : Option(ZeroOrMore | NotHidden) {
+ explicit bits(const M0t &M0) : Option(ZeroOrMore, NotHidden) {
apply(M0, this);
done();
}
// Two options...
template<class M0t, class M1t>
- bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) {
+ bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this);
done();
}
// Three options...
template<class M0t, class M1t, class M2t>
bits(const M0t &M0, const M1t &M1, const M2t &M2)
- : Option(ZeroOrMore | NotHidden) {
+ : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this);
done();
}
// Four options...
template<class M0t, class M1t, class M2t, class M3t>
bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3)
- : Option(ZeroOrMore | NotHidden) {
+ : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
done();
}
// Five options...
template<class M0t, class M1t, class M2t, class M3t, class M4t>
bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
- const M4t &M4) : Option(ZeroOrMore | NotHidden) {
+ const M4t &M4) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this);
done();
@@ -1572,7 +1574,7 @@ public:
template<class M0t, class M1t, class M2t, class M3t,
class M4t, class M5t>
bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
- const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) {
+ const M4t &M4, const M5t &M5) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this);
done();
@@ -1582,7 +1584,7 @@ public:
class M4t, class M5t, class M6t>
bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
const M4t &M4, const M5t &M5, const M6t &M6)
- : Option(ZeroOrMore | NotHidden) {
+ : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this); apply(M6, this);
done();
@@ -1592,7 +1594,7 @@ public:
class M4t, class M5t, class M6t, class M7t>
bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3,
const M4t &M4, const M5t &M5, const M6t &M6,
- const M7t &M7) : Option(ZeroOrMore | NotHidden) {
+ const M7t &M7) : Option(ZeroOrMore, NotHidden) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this);
done();
@@ -1632,27 +1634,27 @@ public:
// One option...
template<class M0t>
- explicit alias(const M0t &M0) : Option(Optional | Hidden), AliasFor(0) {
+ explicit alias(const M0t &M0) : Option(Optional, Hidden), AliasFor(0) {
apply(M0, this);
done();
}
// Two options...
template<class M0t, class M1t>
- alias(const M0t &M0, const M1t &M1) : Option(Optional | Hidden), AliasFor(0) {
+ alias(const M0t &M0, const M1t &M1) : Option(Optional, Hidden), AliasFor(0) {
apply(M0, this); apply(M1, this);
done();
}
// Three options...
template<class M0t, class M1t, class M2t>
alias(const M0t &M0, const M1t &M1, const M2t &M2)
- : Option(Optional | Hidden), AliasFor(0) {
+ : Option(Optional, Hidden), AliasFor(0) {
apply(M0, this); apply(M1, this); apply(M2, this);
done();
}
// Four options...
template<class M0t, class M1t, class M2t, class M3t>
alias(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3)
- : Option(Optional | Hidden), AliasFor(0) {
+ : Option(Optional, Hidden), AliasFor(0) {
apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this);
done();
}
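
For illustration, a minimal sketch of declaring options against the new
(NumOccurrencesFlag, OptionHidden) constructors; call sites read the same as
before, since cl::Hidden, cl::ZeroOrMore and friends are applied as modifiers:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool> Verbose("verbose",
                                 cl::desc("Enable verbose output"),
                                 cl::Hidden); // stored via setHiddenFlag()
    static cl::list<std::string> Inputs(cl::Positional,
                                        cl::desc("<input files>"),
                                        cl::ZeroOrMore);

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return 0;
    }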
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index e0921572182b..d0b186ea7c2b 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -49,16 +49,22 @@
#define LLVM_ATTRIBUTE_UNUSED
#endif
-#ifdef __GNUC__ // aka 'ATTRIBUTE_CONST' but following LLVM Conventions.
-#define LLVM_ATTRIBUTE_READNONE __attribute__((__const__))
+#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__)
+#define LLVM_ATTRIBUTE_WEAK __attribute__((__weak__))
#else
-#define LLVM_ATTRIBUTE_READNONE
+#define LLVM_ATTRIBUTE_WEAK
#endif
-#ifdef __GNUC__ // aka 'ATTRIBUTE_PURE' but following LLVM Conventions.
-#define LLVM_ATTRIBUTE_READONLY __attribute__((__pure__))
+#ifdef __GNUC__ // aka 'CONST' but following LLVM Conventions.
+#define LLVM_READNONE __attribute__((__const__))
#else
-#define LLVM_ATTRIBUTE_READONLY
+#define LLVM_READNONE
+#endif
+
+#ifdef __GNUC__ // aka 'PURE' but following LLVM Conventions.
+#define LLVM_READONLY __attribute__((__pure__))
+#else
+#define LLVM_READONLY
#endif
#if (__GNUC__ >= 4)
@@ -67,6 +73,7 @@
#define BUILTIN_EXPECT(EXPR, VALUE) (EXPR)
#endif
+
// C++ doesn't support 'extern template' of template specializations. GCC does,
// but requires __extension__ before it. In the header, use this:
// EXTERN_TEMPLATE_INSTANTIATION(class foo<bar>);
@@ -111,6 +118,14 @@
#define LLVM_ATTRIBUTE_NORETURN
#endif
+// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
+// pedantic diagnostics.
+#ifdef __GNUC__
+#define LLVM_EXTENSION __extension__
+#else
+#define LLVM_EXTENSION
+#endif
+
// LLVM_ATTRIBUTE_DEPRECATED(decl, "message")
#if __has_feature(attribute_deprecated_with_message)
# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \
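
For orientation, a hedged sketch of the renamed macros at a use site (the
function names below are invented):

    #include "llvm/Support/Compiler.h"

    // LLVM_READNONE: the result depends only on the arguments ('const').
    LLVM_READNONE int addTwice(int a, int b) { return a + b + b; }

    // LLVM_READONLY: may read memory but never writes it ('pure').
    LLVM_READONLY int firstByte(const char *p) { return p[0]; }

    // LLVM_EXTENSION suppresses pedantic diagnostics on the declaration
    // that follows.
    LLVM_EXTENSION typedef long long IllustrativeI64;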
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 3cb8164c3c3d..483f2674af7b 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -42,13 +42,13 @@ public:
/// top of the graph.
///
template<typename GraphType>
- static std::string getGraphName(const GraphType& Graph) { return ""; }
+ static std::string getGraphName(const GraphType &) { return ""; }
/// getGraphProperties - Return any custom properties that should be included
/// in the top level graph structure for dot.
///
template<typename GraphType>
- static std::string getGraphProperties(const GraphType& Graph) {
+ static std::string getGraphProperties(const GraphType &) {
return "";
}
@@ -61,44 +61,44 @@ public:
/// isNodeHidden - If the function returns true, the given node is not
/// displayed in the graph.
- static bool isNodeHidden(const void *Node) {
+ static bool isNodeHidden(const void *) {
return false;
}
/// getNodeLabel - Given a node and a pointer to the top level graph, return
/// the label to print in the node.
template<typename GraphType>
- std::string getNodeLabel(const void *Node, const GraphType& Graph) {
+ std::string getNodeLabel(const void *, const GraphType &) {
return "";
}
/// hasNodeAddressLabel - If this method returns true, the address of the node
/// is added to the label of the node.
template<typename GraphType>
- static bool hasNodeAddressLabel(const void *Node, const GraphType& Graph) {
+ static bool hasNodeAddressLabel(const void *, const GraphType &) {
return false;
}
/// If you want to specify custom node attributes, this is the place to do so
///
template<typename GraphType>
- static std::string getNodeAttributes(const void *Node,
- const GraphType& Graph) {
+ static std::string getNodeAttributes(const void *,
+ const GraphType &) {
return "";
}
/// If you want to override the dot attributes printed for a particular edge,
/// override this method.
template<typename EdgeIter, typename GraphType>
- static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
- const GraphType& Graph) {
+ static std::string getEdgeAttributes(const void *, EdgeIter,
+ const GraphType &) {
return "";
}
/// getEdgeSourceLabel - If you want to label the edge source itself,
/// implement this method.
template<typename EdgeIter>
- static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ static std::string getEdgeSourceLabel(const void *, EdgeIter) {
return "";
}
@@ -106,7 +106,7 @@ public:
/// should actually target another edge source, not a node. If this method is
/// implemented, getEdgeTarget should be implemented.
template<typename EdgeIter>
- static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+ static bool edgeTargetsEdgeSource(const void *, EdgeIter) {
return false;
}
@@ -114,7 +114,7 @@ public:
/// called to determine which outgoing edge of Node is the target of this
/// edge.
template<typename EdgeIter>
- static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+ static EdgeIter getEdgeTarget(const void *, EdgeIter I) {
return I;
}
@@ -126,13 +126,13 @@ public:
/// numEdgeDestLabels - If hasEdgeDestLabels, this function returns the
/// number of incoming edge labels the given node has.
- static unsigned numEdgeDestLabels(const void *Node) {
+ static unsigned numEdgeDestLabels(const void *) {
return 0;
}
/// getEdgeDestLabel - If hasEdgeDestLabels, this function returns the
/// incoming edge label with the given index in the given node.
- static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+ static std::string getEdgeDestLabel(const void *, unsigned) {
return "";
}
@@ -143,7 +143,7 @@ public:
/// it to add things to the output graph.
///
template<typename GraphType, typename GraphWriter>
- static void addCustomGraphFeatures(const GraphType& Graph, GraphWriter &GW) {}
+ static void addCustomGraphFeatures(const GraphType &, GraphWriter &) {}
};
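
A reminder of the intended use, sketched against a hypothetical MyGraph type;
only the hooks a client overrides need named parameters, which is why the
defaults above can drop theirs:

    #include "llvm/Support/DOTGraphTraits.h"

    class MyGraph; // hypothetical user graph type

    namespace llvm {
    template <>
    struct DOTGraphTraits<MyGraph *> : public DefaultDOTGraphTraits {
      DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}
      static std::string getGraphName(MyGraph *) { return "my graph"; }
    };
    } // end namespace llvm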
diff --git a/include/llvm/Support/DataStream.h b/include/llvm/Support/DataStream.h
new file mode 100644
index 000000000000..fedb0c925611
--- /dev/null
+++ b/include/llvm/Support/DataStream.h
@@ -0,0 +1,38 @@
+//===---- llvm/Support/DataStream.h - Lazy bitcode streaming ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines DataStreamer, which fetches bytes of data from
+// a stream source. It provides support for streaming (lazy reading) of
+// data, e.g. bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DATASTREAM_H_
+#define LLVM_SUPPORT_DATASTREAM_H_
+
+#include <string>
+
+namespace llvm {
+
+class DataStreamer {
+public:
+  /// Fetch len bytes from the stream and write them to the buffer pointed
+  /// to by buf. Returns the number of bytes actually written.
+ virtual size_t GetBytes(unsigned char *buf, size_t len) = 0;
+
+ virtual ~DataStreamer();
+};
+
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+ std::string *Err);
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_DATASTREAM_H_
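
A minimal sketch of a concrete streamer over an in-memory buffer
(BufferStreamer is an invented name; the patch itself only ships the
file-based getDataFileStreamer):

    #include "llvm/Support/DataStream.h"
    #include <cstring>

    class BufferStreamer : public llvm::DataStreamer {
      const unsigned char *Buf;
      size_t Size, Pos;
    public:
      BufferStreamer(const unsigned char *B, size_t S)
        : Buf(B), Size(S), Pos(0) {}
      // Copies up to Len bytes and reports how many were actually written,
      // per the GetBytes contract above.
      virtual size_t GetBytes(unsigned char *OutBuf, size_t Len) {
        size_t N = (Pos + Len <= Size) ? Len : Size - Pos;
        std::memcpy(OutBuf, Buf + Pos, N);
        Pos += N;
        return N;
      }
    };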
diff --git a/include/llvm/Support/DataTypes.h.cmake b/include/llvm/Support/DataTypes.h.cmake
index 8c0220a489b8..a3a6489e8a52 100644
--- a/include/llvm/Support/DataTypes.h.cmake
+++ b/include/llvm/Support/DataTypes.h.cmake
@@ -94,6 +94,9 @@ typedef u_int64_t uint64_t;
#else /* _MSC_VER */
/* Visual C++ doesn't provide standard integer headers, but it does provide
built-in data types. */
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
#include <stdlib.h>
#include <stddef.h>
#include <sys/types.h>
@@ -167,9 +170,24 @@ typedef signed int ssize_t;
# define UINT64_C(C) C##ui64
#endif
+#ifndef PRId64
+# define PRId64 "I64d"
+#endif
+#ifndef PRIi64
+# define PRIi64 "I64i"
+#endif
+#ifndef PRIo64
+# define PRIo64 "I64o"
+#endif
+#ifndef PRIu64
+# define PRIu64 "I64u"
+#endif
#ifndef PRIx64
# define PRIx64 "I64x"
#endif
+#ifndef PRIX64
+# define PRIX64 "I64X"
+#endif
#endif /* _MSC_VER */
diff --git a/include/llvm/Support/DataTypes.h.in b/include/llvm/Support/DataTypes.h.in
index 425805a1669b..b492bb14ba50 100644
--- a/include/llvm/Support/DataTypes.h.in
+++ b/include/llvm/Support/DataTypes.h.in
@@ -167,9 +167,24 @@ typedef signed int ssize_t;
# define UINT64_C(C) C##ui64
#endif
+#ifndef PRId64
+# define PRId64 "I64d"
+#endif
+#ifndef PRIi64
+# define PRIi64 "I64i"
+#endif
+#ifndef PRIo64
+# define PRIo64 "I64o"
+#endif
+#ifndef PRIu64
+# define PRIu64 "I64u"
+#endif
#ifndef PRIx64
# define PRIx64 "I64x"
#endif
+#ifndef PRIX64
+# define PRIX64 "I64X"
+#endif
#endif /* _MSC_VER */
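
The practical effect of the new fallbacks, sketched: C99-style PRI*64 format
macros now work under MSVC as well.

    #include "llvm/Support/DataTypes.h"
    #include <cstdio>

    void dumpValue(uint64_t V) {
      // Expands to "%I64u"/"%I64X" under MSVC and to the <inttypes.h>
      // definitions elsewhere.
      std::printf("value = %" PRIu64 " (0x%" PRIX64 ")\n", V, V);
    }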
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index 8651fc1abea9..e72327271f23 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -35,14 +35,14 @@ class raw_ostream;
#ifndef DEBUG_TYPE
#define DEBUG_TYPE ""
#endif
-
+
#ifndef NDEBUG
/// DebugFlag - This boolean is set to true if the '-debug' command line option
/// is specified. This should probably not be referenced directly, instead, use
/// the DEBUG macro below.
///
extern bool DebugFlag;
-
+
/// isCurrentDebugType - Return true if the specified string is the debug type
/// specified on the command line, or if none was specified on the command line
/// with the -debug-only=X option.
@@ -54,7 +54,7 @@ bool isCurrentDebugType(const char *Type);
/// debug output to be produced.
///
void SetCurrentDebugType(const char *Type);
-
+
/// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug
/// information. If the '-debug' option is specified on the command line, and if
/// this is a debug build, then the code specified as the option to the macro
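
Usage reminder, as a sketch ("mypass" is a placeholder debug type):

    #define DEBUG_TYPE "mypass"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    void noteVisit() {
      // Printed only in asserts builds, and only when running with
      // -debug or -debug-only=mypass.
      DEBUG(llvm::dbgs() << "visiting a block\n");
    }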
diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h
index 30f91874db20..8f18a991a9e1 100644
--- a/include/llvm/Support/Dwarf.h
+++ b/include/llvm/Support/Dwarf.h
@@ -22,7 +22,8 @@ namespace llvm {
// Debug info constants.
enum {
- LLVMDebugVersion = (11 << 16), // Current version of debug information.
+ LLVMDebugVersion = (12 << 16), // Current version of debug information.
+ LLVMDebugVersion11 = (11 << 16), // Constant for version 11.
LLVMDebugVersion10 = (10 << 16), // Constant for version 10.
LLVMDebugVersion9 = (9 << 16), // Constant for version 9.
LLVMDebugVersion8 = (8 << 16), // Constant for version 8.
@@ -130,6 +131,7 @@ enum dwarf_constants {
DW_TAG_GNU_template_parameter_pack = 0x4107,
DW_TAG_GNU_formal_parameter_pack = 0x4108,
DW_TAG_lo_user = 0x4080,
+ DW_TAG_APPLE_property = 0x4200,
DW_TAG_hi_user = 0xffff,
// Children flag
@@ -269,6 +271,7 @@ enum dwarf_constants {
DW_AT_APPLE_property_setter = 0x3fea,
DW_AT_APPLE_property_attribute = 0x3feb,
DW_AT_APPLE_objc_complete_type = 0x3fec,
+ DW_AT_APPLE_property = 0x3fed,
// Attribute form encodings
DW_FORM_addr = 0x01,
@@ -526,6 +529,7 @@ enum dwarf_constants {
DW_LANG_D = 0x0013,
DW_LANG_Python = 0x0014,
DW_LANG_lo_user = 0x8000,
+ DW_LANG_Mips_Assembler = 0x8001,
DW_LANG_hi_user = 0xffff,
// Identifier case codes
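
A sketch of how the bumped version word is typically checked (the helper is
illustrative, not part of the patch):

    #include "llvm/Support/Dwarf.h"

    // The debug info version occupies the high 16 bits of the version word.
    bool isCurrentDebugVersion(unsigned VersionWord) {
      return VersionWord == llvm::LLVMDebugVersion; // now (12 << 16)
    }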
diff --git a/include/llvm/Support/DynamicLibrary.h b/include/llvm/Support/DynamicLibrary.h
index 288936bc0b9b..0f59cbf23947 100644
--- a/include/llvm/Support/DynamicLibrary.h
+++ b/include/llvm/Support/DynamicLibrary.h
@@ -17,6 +17,9 @@
#include <string>
namespace llvm {
+
+class StringRef;
+
namespace sys {
/// This class provides a portable interface to dynamic libraries which also
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index c5b85e2e6a12..04953b6e5657 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -599,7 +599,25 @@ enum {
R_ARM_THM_TLS_DESCSEQ32 = 0x82
};
+// Mips Specific e_flags
+enum {
+ EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions
+ EF_MIPS_PIC = 0x00000002, // Position independent code
+ EF_MIPS_CPIC = 0x00000004, // Call object with Position independent code
+ EF_MIPS_ARCH_1 = 0x00000000, // MIPS1 instruction set
+ EF_MIPS_ARCH_2 = 0x10000000, // MIPS2 instruction set
+ EF_MIPS_ARCH_3 = 0x20000000, // MIPS3 instruction set
+ EF_MIPS_ARCH_4 = 0x30000000, // MIPS4 instruction set
+ EF_MIPS_ARCH_5 = 0x40000000, // MIPS5 instruction set
+  EF_MIPS_ARCH_32   = 0x50000000, // MIPS32 instruction set per Linux, not elf.h
+  EF_MIPS_ARCH_64   = 0x60000000, // MIPS64 instruction set per Linux, not elf.h
+  EF_MIPS_ARCH_32R2 = 0x70000000, // MIPS32r2 instruction set
+  EF_MIPS_ARCH_64R2 = 0x80000000, // MIPS64r2 instruction set
+ EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant
+};
+
// ELF Relocation types for Mips
enum {
R_MIPS_NONE = 0,
R_MIPS_16 = 1,
@@ -611,6 +629,7 @@ enum {
R_MIPS_GPREL16 = 7,
R_MIPS_LITERAL = 8,
R_MIPS_GOT16 = 9,
+ R_MIPS_GOT = 9,
R_MIPS_PC16 = 10,
R_MIPS_CALL16 = 11,
R_MIPS_GPREL32 = 12,
@@ -717,6 +736,9 @@ enum {
SHT_GROUP = 17, // Section group.
SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries.
SHT_LOOS = 0x60000000, // Lowest operating system-specific type.
+ SHT_GNU_verdef = 0x6ffffffd, // GNU version definitions.
+ SHT_GNU_verneed = 0x6ffffffe, // GNU version references.
+ SHT_GNU_versym = 0x6fffffff, // GNU symbol versions table.
SHT_HIOS = 0x6fffffff, // Highest operating system-specific type.
SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
// Fixme: All this is duplicated in MCSectionELF. Why??
@@ -871,6 +893,7 @@ enum {
STT_TLS = 6, // Thread local data object
STT_LOOS = 7, // Lowest operating system-specific symbol type
STT_HIOS = 8, // Highest operating system-specific symbol type
+ STT_GNU_IFUNC = 10, // GNU indirect function
STT_LOPROC = 13, // Lowest processor-specific symbol type
STT_HIPROC = 15 // Highest processor-specific symbol type
};
@@ -1084,6 +1107,33 @@ enum {
DF_STATIC_TLS = 0x10 // Reject attempts to load dynamically.
};
+// ElfXX_VerDef structure version (GNU versioning)
+enum {
+ VER_DEF_NONE = 0,
+ VER_DEF_CURRENT = 1
+};
+
+// VerDef Flags (ElfXX_VerDef::vd_flags)
+enum {
+ VER_FLG_BASE = 0x1,
+ VER_FLG_WEAK = 0x2,
+ VER_FLG_INFO = 0x4
+};
+
+// Special constants for the version table. (SHT_GNU_versym/.gnu.version)
+enum {
+ VER_NDX_LOCAL = 0, // Unversioned local symbol
+ VER_NDX_GLOBAL = 1, // Unversioned global symbol
+ VERSYM_VERSION = 0x7fff, // Version Index mask
+ VERSYM_HIDDEN = 0x8000 // Hidden bit (non-default version)
+};
+
+// ElfXX_VerNeed structure version (GNU versioning)
+enum {
+ VER_NEED_NONE = 0,
+ VER_NEED_CURRENT = 1
+};
+
} // end namespace ELF
} // end namespace llvm
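
Illustrative use of the new e_flags mask (the helper is invented for this
sketch):

    #include "llvm/Support/ELF.h"
    using namespace llvm::ELF;

    bool isMips64(uint32_t EFlags) {
      // EF_MIPS_ARCH selects the architecture-variant bits.
      uint32_t Arch = EFlags & EF_MIPS_ARCH;
      return Arch == EF_MIPS_ARCH_64 || Arch == EF_MIPS_ARCH_64R2;
    }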
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index af1b506d6cf4..733ab7548fca 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -98,6 +98,9 @@ public:
operator value_type() const {
return endian::read_le<value_type, unaligned>(Value);
}
+ void operator=(value_type newValue) {
+ endian::write_le<value_type, unaligned>((void *)&Value, newValue);
+ }
private:
uint8_t Value[sizeof(value_type)];
};
@@ -108,6 +111,9 @@ public:
operator value_type() const {
return endian::read_be<value_type, unaligned>(Value);
}
+ void operator=(value_type newValue) {
+ endian::write_be<value_type, unaligned>((void *)&Value, newValue);
+ }
private:
uint8_t Value[sizeof(value_type)];
};
@@ -118,6 +124,9 @@ public:
operator value_type() const {
return endian::read_le<value_type, aligned>(&Value);
}
+ void operator=(value_type newValue) {
+ endian::write_le<value_type, aligned>((void *)&Value, newValue);
+ }
private:
value_type Value;
};
@@ -128,6 +137,9 @@ public:
operator value_type() const {
return endian::read_be<value_type, aligned>(&Value);
}
+ void operator=(value_type newValue) {
+ endian::write_be<value_type, aligned>((void *)&Value, newValue);
+ }
private:
value_type Value;
};
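
What the added assignment operators buy, in sketch form (ulittle32_t is this
header's existing unaligned little-endian typedef):

    #include "llvm/Support/Endian.h"
    using namespace llvm::support;

    void roundTrip() {
      ulittle32_t V;     // unaligned, little-endian storage
      V = 0xDEADBEEFu;   // new: writes byte-swap as needed on big-endian hosts
      uint32_t Host = V; // existing: reads convert back to host order
      (void)Host;
    }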
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index a868e5f9f70b..e6f9926af6f8 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -27,14 +27,21 @@
#ifndef LLVM_SUPPORT_FILE_SYSTEM_H
#define LLVM_SUPPORT_FILE_SYSTEM_H
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/PathV1.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/system_error.h"
#include <ctime>
#include <iterator>
+#include <stack>
#include <string>
+#include <vector>
+
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
namespace llvm {
namespace sys {
@@ -91,7 +98,20 @@ struct space_info {
/// a platform specific member to store the result.
class file_status
{
- // implementation defined status field.
+ #if defined(LLVM_ON_UNIX)
+ dev_t st_dev;
+ ino_t st_ino;
+ #elif defined (LLVM_ON_WIN32)
+ uint32_t LastWriteTimeHigh;
+ uint32_t LastWriteTimeLow;
+ uint32_t VolumeSerialNumber;
+ uint32_t FileSizeHigh;
+ uint32_t FileSizeLow;
+ uint32_t FileIndexHigh;
+ uint32_t FileIndexLow;
+ #endif
+ friend bool equivalent(file_status A, file_status B);
+ friend error_code status(const Twine &path, file_status &result);
file_type Type;
public:
explicit file_status(file_type v=file_type::status_error)
@@ -101,6 +121,44 @@ public:
void type(file_type v) { Type = v; }
};
+/// file_magic - An "enum class" enumeration of file types based on magic
+/// (the first N bytes of the file).
+struct file_magic {
+ enum _ {
+ unknown = 0, ///< Unrecognized file
+ bitcode, ///< Bitcode file
+ archive, ///< ar style archive file
+ elf_relocatable, ///< ELF Relocatable object file
+ elf_executable, ///< ELF Executable image
+ elf_shared_object, ///< ELF dynamically linked shared lib
+ elf_core, ///< ELF core image
+ macho_object, ///< Mach-O Object file
+ macho_executable, ///< Mach-O Executable
+ macho_fixed_virtual_memory_shared_lib, ///< Mach-O Shared Lib, FVM
+ macho_core, ///< Mach-O Core File
+    macho_preload_executable,                 ///< Mach-O Preloaded Executable
+ macho_dynamically_linked_shared_lib, ///< Mach-O dynlinked shared lib
+ macho_dynamic_linker, ///< The Mach-O dynamic linker
+ macho_bundle, ///< Mach-O Bundle file
+ macho_dynamically_linked_shared_lib_stub, ///< Mach-O Shared lib stub
+ macho_dsym_companion, ///< Mach-O dSYM companion file
+ coff_object, ///< COFF object file
+ pecoff_executable ///< PECOFF executable file
+ };
+
+ bool is_object() const {
+    return v_ != unknown;
+ }
+
+ file_magic() : v_(unknown) {}
+ file_magic(_ v) : v_(v) {}
+ explicit file_magic(int v) : v_(_(v)) {}
+ operator int() const {return v_;}
+
+private:
+ int v_;
+};
+
/// @}
/// @name Physical Operators
/// @{
@@ -241,6 +299,8 @@ bool equivalent(file_status A, file_status B);
/// @brief Do paths represent the same thing?
///
+/// assert(status_known(A) || status_known(B));
+///
/// @param A Input path A.
/// @param B Input path B.
/// @param result Set to true if stat(A) and stat(B) have the same device and
@@ -397,13 +457,16 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result);
error_code get_magic(const Twine &path, uint32_t len,
SmallVectorImpl<char> &result);
+/// @brief Identify the type of a binary file based on how magical it is.
+file_magic identify_magic(StringRef magic);
+
/// @brief Get and identify \a path's type based on its content.
///
/// @param path Input path.
/// @param result Set to the type of file, or file_magic::unknown.
/// @results errc::success if result has been successfully set, otherwise a
/// platform specific error_code.
-error_code identify_magic(const Twine &path, LLVMFileType &result);
+error_code identify_magic(const Twine &path, file_magic &result);
/// @brief Get library paths the system linker uses.
///
@@ -479,76 +542,171 @@ public:
bool operator>=(const directory_entry& rhs) const;
};
+namespace detail {
+ struct DirIterState;
+
+ error_code directory_iterator_construct(DirIterState&, StringRef);
+ error_code directory_iterator_increment(DirIterState&);
+ error_code directory_iterator_destruct(DirIterState&);
+
+ /// DirIterState - Keeps state for the directory_iterator. It is reference
+ /// counted in order to preserve InputIterator semantics on copy.
+ struct DirIterState : public RefCountedBase<DirIterState> {
+ DirIterState()
+ : IterationHandle(0) {}
+
+ ~DirIterState() {
+ directory_iterator_destruct(*this);
+ }
+
+ intptr_t IterationHandle;
+ directory_entry CurrentEntry;
+ };
+}
+
/// directory_iterator - Iterates through the entries in path. There is no
/// operator++ because we need an error_code. If it's really needed we can make
/// it call report_fatal_error on error.
class directory_iterator {
- intptr_t IterationHandle;
- directory_entry CurrentEntry;
-
- // Platform implementations implement these functions to handle iteration.
- friend error_code directory_iterator_construct(directory_iterator &it,
- StringRef path);
- friend error_code directory_iterator_increment(directory_iterator &it);
- friend error_code directory_iterator_destruct(directory_iterator &it);
+ IntrusiveRefCntPtr<detail::DirIterState> State;
public:
- explicit directory_iterator(const Twine &path, error_code &ec)
- : IterationHandle(0) {
+ explicit directory_iterator(const Twine &path, error_code &ec) {
+ State = new detail::DirIterState;
SmallString<128> path_storage;
- ec = directory_iterator_construct(*this, path.toStringRef(path_storage));
+ ec = detail::directory_iterator_construct(*State,
+ path.toStringRef(path_storage));
}
- /// Construct end iterator.
- directory_iterator() : IterationHandle(0) {}
-
- ~directory_iterator() {
- directory_iterator_destruct(*this);
+ explicit directory_iterator(const directory_entry &de, error_code &ec) {
+ State = new detail::DirIterState;
+ ec = detail::directory_iterator_construct(*State, de.path());
}
+ /// Construct end iterator.
+ directory_iterator() : State(new detail::DirIterState) {}
+
// No operator++ because we need error_code.
directory_iterator &increment(error_code &ec) {
- ec = directory_iterator_increment(*this);
+ ec = directory_iterator_increment(*State);
return *this;
}
- const directory_entry &operator*() const { return CurrentEntry; }
- const directory_entry *operator->() const { return &CurrentEntry; }
+ const directory_entry &operator*() const { return State->CurrentEntry; }
+ const directory_entry *operator->() const { return &State->CurrentEntry; }
+
+ bool operator==(const directory_iterator &RHS) const {
+ return State->CurrentEntry == RHS.State->CurrentEntry;
+ }
bool operator!=(const directory_iterator &RHS) const {
- return CurrentEntry != RHS.CurrentEntry;
+ return !(*this == RHS);
}
// Other members as required by
// C++ Std, 24.1.1 Input iterators [input.iterators]
};
+namespace detail {
+ /// RecDirIterState - Keeps state for the recursive_directory_iterator. It is
+ /// reference counted in order to preserve InputIterator semantics on copy.
+ struct RecDirIterState : public RefCountedBase<RecDirIterState> {
+ RecDirIterState()
+ : Level(0)
+ , HasNoPushRequest(false) {}
+
+ std::stack<directory_iterator, std::vector<directory_iterator> > Stack;
+ uint16_t Level;
+ bool HasNoPushRequest;
+ };
+}
+
/// recursive_directory_iterator - Same as directory_iterator except for it
/// recurses down into child directories.
class recursive_directory_iterator {
- uint16_t Level;
- bool HasNoPushRequest;
- // implementation directory iterator status
+ IntrusiveRefCntPtr<detail::RecDirIterState> State;
public:
- explicit recursive_directory_iterator(const Twine &path, error_code &ec);
+ recursive_directory_iterator() {}
+ explicit recursive_directory_iterator(const Twine &path, error_code &ec)
+ : State(new detail::RecDirIterState) {
+ State->Stack.push(directory_iterator(path, ec));
+ if (State->Stack.top() == directory_iterator())
+ State.reset();
+ }
// No operator++ because we need error_code.
- directory_iterator &increment(error_code &ec);
+ recursive_directory_iterator &increment(error_code &ec) {
+ static const directory_iterator end_itr;
+
+ if (State->HasNoPushRequest)
+ State->HasNoPushRequest = false;
+ else {
+ file_status st;
+ if ((ec = State->Stack.top()->status(st))) return *this;
+ if (is_directory(st)) {
+ State->Stack.push(directory_iterator(*State->Stack.top(), ec));
+ if (ec) return *this;
+ if (State->Stack.top() != end_itr) {
+ ++State->Level;
+ return *this;
+ }
+ State->Stack.pop();
+ }
+ }
+
+ while (!State->Stack.empty()
+ && State->Stack.top().increment(ec) == end_itr) {
+ State->Stack.pop();
+ --State->Level;
+ }
+
+ // Check if we are done. If so, create an end iterator.
+ if (State->Stack.empty())
+ State.reset();
- const directory_entry &operator*() const;
- const directory_entry *operator->() const;
+ return *this;
+ }
+
+ const directory_entry &operator*() const { return *State->Stack.top(); }
+ const directory_entry *operator->() const { return &*State->Stack.top(); }
// observers
- /// Gets the current level. path is at level 0.
- int level() const;
+ /// Gets the current level. Starting path is at level 0.
+ int level() const { return State->Level; }
+
/// Returns true if no_push has been called for this directory_entry.
- bool no_push_request() const;
+ bool no_push_request() const { return State->HasNoPushRequest; }
// modifiers
/// Goes up one level if Level > 0.
- void pop();
+ void pop() {
+    assert(State && "Cannot pop an end iterator!");
+ assert(State->Level > 0 && "Cannot pop an iterator with level < 1");
+
+ static const directory_iterator end_itr;
+ error_code ec;
+ do {
+ if (ec)
+ report_fatal_error("Error incrementing directory iterator.");
+ State->Stack.pop();
+ --State->Level;
+ } while (!State->Stack.empty()
+ && State->Stack.top().increment(ec) == end_itr);
+
+ // Check if we are done. If so, create an end iterator.
+ if (State->Stack.empty())
+ State.reset();
+ }
+
/// Does not go down into the current directory_entry.
- void no_push();
+ void no_push() { State->HasNoPushRequest = true; }
+ bool operator==(const recursive_directory_iterator &RHS) const {
+ return State == RHS.State;
+ }
+
+ bool operator!=(const recursive_directory_iterator &RHS) const {
+ return !(*this == RHS);
+ }
// Other members as required by
// C++ Std, 24.1.1 Input iterators [input.iterators]
};
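
A short sketch of the now header-implemented recursive iterator in use:

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    void listTree(const llvm::Twine &Root) {
      llvm::error_code EC;
      // A default-constructed iterator is the end iterator; copies share
      // state, matching the InputIterator semantics described above.
      for (llvm::sys::fs::recursive_directory_iterator I(Root, EC), E;
           I != E && !EC; I.increment(EC))
        llvm::outs() << I->path() << "\n";
    }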
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index eab0c9d18db1..ae32da59dc22 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -296,26 +296,26 @@ public:
template<typename GraphType>
raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G,
bool ShortNames = false,
- const std::string &Title = "") {
+ const Twine &Title = "") {
// Start the graph emission process...
GraphWriter<GraphType> W(O, G, ShortNames);
// Emit the graph.
- W.writeGraph(Title);
+ W.writeGraph(Title.str());
return O;
}
template<typename GraphType>
-sys::Path WriteGraph(const GraphType &G, const std::string &Name,
- bool ShortNames = false, const std::string &Title = "") {
+sys::Path WriteGraph(const GraphType &G, const Twine &Name,
+ bool ShortNames = false, const Twine &Title = "") {
std::string ErrMsg;
sys::Path Filename = sys::Path::GetTemporaryDirectory(&ErrMsg);
if (Filename.isEmpty()) {
errs() << "Error: " << ErrMsg << "\n";
return Filename;
}
- Filename.appendComponent(Name + ".dot");
+ Filename.appendComponent((Name + ".dot").str());
if (Filename.makeUnique(true,&ErrMsg)) {
errs() << "Error: " << ErrMsg << "\n";
return sys::Path();
@@ -341,8 +341,8 @@ sys::Path WriteGraph(const GraphType &G, const std::string &Name,
/// then cleanup. For use from the debugger.
///
template<typename GraphType>
-void ViewGraph(const GraphType &G, const std::string &Name,
- bool ShortNames = false, const std::string &Title = "",
+void ViewGraph(const GraphType &G, const Twine &Name,
+ bool ShortNames = false, const Twine &Title = "",
GraphProgram::Name Program = GraphProgram::DOT) {
sys::Path Filename = llvm::WriteGraph(G, Name, ShortNames, Title);
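
With the signatures taking Twine, callers can compose graph names cheaply; a
hypothetical helper:

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/GraphWriter.h"

    template <typename GraphT>
    void viewNumbered(const GraphT &G, llvm::StringRef Tag, unsigned N) {
      // The temporary Twine lives for the full expression, which is all
      // ViewGraph needs (assumes DOTGraphTraits<GraphT> exists).
      llvm::ViewGraph(G, Tag + llvm::Twine(N)); // e.g. "cfg0", "cfg1", ...
    }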
diff --git a/include/llvm/Support/Host.h b/include/llvm/Support/Host.h
index f77d4c1182bb..b33101632268 100644
--- a/include/llvm/Support/Host.h
+++ b/include/llvm/Support/Host.h
@@ -33,14 +33,14 @@ namespace sys {
return !isLittleEndianHost();
}
- /// getHostTriple() - Return the target triple of the running
- /// system.
+ /// getDefaultTargetTriple() - Return the default target triple the compiler
+ /// has been configured to produce code for.
///
/// The target triple is a string in the format of:
/// CPU_TYPE-VENDOR-OPERATING_SYSTEM
/// or
/// CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM
- std::string getHostTriple();
+ std::string getDefaultTargetTriple();
/// getHostCPUName - Get the LLVM name for the host CPU. The particular format
/// of the name is target dependent, and suitable for passing as -mcpu to the
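
The rename at a call site, sketched:

    #include "llvm/Support/Host.h"
    #include "llvm/Support/raw_ostream.h"

    void printConfiguredTriple() {
      // Formerly getHostTriple(); the result reflects what the compiler was
      // configured to target, which need not be the running machine.
      llvm::outs() << llvm::sys::getDefaultTargetTriple() << "\n";
    }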
diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h
index 292c001e09f4..6d8a9b30ae1f 100644
--- a/include/llvm/Support/IRReader.h
+++ b/include/llvm/Support/IRReader.h
@@ -40,7 +40,8 @@ namespace llvm {
std::string ErrMsg;
Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
if (M == 0) {
- Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
// ParseBitcodeFile does not take ownership of the Buffer in the
// case of an error.
delete Buffer;
@@ -60,7 +61,7 @@ namespace llvm {
LLVMContext &Context) {
OwningPtr<MemoryBuffer> File;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
- Err = SMDiagnostic(Filename,
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
return 0;
}
@@ -80,7 +81,8 @@ namespace llvm {
std::string ErrMsg;
Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
if (M == 0)
- Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ ErrMsg);
// ParseBitcodeFile does not take ownership of the Buffer.
delete Buffer;
return M;
@@ -97,7 +99,7 @@ namespace llvm {
LLVMContext &Context) {
OwningPtr<MemoryBuffer> File;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
- Err = SMDiagnostic(Filename,
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
return 0;
}
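
Typical caller code for context (a sketch; the tool name is invented):

    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/Support/IRReader.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"

    llvm::Module *loadIR(const std::string &Path, llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err; // now carries DK_Error severity on failure
      llvm::Module *M = llvm::ParseIRFile(Path, Err, Ctx);
      if (M == 0)
        Err.print("mytool", llvm::errs());
      return M;
    }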
diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/Support/InstVisitor.h
index a661c4fac68d..52de8f660dd1 100644
--- a/include/llvm/Support/InstVisitor.h
+++ b/include/llvm/Support/InstVisitor.h
@@ -14,6 +14,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -157,54 +158,74 @@ public:
// Specific Instruction type classes... note that all of the casts are
// necessary because we use the instruction classes as opaque types...
//
- RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitInvokeInst(InvokeInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitUnwindInst(UnwindInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
- RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
- RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(Instruction); }
- RetTy visitLoadInst(LoadInst &I) { DELEGATE(Instruction); }
- RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction); }
- RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I){ DELEGATE(Instruction); }
- RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction); }
- RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction); }
- RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); }
- RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction); }
- RetTy visitTruncInst(TruncInst &I) { DELEGATE(CastInst); }
- RetTy visitZExtInst(ZExtInst &I) { DELEGATE(CastInst); }
- RetTy visitSExtInst(SExtInst &I) { DELEGATE(CastInst); }
- RetTy visitFPTruncInst(FPTruncInst &I) { DELEGATE(CastInst); }
- RetTy visitFPExtInst(FPExtInst &I) { DELEGATE(CastInst); }
- RetTy visitFPToUIInst(FPToUIInst &I) { DELEGATE(CastInst); }
- RetTy visitFPToSIInst(FPToSIInst &I) { DELEGATE(CastInst); }
- RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst); }
- RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst); }
- RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst); }
- RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst); }
- RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst); }
- RetTy visitSelectInst(SelectInst &I) { DELEGATE(Instruction); }
- RetTy visitCallInst(CallInst &I) { DELEGATE(Instruction); }
- RetTy visitVAArgInst(VAArgInst &I) { DELEGATE(Instruction); }
+ RetTy visitReturnInst(ReturnInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitBranchInst(BranchInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitSwitchInst(SwitchInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);}
+ RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);}
+ RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);}
+ RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitLoadInst(LoadInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitStoreInst(StoreInst &I) { DELEGATE(Instruction);}
+ RetTy visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { DELEGATE(Instruction);}
+ RetTy visitAtomicRMWInst(AtomicRMWInst &I) { DELEGATE(Instruction);}
+ RetTy visitFenceInst(FenceInst &I) { DELEGATE(Instruction);}
+ RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction);}
+ RetTy visitPHINode(PHINode &I) { DELEGATE(Instruction);}
+ RetTy visitTruncInst(TruncInst &I) { DELEGATE(CastInst);}
+ RetTy visitZExtInst(ZExtInst &I) { DELEGATE(CastInst);}
+ RetTy visitSExtInst(SExtInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPTruncInst(FPTruncInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPExtInst(FPExtInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPToUIInst(FPToUIInst &I) { DELEGATE(CastInst);}
+ RetTy visitFPToSIInst(FPToSIInst &I) { DELEGATE(CastInst);}
+ RetTy visitUIToFPInst(UIToFPInst &I) { DELEGATE(CastInst);}
+ RetTy visitSIToFPInst(SIToFPInst &I) { DELEGATE(CastInst);}
+ RetTy visitPtrToIntInst(PtrToIntInst &I) { DELEGATE(CastInst);}
+ RetTy visitIntToPtrInst(IntToPtrInst &I) { DELEGATE(CastInst);}
+ RetTy visitBitCastInst(BitCastInst &I) { DELEGATE(CastInst);}
+ RetTy visitSelectInst(SelectInst &I) { DELEGATE(Instruction);}
+ RetTy visitVAArgInst(VAArgInst &I) { DELEGATE(UnaryInstruction);}
RetTy visitExtractElementInst(ExtractElementInst &I) { DELEGATE(Instruction);}
- RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction); }
- RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction); }
- RetTy visitExtractValueInst(ExtractValueInst &I) { DELEGATE(Instruction);}
- RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }
- RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); }
+ RetTy visitInsertElementInst(InsertElementInst &I) { DELEGATE(Instruction);}
+ RetTy visitShuffleVectorInst(ShuffleVectorInst &I) { DELEGATE(Instruction);}
+ RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);}
+ RetTy visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); }
+ RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); }
+
+ // Call and Invoke are slightly different as they delegate first through
+ // a generic CallSite visitor.
+ RetTy visitCallInst(CallInst &I) {
+ return static_cast<SubClass*>(this)->visitCallSite(&I);
+ }
+ RetTy visitInvokeInst(InvokeInst &I) {
+ return static_cast<SubClass*>(this)->visitCallSite(&I);
+ }
// Next level propagators: If the user does not overload a specific
// instruction type, they can overload one of these to get the whole class
// of instructions...
//
- RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction); }
- RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction); }
- RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction); }
- RetTy visitCastInst(CastInst &I) { DELEGATE(Instruction); }
+ RetTy visitCastInst(CastInst &I) { DELEGATE(UnaryInstruction);}
+ RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction);}
+ RetTy visitCmpInst(CmpInst &I) { DELEGATE(Instruction);}
+ RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction);}
+ RetTy visitUnaryInstruction(UnaryInstruction &I){ DELEGATE(Instruction);}
+
+ // Provide a special visitor for a 'callsite' that visits both calls and
+ // invokes. When unimplemented, properly delegates to either the terminator or
+ // regular instruction visitor.
+ RetTy visitCallSite(CallSite CS) {
+ assert(CS);
+ Instruction &I = *CS.getInstruction();
+ if (CS.isCall())
+ DELEGATE(Instruction);
+
+ assert(CS.isInvoke());
+ DELEGATE(TerminatorInst);
+ }
// If the user wants a 'default' case, they can choose to override this
// function. If this function is not overloaded in the user's subclass, then
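
What the new hook enables, as a sketch:

    #include "llvm/Support/InstVisitor.h"

    // Counts calls and invokes uniformly through visitCallSite; without the
    // override, each would fall through to its default delegation.
    struct CallSiteCounter : public llvm::InstVisitor<CallSiteCounter> {
      unsigned Count;
      CallSiteCounter() : Count(0) {}
      void visitCallSite(llvm::CallSite) { ++Count; }
    };

    unsigned countCallSites(llvm::Function &F) {
      CallSiteCounter C;
      C.visit(F);
      return C.Count;
    }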
diff --git a/include/llvm/Support/JSONParser.h b/include/llvm/Support/JSONParser.h
new file mode 100644
index 000000000000..11149f1e47ba
--- /dev/null
+++ b/include/llvm/Support/JSONParser.h
@@ -0,0 +1,448 @@
+//===--- JSONParser.h - Simple JSON parser ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a JSON parser.
+//
+// See http://www.json.org/ for an overview.
+// See http://www.ietf.org/rfc/rfc4627.txt for the full standard.
+//
+// FIXME: Currently this supports a subset of JSON. Specifically, support
+// for numbers, booleans and null for values is missing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_JSON_PARSER_H
+#define LLVM_SUPPORT_JSON_PARSER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
+
+namespace llvm {
+
+class JSONContainer;
+class JSONString;
+class JSONValue;
+class JSONKeyValuePair;
+
+/// \brief Base class for a parsable JSON atom.
+///
+/// This class has no semantics other than being a unit of JSON data which can
+/// be parsed out of a JSON document.
+class JSONAtom {
+public:
+ /// \brief Possible types of JSON objects.
+ enum Kind { JK_KeyValuePair, JK_Array, JK_Object, JK_String };
+
+ /// \brief Returns the type of this value.
+ Kind getKind() const { return MyKind; }
+
+ static bool classof(const JSONAtom *Atom) { return true; }
+
+protected:
+ JSONAtom(Kind MyKind) : MyKind(MyKind) {}
+
+private:
+ Kind MyKind;
+};
+
+/// \brief A parser for JSON text.
+///
+/// Use a JSONParser object to iterate over the values of a JSON text.
+/// All objects are parsed during the iteration, so you can only iterate once
+/// over the JSON text, but the cost of partial iteration is minimized.
+/// Create a new JSONParser if you want to iterate multiple times.
+class JSONParser {
+public:
+ /// \brief Create a JSONParser for the given input.
+ ///
+ /// Parsing is started via parseRoot(). Access to the object returned from
+ /// parseRoot() will parse the input lazily.
+ JSONParser(StringRef Input, SourceMgr *SM);
+
+ /// \brief Returns the outermost JSON value (either an array or an object).
+ ///
+ /// Can return NULL if the input does not start with an array or an object.
+ /// The object is not parsed yet - the caller must iterate over the
+ /// returned object to trigger parsing.
+ ///
+ /// A JSONValue can be either a JSONString, JSONObject or JSONArray.
+ JSONValue *parseRoot();
+
+ /// \brief Parses the JSON text and returns whether it is valid JSON.
+ ///
+  /// In case validate() returns false, failed() will return true and
+ /// getErrorMessage() will return the parsing error.
+ bool validate();
+
+ /// \brief Returns true if an error occurs during parsing.
+ ///
+ /// If there was an error while parsing an object that was created by
+ /// iterating over the result of 'parseRoot', 'failed' will return true.
+ bool failed() const;
+
+private:
+ /// \brief These methods manage the implementation details of parsing new JSON
+ /// atoms.
+ /// @{
+ JSONString *parseString();
+ JSONValue *parseValue();
+ JSONKeyValuePair *parseKeyValuePair();
+ /// @}
+
+ /// \brief Helpers to parse the elements out of both forms of containers.
+ /// @{
+ const JSONAtom *parseElement(JSONAtom::Kind ContainerKind);
+ StringRef::iterator parseFirstElement(JSONAtom::Kind ContainerKind,
+ char StartChar, char EndChar,
+ const JSONAtom *&Element);
+ StringRef::iterator parseNextElement(JSONAtom::Kind ContainerKind,
+ char EndChar,
+ const JSONAtom *&Element);
+ /// @}
+
+ /// \brief Whitespace parsing.
+ /// @{
+ void nextNonWhitespace();
+ bool isWhitespace();
+ /// @}
+
+ /// \brief These methods are used for error handling.
+  /// @{
+ void setExpectedError(StringRef Expected, StringRef Found);
+ void setExpectedError(StringRef Expected, char Found);
+ bool errorIfAtEndOfFile(StringRef Message);
+ bool errorIfNotAt(char C, StringRef Message);
+  /// @}
+
+ /// \brief Skips all elements in the given container.
+ bool skipContainer(const JSONContainer &Container);
+
+ /// \brief Skips to the next position behind the given JSON atom.
+ bool skip(const JSONAtom &Atom);
+
+ /// All nodes are allocated by the parser and will be deallocated when the
+ /// parser is destroyed.
+ BumpPtrAllocator ValueAllocator;
+
+ /// \brief The original input to the parser.
+ MemoryBuffer *InputBuffer;
+
+ /// \brief The source manager used for diagnostics and buffer management.
+ SourceMgr *SM;
+
+ /// \brief The current position in the parse stream.
+ StringRef::iterator Position;
+
+ /// \brief The end position for fast EOF checks without introducing
+ /// unnecessary dereferences.
+ StringRef::iterator End;
+
+ /// \brief If true, an error has occurred.
+ bool Failed;
+
+ friend class JSONContainer;
+};
+
+
+/// \brief Base class for JSON value objects.
+///
+/// This object represents an abstract JSON value. It is the root node behind
+/// the group of JSON entities that can represent top-level values in a JSON
+/// document. It has no API, and is just a placeholder in the type hierarchy of
+/// nodes.
+class JSONValue : public JSONAtom {
+protected:
+ JSONValue(Kind MyKind) : JSONAtom(MyKind) {}
+
+public:
+ /// \brief dyn_cast helpers
+ ///@{
+ static bool classof(const JSONAtom *Atom) {
+ switch (Atom->getKind()) {
+ case JK_Array:
+ case JK_Object:
+ case JK_String:
+ return true;
+ case JK_KeyValuePair:
+ return false;
+ }
+ llvm_unreachable("Invalid JSONAtom kind");
+ }
+ static bool classof(const JSONValue *Value) { return true; }
+ ///@}
+};
+
+/// \brief Gives access to the text of a JSON string.
+///
+/// FIXME: Implement a method to return the unescaped text.
+class JSONString : public JSONValue {
+public:
+ /// \brief Returns the underlying parsed text of the string.
+ ///
+ /// This is the unescaped content of the JSON text.
+ /// See http://www.ietf.org/rfc/rfc4627.txt for details.
+ StringRef getRawText() const { return RawText; }
+
+private:
+ JSONString(StringRef RawText) : JSONValue(JK_String), RawText(RawText) {}
+
+ StringRef RawText;
+
+ friend class JSONParser;
+
+public:
+ /// \brief dyn_cast helpers
+ ///@{
+ static bool classof(const JSONAtom *Atom) {
+ return Atom->getKind() == JK_String;
+ }
+ static bool classof(const JSONString *String) { return true; }
+ ///@}
+};
+
+/// \brief A (key, value) tuple of type (JSONString *, JSONValue *).
+///
+/// Note that JSONKeyValuePair is not a JSONValue, it is a bare JSONAtom.
+/// JSONKeyValuePairs can be elements of a JSONObject, but not of a JSONArray.
+/// They are not viable as top-level values either.
+class JSONKeyValuePair : public JSONAtom {
+public:
+ const JSONString * const Key;
+ const JSONValue * const Value;
+
+private:
+ JSONKeyValuePair(const JSONString *Key, const JSONValue *Value)
+ : JSONAtom(JK_KeyValuePair), Key(Key), Value(Value) {}
+
+ friend class JSONParser;
+
+public:
+ /// \brief dyn_cast helpers
+ ///@{
+ static bool classof(const JSONAtom *Atom) {
+ return Atom->getKind() == JK_KeyValuePair;
+ }
+ static bool classof(const JSONKeyValuePair *KeyValuePair) { return true; }
+ ///@}
+};
+
+/// \brief Implementation of JSON containers (arrays and objects).
+///
+/// JSONContainers drive the lazy parsing of JSON arrays and objects via
+/// forward iterators.
+class JSONContainer : public JSONValue {
+private:
+ /// \brief An iterator that parses the underlying container during iteration.
+ ///
+ /// Iterators on the same collection use shared state, so when multiple copies
+ /// of an iterator exist, only one is allowed to be used for iteration;
+ /// iterating multiple copies of an iterator of the same collection will lead
+ /// to undefined behavior.
+ class AtomIterator {
+ public:
+ AtomIterator(const AtomIterator &I) : Container(I.Container) {}
+
+ /// \brief Iterator interface.
+ ///@{
+ bool operator==(const AtomIterator &I) const {
+ if (isEnd() || I.isEnd())
+ return isEnd() == I.isEnd();
+ return Container->Position == I.Container->Position;
+ }
+ bool operator!=(const AtomIterator &I) const {
+ return !(*this == I);
+ }
+ AtomIterator &operator++() {
+ Container->parseNextElement();
+ return *this;
+ }
+ const JSONAtom *operator*() {
+ return Container->Current;
+ }
+ ///@}
+
+ private:
+ /// \brief Create an iterator for which 'isEnd' returns true.
+ AtomIterator() : Container(0) {}
+
+ /// \brief Create an iterator for the given container.
+ AtomIterator(const JSONContainer *Container) : Container(Container) {}
+
+ bool isEnd() const {
+ return Container == 0 || Container->Position == StringRef::iterator();
+ }
+
+ const JSONContainer * const Container;
+
+ friend class JSONContainer;
+ };
+
+protected:
+ /// \brief An iterator for the specified AtomT.
+ ///
+ /// Used for the implementation of iterators for JSONArray and JSONObject.
+ template <typename AtomT>
+ class IteratorTemplate : public std::iterator<std::forward_iterator_tag,
+ const AtomT*> {
+ public:
+ explicit IteratorTemplate(const AtomIterator& AtomI)
+ : AtomI(AtomI) {}
+
+ bool operator==(const IteratorTemplate &I) const {
+ return AtomI == I.AtomI;
+ }
+ bool operator!=(const IteratorTemplate &I) const { return !(*this == I); }
+
+ IteratorTemplate &operator++() {
+ ++AtomI;
+ return *this;
+ }
+
+ const AtomT *operator*() { return dyn_cast<AtomT>(*AtomI); }
+
+ private:
+ AtomIterator AtomI;
+ };
+
+ JSONContainer(JSONParser *Parser, char StartChar, char EndChar,
+ JSONAtom::Kind ContainerKind)
+ : JSONValue(ContainerKind), Parser(Parser),
+ Position(), Current(0), Started(false),
+ StartChar(StartChar), EndChar(EndChar) {}
+
+ /// \brief Returns a lazy parsing iterator over the container.
+ ///
+ /// As the iterator drives the parse stream, begin() must only be called
+ /// once per container.
+ AtomIterator atom_begin() const {
+ if (Started)
+ report_fatal_error("Cannot parse container twice.");
+ Started = true;
+ // Set up the position and current element when we begin iterating over the
+ // container.
+ Position = Parser->parseFirstElement(getKind(), StartChar, EndChar, Current);
+ return AtomIterator(this);
+ }
+ AtomIterator atom_end() const {
+ return AtomIterator();
+ }
+
+private:
+ AtomIterator atom_current() const {
+ if (!Started)
+ return atom_begin();
+
+ return AtomIterator(this);
+ }
+
+ /// \brief Parse the next element in the container into the Current element.
+ ///
+ /// This routine is called as an iterator into this container walks through
+ /// its elements. It mutates the container's internal current node to point to
+ /// the next atom of the container.
+ void parseNextElement() const {
+ Parser->skip(*Current);
+ Position = Parser->parseNextElement(getKind(), EndChar, Current);
+ }
+
+ // For parsing, JSONContainers call back into the JSONParser.
+ JSONParser * const Parser;
+
+ // 'Position', 'Current' and 'Started' store the state of the parse stream
+ // for iterators on the container; they don't change the container's elements
+ // and are thus marked as mutable.
+ mutable StringRef::iterator Position;
+ mutable const JSONAtom *Current;
+ mutable bool Started;
+
+ const char StartChar;
+ const char EndChar;
+
+ friend class JSONParser;
+
+public:
+ /// \brief dyn_cast helpers
+ ///@{
+ static bool classof(const JSONAtom *Atom) {
+ switch (Atom->getKind()) {
+ case JK_Array:
+ case JK_Object:
+ return true;
+ case JK_KeyValuePair:
+ case JK_String:
+ return false;
+ }
+ llvm_unreachable("Invalid JSONAtom kind");
+ }
+ static bool classof(const JSONContainer *Container) { return true; }
+ ///@}
+};
+
+/// \brief A simple JSON array.
+class JSONArray : public JSONContainer {
+public:
+ typedef IteratorTemplate<JSONValue> const_iterator;
+
+ /// \brief Returns a lazy parsing iterator over the container.
+ ///
+ /// As the iterator drives the parse stream, begin() must only be called
+ /// once per container.
+ const_iterator begin() const { return const_iterator(atom_begin()); }
+ const_iterator end() const { return const_iterator(atom_end()); }
+
+private:
+ JSONArray(JSONParser *Parser)
+ : JSONContainer(Parser, '[', ']', JSONAtom::JK_Array) {}
+
+public:
+ /// \brief dyn_cast helpers
+ ///@{
+ static bool classof(const JSONAtom *Atom) {
+ return Atom->getKind() == JSONAtom::JK_Array;
+ }
+ static bool classof(const JSONArray *Array) { return true; }
+ ///@}
+
+ friend class JSONParser;
+};
+
+/// \brief A JSON object: an iterable list of JSON key-value pairs.
+class JSONObject : public JSONContainer {
+public:
+ typedef IteratorTemplate<JSONKeyValuePair> const_iterator;
+
+ /// \brief Returns a lazy parsing iterator over the container.
+ ///
+ /// As the iterator drives the parse stream, begin() must only be called
+ /// once per container.
+ const_iterator begin() const { return const_iterator(atom_begin()); }
+ const_iterator end() const { return const_iterator(atom_end()); }
+
+private:
+ JSONObject(JSONParser *Parser)
+ : JSONContainer(Parser, '{', '}', JSONAtom::JK_Object) {}
+
+public:
+ /// \brief dyn_cast helpers
+ ///@{
+ static bool classof(const JSONAtom *Atom) {
+ return Atom->getKind() == JSONAtom::JK_Object;
+ }
+ static bool classof(const JSONObject *Object) { return true; }
+ ///@}
+
+ friend class JSONParser;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_JSON_PARSER_H
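A minimal usage sketch of the lazy container API above (annotation, not part of the patch; the JSONParser constructor and a parseRoot() entry point are assumed from the earlier, unshown part of this header):

    void printStrings(StringRef Input) {
      SourceMgr SM;
      JSONParser Parser(Input, &SM);
      // parseRoot() is an assumed entry point; dyn_cast_or_null guards
      // against a failed parse.
      if (const JSONArray *A = dyn_cast_or_null<JSONArray>(Parser.parseRoot())) {
        // begin() drives the parse stream and may be called only once.
        for (JSONArray::const_iterator I = A->begin(), E = A->end(); I != E; ++I)
          if (const JSONString *S = dyn_cast<JSONString>(*I))
            outs() << S->getRawText() << "\n";
      }
    }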
diff --git a/include/llvm/Support/LockFileManager.h b/include/llvm/Support/LockFileManager.h
new file mode 100644
index 000000000000..e2fa8ebc56e4
--- /dev/null
+++ b/include/llvm/Support/LockFileManager.h
@@ -0,0 +1,74 @@
+//===--- LockFileManager.h - File-level locking utility ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_LOCKFILEMANAGER_H
+#define LLVM_SUPPORT_LOCKFILEMANAGER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/system_error.h"
+#include <utility> // for std::pair
+
+namespace llvm {
+
+/// \brief Class that manages the creation of a lock file to aid
+/// implicit coordination between different processes.
+///
+/// The implicit coordination works by creating a ".lock" file alongside
+/// the file that we're coordinating for, using the atomicity of the file
+/// system to ensure that only a single process can create that ".lock" file.
+/// When the lock file is removed, the owning process has finished the
+/// operation.
+class LockFileManager {
+public:
+ /// \brief Describes the state of a lock file.
+ enum LockFileState {
+ /// \brief The lock file has been created and is owned by this instance
+ /// of the object.
+ LFS_Owned,
+ /// \brief The lock file already exists and is owned by some other
+ /// instance.
+ LFS_Shared,
+ /// \brief An error occurred while trying to create or find the lock
+ /// file.
+ LFS_Error
+ };
+
+private:
+ SmallString<128> LockFileName;
+ SmallString<128> UniqueLockFileName;
+
+ Optional<std::pair<std::string, int> > Owner;
+ Optional<error_code> Error;
+
+ LockFileManager(const LockFileManager &);
+ LockFileManager &operator=(const LockFileManager &);
+
+ static Optional<std::pair<std::string, int> >
+ readLockFile(StringRef LockFileName);
+
+ static bool processStillExecuting(StringRef Hostname, int PID);
+
+public:
+
+ LockFileManager(StringRef FileName);
+ ~LockFileManager();
+
+ /// \brief Determine the state of the lock file.
+ LockFileState getState() const;
+
+ operator LockFileState() const { return getState(); }
+
+ /// \brief For a shared lock, wait until the owner releases the lock.
+ void waitForUnlock();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_LOCKFILEMANAGER_H
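A minimal usage sketch (annotation, not part of the patch; "Module.pcm" is an illustrative file name, and the exact lock-file name follows the class comment):

    LockFileManager Locked("Module.pcm"); // ".lock" file created alongside
    switch (Locked) {                     // implicit getState() conversion
    case LockFileManager::LFS_Owned:
      // We hold the lock: do the guarded work; the destructor removes the
      // lock file when this object goes out of scope.
      break;
    case LockFileManager::LFS_Shared:
      Locked.waitForUnlock();             // another process is doing the work
      break;
    case LockFileManager::LFS_Error:
      break;                              // could not create or read the lock
    }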
diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h
index 5b6858613ff6..44a7a791c522 100644
--- a/include/llvm/Support/MachO.h
+++ b/include/llvm/Support/MachO.h
@@ -114,6 +114,10 @@ namespace llvm {
LoadCommandVersionMinIPhoneOS = 0x00000025u, // LC_VERSION_MIN_IPHONEOS
LoadCommandFunctionStarts = 0x00000026u, // LC_FUNCTION_STARTS
LoadCommandDyldEnvironment = 0x00000027u, // LC_DYLD_ENVIRONMENT
+ LoadCommandMain = 0x80000028u, // LC_MAIN
+ LoadCommandDataInCode = 0x00000029u, // LC_DATA_IN_CODE
+ LoadCommandSourceVersion = 0x0000002Au, // LC_SOURCE_VERSION
+ LoadCommandCodeSignDRs = 0x0000002Bu, // LC_DYLIB_CODE_SIGN_DRS
// Constant bits for the "flags" field in llvm::MachO::segment_command
SegmentCommandFlagBitHighVM = 0x1u, // SG_HIGHVM
@@ -240,6 +244,9 @@ namespace llvm {
NListSectionNoSection = 0u, // NO_SECT
NListSectionMaxSection = 0xffu, // MAX_SECT
+ NListDescWeakRef = 0x40u,
+ NListDescWeakDef = 0x80u,
+
// Constant values for the "n_type" field in llvm::MachO::nlist and
// llvm::MachO::nlist_64 when "(n_type & NlistMaskStab) != 0"
StabGlobalSymbol = 0x20u, // N_GSYM
diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h
index 53e73ad35f49..4171d1bec8dc 100644
--- a/include/llvm/Support/ManagedStatic.h
+++ b/include/llvm/Support/ManagedStatic.h
@@ -16,6 +16,7 @@
#include "llvm/Support/Atomic.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/Valgrind.h"
namespace llvm {
@@ -65,6 +66,7 @@ public:
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+ TsanHappensAfter(this);
return *static_cast<C*>(Ptr);
}
@@ -72,6 +74,7 @@ public:
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+ TsanHappensAfter(this);
return static_cast<C*>(Ptr);
}
@@ -79,6 +82,7 @@ public:
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+ TsanHappensAfter(this);
return *static_cast<C*>(Ptr);
}
@@ -86,6 +90,7 @@ public:
void* tmp = Ptr;
if (llvm_is_multithreaded()) sys::MemoryFence();
if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+ TsanHappensAfter(this);
return static_cast<C*>(Ptr);
}
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 4627557f7f1f..d085c94f2adc 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -51,6 +51,13 @@ inline bool isInt<32>(int64_t x) {
return static_cast<int32_t>(x) == x;
}
+/// isShiftedInt<N,S> - Checks if a signed integer is an N bit number shifted
+/// left by S.
+template<unsigned N, unsigned S>
+inline bool isShiftedInt(int64_t x) {
+ return isInt<N+S>(x) && (x % (INT64_C(1) << S) == 0);
+}
+
/// isUInt - Checks if an unsigned integer fits into the given bit width.
template<unsigned N>
inline bool isUInt(uint64_t x) {
@@ -70,6 +77,13 @@ inline bool isUInt<32>(uint64_t x) {
return static_cast<uint32_t>(x) == x;
}
+/// isShiftedUInt<N,S> - Checks if an unsigned integer is an N bit number shifted
+/// left by S.
+template<unsigned N, unsigned S>
+inline bool isShiftedUInt(uint64_t x) {
+ return isUInt<N+S>(x) && (x % (UINT64_C(1) << S) == 0);
+}
+
/// isUIntN - Checks if an unsigned integer fits into the given (dynamic)
/// bit width.
inline bool isUIntN(unsigned N, uint64_t x) {
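Worked examples for the two new predicates (annotation, not part of the patch; values chosen for illustration):

    assert(isShiftedInt<8, 2>(-512));   // -512 == -128 << 2; fits, multiple of 4
    assert(!isShiftedInt<8, 2>(-514));  // not a multiple of 1 << 2
    assert(isShiftedUInt<8, 2>(1020));  // 1020 == 255 << 2
    assert(!isShiftedUInt<8, 2>(1024)); // 1024 >> 2 == 256 needs 9 bits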
diff --git a/include/llvm/Support/MemoryObject.h b/include/llvm/Support/MemoryObject.h
index dec0f134b306..b778b08de932 100644
--- a/include/llvm/Support/MemoryObject.h
+++ b/include/llvm/Support/MemoryObject.h
@@ -23,19 +23,19 @@ class MemoryObject {
public:
/// Destructor - Override as necessary.
virtual ~MemoryObject();
-
+
/// getBase - Returns the lowest valid address in the region.
///
/// @result - The lowest valid address.
virtual uint64_t getBase() const = 0;
-
+
/// getExtent - Returns the size of the region in bytes. (The region is
- /// contiguous, so the highest valid address of the region
+ /// contiguous, so the highest valid address of the region
/// is getBase() + getExtent() - 1).
///
/// @result - The size of the region.
virtual uint64_t getExtent() const = 0;
-
+
/// readByte - Tries to read a single byte from the region.
///
/// @param address - The address of the byte, in the same space as getBase().
@@ -43,7 +43,7 @@ public:
/// @result - 0 if successful; -1 if not. Failure may be due to a
/// bounds violation or an implementation-specific error.
virtual int readByte(uint64_t address, uint8_t* ptr) const = 0;
-
+
/// readBytes - Tries to read a contiguous range of bytes from the
/// region, up to the end of the region.
/// You should override this function if there is a quicker
@@ -67,4 +67,3 @@ public:
}
#endif
-
diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h
index 45165ded619c..f4bedf92c441 100644
--- a/include/llvm/Support/PathV1.h
+++ b/include/llvm/Support/PathV1.h
@@ -131,20 +131,6 @@ namespace sys {
/// @brief Find a library.
static Path FindLibrary(std::string& short_name);
- /// Construct a path to the default LLVM configuration directory. The
- /// implementation must ensure that this is a well-known (same on many
- /// systems) directory in which llvm configuration files exist. For
- /// example, on Unix, the /etc/llvm directory has been selected.
- /// @brief Construct a path to the default LLVM configuration directory
- static Path GetLLVMDefaultConfigDir();
-
- /// Construct a path to the LLVM installed configuration directory. The
- /// implementation must ensure that this refers to the "etc" directory of
- /// the LLVM installation. This is the location where configuration files
- /// will be located for a particular installation of LLVM on a machine.
- /// @brief Construct a path to the LLVM installed configuration directory
- static Path GetLLVMConfigDir();
-
/// Construct a path to the current user's home directory. The
/// implementation must use an operating system specific mechanism for
/// determining the user's home directory. For example, the environment
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index f0fb516d5f73..221fa8b3ebf9 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -31,6 +31,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
namespace llvm {
namespace PatternMatch {
@@ -97,12 +98,19 @@ struct apint_match {
Res = &CI->getValue();
return true;
}
+ // FIXME: Remove this.
if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
if (ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(CV->getSplatValue())) {
Res = &CI->getValue();
return true;
}
+ if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
+ if (ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(CV->getSplatValue())) {
+ Res = &CI->getValue();
+ return true;
+ }
return false;
}
};
@@ -143,9 +151,13 @@ struct cst_pred_ty : public Predicate {
bool match(ITy *V) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
return this->isValue(CI->getValue());
+ // FIXME: Remove this.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
return this->isValue(CI->getValue());
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
+ return this->isValue(CI->getValue());
return false;
}
};
@@ -163,12 +175,22 @@ struct api_pred_ty : public Predicate {
Res = &CI->getValue();
return true;
}
+
+ // FIXME: remove.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
if (this->isValue(CI->getValue())) {
Res = &CI->getValue();
return true;
}
+
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
+ if (this->isValue(CI->getValue())) {
+ Res = &CI->getValue();
+ return true;
+ }
+
return false;
}
};
@@ -441,6 +463,26 @@ m_IDiv(const LHS &L, const RHS &R) {
}
//===----------------------------------------------------------------------===//
+// Class that matches exact binary ops.
+//
+template<typename SubPattern_t>
+struct Exact_match {
+ SubPattern_t SubPattern;
+
+ Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
+
+ template<typename OpTy>
+ bool match(OpTy *V) {
+ if (PossiblyExactOperator *PEO = dyn_cast<PossiblyExactOperator>(V))
+ return PEO->isExact() && SubPattern.match(V);
+ return false;
+ }
+};
+
+template<typename T>
+inline Exact_match<T> m_Exact(const T &SubPattern) { return SubPattern; }
+
+//===----------------------------------------------------------------------===//
// Matchers for CmpInst classes
//
@@ -529,10 +571,8 @@ struct CastClass_match {
template<typename OpTy>
bool match(OpTy *V) {
- if (CastInst *I = dyn_cast<CastInst>(V))
- return I->getOpcode() == Opcode && Op.match(I->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- return CE->getOpcode() == Opcode && Op.match(CE->getOperand(0));
+ if (Operator *O = dyn_cast<Operator>(V))
+ return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
return false;
}
};
@@ -585,21 +625,18 @@ struct not_match {
template<typename OpTy>
bool match(OpTy *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (I->getOpcode() == Instruction::Xor)
- return matchIfNot(I->getOperand(0), I->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::Xor)
- return matchIfNot(CE->getOperand(0), CE->getOperand(1));
+ if (Operator *O = dyn_cast<Operator>(V))
+ if (O->getOpcode() == Instruction::Xor)
+ return matchIfNot(O->getOperand(0), O->getOperand(1));
return false;
}
private:
bool matchIfNot(Value *LHS, Value *RHS) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
- return CI->isAllOnesValue() && L.match(LHS);
- if (ConstantVector *CV = dyn_cast<ConstantVector>(RHS))
- return CV->isAllOnesValue() && L.match(LHS);
- return false;
+ return (isa<ConstantInt>(RHS) || isa<ConstantDataVector>(RHS) ||
+ // FIXME: Remove CV.
+ isa<ConstantVector>(RHS)) &&
+ cast<Constant>(RHS)->isAllOnesValue() &&
+ L.match(LHS);
}
};
@@ -615,19 +652,16 @@ struct neg_match {
template<typename OpTy>
bool match(OpTy *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (I->getOpcode() == Instruction::Sub)
- return matchIfNeg(I->getOperand(0), I->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::Sub)
- return matchIfNeg(CE->getOperand(0), CE->getOperand(1));
+ if (Operator *O = dyn_cast<Operator>(V))
+ if (O->getOpcode() == Instruction::Sub)
+ return matchIfNeg(O->getOperand(0), O->getOperand(1));
return false;
}
private:
bool matchIfNeg(Value *LHS, Value *RHS) {
- if (ConstantInt *C = dyn_cast<ConstantInt>(LHS))
- return C->isZero() && L.match(RHS);
- return false;
+ return ((isa<ConstantInt>(LHS) && cast<ConstantInt>(LHS)->isZero()) ||
+ isa<ConstantAggregateZero>(LHS)) &&
+ L.match(RHS);
}
};
@@ -644,12 +678,9 @@ struct fneg_match {
template<typename OpTy>
bool match(OpTy *V) {
- if (Instruction *I = dyn_cast<Instruction>(V))
- if (I->getOpcode() == Instruction::FSub)
- return matchIfFNeg(I->getOperand(0), I->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- if (CE->getOpcode() == Instruction::FSub)
- return matchIfFNeg(CE->getOperand(0), CE->getOperand(1));
+ if (Operator *O = dyn_cast<Operator>(V))
+ if (O->getOpcode() == Instruction::FSub)
+ return matchIfFNeg(O->getOperand(0), O->getOperand(1));
return false;
}
private:
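A minimal sketch of the new m_Exact matcher in use (annotation, not part of the patch; V is an assumed Value* under inspection):

    using namespace llvm::PatternMatch;
    Value *X;
    // Exact_match verifies isExact() and then delegates to the sub-pattern,
    // so this matches "sdiv exact %X, 4" but not a plain sdiv.
    if (match(V, m_Exact(m_SDiv(m_Value(X), m_ConstantInt<4>()))))
      ; // X is bound to the dividend here.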
diff --git a/include/llvm/Support/Process.h b/include/llvm/Support/Process.h
index 27ef2670093a..33799229ff35 100644
--- a/include/llvm/Support/Process.h
+++ b/include/llvm/Support/Process.h
@@ -138,9 +138,6 @@ namespace sys {
/// Resets the terminals colors, or returns an escape sequence to do so.
static const char *ResetColor();
-
- /// Change the program working directory to that given by \arg Path.
- static void SetWorkingDirectory(std::string Path);
/// @}
};
}
diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h
index a5026573aa94..a85f23550ec8 100644
--- a/include/llvm/Support/Program.h
+++ b/include/llvm/Support/Program.h
@@ -17,6 +17,7 @@
#include "llvm/Support/Path.h"
namespace llvm {
+class error_code;
namespace sys {
// TODO: Add operations to communicate with the process, redirect its I/O,
@@ -122,12 +123,12 @@ namespace sys {
/// @brief Construct a Program by finding it by name.
static Path FindProgramByName(const std::string& name);
- // These methods change the specified standard stream (stdin,
- // stdout, or stderr) to binary mode. They return true if an error
- // occurred
- static bool ChangeStdinToBinary();
- static bool ChangeStdoutToBinary();
- static bool ChangeStderrToBinary();
+ // These methods change the specified standard stream (stdin, stdout, or
+ // stderr) to binary mode. They return errc::success if the specified stream
+ // was changed. Otherwise a platform dependent error is returned.
+ static error_code ChangeStdinToBinary();
+ static error_code ChangeStdoutToBinary();
+ static error_code ChangeStderrToBinary();
/// A convenience function equivalent to Program prg; prg.Execute(..);
/// prg.Wait(..);
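Callers of the re-typed functions now test an error_code instead of a bool; a minimal sketch (annotation, not part of the patch):

    if (error_code ec = sys::Program::ChangeStdoutToBinary())
      errs() << "cannot set stdout to binary mode: " << ec.message() << "\n";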
diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h
index d8f8c7894142..fa6e189e97bd 100644
--- a/include/llvm/Support/Recycler.h
+++ b/include/llvm/Support/Recycler.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ilist.h"
#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cassert>
namespace llvm {
@@ -52,7 +53,7 @@ struct ilist_traits<RecyclerStruct> :
static void noteHead(RecyclerStruct*, RecyclerStruct*) {}
static void deleteNode(RecyclerStruct *) {
- assert(0 && "Recycler's ilist_traits shouldn't see a deleteNode call!");
+ llvm_unreachable("Recycler's ilist_traits shouldn't see a deleteNode call!");
}
};
diff --git a/include/llvm/Support/SMLoc.h b/include/llvm/Support/SMLoc.h
index 02db32794b6d..d48bfcc30c5b 100644
--- a/include/llvm/Support/SMLoc.h
+++ b/include/llvm/Support/SMLoc.h
@@ -15,9 +15,11 @@
#ifndef SUPPORT_SMLOC_H
#define SUPPORT_SMLOC_H
+#include <cassert>
+
namespace llvm {
-// SMLoc - Represents a location in source code.
+/// SMLoc - Represents a location in source code.
class SMLoc {
const char *Ptr;
public:
@@ -38,7 +40,23 @@ public:
}
};
-}
+/// SMRange - Represents a range in source code. Note that unlike standard STL
+/// ranges, the locations specified are considered to be *inclusive*. For
+/// example, [X,X] *does* include X; it isn't an empty range.
+class SMRange {
+public:
+ SMLoc Start, End;
+
+ SMRange() {}
+ SMRange(SMLoc Start, SMLoc End) : Start(Start), End(End) {
+ assert(Start.isValid() == End.isValid() &&
+ "Start and end should either both be valid or both be invalid!");
+ }
+
+ bool isValid() const { return Start.isValid(); }
+};
+
+} // end namespace llvm
#endif
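A small illustration of the inclusive semantics (annotation, not part of the patch; Ptr and Len are an assumed pointer and length within a managed buffer):

    SMLoc Start = SMLoc::getFromPointer(Ptr);
    SMLoc End = SMLoc::getFromPointer(Ptr + Len - 1); // last char, not one past
    SMRange Token(Start, End);    // covers [Start, End], both ends included
    SMRange Single(Start, Start); // one location; not an empty range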
diff --git a/include/llvm/Support/SaveAndRestore.h b/include/llvm/Support/SaveAndRestore.h
new file mode 100644
index 000000000000..ffa99b968d3c
--- /dev/null
+++ b/include/llvm/Support/SaveAndRestore.h
@@ -0,0 +1,47 @@
+//===-- SaveAndRestore.h - Utility -------------------------------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utility classes that use RAII to save and restore
+// values.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SAVERESTORE
+#define LLVM_ADT_SAVERESTORE
+
+namespace llvm {
+
+// SaveAndRestore - A utility class that uses RAII to save and restore
+// the value of a variable.
+template<typename T>
+struct SaveAndRestore {
+ SaveAndRestore(T& x) : X(x), old_value(x) {}
+ SaveAndRestore(T& x, const T &new_value) : X(x), old_value(x) {
+ X = new_value;
+ }
+ ~SaveAndRestore() { X = old_value; }
+ T get() { return old_value; }
+private:
+ T& X;
+ T old_value;
+};
+
+// SaveOr - Similar to SaveAndRestore. Operates only on bools; the old
+// value of a variable is saved, and during the destructor the old value
+// is or'ed with the new value.
+struct SaveOr {
+ SaveOr(bool& x) : X(x), old_value(x) { x = false; }
+ ~SaveOr() { X |= old_value; }
+private:
+ bool& X;
+ const bool old_value;
+};
+
+}
+#endif
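A minimal usage sketch (annotation, not part of the patch; InLoop and visitLoop are illustrative):

    static bool InLoop = false;

    void visitLoop() {
      // InLoop is true for this scope and restored on every exit path.
      SaveAndRestore<bool> SavedInLoop(InLoop, true);
      // ... recurse into the loop body ...
    }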
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index deb8cafa06d2..58b8fab52402 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -17,10 +17,8 @@
#define SUPPORT_SOURCEMGR_H
#include "llvm/Support/SMLoc.h"
-
+#include "llvm/ADT/ArrayRef.h"
#include <string>
-#include <vector>
-#include <cassert>
namespace llvm {
class MemoryBuffer;
@@ -33,10 +31,16 @@ namespace llvm {
/// and handles diagnostic wrangling.
class SourceMgr {
public:
+ enum DiagKind {
+ DK_Error,
+ DK_Warning,
+ DK_Note
+ };
+
/// DiagHandlerTy - Clients that want to handle their own diagnostics in a
/// custom way can register a function pointer+context as a diagnostic
/// handler. It gets called each time PrintMessage is invoked.
- typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context);
+ typedef void (*DiagHandlerTy)(const SMDiagnostic &, void *Context);
private:
struct SrcBuffer {
/// Buffer - The memory buffer for the file.
@@ -124,11 +128,8 @@ public:
/// PrintMessage - Emit a message about the specified location with the
/// specified string.
///
- /// @param Type - If non-null, the kind of message (e.g., "error") which is
- /// prefixed to the message.
- /// @param ShowLine - Should the diagnostic show the source line.
- void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
- bool ShowLine = true) const;
+ void PrintMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const;
/// GetMessage - Return an SMDiagnostic at the specified location with the
@@ -136,10 +137,8 @@ public:
///
/// @param Type - If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
- /// @param ShowLine - Should the diagnostic show the source line.
- SMDiagnostic GetMessage(SMLoc Loc,
- const Twine &Msg, const char *Type,
- bool ShowLine = true) const;
+ SMDiagnostic GetMessage(SMLoc Loc, DiagKind Kind, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const;
/// PrintIncludeStack - Prints the names of included files and the line of the
/// file they were included from. A diagnostic handler can use this before
@@ -158,35 +157,38 @@ class SMDiagnostic {
SMLoc Loc;
std::string Filename;
int LineNo, ColumnNo;
+ SourceMgr::DiagKind Kind;
std::string Message, LineContents;
- unsigned ShowLine : 1;
+ std::vector<std::pair<unsigned, unsigned> > Ranges;
public:
// Null diagnostic.
- SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {}
+ SMDiagnostic()
+ : SM(0), LineNo(0), ColumnNo(0), Kind(SourceMgr::DK_Error) {}
// Diagnostic with no location (e.g. file not found, command line arg error).
- SMDiagnostic(const std::string &filename, const std::string &Msg)
- : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1),
- Message(Msg), ShowLine(false) {}
+ SMDiagnostic(const std::string &filename, SourceMgr::DiagKind Kind,
+ const std::string &Msg)
+ : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1), Kind(Kind),
+ Message(Msg) {}
// Diagnostic with a location.
SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
- int Line, int Col,
+ int Line, int Col, SourceMgr::DiagKind Kind,
const std::string &Msg, const std::string &LineStr,
- bool showline = true)
- : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg),
- LineContents(LineStr), ShowLine(showline) {}
+ ArrayRef<std::pair<unsigned,unsigned> > Ranges);
const SourceMgr *getSourceMgr() const { return SM; }
SMLoc getLoc() const { return Loc; }
const std::string &getFilename() const { return Filename; }
int getLineNo() const { return LineNo; }
int getColumnNo() const { return ColumnNo; }
+ SourceMgr::DiagKind getKind() const { return Kind; }
const std::string &getMessage() const { return Message; }
const std::string &getLineContents() const { return LineContents; }
- bool getShowLine() const { return ShowLine; }
-
- void Print(const char *ProgName, raw_ostream &S) const;
+ const std::vector<std::pair<unsigned, unsigned> > &getRanges() const {
+ return Ranges;
+ }
+ void print(const char *ProgName, raw_ostream &S) const;
};
} // end llvm namespace
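A minimal sketch of the reworked diagnostic entry point (annotation, not part of the patch; Loc, Start, and End are assumed SMLocs into a buffer owned by SM):

    // A single SMRange converts implicitly to ArrayRef<SMRange>; the old
    // Type/ShowLine parameters are subsumed by the DiagKind enum.
    SM.PrintMessage(Loc, SourceMgr::DK_Error, "unexpected token",
                    SMRange(Start, End));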
diff --git a/include/llvm/Support/StreamableMemoryObject.h b/include/llvm/Support/StreamableMemoryObject.h
new file mode 100644
index 000000000000..531dbb216d7a
--- /dev/null
+++ b/include/llvm/Support/StreamableMemoryObject.h
@@ -0,0 +1,181 @@
+//===- StreamableMemoryObject.h - Streamable data interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef STREAMABLEMEMORYOBJECT_H_
+#define STREAMABLEMEMORYOBJECT_H_
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/DataStream.h"
+#include <vector>
+
+namespace llvm {
+
+/// StreamableMemoryObject - Interface to data which might be streamed.
+/// Streamability has 2 important implications/restrictions. First, the data
+/// might not yet exist in memory when the request is made. This just means
+/// that readByte/readBytes might have to block or do some work to get it.
+/// More significantly, the exact size of the object might not be known until
+/// it has all been fetched. This means that to return the right result,
+/// getExtent must also wait for all the data to arrive; therefore it should
+/// not be called on objects which are actually streamed (this would defeat
+/// the purpose of streaming). Instead, isValidAddress and isObjectEnd can be
+/// used to test addresses without knowing the exact size of the stream.
+/// Finally, getPointer can be used instead of readBytes to avoid extra copying.
+class StreamableMemoryObject : public MemoryObject {
+ public:
+ /// Destructor - Override as necessary.
+ virtual ~StreamableMemoryObject();
+
+ /// getBase - Returns the lowest valid address in the region.
+ ///
+ /// @result - The lowest valid address.
+ virtual uint64_t getBase() const = 0;
+
+ /// getExtent - Returns the size of the region in bytes. (The region is
+ /// contiguous, so the highest valid address of the region
+ /// is getBase() + getExtent() - 1).
+ /// May block until all bytes in the stream have been read
+ ///
+ /// @result - The size of the region.
+ virtual uint64_t getExtent() const = 0;
+
+ /// readByte - Tries to read a single byte from the region.
+ /// May block until (address - base) bytes have been read
+ /// @param address - The address of the byte, in the same space as getBase().
+ /// @param ptr - A pointer to a byte to be filled in. Must be non-NULL.
+ /// @result - 0 if successful; -1 if not. Failure may be due to a
+ /// bounds violation or an implementation-specific error.
+ virtual int readByte(uint64_t address, uint8_t* ptr) const = 0;
+
+ /// readBytes - Tries to read a contiguous range of bytes from the
+ /// region, up to the end of the region.
+ /// May block until (address - base + size) bytes have
+ /// been read. Additionally, StreamableMemoryObjects will
+ /// not do partial reads - if size bytes cannot be read,
+ /// readBytes will fail.
+ ///
+ /// @param address - The address of the first byte, in the same space as
+ /// getBase().
+ /// @param size - The maximum number of bytes to copy.
+ /// @param buf - A pointer to a buffer to be filled in. Must be non-NULL
+ /// and large enough to hold size bytes.
+ /// @param copied - A pointer to a number that is filled in with the number
+ /// of bytes actually read. May be NULL.
+ /// @result - 0 if successful; -1 if not. Failure may be due to a
+ /// bounds violation or an implementation-specific error.
+ virtual int readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const = 0;
+
+ /// getPointer - Ensures that the requested data is in memory, and returns
+ /// a pointer to it. More efficient than using readBytes if the
+ /// data is already in memory.
+ /// May block until (address - base + size) bytes have been read
+ /// @param address - address of the byte, in the same space as getBase()
+ /// @param size - amount of data that must be available on return
+ /// @result - valid pointer to the requested data
+ virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const = 0;
+
+ /// isValidAddress - Returns true if the address is within the object
+ /// (i.e. between base and base + extent - 1 inclusive)
+ /// May block until (address - base) bytes have been read
+ /// @param address - address of the byte, in the same space as getBase()
+ /// @result - true if the address may be read with readByte()
+ virtual bool isValidAddress(uint64_t address) const = 0;
+
+ /// isObjectEnd - Returns true if the address is one past the end of the
+ /// object (i.e. if it is equal to base + extent)
+ /// May block until (address - base) bytes have been read
+ /// @param address - address of the byte, in the same space as getBase()
+ /// @result - true if the address is equal to base + extent
+ virtual bool isObjectEnd(uint64_t address) const = 0;
+};
+
+/// StreamingMemoryObject - interface to data which is actually streamed from
+/// a DataStreamer. In addition to inherited members, it has the
+/// dropLeadingBytes and setKnownObjectSize methods which are not applicable
+/// to non-streamed objects.
+class StreamingMemoryObject : public StreamableMemoryObject {
+public:
+ StreamingMemoryObject(DataStreamer *streamer);
+ virtual uint64_t getBase() const { return 0; }
+ virtual uint64_t getExtent() const;
+ virtual int readByte(uint64_t address, uint8_t* ptr) const;
+ virtual int readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const;
+ virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const {
+ // This could be fixed by ensuring the bytes are fetched and making a copy,
+ // requiring that the bitcode size be known, or otherwise ensuring that
+ // the memory doesn't go away/get reallocated, but it's
+ // not currently necessary. Users that need the pointer don't stream.
+ assert(0 && "getPointer in streaming memory objects not allowed");
+ return NULL;
+ }
+ virtual bool isValidAddress(uint64_t address) const;
+ virtual bool isObjectEnd(uint64_t address) const;
+
+ /// Drop s bytes from the front of the stream, pushing the positions of the
+ /// remaining bytes down by s. This is used to skip past the bitcode header,
+ /// since we don't know a priori if it's present, and we can't put bytes
+ /// back into the stream once we've read them.
+ bool dropLeadingBytes(size_t s);
+
+ /// If the data object size is known in advance, many of the operations can
+ /// be made more efficient, so this method should be called before reading
+ /// starts (although it can be called anytime).
+ void setKnownObjectSize(size_t size);
+
+private:
+ const static uint32_t kChunkSize = 4096 * 4;
+ mutable std::vector<unsigned char> Bytes;
+ OwningPtr<DataStreamer> Streamer;
+ mutable size_t BytesRead; // Bytes read from stream
+ size_t BytesSkipped; // Bytes skipped at start of stream (e.g. wrapper/header)
+ mutable size_t ObjectSize; // 0 if unknown, set if wrapper seen or EOF reached
+ mutable bool EOFReached;
+
+ // Fetch enough bytes such that Pos can be read or EOF is reached
+ // (i.e. BytesRead > Pos). Return true if Pos can be read.
+ // Unlike most of the functions in BitcodeReader, returns true on success.
+ // Most of the requests will be small, but we fetch at kChunkSize bytes
+ // at a time to avoid making too many potentially expensive GetBytes calls
+ bool fetchToPos(size_t Pos) const {
+ if (EOFReached) return Pos < ObjectSize;
+ while (Pos >= BytesRead) {
+ Bytes.resize(BytesRead + BytesSkipped + kChunkSize);
+ size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped],
+ kChunkSize);
+ BytesRead += bytes;
+ if (bytes < kChunkSize) {
+ if (ObjectSize && BytesRead < Pos)
+ assert(0 && "Unexpected short read fetching bitcode");
+ if (BytesRead <= Pos) { // reached EOF/ran out of bytes
+ ObjectSize = BytesRead;
+ EOFReached = true;
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ StreamingMemoryObject(const StreamingMemoryObject&); // DO NOT IMPLEMENT
+ void operator=(const StreamingMemoryObject&); // DO NOT IMPLEMENT
+};
+
+StreamableMemoryObject *getNonStreamedMemoryObject(
+ const unsigned char *Start, const unsigned char *End);
+
+}
+#endif // STREAMABLEMEMORYOBJECT_H_
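A minimal sketch of the non-streamed adaptor declared above (annotation, not part of the patch; the four-byte buffer is illustrative):

    static const unsigned char Data[] = { 'B', 'C', 0xc0, 0xde };
    OwningPtr<StreamableMemoryObject> Obj(
        getNonStreamedMemoryObject(Data, Data + sizeof(Data)));
    uint8_t Byte;
    if (Obj->isValidAddress(3) && Obj->readByte(3, &Byte) == 0)
      ; // Byte == 0xde; getExtent() is 4 and never blocks for this adaptor.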
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index 45f249d7ed0e..88081307ac6b 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -44,12 +44,14 @@ namespace llvm {
class MCTargetAsmLexer;
class MCTargetAsmParser;
class TargetMachine;
+ class TargetOptions;
class raw_ostream;
class formatted_raw_ostream;
MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm,
bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
MCAsmBackend *TAB,
@@ -73,7 +75,8 @@ namespace llvm {
StringRef TT);
typedef MCCodeGenInfo *(*MCCodeGenInfoCtorFnTy)(StringRef TT,
Reloc::Model RM,
- CodeModel::Model CM);
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL);
typedef MCInstrInfo *(*MCInstrInfoCtorFnTy)(void);
typedef MCInstrAnalysis *(*MCInstrAnalysisCtorFnTy)(const MCInstrInfo*Info);
typedef MCRegisterInfo *(*MCRegInfoCtorFnTy)(StringRef TT);
@@ -84,8 +87,10 @@ namespace llvm {
StringRef TT,
StringRef CPU,
StringRef Features,
+ const TargetOptions &Options,
Reloc::Model RM,
- CodeModel::Model CM);
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL);
typedef AsmPrinter *(*AsmPrinterCtorTy)(TargetMachine &TM,
MCStreamer &Streamer);
typedef MCAsmBackend *(*MCAsmBackendCtorTy)(const Target &T, StringRef TT);
@@ -99,6 +104,8 @@ namespace llvm {
typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI);
typedef MCCodeEmitter *(*MCCodeEmitterCtorTy)(const MCInstrInfo &II,
const MCSubtargetInfo &STI,
@@ -116,6 +123,7 @@ namespace llvm {
bool isVerboseAsm,
bool useLoc,
bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
MCAsmBackend *TAB,
@@ -143,8 +151,8 @@ namespace llvm {
/// registered.
MCAsmInfoCtorFnTy MCAsmInfoCtorFn;
- /// MCCodeGenInfoCtorFn - Constructor function for this target's MCCodeGenInfo,
- /// if registered.
+ /// MCCodeGenInfoCtorFn - Constructor function for this target's
+ /// MCCodeGenInfo, if registered.
MCCodeGenInfoCtorFnTy MCCodeGenInfoCtorFn;
/// MCInstrInfoCtorFn - Constructor function for this target's MCInstrInfo,
@@ -275,10 +283,11 @@ namespace llvm {
/// createMCCodeGenInfo - Create a MCCodeGenInfo implementation.
///
MCCodeGenInfo *createMCCodeGenInfo(StringRef Triple, Reloc::Model RM,
- CodeModel::Model CM) const {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) const {
if (!MCCodeGenInfoCtorFn)
return 0;
- return MCCodeGenInfoCtorFn(Triple, RM, CM);
+ return MCCodeGenInfoCtorFn(Triple, RM, CM, OL);
}
/// createMCInstrInfo - Create a MCInstrInfo implementation.
@@ -329,12 +338,14 @@ namespace llvm {
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
TargetMachine *createTargetMachine(StringRef Triple, StringRef CPU,
- StringRef Features,
- Reloc::Model RM = Reloc::Default,
- CodeModel::Model CM = CodeModel::Default) const {
+ StringRef Features, const TargetOptions &Options,
+ Reloc::Model RM = Reloc::Default,
+ CodeModel::Model CM = CodeModel::Default,
+ CodeGenOpt::Level OL = CodeGenOpt::Default) const {
if (!TargetMachineCtorFn)
return 0;
- return TargetMachineCtorFn(*this, Triple, CPU, Features, RM, CM);
+ return TargetMachineCtorFn(*this, Triple, CPU, Features, Options,
+ RM, CM, OL);
}
/// createMCAsmBackend - Create a target specific assembly parser.
@@ -383,10 +394,12 @@ namespace llvm {
MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) const {
if (!MCInstPrinterCtorFn)
return 0;
- return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, STI);
+ return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI, MII, MRI, STI);
}
@@ -426,13 +439,14 @@ namespace llvm {
bool isVerboseAsm,
bool useLoc,
bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
MCAsmBackend *TAB,
bool ShowInst) const {
// AsmStreamerCtorFn defaults to llvm::createAsmStreamer
return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc, useCFI,
- InstPrint, CE, TAB, ShowInst);
+ useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
}
/// @}
@@ -776,7 +790,7 @@ namespace llvm {
/// extern "C" void LLVMInitializeFooTargetInfo() {
/// RegisterTarget<Triple::foo> X(TheFooTarget, "foo", "Foo description");
/// }
- template<Triple::ArchType TargetArchType = Triple::InvalidArch,
+ template<Triple::ArchType TargetArchType = Triple::UnknownArch,
bool HasJIT = false>
struct RegisterTarget {
RegisterTarget(Target &T, const char *Name, const char *Desc) {
@@ -840,8 +854,8 @@ namespace llvm {
TargetRegistry::RegisterMCCodeGenInfo(T, &Allocator);
}
private:
- static MCCodeGenInfo *Allocator(StringRef TT,
- Reloc::Model RM, CodeModel::Model CM) {
+ static MCCodeGenInfo *Allocator(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM, CodeGenOpt::Level OL) {
return new MCCodeGenInfoImpl();
}
};
@@ -1010,9 +1024,11 @@ namespace llvm {
private:
static TargetMachine *Allocator(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM,
- CodeModel::Model CM) {
- return new TargetMachineImpl(T, TT, CPU, FS, RM, CM);
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ return new TargetMachineImpl(T, TT, CPU, FS, Options, RM, CM, OL);
}
};
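A minimal sketch of the widened factory call (annotation, not part of the patch; TheTarget and TripleStr are assumed to come from TargetRegistry::lookupTarget):

    TargetOptions Options;              // new: threaded through to the backend
    TargetMachine *TM = TheTarget->createTargetMachine(
        TripleStr, "generic", "", Options,
        Reloc::Default, CodeModel::Default, CodeGenOpt::Default);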
diff --git a/include/llvm/Support/TargetSelect.h b/include/llvm/Support/TargetSelect.h
index 83ff68caaeac..a86e953f00ea 100644
--- a/include/llvm/Support/TargetSelect.h
+++ b/include/llvm/Support/TargetSelect.h
@@ -149,6 +149,18 @@ namespace llvm {
#endif
}
+ /// InitializeNativeTargetDisassembler - The main program should call
+ /// this function to initialize the native target disassembler.
+ inline bool InitializeNativeTargetDisassembler() {
+ // If we have a native target, initialize the corresponding disassembler.
+#ifdef LLVM_NATIVE_DISASSEMBLER
+ LLVM_NATIVE_DISASSEMBLER();
+ return false;
+#else
+ return true;
+#endif
+ }
+
}
#endif
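A typical start-up sequence for a host-only tool (annotation, not part of the patch; the sibling initializers are declared earlier in this header):

    InitializeNativeTarget();              // the target itself
    InitializeNativeTargetAsmPrinter();    // MC-based code emission
    InitializeNativeTargetDisassembler();  // new; returns true if unavailable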
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index 7662eaaff5a9..e14764703932 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -16,8 +16,23 @@
#ifndef LLVM_SYSTEM_VALGRIND_H
#define LLVM_SYSTEM_VALGRIND_H
+#include "llvm/Support/Compiler.h"
+#include "llvm/Config/llvm-config.h"
#include <stddef.h>
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
+// tsan (Thread Sanitizer) is a valgrind-based tool that detects these exact
+// functions by name.
+extern "C" {
+LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line,
+ const volatile void *cv);
+LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line,
+ const volatile void *cv);
+LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line);
+LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line);
+}
+#endif
+
namespace llvm {
namespace sys {
// True if Valgrind is controlling this process.
@@ -26,6 +41,34 @@ namespace sys {
// Discard valgrind's translation of code in the range [Addr .. Addr + Len).
// Otherwise valgrind may continue to execute the old version of the code.
void ValgrindDiscardTranslations(const void *Addr, size_t Len);
+
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
+ // Thread Sanitizer is a valgrind tool that finds races in code.
+ // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
+
+ // This marker is used to define a happens-before arc. The race detector will
+ // infer an arc from the begin to the end when they share the same pointer
+ // argument.
+ #define TsanHappensBefore(cv) \
+ AnnotateHappensBefore(__FILE__, __LINE__, cv)
+
+ // This marker defines the destination of a happens-before arc.
+ #define TsanHappensAfter(cv) \
+ AnnotateHappensAfter(__FILE__, __LINE__, cv)
+
+ // Ignore any races on writes between here and the next TsanIgnoreWritesEnd.
+ #define TsanIgnoreWritesBegin() \
+ AnnotateIgnoreWritesBegin(__FILE__, __LINE__)
+
+ // Resume checking for racy writes.
+ #define TsanIgnoreWritesEnd() \
+ AnnotateIgnoreWritesEnd(__FILE__, __LINE__)
+#else
+ #define TsanHappensBefore(cv)
+ #define TsanHappensAfter(cv)
+ #define TsanIgnoreWritesBegin()
+ #define TsanIgnoreWritesEnd()
+#endif
}
}
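A minimal sketch of a happens-before arc with the new macros (annotation, not part of the patch; Ready is an illustrative flag shared by two threads):

    // Writer thread: annotate on &Ready, then publish.
    TsanHappensBefore(&Ready);
    Ready = true;

    // Reader thread, after it observes Ready == true:
    TsanHappensAfter(&Ready);
    // tsan pairs the two annotations on the same address and treats all
    // writes before the arc as ordered before everything after it.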
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index c0cdc35e99bf..b7210b2063ea 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -49,52 +49,61 @@ protected:
Tracking,
Weak
};
-private:
+private:
PointerIntPair<ValueHandleBase**, 2, HandleBaseKind> PrevPair;
ValueHandleBase *Next;
- Value *VP;
+
+ // A subclass may want to store some information along with the value
+ // pointer. Allow them to do this by making the value pointer a pointer-int
+ // pair. The 'setValPtrInt' and 'getValPtrInt' methods below give them this
+ // access.
+ PointerIntPair<Value*, 2> VP;
explicit ValueHandleBase(const ValueHandleBase&); // DO NOT IMPLEMENT.
public:
explicit ValueHandleBase(HandleBaseKind Kind)
- : PrevPair(0, Kind), Next(0), VP(0) {}
+ : PrevPair(0, Kind), Next(0), VP(0, 0) {}
ValueHandleBase(HandleBaseKind Kind, Value *V)
- : PrevPair(0, Kind), Next(0), VP(V) {
- if (isValid(VP))
+ : PrevPair(0, Kind), Next(0), VP(V, 0) {
+ if (isValid(VP.getPointer()))
AddToUseList();
}
ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS)
: PrevPair(0, Kind), Next(0), VP(RHS.VP) {
- if (isValid(VP))
+ if (isValid(VP.getPointer()))
AddToExistingUseList(RHS.getPrevPtr());
}
~ValueHandleBase() {
- if (isValid(VP))
+ if (isValid(VP.getPointer()))
RemoveFromUseList();
}
Value *operator=(Value *RHS) {
- if (VP == RHS) return RHS;
- if (isValid(VP)) RemoveFromUseList();
- VP = RHS;
- if (isValid(VP)) AddToUseList();
+ if (VP.getPointer() == RHS) return RHS;
+ if (isValid(VP.getPointer())) RemoveFromUseList();
+ VP.setPointer(RHS);
+ if (isValid(VP.getPointer())) AddToUseList();
return RHS;
}
Value *operator=(const ValueHandleBase &RHS) {
- if (VP == RHS.VP) return RHS.VP;
- if (isValid(VP)) RemoveFromUseList();
- VP = RHS.VP;
- if (isValid(VP)) AddToExistingUseList(RHS.getPrevPtr());
- return VP;
+ if (VP.getPointer() == RHS.VP.getPointer()) return RHS.VP.getPointer();
+ if (isValid(VP.getPointer())) RemoveFromUseList();
+ VP.setPointer(RHS.VP.getPointer());
+ if (isValid(VP.getPointer())) AddToExistingUseList(RHS.getPrevPtr());
+ return VP.getPointer();
}
Value *operator->() const { return getValPtr(); }
Value &operator*() const { return *getValPtr(); }
protected:
- Value *getValPtr() const { return VP; }
+ Value *getValPtr() const { return VP.getPointer(); }
+
+ void setValPtrInt(unsigned K) { VP.setInt(K); }
+ unsigned getValPtrInt() const { return VP.getInt(); }
+
static bool isValid(Value *V) {
return V &&
V != DenseMapInfo<Value *>::getEmptyKey() &&
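A hypothetical subclass illustrating the new two-bit side channel (annotation, not part of the patch; FlaggedVH is invented for illustration):

    class FlaggedVH : public ValueHandleBase {
    public:
      FlaggedVH(Value *V, unsigned Flags) : ValueHandleBase(Weak, V) {
        setValPtrInt(Flags & 0x3);          // only two bits fit in the pair
      }
      unsigned getFlags() const { return getValPtrInt(); }
    };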
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
new file mode 100644
index 000000000000..b24cacd3c328
--- /dev/null
+++ b/include/llvm/Support/YAMLParser.h
@@ -0,0 +1,549 @@
+//===--- YAMLParser.h - Simple YAML parser --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a YAML 1.2 parser.
+//
+// See http://www.yaml.org/spec/1.2/spec.html for the full standard.
+//
+// This currently does not implement the following:
+// * Multi-line literal folding.
+// * Tag resolution.
+// * UTF-16.
+// * BOMs anywhere other than the first Unicode scalar value in the file.
+//
+// The most important class here is Stream. This represents a YAML stream with
+// 0, 1, or many documents.
+//
+// SourceMgr sm;
+// StringRef input = getInput();
+// yaml::Stream stream(input, sm);
+//
+// for (yaml::document_iterator di = stream.begin(), de = stream.end();
+// di != de; ++di) {
+// yaml::Node *n = di->getRoot();
+// if (n) {
+// // Do something with n...
+// } else
+// break;
+// }
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_YAML_PARSER_H
+#define LLVM_SUPPORT_YAML_PARSER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/SMLoc.h"
+
+#include <limits>
+#include <utility>
+
+namespace llvm {
+class MemoryBuffer;
+class SourceMgr;
+class raw_ostream;
+class Twine;
+
+namespace yaml {
+
+class document_iterator;
+class Document;
+class Node;
+class Scanner;
+struct Token;
+
+/// @brief Dump all the tokens in this stream to OS.
+/// @returns true if there was an error, false otherwise.
+bool dumpTokens(StringRef Input, raw_ostream &);
+
+/// @brief Scans all tokens in input without outputting anything. This is used
+/// for benchmarking the tokenizer.
+/// @returns true if there was an error, false otherwise.
+bool scanTokens(StringRef Input);
+
+/// @brief Escape \a Input for a double quoted scalar.
+std::string escape(StringRef Input);
+
+/// @brief This class represents a YAML stream potentially containing multiple
+/// documents.
+class Stream {
+public:
+ Stream(StringRef Input, SourceMgr &);
+ ~Stream();
+
+ document_iterator begin();
+ document_iterator end();
+ void skip();
+ bool failed();
+ bool validate() {
+ skip();
+ return !failed();
+ }
+
+ void printError(Node *N, const Twine &Msg);
+
+private:
+ OwningPtr<Scanner> scanner;
+ OwningPtr<Document> CurrentDoc;
+
+ friend class Document;
+
+ /// @brief Validate a %YAML x.x directive.
+ void handleYAMLDirective(const Token &);
+};
+
+/// @brief Abstract base class for all Nodes.
+class Node {
+public:
+ enum NodeKind {
+ NK_Null,
+ NK_Scalar,
+ NK_KeyValue,
+ NK_Mapping,
+ NK_Sequence,
+ NK_Alias
+ };
+
+ Node(unsigned int Type, OwningPtr<Document>&, StringRef Anchor);
+
+ /// @brief Get the value of the anchor attached to this node. If it does not
+ /// have one, getAnchor().size() will be 0.
+ StringRef getAnchor() const { return Anchor; }
+
+ SMRange getSourceRange() const { return SourceRange; }
+ void setSourceRange(SMRange SR) { SourceRange = SR; }
+
+ // These functions forward to Document and Scanner.
+ Token &peekNext();
+ Token getNext();
+ Node *parseBlockNode();
+ BumpPtrAllocator &getAllocator();
+ void setError(const Twine &Message, Token &Location) const;
+ bool failed() const;
+
+ virtual void skip() {}
+
+ unsigned int getType() const { return TypeID; }
+ static inline bool classof(const Node *) { return true; }
+
+ void *operator new ( size_t Size
+ , BumpPtrAllocator &Alloc
+ , size_t Alignment = 16) throw() {
+ return Alloc.Allocate(Size, Alignment);
+ }
+
+ void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
+ Alloc.Deallocate(Ptr);
+ }
+
+protected:
+ OwningPtr<Document> &Doc;
+ SMRange SourceRange;
+
+ void operator delete(void *) throw() {}
+
+ virtual ~Node() {}
+
+private:
+ unsigned int TypeID;
+ StringRef Anchor;
+};
+
+/// @brief A null value.
+///
+/// Example:
+/// !!null null
+class NullNode : public Node {
+public:
+ NullNode(OwningPtr<Document> &D) : Node(NK_Null, D, StringRef()) {}
+
+ static inline bool classof(const NullNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Null;
+ }
+};
+
+/// @brief A scalar node is an opaque datum that can be presented as a
+/// series of zero or more Unicode scalar values.
+///
+/// Example:
+/// Adena
+class ScalarNode : public Node {
+public:
+ ScalarNode(OwningPtr<Document> &D, StringRef Anchor, StringRef Val)
+ : Node(NK_Scalar, D, Anchor)
+ , Value(Val) {
+ SMLoc Start = SMLoc::getFromPointer(Val.begin());
+ SMLoc End = SMLoc::getFromPointer(Val.end() - 1);
+ SourceRange = SMRange(Start, End);
+ }
+
+ // Return Value without any escaping or folding or other fun YAML stuff. These
+ // are the exact bytes that are contained in the file (after conversion to
+ // UTF-8).
+ StringRef getRawValue() const { return Value; }
+
+ /// @brief Gets the value of this node as a StringRef.
+ ///
+ /// @param Storage is used to store the content of the returned StringRef iff
+ /// it requires any modification from how it appeared in the source.
+ /// This happens with escaped characters and multi-line literals.
+ StringRef getValue(SmallVectorImpl<char> &Storage) const;
+
+ static inline bool classof(const ScalarNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Scalar;
+ }
+
+private:
+ StringRef Value;
+
+ StringRef unescapeDoubleQuoted( StringRef UnquotedValue
+ , StringRef::size_type Start
+ , SmallVectorImpl<char> &Storage) const;
+};
+
+/// @brief A key and value pair. While not technically a Node under the YAML
+/// representation graph, it is easier to treat them this way.
+///
+/// TODO: Consider making this not a child of Node.
+///
+/// Example:
+/// Section: .text
+class KeyValueNode : public Node {
+public:
+ KeyValueNode(OwningPtr<Document> &D)
+ : Node(NK_KeyValue, D, StringRef())
+ , Key(0)
+ , Value(0)
+ {}
+
+ /// @brief Parse and return the key.
+ ///
+ /// This may be called multiple times.
+ ///
+ /// @returns The key, or nullptr if failed() == true.
+ Node *getKey();
+
+ /// @brief Parse and return the value.
+ ///
+ /// This may be called multiple times.
+ ///
+ /// @returns The value, or nullptr if failed() == true.
+ Node *getValue();
+
+ virtual void skip() {
+ getKey()->skip();
+ getValue()->skip();
+ }
+
+ static inline bool classof(const KeyValueNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_KeyValue;
+ }
+
+private:
+ Node *Key;
+ Node *Value;
+};
+
+/// @brief This is an iterator abstraction over YAML collections shared by both
+/// sequences and maps.
+///
+/// BaseT must have a ValueT* member named CurrentEntry and a member function
+/// increment() which must set CurrentEntry to 0 to create an end iterator.
+template <class BaseT, class ValueT>
+class basic_collection_iterator
+ : public std::iterator<std::forward_iterator_tag, ValueT> {
+public:
+ basic_collection_iterator() : Base(0) {}
+ basic_collection_iterator(BaseT *B) : Base(B) {}
+
+ ValueT *operator ->() const {
+ assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
+ return Base->CurrentEntry;
+ }
+
+ ValueT &operator *() const {
+ assert(Base && Base->CurrentEntry &&
+ "Attempted to dereference end iterator!");
+ return *Base->CurrentEntry;
+ }
+
+ operator ValueT*() const {
+ assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
+ return Base->CurrentEntry;
+ }
+
+ bool operator !=(const basic_collection_iterator &Other) const {
+ if (Base != Other.Base)
+ return true;
+ return (Base && Other.Base) && Base->CurrentEntry
+ != Other.Base->CurrentEntry;
+ }
+
+ basic_collection_iterator &operator++() {
+ assert(Base && "Attempted to advance iterator past end!");
+ Base->increment();
+ // Create an end iterator.
+ if (Base->CurrentEntry == 0)
+ Base = 0;
+ return *this;
+ }
+
+private:
+ BaseT *Base;
+};
+
+// The following two templates are used for both MappingNode and SequenceNode.
+template <class CollectionType>
+typename CollectionType::iterator begin(CollectionType &C) {
+ assert(C.IsAtBeginning && "You may only iterate over a collection once!");
+ C.IsAtBeginning = false;
+ typename CollectionType::iterator ret(&C);
+ ++ret;
+ return ret;
+}
+
+template <class CollectionType>
+void skip(CollectionType &C) {
+ // TODO: support skipping from the middle of a parsed collection ;/
+ assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
+ if (C.IsAtBeginning)
+ for (typename CollectionType::iterator i = begin(C), e = C.end();
+ i != e; ++i)
+ i->skip();
+}
+
+/// @brief Represents a YAML map created from either a block map or a flow map.
+///
+/// This parses the YAML stream as increment() is called.
+///
+/// Example:
+/// Name: _main
+/// Scope: Global
+class MappingNode : public Node {
+public:
+ enum MappingType {
+ MT_Block,
+ MT_Flow,
+    MT_Inline ///< An inline mapping node is used for "[key: value]".
+ };
+
+ MappingNode(OwningPtr<Document> &D, StringRef Anchor, MappingType MT)
+ : Node(NK_Mapping, D, Anchor)
+ , Type(MT)
+ , IsAtBeginning(true)
+ , IsAtEnd(false)
+ , CurrentEntry(0)
+ {}
+
+ friend class basic_collection_iterator<MappingNode, KeyValueNode>;
+ typedef basic_collection_iterator<MappingNode, KeyValueNode> iterator;
+ template <class T> friend typename T::iterator yaml::begin(T &);
+ template <class T> friend void yaml::skip(T &);
+
+ iterator begin() {
+ return yaml::begin(*this);
+ }
+
+ iterator end() { return iterator(); }
+
+ virtual void skip() {
+ yaml::skip(*this);
+ }
+
+ static inline bool classof(const MappingNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Mapping;
+ }
+
+private:
+ MappingType Type;
+ bool IsAtBeginning;
+ bool IsAtEnd;
+ KeyValueNode *CurrentEntry;
+
+ void increment();
+};
+
+/// @brief Represents a YAML sequence created from either a block sequence or a
+/// flow sequence.
+///
+/// This parses the YAML stream as increment() is called.
+///
+/// Example:
+/// - Hello
+/// - World
+class SequenceNode : public Node {
+public:
+ enum SequenceType {
+ ST_Block,
+ ST_Flow,
+    // Used for:
+ //
+ // key:
+ // - val1
+ // - val2
+ //
+    // No BlockMappingEntry or BlockEnd tokens are created in this case.
+ ST_Indentless
+ };
+
+ SequenceNode(OwningPtr<Document> &D, StringRef Anchor, SequenceType ST)
+ : Node(NK_Sequence, D, Anchor)
+ , SeqType(ST)
+ , IsAtBeginning(true)
+ , IsAtEnd(false)
+ , WasPreviousTokenFlowEntry(true) // Start with an imaginary ','.
+ , CurrentEntry(0)
+ {}
+
+ friend class basic_collection_iterator<SequenceNode, Node>;
+ typedef basic_collection_iterator<SequenceNode, Node> iterator;
+ template <class T> friend typename T::iterator yaml::begin(T &);
+ template <class T> friend void yaml::skip(T &);
+
+ void increment();
+
+ iterator begin() {
+ return yaml::begin(*this);
+ }
+
+ iterator end() { return iterator(); }
+
+ virtual void skip() {
+ yaml::skip(*this);
+ }
+
+ static inline bool classof(const SequenceNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Sequence;
+ }
+
+private:
+ SequenceType SeqType;
+ bool IsAtBeginning;
+ bool IsAtEnd;
+ bool WasPreviousTokenFlowEntry;
+ Node *CurrentEntry;
+};
+
+/// @brief Represents an alias to a Node with an anchor.
+///
+/// Example:
+/// *AnchorName
+class AliasNode : public Node {
+public:
+ AliasNode(OwningPtr<Document> &D, StringRef Val)
+ : Node(NK_Alias, D, StringRef()), Name(Val) {}
+
+ StringRef getName() const { return Name; }
+ Node *getTarget();
+
+  static inline bool classof(const AliasNode *) { return true; }
+ static inline bool classof(const Node *N) {
+ return N->getType() == NK_Alias;
+ }
+
+private:
+ StringRef Name;
+};
+
+/// @brief A YAML Stream is a sequence of Documents. A document contains a root
+/// node.
+class Document {
+public:
+  /// @brief Entry point for parsing a node. Returns a single node.
+ Node *parseBlockNode();
+
+ Document(Stream &ParentStream);
+
+ /// @brief Finish parsing the current document and return true if there are
+ /// more. Return false otherwise.
+ bool skip();
+
+ /// @brief Parse and return the root level node.
+ Node *getRoot() {
+ if (Root)
+ return Root;
+ return Root = parseBlockNode();
+ }
+
+private:
+ friend class Node;
+ friend class document_iterator;
+
+ /// @brief Stream to read tokens from.
+ Stream &stream;
+
+  /// @brief Used to allocate nodes from. All are destroyed without calling
+  /// their destructors when the document is destroyed.
+ BumpPtrAllocator NodeAllocator;
+
+ /// @brief The root node. Used to support skipping a partially parsed
+ /// document.
+ Node *Root;
+
+ Token &peekNext();
+ Token getNext();
+ void setError(const Twine &Message, Token &Location) const;
+ bool failed() const;
+
+ void handleTagDirective(const Token &Tag) {
+ // TODO: Track tags.
+ }
+
+ /// @brief Parse %BLAH directives and return true if any were encountered.
+ bool parseDirectives();
+
+ /// @brief Consume the next token and error if it is not \a TK.
+ bool expectToken(int TK);
+};
+
+/// @brief Iterator abstraction for Documents over a Stream.
+class document_iterator {
+public:
+ document_iterator() : Doc(NullDoc) {}
+ document_iterator(OwningPtr<Document> &D) : Doc(D) {}
+
+ bool operator !=(const document_iterator &Other) {
+ return Doc != Other.Doc;
+ }
+
+ document_iterator operator ++() {
+ if (!Doc->skip()) {
+ Doc.reset(0);
+ } else {
+ Stream &S = Doc->stream;
+ Doc.reset(new Document(S));
+ }
+ return *this;
+ }
+
+ Document &operator *() {
+ return *Doc;
+ }
+
+ OwningPtr<Document> &operator ->() {
+ return Doc;
+ }
+
+private:
+ static OwningPtr<Document> NullDoc;
+ OwningPtr<Document> &Doc;
+};
+
+}
+}
+
+#endif
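
A minimal sketch of how the new parser API above fits together, assuming the
Stream class declared earlier in this header (its (StringRef, SourceMgr&)
constructor and begin()/end() document iterators); dumpTopLevelKeys is an
invented helper, not part of the patch:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Walk every document in a YAML stream and print the scalar keys of any
// top-level mapping. Collections may only be iterated once, so non-mapping
// roots are skipped.
static void dumpTopLevelKeys(StringRef Input) {
  SourceMgr SM;
  yaml::Stream YS(Input, SM);
  for (yaml::document_iterator DI = YS.begin(), DE = YS.end();
       DI != DE; ++DI) {
    yaml::Node *Root = DI->getRoot();
    if (!Root)
      return;                              // parse error already reported
    yaml::MappingNode *Map = dyn_cast<yaml::MappingNode>(Root);
    if (!Map) {
      Root->skip();                        // consume the unparsed node
      continue;
    }
    for (yaml::MappingNode::iterator KV = Map->begin(), E = Map->end();
         KV != E; ++KV) {
      SmallString<32> Storage;
      if (yaml::ScalarNode *Key = dyn_cast<yaml::ScalarNode>(KV->getKey()))
        outs() << Key->getValue(Storage) << "\n";
      KV->getValue()->skip();              // consume the value tokens
    }
  }
}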
diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h
index 2c15b69c9420..af812069b9fe 100644
--- a/include/llvm/Support/system_error.h
+++ b/include/llvm/Support/system_error.h
@@ -470,17 +470,6 @@ template <> struct hash<std::error_code>;
namespace llvm {
-template <class T, T v>
-struct integral_constant {
- typedef T value_type;
- static const value_type value = v;
- typedef integral_constant<T,v> type;
- operator value_type() { return value; }
-};
-
-typedef integral_constant<bool, true> true_type;
-typedef integral_constant<bool, false> false_type;
-
// is_error_code_enum
template <class Tp> struct is_error_code_enum : public false_type {};
@@ -738,6 +727,10 @@ class error_code {
public:
error_code() : _val_(0), _cat_(&system_category()) {}
+ static error_code success() {
+ return error_code();
+ }
+
error_code(int _val, const error_category& _cat)
: _val_(_val), _cat_(&_cat) {}
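
The new success() factory names the default "no error" value explicitly. A
minimal sketch of the intended call pattern; demoParse and the error value
are invented:

#include "llvm/Support/system_error.h"
using namespace llvm;

static error_code demoParse(bool Malformed) {
  if (Malformed)                      // hypothetical failure path
    return error_code(22, system_category());
  return error_code::success();       // named "no error" value
}

static bool tryParse(bool Malformed) {
  if (error_code ec = demoParse(Malformed))
    return false;                     // ec is true: report ec.message()
  return true;                        // default-constructed: success
}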
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index 515295bdd66f..a3a551f851f3 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -17,6 +17,8 @@
#ifndef LLVM_SUPPORT_TYPE_TRAITS_H
#define LLVM_SUPPORT_TYPE_TRAITS_H
+#include "llvm/Support/DataTypes.h"
+#include <cstddef>
#include <utility>
// This is actually the conforming implementation which works with abstract
@@ -64,22 +66,99 @@ struct isPodLike {
// std::pair's are pod-like if their elements are.
template<typename T, typename U>
struct isPodLike<std::pair<T, U> > {
- static const bool value = isPodLike<T>::value & isPodLike<U>::value;
+ static const bool value = isPodLike<T>::value && isPodLike<U>::value;
};
+template <class T, T v>
+struct integral_constant {
+ typedef T value_type;
+ static const value_type value = v;
+ typedef integral_constant<T,v> type;
+ operator value_type() { return value; }
+};
+
+typedef integral_constant<bool, true> true_type;
+typedef integral_constant<bool, false> false_type;
+
/// \brief Metafunction that determines whether the two given types are
/// equivalent.
-template<typename T, typename U>
-struct is_same {
- static const bool value = false;
+template<typename T, typename U> struct is_same : public false_type {};
+template<typename T> struct is_same<T, T> : public true_type {};
+
+/// \brief Metafunction that removes const qualification from a type.
+template <typename T> struct remove_const { typedef T type; };
+template <typename T> struct remove_const<const T> { typedef T type; };
+
+/// \brief Metafunction that removes volatile qualification from a type.
+template <typename T> struct remove_volatile { typedef T type; };
+template <typename T> struct remove_volatile<volatile T> { typedef T type; };
+
+/// \brief Metafunction that removes both const and volatile qualification from
+/// a type.
+template <typename T> struct remove_cv {
+ typedef typename remove_const<typename remove_volatile<T>::type>::type type;
};
-template<typename T>
-struct is_same<T, T> {
- static const bool value = true;
+/// \brief Helper to implement is_integral metafunction.
+template <typename T> struct is_integral_impl : false_type {};
+template <> struct is_integral_impl< bool> : true_type {};
+template <> struct is_integral_impl< char> : true_type {};
+template <> struct is_integral_impl< signed char> : true_type {};
+template <> struct is_integral_impl<unsigned char> : true_type {};
+template <> struct is_integral_impl< wchar_t> : true_type {};
+template <> struct is_integral_impl< short> : true_type {};
+template <> struct is_integral_impl<unsigned short> : true_type {};
+template <> struct is_integral_impl< int> : true_type {};
+template <> struct is_integral_impl<unsigned int> : true_type {};
+template <> struct is_integral_impl< long> : true_type {};
+template <> struct is_integral_impl<unsigned long> : true_type {};
+template <> struct is_integral_impl< long long> : true_type {};
+template <> struct is_integral_impl<unsigned long long> : true_type {};
+
+/// \brief Metafunction that determines whether the given type is an integral
+/// type.
+template <typename T>
+struct is_integral : is_integral_impl<T> {};
+
+/// \brief Metafunction to remove reference from a type.
+template <typename T> struct remove_reference { typedef T type; };
+template <typename T> struct remove_reference<T&> { typedef T type; };
+
+/// \brief Metafunction that determines whether the given type is a pointer
+/// type.
+template <typename T> struct is_pointer : false_type {};
+template <typename T> struct is_pointer<T*> : true_type {};
+template <typename T> struct is_pointer<T* const> : true_type {};
+template <typename T> struct is_pointer<T* volatile> : true_type {};
+template <typename T> struct is_pointer<T* const volatile> : true_type {};
+
+/// \brief Metafunction that determines whether the given type is either an
+/// integral type or an enumeration type.
+///
+/// Note that this accepts potentially more integral types than we whitelist
+/// above for is_integral because it is based on merely being convertible
+/// implicitly to an integral type.
+template <typename T> class is_integral_or_enum {
+ // Provide an overload which can be called with anything implicitly
+ // convertible to an unsigned long long. This should catch integer types and
+ // enumeration types at least. We blacklist classes with conversion operators
+ // below.
+ static double check_int_convertible(unsigned long long);
+ static char check_int_convertible(...);
+
+ typedef typename remove_reference<T>::type UnderlyingT;
+ static UnderlyingT &nonce_instance;
+
+public:
+ enum {
+ value = (!is_class<UnderlyingT>::value && !is_pointer<UnderlyingT>::value &&
+ !is_same<UnderlyingT, float>::value &&
+ !is_same<UnderlyingT, double>::value &&
+ sizeof(char) != sizeof(check_int_convertible(nonce_instance)))
+ };
};
-
+
// enable_if_c - Enable/disable a template based on a metafunction
template<bool Cond, typename T = void>
struct enable_if_c {
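
The is_integral_or_enum trick above resolves entirely at compile time:
check_int_convertible(nonce_instance) selects the unsigned long long overload
(returning double) for integral and enumeration types, and the ellipsis
overload (returning char) otherwise, so comparing sizeof of the result
distinguishes the two cases. A sketch of pre-C++11-style compile-time checks
(Color and the typedef names are invented; a negative array size makes the
unexpected case fail to compile):

#include "llvm/Support/type_traits.h"
using namespace llvm;

enum Color { Red, Green };
typedef char expect_int [is_integral_or_enum<int>::value   ?  1 : -1];
typedef char expect_enum[is_integral_or_enum<Color>::value ?  1 : -1];
typedef char reject_ptr [is_integral_or_enum<int*>::value  ? -1 :  1];
typedef char reject_flt [is_integral_or_enum<float>::value ? -1 :  1];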
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index afce76099867..5e68c10a47ac 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -20,6 +20,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
@@ -32,7 +33,6 @@ class BitsRecTy;
class IntRecTy;
class StringRecTy;
class ListRecTy;
-class CodeRecTy;
class DagRecTy;
class RecordRecTy;
@@ -43,7 +43,6 @@ class BitInit;
class BitsInit;
class IntInit;
class StringInit;
-class CodeInit;
class ListInit;
class UnOpInit;
class BinOpInit;
@@ -68,6 +67,7 @@ class RecordKeeper;
class RecTy {
ListRecTy *ListTy;
+ virtual void anchor();
public:
RecTy() : ListTy(0) {}
virtual ~RecTy() {}
@@ -99,7 +99,6 @@ public: // These methods should only be called from subclasses of Init
virtual Init *convertValue( TernOpInit *UI) {
return convertValue((TypedInit*)UI);
}
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( DagInit *DI) { return 0; }
@@ -119,7 +118,6 @@ public: // These methods should only be called by subclasses of RecTy.
virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
};
@@ -144,7 +142,6 @@ public:
virtual Init *convertValue( IntInit *II);
virtual Init *convertValue(StringInit *SI) { return 0; }
virtual Init *convertValue( ListInit *LI) { return 0; }
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return (Init*)VB; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( DagInit *DI) { return 0; }
@@ -165,7 +162,6 @@ public:
virtual bool baseClassOf(const IntRecTy *RHS) const { return true; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
@@ -189,7 +185,6 @@ public:
virtual Init *convertValue( IntInit *II);
virtual Init *convertValue(StringInit *SI) { return 0; }
virtual Init *convertValue( ListInit *LI) { return 0; }
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( DagInit *DI) { return 0; }
@@ -212,7 +207,6 @@ public:
virtual bool baseClassOf(const IntRecTy *RHS) const { return true; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
@@ -233,7 +227,6 @@ public:
virtual Init *convertValue( IntInit *II) { return (Init*)II; }
virtual Init *convertValue(StringInit *SI) { return 0; }
virtual Init *convertValue( ListInit *LI) { return 0; }
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( DagInit *DI) { return 0; }
@@ -255,7 +248,6 @@ public:
virtual bool baseClassOf(const IntRecTy *RHS) const { return true; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
@@ -279,7 +271,6 @@ public:
virtual Init *convertValue( BinOpInit *BO);
virtual Init *convertValue( TernOpInit *BO) { return RecTy::convertValue(BO);}
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( DagInit *DI) { return 0; }
@@ -298,7 +289,6 @@ public:
virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return true; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
};
@@ -322,7 +312,6 @@ public:
virtual Init *convertValue( IntInit *II) { return 0; }
virtual Init *convertValue(StringInit *SI) { return 0; }
virtual Init *convertValue( ListInit *LI);
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( DagInit *DI) { return 0; }
@@ -346,47 +335,6 @@ public:
virtual bool baseClassOf(const ListRecTy *RHS) const {
return RHS->getElementType()->typeIsConvertibleTo(Ty);
}
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
-};
-
-/// CodeRecTy - 'code' - Represent an code fragment, function or method.
-///
-class CodeRecTy : public RecTy {
- static CodeRecTy Shared;
- CodeRecTy() {}
-public:
- static CodeRecTy *get() { return &Shared; }
-
- virtual Init *convertValue( UnsetInit *UI) { return (Init*)UI; }
- virtual Init *convertValue( BitInit *BI) { return 0; }
- virtual Init *convertValue( BitsInit *BI) { return 0; }
- virtual Init *convertValue( IntInit *II) { return 0; }
- virtual Init *convertValue(StringInit *SI) { return 0; }
- virtual Init *convertValue( ListInit *LI) { return 0; }
- virtual Init *convertValue( CodeInit *CI) { return (Init*)CI; }
- virtual Init *convertValue(VarBitInit *VB) { return 0; }
- virtual Init *convertValue( DefInit *DI) { return 0; }
- virtual Init *convertValue( DagInit *DI) { return 0; }
- virtual Init *convertValue( UnOpInit *UI) { return RecTy::convertValue(UI);}
- virtual Init *convertValue( BinOpInit *UI) { return RecTy::convertValue(UI);}
- virtual Init *convertValue( TernOpInit *UI) { return RecTy::convertValue(UI);}
- virtual Init *convertValue( TypedInit *TI);
- virtual Init *convertValue( VarInit *VI) { return RecTy::convertValue(VI);}
- virtual Init *convertValue( FieldInit *FI) { return RecTy::convertValue(FI);}
-
- std::string getAsString() const { return "code"; }
-
- bool typeIsConvertibleTo(const RecTy *RHS) const {
- return RHS->baseClassOf(this);
- }
- virtual bool baseClassOf(const BitRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const BitsRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return true; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
};
@@ -405,7 +353,6 @@ public:
virtual Init *convertValue( IntInit *II) { return 0; }
virtual Init *convertValue(StringInit *SI) { return 0; }
virtual Init *convertValue( ListInit *LI) { return 0; }
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( DefInit *DI) { return 0; }
virtual Init *convertValue( UnOpInit *BO);
@@ -427,7 +374,6 @@ public:
virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return true; }
virtual bool baseClassOf(const RecordRecTy *RHS) const { return false; }
};
@@ -451,7 +397,6 @@ public:
virtual Init *convertValue( IntInit *II) { return 0; }
virtual Init *convertValue(StringInit *SI) { return 0; }
virtual Init *convertValue( ListInit *LI) { return 0; }
- virtual Init *convertValue( CodeInit *CI) { return 0; }
virtual Init *convertValue(VarBitInit *VB) { return 0; }
virtual Init *convertValue( UnOpInit *UI) { return RecTy::convertValue(UI);}
virtual Init *convertValue( BinOpInit *UI) { return RecTy::convertValue(UI);}
@@ -472,7 +417,6 @@ public:
virtual bool baseClassOf(const IntRecTy *RHS) const { return false; }
virtual bool baseClassOf(const StringRecTy *RHS) const { return false; }
virtual bool baseClassOf(const ListRecTy *RHS) const { return false; }
- virtual bool baseClassOf(const CodeRecTy *RHS) const { return false; }
virtual bool baseClassOf(const DagRecTy *RHS) const { return false; }
virtual bool baseClassOf(const RecordRecTy *RHS) const;
};
@@ -489,6 +433,7 @@ RecTy *resolveTypes(RecTy *T1, RecTy *T2);
class Init {
Init(const Init &); // Do not define.
Init &operator=(const Init &); // Do not define.
+ virtual void anchor();
protected:
Init(void) {}
@@ -617,6 +562,7 @@ class UnsetInit : public Init {
UnsetInit() : Init() {}
UnsetInit(const UnsetInit &); // Do not define.
UnsetInit &operator=(const UnsetInit &Other); // Do not define.
+ virtual void anchor();
public:
static UnsetInit *get();
@@ -638,6 +584,7 @@ class BitInit : public Init {
explicit BitInit(bool V) : Value(V) {}
BitInit(const BitInit &Other); // Do not define.
BitInit &operator=(BitInit &Other); // Do not define.
+ virtual void anchor();
public:
static BitInit *get(bool V);
@@ -725,8 +672,7 @@ public:
///
virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
unsigned Bit) const {
- assert(0 && "Illegal bit reference off int");
- return 0;
+ llvm_unreachable("Illegal bit reference off int");
}
/// resolveListElementReference - This method is used to implement
@@ -734,8 +680,7 @@ public:
/// now, we return the resolved value, otherwise we return null.
virtual Init *resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
- assert(0 && "Illegal element reference off int");
- return 0;
+ llvm_unreachable("Illegal element reference off int");
}
};
@@ -750,9 +695,10 @@ class StringInit : public TypedInit {
StringInit(const StringInit &Other); // Do not define.
StringInit &operator=(const StringInit &Other); // Do not define.
+ virtual void anchor();
public:
- static StringInit *get(const std::string &V);
+ static StringInit *get(StringRef);
const std::string &getValue() const { return Value; }
@@ -769,8 +715,7 @@ public:
///
virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
unsigned Bit) const {
- assert(0 && "Illegal bit reference off string");
- return 0;
+ llvm_unreachable("Illegal bit reference off string");
}
/// resolveListElementReference - This method is used to implement
@@ -778,31 +723,8 @@ public:
/// now, we return the resolved value, otherwise we return null.
virtual Init *resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
- assert(0 && "Illegal element reference off string");
- return 0;
- }
-};
-
-/// CodeInit - "[{...}]" - Represent a code fragment.
-///
-class CodeInit : public Init {
- std::string Value;
-
- explicit CodeInit(const std::string &V) : Value(V) {}
-
- CodeInit(const CodeInit &Other); // Do not define.
- CodeInit &operator=(const CodeInit &Other); // Do not define.
-
-public:
- static CodeInit *get(const std::string &V);
-
- const std::string &getValue() const { return Value; }
-
- virtual Init *convertInitializerTo(RecTy *Ty) const {
- return Ty->convertValue(const_cast<CodeInit *>(this));
+ llvm_unreachable("Illegal element reference off string");
}
-
- virtual std::string getAsString() const { return "[{" + Value + "}]"; }
};
/// ListInit - [AL, AH, CL] - Represent a list of defs
@@ -861,8 +783,7 @@ public:
///
virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
unsigned Bit) const {
- assert(0 && "Illegal bit reference off list");
- return 0;
+ llvm_unreachable("Illegal bit reference off list");
}
/// resolveListElementReference - This method is used to implement
@@ -1058,9 +979,11 @@ public:
/// VarInit - 'Opcode' - Represent a reference to an entire variable object.
///
class VarInit : public TypedInit {
- std::string VarName;
+ Init *VarName;
explicit VarInit(const std::string &VN, RecTy *T)
+ : TypedInit(T), VarName(StringInit::get(VN)) {}
+ explicit VarInit(Init *VN, RecTy *T)
: TypedInit(T), VarName(VN) {}
VarInit(const VarInit &Other); // Do not define.
@@ -1074,7 +997,11 @@ public:
return Ty->convertValue(const_cast<VarInit *>(this));
}
- const std::string &getName() const { return VarName; }
+ const std::string &getName() const;
+ Init *getNameInit() const { return VarName; }
+ std::string getNameInitAsString() const {
+ return getNameInit()->getAsUnquotedString();
+ }
virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
unsigned Bit) const;
@@ -1092,7 +1019,7 @@ public:
///
virtual Init *resolveReferences(Record &R, const RecordVal *RV) const;
- virtual std::string getAsString() const { return VarName; }
+ virtual std::string getAsString() const { return getName(); }
};
@@ -1201,8 +1128,7 @@ public:
///
virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
unsigned Bit) const {
- assert(0 && "Illegal bit reference off def");
- return 0;
+ llvm_unreachable("Illegal bit reference off def");
}
/// resolveListElementReference - This method is used to implement
@@ -1210,8 +1136,7 @@ public:
/// now, we return the resolved value, otherwise we return null.
virtual Init *resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
- assert(0 && "Illegal element reference off def");
- return 0;
+ llvm_unreachable("Illegal element reference off def");
}
};
@@ -1320,14 +1245,12 @@ public:
virtual Init *resolveBitReference(Record &R, const RecordVal *RV,
unsigned Bit) const {
- assert(0 && "Illegal bit reference off dag");
- return 0;
+ llvm_unreachable("Illegal bit reference off dag");
}
virtual Init *resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
- assert(0 && "Illegal element reference off dag");
- return 0;
+ llvm_unreachable("Illegal element reference off dag");
}
};
@@ -1345,6 +1268,10 @@ public:
RecordVal(const std::string &N, RecTy *T, unsigned P);
const std::string &getName() const;
+ const Init *getNameInit() const { return Name; }
+ std::string getNameInitAsString() const {
+ return getNameInit()->getAsUnquotedString();
+ }
unsigned getPrefix() const { return Prefix; }
RecTy *getType() const { return Ty; }
@@ -1375,7 +1302,7 @@ class Record {
unsigned ID;
Init *Name;
SMLoc Loc;
- std::vector<std::string> TemplateArgs;
+ std::vector<Init *> TemplateArgs;
std::vector<RecordVal> Values;
std::vector<Record*> SuperClasses;
@@ -1384,13 +1311,21 @@ class Record {
DefInit *TheInit;
+ void init();
void checkName();
public:
// Constructs a record.
explicit Record(const std::string &N, SMLoc loc, RecordKeeper &records) :
- ID(LastID++), Name(StringInit::get(N)), Loc(loc), TrackedRecords(records), TheInit(0) {}
+ ID(LastID++), Name(StringInit::get(N)), Loc(loc), TrackedRecords(records),
+ TheInit(0) {
+ init();
+ }
+ explicit Record(Init *N, SMLoc loc, RecordKeeper &records) :
+ ID(LastID++), Name(N), Loc(loc), TrackedRecords(records), TheInit(0) {
+ init();
+ }
~Record() {}
@@ -1400,6 +1335,13 @@ public:
unsigned getID() const { return ID; }
const std::string &getName() const;
+ Init *getNameInit() const {
+ return Name;
+ }
+ const std::string getNameInitAsString() const {
+ return getNameInit()->getAsUnquotedString();
+ }
+
void setName(Init *Name); // Also updates RecordKeeper.
void setName(const std::string &Name); // Also updates RecordKeeper.
@@ -1408,46 +1350,69 @@ public:
/// get the corresponding DefInit.
DefInit *getDefInit();
- const std::vector<std::string> &getTemplateArgs() const {
+ const std::vector<Init *> &getTemplateArgs() const {
return TemplateArgs;
}
const std::vector<RecordVal> &getValues() const { return Values; }
const std::vector<Record*> &getSuperClasses() const { return SuperClasses; }
- bool isTemplateArg(StringRef Name) const {
+ bool isTemplateArg(Init *Name) const {
for (unsigned i = 0, e = TemplateArgs.size(); i != e; ++i)
if (TemplateArgs[i] == Name) return true;
return false;
}
+ bool isTemplateArg(StringRef Name) const {
+ return isTemplateArg(StringInit::get(Name.str()));
+ }
- const RecordVal *getValue(StringRef Name) const {
+ const RecordVal *getValue(const Init *Name) const {
for (unsigned i = 0, e = Values.size(); i != e; ++i)
- if (Values[i].getName() == Name) return &Values[i];
+ if (Values[i].getNameInit() == Name) return &Values[i];
return 0;
}
- RecordVal *getValue(StringRef Name) {
+ const RecordVal *getValue(StringRef Name) const {
+ return getValue(StringInit::get(Name));
+ }
+ RecordVal *getValue(const Init *Name) {
for (unsigned i = 0, e = Values.size(); i != e; ++i)
- if (Values[i].getName() == Name) return &Values[i];
+ if (Values[i].getNameInit() == Name) return &Values[i];
return 0;
}
+ RecordVal *getValue(StringRef Name) {
+ return getValue(StringInit::get(Name));
+ }
- void addTemplateArg(StringRef Name) {
+ void addTemplateArg(Init *Name) {
assert(!isTemplateArg(Name) && "Template arg already defined!");
TemplateArgs.push_back(Name);
}
+ void addTemplateArg(StringRef Name) {
+ addTemplateArg(StringInit::get(Name.str()));
+ }
void addValue(const RecordVal &RV) {
- assert(getValue(RV.getName()) == 0 && "Value already added!");
+ assert(getValue(RV.getNameInit()) == 0 && "Value already added!");
Values.push_back(RV);
+ if (Values.size() > 1)
+ // Keep NAME at the end of the list. It makes record dumps a
+ // bit prettier and allows TableGen tests to be written more
+ // naturally. Tests can use CHECK-NEXT to look for Record
+ // fields they expect to see after a def. They can't do that if
+ // NAME is the first Record field.
+ std::swap(Values[Values.size() - 2], Values[Values.size() - 1]);
}
- void removeValue(StringRef Name) {
+ void removeValue(Init *Name) {
for (unsigned i = 0, e = Values.size(); i != e; ++i)
- if (Values[i].getName() == Name) {
+ if (Values[i].getNameInit() == Name) {
Values.erase(Values.begin()+i);
return;
}
- assert(0 && "Cannot remove an entry that does not exist!");
+ llvm_unreachable("Cannot remove an entry that does not exist!");
+ }
+
+ void removeValue(StringRef Name) {
+ removeValue(StringInit::get(Name.str()));
}
bool isSubClassOf(const Record *R) const {
@@ -1459,7 +1424,7 @@ public:
bool isSubClassOf(StringRef Name) const {
for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
- if (SuperClasses[i]->getName() == Name)
+ if (SuperClasses[i]->getNameInitAsString() == Name)
return true;
return false;
}
@@ -1553,12 +1518,6 @@ public:
/// the value is not the right type.
///
DagInit *getValueAsDag(StringRef FieldName) const;
-
- /// getValueAsCode - This method looks up the specified field and returns
- /// its value as the string data in a CodeInit, throwing an exception if the
- /// field does not exist or if the value is not a code object.
- ///
- std::string getValueAsCode(StringRef FieldName) const;
};
raw_ostream &operator<<(raw_ostream &OS, const Record &R);
@@ -1576,6 +1535,7 @@ struct MultiClass {
class RecordKeeper {
std::map<std::string, Record*> Classes, Defs;
+
public:
~RecordKeeper() {
for (std::map<std::string, Record*>::iterator I = Classes.begin(),
@@ -1598,12 +1558,12 @@ public:
return I == Defs.end() ? 0 : I->second;
}
void addClass(Record *R) {
- assert(getClass(R->getName()) == 0 && "Class already exists!");
- Classes.insert(std::make_pair(R->getName(), R));
+ assert(getClass(R->getNameInitAsString()) == 0 && "Class already exists!");
+ Classes.insert(std::make_pair(R->getNameInitAsString(), R));
}
void addDef(Record *R) {
- assert(getDef(R->getName()) == 0 && "Def already exists!");
- Defs.insert(std::make_pair(R->getName(), R));
+ assert(getDef(R->getNameInitAsString()) == 0 && "Def already exists!");
+ Defs.insert(std::make_pair(R->getNameInitAsString(), R));
}
/// removeClass - Remove, but do not delete, the specified record.
@@ -1650,6 +1610,16 @@ struct LessRecordFieldName {
raw_ostream &operator<<(raw_ostream &OS, const RecordKeeper &RK);
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ Init *Name, const std::string &Scoper);
+
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ const std::string &Name, const std::string &Scoper);
+
} // End llvm namespace
 #endif
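
With record names held as Init* rather than std::string, names can now be
computed values. A minimal sketch of the parallel StringRef/Init* APIs above;
the record and field names are invented, and the StringRef overloads shown in
the diff delegate to uniqued StringInits:

#include "llvm/TableGen/Record.h"
using namespace llvm;

static void demoRecordNames(RecordKeeper &RK, SMLoc Loc) {
  // Name supplied as an Init; the old std::string constructor still works.
  Record *R = new Record(StringInit::get("MyInst"), Loc, RK);
  R->addValue(RecordVal("opc", IntRecTy::get(), 0));
  if (const RecordVal *RV = R->getValue(StringRef("opc")))
    (void)RV->getNameInitAsString();  // "opc", unquoted
  RK.addDef(R);                       // keyed by R->getNameInitAsString()
}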
diff --git a/include/llvm/TableGen/TableGenAction.h b/include/llvm/TableGen/TableGenAction.h
index 9f1c23c5b457..733ae626447c 100644
--- a/include/llvm/TableGen/TableGenAction.h
+++ b/include/llvm/TableGen/TableGenAction.h
@@ -21,6 +21,7 @@ class raw_ostream;
class RecordKeeper;
class TableGenAction {
+ virtual void anchor();
public:
virtual ~TableGenAction() {}
diff --git a/include/llvm/TableGen/TableGenBackend.h b/include/llvm/TableGen/TableGenBackend.h
index 853f92e406fb..3ebcd92d0e48 100644
--- a/include/llvm/TableGen/TableGenBackend.h
+++ b/include/llvm/TableGen/TableGenBackend.h
@@ -16,7 +16,6 @@
#define LLVM_TABLEGEN_TABLEGENBACKEND_H
#include "llvm/Support/raw_ostream.h"
-#include <string>
namespace llvm {
@@ -24,6 +23,7 @@ class Record;
class RecordKeeper;
struct TableGenBackend {
+ virtual void anchor();
virtual ~TableGenBackend() {}
// run - All TableGen backends should implement the run method, which should
@@ -34,7 +34,7 @@ struct TableGenBackend {
public: // Useful helper routines...
/// EmitSourceFileHeader - Output a LLVM style file header to the specified
/// ostream.
- void EmitSourceFileHeader(const std::string &Desc, raw_ostream &OS) const;
+ void EmitSourceFileHeader(StringRef Desc, raw_ostream &OS) const;
};
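
A minimal backend sketch against the interface above; DemoBackend and its
output are invented, while run() and the StringRef-taking
EmitSourceFileHeader() are the members declared here:

#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;

class DemoBackend : public TableGenBackend {
  RecordKeeper &Records;
public:
  explicit DemoBackend(RecordKeeper &R) : Records(R) {}
  void run(raw_ostream &OS) {
    EmitSourceFileHeader("Demo backend output", OS); // StringRef, not string
    OS << "// " << Records.getDefs().size() << " defs seen\n";
  }
};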
diff --git a/include/llvm/Target/Mangler.h b/include/llvm/Target/Mangler.h
index c1c118b08cab..d5e165e58b91 100644
--- a/include/llvm/Target/Mangler.h
+++ b/include/llvm/Target/Mangler.h
@@ -17,11 +17,9 @@
#include "llvm/ADT/DenseMap.h"
namespace llvm {
-class StringRef;
class Twine;
-class Value;
class GlobalValue;
-template <typename T> class SmallVectorImpl;
+template <typename T> class SmallVectorImpl;
class MCContext;
class MCSymbol;
class TargetData;
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index aa9a4f5af18c..fa1ec5594522 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -22,8 +22,12 @@ include "llvm/Intrinsics.td"
class RegisterClass; // Forward def
// SubRegIndex - Use instances of SubRegIndex to identify subregisters.
-class SubRegIndex {
+class SubRegIndex<list<SubRegIndex> comps = []> {
string Namespace = "";
+
+ // ComposedOf - A list of two SubRegIndex instances, [A, B].
+ // This indicates that this SubRegIndex is the result of composing A and B.
+ list<SubRegIndex> ComposedOf = comps;
}
// RegAltNameIndex - The alternate name set to use for register operands of
@@ -83,9 +87,15 @@ class Register<string n, list<string> altNames = []> {
// CostPerUse - Additional cost of instructions using this register compared
// to other registers in its class. The register allocator will try to
// minimize the number of instructions using a register with a CostPerUse.
- // This is used by the x86-64 and ARM Thumb targets where some registers
+ // This is used by the x86-64 and ARM Thumb targets where some registers
// require larger instruction encodings.
int CostPerUse = 0;
+
+ // CoveredBySubRegs - When this bit is set, the value of this register is
+ // completely determined by the value of its sub-registers. For example, the
+ // x86 register AX is covered by its sub-registers AL and AH, but EAX is not
+ // covered by its sub-register AX.
+ bit CoveredBySubRegs = 0;
}
// RegisterWithSubRegs - This can be used to define instances of Register which
@@ -194,12 +204,15 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
//
// (decimate GPR, 2) - Pick every N'th element, starting with the first.
//
+// (interleave A, B, ...) - Interleave the elements from each argument list.
+//
// All of these operators work on ordered sets, not lists. That means
// duplicates are removed from sub-expressions.
// Set operators. The rest is defined in TargetSelectionDAG.td.
def sequence;
def decimate;
+def interleave;
// RegisterTuples - Automatically generate super-registers by forming tuples of
// sub-registers. This is useful for modeling register sequence constraints
@@ -356,6 +369,15 @@ class Instruction {
// associated with them. Once we've migrated all of them over to true
// pseudo-instructions that are lowered to real instructions prior to
// the printer/emitter, we can remove this attribute and just use isPseudo.
+ //
+ // The intended use is:
+ // isPseudo: Does not have encoding information and should be expanded,
+ // at the latest, during lowering to MCInst.
+ //
+ // isCodeGenOnly: Does have encoding information and can go through to the
+ // CodeEmitter unchanged, but duplicates a canonical instruction
+ // definition's encoding and should be ignored when constructing the
+ // assembler match tables.
bit isCodeGenOnly = 0;
// Is this instruction a pseudo instruction for use by the assembler parser.
@@ -414,7 +436,7 @@ class Predicate<string cond> {
/// NoHonorSignDependentRounding - This predicate is true if support for
/// sign-dependent-rounding is not enabled.
def NoHonorSignDependentRounding
- : Predicate<"!HonorSignDependentRoundingFPMath()">;
+ : Predicate<"!TM.Options.HonorSignDependentRoundingFPMath()">;
class Requires<list<Predicate> preds> {
list<Predicate> Predicates = preds;
@@ -679,6 +701,11 @@ def COPY : Instruction {
let neverHasSideEffects = 1;
let isAsCheapAsAMove = 1;
}
+def BUNDLE : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "BUNDLE";
+}
}
//===----------------------------------------------------------------------===//
@@ -698,7 +725,15 @@ class AsmParser {
// function of the AsmParser class to call on every matched instruction.
// This can be used to perform target specific instruction post-processing.
string AsmParserInstCleanup = "";
+}
+def DefaultAsmParser : AsmParser;
+//===----------------------------------------------------------------------===//
+// AsmParserVariant - Subtargets can have multiple different assembly parsers
+// (e.g. AT&T vs Intel syntax on X86 for example). This class can be
+// implemented by targets to describe such variants.
+//
+class AsmParserVariant {
// Variant - AsmParsers can be of multiple different variants. Variants are
 // used to support targets that need to parse multiple formats for the
// assembly language.
@@ -715,7 +750,7 @@ class AsmParser {
// purposes of matching.
string RegisterPrefix = "";
}
-def DefaultAsmParser : AsmParser;
+def DefaultAsmParserVariant : AsmParserVariant;
/// AssemblerPredicate - This is a Predicate that can be used when the assembler
/// matches instructions and aliases.
@@ -724,7 +759,20 @@ class AssemblerPredicate<string cond> {
string AssemblerCondString = cond;
}
-
+/// TokenAlias - This class allows targets to define assembler token
+/// operand aliases. That is, a token literal operand which is equivalent
+/// to another, canonical, token literal. For example, ARM allows:
+/// vmov.u32 s4, #0 -> vmov.i32 s4, #0
+/// 'u32' is a more specific designator for the 32-bit integer type specifier
+/// and is legal for any instruction which accepts 'i32' as a datatype suffix.
+/// def : TokenAlias<".u32", ".i32">;
+///
+/// This works by marking the match class of 'From' as a subclass of the
+/// match class of 'To'.
+class TokenAlias<string From, string To> {
+ string FromToken = From;
+ string ToToken = To;
+}
/// MnemonicAlias - This class allows targets to define assembler mnemonic
/// aliases. This should be used when all forms of one mnemonic are accepted
@@ -813,6 +861,10 @@ class Target {
// AssemblyParsers - The AsmParser instances available for this target.
list<AsmParser> AssemblyParsers = [DefaultAsmParser];
+ /// AssemblyParserVariants - The AsmParserVariant instances available for
+ /// this target.
+ list<AsmParserVariant> AssemblyParserVariants = [DefaultAsmParserVariant];
+
// AssemblyWriters - The AsmWriter instances available for this target.
list<AsmWriter> AssemblyWriters = [DefaultAsmWriter];
}
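
Put together, a hypothetical target fragment exercising the new Target.td
constructs might look like the following (all register and index names are
invented; the TokenAlias line is the example from its comment above):

def sub_lo   : SubRegIndex;
def sub_hi   : SubRegIndex;
def sub_even : SubRegIndex;
// Composed index: apply sub_even, then sub_lo within it.
def sub_even_lo : SubRegIndex<[sub_even, sub_lo]>;

def LO : Register<"lo">;
def HI : Register<"hi">;
def FULL : RegisterWithSubRegs<"full", [LO, HI]> {
  let SubRegIndices = [sub_lo, sub_hi];
  let CoveredBySubRegs = 1;  // FULL's value is exactly {HI, LO}
}

def : TokenAlias<".u32", ".i32">;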
diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h
index 275957e01532..a6251e7d3345 100644
--- a/include/llvm/Target/TargetCallingConv.h
+++ b/include/llvm/Target/TargetCallingConv.h
@@ -14,6 +14,10 @@
#ifndef LLVM_TARGET_TARGETCALLINGCONV_H
#define LLVM_TARGET_TARGETCALLINGCONV_H
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include <string>
+
namespace llvm {
namespace ISD {
diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
index 6da3ba13bb35..a53ed29f1ec1 100644
--- a/include/llvm/Target/TargetCallingConv.td
+++ b/include/llvm/Target/TargetCallingConv.td
@@ -133,3 +133,14 @@ class CCDelegateTo<CallingConv cc> : CCAction {
class CallingConv<list<CCAction> actions> {
list<CCAction> Actions = actions;
}
+
+/// CalleeSavedRegs - A list of callee saved registers for a given calling
+/// convention. The order of registers is used by PrologEpilogInsertion when
+/// allocation stack slots for saved registers.
+///
+/// For each CalleeSavedRegs def, TableGen will emit a FOO_SaveList array for
+/// returning from getCalleeSavedRegs(), and a FOO_RegMask bit mask suitable for
+/// returning from getCallPreservedMask().
+class CalleeSavedRegs<dag saves> {
+ dag SaveList = saves;
+}
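
For instance, a hypothetical target could define (register names invented):

def CSR_Demo : CalleeSavedRegs<(add R4, R5, R6, R7, LR)>;

TableGen would then emit a CSR_Demo_SaveList array for getCalleeSavedRegs()
and a CSR_Demo_RegMask bit mask for getCallPreservedMask(), per the comment
above.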
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index 26fd1870ac39..d116f392fb31 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -44,7 +44,7 @@ enum AlignTypeEnum {
AGGREGATE_ALIGN = 'a', ///< Aggregate alignment
STACK_ALIGN = 's' ///< Stack objects alignment
};
-
+
/// Target alignment element.
///
/// Stores the alignment data associated with a given alignment type (pointer,
@@ -80,7 +80,7 @@ private:
unsigned StackNaturalAlign; ///< Stack natural alignment
SmallVector<unsigned char, 8> LegalIntWidths; ///< Legal Integers.
-
+
/// Alignments- Where the primitive type alignment data is stored.
///
/// @sa init().
@@ -88,7 +88,7 @@ private:
/// pointers vs. 64-bit pointers by extending TargetAlignment, but for now,
/// we don't.
SmallVector<TargetAlignElem, 16> Alignments;
-
+
/// InvalidAlignmentElem - This member is a signal that a requested alignment
/// type and bit width were not found in the SmallVector.
static const TargetAlignElem InvalidAlignmentElem;
@@ -112,19 +112,30 @@ private:
return &align != &InvalidAlignmentElem;
}
+  /// Initialize a TargetData object with default values and ensure that the
+  /// target data pass is registered.
+ void init();
+
public:
/// Default ctor.
///
/// @note This has to exist, because this is a pass, but it should never be
/// used.
TargetData();
-
+
/// Constructs a TargetData from a specification string. See init().
explicit TargetData(StringRef TargetDescription)
: ImmutablePass(ID) {
- init(TargetDescription);
+ std::string errMsg = parseSpecifier(TargetDescription, this);
+ assert(errMsg == "" && "Invalid target data layout string.");
+ (void)errMsg;
}
+ /// Parses a target data specification string. Returns an error message
+ /// if the string is malformed, or the empty string on success. Optionally
+  /// initializes a TargetData object if passed a non-null pointer.
+ static std::string parseSpecifier(StringRef TargetDescription, TargetData* td = 0);
+
/// Initialize target data from properties stored in the module.
explicit TargetData(const Module *M);
@@ -141,9 +152,6 @@ public:
~TargetData(); // Not virtual, do not subclass this class
- //! Parse a target data layout string and initialize TargetData alignments.
- void init(StringRef TargetDescription);
-
/// Target endianness...
bool isLittleEndian() const { return LittleEndian; }
bool isBigEndian() const { return !LittleEndian; }
@@ -152,7 +160,7 @@ public:
/// TargetData. This representation is in the same format accepted by the
/// string constructor above.
std::string getStringRepresentation() const;
-
+
/// isLegalInteger - This function returns true if the specified type is
/// known to be a native integer type supported by the CPU. For example,
/// i64 is not native on most 32-bit CPUs and i37 is not native on any known
@@ -166,7 +174,7 @@ public:
return true;
return false;
}
-
+
bool isIllegalInteger(unsigned Width) const {
return !isLegalInteger(Width);
}
@@ -251,11 +259,11 @@ public:
/// getABITypeAlignment - Return the minimum ABI-required alignment for the
/// specified type.
unsigned getABITypeAlignment(Type *Ty) const;
-
+
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
-
+
/// getCallFrameTypeAlignment - Return the minimum ABI-required alignment
/// for the specified type when it is part of a call frame.
@@ -305,7 +313,7 @@ public:
assert((Alignment & (Alignment-1)) == 0 && "Alignment must be power of 2!");
return (Val + (Alignment-1)) & ~UIntTy(Alignment-1);
}
-
+
static char ID; // Pass identification, replacement for typeid
};
diff --git a/include/llvm/Target/TargetELFWriterInfo.h b/include/llvm/Target/TargetELFWriterInfo.h
index b97f3e2f4d0f..114295e8f985 100644
--- a/include/llvm/Target/TargetELFWriterInfo.h
+++ b/include/llvm/Target/TargetELFWriterInfo.h
@@ -15,9 +15,6 @@
#define LLVM_TARGET_TARGETELFWRITERINFO_H
namespace llvm {
- class Function;
- class TargetData;
- class TargetMachine;
//===--------------------------------------------------------------------===//
// TargetELFWriterInfo
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index 4c759b2ccb9f..d56db7b5118e 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -15,8 +15,6 @@
#define LLVM_TARGET_TARGETFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/ADT/ArrayRef.h"
#include <utility>
#include <vector>
@@ -24,8 +22,6 @@
namespace llvm {
class CalleeSavedInfo;
class MachineFunction;
- class MachineBasicBlock;
- class MachineMove;
class RegScavenger;
/// Information about stack frame layout on the target. It holds the direction
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 07f614d61d93..d1e380c5602a 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_TARGETINSTRINFO_H
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -278,8 +279,7 @@ public:
/// This is only invoked in cases where AnalyzeBranch returns success. It
/// returns the number of instructions that were removed.
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const {
- assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!");
- return 0;
+ llvm_unreachable("Target didn't implement TargetInstrInfo::RemoveBranch!");
}
/// InsertBranch - Insert branch code into the end of the specified
@@ -296,8 +296,7 @@ public:
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
- assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!");
- return 0;
+ llvm_unreachable("Target didn't implement TargetInstrInfo::InsertBranch!");
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
@@ -353,12 +352,28 @@ public:
return false;
}
+ /// isProfitableToUnpredicate - Return true if it's profitable to unpredicate
+ /// one side of a 'diamond', i.e. two sides of if-else predicated on mutually
+ /// exclusive predicates.
+ /// e.g.
+ /// subeq r0, r1, #1
+ /// addne r0, r1, #1
+ /// =>
+ /// sub r0, r1, #1
+ /// addne r0, r1, #1
+ ///
+  /// This may be profitable if conditional instructions are always executed.
+ virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const {
+ return false;
+ }
+
/// copyPhysReg - Emit instructions to copy a pair of physical registers.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- assert(0 && "Target didn't implement TargetInstrInfo::copyPhysReg!");
+ llvm_unreachable("Target didn't implement TargetInstrInfo::copyPhysReg!");
}
/// storeRegToStackSlot - Store the specified register of the given register
@@ -371,7 +386,8 @@ public:
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
+ llvm_unreachable("Target didn't implement "
+ "TargetInstrInfo::storeRegToStackSlot!");
}
/// loadRegFromStackSlot - Load the specified register of the given register
@@ -383,7 +399,8 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
+ llvm_unreachable("Target didn't implement "
+ "TargetInstrInfo::loadRegFromStackSlot!");
}
/// expandPostRAPseudo - This function is called for all pseudo instructions
@@ -535,7 +552,7 @@ public:
/// isUnpredicatedTerminator - Returns true if the instruction is a
/// terminator instruction that has not been predicated.
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const = 0;
/// PredicateInstruction - Convert the instruction into a predicated
/// instruction. It returns true if the operation was successful.
@@ -646,7 +663,16 @@ public:
virtual int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const;
+ SDNode *UseNode, unsigned UseIdx) const = 0;
+
+  /// getOutputLatency - Compute and return the output dependency latency of
+  /// a given pair of defs which both target the same register. This is usually
+ /// one.
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ return 1;
+ }
/// getInstrLatency - Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
@@ -656,7 +682,7 @@ public:
unsigned *PredCost = 0) const;
virtual int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const;
+ SDNode *Node) const = 0;
/// isHighLatencyDef - Return true if this opcode has high latency to its
/// result.
@@ -718,6 +744,80 @@ public:
///
virtual void setExecutionDomain(MachineInstr *MI, unsigned Domain) const {}
+
+ /// getPartialRegUpdateClearance - Returns the preferred minimum clearance
+ /// before an instruction with an unwanted partial register update.
+ ///
+ /// Some instructions only write part of a register, and implicitly need to
+ /// read the other parts of the register. This may cause unwanted stalls
+ /// preventing otherwise unrelated instructions from executing in parallel in
+ /// an out-of-order CPU.
+ ///
+ /// For example, the x86 instruction cvtsi2ss writes its result to bits
+ /// [31:0] of the destination xmm register. Bits [127:32] are unaffected, so
+ /// the instruction needs to wait for the old value of the register to become
+ /// available:
+ ///
+ /// addps %xmm1, %xmm0
+ /// movaps %xmm0, (%rax)
+ /// cvtsi2ss %rbx, %xmm0
+ ///
+ /// In the code above, the cvtsi2ss instruction needs to wait for the addps
+ /// instruction before it can issue, even though the high bits of %xmm0
+ /// probably aren't needed.
+ ///
+ /// This hook returns the preferred clearance before MI, measured in
+ /// instructions. Other defs of MI's operand OpNum are avoided in the last N
+ /// instructions before MI. It should only return a positive value for
+ /// unwanted dependencies. If the old bits of the defined register have
+ /// useful values, or if MI is determined to otherwise read the dependency,
+ /// the hook should return 0.
+ ///
+ /// The unwanted dependency may be handled by:
+ ///
+ /// 1. Allocating the same register for an MI def and use. That makes the
+ /// unwanted dependency identical to a required dependency.
+ ///
+ /// 2. Allocating a register for the def that has no defs in the previous N
+ /// instructions.
+ ///
+ /// 3. Calling breakPartialRegDependency() with the same arguments. This
+ /// allows the target to insert a dependency breaking instruction.
+ ///
+ virtual unsigned
+ getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ // The default implementation returns 0 for no partial register dependency.
+ return 0;
+ }
+
+ /// breakPartialRegDependency - Insert a dependency-breaking instruction
+ /// before MI to eliminate an unwanted dependency on OpNum.
+ ///
+ /// If it wasn't possible to avoid a def in the last N instructions before MI
+ /// (see getPartialRegUpdateClearance), this hook will be called to break the
+ /// unwanted dependency.
+ ///
+ /// On x86, an xorps instruction can be used as a dependency breaker:
+ ///
+ /// addps %xmm1, %xmm0
+ /// movaps %xmm0, (%rax)
+ /// xorps %xmm0, %xmm0
+ /// cvtsi2ss %rbx, %xmm0
+ ///
+ /// An <imp-kill> operand should be added to MI if an instruction was
+ /// inserted. This ties the instructions together in the post-ra scheduler.
+ ///
+ virtual void
+ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {}
+
+ /// Create machine specific model for scheduling.
+ virtual DFAPacketizer*
+ CreateTargetScheduleState(const TargetMachine*, const ScheduleDAG*) const {
+ return NULL;
+ }
+
private:
int CallFrameSetupOpcode, CallFrameDestroyOpcode;
};
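
A sketch of a target implementing the two new partial-update hooks; the XYZ
names and the clearance distance are invented, and the dependency breaker
follows the xorps pattern described above:

// Assumes XYZInstrInfo derives from TargetInstrInfoImpl.
unsigned XYZInstrInfo::getPartialRegUpdateClearance(
    const MachineInstr *MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  if (MI->getOpcode() == XYZ::CVTSI2SS && OpNum == 0)
    return 16;  // keep 16 instructions of clearance before the partial def
  return 0;     // everything else: no unwanted partial dependency
}

void XYZInstrInfo::breakPartialRegDependency(
    MachineBasicBlock::iterator MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  unsigned Reg = MI->getOperand(OpNum).getReg();
  // Emit a zeroing idiom the hardware treats as dependency-breaking, then
  // mark the register killed on MI so the post-RA scheduler keeps the pair
  // together.
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(XYZ::XORPSrr), Reg)
    .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
  MI->addRegisterKilled(Reg, TRI, true);
}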
@@ -746,6 +846,7 @@ public:
virtual bool hasStoreToStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
int &FrameIndex) const;
+ virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
virtual void reMaterialize(MachineBasicBlock &MBB,
@@ -761,6 +862,13 @@ public:
virtual bool isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const;
+ using TargetInstrInfo::getOperandLatency;
+ virtual int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const;
+ using TargetInstrInfo::getInstrLatency;
+ virtual int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const;
bool usePreRAHazardRecognizer() const;
diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h
index b198eb62f0c6..044afd9b7392 100644
--- a/include/llvm/Target/TargetJITInfo.h
+++ b/include/llvm/Target/TargetJITInfo.h
@@ -17,9 +17,9 @@
#ifndef LLVM_TARGET_TARGETJITINFO_H
#define LLVM_TARGET_TARGETJITINFO_H
-#include <cassert>
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/DataTypes.h"
+#include <cassert>
namespace llvm {
class Function;
@@ -30,6 +30,7 @@ namespace llvm {
/// TargetJITInfo - Target specific information required by the Just-In-Time
/// code generator.
class TargetJITInfo {
+ virtual void anchor();
public:
virtual ~TargetJITInfo() {}
@@ -45,8 +46,8 @@ namespace llvm {
/// ptr.
virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
JITCodeEmitter &JCE) {
- assert(0 && "This target doesn't implement emitGlobalValueIndirectSym!");
- return 0;
+ llvm_unreachable("This target doesn't implement "
+ "emitGlobalValueIndirectSym!");
}
/// Records the required size and alignment for a call stub in bytes.
@@ -57,8 +58,6 @@ namespace llvm {
/// Returns the maximum size and alignment for a call stub on this target.
virtual StubLayout getStubLayout() {
llvm_unreachable("This target doesn't implement getStubLayout!");
- StubLayout Result = {0, 0};
- return Result;
}
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
@@ -68,15 +67,13 @@ namespace llvm {
/// aligned from the address the JCE was set up to emit at.
virtual void *emitFunctionStub(const Function* F, void *Target,
JITCodeEmitter &JCE) {
- assert(0 && "This target doesn't implement emitFunctionStub!");
- return 0;
+ llvm_unreachable("This target doesn't implement emitFunctionStub!");
}
/// getPICJumpTableEntry - Returns the value of the jumptable entry for the
/// specific basic block.
virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) {
- assert(0 && "This target doesn't implement getPICJumpTableEntry!");
- return 0;
+ llvm_unreachable("This target doesn't implement getPICJumpTableEntry!");
}
/// LazyResolverFn - This typedef is used to represent the function that
@@ -97,8 +94,7 @@ namespace llvm {
/// function, and giving the JIT the target function used to do the lazy
/// resolving.
virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn) {
- assert(0 && "Not implemented for this target!");
- return 0;
+ llvm_unreachable("Not implemented for this target!");
}
/// relocate - Before the JIT can run a block of code that has been emitted,
@@ -114,8 +110,7 @@ namespace llvm {
/// handling thread local variables. This method returns a value only
/// meaningful to the target.
virtual char* allocateThreadLocalMemory(size_t size) {
- assert(0 && "This target does not implement thread local storage!");
- return 0;
+ llvm_unreachable("This target does not implement thread local storage!");
}
/// needsGOT - Allows a target to specify that it would like the
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index 02a1a3ca23a4..70e26bf3c5a4 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -11,33 +11,204 @@
#define LLVM_TARGET_TARGETLIBRARYINFO_H
#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
namespace llvm {
class Triple;
namespace LibFunc {
enum Func {
- /// void *memset(void *b, int c, size_t len);
- memset,
-
- // void *memcpy(void *s1, const void *s2, size_t n);
+ /// double acos(double x);
+ acos,
+ /// long double acosl(long double x);
+ acosl,
+ /// float acosf(float x);
+ acosf,
+ /// double asin(double x);
+ asin,
+ /// long double asinl(long double x);
+ asinl,
+ /// float asinf(float x);
+ asinf,
+ /// double atan(double x);
+ atan,
+ /// long double atanl(long double x);
+ atanl,
+ /// float atanf(float x);
+ atanf,
+ /// double atan2(double y, double x);
+ atan2,
+ /// long double atan2l(long double y, long double x);
+ atan2l,
+ /// float atan2f(float y, float x);
+ atan2f,
+ /// double ceil(double x);
+ ceil,
+ /// long double ceill(long double x);
+ ceill,
+ /// float ceilf(float x);
+ ceilf,
+ /// double copysign(double x, double y);
+ copysign,
+ /// float copysignf(float x, float y);
+ copysignf,
+ /// long double copysignl(long double x, long double y);
+ copysignl,
+ /// double cos(double x);
+ cos,
+ /// long double cosl(long double x);
+ cosl,
+ /// float cosf(float x);
+ cosf,
+ /// double cosh(double x);
+ cosh,
+ /// long double coshl(long double x);
+ coshl,
+ /// float coshf(float x);
+ coshf,
+ /// double exp(double x);
+ exp,
+ /// long double expl(long double x);
+ expl,
+ /// float expf(float x);
+ expf,
+ /// double exp2(double x);
+ exp2,
+ /// long double exp2l(long double x);
+ exp2l,
+ /// float exp2f(float x);
+ exp2f,
+ /// double expm1(double x);
+ expm1,
+ /// long double expm1l(long double x);
+ expm1l,
+ /// float expm1f(float x);
+ expm1f,
+ /// double fabs(double x);
+ fabs,
+ /// long double fabsl(long double x);
+ fabsl,
+ /// float fabsf(float x);
+ fabsf,
+ /// double floor(double x);
+ floor,
+ /// long double floorl(long double x);
+ floorl,
+ /// float floorf(float x);
+ floorf,
+ /// int fiprintf(FILE *stream, const char *format, ...);
+ fiprintf,
+ /// double fmod(double x, double y);
+ fmod,
+ /// long double fmodl(long double x, long double y);
+ fmodl,
+ /// float fmodf(float x, float y);
+ fmodf,
+ /// int fputs(const char *s, FILE *stream);
+ fputs,
+ /// size_t fwrite(const void *ptr, size_t size, size_t nitems,
+ /// FILE *stream);
+ fwrite,
+ /// int iprintf(const char *format, ...);
+ iprintf,
+ /// double log(double x);
+ log,
+ /// long double logl(long double x);
+ logl,
+ /// float logf(float x);
+ logf,
+ /// double log2(double x);
+ log2,
+ /// long double log2l(long double x);
+ log2l,
+ /// float log2f(float x);
+ log2f,
+ /// double log10(double x);
+ log10,
+ /// long double log10l(long double x);
+ log10l,
+ /// float log10f(float x);
+ log10f,
+ /// double log1p(double x);
+ log1p,
+ /// long double log1pl(long double x);
+ log1pl,
+ /// float log1pf(float x);
+ log1pf,
+ /// void *memcpy(void *s1, const void *s2, size_t n);
memcpy,
-
- // void *memmove(void *s1, const void *s2, size_t n);
+ /// void *memmove(void *s1, const void *s2, size_t n);
memmove,
-
+ /// void *memset(void *b, int c, size_t len);
+ memset,
/// void memset_pattern16(void *b, const void *pattern16, size_t len);
memset_pattern16,
-
- /// int iprintf(const char *format, ...);
- iprintf,
-
+ /// double nearbyint(double x);
+ nearbyint,
+ /// float nearbyintf(float x);
+ nearbyintf,
+ /// long double nearbyintl(long double x);
+ nearbyintl,
+ /// double pow(double x, double y);
+ pow,
+ /// float powf(float x, float y);
+ powf,
+ /// long double powl(long double x, long double y);
+ powl,
+ /// double rint(double x);
+ rint,
+ /// float rintf(float x);
+ rintf,
+ /// long double rintl(long double x);
+ rintl,
+ /// double sin(double x);
+ sin,
+ /// long double sinl(long double x);
+ sinl,
+ /// float sinf(float x);
+ sinf,
+ /// double sinh(double x);
+ sinh,
+ /// long double sinhl(long double x);
+ sinhl,
+ /// float sinhf(float x);
+ sinhf,
/// int siprintf(char *str, const char *format, ...);
siprintf,
-
- /// int fiprintf(FILE *stream, const char *format, ...);
- fiprintf,
-
+ /// double sqrt(double x);
+ sqrt,
+ /// long double sqrtl(long double x);
+ sqrtl,
+ /// float sqrtf(float x);
+ sqrtf,
+ /// double tan(double x);
+ tan,
+ /// long double tanl(long double x);
+ tanl,
+ /// float tanf(float x);
+ tanf,
+ /// double tanh(double x);
+ tanh,
+ /// long double tanhl(long double x);
+ tanhl,
+ /// float tanhf(float x);
+ tanhf,
+ /// double trunc(double x);
+ trunc,
+ /// float truncf(float x);
+ truncf,
+ /// long double truncl(long double x);
+ truncl,
+ /// int __cxa_atexit(void (*f)(void *), void *p, void *d);
+ cxa_atexit,
+ /// void __cxa_guard_abort(guard_t *guard);
+ /// guard_t is int64_t in Itanium ABI or int32_t on ARM eabi.
+ cxa_guard_abort,
+ /// int __cxa_guard_acquire(guard_t *guard);
+ cxa_guard_acquire,
+ /// void __cxa_guard_release(guard_t *guard);
+ cxa_guard_release,
+
NumLibFuncs
};
}
@@ -46,7 +217,24 @@ namespace llvm {
/// library functions are available for the current target, and allows a
/// frontend to disable optimizations through -fno-builtin etc.
class TargetLibraryInfo : public ImmutablePass {
- unsigned char AvailableArray[(LibFunc::NumLibFuncs+7)/8];
+ virtual void anchor();
+ unsigned char AvailableArray[(LibFunc::NumLibFuncs+3)/4];
+ llvm::DenseMap<unsigned, std::string> CustomNames;
+ static const char* StandardNames[LibFunc::NumLibFuncs];
+
+ enum AvailabilityState {
+ StandardName = 3, // (memset to all ones)
+ CustomName = 1,
+ Unavailable = 0 // (memset to all zeros)
+ };
+ void setState(LibFunc::Func F, AvailabilityState State) {
+ AvailableArray[F/4] &= ~(3 << 2*(F&3));
+ AvailableArray[F/4] |= State << 2*(F&3);
+ }
+ AvailabilityState getState(LibFunc::Func F) const {
+ return static_cast<AvailabilityState>((AvailableArray[F/4] >> 2*(F&3)) & 3);
+ }
+
public:
static char ID;
TargetLibraryInfo();
@@ -56,19 +244,39 @@ public:
/// has - This function is used by optimizations that want to match on or form
/// a given library function.
bool has(LibFunc::Func F) const {
- return (AvailableArray[F/8] & (1 << (F&7))) != 0;
+ return getState(F) != Unavailable;
+ }
+
+ StringRef getName(LibFunc::Func F) const {
+ AvailabilityState State = getState(F);
+ if (State == Unavailable)
+ return StringRef();
+ if (State == StandardName)
+ return StandardNames[F];
+ assert(State == CustomName);
+ return CustomNames.find(F)->second;
}
/// setUnavailable - this can be used by whatever sets up TargetLibraryInfo to
/// ban use of specific library functions.
void setUnavailable(LibFunc::Func F) {
- AvailableArray[F/8] &= ~(1 << (F&7));
+ setState(F, Unavailable);
}
void setAvailable(LibFunc::Func F) {
- AvailableArray[F/8] |= 1 << (F&7);
+ setState(F, StandardName);
}
-
+
+ void setAvailableWithName(LibFunc::Func F, StringRef Name) {
+ if (StandardNames[F] != Name) {
+ setState(F, CustomName);
+ CustomNames[F] = Name;
+ assert(CustomNames.find(F) != CustomNames.end());
+ } else {
+ setState(F, StandardName);
+ }
+ }
+
/// disableAllFunctions - This disables all builtins, which is used for
/// options like -fno-builtin.
void disableAllFunctions();
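
Illustration (not part of the patch): the setState/getState pair above moves from one availability bit per function ((NumLibFuncs+7)/8 bytes) to two bits per function ((NumLibFuncs+3)/4 bytes), so each entry can hold one of three states. A standalone sketch of the same packing, with NumFuncs chosen arbitrarily:

    #include <cassert>
    #include <cstdio>

    enum AvailabilityState { Unavailable = 0, CustomName = 1, StandardName = 3 };

    enum { NumFuncs = 10 };
    static unsigned char Avail[(NumFuncs + 3) / 4]; // two bits per function

    static void setState(unsigned F, AvailabilityState S) {
      Avail[F / 4] &= ~(3 << 2 * (F & 3)); // clear the two bits for F
      Avail[F / 4] |= S << 2 * (F & 3);    // then store the new state
    }

    static AvailabilityState getState(unsigned F) {
      return AvailabilityState((Avail[F / 4] >> 2 * (F & 3)) & 3);
    }

    int main() {
      setState(5, StandardName);
      setState(6, CustomName);
      assert(getState(5) == StandardName);
      assert(getState(6) == CustomName);
      assert(getState(7) == Unavailable); // statics are zero-initialized
      std::printf("state(6) = %d\n", getState(6));
      return 0;
    }
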
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 013e70a05c03..720c9df99e2b 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -25,7 +25,6 @@
#include "llvm/CallingConv.h"
#include "llvm/InlineAsm.h"
#include "llvm/Attributes.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/Support/DebugLoc.h"
@@ -36,41 +35,34 @@
#include <vector>
namespace llvm {
- class AllocaInst;
- class APFloat;
class CallInst;
class CCState;
- class Function;
class FastISel;
class FunctionLoweringInfo;
class ImmutableCallSite;
+ class IntrinsicInst;
class MachineBasicBlock;
class MachineFunction;
- class MachineFrameInfo;
class MachineInstr;
class MachineJumpTableInfo;
class MCContext;
class MCExpr;
- class SDNode;
- class SDValue;
- class SelectionDAG;
template<typename T> class SmallVectorImpl;
class TargetData;
- class TargetMachine;
class TargetRegisterClass;
class TargetLoweringObjectFile;
class Value;
- // FIXME: should this be here?
- namespace TLSModel {
- enum Model {
- GeneralDynamic,
- LocalDynamic,
- InitialExec,
- LocalExec
+ namespace Sched {
+ enum Preference {
+ None, // No preference
+ Source, // Follow source order.
+ RegPressure, // Scheduling for lowest register pressure.
+ Hybrid, // Scheduling for both latency and register pressure.
+ ILP, // Scheduling for ILP in low register pressure mode.
+ VLIW // Scheduling for VLIW targets.
};
}
- TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc);
//===----------------------------------------------------------------------===//
@@ -94,7 +86,7 @@ public:
Custom // Use the LowerOperation hook to implement custom lowering.
};
- /// LegalizeAction - This enum indicates whether a types are legal for a
+ /// LegalizeTypeAction - This enum indicates whether types are legal for a
/// target, and if not, what action should be used to make them valid.
enum LegalizeTypeAction {
TypeLegal, // The target natively supports this type.
@@ -115,8 +107,6 @@ public:
static ISD::NodeType getExtendForContent(BooleanContent Content) {
switch (Content) {
- default:
- assert(false && "Unknown BooleanContent!");
case UndefinedBooleanContent:
// Extend by adding rubbish bits.
return ISD::ANY_EXTEND;
@@ -127,6 +117,7 @@ public:
// Extend by copying the sign bit.
return ISD::SIGN_EXTEND;
}
+ llvm_unreachable("Invalid content kind");
}
/// NOTE: The constructor takes ownership of TLOF.
@@ -199,9 +190,9 @@ public:
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual TargetRegisterClass *getRegClassFor(EVT VT) const {
+ virtual const TargetRegisterClass *getRegClassFor(EVT VT) const {
assert(VT.isSimple() && "getRegClassFor called on illegal type!");
- TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
assert(RC && "This value type is not natively supported!");
return RC;
}
@@ -292,11 +283,9 @@ public:
VT = getTypeToTransformTo(Context, VT);
break;
default:
- assert(false && "Type is not legal nor is it to be expanded!");
- return VT;
+ llvm_unreachable("Type is not legal nor is it to be expanded!");
}
}
- return VT;
}
/// getVectorTypeBreakdown - Vector types are broken down into some number of
@@ -520,8 +509,19 @@ public:
/// AllowUnknown is true, this will return MVT::Other for types with no EVT
/// counterpart (e.g. structs), otherwise it will assert.
EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
- EVT VT = EVT::getEVT(Ty, AllowUnknown);
- return VT == MVT::iPTR ? PointerTy : VT;
+ // Lower scalar pointers to native pointer types.
+ if (Ty->isPointerTy()) return PointerTy;
+
+ if (Ty->isVectorTy()) {
+ VectorType *VTy = cast<VectorType>(Ty);
+ Type *Elm = VTy->getElementType();
+ // Lower vectors of pointers to native pointer types.
+ if (Elm->isPointerTy())
+ Elm = EVT(PointerTy).getTypeForEVT(Ty->getContext());
+ return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
+ VTy->getNumElements());
+ }
+ return EVT::getEVT(Ty, AllowUnknown);
}
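
Illustration (not part of the patch): a worked example of the new pointer handling in getValueType, assuming PointerTy is MVT::i64 as on a typical 64-bit target:

    // Assuming PointerTy == MVT::i64:
    //   i32        -> i32    (ordinary types still go through EVT::getEVT)
    //   i8*        -> i64    (scalar pointers lower to the native pointer type)
    //   <4 x i16*> -> v4i64  (pointer elements rewritten, lane count preserved)
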
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
@@ -554,8 +554,7 @@ public:
if (VT.isInteger()) {
return getRegisterType(Context, getTypeToTransformTo(Context, VT));
}
- assert(0 && "Unsupported extended type!");
- return EVT(MVT::Other); // Not reached
+ llvm_unreachable("Unsupported extended type!");
}
/// getNumRegisters - Return the number of registers that this ValueType will
@@ -580,8 +579,7 @@ public:
unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
return (BitWidth + RegWidth - 1) / RegWidth;
}
- assert(0 && "Unsupported extended type!");
- return 0; // Not reached
+ llvm_unreachable("Unsupported extended type!");
}
/// ShouldShrinkFPConstant - If true, then instruction selection should
@@ -646,7 +644,7 @@ public:
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
- /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -654,7 +652,7 @@ public:
/// target-independent logic.
virtual EVT getOptimalMemOpType(uint64_t /*Size*/,
unsigned /*DstAlign*/, unsigned /*SrcAlign*/,
- bool /*NonScalarIntSafe*/,
+ bool /*IsZeroVal*/,
bool /*MemcpyStrSrc*/,
MachineFunction &/*MF*/) const {
return MVT::Other;
@@ -679,10 +677,10 @@ public:
return StackPointerRegisterToSaveRestore;
}
- /// getExceptionAddressRegister - If a physical register, this returns
+ /// getExceptionPointerRegister - If a physical register, this returns
/// the register that receives the exception address on entry to a landing
/// pad.
- unsigned getExceptionAddressRegister() const {
+ unsigned getExceptionPointerRegister() const {
return ExceptionPointerRegister;
}
@@ -772,8 +770,7 @@ public:
LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
MCContext &/*Ctx*/) const {
- assert(0 && "Need to implement this hook if target has custom JTIs");
- return 0;
+ llvm_unreachable("Need to implement this hook if target has custom JTIs");
}
/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
@@ -865,7 +862,6 @@ public:
/// Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -1035,7 +1031,7 @@ protected:
/// addRegisterClass - Add the specified register class as an available
/// regclass for the specified value type. This indicates the selector can
/// handle values of that class natively.
- void addRegisterClass(EVT VT, TargetRegisterClass *RC) {
+ void addRegisterClass(EVT VT, const TargetRegisterClass *RC) {
assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
AvailableRegClasses.push_back(std::make_pair(VT, RC));
RegClassForVT[VT.getSimpleVT().SimpleTy] = RC;
@@ -1141,26 +1137,28 @@ protected:
JumpBufAlignment = Align;
}
- /// setMinFunctionAlignment - Set the target's minimum function alignment.
+ /// setMinFunctionAlignment - Set the target's minimum function alignment (in
+ /// log2(bytes))
void setMinFunctionAlignment(unsigned Align) {
MinFunctionAlignment = Align;
}
/// setPrefFunctionAlignment - Set the target's preferred function alignment.
/// This should be set if there is a performance benefit to
- /// higher-than-minimum alignment
+ /// higher-than-minimum alignment (in log2(bytes))
void setPrefFunctionAlignment(unsigned Align) {
PrefFunctionAlignment = Align;
}
/// setPrefLoopAlignment - Set the target's preferred loop alignment. Default
/// alignment is zero, it means the target does not care about loop alignment.
+ /// The alignment is specified in log2(bytes).
void setPrefLoopAlignment(unsigned Align) {
PrefLoopAlignment = Align;
}
/// setMinStackArgumentAlignment - Set the minimum stack alignment of an
- /// argument.
+ /// argument (in log2(bytes)).
void setMinStackArgumentAlignment(unsigned Align) {
MinStackArgumentAlignment = Align;
}
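
Illustration (not part of the patch): because these setters now take log2(bytes), a target wanting 16-byte alignment passes 4, not 16. A sketch of typical calls in a hypothetical target's TargetLowering constructor:

    setMinFunctionAlignment(2);  // 2^2 = 4-byte minimum
    setPrefFunctionAlignment(4); // 2^4 = 16 bytes where it pays off
    setPrefLoopAlignment(4);     // align hot loop headers to 16 bytes
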
@@ -1196,8 +1194,7 @@ public:
const SmallVectorImpl<ISD::InputArg> &/*Ins*/,
DebugLoc /*dl*/, SelectionDAG &/*DAG*/,
SmallVectorImpl<SDValue> &/*InVals*/) const {
- assert(0 && "Not Implemented");
- return SDValue(); // this is here to silence compiler errors
+ llvm_unreachable("Not Implemented");
}
/// LowerCallTo - This function lowers an abstract call to a function into an
@@ -1224,7 +1221,8 @@ public:
LowerCallTo(SDValue Chain, Type *RetTy, bool RetSExt, bool RetZExt,
bool isVarArg, bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
- bool isReturnValueUsed, SDValue Callee, ArgListTy &Args,
+ bool doesNotRet, bool isReturnValueUsed,
+ SDValue Callee, ArgListTy &Args,
SelectionDAG &DAG, DebugLoc dl) const;
/// LowerCall - This hook must be implemented to lower calls into the
@@ -1236,14 +1234,13 @@ public:
virtual SDValue
LowerCall(SDValue /*Chain*/, SDValue /*Callee*/,
CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
- bool &/*isTailCall*/,
+ bool /*doesNotRet*/, bool &/*isTailCall*/,
const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
const SmallVectorImpl<SDValue> &/*OutVals*/,
const SmallVectorImpl<ISD::InputArg> &/*Ins*/,
DebugLoc /*dl*/, SelectionDAG &/*DAG*/,
SmallVectorImpl<SDValue> &/*InVals*/) const {
- assert(0 && "Not Implemented");
- return SDValue(); // this is here to silence compiler errors
+ llvm_unreachable("Not Implemented");
}
/// HandleByVal - Target-specific cleanup for formal ByVal parameters.
@@ -1273,14 +1270,15 @@ public:
const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
const SmallVectorImpl<SDValue> &/*OutVals*/,
DebugLoc /*dl*/, SelectionDAG &/*DAG*/) const {
- assert(0 && "Not Implemented");
- return SDValue(); // this is here to silence compiler errors
+ llvm_unreachable("Not Implemented");
}
/// isUsedByReturnOnly - Return true if result of the specified node is used
- /// by a return node only. This is used to determine whether it is possible
+ /// by a return node only. It also computes and returns the input chain for
+ /// the tail call.
+ /// This is used to determine whether it is possible
/// to codegen a libcall as tail call at legalization time.
- virtual bool isUsedByReturnOnly(SDNode *) const {
+ virtual bool isUsedByReturnOnly(SDNode *, SDValue &Chain) const {
return false;
}
@@ -1339,7 +1337,7 @@ public:
virtual void ReplaceNodeResults(SDNode * /*N*/,
SmallVectorImpl<SDValue> &/*Results*/,
SelectionDAG &/*DAG*/) const {
- assert(0 && "ReplaceNodeResults not implemented for this target!");
+ llvm_unreachable("ReplaceNodeResults not implemented for this target!");
}
/// getTargetNodeName() - This method returns the name of a target specific
@@ -1531,6 +1529,17 @@ public:
AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {}
};
+ /// GetAddrModeArguments - CodeGenPrepare sinks address calculations into the
+ /// same BB as Load/Store instructions reading the address. This allows as
+ /// much computation as possible to be done in the address mode for that
+ /// operand. This hook also lets targets indicate when the same should be
+ /// done for intrinsics that load or store.
+ virtual bool GetAddrModeArguments(IntrinsicInst *I,
+ SmallVectorImpl<Value*> &Ops,
+ Type *&AccessTy) const {
+ return false;
+ }
+
/// isLegalAddressingMode - Return true if the addressing mode represented by
/// AM is legal for this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
@@ -1581,6 +1590,18 @@ public:
return false;
}
+ /// isFNegFree - Return true if an fneg operation is free to the point where
+ /// it is never worthwhile to replace it with a bitwise operation.
+ virtual bool isFNegFree(EVT) const {
+ return false;
+ }
+
+ /// isFAbsFree - Return true if an fabs operation is free to the point where
+ /// it is never worthwhile to replace it with a bitwise operation.
+ virtual bool isFAbsFree(EVT) const {
+ return false;
+ }
+
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
/// from i32 to i8 but not from i32 to i16.
@@ -1593,9 +1614,9 @@ public:
//
SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
SelectionDAG &DAG) const;
- SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG,
+ SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
std::vector<SDNode*>* Created) const;
- SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG,
+ SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
std::vector<SDNode*>* Created) const;
@@ -1753,7 +1774,7 @@ private:
/// RegClassForVT - This indicates the default register class to use for
/// each ValueType the target supports natively.
- TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
+ const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
EVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
@@ -1925,12 +1946,9 @@ private:
// Vectors with illegal element types are expanded.
EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2);
return LegalizeKind(TypeSplitVector, NVT);
-
- assert(false && "Unable to handle this kind of vector type");
- return LegalizeKind(TypeLegal, VT);
}
- std::vector<std::pair<EVT, TargetRegisterClass*> > AvailableRegClasses;
+ std::vector<std::pair<EVT, const TargetRegisterClass*> > AvailableRegClasses;
/// TargetDAGCombineArray - Targets can specify ISD nodes that they would
/// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 7d06cec0a4e1..d631f58aab74 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -15,18 +15,17 @@
#ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
-#include "llvm/ADT/StringRef.h"
+#include "llvm/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/ADT/ArrayRef.h"
namespace llvm {
class MachineModuleInfo;
class Mangler;
- class MCAsmInfo;
class MCContext;
class MCExpr;
class MCSection;
- class MCSectionMachO;
class MCSymbol;
class MCStreamer;
class GlobalValue;
@@ -53,7 +52,13 @@ public:
virtual void emitPersonalityValue(MCStreamer &Streamer,
const TargetMachine &TM,
const MCSymbol *Sym) const;
-
+
+ /// emitModuleFlags - Emit the module flags that the platform cares about.
+ virtual void emitModuleFlags(MCStreamer &,
+ ArrayRef<Module::ModuleFlagEntry>,
+ Mangler *, const TargetMachine &) const {
+ }
+
/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively
/// decide not to emit the UsedDirective for some symbols in llvm.used.
/// FIXME: REMOVE this (rdar://7071300)
@@ -86,9 +91,7 @@ public:
const TargetMachine &TM) const {
return SectionForGlobal(GV, getKindForGlobal(GV, TM), Mang, TM);
}
-
-
-
+
/// getExplicitSectionGlobal - Targets should implement this method to assign
/// a section to globals with an explicit section specified. The
/// implementation of this method can assume that GV->hasSection() is true.
@@ -121,7 +124,18 @@ public:
const MCExpr *
getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const;
-
+
+ virtual const MCSection *
+ getStaticCtorSection(unsigned Priority = 65535) const {
+ (void)Priority;
+ return StaticCtorSection;
+ }
+ virtual const MCSection *
+ getStaticDtorSection(unsigned Priority = 65535) const {
+ (void)Priority;
+ return StaticDtorSection;
+ }
+
protected:
virtual const MCSection *
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 8a8d14229055..1a0560478a41 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -14,7 +14,8 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
-#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
@@ -23,11 +24,10 @@ namespace llvm {
class InstrItineraryData;
class JITCodeEmitter;
+class GlobalValue;
class MCAsmInfo;
class MCCodeGenInfo;
class MCContext;
-class Pass;
-class PassManager;
class PassManagerBase;
class Target;
class TargetData;
@@ -37,32 +37,13 @@ class TargetInstrInfo;
class TargetIntrinsicInfo;
class TargetJITInfo;
class TargetLowering;
+class TargetPassConfig;
class TargetRegisterInfo;
class TargetSelectionDAGInfo;
class TargetSubtargetInfo;
class formatted_raw_ostream;
class raw_ostream;
-// Code generation optimization level.
-namespace CodeGenOpt {
- enum Level {
- None, // -O0
- Less, // -O1
- Default, // -O2, -Os
- Aggressive // -O3
- };
-}
-
-namespace Sched {
- enum Preference {
- None, // No preference
- Latency, // Scheduling for shortest total latency.
- RegPressure, // Scheduling for lowest register pressure.
- Hybrid, // Scheduling for both latency and register pressure.
- ILP // Scheduling for ILP in low register pressure mode.
- };
-}
-
//===----------------------------------------------------------------------===//
///
/// TargetMachine - Primary interface to the complete machine description for
@@ -74,7 +55,7 @@ class TargetMachine {
void operator=(const TargetMachine &); // DO NOT IMPLEMENT
protected: // Can only create subclasses.
TargetMachine(const Target &T, StringRef TargetTriple,
- StringRef CPU, StringRef FS);
+ StringRef CPU, StringRef FS, const TargetOptions &Options);
/// getSubtargetImpl - virtual method implemented by subclasses that returns
/// a reference to that target's TargetSubtargetInfo-derived member variable.
@@ -101,6 +82,7 @@ protected: // Can only create subclasses.
unsigned MCSaveTempLabels : 1;
unsigned MCUseLoc : 1;
unsigned MCUseCFI : 1;
+ unsigned MCUseDwarfDirectory : 1;
public:
virtual ~TargetMachine();
@@ -111,6 +93,8 @@ public:
const StringRef getTargetCPU() const { return TargetCPU; }
const StringRef getTargetFeatureString() const { return TargetFS; }
+ TargetOptions Options;
+
// Interfaces to the major aspects of target machine information:
// -- Instruction opcode and operand information
// -- Pipelines and scheduling information
@@ -196,6 +180,14 @@ public:
/// setMCUseCFI - Set whether we should use dwarf's .cfi_* directives.
void setMCUseCFI(bool Value) { MCUseCFI = Value; }
+ /// hasMCUseDwarfDirectory - Check whether we should use .file directives with
+ /// explicit directories.
+ bool hasMCUseDwarfDirectory() const { return MCUseDwarfDirectory; }
+
+ /// setMCUseDwarfDirectory - Set whether we should use .file directives with
+ /// explicit directories.
+ void setMCUseDwarfDirectory(bool Value) { MCUseDwarfDirectory = Value; }
+
/// getRelocationModel - Returns the code generation relocation model. The
/// choices are static, PIC, and dynamic-no-pic, and target default.
Reloc::Model getRelocationModel() const;
@@ -204,6 +196,18 @@ public:
/// medium, large, and target default.
CodeModel::Model getCodeModel() const;
+ /// getTLSModel - Returns the TLS model which should be used for the given
+ /// global variable.
+ TLSModel::Model getTLSModel(const GlobalValue *GV) const;
+
+ /// getOptLevel - Returns the optimization level: None, Less,
+ /// Default, or Aggressive.
+ CodeGenOpt::Level getOptLevel() const;
+
+ void setFastISel(bool Enable) { Options.EnableFastISel = Enable; }
+
+ bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }
+
/// getAsmVerbosityDefault - Returns the default value of asm verbosity.
///
static bool getAsmVerbosityDefault();
@@ -236,10 +240,6 @@ public:
CGFT_Null // Do not emit any output.
};
- /// getEnableTailMergeDefault - the default setting for -enable-tail-merge
- /// on this target. User flag overrides.
- virtual bool getEnableTailMergeDefault() const { return true; }
-
/// addPassesToEmitFile - Add passes to the specified pass manager to get the
/// specified file emitted. Typically this will involve several steps of code
/// generation. This method should return true if emission of this file type
@@ -247,8 +247,7 @@ public:
virtual bool addPassesToEmitFile(PassManagerBase &,
formatted_raw_ostream &,
CodeGenFileType,
- CodeGenOpt::Level,
- bool = true) {
+ bool /*DisableVerify*/ = true) {
return true;
}
@@ -260,8 +259,7 @@ public:
///
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
JITCodeEmitter &,
- CodeGenOpt::Level,
- bool = true) {
+ bool /*DisableVerify*/ = true) {
return true;
}
@@ -273,8 +271,7 @@ public:
virtual bool addPassesToEmitMC(PassManagerBase &,
MCContext *&,
raw_ostream &,
- CodeGenOpt::Level,
- bool = true) {
+ bool /*DisableVerify*/ = true) {
return true;
}
};
@@ -285,25 +282,21 @@ public:
class LLVMTargetMachine : public TargetMachine {
protected: // Can only create subclasses.
LLVMTargetMachine(const Target &T, StringRef TargetTriple,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
-private:
- /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for
- /// both emitting to assembly files or machine code output.
- ///
- bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
- bool DisableVerify, MCContext *&OutCtx);
+ StringRef CPU, StringRef FS, TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
public:
+ /// createPassConfig - Create a pass configuration object to be used by
+ /// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
/// addPassesToEmitFile - Add passes to the specified pass manager to get the
/// specified file emitted. Typically this will involve several steps of code
- /// generation. If OptLevel is None, the code generator should emit code as
- /// fast as possible, though the generated code may be less efficient.
+ /// generation.
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level,
bool DisableVerify = true);
/// addPassesToEmitMachineCode - Add passes to the specified pass manager to
@@ -314,7 +307,6 @@ public:
///
virtual bool addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &MCE,
- CodeGenOpt::Level,
bool DisableVerify = true);
/// addPassesToEmitMC - Add passes to the specified pass manager to get
@@ -325,65 +317,15 @@ public:
virtual bool addPassesToEmitMC(PassManagerBase &PM,
MCContext *&Ctx,
raw_ostream &OS,
- CodeGenOpt::Level OptLevel,
bool DisableVerify = true);
- /// Target-Independent Code Generator Pass Configuration Options.
-
- /// addPreISelPasses - This method should add any "last minute" LLVM->LLVM
- /// passes (which are run just before instruction selector).
- virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) {
- return true;
- }
-
- /// addInstSelector - This method should install an instruction selector pass,
- /// which converts from LLVM code to machine instructions.
- virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) {
- return true;
- }
-
- /// addPreRegAlloc - This method may be implemented by targets that want to
- /// run passes immediately before register allocation. This should return
- /// true if -print-machineinstrs should print after these passes.
- virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level) {
- return false;
- }
-
- /// addPostRegAlloc - This method may be implemented by targets that want
- /// to run passes after register allocation but before prolog-epilog
- /// insertion. This should return true if -print-machineinstrs should print
- /// after these passes.
- virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level) {
- return false;
- }
-
- /// addPreSched2 - This method may be implemented by targets that want to
- /// run passes after prolog-epilog insertion and before the second instruction
- /// scheduling pass. This should return true if -print-machineinstrs should
- /// print after these passes.
- virtual bool addPreSched2(PassManagerBase &, CodeGenOpt::Level) {
- return false;
- }
-
- /// addPreEmitPass - This pass may be implemented by targets that want to run
- /// passes immediately before machine code is emitted. This should return
- /// true if -print-machineinstrs should print out the code after the passes.
- virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level) {
- return false;
- }
-
-
/// addCodeEmitter - This pass should be overridden by the target to add a
/// code emitter, if supported. If this is not supported, 'true' should be
/// returned.
- virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
+ virtual bool addCodeEmitter(PassManagerBase &,
JITCodeEmitter &) {
return true;
}
-
- /// getEnableTailMergeDefault - the default setting for -enable-tail-merge
- /// on this target. User flag overrides.
- virtual bool getEnableTailMergeDefault() const { return true; }
};
} // End llvm namespace
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 37f7b2fb8db5..f0b181e345b7 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -82,7 +82,12 @@ namespace TargetOpcode {
/// COPY - Target-independent register copy. This instruction can also be
/// used to copy between subregisters of virtual registers.
- COPY = 13
+ COPY = 13,
+
+ /// BUNDLE - This instruction represents an instruction bundle. Instructions
+ /// that immediately follow a BUNDLE instruction and are marked with the
+ /// 'InsideBundle' flag are inside the bundle.
+ BUNDLE
};
} // end namespace TargetOpcode
} // end namespace llvm
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index e07e8c1cea08..12a275731536 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -15,151 +15,177 @@
#ifndef LLVM_TARGET_TARGETOPTIONS_H
#define LLVM_TARGET_TARGETOPTIONS_H
+#include <string>
+
namespace llvm {
class MachineFunction;
+ class StringRef;
// Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
namespace FloatABI {
enum ABIType {
- Default, // Target-specific (either soft of hard depending on triple, etc).
+ Default, // Target-specific (either soft or hard depending on triple, etc).
Soft, // Soft float.
Hard // Hard float.
};
}
-
- /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
- /// option is specified on the command line, and should enable debugging
- /// output from the code generator.
- extern bool PrintMachineCode;
-
- /// NoFramePointerElim - This flag is enabled when the -disable-fp-elim is
- /// specified on the command line. If the target supports the frame pointer
- /// elimination optimization, this option should disable it.
- extern bool NoFramePointerElim;
-
- /// NoFramePointerElimNonLeaf - This flag is enabled when the
- /// -disable-non-leaf-fp-elim is specified on the command line. If the target
- /// supports the frame pointer elimination optimization, this option should
- /// disable it for non-leaf functions.
- extern bool NoFramePointerElimNonLeaf;
-
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- extern bool DisableFramePointerElim(const MachineFunction &MF);
-
- /// LessPreciseFPMAD - This flag is enabled when the
- /// -enable-fp-mad is specified on the command line. When this flag is off
- /// (the default), the code generator is not allowed to generate mad
- /// (multiply add) if the result is "less precise" than doing those operations
- /// individually.
- extern bool LessPreciseFPMADOption;
- extern bool LessPreciseFPMAD();
-
- /// NoExcessFPPrecision - This flag is enabled when the
- /// -disable-excess-fp-precision flag is specified on the command line. When
- /// this flag is off (the default), the code generator is allowed to produce
- /// results that are "more precise" than IEEE allows. This includes use of
- /// FMA-like operations and use of the X86 FP registers without rounding all
- /// over the place.
- extern bool NoExcessFPPrecision;
-
- /// UnsafeFPMath - This flag is enabled when the
- /// -enable-unsafe-fp-math flag is specified on the command line. When
- /// this flag is off (the default), the code generator is not allowed to
- /// produce results that are "less precise" than IEEE allows. This includes
- /// use of X86 instructions like FSIN and FCOS instead of libcalls.
- /// UnsafeFPMath implies LessPreciseFPMAD.
- extern bool UnsafeFPMath;
-
- /// NoInfsFPMath - This flag is enabled when the
- /// -enable-no-infs-fp-math flag is specified on the command line. When
- /// this flag is off (the default), the code generator is not allowed to
- /// assume the FP arithmetic arguments and results are never +-Infs.
- extern bool NoInfsFPMath;
-
- /// NoNaNsFPMath - This flag is enabled when the
- /// -enable-no-nans-fp-math flag is specified on the command line. When
- /// this flag is off (the default), the code generator is not allowed to
- /// assume the FP arithmetic arguments and results are never NaNs.
- extern bool NoNaNsFPMath;
-
- /// HonorSignDependentRoundingFPMath - This returns true when the
- /// -enable-sign-dependent-rounding-fp-math is specified. If this returns
- /// false (the default), the code generator is allowed to assume that the
- /// rounding behavior is the default (round-to-zero for all floating point to
- /// integer conversions, and round-to-nearest for all other arithmetic
- /// truncations). If this is enabled (set to true), the code generator must
- /// assume that the rounding mode may dynamically change.
- extern bool HonorSignDependentRoundingFPMathOption;
- extern bool HonorSignDependentRoundingFPMath();
-
- /// UseSoftFloat - This flag is enabled when the -soft-float flag is specified
- /// on the command line. When this flag is on, the code generator will
- /// generate libcalls to the software floating point library instead of
- /// target FP instructions.
- extern bool UseSoftFloat;
-
- /// FloatABIType - This setting is set by -float-abi=xxx option is specfied
- /// on the command line. This setting may either be Default, Soft, or Hard.
- /// Default selects the target's default behavior. Soft selects the ABI for
- /// UseSoftFloat, but does not inidcate that FP hardware may not be used.
- /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
- /// Hard presumes that the normal FP ABI is used.
- extern FloatABI::ABIType FloatABIType;
-
- /// NoZerosInBSS - By default some codegens place zero-initialized data to
- /// .bss section. This flag disables such behaviour (necessary, e.g. for
- /// crt*.o compiling).
- extern bool NoZerosInBSS;
-
- /// JITExceptionHandling - This flag indicates that the JIT should emit
- /// exception handling information.
- extern bool JITExceptionHandling;
-
- /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
- /// debug information and notify a debugger about it.
- extern bool JITEmitDebugInfo;
-
- /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write
- /// the object files generated by the JITEmitDebugInfo flag to disk. This
- /// flag is hidden and is only for debugging the debug info.
- extern bool JITEmitDebugInfoToDisk;
-
- /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is
- /// specified on the commandline. When the flag is on, participating targets
- /// will perform tail call optimization on all calls which use the fastcc
- /// calling convention and which satisfy certain target-independent
- /// criteria (being at the end of a function, having the same return type
- /// as their parent function, etc.), using an alternate ABI if necessary.
- extern bool GuaranteedTailCallOpt;
-
- /// StackAlignmentOverride - Override default stack alignment for target.
- extern unsigned StackAlignmentOverride;
-
- /// RealignStack - This flag indicates whether the stack should be
- /// automatically realigned, if needed.
- extern bool RealignStack;
-
- /// DisableJumpTables - This flag indicates jump tables should not be
- /// generated.
- extern bool DisableJumpTables;
-
- /// EnableFastISel - This flag enables fast-path instruction selection
- /// which trades away generated code quality in favor of reducing
- /// compile time.
- extern bool EnableFastISel;
-
- /// StrongPHIElim - This flag enables more aggressive PHI elimination
- /// wth earlier copy coalescing.
- extern bool StrongPHIElim;
-
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- extern StringRef getTrapFunctionName();
-
- extern bool EnableSegmentedStacks;
+ class TargetOptions {
+ public:
+ TargetOptions()
+ : PrintMachineCode(false), NoFramePointerElim(false),
+ NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false),
+ NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),
+ NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),
+ UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),
+ JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false),
+ GuaranteedTailCallOpt(false), DisableTailCalls(false),
+ StackAlignmentOverride(0), RealignStack(true),
+ DisableJumpTables(false), EnableFastISel(false),
+ PositionIndependentExecutable(false), EnableSegmentedStacks(false),
+ TrapFuncName(""), FloatABIType(FloatABI::Default)
+ {}
+
+ /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
+ /// option is specified on the command line, and should enable debugging
+ /// output from the code generator.
+ unsigned PrintMachineCode : 1;
+
+ /// NoFramePointerElim - This flag is enabled when the -disable-fp-elim is
+ /// specified on the command line. If the target supports the frame pointer
+ /// elimination optimization, this option should disable it.
+ unsigned NoFramePointerElim : 1;
+
+ /// NoFramePointerElimNonLeaf - This flag is enabled when the
+ /// -disable-non-leaf-fp-elim is specified on the command line. If the
+ /// target supports the frame pointer elimination optimization, this option
+ /// should disable it for non-leaf functions.
+ unsigned NoFramePointerElimNonLeaf : 1;
+
+ /// DisableFramePointerElim - This returns true if frame pointer elimination
+ /// optimization should be disabled for the given machine function.
+ bool DisableFramePointerElim(const MachineFunction &MF) const;
+
+ /// LessPreciseFPMAD - This flag is enabled when the
+ /// -enable-fp-mad is specified on the command line. When this flag is off
+ /// (the default), the code generator is not allowed to generate mad
+ /// (multiply add) if the result is "less precise" than doing those
+ /// operations individually.
+ unsigned LessPreciseFPMADOption : 1;
+ bool LessPreciseFPMAD() const;
+
+ /// NoExcessFPPrecision - This flag is enabled when the
+ /// -disable-excess-fp-precision flag is specified on the command line.
+ /// When this flag is off (the default), the code generator is allowed to
+ /// produce results that are "more precise" than IEEE allows. This includes
+ /// use of FMA-like operations and use of the X86 FP registers without
+ /// rounding all over the place.
+ unsigned NoExcessFPPrecision : 1;
+
+ /// UnsafeFPMath - This flag is enabled when the
+ /// -enable-unsafe-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// produce results that are "less precise" than IEEE allows. This includes
+ /// use of X86 instructions like FSIN and FCOS instead of libcalls.
+ /// UnsafeFPMath implies LessPreciseFPMAD.
+ unsigned UnsafeFPMath : 1;
+
+ /// NoInfsFPMath - This flag is enabled when the
+ /// -enable-no-infs-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// assume the FP arithmetic arguments and results are never +-Infs.
+ unsigned NoInfsFPMath : 1;
+
+ /// NoNaNsFPMath - This flag is enabled when the
+ /// -enable-no-nans-fp-math flag is specified on the command line. When
+ /// this flag is off (the default), the code generator is not allowed to
+ /// assume the FP arithmetic arguments and results are never NaNs.
+ unsigned NoNaNsFPMath : 1;
+
+ /// HonorSignDependentRoundingFPMath - This returns true when the
+ /// -enable-sign-dependent-rounding-fp-math is specified. If this returns
+ /// false (the default), the code generator is allowed to assume that the
+ /// rounding behavior is the default (round-to-zero for all floating point
+ /// to integer conversions, and round-to-nearest for all other arithmetic
+ /// truncations). If this is enabled (set to true), the code generator must
+ /// assume that the rounding mode may dynamically change.
+ unsigned HonorSignDependentRoundingFPMathOption : 1;
+ bool HonorSignDependentRoundingFPMath() const;
+
+ /// UseSoftFloat - This flag is enabled when the -soft-float flag is
+ /// specified on the command line. When this flag is on, the code generator
+ /// will generate libcalls to the software floating point library instead of
+ /// target FP instructions.
+ unsigned UseSoftFloat : 1;
+
+ /// NoZerosInBSS - By default some codegens place zero-initialized data in
+ /// the .bss section. This flag disables such behaviour (necessary, e.g. for
+ /// crt*.o compiling).
+ unsigned NoZerosInBSS : 1;
+
+ /// JITExceptionHandling - This flag indicates that the JIT should emit
+ /// exception handling information.
+ unsigned JITExceptionHandling : 1;
+
+ /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
+ /// debug information and notify a debugger about it.
+ unsigned JITEmitDebugInfo : 1;
+
+ /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write
+ /// the object files generated by the JITEmitDebugInfo flag to disk. This
+ /// flag is hidden and is only for debugging the debug info.
+ unsigned JITEmitDebugInfoToDisk : 1;
+
+ /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is
+ /// specified on the commandline. When the flag is on, participating targets
+ /// will perform tail call optimization on all calls which use the fastcc
+ /// calling convention and which satisfy certain target-independent
+ /// criteria (being at the end of a function, having the same return type
+ /// as their parent function, etc.), using an alternate ABI if necessary.
+ unsigned GuaranteedTailCallOpt : 1;
+
+ /// DisableTailCalls - This flag controls whether we will use tail calls.
+ /// Disabling them may be useful to maintain a correct call stack.
+ unsigned DisableTailCalls : 1;
+
+ /// StackAlignmentOverride - Override default stack alignment for target.
+ unsigned StackAlignmentOverride;
+
+ /// RealignStack - This flag indicates whether the stack should be
+ /// automatically realigned, if needed.
+ unsigned RealignStack : 1;
+
+ /// DisableJumpTables - This flag indicates jump tables should not be
+ /// generated.
+ unsigned DisableJumpTables : 1;
+
+ /// EnableFastISel - This flag enables fast-path instruction selection
+ /// which trades away generated code quality in favor of reducing
+ /// compile time.
+ unsigned EnableFastISel : 1;
+
+ /// PositionIndependentExecutable - This flag indicates whether the code
+ /// will eventually be linked into a single executable, despite the PIC
+ /// relocation model being in use. Its value is undefined (and irrelevant)
+ /// if the relocation model is anything other than PIC.
+ unsigned PositionIndependentExecutable : 1;
+
+ unsigned EnableSegmentedStacks : 1;
+
+ /// getTrapFunctionName - If this returns a non-empty string, this means
+ /// isel should lower Intrinsic::trap to a call to the specified function
+ /// name instead of an ISD::TRAP node.
+ std::string TrapFuncName;
+ StringRef getTrapFunctionName() const;
+
+ /// FloatABIType - This setting is set when the -float-abi=xxx option is
+ /// specified on the command line. It may be Default, Soft, or Hard.
+ /// Default selects the target's default behavior. Soft selects the ABI for
+ /// UseSoftFloat, but does not indicate that FP hardware may not be used.
+ /// Such a combination is unfortunately popular (e.g. arm-apple-darwin).
+ /// Hard presumes that the normal FP ABI is used.
+ FloatABI::ABIType FloatABIType;
+ };
} // End llvm namespace
#endif
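
Illustration (not part of the patch): with the globals folded into a class, clients now configure codegen through a value object. A sketch using only fields defined in the hunk above; the createTargetMachine call is mentioned for context and its exact parameter list should be checked against the Target API of this revision:

    llvm::TargetOptions Opts;            // constructor supplies the old defaults
    Opts.NoFramePointerElim = true;      // e.g. keep frame pointers for profiling
    Opts.FloatABIType = llvm::FloatABI::Hard;
    // Opts is then handed to the TargetMachine at construction time, e.g.
    // through Target::createTargetMachine(TT, CPU, Features, Opts, RM, CM, OL).
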
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index 682aa50736db..7d8a46b49ac9 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CallingConv.h"
#include <cassert>
#include <functional>
@@ -33,25 +34,18 @@ class raw_ostream;
class TargetRegisterClass {
public:
- typedef const unsigned* iterator;
- typedef const unsigned* const_iterator;
- typedef const EVT* vt_iterator;
+ typedef const uint16_t* iterator;
+ typedef const uint16_t* const_iterator;
+ typedef const MVT::SimpleValueType* vt_iterator;
typedef const TargetRegisterClass* const * sc_iterator;
-private:
+
+ // Instance variables filled by tablegen, do not use!
const MCRegisterClass *MC;
const vt_iterator VTs;
const unsigned *SubClassMask;
const sc_iterator SuperClasses;
const sc_iterator SuperRegClasses;
-public:
- TargetRegisterClass(const MCRegisterClass *MC, const EVT *vts,
- const unsigned *subcm,
- const TargetRegisterClass * const *supcs,
- const TargetRegisterClass * const *superregcs)
- : MC(MC), VTs(vts), SubClassMask(subcm), SuperClasses(supcs),
- SuperRegClasses(superregcs) {}
-
- virtual ~TargetRegisterClass() {} // Allow subclasses
+ ArrayRef<uint16_t> (*OrderFunc)(const MachineFunction&);
/// getID() - Return the register class ID number.
///
@@ -108,7 +102,7 @@ public:
///
bool hasType(EVT vt) const {
for(int i = 0; VTs[i] != MVT::Other; ++i)
- if (VTs[i] == vt)
+ if (EVT(VTs[i]) == vt)
return true;
return false;
}
@@ -165,7 +159,7 @@ public:
/// getSubClassMask - Returns a bit vector of subclasses, including this one.
/// The vector is indexed by class IDs, see hasSubClassEq() above for how to
/// use it.
- const unsigned *getSubClassMask() const {
+ const uint32_t *getSubClassMask() const {
return SubClassMask;
}
@@ -196,9 +190,8 @@ public:
///
/// By default, this method returns all registers in the class.
///
- virtual
- ArrayRef<unsigned> getRawAllocationOrder(const MachineFunction &MF) const {
- return makeArrayRef(begin(), getNumRegs());
+ ArrayRef<uint16_t> getRawAllocationOrder(const MachineFunction &MF) const {
+ return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs());
}
};
@@ -209,6 +202,13 @@ struct TargetRegisterInfoDesc {
bool inAllocatableClass; // Register belongs to an allocatable regclass.
};
+/// Each TargetRegisterClass has a per-register weight and a weight limit,
+/// which must be less than the limits of its pressure sets.
+struct RegClassWeight {
+ unsigned RegWeight;
+ unsigned WeightLimit;
+};
+
/// TargetRegisterInfo base class - We assume that the target defines a static
/// array of TargetRegisterDesc objects that represent all of the machine
/// registers that the target has. As such, we simply have to track a pointer
@@ -332,7 +332,7 @@ public:
if (regA == regB) return true;
if (isVirtualRegister(regA) || isVirtualRegister(regB))
return false;
- for (const unsigned *regList = getOverlaps(regA)+1; *regList; ++regList) {
+ for (const uint16_t *regList = getOverlaps(regA)+1; *regList; ++regList) {
if (*regList == regB) return true;
}
return false;
@@ -347,7 +347,7 @@ public:
/// isSuperRegister - Returns true if regB is a super-register of regA.
///
bool isSuperRegister(unsigned regA, unsigned regB) const {
- for (const unsigned *regList = getSuperRegisters(regA); *regList;++regList){
+ for (const uint16_t *regList = getSuperRegisters(regA); *regList;++regList){
if (*regList == regB) return true;
}
return false;
@@ -356,10 +356,33 @@ public:
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee saved registers on this target. The register should be in the
/// order of desired callee-save stack frame offset. The first register is
- /// closed to the incoming stack pointer if stack grows down, and vice versa.
- virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF = 0)
+ /// closest to the incoming stack pointer if stack grows down, and vice versa.
+ ///
+ virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF = 0)
const = 0;
+ /// getCallPreservedMask - Return a mask of call-preserved registers for the
+ /// given calling convention on the current sub-target. The mask should
+ /// include all call-preserved aliases. This is used by the register
+ /// allocator to determine which registers can be live across a call.
+ ///
+ /// The mask is an array containing (TRI::getNumRegs()+31)/32 entries.
+ /// A set bit indicates that all bits of the corresponding register are
+ /// preserved across the function call. The bit mask is expected to be
+ /// sub-register complete, i.e. if A is preserved, so are all its
+ /// sub-registers.
+ ///
+ /// Bits are numbered from the LSB, so the bit for physical register Reg can
+ /// be found as (Mask[Reg / 32] >> Reg % 32) & 1.
+ ///
+ /// A NULL pointer means that no register mask will be used, and call
+ /// instructions should use implicit-def operands to indicate call clobbered
+ /// registers.
+ ///
+ virtual const uint32_t *getCallPreservedMask(CallingConv::ID) const {
+ // The default mask clobbers everything. All targets should override.
+ return 0;
+ }
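
Illustration (not part of the patch): decoding the mask layout described above takes one line of bit arithmetic. A standalone sketch:

    #include <cstdint>

    // True if physical register Reg is preserved across the call. A null mask
    // means the target describes clobbers with implicit-def operands instead,
    // so nothing is treated as mask-preserved here. Otherwise test bit
    // (Reg % 32) of word (Reg / 32), as documented above.
    static bool isCallPreserved(const uint32_t *Mask, unsigned Reg) {
      return Mask && ((Mask[Reg / 32] >> (Reg % 32)) & 1);
    }
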
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses
@@ -367,24 +390,11 @@ public:
/// used by register scavenger to determine what registers are free.
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
- /// getSubReg - Returns the physical register number of sub-register "Index"
- /// for physical register RegNo. Return zero if the sub-register does not
- /// exist.
- virtual unsigned getSubReg(unsigned RegNo, unsigned Index) const = 0;
-
- /// getSubRegIndex - For a given register pair, return the sub-register index
- /// if the second register is a sub-register of the first. Return zero
- /// otherwise.
- virtual unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const = 0;
-
/// getMatchingSuperReg - Return a super-register of the specified register
/// Reg so its sub-register of index SubIdx is Reg.
unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
const TargetRegisterClass *RC) const {
- for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs)
- if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR))
- return SR;
- return 0;
+ return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC);
}
/// canCombineSubRegIndices - Given a register class and a list of
@@ -402,11 +412,11 @@ public:
/// getMatchingSuperRegClass - Return a subclass of the specified register
/// class A so that each register in it has a sub-register of the
/// specified sub-register index which is in the specified register class B.
+ ///
+ /// TableGen will synthesize missing A sub-classes.
virtual const TargetRegisterClass *
getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const {
- return 0;
- }
+ const TargetRegisterClass *B, unsigned Idx) const =0;
/// getSubClassWithSubReg - Returns the largest legal sub-class of RC that
/// supports the sub-register index Idx.
@@ -419,6 +429,7 @@ public:
/// supported by the full GR32 register class in 64-bit mode, but only by the
/// GR32_ABCD register class in 32-bit mode.
///
+ /// TableGen will synthesize missing RC sub-classes.
virtual const TargetRegisterClass *
getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx) const =0;
@@ -469,8 +480,7 @@ public:
/// values. If a target supports multiple different pointer register classes,
/// kind specifies which one is indicated.
virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const {
- assert(0 && "Target didn't implement getPointerRegClass!");
- return 0; // Must return a value in order to compile with VS 2005
+ llvm_unreachable("Target didn't implement getPointerRegClass!");
}
/// getCrossCopyRegClass - Returns a legal register class to copy a register
@@ -497,18 +507,37 @@ public:
/// getRegPressureLimit - Return the register pressure "high water mark" for
/// the specific register class. The scheduler is in high register pressure
/// mode (for the specific register class) if it goes over the limit.
+ ///
+ /// Note: this is the old register pressure model that relies on a manually
+ /// specified representative register class per value type.
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
return 0;
}
+ /// Get the weight in units of pressure for this register class.
+ virtual const RegClassWeight &getRegClassWeight(
+ const TargetRegisterClass *RC) const = 0;
+
+ /// Get the number of dimensions of register pressure.
+ virtual unsigned getNumRegPressureSets() const = 0;
+
+ /// Get the register unit pressure limit for this dimension.
+ /// This limit must be adjusted dynamically for reserved registers.
+ virtual unsigned getRegPressureSetLimit(unsigned Idx) const = 0;
+
+ /// Get the dimensions of register pressure impacted by this register class.
+ /// Returns a -1 terminated array of pressure set IDs.
+ virtual const int *getRegClassPressureSets(
+ const TargetRegisterClass *RC) const = 0;
+
/// getRawAllocationOrder - Returns the register allocation order for a
/// specified register class with a target-dependent hint. The returned list
/// may contain reserved registers that cannot be allocated.
///
/// Register allocators need only call this function to resolve
/// target-dependent hints, but it should work without hinting as well.
- virtual ArrayRef<unsigned>
+ virtual ArrayRef<uint16_t>
getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const {
@@ -607,22 +636,22 @@ public:
virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
int64_t Offset) const {
- assert(0 && "materializeFrameBaseRegister does not exist on this target");
+ llvm_unreachable("materializeFrameBaseRegister does not exist on this "
+ "target");
}
/// resolveFrameIndex - Resolve a frame index operand of an instruction
/// to reference the indicated base register plus offset instead.
virtual void resolveFrameIndex(MachineBasicBlock::iterator I,
unsigned BaseReg, int64_t Offset) const {
- assert(0 && "resolveFrameIndex does not exist on this target");
+ llvm_unreachable("resolveFrameIndex does not exist on this target");
}
/// isFrameOffsetLegal - Determine whether a given offset immediate is
/// encodable to resolve a frame index.
virtual bool isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
- assert(0 && "isFrameOffsetLegal does not exist on this target");
- return false; // Must return a value in order to compile with VS 2005
+ llvm_unreachable("isFrameOffsetLegal does not exist on this target");
}
/// eliminateCallFramePseudoInstr - This method is called during prolog/epilog
@@ -636,7 +665,8 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
- assert(0 && "Call Frame Pseudo Instructions do not exist on this target!");
+ llvm_unreachable("Call Frame Pseudo Instructions do not exist on this "
+ "target!");
}
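
A minimal sketch (not part of the patch) of a client testing the getCallPreservedMask() encoding documented above; TRI, CC, and Reg are assumed to be a TargetRegisterInfo pointer, a calling convention, and a physical register number already in scope:

// Sketch only: test the call-preserved bit for one physical register,
// using the documented layout (bit Reg % 32 of word Reg / 32).
static bool isPreservedAcrossCall(const TargetRegisterInfo *TRI,
                                  CallingConv::ID CC, unsigned Reg) {
  const uint32_t *Mask = TRI->getCallPreservedMask(CC);
  if (!Mask)
    return false; // NULL mask: rely on implicit-def operands instead.
  return (Mask[Reg / 32] >> (Reg % 32)) & 1;
}
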
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 612635ea746d..f55cf0e6306c 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -352,6 +352,8 @@ def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>;
def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>;
def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>;
def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>;
+def ctlz_zero_undef : SDNode<"ISD::CTLZ_ZERO_UNDEF", SDTIntUnaryOp>;
+def cttz_zero_undef : SDNode<"ISD::CTTZ_ZERO_UNDEF", SDTIntUnaryOp>;
def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>;
def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>;
def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>;
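
The two *_ZERO_UNDEF nodes added above differ from plain ctlz/cttz only on a zero input, where their result is undefined, mirroring the GCC-style builtins. A hedged C++ sketch of the distinction:

#include <cstdint>

// ISD::CTTZ is fully defined: cttz(0) yields the bit width.
// ISD::CTTZ_ZERO_UNDEF matches __builtin_ctz, undefined at 0.
unsigned cttz32(uint32_t X) {
  return X ? __builtin_ctz(X) : 32;   // full CTTZ semantics
}
unsigned cttz32_zero_undef(uint32_t X) {
  return __builtin_ctz(X);            // caller must guarantee X != 0
}
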
@@ -655,6 +657,51 @@ def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;
+def extloadvi1 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
+}]>;
+def extloadvi8 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def extloadvi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def extloadvi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+def extloadvf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f32;
+}]>;
+def extloadvf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::f64;
+}]>;
+
+def sextloadvi1 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
+}]>;
+def sextloadvi8 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def sextloadvi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def sextloadvi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def zextloadvi1 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i1;
+}]>;
+def zextloadvi8 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+def zextloadvi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+def zextloadvi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+ return cast<LoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
// store fragments.
def unindexedstore : PatFrag<(ops node:$val, node:$ptr),
(st node:$val, node:$ptr), [{
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 9556c7ab5abe..fc23b2c6b58d 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -15,7 +15,7 @@
#define LLVM_TARGET_TARGETSUBTARGETINFO_H
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CodeGen.h"
namespace llvm {
@@ -39,7 +39,7 @@ public:
// AntiDepBreakMode - Type of anti-dependence breaking that should
// be performed before post-RA scheduling.
typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode;
- typedef SmallVectorImpl<TargetRegisterClass*> RegClassVector;
+ typedef SmallVectorImpl<const TargetRegisterClass*> RegClassVector;
virtual ~TargetSubtargetInfo();
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index f9d7f9e6b98a..18176e8fdbb1 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -94,6 +94,7 @@ Pass *createFunctionInliningPass(int Threshold);
/// createAlwaysInlinerPass - Return a new pass object that inlines only
/// functions that are marked as "always_inline".
Pass *createAlwaysInlinerPass();
+Pass *createAlwaysInlinerPass(bool InsertLifetime);
//===----------------------------------------------------------------------===//
/// createPruneEHPass - Return a new pass object which transforms invoke
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index 3ac4c591c94f..7c3cfc870156 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -31,7 +31,7 @@ namespace llvm {
///
struct Inliner : public CallGraphSCCPass {
explicit Inliner(char &ID);
- explicit Inliner(char &ID, int Threshold);
+ explicit Inliner(char &ID, int Threshold, bool InsertLifetime);
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
@@ -65,28 +65,21 @@ struct Inliner : public CallGraphSCCPass {
///
virtual InlineCost getInlineCost(CallSite CS) = 0;
- // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
- // higher threshold to determine if the function call should be inlined.
+ /// removeDeadFunctions - Remove dead functions.
///
- virtual float getInlineFudgeFactor(CallSite CS) = 0;
+ /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag
+ /// which restricts it to deleting functions with an 'AlwaysInline'
+ /// attribute. This is useful for the InlineAlways pass that only wants to
+ /// deal with that subset of the functions.
+ bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
- /// resetCachedCostInfo - erase any cached cost data from the derived class.
- /// If the derived class has no such data this can be empty.
- ///
- virtual void resetCachedCostInfo(Function* Caller) = 0;
-
- /// growCachedCostInfo - update the cached cost info for Caller after Callee
- /// has been inlined.
- virtual void growCachedCostInfo(Function *Caller, Function *Callee) = 0;
-
- /// removeDeadFunctions - Remove dead functions that are not included in
- /// DNR (Do Not Remove) list.
- bool removeDeadFunctions(CallGraph &CG,
- SmallPtrSet<const Function *, 16> *DNR = NULL);
private:
// InlineThreshold - Cache the value here for easy access.
unsigned InlineThreshold;
+ // InsertLifetime - Insert @llvm.lifetime intrinsics.
+ bool InsertLifetime;
+
/// shouldInline - Return true if the inliner should attempt to
/// inline at the given CallSite.
bool shouldInline(CallSite CS);
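
As an illustration of the reshaped interface, a derived pass might wire up the new constructor parameter and sweep dead functions like this. This is a sketch, not code from the patch; the threshold value and the InlineCost::getNever() placeholder policy are assumptions:

struct MyInliner : public Inliner {
  static char ID; // pass identification
  MyInliner() : Inliner(ID, /*Threshold=*/225, /*InsertLifetime=*/true) {}
  virtual InlineCost getInlineCost(CallSite CS) {
    return InlineCost::getNever();    // placeholder policy
  }
};

// After inlining an SCC, restrict the dead-function sweep to
// always_inline functions, per the new flag:
//   bool Changed = removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
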
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index cc74e7fefe16..47ce90265bd5 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -60,6 +60,10 @@ public:
/// out of the frontend.
EP_EarlyAsPossible,
+ /// EP_ModuleOptimizerEarly - This extension point allows adding passes
+ /// just before the main module-level optimization passes.
+ EP_ModuleOptimizerEarly,
+
/// EP_LoopOptimizerEnd - This extension point allows adding loop passes to
/// the end of the loop optimizer.
EP_LoopOptimizerEnd,
@@ -67,7 +71,16 @@ public:
/// EP_ScalarOptimizerLate - This extension point allows adding optimization
/// passes after most of the main optimizations, but before the last
/// cleanup-ish optimizations.
- EP_ScalarOptimizerLate
+ EP_ScalarOptimizerLate,
+
+ /// EP_OptimizerLast - This extension point allows adding passes that
+ /// run after everything else.
+ EP_OptimizerLast,
+
+ /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that
+ /// should not be disabled by the -O0 optimization level. The passes will be
+ /// inserted after the inlining pass.
+ EP_EnabledOnOptLevel0
};
/// The Optimization Level - Specify the basic optimization level.
@@ -90,6 +103,7 @@ public:
bool DisableSimplifyLibCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
+ bool Vectorize;
private:
/// ExtensionList - This is list of all of the extensions that are registered.
@@ -117,8 +131,9 @@ public:
/// populateModulePassManager - This sets up the primary pass manager.
void populateModulePassManager(PassManagerBase &MPM);
void populateLTOPassManager(PassManagerBase &PM, bool Internalize,
- bool RunInliner);
+ bool RunInliner, bool DisableGVNLoadPRE = false);
};
+
/// Registers a function for adding a standard set of passes. This should be
/// used by optimizer plugins to allow all front ends to transparently use
/// them. Create a static instance of this class in your plugin, providing a
@@ -129,5 +144,6 @@ struct RegisterStandardPasses {
PassManagerBuilder::addGlobalExtension(Ty, Fn);
}
};
+
} // end namespace llvm
#endif
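
To see how the new extension points are meant to be consumed: a plugin can hook EP_OptimizerLast roughly as follows. A sketch; createMyPass is a hypothetical pass constructor, everything else follows the declarations above:

static void addMyPass(const PassManagerBuilder &Builder,
                      PassManagerBase &PM) {
  PM.add(createMyPass()); // hypothetical: schedule our pass last
}

// Per-builder registration ...
PassManagerBuilder Builder;
Builder.addExtension(PassManagerBuilder::EP_OptimizerLast, addMyPass);
// ... or global registration from a plugin, via the helper above:
static RegisterStandardPasses
  RegisterMyPass(PassManagerBuilder::EP_OptimizerLast, addMyPass);
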
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 8d552317f236..bbf3a69d246d 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -17,6 +17,7 @@
namespace llvm {
class ModulePass;
+class FunctionPass;
// Insert edge profiling instrumentation
ModulePass *createEdgeProfilerPass();
@@ -29,7 +30,13 @@ ModulePass *createPathProfilerPass();
// Insert GCOV profiling instrumentation
ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true,
- bool Use402Format = false);
+ bool Use402Format = false,
+ bool UseExtraChecksum = false);
+
+// Insert AddressSanitizer (address sanity checking) instrumentation
+ModulePass *createAddressSanitizerPass();
+// Insert ThreadSanitizer (race detection) instrumentation
+FunctionPass *createThreadSanitizerPass();
} // End llvm namespace
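
A sketch of how the two new instrumentation passes might be scheduled, assuming a Module M already in scope; the pass names come from the declarations above, the surrounding setup is illustrative:

PassManager MPM;
MPM.add(createAddressSanitizerPass());  // module-level ASan instrumentation

FunctionPassManager FPM(&M);
FPM.add(createThreadSanitizerPass());   // per-function TSan instrumentation
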
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index b1536f906d8c..7f055d446171 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -112,6 +112,8 @@ Pass *createLICMPass();
//
Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
+Pass *createGlobalMergePass(const TargetLowering *TLI = 0);
+
//===----------------------------------------------------------------------===//
//
// LoopUnswitch - This pass is a simple loop unswitching pass.
@@ -307,12 +309,6 @@ extern char &InstructionNamerID;
//===----------------------------------------------------------------------===//
//
-// GEPSplitter - Split complex GEPs into simple ones
-//
-FunctionPass *createGEPSplitterPass();
-
-//===----------------------------------------------------------------------===//
-//
// Sink - Code Sinking
//
FunctionPass *createSinkingPass();
@@ -331,6 +327,12 @@ Pass *createCorrelatedValuePropagationPass();
//===----------------------------------------------------------------------===//
//
+// ObjCARCAPElim - ObjC ARC autorelease pool elimination.
+//
+Pass *createObjCARCAPElimPass();
+
+//===----------------------------------------------------------------------===//
+//
// ObjCARCExpand - ObjC ARC preliminary simplifications.
//
Pass *createObjCARCExpandPass();
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 6fcd160e64e4..867b9e43849d 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -173,9 +173,8 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
/// complicated to handle the case where one of the edges being split
/// is an exit of a loop with other exits).
///
-BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P = 0);
+BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P = 0);
/// SplitLandingPadPredecessors - This method transforms the landing pad,
/// OrigBB, by introducing two new basic blocks into the function. One of those
diff --git a/include/llvm/Transforms/Utils/BasicInliner.h b/include/llvm/Transforms/Utils/BasicInliner.h
deleted file mode 100644
index 4bca6b8c4417..000000000000
--- a/include/llvm/Transforms/Utils/BasicInliner.h
+++ /dev/null
@@ -1,55 +0,0 @@
-//===- BasicInliner.h - Basic function level inliner ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple function based inliner that does not use
-// call graph information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BASICINLINER_H
-#define BASICINLINER_H
-
-#include "llvm/Analysis/InlineCost.h"
-
-namespace llvm {
-
- class Function;
- class TargetData;
- struct BasicInlinerImpl;
-
- /// BasicInliner - BasicInliner provides function level inlining interface.
- /// Clients provide list of functions which are inline without using
- /// module level call graph information. Note that the BasicInliner is
- /// free to delete a function if it is inlined into all call sites.
- class BasicInliner {
- public:
-
- explicit BasicInliner(TargetData *T = NULL);
- ~BasicInliner();
-
- /// addFunction - Add function into the list of functions to process.
- /// All functions must be inserted using this interface before invoking
- /// inlineFunctions().
- void addFunction(Function *F);
-
- /// neverInlineFunction - Sometimes a function is never to be inlined
- /// because of one or other reason.
- void neverInlineFunction(Function *F);
-
- /// inlineFuctions - Walk all call sites in all functions supplied by
- /// client. Inline as many call sites as possible. Delete completely
- /// inlined functions.
- void inlineFunctions();
-
- private:
- BasicInlinerImpl *Impl;
- };
-}
-
-#endif
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index e82593838467..17cd58eb014e 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -20,6 +20,7 @@
namespace llvm {
class Value;
class TargetData;
+ class TargetLibraryInfo;
/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
Value *CastToCStr(Value *V, IRBuilder<> &B);
@@ -68,7 +69,7 @@ namespace llvm {
/// 'Op' and returns one value with the same type. If 'Op' is a long double,
/// 'l' is added as the suffix of the name; if 'Op' is a float, we add an 'f'
/// suffix.
- Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
+ Value *EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
const AttrListPtr &Attrs);
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
@@ -86,12 +87,13 @@ namespace llvm {
/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
/// pointer and File is a pointer to FILE.
- void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD);
+ void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI);
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
- const TargetData *TD);
+ const TargetData *TD, const TargetLibraryInfo *TLI);
/// SimplifyFortifiedLibCalls - Helper class for folding checked library
/// calls (e.g. __strcpy_chk) into their unchecked counterparts.
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 674c2d002e85..b7b5d29b320f 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -56,21 +56,13 @@ struct ClonedCodeInfo {
/// call instruction.
bool ContainsCalls;
- /// ContainsUnwinds - This is set to true if the cloned code contains an
- /// unwind instruction.
- bool ContainsUnwinds;
-
/// ContainsDynamicAllocas - This is set to true if the cloned code contains
/// a 'dynamic' alloca. Dynamic allocas are allocas that are either not in
/// the entry block or they are in the entry block but are not a constant
/// size.
bool ContainsDynamicAllocas;
- ClonedCodeInfo() {
- ContainsCalls = false;
- ContainsUnwinds = false;
- ContainsDynamicAllocas = false;
- }
+ ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {}
};
@@ -134,8 +126,8 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
/// Clone OldFunc into NewFunc, transforming the old arguments into references
/// to VMap values. Note that if NewFunc already has basic blocks, the ones
/// cloned into it will be added to the end of the function. This function
-/// fills in a list of return instructions, and can optionally append the
-/// specified suffix to all values cloned.
+/// fills in a list of return instructions, and can optionally remap types
+/// and/or append the specified suffix to all values cloned.
///
/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
/// mappings.
@@ -145,7 +137,8 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix = "",
- ClonedCodeInfo *CodeInfo = 0);
+ ClonedCodeInfo *CodeInfo = 0,
+ ValueMapTypeRemapper *TypeMapper = 0);
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
@@ -204,9 +197,9 @@ public:
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
///
-bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI);
-bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI);
-bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI);
+bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, bool InsertLifetime = true);
+bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, bool InsertLifetime = true);
+bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, bool InsertLifetime = true);
} // End llvm namespace
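
For the new InsertLifetime parameter, a call site that wants the pre-patch behavior passes nothing; one that wants to suppress lifetime markers does so explicitly. A sketch, assuming a CallSite CS in scope:

InlineFunctionInfo IFI;
// Inline CS, but do not insert @llvm.lifetime markers for allocas
// copied from the callee.
bool DidInline = InlineFunction(CS, IFI, /*InsertLifetime=*/false);
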
diff --git a/include/llvm/Transforms/Utils/CmpInstAnalysis.h b/include/llvm/Transforms/Utils/CmpInstAnalysis.h
new file mode 100644
index 000000000000..7ad7bddce503
--- /dev/null
+++ b/include/llvm/Transforms/Utils/CmpInstAnalysis.h
@@ -0,0 +1,66 @@
+//===-- CmpInstAnalysis.h - Utils to help fold compare insts ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyze compare instructions
+// and fold them into constants or other compare instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_CMPINSTANALYSIS_H
+#define LLVM_TRANSFORMS_UTILS_CMPINSTANALYSIS_H
+
+#include "llvm/InstrTypes.h"
+
+namespace llvm {
+ class ICmpInst;
+ class Value;
+
+ /// getICmpCode - Encode an icmp predicate into a three-bit mask. These bits
+ /// are carefully arranged to allow folding of expressions such as:
+ ///
+ /// (A < B) | (A > B) --> (A != B)
+ ///
+ /// Note that this is only valid if the first and second predicates have the
+ /// same sign. It is illegal to do: (A u< B) | (A s> B)
+ ///
+ /// Three bits are used to represent the condition, as follows:
+ /// 0 A > B
+ /// 1 A == B
+ /// 2 A < B
+ ///
+ /// <=> Value Definition
+ /// 000 0 Always false
+ /// 001 1 A > B
+ /// 010 2 A == B
+ /// 011 3 A >= B
+ /// 100 4 A < B
+ /// 101 5 A != B
+ /// 110 6 A <= B
+ /// 111 7 Always true
+ ///
+ unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false);
+
+ /// getICmpValue - This is the complement of getICmpCode, which turns an
+ /// opcode and two operands into either a constant true or false, or the
+ /// predicate for a new ICmp instruction. The sign is passed in to determine
+ /// which kind of predicate to use in the new icmp instruction.
+ /// A non-NULL return value will be a true or false constant.
+ /// A NULL return means a new ICmp is needed; its predicate is
+ /// output in NewICmpPred.
+ Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ CmpInst::Predicate &NewICmpPred);
+
+ /// PredicatesFoldable - Return true if both predicates match sign or if at
+ /// least one of them is an equality comparison (which is signless).
+ bool PredicatesFoldable(CmpInst::Predicate p1, CmpInst::Predicate p2);
+
+} // end namespace llvm
+
+#endif
+
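
To make the encoding table concrete: folding (A < B) | (A > B) ORs codes 4 and 1 into 5, i.e. A != B. A sketch assuming ICmp1 and ICmp2 are two signed icmps over the same operands A and B:

unsigned Code = getICmpCode(ICmp1) | getICmpCode(ICmp2); // 100 | 001 = 101
CmpInst::Predicate NewPred;
Value *V = getICmpValue(/*Sign=*/true, Code, A, B, NewPred);
// Code 5 is neither always-false (0) nor always-true (7), so V is NULL
// and NewPred comes back as the predicate for A != B (equality is
// signless); the caller then builds the new icmp with NewPred.
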
diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h
new file mode 100644
index 000000000000..2c0ec9b118cf
--- /dev/null
+++ b/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -0,0 +1,33 @@
+//===-- ModuleUtils.h - Functions to manipulate Modules ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
+#define LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
+
+namespace llvm {
+
+class Module;
+class Function;
+
+/// Append F to the list of global ctors of module M with the given Priority.
+/// This wraps the function in the appropriate structure and stores it along
+/// side other global constructors. For details see
+/// http://llvm.org/docs/LangRef.html#intg_global_ctors
+void appendToGlobalCtors(Module &M, Function *F, int Priority);
+
+/// Same as appendToGlobalCtors(), but for global dtors.
+void appendToGlobalDtors(Module &M, Function *F, int Priority);
+
+} // End llvm namespace
+
+#endif // LLVM_TRANSFORMS_UTILS_MODULE_UTILS_H
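
A usage sketch for the new helpers, assuming an instrumentation pass running over Module M in LLVMContext Ctx; the name __my_module_init is hypothetical:

#include "llvm/Transforms/Utils/ModuleUtils.h"

// Create (or reuse) a void() initializer and register it as a ctor.
Function *Init = cast<Function>(M.getOrInsertFunction(
    "__my_module_init", Type::getVoidTy(Ctx), (Type *)0));
appendToGlobalCtors(M, Init, /*Priority=*/65535);
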
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index 064e5501a455..4c821491b210 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -14,16 +14,18 @@
#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
#define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
+#include "llvm/ADT/StringRef.h"
+
namespace llvm {
- class Value;
class BasicBlock;
- class Use;
- class PHINode;
+ class Instruction;
+ class LoadInst;
template<typename T> class SmallVectorImpl;
template<typename T> class SSAUpdaterTraits;
- class DbgDeclareInst;
- class DIBuilder;
- class BumpPtrAllocator;
+ class PHINode;
+ class Type;
+ class Use;
+ class Value;
/// SSAUpdater - This class updates SSA form for a set of values defined in
/// multiple blocks. This is used when code duplication or another unstructured
@@ -137,12 +139,7 @@ public:
/// passed into the run method). Clients should implement this with a more
/// efficient version if possible.
virtual bool isInstInList(Instruction *I,
- const SmallVectorImpl<Instruction*> &Insts) const {
- for (unsigned i = 0, e = Insts.size(); i != e; ++i)
- if (Insts[i] == I)
- return true;
- return false;
- }
+ const SmallVectorImpl<Instruction*> &Insts) const;
/// doExtraRewritesBeforeFinalDeletion - This hook is invoked after all the
/// stores are found and inserted as available values, but
diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index 5a03d224ff7c..a9adbd73c152 100644
--- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -15,8 +15,16 @@
#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H
#define LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+
namespace llvm {
+class CastInst;
+class PHINode;
template<typename T> class SSAUpdaterTraits;
template<typename UpdaterT>
@@ -372,7 +380,7 @@ public:
if (!SomePHI)
break;
if (CheckIfPHIMatches(SomePHI)) {
- RecordMatchingPHI(SomePHI);
+ RecordMatchingPHIs(BlockList);
break;
}
// Match failed: clear all the PHITag values.
@@ -429,38 +437,17 @@ public:
return true;
}
- /// RecordMatchingPHI - For a PHI node that matches, record it and its input
- /// PHIs in both the BBMap and the AvailableVals mapping.
- void RecordMatchingPHI(PhiT *PHI) {
- SmallVector<PhiT*, 20> WorkList;
- WorkList.push_back(PHI);
-
- // Record this PHI.
- BlkT *BB = PHI->getParent();
- ValT PHIVal = Traits::GetPHIValue(PHI);
- (*AvailableVals)[BB] = PHIVal;
- BBMap[BB]->AvailableVal = PHIVal;
-
- while (!WorkList.empty()) {
- PHI = WorkList.pop_back_val();
-
- // Iterate through the PHI's incoming values.
- for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI),
- E = Traits::PHI_end(PHI); I != E; ++I) {
- ValT IncomingVal = I.getIncomingValue();
- PhiT *IncomingPHI = Traits::ValueIsPHI(IncomingVal, Updater);
- if (!IncomingPHI) continue;
- BB = IncomingPHI->getParent();
- BBInfo *Info = BBMap[BB];
- if (!Info || Info->AvailableVal)
- continue;
-
- // Record the PHI and add it to the worklist.
- (*AvailableVals)[BB] = IncomingVal;
- Info->AvailableVal = IncomingVal;
- WorkList.push_back(IncomingPHI);
+ /// RecordMatchingPHIs - For each PHI node that matches, record it in both
+ /// the BBMap and the AvailableVals mapping.
+ void RecordMatchingPHIs(BlockListTy *BlockList) {
+ for (typename BlockListTy::iterator I = BlockList->begin(),
+ E = BlockList->end(); I != E; ++I)
+ if (PhiT *PHI = (*I)->PHITag) {
+ BlkT *BB = PHI->getParent();
+ ValT PHIVal = Traits::GetPHIValue(PHI);
+ (*AvailableVals)[BB] = PHIVal;
+ BBMap[BB]->AvailableVal = PHIVal;
}
- }
}
};
diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h
index 524cf5ad9793..2632d186ff9b 100644
--- a/include/llvm/Transforms/Utils/SimplifyIndVar.h
+++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h
@@ -17,21 +17,23 @@
#define LLVM_TRANSFORMS_UTILS_SIMPLIFYINDVAR_H
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ValueHandle.h"
namespace llvm {
extern cl::opt<bool> DisableIVRewrite;
+class CastInst;
+class IVUsers;
class Loop;
-class LoopInfo;
-class DominatorTree;
-class ScalarEvolution;
class LPPassManager;
-class IVUsers;
+class PHINode;
+class ScalarEvolution;
/// Interface for visiting interesting IV users that are recognized but not
/// simplified by this utility.
class IVVisitor {
+ virtual void anchor();
public:
virtual ~IVVisitor() {}
virtual void visitCast(CastInst *Cast) = 0;
@@ -47,12 +49,6 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
SmallVectorImpl<WeakVH> &Dead);
-/// simplifyIVUsers - Simplify instructions recorded by the IVUsers pass.
-/// This is a legacy implementation to reproduce the behavior of the
-/// IndVarSimplify pass prior to DisableIVRewrite.
-bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead);
-
} // namespace llvm
#endif
diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h
index 7212a8c76069..f175e8371e79 100644
--- a/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -22,9 +22,12 @@ class Loop;
class LoopInfo;
class LPPassManager;
-bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
+bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
unsigned TripMultiple, LoopInfo* LI, LPPassManager* LPM);
+bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager* LPM);
+
}
#endif
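
Illustrating the revised signature: a caller that wants runtime unrolling when the trip count is unknown might do the following. A sketch; L, LI, and LPM are assumed to be in scope:

// Unroll L by 4. TripCount 0 means "unknown"; AllowRuntime lets
// UnrollLoop emit the prolog that handles remainder iterations.
bool Unrolled = UnrollLoop(L, /*Count=*/4, /*TripCount=*/0,
                           /*AllowRuntime=*/true, /*TripMultiple=*/1,
                           LI, LPM);
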
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index 03846567dbcc..8594707a8482 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -20,7 +20,7 @@
namespace llvm {
class Value;
class Instruction;
- typedef ValueMap<const Value *, TrackingVH<Value> > ValueToValueMapTy;
+ typedef ValueMap<const Value *, WeakVH> ValueToValueMapTy;
/// ValueMapTypeRemapper - This is a class that can be implemented by clients
/// to remap types when cloning constants and instructions.
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
new file mode 100644
index 000000000000..7701ceb4d0b3
--- /dev/null
+++ b/include/llvm/Transforms/Vectorize.h
@@ -0,0 +1,106 @@
+//===-- Vectorize.h - Vectorization Transformations -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes for accessor functions that expose passes
+// in the Vectorize transformations library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_VECTORIZE_H
+#define LLVM_TRANSFORMS_VECTORIZE_H
+
+namespace llvm {
+class BasicBlock;
+class BasicBlockPass;
+
+//===----------------------------------------------------------------------===//
+/// @brief Vectorize configuration.
+struct VectorizeConfig {
+ //===--------------------------------------------------------------------===//
+ // Target architecture related parameters
+
+ /// @brief The size of the native vector registers.
+ unsigned VectorBits;
+
+ /// @brief Vectorize integer values.
+ bool VectorizeInts;
+
+ /// @brief Vectorize floating-point values.
+ bool VectorizeFloats;
+
+ /// @brief Vectorize casting (conversion) operations.
+ bool VectorizeCasts;
+
+ /// @brief Vectorize floating-point math intrinsics.
+ bool VectorizeMath;
+
+ /// @brief Vectorize the fused-multiply-add intrinsic.
+ bool VectorizeFMA;
+
+ /// @brief Vectorize loads and stores.
+ bool VectorizeMemOps;
+
+ /// @brief Only generate aligned loads and stores.
+ bool AlignedOnly;
+
+ //===--------------------------------------------------------------------===//
+ // Misc parameters
+
+ /// @brief The required chain depth for vectorization.
+ unsigned ReqChainDepth;
+
+ /// @brief The maximum search distance for instruction pairs.
+ unsigned SearchLimit;
+
+ /// @brief The maximum number of candidate pairs with which to use a full
+ /// cycle check.
+ unsigned MaxCandPairsForCycleCheck;
+
+ /// @brief Replicating one element to a pair breaks the chain.
+ bool SplatBreaksChain;
+
+ /// @brief The maximum number of pairable instructions per group.
+ unsigned MaxInsts;
+
+ /// @brief The maximum number of pairing iterations.
+ unsigned MaxIter;
+
+ /// @brief Don't boost the chain-depth contribution of loads and stores.
+ bool NoMemOpBoost;
+
+ /// @brief Use a fast instruction dependency analysis.
+ bool FastDep;
+
+ /// @brief Initialize the VectorizeConfig from command line options.
+ VectorizeConfig();
+};
+
+//===----------------------------------------------------------------------===//
+//
+// BBVectorize - A basic-block vectorization pass.
+//
+BasicBlockPass *
+createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
+
+//===----------------------------------------------------------------------===//
+/// @brief Vectorize the BasicBlock.
+///
+/// @param BB The BasicBlock to be vectorized
+/// @param P The current running pass; it should require AliasAnalysis and
+/// ScalarEvolution. After the vectorization, AliasAnalysis,
+/// ScalarEvolution and CFG are preserved.
+///
+/// @return True if the BB is changed, false otherwise.
+///
+bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
+ const VectorizeConfig &C = VectorizeConfig());
+
+} // End llvm namespace
+
+#endif
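
A configuration sketch for the new pass, assuming a PassManager PM in scope; the field values are illustrative, not recommendations:

VectorizeConfig C;        // starts from the command-line defaults
C.VectorBits = 256;       // e.g. assume 256-bit vector registers
C.AlignedOnly = true;     // only emit aligned loads and stores
PM.add(createBBVectorizePass(C));
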
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 43b7dc578886..185258d8ff2a 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -16,6 +16,7 @@
#define LLVM_TYPE_H
#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -25,6 +26,7 @@ class raw_ostream;
class Module;
class LLVMContext;
class LLVMContextImpl;
+class StringRef;
template<class GraphType> struct GraphTraits;
/// The instances of the Type class are immutable: once they are created,
@@ -47,23 +49,24 @@ public:
enum TypeID {
// PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
VoidTyID = 0, ///< 0: type with no size
- FloatTyID, ///< 1: 32-bit floating point type
- DoubleTyID, ///< 2: 64-bit floating point type
- X86_FP80TyID, ///< 3: 80-bit floating point type (X87)
- FP128TyID, ///< 4: 128-bit floating point type (112-bit mantissa)
- PPC_FP128TyID, ///< 5: 128-bit floating point type (two 64-bits, PowerPC)
- LabelTyID, ///< 6: Labels
- MetadataTyID, ///< 7: Metadata
- X86_MMXTyID, ///< 8: MMX vectors (64 bits, X86 specific)
+ HalfTyID, ///< 1: 16-bit floating point type
+ FloatTyID, ///< 2: 32-bit floating point type
+ DoubleTyID, ///< 3: 64-bit floating point type
+ X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
+ FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
+ PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
+ LabelTyID, ///< 7: Labels
+ MetadataTyID, ///< 8: Metadata
+ X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
// Derived types... see DerivedTypes.h file.
// Make sure FirstDerivedTyID stays up to date!
- IntegerTyID, ///< 9: Arbitrary bit width integers
- FunctionTyID, ///< 10: Functions
- StructTyID, ///< 11: Structures
- ArrayTyID, ///< 12: Arrays
- PointerTyID, ///< 13: Pointers
- VectorTyID, ///< 14: SIMD 'packed' format, or other vector type
+ IntegerTyID, ///< 10: Arbitrary bit width integers
+ FunctionTyID, ///< 11: Functions
+ StructTyID, ///< 12: Structures
+ ArrayTyID, ///< 13: Arrays
+ PointerTyID, ///< 14: Pointers
+ VectorTyID, ///< 15: SIMD 'packed' format, or other vector type
NumTypeIDs, // Must remain as last defined ID
LastPrimitiveTyID = X86_MMXTyID,
@@ -74,21 +77,32 @@ private:
/// Context - This refers to the LLVMContext in which this type was uniqued.
LLVMContext &Context;
- TypeID ID : 8; // The current base type of this type.
- unsigned SubclassData : 24; // Space for subclasses to store data
+ // Due to Ubuntu GCC bug 910363:
+ // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363
+ // Bitpack ID and SubclassData manually.
+ // Note: TypeID : low 8 bit; SubclassData : high 24 bit.
+ uint32_t IDAndSubclassData;
protected:
friend class LLVMContextImpl;
explicit Type(LLVMContext &C, TypeID tid)
- : Context(C), ID(tid), SubclassData(0),
- NumContainedTys(0), ContainedTys(0) {}
+ : Context(C), IDAndSubclassData(0),
+ NumContainedTys(0), ContainedTys(0) {
+ setTypeID(tid);
+ }
~Type() {}
-
- unsigned getSubclassData() const { return SubclassData; }
+
+ void setTypeID(TypeID ID) {
+ IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00);
+ assert(getTypeID() == ID && "TypeID data too large for field");
+ }
+
+ unsigned getSubclassData() const { return IDAndSubclassData >> 8; }
+
void setSubclassData(unsigned val) {
- SubclassData = val;
+ IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8);
// Ensure we don't have any accidental truncation.
- assert(SubclassData == val && "Subclass data too large for field");
+ assert(getSubclassData() == val && "Subclass data too large for field");
}
/// NumContainedTys - Keeps track of how many Type*'s there are in the
@@ -116,49 +130,54 @@ public:
/// getTypeID - Return the type id for the type. This will return one
/// of the TypeID enum elements defined above.
///
- TypeID getTypeID() const { return ID; }
+ TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); }
/// isVoidTy - Return true if this is 'void'.
- bool isVoidTy() const { return ID == VoidTyID; }
+ bool isVoidTy() const { return getTypeID() == VoidTyID; }
+
+ /// isHalfTy - Return true if this is 'half', a 16-bit IEEE fp type.
+ bool isHalfTy() const { return getTypeID() == HalfTyID; }
/// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
- bool isFloatTy() const { return ID == FloatTyID; }
+ bool isFloatTy() const { return getTypeID() == FloatTyID; }
/// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
- bool isDoubleTy() const { return ID == DoubleTyID; }
+ bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
/// isX86_FP80Ty - Return true if this is x86 long double.
- bool isX86_FP80Ty() const { return ID == X86_FP80TyID; }
+ bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
/// isFP128Ty - Return true if this is 'fp128'.
- bool isFP128Ty() const { return ID == FP128TyID; }
+ bool isFP128Ty() const { return getTypeID() == FP128TyID; }
/// isPPC_FP128Ty - Return true if this is powerpc long double.
- bool isPPC_FP128Ty() const { return ID == PPC_FP128TyID; }
+ bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
/// isFloatingPointTy - Return true if this is one of the six floating point
/// types
bool isFloatingPointTy() const {
- return ID == FloatTyID || ID == DoubleTyID ||
- ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID;
+ return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
+ getTypeID() == DoubleTyID ||
+ getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
+ getTypeID() == PPC_FP128TyID;
}
/// isX86_MMXTy - Return true if this is X86 MMX.
- bool isX86_MMXTy() const { return ID == X86_MMXTyID; }
+ bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
///
bool isFPOrFPVectorTy() const;
/// isLabelTy - Return true if this is 'label'.
- bool isLabelTy() const { return ID == LabelTyID; }
+ bool isLabelTy() const { return getTypeID() == LabelTyID; }
/// isMetadataTy - Return true if this is 'metadata'.
- bool isMetadataTy() const { return ID == MetadataTyID; }
+ bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
/// isIntegerTy - True if this is an instance of IntegerType.
///
- bool isIntegerTy() const { return ID == IntegerTyID; }
+ bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
/// isIntegerTy - Return true if this is an IntegerType of the given width.
bool isIntegerTy(unsigned Bitwidth) const;
@@ -170,23 +189,23 @@ public:
/// isFunctionTy - True if this is an instance of FunctionType.
///
- bool isFunctionTy() const { return ID == FunctionTyID; }
+ bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
/// isStructTy - True if this is an instance of StructType.
///
- bool isStructTy() const { return ID == StructTyID; }
+ bool isStructTy() const { return getTypeID() == StructTyID; }
/// isArrayTy - True if this is an instance of ArrayType.
///
- bool isArrayTy() const { return ID == ArrayTyID; }
+ bool isArrayTy() const { return getTypeID() == ArrayTyID; }
/// isPointerTy - True if this is an instance of PointerType.
///
- bool isPointerTy() const { return ID == PointerTyID; }
+ bool isPointerTy() const { return getTypeID() == PointerTyID; }
/// isVectorTy - True if this is an instance of VectorType.
///
- bool isVectorTy() const { return ID == VectorTyID; }
+ bool isVectorTy() const { return getTypeID() == VectorTyID; }
/// canLosslesslyBitCastTo - Return true if this type could be converted
/// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
@@ -202,14 +221,14 @@ public:
/// Here are some useful little methods to query what type derived types are
/// Note that all other types can just compare to see if this == Type::xxxTy;
///
- bool isPrimitiveType() const { return ID <= LastPrimitiveTyID; }
- bool isDerivedType() const { return ID >= FirstDerivedTyID; }
+ bool isPrimitiveType() const { return getTypeID() <= LastPrimitiveTyID; }
+ bool isDerivedType() const { return getTypeID() >= FirstDerivedTyID; }
/// isFirstClassType - Return true if the type is "first class", meaning it
/// is a valid type for a Value.
///
bool isFirstClassType() const {
- return ID != FunctionTyID && ID != VoidTyID;
+ return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
}
/// isSingleValueType - Return true if the type is a valid type for a
@@ -217,8 +236,9 @@ public:
/// and array types.
///
bool isSingleValueType() const {
- return (ID != VoidTyID && isPrimitiveType()) ||
- ID == IntegerTyID || ID == PointerTyID || ID == VectorTyID;
+ return (getTypeID() != VoidTyID && isPrimitiveType()) ||
+ getTypeID() == IntegerTyID || getTypeID() == PointerTyID ||
+ getTypeID() == VectorTyID;
}
/// isAggregateType - Return true if the type is an aggregate type. This
@@ -227,7 +247,7 @@ public:
/// does not include vector types.
///
bool isAggregateType() const {
- return ID == StructTyID || ID == ArrayTyID;
+ return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
}
/// isSized - Return true if it makes sense to take the size of this type. To
@@ -236,12 +256,14 @@ public:
///
bool isSized() const {
// If it's a primitive, it is always sized.
- if (ID == IntegerTyID || isFloatingPointTy() || ID == PointerTyID ||
- ID == X86_MMXTyID)
+ if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
+ getTypeID() == PointerTyID ||
+ getTypeID() == X86_MMXTyID)
return true;
// If it is not something that can have a size (e.g. a function or label),
// it doesn't have a size.
- if (ID != StructTyID && ID != ArrayTyID && ID != VectorTyID)
+ if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
+ getTypeID() != VectorTyID)
return false;
// Otherwise we have to try harder to decide.
return isSizedDerivedType();
@@ -294,6 +316,34 @@ public:
unsigned getNumContainedTypes() const { return NumContainedTys; }
//===--------------------------------------------------------------------===//
+ // Helper methods corresponding to subclass methods. This forces a cast to
+ // the specified subclass and calls its accessor. "getVectorNumElements" (for
+ // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
+ // only intended to cover the core methods that are frequently used, helper
+ // methods should not be added here.
+
+ unsigned getIntegerBitWidth() const;
+
+ Type *getFunctionParamType(unsigned i) const;
+ unsigned getFunctionNumParams() const;
+ bool isFunctionVarArg() const;
+
+ StringRef getStructName() const;
+ unsigned getStructNumElements() const;
+ Type *getStructElementType(unsigned N) const;
+
+ Type *getSequentialElementType() const;
+
+ uint64_t getArrayNumElements() const;
+ Type *getArrayElementType() const { return getSequentialElementType(); }
+
+ unsigned getVectorNumElements() const;
+ Type *getVectorElementType() const { return getSequentialElementType(); }
+
+ unsigned getPointerAddressSpace() const;
+ Type *getPointerElementType() const { return getSequentialElementType(); }
+
+ //===--------------------------------------------------------------------===//
// Static members exported by the Type class itself. Useful for getting
// instances of Type.
//
@@ -306,6 +356,7 @@ public:
//
static Type *getVoidTy(LLVMContext &C);
static Type *getLabelTy(LLVMContext &C);
+ static Type *getHalfTy(LLVMContext &C);
static Type *getFloatTy(LLVMContext &C);
static Type *getDoubleTy(LLVMContext &C);
static Type *getMetadataTy(LLVMContext &C);
@@ -324,6 +375,7 @@ public:
// Convenience methods for getting pointer types with one of the above builtin
// types as pointee.
//
+ static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
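
The forwarding helpers added above shorten common cast-and-call chains. A sketch, assuming Ty points at a <4 x float> vector type:

// Before: cast<VectorType>(Ty)->getNumElements()
unsigned NumElts = Ty->getVectorNumElements();   // 4
Type *EltTy = Ty->getVectorElementType();        // float
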
diff --git a/include/llvm/User.h b/include/llvm/User.h
index 62bc9f034618..c52f32f425c4 100644
--- a/include/llvm/User.h
+++ b/include/llvm/User.h
@@ -19,6 +19,7 @@
#ifndef LLVM_USER_H
#define LLVM_USER_H
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Value.h"
namespace llvm {
@@ -34,6 +35,7 @@ class User : public Value {
void *operator new(size_t); // Do not implement
template <unsigned>
friend struct HungoffOperandTraits;
+ virtual void anchor();
protected:
/// OperandList - This is a pointer to the array of Uses for this User.
/// For nodes of fixed arity (e.g. a binary operator) this array will live
@@ -64,11 +66,11 @@ public:
void operator delete(void *Usr);
/// placement delete - required by std, but never called.
void operator delete(void*, unsigned) {
- assert(0 && "Constructor throws?");
+ llvm_unreachable("Constructor throws?");
}
/// placement delete - required by std, but never called.
void operator delete(void*, unsigned, bool) {
- assert(0 && "Constructor throws?");
+ llvm_unreachable("Constructor throws?");
}
protected:
template <int Idx, typename U> static Use &OpFrom(const U *that) {
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index a71e2fdefd72..a82ac45c49ed 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -15,9 +15,7 @@
#define LLVM_VALUE_H
#include "llvm/Use.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
-#include <string>
namespace llvm {
@@ -32,8 +30,6 @@ class GlobalAlias;
class InlineAsm;
class ValueSymbolTable;
template<typename ValueTy> class StringMapEntry;
-template <typename ValueTy = Value>
-class AssertingVH;
typedef StringMapEntry<Value*> ValueName;
class raw_ostream;
class AssemblyAnnotationWriter;
@@ -42,6 +38,7 @@ class LLVMContext;
class Twine;
class MDNode;
class Type;
+class StringRef;
//===----------------------------------------------------------------------===//
// Value Class
@@ -110,26 +107,16 @@ public:
/// All values hold a context through their type.
LLVMContext &getContext() const;
- // All values can potentially be named...
- bool hasName() const { return Name != 0; }
+ // All values can potentially be named.
+ bool hasName() const { return Name != 0 && SubclassID != MDStringVal; }
ValueName *getValueName() const { return Name; }
+ void setValueName(ValueName *VN) { Name = VN; }
/// getName() - Return a constant reference to the value's name. This is cheap
/// and guaranteed to return the same reference as long as the value is not
/// modified.
- ///
- /// This is currently guaranteed to return a StringRef for which data() points
- /// to a valid null terminated string. The use of StringRef.data() is
- /// deprecated here, however, and clients should not rely on it. If such
- /// behavior is needed, clients should use expensive getNameStr(), or switch
- /// to an interface that does not depend on null termination.
StringRef getName() const;
- /// getNameStr() - Return the name of the specified value, *constructing a
- /// string* to hold it. This is guaranteed to construct a string and is very
- /// expensive, clients should use getName() unless necessary.
- std::string getNameStr() const;
-
/// setName() - Change the name of the value, choosing a new unique name if
/// the provided name is taken.
///
@@ -205,6 +192,8 @@ public:
BlockAddressVal, // This is an instance of BlockAddress
ConstantExprVal, // This is an instance of ConstantExpr
ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero
+ ConstantDataArrayVal, // This is an instance of ConstantDataArray
+ ConstantDataVectorVal, // This is an instance of ConstantDataVector
ConstantIntVal, // This is an instance of ConstantInt
ConstantFPVal, // This is an instance of ConstantFP
ConstantArrayVal, // This is an instance of ConstantArray
@@ -273,14 +262,32 @@ public:
return true; // Values are always values.
}
- /// stripPointerCasts - This method strips off any unneeded pointer
- /// casts from the specified value, returning the original uncasted value.
- /// Note that the returned value has pointer type if the specified value does.
+ /// stripPointerCasts - This method strips off any unneeded pointer casts and
+ /// all-zero GEPs from the specified value, returning the original uncasted
+ /// value. If this is called on a non-pointer value, it returns 'this'.
Value *stripPointerCasts();
const Value *stripPointerCasts() const {
return const_cast<Value*>(this)->stripPointerCasts();
}
+ /// stripInBoundsConstantOffsets - This method strips off unneeded pointer casts and
+ /// all-constant GEPs from the specified value, returning the original
+ /// pointer value. If this is called on a non-pointer value, it returns
+ /// 'this'.
+ Value *stripInBoundsConstantOffsets();
+ const Value *stripInBoundsConstantOffsets() const {
+ return const_cast<Value*>(this)->stripInBoundsConstantOffsets();
+ }
+
+ /// stripInBoundsOffsets - This method strips off unneeded pointer casts and
+ /// any in-bounds Offsets from the specified value, returning the original
+ /// pointer value. If this is called on a non-pointer value, it returns
+ /// 'this'.
+ Value *stripInBoundsOffsets();
+ const Value *stripInBoundsOffsets() const {
+ return const_cast<Value*>(this)->stripInBoundsOffsets();
+ }
+
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
bool isDereferenceablePointer() const;
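
To distinguish the three strippers, a sketch assuming V is a Value* built from casts and GEPs:

// Per the comments above:
//   stripPointerCasts()            - casts + all-zero GEPs
//   stripInBoundsConstantOffsets() - casts + all-constant GEPs
//   stripInBoundsOffsets()         - casts + any in-bounds GEP
Value *Base = V->stripInBoundsOffsets(); // most aggressive of the three
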
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index bd132c05c327..95c834b451c9 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -440,3 +440,19 @@ bool llvm::isIdentifiedObject(const Value *V) {
return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
}
+
+/// isKnownNonNull - Return true if we know that the specified value is never
+/// null.
+bool llvm::isKnownNonNull(const Value *V) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V)) return true;
+
+ // A byval argument is never null.
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+
+ // Global values are not null unless extern weak.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return !GV->hasExternalWeakLinkage();
+ return false;
+}
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index d947220e078d..9f219f563739 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -127,9 +127,8 @@ AliasAnalysis::AliasResult
AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
- const char *AliasString;
+ const char *AliasString = 0;
switch (R) {
- default: llvm_unreachable("Unknown alias type!");
case NoAlias: No++; AliasString = "No alias"; break;
case MayAlias: May++; AliasString = "May alias"; break;
case PartialAlias: Partial++; AliasString = "Partial alias"; break;
@@ -154,9 +153,8 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
const Location &Loc) {
ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
- const char *MRString;
+ const char *MRString = 0;
switch (R) {
- default: llvm_unreachable("Unknown mod/ref type!");
case NoModRef: NoMR++; MRString = "NoModRef"; break;
case Ref: JustRef++; MRString = "JustRef"; break;
case Mod: JustMod++; MRString = "JustMod"; break;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 37271b94a201..ac72983a8d7b 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -193,8 +193,6 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::MustAlias:
PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
++MustAlias; break;
- default:
- errs() << "Unknown alias query result!\n";
}
}
}
@@ -223,8 +221,6 @@ bool AAEval::runOnFunction(Function &F) {
case AliasAnalysis::ModRef:
PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
++ModRef; break;
- default:
- errs() << "Unknown alias query result!\n";
}
}
}
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 3fcd3b55de57..f80e2fba8010 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -189,7 +189,9 @@ bool AliasSet::aliasesUnknownInst(Instruction *Inst, AliasAnalysis &AA) const {
}
for (iterator I = begin(), E = end(); I != E; ++I)
- if (AA.getModRefInfo(Inst, I.getPointer(), I.getSize()) !=
+ if (AA.getModRefInfo(Inst, AliasAnalysis::Location(I.getPointer(),
+ I.getSize(),
+ I.getTBAAInfo())) !=
AliasAnalysis::NoModRef)
return true;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index af400ba7e70e..20ecfd26a986 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -42,22 +42,6 @@ using namespace llvm;
// Useful predicates
//===----------------------------------------------------------------------===//
-/// isKnownNonNull - Return true if we know that the specified value is never
-/// null.
-static bool isKnownNonNull(const Value *V) {
- // Alloca never returns null, malloc might.
- if (isa<AllocaInst>(V)) return true;
-
- // A byval argument is never null.
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
-
- // Global values are not null unless extern weak.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage();
- return false;
-}
-
/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
/// object that never escapes from the function.
static bool isNonEscapingLocalObject(const Value *V) {
@@ -100,42 +84,59 @@ static bool isEscapeSource(const Value *V) {
/// getObjectSize - Return the size of the object specified by V, or
/// UnknownSize if unknown.
-static uint64_t getObjectSize(const Value *V, const TargetData &TD) {
+static uint64_t getObjectSize(const Value *V, const TargetData &TD,
+ bool RoundToAlign = false) {
Type *AccessTy;
+ unsigned Align;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
if (!GV->hasDefinitiveInitializer())
return AliasAnalysis::UnknownSize;
AccessTy = GV->getType()->getElementType();
+ Align = GV->getAlignment();
} else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
if (!AI->isArrayAllocation())
AccessTy = AI->getType()->getElementType();
else
return AliasAnalysis::UnknownSize;
+ Align = AI->getAlignment();
} else if (const CallInst* CI = extractMallocCall(V)) {
- if (!isArrayMalloc(V, &TD))
+ if (!RoundToAlign && !isArrayMalloc(V, &TD))
// The size is the argument to the malloc call.
if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
return C->getZExtValue();
return AliasAnalysis::UnknownSize;
} else if (const Argument *A = dyn_cast<Argument>(V)) {
- if (A->hasByValAttr())
+ if (A->hasByValAttr()) {
AccessTy = cast<PointerType>(A->getType())->getElementType();
- else
+ Align = A->getParamAlignment();
+ } else {
return AliasAnalysis::UnknownSize;
+ }
} else {
return AliasAnalysis::UnknownSize;
}
-
- if (AccessTy->isSized())
- return TD.getTypeAllocSize(AccessTy);
- return AliasAnalysis::UnknownSize;
+
+ if (!AccessTy->isSized())
+ return AliasAnalysis::UnknownSize;
+
+ uint64_t Size = TD.getTypeAllocSize(AccessTy);
+ // If there is an explicitly specified alignment, and we need to
+ // take alignment into account, round up the size. (If the alignment
+ // is implicit, getTypeAllocSize is sufficient.)
+ if (RoundToAlign && Align)
+ Size = RoundUpToAlignment(Size, Align);
+
+ return Size;
}
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, uint64_t Size,
const TargetData &TD) {
- uint64_t ObjectSize = getObjectSize(V, TD);
+ // This function needs to use the aligned object size because we allow
+ // reads a bit past the end given sufficient alignment.
+ uint64_t ObjectSize = getObjectSize(V, TD, /*RoundToAlign*/true);
+
return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
}
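
The rounding step above relies on RoundUpToAlignment from
llvm/Support/MathExtras.h; a standalone sketch of the arithmetic and of why it
matters for isObjectSmallerThan (the example values are illustrative):

    #include <cstdint>

    // Round Size up to the next multiple of Align (Align must be non-zero).
    static uint64_t roundUpToAlignment(uint64_t Size, uint64_t Align) {
      return (Size + Align - 1) / Align * Align;
    }
    // e.g. a 3-byte global with 'align 4' yields an aligned size of 4, so a
    // 4-byte load from it is no longer reported as reaching past the object.
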
@@ -706,8 +707,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) &&
- !CS.paramHasAttr(ArgNo+1, Attribute::ByVal)))
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -978,10 +978,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
//
// TODO: Returning PartialAlias instead of MayAlias is a mild hack; the
// practical effect of this is protecting TBAA in the case of dynamic
- // indices into arrays of unions. An alternative way to solve this would
- // be to have clang emit extra metadata for unions and/or union accesses.
- // A union-specific solution wouldn't handle the problem for malloc'd
- // memory however.
+ // indices into arrays of unions or malloc'd memory.
return PartialAlias;
}
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index d16665fa55cf..8a660f737c9b 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -58,6 +58,6 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
/// that we should not rely on the value itself, but only on the comparison to
/// the other block frequencies. We do this to avoid using floating point.
///
-BlockFrequency BlockFrequencyInfo::getBlockFreq(BasicBlock *BB) const {
+BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
return BFI->getBlockFreq(BB);
}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index bde3b76708fa..2730ce6c63bf 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -12,11 +12,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Constants.h"
+#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -29,121 +32,118 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
char BranchProbabilityInfo::ID = 0;
-namespace {
-// Please note that BranchProbabilityAnalysis is not a FunctionPass.
-// It is created by BranchProbabilityInfo (which is a FunctionPass), which
-// provides a clear interface. Thanks to that, all heuristics and other
-// private methods are hidden in the .cpp file.
-class BranchProbabilityAnalysis {
-
- typedef std::pair<const BasicBlock *, const BasicBlock *> Edge;
-
- DenseMap<Edge, uint32_t> *Weights;
-
- BranchProbabilityInfo *BP;
-
- LoopInfo *LI;
-
-
- // Weights are for internal use only. They are used by heuristics to help to
- // estimate edges' probability. Example:
- //
- // Using "Loop Branch Heuristics" we predict weights of edges for the
- // block BB2.
- // ...
- // |
- // V
- // BB1<-+
- // | |
- // | | (Weight = 124)
- // V |
- // BB2--+
- // |
- // | (Weight = 4)
- // V
- // BB3
- //
- // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
- // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
-
- static const uint32_t LBH_TAKEN_WEIGHT = 124;
- static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
-
- static const uint32_t RH_TAKEN_WEIGHT = 24;
- static const uint32_t RH_NONTAKEN_WEIGHT = 8;
-
- static const uint32_t PH_TAKEN_WEIGHT = 20;
- static const uint32_t PH_NONTAKEN_WEIGHT = 12;
-
- static const uint32_t ZH_TAKEN_WEIGHT = 20;
- static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
-
- // Standard weight value. Used when none of the heuristics set weight for
- // the edge.
- static const uint32_t NORMAL_WEIGHT = 16;
-
- // Minimum weight of an edge. Please note, that weight is NEVER 0.
- static const uint32_t MIN_WEIGHT = 1;
-
- // Return TRUE if BB leads directly to a Return Instruction.
- static bool isReturningBlock(BasicBlock *BB) {
- SmallPtrSet<BasicBlock *, 8> Visited;
-
- while (true) {
- TerminatorInst *TI = BB->getTerminator();
- if (isa<ReturnInst>(TI))
- return true;
-
- if (TI->getNumSuccessors() > 1)
- break;
-
- // It is unreachable block which we can consider as a return instruction.
- if (TI->getNumSuccessors() == 0)
- return true;
-
- Visited.insert(BB);
- BB = TI->getSuccessor(0);
+// Weights are for internal use only. They are used by heuristics to help to
+// estimate edges' probability. Example:
+//
+// Using "Loop Branch Heuristics" we predict weights of edges for the
+// block BB2.
+// ...
+// |
+// V
+// BB1<-+
+// | |
+// | | (Weight = 124)
+// V |
+// BB2--+
+// |
+// | (Weight = 4)
+// V
+// BB3
+//
+// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875
+// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125
+static const uint32_t LBH_TAKEN_WEIGHT = 124;
+static const uint32_t LBH_NONTAKEN_WEIGHT = 4;
+
+/// \brief Unreachable-terminating branch taken weight.
+///
+/// This is the weight for a branch being taken to a block that terminates
+/// (eventually) in unreachable. These are predicted as unlikely as possible.
+static const uint32_t UR_TAKEN_WEIGHT = 1;
+
+/// \brief Unreachable-terminating branch not-taken weight.
+///
+/// This is the weight for a branch not being taken toward a block that
+/// terminates (eventually) in unreachable. Such a branch is essentially never
+/// taken. Set the weight to an absurdly high value so that nested loops don't
+/// easily subsume it.
+static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
+
+static const uint32_t PH_TAKEN_WEIGHT = 20;
+static const uint32_t PH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t ZH_TAKEN_WEIGHT = 20;
+static const uint32_t ZH_NONTAKEN_WEIGHT = 12;
+
+static const uint32_t FPH_TAKEN_WEIGHT = 20;
+static const uint32_t FPH_NONTAKEN_WEIGHT = 12;
+
+// Standard weight value. Used when none of the heuristics sets a weight for
+// the edge.
+static const uint32_t NORMAL_WEIGHT = 16;
+
+// Minimum weight of an edge. Please note that a weight is NEVER 0.
+static const uint32_t MIN_WEIGHT = 1;
+
+static uint32_t getMaxWeightFor(BasicBlock *BB) {
+ return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
+}
- // Stop if cycle is detected.
- if (Visited.count(BB))
- return false;
- }
+/// \brief Calculate edge weights for successors that lead to unreachable.
+///
+/// Predict that a successor which necessarily leads to an
+/// unreachable-terminated block is taken as rarely as possible.
+bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ if (TI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(TI))
+ PostDominatedByUnreachable.insert(BB);
return false;
}
- uint32_t getMaxWeightFor(BasicBlock *BB) const {
- return UINT32_MAX / BB->getTerminator()->getNumSuccessors();
- }
+ SmallPtrSet<BasicBlock *, 4> UnreachableEdges;
+ SmallPtrSet<BasicBlock *, 4> ReachableEdges;
-public:
- BranchProbabilityAnalysis(DenseMap<Edge, uint32_t> *W,
- BranchProbabilityInfo *BP, LoopInfo *LI)
- : Weights(W), BP(BP), LI(LI) {
+ for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
+ if (PostDominatedByUnreachable.count(*I))
+ UnreachableEdges.insert(*I);
+ else
+ ReachableEdges.insert(*I);
}
- // Metadata Weights
- bool calcMetadataWeights(BasicBlock *BB);
+ // If all successors are in the set of blocks post-dominated by unreachable,
+ // this block is too.
+ if (UnreachableEdges.size() == TI->getNumSuccessors())
+ PostDominatedByUnreachable.insert(BB);
- // Return Heuristics
- bool calcReturnHeuristics(BasicBlock *BB);
-
- // Pointer Heuristics
- bool calcPointerHeuristics(BasicBlock *BB);
-
- // Loop Branch Heuristics
- bool calcLoopBranchHeuristics(BasicBlock *BB);
+ // Skip probabilities if this block has a single successor or if all of its
+ // successors were reachable.
+ if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())
+ return false;
- // Zero Heurestics
- bool calcZeroHeuristics(BasicBlock *BB);
+ uint32_t UnreachableWeight =
+ std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT);
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = UnreachableEdges.begin(),
+ E = UnreachableEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, UnreachableWeight);
+
+ if (ReachableEdges.empty())
+ return true;
+ uint32_t ReachableWeight =
+ std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT);
+ for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReachableEdges.begin(),
+ E = ReachableEdges.end();
+ I != E; ++I)
+ setEdgeWeight(BB, *I, ReachableWeight);
- bool runOnFunction(Function &F);
-};
-} // end anonymous namespace
+ return true;
+}
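
To make the new constants concrete, a worked example (sketch only) for a
conditional branch with one unreachable-bound successor and one reachable
successor:

    #include <algorithm>
    #include <stdint.h>

    static const uint32_t UR_TAKEN_WEIGHT = 1;
    static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
    static const uint32_t NORMAL_WEIGHT = 16, MIN_WEIGHT = 1;

    // One unreachable edge, one reachable edge (both set sizes are 1):
    static const uint32_t UnreachableWeight =
        std::max(UR_TAKEN_WEIGHT / 1, MIN_WEIGHT);        // = 1
    static const uint32_t ReachableWeight =
        std::max(UR_NONTAKEN_WEIGHT / 1, NORMAL_WEIGHT);  // = 1048575
    // P(reachable) = 1048575 / 1048576, so the unreachable-bound edge is
    // essentially never predicted taken, even inside deeply nested loops.
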
// Propagate existing explicit probabilities from either profile data or
// 'expect' intrinsic processing.
-bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) {
+bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
TerminatorInst *TI = BB->getTerminator();
if (TI->getNumSuccessors() == 1)
return false;
@@ -174,54 +174,14 @@ bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) {
}
assert(Weights.size() == TI->getNumSuccessors() && "Checked above");
for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
- BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
+ setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]);
return true;
}
-// Calculate Edge Weights using "Return Heuristics". Predict a successor which
-// leads directly to Return Instruction will not be taken.
-bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){
- if (BB->getTerminator()->getNumSuccessors() == 1)
- return false;
-
- SmallPtrSet<BasicBlock *, 4> ReturningEdges;
- SmallPtrSet<BasicBlock *, 4> StayEdges;
-
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- BasicBlock *Succ = *I;
- if (isReturningBlock(Succ))
- ReturningEdges.insert(Succ);
- else
- StayEdges.insert(Succ);
- }
-
- if (uint32_t numStayEdges = StayEdges.size()) {
- uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges;
- if (stayWeight < NORMAL_WEIGHT)
- stayWeight = NORMAL_WEIGHT;
-
- for (SmallPtrSet<BasicBlock *, 4>::iterator I = StayEdges.begin(),
- E = StayEdges.end(); I != E; ++I)
- BP->setEdgeWeight(BB, *I, stayWeight);
- }
-
- if (uint32_t numRetEdges = ReturningEdges.size()) {
- uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges;
- if (retWeight < MIN_WEIGHT)
- retWeight = MIN_WEIGHT;
- for (SmallPtrSet<BasicBlock *, 4>::iterator I = ReturningEdges.begin(),
- E = ReturningEdges.end(); I != E; ++I) {
- BP->setEdgeWeight(BB, *I, retWeight);
- }
- }
-
- return ReturningEdges.size() > 0;
-}
-
// Calculate Edge Weights using "Pointer Heuristics". Predict a comparison
// between two pointers, or between a pointer and NULL, will fail.
-bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
+bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -249,16 +209,14 @@ bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(Taken, NonTaken);
- BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
- BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
+ setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT);
return true;
}
// Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges
// as taken, exiting edges as not-taken.
-bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
- uint32_t numSuccs = BB->getTerminator()->getNumSuccessors();
-
+bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
Loop *L = LI->getLoopFor(BB);
if (!L)
return false;
@@ -267,17 +225,13 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
SmallPtrSet<BasicBlock *, 8> ExitingEdges;
SmallPtrSet<BasicBlock *, 8> InEdges; // Edges from header to the loop.
- bool isHeader = BB == L->getHeader();
-
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- BasicBlock *Succ = *I;
- Loop *SuccL = LI->getLoopFor(Succ);
- if (SuccL != L)
- ExitingEdges.insert(Succ);
- else if (Succ == L->getHeader())
- BackEdges.insert(Succ);
- else if (isHeader)
- InEdges.insert(Succ);
+ if (!L->contains(*I))
+ ExitingEdges.insert(*I);
+ else if (L->getHeader() == *I)
+ BackEdges.insert(*I);
+ else
+ InEdges.insert(*I);
}
if (uint32_t numBackEdges = BackEdges.size()) {
@@ -288,7 +242,7 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
for (SmallPtrSet<BasicBlock *, 8>::iterator EI = BackEdges.begin(),
EE = BackEdges.end(); EI != EE; ++EI) {
BasicBlock *Back = *EI;
- BP->setEdgeWeight(BB, Back, backWeight);
+ setEdgeWeight(BB, Back, backWeight);
}
}
@@ -300,27 +254,26 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) {
for (SmallPtrSet<BasicBlock *, 8>::iterator EI = InEdges.begin(),
EE = InEdges.end(); EI != EE; ++EI) {
BasicBlock *Back = *EI;
- BP->setEdgeWeight(BB, Back, inWeight);
+ setEdgeWeight(BB, Back, inWeight);
}
}
- uint32_t numExitingEdges = ExitingEdges.size();
- if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) {
- uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges;
+ if (uint32_t numExitingEdges = ExitingEdges.size()) {
+ uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges;
if (exitWeight < MIN_WEIGHT)
exitWeight = MIN_WEIGHT;
for (SmallPtrSet<BasicBlock *, 8>::iterator EI = ExitingEdges.begin(),
EE = ExitingEdges.end(); EI != EE; ++EI) {
BasicBlock *Exiting = *EI;
- BP->setEdgeWeight(BB, Exiting, exitWeight);
+ setEdgeWeight(BB, Exiting, exitWeight);
}
}
return true;
}
-bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) {
+bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
BranchInst * BI = dyn_cast<BranchInst>(BB->getTerminator());
if (!BI || !BI->isConditional())
return false;
@@ -375,45 +328,94 @@ bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) {
if (!isProb)
std::swap(Taken, NonTaken);
- BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT);
- BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT);
+ setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT);
return true;
}
+bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isConditional())
+ return false;
-bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
-
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
- BasicBlock *BB = I++;
-
- if (calcMetadataWeights(BB))
- continue;
+ Value *Cond = BI->getCondition();
+ FCmpInst *FCmp = dyn_cast<FCmpInst>(Cond);
+ if (!FCmp)
+ return false;
- if (calcLoopBranchHeuristics(BB))
- continue;
+ bool isProb;
+ if (FCmp->isEquality()) {
+ // f1 == f2 -> Unlikely
+ // f1 != f2 -> Likely
+ isProb = !FCmp->isTrueWhenEqual();
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) {
+ // !isnan -> Likely
+ isProb = true;
+ } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) {
+ // isnan -> Unlikely
+ isProb = false;
+ } else {
+ return false;
+ }
- if (calcReturnHeuristics(BB))
- continue;
+ BasicBlock *Taken = BI->getSuccessor(0);
+ BasicBlock *NonTaken = BI->getSuccessor(1);
- if (calcPointerHeuristics(BB))
- continue;
+ if (!isProb)
+ std::swap(Taken, NonTaken);
- calcZeroHeuristics(BB);
- }
+ setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT);
+ setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT);
- return false;
+ return true;
}
void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
}
bool BranchProbabilityInfo::runOnFunction(Function &F) {
- LoopInfo &LI = getAnalysis<LoopInfo>();
- BranchProbabilityAnalysis BPA(&Weights, this, &LI);
- return BPA.runOnFunction(F);
+ LastF = &F; // Store the last function we ran on for printing.
+ LI = &getAnalysis<LoopInfo>();
+ assert(PostDominatedByUnreachable.empty());
+
+ // Walk the basic blocks in post-order so that we can build up state about
+ // the successors of a block iteratively.
+ for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()),
+ E = po_end(&F.getEntryBlock());
+ I != E; ++I) {
+ DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n");
+ if (calcUnreachableHeuristics(*I))
+ continue;
+ if (calcMetadataWeights(*I))
+ continue;
+ if (calcLoopBranchHeuristics(*I))
+ continue;
+ if (calcPointerHeuristics(*I))
+ continue;
+ if (calcZeroHeuristics(*I))
+ continue;
+ calcFloatingPointHeuristics(*I);
+ }
+
+ PostDominatedByUnreachable.clear();
+ return false;
+}
+
+void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "---- Branch Probabilities ----\n";
+ // We print the probabilities from the last function the analysis ran over,
+ // or the function it is currently running over.
+ assert(LastF && "Cannot print prior to running over a function");
+ for (Function::const_iterator BI = LastF->begin(), BE = LastF->end();
+ BI != BE; ++BI) {
+ for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI);
+ SI != SE; ++SI) {
+ printEdgeProbability(OS << " ", BI, *SI);
+ }
+ }
}
uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
@@ -434,12 +436,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const {
bool BranchProbabilityInfo::
isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- uint32_t Weight = getEdgeWeight(Src, Dst);
- uint32_t Sum = getSumForBlock(Src);
-
- // FIXME: Implement BranchProbability::compare then change this code to
- // compare this BranchProbability against a static "hot" BranchProbability.
- return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}
BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
@@ -461,8 +459,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
}
}
- // FIXME: Use BranchProbability::compare.
- if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4)
+ // Hot probability is at least 4/5 = 80%
+ if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
return MaxSucc;
return 0;
@@ -483,8 +481,8 @@ getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const {
void BranchProbabilityInfo::
setEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst, uint32_t Weight) {
Weights[std::make_pair(Src, Dst)] = Weight;
- DEBUG(dbgs() << "set edge " << Src->getNameStr() << " -> "
- << Dst->getNameStr() << " weight to " << Weight
+ DEBUG(dbgs() << "set edge " << Src->getName() << " -> "
+ << Dst->getName() << " weight to " << Weight
<< (isEdgeHot(Src, Dst) ? " [is HOT now]\n" : "\n"));
}
@@ -499,11 +497,12 @@ getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const {
}
raw_ostream &
-BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
- BasicBlock *Dst) const {
+BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
+ const BasicBlock *Src,
+ const BasicBlock *Dst) const {
const BranchProbability Prob = getEdgeProbability(Src, Dst);
- OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
+ OS << "edge " << Src->getName() << " -> " << Dst->getName()
<< " probability is " << Prob
<< (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
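
Now that the heuristics live directly on the pass, a consumer looks like the
sketch below. ClientPass is hypothetical; the BranchProbabilityInfo calls are
the ones visible in this diff:

    #include "llvm/Pass.h"
    #include "llvm/Function.h"
    #include "llvm/Analysis/BranchProbabilityInfo.h"
    using namespace llvm;

    namespace {
    struct ClientPass : public FunctionPass {
      static char ID;
      ClientPass() : FunctionPass(ID) {}
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<BranchProbabilityInfo>();
        AU.setPreservesAll();
      }
      virtual bool runOnFunction(Function &F) {
        BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
        BasicBlock &Entry = F.getEntryBlock();
        if (BasicBlock *Hot = BPI.getHotSucc(&Entry))
          (void)BPI.getEdgeProbability(&Entry, Hot); // a BranchProbability
        return false;
      }
    };
    char ClientPass::ID = 0;
    }
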
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 7bb063fbbbcf..76854000bd23 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -77,7 +77,7 @@ namespace {
}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
std::string ErrorInfo;
@@ -111,7 +111,7 @@ namespace {
}
virtual bool runOnFunction(Function &F) {
- std::string Filename = "cfg." + F.getNameStr() + ".dot";
+ std::string Filename = "cfg." + F.getName().str() + ".dot";
errs() << "Writing '" << Filename << "'...";
std::string ErrorInfo;
@@ -143,7 +143,7 @@ INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
/// being a 'dot' and 'gv' program in your path.
///
void Function::viewCFG() const {
- ViewGraph(this, "cfg" + getNameStr());
+ ViewGraph(this, "cfg" + getName());
}
/// viewCFGOnly - This function is meant for use from the debugger. It works
@@ -152,7 +152,7 @@ void Function::viewCFG() const {
/// This can make the graph smaller.
///
void Function::viewCFGOnly() const {
- ViewGraph(this, "cfg" + getNameStr(), true);
+ ViewGraph(this, "cfg" + getName(), true);
}
FunctionPass *llvm::createCFGPrinterPass () {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index e79459d7a409..2e3ec8bebc86 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -10,6 +10,7 @@ add_llvm_library(LLVMAnalysis
BranchProbabilityInfo.cpp
CFGPrinter.cpp
CaptureTracking.cpp
+ CodeMetrics.cpp
ConstantFolding.cpp
DIBuilder.cpp
DbgInfoPrinter.cpp
@@ -58,10 +59,4 @@ add_llvm_library(LLVMAnalysis
ValueTracking.cpp
)
-add_llvm_library_dependencies(LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(IPA)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index b2c27d1dfc4b..dd33eeb1b376 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -16,25 +16,35 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Value.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Analysis/CaptureTracking.h"
using namespace llvm;
-/// As its comment mentions, PointerMayBeCaptured can be expensive.
-/// However, it's not easy for BasicAA to cache the result, because
-/// it's an ImmutablePass. To work around this, bound queries at a
-/// fixed number of uses.
-///
-/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
-/// a cache. Then we can move the code from BasicAliasAnalysis into
-/// that path, and remove this threshold.
-static int const Threshold = 20;
+CaptureTracker::~CaptureTracker() {}
+
+namespace {
+ struct SimpleCaptureTracker : public CaptureTracker {
+ explicit SimpleCaptureTracker(bool ReturnCaptures)
+ : ReturnCaptures(ReturnCaptures), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) { return true; }
+
+ bool captured(Use *U) {
+ if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
+ return false;
+
+ Captured = true;
+ return true;
+ }
+
+ bool ReturnCaptures;
+
+ bool Captured;
+ };
+}
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
@@ -45,6 +55,26 @@ static int const Threshold = 20;
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
bool ReturnCaptures, bool StoreCaptures) {
+ assert(!isa<GlobalValue>(V) &&
+ "It doesn't make sense to ask whether a global is captured.");
+
+ // TODO: If StoreCaptures is not true, we could do Fancy analysis
+ // to determine whether this store is not actually an escape point.
+ // In that case, BasicAliasAnalysis should be updated as well to
+ // take advantage of this.
+ (void)StoreCaptures;
+
+ SimpleCaptureTracker SCT(ReturnCaptures);
+ PointerMayBeCaptured(V, &SCT);
+ return SCT.Captured;
+}
+
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
+void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {
assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
SmallVector<Use*, Threshold> Worklist;
SmallSet<Use*, Threshold> Visited;
@@ -55,9 +85,10 @@ bool llvm::PointerMayBeCaptured(const Value *V,
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
if (Count++ >= Threshold)
- return true;
+ return Tracker->tooManyUses();
Use *U = &UI.getUse();
+ if (!Tracker->shouldExplore(U)) continue;
Visited.insert(U);
Worklist.push_back(U);
}
@@ -86,11 +117,10 @@ bool llvm::PointerMayBeCaptured(const Value *V,
// (think of self-referential objects).
CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
for (CallSite::arg_iterator A = B; A != E; ++A)
- if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture))
+ if (A->get() == V && !CS.doesNotCapture(A - B))
// The parameter is not marked 'nocapture' - captured.
- return true;
- // Only passed via 'nocapture' arguments, or is the called function - not
- // captured.
+ if (Tracker->captured(U))
+ return;
break;
}
case Instruction::Load:
@@ -99,18 +129,11 @@ bool llvm::PointerMayBeCaptured(const Value *V,
case Instruction::VAArg:
// "va-arg" from a pointer does not cause it to be captured.
break;
- case Instruction::Ret:
- if (ReturnCaptures)
- return true;
- break;
case Instruction::Store:
if (V == I->getOperand(0))
// Stored the pointer - conservatively assume it may be captured.
- // TODO: If StoreCaptures is not true, we could do Fancy analysis
- // to determine whether this store is not actually an escape point.
- // In that case, BasicAliasAnalysis should be updated as well to
- // take advantage of this.
- return true;
+ if (Tracker->captured(U))
+ return;
// Storing to the pointee does not cause the pointer to be captured.
break;
case Instruction::BitCast:
@@ -122,7 +145,8 @@ bool llvm::PointerMayBeCaptured(const Value *V,
UI != UE; ++UI) {
Use *U = &UI.getUse();
if (Visited.insert(U))
- Worklist.push_back(U);
+ if (Tracker->shouldExplore(U))
+ Worklist.push_back(U);
}
break;
case Instruction::ICmp:
@@ -136,13 +160,16 @@ bool llvm::PointerMayBeCaptured(const Value *V,
break;
// Otherwise, be conservative. There are crazy ways to capture pointers
// using comparisons.
- return true;
+ if (Tracker->captured(U))
+ return;
+ break;
default:
// Something else - be conservative and say it is captured.
- return true;
+ if (Tracker->captured(U))
+ return;
+ break;
}
}
- // All uses examined - not captured.
- return false;
+ // All uses examined.
}
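
The callback interface above invites custom trackers. A sketch of one that,
purely for illustration, treats only stores of the pointer as captures and
ignores other potentially-capturing uses (StoreOnlyTracker is hypothetical; the
three overrides mirror SimpleCaptureTracker):

    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    namespace {
    struct StoreOnlyTracker : public CaptureTracker {
      StoreOnlyTracker() : Captured(false) {}
      void tooManyUses() { Captured = true; }      // be conservative
      bool shouldExplore(Use *U) { return true; }  // visit every use
      bool captured(Use *U) {
        if (!isa<StoreInst>(U->getUser()))
          return false;    // not interesting to us; keep walking the uses
        Captured = true;
        return true;       // stop: PointerMayBeCaptured returns immediately
      }
      bool Captured;
    };
    }

    // Usage: StoreOnlyTracker T; PointerMayBeCaptured(V, &T); check T.Captured.
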
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
new file mode 100644
index 000000000000..316e7bc9349a
--- /dev/null
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -0,0 +1,184 @@
+//===- CodeMetrics.cpp - Code cost measurements ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements code cost measurement utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Target/TargetData.h"
+
+using namespace llvm;
+
+/// callIsSmall - If a call is likely to lower to a single target instruction,
+/// or is otherwise deemed small, return true.
+/// TODO: Perhaps calls like memcpy, strcpy, etc?
+bool llvm::callIsSmall(const Function *F) {
+ if (!F) return false;
+
+ if (F->hasLocalLinkage()) return false;
+
+ if (!F->hasName()) return false;
+
+ StringRef Name = F->getName();
+
+ // These will all likely lower to a single selection DAG node.
+ if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
+ Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
+ Name == "sin" || Name == "sinf" || Name == "sinl" ||
+ Name == "cos" || Name == "cosf" || Name == "cosl" ||
+ Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
+ return true;
+
+ // These are all likely to be optimized into something smaller.
+ if (Name == "pow" || Name == "powf" || Name == "powl" ||
+ Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
+ Name == "floor" || Name == "floorf" || Name == "ceil" ||
+ Name == "round" || Name == "ffs" || Name == "ffsl" ||
+ Name == "abs" || Name == "labs" || Name == "llabs")
+ return true;
+
+ return false;
+}
+
+bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
+ if (isa<PHINode>(I))
+ return true;
+
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ return GEP->hasAllConstantIndices();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // These intrinsics don't count as size.
+ return true;
+ }
+ }
+
+ if (const CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+ return true;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (TD && isa<TruncInst>(CI) &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+ return true;
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ return true;
+ }
+
+ return false;
+}
+
+/// analyzeBasicBlock - Fill in the current structure with information gleaned
+/// from the specified block.
+void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
+ const TargetData *TD) {
+ ++NumBlocks;
+ unsigned NumInstsBeforeThisBB = NumInsts;
+ for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
+ II != E; ++II) {
+ if (isInstructionFree(II, TD))
+ continue;
+
+ // Special handling for calls.
+ if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
+ ImmutableCallSite CS(cast<Instruction>(II));
+
+ if (const Function *F = CS.getCalledFunction()) {
+ // If a function is both internal and has a single use, then it is
+ // extremely likely to get inlined in the future (it was probably
+ // exposed by an interleaved devirtualization pass).
+ if (!CS.isNoInline() && F->hasInternalLinkage() && F->hasOneUse())
+ ++NumInlineCandidates;
+
+ // If this call is to the function itself, then the function is recursive.
+ // Inlining it into other functions is a bad idea, because this is
+ // basically just a form of loop peeling, and our metrics aren't useful
+ // for that case.
+ if (F == BB->getParent())
+ isRecursive = true;
+ }
+
+ if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
+ // Each argument to a call takes on average one instruction to set up.
+ NumInsts += CS.arg_size();
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
+ }
+ }
+
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+ if (!AI->isStaticAlloca())
+ this->usesDynamicAlloca = true;
+ }
+
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
+ ++NumVectorInsts;
+
+ ++NumInsts;
+ }
+
+ if (isa<ReturnInst>(BB->getTerminator()))
+ ++NumRets;
+
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // one is incorrect because all the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function,
+ // and the indirect jump would jump from the inlined copy of the function
+ // into the original function, which is undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ containsIndirectBr = true;
+
+ // Remember NumInsts for this BB.
+ NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+}
+
+void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
+ // If this function contains a call that "returns twice" (e.g., setjmp or
+ // _setjmp) and it isn't marked with "returns twice" itself, never inline it.
+ // This is a hack because we depend on the user marking their local variables
+ // as volatile if they are live across a setjmp call, and they probably
+ // won't do this in callers.
+ exposesReturnsTwice = F->callsFunctionThatReturnsTwice() &&
+ !F->hasFnAttr(Attribute::ReturnsTwice);
+
+ // Look at the size of the callee.
+ for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+ analyzeBasicBlock(&*BB, TD);
+}
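
A sketch of driving the new utility; mayConsiderInlining and its threshold are
made up for the example, while the fields are the ones populated above:

    #include "llvm/Analysis/CodeMetrics.h"
    #include "llvm/Function.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // Cheap front-end filter an inliner-like client might apply.
    static bool mayConsiderInlining(Function *F, const TargetData *TD) {
      CodeMetrics Metrics;
      Metrics.analyzeFunction(F, TD);
      if (Metrics.isRecursive || Metrics.containsIndirectBr ||
          Metrics.usesDynamicAlloca || Metrics.exposesReturnsTwice)
        return false;
      return Metrics.NumInsts < 200; // threshold is illustrative only
    }
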
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index df79849c3cf4..7a0a4e1e8246 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -26,6 +26,7 @@
#include "llvm/Operator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
@@ -51,6 +52,42 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
if (C->isAllOnesValue() && !DestTy->isX86_MMXTy())
return Constant::getAllOnesValue(DestTy);
+ // Handle a vector->integer cast.
+ if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
+ ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
+ if (CDV == 0)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ unsigned NumSrcElts = CDV->getType()->getNumElements();
+
+ Type *SrcEltTy = CDV->getType()->getElementType();
+
+ // If the vector is a vector of floating-point values, convert it to a
+ // vector of integers to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask VMCore to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ CDV = cast<ConstantDataVector>(C);
+ }
+
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy);
+ APInt Result(IT->getBitWidth(), 0);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Result <<= BitShift;
+ if (TD.isLittleEndian())
+ Result |= CDV->getElementAsInteger(NumSrcElts-i-1);
+ else
+ Result |= CDV->getElementAsInteger(i);
+ }
+
+ return ConstantInt::get(IT, Result);
+ }
+
// The code below only handles casts to vectors currently.
VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
if (DestVTy == 0)
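
The vector-to-integer fold added above shifts elements in from high to low; a
self-contained sketch of the same arithmetic for a little-endian target (plain
C++, no LLVM types):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Models: bitcast <4 x i8> <i8 1, i8 2, i8 3, i8 4> to i32
      uint8_t Elts[4] = {1, 2, 3, 4};
      uint32_t Result = 0;
      for (unsigned i = 0; i != 4; ++i) {
        Result <<= 8;               // BitShift = element size in bits
        Result |= Elts[4 - i - 1];  // little-endian: walk elements backwards
      }
      printf("0x%08x\n", Result);   // 0x04030201: element 0 is the low byte
      return 0;
    }
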
@@ -64,17 +101,16 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
// If this is a bitcast from constant vector -> vector, fold it.
- ConstantVector *CV = dyn_cast<ConstantVector>(C);
- if (CV == 0)
+ if (!isa<ConstantDataVector>(C) && !isa<ConstantVector>(C))
return ConstantExpr::getBitCast(C, DestTy);
// If the element types match, VMCore can fold it.
unsigned NumDstElt = DestVTy->getNumElements();
- unsigned NumSrcElt = CV->getNumOperands();
+ unsigned NumSrcElt = C->getType()->getVectorNumElements();
if (NumDstElt == NumSrcElt)
return ConstantExpr::getBitCast(C, DestTy);
- Type *SrcEltTy = CV->getType()->getElementType();
+ Type *SrcEltTy = C->getType()->getVectorElementType();
Type *DstEltTy = DestVTy->getElementType();
// Otherwise, we're changing the number of elements in a vector, which
@@ -94,7 +130,6 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt);
// Recursively handle this integer conversion, if possible.
C = FoldBitCast(C, DestIVTy, TD);
- if (!C) return ConstantExpr::getBitCast(C, DestTy);
// Finally, VMCore can handle this now that #elts line up.
return ConstantExpr::getBitCast(C, DestTy);
@@ -108,8 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt);
// Ask VMCore to do the conversion now that #elts line up.
C = ConstantExpr::getBitCast(C, SrcIVTy);
- CV = dyn_cast<ConstantVector>(C);
- if (!CV) // If VMCore wasn't able to fold it, bail out.
+ // If VMCore wasn't able to fold it, bail out.
+ if (!isa<ConstantVector>(C) && // FIXME: Remove ConstantVector.
+ !isa<ConstantDataVector>(C))
return C;
}
@@ -131,7 +167,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
Constant *Elt = Zero;
unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(SrcElt++));
+ Constant *Src =dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
@@ -148,28 +184,29 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
Result.push_back(Elt);
}
- } else {
- // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
- unsigned Ratio = NumDstElt/NumSrcElt;
- unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+ return ConstantVector::get(Result);
+ }
+
+ // Handle: bitcast (<2 x i64> <i64 0, i64 1> to <4 x i32>)
+ unsigned Ratio = NumDstElt/NumSrcElt;
+ unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits();
+
+ // Loop over each source value, expanding into multiple results.
+ for (unsigned i = 0; i != NumSrcElt; ++i) {
+ Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
+ if (!Src) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
- // Loop over each source value, expanding into multiple results.
- for (unsigned i = 0; i != NumSrcElt; ++i) {
- Constant *Src = dyn_cast<ConstantInt>(CV->getOperand(i));
- if (!Src) // Reject constantexpr elements.
- return ConstantExpr::getBitCast(C, DestTy);
+ unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
+ for (unsigned j = 0; j != Ratio; ++j) {
+ // Shift the piece of the value into the right place, depending on
+ // endianness.
+ Constant *Elt = ConstantExpr::getLShr(Src,
+ ConstantInt::get(Src->getType(), ShiftAmt));
+ ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
- unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
- for (unsigned j = 0; j != Ratio; ++j) {
- // Shift the piece of the value into the right place, depending on
- // endianness.
- Constant *Elt = ConstantExpr::getLShr(Src,
- ConstantInt::get(Src->getType(), ShiftAmt));
- ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
-
- // Truncate and remember this piece.
- Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
- }
+ // Truncate and remember this piece.
+ Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy));
}
}
@@ -272,7 +309,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
return false;
}
-
+
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
const StructLayout *SL = TD.getStructLayout(CS->getType());
unsigned Index = SL->getElementContainingOffset(ByteOffset);
@@ -310,12 +347,20 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
// not reached.
}
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
- uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType());
+ if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
+ isa<ConstantDataSequential>(C)) {
+ Type *EltTy = cast<SequentialType>(C->getType())->getElementType();
+ uint64_t EltSize = TD.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
- for (; Index != CA->getType()->getNumElements(); ++Index) {
- if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr,
+ uint64_t NumElts;
+ if (ArrayType *AT = dyn_cast<ArrayType>(C->getType()))
+ NumElts = AT->getNumElements();
+ else
+ NumElts = cast<VectorType>(C->getType())->getNumElements();
+
+ for (; Index != NumElts; ++Index) {
+ if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr,
BytesLeft, TD))
return false;
if (EltSize >= BytesLeft)
@@ -327,30 +372,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset,
}
return true;
}
-
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
- uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType());
- uint64_t Index = ByteOffset / EltSize;
- uint64_t Offset = ByteOffset - Index * EltSize;
- for (; Index != CV->getType()->getNumElements(); ++Index) {
- if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr,
- BytesLeft, TD))
- return false;
- if (EltSize >= BytesLeft)
- return true;
- Offset = 0;
- BytesLeft -= EltSize;
- CurPtr += EltSize;
- }
- return true;
- }
-
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
if (CE->getOpcode() == Instruction::IntToPtr &&
CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext()))
- return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
- BytesLeft, TD);
+ return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr,
+ BytesLeft, TD);
}
// Otherwise, unknown initializer type.
@@ -445,9 +472,9 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
- std::string Str;
- if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
- unsigned StrLen = Str.length();
+ StringRef Str;
+ if (TD && getConstantStringInfo(CE, Str) && !Str.empty()) {
+ unsigned StrLen = Str.size();
Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
// Replace load with immediate integer if the result is an integer or fp
@@ -542,8 +569,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// explicitly cast them so that they aren't implicitly casted by the
/// getelementptr.
static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
if (!TD) return 0;
Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext());
@@ -568,7 +595,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Constant *C =
ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
return C;
}
@@ -576,10 +603,11 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
- Type *ResultTy,
- const TargetData *TD) {
+ Type *ResultTy, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Constant *Ptr = Ops[0];
- if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
+ if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized() ||
+ !Ptr->getType()->isPointerTy())
return 0;
Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext());
@@ -602,7 +630,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResultTy);
if (ConstantExpr *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, TD);
+ Res = ConstantFoldConstantExpression(ResCE, TD, TLI);
return Res;
}
}
@@ -729,7 +757,9 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
/// Note that this fails if not all of the operands are constant. Otherwise,
/// this function can only fail when attempting to fold instructions like loads
/// and stores, which have no constant expression form.
-Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = 0;
@@ -765,7 +795,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD);
+ TD, TLI);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
@@ -781,28 +811,29 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
cast<Constant>(EVI->getAggregateOperand()),
EVI->getIndices());
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// returned; if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
SmallVector<Constant*, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
i != e; ++i) {
Constant *NewC = cast<Constant>(*i);
// Recursively fold the ConstantExpr's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
- NewC = ConstantFoldConstantExpression(NewCE, TD);
+ NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
Ops.push_back(NewC);
}
if (CE->isCompare())
return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- TD);
- return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD);
+ TD, TLI);
+ return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -817,7 +848,8 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
ArrayRef<Constant *> Ops,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
@@ -830,11 +862,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
switch (Opcode) {
default: return 0;
case Instruction::ICmp:
- case Instruction::FCmp: assert(0 && "Invalid for compares");
+ case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Call:
if (Function *F = dyn_cast<Function>(Ops.back()))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1));
+ return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
return 0;
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
@@ -888,9 +920,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = CastGEPIndices(Ops, DestTy, TD))
+ if (Constant *C = CastGEPIndices(Ops, DestTy, TD, TLI))
return C;
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, DestTy, TD, TLI))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1));
@@ -903,7 +935,8 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *Ops0, Constant *Ops1,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
@@ -920,7 +953,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -929,7 +962,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
Constant *Null = Constant::getNullValue(C->getType());
- return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI);
}
}
@@ -944,7 +977,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD, TLI);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -953,7 +986,7 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
CE0->getType() == IntPtrTy &&
CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
- CE1->getOperand(0), TD);
+ CE1->getOperand(0), TD, TLI);
}
}
@@ -962,13 +995,15 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) &&
CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) {
Constant *LHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,
+ TD, TLI);
Constant *RHS =
- ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD);
+ ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,
+ TD, TLI);
unsigned OpC =
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
Constant *Ops[] = { LHS, RHS };
- return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD);
+ return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, TD, TLI);
}
}
@@ -981,56 +1016,30 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
/// constant expression, or null if something is funny and we can't decide.
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
ConstantExpr *CE) {
- if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType()))
+ if (!CE->getOperand(1)->isNullValue())
return 0; // Do not allow stepping over the value!
-
+
// Loop over all of the operands, tracking down which value we are
- // addressing...
- gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
- for (++I; I != E; ++I)
- if (StructType *STy = dyn_cast<StructType>(*I)) {
- ConstantInt *CU = cast<ConstantInt>(I.getOperand());
- assert(CU->getZExtValue() < STy->getNumElements() &&
- "Struct index out of range!");
- unsigned El = (unsigned)CU->getZExtValue();
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
- C = CS->getOperand(El);
- } else if (isa<ConstantAggregateZero>(C)) {
- C = Constant::getNullValue(STy->getElementType(El));
- } else if (isa<UndefValue>(C)) {
- C = UndefValue::get(STy->getElementType(El));
- } else {
- return 0;
- }
- } else if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand())) {
- if (ArrayType *ATy = dyn_cast<ArrayType>(*I)) {
- if (CI->getZExtValue() >= ATy->getNumElements())
- return 0;
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
- C = CA->getOperand(CI->getZExtValue());
- else if (isa<ConstantAggregateZero>(C))
- C = Constant::getNullValue(ATy->getElementType());
- else if (isa<UndefValue>(C))
- C = UndefValue::get(ATy->getElementType());
- else
- return 0;
- } else if (VectorType *VTy = dyn_cast<VectorType>(*I)) {
- if (CI->getZExtValue() >= VTy->getNumElements())
- return 0;
- if (ConstantVector *CP = dyn_cast<ConstantVector>(C))
- C = CP->getOperand(CI->getZExtValue());
- else if (isa<ConstantAggregateZero>(C))
- C = Constant::getNullValue(VTy->getElementType());
- else if (isa<UndefValue>(C))
- C = UndefValue::get(VTy->getElementType());
- else
- return 0;
- } else {
- return 0;
- }
- } else {
- return 0;
- }
+ // addressing.
+ for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
+ C = C->getAggregateElement(CE->getOperand(i));
+ if (C == 0) return 0;
+ }
+ return C;
+}
+
+/// ConstantFoldLoadThroughGEPIndices - Given a constant and getelementptr
+/// indices (with an *implied* zero pointer index that is not in the list),
+/// return the constant value being addressed by a virtual load, or null if
+/// something is funny and we can't decide.
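+///
+/// For example, given a hypothetical aggregate
+///   @g = constant { i32, [2 x i32] } { i32 1, [2 x i32] [i32 2, i32 3] }
+/// the index list {1, 1} first selects the nested array and then its second
+/// element, so the virtual load yields i32 3.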
+Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
+ ArrayRef<Constant*> Indices) {
+ // Loop over all of the operands, tracking down which value we are
+ // addressing.
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ C = C->getAggregateElement(Indices[i]);
+ if (C == 0) return 0;
+ }
return C;
}
@@ -1045,6 +1054,7 @@ bool
llvm::canConstantFoldCallTo(const Function *F) {
switch (F->getIntrinsicID()) {
case Intrinsic::sqrt:
+ case Intrinsic::pow:
case Intrinsic::powi:
case Intrinsic::bswap:
case Intrinsic::ctpop:
@@ -1115,7 +1125,6 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
- return 0; // dummy return to suppress warning
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
@@ -1132,7 +1141,6 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
if (Ty->isDoubleTy())
return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
- return 0; // dummy return to suppress warning
}
/// ConstantFoldConvertToInt - Attempt an SSE floating point to integer
@@ -1143,11 +1151,8 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
/// available for the result. Returns null if the conversion cannot be
/// performed, otherwise returns the Constant value resulting from the
/// conversion.
-static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
- Type *Ty) {
- assert(Op && "Called with NULL operand");
- APFloat Val(Op->getValueAPF());
-
+static Constant *ConstantFoldConvertToInt(const APFloat &Val,
+ bool roundTowardZero, Type *Ty) {
// All of these conversion intrinsics form an integer of at most 64bits.
unsigned ResultWidth = cast<IntegerType>(Ty)->getBitWidth();
assert(ResultWidth <= 64 &&
@@ -1168,7 +1173,8 @@ static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero,
/// ConstantFoldCall - Attempt to constant fold a call to the specified function
/// with the specified arguments, returning null if unsuccessful.
Constant *
-llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
+llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
+ const TargetLibraryInfo *TLI) {
if (!F->hasName()) return 0;
StringRef Name = F->getName();
@@ -1183,6 +1189,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
}
+ if (!TLI)
+ return 0;
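+  // Without TargetLibraryInfo we cannot tell which C library functions the
+  // target actually provides, so it is not safe to fold calls such as cos()
+  // or fabs() below.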
if (!Ty->isFloatTy() && !Ty->isDoubleTy())
return 0;
@@ -1201,43 +1209,43 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
Op->getValueAPF().convertToDouble();
switch (Name[0]) {
case 'a':
- if (Name == "acos")
+ if (Name == "acos" && TLI->has(LibFunc::acos))
return ConstantFoldFP(acos, V, Ty);
- else if (Name == "asin")
+ else if (Name == "asin" && TLI->has(LibFunc::asin))
return ConstantFoldFP(asin, V, Ty);
- else if (Name == "atan")
+ else if (Name == "atan" && TLI->has(LibFunc::atan))
return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
- if (Name == "ceil")
+ if (Name == "ceil" && TLI->has(LibFunc::ceil))
return ConstantFoldFP(ceil, V, Ty);
- else if (Name == "cos")
+ else if (Name == "cos" && TLI->has(LibFunc::cos))
return ConstantFoldFP(cos, V, Ty);
- else if (Name == "cosh")
+ else if (Name == "cosh" && TLI->has(LibFunc::cosh))
return ConstantFoldFP(cosh, V, Ty);
- else if (Name == "cosf")
+ else if (Name == "cosf" && TLI->has(LibFunc::cosf))
return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
- if (Name == "exp")
+ if (Name == "exp" && TLI->has(LibFunc::exp))
return ConstantFoldFP(exp, V, Ty);
- if (Name == "exp2") {
+ if (Name == "exp2" && TLI->has(LibFunc::exp2)) {
// Constant fold exp2(x) as pow(2,x) in case the host doesn't have a
// C99 library.
return ConstantFoldBinaryFP(pow, 2.0, V, Ty);
}
break;
case 'f':
- if (Name == "fabs")
+ if (Name == "fabs" && TLI->has(LibFunc::fabs))
return ConstantFoldFP(fabs, V, Ty);
- else if (Name == "floor")
+ else if (Name == "floor" && TLI->has(LibFunc::floor))
return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
- if (Name == "log" && V > 0)
+ if (Name == "log" && V > 0 && TLI->has(LibFunc::log))
return ConstantFoldFP(log, V, Ty);
- else if (Name == "log10" && V > 0)
+ else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10))
return ConstantFoldFP(log10, V, Ty);
else if (F->getIntrinsicID() == Intrinsic::sqrt &&
(Ty->isFloatTy() || Ty->isDoubleTy())) {
@@ -1248,21 +1256,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
}
break;
case 's':
- if (Name == "sin")
+ if (Name == "sin" && TLI->has(LibFunc::sin))
return ConstantFoldFP(sin, V, Ty);
- else if (Name == "sinh")
+ else if (Name == "sinh" && TLI->has(LibFunc::sinh))
return ConstantFoldFP(sinh, V, Ty);
- else if (Name == "sqrt" && V >= 0)
+ else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sqrtf" && V >= 0)
+ else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))
return ConstantFoldFP(sqrt, V, Ty);
- else if (Name == "sinf")
+ else if (Name == "sinf" && TLI->has(LibFunc::sinf))
return ConstantFoldFP(sin, V, Ty);
break;
case 't':
- if (Name == "tan")
+ if (Name == "tan" && TLI->has(LibFunc::tan))
return ConstantFoldFP(tan, V, Ty);
- else if (Name == "tanh")
+ else if (Name == "tanh" && TLI->has(LibFunc::tanh))
return ConstantFoldFP(tanh, V, Ty);
break;
default:
@@ -1277,10 +1285,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
case Intrinsic::ctpop:
return ConstantInt::get(Ty, Op->getValue().countPopulation());
- case Intrinsic::cttz:
- return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
- case Intrinsic::ctlz:
- return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
case Intrinsic::convert_from_fp16: {
APFloat Val(Op->getValue());
@@ -1300,24 +1304,31 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
}
}
- if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+ // Support ConstantVector in case we have an Undef in the top.
+ if (isa<ConstantVector>(Operands[0]) ||
+ isa<ConstantDataVector>(Operands[0])) {
+ Constant *Op = cast<Constant>(Operands[0]);
switch (F->getIntrinsicID()) {
default: break;
case Intrinsic::x86_sse_cvtss2si:
case Intrinsic::x86_sse_cvtss2si64:
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
- if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
- return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/false, Ty);
case Intrinsic::x86_sse_cvttss2si:
case Intrinsic::x86_sse_cvttss2si64:
case Intrinsic::x86_sse2_cvttsd2si:
case Intrinsic::x86_sse2_cvttsd2si64:
- if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
- return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+ if (ConstantFP *FPOp =
+ dyn_cast_or_null<ConstantFP>(Op->getAggregateElement(0U)))
+ return ConstantFoldConvertToInt(FPOp->getValueAPF(),
+ /*roundTowardZero=*/true, Ty);
}
}
-
+
if (isa<UndefValue>(Operands[0])) {
if (F->getIntrinsicID() == Intrinsic::bswap)
return Operands[0];
@@ -1337,16 +1348,21 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
return 0;
-
+
double Op2V = Ty->isFloatTy() ?
(double)Op2->getValueAPF().convertToFloat():
Op2->getValueAPF().convertToDouble();
- if (Name == "pow")
+ if (F->getIntrinsicID() == Intrinsic::pow) {
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
- if (Name == "fmod")
+ }
+ if (!TLI)
+ return 0;
+ if (Name == "pow" && TLI->has(LibFunc::pow))
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+ if (Name == "fmod" && TLI->has(LibFunc::fmod))
return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
- if (Name == "atan2")
+ if (Name == "atan2" && TLI->has(LibFunc::atan2))
return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
@@ -1361,7 +1377,6 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
return 0;
}
-
if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
switch (F->getIntrinsicID()) {
@@ -1375,7 +1390,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
APInt Res;
bool Overflow;
switch (F->getIntrinsicID()) {
- default: assert(0 && "Invalid case");
+ default: llvm_unreachable("Invalid case");
case Intrinsic::sadd_with_overflow:
Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
break;
@@ -1401,6 +1416,14 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands) {
};
return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
}
+ case Intrinsic::cttz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros());
+ case Intrinsic::ctlz:
+ // FIXME: This should check for Op2 == 1, and become unreachable if
+ // Op1 == 0.
+ return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros());
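+      // Note: the second operand of the two-operand cttz/ctlz form is the i1
+      // is_zero_undef flag; when it is set, a zero input produces an
+      // undefined result, which is what the FIXMEs above refer to.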
}
}
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index bfa429d54120..85913b11bef4 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -17,6 +17,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
@@ -76,10 +77,11 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
StringRef Directory, StringRef Producer,
bool isOptimized, StringRef Flags,
unsigned RunTimeVer) {
- assert (Lang <= dwarf::DW_LANG_D && Lang >= dwarf::DW_LANG_C89
- && "Invalid Language tag");
- assert (!Filename.empty()
- && "Unable to create compile unit without filename");
+ assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) ||
+ (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
+ "Invalid Language tag");
+ assert(!Filename.empty() &&
+ "Unable to create compile unit without filename");
Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
TempEnumTypes = MDNode::getTemporary(VMContext, TElts);
Value *THElts[] = { TempEnumTypes };
@@ -189,7 +191,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
return DIType(MDNode::get(VMContext, Elts));
}
-/// createQaulifiedType - Create debugging information entry for a qualified
+/// createQualifiedType - Create debugging information entry for a qualified
/// type, e.g. 'const int'.
DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
@@ -358,13 +360,58 @@ DIType DIBuilder::createObjCIVar(StringRef Name,
return DIType(MDNode::get(VMContext, Elts));
}
+/// createObjCIVar - Create debugging information entry for Objective-C
+/// instance variable.
+DIType DIBuilder::createObjCIVar(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t OffsetInBits, unsigned Flags,
+ DIType Ty, MDNode *PropertyNode) {
+ // TAG_member is encoded in DIDerivedType format.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_member),
+ getNonCompileUnitScope(File),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+ Ty,
+ PropertyNode
+ };
+ return DIType(MDNode::get(VMContext, Elts));
+}
+
+/// createObjCProperty - Create debugging information entry for Objective-C
+/// property.
+DIObjCProperty DIBuilder::createObjCProperty(StringRef Name,
+ DIFile File, unsigned LineNumber,
+ StringRef GetterName,
+ StringRef SetterName,
+ unsigned PropertyAttributes,
+ DIType Ty) {
+ Value *Elts[] = {
+ GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property),
+ MDString::get(VMContext, Name),
+ File,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+ MDString::get(VMContext, GetterName),
+ MDString::get(VMContext, SetterName),
+ ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes),
+ Ty
+ };
+ return DIObjCProperty(MDNode::get(VMContext, Elts));
+}
+
/// createClassType - Create debugging information entry for a class.
DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
DIType DerivedFrom, DIArray Elements,
- MDNode *VTableHoder, MDNode *TemplateParams) {
+ MDNode *VTableHolder, MDNode *TemplateParams) {
// TAG_class_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
@@ -379,7 +426,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
DerivedFrom,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- VTableHoder,
+ VTableHolder,
TemplateParams
};
return DIType(MDNode::get(VMContext, Elts));
@@ -440,7 +487,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -465,7 +512,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -484,9 +531,9 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt64Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
ParameterTypes,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -500,7 +547,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits,
uint64_t AlignInBits,
- DIArray Elements) {
+ DIArray Elements) {
// TAG_enumeration_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
@@ -512,7 +559,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -628,6 +675,31 @@ DIType DIBuilder::createTemporaryType(DIFile F) {
return DIType(Node);
}
+/// createForwardDecl - Create a temporary forward-declared type that
+/// can be RAUW'd if the full type is seen.
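+/// (RAUW: the temporary MDNode is later replaced via replaceAllUsesWith once
+/// the full definition becomes available.)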
+DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIFile F,
+ unsigned Line, unsigned RuntimeLang) {
+ // Create a temporary MDNode.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ NULL, // TheCU
+ MDString::get(VMContext, Name),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ // To ease transition include sizes etc of 0.
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+ ConstantInt::get(Type::getInt32Ty(VMContext),
+ DIDescriptor::FlagFwdDecl),
+ NULL,
+ DIArray(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts);
+ return DIType(Node);
+}
+
/// getOrCreateArray - Get a DIArray, create one if required.
DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
if (Elements.empty()) {
@@ -738,7 +810,7 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
Elts.push_back(MDString::get(VMContext, Name));
Elts.push_back(F);
Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
- (LineNo | (ArgNo << 24))));
+ (LineNo | (ArgNo << 24))));
Elts.push_back(Ty);
Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
@@ -754,6 +826,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
DIFile File, unsigned LineNo,
DIType Ty,
bool isLocalToUnit, bool isDefinition,
+ unsigned ScopeLine,
unsigned Flags, bool isOptimized,
Function *Fn,
MDNode *TParams,
@@ -777,13 +850,14 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context,
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ NULL,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
TParams,
Decl,
- THolder
+ THolder,
+ ConstantInt::get(Type::getInt32Ty(VMContext), ScopeLine)
};
MDNode *Node = MDNode::get(VMContext, Elts);
@@ -831,7 +905,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context,
Fn,
TParam,
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- THolder
+ THolder,
+    // FIXME: Do we want to use a different scope line?
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
MDNode *Node = MDNode::get(VMContext, Elts);
return DISubprogram(Node);
@@ -854,7 +930,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
/// createLexicalBlockFile - This creates a new MDNode that encapsulates
/// an existing scope with a new filename.
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
- DIFile File) {
+ DIFile File) {
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
Scope,
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 44457d3c3de9..f61a8f3a5eb9 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -68,7 +68,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
return 0;
if (Elt < DbgNode->getNumOperands())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+ if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
return CI->getZExtValue();
return 0;
@@ -289,6 +289,10 @@ bool DIDescriptor::isEnumerator() const {
return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
}
+/// isObjCProperty - Return true if this descriptor's tag is DW_TAG_APPLE_property.
+bool DIDescriptor::isObjCProperty() const {
+ return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property;
+}
//===----------------------------------------------------------------------===//
// Simple Descriptor Constructors and other Methods
//===----------------------------------------------------------------------===//
@@ -373,6 +377,19 @@ bool DICompileUnit::Verify() const {
return true;
}
+/// Verify - Verify that an ObjC property is well formed.
+bool DIObjCProperty::Verify() const {
+ if (!DbgNode)
+ return false;
+ unsigned Tag = getTag();
+ if (Tag != dwarf::DW_TAG_APPLE_property) return false;
+ DIType Ty = getType();
+ if (!Ty.Verify()) return false;
+
+ // Don't worry about the rest of the strings for now.
+ return true;
+}
+
/// Verify - Verify that a type descriptor is well formed.
bool DIType::Verify() const {
if (!DbgNode)
@@ -482,6 +499,7 @@ bool DINameSpace::Verify() const {
/// return base type size.
uint64_t DIDerivedType::getOriginalTypeSize() const {
unsigned Tag = getTag();
+
if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef ||
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
@@ -490,7 +508,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
// approach.
if (!BaseType.isValid())
return getSizeInBits();
- if (BaseType.isDerivedType())
+ // If this is a derived type, go ahead and get the base type, unless
+    // it's a reference, in which case it's just the size of the field. Pointer types
+ // have no need of this since they're a different type of qualification
+ // on the type.
+ if (BaseType.getTag() == dwarf::DW_TAG_reference_type)
+ return getSizeInBits();
+ else if (BaseType.isDerivedType())
return DIDerivedType(BaseType).getOriginalTypeSize();
else
return BaseType.getSizeInBits();
@@ -499,6 +523,13 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
return getSizeInBits();
}
+/// getObjCProperty - Return property node, if this ivar is associated with one.
+MDNode *DIDerivedType::getObjCProperty() const {
+ if (getVersion() <= LLVMDebugVersion11 || DbgNode->getNumOperands() <= 10)
+ return NULL;
+ return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
+}
+
/// isInlinedFnArgument - Return true if this variable provides debugging
/// information for an inlined function argument.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
@@ -565,8 +596,7 @@ StringRef DIScope::getFilename() const {
return DIType(DbgNode).getFilename();
if (isFile())
return DIFile(DbgNode).getFilename();
- assert(0 && "Invalid DIScope!");
- return StringRef();
+ llvm_unreachable("Invalid DIScope!");
}
StringRef DIScope::getDirectory() const {
@@ -586,8 +616,7 @@ StringRef DIScope::getDirectory() const {
return DIType(DbgNode).getDirectory();
if (isFile())
return DIFile(DbgNode).getDirectory();
- assert(0 && "Invalid DIScope!");
- return StringRef();
+ llvm_unreachable("Invalid DIScope!");
}
DIArray DICompileUnit::getEnumTypes() const {
@@ -632,6 +661,32 @@ DIArray DICompileUnit::getGlobalVariables() const {
}
//===----------------------------------------------------------------------===//
+// DIDescriptor: vtable anchors for all descriptors.
+//===----------------------------------------------------------------------===//
+
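+// Defining these trivial methods out-of-line anchors each class's vtable to
+// this translation unit.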
+void DIScope::anchor() { }
+
+void DICompileUnit::anchor() { }
+
+void DIFile::anchor() { }
+
+void DIType::anchor() { }
+
+void DIBasicType::anchor() { }
+
+void DIDerivedType::anchor() { }
+
+void DICompositeType::anchor() { }
+
+void DISubprogram::anchor() { }
+
+void DILexicalBlock::anchor() { }
+
+void DINameSpace::anchor() { }
+
+void DILexicalBlockFile::anchor() { }
+
+//===----------------------------------------------------------------------===//
// DIDescriptor: dump routines for all descriptors.
//===----------------------------------------------------------------------===//
@@ -679,8 +734,13 @@ void DIType::print(raw_ostream &OS) const {
if (isBasicType())
DIBasicType(DbgNode).print(OS);
- else if (isDerivedType())
- DIDerivedType(DbgNode).print(OS);
+ else if (isDerivedType()) {
+ DIDerivedType DTy = DIDerivedType(DbgNode);
+ DTy.print(OS);
+ DICompositeType CTy = getDICompositeType(DTy);
+ if (CTy.Verify())
+ CTy.print(OS);
+ }
else if (isCompositeType())
DICompositeType(DbgNode).print(OS);
else {
@@ -698,7 +758,9 @@ void DIBasicType::print(raw_ostream &OS) const {
/// print - Print derived type.
void DIDerivedType::print(raw_ostream &OS) const {
- OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
+ OS << "\n\t Derived From: ";
+ getTypeDerivedFrom().print(OS);
+ OS << "\n\t";
}
/// print - Print composite type.
@@ -725,6 +787,9 @@ void DISubprogram::print(raw_ostream &OS) const {
if (isDefinition())
OS << " [def] ";
+ if (getScopeLineNumber() != getLineNumber())
+ OS << " [Scope: " << getScopeLineNumber() << "] ";
+
OS << "\n";
}
@@ -927,9 +992,30 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(Module &M) {
- if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"))
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i)
- addCompileUnit(DICompileUnit(CU_Nodes->getOperand(i)));
+ if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ addCompileUnit(CU);
+ if (CU.getVersion() > LLVMDebugVersion10) {
+ DIArray GVs = CU.getGlobalVariables();
+ for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
+ DIGlobalVariable DIG(GVs.getElement(i));
+ if (addGlobalVariable(DIG))
+ processType(DIG.getType());
+ }
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
+ processSubprogram(DISubprogram(SPs.getElement(i)));
+ DIArray EnumTypes = CU.getEnumTypes();
+ for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
+ processType(DIType(EnumTypes.getElement(i)));
+ DIArray RetainedTypes = CU.getRetainedTypes();
+ for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
+ processType(DIType(RetainedTypes.getElement(i)));
+ return;
+ }
+ }
+ }
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 6de4e1e1d7de..1604576ec4ae 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -35,6 +35,8 @@ namespace {
};
}
+void DominanceFrontier::anchor() { }
+
const DominanceFrontier::DomSetType &
DominanceFrontier::calculate(const DominatorTree &DT,
const DomTreeNode *Node) {
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index eae83fdc369c..8ffef29870ae 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMipa
GlobalsModRef.cpp
IPA.cpp
)
-
-add_llvm_library_dependencies(LLVMipa
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 2e79eab51ff7..0df3e8a38218 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -127,16 +127,9 @@ private:
}
}
- // Loop over all of the users of the function, looking for non-call uses.
- for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
- User *U = *I;
- if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
- || !CallSite(cast<Instruction>(U)).isCallee(I)) {
- // Not a call, or being used as a parameter rather than as the callee.
- ExternalCallingNode->addCalledFunction(CallSite(), Node);
- break;
- }
- }
+ // If this function has its address taken, anything could call it.
+ if (F->hasAddressTaken())
+ ExternalCallingNode->addCalledFunction(CallSite(), Node);
// If this function is not defined in this translation unit, it could call
// anything.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index b226d66cd78a..c1d8e3e65a62 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -21,6 +21,7 @@
#include "llvm/Instructions.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -467,6 +468,11 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
} else if (isMalloc(&cast<Instruction>(*II)) ||
isFreeCall(&cast<Instruction>(*II))) {
FunctionEffect |= ModRef;
+ } else if (IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(&*II)) {
+ // The callgraph doesn't include intrinsic calls.
+ Function *Callee = Intrinsic->getCalledFunction();
+ ModRefBehavior Behaviour = AliasAnalysis::getModRefBehavior(Callee);
+ FunctionEffect |= (Behaviour & ModRef);
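+        // For example, llvm.memcpy both reads and writes memory, so it adds
+        // ModRef here even though it never appears in the call graph.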
}
if ((FunctionEffect & Mod) == 0)
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt
new file mode 100644
index 000000000000..980e91809b55
--- /dev/null
+++ b/lib/Analysis/IPA/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Analysis/IPA/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IPA
+parent = Libraries
+library_name = ipa
+required_libraries = Analysis Core Support
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index d0ca8920ab9f..b80966b65a17 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -79,10 +79,44 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
return false;
}
-/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
+/// Return true if all loop headers that dominate this block are in simplified
+/// form.
+static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
+ const LoopInfo *LI,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ Loop *NearestLoop = 0;
+ for (DomTreeNode *Rung = DT->getNode(BB);
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *DomBB = Rung->getBlock();
+ Loop *DomLoop = LI->getLoopFor(DomBB);
+ if (DomLoop && DomLoop->getHeader() == DomBB) {
+ // If the domtree walk reaches a loop with no preheader, return false.
+ if (!DomLoop->isLoopSimplifyForm())
+ return false;
+ // If we have already checked this loop nest, stop checking.
+ if (SimpleLoopNests.count(DomLoop))
+ break;
+ // If we have not already checked this loop nest, remember the loop
+ // header nearest to BB. The nearest loop may not contain BB.
+ if (!NearestLoop)
+ NearestLoop = DomLoop;
+ }
+ }
+ if (NearestLoop)
+ SimpleLoopNests.insert(NearestLoop);
+ return true;
+}
+
+/// AddUsersImpl - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+bool IVUsers::AddUsersImpl(Instruction *I,
+ SmallPtrSet<Loop*,16> &SimpleLoopNests) {
+ // Add this IV user to the Processed set before returning false to ensure that
+ // all IV users are members of the set. See IVUsers::isIVUserOrOperand.
+ if (!Processed.insert(I))
+ return true; // Instruction already handled.
+
if (!SE->isSCEVable(I->getType()))
return false; // Void and FP expressions cannot be reduced.
@@ -93,9 +127,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
return false;
- if (!Processed.insert(I))
- return true; // Instruction already handled.
-
// Get the symbolic expression for this instruction.
const SCEV *ISE = SE->getSCEV(I);
@@ -115,6 +146,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (isa<PHINode>(User) && Processed.count(User))
continue;
+ // Only consider IVUsers that are dominated by simplified loop
+ // headers. Otherwise, SCEVExpander will crash.
+ BasicBlock *UseBB = User->getParent();
+ // A phi's use is live out of its predecessor block.
+ if (PHINode *PHI = dyn_cast<PHINode>(User)) {
+ unsigned OperandNo = UI.getOperandNo();
+ unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo);
+ UseBB = PHI->getIncomingBlock(ValNo);
+ }
+ if (!isSimplifiedLoopNest(UseBB, DT, LI, SimpleLoopNests))
+ return false;
+
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
@@ -124,12 +167,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersIfInteresting(User)) {
+ !AddUsersImpl(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
- } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
+ } else if (Processed.count(User) || !AddUsersImpl(User, SimpleLoopNests)) {
DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -153,6 +196,15 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true;
}
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
+ // SCEVExpander can only handle users that are dominated by simplified loop
+ // entries. Keep track of all loops that are only dominated by other simple
+ // loops so we don't traverse the domtree for each user.
+ SmallPtrSet<Loop*,16> SimpleLoopNests;
+
+ return AddUsersImpl(I, SimpleLoopNests);
+}
+
IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
@@ -268,6 +320,7 @@ void IVStrideUse::transformToPostInc(const Loop *L) {
void IVStrideUse::deleted() {
// Remove this user from the list.
+ Parent->Processed.erase(this->getUser());
Parent->IVUses.erase(this);
// this now dangles!
}
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index e12e322c2a99..3e3d2ab75380 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -11,645 +11,1012 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "inline-cost"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/CallingConv.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
-/// callIsSmall - If a call is likely to lower to a single target instruction,
-/// or is otherwise deemed small return true.
-/// TODO: Perhaps calls like memcpy, strcpy, etc?
-bool llvm::callIsSmall(const Function *F) {
- if (!F) return false;
+STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
+
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ // TargetData if available, or null.
+ const TargetData *const TD;
+
+ // The called function.
+ Function &F;
+
+ int Threshold;
+ int Cost;
+ const bool AlwaysInline;
+
+ bool IsRecursive;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+ // likely simplifications post-inlining. The most important aspect we track
+ // is CFG altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
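+  // For example, if the caller passes a pointer 8 bytes into an alloca as a
+  // call argument, that argument maps to (alloca, 8) here, and in-bounds
+  // GEPs on it extend the accumulated offset.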
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitICmp(ICmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitCallSite(CallSite CS);
+
+public:
+ CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
+ : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+ AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+ IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
+ }
- if (F->hasLocalLinkage()) return false;
+ bool analyzeCall(CallSite CS);
- if (!F->hasName()) return false;
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
- StringRef Name = F->getName();
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
- // These will all likely lower to a single selection DAG node.
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
- Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
- Name == "sin" || Name == "sinf" || Name == "sinl" ||
- Name == "cos" || Name == "cosf" || Name == "cosl" ||
- Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" )
- return true;
+ void dump();
+};
- // These are all likely to be optimized into something smaller.
- if (Name == "pow" || Name == "powf" || Name == "powl" ||
- Name == "exp2" || Name == "exp2l" || Name == "exp2f" ||
- Name == "floor" || Name == "floorf" || Name == "ceil" ||
- Name == "round" || Name == "ffs" || Name == "ffsl" ||
- Name == "abs" || Name == "labs" || Name == "llabs")
- return true;
+} // namespace
- return false;
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
}
-/// analyzeBasicBlock - Fill in the current structure with information gleaned
-/// from the specified block.
-void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
- const TargetData *TD) {
- ++NumBlocks;
- unsigned NumInstsBeforeThisBB = NumInsts;
- for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
- II != E; ++II) {
- if (isa<PHINode>(II)) continue; // PHI nodes don't count.
-
- // Special handling for calls.
- if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
- if (isa<DbgInfoIntrinsic>(II))
- continue; // Debug intrinsics don't count as size.
-
- ImmutableCallSite CS(cast<Instruction>(II));
-
- if (const Function *F = CS.getCalledFunction()) {
- // If a function is both internal and has a single use, then it is
- // extremely likely to get inlined in the future (it was probably
- // exposed by an interleaved devirtualization pass).
- if (F->hasInternalLinkage() && F->hasOneUse())
- ++NumInlineCandidates;
-
- // If this call is to function itself, then the function is recursive.
- // Inlining it into other functions is a bad idea, because this is
- // basically just a form of loop peeling, and our metrics aren't useful
- // for that case.
- if (F == BB->getParent())
- isRecursive = true;
- }
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
- if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
- // Each argument to a call takes on average one instruction to set up.
- NumInsts += CS.arg_size();
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
- // We don't want inline asm to count as a call - that would prevent loop
- // unrolling. The argument setup cost is still real, though.
- if (!isa<InlineAsm>(CS.getCalledValue()))
- ++NumCalls;
- }
- }
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
+}
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
- if (!AI->isStaticAlloca())
- this->usesDynamicAlloca = true;
- }
+/// \brief Disable SROA for the candidate marked by this cost iterator.
+///
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
- if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
- ++NumVectorInsts;
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
- if (const CastInst *CI = dyn_cast<CastInst>(II)) {
- // Noop casts, including ptr <-> int, don't count.
- if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
- isa<PtrToIntInst>(CI))
- continue;
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (isa<TruncInst>(CI) && TD &&
- TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
- continue;
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- continue;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
- // If a GEP has all constant indices, it will probably be folded with
- // a load/store.
- if (GEPI->hasAllConstantIndices())
- continue;
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ if (IsSROAValid) {
+ accumulateSROACost(CostIt, InstructionCost);
+ return true;
+ }
+
+ disableSROA(CostIt);
+ return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
+ return false;
+
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
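+///
+/// For example, on a 64-bit target with natural alignment, indexing field 1
+/// of { i32, i64 } adds that field's struct layout offset of 8 bytes to
+/// Offset.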
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ if (!TD)
+ return false;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
+ return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
}
- ++NumInsts;
+ APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
+ return true;
+}
- if (isa<ReturnInst>(BB->getTerminator()))
- ++NumRets;
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
- // We never want to inline functions that contain an indirectbr. This is
- // incorrect because all the blockaddress's (in static global initializers
- // for example) would be referring to the original function, and this indirect
- // jump would jump from the inlined copy of the function into the original
- // function which is extremely undefined behavior.
- if (isa<IndirectBrInst>(BB->getTerminator()))
- containsIndirectBr = true;
+ // We will happily inline static alloca instructions or dynamic alloca
+ // instructions in always-inline situations.
+ if (AlwaysInline || I.isStaticAlloca())
+ return Base::visitAlloca(I);
- // Remember NumInsts for this BB.
- NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
}
-// CountCodeReductionForConstant - Figure out an approximation for how many
-// instructions will be constant folded if the specified value is constant.
-//
-unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) {
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
- // We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(*U);
- const unsigned NumSucc = TI.getNumSuccessors();
- unsigned Instrs = 0;
- for (unsigned I = 0; I != NumSucc; ++I)
- Instrs += NumBBInsts[TI.getSuccessor(I)];
- // We don't know which blocks will be eliminated, so use the average size.
- Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- } else {
- // Figure out if this instruction will be removed due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+  // though we don't want to propagate its bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
- if (AllOperandsConstant) {
- // We will get to remove this instruction...
- Reduction += InlineConstants::InstrCost;
+ // Phi nodes are always zero-cost.
+ return true;
+}
- // And any other instructions that use it which become constants
- // themselves.
- Reduction += CountCodeReductionForConstant(&Inst);
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (TD && I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded, and disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
}
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also handle SROA candidates here, we already know that the GEP is
+ // all-constant indexed.
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ return true;
}
}
- return Reduction;
-}
-// CountCodeReductionForAlloca - Figure out an approximation of how much smaller
-// the function will be if it is inlined into a context where an argument
-// becomes an alloca.
-//
-unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) {
- if (!V->getType()->isPointerTy()) return 0; // Not a pointer
- unsigned Reduction = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- Instruction *I = cast<Instruction>(*UI);
- if (isa<LoadInst>(I) || isa<StoreInst>(I))
- Reduction += InlineConstants::InstrCost;
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (GEP->hasAllConstantIndices())
- Reduction += CountCodeReductionForAlloca(GEP);
- } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Reduction += CountCodeReductionForAlloca(BCI);
- } else {
- // If there is some other strange instruction, we're not going to be able
- // to do much if we inline this.
- return 0;
- }
+ if (isGEPOffsetConstant(I)) {
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ // Constant GEPs are modeled as free.
+ return true;
}
- return Reduction;
+ // Variable GEPs will require math and will disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
}
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void CodeMetrics::analyzeFunction(Function *F, const TargetData *TD) {
- // If this function contains a call to setjmp or _setjmp, never inline
- // it. This is a hack because we depend on the user marking their local
- // variables as volatile if they are live across a setjmp call, and they
- // probably won't do this in callers.
- if (F->callsFunctionThatReturnsTwice())
- callsSetJmp = true;
-
- // Look at the size of the callee.
- for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- analyzeBasicBlock(&*BB, TD);
-}
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+ // Propagate constants through bitcasts.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
- const TargetData *TD) {
- Metrics.analyzeFunction(F, TD);
-
- // A function with exactly one return has it removed during the inlining
- // process (see InlineFunction), so don't count it.
- // FIXME: This knowledge should really be encoded outside of FunctionInfo.
- if (Metrics.NumRets==1)
- --Metrics.NumInsts;
-
- // Check out all of the arguments to the function, figuring out how much
- // code can be eliminated if one of the arguments is a constant.
- ArgumentWeights.reserve(F->arg_size());
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I),
- Metrics.CountCodeReductionForAlloca(I)));
+ // Track base/offsets through casts
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ // Casts don't change the offset, just wrap it up.
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also look for SROA candidates here.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // Bitcasts are always zero cost.
+ return true;
}
-/// NeverInline - returns true if the function should never be inlined into
-/// any caller
-bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
- return (Metrics.callsSetJmp || Metrics.isRecursive ||
- Metrics.containsIndirectBr);
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+ // Propagate constants through ptrtoint.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when converted to a plain integer, provided the
+ // integer is large enough to represent the pointer.
+ unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // This is really weird. Technically, ptrtoint will disable SROA. However,
+ // unless that ptrtoint is *used* somewhere in the live basic blocks after
+ // inlining, it will be dead and SROA should proceed. All of the uses which
+ // would block SROA would also block SROA if applied directly to a pointer,
+ // and so we can just add the integer in here. The only places where SROA is
+ // preserved either cannot fire on an integer, or won't in and of themselves
+ // disable SROA (ext) without some later use that we would see and disable.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // A ptrtoint cast is free so long as the result is large enough to store the
+ // pointer, and a legal integer type.
+ return TD && TD->isLegalInteger(IntegerSize) &&
+ IntegerSize >= TD->getPointerSizeInBits();
}
-// getSpecializationBonus - The heuristic used to determine the per-call
-// performance boost for using a specialization of Callee with argument
-// specializedArgNo replaced by a constant.
-int InlineCostAnalyzer::getSpecializationBonus(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
- if (Callee->mayBeOverridden())
- return 0;
- int Bonus = 0;
- // If this function uses the coldcc calling convention, prefer not to
- // specialize it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- Bonus -= InlineConstants::ColdccPenalty;
-
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- unsigned ArgNo = 0;
- unsigned i = 0;
- for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
- I != E; ++I, ++ArgNo)
- if (ArgNo == SpecializedArgNos[i]) {
- ++i;
- Bonus += CountBonusForConstant(I);
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+ // Propagate constants through inttoptr.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
}
- // Calls usually take a long time, so they make the specialization gain
- // smaller.
- Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+ // Track base/offset pairs when the integer is converted back into a
+ // pointer, provided the integer is not wider than the pointer.
+ Value *Op = I.getOperand(0);
+ unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
+
+ // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
- return Bonus;
+ // An inttoptr cast is free so long as the input is a legal integer type
+ // which doesn't contain values outside the range of a pointer.
+ return TD && TD->isLegalInteger(IntegerSize) &&
+ IntegerSize <= TD->getPointerSizeInBits();
}
-// ConstantFunctionBonus - Figure out how much of a bonus we can get for
-// possibly devirtualizing a function. We'll subtract the size of the function
-// we may wish to inline from the indirect call bonus providing a limit on
-// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
-// inlining because we decide we don't want to give a bonus for
-// devirtualizing.
-int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+ // Propagate constants through casts.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- // This could just be NULL.
- if (!C) return 0;
+ // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ disableSROA(I.getOperand(0));
- Function *F = dyn_cast<Function>(C);
- if (!F) return 0;
+ // No-op casts don't have any cost.
+ if (I.isLosslessCast())
+ return true;
- int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
- return (Bonus > 0) ? 0 : Bonus;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (TD && isa<TruncInst>(I) &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(I.getType())))
+ return true;
+
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical operations, or return instructions). These are
+ // usually no-ops on most sane targets.
+ if (isa<CmpInst>(I.getOperand(0)))
+ return true;
+
+ // Assume the rest of the casts require work.
+ return false;
}
-// CountBonusForConstant - Figure out an approximation for how much per-call
-// performance boost we can expect if the specified value is constant.
-int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
- unsigned Bonus = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (CI->getCalledValue() == V)
- Bonus += ConstantFunctionBonus(CallSite(CI), C);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (II->getCalledValue() == V)
- Bonus += ConstantFunctionBonus(CallSite(II), C);
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+ Value *Operand = I.getOperand(0);
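+ // Fold this instruction if its operand is a constant, either directly or
+ // through a value we have already simplified.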
+ Constant *Ops[1] = { dyn_cast<Constant>(Operand) };
+ if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand)))
+ if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+ Ops, TD)) {
+ SimplifiedValues[&I] = C;
+ return true;
}
- // FIXME: Eliminating conditional branches and switches should
- // also yield a per-call performance boost.
- else {
- // Figure out the bonuses that wll accrue due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
+ // Disable any SROA on the argument to arbitrary unary operators.
+ disableSROA(Operand);
- if (AllOperandsConstant)
- Bonus += CountBonusForConstant(&Inst);
+ return false;
+}
+
+bool CallAnalyzer::visitICmp(ICmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Otherwise look for a comparison between constant offset pointers with
+ // a common base.
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the icmp to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrCmps;
+ return true;
+ }
}
}
- return Bonus;
-}
+ // If the comparison is an equality comparison with null, we can simplify it
+ // for any alloca-derived argument.
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
+ if (isAllocaDerivedArg(I.getOperand(0))) {
+ // We can actually predict the result of comparisons between an
+ // alloca-derived value and null. Note that this fires regardless of
+ // SROA firing.
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
+
+ // Finally check for SROA candidates in comparisons.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (isa<ConstantPointerNull>(I.getOperand(1))) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
-int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- // InlineCost - This value measures how good of an inline candidate this call
- // site is to inline. A lower inline cost make is more likely for the call to
- // be inlined. This value may go negative.
- //
- int InlineCost = 0;
-
- // Compute any size reductions we can expect due to arguments being passed into
- // the function.
- //
- unsigned ArgNo = 0;
- CallSite::arg_iterator I = CS.arg_begin();
- for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
- FI != FE; ++I, ++FI, ++ArgNo) {
-
- // If an alloca is passed in, inlining this function is likely to allow
- // significant future optimization possibilities (like scalar promotion, and
- // scalarization), so encourage the inlining of the function.
- //
- if (isa<AllocaInst>(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight;
-
- // If this is a constant being passed into the function, use the argument
- // weights calculated for the callee to determine how much will be folded
- // away with this information.
- else if (isa<Constant>(I))
- InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight;
+ disableSROA(CostIt);
}
- // Each argument passed in has a cost at both the caller and the callee
- // sides. Measurements show that each argument costs about the same as an
- // instruction.
- InlineCost -= (CS.arg_size() * InlineConstants::InstrCost);
+ return false;
+}
- // Now that we have considered all of the factors that make the call site more
- // likely to be inlined, look at factors that make us not want to inline it.
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+ // Try to handle a special case: we can fold computing the difference of two
+ // constant-related pointers.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the subtract to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrDiffs;
+ return true;
+ }
+ }
+ }
- // Calls usually take a long time, so they make the inlining gain smaller.
- InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+ // Otherwise, fall back to the generic logic for simplifying and handling
+ // instructions.
+ return Base::visitSub(I);
+}
- // Look at the size of the callee. Each instruction counts as 5.
- InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost;
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
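+ // With any prior simplifications substituted in, ask InstructionSimplify
+ // whether the whole operation folds.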
+ Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- return InlineCost;
-}
+ // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+ disableSROA(LHS);
+ disableSROA(RHS);
-int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) {
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- bool isDirectCall = CS.getCalledFunction() == Callee;
- Instruction *TheCall = CS.getInstruction();
- int Bonus = 0;
-
- // If there is only one call of the function, and it has internal linkage,
- // make it almost guaranteed to be inlined.
- //
- if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall)
- Bonus += InlineConstants::LastCallToStaticBonus;
-
- // If the instruction after the call, or if the normal destination of the
- // invoke is an unreachable instruction, the function is noreturn. As such,
- // there is little point in inlining this.
- if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
- if (isa<UnreachableInst>(II->getNormalDest()->begin()))
- Bonus += InlineConstants::NoreturnPenalty;
- } else if (isa<UnreachableInst>(++BasicBlock::iterator(TheCall)))
- Bonus += InlineConstants::NoreturnPenalty;
-
- // If this function uses the coldcc calling convention, prefer not to inline
- // it.
- if (Callee->getCallingConv() == CallingConv::Cold)
- Bonus += InlineConstants::ColdccPenalty;
-
- // Add to the inline quality for properties that make the call valuable to
- // inline. This includes factors that indicate that the result of inlining
- // the function will be optimizable. Currently this just looks at arguments
- // passed into the function.
- //
- CallSite::arg_iterator I = CS.arg_begin();
- for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end();
- FI != FE; ++I, ++FI)
- // Compute any constant bonus due to inlining we want to give here.
- if (isa<Constant>(I))
- Bonus += CountBonusForConstant(FI, cast<Constant>(I));
-
- return Bonus;
+ return false;
}
-// getInlineCost - The heuristic used to determine if we should inline the
-// function call or not.
-//
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
-}
+bool CallAnalyzer::visitLoad(LoadInst &I) {
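+ // Loads from an SROA candidate are free if SROA succeeds, so count their
+ // cost as potential savings; a volatile or atomic (non-simple) load
+ // disables SROA for the candidate instead.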
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
-InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
- Function *Callee,
- SmallPtrSet<const Function*, 16> &NeverInline) {
- Instruction *TheCall = CS.getInstruction();
- Function *Caller = TheCall->getParent()->getParent();
+ disableSROA(CostIt);
+ }
- // Don't inline functions which can be redefined at link-time to mean
- // something else. Don't inline functions marked noinline or call sites
- // marked noinline.
- if (Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
- CS.isNoInline())
- return llvm::InlineCost::getNever();
+ return false;
+}
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+bool CallAnalyzer::visitStore(StoreInst &I) {
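+ // Stores mirror the load logic: simple stores to an SROA candidate count
+ // toward the savings, while anything else disables SROA for the candidate.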
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
+ }
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
+ disableSROA(CostIt);
+ }
- // If we should never inline this, return a huge cost.
- if (CalleeFI->NeverInline())
- return InlineCost::getNever();
+ return false;
+}
- // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
- // could move this up and avoid computing the FunctionInfo for
- // things we are going to just return always inline for. This
- // requires handling setjmp somewhere else, however.
- if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
- return InlineCost::getAlways();
+bool CallAnalyzer::visitCallSite(CallSite CS) {
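+ // A call to a function that may return twice (such as setjmp) is only safe
+ // to inline into a caller that is itself marked returns-twice, since locals
+ // live across such a call need special handling in the caller.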
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->canReturnTwice() &&
+ !F.hasFnAttr(Attribute::ReturnsTwice)) {
+ // This aborts the entire analysis.
+ ExposesReturnsTwice = true;
+ return false;
+ }
- if (CalleeFI->Metrics.usesDynamicAlloca) {
- // Get information about the caller.
- FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return Base::visitCallSite(CS);
+
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // SROA can usually chew through these intrinsics and they have no cost
+ // so don't pay the price of analyzing them in detail.
+ return true;
+ }
+ }
+
+ if (Function *F = CS.getCalledFunction()) {
+ if (F == CS.getInstruction()->getParent()->getParent()) {
+ // This flag will fully abort the analysis, so don't bother with anything
+ // else.
+ IsRecursive = true;
+ return false;
+ }
- // If we haven't calculated this information yet, do so now.
- if (CallerFI.Metrics.NumBlocks == 0) {
- CallerFI.analyzeFunction(Caller, TD);
+ if (!callIsSmall(F)) {
+ // We account for the average 1 instruction per call argument setup
+ // here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
- // Recompute the CalleeFI pointer, getting Caller could have invalidated
- // it.
- CalleeFI = &CachedFunctionInfo[Callee];
+ // Everything other than inline ASM will also have a significant cost
+ // merely from making the call.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ Cost += InlineConstants::CallPenalty;
}
- // Don't inline a callee with dynamic alloca into a caller without them.
- // Functions containing dynamic alloca's are inefficient in various ways;
- // don't create more inefficiency.
- if (!CallerFI.Metrics.usesDynamicAlloca)
- return InlineCost::getNever();
+ return Base::visitCallSite(CS);
}
- // InlineCost - This value measures how good of an inline candidate this call
- // site is to inline. A lower inline cost make is more likely for the call to
- // be inlined. This value may go negative due to the fact that bonuses
- // are negative numbers.
- //
- int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
- return llvm::InlineCost::get(InlineCost);
+ // Otherwise we're in a very special case -- an indirect function call. See
+ // if we can be particularly clever about this.
+ Value *Callee = CS.getCalledValue();
+
+ // First, pay the price of the argument setup. We account for the average
+ // 1 instruction per call argument setup here.
+ Cost += CS.arg_size() * InlineConstants::InstrCost;
+
+ // Next, check if this happens to be an indirect function call to a known
+ // function in this inline context. If not, we've done all we can.
+ Function *F = dyn_cast_or_null<Function>(SimplifiedValues.lookup(Callee));
+ if (!F)
+ return Base::visitCallSite(CS);
+
+ // If we have a constant that we are calling as a function, we can peer
+ // through it and see the function target. This happens not infrequently
+ // during devirtualization and so we want to give it a hefty bonus for
+ // inlining, but cap that bonus in the event that inlining wouldn't pan
+ // out. Pretend to inline the function, with a custom threshold.
+ CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold);
+ if (CA.analyzeCall(CS)) {
+ // We were able to inline the indirect call! Subtract the cost from the
+ // bonus we want to apply, but don't go below zero.
+ Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost());
+ }
+
+ return Base::visitCallSite(CS);
}
-// getSpecializationCost - The heuristic used to determine the code-size
-// impact of creating a specialized version of Callee with argument
-// SpecializedArgNo replaced by a constant.
-InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
- SmallVectorImpl<unsigned> &SpecializedArgNos)
-{
- // Don't specialize functions which can be redefined at link-time to mean
- // something else.
- if (Callee->mayBeOverridden())
- return llvm::InlineCost::getNever();
+bool CallAnalyzer::visitInstruction(Instruction &I) {
+ // We found something we don't understand or can't handle. Mark any SROA-able
+ // values in the operand list as no longer viable.
+ for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI)
+ disableSROA(*OI);
+
+ return false;
+}
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
+/// \brief Analyze a basic block for its contribution to the inline cost.
+///
+/// This method walks the analyzer over every instruction in the given basic
+/// block and accounts for their cost during inlining at this callsite. It
+/// aborts early if the threshold has been exceeded or an impossible-to-inline
+/// construct has been detected. It returns false if inlining is no longer
+/// viable, and true if inlining remains viable.
+bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
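+ // Note that the block's terminator is deliberately skipped here; the
+ // caller (analyzeCall) accounts for terminator costs separately.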
+ for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end());
+ I != E; ++I) {
+ ++NumInstructions;
+ if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
+ ++NumVectorInstructions;
+
+ // If the instruction simplified to a constant, there is no cost to this
+ // instruction. Visit the instructions using our InstVisitor to account for
+ // all of the per-instruction logic. The visit tree returns true if we
+ // consumed the instruction in any way, and false if the instruction's base
+ // cost should count against inlining.
+ if (Base::visit(I))
+ ++NumInstructionsSimplified;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // If visiting this instruction detected an uninlinable pattern, abort.
+ if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+
+ if (NumVectorInstructions > NumInstructions/2)
+ VectorBonus = FiftyPercentVectorBonus;
+ else if (NumVectorInstructions > NumInstructions/10)
+ VectorBonus = TenPercentVectorBonus;
+ else
+ VectorBonus = 0;
+
+ // Check if we've passed the threshold so we don't spin in huge basic
+ // blocks that will never inline.
+ if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ return false;
+ }
- int Cost = 0;
+ return true;
+}
- // Look at the original size of the callee. Each instruction counts as 5.
- Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer (or no TargetData is available), and
+/// returns the constant '0' if there are no constant offsets applied.
+ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
+ if (!TD || !V->getType()->isPointerTy())
+ return 0;
- // Offset that with the amount of code that can be constant-folded
- // away with the given arguments replaced by constants.
- for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
- ae = SpecializedArgNos.end(); an != ae; ++an)
- Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
+ return 0;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
- return llvm::InlineCost::get(Cost);
+ Type *IntPtrTy = TD->getIntPtrType(V->getContext());
+ return cast<ConstantInt>(ConstantInt::get(IntPtrTy, Offset));
}
-// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
-// higher threshold to determine if the function call should be inlined.
-float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
-
- // Get information about the callee.
- FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI.Metrics.NumBlocks == 0)
- CalleeFI.analyzeFunction(Callee, TD);
-
- float Factor = 1.0f;
- // Single BB functions are often written to be inlined.
- if (CalleeFI.Metrics.NumBlocks == 1)
- Factor += 0.5f;
-
- // Be more aggressive if the function contains a good chunk (if it mades up
- // at least 10% of the instructions) of vector instructions.
- if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
- Factor += 2.0f;
- else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
- Factor += 1.5f;
- return Factor;
-}
+/// \brief Analyze a call site for potential inlining.
+///
+/// Returns true if inlining this call is viable, and false if it is not
+/// viable. It computes the cost and adjusts the threshold based on numerous
+/// factors and heuristics. If this method returns false but the computed cost
+/// is below the computed threshold, then inlining was forcibly disabled by
+/// some artifact of the routine.
+bool CallAnalyzer::analyzeCall(CallSite CS) {
+ ++NumCallsAnalyzed;
+
+ // Track whether the post-inlining function would have more than one basic
+ // block. Single basic block functions are often written to be inlined.
+ // Balloon the threshold by 50% until we pass the single-BB phase.
+ bool SingleBB = true;
+ int SingleBBBonus = Threshold / 2;
+ Threshold += SingleBBBonus;
+
+ // Unless we are always-inlining, perform some tweaks to the cost and
+ // threshold based on the direct callsite information.
+ if (!AlwaysInline) {
+ // We want to more aggressively inline vector-dense kernels, so up the
+ // threshold, and we'll lower it if the % of vector instructions gets too
+ // low.
+ assert(NumInstructions == 0);
+ assert(NumVectorInstructions == 0);
+ FiftyPercentVectorBonus = Threshold;
+ TenPercentVectorBonus = Threshold / 2;
+
+ // Subtract off one instruction per call argument as those will be free after
+ // inlining.
+ Cost -= CS.arg_size() * InlineConstants::InstrCost;
+
+ // If there is only one call of the function, and it has internal linkage,
+ // the cost of inlining it drops dramatically.
+ if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
+ Cost += InlineConstants::LastCallToStaticBonus;
+
+ // If the instruction after the call, or if the normal destination of the
+ // invoke is an unreachable instruction, the function is noreturn. As such,
+ // there is little point in inlining this unless there is literally zero cost.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
+ if (isa<UnreachableInst>(II->getNormalDest()->begin()))
+ Threshold = 1;
+ } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
+ Threshold = 1;
+
+ // If this function uses the coldcc calling convention, prefer not to inline
+ // it.
+ if (F.getCallingConv() == CallingConv::Cold)
+ Cost += InlineConstants::ColdccPenalty;
+
+ // Check if we're done. This can happen due to bonuses and penalties.
+ if (Cost > Threshold)
+ return false;
+ }
-/// growCachedCostInfo - update the cached cost info for Caller after Callee has
-/// been inlined.
-void
-InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
- CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+ if (F.empty())
+ return true;
- // For small functions we prefer to recalculate the cost for better accuracy.
- if (CallerMetrics.NumBlocks < 10 && CallerMetrics.NumInsts < 1000) {
- resetCachedCostInfo(Caller);
- return;
+ // Track whether we've seen a return instruction. The first return
+ // instruction is free, as at least one will usually disappear in inlining.
+ bool HasReturn = false;
+
+ // Populate our simplified values by mapping from function arguments to call
+ // arguments with known important simplifications.
+ CallSite::arg_iterator CAI = CS.arg_begin();
+ for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end();
+ FAI != FAE; ++FAI, ++CAI) {
+ assert(CAI != CS.arg_end());
+ if (Constant *C = dyn_cast<Constant>(CAI))
+ SimplifiedValues[FAI] = C;
+
+ Value *PtrArg = *CAI;
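+ // Note that on success this also rewrites PtrArg to the underlying base
+ // pointer, with all constant offsets stripped off.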
+ if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) {
+ ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue());
+
+ // We can SROA any pointer arguments derived from alloca instructions.
+ if (isa<AllocaInst>(PtrArg)) {
+ SROAArgValues[FAI] = PtrArg;
+ SROAArgCosts[PtrArg] = 0;
+ }
+ }
}
+ NumConstantArgs = SimplifiedValues.size();
+ NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
+ NumAllocaArgs = SROAArgValues.size();
+
+ // The worklist of live basic blocks in the callee *after* inlining. We avoid
+ // adding basic blocks of the callee which can be proven to be dead for this
+ // particular call site in order to get more accurate cost estimates. This
+ // requires a somewhat heavyweight iteration pattern: we need to walk the
+ // basic blocks in a breadth-first order as we insert live successors. To
+ // accomplish this, we use a small-size optimized SetVector; small iteration
+ // counts are the common case because we exit once we cross our threshold.
+ typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
+ SmallPtrSet<BasicBlock *, 16> > BBSetVector;
+ BBSetVector BBWorklist;
+ BBWorklist.insert(&F.getEntryBlock());
+ // Note that we *must not* cache the size, this loop grows the worklist.
+ for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) {
+ // Bail out the moment we cross the threshold. This means we'll under-count
+ // the cost, but only when under-counting doesn't matter.
+ if (!AlwaysInline && Cost > (Threshold + VectorBonus))
+ break;
+
+ BasicBlock *BB = BBWorklist[Idx];
+ if (BB->empty())
+ continue;
+
+ // Handle the terminator cost here where we can track returns and other
+ // function-wide constructs.
+ TerminatorInst *TI = BB->getTerminator();
+
+ // We never want to inline functions that contain an indirectbr. Inlining
+ // one would be incorrect because all the blockaddresses (in static global
+ // initializers, for example) would still refer to the original function,
+ // so the indirect jump would jump from the inlined copy of the function
+ // into the original function, which is undefined behavior.
+ // FIXME: This logic isn't really right; we can safely inline functions
+ // with indirectbr's as long as no other function or global references the
+ // blockaddress of a block within the current function. And as a QOI issue,
+ // if someone is using a blockaddress without an indirectbr, and that
+ // reference somehow ends up in another function or global, we probably
+ // don't want to inline this function.
+ if (isa<IndirectBrInst>(TI))
+ return false;
+
+ if (!HasReturn && isa<ReturnInst>(TI))
+ HasReturn = true;
+ else
+ Cost += InlineConstants::InstrCost;
+
+ // Analyze the cost of this block. If we blow through the threshold, this
+ // returns false, and we can bail on out.
+ if (!analyzeBlock(BB)) {
+ if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
+ return false;
+ break;
+ }
- // For large functions, we can save a lot of computation time by skipping
- // recalculations.
- if (CallerMetrics.NumCalls > 0)
- --CallerMetrics.NumCalls;
+ // Add in the live successors by first checking whether we have terminator
+ // that may be simplified based on the values simplified by this call.
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (BI->isConditional()) {
+ Value *Cond = BI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 1 : 0));
+ continue;
+ }
+ }
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ Value *Cond = SI->getCondition();
+ if (ConstantInt *SimpleCond
+ = dyn_cast_or_null<ConstantInt>(SimplifiedValues.lookup(Cond))) {
+ BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor());
+ continue;
+ }
+ }
- if (Callee == 0) return;
+ // If we're unable to select a particular successor, just count all of
+ // them.
+ for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
+ BBWorklist.insert(TI->getSuccessor(TIdx));
+
+ // If we had any successors at this point, then post-inlining is likely to
+ // have them as well. Note that we assume any basic blocks which existed
+ // due to branches or switches which folded above will also fold after
+ // inlining.
+ if (SingleBB && TI->getNumSuccessors() > 1) {
+ // Take off the bonus we applied to the threshold.
+ Threshold -= SingleBBBonus;
+ SingleBB = false;
+ }
+ }
- CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+ Threshold += VectorBonus;
- // If we don't have metrics for the callee, don't recalculate them just to
- // update an approximation in the caller. Instead, just recalculate the
- // caller info from scratch.
- if (CalleeMetrics.NumBlocks == 0) {
- resetCachedCostInfo(Caller);
- return;
- }
+ return AlwaysInline || Cost < Threshold;
+}
- // Since CalleeMetrics were already calculated, we know that the CallerMetrics
- // reference isn't invalidated: both were in the DenseMap.
- CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
-
- // FIXME: If any of these three are true for the callee, the callee was
- // not inlined into the caller, so I think they're redundant here.
- CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
- CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
- CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
-
- CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
- CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
- CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
- CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
- CallerMetrics.NumRets += CalleeMetrics.NumRets;
-
- // analyzeBasicBlock counts each function argument as an inst.
- if (CallerMetrics.NumInsts >= Callee->arg_size())
- CallerMetrics.NumInsts -= Callee->arg_size();
- else
- CallerMetrics.NumInsts = 0;
-
- // We are not updating the argument weights. We have already determined that
- // Caller is a fairly large function, so we accept the loss of precision.
+/// \brief Dump stats about this call's analysis.
+void CallAnalyzer::dump() {
+#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n"
+ DEBUG_PRINT_STAT(NumConstantArgs);
+ DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs);
+ DEBUG_PRINT_STAT(NumAllocaArgs);
+ DEBUG_PRINT_STAT(NumConstantPtrCmps);
+ DEBUG_PRINT_STAT(NumConstantPtrDiffs);
+ DEBUG_PRINT_STAT(NumInstructionsSimplified);
+ DEBUG_PRINT_STAT(SROACostSavings);
+ DEBUG_PRINT_STAT(SROACostSavingsLost);
+#undef DEBUG_PRINT_STAT
}
-/// clear - empty the cache of inline costs
-void InlineCostAnalyzer::clear() {
- CachedFunctionInfo.clear();
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) {
+ return getInlineCost(CS, CS.getCalledFunction(), Threshold);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee,
+ int Threshold) {
+ // Don't inline functions which can be redefined at link-time to mean
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
+ if (!Callee || Callee->mayBeOverridden() ||
+ Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
+ return llvm::InlineCost::getNever();
+
+ DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n");
+
+ CallAnalyzer CA(TD, *Callee, Threshold);
+ bool ShouldInline = CA.analyzeCall(CS);
+
+ DEBUG(CA.dump());
+
+ // Check if there was a reason to force inlining or no inlining.
+ if (!ShouldInline && CA.getCost() < CA.getThreshold())
+ return InlineCost::getNever();
+ if (ShouldInline && CA.getCost() >= CA.getThreshold())
+ return InlineCost::getAlways();
+
+ return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 131cc97d2379..16e7a726595d 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,13 +18,17 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "instsimplify"
+#include "llvm/GlobalAlias.h"
#include "llvm/Operator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Target/TargetData.h"
@@ -37,23 +41,28 @@ STATISTIC(NumExpand, "Number of expansions");
STATISTIC(NumFactor , "Number of factorizations");
STATISTIC(NumReassoc, "Number of reassociations");
-static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
-static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
- const DominatorTree *, unsigned);
+struct Query {
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+ const DominatorTree *DT;
+
+ Query(const TargetData *td, const TargetLibraryInfo *tli,
+ const DominatorTree *dt) : TD(td), TLI(tli), DT(dt) {}
+};
+
+static Value *SimplifyAndInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
+ unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
+static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
/// getFalse - For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
static Constant *getFalse(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getNullValue(Ty);
}
@@ -61,13 +70,25 @@ static Constant *getFalse(Type *Ty) {
/// getTrue - For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
static Constant *getTrue(Type *Ty) {
- assert((Ty->isIntegerTy(1) ||
- (Ty->isVectorTy() &&
- cast<VectorType>(Ty)->getElementType()->isIntegerTy(1))) &&
+ assert(Ty->getScalarType()->isIntegerTy(1) &&
"Expected i1 type or a vector of i1!");
return Constant::getAllOnesValue(Ty);
}
+/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"?
+static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ CmpInst *Cmp = dyn_cast<CmpInst>(V);
+ if (!Cmp)
+ return false;
+ CmpInst::Predicate CPred = Cmp->getPredicate();
+ Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1);
+ if (CPred == Pred && CLHS == LHS && CRHS == RHS)
+ return true;
+ return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS &&
+ CRHS == LHS;
+}
+
/// ValueDominatesPHI - Does the given value dominate the specified phi node?
static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
@@ -75,9 +96,20 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
// Arguments and constants dominate all instructions.
return true;
+ // If we are processing instructions (and/or basic blocks) that have not been
+ // fully added to a function, the parent nodes may still be null. Simply
+ // return the conservative answer in these cases.
+ if (!I->getParent() || !P->getParent() || !I->getParent()->getParent())
+ return false;
+
// If we have a DominatorTree then do a precise test.
- if (DT)
+ if (DT) {
+ if (!DT->isReachableFromEntry(P->getParent()))
+ return true;
+ if (!DT->isReachableFromEntry(I->getParent()))
+ return false;
return DT->dominates(I, P);
+ }
// Otherwise, if the instruction is in the entry block, and is not an invoke,
// then it obviously dominates all phi nodes.
@@ -94,8 +126,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExpand, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ unsigned OpcToExpand, const Query &Q,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -107,8 +139,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op C) op' (B op C)".
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
// Do "A op C" and "B op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
@@ -117,8 +149,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return LHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
- MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
++NumExpand;
return V;
}
@@ -131,8 +162,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
// It does! Try turning it into "(A op B) op' (A op C)".
Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
// Do "A op B" and "A op C" both simplify?
- if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
- if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+ if (Value *L = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse))
+ if (Value *R = SimplifyBinOp(Opcode, A, C, Q, MaxRecurse)) {
// They do! Return "L op' R" if it simplifies or is already available.
// If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
@@ -141,8 +172,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return RHS;
}
// Otherwise return "L op' R" if it simplifies.
- if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
- MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, Q, MaxRecurse)) {
++NumExpand;
return V;
}
@@ -157,8 +187,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
/// Returns the simplified value, or null if no simplification was performed.
static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- unsigned OpcToExtract, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+ unsigned OpcToExtract, const Query &Q,
+ unsigned MaxRecurse) {
Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -182,7 +212,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *DD = A == C ? D : C;
// Form "A op' (B op DD)" if it simplifies completely.
// Does "B op DD" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
// It does! Return "A op' V" if it simplifies or is already available.
// If V equals B then "A op' V" is just the LHS. If V equals DD then
// "A op' V" is just the RHS.
@@ -191,7 +221,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == B ? LHS : RHS;
}
// Otherwise return "A op' V" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
++NumFactor;
return W;
}
@@ -205,7 +235,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *CC = B == D ? C : D;
// Form "(A op CC) op' B" if it simplifies completely..
// Does "A op CC" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
// It does! Return "V op' B" if it simplifies or is already available.
// If V equals A then "V op' B" is just the LHS. If V equals CC then
// "V op' B" is just the RHS.
@@ -214,7 +244,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return V == A ? LHS : RHS;
}
// Otherwise return "V op' B" if it simplifies.
- if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
++NumFactor;
return W;
}
@@ -227,9 +257,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
/// operations. Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
- const TargetData *TD,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
@@ -247,12 +275,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "B op C" simplify?
- if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, B, C, Q, MaxRecurse)) {
// It does! Return "A op V" if it simplifies or is already available.
// If V equals B then "A op V" is just the LHS.
if (V == B) return LHS;
// Otherwise return "A op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, A, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -266,12 +294,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "A op B" simplify?
- if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, A, B, Q, MaxRecurse)) {
// It does! Return "V op C" if it simplifies or is already available.
// If V equals B then "V op C" is just the RHS.
if (V == B) return RHS;
// Otherwise return "V op C" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, C, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -289,12 +317,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = RHS;
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "V op B" if it simplifies or is already available.
// If V equals A then "V op B" is just the LHS.
if (V == A) return LHS;
// Otherwise return "V op B" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, V, B, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -308,12 +336,12 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
Value *C = Op1->getOperand(1);
// Does "C op A" simplify?
- if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+ if (Value *V = SimplifyBinOp(Opcode, C, A, Q, MaxRecurse)) {
// It does! Return "B op V" if it simplifies or is already available.
// If V equals C then "B op V" is just the RHS.
if (V == C) return RHS;
// Otherwise return "B op V" if it simplifies.
- if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+ if (Value *W = SimplifyBinOp(Opcode, B, V, Q, MaxRecurse)) {
++NumReassoc;
return W;
}
@@ -328,9 +356,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
/// evaluating it on both branches of the select results in the same value.
/// Returns the common value if so, otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD,
- const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -347,11 +373,11 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
Value *TV;
Value *FV;
if (SI == LHS) {
- TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, Q, MaxRecurse);
} else {
- TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
- FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+ TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), Q, MaxRecurse);
+ FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), Q, MaxRecurse);
}
// If they simplified to the same value, then return the common value.
@@ -402,8 +428,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
/// result in the same value. Returns the common value if so, otherwise returns
/// null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
- Value *RHS, const TargetData *TD,
- const DominatorTree *DT,
+ Value *RHS, const Query &Q,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
@@ -416,40 +441,67 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
}
assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
SelectInst *SI = cast<SelectInst>(LHS);
+ Value *Cond = SI->getCondition();
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
// Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
// Does "cmp TV, RHS" simplify?
- if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
- MaxRecurse)) {
- // It does! Does "cmp FV, RHS" simplify?
- if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
- MaxRecurse)) {
- // It does! If they simplified to the same value, then use it as the
- // result of the original comparison.
- if (TCmp == FCmp)
- return TCmp;
- Value *Cond = SI->getCondition();
- // If the false value simplified to false, then the result of the compare
- // is equal to "Cond && TCmp". This also catches the case when the false
- // value simplified to false and the true value to true, returning "Cond".
- if (match(FCmp, m_Zero()))
- if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
- return V;
- // If the true value simplified to true, then the result of the compare
- // is equal to "Cond || FCmp".
- if (match(TCmp, m_One()))
- if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
- return V;
- // Finally, if the false value simplified to true and the true value to
- // false, then the result of the compare is equal to "!Cond".
- if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
- if (Value *V =
- SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
- TD, DT, MaxRecurse))
- return V;
- }
+ Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, Q, MaxRecurse);
+ if (TCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'true'.
+ TCmp = getTrue(Cond->getType());
+ } else if (!TCmp) {
+ // It didn't simplify. However if "cmp TV, RHS" is equal to the select
+ // condition then we can replace it with 'true'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, TV, RHS))
+ return 0;
+ TCmp = getTrue(Cond->getType());
+ }
+
+ // Does "cmp FV, RHS" simplify?
+ Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, Q, MaxRecurse);
+ if (FCmp == Cond) {
+ // It not only simplified, it simplified to the select condition. Replace
+ // it with 'false'.
+ FCmp = getFalse(Cond->getType());
+ } else if (!FCmp) {
+ // It didn't simplify. However if "cmp FV, RHS" is equal to the select
+ // condition then we can replace it with 'false'. Otherwise give up.
+ if (!isSameCompare(Cond, Pred, FV, RHS))
+ return 0;
+ FCmp = getFalse(Cond->getType());
}
+ // If both sides simplified to the same value, then use it as the result of
+ // the original comparison.
+ if (TCmp == FCmp)
+ return TCmp;
+
+ // The remaining cases only make sense if the select condition has the same
+ // type as the result of the comparison, so bail out if this is not so.
+ if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
+ return 0;
+ // If the false value simplified to false, then the result of the compare
+ // is equal to "Cond && TCmp". This also catches the case when the false
+ // value simplified to false and the true value to true, returning "Cond".
+ if (match(FCmp, m_Zero()))
+ if (Value *V = SimplifyAndInst(Cond, TCmp, Q, MaxRecurse))
+ return V;
+ // If the true value simplified to true, then the result of the compare
+ // is equal to "Cond || FCmp".
+ if (match(TCmp, m_One()))
+ if (Value *V = SimplifyOrInst(Cond, FCmp, Q, MaxRecurse))
+ return V;
+ // Finally, if the false value simplified to true and the true value to
+ // false, then the result of the compare is equal to "!Cond".
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+ if (Value *V =
+ SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+ Q, MaxRecurse))
+ return V;
+
return 0;
}
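The select-threading above rests on three boolean identities. A standalone sketch (plain C++, not part of the patch) that checks them exhaustively over i1 values:

    #include <cassert>

    int main() {
      for (int c = 0; c <= 1; ++c)
        for (int t = 0; t <= 1; ++t)
          for (int f = 0; f <= 1; ++f) {
            bool Cond = c, TCmp = t, FCmp = f;
            // "cmp select(Cond, TV, FV), RHS" evaluates the taken arm.
            bool Result = Cond ? TCmp : FCmp;
            if (!FCmp) assert(Result == (Cond && TCmp)); // false arm folded to 0
            if (TCmp)  assert(Result == (Cond || FCmp)); // true arm folded to 1
            if (FCmp && !TCmp) assert(Result == !Cond);  // arms folded to 1/0
          }
      return 0;
    }

The exhaustive loop is the whole argument for i1; the vector-type bailout above exists because these and/or/xor rewrites only type-check when the select condition and the comparison result agree on vectorness.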
@@ -458,8 +510,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
/// it on the incoming phi values yields the same result for every value. If so
/// returns the common value; otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -468,13 +519,13 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
if (isa<PHINode>(LHS)) {
PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, DT))
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
return 0;
} else {
assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
PI = cast<PHINode>(RHS);
// Bail out if LHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(LHS, PI, DT))
+ if (!ValueDominatesPHI(LHS, PI, Q.DT))
return 0;
}
@@ -485,8 +536,8 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
Value *V = PI == LHS ?
- SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
- SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+ SimplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse) :
+ SimplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -502,8 +553,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
/// incoming phi values yields the same result every time. If so returns the
/// common result; otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
return 0;
@@ -517,7 +567,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
PHINode *PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
- if (!ValueDominatesPHI(RHS, PI, DT))
+ if (!ValueDominatesPHI(RHS, PI, Q.DT))
return 0;
// Evaluate the BinOp on the incoming phi values.
@@ -526,7 +576,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI) continue;
- Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+ Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// than before, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -540,13 +590,12 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// SimplifyAddInst - Given operands for an Add, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
- return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
- Ops, TD);
+ return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), Ops,
+ Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -576,17 +625,17 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
/// i1 add -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Threading Add over selects and phi nodes is pointless, so don't bother.
@@ -602,20 +651,116 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
+}
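For callers, the visible change in these wrappers is the extra TargetLibraryInfo parameter, which flows into ConstantFoldInstOperands. A hypothetical caller sketch (the helper name trySimplifyAdd is invented; the includes assume this tree's header layout; any of the analysis pointers may be null):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Operator.h"
    using namespace llvm;

    // Forward an add's operands and wrap flags to the new entry point.
    Value *trySimplifyAdd(OverflowingBinaryOperator *Add, const TargetData *TD,
                          const TargetLibraryInfo *TLI, const DominatorTree *DT) {
      return SimplifyAddInst(Add->getOperand(0), Add->getOperand(1),
                             Add->hasNoSignedWrap(), Add->hasNoUnsignedWrap(),
                             TD, TLI, DT);
    }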
+
+/// \brief Accumulate the constant integer offset a GEP represents.
+///
+/// Given a getelementptr instruction/constantexpr, accumulate the constant
+/// offset from the base pointer into the provided APInt 'Offset'. Returns true
+/// if the GEP has all-constant indices. Returns false if any non-constant
+/// index is encountered leaving the 'Offset' in an undefined state. The
+/// 'Offset' APInt must be the bitwidth of the target's pointer size.
+static bool accumulateGEPOffset(const TargetData &TD, GEPOperator *GEP,
+ APInt &Offset) {
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(*I);
+ if (!OpC) return false;
+ if (OpC->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD.getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
+ }
+ return true;
+}
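The arithmetic in accumulateGEPOffset is: struct indices add a field offset from the struct layout, while sequential indices scale by the allocation size of the indexed type (after sign extension to pointer width). A standalone sketch with the layout numbers hard-coded in place of the TargetData queries:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Models: getelementptr {i32, i32}* %base, i64 2, i32 1
      uint64_t StructSize  = 8; // stands in for TD.getTypeAllocSize({i32,i32})
      uint64_t FieldOffset = 4; // stands in for StructLayout::getElementOffset(1)
      int64_t Offset = 0;
      Offset += 2 * StructSize; // sequential index: scaled by alloc size
      Offset += FieldOffset;    // struct index: adds the field offset
      assert(Offset == 20);
      return 0;
    }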
+
+/// \brief Compute the base pointer and cumulative constant offsets for V.
+///
+/// This strips all constant offsets off of V, leaving it the base pointer, and
+/// accumulates the total constant offset applied in the returned constant. It
+/// returns 0 if V is not a pointer, and returns the constant '0' if there are
+/// no constant offsets applied.
+static Constant *stripAndComputeConstantOffsets(const TargetData &TD,
+ Value *&V) {
+ if (!V->getType()->isPointerTy())
+ return 0;
+
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ APInt Offset = APInt::getNullValue(IntPtrWidth);
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+ Visited.insert(V);
+ do {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset))
+ break;
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ break;
+ V = GA->getAliasee();
+ } else {
+ break;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ } while (Visited.insert(V));
+
+ Type *IntPtrTy = TD.getIntPtrType(V->getContext());
+ return ConstantInt::get(IntPtrTy, Offset);
+}
+
+/// \brief Compute the constant difference between two pointer values.
+/// If the difference is not a constant, returns zero.
+static Constant *computePointerDifference(const TargetData &TD,
+ Value *LHS, Value *RHS) {
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ if (!LHSOffset)
+ return 0;
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+ if (!RHSOffset)
+ return 0;
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return 0;
+
+ // Otherwise, the difference of LHS - RHS can be computed as:
+ // LHS - RHS
+ // = (LHSOffset + Base) - (RHSOffset + Base)
+ // = LHSOffset - RHSOffset
+ return ConstantExpr::getSub(LHSOffset, RHSOffset);
}
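The fold only fires when both operands strip down to the same base, at which point the pointer difference is pure offset algebra. A standalone numeric check of that algebra:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Base = 0x1000;                // common base after stripping
      int64_t LHSOffset = 24, RHSOffset = 8;
      uint64_t LHS = Base + LHSOffset, RHS = Base + RHSOffset;
      // (LHSOffset + Base) - (RHSOffset + Base) == LHSOffset - RHSOffset
      assert((int64_t)(LHS - RHS) == LHSOffset - RHSOffset);
      return 0;
    }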
/// SimplifySubInst - Given operands for a Sub, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// X - undef -> undef
@@ -643,19 +788,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *Y = 0, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
// It does! Now see if "X + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, X, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "Y + V" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -667,19 +810,17 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
X = Op0;
if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
// See if "V === X - Y" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "V - Z" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
// See if "V === X - Z" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, Q, MaxRecurse-1))
// It does! Now see if "V - Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
@@ -691,23 +832,39 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Z = Op0;
if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
// See if "V === Z - X" simplifies.
- if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, Q, MaxRecurse-1))
// It does! Now see if "V + Y" simplifies.
- if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
- MaxRecurse-1)) {
+ if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, Q, MaxRecurse-1)) {
// It does, we successfully reassociated!
++NumReassoc;
return W;
}
+ // trunc(X) - trunc(Y) -> trunc(X - Y) if everything simplifies.
+ if (MaxRecurse && match(Op0, m_Trunc(m_Value(X))) &&
+ match(Op1, m_Trunc(m_Value(Y))))
+ if (X->getType() == Y->getType())
+ // See if "V === X - Y" simplifies.
+ if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
+ // It does! Now see if "trunc V" simplifies.
+ if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ // It does, return the simplified "trunc V".
+ return W;
+
+ // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...).
+ if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) &&
+ match(Op1, m_PtrToInt(m_Value(Y))))
+ if (Constant *Result = computePointerDifference(*Q.TD, X, Y))
+ return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
+
// Mul distributes over Sub. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// i1 sub -> xor.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Threading Sub over selects and phi nodes is pointless, so don't bother.
@@ -723,19 +880,21 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
}
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
}
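The new trunc fold added above is sound because truncation distributes over two's-complement subtraction. A standalone check at i64 -> i32:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 0x1234567890abcdefULL, Y = 0x0fedcba098765432ULL;
      uint32_t A = (uint32_t)X - (uint32_t)Y; // trunc(X) - trunc(Y)
      uint32_t B = (uint32_t)(X - Y);         // trunc(X - Y)
      assert(A == B);
      return 0;
    }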
/// SimplifyMulInst - Given operands for a Mul, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -755,40 +914,37 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
return Op0;
// (X / Y) * Y -> X if the division is exact.
- Value *X = 0, *Y = 0;
- if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
- (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
- BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
- if (Div->isExact())
- return X;
- }
+ Value *X = 0;
+ if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
+ match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
+ return X;
// i1 mul -> and.
if (MaxRecurse && Op0->getType()->isIntegerTy(1))
- if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyAndInst(Op0, Op1, Q, MaxRecurse-1))
return V;
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// Mul distributes over Add. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, Q,
MaxRecurse))
return V;
@@ -796,19 +952,19 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyMulInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
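The tightened pattern above only folds (X / Y) * Y to X when the division carries the 'exact' flag, i.e. leaves no remainder. A standalone check of why that precondition matters:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t X = 96, Y = 8;
      assert(X % Y == 0);       // the 'exact' precondition
      assert((X / Y) * Y == X); // the fold is sound
      // Without exactness it is not: (7 / 2) * 2 == 6, not 7.
      assert((7 / 2) * 2 == 6);
      return 0;
    }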
/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -842,7 +998,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
Value *X = 0, *Y = 0;
if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
- BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+ OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
// If the Mul knows it does not overflow, then we are good to go.
if ((isSigned && Mul->hasNoSignedWrap()) ||
(!isSigned && Mul->hasNoUnsignedWrap()))
@@ -861,13 +1017,13 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -875,36 +1031,38 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySDivInst - Given operands for an SDiv, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyUDivInst - Given operands for a UDiv, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyUDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
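SimplifyDiv's (X * Y) / Y -> X fold, whose cast was just strengthened to OverflowingBinaryOperator above, requires the multiply's nuw/nsw flag precisely because a wrapping multiply breaks it. A standalone i8 demonstration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t X = 100, Y = 3;               // 300 wraps to 44 in i8
      uint8_t Wrapped = (uint8_t)(X * Y);
      assert(Wrapped / Y != X);             // 44 / 3 == 14, not 100
      uint8_t X2 = 10;                      // 30 fits: a 'nuw' multiply
      assert((uint8_t)(X2 * Y) / Y == X2);  // fold is sound here
      return 0;
    }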
-static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
- const DominatorTree *, unsigned) {
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned) {
// undef / X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -917,19 +1075,19 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFDivInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyRem - Given operands for an SRem or URem, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -964,13 +1122,13 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -978,36 +1136,38 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
/// SimplifySRemInst - Given operands for an SRem, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifySRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyURemInst - Given operands for a URem, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
- if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse))
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
+ if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyURemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
-static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
- const DominatorTree *, unsigned) {
+static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
+ unsigned) {
// undef % X -> undef (the undef could be a snan).
if (match(Op0, m_Undef()))
return Op0;
@@ -1020,19 +1180,19 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *,
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyFRemInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
if (Constant *C1 = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { C0, C1 };
- return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, TD);
+ return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, Q.TD, Q.TLI);
}
}
@@ -1057,13 +1217,13 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
@@ -1072,9 +1232,8 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
/// SimplifyShlInst - Given operands for an Shl, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
return V;
// undef << X -> 0
@@ -1083,23 +1242,23 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// (X >> A) << A -> X
Value *X;
- if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
- cast<PossiblyExactOperator>(Op0)->isExact())
+ if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
return 0;
}
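The shl fold rewritten above, (X >> A) << A -> X, is valid only when the right shift is 'exact' (no set bits shifted out), which is exactly what the new m_Exact matcher enforces. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 4;
      uint32_t X = 0xABCD0;          // low 4 bits clear: the shr is exact
      assert(((X >> A) << A) == X);
      uint32_t Y = 0xABCD1;          // low bit set: the fold would be wrong
      assert(((Y >> A) << A) != Y);
      return 0;
    }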
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyLShrInst - Given operands for an LShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse))
return V;
// undef >>l X -> 0
@@ -1116,16 +1275,18 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyLShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyAShrInst - Given operands for an AShr, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
- if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+ const Query &Q, unsigned MaxRecurse) {
+ if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse))
return V;
// all ones >>a X -> all ones
@@ -1146,19 +1307,22 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyAShrInst(Op0, Op1, isExact, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifyAndInst - Given operands for an And, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1197,37 +1361,46 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
(A == Op0 || B == Op0))
return Op0;
+ // A & (-A) = A if A is a power of two or zero.
+ if (match(Op0, m_Neg(m_Specific(Op1))) ||
+ match(Op1, m_Neg(m_Specific(Op0)))) {
+ if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true))
+ return Op0;
+ if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true))
+ return Op1;
+ }
+
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, Q,
MaxRecurse))
return V;
@@ -1235,19 +1408,20 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyAndInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
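The new A & (-A) fold relies on a classic bit trick: A & -A isolates the lowest set bit, so it equals A exactly when A is a power of two or zero, which is what the isPowerOfTwo(..., /*OrZero*/true) guard checks. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Good[] = {0u, 1u, 2u, 64u, 0x80000000u};
      for (uint32_t A : Good)
        assert((A & (0u - A)) == A); // power of two or zero: fold holds
      uint32_t B = 6;                // not a power of two
      assert((B & (0u - B)) == 2u);  // lowest set bit only, != B
      return 0;
    }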
/// SimplifyOrInst - Given operands for an Or, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1297,51 +1471,51 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
// Or distributes over And. Try some generic simplifications based on this.
- if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And, Q,
+ MaxRecurse))
return V;
// And distributes over Or. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// If the operation is with the result of a select instruction, check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+ if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, Q,
MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyOrInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyXorInst - Given operands for a Xor, see if we can
/// fold the result. If not, this returns null.
-static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
- const DominatorTree *DT, unsigned MaxRecurse) {
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
+ unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
Constant *Ops[] = { CLHS, CRHS };
return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
- Ops, TD);
+ Ops, Q.TD, Q.TLI);
}
// Canonicalize the constant to the RHS.
@@ -1366,13 +1540,13 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
return Constant::getAllOnesValue(Op0->getType());
// Try some generic simplifications for associative operations.
- if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+ if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, Q,
MaxRecurse))
return V;
// And distributes over Xor. Try some generic simplifications based on this.
if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
- TD, DT, MaxRecurse))
+ Q, MaxRecurse))
return V;
// Threading Xor over selects and phi nodes is pointless, so don't bother.
@@ -1388,8 +1562,9 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
}
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+ return ::SimplifyXorInst(Op0, Op1, Query (TD, TLI, DT), RecursionLimit);
}
static Type *GetCompareTy(Value *Op) {
@@ -1416,17 +1591,56 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
return 0;
}
+static Constant *computePointerICmp(const TargetData &TD,
+ CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ // We can only fold certain predicates on pointer comparisons.
+ switch (Pred) {
+ default:
+ return 0;
+
+ // Equality comparisons are easy to fold.
+ case CmpInst::ICMP_EQ:
+ case CmpInst::ICMP_NE:
+ break;
+
+ // We can only handle unsigned relational comparisons because 'inbounds' on
+ // a GEP only protects against unsigned wrapping.
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ // However, we have to switch them to their signed variants to handle
+ // negative indices from the base pointer.
+ Pred = ICmpInst::getSignedPredicate(Pred);
+ break;
+ }
+
+ Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS);
+ if (!LHSOffset)
+ return 0;
+ Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS);
+ if (!RHSOffset)
+ return 0;
+
+ // If LHS and RHS are not related via constant offsets to the same base
+ // value, there is nothing we can do here.
+ if (LHS != RHS)
+ return 0;
+
+ return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset);
+}
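The predicate switch above exists because inbounds offsets can be negative relative to the shared base: an unsigned comparison of the pointers corresponds to a signed comparison of the offsets. A standalone numeric illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Base = 0x1000;
      int64_t LHSOffset = -8, RHSOffset = 16; // offsets off the same base
      uint64_t LHS = Base + LHSOffset, RHS = Base + RHSOffset;
      assert(LHS < RHS);                      // icmp ult on the pointers
      assert(LHSOffset < RHSOffset);          // matches the *signed* compare
      // An unsigned compare of the raw offsets gets it wrong:
      assert(!((uint64_t)LHSOffset < (uint64_t)RHSOffset));
      return 0;
    }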
+
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -1443,8 +1657,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
// Special case logic when the operands have i1 type.
- if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
- cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+ if (OpTy->getScalarType()->isIntegerTy(1)) {
switch (Pred) {
default: break;
case ICmpInst::ICMP_EQ:
@@ -1480,63 +1693,101 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
- // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
- // different addresses, and what's more the address of a stack variable is
- // never null or equal to the address of a global. Note that generalizing
- // to the case where LHS is a global variable address or null is pointless,
- // since if both LHS and RHS are constants then we already constant folded
- // the compare, and if only one of them is then we moved it to RHS already.
- if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
- isa<ConstantPointerNull>(RHS)))
- // We already know that LHS != RHS.
- return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+ // icmp <object*>, <object*/null> - Different identified objects have
+ // different addresses (unless null), and what's more the address of an
+ // identified local is never equal to another argument (again, barring null).
+ // Note that generalizing to the case where LHS is a global variable address
+ // or null is pointless, since if both LHS and RHS are constants then we
+ // already constant folded the compare, and if only one of them is then we
+ // moved it to RHS already.
+ Value *LHSPtr = LHS->stripPointerCasts();
+ Value *RHSPtr = RHS->stripPointerCasts();
+ if (LHSPtr == RHSPtr)
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ // Be more aggressive about stripping pointer adjustments when checking a
+ // comparison of an alloca address to another object. We can rip off all
+ // inbounds GEP operations, even if they are variable.
+ LHSPtr = LHSPtr->stripInBoundsOffsets();
+ if (llvm::isIdentifiedObject(LHSPtr)) {
+ RHSPtr = RHSPtr->stripInBoundsOffsets();
+ if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
+ // If both sides are different identified objects, they aren't equal
+ // unless they're null.
+ if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+
+ // A local identified object (alloca or noalias call) can't equal any
+ // incoming argument, unless they're both null.
+ if (isa<Instruction>(LHSPtr) && isa<Argument>(RHSPtr) &&
+ Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ }
+
+ // Assume that the constant null is on the right.
+ if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
+ } else if (isa<Argument>(LHSPtr)) {
+ RHSPtr = RHSPtr->stripInBoundsOffsets();
+ // An alloca can't be equal to an argument.
+ if (isa<AllocaInst>(RHSPtr)) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return ConstantInt::get(ITy, false);
+ else if (Pred == CmpInst::ICMP_NE)
+ return ConstantInt::get(ITy, true);
+ }
+ }
// If we are comparing with zero then try hard since this is a common case.
if (match(RHS, m_Zero())) {
bool LHSKnownNonNegative, LHSKnownNegative;
switch (Pred) {
- default:
- assert(false && "Unknown ICmp predicate!");
+ default: llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_ULT:
return getFalse(ITy);
case ICmpInst::ICMP_UGE:
return getTrue(ITy);
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, TD))
+ if (isKnownNonZero(LHS, Q.TD))
return getFalse(ITy);
break;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, TD))
+ if (isKnownNonZero(LHS, Q.TD))
return getTrue(ITy);
break;
case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getTrue(ITy);
if (LHSKnownNonNegative)
return getFalse(ITy);
break;
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getTrue(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
return getFalse(ITy);
break;
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getFalse(ITy);
if (LHSKnownNonNegative)
return getTrue(ITy);
break;
case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.TD);
if (LHSKnownNegative)
return getFalse(ITy);
- if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.TD))
return getTrue(ITy);
break;
}
@@ -1564,6 +1815,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// 'srem x, CI2' produces (-|CI2|, |CI2|).
Upper = CI2->getValue().abs();
Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
+ // 'udiv CI2, x' produces [0, CI2].
+ Upper = CI2->getValue() + 1;
} else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
// 'udiv x, CI2' produces [0, UINT_MAX / CI2].
APInt NegOne = APInt::getAllOnesValue(Width);
@@ -1616,19 +1870,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
// if the integer type is the same size as the pointer type.
- if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
- TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+ if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
+ Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
// Transfer the cast to the constant.
if (Value *V = SimplifyICmpInst(Pred, SrcOp,
ConstantExpr::getIntToPtr(RHSC, SrcTy),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
} else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
if (RI->getOperand(0)->getType() == SrcTy)
// Compare without the cast.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
}
}
@@ -1640,7 +1894,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that signed predicates become unsigned.
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), TD, DT,
+ SrcOp, RI->getOperand(0), Q,
MaxRecurse-1))
return V;
}
@@ -1656,15 +1910,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// also a case of comparing two zero-extended values.
if (RExt == CI && MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, TD, DT, MaxRecurse-1))
+ SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
// there. Use this to work out the result of the comparison.
if (RExt != CI) {
switch (Pred) {
- default:
- assert(false && "Unknown ICmp predicate!");
+ default: llvm_unreachable("Unknown ICmp predicate!");
// LHS <u RHS.
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
@@ -1701,7 +1954,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
// Compare X and Y. Note that the predicate does not change.
if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
@@ -1715,16 +1968,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
- MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
return V;
// Otherwise the upper bits of LHS are all equal, while RHS has varying
// bits there. Use this to work out the result of the comparison.
if (RExt != CI) {
switch (Pred) {
- default:
- assert(false && "Unknown ICmp predicate!");
+ default: llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_EQ:
return ConstantInt::getFalse(CI->getContext());
case ICmpInst::ICMP_NE:
@@ -1751,7 +2002,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
break;
case ICmpInst::ICMP_ULT:
@@ -1760,7 +2011,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse)
if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
Constant::getNullValue(SrcTy),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
break;
}
@@ -1794,14 +2045,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
Constant::getNullValue(RHS->getType()),
- TD, DT, MaxRecurse-1))
+ Q, MaxRecurse-1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred,
Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, TD, DT, MaxRecurse-1))
+ C == LHS ? D : C, Q, MaxRecurse-1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
@@ -1810,7 +2061,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y = (A == C || A == D) ? B : A;
Value *Z = (C == A || C == B) ? D : C;
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
return V;
}
}
@@ -1822,7 +2073,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1832,7 +2083,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1849,7 +2100,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
break;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1859,7 +2110,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD);
+ ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD);
if (!KnownNonNegative)
break;
// fall-through
@@ -1870,6 +2121,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // x udiv y <=u x.
+ if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) {
+ // icmp pred (X /u Y), X
+ if (Pred == ICmpInst::ICMP_UGT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_ULE)
+ return getTrue(ITy);
+ }
+
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
@@ -1884,7 +2144,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse-1))
return V;
break;
case Instruction::Shl: {
@@ -1895,7 +2155,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), TD, DT, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse-1))
return V;
break;
}
@@ -1949,7 +2209,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -1963,7 +2223,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
return V;
break;
}
@@ -2019,7 +2279,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2033,7 +2293,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, TD, DT, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
return V;
break;
}
@@ -2090,37 +2350,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
}
+ // Simplify comparisons of related pointers using a powerful, recursive
+ // GEP-walk when we have target data available.
+ if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy())
+ if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS))
+ return C;
+
+ if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
+ if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
+ if (GLHS->getPointerOperand() == GRHS->getPointerOperand() &&
+ GLHS->hasAllConstantIndices() && GRHS->hasAllConstantIndices() &&
+ (ICmpInst::isEquality(Pred) ||
+ (GLHS->isInBounds() && GRHS->isInBounds() &&
+ Pred == ICmpInst::getSignedPredicate(Pred)))) {
+ // The bases are equal and the indices are constant. Build a constant
+ // expression GEP with the same indices and a null base pointer to see
+ // what constant folding can make out of it.
+ Constant *Null = Constant::getNullValue(GLHS->getPointerOperandType());
+ SmallVector<Value *, 4> IndicesLHS(GLHS->idx_begin(), GLHS->idx_end());
+ Constant *NewLHS = ConstantExpr::getGetElementPtr(Null, IndicesLHS);
+
+ SmallVector<Value *, 4> IndicesRHS(GRHS->idx_begin(), GRHS->idx_end());
+ Constant *NewRHS = ConstantExpr::getGetElementPtr(Null, IndicesRHS);
+ return ConstantExpr::getICmp(Pred, NewLHS, NewRHS);
+ }
+ }
+ }
+
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyICmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
}
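Among the hunks above, the "x udiv y <=u x" fold is a pure arithmetic fact: unsigned division never increases a value. A standalone spot-check over a few representative operands:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Xs[] = {0u, 1u, 7u, 1000u, 0xFFFFFFFFu};
      const uint32_t Ys[] = {1u, 2u, 3u, 0xFFFFFFFFu}; // Y == 0 is UB, skipped
      for (uint32_t X : Xs)
        for (uint32_t Y : Ys)
          assert(X / Y <= X); // so ugt folds to false, ule folds to true
      return 0;
    }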
/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.TD, Q.TLI);
// If we have a constant, make sure it is on the RHS.
std::swap(LHS, RHS);
@@ -2188,27 +2477,31 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the comparison is with the result of a select instruction, check whether
// comparing with either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, Q, MaxRecurse))
return V;
// If the comparison is with the result of a phi instruction, check whether
// doing the compare with each incoming phi value yields a common result.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyFCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
}
/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
/// the result. If not, this returns null.
-Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
- const TargetData *TD, const DominatorTree *) {
+static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
// select true, X, Y -> X
// select false, X, Y -> Y
if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
@@ -2231,12 +2524,22 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
return 0;
}
+Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifySelectInst(Cond, TrueVal, FalseVal, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
/// fold the result. If not, this returns null.
-Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
- const TargetData *TD, const DominatorTree *) {
+static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// The type of the GEP pointer operand.
- PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+ PointerType *PtrTy = dyn_cast<PointerType>(Ops[0]->getType());
+ // If the pointer operand is not a simple pointer (it may be a vector of
+ // pointers), give up.
+ if (!PtrTy)
+ return 0;
// getelementptr P -> P.
if (Ops.size() == 1)
@@ -2255,9 +2558,9 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
if (C->isZero())
return Ops[0];
// getelementptr P, N -> P if P points to a type of zero size.
- if (TD) {
+ if (Q.TD) {
Type *Ty = PtrTy->getElementType();
- if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+ if (Ty->isSized() && Q.TD->getTypeAllocSize(Ty) == 0)
return Ops[0];
}
}
@@ -2270,12 +2573,17 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops,
return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
}
+Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyGEPInst(Ops, Query (TD, TLI, DT), RecursionLimit);
+}
+
/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
/// can fold the result. If not, this returns null.
-Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
- ArrayRef<unsigned> Idxs,
- const TargetData *,
- const DominatorTree *) {
+static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs, const Query &Q,
+ unsigned) {
if (Constant *CAgg = dyn_cast<Constant>(Agg))
if (Constant *CVal = dyn_cast<Constant>(Val))
return ConstantFoldInsertValueInstruction(CAgg, CVal, Idxs);
@@ -2300,8 +2608,17 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
return 0;
}
+Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
+ ArrayRef<unsigned> Idxs,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyInsertValueInst(Agg, Val, Idxs, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
-static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
Value *CommonValue = 0;
@@ -2329,67 +2646,77 @@ static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
if (HasUndefInput)
- return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+ return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
return CommonValue;
}
+static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
+ if (Constant *C = dyn_cast<Constant>(Op))
+ return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.TD, Q.TLI);
+
+ return 0;
+}
+
+Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyTruncInst(Op, Ty, Query (TD, TLI, DT), RecursionLimit);
+}
//=== Helper functions for higher up the class hierarchy.
/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
/// fold the result. If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
switch (Opcode) {
case Instruction::Add:
return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ Q, MaxRecurse);
case Instruction::Sub:
return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
- case Instruction::Mul: return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse);
+ Q, MaxRecurse);
+ case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::SRem: return SimplifySRemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::URem: return SimplifyURemInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, Q, MaxRecurse);
case Instruction::Shl:
return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
- TD, DT, MaxRecurse);
+ Q, MaxRecurse);
case Instruction::LShr:
- return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+ return SimplifyLShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
case Instruction::AShr:
- return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
- case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Or: return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
- case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyAShrInst(LHS, RHS, /*isExact*/false, Q, MaxRecurse);
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, Q, MaxRecurse);
+ case Instruction::Or: return SimplifyOrInst (LHS, RHS, Q, MaxRecurse);
+ case Instruction::Xor: return SimplifyXorInst(LHS, RHS, Q, MaxRecurse);
default:
if (Constant *CLHS = dyn_cast<Constant>(LHS))
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
Constant *COps[] = {CLHS, CRHS};
- return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, TD);
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, Q.TD,
+ Q.TLI);
}
// If the operation is associative, try some generic simplifications.
if (Instruction::isAssociative(Opcode))
- if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
- MaxRecurse))
+ if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
- // If the operation is with the result of a select instruction, check whether
+ // If the operation is with the result of a select instruction check whether
// operating on either branch of the select always yields the same value.
if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
- if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
- MaxRecurse))
+ if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
// If the operation is with the result of a phi instruction, check whether
// operating on all incoming values of the phi always yields the same value.
if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
- if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+ if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
return 0;
@@ -2397,119 +2724,136 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyBinOp(Opcode, LHS, RHS, Query (TD, TLI, DT), RecursionLimit);
}
/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
/// fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT,
- unsigned MaxRecurse) {
+ const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
- return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
- return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+ return SimplifyICmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, Q, MaxRecurse);
}
Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const TargetData *TD, const DominatorTree *DT) {
- return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+ const TargetData *TD, const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return ::SimplifyCmpInst(Predicate, LHS, RHS, Query (TD, TLI, DT),
+ RecursionLimit);
+}
+
+static Value *SimplifyCallInst(CallInst *CI, const Query &) {
+ // call undef -> undef
+ if (isa<UndefValue>(CI->getCalledValue()))
+ return UndefValue::get(CI->getType());
+
+ return 0;
}
/// SimplifyInstruction - See if we can compute a simplified version of this
/// instruction. If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+ const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
Value *Result;
switch (I->getOpcode()) {
default:
- Result = ConstantFoldInstruction(I, TD);
+ Result = ConstantFoldInstruction(I, TD, TLI);
break;
case Instruction::Add:
Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Sub:
Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::Mul:
- Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SDiv:
- Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::UDiv:
- Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FDiv:
- Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::SRem:
- Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::URem:
- Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyURemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FRem:
- Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Shl:
Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->hasNoSignedWrap(),
cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::LShr:
Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::AShr:
Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
cast<BinaryOperator>(I)->isExact(),
- TD, DT);
+ TD, TLI, DT);
break;
case Instruction::And:
- Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Or:
- Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Xor:
- Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+ Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::ICmp:
Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::FCmp:
Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
- I->getOperand(0), I->getOperand(1), TD, DT);
+ I->getOperand(0), I->getOperand(1), TD, TLI, DT);
break;
case Instruction::Select:
Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
- I->getOperand(2), TD, DT);
+ I->getOperand(2), TD, TLI, DT);
break;
case Instruction::GetElementPtr: {
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
- Result = SimplifyGEPInst(Ops, TD, DT);
+ Result = SimplifyGEPInst(Ops, TD, TLI, DT);
break;
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
Result = SimplifyInsertValueInst(IV->getAggregateOperand(),
IV->getInsertedValueOperand(),
- IV->getIndices(), TD, DT);
+ IV->getIndices(), TD, TLI, DT);
break;
}
case Instruction::PHI:
- Result = SimplifyPHINode(cast<PHINode>(I), DT);
+ Result = SimplifyPHINode(cast<PHINode>(I), Query (TD, TLI, DT));
+ break;
+ case Instruction::Call:
+ Result = SimplifyCallInst(cast<CallInst>(I), Query (TD, TLI, DT));
+ break;
+ case Instruction::Trunc:
+ Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT);
break;
}
@@ -2519,57 +2863,84 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
return Result == I ? UndefValue::get(I->getType()) : Result;
}
-/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
-/// delete the From instruction. In addition to a basic RAUW, this does a
-/// recursive simplification of the newly formed instructions. This catches
-/// things where one simplification exposes other opportunities. This only
-/// simplifies and deletes scalar operations, it does not change the CFG.
+/// \brief Implementation of recursive simplification through an instruction's
+/// uses.
///
-void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
- const TargetData *TD,
- const DominatorTree *DT) {
- assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
-
- // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
- // we can know if it gets deleted out from under us or replaced in a
- // recursive simplification.
- WeakVH FromHandle(From);
- WeakVH ToHandle(To);
-
- while (!From->use_empty()) {
- // Update the instruction to use the new value.
- Use &TheUse = From->use_begin().getUse();
- Instruction *User = cast<Instruction>(TheUse.getUser());
- TheUse = To;
-
- // Check to see if the instruction can be folded due to the operand
- // replacement. For example changing (or X, Y) into (or X, -1) can replace
- // the 'or' with -1.
- Value *SimplifiedVal;
- {
- // Sanity check to make sure 'User' doesn't dangle across
- // SimplifyInstruction.
- AssertingVH<> UserHandle(User);
-
- SimplifiedVal = SimplifyInstruction(User, TD, DT);
- if (SimplifiedVal == 0) continue;
- }
+/// This is the common implementation of the recursive simplification routines.
+/// If we have a pre-simplified value in 'SimpleV', that is forcibly used to
+/// replace the instruction 'I'. Otherwise, we simply add 'I' to the list of
+/// instructions to process and attempt to simplify it using
+/// InstructionSimplify.
+///
+/// This routine returns 'true' only when *it* simplifies something. The passed
+/// in simplified value does not count toward this.
+static bool replaceAndRecursivelySimplifyImpl(Instruction *I, Value *SimpleV,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ bool Simplified = false;
+ SmallSetVector<Instruction *, 8> Worklist;
+
+ // If we have an explicit value to collapse to, do that round of the
+ // simplification loop by hand initially.
+ if (SimpleV) {
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ if (*UI != I)
+ Worklist.insert(cast<Instruction>(*UI));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
+ } else {
+ Worklist.insert(I);
+ }
- // Recursively simplify this user to the new value.
- ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
- From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
- To = ToHandle;
+ // Note that we must test the size on each iteration, the worklist can grow.
+ for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+ I = Worklist[Idx];
+
+ // See if this instruction simplifies.
+ SimpleV = SimplifyInstruction(I, TD, TLI, DT);
+ if (!SimpleV)
+ continue;
- assert(ToHandle && "To value deleted by recursive simplification?");
+ Simplified = true;
- // If the recursive simplification ended up revisiting and deleting
- // 'From' then we're done.
- if (From == 0)
- return;
+ // Stash away all the uses of the old instruction so we can check them for
+ // recursive simplifications after a RAUW. This is cheaper than checking all
+ // uses of SimpleV on the recursive step in most cases.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE;
+ ++UI)
+ Worklist.insert(cast<Instruction>(*UI));
+
+ // Replace the instruction with its simplified value.
+ I->replaceAllUsesWith(SimpleV);
+
+ // Gracefully handle edge cases where the instruction is not wired into any
+ // parent block.
+ if (I->getParent())
+ I->eraseFromParent();
}
+ return Simplified;
+}
- // If 'From' has value handles referring to it, do a real RAUW to update them.
- From->replaceAllUsesWith(To);
+bool llvm::recursivelySimplifyInstruction(Instruction *I,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ return replaceAndRecursivelySimplifyImpl(I, 0, TD, TLI, DT);
+}
- From->eraseFromParent();
+bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT) {
+ assert(I != SimpleV && "replaceAndRecursivelySimplify(X,X) is not valid!");
+ assert(SimpleV && "Must provide a simplified value.");
+ return replaceAndRecursivelySimplifyImpl(I, SimpleV, TD, TLI, DT);
}
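Two hedged usage sketches for the new entry points; these are independent scenarios, not sequential code, since the first one erases I:

    // Scenario A: a simpler value V for I is already known. Replace I,
    // then let the worklist chase follow-on simplifications; I is erased,
    // and the return value reports only the *extra* simplifications found.
    bool Changed = replaceAndRecursivelySimplify(I, V, TD, TLI, DT);

    // Scenario B: no precomputed value. Try to simplify I itself and,
    // transitively, whatever its simplification exposes.
    bool Changed2 = recursivelySimplifyInstruction(I, TD, TLI, DT);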
diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt
new file mode 100644
index 000000000000..a8a8079d1e5a
--- /dev/null
+++ b/lib/Analysis/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/Analysis/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = IPA
+
+[component_0]
+type = Library
+name = Analysis
+parent = Libraries
+required_libraries = Core Support Target
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index f80595c7dbed..5ca2746c9f6a 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -20,20 +20,25 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include <map>
#include <stack>
using namespace llvm;
+using namespace PatternMatch;
char LazyValueInfo::ID = 0;
-INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
namespace llvm {
@@ -61,10 +66,10 @@ class LVILatticeVal {
constant,
/// notconstant - This Value is known to not have the specified value.
notconstant,
-
+
/// constantrange - The Value falls within this range.
constantrange,
-
+
/// overdefined - This value is not known to be constant, and we know that
/// it has a value.
overdefined
@@ -207,7 +212,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getConstant(),
@@ -233,7 +238,7 @@ public:
// Unless we can prove that the two Constants are different, we must
// move to overdefined.
- // FIXME: use TargetData for smarter constant folding.
+ // FIXME: use TargetData/TargetLibraryInfo for smarter constant folding.
if (ConstantInt *Res = dyn_cast<ConstantInt>(
ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
getNotConstant(),
@@ -305,50 +310,6 @@ namespace {
};
}
-namespace llvm {
- template<>
- struct DenseMapInfo<LVIValueHandle> {
- typedef DenseMapInfo<Value*> PointerInfo;
- static inline LVIValueHandle getEmptyKey() {
- return LVIValueHandle(PointerInfo::getEmptyKey(),
- static_cast<LazyValueInfoCache*>(0));
- }
- static inline LVIValueHandle getTombstoneKey() {
- return LVIValueHandle(PointerInfo::getTombstoneKey(),
- static_cast<LazyValueInfoCache*>(0));
- }
- static unsigned getHashValue(const LVIValueHandle &Val) {
- return PointerInfo::getHashValue(Val);
- }
- static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
- return LHS == RHS;
- }
- };
-
- template<>
- struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
- typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
- typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
- typedef DenseMapInfo<Value*> BPointerInfo;
- static inline PairTy getEmptyKey() {
- return std::make_pair(APointerInfo::getEmptyKey(),
- BPointerInfo::getEmptyKey());
- }
- static inline PairTy getTombstoneKey() {
- return std::make_pair(APointerInfo::getTombstoneKey(),
- BPointerInfo::getTombstoneKey());
- }
- static unsigned getHashValue( const PairTy &Val) {
- return APointerInfo::getHashValue(Val.first) ^
- BPointerInfo::getHashValue(Val.second);
- }
- static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
- return APointerInfo::isEqual(LHS.first, RHS.first) &&
- BPointerInfo::isEqual(LHS.second, RHS.second);
- }
- };
-}
-
namespace {
/// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
/// maintains information about queries across the clients' queries.
@@ -360,14 +321,18 @@ namespace {
/// ValueCache - This is all of the cached information for all values,
/// mapped from Value* to key information.
- DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+ std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
/// OverDefinedCache - This tracks, on a per-block basis, the set of
/// values that are over-defined at the end of that block. This is required
/// for cache updating.
typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
DenseSet<OverDefinedPairTy> OverDefinedCache;
-
+
+ /// SeenBlocks - Keep track of all blocks that we have ever seen, so we
+ /// don't spend time removing unused blocks from our caches.
+ DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+
/// BlockValueStack - This stack holds the state of the value solver
/// during a query. It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -438,6 +403,7 @@ namespace {
/// clear - Empty the cache.
void clear() {
+ SeenBlocks.clear();
ValueCache.clear();
OverDefinedCache.clear();
}
@@ -466,6 +432,12 @@ void LVIValueHandle::deleted() {
}
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
SmallVector<OverDefinedPairTy, 4> ToErase;
for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
E = OverDefinedCache.end(); I != E; ++I) {
@@ -477,7 +449,7 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
E = ToErase.end(); I != E; ++I)
OverDefinedCache.erase(*I);
- for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
+ for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
I->second.erase(BB);
}
@@ -505,6 +477,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
+ SeenBlocks.insert(BB);
return lookup(Val)[BB];
}
@@ -513,6 +486,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
return true;
ValueCacheEntryTy &Cache = lookup(Val);
+ SeenBlocks.insert(BB);
LVILatticeVal &BBLV = Cache[BB];
// OverDefinedCacheUpdater is a helper object that will update
@@ -823,9 +797,8 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
- if (ICI && ICI->getOperand(0) == Val &&
- isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality()) {
+ if (ICI && isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality() && ICI->getOperand(0) == Val) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
@@ -835,12 +808,23 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
return true;
}
- if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Recognize the range checking idiom that InstCombine produces.
+ // (X-C1) u< C2 --> [C1, C1+C2)
+ ConstantInt *NegOffset = 0;
+ if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
+ match(ICI->getOperand(0), m_Add(m_Specific(Val),
+ m_ConstantInt(NegOffset)));
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
+ if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
// Calculate the range of values that would satisfy the comparison.
ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
ConstantRange TrueValues =
ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+ if (NegOffset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(NegOffset->getValue());
+
// If we're interested in the false dest, invert the condition.
if (!isTrueDest) TrueValues = TrueValues.inverse();
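A worked instance of the idiom, with invented constants: for an InstCombine-style guard "%off = add i32 %x, -8" followed by "%cmp = icmp ult i32 %off, 4", CmpRange is [4,5), makeICmpRegion(ULT, [4,5)) yields [0,4), and undoing the -8 offset places %x in [8,12) on the true edge:

    // Sketch of the arithmetic, using the same ConstantRange API as above.
    ConstantRange CmpRange(APInt(32, 4), APInt(32, 5));
    ConstantRange TrueValues =
      ConstantRange::makeICmpRegion(ICmpInst::ICMP_ULT, CmpRange); // [0,4)
    TrueValues = TrueValues.subtract(APInt(32, -8, true));         // [8,12)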
@@ -882,10 +866,11 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// BBFrom to BBTo.
unsigned NumEdges = 0;
ConstantInt *EdgeVal = 0;
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
- if (SI->getSuccessor(i) != BBTo) continue;
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ if (i.getCaseSuccessor() != BBTo) continue;
if (NumEdges++) break;
- EdgeVal = SI->getCaseValue(i);
+ EdgeVal = i.getCaseValue();
}
assert(EdgeVal && "Missing successor?");
if (NumEdges == 1) {
@@ -1007,12 +992,19 @@ static LazyValueInfoCache &getCache(void *&PImpl) {
bool LazyValueInfo::runOnFunction(Function &F) {
if (PImpl)
getCache(PImpl).clear();
-
+
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
// Fully lazy.
return false;
}
+void LazyValueInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequired<TargetLibraryInfo>();
+}
+
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
@@ -1061,7 +1053,8 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
// If we know the value is a constant, evaluate the conditional.
Constant *Res = 0;
if (Result.isConstant()) {
- Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD,
+ TLI);
if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
return ResCI->isZero() ? False : True;
return Unknown;
@@ -1102,13 +1095,15 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
if (Pred == ICmpInst::ICMP_EQ) {
// !C1 == C -> false iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
- Result.getNotConstant(), C, TD);
+ Result.getNotConstant(), C, TD,
+ TLI);
if (Res->isNullValue())
return True;
}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 38d677d502a7..83bdf5286ad7 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -44,6 +44,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
#include "llvm/IntrinsicInst.h"
@@ -103,6 +104,7 @@ namespace {
AliasAnalysis *AA;
DominatorTree *DT;
TargetData *TD;
+ TargetLibraryInfo *TLI;
std::string Messages;
raw_string_ostream MessagesStr;
@@ -117,6 +119,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetLibraryInfo>();
AU.addRequired<DominatorTree>();
}
virtual void print(raw_ostream &O, const Module *M) const {}
@@ -149,6 +152,7 @@ namespace {
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -174,6 +178,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
visit(F);
dbgs() << MessagesStr.str();
Messages.clear();
@@ -411,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I,
if (Align != 0) {
unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
- APInt Mask = APInt::getAllOnesValue(BitWidth),
- KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Ptr, KnownZero, KnownOne, TD);
Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
"Undefined behavior: Memory reference address is misaligned", &I);
}
@@ -471,9 +475,8 @@ static bool isZero(Value *V, TargetData *TD) {
if (isa<UndefValue>(V)) return true;
unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
- APInt Mask = APInt::getAllOnesValue(BitWidth),
- KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD);
return KnownZero.isAllOnesValue();
}
@@ -614,10 +617,10 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
// As a last resort, try SimplifyInstruction or constant folding.
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
- if (Value *W = SimplifyInstruction(Inst, TD, DT))
+ if (Value *W = SimplifyInstruction(Inst, TD, TLI, DT))
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (Value *W = ConstantFoldConstantExpression(CE, TD, TLI))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 0e6bcbfae4f6..873a27543dd6 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -17,6 +17,7 @@
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
using namespace llvm;
@@ -160,10 +161,15 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
/// it is set to 0, it will scan the whole block. You can also optionally
/// specify an alias analysis implementation, which makes this more precise.
+///
+/// If TBAATag is non-null and a load or store is found, the TBAA tag from the
+/// load or store is recorded there. If there is no TBAA tag or if no access
+/// is found, it is left unmodified.
Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA) {
+ AliasAnalysis *AA,
+ MDNode **TBAATag) {
if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
// If we're using alias analysis to disambiguate get the size of *Ptr.
@@ -191,15 +197,19 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// (This is true even if the load is volatile or atomic, although
// those cases are unlikely.)
if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) {
+ if (TBAATag) *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
return LI;
+ }
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If this is a store through Ptr, the value is available!
// (This is true even if the store is volatile or atomic, although
// those cases are unlikely.)
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) {
+ if (TBAATag) *TBAATag = SI->getMetadata(LLVMContext::MD_tbaa);
return SI->getOperand(0);
+ }
// If Ptr is an alloca and this is a store to a different alloca, ignore
// the store. This is a trivial form of alias analysis that is important
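The new TBAATag out-parameter in a hedged caller-side sketch; LI here is assumed to be a LoadInst* being considered for elimination, and the scan limit of 6 is an arbitrary choice:

    // Sketch: recover both the available value and its !tbaa tag, if any.
    MDNode *Tag = 0;
    BasicBlock::iterator ScanFrom = LI;
    if (Value *Avail = FindAvailableLoadedValue(LI->getPointerOperand(),
                                                LI->getParent(), ScanFrom,
                                                6, AA, &Tag)) {
      // Avail can replace the load; when Tag is non-null it carries the
      // TBAA metadata of the access that produced the value.
    }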
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index 3997ac478b52..463269d9d984 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -91,8 +91,6 @@ static Value *GetPointerOperand(Value *I) {
if (StoreInst *i = dyn_cast<StoreInst>(I))
return i->getPointerOperand();
llvm_unreachable("Value is no load or store instruction!");
- // Never reached.
- return 0;
}
static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 85aaccaefc37..f7a60a1737d4 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
@@ -95,7 +96,7 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
// Test if the value is already loop-invariant.
if (isLoopInvariant(I))
return true;
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
if (I->mayReadFromMemory())
return false;
@@ -165,99 +166,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return 0;
}
-/// getTripCount - Return a loop-invariant LLVM value indicating the number of
-/// times the loop will be executed. Note that this means that the backedge
-/// of the loop executes N-1 times. If the trip-count cannot be determined,
-/// this returns null.
-///
-/// The IndVarSimplify pass transforms loops to have a form that this
-/// function easily understands.
-///
-Value *Loop::getTripCount() const {
- // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
- // canonical induction variable and V is the trip count of the loop.
- PHINode *IV = getCanonicalInductionVariable();
- if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
-
- bool P0InLoop = contains(IV->getIncomingBlock(0));
- Value *Inc = IV->getIncomingValue(!P0InLoop);
- BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
-
- if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
- if (BI->isConditional()) {
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
- if (ICI->getOperand(0) == Inc) {
- if (BI->getSuccessor(0) == getHeader()) {
- if (ICI->getPredicate() == ICmpInst::ICMP_NE)
- return ICI->getOperand(1);
- } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
- return ICI->getOperand(1);
- }
- }
- }
- }
-
- return 0;
-}
-
-/// getSmallConstantTripCount - Returns the trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the trip count is very large
-/// (>= 2^32)
-unsigned Loop::getSmallConstantTripCount() const {
- Value* TripCount = this->getTripCount();
- if (TripCount) {
- if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
- // Guard against huge trip counts.
- if (TripCountC->getValue().getActiveBits() <= 32) {
- return (unsigned)TripCountC->getZExtValue();
- }
- }
- }
- return 0;
-}
-
-/// getSmallConstantTripMultiple - Returns the largest constant divisor of the
-/// trip count of this loop as a normal unsigned value, if possible. This
-/// means that the actual trip count is always a multiple of the returned
-/// value (don't forget the trip count could very well be zero as well!).
-///
-/// Returns 1 if the trip count is unknown or not guaranteed to be the
-/// multiple of a constant (which is also the case if the trip count is simply
-/// constant, use getSmallConstantTripCount for that case), Will also return 1
-/// if the trip count is very large (>= 2^32).
-unsigned Loop::getSmallConstantTripMultiple() const {
- Value* TripCount = this->getTripCount();
- // This will hold the ConstantInt result, if any
- ConstantInt *Result = NULL;
- if (TripCount) {
- // See if the trip count is constant itself
- Result = dyn_cast<ConstantInt>(TripCount);
- // if not, see if it is a multiplication
- if (!Result)
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
- switch (BO->getOpcode()) {
- case BinaryOperator::Mul:
- Result = dyn_cast<ConstantInt>(BO->getOperand(1));
- break;
- case BinaryOperator::Shl:
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
- if (CI->getValue().getActiveBits() <= 5)
- return 1u << CI->getZExtValue();
- break;
- default:
- break;
- }
- }
- }
- // Guard against huge trip counts.
- if (Result && Result->getValue().getActiveBits() <= 32) {
- return (unsigned)Result->getZExtValue();
- } else {
- return 1;
- }
-}
-
/// isLCSSAForm - Return true if the Loop is in LCSSA form
bool Loop::isLCSSAForm(DominatorTree &DT) const {
// Sort the blocks vector so that we can use binary search to do quick
@@ -297,6 +205,17 @@ bool Loop::isLoopSimplifyForm() const {
return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
}
+/// isSafeToClone - Return true if the loop body is safe to clone in practice.
+/// Routines that reform the loop CFG and split edges often fail on indirectbr.
+bool Loop::isSafeToClone() const {
+ // Return false if any loop blocks contain indirectbrs.
+ for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
+ if (isa<IndirectBrInst>((*I)->getTerminator()))
+ return false;
+ }
+ return true;
+}
+
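A hedged usage sketch; the surrounding pass scaffolding is assumed:

    // Sketch: cloning transforms (unrolling, versioning) can bail early.
    if (!L->isSafeToClone() || !L->isLoopSimplifyForm())
      return false;  // skip loops containing indirectbr terminators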
/// hasDedicatedExits - Return true if no exit block for the loop
/// has a predecessor that is outside the loop.
bool Loop::hasDedicatedExits() const {
@@ -477,21 +396,19 @@ void UnloopUpdater::updateBlockParents() {
/// removeBlocksFromAncestors - Remove unloop's blocks from all ancestors below
/// their new parents.
void UnloopUpdater::removeBlocksFromAncestors() {
- // Remove unloop's blocks from all ancestors below their new parents.
+ // Remove all unloop's blocks (including those in nested subloops) from
+ // ancestors below the new parent loop.
for (Loop::block_iterator BI = Unloop->block_begin(),
BE = Unloop->block_end(); BI != BE; ++BI) {
- Loop *NewParent = LI->getLoopFor(*BI);
- // If this block is an immediate subloop, remove all blocks (including
- // nested subloops) from ancestors below the new parent loop.
- // Otherwise, if this block is in a nested subloop, skip it.
- if (SubloopParents.count(NewParent))
- NewParent = SubloopParents[NewParent];
- else if (Unloop->contains(NewParent))
- continue;
-
+ Loop *OuterParent = LI->getLoopFor(*BI);
+ if (Unloop->contains(OuterParent)) {
+ while (OuterParent->getParentLoop() != Unloop)
+ OuterParent = OuterParent->getParentLoop();
+ OuterParent = SubloopParents[OuterParent];
+ }
// Remove blocks from former Ancestors except Unloop itself which will be
// deleted.
- for (Loop *OldParent = Unloop->getParentLoop(); OldParent != NewParent;
+ for (Loop *OldParent = Unloop->getParentLoop(); OldParent != OuterParent;
OldParent = OldParent->getParentLoop()) {
assert(OldParent && "new loop is not an ancestor of the original");
OldParent->removeBlockFromLoop(*BI);
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 5ba1f4045d1a..aba700ac5c34 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -14,10 +14,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/DebugInfoProbe.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Timer.h"
using namespace llvm;
@@ -54,20 +52,6 @@ char PrintLoopPass::ID = 0;
}
//===----------------------------------------------------------------------===//
-// DebugInfoProbe
-
-static DebugInfoProbeInfo *TheDebugProbe;
-static void createDebugInfoProbe() {
- if (TheDebugProbe) return;
-
- // Constructed the first time this is called. This guarantees that the
- // object will be constructed, if -enable-debug-info-probe is set,
- // before static globals, thus it will be destroyed before them.
- static ManagedStatic<DebugInfoProbeInfo> DIP;
- TheDebugProbe = &*DIP;
-}
-
-//===----------------------------------------------------------------------===//
// LPPassManager
//
@@ -195,7 +179,6 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
bool LPPassManager::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfo>();
bool Changed = false;
- createDebugInfoProbe();
// Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack);
@@ -227,21 +210,19 @@ bool LPPassManager::runOnFunction(Function &F) {
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
+
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
dumpRequiredSet(P);
initializeAnalysisImpl(P);
- if (TheDebugProbe)
- TheDebugProbe->initialize(P, F);
+
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
TimeRegion PassTimer(getPassTimer(P));
Changed |= P->runOnLoop(CurrentLoop, *this);
}
- if (TheDebugProbe)
- TheDebugProbe->finalize(P, F);
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index fde07ea4f98d..22414b36d5a4 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -130,7 +130,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
AliasAnalysis::Location Loc = AA.getLocation(LI);
MDA.getNonLocalPointerDependency(Loc, true, LI->getParent(), NLDI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- if (!LI->isUnordered()) {
+ if (!SI->isUnordered()) {
// FIXME: Handle atomic/volatile stores.
Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
static_cast<BasicBlock *>(0)));
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 8d451c46f9b0..b145650b0f0a 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -48,10 +48,10 @@ static bool isMallocCall(const CallInst *CI) {
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute exists.
FunctionType *FTy = Callee->getFunctionType();
- if (FTy->getNumParams() != 1)
- return false;
- return FTy->getParamType(0)->isIntegerTy(32) ||
- FTy->getParamType(0)->isIntegerTy(64);
+ return FTy->getReturnType() == Type::getInt8PtrTy(FTy->getContext()) &&
+ FTy->getNumParams() == 1 &&
+ (FTy->getParamType(0)->isIntegerTy(32) ||
+ FTy->getParamType(0)->isIntegerTy(64));
}
/// extractMallocCall - Returns the corresponding CallInst if the instruction
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 92967c08dc21..3a544f35d502 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -22,6 +22,7 @@
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -91,6 +92,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
TD = getAnalysisIfAvailable<TargetData>();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (PredCache == 0)
PredCache.reset(new PredIteratorCache());
return false;
@@ -321,14 +323,100 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs,
!TD.fitsInLegalInteger(NewLoadByteSize*8))
return 0;
+ if (LIOffs+NewLoadByteSize > MemLocEnd &&
+ LI->getParent()->getParent()->hasFnAttr(Attribute::AddressSafety)) {
+ // We will be reading past the location accessed by the original program.
+ // While this is safe in a regular build, Address Safety analysis tools
+ // may start reporting false warnings. So, don't do widening.
+ return 0;
+ }
+
// If a load of this width would include all of MemLoc, then we succeed.
if (LIOffs+NewLoadByteSize >= MemLocEnd)
return NewLoadByteSize;
NewLoadByteSize <<= 1;
}
-
- return 0;
+}
+
+namespace {
+ /// Only find pointer captures which happen before the given instruction. Uses
+ /// the dominator tree to determine whether one instruction is before another.
+ struct CapturesBefore : public CaptureTracker {
+ CapturesBefore(const Instruction *I, DominatorTree *DT)
+ : BeforeHere(I), DT(DT), Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ if (BeforeHere != I &&
+ (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ return false;
+ return true;
+ }
+
+ bool captured(Use *U) {
+ Instruction *I = cast<Instruction>(U->getUser());
+ BasicBlock *BB = I->getParent();
+ if (BeforeHere != I &&
+ (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+ return false;
+ Captured = true;
+ return true;
+ }
+
+ const Instruction *BeforeHere;
+ DominatorTree *DT;
+
+ bool Captured;
+ };
+}
+
+AliasAnalysis::ModRefResult
+MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst,
+ const AliasAnalysis::Location &MemLoc) {
+ AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
+ if (MR != AliasAnalysis::ModRef) return MR;
+
+ // FIXME: this is really just shoring-up a deficiency in alias analysis.
+ // BasicAA isn't willing to spend linear time determining whether an alloca
+ // was captured before or after this particular call, while we are. However,
+ // with a smarter AA in place, this test is just wasting compile time.
+ if (!DT) return AliasAnalysis::ModRef;
+ const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD);
+ if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object))
+ return AliasAnalysis::ModRef;
+ ImmutableCallSite CS(Inst);
+ if (!CS.getInstruction()) return AliasAnalysis::ModRef;
+
+ CapturesBefore CB(Inst, DT);
+ llvm::PointerMayBeCaptured(Object, &CB);
+
+ if (isa<Constant>(Object) || CS.getInstruction() == Object || CB.Captured)
+ return AliasAnalysis::ModRef;
+
+ unsigned ArgNo = 0;
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ CI != CE; ++CI, ++ArgNo) {
+ // Only look at the no-capture or byval pointer arguments. If this
+ // pointer were passed to arguments that were neither of these, then it
+ // couldn't be no-capture.
+ if (!(*CI)->getType()->isPointerTy() ||
+ (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ continue;
+
+ // If this is a no-capture pointer argument, see if we can tell that it
+ // is impossible to alias the pointer we're checking. If not, we have to
+ // assume that the call could touch the pointer, even though it doesn't
+ // escape.
+ if (!AA->isNoAlias(AliasAnalysis::Location(*CI),
+ AliasAnalysis::Location(Object))) {
+ return AliasAnalysis::ModRef;
+ }
+ }
+ return AliasAnalysis::NoModRef;
}
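What the refinement buys, as a hedged sketch; MDA, TheCall and AllocaPtr are invented names for the analysis, a call site, and a never-captured alloca:

    // Sketch: the refined query can prove NoModRef for a non-escaping
    // alloca that aliases none of the call's pointer arguments, where
    // plain alias analysis conservatively answered ModRef.
    AliasAnalysis::Location Loc(AllocaPtr);
    if (MDA.getModRefInfo(TheCall, Loc) == AliasAnalysis::NoModRef) {
      // The call cannot read or write the alloca.
    }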
/// getPointerDependencyFrom - Return the instruction on which a memory
@@ -478,7 +566,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
}
// See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
- switch (AA->getModRefInfo(Inst, MemLoc)) {
+ switch (getModRefInfo(Inst, MemLoc)) {
case AliasAnalysis::NoModRef:
// If the call has no effect on the queried pointer, just ignore it.
continue;
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 7e22ddc61c09..38cb1c91f8f8 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/Dominators.h"
@@ -27,7 +28,7 @@ static bool CanPHITrans(Instruction *Inst) {
return true;
if (isa<CastInst>(Inst) &&
- Inst->isSafeToSpeculativelyExecute())
+ isSafeToSpeculativelyExecute(Inst))
return true;
if (Inst->getOpcode() == Instruction::Add &&
@@ -73,7 +74,6 @@ static bool VerifySubExpr(Value *Expr,
errs() << *I << '\n';
llvm_unreachable("Either something is missing from InstInputs or "
"CanPHITrans is wrong.");
- return false;
}
// Validate the operands of the instruction.
@@ -100,7 +100,6 @@ bool PHITransAddr::Verify() const {
for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
llvm_unreachable("This is unexpected.");
- return false;
}
// a-ok.
@@ -186,7 +185,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
if (PHIIn == Cast->getOperand(0))
@@ -228,7 +227,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEP;
// Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(GEPOps, TD, DT)) {
+ if (Value *V = SimplifyGEPInst(GEPOps, TD, TLI, DT)) {
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
@@ -284,7 +283,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
}
// See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) {
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, TLI, DT)) {
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
@@ -381,7 +380,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return 0;
Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
CurBB, PredBB, DT, NewInsts);
if (OpVal == 0) return 0;
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index 0e3b6e69ce34..80c5222a27a5 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -386,8 +386,8 @@ void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
}
TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
- if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
- || isa<ResumeInst>(terminator) || isa<UnwindInst>(terminator))
+ if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator) ||
+ isa<ResumeInst>(terminator))
addEdge(currentNode, getExit(),0);
currentNode->setColor(BallLarusNode::GRAY);
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index 0ae734e259db..0fcdfe75aefd 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -137,22 +137,22 @@ bool PathProfileVerifier::runOnModule (Module &M) {
BasicBlock* source = nextEdge->getSource();
BasicBlock* target = nextEdge->getTarget();
unsigned duplicateNumber = nextEdge->getDuplicateNumber();
- DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber
- << "}--> " << target->getNameStr());
+ DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber
+ << "}--> " << target->getName());
// Ensure all the referenced edges exist
// TODO: make this a separate function
if( !arrayMap.count(source) ) {
- errs() << " error [" << F->getNameStr() << "()]: source '"
- << source->getNameStr()
+ errs() << " error [" << F->getName() << "()]: source '"
+ << source->getName()
<< "' does not exist in the array map.\n";
} else if( !arrayMap[source].count(target) ) {
- errs() << " error [" << F->getNameStr() << "()]: target '"
- << target->getNameStr()
+ errs() << " error [" << F->getName() << "()]: target '"
+ << target->getName()
<< "' does not exist in the array map.\n";
} else if( !arrayMap[source][target].count(duplicateNumber) ) {
- errs() << " error [" << F->getNameStr() << "()]: edge "
- << source->getNameStr() << " -> " << target->getNameStr()
+ errs() << " error [" << F->getName() << "()]: edge "
+ << source->getName() << " -> " << target->getName()
<< " duplicate number " << duplicateNumber
<< " does not exist in the array map.\n";
} else {
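
Every getNameStr() call in this file becomes getName(). The former
returned a freshly allocated std::string, while getName() returns a
StringRef that streams straight into raw_ostream. A small sketch of the
difference, assuming only the StringRef API (printName is hypothetical):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static void printName(llvm::StringRef Name) {
      // StringRef streams into raw_ostream without a heap allocation.
      llvm::errs() << Name << "\n";
      // When an owning copy really is needed, convert explicitly.
      std::string Owned = Name.str();
      (void)Owned;
    }
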
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index b594e2ba5506..63468f842612 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -332,7 +332,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
// Clear Minimal Edges.
MinimalWeight.clear();
- DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+ DEBUG(dbgs() << "Working on function " << F.getName() << "\n");
// Since the entry block is the first one and has no predecessors, the edge
// (0,entry) is inserted with the starting weight of 1.
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 098079bcffc4..c4da8079a511 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -160,7 +160,7 @@ bool LoaderPass::runOnModule(Module &M) {
ReadCount = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
readEdge(getEdge(0,&F->getEntryBlock()), Counters);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
@@ -181,7 +181,7 @@ bool LoaderPass::runOnModule(Module &M) {
ReadCount = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
readEdge(getEdge(0,&F->getEntryBlock()), Counters);
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
TerminatorInst *TI = BB->getTerminator();
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index a01751849c51..0cb158865afe 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -30,7 +30,7 @@ static cl::opt<bool,false>
ProfileVerifierDisableAssertions("profile-verifier-noassert",
cl::desc("Disable assertions"));
-namespace llvm {
+namespace {
template<class FType, class BType>
class ProfileVerifierPassT : public FunctionPass {
@@ -125,8 +125,8 @@ namespace llvm {
outCount++;
}
}
- dbgs() << "Block " << BB->getNameStr() << " in "
- << BB->getParent()->getNameStr() << ":"
+ dbgs() << "Block " << BB->getName() << " in "
+ << BB->getParent()->getName() << ":"
<< "BBWeight=" << format("%20.20g",BBWeight) << ","
<< "inWeight=" << format("%20.20g",inWeight) << ","
<< "inCount=" << inCount << ","
@@ -143,8 +143,8 @@ namespace llvm {
template<class FType, class BType>
void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
- dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
- << DI->BB->getParent()->getNameStr() << ":"
+ dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in "
+ << DI->BB->getParent()->getName() << ":"
<< "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
<< "inWeight=" << format("%20.20g",DI->inWeight) << ","
<< "inCount=" << DI->inCount << ","
@@ -201,13 +201,13 @@ namespace llvm {
double EdgeWeight = PI->getEdgeWeight(E);
if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
ASSERTMESSAGE("Edge has missing value");
return 0;
} else {
if (EdgeWeight < 0) {
dbgs() << "Edge " << E << " in Function "
- << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ << ProfileInfoT<FType, BType>::getFunction(E)->getName() << ": ";
ASSERTMESSAGE("Edge has negative value");
}
return EdgeWeight;
@@ -220,8 +220,8 @@ namespace llvm {
DetailedBlockInfo *DI) {
if (Error) {
DEBUG(debugEntry(DI));
- dbgs() << "Block " << DI->BB->getNameStr() << " in Function "
- << DI->BB->getParent()->getNameStr() << ": ";
+ dbgs() << "Block " << DI->BB->getName() << " in Function "
+ << DI->BB->getParent()->getName() << ": ";
ASSERTMESSAGE(Message);
}
return;
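
Besides the getName() migration, this file moves its implementation out
of namespace llvm and into an anonymous namespace, the usual convention
for file-local pass machinery. A minimal illustration (VerifierImpl is
hypothetical):

    namespace {                   // was: namespace llvm {
      template <class T> struct VerifierImpl { T Value; };
    }
    // Anonymous-namespace types get internal linkage, so the template
    // is no longer exported into the llvm namespace by this one file.
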
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 52753cbe85af..b507b1e340f5 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -186,18 +186,16 @@ std::string Region::getNameStr() const {
raw_string_ostream OS(entryName);
WriteAsOperand(OS, getEntry(), false);
- entryName = OS.str();
} else
- entryName = getEntry()->getNameStr();
+ entryName = getEntry()->getName();
if (getExit()) {
if (getExit()->getName().empty()) {
raw_string_ostream OS(exitName);
WriteAsOperand(OS, getExit(), false);
- exitName = OS.str();
} else
- exitName = getExit()->getNameStr();
+ exitName = getExit()->getName();
} else
exitName = "<Function Return>";
@@ -652,7 +650,7 @@ void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
// This basic block is a start block of a region. It is already in the
// BBtoRegion relation. Only the child basic blocks have to be updated.
if (it != BBtoRegion.end()) {
- Region *newRegion = it->second;;
+ Region *newRegion = it->second;
region->addSubRegion(getTopMostParent(newRegion));
region = newRegion;
} else {
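
The deleted "entryName = OS.str()" lines were self-assignments:
raw_string_ostream writes into the std::string it wraps, and str()
simply flushes and returns a reference to that same string. A sketch of
the idiom, using only documented raw_string_ostream behavior (describe
is a hypothetical helper):

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static std::string describe(int N) {
      std::string Buffer;
      llvm::raw_string_ostream OS(Buffer);
      OS << "value = " << N;
      // str() flushes OS into Buffer and returns a reference to it, so
      // "Buffer = OS.str()" would assign Buffer to itself. Flushing, or
      // letting OS be destroyed, is all that is needed before reading
      // Buffer.
      return OS.str();
    }
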
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index e0ac56c65e76..1d55642079a0 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -74,6 +74,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
@@ -108,6 +109,7 @@ INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution",
"Scalar Evolution Analysis", false, true)
char ScalarEvolution::ID = 0;
@@ -188,6 +190,14 @@ void SCEV::print(raw_ostream &OS) const {
OS << OpStr;
}
OS << ")";
+ switch (NAry->getSCEVType()) {
+ case scAddExpr:
+ case scMulExpr:
+ if (NAry->getNoWrapFlags(FlagNUW))
+ OS << "<nuw>";
+ if (NAry->getNoWrapFlags(FlagNSW))
+ OS << "<nsw>";
+ }
return;
}
case scUDivExpr: {
@@ -249,11 +259,9 @@ Type *SCEV::getType() const {
return cast<SCEVUnknown>(this)->getType();
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return 0;
- default: break;
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return 0;
}
bool SCEV::isZero() const {
@@ -274,6 +282,20 @@ bool SCEV::isAllOnesValue() const {
return false;
}
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+bool SCEV::isNonConstantNegative() const {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this);
+ if (!Mul) return false;
+
+ // If there is a constant factor, it will be first.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+ if (!SC) return false;
+
+ // Return true if the value is negative, this matches things like (-42 * V).
+ return SC->getValue()->getValue().isNegative();
+}
+
SCEVCouldNotCompute::SCEVCouldNotCompute() :
SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {}
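
isNonConstantNegative() is hoisted here from a static helper in
ScalarEvolutionExpander.cpp (removed later in this patch) so that both
ScalarEvolution and the expander can share it. A hedged usage sketch,
with preferSubtract and Step as hypothetical names:

    #include "llvm/Analysis/ScalarEvolution.h"

    // Expanding "x + Step" as a subtract is cheaper when Step looks like
    // (-C * V): a SCEVMulExpr whose leading operand is a negative
    // SCEVConstant. Plain constants and (%u * %v) do not match.
    static bool preferSubtract(const llvm::SCEV *Step) {
      return Step->isNonConstantNegative();
    }
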
@@ -587,11 +609,8 @@ namespace {
}
default:
- break;
+ llvm_unreachable("Unknown SCEV kind!");
}
-
- llvm_unreachable("Unknown SCEV kind!");
- return 0;
}
};
}
@@ -2581,7 +2600,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2590,7 +2609,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) {
const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) {
Constant *C = ConstantExpr::getAlignOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2607,7 +2626,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy,
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -2617,7 +2636,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy,
Constant *FieldNo) {
Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI))
C = Folded;
Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
@@ -3108,7 +3127,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// PHI's incoming blocks are in a different loop, in which case doing so
// risks breaking LCSSA form. Instcombine would normally zap these, but
// it doesn't have DominatorTree information, so it may miss cases.
- if (Value *V = SimplifyInstruction(PN, TD, DT))
+ if (Value *V = SimplifyInstruction(PN, TD, TLI, DT))
if (LI->replacementPreservesLCSSAForm(PN, V))
return getSCEV(V);
@@ -3168,7 +3187,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
// Add the total offset from all the GEP indices to the base.
return getAddExpr(BaseS, TotalOffset,
- isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap);
+ isInBounds ? SCEV::FlagNUW : SCEV::FlagAnyWrap);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -3242,9 +3261,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ ComputeMaskedBits(U->getValue(), Zeros, Ones);
return Zeros.countTrailingOnes();
}
@@ -3382,9 +3400,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ ComputeMaskedBits(U->getValue(), Zeros, Ones, TD);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
@@ -3584,6 +3601,12 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// because it leads to N-1 getAddExpr calls for N ultimate operands.
// Instead, gather up all the operands and make a single getAddExpr call.
// LLVM IR canonical form means we need only traverse the left operands.
+ //
+ // Don't apply this instruction's NSW or NUW flags to the new
+ // expression. The instruction may be guarded by control flow that the
+ // no-wrap behavior depends on. Non-control-equivalent instructions can be
+ // mapped to the same SCEV expression, and it would be incorrect to transfer
+ // NSW/NUW semantics to those operations.
SmallVector<const SCEV *, 4> AddOps;
AddOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
@@ -3598,16 +3621,10 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
AddOps.push_back(Op1);
}
AddOps.push_back(getSCEV(U->getOperand(0)));
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(V);
- if (OBO->hasNoSignedWrap())
- setFlags(Flags, SCEV::FlagNSW);
- if (OBO->hasNoUnsignedWrap())
- setFlags(Flags, SCEV::FlagNUW);
- return getAddExpr(AddOps, Flags);
+ return getAddExpr(AddOps);
}
case Instruction::Mul: {
- // See the Add code above.
+ // Don't transfer NSW/NUW for the same reason as AddExpr.
SmallVector<const SCEV *, 4> MulOps;
MulOps.push_back(getSCEV(U->getOperand(1)));
for (Value *Op = U->getOperand(0);
@@ -3641,9 +3658,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
unsigned BitWidth = A.getBitWidth();
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+ ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD);
APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
@@ -3915,13 +3931,19 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
//
/// getSmallConstantTripCount - Returns the maximum trip count of this loop as a
-/// normal unsigned value, if possible. Returns 0 if the trip count is unknown
-/// or not constant. Will also return 0 if the maximum trip count is very large
-/// (>= 2^32)
-unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
- BasicBlock *ExitBlock) {
+/// normal unsigned value. Returns 0 if the trip count is unknown or not
+/// constant. Will also return 0 if the maximum trip count is very large (>=
+/// 2^32).
+///
+/// This "trip count" assumes that control exits via ExitingBlock. More
+/// precisely, it is the number of times that control may reach ExitingBlock
+/// before taking the branch. For loops with multiple exits, it may not be the
+/// number of times that the loop header executes because the loop may exit
+/// prematurely via another branch.
+unsigned ScalarEvolution::
+getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock) {
const SCEVConstant *ExitCount =
- dyn_cast<SCEVConstant>(getExitCount(L, ExitBlock));
+ dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
if (!ExitCount)
return 0;
@@ -3944,9 +3966,12 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
/// multiple of a constant (which is also the case if the trip count is simply
/// constant, use getSmallConstantTripCount for that case). Will also return 1
/// if the trip count is very large (>= 2^32).
-unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L,
- BasicBlock *ExitBlock) {
- const SCEV *ExitCount = getExitCount(L, ExitBlock);
+///
+/// As explained in the comments for getSmallConstantTripCount, this assumes
+/// that control exits the loop via ExitingBlock.
+unsigned ScalarEvolution::
+getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock) {
+ const SCEV *ExitCount = getExitCount(L, ExitingBlock);
if (ExitCount == getCouldNotCompute())
return 1;
@@ -4153,13 +4178,19 @@ void ScalarEvolution::forgetValue(Value *V) {
}
/// getExact - Get the exact loop backedge taken count considering all loop
-/// exits. If all exits are computable, this is the minimum computed count.
+/// exits. A computable result can only be returned for loops with a single exit.
+/// Returning the minimum taken count among all exits is incorrect because one
+/// of the loop's exit limits may have been skipped. HowFarToZero assumes that
+/// the limit of each loop test is never skipped. This is a valid assumption as
+/// long as the loop exits via that test. For precise results, it is the
+/// caller's responsibility to specify the relevant loop exit using
+/// getExact(ExitingBlock, SE).
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
// If any exits were not computable, the loop is not computable.
if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
- // We need at least one computable exit.
+ // We need exactly one computable exit.
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
@@ -4171,8 +4202,8 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
if (!BECount)
BECount = ENT->ExactNotTaken;
- else
- BECount = SE->getUMinFromMismatchedTypes(BECount, ENT->ExactNotTaken);
+ else if (BECount != ENT->ExactNotTaken)
+ return SE->getCouldNotCompute();
}
assert(BECount && "Invalid not taken count for loop exit");
return BECount;
@@ -4253,8 +4284,15 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
if (MaxBECount == getCouldNotCompute())
MaxBECount = EL.Max;
- else if (EL.Max != getCouldNotCompute())
- MaxBECount = getUMinFromMismatchedTypes(MaxBECount, EL.Max);
+ else if (EL.Max != getCouldNotCompute()) {
+ // We cannot take the "min" MaxBECount, because non-unit stride loops may
+ // skip some loop tests. Taking the max over the exits is sufficiently
+ // conservative. TODO: We could do better by taking into consideration
+ // that (1) the loop has unit stride, (2) the last loop test is
+ // less-than/greater-than, or (3) any loop test is less-than/greater-than
+ // AND falls through some constant number of times fewer than the other
+ // tests.
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+ }
}
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
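
The umin-to-umax switch above is the safety fix that this hunk's comment
describes: a per-exit maximum is computed assuming that exit's test
actually fires, and a non-unit stride can step over the test entirely.
An illustrative loop with hypothetical bounds:

    // i takes the even values 0, 2, 4, ...
    for (unsigned i = 0; ; i += 2) {
      if (i == 5) break;    // never fires: i is always even
      if (i > 100) break;   // the exit actually taken
    }
    // A limit derived from "i == 5" assumes that test is eventually
    // taken, so folding it in with umin could understate the real
    // backedge-taken count; combining with umax stays conservative.
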
@@ -4539,40 +4577,6 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
return cast<SCEVConstant>(Val)->getValue();
}
-/// GetAddressedElementFromGlobal - Given a global variable with an initializer
-/// and a GEP expression (missing the pointer index) indexing into it, return
-/// the addressed element of the initializer or null if the index expression is
-/// invalid.
-static Constant *
-GetAddressedElementFromGlobal(GlobalVariable *GV,
- const std::vector<ConstantInt*> &Indices) {
- Constant *Init = GV->getInitializer();
- for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
- uint64_t Idx = Indices[i]->getZExtValue();
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
- assert(Idx < CS->getNumOperands() && "Bad struct index!");
- Init = cast<Constant>(CS->getOperand(Idx));
- } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- if (Idx >= CA->getNumOperands()) return 0; // Bogus program
- Init = cast<Constant>(CA->getOperand(Idx));
- } else if (isa<ConstantAggregateZero>(Init)) {
- if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
- assert(Idx < STy->getNumElements() && "Bad struct index!");
- Init = Constant::getNullValue(STy->getElementType(Idx));
- } else if (ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
- if (Idx >= ATy->getNumElements()) return 0; // Bogus program
- Init = Constant::getNullValue(ATy->getElementType());
- } else {
- llvm_unreachable("Unknown constant aggregate type!");
- }
- return 0;
- } else {
- return 0; // Unknown initializer type
- }
- }
- return Init;
-}
-
/// ComputeLoadConstantCompareExitLimit - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
@@ -4600,7 +4604,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
// Okay, we allow one non-constant index into the GEP instruction.
Value *VarIdx = 0;
- std::vector<ConstantInt*> Indexes;
+ std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
@@ -4612,6 +4616,10 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
Indexes.push_back(0);
}
+ // Loop-invariant loads may be a byproduct of loop optimization. Skip them.
+ if (!VarIdx)
+ return getCouldNotCompute();
+
// Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
// Check to see if X is a loop variant variable value now.
const SCEV *Idx = getSCEV(VarIdx);
@@ -4634,7 +4642,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
// Form the GEP offset.
Indexes[VarIdxNum] = Val;
- Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+ Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
+ Indexes);
if (Result == 0) break; // Cannot compute!
// Evaluate the condition for this iteration.
@@ -4658,7 +4667,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
/// specified type, assuming that all operands were constants.
static bool CanConstantFold(const Instruction *I) {
if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
- isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+ isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) ||
+ isa<LoadInst>(I))
return true;
if (const CallInst *CI = dyn_cast<CallInst>(I))
@@ -4748,16 +4758,23 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// reason, return null.
static Constant *EvaluateExpression(Value *V, const Loop *L,
DenseMap<Instruction *, Constant *> &Vals,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return 0;
- Instruction *I = cast<Instruction>(V);
if (Constant *C = Vals.lookup(I)) return C;
- assert(!isa<PHINode>(I) && "loop header phis should be mapped to constant");
- assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop");
- (void)L;
+ // An instruction inside the loop depends on a value outside the loop that we
+ // weren't given a mapping for, or a value such as a call inside the loop.
+ if (!canConstantEvolve(I, L)) return 0;
+
+ // An unmapped PHI can be due to a branch or another loop inside this loop,
+ // or due to this not being the initial iteration through a loop where we
+ // couldn't compute the evolution of this particular PHI last time.
+ if (isa<PHINode>(I)) return 0;
std::vector<Constant*> Operands(I->getNumOperands());
@@ -4768,16 +4785,21 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
if (!Operands[i]) return 0;
continue;
}
- Constant *C = EvaluateExpression(Operand, L, Vals, TD);
+ Constant *C = EvaluateExpression(Operand, L, Vals, TD, TLI);
Vals[Operand] = C;
if (!C) return 0;
Operands[i] = C;
}
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ if (CmpInst *CI = dyn_cast<CmpInst>(I))
return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
- Operands[1], TD);
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD);
+ Operands[1], TD, TLI);
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ return ConstantFoldLoadFromConstPtr(Operands[0], TD);
+ }
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD,
+ TLI);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
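
Teaching CanConstantFold and EvaluateExpression about LoadInst lets the
brute-force evaluator walk loops whose exit tests read constant memory;
non-volatile loads fold through ConstantFoldLoadFromConstPtr. A
hypothetical example of the newly foldable pattern:

    // The load below reads a constant initializer, so every iteration
    // of the exit test folds to a constant.
    static const int Table[4] = {1, 2, 3, 0};

    int walk() {
      int i = 0;
      while (Table[i] != 0)   // load of a constant GEP, foldable
        ++i;
      return i;               // exhaustive evaluation concludes: 3
    }
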
@@ -4798,23 +4820,26 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
- // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis.
DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
// Since the loop is canonicalized, the PHI node must have two entries. One
// entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- Constant *StartCST =
- dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0)
- return RetVal = 0; // Must be a constant.
- CurrentIterVals[PN] = StartCST;
+ PHINode *PHI = 0;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return RetVal = 0;
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- if (getConstantEvolvingPHI(BEValue, L) != PN &&
- !isa<Constant>(BEValue))
- return RetVal = 0; // Not derived from same PHI.
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
@@ -4826,15 +4851,46 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
if (IterationNum == NumIterations)
return RetVal = CurrentIterVals[PN]; // Got exit value!
- // Compute the value of the PHI node for the next iteration.
+ // Compute the value of the PHIs for the next iteration.
// EvaluateExpression adds non-phi values to the CurrentIterVals map.
- Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD);
- if (NextPHI == CurrentIterVals[PN])
- return RetVal = NextPHI; // Stopped evolving!
+ DenseMap<Instruction *, Constant *> NextIterVals;
+ Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD,
+ TLI);
if (NextPHI == 0)
return 0; // Couldn't evaluate!
- DenseMap<Instruction *, Constant *> NextIterVals;
NextIterVals[PN] = NextPHI;
+
+ bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
+
+ // Also evaluate the other PHI nodes. However, we don't get to stop if we
+ // cease to be able to evaluate one of them or if they stop evolving,
+ // because that doesn't necessarily prevent us from computing PN.
+ SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(std::make_pair(PHI, I->second));
+ }
+ // We use two distinct loops because EvaluateExpression may invalidate any
+ // iterators into CurrentIterVals.
+ for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator
+ I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = I->first;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (!NextPHI) { // Not already computed.
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+ }
+ if (NextPHI != I->second)
+ StoppedEvolving = false;
+ }
+
+ // If every entry in NextIterVals matches CurrentIterVals, we can stop
+ // iterating: the loop can't continue to change.
+ if (StoppedEvolving)
+ return RetVal = CurrentIterVals[PN];
+
CurrentIterVals.swap(NextIterVals);
}
}
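
getConstantEvolutionLoopExitValue previously seeded only the queried PHI
with its start value; the rewrite seeds every header PHI that starts at
a constant so mutually dependent PHIs can be iterated in lockstep. A
hypothetical loop that only the multi-PHI form can evaluate:

    // Two header phis (a and b) feed each other, so neither evolves as
    // a function of itself alone. Seeding both lets the evaluator run
    // the loop symbolically and still compute a's exit value.
    unsigned fib() {
      unsigned a = 0, b = 1;
      for (int i = 0; i != 10; ++i) {
        unsigned t = a + b;
        a = b;                // a's next value depends on b
        b = t;                // b's next value depends on a and b
      }
      return a;               // constant-evaluates to 55
    }
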
@@ -4844,9 +4900,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
/// try to evaluate a few iterations of the loop until we get the exit
/// condition gets a value of ExitWhen (true or false). If we cannot
/// evaluate the trip count of the loop, return getCouldNotCompute().
-const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
- Value *Cond,
- bool ExitWhen) {
+const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
+ Value *Cond,
+ bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (PN == 0) return getCouldNotCompute();
@@ -4854,29 +4910,33 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// That's the only form we support here.
if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+ DenseMap<Instruction *, Constant *> CurrentIterVals;
+ BasicBlock *Header = L->getHeader();
+ assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");
+
// One entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- Constant *StartCST =
- dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
-
- Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- if (getConstantEvolvingPHI(BEValue, L) != PN &&
- !isa<Constant>(BEValue))
- return getCouldNotCompute(); // Not derived from same PHI.
+ PHINode *PHI = 0;
+ for (BasicBlock::iterator I = Header->begin();
+ (PHI = dyn_cast<PHINode>(I)); ++I) {
+ Constant *StartCST =
+ dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
+ if (StartCST == 0) continue;
+ CurrentIterVals[PHI] = StartCST;
+ }
+ if (!CurrentIterVals.count(PN))
+ return getCouldNotCompute();
// Okay, we find a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
// "ExitWhen".
- unsigned IterationNum = 0;
+
unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis.
- for (Constant *PHIVal = StartCST;
- IterationNum != MaxIterations; ++IterationNum) {
- DenseMap<Instruction *, Constant *> PHIValMap;
- PHIValMap[PN] = PHIVal;
+ for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, PHIValMap, TD));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, L, CurrentIterVals,
+ TD, TLI));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -4886,11 +4946,29 @@ const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
return getConstant(Type::getInt32Ty(getContext()), IterationNum);
}
- // Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD);
- if (NextPHI == 0 || NextPHI == PHIVal)
- return getCouldNotCompute();// Couldn't evaluate or not making progress...
- PHIVal = NextPHI;
+ // Update all the PHI nodes for the next iteration.
+ DenseMap<Instruction *, Constant *> NextIterVals;
+
+ // Create a list of which PHIs we need to compute. We want to do this before
+ // calling EvaluateExpression on them because that may invalidate iterators
+ // into CurrentIterVals.
+ SmallVector<PHINode *, 8> PHIsToCompute;
+ for (DenseMap<Instruction *, Constant *>::const_iterator
+ I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){
+ PHINode *PHI = dyn_cast<PHINode>(I->first);
+ if (!PHI || PHI->getParent() != Header) continue;
+ PHIsToCompute.push_back(PHI);
+ }
+ for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(),
+ E = PHIsToCompute.end(); I != E; ++I) {
+ PHINode *PHI = *I;
+ Constant *&NextPHI = NextIterVals[PHI];
+ if (NextPHI) continue; // Already computed!
+
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge);
+ NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD, TLI);
+ }
+ CurrentIterVals.swap(NextIterVals);
}
// Too many iterations were needed to evaluate.
@@ -4921,6 +4999,98 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
return C;
}
+/// This builds up a Constant using the ConstantExpr interface. That way, we
+/// will return Constants for objects which aren't represented by a
+/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
+/// Returns NULL if the SCEV isn't representable as a Constant.
+static Constant *BuildConstantFromSCEV(const SCEV *V) {
+ switch (V->getSCEVType()) {
+ default: // TODO: smax, umax.
+ case scCouldNotCompute:
+ case scAddRecExpr:
+ break;
+ case scConstant:
+ return cast<SCEVConstant>(V)->getValue();
+ case scUnknown:
+ return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
+ case scSignExtend: {
+ const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
+ return ConstantExpr::getSExt(CastOp, SS->getType());
+ break;
+ }
+ case scZeroExtend: {
+ const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
+ return ConstantExpr::getZExt(CastOp, SZ->getType());
+ break;
+ }
+ case scTruncate: {
+ const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
+ if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
+ return ConstantExpr::getTrunc(CastOp, ST->getType());
+ break;
+ }
+ case scAddExpr: {
+ const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
+ if (C->getType()->isPointerTy())
+ C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext()));
+ for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
+ if (!C2) return 0;
+
+ // First pointer!
+ if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
+ std::swap(C, C2);
+ // The offsets have been converted to bytes. We can add bytes to an
+ // i8* by GEP with the byte count in the first index.
+ C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext()));
+ }
+
+ // Don't bother trying to sum two pointers. We probably can't
+ // statically compute a load that results from it anyway.
+ if (C2->getType()->isPointerTy())
+ return 0;
+
+ if (C->getType()->isPointerTy()) {
+ if (cast<PointerType>(C->getType())->getElementType()->isStructTy())
+ C2 = ConstantExpr::getIntegerCast(
+ C2, Type::getInt32Ty(C->getContext()), true);
+ C = ConstantExpr::getGetElementPtr(C, C2);
+ } else
+ C = ConstantExpr::getAdd(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scMulExpr: {
+ const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
+ if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
+ // Don't bother with pointers at all.
+ if (C->getType()->isPointerTy()) return 0;
+ for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
+ Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
+ if (!C2 || C2->getType()->isPointerTy()) return 0;
+ C = ConstantExpr::getMul(C, C2);
+ }
+ return C;
+ }
+ break;
+ }
+ case scUDivExpr: {
+ const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
+ if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
+ if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
+ if (LHS->getType() == RHS->getType())
+ return ConstantExpr::getUDiv(LHS, RHS);
+ break;
+ }
+ }
+ return 0;
+}
+
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
if (isa<SCEVConstant>(V)) return V;
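
BuildConstantFromSCEV, added above, lets computeSCEVAtScope materialize
folded operands that are Constants without being ConstantInts, most
importantly address arithmetic around globals. An illustrative mapping,
with @g and %x as hypothetical values:

    // scConstant (42)            -> ConstantInt 42
    // scUnknown (@g)             -> the global @g itself
    // scAddExpr (@g + 16)        -> gep(bitcast @g to i8*, 16): the
    //                               pointer operand is swapped to the
    //                               front and byte offsets become
    //                               one-index GEPs
    // scMulExpr (4 * %x)         -> null: %x is not a Constant
    // scAddRecExpr {0,+,1}<loop> -> null: loop-variant by construction
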
@@ -4973,11 +5143,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
const SCEV *OpV = getSCEVAtScope(OrigV, L);
MadeImprovement |= OrigV != OpV;
- Constant *C = 0;
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
- C = SC->getValue();
- if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
- C = dyn_cast<Constant>(SU->getValue());
+ Constant *C = BuildConstantFromSCEV(OpV);
if (!C) return V;
if (C->getType() != Op->getType())
C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
@@ -4992,10 +5158,14 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], TD);
- else
+ Operands[0], Operands[1], TD,
+ TLI);
+ else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ C = ConstantFoldLoadFromConstPtr(Operands[0], TD);
+ } else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Operands, TD);
+ Operands, TD, TLI);
if (!C) return V;
return getSCEV(C);
}
@@ -5113,7 +5283,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
}
llvm_unreachable("Unknown SCEV type!");
- return 0;
}
/// getSCEVAtScope - This is a convenience function which does
@@ -5350,10 +5519,10 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// behavior. Loops must exhibit defined behavior until a wrapped value is
// actually used. So the trip count computed by udiv could be smaller than the
// number of well-defined iterations.
- if (AddRec->getNoWrapFlags(SCEV::FlagNW))
+ if (AddRec->getNoWrapFlags(SCEV::FlagNW)) {
// FIXME: We really want an "isexact" bit for udiv.
return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
-
+ }
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start))
return SolveLinEquationWithOverflow(StepC->getValue()->getValue(),
@@ -5744,7 +5913,6 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
switch (Pred) {
default:
llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- break;
case ICmpInst::ICMP_SGT:
Pred = ICmpInst::ICMP_SLT;
std::swap(LHS, RHS);
@@ -6089,8 +6257,9 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
return getCouldNotCompute();
// Check to see if we have a flag which makes analysis easy.
- bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) :
- AddRec->getNoWrapFlags(SCEV::FlagNUW);
+ bool NoWrap = isSigned ?
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) :
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW));
if (AddRec->isAffine()) {
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
@@ -6381,6 +6550,7 @@ bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
return false;
}
@@ -6417,6 +6587,7 @@ void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredTransitive<LoopInfo>();
AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
}
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
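
With the addRequired above, ScalarEvolution can hand TargetLibraryInfo
to every constant-folding helper this patch extends. Any client pass
that wants the same information follows the same two steps; a minimal
sketch for a hypothetical pass (registration boilerplate omitted):

    #include "llvm/Pass.h"
    #include "llvm/Target/TargetLibraryInfo.h"

    namespace {
    struct MyPass : public llvm::FunctionPass {   // hypothetical
      static char ID;
      MyPass() : llvm::FunctionPass(ID) {}

      virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
        AU.addRequired<llvm::TargetLibraryInfo>();
      }

      virtual bool runOnFunction(llvm::Function &F) {
        llvm::TargetLibraryInfo *TLI =
            &getAnalysis<llvm::TargetLibraryInfo>();
        (void)TLI;  // thread TLI into folding calls, as this patch does
        return false;
      }
    };
    }
    char MyPass::ID = 0;
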
@@ -6592,11 +6763,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
return LoopInvariant;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return LoopVariant;
- default: break;
+ default: llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return LoopVariant;
}
bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
@@ -6678,11 +6846,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
return ProperlyDominatesBlock;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return DoesNotDominateBlock;
- default: break;
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return DoesNotDominateBlock;
}
bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
@@ -6728,11 +6894,9 @@ bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
return false;
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- return false;
- default: break;
+ default:
+ llvm_unreachable("Unknown SCEV kind!");
}
- llvm_unreachable("Unknown SCEV kind!");
- return false;
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
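
Several switches in this file (SCEV::getType, computeLoopDisposition,
computeBlockDisposition, hasOperand) receive the same cleanup: the
llvm_unreachable moves into the default case and the dead returns after
it are dropped. The shape of the change, on a hypothetical enum:

    #include "llvm/Support/ErrorHandling.h"

    enum Kind { KindA, KindB };   // hypothetical

    static int classify(Kind K) {
      switch (K) {
      case KindA: return 0;
      case KindB: return 1;
      default:
        // llvm_unreachable is noreturn, so no trailing "return 0;" is
        // needed after the switch and no dead code survives.
        llvm_unreachable("Unknown kind!");
      }
    }
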
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 47f0f321161b..69507beeaae9 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,6 +19,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -30,6 +31,19 @@ using namespace llvm;
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
BasicBlock::iterator IP) {
+ // This function must be called with the builder having a valid insertion
+ // point. It doesn't need to be the actual IP where the uses of the returned
+ // cast will be added, but it must dominate such IP.
+ // We use this precondition to produce a cast that will dominate all its
+ // uses. In particular, this is crucial for the case where the builder's
+ // insertion point *is* the point where we were asked to put the cast.
+ // Since we don't know that the builder's insertion point is actually
+ // where the uses will be added (only that it dominates it), we are
+ // not allowed to move it.
+ BasicBlock::iterator BIP = Builder.GetInsertPoint();
+
+ Instruction *Ret = NULL;
+
// Check to see if there is already a cast!
for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI) {
@@ -37,27 +51,35 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
if (U->getType() == Ty)
if (CastInst *CI = dyn_cast<CastInst>(U))
if (CI->getOpcode() == Op) {
- // If the cast isn't where we want it, fix it.
- if (BasicBlock::iterator(CI) != IP) {
+ // If the cast isn't where we want it, create a new cast at IP.
+ // Likewise, do not reuse a cast at BIP because it must dominate
+ // instructions that might be inserted before BIP.
+ if (BasicBlock::iterator(CI) != IP || BIP == IP) {
// Create a new cast, and leave the old cast in place in case
// it is being used as an insert point. Clear its operand
// so that it doesn't hold anything live.
- Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
- NewCI->takeName(CI);
- CI->replaceAllUsesWith(NewCI);
+ Ret = CastInst::Create(Op, V, Ty, "", IP);
+ Ret->takeName(CI);
+ CI->replaceAllUsesWith(Ret);
CI->setOperand(0, UndefValue::get(V->getType()));
- rememberInstruction(NewCI);
- return NewCI;
+ break;
}
- rememberInstruction(CI);
- return CI;
+ Ret = CI;
+ break;
}
}
// Create a new cast.
- Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
- rememberInstruction(I);
- return I;
+ if (!Ret)
+ Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+
+ // We assert at the end of the function since IP might point to an
+ // instruction with different dominance properties than a cast
+ // (an invoke for example) and not dominate BIP (but the cast does).
+ assert(SE.DT->dominates(Ret, BIP));
+
+ rememberInstruction(Ret);
+ return Ret;
}
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
@@ -73,9 +95,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
"InsertNoopCastOfTo cannot change sizes!");
// Short-circuit unnecessary bitcasts.
- if (Op == Instruction::BitCast && V->getType() == Ty)
- return V;
-
+ if (Op == Instruction::BitCast) {
+ if (V->getType() == Ty)
+ return V;
+ if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->getOperand(0)->getType() == Ty)
+ return CI->getOperand(0);
+ }
+ }
// Short-circuit unnecessary inttoptr<->ptrtoint casts.
if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
@@ -115,8 +142,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
BasicBlock::iterator IP = I; ++IP;
if (InvokeInst *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) ||
- isa<LandingPadInst>(IP))
+ while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
++IP;
return ReuseOrCreateCast(I, Ty, Op, IP);
}
@@ -492,6 +518,9 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
V = InsertNoopCastOfTo(V,
Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+ assert(!isa<Instruction>(V) ||
+ SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+
// Expand the operands for a plain byte offset.
Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
@@ -588,20 +617,6 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
return expand(SE.getAddExpr(Ops));
}
-/// isNonConstantNegative - Return true if the specified scev is negated, but
-/// not a constant.
-static bool isNonConstantNegative(const SCEV *F) {
- const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
- if (!Mul) return false;
-
- // If there is a constant factor, it will be first.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
- if (!SC) return false;
-
- // Return true if the value is negative, this matches things like (-42 * V).
- return SC->getValue()->getValue().isNegative();
-}
-
/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
@@ -655,7 +670,6 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
return RelevantLoops[D] = Result;
}
llvm_unreachable("Unexpected SCEV type!");
- return 0;
}
namespace {
@@ -680,10 +694,10 @@ public:
// If one operand is a non-constant negative and the other is not,
// put the non-constant negative on the right so that a sub can
// be used instead of a negate and add.
- if (isNonConstantNegative(LHS.second)) {
- if (!isNonConstantNegative(RHS.second))
+ if (LHS.second->isNonConstantNegative()) {
+ if (!RHS.second->isNonConstantNegative())
return false;
- } else if (isNonConstantNegative(RHS.second))
+ } else if (RHS.second->isNonConstantNegative())
return true;
// Otherwise they are equivalent according to this comparison.
@@ -744,7 +758,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
for (++I; I != E && I->first == CurLoop; ++I)
NewOps.push_back(I->second);
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
- } else if (isNonConstantNegative(Op)) {
+ } else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
Sum = InsertNoopCastOfTo(Sum, Ty);
@@ -875,58 +889,138 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
return isNormalAddRecExprPHI(PN, IncV, L);
}
-/// Determine if this cyclic phi is in a form that would have been generated by
-/// LSR. We don't care if the phi was actually expanded in this pass, as long
-/// as it is in a low-cost form, for example, no implied multiplication. This
-/// should match any patterns generated by getAddRecExprPHILiterally and
-/// expandAddtoGEP.
-bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
- const Loop *L) {
+/// getIVIncOperand returns an induction variable increment's induction
+/// variable operand.
+///
+/// If allowScale is set, any type of GEP is allowed as long as the nonIV
+/// operands dominate InsertPos.
+///
+/// If allowScale is not set, ensure that a GEP increment conforms to one of the
+/// simple patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
+Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
+ Instruction *InsertPos,
+ bool allowScale) {
+ if (IncV == InsertPos)
+ return NULL;
+
switch (IncV->getOpcode()) {
+ default:
+ return NULL;
// Check for a simple Add/Sub or GEP of a loop invariant step.
case Instruction::Add:
- case Instruction::Sub:
- return IncV->getOperand(0) == PN
- && L->isLoopInvariant(IncV->getOperand(1));
+ case Instruction::Sub: {
+ Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
+ if (!OInst || SE.DT->dominates(OInst, InsertPos))
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ return NULL;
+ }
case Instruction::BitCast:
- IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0));
- if (!IncV)
- return false;
- // fall-thru to GEP handling
- case Instruction::GetElementPtr: {
- // This must be a pointer addition of constants (pretty) or some number of
- // address-size elements (ugly).
+ return dyn_cast<Instruction>(IncV->getOperand(0));
+ case Instruction::GetElementPtr:
for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
I != E; ++I) {
if (isa<Constant>(*I))
continue;
- // ugly geps have 2 operands.
- // i1* is used by the expander to represent an address-size element.
+ if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
+ if (!SE.DT->dominates(OInst, InsertPos))
+ return NULL;
+ }
+ if (allowScale) {
+ // allow any kind of GEP as long as it can be hoisted.
+ continue;
+ }
+ // This must be a pointer addition of constants (pretty), which is already
+ // handled, or some number of address-size elements (ugly). Ugly geps
+ // have 2 operands. i1* is used by the expander to represent an
+ // address-size element.
if (IncV->getNumOperands() != 2)
- return false;
+ return NULL;
unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
&& IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
- return false;
- // Ensure the operands dominate the insertion point. I don't know of a
- // case when this would not be true, so this is somewhat untested.
- if (L == IVIncInsertLoop) {
- for (User::op_iterator OI = IncV->op_begin()+1,
- OE = IncV->op_end(); OI != OE; ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(OI))
- if (!SE.DT->dominates(OInst, IVIncInsertPos))
- return false;
- }
+ return NULL;
break;
}
- IncV = dyn_cast<Instruction>(IncV->getOperand(0));
- if (IncV && IncV->getOpcode() == Instruction::BitCast)
- IncV = dyn_cast<Instruction>(IncV->getOperand(0));
- return IncV == PN;
+ return dyn_cast<Instruction>(IncV->getOperand(0));
}
- default:
+}
+
+/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
+/// it available to other uses in this loop. Recursively hoist any operands,
+/// until we reach a value that dominates InsertPos.
+bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
+ if (SE.DT->dominates(IncV, InsertPos))
+ return true;
+
+ // InsertPos must itself dominate IncV so that IncV's new position satisfies
+ // its existing users.
+ if (!SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
return false;
+
+ // Check that the chain of IV operands leading back to Phi can be hoisted.
+ SmallVector<Instruction*, 4> IVIncs;
+ for(;;) {
+ Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
+ if (!Oper)
+ return false;
+ // IncV is safe to hoist.
+ IVIncs.push_back(IncV);
+ IncV = Oper;
+ if (SE.DT->dominates(IncV, InsertPos))
+ break;
+ }
+ for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
+ E = IVIncs.rend(); I != E; ++I) {
+ (*I)->moveBefore(InsertPos);
+ }
+ return true;
+}
+
+/// Determine if this cyclic phi is in a form that would have been generated by
+/// LSR. We don't care if the phi was actually expanded in this pass, as long
+/// as it is in a low-cost form, for example, no implied multiplication. This
+/// should match any patterns generated by getAddRecExprPHILiterally and
+/// expandAddtoGEP.
+bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
+ const Loop *L) {
+ for(Instruction *IVOper = IncV;
+ (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
+ /*allowScale=*/false));) {
+ if (IVOper == PN)
+ return true;
}
+ return false;
+}
+
+/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
+/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
+/// need to materialize IV increments elsewhere to handle difficult situations.
+Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
+ Type *ExpandTy, Type *IntTy,
+ bool useSubtract) {
+ Value *IncV;
+ // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+ if (ExpandTy->isPointerTy()) {
+ PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+ // If the step isn't constant, don't use an implicitly scaled GEP, because
+ // that would require a multiply inside the loop.
+ if (!isa<ConstantInt>(StepV))
+ GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+ GEPPtrTy->getAddressSpace());
+ const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+ IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+ if (IncV->getType() != PN->getType()) {
+ IncV = Builder.CreateBitCast(IncV, PN->getType());
+ rememberInstruction(IncV);
+ }
+ } else {
+ IncV = useSubtract ?
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
+ rememberInstruction(IncV);
+ }
+ return IncV;
}
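
expandIVInc centralizes the increment emission that previously lived
inline in getAddRecExprPHILiterally (and is deleted from there below).
A hedged summary of its decisions, with hypothetical PN and StepV:

    // PN : i64,  StepV : 4     -> %iv.next = add i64 %iv, 4
    // PN : i64,  StepV : -%n   -> useSubtract: %iv.next = sub i64 %iv, %n
    // PN : i32*, StepV : const -> a typed GEP when the constant step
    //                             factors by the element size
    // PN : i32*, StepV : %n    -> GEP over an i1* cast of the pointer,
    //                             so the non-constant step implies no
    //                             multiply inside the loop
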
/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
@@ -956,26 +1050,28 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (LSRMode) {
if (!isExpandedAddRecExprPHI(PN, IncV, L))
continue;
+ if (L == IVIncInsertLoop && !hoistIVInc(IncV, IVIncInsertPos))
+ continue;
}
else {
if (!isNormalAddRecExprPHI(PN, IncV, L))
continue;
+ if (L == IVIncInsertLoop)
+ do {
+ if (SE.DT->dominates(IncV, IVIncInsertPos))
+ break;
+ // Make sure the increment is where we want it. But don't move it
+ // down past a potential existing post-inc user.
+ IncV->moveBefore(IVIncInsertPos);
+ IVIncInsertPos = IncV;
+ IncV = cast<Instruction>(IncV->getOperand(0));
+ } while (IncV != PN);
}
// Ok, the add recurrence looks usable.
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
// Remember the increment.
rememberInstruction(IncV);
- if (L == IVIncInsertLoop)
- do {
- if (SE.DT->dominates(IncV, IVIncInsertPos))
- break;
- // Make sure the increment is where we want it. But don't move it
- // down past a potential existing post-inc user.
- IncV->moveBefore(IVIncInsertPos);
- IVIncInsertPos = IncV;
- IncV = cast<Instruction>(IncV->getOperand(0));
- } while (IncV != PN);
return PN;
}
}
@@ -984,6 +1080,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ // Another AddRec may need to be recursively expanded below. For example, if
+ // this AddRec is quadratic, the StepV may itself be an AddRec in this
+ // loop. Remove this loop from the PostIncLoops set before expanding such
+ // AddRecs. Otherwise, we cannot find a valid position for the step
+ // (i.e. StepV can never dominate its loop header). Ideally, we could do
+ // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
+ // so it's not worth implementing SmallPtrSet::swap.
+ PostIncLoopSet SavedPostIncLoops = PostIncLoops;
+ PostIncLoops.clear();
+
// Expand code for the start value.
Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
L->getHeader()->begin());
@@ -993,16 +1099,16 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
- // Expand code for the step value. Insert instructions right before the
- // terminator corresponding to the back-edge. Do this before creating the PHI
- // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
- // negative, insert a sub instead of an add for the increment (unless it's a
- // constant, because subtracts of constants are canonicalized to adds).
+ // Expand code for the step value. Do this before creating the PHI so that PHI
+ // reuse code doesn't see an incomplete PHI.
const SCEV *Step = Normalized->getStepRecurrence(SE);
- bool isPointer = ExpandTy->isPointerTy();
- bool isNegative = !isPointer && isNonConstantNegative(Step);
- if (isNegative)
+ // If the stride is negative, insert a sub instead of an add for the increment
+ // (unless it's a constant, because subtracts of constants are canonicalized
+ // to adds).
+ bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
// Create the PHI.
@@ -1023,33 +1129,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
continue;
}
- // Create a step value and add it to the PHI. If IVIncInsertLoop is
- // non-null and equal to the addrec's loop, insert the instructions
- // at IVIncInsertPos.
+ // Create a step value and add it to the PHI.
+ // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
+ // instructions at IVIncInsertPos.
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
Builder.SetInsertPoint(InsertPos);
- Value *IncV;
- // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
- if (isPointer) {
- PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
- // If the step isn't constant, don't use an implicitly scaled GEP, because
- // that would require a multiply inside the loop.
- if (!isa<ConstantInt>(StepV))
- GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
- GEPPtrTy->getAddressSpace());
- const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
- IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType()) {
- IncV = Builder.CreateBitCast(IncV, PN->getType());
- rememberInstruction(IncV);
- }
- } else {
- IncV = isNegative ?
- Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
- Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
- rememberInstruction(IncV);
- }
+ Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+
PN->addIncoming(IncV, Pred);
}
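The expandIVInc helper called here is introduced earlier in this patch; as a sketch of its non-pointer half (hypothetical wrapper name, assuming the 3.1-era IRBuilder header):

    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Emit the IV increment the way the replaced inline code did: sub when
    // the caller already negated the step (useSubtract above), add otherwise.
    static Value *emitIVIncSketch(IRBuilder<> &B, PHINode *PN, Value *StepV,
                                  bool UseSubtract) {
      return UseSubtract ? B.CreateSub(PN, StepV, "iv.next")
                         : B.CreateAdd(PN, StepV, "iv.next");
    }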
@@ -1057,6 +1144,10 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ // After expanding subexpressions, restore the PostIncLoops set so the caller
+ // can ensure that IVIncrement dominates the current uses.
+ PostIncLoops = SavedPostIncLoops;
+
// Remember this PHI, even in post-inc mode.
InsertedValues.insert(PN);
@@ -1124,10 +1215,31 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// For an expansion to use the postinc form, the client must call
// expandCodeFor with an InsertPoint that is either outside the PostIncLoop
// or dominated by IVIncInsertPos.
- assert((!isa<Instruction>(Result) ||
- SE.DT->dominates(cast<Instruction>(Result),
- Builder.GetInsertPoint())) &&
- "postinc expansion does not dominate use");
+ if (isa<Instruction>(Result)
+ && !SE.DT->dominates(cast<Instruction>(Result),
+ Builder.GetInsertPoint())) {
+ // The induction variable's postinc expansion does not dominate this use.
+ // IVUsers tries to prevent this case, so it is rare. However, it can
+ // happen when an IVUser outside the loop is not dominated by the latch
+ // block. Adjusting IVIncInsertPos before expansion begins cannot handle
+ // all cases. Consider a phi outside the loop whose operand is replaced during
+ // expansion with the value of the postinc user. Without fundamentally
+ // changing the way postinc users are tracked, the only remedy is
+ // inserting an extra IV increment. StepV might fold into PostLoopOffset,
+ // but hopefully expandCodeFor handles that.
+ bool useSubtract =
+ !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
+ if (useSubtract)
+ Step = SE.getNegativeSCEV(Step);
+ // Expand the step somewhere that dominates the loop header.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+ Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+ // Restore the insertion point to the place where the caller has
+ // determined dominates all uses.
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+ Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
+ }
}
// Re-apply any non-loop-dominating scale.
@@ -1363,10 +1475,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
}
Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
- Instruction *I) {
- BasicBlock::iterator IP = I;
- while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
- ++IP;
+ Instruction *IP) {
Builder.SetInsertPoint(IP->getParent(), IP);
return expandCodeFor(SH, Ty);
}
@@ -1392,14 +1501,23 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (!L) break;
if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
+ else {
+ // LSR sets the insertion point for AddRec start/step values to the
+ // block start to simplify value reuse, even though it's an invalid
+ // position. SCEVExpander must correct for this in all cases.
+ InsertPt = L->getHeader()->getFirstInsertionPt();
+ }
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
InsertPt = L->getHeader()->getFirstInsertionPt();
- while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+ while (InsertPt != Builder.GetInsertPoint()
+ && (isInsertedInstruction(InsertPt)
+ || isa<DbgInfoIntrinsic>(InsertPt))) {
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+ }
break;
}
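For context, a sketch of why getFirstInsertionPt is preferred to begin() at these correction points: it skips the PHI nodes (and any landingpad) at the top of the block, yielding the first position where ordinary instructions may legally be placed.

    BasicBlock *Header = L->getHeader();
    // begin() may point at a PHI; getFirstInsertionPt() never does.
    BasicBlock::iterator IP = Header->getFirstInsertionPt();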
@@ -1434,23 +1552,9 @@ void SCEVExpander::rememberInstruction(Value *I) {
InsertedPostIncValues.insert(I);
else
InsertedValues.insert(I);
-
- // If we just claimed an existing instruction and that instruction had
- // been the insert point, adjust the insert point forward so that
- // subsequently inserted code will be dominated.
- if (Builder.GetInsertPoint() == I) {
- BasicBlock::iterator It = cast<Instruction>(I);
- do { ++It; } while (isInsertedInstruction(It) ||
- isa<DbgInfoIntrinsic>(It));
- Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
- }
}
void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- // If we acquired more instructions since the old insert point was saved,
- // advance past them.
- while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
-
Builder.SetInsertPoint(BB, I);
}
@@ -1478,40 +1582,13 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
return V;
}
-/// hoistStep - Attempt to hoist an IV increment above a potential use.
-///
-/// To successfully hoist, two criteria must be met:
-/// - IncV operands dominate InsertPos and
-/// - InsertPos dominates IncV
-///
-/// Meeting the second condition means that we don't need to check all of IncV's
-/// existing uses (it's moving up in the domtree).
-///
-/// This does not yet recursively hoist the operands, although that would
-/// not be difficult.
-///
-/// This does not require a SCEVExpander instance and could be replaced by a
-/// general code-insertion helper.
-bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
- const DominatorTree *DT) {
- if (DT->dominates(IncV, InsertPos))
- return true;
-
- if (!DT->dominates(InsertPos->getParent(), IncV->getParent()))
- return false;
-
- if (IncV->mayHaveSideEffects())
- return false;
-
- // Attempt to hoist IncV
- for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
- OI != OE; ++OI) {
- Instruction *OInst = dyn_cast<Instruction>(OI);
- if (OInst && !DT->dominates(OInst, InsertPos))
- return false;
- }
- IncV->moveBefore(InsertPos);
- return true;
+/// Sort values by integer width for replaceCongruentIVs.
+static bool width_descending(Value *lhs, Value *rhs) {
+ // Put pointers at the back and make sure pointer < pointer = false.
+ if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
+ return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
+ return rhs->getType()->getPrimitiveSizeInBits()
+ < lhs->getType()->getPrimitiveSizeInBits();
}
/// replaceCongruentIVs - Check for congruent phis in this loop header and
@@ -1521,23 +1598,45 @@ bool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos,
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetLowering *TLI) {
+ // Find integer phis in order of increasing width.
+ SmallVector<PHINode*, 8> Phis;
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ Phis.push_back(Phi);
+ }
+ if (TLI)
+ std::sort(Phis.begin(), Phis.end(), width_descending);
+
unsigned NumElim = 0;
DenseMap<const SCEV *, PHINode *> ExprToIVMap;
- for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
- PHINode *Phi = cast<PHINode>(I);
+ // Process phis from wide to narrow, mapping wide phis to their truncations
+ // so narrow phis can reuse them.
+ for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
+ PEnd = Phis.end(); PIter != PEnd; ++PIter) {
+ PHINode *Phi = *PIter;
+
if (!SE.isSCEVable(Phi->getType()))
continue;
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
+ if (Phi->getType()->isIntegerTy() && TLI
+ && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
continue;
}
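As an illustration of the reuse this mapping enables (a sketch, with x86-64 as the assumed target, where i64-to-i32 truncation is free): a congruent i32 phi need not survive, since its users can be fed from the wide phi through a single trunc, which the CreateTruncOrBitCast calls later in this function emit.

    %narrow = trunc i64 %wide.iv to i32   ; replaces the redundant i32 phi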
- // If one phi derives from the other via GEPs, types may differ.
- // We could consider adding a bitcast here to handle it.
- if (OrigPhiRef->getType() != Phi->getType())
+ // Replacing a pointer phi with an integer phi or vice-versa doesn't make
+ // sense.
+ if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
continue;
if (BasicBlock *LatchBlock = L->getLoopLatch()) {
@@ -1546,32 +1645,56 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
Instruction *IsomorphicInc =
cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
- // If this phi is more canonical, swap it with the original.
- if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)
- && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) {
+ // If this phi has the same width but is more canonical, replace the
+ // original with it. As part of the "more canonical" determination,
+ // respect a prior decision to use an IV chain.
+ if (OrigPhiRef->getType() == Phi->getType()
+ && !(ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L))
+ && (ChainedPhis.count(Phi)
+ || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
std::swap(OrigPhiRef, Phi);
std::swap(OrigInc, IsomorphicInc);
}
// Replacing the congruent phi is sufficient because acyclic redundancy
// elimination, CSE/GVN, should handle the rest. However, once SCEV proves
// that a phi is congruent, it's often the head of an IV user cycle that
- // is isomorphic with the original phi. So it's worth eagerly cleaning up
- // the common case of a single IV increment.
- if (OrigInc != IsomorphicInc &&
- OrigInc->getType() == IsomorphicInc->getType() &&
- SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) &&
- hoistStep(OrigInc, IsomorphicInc, DT)) {
+ // is isomorphic with the original phi. It's worth eagerly cleaning up the
+ // common case of a single IV increment so that DeleteDeadPHIs can remove
+ // cycles that had postinc uses.
+ const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc),
+ IsomorphicInc->getType());
+ if (OrigInc != IsomorphicInc
+ && TruncExpr == SE.getSCEV(IsomorphicInc)
+ && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc))
+ || hoistIVInc(OrigInc, IsomorphicInc))) {
DEBUG_WITH_TYPE(DebugType, dbgs()
<< "INDVARS: Eliminated congruent iv.inc: "
<< *IsomorphicInc << '\n');
- IsomorphicInc->replaceAllUsesWith(OrigInc);
+ Value *NewInc = OrigInc;
+ if (OrigInc->getType() != IsomorphicInc->getType()) {
+ Instruction *IP = isa<PHINode>(OrigInc)
+ ? (Instruction*)L->getHeader()->getFirstInsertionPt()
+ : OrigInc->getNextNode();
+ IRBuilder<> Builder(IP);
+ Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
+ NewInc = Builder.
+ CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
+ }
+ IsomorphicInc->replaceAllUsesWith(NewInc);
DeadInsts.push_back(IsomorphicInc);
}
}
DEBUG_WITH_TYPE(DebugType, dbgs()
<< "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
++NumElim;
- Phi->replaceAllUsesWith(OrigPhiRef);
+ Value *NewIV = OrigPhiRef;
+ if (OrigPhiRef->getType() != Phi->getType()) {
+ IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+ Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
+ NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
+ }
+ Phi->replaceAllUsesWith(NewIV);
DeadInsts.push_back(Phi);
}
return NumElim;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index c66ecd6e8727..dd2ed4ff831c 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -118,7 +118,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
// Conservatively use AnyWrap until/unless we need FlagNW.
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
switch (Kind) {
- default: llvm_unreachable("Unexpected transform name!");
case NormalizeAutodetect:
if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
const SCEV *TransformedStep =
@@ -191,7 +190,6 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
}
llvm_unreachable("Unexpected SCEV kind!");
- return 0;
}
/// Manage recursive transformation across an expression DAG. Revisiting
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index d8c207b4bd49..c819666ee444 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -194,8 +194,8 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
- Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+ SwitchInst::CaseIt Case = SI.findCaseValue(cast<ConstantInt>(C));
+ Succs[Case.getSuccessorIndex()] = true;
}
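This tracks the 3.1-era SwitchInst API change in which findCaseValue returns a case iterator rather than a raw index; a usage sketch:

    SwitchInst::CaseIt Case = SI.findCaseValue(CI); // CI: the tested constant
    unsigned Idx = Case.getSuccessorIndex();        // index into SI's successors
    BasicBlock *Dest = SI.getSuccessor(Idx);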
@@ -327,13 +327,13 @@ void SparseSolver::Solve(Function &F) {
}
void SparseSolver::Print(Function &F, raw_ostream &OS) const {
- OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+ OS << "\nFUNCTION: " << F.getName() << "\n";
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BBExecutable.count(BB))
OS << "INFEASIBLE: ";
OS << "\t";
if (BB->hasName())
- OS << BB->getNameStr() << ":\n";
+ OS << BB->getName() << ":\n";
else
OS << "; anon bb\n";
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 68a39cd581f4..ff5010bad7bb 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -34,7 +34,7 @@ Module *Trace::getModule() const {
///
void Trace::print(raw_ostream &O) const {
Function *F = getFunction();
- O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
+ O << "; Trace from function " << F->getName() << ", blocks:\n";
for (const_iterator i = begin(), e = end(); i != e; ++i) {
O << "; ";
WriteAsOperand(O, *i, true, getModule());
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4d94f619fda1..a430f6281ef0 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -20,8 +20,10 @@
#include "llvm/GlobalAlias.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
#include "llvm/Operator.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/PatternMatch.h"
@@ -41,10 +43,176 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
return TD ? TD->getPointerSizeInBits() : 0;
}
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
+static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ if (!Add) {
+ if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ // We know that the top bits of C-X are clear if X contains less bits
+ // than C (i.e. no wrap-around can happen). For example, 20-X is
+ // positive if we can prove that X is >= 0 and < 16.
+ if (!CLHS->getValue().isNegative()) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+ // NLZ can't be BitWidth with no sign bit
+ APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+ llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+
+ // If all of the MaskV bits are known to be zero, then we know the
+ // output top bits are zero, because we now know that the output is
+ // from [0-C].
+ if ((KnownZero2 & MaskV) == MaskV) {
+ unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+ // Top bits known zero.
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
+ }
+ }
+ }
+ }
+
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ // If one of the operands has trailing zeros, then the bits that the
+ // other operand has in those bit positions will be preserved in the
+ // result. For an add, this works with either operand. For a subtract,
+ // this only works if the known zeros are in the right operand.
+ APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+ llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
+ assert((LHSKnownZero & LHSKnownOne) == 0 &&
+ "Bits known to be one AND zero?");
+ unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+ llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+ // Determine which operand has more trailing zeros, and use that
+ // many bits from the other operand.
+ if (LHSKnownZeroOut > RHSKnownZeroOut) {
+ if (Add) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+ KnownZero |= KnownZero2 & Mask;
+ KnownOne |= KnownOne2 & Mask;
+ } else {
+ // If the known zeros are in the left operand for a subtract,
+ // fall back to the minimum known zeros in both operands.
+ KnownZero |= APInt::getLowBitsSet(BitWidth,
+ std::min(LHSKnownZeroOut,
+ RHSKnownZeroOut));
+ }
+ } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+ KnownZero |= LHSKnownZero & Mask;
+ KnownOne |= LHSKnownOne & Mask;
+ }
+
+ // Are we still trying to solve for the sign bit?
+ if (!KnownZero.isNegative() && !KnownOne.isNegative()) {
+ if (NSW) {
+ if (Add) {
+ // Adding two positive numbers can't wrap into negative
+ if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // and adding two negative numbers can't wrap into positive.
+ else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ } else {
+ // Subtracting a negative number from a positive one can't wrap
+ if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
+ KnownZero |= APInt::getSignBit(BitWidth);
+ // neither can subtracting a positive number from a negative one.
+ else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
+ KnownOne |= APInt::getSignBit(BitWidth);
+ }
+ }
+ }
+}
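A worked instance of the trailing-zeros rule: if X is known to end in four zero bits and Y = 0b00000101, no carry in X + Y can propagate into bit 4 from below, so the sum's low four bits are exactly Y's. For X - Y this transfer is only sound when the known zeros sit in the right operand, hence the fallback to the minimum of the two counts on the left-operand side.

    0b10110000 + 0b00000101 = 0b10110101   // low nibble copied from Y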
+
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const TargetData *TD, unsigned Depth) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+ bool isKnownNegative = false;
+ bool isKnownNonNegative = false;
+ // If the multiplication is known not to overflow, compute the sign bit.
+ if (NSW) {
+ if (Op0 == Op1) {
+ // The product of a number with itself is non-negative.
+ isKnownNonNegative = true;
+ } else {
+ bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+ bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+ bool isKnownNegativeOp1 = KnownOne.isNegative();
+ bool isKnownNegativeOp0 = KnownOne2.isNegative();
+ // The product of two numbers with the same sign is non-negative.
+ isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+ (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+ // The product of a negative number and a non-negative number is either
+ // negative or zero.
+ if (!isKnownNonNegative)
+ isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+ isKnownNonZero(Op0, TD, Depth)) ||
+ (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+ isKnownNonZero(Op1, TD, Depth));
+ }
+ }
+
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ KnownOne.clearAllBits();
+ unsigned TrailZ = KnownZero.countTrailingOnes() +
+ KnownZero2.countTrailingOnes();
+ unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
+ KnownZero2.countLeadingOnes(),
+ BitWidth) - BitWidth;
+
+ TrailZ = std::min(TrailZ, BitWidth);
+ LeadZ = std::min(LeadZ, BitWidth);
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+ APInt::getHighBitsSet(BitWidth, LeadZ);
+
+ // Only make use of no-wrap flags if we failed to compute the sign bit
+ // directly. This matters if the multiplication always overflows, in
+ // which case we prefer to follow the result of the direct computation,
+ // though as the program is invoking undefined behaviour we can choose
+ // whatever we like here.
+ if (isKnownNonNegative && !KnownOne.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (isKnownNegative && !KnownZero.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+}
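The trailing-zeros half of this computation can be checked with a small self-contained sketch (only APInt is assumed):

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    // Trailing zeros of the factors add: tz(52) = 2, tz(40) = 3,
    // and 52 * 40 = 2080 = 0b100000100000 has five trailing zeros.
    void mulTrailingZerosSketch() {
      APInt X(16, 52), Y(16, 40);
      APInt P = X * Y;
      assert(P.countTrailingZeros() >=
             X.countTrailingZeros() + Y.countTrailingZeros());
    }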
+
+void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+ unsigned BitWidth = KnownZero.getBitWidth();
+ unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1);
+
+ // Use the high end of the ranges to find leading zeros.
+ unsigned MinLeadingZeros = BitWidth;
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Lower = cast<ConstantInt>(Ranges.getOperand(2*i + 0));
+ ConstantInt *Upper = cast<ConstantInt>(Ranges.getOperand(2*i + 1));
+ ConstantRange Range(Lower->getValue(), Upper->getValue());
+ if (Range.isWrappedSet())
+ MinLeadingZeros = 0; // -1 has no zeros
+ unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
+ MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
+ }
+
+ KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+}
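A worked instance of this computation, assuming the usual MD_range shape of lower/upper pairs:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // For a load annotated !range !{ i8 0, i8 32 }: the largest value the
    // load can produce is Upper - 1 = 31 = 0b00011111, so the top three
    // bits are known zero.
    void rangeKnownZeroSketch() {
      APInt Upper(8, 32);
      unsigned LZ = (Upper - 1).countLeadingZeros();   // 3
      APInt KnownZero = APInt::getHighBitsSet(8, LZ);  // 0b11100000
      (void)KnownZero;
    }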
+/// ComputeMaskedBits - Determine which of the bits are known to be either zero
+/// or one and return them in the KnownZero/KnownOne bit sets.
+///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
/// it to be an explicit zero. If we don't change it to zero, other code could
@@ -54,67 +222,75 @@ static unsigned getBitWidth(Type *Ty, const TargetData *TD) {
///
/// This function is defined on values with integer type, values with pointer
/// type (but only if TD is non-null), and vectors of integers. In the case
-/// where V is a vector, the mask, known zero, and known one values are the
+/// where V is a vector, the known zero and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
- APInt &KnownZero, APInt &KnownOne,
+void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
const TargetData *TD, unsigned Depth) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
- unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
- && "Not integer or pointer type!");
+ unsigned BitWidth = KnownZero.getBitWidth();
+
+ assert((V->getType()->isIntOrIntVectorTy() ||
+ V->getType()->getScalarType()->isPointerTy()) &&
+ "Not integer or pointer type!");
assert((!TD ||
TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
(!V->getType()->isIntOrIntVectorTy() ||
V->getType()->getScalarSizeInBits() == BitWidth) &&
- KnownZero.getBitWidth() == BitWidth &&
+ KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, Mask, KnownOne and KnownZero should have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
- KnownOne = CI->getValue() & Mask;
- KnownZero = ~KnownOne & Mask;
+ KnownOne = CI->getValue();
+ KnownZero = ~KnownOne;
return;
}
// Null and aggregate-zero are all-zeros.
if (isa<ConstantPointerNull>(V) ||
isa<ConstantAggregateZero>(V)) {
KnownOne.clearAllBits();
- KnownZero = Mask;
+ KnownZero = APInt::getAllOnesValue(BitWidth);
return;
}
// Handle a constant vector by taking the intersection of the known bits of
- // each element.
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ // each element. There is no real need to handle ConstantVector here, because
+ // we don't handle undef in any particularly useful way.
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ // We know that CDS must be a vector of integers. Take the intersection of
+ // each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
- APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
- ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
- TD, Depth);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
+ APInt Elt(KnownZero.getBitWidth(), 0);
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ Elt = CDS->getElementAsInteger(i);
+ KnownZero &= ~Elt;
+ KnownOne &= Elt;
}
return;
}
+
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
- if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
- Type *ObjectType = GV->getType()->getElementType();
- // If the object is defined in the current Module, we'll be giving
- // it the preferred alignment. Otherwise, we have to assume that it
- // may only have the minimum ABI alignment.
- if (!GV->isDeclaration() && !GV->mayBeOverridden())
- Align = TD->getPrefTypeAlignment(ObjectType);
- else
- Align = TD->getABITypeAlignment(ObjectType);
+ if (Align == 0 && TD) {
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ Type *ObjectType = GVar->getType()->getElementType();
+ if (ObjectType->isSized()) {
+ // If the object is defined in the current Module, we'll be giving
+ // it the preferred alignment. Otherwise, we have to assume that it
+ // may only have the minimum ABI alignment.
+ if (!GVar->isDeclaration() && !GVar->isWeakForLinker())
+ Align = TD->getPreferredAlignment(GVar);
+ else
+ Align = TD->getABITypeAlignment(ObjectType);
+ }
+ }
}
if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
else
KnownZero.clearAllBits();
KnownOne.clearAllBits();
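A quick worked case for this step: an alignment of 16 bytes makes the address a multiple of 16, so CountTrailingZeros_32(16) = 4 low bits are known zero.

    KnownZero = APInt::getLowBitsSet(BitWidth, 4);  // low four bits known zero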
@@ -126,8 +302,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (GA->mayBeOverridden()) {
KnownZero.clearAllBits(); KnownOne.clearAllBits();
} else {
- ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
- TD, Depth+1);
+ ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
}
return;
}
@@ -136,15 +311,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Get alignment information off byval arguments if specified in the IR.
if (A->hasByValAttr())
if (unsigned Align = A->getParamAlignment())
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth,
+ CountTrailingZeros_32(Align));
return;
}
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
- if (Depth == MaxDepth || Mask == 0)
+ if (Depth == MaxDepth)
return; // Limit search depth.
Operator *I = dyn_cast<Operator>(V);
@@ -153,12 +328,14 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
switch (I->getOpcode()) {
default: break;
+ case Instruction::Load:
+ if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
+ computeMaskedBitsLoad(*MD, KnownZero);
+ return;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
- APInt Mask2(Mask & ~KnownZero);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -169,10 +346,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Or: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
- APInt Mask2(Mask & ~KnownOne);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -183,9 +358,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Xor: {
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -197,55 +371,32 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
return;
}
case Instruction::Mul: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
- // If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
- // More trickiness is possible, but this is sufficient for the
- // interesting case of alignment computation.
- KnownOne.clearAllBits();
- unsigned TrailZ = KnownZero.countTrailingOnes() +
- KnownZero2.countTrailingOnes();
- unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
- KnownZero2.countLeadingOnes(),
- BitWidth) - BitWidth;
-
- TrailZ = std::min(TrailZ, BitWidth);
- LeadZ = std::min(LeadZ, BitWidth);
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
- APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
}
case Instruction::UDiv: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0),
- AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(I->getOperand(1),
- AllOnes, KnownZero2, KnownOne2, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
return;
}
case Instruction::Select:
- ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD,
+ ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -278,11 +429,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
else
SrcBitWidth = SrcTy->getScalarSizeInBits();
- APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth);
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
@@ -296,8 +445,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
return;
}
break;
@@ -306,11 +454,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Compute the bits in the result that are not present in the input.
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
- APInt MaskIn = Mask.trunc(SrcBitWidth);
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -327,9 +473,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- APInt Mask2(Mask.lshr(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
@@ -344,9 +488,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -362,9 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- APInt Mask2(Mask.shl(ShiftAmt));
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -378,100 +518,25 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
break;
case Instruction::Sub: {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (!CLHS->getValue().isNegative()) {
- unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
- TD, Depth+1);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((KnownZero2 & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
- // Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
- }
- }
- }
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth);
+ break;
}
- // fall through
case Instruction::Add: {
- // If one of the operands has trailing zeros, then the bits that the
- // other operand has in those bit positions will be preserved in the
- // result. For an add, this works with either operand. For a subtract,
- // this only works if the known zeros are in the right operand.
- APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
- Depth+1);
- assert((LHSKnownZero & LHSKnownOne) == 0 &&
- "Bits known to be one AND zero?");
- unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
-
- ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
- unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
-
- // Determine which operand has more trailing zeros, and use that
- // many bits from the other operand.
- if (LHSKnownZeroOut > RHSKnownZeroOut) {
- if (I->getOpcode() == Instruction::Add) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
- KnownZero |= KnownZero2 & Mask;
- KnownOne |= KnownOne2 & Mask;
- } else {
- // If the known zeros are in the left operand for a subtract,
- // fall back to the minimum known zeros in both operands.
- KnownZero |= APInt::getLowBitsSet(BitWidth,
- std::min(LHSKnownZeroOut,
- RHSKnownZeroOut));
- }
- } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
- APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
- KnownZero |= LHSKnownZero & Mask;
- KnownOne |= LHSKnownOne & Mask;
- }
-
- // Are we still trying to solve for the sign bit?
- if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){
- OverflowingBinaryOperator *OBO = cast<OverflowingBinaryOperator>(I);
- if (OBO->hasNoSignedWrap()) {
- if (I->getOpcode() == Instruction::Add) {
- // Adding two positive numbers can't wrap into negative
- if (LHSKnownZero.isNegative() && KnownZero2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // and adding two negative numbers can't wrap into positive.
- else if (LHSKnownOne.isNegative() && KnownOne2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- } else {
- // Subtracting a negative number from a positive one can't wrap
- if (LHSKnownZero.isNegative() && KnownOne2.isNegative())
- KnownZero |= APInt::getSignBit(BitWidth);
- // neither can subtracting a positive number from a negative one.
- else if (LHSKnownOne.isNegative() && KnownZero2.isNegative())
- KnownOne |= APInt::getSignBit(BitWidth);
- }
- }
- }
-
- return;
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+ KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
+ Depth);
+ break;
}
case Instruction::SRem:
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -487,19 +552,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
- KnownZero &= Mask;
- KnownOne &= Mask;
-
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
- if (Mask.isNegative() && KnownZero.isNonNegative()) {
- APInt Mask2 = APInt::getSignBit(BitWidth);
+ if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
@@ -512,27 +573,24 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD,
Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero |= ~LowBits;
+ KnownOne &= LowBits;
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
- TD, Depth+1);
- ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
- TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
break;
}
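A worked case of the power-of-two branch above: x urem 8 equals x & 7, so the low three bits come straight from x and every higher bit is known zero, which is exactly what KnownZero |= ~LowBits and KnownOne &= LowBits record.

    13 urem 8 = 5:   0b1101 & 0b0111 = 0b0101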
@@ -543,17 +601,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
Align = TD->getABITypeAlignment(AI->getType()->getElementType());
if (Align > 0)
- KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
- CountTrailingZeros_32(Align));
+ KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align));
break;
}
case Instruction::GetElementPtr: {
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
- APInt LocalMask = APInt::getAllOnesValue(BitWidth);
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LocalMask,
- LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
+ Depth+1);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -573,17 +629,15 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (!IndexedTy->isSized()) return;
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
- LocalMask = APInt::getAllOnesValue(GEPOpiBits);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- ComputeMaskedBits(Index, LocalMask,
- LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
TrailZ = std::min(TrailZ,
unsigned(CountTrailingZeros_64(TypeSize) +
LocalKnownZero.countTrailingOnes()));
}
}
- KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+ KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ);
break;
}
case Instruction::PHI: {
@@ -618,17 +672,13 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
- Mask2 = APInt::getLowBitsSet(BitWidth,
- KnownZero2.countTrailingOnes());
+ ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+ ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1);
- KnownZero = Mask &
- APInt::getLowBitsSet(BitWidth,
+ KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
KnownZero3.countTrailingOnes()));
break;
@@ -657,8 +707,8 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownOne2 = APInt(BitWidth, 0);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
- KnownZero2, KnownOne2, TD, MaxDepth-1);
+ ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
+ MaxDepth-1);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -673,10 +723,17 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz: {
unsigned LowBits = Log2_32(BitWidth)+1;
+ // If this call is undefined for 0, the result will be less than 2^n.
+ if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
+ LowBits -= 1;
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+ break;
+ }
+ case Intrinsic::ctpop: {
+ unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
break;
}
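Numbers for the i32 case: cttz, ctlz, and ctpop all produce values in [0, 32], which fit in Log2_32(32) + 1 = 6 bits, so the high 26 bits are known zero; when the is_zero_undef argument is true the result is at most 31, which fits in 5 bits, letting the code mark one more high bit as known zero.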
@@ -687,6 +744,34 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
}
break;
+ case Instruction::ExtractValue:
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
+ ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ if (EVI->getNumIndices() != 1) break;
+ if (EVI->getIndices()[0] == 0) {
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+ false, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, TD, Depth);
+ break;
+ }
+ }
+ }
}
}
@@ -702,8 +787,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
- Depth);
+ ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
@@ -712,10 +796,15 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return CI->getValue().isPowerOf2();
- // TODO: Handle vector constants.
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero,
+ unsigned Depth) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return OrZero;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ return CI->getValue().isPowerOf2();
+ // TODO: Handle vector constants.
+ }
// 1 << X is clearly a power of two if the one is not shifted off the end. If
// it is shifted off the end then the result is undefined.
@@ -731,21 +820,36 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
if (Depth++ == MaxDepth)
return false;
+ Value *X = 0, *Y = 0;
+ // A shift of a power of two is a power of two or zero.
+ if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
+ match(V, m_Shr(m_Value(X), m_Value()))))
+ return isPowerOfTwo(X, TD, /*OrZero*/true, Depth);
+
if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
- return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+ return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth);
if (SelectInst *SI = dyn_cast<SelectInst>(V))
- return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
- isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+ return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) &&
+ isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth);
+
+ if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
+ // A power of two and'd with anything is a power of two or zero.
+ if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) ||
+ isPowerOfTwo(Y, TD, /*OrZero*/true, Depth))
+ return true;
+ // X & (-X) is always a power of two or zero.
+ if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
+ return true;
+ return false;
+ }
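The X & -X identity relied on here can be checked in isolation; a minimal sketch in plain C++:

    #include <cassert>
    #include <cstdint>

    // In two's complement, x & -x isolates the lowest set bit of x, so the
    // result is always a power of two, or zero when x == 0.
    void lowestSetBitSketch() {
      uint32_t x = 44;              // 0b101100
      uint32_t lsb = x & (0u - x);  // 0b000100 == 4
      assert(lsb == 4 && (lsb & (lsb - 1)) == 0);
    }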
// An exact divide or right shift can only shift off zero bits, so the result
// is a power of two only if the first operand is a power of two and not
// copying a sign bit (sdiv int_min, 2).
- if (match(V, m_LShr(m_Value(), m_Value())) ||
- match(V, m_UDiv(m_Value(), m_Value()))) {
- PossiblyExactOperator *PEO = cast<PossiblyExactOperator>(V);
- if (PEO->isExact())
- return isPowerOfTwo(PEO->getOperand(0), TD, Depth);
+ if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
+ match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
+ return isPowerOfTwo(cast<Operator>(V)->getOperand(0), TD, OrZero, Depth);
}
return false;
@@ -767,7 +871,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxDepth)
+ if (Depth++ >= MaxDepth)
return false;
unsigned BitWidth = getBitWidth(V->getType(), TD);
@@ -785,13 +889,13 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
// if the lowest bit is shifted off the end.
if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
- BinaryOperator *BO = cast<BinaryOperator>(V);
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, TD, Depth);
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
if (KnownOne[0])
return true;
}
@@ -799,7 +903,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
// defined if the sign bit is shifted off the end.
else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
// shr exact can only shift out zero bits.
- BinaryOperator *BO = cast<BinaryOperator>(V);
+ PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
return isKnownNonZero(X, TD, Depth);
@@ -809,10 +913,8 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
return true;
}
// div exact can only produce a zero if the dividend is zero.
- else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
- BinaryOperator *BO = cast<BinaryOperator>(V);
- if (BO->isExact())
- return isKnownNonZero(X, TD, Depth);
+ else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
+ return isKnownNonZero(X, TD, Depth);
}
// X + Y.
else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
@@ -835,20 +937,29 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth);
if ((KnownOne & Mask) != 0)
return true;
}
// The sum of a non-negative number and a power of two is not zero.
- if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+ if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth))
return true;
- if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+ if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth))
+ return true;
+ }
+ // X * Y.
+ else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
+ OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ // If X and Y are non-zero then so is X * Y as long as the multiplication
+ // does not overflow.
+ if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
+ isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth))
return true;
}
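The reasoning step here, spelled out: with nsw or nuw the product cannot wrap, and an overflow-free product of two non-zero integers is never zero, so non-zero-ness of both operands transfers to X * Y. Without a no-wrap flag the inference is unsound, e.g. in i8, 16 * 16 wraps to 0.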
// (C ? X : Y) != 0 if X != 0 and Y != 0.
@@ -861,8 +972,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
- TD, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
return KnownOne != 0;
}
@@ -878,7 +988,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
const TargetData *TD, unsigned Depth) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
@@ -917,30 +1027,28 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
- case Instruction::AShr:
+ case Instruction::AShr: {
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
- // ashr X, C -> adds C sign bits.
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
- Tmp += C->getZExtValue();
+ // ashr X, C -> adds C sign bits. Vectors too.
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
+ Tmp += ShAmt->getZExtValue();
if (Tmp > TyBits) Tmp = TyBits;
}
- // vector ashr X, <C, C, C, C> -> adds C sign bits
- if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
- if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
- Tmp += CI->getZExtValue();
- if (Tmp > TyBits) Tmp = TyBits;
- }
- }
return Tmp;
- case Instruction::Shl:
- if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+ }
+ case Instruction::Shl: {
+ const APInt *ShAmt;
+ if (match(U->getOperand(1), m_APInt(ShAmt))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
- if (C->getZExtValue() >= TyBits || // Bad shift.
- C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
- return Tmp - C->getZExtValue();
+ Tmp2 = ShAmt->getZExtValue();
+ if (Tmp2 >= TyBits || // Bad shift.
+ Tmp2 >= Tmp) break; // Shifted all sign bits out.
+ return Tmp - Tmp2;
}
break;
+ }
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: // NOT is handled here.
@@ -971,13 +1079,11 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
- Depth+1);
+ ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
return TyBits;
// If we are subtracting one from a positive number, there is no carry
@@ -998,12 +1104,10 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
- TD, Depth+1);
+ ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(TyBits, 1)) == Mask)
+ if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
return TyBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -1045,8 +1149,8 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- APInt Mask = APInt::getAllOnesValue(TyBits);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ APInt Mask;
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -1282,23 +1386,21 @@ Value *llvm::isBytewiseValue(Value *V) {
}
}
- // A ConstantArray is splatable if all its members are equal and also
- // splatable.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
- if (CA->getNumOperands() == 0)
- return 0;
-
- Value *Val = isBytewiseValue(CA->getOperand(0));
+ // A ConstantDataArray/Vector is splatable if all its members are equal and
+ // also splatable.
+ if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(V)) {
+ Value *Elt = CA->getElementAsConstant(0);
+ Value *Val = isBytewiseValue(Elt);
if (!Val)
return 0;
- for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
- if (CA->getOperand(I-1) != CA->getOperand(I))
+ for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
+ if (CA->getElementAsConstant(I) != Elt)
return 0;
return Val;
}
-
+
// Conceptually, we could handle things like:
// %a = zext i8 %X to i16
// %b = shl i16 %a, 8
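
isBytewiseValue is what lets memset formation handle the new ConstantDataSequential constants; a small sketch of the contract (the helper name here is hypothetical):

    // Sketch: returns the per-byte fill value when StoredVal's memory image
    // repeats one byte (i8 7, i32 0x07070707, or a splat [4 x i8] array),
    // and null otherwise.
    llvm::Value *fillByteFor(llvm::Value *StoredVal) {
      return llvm::isBytewiseValue(StoredVal);
    }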
@@ -1395,50 +1497,44 @@ static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
Instruction *InsertBefore) {
// Nothing to index? Just return V then (this is useful at the end of our
- // recursion)
+ // recursion).
if (idx_range.empty())
return V;
- // We have indices, so V should have an indexable type
- assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
- && "Not looking at a struct or array?");
- assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
- && "Invalid indices for type?");
- CompositeType *PTy = cast<CompositeType>(V->getType());
-
- if (isa<UndefValue>(V))
- return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
- idx_range));
- else if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
- idx_range));
- else if (Constant *C = dyn_cast<Constant>(V)) {
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
- // Recursively process this constant
- return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
- InsertBefore);
- } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+ // We have indices, so V should have an indexable type.
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
+ "Not looking at a struct or array?");
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
+ "Invalid indices for type?");
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ C = C->getAggregateElement(idx_range[0]);
+ if (C == 0) return 0;
+ return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
+ }
+
+ if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
const unsigned *req_idx = idx_range.begin();
for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
i != e; ++i, ++req_idx) {
if (req_idx == idx_range.end()) {
- if (InsertBefore)
- // The requested index identifies a part of a nested aggregate. Handle
- // this specially. For example,
- // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
- // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
- // %C = extractvalue {i32, { i32, i32 } } %B, 1
- // This can be changed into
- // %A = insertvalue {i32, i32 } undef, i32 10, 0
- // %C = insertvalue {i32, i32 } %A, i32 11, 1
- // which allows the unused 0,0 element from the nested struct to be
- // removed.
- return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
- InsertBefore);
- else
- // We can't handle this without inserting insertvalues
+ // We can't handle this without inserting insertvalues
+ if (!InsertBefore)
return 0;
+
+ // The requested index identifies a part of a nested aggregate. Handle
+ // this specially. For example,
+ // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+ // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+ // %C = extractvalue {i32, { i32, i32 } } %B, 1
+ // This can be changed into
+ // %A = insertvalue {i32, i32 } undef, i32 10, 0
+ // %C = insertvalue {i32, i32 } %A, i32 11, 1
+ // which allows the unused 0,0 element from the nested struct to be
+ // removed.
+ return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
+ InsertBefore);
}
    // This insert value inserts something other than what we are looking for.
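
Constant::getAggregateElement, introduced alongside the ConstantDataSequential work, replaces the separate undef/zero/array/struct branches deleted above. A hedged sketch:

    // Sketch: getAggregateElement handles undef, zeroinitializer,
    // ConstantArray/Struct/Vector, and ConstantDataSequential alike,
    // returning null for an out-of-range index.
    llvm::Constant *firstElement(llvm::Constant *C) {
      return C->getAggregateElement(0u);
    }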
@@ -1454,7 +1550,9 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
return FindInsertedValue(I->getInsertedValueOperand(),
makeArrayRef(req_idx, idx_range.end()),
InsertBefore);
- } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+ }
+
+ if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
   // If we're extracting a value from an aggregate that was extracted from
// something else, we can extract from that something else directly instead.
// However, we will need to chain I's indices with the requested indices.
@@ -1486,7 +1584,8 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
const TargetData &TD) {
Operator *PtrOp = dyn_cast<Operator>(Ptr);
- if (PtrOp == 0) return Ptr;
+ if (PtrOp == 0 || Ptr->getType()->isVectorTy())
+ return Ptr;
// Just look through bitcasts.
if (PtrOp->getOpcode() == Instruction::BitCast)
@@ -1521,34 +1620,19 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}
-/// GetConstantStringInfo - This function computes the length of a
+/// getConstantStringInfo - This function computes the length of a
/// null-terminated C string pointed to by V. If successful, it returns true
/// and returns the string in Str. If unsuccessful, it returns false.
-bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
- uint64_t Offset,
- bool StopAtNul) {
- // If V is NULL then return false;
- if (V == NULL) return false;
-
- // Look through bitcast instructions.
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
- return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
-
- // If the value is not a GEP instruction nor a constant expression with a
- // GEP instruction, then return false because ConstantArray can't occur
- // any other way
- const User *GEP = 0;
- if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
- GEP = GEPI;
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() == Instruction::BitCast)
- return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
- if (CE->getOpcode() != Instruction::GetElementPtr)
- return false;
- GEP = CE;
- }
+bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
+ uint64_t Offset, bool TrimAtNul) {
+ assert(V);
+
+  // Look through bitcast instructions and GEPs.
+ V = V->stripPointerCasts();
- if (GEP) {
+  // If the value is a GEP instruction or constant expression, treat it as an
+ // offset.
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Make sure the GEP has exactly three arguments.
if (GEP->getNumOperands() != 3)
return false;
@@ -1573,51 +1657,48 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
StartIdx = CI->getZExtValue();
else
return false;
- return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
- StopAtNul);
+ return getConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset);
}
-
+
// The GEP instruction, constant or instruction, must reference a global
// variable that is a constant and is initialized. The referenced constant
// initializer is the array that we'll use for optimization.
- const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return false;
- const Constant *GlobalInit = GV->getInitializer();
-
- // Handle the ConstantAggregateZero case
- if (isa<ConstantAggregateZero>(GlobalInit)) {
+
+ // Handle the all-zeros case
+ if (GV->getInitializer()->isNullValue()) {
// This is a degenerate case. The initializer is constant zero so the
// length of the string must be zero.
- Str.clear();
+ Str = "";
return true;
}
// Must be a Constant Array
- const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+ const ConstantDataArray *Array =
+ dyn_cast<ConstantDataArray>(GV->getInitializer());
+ if (Array == 0 || !Array->isString())
return false;
// Get the number of elements in the array
- uint64_t NumElts = Array->getType()->getNumElements();
-
+ uint64_t NumElts = Array->getType()->getArrayNumElements();
+
+ // Start out with the entire array in the StringRef.
+ Str = Array->getAsString();
+
if (Offset > NumElts)
return false;
- // Traverse the constant array from 'Offset' which is the place the GEP refers
- // to in the array.
- Str.reserve(NumElts-Offset);
- for (unsigned i = Offset; i != NumElts; ++i) {
- const Constant *Elt = Array->getOperand(i);
- const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI) // This array isn't suitable, non-int initializer.
- return false;
- if (StopAtNul && CI->isZero())
- return true; // we found end of string, success!
- Str += (char)CI->getZExtValue();
- }
+ // Skip over 'offset' bytes.
+ Str = Str.substr(Offset);
- // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+ if (TrimAtNul) {
+ // Trim off the \0 and anything after it. If the array is not nul
+    // terminated, we just return the whole string. The client may know
+ // some other way that the string is length-bound.
+ Str = Str.substr(0, Str.find('\0'));
+ }
return true;
}
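
Under the new interface the caller receives a StringRef into the initializer instead of a copied std::string. A minimal usage sketch, assuming the post-patch signature with its defaulted Offset and TrimAtNul parameters:

    #include "llvm/Analysis/ValueTracking.h"

    // Sketch: S aliases the ConstantDataArray's bytes; no copy is made, and
    // with TrimAtNul (the default) it ends at the first '\0'.
    bool constantStrlen(const llvm::Value *V, size_t &Len) {
      llvm::StringRef S;
      if (!llvm::getConstantStringInfo(V, S))
        return false;
      Len = S.size();
      return true;
    }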
@@ -1629,8 +1710,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
// Look through noop bitcast instructions.
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
- return GetStringLengthH(BCI->getOperand(0), PHIs);
+ V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
@@ -1666,75 +1746,13 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
if (Len1 != Len2) return 0;
return Len1;
}
-
- // If the value is not a GEP instruction nor a constant expression with a
- // GEP instruction, then return unknown.
- User *GEP = 0;
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
- GEP = GEPI;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (CE->getOpcode() != Instruction::GetElementPtr)
- return 0;
- GEP = CE;
- } else {
- return 0;
- }
-
- // Make sure the GEP has exactly three arguments.
- if (GEP->getNumOperands() != 3)
- return 0;
-
- // Check to make sure that the first operand of the GEP is an integer and
- // has value 0 so that we are sure we're indexing into the initializer.
- if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
- if (!Idx->isZero())
- return 0;
- } else
- return 0;
-
- // If the second index isn't a ConstantInt, then this is a variable index
- // into the array. If this occurs, we can't say anything meaningful about
- // the string.
- uint64_t StartIdx = 0;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
- StartIdx = CI->getZExtValue();
- else
- return 0;
-
- // The GEP instruction, constant or instruction, must reference a global
- // variable that is a constant and is initialized. The referenced constant
- // initializer is the array that we'll use for optimization.
- GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
- if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
- GV->mayBeOverridden())
+
+ // Otherwise, see if we can read the string.
+ StringRef StrData;
+ if (!getConstantStringInfo(V, StrData))
return 0;
- Constant *GlobalInit = GV->getInitializer();
-
- // Handle the ConstantAggregateZero case, which is a degenerate case. The
- // initializer is constant zero so the length of the string must be zero.
- if (isa<ConstantAggregateZero>(GlobalInit))
- return 1; // Len = 0 offset by 1.
-
- // Must be a Constant Array
- ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
- if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
- return false;
-
- // Get the number of elements in the array
- uint64_t NumElts = Array->getType()->getNumElements();
-
- // Traverse the constant array from StartIdx (derived above) which is
- // the place the GEP refers to in the array.
- for (unsigned i = StartIdx; i != NumElts; ++i) {
- Constant *Elt = Array->getOperand(i);
- ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- if (!CI) // This array isn't suitable, non-int initializer.
- return 0;
- if (CI->isZero())
- return i-StartIdx+1; // We found end of string, success!
- }
- return 0; // The array isn't null terminated, conservatively return 'unknown'.
+ return StrData.size()+1;
}
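
The helper keeps the long-standing len+1 convention so that 0 can mean "unknown"; a sketch of a caller translating back:

    // Sketch: GetStringLength returns strlen+1, or 0 when it cannot tell.
    bool knownStrlen(llvm::Value *Ptr, uint64_t &Out) {
      uint64_t LenPlus1 = llvm::GetStringLength(Ptr);
      if (LenPlus1 == 0) return false;
      Out = LenPlus1 - 1;  // drop the counted terminator
      return true;
    }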
/// GetStringLength - If we can compute the length of the string pointed to by
@@ -1793,3 +1811,94 @@ bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
}
return true;
}
+
+bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+ const TargetData *TD) {
+ const Operator *Inst = dyn_cast<Operator>(V);
+ if (!Inst)
+ return false;
+
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
+ if (C->canTrap())
+ return false;
+
+ switch (Inst->getOpcode()) {
+ default:
+ return true;
+ case Instruction::UDiv:
+ case Instruction::URem:
+    // x / y is undefined if y == 0, but calculations like x / 3 are safe.
+ return isKnownNonZero(Inst->getOperand(1), TD);
+ case Instruction::SDiv:
+ case Instruction::SRem: {
+ Value *Op = Inst->getOperand(1);
+ // x / y is undefined if y == 0
+ if (!isKnownNonZero(Op, TD))
+ return false;
+ // x / y might be undefined if y == -1
+ unsigned BitWidth = getBitWidth(Op->getType(), TD);
+ if (BitWidth == 0)
+ return false;
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, TD);
+ return !!KnownZero;
+ }
+ case Instruction::Load: {
+ const LoadInst *LI = cast<LoadInst>(Inst);
+ if (!LI->isUnordered())
+ return false;
+ return LI->getPointerOperand()->isDereferenceablePointer();
+ }
+ case Instruction::Call: {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ // These synthetic intrinsics have no side-effects, and just mark
+ // information about their operands.
+ // FIXME: There are other no-op synthetic instructions that potentially
+ // should be considered at least *safe* to speculate...
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ return true;
+
+ case Intrinsic::bswap:
+ case Intrinsic::ctlz:
+ case Intrinsic::ctpop:
+ case Intrinsic::cttz:
+ case Intrinsic::objectsize:
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ return true;
+ // TODO: some fp intrinsics are marked as having the same error handling
+ // as libm. They're safe to speculate when they won't error.
+ // TODO: are convert_{from,to}_fp16 safe?
+ // TODO: can we list target-specific intrinsics here?
+ default: break;
+ }
+ }
+ return false; // The called function could have undefined behavior or
+ // side-effects, even if marked readnone nounwind.
+ }
+ case Instruction::VAArg:
+ case Instruction::Alloca:
+ case Instruction::Invoke:
+ case Instruction::PHI:
+ case Instruction::Store:
+ case Instruction::Ret:
+ case Instruction::Br:
+ case Instruction::IndirectBr:
+ case Instruction::Switch:
+ case Instruction::Unreachable:
+ case Instruction::Fence:
+ case Instruction::LandingPad:
+ case Instruction::AtomicRMW:
+ case Instruction::AtomicCmpXchg:
+ case Instruction::Resume:
+ return false; // Misc instructions which have effects
+ }
+}
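
A typical client of the new predicate is speculative hoisting; a hedged pass-side sketch (the extra mayReadFromMemory restriction is a hypothetical pass policy, not part of the predicate itself):

    // Sketch: only hoist I above a branch if executing it unconditionally
    // cannot trap, and (pass policy) it does not read memory.
    bool canHoist(llvm::Instruction *I, const llvm::TargetData *TD) {
      return llvm::isSafeToSpeculativelyExecute(I, TD) &&
             !I->mayReadFromMemory();
    }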
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index eef6fe0b1c1d..68873e2768d3 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "ArchiveInternals.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Module.h"
+#include <cstdio>
#include <cstdlib>
#include <memory>
using namespace llvm;
@@ -504,7 +506,7 @@ Archive::findModuleDefiningSymbol(const std::string& symbol,
// Modules that define those symbols.
bool
Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
- std::set<Module*>& result,
+ SmallVectorImpl<Module*>& result,
std::string* error) {
if (!mapfile || !base) {
if (error)
@@ -569,21 +571,26 @@ Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
// At this point we have a valid symbol table (one way or another) so we
// just use it to quickly find the symbols requested.
+ SmallPtrSet<Module*, 16> Added;
for (std::set<std::string>::iterator I=symbols.begin(),
- E=symbols.end(); I != E;) {
+ Next = I,
+ E=symbols.end(); I != E; I = Next) {
+ // Increment Next before we invalidate it.
+ ++Next;
+
// See if this symbol exists
Module* m = findModuleDefiningSymbol(*I,error);
- if (m) {
- // The symbol exists, insert the Module into our result, duplicates will
- // be ignored.
- result.insert(m);
-
- // Remove the symbol now that its been resolved, being careful to
- // post-increment the iterator.
- symbols.erase(I++);
- } else {
- ++I;
- }
+ if (!m)
+ continue;
+ bool NewMember = Added.insert(m);
+ if (!NewMember)
+ continue;
+
+ // The symbol exists, insert the Module into our result.
+ result.push_back(m);
+
+    // Remove the symbol now that it's been resolved.
+ symbols.erase(I);
}
return true;
}
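
The Next-iterator dance above is the standard way to erase from a std::set while walking it: erase invalidates only the erased iterator. A self-contained sketch with a stand-in predicate:

    #include <set>
    #include <string>

    static bool resolved(const std::string &S) { return !S.empty(); } // stand-in

    void dropResolved(std::set<std::string> &Symbols) {
      for (std::set<std::string>::iterator I = Symbols.begin(), Next = I,
                                           E = Symbols.end(); I != E; I = Next) {
        ++Next;               // advance before a possible erase of I
        if (resolved(*I))
          Symbols.erase(I);   // Next remains valid
      }
    }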
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 8fcc7aa29cc8..9ef29432ddf2 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -182,11 +182,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where,
if (hasSlash || filePath.str().length() > 15)
flags |= ArchiveMember::HasLongFilenameFlag;
- sys::LLVMFileType type;
+ sys::fs::file_magic type;
if (sys::fs::identify_magic(mbr->path.str(), type))
- type = sys::Unknown_FileType;
+ type = sys::fs::file_magic::unknown;
switch (type) {
- case sys::Bitcode_FileType:
+ case sys::fs::file_magic::bitcode:
flags |= ArchiveMember::BitcodeFlag;
break;
default:
diff --git a/lib/Archive/CMakeLists.txt b/lib/Archive/CMakeLists.txt
index b52974e0753d..7ff478a41a59 100644
--- a/lib/Archive/CMakeLists.txt
+++ b/lib/Archive/CMakeLists.txt
@@ -3,9 +3,3 @@ add_llvm_library(LLVMArchive
ArchiveReader.cpp
ArchiveWriter.cpp
)
-
-add_llvm_library_dependencies(LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Archive/LLVMBuild.txt b/lib/Archive/LLVMBuild.txt
new file mode 100644
index 000000000000..d68550b45fe8
--- /dev/null
+++ b/lib/Archive/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Archive/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Archive
+parent = Libraries
+required_libraries = BitReader Core Support
diff --git a/lib/AsmParser/CMakeLists.txt b/lib/AsmParser/CMakeLists.txt
index 749601510b5b..985ebe200988 100644
--- a/lib/AsmParser/CMakeLists.txt
+++ b/lib/AsmParser/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMAsmParser
LLParser.cpp
Parser.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmParser
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index d0dd98627bab..8818168f643d 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -29,7 +29,7 @@
using namespace llvm;
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
- ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
+ ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
return true;
}
@@ -55,18 +55,22 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
return Result;
}
+static char parseHexChar(char C) {
+ if (C >= '0' && C <= '9')
+ return C-'0';
+ if (C >= 'A' && C <= 'F')
+ return C-'A'+10;
+ if (C >= 'a' && C <= 'f')
+ return C-'a'+10;
+ return 0;
+}
+
uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
uint64_t Result = 0;
for (; Buffer != End; ++Buffer) {
uint64_t OldRes = Result;
Result *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Result += C-'0';
- else if (C >= 'A' && C <= 'F')
- Result += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Result += C-'a'+10;
+ Result += parseHexChar(*Buffer);
if (Result < OldRes) { // Uh, oh, overflow detected!!!
Error("constant bigger than 64 bits detected!");
@@ -82,24 +86,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End,
for (int i=0; i<16; i++, Buffer++) {
assert(Buffer != End);
Pair[0] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[0] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[0] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[0] += C-'a'+10;
+ Pair[0] += parseHexChar(*Buffer);
}
Pair[1] = 0;
for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
Pair[1] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[1] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[1] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[1] += C-'a'+10;
+ Pair[1] += parseHexChar(*Buffer);
}
if (Buffer != End)
Error("constant bigger than 128 bits detected!");
@@ -113,24 +105,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
assert(Buffer != End);
Pair[1] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[1] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[1] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[1] += C-'a'+10;
+ Pair[1] += parseHexChar(*Buffer);
}
Pair[0] = 0;
for (int i=0; i<16; i++, Buffer++) {
Pair[0] *= 16;
- char C = *Buffer;
- if (C >= '0' && C <= '9')
- Pair[0] += C-'0';
- else if (C >= 'A' && C <= 'F')
- Pair[0] += C-'A'+10;
- else if (C >= 'a' && C <= 'f')
- Pair[0] += C-'a'+10;
+ Pair[0] += parseHexChar(*Buffer);
}
if (Buffer != End)
Error("constant bigger than 128 bits detected!");
@@ -149,9 +129,7 @@ static void UnEscapeLexed(std::string &Str) {
*BOut++ = '\\'; // Two \ becomes one
BIn += 2;
} else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) {
- char Tmp = BIn[3]; BIn[3] = 0; // Terminate string
- *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number
- BIn[3] = Tmp; // Restore character
+ *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]);
BIn += 3; // Skip over handled chars
++BOut;
} else {
@@ -503,6 +481,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(tail);
KEYWORD(target);
KEYWORD(triple);
+ KEYWORD(unwind);
KEYWORD(deplibs);
KEYWORD(datalayout);
KEYWORD(volatile);
@@ -570,6 +549,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noimplicitfloat);
KEYWORD(naked);
KEYWORD(nonlazybind);
+ KEYWORD(address_safety);
KEYWORD(type);
KEYWORD(opaque);
@@ -596,6 +576,7 @@ lltok::Kind LLLexer::LexIdentifier() {
if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
TyVal = LLVMTY; return lltok::Type; }
TYPEKEYWORD("void", Type::getVoidTy(Context));
+ TYPEKEYWORD("half", Type::getHalfTy(Context));
TYPEKEYWORD("float", Type::getFloatTy(Context));
TYPEKEYWORD("double", Type::getDoubleTy(Context));
TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context));
@@ -642,7 +623,6 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(indirectbr, IndirectBr);
INSTKEYWORD(invoke, Invoke);
INSTKEYWORD(resume, Resume);
- INSTKEYWORD(unwind, Unwind);
INSTKEYWORD(unreachable, Unreachable);
INSTKEYWORD(alloca, Alloca);
@@ -715,7 +695,7 @@ lltok::Kind LLLexer::Lex0x() {
if (Kind == 'J') {
// HexFPConstant - Floating point constant represented in IEEE format as a
// hexadecimal number for when exponential notation is not precise enough.
- // Float and double only.
+    // Half, float, and double only.
APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
return lltok::APFloat;
}
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 33b913572375..09aea5b01825 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -42,7 +42,6 @@ namespace llvm {
APFloat APFloatVal;
APSInt APSIntVal;
- std::string TheError;
public:
explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
LLVMContext &C);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index cafaab01afd9..068be3d47c33 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -120,11 +120,6 @@ bool LLParser::ValidateEndOfModule() {
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
-
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(M);
return false;
}
@@ -879,7 +874,7 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
/// indicates what kind of attribute list this is: 0: function arg, 1: result,
/// 2: function attr.
-bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
+bool LLParser::ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind) {
Attrs = Attribute::None;
LocTy AttrLoc = Lex.getLoc();
@@ -924,6 +919,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
+ case lltok::kw_address_safety: Attrs |= Attribute::AddressSafety; break;
case lltok::kw_alignstack: {
unsigned Alignment;
@@ -1047,13 +1043,11 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_cc: {
unsigned ArbitraryCC;
Lex.Lex();
- if (ParseUInt32(ArbitraryCC)) {
+ if (ParseUInt32(ArbitraryCC))
return true;
- } else
- CC = static_cast<CallingConv::ID>(ArbitraryCC);
- return false;
+ CC = static_cast<CallingConv::ID>(ArbitraryCC);
+ return false;
}
- break;
}
Lex.Lex();
@@ -1069,7 +1063,7 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst,
return TokError("expected metadata after comma");
std::string Name = Lex.getStrVal();
- unsigned MDK = M->getMDKindID(Name.c_str());
+ unsigned MDK = M->getMDKindID(Name);
Lex.Lex();
MDNode *Node;
@@ -1358,8 +1352,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
Type *ArgTy = 0;
- unsigned ArgAttrs1 = Attribute::None;
- unsigned ArgAttrs2 = Attribute::None;
+ Attributes ArgAttrs1;
+ Attributes ArgAttrs2;
Value *V;
if (ParseType(ArgTy, ArgLoc))
return true;
@@ -1399,7 +1393,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
} else {
LocTy TypeLoc = Lex.getLoc();
Type *ArgTy = 0;
- unsigned Attrs;
+ Attributes Attrs;
std::string Name;
if (ParseType(ArgTy) ||
@@ -1466,7 +1460,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (ArgList[i].Attrs != 0)
+ if (ArgList[i].Attrs)
return Error(ArgList[i].Loc,
"argument attributes invalid in function type");
}
@@ -1612,7 +1606,8 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if ((unsigned)Size != Size)
return Error(SizeLoc, "size too large for vector");
if (!VectorType::isValidElementType(EltTy))
- return Error(TypeLoc, "vector element type must be fp or integer");
+ return Error(TypeLoc,
+ "vector element type must be fp, integer or a pointer to these types");
Result = VectorType::get(EltTy, unsigned(Size));
} else {
if (!ArrayType::isValidElementType(EltTy))
@@ -1971,9 +1966,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return Error(ID.Loc, "constant vector must not be empty");
if (!Elts[0]->getType()->isIntegerTy() &&
- !Elts[0]->getType()->isFloatingPointTy())
+ !Elts[0]->getType()->isFloatingPointTy() &&
+ !Elts[0]->getType()->isPointerTy())
return Error(FirstEltLoc,
- "vector elements must have integer or floating point type");
+ "vector elements must have integer, pointer or floating point type");
// Verify that all the vector elements have the same type.
for (unsigned i = 1, e = Elts.size(); i != e; ++i)
@@ -2022,7 +2018,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
}
case lltok::kw_c: // c "foo"
Lex.Lex();
- ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
+ ID.ConstantVal = ConstantDataArray::getString(Context, Lex.getStrVal(),
+ false);
if (ParseToken(lltok::StringConstant, "expected string")) return true;
ID.Kind = ValID::t_Constant;
return false;
@@ -2165,7 +2162,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
- !Val0->getType()->isPointerTy())
+ !Val0->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -2299,7 +2296,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Opc == Instruction::GetElementPtr) {
- if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+ if (Elts.size() == 0 ||
+ !Elts[0]->getType()->getScalarType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
@@ -2440,7 +2438,6 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "functions are not values, refer to them as pointers");
switch (ID.Kind) {
- default: llvm_unreachable("Unknown ValID!");
case ValID::t_LocalID:
if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
@@ -2485,13 +2482,16 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
!ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
return Error(ID.Loc, "floating point constant invalid for type");
- // The lexer has no type info, so builds all float and double FP constants
- // as double. Fix this here. Long double does not need this.
- if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
- Ty->isFloatTy()) {
+    // The lexer has no type info, so it builds all half, float, and double FP
+ // constants as double. Fix this here. Long double does not need this.
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble) {
bool Ignored;
- ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
- &Ignored);
+ if (Ty->isHalfTy())
+ ID.APFloatVal.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven,
+ &Ignored);
+ else if (Ty->isFloatTy())
+ ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ &Ignored);
}
V = ConstantFP::get(Context, ID.APFloatVal);
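
The conversion the parser performs, shown in isolation; a sketch assuming the era's APFloat interface (IEEEhalf semantics were added with the half type):

    // Sketch: the lexer yields IEEEdouble; narrow it to the declared type.
    bool LosesInfo;
    llvm::APFloat Val(1.5);  // 1.5 is exactly representable in binary16
    Val.convert(llvm::APFloat::IEEEhalf,
                llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
    // LosesInfo is false here; for e.g. 1.1 it would be true.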
@@ -2549,6 +2549,7 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return Error(ID.Loc, "constant expression type mismatch");
return false;
}
+ llvm_unreachable("Invalid ValID");
}
bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
@@ -2585,7 +2586,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
LocTy LinkageLoc = Lex.getLoc();
unsigned Linkage;
- unsigned Visibility, RetAttrs;
+ unsigned Visibility;
+ Attributes RetAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc = Lex.getLoc();
@@ -2649,7 +2651,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
- unsigned FuncAttrs;
+ Attributes FuncAttrs;
std::string Section;
unsigned Alignment;
std::string GC;
@@ -2835,7 +2837,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
}
switch (ParseInstruction(Inst, BB, PFS)) {
- default: assert(0 && "Unknown ParseInstruction result!");
+ default: llvm_unreachable("Unknown ParseInstruction result!");
case InstError: return true;
case InstNormal:
BB->getInstList().push_back(Inst);
@@ -2881,7 +2883,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
switch (Token) {
default: return Error(Loc, "expected instruction opcode");
// Terminator Instructions.
- case lltok::kw_unwind: Inst = new UnwindInst(Context); return false;
case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
case lltok::kw_ret: return ParseRet(Inst, BB, PFS);
case lltok::kw_br: return ParseBr(Inst, PFS);
@@ -2953,19 +2954,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_tail: return ParseCall(Inst, PFS, true);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
- case lltok::kw_load: return ParseLoad(Inst, PFS, false);
- case lltok::kw_store: return ParseStore(Inst, PFS, false);
+ case lltok::kw_load: return ParseLoad(Inst, PFS);
+ case lltok::kw_store: return ParseStore(Inst, PFS);
case lltok::kw_cmpxchg: return ParseCmpXchg(Inst, PFS);
case lltok::kw_atomicrmw: return ParseAtomicRMW(Inst, PFS);
case lltok::kw_fence: return ParseFence(Inst, PFS);
- case lltok::kw_volatile:
- // For compatibility; canonical location is after load
- if (EatIfPresent(lltok::kw_load))
- return ParseLoad(Inst, PFS, true);
- else if (EatIfPresent(lltok::kw_store))
- return ParseStore(Inst, PFS, true);
- else
- return TokError("expected 'load' or 'store'");
case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
@@ -3169,7 +3162,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
- unsigned RetAttrs, FnAttrs;
+ Attributes RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3342,7 +3335,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
- !LHS->getType()->isPointerTy())
+ !LHS->getType()->getScalarType()->isPointerTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -3462,7 +3455,7 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
return true;
if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
- return Error(Loc, "invalid extractelement operands");
+ return Error(Loc, "invalid shufflevector operands");
Inst = new ShuffleVectorInst(Op0, Op1, Op2);
return false;
@@ -3568,7 +3561,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
/// ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
- unsigned RetAttrs, FnAttrs;
+ Attributes RetAttrs, FnAttrs;
CallingConv::ID CC;
Type *RetType = 0;
LocTy RetTypeLoc;
@@ -3689,10 +3682,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)?
/// ::= 'load' 'atomic' 'volatile'? TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'load' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy Loc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3701,15 +3691,12 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3736,10 +3723,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
/// ::= 'store' 'volatile'? TypeAndValue ',' TypeAndValue (',' 'align' i32)?
/// ::= 'store' 'atomic' 'volatile'? TypeAndValue ',' TypeAndValue
/// 'singlethread'? AtomicOrdering (',' 'align' i32)?
-/// Compatibility:
-/// ::= 'volatile' 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
-int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
- bool isVolatile) {
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
unsigned Alignment = 0;
bool AteExtraComma = false;
@@ -3748,15 +3732,12 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
SynchronizationScope Scope = CrossThread;
if (Lex.getKind() == lltok::kw_atomic) {
- if (isVolatile)
- return TokError("mixing atomic with old volatile placement");
isAtomic = true;
Lex.Lex();
}
+ bool isVolatile = false;
if (Lex.getKind() == lltok::kw_volatile) {
- if (isVolatile)
- return TokError("duplicate volatile before and after store");
isVolatile = true;
Lex.Lex();
}
@@ -3902,13 +3883,15 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Ptr, *Val; LocTy Loc, EltLoc;
+ Value *Ptr = 0;
+ Value *Val = 0;
+ LocTy Loc, EltLoc;
bool InBounds = EatIfPresent(lltok::kw_inbounds);
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!Ptr->getType()->isPointerTy())
+ if (!Ptr->getType()->getScalarType()->isPointerTy())
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -3919,11 +3902,23 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!Val->getType()->isIntegerTy())
+ if (!Val->getType()->getScalarType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
+ if (Val->getType()->isVectorTy() != Ptr->getType()->isVectorTy())
+      return Error(EltLoc, "getelementptr index type mismatch");
+ if (Val->getType()->isVectorTy()) {
+ unsigned ValNumEl = cast<VectorType>(Val->getType())->getNumElements();
+ unsigned PtrNumEl = cast<VectorType>(Ptr->getType())->getNumElements();
+ if (ValNumEl != PtrNumEl)
+ return Error(EltLoc,
+ "getelementptr vector index has a wrong number of elements");
+ }
Indices.push_back(Val);
}
+ if (Val && Val->getType()->isVectorTy() && Indices.size() != 1)
+ return Error(EltLoc, "vector getelementptrs must have a single index");
+
if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices))
return Error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ptr, Indices);
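
Condensed, the new checks admit a vector getelementptr only in the single-index, matching-width form; a sketch of the rule (helper name hypothetical):

    // Sketch: valid iff both sides agree on vector-ness and, when vector,
    // there is exactly one index with the same element count, e.g.
    //   getelementptr <4 x i32*> %p, <4 x i64> %i
    bool validVectorGEP(llvm::Type *PtrTy, llvm::Type *IdxTy,
                        unsigned NumIndices) {
      if (!PtrTy->isVectorTy())
        return !IdxTy->isVectorTy();  // scalar GEPs take scalar indices
      if (NumIndices != 1 || !IdxTy->isVectorTy())
        return false;
      return llvm::cast<llvm::VectorType>(PtrTy)->getNumElements() ==
             llvm::cast<llvm::VectorType>(IdxTy)->getNumElements();
    }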
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index cbc3c23e8631..dda880838117 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -15,6 +15,7 @@
#define LLVM_ASMPARSER_LLPARSER_H
#include "LLLexer.h"
+#include "llvm/Attributes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
@@ -171,7 +172,7 @@ namespace llvm {
return ParseUInt32(Val);
}
bool ParseOptionalAddrSpace(unsigned &AddrSpace);
- bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
+ bool ParseOptionalAttrs(Attributes &Attrs, unsigned AttrKind);
bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
bool ParseOptionalLinkage(unsigned &Linkage) {
bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
@@ -304,8 +305,8 @@ namespace llvm {
struct ParamInfo {
LocTy Loc;
Value *V;
- unsigned Attrs;
- ParamInfo(LocTy loc, Value *v, unsigned attrs)
+ Attributes Attrs;
+ ParamInfo(LocTy loc, Value *v, Attributes attrs)
: Loc(loc), V(v), Attrs(attrs) {}
};
bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
@@ -325,9 +326,9 @@ namespace llvm {
struct ArgInfo {
LocTy Loc;
Type *Ty;
- unsigned Attrs;
+ Attributes Attrs;
std::string Name;
- ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
+ ArgInfo(LocTy L, Type *ty, Attributes Attr, const std::string &N)
: Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
};
bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
@@ -363,8 +364,8 @@ namespace llvm {
bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
- int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
- int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
+ int ParseLoad(Instruction *&I, PerFunctionState &PFS);
+ int ParseStore(Instruction *&I, PerFunctionState &PFS);
int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
int ParseFence(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 8f167725ed58..adf5d4f4d0f9 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -50,6 +50,7 @@ namespace lltok {
kw_tail,
kw_target,
kw_triple,
+ kw_unwind,
kw_deplibs,
kw_datalayout,
kw_volatile,
@@ -102,6 +103,7 @@ namespace lltok {
kw_noimplicitfloat,
kw_naked,
kw_nonlazybind,
+ kw_address_safety,
kw_type,
kw_opaque,
@@ -126,7 +128,7 @@ namespace lltok {
kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,
- kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, kw_resume,
+ kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume,
kw_unreachable,
kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..3bc31ed910a7
--- /dev/null
+++ b/lib/AsmParser/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/AsmParser/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AsmParser
+parent = Libraries
+required_libraries = Core Support
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 59fb471f2b93..21b7fd411e3d 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -44,7 +44,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
LLVMContext &Context) {
OwningPtr<MemoryBuffer> File;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
- Err = SMDiagnostic(Filename,
+ Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
return 0;
}
diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt
new file mode 100644
index 000000000000..af9936bbe829
--- /dev/null
+++ b/lib/Bitcode/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Bitcode/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Reader Writer
+
+[component_0]
+type = Group
+name = Bitcode
+parent = Libraries
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 46565f36af16..e3990403bd71 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -22,11 +22,19 @@
#include "llvm/AutoUpgrade.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataStream.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/OperandTraits.h"
using namespace llvm;
+void BitcodeReader::materializeForwardReferencedFunctions() {
+ while (!BlockAddrFwdRefs.empty()) {
+ Function *F = BlockAddrFwdRefs.begin()->first;
+ F->Materialize();
+ }
+}
+
void BitcodeReader::FreeState() {
if (BufferOwned)
delete Buffer;
@@ -394,7 +402,7 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
// The type table size is always specified correctly.
if (ID >= TypeList.size())
return 0;
-
+
if (Type *Ty = TypeList[ID])
return Ty;
@@ -403,14 +411,6 @@ Type *BitcodeReader::getTypeByID(unsigned ID) {
return TypeList[ID] = StructType::create(Context);
}
-/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
-Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
- if (ID >= TypeList.size())
- TypeList.resize(ID+1);
-
- return TypeList[ID];
-}
-
//===----------------------------------------------------------------------===//
// Functions for parsing blocks from the bitcode file
@@ -462,8 +462,8 @@ bool BitcodeReader::ParseAttributeBlock() {
// If Function attributes are using index 0 then transfer them
// to index ~0. Index 0 is used for return value attributes but used to be
// used for function attributes.
- Attributes RetAttribute = Attribute::None;
- Attributes FnAttribute = Attribute::None;
+ Attributes RetAttribute;
+ Attributes FnAttribute;
for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
// FIXME: remove in LLVM 3.0
// The alignment is stored as a 16-bit raw value from bits 31--16.
@@ -473,23 +473,24 @@ bool BitcodeReader::ParseAttributeBlock() {
if (Alignment && !isPowerOf2_32(Alignment))
return Error("Alignment is not a power of two.");
- Attributes ReconstitutedAttr = Record[i+1] & 0xffff;
+ Attributes ReconstitutedAttr(Record[i+1] & 0xffff);
if (Alignment)
ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment);
- ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11;
- Record[i+1] = ReconstitutedAttr;
+ ReconstitutedAttr |=
+ Attributes((Record[i+1] & (0xffffull << 32)) >> 11);
+ Record[i+1] = ReconstitutedAttr.Raw();
if (Record[i] == 0)
- RetAttribute = Record[i+1];
+ RetAttribute = ReconstitutedAttr;
else if (Record[i] == ~0U)
- FnAttribute = Record[i+1];
+ FnAttribute = ReconstitutedAttr;
}
- unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
+ Attributes OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn|
Attribute::ReadOnly|Attribute::ReadNone);
if (FnAttribute == Attribute::None && RetAttribute != Attribute::None &&
- (RetAttribute & OldRetAttrs) != 0) {
+ (RetAttribute & OldRetAttrs)) {
if (FnAttribute == Attribute::None) { // add a slot so they get added.
Record.push_back(~0U);
Record.push_back(0);
@@ -506,8 +507,9 @@ bool BitcodeReader::ParseAttributeBlock() {
} else if (Record[i] == ~0U) {
if (FnAttribute != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute));
- } else if (Record[i+1] != Attribute::None)
- Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1]));
+ } else if (Attributes(Record[i+1]) != Attribute::None)
+ Attrs.push_back(AttributeWithIndex::get(Record[i],
+ Attributes(Record[i+1])));
}
MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end()));
@@ -521,7 +523,7 @@ bool BitcodeReader::ParseAttributeBlock() {
bool BitcodeReader::ParseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
return Error("Malformed block record");
-
+
return ParseTypeTableBody();
}
@@ -533,7 +535,7 @@ bool BitcodeReader::ParseTypeTableBody() {
unsigned NumRecords = 0;
SmallString<64> TypeName;
-
+
// Read all the records for this type table.
while (1) {
unsigned Code = Stream.ReadCode();
@@ -573,6 +575,9 @@ bool BitcodeReader::ParseTypeTableBody() {
case bitc::TYPE_CODE_VOID: // VOID
ResultTy = Type::getVoidTy(Context);
break;
+ case bitc::TYPE_CODE_HALF: // HALF
+ ResultTy = Type::getHalfTy(Context);
+ break;
case bitc::TYPE_CODE_FLOAT: // FLOAT
ResultTy = Type::getFloatTy(Context);
break;
@@ -615,12 +620,12 @@ bool BitcodeReader::ParseTypeTableBody() {
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
- case bitc::TYPE_CODE_FUNCTION: {
+ case bitc::TYPE_CODE_FUNCTION_OLD: {
// FIXME: attrid is dead, remove it in LLVM 3.0
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
+ SmallVector<Type*, 8> ArgTys;
for (unsigned i = 3, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
ArgTys.push_back(T);
@@ -635,10 +640,29 @@ bool BitcodeReader::ParseTypeTableBody() {
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FUNCTION: [vararg, retty, paramty x N]
+ if (Record.size() < 2)
+ return Error("Invalid FUNCTION type record");
+ SmallVector<Type*, 8> ArgTys;
+ for (unsigned i = 2, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[1]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
+ return Error("invalid type in function type");
+
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
return Error("Invalid STRUCT type record");
- std::vector<Type*> EltTys;
+ SmallVector<Type*, 8> EltTys;
for (unsigned i = 1, e = Record.size(); i != e; ++i) {
if (Type *T = getTypeByID(Record[i]))
EltTys.push_back(T);
@@ -728,247 +752,6 @@ bool BitcodeReader::ParseTypeTableBody() {
}
}
-// FIXME: Remove in LLVM 3.1
-bool BitcodeReader::ParseOldTypeTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- if (!TypeList.empty())
- return Error("Multiple TYPE_BLOCKs found!");
-
-
- // While horrible, we have no good ordering of types in the bc file. Just
- // iteratively parse types out of the bc file in multiple passes until we get
- // them all. Do this by saving a cursor for the start of the type block.
- BitstreamCursor StartOfTypeBlockCursor(Stream);
-
- unsigned NumTypesRead = 0;
-
- SmallVector<uint64_t, 64> Record;
-RestartScan:
- unsigned NextTypeID = 0;
- bool ReadAnyTypes = false;
-
- // Read all the records for this type table.
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (NextTypeID != TypeList.size())
- return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
-
- // If we haven't read all of the types yet, iterate again.
- if (NumTypesRead != TypeList.size()) {
- // If we didn't successfully read any types in this pass, then we must
- // have an unhandled forward reference.
- if (!ReadAnyTypes)
- return Error("Obsolete bitcode contains unhandled recursive type");
-
- Stream = StartOfTypeBlockCursor;
- goto RestartScan;
- }
-
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- Type *ResultTy = 0;
- switch (Stream.ReadRecord(Code, Record)) {
- default: return Error("unknown type in type table");
- case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
- // TYPE_CODE_NUMENTRY contains a count of the number of types in the
- // type list. This allows us to reserve space.
- if (Record.size() < 1)
- return Error("Invalid TYPE_CODE_NUMENTRY record");
- TypeList.resize(Record[0]);
- continue;
- case bitc::TYPE_CODE_VOID: // VOID
- ResultTy = Type::getVoidTy(Context);
- break;
- case bitc::TYPE_CODE_FLOAT: // FLOAT
- ResultTy = Type::getFloatTy(Context);
- break;
- case bitc::TYPE_CODE_DOUBLE: // DOUBLE
- ResultTy = Type::getDoubleTy(Context);
- break;
- case bitc::TYPE_CODE_X86_FP80: // X86_FP80
- ResultTy = Type::getX86_FP80Ty(Context);
- break;
- case bitc::TYPE_CODE_FP128: // FP128
- ResultTy = Type::getFP128Ty(Context);
- break;
- case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
- ResultTy = Type::getPPC_FP128Ty(Context);
- break;
- case bitc::TYPE_CODE_LABEL: // LABEL
- ResultTy = Type::getLabelTy(Context);
- break;
- case bitc::TYPE_CODE_METADATA: // METADATA
- ResultTy = Type::getMetadataTy(Context);
- break;
- case bitc::TYPE_CODE_X86_MMX: // X86_MMX
- ResultTy = Type::getX86_MMXTy(Context);
- break;
- case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
- if (Record.size() < 1)
- return Error("Invalid Integer type record");
- ResultTy = IntegerType::get(Context, Record[0]);
- break;
- case bitc::TYPE_CODE_OPAQUE: // OPAQUE
- if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
- ResultTy = StructType::create(Context);
- break;
- case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
- if (NextTypeID >= TypeList.size()) break;
- // If we already read it, don't reprocess.
- if (TypeList[NextTypeID] &&
- !cast<StructType>(TypeList[NextTypeID])->isOpaque())
- break;
-
- // Set a type.
- if (TypeList[NextTypeID] == 0)
- TypeList[NextTypeID] = StructType::create(Context);
-
- std::vector<Type*> EltTys;
- for (unsigned i = 1, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- EltTys.push_back(Elt);
- else
- break;
- }
-
- if (EltTys.size() != Record.size()-1)
- break; // Not all elements are ready.
-
- cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
- ResultTy = TypeList[NextTypeID];
- TypeList[NextTypeID] = 0;
- break;
- }
- case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
- // [pointee type, address space]
- if (Record.size() < 1)
- return Error("Invalid POINTER type record");
- unsigned AddressSpace = 0;
- if (Record.size() == 2)
- AddressSpace = Record[1];
- if ((ResultTy = getTypeByIDOrNull(Record[0])))
- ResultTy = PointerType::get(ResultTy, AddressSpace);
- break;
- }
- case bitc::TYPE_CODE_FUNCTION: {
- // FIXME: attrid is dead, remove it in LLVM 3.0
- // FUNCTION: [vararg, attrid, retty, paramty x N]
- if (Record.size() < 3)
- return Error("Invalid FUNCTION type record");
- std::vector<Type*> ArgTys;
- for (unsigned i = 3, e = Record.size(); i != e; ++i) {
- if (Type *Elt = getTypeByIDOrNull(Record[i]))
- ArgTys.push_back(Elt);
- else
- break;
- }
- if (ArgTys.size()+3 != Record.size())
- break; // Something was null.
- if ((ResultTy = getTypeByIDOrNull(Record[2])))
- ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
- break;
- }
- case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid ARRAY type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = ArrayType::get(ResultTy, Record[0]);
- break;
- case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
- if (Record.size() < 2)
- return Error("Invalid VECTOR type record");
- if ((ResultTy = getTypeByIDOrNull(Record[1])))
- ResultTy = VectorType::get(ResultTy, Record[0]);
- break;
- }
-
- if (NextTypeID >= TypeList.size())
- return Error("invalid TYPE table");
-
- if (ResultTy && TypeList[NextTypeID] == 0) {
- ++NumTypesRead;
- ReadAnyTypes = true;
-
- TypeList[NextTypeID] = ResultTy;
- }
-
- ++NextTypeID;
- }
-}
-
-
-bool BitcodeReader::ParseOldTypeSymbolTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
- return Error("Malformed block record");
-
- SmallVector<uint64_t, 64> Record;
-
- // Read all the records for this type table.
- std::string TypeName;
- while (1) {
- unsigned Code = Stream.ReadCode();
- if (Code == bitc::END_BLOCK) {
- if (Stream.ReadBlockEnd())
- return Error("Error at end of type symbol table block");
- return false;
- }
-
- if (Code == bitc::ENTER_SUBBLOCK) {
- // No known subblocks, always skip them.
- Stream.ReadSubBlockID();
- if (Stream.SkipBlock())
- return Error("Malformed block record");
- continue;
- }
-
- if (Code == bitc::DEFINE_ABBREV) {
- Stream.ReadAbbrevRecord();
- continue;
- }
-
- // Read a record.
- Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
- default: // Default behavior: unknown type.
- break;
- case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N]
- if (ConvertToString(Record, 1, TypeName))
- return Error("Invalid TST_ENTRY record");
- unsigned TypeID = Record[0];
- if (TypeID >= TypeList.size())
- return Error("Invalid Type ID in TST_ENTRY record");
-
- // Only apply the type name to a struct type with no name.
- if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
- if (!STy->isLiteral() && !STy->hasName())
- STy->setName(TypeName);
- TypeName.clear();
- break;
- }
- }
-}
-
bool BitcodeReader::ParseValueSymbolTable() {
if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
return Error("Malformed block record");
@@ -1262,7 +1045,9 @@ bool BitcodeReader::ParseConstants() {
case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval]
if (Record.empty())
return Error("Invalid FLOAT record");
- if (CurTy->isFloatTy())
+ if (CurTy->isHalfTy())
+ V = ConstantFP::get(Context, APFloat(APInt(16, (uint16_t)Record[0])));
+ else if (CurTy->isFloatTy())
V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0])));
else if (CurTy->isDoubleTy())
V = ConstantFP::get(Context, APFloat(APInt(64, Record[0])));
@@ -1286,7 +1071,7 @@ bool BitcodeReader::ParseConstants() {
return Error("Invalid CST_AGGREGATE record");
unsigned Size = Record.size();
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 16> Elts;
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
for (unsigned i = 0; i != Size; ++i)
@@ -1308,35 +1093,78 @@ bool BitcodeReader::ParseConstants() {
}
break;
}
- case bitc::CST_CODE_STRING: { // STRING: [values]
+ case bitc::CST_CODE_STRING: // STRING: [values]
+ case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
if (Record.empty())
- return Error("Invalid CST_AGGREGATE record");
-
- ArrayType *ATy = cast<ArrayType>(CurTy);
- Type *EltTy = ATy->getElementType();
+ return Error("Invalid CST_STRING record");
unsigned Size = Record.size();
- std::vector<Constant*> Elts;
+ SmallString<16> Elts;
for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ConstantInt::get(EltTy, Record[i]));
- V = ConstantArray::get(ATy, Elts);
+ Elts.push_back(Record[i]);
+ V = ConstantDataArray::getString(Context, Elts,
+ BitCode == bitc::CST_CODE_CSTRING);
break;
}
- case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+ case bitc::CST_CODE_DATA: { // DATA: [n x value]
if (Record.empty())
- return Error("Invalid CST_AGGREGATE record");
-
- ArrayType *ATy = cast<ArrayType>(CurTy);
- Type *EltTy = ATy->getElementType();
-
+ return Error("Invalid CST_DATA record");
+
+ Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
unsigned Size = Record.size();
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != Size; ++i)
- Elts.push_back(ConstantInt::get(EltTy, Record[i]));
- Elts.push_back(Constant::getNullValue(EltTy));
- V = ConstantArray::get(ATy, Elts);
+
+ if (EltTy->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0; i != Size; ++i) {
+ union { uint32_t I; float F; };
+ I = Record[i];
+ Elts.push_back(F);
+ }
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0; i != Size; ++i) {
+ union { uint64_t I; double F; };
+ I = Record[i];
+ Elts.push_back(F);
+ }
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::get(Context, Elts);
+ else
+ V = ConstantDataArray::get(Context, Elts);
+ } else {
+ return Error("Unknown element type in CE_DATA");
+ }
break;
}
+
case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval]
if (Record.size() < 3) return Error("Invalid CE_BINOP record");
int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
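
The float and double branches of the DATA case above reinterpret the raw record words through an anonymous union. That is the common compiler-supported idiom; memcpy is the strictly conforming way to do the same bit cast. A minimal standalone comparison of the two:

    #include <cstdint>
    #include <cstring>

    // Union punning, as used in the reader above: write one member,
    // read the other. Widely supported, but formally implementation-
    // defined behavior in C++.
    static float bitsToFloatUnion(uint32_t Bits) {
      union { uint32_t I; float F; };
      I = Bits;
      return F;
    }

    // The strictly conforming equivalent: copy the object representation.
    static float bitsToFloatMemcpy(uint32_t Bits) {
      float F;
      std::memcpy(&F, &Bits, sizeof(F));
      return F;
    }
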
@@ -1517,6 +1345,50 @@ bool BitcodeReader::ParseConstants() {
return false;
}
+bool BitcodeReader::ParseUseLists() {
+ if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
+ return Error("Malformed block record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of use-list table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
+ }
+
+ // Read a use list record.
+ Record.clear();
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: // Ignore unknown use-list records.
+ break;
+ case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD.
+ unsigned RecordLength = Record.size();
+ if (RecordLength < 1)
+ return Error ("Invalid UseList reader!");
+ UseListRecords.push_back(Record);
+ break;
+ }
+ }
+ }
+}
+
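
ParseUseLists follows the same reader skeleton as every other block parser in this file: loop on ReadCode, finish on END_BLOCK, skip unknown subblocks, register abbrevs, and dispatch records. A schematic of that control flow in plain C++, with a toy token stream standing in for BitstreamCursor (illustration only, not the real bitstream codes):

    #include <cstdint>
    #include <vector>

    enum Kind { END_BLOCK, ENTER_SUBBLOCK, DEFINE_ABBREV, RECORD };
    struct Token { Kind K; uint64_t Payload; };

    // Mirrors the reader skeleton: finish on END_BLOCK, skip unknown
    // subblocks whole, note abbrev definitions, collect record payloads.
    static bool parseBlock(const std::vector<Token> &Stream,
                           std::vector<uint64_t> &Records) {
      for (size_t i = 0; i < Stream.size(); ++i) {
        switch (Stream[i].K) {
        case END_BLOCK:
          return false;                // false == success, per BitcodeReader
        case ENTER_SUBBLOCK:           // No known subblocks: skip to its end.
          while (i < Stream.size() && Stream[i].K != END_BLOCK)
            ++i;
          break;
        case DEFINE_ABBREV:            // Abbrevs only change record encoding.
          break;
        case RECORD:
          Records.push_back(Stream[i].Payload);
          break;
        }
      }
      return true;                     // Ran off the end: malformed block.
    }
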
/// RememberAndSkipFunctionBody - When we see the block for a function body,
/// remember where it is and then skip it. This lets us lazily deserialize the
/// functions.
@@ -1538,8 +1410,36 @@ bool BitcodeReader::RememberAndSkipFunctionBody() {
return false;
}
-bool BitcodeReader::ParseModule() {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+bool BitcodeReader::GlobalCleanup() {
+ // Patch the initializers for globals and aliases up.
+ ResolveGlobalAndAliasInits();
+ if (!GlobalInits.empty() || !AliasInits.empty())
+ return Error("Malformed global initializer set");
+
+ // Look for intrinsic functions which need to be upgraded at some point
+ for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ Function *NewFn;
+ if (UpgradeIntrinsicFunction(FI, NewFn))
+ UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
+ }
+
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+ // Force deallocation of memory for these vectors to favor clients that
+ // want lazy deserialization.
+ std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
+ std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
+ return false;
+}
+
+bool BitcodeReader::ParseModule(bool Resume) {
+ if (Resume)
+ Stream.JumpToBit(NextUnreadBit);
+ else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
@@ -1553,33 +1453,7 @@ bool BitcodeReader::ParseModule() {
if (Stream.ReadBlockEnd())
return Error("Error at end of module block");
- // Patch the initializers for globals and aliases up.
- ResolveGlobalAndAliasInits();
- if (!GlobalInits.empty() || !AliasInits.empty())
- return Error("Malformed global initializer set");
- if (!FunctionsWithBodies.empty())
- return Error("Too few function bodies found");
-
- // Look for intrinsic functions which need to be upgraded at some point
- for (Module::iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
- Function* NewFn;
- if (UpgradeIntrinsicFunction(FI, NewFn))
- UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
- }
-
- // Look for global variables which need to be renamed.
- for (Module::global_iterator
- GI = TheModule->global_begin(), GE = TheModule->global_end();
- GI != GE; ++GI)
- UpgradeGlobalVariable(GI);
-
- // Force deallocation of memory for these vectors to favor the client that
- // want lazy deserialization.
- std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
- std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits);
- std::vector<Function*>().swap(FunctionsWithBodies);
- return false;
+ return GlobalCleanup();
}
if (Code == bitc::ENTER_SUBBLOCK) {
@@ -1600,17 +1474,10 @@ bool BitcodeReader::ParseModule() {
if (ParseTypeTable())
return true;
break;
- case bitc::TYPE_BLOCK_ID_OLD:
- if (ParseOldTypeTable())
- return true;
- break;
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
- if (ParseOldTypeSymbolTable())
- return true;
- break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
if (ParseValueSymbolTable())
return true;
+ SeenValueSymbolTable = true;
break;
case bitc::CONSTANTS_BLOCK_ID:
if (ParseConstants() || ResolveGlobalAndAliasInits())
@@ -1623,13 +1490,29 @@ bool BitcodeReader::ParseModule() {
case bitc::FUNCTION_BLOCK_ID:
// If this is the first function body we've seen, reverse the
// FunctionsWithBodies list.
- if (!HasReversedFunctionsWithBodies) {
+ if (!SeenFirstFunctionBody) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
- HasReversedFunctionsWithBodies = true;
+ if (GlobalCleanup())
+ return true;
+ SeenFirstFunctionBody = true;
}
if (RememberAndSkipFunctionBody())
return true;
+ // For streaming bitcode, suspend parsing when we reach the function
+ // bodies. Subsequent materialization calls will resume it when
+ // necessary. For streaming, the function bodies must be at the end of
+ // the bitcode. If the bitcode file is old, the symbol table will be
+ // at the end instead and will not have been seen yet. In this case,
+ // just finish the parse now.
+ if (LazyStreamer && SeenValueSymbolTable) {
+ NextUnreadBit = Stream.GetCurrentBitNo();
+ return false;
+ }
+ break;
+ case bitc::USELIST_BLOCK_ID:
+ if (ParseUseLists())
+ return true;
break;
}
continue;
@@ -1784,8 +1667,10 @@ bool BitcodeReader::ParseModule() {
// If this is a function with a body, remember the prototype we are
// creating now, so that we can match up the body with them later.
- if (!isProto)
+ if (!isProto) {
FunctionsWithBodies.push_back(Func);
+ if (LazyStreamer) DeferredFunctionInfo[Func] = 0;
+ }
break;
}
// ALIAS: [alias type, aliasee val#, linkage]
@@ -1824,24 +1709,7 @@ bool BitcodeReader::ParseModule() {
bool BitcodeReader::ParseBitcodeInto(Module *M) {
TheModule = 0;
- unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
- unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
- if (Buffer->getBufferSize() & 3) {
- if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
- return Error("Invalid bitcode signature");
- else
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
- }
-
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, BufEnd))
- if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
- return Error("Invalid bitcode wrapper header");
-
- StreamFile.init(BufPtr, BufEnd);
- Stream.init(StreamFile);
+ if (InitStream()) return true;
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
@@ -1883,8 +1751,9 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) {
if (TheModule)
return Error("Multiple MODULE_BLOCKs in same stream");
TheModule = M;
- if (ParseModule())
+ if (ParseModule(false))
return true;
+ if (LazyStreamer) return false;
break;
default:
if (Stream.SkipBlock())
@@ -1952,20 +1821,7 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) {
}
bool BitcodeReader::ParseTriple(std::string &Triple) {
- if (Buffer->getBufferSize() & 3)
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
-
- unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
- unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, BufEnd))
- if (SkipBitcodeWrapperHeader(BufPtr, BufEnd))
- return Error("Invalid bitcode wrapper header");
-
- StreamFile.init(BufPtr, BufEnd);
- Stream.init(StreamFile);
+ if (InitStream()) return true;
// Sniff for the signature.
if (Stream.Read(8) != 'B' ||
@@ -2517,10 +2373,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_UNWIND: // UNWIND
- I = new UnwindInst(Context);
- InstructionList.push_back(I);
- break;
case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE
I = new UnreachableInst(Context);
InstructionList.push_back(I);
@@ -2845,6 +2697,19 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
return false;
}
+/// FindFunctionInStream - Find the function body in the bitcode stream
+bool BitcodeReader::FindFunctionInStream(Function *F,
+ DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
+ while (DeferredFunctionInfoIterator->second == 0) {
+ if (Stream.AtEndOfStream())
+ return Error("Could not find Function in stream");
+ // ParseModule will parse the next body in the stream and set its
+ // position in the DeferredFunctionInfo map.
+ if (ParseModule(true)) return true;
+ }
+ return false;
+}
+
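
FindFunctionInStream captures the streaming contract: a DeferredFunctionInfo entry of 0 means "the body exists but has not been reached yet", so the reader keeps resuming ParseModule until the entry is filled in. A small standalone model of that lookup-or-advance pattern (the names are illustrative, not the LLVM API):

    #include <cstdint>
    #include <map>
    #include <string>

    // 0 means "has a body, but its bit offset is still unknown".
    using DeferredInfo = std::map<std::string, uint64_t>;

    // Stand-in for ParseModule(true): scans forward and records one more
    // body's offset. Returns true when the stream is exhausted.
    static bool parseNextBody(DeferredInfo &Info, uint64_t &NextBit) {
      for (auto &Entry : Info)
        if (Entry.second == 0) {
          Entry.second = NextBit;   // Pretend we just found this body here.
          NextBit += 128;
          return false;
        }
      return true;                  // Nothing left in the stream.
    }

    // Mirrors FindFunctionInStream: loop until the wanted body is located.
    static bool findFunction(DeferredInfo &Info, const std::string &Name,
                             uint64_t &NextBit) {
      while (Info[Name] == 0)
        if (parseNextBody(Info, NextBit))
          return true;              // Error: ran out of stream first.
      return false;
    }
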
//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//
@@ -2865,6 +2730,10 @@ bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) {
DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+ // If its position is recorded as 0, its body is somewhere in the stream
+ // but we haven't seen it yet.
+ if (DFII->second == 0)
+ if (LazyStreamer && FindFunctionInStream(F, DFII)) return true;
// Move the bit stream to the saved position of the deferred function body.
Stream.JumpToBit(DFII->second);
@@ -2920,6 +2789,12 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
Materialize(F, ErrInfo))
return true;
+ // At this point, if there are any function bodies, the current bit is
+ // pointing to the END_BLOCK record after them. Now make sure the rest
+ // of the bits in the module have been read.
+ if (NextUnreadBit)
+ ParseModule(true);
+
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
// module is materialized because there could always be another function body
@@ -2939,15 +2814,60 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) {
}
std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics);
- // Upgrade to new EH scheme. N.B. This will go away in 3.1.
- UpgradeExceptionHandling(M);
+ return false;
+}
+
+bool BitcodeReader::InitStream() {
+ if (LazyStreamer) return InitLazyStream();
+ return InitStreamFromBuffer();
+}
+
+bool BitcodeReader::InitStreamFromBuffer() {
+ const unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
+ const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+
+ if (Buffer->getBufferSize() & 3) {
+ if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
+ return Error("Invalid bitcode signature");
+ else
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ }
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
+ return Error("Invalid bitcode wrapper header");
- // Check debug info intrinsics.
- CheckDebugInfoIntrinsics(TheModule);
+ StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
+ Stream.init(*StreamFile);
return false;
}
+bool BitcodeReader::InitLazyStream() {
+ // Check and strip off the bitcode wrapper; BitstreamReader expects never to
+ // see it.
+ StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer);
+ StreamFile.reset(new BitstreamReader(Bytes));
+ Stream.init(*StreamFile);
+
+ unsigned char buf[16];
+ if (Bytes->readBytes(0, 16, buf, NULL) == -1)
+ return Error("Bitcode stream must be at least 16 bytes in length");
+
+ if (!isBitcode(buf, buf + 16))
+ return Error("Invalid bitcode signature");
+
+ if (isBitcodeWrapper(buf, buf + 4)) {
+ const unsigned char *bitcodeStart = buf;
+ const unsigned char *bitcodeEnd = buf + 16;
+ SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
+ Bytes->dropLeadingBytes(bitcodeStart - buf);
+ Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart);
+ }
+ return false;
+}
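
InitLazyStream peeks at the first 16 bytes and, if they carry the 0x0B17C0DE wrapper, drops the header and clamps the object size so the BitstreamReader only ever sees raw bitcode. A standalone sketch of that header arithmetic; the wrapper layout (five little-endian 32-bit words: magic, version, offset, size, cputype) is taken from the writer changes further down:

    #include <cstddef>
    #include <cstdint>

    static uint32_t readLE32(const unsigned char *P) {
      return (uint32_t)P[0] | ((uint32_t)P[1] << 8) |
             ((uint32_t)P[2] << 16) | ((uint32_t)P[3] << 24);
    }

    // Given a buffer that may start with the bitcode wrapper, compute how
    // many leading bytes to drop and how many bytes of real bitcode follow.
    static bool stripWrapper(const unsigned char *Buf, size_t Len,
                             size_t &Drop, size_t &BCSize) {
      if (Len < 20 || readLE32(Buf) != 0x0B17C0DE) {
        Drop = 0; BCSize = Len;      // No wrapper: whole buffer is bitcode.
        return false;
      }
      Drop   = readLE32(Buf + 8);    // Offset field: start of the bitcode.
      BCSize = readLE32(Buf + 12);   // Size field: length of the bitcode.
      return true;
    }
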
//===----------------------------------------------------------------------===//
// External interface
@@ -2970,6 +2890,27 @@ Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer,
}
// Have the BitcodeReader dtor delete 'Buffer'.
R->setBufferOwned(true);
+
+ R->materializeForwardReferencedFunctions();
+
+ return M;
+}
+
+
+Module *llvm::getStreamedBitcodeModule(const std::string &name,
+ DataStreamer *streamer,
+ LLVMContext &Context,
+ std::string *ErrMsg) {
+ Module *M = new Module(name, Context);
+ BitcodeReader *R = new BitcodeReader(streamer, Context);
+ M->setMaterializer(R);
+ if (R->ParseBitcodeInto(M)) {
+ if (ErrMsg)
+ *ErrMsg = R->getErrorString();
+ delete M; // Also deletes R.
+ return 0;
+ }
+ R->setBufferOwned(false); // no buffer to delete
return M;
}
@@ -2990,6 +2931,9 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context,
return 0;
}
+ // TODO: Restore the use-lists to the in-memory state when the bitcode was
+ // written. We must defer until the Module has been fully materialized.
+
return M;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 6e6118cac0dc..e7c4e94f785f 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -126,8 +126,11 @@ class BitcodeReader : public GVMaterializer {
Module *TheModule;
MemoryBuffer *Buffer;
bool BufferOwned;
- BitstreamReader StreamFile;
+ OwningPtr<BitstreamReader> StreamFile;
BitstreamCursor Stream;
+ DataStreamer *LazyStreamer;
+ uint64_t NextUnreadBit;
+ bool SeenValueSymbolTable;
const char *ErrorString;
@@ -135,6 +138,7 @@ class BitcodeReader : public GVMaterializer {
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
SmallVector<Instruction *, 64> InstructionList;
+ SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords;
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
@@ -160,9 +164,10 @@ class BitcodeReader : public GVMaterializer {
// Map the bitcode's custom MDKind ID to the Module's MDKind ID.
DenseMap<unsigned, unsigned> MDKindMap;
- // After the module header has been read, the FunctionsWithBodies list is
- // reversed. This keeps track of whether we've done this yet.
- bool HasReversedFunctionsWithBodies;
+ // Several operations happen after the module header has been read, but
+ // before function bodies are processed. This keeps track of whether
+ // we've done this yet.
+ bool SeenFirstFunctionBody;
/// DeferredFunctionInfo - When function bodies are initially scanned, this
/// map contains info about where to find deferred function body in the
@@ -177,13 +182,22 @@ class BitcodeReader : public GVMaterializer {
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
- ErrorString(0), ValueList(C), MDValueList(C) {
- HasReversedFunctionsWithBodies = false;
+ LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
+ ErrorString(0), ValueList(C), MDValueList(C),
+ SeenFirstFunctionBody(false) {
+ }
+ explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
+ : Context(C), TheModule(0), Buffer(0), BufferOwned(false),
+ LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
+ ErrorString(0), ValueList(C), MDValueList(C),
+ SeenFirstFunctionBody(false) {
}
~BitcodeReader() {
FreeState();
}
-
+
+ void materializeForwardReferencedFunctions();
+
void FreeState();
/// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
@@ -211,7 +225,6 @@ public:
bool ParseTriple(std::string &Triple);
private:
Type *getTypeByID(unsigned ID);
- Type *getTypeByIDOrNull(unsigned ID);
Value *getFnValueByID(unsigned ID, Type *Ty) {
if (Ty && Ty->isMetadataTy())
return MDValueList.getValueFwdRef(ID);
@@ -256,21 +269,26 @@ private:
}
- bool ParseModule();
+ bool ParseModule(bool Resume);
bool ParseAttributeBlock();
bool ParseTypeTable();
- bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
bool ParseTypeTableBody();
- bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
bool ParseValueSymbolTable();
bool ParseConstants();
bool RememberAndSkipFunctionBody();
bool ParseFunctionBody(Function *F);
+ bool GlobalCleanup();
bool ResolveGlobalAndAliasInits();
bool ParseMetadata();
bool ParseMetadataAttachment();
bool ParseModuleTriple(std::string &Triple);
+ bool ParseUseLists();
+ bool InitStream();
+ bool InitStreamFromBuffer();
+ bool InitLazyStream();
+ bool FindFunctionInStream(Function *F,
+ DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator);
};
} // End llvm namespace
diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt
index 37bebc449635..693d4310b834 100644
--- a/lib/Bitcode/Reader/CMakeLists.txt
+++ b/lib/Bitcode/Reader/CMakeLists.txt
@@ -2,8 +2,3 @@ add_llvm_library(LLVMBitReader
BitReader.cpp
BitcodeReader.cpp
)
-
-add_llvm_library_dependencies(LLVMBitReader
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Reader/LLVMBuild.txt b/lib/Bitcode/Reader/LLVMBuild.txt
new file mode 100644
index 000000000000..c85a87bfebc8
--- /dev/null
+++ b/lib/Bitcode/Reader/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Bitcode/Reader/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = BitReader
+parent = Bitcode
+required_libraries = Core Support
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5b3d96953a0e..b25d2e96d594 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -23,6 +23,7 @@
#include "llvm/Operator.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -31,6 +32,12 @@
#include <map>
using namespace llvm;
+static cl::opt<bool>
+EnablePreserveUseListOrdering("enable-bc-uselist-preserve",
+ cl::desc("Turn on experimental support for "
+ "use-list order preservation."),
+ cl::init(false), cl::Hidden);
+
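
The hidden flag above gates the experimental use-list blocks. A cl::opt declared at file scope registers itself with the global option parser of whatever tool links this object, and cl::Hidden merely keeps it out of -help; so the flag would be toggled from the command line (e.g. a hypothetical `llvm-as -enable-bc-uselist-preserve in.ll`) rather than through any API. A minimal self-contained sketch of the same mechanism, with an illustrative flag name:

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Registers itself with the global parser on construction.
    static cl::opt<bool> Demo("demo-flag", cl::desc("Example hidden flag"),
                              cl::init(false), cl::Hidden);

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      return Demo ? 0 : 1;   // e.g. ./tool -demo-flag
    }
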
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
@@ -119,7 +126,6 @@ static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) {
static unsigned GetEncodedOrdering(AtomicOrdering Ordering) {
switch (Ordering) {
- default: llvm_unreachable("Unknown atomic ordering");
case NotAtomic: return bitc::ORDERING_NOTATOMIC;
case Unordered: return bitc::ORDERING_UNORDERED;
case Monotonic: return bitc::ORDERING_MONOTONIC;
@@ -128,14 +134,15 @@ static unsigned GetEncodedOrdering(AtomicOrdering Ordering) {
case AcquireRelease: return bitc::ORDERING_ACQREL;
case SequentiallyConsistent: return bitc::ORDERING_SEQCST;
}
+ llvm_unreachable("Invalid ordering");
}
static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) {
switch (SynchScope) {
- default: llvm_unreachable("Unknown synchronization scope");
case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD;
case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD;
}
+ llvm_unreachable("Invalid synch scope");
}
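
Moving llvm_unreachable after the switch, rather than keeping a default: case, is presumably the pattern that lets the compiler see the switch covers every enumerator: with no default, -Wswitch (GCC/Clang) can flag a genuinely missing case, while the trailing unreachable still silences "control reaches end of non-void function". A minimal illustration of the shape:

    enum Color { Red, Green, Blue };

    // No default: an unhandled enumerator becomes a compile-time warning.
    unsigned encode(Color C) {
      switch (C) {
      case Red:   return 0;
      case Green: return 1;
      case Blue:  return 2;
      }
      __builtin_unreachable();  // GCC/Clang builtin, standing in for
                                // llvm_unreachable("Invalid color");
    }
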
static void WriteStringRecord(unsigned Code, StringRef Str,
@@ -172,10 +179,11 @@ static void WriteAttributeTable(const ValueEnumerator &VE,
// Store the alignment in the bitcode as a 16-bit raw value instead of a
// 5-bit log2 encoded value. Shift the bits above the alignment up by
// 11 bits.
- uint64_t FauxAttr = PAWI.Attrs & 0xffff;
+ uint64_t FauxAttr = PAWI.Attrs.Raw() & 0xffff;
if (PAWI.Attrs & Attribute::Alignment)
- FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16);
- FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11;
+ FauxAttr |= (1ull<<16)<<
+ (((PAWI.Attrs & Attribute::Alignment).Raw()-1) >> 16);
+ FauxAttr |= (PAWI.Attrs.Raw() & (0x3FFull << 21)) << 11;
Record.push_back(FauxAttr);
}
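
The Raw() expression above packs the in-memory attribute word into the bitcode layout: the low 16 bits pass through, the alignment field (assumed stored in memory as log2(align)+1 starting at bit 16) is expanded back to the raw power-of-two in the 16 bits at bit 16, and the remaining bits shift up by 11. A worked instance under that assumed in-memory layout:

    #include <cassert>
    #include <cstdint>

    // Assumed in-memory layout: bits 16..20 hold log2(align)+1.
    // Bitcode layout: bits 16..31 hold the alignment as a raw value.
    static uint64_t encodeAlign(uint64_t Attrs) {
      uint64_t Align = Attrs & (0x1FULL << 16);
      uint64_t Faux  = Attrs & 0xFFFF;
      if (Align)
        Faux |= (1ULL << 16) << ((Align >> 16) - 1);
      return Faux;
    }

    int main() {
      // align 8: log2(8)+1 == 4 in memory -> raw 8 at bit 16 in bitcode.
      uint64_t InMem = 4ULL << 16;
      assert(encodeAlign(InMem) == (8ULL << 16));
    }
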
@@ -194,11 +202,12 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
+ uint64_t NumBits = Log2_32_Ceil(VE.getTypes().size()+1);
+
// Abbrev for TYPE_CODE_POINTER.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
@@ -206,10 +215,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg
- Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_ANON.
@@ -217,8 +225,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_STRUCT_NAME.
@@ -233,16 +241,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
+
unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
// Emit an entry count so the reader can reserve space.
@@ -259,6 +267,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
switch (T->getTypeID()) {
default: llvm_unreachable("Unknown type!");
case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break;
case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
@@ -284,10 +293,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
}
case Type::FunctionTyID: {
FunctionType *FT = cast<FunctionType>(T);
- // FUNCTION: [isvararg, attrid, retty, paramty x N]
+ // FUNCTION: [isvararg, retty, paramty x N]
Code = bitc::TYPE_CODE_FUNCTION;
TypeVals.push_back(FT->isVarArg());
- TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0
TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
@@ -350,7 +358,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
static unsigned getEncodedLinkage(const GlobalValue *GV) {
switch (GV->getLinkage()) {
- default: llvm_unreachable("Invalid linkage!");
case GlobalValue::ExternalLinkage: return 0;
case GlobalValue::WeakAnyLinkage: return 1;
case GlobalValue::AppendingLinkage: return 2;
@@ -368,15 +375,16 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) {
case GlobalValue::LinkerPrivateWeakLinkage: return 14;
case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
}
+ llvm_unreachable("Invalid linkage");
}
static unsigned getEncodedVisibility(const GlobalValue *GV) {
switch (GV->getVisibility()) {
- default: llvm_unreachable("Invalid visibility!");
case GlobalValue::DefaultVisibility: return 0;
case GlobalValue::HiddenVisibility: return 1;
case GlobalValue::ProtectedVisibility: return 2;
}
+ llvm_unreachable("Invalid visibility");
}
// Emit top-level description of module, including target triple, inline asm,
@@ -499,8 +507,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the function proto information.
for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
- // FUNCTION: [type, callingconv, isproto, paramattr,
- // linkage, alignment, section, visibility, gc, unnamed_addr]
+ // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
+ // section, visibility, gc, unnamed_addr]
Vals.push_back(VE.getTypeID(F->getType()));
Vals.push_back(F->getCallingConv());
Vals.push_back(F->isDeclaration());
@@ -520,6 +528,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the alias information.
for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
AI != E; ++AI) {
+ // ALIAS: [alias type, aliasee val#, linkage, visibility]
Vals.push_back(VE.getTypeID(AI->getType()));
Vals.push_back(VE.getValueID(AI->getAliasee()));
Vals.push_back(getEncodedLinkage(AI));
@@ -819,7 +828,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
} else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
Code = bitc::CST_CODE_FLOAT;
Type *Ty = CFP->getType();
- if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+ if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) {
Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
} else if (Ty->isX86_FP80Ty()) {
// api needed to prevent premature destruction
@@ -836,34 +845,56 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
} else {
assert (0 && "Unknown FP type!");
}
- } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
- const ConstantArray *CA = cast<ConstantArray>(C);
+ } else if (isa<ConstantDataSequential>(C) &&
+ cast<ConstantDataSequential>(C)->isString()) {
+ const ConstantDataSequential *Str = cast<ConstantDataSequential>(C);
// Emit constant strings specially.
- unsigned NumOps = CA->getNumOperands();
+ unsigned NumElts = Str->getNumElements();
// If this is a null-terminated string, use the denser CSTRING encoding.
- if (CA->getOperand(NumOps-1)->isNullValue()) {
+ if (Str->isCString()) {
Code = bitc::CST_CODE_CSTRING;
- --NumOps; // Don't encode the null, which isn't allowed by char6.
+ --NumElts; // Don't encode the null, which isn't allowed by char6.
} else {
Code = bitc::CST_CODE_STRING;
AbbrevToUse = String8Abbrev;
}
bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
- for (unsigned i = 0; i != NumOps; ++i) {
- unsigned char V = cast<ConstantInt>(CA->getOperand(i))->getZExtValue();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ unsigned char V = Str->getElementAsInteger(i);
Record.push_back(V);
isCStr7 &= (V & 128) == 0;
if (isCStrChar6)
isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
}
-
+
if (isCStrChar6)
AbbrevToUse = CString6Abbrev;
else if (isCStr7)
AbbrevToUse = CString7Abbrev;
- } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
- isa<ConstantVector>(V)) {
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ Code = bitc::CST_CODE_DATA;
+ Type *EltTy = CDS->getType()->getElementType();
+ if (isa<IntegerType>(EltTy)) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+ Record.push_back(CDS->getElementAsInteger(i));
+ } else if (EltTy->isFloatTy()) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union { float F; uint32_t I; };
+ F = CDS->getElementAsFloat(i);
+ Record.push_back(I);
+ }
+ } else {
+ assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union { double F; uint64_t I; };
+ F = CDS->getElementAsDouble(i);
+ Record.push_back(I);
+ }
+ }
+ } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
+ isa<ConstantVector>(C)) {
Code = bitc::CST_CODE_AGGREGATE;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
Record.push_back(VE.getValueID(C->getOperand(i)));
@@ -1105,10 +1136,18 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
}
break;
case Instruction::Switch:
- Code = bitc::FUNC_CODE_INST_SWITCH;
- Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i)));
+ {
+ Code = bitc::FUNC_CODE_INST_SWITCH;
+ SwitchInst &SI = cast<SwitchInst>(I);
+ Vals.push_back(VE.getTypeID(SI.getCondition()->getType()));
+ Vals.push_back(VE.getValueID(SI.getCondition()));
+ Vals.push_back(VE.getValueID(SI.getDefaultDest()));
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ Vals.push_back(VE.getValueID(i.getCaseValue()));
+ Vals.push_back(VE.getValueID(i.getCaseSuccessor()));
+ }
+ }
break;
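
The rewritten Switch case serializes through the new SwitchInst case iterators instead of walking raw operands, but the emitted record keeps a flat shape: [cond type, cond, default dest, (case value, case dest)*]. A standalone sketch of flattening case pairs into such a record, with plain integer IDs standing in for VE.getValueID:

    #include <cstdint>
    #include <utility>
    #include <vector>

    // Record: [cond type, cond, default dest, (case value, case dest)*]
    static std::vector<uint64_t>
    encodeSwitch(uint64_t CondTy, uint64_t Cond, uint64_t DefaultBB,
                 const std::vector<std::pair<uint64_t, uint64_t>> &Cases) {
      std::vector<uint64_t> Vals = {CondTy, Cond, DefaultBB};
      for (const auto &C : Cases) {
        Vals.push_back(C.first);    // case value ID
        Vals.push_back(C.second);   // case successor block ID
      }
      return Vals;
    }
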
case Instruction::IndirectBr:
Code = bitc::FUNC_CODE_INST_INDIRECTBR;
@@ -1146,9 +1185,6 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_RESUME;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
break;
- case Instruction::Unwind:
- Code = bitc::FUNC_CODE_INST_UNWIND;
- break;
case Instruction::Unreachable:
Code = bitc::FUNC_CODE_INST_UNREACHABLE;
AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
@@ -1573,6 +1609,102 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
+// Sort the Users based on the order in which the reader parses the bitcode
+// file.
+static bool bitcodereader_order(const User *lhs, const User *rhs) {
+ // TODO: Implement the actual parse-order comparison. Until then return
+ // false: a strict weak ordering must return false for equivalent
+ // elements, and an always-true comparator would make std::sort misbehave.
+ return false;
+}
+
+static void WriteUseList(const Value *V, const ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+
+ // One or zero uses can't get out of order.
+ if (V->use_empty() || V->hasNUses(1))
+ return;
+
+ // Make a copy of the in-memory use-list for sorting.
+ unsigned UseListSize = std::distance(V->use_begin(), V->use_end());
+ SmallVector<const User*, 8> UseList;
+ UseList.reserve(UseListSize);
+ for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
+ I != E; ++I) {
+ const User *U = *I;
+ UseList.push_back(U);
+ }
+
+ // Sort the copy based on the order read by the BitcodeReader.
+ std::sort(UseList.begin(), UseList.end(), bitcodereader_order);
+
+ // TODO: Generate a diff between the BitcodeWriter in-memory use-list and the
+ // sorted list (i.e., the expected BitcodeReader in-memory use-list).
+
+ // TODO: Emit the USELIST_CODE_ENTRYs.
+}
+
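
Once bitcodereader_order is implemented, the comparator has to be a strict weak ordering keyed on the position the reader would assign each user. Something along these lines, purely illustrative and assuming a precomputed parse-order index per user:

    #include <map>

    struct User;  // Opaque stand-in for llvm::User.

    // Hypothetical: parse-order indices precomputed for each user.
    static std::map<const User *, unsigned> ParseOrder;

    static unsigned orderOf(const User *U) {
      auto It = ParseOrder.find(U);
      return It == ParseOrder.end() ? 0 : It->second;
    }

    // Strict weak ordering: returns false for equivalent elements, so it
    // is safe to hand to std::sort (unlike an always-true comparator).
    static bool bitcodereaderOrder(const User *L, const User *R) {
      return orderOf(L) < orderOf(R);
    }
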
+static void WriteFunctionUseList(const Function *F, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ VE.incorporateFunction(*F);
+
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI)
+ WriteUseList(AI, VE, Stream);
+ for (Function::const_iterator BB = F->begin(), FE = F->end(); BB != FE;
+ ++BB) {
+ WriteUseList(BB, VE, Stream);
+ for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); II != IE;
+ ++II) {
+ WriteUseList(II, VE, Stream);
+ for (User::const_op_iterator OI = II->op_begin(), E = II->op_end();
+ OI != E; ++OI) {
+ if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
+ isa<InlineAsm>(*OI))
+ WriteUseList(*OI, VE, Stream);
+ }
+ }
+ }
+ VE.purgeFunction();
+}
+
+// Emit use-lists.
+static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE,
+ BitstreamWriter &Stream) {
+ Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3);
+
+ // XXX: this modifies the module, but in a way that should never change the
+ // behavior of any pass or codegen in LLVM. The problem is that GVs may
+ // contain entries in the use_list that do not exist in the Module and are
+ // not stored in the .bc file.
+ for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I)
+ I->removeDeadConstantUsers();
+
+ // Write the global variables.
+ for (Module::const_global_iterator GI = M->global_begin(),
+ GE = M->global_end(); GI != GE; ++GI) {
+ WriteUseList(GI, VE, Stream);
+
+ // Write the global variable initializers.
+ if (GI->hasInitializer())
+ WriteUseList(GI->getInitializer(), VE, Stream);
+ }
+
+ // Write the functions.
+ for (Module::const_iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) {
+ WriteUseList(FI, VE, Stream);
+ if (!FI->isDeclaration())
+ WriteFunctionUseList(FI, VE, Stream);
+ }
+
+ // Write the aliases.
+ for (Module::const_alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+ AI != AE; ++AI) {
+ WriteUseList(AI, VE, Stream);
+ WriteUseList(AI->getAliasee(), VE, Stream);
+ }
+
+ Stream.ExitBlock();
+}
/// WriteModule - Emit the specified module to the bitstream.
static void WriteModule(const Module *M, BitstreamWriter &Stream) {
@@ -1607,17 +1739,21 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit metadata.
WriteModuleMetadata(M, VE, Stream);
- // Emit function bodies.
- for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
- if (!F->isDeclaration())
- WriteFunction(*F, VE, Stream);
-
// Emit metadata.
WriteModuleMetadataStore(M, Stream);
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+ // Emit use-lists.
+ if (EnablePreserveUseListOrdering)
+ WriteModuleUseLists(M, VE, Stream);
+
+ // Emit function bodies.
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+ if (!F->isDeclaration())
+ WriteFunction(*F, VE, Stream);
+
Stream.ExitBlock();
}
@@ -1639,7 +1775,17 @@ enum {
DarwinBCHeaderSize = 5*4
};
-static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
+static void WriteInt32ToBuffer(uint32_t Value, SmallVectorImpl<char> &Buffer,
+ uint32_t &Position) {
+ Buffer[Position + 0] = (unsigned char) (Value >> 0);
+ Buffer[Position + 1] = (unsigned char) (Value >> 8);
+ Buffer[Position + 2] = (unsigned char) (Value >> 16);
+ Buffer[Position + 3] = (unsigned char) (Value >> 24);
+ Position += 4;
+}
+
+static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
+ const Triple &TT) {
unsigned CPUType = ~0U;
// Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
@@ -1666,63 +1812,55 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
CPUType = DARWIN_CPU_TYPE_ARM;
// Traditional Bitcode starts after header.
+ assert(Buffer.size() >= DarwinBCHeaderSize &&
+ "Expected header size to be reserved");
unsigned BCOffset = DarwinBCHeaderSize;
+ unsigned BCSize = Buffer.size()-DarwinBCHeaderSize;
- Stream.Emit(0x0B17C0DE, 32);
- Stream.Emit(0 , 32); // Version.
- Stream.Emit(BCOffset , 32);
- Stream.Emit(0 , 32); // Filled in later.
- Stream.Emit(CPUType , 32);
-}
-
-/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
-/// finalize the header.
-static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
- // Update the size field in the header.
- Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+ // Write the magic and version.
+ unsigned Position = 0;
+ WriteInt32ToBuffer(0x0B17C0DE , Buffer, Position);
+ WriteInt32ToBuffer(0 , Buffer, Position); // Version.
+ WriteInt32ToBuffer(BCOffset , Buffer, Position);
+ WriteInt32ToBuffer(BCSize , Buffer, Position);
+ WriteInt32ToBuffer(CPUType , Buffer, Position);
// If the file is not a multiple of 16 bytes, insert dummy padding.
- while (BufferSize & 15) {
- Stream.Emit(0, 8);
- ++BufferSize;
- }
+ while (Buffer.size() & 15)
+ Buffer.push_back(0);
}
-
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
- std::vector<unsigned char> Buffer;
- BitstreamWriter Stream(Buffer);
-
+ SmallVector<char, 1024> Buffer;
Buffer.reserve(256*1024);
- WriteBitcodeToStream( M, Stream );
-
- // Write the generated bitstream to "Out".
- Out.write((char*)&Buffer.front(), Buffer.size());
-}
-
-/// WriteBitcodeToStream - Write the specified module to the specified output
-/// stream.
-void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
- // If this is darwin or another generic macho target, emit a file header and
- // trailer if needed.
+ // If this is darwin or another generic macho target, reserve space for the
+ // header.
Triple TT(M->getTargetTriple());
if (TT.isOSDarwin())
- EmitDarwinBCHeader(Stream, TT);
-
- // Emit the file header.
- Stream.Emit((unsigned)'B', 8);
- Stream.Emit((unsigned)'C', 8);
- Stream.Emit(0x0, 4);
- Stream.Emit(0xC, 4);
- Stream.Emit(0xE, 4);
- Stream.Emit(0xD, 4);
-
- // Emit the module.
- WriteModule(M, Stream);
+ Buffer.insert(Buffer.begin(), DarwinBCHeaderSize, 0);
+
+ // Emit the module into the buffer.
+ {
+ BitstreamWriter Stream(Buffer);
+
+ // Emit the file header.
+ Stream.Emit((unsigned)'B', 8);
+ Stream.Emit((unsigned)'C', 8);
+ Stream.Emit(0x0, 4);
+ Stream.Emit(0xC, 4);
+ Stream.Emit(0xE, 4);
+ Stream.Emit(0xD, 4);
+
+ // Emit the module.
+ WriteModule(M, Stream);
+ }
if (TT.isOSDarwin())
- EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+ EmitDarwinBCHeaderAndTrailer(Buffer, TT);
+
+ // Write the generated bitstream to "Out".
+ Out.write((char*)&Buffer.front(), Buffer.size());
}
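
EmitDarwinBCHeaderAndTrailer now patches a pre-reserved 20-byte region in the finished buffer instead of streaming the header, which is what makes the size field known up front rather than backpatched. A standalone version of that post-hoc header fill, mirroring WriteInt32ToBuffer above (helper names are illustrative):

    #include <cstdint>
    #include <vector>

    static void writeLE32(std::vector<char> &Buf, size_t Pos, uint32_t V) {
      Buf[Pos + 0] = (char)(V >> 0);
      Buf[Pos + 1] = (char)(V >> 8);
      Buf[Pos + 2] = (char)(V >> 16);
      Buf[Pos + 3] = (char)(V >> 24);
    }

    // Precondition: Buf already holds [20 reserved bytes][bitcode].
    // Fill in the header and pad the file to a multiple of 16 bytes.
    static void finishDarwinWrapper(std::vector<char> &Buf, uint32_t CPUType) {
      const uint32_t HeaderSize = 20;
      writeLE32(Buf, 0,  0x0B17C0DE);                          // magic
      writeLE32(Buf, 4,  0);                                   // version
      writeLE32(Buf, 8,  HeaderSize);                          // bc offset
      writeLE32(Buf, 12, (uint32_t)Buf.size() - HeaderSize);   // bc size
      writeLE32(Buf, 16, CPUType);
      while (Buf.size() & 15)
        Buf.push_back(0);
    }
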
diff --git a/lib/Bitcode/Writer/CMakeLists.txt b/lib/Bitcode/Writer/CMakeLists.txt
index 3cf905697a42..f097b097c337 100644
--- a/lib/Bitcode/Writer/CMakeLists.txt
+++ b/lib/Bitcode/Writer/CMakeLists.txt
@@ -4,8 +4,3 @@ add_llvm_library(LLVMBitWriter
BitcodeWriterPass.cpp
ValueEnumerator.cpp
)
-
-add_llvm_library_dependencies(LLVMBitWriter
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt
new file mode 100644
index 000000000000..7d9e1de771b9
--- /dev/null
+++ b/lib/Bitcode/Writer/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Bitcode/Writer/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = BitWriter
+parent = Bitcode
+required_libraries = Core Support
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 9ae9905b9f1d..1ed9004eb5a1 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -19,6 +19,8 @@
#include "llvm/Module.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;
@@ -107,7 +109,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
OptimizeConstants(FirstConstant, Values.size());
}
-
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert(I != InstructionMap.end() && "Instruction is not mapped!");
@@ -130,6 +131,43 @@ unsigned ValueEnumerator::getValueID(const Value *V) const {
return I->second-1;
}
+void ValueEnumerator::dump() const {
+ print(dbgs(), ValueMap, "Default");
+ dbgs() << '\n';
+ print(dbgs(), MDValueMap, "MetaData");
+ dbgs() << '\n';
+}
+
+void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
+ const char *Name) const {
+
+ OS << "Map Name: " << Name << "\n";
+ OS << "Size: " << Map.size() << "\n";
+ for (ValueMapType::const_iterator I = Map.begin(),
+ E = Map.end(); I != E; ++I) {
+
+ const Value *V = I->first;
+ if (V->hasName())
+ OS << "Value: " << V->getName();
+ else
+ OS << "Value: [null]\n";
+ V->dump();
+
+ OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ if (UI != V->use_begin())
+ OS << ",";
+ if ((*UI)->hasName())
+ OS << " " << (*UI)->getName();
+ else
+ OS << " [null]";
+ }
+ OS << "\n\n";
+ }
+}
+
// Optimize constant ordering.
namespace {
struct CstSortPredicate {
@@ -283,10 +321,6 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
if (isa<GlobalValue>(C)) {
// Initializers for globals are handled explicitly elsewhere.
- } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
- // Do not enumerate the initializers for an array of simple characters.
- // The initializers just pollute the value table, and we emit the strings
- // specially.
} else if (C->getNumOperands()) {
// If a constant has operands, enumerate them. This makes sure that if a
// constant has uses (for example an array of const ints), that they are
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index b6fc920e412b..a6ca53606248 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -32,6 +32,7 @@ class NamedMDNode;
class AttrListPtr;
class ValueSymbolTable;
class MDSymbolTable;
+class raw_ostream;
class ValueEnumerator {
public:
@@ -83,6 +84,9 @@ private:
public:
ValueEnumerator(const Module *M);
+ void dump() const;
+ void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const;
+
unsigned getValueID(const Value *V) const;
unsigned getTypeID(Type *T) const {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 25842a7876a2..822a564441ac 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -148,7 +148,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(State == NULL);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
std::vector<unsigned> &KillIndices = State->GetKillIndices();
std::vector<unsigned> &DefIndices = State->GetDefIndices();
@@ -157,7 +157,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- for (const unsigned *Alias = TRI->getOverlaps(*I);
+ for (const uint16_t *Alias = TRI->getOverlaps(*I);
unsigned Reg = *Alias; ++Alias) {
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -173,7 +173,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- for (const unsigned *Alias = TRI->getOverlaps(*I);
+ for (const uint16_t *Alias = TRI->getOverlaps(*I);
unsigned Reg = *Alias; ++Alias) {
State->UnionGroups(Reg, 0);
KillIndices[Reg] = BB->size();
@@ -186,10 +186,10 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI->getOverlaps(Reg);
unsigned AliasReg = *Alias; ++Alias) {
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
@@ -265,7 +265,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
IsImplicitDefUse(MI, MO)) {
const unsigned Reg = MO.getReg();
PassthruRegs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
PassthruRegs.insert(*Subreg);
}
@@ -333,7 +333,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
if (!State->IsLive(SubregReg)) {
@@ -384,7 +384,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
@@ -392,7 +392,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Any aliased that are live at this point are completely or
// partially defined here, so group those aliases with Reg.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg)) {
State->UnionGroups(Reg, AliasReg);
@@ -423,7 +423,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI->getOverlaps(Reg);
unsigned AliasReg = *Alias; ++Alias)
DefIndices[AliasReg] = Count;
}
@@ -451,8 +451,8 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register uses for this instruction and update
@@ -678,7 +678,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
goto next_super_reg;
} else {
bool found = false;
- for (const unsigned *Alias = TRI->getAliasSet(NewReg);
+ for (const uint16_t *Alias = TRI->getAliasSet(NewReg);
*Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (State->IsLive(AliasReg) ||
@@ -780,6 +780,9 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
I != E; --Count) {
MachineInstr *MI = --I;
+ if (MI->isDebugValue())
+ continue;
+
DEBUG(dbgs() << "Anti: ");
DEBUG(MI->dump());
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 1005f102bea6..87f64311a655 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -41,7 +41,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
if (HintPair.first) {
const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
// The remaining allocation order may depend on the hint.
- ArrayRef<unsigned> Order =
+ ArrayRef<uint16_t> Order =
TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
VRM.getMachineFunction());
if (Order.empty())
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index d1e48a1f2e96..0ce7e0c3b5f6 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -34,8 +34,7 @@ public:
/// AllocationOrder - Create a new AllocationOrder for VirtReg.
/// @param VirtReg Virtual register to allocate for.
/// @param VRM Virtual register map for function.
- /// @param ReservedRegs Set of reserved registers as returned by
- /// TargetRegisterInfo::getReservedRegs().
+ /// @param RegClassInfo Information about reserved and allocatable registers.
AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
const RegisterClassInfo &RegClassInfo);
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index fafc01044d4f..00874d411378 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,4 +1,4 @@
-//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -149,33 +150,37 @@ llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
/// consideration of global floating-point math flags.
///
ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
- ISD::CondCode FPC, FOC;
switch (Pred) {
- case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
- case FCmpInst::FCMP_OEQ: FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
- case FCmpInst::FCMP_OGT: FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
- case FCmpInst::FCMP_OGE: FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
- case FCmpInst::FCMP_OLT: FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
- case FCmpInst::FCMP_OLE: FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
- case FCmpInst::FCMP_ONE: FOC = ISD::SETNE; FPC = ISD::SETONE; break;
- case FCmpInst::FCMP_ORD: FOC = FPC = ISD::SETO; break;
- case FCmpInst::FCMP_UNO: FOC = FPC = ISD::SETUO; break;
- case FCmpInst::FCMP_UEQ: FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
- case FCmpInst::FCMP_UGT: FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
- case FCmpInst::FCMP_UGE: FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
- case FCmpInst::FCMP_ULT: FOC = ISD::SETLT; FPC = ISD::SETULT; break;
- case FCmpInst::FCMP_ULE: FOC = ISD::SETLE; FPC = ISD::SETULE; break;
- case FCmpInst::FCMP_UNE: FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
- case FCmpInst::FCMP_TRUE: FOC = FPC = ISD::SETTRUE; break;
- default:
- llvm_unreachable("Invalid FCmp predicate opcode!");
- FOC = FPC = ISD::SETFALSE;
- break;
+ case FCmpInst::FCMP_FALSE: return ISD::SETFALSE;
+ case FCmpInst::FCMP_OEQ: return ISD::SETOEQ;
+ case FCmpInst::FCMP_OGT: return ISD::SETOGT;
+ case FCmpInst::FCMP_OGE: return ISD::SETOGE;
+ case FCmpInst::FCMP_OLT: return ISD::SETOLT;
+ case FCmpInst::FCMP_OLE: return ISD::SETOLE;
+ case FCmpInst::FCMP_ONE: return ISD::SETONE;
+ case FCmpInst::FCMP_ORD: return ISD::SETO;
+ case FCmpInst::FCMP_UNO: return ISD::SETUO;
+ case FCmpInst::FCMP_UEQ: return ISD::SETUEQ;
+ case FCmpInst::FCMP_UGT: return ISD::SETUGT;
+ case FCmpInst::FCMP_UGE: return ISD::SETUGE;
+ case FCmpInst::FCMP_ULT: return ISD::SETULT;
+ case FCmpInst::FCMP_ULE: return ISD::SETULE;
+ case FCmpInst::FCMP_UNE: return ISD::SETUNE;
+ case FCmpInst::FCMP_TRUE: return ISD::SETTRUE;
+ default: llvm_unreachable("Invalid FCmp predicate opcode!");
+ }
+}
+
+ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ;
+ case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE;
+ case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT;
+ case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE;
+ case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT;
+ case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE;
+ default: return CC;
}
- if (NoNaNsFPMath)
- return FOC;
- else
- return FPC;
}
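The FOC/FPC pair is gone: getFCmpCondCode now always returns the NaN-aware
code, and callers that want no-NaNs behavior strip the ordering themselves.
A minimal sketch of the combined use, assuming the option is reachable as
TM.Options.NoNaNsFPMath (the Options spelling mirrors the
GuaranteedTailCallOpt change later in this file):

    // Hypothetical helper, not part of the patch.
    ISD::CondCode getEffectiveFCmpCode(FCmpInst::Predicate Pred,
                                       const TargetMachine &TM) {
      ISD::CondCode CC = getFCmpCondCode(Pred);   // e.g. FCMP_OLT -> SETOLT
      if (TM.Options.NoNaNsFPMath)
        CC = getFCmpCodeWithoutNaN(CC);           // SETOLT -> SETLT
      return CC;
    }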
/// getICmpCondCode - Return the ISD condition code corresponding to
@@ -195,7 +200,6 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
case ICmpInst::ICMP_UGT: return ISD::SETUGT;
default:
llvm_unreachable("Invalid ICmp predicate opcode!");
- return ISD::SETNE;
}
}
@@ -221,12 +225,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+ (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt ||
+ !isa<UnreachableInst>(Term))) return false;
// If I will have a chain, make sure no other instruction that will have a
// chain interposes between I and the return.
if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
- !I->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(I))
for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
--BBI) {
if (&*BBI == I)
@@ -235,7 +240,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
if (isa<DbgInfoIntrinsic>(BBI))
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
- !BBI->isSafeToSpeculativelyExecute())
+ !isSafeToSpeculativelyExecute(BBI))
return false;
}
@@ -250,7 +255,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
const Function *F = ExitBB->getParent();
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
@@ -285,12 +290,12 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
}
bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
- const TargetLowering &TLI) {
+ SDValue &Chain, const TargetLowering &TLI) {
const Function *F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if (CallerRetAttr & ~Attribute::NoAlias)
return false;
@@ -299,5 +304,5 @@ bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
return false;
// Check if the only use is a function return node.
- return TLI.isUsedByReturnOnly(Node);
+ return TLI.isUsedByReturnOnly(Node, Chain);
}
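The return-attribute check above is bitwise: XOR yields the attributes on
which caller and callee disagree, and masking out NoAlias ignores the one
difference that cannot affect the call sequence. A standalone illustration
with invented bit values:

    #include <cassert>
    #include <cstdint>

    // Hypothetical bit assignments, for illustration only.
    const uint64_t NoAlias = 1u << 0;
    const uint64_t SExt    = 1u << 1;
    const uint64_t ZExt    = 1u << 2;

    bool retAttrsCompatible(uint64_t Caller, uint64_t Callee) {
      return ((Caller ^ Callee) & ~NoAlias) == 0;
    }

    int main() {
      assert(retAttrsCompatible(SExt, SExt | NoAlias)); // only noalias differs
      assert(!retAttrsCompatible(SExt, ZExt));          // extension mismatch
    }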
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 3f2387325360..b60fda86a6ba 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -29,6 +29,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
@@ -36,6 +37,12 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
+cl::opt<bool>
+EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden,
+ cl::desc("Generate ARM EHABI tables with unwinding descriptors"),
+ cl::init(false));
+
ARMException::ARMException(AsmPrinter *A)
: DwarfException(A),
shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
@@ -72,13 +79,15 @@ void ARMException::EndFunction() {
Asm->OutStreamer.EmitPersonality(PerSym);
}
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ if (EnableARMEHABIDescriptors) {
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
- Asm->OutStreamer.EmitHandlerData();
+ Asm->OutStreamer.EmitHandlerData();
- // Emit actual exception table
- EmitExceptionTable();
+ // Emit actual exception table
+ EmitExceptionTable();
+ }
}
Asm->OutStreamer.EmitFnEnd();
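The new flag is a standard cl::opt: a hidden boolean, default off, that
gates the EHABI descriptor emission. For reference, the same pattern in
isolation (flag and function names invented for the example):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    static cl::opt<bool>
    EnableExtraTables("example-enable-extra-tables", cl::Hidden,
                      cl::desc("Gate optional table emission"),
                      cl::init(false));

    void emitOptionalTables() {
      if (!EnableExtraTables)
        return;            // flag off: skip the optional emission entirely
      // ... emit the tables ...
    }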
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1999f3608788..b0b2ff4882af 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -100,6 +100,7 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
OutStreamer(Streamer),
LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
DD = 0; DE = 0; MMI = 0; LI = 0;
+ CurrentFnSym = CurrentFnSymForSize = 0;
GCMetadataPrinters = 0;
VerboseAsm = Streamer.isVerboseAsm();
}
@@ -613,6 +614,10 @@ bool AsmPrinter::needsSEHMoves() {
MF->getFunction()->needsUnwindTableEntry();
}
+bool AsmPrinter::needsRelocationsForDwarfStringPool() const {
+ return MAI->doesDwarfUseRelocationsForStringPool();
+}
+
void AsmPrinter::emitPrologLabel(const MachineInstr &MI) {
MCSymbol *Label = MI.getOperand(0).getMCSymbol();
@@ -732,6 +737,18 @@ void AsmPrinter::EmitFunctionBody() {
OutStreamer.EmitRawText(StringRef("\tnop\n"));
}
+ const Function *F = MF->getFunction();
+ for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
+ const BasicBlock *BB = i;
+ if (!BB->hasAddressTaken())
+ continue;
+ MCSymbol *Sym = GetBlockAddressSymbol(BB);
+ if (Sym->isDefined())
+ continue;
+ OutStreamer.AddComment("Address of block that was removed by CodeGen");
+ OutStreamer.EmitLabel(Sym);
+ }
+
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
@@ -745,7 +762,8 @@ void AsmPrinter::EmitFunctionBody() {
const MCExpr *SizeExp =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
- MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+ MCSymbolRefExpr::Create(CurrentFnSymForSize,
+ OutContext),
OutContext);
OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
}
@@ -780,7 +798,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const {
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
- for (const unsigned *SR = TRI->getSuperRegisters(MLoc.getReg());
+ for (const uint16_t *SR = TRI->getSuperRegisters(MLoc.getReg());
*SR && Reg < 0; ++SR) {
Reg = TRI->getDwarfRegNum(*SR, false);
// FIXME: Get the bit range this register uses of the superregister
@@ -841,6 +859,12 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Emit module flags.
+ SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+ M.getModuleFlagsMetadata(ModuleFlags);
+ if (!ModuleFlags.empty())
+ getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM);
+
// Finalize debug and EH information.
if (DE) {
{
@@ -929,6 +953,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
// Get the function symbol.
CurrentFnSym = Mang->getSymbol(MF.getFunction());
+ CurrentFnSymForSize = CurrentFnSym;
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
@@ -1120,7 +1145,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MCExpr *Value = 0;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
- llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+ llvm_unreachable("Cannot emit EK_Inline jump table entry");
case MachineJumpTableInfo::EK_Custom32:
Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
OutContext);
@@ -1139,6 +1164,15 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
return;
}
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress: {
+ // EK_GPRel64BlockAddress - Each entry is the address of a block, encoded
+ // with a relocation as gp-relative, e.g.:
+ // .gpdword LBB123
+ MCSymbol *MBBSym = MBB->getSymbol();
+ OutStreamer.EmitGPRel64Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+ return;
+ }
+
case MachineJumpTableInfo::EK_LabelDifference32: {
// EK_LabelDifference32 - Each entry is the address of the block minus
// the address of the jump table. This is used for PIC jump tables where
@@ -1191,12 +1225,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
assert(GV->hasInitializer() && "Not a special LLVM global!");
- const TargetData *TD = TM.getTargetData();
- unsigned Align = Log2_32(TD->getPointerPrefAlignment());
if (GV->getName() == "llvm.global_ctors") {
- OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
- EmitAlignment(Align);
- EmitXXStructorList(GV->getInitializer());
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ true);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1208,9 +1238,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
if (GV->getName() == "llvm.global_dtors") {
- OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
- EmitAlignment(Align);
- EmitXXStructorList(GV->getInitializer());
+ EmitXXStructorList(GV->getInitializer(), /* isCtor */ false);
if (TM.getRelocationModel() == Reloc::Static &&
MAI->hasStaticCtorDtorReferenceInStaticMode()) {
@@ -1240,7 +1268,7 @@ void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
}
}
-typedef std::pair<int, Constant*> Structor;
+typedef std::pair<unsigned, Constant*> Structor;
static bool priority_order(const Structor& lhs, const Structor& rhs) {
return lhs.first < rhs.first;
@@ -1248,7 +1276,7 @@ static bool priority_order(const Structor& lhs, const Structor& rhs) {
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
-void AsmPrinter::EmitXXStructorList(const Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1274,19 +1302,20 @@ void AsmPrinter::EmitXXStructorList(const Constant *List) {
CS->getOperand(1)));
}
- // Emit the function pointers in reverse priority order.
- switch (MAI->getStructorOutputOrder()) {
- case Structors::None:
- break;
- case Structors::PriorityOrder:
- std::sort(Structors.begin(), Structors.end(), priority_order);
- break;
- case Structors::ReversePriorityOrder:
- std::sort(Structors.rbegin(), Structors.rend(), priority_order);
- break;
+ // Emit the function pointers in the target-specific order
+ const TargetData *TD = TM.getTargetData();
+ unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+ std::stable_sort(Structors.begin(), Structors.end(), priority_order);
+ for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+ const MCSection *OutputSection =
+ (isCtor ?
+ getObjFileLowering().getStaticCtorSection(Structors[i].first) :
+ getObjFileLowering().getStaticDtorSection(Structors[i].first));
+ OutStreamer.SwitchSection(OutputSection);
+ if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
+ EmitAlignment(Align);
+ EmitXXStructor(Structors[i].second);
}
- for (unsigned i = 0, e = Structors.size(); i != e; ++i)
- EmitGlobalConstant(Structors[i].second);
}
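The ctor/dtor handling now groups entries by init priority: stable_sort
keeps source order within equal priorities, and the section switch happens
per entry so each priority can land in its own section. The sorting half in
isolation, with invented symbol names:

    #include <algorithm>
    #include <utility>
    #include <vector>

    typedef std::pair<unsigned, const char*> Structor; // (priority, symbol)

    static bool priority_order(const Structor &L, const Structor &R) {
      return L.first < R.first;
    }

    int main() {
      std::vector<Structor> S = {{65535, "c"}, {101, "a"}, {101, "b"}};
      // stable_sort keeps "a" before "b" within priority 101.
      std::stable_sort(S.begin(), S.end(), priority_order);
    }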
//===--------------------------------------------------------------------===//
@@ -1423,7 +1452,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
if (CE == 0) {
llvm_unreachable("Unknown constant value to lower!");
- return MCConstantExpr::Create(0, Ctx);
}
switch (CE->getOpcode()) {
@@ -1445,7 +1473,6 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
!AP.MF ? 0 : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
- return MCConstantExpr::Create(0, Ctx);
case Instruction::GetElementPtr: {
const TargetData &TD = *AP.TM.getTargetData();
// Generate a symbolic expression for the byte address
@@ -1543,6 +1570,19 @@ static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
/// isRepeatedByteSequence - Determine whether the given value is
/// composed of a repeated sequence of identical bytes and return the
/// byte value. If it is not a repeated sequence, return -1.
+static int isRepeatedByteSequence(const ConstantDataSequential *V) {
+ StringRef Data = V->getRawDataValues();
+ assert(!Data.empty() && "Empty aggregates should be CAZ node");
+ char C = Data[0];
+ for (unsigned i = 1, e = Data.size(); i != e; ++i)
+ if (Data[i] != C) return -1;
+ return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1.
+}
+
+
+/// isRepeatedByteSequence - Determine whether the given value is
+/// composed of a repeated sequence of identical bytes and return the
+/// byte value. If it is not a repeated sequence, return -1.
static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -1568,8 +1608,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
// Make sure all array elements are sequences of the same repeated
// byte.
- if (CA->getNumOperands() == 0) return -1;
-
+ assert(CA->getNumOperands() != 0 && "Should be a CAZ");
int Byte = isRepeatedByteSequence(CA->getOperand(0), TM);
if (Byte == -1) return -1;
@@ -1580,37 +1619,92 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) {
}
return Byte;
}
+
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V))
+ return isRepeatedByteSequence(CDS);
return -1;
}
-static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
- AsmPrinter &AP) {
- if (AddrSpace != 0 || !CA->isString()) {
- // Not a string. Print the values in successive locations.
-
- // See if we can aggregate some values. Make sure it can be
- // represented as a series of bytes of the constant value.
- int Value = isRepeatedByteSequence(CA, AP.TM);
-
- if (Value != -1) {
- uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
- AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+static void EmitGlobalConstantDataSequential(const ConstantDataSequential *CDS,
+ unsigned AddrSpace, AsmPrinter &AP) {
+
+ // See if we can aggregate this into a .fill, if so, emit it as such.
+ int Value = isRepeatedByteSequence(CDS, AP.TM);
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CDS->getType());
+ // Don't emit a 1-byte object as a .fill.
+ if (Bytes > 1)
+ return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+
+ // If this can be emitted with .ascii/.asciz, emit it as such.
+ if (CDS->isString())
+ return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace);
+
+ // Otherwise, emit the values in successive locations.
+ unsigned ElementByteSize = CDS->getElementByteSize();
+ if (isa<IntegerType>(CDS->getElementType())) {
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CDS->getElementAsInteger(i));
+ AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i),
+ ElementByteSize, AddrSpace);
+ }
+ } else if (ElementByteSize == 4) {
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ assert(CDS->getElementType()->isFloatTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ float F;
+ uint32_t I;
+ };
+
+ F = CDS->getElementAsFloat(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "float " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 4, AddrSpace);
}
- else {
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ } else {
+ assert(CDS->getElementType()->isDoubleTy());
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ union {
+ double F;
+ uint64_t I;
+ };
+
+ F = CDS->getElementAsDouble(i);
+ if (AP.isVerbose())
+ AP.OutStreamer.GetCommentOS() << "double " << F << '\n';
+ AP.OutStreamer.EmitIntValue(I, 8, AddrSpace);
}
- return;
}
- // Otherwise, it can be emitted as .ascii.
- SmallVector<char, 128> TmpVec;
- TmpVec.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CDS->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) *
+ CDS->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
- AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+ AsmPrinter &AP) {
+ // See if we can aggregate some values. Make sure it can be
+ // represented as a series of bytes of the constant value.
+ int Value = isRepeatedByteSequence(CA, AP.TM);
+
+ if (Value != -1) {
+ uint64_t Bytes = AP.TM.getTargetData()->getTypeAllocSize(CA->getType());
+ AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace);
+ }
+ else {
+ for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+ EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+ }
}
static void EmitGlobalConstantVector(const ConstantVector *CV,
@@ -1656,29 +1750,44 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
- // FP Constants are printed as integer constants to avoid losing
- // precision.
- if (CFP->getType()->isDoubleTy()) {
+ if (CFP->getType()->isHalfTy()) {
if (AP.isVerbose()) {
- double Val = CFP->getValueAPF().convertToDouble();
- AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+ SmallString<10> Str;
+ CFP->getValueAPF().toString(Str);
+ AP.OutStreamer.GetCommentOS() << "half " << Str << '\n';
}
-
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
- AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace);
return;
}
if (CFP->getType()->isFloatTy()) {
if (AP.isVerbose()) {
float Val = CFP->getValueAPF().convertToFloat();
- AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+ uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.GetCommentOS() << "float " << Val << '\n'
+ << " (" << format("0x%x", IntVal) << ")\n";
}
uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
return;
}
+ // FP Constants are printed as integer constants to avoid losing
+ // precision.
+ if (CFP->getType()->isDoubleTy()) {
+ if (AP.isVerbose()) {
+ double Val = CFP->getValueAPF().convertToDouble();
+ uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.GetCommentOS() << "double " << Val << '\n'
+ << " (" << format("0x%lx", IntVal) << ")\n";
+ }
+
+ uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+ AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+ return;
+ }
+
if (CFP->getType()->isX86_FP80Ty()) {
// all long double variants are printed as hex
// API needed to prevent premature destruction
@@ -1742,20 +1851,20 @@ static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
AsmPrinter &AP) {
- if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
- uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+ const TargetData *TD = AP.TM.getTargetData();
+ uint64_t Size = TD->getTypeAllocSize(CV->getType());
+ if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV))
return AP.OutStreamer.EmitZeros(Size, AddrSpace);
- }
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
switch (Size) {
case 1:
case 2:
case 4:
case 8:
if (AP.isVerbose())
- AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+ AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n",
+ CI->getZExtValue());
AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
@@ -1764,29 +1873,45 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
}
}
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
- return EmitGlobalConstantArray(CVA, AddrSpace, AP);
-
- if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
- return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
-
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
return EmitGlobalConstantFP(CFP, AddrSpace, AP);
if (isa<ConstantPointerNull>(CV)) {
- unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
return;
}
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV))
+ return EmitGlobalConstantDataSequential(CDS, AddrSpace, AP);
+
+ if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+ return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+ if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+ return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of
+ // vectors).
+ if (CE->getOpcode() == Instruction::BitCast)
+ return EmitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP);
+
+ if (Size > 8) {
+ // If the constant expression's size is greater than 64-bits, then we have
+ // to emit the value in chunks. Try to constant fold the value and emit it
+ // that way.
+ Constant *New = ConstantFoldConstantExpression(CE, TD);
+ if (New && New != CE)
+ return EmitGlobalConstantImpl(New, AddrSpace, AP);
+ }
+ }
+
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
-
+
// Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it
// through the streamer with EmitValue.
- AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
- AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
- AddrSpace);
+ AP.OutStreamer.EmitValue(LowerConstant(CV, AP), Size, AddrSpace);
}
/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
@@ -1953,7 +2078,7 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
// Emit an alignment directive for this block, if needed.
if (unsigned Align = MBB->getAlignment())
- EmitAlignment(Log2_32(Align));
+ EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
@@ -1970,27 +2095,22 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
OutStreamer.EmitLabel(Syms[i]);
}
+ // Print some verbose block comments.
+ if (isVerbose()) {
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (BB->hasName())
+ OutStreamer.AddComment("%" + BB->getName());
+ EmitBasicBlockLoopComments(*MBB, LI, *this);
+ }
+
// Print the main label for the block.
if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
-
- EmitBasicBlockLoopComments(*MBB, LI, *this);
-
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
Twine(MBB->getNumber()) + ":");
}
} else {
- if (isVerbose()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
- if (BB->hasName())
- OutStreamer.AddComment("%" + BB->getName());
- EmitBasicBlockLoopComments(*MBB, LI, *this);
- }
-
OutStreamer.EmitLabel(MBB->getSymbol());
}
}
@@ -2048,7 +2168,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
MachineInstr &MI = *II;
// If it is not a simple branch, we are in a table somewhere.
- if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ if (!MI.isBranch() || MI.isIndirectBranch())
return false;
// If we are the operands of one of the branches, this is not
@@ -2090,6 +2210,4 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
}
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
- return 0;
}
-
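The ConstantDataSequential overload of isRepeatedByteSequence reduces to a
pure byte scan, and the uint8_t cast matters: without it, a constant of
repeated 0xFF bytes would come back as -1, the "not repeated" sentinel.
The same logic as a standalone sketch:

    #include <cassert>
    #include <string>

    // Return the repeated byte value, or -1 if the bytes differ.
    int repeatedByte(const std::string &Data) {
      if (Data.empty()) return -1;          // the real code asserts instead
      char C = Data[0];
      for (size_t i = 1, e = Data.size(); i != e; ++i)
        if (Data[i] != C) return -1;
      return static_cast<unsigned char>(C); // keeps 0xFF distinct from -1
    }

    int main() {
      assert(repeatedByte("\xff\xff\xff") == 0xff); // not mistaken for -1
      assert(repeatedByte("abc") == -1);
    }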
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 4d6c28118427..90d511cbab0a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -35,23 +36,8 @@ using namespace llvm;
void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
-
- if (MAI->hasLEB128()) {
- OutStreamer.EmitSLEB128IntValue(Value);
- return;
- }
- // If we don't have .sleb128, emit as .bytes.
- int Sign = Value >> (8 * sizeof(Value) - 1);
- bool IsMore;
-
- do {
- unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
- Value >>= 7;
- IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
- if (IsMore) Byte |= 0x80;
- OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
- } while (IsMore);
+ OutStreamer.EmitSLEB128IntValue(Value);
}
/// EmitULEB128 - emit the specified unsigned leb128 value.
@@ -60,25 +46,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- // FIXME: Should we add a PadTo option to the streamer?
- if (MAI->hasLEB128() && PadTo == 0) {
- OutStreamer.EmitULEB128IntValue(Value);
- return;
- }
-
- // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
- do {
- unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
- Value >>= 7;
- if (Value || PadTo != 0) Byte |= 0x80;
- OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
- } while (Value);
-
- if (PadTo) {
- if (PadTo > 1)
- OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
- OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
- }
+ OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo);
}
/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
@@ -143,7 +111,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
return 0;
switch (Encoding & 0x07) {
- default: assert(0 && "Invalid encoded value.");
+ default: llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
case dwarf::DW_EH_PE_udata2: return 2;
case dwarf::DW_EH_PE_udata4: return 4;
@@ -177,9 +145,8 @@ void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
const MCSymbol *SectionLabel) const {
// On COFF targets, we have to emit the special .secrel32 directive.
- if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+ if (MAI->getDwarfSectionOffsetDirective()) {
+ OutStreamer.EmitCOFFSecRel32(Label);
return;
}
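The deleted fallback loops were the textbook LEB128 encodings, now
delegated to the streamer. For reference, the unsigned variant the removed
code implemented, minus the PadTo handling:

    #include <cstdint>
    #include <vector>

    // ULEB128: seven payload bits per byte, least significant first; the
    // high bit of each byte marks that another byte follows.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value) Byte |= 0x80;  // more bytes follow
        Out.push_back(Byte);
      } while (Value);
      return Out;
    }
    // encodeULEB128(624485) yields { 0xE5, 0x8E, 0x26 }.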
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 8eda889155a2..d60585465be0 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -326,7 +326,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
}
- if (OpNo >= MI->getNumOperands()) {
+ // We may have a location metadata attached to the end of the
+ // instruction, and at no point should see metadata at any
+ // other point while processing. It's an error if so.
+ if (OpNo >= MI->getNumOperands() ||
+ MI->getOperand(OpNo).isMetadata()) {
Error = true;
} else {
unsigned OpFlags = MI->getOperand(OpNo).getImm();
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index 67d927348b54..58fe2ed9d357 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
DIE.cpp
+ DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfCompileUnit.cpp
DwarfDebug.cpp
@@ -11,13 +12,3 @@ add_llvm_library(LLVMAsmPrinter
OcamlGCPrinter.cpp
Win64Exception.cpp
)
-
-add_llvm_library_dependencies(LLVMAsmPrinter
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 9c1ce761b0c5..3776848e3f47 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -112,15 +112,6 @@ DIE::~DIE() {
delete Children[i];
}
-/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
-///
-DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
- DIEInteger *DI = new (A) DIEInteger(0);
- Values.insert(Values.begin(), DI);
- Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
- return DI;
-}
-
#ifndef NDEBUG
void DIE::print(raw_ostream &O, unsigned IncIndent) {
IndentCount += IncIndent;
@@ -174,6 +165,7 @@ void DIE::dump() {
}
#endif
+void DIEValue::anchor() { }
#ifndef NDEBUG
void DIEValue::dump() {
@@ -223,33 +215,14 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
- default: llvm_unreachable("DIE Value form not supported yet"); break;
+ default: llvm_unreachable("DIE Value form not supported yet");
}
- return 0;
}
#ifndef NDEBUG
void DIEInteger::print(raw_ostream &O) {
- O << "Int: " << (int64_t)Integer
- << format(" 0x%llx", (unsigned long long)Integer);
-}
-#endif
-
-//===----------------------------------------------------------------------===//
-// DIEString Implementation
-//===----------------------------------------------------------------------===//
-
-/// EmitValue - Emit string value.
-///
-void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const {
- AP->OutStreamer.EmitBytes(Str, /*addrspace*/0);
- // Emit nul terminator.
- AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0);
-}
-
-#ifndef NDEBUG
-void DIEString::print(raw_ostream &O) {
- O << "Str: \"" << Str << "\"";
+ O << "Int: " << (int64_t)Integer << " 0x";
+ O.write_hex(Integer);
}
#endif
@@ -267,6 +240,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
///
unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getTargetData().getPointerSize();
}
@@ -290,6 +264,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
///
unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
if (Form == dwarf::DW_FORM_data4) return 4;
+ if (Form == dwarf::DW_FORM_strp) return 4;
return AP->getTargetData().getPointerSize();
}
@@ -335,7 +310,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
///
void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
switch (Form) {
- default: assert(0 && "Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block");
case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break;
case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break;
case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break;
@@ -355,9 +330,8 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
case dwarf::DW_FORM_block: return Size + MCAsmInfo::getULEB128Size(Size);
- default: llvm_unreachable("Improper form for block"); break;
+ default: llvm_unreachable("Improper form for block");
}
- return 0;
}
#ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 7d61f1edff4a..f93ea1b045b2 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -31,17 +31,17 @@ namespace llvm {
class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
- unsigned Attribute;
+ uint16_t Attribute;
/// Form - Dwarf form code.
///
- unsigned Form;
+ uint16_t Form;
public:
- DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+ DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
// Accessors.
- unsigned getAttribute() const { return Attribute; }
- unsigned getForm() const { return Form; }
+ uint16_t getAttribute() const { return Attribute; }
+ uint16_t getForm() const { return Form; }
/// Profile - Used to gather unique data for the abbreviation folding set.
///
@@ -54,41 +54,41 @@ namespace llvm {
class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
- unsigned Tag;
+ uint16_t Tag;
- /// Unique number for node.
+ /// ChildrenFlag - Dwarf children flag.
///
- unsigned Number;
+ uint16_t ChildrenFlag;
- /// ChildrenFlag - Dwarf children flag.
+ /// Unique number for node.
///
- unsigned ChildrenFlag;
+ unsigned Number;
/// Data - Raw data bytes for abbreviation.
///
SmallVector<DIEAbbrevData, 8> Data;
public:
- DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+ DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
// Accessors.
- unsigned getTag() const { return Tag; }
+ uint16_t getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
- unsigned getChildrenFlag() const { return ChildrenFlag; }
+ uint16_t getChildrenFlag() const { return ChildrenFlag; }
const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
- void setTag(unsigned T) { Tag = T; }
- void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+ void setTag(uint16_t T) { Tag = T; }
+ void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
void setNumber(unsigned N) { Number = N; }
/// AddAttribute - Adds another set of attribute information to the
/// abbreviation.
- void AddAttribute(unsigned Attribute, unsigned Form) {
+ void AddAttribute(uint16_t Attribute, uint16_t Form) {
Data.push_back(DIEAbbrevData(Attribute, Form));
}
/// AddFirstAttribute - Adds a set of attribute information to the front
/// of the abbreviation.
- void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+ void AddFirstAttribute(uint16_t Attribute, uint16_t Form) {
Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
}
@@ -113,10 +113,6 @@ namespace llvm {
class DIE {
protected:
- /// Abbrev - Buffer for constructing abbreviation.
- ///
- DIEAbbrev Abbrev;
-
/// Offset - Offset in debug info section.
///
unsigned Offset;
@@ -125,6 +121,10 @@ namespace llvm {
///
unsigned Size;
+ /// Abbrev - Buffer for constructing abbreviation.
+ ///
+ DIEAbbrev Abbrev;
+
/// Children DIEs.
///
std::vector<DIE *> Children;
@@ -139,8 +139,8 @@ namespace llvm {
mutable unsigned IndentCount;
public:
explicit DIE(unsigned Tag)
- : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
- Size(0), Parent (0), IndentCount(0) {}
+ : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0),
+ IndentCount(0) {}
virtual ~DIE();
// Accessors.
@@ -163,16 +163,6 @@ namespace llvm {
Values.push_back(Value);
}
- /// SiblingOffset - Return the offset of the debug information entry's
- /// sibling.
- unsigned getSiblingOffset() const { return Offset + Size; }
-
- /// addSiblingOffset - Add a sibling offset field to the front of the DIE.
- /// The caller is responsible for deleting the return value at or after the
- /// same time it destroys this DIE.
- ///
- DIEValue *addSiblingOffset(BumpPtrAllocator &A);
-
/// addChild - Add a child to the DIE.
///
void addChild(DIE *Child) {
@@ -195,12 +185,12 @@ namespace llvm {
/// DIEValue - A debug information entry value.
///
class DIEValue {
+ virtual void anchor();
public:
enum {
isInteger,
isString,
isLabel,
- isSectionOffset,
isDelta,
isEntry,
isBlock
@@ -276,33 +266,6 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEString - A string value DIE. This DIE keeps string reference only.
- ///
- class DIEString : public DIEValue {
- const StringRef Str;
- public:
- explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {}
-
- /// EmitValue - Emit string value.
- ///
- virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
-
- /// SizeOf - Determine size of string value in bytes.
- ///
- virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
- return Str.size() + sizeof(char); // sizeof('\0');
- }
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIEString *) { return true; }
- static bool classof(const DIEValue *S) { return S->getType() == isString; }
-
-#ifndef NDEBUG
- virtual void print(raw_ostream &O);
-#endif
- };
-
- //===--------------------------------------------------------------------===//
/// DIELabel - A label expression DIE.
//
class DIELabel : public DIEValue {
@@ -359,7 +322,7 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEntry - A pointer to another debug information entry. An instance of
+ /// DIEEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
class DIEEntry : public DIEValue {
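The uint16_t narrowing and the Offset/Size/Abbrev reordering are layout
optimizations: DWARF attribute, form, and tag codes all fit in 16 bits, and
keeping the two uint16_t fields adjacent lets them share a single 4-byte
slot. A sketch of the effect (exact sizes are ABI-dependent):

    #include <cstdint>
    #include <cstdio>

    struct Before { unsigned Tag; unsigned Number; unsigned ChildrenFlag; };
    struct After  { uint16_t Tag; uint16_t ChildrenFlag; unsigned Number; };

    int main() {
      // Typically 12 vs. 8 bytes: the adjacent uint16_t fields pack into
      // one 4-byte unit instead of occupying two.
      std::printf("before=%zu after=%zu\n", sizeof(Before), sizeof(After));
    }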
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
new file mode 100644
index 000000000000..660684d1bea5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -0,0 +1,287 @@
+//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfAccelTable.h"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
+ switch (AT) {
+ case eAtomTypeNULL: return "eAtomTypeNULL";
+ case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
+ case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
+ case eAtomTypeTag: return "eAtomTypeTag";
+ case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
+ case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
+ }
+ llvm_unreachable("invalid AtomType!");
+}
+
+// The general case would need to have a less hard coded size for the
+// length of the HeaderData, however, if we're constructing based on a
+// single Atom then we know it will always be: 4 + 4 + 2 + 2.
+DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) :
+ Header(12),
+ HeaderData(atom) {
+}
+
+// The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
+DwarfAccelTable::DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &atomList) :
+ Header(8 + (atomList.size() * 4)),
+ HeaderData(atomList) {
+}
+
+DwarfAccelTable::~DwarfAccelTable() {
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ delete Data[i];
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI)
+ for (DataArray::iterator DI = EI->second.begin(),
+ DE = EI->second.end(); DI != DE; ++DI)
+ delete (*DI);
+}
+
+void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+ // If the string is in the list already then add this die to the list
+ // otherwise add a new one.
+ DataArray &DIEs = Entries[Name];
+ DIEs.push_back(new HashDataContents(die, Flags));
+}
+
+void DwarfAccelTable::ComputeBucketCount(void) {
+ // First get the number of unique hashes.
+ std::vector<uint32_t> uniques(Data.size());
+ for (size_t i = 0, e = Data.size(); i < e; ++i)
+ uniques[i] = Data[i]->HashValue;
+ array_pod_sort(uniques.begin(), uniques.end());
+ std::vector<uint32_t>::iterator p =
+ std::unique(uniques.begin(), uniques.end());
+ uint32_t num = std::distance(uniques.begin(), p);
+
+ // Then compute the bucket size, minimum of 1 bucket.
+ if (num > 1024) Header.bucket_count = num/4;
+ else if (num > 16) Header.bucket_count = num/2;
+ else Header.bucket_count = num > 0 ? num : 1;
+
+ Header.hashes_count = num;
+}
+
+namespace {
+ // DIESorter - comparison predicate that sorts DIEs by their offset.
+ struct DIESorter {
+ bool operator()(const struct DwarfAccelTable::HashDataContents *A,
+ const struct DwarfAccelTable::HashDataContents *B) const {
+ return A->Die->getOffset() < B->Die->getOffset();
+ }
+ };
+}
+
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+ // Create the individual hash data outputs.
+ for (StringMap<DataArray>::iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ struct HashData *Entry = new HashData((*EI).getKeyData());
+
+ // Unique the entries.
+ std::stable_sort(EI->second.begin(), EI->second.end(), DIESorter());
+ EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
+ EI->second.end());
+
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ Entry->addData((*DI));
+ Data.push_back(Entry);
+ }
+
+ // Figure out how many buckets we need, then compute the bucket
+ // contents and the final ordering. We'll emit the hashes and offsets
+ // by doing a walk during the emission phase. We add temporary
+ // symbols to the data so that we can reference them when emitting
+ // the offsets; the symbols themselves are emitted with the data.
+ ComputeBucketCount();
+
+ // Compute bucket contents and final ordering.
+ Buckets.resize(Header.bucket_count);
+ for (size_t i = 0, e = Data.size(); i < e; ++i) {
+ uint32_t bucket = Data[i]->HashValue % Header.bucket_count;
+ Buckets[bucket].push_back(Data[i]);
+ Data[i]->Sym = Asm->GetTempSymbol(Prefix, i);
+ }
+}
+
+// Emits the header for the table via the AsmPrinter.
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
+ Asm->OutStreamer.AddComment("Header Magic");
+ Asm->EmitInt32(Header.magic);
+ Asm->OutStreamer.AddComment("Header Version");
+ Asm->EmitInt16(Header.version);
+ Asm->OutStreamer.AddComment("Header Hash Function");
+ Asm->EmitInt16(Header.hash_function);
+ Asm->OutStreamer.AddComment("Header Bucket Count");
+ Asm->EmitInt32(Header.bucket_count);
+ Asm->OutStreamer.AddComment("Header Hash Count");
+ Asm->EmitInt32(Header.hashes_count);
+ Asm->OutStreamer.AddComment("Header Data Length");
+ Asm->EmitInt32(Header.header_data_len);
+ Asm->OutStreamer.AddComment("HeaderData Die Offset Base");
+ Asm->EmitInt32(HeaderData.die_offset_base);
+ Asm->OutStreamer.AddComment("HeaderData Atom Count");
+ Asm->EmitInt32(HeaderData.Atoms.size());
+ for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
+ Atom A = HeaderData.Atoms[i];
+ Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+ Asm->EmitInt16(A.type);
+ Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
+ Asm->EmitInt16(A.form);
+ }
+}
+
+// Walk through and emit the buckets for the table. This will look
+// like a list of numbers of how many elements are in each bucket.
+void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
+ unsigned index = 0;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ Asm->OutStreamer.AddComment("Bucket " + Twine(i));
+ if (Buckets[i].size() != 0)
+ Asm->EmitInt32(index);
+ else
+ Asm->EmitInt32(UINT32_MAX);
+ index += Buckets[i].size();
+ }
+}
+
+// Walk through the buckets and emit the individual hashes for each
+// bucket.
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
+ Asm->EmitInt32((*HI)->HashValue);
+ }
+ }
+}
+
+// Walk through the buckets and emit the individual offsets for each
+// element in each bucket. This is done via a symbol subtraction from the
+// beginning of the section. The non-section symbol will be output later
+// when we emit the actual data.
+void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
+ MCContext &Context = Asm->OutStreamer.getContext();
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
+ MCSymbolRefExpr::Create(SecBegin, Context),
+ Context);
+ Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0);
+ }
+ }
+}
+
+// Walk through the buckets and emit the full data for each element in
+// the bucket. For the string case emit the DIEs and the various offsets.
+// Terminate each HashData bucket with 0.
+void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
+ uint64_t PrevHash = UINT64_MAX;
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI) {
+ // Remember to emit the label for our offset.
+ Asm->OutStreamer.EmitLabel((*HI)->Sym);
+ Asm->OutStreamer.AddComment((*HI)->Str);
+ Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
+ D->getStringPool());
+ Asm->OutStreamer.AddComment("Num DIEs");
+ Asm->EmitInt32((*HI)->Data.size());
+ for (std::vector<struct HashDataContents*>::const_iterator
+ DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+ DI != DE; ++DI) {
+ // Emit the DIE offset
+ Asm->EmitInt32((*DI)->Die->getOffset());
+ // If we have multiple Atoms emit that info too.
+ // FIXME: A bit of a hack, we either emit only one atom or all info.
+ if (HeaderData.Atoms.size() > 1) {
+ Asm->EmitInt16((*DI)->Die->getTag());
+ Asm->EmitInt8((*DI)->Flags);
+ }
+ }
+ // Emit a 0 to terminate the data unless we have a hash collision.
+ if (PrevHash != (*HI)->HashValue)
+ Asm->EmitInt32(0);
+ PrevHash = (*HI)->HashValue;
+ }
+ }
+}
+
+// Emit the entire data structure to the output file.
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
+ DwarfDebug *D) {
+ // Emit the header.
+ EmitHeader(Asm);
+
+ // Emit the buckets.
+ EmitBuckets(Asm);
+
+ // Emit the hashes.
+ EmitHashes(Asm);
+
+ // Emit the offsets.
+ EmitOffsets(Asm, SecBegin);
+
+ // Emit the hash data.
+ EmitData(Asm, D);
+}
+
+#ifndef NDEBUG
+void DwarfAccelTable::print(raw_ostream &O) {
+
+ Header.print(O);
+ HeaderData.print(O);
+
+ O << "Entries: \n";
+ for (StringMap<DataArray>::const_iterator
+ EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+ O << "Name: " << EI->getKeyData() << "\n";
+ for (DataArray::const_iterator DI = EI->second.begin(),
+ DE = EI->second.end();
+ DI != DE; ++DI)
+ (*DI)->print(O);
+ }
+
+ O << "Buckets and Hashes: \n";
+ for (size_t i = 0, e = Buckets.size(); i < e; ++i)
+ for (HashList::const_iterator HI = Buckets[i].begin(),
+ HE = Buckets[i].end(); HI != HE; ++HI)
+ (*HI)->print(O);
+
+ O << "Data: \n";
+ for (std::vector<HashData*>::const_iterator
+ DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
+ (*DI)->print(O);
+
+#endif
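The table is keyed by the classic DJB string hash, and bucket assignment is
simply the hash modulo the bucket count; the emitter above is bookkeeping
around that pair. Standalone:

    #include <cassert>
    #include <cstdint>
    #include <string>

    // DJB hash, as in DwarfAccelTable::HashDJB: h = h * 33 + c.
    uint32_t hashDJB(const std::string &Str) {
      uint32_t h = 5381;
      for (size_t i = 0, e = Str.size(); i != e; ++i)
        h = ((h << 5) + h) + Str[i];
      return h;
    }

    int main() {
      const uint32_t BucketCount = 16; // stands in for Header.bucket_count
      uint32_t Bucket = hashDJB("main") % BucketCount;
      assert(Bucket < BucketCount);
    }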
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
new file mode 100644
index 000000000000..2278d4c784f4
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -0,0 +1,290 @@
+//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf accelerator tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
+#include "DIE.h"
+#include <vector>
+#include <map>
+
+// The dwarf accelerator tables are an indirect hash table optimized
+// for null lookup rather than access to known data. They are output into
+// an on-disk format that looks like this:
+//
+// .-------------.
+// | HEADER |
+// |-------------|
+// | BUCKETS |
+// |-------------|
+// | HASHES |
+// |-------------|
+// | OFFSETS |
+// |-------------|
+// | DATA |
+// `-------------'
+//
+// where the header contains a magic number, version, type of hash function,
+// the number of buckets, total number of hashes, and room for a special
+// struct of data and the length of that struct.
+//
+// The buckets contain an index (e.g. 6) into the hashes array. The hashes
+// section contains all of the 32-bit hash values in contiguous memory, and
+// the offsets contain the offset into the data area for the particular
+// hash.
+//
+// For a lookup example, we could hash a function name and take it modulo the
+// number of buckets giving us our bucket. From there we take the bucket value
+// as an index into the hashes table and look at each successive hash as long
+// as the hash value is still the same modulo result (bucket value) as earlier.
+// If we have a match we look at that same entry in the offsets table and
+// grab the offset in the data for our final match.
+
+namespace llvm {
+
+class AsmPrinter;
+class DIE;
+class DwarfDebug;
+
+class DwarfAccelTable {
+
+ enum HashFunctionType {
+ eHashFunctionDJB = 0u
+ };
+
+ static uint32_t HashDJB (StringRef Str) {
+ uint32_t h = 5381;
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ h = ((h << 5) + h) + Str[i];
+ return h;
+ }
+
+ // Helper function to compute the number of buckets needed based on
+ // the number of unique hashes.
+ void ComputeBucketCount (void);
+
+ struct TableHeader {
+ uint32_t magic; // 'HASH' magic value to allow endian detection
+ uint16_t version; // Version number.
+ uint16_t hash_function; // The hash function enumeration that was used.
+ uint32_t bucket_count; // The number of buckets in this hash table.
+ uint32_t hashes_count; // The total number of unique hash values
+ // and hash data offsets in this table.
+ uint32_t header_data_len; // The bytes to skip to get to the hash
+ // indexes (buckets) for correct alignment.
+ // Also written to disk is the implementation specific header data.
+
+ static const uint32_t MagicHash = 0x48415348;
+
+ TableHeader (uint32_t data_len) :
+ magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
+ bucket_count (0), hashes_count (0), header_data_len (data_len)
+ {}
+
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Magic: " << format("0x%x", magic) << "\n"
+ << "Version: " << version << "\n"
+ << "Hash Function: " << hash_function << "\n"
+ << "Bucket Count: " << bucket_count << "\n"
+ << "Header Data Length: " << header_data_len << "\n";
+ }
+ void dump() { print(dbgs()); }
+#endif
+ };
+
+public:
+ // The HeaderData describes the form of each set of data. In general this
+ // is as a list of atoms (atom_count) where each atom contains a type
+ // (AtomType type) of data, and an encoding form (form). In the case of
+ // data that is referenced via DW_FORM_ref_* the die_offset_base is
+ // used to describe the offset for all forms in the list of atoms.
+ // This also serves as a public interface of sorts.
+ // When written to disk this will have the form:
+ //
+ // uint32_t die_offset_base
+ // uint32_t atom_count
+ // atom_count Atoms
+ enum AtomType {
+ eAtomTypeNULL = 0u,
+ eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding
+ eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that
+ // contains the item in question
+ eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as
+ // DW_FORM_data1 (if no tags exceed 255) or
+ // DW_FORM_data2.
+ eAtomTypeNameFlags = 4u, // Flags from enum NameFlags
+ eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags
+ };
+
+ enum TypeFlags {
+ eTypeFlagClassMask = 0x0000000fu,
+
+ // Always set for C++, only set for ObjC if this is the
+ // @implementation for a class.
+ eTypeFlagClassIsImplementation = ( 1u << 1 )
+ };
+
+ // Make these public so that they can be used as a general interface to
+ // the class.
+ struct Atom {
+ AtomType type; // enum AtomType
+ uint16_t form; // DWARF DW_FORM_ defines
+
+ Atom(AtomType type, uint16_t form) : type(type), form(form) {}
+ static const char * AtomTypeString(enum AtomType);
+#ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Type: " << AtomTypeString(type) << "\n"
+ << "Form: " << dwarf::FormEncodingString(form) << "\n";
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ private:
+ struct TableHeaderData {
+
+ uint32_t die_offset_base;
+ std::vector<Atom> Atoms;
+
+ TableHeaderData(std::vector<DwarfAccelTable::Atom> &AtomList,
+ uint32_t offset = 0) :
+ die_offset_base(offset) {
+ for (size_t i = 0, e = AtomList.size(); i != e; ++i)
+ Atoms.push_back(AtomList[i]);
+ }
+
+ TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0)
+ : die_offset_base(offset) {
+ Atoms.push_back(Atom);
+ }
+
+#ifndef NDEBUG
+ void print (raw_ostream &O) {
+ O << "die_offset_base: " << die_offset_base << "\n";
+ for (size_t i = 0; i < Atoms.size(); i++)
+ Atoms[i].print(O);
+ }
+ void dump() {
+ print(dbgs());
+ }
+#endif
+ };
+
+ // The data itself consists of a str_offset, a count of the DIEs in the
+ // hash and the offsets to the DIEs themselves.
+ // On disk each data section is ended with a 0 KeyType as the end of the
+ // hash chain.
+ // On output this looks like:
+ // uint32_t str_offset
+ // uint32_t hash_data_count
+ // HashData[hash_data_count]
+public:
+ struct HashDataContents {
+ DIE *Die; // Offsets
+ char Flags; // Specific flags to output
+
+ HashDataContents(DIE *D, char Flags) :
+ Die(D),
+ Flags(Flags) { }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) const {
+ O << " Offset: " << Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n";
+ O << " Flags: " << Flags << "\n";
+ }
+ #endif
+ };
+private:
+ struct HashData {
+ StringRef Str;
+ uint32_t HashValue;
+ MCSymbol *Sym;
+ std::vector<struct HashDataContents*> Data; // offsets
+ HashData(StringRef S) : Str(S) {
+ HashValue = DwarfAccelTable::HashDJB(S);
+ }
+ void addData(struct HashDataContents *Datum) { Data.push_back(Datum); }
+ #ifndef NDEBUG
+ void print(raw_ostream &O) {
+ O << "Name: " << Str << "\n";
+ O << " Hash Value: " << format("0x%x", HashValue) << "\n";
+ O << " Symbol: " ;
+ if (Sym) Sym->print(O);
+ else O << "<none>";
+ O << "\n";
+ for (size_t i = 0; i < Data.size(); i++) {
+ O << " Offset: " << Data[i]->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
+ O << " Flags: " << Data[i]->Flags << "\n";
+ }
+ }
+ void dump() {
+ print(dbgs());
+ }
+ #endif
+ };
+
+ DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT
+ void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT
+
+ // Internal Functions
+ void EmitHeader(AsmPrinter *);
+ void EmitBuckets(AsmPrinter *);
+ void EmitHashes(AsmPrinter *);
+ void EmitOffsets(AsmPrinter *, MCSymbol *);
+ void EmitData(AsmPrinter *, DwarfDebug *D);
+
+ // Output Variables
+ TableHeader Header;
+ TableHeaderData HeaderData;
+ std::vector<HashData*> Data;
+
+ // String Data
+ typedef std::vector<struct HashDataContents*> DataArray;
+ typedef StringMap<DataArray> StringEntries;
+ StringEntries Entries;
+
+ // Buckets/Hashes/Offsets
+ typedef std::vector<HashData*> HashList;
+ typedef std::vector<HashList> BucketList;
+ BucketList Buckets;
+ HashList Hashes;
+
+ // Public Implementation
+ public:
+ DwarfAccelTable(DwarfAccelTable::Atom);
+ DwarfAccelTable(std::vector<DwarfAccelTable::Atom> &);
+ ~DwarfAccelTable();
+ void AddName(StringRef, DIE*, char = 0);
+ void FinalizeTable(AsmPrinter *, const char *);
+ void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *);
+#ifndef NDEBUG
+ void print(raw_ostream &O);
+ void dump() { print(dbgs()); }
+#endif
+};
+
+}
+#endif
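
Taken together, the private emitters declared above (EmitHeader, EmitBuckets, EmitHashes, EmitOffsets, EmitData) lay the table out in that order: fixed header, header data, bucket indices, the hash values, the offsets to each hash's data, and finally the per-name data blobs terminated by a zero word per the comments above. The hash behind HashDJB is the classic Bernstein string hash, and entries are distributed by taking the hash modulo the bucket count. A minimal standalone sketch of that hashing and bucketing (function names here are illustrative, not part of the class):

    #include <cstdint>

    // Bernstein ("DJB") string hash, seeded with 5381 and folding each byte
    // in as h = h * 33 + c; this is what fills HashData::HashValue.
    static uint32_t djbHash(const char *S) {
      uint32_t H = 5381;
      while (unsigned char C = static_cast<unsigned char>(*S++))
        H = H * 33 + C;
      return H;
    }

    // Each hashed name lands in bucket (hash % bucket_count); a reader scans
    // that bucket's hashes and follows the matching offset to the HashData.
    static unsigned bucketFor(uint32_t Hash, unsigned BucketCount) {
      return Hash % BucketCount;
    }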
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 8ed4f4c43a7c..d975f1f97bea 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -142,12 +142,14 @@ void DwarfCFIException::EndFunction() {
Asm->OutStreamer.EmitCFIEndProc();
+ if (!shouldEmitPersonality)
+ return;
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
// Map all labels and get rid of any dead landing pads.
MMI->TidyLandingPads();
- if (shouldEmitPersonality)
- EmitExceptionTable();
+ EmitExceptionTable();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 6fe476d02ef7..69dc454ae1d7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,12 +13,14 @@
#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "llvm/Constants.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
@@ -30,8 +32,9 @@
using namespace llvm;
/// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW)
- : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
+ DwarfDebug *DW)
+ : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
@@ -67,12 +70,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute,
Die->addValue(Attribute, Form, Value);
}
-/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
-void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form,
- StringRef String) {
- DIEValue *Value = new (DIEValueAllocator) DIEString(String);
- Die->addValue(Attribute, Form, Value);
+/// addString - Add a string attribute data and value. We always emit a
+/// reference to the string pool instead of immediate strings so that DIEs have
+/// more predictable sizes.
+void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) {
+ MCSymbol *Symb = DD->getStringPoolEntry(String);
+ DIEValue *Value;
+ if (Asm->needsRelocationsForDwarfStringPool())
+ Value = new (DIEValueAllocator) DIELabel(Symb);
+ else {
+ MCSymbol *StringPool = DD->getStringPool();
+ Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
+ }
+ Die->addValue(Attribute, dwarf::DW_FORM_strp, Value);
}
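
With this change every string attribute goes through the string pool and is emitted as DW_FORM_strp: on targets that need relocations for the Dwarf string pool the value is the pool symbol itself (DIELabel), otherwise it is the distance between the string's symbol and the start-of-section label (DIEDelta). A toy illustration of the no-relocation case, with hypothetical addresses standing in for what the streamer resolves at emission time:

    #include <cassert>
    #include <cstdint>

    // DIEDelta(Symb, StringPool) ultimately becomes the string's offset
    // within .debug_str: symbol address minus section start.
    static uint32_t strpValue(uint64_t SymbolAddr, uint64_t SectionStart) {
      assert(SymbolAddr >= SectionStart && "string must live inside .debug_str");
      return static_cast<uint32_t>(SymbolAddr - SectionStart);
    }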
/// addLabel - Add a Dwarf label attribute data and value.
@@ -98,7 +108,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
Die->addValue(Attribute, Form, createDIEEntry(Entry));
}
-
/// addBlock - Add block data.
///
void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
@@ -135,8 +144,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
unsigned Line = G.getLineNumber();
if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(),
- G.getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -148,14 +156,14 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
if (!SP.Verify())
return;
- // If the line number is 0, don't add it.
- if (SP.getLineNumber() == 0)
- return;
+ // If the line number is 0, don't add it.
unsigned Line = SP.getLineNumber();
- if (!SP.getContext().Verify())
+ if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory());
+
+ unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(),
+ SP.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -169,9 +177,28 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) {
return;
unsigned Line = Ty.getLineNumber();
- if (Line == 0 || !Ty.getContext().Verify())
+ if (Line == 0)
return;
- unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory());
+ unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(),
+ Ty.getDirectory());
+ assert(FileID && "Invalid file id");
+ addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+ addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+ // Verify type.
+ if (!Ty.Verify())
+ return;
+
+ unsigned Line = Ty.getLineNumber();
+ if (Line == 0)
+ return;
+ DIFile File = Ty.getFile();
+ unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(),
+ File.getDirectory());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -458,7 +485,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) {
/// addConstantValue - Add constant value entry in variable DIE.
bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
DIType Ty) {
- assert (MO.isImm() && "Invalid machine operand!");
+ assert(MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
int SizeInBits = -1;
bool SignedConstant = isTypeSigned(Ty, &SizeInBits);
@@ -558,8 +585,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
Buffer.addChild(getOrCreateTemplateValueParameterDIE(
DITemplateValueParameter(Element)));
}
-
}
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) {
if (Context.isType()) {
@@ -598,13 +625,29 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
assert(Ty.isDerivedType() && "Unknown kind of DIType");
constructTypeDIE(*TyDIE, DIDerivedType(Ty));
}
-
+ // If this is a named, finished type then include it in the list of types
+ // for the accelerator tables.
+ if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
+ bool IsImplementation = 0;
+ if (Ty.isCompositeType()) {
+ DICompositeType CT(Ty);
+ // A runtime language of 0 actually means C/C++, and any
+ // non-zero value is some version of Objective-C/C++.
+ IsImplementation = (CT.getRunTimeLang() == 0) ||
+ CT.isObjcClassComplete();
+ }
+ unsigned Flags = IsImplementation ?
+ DwarfAccelTable::eTypeFlagClassIsImplementation : 0;
+ addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+ }
+
addToContextOwner(TyDIE, Ty.getContext());
return TyDIE;
}
/// addType - Add a new type attribute to the specified entity.
-void CompileUnit::addType(DIE *Entity, DIType Ty) {
+void CompileUnit::addType(DIE *Entity, DIType Ty,
+ unsigned Attribute) {
if (!Ty.Verify())
return;
@@ -612,7 +655,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
DIEEntry *Entry = getDIEEntry(Ty);
// If it exists then use the existing value.
if (Entry) {
- Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
return;
}
@@ -622,7 +665,7 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) {
// Set up proxy.
Entry = createDIEEntry(Buffer);
insertDIEEntry(Ty, Entry);
- Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+ Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry);
// If this is a complete composite type then include it in the
// list of global types.
@@ -662,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
StringRef Name = BTy.getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) {
Buffer.setTag(dwarf::DW_TAG_unspecified_type);
@@ -671,8 +714,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
}
Buffer.setTag(dwarf::DW_TAG_base_type);
- addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
- BTy.getEncoding());
+ addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ BTy.getEncoding());
uint64_t Size = BTy.getSizeInBits() >> 3;
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -696,10 +739,10 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
// Add size if non-zero (derived types might be zero-sized.)
- if (Size)
+ if (Size && Tag != dwarf::DW_TAG_pointer_type)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
// Add source line info if available and TyDesc is not a forward declaration.
@@ -755,8 +798,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
Buffer.addChild(Arg);
}
}
- // Add prototype flag.
- if (isPrototyped)
+ // Add prototype flag if we're dealing with a C language and the
+ // function has been prototyped.
+ if (isPrototyped &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
@@ -779,13 +826,13 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DISubprogram SP(Element);
ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
if (SP.isProtected())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (SP.isPrivate())
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
else
- addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (SP.isExplicit())
addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
@@ -793,15 +840,54 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
else if (Element.isVariable()) {
DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
- addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- DV.getName());
+ addString(ElemDie, dwarf::DW_AT_name, DV.getName());
addType(ElemDie, DV.getType());
addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(ElemDie, DV);
- } else if (Element.isDerivedType())
- ElemDie = createMemberDIE(DIDerivedType(Element));
- else
+ } else if (Element.isDerivedType()) {
+ DIDerivedType DDTy(Element);
+ if (DDTy.getTag() == dwarf::DW_TAG_friend) {
+ ElemDie = new DIE(dwarf::DW_TAG_friend);
+ addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend);
+ } else
+ ElemDie = createMemberDIE(DIDerivedType(Element));
+ } else if (Element.isObjCProperty()) {
+ DIObjCProperty Property(Element);
+ ElemDie = new DIE(Property.getTag());
+ StringRef PropertyName = Property.getObjCPropertyName();
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
+ addType(ElemDie, Property.getType());
+ addSourceLine(ElemDie, Property);
+ StringRef GetterName = Property.getObjCPropertyGetterName();
+ if (!GetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
+ StringRef SetterName = Property.getObjCPropertySetterName();
+ if (!SetterName.empty())
+ addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
+ unsigned PropertyAttributes = 0;
+ if (Property.isReadOnlyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
+ if (Property.isReadWriteObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite;
+ if (Property.isAssignObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign;
+ if (Property.isRetainObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain;
+ if (Property.isCopyObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy;
+ if (Property.isNonAtomicObjCProperty())
+ PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic;
+ if (PropertyAttributes)
+ addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0,
+ PropertyAttributes);
+
+ DIEEntry *Entry = getDIEEntry(Element);
+ if (!Entry) {
+ Entry = createDIEEntry(ElemDie);
+ insertDIEEntry(Element, Entry);
+ }
+ } else
continue;
Buffer.addChild(ElemDie);
}
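
The Apple property attributes accumulate as a bitmask, so a declaration like `@property (nonatomic, readonly) NSString *name;` ends up with the OR of the readonly and nonatomic flags, and nothing is emitted when no flag applies. A reduced sketch of the same accumulation; the enumerators below are illustrative stand-ins, not the real dwarf::DW_APPLE_PROPERTY_* values:

    #include <cstdint>

    enum : uint32_t {            // stand-ins, not the real DWARF constants
      kPropReadOnly  = 1u << 0,
      kPropNonAtomic = 1u << 1,
    };

    static uint32_t propertyAttributes(bool ReadOnly, bool NonAtomic) {
      uint32_t Attrs = 0;
      if (ReadOnly)  Attrs |= kPropReadOnly;
      if (NonAtomic) Attrs |= kPropNonAtomic;
      return Attrs;              // attribute added only when non-zero
    }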
@@ -809,11 +895,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (CTy.isAppleBlockExtension())
addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
- unsigned RLang = CTy.getRunTimeLang();
- if (RLang)
- addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
- dwarf::DW_FORM_data1, RLang);
-
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
@@ -827,7 +908,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type,
dwarf::DW_FORM_flag, 1);
- if (Tag == dwarf::DW_TAG_class_type)
+ // Add template parameters to a class, structure or union types.
+ // FIXME: The support isn't in the metadata for this yet.
+ if (Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type)
addTemplateParams(Buffer, CTy.getTemplateParams());
break;
@@ -838,11 +923,11 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(&Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
|| Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
- {
+ {
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -857,6 +942,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
addSourceLine(&Buffer, CTy);
+
+ // No harm in adding the runtime language to the declaration.
+ unsigned RLang = CTy.getRunTimeLang();
+ if (RLang)
+ addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+ dwarf::DW_FORM_data1, RLang);
}
}
@@ -870,7 +961,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
addType(ParamDIE, TP.getType());
- addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ addString(ParamDIE, dwarf::DW_AT_name, TP.getName());
return ParamDIE;
}
@@ -885,7 +976,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV)
ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
addType(ParamDIE, TPV.getType());
if (!TPV.getName().empty())
- addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addString(ParamDIE, dwarf::DW_AT_name, TPV.getName());
addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
TPV.getValue());
return ParamDIE;
@@ -898,8 +989,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
NDie = new DIE(dwarf::DW_TAG_namespace);
insertDIE(NS, NDie);
- if (!NS.getName().empty())
- addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+ if (!NS.getName().empty()) {
+ addString(NDie, dwarf::DW_AT_name, NS.getName());
+ addAccelNamespace(NS.getName(), NDie);
+ } else
+ addAccelNamespace("(anonymous namespace)", NDie);
addSourceLine(NDie, NS);
addToContextOwner(NDie, NS.getContext());
return NDie;
@@ -921,6 +1015,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SPDie)
return SPDie;
+ DISubprogram SPDecl = SP.getFunctionDeclaration();
+ DIE *DeclDie = NULL;
+ if (SPDecl.isSubprogram()) {
+ DeclDie = getOrCreateSubprogramDIE(SPDecl);
+ }
+
SPDie = new DIE(dwarf::DW_TAG_subprogram);
// DW_TAG_inlined_subroutine may refer to this DIE.
@@ -932,25 +1032,36 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add function template parameters.
addTemplateParams(*SPDie, SP.getTemplateParams());
+ // Unfortunately this code needs to stay here to work around
+ // a bug in older gdbs that requires the linkage name to resolve
+ // multiple template functions.
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
- addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
+ addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
// If this DIE is going to refer declaration info using AT_specification
// then there is no need to add other attributes.
- if (SP.getFunctionDeclaration().isSubprogram())
+ if (DeclDie) {
+ // Refer function declaration directly.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ DeclDie);
+
return SPDie;
+ }
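
Resolving the declaration before building the definition gives the usual specification split: the declaration DIE (created inside the class) keeps the name and prototype, while the definition only points back at it, and the early return above skips re-adding those attributes. A compressed sketch of that flow using the same helpers (the DIE variables are hypothetical):

    // Out-of-line definition of a method previously declared in a class:
    DIE *Decl = getOrCreateSubprogramDIE(SPDecl);   // name, prototype, ...
    DIE *Def = new DIE(dwarf::DW_TAG_subprogram);
    // The definition carries only a back reference; duplicated attributes
    // are avoided because the DeclDie early return fires.
    addDIEEntry(Def, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, Decl);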
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
- addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- SP.getName());
+ addString(SPDie, dwarf::DW_AT_name, SP.getName());
addSourceLine(SPDie, SP);
- if (SP.isPrototyped())
+ // Add the prototype if we have a prototype and we have a C like
+ // language.
+ if (SP.isPrototyped() &&
+ (Language == dwarf::DW_LANG_C89 ||
+ Language == dwarf::DW_LANG_C99 ||
+ Language == dwarf::DW_LANG_ObjC))
addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
@@ -965,7 +1076,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
unsigned VK = SP.getVirtuality();
if (VK) {
- addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIEBlock *Block = getDIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
@@ -1052,31 +1163,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
insertDIE(N, VariableDIE);
// Add name.
- addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
+ addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
StringRef LinkageName = GV.getLinkageName();
bool isGlobalVariable = GV.getGlobal() != NULL;
if (!LinkageName.empty() && isGlobalVariable)
- addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
- dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name,
+ getRealLinkageName(LinkageName));
// Add type.
DIType GTy = GV.getType();
addType(VariableDIE, GTy);
// Add scoping info.
- if (!GV.isLocalToUnit()) {
+ if (!GV.isLocalToUnit())
addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- // Expose as global.
- addGlobal(GV.getName(), VariableDIE);
- }
+
// Add line number info.
addSourceLine(VariableDIE, GV);
// Add to context owner.
DIDescriptor GVContext = GV.getContext();
addToContextOwner(VariableDIE, GVContext);
// Add location.
+ bool addToAccelTable = false;
+ DIE *VariableSpecDIE = NULL;
if (isGlobalVariable) {
+ addToAccelTable = true;
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Block, 0, dwarf::DW_FORM_udata,
@@ -1086,7 +1196,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
!GVContext.isFile() && !isSubprogramContext(GVContext)) {
// Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDIE);
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
@@ -1095,11 +1205,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addDie(VariableSpecDIE);
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
- }
+ }
} else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType());
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ addToAccelTable = true;
// GV is a merged global.
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
Value *Ptr = CE->getOperand(0);
@@ -1114,6 +1225,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
+ if (addToAccelTable) {
+ DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
+ addAccelName(GV.getName(), AddrDIE);
+
+ // If the linkage name is different from the name, output it into the
+ // name table as well.
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
+ addAccelName(GV.getLinkageName(), AddrDIE);
+ }
+
return;
}
@@ -1121,8 +1242,8 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) {
void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
- int64_t L = SR.getLo();
- int64_t H = SR.getHi();
+ uint64_t L = SR.getLo();
+ uint64_t H = SR.getHi();
// The L value defines the lower bounds which is typically zero for C/C++. The
// H value is the upper bounds. Values are 64 bit. H - L + 1 is the size
@@ -1135,8 +1256,8 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy)
return;
}
if (L)
- addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
- addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+ addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
Buffer.addChild(DW_Subrange);
}
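
With the bounds now unsigned, a C array such as `int a[10]` arrives as L = 0 and H = 9: the zero lower bound is the C/C++ default and is suppressed, so only DW_AT_upper_bound = 9 is emitted (the early-return guard for degenerate ranges sits just above the visible hunk). Mirrored with plain integers, the visible tail behaves like:

    #include <cstdint>
    #include <cstdio>

    // Bound emission for a subrange, as in constructSubrangeDIE's tail;
    // for int a[10] this prints only the upper bound of 9.
    static void emitBounds(uint64_t L, uint64_t H) {
      if (L)
        std::printf("DW_AT_lower_bound = %llu\n", (unsigned long long)L);
      std::printf("DW_AT_upper_bound = %llu\n", (unsigned long long)H);
    }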
@@ -1175,7 +1296,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer,
DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) {
DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
StringRef Name = ETy.getName();
- addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(Enumerator, dwarf::DW_AT_name, Name);
int64_t Value = ETy.getEnumValue();
addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
return Enumerator;
@@ -1212,8 +1333,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) {
addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsDIE);
else {
- addString(VariableDie, dwarf::DW_AT_name,
- dwarf::DW_FORM_string, Name);
+ addString(VariableDie, dwarf::DW_AT_name, Name);
addSourceLine(VariableDie, DV->getVariable());
addType(VariableDie, DV->getType());
}
@@ -1308,7 +1428,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
- addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ addString(MemberDie, dwarf::DW_AT_name, Name);
addType(MemberDie, DT.getTypeDerivedFrom());
@@ -1366,32 +1486,35 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) {
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_private);
// Otherwise C++ member and base classes are considered public.
else
- addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
- addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1,
dwarf::DW_VIRTUALITY_virtual);
// Objective-C properties.
+ if (MDNode *PNode = DT.getObjCProperty())
+ if (DIEEntry *PropertyDie = getDIEEntry(PNode))
+ MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ PropertyDie);
+
+ // This is only for backward compatibility.
StringRef PropertyName = DT.getObjCPropertyName();
if (!PropertyName.empty()) {
- addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string,
- PropertyName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
StringRef GetterName = DT.getObjCPropertyGetterName();
if (!GetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_getter,
- dwarf::DW_FORM_string, GetterName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName);
StringRef SetterName = DT.getObjCPropertySetterName();
if (!SetterName.empty())
- addString(MemberDie, dwarf::DW_AT_APPLE_property_setter,
- dwarf::DW_FORM_string, SetterName);
+ addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName);
unsigned PropertyAttributes = 0;
if (DT.isReadOnlyObjCProperty())
PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly;
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 785926579fa4..45e407e27ffa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -29,13 +29,17 @@ class ConstantInt;
class DbgVariable;
//===----------------------------------------------------------------------===//
-/// CompileUnit - This dwarf writer support class manages information associate
+/// CompileUnit - This dwarf writer support class manages information associated
/// with a source file.
class CompileUnit {
/// ID - File identifier for source.
///
unsigned ID;
+ /// Language - The DW_AT_language of the compile unit
+ ///
+ unsigned Language;
+
/// Die - Compile unit debug information entry.
///
const OwningPtr<DIE> CUDie;
@@ -56,14 +60,17 @@ class CompileUnit {
/// descriptors to debug information entries using a DIEEntry proxy.
DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
- /// Globals - A map of globally visible named entities for this unit.
- ///
- StringMap<DIE*> Globals;
-
/// GlobalTypes - A map of globally visible types for this unit.
///
StringMap<DIE*> GlobalTypes;
+ /// AccelNames - A map of names for the name accelerator table.
+ ///
+ StringMap<std::vector<DIE*> > AccelNames;
+ StringMap<std::vector<DIE*> > AccelObjC;
+ StringMap<std::vector<DIE*> > AccelNamespace;
+ StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes;
+
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -73,27 +80,56 @@ class CompileUnit {
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
public:
- CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+ CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
~CompileUnit();
// Accessors.
unsigned getID() const { return ID; }
+ unsigned getLanguage() const { return Language; }
DIE* getCUDie() const { return CUDie.get(); }
- const StringMap<DIE*> &getGlobals() const { return Globals; }
const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
+ const StringMap<std::vector<DIE*> > &getAccelNames() const {
+ return AccelNames;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelObjC() const {
+ return AccelObjC;
+ }
+ const StringMap<std::vector<DIE*> > &getAccelNamespace() const {
+ return AccelNamespace;
+ }
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > >
+ &getAccelTypes() const {
+ return AccelTypes;
+ }
+
/// hasContent - Return true if this compile unit has something to write out.
///
bool hasContent() const { return !CUDie->getChildren().empty(); }
- /// addGlobal - Add a new global entity to the compile unit.
- ///
- void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
-
/// addGlobalType - Add a new global type to the compile unit.
///
void addGlobalType(DIType Ty);
+
+ /// addAccelName - Add a new name to the name accelerator table.
+ void addAccelName(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNames[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelObjC(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelObjC[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelNamespace(StringRef Name, DIE *Die) {
+ std::vector<DIE*> &DIEs = AccelNamespace[Name];
+ DIEs.push_back(Die);
+ }
+ void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) {
+ std::vector<std::pair<DIE*, unsigned > > &DIEs = AccelTypes[Name];
+ DIEs.push_back(Die);
+ }
+
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
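
Each accelerator map is name-keyed and multi-valued, so every spelling collects all the DIEs that share it, overloads included, and the emitters later walk each (name, DIE list) pair. For example (DIE pointers hypothetical):

    // Two overloads named "f" collect under a single accelerator key:
    CU->addAccelName("f", DieForIntOverload);
    CU->addAccelName("f", DieForFloatOverload);
    // getAccelNames() now maps "f" to both DIEs, and emitAccelNames() will
    // add each of them to the on-disk table under the same string.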
@@ -150,8 +186,7 @@ public:
/// addString - Add a string attribute data and value.
///
- void addString(DIE *Die, unsigned Attribute, unsigned Form,
- const StringRef Str);
+ void addString(DIE *Die, unsigned Attribute, const StringRef Str);
/// addLabel - Add a Dwarf label attribute data and value.
///
@@ -178,6 +213,7 @@ public:
void addSourceLine(DIE *Die, DISubprogram SP);
void addSourceLine(DIE *Die, DIType Ty);
void addSourceLine(DIE *Die, DINameSpace NS);
+ void addSourceLine(DIE *Die, DIObjCProperty Ty);
/// addAddress - Add an address attribute to a die based on the location
/// provided.
@@ -225,8 +261,10 @@ public:
/// addToContextOwner - Add Die into the list of its context owner's children.
void addToContextOwner(DIE *Die, DIDescriptor Context);
- /// addType - Add a new type attribute to the specified entity.
- void addType(DIE *Entity, DIType Ty);
+ /// addType - Add a new type attribute to the specified entity. This takes
+ /// an attribute parameter because DW_AT_friend attributes are also
+ /// type references.
+ void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type);
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *getOrCreateNameSpace(DINameSpace NS);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1b7e370fca09..cb7887890cda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -14,10 +14,12 @@
#define DEBUG_TYPE "dwarfdebug"
#include "DwarfDebug.h"
#include "DIE.h"
+#include "DwarfAccelTable.h"
#include "DwarfCompileUnit.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
#include "llvm/Instructions.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absence of debug location information explicit."),
cl::init(false));
+static cl::opt<bool> DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
+ cl::desc("Output prototype dwarf accelerator tables."),
+ cl::init(false));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -128,6 +134,11 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
+
+ // Turn on accelerator tables for Darwin.
+ if (Triple(M->getTargetTriple()).isOSDarwin())
+ DwarfAccelTables = true;
+
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
beginModule(M);
@@ -136,6 +147,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfDebug::~DwarfDebug() {
}
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+ const char *SymbolStem = 0) {
+ Asm->OutStreamer.SwitchSection(Section);
+ if (!SymbolStem) return 0;
+
+ MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+ Asm->OutStreamer.EmitLabel(TmpSym);
+ return TmpSym;
+}
+
+MCSymbol *DwarfDebug::getStringPool() {
+ return Asm->GetTempSymbol("section_str");
+}
+
MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
if (Entry.first) return Entry.first;
@@ -144,7 +171,6 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
return Entry.first = Asm->GetTempSymbol("string", Entry.second);
}
-
/// assignAbbrevNumber - Define a unique number for the abbreviation.
///
void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
@@ -178,6 +204,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
+static bool isObjCClass(StringRef Name) {
+ return Name.startswith("+") || Name.startswith("-");
+}
+
+static bool hasObjCCategory(StringRef Name) {
+ if (!isObjCClass(Name)) return false;
+
+ size_t pos = Name.find(')');
+ if (pos != std::string::npos) {
+ if (Name[pos+1] != ' ') return false;
+ return true;
+ }
+ return false;
+}
+
+static void getObjCClassCategory(StringRef In, StringRef &Class,
+ StringRef &Category) {
+ if (!hasObjCCategory(In)) {
+ Class = In.slice(In.find('[') + 1, In.find(' '));
+ Category = "";
+ return;
+ }
+
+ Class = In.slice(In.find('[') + 1, In.find('('));
+ Category = In.slice(In.find('[') + 1, In.find(' '));
+ return;
+}
+
+static StringRef getObjCMethodName(StringRef In) {
+ return In.slice(In.find(' ') + 1, In.find(']'));
+}
+
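
A worked example of the three helpers, written as a hypothetical unit test that assumes it lives in the same file as the statics above (they are not visible outside it): for `+[NSString(MyAdditions) stringWithStuff:]`, isObjCClass is true (leading '+') and hasObjCCategory is true (')' followed by a space), so the slices come out as:

    #include <cassert>

    static void checkObjCNameParsing() {
      StringRef Class, Category;
      getObjCClassCategory("+[NSString(MyAdditions) stringWithStuff:]",
                           Class, Category);
      assert(Class == "NSString");                 // between '[' and '('
      assert(Category == "NSString(MyAdditions)"); // between '[' and ' '
      assert(getObjCMethodName("+[NSString(MyAdditions) stringWithStuff:]")
             == "stringWithStuff:");               // between ' ' and ']'
      // Without a category the class name runs up to the space:
      getObjCClassCategory("-[NSObject description]", Class, Category);
      assert(Class == "NSObject" && Category.empty());
    }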
+// Add the various names to the Dwarf accelerator tables.
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
+ DIE* Die) {
+ if (!SP.isDefinition()) return;
+
+ TheCU->addAccelName(SP.getName(), Die);
+
+ // If the linkage name is different from the name, output it into the
+ // name table as well.
+ if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
+ TheCU->addAccelName(SP.getLinkageName(), Die);
+
+ // If this is an Objective-C selector name add it to the ObjC accelerator
+ // too.
+ if (isObjCClass(SP.getName())) {
+ StringRef Class, Category;
+ getObjCClassCategory(SP.getName(), Class, Category);
+ TheCU->addAccelObjC(Class, Die);
+ if (Category != "")
+ TheCU->addAccelObjC(Category, Die);
+ // Also add the base method name to the name table.
+ TheCU->addAccelName(getObjCMethodName(SP.getName()), Die);
+ }
+}
+
/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
@@ -190,11 +273,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
DISubprogram SP(SPNode);
DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (SPDecl.isSubprogram())
- // Refer function declaration directly.
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
- SPCU->getOrCreateSubprogramDIE(SPDecl));
- else {
+ if (!SPDecl.isSubprogram()) {
// There is not any need to generate specification DIE for a function
// defined at compile unit level. If a function is defined inside another
// function then gdb prefers the definition at top level and but does not
@@ -203,7 +282,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
!SP.getContext().isFile() &&
!isSubprogramContext(SP.getContext())) {
- SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments.
DICompositeType SPTy = SP.getType();
@@ -241,6 +320,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ // Add name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_subprogram nodes.
+ addSubprogramNames(SPCU, SP, SPDie);
+
return SPDie;
}
@@ -248,7 +331,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
-
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
@@ -294,10 +376,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
/// of the function.
DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
LexicalScope *Scope) {
-
const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
- assert (Ranges.empty() == false
- && "LexicalScope does not have instruction markers!");
+ assert(Ranges.empty() == false &&
+ "LexicalScope does not have instruction markers!");
if (!Scope->getScopeNode())
return NULL;
@@ -314,8 +395,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
if (StartLabel == 0 || EndLabel == 0) {
- assert (0 && "Unexpected Start and End labels for a inlined scope!");
- return 0;
+ llvm_unreachable("Unexpected Start and End labels for a inlined scope!");
}
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
@@ -358,16 +438,20 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
I = InlineInfo.find(InlinedSP);
if (I == InlineInfo.end()) {
- InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
- ScopeDIE));
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE));
InlinedSPNodes.push_back(InlinedSP);
} else
I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
DILocation DL(Scope->getInlinedAt());
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+ TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0,
+ GetOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+ // Add name to the name table; we do this here because we're guaranteed
+ // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
+ addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+
return ScopeDIE;
}
@@ -376,7 +460,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
- SmallVector <DIE *, 8> Children;
+ SmallVector<DIE *, 8> Children;
// Collect arguments for current function.
if (LScopes.isCurrentFunctionScope(Scope))
@@ -426,39 +510,39 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
ScopeDIE->addChild(*I);
if (DS.isSubprogram())
- TheCU->addPubTypes(DISubprogram(DS));
+ TheCU->addPubTypes(DISubprogram(DS));
- return ScopeDIE;
+ return ScopeDIE;
}
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-
unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
StringRef DirName) {
// If FE did not provide a file name, then assume stdin.
if (FileName.empty())
return GetOrCreateSourceID("<stdin>", StringRef());
- // MCStream expects full path name as filename.
- if (!DirName.empty() && !sys::path::is_absolute(FileName)) {
- SmallString<128> FullPathName = DirName;
- sys::path::append(FullPathName, FileName);
- // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
- return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
- }
+ // TODO: this might not belong here. See if we can factor this better.
+ if (DirName == CompilationDir)
+ DirName = "";
- StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
- if (Entry.getValue())
- return Entry.getValue();
+ unsigned SrcId = SourceIdMap.size()+1;
- unsigned SrcId = SourceIdMap.size();
- Entry.setValue(SrcId);
+ // We look up the file/dir pair by concatenating them with a zero byte.
+ SmallString<128> NamePair;
+ NamePair += DirName;
+ NamePair += '\0'; // Zero bytes are not allowed in paths.
+ NamePair += FileName;
+
+ StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
+ if (Ent.getValue() != SrcId)
+ return Ent.getValue();
// Print out a .file directive to specify files for .loc directives.
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName);
return SrcId;
}
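
The new scheme keys the map on directory and file jointly, using an embedded NUL (which cannot occur inside a path) so no legal pair can alias another, and hands out ids starting at 1, the numbering the .file directive expects. A standalone sketch of the same keying, with std::map standing in for the StringMap:

    #include <map>
    #include <string>

    static std::map<std::string, unsigned> SourceIds;

    static unsigned getOrCreateSourceId(const std::string &Dir,
                                        const std::string &File) {
      std::string Key = Dir + '\0' + File; // NUL separates the two components
      unsigned NextId = SourceIds.size() + 1;
      auto It = SourceIds.emplace(Key, NextId).first;
      return It->second;                   // existing id wins on a re-query
    }

    // "subdir" + "a.c" and "" + "subdira.c" now get distinct ids.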
@@ -468,39 +552,36 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
- StringRef Dir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(FN, Dir);
+ CompilationDir = DIUnit.getDirectory();
+ unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this);
- NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
- DIUnit.getProducer());
+ CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
+ NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
DIUnit.getLanguage());
- NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
- // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
- // simplifies debug range entries.
- NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+ NewCU->addString(Die, dwarf::DW_AT_name, FN);
+ // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+ // into an entity.
+ NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is a offset of line number information for this
// compile unit in debug_line section.
- if(Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
+ if (Asm->MAI->doesDwarfRequireRelocationForSectionOffset())
NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
Asm->GetTempSymbol("section_line"));
else
NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
- if (!Dir.empty())
- NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+ if (!CompilationDir.empty())
+ NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
if (DIUnit.isOptimized())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
- NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string,
- Flags);
+ NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
- unsigned RVer = DIUnit.getRunTimeVersion();
- if (RVer)
+ if (unsigned RVer = DIUnit.getRunTimeVersion())
NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
@@ -513,6 +594,11 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
/// construct SubprogramDIE - Construct subprogram DIE.
void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
const MDNode *N) {
+ CompileUnit *&CURef = SPMap[N];
+ if (CURef)
+ return;
+ CURef = TheCU;
+
DISubprogram SP(N);
if (!SP.isDefinition())
// This is a method declaration which will be handled while constructing
@@ -527,10 +613,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
// Add to context owner.
TheCU->addToContextOwner(SubprogramDie, SP.getContext());
- // Expose as global.
- TheCU->addGlobal(SP.getName(), SubprogramDie);
-
- SPMap[N] = TheCU;
return;
}
@@ -676,7 +758,7 @@ void DwarfDebug::endModule() {
// Construct subprogram DIE and add variables DIEs.
CompileUnit *SPCU = CUMap.lookup(TheCU);
- assert (SPCU && "Unable to find Compile Unit!");
+ assert(SPCU && "Unable to find Compile Unit!");
constructSubprogramDIE(SPCU, SP);
DIE *ScopeDIE = SPCU->getDIE(SP);
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
@@ -697,6 +779,13 @@ void DwarfDebug::endModule() {
DIE *ISP = *AI;
FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
+ for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
+ AE = AbstractSPDies.end(); AI != AE; ++AI) {
+ DIE *ISP = AI->second;
+ if (InlinedSubprogramDIEs.count(ISP))
+ continue;
+ FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
@@ -727,9 +816,14 @@ void DwarfDebug::endModule() {
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit info into a debug pubnames section.
- emitDebugPubNames();
-
+ // Emit info into a dwarf accelerator table sections.
+ if (DwarfAccelTables) {
+ emitAccelNames();
+ emitAccelObjC();
+ emitAccelNamespaces();
+ emitAccelTypes();
+ }
+
// Emit info into a debug pubtypes section.
emitDebugPubTypes();
@@ -837,7 +931,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
- assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
return MI->getNumOperands() == 3 &&
MI->getOperand(0).isReg() && MI->getOperand(0).getReg() &&
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
@@ -867,8 +961,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
if (MI->getOperand(0).isCImm())
return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
- assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
- return DotDebugLocEntry();
+ llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
}
/// collectVariableInfo - Find variables for each lexical scope.
@@ -964,7 +1057,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// The value is valid until the next DBG_VALUE or clobber.
- DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel,
+ Begin));
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
}
@@ -999,12 +1093,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (!MI->isDebugValue()) {
DebugLoc DL = MI->getDebugLoc();
if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) {
- unsigned Flags = DWARF2_FLAG_IS_STMT;
+ unsigned Flags = 0;
PrevInstLoc = DL;
if (DL == PrologEndLoc) {
Flags |= DWARF2_FLAG_PROLOGUE_END;
PrologEndLoc = DebugLoc();
}
+ if (PrologEndLoc.isUnknown())
+ Flags |= DWARF2_FLAG_IS_STMT;
+
if (!DL.isUnknown()) {
const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
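
The flag computation now withholds is_stmt until the prologue-end location has been seen: prologue lines get no flags, the prologue-end line gets both PROLOGUE_END and IS_STMT (PrologEndLoc is cleared before the is_stmt test), and every later line keeps IS_STMT. A minimal sketch of that decision, with the two DWARF2 flag macros assumed in scope:

    // Flags for one source line, given whether this is the prologue-end
    // line and whether prologue end was already consumed earlier.
    static unsigned lineFlags(bool AtPrologEnd, bool PrologEndSeen) {
      unsigned Flags = 0;
      if (AtPrologEnd)
        Flags |= DWARF2_FLAG_PROLOGUE_END;
      if (AtPrologEnd || PrologEndSeen)   // PrologEndLoc now unknown
        Flags |= DWARF2_FLAG_IS_STMT;
      return Flags;
    }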
@@ -1099,12 +1196,19 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
}
/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
-/// line number info for the function.
+/// line number info for the function.
static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
const MDNode *Scope = getScopeNode(DL, Ctx);
DISubprogram SP = getDISubprogram(Scope);
- if (SP.Verify())
- return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ if (SP.Verify()) {
+ // Check the number of operands, since that compatibility
+ // check is cheap here.
+ if (SP->getNumOperands() > 19)
+ return DebugLoc::get(SP.getScopeLineNumber(), 0, SP);
+ else
+ return DebugLoc::get(SP.getLineNumber(), 0, SP);
+ }
+
return DebugLoc();
}
@@ -1135,7 +1239,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *MI = II;
if (MI->isDebugValue()) {
- assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+ assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
// Keep track of user variables.
const MDNode *Var =
@@ -1206,7 +1310,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg())
continue;
- for (const unsigned *AI = TRI->getOverlaps(MOI->getReg());
+ for (const uint16_t *AI = TRI->getOverlaps(MOI->getReg());
unsigned Reg = *AI; ++AI) {
const MDNode *Var = LiveUserVar[Reg];
if (!Var)
@@ -1277,7 +1381,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
MF->getFunction()->getContext());
recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
FnStartDL.getScope(MF->getFunction()->getContext()),
- DWARF2_FLAG_IS_STMT);
+ 0);
}
}
@@ -1303,7 +1407,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
- assert (TheCU && "Unable to find compile unit!");
+ assert(TheCU && "Unable to find compile unit!");
// Construct abstract scopes.
ArrayRef<LexicalScope *> AList = LScopes.getAbstractScopesList();
@@ -1327,7 +1431,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
- if (!DisableFramePointerElim(*MF))
+ if (!MF->getTarget().Options.DisableFramePointerElim(*MF))
TheCU->addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
@@ -1380,7 +1484,7 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
Fn = DB.getFilename();
Dir = DB.getDirectory();
} else
- assert(0 && "Unexpected scope info");
+ llvm_unreachable("Unexpected scope info");
Src = GetOrCreateSourceID(Fn, Dir);
}
@@ -1398,10 +1502,6 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
// Get the children.
const std::vector<DIE *> &Children = Die->getChildren();
- // If not last sibling and has children then add sibling offset attribute.
- if (!Last && !Children.empty())
- Die->addSiblingOffset(DIEValueAllocator);
-
// Record the abbreviation.
assignAbbrevNumber(Die->getAbbrev());
@@ -1454,18 +1554,6 @@ void DwarfDebug::computeSizeAndOffsets() {
}
}
-/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
-/// temporary label to it if SymbolStem is specified.
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = 0) {
- Asm->OutStreamer.SwitchSection(Section);
- if (!SymbolStem) return 0;
-
- MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
- Asm->OutStreamer.EmitLabel(TmpSym);
- return TmpSym;
-}
-
/// EmitSectionLabels - Emit initial Dwarf sections with a label at
/// the start of each one.
void DwarfDebug::EmitSectionLabels() {
@@ -1483,7 +1571,6 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
- EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
DwarfStrSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
@@ -1525,9 +1612,6 @@ void DwarfDebug::emitDIE(DIE *Die) {
Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
switch (Attr) {
- case dwarf::DW_AT_sibling:
- Asm->EmitInt32(Die->getSiblingOffset());
- break;
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
@@ -1539,7 +1623,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ if (Asm->MAI->doesDwarfUseLabelOffsetForRanges()) {
Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
V->getValue(),
4);
@@ -1678,62 +1762,133 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames() {
+/// emitAccelNames - Emit visible names into a hashed accelerator table
+/// section.
+void DwarfDebug::emitAccelNames() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
- // Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubNamesSection());
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNames();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("Length of Public Names Info");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
- Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+ AT.FinalizeTable(Asm, "Names");
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfAccelNamesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
- TheCU->getID()));
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
+/// emitAccelObjC - Emit Objective-C classes and categories into a hashed
+/// accelerator table section.
+void DwarfDebug::emitAccelObjC() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelObjC();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
- DwarfInfoSectionSym);
+ AT.FinalizeTable(Asm, "ObjC");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelObjCSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
- Asm->GetTempSymbol("info_begin", TheCU->getID()),
- 4);
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- const StringMap<DIE*> &Globals = TheCU->getGlobals();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+/// emitAccelNamespaces - Emit namespace DIEs into a hashed accelerator
+/// table.
+void DwarfDebug::emitAccelNamespaces() {
+ DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<DIE*> > &Names = TheCU->getAccelNamespace();
+ for (StringMap<std::vector<DIE*> >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
- DIE *Entity = GI->second;
+ const std::vector<DIE *> &Entities = GI->second;
+ for (std::vector<DIE *>::const_iterator DI = Entities.begin(),
+ DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI));
+ }
+ }
- Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ AT.FinalizeTable(Asm, "namespac");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelNamespaceSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
- }
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
+}
- Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
- TheCU->getID()));
+/// emitAccelTypes - Emit type DIEs into a hashed accelerator table.
+void DwarfDebug::emitAccelTypes() {
+ std::vector<DwarfAccelTable::Atom> Atoms;
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
+ dwarf::DW_FORM_data4));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag,
+ dwarf::DW_FORM_data2));
+ Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags,
+ dwarf::DW_FORM_data1));
+ DwarfAccelTable AT(Atoms);
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ const StringMap<std::vector<std::pair<DIE*, unsigned > > > &Names
+ = TheCU->getAccelTypes();
+ for (StringMap<std::vector<std::pair<DIE*, unsigned> > >::const_iterator
+ GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ const std::vector<std::pair<DIE *, unsigned> > &Entities = GI->second;
+ for (std::vector<std::pair<DIE *, unsigned> >::const_iterator DI
+ = Entities.begin(), DE = Entities.end(); DI != DE; ++DI)
+ AT.AddName(Name, (*DI).first, (*DI).second);
+ }
}
+
+ AT.FinalizeTable(Asm, "types");
+ Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering()
+ .getDwarfAccelTypesSection());
+ MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
+ Asm->OutStreamer.EmitLabel(SectionBegin);
+
+ // Emit the full data.
+ AT.Emit(Asm, SectionBegin, this);
}
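The four emitAccel* bodies above end with the same finalize/switch/label/emit tail. A minimal sketch of a helper that could fold that tail (hypothetical; the helper and its parameters are illustrative, not part of this patch):

    // Hypothetical helper, not part of the patch: folds the common tail of
    // the emitAccel* functions -- finalize, switch section, label, emit.
    static void emitAccelTable(AsmPrinter *Asm, DwarfDebug *DD,
                               DwarfAccelTable &AT, const MCSection *Section,
                               StringRef TablePrefix, StringRef LabelName) {
      AT.FinalizeTable(Asm, TablePrefix);           // e.g. "Names", "types"
      Asm->OutStreamer.SwitchSection(Section);
      MCSymbol *SectionBegin = Asm->GetTempSymbol(LabelName); // "names_begin"
      Asm->OutStreamer.EmitLabel(SectionBegin);
      AT.Emit(Asm, SectionBegin, DD);
    }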
void DwarfDebug::emitDebugPubTypes() {
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
- // Start the dwarf pubnames section.
+ // Start the dwarf pubtypes section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfPubTypesSection());
Asm->OutStreamer.AddComment("Length of Public Types Info");
@@ -1766,6 +1921,7 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->EmitInt32(Entity->getOffset());
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ // Emit the name with a terminating null byte.
Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
}
@@ -1801,8 +1957,10 @@ void DwarfDebug::emitDebugStr() {
// Emit a label for reference from debug information entries.
Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
- // Emit the string itself.
- Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+ // Emit the string itself with a terminating null byte.
+ Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(),
+ Entries[i].second->getKeyLength()+1),
+ 0/*addrspace*/);
}
}
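Both string-emission hunks above widen the emitted StringRef by one byte: a StringMap key is stored with a trailing '\0', so length+1 covers the terminator that DWARF string data requires. A minimal illustration, assuming an AsmPrinter *Asm and a StringMapEntry &Entry:

    // The StringMap allocates the key and a trailing '\0' contiguously, so
    // a single EmitBytes call writes both the text and its terminator.
    StringRef Terminated(Entry.getKeyData(), Entry.getKeyLength() + 1);
    Asm->OutStreamer.EmitBytes(Terminated, 0 /*addrspace*/);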
@@ -1958,7 +2116,7 @@ void DwarfDebug::emitDebugMacInfo() {
/// __debug_info section, and the low_pc is the starting address for the
/// inlining instance.
void DwarfDebug::emitDebugInlineInfo() {
- if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+ if (!Asm->MAI->doesDwarfUseInlineInfoSection())
return;
if (!FirstCU)
@@ -1990,10 +2148,9 @@ void DwarfDebug::emitDebugInlineInfo() {
StringRef Name = SP.getName();
Asm->OutStreamer.AddComment("MIPS linkage name");
- if (LName.empty()) {
- Asm->OutStreamer.EmitBytes(Name, 0);
- Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
- } else
+ if (LName.empty())
+ Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+ else
Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 35653be5c897..83f30f5b446f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -30,7 +30,8 @@
namespace llvm {
class CompileUnit;
-class DbgConcreteScope;
+class ConstantInt;
+class ConstantFP;
class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
@@ -207,8 +208,8 @@ class DwarfDebug {
///
std::vector<DIEAbbrev *> Abbreviations;
- /// SourceIdMap - Source id map, i.e. pair of directory id and source file
- /// id mapped to a unique id.
+ /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
+ /// separated by a zero byte, mapped to a unique id.
StringMap<unsigned> SourceIdMap;
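A sketch of building and interning a key with the layout described above (hypothetical code; FileName and DirName are illustrative values):

    StringRef FileName = "a.c", DirName = "/src";   // illustrative values
    // Key = filename + '\0' + directory; the embedded zero byte keeps the
    // two components unambiguous inside one StringMap key.
    SmallString<128> Key;
    Key += FileName;
    Key.push_back('\0');
    Key += DirName;
    unsigned &Id = SourceIdMap.GetOrCreateValue(Key.str()).getValue();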
/// StringPool - A String->Symbol mapping of strings used by indirect
@@ -216,8 +217,6 @@ class DwarfDebug {
StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
unsigned NextStringPoolNumber;
- MCSymbol *getStringPoolEntry(StringRef Str);
-
/// SectionMap - Provides a unique id per text section.
///
UniqueVector<const MCSection*> SectionMap;
@@ -239,12 +238,12 @@ class DwarfDebug {
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
- /// InliendSubprogramDIEs - Collection of subprgram DIEs that are marked
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
/// InlineInfo - Keep track of inlined functions and their location. This
- /// information is used to populate debug_inlined section.
+ /// information is used to populate the debug_inlined section.
typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
@@ -304,6 +303,10 @@ class DwarfDebug {
MCSymbol *DwarfDebugLocSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+ // As an optimization, there is no need to emit an entry in the directory
+ // table for the same directory as DW_AT_comp_dir.
+ StringRef CompilationDir;
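A tiny sketch of the check this field enables (hypothetical helper, not part of the patch):

    // A directory equal to DW_AT_comp_dir needs no entry of its own:
    // directory index 0 already denotes the compilation directory.
    static bool needsDirectoryEntry(StringRef Dir, StringRef CompilationDir) {
      return !Dir.empty() && Dir != CompilationDir;
    }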
+
private:
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -340,7 +343,7 @@ private:
/// the start of each one.
void EmitSectionLabels();
- /// emitDIE - Recusively Emits a debug information entry.
+ /// emitDIE - Recursively emits a debug information entry.
///
void emitDIE(DIE *Die);
@@ -365,10 +368,22 @@ private:
///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitDebugPubNames - Emit visible names into a debug pubnames section.
- ///
- void emitDebugPubNames();
+ /// emitAccelNames - Emit visible names into a hashed accelerator table
+ /// section.
+ void emitAccelNames();
+
+ /// emitAccelObjC - Emit Objective-C classes and categories into a hashed
+ /// accelerator table section.
+ void emitAccelObjC();
+
+ /// emitAccelNamespaces - Emit namespace DIEs into a hashed accelerator
+ /// table.
+ void emitAccelNamespaces();
+ /// emitAccelTypes - Emit type DIEs into a hashed accelerator table.
+ ///
+ void emitAccelTypes();
+
/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
///
void emitDebugPubTypes();
@@ -407,10 +422,10 @@ private:
/// 3. an unsigned LEB128 number indicating the number of distinct inlining
/// instances for the function.
///
- /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+ /// The rest of the entry consists of a {die_offset, low_pc} pair for each
/// inlined instance; the die_offset points to the inlined_subroutine die in
- /// the __debug_info section, and the low_pc is the starting address for the
- /// inlining instance.
+ /// the __debug_info section, and the low_pc is the starting address for the
+ /// inlining instance.
void emitDebugInlineInfo();
/// constructCompileUnit - Create new CompileUnit for the given
@@ -426,8 +441,8 @@ private:
void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
unsigned Flags);
- /// identifyScopeMarkers() - Indentify instructions that are marking
- /// beginning of or end of a scope.
+ /// identifyScopeMarkers() - Identify instructions that are marking the
+ /// beginning or end of a scope.
void identifyScopeMarkers();
/// addCurrentFnArgument - If Var is a current function argument then add
@@ -472,7 +487,7 @@ public:
void collectInfoFromNamedMDNodes(Module *M);
/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
- /// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+ /// FIXME - Remove this when DragonEgg switches to DIBuilder.
bool collectLegacyDebugInfo(Module *M);
/// beginModule - Emit all Dwarf sections that should come prior to the
@@ -504,6 +519,13 @@ public:
/// createSubprogramDIE - Create new DIE using SP.
DIE *createSubprogramDIE(DISubprogram SP);
+
+ /// getStringPool - returns the symbol that marks the start of the pool.
+ MCSymbol *getStringPool();
+
+ /// getStringPoolEntry - returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getStringPoolEntry(StringRef Str);
};
} // End of namespace llvm
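With getStringPoolEntry now public (and getStringPool added), other emitters can reference pooled strings by section offset, as the inline-info hunk earlier does. A usage sketch, assuming DD points at the DwarfDebug instance and the pool-start symbol serves as the section base:

    MCSymbol *Entry = DD->getStringPoolEntry("some text"); // interned once
    Asm->EmitSectionOffset(Entry, DD->getStringPool());    // offset into .debug_str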
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 18b726b173dc..70cc2e56b3e1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -184,7 +185,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
/// CallToNoUnwindFunction - Return `true' if this is a call to a function
/// marked `nounwind'. Return `false' otherwise.
bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "This should be a call instruction!");
+ assert(MI->isCall() && "This should be a call instruction!");
bool MarkedNoUnwind = false;
bool SawFunc = false;
@@ -243,7 +244,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- if (MI->getDesc().isCall())
+ if (MI->isCall())
SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
continue;
}
@@ -529,10 +530,8 @@ void DwarfException::EmitExceptionTable() {
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
if (VerboseAsm) {
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(idx) + " <<");
- Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
- llvm::utostr(idx));
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(idx) + " <<");
+ Asm->OutStreamer.AddComment(" On exception at call site "+Twine(idx));
}
Asm->EmitULEB128(idx);
@@ -543,8 +542,8 @@ void DwarfException::EmitExceptionTable() {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" Action: cleanup");
else
- Asm->OutStreamer.AddComment(Twine(" Action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer.AddComment(" Action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
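The comment hunks in this file consistently trade llvm::utostr for Twine: utostr materializes a std::string per call, while a Twine records its operands and formats them once, when the text is rendered. A minimal illustration:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/Twine.h"
    using namespace llvm;

    StringRef renderAction(unsigned Action, SmallString<32> &Buf) {
      // Concatenation builds a lightweight expression tree; nothing is
      // formatted until toStringRef materializes it into Buf.
      return (" Action: " + Twine(Action)).toStringRef(Buf);
    }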
@@ -596,8 +595,7 @@ void DwarfException::EmitExceptionTable() {
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
- llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(">> Call Site " + Twine(++Entry) + " <<");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
if (VerboseAsm)
Asm->OutStreamer.AddComment(Twine(" Call between ") +
@@ -625,8 +623,8 @@ void DwarfException::EmitExceptionTable() {
if (S.Action == 0)
Asm->OutStreamer.AddComment(" On action: cleanup");
else
- Asm->OutStreamer.AddComment(Twine(" On action: ") +
- llvm::utostr((S.Action - 1) / 2 + 1));
+ Asm->OutStreamer.AddComment(" On action: " +
+ Twine((S.Action - 1) / 2 + 1));
}
Asm->EmitULEB128(S.Action);
}
@@ -640,8 +638,7 @@ void DwarfException::EmitExceptionTable() {
if (VerboseAsm) {
// Emit comments that decode the action table.
- Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
- llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(">> Action Record " + Twine(++Entry) + " <<");
}
// Type Filter
@@ -650,11 +647,11 @@ void DwarfException::EmitExceptionTable() {
// type of the catch clauses or the types in the exception specification.
if (VerboseAsm) {
if (Action.ValueForTypeID > 0)
- Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
- llvm::itostr(Action.ValueForTypeID));
+ Asm->OutStreamer.AddComment(" Catch TypeInfo " +
+ Twine(Action.ValueForTypeID));
else if (Action.ValueForTypeID < 0)
- Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
- llvm::itostr(Action.ValueForTypeID));
+ Asm->OutStreamer.AddComment(" Filter TypeInfo " +
+ Twine(Action.ValueForTypeID));
else
Asm->OutStreamer.AddComment(" Cleanup");
}
@@ -669,8 +666,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment(" No further actions");
} else {
unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
- Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
- llvm::utostr(NextAction));
+ Asm->OutStreamer.AddComment(" Continue to action "+Twine(NextAction));
}
}
Asm->EmitSLEB128(Action.NextAction);
@@ -687,7 +683,7 @@ void DwarfException::EmitExceptionTable() {
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
if (VerboseAsm)
- Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
+ Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--));
if (GV)
Asm->EmitReference(GV, TTypeEncoding);
else
@@ -707,7 +703,7 @@ void DwarfException::EmitExceptionTable() {
if (VerboseAsm) {
--Entry;
if (TypeID != 0)
- Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
}
Asm->EmitULEB128(TypeID);
@@ -719,17 +715,17 @@ void DwarfException::EmitExceptionTable() {
/// EndModule - Emit all exception information that should come after the
/// content.
void DwarfException::EndModule() {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(const MachineFunction *MF) {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
/// EndFunction - Gather and emit post-function exception information.
///
void DwarfException::EndFunction() {
- assert(0 && "Should be implemented");
+ llvm_unreachable("Should be implemented");
}
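The switch from assert(0) to llvm_unreachable (paired with the new ErrorHandling.h include above) is more than style: llvm_unreachable is noreturn and still aborts in NDEBUG builds, where assert compiles away. A small illustration:

    #include "llvm/Support/ErrorHandling.h"

    static int lanesFor(int Kind) {
      switch (Kind) {
      case 0: return 1;
      case 1: return 4;
      }
      // No fall-through return is needed: llvm_unreachable is noreturn,
      // and unlike assert(0) it still has teeth in release builds.
      llvm_unreachable("unexpected Kind");
    }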
diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..20b1f7b45b31
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/AsmPrinter/LLVMBuild.txt -------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AsmPrinter
+parent = Libraries
+required_libraries = Analysis CodeGen Core MC MCParser Support Target
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 75288b0934cb..ef1d2baed9ce 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -61,29 +62,33 @@ TailMergeSize("tail-merge-size",
namespace {
/// BranchFolderPass - Wrap branch folder in a machine function pass.
- class BranchFolderPass : public MachineFunctionPass,
- public BranchFolder {
+ class BranchFolderPass : public MachineFunctionPass {
public:
static char ID;
- explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge, true) {}
+ explicit BranchFolderPass(): MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
}
char BranchFolderPass::ID = 0;
+char &llvm::BranchFolderPassID = BranchFolderPass::ID;
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
- return new BranchFolderPass(DefaultEnableTailMerge);
-}
+INITIALIZE_PASS(BranchFolderPass, "branch-folder",
+ "Control Flow Optimizer", false, false)
bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
- return OptimizeFunction(MF,
- MF.getTarget().getInstrInfo(),
- MF.getTarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+ BranchFolder Folder(PassConfig->getEnableTailMerge(), /*CommonHoist=*/true);
+ return Folder.OptimizeFunction(MF,
+ MF.getTarget().getInstrInfo(),
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
}
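The rewrite above moves configuration out of the pass constructor and into TargetPassConfig. A sketch of the resulting pattern for a machine pass (illustrative class; only the TargetPassConfig calls mirror the hunk):

    class ExamplePass : public MachineFunctionPass {  // illustrative only
    public:
      static char ID;
      ExamplePass() : MachineFunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<TargetPassConfig>();           // publish the dependency
        MachineFunctionPass::getAnalysisUsage(AU);
      }

      virtual bool runOnMachineFunction(MachineFunction &MF) {
        // Query settings when the pass runs instead of baking them into
        // the constructor, so one pass instance serves every configuration.
        bool TailMerge = getAnalysis<TargetPassConfig>().getEnableTailMerge();
        (void)TailMerge;   // a real pass would act on this
        return false;
      }
    };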
@@ -132,7 +137,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
break;
unsigned Reg = I->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
ImpDefRegs.insert(SubReg);
++I;
@@ -179,8 +184,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TII = tii;
TRI = tri;
MMI = mmi;
+ RS = NULL;
- RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+ // Use a RegScavenger to help update liveness when required.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.tracksLiveness() && TRI->requiresRegisterScavenging(MF))
+ RS = new RegScavenger();
+ else
+ MRI.invalidateLiveness();
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
@@ -208,7 +219,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
delete RS;
return MadeChange;
}
-
+
// Walk the function to find jump tables that are live.
BitVector JTIsLive(JTI->getJumpTables().size());
for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
@@ -432,10 +443,9 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall())
+ if (I->isCall())
Time += 10;
- else if (MCID.mayLoad() || MCID.mayStore())
+ else if (I->mayLoad() || I->mayStore())
Time += 2;
else
++Time;
@@ -484,8 +494,9 @@ BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
// an object with itself.
#ifndef _GLIBCXX_DEBUG
llvm_unreachable("Predecessor appears twice");
-#endif
+#else
return false;
+#endif
}
}
@@ -502,7 +513,7 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
break;
}
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
++NumTerms;
}
return NumTerms;
@@ -550,8 +561,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
// heuristics.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
- !MBB1->back().getDesc().isBarrier() &&
- !MBB2->back().getDesc().isBarrier())
+ !MBB1->back().isBarrier() &&
+ !MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
@@ -870,6 +881,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
continue;
+ // Skip blocks which may jump to a landing pad. Can't tail merge these.
+ if (PBB->getLandingPadSuccessor())
+ continue;
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
@@ -924,8 +938,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ // The 1 below can occur as a result of removing blocks in
+ // TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
if (MergePotentials.size() == 1 &&
MergePotentials.begin()->getBlock() != PredBB)
FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
@@ -980,7 +995,7 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
if (!MBBI->isDebugValue())
break;
}
- return (MBBI->getDesc().isBranch());
+ return (MBBI->isBranch());
}
/// IsBetterFallthrough - Return true if it would be clearly better to
@@ -1008,7 +1023,23 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator MBB2I = --MBB2->end();
while (MBB2I->isDebugValue())
--MBB2I;
- return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
+ return MBB2I->isCall() && !MBB1I->isCall();
+}
+
+/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch
+/// instructions in the block. Always use the DebugLoc of the first
+/// branching instruction found unless it's absent, in which case use the
+/// DebugLoc of the second if present.
+static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return DebugLoc();
+ --I;
+ while (I->isDebugValue() && I != MBB.begin())
+ --I;
+ if (I->isBranch())
+ return I->getDebugLoc();
+ return DebugLoc();
}
/// OptimizeBlock - Analyze and optimize control flow related to the specified
@@ -1016,7 +1047,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
- DebugLoc dl; // FIXME: this is nowhere
ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
@@ -1065,6 +1095,7 @@ ReoptimizeBlock:
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
@@ -1091,7 +1122,7 @@ ReoptimizeBlock:
MachineBasicBlock::iterator PrevBBIter = PrevBB.end();
--PrevBBIter;
MachineBasicBlock::iterator MBBIter = MBB->begin();
- // Check if DBG_VALUE at the end of PrevBB is identical to the
+ // Check if DBG_VALUE at the end of PrevBB is identical to the
// DBG_VALUE at the beginning of MBB.
while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end()
&& PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) {
@@ -1103,7 +1134,7 @@ ReoptimizeBlock:
}
}
PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
- PrevBB.removeSuccessor(PrevBB.succ_begin());;
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
assert(PrevBB.succ_empty());
PrevBB.transferSuccessors(MBB);
MadeChange = true;
@@ -1122,6 +1153,7 @@ ReoptimizeBlock:
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
@@ -1135,6 +1167,7 @@ ReoptimizeBlock:
if (PriorTBB == MBB) {
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
MadeChange = true;
@@ -1172,6 +1205,7 @@ ReoptimizeBlock:
DEBUG(dbgs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
+ DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
@@ -1201,6 +1235,7 @@ ReoptimizeBlock:
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->ReverseBranchCondition(NewCond)) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
TII->RemoveBranch(*MBB);
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
@@ -1214,6 +1249,7 @@ ReoptimizeBlock:
if (CurTBB && CurCond.empty() && CurFBB == 0 &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
+ DebugLoc dl = getBranchDebugLoc(*MBB);
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
@@ -1256,8 +1292,9 @@ ReoptimizeBlock:
assert(PriorFBB == 0 && "Machine CFG out of date!");
PriorFBB = MBB;
}
+ DebugLoc pdl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1281,9 +1318,10 @@ ReoptimizeBlock:
bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
+ DebugLoc pdl = getBranchDebugLoc(*PMBB);
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1343,7 +1381,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
@@ -1446,7 +1484,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
continue;
if (MO.isUse()) {
Uses.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.insert(*AS);
} else if (!MO.isDead())
// Don't try to hoist code in the rare case the terminator defines a
@@ -1469,6 +1507,9 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
bool IsDef = false;
for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) {
const MachineOperand &MO = PI->getOperand(i);
+ // If PI has a regmask operand, it is probably a call. Stop the search here.
+ if (MO.isRegMask())
+ return Loc;
if (!MO.isReg() || MO.isUse())
continue;
unsigned Reg = MO.getReg();
@@ -1505,16 +1546,16 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
continue;
if (MO.isUse()) {
Uses.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.insert(*AS);
} else {
if (Uses.count(Reg)) {
Uses.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Uses.erase(*SR); // Use getSubRegisters to be conservative
}
Defs.insert(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Defs.insert(*AS);
}
}
@@ -1581,6 +1622,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
bool IsSafe = true;
for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) {
MachineOperand &MO = TIB->getOperand(i);
+ // Don't attempt to hoist instructions with register masks.
+ if (MO.isRegMask()) {
+ IsSafe = false;
+ break;
+ }
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1615,6 +1661,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
IsSafe = false;
break;
}
+
+ if (MO.isKill() && Uses.count(Reg))
+ // Kills a register that's read by the instruction at the point of
+ // insertion. Remove the kill marker.
+ MO.setIsKill(false);
}
}
if (!IsSafe)
@@ -1632,7 +1683,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if (!Reg || !LocalDefsSet.count(Reg))
continue;
- for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
LocalDefsSet.erase(*OR);
}
@@ -1645,11 +1696,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
if (!Reg)
continue;
LocalDefs.push_back(Reg);
- for (const unsigned *OR = TRI->getOverlaps(Reg); *OR; ++OR)
+ for (const uint16_t *OR = TRI->getOverlaps(Reg); *OR; ++OR)
LocalDefsSet.insert(*OR);
}
- HasDups = true;;
+ HasDups = true;
++TIB;
++FIB;
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 9a5e55160114..21729cd6c380 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -9,10 +9,9 @@ add_llvm_library(LLVMCodeGen
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
+ DFAPacketizer.cpp
DwarfEHPrepare.cpp
EdgeBundles.cpp
- ELFCodeEmitter.cpp
- ELFWriter.cpp
ExecutionDepsFix.cpp
ExpandISelPseudos.cpp
ExpandPostRAPseudos.cpp
@@ -23,6 +22,7 @@ add_llvm_library(LLVMCodeGen
InlineSpiller.cpp
InterferenceCache.cpp
IntrinsicLowering.cpp
+ JITCodeEmitter.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
LexicalScopes.cpp
@@ -37,7 +37,10 @@ add_llvm_library(LLVMCodeGen
LocalStackSlotAllocation.cpp
MachineBasicBlock.cpp
MachineBlockFrequencyInfo.cpp
+ MachineBlockPlacement.cpp
MachineBranchProbabilityInfo.cpp
+ MachineCodeEmitter.cpp
+ MachineCopyPropagation.cpp
MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
@@ -45,6 +48,7 @@ add_llvm_library(LLVMCodeGen
MachineFunctionPass.cpp
MachineFunctionPrinterPass.cpp
MachineInstr.cpp
+ MachineInstrBundle.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopRanges.cpp
@@ -53,9 +57,9 @@ add_llvm_library(LLVMCodeGen
MachinePassRegistry.cpp
MachineRegisterInfo.cpp
MachineSSAUpdater.cpp
+ MachineScheduler.cpp
MachineSink.cpp
MachineVerifier.cpp
- ObjectCodeEmitter.cpp
OcamlGC.cpp
OptimizePHIs.cpp
PHIElimination.cpp
@@ -66,17 +70,16 @@ add_llvm_library(LLVMCodeGen
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
+ RegAllocBase.cpp
RegAllocBasic.cpp
RegAllocFast.cpp
RegAllocGreedy.cpp
- RegAllocLinearScan.cpp
RegAllocPBQP.cpp
RegisterClassInfo.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
RenderMachineFunction.cpp
ScheduleDAG.cpp
- ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
ScoreboardHazardRecognizer.cpp
@@ -87,27 +90,17 @@ add_llvm_library(LLVMCodeGen
Spiller.cpp
SpillPlacement.cpp
SplitKit.cpp
- Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
TailDuplication.cpp
+ TargetFrameLoweringImpl.cpp
TargetInstrInfoImpl.cpp
TargetLoweringObjectFileImpl.cpp
+ TargetOptionsImpl.cpp
TwoAddressInstructionPass.cpp
UnreachableBlockElim.cpp
VirtRegMap.cpp
- VirtRegRewriter.cpp
- )
-
-add_llvm_library_dependencies(LLVMCodeGen
- LLVMAnalysis
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
)
add_subdirectory(SelectionDAG)
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 14eb0541dc8d..2b7dfdbe41a0 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -58,7 +58,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ for (const uint16_t *Alias = TRI.getOverlaps(Reg);
unsigned Reg = *Alias; ++Alias)
UsedRegs[Reg/32] |= 1 << (Reg&31);
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 424535ba2a1c..a81bb5cc5566 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -19,36 +19,49 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeBranchFolderPassPass(Registry);
initializeCalculateSpillWeightsPass(Registry);
+ initializeCodePlacementOptPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
+ initializeExpandPostRAPass(Registry);
+ initializeExpandISelPseudosPass(Registry);
+ initializeFinalizeMachineBundlesPass(Registry);
+ initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeIfConverterPass(Registry);
initializeLiveDebugVariablesPass(Registry);
initializeLiveIntervalsPass(Registry);
initializeLiveStacksPass(Registry);
initializeLiveVariablesPass(Registry);
+ initializeLocalStackSlotPassPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
+ initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementStatsPass(Registry);
+ initializeMachineCopyPropagationPass(Registry);
initializeMachineCSEPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
initializeMachineVerifierPassPass(Registry);
initializeOptimizePHIsPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
+ initializePostRASchedulerPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
- initializeRALinScanPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
initializeSlotIndexesPass(Registry);
- initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
initializeStackSlotColoringPass(Registry);
initializeStrongPHIEliminationPass(Registry);
+ initializeTailDuplicatePassPass(Registry);
+ initializeTargetPassConfigPass(Registry);
initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnpackMachineBundlesPass(Registry);
initializeUnreachableBlockElimPass(Registry);
initializeUnreachableMachineBlockElimPass(Registry);
initializeVirtRegMapPass(Registry);
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 270c337ef67e..c13c05e26a20 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -39,9 +39,6 @@ namespace {
CodePlacementOpt() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const {
- return "Code Placement Optimizer";
- }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
@@ -69,9 +66,9 @@ namespace {
char CodePlacementOpt::ID = 0;
} // end anonymous namespace
-FunctionPass *llvm::createCodePlacementOptPass() {
- return new CodePlacementOpt();
-}
+char &llvm::CodePlacementOptID = CodePlacementOpt::ID;
+INITIALIZE_PASS(CodePlacementOpt, "code-placement",
+ "Code Placement Optimizer", false, false)
/// HasFallthrough - Test whether the given branch has a fallthrough, either as
/// a plain fallthrough or as a fallthrough case of a conditional branch.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 84c4d59c0e41..bad50103b9c3 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -35,7 +35,8 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
- DefIndices(TRI->getNumRegs(), 0) {}
+ DefIndices(TRI->getNumRegs(), 0),
+ KeepRegs(TRI->getNumRegs(), false) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
@@ -52,9 +53,9 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
}
// Clear "do not change" set.
- KeepRegs.clear();
+ KeepRegs.reset();
- bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
+ bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn());
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -63,14 +64,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -85,14 +86,14 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -102,18 +103,18 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
+ for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
+ KillIndices[Reg] = BBSize;
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
+ KillIndices[AliasReg] = BBSize;
DefIndices[AliasReg] = ~0u;
}
}
@@ -121,7 +122,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
void CriticalAntiDepBreaker::FinishBlock() {
RegRefs.clear();
- KeepRegs.clear();
+ KeepRegs.reset();
}
void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -193,8 +194,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// instruction which may not be executed. The second R6 def may or may not
// re-define R6 so it's not safe to change it since the last R6 use cannot be
// changed.
- bool Special = MI->getDesc().isCall() ||
- MI->getDesc().hasExtraSrcRegAllocReq() ||
+ bool Special = MI->isCall() ||
+ MI->hasExtraSrcRegAllocReq() ||
TII->isPredicated(MI);
// Scan the register operands for this instruction and update
@@ -217,7 +218,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Now check for aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
// If an alias of the reg is used during the live range, give up.
// Note that this allows us to skip checking if AntiDepReg
// overlaps with any of the aliases, among other things.
@@ -233,10 +234,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
RegRefs.insert(std::make_pair(Reg, &MO));
if (MO.isUse() && Special) {
- if (KeepRegs.insert(Reg)) {
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ if (!KeepRegs.test(Reg)) {
+ KeepRegs.set(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
- KeepRegs.insert(*Subreg);
+ KeepRegs.set(*Subreg);
}
}
}
@@ -253,6 +255,17 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// address updates.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+
+ if (MO.isRegMask())
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
+ if (MO.clobbersPhysReg(i)) {
+ DefIndices[i] = Count;
+ KillIndices[i] = ~0u;
+ KeepRegs.reset(i);
+ Classes[i] = 0;
+ RegRefs.erase(i);
+ }
+
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -265,21 +278,21 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
assert(((KillIndices[Reg] == ~0u) !=
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
+ KeepRegs.reset(Reg);
Classes[Reg] = 0;
RegRefs.erase(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
unsigned SubregReg = *Subreg;
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
+ KeepRegs.reset(SubregReg);
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ for (const uint16_t *Super = TRI->getSuperRegisters(Reg);
*Super; ++Super) {
unsigned SuperReg = *Super;
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
@@ -315,7 +328,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
"Kill and Def maps aren't consistent for Reg!");
}
// Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (KillIndices[AliasReg] == ~0u) {
KillIndices[AliasReg] = Count;
@@ -355,6 +368,9 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &CheckOper = MI->getOperand(i);
+ if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg))
+ return true;
+
if (!CheckOper.isReg() || !CheckOper.isDef() ||
CheckOper.getReg() != NewReg)
continue;
@@ -427,6 +443,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Keep a map of the MachineInstr*'s back to the SUnit representing them.
// This is used for updating debug information.
+ //
+ // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap
DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
// Find the node at the bottom of the critical path.
@@ -535,7 +553,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
- else if (KeepRegs.count(AntiDepReg))
+ else if (KeepRegs.test(AntiDepReg))
// Don't break anti-dependencies if a use down below requires
// this exact register.
AntiDepReg = 0;
@@ -572,7 +590,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ if (MI->isCall() || MI->hasExtraDefRegAllocReq() ||
TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 07107802972d..77462593896e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -24,7 +24,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
@@ -66,7 +65,7 @@ class TargetRegisterInfo;
/// KeepRegs - A set of registers which are live and cannot be changed to
/// break anti-dependencies.
- SmallSet<unsigned, 4> KeepRegs;
+ BitVector KeepRegs;
public:
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
new file mode 100644
index 000000000000..bfbe7790998f
--- /dev/null
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -0,0 +1,223 @@
+//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This class implements a deterministic finite automaton (DFA) based
+// packetizing mechanism for VLIW architectures. It provides APIs to
+// determine whether there exists a legal mapping of instructions to
+// functional unit assignments in a packet. The DFA is auto-generated from
+// the target's Schedule.td file.
+//
+// A DFA consists of 3 major elements: states, inputs, and transitions. For
+// the packetizing mechanism, the input is the set of instruction classes for
+// a target. The state models all possible combinations of functional unit
+// consumption for a given set of instructions in a packet. A transition
+// models the addition of an instruction to a packet. In the DFA constructed
+// by this class, if an instruction can be added to a packet, then a valid
+// transition exists from the corresponding state. Invalid transitions
+// indicate that the instruction cannot be added to the current packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+using namespace llvm;
+
+DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
+ const unsigned *SET):
+ InstrItins(I), CurrentState(0), DFAStateInputTable(SIT),
+ DFAStateEntryTable(SET) {}
+
+
+//
+// ReadTable - Read the DFA transition table and update CachedTable.
+//
+// Format of the transition tables:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
+// for the ith state
+//
+void DFAPacketizer::ReadTable(unsigned int state) {
+ unsigned ThisState = DFAStateEntryTable[state];
+ unsigned NextStateInTable = DFAStateEntryTable[state+1];
+ // Early exit in case CachedTable already contains this
+ // state's transitions.
+ if (CachedTable.count(UnsignPair(state,
+ DFAStateInputTable[ThisState][0])))
+ return;
+
+ for (unsigned i = ThisState; i < NextStateInTable; i++)
+ CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
+ DFAStateInputTable[i][1];
+}
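A worked example of the table layout described in the comment above (numbers invented for illustration):

    // Hypothetical 2-state DFA:
    static const int      DFAStateInputTable[][2] = {{1, 1}, {2, 1}, {1, 0}};
    static const unsigned DFAStateEntryTable[]    = {0, 2, 3};
    // State 0 owns rows [0,2): input 1 -> state 1, input 2 -> state 1.
    // State 1 owns rows [2,3): input 1 -> state 0.
    // ReadTable(0) would cache {(0,1)->1, (0,2)->1} in CachedTable.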
+
+
+// canReserveResources - Check if the resources occupied by an MCInstrDesc
+// are available in the current state.
+bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ return (CachedTable.count(StateTrans) != 0);
+}
+
+
+// reserveResources - Reserve the resources occupied by an MCInstrDesc and
+// change the current state to reflect that change.
+void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) {
+ unsigned InsnClass = MID->getSchedClass();
+ const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass);
+ unsigned FuncUnits = IS->getUnits();
+ UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits);
+ ReadTable(CurrentState);
+ assert(CachedTable.count(StateTrans) != 0);
+ CurrentState = CachedTable[StateTrans];
+}
+
+
+// canReserveResources - Check if the resources occupied by a machine
+// instruction are available in the current state.
+bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ return canReserveResources(&MID);
+}
+
+// reserveResources - Reserve the resources occupied by a machine
+// instruction and change the current state to reflect that change.
+void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) {
+ const llvm::MCInstrDesc &MID = MI->getDesc();
+ reserveResources(&MID);
+}
+
+namespace llvm {
+// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides
+// Schedule method to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT, bool IsPostRA);
+ // Schedule - Actual scheduling work.
+ void schedule();
+};
+}
+
+DefaultVLIWScheduler::DefaultVLIWScheduler(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) :
+ ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA) {
+}
+
+void DefaultVLIWScheduler::schedule() {
+ // Build the scheduling graph.
+ buildSchedGraph(0);
+}
+
+// VLIWPacketizerList Ctor
+VLIWPacketizerList::VLIWPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
+ TII = TM.getInstrInfo();
+ ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+ VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
+}
+
+// VLIWPacketizerList Dtor
+VLIWPacketizerList::~VLIWPacketizerList() {
+ if (VLIWScheduler)
+ delete VLIWScheduler;
+
+ if (ResourceTracker)
+ delete ResourceTracker;
+}
+
+// endPacket - End the current packet, bundle packet instructions and reset
+// DFA state.
+void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
+ MachineInstr *MI) {
+ if (CurrentPacketMIs.size() > 1) {
+ MachineInstr *MIFirst = CurrentPacketMIs.front();
+ finalizeBundle(*MBB, MIFirst, MI);
+ }
+ CurrentPacketMIs.clear();
+ ResourceTracker->clearResources();
+}
+
+// PacketizeMIs - Bundle machine instructions into packets.
+void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator BeginItr,
+ MachineBasicBlock::iterator EndItr) {
+ assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
+ VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size());
+ VLIWScheduler->schedule();
+ VLIWScheduler->exitRegion();
+
+ // Generate MI -> SU map.
+ MIToSUnit.clear();
+ for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &VLIWScheduler->SUnits[i];
+ MIToSUnit[SU->getInstr()] = SU;
+ }
+
+ // The main packetizer loop.
+ for (; BeginItr != EndItr; ++BeginItr) {
+ MachineInstr *MI = BeginItr;
+
+ this->initPacketizerState();
+
+ // End the current packet if needed.
+ if (this->isSoloInstruction(MI)) {
+ endPacket(MBB, MI);
+ continue;
+ }
+
+ // Ignore pseudo instructions.
+ if (this->ignorePseudoInstruction(MI, MBB))
+ continue;
+
+ SUnit *SUI = MIToSUnit[MI];
+ assert(SUI && "Missing SUnit Info!");
+
+ // Ask the DFA whether the machine resources needed by MI are available.
+ bool ResourceAvail = ResourceTracker->canReserveResources(MI);
+ if (ResourceAvail) {
+ // Dependency check for MI with instructions in CurrentPacketMIs.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
+ MachineInstr *MJ = *VI;
+ SUnit *SUJ = MIToSUnit[MJ];
+ assert(SUJ && "Missing SUnit Info!");
+
+ // Is it legal to packetize SUI and SUJ together?
+ if (!this->isLegalToPacketizeTogether(SUI, SUJ)) {
+ // Allow packetization if dependency can be pruned.
+ if (!this->isLegalToPruneDependencies(SUI, SUJ)) {
+ // End the packet if dependency cannot be pruned.
+ endPacket(MBB, MI);
+ break;
+ } // !isLegalToPruneDependencies.
+ } // !isLegalToPacketizeTogether.
+ } // For all instructions in CurrentPacketMIs.
+ } else {
+ // End the packet if resource is not available.
+ endPacket(MBB, MI);
+ }
+
+ // Add MI to the current packet.
+ BeginItr = this->addToPacket(MI);
+ } // For all instructions in BB.
+
+ // End any packet left behind.
+ endPacket(MBB, EndItr);
+}
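A sketch of how a target pass might drive this API (hypothetical subclass and pass; only the VLIWPacketizerList calls come from the file above):

    bool MyPacketizerPass::runOnMachineFunction(MachineFunction &MF) {
      MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
      MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
      // MyVLIWPacketizerList subclasses VLIWPacketizerList and implements
      // the target hooks (isSoloInstruction, isLegalToPacketizeTogether,
      // and so on).
      MyVLIWPacketizerList Packetizer(MF, MLI, MDT, /*IsPostRA=*/true);
      for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
           MBB != E; ++MBB)
        Packetizer.PacketizeMIs(&*MBB, MBB->begin(), MBB->end());
      return true;
    }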
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 6de6c0cb81bd..aa10d1d41f2b 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -28,11 +28,12 @@ STATISTIC(NumDeletes, "Number of dead instructions deleted");
namespace {
class DeadMachineInstructionElim : public MachineFunctionPass {
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
BitVector LivePhysRegs;
+ BitVector ReservedRegs;
public:
static char ID; // Pass identification, replacement for typeid
@@ -45,14 +46,11 @@ namespace {
};
}
char DeadMachineInstructionElim::ID = 0;
+char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID;
INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
"Remove dead machine instructions", false, false)
-FunctionPass *llvm::createDeadMachineInstructionElimPass() {
- return new DeadMachineInstructionElim();
-}
-
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Technically speaking inline asm without side effects and no defs can still
// be deleted. But there is so much bad inline asm code out there, we should
@@ -70,10 +68,14 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ?
- LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) {
- // This def has a non-debug use. Don't delete the instruction!
- return false;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (LivePhysRegs.test(Reg) || ReservedRegs.test(Reg))
+ return false;
+ } else {
+ if (!MRI->use_nodbg_empty(Reg))
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
}
}
}
@@ -89,7 +91,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
// Treat reserved registers as always live.
- BitVector ReservedRegs = TRI->getReservedRegs(MF);
+ ReservedRegs = TRI->getReservedRegs(MF);
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -102,7 +104,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
- if (!MBB->empty() && MBB->back().getDesc().isReturn())
+ if (!MBB->empty() && MBB->back().isReturn())
for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(),
LOE = MRI->liveout_end(); LOI != LOE; ++LOI) {
unsigned Reg = *LOI;
@@ -169,10 +171,13 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
*SubRegs; ++SubRegs)
LivePhysRegs.reset(*SubRegs);
}
+ } else if (MO.isRegMask()) {
+ // Register mask of preserved registers. All clobbers are dead.
+ LivePhysRegs.clearBitsNotInMask(MO.getRegMask());
}
}
// Record the physreg uses, after the defs, in case a physreg is
@@ -183,7 +188,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.set(Reg);
- for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
+ for (const uint16_t *AliasSet = TRI->getAliasSet(Reg);
*AliasSet; ++AliasSet)
LivePhysRegs.set(*AliasSet);
}
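The new RegMask case above relies on BitVector::clearBitsNotInMask: a register-mask operand lists the registers a call preserves, so every live bit not set in the mask belongs to a clobbered register and must be dropped. A toy 32-register version of that masking step:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t LivePhysRegs  = 0x0B; // registers 0, 1, 3 currently live
  uint32_t PreservedMask = 0x01; // the call preserves only register 0
  LivePhysRegs &= PreservedMask; // clobbered registers are no longer live
  std::printf("live after call: %#x\n", LivePhysRegs); // prints 0x1
}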
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index ed9e409d3e5a..944dd4fb41c8 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -28,98 +28,34 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
-STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
-STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
-STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered");
-STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
const TargetLowering *TLI;
- // The eh.exception intrinsic.
- Function *ExceptionValueIntrinsic;
-
- // The eh.selector intrinsic.
- Function *SelectorIntrinsic;
-
- // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
- Constant *URoR;
-
- // The EH language-specific catch-all type.
- GlobalVariable *EHCatchAllValue;
-
- // _Unwind_Resume or the target equivalent.
+ // RewindFunction - _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
- // We both use and preserve dominator info.
- DominatorTree *DT;
-
- // The function we are running on.
- Function *F;
-
- // The landing pads for this function.
- typedef SmallPtrSet<BasicBlock*, 8> BBSet;
- BBSet LandingPads;
-
- bool InsertUnwindResumeCalls();
-
- bool NormalizeLandingPads();
- bool LowerUnwindsAndResumes();
- bool MoveExceptionValueCalls();
-
- Instruction *CreateExceptionValueCall(BasicBlock *BB);
-
- /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
- /// use the "llvm.eh.catch.all.value" call need to convert to using its
- /// initializer instead.
- bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
-
- bool HasCatchAllInSelector(IntrinsicInst *);
+ bool InsertUnwindResumeCalls(Function &Fn);
+ Instruction *GetExceptionObject(ResumeInst *RI);
- /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
- void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
- SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
-
- /// FindAllURoRInvokes - Find all URoR invokes in the function.
- void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
-
- /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
- /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
- /// a landing pad within the current function. This is a candidate to merge
- /// the selector associated with the URoR invoke with the one from the
- /// URoR's landing pad.
- bool HandleURoRInvokes();
-
- /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
- /// with the eh.exception call. This recursively looks past instructions
- /// which don't change the EH pointer value, like casts or PHI nodes.
- bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
- SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
- SmallPtrSet<PHINode*, 32> &SeenPHIs);
-
public:
static char ID; // Pass identification, replacement for typeid.
DwarfEHPrepare(const TargetMachine *tm) :
FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
- ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
- URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ RewindFunction(0) {
initializeDominatorTreePass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &Fn);
- // getAnalysisUsage - We need the dominator tree for handling URoR.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<DominatorTree>();
- AU.addPreserved<DominatorTree>();
- }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
const char *getPassName() const {
return "Exception handling preparation";
}
-
};
} // end anonymous namespace
@@ -129,543 +65,52 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
return new DwarfEHPrepare(tm);
}
-/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
-/// catch-all.
-bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
- if (!EHCatchAllValue) return false;
-
- unsigned ArgIdx = II->getNumArgOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
- return GV == EHCatchAllValue;
-}
-
-/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
-void DwarfEHPrepare::
-FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
- SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
- for (Value::use_iterator
- I = SelectorIntrinsic->use_begin(),
- E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *II = cast<IntrinsicInst>(*I);
-
- if (II->getParent()->getParent() != F)
- continue;
-
- if (!HasCatchAllInSelector(II))
- Sels.insert(II);
- else
- CatchAllSels.insert(II);
- }
-}
-
-/// FindAllURoRInvokes - Find all URoR invokes in the function.
-void DwarfEHPrepare::
-FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
- for (Value::use_iterator
- I = URoR->use_begin(),
- E = URoR->use_end(); I != E; ++I) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
- URoRInvokes.insert(II);
- }
-}
-
-/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
-/// the "llvm.eh.catch.all.value" call need to convert to using its
-/// initializer instead.
-bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
- if (!EHCatchAllValue) return false;
-
- if (!SelectorIntrinsic) {
- SelectorIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
- if (!SelectorIntrinsic) return false;
- }
-
- bool Changed = false;
- for (SmallPtrSet<IntrinsicInst*, 32>::iterator
- I = Sels.begin(), E = Sels.end(); I != E; ++I) {
- IntrinsicInst *Sel = *I;
-
- // Index of the "llvm.eh.catch.all.value" variable.
- unsigned OpIdx = Sel->getNumArgOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
- if (GV != EHCatchAllValue) continue;
- Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
- Changed = true;
- }
-
- return Changed;
-}
-
-/// FindSelectorAndURoR - Find the eh.selector call associated with the
-/// eh.exception call. And indicate if there is a URoR "invoke" associated with
-/// the eh.exception call. This recursively looks past instructions which don't
-/// change the EH pointer value, like casts or PHI nodes.
-bool
-DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
- SmallPtrSet<IntrinsicInst*, 8> &SelCalls,
- SmallPtrSet<PHINode*, 32> &SeenPHIs) {
- bool Changed = false;
-
- for (Value::use_iterator
- I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
- Instruction *II = dyn_cast<Instruction>(*I);
- if (!II || II->getParent()->getParent() != F) continue;
-
- if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
- if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
- SelCalls.insert(Sel);
- } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
- if (Invoke->getCalledFunction() == URoR)
- URoRInvoke = true;
- } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
- Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs);
- } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
- if (SeenPHIs.insert(PN))
- // Don't process a PHI node more than once.
- Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs);
- }
- }
-
- return Changed;
-}
-
-/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
-/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
-/// landing pad within the current function. This is a candidate to merge the
-/// selector associated with the URoR invoke with the one from the URoR's
-/// landing pad.
-bool DwarfEHPrepare::HandleURoRInvokes() {
- if (!EHCatchAllValue) {
- EHCatchAllValue =
- F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
- if (!EHCatchAllValue) return false;
- }
-
- if (!SelectorIntrinsic) {
- SelectorIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
- if (!SelectorIntrinsic) return false;
- }
-
- SmallPtrSet<IntrinsicInst*, 32> Sels;
- SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
- FindAllCleanupSelectors(Sels, CatchAllSels);
-
- if (!URoR) {
- URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors(CatchAllSels);
- }
-
- SmallPtrSet<InvokeInst*, 32> URoRInvokes;
- FindAllURoRInvokes(URoRInvokes);
-
- SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
-
- for (SmallPtrSet<IntrinsicInst*, 32>::iterator
- SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
- const BasicBlock *SelBB = (*SI)->getParent();
- for (SmallPtrSet<InvokeInst*, 32>::iterator
- UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
- const BasicBlock *URoRBB = (*UI)->getParent();
- if (DT->dominates(SelBB, URoRBB)) {
- SelsToConvert.insert(*SI);
- break;
+/// GetExceptionObject - Return the exception object from the value passed into
+/// the 'resume' instruction (typically an aggregate). Clean up any dead
+/// instructions, including the 'resume' instruction.
+Instruction *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
+ Value *V = RI->getOperand(0);
+ Instruction *ExnObj = 0;
+ InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
+ LoadInst *SelLoad = 0;
+ InsertValueInst *ExcIVI = 0;
+ bool EraseIVIs = false;
+
+ if (SelIVI) {
+ if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) {
+ ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0));
+ if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) &&
+ ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) {
+ ExnObj = cast<Instruction>(ExcIVI->getOperand(1));
+ SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1));
+ EraseIVIs = true;
}
}
}
- bool Changed = false;
-
- if (Sels.size() != SelsToConvert.size()) {
- // If we haven't been able to convert all of the clean-up selectors, then
- // loop through the slow way to see if they still need to be converted.
- if (!ExceptionValueIntrinsic) {
- ExceptionValueIntrinsic =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
- if (!ExceptionValueIntrinsic)
- return CleanupSelectors(CatchAllSels);
- }
-
- for (Value::use_iterator
- I = ExceptionValueIntrinsic->use_begin(),
- E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
- if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
-
- bool URoRInvoke = false;
- SmallPtrSet<IntrinsicInst*, 8> SelCalls;
- SmallPtrSet<PHINode*, 32> SeenPHIs;
- Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs);
-
- if (URoRInvoke) {
- // This EH pointer is being used by an invoke of an URoR instruction and
- // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
- // need to convert it to a 'catch-all'.
- for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
- if (!HasCatchAllInSelector(*SI))
- SelsToConvert.insert(*SI);
- }
- }
- }
-
- if (!SelsToConvert.empty()) {
- // Convert all clean-up eh.selectors, which are associated with "invokes" of
- // URoR calls, into catch-all eh.selectors.
- Changed = true;
-
- for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelsToConvert.begin(), SE = SelsToConvert.end();
- SI != SE; ++SI) {
- IntrinsicInst *II = *SI;
-
- // Use the exception object pointer and the personality function
- // from the original selector.
- CallSite CS(II);
- IntrinsicInst::op_iterator I = CS.arg_begin();
- IntrinsicInst::op_iterator E = CS.arg_end();
- IntrinsicInst::op_iterator B = prior(E);
-
- // Exclude last argument if it is an integer.
- if (isa<ConstantInt>(B)) E = B;
+ if (!ExnObj)
+ ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI);
- // Add exception object pointer (front).
- // Add personality function (next).
- // Add in any filter IDs (rest).
- SmallVector<Value*, 8> Args(I, E);
+ RI->eraseFromParent();
- Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
-
- CallInst *NewSelector =
- CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
-
- NewSelector->setTailCall(II->isTailCall());
- NewSelector->setAttributes(II->getAttributes());
- NewSelector->setCallingConv(II->getCallingConv());
-
- II->replaceAllUsesWith(NewSelector);
- II->eraseFromParent();
- }
+ if (EraseIVIs) {
+ if (SelIVI->getNumUses() == 0)
+ SelIVI->eraseFromParent();
+ if (ExcIVI->getNumUses() == 0)
+ ExcIVI->eraseFromParent();
+ if (SelLoad && SelLoad->getNumUses() == 0)
+ SelLoad->eraseFromParent();
}
- Changed |= CleanupSelectors(CatchAllSels);
- return Changed;
-}
-
-/// NormalizeLandingPads - Normalize and discover landing pads, noting them
-/// in the LandingPads set. A landing pad is normal if the only CFG edges
-/// that end at it are unwind edges from invoke instructions. If we inlined
-/// through an invoke we could have a normal branch from the previous
-/// unwind block through to the landing pad for the original invoke.
-/// Abnormal landing pads are fixed up by redirecting all unwind edges to
-/// a new basic block which falls through to the original.
-bool DwarfEHPrepare::NormalizeLandingPads() {
- bool Changed = false;
-
- const MCAsmInfo *MAI = TM->getMCAsmInfo();
- bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
-
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
- TerminatorInst *TI = I->getTerminator();
- if (!isa<InvokeInst>(TI))
- continue;
- BasicBlock *LPad = TI->getSuccessor(1);
- // Skip landing pads that have already been normalized.
- if (LandingPads.count(LPad))
- continue;
-
- // Check that only invoke unwind edges end at the landing pad.
- bool OnlyUnwoundTo = true;
- bool SwitchOK = usingSjLjEH;
- for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
- PI != PE; ++PI) {
- TerminatorInst *PT = (*PI)->getTerminator();
- // The SjLj dispatch block uses a switch instruction. This is effectively
- // an unwind edge, so we can disregard it here. There will only ever
- // be one dispatch, however, so if there are multiple switches, one
- // of them truly is a normal edge, not an unwind edge.
- if (SwitchOK && isa<SwitchInst>(PT)) {
- SwitchOK = false;
- continue;
- }
- if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
- OnlyUnwoundTo = false;
- break;
- }
- }
-
- if (OnlyUnwoundTo) {
- // Only unwind edges lead to the landing pad. Remember the landing pad.
- LandingPads.insert(LPad);
- continue;
- }
-
- // At least one normal edge ends at the landing pad. Redirect the unwind
- // edges to a new basic block which falls through into this one.
-
- // Create the new basic block.
- BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
- LPad->getName() + "_unwind_edge");
-
- // Insert it into the function right before the original landing pad.
- LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
-
- // Redirect unwind edges from the original landing pad to NewBB.
- for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
- TerminatorInst *PT = (*PI++)->getTerminator();
- if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
- // Unwind to the new block.
- PT->setSuccessor(1, NewBB);
- }
-
- // If there are any PHI nodes in LPad, we need to update them so that they
- // merge incoming values from NewBB instead.
- for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
-
- // Check to see if all of the values coming in via unwind edges are the
- // same. If so, we don't need to create a new PHI node.
- Value *InVal = PN->getIncomingValueForBlock(*PB);
- for (pred_iterator PI = PB; PI != PE; ++PI) {
- if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
- InVal = 0;
- break;
- }
- }
-
- if (InVal == 0) {
- // Different unwind edges have different values. Create a new PHI node
- // in NewBB.
- PHINode *NewPN = PHINode::Create(PN->getType(),
- PN->getNumIncomingValues(),
- PN->getName()+".unwind", NewBB);
- // Add an entry for each unwind edge, using the value from the old PHI.
- for (pred_iterator PI = PB; PI != PE; ++PI)
- NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
-
- // Now use this new PHI as the common incoming value for NewBB in PN.
- InVal = NewPN;
- }
-
- // Revector exactly one entry in the PHI node to come from NewBB
- // and delete all other entries that come from unwind edges. If
- // there are both normal and unwind edges from the same predecessor,
- // this leaves an entry for the normal edge.
- for (pred_iterator PI = PB; PI != PE; ++PI)
- PN->removeIncomingValue(*PI);
- PN->addIncoming(InVal, NewBB);
- }
-
- // Add a fallthrough from NewBB to the original landing pad.
- BranchInst::Create(LPad, NewBB);
-
- // Now update DominatorTree analysis information.
- DT->splitBlock(NewBB);
-
- // Remember the newly constructed landing pad. The original landing pad
- // LPad is no longer a landing pad now that all unwind edges have been
- // revectored to NewBB.
- LandingPads.insert(NewBB);
- ++NumLandingPadsSplit;
- Changed = true;
- }
-
- return Changed;
-}
-
-/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
-/// rethrowing any previously caught exception. This will crash horribly
-/// at runtime if there is no such exception: using unwind to throw a new
-/// exception is currently not supported.
-bool DwarfEHPrepare::LowerUnwindsAndResumes() {
- SmallVector<Instruction*, 16> ResumeInsts;
-
- for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) {
- for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){
- if (isa<UnwindInst>(bi))
- ResumeInsts.push_back(bi);
- else if (CallInst *call = dyn_cast<CallInst>(bi))
- if (Function *fn = dyn_cast<Function>(call->getCalledValue()))
- if (fn->getName() == "llvm.eh.resume")
- ResumeInsts.push_back(bi);
- }
- }
-
- if (ResumeInsts.empty()) return false;
-
- // Find the rewind function if we didn't already.
- if (!RewindFunction) {
- LLVMContext &Ctx = ResumeInsts[0]->getContext();
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
- Type::getInt8PtrTy(Ctx), false);
- const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
- }
-
- bool Changed = false;
-
- for (SmallVectorImpl<Instruction*>::iterator
- I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) {
- Instruction *RI = *I;
-
- // Replace the resuming instruction with a call to _Unwind_Resume (or the
- // appropriate target equivalent).
-
- llvm::Value *ExnValue;
- if (isa<UnwindInst>(RI))
- ExnValue = CreateExceptionValueCall(RI->getParent());
- else
- ExnValue = cast<CallInst>(RI)->getArgOperand(0);
-
- // Create the call...
- CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI);
- CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
-
- // ...followed by an UnreachableInst, if it was an unwind.
- // Calls to llvm.eh.resume are typically already followed by this.
- if (isa<UnwindInst>(RI))
- new UnreachableInst(RI->getContext(), RI);
-
- if (isa<UnwindInst>(RI))
- ++NumUnwindsLowered;
- else
- ++NumResumesLowered;
-
- // Nuke the resume instruction.
- RI->eraseFromParent();
-
- Changed = true;
- }
-
- return Changed;
-}
-
-/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
-/// landing pads by replacing calls outside of landing pads with direct use of
-/// a register holding the appropriate value; this requires adding calls inside
-/// all landing pads to initialize the register. Also, move eh.exception calls
-/// inside landing pads to the start of the landing pad (optional, but may make
-/// things simpler for later passes).
-bool DwarfEHPrepare::MoveExceptionValueCalls() {
- // If the eh.exception intrinsic is not declared in the module then there is
- // nothing to do. Speed up compilation by checking for this common case.
- if (!ExceptionValueIntrinsic &&
- !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
- return false;
-
- bool Changed = false;
-
- // Move calls to eh.exception that are inside a landing pad to the start of
- // the landing pad.
- for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI) {
- BasicBlock *LP = *LI;
- for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
- II != IE;)
- if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
- // Found a call to eh.exception.
- if (!EI->use_empty()) {
- // If there is already a call to eh.exception at the start of the
- // landing pad, then get hold of it; otherwise create such a call.
- Value *CallAtStart = CreateExceptionValueCall(LP);
-
- // If the call was at the start of a landing pad then leave it alone.
- if (EI == CallAtStart)
- continue;
- EI->replaceAllUsesWith(CallAtStart);
- }
- EI->eraseFromParent();
- ++NumExceptionValuesMoved;
- Changed = true;
- }
- }
-
- // Look for calls to eh.exception that are not in a landing pad. If one is
- // found, then a register that holds the exception value will be created in
- // each landing pad, and the SSAUpdater will be used to compute the values
- // returned by eh.exception calls outside of landing pads.
- SSAUpdater SSA;
-
- // Remember where we found the eh.exception call, to avoid rescanning earlier
- // basic blocks which we already know contain no eh.exception calls.
- bool FoundCallOutsideLandingPad = false;
- Function::iterator BB = F->begin();
- for (Function::iterator BE = F->end(); BB != BE; ++BB) {
- // Skip over landing pads.
- if (LandingPads.count(BB))
- continue;
-
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE; ++II)
- if (isa<EHExceptionInst>(II)) {
- SSA.Initialize(II->getType(), II->getName());
- FoundCallOutsideLandingPad = true;
- break;
- }
-
- if (FoundCallOutsideLandingPad)
- break;
- }
-
- // If all calls to eh.exception are in landing pads then we are done.
- if (!FoundCallOutsideLandingPad)
- return Changed;
-
- // Add a call to eh.exception at the start of each landing pad, and tell the
- // SSAUpdater that this is the value produced by the landing pad.
- for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI)
- SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
-
- // Now turn all calls to eh.exception that are not in a landing pad into a use
- // of the appropriate register.
- for (Function::iterator BE = F->end(); BB != BE; ++BB) {
- // Skip over landing pads.
- if (LandingPads.count(BB))
- continue;
-
- for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
- II != IE;)
- if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
- // Found a call to eh.exception, replace it with the value from any
- // upstream landing pad(s).
- EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
- EI->eraseFromParent();
- ++NumExceptionValuesMoved;
- }
- }
-
- return true;
-}
-
-/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
-/// the start of the basic block (unless there already is one, in which case
-/// the existing call is returned).
-Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
- Instruction *Start = BB->getFirstNonPHIOrDbg();
- // Is this a call to eh.exception?
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
- if (CI->getIntrinsicID() == Intrinsic::eh_exception)
- // Reuse the existing call.
- return Start;
-
- // Find the eh.exception intrinsic if we didn't already.
- if (!ExceptionValueIntrinsic)
- ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::eh_exception);
-
- // Create the call.
- return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+ return ExnObj;
}
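GetExceptionObject works because the value reaching resume is normally the { exception pointer, selector } aggregate assembled by two insertvalue instructions; the code pattern-matches that chain so the original exception pointer (index 0) can be reused and the dead aggregate construction erased. A toy model of the recovery, with std::pair standing in for the IR aggregate:

#include <cstdio>
#include <utility>

int main() {
  int Storage = 0;
  void *ExnPtr = &Storage;                          // index 0: the exception object
  int Selector = 7;                                 // index 1: the selector value
  std::pair<void *, int> LPadVal(ExnPtr, Selector); // the value fed to `resume`
  void *ExnObj = LPadVal.first;                     // what GetExceptionObject returns
  std::printf("exn.obj = %p, sel = %d\n", ExnObj, LPadVal.second);
}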
/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present
/// into calls to the appropriate _Unwind_Resume function.
-bool DwarfEHPrepare::InsertUnwindResumeCalls() {
+bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) {
bool UsesNewEH = false;
SmallVector<ResumeInst*, 16> Resumes;
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+ for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
TerminatorInst *TI = I->getTerminator();
if (ResumeInst *RI = dyn_cast<ResumeInst>(TI))
Resumes.push_back(RI);
@@ -682,27 +127,45 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
Type::getInt8PtrTy(Ctx), false);
const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
- RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+ RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy);
}
// Create the basic block where the _Unwind_Resume call will live.
- LLVMContext &Ctx = F->getContext();
- BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F);
- PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(),
+ LLVMContext &Ctx = Fn.getContext();
+ unsigned ResumesSize = Resumes.size();
+
+ if (ResumesSize == 1) {
+ // Instead of creating a new BB and PHI node, just append the call to
+ // _Unwind_Resume to the end of the single resume block.
+ ResumeInst *RI = Resumes.front();
+ BasicBlock *UnwindBB = RI->getParent();
+ Instruction *ExnObj = GetExceptionObject(RI);
+
+ // Call the _Unwind_Resume function.
+ CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB);
+ CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+
+ // We never expect _Unwind_Resume to return.
+ new UnreachableInst(Ctx, UnwindBB);
+ return true;
+ }
+
+ BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn);
+ PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesSize,
"exn.obj", UnwindBB);
// Extract the exception object from the ResumeInst and add it to the PHI node
// that feeds the _Unwind_Resume call.
- BasicBlock *UnwindBBDom = Resumes[0]->getParent();
for (SmallVectorImpl<ResumeInst*>::iterator
I = Resumes.begin(), E = Resumes.end(); I != E; ++I) {
ResumeInst *RI = *I;
- BranchInst::Create(UnwindBB, RI->getParent());
- ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0),
- 0, "exn.obj", RI);
- PN->addIncoming(ExnObj, RI->getParent());
- UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom);
- RI->eraseFromParent();
+ BasicBlock *Parent = RI->getParent();
+ BranchInst::Create(UnwindBB, Parent);
+
+ Instruction *ExnObj = GetExceptionObject(RI);
+ PN->addIncoming(ExnObj, Parent);
+
+ ++NumResumesLowered;
}
// Call the function.
@@ -711,40 +174,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() {
// We never expect _Unwind_Resume to return.
new UnreachableInst(Ctx, UnwindBB);
-
- // Now update DominatorTree analysis information.
- DT->addNewBlock(UnwindBB, UnwindBBDom);
return true;
}
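Structurally, the rewrite turns every block that ended in resume into a predecessor of one shared unwind_resume block, whose PHI feeds a single _Unwind_Resume call; with exactly one resume the call is simply appended in place. A sketch of that control-flow shape, with hypothetical names and a function parameter modeling the PHI:

#include <cstdio>
#include <vector>

// The shared "unwind_resume" block; the parameter plays the role of the PHI
// that merges each predecessor's exception object.
static void sharedUnwindBlock(void *PhiExnObj) {
  std::printf("_Unwind_Resume(%p)\n", PhiExnObj); // never returns in real code
}

int main() {
  int A = 1, B = 2;
  std::vector<void *> ResumeSites = {&A, &B}; // blocks that ended in `resume`
  if (ResumeSites.size() == 1) {
    sharedUnwindBlock(ResumeSites.front());   // fast path: call appended in place
    return 0;
  }
  for (void *Exn : ResumeSites)               // every former resume site branches here
    sharedUnwindBlock(Exn);
}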
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
- bool Changed = false;
-
- // Initialize internal state.
- DT = &getAnalysis<DominatorTree>(); // FIXME: We won't need this with the new EH.
- F = &Fn;
-
- if (InsertUnwindResumeCalls()) {
- // FIXME: The reset of this function can go once the new EH is done.
- LandingPads.clear();
- return true;
- }
-
- // Ensure that only unwind edges end at landing pads (a landing pad is a
- // basic block where an invoke unwind edge ends).
- Changed |= NormalizeLandingPads();
-
- // Turn unwind instructions and eh.resume calls into libcalls.
- Changed |= LowerUnwindsAndResumes();
-
- // TODO: Move eh.selector calls to landing pads and combine them.
-
- // Move eh.exception calls to landing pads.
- Changed |= MoveExceptionValueCalls();
-
- Changed |= HandleURoRInvokes();
-
- LandingPads.clear();
-
+ bool Changed = InsertUnwindResumeCalls(Fn);
return Changed;
}
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
deleted file mode 100644
index 5b634682cc87..000000000000
--- a/lib/CodeGen/ELF.h
+++ /dev/null
@@ -1,227 +0,0 @@
-//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header contains common, non-processor-specific data structures and
-// constants for the ELF file format.
-//
-// The details of the ELF32 bits in this file are largely based on the Tool
-// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
-// Version 1.2, May 1995. The ELF64 is based on HP/Intel definition of the
-// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CODEGEN_ELF_H
-#define CODEGEN_ELF_H
-
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
- class GlobalValue;
-
- /// ELFSym - This struct contains information about each symbol that is
- /// added to logical symbol table for the module. This is eventually
- /// turned into a real symbol table in the file.
- struct ELFSym {
-
- // ELF symbols are related to llvm ones by being one of the two llvm
- // types, for the other ones (section, file, func) a null pointer is
- // assumed by default.
- union {
- const GlobalValue *GV; // If this is a pointer to a GV
- const char *Ext; // If this is a pointer to a named symbol
- } Source;
-
- // Describes from which source type this ELF symbol comes from,
- // they can be GlobalValue, ExternalSymbol or neither.
- enum {
- isGV, // The Source.GV field is valid.
- isExtSym, // The Source.ExtSym field is valid.
- isOther // Not a GlobalValue or External Symbol
- };
- unsigned SourceType;
-
- bool isGlobalValue() const { return SourceType == isGV; }
- bool isExternalSym() const { return SourceType == isExtSym; }
-
- // getGlobalValue - If this is a global value which originated the
- // elf symbol, return a reference to it.
- const GlobalValue *getGlobalValue() const {
- assert(SourceType == isGV && "This is not a global value");
- return Source.GV;
- }
-
- // getExternalSym - If this is an external symbol which originated the
- // elf symbol, return a reference to it.
- const char *getExternalSymbol() const {
- assert(SourceType == isExtSym && "This is not an external symbol");
- return Source.Ext;
- }
-
- // getGV - From a global value return a elf symbol to represent it
- static ELFSym *getGV(const GlobalValue *GV, unsigned Bind,
- unsigned Type, unsigned Visibility) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.GV = GV;
- Sym->setBind(Bind);
- Sym->setType(Type);
- Sym->setVisibility(Visibility);
- Sym->SourceType = isGV;
- return Sym;
- }
-
- // getExtSym - Create and return an elf symbol to represent an
- // external symbol
- static ELFSym *getExtSym(const char *Ext) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.Ext = Ext;
- Sym->setBind(ELF::STB_GLOBAL);
- Sym->setType(ELF::STT_NOTYPE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SourceType = isExtSym;
- return Sym;
- }
-
- // getSectionSym - Returns a elf symbol to represent an elf section
- static ELFSym *getSectionSym() {
- ELFSym *Sym = new ELFSym();
- Sym->setBind(ELF::STB_LOCAL);
- Sym->setType(ELF::STT_SECTION);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SourceType = isOther;
- return Sym;
- }
-
- // getFileSym - Returns a elf symbol to represent the module identifier
- static ELFSym *getFileSym() {
- ELFSym *Sym = new ELFSym();
- Sym->setBind(ELF::STB_LOCAL);
- Sym->setType(ELF::STT_FILE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
- Sym->SourceType = isOther;
- return Sym;
- }
-
- // getUndefGV - Returns a STT_NOTYPE symbol
- static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) {
- ELFSym *Sym = new ELFSym();
- Sym->Source.GV = GV;
- Sym->setBind(Bind);
- Sym->setType(ELF::STT_NOTYPE);
- Sym->setVisibility(ELF::STV_DEFAULT);
- Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
- Sym->SourceType = isGV;
- return Sym;
- }
-
- // ELF specific fields
- unsigned NameIdx; // Index in .strtab of name, once emitted.
- uint64_t Value;
- unsigned Size;
- uint8_t Info;
- uint8_t Other;
- unsigned short SectionIdx;
-
- // Symbol index into the Symbol table
- unsigned SymTabIdx;
-
- ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
- Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
- SymTabIdx(0) {}
-
- unsigned getBind() const { return (Info >> 4) & 0xf; }
- unsigned getType() const { return Info & 0xf; }
- bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
- bool isFileType() const { return getType() == ELF::STT_FILE; }
-
- void setBind(unsigned X) {
- assert(X == (X & 0xF) && "Bind value out of range!");
- Info = (Info & 0x0F) | (X << 4);
- }
-
- void setType(unsigned X) {
- assert(X == (X & 0xF) && "Type value out of range!");
- Info = (Info & 0xF0) | X;
- }
-
- void setVisibility(unsigned V) {
- assert(V == (V & 0x3) && "Visibility value out of range!");
- Other = V;
- }
- };
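The setBind/setType helpers in the struct above pack the symbol's binding into the high nibble and its type into the low nibble of the one-byte Info field, matching the ELF spec's ELF32_ST_INFO(bind, type) encoding. A small round-trip check:

#include <cassert>
#include <cstdio>

int main() {
  unsigned Bind = 1, Type = 2;  // STB_GLOBAL, STT_FUNC
  unsigned char Info = (unsigned char)((Bind << 4) | (Type & 0xF));
  assert(((Info >> 4) & 0xF) == Bind && (Info & 0xF) == Type);
  std::printf("st_info = 0x%02x\n", Info); // prints 0x12
}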
-
- /// ELFSection - This struct contains information about each section that is
- /// emitted to the file. This is eventually turned into the section header
- /// table at the end of the file.
- class ELFSection : public BinaryObject {
- public:
- // ELF specific fields
- unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted.
- unsigned Type; // sh_type - Section contents & semantics
- unsigned Flags; // sh_flags - Section flags.
- uint64_t Addr; // sh_addr - The mem addr this section is in.
- unsigned Offset; // sh_offset - Offset from the file start
- unsigned Size; // sh_size - The section size.
- unsigned Link; // sh_link - Section header table index link.
- unsigned Info; // sh_info - Auxiliary information.
- unsigned Align; // sh_addralign - Alignment of section.
- unsigned EntSize; // sh_entsize - Size of entries in the section e
-
- /// SectionIdx - The number of the section in the Section Table.
- unsigned short SectionIdx;
-
- /// Sym - The symbol to represent this section if it has one.
- ELFSym *Sym;
-
- /// getSymIndex - Returns the symbol table index of the symbol
- /// representing this section.
- unsigned getSymbolTableIndex() const {
- assert(Sym && "section not present in the symbol table");
- return Sym->SymTabIdx;
- }
-
- ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit)
- : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0),
- Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {}
- };
-
- /// ELFRelocation - This class contains all the information necessary to
- /// to generate any 32-bit or 64-bit ELF relocation entry.
- class ELFRelocation {
- uint64_t r_offset; // offset in the section of the object this applies to
- uint32_t r_symidx; // symbol table index of the symbol to use
- uint32_t r_type; // machine specific relocation type
- int64_t r_add; // explicit relocation addend
- bool r_rela; // if true then the addend is part of the entry
- // otherwise the addend is at the location specified
- // by r_offset
- public:
- uint64_t getInfo(bool is64Bit) const {
- if (is64Bit)
- return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL);
- else
- return (r_symidx << 8) + (r_type & 0xFFL);
- }
-
- uint64_t getOffset() const { return r_offset; }
- int64_t getAddend() const { return r_add; }
-
- ELFRelocation(uint64_t off, uint32_t sym, uint32_t type,
- bool rela = true, int64_t addend = 0) :
- r_offset(off), r_symidx(sym), r_type(type),
- r_add(addend), r_rela(rela) {}
- };
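getInfo above mirrors the standard r_info encodings: ELF64_R_INFO places the symbol index in the upper 32 bits with the type below it, while ELF32_R_INFO uses a symbol-index shift of 8 and an 8-bit type. A stand-alone version of the same packing:

#include <cstdint>
#include <cstdio>

static uint64_t relocInfo(uint32_t SymIdx, uint32_t Type, bool Is64Bit) {
  if (Is64Bit)
    return ((uint64_t)SymIdx << 32) | (Type & 0xFFFFFFFFull); // ELF64_R_INFO
  return ((uint64_t)SymIdx << 8) | (Type & 0xFFull);          // ELF32_R_INFO
}

int main() {
  std::printf("%#llx\n", (unsigned long long)relocInfo(5, 10, true));  // 0x50000000a
  std::printf("%#llx\n", (unsigned long long)relocInfo(5, 10, false)); // 0x50a
}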
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
deleted file mode 100644
index 660424c3c141..000000000000
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ /dev/null
@@ -1,205 +0,0 @@
-//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "elfce"
-
-#include "ELF.h"
-#include "ELFWriter.h"
-#include "ELFCodeEmitter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-//===----------------------------------------------------------------------===//
-// ELFCodeEmitter Implementation
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-/// startFunction - This callback is invoked when a new machine function is
-/// about to be emitted.
-void ELFCodeEmitter::startFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "processing function: "
- << MF.getFunction()->getName() << "\n");
-
- // Get the ELF Section that this function belongs in.
- ES = &EW.getTextSection(MF.getFunction());
-
- // Set the desired binary object to be used by the code emitters
- setBinaryObject(ES);
-
- // Get the function alignment in bytes
- unsigned Align = (1 << MF.getAlignment());
-
- // The function must start on its required alignment
- ES->emitAlignment(Align);
-
- // Update the section alignment if needed.
- ES->Align = std::max(ES->Align, Align);
-
- // Record the function start offset
- FnStartOff = ES->getCurrentPCOffset();
-
- // Emit constant pool and jump tables to their appropriate sections.
- // They need to be emitted before the function because in some targets
- // the later may reference JT or CP entry address.
- emitConstantPool(MF.getConstantPool());
- if (MF.getJumpTableInfo())
- emitJumpTables(MF.getJumpTableInfo());
-}
-
-/// finishFunction - This callback is invoked after the function is completely
-/// finished.
-bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
- // Add a symbol to represent the function.
- const Function *F = MF.getFunction();
- ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
- EW.getGlobalELFVisibility(F));
- FnSym->SectionIdx = ES->SectionIdx;
- FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
- EW.AddPendingGlobalSymbol(F, true);
-
- // Offset from start of Section
- FnSym->Value = FnStartOff;
-
- if (!F->hasPrivateLinkage())
- EW.SymbolList.push_back(FnSym);
-
- // Patch up Jump Table Section relocations to use the real MBBs offsets
- // now that the MBB label offsets inside the function are known.
- if (MF.getJumpTableInfo()) {
- ELFSection &JTSection = EW.getJumpTableSection();
- for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
- MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
- MachineRelocation &MR = *MRI;
- uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
- MR.setResultPointer((void*)MBBOffset);
- MR.setConstantVal(ES->SectionIdx);
- JTSection.addRelocation(MR);
- }
- }
-
- // If we have emitted any relocations to function-specific objects such as
- // basic blocks, constant pools entries, or jump tables, record their
- // addresses now so that we can rewrite them with the correct addresses later
- for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
- MachineRelocation &MR = Relocations[i];
- intptr_t Addr;
- if (MR.isGlobalValue()) {
- EW.AddPendingGlobalSymbol(MR.getGlobalValue());
- } else if (MR.isExternalSymbol()) {
- EW.AddPendingExternalSymbol(MR.getExternalSymbol());
- } else if (MR.isBasicBlock()) {
- Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
- MR.setConstantVal(ES->SectionIdx);
- MR.setResultPointer((void*)Addr);
- } else if (MR.isConstantPoolIndex()) {
- Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
- MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
- MR.setResultPointer((void*)Addr);
- } else if (MR.isJumpTableIndex()) {
- ELFSection &JTSection = EW.getJumpTableSection();
- Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
- MR.setConstantVal(JTSection.SectionIdx);
- MR.setResultPointer((void*)Addr);
- } else {
- llvm_unreachable("Unhandled relocation type");
- }
- ES->addRelocation(MR);
- }
-
- // Clear per-function data structures.
- JTRelocations.clear();
- Relocations.clear();
- CPLocations.clear();
- CPSections.clear();
- JTLocations.clear();
- MBBLocations.clear();
- return false;
-}
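The relocation loop in finishFunction is a deferred-patching scheme: relocations against function-local targets (basic blocks, constant-pool and jump-table entries) are recorded first and only given their real in-section addresses once those become known. A minimal sketch of the idea, with Reloc and TargetAddr as hypothetical stand-ins for MachineRelocation and the offset bookkeeping:

#include <cstdio>
#include <map>
#include <vector>

struct Reloc { int TargetId; long Addr; };

int main() {
  std::vector<Reloc> Relocs = {{0, 0}, {1, 0}};            // placeholders recorded first
  std::map<int, long> TargetAddr = {{0, 0x40}, {1, 0x80}}; // known at function end
  for (Reloc &R : Relocs)
    R.Addr = TargetAddr[R.TargetId];                       // rewrite with real offsets
  for (const Reloc &R : Relocs)
    std::printf("reloc target %d -> %#lx\n", R.TargetId, R.Addr);
}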
-
-/// emitConstantPool - For each constant pool entry, figure out which section
-/// the constant should live in and emit the constant
-void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
- const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
- if (CP.empty()) return;
-
- // TODO: handle PIC codegen
- assert(TM.getRelocationModel() != Reloc::PIC_ &&
- "PIC codegen not yet handled for elf constant pools!");
-
- for (unsigned i = 0, e = CP.size(); i != e; ++i) {
- MachineConstantPoolEntry CPE = CP[i];
-
- // Record the constant pool location and the section index
- ELFSection &CstPool = EW.getConstantPoolSection(CPE);
- CPLocations.push_back(CstPool.size());
- CPSections.push_back(CstPool.SectionIdx);
-
- if (CPE.isMachineConstantPoolEntry())
- assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
-
- // Emit the constant to constant pool section
- EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
- }
-}
-
-/// emitJumpTables - Emit all the jump tables for a given jump table info
-/// record to the appropriate section.
-void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return;
-
- // FIXME: handle PIC codegen
- assert(TM.getRelocationModel() != Reloc::PIC_ &&
- "PIC codegen not yet handled for elf jump tables!");
-
- const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
- unsigned EntrySize = 4; //MJTI->getEntrySize();
-
- // Get the ELF Section to emit the jump table
- ELFSection &JTSection = EW.getJumpTableSection();
-
- // For each JT, record its offset from the start of the section
- for (unsigned i = 0, e = JT.size(); i != e; ++i) {
- const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
-
- // Record JT 'i' offset in the JT section
- JTLocations.push_back(JTSection.size());
-
- // Each MBB entry in the Jump table section has a relocation entry
- // against the current text section.
- for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
- unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
- MachineRelocation MR =
- MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
-
- // Add the relocation to the Jump Table section
- JTRelocations.push_back(MR);
-
- // Output placeholder for MBB in the JT section
- for (unsigned s=0; s < EntrySize; ++s)
- JTSection.emitByte(0);
- }
- }
-}
-
-} // end namespace llvm
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
deleted file mode 100644
index 8671c674eecf..000000000000
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ELFCODEEMITTER_H
-#define ELFCODEEMITTER_H
-
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-#include <vector>
-
-namespace llvm {
- class ELFWriter;
- class ELFSection;
-
- /// ELFCodeEmitter - This class is used by the ELFWriter to
- /// emit the code for functions to the ELF file.
- class ELFCodeEmitter : public ObjectCodeEmitter {
- ELFWriter &EW;
-
- /// Target machine description
- TargetMachine &TM;
-
- /// Section containing code for functions
- ELFSection *ES;
-
- /// Relocations - Record relocations needed by the current function
- std::vector<MachineRelocation> Relocations;
-
- /// JTRelocations - Record relocations needed by the relocation
- /// section.
- std::vector<MachineRelocation> JTRelocations;
-
- /// FnStartPtr - Function offset from the beginning of ELFSection 'ES'
- uintptr_t FnStartOff;
- public:
- explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
-
- /// addRelocation - Register new relocations for this function
- void addRelocation(const MachineRelocation &MR) {
- Relocations.push_back(MR);
- }
-
- /// emitConstantPool - For each constant pool entry, figure out which
- /// section the constant should live in and emit data to it
- void emitConstantPool(MachineConstantPool *MCP);
-
- /// emitJumpTables - Emit all the jump tables for a given jump table
- /// info and record them to the appropriate section.
- void emitJumpTables(MachineJumpTableInfo *MJTI);
-
- void startFunction(MachineFunction &F);
- bool finishFunction(MachineFunction &F);
-
- /// emitLabel - Emits a label
- virtual void emitLabel(MCSymbol *Label) {
- assert(0 && "emitLabel not implemented");
- }
-
- /// getLabelAddress - Return the address of the specified LabelID,
- /// only usable after the LabelID has been emitted.
- virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
- assert(0 && "getLabelAddress not implemented");
- return 0;
- }
-
- virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
-
-}; // end class ELFCodeEmitter
-
-} // end namespace llvm
-
-#endif
-
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
deleted file mode 100644
index f2c218565854..000000000000
--- a/lib/CodeGen/ELFWriter.cpp
+++ /dev/null
@@ -1,1105 +0,0 @@
-//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the target-independent ELF writer. This file writes out
-// the ELF file in the following order:
-//
-// #1. ELF Header
-// #2. '.text' section
-// #3. '.data' section
-// #4. '.bss' section (conceptual position in file)
-// ...
-// #X. '.shstrtab' section
-// #Y. Section Table
-//
-// The entries in the section table are laid out as:
-// #0. Null entry [required]
-// #1. ".text" entry - the program code
-// #2. ".data" entry - global variables with initializers. [ if needed ]
-// #3. ".bss" entry - global variables without initializers. [ if needed ]
-// ...
-// #N. ".shstrtab" entry - String table for the section names.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "elfwriter"
-#include "ELF.h"
-#include "ELFWriter.h"
-#include "ELFCodeEmitter.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetELFWriterInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-using namespace llvm;
-
-char ELFWriter::ID = 0;
-
-//===----------------------------------------------------------------------===//
-// ELFWriter Implementation
-//===----------------------------------------------------------------------===//
-
-ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
- : MachineFunctionPass(ID), O(o), TM(tm),
- OutContext(*new MCContext(*TM.getMCAsmInfo(), *TM.getRegisterInfo(),
- &TM.getTargetLowering()->getObjFileLowering())),
- TLOF(TM.getTargetLowering()->getObjFileLowering()),
- is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
- isLittleEndian(TM.getTargetData()->isLittleEndian()),
- ElfHdr(isLittleEndian, is64Bit) {
-
- MAI = TM.getMCAsmInfo();
- TEW = TM.getELFWriterInfo();
-
- // Create the object code emitter object for this target.
- ElfCE = new ELFCodeEmitter(*this);
-
- // Initial number of sections
- NumSections = 0;
-}
-
-ELFWriter::~ELFWriter() {
- delete ElfCE;
- delete &OutContext;
-
- while(!SymbolList.empty()) {
- delete SymbolList.back();
- SymbolList.pop_back();
- }
-
- while(!PrivateSyms.empty()) {
- delete PrivateSyms.back();
- PrivateSyms.pop_back();
- }
-
- while(!SectionList.empty()) {
- delete SectionList.back();
- SectionList.pop_back();
- }
-
- // Release the name mangler object.
- delete Mang; Mang = 0;
-}
-
-// doInitialization - Emit the file header and all of the global variables for
-// the module to the ELF file.
-bool ELFWriter::doInitialization(Module &M) {
- // Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
-
- Mang = new Mangler(OutContext, *TM.getTargetData());
-
- // ELF Header
- // ----------
- // Fields e_shnum e_shstrndx are only known after all section have
- // been emitted. They locations in the ouput buffer are recorded so
- // to be patched up later.
- //
- // Note
- // ----
- // emitWord method behaves differently for ELF32 and ELF64, writing
- // 4 bytes in the former and 8 in the last for *_off and *_addr elf types
-
- ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
- ElfHdr.emitByte('E'); // e_ident[EI_MAG1]
- ElfHdr.emitByte('L'); // e_ident[EI_MAG2]
- ElfHdr.emitByte('F'); // e_ident[EI_MAG3]
-
- ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
- ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
- ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
- ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
-
- ElfHdr.emitWord16(ELF::ET_REL); // e_type
- ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
- ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
- ElfHdr.emitWord(0); // e_entry, no entry point in .o file
- ElfHdr.emitWord(0); // e_phoff, no program header for .o
- ELFHdr_e_shoff_Offset = ElfHdr.size();
- ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes
- ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants
- ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size
- ElfHdr.emitWord16(0); // e_phentsize = prog header entry size
- ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0
-
- // e_shentsize = Section header entry size
- ElfHdr.emitWord16(TEW->getSHdrSize());
-
- // e_shnum = # of section header ents
- ELFHdr_e_shnum_Offset = ElfHdr.size();
- ElfHdr.emitWord16(0); // Placeholder
-
- // e_shstrndx = Section # of '.shstrtab'
- ELFHdr_e_shstrndx_Offset = ElfHdr.size();
- ElfHdr.emitWord16(0); // Placeholder
-
- // Add the null section, which is required to be first in the file.
- getNullSection();
-
- // The first entry in the symtab is the null symbol and the second
- // is a local symbol containing the module/file name
- SymbolList.push_back(new ELFSym());
- SymbolList.push_back(ELFSym::getFileSym());
-
- return false;
-}
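The header emission above starts with the fixed four-byte ELF magic, followed by the class, endianness, and version bytes of e_ident, padded out to EI_NIDENT (16) bytes. A toy emitter for just that prefix, assuming the spec's ELFCLASS64/ELFDATA2LSB values:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> Ident = {0x7F, 'E', 'L', 'F', // fixed ELF magic
                                2,   // EI_CLASS: ELFCLASS64
                                1,   // EI_DATA: ELFDATA2LSB (little endian)
                                1};  // EI_VERSION: EV_CURRENT
  Ident.resize(16, 0); // pad e_ident to EI_NIDENT bytes
  for (uint8_t B : Ident) std::printf("%02x ", B);
  std::printf("\n");
}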
-
-// AddPendingGlobalSymbol - Add a global to be processed and to
-// the global symbol lookup, use a zero index because the table
-// index will be determined later.
-void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
- bool AddToLookup /* = false */) {
- PendingGlobals.insert(GV);
- if (AddToLookup)
- GblSymLookup[GV] = 0;
-}
-
-// AddPendingExternalSymbol - Add the external to be processed
-// and to the external symbol lookup, use a zero index because
-// the symbol table index will be determined later.
-void ELFWriter::AddPendingExternalSymbol(const char *External) {
- PendingExternals.insert(External);
- ExtSymLookup[External] = 0;
-}
-
-ELFSection &ELFWriter::getDataSection() {
- const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection();
- return getSection(Data->getSectionName(), Data->getType(),
- Data->getFlags(), 4);
-}
-
-ELFSection &ELFWriter::getBSSSection() {
- const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection();
- return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4);
-}
-
-// getCtorSection - Get the static constructor section
-ELFSection &ELFWriter::getCtorSection() {
- const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection();
- return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags());
-}
-
-// getDtorSection - Get the static destructor section
-ELFSection &ELFWriter::getDtorSection() {
- const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection();
- return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags());
-}
-
-// getTextSection - Get the text section for the specified function
-ELFSection &ELFWriter::getTextSection(const Function *F) {
- const MCSectionELF *Text =
- (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM);
- return getSection(Text->getSectionName(), Text->getType(), Text->getFlags());
-}
-
-// getJumpTableSection - Get a read only section for constants when
-// emitting jump tables. TODO: add PIC support
-ELFSection &ELFWriter::getJumpTableSection() {
- const MCSectionELF *JT =
- (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly());
- return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(),
- TM.getTargetData()->getPointerABIAlignment());
-}
-
-// getConstantPoolSection - Get a constant pool section based on the machine
-// constant pool entry type and relocation info.
-ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
- SectionKind Kind;
- switch (CPE.getRelocationInfo()) {
- default: llvm_unreachable("Unknown section kind");
- case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
- case 1:
- Kind = SectionKind::getReadOnlyWithRelLocal();
- break;
- case 0:
- switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
- case 4: Kind = SectionKind::getMergeableConst4(); break;
- case 8: Kind = SectionKind::getMergeableConst8(); break;
- case 16: Kind = SectionKind::getMergeableConst16(); break;
- default: Kind = SectionKind::getMergeableConst(); break;
- }
- }
-
- const MCSectionELF *CPSect =
- (const MCSectionELF *)TLOF.getSectionForConstant(Kind);
- return getSection(CPSect->getSectionName(), CPSect->getType(),
- CPSect->getFlags(), CPE.getAlignment());
-}
-
-// getRelocSection - Return the relocation section of section 'S'. 'RelA'
-// is true if the relocation section contains entries with addends.
-ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
- unsigned SectionType = TEW->hasRelocationAddend() ?
- ELF::SHT_RELA : ELF::SHT_REL;
-
- std::string SectionName(".rel");
- if (TEW->hasRelocationAddend())
- SectionName.append("a");
- SectionName.append(S.getName());
-
- return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
-}
-
-// getGlobalELFVisibility - Returns the ELF specific visibility type
-unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
- switch (GV->getVisibility()) {
- default:
- llvm_unreachable("unknown visibility type");
- case GlobalValue::DefaultVisibility:
- return ELF::STV_DEFAULT;
- case GlobalValue::HiddenVisibility:
- return ELF::STV_HIDDEN;
- case GlobalValue::ProtectedVisibility:
- return ELF::STV_PROTECTED;
- }
- return 0;
-}
-
-// getGlobalELFBinding - Returns the ELF specific binding type
-unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
- if (GV->hasInternalLinkage())
- return ELF::STB_LOCAL;
-
- if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
- return ELF::STB_WEAK;
-
- return ELF::STB_GLOBAL;
-}
-
-// getGlobalELFType - Returns the ELF specific type for a global
-unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
- if (GV->isDeclaration())
- return ELF::STT_NOTYPE;
-
- if (isa<Function>(GV))
- return ELF::STT_FUNC;
-
- return ELF::STT_OBJECT;
-}
-
-// IsELFUndefSym - True if the global value must be marked as a symbol
-// which points to a SHN_UNDEF section. This means that the symbol has
-// no definition in the module.
-static bool IsELFUndefSym(const GlobalValue *GV) {
- return GV->isDeclaration() || (isa<Function>(GV));
-}
-
-// AddToSymbolList - Update the symbol lookup and, if the symbol is
-// private, add it to the PrivateSyms list; otherwise add it to SymbolList.
-void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
- assert(GblSym->isGlobalValue() && "Symbol must be a global value");
-
- const GlobalValue *GV = GblSym->getGlobalValue();
- if (GV->hasPrivateLinkage()) {
- // For private symbols, keep track of the index inside
- // the private list since it will never go to the symbol
- // table and won't be patched up later.
- PrivateSyms.push_back(GblSym);
- GblSymLookup[GV] = PrivateSyms.size()-1;
- } else {
- // Non-private symbols are left with zero indices until
- // they are patched up during symbol table emission
- // (where the indices are created).
- SymbolList.push_back(GblSym);
- GblSymLookup[GV] = 0;
- }
-}
-
-/// HasCommonSymbols - True if this section holds common symbols, this is
-/// indicated on the ELF object file by a symbol with SHN_COMMON section
-/// header index.
-static bool HasCommonSymbols(const MCSectionELF &S) {
- // FIXME: this is wrong, a common symbol can be in .data for example.
- if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
- return true;
-
- return false;
-}
-
-
-// EmitGlobal - Choose the right section for global and emit it
-void ELFWriter::EmitGlobal(const GlobalValue *GV) {
-
- // Check if the referenced symbol is already emitted
- if (GblSymLookup.find(GV) != GblSymLookup.end())
- return;
-
- // Handle ELF Bind, Visibility and Type for the current symbol
- unsigned SymBind = getGlobalELFBinding(GV);
- unsigned SymType = getGlobalELFType(GV);
- bool IsUndefSym = IsELFUndefSym(GV);
-
- ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
- : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
-
- if (!IsUndefSym) {
- assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-
- // Handle special llvm globals
- if (EmitSpecialLLVMGlobal(GVar))
- return;
-
- // Get the ELF section where this global belongs from TLOF
- const MCSectionELF *S =
- (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
- ELFSection &ES =
- getSection(S->getSectionName(), S->getType(), S->getFlags());
- SectionKind Kind = S->getKind();
-
- // The symbol alignment should update the section alignment if needed
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPreferredAlignment(GVar);
- unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
- GblSym->Size = Size;
-
- if (HasCommonSymbols(*S)) { // Symbol must go to a common section
- GblSym->SectionIdx = ELF::SHN_COMMON;
-
- // A new linkonce section is created for each global in the
- // common section; the default alignment is 1 and the symbol
- // value contains its alignment.
- ES.Align = 1;
- GblSym->Value = Align;
-
- } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
- GblSym->SectionIdx = ES.SectionIdx;
-
- // Update the size with alignment so that the next object can
- // start at the right offset in the section
- if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
- ES.Align = std::max(ES.Align, Align);
-
- // GblSym->Value should contain the virtual offset inside the section.
- // Virtual because the BSS space is not allocated on ELF objects
- GblSym->Value = ES.Size;
- ES.Size += Size;
-
- } else { // The symbol must go to some kind of data section
- GblSym->SectionIdx = ES.SectionIdx;
-
- // GblSym->Value should contain the symbol offset inside the section,
- // and all symbols should start on their required alignment boundary
- ES.Align = std::max(ES.Align, Align);
- ES.emitAlignment(Align);
- GblSym->Value = ES.size();
-
- // Emit the global to the data section 'ES'
- EmitGlobalConstant(GVar->getInitializer(), ES);
- }
- }
-
- AddToSymbolList(GblSym);
-}
-
-void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
- ELFSection &GblS) {
-
- // Print the fields in successive locations. Pad to align if needed!
- const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CVS->getType());
- const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
- uint64_t sizeSoFar = 0;
- for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
- const Constant* field = CVS->getOperand(i);
-
- // Check if padding is needed and insert one or more 0s.
- uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
- uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
- - cvsLayout->getElementOffset(i)) - fieldSize;
- sizeSoFar += fieldSize + padSize;
-
- // Now print the actual field value.
- EmitGlobalConstant(field, GblS);
-
- // Insert padding - this may include padding to increase the size of the
- // current field up to the ABI size (if the struct is not packed) as well
- // as padding to ensure that the next field starts at the right offset.
- GblS.emitZeros(padSize);
- }
- assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
- "Layout of constant struct may be incorrect!");
-}
-
-void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
- const TargetData *TD = TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CV->getType());
-
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
- for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
- EmitGlobalConstant(CVA->getOperand(i), GblS);
- return;
- } else if (isa<ConstantAggregateZero>(CV)) {
- GblS.emitZeros(Size);
- return;
- } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
- EmitGlobalConstantStruct(CVS, GblS);
- return;
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- APInt Val = CFP->getValueAPF().bitcastToAPInt();
- if (CFP->getType()->isDoubleTy())
- GblS.emitWord64(Val.getZExtValue());
- else if (CFP->getType()->isFloatTy())
- GblS.emitWord32(Val.getZExtValue());
- else if (CFP->getType()->isX86_FP80Ty()) {
- unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
- TD->getTypeStoreSize(CFP->getType());
- GblS.emitWordFP80(Val.getRawData(), PadSize);
- } else if (CFP->getType()->isPPC_FP128Ty())
- llvm_unreachable("PPC_FP128Ty global emission not implemented");
- return;
- } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- if (Size == 1)
- GblS.emitByte(CI->getZExtValue());
- else if (Size == 2)
- GblS.emitWord16(CI->getZExtValue());
- else if (Size == 4)
- GblS.emitWord32(CI->getZExtValue());
- else
- EmitGlobalConstantLargeInt(CI, GblS);
- return;
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- VectorType *PTy = CP->getType();
- for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
- EmitGlobalConstant(CP->getOperand(I), GblS);
- return;
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- // Resolve a constant expression which returns a (Constant, Offset)
- // pair. If 'Res.first' is a GlobalValue, emit a relocation with
- // the offset 'Res.second', otherwise emit a global constant as
- // is always done for non-constant-expression types.
- CstExprResTy Res = ResolveConstantExpr(CE);
- const Constant *Op = Res.first;
-
- if (isa<GlobalValue>(Op))
- EmitGlobalDataRelocation(cast<const GlobalValue>(Op),
- TD->getTypeAllocSize(Op->getType()),
- GblS, Res.second);
- else
- EmitGlobalConstant(Op, GblS);
-
- return;
- } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
- // Fill the data entry with zeros or emit a relocation entry
- if (isa<ConstantPointerNull>(CV))
- GblS.emitZeros(Size);
- else
- EmitGlobalDataRelocation(cast<const GlobalValue>(CV),
- Size, GblS);
- return;
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
- // This is a constant address for a global variable or function and
- // therefore must be referenced using a relocation entry.
- EmitGlobalDataRelocation(GV, Size, GblS);
- return;
- }
-
- std::string msg;
- raw_string_ostream ErrorMsg(msg);
- ErrorMsg << "Constant unimp for type: " << *CV->getType();
- report_fatal_error(ErrorMsg.str());
-}
-
-// ResolveConstantExpr - Resolve the constant expression until it stops
-// yielding other constant expressions.
-CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
- const TargetData *TD = TM.getTargetData();
-
- // Base case: CV is no longer a constant expression.
- if (!isa<ConstantExpr>(CV))
- return std::make_pair(CV, 0);
-
- const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- switch (CE->getOpcode()) {
- case Instruction::BitCast:
- return ResolveConstantExpr(CE->getOperand(0));
-
- case Instruction::GetElementPtr: {
- const Constant *ptrVal = CE->getOperand(0);
- SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
- int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), idxVec);
- return std::make_pair(ptrVal, Offset);
- }
- case Instruction::IntToPtr: {
- Constant *Op = CE->getOperand(0);
- Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
- false/*ZExt*/);
- return ResolveConstantExpr(Op);
- }
- case Instruction::PtrToInt: {
- Constant *Op = CE->getOperand(0);
- Type *Ty = CE->getType();
-
- // We can emit the pointer value into this slot if the slot is an
- // integer slot greater than or equal to the size of the pointer.
- if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
- return ResolveConstantExpr(Op);
-
- llvm_unreachable("Integer size less then pointer size");
- }
- case Instruction::Add:
- case Instruction::Sub: {
- // Only handle cases where there's a constant expression with GlobalValue
- // as first operand and ConstantInt as second, which are the cases we can
- // resolve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
- // 1) Instruction::Add => (global) + CstInt
- // 2) Instruction::Sub => (global) + -CstInt
- const Constant *Op0 = CE->getOperand(0);
- const Constant *Op1 = CE->getOperand(1);
- assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
-
- CstExprResTy Res = ResolveConstantExpr(Op0);
- assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
-
- const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
- switch (CE->getOpcode()) {
- case Instruction::Add:
- return std::make_pair(Res.first, RHS.getSExtValue());
- case Instruction::Sub:
- return std::make_pair(Res.first, (-RHS).getSExtValue());
- }
- }
- }
-
- report_fatal_error(CE->getOpcodeName() +
- StringRef(": Unsupported ConstantExpr type"));
-
- return std::make_pair(CV, 0); // silence warning
-}
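For readers unfamiliar with the (Constant, Offset) pairs produced above, a worked example may help: a GetElementPtr into a two-field struct resolves to the base global plus the byte offset of the addressed field. The sketch below computes that offset for a hypothetical { i32, i64 } layout; the value 8 assumes a typical ABI with natural alignment.

    #include <cstddef>
    #include <cstdint>

    // A GEP to field 'b' of { i32, i64 } resolves to (global, 8) under
    // natural alignment: 4 bytes of i32 followed by 4 bytes of padding.
    struct Example { int32_t a; int64_t b; };
    static const size_t FieldOneOffset = offsetof(Example, b); // 8 on typical ABIs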
-
-void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
- ELFSection &GblS, int64_t Offset) {
- // Create the relocation entry for the global value
- MachineRelocation MR =
- MachineRelocation::getGV(GblS.getCurrentPCOffset(),
- TEW->getAbsoluteLabelMachineRelTy(),
- const_cast<GlobalValue*>(GV),
- Offset);
-
- // Fill the data entry with zeros
- GblS.emitZeros(Size);
-
- // Add the relocation entry for the current data section
- GblS.addRelocation(MR);
-}
-
-void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
- ELFSection &S) {
- const TargetData *TD = TM.getTargetData();
- unsigned BitWidth = CI->getBitWidth();
- assert(isPowerOf2_32(BitWidth) &&
- "Non-power-of-2-sized integers not handled!");
-
- const uint64_t *RawData = CI->getValue().getRawData();
- uint64_t Val = 0;
- for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
- Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
- S.emitWord64(Val);
- }
-}
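The limb ordering above is the part most easily gotten backwards, so a standalone sketch may help: APInt stores its 64-bit words least-significant first, and on big-endian targets the most significant word must reach memory first. The helper below is hypothetical, not code from this file, and reproduces the same traversal:

    #include <cstdint>
    #include <vector>

    // Reorder the limbs of a multi-word integer into memory order.
    // 'Raw' holds the value least-significant limb first, as APInt does.
    std::vector<uint64_t> limbsInMemoryOrder(const std::vector<uint64_t> &Raw,
                                             bool BigEndian) {
      std::vector<uint64_t> Out;
      for (size_t i = 0, e = Raw.size(); i != e; ++i)
        Out.push_back(BigEndian ? Raw[e - i - 1] : Raw[i]);
      return Out;
    }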
-
-/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
-/// special global used by LLVM. If so, emit it and return true, otherwise
-/// do nothing and return false.
-bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
- if (GV->getName() == "llvm.used")
- llvm_unreachable("not implemented yet");
-
- // Ignore debug and non-emitted data. This handles llvm.compiler.used.
- if (GV->getSection() == "llvm.metadata" ||
- GV->hasAvailableExternallyLinkage())
- return true;
-
- if (!GV->hasAppendingLinkage()) return false;
-
- assert(GV->hasInitializer() && "Not a special LLVM global!");
-
- const TargetData *TD = TM.getTargetData();
- unsigned Align = TD->getPointerPrefAlignment();
- if (GV->getName() == "llvm.global_ctors") {
- ELFSection &Ctor = getCtorSection();
- Ctor.emitAlignment(Align);
- EmitXXStructorList(GV->getInitializer(), Ctor);
- return true;
- }
-
- if (GV->getName() == "llvm.global_dtors") {
- ELFSection &Dtor = getDtorSection();
- Dtor.emitAlignment(Align);
- EmitXXStructorList(GV->getInitializer(), Dtor);
- return true;
- }
-
- return false;
-}
-
-/// EmitXXStructorList - Emit the ctor or dtor list. This just emits the
-/// function pointers, ignoring the init priority.
-void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
- // Should be an array of '{ i32, void ()* }' structs. The first value is the
- // init priority, which we ignore.
- if (List->isNullValue()) return;
- const ConstantArray *InitList = cast<ConstantArray>(List);
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (InitList->getOperand(i)->isNullValue())
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
-
- if (CS->getOperand(1)->isNullValue())
- continue;
-
- // Emit the function pointer.
- EmitGlobalConstant(CS->getOperand(1), Xtor);
- }
-}
-
-bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
- // Nothing to do here, this is all done through the ElfCE object above.
- return false;
-}
-
-/// doFinalization - Now that the module has been completely processed, emit
-/// the ELF file to 'O'.
-bool ELFWriter::doFinalization(Module &M) {
- // Emit .data section placeholder
- getDataSection();
-
- // Emit .bss section placeholder
- getBSSSection();
-
- // Build and emit data, bss and "common" sections.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- EmitGlobal(I);
-
- // Emit all pending globals
- for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
- I != E; ++I)
- EmitGlobal(*I);
-
- // Emit all pending externals
- for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
- I != E; ++I)
- SymbolList.push_back(ELFSym::getExtSym(*I));
-
- // Emit a symbol for each section created so far; skip the null section
- for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
- ELFSection &ES = *SectionList[i];
- ELFSym *SectionSym = ELFSym::getSectionSym();
- SectionSym->SectionIdx = ES.SectionIdx;
- SymbolList.push_back(SectionSym);
- ES.Sym = SymbolList.back();
- }
-
- // Emit string table
- EmitStringTable(M.getModuleIdentifier());
-
- // Emit the symbol table now, if non-empty.
- EmitSymbolTable();
-
- // Emit the relocation sections.
- EmitRelocations();
-
- // Emit the sections string table.
- EmitSectionTableStringTable();
-
- // Dump the sections and section table to the .o file.
- OutputSectionsAndSectionTable();
-
- return false;
-}
-
-// RelocateField - Patch the relocatable field at 'Offset' in 'BO'
-// using a 'Value' of known 'Size'
-void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
- int64_t Value, unsigned Size) {
- if (Size == 32)
- BO.fixWord32(Value, Offset);
- else if (Size == 64)
- BO.fixWord64(Value, Offset);
- else
- llvm_unreachable("don't know howto patch relocatable field");
-}
-
-/// EmitRelocations - Emit relocations
-void ELFWriter::EmitRelocations() {
-
- // True if the target uses the relocation entry to hold the addend;
- // otherwise the addend is written directly to the relocatable field.
- bool HasRelA = TEW->hasRelocationAddend();
-
- // Create Relocation sections for each section which needs it.
- for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
- ELFSection &S = *SectionList[i];
-
- // This section does not have relocations
- if (!S.hasRelocations()) continue;
- ELFSection &RelSec = getRelocSection(S);
-
- // 'Link' - Section hdr idx of the associated symbol table
- // 'Info' - Section hdr idx of the section to which the relocation applies
- ELFSection &SymTab = getSymbolTableSection();
- RelSec.Link = SymTab.SectionIdx;
- RelSec.Info = S.SectionIdx;
- RelSec.EntSize = TEW->getRelocationEntrySize();
-
- // Get the relocations from Section
- std::vector<MachineRelocation> Relos = S.getRelocations();
- for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
- MRE = Relos.end(); MRI != MRE; ++MRI) {
- MachineRelocation &MR = *MRI;
-
- // Relocatable field offset from the section start
- unsigned RelOffset = MR.getMachineCodeOffset();
-
- // Symbol index in the symbol table
- unsigned SymIdx = 0;
-
- // Target specific relocation field type and size
- unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
- unsigned RelTySize = TEW->getRelocationTySize(RelType);
- int64_t Addend = 0;
-
- // There are several machine relocation types, and each one of
- // them needs a different approach to retrieve the symbol table index.
- if (MR.isGlobalValue()) {
- const GlobalValue *G = MR.getGlobalValue();
- int64_t GlobalOffset = MR.getConstantVal();
- SymIdx = GblSymLookup[G];
- if (G->hasPrivateLinkage()) {
- // If the target uses a section offset in the relocation:
- // SymIdx + Addend = section sym for global + section offset
- unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
- Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
- SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
- } else {
- Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
- }
- } else if (MR.isExternalSymbol()) {
- const char *ExtSym = MR.getExternalSymbol();
- SymIdx = ExtSymLookup[ExtSym];
- Addend = TEW->getDefaultAddendForRelTy(RelType);
- } else {
- // Get the symbol index for the section symbol
- unsigned SectionIdx = MR.getConstantVal();
- SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
-
- // The symbol offset inside the section
- int64_t SymOffset = (int64_t)MR.getResultPointer();
-
- // For PC-relative relocations where symbols are defined in the same
- // section in which they are referenced, ignore the relocation entry and patch
- // the relocatable field with the symbol offset directly.
- if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) {
- int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType);
- RelocateField(S, RelOffset, Value, RelTySize);
- continue;
- }
-
- Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset);
- }
-
- // On targets without an addend in the relocation entry, the addend
- // must be patched into the relocatable field itself; otherwise
- // write zeros to make sure there is no garbage there.
- RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize);
-
- // Get the relocation entry and emit to the relocation section
- ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend);
- EmitRelocation(RelSec, Rel, HasRelA);
- }
- }
-}
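The HasRelA handling above encodes the key REL/RELA difference: with explicit-addend (RELA) relocations the relocatable field is zeroed and the addend travels in the relocation entry, while with REL the addend is pre-stored in the field itself. A minimal sketch of that placement decision (names are illustrative only):

    #include <cstdint>

    // Decide where a relocation addend is stored: in the relocation
    // entry (RELA) or in the relocated field itself (REL).
    void placeAddend(int64_t Addend, bool HasRelA,
                     int64_t &FieldValue, int64_t &EntryAddend) {
      FieldValue  = HasRelA ? 0 : Addend;  // zero the field for RELA
      EntryAddend = HasRelA ? Addend : 0;  // REL entries carry no addend
    }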
-
-/// EmitRelocation - Write relocation 'Rel' to the relocation section 'RelSec'
-void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel,
- bool HasRelA) {
- RelSec.emitWord(Rel.getOffset());
- RelSec.emitWord(Rel.getInfo(is64Bit));
- if (HasRelA)
- RelSec.emitWord(Rel.getAddend());
-}
-
-/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable'
-void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) {
- if (is64Bit) {
- SymbolTable.emitWord32(Sym.NameIdx);
- SymbolTable.emitByte(Sym.Info);
- SymbolTable.emitByte(Sym.Other);
- SymbolTable.emitWord16(Sym.SectionIdx);
- SymbolTable.emitWord64(Sym.Value);
- SymbolTable.emitWord64(Sym.Size);
- } else {
- SymbolTable.emitWord32(Sym.NameIdx);
- SymbolTable.emitWord32(Sym.Value);
- SymbolTable.emitWord32(Sym.Size);
- SymbolTable.emitByte(Sym.Info);
- SymbolTable.emitByte(Sym.Other);
- SymbolTable.emitWord16(Sym.SectionIdx);
- }
-}
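The two emission orders above mirror the on-disk layouts mandated by the ELF specification; for reference, a sketch of the corresponding structs (field names follow the spec rather than this file):

    #include <cstdint>

    // 32-bit symbol table entry: name, value and size come first.
    struct Elf32_Sym {
      uint32_t st_name;
      uint32_t st_value;
      uint32_t st_size;
      uint8_t  st_info;
      uint8_t  st_other;
      uint16_t st_shndx;
    };

    // The 64-bit entry reorders fields so the 64-bit members come
    // last and are naturally aligned.
    struct Elf64_Sym {
      uint32_t st_name;
      uint8_t  st_info;
      uint8_t  st_other;
      uint16_t st_shndx;
      uint64_t st_value;
      uint64_t st_size;
    };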
-
-/// EmitSectionHeader - Write the header of section 'SHdr' into the
-/// 'SHdrTab' Section Header Table
-void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab,
- const ELFSection &SHdr) {
- SHdrTab.emitWord32(SHdr.NameIdx);
- SHdrTab.emitWord32(SHdr.Type);
- if (is64Bit) {
- SHdrTab.emitWord64(SHdr.Flags);
- SHdrTab.emitWord(SHdr.Addr);
- SHdrTab.emitWord(SHdr.Offset);
- SHdrTab.emitWord64(SHdr.Size);
- SHdrTab.emitWord32(SHdr.Link);
- SHdrTab.emitWord32(SHdr.Info);
- SHdrTab.emitWord64(SHdr.Align);
- SHdrTab.emitWord64(SHdr.EntSize);
- } else {
- SHdrTab.emitWord32(SHdr.Flags);
- SHdrTab.emitWord(SHdr.Addr);
- SHdrTab.emitWord(SHdr.Offset);
- SHdrTab.emitWord32(SHdr.Size);
- SHdrTab.emitWord32(SHdr.Link);
- SHdrTab.emitWord32(SHdr.Info);
- SHdrTab.emitWord32(SHdr.Align);
- SHdrTab.emitWord32(SHdr.EntSize);
- }
-}
-
-/// EmitStringTable - If the current symbol table is non-empty, emit the string
-/// table for it
-void ELFWriter::EmitStringTable(const std::string &ModuleName) {
- if (!SymbolList.size()) return; // Empty symbol table.
- ELFSection &StrTab = getStringTableSection();
-
- // Set the zeroth symbol to a null byte, as required.
- StrTab.emitByte(0);
-
- // Walk the symbol list and write the symbol names into the string table.
- unsigned Index = 1;
- for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
- ELFSym &Sym = *(*I);
-
- std::string Name;
- if (Sym.isGlobalValue()) {
- SmallString<40> NameStr;
- Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false);
- Name.append(NameStr.begin(), NameStr.end());
- } else if (Sym.isExternalSym())
- Name.append(Sym.getExternalSymbol());
- else if (Sym.isFileType())
- Name.append(ModuleName);
-
- if (Name.empty()) {
- Sym.NameIdx = 0;
- } else {
- Sym.NameIdx = Index;
- StrTab.emitString(Name);
-
- // Keep track of the number of bytes emitted to this section.
- Index += Name.size()+1;
- }
- }
- assert(Index == StrTab.size());
- StrTab.Size = Index;
-}
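The index arithmetic above implements the standard ELF string table layout: byte 0 is a mandatory NUL, names are stored NUL-terminated, and each symbol's NameIdx is the byte offset of its string. A compact sketch of the same scheme, independent of the ELFSection machinery:

    #include <string>
    #include <vector>

    // Append 'Name' to an ELF-style string table and return its byte
    // offset. Offset 0 is reserved for the empty string.
    unsigned addToStrTab(std::vector<char> &StrTab, const std::string &Name) {
      if (StrTab.empty())
        StrTab.push_back('\0');            // required leading NUL
      if (Name.empty())
        return 0;                          // empty names share offset 0
      unsigned Offset = StrTab.size();
      StrTab.insert(StrTab.end(), Name.begin(), Name.end());
      StrTab.push_back('\0');
      return Offset;
    }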
-
-// SortSymbols - In the symbol table, local symbols must come before
-// all other symbols with non-local bindings. The return value is
-// the position of the first non-local symbol.
-unsigned ELFWriter::SortSymbols() {
- unsigned FirstNonLocalSymbol;
- std::vector<ELFSym*> LocalSyms, OtherSyms;
-
- for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
- if ((*I)->isLocalBind())
- LocalSyms.push_back(*I);
- else
- OtherSyms.push_back(*I);
- }
- SymbolList.clear();
- FirstNonLocalSymbol = LocalSyms.size();
-
- for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
- SymbolList.push_back(LocalSyms[i]);
-
- for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
- SymbolList.push_back(*I);
-
- LocalSyms.clear();
- OtherSyms.clear();
-
- return FirstNonLocalSymbol;
-}
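SortSymbols is a stable partition by binding, and the returned index is what later lands in the symbol table's sh_info field. The same effect can be written with std::stable_partition; a sketch, assuming the isLocalBind() predicate used above:

    #include <algorithm>
    #include <vector>

    // Move local symbols before all others, preserving relative order,
    // and return the index of the first non-local symbol.
    template <class Sym>
    unsigned sortSymbols(std::vector<Sym *> &Syms) {
      auto Mid = std::stable_partition(
          Syms.begin(), Syms.end(),
          [](const Sym *S) { return S->isLocalBind(); });
      return Mid - Syms.begin();
    }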
-
-/// EmitSymbolTable - Emit the symbol table itself.
-void ELFWriter::EmitSymbolTable() {
- if (!SymbolList.size()) return; // Empty symbol table.
-
- // Now that we have emitted the string table and know the offset into the
- // string table of each symbol, emit the symbol table itself.
- ELFSection &SymTab = getSymbolTableSection();
- SymTab.Align = TEW->getPrefELFAlignment();
-
- // Section Index of .strtab.
- SymTab.Link = getStringTableSection().SectionIdx;
-
- // Size of each symtab entry.
- SymTab.EntSize = TEW->getSymTabEntrySize();
-
- // Reorder the symbol table with local symbols first!
- unsigned FirstNonLocalSymbol = SortSymbols();
-
- // Emit all the symbols to the symbol table.
- for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
- ELFSym &Sym = *SymbolList[i];
-
- // Emit symbol to the symbol table
- EmitSymbol(SymTab, Sym);
-
- // Record the symbol table index for each symbol
- if (Sym.isGlobalValue())
- GblSymLookup[Sym.getGlobalValue()] = i;
- else if (Sym.isExternalSym())
- ExtSymLookup[Sym.getExternalSymbol()] = i;
-
- // Keep track of the symbol index into the symbol table
- Sym.SymTabIdx = i;
- }
-
- // One greater than the symbol table index of the last local symbol
- SymTab.Info = FirstNonLocalSymbol;
- SymTab.Size = SymTab.size();
-}
-
-/// EmitSectionTableStringTable - This method adds and emits a section for the
-/// ELF Section Table string table: the string table that holds all of the
-/// section names.
-void ELFWriter::EmitSectionTableStringTable() {
- // First step: add the section for the string table to the list of sections:
- ELFSection &SHStrTab = getSectionHeaderStringTableSection();
-
- // Now that we know which section number is the .shstrtab section, update the
- // e_shstrndx entry in the ELF header.
- ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
-
- // Set the NameIdx of each section in the string table and emit the bytes for
- // the string table.
- unsigned Index = 0;
-
- for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
- ELFSection &S = *(*I);
- // Set the index into the table. Note if we have lots of entries with
- // common suffixes, we could memoize them here if we cared.
- S.NameIdx = Index;
- SHStrTab.emitString(S.getName());
-
- // Keep track of the number of bytes emitted to this section.
- Index += S.getName().size()+1;
- }
-
- // Set the size of .shstrtab now that we know what it is.
- assert(Index == SHStrTab.size());
- SHStrTab.Size = Index;
-}
-
-/// OutputSectionsAndSectionTable - Now that we have constructed the file header
-/// and all of the sections, emit these to the ostream destination and emit the
-/// SectionTable.
-void ELFWriter::OutputSectionsAndSectionTable() {
- // Pass #1: Compute the file offset for each section.
- size_t FileOff = ElfHdr.size(); // File header first.
-
- // Adjust the alignment of all sections if needed; skip the null section.
- for (unsigned i=1, e=SectionList.size(); i < e; ++i) {
- ELFSection &ES = *SectionList[i];
- if (!ES.size()) {
- ES.Offset = FileOff;
- continue;
- }
-
- // Update Section size
- if (!ES.Size)
- ES.Size = ES.size();
-
- // Align FileOff to whatever the alignment restrictions of the section are.
- if (ES.Align)
- FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1);
-
- ES.Offset = FileOff;
- FileOff += ES.Size;
- }
-
- // Align Section Header.
- unsigned TableAlign = TEW->getPrefELFAlignment();
- FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
-
- // Now that we know where all of the sections will be emitted, set the e_shnum
- // entry in the ELF header.
- ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
-
- // Now that we know the offset in the file of the section table, update the
- // e_shoff address in the ELF header.
- ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset);
-
- // Now that we know all of the data in the file header, emit it and all of the
- // sections!
- O.write((char *)&ElfHdr.getData()[0], ElfHdr.size());
- FileOff = ElfHdr.size();
-
- // Section Header Table blob
- BinaryObject SHdrTable(isLittleEndian, is64Bit);
-
- // Emit all of sections to the file and build the section header table.
- for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
- ELFSection &S = *(*I);
- DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName()
- << ", Size: " << S.Size << ", Offset: " << S.Offset
- << ", SectionData Size: " << S.size() << "\n");
-
- // Align FileOff to whatever the alignment restrictions of the section are.
- if (S.size()) {
- if (S.Align) {
- for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1);
- FileOff != NewFileOff; ++FileOff)
- O << (char)0xAB;
- }
- O.write((char *)&S.getData()[0], S.Size);
- FileOff += S.Size;
- }
-
- EmitSectionHeader(SHdrTable, S);
- }
-
- // Align output for the section table.
- for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1);
- FileOff != NewFileOff; ++FileOff)
- O << (char)0xAB;
-
- // Emit the section table itself.
- O.write((char *)&SHdrTable.getData()[0], SHdrTable.size());
-}
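The (X + Align - 1) & ~(Align - 1) idiom that recurs throughout the writer rounds an offset up to a power-of-two alignment; pulled out as a checked helper (a sketch, not code from the file), it reads:

    #include <cassert>
    #include <cstddef>

    // Round X up to the next multiple of Align, which must be a power
    // of two. E.g. alignTo(10, 8) == 16 and alignTo(16, 8) == 16.
    size_t alignTo(size_t X, size_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0 && "power of 2 only");
      return (X + Align - 1) & ~(Align - 1);
    }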
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
deleted file mode 100644
index 6f7fbace8aba..000000000000
--- a/lib/CodeGen/ELFWriter.h
+++ /dev/null
@@ -1,251 +0,0 @@
-//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ELFWriter class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ELFWRITER_H
-#define ELFWRITER_H
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include <map>
-
-namespace llvm {
- class BinaryObject;
- class Constant;
- class ConstantInt;
- class ConstantStruct;
- class ELFCodeEmitter;
- class ELFRelocation;
- class ELFSection;
- struct ELFSym;
- class GlobalVariable;
- class JITDebugRegisterer;
- class Mangler;
- class MachineCodeEmitter;
- class MachineConstantPoolEntry;
- class ObjectCodeEmitter;
- class MCAsmInfo;
- class TargetELFWriterInfo;
- class TargetLoweringObjectFile;
- class raw_ostream;
- class SectionKind;
- class MCContext;
- class TargetMachine;
-
- typedef std::vector<ELFSym*>::iterator ELFSymIter;
- typedef std::vector<ELFSection*>::iterator ELFSectionIter;
- typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
- typedef SetVector<const char *>::const_iterator PendingExtsIter;
- typedef std::pair<const Constant *, int64_t> CstExprResTy;
-
- /// ELFWriter - This class implements the common target-independent code for
- /// writing ELF files. Targets should derive a class from this to
- /// parameterize the output format.
- ///
- class ELFWriter : public MachineFunctionPass {
- friend class ELFCodeEmitter;
- friend class JITDebugRegisterer;
- public:
- static char ID;
-
- /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
- ObjectCodeEmitter *getObjectCodeEmitter() {
- return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
- }
-
- ELFWriter(raw_ostream &O, TargetMachine &TM);
- ~ELFWriter();
-
- protected:
- /// Output stream to send the resultant object file to.
- raw_ostream &O;
-
- /// Target machine description.
- TargetMachine &TM;
-
- /// Context object for machine code objects.
- MCContext &OutContext;
-
- /// Target Elf Writer description.
- const TargetELFWriterInfo *TEW;
-
- /// Mang - The object used to perform name mangling for this module.
- Mangler *Mang;
-
- /// ElfCE - The ELFCodeEmitter object that we are exposing to emit machine
- /// code for functions to the .o file.
- ELFCodeEmitter *ElfCE;
-
- /// TLOF - Target Lowering Object File, provides section names for globals
- /// and other object file specific stuff.
- const TargetLoweringObjectFile &TLOF;
-
- /// MAI - Target Asm Info, provides information about section names for
- /// globals and other target specific stuff.
- const MCAsmInfo *MAI;
-
- //===------------------------------------------------------------------===//
- // Properties inferred automatically from the target machine.
- //===------------------------------------------------------------------===//
-
- /// is64Bit/isLittleEndian - This information is inferred from the target
- /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
- bool is64Bit, isLittleEndian;
-
- /// doInitialization - Emit the file header and all of the global variables
- /// for the module to the ELF file.
- bool doInitialization(Module &M);
- bool runOnMachineFunction(MachineFunction &MF);
-
- /// doFinalization - Now that the module has been completely processed, emit
- /// the ELF file to 'O'.
- bool doFinalization(Module &M);
-
- private:
- /// Blob containing the Elf header
- BinaryObject ElfHdr;
-
- /// SectionList - This is the list of sections that we have emitted to the
- /// file. Once the file has been completely built, the section header table
- /// is constructed from this info.
- std::vector<ELFSection*> SectionList;
- unsigned NumSections; // Always = SectionList.size()
-
- /// SectionLookup - This is a mapping from section name to section number in
- /// the SectionList. Used to quickly gather the Section Index from MAI names
- std::map<std::string, ELFSection*> SectionLookup;
-
- /// PendingGlobals - Globals not processed as symbols yet.
- SetVector<const GlobalValue*> PendingGlobals;
-
- /// GblSymLookup - This is a mapping from global value to a symbol index
- /// in the symbol table or private symbols list. This is useful since reloc
- /// symbol references must be quickly mapped to their indices in the lists.
- std::map<const GlobalValue*, uint32_t> GblSymLookup;
-
- /// PendingExternals - Externals not processed as symbols yet.
- SetVector<const char *> PendingExternals;
-
- /// ExtSymLookup - This is a mapping from externals to a symbol index
- /// in the symbol table list. This is useful since reloc symbol references
- /// must be quickly mapped to their symbol table indices.
- std::map<const char *, uint32_t> ExtSymLookup;
-
- /// SymbolList - This is the list of symbols emitted to the symbol table.
- /// When the SymbolList is finally built, local symbols must be placed
- /// at the beginning and non-locals at the end.
- std::vector<ELFSym*> SymbolList;
-
- /// PrivateSyms - Record private symbols; a symbol recorded here must
- /// never appear in the SymbolList.
- std::vector<ELFSym*> PrivateSyms;
-
- /// getSection - Return the section with the specified name, creating a new
- /// section if one does not already exist.
- ELFSection &getSection(const std::string &Name, unsigned Type,
- unsigned Flags = 0, unsigned Align = 0) {
- ELFSection *&SN = SectionLookup[Name];
- if (SN) return *SN;
-
- SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
- SN = SectionList.back();
- SN->SectionIdx = NumSections++;
- SN->Type = Type;
- SN->Flags = Flags;
- SN->Link = ELF::SHN_UNDEF;
- SN->Align = Align;
- return *SN;
- }
-
- ELFSection &getNonExecStackSection() {
- return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
- }
-
- ELFSection &getSymbolTableSection() {
- return getSection(".symtab", ELF::SHT_SYMTAB, 0);
- }
-
- ELFSection &getStringTableSection() {
- return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
- }
-
- ELFSection &getSectionHeaderStringTableSection() {
- return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
- }
-
- ELFSection &getNullSection() {
- return getSection("", ELF::SHT_NULL, 0);
- }
-
- ELFSection &getDataSection();
- ELFSection &getBSSSection();
- ELFSection &getCtorSection();
- ELFSection &getDtorSection();
- ELFSection &getJumpTableSection();
- ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
- ELFSection &getTextSection(const Function *F);
- ELFSection &getRelocSection(ELFSection &S);
-
- // Helpers for obtaining ELF specific info.
- unsigned getGlobalELFBinding(const GlobalValue *GV);
- unsigned getGlobalELFType(const GlobalValue *GV);
- unsigned getGlobalELFVisibility(const GlobalValue *GV);
-
- // AddPendingGlobalSymbol - Add a global to be processed and to
- // the global symbol lookup; use a zero index because the table
- // index will be determined later.
- void AddPendingGlobalSymbol(const GlobalValue *GV,
- bool AddToLookup = false);
-
- // AddPendingExternalSymbol - Add the external symbol to be processed
- // and to the external symbol lookup; use a zero index because
- // the symbol table index will be determined later.
- void AddPendingExternalSymbol(const char *External);
-
- // AddToSymbolList - Update the symbol lookup and, if the symbol is
- // private, add it to the PrivateSyms list; otherwise add it to SymbolList.
- void AddToSymbolList(ELFSym *GblSym);
-
- // As we complete the ELF file, we need to update fields in the ELF header
- // (e.g. the location of the section table). These members keep track of
- // the offset in ELFHeader of these various pieces to update and other
- // locations in the file.
- unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header.
- unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header.
- unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header.
-
- private:
- void EmitGlobal(const GlobalValue *GV);
- void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
- void EmitGlobalConstantStruct(const ConstantStruct *CVS,
- ELFSection &GblS);
- void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
- void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
- ELFSection &GblS, int64_t Offset = 0);
- bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
- void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
- void EmitRelocations();
- void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
- void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
- void EmitSectionTableStringTable();
- void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
- void EmitSymbolTable();
- void EmitStringTable(const std::string &ModuleName);
- void OutputSectionsAndSectionTable();
- void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
- unsigned Size);
- unsigned SortSymbols();
- CstExprResTy ResolveConstantExpr(const Constant *CV);
- };
-}
-
-#endif
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index a7aba89b87f3..3bb04657b58a 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -77,7 +77,7 @@ void EdgeBundles::view() const {
/// Specialize WriteGraph; the standard implementation won't work.
raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
bool ShortNames,
- const std::string &Title) {
+ const Twine &Title) {
const MachineFunction *MF = G.getMachineFunction();
O << "digraph {\n";
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 01dccdb71e4b..a48c5400abcb 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -26,7 +26,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,7 +45,7 @@ using namespace llvm;
/// DomainValue for each register, but it may contain multiple execution
/// domains. A register value is initially created in a single execution
/// domain, but if we were forced to pay the penalty of a domain crossing, we
-/// keep track of the fact the the register is now available in multiple
+/// keep track of the fact that the register is now available in multiple
/// domains.
namespace {
struct DomainValue {
@@ -57,8 +57,10 @@ struct DomainValue {
// domains where the register is available for free.
unsigned AvailableDomains;
- // Position of the last defining instruction.
- unsigned Dist;
+ // Pointer to the next DomainValue in a chain. When two DomainValues are
+ // merged, Victim.Next is set to point to Victor, so old DomainValue
+ // references can be updated by following the chain.
+ DomainValue *Next;
// Twiddleable instructions using or defining these registers.
SmallVector<MachineInstr*, 8> Instrs;
@@ -92,16 +94,33 @@ struct DomainValue {
return CountTrailingZeros_32(AvailableDomains);
}
- DomainValue() { clear(); }
+ DomainValue() : Refs(0) { clear(); }
+ // Clear this DomainValue and point to next which has all its data.
void clear() {
- Refs = AvailableDomains = Dist = 0;
+ AvailableDomains = 0;
+ Next = 0;
Instrs.clear();
}
};
}
namespace {
+/// LiveReg - Information about a live register.
+struct LiveReg {
+ /// Value currently in this register, or NULL when no value is being tracked.
+ /// This counts as a DomainValue reference.
+ DomainValue *Value;
+
+ /// Instruction that defined this register, relative to the beginning of the
+ /// current basic block. When a LiveReg is used to represent a live-out
+ /// register, this value is relative to the end of the basic block, so it
+ /// will be a negative number.
+ int Def;
+};
+} // anonymous namespace
+
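The relative numbering in LiveReg::Def deserves a worked example: in a five-instruction block, a register defined by instruction 2 has Def = 2 while the block is being processed; once recorded as a live-out it becomes 2 - 5 = -3, meaning "three instructions before the end", so successors can compare ages from different predecessors directly. A one-line sketch of the conversion performed in leaveBasicBlock() further down:

    // Convert a def recorded relative to the start of a block into one
    // relative to its end; NumInstrs is the block's instruction count.
    int defRelativeToEnd(int DefFromStart, int NumInstrs) {
      return DefFromStart - NumInstrs;   // e.g. 2 - 5 == -3
    }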
+namespace {
class ExeDepsFix : public MachineFunctionPass {
static char ID;
SpecificBumpPtrAllocator<DomainValue> Allocator;
@@ -111,13 +130,19 @@ class ExeDepsFix : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineBasicBlock *MBB;
std::vector<int> AliasMap;
const unsigned NumRegs;
- DomainValue **LiveRegs;
- typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveReg *LiveRegs;
+ typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap;
LiveOutMap LiveOuts;
- unsigned Distance;
+
+ /// Current instruction number.
+ /// The first instruction in each basic block is 0.
+ int CurInstr;
+
+ /// True when the current block has a predecessor that hasn't been visited
+ /// yet.
+ bool SeenUnknownBackEdge;
public:
ExeDepsFix(const TargetRegisterClass *rc)
@@ -131,26 +156,33 @@ public:
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
- return "SSE execution domain fixup";
+ return "Execution dependency fix";
}
private:
// Register mapping.
- int RegIndex(unsigned Reg);
+ int regIndex(unsigned Reg);
// DomainValue allocation.
- DomainValue *Alloc(int domain = -1);
- void Recycle(DomainValue*);
+ DomainValue *alloc(int domain = -1);
+ DomainValue *retain(DomainValue *DV) {
+ if (DV) ++DV->Refs;
+ return DV;
+ }
+ void release(DomainValue*);
+ DomainValue *resolve(DomainValue*&);
// LiveRegs manipulations.
- void SetLiveReg(int rx, DomainValue *DV);
- void Kill(int rx);
- void Force(int rx, unsigned domain);
- void Collapse(DomainValue *dv, unsigned domain);
- bool Merge(DomainValue *A, DomainValue *B);
-
- void enterBasicBlock();
- void visitGenericInstr(MachineInstr*);
+ void setLiveReg(int rx, DomainValue *DV);
+ void kill(int rx);
+ void force(int rx, unsigned domain);
+ void collapse(DomainValue *dv, unsigned domain);
+ bool merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock(MachineBasicBlock*);
+ void leaveBasicBlock(MachineBasicBlock*);
+ void visitInstr(MachineInstr*);
+ void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
};
@@ -160,83 +192,108 @@ char ExeDepsFix::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
-int ExeDepsFix::RegIndex(unsigned Reg) {
+int ExeDepsFix::regIndex(unsigned Reg) {
assert(Reg < AliasMap.size() && "Invalid register");
return AliasMap[Reg];
}
-DomainValue *ExeDepsFix::Alloc(int domain) {
+DomainValue *ExeDepsFix::alloc(int domain) {
DomainValue *dv = Avail.empty() ?
new(Allocator.Allocate()) DomainValue :
Avail.pop_back_val();
- dv->Dist = Distance;
if (domain >= 0)
dv->addDomain(domain);
+ assert(dv->Refs == 0 && "Reference count wasn't cleared");
+ assert(!dv->Next && "Chained DomainValue shouldn't have been recycled");
return dv;
}
-void ExeDepsFix::Recycle(DomainValue *dv) {
- assert(dv && "Cannot recycle NULL");
- dv->clear();
- Avail.push_back(dv);
+/// release - Release a reference to DV. When the last reference is released,
+/// collapse if needed.
+void ExeDepsFix::release(DomainValue *DV) {
+ while (DV) {
+ assert(DV->Refs && "Bad DomainValue");
+ if (--DV->Refs)
+ return;
+
+ // There are no more DV references. Collapse any contained instructions.
+ if (DV->AvailableDomains && !DV->isCollapsed())
+ collapse(DV, DV->getFirstDomain());
+
+ DomainValue *Next = DV->Next;
+ DV->clear();
+ Avail.push_back(DV);
+ // Also release the next DomainValue in the chain.
+ DV = Next;
+ }
+}
+
+/// resolve - Follow the chain of dead DomainValues until a live DomainValue is
+/// reached. Update the referenced pointer when necessary.
+DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) {
+ DomainValue *DV = DVRef;
+ if (!DV || !DV->Next)
+ return DV;
+
+ // DV has a chain. Find the end.
+ do DV = DV->Next;
+ while (DV->Next);
+
+ // Update DVRef to point to DV.
+ retain(DV);
+ release(DVRef);
+ DVRef = DV;
+ return DV;
}
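The retain/release/resolve trio above amounts to reference-counted union-find: merged DomainValues form a chain through Next, and resolve() walks to the live end and path-compresses the reference. Stripped of the refcounting, the underlying pattern is (a sketch only):

    // Find-with-path-compression over the same Next-chain shape;
    // the retain()/release() bookkeeping is omitted for clarity.
    struct Node { Node *Next; };

    Node *find(Node *&Ref) {
      Node *N = Ref;
      if (!N || !N->Next)
        return N;
      while (N->Next)      // walk to the live end of the chain
        N = N->Next;
      Ref = N;             // compress so later lookups are O(1)
      return N;
    }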
/// Set LiveRegs[rx] = dv, updating reference counts.
-void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) {
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs) {
- LiveRegs = new DomainValue*[NumRegs];
- std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0);
- }
+ assert(LiveRegs && "Must enter basic block first.");
- if (LiveRegs[rx] == dv)
+ if (LiveRegs[rx].Value == dv)
return;
- if (LiveRegs[rx]) {
- assert(LiveRegs[rx]->Refs && "Bad refcount");
- if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]);
- }
- LiveRegs[rx] = dv;
- if (dv) ++dv->Refs;
+ if (LiveRegs[rx].Value)
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = retain(dv);
}
// Kill register rx, recycle or collapse any DomainValue.
-void ExeDepsFix::Kill(int rx) {
+void ExeDepsFix::kill(int rx) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- if (!LiveRegs || !LiveRegs[rx]) return;
-
- // Before killing the last reference to an open DomainValue, collapse it to
- // the first available domain.
- if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed())
- Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain());
- else
- SetLiveReg(rx, 0);
+ assert(LiveRegs && "Must enter basic block first.");
+ if (!LiveRegs[rx].Value)
+ return;
+
+ release(LiveRegs[rx].Value);
+ LiveRegs[rx].Value = 0;
}
/// Force register rx into domain.
-void ExeDepsFix::Force(int rx, unsigned domain) {
+void ExeDepsFix::force(int rx, unsigned domain) {
assert(unsigned(rx) < NumRegs && "Invalid index");
- DomainValue *dv;
- if (LiveRegs && (dv = LiveRegs[rx])) {
+ assert(LiveRegs && "Must enter basic block first.");
+ if (DomainValue *dv = LiveRegs[rx].Value) {
if (dv->isCollapsed())
dv->addDomain(domain);
else if (dv->hasDomain(domain))
- Collapse(dv, domain);
+ collapse(dv, domain);
else {
// This is an incompatible open DomainValue. Collapse it to whatever and
// force the new value into domain. This costs a domain crossing.
- Collapse(dv, dv->getFirstDomain());
- assert(LiveRegs[rx] && "Not live after collapse?");
- LiveRegs[rx]->addDomain(domain);
+ collapse(dv, dv->getFirstDomain());
+ assert(LiveRegs[rx].Value && "Not live after collapse?");
+ LiveRegs[rx].Value->addDomain(domain);
}
} else {
// Set up basic collapsed DomainValue.
- SetLiveReg(rx, Alloc(domain));
+ setLiveReg(rx, alloc(domain));
}
}
/// Collapse open DomainValue into given domain. If there are multiple
/// registers using dv, they each get a unique collapsed DomainValue.
-void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
+void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) {
assert(dv->hasDomain(domain) && "Cannot collapse");
// Collapse all the instructions.
@@ -247,13 +304,13 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) {
// If there are multiple users, give them new, unique DomainValues.
if (LiveRegs && dv->Refs > 1)
for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == dv)
- SetLiveReg(rx, Alloc(domain));
+ if (LiveRegs[rx].Value == dv)
+ setLiveReg(rx, alloc(domain));
}
/// Merge - All instructions and registers in B are moved to A, and B is
/// released.
-bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
+bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
assert(!A->isCollapsed() && "Cannot merge into collapsed");
assert(!B->isCollapsed() && "Cannot merge from collapsed");
if (A == B)
@@ -263,47 +320,188 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) {
if (!common)
return false;
A->AvailableDomains = common;
- A->Dist = std::max(A->Dist, B->Dist);
A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+
+ // Clear the old DomainValue so we won't try to swizzle instructions twice.
+ B->clear();
+ // All uses of B are redirected to A.
+ B->Next = retain(A);
+
for (unsigned rx = 0; rx != NumRegs; ++rx)
- if (LiveRegs[rx] == B)
- SetLiveReg(rx, A);
+ if (LiveRegs[rx].Value == B)
+ setLiveReg(rx, A);
return true;
}
-void ExeDepsFix::enterBasicBlock() {
- // Try to coalesce live-out registers from predecessors.
- for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
+// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
+void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
+ // Detect back-edges from predecessors we haven't processed yet.
+ SeenUnknownBackEdge = false;
+
+ // Reset instruction counter in each basic block.
+ CurInstr = 0;
+
+ // Set up LiveRegs to represent registers entering MBB.
+ if (!LiveRegs)
+ LiveRegs = new LiveReg[NumRegs];
+
+ // Default values are 'nothing happened a long time ago'.
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Def = -(1 << 20);
+ }
+
+ // This is the entry block.
+ if (MBB->pred_empty()) {
+ for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
- int rx = RegIndex(*i);
- if (rx < 0) continue;
- for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
- pe = MBB->pred_end(); pi != pe; ++pi) {
- LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
- if (fi == LiveOuts.end()) continue;
- DomainValue *pdv = fi->second[rx];
- if (!pdv) continue;
- if (!LiveRegs || !LiveRegs[rx]) {
- SetLiveReg(rx, pdv);
+ int rx = regIndex(*i);
+ if (rx < 0)
+ continue;
+ // Treat function live-ins as if they were defined just before the first
+ // instruction. Usually, function arguments are set up immediately
+ // before the call.
+ LiveRegs[rx].Def = -1;
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n");
+ return;
+ }
+
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) {
+ SeenUnknownBackEdge = true;
+ continue;
+ }
+ assert(fi->second && "Can't have NULL entries");
+
+ for (unsigned rx = 0; rx != NumRegs; ++rx) {
+ // Use the most recent predecessor def for each register.
+ LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def);
+
+ DomainValue *pdv = resolve(fi->second[rx].Value);
+ if (!pdv)
+ continue;
+ if (!LiveRegs[rx].Value) {
+ setLiveReg(rx, pdv);
continue;
}
// We have a live DomainValue from more than one predecessor.
- if (LiveRegs[rx]->isCollapsed()) {
+ if (LiveRegs[rx].Value->isCollapsed()) {
// We are already collapsed, but the predecessor is not. Force it.
- unsigned domain = LiveRegs[rx]->getFirstDomain();
- if (!pdv->isCollapsed() && pdv->hasDomain(domain))
- Collapse(pdv, domain);
+ unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
+ if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
+ collapse(pdv, Domain);
continue;
}
// Currently open, merge in predecessor.
if (!pdv->isCollapsed())
- Merge(LiveRegs[rx], pdv);
+ merge(LiveRegs[rx].Value, pdv);
else
- Force(rx, pdv->getFirstDomain());
+ force(rx, pdv->getFirstDomain());
+ }
+ }
+ DEBUG(dbgs() << "BB#" << MBB->getNumber()
+ << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n"));
+}
+
+void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
+ assert(LiveRegs && "Must enter basic block first.");
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ // Also use LiveOuts as a visited set to detect back-edges.
+ bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second;
+
+ if (First) {
+ // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to
+ // the end of this block instead of the beginning.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ LiveRegs[i].Def -= CurInstr;
+ } else {
+ // Insertion failed, this must be the second pass.
+ // Release all the DomainValues instead of keeping them.
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ release(LiveRegs[i].Value);
+ delete[] LiveRegs;
+ }
+ LiveRegs = 0;
+}
+
+void ExeDepsFix::visitInstr(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return;
+
+ // Update instructions with explicit execution domains.
+ std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI);
+ if (DomP.first) {
+ if (DomP.second)
+ visitSoftInstr(MI, DomP.second);
+ else
+ visitHardInstr(MI, DomP.first);
+ }
+
+ // Process defs to track register ages, and kill values clobbered by generic
+ // instructions.
+ processDefs(MI, !DomP.first);
+}
+
+// Update def-ages for registers defined by MI.
+// If Kill is set, also kill off DomainValues clobbered by the defs.
+void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
+ assert(!MI->isDebugValue() && "Won't process debug values");
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0,
+ e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
+ i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isImplicit())
+ break;
+ if (MO.isUse())
+ continue;
+ int rx = regIndex(MO.getReg());
+ if (rx < 0)
+ continue;
+
+ // This instruction explicitly defines rx.
+ DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr
+ << '\t' << *MI);
+
+ // How many instructions since rx was last written?
+ unsigned Clearance = CurInstr - LiveRegs[rx].Def;
+ LiveRegs[rx].Def = CurInstr;
+
+ // Kill off domains redefined by generic instructions.
+ if (Kill)
+ kill(rx);
+
+ // Verify clearance before partial register updates.
+ unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI);
+ if (!Pref)
+ continue;
+ DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
+ if (Pref > Clearance) {
+ DEBUG(dbgs() << ": Break dependency.\n");
+ TII->breakPartialRegDependency(MI, i, TRI);
+ continue;
+ }
+
+ // The current clearance seems OK, but we may be ignoring a def from a
+ // back-edge.
+ if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) {
+ DEBUG(dbgs() << ": OK.\n");
+ continue;
}
+
+ // A def from an unprocessed back-edge may make us break this dependency.
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n");
}
+
+ ++CurInstr;
}
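A concrete reading of the clearance test above: if register rx was last written at instruction 3 and CurInstr is 10, the clearance is 7, so a partial update preferring 16 idle instructions would get its dependency broken. Condensed into a predicate (illustrative names only):

    // Should a dependency-breaking instruction be inserted before a
    // partial register update? 'Def' is the instruction number of the
    // last write, 'Cur' the current instruction, and 'Pref' the
    // clearance the target prefers.
    bool shouldBreakDependency(int Def, int Cur, unsigned Pref) {
      unsigned Clearance = Cur - Def;  // instructions since last write
      return Pref > Clearance;         // too recent: break the dependency
    }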
// A hard instruction only works in one domain. All input registers will be
@@ -314,19 +512,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- Force(rx, domain);
+ force(rx, domain);
}
// Kill all defs and force them.
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- Kill(rx);
- Force(rx, domain);
+ kill(rx);
+ force(rx, domain);
}
}
@@ -343,9 +541,9 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- if (DomainValue *dv = LiveRegs[rx]) {
+ if (DomainValue *dv = LiveRegs[rx].Value) {
// Bitmask of domains that dv and available have in common.
unsigned common = dv->getCommonDomains(available);
// Is it possible to use this collapsed register for free?
@@ -360,7 +558,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
else
// Open DomainValue is not compatible with instruction. It is useless
// now.
- Kill(rx);
+ kill(rx);
}
}
@@ -374,94 +572,89 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// Kill off any remaining uses that don't match available, and build a list of
// incoming DomainValues that we want to merge.
- SmallVector<DomainValue*,4> doms;
+ SmallVector<LiveReg, 4> Regs;
for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
int rx = *i;
- DomainValue *dv = LiveRegs[rx];
+ const LiveReg &LR = LiveRegs[rx];
// This useless DomainValue could have been missed above.
- if (!dv->getCommonDomains(available)) {
- Kill(*i);
+ if (!LR.Value->getCommonDomains(available)) {
+ kill(rx);
continue;
}
- // sorted, uniqued insert.
- bool inserted = false;
- for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
- i != e && !inserted; ++i) {
- if (dv == *i)
- inserted = true;
- else if (dv->Dist < (*i)->Dist) {
- inserted = true;
- doms.insert(i, dv);
+ // Sorted insertion.
+ bool Inserted = false;
+ for (SmallVector<LiveReg, 4>::iterator i = Regs.begin(), e = Regs.end();
+ i != e && !Inserted; ++i) {
+ if (LR.Def < i->Def) {
+ Inserted = true;
+ Regs.insert(i, LR);
}
}
- if (!inserted)
- doms.push_back(dv);
+ if (!Inserted)
+ Regs.push_back(LR);
}
// Regs is now sorted in order of appearance. Try to merge them all, giving
// priority to the latest ones.
DomainValue *dv = 0;
- while (!doms.empty()) {
+ while (!Regs.empty()) {
if (!dv) {
- dv = doms.pop_back_val();
+ dv = Regs.pop_back_val().Value;
+ // Force the first dv to match the current instruction.
+ dv->AvailableDomains = dv->getCommonDomains(available);
+ assert(dv->AvailableDomains && "Domain should have been filtered");
continue;
}
- DomainValue *latest = doms.pop_back_val();
- if (Merge(dv, latest)) continue;
+ DomainValue *Latest = Regs.pop_back_val().Value;
+ // Skip already merged values.
+ if (Latest == dv || Latest->Next)
+ continue;
+ if (merge(dv, Latest))
+ continue;
// If latest didn't merge, it is useless now. Kill all registers using it.
for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
- if (LiveRegs[*i] == latest)
- Kill(*i);
+ if (LiveRegs[*i].Value == Latest)
+ kill(*i);
}
// dv is the DomainValue we are going to use for this instruction.
- if (!dv)
- dv = Alloc();
- dv->Dist = Distance;
- dv->AvailableDomains = available;
+ if (!dv) {
+ dv = alloc();
+ dv->AvailableDomains = available;
+ }
dv->Instrs.push_back(mi);
// Finally set all defs and non-collapsed uses to dv.
for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
MachineOperand &mo = mi->getOperand(i);
if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
+ int rx = regIndex(mo.getReg());
if (rx < 0) continue;
- if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
- Kill(rx);
- SetLiveReg(rx, dv);
+ if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) {
+ kill(rx);
+ setLiveReg(rx, dv);
}
}
}
-void ExeDepsFix::visitGenericInstr(MachineInstr *mi) {
- // Process explicit defs, kill any relevant registers redefined.
- for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &mo = mi->getOperand(i);
- if (!mo.isReg()) continue;
- int rx = RegIndex(mo.getReg());
- if (rx < 0) continue;
- Kill(rx);
- }
-}
-
bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- MBB = 0;
LiveRegs = 0;
- Distance = 0;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
+ DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
+ << RC->getName() << " **********\n");
+
// If no relevant registers are used in the function, we can skip it
// completely.
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
anyregs = true;
break;
}
@@ -473,43 +666,48 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
// or -1.
AliasMap.resize(TRI->getNumRegs(), -1);
for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i)
- for (const unsigned *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
+ for (const uint16_t *AI = TRI->getOverlaps(RC->getRegister(i)); *AI; ++AI)
AliasMap[*AI] = i;
}
MachineBasicBlock *Entry = MF->begin();
- SmallPtrSet<MachineBasicBlock*, 16> Visited;
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
- DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
- DFI != DFE; ++DFI) {
- MBB = *DFI;
- enterBasicBlock();
+ ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
+ SmallVector<MachineBasicBlock*, 16> Loops;
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ MachineBasicBlock *MBB = *MBBI;
+ enterBasicBlock(MBB);
+ if (SeenUnknownBackEdge)
+ Loops.push_back(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- MachineInstr *mi = I;
- if (mi->isDebugValue()) continue;
- ++Distance;
- std::pair<uint16_t, uint16_t> domp = TII->getExecutionDomain(mi);
- if (domp.first)
- if (domp.second)
- visitSoftInstr(mi, domp.second);
- else
- visitHardInstr(mi, domp.first);
- else if (LiveRegs)
- visitGenericInstr(mi);
- }
+ ++I)
+ visitInstr(I);
+ leaveBasicBlock(MBB);
+ }
- // Save live registers at end of MBB - used by enterBasicBlock().
- if (LiveRegs)
- LiveOuts.insert(std::make_pair(MBB, LiveRegs));
- LiveRegs = 0;
+ // Visit all the loop blocks again in order to merge DomainValues from
+ // back-edges.
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Loops[i];
+ enterBasicBlock(MBB);
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I)
+ if (!I->isDebugValue())
+ processDefs(I, false);
+ leaveBasicBlock(MBB);
}
- // Clear the LiveOuts vectors. Should we also collapse any remaining
- // DomainValues?
- for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
- i != e; ++i)
- delete[] i->second;
+ // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+ MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+ LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+ if (FI == LiveOuts.end() || !FI->second)
+ continue;
+ for (unsigned i = 0, e = NumRegs; i != e; ++i)
+ if (FI->second[i].Value)
+ release(FI->second[i].Value);
+ delete[] FI->second;
+ }
LiveOuts.clear();
Avail.clear();
Allocator.DestroyAll();
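
The hunks above change ExecutionDepsFix from a DFS walk to a reverse-post-order walk with a second pass over loop blocks, and add the clearance heuristic: a partial register update only needs a dependency-breaking instruction when too few instructions have executed since the register was last written. Below is a minimal, self-contained C++ sketch of that bookkeeping; LiveReg, the toy instruction list, and the Pref threshold are simplified stand-ins, not LLVM's types.

#include <cstdio>
#include <vector>

// Simplified model of the clearance bookkeeping in processDefs().  A
// register's "clearance" is the number of instructions executed since it
// was last written; when a partial register update wants more clearance
// than that, the pass inserts a dependency-breaking instruction instead.
struct LiveReg { int Def = -1000; };          // counter value at the last def

int main() {
  std::vector<LiveReg> LiveRegs(4);
  const unsigned Pref = 16;                   // clearance a partial update wants
  struct { int Reg; bool Partial; } Prog[] =
      {{0, false}, {1, false}, {0, true}, {2, false}, {1, true}};
  int CurInstr = 0;
  for (auto &I : Prog) {
    unsigned Clearance = CurInstr - LiveRegs[I.Reg].Def;
    if (I.Partial && Pref > Clearance)
      std::printf("i%d: break dependency on r%d (clearance %u, want %u)\n",
                  CurInstr, I.Reg, Clearance, Pref);
    LiveRegs[I.Reg].Def = CurInstr;           // this instruction defines Reg
    ++CurInstr;
  }
  return 0;
}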
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index a67140ece4a5..2c4a93543cc3 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -32,10 +32,6 @@ namespace {
private:
virtual bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const {
- return "Expand ISel Pseudo-instructions";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -43,12 +39,9 @@ namespace {
} // end anonymous namespace
char ExpandISelPseudos::ID = 0;
+char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
- "Expand CodeGen Pseudo-instructions", false, false)
-
-FunctionPass *llvm::createExpandISelPseudosPass() {
- return new ExpandISelPseudos();
-}
+ "Expand ISel Pseudo-instructions", false, false)
bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
@@ -62,8 +55,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MBBI++;
// If MI is a pseudo, expand it.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.usesCustomInsertionHook()) {
+ if (MI->usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB =
TLI->EmitInstrWithCustomInserter(MI, MBB);
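
The pattern above recurs through the rest of this patch (ExpandPostRAPseudos, GCMachineCodeAnalysis, IfConverter): the per-pass createXPass() factory is deleted, the pass is registered with INITIALIZE_PASS, and its identity is exported as a char& so clients can schedule it through the pass registry. A hedged sketch of the idiom with a hypothetical pass follows; it assumes MyCodeGenPassID is declared as an extern char& inside namespace llvm in a shared header, as the real IDs are in llvm/CodeGen/Passes.h.

// Sketch only: a hypothetical pass adopting the same registration idiom.
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
class MyCodeGenPass : public MachineFunctionPass {
public:
  static char ID;                 // its address, not value, identifies the pass
  MyCodeGenPass() : MachineFunctionPass(ID) {}
  virtual bool runOnMachineFunction(MachineFunction &MF) { return false; }
};
} // end anonymous namespace

char MyCodeGenPass::ID = 0;
char &llvm::MyCodeGenPassID = MyCodeGenPass::ID;   // exported identity

INITIALIZE_PASS(MyCodeGenPass, "my-codegen-pass",
                "My CodeGen Pass", false, false)

Pass managers can then locate the pass in the PassRegistry by that ID, which is why the bespoke factory functions become unnecessary.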
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index e2a14a8dfd97..b14afc286d49 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -36,10 +36,6 @@ public:
static char ID; // Pass identification, replacement for typeid
ExpandPostRA() : MachineFunctionPass(ID) {}
- const char *getPassName() const {
- return "Post-RA pseudo instruction expansion pass";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreservedID(MachineLoopInfoID);
@@ -61,10 +57,10 @@ private:
} // end anonymous namespace
char ExpandPostRA::ID = 0;
+char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
-FunctionPass *llvm::createExpandPostRAPseudosPass() {
- return new ExpandPostRA();
-}
+INITIALIZE_PASS(ExpandPostRA, "postrapseudos",
+ "Post-RA pseudo instruction expansion pass", false, false)
/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead,
/// and the lowered replacement instructions immediately precede it.
@@ -207,7 +203,7 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
++mi;
// Only expand pseudos.
- if (!MI->getDesc().isPseudo())
+ if (!MI->isPseudo())
continue;
// Give targets a chance to expand even standard pseudos.
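
Note also the move from MI->getDesc().isPseudo() to MI->isPseudo(); the same getDesc()-forwarding cleanup appears throughout the patch as isCall(), isBranch(), mayStore(), isNotDuplicable() and usesCustomInsertionHook(). A toy, self-contained model of the shape of these convenience accessors (stand-in types, not LLVM's):

#include <cstdio>

struct MCInstrDescStub {
  bool Pseudo;
  bool isPseudo() const { return Pseudo; }
};

struct MachineInstrStub {
  MCInstrDescStub Desc;
  const MCInstrDescStub &getDesc() const { return Desc; }
  bool isPseudo() const { return getDesc().isPseudo(); }  // forwards to the desc
};

int main() {
  MachineInstrStub MI = {{true}};
  // Same answer either way; the accessor just reads better at call sites.
  std::printf("%d %d\n", (int)MI.getDesc().isPseudo(), (int)MI.isPseudo());
  return 0;
}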
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index d757cf409d50..1caf8c233976 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -143,12 +143,12 @@ void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
static const char *DescKind(GC::PointKind Kind) {
switch (Kind) {
- default: llvm_unreachable("Unknown GC point kind");
case GC::Loop: return "loop";
case GC::Return: return "return";
case GC::PreCall: return "pre-call";
case GC::PostCall: return "post-call";
}
+ llvm_unreachable("Invalid point kind");
}
bool Printer::runOnFunction(Function &F) {
@@ -156,12 +156,12 @@ bool Printer::runOnFunction(Function &F) {
GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
- OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+ OS << "GC roots for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
RE = FD->roots_end(); RI != RE; ++RI)
OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
- OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+ OS << "GC safe points for " << FD->getFunction().getName() << ":\n";
for (GCFunctionInfo::iterator PI = FD->begin(),
PE = FD->end(); PI != PE; ++PI) {
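
Moving llvm_unreachable() from a default: case to after the switch is deliberate: with no default, compilers can warn when a new GC::PointKind enumerator goes unhandled, while the trailing unreachable still documents that falling off the switch is impossible. The same idiom in self-contained form, using a stand-in enum and std::abort() in place of llvm_unreachable:

#include <cstdio>
#include <cstdlib>

enum PointKind { Loop, Return, PreCall, PostCall };  // stand-in for GC::PointKind

static const char *DescKind(PointKind Kind) {
  switch (Kind) {                  // no default: the compiler can now warn
  case Loop:     return "loop";    // when a new enumerator is unhandled
  case Return:   return "return";
  case PreCall:  return "pre-call";
  case PostCall: return "post-call";
  }
  std::abort();                    // models llvm_unreachable("Invalid point kind")
}

int main() { std::printf("%s\n", DescKind(PreCall)); return 0; }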
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 766c6ee542a9..506b5cf09457 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -10,8 +10,8 @@
// This file implements target- and collector-independent garbage collection
// infrastructure.
//
-// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots
-// are identified in SelectionDAGISel.
+// GCMachineCodeAnalysis identifies the GC safe points in the machine code.
+// Roots are identified in SelectionDAGISel.
//
//===----------------------------------------------------------------------===//
@@ -35,9 +35,9 @@
using namespace llvm;
namespace {
-
+
/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or
- /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
+ /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as
/// directed by the GCStrategy. It also performs automatic root initialization
/// and custom intrinsic lowering.
class LowerIntrinsics : public FunctionPass {
@@ -47,47 +47,46 @@ namespace {
bool PerformDefaultLowering(Function &F, GCStrategy &Coll);
static bool InsertRootInitializers(Function &F,
AllocaInst **Roots, unsigned Count);
-
+
public:
static char ID;
-
+
LowerIntrinsics();
const char *getPassName() const;
void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
};
-
-
- /// MachineCodeAnalysis - This is a target-independent pass over the machine
+
+
+ /// GCMachineCodeAnalysis - This is a target-independent pass over the machine
/// function representation to identify safe points for the garbage collector
/// in the machine code. It inserts labels at safe points and populates a
/// GCMetadata record for each function.
- class MachineCodeAnalysis : public MachineFunctionPass {
+ class GCMachineCodeAnalysis : public MachineFunctionPass {
const TargetMachine *TM;
GCFunctionInfo *FI;
MachineModuleInfo *MMI;
const TargetInstrInfo *TII;
-
+
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator MI);
- MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+ MCSymbol *InsertLabel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
DebugLoc DL) const;
-
+
void FindStackOffsets(MachineFunction &MF);
-
+
public:
static char ID;
-
- MachineCodeAnalysis();
- const char *getPassName() const;
+
+ GCMachineCodeAnalysis();
void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
bool runOnMachineFunction(MachineFunction &MF);
};
-
+
}
// -----------------------------------------------------------------------------
@@ -97,6 +96,7 @@ GCStrategy::GCStrategy() :
CustomReadBarriers(false),
CustomWriteBarriers(false),
CustomRoots(false),
+ CustomSafePoints(false),
InitRoots(true),
UsesMetadata(false)
{}
@@ -104,18 +104,24 @@ GCStrategy::GCStrategy() :
GCStrategy::~GCStrategy() {
for (iterator I = begin(), E = end(); I != E; ++I)
delete *I;
-
+
Functions.clear();
}
-
+
bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
-
+
bool GCStrategy::performCustomLowering(Function &F) {
dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+ llvm_unreachable("must override performCustomLowering");
+}
+
+
+bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
+ dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
llvm_unreachable(0);
- return 0;
}
+
GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
Functions.push_back(FI);
@@ -132,7 +138,7 @@ INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
FunctionPass *llvm::createGCLoweringPass() {
return new LowerIntrinsics();
}
-
+
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
@@ -143,7 +149,7 @@ LowerIntrinsics::LowerIntrinsics()
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
}
-
+
void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<GCModuleInfo>();
@@ -161,22 +167,22 @@ bool LowerIntrinsics::doInitialization(Module &M) {
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration() && I->hasGC())
MI->getFunctionInfo(*I); // Instantiate the GC strategy.
-
+
bool MadeChange = false;
for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
if (NeedsCustomLoweringPass(**I))
if ((*I)->initializeCustomLowering(M))
MadeChange = true;
-
+
return MadeChange;
}
-bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
unsigned Count) {
// Scroll past alloca instructions.
BasicBlock::iterator IP = F.getEntryBlock().begin();
while (isa<AllocaInst>(IP)) ++IP;
-
+
// Search for initializers in the initial BB.
SmallPtrSet<AllocaInst*,16> InitedRoots;
for (; !CouldBecomeSafePoint(IP); ++IP)
@@ -184,10 +190,10 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
if (AllocaInst *AI =
dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
InitedRoots.insert(AI);
-
+
// Add root initializers.
bool MadeChange = false;
-
+
for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
if (!InitedRoots.count(*I)) {
StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
@@ -196,7 +202,7 @@ bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
SI->insertAfter(*I);
MadeChange = true;
}
-
+
return MadeChange;
}
@@ -220,26 +226,26 @@ bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
// The natural definition of instructions which could introduce safe points
// is:
- //
+ //
// - call, invoke (AfterCall, BeforeCall)
// - phis (Loops)
// - invoke, ret, unwind (Exit)
- //
+ //
// However, instructions as seemingly innocuous as arithmetic can become
// libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
// it is necessary to take a conservative approach.
-
+
if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
isa<StoreInst>(I) || isa<LoadInst>(I))
return false;
-
+
// llvm.gcroot is safe because it doesn't do anything at runtime.
if (CallInst *CI = dyn_cast<CallInst>(I))
if (Function *F = CI->getCalledFunction())
if (unsigned IID = F->getIntrinsicID())
if (IID == Intrinsic::gcroot)
return false;
-
+
return true;
}
@@ -249,15 +255,15 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
// Quick exit for functions that do not use GC.
if (!F.hasGC())
return false;
-
+
GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
GCStrategy &S = FI.getStrategy();
-
+
bool MadeChange = false;
-
+
if (NeedsDefaultLoweringPass(S))
MadeChange |= PerformDefaultLowering(F, S);
-
+
bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
if (UseCustomLoweringPass)
MadeChange |= S.performCustomLowering(F);
@@ -275,9 +281,9 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
bool LowerWr = !S.customWriteBarrier();
bool LowerRd = !S.customReadBarrier();
bool InitRoots = S.initializeRoots();
-
+
SmallVector<AllocaInst*, 32> Roots;
-
+
bool MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
@@ -313,104 +319,104 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
default:
continue;
}
-
+
MadeChange = true;
}
}
}
-
+
if (Roots.size())
MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
-
+
return MadeChange;
}
// -----------------------------------------------------------------------------
-FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
- return new MachineCodeAnalysis();
-}
+char GCMachineCodeAnalysis::ID = 0;
+char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID;
-char MachineCodeAnalysis::ID = 0;
+INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis",
+ "Analyze Machine Code For Garbage Collection", false, false)
-MachineCodeAnalysis::MachineCodeAnalysis()
+GCMachineCodeAnalysis::GCMachineCodeAnalysis()
: MachineFunctionPass(ID) {}
-const char *MachineCodeAnalysis::getPassName() const {
- return "Analyze Machine Code For Garbage Collection";
-}
-
-void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
AU.addRequired<MachineModuleInfo>();
AU.addRequired<GCModuleInfo>();
}
-MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- DebugLoc DL) const {
+MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ DebugLoc DL) const {
MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
return Label;
}
-void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
// Find the return address (next instruction), too, so as to bracket the call
// instruction.
- MachineBasicBlock::iterator RAI = CI;
- ++RAI;
-
+ MachineBasicBlock::iterator RAI = CI;
+ ++RAI;
+
if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
}
-
+
if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
}
}
-void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
for (MachineFunction::iterator BBI = MF.begin(),
BBE = MF.end(); BBI != BBE; ++BBI)
for (MachineBasicBlock::iterator MI = BBI->begin(),
ME = BBI->end(); MI != ME; ++MI)
- if (MI->getDesc().isCall())
+ if (MI->isCall())
VisitCallPoint(MI);
}
-void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
const TargetFrameLowering *TFI = TM->getFrameLowering();
assert(TFI && "TargetRegisterInfo not available!");
-
+
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
RE = FI->roots_end(); RI != RE; ++RI)
RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
}
-bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// Quick exit for functions that do not use GC.
if (!MF.getFunction()->hasGC())
return false;
-
+
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
if (!FI->getStrategy().needsSafePoints())
return false;
-
+
TM = &MF.getTarget();
MMI = &getAnalysis<MachineModuleInfo>();
TII = TM->getInstrInfo();
-
+
// Find the size of the stack frame.
FI->setFrameSize(MF.getFrameInfo()->getStackSize());
-
+
// Find all safe points.
- FindSafePoints(MF);
-
+ if (FI->getStrategy().customSafePoints()) {
+ FI->getStrategy().findCustomSafePoints(*FI, MF);
+ } else {
+ FindSafePoints(MF);
+ }
+
// Find the stack offsets for all roots.
FindStackOffsets(MF);
-
+
return false;
}
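
The new CustomSafePoints flag and findCustomSafePoints() hook let a collector take over safe-point placement from GCMachineCodeAnalysis::FindSafePoints(). A sketch of how a strategy might opt in; the class is hypothetical, and it assumes the CustomSafePoints member is settable from a subclass constructor, as the existing Custom* flags are:

// Sketch of a collector opting in to custom safe points (hypothetical).
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
using namespace llvm;

namespace {
class MyCollectorGC : public GCStrategy {
public:
  MyCollectorGC() {
    CustomSafePoints = true;  // GCMachineCodeAnalysis will call back into us
  }
  bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) {
    // Walk MF, emit GC_LABELs where this collector wants safe points, and
    // record each with FI.addSafePoint(...), mirroring FindSafePoints().
    return false;
  }
};
} // end anonymous namespace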
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ce7ed293daac..75ae5b9c2c27 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -62,6 +62,7 @@ STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
+STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
namespace {
class IfConverter : public MachineFunctionPass {
@@ -169,7 +170,6 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "If Converter"; }
private:
bool ReverseBranchCondition(BBInfo &BBI);
@@ -195,7 +195,8 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs);
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs = 0);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> &Redefs,
@@ -251,12 +252,12 @@ namespace {
char IfConverter::ID = 0;
}
+char &llvm::IfConverterID = IfConverter::ID;
+
INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
-FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
-
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
@@ -313,8 +314,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
bool RetVal = false;
switch (Kind) {
- default: assert(false && "Unexpected!");
- break;
+ default: llvm_unreachable("Unexpected!");
case ICSimple:
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
@@ -573,12 +573,12 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
// blocks, move the end iterators up past any branch instructions.
while (TIE != TIB) {
--TIE;
- if (!TIE->getDesc().isBranch())
+ if (!TIE->isBranch())
break;
}
while (FIE != FIB) {
--FIE;
- if (!FIE->getDesc().isBranch())
+ if (!FIE->isBranch())
break;
}
@@ -651,12 +651,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (I->isDebugValue())
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isNotDuplicable())
+ if (I->isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
@@ -963,7 +962,7 @@ static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
Redefs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Redefs.insert(*Subreg);
}
@@ -984,7 +983,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
Defs.push_back(Reg);
else if (MO.isKill()) {
Redefs.erase(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Redefs.erase(*SR);
}
}
@@ -997,7 +996,7 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
true/*IsImp*/,false/*IsKill*/));
} else {
Redefs.insert(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
Redefs.insert(*SR);
}
}
@@ -1035,7 +1034,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICSimpleFalse)
if (TII->ReverseBranchCondition(Cond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
@@ -1108,7 +1107,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
if (TII->ReverseBranchCondition(Cond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
if (ReverseBranchCondition(*CvtBBI)) {
@@ -1155,7 +1154,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB);
}
@@ -1227,7 +1226,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
- assert(false && "Unable to reverse branch condition!");
+ llvm_unreachable("Unable to reverse branch condition!");
SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
@@ -1281,7 +1280,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
- // Predicate the 'true' block after removing its branch.
+ // Remove the 'true' block's branch and delete its duplicated instructions.
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
DI1 = BBI1->BB->end();
for (unsigned i = 0; i != NumDups2; ) {
@@ -1294,9 +1293,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
++i;
}
BBI1->BB->erase(DI1, BBI1->BB->end());
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
- // Predicate the 'false' block.
+ // Remove 'false' block branch and find the last instruction to predicate.
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
@@ -1308,6 +1306,55 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
if (!DI2->isDebugValue())
--NumDups2;
}
+
+ // Remember which registers would later be defined by the false block.
+ // This lets us leave instructions in the true block unpredicated when they
+ // would later be re-defined. That is, rather than
+ // subeq r0, r1, #1
+ // addne r0, r1, #1
+ // generate:
+ // sub r0, r1, #1
+ // addne r0, r1, #1
+ SmallSet<unsigned, 4> RedefsByFalse;
+ SmallSet<unsigned, 4> ExtUses;
+ if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
+ for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
+ if (FI->isDebugValue())
+ continue;
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = FI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ } else if (!RedefsByFalse.count(Reg)) {
+ // These are defined before control flow reaches the 'false' instructions.
+ // They cannot be modified by the 'true' instructions.
+ ExtUses.insert(Reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ ExtUses.insert(*SR);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (!ExtUses.count(Reg)) {
+ RedefsByFalse.insert(Reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ RedefsByFalse.insert(*SR);
+ }
+ }
+ }
+ }
+
+ // Predicate the 'true' block.
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse);
+
+ // Predicate the 'false' block.
PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
// Merge the true block into the entry of the diamond.
@@ -1319,7 +1366,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// fold the tail block in as well. Otherwise, unless it falls through to the
// tail, add an unconditional branch to it.
if (TailBB) {
- BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+ BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()];
bool CanMergeTail = !TailBBI.HasFallThrough;
// There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
// check if there are any other predecessors besides those.
@@ -1356,15 +1403,49 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return true;
}
+static bool MaySpeculate(const MachineInstr *MI,
+ SmallSet<unsigned, 4> &LaterRedefs,
+ const TargetInstrInfo *TII) {
+ bool SawStore = true;
+ if (!MI->isSafeToMove(TII, 0, SawStore))
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef() && !LaterRedefs.count(Reg))
+ return false;
+ }
+
+ return true;
+}
+
/// PredicateBlock - Predicate instructions from the start of the block to the
/// specified end with the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> &Redefs) {
+ SmallSet<unsigned, 4> &Redefs,
+ SmallSet<unsigned, 4> *LaterRedefs) {
+ bool AnyUnpred = false;
+ bool MaySpec = LaterRedefs != 0;
for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
if (I->isDebugValue() || TII->isPredicated(I))
continue;
+ // It may be possible to leave an instruction unpredicated if it's on the
+ // 'true' side of a diamond and the 'false' side re-defines all of the
+ // instruction's defs.
+ if (MaySpec && MaySpeculate(I, *LaterRedefs, TII)) {
+ AnyUnpred = true;
+ continue;
+ }
+ // If any instruction is predicated, then every instruction after it must
+ // be predicated.
+ MaySpec = false;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
@@ -1383,6 +1464,8 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
BBI.NonPredSize = 0;
++NumIfConvBBs;
+ if (AnyUnpred)
+ ++NumUnpred;
}
/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
@@ -1395,9 +1478,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const MCInstrDesc &MCID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && MCID.isBranch())
+ if (IgnoreBr && I->isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
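
The core of the new unpredication logic is MaySpeculate(): an instruction on the 'true' side of a diamond can stay unpredicated when it is safe to move and every register it defines is rewritten by the 'false' side anyway. A minimal self-contained model of that def-coverage test follows; the types are toys, and the real code additionally checks MachineInstr::isSafeToMove():

#include <cstdio>
#include <set>
#include <vector>

// Toy instruction: just the registers it defines.
struct Inst { std::vector<int> Defs; };

static bool MaySpeculate(const Inst &I, const std::set<int> &LaterRedefs) {
  for (std::size_t i = 0; i != I.Defs.size(); ++i)
    if (!LaterRedefs.count(I.Defs[i]))
      return false;               // a def survives: must stay predicated
  return true;                    // every def is rewritten by the false side
}

int main() {
  std::set<int> RedefsByFalse;
  RedefsByFalse.insert(0);                     // the false block rewrites r0
  Inst SubEq = {{0}};                          // subeq r0, r1, #1
  Inst OrrEq = {{2}};                          // defines r2, never rewritten
  std::printf("subeq may speculate: %d\n", MaySpeculate(SubEq, RedefsByFalse));
  std::printf("orreq may speculate: %d\n", MaySpeculate(OrrEq, RedefsByFalse));
  return 0;
}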
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 726af4696578..d5ea666e4a17 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,14 +14,15 @@
#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -173,8 +174,7 @@ private:
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
- bool foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
+ bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
MachineInstr *LoadMI = 0);
void insertReload(LiveInterval &NewLI, SlotIndex,
MachineBasicBlock::iterator MI);
@@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
if (unsigned SrcReg = isFullCopyOf(MI, Reg)) {
if (isSibling(SrcReg)) {
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
- LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex());
+ LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true));
assert(SrcLR && "Copy from non-existing value");
// Check if this COPY kills its source.
SVI->second.KillsSource = (SrcLR->end == VNI->def);
@@ -644,16 +644,18 @@ void InlineSpiller::analyzeSiblingValues() {
if (VNI->isUnused())
continue;
MachineInstr *DefMI = 0;
+ if (!VNI->isPHIDef()) {
+ DefMI = LIS.getInstructionFromIndex(VNI->def);
+ assert(DefMI && "No defining instruction");
+ }
// Check possible sibling copies.
- if (VNI->isPHIDef() || VNI->getCopy()) {
+ if (VNI->isPHIDef() || DefMI->isCopy()) {
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
assert(OrigVNI && "Def outside original live range");
if (OrigVNI->def != VNI->def)
DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
}
- if (!DefMI && !VNI->isPHIDef())
- DefMI = LIS.getInstructionFromIndex(VNI->def);
- if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) {
+ if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) {
DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@'
<< VNI->def << " may remat from " << *DefMI);
}
@@ -665,8 +667,8 @@ void InlineSpiller::analyzeSiblingValues() {
/// a spill at a better location.
bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
SlotIndex Idx = LIS.getInstructionIndex(CopyMI);
- VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex());
- assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy");
+ VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot());
+ assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
SibValueMap::iterator I = SibValues.find(VNI);
if (I == SibValues.end())
return false;
@@ -726,7 +728,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) {
MRI.getRegClass(SVI.SpillReg), &TRI);
--MII; // Point to store instruction.
LIS.InsertMachineInstrInMaps(MII);
- VRM.addSpillSlotUse(StackSlot, MII);
DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII);
++NumSpills;
@@ -760,7 +761,7 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg);
MachineInstr *MI = UI.skipInstruction();) {
- if (!MI->isCopy() && !MI->getDesc().mayStore())
+ if (!MI->isCopy() && !MI->mayStore())
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
@@ -770,9 +771,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
if (unsigned DstReg = isFullCopyOf(MI, Reg)) {
if (isSibling(DstReg)) {
LiveInterval &DstLI = LIS.getInterval(DstReg);
- VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex());
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
assert(DstVNI && "Missing defined value");
- assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
WorkList.push_back(std::make_pair(&DstLI, DstVNI));
}
continue;
@@ -811,7 +812,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot());
+ VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI));
if (PVNI)
WorkList.push_back(std::make_pair(LI, PVNI));
}
@@ -824,7 +825,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
continue;
LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
- VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex());
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
assert(SnipVNI && "Snippet undefined before copy");
WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
} while (!WorkList.empty());
@@ -833,7 +834,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
MachineBasicBlock::iterator MI) {
- SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex();
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
if (!ParentVNI) {
@@ -855,7 +856,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
if (SibI != SibValues.end())
RM.OrigMI = SibI->second.DefMI;
- if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) {
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
markValueUsed(&VirtReg, ParentVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
return false;
@@ -863,42 +864,37 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
// If the instruction also writes VirtReg.reg, it had better not require the
// same register for uses and defs.
- bool Reads, Writes;
- SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops);
- if (Writes) {
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
- if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
- markValueUsed(&VirtReg, ParentVNI);
- DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
- return false;
- }
- }
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::RegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
}
// Before rematerializing into a register for a single instruction, try to
// fold a load into the instruction. That avoids allocating a new register.
- if (RM.OrigMI->getDesc().canFoldAsLoad() &&
- foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
Edit->markRematerialized(RM.ParentVNI);
++NumFoldedLoads;
return true;
}
// Allocate a new register for the remat.
- LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM);
+ LiveInterval &NewLI = Edit->createFrom(Original);
NewLI.markNotSpillable();
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
- LIS, TII, TRI);
+ TRI);
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
+ MachineOperand &MO = MI->getOperand(Ops[i].second);
if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
MO.setReg(NewLI.reg);
MO.setIsKill();
@@ -906,8 +902,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
}
DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI));
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
++NumRemats;
return true;
@@ -917,7 +913,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
// analyzeSiblingValues has already tested all relevant defining instructions.
- if (!Edit->anyRematerializable(LIS, TII, AA))
+ if (!Edit->anyRematerializable(AA))
return;
UsedValues.clear();
@@ -929,7 +925,7 @@ void InlineSpiller::reMaterializeAll() {
LiveInterval &LI = LIS.getInterval(Reg);
for (MachineRegisterInfo::use_nodbg_iterator
RI = MRI.use_nodbg_begin(Reg);
- MachineInstr *MI = RI.skipInstruction();)
+ MachineInstr *MI = RI.skipBundle();)
anyRemat |= reMaterializeFor(LI, MI);
}
if (!anyRemat)
@@ -958,7 +954,7 @@ void InlineSpiller::reMaterializeAll() {
if (DeadDefs.empty())
return;
DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
// Get rid of deleted and empty intervals.
for (unsigned i = RegsToSpill.size(); i != 0; --i) {
@@ -970,7 +966,7 @@ void InlineSpiller::reMaterializeAll() {
LiveInterval &LI = LIS.getInterval(Reg);
if (!LI.empty())
continue;
- Edit->eraseVirtReg(Reg, LIS);
+ Edit->eraseVirtReg(Reg);
RegsToSpill.erase(RegsToSpill.begin() + (i - 1));
}
DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n");
@@ -1008,23 +1004,35 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) {
return true;
}
-/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// @param MI Instruction using or defining the current register.
-/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// foldMemoryOperand - Try folding stack slot references in Ops into their
+/// instructions.
+///
+/// @param Ops Operand indices from analyzeVirtReg().
/// @param LoadMI Load instruction to use instead of stack slot when non-null.
-/// @return True on success, and MI will be erased.
-bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) {
+/// @return True on success.
+bool InlineSpiller::
+foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
+ MachineInstr *LoadMI) {
+ if (Ops.empty())
+ return false;
+ // Don't attempt folding in bundles.
+ MachineInstr *MI = Ops.front().first;
+ if (Ops.back().first != MI || MI->isBundled())
+ return false;
+
bool WasCopy = MI->isCopy();
+ unsigned ImpReg = 0;
+
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned Idx = Ops[i];
+ unsigned Idx = Ops[i].second;
MachineOperand &MO = MI->getOperand(Idx);
- if (MO.isImplicit())
+ if (MO.isImplicit()) {
+ ImpReg = MO.getReg();
continue;
+ }
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
@@ -1042,13 +1050,24 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (!FoldMI)
return false;
LIS.ReplaceMachineInstrInMaps(MI, FoldMI);
- if (!LoadMI)
- VRM.addSpillSlotUse(StackSlot, FoldMI);
MI->eraseFromParent();
- DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+
+ // TII.foldMemoryOperand may have left some implicit operands on the
+ // instruction. Strip them.
+ if (ImpReg)
+ for (unsigned i = FoldMI->getNumOperands(); i; --i) {
+ MachineOperand &MO = FoldMI->getOperand(i - 1);
+ if (!MO.isReg() || !MO.isImplicit())
+ break;
+ if (MO.getReg() == ImpReg)
+ FoldMI->RemoveOperand(i - 1);
+ }
+
+ DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t'
+ << *FoldMI);
if (!WasCopy)
++NumFolded;
- else if (Ops.front() == 0)
+ else if (Ops.front().second == 0)
++NumSpills;
else
++NumReloads;
@@ -1063,11 +1082,9 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to load instruction.
- SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
- VRM.addSpillSlotUse(StackSlot, MI);
+ SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
- LIS.getVNInfoAllocator());
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
++NumReloads;
}
@@ -1079,10 +1096,9 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI,
TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot,
MRI.getRegClass(NewLI.reg), &TRI);
--MI; // Point to store instruction.
- SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex();
- VRM.addSpillSlotUse(StackSlot, MI);
+ SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot();
DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
++NumSpills;
}
@@ -1093,8 +1109,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
LiveInterval &OldLI = LIS.getInterval(Reg);
// Iterate over instructions using Reg.
- for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg);
- MachineInstr *MI = RI.skipInstruction();) {
+ for (MachineRegisterInfo::reg_iterator RegI = MRI.reg_begin(Reg);
+ MachineInstr *MI = RegI.skipBundle();) {
// Debug values are not allowed to affect codegen.
if (MI->isDebugValue()) {
@@ -1123,14 +1139,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
continue;
// Analyze instruction.
- bool Reads, Writes;
- SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops);
+ SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops;
+ MIBundleOperands::RegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops);
// Find the slot index where this instruction reads and writes OldLI.
// This is usually the def slot, except for tied early clobbers.
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
- if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex()))
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
+ if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true)))
if (SlotIndex::isSameInstr(Idx, VNI->def))
Idx = VNI->def;
@@ -1143,7 +1159,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
SnippetCopies.insert(MI);
continue;
}
- if (Writes) {
+ if (RI.Writes) {
// Hoist the spill of a sib-reg copy.
if (hoistSpill(OldLI, MI)) {
// This COPY is now dead, the value is already in the stack slot.
@@ -1160,24 +1176,24 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
}
// Attempt to fold memory ops.
- if (foldMemoryOperand(MI, Ops))
+ if (foldMemoryOperand(Ops))
continue;
// Allocate interval around instruction.
// FIXME: Infer regclass from instruction alone.
- LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM);
+ LiveInterval &NewLI = Edit->createFrom(Reg);
NewLI.markNotSpillable();
- if (Reads)
+ if (RI.Reads)
insertReload(NewLI, Idx, MI);
// Rewrite instruction operands.
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(Ops[i]);
+ MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second);
MO.setReg(NewLI.reg);
if (MO.isUse()) {
- if (!MI->isRegTiedToDefOperand(Ops[i]))
+ if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second))
MO.setIsKill();
} else {
if (!MO.isDead())
@@ -1187,15 +1203,15 @@ void InlineSpiller::spillAroundUses(unsigned Reg) {
DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI);
// FIXME: Use a second vreg if instruction has no tied ops.
- if (Writes) {
- if (hasLiveDef)
- insertSpill(NewLI, OldLI, Idx, MI);
- else {
- // This instruction defines a dead value. We don't need to spill it,
- // but do create a live range for the dead value.
- VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator());
- NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI));
- }
+ if (RI.Writes) {
+ if (hasLiveDef)
+ insertSpill(NewLI, OldLI, Idx, MI);
+ else {
+ // This instruction defines a dead value. We don't need to spill it,
+ // but do create a live range for the dead value.
+ VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI));
+ }
}
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
@@ -1208,7 +1224,7 @@ void InlineSpiller::spillAll() {
if (StackSlot == VirtRegMap::NO_STACK_SLOT) {
StackSlot = VRM.assignVirt2StackSlot(Original);
StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original));
- StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator());
+ StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator());
} else
StackInt = &LSS.getInterval(StackSlot);
@@ -1228,7 +1244,7 @@ void InlineSpiller::spillAll() {
// Hoisted spills may cause dead code.
if (!DeadDefs.empty()) {
DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n");
- Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(DeadDefs, RegsToSpill);
}
// Finally delete the SnippetCopies.
@@ -1237,7 +1253,6 @@ void InlineSpiller::spillAll() {
MachineInstr *MI = RI.skipInstruction();) {
assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
- VRM.RemoveMachineInstrFromMaps(MI);
LIS.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
}
@@ -1245,7 +1260,7 @@ void InlineSpiller::spillAll() {
// Delete all spilled registers.
for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i)
- Edit->eraseVirtReg(RegsToSpill[i], LIS);
+ Edit->eraseVirtReg(RegsToSpill[i]);
}
void InlineSpiller::spill(LiveRangeEdit &edit) {
@@ -1274,5 +1289,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
if (!RegsToSpill.empty())
spillAll();
- Edit->calculateRegClassAndHint(MF, LIS, Loops);
+ Edit->calculateRegClassAndHint(MF, Loops);
}
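
Most of the InlineSpiller churn follows one type change: operand lists go from plain indices into a single MachineInstr to (MachineInstr*, operand index) pairs produced by MIBundleOperands::analyzeVirtReg(), so an operand inside a bundle can name the bundled instruction that owns it. A sketch of consuming such a list, with stand-in types rather than LLVM's:

#include <cstdio>
#include <utility>
#include <vector>

struct MachineInstrStub { const char *Name; };  // stand-in for MachineInstr

int main() {
  MachineInstrStub Load = {"LDR"}, Add = {"ADD"};
  // What analyzeVirtReg() hands back: one entry per operand of the virtual
  // register, each tagged with the instruction (possibly inside a bundle)
  // that owns it, instead of a bare operand index.
  std::vector<std::pair<MachineInstrStub*, unsigned> > Ops;
  Ops.push_back(std::make_pair(&Load, 1));
  Ops.push_back(std::make_pair(&Add, 2));
  for (std::size_t i = 0; i != Ops.size(); ++i)
    // the rewrite loops become Ops[i].first->getOperand(Ops[i].second)
    std::printf("operand %u of %s\n", Ops[i].second, Ops[i].first->Name);
  return 0;
}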
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 29b47bd67ece..8368b58880a3 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,4 +1,4 @@
-//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===//
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,7 @@
#include "InterferenceCache.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
using namespace llvm;
@@ -24,13 +25,14 @@ InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
void InterferenceCache::init(MachineFunction *mf,
LiveIntervalUnion *liuarray,
SlotIndexes *indexes,
+ LiveIntervals *lis,
const TargetRegisterInfo *tri) {
MF = mf;
LIUArray = liuarray;
TRI = tri;
PhysRegEntries.assign(TRI->getNumRegs(), 0);
for (unsigned i = 0; i != CacheEntries; ++i)
- Entries[i].clear(mf, indexes);
+ Entries[i].clear(mf, indexes, lis);
}
InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
@@ -78,7 +80,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
PhysReg = physReg;
Blocks.resize(MF->getNumBlockIDs());
Aliases.clear();
- for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
+ for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) {
LiveIntervalUnion *LIU = LIUArray + *AS;
Aliases.push_back(std::make_pair(LIU, LIU->getTag()));
}
@@ -94,7 +96,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
unsigned i = 0, e = Aliases.size();
- for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
+ for (const uint16_t *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) {
LiveIntervalUnion *LIU = LIUArray + *AS;
if (i == e || Aliases[i].first != LIU)
return false;
@@ -121,6 +123,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum);
BlockInterference *BI = &Blocks[MBBNum];
+ ArrayRef<SlotIndex> RegMaskSlots;
+ ArrayRef<const uint32_t*> RegMaskBits;
for (;;) {
BI->Tag = Tag;
BI->First = BI->Last = SlotIndex();
@@ -137,6 +141,18 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
BI->First = StartI;
}
+ // Also check for register mask interference.
+ RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum);
+ RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum);
+ SlotIndex Limit = BI->First.isValid() ? BI->First : Stop;
+ for (unsigned i = 0, e = RegMaskSlots.size();
+ i != e && RegMaskSlots[i] < Limit; ++i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) {
+ // Register mask i clobbers PhysReg before the LIU interference.
+ BI->First = RegMaskSlots[i];
+ break;
+ }
+
PrevPos = Stop;
if (BI->First.isValid())
break;
@@ -166,4 +182,15 @@ void InterferenceCache::Entry::update(unsigned MBBNum) {
if (Backup)
++I;
}
+
+ // Also check for register mask interference.
+ SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start;
+ for (unsigned i = RegMaskSlots.size();
+ i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i)
+ if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) {
+ // Register mask i-1 clobbers PhysReg after the LIU interference.
+ // Model the regmask clobber as a dead def.
+ BI->Last = RegMaskSlots[i-1].getDeadSlot();
+ break;
+ }
}
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 4df0a9e5c393..485a325aa146 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -18,10 +18,11 @@
namespace llvm {
+class LiveIntervals;
+
class InterferenceCache {
const TargetRegisterInfo *TRI;
LiveIntervalUnion *LIUArray;
- SlotIndexes *Indexes;
MachineFunction *MF;
/// BlockInterference - information about the interference in a single basic
@@ -52,6 +53,9 @@ class InterferenceCache {
/// Indexes - Mapping block numbers to SlotIndex ranges.
SlotIndexes *Indexes;
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
/// PrevPos - The previous position the iterators were moved to.
SlotIndex PrevPos;
@@ -71,13 +75,14 @@ class InterferenceCache {
void update(unsigned MBBNum);
public:
- Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
- void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
PhysReg = 0;
MF = mf;
Indexes = indexes;
+ LIS = lis;
}
unsigned getPhysReg() const { return PhysReg; }
@@ -124,10 +129,10 @@ class InterferenceCache {
Entry *get(unsigned PhysReg);
public:
- InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {}
+ InterferenceCache() : TRI(0), LIUArray(0), MF(0), RoundRobin(0) {}
/// init - Prepare cache for a new function.
- void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
const TargetRegisterInfo *);
/// getMaxCursors - Return the maximum number of concurrent cursors that can
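
The new interference checks revolve around register masks: call sites carry a bit vector with one bit per physical register, where a set bit means the register is preserved across the call. MachineOperand::clobbersPhysReg() is essentially the bit test below; this self-contained model assumes that preserved-bit convention:

#include <cstdint>
#include <cstdio>

// Model of MachineOperand::clobbersPhysReg(): a register mask stores one
// bit per physical register; a SET bit means "preserved across the call",
// so a CLEAR bit means the mask clobbers that register.
static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
}

int main() {
  uint32_t Mask[2] = {0, 0};
  Mask[0] |= 1u << 3;                      // r3 is callee-saved (preserved)
  std::printf("r3 clobbered: %d\n", clobbersPhysReg(Mask, 3));  // prints 0
  std::printf("r7 clobbered: %d\n", clobbersPhysReg(Mask, 7));  // prints 1
  return 0;
}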
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 0f92c2d06bdd..a9ca42f69b97 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -448,11 +448,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::dbg_declare:
break; // Simply strip out debugging intrinsics
- case Intrinsic::eh_exception:
- case Intrinsic::eh_selector:
- CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
- break;
-
case Intrinsic::eh_typeid_for:
// Return something different to eh_selector.
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
diff --git a/lib/CodeGen/JITCodeEmitter.cpp b/lib/CodeGen/JITCodeEmitter.cpp
new file mode 100644
index 000000000000..96a53892f6d3
--- /dev/null
+++ b/lib/CodeGen/JITCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/JITCodeEmitter.cpp - Code emission --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+
+using namespace llvm;
+
+void JITCodeEmitter::anchor() { }
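
The empty anchor() above is the usual LLVM device for pinning a class's vtable to one object file: when every other virtual function is defined inline in the header, declaring a single out-of-line virtual member gives the compiler a home translation unit for the vtable and RTTI instead of emitting weak copies in every user. The idiom in miniature, with hypothetical names:

    // Widget.h
    class Widget {
    public:
      virtual ~Widget() {}
      virtual void frob() {}   // other virtuals may be defined inline...
    private:
      virtual void anchor();   // ...but one is deliberately out-of-line.
    };

    // Widget.cpp: the vtable and RTTI for Widget are emitted here, once.
    void Widget::anchor() {}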
diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt
new file mode 100644
index 000000000000..fee0347ea659
--- /dev/null
+++ b/lib/CodeGen/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/CodeGen/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmPrinter SelectionDAG
+
+[component_0]
+type = Library
+name = CodeGen
+parent = Libraries
+required_libraries = Analysis Core MC Scalar Support Target TransformUtils
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 187147a3e252..a1f479a4275f 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,82 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/PassManager.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-namespace llvm {
- bool EnableFastISel;
-}
+// Enable or disable FastISel. Both options are needed, because
+// FastISel is enabled by default with -fast, and we wish to be
+// able to enable or disable fast-isel independently from -O0.
+static cl::opt<cl::boolOrDefault>
+EnableFastISelOption("fast-isel", cl::Hidden,
+ cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
- cl::desc("Disable Post Regalloc"));
-static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
- cl::desc("Disable branch folding"));
-static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
- cl::desc("Disable tail duplication"));
-static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
- cl::desc("Disable pre-register allocation tail duplication"));
-static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
- cl::desc("Disable code placement"));
-static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
- cl::desc("Disable Stack Slot Coloring"));
-static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
- cl::desc("Disable Machine Dead Code Elimination"));
-static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
- cl::desc("Disable Machine LICM"));
-static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
- cl::desc("Disable Machine Common Subexpression Elimination"));
-static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
- cl::Hidden,
- cl::desc("Disable Machine LICM"));
-static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
- cl::desc("Disable Machine Sinking"));
-static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
- cl::desc("Disable Loop Strength Reduction Pass"));
-static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
- cl::desc("Disable Codegen Prepare"));
-static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
- cl::desc("Print LLVM IR produced by the loop-reduce pass"));
-static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
- cl::desc("Print LLVM IR input to isel pass"));
-static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
- cl::desc("Dump garbage collector data"));
static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
cl::desc("Show encoding in .s output"));
static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
cl::desc("Show instruction structure in .s output"));
-static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
- cl::desc("Enable MC API logging"));
-static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
- cl::desc("Verify generated machine code"),
- cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
@@ -94,25 +54,20 @@ AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
static bool getVerboseAsm() {
switch (AsmVerbose) {
- default:
case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
}
+ llvm_unreachable("Invalid verbose asm state");
}
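
cl::boolOrDefault gives a flag three states instead of two, so "not given on the command line" stays distinguishable from an explicit -asm-verbose=false; getVerboseAsm() folds the unset state back to the target default, and the relocated -fast-isel option above relies on the same trick. The pattern reduced to its core, with a plain enum standing in for the llvm::cl type:

    // A plain enum standing in for llvm::cl::boolOrDefault and its
    // BOU_* values.
    enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

    // Resolve a tri-state flag against a default, as getVerboseAsm() does.
    static bool resolve(BoolOrDefault Opt, bool Default) {
      switch (Opt) {
      case BOU_UNSET: return Default; // flag never given: use the default
      case BOU_TRUE:  return true;    // explicit -flag or -flag=true
      case BOU_FALSE: return false;   // explicit -flag=false
      }
      return Default; // not reachable; placates -Wreturn-type
    }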
-// Enable or disable FastISel. Both options are needed, because
-// FastISel is enabled by default with -fast, and we wish to be
-// able to enable or disable fast-isel independently from -O0.
-static cl::opt<cl::boolOrDefault>
-EnableFastISelOption("fast-isel", cl::Hidden,
- cl::desc("Enable the \"fast\" instruction selector"));
-
LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, Triple, CPU, FS) {
- CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM);
+ TargetOptions Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, Triple, CPU, FS, Options) {
+ CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL);
AsmInfo = T.createMCAsmInfo(Triple);
// TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0,
// and if the old one gets included then MCAsmInfo will be NULL and
@@ -123,16 +78,88 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
"and that InitializeAllTargetMCs() is being invoked!");
}
+/// Turn exception handling constructs into something the code generators can
+/// handle.
+static void addPassesToHandleExceptions(TargetMachine *TM,
+ PassManagerBase &PM) {
+ switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ case ExceptionHandling::SjLj:
+    // SjLj piggy-backs on dwarf for this bit; the cleanups done for dwarf
+    // apply to both models. Dwarf EH prepare needs to be run after SjLj
+    // prepare. Otherwise,
+ // catch info can get misplaced when a selector ends up more than one block
+ // removed from the parent invoke(s). This could happen when a landing
+ // pad is shared by multiple invokes and is also a target of a normal
+ // edge from elsewhere.
+ PM.add(createSjLjEHPreparePass(TM->getTargetLowering()));
+ // FALLTHROUGH
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::ARM:
+ case ExceptionHandling::Win64:
+ PM.add(createDwarfEHPass(TM));
+ break;
+ case ExceptionHandling::None:
+ PM.add(createLowerInvokePass(TM->getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
+ break;
+ }
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
+ PassManagerBase &PM,
+ bool DisableVerify) {
+  // Targets may override createPassConfig to provide a target-specific subclass.
+ TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+
+ // Set PassConfig options provided by TargetMachine.
+ PassConfig->setDisableVerify(DisableVerify);
+
+ PM.add(PassConfig);
+
+ PassConfig->addIRPasses();
+
+ addPassesToHandleExceptions(TM, PM);
+
+ PassConfig->addISelPrepare();
+
+ // Install a MachineModuleInfo class, which is an immutable pass that holds
+ // all the per-module stuff we're generating, including MCContext.
+ MachineModuleInfo *MMI =
+ new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(),
+ &TM->getTargetLowering()->getObjFileLowering());
+ PM.add(MMI);
+ MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref.
+
+ // Set up a MachineFunction for the rest of CodeGen to work on.
+ PM.add(new MachineFunctionAnalysis(*TM));
+
+ // Enable FastISel with -fast, but allow that to be overridden.
+ if (EnableFastISelOption == cl::BOU_TRUE ||
+ (TM->getOptLevel() == CodeGenOpt::None &&
+ EnableFastISelOption != cl::BOU_FALSE))
+ TM->setFastISel(true);
+
+ // Ask the target for an isel.
+ if (PassConfig->addInstSelector())
+ return NULL;
+
+ PassConfig->addMachinePasses();
+
+ PassConfig->setInitialized();
+
+ return Context;
+}
+
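addPassesToGenerateCode() assumes each target supplies its own TargetPassConfig subclass via createPassConfig(). A sketch of what such an override might look like, assuming the protected TM/PM members of the initial TargetPassConfig interface; FooTargetMachine, FooPassConfig, and createFooISelDag are illustrative names, not code from the tree:

    // Illustrative only: a hypothetical target wiring in its instruction
    // selector through the new hook.
    class FooPassConfig : public TargetPassConfig {
    public:
      FooPassConfig(FooTargetMachine *TM, PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}

      // Returning true means failure; addPassesToGenerateCode() then gives
      // up and the addPassesTo* caller reports emission as unsupported.
      virtual bool addInstSelector() {
        PM.add(createFooISelDag(getFooTargetMachine()));
        return false;
      }
    };

    TargetPassConfig *FooTargetMachine::createPassConfig(PassManagerBase &PM) {
      return new FooPassConfig(this, PM);
    }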
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Context = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Context)
return true;
- assert(Context != 0 && "Failed to get MCContext");
if (hasMCSaveTempLabels())
Context->setAllowTemporaryLabels(false);
@@ -142,10 +169,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
OwningPtr<MCStreamer> AsmStreamer;
switch (FileType) {
- default: return true;
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI,
+ *getInstrInfo(),
+ Context->getRegisterInfo(), STI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
@@ -160,6 +188,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
getVerboseAsm(),
hasMCUseLoc(),
hasMCUseCFI(),
+ hasMCUseDwarfDirectory(),
InstPrinter,
MCE, MAB,
ShowMCInst);
@@ -189,9 +218,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
break;
}
- if (EnableMCLogging)
- AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
-
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
if (Printer == 0)
@@ -214,14 +240,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Ctx = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Context)
return true;
- addCodeEmitter(PM, OptLevel, JCE);
+ addCodeEmitter(PM, JCE);
PM.add(createGCInfoDeleter());
return false; // success!
@@ -235,10 +260,10 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
MCContext *&Ctx,
raw_ostream &Out,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify);
+ if (!Ctx)
return true;
if (hasMCSaveTempLabels())
@@ -247,7 +272,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(),STI,
+ *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple());
if (MCE == 0 || MAB == 0)
return true;
@@ -271,227 +297,3 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
return false; // success!
}
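
From a client's point of view the entry points keep their shape; only the CodeGenOpt::Level parameter is gone, since it now travels on the TargetMachine. An llc-style caller, sketched against the post-patch signature with error handling elided:

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/FormattedStream.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Target/TargetMachine.h"
    using namespace llvm;

    // Emit assembly for M with an already-configured TargetMachine. A
    // sketch: real callers (llc) also install TargetData and pick the
    // output stream from the command line.
    static bool emitAssembly(TargetMachine &TM, Module &M, raw_ostream &OS) {
      PassManager PM;
      formatted_raw_ostream FOS(OS);
      if (TM.addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
                                 /*DisableVerify=*/true))
        return true; // the target cannot emit a file of this type
      PM.run(M);     // runs IR passes, isel, machine passes, the printer
      return false;
    }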
-
-static void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-}
-
-static void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(Banner));
-}
-
-/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
-/// emitting to assembly files or machine code output.
-///
-bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify,
- MCContext *&OutContext) {
- // Standard LLVM-Level Passes.
-
- // Basic AliasAnalysis support.
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
-
- // Before running any passes, run the verifier to determine if the input
- // coming from the front-end and/or optimizer is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Run loop strength reduction before anything else.
- if (OptLevel != CodeGenOpt::None && !DisableLSR) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
- if (PrintLSR)
- PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
- }
-
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
- // Turn exception handling constructs into something the code generators can
- // handle.
- switch (getMCAsmInfo()->getExceptionHandlingType()) {
- case ExceptionHandling::SjLj:
- // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
- // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
- // catch info can get misplaced when a selector ends up more than one block
- // removed from the parent invoke(s). This could happen when a landing
- // pad is shared by multiple invokes and is also a target of a normal
- // edge from elsewhere.
- PM.add(createSjLjEHPass(getTargetLowering()));
- // FALLTHROUGH
- case ExceptionHandling::DwarfCFI:
- case ExceptionHandling::ARM:
- case ExceptionHandling::Win64:
- PM.add(createDwarfEHPass(this));
- break;
- case ExceptionHandling::None:
- PM.add(createLowerInvokePass(getTargetLowering()));
-
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
- break;
- }
-
- if (OptLevel != CodeGenOpt::None && !DisableCGP)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
-
- PM.add(createStackProtectorPass(getTargetLowering()));
-
- addPreISel(PM, OptLevel);
-
- if (PrintISelInput)
- PM.add(createPrintFunctionPass("\n\n"
- "*** Final LLVM Code input to ISel ***\n",
- &dbgs()));
-
- // All passes which modify the LLVM IR are now complete; run the verifier
- // to ensure that the IR is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Standard Lower-Level Passes.
-
- // Install a MachineModuleInfo class, which is an immutable pass that holds
- // all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getRegisterInfo(),
- &getTargetLowering()->getObjFileLowering());
- PM.add(MMI);
- OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
- // Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*this, OptLevel));
-
- // Enable FastISel with -fast, but allow that to be overridden.
- if (EnableFastISelOption == cl::BOU_TRUE ||
- (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE))
- EnableFastISel = true;
-
- // Ask the target for an isel.
- if (addInstSelector(PM, OptLevel))
- return true;
-
- // Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- PM.add(createExpandISelPseudosPass());
-
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
- PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
- }
-
- // Optimize PHIs before DCE: removing dead PHI cycles may make more
- // instructions dead.
- if (OptLevel != CodeGenOpt::None)
- PM.add(createOptimizePHIsPass());
-
- // If the target requests it, assign local variables to stack slots relative
- // to one another and simplify frame index references where possible.
- PM.add(createLocalStackSlotAllocationPass());
-
- if (OptLevel != CodeGenOpt::None) {
- // With optimization, dead code should already be eliminated. However
- // there is one known exception: lowered code for arguments that are only
- // used by tail calls, where the tail calls reuse the incoming stack
- // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- if (!DisableMachineDCE)
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass");
-
- if (!DisableMachineLICM)
- PM.add(createMachineLICMPass());
- if (!DisableMachineCSE)
- PM.add(createMachineCSEPass());
- if (!DisableMachineSink)
- PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
-
- PM.add(createPeepholeOptimizerPass());
- printAndVerify(PM, "After codegen peephole optimization pass");
- }
-
- // Run pre-ra passes.
- if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes");
-
- // Perform register allocation.
- PM.add(createRegisterAllocator(OptLevel));
- printAndVerify(PM, "After Register Allocation");
-
- // Perform stack slot coloring and post-ra machine LICM.
- if (OptLevel != CodeGenOpt::None) {
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- if (!DisableSSC)
- PM.add(createStackSlotColoringPass(false));
-
- // Run post-ra machine LICM to hoist reloads / remats.
- if (!DisablePostRAMachineLICM)
- PM.add(createMachineLICMPass(false));
-
- printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
- }
-
- // Run post-ra passes.
- if (addPostRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PostRegAlloc passes");
-
- PM.add(createExpandPostRAPseudosPass());
- printAndVerify(PM, "After ExpandPostRAPseudos");
-
- // Insert prolog/epilog code. Eliminate abstract frame index references...
- PM.add(createPrologEpilogCodeInserter());
- printAndVerify(PM, "After PrologEpilogCodeInserter");
-
- // Run pre-sched2 passes.
- if (addPreSched2(PM, OptLevel))
- printAndVerify(PM, "After PreSched2 passes");
-
- // Second pass scheduler.
- if (OptLevel != CodeGenOpt::None && !DisablePostRA) {
- PM.add(createPostRAScheduler(OptLevel));
- printAndVerify(PM, "After PostRAScheduler");
- }
-
- // Branch folding must be run after regalloc and prolog/epilog insertion.
- if (OptLevel != CodeGenOpt::None && !DisableBranchFold) {
- PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printNoVerify(PM, "After BranchFolding");
- }
-
- // Tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
- PM.add(createTailDuplicatePass(false));
- printNoVerify(PM, "After TailDuplicate");
- }
-
- PM.add(createGCMachineCodeAnalysisPass());
-
- if (PrintGCInfo)
- PM.add(createGCInfoPrinter(dbgs()));
-
- if (OptLevel != CodeGenOpt::None && !DisableCodePlace) {
- PM.add(createCodePlacementOptPass());
- printNoVerify(PM, "After CodePlacementOpt");
- }
-
- if (addPreEmitPass(PM, OptLevel))
- printNoVerify(PM, "After PreEmit passes");
-
- return false;
-}
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 0eb009ddac29..deab05a412c9 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -46,7 +46,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
- return LHSNum < RHSNum;
+ return RHSNum < LHSNum;
}
@@ -84,11 +84,11 @@ void LatencyPriorityQueue::push(SUnit *SU) {
}
-// ScheduledNode - As nodes are scheduled, we look to see if there are any
+// scheduledNode - As nodes are scheduled, we look to see if there are any
// successor nodes that have a single unscheduled predecessor. If so, that
// single predecessor has a higher priority, since scheduling it will make
// the node available.
-void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
+void LatencyPriorityQueue::scheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index a12e1a36d113..f1abcbb1dd5c 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -311,6 +311,8 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) {
return Result;
}
+void LexicalScope::anchor() { }
+
/// dump - Print data structures.
void LexicalScope::dump() const {
#ifndef NDEBUG
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 3dfe4c0e8cfa..2187833031ee 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -226,7 +226,7 @@ public:
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ UserValueScopes &UVS);
/// addDefsFromCopies - The value in LI/LocNo may be copies to other
/// registers. Determine if any of the copies are available at the kill
@@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
// DBG_VALUE has no slot index, use the previous instruction instead.
SlotIndex Idx = MBBI == MBB->begin() ?
LIS->getMBBStartIdx(MBB) :
- LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot();
// Handle consecutive DBG_VALUE instructions with the same slot index.
do {
if (handleDebugValue(MBBI, Idx)) {
@@ -486,7 +486,7 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
LiveInterval *LI, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<SlotIndex, 16> Todo;
Todo.push_back(Idx);
do {
@@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
// Is LocNo extended to reach this copy? If not, another def may be blocking
// it, or we are looking at a wrong value of LI.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- LocMap::iterator I = locInts.find(Idx.getUseIndex());
+ LocMap::iterator I = locInts.find(Idx.getRegSlot(true));
if (!I.valid() || I.value() != LocNo)
continue;
if (!LIS.hasInterval(DstReg))
continue;
LiveInterval *DstLI = &LIS.getInterval(DstReg);
- const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex());
- assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value");
+ const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value");
CopyValues.push_back(std::make_pair(DstLI, DstVNI));
}
@@ -620,7 +620,7 @@ void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveIntervals &LIS,
MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ UserValueScopes &UVS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -841,7 +841,7 @@ bool
UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) {
bool DidChange = false;
// Split locations referring to OldReg. Iterate backwards so splitLocation can
- // safely erase unuused locations.
+ // safely erase unused locations.
for (unsigned i = locations.size(); i ; --i) {
unsigned LocNo = i-1;
const MachineOperand *Loc = &locations[LocNo];
@@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
// index is no longer available. That means the user value is in a
// non-existent sub-register, and %noreg is exactly what we want.
Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
- } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
- VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) {
// FIXME: Translate SubIdx to a stackslot offset.
Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
} else {
@@ -921,8 +920,8 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
}
// Don't insert anything after the first terminator, though.
- return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
- llvm::next(MachineBasicBlock::iterator(MI));
+ return MI->isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
}
DebugLoc UserValue::findDebugLoc() {
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index b69945aea98f..ac18843ac30d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -381,37 +381,40 @@ void LiveInterval::join(LiveInterval &Other,
for (unsigned i = 0; i != NumVals; ++i) {
unsigned LHSValID = LHSValNoAssignments[i];
if (i != LHSValID ||
- (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+ (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) {
MustMapCurValNos = true;
+ break;
+ }
}
// If we have to apply a mapping to our base interval assignment, rewrite it
// now.
if (MustMapCurValNos) {
// Map the first live range.
+
iterator OutIt = begin();
OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
- ++OutIt;
- for (iterator I = OutIt, E = end(); I != E; ++I) {
- OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+ for (iterator I = next(OutIt), E = end(); I != E; ++I) {
+ VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+      assert(nextValNo != 0 && "Expected a remapped value number");
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one LiveRange. This happens when we
- // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
- if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
- (OutIt-1)->end = OutIt->end;
+ // have [0,4:0)[4,7:1) and map 0/1 onto the same value #.
+ if (OutIt->valno == nextValNo && OutIt->end == I->start) {
+ OutIt->end = I->end;
} else {
- if (I != OutIt) {
+        // Didn't merge. Move OutIt to the next live range.
+ ++OutIt;
+ OutIt->valno = nextValNo;
+ if (OutIt != I) {
OutIt->start = I->start;
OutIt->end = I->end;
}
-
- // Didn't merge, on to the next one.
- ++OutIt;
}
}
-
// If we merge some live ranges, chop off the end.
+ ++OutIt;
ranges.erase(OutIt, end());
}
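
The rewritten loop is easiest to follow on the example from the comment: with ranges [0,4:0)[4,7:1) and both value numbers mapped onto the same new value, the two ranges become touching neighbors with equal valno, so OutIt->end is extended to 7 and the trailing range is dropped by the final erase. The same compaction over bare (start, end, id) triples, as a standalone illustration:

    #include <cassert>
    #include <vector>

    struct Range { int start, end, id; }; // half-open [start,end), value id

    // Compact ranges in place: fold a range into its predecessor when they
    // touch and their ids remap to the same value. Mirrors the join() loop.
    static void compact(std::vector<Range> &R, const std::vector<int> &Map) {
      if (R.empty()) return;
      unsigned Out = 0;
      R[0].id = Map[R[0].id];
      for (unsigned I = 1; I != R.size(); ++I) {
        int NewId = Map[R[I].id];
        if (R[Out].id == NewId && R[Out].end == R[I].start) {
          R[Out].end = R[I].end;   // touching neighbors, one value: merge
        } else {
          ++Out;                   // didn't merge; advance the output slot
          R[Out] = R[I];
          R[Out].id = NewId;
        }
      }
      R.erase(R.begin() + Out + 1, R.end()); // chop off the merged tail
    }

    int main() {
      std::vector<Range> R;
      Range A = { 0, 4, 0 }, B = { 4, 7, 1 };
      R.push_back(A);
      R.push_back(B);
      std::vector<int> Map(2, 0);  // value numbers 0 and 1 both map to 0
      compact(R, Map);
      assert(R.size() == 1 && R[0].start == 0 && R[0].end == 7);
      return 0;
    }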
@@ -639,8 +642,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "-phidef";
if (vni->hasPHIKill())
OS << "-phikill";
- if (vni->hasRedefByEC())
- OS << "-ec";
}
}
}
@@ -680,15 +681,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Connect to values live out of predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI)
- if (const VNInfo *PVNI =
- LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()))
+ if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
EqClass.join(VNI->id, PVNI->id);
} else {
// Normal value defined by an instruction. Check for two-addr redef.
// FIXME: This could be coincidental. Should we really check for a tied
// operand constraint?
// Note that VNI->def may be a use slot for an early clobber def.
- if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
EqClass.join(VNI->id, UVNI->id);
}
}
@@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[],
continue;
// DBG_VALUE instructions should have been eliminated earlier.
SlotIndex Idx = LIS.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ Idx = Idx.getRegSlot(MO.isUse());
const VNInfo *VNI = LI.getVNInfoAt(Idx);
assert(VNI && "Interval not live at use.");
MO.setReg(LIV[getEqClass(VNI)]->reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index b1e202a273d3..3ade66097cbd 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -15,31 +15,22 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "liveintervals"
+#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "VirtRegMap.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ProcessImplicitDefs.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -52,19 +43,14 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
STATISTIC(numIntervals , "Number of original intervals");
-STATISTIC(numFolds , "Number of loads/stores folded into instructions");
-STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
-INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
"Live Interval Analysis", false, false)
@@ -74,18 +60,8 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveVariables>();
AU.addPreserved<LiveVariables>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
-
- if (!StrongPHIElim) {
- AU.addPreservedID(PHIEliminationID);
- AU.addRequiredID(PHIEliminationID);
- }
-
- AU.addRequiredID(TwoAddressInstructionPassID);
- AU.addPreserved<ProcessImplicitDefs>();
- AU.addRequired<ProcessImplicitDefs>();
AU.addPreserved<SlotIndexes>();
AU.addRequiredTransitive<SlotIndexes>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -98,14 +74,12 @@ void LiveIntervals::releaseMemory() {
delete I->second;
r2iMap_.clear();
+ RegMaskSlots.clear();
+ RegMaskBits.clear();
+ RegMaskBlocks.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
VNInfoAllocator.Reset();
- while (!CloneMIs.empty()) {
- MachineInstr *MI = CloneMIs.back();
- CloneMIs.pop_back();
- mf_->DeleteMachineInstr(MI);
- }
}
/// runOnMachineFunction - Register allocate the whole function
@@ -120,6 +94,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
lv_ = &getAnalysis<LiveVariables>();
indexes_ = &getAnalysis<SlotIndexes>();
allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
computeIntervals();
@@ -132,10 +107,21 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
/// print - Implement the dump method.
void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
OS << "********** INTERVALS **********\n";
- for (const_iterator I = begin(), E = end(); I != E; ++I) {
- I->second->print(OS, tri_);
- OS << "\n";
- }
+
+ // Dump the physregs.
+ for (unsigned Reg = 1, RegE = tri_->getNumRegs(); Reg != RegE; ++Reg)
+ if (const LiveInterval *LI = r2iMap_.lookup(Reg)) {
+ LI->print(OS, tri_);
+ OS << '\n';
+ }
+
+ // Dump the virtregs.
+ for (unsigned Reg = 0, RegE = mri_->getNumVirtRegs(); Reg != RegE; ++Reg)
+ if (const LiveInterval *LI =
+ r2iMap_.lookup(TargetRegisterInfo::index2VirtReg(Reg))) {
+ LI->print(OS, tri_);
+ OS << '\n';
+ }
printInstrs(OS);
}
@@ -149,103 +135,6 @@ void LiveIntervals::dumpInstrs() const {
printInstrs(dbgs());
}
-bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
- VirtRegMap &vrm, unsigned reg) {
- // We don't handle fancy stuff crossing basic block boundaries
- if (li.ranges.size() != 1)
- return true;
- const LiveRange &range = li.ranges.front();
- SlotIndex idx = range.start.getBaseIndex();
- SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
-
- // Skip deleted instructions
- MachineInstr *firstMI = getInstructionFromIndex(idx);
- while (!firstMI && idx != end) {
- idx = idx.getNextIndex();
- firstMI = getInstructionFromIndex(idx);
- }
- if (!firstMI)
- return false;
-
- // Find last instruction in range
- SlotIndex lastIdx = end.getPrevIndex();
- MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
- while (!lastMI && lastIdx != idx) {
- lastIdx = lastIdx.getPrevIndex();
- lastMI = getInstructionFromIndex(lastIdx);
- }
- if (!lastMI)
- return false;
-
- // Range cannot cross basic block boundaries or terminators
- MachineBasicBlock *MBB = firstMI->getParent();
- if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
- return true;
-
- MachineBasicBlock::const_iterator E = lastMI;
- ++E;
- for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
- const MachineInstr &MI = *I;
-
- // Allow copies to and from li.reg
- if (MI.isCopy())
- if (MI.getOperand(0).getReg() == li.reg ||
- MI.getOperand(1).getReg() == li.reg)
- continue;
-
- // Check for operands using reg
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand& mop = MI.getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned PhysReg = mop.getReg();
- if (PhysReg == 0 || PhysReg == li.reg)
- continue;
- if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
- if (!vrm.hasPhys(PhysReg))
- continue;
- PhysReg = vrm.getPhys(PhysReg);
- }
- if (PhysReg && tri_->regsOverlap(PhysReg, reg))
- return true;
- }
- }
-
- // No conflicts found.
- return false;
-}
-
-bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
- SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (SlotIndex index = I->start.getBaseIndex(),
- end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
- index != end;
- index = index.getNextIndex()) {
- MachineInstr *MI = getInstructionFromIndex(index);
- if (!MI)
- continue; // skip deleted instructions
-
- if (JoinedCopies.count(MI))
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned PhysReg = MO.getReg();
- if (PhysReg == 0 || PhysReg == Reg ||
- TargetRegisterInfo::isVirtualRegister(PhysReg))
- continue;
- if (tri_->regsOverlap(Reg, PhysReg))
- return true;
- }
- }
- }
-
- return false;
-}
-
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -271,9 +160,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
if (!MO.getSubReg() || MO.isEarlyClobber())
return false;
- SlotIndex RedefIndex = MIIdx.getDefIndex();
+ SlotIndex RedefIndex = MIIdx.getRegSlot();
const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
if (DefMI != 0) {
return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
@@ -296,34 +185,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
if (interval.empty()) {
// Get the Idx of the defining instructions.
- SlotIndex defIndex = MIIdx.getDefIndex();
- // Earlyclobbers move back one, so that they overlap the live range
- // of inputs.
- if (MO.isEarlyClobber())
- defIndex = MIIdx.getUseIndex();
-
- // Make sure the first definition is not a partial redefinition. Add an
- // <imp-def> of the full register.
- // FIXME: LiveIntervals shouldn't modify the code like this. Whoever
- // created the machine instruction should annotate it with <undef> flags
- // as needed. Then we can simply assert here. The REG_SEQUENCE lowering
- // is the main suspect.
- if (MO.getSubReg()) {
- mi->addRegisterDefined(interval.reg);
- // Mark all defs of interval.reg on this instruction as reading <undef>.
- for (unsigned i = MOIdx, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO2 = mi->getOperand(i);
- if (MO2.isReg() && MO2.getReg() == interval.reg && MO2.getSubReg())
- MO2.setIsUndef();
- }
- }
+ SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
- MachineInstr *CopyMI = NULL;
- if (mi->isCopyLike()) {
- CopyMI = mi;
- }
+ // Make sure the first definition is not a partial redefinition.
+  assert(!MO.readsReg() && "First def cannot also read virtual register; "
+ "missing <undef> flag?");
- VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -334,9 +202,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// FIXME: what about dead vars?
SlotIndex killIdx;
if (vi.Kills[0] != mi)
- killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+ killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot();
else
- killIdx = defIndex.getStoreIndex();
+ killIdx = defIndex.getDeadSlot();
// If the kill happens after the definition, we have an intra-block
// live range.
@@ -384,14 +252,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
SlotIndex Start = getMBBStartIdx(Kill->getParent());
- SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+ SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot();
// Create interval with one of a NEW value number. Note that this value
// number isn't actually defined by an instruction, weird huh? :)
if (PHIJoin) {
assert(getInstructionFromIndex(Start) == 0 &&
"PHI def index points at actual instruction.");
- ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
+ ValNo = interval.getNextValue(Start, VNInfoAllocator);
ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
@@ -422,14 +290,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// are actually two values in the live interval. Because of this we
// need to take the LiveRegion that defines this register and split it
// into two values.
- SlotIndex RedefIndex = MIIdx.getDefIndex();
- if (MO.isEarlyClobber())
- RedefIndex = MIIdx.getUseIndex();
+ SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber());
const LiveRange *OldLR =
- interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ interval.getLiveRangeContaining(RedefIndex.getRegSlot(true));
VNInfo *OldValNo = OldLR->valno;
- SlotIndex DefIndex = OldValNo->def.getDefIndex();
+ SlotIndex DefIndex = OldValNo->def.getRegSlot();
// Delete the previous value, which should be short and continuous,
// because the 2-addr copy must be in the same MBB as the redef.
@@ -440,12 +306,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
// Value#0 is now defined by the 2-addr instruction.
- OldValNo->def = RedefIndex;
- OldValNo->setCopy(0);
-
- // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
- if (PartReDef && mi->isCopyLike())
- OldValNo->setCopy(&*mi);
+ OldValNo->def = RedefIndex;
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -455,7 +316,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
if (MO.isDead())
- interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+ interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(),
OldValNo));
DEBUG({
@@ -467,15 +328,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// live until the end of the block. We've already taken care of the
// rest of the live range.
- SlotIndex defIndex = MIIdx.getDefIndex();
+ SlotIndex defIndex = MIIdx.getRegSlot();
if (MO.isEarlyClobber())
- defIndex = MIIdx.getUseIndex();
+ defIndex = MIIdx.getRegSlot(true);
- VNInfo *ValNo;
- MachineInstr *CopyMI = NULL;
- if (mi->isCopyLike())
- CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator);
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
@@ -490,21 +347,26 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << '\n');
}
+static bool isRegLiveIntoSuccessor(const MachineBasicBlock *MBB, unsigned Reg) {
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end();
+ SI != SE; ++SI) {
+ const MachineBasicBlock* succ = *SI;
+ if (succ->isLiveIn(Reg))
+ return true;
+ }
+ return false;
+}
+
void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineBasicBlock::iterator mi,
SlotIndex MIIdx,
MachineOperand& MO,
- LiveInterval &interval,
- MachineInstr *CopyMI) {
- // A physical register cannot be live across basic block, so its
- // lifetime must end somewhere in its defining basic block.
+ LiveInterval &interval) {
DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
SlotIndex baseIndex = MIIdx;
- SlotIndex start = baseIndex.getDefIndex();
- // Earlyclobbers move back one.
- if (MO.isEarlyClobber())
- start = MIIdx.getUseIndex();
+ SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber());
SlotIndex end = start;
// If it is not used after definition, it is considered dead at
@@ -514,7 +376,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
// advance below compensates.
if (MO.isDead()) {
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
goto exit;
}
@@ -531,21 +393,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(dbgs() << " killed");
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot();
goto exit;
} else {
int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
if (DefIdx != -1) {
if (mi->isRegTiedToUseOperand(DefIdx)) {
// Two-address instruction.
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot(mi->getOperand(DefIdx).isEarlyClobber());
} else {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that
// defines it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
}
goto exit;
}
@@ -554,12 +416,19 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
baseIndex = baseIndex.getNextIndex();
}
- // The only case we should have a dead physreg here without a killing or
- // instruction where we know it's dead is if it is live-in to the function
- // and never used. Another possible case is the implicit use of the
- // physical register has been deleted by two-address pass.
- end = start.getStoreIndex();
+ // If we get here the register *should* be live out.
+ assert(!isAllocatable(interval.reg) && "Physregs shouldn't be live out!");
+ // FIXME: We need saner rules for reserved regs.
+ if (isReserved(interval.reg)) {
+ end = start.getDeadSlot();
+ } else {
+    // Unreserved, unallocatable registers like EFLAGS can be live across basic
+ // block boundaries.
+ assert(isRegLiveIntoSuccessor(MBB, interval.reg) &&
+ "Unreserved reg not live-out?");
+ end = getMBBEndIdx(MBB);
+ }
exit:
assert(start < end && "did not find end of interval?");
@@ -567,9 +436,7 @@ exit:
VNInfo *ValNo = interval.getVNInfoAt(start);
bool Extend = ValNo != 0;
if (!Extend)
- ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
- if (Extend && MO.isEarlyClobber())
- ValNo->setHasRedefByEC(true);
+ ValNo = interval.getNextValue(start, VNInfoAllocator);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR << '\n');
@@ -583,18 +450,20 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
getOrCreateInterval(MO.getReg()));
- else {
- MachineInstr *CopyMI = NULL;
- if (MI->isCopyLike())
- CopyMI = MI;
+ else
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
- getOrCreateInterval(MO.getReg()), CopyMI);
- }
+ getOrCreateInterval(MO.getReg()));
}
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
- LiveInterval &interval, bool isAlias) {
+ LiveInterval &interval) {
+ assert(TargetRegisterInfo::isPhysicalRegister(interval.reg) &&
+ "Only physical registers can be live in.");
+  assert((!isAllocatable(interval.reg) || MBB == &MBB->getParent()->front() ||
+ MBB->isLandingPad()) &&
+ "Allocatable live-ins only valid for entry blocks and landing pads.");
+
DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
// Look for kills, if it reaches a def before it's killed, then it shouldn't
@@ -621,16 +490,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
while (mi != E) {
if (mi->killsRegister(interval.reg, tri_)) {
DEBUG(dbgs() << " killed");
- end = baseIndex.getDefIndex();
+ end = baseIndex.getRegSlot();
SeenDefUse = true;
break;
- } else if (mi->definesRegister(interval.reg, tri_)) {
+ } else if (mi->modifiesRegister(interval.reg, tri_)) {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
// [defSlot(def), defSlot(def)+1)
DEBUG(dbgs() << " dead");
- end = start.getStoreIndex();
+ end = start.getDeadSlot();
SeenDefUse = true;
break;
}
@@ -644,10 +513,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
// Live-in register might not be used at all.
if (!SeenDefUse) {
- if (isAlias) {
+ if (isAllocatable(interval.reg) ||
+ !isRegLiveIntoSuccessor(MBB, interval.reg)) {
+ // Allocatable registers are never live through.
+ // Non-allocatable registers that aren't live into any successors also
+ // aren't live through.
DEBUG(dbgs() << " dead");
- end = MIIdx.getStoreIndex();
+ return;
} else {
+ // If we get here the register is non-allocatable and live into some
+ // successor. We'll conservatively assume it's live-through.
DEBUG(dbgs() << " live through");
end = getMBBEndIdx(MBB);
}
@@ -656,8 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex defIdx = getMBBStartIdx(MBB);
assert(getInstructionFromIndex(defIdx) == 0 &&
"PHI def index points at actual instruction.");
- VNInfo *vni =
- interval.getNextValue(defIdx, 0, VNInfoAllocator);
+ VNInfo *vni = interval.getNextValue(defIdx, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -674,10 +548,14 @@ void LiveIntervals::computeIntervals() {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
+ RegMaskBlocks.resize(mf_->getNumBlockIDs());
+
SmallVector<unsigned, 8> UndefUses;
for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
+ RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size();
+
if (MBB->empty())
continue;
@@ -690,11 +568,6 @@ void LiveIntervals::computeIntervals() {
for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
LE = MBB->livein_end(); LI != LE; ++LI) {
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
- // Multiple live-ins can alias the same register.
- for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
- if (!hasInterval(*AS))
- handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
- true);
}
// Skip over empty initial indices.
@@ -706,10 +579,20 @@ void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
if (MI->isDebugValue())
continue;
+ assert(indexes_->getInstructionFromIndex(MIIndex) == MI &&
+ "Lost SlotIndex synchronization");
// Handle defs.
for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
MachineOperand &MO = MI->getOperand(i);
+
+ // Collect register masks.
+ if (MO.isRegMask()) {
+ RegMaskSlots.push_back(MIIndex.getRegSlot());
+ RegMaskBits.push_back(MO.getRegMask());
+ continue;
+ }
+
if (!MO.isReg() || !MO.getReg())
continue;
@@ -723,6 +606,10 @@ void LiveIntervals::computeIntervals() {
// Move to the next instr slot.
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
+
+ // Compute the number of register mask instructions in this block.
+ std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB->getNumber()];
+      RMB.second = RegMaskSlots.size() - RMB.first;
}
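
RegMaskBlocks therefore records one (first, count) pair per basic block, slicing the parallel RegMaskSlots/RegMaskBits arrays; the getRegMaskSlotsInBlock() and getRegMaskBitsInBlock() calls used by InterferenceCache earlier in this patch resolve to nothing more than that slice. The query side, sketched with simplified types:

    #include <utility>
    #include <vector>

    // Parallel arrays built while walking the function in layout order:
    // the slot of each regmask operand, plus a (first, count) pair per
    // block. Plain ints stand in for SlotIndex and the mask pointers.
    static std::vector<int> RegMaskSlots;
    static std::vector<std::pair<unsigned, unsigned> > RegMaskBlocks;

    // The per-block query behind getRegMaskSlotsInBlock(): a slice, no
    // searching required.
    static void regMaskSlotsInBlock(unsigned MBBNum, const int *&Slots,
                                    unsigned &Count) {
      std::pair<unsigned, unsigned> P = RegMaskBlocks[MBBNum];
      Count = P.second;
      Slots = Count ? &RegMaskSlots[P.first] : 0;
    }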
// Create empty intervals for registers defined by implicit_def's (except
@@ -754,7 +641,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
DEBUG(dbgs() << "Shrink: " << *li << '\n');
assert(TargetRegisterInfo::isVirtualRegister(li->reg)
- && "Can't only shrink physical registers");
+ && "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
@@ -766,8 +653,10 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
MachineInstr *UseMI = I.skipInstruction();) {
if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
continue;
- SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
- VNInfo *VNI = li->getVNInfoAt(Idx);
+ SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot();
+ // Note: This intentionally picks up the wrong VNI in case of an EC redef.
+ // See below.
+ VNInfo *VNI = li->getVNInfoBefore(Idx);
if (!VNI) {
// This shouldn't happen: readsVirtualRegister returns true, but there is
// no live value. It is likely caused by a target getting <undef> flags
@@ -777,11 +666,12 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
<< *li << '\n');
continue;
}
- if (VNI->def == Idx) {
- // Special case: An early-clobber tied operand reads and writes the
- // register one slot early.
- Idx = Idx.getPrevSlot();
- VNI = li->getVNInfoAt(Idx);
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early. The getVNInfoBefore call above would have
+ // picked up the value defined by UseMI. Adjust the kill slot and value.
+ if (SlotIndex::isSameInstr(VNI->def, Idx)) {
+ Idx = VNI->def;
+ VNI = li->getVNInfoBefore(Idx);
assert(VNI && "Early-clobber tied value not available");
}
WorkList.push_back(std::make_pair(Idx, VNI));
@@ -794,14 +684,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
-
- // A use tied to an early-clobber def ends at the load slot and isn't caught
- // above. Catch it here instead. This probably only ever happens for inline
- // assembly.
- if (VNI->def.isUse())
- if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
- WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI));
}
// Keep track of the PHIs that are in use.
@@ -812,11 +695,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
SlotIndex Idx = WorkList.back().first;
VNInfo *VNI = WorkList.back().second;
WorkList.pop_back();
- const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot());
SlotIndex BlockStart = getMBBStartIdx(MBB);
// Extend the live range for VNI to be live at Idx.
- if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) {
+ if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
(void)ExtVNI;
assert(ExtVNI == VNI && "Unexpected existing value number");
// Is this a PHIDef we haven't seen before?
@@ -827,9 +710,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
PE = MBB->pred_end(); PI != PE; ++PI) {
if (!LiveOut.insert(*PI))
continue;
- SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ SlotIndex Stop = getMBBEndIdx(*PI);
// A predecessor is not required to have a live-out value for a PHI.
- if (VNInfo *PVNI = li->getVNInfoAt(Stop))
+ if (VNInfo *PVNI = li->getVNInfoBefore(Stop))
WorkList.push_back(std::make_pair(Stop, PVNI));
}
continue;
@@ -837,15 +720,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// VNI is live-in to MBB.
DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
- NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+ NewLI.addRange(LiveRange(BlockStart, Idx, VNI));
// Make sure VNI is live-out from the predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
if (!LiveOut.insert(*PI))
continue;
- SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
- assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ SlotIndex Stop = getMBBEndIdx(*PI);
+ assert(li->getVNInfoBefore(Stop) == VNI &&
+ "Wrong value out of predecessor");
WorkList.push_back(std::make_pair(Stop, VNI));
}
}
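
Stripped of SlotIndex and VNInfo detail, the worklist above is a plain backward reachability walk: start at the use, visit predecessors until the defining block is reached, and record every block the value turns out to be live into. A toy version over an integer-numbered CFG:

    #include <cassert>
    #include <map>
    #include <set>
    #include <vector>

    // A toy CFG: block number -> predecessor block numbers. The walk
    // mirrors how shrinkToUses() pushes (pred-end, VNI) pairs onto its
    // worklist until it reaches the block containing the definition.
    typedef std::map<int, std::vector<int> > PredMap;

    // Return the set of blocks the value is live into, given a def in
    // DefBB and a use in UseBB.
    static std::set<int> liveInBlocks(const PredMap &P, int UseBB, int DefBB) {
      std::set<int> Live;
      std::vector<int> WorkList(1, UseBB);
      while (!WorkList.empty()) {
        int BB = WorkList.back();
        WorkList.pop_back();
        if (BB == DefBB || !Live.insert(BB).second)
          continue; // reached the def, or this block was already visited
        PredMap::const_iterator I = P.find(BB);
        if (I != P.end())
          WorkList.insert(WorkList.end(), I->second.begin(), I->second.end());
      }
      return Live;
    }

    int main() {
      PredMap P; // linear chain 0 -> 1 -> 2 -> 3
      P[1].push_back(0);
      P[2].push_back(1);
      P[3].push_back(2);
      std::set<int> L = liveInBlocks(P, 3, 0);
      assert(L.size() == 3 && L.count(1) && L.count(2) && L.count(3));
      return 0;
    }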
@@ -859,7 +743,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
continue;
LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
assert(LII != NewLI.end() && "Missing live range for PHI");
- if (LII->end != VNI->def.getNextSlot())
+ if (LII->end != VNI->def.getDeadSlot())
continue;
if (VNI->isPHIDef()) {
// This is a dead PHI. Remove it.
@@ -890,28 +774,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li,
// Register allocator hooks.
//
-MachineBasicBlock::iterator
-LiveIntervals::getLastSplitPoint(const LiveInterval &li,
- MachineBasicBlock *mbb) const {
- const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
-
- // If li is not live into a landing pad, we can insert spill code before the
- // first terminator.
- if (!lpad || !isLiveInToMBB(li, lpad))
- return mbb->getFirstTerminator();
-
- // When there is a landing pad, spill code must go before the call instruction
- // that can throw.
- MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
- while (I != B) {
- --I;
- if (I->getDesc().isCall())
- return I;
- }
- // The block contains no calls that can throw, so use the first terminator.
- return mbb->getFirstTerminator();
-}
-
void LiveIntervals::addKillFlags() {
for (iterator I = begin(), E = end(); I != E; ++I) {
unsigned Reg = I->first;
@@ -924,8 +786,8 @@ void LiveIntervals::addKillFlags() {
// Every instruction that kills Reg corresponds to a live range end point.
for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
++RI) {
- // A LOAD index indicates an MBB edge.
- if (RI->end.isLoad())
+ // A block index indicates an MBB edge.
+ if (RI->end.isBlock())
continue;
MachineInstr *MI = getInstructionFromIndex(RI->end);
if (!MI)
@@ -949,16 +811,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
if (Reg == 0 || Reg == li.reg)
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
- !allocatableRegs_[Reg])
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isAllocatable(Reg))
continue;
- // FIXME: For now, only remat MI with at most one register operand.
- assert(!RegOp &&
- "Can't rematerialize instruction with multiple register operand!");
RegOp = MO.getReg();
-#ifndef NDEBUG
- break;
-#endif
+ break; // Found vreg operand - leave the loop.
}
return RegOp;
}
@@ -1011,14 +867,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
return true;
}
-/// isReMaterializable - Returns true if the definition MI of the specified
-/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI) {
- bool Dummy2;
- return isReMaterializable(li, ValNo, MI, 0, Dummy2);
-}
-
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable.
bool
@@ -1044,1141 +892,653 @@ LiveIntervals::isReMaterializable(const LiveInterval &li,
return true;
}
-/// FilterFoldedOps - Filter out two-address use operands. Return
-/// true if it finds any issue with the operands that ought to prevent
-/// folding.
-static bool FilterFoldedOps(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- unsigned &MRInfo,
- SmallVector<unsigned, 2> &FoldOps) {
- MRInfo = 0;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- unsigned OpIdx = Ops[i];
- MachineOperand &MO = MI->getOperand(OpIdx);
- // FIXME: fold subreg use.
- if (MO.getSubReg())
- return true;
- if (MO.isDef())
- MRInfo |= (unsigned)VirtRegMap::isMod;
- else {
- // Filter out two-address use operand(s).
- if (MI->isRegTiedToDefOperand(OpIdx)) {
- MRInfo = VirtRegMap::isModRef;
- continue;
- }
- MRInfo |= (unsigned)VirtRegMap::isRef;
- }
- FoldOps.push_back(OpIdx);
- }
- return false;
+MachineBasicBlock*
+LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+ // A local live range must be fully contained inside the block, meaning it is
+ // defined and killed at instructions, not at block boundaries. It is not
+ // live in or out of any block.
+ //
+ // It is technically possible to have a PHI-defined live range identical to a
+ // single block, but we are going to return false in that case.
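+ // E.g. an interval [32r,144r) with both endpoints at instruction slots is
+ // local, while one starting at a block boundary such as [0B,48r) is
+ // live-in and rejected here (the slot values are illustrative only).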
+
+ SlotIndex Start = LI.beginIndex();
+ if (Start.isBlock())
+ return NULL;
+
+ SlotIndex Stop = LI.endIndex();
+ if (Stop.isBlock())
+ return NULL;
+
+ // getMBBFromIndex doesn't need to search the MBB table when both indexes
+ // belong to proper instructions.
+ MachineBasicBlock *MBB1 = indexes_->getMBBFromIndex(Start);
+ MachineBasicBlock *MBB2 = indexes_->getMBBFromIndex(Stop);
+ return MBB1 == MBB2 ? MBB1 : NULL;
}
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
-/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
-/// slot / to reg or any rematerialized load into ith operand of specified
-/// MI. If it is successul, MI is updated with the newly created MI and
-/// returns true.
-bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
- VirtRegMap &vrm, MachineInstr *DefMI,
- SlotIndex InstrIdx,
- SmallVector<unsigned, 2> &Ops,
- bool isSS, int Slot, unsigned Reg) {
- // If it is an implicit def instruction, just delete it.
- if (MI->isImplicitDef()) {
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- ++numFolds;
- return true;
- }
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ // Note that powf() may not be available on all hosts; for consistency we
+ // use pow(double, double) here.
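+ // For a rough sense of scale (illustrative values): d=1 gives lc ~= 10,
+ // d=2 gives ~87, and d=5 gives ~2.6e4, so a def plus a use of the same
+ // register in a depth-1 loop weighs roughly 20.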
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
- return false;
+ return (isDef + isUse) * lc;
+}
- // The only time it's safe to fold into a two address instruction is when
- // it's folding reload and spill from / into a spill stack slot.
- if (DefMI && (MRInfo & VirtRegMap::isMod))
- return false;
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+ MachineInstr* startInst) {
+ LiveInterval& Interval = getOrCreateInterval(reg);
+ VNInfo* VN = Interval.getNextValue(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getVNInfoAllocator());
+ VN->setHasPHIKill(true);
+ LiveRange LR(
+ SlotIndex(getInstructionIndex(startInst).getRegSlot()),
+ getMBBEndIdx(startInst->getParent()), VN);
+ Interval.addRange(LR);
- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
- if (fmi) {
- // Remember this instruction uses the spill slot.
- if (isSS) vrm.addSpillSlotUse(Slot, fmi);
-
- // Attempt to fold the memory reference into the instruction. If
- // we can do this, we don't need to insert spill code.
- if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
- vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
- vrm.transferSpillPts(MI, fmi);
- vrm.transferRestorePts(MI, fmi);
- vrm.transferEmergencySpills(MI, fmi);
- ReplaceMachineInstrInMaps(MI, fmi);
- MI->eraseFromParent();
- MI = fmi;
- ++numFolds;
- return true;
- }
- return false;
+ return LR;
}
-/// canFoldMemoryOperand - Returns true if the specified load / store
-/// folding is possible.
-bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
- SmallVector<unsigned, 2> &Ops,
- bool ReMat) const {
- // Filter the list of operand indexes that are to be folded. Abort if
- // any operand will prevent folding.
- unsigned MRInfo = 0;
- SmallVector<unsigned, 2> FoldOps;
- if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+
+//===----------------------------------------------------------------------===//
+// Register mask functions
+//===----------------------------------------------------------------------===//
+
+bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
+ BitVector &UsableRegs) {
+ if (LI.empty())
return false;
+ LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end();
+
+ // Use smaller arrays for local live ranges.
+ ArrayRef<SlotIndex> Slots;
+ ArrayRef<const uint32_t*> Bits;
+ if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) {
+ Slots = getRegMaskSlotsInBlock(MBB->getNumber());
+ Bits = getRegMaskBitsInBlock(MBB->getNumber());
+ } else {
+ Slots = getRegMaskSlots();
+ Bits = getRegMaskBits();
+ }
- // It's only legal to remat for a use, not a def.
- if (ReMat && (MRInfo & VirtRegMap::isMod))
+ // We are going to enumerate all the register mask slots contained in LI.
+ // Start with a binary search of RegMaskSlots to find a starting point.
+ ArrayRef<SlotIndex>::iterator SlotI =
+ std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
+
+ // No slots in range; LI begins after the last call.
+ if (SlotI == SlotE)
return false;
- return tii_->canFoldMemoryOperand(MI, FoldOps);
+ bool Found = false;
+ for (;;) {
+ assert(*SlotI >= LiveI->start);
+ // Loop over all slots overlapping this segment.
+ while (*SlotI < LiveI->end) {
+ // *SlotI overlaps LI. Collect mask bits.
+ if (!Found) {
+ // This is the first overlap. Initialize UsableRegs to all ones.
+ UsableRegs.clear();
+ UsableRegs.resize(tri_->getNumRegs(), true);
+ Found = true;
+ }
+ // Remove usable registers clobbered by this mask.
+ UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]);
+ if (++SlotI == SlotE)
+ return Found;
+ }
+ // *SlotI is beyond the current LI segment.
+ LiveI = LI.advanceTo(LiveI, *SlotI);
+ if (LiveI == LiveE)
+ return Found;
+ // Advance SlotI until it overlaps.
+ while (*SlotI < LiveI->start)
+ if (++SlotI == SlotE)
+ return Found;
+ }
}
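+// A hypothetical caller sketch (for illustration only; the names VirtReg,
+// PhysReg and the surrounding loop are made up, not part of this file):
+//   BitVector Usable;
+//   if (LIS->checkRegMaskInterference(VirtReg, Usable) &&
+//       !Usable.test(PhysReg))
+//     continue; // A regmask (e.g. a call) inside VirtReg's live range
+//               // clobbers PhysReg, so skip this candidate register.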
-bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
- LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+//===----------------------------------------------------------------------===//
+// HMEditor class.
+//===----------------------------------------------------------------------===//
- MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+// HMEditor is a toolkit used by handleMove to trim or extend live intervals.
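+// It classifies each affected live range as Entering (live into the moved
+// instruction), Internal (defined and dead within it), or Exiting (defined
+// by it and live beyond it), and updates each class separately.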
+class LiveIntervals::HMEditor {
+private:
+ LiveIntervals& LIS;
+ const MachineRegisterInfo& MRI;
+ const TargetRegisterInfo& TRI;
+ SlotIndex NewIdx;
+
+ typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
+ typedef DenseSet<IntRangePair> RangeSet;
+
+ struct RegRanges {
+ LiveRange* Use;
+ LiveRange* EC;
+ LiveRange* Dead;
+ LiveRange* Def;
+ RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
+ };
+ typedef DenseMap<unsigned, RegRanges> BundleRanges;
+
+public:
+ HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
+ const TargetRegisterInfo& TRI, SlotIndex NewIdx)
+ : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
+
+ // Update intervals for all operands of MI from OldIdx to NewIdx.
+ // This assumes that MI used to be at OldIdx, and now resides at
+ // NewIdx.
+ void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
+ assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
+
+ // Collect the operands.
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+
+ // To keep the LiveRanges valid within an interval, move the ranges closest
+ // to the destination first. This prevents ranges from overlapping, so that
+ // APIs like removeRange still work.
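+ // E.g. when moving up (NewIdx < OldIdx) the entering ranges end nearest
+ // the destination, so they are trimmed first; moving down reverses the
+ // order.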
+ if (NewIdx < OldIdx) {
+ moveAllEnteringFrom(OldIdx, Entering);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllExitingFrom(OldIdx, Exiting);
+ }
+ else {
+ moveAllExitingFrom(OldIdx, Exiting);
+ moveAllInternalFrom(OldIdx, Internal);
+ moveAllEnteringFrom(OldIdx, Entering);
+ }
- if (mbb == 0)
- return false;
+ if (hasRegMaskOp)
+ updateRegMaskSlots(OldIdx);
- for (++itr; itr != li.ranges.end(); ++itr) {
- MachineBasicBlock *mbb2 =
- indexes_->getMBBCoveringRange(itr->start, itr->end);
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+ assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
+#endif
- if (mbb2 != mbb)
- return false;
}
- return true;
-}
+ // Update intervals for all operands of MI to refer to BundleStart's
+ // SlotIndex.
+ void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
+ if (MI == BundleStart)
+ return; // Bundling instr with itself - nothing to do.
+
+ SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
+ "SlotIndex <-> Instruction mapping broken for MI");
+
+ // Collect all ranges already in the bundle.
+ MachineBasicBlock::instr_iterator BII(BundleStart);
+ RangeSet Entering, Internal, Exiting;
+ bool hasRegMaskOp = false;
+ collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
+ if (&*BII == MI)
+ continue;
+ collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
+ }
-/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
-/// interval on to-be re-materialized operands of MI) with new register.
-void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
- MachineInstr *MI, unsigned NewVReg,
- VirtRegMap &vrm) {
- // There is an implicit use. That means one of the other operand is
- // being remat'ed and the remat'ed instruction has li.reg as an
- // use operand. Make sure we rewrite that as well.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (!vrm.isReMaterialized(Reg))
- continue;
- MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
- MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
- if (UseMO)
- UseMO->setReg(NewVReg);
- }
-}
+ BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
-/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
-bool LiveIntervals::
-rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
- MachineInstr *MI,
- MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds,
- const MachineLoopInfo *loopInfo,
- unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs) {
- bool CanFold = false;
- RestartInstruction:
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& mop = MI->getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned Reg = mop.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (Reg != li.reg)
- continue;
+ collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
+ assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
- bool TryFold = !DefIsReMat;
- bool FoldSS = true; // Default behavior unless it's a remat.
- int FoldSlot = Slot;
- if (DefIsReMat) {
- // If this is the rematerializable definition MI itself and
- // all of its uses are rematerialized, simply delete it.
- if (MI == ReMatOrigDefMI && CanDelete) {
- DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
- << *MI << '\n');
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- break;
- }
+ DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
+ DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
+ DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
- // If def for this use can't be rematerialized, then try folding.
- // If def is rematerializable and it's a load, also try folding.
- TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
- if (isLoad) {
- // Try fold loads (from stack slot, constant pool, etc.) into uses.
- FoldSS = isLoadSS;
- FoldSlot = LdSlot;
- }
- }
+ moveAllEnteringFromInto(OldIdx, Entering, BR);
+ moveAllInternalFromInto(OldIdx, Internal, BR);
+ moveAllExitingFromInto(OldIdx, Exiting, BR);
- // Scan all of the operands of this instruction rewriting operands
- // to use NewVReg instead of li.reg as appropriate. We do this for
- // two reasons:
- //
- // 1. If the instr reads the same spilled vreg multiple times, we
- // want to reuse the NewVReg.
- // 2. If the instr is a two-addr instruction, we are required to
- // keep the src/dst regs pinned.
- //
- // Keep track of whether we replace a use and/or def so that we can
- // create the spill interval with the appropriate range.
- SmallVector<unsigned, 2> Ops;
- tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
-
- // Create a new virtual register for the spill interval.
- // Create the new register now so we can map the fold instruction
- // to the new register so when it is unfolded we get the correct
- // answer.
- bool CreatedNewVReg = false;
- if (NewVReg == 0) {
- NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- CreatedNewVReg = true;
-
- // The new virtual register should get the same allocation hints as the
- // old one.
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
- if (Hint.first || Hint.second)
- mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
- }
- if (!TryFold)
- CanFold = false;
- else {
- // Do not fold load / store here if we are splitting. We'll find an
- // optimal point to insert a load / store later.
- if (!TrySplit) {
- if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
- Ops, FoldSS, FoldSlot, NewVReg)) {
- // Folding the load/store can completely change the instruction in
- // unpredictable ways, rescan it from the beginning.
-
- if (FoldSS) {
- // We need to give the new vreg the same stack slot as the
- // spilled interval.
- vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
- }
-
- HasUse = false;
- HasDef = false;
- CanFold = false;
- if (isNotInMIMap(MI))
- break;
- goto RestartInstruction;
- }
- } else {
- // We'll try to fold it later if it's profitable.
- CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
- }
- }
+#ifndef NDEBUG
+ LIValidator validator;
+ std::for_each(Entering.begin(), Entering.end(), validator);
+ std::for_each(Internal.begin(), Internal.end(), validator);
+ std::for_each(Exiting.begin(), Exiting.end(), validator);
+ assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
+#endif
+ }
- mop.setReg(NewVReg);
- if (mop.isImplicit())
- rewriteImplicitOps(li, MI, NewVReg, vrm);
-
- // Reuse NewVReg for other reads.
- bool HasEarlyClobber = false;
- for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
- MachineOperand &mopj = MI->getOperand(Ops[j]);
- mopj.setReg(NewVReg);
- if (mopj.isImplicit())
- rewriteImplicitOps(li, MI, NewVReg, vrm);
- if (mopj.isEarlyClobber())
- HasEarlyClobber = true;
- }
+private:
- if (CreatedNewVReg) {
- if (DefIsReMat) {
- vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
- if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
- // Each valnum may have its own remat id.
- ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
- } else {
- vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
- }
- if (!CanDelete || (HasUse && HasDef)) {
- // If this is a two-addr instruction then its use operands are
- // rematerializable but its def is not. It should be assigned a
- // stack slot.
- vrm.assignVirt2StackSlot(NewVReg, Slot);
- }
- } else {
- vrm.assignVirt2StackSlot(NewVReg, Slot);
+#ifndef NDEBUG
+ class LIValidator {
+ private:
+ DenseSet<const LiveInterval*> Checked, Bogus;
+ public:
+ void operator()(const IntRangePair& P) {
+ const LiveInterval* LI = P.first;
+ if (Checked.count(LI))
+ return;
+ Checked.insert(LI);
+ if (LI->empty())
+ return;
+ SlotIndex LastEnd = LI->begin()->start;
+ for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
+ LRI != LRE; ++LRI) {
+ const LiveRange& LR = *LRI;
+ if (LastEnd > LR.start || LR.start >= LR.end)
+ Bogus.insert(LI);
+ LastEnd = LR.end;
}
- } else if (HasUse && HasDef &&
- vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
- // If this interval hasn't been assigned a stack slot (because earlier
- // def is a deleted remat def), do it now.
- assert(Slot != VirtRegMap::NO_STACK_SLOT);
- vrm.assignVirt2StackSlot(NewVReg, Slot);
}
- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI.
- if (DefIsReMat && ImpUse)
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
-
- // Create a new register interval for this spill / remat.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (CreatedNewVReg) {
- NewLIs.push_back(&nI);
- MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
- if (TrySplit)
- vrm.setIsSplitFromReg(NewVReg, li.reg);
+ bool rangesOk() const {
+ return Bogus.empty();
}
+ };
+#endif
- if (HasUse) {
- if (CreatedNewVReg) {
- LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- } else {
- // Extend the split live interval to this def / use.
- SlotIndex End = index.getDefIndex();
- LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
- nI.getValNumInfo(nI.getNumValNums()-1));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ // Treats MI's index as OldIdx (regardless of what it is in SlotIndexes'
+ // maps).
+ void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
+ RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
+ hasRegMaskOp = false;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+
+ if (MO.isRegMask()) {
+ hasRegMaskOp = true;
+ continue;
}
- }
- if (HasDef) {
- // An early clobber starts at the use slot, except for an early clobber
- // tied to a use operand (yes, that is a thing).
- LiveRange LR(HasEarlyClobber && !HasUse ?
- index.getUseIndex() : index.getDefIndex(),
- index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- }
- DEBUG({
- dbgs() << "\t\t\t\tAdded new interval: ";
- nI.print(dbgs(), tri_);
- dbgs() << '\n';
- });
- }
- return CanFold;
-}
-bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
- const VNInfo *VNI,
- MachineBasicBlock *MBB,
- SlotIndex Idx) const {
- return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
-}
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
-/// RewriteInfo - Keep track of machine instrs that will be rewritten
-/// during spilling.
-namespace {
- struct RewriteInfo {
- SlotIndex Index;
- MachineInstr *MI;
- RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
- };
+ unsigned Reg = MO.getReg();
- struct RewriteInfoCompare {
- bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
- return LHS.Index < RHS.Index;
- }
- };
-}
+ // TODO: Currently we're skipping uses that are reserved or have no
+ // interval, but we're not updating their kills. This should be
+ // fixed.
+ if (!LIS.hasInterval(Reg) ||
+ (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ continue;
-void LiveIntervals::
-rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
- LiveInterval::Ranges::const_iterator &I,
- MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
- unsigned Slot, int LdSlot,
- bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
- VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- SmallVector<int, 4> &ReMatIds,
- const MachineLoopInfo *loopInfo,
- BitVector &SpillMBBs,
- DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
- BitVector &RestoreMBBs,
- DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
- DenseMap<unsigned,unsigned> &MBBVRegsMap,
- std::vector<LiveInterval*> &NewLIs) {
- bool AllCanFold = true;
- unsigned NewVReg = 0;
- SlotIndex start = I->start.getBaseIndex();
- SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
-
- // First collect all the def / use in this live range that will be rewritten.
- // Make sure they are sorted according to instruction index.
- std::vector<RewriteInfo> RewriteMIs;
- for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
- re = mri_->reg_end(); ri != re; ) {
- MachineInstr *MI = &*ri;
- MachineOperand &O = ri.getOperand();
- ++ri;
- if (MI->isDebugValue()) {
- // Modify DBG_VALUE now that the value is in a spill slot.
- if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
- uint64_t Offset = MI->getOperand(1).getImm();
- const MDNode *MDPtr = MI->getOperand(2).getMetadata();
- DebugLoc DL = MI->getDebugLoc();
- int FI = isLoadSS ? LdSlot : (int)Slot;
- if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
- Offset, MDPtr, DL)) {
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
- ReplaceMachineInstrInMaps(MI, NewDV);
- MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MBB->erase(MI), NewDV);
- continue;
+ LiveInterval* LI = &LIS.getInterval(Reg);
+
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ if (MO.isEarlyClobber()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot(true));
+ assert(LR != 0 && "No EC range?");
+ if (LR->end > OldIdx.getDeadSlot())
+ Exiting.insert(std::make_pair(LI, LR));
+ else
+ Internal.insert(std::make_pair(LI, LR));
+ } else if (MO.isDead()) {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
+ assert(LR != 0 && "No dead-def range?");
+ Internal.insert(std::make_pair(LI, LR));
+ } else {
+ LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getDeadSlot());
+ assert(LR && LR->end > OldIdx.getDeadSlot() &&
+ "Non-dead-def should have live range exiting.");
+ Exiting.insert(std::make_pair(LI, LR));
}
}
-
- DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- continue;
}
- assert(!(O.isImplicit() && O.isUse()) &&
- "Spilling register that's used as implicit use?");
- SlotIndex index = getInstructionIndex(MI);
- if (index < start || index >= end)
- continue;
-
- if (O.isUndef())
- // Must be defined by an implicit def. It should not be spilled. Note,
- // this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- continue;
- RewriteMIs.push_back(RewriteInfo(index, MI));
}
- std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
-
- unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
- // Now rewrite the defs and uses.
- for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
- RewriteInfo &rwi = RewriteMIs[i];
- ++i;
- SlotIndex index = rwi.Index;
- MachineInstr *MI = rwi.MI;
- // If MI def and/or use the same register multiple times, then there
- // are multiple entries.
- while (i != e && RewriteMIs[i].MI == MI) {
- assert(RewriteMIs[i].Index == index);
- ++i;
- }
- MachineBasicBlock *MBB = MI->getParent();
- if (ImpUse && MI != ReMatDefMI) {
- // Re-matting an instruction with virtual register use. Prevent interval
- // from being spilled.
- getInterval(ImpUse).markNotSpillable();
- }
+ // Collect IntRangePairs for all operands of MI that may need fixing.
+ void collectRangesInBundle(MachineInstr* MI, RangeSet& Entering,
+ RangeSet& Exiting, SlotIndex MIStartIdx,
+ SlotIndex MIEndIdx) {
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ const MachineOperand& MO = *MOI;
+ assert(!MO.isRegMask() && "Can't have RegMasks in bundles.");
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue;
- unsigned MBBId = MBB->getNumber();
- unsigned ThisVReg = 0;
- if (TrySplit) {
- DenseMap<unsigned,unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
- if (NVI != MBBVRegsMap.end()) {
- ThisVReg = NVI->second;
- // One common case:
- // x = use
- // ...
- // ...
- // def = ...
- // = use
- // It's better to start a new interval to avoid artificially
- // extend the new interval.
- if (MI->readsWritesVirtualRegister(li.reg) ==
- std::make_pair(false,true)) {
- MBBVRegsMap.erase(MBB->getNumber());
- ThisVReg = 0;
- }
- }
- }
+ unsigned Reg = MO.getReg();
+
+ // TODO: Currently we're skipping uses that are reserved or have no
+ // interval, but we're not updating their kills. This should be
+ // fixed.
+ if (!LIS.hasInterval(Reg) ||
+ (TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg)))
+ continue;
- bool IsNew = ThisVReg == 0;
- if (IsNew) {
- // This ends the previous live interval. If all of its def / use
- // can be folded, give it a low spill weight.
- if (NewVReg && TrySplit && AllCanFold) {
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.weight /= 10.0F;
+ LiveInterval* LI = &LIS.getInterval(Reg);
+
+ if (MO.readsReg()) {
+ LiveRange* LR = LI->getLiveRangeContaining(MIStartIdx);
+ if (LR != 0)
+ Entering.insert(std::make_pair(LI, LR));
+ }
+ if (MO.isDef()) {
+ assert(!MO.isEarlyClobber() && "Early clobbers not allowed in bundles.");
+ assert(!MO.isDead() && "Dead-defs not allowed in bundles.");
+ LiveRange* LR = LI->getLiveRangeContaining(MIEndIdx.getDeadSlot());
+ assert(LR != 0 && "Internal ranges not allowed in bundles.");
+ Exiting.insert(std::make_pair(LI, LR));
}
- AllCanFold = true;
}
- NewVReg = ThisVReg;
-
- bool HasDef = false;
- bool HasUse = false;
- bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
- index, end, MI, ReMatOrigDefMI, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
- ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
- if (!HasDef && !HasUse)
- continue;
+ }
- AllCanFold &= CanFold;
+ BundleRanges createBundleRanges(RangeSet& Entering, RangeSet& Internal,
+                                 RangeSet& Exiting) {
+ BundleRanges BR;
- // Update weight of spill interval.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- if (!TrySplit) {
- // The spill weight is now infinity as it cannot be spilled again.
- nI.markNotSpillable();
- continue;
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI) {
+ LiveInterval* LI = EI->first;
+ LiveRange* LR = EI->second;
+ BR[LI->reg].Use = LR;
}
- // Keep track of the last def and first use in each MBB.
- if (HasDef) {
- if (MI != ReMatOrigDefMI || !CanDelete) {
- bool HasKill = false;
- if (!HasUse)
- HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
- else {
- // If this is a two-address code, then this index starts a new VNInfo.
- const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
- if (VNI)
- HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
- }
- DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
- SpillIdxes.find(MBBId);
- if (!HasKill) {
- if (SII == SpillIdxes.end()) {
- std::vector<SRInfo> S;
- S.push_back(SRInfo(index, NewVReg, true));
- SpillIdxes.insert(std::make_pair(MBBId, S));
- } else if (SII->second.back().vreg != NewVReg) {
- SII->second.push_back(SRInfo(index, NewVReg, true));
- } else if (index > SII->second.back().index) {
- // If there is an earlier def and this is a two-address
- // instruction, then it's not possible to fold the store (which
- // would also fold the load).
- SRInfo &Info = SII->second.back();
- Info.index = index;
- Info.canFold = !HasUse;
- }
- SpillMBBs.set(MBBId);
- } else if (SII != SpillIdxes.end() &&
- SII->second.back().vreg == NewVReg &&
- index > SII->second.back().index) {
- // There is an earlier def that's not killed (must be two-address).
- // The spill is no longer needed.
- SII->second.pop_back();
- if (SII->second.empty()) {
- SpillIdxes.erase(MBBId);
- SpillMBBs.reset(MBBId);
- }
- }
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II) {
+ LiveInterval* LI = II->first;
+ LiveRange* LR = II->second;
+ if (LR->end.isDead()) {
+ BR[LI->reg].Dead = LR;
+ } else {
+ BR[LI->reg].EC = LR;
}
}
- if (HasUse) {
- DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
- SpillIdxes.find(MBBId);
- if (SII != SpillIdxes.end() &&
- SII->second.back().vreg == NewVReg &&
- index > SII->second.back().index)
- // Use(s) following the last def, it's not safe to fold the spill.
- SII->second.back().canFold = false;
- DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
- RestoreIdxes.find(MBBId);
- if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
- // If we are splitting live intervals, only fold if it's the first
- // use and there isn't another use later in the MBB.
- RII->second.back().canFold = false;
- else if (IsNew) {
- // Only need a reload if there isn't an earlier def / use.
- if (RII == RestoreIdxes.end()) {
- std::vector<SRInfo> Infos;
- Infos.push_back(SRInfo(index, NewVReg, true));
- RestoreIdxes.insert(std::make_pair(MBBId, Infos));
- } else {
- RII->second.push_back(SRInfo(index, NewVReg, true));
- }
- RestoreMBBs.set(MBBId);
- }
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI) {
+ LiveInterval* LI = EI->first;
+ LiveRange* LR = EI->second;
+ BR[LI->reg].Def = LR;
}
- // Update spill weight.
- unsigned loopDepth = loopInfo->getLoopDepth(MBB);
- nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+ return BR;
}
- if (NewVReg && TrySplit && AllCanFold) {
- // If all of its def / use can be folded, give it a low spill weight.
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.weight /= 10.0F;
+ void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
+ MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
+ if (!OldKillMI->killsRegister(reg))
+ return; // Bail out if we don't have kill flags on the old register.
+ MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
+ assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
+ assert(!NewKillMI->killsRegister(reg) && "New kill instr is already a kill.");
+ OldKillMI->clearRegisterKills(reg, &TRI);
+ NewKillMI->addRegisterKilled(reg, &TRI);
}
-}
-bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
- unsigned vr, BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
- if (!RestoreMBBs[Id])
- return false;
- std::vector<SRInfo> &Restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = Restores.size(); i != e; ++i)
- if (Restores[i].index == index &&
- Restores[i].vreg == vr &&
- Restores[i].canFold)
- return true;
- return false;
-}
-
-void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
- unsigned vr, BitVector &RestoreMBBs,
- DenseMap<unsigned,std::vector<SRInfo> > &RestoreIdxes) {
- if (!RestoreMBBs[Id])
- return;
- std::vector<SRInfo> &Restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = Restores.size(); i != e; ++i)
- if (Restores[i].index == index && Restores[i].vreg)
- Restores[i].index = SlotIndex();
-}
+ void updateRegMaskSlots(SlotIndex OldIdx) {
+ SmallVectorImpl<SlotIndex>::iterator RI =
+ std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
+ OldIdx);
+ assert(*RI == OldIdx && "No RegMask at OldIdx.");
+ *RI = NewIdx;
+ assert(*prior(RI) < *RI && *RI < *next(RI) &&
+ "RegSlots out of order. Did you move one call across another?");
+ }
-/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
-/// spilled and create empty intervals for their uses.
-void
-LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
- const TargetRegisterClass* rc,
- std::vector<LiveInterval*> &NewLIs) {
- for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
- re = mri_->reg_end(); ri != re; ) {
- MachineOperand &O = ri.getOperand();
- MachineInstr *MI = &*ri;
- ++ri;
- if (MI->isDebugValue()) {
- // Remove debug info for now.
- O.setReg(0U);
- DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
- continue;
- }
- if (O.isDef()) {
- assert(MI->isImplicitDef() &&
- "Register def was not rewritten?");
- RemoveMachineInstrFromMaps(MI);
- vrm.RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- } else {
- // This must be an use of an implicit_def so it's not part of the live
- // interval. Create a new empty live interval for it.
- // FIXME: Can we simply erase some of the instructions? e.g. Stores?
- unsigned NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- vrm.setIsImplicitlyDefined(NewVReg);
- NewLIs.push_back(&getOrCreateInterval(NewVReg));
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == li.reg) {
- MO.setReg(NewVReg);
- MO.setIsUndef();
- }
- }
+ // Return the last use of reg between NewIdx and OldIdx.
+ SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
+ SlotIndex LastUse = NewIdx;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI.use_nodbg_begin(Reg),
+ UE = MRI.use_nodbg_end();
+ UI != UE; UI.skipInstruction()) {
+ const MachineInstr* MI = &*UI;
+ SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI);
+ if (InstSlot > LastUse && InstSlot < OldIdx)
+ LastUse = InstSlot;
}
+ return LastUse;
}
-}
-
-float
-LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
- // Limit the loop depth ridiculousness.
- if (loopDepth > 200)
- loopDepth = 200;
-
- // The loop depth is used to roughly estimate the number of times the
- // instruction is executed. Something like 10^d is simple, but will quickly
- // overflow a float. This expression behaves like 10^d for small d, but is
- // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
- // headroom before overflow.
- // By the way, powf() might be unavailable here. For consistency,
- // We may take pow(double,double).
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth);
-
- return (isDef + isUse) * lc;
-}
-static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
- for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
- NewLIs[i]->weight =
- normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
-}
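+ // Update a range that was live into MI at OldIdx after MI moved up to
+ // NewIdx. If the range extends past OldIdx the move does not affect it;
+ // otherwise MI was the killer, so re-end the range at the last use left in
+ // [NewIdx;OldIdx) (MI's new slot if no other use remains) and, if another
+ // use took over the kill, move the kill flag to it.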
+ void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ bool LiveThrough = LR->end > OldIdx.getRegSlot();
+ if (LiveThrough)
+ return;
+ SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
+ if (LastUse != NewIdx)
+ moveKillFlags(LI->reg, NewIdx, LastUse);
+ LR->end = LastUse.getRegSlot();
+ }
-std::vector<LiveInterval*> LiveIntervals::
-addIntervalsForSpills(const LiveInterval &li,
- const SmallVectorImpl<LiveInterval*> *SpillIs,
- const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
- assert(li.isSpillable() && "attempt to spill already spilled interval!");
-
- DEBUG({
- dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
- li.print(dbgs(), tri_);
- dbgs() << '\n';
- });
-
- // Each bit specify whether a spill is required in the MBB.
- BitVector SpillMBBs(mf_->getNumBlockIDs());
- DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
- BitVector RestoreMBBs(mf_->getNumBlockIDs());
- DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
- DenseMap<unsigned,unsigned> MBBVRegsMap;
- std::vector<LiveInterval*> NewLIs;
- const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
-
- unsigned NumValNums = li.getNumValNums();
- SmallVector<MachineInstr*, 4> ReMatDefs;
- ReMatDefs.resize(NumValNums, NULL);
- SmallVector<MachineInstr*, 4> ReMatOrigDefs;
- ReMatOrigDefs.resize(NumValNums, NULL);
- SmallVector<int, 4> ReMatIds;
- ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
- BitVector ReMatDelete(NumValNums);
- unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
-
- // Spilling a split live interval. It cannot be split any further. Also,
- // it's also guaranteed to be a single val# / range interval.
- if (vrm.getPreSplitReg(li.reg)) {
- vrm.setIsSplitFromReg(li.reg, 0);
- // Unset the split kill marker on the last use.
- SlotIndex KillIdx = vrm.getKillPoint(li.reg);
- if (KillIdx != SlotIndex()) {
- MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
- assert(KillMI && "Last use disappeared?");
- int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
- assert(KillOp != -1 && "Last use disappeared?");
- KillMI->getOperand(KillOp).setIsKill(false);
- }
- vrm.removeKillPoint(li.reg);
- bool DefIsReMat = vrm.isReMaterialized(li.reg);
- Slot = vrm.getStackSlot(li.reg);
- assert(Slot != VirtRegMap::MAX_STACK_SLOT);
- MachineInstr *ReMatDefMI = DefIsReMat ?
- vrm.getReMaterializedMI(li.reg) : NULL;
- int LdSlot = 0;
- bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- bool isLoad = isLoadSS ||
- (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
- bool IsFirstRange = true;
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- // If this is a split live interval with multiple ranges, it means there
- // are two-address instructions that re-defined the value. Only the
- // first def can be rematerialized!
- if (IsFirstRange) {
- // Note ReMatOrigDefMI has already been deleted.
- rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- false, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
- } else {
- rewriteInstructionsForSpills(li, false, I, NULL, 0,
- Slot, 0, false, false, false,
- false, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
+ void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ // Extend the LiveRange if NewIdx is past the end.
+ if (NewIdx > LR->end) {
+ // Move kill flags if OldIdx was not originally the end
+ // (otherwise LR->end points to an invalid slot).
+ if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
+ assert(LR->end > OldIdx && "LiveRange does not cover original slot");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
}
- IsFirstRange = false;
+ LR->end = NewIdx.getRegSlot();
}
-
- handleSpilledImpDefs(li, vrm, rc, NewLIs);
- normalizeSpillWeights(NewLIs);
- return NewLIs;
}
- bool TrySplit = !intervalIsInOneMBB(li);
- if (TrySplit)
- ++numSplits;
- bool NeedStackSlot = false;
- for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
- i != e; ++i) {
- const VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (VNI->isUnused())
- continue; // Dead val#.
- // Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
- bool dummy;
- if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
- // Remember how to remat the def of this val#.
- ReMatOrigDefs[VN] = ReMatDefMI;
- // Original def may be modified so we have to make a copy here.
- MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
- CloneMIs.push_back(Clone);
- ReMatDefs[VN] = Clone;
-
- bool CanDelete = true;
- if (VNI->hasPHIKill()) {
- // A kill is a phi node, not all of its uses can be rematerialized.
- // It must not be deleted.
- CanDelete = false;
- // Need a stack slot if there is any live range where uses cannot be
- // rematerialized.
- NeedStackSlot = true;
- }
- if (CanDelete)
- ReMatDelete.set(VN);
+ void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
+ bool GoingUp = NewIdx < OldIdx;
+
+ if (GoingUp) {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringUpFrom(OldIdx, *EI);
} else {
- // Need a stack slot if there is any live range where uses cannot be
- // rematerialized.
- NeedStackSlot = true;
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringDownFrom(OldIdx, *EI);
}
}
- // One stack slot per live interval.
- if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
- if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
- Slot = vrm.assignVirt2StackSlot(li.reg);
-
- // This case only occurs when the prealloc splitter has already assigned
- // a stack slot to this vreg.
- else
- Slot = vrm.getStackSlot(li.reg);
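+ // Move a range that lies entirely within MI's old slot group: rebuild it
+ // at NewIdx, preserving the early-clobber flag on its start and the dead
+ // flag on its end.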
+ void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
+ LR->end <= OldIdx.getDeadSlot() &&
+ "Range should be internal to OldIdx.");
+ LiveRange Tmp(*LR);
+ Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
+ Tmp.valno->def = Tmp.start;
+ Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
+ LI->removeRange(*LR);
+ LI->addRange(Tmp);
}
- // Create new intervals and rewrite defs and uses.
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
- MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
- bool DefIsReMat = ReMatDefMI != NULL;
- bool CanDelete = ReMatDelete[I->valno->id];
- int LdSlot = 0;
- bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- bool isLoad = isLoadSS ||
- (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
- rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
- Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
- CanDelete, vrm, rc, ReMatIds, loopInfo,
- SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
- MBBVRegsMap, NewLIs);
+ void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II)
+ moveInternalFrom(OldIdx, *II);
}
- // Insert spills / restores if we are splitting.
- if (!TrySplit) {
- handleSpilledImpDefs(li, vrm, rc, NewLIs);
- normalizeSpillWeights(NewLIs);
- return NewLIs;
+ void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
+ LiveRange* LR = P.second;
+ assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
+ "Range should start in OldIdx.");
+ assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
+ SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
+ LR->start = NewStart;
+ LR->valno->def = NewStart;
}
- SmallPtrSet<LiveInterval*, 4> AddedKill;
- SmallVector<unsigned, 2> Ops;
- if (NeedStackSlot) {
- int Id = SpillMBBs.find_first();
- while (Id != -1) {
- std::vector<SRInfo> &spills = SpillIdxes[Id];
- for (unsigned i = 0, e = spills.size(); i != e; ++i) {
- SlotIndex index = spills[i].index;
- unsigned VReg = spills[i].vreg;
- LiveInterval &nI = getOrCreateInterval(VReg);
- bool isReMat = vrm.isReMaterialized(VReg);
- MachineInstr *MI = getInstructionFromIndex(index);
- bool CanFold = false;
- bool FoundUse = false;
- Ops.clear();
- if (spills[i].canFold) {
- CanFold = true;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != VReg)
- continue;
-
- Ops.push_back(j);
- if (MO.isDef())
- continue;
- if (isReMat ||
- (!FoundUse && !alsoFoldARestore(Id, index, VReg,
- RestoreMBBs, RestoreIdxes))) {
- // MI has two-address uses of the same register. If the use
- // isn't the first and only use in the BB, then we can't fold
- // it. FIXME: Move this to rewriteInstructionsForSpills.
- CanFold = false;
- break;
- }
- FoundUse = true;
- }
- }
- // Fold the store into the def if possible.
- bool Folded = false;
- if (CanFold && !Ops.empty()) {
- if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
- Folded = true;
- if (FoundUse) {
- // Also folded uses, do not issue a load.
- eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
- nI.removeRange(index.getLoadIndex(), index.getDefIndex());
- }
- nI.removeRange(index.getDefIndex(), index.getStoreIndex());
- }
- }
-
- // Otherwise tell the spiller to issue a spill.
- if (!Folded) {
- LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
- bool isKill = LR->end == index.getStoreIndex();
- if (!MI->registerDefIsDead(nI.reg))
- // No need to spill a dead def.
- vrm.addSpillPoint(VReg, isKill, MI);
- if (isKill)
- AddedKill.insert(&nI);
- }
- }
- Id = SpillMBBs.find_next(Id);
- }
+ void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI)
+ moveExitingFrom(OldIdx, *EI);
}
- int Id = RestoreMBBs.find_first();
- while (Id != -1) {
- std::vector<SRInfo> &restores = RestoreIdxes[Id];
- for (unsigned i = 0, e = restores.size(); i != e; ++i) {
- SlotIndex index = restores[i].index;
- if (index == SlotIndex())
- continue;
- unsigned VReg = restores[i].vreg;
- LiveInterval &nI = getOrCreateInterval(VReg);
- bool isReMat = vrm.isReMaterialized(VReg);
- MachineInstr *MI = getInstructionFromIndex(index);
- bool CanFold = false;
- Ops.clear();
- if (restores[i].canFold) {
- CanFold = true;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != VReg)
- continue;
-
- if (MO.isDef()) {
- // If this restore were to be folded, it would have been folded
- // already.
- CanFold = false;
- break;
- }
- Ops.push_back(j);
- }
- }
+ void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ bool LiveThrough = LR->end > OldIdx.getRegSlot();
+ if (LiveThrough) {
+ assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
+ "Def in bundle should be def range.");
+ assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
+ "If bundle has use for this reg it should be LR.");
+ BR[LI->reg].Use = LR;
+ return;
+ }
- // Fold the load into the use if possible.
- bool Folded = false;
- if (CanFold && !Ops.empty()) {
- if (!isReMat)
- Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
- else {
- MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
- int LdSlot = 0;
- bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
- // If the rematerializable def is a load, also try to fold it.
- if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
- Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
- Ops, isLoadSS, LdSlot, VReg);
- if (!Folded) {
- unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
- if (ImpUse) {
- // Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI and mark the register
- // interval as unspillable.
- LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.markNotSpillable();
- MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
- }
- }
- }
- }
- // If folding is not possible / failed, then tell the spiller to issue a
- // load / rematerialization for us.
- if (Folded)
- nI.removeRange(index.getLoadIndex(), index.getDefIndex());
- else
- vrm.addRestorePoint(VReg, MI);
+ SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
+ moveKillFlags(LI->reg, OldIdx, LastUse);
+
+ if (LR->start < NewIdx) {
+ // Becoming a new entering range.
+ assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
+ "Bundle shouldn't be re-defining reg mid-range.");
+ assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
+ "Bundle shouldn't have different use range for same reg.");
+ LR->end = LastUse.getRegSlot();
+ BR[LI->reg].Use = LR;
+ } else {
+ // Becoming a new Dead-def.
+ assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
+ "Live range starting at unexpected slot.");
+ assert(BR[LI->reg].Def == LR && "Reg should have def range.");
+ assert(BR[LI->reg].Dead == 0 &&
+ "Can't have def and dead def of same reg in a bundle.");
+ LR->end = LastUse.getDeadSlot();
+ BR[LI->reg].Dead = BR[LI->reg].Def;
+ BR[LI->reg].Def = 0;
}
- Id = RestoreMBBs.find_next(Id);
}
- // Finalize intervals: add kills, finalize spill weights, and filter out
- // dead intervals.
- std::vector<LiveInterval*> RetNewLIs;
- for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
- LiveInterval *LI = NewLIs[i];
- if (!LI->empty()) {
- if (!AddedKill.count(LI)) {
- LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
- SlotIndex LastUseIdx = LR->end.getBaseIndex();
- MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
- int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
- assert(UseIdx != -1);
- if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
- LastUse->getOperand(UseIdx).setIsKill();
- vrm.addKillPoint(LI->reg, LastUseIdx);
- }
- }
- RetNewLIs.push_back(LI);
+ void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+ if (NewIdx > LR->end) {
+ // Range extended to bundle. Add to bundle uses.
+ // Note: Currently adds kill flags to bundle start.
+ assert(BR[LI->reg].Use == 0 &&
+ "Bundle already has use range for reg.");
+ moveKillFlags(LI->reg, LR->end, NewIdx);
+ LR->end = NewIdx.getRegSlot();
+ BR[LI->reg].Use = LR;
+ } else {
+ assert(BR[LI->reg].Use != 0 &&
+ "Bundle should already have a use range for reg.");
}
}
- handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
- normalizeSpillWeights(RetNewLIs);
- return RetNewLIs;
-}
-
-/// hasAllocatableSuperReg - Return true if the specified physical register has
-/// any super register that's allocatable.
-bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
- for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
- if (allocatableRegs_[*AS] && hasInterval(*AS))
- return true;
- return false;
-}
+ void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
+ BundleRanges& BR) {
+ bool GoingUp = NewIdx < OldIdx;
-/// getRepresentativeReg - Find the largest super register of the specified
-/// physical register.
-unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
- // Find the largest super-register that is allocatable.
- unsigned BestReg = Reg;
- for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
- unsigned SuperReg = *AS;
- if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
- BestReg = SuperReg;
- break;
+ if (GoingUp) {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringUpFromInto(OldIdx, *EI, BR);
+ } else {
+ for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
+ EI != EE; ++EI)
+ moveEnteringDownFromInto(OldIdx, *EI, BR);
}
}
- return BestReg;
-}
-/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
-/// specified interval that conflicts with the specified physical register.
-unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
- unsigned PhysReg) const {
- unsigned NumConflicts = 0;
- const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- MachineInstr *MI = O.getParent();
- if (MI->isDebugValue())
- continue;
- SlotIndex Index = getInstructionIndex(MI);
- if (pli.liveAt(Index))
- ++NumConflicts;
+ void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ // TODO: Sane rules for moving ranges into bundles.
}
- return NumConflicts;
-}
-/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
-/// around all defs and uses of the specified interval. Return true if it
-/// was able to cut its interval.
-bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
- unsigned PhysReg, VirtRegMap &vrm) {
- unsigned SpillReg = getRepresentativeReg(PhysReg);
-
- DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
- << " represented by " << tri_->getName(SpillReg) << '\n');
-
- for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
- // If there are registers which alias PhysReg, but which are not a
- // sub-register of the chosen representative super register. Assert
- // since we can't handle it yet.
- assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
- tri_->isSuperRegister(*AS, SpillReg));
-
- bool Cut = false;
- SmallVector<unsigned, 4> PRegs;
- if (hasInterval(SpillReg))
- PRegs.push_back(SpillReg);
- for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
- if (hasInterval(*SR))
- PRegs.push_back(*SR);
-
- DEBUG({
- dbgs() << "Trying to spill:";
- for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
- dbgs() << ' ' << tri_->getName(PRegs[i]);
- dbgs() << '\n';
- });
-
- SmallPtrSet<MachineInstr*, 8> SeenMIs;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- MachineInstr *MI = O.getParent();
- if (MI->isDebugValue() || SeenMIs.count(MI))
- continue;
- SeenMIs.insert(MI);
- SlotIndex Index = getInstructionIndex(MI);
- bool LiveReg = false;
- for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
- unsigned PReg = PRegs[i];
- LiveInterval &pli = getInterval(PReg);
- if (!pli.liveAt(Index))
- continue;
- LiveReg = true;
- SlotIndex StartIdx = Index.getLoadIndex();
- SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
- if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, tm_);
- }
- report_fatal_error(Msg.str());
+ void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
+ BundleRanges& BR) {
+ for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
+ II != IE; ++II)
+ moveInternalFromInto(OldIdx, *II, BR);
+ }
+
+ void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
+ BundleRanges& BR) {
+ LiveInterval* LI = P.first;
+ LiveRange* LR = P.second;
+
+ assert(LR->start.isRegister() &&
+ "Don't know how to merge exiting ECs into bundles yet.");
+
+ if (LR->end > NewIdx.getDeadSlot()) {
+ // This range is becoming an exiting range on the bundle.
+ // If there was an old dead-def of this reg, delete it.
+ if (BR[LI->reg].Dead != 0) {
+ LI->removeRange(*BR[LI->reg].Dead);
+ BR[LI->reg].Dead = 0;
+ }
+ assert(BR[LI->reg].Def == 0 &&
+ "Can't have two defs for the same variable exiting a bundle.");
+ LR->start = NewIdx.getRegSlot();
+ LR->valno->def = LR->start;
+ BR[LI->reg].Def = LR;
+ } else {
+ // This range is becoming internal to the bundle.
+ assert(LR->end == NewIdx.getRegSlot() &&
+ "Can't bundle def whose kill is before the bundle");
+ if (BR[LI->reg].Dead || BR[LI->reg].Def) {
+ // Already have a def for this. Just delete range.
+ LI->removeRange(*LR);
+ } else {
+ // Make range dead, record.
+ LR->end = NewIdx.getDeadSlot();
+ BR[LI->reg].Dead = LR;
+ assert(BR[LI->reg].Use == LR &&
+ "Range becoming dead should currently be use.");
}
- pli.removeRange(StartIdx, EndIdx);
- LiveReg = true;
+ // In both cases the range is no longer a use on the bundle.
+ BR[LI->reg].Use = 0;
}
- if (!LiveReg)
- continue;
- DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
- vrm.addEmergencySpill(SpillReg, MI);
- Cut = true;
}
- return Cut;
-}
-LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
- MachineInstr* startInst) {
- LiveInterval& Interval = getOrCreateInterval(reg);
- VNInfo* VN = Interval.getNextValue(
- SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- startInst, getVNInfoAllocator());
- VN->setHasPHIKill(true);
- LiveRange LR(
- SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- getMBBEndIdx(startInst->getParent()), VN);
- Interval.addRange(LR);
+ void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
+ BundleRanges& BR) {
+ for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
+ EI != EE; ++EI)
+ moveExitingFromInto(OldIdx, *EI, BR);
+ }
- return LR;
+};
+
+void LiveIntervals::handleMove(MachineInstr* MI) {
+ SlotIndex OldIndex = indexes_->getInstructionIndex(MI);
+ indexes_->removeMachineInstrFromMaps(MI);
+ SlotIndex NewIndex = MI->isInsideBundle() ?
+ indexes_->getInstructionIndex(MI) :
+ indexes_->insertMachineInstrInMaps(MI);
+ assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
+ OldIndex < getMBBEndIdx(MI->getParent()) &&
+ "Cannot handle moves across basic block boundaries.");
+ assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
+
+ HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HME.moveAllRangesFrom(MI, OldIndex);
}
+void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, MachineInstr* BundleStart) {
+ SlotIndex NewIndex = indexes_->getInstructionIndex(BundleStart);
+ HMEditor HME(*this, *mri_, *tri_, NewIndex);
+ HME.moveAllRangesInto(MI, BundleStart);
+}
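
The new handleMove hook is the piece other passes will touch: after an
instruction is physically reordered within its block, LiveIntervals repairs
the affected ranges so the analysis does not have to be recomputed. A minimal
sketch of that call pattern, assuming the post-patch API above (the helper
name and surrounding context are illustrative, not from this patch):

    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    using namespace llvm;

    // Move MI to InsertPt inside its own block, then fix up live intervals.
    static void moveAndRepair(LiveIntervals &LIS, MachineInstr *MI,
                              MachineBasicBlock::iterator InsertPt) {
      MachineBasicBlock *MBB = MI->getParent();
      MBB->splice(InsertPt, MBB, MI); // physically move the instruction
      LIS.handleMove(MI);             // recompute ranges around the move
    }

Note that handleMove asserts the move stays within one basic block;
cross-block motion still requires a full recompute.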
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 110fe1e62024..60a68806c55e 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -21,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+
using namespace llvm;
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index 5d64d285f39a..dbf5ac122d5d 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -20,8 +20,6 @@
#include "llvm/ADT/IntervalMap.h"
#include "llvm/CodeGen/LiveInterval.h"
-#include <algorithm>
-
namespace llvm {
class MachineLoopRange;
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index a7d5af5198e5..d8ab7918ae25 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -65,7 +65,7 @@ void LiveRangeCalc::extend(LiveInterval *LI,
assert(DomTree && "Missing dominator tree");
MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill.getPrevSlot());
- assert(Kill && "No MBB at Kill");
+ assert(KillMBB && "No MBB at Kill");
// Is there a def in the same MBB we can extend?
if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill))
@@ -237,7 +237,7 @@ void LiveRangeCalc::updateSSA(SlotIndexes *Indexes,
assert(Alloc && "Need VNInfo allocator to create PHI-defs");
SlotIndex Start, End;
tie(Start, End) = Indexes->getMBBRange(MBB);
- VNInfo *VNI = I->LI->getNextValue(Start, 0, *Alloc);
+ VNInfo *VNI = I->LI->getNextValue(Start, *Alloc);
VNI->setIsPHIDef(true);
I->Value = VNI;
// This block is done, we know the final value.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index b23f85165360..695f53631e1b 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -1,4 +1,4 @@
-//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
@@ -29,13 +29,14 @@ STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
-LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
- LiveIntervals &LIS,
- VirtRegMap &VRM) {
- MachineRegisterInfo &MRI = VRM.getRegInfo();
+void LiveRangeEdit::Delegate::anchor() { }
+
+LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) {
unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
- VRM.grow();
- VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg));
+ if (VRM) {
+ VRM->grow();
+ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
+ }
LiveInterval &LI = LIS.getOrCreateInterval(VReg);
newRegs_.push_back(&LI);
return LI;
@@ -43,37 +44,32 @@ LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg,
bool LiveRangeEdit::checkRematerializable(VNInfo *VNI,
const MachineInstr *DefMI,
- const TargetInstrInfo &tii,
AliasAnalysis *aa) {
assert(DefMI && "Missing instruction");
scannedRemattable_ = true;
- if (!tii.isTriviallyReMaterializable(DefMI, aa))
+ if (!TII.isTriviallyReMaterializable(DefMI, aa))
return false;
remattable_.insert(VNI);
return true;
}
-void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa) {
+void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
for (LiveInterval::vni_iterator I = parent_.vni_begin(),
E = parent_.vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
if (VNI->isUnused())
continue;
- MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def);
if (!DefMI)
continue;
- checkRematerializable(VNI, DefMI, tii, aa);
+ checkRematerializable(VNI, DefMI, aa);
}
scannedRemattable_ = true;
}
-bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa) {
+bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) {
if (!scannedRemattable_)
- scanRemattable(lis, tii, aa);
+ scanRemattable(aa);
return !remattable_.empty();
}
@@ -81,24 +77,18 @@ bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
/// OrigIdx are also available with the same value at UseIdx.
bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
SlotIndex OrigIdx,
- SlotIndex UseIdx,
- LiveIntervals &lis) {
- OrigIdx = OrigIdx.getUseIndex();
- UseIdx = UseIdx.getUseIndex();
+ SlotIndex UseIdx) {
+ OrigIdx = OrigIdx.getRegSlot(true);
+ UseIdx = UseIdx.getRegSlot(true);
for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = OrigMI->getOperand(i);
if (!MO.isReg() || !MO.getReg() || MO.isDef())
continue;
// Reserved registers are OK.
- if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ if (MO.isUndef() || !LIS.hasInterval(MO.getReg()))
continue;
- // We cannot depend on virtual registers in uselessRegs_.
- if (uselessRegs_)
- for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
- if ((*uselessRegs_)[ui]->reg == MO.getReg())
- return false;
- LiveInterval &li = lis.getInterval(MO.getReg());
+ LiveInterval &li = LIS.getInterval(MO.getReg());
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
continue;
@@ -110,8 +100,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
bool LiveRangeEdit::canRematerializeAt(Remat &RM,
SlotIndex UseIdx,
- bool cheapAsAMove,
- LiveIntervals &lis) {
+ bool cheapAsAMove) {
assert(scannedRemattable_ && "Call anyRematerializable first");
// Use scanRemattable info.
@@ -121,19 +110,19 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM,
// No defining instruction provided.
SlotIndex DefIdx;
if (RM.OrigMI)
- DefIdx = lis.getInstructionIndex(RM.OrigMI);
+ DefIdx = LIS.getInstructionIndex(RM.OrigMI);
else {
DefIdx = RM.ParentVNI->def;
- RM.OrigMI = lis.getInstructionFromIndex(DefIdx);
+ RM.OrigMI = LIS.getInstructionFromIndex(DefIdx);
assert(RM.OrigMI && "No defining instruction for remattable value");
}
// If only cheap remats were requested, bail out early.
- if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ if (cheapAsAMove && !RM.OrigMI->isAsCheapAsAMove())
return false;
// Verify that all used registers are available with the same values.
- if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis))
+ if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx))
return false;
return true;
@@ -143,27 +132,22 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg,
const Remat &RM,
- LiveIntervals &lis,
- const TargetInstrInfo &tii,
const TargetRegisterInfo &tri,
bool Late) {
assert(RM.OrigMI && "Invalid remat");
- tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
rematted_.insert(RM.ParentVNI);
- return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
- .getDefIndex();
+ return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late)
+ .getRegSlot();
}
-void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) {
+void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg))
LIS.removeInterval(Reg);
}
bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
- SmallVectorImpl<MachineInstr*> &Dead,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS,
- const TargetInstrInfo &TII) {
+ SmallVectorImpl<MachineInstr*> &Dead) {
MachineInstr *DefMI = 0, *UseMI = 0;
// Check that there is a single def and a single use.
@@ -174,7 +158,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
if (MO.isDef()) {
if (DefMI && DefMI != MI)
return false;
- if (!MI->getDesc().canFoldAsLoad())
+ if (!MI->canFoldAsLoad())
return false;
DefMI = MI;
} else if (!MO.isUndef()) {
@@ -209,19 +193,17 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
}
void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- LiveIntervals &LIS, VirtRegMap &VRM,
- const TargetInstrInfo &TII) {
+ ArrayRef<unsigned> RegsBeingSpilled) {
SetVector<LiveInterval*,
SmallVector<LiveInterval*, 8>,
SmallPtrSet<LiveInterval*, 8> > ToShrink;
- MachineRegisterInfo &MRI = VRM.getRegInfo();
for (;;) {
// Erase all dead defs.
while (!Dead.empty()) {
MachineInstr *MI = Dead.pop_back_val();
assert(MI->allDefsAreDead() && "Def isn't really dead");
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex();
+ SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot();
// Never delete inline asm.
if (MI->isInlineAsm()) {
@@ -265,7 +247,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
LI.removeValNo(VNI);
if (LI.empty()) {
ToShrink.remove(&LI);
- eraseVirtReg(Reg, LIS);
+ eraseVirtReg(Reg);
}
}
}
@@ -284,12 +266,26 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
// Shrink just one live interval. Then delete new dead defs.
LiveInterval *LI = ToShrink.back();
ToShrink.pop_back();
- if (foldAsLoad(LI, Dead, MRI, LIS, TII))
+ if (foldAsLoad(LI, Dead))
continue;
if (delegate_)
delegate_->LRE_WillShrinkVirtReg(LI->reg);
if (!LIS.shrinkToUses(LI, &Dead))
continue;
+
+ // Don't create new intervals for a register being spilled.
+ // The new intervals would have to be spilled anyway, so it's not worth it.
+ // Also, they currently aren't spilled, so creating them and not spilling
+ // them results in incorrect code.
+ bool BeingSpilled = false;
+ for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) {
+ if (LI->reg == RegsBeingSpilled[i]) {
+ BeingSpilled = true;
+ break;
+ }
+ }
+
+ if (BeingSpilled) continue;
// LI may have been separated, create new intervals.
LI->RenumberValues(LIS);
@@ -298,16 +294,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (NumComp <= 1)
continue;
++NumFracRanges;
- bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
+ bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg;
DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
SmallVector<LiveInterval*, 8> Dups(1, LI);
for (unsigned i = 1; i != NumComp; ++i) {
- Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ Dups.push_back(&createFrom(LI->reg));
// If LI is an original interval that hasn't been split yet, make the new
// intervals their own originals instead of referring to LI. The original
// interval must contain all the split products, and LI doesn't.
if (IsOriginal)
- VRM.setIsSplitFromReg(Dups.back()->reg, 0);
+ VRM->setIsSplitFromReg(Dups.back()->reg, 0);
if (delegate_)
delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
}
@@ -316,10 +312,8 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
}
void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF,
- LiveIntervals &LIS,
const MachineLoopInfo &Loops) {
VirtRegAuxInfo VRAI(MF, LIS, Loops);
- MachineRegisterInfo &MRI = MF.getRegInfo();
for (iterator I = begin(), E = end(); I != E; ++I) {
LiveInterval &LI = **I;
if (MRI.recomputeRegClass(LI.reg, MF.getTarget()))
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
deleted file mode 100644
index 9b0a671ea9e5..000000000000
--- a/lib/CodeGen/LiveRangeEdit.h
+++ /dev/null
@@ -1,206 +0,0 @@
-//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRangeEdit class represents changes done to a virtual register when it
-// is spilled or split.
-//
-// The parent register is never changed. Instead, a number of new virtual
-// registers are created and added to the newRegs vector.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
-#define LLVM_CODEGEN_LIVERANGEEDIT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/LiveInterval.h"
-
-namespace llvm {
-
-class AliasAnalysis;
-class LiveIntervals;
-class MachineLoopInfo;
-class MachineRegisterInfo;
-class VirtRegMap;
-
-class LiveRangeEdit {
-public:
- /// Callback methods for LiveRangeEdit owners.
- struct Delegate {
- /// Called immediately before erasing a dead machine instruction.
- virtual void LRE_WillEraseInstruction(MachineInstr *MI) {}
-
- /// Called when a virtual register is no longer used. Return false to defer
- /// its deletion from LiveIntervals.
- virtual bool LRE_CanEraseVirtReg(unsigned) { return true; }
-
- /// Called before shrinking the live range of a virtual register.
- virtual void LRE_WillShrinkVirtReg(unsigned) {}
-
- /// Called after cloning a virtual register.
- /// This is used for new registers representing connected components of Old.
- virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {}
-
- virtual ~Delegate() {}
- };
-
-private:
- LiveInterval &parent_;
- SmallVectorImpl<LiveInterval*> &newRegs_;
- Delegate *const delegate_;
- const SmallVectorImpl<LiveInterval*> *uselessRegs_;
-
- /// firstNew_ - Index of the first register added to newRegs_.
- const unsigned firstNew_;
-
- /// scannedRemattable_ - true when remattable values have been identified.
- bool scannedRemattable_;
-
- /// remattable_ - Values defined by remattable instructions as identified by
- /// tii.isTriviallyReMaterializable().
- SmallPtrSet<const VNInfo*,4> remattable_;
-
- /// rematted_ - Values that were actually rematted, and so need to have their
- /// live range trimmed or entirely removed.
- SmallPtrSet<const VNInfo*,4> rematted_;
-
- /// scanRemattable - Identify the parent_ values that may rematerialize.
- void scanRemattable(LiveIntervals &lis,
- const TargetInstrInfo &tii,
- AliasAnalysis *aa);
-
- /// allUsesAvailableAt - Return true if all registers used by OrigMI at
- /// OrigIdx are also available with the same value at UseIdx.
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx, LiveIntervals &lis);
-
- /// foldAsLoad - If LI has a single use and a single def that can be folded as
- /// a load, eliminate the register by folding the def into the use.
- bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr*> &Dead,
- MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&);
-
-public:
- /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
- /// @param parent The register being spilled or split.
- /// @param newRegs List to receive any new registers created. This needn't be
- /// empty initially, any existing registers are ignored.
- /// @param uselessRegs List of registers that can't be used when
- /// rematerializing values because they are about to be removed.
- LiveRangeEdit(LiveInterval &parent,
- SmallVectorImpl<LiveInterval*> &newRegs,
- Delegate *delegate = 0,
- const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
- : parent_(parent), newRegs_(newRegs),
- delegate_(delegate),
- uselessRegs_(uselessRegs),
- firstNew_(newRegs.size()),
- scannedRemattable_(false) {}
-
- LiveInterval &getParent() const { return parent_; }
- unsigned getReg() const { return parent_.reg; }
-
- /// Iterator for accessing the new registers added by this edit.
- typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
- iterator begin() const { return newRegs_.begin()+firstNew_; }
- iterator end() const { return newRegs_.end(); }
- unsigned size() const { return newRegs_.size()-firstNew_; }
- bool empty() const { return size() == 0; }
- LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
-
- ArrayRef<LiveInterval*> regs() const {
- return makeArrayRef(newRegs_).slice(firstNew_);
- }
-
- /// FIXME: Temporary accessors until we can get rid of
- /// LiveIntervals::AddIntervalsForSpills
- SmallVectorImpl<LiveInterval*> *getNewVRegs() { return &newRegs_; }
- const SmallVectorImpl<LiveInterval*> *getUselessVRegs() {
- return uselessRegs_;
- }
-
- /// createFrom - Create a new virtual register based on OldReg.
- LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&);
-
- /// create - Create a new register with the same class and original slot as
- /// parent.
- LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) {
- return createFrom(getReg(), LIS, VRM);
- }
-
- /// anyRematerializable - Return true if any parent values may be
- /// rematerializable.
- /// This function must be called before any rematerialization is attempted.
- bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
- AliasAnalysis*);
-
- /// checkRematerializable - Manually add VNI to the list of rematerializable
- /// values if DefMI may be rematerializable.
- bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI,
- const TargetInstrInfo&, AliasAnalysis*);
-
- /// Remat - Information needed to rematerialize at a specific location.
- struct Remat {
- VNInfo *ParentVNI; // parent_'s value at the remat location.
- MachineInstr *OrigMI; // Instruction defining ParentVNI.
- explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
- };
-
- /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
- /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
- /// When cheapAsAMove is set, only cheap remats are allowed.
- bool canRematerializeAt(Remat &RM,
- SlotIndex UseIdx,
- bool cheapAsAMove,
- LiveIntervals &lis);
-
- /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
- /// instruction into MBB before MI. The new instruction is mapped, but
- /// liveness is not updated.
- /// Return the SlotIndex of the new instruction.
- SlotIndex rematerializeAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg,
- const Remat &RM,
- LiveIntervals&,
- const TargetInstrInfo&,
- const TargetRegisterInfo&,
- bool Late = false);
-
- /// markRematerialized - explicitly mark a value as rematerialized after doing
- /// it manually.
- void markRematerialized(const VNInfo *ParentVNI) {
- rematted_.insert(ParentVNI);
- }
-
- /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
- bool didRematerialize(const VNInfo *ParentVNI) const {
- return rematted_.count(ParentVNI);
- }
-
- /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try
- /// to erase it from LIS.
- void eraseVirtReg(unsigned Reg, LiveIntervals &LIS);
-
- /// eliminateDeadDefs - Try to delete machine instructions that are now dead
- /// (allDefsAreDead returns true). This may cause live intervals to be trimmed
- /// and further dead efs to be eliminated.
- void eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
- LiveIntervals&, VirtRegMap&,
- const TargetInstrInfo&);
-
- /// calculateRegClassAndHint - Recompute register class and hint for each new
- /// register.
- void calculateRegClassAndHint(MachineFunction&, LiveIntervals&,
- const MachineLoopInfo&);
-};
-
-}
-
-#endif
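
Taken together, these hunks promote LiveRangeEdit to a public header
(llvm/CodeGen/LiveRangeEdit.h, per the new include above) and fold the
LiveIntervals, TargetInstrInfo, and VirtRegMap parameters into member state,
with VirtRegMap now optional. A sketch of a post-patch call site, using only
the method signatures visible in this diff (the surrounding spiller context
is assumed):

    #include "llvm/CodeGen/LiveRangeEdit.h"
    using namespace llvm;

    // Hypothetical spiller fragment: try a cheap remat, then clean up.
    static void rematAndClean(LiveRangeEdit &LRE, AliasAnalysis *AA,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MI,
                              unsigned DestReg, VNInfo *ParentVNI,
                              SlotIndex UseIdx, const TargetRegisterInfo &TRI,
                              SmallVectorImpl<MachineInstr*> &Dead,
                              ArrayRef<unsigned> RegsBeingSpilled) {
      if (LRE.anyRematerializable(AA)) {       // was (LIS, TII, AA)
        LiveRangeEdit::Remat RM(ParentVNI);
        if (LRE.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/true))
          LRE.rematerializeAt(MBB, MI, DestReg, RM, TRI);
      }
      // RegsBeingSpilled is new: it stops dead-def elimination from creating
      // fresh intervals for registers that are about to be spilled anyway.
      LRE.eliminateDeadDefs(Dead, RegsBeingSpilled);
    }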
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 2ca90f9f05c0..5a0d97d132dd 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -14,7 +14,7 @@
// the instruction, but are never used after the instruction (i.e., they are
// killed).
//
-// This class computes live variables using are sparse implementation based on
+// This class computes live variables using a sparse implementation based on
// the machine code SSA form. This class computes live variable information for
// each virtual and _register allocatable_ physical register in a function. It
// uses the dominance properties of SSA form to efficiently compute live
@@ -33,6 +33,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -41,6 +42,7 @@
using namespace llvm;
char LiveVariables::ID = 0;
+char &llvm::LiveVariablesID = LiveVariables::ID;
INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
"Live Variable Analysis", false, false)
INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
@@ -90,7 +92,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
MachineBasicBlock *MBB,
std::vector<MachineBasicBlock*> &WorkList) {
unsigned BBNum = MBB->getNumber();
-
+
// Check to see if this basic block is one of the killing blocks. If so,
// remove it.
for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
@@ -98,7 +100,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
break;
}
-
+
if (MBB == DefBlock) return; // Terminate recursion
if (VRInfo.AliveBlocks.test(BBNum))
@@ -107,6 +109,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
// Mark the variable known alive in this bb
VRInfo.AliveBlocks.set(BBNum);
+ assert(MBB != &MF->front() && "Can't find reaching def for virtreg");
WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend());
}
@@ -130,7 +133,6 @@ void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
unsigned BBNum = MBB->getNumber();
VarInfo& VRInfo = getVarInfo(reg);
- VRInfo.NumUses++;
// Check to see if this basic block is already a kill block.
if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
@@ -190,7 +192,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = NULL;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
@@ -214,7 +216,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
unsigned DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
PartDefRegs.insert(DefReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(DefReg);
unsigned SubReg = *SubRegs; ++SubRegs)
PartDefRegs.insert(SubReg);
}
@@ -245,7 +247,7 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (Processed.count(SubReg))
continue;
@@ -257,20 +259,19 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
Processed.insert(*SS);
}
}
- }
- else if (LastDef && !PhysRegUse[Reg] &&
- !LastDef->findRegisterDefOperand(Reg))
+ } else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
// Last def defines the super register, add an implicit def of reg.
- LastDef->addOperand(MachineOperand::CreateReg(Reg,
- true/*IsDef*/, true/*IsImp*/));
+ LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+ true/*IsImp*/));
// Remember this use.
PhysRegUse[Reg] = MI;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
PhysRegUse[SubReg] = MI;
}
@@ -286,7 +287,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
@@ -331,11 +332,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// Or whole register is defined, but only partly used.
// AX<dead> = AL<imp-def>
// = AL<kill>
- // AX =
+ // AX =
MachineInstr *LastPartDef = 0;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
@@ -350,7 +351,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
PartUses.insert(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.insert(*SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
@@ -366,7 +367,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// EAX<dead> = op AL<imp-def>
// That is, EAX def is dead but AL def extends pass it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (!PartUses.count(SubReg))
continue;
@@ -387,11 +388,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
PhysRegUse[SubReg] = LastRefOrPartRef;
- for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+ for (const uint16_t *SSRegs = TRI->getSubRegisters(SubReg);
unsigned SSReg = *SSRegs; ++SSRegs)
PhysRegUse[SSReg] = LastRefOrPartRef;
}
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
PartUses.erase(*SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
@@ -419,16 +420,37 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
return true;
}
+void LiveVariables::HandleRegMask(const MachineOperand &MO) {
+ // Call HandlePhysRegKill() for all live registers clobbered by Mask.
+ // Clobbered registers are always dead, so there is no need to use
+ // HandlePhysRegDef().
+ for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) {
+ // Skip dead regs.
+ if (!PhysRegDef[Reg] && !PhysRegUse[Reg])
+ continue;
+ // Skip mask-preserved regs.
+ if (!MO.clobbersPhysReg(Reg))
+ continue;
+ // Kill the largest clobbered super-register.
+ // This avoids needless implicit operands.
+ unsigned Super = Reg;
+ for (const uint16_t *SR = TRI->getSuperRegisters(Reg); *SR; ++SR)
+ if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
+ Super = *SR;
+ HandlePhysRegKill(Super, 0);
+ }
+}
+
void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
SmallVector<unsigned, 4> &Defs) {
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
Live.insert(Reg);
- for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
Live.insert(*SS);
} else {
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
// If a register isn't itself defined, but all parts that make up of it
// are defined, then consider it also defined.
@@ -440,7 +462,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
Live.insert(SubReg);
- for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
Live.insert(*SS);
}
}
@@ -450,7 +472,7 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
@@ -469,7 +491,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
Defs.pop_back();
PhysRegDef[Reg] = MI;
PhysRegUse[Reg] = NULL;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs) {
PhysRegDef[SubReg] = MI;
PhysRegUse[SubReg] = NULL;
@@ -492,6 +514,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
PHIJoins.clear();
+ // FIXME: LiveIntervals will be updated to remove its dependence on
+ // LiveVariables to improve compilation time and eliminate bizarre pass
+ // dependencies. Until then, we can't change much in -O0.
+ if (!MRI->isSSA())
+ report_fatal_error("regalloc=... not currently supported with -O0");
+
analyzePHINodes(mf);
// Calculate live variable information in depth first order on the CFG of the
@@ -536,8 +564,13 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Clear kill and dead markers. LV will recompute them.
SmallVector<unsigned, 4> UseRegs;
SmallVector<unsigned, 4> DefRegs;
+ SmallVector<unsigned, 1> RegMasks;
for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask()) {
+ RegMasks.push_back(i);
+ continue;
+ }
if (!MO.isReg() || MO.getReg() == 0)
continue;
unsigned MOReg = MO.getReg();
@@ -559,6 +592,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
HandlePhysRegUse(MOReg, MI);
}
+ // Process all masked registers (call clobbers).
+ for (unsigned i = 0, e = RegMasks.size(); i != e; ++i)
+ HandleRegMask(MI->getOperand(RegMasks[i]));
+
// Process all defs.
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
unsigned MOReg = DefRegs[i];
@@ -590,8 +627,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// them. The tail callee need not take the same registers as input
// that it produces as output, and there are dependencies for its input
// registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()
- && !MBB->back().getDesc().isCall()) {
+ if (!MBB->empty() && MBB->back().isReturn()
+ && !MBB->back().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
@@ -607,10 +644,27 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
}
}
+ // MachineCSE may CSE instructions which write to non-allocatable physical
+ // registers across MBBs. Remember if any reserved register is liveout.
+ SmallSet<unsigned, 4> LiveOuts;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+ if (SuccMBB->isLandingPad())
+ continue;
+ for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(),
+ LE = SuccMBB->livein_end(); LI != LE; ++LI) {
+ unsigned LReg = *LI;
+ // Only remember reserved (non-allocatable) live-outs; other live-ins,
+ // e.g. those that are live into landing pads, are handled normally.
+ if (!TRI->isInAllocatableClass(LReg))
+ LiveOuts.insert(LReg);
+ }
+ }
+
// Loop over PhysRegDef / PhysRegUse, killing any registers that are
// available at the end of the basic block.
for (unsigned i = 0; i != NumRegs; ++i)
- if (PhysRegDef[i] || PhysRegUse[i])
+ if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
HandlePhysRegDef(i, 0, Defs);
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
@@ -754,7 +808,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
const unsigned NumNew = BB->getNumber();
// All registers used by PHI nodes in SuccBB must be live through BB.
- for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+ for (MachineBasicBlock::iterator BBI = SuccBB->begin(),
BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
if (BBI->getOperand(i+1).getMBB() == BB)
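
The new HandleRegMask above relies on MachineOperand::clobbersPhysReg to test
each live register against a call's register mask. The mask is a bit vector
with one bit per physical register, where a set bit means "preserved across
the call"; a sketch of the test under that assumed layout:

    #include <cstdint>

    // A clear bit means the call does not preserve PhysReg, i.e. clobbers it.
    static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
      return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
    }

Killing only the largest clobbered super-register, as the loop above does,
keeps the implicit-operand lists short without losing any kill information.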
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 1318d6212497..238bf52dfed7 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -71,19 +71,15 @@ namespace {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
- return "Local Stack Slot Allocation";
- }
private:
};
} // end anonymous namespace
char LocalStackSlotPass::ID = 0;
-
-FunctionPass *llvm::createLocalStackSlotAllocationPass() {
- return new LocalStackSlotPass();
-}
+char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID;
+INITIALIZE_PASS(LocalStackSlotPass, "localstackalloc",
+ "Local Stack Slot Allocation", false, false)
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo *MFI = MF.getFrameInfo();
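
This is the same registration change applied to LiveVariables earlier in the
patch: the createLocalStackSlotAllocationPass factory is replaced by an
exported pass ID plus INITIALIZE_PASS. No caller is shown in this diff; a
sketch of how a client could now instantiate the pass through the registry
(assumed usage, not part of the patch):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/PassRegistry.h"
    #include "llvm/PassSupport.h"
    using namespace llvm;

    // Look the pass up by its ID and construct it via its registered ctor.
    static Pass *createLocalStackSlotPass() {
      const PassInfo *PI = PassRegistry::getPassRegistry()
                               ->getPassInfo(&LocalStackSlotAllocationID);
      return PI ? PI->createPass() : 0;
    }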
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 4c5fe4c480a6..6c8a1072697c 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -73,7 +73,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
// Make sure the instructions have their operands in the reginfo lists.
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I)
+ for (MachineBasicBlock::instr_iterator
+ I = N->instr_begin(), E = N->instr_end(); I != E; ++I)
I->AddRegOperandsToUseLists(RegInfo);
LeakDetector::removeGarbageObject(N);
@@ -120,8 +121,8 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// lists.
void ilist_traits<MachineInstr>::
transferNodesFromList(ilist_traits<MachineInstr> &fromList,
- MachineBasicBlock::iterator first,
- MachineBasicBlock::iterator last) {
+ ilist_iterator<MachineInstr> first,
+ ilist_iterator<MachineInstr> last) {
assert(Parent->getParent() == fromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
@@ -140,33 +141,75 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
- iterator I = begin();
- while (I != end() && I->isPHI())
+ instr_iterator I = instr_begin(), E = instr_end();
+ while (I != E && I->isPHI())
++I;
+ assert(!I->isInsideBundle() && "First non-phi MI cannot be inside a bundle!");
return I;
}
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
- while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ iterator E = end();
+ while (I != E && (I->isPHI() || I->isLabel() || I->isDebugValue()))
++I;
+ // FIXME: This needs to change if we wish to bundle labels / dbg_values
+ // inside the bundle.
+ assert(!I->isInsideBundle() &&
+ "First non-phi / non-label instruction is inside a bundle!");
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
- iterator I = end();
- while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
+ iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
; /*noop */
- while (I != end() && !I->getDesc().isTerminator())
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getFirstTerminator() const {
+ const_iterator B = begin(), E = end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
+ ++I;
+ return I;
+}
+
+MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
+ instr_iterator B = instr_begin(), E = instr_end(), I = E;
+ while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+ ; /*noop */
+ while (I != E && !I->isTerminator())
++I;
return I;
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
- iterator B = begin(), I = end();
+ // Skip over end-of-block dbg_value instructions.
+ instr_iterator B = instr_begin(), I = instr_end();
+ while (I != B) {
+ --I;
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
+MachineBasicBlock::const_iterator
+MachineBasicBlock::getLastNonDebugInstr() const {
+ // Skip over end-of-block dbg_value instructions.
+ const_instr_iterator B = instr_begin(), I = instr_end();
while (I != B) {
--I;
- if (I->isDebugValue())
+ // Return instruction that starts a bundle.
+ if (I->isDebugValue() || I->isInsideBundle())
continue;
return I;
}
@@ -195,6 +238,18 @@ StringRef MachineBasicBlock::getName() const {
return "(null)";
}
+/// Return a hopefully unique identifier for this block.
+std::string MachineBasicBlock::getFullName() const {
+ std::string Name;
+ if (getParent())
+ Name = (getParent()->getFunction()->getName() + ":").str();
+ if (getBasicBlock())
+ Name += getBasicBlock()->getName();
+ else
+ Name += (Twine("BB") + Twine(getNumber())).str();
+ return Name;
+}
+
void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
@@ -203,8 +258,6 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
return;
}
- if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
-
if (Indexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
@@ -218,6 +271,12 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
}
if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
+ if (Alignment) {
+ OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
+ << " bytes)";
+ Comma = ", ";
+ }
+
OS << '\n';
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
@@ -237,13 +296,15 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
}
- for (const_iterator I = begin(); I != end(); ++I) {
+ for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
if (Indexes) {
if (Indexes->hasIndex(I))
OS << Indexes->getInstructionIndex(I);
OS << '\t';
}
OS << '\t';
+ if (I->isInsideBundle())
+ OS << " * ";
I->print(OS, &getParent()->getTarget());
}
@@ -260,8 +321,8 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
void MachineBasicBlock::removeLiveIn(unsigned Reg) {
std::vector<unsigned>::iterator I =
std::find(LiveIns.begin(), LiveIns.end(), Reg);
- assert(I != LiveIns.end() && "Not a live in!");
- LiveIns.erase(I);
+ if (I != LiveIns.end())
+ LiveIns.erase(I);
}
bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
@@ -297,8 +358,22 @@ void MachineBasicBlock::updateTerminator() {
TII->RemoveBranch(*this);
} else {
// The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch.
- TBB = *succ_begin();
+ // its layout successor, insert a branch. First we have to locate the
+ // only non-landing-pad successor, as that is the fallthrough block.
+ for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ assert(!TBB && "Found more than one non-landing-pad successor!");
+ TBB = *SI;
+ }
+
+ // If there is no non-landing-pad successor, the block has no
+ // fall-through edges to be concerned with.
+ if (!TBB)
+ return;
+
+ // Finally update the unconditional successor to be reached via a branch
+ // if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
@@ -435,8 +510,8 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
fromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
- MI != ME && MI->isPHI(); ++MI)
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
+ ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
MachineOperand &MO = MI->getOperand(i);
if (MO.getMBB() == fromMBB)
@@ -473,13 +548,10 @@ bool MachineBasicBlock::canFallThrough() {
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
// If we couldn't analyze the branch, examine the last instruction.
// If the block doesn't end in a known control barrier, assume fallthrough
- // is possible. The isPredicable check is needed because this code can be
+ // is possible. The isPredicated check is needed because this code can be
// called during IfConversion, where an instruction which is normally a
- // Barrier is predicated and thus no longer an actual control barrier. This
- // is over-conservative though, because if an instruction isn't actually
- // predicated we could still treat it like a barrier.
- return empty() || !back().getDesc().isBarrier() ||
- back().getDesc().isPredicable();
+ // Barrier is predicated and thus no longer an actual control barrier.
+ return empty() || !back().isBarrier() || TII->isPredicated(&back());
}
// If there is no branch, control always falls through.
@@ -538,14 +610,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Collect a list of virtual registers killed by the terminators.
SmallVector<unsigned, 4> KilledRegs;
if (LV)
- for (iterator I = getFirstTerminator(), E = end(); I != E; ++I) {
+ for (instr_iterator I = getFirstInstrTerminator(), E = instr_end();
+ I != E; ++I) {
MachineInstr *MI = I;
for (MachineInstr::mop_iterator OI = MI->operands_begin(),
OE = MI->operands_end(); OI != OE; ++OI) {
- if (!OI->isReg() || !OI->isUse() || !OI->isKill() || OI->isUndef())
+ if (!OI->isReg() || OI->getReg() == 0 ||
+ !OI->isUse() || !OI->isKill() || OI->isUndef())
continue;
unsigned Reg = OI->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(MI)) {
KilledRegs.push_back(Reg);
DEBUG(dbgs() << "Removing terminator kill: " << *MI);
@@ -565,7 +639,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
}
// Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ for (MachineBasicBlock::instr_iterator
+ i = Succ->instr_begin(),e = Succ->instr_end();
i != e && i->isPHI(); ++i)
for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
if (i->getOperand(ni+1).getMBB() == this)
@@ -577,14 +652,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addLiveIn(*I);
// Update LiveVariables.
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (LV) {
// Restore kills of virtual registers that were killed by the terminators.
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
- for (iterator I = end(), E = begin(); I != E;) {
- if (!(--I)->addRegisterKilled(Reg, NULL, /* addIfNotFound= */ false))
+ for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
continue;
- LV->getVarInfo(Reg).Kills.push_back(I);
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LV->getVarInfo(Reg).Kills.push_back(I);
DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
}
@@ -650,6 +727,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
return NMBB;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::erase(MachineBasicBlock::iterator I) {
+ if (I->isBundle()) {
+ MachineBasicBlock::iterator E = llvm::next(I);
+ return Insts.erase(I.getInstrIterator(), E.getInstrIterator());
+ }
+
+ return Insts.erase(I.getInstrIterator());
+}
+
+MachineInstr *MachineBasicBlock::remove(MachineInstr *I) {
+ if (I->isBundle()) {
+ instr_iterator MII = llvm::next(I);
+ iterator E = end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII++;
+ Insts.remove(MI);
+ }
+ }
+
+ return Insts.remove(I);
+}
+
+void MachineBasicBlock::splice(MachineBasicBlock::iterator where,
+ MachineBasicBlock *Other,
+ MachineBasicBlock::iterator From) {
+ if (From->isBundle()) {
+ MachineBasicBlock::iterator To = llvm::next(From);
+ Insts.splice(where.getInstrIterator(), Other->Insts,
+ From.getInstrIterator(), To.getInstrIterator());
+ return;
+ }
+
+ Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator());
+}
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
@@ -673,10 +786,10 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Cannot replace self with self!");
- MachineBasicBlock::iterator I = end();
- while (I != begin()) {
+ MachineBasicBlock::instr_iterator I = instr_end();
+ while (I != instr_begin()) {
--I;
- if (!I->getDesc().isTerminator()) break;
+ if (!I->isTerminator()) break;
// Scan the operands of this machine instruction, replacing any uses of Old
// with New.
@@ -755,27 +868,27 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
/// any DBG_VALUE instructions. Return UnknownLoc if there is none.
DebugLoc
-MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
+MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
DebugLoc DL;
- MachineBasicBlock::iterator E = end();
- if (MBBI != E) {
- // Skip debug declarations, we don't want a DebugLoc from them.
- MachineBasicBlock::iterator MBBI2 = MBBI;
- while (MBBI2 != E && MBBI2->isDebugValue())
- MBBI2++;
- if (MBBI2 != E)
- DL = MBBI2->getDebugLoc();
- }
+ instr_iterator E = instr_end();
+ if (MBBI == E)
+ return DL;
+
+ // Skip debug declarations; we don't want a DebugLoc from them.
+ while (MBBI != E && MBBI->isDebugValue())
+ MBBI++;
+ if (MBBI != E)
+ DL = MBBI->getDebugLoc();
return DL;
}
/// getSuccWeight - Return weight of the edge from this block to MBB.
///
-uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+uint32_t MachineBasicBlock::getSuccWeight(const MachineBasicBlock *succ) const {
if (Weights.empty())
return 0;
- succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ const_succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
return *getWeightIterator(I);
}
@@ -789,6 +902,16 @@ getWeightIterator(MachineBasicBlock::succ_iterator I) {
return Weights.begin() + index;
}
+/// getWeightIterator - Return the weight iterator corresponding to the I
+/// successor iterator.
+MachineBasicBlock::const_weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::const_succ_iterator I) const {
+ assert(Weights.size() == Successors.size() && "Async weight list!");
+ const size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index b92cda961474..a079d6e59139 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -56,6 +56,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
/// the other block frequencies. We do this to avoid using of floating points.
///
BlockFrequency MachineBlockFrequencyInfo::
-getBlockFreq(MachineBasicBlock *MBB) const {
+getBlockFreq(const MachineBasicBlock *MBB) const {
return MBFI->getBlockFreq(MBB);
}
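
Making getBlockFreq take a const MachineBasicBlock* lets const-qualified code
query frequencies. A small sketch of the kind of comparison this enables
(BlockFrequency's ordering operators are assumed):

    #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
    using namespace llvm;

    // True if A is expected to execute at least as often as B.
    static bool isAtLeastAsHot(const MachineBlockFrequencyInfo &MBFI,
                               const MachineBasicBlock *A,
                               const MachineBasicBlock *B) {
      return !(MBFI.getBlockFreq(A) < MBFI.getBlockFreq(B));
    }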
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
new file mode 100644
index 000000000000..22d7212007fc
--- /dev/null
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -0,0 +1,1001 @@
+//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements basic block placement transformations using the CFG
+// structure and branch probability estimates.
+//
+// The pass strives to preserve the structure of the CFG (that is, retain
+// a topological ordering of basic blocks) in the absence of a *strong* signal
+// to the contrary from probabilities. However, within the CFG structure, it
+// attempts to choose an ordering which favors placing more likely sequences of
+// blocks adjacent to each other.
+//
+// The algorithm works from the inner-most loop within a function outward, and
+// at each stage walks through the basic blocks, trying to coalesce them into
+// sequential chains where allowed by the CFG (or demanded by heavy
+// probabilities). Finally, it walks the blocks in topological order, and the
+// first time it reaches a chain of basic blocks, it schedules them in the
+// function in-order.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCondBranches, "Number of conditional branches");
+STATISTIC(NumUncondBranches, "Number of unconditional branches");
+STATISTIC(CondBranchTakenFreq,
+ "Potential frequency of taking conditional branches");
+STATISTIC(UncondBranchTakenFreq,
+ "Potential frequency of taking unconditional branches");
+
+namespace {
+class BlockChain;
+/// \brief Type for our function-wide basic block -> block chain mapping.
+typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType;
+}
+
+namespace {
+/// \brief A chain of blocks which will be laid out contiguously.
+///
+/// This is the data structure representing a chain of consecutive blocks that
+/// are profitable to layout together in order to maximize fallthrough
+/// probabilities. We also can use a block chain to represent a sequence of
+/// basic blocks which have some external (correctness) requirement for
+/// sequential layout.
+///
+/// Eventually, the block chains will form a directed graph over the function.
+/// We provide an SCC-supporting iterator in order to quickly build and walk the
+/// SCCs of block chains within a function.
+///
+/// The block chains also have support for calculating and caching probability
+/// information related to the chain itself versus other chains. This is used
+/// for ranking during the final layout of block chains.
+class BlockChain {
+ /// \brief The sequence of blocks belonging to this chain.
+ ///
+ /// This is the sequence of blocks for a particular chain. These will be laid
+ /// out in-order within the function.
+ SmallVector<MachineBasicBlock *, 4> Blocks;
+
+ /// \brief A handle to the function-wide basic block to block chain mapping.
+ ///
+ /// This is retained in each block chain to simplify the computation of child
+ /// block chains for SCC-formation and iteration. We store the edges to child
+ /// basic blocks, and map them back to their associated chains using this
+ /// structure.
+ BlockToChainMapType &BlockToChain;
+
+public:
+ /// \brief Construct a new BlockChain.
+ ///
+ /// This builds a new block chain representing a single basic block in the
+ /// function. It also registers itself as the chain that block participates
+ /// in with the BlockToChain mapping.
+ BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB)
+ : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) {
+ assert(BB && "Cannot create a chain with a null basic block");
+ BlockToChain[BB] = this;
+ }
+
+ /// \brief Iterator over blocks within the chain.
+ typedef SmallVectorImpl<MachineBasicBlock *>::const_iterator iterator;
+
+ /// \brief Beginning of blocks within the chain.
+ iterator begin() const { return Blocks.begin(); }
+
+ /// \brief End of blocks within the chain.
+ iterator end() const { return Blocks.end(); }
+
+ /// \brief Merge a block chain into this one.
+ ///
+ /// This routine merges a block chain into this one. It takes care of forming
+ /// a contiguous sequence of basic blocks and updating the block -> chain
+ /// mapping. It does not free or tear down the old chain, but the old chain's
+ /// block list is no longer valid.
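+ ///
+ /// Illustrative example: if this chain holds [A] and we merge a chain
+ /// holding [B, C] (with \p BB == B), the result is a single chain
+ /// [A, B, C], and the block -> chain mapping for A, B, and C all points at
+ /// this chain.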
+ void merge(MachineBasicBlock *BB, BlockChain *Chain) {
+ assert(BB);
+ assert(!Blocks.empty());
+
+ // Fast path in case we don't have a chain already.
+ if (!Chain) {
+ assert(!BlockToChain[BB]);
+ Blocks.push_back(BB);
+ BlockToChain[BB] = this;
+ return;
+ }
+
+ assert(BB == *Chain->begin());
+ assert(Chain->begin() != Chain->end());
+
+ // Update the incoming blocks to point to this chain, and add them to the
+ // chain structure.
+ for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end();
+ BI != BE; ++BI) {
+ Blocks.push_back(*BI);
+ assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain");
+ BlockToChain[*BI] = this;
+ }
+ }
+
+#ifndef NDEBUG
+ /// \brief Dump the blocks in this chain.
+ void dump() LLVM_ATTRIBUTE_USED {
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ (*I)->dump();
+ }
+#endif // NDEBUG
+
+ /// \brief Count of predecessors within the loop currently being processed.
+ ///
+ /// This count is updated at each loop we process to represent the number of
+ /// in-loop predecessors of this chain.
+ unsigned LoopPredecessors;
+};
+}
+
+namespace {
+class MachineBlockPlacement : public MachineFunctionPass {
+ /// \brief A typedef for a block filter set.
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+ /// \brief A handle to the loop info.
+ const MachineLoopInfo *MLI;
+
+ /// \brief A handle to the target's instruction info.
+ const TargetInstrInfo *TII;
+
+ /// \brief A handle to the target's lowering info.
+ const TargetLowering *TLI;
+
+ /// \brief Allocator and owner of BlockChain structures.
+ ///
+ /// We build BlockChains lazily by merging together high probability BB
+ /// sequences according to the "Algo2" in the paper mentioned at the top of
+ /// the file. To reduce malloc traffic, we allocate them using this slab-like
+ /// allocator, and destroy them after the pass completes.
+ SpecificBumpPtrAllocator<BlockChain> ChainAllocator;
+
+ /// \brief Function wide BasicBlock to BlockChain mapping.
+ ///
+ /// This mapping allows efficiently moving from any given basic block to the
+ /// BlockChain it participates in, if any. We use it to, among other things,
+ /// allow implicitly defining edges between chains as the existing edges
+ /// between basic blocks.
+ DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+
+ void markChainSuccessors(BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
+ BlockChain &Chain,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ MachineFunction &F,
+ const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter);
+ void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter = 0);
+ MachineBasicBlock *findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet);
+ void buildLoopChains(MachineFunction &F, MachineLoop &L);
+ void buildCFGChains(MachineFunction &F);
+ void AlignLoops(MachineFunction &F);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacement() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacement::ID = 0;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2",
+ "Branch Probability Basic Block Placement", false, false)
+
+#ifndef NDEBUG
+/// \brief Helper to print the name of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockName(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber()
+ << " (derived from LLVM BB '" << BB->getName() << "')";
+ OS.flush();
+ return Result;
+}
+
+/// \brief Helper to print the number of a MBB.
+///
+/// Only used by debug logging.
+static std::string getBlockNum(MachineBasicBlock *BB) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "BB#" << BB->getNumber();
+ OS.flush();
+ return Result;
+}
+#endif
+
+/// \brief Mark a chain's successors as having one fewer predecessor.
+///
+/// When a chain is being merged into the "placed" chain, this routine will
+/// quickly walk the successors of each block in the chain and mark them as
+/// having one fewer active predecessor. It also adds any successors of this
+/// chain which reach the zero-predecessor state to the worklist passed in.
+void MachineBlockPlacement::markChainSuccessors(
+ BlockChain &Chain,
+ MachineBasicBlock *LoopHeaderBB,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Walk all the blocks in this chain, marking their successors as having
+ // a predecessor placed.
+ for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end();
+ CBI != CBE; ++CBI) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(),
+ SE = (*CBI)->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || *SI == LoopHeaderBB)
+ continue;
+
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*SuccChain.begin());
+ }
+ }
+}
+
+/// \brief Select the best successor for a block.
+///
+/// This looks across all successors of a particular block and attempts to
+/// select the "best" one to be the layout successor. It only considers direct
+/// successors which also pass the block filter. It will attempt to avoid
+/// breaking CFG structure, but cave and break such structures in the case of
+/// very hot successor edges.
+///
+/// \returns The best successor block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
+ MachineBasicBlock *BB, BlockChain &Chain,
+ const BlockFilterSet *BlockFilter) {
+ const BranchProbability HotProb(4, 5); // 80%
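+ // Illustrative note: an edge is treated as hot only when its probability
+ // is at least 4/5. E.g. a successor weighted 9:1 (probability 9/10)
+ // qualifies, while one weighted 3:1 (probability 3/4) does not.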
+
+ MachineBasicBlock *BestSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we manually compute probabilities using the edge
+ // weights. This is suboptimal as it means that the somewhat subtle
+ // definition of edge weight semantics is encoded here as well. We should
+ // improve the MBPI interface to efficiently support query patterns such as
+ // this.
+ uint32_t BestWeight = 0;
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
+ DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end();
+ SI != SE; ++SI) {
+ if (BlockFilter && !BlockFilter->count(*SI))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n");
+ continue;
+ }
+ if (*SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Mid chain!\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI);
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
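+ // Worked example (illustrative): with WeightScale == 1, SumWeight == 100,
+ // and SuccWeight == 80, SuccProb is 80/100. When the raw weights overflowed
+ // 32 bits, getSumForBlock returns a WeightScale > 1 and a rescaled sum, so
+ // dividing SuccWeight by the same scale keeps the ratio consistent.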
+
+ // Only consider successors which are either "hot", or wouldn't violate
+ // any CFG constraints.
+ if (SuccChain.LoopPredecessors != 0) {
+ if (SuccProb < HotProb) {
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n");
+ continue;
+ }
+
+ // Make sure that a hot successor doesn't have a globally more important
+ // predecessor.
+ BlockFrequency CandidateEdgeFreq
+ = MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
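+ // Illustrative reading: CandidateEdgeFreq is freq(BB) * P(BB->Succ) scaled
+ // down by HotProb.getCompl() (i.e. 1/5), so a competing predecessor edge
+ // with merely a fifth of the candidate's raw frequency is already treated
+ // as globally more important and blocks the merge below.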
+ bool BadCFGConflict = false;
+ for (MachineBasicBlock::pred_iterator PI = (*SI)->pred_begin(),
+ PE = (*SI)->pred_end();
+ PI != PE; ++PI) {
+ if (*PI == *SI || (BlockFilter && !BlockFilter->count(*PI)) ||
+ BlockToChain[*PI] == &Chain)
+ continue;
+ BlockFrequency PredEdgeFreq
+ = MBFI->getBlockFreq(*PI) * MBPI->getEdgeProbability(*PI, *SI);
+ if (PredEdgeFreq >= CandidateEdgeFreq) {
+ BadCFGConflict = true;
+ break;
+ }
+ }
+ if (BadCFGConflict) {
+ DEBUG(dbgs() << " " << getBlockName(*SI)
+ << " -> non-cold CFG conflict\n");
+ continue;
+ }
+ }
+
+ DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb
+ << " (prob)"
+ << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "")
+ << "\n");
+ if (BestSucc && BestWeight >= SuccWeight)
+ continue;
+ BestSucc = *SI;
+ BestWeight = SuccWeight;
+ }
+ return BestSucc;
+}
+
+namespace {
+/// \brief Predicate struct to detect blocks already placed.
+class IsBlockPlaced {
+ const BlockChain &PlacedChain;
+ const BlockToChainMapType &BlockToChain;
+
+public:
+ IsBlockPlaced(const BlockChain &PlacedChain,
+ const BlockToChainMapType &BlockToChain)
+ : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {}
+
+ bool operator()(MachineBasicBlock *BB) const {
+ return BlockToChain.lookup(BB) == &PlacedChain;
+ }
+};
+}
+
+/// \brief Select the best block from a worklist.
+///
+/// This looks through the provided worklist as a list of candidate basic
+/// blocks and selects the most profitable one to place. The definition of
+/// profitable only really makes sense in the context of a loop. This returns
+/// the most frequently visited block in the worklist, which in the case of
+/// a loop, is the one most desirable to be physically close to the rest of the
+/// loop body in order to improve icache behavior.
+///
+/// \returns The best block found, or null if none are viable.
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
+ BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList,
+ const BlockFilterSet *BlockFilter) {
+ // Once we need to walk the worklist looking for a candidate, clean up the
+ // worklist of already placed entries.
+ // FIXME: If this shows up on profiles, it could be folded (at the cost of
+ // some code complexity) into the loop below.
+ WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
+ IsBlockPlaced(Chain, BlockToChain)),
+ WorkList.end());
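+ // (The remove_if/erase pair above is the standard erase-remove idiom:
+ // remove_if compacts the surviving entries, erase trims the dead tail.)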
+
+ MachineBasicBlock *BestBlock = 0;
+ BlockFrequency BestFreq;
+ for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
+ WBE = WorkList.end();
+ WBI != WBE; ++WBI) {
+ BlockChain &SuccChain = *BlockToChain[*WBI];
+ if (&SuccChain == &Chain) {
+ DEBUG(dbgs() << " " << getBlockName(*WBI)
+ << " -> Already merged!\n");
+ continue;
+ }
+ assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block");
+
+ BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI);
+ DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq
+ << " (freq)\n");
+ if (BestBlock && BestFreq >= CandidateFreq)
+ continue;
+ BestBlock = *WBI;
+ BestFreq = CandidateFreq;
+ }
+ return BestBlock;
+}
+
+/// \brief Retrieve the first unplaced basic block.
+///
+/// This routine is called when we are unable to use the CFG to walk through
+/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
+/// We walk through the function's blocks in order, starting from the
+/// PrevUnplacedBlockIt. We update this iterator on each call to avoid
+/// re-scanning the entire sequence on repeated calls to this routine.
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ MachineFunction &F, const BlockChain &PlacedChain,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ const BlockFilterSet *BlockFilter) {
+ for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E;
+ ++I) {
+ if (BlockFilter && !BlockFilter->count(I))
+ continue;
+ if (BlockToChain[I] != &PlacedChain) {
+ PrevUnplacedBlockIt = I;
+ // Now select the head of the chain to which the unplaced block belongs
+ // as the block to place. This will force the entire chain to be placed,
+ // and satisfies the requirements of merging chains.
+ return *BlockToChain[I]->begin();
+ }
+ }
+ return 0;
+}
+
+void MachineBlockPlacement::buildChain(
+ MachineBasicBlock *BB,
+ BlockChain &Chain,
+ SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
+ const BlockFilterSet *BlockFilter) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ MachineFunction &F = *BB->getParent();
+ MachineFunction::iterator PrevUnplacedBlockIt = F.begin();
+
+ MachineBasicBlock *LoopHeaderBB = BB;
+ markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ BB = *llvm::prior(Chain.end());
+ for (;;) {
+ assert(BB);
+ assert(BlockToChain[BB] == &Chain);
+ assert(*llvm::prior(Chain.end()) == BB);
+ MachineBasicBlock *BestSucc = 0;
+
+ // Look for the best viable successor if there is one to place immediately
+ // after this block.
+ BestSucc = selectBestSuccessor(BB, Chain, BlockFilter);
+
+ // If an immediate successor isn't available, look for the best viable
+ // block among those we've identified as not violating the loop's CFG at
+ // this point. This won't be a fallthrough, but it will increase locality.
+ if (!BestSucc)
+ BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter);
+
+ if (!BestSucc) {
+ BestSucc = getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt,
+ BlockFilter);
+ if (!BestSucc)
+ break;
+
+ DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the "
+ "layout successor until the CFG reduces\n");
+ }
+
+ // Place this block, updating the data structures to reflect its placement.
+ BlockChain &SuccChain = *BlockToChain[BestSucc];
+ // Zero out LoopPredecessors for the successor we're about to merge in case
+ // we selected a successor that didn't fit naturally into the CFG.
+ SuccChain.LoopPredecessors = 0;
+ DEBUG(dbgs() << "Merging from " << getBlockNum(BB)
+ << " to " << getBlockNum(BestSucc) << "\n");
+ markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter);
+ Chain.merge(BestSucc, &SuccChain);
+ BB = *llvm::prior(Chain.end());
+ }
+
+ DEBUG(dbgs() << "Finished forming chain for header block "
+ << getBlockNum(*Chain.begin()) << "\n");
+}
+
+/// \brief Find the best loop top block for layout.
+///
+/// This routine implements the logic to analyze the loop looking for the best
+/// block to layout at the top of the loop. Typically this is done to maximize
+/// fallthrough opportunities.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(MachineFunction &F,
+ MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // We don't want to layout the loop linearly in all cases. If the loop header
+ // is just a normal basic block in the loop, we want to look for what block
+ // within the loop is the best one to layout at the top. However, if the loop
+ // header has been pre-merged into a chain due to predecessors not having
+ // analyzable branches, *and* the predecessor it is merged with is *not* part
+ // of the loop, rotating the header into the middle of the loop will create
+ // a non-contiguous range of blocks which is Very Bad. So start with the
+ // header and only rotate if safe.
+ BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ if (!LoopBlockSet.count(*HeaderChain.begin()))
+ return L.getHeader();
+
+ BlockFrequency BestExitEdgeFreq;
+ MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *LoopingBB = 0;
+ // If there are exits to outer loops, loop rotation can severely limit
+ // fallthrough opportunities unless it selects such an exit. Keep a set of
+ // blocks where rotating to exit with that block will reach an outer loop.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop;
+
+ DEBUG(dbgs() << "Finding best loop exit for: "
+ << getBlockName(L.getHeader()) << "\n");
+ for (MachineLoop::block_iterator I = L.block_begin(),
+ E = L.block_end();
+ I != E; ++I) {
+ BlockChain &Chain = *BlockToChain[*I];
+ // Ensure that this block is at the end of a chain; otherwise it could be
+ // mid-way through an inner loop or a successor of an analyzable branch.
+ if (*I != *llvm::prior(Chain.end()))
+ continue;
+
+ // Now walk the successors. We need to establish whether this has a viable
+ // exiting successor and whether it has a viable non-exiting successor.
+ // We store the old exiting state and restore it if a viable looping
+ // successor isn't found.
+ MachineBasicBlock *OldExitingBB = ExitingBB;
+ BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq;
+ // We also compute and store the best looping successor for use in layout.
+ MachineBasicBlock *BestLoopSucc = 0;
+ // FIXME: Due to the performance of the probability and weight routines in
+ // the MBPI analysis, we use the internal weights. This is only valid
+ // because it is purely a ranking function, we don't care about anything
+ // but the relative values.
+ uint32_t BestLoopSuccWeight = 0;
+ // FIXME: We also manually compute the probabilities to avoid quadratic
+ // behavior.
+ uint32_t WeightScale = 0;
+ uint32_t SumWeight = MBPI->getSumForBlock(*I, WeightScale);
+ for (MachineBasicBlock::succ_iterator SI = (*I)->succ_begin(),
+ SE = (*I)->succ_end();
+ SI != SE; ++SI) {
+ if ((*SI)->isLandingPad())
+ continue;
+ if (*SI == *I)
+ continue;
+ BlockChain &SuccChain = *BlockToChain[*SI];
+ // Don't split chains, either this chain or the successor's chain.
+ if (&Chain == &SuccChain || *SI != *SuccChain.begin()) {
+ DEBUG(dbgs() << " " << (LoopBlockSet.count(*SI) ? "looping: "
+ : "exiting: ")
+ << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (chain conflict)\n");
+ continue;
+ }
+
+ uint32_t SuccWeight = MBPI->getEdgeWeight(*I, *SI);
+ if (LoopBlockSet.count(*SI)) {
+ DEBUG(dbgs() << " looping: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << SuccWeight << ")\n");
+ if (BestLoopSucc && BestLoopSuccWeight >= SuccWeight)
+ continue;
+
+ BestLoopSucc = *SI;
+ BestLoopSuccWeight = SuccWeight;
+ continue;
+ }
+
+ BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+ BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(*I) * SuccProb;
+ DEBUG(dbgs() << " exiting: " << getBlockName(*I) << " -> "
+ << getBlockName(*SI) << " (" << ExitEdgeFreq << ")\n");
+ // Note that we slightly bias this toward an existing layout successor to
+ // retain incoming order in the absence of better information.
+ // FIXME: Should we bias this more strongly? It's pretty weak.
+ if (!ExitingBB || ExitEdgeFreq > BestExitEdgeFreq ||
+ ((*I)->isLayoutSuccessor(*SI) &&
+ !(ExitEdgeFreq < BestExitEdgeFreq))) {
+ BestExitEdgeFreq = ExitEdgeFreq;
+ ExitingBB = *I;
+ }
+
+ if (MachineLoop *ExitLoop = MLI->getLoopFor(*SI))
+ if (ExitLoop->contains(&L))
+ BlocksExitingToOuterLoop.insert(*I);
+ }
+
+ // Restore the old exiting state, no viable looping successor was found.
+ if (!BestLoopSucc) {
+ ExitingBB = OldExitingBB;
+ BestExitEdgeFreq = OldBestExitEdgeFreq;
+ continue;
+ }
+
+ // If this was the best exiting block thus far, also record the looping block.
+ if (ExitingBB == *I)
+ LoopingBB = BestLoopSucc;
+ }
+ // Without a candidate exiting block or with only a single block in the
+ // loop, just use the loop header to layout the loop.
+ if (!ExitingBB || L.getNumBlocks() == 1)
+ return L.getHeader();
+
+ // Also, if we have exit blocks which lead to outer loops but didn't select
+ // one of them as the exiting block we are rotating toward, disable loop
+ // rotation altogether.
+ if (!BlocksExitingToOuterLoop.empty() &&
+ !BlocksExitingToOuterLoop.count(ExitingBB))
+ return L.getHeader();
+
+ assert(LoopingBB && "All successors of a loop block are exit blocks!");
+ DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
+ DEBUG(dbgs() << " Best top block: " << getBlockName(LoopingBB) << "\n");
+ return LoopingBB;
+}
+
+/// \brief Forms basic block chains from the natural loop structures.
+///
+/// These chains are designed to preserve the existing *structure* of the code
+/// as much as possible. We can then stitch the chains together in a way which
+/// both preserves the topological structure and minimizes taken conditional
+/// branches.
+void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
+ MachineLoop &L) {
+ // First recurse through any nested loops, building chains for those inner
+ // loops.
+ for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+ BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end());
+
+ MachineBasicBlock *LayoutTop = findBestLoopTop(F, L, LoopBlockSet);
+ BlockChain &LoopChain = *BlockToChain[LayoutTop];
+
+ // FIXME: This is a really lame way of walking the chains in the loop: we
+ // walk the blocks, and use a set to prevent visiting a particular chain
+ // twice.
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ assert(LoopChain.LoopPredecessors == 0);
+ UpdatedPreds.insert(&LoopChain);
+ for (MachineLoop::block_iterator BI = L.block_begin(),
+ BE = L.block_end();
+ BI != BE; ++BI) {
+ BlockChain &Chain = *BlockToChain[*BI];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI))
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ buildChain(LayoutTop, LoopChain, BlockWorkList, &LoopBlockSet);
+
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadLoop = false;
+ if (LoopChain.LoopPredecessors) {
+ BadLoop = true;
+ dbgs() << "Loop chain contains a block without its preds placed!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n";
+ }
+ for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end();
+ BCI != BCE; ++BCI)
+ if (!LoopBlockSet.erase(*BCI)) {
+ // We don't mark the loop as bad here because there are real situations
+ // where this can occur: for example, an unanalyzable fallthrough
+ // from a loop block to a non-loop block, or vice versa.
+ dbgs() << "Loop chain contains a block not contained by the loop!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!LoopBlockSet.empty()) {
+ BadLoop = true;
+ for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(),
+ LBE = LoopBlockSet.end();
+ LBI != LBE; ++LBI)
+ dbgs() << "Loop contains blocks never placed into a chain!\n"
+ << " Loop header: " << getBlockName(*L.block_begin()) << "\n"
+ << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"
+ << " Bad block: " << getBlockName(*LBI) << "\n";
+ }
+ assert(!BadLoop && "Detected problems with the placement of this loop.");
+ });
+}
+
+void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
+ // Ensure that every BB in the function has an associated chain to simplify
+ // the assumptions of the remaining algorithm.
+ SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch.
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = FI;
+ BlockChain *Chain
+ = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB);
+ // Also, merge any blocks which we cannot reason about and must preserve
+ // the exact fallthrough behavior for.
+ for (;;) {
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
+ break;
+
+ MachineFunction::iterator NextFI(llvm::next(FI));
+ MachineBasicBlock *NextBB = NextFI;
+ // Ensure that the layout successor is a viable block, as we know that
+ // fallthrough is a possibility.
+ assert(NextFI != FE && "Can't fallthrough past the last block.");
+ DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
+ << getBlockName(BB) << " -> " << getBlockName(NextBB)
+ << "\n");
+ Chain->merge(NextBB, 0);
+ FI = NextFI;
+ BB = NextBB;
+ }
+ }
+
+ // Build any loop-based chains.
+ for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE;
+ ++LI)
+ buildLoopChains(F, **LI);
+
+ SmallVector<MachineBasicBlock *, 16> BlockWorkList;
+
+ SmallPtrSet<BlockChain *, 4> UpdatedPreds;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ MachineBasicBlock *BB = &*FI;
+ BlockChain &Chain = *BlockToChain[BB];
+ if (!UpdatedPreds.insert(&Chain))
+ continue;
+
+ assert(Chain.LoopPredecessors == 0);
+ for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end();
+ BCI != BCE; ++BCI) {
+ assert(BlockToChain[*BCI] == &Chain);
+ for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(),
+ PE = (*BCI)->pred_end();
+ PI != PE; ++PI) {
+ if (BlockToChain[*PI] == &Chain)
+ continue;
+ ++Chain.LoopPredecessors;
+ }
+ }
+
+ if (Chain.LoopPredecessors == 0)
+ BlockWorkList.push_back(*Chain.begin());
+ }
+
+ BlockChain &FunctionChain = *BlockToChain[&F.front()];
+ buildChain(&F.front(), FunctionChain, BlockWorkList);
+
+ typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType;
+ DEBUG({
+ // Crash at the end so we get all of the debugging output first.
+ bool BadFunc = false;
+ FunctionBlockSetType FunctionBlockSet;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
+ FunctionBlockSet.insert(FI);
+
+ for (BlockChain::iterator BCI = FunctionChain.begin(),
+ BCE = FunctionChain.end();
+ BCI != BCE; ++BCI)
+ if (!FunctionBlockSet.erase(*BCI)) {
+ BadFunc = true;
+ dbgs() << "Function chain contains a block not in the function!\n"
+ << " Bad block: " << getBlockName(*BCI) << "\n";
+ }
+
+ if (!FunctionBlockSet.empty()) {
+ BadFunc = true;
+ for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(),
+ FBE = FunctionBlockSet.end();
+ FBI != FBE; ++FBI)
+ dbgs() << "Function contains blocks never placed into a chain!\n"
+ << " Bad block: " << getBlockName(*FBI) << "\n";
+ }
+ assert(!BadFunc && "Detected problems with the block placement.");
+ });
+
+ // Splice the blocks into place.
+ MachineFunction::iterator InsertPos = F.begin();
+ for (BlockChain::iterator BI = FunctionChain.begin(),
+ BE = FunctionChain.end();
+ BI != BE; ++BI) {
+ DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain "
+ : " ... ")
+ << getBlockName(*BI) << "\n");
+ if (InsertPos != MachineFunction::iterator(*BI))
+ F.splice(InsertPos, *BI);
+ else
+ ++InsertPos;
+
+ // Update the terminator of the previous block.
+ if (BI == FunctionChain.begin())
+ continue;
+ MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI));
+
+ // FIXME: It would be awesome if updateTerminator would just return rather
+ // than assert when the branch cannot be analyzed in order to remove this
+ // boilerplate.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond))
+ PrevBB->updateTerminator();
+ }
+
+ // Fixup the last block.
+ Cond.clear();
+ MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
+ F.back().updateTerminator();
+}
+
+/// \brief Recursive helper to align a loop and any nested loops.
+static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) {
+ // Recurse through nested loops.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ AlignLoop(F, *I, Align);
+
+ L->getTopBlock()->setAlignment(Align);
+}
+
+/// \brief Align loop headers to target preferred alignments.
+void MachineBlockPlacement::AlignLoops(MachineFunction &F) {
+ if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return;
+
+ unsigned Align = TLI->getPrefLoopAlignment();
+ if (!Align)
+ return; // Don't care about loop alignment.
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+ AlignLoop(F, *I, Align);
+}
+
+bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ TII = F.getTarget().getInstrInfo();
+ TLI = F.getTarget().getTargetLowering();
+ assert(BlockToChain.empty());
+
+ buildCFGChains(F);
+ AlignLoops(F);
+
+ BlockToChain.clear();
+ ChainAllocator.DestroyAll();
+
+ // We always return true as we have no way to track whether the final order
+ // differs from the original order.
+ return true;
+}
+
+namespace {
+/// \brief A pass to compute block placement statistics.
+///
+/// A separate pass to compute interesting statistics for evaluating block
+/// placement. This is separate from the actual placement pass so that they can
+/// be computed in the absence of any placement transformations or when using
+/// alternative placement strategies.
+class MachineBlockPlacementStats : public MachineFunctionPass {
+ /// \brief A handle to the branch probability pass.
+ const MachineBranchProbabilityInfo *MBPI;
+
+ /// \brief A handle to the function-wide block frequency pass.
+ const MachineBlockFrequencyInfo *MBFI;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineBlockPlacementStats() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char MachineBlockPlacementStats::ID = 0;
+char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
+ "Basic Block Placement Stats", false, false)
+
+bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
+ // Check for single-block functions and skip them.
+ if (llvm::next(F.begin()) == F.end())
+ return false;
+
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+
+ for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BlockFrequency BlockFreq = MBFI->getBlockFreq(I);
+ Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches
+ : NumUncondBranches;
+ Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq
+ : UncondBranchTakenFreq;
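+ // A block with more than one successor is assumed to end in a conditional
+ // branch; everything else is counted as unconditional for these statistics.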
+ for (MachineBasicBlock::succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end();
+ SI != SE; ++SI) {
+ // Skip if this successor is a fallthrough.
+ if (I->isLayoutSuccessor(*SI))
+ continue;
+
+ BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI);
+ ++NumBranches;
+ BranchTakenFreq += EdgeFreq.getFrequency();
+ }
+ }
+
+ return false;
+}
+
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index c13fa6bc5333..0cc1af07952d 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -26,26 +26,43 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
char MachineBranchProbabilityInfo::ID = 0;
-uint32_t MachineBranchProbabilityInfo::
-getSumForBlock(MachineBasicBlock *MBB) const {
- uint32_t Sum = 0;
+void MachineBranchProbabilityInfo::anchor() { }
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const {
+ // First we compute the sum with 64 bits of precision, ensuring that it
+ // cannot overflow by bounding the number of weights considered. Hopefully
+ // no one actually needs 2^32 successors.
+ assert(MBB->succ_size() < UINT32_MAX);
+ uint64_t Sum = 0;
+ Scale = 1;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- MachineBasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(MBB, Succ);
- uint32_t PrevSum = Sum;
-
+ uint32_t Weight = getEdgeWeight(MBB, *I);
Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
}
+ // If the computed sum fits in 32-bits, we're done.
+ if (Sum <= UINT32_MAX)
+ return Sum;
+
+ // Otherwise, compute the scale necessary to cause the weights to fit, and
+ // re-sum with that scale applied.
+ assert((Sum / UINT32_MAX) < UINT32_MAX);
+ Scale = (Sum / UINT32_MAX) + 1;
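+ // Illustrative example: if Sum were ~1.5 * UINT32_MAX, Scale becomes 2 and
+ // each weight is halved below, bringing the re-computed sum back into range.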
+ Sum = 0;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ uint32_t Weight = getEdgeWeight(MBB, *I);
+ Sum += Weight / Scale;
+ }
+ assert(Sum <= UINT32_MAX);
return Sum;
}
uint32_t
-MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) const {
+MachineBranchProbabilityInfo::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
uint32_t Weight = Src->getSuccWeight(Dst);
if (!Weight)
return DEFAULT_WEIGHT;
@@ -55,37 +72,24 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
// Hot probability is at least 4/5 = 80%
- uint32_t Weight = getEdgeWeight(Src, Dst);
- uint32_t Sum = getSumForBlock(Src);
-
- // FIXME: Implement BranchProbability::compare then change this code to
- // compare this BranchProbability against a static "hot" BranchProbability.
- return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+ // FIXME: Compare against a static "hot" BranchProbability.
+ return getEdgeProbability(Src, Dst) > BranchProbability(4, 5);
}
MachineBasicBlock *
MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
- uint32_t Sum = 0;
uint32_t MaxWeight = 0;
MachineBasicBlock *MaxSucc = 0;
-
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
- MachineBasicBlock *Succ = *I;
- uint32_t Weight = getEdgeWeight(MBB, Succ);
- uint32_t PrevSum = Sum;
-
- Sum += Weight;
- assert(Sum > PrevSum); (void) PrevSum;
-
+ uint32_t Weight = getEdgeWeight(MBB, *I);
if (Weight > MaxWeight) {
MaxWeight = Weight;
- MaxSucc = Succ;
+ MaxSucc = *I;
}
}
- // FIXME: Use BranchProbability::compare.
- if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
return MaxSucc;
return 0;
@@ -94,8 +98,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
BranchProbability
MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
MachineBasicBlock *Dst) const {
- uint32_t N = getEdgeWeight(Src, Dst);
- uint32_t D = getSumForBlock(Src);
+ uint32_t Scale = 1;
+ uint32_t D = getSumForBlock(Src, Scale);
+ uint32_t N = getEdgeWeight(Src, Dst) / Scale;
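+ // Dividing the edge weight by the same scale used for the sum keeps the
+ // ratio N/D (approximately) consistent with the unscaled weights.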
return BranchProbability(N, D);
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 7eda8c129dc4..a63688e9ec62 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -26,13 +26,14 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
-
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCrossBBCSEs,
+ "Number of cross-MBB physreg referencing CS eliminated");
STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
@@ -49,7 +50,7 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -62,6 +63,8 @@ namespace {
virtual void releaseMemory() {
ScopeMap.clear();
Exps.clear();
+ AllocatableRegs.clear();
+ ReservedRegs.clear();
}
private:
@@ -75,6 +78,8 @@ namespace {
ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
+ BitVector AllocatableRegs;
+ BitVector ReservedRegs;
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
@@ -82,9 +87,12 @@ namespace {
MachineBasicBlock::const_iterator E) const ;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const;
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -99,6 +107,7 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
+char &llvm::MachineCSEID = MachineCSE::ID;
INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -106,8 +115,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineCSE, "machine-cse",
"Machine Common Subexpression Elimination", false, false)
-FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
-
bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
@@ -163,6 +170,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
bool SeenDef = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ SeenDef = true;
if (!MO.isReg() || !MO.getReg())
continue;
if (!TRI->regsOverlap(MO.getReg(), Reg))
@@ -173,7 +182,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
SeenDef = true;
}
if (SeenDef)
- // See a def of Reg (or an alias) before encountering any use, it's
+ // See a def of Reg (or an alias) before encountering any use, it's
// trivially dead.
return true;
@@ -189,7 +198,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs) const{
MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -207,7 +217,9 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
(MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
continue;
PhysRefs.insert(Reg);
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (MO.isDef())
+ PhysDefs.push_back(Reg);
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
PhysRefs.insert(*Alias);
}
@@ -215,25 +227,56 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs) const {
+ SmallSet<unsigned,8> &PhysRefs,
+ SmallVector<unsigned,2> &PhysDefs,
+ bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
- // not in the same basic block as the given instruction.
- MachineBasicBlock *MBB = MI->getParent();
- if (CSMI->getParent() != MBB)
- return false;
+ // not in the same basic block as the given instruction. The only exception
+ // is if the common subexpression is in the sole predecessor block.
+ const MachineBasicBlock *MBB = MI->getParent();
+ const MachineBasicBlock *CSMBB = CSMI->getParent();
+
+ bool CrossMBB = false;
+ if (CSMBB != MBB) {
+ if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
+ return false;
+
+ for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
+ if (AllocatableRegs.test(PhysDefs[i]) || ReservedRegs.test(PhysDefs[i]))
+ // Avoid extending the live range of physical registers if they are
+ // allocatable or reserved.
+ return false;
+ }
+ CrossMBB = true;
+ }
MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
MachineBasicBlock::const_iterator E = MI;
+ MachineBasicBlock::const_iterator EE = CSMBB->end();
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I->isDebugValue())
+ while (I != E && I != EE && I->isDebugValue())
++I;
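+ // For a cross-MBB candidate the scan may first run off the end of the
+ // predecessor block (I == EE below); when that happens we resume scanning
+ // from the head of MI's own block.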
+ if (I == EE) {
+ assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
+ (void)CrossMBB;
+ CrossMBB = false;
+ NonLocal = true;
+ I = MBB->begin();
+ EE = MBB->end();
+ continue;
+ }
+
if (I == E)
return true;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
+ // RegMasks go on instructions like calls that clobber lots of physregs.
+ // Don't attempt to CSE across such an instruction.
+ if (MO.isRegMask())
+ return false;
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
@@ -260,12 +303,11 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
return false;
// Ignore stuff that we obviously can't move.
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
+ if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
MI->hasUnmodeledSideEffects())
return false;
- if (MCID.mayLoad()) {
+ if (MI->mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
@@ -287,7 +329,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
- if (MI->getDesc().isAsCheapAsAMove()) {
+ if (MI->isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
@@ -376,7 +418,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Commute commutable instructions.
bool Commuted = false;
- if (!FoundCSE && MI->getDesc().isCommutable()) {
+ if (!FoundCSE && MI->isCommutable()) {
MachineInstr *NewMI = TII->commuteInstruction(MI);
if (NewMI) {
Commuted = true;
@@ -394,16 +436,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
// It's also not safe if the instruction uses physical registers.
+ bool CrossMBBPhysDef = false;
SmallSet<unsigned,8> PhysRefs;
- if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+ SmallVector<unsigned, 2> PhysDefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, PhysDefs)) {
FoundCSE = false;
- // ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between and the physical register uses
- // were not clobbered.
+ // ... Unless the CS is local or is in the sole predecessor block
+ // and it also defines the physical register which is not clobbered
+ // in between and the physical register uses were not clobbered.
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
FoundCSE = true;
}
@@ -458,6 +502,18 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
MRI->clearKillFlags(CSEPairs[i].second);
}
+
+ if (CrossMBBPhysDef) {
+ // Add physical register defs now coming in from a predecessor to MBB
+ // livein list.
+ while (!PhysDefs.empty()) {
+ unsigned LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn))
+ MBB->addLiveIn(LiveIn);
+ }
+ ++NumCrossBBCSEs;
+ }
+
MI->eraseFromParent();
++NumCSEs;
if (!PhysRefs.empty())
@@ -542,5 +598,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<MachineDominatorTree>();
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ ReservedRegs = TRI->getReservedRegs(MF);
return PerformCSE(DT->getRootNode());
}
diff --git a/lib/CodeGen/MachineCodeEmitter.cpp b/lib/CodeGen/MachineCodeEmitter.cpp
new file mode 100644
index 000000000000..81b49784c052
--- /dev/null
+++ b/lib/CodeGen/MachineCodeEmitter.cpp
@@ -0,0 +1,14 @@
+//===-- llvm/CodeGen/MachineCodeEmitter.cpp - Code emission -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+using namespace llvm;
+
+void MachineCodeEmitter::anchor() { }
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
new file mode 100644
index 000000000000..9730eaacf6e4
--- /dev/null
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -0,0 +1,340 @@
+//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level copy propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "codegen-cp"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDeletes, "Number of dead copies deleted");
+
+namespace {
+ class MachineCopyPropagation : public MachineFunctionPass {
+ const TargetRegisterInfo *TRI;
+ BitVector ReservedRegs;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ MachineCopyPropagation() : MachineFunctionPass(ID) {
+ initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ typedef SmallVector<unsigned, 4> DestList;
+ typedef DenseMap<unsigned, DestList> SourceMap;
+
+ void SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
+ bool CopyPropagateBlock(MachineBasicBlock &MBB);
+ };
+}
+char MachineCopyPropagation::ID = 0;
+char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
+
+INITIALIZE_PASS(MachineCopyPropagation, "machine-cp",
+ "Machine Copy Propagation Pass", false, false)
+
+void
+MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg,
+ SourceMap &SrcMap,
+ DenseMap<unsigned, MachineInstr*> &AvailCopyMap) {
+ SourceMap::iterator SI = SrcMap.find(Reg);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ // Source of copy is no longer available for propagation.
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ SI = SrcMap.find(*AS);
+ if (SI != SrcMap.end()) {
+ const DestList& Defs = SI->second;
+ for (DestList::const_iterator I = Defs.begin(), E = Defs.end();
+ I != E; ++I) {
+ unsigned MappedDef = *I;
+ if (AvailCopyMap.erase(MappedDef)) {
+ for (const uint16_t *SR = TRI->getSubRegisters(MappedDef); *SR; ++SR)
+ AvailCopyMap.erase(*SR);
+ }
+ }
+ }
+ }
+}
+
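+/// NoInterveningSideEffect - Return true if MI is in the same basic block as
+/// CopyMI and no instruction between them (exclusive) has unmodeled side
+/// effects, is a call, or is a terminator.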
+static bool NoInterveningSideEffect(const MachineInstr *CopyMI,
+ const MachineInstr *MI) {
+ const MachineBasicBlock *MBB = CopyMI->getParent();
+ if (MI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CopyMI;
+ MachineBasicBlock::const_iterator E = MBB->end();
+ MachineBasicBlock::const_iterator E2 = MI;
+
+ ++I;
+ while (I != E && I != E2) {
+ if (I->hasUnmodeledSideEffects() || I->isCall() ||
+ I->isTerminator())
+ return false;
+ ++I;
+ }
+ return true;
+}
+
+/// isNopCopy - Return true if the specified copy is really a nop. That is,
+/// the source of the copy is the same as the definition of the copy that
+/// supplied the source. If the source of the copy is a sub-register, then the
+/// sub-indices must also match. e.g.
+/// ecx = mov eax
+/// al = mov cl
+/// But not
+/// ecx = mov eax
+/// al = mov ch
+static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
+ const TargetRegisterInfo *TRI) {
+ unsigned SrcSrc = CopyMI->getOperand(1).getReg();
+ if (Def == SrcSrc)
+ return true;
+ if (TRI->isSubRegister(SrcSrc, Def)) {
+ unsigned SrcDef = CopyMI->getOperand(0).getReg();
+ unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def);
+ if (!SubIdx)
+ return false;
+ return SubIdx == TRI->getSubRegIndex(SrcDef, Src);
+ }
+
+ return false;
+}
+
+bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
+ SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion
+ DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map
+ DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map
+ SourceMap SrcMap; // Src -> Def map
+
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (MI->isCopy()) {
+ unsigned Def = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (TargetRegisterInfo::isVirtualRegister(Def) ||
+ TargetRegisterInfo::isVirtualRegister(Src))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src);
+ if (CI != AvailCopyMap.end()) {
+ MachineInstr *CopyMI = CI->second;
+ if (!ReservedRegs.test(Def) &&
+ (!ReservedRegs.test(Src) || NoInterveningSideEffect(CopyMI, MI)) &&
+ isNopCopy(CopyMI, Def, Src, TRI)) {
+ // The two copies cancel out and the source of the first copy
+ // hasn't been overridden, eliminate the second one. e.g.
+ // %ECX<def> = COPY %EAX<kill>
+ // ... nothing clobbered EAX.
+ // %EAX<def> = COPY %ECX
+ // =>
+ // %ECX<def> = COPY %EAX
+ //
+ // Also avoid eliminating a copy from reserved registers unless the
+ // definition is proven not clobbered. e.g.
+ // %RSP<def> = COPY %RAX
+ // CALL
+ // %RAX<def> = COPY %RSP
+
+ // Clear any kills of Def between CopyMI and MI. This extends the
+ // live range.
+ for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
+ I->clearRegisterKills(Def, TRI);
+
+ MI->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ continue;
+ }
+ }
+
+ // If Src is defined by a previous copy, it cannot be eliminated.
+ CI = CopyMap.find(Src);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const uint16_t *AS = TRI->getAliasSet(Src); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+
+ // Copy is now a candidate for deletion.
+ MaybeDeadCopies.insert(MI);
+
+ // If 'Src' is previously source of another copy, then this earlier copy's
+ // source is no longer available. e.g.
+ // %xmm9<def> = copy %xmm2
+ // ...
+ // %xmm2<def> = copy %xmm0
+ // ...
+ // %xmm2<def> = copy %xmm9
+ SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap);
+
+ // Remember Def is defined by the copy.
+ // ... Make sure to clear the def maps of aliases first.
+ for (const uint16_t *AS = TRI->getAliasSet(Def); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+ CopyMap[Def] = MI;
+ AvailCopyMap[Def] = MI;
+ for (const uint16_t *SR = TRI->getSubRegisters(Def); *SR; ++SR) {
+ CopyMap[*SR] = MI;
+ AvailCopyMap[*SR] = MI;
+ }
+
+ // Remember source that's copied to Def. Once it's clobbered, then
+ // it's no longer available for copy propagation.
+ if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) ==
+ SrcMap[Src].end()) {
+ SrcMap[Src].push_back(Def);
+ }
+
+ continue;
+ }
+
+ // Not a copy.
+ SmallVector<unsigned, 2> Defs;
+ int RegMaskOpNum = -1;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ RegMaskOpNum = i;
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ report_fatal_error("MachineCopyPropagation should be run after"
+ " register allocation!");
+
+ if (MO.isDef()) {
+ Defs.push_back(Reg);
+ continue;
+ }
+
+ // If 'Reg' is defined by a copy, the copy is no longer a candidate
+ // for elimination.
+ DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(Reg);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CI = CopyMap.find(*AS);
+ if (CI != CopyMap.end())
+ MaybeDeadCopies.remove(CI->second);
+ }
+ }
+
+ // The instruction has a register mask operand which means that it clobbers
+ // a large set of registers. It is possible to use the register mask to
+ // prune the available copies, but treat it like a basic block boundary for
+ // now.
+ if (RegMaskOpNum >= 0) {
+ // Erase any MaybeDeadCopies whose destination register is clobbered.
+ const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum);
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ unsigned Reg = (*DI)->getOperand(0).getReg();
+ if (ReservedRegs.test(Reg) || !MaskMO.clobbersPhysReg(Reg))
+ continue;
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+
+ // Clear all data structures as if we were beginning a new basic block.
+ MaybeDeadCopies.clear();
+ AvailCopyMap.clear();
+ CopyMap.clear();
+ SrcMap.clear();
+ continue;
+ }
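
A note on the register mask convention relied on here: a regmask operand carries one bit per physical register, and a set bit means the register is preserved across the instruction. clobbersPhysReg is just the negated bit test, roughly like the following sketch of the convention (illustrative, not the exact LLVM source):

static bool clobbersPhysReg(const uint32_t *RegMask, unsigned PhysReg) {
  // Bit PhysReg of the mask is SET when the register is preserved and
  // CLEAR when it is clobbered, so clobbering is the negated test.
  return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
}
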
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+
+ // No longer defined by a copy.
+ CopyMap.erase(Reg);
+ AvailCopyMap.erase(Reg);
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ CopyMap.erase(*AS);
+ AvailCopyMap.erase(*AS);
+ }
+
+ // If 'Reg' was previously the source of a copy, it is no longer available
+ // for copy propagation.
+ SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap);
+ }
+ }
+
+ // If MBB doesn't have successors, delete the copies whose defs are not used.
+ // If MBB does have successors, then conservatively assume the defs are live-out
+ // since we don't want to trust live-in lists.
+ if (MBB.succ_empty()) {
+ for (SmallSetVector<MachineInstr*, 8>::iterator
+ DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
+ DI != DE; ++DI) {
+ if (!ReservedRegs.test((*DI)->getOperand(0).getReg())) {
+ (*DI)->eraseFromParent();
+ Changed = true;
+ ++NumDeletes;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+
+ TRI = MF.getTarget().getRegisterInfo();
+ ReservedRegs = TRI->getReservedRegs(MF);
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ Changed |= CopyPropagateBlock(*I);
+
+ return Changed;
+}
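
For readers new to the pass, the bookkeeping above can be modeled in a few lines of plain C++. This is a deliberately oversimplified sketch that only covers the reversed-copy elimination (no aliases, no sub-registers, every non-copy treated as a full clobber); it is not the LLVM API:

#include <map>
#include <string>
#include <vector>

struct Inst { std::string Dst, Src; bool IsCopy; };

// Toy analogue of AvailCopyMap: while 'Dst = COPY Src' is still live,
// a later 'Src = COPY Dst' is a no-op and can be deleted.
static void propagateCopies(std::vector<Inst> &Block) {
  std::map<std::string, std::string> Avail; // copy dst -> copy src
  std::vector<Inst> Out;
  for (std::vector<Inst>::iterator I = Block.begin(), E = Block.end();
       I != E; ++I) {
    if (I->IsCopy) {
      std::map<std::string, std::string>::iterator It = Avail.find(I->Src);
      if (It != Avail.end() && It->second == I->Dst)
        continue;                 // the reversed-copy case deleted above
      // Dst is redefined: forget any copy it participated in, which is
      // the toy version of SourceNoLongerAvailable plus the map erases.
      Avail.erase(I->Dst);
      for (It = Avail.begin(); It != Avail.end(); )
        if (It->second == I->Dst) Avail.erase(It++); else ++It;
      Avail[I->Dst] = I->Src;     // record the new available copy
    } else {
      Avail.clear();              // conservative: any other def clobbers
    }
    Out.push_back(*I);
  }
  Block.swap(Out);
}
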
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 20066a067b8f..d8c2f6a2eaef 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -13,12 +13,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Config/config.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -28,6 +25,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
@@ -197,9 +195,10 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *
MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
uint64_t s, unsigned base_alignment,
- const MDNode *TBAAInfo) {
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
}
MachineMemOperand *
@@ -286,7 +285,13 @@ void MachineFunction::dump() const {
}
void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
- OS << "# Machine code for function " << Fn->getName() << ":\n";
+ OS << "# Machine code for function " << Fn->getName() << ": ";
+ if (RegInfo) {
+ OS << (RegInfo->isSSA() ? "SSA" : "Post SSA");
+ if (!RegInfo->tracksLiveness())
+ OS << ", not tracking liveness";
+ }
+ OS << '\n';
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -335,7 +340,7 @@ namespace llvm {
DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
static std::string getGraphName(const MachineFunction *F) {
- return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+ return "CFG for '" + F->getFunction()->getName().str() + "' function";
}
std::string getNodeLabel(const MachineBasicBlock *Node,
@@ -368,7 +373,7 @@ namespace llvm {
void MachineFunction::viewCFG() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getNameStr());
+ ViewGraph(this, "mf" + getFunction()->getName());
#else
errs() << "MachineFunction::viewCFG is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -378,7 +383,7 @@ void MachineFunction::viewCFG() const
void MachineFunction::viewCFGOnly() const
{
#ifndef NDEBUG
- ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
+ ViewGraph(this, "mf" + getFunction()->getName(), true);
#else
errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
@@ -464,7 +469,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
BV.set(*CSR);
// The entry MBB always has all CSRs pristine.
@@ -532,6 +537,8 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerSize();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return 8;
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
@@ -539,8 +546,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
case MachineJumpTableInfo::EK_Inline:
return 0;
}
- assert(0 && "Unknown jump table encoding!");
- return ~0;
+ llvm_unreachable("Unknown jump table encoding!");
}
/// getEntryAlignment - Return the alignment of each entry in the jump table.
@@ -551,6 +557,8 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
return TD.getPointerABIAlignment();
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ return TD.getABIIntegerTypeAlignment(64);
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
@@ -558,8 +566,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
case MachineJumpTableInfo::EK_Inline:
return 1;
}
- assert(0 && "Unknown jump table encoding!");
- return ~0;
+ llvm_unreachable("Unknown jump table encoding!");
}
/// createJumpTableIndex - Create a new jump table entry in the jump table info.
@@ -619,6 +626,8 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); }
// MachineConstantPool implementation
//===----------------------------------------------------------------------===//
+void MachineConstantPoolValue::anchor() { }
+
Type *MachineConstantPoolEntry::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
@@ -653,35 +662,37 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
// reject them.
if (A->getType() == B->getType()) return false;
+ // We can't handle structs or arrays.
+ if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
+ isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
+ return false;
+
// For now, only support constants with the same size.
- if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+ uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
+ if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
+ StoreSize > 128)
return false;
- // If a floating-point value and an integer value have the same encoding,
- // they can share a constant-pool entry.
- if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
- if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
- return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
- if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
- if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
- return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
-
- // Two vectors can share an entry if each pair of corresponding
- // elements could.
- if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
- if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
- if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
- return false;
- for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
- if (!CanShareConstantPoolEntry(AV->getOperand(i),
- BV->getOperand(i), TD))
- return false;
- return true;
- }
-
- // TODO: Handle other cases.
-
- return false;
+ Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
+
+ // Try constant folding a bitcast of both instructions to an integer. If we
+ // get two identical ConstantInt's, then we are good to share them. We use
+ // the constant folding APIs to do this so that we get the benefit of
+ // TargetData.
+ if (isa<PointerType>(A->getType()))
+ A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(A), TD);
+ else if (A->getType() != IntTy)
+ A = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(A), TD);
+ if (isa<PointerType>(B->getType()))
+ B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy,
+ const_cast<Constant*>(B), TD);
+ else if (B->getType() != IntTy)
+ B = ConstantFoldInstOperands(Instruction::BitCast, IntTy,
+ const_cast<Constant*>(B), TD);
+
+ return A == B;
}
/// getConstantPoolIndex - Create a new entry in the constant pool or return
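
The upshot of the rewritten CanShareConstantPoolEntry is that sharing is now decided purely by bit pattern: both constants are folded to a common integer type, and the pool entry is shared when the folded values coincide. A self-contained illustration of the underlying equivalence (illustrative only, not the pass itself):

#include <cassert>
#include <cstring>
#include <stdint.h>

// float 1.0f and the 32-bit integer 0x3f800000 have the same store size
// and the same in-memory bits on IEEE-754 targets, so they may share one
// constant-pool slot.
static bool sameBitPattern(float F, uint32_t I) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits)); // well-defined bit-cast
  return Bits == I;
}

int main() {
  assert(sameBitPattern(1.0f, 0x3f800000u));
  return 0;
}
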
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 054c750c9f2b..35591e1649d3 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -19,9 +19,8 @@ using namespace llvm;
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
- CodeGenOpt::Level OL) :
- FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
+ FunctionPass(ID), TM(tm), MF(0) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index a240667f7d6a..e553a0463a2a 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -178,6 +179,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsKill = isKill;
IsDead = isDead;
IsUndef = isUndef;
+ IsInternalRead = false;
IsEarlyClobber = false;
IsDebug = isDebug;
SubReg = 0;
@@ -191,7 +193,6 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return false;
switch (getType()) {
- default: llvm_unreachable("Unrecognized operand type");
case MachineOperand::MO_Register:
return getReg() == Other.getReg() && isDef() == Other.isDef() &&
getSubReg() == Other.getSubReg();
@@ -216,11 +217,14 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
getOffset() == Other.getOffset();
case MachineOperand::MO_BlockAddress:
return getBlockAddress() == Other.getBlockAddress();
+ case MO_RegisterMask:
+ return getRegMask() == Other.getRegMask();
case MachineOperand::MO_MCSymbol:
return getMCSymbol() == Other.getMCSymbol();
case MachineOperand::MO_Metadata:
return getMetadata() == Other.getMetadata();
}
+ llvm_unreachable("Invalid machine operand type");
}
/// print - Print the specified machine operand.
@@ -240,7 +244,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
- isEarlyClobber()) {
+ isInternalRead() || isEarlyClobber()) {
OS << '<';
bool NeedComma = false;
if (isDef()) {
@@ -256,14 +260,26 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
NeedComma = true;
}
- if (isKill() || isDead() || isUndef()) {
+ if (isKill() || isDead() || isUndef() || isInternalRead()) {
if (NeedComma) OS << ',';
- if (isKill()) OS << "kill";
- if (isDead()) OS << "dead";
+ NeedComma = false;
+ if (isKill()) {
+ OS << "kill";
+ NeedComma = true;
+ }
+ if (isDead()) {
+ OS << "dead";
+ NeedComma = true;
+ }
if (isUndef()) {
- if (isKill() || isDead())
- OS << ',';
+ if (NeedComma) OS << ',';
OS << "undef";
+ NeedComma = true;
+ }
+ if (isInternalRead()) {
+ if (NeedComma) OS << ',';
+ OS << "internal";
+ NeedComma = true;
}
}
OS << '>';
@@ -311,6 +327,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
OS << '>';
break;
+ case MachineOperand::MO_RegisterMask:
+ OS << "<regmask>";
+ break;
case MachineOperand::MO_Metadata:
OS << '<';
WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
@@ -319,8 +338,6 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
case MachineOperand::MO_MCSymbol:
OS << "<MCSym=" << *getMCSymbol() << '>';
break;
- default:
- llvm_unreachable("Unrecognized operand type");
}
if (unsigned TF = getTargetFlags())
@@ -364,10 +381,11 @@ MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
uint64_t s, unsigned int a,
- const MDNode *TBAAInfo)
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges)
: PtrInfo(ptrinfo), Size(s),
Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
- TBAAInfo(TBAAInfo) {
+ TBAAInfo(TBAAInfo), Ranges(Ranges) {
assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
"invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
@@ -465,7 +483,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MCID NULL and no operands.
MachineInstr::MachineInstr()
: MCID(0), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0),
+ NumMemRefs(0), MemRefs(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -473,10 +491,10 @@ MachineInstr::MachineInstr()
void MachineInstr::addImplicitDefUseOperands() {
if (MCID->ImplicitDefs)
- for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
if (MCID->ImplicitUses)
- for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
+ for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses)
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
@@ -485,7 +503,7 @@ void MachineInstr::addImplicitDefUseOperands() {
/// the MCInstrDesc.
MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -500,7 +518,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
unsigned NumImplicitOps = 0;
if (!NoImp)
NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -516,7 +534,7 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
/// basic block.
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0) {
+ NumMemRefs(0), MemRefs(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -532,7 +550,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const MCInstrDesc &tid)
: MCID(&tid), Flags(0), AsmPrinterFlags(0),
- MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
unsigned NumImplicitOps =
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
@@ -547,7 +565,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0),
- MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
+ NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -722,17 +740,33 @@ void MachineInstr::RemoveOperand(unsigned OpNo) {
void MachineInstr::addMemOperand(MachineFunction &MF,
MachineMemOperand *MO) {
mmo_iterator OldMemRefs = MemRefs;
- mmo_iterator OldMemRefsEnd = MemRefsEnd;
+ uint16_t OldNumMemRefs = NumMemRefs;
- size_t NewNum = (MemRefsEnd - MemRefs) + 1;
+ uint16_t NewNum = NumMemRefs + 1;
mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum);
- mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum;
- std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs);
+ std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs);
NewMemRefs[NewNum - 1] = MO;
MemRefs = NewMemRefs;
- MemRefsEnd = NewMemRefsEnd;
+ NumMemRefs = NewNum;
+}
+
+bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const {
+ const MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::const_instr_iterator MII = *this; ++MII;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ if (MII->getDesc().getFlags() & Mask) {
+ if (Type == AnyInBundle)
+ return true;
+ } else {
+ if (Type == AllInBundle)
+ return false;
+ }
+ ++MII;
+ }
+
+ return Type == AllInBundle;
}
bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
@@ -743,6 +777,19 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
Other->getNumOperands() != getNumOperands())
return false;
+ if (isBundle()) {
+ // Both instructions are bundles, compare MIs inside the bundle.
+ MachineBasicBlock::const_instr_iterator I1 = *this;
+ MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
+ MachineBasicBlock::const_instr_iterator I2 = *Other;
+ MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end();
+ while (++I1 != E1 && I1->isInsideBundle()) {
+ ++I2;
+ if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check))
+ return false;
+ }
+ }
+
// Check operands to make sure they match.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
@@ -789,6 +836,18 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->remove(MI);
+ }
+ }
getParent()->remove(this);
return this;
}
@@ -798,6 +857,17 @@ MachineInstr *MachineInstr::removeFromParent() {
/// block, and deletes it.
void MachineInstr::eraseFromParent() {
assert(getParent() && "Not embedded in a basic block!");
+ // If it's a bundle then remove the MIs inside the bundle as well.
+ if (isBundle()) {
+ MachineBasicBlock *MBB = getParent();
+ MachineBasicBlock::instr_iterator MII = *this; ++MII;
+ MachineBasicBlock::instr_iterator E = MBB->instr_end();
+ while (MII != E && MII->isInsideBundle()) {
+ MachineInstr *MI = &*MII;
+ ++MII;
+ MBB->erase(MI);
+ }
+ }
getParent()->erase(this);
}
@@ -817,6 +887,16 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
+/// isBundled - Return true if this instruction is part of a bundle. This is
+/// true if either it or its following instruction is marked "InsideBundle".
+bool MachineInstr::isBundled() const {
+ if (isInsideBundle())
+ return true;
+ MachineBasicBlock::const_instr_iterator nextMI = this;
+ ++nextMI;
+ return nextMI != Parent->instr_end() && nextMI->isInsideBundle();
+}
+
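
For orientation, a finalized bundle prints along these lines: a BUNDLE instruction carries the externally visible implicit defs and uses, the bundled instructions follow it flagged InsideBundle, and a use of a register defined earlier in the same bundle is tagged <internal>. The opcodes and registers below are invented for illustration:

  BUNDLE %r0<imp-def>, %r1<imp-def,dead>, %r2<imp-use,kill>
    %r0<def> = ADDri %r2<kill>, 1     ; InsideBundle
    %r1<def> = MOVr %r0<internal>     ; InsideBundle, reads the local def
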
bool MachineInstr::isStackAligningInlineAsm() const {
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -887,6 +967,20 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return NULL;
}
+/// getBundleSize - Return the number of instructions inside the MI bundle.
+unsigned MachineInstr::getBundleSize() const {
+ assert(isBundle() && "Expecting a bundle");
+
+ MachineBasicBlock::const_instr_iterator I = *this;
+ unsigned Size = 0;
+ while ((++I)->isInsideBundle()) {
+ ++Size;
+ }
+ assert(Size > 1 && "Malformed bundle");
+
+ return Size;
+}
+
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
@@ -948,6 +1042,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
+ // Accept regmask operands when Overlap is set.
+ // Ignore them when looking for a specific def operand (Overlap == false).
+ if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg))
+ return i;
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
@@ -1118,6 +1216,8 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
+ assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isPredicable())
return;
@@ -1159,13 +1259,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (MCID->mayStore() || MCID->isCall()) {
+ if (mayStore() || isCall()) {
SawStore = true;
return false;
}
if (isLabel() || isDebugValue() ||
- MCID->isTerminator() || hasUnmodeledSideEffects())
+ isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,7 +1273,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (MCID->mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1213,9 +1313,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!MCID->mayStore() &&
- !MCID->mayLoad() &&
- !MCID->isCall() &&
+ if (!mayStore() &&
+ !mayLoad() &&
+ !isCall() &&
!hasUnmodeledSideEffects())
return false;
@@ -1239,7 +1339,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!MCID->mayLoad())
+ if (!mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1253,6 +1353,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
E = memoperands_end(); I != E; ++I) {
if ((*I)->isVolatile()) return false;
if ((*I)->isStore()) return false;
+ if ((*I)->isInvariant()) return true;
if (const Value *V = (*I)->getValue()) {
// A load from a constant PseudoSourceValue is invariant.
@@ -1291,7 +1392,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
}
bool MachineInstr::hasUnmodeledSideEffects() const {
- if (getDesc().hasUnmodeledSideEffects())
+ if (hasProperty(MCID::UnmodeledSideEffects))
return true;
if (isInlineAsm()) {
unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
@@ -1384,7 +1485,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << " = ";
// Print the opcode name.
- OS << getDesc().getName();
+ if (TM && TM->getInstrInfo())
+ OS << TM->getInstrInfo()->getName(getOpcode());
+ else
+ OS << "UNKNOWN";
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
@@ -1419,14 +1523,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
// LiveVariables is run, or we may be looking at a non-allocatable reg.
- if (MF && getDesc().isCall() &&
+ if (MF && isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
- for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
unsigned AliasReg = *Alias; ++Alias)
if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
HasAliasLive = true;
@@ -1617,6 +1721,20 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
return Found;
}
+void MachineInstr::clearRegisterKills(unsigned Reg,
+ const TargetRegisterInfo *RegInfo) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ RegInfo = 0;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ continue;
+ unsigned OpReg = MO.getReg();
+ if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg)))
+ MO.setIsKill(false);
+ }
+}
+
bool MachineInstr::addRegisterDead(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
@@ -1689,16 +1807,21 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
true /*IsImp*/));
}
-void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
const TargetRegisterInfo &TRI) {
+ bool HasRegMask = false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
MachineOperand &MO = getOperand(i);
+ if (MO.isRegMask()) {
+ HasRegMask = true;
+ continue;
+ }
if (!MO.isReg() || !MO.isDef()) continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
bool Dead = true;
- for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
- E = UsedRegs.end(); I != E; ++I)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
if (TRI.regsOverlap(*I, Reg)) {
Dead = false;
break;
@@ -1706,53 +1829,66 @@ void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRe
// If there are no uses, including partial uses, the def is dead.
if (Dead) MO.setIsDead();
}
+
+ // This is a call with a register mask operand.
+ // Mask clobbers are always dead, so add defs for the non-dead defines.
+ if (HasRegMask)
+ for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ I != E; ++I)
+ addRegisterDefined(*I, &TRI);
}
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
- unsigned Hash = MI->getOpcode() * 37;
+ // Build up a buffer of hash code components.
+ //
+ // FIXME: This is a total hack. We should have a hash_value overload for
+ // MachineOperand, but currently that doesn't work because there are many
+ // different ideas of "equality" and thus different sets of information that
+ // contribute to the hash code. This one happens to want to take a specific
+ // subset. And it's still not clear that this routine uses the *correct*
+ // subset of information when computing the hash code. The goal is to use the
+ // same inputs for the hash code here that MachineInstr::isIdenticalTo uses to
+ // test for equality when passed the 'IgnoreVRegDefs' filter flag. It would
+ // be very useful to factor the selection of relevant inputs out of the two
+ // functions and into a common routine, but it's not clear how that can be
+ // done.
+ SmallVector<size_t, 8> HashComponents;
+ HashComponents.reserve(MI->getNumOperands() + 1);
+ HashComponents.push_back(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- uint64_t Key = (uint64_t)MO.getType() << 32;
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
- Key |= MO.getReg();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getReg()));
break;
case MachineOperand::MO_Immediate:
- Key |= MO.getImm();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getImm()));
break;
case MachineOperand::MO_FrameIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_JumpTableIndex:
- Key |= MO.getIndex();
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getIndex()));
break;
case MachineOperand::MO_MachineBasicBlock:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMBB()));
break;
case MachineOperand::MO_GlobalAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getGlobal()));
break;
case MachineOperand::MO_BlockAddress:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ HashComponents.push_back(hash_combine(MO.getType(),
+ MO.getBlockAddress()));
break;
case MachineOperand::MO_MCSymbol:
- Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+ HashComponents.push_back(hash_combine(MO.getType(), MO.getMCSymbol()));
break;
}
- Key += ~(Key << 32);
- Key ^= (Key >> 22);
- Key += ~(Key << 13);
- Key ^= (Key >> 8);
- Key += (Key << 3);
- Key ^= (Key >> 15);
- Key += ~(Key << 27);
- Key ^= (Key >> 31);
- Hash = (unsigned)Key + Hash * 37;
- }
- return Hash;
+ }
+ return hash_combine_range(HashComponents.begin(), HashComponents.end());
}
void MachineInstr::emitError(StringRef Msg) const {
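
The hand-rolled 64-bit mixing above is replaced by the then-new llvm/ADT/Hashing.h utilities the patch includes. A sketch of the same pattern in isolation, assuming only hash_value, hash_combine_range, and SmallVector as used in the hunk (hashInstrKey is a made-up name):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"

// Collect one hash component per operand key, then fold the buffer with
// a single hash_combine_range, the same shape getHashValue now uses.
llvm::hash_code hashInstrKey(unsigned Opcode,
                             llvm::ArrayRef<int64_t> OperandKeys) {
  llvm::SmallVector<size_t, 8> Components;
  Components.reserve(OperandKeys.size() + 1);
  Components.push_back(Opcode);
  for (unsigned i = 0, e = OperandKeys.size(); i != e; ++i)
    Components.push_back(llvm::hash_value(OperandKeys[i]));
  return llvm::hash_combine_range(Components.begin(), Components.end());
}
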
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
new file mode 100644
index 000000000000..73489a7160bf
--- /dev/null
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -0,0 +1,278 @@
+//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+namespace {
+ class UnpackMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ UnpackMachineBundles() : MachineFunctionPass(ID) {
+ initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char UnpackMachineBundles::ID = 0;
+char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID;
+INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
+ "Unpack machine instruction bundles", false, false)
+
+bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+
+ for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(),
+ MIE = MBB->instr_end(); MII != MIE; ) {
+ MachineInstr *MI = &*MII;
+
+ // Remove BUNDLE instruction and the InsideBundle flags from bundled
+ // instructions.
+ if (MI->isBundle()) {
+ while (++MII != MIE && MII->isInsideBundle()) {
+ MII->setIsInsideBundle(false);
+ for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MII->getOperand(i);
+ if (MO.isReg() && MO.isInternalRead())
+ MO.setIsInternalRead(false);
+ }
+ }
+ MI->eraseFromParent();
+
+ Changed = true;
+ continue;
+ }
+
+ ++MII;
+ }
+ }
+
+ return Changed;
+}
+
+
+namespace {
+ class FinalizeMachineBundles : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification
+ FinalizeMachineBundles() : MachineFunctionPass(ID) {
+ initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ };
+} // end anonymous namespace
+
+char FinalizeMachineBundles::ID = 0;
+char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID;
+INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles",
+ "Finalize machine instruction bundles", false, false)
+
+bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) {
+ return llvm::finalizeBundles(MF);
+}
+
+
+/// finalizeBundle - Finalize a machine instruction bundle which includes
+/// the sequence of instructions from FirstMI to LastMI (exclusive).
+/// This routine adds a BUNDLE instruction to represent the bundle, it adds
+/// IsInternalRead markers to MachineOperands which are defined inside the
+/// bundle, and it copies externally visible defs and uses to the BUNDLE
+/// instruction.
+void llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI,
+ MachineBasicBlock::instr_iterator LastMI) {
+ assert(FirstMI != LastMI && "Empty bundle?");
+
+ const TargetMachine &TM = MBB.getParent()->getTarget();
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(),
+ TII->get(TargetOpcode::BUNDLE));
+
+ SmallVector<unsigned, 8> LocalDefs;
+ SmallSet<unsigned, 8> LocalDefSet;
+ SmallSet<unsigned, 8> DeadDefSet;
+ SmallSet<unsigned, 8> KilledDefSet;
+ SmallVector<unsigned, 8> ExternUses;
+ SmallSet<unsigned, 8> ExternUseSet;
+ SmallSet<unsigned, 8> KilledUseSet;
+ SmallSet<unsigned, 8> UndefUseSet;
+ SmallVector<MachineOperand*, 4> Defs;
+ for (; FirstMI != LastMI; ++FirstMI) {
+ for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = FirstMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ Defs.push_back(&MO);
+ continue;
+ }
+
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (LocalDefSet.count(Reg)) {
+ MO.setIsInternalRead();
+ if (MO.isKill())
+ // Internal def is now killed.
+ KilledDefSet.insert(Reg);
+ } else {
+ if (ExternUseSet.insert(Reg)) {
+ ExternUses.push_back(Reg);
+ if (MO.isUndef())
+ UndefUseSet.insert(Reg);
+ }
+ if (MO.isKill())
+ // External def is now killed.
+ KilledUseSet.insert(Reg);
+ }
+ }
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ MachineOperand &MO = *Defs[i];
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ if (LocalDefSet.insert(Reg)) {
+ LocalDefs.push_back(Reg);
+ if (MO.isDead()) {
+ DeadDefSet.insert(Reg);
+ }
+ } else {
+ // Re-defined inside the bundle, it's no longer killed.
+ KilledDefSet.erase(Reg);
+ if (!MO.isDead())
+ // Previously defined but dead.
+ DeadDefSet.erase(Reg);
+ }
+
+ if (!MO.isDead()) {
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
+ unsigned SubReg = *SubRegs; ++SubRegs) {
+ if (LocalDefSet.insert(SubReg))
+ LocalDefs.push_back(SubReg);
+ }
+ }
+ }
+
+ FirstMI->setIsInsideBundle();
+ Defs.clear();
+ }
+
+ SmallSet<unsigned, 8> Added;
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ if (Added.insert(Reg)) {
+ // If it's not live beyond end of the bundle, mark it dead.
+ bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg);
+ MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) |
+ getImplRegState(true));
+ }
+ }
+
+ for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) {
+ unsigned Reg = ExternUses[i];
+ bool isKill = KilledUseSet.count(Reg);
+ bool isUndef = UndefUseSet.count(Reg);
+ MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) |
+ getImplRegState(true));
+ }
+}
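
The operand walk above reduces to a two-pass rule per instruction: every register use is either internal (the register was defined earlier in the same bundle) or an external use of the bundle; defs are recorded only after the instruction's own uses, so an instruction never sees its own defs as internal. A condensed sketch without sub-register or kill/dead tracking (toy types, not the LLVM classes):

#include <set>
#include <vector>

struct Operand { unsigned Reg; bool IsDef; };
typedef std::vector<Operand> ToyInstr;

// LocalDefs: registers defined somewhere in the bundle.
// ExternUses: registers read before any in-bundle definition.
static void classifyBundle(const std::vector<ToyInstr> &Bundle,
                           std::set<unsigned> &LocalDefs,
                           std::set<unsigned> &ExternUses) {
  for (unsigned i = 0, e = Bundle.size(); i != e; ++i) {
    const ToyInstr &MI = Bundle[i];
    // Uses first: a read of an already-local register is an internal
    // read (MO.setIsInternalRead() in the real code); otherwise it is
    // visible outside the bundle.
    for (unsigned j = 0, je = MI.size(); j != je; ++j)
      if (!MI[j].IsDef && !LocalDefs.count(MI[j].Reg))
        ExternUses.insert(MI[j].Reg);
    // Then record this instruction's defs for later instructions.
    for (unsigned j = 0, je = MI.size(); j != je; ++j)
      if (MI[j].IsDef)
        LocalDefs.insert(MI[j].Reg);
  }
}
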
+
+/// finalizeBundle - Same functionality as the previous finalizeBundle except
+/// the last instruction in the bundle is not provided as an input. This is
+/// used in cases where bundles are pre-determined by marking instructions
+/// with the 'InsideBundle' marker. It returns the MBB instruction iterator that
+/// points to the end of the bundle.
+MachineBasicBlock::instr_iterator
+llvm::finalizeBundle(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator FirstMI) {
+ MachineBasicBlock::instr_iterator E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator LastMI = llvm::next(FirstMI);
+ while (LastMI != E && LastMI->isInsideBundle())
+ ++LastMI;
+ finalizeBundle(MBB, FirstMI, LastMI);
+ return LastMI;
+}
+
+/// finalizeBundles - Finalize instruction bundles in the specified
+/// MachineFunction. Return true if any bundles are finalized.
+bool llvm::finalizeBundles(MachineFunction &MF) {
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin();
+ MachineBasicBlock::instr_iterator MIE = MBB.instr_end();
+ if (MII == MIE)
+ continue;
+
+ assert(!MII->isInsideBundle() &&
+ "First instr cannot be inside bundle before finalization!");
+ for (++MII; MII != MIE; ) {
+ if (!MII->isInsideBundle())
+ ++MII;
+ else {
+ MII = finalizeBundle(MBB, llvm::prior(MII));
+ Changed = true;
+ }
+ }
+ }
+
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineOperand iterator
+//===----------------------------------------------------------------------===//
+
+MachineOperandIteratorBase::RegInfo
+MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg,
+ SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) {
+ RegInfo RI = { false, false, false };
+ for (; isValid(); ++*this) {
+ MachineOperand &MO = deref();
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ // Remember each (MI, OpNo) that refers to Reg.
+ if (Ops)
+ Ops->push_back(std::make_pair(MO.getParent(), getOperandNo()));
+
+ // Both defs and uses can read virtual registers.
+ if (MO.readsReg()) {
+ RI.Reads = true;
+ if (MO.isDef())
+ RI.Tied = true;
+ }
+
+ // Only defs can write.
+ if (MO.isDef())
+ RI.Writes = true;
+ else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo()))
+ RI.Tied = true;
+ }
+ return RI;
+}
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index a1f80d5282e0..8c562cc4454a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -45,7 +45,7 @@ using namespace llvm;
static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
cl::desc("MachineLICM should avoid speculation"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
@@ -60,8 +60,6 @@ STATISTIC(NumPostRAHoisted,
namespace {
class MachineLICM : public MachineFunctionPass {
- bool PreRegAlloc;
-
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetLowering *TLI;
@@ -69,6 +67,7 @@ namespace {
const MachineFrameInfo *MFI;
MachineRegisterInfo *MRI;
const InstrItineraryData *InstrItins;
+ bool PreRegAlloc;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -81,7 +80,13 @@ namespace {
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
- BitVector AllocatableSet;
+ // Exit blocks for CurLoop.
+ SmallVector<MachineBasicBlock*, 8> ExitBlocks;
+
+ bool isExitBlock(const MachineBasicBlock *MBB) const {
+ return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
+ ExitBlocks.end();
+ }
// Track 'estimated' register pressure.
SmallSet<unsigned, 32> RegSeen;
@@ -122,8 +127,6 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const { return "Machine Instruction LICM"; }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
@@ -165,7 +168,9 @@ namespace {
/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
/// gather register def and frame object update information.
- void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+ void ProcessMI(MachineInstr *MI,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
SmallVector<CandidateInfo, 32> &Candidates);
@@ -182,12 +187,12 @@ namespace {
/// invariant. I.e., all virtual register operands are defined outside of
/// the loop, physical registers aren't accessed (explicitly or implicitly),
/// and the instruction is hoistable.
- ///
+ ///
bool IsLoopInvariantInst(MachineInstr &I);
- /// HasAnyPHIUse - Return true if the specified register is used by any
- /// phi node.
- bool HasAnyPHIUse(unsigned Reg) const;
+ /// HasLoopPHIUse - Return true if the specified instruction is used by any
+ /// phi node in the current loop.
+ bool HasLoopPHIUse(const MachineInstr *MI) const;
/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
/// and a use in the current loop, return true if the target considered
@@ -200,7 +205,7 @@ namespace {
/// CanCauseHighRegPressure - Visit BBs from header to current BB,
/// check if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
- bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
/// UpdateBackTraceRegPressure - Traverse the back trace from header to
/// the current block and update their register pressures to reflect the
@@ -215,13 +220,25 @@ namespace {
/// If not then a load from this mbb may not be safe to hoist.
bool IsGuaranteedToExecute(MachineBasicBlock *BB);
- /// HoistRegion - Walk the specified region of the CFG (defined by all
- /// blocks dominated by the specified block, and that are in the current
- /// loop) in depth first order w.r.t the DominatorTree. This allows us to
- /// visit definitions before uses, allowing us to hoist a loop body in one
- /// pass without iteration.
+ void EnterScope(MachineBasicBlock *MBB);
+
+ void ExitScope(MachineBasicBlock *MBB);
+
+ /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+ /// dominator tree node if it's a leaf or all of its children are done. Walk
+ /// up the dominator tree to destroy ancestors which are now done.
+ void ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
+
+ /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+ /// blocks dominated by the specified header block, and that are in the
+ /// current loop) in depth first order w.r.t the DominatorTree. This allows
+ /// us to visit definitions before uses, allowing us to hoist a loop body in
+ /// one pass without iteration.
///
- void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+ void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
@@ -278,6 +295,7 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
+char &llvm::MachineLICMID = MachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -286,10 +304,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
"Machine Loop Invariant Code Motion", false, false)
-FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
- return new MachineLICM(PreRegAlloc);
-}
-
/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
/// loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
@@ -305,12 +319,6 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
}
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
- if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
- else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
- DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
-
Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
@@ -319,7 +327,14 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
InstrItins = TM->getInstrItineraryData();
- AllocatableSet = TRI->getAllocatableSet(MF);
+
+ PreRegAlloc = MRI->isSSA();
+
+ if (PreRegAlloc)
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
+ else
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
if (PreRegAlloc) {
// Estimate register pressure during pre-regalloc pass.
@@ -341,6 +356,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
while (!Worklist.empty()) {
CurLoop = Worklist.pop_back_val();
CurPreheader = 0;
+ ExitBlocks.clear();
// If this is done before regalloc, only visit outer-most preheader-sporting
// loops.
@@ -349,6 +365,8 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ CurLoop->getExitBlocks(ExitBlocks);
+
if (!PreRegAlloc)
HoistRegionPostRA();
else {
@@ -356,7 +374,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistRegion(N, true);
+ HoistOutOfLoop(N);
CSEMap.clear();
}
}
@@ -383,7 +401,8 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
/// ProcessMI - Examine the instruction as a potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
- unsigned *PhysRegDefs,
+ BitVector &PhysRegDefs,
+ BitVector &PhysRegClobbers,
SmallSet<int, 32> &StoredFIs,
SmallVector<CandidateInfo, 32> &Candidates) {
bool RuledOut = false;
@@ -402,6 +421,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
continue;
}
+ // We can't hoist an instruction defining a physreg that is clobbered in
+ // the loop.
+ if (MO.isRegMask()) {
+ PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+ continue;
+ }
+
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -411,7 +437,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
"Not expecting virtual register!");
if (!MO.isDef()) {
- if (Reg && PhysRegDefs[Reg])
+ if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
// If it's using a non-loop-invariant register, then it's obviously not
// safe to hoist.
HasNonInvariantUse = true;
@@ -419,9 +445,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
}
if (MO.isImplicit()) {
- ++PhysRegDefs[Reg];
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- ++PhysRegDefs[*AS];
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ PhysRegClobbers.set(*AS);
if (!MO.isDead())
// Non-dead implicit def? This cannot be hoisted.
RuledOut = true;
@@ -438,14 +463,17 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
Def = Reg;
// If we have already seen another instruction that defines the same
- // register, then this is not safe.
- if (++PhysRegDefs[Reg] > 1)
- // MI defined register is seen defined by another instruction in
- // the loop, it cannot be a LICM candidate.
- RuledOut = true;
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- if (++PhysRegDefs[*AS] > 1)
+ // register, then this is not safe. A second def is indicated by setting a
+ // PhysRegClobbers bit.
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS) {
+ if (PhysRegDefs.test(*AS))
+ PhysRegClobbers.set(*AS);
+ if (PhysRegClobbers.test(*AS))
+ // MI defined register is seen defined by another instruction in
+ // the loop, it cannot be a LICM candidate.
RuledOut = true;
+ PhysRegDefs.set(*AS);
+ }
}
// Only consider reloads for now and remats which do not have register
@@ -461,9 +489,13 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
/// invariants out to the preheader.
void MachineLICM::HoistRegionPostRA() {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
unsigned NumRegs = TRI->getNumRegs();
- unsigned *PhysRegDefs = new unsigned[NumRegs];
- std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+ BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+ BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
SmallVector<CandidateInfo, 32> Candidates;
SmallSet<int, 32> StoredFIs;
@@ -485,16 +517,31 @@ void MachineLICM::HoistRegionPostRA() {
for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
E = BB->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- ++PhysRegDefs[Reg];
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- ++PhysRegDefs[*AS];
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ PhysRegDefs.set(*AS);
}
SpeculationState = SpeculateUnknown;
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
MachineInstr *MI = &*MII;
- ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+ ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
+ }
+ }
+
+ // Gather the registers read / clobbered by the terminator.
+ BitVector TermRegs(NumRegs);
+ MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
+ if (TI != Preheader->end()) {
+ for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = TI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ for (const uint16_t *AS = TRI->getOverlaps(Reg); *AS; ++AS)
+ TermRegs.set(*AS);
}
}
@@ -503,19 +550,25 @@ void MachineLICM::HoistRegionPostRA() {
// instruction in the loop.
// 2. If the candidate is a load from stack slot (always true for now),
// check if the slot is stored anywhere in the loop.
+ // 3. Make sure the candidate's def does not clobber a register read by
+ // the terminator, and that the def itself is not clobbered by the
+ // terminator.
for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
if (Candidates[i].FI != INT_MIN &&
StoredFIs.count(Candidates[i].FI))
continue;
- if (PhysRegDefs[Candidates[i].Def] == 1) {
+ unsigned Def = Candidates[i].Def;
+ if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidates[i].MI;
for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
const MachineOperand &MO = MI->getOperand(j);
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
- if (PhysRegDefs[MO.getReg()]) {
+ unsigned Reg = MO.getReg();
+ if (PhysRegDefs.test(Reg) ||
+ PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
// not safe to hoist.
Safe = false;
@@ -526,8 +579,6 @@ void MachineLICM::HoistRegionPostRA() {
HoistPostRA(MI, Candidates[i].Def);
}
}
-
- delete[] PhysRegDefs;
}
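
PhysRegDefs/PhysRegClobbers replace the old per-register counters with a saturating two-level scheme: the first def of any overlapping register sets the Defs bit, a second def (or a regmask or implicit def) sets the Clobbers bit, and only never-clobbered, terminator-free defs survive as candidates. The state transition in isolation, ignoring aliases (illustrative helper, not the LLVM BitVector API):

#include <vector>

// Defs[Reg]: at least one definition seen in the loop.
// Clobbers[Reg]: more than one definition (or a blanket clobber), which
// disqualifies Reg as a post-RA hoisting candidate.
static void recordDef(std::vector<bool> &Defs, std::vector<bool> &Clobbers,
                      unsigned Reg) {
  if (Defs[Reg])
    Clobbers[Reg] = true; // second def promotes to "clobbered"
  Defs[Reg] = true;
}
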
/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
@@ -556,26 +607,17 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
- DEBUG({
- dbgs() << "Hoisting " << *MI;
- if (Preheader->getBasicBlock())
- dbgs() << " to MachineBasicBlock "
- << Preheader->getName();
- if (MI->getParent()->getBasicBlock())
- dbgs() << " from MachineBasicBlock "
- << MI->getParent()->getName();
- dbgs() << "\n";
- });
+ DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
+ << MI->getParent()->getNumber() << ": " << *MI);
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
- // Add register to livein list to all the BBs in the current loop since a
+ // Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
// important to ensure later passes do not scavenge the def register.
AddToLiveIns(Def);
@@ -589,7 +631,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
if (SpeculationState != SpeculateUnknown)
return SpeculationState == SpeculateFalse;
-
+
if (BB != CurLoop->getHeader()) {
// Check loop exiting blocks.
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
@@ -605,57 +647,126 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
return true;
}
-/// HoistRegion - Walk the specified region of the CFG (defined by all blocks
-/// dominated by the specified block, and that are in the current loop) in depth
-/// first order w.r.t the DominatorTree. This allows us to visit definitions
-/// before uses, allowing us to hoist a loop body in one pass without iteration.
-///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
- assert(N != 0 && "Null dominator tree node?");
- MachineBasicBlock *BB = N->getBlock();
+void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
- // If the header of the loop containing this basic block is a landing pad,
- // then don't try to hoist instructions out of this loop.
- const MachineLoop *ML = MLI->getLoopFor(BB);
- if (ML && ML->getHeader()->isLandingPad()) return;
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+}
- // If this subregion is not in the top level loop at all, exit.
- if (!CurLoop->contains(BB)) return;
+void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
+ DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+ BackTrace.pop_back();
+}
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader)
+/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
+/// dominator tree node if it's a leaf or all of its children are done. Walk
+/// up the dominator tree to destroy ancestors which are now done.
+void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
+ DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
+ if (OpenChildren[Node])
return;
- if (IsHeader) {
+ // Pop scope.
+ ExitScope(Node->getBlock());
+
+ // Now traverse upwards to pop ancestors whose offspring are all done.
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) {
+ unsigned Left = --OpenChildren[Parent];
+ if (Left != 0)
+ break;
+ ExitScope(Parent->getBlock());
+ Node = Parent;
+ }
+}
+
+/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
+/// blocks dominated by the specified header block, and that are in the
+/// current loop) in depth first order w.r.t the DominatorTree. This allows
+/// us to visit definitions before uses, allowing us to hoist a loop body in
+/// one pass without iteration.
+///
+void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
+ SmallVector<MachineDomTreeNode*, 32> Scopes;
+ SmallVector<MachineDomTreeNode*, 8> WorkList;
+ DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
+ DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
+
+ // Perform a DFS walk to determine the order of visit.
+ WorkList.push_back(HeaderN);
+ do {
+ MachineDomTreeNode *Node = WorkList.pop_back_val();
+ assert(Node != 0 && "Null dominator tree node?");
+ MachineBasicBlock *BB = Node->getBlock();
+
+ // If the header of the loop containing this basic block is a landing pad,
+ // then don't try to hoist instructions out of this loop.
+ const MachineLoop *ML = MLI->getLoopFor(BB);
+ if (ML && ML->getHeader()->isLandingPad())
+ continue;
+
+ // If this subregion is not in the top level loop at all, exit.
+ if (!CurLoop->contains(BB))
+ continue;
+
+ Scopes.push_back(Node);
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ unsigned NumChildren = Children.size();
+
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() >= 25)
+ NumChildren = 0;
+
+ OpenChildren[Node] = NumChildren;
+ // Add children in reverse order as then the next popped worklist node is
+ // the first child of this node. This means we ultimately traverse the
+ // DOM tree in exactly the same order as if we'd recursed.
+ for (int i = (int)NumChildren-1; i >= 0; --i) {
+ MachineDomTreeNode *Child = Children[i];
+ ParentMap[Child] = Node;
+ WorkList.push_back(Child);
+ }
+ } while (!WorkList.empty());
+
+ if (Scopes.size() != 0) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
// Compute registers which are livein into the loop headers.
RegSeen.clear();
BackTrace.clear();
InitRegPressure(Preheader);
}
- // Remember livein register pressure.
- BackTrace.push_back(RegPressure);
+ // Now perform LICM.
+ for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
+ MachineDomTreeNode *Node = Scopes[i];
+ MachineBasicBlock *MBB = Node->getBlock();
- SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ) {
- MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- MachineInstr *MI = &*MII;
- if (!Hoist(MI, Preheader))
- UpdateRegPressure(MI);
- MII = NextMII;
- }
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ continue;
- // Don't hoist things out of a large switch statement. This often causes
- // code to be hoisted that wasn't going to be executed, and increases
- // register pressure in a situation where it's likely to matter.
- if (BB->succ_size() < 25) {
- const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
- for (unsigned I = 0, E = Children.size(); I != E; ++I)
- HoistRegion(Children[I]);
- }
+ EnterScope(MBB);
- BackTrace.pop_back();
+ // Process the block
+ SpeculationState = SpeculateUnknown;
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ) {
+ MachineBasicBlock::iterator NextMII = MII; ++NextMII;
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
+ MII = NextMII;
+ }
+
+ // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
+ ExitScopeIfDone(Node, OpenChildren, ParentMap);
+ }
}
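The reverse-order push above is the standard trick for making an explicit-stack walk match recursive preorder. A minimal self-contained sketch of the same pattern (generic C++, with a toy Node type invented for illustration):

    #include <vector>

    struct Node { std::vector<Node*> Children; };

    // Visits nodes in the same order as a recursive preorder traversal.
    void preorder(Node *Root, void (*Visit)(Node*)) {
      std::vector<Node*> Stack(1, Root);
      while (!Stack.empty()) {
        Node *N = Stack.back();
        Stack.pop_back();
        Visit(N);
        // Push children in reverse so the first child is popped next.
        for (int i = (int)N->Children.size() - 1; i >= 0; --i)
          Stack.push_back(N->Children[i]);
      }
    }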
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
@@ -670,7 +781,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
unsigned &RCId, unsigned &RCCost) const {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
EVT VT = *RC->vt_begin();
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
RCId = RC->getID();
RCCost = 1;
} else {
@@ -678,7 +789,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
RCCost = TLI->getRepRegClassCostFor(VT);
}
}
-
+
/// InitRegPressure - Find all virtual register references that are liveout of
/// the preheader to initialize the starting "register pressure". Note this
/// does not count live through (livein but not used) registers.
@@ -762,6 +873,21 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
}
}
+/// isLoadFromGOTOrConstantPool - Return true if this machine instruction
+/// loads from global offset table or constant pool.
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
+ assert (MI.mayLoad() && "Expected MI that loads!");
+ for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
+ E = MI.memoperands_end(); I != E; ++I) {
+ if (const Value *V = (*I)->getValue()) {
+ if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+ if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ return true;
+ }
+ }
+ return false;
+}
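For context, this kind of load typically appears for source like the following when compiled as position-independent code (an assumption about the target's codegen, e.g. x86-64 PIC; the function above only inspects the machine memory operands):

    extern int GlobalCounter;

    int readCounter() {
      // Under -fPIC the address of GlobalCounter is normally fetched
      // through the GOT; that address load is invariant and hoistable
      // even when the loop body is not guaranteed to execute.
      return GlobalCounter;
    }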
+
/// IsLICMCandidate - Returns true if the instruction may be a suitable
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
@@ -773,9 +899,12 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
// If it is a load, then check that it is guaranteed to execute by making sure
// it dominates all exiting blocks. If it doesn't, then there is a path out of
- // the loop which does not execute this load, so we can't hoist it.
+ // the loop which does not execute this load, so we can't hoist it. Note
+ // that loads from constant memory are not always safe to speculate, for
+ // example an indexed load from a jump table.
// Stores and side effects are already checked by isSafeToMove.
- if (I.getDesc().mayLoad() && !IsGuaranteedToExecute(I.getParent()))
+ if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
+ !IsGuaranteedToExecute(I.getParent()))
return false;
return true;
@@ -785,7 +914,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
/// invariant. I.e., all virtual register operands are defined outside of the
/// loop, physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
-///
+///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (!IsLICMCandidate(I))
return false;
@@ -806,18 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->def_empty(Reg))
- return false;
- if (AllocatableSet.test(Reg))
+ if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
return false;
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI->def_empty(AliasReg))
- return false;
- if (AllocatableSet.test(AliasReg))
- return false;
- }
// Otherwise it's safe to move.
continue;
} else if (!MO.isDead()) {
@@ -847,22 +966,40 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
}
-/// HasAnyPHIUse - Return true if the specified register is used by any
-/// phi node.
-bool MachineLICM::HasAnyPHIUse(unsigned Reg) const {
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- if (UseMI->isPHI())
- return true;
- // Look pass copies as well.
- if (UseMI->isCopy()) {
- unsigned Def = UseMI->getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Def) &&
- HasAnyPHIUse(Def))
- return true;
+/// HasLoopPHIUse - Return true if the specified instruction is used by a
+/// phi node and hoisting it could cause a copy to be inserted.
+bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
+ SmallVector<const MachineInstr*, 8> Work(1, MI);
+ do {
+ MI = Work.pop_back_val();
+ for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef())
+ continue;
+ unsigned Reg = MO->getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ // A PHI may cause a copy to be inserted.
+ if (UseMI->isPHI()) {
+ // A PHI inside the loop causes a copy because the live range of Reg is
+ // extended across the PHI.
+ if (CurLoop->contains(UseMI))
+ return true;
+ // A PHI in an exit block can cause a copy to be inserted if the PHI
+ // has multiple predecessors in the loop with different values.
+ // For now, approximate by rejecting all exit blocks.
+ if (isExitBlock(UseMI->getParent()))
+ return true;
+ continue;
+ }
+ // Look past copies as well.
+ if (UseMI->isCopy() && CurLoop->contains(UseMI))
+ Work.push_back(UseMI);
+ }
}
- }
+ } while (!Work.empty());
return false;
}
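The PHI-copy concern is easiest to see at the source level. A hedged sketch (plain C++, not taken from the patch) of a loop where the invariant product feeds a PHI inside the loop:

    int select_accumulate(int a, int b, int n) {
      int x = 0;
      for (int i = 0; i < n; ++i) {
        int t = a * b;           // loop invariant, a hoisting candidate
        // 'x' becomes a PHI merging 't' and 'x + 1'. Hoisting 't' extends
        // its live range across the whole loop, and lowering the PHI then
        // requires a copy.
        x = (i & 1) ? t : x + 1;
      }
      return x;
    }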
@@ -903,7 +1040,7 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
/// the operand latency between its def and a use is one or less.
bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
- if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ if (MI.isAsCheapAsAMove() || MI.isCopyLike())
return true;
if (!InstrItins || InstrItins->isEmpty())
return false;
@@ -930,16 +1067,25 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
/// if hoisting an instruction of the given cost matrix can cause high
/// register pressure.
-bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
+ bool CheapInstr) {
for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
CI != CE; ++CI) {
- if (CI->second <= 0)
+ if (CI->second <= 0)
continue;
unsigned RCId = CI->first;
+ unsigned Limit = RegLimit[RCId];
+ int Cost = CI->second;
+
+ // Don't hoist cheap instructions if they would increase register pressure,
+ // even if we're under the limit.
+ if (CheapInstr)
+ return true;
+
for (unsigned i = BackTrace.size(); i != 0; --i) {
SmallVector<unsigned, 8> &RP = BackTrace[i-1];
- if (RP[RCId] + CI->second >= RegLimit[RCId])
+ if (RP[RCId] + Cost >= Limit)
return true;
}
}
@@ -999,87 +1145,95 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
if (MI.isImplicitDef())
return true;
- // If the instruction is cheap, only hoist if it is re-materilizable. LICM
- // will increase register pressure. It's probably not worth it if the
- // instruction is cheap.
- // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
- // these tend to help performance in low register pressure situation. The
- // trade off is it may cause spill in high pressure situation. It will end up
- // adding a store in the loop preheader. But the reload is no more expensive.
- // The side benefit is these loads are frequently CSE'ed.
- if (IsCheapInstruction(MI)) {
- if (!TII->isTriviallyReMaterializable(&MI, AA))
- return false;
- } else {
- // Estimate register pressure to determine whether to LICM the instruction.
- // In low register pressure situation, we can be more aggressive about
- // hoisting. Also, favors hoisting long latency instructions even in
- // moderately high pressure situation.
- // FIXME: If there are long latency loop-invariant instructions inside the
- // loop at this point, why didn't the optimizer's LICM hoist them?
- DenseMap<unsigned, int> Cost;
- for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.isImplicit())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
+ // Besides removing computation from the loop, hoisting an instruction has
+ // these effects:
+ //
+ // - The value defined by the instruction becomes live across the entire
+ // loop. This increases register pressure in the loop.
+ //
+ // - If the value is used by a PHI in the loop, a copy will be required for
+ // lowering the PHI after extending the live range.
+ //
+ // - When hoisting the last use of a value in the loop, that value no longer
+ // needs to be live in the loop. This lowers register pressure in the loop.
+
+ bool CheapInstr = IsCheapInstruction(MI);
+ bool CreatesCopy = HasLoopPHIUse(&MI);
+
+ // Don't hoist a cheap instruction if it would create a copy in the loop.
+ if (CheapInstr && CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
+ return false;
+ }
- unsigned RCId, RCCost;
- getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
- if (MO.isDef()) {
- if (HasHighOperandLatency(MI, i, Reg)) {
- ++NumHighLatency;
- return true;
- }
+ // Rematerializable instructions should always be hoisted since the register
+ // allocator can just pull them down again when needed.
+ if (TII->isTriviallyReMaterializable(&MI, AA))
+ return true;
+
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In a low register pressure situation, we can be more aggressive about
+ // hoisting. Also, favor hoisting long latency instructions even in a
+ // moderately high pressure situation.
+ // Cheap instructions will only be hoisted if they don't increase register
+ // pressure at all.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second += RCCost;
- else
- Cost.insert(std::make_pair(RCId, RCCost));
- } else if (isOperandKill(MO, MRI)) {
- // Is a virtual register use is a kill, hoisting it out of the loop
- // may actually reduce register pressure or be register pressure
- // neutral.
- DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
- if (CI != Cost.end())
- CI->second -= RCCost;
- else
- Cost.insert(std::make_pair(RCId, -RCCost));
+ unsigned RCId, RCCost;
+ getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ DEBUG(dbgs() << "Hoist High Latency: " << MI);
+ ++NumHighLatency;
+ return true;
}
+ Cost[RCId] += RCCost;
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ Cost[RCId] -= RCCost;
}
+ }
- // Visit BBs from header to current BB, if hoisting this doesn't cause
- // high register pressure, then it's safe to proceed.
- if (!CanCauseHighRegPressure(Cost)) {
- ++NumLowRP;
- return true;
- }
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
+ DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
+ ++NumLowRP;
+ return true;
+ }
- // Do not "speculate" in high register pressure situation. If an
- // instruction is not guaranteed to be executed in the loop, it's best to be
- // conservative.
- if (AvoidSpeculation &&
- (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI)))
- return false;
+ // Don't risk increasing register pressure if it would create copies.
+ if (CreatesCopy) {
+ DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
+ return false;
+ }
- // High register pressure situation, only hoist if the instruction is going to
- // be remat'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA) &&
- !MI.isInvariantLoad(AA))
- return false;
+ // Do not "speculate" in high register pressure situation. If an
+ // instruction is not guaranteed to be executed in the loop, it's best to be
+ // conservative.
+ if (AvoidSpeculation &&
+ (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
+ DEBUG(dbgs() << "Won't speculate: " << MI);
+ return false;
}
- // If result(s) of this instruction is used by PHIs outside of the loop, then
- // don't hoist it if the instruction because it will introduce an extra copy.
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isDef())
- continue;
- if (HasAnyPHIUse(MO.getReg()))
- return false;
+ // High register pressure situation, only hoist if the instruction is going
+ // to be remat'ed.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA)) {
+ DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
+ return false;
}
return true;
@@ -1087,7 +1241,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
- if (MI->getDesc().canFoldAsLoad())
+ if (MI->canFoldAsLoad())
return 0;
// If not, we may be able to unfold a load and hoist that.
@@ -1123,8 +1277,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
assert(NewMIs.size() == 2 &&
"Unfolded a load into multiple instructions!");
MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MI, NewMIs[0]);
- MBB->insert(MI, NewMIs[1]);
+ MachineBasicBlock::iterator Pos = MI;
+ MBB->insert(Pos, NewMIs[0]);
+ MBB->insert(Pos, NewMIs[1]);
// If unfolding produced a load that wasn't loop-invariant or profitable to
// hoist, discard the new instructions and bail.
if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
@@ -1180,6 +1335,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
// Replace virtual registers defined by MI by their counterparts defined
// by Dup.
+ SmallVector<unsigned, 2> Defs;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
@@ -1190,11 +1346,33 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
"Instructions with different phys regs are not identical!");
if (MO.isReg() && MO.isDef() &&
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- MRI->clearKillFlags(Dup->getOperand(i).getReg());
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ Defs.push_back(i);
+ }
+
+ SmallVector<const TargetRegisterClass*, 2> OrigRCs;
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ OrigRCs.push_back(MRI->getRegClass(DupReg));
+
+ if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
+ // Restore the old register classes if there was more than one def.
+ for (unsigned j = 0; j != i; ++j)
+ MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
+ return false;
}
}
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Idx = Defs[i];
+ unsigned Reg = MI->getOperand(Idx).getReg();
+ unsigned DupReg = Dup->getOperand(Idx).getReg();
+ MRI->replaceRegWith(Reg, DupReg);
+ MRI->clearKillFlags(DupReg);
+ }
+
MI->eraseFromParent();
++NumCSEed;
return true;
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 80c4854238af..ea98b23c6d57 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -257,7 +257,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
: ImmutablePass(ID), Context(MAI, MRI, MOFI),
ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
CallsUnwindInit(0), DbgInfoAvailable(false),
- CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ UsesVAFloatArgument(false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
// Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
@@ -268,9 +268,9 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
MachineModuleInfo::MachineModuleInfo()
: ImmutablePass(ID),
Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) {
- assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
- "should always be explicitly constructed by LLVMTargetMachine");
- abort();
+ llvm_unreachable("This MachineModuleInfo constructor should never be called, "
+ "MMI should always be explicitly constructed by "
+ "LLVMTargetMachine");
}
MachineModuleInfo::~MachineModuleInfo() {
@@ -503,8 +503,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
/// indexes.
void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
ArrayRef<unsigned> Sites) {
- for (unsigned I = 0, E = Sites.size(); I != E; ++I)
- LPadToCallSiteMap[Sym].push_back(Sites[I]);
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
}
/// getTypeIDFor - Return the type id for the specified typeinfo. This is
@@ -541,8 +540,7 @@ try_next:;
// Add the new filter.
int FilterID = -(1 + FilterIds.size());
FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
- FilterIds.push_back(TyIds[I]);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
FilterEnds.push_back(FilterIds.size());
FilterIds.push_back(0); // terminator
return FilterID;
@@ -561,13 +559,13 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
const Function* Personality = NULL;
// Scan landing pads. If there is at least one non-NULL personality - use it.
- for (unsigned i = 0; i != LandingPads.size(); ++i)
+ for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
if (LandingPads[i].Personality) {
Personality = LandingPads[i].Personality;
break;
}
- for (unsigned i = 0; i < Personalities.size(); ++i) {
+ for (unsigned i = 0, e = Personalities.size(); i < e; ++i) {
if (Personalities[i] == Personality)
return i;
}
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
index 9f4ef1287803..58e067bcb9b2 100644
--- a/lib/CodeGen/MachinePassRegistry.cpp
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -16,6 +16,7 @@
using namespace llvm;
+void MachinePassRegistryListener::anchor() { }
/// Add - Adds a function pass to the registration list.
///
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 266ebf64a3fc..7ea151713a6d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -18,11 +18,12 @@
using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
- : TRI(&TRI), IsSSA(true) {
+ : TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedPhysRegs.resize(TRI.getNumRegs());
-
+ UsedPhysRegMask.resize(TRI.getNumRegs());
+
// Create the physreg use/def lists.
PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
@@ -30,9 +31,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
- for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
- "Vreg use list non-empty still?");
+ clearVirtRegs();
for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
@@ -76,12 +75,14 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) {
// Accumulate constraints from all uses.
for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E;
++I) {
- // TRI doesn't have accurate enough information to model this yet.
- if (I.getOperand().getSubReg())
- return false;
const TargetRegisterClass *OpRC =
I->getRegClassConstraint(I.getOperandNo(), TII, TRI);
- if (OpRC)
+ if (unsigned SubIdx = I.getOperand().getSubReg()) {
+ if (OpRC)
+ NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx);
+ else
+ NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx);
+ } else if (OpRC)
NewRC = TRI->getCommonSubClass(NewRC, OpRC);
if (!NewRC || NewRC == OldRC)
return false;
@@ -115,6 +116,16 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
return Reg;
}
+/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
+void MachineRegisterInfo::clearVirtRegs() {
+#ifndef NDEBUG
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
+#endif
+ VRegInfo.clear();
+}
+
/// HandleVRegListReallocation - We just added a virtual register to the
/// VRegInfo info list and it reallocated. Update the use/def lists info
/// pointers.
@@ -150,9 +161,8 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
// Since we are in SSA form, we can use the first definition.
- if (!def_empty(Reg))
- return &*def_begin(Reg);
- return 0;
+ def_iterator I = def_begin(Reg);
+ return !I.atEnd() ? &*I : 0;
}
bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const {
@@ -242,18 +252,31 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
}
}
-void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
- for (int i = UsedPhysRegs.find_first(); i >= 0;
- i = UsedPhysRegs.find_next(i))
- for (const unsigned *SS = TRI.getSubRegisters(i);
- unsigned SubReg = *SS; ++SS)
- if (SubReg > unsigned(i))
- UsedPhysRegs.set(SubReg);
-}
-
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
I.getOperand().getParent()->dump();
}
#endif
+
+void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
+ ReservedRegs = TRI->getReservedRegs(MF);
+}
+
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
+ const MachineFunction &MF) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+
+ // Check if any overlapping register is modified.
+ for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
+ if (!def_empty(*R))
+ return false;
+
+ // Check if any overlapping register is allocatable so it may be used later.
+ if (AllocatableRegs.empty())
+ AllocatableRegs = TRI->getAllocatableSet(MF);
+ for (const uint16_t *R = TRI->getOverlaps(PhysReg); *R; ++R)
+ if (AllocatableRegs.test(*R))
+ return false;
+ return true;
+}
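A sketch of the intended call pattern, mirroring the MachineLICM and MachineSink callers elsewhere in this patch (the helper name is invented for illustration):

    // Returns true if every explicit physreg use in MI reads a register
    // that nothing in the function can redefine.
    static bool allPhysRegUsesConstant(const MachineInstr &MI,
                                       const MachineRegisterInfo &MRI) {
      const MachineFunction &MF = *MI.getParent()->getParent();
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = MI.getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned Reg = MO.getReg();
        if (Reg && TargetRegisterInfo::isPhysicalRegister(Reg) &&
            !MRI.isConstantPhysReg(Reg, MF))
          return false;
      }
      return true;
    }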
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 84d6df25397c..070a55704dc5 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -81,7 +81,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
if (BB->empty())
return 0;
- MachineBasicBlock::iterator I = BB->front();
+ MachineBasicBlock::iterator I = BB->begin();
if (!I->isPHI())
return 0;
@@ -182,7 +182,7 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
return DupPHI;
// Otherwise, we do need a PHI: insert one now.
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
Loc, VRC, MRI, TII);
@@ -214,7 +214,6 @@ MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
}
llvm_unreachable("MachineOperand::getParent() failure?");
- return 0;
}
/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
@@ -311,7 +310,7 @@ public:
/// Add it into the specified block and return the register.
static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
MachineSSAUpdater *Updater) {
- MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin();
MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
Updater->VRC, Updater->MRI,
Updater->TII);
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
new file mode 100644
index 000000000000..1d3241b8cc6b
--- /dev/null
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -0,0 +1,614 @@
+//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineScheduler schedules machine instructions after phi elimination. It
+// preserves LiveIntervals so it can be invoked before register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "misched"
+
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+
+#include <queue>
+
+using namespace llvm;
+
+static cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
+ cl::desc("Force top-down list scheduling"));
+static cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
+ cl::desc("Force bottom-up list scheduling"));
+
+#ifndef NDEBUG
+static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
+ cl::desc("Pop up a window to show MISched dags after they are processed"));
+
+static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
+ cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
+#else
+static bool ViewMISchedDAGs = false;
+#endif // NDEBUG
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Scheduling Pass and Registry
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// MachineScheduler runs after coalescing and before register allocation.
+class MachineScheduler : public MachineSchedContext,
+ public MachineFunctionPass {
+public:
+ MachineScheduler();
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory() {}
+
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ virtual void print(raw_ostream &O, const Module* = 0) const;
+
+ static char ID; // Class identification, replacement for typeinfo
+};
+} // namespace
+
+char MachineScheduler::ID = 0;
+
+char &llvm::MachineSchedulerID = MachineScheduler::ID;
+
+INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachineScheduler, "misched",
+ "Machine Instruction Scheduler", false, false)
+
+MachineScheduler::MachineScheduler()
+: MachineFunctionPass(ID) {
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
+}
+
+void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachinePassRegistry MachineSchedRegistry::Registry;
+
+/// A dummy default scheduler factory indicates whether the scheduler
+/// is overridden on the command line.
+static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
+ return 0;
+}
+
+/// MachineSchedOpt allows command line selection of the scheduler.
+static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
+ RegisterPassParser<MachineSchedRegistry> >
+MachineSchedOpt("misched",
+ cl::init(&useDefaultMachineSched), cl::Hidden,
+ cl::desc("Machine instruction scheduler to use"));
+
+static MachineSchedRegistry
+DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
+ useDefaultMachineSched);
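The registry is the hook for plugging in alternative schedulers. A hedged sketch of a third-party registration (the names createMySched, MyStrategy, and "my-sched" are invented for illustration; the 'converge' and 'shuffle' registrations later in this file follow the same pattern):

    static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
      // Reuse the common driver with a custom MachineSchedStrategy
      // (MyStrategy is assumed to be defined elsewhere).
      return new ScheduleDAGMI(C, new MyStrategy());
    }
    static MachineSchedRegistry
    MySchedRegistry("my-sched", "An example scheduler registration.",
                    createMySched);

With this in place, -misched=my-sched would select it on the command line.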
+
+/// Forward declare the standard machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
+
+/// Top-level MachineScheduler pass driver.
+///
+/// Visit blocks in function order. Divide each block into scheduling regions
+/// and visit them bottom-up. Visiting regions bottom-up is not required, but is
+/// consistent with the DAG builder, which traverses the interior of the
+/// scheduling regions bottom-up.
+///
+/// This design avoids exposing scheduling boundaries to the DAG builder,
+/// simplifying the DAG builder's support for "special" target instructions.
+/// At the same time the design allows target schedulers to operate across
+/// scheduling boundaries, for example to bundle the boundary instructions
+/// without reordering them. This creates complexity, because the target
+/// scheduler must update the RegionBegin and RegionEnd positions cached by
+/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
+/// design would be to split blocks at scheduling boundaries, but LLVM has a
+/// general bias against block splitting purely for implementation simplicity.
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
+ // Initialize the context of the pass.
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ PassConfig = &getAnalysis<TargetPassConfig>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ LIS = &getAnalysis<LiveIntervals>();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ // Select the scheduler, or set the default.
+ MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
+ if (Ctor == useDefaultMachineSched) {
+ // Get the default scheduler set by the target.
+ Ctor = MachineSchedRegistry::getDefault();
+ if (!Ctor) {
+ Ctor = createConvergingSched;
+ MachineSchedRegistry::setDefault(Ctor);
+ }
+ }
+ // Instantiate the selected scheduler.
+ OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
+
+ // Visit all machine basic blocks.
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
+ MBB != MBBEnd; ++MBB) {
+
+ Scheduler->startBlock(MBB);
+
+ // Break the block into scheduling regions [I, RegionEnd), and schedule each
+ // region as soon as it is discovered. RegionEnd points to the scheduling
+ // boundary at the bottom of the region. The DAG does not include RegionEnd,
+ // but the region does (i.e. the next RegionEnd is above the previous
+ // RegionBegin). If the current block has no terminator then RegionEnd ==
+ // MBB->end() for the bottom region.
+ //
+ // The Scheduler may insert instructions during either schedule() or
+ // exitRegion(), even for empty regions. So the local iterators 'I' and
+ // 'RegionEnd' are invalid across these calls.
+ unsigned RemainingCount = MBB->size();
+ for(MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
+ // Avoid decrementing RegionEnd for blocks with no terminator.
+ if (RegionEnd != MBB->end()
+ || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
+ --RegionEnd;
+ // Count the boundary instruction.
+ --RemainingCount;
+ }
+
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for(;I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
+ break;
+ }
+ // Notify the scheduler of the region, even if we may skip scheduling
+ // it. Perhaps it still needs to be bundled.
+ Scheduler->enterRegion(MBB, I, RegionEnd, RemainingCount);
+
+ // Skip empty scheduling regions (0 or 1 schedulable instructions).
+ if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
+ // Close the current region. Bundle the terminator if needed.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->exitRegion();
+ continue;
+ }
+ DEBUG(dbgs() << "MachineScheduling " << MF->getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: ";
+ if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
+ else dbgs() << "End";
+ dbgs() << " Remaining: " << RemainingCount << "\n");
+
+ // Schedule a region: possibly reorder instructions.
+ // This invalidates 'RegionEnd' and 'I'.
+ Scheduler->schedule();
+
+ // Close the current region.
+ Scheduler->exitRegion();
+
+ // Scheduling has invalidated the current iterator 'I'. Ask the
+ // scheduler for the top of its scheduled region.
+ RegionEnd = Scheduler->begin();
+ }
+ assert(RemainingCount == 0 && "Instruction count mismatch!");
+ Scheduler->finishBlock();
+ }
+ Scheduler->finalizeSchedule();
+ DEBUG(LIS->print(dbgs()));
+ return true;
+}
+
+void MachineScheduler::print(raw_ostream &O, const Module* m) const {
+ // unimplemented
+}
+
+//===----------------------------------------------------------------------===//
+// MachineSchedStrategy - Interface to a machine scheduling algorithm.
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ScheduleDAGMI;
+
+/// MachineSchedStrategy - Interface used by ScheduleDAGMI to drive the selected
+/// scheduling algorithm.
+///
+/// If this works well and targets wish to reuse ScheduleDAGMI, we may expose it
+/// in ScheduleDAGInstrs.h
+class MachineSchedStrategy {
+public:
+ virtual ~MachineSchedStrategy() {}
+
+ /// Initialize the strategy after building the DAG for a new region.
+ virtual void initialize(ScheduleDAGMI *DAG) = 0;
+
+ /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
+ /// schedule the node at the top of the unscheduled region. Otherwise it will
+ /// be scheduled at the bottom.
+ virtual SUnit *pickNode(bool &IsTopNode) = 0;
+
+ /// When all predecessor dependencies have been resolved, free this node for
+ /// top-down scheduling.
+ virtual void releaseTopNode(SUnit *SU) = 0;
+ /// When all successor dependencies have been resolved, free this node for
+ /// bottom-up scheduling.
+ virtual void releaseBottomNode(SUnit *SU) = 0;
+};
+} // namespace
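A minimal strategy satisfying this contract might look like the sketch below (purely illustrative; ConvergingScheduler and InstructionShuffler later in this file are the real implementations). It schedules strictly top-down, picking ready nodes in LIFO order:

    class TrivialTopDown : public MachineSchedStrategy {
      std::vector<SUnit*> Ready; // nodes whose predecessors are scheduled
    public:
      virtual void initialize(ScheduleDAGMI *DAG) { Ready.clear(); }

      virtual SUnit *pickNode(bool &IsTopNode) {
        IsTopNode = true; // always grow the scheduled zone from the top
        if (Ready.empty())
          return NULL;
        SUnit *SU = Ready.back();
        Ready.pop_back();
        return SU;
      }

      virtual void releaseTopNode(SUnit *SU) { Ready.push_back(SU); }
      virtual void releaseBottomNode(SUnit *SU) {} // top-down only
    };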
+
+//===----------------------------------------------------------------------===//
+// ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
+// preservation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ScheduleDAGMI is an implementation of ScheduleDAGInstrs that schedules
+/// machine instructions while updating LiveIntervals.
+class ScheduleDAGMI : public ScheduleDAGInstrs {
+ AliasAnalysis *AA;
+ MachineSchedStrategy *SchedImpl;
+
+ /// The top of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentTop;
+
+ /// The bottom of the unscheduled zone.
+ MachineBasicBlock::iterator CurrentBottom;
+
+ /// The number of instructions scheduled so far. Used to cut off the
+ /// scheduler at the point determined by misched-cutoff.
+ unsigned NumInstrsScheduled;
+public:
+ ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
+ ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
+ AA(C->AA), SchedImpl(S), CurrentTop(), CurrentBottom(),
+ NumInstrsScheduled(0) {}
+
+ ~ScheduleDAGMI() {
+ delete SchedImpl;
+ }
+
+ MachineBasicBlock::iterator top() const { return CurrentTop; }
+ MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
+
+ /// Implement ScheduleDAGInstrs interface.
+ void schedule();
+
+protected:
+ void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
+ bool checkSchedLimit();
+
+ void releaseSucc(SUnit *SU, SDep *SuccEdge);
+ void releaseSuccessors(SUnit *SU);
+ void releasePred(SUnit *SU, SDep *PredEdge);
+ void releasePredecessors(SUnit *SU);
+};
+} // namespace
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
+/// NumPredsLeft reaches zero, release the successor node.
+void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+ SchedImpl->releaseTopNode(SuccSU);
+}
+
+/// releaseSuccessors - Call releaseSucc on each of SU's successors.
+void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ releaseSucc(SU, &*I);
+ }
+}
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
+/// NumSuccsLeft reaches zero, release the predecessor node.
+void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
+ SUnit *PredSU = PredEdge->getSUnit();
+
+#ifndef NDEBUG
+ if (PredSU->NumSuccsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ PredSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --PredSU->NumSuccsLeft;
+ if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
+ SchedImpl->releaseBottomNode(PredSU);
+}
+
+/// releasePredecessors - Call releasePred on each of SU's predecessors.
+void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ releasePred(SU, &*I);
+ }
+}
+
+void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
+ MachineBasicBlock::iterator InsertPos) {
+ // Fix RegionBegin if the first instruction moves down.
+ if (&*RegionBegin == MI)
+ RegionBegin = llvm::next(RegionBegin);
+ BB->splice(InsertPos, BB, MI);
+ LIS->handleMove(MI);
+ // Fix RegionBegin if another instruction moves above the first instruction.
+ if (RegionBegin == InsertPos)
+ RegionBegin = MI;
+}
+
+bool ScheduleDAGMI::checkSchedLimit() {
+#ifndef NDEBUG
+ if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
+ CurrentTop = CurrentBottom;
+ return false;
+ }
+ ++NumInstrsScheduled;
+#endif
+ return true;
+}
+
+/// schedule - Called back from MachineScheduler::runOnMachineFunction
+/// after setting up the current scheduling region.
+void ScheduleDAGMI::schedule() {
+ buildSchedGraph(AA);
+
+ DEBUG(dbgs() << "********** MI Scheduling **********\n");
+ DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this));
+
+ if (ViewMISchedDAGs) viewGraph();
+
+ SchedImpl->initialize(this);
+
+ // Release edges from the special Entry node or to the special Exit node.
+ releaseSuccessors(&EntrySU);
+ releasePredecessors(&ExitSU);
+
+ // Release all DAG roots for scheduling.
+ for (std::vector<SUnit>::iterator I = SUnits.begin(), E = SUnits.end();
+ I != E; ++I) {
+ // A SUnit is ready to top schedule if it has no predecessors.
+ if (I->Preds.empty())
+ SchedImpl->releaseTopNode(&(*I));
+ // A SUnit is ready to bottom schedule if it has no successors.
+ if (I->Succs.empty())
+ SchedImpl->releaseBottomNode(&(*I));
+ }
+
+ CurrentTop = RegionBegin;
+ CurrentBottom = RegionEnd;
+ bool IsTopNode = false;
+ while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
+ DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
+ << " Scheduling Instruction:\n"; SU->dump(this));
+ if (!checkSchedLimit())
+ break;
+
+ // Move the instruction to its new location in the instruction stream.
+ MachineInstr *MI = SU->getInstr();
+
+ if (IsTopNode) {
+ assert(SU->isTopReady() && "node still has unscheduled dependencies");
+ if (&*CurrentTop == MI)
+ ++CurrentTop;
+ else
+ moveInstruction(MI, CurrentTop);
+ // Release dependent instructions for scheduling.
+ releaseSuccessors(SU);
+ }
+ else {
+ assert(SU->isBottomReady() && "node still has unscheduled dependencies");
+ if (&*llvm::prior(CurrentBottom) == MI)
+ --CurrentBottom;
+ else {
+ if (&*CurrentTop == MI)
+ CurrentTop = llvm::next(CurrentTop);
+ moveInstruction(MI, CurrentBottom);
+ CurrentBottom = MI;
+ }
+ // Release dependent instructions for scheduling.
+ releasePredecessors(SU);
+ }
+ SU->isScheduled = true;
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+}
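The invariant maintained by this loop, as a sketch of the block layout mid-schedule:

    // [ scheduled from top )[ unscheduled zone )[ scheduled from bottom )
    // RegionBegin ... CurrentTop ....... CurrentBottom ....... RegionEnd
    //
    // Each pickNode() either advances CurrentTop or retreats CurrentBottom;
    // the final assert checks that the unscheduled zone has shrunk to
    // nothing.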
+
+//===----------------------------------------------------------------------===//
+// ConvergingScheduler - Implementation of the standard MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
+/// the schedule.
+class ConvergingScheduler : public MachineSchedStrategy {
+ ScheduleDAGMI *DAG;
+
+ unsigned NumTopReady;
+ unsigned NumBottomReady;
+
+public:
+ virtual void initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ }
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom())
+ return NULL;
+
+ // As an initial placeholder heuristic, schedule in the direction that has
+ // the fewest choices.
+ SUnit *SU;
+ if (ForceTopDown || (!ForceBottomUp && NumTopReady <= NumBottomReady)) {
+ SU = DAG->getSUnit(DAG->top());
+ IsTopNode = true;
+ }
+ else {
+ SU = DAG->getSUnit(llvm::prior(DAG->bottom()));
+ IsTopNode = false;
+ }
+ if (SU->isTopReady()) {
+ assert(NumTopReady > 0 && "bad ready count");
+ --NumTopReady;
+ }
+ if (SU->isBottomReady()) {
+ assert(NumBottomReady > 0 && "bad ready count");
+ --NumBottomReady;
+ }
+ return SU;
+ }
+
+ virtual void releaseTopNode(SUnit *SU) {
+ ++NumTopReady;
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ ++NumBottomReady;
+ }
+};
+} // namespace
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
+ assert((!ForceTopDown || !ForceBottomUp) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new ConvergingScheduler());
+}
+static MachineSchedRegistry
+ConvergingSchedRegistry("converge", "Standard converging scheduler.",
+ createConvergingSched);
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Shuffler for Correctness Testing
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace {
+/// Apply a less-than relation on the node order, which corresponds to the
+/// instruction order prior to scheduling. IsReverse implements greater-than.
+template<bool IsReverse>
+struct SUnitOrder {
+ bool operator()(SUnit *A, SUnit *B) const {
+ if (IsReverse)
+ return A->NodeNum > B->NodeNum;
+ else
+ return A->NodeNum < B->NodeNum;
+ }
+};
+
+/// Reorder instructions as much as possible.
+class InstructionShuffler : public MachineSchedStrategy {
+ bool IsAlternating;
+ bool IsTopDown;
+
+ // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
+ // gives nodes with a higher number higher priority causing the latest
+ // instructions to be scheduled first.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
+ TopQ;
+ // When scheduling bottom-up, use greater-than as the queue priority.
+ PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
+ BottomQ;
+public:
+ InstructionShuffler(bool alternate, bool topdown)
+ : IsAlternating(alternate), IsTopDown(topdown) {}
+
+ virtual void initialize(ScheduleDAGMI *) {
+ TopQ.clear();
+ BottomQ.clear();
+ }
+
+ /// Implement MachineSchedStrategy interface.
+ /// -----------------------------------------
+
+ virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *SU;
+ if (IsTopDown) {
+ do {
+ if (TopQ.empty()) return NULL;
+ SU = TopQ.top();
+ TopQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = true;
+ }
+ else {
+ do {
+ if (BottomQ.empty()) return NULL;
+ SU = BottomQ.top();
+ BottomQ.pop();
+ } while (SU->isScheduled);
+ IsTopNode = false;
+ }
+ if (IsAlternating)
+ IsTopDown = !IsTopDown;
+ return SU;
+ }
+
+ virtual void releaseTopNode(SUnit *SU) {
+ TopQ.push(SU);
+ }
+ virtual void releaseBottomNode(SUnit *SU) {
+ BottomQ.push(SU);
+ }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
+ bool Alternate = !ForceTopDown && !ForceBottomUp;
+ bool TopDown = !ForceBottomUp;
+ assert((TopDown || !ForceTopDown) &&
+ "-misched-topdown incompatible with -misched-bottomup");
+ return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
+}
+static MachineSchedRegistry ShufflerRegistry(
+ "shuffle", "Shuffle machine instructions alternating directions",
+ createInstructionShuffler);
+#endif // !NDEBUG
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 29cfb49953b9..1ce546b578ad 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -32,7 +32,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static cl::opt<bool>
+static cl::opt<bool>
SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
cl::init(true), cl::Hidden);
@@ -90,12 +90,19 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge, bool &LocalUse) const;
+ MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB,
+ bool &BreakPHIEdge);
+ bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo);
+
bool PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB);
};
} // end anonymous namespace
char MachineSinking::ID = 0;
+char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -104,8 +111,6 @@ INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
-FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
-
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
MachineBasicBlock *MBB) {
if (!MI->isCopy())
@@ -147,14 +152,10 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+ // Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
return true;
- // Ignoring debug uses is necessary so debug info doesn't affect the code.
- // This may leave a referencing dbg_value in the original block, before
- // the definition of the vreg. Dwarf generator handles this although the
- // user might not get the right info at runtime.
-
// BreakPHIEdge is true if all the uses are in the successor MBB being sunk
// into and they are all PHI nodes. In this case, machine-sink must break
// the critical edge first. e.g.
@@ -291,7 +292,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
if (!CEBCandidates.insert(std::make_pair(From, To)))
return true;
- if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ if (!MI->isCopy() && !MI->isAsCheapAsAMove())
return true;
// MI is cheap, we probably don't want to break the critical edge for it.
@@ -382,9 +383,9 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
}
-/// collectDebgValues - Scan instructions following MI and collect any
+/// collectDebugValues - Scan instructions following MI and collect any
/// matching DBG_VALUEs.
-static void collectDebugValues(MachineInstr *MI,
+static void collectDebugValues(MachineInstr *MI,
SmallVector<MachineInstr *, 2> & DbgValues) {
DbgValues.clear();
if (!MI->getOperand(0).isReg())
@@ -401,35 +402,76 @@ static void collectDebugValues(MachineInstr *MI,
}
}
-/// SinkInstruction - Determine whether it is safe to sink the specified machine
-/// instruction out of its current block into a successor.
-bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
- // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
- // be close to the source to make it easier to coalesce.
- if (AvoidsSinking(MI, MRI))
+/// isPostDominatedBy - Return true if A is post dominated by B.
+static bool isPostDominatedBy(MachineBasicBlock *A, MachineBasicBlock *B) {
+
+ // FIXME - Use real post dominator.
+ if (A->succ_size() != 2)
+ return false;
+ MachineBasicBlock::succ_iterator I = A->succ_begin();
+ if (B == *I)
+ ++I;
+ MachineBasicBlock *OtherSuccBlock = *I;
+ if (OtherSuccBlock->succ_size() != 1 ||
+ *(OtherSuccBlock->succ_begin()) != B)
return false;
- // Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, AA, SawStore))
+ return true;
+}
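The structural check above recognizes exactly one CFG shape, sketched here:

    //        A
    //       / \
    //      |   OtherSuccBlock
    //       \ /
    //        B
    //
    // A has exactly two successors; one is B, and the other's sole
    // successor is B. Only in this triangle does the approximation report
    // that B post-dominates A.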
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *SuccToSinkTo) {
+ assert (MI && "Invalid MachineInstr!");
+ assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+ if (MBB == SuccToSinkTo)
return false;
- // FIXME: This should include support for sinking instructions within the
- // block they are currently in to shorten the live ranges. We often get
- // instructions sunk into the top of a large block, but it would be better to
- // also sink them down before their first use in the block. This xform has to
- // be careful not to *increase* register pressure though, e.g. sinking
- // "x = y + z" down if it kills y and z would increase the live ranges of y
- // and z and only shrink the live range of x.
+ // It is profitable if SuccToSinkTo does not post-dominate the current block.
+ if (!isPostDominatedBy(MBB, SuccToSinkTo))
+ return true;
+
+ // Check whether the only uses in the post-dominated block are PHI nodes.
+ bool NonPHIUse = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (UseBlock == SuccToSinkTo && !UseInst->isPHI())
+ NonPHIUse = true;
+ }
+ if (!NonPHIUse)
+ return true;
+
+ // Even if SuccToSinkTo post-dominates MBB, it may still be profitable if
+ // MI can be sunk further into another block in a later round.
+ bool BreakPHIEdge = false;
+ // FIXME - If finding the successor is expensive at compile time, cache the results.
+ if (MachineBasicBlock *MBB2 = FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge))
+ return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2);
+
+ // If SuccToSinkTo is the final destination and it post-dominates the
+ // current block, then it is not profitable to sink MI into SuccToSinkTo.
+ return false;
+}
+
+/// FindSuccToSinkTo - Find a successor to sink this instruction to.
+MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool &BreakPHIEdge) {
+
+ assert (MI && "Invalid MachineInstr!");
+ assert (MBB && "Invalid MachineBasicBlock!");
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
- MachineBasicBlock *ParentBlock = MI->getParent();
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
- bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -442,24 +484,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->def_empty(Reg))
- return false;
-
- if (AllocatableSet.test(Reg))
- return false;
-
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI->def_empty(AliasReg))
- return false;
-
- if (AllocatableSet.test(AliasReg))
- return false;
- }
+ if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ return NULL;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
- return false;
+ return NULL;
}
} else {
// Virtual register uses are always safe to sink.
@@ -467,7 +496,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
- return false;
+ return NULL;
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
@@ -488,48 +517,79 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
BreakPHIEdge, LocalUse))
- return false;
+ return NULL;
continue;
}
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
- for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
- E = ParentBlock->succ_end(); SI != E; ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccBlock = *SI;
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
BreakPHIEdge, LocalUse)) {
- SuccToSinkTo = *SI;
+ SuccToSinkTo = SuccBlock;
break;
}
if (LocalUse)
// Def is used locally, it's never safe to move this def.
- return false;
+ return NULL;
}
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
- return false;
+ return NULL;
+ else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return NULL;
}
}
- // If there are no outputs, it must have side-effects.
- if (SuccToSinkTo == 0)
- return false;
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (MBB == SuccToSinkTo)
+ return NULL;
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo->isLandingPad())
+ if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
+ return NULL;
+
+ return SuccToSinkTo;
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified machine
+/// instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
return false;
- // It is not possible to sink an instruction into its own block. This can
- // happen with loops.
- if (MI->getParent() == SuccToSinkTo)
+ // Check if it's safe to move the instruction.
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
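+ //
+ // A sketch of the case in question (x, y, z are illustrative vregs):
+ //   x = y + z    ; defined at the top of the block
+ //   ...          ; long stretch with no use of x
+ //   use(x)       ; sinking the add here shrinks x's live range, but if the
+ //                ; add kills y and z it would lengthen theirs instead.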
+
+ bool BreakPHIEdge = false;
+ MachineBasicBlock *ParentBlock = MI->getParent();
+ MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
+
+ // If there are no outputs, it must have side-effects.
+ if (SuccToSinkTo == 0)
+ return false;
+
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 26847d39e7ad..74ba94d1fcc0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -69,14 +70,17 @@ namespace {
unsigned foundErrors;
typedef SmallVector<unsigned, 16> RegVector;
+ typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
typedef DenseMap<unsigned, const MachineInstr*> RegMap;
const MachineInstr *FirstTerminator;
BitVector regsReserved;
+ BitVector regsAllocatable;
RegSet regsLive;
RegVector regsDefined, regsDead, regsKilled;
+ RegMaskVector regMasks;
RegSet regsLiveInButUnused;
SlotIndex lastIndex;
@@ -85,7 +89,7 @@ namespace {
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
RV.push_back(*R);
}
@@ -175,6 +179,10 @@ namespace {
return Reg < regsReserved.size() && regsReserved.test(Reg);
}
+ bool isAllocatable(unsigned Reg) {
+ return Reg < regsAllocatable.size() && regsAllocatable.test(Reg);
+ }
+
// Analysis information if available
LiveVariables *LiveVars;
LiveIntervals *LiveInts;
@@ -194,6 +202,7 @@ namespace {
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+ void checkLiveness(const MachineOperand *MO, unsigned MONum);
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
void checkPHIOps(const MachineBasicBlock *MBB);
@@ -279,13 +288,17 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
- for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
- MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
+ MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != MFI) {
report("Bad instruction parent pointer", MFI);
*OS << "Instruction: " << *MBBI;
continue;
}
+ // Skip BUNDLE instructions for now. FIXME: We should add code to verify
+ // the BUNDLEs specifically.
+ if (MBBI->isBundle())
+ continue;
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -305,6 +318,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
regsDefined.clear();
regsDead.clear();
regsKilled.clear();
+ regMasks.clear();
regsLiveInButUnused.clear();
MBBInfoMap.clear();
@@ -320,7 +334,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
MF->print(*OS, Indexes);
}
*OS << "*** Bad machine code: " << msg << " ***\n"
- << "- function: " << MF->getFunction()->getNameStr() << "\n";
+ << "- function: " << MF->getFunction()->getName() << "\n";
}
void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
@@ -370,12 +384,15 @@ void MachineVerifier::visitMachineFunctionBefore() {
// A sub-register of a reserved register is also reserved
for (int Reg = regsReserved.find_first(); Reg>=0;
Reg = regsReserved.find_next(Reg)) {
- for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+ for (const uint16_t *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
// FIXME: This should probably be:
// assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
regsReserved.set(*Sub);
}
}
+
+ regsAllocatable = TRI->getAllocatableSet(*MF);
+
markReachable(&MF->front());
}
@@ -393,6 +410,20 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = 0;
+ if (MRI->isSSA()) {
+ // If this block has allocatable physical registers live-in, check that
+ // it is an entry block or landing pad.
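+ // Those are the only blocks that may legitimately receive values in
+ // allocatable physregs from outside in SSA form (e.g. argument registers
+ // in the entry block and EH pointer registers in landing pads).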
+ for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+ LE = MBB->livein_end();
+ LI != LE; ++LI) {
+ unsigned reg = *LI;
+ if (isAllocatable(reg) && !MBB->isLandingPad() &&
+ MBB != MBB->getParent()->begin()) {
+ report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+ }
+ }
+ }
+
// Count the number of landing pad successors.
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
@@ -435,7 +466,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ if (!MBB->empty() && MBB->back().isBarrier() &&
!TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
@@ -456,10 +487,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -479,10 +510,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (MBB->back().getDesc().isBarrier()) {
+ } else if (MBB->back().isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -499,10 +530,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!MBB->back().getDesc().isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -523,7 +554,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
continue;
}
regsLive.insert(*I);
- for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(*I); *R; R++)
regsLive.insert(*R);
}
regsLiveInButUnused = regsLive;
@@ -533,7 +564,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
BitVector PR = MFI->getPristineRegs(MBB);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
regsLive.insert(I);
- for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(I); *R; R++)
regsLive.insert(*R);
}
@@ -555,19 +586,22 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
- if ((*I)->isLoad() && !MCID.mayLoad())
+ if ((*I)->isLoad() && !MI->mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MCID.mayStore())
+ if ((*I)->isStore() && !MI->mayStore())
report("Missing mayStore flag", MI);
}
// Debug values must not have a slot index.
- // Other instructions must have one.
+ // Other instructions must have one, unless they are inside a bundle.
if (LiveInts) {
bool mapped = !LiveInts->isNotInMIMap(MI);
if (MI->isDebugValue()) {
if (mapped)
report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
} else {
if (!mapped)
report("Missing slot index", MI);
@@ -575,7 +609,9 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
// Ensure non-terminators don't follow terminators.
- if (MCID.isTerminator()) {
+ // Ignore predicated terminators formed by if-conversion.
+ // FIXME: If-conversion shouldn't need to violate this rule.
+ if (MI->isTerminator() && !TII->isPredicated(MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
} else if (FirstTerminator) {
@@ -606,7 +642,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
if (MO->isReg() &&
- !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) {
if (MO->isDef() && !MCOI.isOptionalDef())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
@@ -614,7 +650,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
+ if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -623,112 +659,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const unsigned Reg = MO->getReg();
if (!Reg)
return;
+ if (MRI->tracksLiveness() && !MI->isDebugValue())
+ checkLiveness(MO, MONum);
- // Check Live Variables.
- if (MI->isDebugValue()) {
- // Liveness checks are not valid for debug values.
- } else if (MO->isUse() && !MO->isUndef()) {
- regsLiveInButUnused.erase(Reg);
-
- bool isKill = false;
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
- // A two-addr use counts as a kill if use and def are the same.
- unsigned DefReg = MI->getOperand(defIdx).getReg();
- if (Reg == DefReg)
- isKill = true;
- else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- report("Two-address instruction operands must be identical",
- MO, MONum);
- }
- } else
- isKill = MO->isKill();
-
- if (isKill)
- addRegWithSubRegs(regsKilled, Reg);
-
- // Check that LiveVars knows this kill.
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
- MO->isKill()) {
- LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(),
- VI.Kills.end(), MI) == VI.Kills.end())
- report("Kill missing from LiveVariables", MO, MONum);
- }
-
- // Check LiveInts liveness and kill.
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- LiveInts && !LiveInts->isNotInMIMap(MI)) {
- SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (!LI.liveAt(UseIdx)) {
- report("No live range at use", MO, MONum);
- *OS << UseIdx << " is not live in " << LI << '\n';
- }
- // Check for extra kill flags.
- // Note that we allow missing kill flags for now.
- if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
- report("Live range continues after kill flag", MO, MONum);
- *OS << "Live range: " << LI << '\n';
- }
- } else {
- report("Virtual register has no Live interval", MO, MONum);
- }
- }
-
- // Use of a dead register.
- if (!regsLive.count(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- // Reserved registers may be used even when 'dead'.
- if (!isReserved(Reg))
- report("Using an undefined physical register", MO, MONum);
- } else {
- BBInfo &MInfo = MBBInfoMap[MI->getParent()];
- // We don't know which virtual registers are live in, so only complain
- // if vreg was killed in this MBB. Otherwise keep track of vregs that
- // must be live in. PHI instructions are handled separately.
- if (MInfo.regsKilled.count(Reg))
- report("Using a killed virtual register", MO, MONum);
- else if (!MI->isPHI())
- MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
- }
- }
- } else if (MO->isDef()) {
- // Register defined.
- // TODO: verify that earlyclobber ops are not used.
- if (MO->isDead())
- addRegWithSubRegs(regsDead, Reg);
- else
- addRegWithSubRegs(regsDefined, Reg);
-
- // Verify SSA form.
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
- llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
- report("Multiple virtual register defs in SSA form", MO, MONum);
-
- // Check LiveInts for a live range, but only for virtual registers.
- if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LiveInts->isNotInMIMap(MI)) {
- SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
- if (LiveInts->hasInterval(Reg)) {
- const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
- assert(VNI && "NULL valno is not allowed");
- if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
- report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << VNI->id << " is not defined at "
- << DefIdx << " in " << LI << '\n';
- }
- } else {
- report("No live range at def", MO, MONum);
- *OS << DefIdx << " is not live in " << LI << '\n';
- }
- } else {
- report("Virtual register has no Live interval", MO, MONum);
- }
- }
- }
// Check register classes.
if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
@@ -790,6 +723,10 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
break;
}
+ case MachineOperand::MO_RegisterMask:
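+ // Remember the mask; the registers it clobbers are killed in
+ // visitMachineInstrAfter.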
+ regMasks.push_back(MO->getRegMask());
+ break;
+
case MachineOperand::MO_MachineBasicBlock:
if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
report("PHI operand is not in the CFG", MO, MONum);
@@ -800,11 +737,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
LiveInts && !LiveInts->isNotInMIMap(MI)) {
LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) {
report("Instruction loads from dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
- if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) {
report("Instruction stores to dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
@@ -816,10 +753,127 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
}
+void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
+ const MachineInstr *MI = MO->getParent();
+ const unsigned Reg = MO->getReg();
+
+ // Both use and def operands can read a register.
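+ // (For example, a def of a sub-register reads the remainder of the wide
+ // register unless the operand is marked undef.)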
+ if (MO->readsReg()) {
+ regsLiveInButUnused.erase(Reg);
+
+ bool isKill = false;
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg)
+ isKill = true;
+ else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical", MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
+ if (isKill)
+ addRegWithSubRegs(regsKilled, Reg);
+
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true);
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+
+ // Use of a dead register.
+ if (!regsLive.count(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ // Reserved registers may be used even when 'dead'.
+ if (!isReserved(Reg))
+ report("Using an undefined physical register", MO, MONum);
+ } else {
+ BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+ // We don't know which virtual registers are live in, so only complain
+ // if vreg was killed in this MBB. Otherwise keep track of vregs that
+ // must be live in. PHI instructions are handled separately.
+ if (MInfo.regsKilled.count(Reg))
+ report("Using a killed virtual register", MO, MONum);
+ else if (!MI->isPHI())
+ MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+ }
+ }
+ }
+
+ if (MO->isDef()) {
+ // Register defined.
+ // TODO: verify that earlyclobber ops are not used.
+ if (MO->isDead())
+ addRegWithSubRegs(regsDead, Reg);
+ else
+ addRegWithSubRegs(regsDefined, Reg);
+
+ // Verify SSA form.
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ llvm::next(MRI->def_begin(Reg)) != MRI->def_end())
+ report("Multiple virtual register defs in SSA form", MO, MONum);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << VNI->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
+ }
+}
+
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
set_subtract(regsLive, regsKilled); regsKilled.clear();
+ // Kill any masked registers.
+ while (!regMasks.empty()) {
+ const uint32_t *Mask = regMasks.pop_back_val();
+ for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
+ if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ MachineOperand::clobbersPhysReg(Mask, *I))
+ regsDead.push_back(*I);
+ }
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
@@ -855,7 +909,7 @@ MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
void MachineVerifier::calcRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -892,7 +946,7 @@ void MachineVerifier::calcRegsPassed() {
// similar to calcRegsPassed, only backwards.
void MachineVerifier::calcRegsRequired() {
// First push live-in regs to predecessors' vregsRequired.
- DenseSet<const MachineBasicBlock*> todo;
+ SmallPtrSet<const MachineBasicBlock*, 8> todo;
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
const MachineBasicBlock &MBB(*MFI);
@@ -925,9 +979,10 @@ void MachineVerifier::calcRegsRequired() {
// Check PHI instructions at the beginning of MBB. It is assumed that
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+ SmallPtrSet<const MachineBasicBlock*, 8> seen;
for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
BBI != BBE && BBI->isPHI(); ++BBI) {
- DenseSet<const MachineBasicBlock*> seen;
+ seen.clear();
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
@@ -968,8 +1023,17 @@ void MachineVerifier::visitMachineFunctionAfter() {
}
// Now check liveness info if available
- if (LiveVars || LiveInts)
- calcRegsRequired();
+ calcRegsRequired();
+
+ if (MRI->isSSA() && !MF->empty()) {
+ BBInfo &MInfo = MBBInfoMap[&MF->front()];
+ for (RegSet::iterator
+ I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
+ ++I)
+ report("Virtual register def doesn't dominate all uses.",
+ MRI->getVRegDef(*I));
+ }
+
if (LiveVars)
verifyLiveVariables();
if (LiveInts)
@@ -1065,33 +1129,43 @@ void MachineVerifier::verifyLiveIntervals() {
report("No instruction at def index", MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
- } else if (!MI->modifiesRegister(LI.reg, TRI)) {
- report("Defining instruction does not modify register", MI);
- *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ continue;
}
+ bool hasDef = false;
bool isEarlyClobber = false;
- if (MI) {
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
- MOI->isEarlyClobber()) {
- isEarlyClobber = true;
- break;
- }
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ if (MOI->getReg() != LI.reg)
+ continue;
+ } else {
+ if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ !TRI->regsOverlap(LI.reg, MOI->getReg()))
+ continue;
}
+ hasDef = true;
+ if (MOI->isEarlyClobber())
+ isEarlyClobber = true;
+ }
+
+ if (!hasDef) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
}
// Early clobber defs begin at USE slots, but other defs must begin at
// DEF slots.
if (isEarlyClobber) {
- if (!VNI->def.isUse()) {
- report("Early clobber def must be at a USE slot", MF);
+ if (!VNI->def.isEarlyClobber()) {
+ report("Early clobber def must be at an early-clobber slot", MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
}
- } else if (!VNI->def.isDef()) {
- report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ } else if (!VNI->def.isRegister()) {
+ report("Non-PHI, non-early clobber def must be at a register slot",
+ MF);
*OS << "Valno #" << VNI->id << " is defined at " << VNI->def
<< " in " << LI << '\n';
}
@@ -1137,32 +1211,76 @@ void MachineVerifier::verifyLiveIntervals() {
*OS << " in " << LI << '\n';
continue;
}
- if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
+
+ // No more checks for live-out segments.
+ if (I->end == LiveInts->getMBBEndIdx(EndMBB))
+ continue;
+
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
I->print(*OS);
*OS << " in " << LI << '\n' << "Basic block starts at "
- << MBBStartIdx << '\n';
- } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
- !MI->readsVirtualRegister(LI.reg)) {
- // A live range can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- // FIXME: Should we check for each of these?
- bool hasDeadDef = false;
- for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end(); MOI != MOE; ++MOI) {
- if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
- hasDeadDef = true;
- break;
- }
- }
+ << MBBStartIdx << '\n';
+ continue;
+ }
+
+ // The block slot must refer to a basic block boundary.
+ if (I->end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ if (I->end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(I->start, I->end)) {
+ report("Live segment ending at dead slot spans instructions", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+
+ // A live segment can only end at an early-clobber slot if it is being
+ // redefined by an early-clobber def.
+ if (I->end.isEarlyClobber()) {
+ if (I+1 == E || (I+1)->start != I->end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != LI.reg)
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ if (MOI->isDef() && MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (I->end.isDead()) {
if (!hasDeadDef) {
- report("Instruction killing live segment neither defines nor reads "
- "register", MI);
+ report("Instruction doesn't have a dead def operand", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ } else {
+ if (!hasRead) {
+ report("Instruction ending live range doesn't read the register",
+ MI);
I->print(*OS);
*OS << " in " << LI << '\n';
}
@@ -1192,8 +1310,8 @@ void MachineVerifier::verifyLiveIntervals() {
// Check that VNI is live-out of all predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
PE = MFI->pred_end(); PI != PE; ++PI) {
- SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
- const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI);
+ const VNInfo *PVNI = LI.getVNInfoBefore(PEnd);
if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI))
continue;
@@ -1201,7 +1319,7 @@ void MachineVerifier::verifyLiveIntervals() {
if (!PVNI) {
report("Register not marked live out of predecessor", *PI);
*OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
- << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before "
<< PEnd << " in " << LI << '\n';
continue;
}
diff --git a/lib/CodeGen/ObjectCodeEmitter.cpp b/lib/CodeGen/ObjectCodeEmitter.cpp
deleted file mode 100644
index cf05275d7a31..000000000000
--- a/lib/CodeGen/ObjectCodeEmitter.cpp
+++ /dev/null
@@ -1,141 +0,0 @@
-//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/BinaryObject.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/CodeGen/ObjectCodeEmitter.h"
-
-//===----------------------------------------------------------------------===//
-// ObjectCodeEmitter Implementation
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
-ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
-ObjectCodeEmitter::~ObjectCodeEmitter() {}
-
-/// setBinaryObject - set the BinaryObject we are writting to
-void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
-
-/// emitByte - This callback is invoked when a byte needs to be
-/// written to the data stream, without buffer overflow testing.
-void ObjectCodeEmitter::emitByte(uint8_t B) {
- BO->emitByte(B);
-}
-
-/// emitWordLE - This callback is invoked when a 32-bit word needs to be
-/// written to the data stream in little-endian format.
-void ObjectCodeEmitter::emitWordLE(uint32_t W) {
- BO->emitWordLE(W);
-}
-
-/// emitWordBE - This callback is invoked when a 32-bit word needs to be
-/// written to the data stream in big-endian format.
-void ObjectCodeEmitter::emitWordBE(uint32_t W) {
- BO->emitWordBE(W);
-}
-
-/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
-/// written to the data stream in little-endian format.
-void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
- BO->emitDWordLE(W);
-}
-
-/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
-/// written to the data stream in big-endian format.
-void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
- BO->emitDWordBE(W);
-}
-
-/// emitAlignment - Align 'BO' to the necessary alignment boundary.
-void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
- uint8_t fill /* 0 */) {
- BO->emitAlignment(Alignment, fill);
-}
-
-/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
- BO->emitULEB128Bytes(Value);
-}
-
-/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
- BO->emitSLEB128Bytes(Value);
-}
-
-/// emitString - This callback is invoked when a String needs to be
-/// written to the data stream.
-void ObjectCodeEmitter::emitString(const std::string &String) {
- BO->emitString(String);
-}
-
-/// getCurrentPCValue - This returns the address that the next emitted byte
-/// will be output to.
-uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
- return BO->getCurrentPCOffset();
-}
-
-/// getCurrentPCOffset - Return the offset from the start of the emitted
-/// buffer that we are currently writing to.
-uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const {
- return BO->getCurrentPCOffset();
-}
-
-/// addRelocation - Whenever a relocatable address is needed, it should be
-/// noted with this interface.
-void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) {
- BO->addRelocation(relocation);
-}
-
-/// StartMachineBasicBlock - This should be called by the target when a new
-/// basic block is about to be emitted. This way the MCE knows where the
-/// start of the block is, and can implement getMachineBasicBlockAddress.
-void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCOffset();
-}
-
-/// getMachineBasicBlockAddress - Return the address of the specified
-/// MachineBasicBlock, only usable after the label for the MBB has been
-/// emitted.
-uintptr_t
-ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
-}
-
-/// getJumpTableEntryAddress - Return the address of the jump table with index
-/// 'Index' in the function that last called initJumpTableInfo.
-uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const {
- assert(JTLocations.size() > Index && "JT not emitted!");
- return JTLocations[Index];
-}
-
-/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
-/// the constant pool that was last emitted with the emitConstantPool method.
-uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const {
- assert(CPLocations.size() > Index && "CP not emitted!");
- return CPLocations[Index];
-}
-
-/// getConstantPoolEntrySection - Return the section of the 'Index' entry in
-/// the constant pool that was last emitted with the emitConstantPool method.
-uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const {
- assert(CPSections.size() > Index && "CP not emitted!");
- return CPSections[Index];
-}
-
-} // end namespace llvm
-
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index c05be130ec61..6da313e632af 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -56,11 +56,10 @@ namespace {
}
char OptimizePHIs::ID = 0;
+char &llvm::OptimizePHIsID = OptimizePHIs::ID;
INITIALIZE_PASS(OptimizePHIs, "opt-phis",
"Optimize machine instruction PHIs", false, false)
-FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
-
bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
TII = Fn.getTarget().getInstrInfo();
@@ -165,7 +164,11 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
InstrSet PHIsInCycle;
if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
SingleValReg != 0) {
- MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg);
+ unsigned OldReg = MI->getOperand(0).getReg();
+ if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
+ continue;
+
+ MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
++NumPHICycles;
Changed = true;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 6994aa58fbd5..0ed4c34bb105 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -92,11 +92,15 @@ STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
- "Eliminate PHI nodes for register allocation", false, false)
-
char& llvm::PHIEliminationID = PHIElimination::ID;
+INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false)
+
void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
@@ -241,7 +245,6 @@ void PHIElimination::LowerAtomicPHINode(
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
// Increment use count of the newly created virtual register.
- VI.NumUses++;
LV->setPHIJoin(IncomingReg);
// When we are reusing the incoming register, it may already have been
@@ -410,7 +413,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
return false; // Quick exit for basic blocks without PHIs.
bool Changed = false;
- for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
unsigned Reg = BBI->getOperand(i).getReg();
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 315aedddb9ef..53d1fcf7377a 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -12,62 +12,617 @@
//
//===---------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
+ cl::desc("Disable branch folding"));
+static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
+ cl::desc("Disable tail duplication"));
+static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden,
+ cl::desc("Disable pre-register allocation tail duplication"));
+static cl::opt<bool> EnableBlockPlacement("enable-block-placement",
+ cl::Hidden, cl::desc("Enable probability-driven block placement"));
+static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats",
+ cl::Hidden, cl::desc("Collect probability-driven block placement stats"));
+static cl::opt<bool> DisableCodePlace("disable-code-place", cl::Hidden,
+ cl::desc("Disable code placement"));
+static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden,
+ cl::desc("Disable Stack Slot Coloring"));
+static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden,
+ cl::desc("Disable Machine Dead Code Elimination"));
+static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden,
+ cl::desc("Disable Machine Common Subexpression Elimination"));
+static cl::opt<cl::boolOrDefault>
+OptimizeRegAlloc("optimize-regalloc", cl::Hidden,
+ cl::desc("Enable optimized register allocation compilation path."));
+static cl::opt<cl::boolOrDefault>
+EnableMachineSched("enable-misched", cl::Hidden,
+ cl::desc("Enable the machine instruction scheduling pass."));
+static cl::opt<bool> EnableStrongPHIElim("strong-phi-elim", cl::Hidden,
+ cl::desc("Use strong PHI elimination."));
+static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm",
+ cl::Hidden,
+ cl::desc("Disable Machine LICM"));
+static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden,
+ cl::desc("Disable Machine Sinking"));
+static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden,
+ cl::desc("Disable Loop Strength Reduction Pass"));
+static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden,
+ cl::desc("Disable Codegen Prepare"));
+static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
+ cl::desc("Disable Copy Propagation pass"));
+static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden,
+ cl::desc("Print LLVM IR produced by the loop-reduce pass"));
+static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
+ cl::desc("Print LLVM IR input to isel pass"));
+static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
+ cl::desc("Dump garbage collector data"));
+static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
+ cl::desc("Verify generated machine code"),
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+
+/// Allow standard passes to be disabled by command line options. This supports
+/// simple binary flags that either suppress the pass or do nothing.
+/// i.e. -disable-mypass=false has no effect.
+/// These should be converted to boolOrDefault in order to use applyOverride.
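+/// For example, passing -disable-machine-licm makes applyDisable return
+/// &NoPassID, so the pass is simply never added.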
+static AnalysisID applyDisable(AnalysisID ID, bool Override) {
+ if (Override)
+ return &NoPassID;
+ return ID;
+}
+
+/// Allow Pass selection to be overridden by command line options. This supports
+/// flags with ternary conditions. TargetID is passed through by default. The
+/// pass is suppressed when the option is false. When the option is true, the
+/// StandardID is selected if the target provides no default.
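+/// For example, -enable-misched returns the target's substituted pass when it
+/// names a real pass and falls back to MachineSchedulerID otherwise, while
+/// -enable-misched=false suppresses the pass entirely.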
+static AnalysisID applyOverride(AnalysisID TargetID, cl::boolOrDefault Override,
+ AnalysisID StandardID) {
+ switch (Override) {
+ case cl::BOU_UNSET:
+ return TargetID;
+ case cl::BOU_TRUE:
+ if (TargetID != &NoPassID)
+ return TargetID;
+ if (StandardID == &NoPassID)
+ report_fatal_error("Target cannot enable pass");
+ return StandardID;
+ case cl::BOU_FALSE:
+ return &NoPassID;
+ }
+ llvm_unreachable("Invalid command line option state");
+}
+
+/// Allow standard passes to be disabled by the command line, regardless of who
+/// is adding the pass.
+///
+/// StandardID is the pass identified in the standard pass pipeline and provided
+/// to addPass(). It may be a target-specific ID in the case that the target
+/// directly adds its own pass, but in that case we harmlessly fall through.
+///
+/// TargetID is the pass that the target has configured to override StandardID.
+///
+/// StandardID may be a pseudo ID. In that case TargetID is the name of the real
+/// pass to run. This allows multiple options to control a single pass depending
+/// on where in the pipeline that pass is added.
+static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) {
+ if (StandardID == &PostRASchedulerID)
+ return applyDisable(TargetID, DisablePostRA);
+
+ if (StandardID == &BranchFolderPassID)
+ return applyDisable(TargetID, DisableBranchFold);
+
+ if (StandardID == &TailDuplicateID)
+ return applyDisable(TargetID, DisableTailDuplicate);
+
+ if (StandardID == &TargetPassConfig::EarlyTailDuplicateID)
+ return applyDisable(TargetID, DisableEarlyTailDup);
+
+ if (StandardID == &MachineBlockPlacementID)
+ return applyDisable(TargetID, DisableCodePlace);
+
+ if (StandardID == &CodePlacementOptID)
+ return applyDisable(TargetID, DisableCodePlace);
+
+ if (StandardID == &StackSlotColoringID)
+ return applyDisable(TargetID, DisableSSC);
+
+ if (StandardID == &DeadMachineInstructionElimID)
+ return applyDisable(TargetID, DisableMachineDCE);
+
+ if (StandardID == &MachineLICMID)
+ return applyDisable(TargetID, DisableMachineLICM);
+
+ if (StandardID == &MachineCSEID)
+ return applyDisable(TargetID, DisableMachineCSE);
+
+ if (StandardID == &MachineSchedulerID)
+ return applyOverride(TargetID, EnableMachineSched, StandardID);
+
+ if (StandardID == &TargetPassConfig::PostRAMachineLICMID)
+ return applyDisable(TargetID, DisablePostRAMachineLICM);
+
+ if (StandardID == &MachineSinkingID)
+ return applyDisable(TargetID, DisableMachineSink);
+
+ if (StandardID == &MachineCopyPropagationID)
+ return applyDisable(TargetID, DisableCopyProp);
+
+ return TargetID;
+}
+
//===---------------------------------------------------------------------===//
+/// TargetPassConfig
+//===---------------------------------------------------------------------===//
+
+INITIALIZE_PASS(TargetPassConfig, "targetpassconfig",
+ "Target Pass Configuration", false, false)
+char TargetPassConfig::ID = 0;
+
+static char NoPassIDAnchor = 0;
+char &llvm::NoPassID = NoPassIDAnchor;
+
+// Pseudo Pass IDs.
+char TargetPassConfig::EarlyTailDuplicateID = 0;
+char TargetPassConfig::PostRAMachineLICMID = 0;
+
+namespace llvm {
+class PassConfigImpl {
+public:
+ // List of passes explicitly substituted by this target. Normally this is
+ // empty, but it is a convenient way to suppress or replace specific passes
+ // that are part of a standard pass pipeline without overridding the entire
+ // pipeline. This mechanism allows target options to inherit a standard pass's
+ // user interface. For example, a target may disable a standard pass by
+ // default by substituting NoPass, and the user may still enable that standard
+ // pass with an explicit command line option.
+ DenseMap<AnalysisID,AnalysisID> TargetPasses;
+};
+} // namespace llvm
+
+// Out of line virtual method.
+TargetPassConfig::~TargetPassConfig() {
+ delete Impl;
+}
+
+// Out of line constructor provides default values for pass options and
+// registers all common codegen passes.
+TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
+ : ImmutablePass(ID), TM(tm), PM(pm), Impl(0), Initialized(false),
+ DisableVerify(false),
+ EnableTailMerge(true) {
+
+ Impl = new PassConfigImpl();
+
+ // Register all target independent codegen passes to activate their PassIDs,
+ // including this pass itself.
+ initializeCodeGen(*PassRegistry::getPassRegistry());
+
+ // Substitute Pseudo Pass IDs for real ones.
+ substitutePass(EarlyTailDuplicateID, TailDuplicateID);
+ substitutePass(PostRAMachineLICMID, MachineLICMID);
+
+ // Temporarily disable experimental passes.
+ substitutePass(MachineSchedulerID, NoPassID);
+}
+
+/// createPassConfig - Create a pass configuration object to be used by
+/// addPassToEmitX methods for generating a pipeline of CodeGen passes.
+///
+/// Targets may override this to extend TargetPassConfig.
+TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
+
+TargetPassConfig::TargetPassConfig()
+ : ImmutablePass(ID), PM(*(PassManagerBase*)0) {
+ llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
+}
+
+// Helper to verify the analysis is really immutable.
+void TargetPassConfig::setOpt(bool &Opt, bool Val) {
+ assert(!Initialized && "PassConfig is immutable");
+ Opt = Val;
+}
+
+void TargetPassConfig::substitutePass(char &StandardID, char &TargetID) {
+ Impl->TargetPasses[&StandardID] = &TargetID;
+}
+
+AnalysisID TargetPassConfig::getPassSubstitution(AnalysisID ID) const {
+ DenseMap<AnalysisID, AnalysisID>::const_iterator
+ I = Impl->TargetPasses.find(ID);
+ if (I == Impl->TargetPasses.end())
+ return ID;
+ return I->second;
+}
+
+/// Add a CodeGen pass at this point in the pipeline after checking for target
+/// and command line overrides.
+AnalysisID TargetPassConfig::addPass(char &ID) {
+ assert(!Initialized && "PassConfig is immutable");
+
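+ // Apply the target's substitution for this pass first, then any command
+ // line override.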
+ AnalysisID TargetID = getPassSubstitution(&ID);
+ AnalysisID FinalID = overridePass(&ID, TargetID);
+ if (FinalID == &NoPassID)
+ return FinalID;
+
+ Pass *P = Pass::createPass(FinalID);
+ if (!P)
+ llvm_unreachable("Pass ID not registered");
+ PM.add(P);
+ return FinalID;
+}
+
+void TargetPassConfig::printAndVerify(const char *Banner) const {
+ if (TM->shouldPrintMachineCode())
+ PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
+
+ if (VerifyMachineCode)
+ PM.add(createMachineVerifierPass(Banner));
+}
+
+/// Add common target configurable passes that perform LLVM IR to IR transforms
+/// following machine independent optimization.
+void TargetPassConfig::addIRPasses() {
+ // Basic AliasAnalysis support.
+ // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+ // BasicAliasAnalysis wins if they disagree. This is intended to help
+ // support "obvious" type-punning idioms.
+ PM.add(createTypeBasedAliasAnalysisPass());
+ PM.add(createBasicAliasAnalysisPass());
+
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
+ // Run loop strength reduction before anything else.
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ if (PrintLSR)
+ PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
+ }
+
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+}
+
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
+ if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
+ PM.add(createCodeGenPreparePass(getTargetLowering()));
+
+ PM.add(createStackProtectorPass(getTargetLowering()));
+
+ addPreISel();
+
+ if (PrintISelInput)
+ PM.add(createPrintFunctionPass("\n\n"
+ "*** Final LLVM Code input to ISel ***\n",
+ &dbgs()));
+
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+}
+
+/// Add the complete set of target-independent postISel code generator passes.
///
-/// RegisterRegAlloc class - Track the registration of register allocators.
+/// This can be read as the standard order of major LLVM CodeGen stages. Stages
+/// with nontrivial configuration or multiple passes are broken out below in
+/// add%Stage routines.
///
+/// Any TargetPassConfig::addXX routine may be overridden by the Target. The
+/// addPre/Post methods with empty header implementations allow injecting
+/// target-specific fixups just before or after major stages. Additionally,
+/// targets have the flexibility to change pass order within a stage by
+/// overriding default implementation of add%Stage routines below. Each
+/// technique has maintainability tradeoffs because alternate pass orders are
+/// not well supported. addPre/Post works better if the target pass is easily
+/// tied to a common pass. But if it has subtle dependencies on multiple passes,
+/// the target should override the stage instead.
+///
+/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection
+/// before/after any target-independent pass. But it's currently overkill.
+void TargetPassConfig::addMachinePasses() {
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
+ // Expand pseudo-instructions emitted by ISel.
+ addPass(ExpandISelPseudosID);
+
+ // Add passes that optimize machine instructions in SSA form.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addMachineSSAOptimization();
+ } else {
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotAllocationID);
+ }
+
+ // Run pre-ra passes.
+ if (addPreRegAlloc())
+ printAndVerify("After PreRegAlloc passes");
+
+ // Run register allocation and passes that are tightly coupled with it,
+ // including phi elimination and scheduling.
+ if (getOptimizeRegAlloc())
+ addOptimizedRegAlloc(createRegAllocPass(true));
+ else
+ addFastRegAlloc(createRegAllocPass(false));
+
+ // Run post-ra passes.
+ if (addPostRegAlloc())
+ printAndVerify("After PostRegAlloc passes");
+
+ // Insert prolog/epilog code. Eliminate abstract frame index references...
+ addPass(PrologEpilogCodeInserterID);
+ printAndVerify("After PrologEpilogCodeInserter");
+
+ // Add passes that optimize machine instructions after register allocation.
+ if (getOptLevel() != CodeGenOpt::None)
+ addMachineLateOptimization();
+
+ // Expand pseudo instructions before second scheduling pass.
+ addPass(ExpandPostRAPseudosID);
+ printAndVerify("After ExpandPostRAPseudos");
+
+ // Run pre-sched2 passes.
+ if (addPreSched2())
+ printAndVerify("After PreSched2 passes");
+
+ // Second pass scheduler.
+ if (getOptLevel() != CodeGenOpt::None) {
+ addPass(PostRASchedulerID);
+ printAndVerify("After PostRAScheduler");
+ }
+
+ // GC
+ addPass(GCMachineCodeAnalysisID);
+ if (PrintGCInfo)
+ PM.add(createGCInfoPrinter(dbgs()));
+
+ // Basic block placement.
+ if (getOptLevel() != CodeGenOpt::None)
+ addBlockPlacement();
+
+ if (addPreEmitPass())
+ printAndVerify("After PreEmit passes");
+}
+
+/// Add passes that optimize machine instructions in SSA form.
+void TargetPassConfig::addMachineSSAOptimization() {
+ // Pre-ra tail duplication.
+ if (addPass(EarlyTailDuplicateID) != &NoPassID)
+ printAndVerify("After Pre-RegAlloc TailDuplicate");
+
+ // Optimize PHIs before DCE: removing dead PHI cycles may make more
+ // instructions dead.
+ addPass(OptimizePHIsID);
+
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ addPass(LocalStackSlotAllocationID);
+
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ addPass(DeadMachineInstructionElimID);
+ printAndVerify("After codegen DCE pass");
+
+ addPass(MachineLICMID);
+ addPass(MachineCSEID);
+ addPass(MachineSinkingID);
+ printAndVerify("After Machine LICM, CSE and Sinking passes");
+
+ addPass(PeepholeOptimizerID);
+ printAndVerify("After codegen peephole optimization pass");
+}
+
+//===---------------------------------------------------------------------===//
+/// Register Allocation Pass Configuration
//===---------------------------------------------------------------------===//
+
+bool TargetPassConfig::getOptimizeRegAlloc() const {
+ switch (OptimizeRegAlloc) {
+ case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None;
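+ // (i.e. at -O0 the fast allocator path is used by default.)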
+ case cl::BOU_TRUE: return true;
+ case cl::BOU_FALSE: return false;
+ }
+ llvm_unreachable("Invalid optimize-regalloc state");
+}
+
+/// RegisterRegAlloc's global Registry tracks allocator registration.
MachinePassRegistry RegisterRegAlloc::Registry;
-static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+/// A dummy default pass factory indicates whether the register allocator is
+/// overridden on the command line.
+static FunctionPass *useDefaultRegisterAllocator() { return 0; }
static RegisterRegAlloc
defaultRegAlloc("default",
"pick register allocator based on -O option",
- createDefaultRegisterAllocator);
+ useDefaultRegisterAllocator);
-//===---------------------------------------------------------------------===//
-///
-/// RegAlloc command line options.
-///
-//===---------------------------------------------------------------------===//
+/// -regalloc=... command line option.
static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
- cl::init(&createDefaultRegisterAllocator),
+ cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use"));
-//===---------------------------------------------------------------------===//
+/// Instantiate the default register allocator pass for this target for either
+/// the optimized or unoptimized allocation path. This will be added to the pass
+/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc
+/// in the optimized case.
///
-/// createRegisterAllocator - choose the appropriate register allocator.
+/// A target that uses the standard regalloc pass order for fast or optimized
+/// allocation may still override this for per-target regalloc
+/// selection. But -regalloc=... always takes precedence.
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) {
+ if (Optimized)
+ return createGreedyRegisterAllocator();
+ else
+ return createFastRegisterAllocator();
+}
+
+/// Find and instantiate the register allocation pass requested by this target
+/// at the current optimization level. Different register allocators are
+ /// defined as separate passes because they may require different analyses.
///
-//===---------------------------------------------------------------------===//
-FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+/// This helper ensures that the regalloc= option is always available,
+/// even for targets that override the default allocator.
+///
+/// FIXME: When MachinePassRegistry registers pass IDs instead of function ptrs,
+/// this can be folded into addPass.
+FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+ // Initialize the global default.
if (!Ctor) {
Ctor = RegAlloc;
RegisterRegAlloc::setDefault(RegAlloc);
}
+ if (Ctor != useDefaultRegisterAllocator)
+ return Ctor();
- // This forces linking of the linear scan register allocator,
- // so -regalloc=linearscan still works in clang.
- if (Ctor == createLinearScanRegisterAllocator)
- return createLinearScanRegisterAllocator();
+ // With no -regalloc= override, ask the target for a regalloc pass.
+ return createTargetRegisterAllocator(Optimized);
+}
- if (Ctor != createDefaultRegisterAllocator)
- return Ctor();
+/// Add the minimum set of target-independent passes that are required for
+/// register allocation. No coalescing or scheduling.
+void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ addPass(PHIEliminationID);
+ addPass(TwoAddressInstructionPassID);
- // When the 'default' allocator is requested, pick one based on OptLevel.
- switch (OptLevel) {
- case CodeGenOpt::None:
- return createFastRegisterAllocator();
- default:
- return createGreedyRegisterAllocator();
+ PM.add(RegAllocPass);
+ printAndVerify("After Register Allocation");
+}
+
+/// Add standard target-independent passes that are tightly coupled with
+/// optimized register allocation, including coalescing, machine instruction
+/// scheduling, and register allocation itself.
+void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // LiveVariables currently requires pure SSA form.
+ //
+ // FIXME: Once TwoAddressInstruction pass no longer uses kill flags,
+ // LiveVariables can be removed completely, and LiveIntervals can be directly
+ // computed. (We still either need to regenerate kill flags after regalloc, or
+ // preferably fix the scavenger to not depend on them).
+ addPass(LiveVariablesID);
+
+ // Add passes that move from transformed SSA into conventional SSA. This is a
+ // "copy coalescing" problem.
+ //
+ if (!EnableStrongPHIElim) {
+ // Edge splitting is smarter with machine loop info.
+ addPass(MachineLoopInfoID);
+ addPass(PHIEliminationID);
+ }
+ addPass(TwoAddressInstructionPassID);
+
+ // FIXME: Either remove this pass completely, or fix it so that it works on
+ // SSA form. We could modify LiveIntervals to be independent of this pass, but
+ // it would be even better to simply eliminate *all* IMPLICIT_DEFs before
+ // leaving SSA.
+ addPass(ProcessImplicitDefsID);
+
+ if (EnableStrongPHIElim)
+ addPass(StrongPHIEliminationID);
+
+ addPass(RegisterCoalescerID);
+
+ // PreRA instruction scheduling.
+ if (addPass(MachineSchedulerID) != &NoPassID)
+ printAndVerify("After Machine Scheduling");
+
+ // Add the selected register allocation pass.
+ PM.add(RegAllocPass);
+ printAndVerify("After Register Allocation");
+
+ // FinalizeRegAlloc is convenient until MachineInstrBundles is more mature,
+ // but eventually, all users of it should probably be moved to addPostRA and
+ // it can go away. Currently, it's the intended place for targets to run
+ // FinalizeMachineBundles, because passes other than MachineScheduling and
+ // RegAlloc itself may not be aware of bundles.
+ if (addFinalizeRegAlloc())
+ printAndVerify("After RegAlloc finalization");
+
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with registers when it's capable of adding
+ // kill markers.
+ addPass(StackSlotColoringID);
+
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(PostRAMachineLICMID);
+
+ printAndVerify("After StackSlotColoring and postra Machine LICM");
+}
+
+//===---------------------------------------------------------------------===//
+/// Post RegAlloc Pass Configuration
+//===---------------------------------------------------------------------===//
+
+/// Add passes that optimize machine instructions after register allocation.
+void TargetPassConfig::addMachineLateOptimization() {
+ // Branch folding must be run after regalloc and prolog/epilog insertion.
+ if (addPass(BranchFolderPassID) != &NoPassID)
+ printAndVerify("After BranchFolding");
+
+ // Tail duplication.
+ if (addPass(TailDuplicateID) != &NoPassID)
+ printAndVerify("After TailDuplicate");
+
+ // Copy propagation.
+ if (addPass(MachineCopyPropagationID) != &NoPassID)
+ printAndVerify("After copy propagation pass");
+}
+
+/// Add standard basic block placement passes.
+void TargetPassConfig::addBlockPlacement() {
+ AnalysisID ID = &NoPassID;
+ if (EnableBlockPlacement) {
+ // MachineBlockPlacement is an experimental pass which is disabled by
+ // default currently. Eventually it should subsume CodePlacementOpt, so
+ // when enabled, the other is disabled.
+ ID = addPass(MachineBlockPlacementID);
+ } else {
+ ID = addPass(CodePlacementOptID);
+ }
+ if (ID != &NoPassID) {
+ // Run a separate pass to collect block placement statistics.
+ if (EnableBlockPlacementStats)
+ addPass(MachineBlockPlacementStatsID);
+
+ printAndVerify("After machine block placement.");
}
}
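For orientation, here is a minimal sketch, not part of this patch, of how a backend plugs into the hooks above: subclass TargetPassConfig and override createTargetRegisterAllocator (the class name MyTargetPassConfig and the two-argument base constructor are assumptions). An explicit -regalloc=... on the command line still wins, because createRegAllocPass() consults the option before falling back to the target hook.

class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  // Prefer the basic allocator when optimizing; keep the fast one for -O0.
  // -regalloc=... overrides this, since createRegAllocPass() checks the
  // command line before calling here.
  virtual FunctionPass *createTargetRegisterAllocator(bool Optimized) {
    return Optimized ? createBasicRegisterAllocator()
                     : createFastRegisterAllocator();
  }
};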
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index bbc7ce2d0a42..9c5c029000c0 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -39,7 +39,7 @@
// =>
// v1 = bitcast v0
// = v0
-//
+//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "peephole-opt"
@@ -68,7 +68,7 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumBitcasts, "Number of bitcasts eliminated");
STATISTIC(NumCmps, "Number of compares eliminated");
-STATISTIC(NumImmFold, "Number of move immediate foled");
+STATISTIC(NumImmFold, "Number of move immediates folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -109,22 +109,19 @@ namespace {
}
char PeepholeOptimizer::ID = 0;
+char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
"Peephole Optimizations", false, false)
-FunctionPass *llvm::createPeepholeOptimizerPass() {
- return new PeepholeOptimizer();
-}
-
/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
/// a single register and writes a single register and it does not modify the
/// source, and if the source value is preserved as a sub-register of the
/// result, then replace all reachable uses of the source with the subreg of the
/// result.
-///
+///
/// Do not generate an EXTRACT that is used only in a debug use, as this changes
/// the code. Since this code does not currently share EXTRACTs, just ignore all
/// debug uses.
@@ -134,7 +131,7 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
-
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
@@ -240,6 +237,10 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
if (PHIBBs.count(UseMBB))
continue;
+ // About to add uses of DstReg, clear DstReg's kill flags.
+ if (!Changed)
+ MRI->clearKillFlags(DstReg);
+
unsigned NewVR = MRI->createVirtualRegister(RC);
BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVR)
@@ -292,7 +293,7 @@ bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
assert(Def && Src && "Malformed bitcast instruction!");
MachineInstr *DefMI = MRI->getVRegDef(Src);
- if (!DefMI || !DefMI->getDesc().isBitcast())
+ if (!DefMI || !DefMI->isBitcast())
return false;
unsigned SrcSrc = 0;
@@ -353,7 +354,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isMoveImmediate())
+ if (!MI->isMoveImmediate())
return false;
if (MCID.getNumDefs() != 1)
return false;
@@ -363,7 +364,7 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
ImmDefRegs.insert(Reg);
return true;
}
-
+
return false;
}
@@ -395,7 +396,7 @@ bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (DisablePeephole)
return false;
-
+
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -408,7 +409,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
-
+
bool SeenMoveImm = false;
LocalMIs.clear();
ImmDefRegs.clear();
@@ -428,17 +429,15 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
-
- if (MCID.isBitcast()) {
+ if (MI->isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
- }
- } else if (MCID.isCompare()) {
+ }
+ } else if (MI->isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
LocalMIs.erase(MI);
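A note on the MCID-to-MI rewrite in the hunks above: the MachineInstr predicates are assumed to be thin wrappers that forward to the corresponding MCInstrDesc flag, so the change is behavior-preserving shorthand. A sketch of the equivalence:

#include "llvm/CodeGen/MachineInstr.h"

// Both forms answer the same question; the second avoids spelling out the
// descriptor at every query site (wrapper behavior assumed here, not
// verified against this exact revision).
static bool viaDesc(const llvm::MachineInstr *MI) {
  return MI->getDesc().isBitcast();
}
static bool viaInstr(const llvm::MachineInstr *MI) {
  return MI->isBitcast();
}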
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index c73e87733cb4..24d3e5ab0c9d 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,6 @@
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
#include "RegisterClassInfo.h"
-#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetLowering.h"
@@ -45,7 +45,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Statistic.h"
-#include <set>
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
@@ -82,16 +81,15 @@ namespace {
AliasAnalysis *AA;
const TargetInstrInfo *TII;
RegisterClassInfo RegClassInfo;
- CodeGenOpt::Level OptLevel;
public:
static char ID;
- PostRAScheduler(CodeGenOpt::Level ol) :
- MachineFunctionPass(ID), OptLevel(ol) {}
+ PostRAScheduler() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<TargetPassConfig>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
@@ -99,10 +97,6 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
- return "Post RA top-down list latency scheduler";
- }
-
bool runOnMachineFunction(MachineFunction &Fn);
};
char PostRAScheduler::ID = 0;
@@ -130,36 +124,49 @@ namespace {
/// AA - AliasAnalysis for making memory reference queries.
AliasAnalysis *AA;
- /// KillIndices - The index of the most recent kill (proceding bottom-up),
- /// or ~0u if the register is not live.
- std::vector<unsigned> KillIndices;
+ /// LiveRegs - true if the register is live.
+ BitVector LiveRegs;
+
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
AliasAnalysis *AA, const RegisterClassInfo&,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs);
~SchedulePostRATDList();
- /// StartBlock - Initialize register live-range state for scheduling in
+ /// startBlock - Initialize register live-range state for scheduling in
/// this block.
///
- void StartBlock(MachineBasicBlock *BB);
+ void startBlock(MachineBasicBlock *BB);
+
+ /// Initialize the scheduler state for the next scheduling region.
+ virtual void enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount);
+
+ /// Notify that the scheduler has finished scheduling the current region.
+ virtual void exitRegion();
/// Schedule - Schedule the instruction range using list scheduling.
///
- void Schedule();
+ void schedule();
+
+ void EmitSchedule();
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
void Observe(MachineInstr *MI, unsigned Count);
- /// FinishBlock - Clean up register live-range state.
+ /// finishBlock - Clean up register live-range state.
///
- void FinishBlock();
+ void finishBlock();
/// FixupKills - Fix register kill flags that have been made
/// invalid due to scheduling
@@ -177,16 +184,23 @@ namespace {
// adjustments may be made to the instruction if necessary. Return
// true if the operand has been deleted, false if not.
bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+
+ void dumpSchedule() const;
};
}
+char &llvm::PostRASchedulerID = PostRAScheduler::ID;
+
+INITIALIZE_PASS(PostRAScheduler, "post-RA-sched",
+ "Post RA top-down list latency scheduler", false, false)
+
SchedulePostRATDList::SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
AliasAnalysis *AA, const RegisterClassInfo &RCI,
TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
- SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
- : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
- KillIndices(TRI->getNumRegs())
+ SmallVectorImpl<const TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA),
+ LiveRegs(TRI->getNumRegs())
{
const TargetMachine &TM = MF.getTarget();
const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
@@ -204,16 +218,48 @@ SchedulePostRATDList::~SchedulePostRATDList() {
delete AntiDepBreak;
}
+/// Initialize state associated with the next scheduling region.
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
+ Sequence.clear();
+}
+
+/// Print the schedule before exiting the region.
+void SchedulePostRATDList::exitRegion() {
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+ ScheduleDAGInstrs::exitRegion();
+}
+
+/// dumpSchedule - dump the scheduled Sequence.
+void SchedulePostRATDList::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
TII = Fn.getTarget().getInstrInfo();
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
+
RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
- SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode =
+ TargetSubtargetInfo::ANTIDEP_NONE;
+ SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
@@ -221,7 +267,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
- if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+ if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
+ CriticalPathRCs))
return false;
}
@@ -248,13 +295,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
static int bbcnt = 0;
if (bbcnt++ % DebugDiv != DebugMod)
continue;
- dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
- ":BB#" << MBB->getNumber() << " ***\n";
+ dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getName()
+ << ":BB#" << MBB->getNumber() << " ***\n";
}
#endif
// Initialize register live-range state for scheduling in this block.
- Scheduler.StartBlock(MBB);
+ Scheduler.startBlock(MBB);
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
@@ -262,8 +309,13 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
MachineInstr *MI = llvm::prior(I);
- if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
- Scheduler.Run(MBB, I, Current, CurrentCount);
+ // Calls are not scheduling boundaries before register allocation, but
+ // post-ra we don't gain anything by scheduling across calls since we
+ // don't need to worry about register pressure.
+ if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) {
+ Scheduler.enterRegion(MBB, I, Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
Current = MI;
CurrentCount = Count - 1;
@@ -271,15 +323,19 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
}
I = MI;
--Count;
+ if (MI->isBundle())
+ Count -= MI->getBundleSize();
}
assert(Count == 0 && "Instruction count mismatch!");
assert((MBB->begin() == Current || CurrentCount != 0) &&
"Instruction count mismatch!");
- Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount);
+ Scheduler.schedule();
+ Scheduler.exitRegion();
Scheduler.EmitSchedule();
// Clean up register live-range state.
- Scheduler.FinishBlock();
+ Scheduler.finishBlock();
// Update register kills
Scheduler.FixupKills(MBB);
@@ -291,9 +347,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
-void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
// Call the superclass.
- ScheduleDAGInstrs::StartBlock(BB);
+ ScheduleDAGInstrs::startBlock(BB);
// Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
@@ -303,14 +359,14 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
/// Schedule - Schedule the instruction range using list scheduling.
///
-void SchedulePostRATDList::Schedule() {
+void SchedulePostRATDList::schedule() {
// Build the scheduling graph.
- BuildSchedGraph(AA);
+ buildSchedGraph(AA);
if (AntiDepBreak != NULL) {
unsigned Broken =
- AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
- InsertPosIndex, DbgValues);
+ AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
+ EndIndex, DbgValues);
if (Broken != 0) {
// We made changes. Update the dependency graph.
@@ -319,11 +375,8 @@ void SchedulePostRATDList::Schedule() {
// the def's anti-dependence *and* output-dependence edges due to
// that register, and add new anti-dependence and output-dependence
// edges based on the next live range of the register.
- SUnits.clear();
- Sequence.clear();
- EntrySU = SUnit();
- ExitSU = SUnit();
- BuildSchedGraph(AA);
+ ScheduleDAG::clearDAG();
+ buildSchedGraph(AA);
NumFixedAnti += Broken;
}
@@ -343,38 +396,36 @@ void SchedulePostRATDList::Schedule() {
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
if (AntiDepBreak != NULL)
- AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+ AntiDepBreak->Observe(MI, Count, EndIndex);
}
/// FinishBlock - Clean up register live-range state.
///
-void SchedulePostRATDList::FinishBlock() {
+void SchedulePostRATDList::finishBlock() {
if (AntiDepBreak != NULL)
AntiDepBreak->FinishBlock();
// Call the superclass.
- ScheduleDAGInstrs::FinishBlock();
+ ScheduleDAGInstrs::finishBlock();
}
/// StartBlockForKills - Initialize register live-range state for updating kills
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
- // Initialize the indices to indicate that no registers are live.
- for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
- KillIndices[i] = ~0u;
+ // Start with no live registers.
+ LiveRegs.reset();
// Determine the live-out physregs for this block.
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
unsigned Reg = *I;
- KillIndices[Reg] = BB->size();
+ LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = BB->size();
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
else {
@@ -384,12 +435,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- KillIndices[Reg] = BB->size();
+ LiveRegs.set(Reg);
// Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = BB->size();
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
}
@@ -404,7 +454,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
}
// If MO itself is live, clear the kill flag...
- if (KillIndices[MO.getReg()] != ~0u) {
+ if (LiveRegs.test(MO.getReg())) {
MO.setIsKill(false);
return false;
}
@@ -414,9 +464,9 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(false);
bool AllDead = true;
const unsigned SuperReg = MO.getReg();
- for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(SuperReg);
*Subreg; ++Subreg) {
- if (KillIndices[*Subreg] != ~0u) {
+ if (LiveRegs.test(*Subreg)) {
MI->addOperand(MachineOperand::CreateReg(*Subreg,
true /*IsDef*/,
true /*IsImp*/,
@@ -437,7 +487,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
- std::set<unsigned> killedRegs;
+ BitVector killedRegs(TRI->getNumRegs());
BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
@@ -455,6 +505,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// are completely defined.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ LiveRegs.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
@@ -462,19 +514,18 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
- KillIndices[Reg] = ~0u;
+ LiveRegs.reset(Reg);
// Repeat for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = ~0u;
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.reset(*Subreg);
}
// Examine all used registers and set/clear kill flag. When a
// register is used multiple times we only set the kill flag on
// the first use.
- killedRegs.clear();
+ killedRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
@@ -482,12 +533,12 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
bool kill = false;
- if (killedRegs.find(Reg) == killedRegs.end()) {
+ if (!killedRegs.test(Reg)) {
kill = true;
// A register is not killed if any subregs are live...
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
- if (KillIndices[*Subreg] != ~0u) {
+ if (LiveRegs.test(*Subreg)) {
kill = false;
break;
}
@@ -496,7 +547,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
// If subreg is not live, then register is killed if it became
// live in this instruction
if (kill)
- kill = (KillIndices[Reg] == ~0u);
+ kill = !LiveRegs.test(Reg);
}
if (MO.isKill() != kill) {
@@ -506,7 +557,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
DEBUG(MI->dump());
}
- killedRegs.insert(Reg);
+ killedRegs.set(Reg);
}
// Mark any used register (that is not using undef) and subregs as
@@ -517,12 +568,11 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
unsigned Reg = MO.getReg();
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
- KillIndices[Reg] = Count;
+ LiveRegs.set(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- KillIndices[*Subreg] = Count;
- }
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ LiveRegs.set(*Subreg);
}
}
}
@@ -585,7 +635,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
ReleaseSuccessors(SU);
SU->isScheduled = true;
- AvailableQueue.ScheduledNode(SU);
+ AvailableQueue.scheduledNode(SU);
}
/// ListScheduleTopDown - The main loop of list scheduling for top-down
@@ -699,14 +749,46 @@ void SchedulePostRATDList::ListScheduleTopDown() {
}
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
-#endif
+ unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+#endif // NDEBUG
}
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
+// EmitSchedule - Emit the machine code in scheduled order.
+void SchedulePostRATDList::EmitSchedule() {
+ RegionBegin = RegionEnd;
+
+ // If first instruction was a DBG_VALUE then put it back.
+ if (FirstDbgValue)
+ BB->splice(RegionEnd, BB, FirstDbgValue);
+
+ // Then re-insert them according to the given schedule.
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ BB->splice(RegionEnd, BB, SU->getInstr());
+ else
+ // Null SUnit* is a noop.
+ TII->insertNoop(*BB, RegionEnd);
+
+ // Update the Begin iterator, as the first instruction in the block
+ // may have been scheduled later.
+ if (i == 0)
+ RegionBegin = prior(RegionEnd);
+ }
-FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
- return new PostRAScheduler(OptLevel);
+ // Reinsert any remaining debug_values.
+ for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
+ DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
+ std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
+ MachineInstr *DbgValue = P.first;
+ MachineBasicBlock::iterator OrigPrevMI = P.second;
+ BB->splice(++OrigPrevMI, BB, DbgValue);
+ }
+ DbgValues.clear();
+ FirstDbgValue = NULL;
}
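The KillIndices-to-LiveRegs change above is a pure representation change: the old vector stored a kill index per register with ~0u meaning "not live", but this code only ever tested liveness, so one bit per register suffices and resets in a single call. A self-contained sketch of the before/after queries:

#include "llvm/ADT/BitVector.h"
#include <vector>

// Before: liveness encoded as "kill index != ~0u".
static bool isLiveOld(const std::vector<unsigned> &KillIndices, unsigned Reg) {
  return KillIndices[Reg] != ~0u;
}

// After: liveness is a single bit per register; LiveRegs.reset() clears all.
static bool isLiveNew(const llvm::BitVector &LiveRegs, unsigned Reg) {
  return LiveRegs.test(Reg);
}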
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b1d8c9760225..1ad3479afb4c 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,6 +26,8 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
+char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID;
+
INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
"Process Implicit Definitions", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
@@ -36,7 +38,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<LiveVariables>();
- AU.addRequired<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
AU.addPreservedID(TwoAddressInstructionPassID);
@@ -50,10 +51,10 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
SmallSet<unsigned, 8> &ImpDefRegs) {
switch(OpIdx) {
case 1:
- return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+ return MI->isCopy() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
case 2:
- return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+ return MI->isSubregToReg() && (!MI->getOperand(0).readsReg() ||
ImpDefRegs.count(MI->getOperand(0).getReg()));
default: return false;
}
@@ -66,7 +67,7 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
MachineOperand &MO1 = MI->getOperand(1);
if (MO1.getReg() != Reg)
return false;
- if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+ if (!MO0.readsReg() || ImpDefRegs.count(MO0.getReg()))
return true;
return false;
}
@@ -87,7 +88,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
TII = fn.getTarget().getInstrInfo();
TRI = fn.getTarget().getRegisterInfo();
MRI = &fn.getRegInfo();
- LV = &getAnalysis<LiveVariables>();
+ LV = getAnalysisIfAvailable<LiveVariables>();
SmallSet<unsigned, 8> ImpDefRegs;
SmallVector<MachineInstr*, 8> ImpDefMIs;
@@ -105,23 +106,24 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineInstr *MI = &*I;
++I;
if (MI->isImplicitDef()) {
- if (MI->getOperand(0).getSubReg())
+ ImpDefMIs.push_back(MI);
+ // Is this a sub-register read-modify-write?
+ if (MI->getOperand(0).readsReg())
continue;
unsigned Reg = MI->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+ for (const uint16_t *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
ImpDefRegs.insert(*SS);
}
- ImpDefMIs.push_back(MI);
continue;
}
// Eliminate %reg1032:sub<def> = COPY undef.
- if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ if (MI->isCopy() && MI->getOperand(0).readsReg()) {
MachineOperand &MO = MI->getOperand(1);
if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
- if (MO.isKill()) {
+ if (LV && MO.isKill()) {
LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg());
vi.removeKill(MI);
}
@@ -140,7 +142,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
+ if (!MO.isReg() || !MO.readsReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -155,8 +157,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MI->RemoveOperand(j);
if (isKill) {
ImpDefRegs.erase(Reg);
- LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
- vi.removeKill(MI);
+ if (LV) {
+ LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
+ vi.removeKill(MI);
+ }
}
ChangedToImpDef = true;
Changed = true;
@@ -172,10 +176,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
continue;
}
if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
- // Make sure other uses of
+ // Make sure other reads of Reg are also marked <undef>.
for (unsigned j = i+1; j != e; ++j) {
MachineOperand &MOJ = MI->getOperand(j);
- if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg)
+ if (MOJ.isReg() && MOJ.getReg() == Reg && MOJ.readsReg())
MOJ.setIsUndef();
}
ImpDefRegs.erase(Reg);
@@ -265,7 +269,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
}
// Update LiveVariables varinfo if the instruction is a kill.
- if (isKill) {
+ if (LV && isKill) {
LiveVariables::VarInfo& vi = LV->getVarInfo(Reg);
vi.removeKill(RMI);
}
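The readsReg() predicate that replaces the ad-hoc getSubReg()/isUndef() checks above can be summarized as follows (a simplification, not the exact in-tree definition): an operand reads its register if it is a use, or if it is a sub-register def, which read-modify-writes the untouched lanes of the super-register; an <undef> operand reads nothing.

// Simplified model of MachineOperand::readsReg(); illustrative only.
static bool readsRegSketch(bool IsUndef, bool IsUse, unsigned SubRegIdx) {
  if (IsUndef)
    return false;                  // <undef> operands read nothing
  return IsUse || SubRegIdx != 0;  // uses read; sub-reg defs read-modify-write
}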
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 32c932552bed..458915ea5d93 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -45,24 +45,22 @@
using namespace llvm;
char PEI::ID = 0;
+char &llvm::PrologEpilogCodeInserterID = PEI::ID;
INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
"Prologue/Epilogue Insertion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false)
+ "Prologue/Epilogue Insertion & Frame Finalization",
+ false, false)
STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
-/// createPrologEpilogCodeInserter - This function returns a pass that inserts
-/// prolog and epilog code, and eliminates abstract frame references.
-///
-FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
-
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
///
@@ -71,6 +69,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+ assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
@@ -125,6 +125,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
scavengeFrameVirtualRegs(Fn);
+ // Clear any vregs created by virtual scavenging.
+ Fn.getRegInfo().clearVirtRegs();
+
delete RS;
clearAllSets();
return true;
@@ -207,7 +210,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn);
// These are used to keep track the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
@@ -224,17 +227,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
+ if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
- } else {
- for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
- *AliasSet; ++AliasSet) { // Check alias registers too.
- if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
- CSI.push_back(CalleeSavedInfo(Reg));
- break;
- }
- }
}
}
@@ -332,7 +327,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
bool AtStart = I == MBB->begin();
@@ -426,11 +421,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Skip over all terminator instructions, which are part of the
// return sequence.
- if (! I->getDesc().isTerminator()) {
+ if (! I->isTerminator()) {
++I;
} else {
MachineBasicBlock::iterator I2 = I;
- while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != MBB->begin() && (--I2)->isTerminator())
I = I2;
}
}
@@ -698,7 +693,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn())
+ if (!I->empty() && I->back().isReturn())
TFI.emitEpilogue(Fn, *I);
}
@@ -706,7 +701,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (EnableSegmentedStacks)
+ if (Fn.getTarget().Options.EnableSegmentedStacks)
TFI.adjustForSegmentedStacks(Fn);
}
@@ -813,6 +808,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Run through the instructions and find any virtual registers.
for (MachineFunction::iterator BB = Fn.begin(),
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index e2391591ad06..0d140a9bb481 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -40,10 +40,6 @@ namespace llvm {
initializePEIPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const {
- return "Prolog/Epilog Insertion & Frame Finalization";
- }
-
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
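The callee-saved scan in PrologEpilogInserter.cpp above folds an explicit alias walk into one query. Restating the removed loop as a helper (assuming isPhysRegOrOverlapUsed subsumes exactly this):

#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"

// What isPhysRegOrOverlapUsed(Reg) is assumed to replace: the register is
// used directly, or some aliasing register is used.
static bool usedOrOverlapUsed(const llvm::MachineRegisterInfo &MRI,
                              const llvm::TargetRegisterInfo &TRI,
                              unsigned Reg) {
  if (MRI.isPhysRegUsed(Reg))
    return true;
  for (const uint16_t *AS = TRI.getAliasSet(Reg); *AS; ++AS)
    if (MRI.isPhysRegUsed(*AS))
      return true;
  return false;
}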
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 73b66d868f3d..49599b3ab980 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -87,7 +87,6 @@ bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
this == getJumpTable())
return true;
llvm_unreachable("Unknown PseudoSourceValue!");
- return false;
}
bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
@@ -97,7 +96,6 @@ bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
this == getJumpTable())
return false;
llvm_unreachable("Unknown PseudoSourceValue!");
- return true;
}
bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
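The returns deleted above were dead code: llvm_unreachable is a noreturn construct, so control cannot fall through it. A minimal illustration with a made-up function:

#include "llvm/Support/ErrorHandling.h"

// No trailing return is needed after llvm_unreachable; the compiler knows
// this point is never reached once every real case returns.
static bool isEvenKind(unsigned Kind) {
  switch (Kind) {
  case 0: case 2: return true;
  case 1: case 3: return false;
  }
  llvm_unreachable("Unknown kind!");
}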
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
new file mode 100644
index 000000000000..b00eceb17f11
--- /dev/null
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -0,0 +1,280 @@
+//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which provides common functionality
+// for LiveIntervalUnion-based register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ MRI->freezeReservedRegs(vrm.getMachineFunction());
+ RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
+
+ const unsigned NumRegs = TRI->getNumRegs();
+ if (NumRegs != PhysReg2LiveUnion.numRegs()) {
+ PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
+ // Cache an interference query for each physical reg
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+ }
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
+ PhysReg2LiveUnion[r].clear();
+}
+
+// Visit all the live registers. If they are already assigned to a physical
+// register, unify them with the corresponding LiveIntervalUnion, otherwise push
+// them on the priority queue for later assignment.
+void RegAllocBase::seedLiveRegs() {
+ NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ enqueue(&VirtReg);
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ MRI->setPhysRegUsed(PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+ seedLiveRegs();
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (LiveInterval *VirtReg = dequeue()) {
+ assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
+
+ // Unused registers can appear when the spiller coalesces snippets.
+ if (MRI->reg_nodbg_empty(VirtReg->reg)) {
+ DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
+ LIS->removeInterval(VirtReg->reg);
+ continue;
+ }
+
+ // Invalidate all interference queries, live ranges could have changed.
+ invalidateVirtRegs();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit "
+ << MRI->getRegClass(VirtReg->reg)->getName()
+ << ':' << *VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg == ~0u) {
+ // selectOrSplit failed to find a register!
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
+ for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
+ }
+
+ if (AvailablePhysReg)
+ assign(*VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval *SplitVirtReg = *I;
+ assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
+ if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
+ DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
+ LIS->removeInterval(SplitVirtReg->reg);
+ continue;
+ }
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ enqueue(SplitVirtReg);
+ ++NumNewQueued;
+ }
+ }
+}
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ if (MF->size() <= 1)
+ return;
+
+ LiveIntervalUnion::SegmentIter SI;
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
+ MachineFunction::iterator MBB = llvm::next(MF->begin());
+ MachineFunction::iterator MFE = MF->end();
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.setMap(LiveUnion.getMap());
+ SI.find(Start);
+ while (SI.valid()) {
+ if (SI.start() <= Start) {
+ if (!MBB->isLiveIn(PhysReg))
+ MBB->addLiveIn(PhysReg);
+ DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
+ << PrintReg(SI.value()->reg, TRI));
+ } else if (SI.start() > Stop)
+ MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
+ if (++MBB == MFE)
+ break;
+ tie(Start, Stop) = Indexes->getMBBRange(MBB);
+ SI.advanceTo(Start);
+ }
+ DEBUG(dbgs() << '\n');
+ }
+}
+
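allocatePhysRegs() above fixes the contract that subclasses implement via selectOrSplit: return a physreg to assign VirtReg, return 0 after spilling or splitting (appending the new intervals to SplitVRegs for requeueing), or ~0u on failure. A rough sketch of a conforming implementation (MyRegAlloc is hypothetical, and the two-argument LiveRangeEdit constructor is assumed from this snapshot):

// Hypothetical allocator subclass method; error handling elided.
unsigned MyRegAlloc::selectOrSplit(LiveInterval &VirtReg,
                                   SmallVectorImpl<LiveInterval*> &SplitVRegs) {
  // Try each register in allocation order; a 0 result from the interference
  // check means the candidate is free.
  ArrayRef<unsigned> Order =
    RegClassInfo.getOrder(MRI->getRegClass(VirtReg.reg));
  for (unsigned i = 0, e = Order.size(); i != e; ++i)
    if (!checkPhysRegInterference(VirtReg, Order[i]))
      return Order[i];

  // No free register: spill VirtReg itself. The edit collects any new
  // intervals into SplitVRegs; returning 0 tells the driver that nothing
  // was assigned this round.
  LiveRangeEdit LRE(VirtReg, SplitVRegs);
  spiller().spill(LRE);
  return 0;
}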
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 031642117efc..072fe2bdb656 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -49,11 +49,6 @@ class VirtRegMap;
class LiveIntervals;
class Spiller;
-// Forward declare a priority queue of live virtual registers. If an
-// implementation needs to prioritize by anything other than spill weight, then
-// this will become an abstract base class with virtual calls to push/get.
-class LiveVirtRegQueue;
-
/// RegAllocBase provides the register allocation driver and interface that can
/// be extended to add interesting heuristics.
///
@@ -67,7 +62,6 @@ class RegAllocBase {
// registers may have changed.
unsigned UserTag;
-protected:
// Array of LiveIntervalUnions indexed by physical register.
class LiveUnionArray {
unsigned NumRegs;
@@ -88,17 +82,19 @@ protected:
}
};
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- VirtRegMap *VRM;
- LiveIntervals *LIS;
- RegisterClassInfo RegClassInfo;
LiveUnionArray PhysReg2LiveUnion;
// Current queries, one per physreg. They must be reinitialized each time we
// query on a new live virtual register.
OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+protected:
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ RegisterClassInfo RegClassInfo;
+
RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {}
virtual ~RegAllocBase() {}
@@ -115,16 +111,17 @@ protected:
return Queries[PhysReg];
}
+ // Get direct access to the underlying LiveIntervalUnion for PhysReg.
+ LiveIntervalUnion &getLiveUnion(unsigned PhysReg) {
+ return PhysReg2LiveUnion[PhysReg];
+ }
+
// Invalidate all cached information about virtual registers - live ranges may
// have changed.
void invalidateVirtRegs() { ++UserTag; }
// The top-level driver. The output is a VirtRegMap that us updated with
// physical register assignments.
- //
- // If an implementation wants to override the LiveInterval comparator, we
- // should modify this interface to allow passing in an instance derived from
- // LiveVirtRegQueue.
void allocatePhysRegs();
// Get a temporary reference to a Spiller instance.
@@ -160,12 +157,6 @@ protected:
/// allocation is making progress.
void unassign(LiveInterval &VirtReg, unsigned PhysReg);
- // Helper for spilling all live virtual registers currently unified under preg
- // that interfere with the most recently queried lvr. Return true if spilling
- // was successful, and append any new spilled/split intervals to splitLVRs.
- bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
-
/// addMBBLiveIns - Add physreg liveins to basic blocks.
void addMBBLiveIns(MachineFunction *);
@@ -183,9 +174,6 @@ public:
private:
void seedLiveRegs();
-
- void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs);
};
} // end namespace llvm
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 5496d69fd3df..77ee3148f31a 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -1,4 +1,4 @@
-//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,18 +15,15 @@
#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
#include "LiveDebugVariables.h"
-#include "LiveIntervalUnion.h"
-#include "LiveRangeEdit.h"
#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -37,35 +34,17 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SparseBitVector.h"
-#endif
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Timer.h"
#include <cstdlib>
#include <queue>
using namespace llvm;
-STATISTIC(NumAssigned , "Number of registers assigned");
-STATISTIC(NumUnassigned , "Number of registers unassigned");
-STATISTIC(NumNewQueued , "Number of new live ranges queued");
-
static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
createBasicRegisterAllocator);
-// Temporary verification option until we can put verification inside
-// MachineVerifier.
-static cl::opt<bool, true>
-VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
- cl::desc("Verify during register allocation"));
-
-const char *RegAllocBase::TimerGroupName = "Register Allocation";
-bool RegAllocBase::VerifyEnabled = false;
-
namespace {
struct CompSpillWeight {
bool operator()(LiveInterval *A, LiveInterval *B) const {
@@ -93,6 +72,11 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
std::auto_ptr<Spiller> SpillerInstance;
std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
CompSpillWeight> Queue;
+
+ // Scratch space. Allocated here to avoid repeated malloc calls in
+ // selectOrSplit().
+ BitVector UsableRegs;
+
public:
RABasic();
@@ -128,6 +112,15 @@ public:
/// Perform register allocation.
virtual bool runOnMachineFunction(MachineFunction &mf);
+ // Helper for spilling all live virtual registers currently unified under
+ // PhysReg that interfere with VirtReg. Return true if spilling was
+ // successful, and append any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
static char ID;
};
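The Queue member above pairs std::priority_queue with the CompSpillWeight comparator. As a reminder of the semantics (a sketch, not this patch's code, and the comparator body lies outside the hunk shown): std::priority_queue pops the element that compares largest, so ordering by "less weight" pops the heaviest interval first.

#include <cstdio>
#include <queue>
#include <vector>

struct Interval { unsigned Reg; float Weight; };

// priority_queue pops the *largest* element under its comparator, so a
// less-than on weight yields heaviest-first allocation order.
struct ByWeight {
  bool operator()(const Interval *A, const Interval *B) const {
    return A->Weight < B->Weight;
  }
};

int main() {
  Interval A = {1, 2.0f}, B = {2, 9.0f}, C = {3, 4.5f};
  std::priority_queue<Interval*, std::vector<Interval*>, ByWeight> Q;
  Q.push(&A); Q.push(&B); Q.push(&C);
  while (!Q.empty()) {   // Pops vreg2, vreg3, vreg1.
    std::printf("vreg%u (weight %.1f)\n", Q.top()->Reg, Q.top()->Weight);
    Q.pop();
  }
}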
@@ -139,8 +132,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -157,9 +150,6 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -178,204 +168,10 @@ void RABasic::releaseMemory() {
RegAllocBase::releaseMemory();
}
-#ifndef NDEBUG
-// Verify each LiveIntervalUnion.
-void RegAllocBase::verify() {
- LiveVirtRegBitSet VisitedVRegs;
- OwningArrayPtr<LiveVirtRegBitSet>
- unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
-
- // Verify disjoint unions.
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
- LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
- PhysReg2LiveUnion[PhysReg].verify(VRegs);
- // Union + intersection test could be done efficiently in one pass, but
- // don't add a method to SparseBitVector unless we really need it.
- assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
- VisitedVRegs |= VRegs;
- }
-
- // Verify vreg coverage.
- for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
- liItr != liEnd; ++liItr) {
- unsigned reg = liItr->first;
- if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
- if (!VRM->hasPhys(reg)) continue; // spilled?
- unsigned PhysReg = VRM->getPhys(reg);
- if (!unionVRegs[PhysReg].test(reg)) {
- dbgs() << "LiveVirtReg " << reg << " not in union " <<
- TRI->getName(PhysReg) << "\n";
- llvm_unreachable("unallocated live vreg");
- }
- }
- // FIXME: I'm not sure how to verify spilled intervals.
-}
-#endif //!NDEBUG
-
-//===----------------------------------------------------------------------===//
-// RegAllocBase Implementation
-//===----------------------------------------------------------------------===//
-
-// Instantiate a LiveIntervalUnion for each physical register.
-void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
- unsigned NRegs) {
- NumRegs = NRegs;
- Array =
- static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
- for (unsigned r = 0; r != NRegs; ++r)
- new(Array + r) LiveIntervalUnion(r, allocator);
-}
-
-void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
- NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
- TRI = &vrm.getTargetRegInfo();
- MRI = &vrm.getRegInfo();
- VRM = &vrm;
- LIS = &lis;
- RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
-
- const unsigned NumRegs = TRI->getNumRegs();
- if (NumRegs != PhysReg2LiveUnion.numRegs()) {
- PhysReg2LiveUnion.init(UnionAllocator, NumRegs);
- // Cache an interferece query for each physical reg
- Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
- }
-}
-
-void RegAllocBase::LiveUnionArray::clear() {
- if (!Array)
- return;
- for (unsigned r = 0; r != NumRegs; ++r)
- Array[r].~LiveIntervalUnion();
- free(Array);
- NumRegs = 0;
- Array = 0;
-}
-
-void RegAllocBase::releaseMemory() {
- for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r)
- PhysReg2LiveUnion[r].clear();
-}
-
-// Visit all the live registers. If they are already assigned to a physical
-// register, unify them with the corresponding LiveIntervalUnion, otherwise push
-// them on the priority queue for later assignment.
-void RegAllocBase::seedLiveRegs() {
- NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
- for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
- unsigned RegNum = I->first;
- LiveInterval &VirtReg = *I->second;
- if (TargetRegisterInfo::isPhysicalRegister(RegNum))
- PhysReg2LiveUnion[RegNum].unify(VirtReg);
- else
- enqueue(&VirtReg);
- }
-}
-
-void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
- << " to " << PrintReg(PhysReg, TRI) << '\n');
- assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
- VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
- MRI->setPhysRegUsed(PhysReg);
- PhysReg2LiveUnion[PhysReg].unify(VirtReg);
- ++NumAssigned;
-}
-
-void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
- DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
- << " from " << PrintReg(PhysReg, TRI) << '\n');
- assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
- PhysReg2LiveUnion[PhysReg].extract(VirtReg);
- VRM->clearVirt(VirtReg.reg);
- ++NumUnassigned;
-}
-
-// Top-level driver to manage the queue of unassigned VirtRegs and call the
-// selectOrSplit implementation.
-void RegAllocBase::allocatePhysRegs() {
- seedLiveRegs();
-
- // Continue assigning vregs one at a time to available physical registers.
- while (LiveInterval *VirtReg = dequeue()) {
- assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned");
-
- // Unused registers can appear when the spiller coalesces snippets.
- if (MRI->reg_nodbg_empty(VirtReg->reg)) {
- DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n');
- LIS->removeInterval(VirtReg->reg);
- continue;
- }
-
- // Invalidate all interference queries, live ranges could have changed.
- invalidateVirtRegs();
-
- // selectOrSplit requests the allocator to return an available physical
- // register if possible and populate a list of new live intervals that
- // result from splitting.
- DEBUG(dbgs() << "\nselectOrSplit "
- << MRI->getRegClass(VirtReg->reg)->getName()
- << ':' << *VirtReg << '\n');
- typedef SmallVector<LiveInterval*, 4> VirtRegVec;
- VirtRegVec SplitVRegs;
- unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs);
-
- if (AvailablePhysReg == ~0u) {
- // selectOrSplit failed to find a register!
- const char *Msg = "ran out of registers during register allocation";
- // Probably caused by an inline asm.
- MachineInstr *MI;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
- (MI = I.skipInstruction());)
- if (MI->isInlineAsm())
- break;
- if (MI)
- MI->emitError(Msg);
- else
- report_fatal_error(Msg);
- // Keep going after reporting the error.
- VRM->assignVirt2Phys(VirtReg->reg,
- RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
- continue;
- }
-
- if (AvailablePhysReg)
- assign(*VirtReg, AvailablePhysReg);
-
- for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
- I != E; ++I) {
- LiveInterval *SplitVirtReg = *I;
- assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned");
- if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) {
- DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n');
- LIS->removeInterval(SplitVirtReg->reg);
- continue;
- }
- DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
- "expect split value in virtual register");
- enqueue(SplitVirtReg);
- ++NumNewQueued;
- }
- }
-}
-
-// Check if this live virtual register interferes with a physical register. If
-// not, then check for interference on each register that aliases with the
-// physical register. Return the interfering register.
-unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
- unsigned PhysReg) {
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
- if (query(VirtReg, *AliasI).checkInterference())
- return *AliasI;
- return 0;
-}
-
-// Helper for spillInteferences() that spills all interfering vregs currently
+// Helper for spillInterferences() that spills all interfering vregs currently
// assigned to this physical register.
-void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
- SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+void RABasic::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
assert(Q.seenAllInterferences() && "need collectInterferences()");
const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
@@ -391,7 +187,7 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
unassign(SpilledVReg, PhysReg);
// Spill the extracted interval.
- LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills);
+ LiveRangeEdit LRE(SpilledVReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
}
// After extracting segments, the query's results are invalid. But keep the
@@ -402,14 +198,13 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
// Spill or split all live virtual registers currently unified under PhysReg
// that interfere with VirtReg. The newly spilled or split live intervals are
// returned by appending them to SplitVRegs.
-bool
-RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
// Record each interference and determine if all are spillable before
// mutating either the union or live intervals.
unsigned NumInterferences = 0;
// Collect interferences assigned to any alias of the physical register.
- for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ for (const uint16_t *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
NumInterferences += QAlias.collectInterferingVRegs();
if (QAlias.seenUnspillableVReg()) {
@@ -421,52 +216,11 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
assert(NumInterferences > 0 && "expect interference");
// Spill each interfering vreg allocated to PhysReg or an alias.
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
spillReg(VirtReg, *AliasI, SplitVRegs);
return true;
}
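spillInterferences() deliberately separates a read-only pass (collect every interfering vreg and check weights and spillability) from the mutating pass (unassign and spill). Nothing changes state until the whole plan is known to succeed. A standalone sketch of that discipline, with stand-in types:

#include <cstddef>
#include <vector>

struct VReg { float Weight; bool Spillable; };

// Phase 1 inspects only: abort with no state changed if any interference is
// unspillable or at least as heavy as the incoming register.
// Phase 2 mutates, knowing the plan can no longer fail halfway through.
bool spillAllInterferences(const VReg &Incoming,
                           const std::vector<VReg*> &Interfering,
                           std::vector<VReg*> &Spilled) {
  for (std::size_t i = 0; i != Interfering.size(); ++i)
    if (!Interfering[i]->Spillable ||
        Interfering[i]->Weight >= Incoming.Weight)
      return false;
  for (std::size_t i = 0; i != Interfering.size(); ++i)
    Spilled.push_back(Interfering[i]);   // stands in for unassign + spill
  return true;
}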
-// Add newly allocated physical registers to the MBB live in sets.
-void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
- NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- if (MF->size() <= 1)
- return;
-
- LiveIntervalUnion::SegmentIter SI;
- for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
- LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
- if (LiveUnion.empty())
- continue;
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " live-in:");
- MachineFunction::iterator MBB = llvm::next(MF->begin());
- MachineFunction::iterator MFE = MF->end();
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.setMap(LiveUnion.getMap());
- SI.find(Start);
- while (SI.valid()) {
- if (SI.start() <= Start) {
- if (!MBB->isLiveIn(PhysReg))
- MBB->addLiveIn(PhysReg);
- DEBUG(dbgs() << "\tBB#" << MBB->getNumber() << ':'
- << PrintReg(SI.value()->reg, TRI));
- } else if (SI.start() > Stop)
- MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex());
- if (++MBB == MFE)
- break;
- tie(Start, Stop) = Indexes->getMBBRange(MBB);
- SI.advanceTo(Start);
- }
- DEBUG(dbgs() << '\n');
- }
-}
-
-
-//===----------------------------------------------------------------------===//
-// RABasic Implementation
-//===----------------------------------------------------------------------===//
-
// Driver for the register assignment and splitting heuristics.
// Manages iteration over the LiveIntervalUnions.
//
@@ -481,6 +235,10 @@ void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
// selectOrSplit().
unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Check for register mask interference. When live ranges cross calls, the
+ // set of usable registers is reduced to the callee-saved ones.
+ bool CrossRegMasks = LIS->checkRegMaskInterference(VirtReg, UsableRegs);
+
// Populate a list of physical register spill candidates.
SmallVector<unsigned, 8> PhysRegSpillCands;
@@ -491,6 +249,11 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
++I) {
unsigned PhysReg = *I;
+ // If PhysReg is clobbered by a register mask, it isn't useful for
+ // allocation or spilling.
+ if (CrossRegMasks && !UsableRegs.test(PhysReg))
+ continue;
+
// Check interference and, as a side effect, initialize queries for this
// VirtReg and its aliases.
unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
@@ -498,9 +261,9 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
// Found an available register.
return PhysReg;
}
- Queries[interfReg].collectInterferingVRegs(1);
- LiveInterval *interferingVirtReg =
- Queries[interfReg].interferingVRegs().front();
+ LiveIntervalUnion::Query &IntfQ = query(VirtReg, interfReg);
+ IntfQ.collectInterferingVRegs(1);
+ LiveInterval *interferingVirtReg = IntfQ.interferingVRegs().front();
// The current VirtReg must either be spillable, or one of its interferences
// must have less spill weight.
@@ -524,7 +287,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(VirtReg, SplitVRegs);
+ LiveRangeEdit LRE(VirtReg, SplitVRegs, *MF, *LIS, VRM);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
@@ -579,7 +342,10 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) {
// Write out new DBG_VALUE instructions.
getAnalysis<LiveDebugVariables>().emitDebugValues(VRM);
- // The pass output is in VirtRegMap. Release all the transient data.
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
releaseMemory();
return true;
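The register-mask check added to selectOrSplit() above prunes the candidate set once per interval: checkRegMaskInterference() reports whether the live range crosses any regmask (typically a call) and, if so, fills UsableRegs with the registers every crossed mask preserves; filtering is then a single bit test per candidate. A self-contained model of that filter (illustrative stand-in for BitVector and checkRegMaskInterference, not the LLVM API):

#include <cstddef>
#include <vector>

// Masks holds one preserved-register mask per crossed call site; a candidate
// physreg is usable only if every mask preserves it.
bool usableAcrossMasks(unsigned PhysReg,
                       const std::vector<std::vector<bool> > &Masks) {
  for (std::size_t i = 0; i != Masks.size(); ++i)
    if (!Masks[i][PhysReg])
      return false;   // clobbered by some call: useless for alloc or spill
  return true;
}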
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index b36a445291b7..e09b7f8d26be 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -32,6 +32,7 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -49,10 +50,7 @@ namespace {
public:
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {
- initializePHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ isBulkSpilling(false) {}
private:
const TargetMachine *TM;
MachineFunction *MF;
@@ -71,16 +69,20 @@ namespace {
// Everything we know about a live virtual register.
struct LiveReg {
MachineInstr *LastUse; // Last instr to use reg.
+ unsigned VirtReg; // Virtual register number.
unsigned PhysReg; // Currently held here.
unsigned short LastOpNum; // OpNum on LastUse.
bool Dirty; // Register needs spill.
- LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
- Dirty(false) {}
+ explicit LiveReg(unsigned v)
+ : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
+
+ unsigned getSparseSetKey() const {
+ return TargetRegisterInfo::virtReg2Index(VirtReg);
+ }
};
- typedef DenseMap<unsigned, LiveReg> LiveRegMap;
- typedef LiveRegMap::value_type LiveRegEntry;
+ typedef SparseSet<LiveReg> LiveRegMap;
// LiveVirtRegs - This map contains entries for each virtual register
// that is currently available in a physical register.
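Switching LiveRegMap from DenseMap to SparseSet relies on getSparseSetKey() returning a small dense index (virtReg2Index strips the virtual-register tag bit). A sparse set trades a universe-sized array for constant-time insert and find with no hashing or rehashing, which is why runOnMachineFunction() now calls setUniverse(). A sketch of the idea, not llvm::SparseSet itself:

#include <vector>

// Minimal sparse-set sketch keyed by a dense index.
struct SparseSetSketch {
  std::vector<unsigned> Sparse;  // index -> position in Dense (may be stale)
  std::vector<unsigned> Dense;   // the keys actually present
  void setUniverse(unsigned U) { Sparse.assign(U, 0); }
  bool contains(unsigned Key) const {
    unsigned P = Sparse[Key];
    return P < Dense.size() && Dense[P] == Key;   // validate the stale hint
  }
  void insert(unsigned Key) {
    if (contains(Key)) return;
    Sparse[Key] = Dense.size();
    Dense.push_back(Key);
  }
};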
@@ -137,8 +139,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -159,14 +159,23 @@ namespace {
void usePhysReg(MachineOperand&);
void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
unsigned calcSpillCost(unsigned PhysReg) const;
- void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
- void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+ void assignVirtToPhysReg(LiveReg&, unsigned PhysReg);
+ LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
+ return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ }
+ LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg);
+ LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator,
+ unsigned Hint);
LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint);
void spillAll(MachineInstr *MI);
bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
+ void addRetOperands(MachineBasicBlock *MBB);
};
char RAFast::ID = 0;
}
@@ -222,10 +231,10 @@ void RAFast::addKillFlag(const LiveReg &LR) {
/// killVirtReg - Mark virtreg as no longer available.
void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
- addKillFlag(LRI->second);
- const LiveReg &LR = LRI->second;
- assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
- PhysRegState[LR.PhysReg] = regFree;
+ addKillFlag(*LRI);
+ assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg &&
+ "Broken RegState mapping");
+ PhysRegState[LRI->PhysReg] = regFree;
// Erase from LiveVirtRegs unless we're spilling in bulk.
if (!isBulkSpilling)
LiveVirtRegs.erase(LRI);
@@ -235,7 +244,7 @@ void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
void RAFast::killVirtReg(unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
- LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end())
killVirtReg(LRI);
}
@@ -245,7 +254,7 @@ void RAFast::killVirtReg(unsigned VirtReg) {
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
- LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
spillVirtReg(MI, LRI);
}
@@ -253,18 +262,18 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
/// spillVirtReg - Do the actual work of spilling.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
LiveRegMap::iterator LRI) {
- LiveReg &LR = LRI->second;
- assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+ LiveReg &LR = *LRI;
+ assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping");
if (LR.Dirty) {
// If this physreg is used by the instruction, we want to kill it on the
// instruction, not on the spill.
bool SpillKill = LR.LastUse != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI)
<< " in " << PrintReg(LR.PhysReg, TRI));
- const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
- int FI = getStackSpaceFor(LRI->first, RC);
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg);
+ int FI = getStackSpaceFor(LRI->VirtReg, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
++NumStores; // Update statistics
@@ -272,7 +281,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// If this register is used by DBG_VALUE then insert new DBG_VALUE to
// identify spilled location as the place to find corresponding variable's
// value.
- SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ SmallVector<MachineInstr *, 4> &LRIDbgValues =
+ LiveDbgValueMap[LRI->VirtReg];
for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
MachineInstr *DBG = LRIDbgValues[li];
const MDNode *MDPtr =
@@ -295,8 +305,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
}
}
- // Now this register is spilled there is should not be any DBG_VALUE pointing
- // to this register because they are all pointing to spilled value now.
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to this register; they all point to the spilled value now.
LRIDbgValues.clear();
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
@@ -343,7 +354,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
}
// Maybe a superregister is reserved?
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
switch (PhysRegState[Alias]) {
case regDisabled:
@@ -397,7 +408,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// This is a disabled register, disable all aliases.
PhysRegState[PhysReg] = NewState;
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -435,14 +446,17 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding "
<< PrintReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
- default:
- return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ return I->Dirty ? spillDirty : spillClean;
+ }
}
// This is a disabled register, add up cost of aliases.
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ for (const uint16_t *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
if (UsedInInstr.test(Alias))
return spillImpossible;
@@ -454,10 +468,13 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
break;
case regReserved:
return spillImpossible;
- default:
- Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
+ default: {
+ LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ Cost += I->Dirty ? spillDirty : spillClean;
break;
}
+ }
}
return Cost;
}
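calcSpillCost() folds each alias's state into one number: free aliases cost nothing, reserved or already-used ones make the candidate impossible, and live vregs cost more when dirty (they need a store, not just a later reload). A stand-in of that lattice; the constants are assumptions in the spirit of RAFast's spillClean/spillDirty/spillImpossible, whose definitions are outside the hunks shown:

#include <cstddef>
#include <vector>

const unsigned SpillClean = 1, SpillDirty = 100, SpillImpossible = ~0u;

struct AliasState { bool Free, Reserved, Dirty; };

unsigned spillCost(const std::vector<AliasState> &Aliases) {
  unsigned Cost = 0;
  for (std::size_t i = 0; i != Aliases.size(); ++i) {
    if (Aliases[i].Free) continue;               // nothing to evict
    if (Aliases[i].Reserved) return SpillImpossible;
    Cost += Aliases[i].Dirty ? SpillDirty : SpillClean;
  }
  return Cost;
}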
@@ -467,17 +484,27 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
/// that PhysReg is the proper container for VirtReg now. The physical
/// register must not be used for anything else when this is called.
///
-void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to "
<< PrintReg(PhysReg, TRI) << "\n");
- PhysRegState[PhysReg] = LRE.first;
- assert(!LRE.second.PhysReg && "Already assigned a physreg");
- LRE.second.PhysReg = PhysReg;
+ PhysRegState[PhysReg] = LR.VirtReg;
+ assert(!LR.PhysReg && "Already assigned a physreg");
+ LR.PhysReg = PhysReg;
+}
+
+RAFast::LiveRegMap::iterator
+RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
+ LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared");
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
}
/// allocVirtReg - Allocate a physical register for VirtReg.
-void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
- const unsigned VirtReg = LRE.first;
+RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
+ LiveRegMap::iterator LRI,
+ unsigned Hint) {
+ const unsigned VirtReg = LRI->VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
@@ -496,7 +523,9 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
if (Cost < spillDirty) {
if (Cost)
definePhysReg(MI, Hint, regFree);
- return assignVirtToPhysReg(LRE, Hint);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, Hint);
}
}
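The re-lookup in the hint path above encodes a general rule that this patch applies twice: once a call such as definePhysReg() can erase entries from LiveVirtRegs, any iterator held across it is stale, so the entry must be re-found by key rather than reused. A minimal illustration with a stand-in container, not SparseSet:

#include <cstddef>
#include <utility>
#include <vector>

typedef std::vector<std::pair<unsigned, unsigned> > RegMap; // vreg -> physreg

// After any operation that may erase from M, locate the entry again by key;
// a pointer or iterator obtained before the erase must not be reused.
unsigned *refindPhysReg(RegMap &M, unsigned VReg) {
  for (std::size_t i = 0; i != M.size(); ++i)
    if (M[i].first == VReg)
      return &M[i].second;
  return 0;
}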
@@ -505,8 +534,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// First try to find a completely free register.
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
- return assignVirtToPhysReg(LRE, PhysReg);
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ assignVirtToPhysReg(*LRI, PhysReg);
+ return LRI;
+ }
}
DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
@@ -519,21 +550,25 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
- if (Cost == 0)
- return assignVirtToPhysReg(LRE, *I);
+ if (Cost == 0) {
+ assignVirtToPhysReg(*LRI, *I);
+ return LRI;
+ }
if (Cost < BestCost)
BestReg = *I, BestCost = Cost;
}
if (BestReg) {
definePhysReg(MI, BestReg, regFree);
- return assignVirtToPhysReg(LRE, BestReg);
+ // definePhysReg may kill virtual registers and modify LiveVirtRegs.
+ // That invalidates LRI, so run a new lookup for VirtReg.
+ return assignVirtToPhysReg(VirtReg, BestReg);
}
// Nothing we can do. Report an error and keep going with a bad allocation.
MI->emitError("ran out of registers during register allocation");
definePhysReg(MI, *AO.begin(), regFree);
- assignVirtToPhysReg(LRE, *AO.begin());
+ return assignVirtToPhysReg(VirtReg, *AO.begin());
}
/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
@@ -544,8 +579,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
- tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
- LiveReg &LR = LRI->second;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (New) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
@@ -555,18 +589,18 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
}
- allocVirtReg(MI, *LRI, Hint);
- } else if (LR.LastUse) {
+ LRI = allocVirtReg(MI, LRI, Hint);
+ } else if (LRI->LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
- if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
- addKillFlag(LR);
+ if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse())
+ addKillFlag(*LRI);
}
- assert(LR.PhysReg && "Register not assigned");
- LR.LastUse = MI;
- LR.LastOpNum = OpNum;
- LR.Dirty = true;
- UsedInInstr.set(LR.PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ LRI->Dirty = true;
+ UsedInInstr.set(LRI->PhysReg);
return LRI;
}
@@ -578,18 +612,17 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
"Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
- tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
- LiveReg &LR = LRI->second;
+ tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
MachineOperand &MO = MI->getOperand(OpNum);
if (New) {
- allocVirtReg(MI, *LRI, Hint);
+ LRI = allocVirtReg(MI, LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
- << PrintReg(LR.PhysReg, TRI) << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ << PrintReg(LRI->PhysReg, TRI) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI);
++NumLoads;
- } else if (LR.Dirty) {
+ } else if (LRI->Dirty) {
if (isLastUseOfLocalReg(MO)) {
DEBUG(dbgs() << "Killing last use: " << MO << "\n");
if (MO.isUse())
@@ -614,10 +647,10 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
MO.setIsDead(false);
}
- assert(LR.PhysReg && "Register not assigned");
- LR.LastUse = MI;
- LR.LastOpNum = OpNum;
- UsedInInstr.set(LR.PhysReg);
+ assert(LRI->PhysReg && "Register not assigned");
+ LRI->LastUse = MI;
+ LRI->LastOpNum = OpNum;
+ UsedInInstr.set(LRI->PhysReg);
return LRI;
}
@@ -674,7 +707,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
UsedInInstr.set(Reg);
if (ThroughRegs.count(PhysRegState[Reg]))
definePhysReg(MI, Reg, regFree);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
UsedInInstr.set(*AS);
if (ThroughRegs.count(PhysRegState[*AS]))
definePhysReg(MI, *AS, regFree);
@@ -682,7 +715,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
SmallVector<unsigned, 8> PartialDefs;
- DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ DEBUG(dbgs() << "Allocating tied uses.\n");
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -694,7 +727,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
<< DefIdx << ".\n");
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
setPhysReg(MI, i, PhysReg);
// Note: we don't update the def operand yet. That would cause the normal
// def-scan to attempt spilling.
@@ -703,16 +736,25 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
// Reload the register, but don't assign to the operand just yet.
// That would confuse the later phys-def processing pass.
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
- PartialDefs.push_back(LRI->second.PhysReg);
- } else if (MO.isEarlyClobber()) {
- // Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
- if (setPhysReg(MI, i, PhysReg))
- VirtDead.push_back(Reg);
+ PartialDefs.push_back(LRI->PhysReg);
}
}
+ DEBUG(dbgs() << "Allocating early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ if (!MO.isEarlyClobber())
+ continue;
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
UsedInInstr.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -730,32 +772,66 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
UsedInInstr.set(PartialDefs[i]);
}
-void RAFast::AllocateBasicBlock() {
- DEBUG(dbgs() << "\nAllocating " << *MBB);
+/// addRetOperands - Ensure that a return instruction has an operand for each
+/// value live out of the function.
+///
+/// Things marked both call and return are tail calls; do not do this for them.
+/// The tail callee need not take the same registers as input that it produces
+/// as output, and there are dependencies for its input registers elsewhere.
+///
+/// FIXME: This should be done as part of instruction selection, and this helper
+/// should be deleted. Until then, we use custom logic here to create the proper
+/// operand under all circumstances. We can't use addRegisterKilled because that
+/// doesn't make sense for undefined values. We can't simply avoid calling it
+/// for undefined values, because we must ensure that the operand always exists.
+void RAFast::addRetOperands(MachineBasicBlock *MBB) {
+ if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall())
+ return;
+
+ MachineInstr *MI = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MBB->getParent()->getRegInfo().liveout_begin(),
+ E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ "Cannot have a live-out virtual register.");
+
+ bool hasDef = PhysRegState[Reg] == regReserved;
+
+ // Check if this register already has an operand.
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+
+ unsigned OperReg = MO.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(OperReg))
+ continue;
- // FIXME: This should probably be added by instruction selection instead?
- // If the last instruction in the block is a return, make sure to mark it as
- // using all of the live-out values in the function. Things marked both call
- // and return are tail calls; do not do this for them. The tail callee need
- // not take the same registers as input that it produces as output, and there
- // are dependencies for its input registers elsewhere.
- if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
- !MBB->back().getDesc().isCall()) {
- MachineInstr *Ret = &MBB->back();
-
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I) {
- assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
- "Cannot have a live-out virtual register.");
-
- // Add live-out registers as implicit uses.
- Ret->addRegisterKilled(*I, TRI, true);
+ if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) {
+ // If the ret already has an operand for this physreg or a superset,
+ // don't duplicate it. Set the kill flag if the value is defined.
+ if (hasDef && !MO.isKill())
+ MO.setIsKill();
+ Found = true;
+ break;
+ }
}
+ if (!Found)
+ MI->addOperand(MachineOperand::CreateReg(Reg,
+ false /*IsDef*/,
+ true /*IsImp*/,
+ hasDef/*IsKill*/));
}
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
- assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?");
+ assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
MachineBasicBlock::iterator MII = MBB->begin();
@@ -783,25 +859,26 @@ void RAFast::AllocateBasicBlock() {
case regReserved:
dbgs() << "*";
break;
- default:
+ default: {
dbgs() << '=' << PrintReg(PhysRegState[Reg]);
- if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]);
+ assert(I != LiveVirtRegs.end() && "Missing VirtReg entry");
+ if (I->Dirty)
dbgs() << "*";
- assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
- "Bad inverse map");
+ assert(I->PhysReg == Reg && "Bad inverse map");
break;
}
+ }
}
dbgs() << '\n';
// Check that LiveVirtRegs is the inverse.
for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
e = LiveVirtRegs.end(); i != e; ++i) {
- assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
"Bad map key");
- assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
"Bad map value");
- assert(PhysRegState[i->second.PhysReg] == i->first &&
- "Bad inverse map");
+ assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
});
@@ -815,10 +892,9 @@ void RAFast::AllocateBasicBlock() {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- LiveDbgValueMap[Reg].push_back(MI);
- LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ LiveRegMap::iterator LRI = findLiveVirtReg(Reg);
if (LRI != LiveVirtRegs.end())
- setPhysReg(MI, i, LRI->second.PhysReg);
+ setPhysReg(MI, i, LRI->PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
if (SS == -1) {
@@ -849,6 +925,7 @@ void RAFast::AllocateBasicBlock() {
}
}
}
+ LiveDbgValueMap[Reg].push_back(MI);
}
}
// Next instruction.
@@ -932,7 +1009,7 @@ void RAFast::AllocateBasicBlock() {
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, i, PhysReg))
killVirtReg(LRI);
@@ -953,13 +1030,13 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
UsedInInstr.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(Reg); *AS; ++AS)
UsedInInstr.set(*AS);
}
}
unsigned DefOpEnd = MI->getNumOperands();
- if (MCID.isCall()) {
+ if (MI->isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
@@ -988,7 +1065,7 @@ void RAFast::AllocateBasicBlock() {
continue;
}
LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
- unsigned PhysReg = LRI->second.PhysReg;
+ unsigned PhysReg = LRI->PhysReg;
if (setPhysReg(MI, i, PhysReg)) {
VirtDead.push_back(Reg);
CopyDst = 0; // cancel coalescing;
@@ -1024,6 +1101,9 @@ void RAFast::AllocateBasicBlock() {
MBB->erase(Coalesced[i]);
NumCopies += Coalesced.size();
+ // addRetOperands must run after we've seen all defs in this block.
+ addRetOperands(MBB);
+
DEBUG(MBB->dump());
}
@@ -1038,12 +1118,16 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
+ MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
UsedInInstr.resize(TRI->getNumRegs());
+ assert(!MRI->isSSA() && "regalloc requires leaving SSA");
+
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
+ LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
@@ -1052,16 +1136,17 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
AllocateBasicBlock();
}
- // Make sure the set of used physregs is closed under subreg operations.
- MRI->closePhysRegsUsed(*TRI);
-
// Add the clobber lists for all the instructions we skipped earlier.
for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
- if (const unsigned *Defs = (*I)->getImplicitDefs())
+ if (const uint16_t *Defs = (*I)->getImplicitDefs())
while (*Defs)
MRI->setPhysRegUsed(*Defs++);
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ MRI->clearVirtRegs();
+
SkippedInstrs.clear();
StackSlotForVirtReg.clear();
LiveDbgValueMap.clear();
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index f54a2c85d100..3f2a617100c3 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -16,7 +16,6 @@
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "SpillPlacement.h"
@@ -29,6 +28,7 @@
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -168,6 +168,19 @@ class RAGreedy : public MachineFunctionPass,
}
};
+ // Register mask interference. The current VirtReg is checked for register
+ // mask interference on entry to selectOrSplit(). If there is no
+ // interference, UsableRegs is left empty. If there is interference,
+ // UsableRegs has a bit mask of registers that can be used without register
+ // mask interference.
+ BitVector UsableRegs;
+
+ /// clobberedByRegMask - Returns true if PhysReg is not directly usable
+ /// because of register mask clobbers.
+ bool clobberedByRegMask(unsigned PhysReg) const {
+ return !UsableRegs.empty() && !UsableRegs.test(PhysReg);
+ }
+
// splitting state.
std::auto_ptr<SplitAnalysis> SA;
std::auto_ptr<SplitEditor> SE;
@@ -248,7 +261,6 @@ public:
static char ID;
private:
- void LRE_WillEraseInstruction(MachineInstr*);
bool LRE_CanEraseVirtReg(unsigned);
void LRE_WillShrinkVirtReg(unsigned);
void LRE_DidCloneVirtReg(unsigned, unsigned);
@@ -308,8 +320,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -328,9 +340,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveDebugVariables>();
AU.addPreserved<LiveDebugVariables>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
AU.addRequired<CalculateSpillWeights>();
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
@@ -350,11 +359,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
// LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//
-void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) {
- // LRE itself will remove from SlotIndexes and parent basic block.
- VRM->RemoveMachineInstrFromMaps(MI);
-}
-
bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
if (unsigned PhysReg = VRM->getPhys(VirtReg)) {
unassign(LIS->getInterval(VirtReg), PhysReg);
@@ -424,13 +428,13 @@ void RAGreedy::enqueue(LiveInterval *LI) {
Prio |= (1u << 30);
}
- Queue.push(std::make_pair(Prio, Reg));
+ Queue.push(std::make_pair(Prio, ~Reg));
}
LiveInterval *RAGreedy::dequeue() {
if (Queue.empty())
return 0;
- LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+ LiveInterval *LI = &LIS->getInterval(~Queue.top().second);
Queue.pop();
return LI;
}
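The queue change above stores std::make_pair(Prio, ~Reg): pairs compare lexicographically and std::priority_queue pops the maximum, so complementing the register number makes the lowest-numbered register win a priority tie (presumably for a deterministic allocation order), and since ~ is its own inverse, dequeue() just complements again. A standalone demonstration:

#include <cstdio>
#include <queue>
#include <utility>

int main() {
  // Pairs compare on .first, then .second; priority_queue pops the max.
  // Storing ~Reg makes the *lowest* register number win a priority tie.
  std::priority_queue<std::pair<unsigned, unsigned> > Q;
  Q.push(std::make_pair(5u, ~7u));
  Q.push(std::make_pair(5u, ~3u));
  Q.push(std::make_pair(9u, ~8u));
  while (!Q.empty()) {   // Prints reg 8, reg 3, reg 7.
    std::printf("reg %u\n", ~Q.top().second);
    Q.pop();
  }
}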
@@ -446,9 +450,12 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
Order.rewind();
unsigned PhysReg;
- while ((PhysReg = Order.next()))
+ while ((PhysReg = Order.next())) {
+ if (clobberedByRegMask(PhysReg))
+ continue;
if (!checkPhysRegInterference(VirtReg, PhysReg))
break;
+ }
if (!PhysReg || Order.isHint(PhysReg))
return PhysReg;
@@ -457,7 +464,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// If we missed a simple hint, try to cheaply evict interference from the
// preferred register.
if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
- if (Order.isHint(Hint)) {
+ if (Order.isHint(Hint) && !clobberedByRegMask(Hint)) {
DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
EvictionCost MaxCost(1);
if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
@@ -532,7 +539,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
Cascade = NextCascade;
EvictionCost Cost;
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
// If there are 10 or more interferences, chances are one is heavier.
if (Q.collectInterferingVRegs(10) >= 10)
@@ -590,7 +597,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
<< " interference: Cascade " << Cascade << '\n');
- for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
assert(Q.seenAllInterferences() && "Didn't check all interferences.");
for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
@@ -629,6 +636,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
Order.rewind();
while (unsigned PhysReg = Order.next()) {
+ if (clobberedByRegMask(PhysReg))
+ continue;
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
// The first use of a callee-saved register in a function has cost 1.
@@ -1118,6 +1127,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
}
--NumCands;
GlobalCand[Worst] = GlobalCand[NumCands];
+ if (BestCand == NumCands)
+ BestCand = Worst;
}
if (GlobalCand.size() <= NumCands)
@@ -1172,7 +1183,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
return 0;
// Prepare split editor.
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
// Assign all edge bundles to the preferred candidate, or NoCand.
@@ -1220,7 +1231,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
unsigned Reg = VirtReg.reg;
bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit, SplitSpillMode);
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
@@ -1268,7 +1279,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
SmallVectorImpl<float> &GapWeight) {
assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
const unsigned NumGaps = Uses.size()-1;
// Start and end points for the interference check.
@@ -1280,7 +1291,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ for (const uint16_t *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
.checkInterference())
continue;
@@ -1292,7 +1303,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg,
// surrounding the instruction. The exception is interference before
// StartIdx and after StopIdx.
//
- LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ LiveIntervalUnion::SegmentIter IntI = getLiveUnion(*AI).find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1329,7 +1340,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// that the interval is continuous from FirstInstr to LastInstr. We should
// make sure that we don't do anything illegal to such an interval, though.
- const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ ArrayRef<SlotIndex> Uses = SA->getUseSlots();
if (Uses.size() <= 2)
return 0;
const unsigned NumGaps = Uses.size()-1;
@@ -1337,10 +1348,40 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG({
dbgs() << "tryLocalSplit: ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << ' ' << Uses[i];
dbgs() << '\n';
});
+ // If VirtReg is live across any register mask operands, compute a list of
+ // gaps with register masks.
+ SmallVector<unsigned, 8> RegMaskGaps;
+ if (!UsableRegs.empty()) {
+ // Get regmask slots for the whole block.
+ ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
+ DEBUG(dbgs() << RMS.size() << " regmasks in block:");
+ // Constrain to VirtReg's live range.
+ unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
+ Uses.front().getRegSlot()) - RMS.begin();
+ unsigned re = RMS.size();
+ for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
+ // Look for Uses[i] <= RMS <= Uses[i+1].
+ assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
+ if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
+ continue;
+ // Skip a regmask on the same instruction as the last use. It doesn't
+ // overlap the live range.
+ if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
+ break;
+ DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
+ RegMaskGaps.push_back(i);
+ // Advance ri to the next gap. A regmask on one of the uses counts in
+ // both gaps.
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
+ ++ri;
+ }
+ DEBUG(dbgs() << '\n');
+ }
+
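The RegMaskGaps computation just added walks two sorted sequences at once: std::lower_bound first skips regmask slots (RMS) before the interval's first use, then a single forward merge attributes each remaining mask to the use-gap containing it. The merge shape, standalone, with plain unsigneds standing in for SlotIndex (the patch's special case for a mask on the very last use is omitted here):

#include <algorithm>
#include <cstddef>
#include <vector>

// For each gap [Uses[i], Uses[i+1]], record i if some Mask falls inside it.
// Both inputs must be sorted; mirrors the two-cursor RegMaskGaps loop.
std::vector<unsigned> gapsWithMasks(const std::vector<unsigned> &Uses,
                                    const std::vector<unsigned> &Masks) {
  std::vector<unsigned> Gaps;
  std::size_t ri = std::lower_bound(Masks.begin(), Masks.end(), Uses.front())
                   - Masks.begin();
  for (std::size_t i = 0; i + 1 != Uses.size() && ri != Masks.size(); ++i) {
    if (Masks[ri] > Uses[i + 1])
      continue;                 // this gap is mask-free; try the next one
    Gaps.push_back(i);
    while (ri != Masks.size() && Masks[ri] < Uses[i + 1])
      ++ri;                     // a mask exactly on a use counts in both gaps
  }
  return Gaps;
}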
// Since we allow local split results to be split again, there is a risk of
// creating infinite loops. It is tempting to require that the new live
// ranges have fewer instructions than the original. That would guarantee
@@ -1375,6 +1416,11 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
// order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
calcGapWeights(PhysReg, GapWeight);
+ // Remove any gaps with regmask clobbers.
+ if (clobberedByRegMask(PhysReg))
+ for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
+ GapWeight[RegMaskGaps[i]] = HUGE_VALF;
+
// Try to find the best sequence of gaps to close.
// The new spill weight must be larger than any gap interference.
@@ -1466,7 +1512,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
<< '-' << Uses[BestAfter] << ", " << BestDiff
<< ", " << (BestAfter - BestBefore + 1) << " instrs\n");
- LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
SE->reset(LREdit);
SE->openIntv();
@@ -1553,6 +1599,11 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Check if VirtReg is live across any calls.
+ UsableRegs.clear();
+ if (LIS->checkRegMaskInterference(VirtReg, UsableRegs))
+ DEBUG(dbgs() << "Live across regmasks.\n");
+
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
@@ -1593,7 +1644,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Finally spill VirtReg itself.
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
- LiveRangeEdit LRE(VirtReg, NewVRegs, this);
+ LiveRangeEdit LRE(VirtReg, NewVRegs, *MF, *LIS, VRM, this);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
@@ -1628,7 +1679,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
ExtraRegInfo.clear();
ExtraRegInfo.resize(MRI->getNumVirtRegs());
NextCascade = 1;
- IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
+ IntfCache.init(MF, &getLiveUnion(0), Indexes, LIS, TRI);
GlobalCand.resize(32); // This will grow as needed.
allocatePhysRegs();
@@ -1647,7 +1698,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DebugVars->emitDebugValues(VRM);
}
- // The pass output is in VirtRegMap. Release all the transient data.
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers and release all the transient data.
+ VRM->clearAllVirt();
+ MRI->clearVirtRegs();
releaseMemory();
return true;
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
deleted file mode 100644
index ce3fb90b1126..000000000000
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ /dev/null
@@ -1,1543 +0,0 @@
-//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a linear scan register allocator.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regalloc"
-#include "LiveDebugVariables.h"
-#include "LiveRangeEdit.h"
-#include "VirtRegMap.h"
-#include "VirtRegRewriter.h"
-#include "RegisterClassInfo.h"
-#include "Spiller.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/EquivalenceClasses.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <queue>
-#include <memory>
-#include <cmath>
-
-using namespace llvm;
-
-STATISTIC(NumIters , "Number of iterations performed");
-STATISTIC(NumBacktracks, "Number of times we had to backtrack");
-STATISTIC(NumCoalesce, "Number of copies coalesced");
-STATISTIC(NumDowngrade, "Number of registers downgraded");
-
-static cl::opt<bool>
-NewHeuristic("new-spilling-heuristic",
- cl::desc("Use new spilling heuristic"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-TrivCoalesceEnds("trivial-coalesce-ends",
- cl::desc("Attempt trivial coalescing of interval ends"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-AvoidWAWHazard("avoid-waw-hazard",
- cl::desc("Avoid write-write hazards for some register classes"),
- cl::init(false), cl::Hidden);
-
-static RegisterRegAlloc
-linearscanRegAlloc("linearscan", "linear scan register allocator",
- createLinearScanRegisterAllocator);
-
-namespace {
- // When we allocate a register, add it to a fixed-size queue of
- // registers to skip in subsequent allocations. This trades a small
- // amount of register pressure and increased spills for flexibility in
- // the post-pass scheduler.
- //
- // Note that in a the number of registers used for reloading spills
- // will be one greater than the value of this option.
- //
- // One big limitation of this is that it doesn't differentiate between
- // different register classes. So on x86-64, if there is xmm register
- // pressure, it can caused fewer GPRs to be held in the queue.
- static cl::opt<unsigned>
- NumRecentlyUsedRegs("linearscan-skip-count",
- cl::desc("Number of registers for linearscan to remember"
- "to skip."),
- cl::init(0),
- cl::Hidden);
-
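RecentRegs, declared below, implements the queue this option sizes: a fixed-capacity ring buffer where recordRecentlyUsed overwrites the oldest slot and isRecentlyUsed does a linear scan. A standalone sketch of the same behavior, assuming the capacity is fixed at construction:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Fixed-size ring buffer of recently allocated registers to skip.
    class RecentRegQueue {
      std::vector<unsigned> Regs;            // zero-filled, capacity N
      std::vector<unsigned>::iterator Next;  // next slot to overwrite

    public:
      explicit RecentRegQueue(unsigned N) : Regs(N, 0), Next(Regs.begin()) {}

      // Record a pick, overwriting the oldest entry once the buffer is full.
      void record(unsigned Reg) {
        assert(Reg != 0 && "Recently used register is NOREG!");
        if (Regs.empty())
          return;
        *Next++ = Reg;
        if (Next == Regs.end())
          Next = Regs.begin();
      }

      // A register is "recently used" while it is still in the buffer.
      bool contains(unsigned Reg) const {
        return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
      }
    };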
- struct RALinScan : public MachineFunctionPass {
- static char ID;
- RALinScan() : MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(
- *PassRegistry::getPassRegistry());
- initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
-
- // Initialize the queue to record recently-used registers.
- if (NumRecentlyUsedRegs > 0)
- RecentRegs.resize(NumRecentlyUsedRegs, 0);
- RecentNext = RecentRegs.begin();
- avoidWAW_ = 0;
- }
-
- typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
- typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
- private:
- /// RelatedRegClasses - This structure is built the first time a function is
- /// compiled, and keeps track of which register classes have registers that
- /// belong to multiple classes or have aliases that are in other classes.
- EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
- DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
-
-    // NextReloadMap - For each register in the map, it maps to another
- // register which is defined by a reload from the same stack slot and
- // both reloads are in the same basic block.
- DenseMap<unsigned, unsigned> NextReloadMap;
-
- // DowngradedRegs - A set of registers which are being "downgraded", i.e.
- // un-favored for allocation.
- SmallSet<unsigned, 8> DowngradedRegs;
-
- // DowngradeMap - A map from virtual registers to physical registers being
- // downgraded for the virtual registers.
- DenseMap<unsigned, unsigned> DowngradeMap;
-
- MachineFunction* mf_;
- MachineRegisterInfo* mri_;
- const TargetMachine* tm_;
- const TargetRegisterInfo* tri_;
- const TargetInstrInfo* tii_;
- BitVector allocatableRegs_;
- BitVector reservedRegs_;
- LiveIntervals* li_;
- MachineLoopInfo *loopInfo;
- RegisterClassInfo RegClassInfo;
-
- /// handled_ - Intervals are added to the handled_ set in the order of their
-    /// start value. This is used for backtracking.
- std::vector<LiveInterval*> handled_;
-
- /// fixed_ - Intervals that correspond to machine registers.
- ///
- IntervalPtrs fixed_;
-
- /// active_ - Intervals that are currently being processed, and which have a
- /// live range active for the current point.
- IntervalPtrs active_;
-
- /// inactive_ - Intervals that are currently being processed, but which have
-    /// a hole at the current point.
- IntervalPtrs inactive_;
-
- typedef std::priority_queue<LiveInterval*,
- SmallVector<LiveInterval*, 64>,
- greater_ptr<LiveInterval> > IntervalHeap;
- IntervalHeap unhandled_;
-
- /// regUse_ - Tracks register usage.
- SmallVector<unsigned, 32> regUse_;
- SmallVector<unsigned, 32> regUseBackUp_;
-
- /// vrm_ - Tracks register assignments.
- VirtRegMap* vrm_;
-
- std::auto_ptr<VirtRegRewriter> rewriter_;
-
- std::auto_ptr<Spiller> spiller_;
-
- // The queue of recently-used registers.
- SmallVector<unsigned, 4> RecentRegs;
- SmallVector<unsigned, 4>::iterator RecentNext;
-
- // Last write-after-write register written.
- unsigned avoidWAW_;
-
- // Record that we just picked this register.
- void recordRecentlyUsed(unsigned reg) {
- assert(reg != 0 && "Recently used register is NOREG!");
- if (!RecentRegs.empty()) {
- *RecentNext++ = reg;
- if (RecentNext == RecentRegs.end())
- RecentNext = RecentRegs.begin();
- }
- }
-
- public:
- virtual const char* getPassName() const {
- return "Linear Scan Register Allocator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addPreserved<AliasAnalysis>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<SlotIndexes>();
- if (StrongPHIElim)
- AU.addRequiredID(StrongPHIEliminationID);
- // Make sure PassManager knows which analyses to make available
- // to coalescing and which analyses coalescing invalidates.
- AU.addRequiredTransitiveID(RegisterCoalescerPassID);
- AU.addRequired<CalculateSpillWeights>();
- AU.addRequiredID(LiveStacksID);
- AU.addPreservedID(LiveStacksID);
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<VirtRegMap>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
- AU.addRequiredID(MachineDominatorsID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- /// runOnMachineFunction - register allocate the whole function
- bool runOnMachineFunction(MachineFunction&);
-
- // Determine if we skip this register due to its being recently used.
- bool isRecentlyUsed(unsigned reg) const {
- return reg == avoidWAW_ ||
- std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end();
- }
-
- private:
- /// linearScan - the linear scan algorithm
- void linearScan();
-
- /// initIntervalSets - initialize the interval sets.
- ///
- void initIntervalSets();
-
- /// processActiveIntervals - expire old intervals and move non-overlapping
- /// ones to the inactive list.
- void processActiveIntervals(SlotIndex CurPoint);
-
- /// processInactiveIntervals - expire old intervals and move overlapping
- /// ones to the active list.
- void processInactiveIntervals(SlotIndex CurPoint);
-
- /// hasNextReloadInterval - Return the next liveinterval that's being
- /// defined by a reload from the same SS as the specified one.
- LiveInterval *hasNextReloadInterval(LiveInterval *cur);
-
- /// DowngradeRegister - Downgrade a register for allocation.
- void DowngradeRegister(LiveInterval *li, unsigned Reg);
-
- /// UpgradeRegister - Upgrade a register for allocation.
- void UpgradeRegister(unsigned Reg);
-
- /// assignRegOrStackSlotAtInterval - assign a register if one
- /// is available, or spill.
- void assignRegOrStackSlotAtInterval(LiveInterval* cur);
-
- void updateSpillWeights(std::vector<float> &Weights,
- unsigned reg, float weight,
- const TargetRegisterClass *RC);
-
- /// findIntervalsToSpill - Determine the intervals to spill for the
- /// specified interval. It's passed the physical registers whose spill
- /// weight is the lowest among all the registers whose live intervals
- /// conflict with the interval.
- void findIntervalsToSpill(LiveInterval *cur,
- std::vector<std::pair<unsigned,float> > &Candidates,
- unsigned NumCands,
- SmallVector<LiveInterval*, 8> &SpillIntervals);
-
- /// attemptTrivialCoalescing - If a simple interval is defined by a copy,
- /// try to allocate the definition to the same register as the source,
-    /// if the register is not defined during the lifetime of the interval.
- /// This eliminates a copy, and is used to coalesce copies which were not
- /// coalesced away before allocation either due to dest and src being in
- /// different register classes or because the coalescer was overly
- /// conservative.
- unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg);
-
- ///
- /// Register usage / availability tracking helpers.
- ///
-
- void initRegUses() {
- regUse_.resize(tri_->getNumRegs(), 0);
- regUseBackUp_.resize(tri_->getNumRegs(), 0);
- }
-
- void finalizeRegUses() {
-#ifndef NDEBUG
- // Verify all the registers are "freed".
- bool Error = false;
- for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) {
- if (regUse_[i] != 0) {
- dbgs() << tri_->getName(i) << " is still in use!\n";
- Error = true;
- }
- }
- if (Error)
- llvm_unreachable(0);
-#endif
- regUse_.clear();
- regUseBackUp_.clear();
- }
-
- void addRegUse(unsigned physReg) {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- ++regUse_[physReg];
- for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as)
- ++regUse_[*as];
- }
-
- void delRegUse(unsigned physReg) {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- assert(regUse_[physReg] != 0);
- --regUse_[physReg];
- for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) {
- assert(regUse_[*as] != 0);
- --regUse_[*as];
- }
- }
-
- bool isRegAvail(unsigned physReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(physReg) &&
- "should be physical register!");
- return regUse_[physReg] == 0;
- }
-
- void backUpRegUses() {
- regUseBackUp_ = regUse_;
- }
-
- void restoreRegUses() {
- regUse_ = regUseBackUp_;
- }
-
- ///
- /// Register handling helpers.
- ///
-
- /// getFreePhysReg - return a free physical register for this virtual
- /// register interval if we have one, otherwise return 0.
- unsigned getFreePhysReg(LiveInterval* cur);
- unsigned getFreePhysReg(LiveInterval* cur,
- const TargetRegisterClass *RC,
- unsigned MaxInactiveCount,
- SmallVector<unsigned, 256> &inactiveCounts,
- bool SkipDGRegs);
-
- /// getFirstNonReservedPhysReg - return the first non-reserved physical
- /// register in the register class.
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
- ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
- assert(!O.empty() && "All registers reserved?!");
- return O.front();
- }
-
- void ComputeRelatedRegClasses();
-
- template <typename ItTy>
- void printIntervals(const char* const str, ItTy i, ItTy e) const {
- DEBUG({
- if (str)
- dbgs() << str << " intervals:\n";
-
- for (; i != e; ++i) {
- dbgs() << '\t' << *i->first << " -> ";
-
- unsigned reg = i->first->reg;
- if (TargetRegisterInfo::isVirtualRegister(reg))
- reg = vrm_->getPhys(reg);
-
- dbgs() << tri_->getName(reg) << '\n';
- }
- });
- }
- };
- char RALinScan::ID = 0;
-}
-
-INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
-INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false)
-
-void RALinScan::ComputeRelatedRegClasses() {
- // First pass, add all reg classes to the union, and determine at least one
- // reg class that each register is in.
- bool HasAliases = false;
- for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
- E = tri_->regclass_end(); RCI != E; ++RCI) {
- RelatedRegClasses.insert(*RCI);
- for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
- I != E; ++I) {
- HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
-
- const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
- if (PRC) {
- // Already processed this register. Just make sure we know that
- // multiple register classes share a register.
- RelatedRegClasses.unionSets(PRC, *RCI);
- } else {
- PRC = *RCI;
- }
- }
- }
-
- // Second pass, now that we know conservatively what register classes each reg
- // belongs to, add info about aliases. We don't need to do this for targets
- // without register aliases.
- if (HasAliases)
- for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
- I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
- I != E; ++I)
- for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
- const TargetRegisterClass *AliasClass =
- OneClassForEachPhysReg.lookup(*AS);
- if (AliasClass)
- RelatedRegClasses.unionSets(I->second, AliasClass);
- }
-}
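ComputeRelatedRegClasses above is union-find over register classes: classes that share a register (or an alias of one) end up in the same equivalence set, and getLeaderValue later gives a canonical representative per set. A small sketch of the same pattern, illustrative only, with string literals standing in for TargetRegisterClass pointers:

    #include "llvm/ADT/EquivalenceClasses.h"
    #include <cassert>

    void relatedClassesSketch() {
      llvm::EquivalenceClasses<const char *> EC;
      const char *GR32 = "GR32", *GR64 = "GR64", *FR32 = "FR32";
      EC.insert(GR32);
      EC.insert(GR64);
      EC.insert(FR32);

      // GR32 registers alias (are sub-registers of) GR64 ones, so the two
      // classes are related; FR32 shares nothing and stays alone.
      EC.unionSets(GR32, GR64);

      assert(EC.getLeaderValue(GR32) == EC.getLeaderValue(GR64));
      assert(EC.getLeaderValue(FR32) != EC.getLeaderValue(GR32));
    }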
-
-/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
-/// to allocate the definition to the same register as the source register if
-/// the register is not defined during the lifetime of the interval. If the interval is
-/// killed by a copy, try to use the destination register. This eliminates a
-/// copy. This is used to coalesce copies which were not coalesced away before
-/// allocation either due to dest and src being in different register classes or
-/// because the coalescer was overly conservative.
-unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
- unsigned Preference = vrm_->getRegAllocPref(cur.reg);
- if ((Preference && Preference == Reg) || !cur.containsOneValue())
- return Reg;
-
- // We cannot handle complicated live ranges. Simple linear stuff only.
- if (cur.ranges.size() != 1)
- return Reg;
-
- const LiveRange &range = cur.ranges.front();
-
- VNInfo *vni = range.valno;
- if (vni->isUnused() || !vni->def.isValid())
- return Reg;
-
- unsigned CandReg;
- {
- MachineInstr *CopyMI;
- if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
- // Defined by a copy, try to extend SrcReg forward
- CandReg = CopyMI->getOperand(1).getReg();
- else if (TrivCoalesceEnds &&
- (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
- CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
- // Only used by a copy, try to extend DstReg backwards
- CandReg = CopyMI->getOperand(0).getReg();
- else
- return Reg;
-
- // If the target of the copy is a sub-register then don't coalesce.
-    if (CopyMI->getOperand(0).getSubReg())
- return Reg;
- }
-
- if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
- if (!vrm_->isAssignedReg(CandReg))
- return Reg;
- CandReg = vrm_->getPhys(CandReg);
- }
- if (Reg == CandReg)
- return Reg;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
- if (!RC->contains(CandReg))
- return Reg;
-
- if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
- return Reg;
-
- // Try to coalesce.
- DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
- << '\n');
- vrm_->clearVirt(cur.reg);
- vrm_->assignVirt2Phys(cur.reg, CandReg);
-
- ++NumCoalesce;
- return CandReg;
-}
-
-bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &fn.getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- allocatableRegs_ = tri_->getAllocatableSet(fn);
- reservedRegs_ = tri_->getReservedRegs(fn);
- li_ = &getAnalysis<LiveIntervals>();
- loopInfo = &getAnalysis<MachineLoopInfo>();
- RegClassInfo.runOnMachineFunction(fn);
-
- // We don't run the coalescer here because we have no reason to
- // interact with it. If the coalescer requires interaction, it
- // won't do anything. If it doesn't require interaction, we assume
- // it was run as a separate pass.
-
- // If this is the first function compiled, compute the related reg classes.
- if (RelatedRegClasses.empty())
- ComputeRelatedRegClasses();
-
- // Also resize register usage trackers.
- initRegUses();
-
- vrm_ = &getAnalysis<VirtRegMap>();
- if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
-
- spiller_.reset(createSpiller(*this, *mf_, *vrm_));
-
- initIntervalSets();
-
- linearScan();
-
- // Rewrite spill code and update the PhysRegsUsed set.
- rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
-
- // Write out new DBG_VALUE instructions.
- getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
-
- assert(unhandled_.empty() && "Unhandled live intervals remain!");
-
- finalizeRegUses();
-
- fixed_.clear();
- active_.clear();
- inactive_.clear();
- handled_.clear();
- NextReloadMap.clear();
- DowngradedRegs.clear();
- DowngradeMap.clear();
- spiller_.reset(0);
-
- return true;
-}
-
-/// initIntervalSets - initialize the interval sets.
-///
-void RALinScan::initIntervalSets()
-{
- assert(unhandled_.empty() && fixed_.empty() &&
- active_.empty() && inactive_.empty() &&
- "interval sets should be empty on initialization");
-
- handled_.reserve(li_->getNumIntervals());
-
- for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
- if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
- if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) {
- mri_->setPhysRegUsed(i->second->reg);
- fixed_.push_back(std::make_pair(i->second, i->second->begin()));
- }
- } else {
- if (i->second->empty()) {
- assignRegOrStackSlotAtInterval(i->second);
- }
- else
- unhandled_.push(i->second);
- }
- }
-}
-
-void RALinScan::linearScan() {
- // linear scan algorithm
- DEBUG({
- dbgs() << "********** LINEAR SCAN **********\n"
- << "********** Function: "
- << mf_->getFunction()->getName() << '\n';
- printIntervals("fixed", fixed_.begin(), fixed_.end());
- });
-
- while (!unhandled_.empty()) {
- // pick the interval with the earliest start point
- LiveInterval* cur = unhandled_.top();
- unhandled_.pop();
- ++NumIters;
- DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
-
- assert(!cur->empty() && "Empty interval in unhandled set.");
-
- processActiveIntervals(cur->beginIndex());
- processInactiveIntervals(cur->beginIndex());
-
- assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
- "Can only allocate virtual registers!");
-
-    // Allocating a virtual register: try to find a free
- // physical register or spill an interval (possibly this one) in order to
- // assign it one.
- assignRegOrStackSlotAtInterval(cur);
-
- DEBUG({
- printIntervals("active", active_.begin(), active_.end());
- printIntervals("inactive", inactive_.begin(), inactive_.end());
- });
- }
-
- // Expire any remaining active intervals
- while (!active_.empty()) {
- IntervalPtr &IP = active_.back();
- unsigned reg = IP.first->reg;
- DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
- active_.pop_back();
- }
-
- // Expire any remaining inactive intervals
- DEBUG({
- for (IntervalPtrs::reverse_iterator
- i = inactive_.rbegin(); i != inactive_.rend(); ++i)
- dbgs() << "\tinterval " << *i->first << " expired\n";
- });
- inactive_.clear();
-
- // Add live-ins to every BB except for entry. Also perform trivial coalescing.
- MachineFunction::iterator EntryMBB = mf_->begin();
- SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
- for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
- LiveInterval &cur = *i->second;
- unsigned Reg = 0;
- bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
- if (isPhys)
- Reg = cur.reg;
- else if (vrm_->isAssignedReg(cur.reg))
- Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
- if (!Reg)
- continue;
-    // Ignore split live intervals.
- if (!isPhys && vrm_->getPreSplitReg(cur.reg))
- continue;
-
- for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
- I != E; ++I) {
- const LiveRange &LR = *I;
- if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
- for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
- if (LiveInMBBs[i] != EntryMBB) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Adding a virtual register to livein set?");
- LiveInMBBs[i]->addLiveIn(Reg);
- }
- LiveInMBBs.clear();
- }
- }
- }
-
- DEBUG(dbgs() << *vrm_);
-
- // Look for physical registers that end up not being allocated even though
-  // the register allocator had to spill other registers in its register class.
- if (!vrm_->FindUnusedRegisters(li_))
- return;
-}
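Most of linearScan above is LLVM-specific bookkeeping (fixed intervals, live-ins, trivial coalescing); the core sweep is the classic Poletto/Sarkar algorithm. A self-contained toy version, with no lifetime holes, inactive list, or backtracking, might look like this (all types and the spill policy here are simplified stand-ins, not the code above):

    #include <algorithm>
    #include <vector>

    struct Ival { unsigned Begin, End; int Assigned; }; // Assigned: reg or -1

    void linearScanSketch(std::vector<Ival> &Ivals, unsigned K) {
      // Process intervals in order of increasing start point.
      std::sort(Ivals.begin(), Ivals.end(),
                [](const Ival &A, const Ival &B) { return A.Begin < B.Begin; });
      std::vector<Ival *> Active; // kept sorted by increasing End
      std::vector<int> FreeRegs;
      for (unsigned R = 0; R != K; ++R)
        FreeRegs.push_back((int)R);

      for (Ival &Cur : Ivals) {
        // Expire intervals that end before Cur starts, freeing their regs.
        while (!Active.empty() && Active.front()->End <= Cur.Begin) {
          FreeRegs.push_back(Active.front()->Assigned);
          Active.erase(Active.begin());
        }
        if (!FreeRegs.empty()) {
          Cur.Assigned = FreeRegs.back();
          FreeRegs.pop_back();
        } else {
          // No free register: spill whichever of Cur and the active
          // intervals lives longest (the furthest-end heuristic).
          Ival *Victim = Active.back();
          if (Victim->End > Cur.End) {
            Cur.Assigned = Victim->Assigned;
            Victim->Assigned = -1; // spilled
            Active.pop_back();
          } else {
            Cur.Assigned = -1;     // spill Cur itself
            continue;
          }
        }
        Active.insert(std::upper_bound(Active.begin(), Active.end(), &Cur,
                                       [](const Ival *A, const Ival *B) {
                                         return A->End < B->End;
                                       }),
                      &Cur);
      }
    }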
-
-/// processActiveIntervals - expire old intervals and move non-overlapping ones
-/// to the inactive list.
-void RALinScan::processActiveIntervals(SlotIndex CurPoint)
-{
- DEBUG(dbgs() << "\tprocessing active intervals:\n");
-
- for (unsigned i = 0, e = active_.size(); i != e; ++i) {
- LiveInterval *Interval = active_[i].first;
- LiveInterval::iterator IntervalPos = active_[i].second;
- unsigned reg = Interval->reg;
-
- IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
-
- if (IntervalPos == Interval->end()) { // Remove expired intervals.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
-
- // Pop off the end of the list.
- active_[i] = active_.back();
- active_.pop_back();
- --i; --e;
-
- } else if (IntervalPos->start > CurPoint) {
- // Move inactive intervals to inactive list.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- delRegUse(reg);
- // add to inactive.
- inactive_.push_back(std::make_pair(Interval, IntervalPos));
-
- // Pop off the end of the list.
- active_[i] = active_.back();
- active_.pop_back();
- --i; --e;
- } else {
- // Otherwise, just update the iterator position.
- active_[i].second = IntervalPos;
- }
- }
-}
-
-/// processInactiveIntervals - expire old intervals and move overlapping
-/// ones to the active list.
-void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
-{
- DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
-
- for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
- LiveInterval *Interval = inactive_[i].first;
- LiveInterval::iterator IntervalPos = inactive_[i].second;
- unsigned reg = Interval->reg;
-
- IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
-
- if (IntervalPos == Interval->end()) { // remove expired intervals.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
-
- // Pop off the end of the list.
- inactive_[i] = inactive_.back();
- inactive_.pop_back();
- --i; --e;
- } else if (IntervalPos->start <= CurPoint) {
-      // Move re-activated intervals to the active list.
- DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- addRegUse(reg);
- // add to active
- active_.push_back(std::make_pair(Interval, IntervalPos));
-
- // Pop off the end of the list.
- inactive_[i] = inactive_.back();
- inactive_.pop_back();
- --i; --e;
- } else {
- // Otherwise, just update the iterator position.
- inactive_[i].second = IntervalPos;
- }
- }
-}
-
-/// updateSpillWeights - updates the spill weight of the specified physical
-/// register and of its aliases.
-void RALinScan::updateSpillWeights(std::vector<float> &Weights,
- unsigned reg, float weight,
- const TargetRegisterClass *RC) {
- SmallSet<unsigned, 4> Processed;
- SmallSet<unsigned, 4> SuperAdded;
- SmallVector<unsigned, 4> Supers;
- Weights[reg] += weight;
- Processed.insert(reg);
- for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
- Weights[*as] += weight;
- Processed.insert(*as);
- if (tri_->isSubRegister(*as, reg) &&
- SuperAdded.insert(*as) &&
- RC->contains(*as)) {
- Supers.push_back(*as);
- }
- }
-
-  // If the alias is a super-register and the super-register is in the
-  // register class we are trying to allocate, then add the weight to all
-  // sub-registers of the super-register, even if they are not aliases.
-  // E.g., when allocating for GR32, bh is not used, but bl's spill weight is
-  // updated. bl should get the same spill weight; otherwise it will be chosen
-  // as a spill candidate, since spilling bh doesn't make ebx available.
- for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
- for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
- if (!Processed.count(*sr))
- Weights[*sr] += weight;
- }
-}
-
-static
-RALinScan::IntervalPtrs::iterator
-FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
- for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
- I != E; ++I)
- if (I->first == LI) return I;
- return IP.end();
-}
-
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
- SlotIndex Point){
- for (unsigned i = 0, e = V.size(); i != e; ++i) {
- RALinScan::IntervalPtr &IP = V[i];
- LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
- IP.second, Point);
- if (I != IP.first->begin()) --I;
- IP.second = I;
- }
-}
-
-/// getConflictWeight - Return the number of conflicts between the cur
-/// live interval and the defs and uses of Reg, weighted by loop depth.
-static
-float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
- MachineRegisterInfo *mri_,
- MachineLoopInfo *loopInfo) {
- float Conflicts = 0;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineInstr *MI = &*I;
- if (cur->liveAt(li_->getInstructionIndex(MI))) {
- unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
- Conflicts += std::pow(10.0f, (float)loopDepth);
- }
- }
- return Conflicts;
-}
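getConflictWeight above scores a candidate by summing 10^loopDepth over every def and use of Reg that lies inside cur. A worked example of the weighting (the instruction depths are hypothetical):

    #include <cmath>

    // Two conflicting instructions at loop depth 0 and one at depth 2 give
    // 10^0 + 10^0 + 10^2 = 102. A single conflict in a doubly nested loop
    // outweighs dozens in straight-line code, biasing the spill choice away
    // from values touched inside loops.
    float exampleConflictWeight() {
      const unsigned Depths[] = {0, 0, 2};
      float Conflicts = 0;
      for (unsigned D : Depths)
        Conflicts += std::pow(10.0f, (float)D);
      return Conflicts; // 102.0f
    }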
-
-/// findIntervalsToSpill - Determine the intervals to spill for the
-/// specified interval. It's passed the physical registers whose spill
-/// weight is the lowest among all the registers whose live intervals
-/// conflict with the interval.
-void RALinScan::findIntervalsToSpill(LiveInterval *cur,
- std::vector<std::pair<unsigned,float> > &Candidates,
- unsigned NumCands,
- SmallVector<LiveInterval*, 8> &SpillIntervals) {
- // We have figured out the *best* register to spill. But there are other
- // registers that are pretty good as well (spill weight within 3%). Spill
-  // the one that has the fewest defs and uses that conflict with cur.
- float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
- SmallVector<LiveInterval*, 8> SLIs[3];
-
- DEBUG({
- dbgs() << "\tConsidering " << NumCands << " candidates: ";
- for (unsigned i = 0; i != NumCands; ++i)
- dbgs() << tri_->getName(Candidates[i].first) << " ";
- dbgs() << "\n";
- });
-
- // Calculate the number of conflicts of each candidate.
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
- unsigned Reg = i->first->reg;
- unsigned PhysReg = vrm_->getPhys(Reg);
- if (!cur->overlapsFrom(*i->first, i->second))
- continue;
- for (unsigned j = 0; j < NumCands; ++j) {
- unsigned Candidate = Candidates[j].first;
- if (tri_->regsOverlap(PhysReg, Candidate)) {
- if (NumCands > 1)
- Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
- SLIs[j].push_back(i->first);
- }
- }
- }
-
- for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){
- unsigned Reg = i->first->reg;
- unsigned PhysReg = vrm_->getPhys(Reg);
- if (!cur->overlapsFrom(*i->first, i->second-1))
- continue;
- for (unsigned j = 0; j < NumCands; ++j) {
- unsigned Candidate = Candidates[j].first;
- if (tri_->regsOverlap(PhysReg, Candidate)) {
- if (NumCands > 1)
- Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
- SLIs[j].push_back(i->first);
- }
- }
- }
-
- // Which is the best candidate?
- unsigned BestCandidate = 0;
- float MinConflicts = Conflicts[0];
- for (unsigned i = 1; i != NumCands; ++i) {
- if (Conflicts[i] < MinConflicts) {
- BestCandidate = i;
- MinConflicts = Conflicts[i];
- }
- }
-
- std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
- std::back_inserter(SpillIntervals));
-}
-
-namespace {
- struct WeightCompare {
- private:
- const RALinScan &Allocator;
-
- public:
- WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
-
- typedef std::pair<unsigned, float> RegWeightPair;
- bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
- return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
- }
- };
-}
-
-static bool weightsAreClose(float w1, float w2) {
- if (!NewHeuristic)
- return false;
-
- float diff = w1 - w2;
- if (diff <= 0.02f) // Within 0.02f
- return true;
- return (diff / w2) <= 0.05f; // Within 5%.
-}
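weightsAreClose accepts either an absolute gap of at most 0.02 or a relative gap of at most 5%. Two concrete cases, mirroring the logic above (NewHeuristic assumed enabled):

    #include <cassert>

    // Same test as the function above, with the option check stripped.
    static bool close(float w1, float w2) {
      float diff = w1 - w2;
      if (diff <= 0.02f)           // absolute tolerance
        return true;
      return (diff / w2) <= 0.05f; // relative tolerance, within 5%
    }

    void weightsAreCloseExamples() {
      assert(close(1.015f, 1.0f)); // diff 0.015 <= 0.02
      assert(close(20.9f, 20.0f)); // diff 0.9, but 0.9/20 = 4.5% <= 5%
      assert(!close(1.2f, 1.0f));  // diff 0.2 and 20%; not close
    }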
-
-LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
- DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
- if (I == NextReloadMap.end())
- return 0;
- return &li_->getInterval(I->second);
-}
-
-void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
- for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
- bool isNew = DowngradedRegs.insert(*AS);
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Multiple reloads holding the same register?");
- DowngradeMap.insert(std::make_pair(li->reg, *AS));
- }
- ++NumDowngrade;
-}
-
-void RALinScan::UpgradeRegister(unsigned Reg) {
- if (Reg) {
- DowngradedRegs.erase(Reg);
- for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
- DowngradedRegs.erase(*AS);
- }
-}
-
-namespace {
- struct LISorter {
- bool operator()(LiveInterval* A, LiveInterval* B) {
- return A->beginIndex() < B->beginIndex();
- }
- };
-}
-
-/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
-/// spill.
-void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- DEBUG(dbgs() << "\tallocating current interval from "
- << RC->getName() << ": ");
-
- // This is an implicitly defined live interval, just assign any register.
- if (cur->empty()) {
- unsigned physReg = vrm_->getRegAllocPref(cur->reg);
- if (!physReg)
- physReg = getFirstNonReservedPhysReg(RC);
- DEBUG(dbgs() << tri_->getName(physReg) << '\n');
- // Note the register is not really in use.
- vrm_->assignVirt2Phys(cur->reg, physReg);
- return;
- }
-
- backUpRegUses();
-
- std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
- SlotIndex StartPosition = cur->beginIndex();
- const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
-  // If the start of this live interval is defined by a move instruction and its
- // source is assigned a physical register that is compatible with the target
- // register class, then we should try to assign it the same register.
- // This can happen when the move is from a larger register class to a smaller
- // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
- if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
- VNInfo *vni = cur->begin()->valno;
- if (!vni->isUnused() && vni->def.isValid()) {
- MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- if (CopyMI && CopyMI->isCopy()) {
- unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
- unsigned SrcReg = CopyMI->getOperand(1).getReg();
- unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
- unsigned Reg = 0;
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- Reg = SrcReg;
- else if (vrm_->isAssignedReg(SrcReg))
- Reg = vrm_->getPhys(SrcReg);
- if (Reg) {
- if (SrcSubReg)
- Reg = tri_->getSubReg(Reg, SrcSubReg);
- if (DstSubReg)
- Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
- if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- mri_->setRegAllocationHint(cur->reg, 0, Reg);
- }
- }
- }
- }
-
- // For every interval in inactive we overlap with, mark the
- // register as not free and update spill weights.
- for (IntervalPtrs::const_iterator i = inactive_.begin(),
- e = inactive_.end(); i != e; ++i) {
- unsigned Reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "Can only allocate virtual registers!");
- const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
- // If this is not in a related reg class to the register we're allocating,
- // don't check it.
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- cur->overlapsFrom(*i->first, i->second-1)) {
- Reg = vrm_->getPhys(Reg);
- addRegUse(Reg);
- SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
- }
- }
-
- // Speculatively check to see if we can get a register right now. If not,
- // we know we won't be able to by adding more constraints. If so, we can
- // check to see if it is valid. Doing an exhaustive search of the fixed_ list
-  // is very bad (it contains all callee-clobbered registers for any function
- // with a call), so we want to avoid doing that if possible.
- unsigned physReg = getFreePhysReg(cur);
- unsigned BestPhysReg = physReg;
- if (physReg) {
- // We got a register. However, if it's in the fixed_ list, we might
- // conflict with it. Check to see if we conflict with it or any of its
- // aliases.
- SmallSet<unsigned, 8> RegAliases;
- for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
- RegAliases.insert(*AS);
-
- bool ConflictsWithFixed = false;
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
- // Okay, this reg is on the fixed list. Check to see if we actually
- // conflict.
- LiveInterval *I = IP.first;
- if (I->endIndex() > StartPosition) {
- LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
- IP.second = II;
- if (II != I->begin() && II->start > StartPosition)
- --II;
- if (cur->overlapsFrom(*I, II)) {
- ConflictsWithFixed = true;
- break;
- }
- }
- }
- }
-
- // Okay, the register picked by our speculative getFreePhysReg call turned
- // out to be in use. Actually add all of the conflicting fixed registers to
- // regUse_ so we can do an accurate query.
- if (ConflictsWithFixed) {
- // For every interval in fixed we overlap with, mark the register as not
- // free and update spill weights.
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- LiveInterval *I = IP.first;
-
- const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
- I->endIndex() > StartPosition) {
- LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
- IP.second = II;
- if (II != I->begin() && II->start > StartPosition)
- --II;
- if (cur->overlapsFrom(*I, II)) {
- unsigned reg = I->reg;
- addRegUse(reg);
- SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
- }
- }
- }
-
- // Using the newly updated regUse_ object, which includes conflicts in the
- // future, see if there are any registers available.
- physReg = getFreePhysReg(cur);
- }
- }
-
- // Restore the physical register tracker, removing information about the
- // future.
- restoreRegUses();
-
- // If we find a free register, we are done: assign this virtual to
- // the free physical register and add this interval to the active
- // list.
- if (physReg) {
- DEBUG(dbgs() << tri_->getName(physReg) << '\n');
- assert(RC->contains(physReg) && "Invalid candidate");
- vrm_->assignVirt2Phys(cur->reg, physReg);
- addRegUse(physReg);
- active_.push_back(std::make_pair(cur, cur->begin()));
- handled_.push_back(cur);
-
- // Remember physReg for avoiding a write-after-write hazard in the next
- // instruction.
- if (AvoidWAWHazard &&
- tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg)))
- avoidWAW_ = physReg;
-
- // "Upgrade" the physical register since it has been allocated.
- UpgradeRegister(physReg);
- if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
- // "Downgrade" physReg to try to keep physReg from being allocated until
- // the next reload from the same SS is allocated.
- mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
- DowngradeRegister(cur, physReg);
- }
- return;
- }
- DEBUG(dbgs() << "no free registers\n");
-
- // Compile the spill weights into an array that is better for scanning.
- std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
- for (std::vector<std::pair<unsigned, float> >::iterator
- I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
- updateSpillWeights(SpillWeights, I->first, I->second, RC);
-
- // for each interval in active, update spill weights.
- for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
- i != e; ++i) {
- unsigned reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
- reg = vrm_->getPhys(reg);
- updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
- }
-
- DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
-
- // Find a register to spill.
- float minWeight = HUGE_VALF;
- unsigned minReg = 0;
-
- bool Found = false;
- std::vector<std::pair<unsigned,float> > RegsWeights;
- ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
- if (!minReg || SpillWeights[minReg] == HUGE_VALF)
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned reg = Order[i];
- float regWeight = SpillWeights[reg];
- // Skip recently allocated registers and reserved registers.
- if (minWeight > regWeight && !isRecentlyUsed(reg))
- Found = true;
- RegsWeights.push_back(std::make_pair(reg, regWeight));
- }
-
- // If we didn't find a register that is spillable, try aliases?
- if (!Found) {
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned reg = Order[i];
-      // No need to worry if the alias register size is smaller than RC's.
- // We are going to spill all registers that alias it anyway.
- for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
- RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
- }
- }
-
- // Sort all potential spill candidates by weight.
- std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
- minReg = RegsWeights[0].first;
- minWeight = RegsWeights[0].second;
- if (minWeight == HUGE_VALF) {
- // All registers must have inf weight. Just grab one!
- minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
- if (cur->weight == HUGE_VALF ||
- li_->getApproximateInstructionCount(*cur) == 0) {
- // Spill a physical register around defs and uses.
- if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
-        // spillPhysRegAroundRegDefsUses may have invalidated the iterators
-        // stored in fixed_. Reset them.
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
- IntervalPtr &IP = fixed_[i];
- LiveInterval *I = IP.first;
- if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
- IP.second = I->advanceTo(I->begin(), StartPosition);
- }
-
- DowngradedRegs.clear();
- assignRegOrStackSlotAtInterval(cur);
- } else {
- assert(false && "Ran out of registers during register allocation!");
- report_fatal_error("Ran out of registers during register allocation!");
- }
- return;
- }
- }
-
- // Find up to 3 registers to consider as spill candidates.
- unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
- while (LastCandidate > 1) {
- if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
- break;
- --LastCandidate;
- }
-
- DEBUG({
- dbgs() << "\t\tregister(s) with min weight(s): ";
-
- for (unsigned i = 0; i != LastCandidate; ++i)
- dbgs() << tri_->getName(RegsWeights[i].first)
- << " (" << RegsWeights[i].second << ")\n";
- });
-
-  // If the current interval has the minimum weight, we need to spill it,
-  // add any added intervals back to unhandled, and restart
-  // linear scan.
- if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
- DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
- SmallVector<LiveInterval*, 8> added;
- LiveRangeEdit LRE(*cur, added);
- spiller_->spill(LRE);
-
- std::sort(added.begin(), added.end(), LISorter());
- if (added.empty())
- return; // Early exit if all spills were folded.
-
-    // Merge added with unhandled. Note that we have already sorted the
-    // added intervals by their starting point.
-    // This also updates the NextReloadMap. That is, it adds a mapping from a
-    // register defined by a reload from SS to the next reload from SS in the
-    // same basic block.
- MachineBasicBlock *LastReloadMBB = 0;
- LiveInterval *LastReload = 0;
- int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- LiveInterval *ReloadLi = added[i];
- if (ReloadLi->weight == HUGE_VALF &&
- li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- SlotIndex ReloadIdx = ReloadLi->beginIndex();
- MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
- int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
- if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
- // Last reload of same SS is in the same MBB. We want to try to
- // allocate both reloads the same register and make sure the reg
- // isn't clobbered in between if at all possible.
- assert(LastReload->beginIndex() < ReloadIdx);
- NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
- }
- LastReloadMBB = ReloadMBB;
- LastReload = ReloadLi;
- LastReloadSS = ReloadSS;
- }
- unhandled_.push(ReloadLi);
- }
- return;
- }
-
- ++NumBacktracks;
-
-  // Push the current interval back to unhandled since we are going
-  // to re-run at least this iteration. Since we didn't modify it, it
-  // should go back right at the front of the list.
- unhandled_.push(cur);
-
- assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
- "did not choose a register to spill?");
-
-  // We spill all intervals aliasing the register with the
-  // minimum weight, roll back to the interval with the earliest
-  // start point, and let the linear scan algorithm run again.
- SmallVector<LiveInterval*, 8> spillIs;
-
- // Determine which intervals have to be spilled.
- findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
-
- // Set of spilled vregs (used later to rollback properly)
- SmallSet<unsigned, 8> spilled;
-
-  // The earliest start of a spilled interval indicates up to where
-  // in handled we need to roll back.
- assert(!spillIs.empty() && "No spill intervals?");
- SlotIndex earliestStart = spillIs[0]->beginIndex();
-
- // Spill live intervals of virtual regs mapped to the physical register we
- // want to clear (and its aliases). We only spill those that overlap with the
-  // current interval, as the rest do not affect its allocation. We also keep
- // track of the earliest start of all spilled live intervals since this will
- // mark our rollback point.
- SmallVector<LiveInterval*, 8> added;
- while (!spillIs.empty()) {
- LiveInterval *sli = spillIs.back();
- spillIs.pop_back();
- DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
- if (sli->beginIndex() < earliestStart)
- earliestStart = sli->beginIndex();
- LiveRangeEdit LRE(*sli, added, 0, &spillIs);
- spiller_->spill(LRE);
- spilled.insert(sli->reg);
- }
-
- // Include any added intervals in earliestStart.
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- SlotIndex SI = added[i]->beginIndex();
- if (SI < earliestStart)
- earliestStart = SI;
- }
-
- DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
-
- // Scan handled in reverse order up to the earliest start of a
- // spilled live interval and undo each one, restoring the state of
- // unhandled.
- while (!handled_.empty()) {
- LiveInterval* i = handled_.back();
-    // If this interval starts before earliestStart, we are done.
- if (!i->empty() && i->beginIndex() < earliestStart)
- break;
- DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
- handled_.pop_back();
-
- // When undoing a live interval allocation we must know if it is active or
- // inactive to properly update regUse_ and the VirtRegMap.
- IntervalPtrs::iterator it;
- if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
- active_.erase(it);
- assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
- if (!spilled.count(i->reg))
- unhandled_.push(i);
- delRegUse(vrm_->getPhys(i->reg));
- vrm_->clearVirt(i->reg);
- } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
- inactive_.erase(it);
- assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
- if (!spilled.count(i->reg))
- unhandled_.push(i);
- vrm_->clearVirt(i->reg);
- } else {
- assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
- "Can only allocate virtual registers!");
- vrm_->clearVirt(i->reg);
- unhandled_.push(i);
- }
-
- DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
- if (ii == DowngradeMap.end())
-      // If the interval has a preference, it must be defined by a copy. Clear
-      // the preference now, since the source interval allocation may have been
- // undone as well.
- mri_->setRegAllocationHint(i->reg, 0, 0);
- else {
- UpgradeRegister(ii->second);
- }
- }
-
- // Rewind the iterators in the active, inactive, and fixed lists back to the
- // point we reverted to.
- RevertVectorIteratorsTo(active_, earliestStart);
- RevertVectorIteratorsTo(inactive_, earliestStart);
- RevertVectorIteratorsTo(fixed_, earliestStart);
-
-  // Scan the rest and undo each interval that expired after earliestStart
-  // and insert it in active (the next iteration of the algorithm will
-  // put it in inactive if required).
- for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
- LiveInterval *HI = handled_[i];
- if (!HI->expiredAt(earliestStart) &&
- HI->expiredAt(cur->beginIndex())) {
- DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
- active_.push_back(std::make_pair(HI, HI->begin()));
- assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
- addRegUse(vrm_->getPhys(HI->reg));
- }
- }
-
- // Merge added with unhandled.
-  // This also updates the NextReloadMap. That is, it adds a mapping from a
- // register defined by a reload from SS to the next reload from SS in the
- // same basic block.
- MachineBasicBlock *LastReloadMBB = 0;
- LiveInterval *LastReload = 0;
- int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
- std::sort(added.begin(), added.end(), LISorter());
- for (unsigned i = 0, e = added.size(); i != e; ++i) {
- LiveInterval *ReloadLi = added[i];
- if (ReloadLi->weight == HUGE_VALF &&
- li_->getApproximateInstructionCount(*ReloadLi) == 0) {
- SlotIndex ReloadIdx = ReloadLi->beginIndex();
- MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
- int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
- if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
- // Last reload of same SS is in the same MBB. We want to try to
- // allocate both reloads the same register and make sure the reg
- // isn't clobbered in between if at all possible.
- assert(LastReload->beginIndex() < ReloadIdx);
- NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
- }
- LastReloadMBB = ReloadMBB;
- LastReload = ReloadLi;
- LastReloadSS = ReloadSS;
- }
- unhandled_.push(ReloadLi);
- }
-}
-
-unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
- const TargetRegisterClass *RC,
- unsigned MaxInactiveCount,
- SmallVector<unsigned, 256> &inactiveCounts,
- bool SkipDGRegs) {
- unsigned FreeReg = 0;
- unsigned FreeRegInactiveCount = 0;
-
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
- // Resolve second part of the hint (if possible) given the current allocation.
- unsigned physReg = Hint.second;
- if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
- physReg = vrm_->getPhys(physReg);
-
- ArrayRef<unsigned> Order;
- if (Hint.first)
- Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
- else
- Order = RegClassInfo.getOrder(RC);
-
- assert(!Order.empty() && "No allocatable register in this register class!");
-
- // Scan for the first available register.
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned Reg = Order[i];
- // Ignore "downgraded" registers.
- if (SkipDGRegs && DowngradedRegs.count(Reg))
- continue;
- // Skip reserved registers.
- if (reservedRegs_.test(Reg))
- continue;
- // Skip recently allocated registers.
- if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) {
- FreeReg = Reg;
- if (FreeReg < inactiveCounts.size())
- FreeRegInactiveCount = inactiveCounts[FreeReg];
- else
- FreeRegInactiveCount = 0;
- break;
- }
- }
-
- // If there are no free regs, or if this reg has the max inactive count,
- // return this register.
- if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
- // Remember what register we picked so we can skip it next time.
- if (FreeReg != 0) recordRecentlyUsed(FreeReg);
- return FreeReg;
- }
-
- // Continue scanning the registers, looking for the one with the highest
- // inactive count. Alkis found that this reduced register pressure very
- // slightly on X86 (in rev 1.94 of this file), though this should probably be
- // reevaluated now.
- for (unsigned i = 0; i != Order.size(); ++i) {
- unsigned Reg = Order[i];
- // Ignore "downgraded" registers.
- if (SkipDGRegs && DowngradedRegs.count(Reg))
- continue;
- // Skip reserved registers.
- if (reservedRegs_.test(Reg))
- continue;
- if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
- FreeRegInactiveCount < inactiveCounts[Reg] &&
- (!SkipDGRegs || !isRecentlyUsed(Reg))) {
- FreeReg = Reg;
- FreeRegInactiveCount = inactiveCounts[Reg];
- if (FreeRegInactiveCount == MaxInactiveCount)
- break; // We found the one with the max inactive count.
- }
- }
-
- // Remember what register we picked so we can skip it next time.
- recordRecentlyUsed(FreeReg);
-
- return FreeReg;
-}
-
-/// getFreePhysReg - return a free physical register for this virtual register
-/// interval if we have one, otherwise return 0.
-unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
- SmallVector<unsigned, 256> inactiveCounts;
- unsigned MaxInactiveCount = 0;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
- for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
- i != e; ++i) {
- unsigned reg = i->first->reg;
- assert(TargetRegisterInfo::isVirtualRegister(reg) &&
- "Can only allocate virtual registers!");
-
- // If this is not in a related reg class to the register we're allocating,
- // don't check it.
- const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
- reg = vrm_->getPhys(reg);
- if (inactiveCounts.size() <= reg)
- inactiveCounts.resize(reg+1);
- ++inactiveCounts[reg];
- MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
- }
- }
-
-  // If the copy coalescer has assigned a "preferred" register, check if it's
- // available first.
- unsigned Preference = vrm_->getRegAllocPref(cur->reg);
- if (Preference) {
- DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
- if (isRegAvail(Preference) &&
- RC->contains(Preference))
- return Preference;
- }
-
- unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
- true);
- if (FreeReg)
- return FreeReg;
- return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
-}
-
-FunctionPass* llvm::createLinearScanRegisterAllocator() {
- return new RALinScan();
-}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 0d2cf2d6184c..a2846145bc7e 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -32,14 +32,17 @@
#define DEBUG_TYPE "regalloc"
#include "RenderMachineFunction.h"
-#include "Splitter.h"
+#include "Spiller.h"
#include "VirtRegMap.h"
-#include "VirtRegRewriter.h"
#include "RegisterCoalescer.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -54,6 +57,7 @@
#include <limits>
#include <memory>
#include <set>
+#include <sstream>
#include <vector>
using namespace llvm;
@@ -67,10 +71,12 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
+#ifndef NDEBUG
static cl::opt<bool>
-pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-split before PBQP register allocation."),
- cl::init(false), cl::Hidden);
+pbqpDumpGraphs("pbqp-dump-graphs",
+ cl::desc("Dump graphs for each function/round in the compilation unit."),
+ cl::init(false), cl::Hidden);
+#endif
namespace {
@@ -88,11 +94,9 @@ public:
: MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
}
@@ -132,6 +136,7 @@ private:
MachineRegisterInfo *mri;
RenderMachineFunction *rmf;
+ std::auto_ptr<Spiller> spiller;
LiveIntervals *lis;
LiveStacks *lss;
VirtRegMap *vrm;
@@ -141,10 +146,6 @@ private:
/// \brief Finds the initial set of vreg intervals to allocate.
void findVRegIntervalsToAlloc();
- /// \brief Adds a stack interval if the given live interval has been
- /// spilled. Used to support stack slot coloring.
- void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
-
/// \brief Given a solved PBQP problem maps this solution back to a register
/// assignment.
bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
@@ -170,7 +171,7 @@ PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
assert(nodeItr != vreg2Node.end() && "No node for vreg.");
return nodeItr->second;
-
+
}
const PBQPRAProblem::AllowedSet&
@@ -195,9 +196,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
const RegSet &vregs) {
typedef std::vector<const LiveInterval*> LIVector;
-
+ ArrayRef<SlotIndex> regMaskSlots = lis->getRegMaskSlots();
MachineRegisterInfo *mri = &mf->getRegInfo();
- const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
PBQP::Graph &g = p->getGraph();
@@ -214,7 +215,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
BitVector reservedRegs = tri->getReservedRegs(*mf);
- // Iterate over vregs.
+ // Iterate over vregs.
for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
vregItr != vregEnd; ++vregItr) {
unsigned vreg = *vregItr;
@@ -224,7 +225,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
VRAllowed vrAllowed;
- ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
if (!reservedRegs.test(preg)) {
@@ -232,7 +233,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
}
- // Remove any physical registers which overlap.
+ RegSet overlappingPRegs;
+
+    // Record physical registers whose live ranges overlap the vreg.
for (RegSet::const_iterator pregItr = pregs.begin(),
pregEnd = pregs.end();
pregItr != pregEnd; ++pregItr) {
@@ -243,9 +246,41 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
continue;
}
- if (!vregLI->overlaps(*pregLI)) {
- continue;
+ if (vregLI->overlaps(*pregLI))
+ overlappingPRegs.insert(preg);
+ }
+
+ // Record any overlaps with regmask operands.
+ BitVector regMaskOverlaps(tri->getNumRegs());
+ for (ArrayRef<SlotIndex>::iterator rmItr = regMaskSlots.begin(),
+ rmEnd = regMaskSlots.end();
+ rmItr != rmEnd; ++rmItr) {
+ SlotIndex rmIdx = *rmItr;
+ if (vregLI->liveAt(rmIdx)) {
+ MachineInstr *rmMI = lis->getInstructionFromIndex(rmIdx);
+ const uint32_t* regMask = 0;
+ for (MachineInstr::mop_iterator mopItr = rmMI->operands_begin(),
+ mopEnd = rmMI->operands_end();
+ mopItr != mopEnd; ++mopItr) {
+ if (mopItr->isRegMask()) {
+ regMask = mopItr->getRegMask();
+ break;
+ }
+ }
+ assert(regMask != 0 && "Couldn't find register mask.");
+ regMaskOverlaps.setBitsNotInMask(regMask);
}
+ }
+
+ for (unsigned preg = 0; preg < tri->getNumRegs(); ++preg) {
+ if (regMaskOverlaps.test(preg))
+ overlappingPRegs.insert(preg);
+ }
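In a register mask operand, a set bit means the register is preserved across the instruction, so BitVector::setBitsNotInMask above accumulates exactly the clobbered registers in regMaskOverlaps. A small sketch of the mask convention, assuming a hypothetical four-register target and a single 32-bit mask word:

    #include <cstdint>

    // Bit i set in the mask => register i survives the instruction.
    bool clobberedByMask(unsigned Reg, const uint32_t *Mask) {
      return !(Mask[Reg / 32] & (1u << (Reg % 32)));
    }

    void regMaskExample() {
      const uint32_t CallMask[1] = {0x5}; // 0b0101: preserves r0 and r2
      // r1 and r3 are clobbered, so any vreg live across the call must not
      // be assigned to them; that is what the loop above records.
      bool R1Gone = clobberedByMask(1, CallMask); // true
      bool R2Gone = clobberedByMask(2, CallMask); // false
      (void)R1Gone; (void)R2Gone;
    }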
+
+ for (RegSet::const_iterator pregItr = overlappingPRegs.begin(),
+ pregEnd = overlappingPRegs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
// Remove the register from the allowed set.
VRAllowed::iterator eraseItr =
@@ -256,7 +291,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
// Also remove any aliases.
- const unsigned *aliasItr = tri->getAliasSet(preg);
+ const uint16_t *aliasItr = tri->getAliasSet(preg);
if (aliasItr != 0) {
for (; *aliasItr != 0; ++aliasItr) {
VRAllowed::iterator eraseItr =
@@ -270,7 +305,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
}
// Construct the node.
- PBQP::Graph::NodeItr node =
+ PBQP::Graph::NodeItr node =
g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
// Record the mapping and allowed set in the problem.
@@ -371,7 +406,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
const float copyFactor = 0.5; // Cost of copy relative to load. Current
// value plucked randomly out of the air.
-
+
PBQP::PBQPNum cBenefit =
copyFactor * LiveIntervals::getSpillWeight(false, true,
loopInfo->getLoopDepth(mbb));
@@ -382,7 +417,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
}
const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
- unsigned pregOpt = 0;
+ unsigned pregOpt = 0;
while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
++pregOpt;
}
@@ -407,7 +442,7 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
std::swap(allowed1, allowed2);
}
}
-
+
addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
cBenefit);
}
@@ -439,27 +474,29 @@ void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
if (preg1 == preg2) {
costMat[i + 1][j + 1] += -benefit;
- }
+ }
}
}
}
void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.setPreservesCFG();
+ au.addRequired<AliasAnalysis>();
+ au.addPreserved<AliasAnalysis>();
au.addRequired<SlotIndexes>();
au.addPreserved<SlotIndexes>();
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
- au.addRequiredID(RegisterCoalescerPassID);
if (customPassID)
au.addRequiredID(*customPassID);
au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
@@ -488,29 +525,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() {
}
}
-void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
- MachineRegisterInfo* mri) {
- int stackSlot = vrm->getStackSlot(spilled->reg);
-
- if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
- return;
- }
-
- const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
- LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
-
- VNInfo *vni;
- if (stackInterval.getNumValNums() != 0) {
- vni = stackInterval.getValNumInfo(0);
- } else {
- vni = stackInterval.getNextValue(
- SlotIndex(), 0, lss->getVNInfoAllocator());
- }
-
- LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
- stackInterval.MergeRangesInAsValue(rhsInterval, vni);
-}
-
bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
const PBQP::Solution &solution) {
// Set to true if we have any spills
@@ -529,28 +543,22 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
unsigned alloc = solution.getSelection(node);
if (problem.isPRegOption(vreg, alloc)) {
- unsigned preg = problem.getPRegForOption(vreg, alloc);
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
assert(preg != 0 && "Invalid preg selected.");
- vrm->assignVirt2Phys(vreg, preg);
+ vrm->assignVirt2Phys(vreg, preg);
} else if (problem.isSpillOption(vreg, alloc)) {
vregsToAlloc.erase(vreg);
- const LiveInterval* spillInterval = &lis->getInterval(vreg);
- double oldWeight = spillInterval->weight;
- rmf->rememberUseDefs(spillInterval);
- std::vector<LiveInterval*> newSpills =
- lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm);
- addStackInterval(spillInterval, mri);
- rmf->rememberSpills(spillInterval, newSpills);
-
- (void) oldWeight;
+ SmallVector<LiveInterval*, 8> newSpills;
+ LiveRangeEdit LRE(lis->getInterval(vreg), newSpills, *mf, *lis, vrm);
+ spiller->spill(LRE);
+
DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
- << oldWeight << ", New vregs: ");
+ << LRE.getParent().weight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
- for (std::vector<LiveInterval*>::const_iterator
- itr = newSpills.begin(), end = newSpills.end();
+ for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end();
itr != end; ++itr) {
assert(!(*itr)->empty() && "Empty spill range.");
DEBUG(dbgs() << (*itr)->reg << " ");
@@ -560,9 +568,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
- anotherRoundNeeded |= !newSpills.empty();
+ anotherRoundNeeded |= !LRE.empty();
} else {
- assert(false && "Unknown allocation option.");
+ llvm_unreachable("Unknown allocation option.");
}
}
@@ -642,7 +650,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
tm = &mf->getTarget();
tri = tm->getRegisterInfo();
tii = tm->getInstrInfo();
- mri = &mf->getRegInfo();
+ mri = &mf->getRegInfo();
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
@@ -650,7 +658,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
+ spiller.reset(createInlineSpiller(*this, MF, *vrm));
+ mri->freezeReservedRegs(MF);
DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
@@ -666,6 +676,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
// Find the vreg intervals in need of allocation.
findVRegIntervalsToAlloc();
+ const Function* func = mf->getFunction();
+ std::string fqn =
+ func->getParent()->getModuleIdentifier() + "." +
+ func->getName().str();
+ (void)fqn;
+
// If there are non-empty intervals allocate them using pbqp.
if (!vregsToAlloc.empty()) {
@@ -677,6 +693,20 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
std::auto_ptr<PBQPRAProblem> problem =
builder->build(mf, lis, loopInfo, vregsToAlloc);
+
+#ifndef NDEBUG
+ if (pbqpDumpGraphs) {
+ std::ostringstream rs;
+ rs << round;
+ std::string graphFileName(fqn + "." + rs.str() + ".pbqpgraph");
+ std::string tmp;
+ raw_fd_ostream os(graphFileName.c_str(), tmp);
+ DEBUG(dbgs() << "Dumping graph for round " << round << " to \""
+ << graphFileName << "\"\n");
+ problem->getGraph().dump(os);
+ }
+#endif
+
PBQP::Solution solution =
PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
problem->getGraph());
@@ -698,9 +728,12 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
// Run rewriter
- std::auto_ptr<VirtRegRewriter> rewriter(createVirtRegRewriter());
+ vrm->rewrite(lis->getSlotIndexes());
- rewriter->runOnMachineFunction(*mf, *vrm, lis);
+ // All machine operands and other references to virtual registers have been
+ // replaced. Remove the virtual registers.
+ vrm->clearAllVirt();
+ mri->clearVirtRegs();
return true;
}
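
The regmask handling added above relies on the register-mask operand convention: a call (or similar clobbering instruction) carries one regmask operand, an array of uint32_t in which a set bit means the corresponding physical register is preserved and a clear bit means it is clobbered, so setBitsNotInMask() flags exactly the clobbered registers. A minimal standalone sketch of that convention (regIsClobberedByMask is illustrative, not part of the patch):

    #include <cstdint>

    // Set bit => PhysReg is preserved across the instruction;
    // clear bit => PhysReg is clobbered and must be dropped from the
    // allowed set of any vreg live across the mask.
    static bool regIsClobberedByMask(const uint32_t *RegMask, unsigned PhysReg) {
      return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32)));
    }
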
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 786d279c2b8c..17165fa72665 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -18,12 +18,16 @@
#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
+ cl::desc("Limit all regclasses to N registers"));
+
RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
{}
@@ -39,14 +43,14 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
}
// Does this MF have different CSRs?
- const unsigned *CSR = TRI->getCalleeSavedRegs(MF);
+ const uint16_t *CSR = TRI->getCalleeSavedRegs(MF);
if (Update || CSR != CalleeSaved) {
// Build a CSRNum map. Every CSR alias gets an entry pointing to the last
// overlapping CSR.
CSRNum.clear();
CSRNum.resize(TRI->getNumRegs(), 0);
for (unsigned N = 0; unsigned Reg = CSR[N]; ++N)
- for (const unsigned *AS = TRI->getOverlaps(Reg);
+ for (const uint16_t *AS = TRI->getOverlaps(Reg);
unsigned Alias = *AS; ++AS)
CSRNum[Alias] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ...
Update = true;
@@ -81,7 +85,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
- ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF);
for (unsigned i = 0; i != RawOrder.size(); ++i) {
unsigned PhysReg = RawOrder[i];
// Remove reserved registers from the allocation order.
@@ -99,6 +103,10 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// CSR aliases go after the volatile registers, preserve the target's order.
std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]);
+ // Register allocator stress test. Clip register class to N registers.
+ if (StressRA && RCI.NumRegs > StressRA)
+ RCI.NumRegs = StressRA;
+
// Check if RC is a proper sub-class.
if (const TargetRegisterClass *Super = TRI->getLargestLegalSuperClass(RC))
if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs)
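
The new hidden -stress-regalloc=N option caps every register class at N allocatable registers, forcing heavy spilling so spill paths get exercised; for example, `llc -stress-regalloc=2 test.ll` should still produce correct code. The clipping rule itself reduces to (standalone sketch, names illustrative):

    #include <algorithm>

    // A limit of 0 means "unlimited"; otherwise truncate the allocation order.
    static unsigned clipNumRegs(unsigned NumRegs, unsigned StressLimit) {
      return StressLimit ? std::min(NumRegs, StressLimit) : NumRegs;
    }
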
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
index 2c1407096cd7..400e1f48ce54 100644
--- a/lib/CodeGen/RegisterClassInfo.h
+++ b/lib/CodeGen/RegisterClassInfo.h
@@ -49,7 +49,7 @@ class RegisterClassInfo {
// Callee saved registers of last MF. Assumed to be valid until the next
// runOnFunction() call.
- const unsigned *CalleeSaved;
+ const uint16_t *CalleeSaved;
// Map register number to CalleeSaved index + 1;
SmallVector<uint8_t, 4> CSRNum;
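
The unsigned to uint16_t switch here follows the shrinking of the static register tables: the lists TargetRegisterInfo hands out (callee-saved registers, alias sets, sub- and super-registers, overlaps) are zero-terminated uint16_t arrays. The idiomatic walk, as used throughout this patch (sketch assuming TRI and MF in scope):

    // Zero terminates the list, so Reg doubles as the loop condition.
    for (const uint16_t *CSR = TRI->getCalleeSavedRegs(&MF);
         unsigned Reg = *CSR; ++CSR) {
      // ... Reg is the next callee-saved register ...
    }
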
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 9b414d6212c7..75f88cafdf01 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regcoalescing"
+#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "LiveDebugVariables.h"
#include "RegisterClassInfo.h"
@@ -169,10 +169,6 @@ namespace {
/// it as well.
bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
- /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
- /// VNInfo copy flag for DstReg and all aliases.
- void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
-
/// markAsJoined - Remember that CopyMI has already been joined.
void markAsJoined(MachineInstr *CopyMI);
@@ -197,7 +193,7 @@ namespace {
};
} /// end anonymous namespace
-char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID;
+char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -205,9 +201,6 @@ INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
@@ -379,9 +372,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(StrongPHIEliminationID);
- AU.addPreservedID(PHIEliminationID);
- AU.addPreservedID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -423,7 +413,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
LiveInterval &IntB =
LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
@@ -434,40 +424,19 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Get the location that B is defined at. Two options: either this value has
// an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->isDefByCopy()) return false;
- assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+ if (BValNo->def != CopyIdx) return false;
// AValNo is the value number in A that defines the copy, A3 in the example.
- SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
// The live range might not exist after fun with physreg coalescing.
if (ALR == IntA.end()) return false;
VNInfo *AValNo = ALR->valno;
- // If it's re-defined by an early clobber somewhere in the live range, then
- // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
- // See PR3149:
- // 172 %ECX<def> = MOV32rr %reg1039<kill>
- // 180 INLINEASM <es:subl $5,$1
- // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
- // %EAX<kill>,
- // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
- // 188 %EAX<def> = MOV32rr %EAX<kill>
- // 196 %ECX<def> = MOV32rr %ECX<kill>
- // 204 %ECX<def> = MOV32rr %ECX<kill>
- // 212 %EAX<def> = MOV32rr %EAX<kill>
- // 220 %EAX<def> = MOV32rr %EAX
- // 228 %reg1039<def> = MOV32rr %ECX<kill>
- // The early clobber operand ties ECX input to the ECX def.
- //
- // The live interval of ECX is represented as this:
- // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
- // The coalescer has no idea there was a def in the middle of [174,230].
- if (AValNo->hasRedefByEC())
- return false;
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
- if (!CP.isCoalescable(AValNo->getCopy()))
+ MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
+ if (!CP.isCoalescable(ACopyMI))
return false;
// Get the LiveRange in IntB that this value number starts with.
@@ -492,7 +461,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// of its aliases is overlapping the live interval of the virtual register.
// If so, do not coalesce.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) {
DEBUG({
dbgs() << "\t\tInterfere with alias ";
@@ -511,8 +480,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// We are about to delete CopyMI, so need to remove it as the 'instruction
// that defines this value #'. Update the valnum with the new defining
// instruction #.
- BValNo->def = FillerStart;
- BValNo->setCopy(0);
+ BValNo->def = FillerStart;
// Okay, we can merge them. We need to insert a new liverange:
// [ValLR.end, BLR.begin) of either value number, then we merge the
@@ -522,12 +490,12 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
// If the IntB live range is assigned to a physical register, and if that
// physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
+ for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) {
if (!LIS->hasInterval(*SR))
continue;
LiveInterval &SRLI = LIS->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0,
+ SRLI.getNextValue(FillerStart,
LIS->getVNInfoAllocator())));
}
}
@@ -554,9 +522,11 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
ValLREndInst->getOperand(UIdx).setIsKill(false);
}
- // If the copy instruction was killing the destination register before the
- // merge, find the last use and trim the live range. That will also add the
- // isKill marker.
+ // Rewrite the copy. If the copy instruction was killing the destination
+ // register before the merge, find the last use and trim the live range. That
+ // will also add the isKill marker.
+ CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(),
+ *TRI);
if (ALR->end == CopyIdx)
LIS->shrinkToUses(&IntA);
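
All the getDefIndex()/getUseIndex() calls in this file are migrated to the new SlotIndex API, which exposes a single register slot per instruction plus an early-clobber variant marking where operands are read. The mapping assumed by the rest of the patch (sketch, with MI and LIS in scope):

    SlotIndex Idx    = LIS->getInstructionIndex(MI);
    SlotIndex UseIdx = Idx.getRegSlot(true); // operands read here (was getUseIndex())
    SlotIndex DefIdx = Idx.getRegSlot();     // defs go live here (was getDefIndex())
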
@@ -625,7 +595,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (!LIS->hasInterval(CP.getDstReg()))
return false;
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
LiveInterval &IntA =
LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -635,13 +605,13 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
- if (!BValNo || !BValNo->isDefByCopy())
+ if (!BValNo || BValNo->def != CopyIdx)
return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
- VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
assert(AValNo && "COPY source not live");
// If other defs can reach uses of this def, then it's not safe to perform
@@ -651,8 +621,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
if (!DefMI)
return false;
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isCommutable())
+ if (!DefMI->isCommutable())
return false;
// If DefMI is a two-address instruction then commuting it will change the
// destination register.
@@ -684,7 +653,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Abort if the aliases of IntB.reg have values that are not simply the
// clobbers from the superreg.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
+ for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS)
if (LIS->hasInterval(*AS) &&
HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0))
return false;
@@ -718,7 +687,8 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
if (NewMI != DefMI) {
LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
- MBB->insert(DefMI, NewMI);
+ MachineBasicBlock::iterator Pos = DefMI;
+ MBB->insert(Pos, NewMI);
MBB->erase(DefMI);
}
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
@@ -747,7 +717,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
continue;
}
- SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex();
+ SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
@@ -765,7 +735,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// This copy will become a noop. If it's defining a new val#, merge it into
// BValNo.
- SlotIndex DefIdx = UseIdx.getDefIndex();
+ SlotIndex DefIdx = UseIdx.getRegSlot();
VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
if (!DVNI)
continue;
@@ -779,7 +749,6 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// is updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
- ValNo->setCopy(0);
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -799,7 +768,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
bool preserveSrcInt,
unsigned DstReg,
MachineInstr *CopyMI) {
- SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex();
+ SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
@@ -809,14 +778,14 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!DefMI)
return false;
assert(DefMI && "Defining instruction disappeared");
- const MCInstrDesc &MCID = DefMI->getDesc();
- if (!MCID.isAsCheapAsAMove())
+ if (!DefMI->isAsCheapAsAMove())
return false;
if (!TII->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
if (!DefMI->isSafeToMove(TII, AA, SawStore))
return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
if (MCID.getNumDefs() != 1)
return false;
if (!DefMI->isImplicitDef()) {
@@ -831,27 +800,52 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- RemoveCopyFlag(DstReg, CopyMI);
-
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
MachineInstr *NewMI = prior(MII);
+ // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
+ // We need to remember these so we can add intervals once we insert
+ // NewMI into SlotIndexes.
+ SmallVector<unsigned, 4> NewMIImplDefs;
+ for (unsigned i = NewMI->getDesc().getNumOperands(),
+ e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (MO.isReg()) {
+ assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
+ TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ NewMIImplDefs.push_back(MO.getReg());
+ }
+ }
+
// CopyMI may have implicit operands, transfer them over to the newly
// rematerialized instruction. And update implicit def interval valnos.
for (unsigned i = CopyMI->getDesc().getNumOperands(),
e = CopyMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = CopyMI->getOperand(i);
- if (MO.isReg() && MO.isImplicit())
- NewMI->addOperand(MO);
- if (MO.isDef())
- RemoveCopyFlag(MO.getReg(), CopyMI);
+ if (MO.isReg()) {
+ assert(MO.isImplicit() && "No explicit operands after implicit operands.");
+ // Discard VReg implicit defs.
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ NewMI->addOperand(MO);
+ }
+ }
}
- NewMI->copyImplicitOps(CopyMI);
LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
+ unsigned reg = NewMIImplDefs[i];
+ LiveInterval &li = LIS->getInterval(reg);
+ VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(),
+ LIS->getVNInfoAllocator());
+ LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN);
+ li.addRange(lr);
+ }
+
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
@@ -887,7 +881,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
DstInt = SrcInt;
SrcInt = 0;
- VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex());
+ VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
assert(DeadVNI && "No value defined in DstInt");
DstInt->removeValNo(DeadVNI);
@@ -941,13 +935,10 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
SmallVector<unsigned,8> Ops;
bool Reads, Writes;
tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
- bool Kills = false, Deads = false;
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(Ops[i]);
- Kills |= MO.isKill();
- Deads |= MO.isDead();
// Make sure we don't create read-modify-write defs accidentally. We
// assume here that a SrcReg def cannot be joined into a live DstReg. If
@@ -967,19 +958,6 @@ RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
if (JoinedCopies.count(UseMI))
continue;
- if (SubIdx) {
- // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
- // read-modify-write of DstReg.
- if (Deads)
- UseMI->addRegisterDead(DstReg, TRI);
- else if (!Reads && Writes)
- UseMI->addRegisterDefined(DstReg, TRI);
-
- // Kill flags apply to the whole physical register.
- if (DstIsPhys && Kills)
- UseMI->addRegisterKilled(DstReg, TRI);
- }
-
DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugValue())
@@ -996,7 +974,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
const TargetRegisterInfo *TRI) {
if (li.empty()) {
if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
+ for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) {
if (!LIS->hasInterval(*SR))
continue;
LiveInterval &sli = LIS->getInterval(*SR);
@@ -1013,7 +991,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS,
/// the val# it defines. If the live interval becomes empty, remove it as well.
bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
MachineInstr *DefMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot();
LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
if (DefIdx != MLR->valno->def)
return false;
@@ -1021,27 +999,6 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
return removeIntervalIfEmpty(li, LIS, TRI);
}
-void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
- const MachineInstr *CopyMI) {
- SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex();
- if (LIS->hasInterval(DstReg)) {
- LiveInterval &LI = LIS->getInterval(DstReg);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
- if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
- return;
- for (const unsigned* AS = TRI->getAliasSet(DstReg); *AS; ++AS) {
- if (!LIS->hasInterval(*AS))
- continue;
- LiveInterval &LI = LIS->getInterval(*AS);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
-}
-
/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
/// We need to be careful about coalescing a source physical register with a
/// virtual register. Once the coalescing is done, it cannot be broken and these
@@ -1279,7 +1236,7 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
}
}
- // SrcReg is guarateed to be the register whose live interval that is
+ // SrcReg is guaranteed to be the register whose live interval that is
// being merged.
LIS->removeInterval(CP.getSrcReg());
@@ -1368,9 +1325,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
// FIXME: This is very conservative. For example, we don't handle
// physical registers.
- MachineInstr *MI = VNI->getCopy();
+ MachineInstr *MI = li.getInstructionFromIndex(VNI->def);
- if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys())
return false;
unsigned Dst = MI->getOperand(0).getReg();
@@ -1388,11 +1345,9 @@ static bool RegistersDefinedFromSameValue(LiveIntervals &li,
assert(Dst == A);
VNInfo *Other = LR->valno;
- if (!Other->isDefByCopy())
- return false;
- const MachineInstr *OtherMI = Other->getCopy();
+ const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def);
- if (!OtherMI->isFullCopy())
+ if (!OtherMI || !OtherMI->isFullCopy())
return false;
unsigned OtherDst = OtherMI->getOperand(0).getReg();
@@ -1431,7 +1386,44 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// than the full interference check below. We allow overlapping live ranges
// only when one is a copy of the other.
if (CP.isPhys()) {
- for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ // Optimization for reserved registers like ESP.
+ // We can only merge with a reserved physreg if RHS has a single value that
+ // is a copy of CP.DstReg(). The live range of the reserved register will
+ // look like a set of dead defs - we don't properly track the live range of
+ // reserved registers.
+ if (RegClassInfo.isReserved(CP.getDstReg())) {
+ assert(CP.isFlipped() && RHS.containsOneValue() &&
+ "Invalid join with reserved register");
+ // Deny any overlapping intervals. This depends on all the reserved
+ // register live ranges to look like dead defs.
+ for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) {
+ if (!LIS->hasInterval(*AS)) {
+ // Make sure at least DstReg itself exists before attempting a join.
+ if (*AS == CP.getDstReg())
+ LIS->getOrCreateInterval(CP.getDstReg());
+ continue;
+ }
+ if (RHS.overlaps(LIS->getInterval(*AS))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n');
+ return false;
+ }
+ }
+ // Skip any value computations; we are not adding new values to the
+ // reserved register. Also skip merging the live ranges, the reserved
+ // register live range doesn't need to be accurate as long as all the
+ // defs are there.
+ return true;
+ }
+
+ // Check if a register mask clobbers DstReg.
+ BitVector UsableRegs;
+ if (LIS->checkRegMaskInterference(RHS, UsableRegs) &&
+ !UsableRegs.test(CP.getDstReg())) {
+ DEBUG(dbgs() << "\t\tRegister mask interference.\n");
+ return false;
+ }
+
+ for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){
if (!LIS->hasInterval(*AS))
continue;
const LiveInterval &LHS = LIS->getInterval(*AS);
@@ -1485,12 +1477,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+ if (!MI->isCopyLike()) // Src not defined by a copy?
continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
// Figure out the value # from the RHS.
LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
@@ -1499,7 +1491,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// DstReg is known to be a register in the LHS interval. If the src is
// from the RHS interval, we can use its value #.
- MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
@@ -1512,12 +1503,12 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->isPHIDef())
+ continue;
+ MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
+ assert(MI && "Missing def");
+ if (!MI->isCopyLike()) // Src not defined by a copy?
continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
// Figure out the value # from the LHS.
LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
@@ -1526,7 +1517,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
// DstReg is known to be a register in the RHS interval. If the src is
// from the LHS interval, we can use its value #.
- MachineInstr *MI = VNI->getCopy();
if (!CP.isCoalescable(MI) &&
!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies))
continue;
@@ -1600,10 +1590,6 @@ bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
if (LHSValNoAssignments[I->valno->id] !=
RHSValNoAssignments[J->valno->id])
return false;
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
- return false;
}
if (I->end < J->end)
@@ -1905,8 +1891,8 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
unsigned Reg = MO.getReg();
if (!Reg)
continue;
+ DeadDefs.push_back(Reg);
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- DeadDefs.push_back(Reg);
// Remat may also enable register class inflation.
if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
InflateRegs.push_back(Reg);
@@ -1936,7 +1922,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// Check for now unnecessary kill flags.
if (LIS->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex();
+ SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isKill()) continue;
@@ -1950,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// remain alive.
if (!TargetRegisterInfo::isPhysicalRegister(reg))
continue;
- for (const unsigned *SR = TRI->getSubRegisters(reg);
+ for (const uint16_t *SR = TRI->getSubRegisters(reg);
unsigned S = *SR; ++SR)
if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx))
MI->addRegisterDefined(S, TRI);
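
A recurring theme in this file is the removal of the VNInfo copy flag: isDefByCopy(), getCopy() and setCopy() are gone, and a value's defining copy is instead recovered from its def index on demand. The replacement pattern (sketch, with LIS and a value number VNI in scope):

    if (!VNI->isUnused() && !VNI->isPHIDef())
      if (MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def))
        if (MI->isCopyLike())
          ; // VNI is defined by a copy: a candidate for coalescing.
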
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 472c48377fef..310b933cab9b 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
+//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the abstract interface for register coalescers,
+// This file contains the abstract interface for register coalescers,
// allowing them to interact with and query register allocators.
//
//===----------------------------------------------------------------------===//
@@ -47,7 +47,7 @@ namespace llvm {
/// CrossClass - True when both regs are virtual, and newRC is constrained.
bool CrossClass;
- /// Flipped - True when DstReg and SrcReg are reversed from the oriignal
+ /// Flipped - True when DstReg and SrcReg are reversed from the original
/// copy instruction.
bool Flipped;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index ca02aa1b8143..03bd82e225dc 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
void RegScavenger::setUsed(unsigned Reg) {
RegsAvailable.reset(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
RegsAvailable.reset(SubReg);
}
@@ -45,7 +45,7 @@ void RegScavenger::setUsed(unsigned Reg) {
bool RegScavenger::isAliasUsed(unsigned Reg) const {
if (isUsed(Reg))
return true;
- for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R)
+ for (const uint16_t *R = TRI->getAliasSet(Reg); *R; ++R)
if (isUsed(*R))
return true;
return false;
@@ -59,9 +59,6 @@ void RegScavenger::initRegState() {
// All registers started out unused.
RegsAvailable.set();
- // Reserved registers are always used.
- RegsAvailable ^= ReservedRegs;
-
if (!MBB)
return;
@@ -86,17 +83,24 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) &&
"Target changed?");
+ // It is not possible to use the register scavenger after late optimization
+ // passes that don't preserve accurate liveness information.
+ assert(MRI->tracksLiveness() &&
+ "Cannot use register scavenger with inaccurate liveness");
+
// Self-initialize.
if (!MBB) {
NumPhysRegs = TRI->getNumRegs();
RegsAvailable.resize(NumPhysRegs);
+ KillRegs.resize(NumPhysRegs);
+ DefRegs.resize(NumPhysRegs);
// Create reserved registers bitvector.
ReservedRegs = TRI->getReservedRegs(MF);
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
- const unsigned *CSRegs = TRI->getCalleeSavedRegs();
+ const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
if (CSRegs != NULL)
for (unsigned i = 0; CSRegs[i]; ++i)
CalleeSavedRegs.set(CSRegs[i]);
@@ -110,13 +114,7 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
BV.set(Reg);
- for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
- BV.set(*R);
-}
-
-void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) {
- BV.set(Reg);
- for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++)
+ for (const uint16_t *R = TRI->getSubRegisters(Reg); *R; R++)
BV.set(*R);
}
@@ -148,12 +146,12 @@ void RegScavenger::forward() {
// predicated, conservatively assume "kill" markers do not actually kill the
// register. Similarly ignores "dead" markers.
bool isPred = TII->isPredicated(MI);
- BitVector EarlyClobberRegs(NumPhysRegs);
- BitVector KillRegs(NumPhysRegs);
- BitVector DefRegs(NumPhysRegs);
- BitVector DeadRegs(NumPhysRegs);
+ KillRegs.reset();
+ DefRegs.reset();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ (isPred ? DefRegs : KillRegs).setBitsNotInMask(MO.getRegMask());
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -164,21 +162,19 @@ void RegScavenger::forward() {
// Ignore undef uses.
if (MO.isUndef())
continue;
- // Two-address operands implicitly kill.
- if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
+ if (!isPred && MO.isKill())
addRegWithSubRegs(KillRegs, Reg);
} else {
assert(MO.isDef());
if (!isPred && MO.isDead())
- addRegWithSubRegs(DeadRegs, Reg);
+ addRegWithSubRegs(KillRegs, Reg);
else
addRegWithSubRegs(DefRegs, Reg);
- if (MO.isEarlyClobber())
- addRegWithAliases(EarlyClobberRegs, Reg);
}
}
// Verify uses and defs.
+#ifndef NDEBUG
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
@@ -199,17 +195,18 @@ void RegScavenger::forward() {
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
bool SubUsed = false;
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ for (const uint16_t *SubRegs = TRI->getSubRegisters(Reg);
unsigned SubReg = *SubRegs; ++SubRegs)
if (isUsed(SubReg)) {
SubUsed = true;
break;
}
- assert(SubUsed && "Using an undefined register!");
+ if (!SubUsed) {
+ MBB->getParent()->verify(NULL, "In Register Scavenger");
+ llvm_unreachable("Using an undefined register!");
+ }
(void)SubUsed;
}
- assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) &&
- "Using an early clobbered register!");
} else {
assert(MO.isDef());
#if 0
@@ -221,18 +218,20 @@ void RegScavenger::forward() {
#endif
}
}
+#endif // NDEBUG
// Commit the changes.
setUnused(KillRegs);
- setUnused(DeadRegs);
setUsed(DefRegs);
}
void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
+ used = RegsAvailable;
+ used.flip();
if (includeReserved)
- used = ~RegsAvailable;
+ used |= ReservedRegs;
else
- used = ~RegsAvailable & ~ReservedRegs;
+ used.reset(ReservedRegs);
}
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
@@ -286,6 +285,8 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
// Remove any candidates touched by instruction.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isRegMask())
+ Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
@@ -296,7 +297,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
continue;
}
Candidates.reset(MO.getReg());
- for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++)
+ for (const uint16_t *R = TRI->getAliasSet(MO.getReg()); *R; R++)
Candidates.reset(*R);
}
// If we're not in a virtual reg's live range, this is a valid
@@ -347,9 +348,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// RegsAvailable, as RegsAvailable does not take aliases into account.
// That's what getRegsAvailable() is for.
BitVector Available = getRegsAvailable(RC);
-
- if ((Candidates & Available).any())
- Candidates &= Available;
+ Available &= Candidates;
+ if (Available.any())
+ Candidates = Available;
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
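
The candidate selection above now intersects before it falls back: restrict the candidates to registers that are genuinely available (which, unlike RegsAvailable alone, accounts for aliases), and only when that leaves nothing keep the full candidate set so one register can be spilled. As a standalone rule (sketch using llvm::BitVector):

    #include "llvm/ADT/BitVector.h"

    // Prefer genuinely free registers; otherwise keep all candidates and spill.
    static llvm::BitVector narrowCandidates(llvm::BitVector Candidates,
                                            const llvm::BitVector &Available) {
      llvm::BitVector Free = Available;
      Free &= Candidates;
      return Free.any() ? Free : Candidates;
    }
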
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index 8b02ec44273a..6020908d9112 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===//
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -560,12 +560,13 @@ namespace llvm {
// For uses/defs recorded use/def indexes override current liveness and
// instruction operands (Only for the interval which records the indexes).
- if (i.isUse() || i.isDef()) {
+ // FIXME: This is all wrong, uses and defs share the same slots.
+ if (i.isEarlyClobber() || i.isRegister()) {
UseDefs::const_iterator udItr = useDefs.find(li);
if (udItr != useDefs.end()) {
const SlotSet &slotSet = udItr->second;
if (slotSet.count(i)) {
- if (i.isUse()) {
+ if (i.isEarlyClobber()) {
return Used;
}
// else
@@ -586,9 +587,9 @@ namespace llvm {
return AliveStack;
}
} else {
- if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ if (i.isRegister() && mi->definesRegister(li->reg, tri)) {
return Defined;
- } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) {
return Used;
} else {
if (vrm == 0 ||
@@ -804,7 +805,7 @@ namespace llvm {
os << indent + s(2) << "<tr height=6ex>\n";
// Render the code column.
- if (i.isLoad()) {
+ if (i.isBlock()) {
MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
mi = sis->getInstructionFromIndex(i);
@@ -823,7 +824,7 @@ namespace llvm {
}
os << indent + s(4) << "</td>\n";
} else {
- i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ i = i.getDeadSlot(); // <- Will be incremented to the next index.
continue;
}
}
@@ -952,10 +953,10 @@ namespace llvm {
rItr != rEnd; ++rItr) {
const MachineInstr *mi = &*rItr;
if (mi->readsRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true));
}
if (mi->definesRegister(li->reg)) {
- useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot());
}
}
}
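
The renamings in this file track the new SlotIndex slot kinds: every instruction index carries four slots, Block (was Load), EarlyClobber (was Use), Register (was Def) and Dead (was Store). A small classifier (sketch):

    #include "llvm/CodeGen/SlotIndexes.h"

    static const char *slotKind(llvm::SlotIndex S) {
      if (S.isBlock())        return "block";         // was isLoad()
      if (S.isEarlyClobber()) return "early-clobber"; // was isUse()
      if (S.isRegister())     return "register";      // was isDef()
      return "dead";                                  // was isStore()
    }
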
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1e9b5c89f172..8fd64265fda6 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -31,6 +31,8 @@ static cl::opt<bool> StressSchedOpt(
cl::desc("Stress test instruction scheduling"));
#endif
+void SchedulingPriorityQueue::anchor() { }
+
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
@@ -44,42 +46,17 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
ScheduleDAG::~ScheduleDAG() {}
-/// getInstrDesc helper to handle SDNodes.
-const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
- if (!Node || !Node->isMachineOpcode()) return NULL;
- return &TII->get(Node->getMachineOpcode());
-}
-
-/// dump - dump the schedule.
-void ScheduleDAG::dumpSchedule() const {
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- SU->dump(this);
- else
- dbgs() << "**** NOOP ****\n";
- }
-}
-
-
-/// Run - perform scheduling.
-///
-void ScheduleDAG::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
- BB = bb;
- InsertPos = insertPos;
-
+/// Clear the DAG state (e.g. between scheduling regions).
+void ScheduleDAG::clearDAG() {
SUnits.clear();
- Sequence.clear();
EntrySU = SUnit();
ExitSU = SUnit();
+}
- Schedule();
-
- DEBUG({
- dbgs() << "*** Final schedule ***\n";
- dumpSchedule();
- dbgs() << '\n';
- });
+/// getInstrDesc helper to handle SDNodes.
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
}
/// addPred - This adds the specified edge as a pred of the current node if
@@ -313,13 +290,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ch "; break;
}
- dbgs() << "#";
- dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
if (I->isAssignedRegDep())
- dbgs() << " Reg=" << G->TRI->getName(I->getReg());
+ dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI);
dbgs() << "\n";
}
}
@@ -334,8 +310,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
case SDep::Output: dbgs() << "out "; break;
case SDep::Order: dbgs() << "ch "; break;
}
- dbgs() << "#";
- dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")";
+ dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
@@ -346,13 +321,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
}
#ifndef NDEBUG
-/// VerifySchedule - Verify that all SUnits were scheduled and that
-/// their state is consistent.
+/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that
+/// their state is consistent. Return the number of scheduled nodes.
///
-void ScheduleDAG::VerifySchedule(bool isBottomUp) {
+unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) {
bool AnyNotSched = false;
unsigned DeadNodes = 0;
- unsigned Noops = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
if (!SUnits[i].isScheduled) {
if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) {
@@ -393,12 +367,8 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
}
}
}
- for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
- if (!Sequence[i])
- ++Noops;
assert(!AnyNotSched);
- assert(Sequence.size() + DeadNodes - Noops == SUnits.size() &&
- "The number of nodes scheduled doesn't match the expected number!");
+ return SUnits.size() - DeadNodes;
}
#endif
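
With Run() and the stored Sequence gone, sequencing becomes the driver's job: clear the DAG per region, build the graph (possibly more than once), schedule, and use VerifyScheduledDAG's return value for the node count. A plausible driver shape under those assumptions (sketch; the ordering and emission step is elided):

    static void scheduleRegion(ScheduleDAGInstrs &DAG, AliasAnalysis *AA,
                               MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator Begin,
                               MachineBasicBlock::iterator End,
                               unsigned Count) {
      DAG.enterRegion(MBB, Begin, End, Count); // calls clearDAG() internally
      DAG.BuildSchedGraph(AA);                 // may be repeated per region
      // ... order the SUnits and emit the schedule ...
      DAG.exitRegion();
    }
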
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
deleted file mode 100644
index f8b1bc76eb8b..000000000000
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements the Emit routines for the ScheduleDAG class, which creates
-// MachineInstrs according to the computed schedule.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-using namespace llvm;
-
-void ScheduleDAG::EmitNoop() {
- TII->insertNoop(*BB, InsertPos);
-}
-
-void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
- DenseMap<SUnit*, unsigned> &VRBaseMap) {
- for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl()) continue; // ignore chain preds
- if (I->getSUnit()->CopyDstRC) {
- // Copy to physical register.
- DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
- assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
- // Find the destination physical register.
- unsigned Reg = 0;
- for (SUnit::const_succ_iterator II = SU->Succs.begin(),
- EE = SU->Succs.end(); II != EE; ++II) {
- if (II->isCtrl()) continue; // ignore chain preds
- if (II->getReg()) {
- Reg = II->getReg();
- break;
- }
- }
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(VRI->second);
- } else {
- // Copy from physical register.
- assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
- bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- (void)isNew; // Silence compiler warning.
- assert(isNew && "Node emitted out of order - early");
- BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
- .addReg(I->getReg());
- }
- break;
- }
-}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 34b8ab0b47f2..6be1ab7f5b08 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,14 +13,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sched-instrs"
-#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -33,25 +34,17 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt)
+ const MachineDominatorTree &mdt,
+ bool IsPostRAFlag,
+ LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
- InstrItins(mf.getTarget().getInstrItineraryData()),
- Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()),
- LoopRegs(MLI, MDT), FirstDbgValue(0) {
+ InstrItins(mf.getTarget().getInstrItineraryData()), LIS(lis),
+ IsPostRA(IsPostRAFlag), UnitLatencies(false), LoopRegs(MLI, MDT),
+ FirstDbgValue(0) {
+ assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
-}
-
-/// Run - perform scheduling.
-///
-void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endcount) {
- BB = bb;
- Begin = begin;
- InsertPosIndex = endcount;
-
- ScheduleDAG::Run(bb, end);
+ assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
+ "Virtual registers must be removed prior to PostRA scheduling");
}
/// getUnderlyingObjectFromInt - This is the function that does the work of
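
Construction now encodes the pre-/post-RA split directly: pre-RA schedulers must pass LiveIntervals and post-RA schedulers must run after virtual registers have been removed, as the asserts above enforce. A hedged construction sketch (MyScheduler is hypothetical):

    // PostRA: pass a null LiveIntervals; PreRA: LIS is required.
    MyScheduler::MyScheduler(MachineFunction &MF, const MachineLoopInfo &MLI,
                             const MachineDominatorTree &MDT, bool IsPostRA,
                             LiveIntervals *LIS)
      : ScheduleDAGInstrs(MF, MLI, MDT, IsPostRA, IsPostRA ? 0 : LIS) {}
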
@@ -133,19 +126,58 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
+void ScheduleDAGInstrs::startBlock(MachineBasicBlock *BB) {
LoopRegs.Deps.clear();
if (MachineLoop *ML = MLI.getLoopFor(BB))
- if (BB == ML->getLoopLatch()) {
- MachineBasicBlock *Header = ML->getHeader();
- for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
- E = Header->livein_end(); I != E; ++I)
- LoopLiveInRegs.insert(*I);
+ if (BB == ML->getLoopLatch())
LoopRegs.VisitLoop(ML);
- }
}
-/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+void ScheduleDAGInstrs::finishBlock() {
+ // Nothing to do.
+}
+
+/// Initialize the map with the number of registers.
+void Reg2SUnitsMap::setRegLimit(unsigned Limit) {
+ PhysRegSet.setUniverse(Limit);
+ SUnits.resize(Limit);
+}
+
+/// Clear the map without deallocating storage.
+void Reg2SUnitsMap::clear() {
+ for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) {
+ SUnits[*I].clear();
+ }
+ PhysRegSet.clear();
+}
+
+/// Initialize the DAG and common scheduler state for the current scheduling
+/// region. This does not actually create the DAG, only clears it. The
+/// scheduling driver may call BuildSchedGraph multiple times per scheduling
+/// region.
+void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
+ MachineBasicBlock::iterator begin,
+ MachineBasicBlock::iterator end,
+ unsigned endcount) {
+ BB = bb;
+ RegionBegin = begin;
+ RegionEnd = end;
+ EndIndex = endcount;
+ MISUnitMap.clear();
+
+ // Check to see if the scheduler cares about latencies.
+ UnitLatencies = forceUnitLatencies();
+
+ ScheduleDAG::clearDAG();
+}
+
+/// Close the current scheduling region. Don't clear any state in case the
+/// driver wants to refer to the previous scheduling region.
+void ScheduleDAGInstrs::exitRegion() {
+ // Nothing to do.
+}
+
+/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch, or when the live-out registers
/// are used by instructions in the fallthrough block.
-void ScheduleDAGInstrs::AddSchedBarrierDeps() {
- MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+void ScheduleDAGInstrs::addSchedBarrierDeps() {
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
- (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ (ExitMI->isCall() || ExitMI->isBarrier());
if (ExitMI && AllDepKnown) {
// If it's a call or a barrier, add dependencies on the defs and uses of
// instruction.
@@ -167,29 +199,313 @@ void ScheduleDAGInstrs::AddSchedBarrierDeps() {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
- Uses[Reg].push_back(&ExitSU);
+ if (TRI->isPhysicalRegister(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ else {
+ assert(!IsPostRA && "Virtual register encountered after regalloc.");
+ addVRegUseDeps(&ExitSU, i);
+ }
}
} else {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
- SmallSet<unsigned, 8> Seen;
+ assert(Uses.empty() && "Uses in set before adding deps?");
for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
unsigned Reg = *I;
- if (Seen.insert(Reg))
+ if (!Uses.contains(Reg))
Uses[Reg].push_back(&ExitSU);
}
}
}
-void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
- // We'll be allocating one SUnit for each instruction, plus one for
- // the region exit node.
+/// MO is an operand of SU's instruction that defines a physical register. Add
+/// data dependencies from SU to any uses of the physical register.
+void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
+ const MachineOperand &MO) {
+ assert(MO.isDef() && "expect physreg def");
+
+ // Ask the target if address-backscheduling is desirable, and if so how much.
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+ unsigned DataLatency = SU->Latency;
+
+ for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ if (!Uses.contains(*Alias))
+ continue;
+ std::vector<SUnit*> &UseList = Uses[*Alias];
+ for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
+ SUnit *UseSU = UseList[i];
+ if (UseSU == SU)
+ continue;
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(*Alias);
+ assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if (RegUseIndex >= 0 &&
+ (UseMI->mayLoad() || UseMI->mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
+ }
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
+ if (!UnitLatencies) {
+ computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
+ ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
+ }
+ UseSU->addPred(dep);
+ }
+ }
+}
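The loop above fans each def out to every recorded use of every overlapping physical register. A minimal standalone sketch of that shape, with plain STL containers standing in for LLVM's Reg2SUnits map and TRI->getOverlaps() (the names here are illustrative, not LLVM's API):

```cpp
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

struct Node {
  int ID;
  std::vector<std::pair<Node *, unsigned>> Preds; // (pred, latency)
};

// Hypothetical stand-ins for Uses[] and TRI->getOverlaps(Reg).
static std::map<unsigned, std::vector<Node *>> Uses;
static const unsigned Overlaps[] = {1, 2, 0}; // 0-terminated alias list

// Fan a def out to every recorded use of every overlapping register,
// the same shape as the addPhysRegDataDeps loop.
static void addDataDeps(Node *Def, unsigned Latency) {
  for (const unsigned *Alias = Overlaps; *Alias; ++Alias) {
    auto It = Uses.find(*Alias);
    if (It == Uses.end())
      continue;
    for (Node *UseN : It->second)
      if (UseN != Def)
        UseN->Preds.push_back({Def, Latency});
  }
}

int main() {
  Node D{0, {}}, U{1, {}};
  Uses[1].push_back(&U); // U reads a register aliasing reg 1
  addDataDeps(&D, 3);    // D defines it with latency 3
  std::printf("preds of U: %zu\n", U.Preds.size()); // prints 1
}
```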
+
+/// addPhysRegDeps - Add register dependencies (data, anti, and output) from
+/// this SUnit to following instructions in the same scheduling region that
+/// depend on the physical register referenced at OperIdx.
+void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ const MachineOperand &MO = MI->getOperand(OperIdx);
+
+ // Optionally add output and anti dependencies. For anti
+ // dependencies we use a latency of 0 because for a multi-issue
+ // target we want to allow the defining instruction to issue
+ // in the same cycle as the using instruction.
+ // TODO: Using a latency of 1 here for output dependencies assumes
+ // there's no cost for reusing registers.
+ SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
+ for (const uint16_t *Alias = TRI->getOverlaps(MO.getReg()); *Alias; ++Alias) {
+ if (!Defs.contains(*Alias))
+ continue;
+ std::vector<SUnit *> &DefList = Defs[*Alias];
+ for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
+ SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
+ if (DefSU != SU &&
+ (Kind != SDep::Output || !MO.isDead() ||
+ !DefSU->getInstr()->registerDefIsDead(*Alias))) {
+ if (Kind == SDep::Anti)
+ DefSU->addPred(SDep(SU, Kind, 0, /*Reg=*/*Alias));
+ else {
+ unsigned AOLat = TII->getOutputLatency(InstrItins, MI, OperIdx,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, Kind, AOLat, /*Reg=*/*Alias));
+ }
+ }
+ }
+ }
+
+ if (!MO.isDef()) {
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's uses.
+ // Push this SUnit on the use list.
+ Uses[MO.getReg()].push_back(SU);
+ }
+ else {
+ addPhysRegDataDeps(SU, MO);
+
+ // Either insert a new Reg2SUnits entry with an empty SUnits list, or
+ // retrieve the existing SUnits list for this register's defs.
+ std::vector<SUnit *> &DefList = Defs[MO.getReg()];
+
+ // If a def is going to wrap back around to the top of the loop,
+ // backschedule it.
+ if (!UnitLatencies && DefList.empty()) {
+ LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(MO.getReg());
+ if (I != LoopRegs.Deps.end()) {
+ const MachineOperand *UseMO = I->second.first;
+ unsigned Count = I->second.second;
+ const MachineInstr *UseMI = UseMO->getParent();
+ unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const TargetSubtargetInfo &ST =
+ TM.getSubtarget<TargetSubtargetInfo>();
+ unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
+ // TODO: If we knew the total depth of the region here, we could
+ // handle the case where the whole loop is inside the region but
+ // is large enough that the isScheduleHigh trick isn't needed.
+ if (UseMOIdx < UseMCID.getNumOperands()) {
+ // Currently, we only support scheduling regions consisting of
+ // single basic blocks. Check to see if the instruction is in
+ // the same region by checking to see if it has the same parent.
+ if (UseMI->getParent() != MI->getParent()) {
+ unsigned Latency = SU->Latency;
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ Latency += SpecialAddressLatency;
+ // This is a wild guess as to the portion of the latency which
+ // will be overlapped by work done outside the current
+ // scheduling region.
+ Latency -= std::min(Latency, Count);
+ // Add the artificial edge.
+ ExitSU.addPred(SDep(SU, SDep::Order, Latency,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ } else if (SpecialAddressLatency > 0 &&
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ // The entire loop body is within the current scheduling region
+ // and the latency of this operation is assumed to be greater
+ // than the latency of the loop.
+ // TODO: Recursively mark data-edge predecessors as
+ // isScheduleHigh too.
+ SU->isScheduleHigh = true;
+ }
+ }
+ LoopRegs.Deps.erase(I);
+ }
+ }
+
+ // clear this register's use list
+ if (Uses.contains(MO.getReg()))
+ Uses[MO.getReg()].clear();
+
+ if (!MO.isDead())
+ DefList.clear();
+
+ // Calls will not be reordered because of chain dependencies (see
+ // below). Since call operands are dead, calls may continue to be added
+    // to the DefList, making dependence checking quadratic in the size of
+ // the block. Instead, we leave only one call at the back of the
+ // DefList.
+ if (SU->isCall) {
+ while (!DefList.empty() && DefList.back()->isCall)
+ DefList.pop_back();
+ }
+ // Defs are pushed in the order they are visited and never reordered.
+ DefList.push_back(SU);
+ }
+}
+
+/// addVRegDefDeps - Add register output and data dependencies from this SUnit
+/// to instructions that occur later in the same scheduling region if they read
+/// from or write to the virtual register defined at OperIdx.
+///
+/// TODO: Hoist loop induction variable increments. This has to be
+/// reevaluated. Generally, IV scheduling should be done before coalescing.
+void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
+ const MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // SSA defs do not have output/anti dependencies.
+ // The current operand is a def, so we have at least one.
+ if (llvm::next(MRI.def_begin(Reg)) == MRI.def_end())
+ return;
+
+ // Add output dependence to the next nearest def of this vreg.
+ //
+ // Unless this definition is dead, the output dependence should be
+ // transitively redundant with antidependencies from this definition's
+ // uses. We're conservative for now until we have a way to guarantee the uses
+ // are not eliminated sometime during scheduling. The output dependence edge
+ // is also useful if output latency exceeds def-use latency.
+ VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ if (DefI == VRegDefs.end())
+ VRegDefs.insert(VReg2SUnit(Reg, SU));
+ else {
+ SUnit *DefSU = DefI->SU;
+ if (DefSU != SU && DefSU != &ExitSU) {
+ unsigned OutLatency = TII->getOutputLatency(InstrItins, MI, OperIdx,
+ DefSU->getInstr());
+ DefSU->addPred(SDep(SU, SDep::Output, OutLatency, Reg));
+ }
+ DefI->SU = SU;
+ }
+}
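The early-exit above tests "this vreg has exactly one def" without counting defs: because the current operand is itself a def the range is non-empty, so a single llvm::next suffices. The same idiom in generic form, as a sketch:

```cpp
#include <cstdio>
#include <iterator>
#include <list>

// True iff [First, Last) holds exactly one element, found without counting.
// (addVRegDefDeps can skip the emptiness check because the current operand
// guarantees at least one def.)
template <typename It> static bool hasSingleElement(It First, It Last) {
  return First != Last && std::next(First) == Last;
}

int main() {
  std::list<int> One{42}, Two{1, 2};
  std::printf("%d %d\n", hasSingleElement(One.begin(), One.end()),
              hasSingleElement(Two.begin(), Two.end())); // prints "1 0"
}
```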
+
+/// addVRegUseDeps - Add a register data dependency if the instruction that
+/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a
+/// register antidependency from this SUnit to instructions that occur later in
+/// the same scheduling region if they write the virtual register.
+///
+/// TODO: Handle ExitSU "uses" properly.
+void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Reg = MI->getOperand(OperIdx).getReg();
+
+ // Lookup this operand's reaching definition.
+  assert(LIS && "vreg dependencies require LiveIntervals");
+ SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot();
+ LiveInterval *LI = &LIS->getInterval(Reg);
+ VNInfo *VNI = LI->getVNInfoBefore(UseIdx);
+ // VNI will be valid because MachineOperand::readsReg() is checked by caller.
+ MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
+ // Phis and other noninstructions (after coalescing) have a NULL Def.
+ if (Def) {
+ SUnit *DefSU = getSUnit(Def);
+ if (DefSU) {
+ // The reaching Def lives within this scheduling region.
+ // Create a data dependence.
+ //
+ // TODO: Handle "special" address latencies cleanly.
+ const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
+ if (!UnitLatencies) {
+ // Adjust the dependence latency using operand def/use information, then
+ // allow the target to perform its own adjustments.
+ computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
+ ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
+ }
+ SU->addPred(dep);
+ }
+ }
+
+ // Add antidependence to the following def of the vreg it uses.
+ VReg2SUnitMap::iterator DefI = findVRegDef(Reg);
+ if (DefI != VRegDefs.end() && DefI->SU != SU)
+ DefI->SU->addPred(SDep(SU, SDep::Anti, 0, Reg));
+}
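Bottom-up, VRegDefs always holds the nearest def already visited, i.e. the next def in program order, so a use adds its antidependence to that entry. A minimal model of the bookkeeping (illustrative names, not LLVM's VReg2SUnitMap):

```cpp
#include <cstdio>
#include <unordered_map>
#include <vector>

struct Node {
  int ID;
  std::vector<int> AntiPredIDs; // IDs of nodes that must issue first
};

// Per vreg, the nearest def already seen in the bottom-up walk,
// i.e. the next def in program order.
static std::unordered_map<unsigned, Node *> VRegDefs;

// A use of VReg adds an antidependence (latency 0) to that following def.
static void visitUse(Node *UseN, unsigned VReg) {
  auto It = VRegDefs.find(VReg);
  if (It != VRegDefs.end() && It->second != UseN)
    It->second->AntiPredIDs.push_back(UseN->ID);
}

int main() {
  Node LaterDef{1, {}}, EarlierUse{0, {}};
  VRegDefs[100] = &LaterDef;  // visited first in the bottom-up walk
  visitUse(&EarlierUse, 100); // the use must precede the redefinition
  std::printf("anti preds: %zu\n", LaterDef.AntiPredIDs.size()); // 1
}
```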
+
+/// Create an SUnit for each real instruction, numbered in top-down topological
+/// order. The instruction order A < B implies that no edge exists from B to A.
+///
+/// Map each real instruction to its SUnit.
+///
+/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
+/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
+/// instead of pointers.
+///
+/// MachineScheduler relies on initSUnits numbering the nodes by their order in
+/// the original instruction list.
+void ScheduleDAGInstrs::initSUnits() {
+ // We'll be allocating one SUnit for each real instruction in the region,
+ // which is contained within a basic block.
SUnits.reserve(BB->size());
+ for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
+ MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
+
+ SUnit *SU = newSUnit(MI);
+ MISUnitMap[MI] = SU;
+
+ SU->isCall = MI->isCall();
+ SU->isCommutable = MI->isCommutable();
+
+ // Assign the Latency field of SU using target-provided information.
+ if (UnitLatencies)
+ SU->Latency = 1;
+ else
+ computeLatency(SU);
+ }
+}
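initSUnits depends on the vector never reallocating once scheduling starts, which is what the reserve() call buys: pointers into SUnits stay stable for any push within the reserved capacity. A tiny demonstration of that property (generic C++, not LLVM's SUnit):

```cpp
#include <cassert>
#include <vector>

struct SUnitLike { int ID; };

int main() {
  std::vector<SUnitLike> SUnits;
  SUnits.reserve(4); // like SUnits.reserve(BB->size())
  SUnits.push_back({0});
  SUnitLike *First = &SUnits[0];
  SUnits.push_back({1});
  SUnits.push_back({2});
  // Within the reserved capacity push_back never reallocates, so pointers
  // handed out earlier remain valid -- the property the scheduler relies on.
  assert(First == &SUnits[0]);
  return 0;
}
```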
+
+void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA) {
+ // Create an SUnit for each real instruction.
+ initSUnits();
+
// We build scheduling units by walking a block's instruction list from bottom
// to top.
@@ -203,29 +519,29 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
- // Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
-
- // Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
- unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
-
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
FirstDbgValue = NULL;
+ assert(Defs.empty() && Uses.empty() &&
+ "Only BuildGraph should update Defs/Uses");
+ Defs.setRegLimit(TRI->getNumRegs());
+ Uses.setRegLimit(TRI->getNumRegs());
+
+ assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
+ // FIXME: Allow SparseSet to reserve space for the creation of virtual
+ // registers during scheduling. Don't artificially inflate the Universe
+ // because we want to assert that vregs are not created during DAG building.
+ VRegDefs.setUniverse(MRI.getNumVirtRegs());
+
// Model data dependencies between instructions being scheduled and the
// ExitSU.
- AddSchedBarrierDeps();
-
- for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- assert(Defs[i].empty() && "Only BuildGraph should push/pop Defs");
- }
+ addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
MachineInstr *PrevMI = NULL;
- for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
+ for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = prior(MII);
if (MI && PrevMI) {
@@ -238,19 +554,11 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
}
- const MCInstrDesc &MCID = MI->getDesc();
- assert(!MCID.isTerminator() && !MI->isLabel() &&
+ assert(!MI->isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
- // Create the SUnit for this MI.
- SUnit *SU = NewSUnit(MI);
- SU->isCall = MCID.isCall();
- SU->isCommutable = MCID.isCommutable();
- // Assign the Latency field of SU using target-provided information.
- if (UnitLatencies)
- SU->Latency = 1;
- else
- ComputeLatency(SU);
+ SUnit *SU = MISUnitMap[MI];
+ assert(SU && "No SUnit mapped to this MI");
// Add register-based dependencies (data, anti, and output).
for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
@@ -259,152 +567,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
-
- std::vector<SUnit *> &UseList = Uses[Reg];
- // Defs are push in the order they are visited and never reordered.
- std::vector<SUnit *> &DefList = Defs[Reg];
- // Optionally add output and anti dependencies. For anti
- // dependencies we use a latency of 0 because for a multi-issue
- // target we want to allow the defining instruction to issue
- // in the same cycle as the using instruction.
- // TODO: Using a latency of 1 here for output dependencies assumes
- // there's no cost for reusing registers.
- SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
- for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
- SUnit *DefSU = DefList[i];
- if (DefSU == &ExitSU)
- continue;
- if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(Reg)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &MemDefList = Defs[*Alias];
- for (unsigned i = 0, e = MemDefList.size(); i != e; ++i) {
- SUnit *DefSU = MemDefList[i];
- if (DefSU == &ExitSU)
- continue;
- if (DefSU != SU &&
- (Kind != SDep::Output || !MO.isDead() ||
- !DefSU->getInstr()->registerDefIsDead(*Alias)))
- DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
- }
- }
-
- if (MO.isDef()) {
- // Add any data dependencies.
- unsigned DataLatency = SU->Latency;
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
- if (UseSU == SU)
- continue;
- unsigned LDataLatency = DataLatency;
- // Optionally add in a special extra latency for nodes that
- // feed addresses.
- // TODO: Do this for register aliases too.
- // TODO: Perhaps we should get rid of
- // SpecialAddressLatency and just move this into
- // adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies &&
- UseSU != &ExitSU) {
- MachineInstr *UseMI = UseSU->getInstr();
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
- assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
- if (RegUseIndex >= 0 &&
- (UseMCID.mayLoad() || UseMCID.mayStore()) &&
- (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
- UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
- LDataLatency += SpecialAddressLatency;
- }
- // Adjust the dependence latency using operand def/use
- // information (if any), and then allow the target to
- // perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
- }
- UseSU->addPred(dep);
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- std::vector<SUnit *> &UseList = Uses[*Alias];
- for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
- SUnit *UseSU = UseList[i];
- if (UseSU == SU)
- continue;
- const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
- ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
- }
- UseSU->addPred(dep);
- }
- }
-
- // If a def is going to wrap back around to the top of the loop,
- // backschedule it.
- if (!UnitLatencies && DefList.empty()) {
- LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
- if (I != LoopRegs.Deps.end()) {
- const MachineOperand *UseMO = I->second.first;
- unsigned Count = I->second.second;
- const MachineInstr *UseMI = UseMO->getParent();
- unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const MCInstrDesc &UseMCID = UseMI->getDesc();
- // TODO: If we knew the total depth of the region here, we could
- // handle the case where the whole loop is inside the region but
- // is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseMCID.getNumOperands()) {
- // Currently, we only support scheduling regions consisting of
- // single basic blocks. Check to see if the instruction is in
- // the same region by checking to see if it has the same parent.
- if (UseMI->getParent() != MI->getParent()) {
- unsigned Latency = SU->Latency;
- if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
- Latency += SpecialAddressLatency;
- // This is a wild guess as to the portion of the latency which
- // will be overlapped by work done outside the current
- // scheduling region.
- Latency -= std::min(Latency, Count);
- // Add the artificial edge.
- ExitSU.addPred(SDep(SU, SDep::Order, Latency,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- } else if (SpecialAddressLatency > 0 &&
- UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
- // The entire loop body is within the current scheduling region
- // and the latency of this operation is assumed to be greater
- // than the latency of the loop.
- // TODO: Recursively mark data-edge predecessors as
- // isScheduleHigh too.
- SU->isScheduleHigh = true;
- }
- }
- LoopRegs.Deps.erase(I);
- }
- }
-
- UseList.clear();
- if (!MO.isDead())
- DefList.clear();
-
- // Calls will not be reordered because of chain dependencies (see
- // below). Since call operands are dead, calls may continue to be added
- // to the DefList making dependence checking quadratic in the size of
- // the block. Instead, we leave only one call at the back of the
- // DefList.
- if (SU->isCall) {
- while (!DefList.empty() && DefList.back()->isCall)
- DefList.pop_back();
- }
- DefList.push_back(SU);
- } else {
- UseList.push_back(SU);
+ if (TRI->isPhysicalRegister(Reg))
+ addPhysRegDeps(SU, j);
+ else {
+ assert(!IsPostRA && "Virtual register encountered!");
+ if (MO.isDef())
+ addVRegDefDeps(SU, j);
+ else if (MO.readsReg()) // ignore undef operands
+ addVRegUseDeps(SU, j);
}
}
@@ -419,9 +589,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -460,7 +630,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (MCID.mayStore()) {
+ } else if (MI->mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -516,7 +686,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
- } else if (MCID.mayLoad()) {
+ } else if (MI->mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
@@ -558,32 +728,27 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (PrevMI)
FirstDbgValue = PrevMI;
- for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- Defs[i].clear();
- Uses[i].clear();
- }
+ Defs.clear();
+ Uses.clear();
+ VRegDefs.clear();
PendingLoads.clear();
}
-void ScheduleDAGInstrs::FinishBlock() {
- // Nothing to do.
-}
-
-void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
+void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
// Compute the latency for the node.
if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
// Simplistic target-independent heuristic: assume that loads take
// extra time.
- if (SU->getInstr()->getDesc().mayLoad())
+ if (SU->getInstr()->mayLoad())
SU->Latency += 2;
} else {
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
}
}
-void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
+void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
if (!InstrItins || InstrItins->isEmpty())
return;
@@ -608,7 +773,9 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
// %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
// What we want is to compute latency between def of %D6/%D7 and use of
// %Q3 instead.
- DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (DefMI->getOperand(Op2).isReg())
+ DefIdx = Op2;
}
MachineInstr *UseMI = Use->getInstr();
  // For all uses of the register, calculate the maximum latency
@@ -656,43 +823,8 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
return oss.str();
}
-// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
- // For MachineInstr-based scheduling, we're rescheduling the instructions in
- // the block, so start by removing them from the block.
- while (Begin != InsertPos) {
- MachineBasicBlock::iterator I = Begin;
- ++Begin;
- BB->remove(I);
- }
-
- // If first instruction was a DBG_VALUE then put it back.
- if (FirstDbgValue)
- BB->insert(InsertPos, FirstDbgValue);
-
- // Then re-insert them according to the given schedule.
- for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
- if (SUnit *SU = Sequence[i])
- BB->insert(InsertPos, SU->getInstr());
- else
- // Null SUnit* is a noop.
- EmitNoop();
- }
-
- // Update the Begin iterator, as the first instruction in the block
- // may have been scheduled later.
- if (!Sequence.empty())
- Begin = Sequence[0]->getInstr();
-
- // Reinsert any remaining debug_values.
- for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
- DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
- std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
- MachineInstr *DbgValue = P.first;
- MachineInstr *OrigPrivMI = P.second;
- BB->insertAfter(OrigPrivMI, DbgValue);
- }
- DbgValues.clear();
- FirstDbgValue = NULL;
- return BB;
+/// Return the basic block label. It is not necessarily unique because a block
+/// contains multiple scheduling regions. But it is fine for visualization.
+std::string ScheduleDAGInstrs::getDAGName() const {
+ return "dag." + BB->getFullName();
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
deleted file mode 100644
index 666bdf548c71..000000000000
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ /dev/null
@@ -1,212 +0,0 @@
-//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ScheduleDAGInstrs class, which implements
-// scheduling for a MachineInstr-based dependency graph.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SCHEDULEDAGINSTRS_H
-#define SCHEDULEDAGINSTRS_H
-
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include <map>
-
-namespace llvm {
- class MachineLoopInfo;
- class MachineDominatorTree;
-
- /// LoopDependencies - This class analyzes loop-oriented register
- /// dependencies, which are used to guide scheduling decisions.
- /// For example, loop induction variable increments should be
- /// scheduled as soon as possible after the variable's last use.
- ///
- class LLVM_LIBRARY_VISIBILITY LoopDependencies {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
-
- public:
- typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
- LoopDeps;
- LoopDeps Deps;
-
- LoopDependencies(const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt) :
- MLI(mli), MDT(mdt) {}
-
- /// VisitLoop - Clear out any previous state and analyze the given loop.
- ///
- void VisitLoop(const MachineLoop *Loop) {
- assert(Deps.empty() && "stale loop dependencies");
-
- MachineBasicBlock *Header = Loop->getHeader();
- SmallSet<unsigned, 8> LoopLiveIns;
- for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
- LE = Header->livein_end(); LI != LE; ++LI)
- LoopLiveIns.insert(*LI);
-
- const MachineDomTreeNode *Node = MDT.getNode(Header);
- const MachineBasicBlock *MBB = Node->getBlock();
- assert(Loop->contains(MBB) &&
- "Loop does not contain header!");
- VisitRegion(Node, MBB, Loop, LoopLiveIns);
- }
-
- private:
- void VisitRegion(const MachineDomTreeNode *Node,
- const MachineBasicBlock *MBB,
- const MachineLoop *Loop,
- const SmallSet<unsigned, 8> &LoopLiveIns) {
- unsigned Count = 0;
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *MI = I;
- if (MI->isDebugValue())
- continue;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (LoopLiveIns.count(MOReg))
- Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
- }
- ++Count; // Not every iteration due to dbg_value above.
- }
-
- const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
- for (std::vector<MachineDomTreeNode*>::const_iterator I =
- Children.begin(), E = Children.end(); I != E; ++I) {
- const MachineDomTreeNode *ChildNode = *I;
- MachineBasicBlock *ChildBlock = ChildNode->getBlock();
- if (Loop->contains(ChildBlock))
- VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns);
- }
- }
- };
-
- /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
- /// MachineInstrs.
- class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
- const MachineLoopInfo &MLI;
- const MachineDominatorTree &MDT;
- const MachineFrameInfo *MFI;
- const InstrItineraryData *InstrItins;
-
- /// Defs, Uses - Remember where defs and uses of each physical register
- /// are as we iterate upward through the instructions. This is allocated
- /// here instead of inside BuildSchedGraph to avoid the need for it to be
- /// initialized and destructed for each block.
- std::vector<std::vector<SUnit *> > Defs;
- std::vector<std::vector<SUnit *> > Uses;
-
- /// PendingLoads - Remember where unknown loads are after the most recent
- /// unknown store, as we iterate. As with Defs and Uses, this is here
- /// to minimize construction/destruction.
- std::vector<SUnit *> PendingLoads;
-
- /// LoopRegs - Track which registers are used for loop-carried dependencies.
- ///
- LoopDependencies LoopRegs;
-
- /// LoopLiveInRegs - Track which regs are live into a loop, to help guide
- /// back-edge-aware scheduling.
- ///
- SmallSet<unsigned, 8> LoopLiveInRegs;
-
- protected:
-
- /// DbgValues - Remember instruction that preceeds DBG_VALUE.
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *> >
- DbgValueVector;
- DbgValueVector DbgValues;
- MachineInstr *FirstDbgValue;
-
- public:
- MachineBasicBlock::iterator Begin; // The beginning of the range to
- // be scheduled. The range extends
- // to InsertPos.
- unsigned InsertPosIndex; // The index in BB of InsertPos.
-
- explicit ScheduleDAGInstrs(MachineFunction &mf,
- const MachineLoopInfo &mli,
- const MachineDominatorTree &mdt);
-
- virtual ~ScheduleDAGInstrs() {}
-
- /// NewSUnit - Creates a new SUnit and return a ptr to it.
- ///
- SUnit *NewSUnit(MachineInstr *MI) {
-#ifndef NDEBUG
- const SUnit *Addr = SUnits.empty() ? 0 : &SUnits[0];
-#endif
- SUnits.push_back(SUnit(MI, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
- "SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
- return &SUnits.back();
- }
-
- /// Run - perform scheduling.
- ///
- void Run(MachineBasicBlock *bb,
- MachineBasicBlock::iterator begin,
- MachineBasicBlock::iterator end,
- unsigned endindex);
-
- /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are
- /// input.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
-
- /// AddSchedBarrierDeps - Add dependencies from instructions in the current
- /// list of instructions being scheduled to scheduling barrier. We want to
- /// make sure instructions which define registers that are either used by
- /// the terminator or are live-out are properly scheduled. This is
- /// especially important when the definition latency of the return value(s)
- /// are too high to be hidden by the branch or when the liveout registers
- /// used by instructions in the fallthrough block.
- void AddSchedBarrierDeps();
-
- /// ComputeLatency - Compute node latency.
- ///
- virtual void ComputeLatency(SUnit *SU);
-
- /// ComputeOperandLatency - Override dependence edge latency using
- /// operand use/def information
- ///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
- SDep& dep) const;
-
- virtual MachineBasicBlock *EmitSchedule();
-
- /// StartBlock - Prepare to perform scheduling in the given block.
- ///
- virtual void StartBlock(MachineBasicBlock *BB);
-
- /// Schedule - Order nodes according to selected style, filling
- /// in the Sequence member.
- ///
- virtual void Schedule() = 0;
-
- /// FinishBlock - Clean up after scheduling in the given block.
- ///
- virtual void FinishBlock();
-
- virtual void dumpNode(const SUnit *SU) const;
-
- virtual std::string getGraphNodeLabel(const SUnit *SU) const;
- };
-}
-
-#endif
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 4b55a2284f85..38feee95a58e 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -25,7 +25,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
#include <fstream>
using namespace llvm;
@@ -42,12 +41,12 @@ namespace llvm {
static bool renderGraphFromBottomUp() {
return true;
}
-
+
static bool hasNodeAddressLabel(const SUnit *Node,
const ScheduleDAG *Graph) {
return true;
}
-
+
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
static std::string getEdgeAttributes(const SUnit *Node,
@@ -59,7 +58,7 @@ namespace llvm {
return "color=blue,style=dashed";
return "";
}
-
+
std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph);
static std::string getNodeAttributes(const SUnit *N,
@@ -82,18 +81,17 @@ std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
/// rendered using 'dot'.
///
-void ScheduleDAG::viewGraph() {
-// This code is only for debugging!
+void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) {
+ // This code is only for debugging!
#ifndef NDEBUG
- if (BB->getBasicBlock())
- ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() +
- ":" + BB->getBasicBlock()->getNameStr());
- else
- ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false,
- "Scheduling-Units Graph for " + MF.getFunction()->getNameStr());
+ ViewGraph(this, Name, false, Title);
#else
errs() << "ScheduleDAG::viewGraph is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAG::viewGraph() {
+ viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index b80c01ed58b9..3d22035974da 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -140,8 +140,6 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
- default:
- assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[StageCycle];
@@ -194,8 +192,6 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
- default:
- assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[cycle + i];
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 2282f0e6eb83..a6bdc3be32e0 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -10,24 +10,16 @@ add_llvm_library(LLVMSelectionDAG
LegalizeTypesGeneric.cpp
LegalizeVectorOps.cpp
LegalizeVectorTypes.cpp
+ ResourcePriorityQueue.cpp
ScheduleDAGFast.cpp
- ScheduleDAGList.cpp
ScheduleDAGRRList.cpp
ScheduleDAGSDNodes.cpp
SelectionDAG.cpp
SelectionDAGBuilder.cpp
+ SelectionDAGDumper.cpp
SelectionDAGISel.cpp
SelectionDAGPrinter.cpp
+ ScheduleDAGVLIW.cpp
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMSelectionDAG
- LLVMAnalysis
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7b878688df63..d1b998f8d840 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22,7 +22,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -64,7 +63,24 @@ namespace {
bool LegalTypes;
// Worklist of all of the nodes that need to be simplified.
- std::vector<SDNode*> WorkList;
+ //
+ // This has the semantics that when adding to the worklist,
+ // the item added must be next to be processed. It should
+ // also only appear once. The naive approach to this takes
+ // linear time.
+ //
+ // To reduce the insert/remove time to logarithmic, we use
+ // a set and a vector to maintain our worklist.
+ //
+ // The set contains the items on the worklist, but does not
+ // maintain the order they should be visited.
+ //
+ // The vector maintains the order nodes should be visited, but may
+ // contain duplicate or removed nodes. When choosing a node to
+ // visit, we pop off the order stack until we find an item that is
+ // also in the contents set. All operations are O(log N).
+ SmallPtrSet<SDNode*, 64> WorkListContents;
+ SmallVector<SDNode*, 64> WorkListOrder;
// AA - Used for DAG load/store alias analysis.
AliasAnalysis &AA;
@@ -84,18 +100,17 @@ namespace {
SDValue visit(SDNode *N);
public:
- /// AddToWorkList - Add to the work list making sure it's instance is at the
- /// the back (next to be processed.)
+ /// AddToWorkList - Add to the work list making sure its instance is at the
+ /// back (next to be processed.)
void AddToWorkList(SDNode *N) {
- removeFromWorkList(N);
- WorkList.push_back(N);
+ WorkListContents.insert(N);
+ WorkListOrder.push_back(N);
}
/// removeFromWorkList - remove all instances of N from the worklist.
///
void removeFromWorkList(SDNode *N) {
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
- WorkList.end());
+ WorkListContents.erase(N);
}
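The two members implement one logical worklist: a set for membership, a vector for visit order. A compact standalone model of the same scheme (a hash set here, where the real code uses a SmallPtrSet):

```cpp
#include <cstdio>
#include <unordered_set>
#include <vector>

// Membership lives in the set; visit order lives in the vector, which may
// hold stale duplicates that are skipped at pop time.
struct WorkList {
  std::unordered_set<int> Contents;
  std::vector<int> Order;

  void add(int N) { Contents.insert(N); Order.push_back(N); }
  void remove(int N) { Contents.erase(N); }

  bool pop(int &N) {
    // Skip stale entries until one is still a member; erase() returns the
    // number of elements removed, so nonzero means "was present".
    while (!Order.empty()) {
      N = Order.back();
      Order.pop_back();
      if (Contents.erase(N))
        return true;
    }
    return false;
  }
};

int main() {
  WorkList WL;
  WL.add(1); WL.add(2); WL.add(1); // re-adding 1 moves it to the back
  WL.remove(2);
  for (int N; WL.pop(N);)
    std::printf("visit %d\n", N); // visits 1 exactly once
}
```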
SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
@@ -159,7 +174,9 @@ namespace {
SDValue visitADD(SDNode *N);
SDValue visitSUB(SDNode *N);
SDValue visitADDC(SDNode *N);
+ SDValue visitSUBC(SDNode *N);
SDValue visitADDE(SDNode *N);
+ SDValue visitSUBE(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitUDIV(SDNode *N);
@@ -181,7 +198,9 @@ namespace {
SDValue visitSRA(SDNode *N);
SDValue visitSRL(SDNode *N);
SDValue visitCTLZ(SDNode *N);
+ SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTTZ(SDNode *N);
+ SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -279,7 +298,7 @@ namespace {
public:
DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
/// Run - runs the dag combiner on all nodes in the work list
@@ -362,6 +381,8 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
+ const TargetLowering &TLI,
+ const TargetOptions *Options,
unsigned Depth = 0) {
// No compile time optimizations on this type.
if (Op.getValueType() == MVT::ppcf128)
@@ -384,34 +405,44 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
return LegalOperations ? 0 : 1;
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+ return 0;
// fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath) return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
case ISD::FMUL:
case ISD::FDIV:
- if (HonorSignDependentRoundingFPMath()) return 0;
+ if (Options->HonorSignDependentRoundingFPMath()) return 0;
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+ Options, Depth + 1))
return V;
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+ Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
+ return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
+ Depth + 1);
}
}
@@ -435,10 +466,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
case ISD::FADD:
// FIXME: determine better conditions for this xform.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -450,7 +483,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
Op.getOperand(0));
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(UnsafeFPMath);
+ assert(DAG.getTarget().Options.UnsafeFPMath);
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
@@ -463,10 +496,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FMUL:
case ISD::FDIV:
- assert(!HonorSignDependentRoundingFPMath());
+ assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath());
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+ if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
+ DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options, Depth+1))
return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
LegalOperations, Depth+1),
@@ -944,14 +979,13 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
void DAGCombiner::Run(CombineLevel AtLevel) {
// set the instance variables, so that the various visit routines may use it.
Level = AtLevel;
- LegalOperations = Level >= NoIllegalOperations;
- LegalTypes = Level >= NoIllegalTypes;
+ LegalOperations = Level >= AfterLegalizeVectorOps;
+ LegalTypes = Level >= AfterLegalizeTypes;
// Add all the dag nodes to the worklist.
- WorkList.reserve(DAG.allnodes_size());
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
E = DAG.allnodes_end(); I != E; ++I)
- WorkList.push_back(I);
+ AddToWorkList(I);
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -962,11 +996,17 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// done. Set it to null to avoid confusion.
DAG.setRoot(SDValue());
- // while the worklist isn't empty, inspect the node on the end of it and
+ // while the worklist isn't empty, find a node and
// try and combine it.
- while (!WorkList.empty()) {
- SDNode *N = WorkList.back();
- WorkList.pop_back();
+ while (!WorkListContents.empty()) {
+ SDNode *N;
+ // The WorkListOrder holds the SDNodes in order, but it may contain duplicates.
+ // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
+  // worklist *should* contain, and check that the node we want to visit
+  // should actually be visited.
+ do {
+ N = WorkListOrder.pop_back_val();
+ } while (!WorkListContents.erase(N));
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
@@ -1050,7 +1090,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADD: return visitADD(N);
case ISD::SUB: return visitSUB(N);
case ISD::ADDC: return visitADDC(N);
+ case ISD::SUBC: return visitSUBC(N);
case ISD::ADDE: return visitADDE(N);
+ case ISD::SUBE: return visitSUBE(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1071,7 +1113,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SRA: return visitSRA(N);
case ISD::SRL: return visitSRL(N);
case ISD::CTLZ: return visitCTLZ(N);
+ case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
case ISD::CTTZ: return visitCTTZ(N);
+ case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
@@ -1408,16 +1452,14 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
}
}
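The fold rests on a simple identity: when two operands share no set bits, the addition can never carry, so it equals bitwise OR. A quick self-contained check:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0xF0F0u, B = 0x0A0Au;
  assert((A & B) == 0);     // no bit can be set in both operands...
  assert(A + B == (A | B)); // ...so the add can never carry
  return 0;
}
```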
@@ -1486,8 +1528,8 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
EVT VT = N0.getValueType();
// If the flag result is dead, turn this into an ADD.
- if (N->hasNUsesOfValue(0, 1))
- return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1503,16 +1545,14 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+ DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+ DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
- if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
- (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
N->getDebugLoc(), MVT::Glue));
@@ -1535,7 +1575,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
// fold (adde x, y, false) -> (addc x, y)
if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
- return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
+ return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);
return SDValue();
}
@@ -1645,6 +1685,51 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitSUBC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // If the flag result is dead, turn this into an SUB.
+ if (!N->hasAnyUseOfValue(1))
+ return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, x) -> 0 + no borrow
+ if (N0 == N1)
+ return CombineTo(N, DAG.getConstant(0, VT),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // fold (subc x, 0) -> x + no borrow
+ if (N1C && N1C->isNullValue())
+ return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
+ if (N0C && N0C->isAllOnesValue())
+ return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
+ DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+ MVT::Glue));
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSUBE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+
+ // fold (sube x, y, false) -> (subc x, y)
+ if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
+ return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+
+ return SDValue();
+}
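The SUBC/SUBE folds above follow directly from borrow semantics. Modeling subc as a difference plus a borrow-out flag makes each one easy to verify in a sketch:

```cpp
#include <cassert>
#include <cstdint>

// Model subc as a difference plus a borrow-out flag.
struct SubC { uint32_t Diff; bool Borrow; };
static SubC subc(uint32_t X, uint32_t Y) { return {X - Y, X < Y}; }

int main() {
  assert(subc(7, 7).Diff == 0 && !subc(7, 7).Borrow); // (subc x, x) -> 0
  assert(subc(7, 0).Diff == 7 && !subc(7, 0).Borrow); // (subc x, 0) -> x
  uint32_t X = 0x1234u;
  assert(0xFFFFFFFFu - X == ~X); // (sub -1, x) -> ~x, the xor form
  return 0;
}
```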
+
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1756,7 +1841,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
if (N0C && N1C && !N1C->isNullValue())
return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
// fold (sdiv X, 1) -> X
- if (N1C && N1C->getSExtValue() == 1LL)
+ if (N1C && N1C->getAPIntValue() == 1LL)
return N0;
// fold (sdiv X, -1) -> 0-X
if (N1C && N1C->isAllOnesValue())
@@ -1770,17 +1855,15 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
N0, N1);
}
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
- (isPowerOf2_64(N1C->getSExtValue()) ||
- isPowerOf2_64(-N1C->getSExtValue()))) {
+ if (N1C && !N1C->isNullValue() &&
+ (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
return SDValue();
- int64_t pow2 = N1C->getSExtValue();
- int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
- unsigned lg2 = Log2_64(abs2);
+ unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
@@ -1800,7 +1883,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we're dividing by a positive value, we're done. Otherwise, we must
// negate the result.
- if (pow2 > 0)
+ if (N1C->getAPIntValue().isNonNegative())
return SRA;
AddToWorkList(SRA.getNode());
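The sequence built here implements round-toward-zero signed division by a power of two: splat the sign bit, add a bias of 2^lg2 - 1 for negative dividends, then shift arithmetically. A self-contained sketch of the same arithmetic (assuming arithmetic right shift of signed values, which mainstream targets provide and C++20 guarantees):

```cpp
#include <cassert>
#include <cstdint>

// Round-toward-zero signed division by 2^Lg2 via the emitted shift sequence.
static int32_t sdivPow2(int32_t X, unsigned Lg2) {
  int32_t Sgn = X >> 31;                       // splat the sign bit
  uint32_t Bias = uint32_t(Sgn) >> (32 - Lg2); // 2^Lg2 - 1 if negative
  return int32_t(uint32_t(X) + Bias) >> Lg2;   // biased arithmetic shift
}

int main() {
  assert(sdivPow2(-7, 2) == -7 / 4); // -1, not -2: rounds toward zero
  assert(sdivPow2(7, 2) == 7 / 4);   // 1
  assert(sdivPow2(-8, 3) == -8 / 8); // -1
  return 0;
}
```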
@@ -1810,8 +1893,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
- !TLI.isIntDivCheap()) {
+ if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
@@ -2250,6 +2332,67 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
ORNode, N0.getOperand(1));
}
+ // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
+ // Only perform this optimization after type legalization and before
+  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
+ // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
+ // we don't want to undo this promotion.
+ // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
+ // on scalars.
+ if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ && Level == AfterLegalizeVectorOps) {
+ SDValue In0 = N0.getOperand(0);
+ SDValue In1 = N1.getOperand(0);
+ EVT In0Ty = In0.getValueType();
+ EVT In1Ty = In1.getValueType();
+ // If both incoming values are integers, and the original types are the same.
+ if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
+ SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
+ AddToWorkList(Op.getNode());
+ return BC;
+ }
+ }
+
+ // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
+ // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
+ // If both shuffles use the same mask, and both shuffle within a single
+ // vector, then it is worthwhile to move the swizzle after the operation.
+ // The type-legalizer generates this pattern when loading illegal
+ // vector types from memory. In many cases this allows additional shuffle
+ // optimizations.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N0.getOperand(1).getOpcode() == ISD::UNDEF &&
+ N1.getOperand(1).getOpcode() == ISD::UNDEF) {
+ ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
+ ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
+
+ assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
+ "Inputs to shuffles are not the same type");
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Check that both shuffles use the same mask. The masks are known to be of
+ // the same length because the result vector type is the same.
+ bool SameMask = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx0 = SVN0->getMaskElt(i);
+ int Idx1 = SVN1->getMaskElt(i);
+ if (Idx0 != Idx1) {
+ SameMask = false;
+ break;
+ }
+ }
+
+ if (SameMask) {
+ SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0), N1.getOperand(0));
+ AddToWorkList(Op.getNode());
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
+ DAG.getUNDEF(VT), &SVN0->getMask()[0]);
+ }
+ }
+
return SDValue();
}
@@ -2312,6 +2455,88 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+ // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
+ // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
+ // already be zero by virtue of the width of the base type of the load.
+ //
+ // the 'X' node here can either be nothing or an extract_vector_elt to catch
+ // more cases.
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD) ||
+ N0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
+ N0 : N0.getOperand(0) );
+
+ // Get the constant (if applicable) the zero'th operand is being ANDed with.
+ // This can be a pure constant or a vector splat, in which case we treat the
+ // vector as a scalar and use the splat value.
+ APInt Constant = APInt::getNullValue(1);
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ Constant = C->getAPIntValue();
+ } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
+ SplatBitSize, HasAnyUndefs);
+ if (IsSplat) {
+ // Undef bits can contribute to a possible optimisation if set, so
+ // set them.
+ SplatValue |= SplatUndef;
+
+ // The splat value may be something like "0x00FFFFFF", which means 0 for
+ // the first vector value and FF for the rest, repeating. We need a mask
+ // that will apply equally to all members of the vector, so AND all the
+ // lanes of the constant together.
+ EVT VT = Vector->getValueType(0);
+ unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ Constant = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i)
+ Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ }
+ }
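Folding the splat's lanes together guarantees the derived scalar mask is conservative for every element, even when the splat value straddles lane boundaries (the 0x00FFFFFF case from the comment). A scalar model of the lshr/zextOrTrunc loop:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // A 32-bit "splat" of four 8-bit lanes: 0x00 in the top lane, 0xFF below.
  uint32_t SplatValue = 0x00FFFFFFu;
  const unsigned BitWidth = 8;

  // AND all lanes together so one scalar mask is valid for every lane,
  // mirroring the lshr/zextOrTrunc loop above.
  uint8_t Constant = 0xFF;
  for (unsigned i = 0; i != 32 / BitWidth; ++i)
    Constant &= uint8_t(SplatValue >> (i * BitWidth));
  assert(Constant == 0x00); // one zero lane poisons the combined mask
  return 0;
}
```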
+
+ // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
+ // actually legal and isn't going to get expanded, else this is a false
+ // optimisation.
+ bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
+ Load->getMemoryVT());
+
+ // Resize the constant to the same size as the original memory access before
+ // extension. If it is still the AllOnesValue then this AND is completely
+ // unneeded.
+ Constant =
+ Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+
+ bool B;
+ switch (Load->getExtensionType()) {
+ default: B = false; break;
+ case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
+ case ISD::ZEXTLOAD:
+ case ISD::NON_EXTLOAD: B = true; break;
+ }
+
+ if (B && Constant.isAllOnesValue()) {
+ // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
+ // preserve semantics once we get rid of the AND.
+ SDValue NewLoad(Load, 0);
+ if (Load->getExtensionType() == ISD::EXTLOAD) {
+ NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
+ Load->getValueType(0), Load->getDebugLoc(),
+ Load->getChain(), Load->getBasePtr(),
+ Load->getOffset(), Load->getMemoryVT(),
+ Load->getMemOperand());
+ // Replace uses of the EXTLOAD with the new ZEXTLOAD.
+ CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
+ }
+
+ // Fold the AND away, taking care not to fold to the old load node if we
+ // replaced it.
+ CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
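The core test is cheap: truncate the AND mask to the width of the original memory access, and if it is all-ones there, the zero-extending load already produces the masked value. In plain integer terms:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Mem = 0xAB;
  uint32_t Loaded = uint32_t(Mem); // zextload i8 -> i32: bits 8..31 are zero
  uint32_t C = 0x000000FFu;        // the AND mask

  // Truncate the mask to the memory width; all-ones there means the AND
  // cannot clear anything the zero extension did not already clear.
  bool MaskRedundant = uint8_t(C) == 0xFF;
  assert(MaskRedundant && (Loaded & C) == Loaded);
  return 0;
}
```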
// fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
@@ -3323,7 +3548,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
- if (N1C && N0.getOpcode() == ISD::SRL &&
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
@@ -3603,8 +3830,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -3612,7 +3838,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// If all of the bits input the to ctlz node are known to be zero, then
// the result of the ctlz is "32" and the result of the shift is one.
- APInt UnknownBits = ~KnownZero & Mask;
+ APInt UnknownBits = ~KnownZero;
if (UnknownBits == 0) return DAG.getConstant(1, VT);
// Otherwise, check to see if there is exactly one bit input to the ctlz.
@@ -3713,6 +3939,16 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3723,6 +3959,16 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz_zero_undef c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4108,12 +4354,17 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ // On some architectures (such as SSE/NEON/etc) the SETCC result type is
+ // of the same size as the compared operands. Only optimize sext(setcc())
+ // if this is the case.
+ EVT SVT = TLI.getSetCCResultType(N0VT);
+
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == SVT.getSizeInBits())
return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
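
To make the guard concrete (types hypothetical): on an SSE- or NEON-style
target TLI.getSetCCResultType(v4i32) is itself v4i32, so the mask elements
produced by the compare already have the width of the sext'd result and

    sext(setcc(v4i32 a, v4i32 b, cc)) -> setcc(v4i32 a, v4i32 b, cc)

applies directly; when SVT is narrower than VT, the rewrite would change the
mask width, so it is skipped.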
@@ -4127,11 +4378,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MatchingVectorType =
EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+
+ if (SVT == MatchingVectorType) {
+ SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
}
}
@@ -4162,6 +4415,44 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return SDValue();
}
+// isTruncateOf - If N is a truncate of some other value, return true and
+// record the value being truncated in Op and which of Op's bits are zero in
+// KnownZero.
+// This function computes KnownZero to avoid a duplicated call to
+// ComputeMaskedBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ APInt &KnownZero) {
+ APInt KnownOne;
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ return true;
+ }
+
+ if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
+ cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
+ ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
+ if (COp0 && COp0->isNullValue())
+ Op = Op1;
+ else if (COp1 && COp1->isNullValue())
+ Op = Op0;
+ else
+ return false;
+
+ DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+
+ if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
+ return false;
+
+ return true;
+}
+
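The final guard is worth unpacking: ORing KnownZero with a mask of just bit 0
is all-ones exactly when every bit other than bit 0 is known zero, i.e. the
setcc operand is provably 0 or 1 and truncating it to i1 loses nothing. A
minimal isolated sketch (onlyLowBitMayBeSet is a hypothetical name; the APInt
calls mirror the check above):

    #include "llvm/ADT/APInt.h"
    using llvm::APInt;

    // True iff all bits of the value except bit 0 are known zero, so the
    // value can only be 0 or 1.
    static bool onlyLowBitMayBeSet(const APInt &KnownZero, unsigned BitWidth) {
      return (KnownZero | APInt(BitWidth, 1)).isAllOnesValue();
    }
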
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4175,6 +4466,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
N0.getOperand(0));
+ // fold (zext (truncate x)) -> (zext x) or
+ // (zext (truncate x)) -> (truncate x)
+ // This is valid when the truncated bits of x are already zero.
+ // FIXME: We should extend this to work for vectors too.
+ SDValue Op;
+ APInt KnownZero;
+ if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
+ APInt TruncatedBits =
+ (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
+ APInt(Op.getValueSizeInBits(), 0) :
+ APInt::getBitsSet(Op.getValueSizeInBits(),
+ N0.getValueSizeInBits(),
+ std::min(Op.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (TruncatedBits == (KnownZero & TruncatedBits)) {
+ if (VT.bitsGT(Op.getValueType()))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
+ if (VT.bitsLT(Op.getValueType()))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+
+ return Op;
+ }
+ }
+
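For concreteness (hypothetical sizes, a fragment reusing llvm::APInt): with Op
of type i32, N0 = trunc Op to i16, and VT = i32, the bits discarded by the
truncate are [16, 32), and the fold fires only when ComputeMaskedBits proved
all of them zero:

    // TruncatedBits covers [N0 size, min(Op size, VT size)) of Op; KnownZero
    // is assumed to come from the isTruncateOf call above.
    APInt TruncatedBits = APInt::getBitsSet(/*numBits=*/32,
                                            /*loBit=*/16, /*hiBit=*/32);
    bool Safe = TruncatedBits == (KnownZero & TruncatedBits);
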
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -4567,6 +4882,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
switch (V.getOpcode()) {
default: break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV != 0 && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal) {
+ return DAG.getConstant(NewVal, V.getValueType());
+ }
+ break;
+ }
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS don't contribute bits to the or, drop them.
@@ -4705,7 +5030,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (ExtType == ISD::NON_EXTLOAD)
Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), NewAlign);
else
Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff),
@@ -4844,6 +5170,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool isLE = TLI.isLittleEndian();
// noop truncate
if (N0.getValueType() == N->getValueType(0))
@@ -4871,6 +5198,44 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // Fold extract-and-trunc into a narrow extract. For example:
+  //     i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
+  //     i8  y = TRUNCATE(i64 x)
+  //        -- becomes --
+  //     v16i8 b = BITCAST (v2i64 val)
+  //     i8  y = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
+ //
+ // Note: We only run this optimization after type legalization (which often
+ // creates this pattern) and before operation legalization after which
+ // we need to be more careful about the vector instructions that we generate.
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ LegalTypes && !LegalOperations && N0->hasOneUse()) {
+
+ EVT VecTy = N0.getOperand(0).getValueType();
+ EVT ExTy = N0.getValueType();
+ EVT TrTy = N->getValueType(0);
+
+ unsigned NumElem = VecTy.getVectorNumElements();
+ unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
+ assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
+
+ SDValue EltNo = N0->getOperand(1);
+ if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+
+ int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
+
+ SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+ NVT, N0.getOperand(0));
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+ N->getDebugLoc(), TrTy, V,
+ DAG.getConstant(Index, MVT::i32));
+ }
+ }
+
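The index remapping can be checked by hand against the example in the comment
above (values hypothetical): extracting i64 lane 1 of a v2i64 and truncating
to i8 lands on lane 8 of the v16i8 bitcast on a little-endian target, and on
lane 15 on a big-endian one:

    unsigned Elt = 1;
    unsigned SizeRatio = 64 / 8;                           // ExTy / TrTy
    unsigned IndexLE = Elt * SizeRatio;                    // 8
    unsigned IndexBE = Elt * SizeRatio + (SizeRatio - 1);  // 15
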
// See if we can simplify the input to this truncate through knowledge that
// only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" // -> trunc y
@@ -4934,7 +5299,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
LD1->getBasePtr(), LD1->getPointerInfo(),
- false, false, Align);
+ false, false, false, Align);
}
return SDValue();
@@ -5004,7 +5369,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
- OrigAlign);
+ LN0->isInvariant(), OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
@@ -5017,7 +5382,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
// fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
// This often reduces constant pool loads.
- if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
+ (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
@@ -5247,20 +5613,24 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
// fold (fadd A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N1CFP->getValueAPF().isZero())
return N0;
// fold (fadd A, (fneg B)) -> (fsub A, B)
- if (isNegatibleForFree(N1, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
// fold (fadd (fneg A), B) -> (fsub B, A)
- if (isNegatibleForFree(N0, LegalOperations) == 2)
+ if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
+ isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations));
// If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
- N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+ if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
+ N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
N0.getOperand(1), N1));
@@ -5285,20 +5655,39 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
// fold (fsub A, 0) -> A
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N0;
// fold (fsub 0, B) -> -B
- if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
- if (isNegatibleForFree(N1, LegalOperations))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N0CFP && N0CFP->getValueAPF().isZero()) {
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations));
+ // If 'unsafe math' is enabled, fold
+  //   (fsub x, (fadd x, y)) -> (fneg y) and
+ // (fsub x, (fadd y, x)) -> (fneg y)
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ if (N1.getOpcode() == ISD::FADD) {
+ SDValue N10 = N1->getOperand(0);
+ SDValue N11 = N1->getOperand(1);
+
+ if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N11, DAG, LegalOperations);
+ else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
+ &DAG.getTarget().Options))
+ return GetNegatedExpression(N10, DAG, LegalOperations);
+ }
+ }
+
return SDValue();
}
@@ -5308,6 +5697,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5322,10 +5712,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (N0CFP && !N1CFP)
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
// fold (fmul A, 0) -> 0
- if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N1CFP->getValueAPF().isZero())
return N1;
// fold (fmul A, 0) -> 0, vector edition.
- if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ ISD::isBuildVectorAllZeros(N1.getNode()))
return N1;
// fold (fmul X, 2.0) -> (fadd X, X)
if (N1CFP && N1CFP->isExactlyValue(+2.0))
@@ -5336,8 +5728,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5348,7 +5742,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
- if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+ if (DAG.getTarget().Options.UnsafeFPMath &&
+ N1CFP && N0.getOpcode() == ISD::FMUL &&
N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
@@ -5363,6 +5758,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// fold vector ops
if (VT.isVector()) {
@@ -5374,10 +5770,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (N0CFP && N1CFP && VT != MVT::ppcf128)
return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+ // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
+ if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
+ // Compute the reciprocal 1.0 / c2.
+ APFloat N1APF = N1CFP->getValueAPF();
+ APFloat Recip(N1APF.getSemantics(), 1); // 1.0
+ APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
+ // Only do the transform if the reciprocal is a legal fp immediate that
+    // isn't too nasty (e.g. NaN, denormal, ...).
+ if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
+ (!LegalOperations ||
+ // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
+ // backend)... we should handle this gracefully after Legalize.
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ TLI.isFPImmLegal(Recip, VT)))
+ return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
+ DAG.getConstantFP(Recip, VT));
+ }
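
A small self-contained check of the reciprocal computation (the constants are
hypothetical; the APFloat calls are the ones used above):

    #include "llvm/ADT/APFloat.h"
    using llvm::APFloat;

    APFloat N1APF(4.0f);                      // c2 = 4.0
    APFloat Recip(N1APF.getSemantics(), 1);   // 1.0 in matching semantics
    APFloat::opStatus st =
        Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
    // st == opOK and Recip is exactly 0.25: (fdiv x, 4.0) -> (fmul x, 0.25).
    // For c2 = 3.0 the status is opInexact, which is still accepted; any
    // other status (underflow to a denormal, NaN, ...) blocks the fold.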
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
+ &DAG.getTarget().Options)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
@@ -5463,7 +5879,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5488,7 +5904,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted ||
+ (!LegalOperations ||
TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
@@ -5630,12 +6046,13 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (isNegatibleForFree(N0, LegalOperations))
+ if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
+ &DAG.getTarget().Options))
return GetNegatedExpression(N0, DAG, LegalOperations);
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST &&
+ if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5671,7 +6088,8 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+ if (!TLI.isFAbsFree(VT) &&
+ N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5860,6 +6278,47 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
return SDValue();
}
+/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that
+/// uses N as its base pointer and that N may be folded in the load / store
+/// addressing mode.
+static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
+ if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
+ return false;
+ VT = Use->getValueType(0);
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
+ if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
+ return false;
+ VT = ST->getValue().getValueType();
+ } else
+ return false;
+
+ TargetLowering::AddrMode AM;
+ if (N->getOpcode() == ISD::ADD) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else if (N->getOpcode() == ISD::SUB) {
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (Offset)
+ // [reg +/- imm]
+ AM.BaseOffs = -Offset->getSExtValue();
+ else
+ // [reg +/- reg]
+ AM.Scale = 1;
+ } else
+ return false;
+
+ return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
+}
+
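A sketch of the query this helper issues, assuming a TargetLowering &TLI and
an LLVMContext &Ctx in scope (the constant is hypothetical):

    // Would this target fold the address [reg + 16] into an i32 load?
    TargetLowering::AddrMode AM;   // BaseOffs and Scale default to zero
    AM.BaseOffs = 16;              // (add %reg, 16)  ->  [reg + 16]
    bool Folds = TLI.isLegalAddressingMode(AM, Type::getInt32Ty(Ctx));
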
/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
@@ -5867,7 +6326,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -5946,10 +6405,9 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
- if (!((Use->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
- (Use->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+ // If Ptr may be folded in addressing mode of other use, then it's
+ // not profitable to do this transformation.
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
RealUse = true;
}
@@ -5999,7 +6457,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
- if (!LegalOperations)
+ if (Level < AfterLegalizeDAG)
return false;
bool isLoad = true;
@@ -6046,7 +6504,8 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
continue;
// Try turning it into a post-indexed load / store except when
- // 1) All uses are load / store ops that use it as base ptr.
+ // 1) All uses are load / store ops that use it as base ptr (and
+  //    it may be folded into the addressing mode).
// 2) Op must be independent of N, i.e. Op is neither a predecessor
// nor a successor of N. Otherwise, if Op is folded that would
// create a cycle.
@@ -6069,10 +6528,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
for (SDNode::use_iterator III = Use->use_begin(),
EEE = Use->use_end(); III != EEE; ++III) {
SDNode *UseUse = *III;
- if (!((UseUse->getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
- (UseUse->getOpcode() == ISD::STORE &&
- cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+ if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
@@ -6139,7 +6595,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (!LD->isVolatile()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
- if (N->hasNUsesOfValue(0, 0)) {
+ if (!N->hasAnyUseOfValue(0)) {
// It's not safe to use the two value CombineTo variant here. e.g.
// v1, chain2 = load chain1, loc
// v2, chain3 = load chain2, loc
@@ -6164,7 +6620,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+ if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
@@ -6222,7 +6678,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
} else {
ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
LD->getValueType(0),
@@ -6486,7 +6942,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
- NewAlign);
+ LD->isInvariant(), NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
@@ -6546,7 +7002,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
LD->getChain(), LD->getBasePtr(),
LD->getPointerInfo(),
- false, false, LDAlign);
+ false, false, false, LDAlign);
SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
NewLD, ST->getBasePtr(),
@@ -6823,13 +7279,14 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (scalar_to_vector val, 0) -> val
SDValue InVec = N->getOperand(0);
+ EVT VT = InVec.getValueType();
+ EVT NVT = N->getValueType(0);
if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
SDValue InOp = InVec.getOperand(0);
- EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
assert(InOp.getValueType().isInteger() && NVT.isInteger());
return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
@@ -6837,6 +7294,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
return InOp;
}
+ SDValue EltNo = N->getOperand(1);
+ bool ConstEltNo = isa<ConstantSDNode>(EltNo);
+
+ // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
+ // We only perform this optimization before the op legalization phase because
+ // we may introduce new vector instructions which are not backed by TD patterns.
+  // For example, on AVX we might end up extracting elements from a wide
+  // vector without using extract_subvector.
+ if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
+ && ConstEltNo && !LegalOperations) {
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int NumElem = VT.getVectorNumElements();
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
+ // Find the new index to extract from.
+ int OrigElt = SVOp->getMaskElt(Elt);
+
+ // Extracting an undef index is undef.
+ if (OrigElt == -1)
+ return DAG.getUNDEF(NVT);
+
+ // Select the right vector half to extract from.
+ if (OrigElt < NumElem) {
+ InVec = InVec->getOperand(0);
+ } else {
+ InVec = InVec->getOperand(1);
+ OrigElt -= NumElem;
+ }
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+ InVec, DAG.getConstant(OrigElt, MVT::i32));
+ }
+
// Perform only after legalization to ensure build_vector / vector_shuffle
// optimizations have already been done.
if (!LegalOperations) return SDValue();
@@ -6844,17 +7333,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
// (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
- SDValue EltNo = N->getOperand(1);
- if (isa<ConstantSDNode>(EltNo)) {
+ if (ConstEltNo) {
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
- EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
+    // If the result of the load has to be truncated, then it's not necessarily
+ // profitable.
+ if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
+ return SDValue();
+
if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6872,12 +7368,20 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
InVec.getOperand(0).getValueType() == ExtVT &&
ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
LN0 = cast<LoadSDNode>(InVec.getOperand(0));
} else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
// (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
// =>
// (load $addr+1*size)
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
// If the bit convert changed the number of elements, it is unsafe
// to examine the mask.
if (BCNumEltsChanged)
@@ -6888,14 +7392,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BITCAST)
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
InVec = InVec.getOperand(0);
+ }
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
}
}
+ // Make sure we found a non-volatile load and the extractelement is
+ // the only use.
if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
return SDValue();
@@ -6929,9 +7440,45 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
DAG.getConstant(PtrOff, PtrType));
}
- return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getPointerInfo().getWithOffset(PtrOff),
- LN0->isVolatile(), LN0->isNonTemporal(), Align);
+ // The replacement we need to do here is a little tricky: we need to
+ // replace an extractelement of a load with a load.
+ // Use ReplaceAllUsesOfValuesWith to do the replacement.
+    // Note that this replacement assumes that the extractelement is the only
+ // use of the load; that's okay because we don't want to perform this
+ // transformation in other cases anyway.
+ SDValue Load;
+ SDValue Chain;
+ if (NVT.bitsGT(LVT)) {
+ // If the result type of vextract is wider than the load, then issue an
+ // extending load instead.
+ ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
+ ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+ Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
+ Chain = Load.getValue(1);
+ } else {
+ Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), Align);
+ Chain = Load.getValue(1);
+ if (NVT.bitsLT(LVT))
+ Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
+ else
+ Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
+ }
+ WorkListRemover DeadNodes(*this);
+ SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
+ SDValue To[] = { Load, Chain };
+ DAG.ReplaceAllUsesOfValuesWith(From, To, 2, &DeadNodes);
+    // Since we're explicitly calling ReplaceAllUses, add the new node to the
+ // worklist explicitly as well.
+ AddToWorkList(Load.getNode());
+ AddUsersToWorkList(Load.getNode()); // Add users too
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorkList(N);
+ return SDValue(N, 0);
}
return SDValue();
@@ -6939,11 +7486,122 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned NumInScalars = N->getNumOperands();
+ DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
+ // Check to see if this is a BUILD_VECTOR of a bunch of values
+ // which come from any_extend or zero_extend nodes. If so, we can create
+ // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
+ // optimizations. We do not handle sign-extend because we can't fill the sign
+ // using shuffles.
+ EVT SourceType = MVT::Other;
+ bool AllAnyExt = true;
+ bool AllUndef = true;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+ // Ignore undef inputs.
+ if (In.getOpcode() == ISD::UNDEF) continue;
+ AllUndef = false;
+
+ bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
+ bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
+
+ // Abort if the element is not an extension.
+ if (!ZeroExt && !AnyExt) {
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // The input is a ZeroExt or AnyExt. Check the original type.
+ EVT InTy = In.getOperand(0).getValueType();
+
+ // Check that all of the widened source types are the same.
+ if (SourceType == MVT::Other)
+ // First time.
+ SourceType = InTy;
+ else if (InTy != SourceType) {
+      // Multiple incoming types. Abort.
+ SourceType = MVT::Other;
+ break;
+ }
+
+ // Check if all of the extends are ANY_EXTENDs.
+ AllAnyExt &= AnyExt;
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(VT);
+
+ // In order to have valid types, all of the inputs must be extended from the
+ // same source type and all of the inputs must be any or zero extend.
+ // Scalar sizes must be a power of two.
+ EVT OutScalarTy = N->getValueType(0).getScalarType();
+ bool ValidTypes = SourceType != MVT::Other &&
+ isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
+ isPowerOf2_32(SourceType.getSizeInBits());
+
+ // We perform this optimization post type-legalization because
+ // the type-legalizer often scalarizes integer-promoted vectors.
+ // Performing this optimization before may create bit-casts which
+ // will be type-legalized to complex code sequences.
+ // We perform this optimization only before the operation legalizer because we
+ // may introduce illegal operations.
+ // Create a new simpler BUILD_VECTOR sequence which other optimizations can
+ // turn into a single shuffle instruction.
+ if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) &&
+ ValidTypes) {
+ bool isLE = TLI.isLittleEndian();
+ unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
+ assert(ElemRatio > 1 && "Invalid element size ratio");
+ SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
+ DAG.getConstant(0, SourceType);
+
+ unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements();
+ SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
+
+ // Populate the new build_vector
+ for (unsigned i=0; i < N->getNumOperands(); ++i) {
+ SDValue Cast = N->getOperand(i);
+ assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
+ Cast.getOpcode() == ISD::ZERO_EXTEND ||
+ Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
+ SDValue In;
+ if (Cast.getOpcode() == ISD::UNDEF)
+ In = DAG.getUNDEF(SourceType);
+ else
+ In = Cast->getOperand(0);
+ unsigned Index = isLE ? (i * ElemRatio) :
+ (i * ElemRatio + (ElemRatio - 1));
+
+ assert(Index < Ops.size() && "Invalid index");
+ Ops[Index] = In;
+ }
+
+ // The type of the new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
+ assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() &&
+ "Invalid vector size");
+ // Check if the new vector type is legal.
+ if (!isTypeLegal(VecVT)) return SDValue();
+
+ // Make the new BUILD_VECTOR.
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VecVT, &Ops[0], Ops.size());
+
+ // The new BUILD_VECTOR node has the potential to be further optimized.
+ AddToWorkList(BV.getNode());
+ // Bitcast to the desired type.
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV);
+ }
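
A worked instance of the rewrite (types hypothetical): a v4i32 BUILD_VECTOR of
zero-extended i8 values a..d has ElemRatio = 32/8 = 4, so it becomes a v16i8
BUILD_VECTOR whose little-endian lanes are a,0,0,0, b,0,0,0, c,0,0,0, d,0,0,0
(Filler is 0 for zero-extends and undef when all inputs are any-extends),
bitcast back to v4i32. The lane placement in isolation:

    unsigned ElemRatio = 32 / 8;                          // out / source bits
    unsigned i = 2;                                       // third operand, 'c'
    unsigned IndexLE = i * ElemRatio;                     // lane 8
    unsigned IndexBE = i * ElemRatio + (ElemRatio - 1);   // lane 11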
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
// operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
// at most two distinct vectors, turn this into a shuffle node.
+
+ // May only combine to shuffle after legalize if shuffle is legal.
+ if (LegalOperations &&
+ !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
+ return SDValue();
+
SDValue VecIn1, VecIn2;
for (unsigned i = 0; i != NumInScalars; ++i) {
// Ignore undef inputs.
@@ -6957,15 +7615,8 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
break;
}
- // If the input vector type disagrees with the result of the build_vector,
- // we can't make a shuffle.
+ // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
- if (ExtractedFromVec.getValueType() != VT) {
- VecIn1 = VecIn2 = SDValue(0, 0);
- break;
- }
-
- // Otherwise, remember this. We allow up to two distinct input vectors.
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
@@ -6980,7 +7631,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -7006,14 +7657,39 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
Mask.push_back(Idx+NumInScalars);
}
- // Add count and size info.
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Attempt to transform a single input vector to the correct type.
+ if ((VT != VecIn1.getValueType())) {
+    // We don't support shuffling between two values of different types.
+ if (VecIn2.getNode() != 0)
+ return SDValue();
+
+ // We only support widening of vectors which are half the size of the
+    // output registers. For example, XMM->YMM widening on X86 with AVX.
+ if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
+ return SDValue();
+
+ // Widen the input vector by adding undef values.
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
+ }
+
+ // If VecIn2 is unused then change it to undef.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+
+ // Check that we were able to transform all incoming values to the same type.
+ if (VecIn2.getValueType() != VecIn1.getValueType() ||
+ VecIn1.getValueType() != VT)
+ return SDValue();
+
+ // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
return SDValue();
// Return the new VECTOR_SHUFFLE node.
SDValue Ops[2];
Ops[0] = VecIn1;
- Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ Ops[1] = VecIn2;
return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
}
@@ -7045,19 +7721,23 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
return SDValue();
- // Combine:
- // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
- // Into:
- // indicies are equal => V1
- // otherwise => (extract_subvec V1, ExtIdx)
- //
- SDValue InsIdx = N->getOperand(1);
- SDValue ExtIdx = V->getOperand(2);
+ // Only handle cases where both indexes are constants with the same type.
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
- if (InsIdx == ExtIdx)
- return V->getOperand(1);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
- V->getOperand(0), N->getOperand(1));
+ if (InsIdx && ExtIdx &&
+ InsIdx->getValueType(0).getSizeInBits() <= 64 &&
+ ExtIdx->getValueType(0).getSizeInBits() <= 64) {
+ // Combine:
+ // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
+ // Into:
+ // indices are equal => V1
+ // otherwise => (extract_subvec V1, ExtIdx)
+ if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
+ return V->getOperand(1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
+ V->getOperand(0), N->getOperand(1));
+ }
}
return SDValue();
@@ -7068,15 +7748,63 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
unsigned NumElts = VT.getVectorNumElements();
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
+
+ // Canonicalize shuffle undef, undef -> undef
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+ return DAG.getUNDEF(VT);
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- assert(N0.getValueType().getVectorNumElements() == NumElts &&
- "Vector shuffle must be normalized in DAG");
+ // Canonicalize shuffle v, v -> v, undef
+ if (N0 == N1) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) Idx -= NumElts;
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
+
+ // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
+ if (N0.getOpcode() == ISD::UNDEF) {
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= 0) {
+ if (Idx < (int)NumElts)
+ Idx += NumElts;
+ else
+ Idx -= NumElts;
+ }
+ NewMask.push_back(Idx);
+ }
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+ &NewMask[0]);
+ }
- // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+ // Remove references to rhs if it is undef
+ if (N1.getOpcode() == ISD::UNDEF) {
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (Idx >= (int)NumElts) {
+ Idx = -1;
+ Changed = true;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+ }
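
These canonicalizations are pure mask rewrites; for instance, shuffle(v, v)
with mask <0,5,2,7> over four elements becomes shuffle(v, undef) with mask
<0,1,2,3>:

    int Mask[4] = {0, 5, 2, 7};     // indices >= 4 select the second copy
    for (unsigned i = 0; i != 4; ++i)
      if (Mask[i] >= 4)
        Mask[i] -= 4;               // Mask is now {0, 1, 2, 3}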
// If it is a splat, check if the argument vector is another splat or a
// build_vector with all scalar elements the same.
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
@@ -7115,6 +7843,40 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
}
}
+
+ // If this shuffle node is simply a swizzle of another shuffle node,
+ // and it reverses the swizzle of the previous shuffle then we can
+ // optimize shuffle(shuffle(x, undef), undef) -> x.
+ if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
+ N1.getOpcode() == ISD::UNDEF) {
+
+ ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+
+ // Shuffle nodes can only reverse shuffles with a single non-undef value.
+ if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
+ return SDValue();
+
+ // The incoming shuffle must be of the same type as the result of the
+ // current shuffle.
+ assert(OtherSV->getOperand(0).getValueType() == VT &&
+ "Shuffle types don't match");
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ assert(Idx < (int)NumElts && "Index references undef operand");
+ // Next, this index comes from the first value, which is the incoming
+ // shuffle. Adopt the incoming index.
+ if (Idx >= 0)
+ Idx = OtherSV->getMaskElt(Idx);
+
+ // The combined shuffle must map each index to itself.
+ if (Idx >= 0 && (unsigned)Idx != i)
+ return SDValue();
+ }
+
+ return OtherSV->getOperand(0);
+ }
+
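A worked instance of the reversal test (masks hypothetical): an outer mask
<1,0,3,2> over an inner shuffle with the same mask <1,0,3,2> composes to the
identity, so the pair folds to the original vector:

    int Inner[4] = {1, 0, 3, 2}, Outer[4] = {1, 0, 3, 2};
    bool IsIdentity = true;
    for (unsigned i = 0; i != 4; ++i)
      if (Outer[i] >= 0 && Inner[Outer[i]] != (int)i)
        IsIdentity = false;          // any mismatch kills the fold
    // IsIdentity -> shuffle(shuffle(x, undef, m), undef, m) == x
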
return SDValue();
}
@@ -7190,7 +7952,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue Elt = RHS.getOperand(i);
if (!isa<ConstantSDNode>(Elt))
return SDValue();
- else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+
+ if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
Indices.push_back(i);
else if (cast<ConstantSDNode>(Elt)->isNullValue())
Indices.push_back(NumElts);
@@ -7261,8 +8024,19 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
EVT VT = LHSOp.getValueType();
- assert(RHSOp.getValueType() == VT &&
- "SimplifyVBinOp with different BUILD_VECTOR element types");
+ EVT RVT = RHSOp.getValueType();
+ if (RVT != VT) {
+ // Integer BUILD_VECTOR operands may have types larger than the element
+ // size (e.g., when the element type is not legal). Prior to type
+ // legalization, the types may not match between the two BUILD_VECTORS.
+ // Truncate one of the operands to make them match.
+ if (RVT.getSizeInBits() > VT.getSizeInBits()) {
+ RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
+ } else {
+ LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
+ VT = RVT;
+ }
+ }
SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
@@ -7374,8 +8148,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) &&
(LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
- (LLD->hasAnyUseOfValue(1) &&
- (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))))
+ (RLD->hasAnyUseOfValue(1) &&
+ (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
return false;
Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
@@ -7393,7 +8167,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// FIXME: Discards pointer info.
LLD->getChain(), Addr, MachinePointerInfo(),
LLD->isVolatile(), LLD->isNonTemporal(),
- LLD->getAlignment());
+ LLD->isInvariant(), LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
RLD->getExtensionType() : LLD->getExtensionType(),
@@ -7509,7 +8283,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
AddToWorkList(CPIdx.getNode());
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(), false,
- false, Alignment);
+ false, false, Alignment);
}
}
@@ -7517,8 +8291,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
- N0.getValueType().isInteger() &&
- N2.getValueType().isInteger() &&
(N1C->isNullValue() || // (a < 0) ? b : 0
(N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0
EVT XType = N0.getValueType();
@@ -7720,7 +8492,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7734,7 +8506,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+ SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
ii != ee; ++ii)
@@ -7856,30 +8628,20 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
- int &SrcValueOffset,
- unsigned &SrcValueAlign,
- const MDNode *&TBAAInfo) const {
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- Ptr = LD->getBasePtr();
- Size = LD->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = LD->getSrcValue();
- SrcValueOffset = LD->getSrcValueOffset();
- SrcValueAlign = LD->getOriginalAlignment();
- TBAAInfo = LD->getTBAAInfo();
- return true;
- }
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- Ptr = ST->getBasePtr();
- Size = ST->getMemoryVT().getSizeInBits() >> 3;
- SrcValue = ST->getSrcValue();
- SrcValueOffset = ST->getSrcValueOffset();
- SrcValueAlign = ST->getOriginalAlignment();
- TBAAInfo = ST->getTBAAInfo();
- return false;
- }
- llvm_unreachable("FindAliasInfo expected a memory operand");
+ SDValue &Ptr, int64_t &Size,
+ const Value *&SrcValue,
+ int &SrcValueOffset,
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
+ LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
+
+ Ptr = LS->getBasePtr();
+ Size = LS->getMemoryVT().getSizeInBits() >> 3;
+ SrcValue = LS->getSrcValue();
+ SrcValueOffset = LS->getSrcValueOffset();
+ SrcValueAlign = LS->getOriginalAlignment();
+ TBAAInfo = LS->getTBAAInfo();
+ return isa<LoadSDNode>(LS);
}
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index e8f8c73d6883..0c1ac6982d2a 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,6 +39,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "isel"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
@@ -58,8 +59,15 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
+STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
+ "target-independent selector");
+STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
+ "target-specific selector");
+STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
+
/// startNewBlock - Set the current block to which generated machine
/// instructions will be appended, and clear the local CSE map.
///
@@ -96,6 +104,11 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!hasTrivialKill(Cast->getOperand(0)))
return false;
+ // GEPs with all zero indices are trivially coalesced by fast-isel.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0)))
+ return false;
+
// Only instructions with a single use in the same basic block are considered
// to have trivial kills.
return I->hasOneUse() &&
@@ -123,15 +136,8 @@ unsigned FastISel::getRegForValue(const Value *V) {
return 0;
}
- // Look up the value to see if we already have a register for it. We
- // cache values defined by Instructions across blocks, and other values
- // only locally. This is because Instructions already have the SSA
- // def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
- if (I != FuncInfo.ValueMap.end())
- return I->second;
-
- unsigned Reg = LocalValueMap[V];
+ // Look up the value to see if we already have a register for it.
+ unsigned Reg = lookUpRegForValue(V);
if (Reg != 0)
return Reg;
@@ -186,7 +192,7 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
uint32_t IntBitWidth = IntVT.getSizeInBits();
bool isExact;
(void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
- APFloat::rmTowardZero, &isExact);
+ APFloat::rmTowardZero, &isExact);
if (isExact) {
APInt IntVal(IntBitWidth, x);
@@ -297,6 +303,18 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
+void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E) {
+  assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!");
+ while (I != E) {
+ MachineInstr *Dead = &*I;
+ ++I;
+ Dead->eraseFromParent();
+ ++NumFastIselDead;
+ }
+ recomputeInsertPt();
+}
+
FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
DebugLoc OldDL = DL;
@@ -377,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::SRA;
}
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
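The strength reduction relies on the identity x urem 2^k == x and (2^k - 1),
valid for any unsigned x; a quick standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Imm = 8;                 // power-of-two divisor
      for (uint64_t x = 0; x != 1000; ++x)
        assert(x % Imm == (x & (Imm - 1)));   // urem x, 8 == and x, 7
      return 0;
    }
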
unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (ResultReg == 0) return false;
@@ -427,6 +452,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
bool NIsKill = hasTrivialKill(I->getOperand(0));
+ // Keep a running tab of the total offset to coalesce multiple N = N + Offset
+ // into a single N = N + TotalOffset.
+ uint64_t TotalOffs = 0;
+ // FIXME: What's a good SWAG number for MaxOffs?
+ uint64_t MaxOffs = 2048;
Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -436,14 +466,15 @@ bool FastISel::SelectGetElementPtr(const User *I) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
- uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
- // FIXME: This can be optimized by combining the add with a
- // subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
- if (N == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
- NIsKill = true;
+ TotalOffs += TD.getStructLayout(StTy)->getElementOffset(Field);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
}
Ty = StTy->getElementType(Field);
} else {
@@ -452,14 +483,26 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ // N = N + Offset
+ TotalOffs +=
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+ if (TotalOffs >= MaxOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
NIsKill = true;
- continue;
+ TotalOffs = 0;
}
// N = N + Idx * ElementSize;
@@ -484,6 +527,12 @@ bool FastISel::SelectGetElementPtr(const User *I) {
return false;
}
}
+ if (TotalOffs) {
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (N == 0)
+ // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
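
To see the payoff (hypothetical GEP): for
getelementptr {i32, i32, i32}* %s, i64 1, i32 2 the array index contributes
12 bytes and field 2 another 8, so the running total lets fast-isel emit a
single ADD of 20 instead of one ADD per component:

    uint64_t TotalOffs = 0;
    TotalOffs += 12;   // i64 1 steps over one 12-byte struct
    TotalOffs += 8;    // field 2 sits at byte offset 8
    // one FastEmit_ri_(VT, ISD::ADD, N, NIsKill, /*Imm=*/20, VT) at the end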
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, N);
@@ -512,21 +561,32 @@ bool FastISel::SelectCall(const User *I) {
return true;
}
+ MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
+ ComputeUsesVAFloatArgument(*Call, &MMI);
+
const Function *F = Call->getCalledFunction();
if (!F) return false;
// Handle selected intrinsic function calls.
switch (F->getIntrinsicID()) {
default: break;
+ // At -O0 we don't care about the lifetime intrinsics.
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
if (!DIVariable(DI->getVariable()).Verify() ||
- !FuncInfo.MF->getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo()) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
const Value *Address = DI->getAddress();
- if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+ if (!Address || isa<UndefValue>(Address)) {
+ DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
+ }
unsigned Reg = 0;
unsigned Offset = 0;
@@ -534,16 +594,36 @@ bool FastISel::SelectCall(const User *I) {
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
}
if (!Reg)
- Reg = getRegForValue(Address);
+ Reg = lookUpRegForValue(Address);
+
+ // If we have a VLA that has a "use" in a metadata node that's then used
+ // here but it has no other uses, then we have a problem. E.g.,
+ //
+ // int foo (const int *x) {
+ // char a[*x];
+ // return 0;
+ // }
+ //
+ // If we assign 'a' a vreg and fast isel later on has to use the selection
+ // DAG isel, it will want to copy the value to the vreg. However, there are
+ // no uses, which goes counter to what selection DAG isel expects.
+ if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+ (!isa<AllocaInst>(Address) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
+ Reg = FuncInfo.InitializeRegForValue(Address);
if (Reg)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset)
.addMetadata(DI->getVariable());
+ else
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return true;
}
case Intrinsic::dbg_value: {
@@ -581,60 +661,6 @@ bool FastISel::SelectCall(const User *I) {
}
return true;
}
- case Intrinsic::eh_exception: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand)
- break;
-
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- unsigned Reg = TLI.getExceptionAddressRegister();
- const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
- UpdateValueMap(Call, ResultReg);
- return true;
- }
- case Intrinsic::eh_selector: {
- EVT VT = TLI.getValueType(Call->getType());
- if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand)
- break;
- if (FuncInfo.MBB->isLandingPad())
- AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(Call);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- unsigned Reg = TLI.getExceptionSelectorRegister();
- EVT SrcVT = TLI.getPointerTy();
- const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- ResultReg).addReg(Reg);
-
- bool ResultRegIsKill = hasTrivialKill(Call);
-
- // Cast the register to the type of the selector.
- if (SrcVT.bitsGT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg, ResultRegIsKill);
- else if (SrcVT.bitsLT(MVT::i32))
- ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
- if (ResultReg == 0)
- // Unhandled operand. Halt "fast" selection and bail.
- return false;
-
- UpdateValueMap(Call, ResultReg);
-
- return true;
- }
case Intrinsic::objectsize: {
ConstantInt *CI = cast<ConstantInt>(Call->getArgOperand(1));
unsigned long long Res = CI->isZero() ? -1ULL : 0;
@@ -726,8 +752,8 @@ bool FastISel::SelectBitCast(const User *I) {
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
- TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
- TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+ const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+ const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
// Don't attempt a cross-class copy. It will likely fail.
if (SrcClass == DstClass) {
ResultReg = createResultReg(DstClass);
@@ -758,17 +784,33 @@ FastISel::SelectInstruction(const Instruction *I) {
DL = I->getDebugLoc();
+ MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
+
// First, try doing target-independent selection.
if (SelectOperator(I, I->getOpcode())) {
+ ++NumFastIselSuccessIndependent;
DL = DebugLoc();
return true;
}
+ // Remove dead code. However, ignore call instructions since we've flushed
+ // the local value map and recomputed the insert point.
+ if (!isa<CallInst>(I)) {
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
+ }
// Next, try calling the target to attempt to handle the instruction.
+ SavedInsertPt = FuncInfo.InsertPt;
if (TargetSelectInstruction(I)) {
+ ++NumFastIselSuccessTarget;
DL = DebugLoc();
return true;
}
+ // Check for dead code and remove as necessary.
+ recomputeInsertPt();
+ if (SavedInsertPt != FuncInfo.InsertPt)
+ removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
DL = DebugLoc();
return false;
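
The SavedInsertPt bookkeeping above exists because a failed selection attempt may already have emitted instructions; everything between the recomputed insert point and the saved one is a half-finished lowering with no users, and would otherwise survive as dead machine code. Condensed, the pattern wrapped around each of the two attempts looks like this (trySelect is a hypothetical stand-in for either selection path):

    MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
    if (!trySelect(I)) {                      // target-independent or target hook
      recomputeInsertPt();                    // resync after local-value flushes
      if (SavedInsertPt != FuncInfo.InsertPt) // something was emitted: erase it
        removeDeadCode(FuncInfo.InsertPt, SavedInsertPt);
    }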
@@ -779,8 +821,11 @@ FastISel::SelectInstruction(const Instruction *I) {
/// the CFG.
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
- if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // The unconditional fall-through case, which needs no instructions.
+
+ if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+ // The unconditional fall-through case, which needs no instructions.
+ // If the branch would be the only instruction in the block, we emit
+ // it anyway (the else below) for more accurate line information.
} else {
// The unconditional branch case.
TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
@@ -1354,8 +1399,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
- // Promote MVT::i1.
- if (VT == MVT::i1)
+ // Handle integer promotions, though, because they're common and easy.
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
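
The widened check matters on targets where i8 or i16 is not a legal register type: rather than punting the whole block to SelectionDAG for a small-integer PHI, the type is promoted the way the legalizer would promote it. An illustrative sketch, assuming a target whose smallest legal integer type is i32:

    // On such a target getTypeToTransformTo maps i1, i8, i16 all to i32,
    // so the PHI simply gets a 32-bit vreg instead of aborting fast-isel.
    EVT VT = MVT::i8;
    VT = TLI.getTypeToTransformTo(Ctx, VT);   // now MVT::i32 here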
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b052740a1abe..8dde919079d9 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -68,7 +69,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
GetReturnInfo(Fn->getReturnType(),
Fn->getAttributes().getRetAttributes(), Outs, TLI);
CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
- Fn->isVarArg(),
+ Fn->isVarArg(),
Outs, Fn->getContext());
// Initialize the mapping of values to registers. This is only set up for
@@ -92,14 +93,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
// candidate. I.e., it would trigger the creation of a stack protector.
bool MayNeedSP =
(AI->isArrayAllocation() ||
- (TySize > 8 && isa<ArrayType>(Ty) &&
+ (TySize >= 8 && isa<ArrayType>(Ty) &&
cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false,
+ MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
@@ -355,7 +358,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
/// argument. This overrides previous frame index entry for this argument,
/// if any.
void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A,
- int FI) {
+ int FI) {
ByValArgFrameIndexMap[A] = FI;
}
@@ -367,10 +370,34 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) {
ByValArgFrameIndexMap.find(A);
if (I != ByValArgFrameIndexMap.end())
return I->second;
- DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ DEBUG(dbgs() << "Argument does not have assigned frame index!\n");
return 0;
}
+/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
+/// being passed to this variadic function, and set the MachineModuleInfo's
+/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
+/// reference to _fltused on Windows, which will link in MSVCRT's
+/// floating-point support.
+void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo *MMI)
+{
+ FunctionType *FT = cast<FunctionType>(
+ I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (i->isFloatingPointTy()) {
+ MMI->setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
+ }
+}
+
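
Note the nested walk in ComputeUsesVAFloatArgument: po_begin/po_end over a Type* visit the type and every type it transitively contains, so a float buried inside a struct or array argument still trips the flag. Factored out as a standalone sketch of the same traversal:

    // Sketch: true if T is, or transitively contains, a floating-point type.
    static bool containsFloatTy(Type *T) {
      for (po_iterator<Type*> I = po_begin(T), E = po_end(T); I != E; ++I)
        if (I->isFloatingPointTy())
          return true;
      return false;
    }
    // e.g. for T = { i32, { double, i8* } } the walk reaches the double,
    // so containsFloatTy(T) is true and _fltused gets referenced.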
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
@@ -425,34 +452,6 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
-void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
- SmallPtrSet<const BasicBlock*, 4> Visited;
-
- // The 'eh.selector' call may not be in the direct successor of a basic block,
- // but could be several successors deeper. If we don't find it, try going one
- // level further. <rdar://problem/8824861>
- while (Visited.insert(SuccBB)) {
- for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
- I != E; ++I)
- if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
- // Apply the catch info to LPad.
- AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
-#ifndef NDEBUG
- if (!FLI.MBBMap[SuccBB]->isLandingPad())
- FLI.CatchInfoFound.insert(EHSel);
-#endif
- return;
- }
-
- const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
- if (Br && Br->isUnconditional())
- SuccBB = Br->getSuccessor(0);
- else
- break;
- }
-}
-
/// AddLandingPadInfo - Extract the exception handling information from the
/// landingpad instruction and add them to the specified machine module info.
void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
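
This deletion, together with the eh.exception/eh.selector cases removed from FastISel::SelectCall above, tracks the move to the landingpad instruction: the personality and catch-type data that used to be scraped out of @llvm.eh.selector call sites (sometimes several blocks away, hence CopyCatchInfo's successor chase) now sits directly on the landing pad, where AddLandingPadInfo can read it in place. Roughly, in IR terms (simplified for illustration):

    // Old intrinsic-based form, now gone:
    //   %exn = call i8* @llvm.eh.exception()
    //   %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, ...)
    // New instruction-based form, consumed by AddLandingPadInfo:
    //   %lp = landingpad { i8*, i32 } personality i8* @personality
    //           catch i8* @_ZTIi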
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 2ff66f8f8715..1467d887789c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -294,7 +294,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
DstRC = TII->getRegClass(*II, IIOpNum, TRI);
- assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
+ assert((DstRC || (MI->isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
@@ -351,6 +351,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
MI->addOperand(MachineOperand::CreateFPImm(CFP));
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+ } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) {
+ MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask()));
} else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
TGA->getTargetFlags()));
@@ -574,14 +576,19 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
if ((i & 1) == 0) {
- unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
- unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
+ RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
+ // Skip physical registers as they don't have a vreg to get and we'll
+ // insert copies for them in TwoAddressInstructionPass anyway.
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+ unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (SRC && SRC != RC) {
- MRI->setRegClass(NewVReg, SRC);
- RC = SRC;
+ if (SRC && SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
}
AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
@@ -700,33 +707,6 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
- // The MachineInstr constructor adds implicit-def operands. Scan through
- // these to determine which are dead.
- if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
- // First, collect all used registers.
- SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
- if (F->getOpcode() == ISD::CopyFromReg)
- UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
- else {
- // Collect declared implicit uses.
- const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
- UsedRegs.append(MCID.getImplicitUses(),
- MCID.getImplicitUses() + MCID.getNumImplicitUses());
- // In addition to declared implicit uses, we must also check for
- // direct RegisterSDNode operands.
- for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
- if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- UsedRegs.push_back(Reg);
- }
- }
- // Then mark unused registers as dead.
- MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
- }
-
// Add result register values for things that are defined by this
// instruction.
if (NumResults)
@@ -751,30 +731,63 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
+ // The MachineInstr may also define physregs instead of virtregs. These
+ // physreg values can reach other instructions in different ways:
+ //
+ // 1. When there is a use of a Node value beyond the explicitly defined
+ // virtual registers, we emit a CopyFromReg for one of the implicitly
+ // defined physregs. This only happens when HasPhysRegOuts is true.
+ //
+ // 2. A CopyFromReg reading a physreg may be glued to this instruction.
+ //
+ // 3. A glued instruction may implicitly use a physreg.
+ //
+ // 4. A glued instruction may use a RegisterSDNode operand.
+ //
+ // Collect all the used physreg defs, and make sure that any unused physreg
+ // defs are marked as dead.
+ SmallVector<unsigned, 8> UsedRegs;
+
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
- if (Node->hasAnyUseOfValue(i))
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
- // If there are no uses, mark the register as dead now, so that
- // MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Glue value, for the benefit of targets still using
- // Glue for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- MI->addRegisterDead(Reg, TRI);
+ if (!Node->hasAnyUseOfValue(i))
+ continue;
+ // This implicitly defined physreg has a use.
+ UsedRegs.push_back(Reg);
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
}
}
- // If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any glue outputs, we don't do this
- // because we don't know what implicit defs are being used by glued nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
- if (const unsigned *IDList = II.getImplicitDefs()) {
- for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
- i != e; ++i)
- MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ // Scan the glue chain for any used physregs.
+ if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) {
+ if (F->getOpcode() == ISD::CopyFromReg) {
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ continue;
+ } else if (F->getOpcode() == ISD::CopyToReg) {
+ // Skip CopyToReg nodes that are internal to the glue chain.
+ continue;
+ }
+ // Collect declared implicit uses.
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
}
+ }
+
+ // Finally mark unused registers as dead.
+ if (!UsedRegs.empty() || II.getImplicitDefs())
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
// Run post-isel target hook to adjust this instruction if needed.
#ifdef NDEBUG
@@ -794,10 +807,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->dump();
#endif
llvm_unreachable("This target-independent node should have been selected!");
- break;
case ISD::EntryToken:
llvm_unreachable("EntryToken should have been excluded from the schedule!");
- break;
case ISD::MERGE_VALUES:
case ISD::TokenFactor: // fall thru
break;
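
The restructured block above merges what used to be two independent dead-marking passes into one: any implicit def reached through one of the four routes listed in the comment lands in UsedRegs, and a single setPhysRegsDeadExcept call marks everything else dead. A typical concrete case (illustrative, x86-flavored): a call node implicitly defines the return register plus a flags register, and the only reader is a glued CopyFromReg —

    //   t0: ch,glue = <call> ...             ; implicit-defs EAX, EFLAGS
    //   t1: i32     = CopyFromReg t0:1, EAX  ; glued user, so EAX is used
    // UsedRegs ends up as { EAX }; setPhysRegsDeadExcept() then marks only
    // the EFLAGS def dead.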
diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
new file mode 100644
index 000000000000..81d2e000a2e8
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SelectionDAG
+parent = CodeGen
+required_libraries = Analysis CodeGen Core MC Support Target TransformUtils
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 63255ae2ebd9..a96a99781f4e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -46,37 +46,18 @@ using namespace llvm;
/// will attempt to merge setcc and brc instructions into brcc's.
///
namespace {
-class SelectionDAGLegalize {
+class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
const TargetMachine &TM;
const TargetLowering &TLI;
SelectionDAG &DAG;
- // Libcall insertion helpers.
-
- /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
- /// legalized. We use this to ensure that calls are properly serialized
- /// against each other, including inserted libcalls.
- SDValue LastCALLSEQ_END;
-
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
- /// LegalizedNodes - For nodes that are of legal width, and that have more
- /// than one use, this map indicates what regularized operand to use. This
- /// allows us to avoid legalizing the same thing more than once.
- DenseMap<SDValue, SDValue> LegalizedNodes;
+ /// LegalizePosition - The iterator for walking through the node list.
+ SelectionDAG::allnodes_iterator LegalizePosition;
- void AddLegalizedOperand(SDValue From, SDValue To) {
- LegalizedNodes.insert(std::make_pair(From, To));
- // If someone requests legalization of the new node, return itself.
- if (From != To)
- LegalizedNodes.insert(std::make_pair(To, To));
+ /// LegalizedNodes - The set of nodes which have already been legalized.
+ SmallPtrSet<SDNode *, 16> LegalizedNodes;
- // Transfer SDDbgValues.
- DAG.TransferDbgValues(From, To);
- }
+ // Libcall insertion helpers.
public:
explicit SelectionDAGLegalize(SelectionDAG &DAG);
@@ -84,9 +65,8 @@ public:
void LegalizeDAG();
private:
- /// LegalizeOp - Return a legal replacement for the given operation, with
- /// all legal operands.
- SDValue LegalizeOp(SDValue O);
+ /// LegalizeOp - Legalizes the given operation.
+ void LegalizeOp(SDNode *Node);
SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -105,10 +85,7 @@ private:
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const;
-
- bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+ ArrayRef<int> Mask) const;
void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
DebugLoc dl);
@@ -150,10 +127,46 @@ private:
SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+ SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP);
+
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
- void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+ void ExpandNode(SDNode *Node);
+ void PromoteNode(SDNode *Node);
+
+ void ForgetNode(SDNode *N) {
+ LegalizedNodes.erase(N);
+ if (LegalizePosition == SelectionDAG::allnodes_iterator(N))
+ ++LegalizePosition;
+ }
+
+public:
+ // DAGUpdateListener implementation.
+ virtual void NodeDeleted(SDNode *N, SDNode *E) {
+ ForgetNode(N);
+ }
+ virtual void NodeUpdated(SDNode *N) {}
+
+ // Node replacement helpers
+ void ReplacedNode(SDNode *N) {
+ if (N->use_empty()) {
+ DAG.RemoveDeadNode(N, this);
+ } else {
+ ForgetNode(N);
+ }
+ }
+ void ReplaceNode(SDNode *Old, SDNode *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
+ void ReplaceNode(SDValue Old, SDValue New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old.getNode());
+ }
+ void ReplaceNode(SDNode *Old, const SDValue *New) {
+ DAG.ReplaceAllUsesWith(Old, New, this);
+ ReplacedNode(Old);
+ }
};
}
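
These helpers are the crux of the rewrite: expansions now mutate the DAG in place, and every replacement goes through ReplaceNode with 'this' registered as the DAGUpdateListener, so node deletions keep LegalizePosition and the LegalizedNodes set coherent instead of leaving the walk parked on a dead node. A minimal usage sketch (Old, Lo, Hi are placeholders for whatever a given expansion builds):

    SDValue NewVal = DAG.getNode(ISD::OR, dl, VT, Lo, Hi); // the legal form
    ReplaceNode(SDValue(Old, 0), NewVal); // RAUW, then forget/delete Old safely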
@@ -164,7 +177,7 @@ private:
SDValue
SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
- SmallVectorImpl<int> &Mask) const {
+ ArrayRef<int> Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
unsigned NumDestElts = NVT.getVectorNumElements();
unsigned NumEltsGrowth = NumDestElts / NumMaskElts;
@@ -195,145 +208,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
}
void SelectionDAGLegalize::LegalizeDAG() {
- LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
-
- // The legalize process is inherently a bottom-up recursive process (users
- // legalize their uses before themselves). Given infinite stack space, we
- // could just start legalizing on the root and traverse the whole graph. In
- // practice however, this causes us to run out of stack space on large basic
- // blocks. To avoid this problem, compute an ordering of the nodes where each
- // node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
- LegalizeOp(SDValue(I, 0));
- // Finally, it's possible the root changed. Get the new root.
- SDValue OldRoot = DAG.getRoot();
- assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
- DAG.setRoot(LegalizedNodes[OldRoot]);
-
- LegalizedNodes.clear();
-
- // Remove dead nodes now.
- DAG.RemoveDeadNodes();
-}
-
-
-/// FindCallEndFromCallStart - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
- // Nested CALLSEQ_START/END constructs aren't yet legal,
- // but we can DTRT and handle them correctly here.
- if (Node->getOpcode() == ISD::CALLSEQ_START)
- depth++;
- else if (Node->getOpcode() == ISD::CALLSEQ_END) {
- depth--;
- if (depth == 0)
- return Node;
- }
- if (Node->use_empty())
- return 0; // No CallSeqEnd
-
- // The chain is usually at the end.
- SDValue TheChain(Node, Node->getNumValues()-1);
- if (TheChain.getValueType() != MVT::Other) {
- // Sometimes it's at the beginning.
- TheChain = SDValue(Node, 0);
- if (TheChain.getValueType() != MVT::Other) {
- // Otherwise, hunt for it.
- for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i)
- if (Node->getValueType(i) == MVT::Other) {
- TheChain = SDValue(Node, i);
- break;
- }
-
- // Otherwise, we walked into a node without a chain.
- if (TheChain.getValueType() != MVT::Other)
- return 0;
+ // Visit all the nodes. We start in topological order, so that we see
+ // nodes with their original operands intact. Legalization can produce
+ // new nodes which may themselves need to be legalized. Iterate until all
+ // nodes have been legalized.
+ for (;;) {
+ bool AnyLegalized = false;
+ for (LegalizePosition = DAG.allnodes_end();
+ LegalizePosition != DAG.allnodes_begin(); ) {
+ --LegalizePosition;
+
+ SDNode *N = LegalizePosition;
+ if (LegalizedNodes.insert(N)) {
+ AnyLegalized = true;
+ LegalizeOp(N);
+ }
}
- }
-
- for (SDNode::use_iterator UI = Node->use_begin(),
- E = Node->use_end(); UI != E; ++UI) {
-
- // Make sure to only follow users of our token chain.
- SDNode *User = *UI;
- for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
- if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User, depth))
- return Result;
- }
- return 0;
-}
-
-/// FindCallStartFromCallEnd - Given a chained node that is part of a call
-/// sequence, find the CALLSEQ_START node that initiates the call sequence.
-static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
- int nested = 0;
- assert(Node && "Didn't find callseq_start for a call??");
- while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
- Node = Node->getOperand(0).getNode();
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- switch (Node->getOpcode()) {
- default:
+ if (!AnyLegalized)
break;
- case ISD::CALLSEQ_START:
- if (!nested)
- return Node;
- nested--;
- break;
- case ISD::CALLSEQ_END:
- nested++;
- break;
- }
- }
- return 0;
-}
-
-/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
-/// see if any uses can reach Dest. If no dest operands can get to dest,
-/// legalize them, legalize ourselves, and return false; otherwise, return true.
-///
-/// Keep track of the nodes we find that actually do lead to Dest in
-/// NodesLeadingTo. This avoids retraversing them an exponential number of times.
-///
-bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
- SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
- if (N == Dest) return true; // N certainly leads to Dest :)
-
- // If we've already processed this node and it does lead to Dest, there is no
- // need to reprocess it.
- if (NodesLeadingTo.count(N)) return true;
-
- // If the first result of this node has already been legalized, then it cannot
- // reach N.
- if (LegalizedNodes.count(SDValue(N, 0))) return false;
-
- // Okay, this node has not already been legalized. Check and legalize all
- // operands. If none lead to Dest, then we can legalize this node.
- bool OperandsLeadToDest = false;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
- NodesLeadingTo);
- if (OperandsLeadToDest) {
- NodesLeadingTo.insert(N);
- return true;
}
- // Okay, this node looks safe, legalize it and return false.
- LegalizeOp(SDValue(N, 0));
- return false;
+ // Remove dead nodes now.
+ DAG.RemoveDeadNodes();
}
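
The new driver is an iterate-to-fixpoint sweep rather than the old recursive, memoized walk: each backward pass over the topologically ordered node list legalizes anything not yet in LegalizedNodes, and if legalization created new nodes, another pass picks them up. Stripped to its shape (mirroring the hunk above):

    // Sketch: repeat full sweeps until one sweep legalizes nothing new.
    SelectionDAG::allnodes_iterator Pos;
    bool AnyLegalized;
    do {
      AnyLegalized = false;
      for (Pos = DAG.allnodes_end(); Pos != DAG.allnodes_begin(); ) {
        --Pos;
        SDNode *N = Pos;
        if (LegalizedNodes.insert(N)) {   // first visit of this node
          AnyLegalized = true;
          LegalizeOp(N);                  // may create or delete nodes
        }
      }
    } while (AnyLegalized);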
/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
/// a load from the constant pool.
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
- SelectionDAG &DAG, const TargetLowering &TLI) {
+SDValue
+SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
bool Extend = false;
DebugLoc dl = CFP->getDebugLoc();
@@ -369,20 +274,27 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
- DAG.getEntryNode(),
- CPIdx, MachinePointerInfo::getConstantPool(),
- VT, false, false, Alignment);
- return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(), false, false,
- Alignment);
+ if (Extend) {
+ SDValue Result =
+ DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
+ DAG.getEntryNode(),
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
+ return Result;
+ }
+ SDValue Result =
+ DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(), false, false, false,
+ Alignment);
+ return Result;
}
/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores.
-static
-SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SelectionDAGLegalize *DAGLegalize) {
+ assert(ST->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed stores not implemented!");
SDValue Chain = ST->getChain();
SDValue Ptr = ST->getBasePtr();
SDValue Val = ST->getValue();
@@ -397,8 +309,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
- ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -427,7 +341,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Load one integer register's worth from the stack slot.
SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -458,8 +372,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// The order of the stores doesn't matter - say it with a TokenFactor.
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
+ return;
}
assert(ST->getMemoryVT().isInteger() &&
!ST->getMemoryVT().isVector() &&
@@ -488,13 +405,18 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ SDValue Result =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
}
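
Since all three exit paths above now publish their result through ReplaceNode rather than returning it, it is worth spelling out what the final integer fallback builds. For an i32 store at alignment 2 on a little-endian target, a sketch of the DAG shape (not the exact emitted node sequence):

    //   Hi  = srl Val, 16                     ; high half of the value
    //   St0 = truncstore i16 Val  -> Ptr      ; low half, aligned to 2
    //   St1 = truncstore i16 Hi   -> Ptr + 2  ; high half
    //   TF  = TokenFactor St0, St1            ; stores stay unordered
    //   DAGLegalize->ReplaceNode(SDValue(ST, 0), TF)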
/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads.
-static
-SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+static void
+ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ SDValue &ValResult, SDValue &ChainResult) {
+ assert(LD->getAddressingMode() == ISD::UNINDEXED &&
+ "unaligned indexed loads not implemented!");
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -507,13 +429,15 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// then bitconvert to floating point or vector.
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(),
+ LD->isInvariant(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
- SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = Chain;
+ return;
}
// Copy the value to a (aligned) stack slot using (unaligned) integer
@@ -537,6 +461,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(Offset),
LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(),
MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
@@ -572,8 +497,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
MachinePointerInfo(), LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Load;
+ ChainResult = TF;
+ return;
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -626,8 +552,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
- SDValue Ops[] = { Result, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ ValResult = Result;
+ ChainResult = TF;
}
/// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
@@ -672,7 +598,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false,
+ false, 0);
}
@@ -763,11 +690,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
/// LegalizeOp - Return a legal replacement for the given operation, with
/// all legal operands.
-SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
- if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
- return Op;
+void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ return;
- SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
@@ -782,13 +708,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- if (I != LegalizedNodes.end()) return I->second;
-
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
- SDValue Result = Op;
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
@@ -798,10 +718,15 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
- case ISD::VAARG:
case ISD::STACKSAVE:
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::VAARG:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getValueType(0));
+ if (Action != TargetLowering::Promote)
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
@@ -865,7 +790,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -882,17 +806,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
- case ISD::BUILD_VECTOR:
- // A weird case: legalization for BUILD_VECTOR never legalizes the
- // operands!
- // FIXME: This really sucks... changing it isn't semantically incorrect,
- // but it massively pessimizes the code for floating-point BUILD_VECTORs
- // because ConstantFP operands get legalized into constant pool loads
- // before the BUILD_VECTOR code can see them. It doesn't usually bite,
- // though, because BUILD_VECTORS usually get lowered into other nodes
- // which get legalized properly.
- SimpleFinishLegalizing = false;
- break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -903,22 +816,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
if (SimpleFinishLegalizing) {
- SmallVector<SDValue, 8> Ops, ResultVals;
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- Ops.push_back(LegalizeOp(Node->getOperand(i)));
+ Ops.push_back(Node->getOperand(i));
switch (Node->getOpcode()) {
default: break;
- case ISD::BR:
- case ISD::BRIND:
- case ISD::BR_JT:
- case ISD::BR_CC:
- case ISD::BRCOND:
- // Branches tweak the chain to include LastCALLSEQ_END
- Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0],
- LastCALLSEQ_END);
- Ops[0] = LegalizeOp(Ops[0]);
- LastCALLSEQ_END = DAG.getEntryNode();
- break;
case ISD::SHL:
case ISD::SRL:
case ISD::SRA:
@@ -926,57 +828,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ROTR:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[1].getValueType().isVector())
- Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[1]));
+ if (!Ops[1].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[1] = Handle.getValue();
+ }
break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
case ISD::SHL_PARTS:
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Ops[2].getValueType().isVector())
- Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(),
- Ops[2]));
+ if (!Ops[2].getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]);
+ HandleSDNode Handle(SAO);
+ LegalizeOp(SAO.getNode());
+ Ops[2] = Handle.getValue();
+ }
break;
}
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
- Ops.size()), 0);
+ SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ if (NewNode != Node) {
+ DAG.ReplaceAllUsesWith(Node, NewNode, this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i));
+ ReplacedNode(Node);
+ Node = NewNode;
+ }
switch (Action) {
case TargetLowering::Legal:
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- ResultVals.push_back(Result.getValue(i));
- break;
+ return;
case TargetLowering::Custom:
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
- Tmp1 = TLI.LowerOperation(Result, DAG);
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp1.getNode()) {
+ SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) {
if (e == 1)
ResultVals.push_back(Tmp1);
else
ResultVals.push_back(Tmp1.getValue(i));
}
- break;
+ if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) {
+ DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this);
+ for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
+ DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]);
+ ReplacedNode(Node);
+ }
+ return;
}
// FALL THROUGH
case TargetLowering::Expand:
- ExpandNode(Result.getNode(), ResultVals);
- break;
+ ExpandNode(Node);
+ return;
case TargetLowering::Promote:
- PromoteNode(Result.getNode(), ResultVals);
- break;
- }
- if (!ResultVals.empty()) {
- for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) {
- if (ResultVals[i] != SDValue(Node, i))
- ResultVals[i] = LegalizeOp(ResultVals[i]);
- AddLegalizedOperand(SDValue(Node, i), ResultVals[i]);
- }
- return ResultVals[Op.getResNo()];
+ PromoteNode(Node);
+ return;
}
}
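
Summing up the dispatch above: LegalizeOp no longer hands a replacement value back for the caller to cache; each action edits the DAG and returns void. Custom lowering that declines (returns a null SDValue) deliberately falls through into Expand, preserving the old behavior. Schematically, with the hunk's own names and the RAUW bookkeeping elided:

    switch (Action) {
    case TargetLowering::Legal:   return;              // nothing to do
    case TargetLowering::Custom:
      Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
      if (Tmp1.getNode()) { /* RAUW results, move dbg values */ return; }
      // FALL THROUGH
    case TargetLowering::Expand:  ExpandNode(Node);  return;
    case TargetLowering::Promote: PromoteNode(Node); return;
    }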
@@ -987,160 +898,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Node->dump( &DAG);
dbgs() << "\n";
#endif
- assert(0 && "Do not know how to legalize this operator!");
+ llvm_unreachable("Do not know how to legalize this operator!");
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL: {
- // Scalarize vector SRA/SRL/SHL.
- EVT VT = Node->getValueType(0);
- assert(VT.isVector() && "Unable to legalize non-vector shift");
- assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal");
- unsigned NumElem = VT.getVectorNumElements();
-
- SmallVector<SDValue, 8> Scalars;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(0), DAG.getIntPtrConstant(Idx));
- SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- VT.getScalarType(),
- Node->getOperand(1), DAG.getIntPtrConstant(Idx));
- Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
- VT.getScalarType(), Ex, Sh));
- }
- Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Scalars[0], Scalars.size());
- break;
- }
-
- case ISD::BUILD_VECTOR:
- switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Result = Tmp3;
- break;
- }
- // FALLTHROUGH
- case TargetLowering::Expand:
- Result = ExpandBUILD_VECTOR(Result.getNode());
- break;
- }
- break;
- case ISD::CALLSEQ_START: {
- SDNode *CallEnd = FindCallEndFromCallStart(Node);
-
- // Recursively Legalize all of the inputs of the call end that do not lead
- // to this call start. This ensures that any libcalls that need be inserted
- // are inserted *before* the CALLSEQ_START.
- {SmallPtrSet<SDNode*, 32> NodesLeadingTo;
- for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i)
- LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node,
- NodesLeadingTo);
- }
-
- // Now that we have legalized all of the inputs (which may have inserted
- // libcalls), create the new CALLSEQ_START node.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
-
- // Merge in the last call to ensure that this call starts after the last
- // call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
- Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- Tmp1, LastCALLSEQ_END);
- Tmp1 = LegalizeOp(Tmp1);
- }
-
- // Do not try to legalize the target-specific arguments (#1+).
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
- Ops.size()), Result.getResNo());
- }
-
- // Remember that the CALLSEQ_START is legalized.
- AddLegalizedOperand(Op.getValue(0), Result);
- if (Node->getNumValues() == 2) // If this has a flag result, remember it.
- AddLegalizedOperand(Op.getValue(1), Result.getValue(1));
-
- // Now that the callseq_start and all of the non-call nodes above this call
- // sequence have been legalized, legalize the call itself. During this
- // process, no libcalls can/will be inserted, guaranteeing that no calls
- // can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
- // Note that we are selecting this call!
- LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
-
- // Legalize the call, starting from the CALLSEQ_END.
- LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
- return Result;
- }
+ case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
- // If the CALLSEQ_START node hasn't been legalized first, legalize it. This
- // will cause this node to be legalized as well as handling libcalls right.
- if (LastCALLSEQ_END.getNode() != Node) {
- LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
- DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
- assert(I != LegalizedNodes.end() &&
- "Legalizing the call start should have legalized this node!");
- return I->second;
- }
-
- // Otherwise, the call start has been legalized and everything is going
- // according to plan. Just legalize ourselves normally here.
- Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
- // Do not try to legalize the target-specific arguments (#1+), except for
- // an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
- if (Tmp1 != Node->getOperand(0)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- } else {
- Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
- if (Tmp1 != Node->getOperand(0) ||
- Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
- SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
- Ops[0] = Tmp1;
- Ops.back() = Tmp2;
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- &Ops[0], Ops.size()),
- Result.getResNo());
- }
- }
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
- // This finishes up call legalization.
- IsLegalizingCall = false;
-
- // If the CALLSEQ_END node has a flag, remember that we legalized it.
- AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
- if (Node->getNumValues() == 2)
- AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
- return Result.getValue(Op.getResNo());
+ break;
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Node);
- Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+ Tmp1 = LD->getChain(); // Legalize the chain.
+ Tmp2 = LD->getBasePtr(); // Legalize the base pointer.
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
EVT VT = Node->getValueType(0);
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp3 = Result.getValue(0);
- Tmp4 = Result.getValue(1);
+ Tmp3 = SDValue(Node, 0);
+ Tmp4 = SDValue(Node, 1);
switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned load and the target doesn't support it,
// expand it.
@@ -1148,20 +923,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp3 = Result.getOperand(0);
- Tmp4 = Result.getOperand(1);
- Tmp3 = LegalizeOp(Tmp3);
- Tmp4 = LegalizeOp(Tmp4);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp3, Tmp4);
}
}
break;
case TargetLowering::Custom:
Tmp1 = TLI.LowerOperation(Tmp3, DAG);
if (Tmp1.getNode()) {
- Tmp3 = LegalizeOp(Tmp1);
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ Tmp3 = Tmp1;
+ Tmp4 = Tmp1.getValue(1);
}
break;
case TargetLowering::Promote: {
@@ -1172,17 +943,19 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
- Tmp4 = LegalizeOp(Tmp1.getValue(1));
+ LD->isInvariant(), LD->getAlignment());
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1);
+ Tmp4 = Tmp1.getValue(1);
break;
}
}
- // Since loads produce two values, make sure to remember that we
- // legalized both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp3);
- AddLegalizedOperand(SDValue(Node, 1), Tmp4);
- return Op.getResNo() ? Tmp4 : Tmp3;
+ if (Tmp4.getNode() != Node) {
+ assert(Tmp3.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4);
+ ReplacedNode(Node);
+ }
+ return;
}
EVT SrcVT = LD->getMemoryVT();
@@ -1213,9 +986,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
Ch = Result.getValue(1); // The chain.
@@ -1230,8 +1004,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getValueType(), Result,
DAG.getValueType(SrcVT));
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp1 = Result;
+ Tmp2 = Ch;
} else if (SrcWidth & (SrcWidth - 1)) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
@@ -1274,7 +1048,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
} else {
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
@@ -1304,29 +1078,25 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
TLI.getShiftAmountTy(Hi.getValueType())));
// Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
}
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
+ Tmp2 = Ch;
} else {
switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
// FALLTHROUGH
case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
+ Tmp1 = SDValue(Node, 0);
+ Tmp2 = SDValue(Node, 1);
if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
+ Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG);
if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ Tmp1 = Tmp3;
+ Tmp2 = Tmp3.getValue(1);
}
} else {
// If this is an unaligned load and the target doesn't support it,
@@ -1337,12 +1107,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
unsigned ABIAlignment =
TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
+ ExpandUnalignedLoad(cast<LoadSDNode>(Node),
+ DAG, TLI, Tmp1, Tmp2);
}
}
}
@@ -1352,7 +1118,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ LD->isInvariant(), LD->getAlignment());
unsigned ExtendOp;
switch (ExtType) {
case ISD::EXTLOAD:
@@ -1363,95 +1129,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
default: llvm_unreachable("Unexpected extend load type!");
}
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp2 = Load.getValue(1);
break;
}
- // If this is a promoted vector load, and the vector element types are
- // legal, then scalarize it.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
- TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- Node->getValueType(0).getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
-
- LoadVals.push_back(ScalarLoad.getValue(0));
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Node->getValueType(0), &LoadVals[0], LoadVals.size());
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
- }
-
- // If this is a promoted vector load, and the vector element types are
- // illegal, create the promoted vector from bitcasted segments.
- if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
- EVT MemElemTy = Node->getValueType(0).getScalarType();
- EVT SrcSclrTy = SrcVT.getScalarType();
- unsigned SizeRatio =
- (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
-
- SmallVector<SDValue, 8> LoadVals;
- SmallVector<SDValue, 8> LoadChains;
- unsigned NumElem = SrcVT.getVectorNumElements();
- unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
-
- for (unsigned Idx=0; Idx<NumElem; Idx++) {
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
- SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
- SrcVT.getScalarType(),
- Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
- SrcVT.getScalarType(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- if (TLI.isBigEndian()) {
- // MSB (which is garbage, comes first)
- LoadVals.push_back(ScalarLoad.getValue(0));
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- } else {
- // LSB (which is data, comes first)
- for (unsigned i = 0; i<SizeRatio-1; ++i)
- LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
- LoadVals.push_back(ScalarLoad.getValue(0));
- }
- LoadChains.push_back(ScalarLoad.getValue(1));
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
- EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
- SrcVT.getScalarType(), NumElem*SizeRatio);
- SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
- TempWideVector, &LoadVals[0], LoadVals.size());
-
- // Cast to the correct type
- ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
-
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
- break;
-
- }
+ assert(!SrcVT.isVector() &&
+ "Vector Loads are handled in LegalizeVectorOps");
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
@@ -1461,10 +1145,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
- Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
+ SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
SDValue ValRes;
if (ExtType == ISD::SEXTLOAD)
ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1472,42 +1156,41 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result, DAG.getValueType(SrcVT));
else
ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Tmp1 = ValRes;
+ Tmp2 = Result.getValue(1);
break;
}
}
// Since loads produce two values, make sure to remember that we legalized
// both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
+ if (Tmp2.getNode() != Node) {
+ assert(Tmp1.getNode() != Node && "Load must be completely replaced");
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
+ ReplacedNode(Node);
+ }
+ break;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
- Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
- Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
+ Tmp1 = ST->getChain();
+ Tmp2 = ST->getBasePtr();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
if (!ST->isTruncatingStore()) {
if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- Result = SDValue(OptStore, 0);
+ ReplaceNode(ST, OptStore);
break;
}
{
- Tmp3 = LegalizeOp(ST->getValue());
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
+ Tmp3 = ST->getValue();
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1515,27 +1198,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node),
+ DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Tmp1 = TLI.LowerOperation(Result, DAG);
- if (Tmp1.getNode()) Result = Tmp1;
+ Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Tmp1.getNode())
+ ReplaceNode(SDValue(Node, 0), Tmp1);
break;
- case TargetLowering::Promote:
+ case TargetLowering::Promote: {
assert(VT.isVector() && "Unknown legal promote case!");
Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getPointerInfo(), isVolatile,
- isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ }
break;
}
} else {
- Tmp3 = LegalizeOp(ST->getValue());
+ Tmp3 = ST->getValue();
EVT StVT = ST->getMemoryVT();
unsigned StWidth = StVT.getSizeInBits();
@@ -1547,8 +1234,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- NVT, isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
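A host-side sketch of the two-store split described above, assuming a little-endian layout; storeI24 is an illustrative helper, not an LLVM API. An i24 truncating store becomes an i16 store of the low bits plus an i8 store of the remaining bits at offset 2.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Hypothetical model of the non-power-of-2 truncstore split (little-endian).
    static void storeI24(uint8_t *P, uint32_t V) {
      uint16_t LoPart = (uint16_t)V;        // round width: low 16 bits
      uint8_t  HiPart = (uint8_t)(V >> 16); // extra width: remaining 8 bits
      std::memcpy(P, &LoPart, 2);
      std::memcpy(P + 2, &HiPart, 1);
    }

    int main() {
      uint8_t Buf[3];
      storeI24(Buf, 0xABCDEF);
      assert(Buf[0] == 0xEF && Buf[1] == 0xCD && Buf[2] == 0xAB);
    }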
@@ -1602,17 +1291,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
// The order of the stores doesn't matter.
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
} else {
- if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
- Tmp2 != ST->getBasePtr())
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp3, Tmp2,
- ST->getOffset()),
- Result.getResNo());
-
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: assert(0 && "This action is not supported yet!");
+ default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal:
// If this is an unaligned store and the target doesn't support it,
// expand it.
@@ -1620,120 +1303,24 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
- Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
- DAG, TLI);
+ ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this);
}
break;
case TargetLowering::Custom:
- Result = TLI.LowerOperation(Result, DAG);
+ ReplaceNode(SDValue(Node, 0),
+ TLI.LowerOperation(SDValue(Node, 0), DAG));
break;
case TargetLowering::Expand:
-
- EVT WideScalarVT = Tmp3.getValueType().getScalarType();
- EVT NarrowScalarVT = StVT.getScalarType();
-
- if (StVT.isVector()) {
- unsigned NumElem = StVT.getVectorNumElements();
- // The type of the data we want to save
- EVT RegVT = Tmp3.getValueType();
- EVT RegSclVT = RegVT.getScalarType();
- // The type of data as saved in memory.
- EVT MemSclVT = StVT.getScalarType();
-
- bool RegScalarLegal = TLI.isTypeLegal(RegSclVT);
- bool MemScalarLegal = TLI.isTypeLegal(MemSclVT);
-
- // We need to expand this store. If the register element type
- // is legal then we can scalarize the vector and use
- // truncating stores.
- if (RegScalarLegal) {
- // Cast floats into integers
- unsigned ScalarSize = MemSclVT.getSizeInBits();
- EVT EltVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
-
- // Round odd types to the next pow of two.
- if (!isPowerOf2_32(ScalarSize))
- ScalarSize = NextPowerOf2(ScalarSize);
-
- // Store Stride in bytes
- unsigned Stride = ScalarSize/8;
- // Extract each of the elements from the original vector
- // and save them into memory individually.
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx = 0; Idx < NumElem; Idx++) {
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- RegSclVT, Tmp3, DAG.getIntPtrConstant(Idx));
-
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // This scalar TruncStore may be illegal, but we legalize it
- // later.
- SDValue Store = DAG.getTruncStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
- isVolatile, isNonTemporal, Alignment);
-
- Stores.push_back(Store);
- }
-
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- // The scalar register type is illegal.
- // For example, saving <2 x i64> -> <2 x i32> on x86.
- // In here we bitcast the value into a vector of smaller parts and
- // save it using smaller scalars.
- if (!RegScalarLegal && MemScalarLegal) {
- // Store Stride in bytes
- unsigned Stride = MemSclVT.getSizeInBits()/8;
-
- unsigned SizeRatio =
- (RegSclVT.getSizeInBits() / MemSclVT.getSizeInBits());
-
- EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(),
- MemSclVT,
- SizeRatio * NumElem);
-
- // Cast the wide elem vector to wider vec with smaller elem type.
- // Example <2 x i64> -> <4 x i32>
- Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
-
- SmallVector<SDValue, 8> Stores;
- for (unsigned Idx=0; Idx < NumElem * SizeRatio; Idx++) {
- // Extract the Ith element.
- SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
- // Bump pointer.
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(Stride));
-
- // Store this element only if it is:
- // - the first element on big endian, or
- // - the last element on little endian.
- if (( TLI.isBigEndian() && (Idx % SizeRatio == 0)) ||
- ((!TLI.isBigEndian() && (Idx % SizeRatio == SizeRatio-1)))) {
- SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
- ST->getPointerInfo().getWithOffset(Idx*Stride),
- isVolatile, isNonTemporal, Alignment);
- Stores.push_back(Store);
- }
- }
- Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
- break;
- }
-
- assert(false && "Unable to legalize the vector trunc store!");
- }// is vector
-
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
// TRUNCSTORE:i16 i32 -> STORE i16
assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ SDValue Result =
+ DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ ReplaceNode(SDValue(Node, 0), Result);
break;
}
}
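For reference, a host-side sketch of this TRUNCSTORE expansion; truncStoreI16 is an illustrative helper, with a little-endian host assumed for the check. The value is truncated in a register and then written with an ordinary narrow store.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Hypothetical scalar equivalent of TRUNCSTORE:i16 i32 -> STORE i16.
    static void truncStoreI16(uint8_t *P, uint32_t V) {
      uint16_t T = (uint16_t)V;   // ISD::TRUNCATE
      std::memcpy(P, &T, 2);      // plain i16 STORE
    }

    int main() {
      uint8_t Buf[2];
      truncStoreI16(Buf, 0x12345678);
      uint16_t Out;
      std::memcpy(&Out, Buf, 2);
      assert(Out == 0x5678);
    }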
@@ -1741,17 +1328,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
}
- assert(Result.getValueType() == Op.getValueType() &&
- "Bad legalization!");
-
- // Make sure that the generated code is itself legal.
- if (Result != Op)
- Result = LegalizeOp(Result);
-
- // Note that LegalizeOp may be reentered even from single-use nodes, which
- // means that we always must cache transformed nodes.
- AddLegalizedOperand(Op, Result);
- return Result;
}
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1778,7 +1354,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
if (Op.getValueType().isVector())
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
MachinePointerInfo(),
Vec.getValueType().getVectorElementType(),
@@ -1826,7 +1402,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Finally, load the updated vector.
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
- false, false, 0);
+ false, false, false, 0);
}
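A scalar model of the insert-through-stack expansion above; the types and names are illustrative only. The vector is spilled, one element is overwritten in the stack slot, and the whole vector is reloaded.

    #include <cassert>
    #include <cstring>

    struct V4 { int E[4]; };   // stand-in for a v4i32 value

    static V4 insertElt(V4 Vec, int Val, unsigned Idx) {
      int Slot[4];                            // stack temporary
      std::memcpy(Slot, Vec.E, sizeof(Slot)); // spill the vector
      Slot[Idx] = Val;                        // element store at Idx * sizeof(int)
      V4 Out;
      std::memcpy(Out.E, Slot, sizeof(Slot)); // reload the updated vector
      return Out;
    }

    int main() {
      V4 V = {{1, 2, 3, 4}};
      assert(insertElt(V, 9, 2).E[2] == 9);
    }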
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1876,7 +1452,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+ false, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1905,7 +1482,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1916,7 +1493,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
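A host-side model of what this sign-bit extraction computes, assuming IEEE 754 floats; copySignF is an illustrative helper. The DAG expansion tests the loaded sign bit and selects between FABS and FNEG of the magnitude rather than splicing bits directly.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Hypothetical scalar model: take the sign of Sgn, the magnitude of Mag.
    static float copySignF(float Mag, float Sgn) {
      uint32_t MB, SB;
      std::memcpy(&MB, &Mag, 4);
      std::memcpy(&SB, &Sgn, 4);  // "load a legal integer with the sign bit"
      MB = (MB & 0x7FFFFFFFu) | (SB & 0x80000000u);
      float R;
      std::memcpy(&R, &MB, 4);
      return R;
    }

    int main() {
      assert(copySignF(3.0f, -1.0f) == -3.0f);
      assert(copySignF(-2.5f, 1.0f) == 2.5f);
    }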
@@ -1984,7 +1561,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
EVT OpVT = LHS.getValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
- default: assert(0 && "Unknown condition code action!");
+ default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
// Nothing to do.
break;
@@ -1992,7 +1569,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
unsigned Opc = 0;
switch (CCCode) {
- default: assert(0 && "Don't know how to expand this condition!");
+ default: llvm_unreachable("Don't know how to expand this condition!");
case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO; Opc = ISD::AND; break;
case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO; Opc = ISD::AND; break;
@@ -2058,7 +1635,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
- false, false, DestAlign);
+ false, false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
@@ -2081,7 +1658,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
MachinePointerInfo::getFixedStack(SPFI),
- false, false, 0);
+ false, false, false, 0);
}
@@ -2127,7 +1704,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// If all elements are constants, create a load from the constant pool.
if (isConstant) {
- std::vector<Constant*> CV;
+ SmallVector<Constant*, 16> CV;
for (unsigned i = 0, e = NumElems; i != e; ++i) {
if (ConstantFPSDNode *V =
dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
@@ -2155,7 +1732,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
}
if (!MoreThanTwoValues) {
@@ -2190,12 +1767,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
- // The input chain to this libcall is the entry node of the function.
- // Legalizing the call will automatically add the previous call to the
- // dependence.
- SDValue InChain = DAG.getEntryNode();
-
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
@@ -2209,26 +1780,31 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ // By default, the input chain to this libcall is the entry node of the
+ // function. If the libcall is going to be emitted as a tail call, then
+ // TLI.isUsedByReturnOnly will change it to the right chain when the
+ // return node being folded has a non-entry input chain.
+ SDValue InChain = DAG.getEntryNode();
+
// isTailCall may be true since the callee does not reference the caller's
// stack frame. Check that it is in the right position.
- bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+ SDValue TCChain = InChain;
+ bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI);
+ if (isTailCall)
+ InChain = TCChain;
+
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), isTailCall,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo.first;
}
@@ -2254,15 +1830,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
-
return CallInfo.first;
}
@@ -2272,7 +1843,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -2289,18 +1859,13 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
return CallInfo;
}
@@ -2311,7 +1876,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_PPCF128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::f32: LC = Call_F32; break;
case MVT::f64: LC = Call_F64; break;
case MVT::f80: LC = Call_F80; break;
@@ -2328,7 +1893,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall Call_I128) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC = Call_I8; break;
case MVT::i16: LC = Call_I16; break;
case MVT::i32: LC = Call_I32; break;
@@ -2343,7 +1908,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
const TargetLowering &TLI) {
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2388,7 +1953,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
- default: assert(0 && "Unexpected request for libcall!");
+ default: llvm_unreachable("Unexpected request for libcall!");
case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -2426,21 +1991,16 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
DebugLoc dl = Node->getDebugLoc();
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true, Callee, Args, DAG, dl);
-
- // Legalize the call sequence, starting with the chain. This will advance
- // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that
- // was added by LowerCallTo (guaranteeing proper serialization of calls).
- LegalizeOp(CallInfo.second);
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, dl);
// Remainder is loaded back from the stack frame.
- SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr,
- MachinePointerInfo(), false, false, 0);
+ SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr,
+ MachinePointerInfo(), false, false, false, 0);
Results.push_back(CallInfo.first);
Results.push_back(Rem);
}
@@ -2489,7 +2049,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
false, false, 0);
// load the constructed double
SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
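A host-side sketch of the construct-and-bias trick above, assuming IEEE 754 doubles; sintToFP is an illustrative helper, not the DAG code. With high word 0x43300000, the double's low 32 bits sit exactly on top of 2^52, so after flipping the sign bit on the way in, subtracting the bias 2^52 + 2^31 recovers the signed value.

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static double sintToFP(int32_t x) {
      uint64_t Bits = 0x4330000000000000ull |        // high word: exponent of 2^52
                      (uint32_t)(x ^ 0x80000000u);   // low word: x + 2^31
      double d;
      std::memcpy(&d, &Bits, sizeof(d));             // "load the constructed double"
      uint64_t BiasBits = 0x4330000080000000ull;     // 2^52 + 2^31
      double Bias;
      std::memcpy(&Bias, &BiasBits, sizeof(Bias));
      return d - Bias;                               // exact for all i32 inputs
    }

    int main() {
      assert(sintToFP(-1) == -1.0);
      assert(sintToFP(123456789) == 123456789.0);
      assert(sintToFP(INT32_MIN) == -2147483648.0);
    }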
@@ -2611,7 +2171,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
// offset depending on the data type.
uint64_t FF;
switch (Op0.getValueType().getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported integer type!");
+ default: llvm_unreachable("Unsupported integer type!");
case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float)
case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float)
case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float)
@@ -2629,13 +2189,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment);
+ false, false, false, Alignment);
else {
- FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
- DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- MVT::f32, false, false, Alignment));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32, false, false, Alignment);
+ HandleSDNode Handle(Load);
+ LegalizeOp(Load.getNode());
+ FudgeInReg = Handle.getValue();
}
return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
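A host-side sketch of the fudge-factor selection above for the i32 case; uintToFP is an illustrative helper. The value is converted as signed, then 2^32 (the FF == 0x4F800000 constant) is added exactly when the sign bit was set.

    #include <cassert>
    #include <cstdint>

    static double uintToFP(uint32_t x) {
      double d = (double)(int32_t)x;   // signed conversion
      if ((int32_t)x < 0)
        d += 4294967296.0;             // 2^32, the i32 fudge factor
      return d;
    }

    int main() {
      assert(uintToFP(0x80000000u) == 2147483648.0);
      assert(uintToFP(0xFFFFFFFFu) == 4294967295.0);
      assert(uintToFP(7) == 7.0);
    }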
@@ -2731,7 +2293,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
EVT SHVT = TLI.getShiftAmountTy(VT);
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unhandled Expand type in BSWAP!");
+ default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
@@ -2788,7 +2350,7 @@ static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
DebugLoc dl) {
switch (Opc) {
- default: assert(0 && "Cannot expand this yet!");
+ default: llvm_unreachable("Cannot expand this yet!");
case ISD::CTPOP: {
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT);
@@ -2831,6 +2393,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return Op;
}
+ case ISD::CTLZ_ZERO_UNDEF:
+ // This trivially expands to CTLZ.
+ return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
// for now, we do this:
// x = x | (x >> 1);
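A minimal host-side sketch of the CTLZ expansion sketched in the comment above, assuming 32-bit values; ctlz32 is an illustrative helper, not an LLVM API. The shifts smear the highest set bit into every lower position, after which the leading-zero count equals the popcount of the complement.

    #include <cassert>
    #include <cstdint>

    static unsigned ctlz32(uint32_t x) {
      x |= x >> 1;
      x |= x >> 2;
      x |= x >> 4;
      x |= x >> 8;
      x |= x >> 16;
      unsigned n = 0;
      for (uint32_t v = ~x; v; v &= v - 1) ++n;  // popcount(~x)
      return n;
    }

    int main() {
      assert(ctlz32(1) == 31);
      assert(ctlz32(0x80000000u) == 0);
      assert(ctlz32(0) == 32);   // defined CTLZ result for zero
    }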
@@ -2852,6 +2417,9 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
Op = DAG.getNOT(dl, Op, VT);
return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
+ case ISD::CTTZ_ZERO_UNDEF:
+ // This trivially expands to CTTZ.
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
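A minimal host-side sketch of the popcount identity quoted above, assuming 32-bit values; the helpers are illustrative, not LLVM APIs. For x != 0, ~x & (x - 1) has ones exactly in the positions below the lowest set bit, so its popcount is cttz(x); for x == 0 it evaluates to all ones and yields the bit width.

    #include <cassert>
    #include <cstdint>

    static unsigned popcount32(uint32_t x) {
      unsigned n = 0;
      for (; x; x &= x - 1) ++n;   // clear the lowest set bit each iteration
      return n;
    }

    static unsigned cttz32(uint32_t x) {
      return popcount32(~x & (x - 1));
    }

    int main() {
      assert(cttz32(1) == 0);
      assert(cttz32(8) == 3);
      assert(cttz32(0) == 32);   // defined CTTZ result for zero
    }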
@@ -2881,7 +2449,6 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -2959,14 +2526,16 @@ std::pair <SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
return ExpandChainLibCall(LC, Node, false);
}
-void SelectionDAGLegalize::ExpandNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
DebugLoc dl = Node->getDebugLoc();
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
switch (Node->getOpcode()) {
case ISD::CTPOP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl);
Results.push_back(Tmp1);
break;
@@ -2986,7 +2555,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
- case ISD::EH_SJLJ_DISPATCHSETUP:
// If the target didn't expand these, there's nothing to do, so just
// preserve the chain and be done.
Results.push_back(Node->getOperand(0));
@@ -3006,7 +2574,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
Args, DAG, dl);
@@ -3083,7 +2651,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
false, false, false, false, 0, CallingConv::C,
/*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
Results.push_back(CallResult.second);
@@ -3166,7 +2734,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Align = Node->getConstantOperandVal(3);
SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
- MachinePointerInfo(V), false, false, 0);
+ MachinePointerInfo(V),
+ false, false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -3191,7 +2760,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
}
@@ -3202,7 +2771,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(2), MachinePointerInfo(VS),
- false, false, 0);
+ false, false, false, 0);
Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
@@ -3236,15 +2805,57 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(2), dl));
break;
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ SmallVector<int, 32> NewMask;
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (!TLI.isTypeLegal(EltVT))
- EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!TLI.isTypeLegal(EltVT)) {
+
+ EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
+
+ // BUILD_VECTOR operands are allowed to be wider than the element type.
+ // But if NewEltVT is smaller than EltVT, the BUILD_VECTOR does not accept it.
+ if (NewEltVT.bitsLT(EltVT)) {
+
+ // Convert shuffle node.
+ // If original node was v4i64 and the new EltVT is i32,
+ // cast operands to v8i32 and re-build the mask.
+
+ // Calculate new VT, the size of the new VT should be equal to original.
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+ VT.getSizeInBits()/NewEltVT.getSizeInBits());
+ assert(NewVT.bitsEq(VT));
+
+ // cast operands to new VT
+ Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1);
+
+ // Convert the shuffle mask
+ unsigned factor = NewVT.getVectorNumElements() / VT.getVectorNumElements();
+
+ // EltVT only gets smaller here, so the factor is at least 1.
+ assert(factor > 0);
+
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ if (Mask[i] < 0) {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]);
+ } else {
+ for (unsigned fi = 0; fi < factor; ++fi)
+ NewMask.push_back(Mask[i]*factor+fi);
+ }
+ }
+ Mask = NewMask;
+ VT = NewVT;
+ }
+ EltVT = NewEltVT;
+ }
unsigned NumElems = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Ops;
+ SmallVector<SDValue, 16> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
if (Mask[i] < 0) {
Ops.push_back(DAG.getUNDEF(EltVT));
@@ -3253,14 +2864,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
unsigned Idx = Mask[i];
if (Idx < NumElems)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(0),
+ Op0,
DAG.getIntPtrConstant(Idx)));
else
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Node->getOperand(1),
+ Op1,
DAG.getIntPtrConstant(Idx - NumElems)));
}
+
Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ // We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
break;
}
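A small sketch of the mask widening performed above, written as host code under the assumption of the usual lane numbering; widenMask is an illustrative helper. Each old mask entry expands to factor consecutive new lane indices, and undef (negative) entries stay undef.

    #include <cassert>
    #include <vector>

    static std::vector<int> widenMask(const std::vector<int> &Mask,
                                      unsigned factor) {
      std::vector<int> NewMask;
      for (int M : Mask)
        for (unsigned fi = 0; fi < factor; ++fi)
          NewMask.push_back(M < 0 ? M : M * (int)factor + (int)fi);
      return NewMask;
    }

    int main() {
      // v4i64 shuffle <1, undef, 3, 0> viewed as v8i32 (factor == 2).
      std::vector<int> NewMask = widenMask({1, -1, 3, 0}, 2);
      assert((NewMask == std::vector<int>{2, 3, -1, -1, 6, 7, 0, 1}));
    }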
@@ -3408,10 +3022,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
- Results.push_back(SDValue(Node, 0));
- else
- Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ Results.push_back(ExpandConstantFP(CFP, true));
break;
}
case ISD::EHSELECTION: {
@@ -3423,13 +3035,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::EXCEPTIONADDR: {
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
assert(Reg && "Can't expand to unknown register!");
Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
Node->getValueType(0)));
Results.push_back(Results[0].getValue(1));
break;
}
+ case ISD::FSUB: {
+ EVT VT = Node->getValueType(0);
+ assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
+ "Don't know how to expand this FP subtraction!");
+ Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1);
+ Results.push_back(Tmp1);
+ break;
+ }
case ISD::SUB: {
EVT VT = Node->getValueType(0);
assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
@@ -3657,6 +3279,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(1));
+ // Ret is a node with an illegal type. Because such things are not
+ // generally permitted during this phase of legalization, delete the
+ // node. The above EXTRACT_ELEMENT nodes should have been folded.
+ DAG.DeleteNode(Ret.getNode());
}
if (isSigned) {
@@ -3797,7 +3423,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
Tmp2, Tmp3, Tmp4, dl);
- LastCALLSEQ_END = DAG.getEntryNode();
assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
@@ -3807,6 +3432,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(Tmp1);
break;
}
+ case ISD::BUILD_VECTOR:
+ Results.push_back(ExpandBUILD_VECTOR(Node));
+ break;
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL: {
+ // Scalarize vector SRA/SRL/SHL.
+ EVT VT = Node->getValueType(0);
+ assert(VT.isVector() && "Unable to legalize non-vector shift");
+ assert(TLI.isTypeLegal(VT.getScalarType()) && "Element type must be legal");
+ unsigned NumElem = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Scalars;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+ SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ VT.getScalarType(),
+ Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+ Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
+ VT.getScalarType(), Ex, Sh));
+ }
+ SDValue Result =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
+ &Scalars[0], Scalars.size());
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -3817,13 +3471,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_VOID:
// FIXME: Custom lowering for these operations shouldn't return null!
- for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- Results.push_back(SDValue(Node, i));
break;
}
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
-void SelectionDAGLegalize::PromoteNode(SDNode *Node,
- SmallVectorImpl<SDValue> &Results) {
+
+void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ SmallVector<SDValue, 8> Results;
EVT OVT = Node->getValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
Node->getOpcode() == ISD::SINT_TO_FP ||
@@ -3835,20 +3492,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
SDValue Tmp1, Tmp2, Tmp3;
switch (Node->getOpcode()) {
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
// Zero extend the argument.
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- // Perform the larger operation.
+ // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
+ // already the correct result.
Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
if (Node->getOpcode() == ISD::CTTZ) {
- //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT)
+ // FIXME: This should set a bit in the zero extended value instead.
Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT),
Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT),
ISD::SETEQ);
Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2,
DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1);
- } else if (Node->getOpcode() == ISD::CTLZ) {
+ } else if (Node->getOpcode() == ISD::CTLZ ||
+ Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
// Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
DAG.getConstant(NVT.getSizeInBits() -
@@ -3877,6 +3538,33 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Node->getOpcode() == ISD::SINT_TO_FP, dl);
Results.push_back(Tmp1);
break;
+ case ISD::VAARG: {
+ SDValue Chain = Node->getOperand(0); // Get the chain.
+ SDValue Ptr = Node->getOperand(1); // Get the pointer.
+
+ unsigned TruncOp;
+ if (OVT.isVector()) {
+ TruncOp = ISD::BITCAST;
+ } else {
+ assert(OVT.isInteger()
+ && "VAARG promotion is supported only for vectors or integer types");
+ TruncOp = ISD::TRUNCATE;
+ }
+
+ // Perform the larger operation, then convert back
+ Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2),
+ Node->getConstantOperandVal(3));
+ Chain = Tmp1.getValue(1);
+
+ Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1);
+
+ // Modified the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain);
+ ReplacedNode(Node);
+ break;
+ }
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -3924,8 +3612,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
break;
}
case ISD::VECTOR_SHUFFLE: {
- SmallVector<int, 8> Mask;
- cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask();
// Cast the two input vectors.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
@@ -3950,7 +3637,31 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
Tmp1, Tmp2, Node->getOperand(2)));
break;
}
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::FPOW: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
+ Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp3, DAG.getIntPtrConstant(0)));
+ break;
}
+ case ISD::FLOG2:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FEXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+ Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
+ Tmp2, DAG.getIntPtrConstant(0)));
+ break;
+ }
+ }
+
+ // Replace the original node with the legalized result.
+ if (!Results.empty())
+ ReplaceNode(Node, Results.data());
}
// SelectionDAG::Legalize - This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 7c1cc69d6a2f..e3938968b205 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getPointerInfo(), NVT,
- L->isVolatile(), L->isNonTemporal(), L->getAlignment());
+ L->getPointerInfo(), NVT, L->isVolatile(),
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
- L->isNonTemporal(), L->getAlignment());
+ L->isNonTemporal(), false, L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
@@ -672,7 +672,7 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
case ISD::SETUEQ:
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
break;
- default: assert(false && "Do not know how to soften this setcc!");
+ default: llvm_unreachable("Do not know how to soften this setcc!");
}
}
@@ -1212,7 +1212,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
switch (SrcVT.getSimpleVT().SimpleTy) {
default:
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
case MVT::i32:
Parts = TwoE32;
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index a5c4c2ded4c5..95ddb1e0f6fb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,7 +20,6 @@
#include "LegalizeTypes.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,8 +56,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
case ISD::CONVERT_RNDSAT:
Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
@@ -211,13 +212,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
- if (NOutVT.bitsEq(NInVT))
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector())
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
@@ -251,9 +249,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case TargetLowering::TypeWidenVector:
- if (OutVT.bitsEq(NInVT))
- // The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+ // The input is widened to the same size. Convert to the widened value.
+ // Make sure that the outgoing value is not a vector, because this would
+ // make us bitcast between two vectors which are legalized in different ways.
+ if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector())
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -312,7 +312,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
- Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+ Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
// Subtract off the extra leading bits in the bigger type.
return DAG.getNode(ISD::SUB, dl, NVT, Op,
DAG.getConstant(NVT.getSizeInBits() -
@@ -330,13 +330,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // The count is the same in the promoted type except if the original
- // value was zero. This can be handled by setting the bit just off
- // the top of the original type.
- APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.setBit(OVT.getSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
- return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
+ if (N->getOpcode() == ISD::CTTZ) {
+ // The count is the same in the promoted type except if the original
+ // value was zero. This can be handled by setting the bit just off
+ // the top of the original type.
+ APInt TopBit(NVT.getSizeInBits(), 0);
+ TopBit.setBit(OVT.getSizeInBits());
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
}
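A host-side sketch of the TopBit trick used above for the non-ZERO_UNDEF case, assuming promotion from i8 to i32; cttz32 is an illustrative helper. ORing in a bit just above the original width makes a zero input count exactly 8 trailing zeros, the defined i8 result, while nonzero inputs are unaffected.

    #include <cassert>
    #include <cstdint>

    static unsigned cttz32(uint32_t x) {
      unsigned n = 0;
      for (; n < 32 && !(x & 1); x >>= 1) ++n;
      return n;
    }

    int main() {
      uint32_t Promoted = (uint32_t)(uint8_t)0 | (1u << 8);  // TopBit at bit 8
      assert(cttz32(Promoted) == 8);                         // cttz_i8(0) == 8
      Promoted = (uint32_t)(uint8_t)4 | (1u << 8);
      assert(cttz32(Promoted) == 2);                         // unchanged for x != 0
    }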
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -486,7 +488,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
- SDValue Mask = GetPromotedInteger(N->getOperand(0));
+ SDValue Mask = N->getOperand(0);
+ EVT OpTy = N->getOperand(1).getValueType();
+
+ // Promote all the way up to the canonical SetCC type.
+ Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy));
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, N->getDebugLoc(),
@@ -1098,8 +1104,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
@@ -1171,7 +1179,6 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
switch (Opc) {
default:
llvm_unreachable("Unhandled atomic intrinsic Expand!");
- break;
case ISD::ATOMIC_SWAP:
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type for atomic!");
@@ -1355,7 +1362,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
// If we don't know anything about the high bits, exit.
if (((KnownZero|KnownOne) & HighBitMask) == 0)
@@ -1390,15 +1397,15 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
}
}
-#if 0
- // FIXME: This code is broken for shifts with a zero amount!
// If we know that all of the high bits of the shift amount are zero, then we
// can do this as a couple of simple shifts.
if ((KnownZero & HighBitMask) == HighBitMask) {
- // Compute 32-amt.
- SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy,
- DAG.getConstant(NVTBits, ShTy),
- Amt);
+ // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined
+ // shift if x is zero. We can use XOR here because x is known to be smaller
+ // than 32.
+ SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt,
+ DAG.getConstant(NVTBits-1, ShTy));
+
unsigned Op1, Op2;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
@@ -1407,13 +1414,23 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break;
}
- Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt);
- Hi = DAG.getNode(ISD::OR, NVT,
- DAG.getNode(Op1, NVT, InH, Amt),
- DAG.getNode(Op2, NVT, InL, Amt2));
+ // When shifting right, the arithmetic for Lo and Hi is swapped.
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(InL, InH);
+
+ // Use a little trick to get the bits that move from Lo to Hi. First
+ // shift by one bit.
+ SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, ShTy));
+ // Then compute the remaining shift with amount-1.
+ SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt);
+ Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2);
+
+ if (N->getOpcode() != ISD::SHL)
+ std::swap(Hi, Lo);
return true;
}
-#endif
return false;
}
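A host-side sketch of the known-small-amount path added above, shown for the SHL case with a 64-bit value split into 32-bit halves; shl64 is an illustrative helper. Since Amt < 32, Amt ^ 31 equals 31 - Amt, and splitting the cross-over shift as (x >> 1) >> (Amt ^ 31) never shifts by the full width, so Amt == 0 is safe.

    #include <cassert>
    #include <cstdint>

    static uint64_t shl64(uint32_t InL, uint32_t InH, unsigned Amt) {
      assert(Amt < 32 && "high bits of the amount must be known zero");
      uint32_t Lo = InL << Amt;
      uint32_t Hi = (InH << Amt) | ((InL >> 1) >> (Amt ^ 31));
      return ((uint64_t)Hi << 32) | Lo;
    }

    int main() {
      assert(shl64(0x80000001u, 0, 1) == 0x100000002ull);
      assert(shl64(0xFFFFFFFFu, 0, 0) == 0xFFFFFFFFull);  // Amt == 0 is safe
    }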
@@ -1493,8 +1510,6 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
}
-
- return false;
}
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -1702,8 +1717,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
@@ -1732,8 +1747,8 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
DAG.getConstant(0, NVT), ISD::SETNE);
- SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
- SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+ SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo);
+ SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi);
Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
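A host-side sketch of this expanded CTTZ, assuming a 64-bit value split into 32-bit halves; the helpers are illustrative. The low half is counted when it is nonzero (where the _ZERO_UNDEF form is safe), otherwise the high half's count is offset by the half width.

    #include <cassert>
    #include <cstdint>

    static unsigned cttz32(uint32_t x) {
      unsigned n = 0;
      for (; n < 32 && !(x & 1); x >>= 1) ++n;
      return n;
    }

    static unsigned cttz64(uint32_t Lo, uint32_t Hi) {
      return Lo != 0 ? cttz32(Lo) : cttz32(Hi) + 32;
    }

    int main() {
      assert(cttz64(8, 0) == 3);
      assert(cttz64(0, 1) == 32);
      assert(cttz64(0, 0) == 64);   // both halves zero
    }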
@@ -1778,6 +1793,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
+ bool isInvariant = N->isInvariant();
DebugLoc dl = N->getDebugLoc();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
@@ -1808,7 +1824,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
unsigned ExcessBits =
N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
@@ -2305,12 +2321,14 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- true, Func, Args, DAG, dl);
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
+ Func, Args, DAG, dl);
SplitInteger(CallInfo.first, Lo, Hi);
SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
DAG.getConstant(0, PtrVT),
ISD::SETNE);
@@ -2781,7 +2799,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
else if (SrcVT == MVT::i128)
FF = APInt(32, F32TwoE128);
else
- assert(false && "Unsupported UINT_TO_FP!");
+ llvm_unreachable("Unsupported UINT_TO_FP!");
// Check whether the sign bit is set.
SDValue Lo, Hi;
@@ -2926,38 +2944,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- SDValue Op0 = N->getOperand(1);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType() &&
- "Invalid input vector types");
-
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
+ EVT InElemTy = OutVT.getVectorElementType();
EVT OutElemTy = NOutVT.getVectorElementType();
- unsigned NumElem0 = Op0.getValueType().getVectorNumElements();
- unsigned NumElem1 = Op1.getValueType().getVectorNumElements();
+ unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
unsigned NumOutElem = NOutVT.getVectorNumElements();
- assert(NumElem0 + NumElem1 == NumOutElem &&
- "Invalid number of incoming elements");
+ unsigned NumOperands = N->getNumOperands();
+ assert(NumElem * NumOperands == NumOutElem &&
+ "Unexpected number of elements");
// Take the elements from each input vector in turn.
SmallVector<SDValue, 8> Ops(NumOutElem);
- for (unsigned i = 0; i < NumElem0; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op0.getValueType().getScalarType(), Op0,
- DAG.getIntPtrConstant(i));
- Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
- }
-
- // Take the elements from the second vector
- for (unsigned i = 0; i < NumElem1; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- Op1.getValueType().getScalarType(), Op1,
- DAG.getIntPtrConstant(i));
- Ops[i + NumElem0] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ SDValue Op = N->getOperand(i);
+ for (unsigned j = 0; j < NumElem; ++j) {
+ SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ InElemTy, Op, DAG.getIntPtrConstant(j));
+ Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ }
}
return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
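A small sketch of the flattened indexing used above, as plain host code with illustrative data. Element j of operand i lands at index i * NumElem + j of the promoted output.

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<std::vector<int>> Operands = {{1, 2}, {3, 4}, {5, 6}};
      unsigned NumElem = 2;
      std::vector<int> Ops(Operands.size() * NumElem);
      for (unsigned i = 0; i < Operands.size(); ++i)
        for (unsigned j = 0; j < NumElem; ++j)
          Ops[i * NumElem + j] = Operands[i][j];  // ANY_EXTEND happens here in DAG
      assert((Ops == std::vector<int>{1, 2, 3, 4, 5, 6}));
    }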
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a4bb577433cc..439aa4de5cf5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -222,8 +222,6 @@ bool DAGTypeLegalizer::run() {
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
switch (getTypeAction(ResultVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
break;
// The following calls must take care of *all* of the node's results,
@@ -275,8 +273,6 @@ ScanOperands:
EVT OpVT = N->getOperand(i).getValueType();
switch (getTypeAction(OpVT)) {
- default:
- assert(false && "Unknown action!");
case TargetLowering::TypeLegal:
continue;
// The following calls must either replace all of the node's results
@@ -752,7 +748,11 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
- assert(Result.getValueType() == Op.getValueType().getVectorElementType() &&
+ // Note that in some cases vector operation operands may be wider than
+ // the vector element type. For example BUILD_VECTOR of type <1 x i1> with
+ // a constant i8 operand.
+ assert(Result.getValueType().getSizeInBits() >=
+ Op.getValueType().getVectorElementType().getSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1056,8 +1056,9 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
std::pair<SDValue,SDValue> CallInfo =
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ false, 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -1084,12 +1085,11 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy());
- // Splice the libcall in wherever FindInputOutputChains tells us to.
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*isReturnValueUsed=*/true,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
return CallInfo;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index abacdac686bc..e8664458e9a6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -521,6 +521,7 @@ private:
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
+ SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
@@ -633,6 +634,7 @@ private:
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue WidenVecOp_STORE(SDNode* N);
+ SDValue WidenVecOp_SETCC(SDNode* N);
SDValue WidenVecOp_Convert(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 8e7e4985e4d0..a8ff7c65abde 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -21,7 +21,6 @@
#include "LegalizeTypes.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -46,8 +45,6 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Handle some special cases efficiently.
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypePromoteInteger:
break;
@@ -130,7 +127,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo,
+ false, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -140,7 +138,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Load the second half from the stack slot.
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
PtrInfo.getWithOffset(IncrementSize), false,
- false, MinAlign(Alignment, IncrementSize));
+ false, false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
if (TLI.isBigEndian())
@@ -212,11 +210,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Alignment);
+ isVolatile, isNonTemporal, isInvariant, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
@@ -224,7 +223,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
- isVolatile, isNonTemporal,
+ isVolatile, isNonTemporal, isInvariant,
MinAlign(Alignment, IncrementSize));
// Build a factor node to remember that this load is independent of the
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index f815b00db5d6..3ae8345bd198 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -64,6 +64,8 @@ class VectorLegalizer {
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
+ SDValue ExpandLoad(SDValue Op);
+ SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
// Implements vector promotion; this is essentially just bitcasting the
// operands to a different type and bitcasting the result back to the
@@ -124,6 +126,33 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
SDValue Result =
SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+ if (Op.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
+ if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT()))
+ return TranslateLegalizeResults(Op, Result);
+ Changed = true;
+ return LegalizeOp(ExpandLoad(Op));
+ }
+ } else if (Op.getOpcode() == ISD::STORE) {
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ EVT StVT = ST->getMemoryVT();
+ EVT ValVT = ST->getValue().getValueType();
+ if (StVT.isVector() && ST->isTruncatingStore())
+ switch (TLI.getTruncStoreAction(ValVT, StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal:
+ return TranslateLegalizeResults(Op, Result);
+ case TargetLowering::Custom:
+ Changed = true;
+ return LegalizeOp(TLI.LowerOperation(Result, DAG));
+ case TargetLowering::Expand:
+ Changed = true;
+ return LegalizeOp(ExpandStore(Op));
+ }
+ }
+
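For reference, the Legal/Custom/Expand outcomes above mirror how a target registers the truncating store; a hypothetical target constructor might request the Expand path (and hence ExpandStore) like this:
  // Hypothetical target's TargetLowering constructor: scalarize
  // <4 x i32> -> <4 x i8> truncating stores.
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Expand);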
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
J != E;
@@ -156,8 +185,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SRL:
case ISD::ROTL:
case ISD::ROTR:
- case ISD::CTTZ:
case ISD::CTLZ:
+ case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::SELECT:
case ISD::VSELECT:
@@ -262,6 +293,97 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
+
+SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
+ SDValue Chain = LD->getChain();
+ SDValue BasePTR = LD->getBasePtr();
+ EVT SrcVT = LD->getMemoryVT();
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl,
+ Op.getNode()->getValueType(0).getScalarType(),
+ Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ AddLegalizedOperand(Op.getValue(0), Value);
+ AddLegalizedOperand(Op.getValue(1), NewChain);
+
+ return (Op.getResNo() ? NewChain : Value);
+}
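A worked example of what ExpandLoad produces for a hypothetical <2 x i32> sign-extending load from a <2 x i8> memory type (Stride = 8/8 = 1 byte):
  //   e0 = sextload i32 from p       ; chain c0
  //   e1 = sextload i32 from p + 1   ; chain c1
  //   value = BUILD_VECTOR e0, e1
  //   chain = TokenFactor c0, c1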
+
+SDValue VectorLegalizer::ExpandStore(SDValue Op) {
+ DebugLoc dl = Op.getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
+ SDValue Chain = ST->getChain();
+ SDValue BasePTR = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+
+ unsigned Alignment = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+
+ unsigned NumElem = StVT.getVectorNumElements();
+ // The type of the data we want to save
+ EVT RegVT = Value.getValueType();
+ EVT RegSclVT = RegVT.getScalarType();
+ // The type of data as saved in memory.
+ EVT MemSclVT = StVT.getScalarType();
+
+ // Cast floats into integers
+ unsigned ScalarSize = MemSclVT.getSizeInBits();
+
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+
+ // Store Stride in bytes
+ unsigned Stride = ScalarSize/8;
+ // Extract each of the elements from the original vector
+ // and save them into memory individually.
+ SmallVector<SDValue, 8> Stores;
+ for (unsigned Idx = 0; Idx < NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ RegSclVT, Value, DAG.getIntPtrConstant(Idx));
+
+ // This scalar TruncStore may be illegal, but we legalize it later.
+ SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR,
+ ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT,
+ isVolatile, isNonTemporal, Alignment);
+
+ BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
+ DAG.getIntPtrConstant(Stride));
+
+ Stores.push_back(Store);
+ }
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ AddLegalizedOperand(Op, TF);
+ return TF;
+}
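A standalone model of the stride arithmetic above (hypothetical helper, not an LLVM API), showing how non-power-of-two memory scalar sizes are rounded up before being converted to a byte stride:
  #include <cstdio>

  // Round v up to the next power of two (callers only pass non-powers-of-two,
  // matching the isPowerOf2_32 guard in ExpandStore).
  static unsigned nextPow2(unsigned v) {
    unsigned p = 1;
    while (p < v)
      p <<= 1;
    return p;
  }

  int main() {
    const unsigned sizes[] = {8, 16, 24, 48};
    for (unsigned bits : sizes) {
      bool pow2 = (bits & (bits - 1)) == 0;
      unsigned rounded = pow2 ? bits : nextPow2(bits);
      std::printf("mem scalar %2u bits -> stride %u bytes\n", bits, rounded / 8);
    }
  }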
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
@@ -274,10 +396,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// If we can't even use the basic vector operations of
// AND,OR,XOR, we will have to scalarize the op.
- if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::OR, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+  // Note that the operation may be 'promoted', meaning it is
+  // bitcast to another type that is handled.
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits()
&& "Invalid mask size");
@@ -301,9 +425,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
DebugLoc DL = Op.getDebugLoc();
// Make sure that the SINT_TO_FP and SRL instructions are available.
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) ||
- !TLI.isOperationLegalOrCustom(ISD::SRL, VT))
- return DAG.UnrollVectorOp(Op.getNode());
+ if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
+ TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
+ return DAG.UnrollVectorOp(Op.getNode());
EVT SVT = VT.getScalarType();
assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 107a42b2951c..5f23f01dafb4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -21,7 +21,6 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -59,6 +58,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+ case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
@@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment());
+ N->isInvariant(), N->getOriginalAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -227,6 +227,37 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
return InOp;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+ SDValue Cond = GetScalarizedVector(N->getOperand(0));
+ SDValue LHS = GetScalarizedVector(N->getOperand(1));
+ TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false);
+ TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true);
+ if (ScalarBool != VecBool) {
+ EVT CondVT = Cond.getValueType();
+ switch (ScalarBool) {
+ case TargetLowering::UndefinedBooleanContent:
+ break;
+ case TargetLowering::ZeroOrOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+ // The vector value uses all ones for true, the scalar a single 1, so mask.
+ Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT,
+ Cond, DAG.getConstant(1, CondVT));
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+ VecBool == TargetLowering::ZeroOrOneBooleanContent);
+ // The vector value uses a single 1 for true, the scalar all ones, so sign extend.
+ Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT,
+ Cond, DAG.getValueType(MVT::i1));
+ break;
+ }
+ }
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ LHS.getValueType(), Cond, LHS,
+ GetScalarizedVector(N->getOperand(2)));
+}
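A standalone model of the two boolean-content fix-ups above, on an i8 condition lane (vector-style true is all ones, scalar-style true is a single 1):
  #include <cstdint>

  static uint8_t maskToScalarBool(uint8_t c) { return c & 1; } // the AND case
  static int8_t scalarToMaskBool(int8_t c) {   // models SIGN_EXTEND_INREG from i1
    return (c & 1) ? int8_t(-1) : int8_t(0);   // 0 -> 0x00, 1 -> 0xFF
  }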
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -405,6 +436,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n");
SDValue Lo, Hi;
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ return;
switch (N->getOpcode()) {
default:
@@ -442,8 +477,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ANY_EXTEND:
case ISD::CONVERT_RNDSAT:
case ISD::CTLZ:
- case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTPOP:
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -677,7 +714,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Lo part from the stack slot.
Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Increment the pointer to the other part.
unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
@@ -686,7 +723,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
// Load the Hi part from the stack slot.
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, MinAlign(Alignment, IncrementSize));
+ false, false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -713,20 +750,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- Alignment);
+ isInvariant, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, Alignment);
+ HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -773,46 +811,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
DebugLoc dl = N->getDebugLoc();
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
- // Split the input.
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
EVT InVT = N->getOperand(0).getValueType();
- switch (getTypeAction(InVT)) {
- default: llvm_unreachable("Unexpected type action!");
- case TargetLowering::TypeLegal: {
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ } else {
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypePromoteInteger: {
- SDValue InOp = GetPromotedInteger(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(),
- InOp.getValueType().getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
- case TargetLowering::TypeSplitVector:
- GetSplitVector(N->getOperand(0), Lo, Hi);
- break;
- case TargetLowering::TypeWidenVector: {
- // If the result needs to be split and the input needs to be widened,
- // the two types must have different lengths. Use the widened result
- // and extract from it to do the split.
- SDValue InOp = GetWidenedVector(N->getOperand(0));
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- break;
- }
}
if (N->getOpcode() == ISD::FP_ROUND) {
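For the hand-split path above, a sketch of the nodes built for a hypothetical <8 x i32> input (so InNVT = <4 x i32>):
  //   Lo = EXTRACT_SUBVECTOR In, 0
  //   Hi = EXTRACT_SUBVECTOR In, 4   ; 4 == InNVT.getVectorNumElements()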
@@ -1239,6 +1249,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT:
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
@@ -1590,12 +1601,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
switch (getTypeAction(InVT)) {
- default:
- assert(false && "Unknown type action!");
- break;
case TargetLowering::TypeLegal:
break;
case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
// If the InOp is promoted to the same size, convert it. Otherwise,
// fall out of the switch and widen the promoted input.
InOp = GetPromotedInteger(InOp);
@@ -1928,7 +1942,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(1));
SDValue InOp2 = GetWidenedVector(N->getOperand(2));
assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
- return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
WidenVT, Cond1, InOp1, InOp2);
}
@@ -2032,6 +2046,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::STORE: Res = WidenVecOp_STORE(N); break;
+ case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
case ISD::FP_EXTEND:
case ISD::FP_TO_SINT:
@@ -2165,6 +2180,32 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
MVT::Other,&StChain[0],StChain.size());
}
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+ SDValue InOp0 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(1));
+ DebugLoc dl = N->getDebugLoc();
+
+  // WARNING: In this code we widen the compare instruction with garbage.
+  // This garbage may contain denormal floats, which may be slow. Is this a
+  // real concern? Should we zero the unused lanes if this is a float compare?
+
+ // Get a new SETCC node to compare the newly widened operands.
+ // Only some of the compared elements are legal.
+ EVT SVT = TLI.getSetCCResultType(InOp0.getValueType());
+ SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SVT, InOp0, InOp1, N->getOperand(2));
+
+ // Extract the needed results from the result vector.
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
+ SVT.getVectorElementType(),
+ N->getValueType(0).getVectorNumElements());
+ SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ ResVT, WideSETCC, DAG.getIntPtrConstant(0));
+
+ return PromoteTargetBoolean(CC, N->getValueType(0));
+}
+
+
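A worked example of the flow above, for a SETCC whose <3 x float> operands were widened to <4 x float>:
  //   wide0 = widened op0, <4 x float>   ; lane 3 is the garbage noted above
  //   wide1 = widened op1, <4 x float>
  //   wcc   = setcc wide0, wide1, cc     ; 4-lane result of type SVT
  //   cc    = extract_subvector wcc, 0   ; keep the 3 lanes the node defined
  //   res   = PromoteTargetBoolean(cc, original result type)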
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//
@@ -2276,6 +2317,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2285,7 +2327,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- isVolatile, isNonTemporal, Align);
+ isVolatile, isNonTemporal, isInvariant, Align);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction
@@ -2323,18 +2365,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
DAG.getIntPtrConstant(Increment));
+ SDValue L;
if (LdWidth < NewVTWidth) {
// The current type is too large; find a better size.
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVTWidth = NewVT.getSizeInBits();
- }
-
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
isVolatile,
- isNonTemporal, MinAlign(Align, Increment));
- LdChain.push_back(LdOp.getValue(1));
- LdOps.push_back(LdOp);
+ isNonTemporal, isInvariant,
+ MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
+ &Loads[0], Loads.size());
+ }
+ } else {
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment));
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
LdWidth -= NewVTWidth;
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
new file mode 100644
index 000000000000..ff0136e08cd9
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -0,0 +1,657 @@
+//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ResourcePriorityQueue class, which is a
+// SchedulingPriorityQueue that prioritizes instructions using DFA state to
+// reduce the length of the critical path through the basic block
+// on VLIW platforms.
+// The scheduler is basically a top-down adaptable list scheduler with DFA
+// resource tracking added to the cost function.
+// The DFA is queried as a state machine to model "packets/bundles" during
+// scheduling. Currently packets/bundles are discarded at the end of
+// scheduling, affecting only the order of instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "scheduler"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
+ cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable use of DFA during scheduling"));
+
+static cl::opt<signed> RegPressureThreshold(
+ "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5),
+ cl::desc("Track reg pressure and switch priority to in-depth"));
+
+
+ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
+ Picker(this),
+ InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData())
+{
+ TII = IS->getTargetLowering().getTargetMachine().getInstrInfo();
+ TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo();
+ TLI = &IS->getTargetLowering();
+
+ const TargetMachine &tm = (*IS->MF).getTarget();
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+ // This hard requirement could be relaxed, but for now
+ // do not let it proceed.
+ assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
+
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF);
+
+ ParallelLiveRanges = 0;
+ HorizontalVerticalBalance = 0;
+}
+
+unsigned
+ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *ScegN = PredSU->getNode();
+
+ if (!ScegN)
+ continue;
+
+ // If the value is passed to CopyToReg, it is probably
+ // live outside the BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: NumberDeps++; break;
+ case ISD::CopyToReg: break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
+ unsigned RCId) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+
+ SUnit *SuccSU = I->getSUnit();
+ const SDNode *ScegN = SuccSU->getNode();
+ if (!ScegN)
+ continue;
+
+ // If the value is passed to CopyToReg, it is probably
+ // live outside the BB.
+ switch (ScegN->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor: break;
+ case ISD::CopyFromReg: break;
+ case ISD::CopyToReg: NumberDeps++; break;
+ case ISD::INLINEASM: break;
+ }
+ if (!ScegN->isMachineOpcode())
+ continue;
+
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (TLI->isTypeLegal(VT)
+ && (TLI->getRegClassFor(VT)->getID() == RCId)) {
+ NumberDeps++;
+ break;
+ }
+ }
+ }
+ return NumberDeps;
+}
+
+static unsigned numberCtrlDepsInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+static unsigned numberCtrlPredInSU(SUnit *SU) {
+ unsigned NumberDeps = 0;
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I)
+ if (I->isCtrl())
+ NumberDeps++;
+
+ return NumberDeps;
+}
+
+///
+/// Initialize nodes.
+///
+void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+ SUnit *SU = &(*SUnits)[i];
+ initNumRegDefsLeft(SU);
+ SU->NodeQueueId = 0;
+ }
+}
+
+/// This heuristic is used if DFA scheduling is not desired
+/// for some VLIW platform.
+bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+ // The isScheduleHigh flag allows nodes with wraparound dependencies that
+ // cannot easily be modeled as edges with latencies to be scheduled as
+ // soon as possible in a top-down schedule.
+ if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+ return false;
+
+ if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+ return true;
+
+ unsigned LHSNum = LHS->NodeNum;
+ unsigned RHSNum = RHS->NodeNum;
+
+ // The most important heuristic is scheduling the critical path.
+ unsigned LHSLatency = PQ->getLatency(LHSNum);
+ unsigned RHSLatency = PQ->getLatency(RHSNum);
+ if (LHSLatency < RHSLatency) return true;
+ if (LHSLatency > RHSLatency) return false;
+
+ // After that, if two nodes have identical latencies, look to see if one will
+ // unblock more other nodes than the other.
+ unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
+ unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
+ if (LHSBlocked < RHSBlocked) return true;
+ if (LHSBlocked > RHSBlocked) return false;
+
+ // Finally, just to provide a stable ordering, use the node number as a
+ // deciding factor.
+ return LHSNum < RHSNum;
+}
+
+
+/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
+/// of SU, return it, otherwise return null.
+SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
+ SUnit *OnlyAvailablePred = 0;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ SUnit &Pred = *I->getSUnit();
+ if (!Pred.isScheduled) {
+ // We found an available, but not scheduled, predecessor. If it's the
+ // only one we have found, keep track of it... otherwise give up.
+ if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
+ return 0;
+ OnlyAvailablePred = &Pred;
+ }
+ }
+ return OnlyAvailablePred;
+}
+
+void ResourcePriorityQueue::push(SUnit *SU) {
+ // Look at all of the successors of this node. Count the number of nodes that
+ // this node is the sole unscheduled node for.
+ unsigned NumNodesBlocking = 0;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I)
+ if (getSingleUnscheduledPred(I->getSUnit()) == SU)
+ ++NumNodesBlocking;
+
+ NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
+ Queue.push_back(SU);
+}
+
+/// Check if scheduling of this SU is possible
+/// in the current packet.
+bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) {
+ if (!SU || !SU->getNode())
+ return false;
+
+ // If this is a compound instruction,
+ // it is likely to be a call. Do not delay it.
+ if (SU->getNode()->getGluedNode())
+ return true;
+
+ // First see if the pipeline could receive this instruction
+ // in the current cycle.
+ if (SU->getNode()->isMachineOpcode())
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ if (!ResourcesModel->canReserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode())))
+ return false;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+
+ // Now make sure there are no other dependencies
+ // on instructions already in the packet.
+ for (unsigned i = 0, e = Packet.size(); i != e; ++i)
+ for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
+ E = Packet[i]->Succs.end(); I != E; ++I) {
+ // Since we do not add pseudos to packets, might as well
+ // ignore order deps.
+ if (I->isCtrl())
+ continue;
+
+ if (I->getSUnit() == SU)
+ return false;
+ }
+
+ return true;
+}
+
+/// Keep track of available resources.
+void ResourcePriorityQueue::reserveResources(SUnit *SU) {
+ // If this SU does not fit in the packet
+ // start a new one.
+ if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ if (SU->getNode() && SU->getNode()->isMachineOpcode()) {
+ switch (SU->getNode()->getMachineOpcode()) {
+ default:
+ ResourcesModel->reserveResources(&TII->get(
+ SU->getNode()->getMachineOpcode()));
+ break;
+ case TargetOpcode::EXTRACT_SUBREG:
+ case TargetOpcode::INSERT_SUBREG:
+ case TargetOpcode::SUBREG_TO_REG:
+ case TargetOpcode::REG_SEQUENCE:
+ case TargetOpcode::IMPLICIT_DEF:
+ break;
+ }
+ Packet.push_back(SU);
+ }
+ // Forcefully end packet for PseudoOps.
+ else {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+
+ // If packet is now full, reset the state so in the next cycle
+ // we start fresh.
+ if (Packet.size() >= InstrItins->IssueWidth) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ }
+}
+
+signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ // Gen estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) {
+ EVT VT = SU->getNode()->getValueType(i);
+ if (TLI->isTypeLegal(VT)
+ && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance += numberRCValSuccInSU(SU, RCId);
+ }
+ // Kill estimate.
+ for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = SU->getNode()->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+ if (isa<ConstantSDNode>(Op.getNode()))
+ continue;
+
+ if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT)
+ && TLI->getRegClassFor(VT)->getID() == RCId)
+ RegBalance -= numberRCValPredInSU(SU, RCId);
+ }
+ return RegBalance;
+}
+
+/// Estimates the change in reg pressure from this SU.
+/// This is achieved by trivially tracking the defined
+/// and used vregs in dependent instructions.
+/// The RawPressure flag makes this function ignore
+/// existing reg file sizes and report the raw def/use
+/// balance.
+signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) {
+ signed RegBalance = 0;
+
+ if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode())
+ return RegBalance;
+
+ if (RawPressure) {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+ else {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ if ((RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) > 0) &&
+ (RegPressure[RC->getID()] +
+ rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()]))
+ RegBalance += rawRegPressureDelta(SU, RC->getID());
+ }
+ }
+
+ return RegBalance;
+}
+
+// Constants used to denote relative importance of
+// heuristic components for cost computation.
+static const unsigned PriorityOne = 200;
+static const unsigned PriorityTwo = 100;
+static const unsigned PriorityThree = 50;
+static const unsigned PriorityFour = 15;
+static const unsigned PriorityFive = 5;
+static const unsigned ScaleOne = 20;
+static const unsigned ScaleTwo = 10;
+static const unsigned ScaleThree = 5;
+static const unsigned FactorOne = 2;
+
+/// Returns a single number reflecting the benefit of scheduling SU
+/// in the current cycle.
+signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
+ // Initial trivial priority.
+ signed ResCount = 1;
+
+ // Do not waste time on a node that is already scheduled.
+ if (SU->isScheduled)
+ return ResCount;
+
+ // Forced priority is high.
+ if (SU->isScheduleHigh)
+ ResCount += PriorityOne;
+
+  // Adaptable scheduling: a small but very
+  // parallel region, where reg pressure is an issue.
+ if (HorizontalVerticalBalance > RegPressureThreshold) {
+ // Critical path first
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ // Consider change to reg pressure from scheduling
+ // this SU.
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne);
+ }
+  // Default heuristic: greedy and
+  // critical-path driven.
+ else {
+ // Critical path first.
+ ResCount += (SU->getHeight() * ScaleTwo);
+ // Now see how many instructions are blocked by this SU.
+ ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo);
+ // If resources are available for it, multiply the
+ // chance of scheduling.
+ if (isResourceAvailable(SU))
+ ResCount <<= FactorOne;
+
+ ResCount -= (regPressureDelta(SU) * ScaleTwo);
+ }
+
+  // These are platform-specific things.
+  // They will need to move into the back end
+  // and be accessed from here via a hook.
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.isCall())
+ ResCount += (PriorityThree + (ScaleThree*N->getNumValues()));
+ }
+ else
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ ResCount += PriorityFive;
+ break;
+
+ case ISD::INLINEASM:
+ ResCount += PriorityFour;
+ break;
+ }
+ }
+ return ResCount;
+}
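A worked example of the default branch under the constants above, for a hypothetical unscheduled SU of height 3 that solely blocks two nodes, has resources available, and has a reg-pressure delta of 1 (note the shift by FactorOne == 2 quadruples the count, not doubles it):
  //   ResCount  = 1 + 3*ScaleTwo + 2*ScaleTwo   // 1 + 30 + 20 = 51
  //   ResCount <<= FactorOne                    // 51 << 2 = 204
  //   ResCount -= 1*ScaleTwo                    // 204 - 10 = 194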
+
+
+/// Main resource tracking point.
+void ResourcePriorityQueue::scheduledNode(SUnit *SU) {
+ // Use NULL entry as an event marker to reset
+ // the DFA state.
+ if (!SU) {
+ ResourcesModel->clearResources();
+ Packet.clear();
+ return;
+ }
+
+ const SDNode *ScegN = SU->getNode();
+ // Update reg pressure tracking.
+ // First update current node.
+ if (ScegN->isMachineOpcode()) {
+ // Estimate generated regs.
+ for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) {
+ EVT VT = ScegN->getValueType(i);
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC)
+ RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID());
+ }
+ }
+ // Estimate killed regs.
+ for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) {
+ const SDValue &Op = ScegN->getOperand(i);
+ EVT VT = Op.getNode()->getValueType(Op.getResNo());
+
+ if (TLI->isTypeLegal(VT)) {
+ const TargetRegisterClass *RC = TLI->getRegClassFor(VT);
+ if (RC) {
+ if (RegPressure[RC->getID()] >
+ (numberRCValPredInSU(SU, RC->getID())))
+ RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID());
+ else RegPressure[RC->getID()] = 0;
+ }
+ }
+ }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0))
+ continue;
+ --I->getSUnit()->NumRegDefsLeft;
+ }
+ }
+
+ // Reserve resources for this SU.
+ reserveResources(SU);
+
+ // Adjust number of parallel live ranges.
+  // The heuristic is simple - a node with no data successors reduces
+  // the number of live ranges. All others increase it.
+ unsigned NumberNonControlDeps = 0;
+
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ adjustPriorityOfUnscheduledPreds(I->getSUnit());
+ if (!I->isCtrl())
+ NumberNonControlDeps++;
+ }
+
+ if (!NumberNonControlDeps) {
+ if (ParallelLiveRanges >= SU->NumPreds)
+ ParallelLiveRanges -= SU->NumPreds;
+ else
+ ParallelLiveRanges = 0;
+
+ }
+ else
+ ParallelLiveRanges += SU->NumRegDefsLeft;
+
+ // Track parallel live chains.
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU));
+ HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU));
+}
+
+void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
+ unsigned NodeNumDefs = 0;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode()) {
+ const MCInstrDesc &TID = TII->get(N->getMachineOpcode());
+ // No register need be allocated for this.
+ if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ NodeNumDefs = 0;
+ break;
+ }
+ NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs());
+ }
+ else
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::CopyFromReg:
+ NodeNumDefs++;
+ break;
+ case ISD::INLINEASM:
+ NodeNumDefs++;
+ break;
+ }
+
+ SU->NumRegDefsLeft = NodeNumDefs;
+}
+
+/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just
+/// scheduled. If SU is not itself available, then there is at least one
+/// predecessor node that has not been scheduled yet. If SU has exactly ONE
+/// unscheduled predecessor, we want to increase its priority: it getting
+/// scheduled will make this node available, so it is better than some other
+/// node of the same priority that will not make a node available.
+void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
+ if (SU->isAvailable) return; // All preds scheduled.
+
+ SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
+ if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ return;
+
+ // Okay, we found a single predecessor that is available, but not scheduled.
+ // Since it is available, it must be in the priority queue. First remove it.
+ remove(OnlyAvailablePred);
+
+ // Reinsert the node into the priority queue, which recomputes its
+ // NumNodesSolelyBlocking value.
+ push(OnlyAvailablePred);
+}
+
+
+/// Main access point - returns the next instruction
+/// to be placed in the scheduling sequence.
+SUnit *ResourcePriorityQueue::pop() {
+ if (empty())
+ return 0;
+
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ if (!DisableDFASched) {
+ signed BestCost = SUSchedulingCost(*Best);
+ for (std::vector<SUnit *>::iterator I = Queue.begin(),
+ E = Queue.end(); I != E; ++I) {
+ if (*I == *Best)
+ continue;
+
+ if (SUSchedulingCost(*I) > BestCost) {
+ BestCost = SUSchedulingCost(*I);
+ Best = I;
+ }
+ }
+ }
+ // Use default TD scheduling mechanism.
+ else {
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ }
+
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+
+ Queue.pop_back();
+
+ return V;
+}
+
+
+void ResourcePriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+
+ Queue.pop_back();
+}
+
+
+#ifdef NDEBUG
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void ResourcePriorityQueue::dump(ScheduleDAG *DAG) const {
+ ResourcePriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
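Both tuning knobs defined at the top of this file are ordinary cl::opt flags, so once this queue is wired into a scheduler they can be exercised from the llc command line (a sketch; the flag spellings are taken from the cl::opt definitions above):
  llc -disable-dfa-sched ...                   (fall back to the default TD picker)
  llc -dfa-sched-reg-pressure-threshold=8 ...  (enter the in-depth priority mode earlier)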
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index b275c6321ae4..24da432a47a1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -43,7 +43,7 @@ namespace {
SmallVector<SUnit *, 16> Queue;
bool empty() const { return Queue.empty(); }
-
+
void push(SUnit *U) {
Queue.push_back(U);
}
@@ -101,8 +101,8 @@ private:
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp();
- /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies.
- bool ForceUnitLatencies() const { return true; }
+ /// forceUnitLatencies - The fast scheduler doesn't care about real latencies.
+ bool forceUnitLatencies() const { return true; }
};
} // end anonymous namespace
@@ -112,7 +112,7 @@ void ScheduleDAGFast::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
@@ -159,7 +159,7 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
if (!LiveRegDefs[I->getReg()]) {
@@ -245,10 +245,10 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
SDValue(LoadNode, 1));
- SUnit *NewSU = NewSUnit(N);
+ SUnit *NewSU = newSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
@@ -268,7 +268,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
LoadSU = &SUnits[LoadNode->getNodeId()];
isNewLoad = false;
} else {
- LoadSU = NewSUnit(LoadNode);
+ LoadSU = newSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
}
@@ -329,7 +329,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
if (isNewLoad) {
AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency));
}
@@ -381,11 +381,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) {
- SUnit *CopyFromSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = NewSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -425,7 +425,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -447,7 +447,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
Added = true;
}
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ for (const uint16_t *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
if (RegAdded.insert(*Alias)) {
LRegs.push_back(*Alias);
@@ -508,7 +508,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
@@ -630,7 +630,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/true);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
deleted file mode 100644
index 430283d5eff9..000000000000
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ /dev/null
@@ -1,265 +0,0 @@
-//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements a top-down list scheduler, using standard algorithms.
-// The basic approach uses a priority queue of available nodes to schedule.
-// One at a time, nodes are taken from the priority queue (thus in priority
-// order), checked for legality to schedule, and emitted if legal.
-//
-// Nodes may not be legal to schedule either due to structural hazards (e.g.
-// pipeline or resource constraints) or because an input to the instruction has
-// not completed execution.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include <climits>
-using namespace llvm;
-
-STATISTIC(NumNoops , "Number of noops inserted");
-STATISTIC(NumStalls, "Number of pipeline stalls");
-
-static RegisterScheduler
- tdListDAGScheduler("list-td", "Top-down list scheduler",
- createTDListDAGScheduler);
-
-namespace {
-//===----------------------------------------------------------------------===//
-/// ScheduleDAGList - The actual list scheduler implementation. This supports
-/// top-down scheduling.
-///
-class ScheduleDAGList : public ScheduleDAGSDNodes {
-private:
- /// AvailableQueue - The priority queue to use for the available SUnits.
- ///
- SchedulingPriorityQueue *AvailableQueue;
-
- /// PendingQueue - This contains all of the instructions whose operands have
- /// been issued, but their results are not ready yet (due to the latency of
- /// the operation). Once the operands become available, the instruction is
- /// added to the AvailableQueue.
- std::vector<SUnit*> PendingQueue;
-
- /// HazardRec - The hazard recognizer to use.
- ScheduleHazardRecognizer *HazardRec;
-
-public:
- ScheduleDAGList(MachineFunction &mf,
- SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
-
- const TargetMachine &tm = mf.getTarget();
- HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
- }
-
- ~ScheduleDAGList() {
- delete HazardRec;
- delete AvailableQueue;
- }
-
- void Schedule();
-
-private:
- void ReleaseSucc(SUnit *SU, const SDep &D);
- void ReleaseSuccessors(SUnit *SU);
- void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
- void ListScheduleTopDown();
-};
-} // end anonymous namespace
-
-/// Schedule - Schedule the DAG using list scheduling.
-void ScheduleDAGList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
-
- // Build the scheduling graph.
- BuildSchedGraph(NULL);
-
- AvailableQueue->initNodes(SUnits);
-
- ListScheduleTopDown();
-
- AvailableQueue->releaseState();
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Down Scheduling
-//===----------------------------------------------------------------------===//
-
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the PendingQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
- SUnit *SuccSU = D.getSUnit();
-
-#ifndef NDEBUG
- if (SuccSU->NumPredsLeft == 0) {
- dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
- dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
- }
-#endif
- --SuccSU->NumPredsLeft;
-
- SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
-
- // If all the node's predecessors are scheduled, this node is ready
- // to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
- PendingQueue.push_back(SuccSU);
-}
-
-void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
- // Top down: release successors.
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- assert(!I->isAssignedRegDep() &&
- "The list-td scheduler doesn't yet support physreg dependencies!");
-
- ReleaseSucc(SU, *I);
- }
-}
-
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
-/// count of its successors. If a successor pending count is zero, add it to
-/// the Available queue.
-void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
-
- Sequence.push_back(SU);
- assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle);
-
- ReleaseSuccessors(SU);
- SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
-}
-
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
-/// schedulers.
-void ScheduleDAGList::ListScheduleTopDown() {
- unsigned CurCycle = 0;
-
- // Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
-
- // All leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- // It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
- }
- }
-
- // While Available queue is not empty, grab the node with the highest
- // priority. If it is not ready put it back. Schedule the node.
- std::vector<SUnit*> NotReady;
- Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty() || !PendingQueue.empty()) {
- // Check to see if any of the pending instructions are ready to issue. If
- // so, add them to the available queue.
- for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- if (PendingQueue[i]->getDepth() == CurCycle) {
- AvailableQueue->push(PendingQueue[i]);
- PendingQueue[i]->isAvailable = true;
- PendingQueue[i] = PendingQueue.back();
- PendingQueue.pop_back();
- --i; --e;
- } else {
- assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
- }
- }
-
- // If there are no instructions available, don't try to issue anything, and
- // don't advance the hazard recognizer.
- if (AvailableQueue->empty()) {
- ++CurCycle;
- continue;
- }
-
- SUnit *FoundSUnit = 0;
-
- bool HasNoopHazards = false;
- while (!AvailableQueue->empty()) {
- SUnit *CurSUnit = AvailableQueue->pop();
-
- ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
- if (HT == ScheduleHazardRecognizer::NoHazard) {
- FoundSUnit = CurSUnit;
- break;
- }
-
- // Remember if this is a noop hazard.
- HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
-
- NotReady.push_back(CurSUnit);
- }
-
- // Add the nodes that aren't ready back onto the available list.
- if (!NotReady.empty()) {
- AvailableQueue->push_all(NotReady);
- NotReady.clear();
- }
-
- // If we found a node to schedule, do it now.
- if (FoundSUnit) {
- ScheduleNodeTopDown(FoundSUnit, CurCycle);
- HazardRec->EmitInstruction(FoundSUnit);
-
- // If this is a pseudo-op node, we don't want to increment the current
- // cycle.
- if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
- } else if (!HasNoopHazards) {
- // Otherwise, we have a pipeline stall, but no other problem, just advance
- // the current cycle and try again.
- DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
- HazardRec->AdvanceCycle();
- ++NumStalls;
- ++CurCycle;
- } else {
- // Otherwise, we have no instructions to issue and we have instructions
- // that will fault if we don't do this right. This is the case for
- // processors without pipeline interlocks and other cases.
- DEBUG(dbgs() << "*** Emitting noop\n");
- HazardRec->EmitNoop();
- Sequence.push_back(0); // NULL here means noop
- ++NumNoops;
- ++CurCycle;
- }
- }
-
-#ifndef NDEBUG
- VerifySchedule(/*isBottomUp=*/false);
-#endif
-}
-
-//===----------------------------------------------------------------------===//
-// Public Constructor Functions
-//===----------------------------------------------------------------------===//
-
-/// createTDListDAGScheduler - This creates a top-down list scheduler.
-ScheduleDAGSDNodes *
-llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
-}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index e757defd3895..2cb5d37d689e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -45,10 +45,6 @@ static RegisterScheduler
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
static RegisterScheduler
- tdrListrDAGScheduler("list-tdrr",
- "Top-down register reduction list scheduling",
- createTDRRListDAGScheduler);
-static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
"order when possible",
@@ -93,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
static cl::opt<bool> DisableSchedHeight(
"disable-sched-height", cl::Hidden, cl::init(false),
cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+static cl::opt<bool> Disable2AddrHack(
+ "disable-2addr-hack", cl::Hidden, cl::init(true),
+ cl::desc("Disable scheduler's two-address hack"));
static cl::opt<int> MaxReorderWindow(
"max-sched-reorder", cl::Hidden, cl::init(6),
@@ -103,17 +102,6 @@ static cl::opt<unsigned> AvgIPC(
"sched-avg-ipc", cl::Hidden, cl::init(1),
cl::desc("Average inst/cycle whan no target itinerary exists."));
-#ifndef NDEBUG
-namespace {
- // For sched=list-ilp, Count the number of times each factor comes into play.
- enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
- FactStatic, FactOther, NumFactors };
-}
-static const char *FactorName[NumFactors] =
-{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
-static int FactorCount[NumFactors];
-#endif //!NDEBUG
-
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -121,10 +109,6 @@ namespace {
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
- /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
- /// it is top-down.
- bool isBottomUp;
-
/// NeedLatency - True if the scheduler will make use of latency information.
///
bool NeedLatency;
@@ -162,11 +146,15 @@ private:
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
+ // Hack to keep track of the inverse of FindCallSeqStart without more crazy
+ // DAG crawling.
+ DenseMap<SUnit*, SUnit*> CallSeqEndForStart;
+
public:
ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
SchedulingPriorityQueue *availqueue,
CodeGenOpt::Level OptLevel)
- : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ : ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
Topo(SUnits) {
@@ -221,8 +209,6 @@ private:
void ReleasePred(SUnit *SU, const SDep *PredEdge);
void ReleasePredecessors(SUnit *SU);
- void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
- void ReleaseSuccessors(SUnit *SU);
void ReleasePending();
void AdvanceToCycle(unsigned NextCycle);
void AdvancePastStalls(SUnit *SU);
@@ -242,15 +228,11 @@ private:
SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
- void ScheduleNodeTopDown(SUnit*);
- void ListScheduleTopDown();
-
-
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
- SUnit *NewNode = NewSUnit(N);
+ SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
Topo.InitDAGTopologicalSorting();
@@ -268,9 +250,9 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
/// need actual latency information but the hybrid scheduler does.
- bool ForceUnitLatencies() const {
+ bool forceUnitLatencies() const {
return !NeedLatency;
}
};
@@ -278,7 +260,7 @@ private:
/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
-/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
const TargetLowering *TLI,
@@ -289,7 +271,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
// Special handling for untyped values. These values can only come from
// the expansion of custom DAG-to-DAG patterns.
- if (VT == MVT::untyped) {
+ if (VT == MVT::Untyped) {
const SDNode *Node = RegDefPos.GetNode();
unsigned Opcode = Node->getMachineOpcode();
@@ -319,18 +301,16 @@ void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
<< " '" << BB->getName() << "' **********\n");
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- FactorCount[i] = 0;
- }
-#endif //!NDEBUG
CurCycle = 0;
IssueCount = 0;
MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegGens.resize(TRI->getNumRegs(), NULL);
+ // Allocate slots for each physical register, plus one for a special register
+ // to track the virtual resource of a calling sequence.
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ CallSeqEndForStart.clear();
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -343,18 +323,16 @@ void ScheduleDAGRRList::Schedule() {
HazardRec->Reset();
- // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
- if (isBottomUp)
- ListScheduleBottomUp();
- else
- ListScheduleTopDown();
+ // Execute the actual scheduling loop.
+ ListScheduleBottomUp();
-#ifndef NDEBUG
- for (int i = 0; i < NumFactors; ++i) {
- DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
- }
-#endif // !NDEBUG
AvailableQueue->releaseState();
+
+ DEBUG({
+ dbgs() << "*** Final schedule ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
}
//===----------------------------------------------------------------------===//
@@ -376,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
- if (!ForceUnitLatencies()) {
+ if (!forceUnitLatencies()) {
// Updating predecessor's height. This is now the cycle when the
// predecessor can be scheduled without causing a pipeline stall.
PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
@@ -403,6 +381,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
}
}
+/// IsChainDependent - Test if Outer is reachable from Inner through
+/// chain dependencies.
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
+ unsigned NestLevel,
+ const TargetInstrInfo *TII) {
+ SDNode *N = Outer;
+ for (;;) {
+ if (N == Inner)
+ return true;
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
+ return true;
+ return false;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ if (NestLevel == 0)
+ return false;
+ --NestLevel;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return false;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return false;
+ }
+}
+
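// Editor's sketch (illustrative comment, not part of the patch): how the
// NestLevel counter makes nested call sequences transparent to the walk
// above. With two nested lowered sequences,
//
//   CALLSEQ_BEGIN(outer) .. CALLSEQ_BEGIN(inner)
//     .. CALLSEQ_END(inner) .. CALLSEQ_END(outer)
//
// a walk climbing the chain from CALLSEQ_END(outer) crosses
// CALLSEQ_END(inner) (NestLevel 0 -> 1) and then CALLSEQ_BEGIN(inner)
// (NestLevel 1 -> 0), so the inner pair cancels out; only an unmatched
// CALLSEQ_BEGIN seen at NestLevel 0 ends the search early.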
+/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
+/// the corresponding (lowered) CALLSEQ_BEGIN node.
+///
+/// NestLevel and MaxNest are used in recursion to indicate the current level
+/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
+/// level seen so far.
+///
+/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
+/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
+static SDNode *
+FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
+ const TargetInstrInfo *TII) {
+ for (;;) {
+ // For a TokenFactor, examine each operand. There may be multiple ways
+ // to get to the CALLSEQ_BEGIN, but we need to find the path with the
+ // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) {
+ SDNode *Best = 0;
+ unsigned BestMaxNest = MaxNest;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ unsigned MyNestLevel = NestLevel;
+ unsigned MyMaxNest = MaxNest;
+ if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
+ MyNestLevel, MyMaxNest, TII))
+ if (!Best || (MyMaxNest > BestMaxNest)) {
+ Best = New;
+ BestMaxNest = MyMaxNest;
+ }
+ }
+ assert(Best);
+ MaxNest = BestMaxNest;
+ return Best;
+ }
+ // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) {
+ if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameDestroyOpcode()) {
+ ++NestLevel;
+ MaxNest = std::max(MaxNest, NestLevel);
+ } else if (N->getMachineOpcode() ==
+ (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NestLevel != 0);
+ --NestLevel;
+ if (NestLevel == 0)
+ return N;
+ }
+ }
+ // Otherwise, find the chain and continue climbing.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getValueType() == MVT::Other) {
+ N = N->getOperand(i).getNode();
+ goto found_chain_operand;
+ }
+ return 0;
+ found_chain_operand:;
+ if (N->getOpcode() == ISD::EntryToken)
+ return 0;
+ }
+}
+
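// Editor's sketch (illustrative comment, not part of the patch): why the
// TokenFactor case keeps the operand with the largest MyMaxNest. If one
// operand path reaches a CALLSEQ_BEGIN only after crossing a complete inner
// BEGIN/END pair (MyMaxNest == 2) and another reaches one directly
// (MyMaxNest == 1), the deeper path is the one threading through the calling
// sequence this CALLSEQ_END closes, so its BEGIN is the corresponding match.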
/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
@@ -440,6 +521,27 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
}
}
}
+
+ // If we're scheduling a lowered CALLSEQ_END, find the corresponding
+ // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
+ // these nodes, to prevent other calls from being interscheduled with them.
+ unsigned CallResource = TRI->getNumRegs();
+ if (!LiveRegDefs[CallResource])
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ unsigned NestLevel = 0;
+ unsigned MaxNest = 0;
+ SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+
+ SUnit *Def = &SUnits[N->getNodeId()];
+ CallSeqEndForStart[Def] = SU;
+
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = Def;
+ LiveRegGens[CallResource] = SU;
+ break;
+ }
}
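// Editor's note (inferred from the patch, not part of it): the calling
// sequence is modeled as one extra slot just past the physical registers,
// so the existing live-register bookkeeping serializes calls without any
// new mechanism:
//
//   unsigned CallResource = TRI->getNumRegs(); // one-past-the-end slot
//   LiveRegDefs[CallResource] = Def;           // SUnit of CALLSEQ_BEGIN
//   LiveRegGens[CallResource] = SU;            // SUnit of matching CALLSEQ_END
//
// While the slot is occupied, DelayForLiveRegsBottomUp (below) refuses to
// start an unrelated CALLSEQ_END, and ScheduleNodeBottomUp frees the slot
// once the CALLSEQ_BEGIN itself is scheduled.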
/// Check to see if any of the pending instructions are ready to issue. If
@@ -457,8 +559,7 @@ void ScheduleDAGRRList::ReleasePending() {
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
- unsigned ReadyCycle =
- isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ unsigned ReadyCycle = PendingQueue[i]->getHeight();
if (ReadyCycle < MinAvailableCycle)
MinAvailableCycle = ReadyCycle;
@@ -487,10 +588,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
}
else {
for (; CurCycle != NextCycle; ++CurCycle) {
- if (isBottomUp)
- HazardRec->RecedeCycle();
- else
- HazardRec->AdvanceCycle();
+ HazardRec->RecedeCycle();
}
}
// FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
@@ -511,7 +609,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// currently need to treat these nodes like real instructions.
// if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
- unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+ unsigned ReadyCycle = SU->getHeight();
// Bump CurCycle to account for latency. We assume the latency of other
// available instructions may be hidden by the stall (not a full pipe stall).
@@ -522,7 +620,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
// Calls are scheduled in their preceding cycle, so don't conflict with
// hazards from instructions after the call. EmitNode will reset the
// scoreboard state before emitting the call.
- if (isBottomUp && SU->isCall)
+ if (SU->isCall)
return;
// FIXME: For resource conflicts in very long non-pipelined stages, we
@@ -530,7 +628,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
int Stalls = 0;
while (true) {
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+ HazardRec->getHazardType(SU, -Stalls);
if (HT == ScheduleHazardRecognizer::NoHazard)
break;
@@ -568,17 +666,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
HazardRec->Reset();
return;
}
- if (isBottomUp && SU->isCall) {
+ if (SU->isCall) {
// Calls are scheduled with their preceding instructions. For bottom-up
// scheduling, clear the pipeline state before emitting.
HazardRec->Reset();
}
HazardRec->EmitInstruction(SU);
-
- if (!isBottomUp && SU->isCall) {
- HazardRec->Reset();
- }
}
static void resetVRegCycle(SUnit *SU);
@@ -607,7 +701,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
Sequence.push_back(SU);
- AvailableQueue->ScheduledNode(SU);
+ AvailableQueue->scheduledNode(SU);
// If HazardRec is disabled, and each inst counts as one cycle, then
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
@@ -630,6 +724,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
LiveRegGens[I->getReg()] = NULL;
}
}
+ // Release the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
resetVRegCycle(SU);
@@ -686,15 +794,41 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
}
}
+ // Reclaim the special call resource dependence, if this is the beginning
+ // of a call.
+ unsigned CallResource = TRI->getNumRegs();
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+ ++NumLiveRegs;
+ LiveRegDefs[CallResource] = SU;
+ LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ }
+ }
+
+ // Release the special call resource dependence, if this is the end
+ // of a call.
+ if (LiveRegGens[CallResource] == SU)
+ for (const SDNode *SUNode = SU->getNode(); SUNode;
+ SUNode = SUNode->getGluedNode()) {
+ if (SUNode->isMachineOpcode() &&
+ SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[CallResource] = NULL;
+ LiveRegGens[CallResource] = NULL;
+ }
+ }
+
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ if (!LiveRegDefs[I->getReg()])
+ ++NumLiveRegs;
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (!LiveRegDefs[I->getReg()]) {
- ++NumLiveRegs;
- }
if (LiveRegGens[I->getReg()] == NULL ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
@@ -714,7 +848,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
else {
AvailableQueue->push(SU);
}
- AvailableQueue->UnscheduledNode(SU);
+ AvailableQueue->unscheduledNode(SU);
}
/// After backtracking, the hazard checker needs to be restored to a state
@@ -805,6 +939,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
+ // Unfolding an x86 DEC64m operation results in a store, dec, and load,
+ // which can't be handled here, so bail out.
+ if (NewNodes.size() == 3)
+ return NULL;
+
DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
@@ -830,7 +969,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
LoadNode->setNodeId(LoadSU->NodeNum);
InitNumRegDefsLeft(LoadSU);
- ComputeLatency(LoadSU);
+ computeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
@@ -848,7 +987,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
- ComputeLatency(NewSU);
+ computeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
SmallVector<SDep, 4> ChainPreds;
@@ -1027,7 +1166,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
unsigned NumRes = MCID.getNumDefs();
- for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1042,7 +1181,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+ for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
// Check if Ref is live.
if (!LiveRegDefs[*AliasI]) continue;
@@ -1057,6 +1196,31 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
}
}
+/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
+/// by RegMask, and add them to LRegs.
+static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs) {
+ // Look at all live registers. Skip Reg0 and the special CallResource.
+ for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
+ if (!LiveRegDefs[i]) continue;
+ if (LiveRegDefs[i] == SU) continue;
+ if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
+ if (RegAdded.insert(i))
+ LRegs.push_back(i);
+ }
+}
+
+/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
+static const uint32_t *getNodeRegMask(const SDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (const RegisterMaskSDNode *Op =
+ dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
+ return Op->getRegMask();
+ return NULL;
+}
+
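// Editor's note (illustrative, not part of the patch): a RegisterMaskSDNode
// carries a bit vector of the registers *preserved* across a call, so a
// clobber test is a single mask lookup:
//
//   if (const uint32_t *RM = getNodeRegMask(N))
//     if (MachineOperand::clobbersPhysReg(RM, Reg))
//       /* Reg is not preserved across N; treat it as interference. */;
//
// CheckForLiveRegDefMasked above applies exactly this test to each live def.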
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -1108,10 +1272,27 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (!Node->isMachineOpcode())
continue;
+ // If we're in the middle of scheduling a call, don't begin scheduling
+ // another call. Also, don't allow any physical registers to be live across
+ // the call.
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+ // Check the special calling-sequence resource.
+ unsigned CallResource = TRI->getNumRegs();
+ if (LiveRegDefs[CallResource]) {
+ SDNode *Gen = LiveRegGens[CallResource]->getNode();
+ while (SDNode *Glued = Gen->getGluedNode())
+ Gen = Glued;
+ if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
+ LRegs.push_back(CallResource);
+ }
+ }
+ if (const uint32_t *RegMask = getNodeRegMask(Node))
+ CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);
+
const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
+ for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
@@ -1300,99 +1481,10 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
std::reverse(Sequence.begin(), Sequence.end());
#ifndef NDEBUG
- VerifySchedule(isBottomUp);
-#endif
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Down Scheduling
-//===----------------------------------------------------------------------===//
-
-/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
-/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
-void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
- SUnit *SuccSU = SuccEdge->getSUnit();
-
-#ifndef NDEBUG
- if (SuccSU->NumPredsLeft == 0) {
- dbgs() << "*** Scheduling failed! ***\n";
- SuccSU->dump(this);
- dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
- }
-#endif
- --SuccSU->NumPredsLeft;
-
- // If all the node's predecessors are scheduled, this node is ready
- // to be scheduled. Ignore the special ExitSU node.
- if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
- SuccSU->isAvailable = true;
- AvailableQueue->push(SuccSU);
- }
-}
-
-void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
- // Top down: release successors
- for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- assert(!I->isAssignedRegDep() &&
- "The list-tdrr scheduler doesn't yet support physreg dependencies!");
-
- ReleaseSucc(SU, &*I);
- }
-}
-
-/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
-/// count of its successors. If a successor pending count is zero, add it to
-/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
- DEBUG(SU->dump(this));
-
- assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
- SU->setDepthToAtLeast(CurCycle);
- Sequence.push_back(SU);
-
- ReleaseSuccessors(SU);
- SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
-}
-
-/// ListScheduleTopDown - The main loop of list scheduling for top-down
-/// schedulers.
-void ScheduleDAGRRList::ListScheduleTopDown() {
- AvailableQueue->setCurCycle(CurCycle);
-
- // Release any successors of the special Entry node.
- ReleaseSuccessors(&EntrySU);
-
- // All leaves to Available queue.
- for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
- // It is available if it has no predecessors.
- if (SUnits[i].Preds.empty()) {
- AvailableQueue->push(&SUnits[i]);
- SUnits[i].isAvailable = true;
- }
- }
-
- // While Available queue is not empty, grab the node with the highest
- // priority. If it is not ready put it back. Schedule the node.
- Sequence.reserve(SUnits.size());
- while (!AvailableQueue->empty()) {
- SUnit *CurSU = AvailableQueue->pop();
-
- if (CurSU)
- ScheduleNodeTopDown(CurSU);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
- }
-
-#ifndef NDEBUG
- VerifySchedule(isBottomUp);
+ VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}
-
//===----------------------------------------------------------------------===//
// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
@@ -1437,21 +1529,6 @@ struct bu_ls_rr_sort : public queue_sort {
bool operator()(SUnit* left, SUnit* right) const;
};
-// td_ls_rr_sort - Priority function for top down register pressure reduction
-// scheduler.
-struct td_ls_rr_sort : public queue_sort {
- enum {
- IsBottomUp = false,
- HasReadyFilter = false
- };
-
- RegReductionPQBase *SPQ;
- td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
-};
-
// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
enum {
@@ -1510,6 +1587,7 @@ protected:
std::vector<SUnit*> Queue;
unsigned CurQueueId;
bool TracksRegPressure;
+ bool SrcOrder;
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
@@ -1535,11 +1613,12 @@ public:
RegReductionPQBase(MachineFunction &mf,
bool hasReadyFilter,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
: SchedulingPriorityQueue(hasReadyFilter),
- CurQueueId(0), TracksRegPressure(tracksrp),
+ CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
@@ -1610,9 +1689,9 @@ public:
int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
- void ScheduledNode(SUnit *SU);
+ void scheduledNode(SUnit *SU);
- void UnscheduledNode(SUnit *SU);
+ void unscheduledNode(SUnit *SU);
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
@@ -1654,10 +1733,12 @@ class RegReductionPriorityQueue : public RegReductionPQBase {
public:
RegReductionPriorityQueue(MachineFunction &mf,
bool tracksrp,
+ bool srcorder,
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
- : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
+ tii, tri, tli),
Picker(this) {}
bool isBottomUp() const { return SF::IsBottomUp; }
@@ -1680,10 +1761,7 @@ public:
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
- if (isBottomUp())
- dbgs() << "Height " << SU->getHeight() << ": ";
- else
- dbgs() << "Depth " << SU->getDepth() << ": ";
+ dbgs() << "Height " << SU->getHeight() << ": ";
SU->dump(DAG);
}
}
@@ -1692,9 +1770,6 @@ public:
typedef RegReductionPriorityQueue<bu_ls_rr_sort>
BURegReductionPriorityQueue;
-typedef RegReductionPriorityQueue<td_ls_rr_sort>
-TDRegReductionPriorityQueue;
-
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
@@ -1919,7 +1994,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
return PDiff;
}
-void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+void RegReductionPQBase::scheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -1988,7 +2063,7 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
dumpRegPressure();
}
-void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+void RegReductionPQBase::unscheduledNode(SUnit *SU) {
if (!TracksRegPressure)
return;
@@ -2235,37 +2310,29 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LHeight = (int)left->getHeight() + LPenalty;
int RHeight = (int)right->getHeight() + RPenalty;
- bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
BUHasStall(left, LHeight, SPQ);
- bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
BUHasStall(right, RHeight, SPQ);
// If scheduling one of the node will cause a pipeline stall, delay it.
// If scheduling either one of the node will cause a pipeline stall, sort
// them according to their height.
if (LStall) {
- if (!RStall) {
- DEBUG(++FactorCount[FactStall]);
+ if (!RStall)
return 1;
- }
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactStall]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
- } else if (RStall) {
- DEBUG(++FactorCount[FactStall]);
+ } else if (RStall)
return -1;
- }
// If either node is scheduling for latency, sort them by height/depth
// and latency.
- if (!checkPref || (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency)) {
+ if (!checkPref || (left->SchedulingPref == Sched::ILP ||
+ right->SchedulingPref == Sched::ILP)) {
if (DisableSchedCycles) {
- if (LHeight != RHeight) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LHeight != RHeight)
return LHeight > RHeight ? 1 : -1;
- }
}
else {
// If neither instruction stalls (!LStall && !RStall) then
@@ -2274,17 +2341,14 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
int LDepth = left->getDepth() - LPenalty;
int RDepth = right->getDepth() - RPenalty;
if (LDepth != RDepth) {
- DEBUG(++FactorCount[FactDepth]);
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
<< ") depth " << RDepth << "\n");
return LDepth < RDepth ? 1 : -1;
}
}
- if (left->Latency != right->Latency) {
- DEBUG(++FactorCount[FactOther]);
+ if (left->Latency != right->Latency)
return left->Latency > right->Latency ? 1 : -1;
- }
}
return 0;
}
@@ -2298,7 +2362,6 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
bool LHasPhysReg = left->hasPhysRegDefs;
bool RHasPhysReg = right->hasPhysRegDefs;
if (LHasPhysReg != RHasPhysReg) {
- DEBUG(++FactorCount[FactRegUses]);
#ifndef NDEBUG
const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
#endif
@@ -2324,10 +2387,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
}
- if (LPriority != RPriority) {
- DEBUG(++FactorCount[FactStatic]);
+ if (LPriority != RPriority)
return LPriority > RPriority;
- }
// If one or both of the nodes are calls and their sethi-ullman numbers are
// the same, then keep source order.
@@ -2360,18 +2421,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
// This creates more short live intervals.
unsigned LDist = closestSucc(left);
unsigned RDist = closestSucc(right);
- if (LDist != RDist) {
- DEBUG(++FactorCount[FactOther]);
+ if (LDist != RDist)
return LDist < RDist;
- }
// How many registers becomes live when the node is scheduled.
unsigned LScratch = calcMaxScratches(left);
unsigned RScratch = calcMaxScratches(right);
- if (LScratch != RScratch) {
- DEBUG(++FactorCount[FactOther]);
+ if (LScratch != RScratch)
return LScratch > RScratch;
- }
// Comparing latency against a call makes little sense unless the node
// is register pressure-neutral.
@@ -2386,20 +2443,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
return result > 0;
}
else {
- if (left->getHeight() != right->getHeight()) {
- DEBUG(++FactorCount[FactHeight]);
+ if (left->getHeight() != right->getHeight())
return left->getHeight() > right->getHeight();
- }
- if (left->getDepth() != right->getDepth()) {
- DEBUG(++FactorCount[FactDepth]);
+ if (left->getDepth() != right->getDepth())
return left->getDepth() < right->getDepth();
- }
}
assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
- DEBUG(++FactorCount[FactOther]);
return (left->NodeQueueId > right->NodeQueueId);
}
@@ -2459,13 +2511,11 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
if (LHigh && !RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
<< right->NodeNum << ")\n");
return true;
}
else if (!LHigh && RHigh) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
<< left->NodeNum << ")\n");
return false;
@@ -2529,7 +2579,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
}
if (!DisableSchedRegPressure && LPDiff != RPDiff) {
- DEBUG(++FactorCount[FactPressureDiff]);
DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
<< " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
return LPDiff > RPDiff;
@@ -2538,7 +2587,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
bool LReduce = canEnableCoalescing(left);
bool RReduce = canEnableCoalescing(right);
- DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]);
if (LReduce && !RReduce) return false;
if (RReduce && !LReduce) return true;
}
@@ -2546,17 +2594,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
- DEBUG(++FactorCount[FactRegUses]);
return LLiveUses < RLiveUses;
}
if (!DisableSchedStalls) {
bool LStall = BUHasStall(left, left->getHeight(), SPQ);
bool RStall = BUHasStall(right, right->getHeight(), SPQ);
- if (LStall != RStall) {
- DEBUG(++FactorCount[FactHeight]);
+ if (LStall != RStall)
return left->getHeight() > right->getHeight();
- }
}
if (!DisableSchedCriticalPath) {
@@ -2565,17 +2610,14 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
<< left->getDepth() << " != SU(" << right->NodeNum << "): "
<< right->getDepth() << "\n");
- DEBUG(++FactorCount[FactDepth]);
return left->getDepth() < right->getDepth();
}
}
if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
int spread = (int)left->getHeight() - (int)right->getHeight();
- if (std::abs(spread) > MaxReorderWindow) {
- DEBUG(++FactorCount[FactHeight]);
+ if (std::abs(spread) > MaxReorderWindow)
return left->getHeight() > right->getHeight();
- }
}
return BURRSort(left, right, SPQ);
@@ -2584,9 +2626,10 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
+ if (!Disable2AddrHack)
+ AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
- if (!TracksRegPressure)
+ if (!TracksRegPressure && !SrcOrder)
PrescheduleNodesWithMultipleUses();
// Calculate node priorities.
CalculateSethiUllmanNumbers();
@@ -2628,9 +2671,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
ScheduleDAGRRList *scheduleDAG,
const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- const unsigned *ImpDefs
+ const uint16_t *ImpDefs
= TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
- if(!ImpDefs)
+ const uint32_t *RegMask = getNodeRegMask(SU->getNode());
+ if (!ImpDefs && !RegMask)
return false;
for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
@@ -2641,14 +2685,18 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
if (!PI->isAssignedRegDep())
continue;
- for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) {
- // Return true if SU clobbers this physical register use and the
- // definition of the register reaches from DepSU. IsReachable queries a
- // topological forward sort of the DAG (following the successors).
- if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
- scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
- return true;
- }
+ if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
+
+ if (ImpDefs)
+ for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
+ // Return true if SU clobbers this physical register use and the
+ // definition of the register reaches from DepSU. IsReachable queries
+ // a topological forward sort of the DAG (following the successors).
+ if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
+ scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
+ return true;
}
}
return false;
@@ -2661,16 +2709,17 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const TargetRegisterInfo *TRI) {
SDNode *N = SuccSU->getNode();
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+ const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
- const unsigned *SUImpDefs =
+ const uint16_t *SUImpDefs =
TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
- if (!SUImpDefs)
- return false;
+ const uint32_t *SURegMask = getNodeRegMask(SUNode);
+ if (!SUImpDefs && !SURegMask)
+ continue;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue || VT == MVT::Other)
@@ -2678,6 +2727,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
if (!N->hasAnyUseOfValue(i))
continue;
unsigned Reg = ImpDefs[i - NumDefs];
+ if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
+ return true;
+ if (!SUImpDefs)
+ continue;
for (;*SUImpDefs; ++SUImpDefs) {
unsigned SUReg = *SUImpDefs;
if (TRI->regsOverlap(Reg, SUReg))
@@ -2887,69 +2940,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
}
}
-/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
-/// predecessors of the successors of the SUnit SU. Stop when the provided
-/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
- unsigned Limit) {
- unsigned Sum = 0;
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- const SUnit *SuccSU = I->getSUnit();
- for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(),
- EE = SuccSU->Preds.end(); II != EE; ++II) {
- SUnit *PredSU = II->getSUnit();
- if (!PredSU->isScheduled)
- if (++Sum > Limit)
- return Sum;
- }
- }
- return Sum;
-}
-
-
-// Top down
-bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
- if (int res = checkSpecialNodes(left, right))
- return res < 0;
-
- unsigned LPriority = SPQ->getNodePriority(left);
- unsigned RPriority = SPQ->getNodePriority(right);
- bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode();
- bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode();
- bool LIsFloater = LIsTarget && left->NumPreds == 0;
- bool RIsFloater = RIsTarget && right->NumPreds == 0;
- unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0;
- unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0;
-
- if (left->NumSuccs == 0 && right->NumSuccs != 0)
- return false;
- else if (left->NumSuccs != 0 && right->NumSuccs == 0)
- return true;
-
- if (LIsFloater)
- LBonus -= 2;
- if (RIsFloater)
- RBonus -= 2;
- if (left->NumSuccs == 1)
- LBonus += 2;
- if (right->NumSuccs == 1)
- RBonus += 2;
-
- if (LPriority+LBonus != RPriority+RBonus)
- return LPriority+LBonus < RPriority+RBonus;
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
-
- if (left->NumSuccsLeft != right->NumSuccsLeft)
- return left->NumSuccsLeft > right->NumSuccsLeft;
-
- assert(left->NodeQueueId && right->NodeQueueId &&
- "NodeQueueId cannot be zero");
- return (left->NodeQueueId > right->NodeQueueId);
-}
-
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -2962,21 +2952,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
- new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
- PQ->setScheduleDAG(SD);
- return SD;
-}
-
-llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
- CodeGenOpt::Level OptLevel) {
- const TargetMachine &TM = IS->TM;
- const TargetInstrInfo *TII = TM.getInstrInfo();
- const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
- TDRegReductionPriorityQueue *PQ =
- new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -2990,7 +2966,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
- new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -3005,7 +2981,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
HybridBURRPriorityQueue *PQ =
- new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
@@ -3021,7 +2997,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
const TargetLowering *TLI = &IS->getTargetLowering();
ILPBURRPriorityQueue *PQ =
- new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 71f07d6fa47a..69dd813b24e0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,6 +17,8 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -44,20 +46,26 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf),
+ : ScheduleDAG(mf), BB(0), DAG(0),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
-void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos) {
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
+ BB = bb;
DAG = dag;
- ScheduleDAG::Run(bb, insertPos);
+
+ // Clear the scheduler's SUnit DAG.
+ ScheduleDAG::clearDAG();
+ Sequence.clear();
+
+ // Invoke the target's selection of scheduler.
+ Schedule();
}
/// NewSUnit - Creates a new SUnit and returns a pointer to it.
///
-SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
const SUnit *Addr = 0;
if (!SUnits.empty())
@@ -79,7 +87,7 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
}
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
- SUnit *SU = NewSUnit(Old->getNode());
+ SUnit *SU = newSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
SU->isVRegCycle = Old->isVRegCycle;
@@ -302,7 +310,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
- SUnit *NodeSUnit = NewSUnit(NI);
+ SUnit *NodeSUnit = newSUnit(NI);
// See if anything is glued to this node, if so, add them to glued
// nodes. Nodes can have at most one glue input and one glue output. Glue
@@ -360,7 +368,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
InitNumRegDefsLeft(NodeSUnit);
// Assign the Latency field of NodeSUnit using target-provided information.
- ComputeLatency(NodeSUnit);
+ computeLatency(NodeSUnit);
}
// Find all call operands.
@@ -382,7 +390,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
+ bool UnitLatencies = forceUnitLatencies();
// Pass 2: add the preds, succs, etc.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
@@ -448,7 +456,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+ computeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -541,7 +549,7 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
}
}
-void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+void ScheduleDAGSDNodes::computeLatency(SUnit *SU) {
SDNode *N = SU->getNode();
// TokenFactor operands are considered zero latency, and some schedulers
@@ -553,7 +561,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies()) {
+ if (forceUnitLatencies()) {
SU->Latency = 1;
return;
}
@@ -575,10 +583,10 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
SU->Latency += TII->getInstrLatency(InstrItins, N);
}
-void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const {
// Check to see if the scheduler cares about latencies.
- if (ForceUnitLatencies())
+ if (forceUnitLatencies())
return;
if (dep.getKind() != SDep::Data)
@@ -621,6 +629,30 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
}
}
+void ScheduleDAGSDNodes::dumpSchedule() const {
+ for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+ if (SUnit *SU = Sequence[i])
+ SU->dump(this);
+ else
+ dbgs() << "**** NOOP ****\n";
+ }
+}
+
+#ifndef NDEBUG
+/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that
+/// their state is consistent with the nodes listed in Sequence.
+///
+void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) {
+ unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp);
+ unsigned Noops = 0;
+ for (unsigned i = 0, e = Sequence.size(); i != e; ++i)
+ if (!Sequence[i])
+ ++Noops;
+ assert(Sequence.size() - Noops == ScheduledNodes &&
+ "The number of nodes scheduled doesn't match the expected number!");
+}
+#endif // NDEBUG
+
namespace {
struct OrderSorter {
bool operator()(const std::pair<unsigned, MachineInstr*> &A,
@@ -686,9 +718,48 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
+void ScheduleDAGSDNodes::
+EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos) {
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (I->getSUnit()->CopyDstRC) {
+ // Copy to physical register.
+ DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit());
+ assert(VRI != VRBaseMap.end() && "Node emitted out of order - late");
+ // Find the destination physical register.
+ unsigned Reg = 0;
+ for (SUnit::const_succ_iterator II = SU->Succs.begin(),
+ EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain succs
+ if (II->getReg()) {
+ Reg = II->getReg();
+ break;
+ }
+ }
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
+ } else {
+ // Copy from physical register.
+ assert(I->getReg() && "Unknown physical register!");
+ unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
+ (void)isNew; // Silence compiler warning.
+ assert(isNew && "Node emitted out of order - early");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
+ }
+ break;
+ }
+}
-/// EmitSchedule - Emit the machine code in scheduled order.
-MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+/// EmitSchedule - Emit the machine code in scheduled order. Return the new
+/// InsertPos and MachineBasicBlock that contains this insertion
+/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does
+/// not necessarily refer to the returned BB. The emitter may split blocks.
+MachineBasicBlock *ScheduleDAGSDNodes::
+EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
@@ -711,7 +782,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SUnit *SU = Sequence[i];
if (!SU) {
// Null SUnit* is a noop.
- EmitNoop();
+ TII->insertNoop(*Emitter.getBlock(), InsertPos);
continue;
}
@@ -719,7 +790,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
- EmitPhysRegCopy(SU, CopyVRBaseMap);
+ EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos);
continue;
}
@@ -784,19 +855,24 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
+ SmallVector<MachineInstr*, 8> DbgMIs;
while (DI != DE) {
- MachineBasicBlock *InsertBB = Emitter.getBlock();
- MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
- if (!(*DI)->isInvalidated()) {
- MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap);
- if (DbgMI)
- InsertBB->insert(Pos, DbgMI);
- }
+ if (!(*DI)->isInvalidated())
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
++DI;
}
+
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator();
+ InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end());
}
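// Editor's note: collecting the trailing DBG_VALUEs in DbgMIs and inserting
// them in one batch (rather than re-querying getFirstTerminator() for each
// one, as the removed lines did) keeps their relative order and computes the
// insertion point once; this rationale is inferred from the change, not
// stated in the patch.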
- BB = Emitter.getBlock();
InsertPos = Emitter.getInsertPos();
- return BB;
+ return Emitter.getBlock();
+}
+
+/// Return the basic block label.
+std::string ScheduleDAGSDNodes::getDAGName() const {
+ return "sunit-dag." + BB->getFullName();
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 9c27b2ea02ec..75940ec33ddc 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -35,17 +35,20 @@ namespace llvm {
///
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
+ MachineBasicBlock *BB;
SelectionDAG *DAG; // DAG of the current basic block
const InstrItineraryData *InstrItins;
+ /// The schedule. Null SUnit*'s represent noop instructions.
+ std::vector<SUnit*> Sequence;
+
explicit ScheduleDAGSDNodes(MachineFunction &mf);
virtual ~ScheduleDAGSDNodes() {}
/// Run - perform scheduling.
///
- void Run(SelectionDAG *dag, MachineBasicBlock *bb,
- MachineBasicBlock::iterator insertPos);
+ void Run(SelectionDAG *dag, MachineBasicBlock *bb);
/// isPassiveNode - Return true if the node is a non-scheduled leaf.
///
@@ -53,6 +56,7 @@ namespace llvm {
if (isa<ConstantSDNode>(Node)) return true;
if (isa<ConstantFPSDNode>(Node)) return true;
if (isa<RegisterSDNode>(Node)) return true;
+ if (isa<RegisterMaskSDNode>(Node)) return true;
if (isa<GlobalAddressSDNode>(Node)) return true;
if (isa<BasicBlockSDNode>(Node)) return true;
if (isa<FrameIndexSDNode>(Node)) return true;
@@ -67,7 +71,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and returns a pointer to it.
///
- SUnit *NewSUnit(SDNode *N);
+ SUnit *newSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -78,7 +82,7 @@ namespace llvm {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- virtual void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AliasAnalysis *AA);
/// InitVRegCycleFlag - Set isVRegCycle if this node's single use is
/// CopyToReg and its only active data operands are CopyFromReg within a
@@ -90,30 +94,41 @@ namespace llvm {
///
void InitNumRegDefsLeft(SUnit *SU);
- /// ComputeLatency - Compute node latency.
+ /// computeLatency - Compute node latency.
///
- virtual void ComputeLatency(SUnit *SU);
+ virtual void computeLatency(SUnit *SU);
- /// ComputeOperandLatency - Override dependence edge latency using
+ /// computeOperandLatency - Override dependence edge latency using
/// operand use/def information
///
- virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const { }
- virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
unsigned OpIdx, SDep& dep) const;
- virtual MachineBasicBlock *EmitSchedule();
-
/// Schedule - Order nodes according to selected style, filling
/// in the Sequence member.
///
virtual void Schedule() = 0;
+ /// VerifyScheduledSequence - Verify that all SUnits are scheduled and
+ /// consistent with the Sequence of scheduled instructions.
+ void VerifyScheduledSequence(bool isBottomUp);
+
+ /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+ /// according to the order specified in Sequence.
+ ///
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
virtual void dumpNode(const SUnit *SU) const;
+ void dumpSchedule() const;
+
virtual std::string getGraphNodeLabel(const SUnit *SU) const;
+ virtual std::string getDAGName() const;
+
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
/// RegDefIter - In place iteration over the values defined by an
@@ -159,6 +174,9 @@ namespace llvm {
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
void AddSchedEdges();
+
+ void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
+ MachineBasicBlock::iterator InsertPos);
};
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
new file mode 100644
index 000000000000..c8512914c1e2
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -0,0 +1,276 @@
+//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
+//
+//===----------------------------------------------------------------------===//
+
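// Editor's sketch (assumed shape, not part of the patch): the top-down list
// scheduling loop described above, reduced to a skeleton. The helper names
// moveReadyPending, pickHazardFree, and onlyStructuralStalls are hypothetical
// labels for steps the real code below spells out inline.
//
//   unsigned CurCycle = 0;
//   while (!Available.empty() || !Pending.empty()) {
//     moveReadyPending(CurCycle);            // operands finished this cycle
//     SUnit *SU = pickHazardFree(Available); // highest priority, no hazard
//     if (SU) {                              // issue it
//       scheduleNodeTopDown(SU, CurCycle);
//       HazardRec->EmitInstruction(SU);
//     } else if (onlyStructuralStalls()) {   // stall: just wait a cycle
//       HazardRec->AdvanceCycle();
//       ++CurCycle;
//     } else {                               // no interlocks: pad with noop
//       HazardRec->EmitNoop();
//       ++CurCycle;
//     }
//   }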
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+
+static RegisterScheduler
+ VLIWScheduler("vliw-td", "VLIW scheduler",
+ createVLIWDAGScheduler);
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This
+/// supports / top-down scheduling.
+///
+class ScheduleDAGVLIW : public ScheduleDAGSDNodes {
+private:
+ /// AvailableQueue - The priority queue to use for the available SUnits.
+ ///
+ SchedulingPriorityQueue *AvailableQueue;
+
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// AA - AliasAnalysis for making memory reference queries.
+ AliasAnalysis *AA;
+
+public:
+ ScheduleDAGVLIW(MachineFunction &mf,
+ AliasAnalysis *aa,
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
+
+ ~ScheduleDAGVLIW() {
+ delete HazardRec;
+ delete AvailableQueue;
+ }
+
+ void Schedule();
+
+private:
+ void releaseSucc(SUnit *SU, const SDep &D);
+ void releaseSuccessors(SUnit *SU);
+ void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+ void listScheduleTopDown();
+};
+} // end anonymous namespace
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGVLIW::Schedule() {
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " '" << BB->getName() << "' **********\n");
+
+ // Build the scheduling graph.
+ BuildSchedGraph(AA);
+
+ AvailableQueue->initNodes(SUnits);
+
+ listScheduleTopDown();
+
+ AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
+ SUnit *SuccSU = D.getSUnit();
+
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(this);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(0);
+ }
+#endif
+ --SuccSU->NumPredsLeft;
+
+ SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
+
+ // If all the node's predecessors are scheduled, this node is ready
+ // to be scheduled. Ignore the special ExitSU node.
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
+ PendingQueue.push_back(SuccSU);
+ }
+}
+
+void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) {
+ // Top down: release successors.
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ assert(!I->isAssignedRegDep() &&
+ "The list-td scheduler doesn't yet support physreg dependencies!");
+
+ releaseSucc(SU, *I);
+ }
+}
+
+/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors. If a successor pending count is zero, add it to
+/// the Available queue.
+void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+ DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(SU->dump(this));
+
+ Sequence.push_back(SU);
+ assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+ SU->setDepthToAtLeast(CurCycle);
+
+ releaseSuccessors(SU);
+ SU->isScheduled = true;
+ AvailableQueue->scheduledNode(SU);
+}
+
+/// listScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void ScheduleDAGVLIW::listScheduleTopDown() {
+ unsigned CurCycle = 0;
+
+ // Release any successors of the special Entry node.
+ releaseSuccessors(&EntrySU);
+
+ // All leaves to AvailableQueue.
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ // It is available if it has no predecessors.
+ if (SUnits[i].Preds.empty()) {
+ AvailableQueue->push(&SUnits[i]);
+ SUnits[i].isAvailable = true;
+ }
+ }
+
+ // While AvailableQueue is not empty, grab the node with the highest
+ // priority. If it is not ready put it back. Schedule the node.
+ std::vector<SUnit*> NotReady;
+ Sequence.reserve(SUnits.size());
+ while (!AvailableQueue->empty() || !PendingQueue.empty()) {
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ if (PendingQueue[i]->getDepth() == CurCycle) {
+ AvailableQueue->push(PendingQueue[i]);
+ PendingQueue[i]->isAvailable = true;
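+ // Erase by overwriting with the last element; order in PendingQueue
+ // does not matter, and --i below revisits the swapped-in entry.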
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+ else {
+ assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
+ }
+ }
+
+ // If there are no instructions available, don't try to issue anything, and
+ // don't advance the hazard recognizer.
+ if (AvailableQueue->empty()) {
+ // Reset DFA state.
+ AvailableQueue->scheduledNode(0);
+ ++CurCycle;
+ continue;
+ }
+
+ SUnit *FoundSUnit = 0;
+
+ bool HasNoopHazards = false;
+ while (!AvailableQueue->empty()) {
+ SUnit *CurSUnit = AvailableQueue->pop();
+
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+ if (HT == ScheduleHazardRecognizer::NoHazard) {
+ FoundSUnit = CurSUnit;
+ break;
+ }
+
+ // Remember if this is a noop hazard.
+ HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+ NotReady.push_back(CurSUnit);
+ }
+
+ // Add the nodes that aren't ready back onto the available list.
+ if (!NotReady.empty()) {
+ AvailableQueue->push_all(NotReady);
+ NotReady.clear();
+ }
+
+ // If we found a node to schedule, do it now.
+ if (FoundSUnit) {
+ scheduleNodeTopDown(FoundSUnit, CurCycle);
+ HazardRec->EmitInstruction(FoundSUnit);
+
+ // If this is a pseudo-op node, we don't want to increment the current
+ // cycle.
+ if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
+ ++CurCycle;
+ } else if (!HasNoopHazards) {
+ // Otherwise, we have a pipeline stall but no other problem; just advance
+ // the current cycle and try again.
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n");
+ HazardRec->AdvanceCycle();
+ ++NumStalls;
+ ++CurCycle;
+ } else {
+ // Otherwise, we have no instructions to issue and we have instructions
+ // that will fault if we don't do this right. This is the case for
+ // processors without pipeline interlocks and other cases.
+ DEBUG(dbgs() << "*** Emitting noop\n");
+ HazardRec->EmitNoop();
+ Sequence.push_back(0); // NULL here means noop
+ ++NumNoops;
+ ++CurCycle;
+ }
+ }
+
+#ifndef NDEBUG
+ VerifyScheduledSequence(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+/// createVLIWDAGScheduler - This creates a top-down list scheduler.
+ScheduleDAGSDNodes *
+llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
+}
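
The RegisterScheduler entry above makes this scheduler selectable by name; the first argument is the value accepted by llc's -pre-RA-sched option, so -pre-RA-sched=vliw-td should pick this implementation. A hypothetical out-of-tree registration following the same pattern (the names below are invented, and since ScheduleDAGVLIW is file-local a real variant would define its own class):

  static ScheduleDAGSDNodes *createMyVLIWScheduler(SelectionDAGISel *IS,
                                                   CodeGenOpt::Level) {
    // Reuses the resource-driven priority queue, as the factory above does.
    return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS));
  }
  static RegisterScheduler
    MyVLIWScheduler("my-vliw-td", "hypothetical VLIW scheduler variant",
                    createMyVLIWScheduler);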
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 20bea8e4c9e9..92671d1678c6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
@@ -63,6 +62,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
+ case MVT::f16: return &APFloat::IEEEhalf;
case MVT::f32: return &APFloat::IEEEsingle;
case MVT::f64: return &APFloat::IEEEdouble;
case MVT::f80: return &APFloat::x87DoubleExtended;
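
The new MVT::f16 entry lets the constant-folding paths below round values to half precision. A minimal sketch of the conversion this enables, assuming this era's APFloat API:

  APFloat V(1.0);                       // start from a double-precision constant
  bool ignored;
  // Round to IEEE half precision; status (overflow/inexact) is ignored here.
  V.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &ignored);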
@@ -125,20 +125,29 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
if (i == e) return false;
// Do not accept build_vectors that aren't all constants or which have non-~0
- // elements.
+ // elements. We have to be a bit careful here, as the type of the constant
+ // may not be the same as the type of the vector elements due to type
+ // legalization (the elements are promoted to a legal type for the target and
+ // a vector of a type may be legal when the base element type is not).
+ // We only want to check enough bits to cover the vector elements, because
+ // we care if the resultant vector is all ones, not whether the individual
+ // constants are.
SDValue NotZero = N->getOperand(i);
+ unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
if (isa<ConstantSDNode>(NotZero)) {
- if (!cast<ConstantSDNode>(NotZero)->isAllOnesValue())
+ if (cast<ConstantSDNode>(NotZero)->getAPIntValue().countTrailingOnes() <
+ EltSize)
return false;
} else if (isa<ConstantFPSDNode>(NotZero)) {
- if (!cast<ConstantFPSDNode>(NotZero)->getValueAPF().
- bitcastToAPInt().isAllOnesValue())
+ if (cast<ConstantFPSDNode>(NotZero)->getValueAPF()
+ .bitcastToAPInt().countTrailingOnes() < EltSize)
return false;
} else
return false;
// Okay, we have at least one ~0 value, check to see if the rest match or are
- // undefs.
+ // undefs. Even with the above element type twiddling, this should be OK, as
+ // the same type legalization should have applied to all the elements.
for (++i; i != e; ++i)
if (N->getOperand(i) != NotZero &&
N->getOperand(i).getOpcode() != ISD::UNDEF)
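
A worked instance of the trailing-ones test above, with assumed values: a <4 x i8> build_vector whose elements were promoted to i32 by legalization carries the constant 0x000000FF per element.

  APInt Elt(32, 0xFF);        // promoted element constant
  unsigned EltSize = 8;       // original i8 element width
  // 8 trailing ones >= 8 element bits, so this element counts as all-ones.
  bool Covered = Elt.countTrailingOnes() >= EltSize;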
@@ -384,7 +393,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::Register:
ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
break;
-
+ case ISD::RegisterMask:
+ ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask());
+ break;
case ISD::SRCVALUE:
ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
break;
@@ -475,7 +486,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
///
static inline unsigned
encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
- bool isNonTemporal) {
+ bool isNonTemporal, bool isInvariant) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
@@ -483,7 +494,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
return ConvType |
(AM << 2) |
(isVolatile << 5) |
- (isNonTemporal << 6);
+ (isNonTemporal << 6) |
+ (isInvariant << 7);
}
//===----------------------------------------------------------------------===//
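
With the new bit, the packed flags occupy eight bits: [1:0] ConvType, [4:2] AM, [5] volatile, [6] non-temporal, [7] invariant. For example, a volatile, invariant, non-extending unindexed load encodes as (enum values assumed to be zero, as in this era's ISD enums):

  unsigned Flags = 0 /*NON_EXTLOAD*/ | (0 /*UNINDEXED*/ << 2) |
                   (1 << 5) | (0 << 6) | (1 << 7);   // == 0xA0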
@@ -564,6 +576,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){
SmallVector<SDNode*, 16> DeadNodes(1, N);
+
+ // Create a dummy node that adds a reference to the root node, preventing
+ // it from being deleted. (This matters if the root is an operand of the
+ // dead node.)
+ HandleSDNode Dummy(getRoot());
+
RemoveDeadNodes(DeadNodes, UpdateListener);
}
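
The HandleSDNode guard above is the DAG's standard way to pin a value across deletions; a sketch of the idiom as it appears in caller code:

  HandleSDNode Dummy(DAG.getRoot());   // extra use keeps the root alive
  // ... operations that may delete or replace nodes ...
  DAG.setRoot(Dummy.getValue());       // re-read the possibly-updated value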
@@ -834,9 +852,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm)
+SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+ OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
Ordering = new SDNodeOrdering();
@@ -1025,16 +1043,14 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
return getConstantFP(APFloat((float)Val), VT, isTarget);
else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
- else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) {
bool ignored;
APFloat apf = APFloat(Val);
apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
&ignored);
return getConstantFP(apf, VT, isTarget);
- } else {
- assert(0 && "Unsupported type in getConstantFP");
- return SDValue();
- }
+ } else
+ llvm_unreachable("Unsupported type in getConstantFP");
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
@@ -1369,6 +1385,20 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(N, 0);
}
+SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ ID.AddPointer(RegMask);
+ void *IP = 0;
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ return SDValue(E, 0);
+
+ SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask);
+ CSEMap.InsertNode(N, IP);
+ AllNodes.push_back(N);
+ return SDValue(N, 0);
+}
+
SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
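
A register-mask node would typically be attached by a target's call lowering, replacing long chains of per-register dependencies; a hedged sketch, assuming this era's TargetRegisterInfo hook:

  // TRI, CallConv, and Ops are assumed to be in scope in the lowering code.
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  Ops.push_back(DAG.getRegisterMask(Mask));   // clobber info in one operand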
@@ -1598,7 +1628,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
return (KnownZero & Mask) == Mask;
}
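
After this interface change, ComputeMaskedBits always computes across the full value width and callers apply their own mask; a minimal sketch of the new calling convention (Op and BitWidth assumed in scope):

  APInt KnownZero, KnownOne;
  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
  // Caller-side masking replaces the old demanded-bits parameter:
  APInt Low8 = APInt::getLowBitsSet(BitWidth, 8);
  bool LowByteZero = (KnownZero & Low8) == Low8;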
@@ -1607,15 +1637,12 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
/// known to be either zero or one and return them in the KnownZero/KnownOne
/// bitsets.
-void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
- APInt &KnownZero, APInt &KnownOne,
- unsigned Depth) const {
- unsigned BitWidth = Mask.getBitWidth();
- assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
- "Mask size mismatches value type size!");
+void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
- if (Depth == 6 || Mask == 0)
+ if (Depth == 6)
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
@@ -1623,14 +1650,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
- KnownZero = ~KnownOne & Mask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return;
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1640,9 +1666,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero |= KnownZero2;
return;
case ISD::OR:
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
- KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1652,8 +1677,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownOne |= KnownOne2;
return;
case ISD::XOR: {
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1665,9 +1690,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::MUL: {
- APInt Mask2 = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1686,33 +1710,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
LeadZ = std::min(LeadZ, BitWidth);
KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
APInt::getHighBitsSet(BitWidth, LeadZ);
- KnownZero &= Mask;
return;
}
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(Op.getOperand(1),
- AllOnes, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
- KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
return;
}
case ISD::SELECT:
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1721,8 +1741,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZero &= KnownZero2;
return;
case ISD::SELECT_CC:
- ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
@@ -1754,8 +1774,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero <<= ShAmt;
KnownOne <<= ShAmt;
@@ -1772,13 +1791,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
KnownZero |= HighBits; // High bits known zero.
}
return;
@@ -1790,15 +1808,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (ShAmt >= BitWidth)
return;
- APInt InDemandedMask = (Mask << ShAmt);
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
- if (HighBits.getBoolValue())
- InDemandedMask |= APInt::getSignBit(BitWidth);
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1820,10 +1834,10 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits);
APInt InSignBit = APInt::getSignBit(EBits);
- APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+ APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits);
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
@@ -1831,8 +1845,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
- KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownOne &= InputDemandedBits;
+ KnownZero &= InputDemandedBits;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
@@ -1850,7 +1865,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
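
The bound holds because a bit-count result never exceeds BitWidth; with an assumed BitWidth of 32:

  unsigned LowBits = Log2_32(32) + 1;                        // 6 bits hold 0..32
  APInt KnownZero = APInt::getHighBitsSet(32, 32 - LowBits); // bits 6..31 zero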
@@ -1858,22 +1875,23 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
return;
}
case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
if (ISD::isZEXTLoad(Op.getNode())) {
- LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarType().getSizeInBits();
- KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ } else if (const MDNode *Ranges = LD->getRanges()) {
+ computeMaskedBitsLoad(*Ranges, KnownZero);
}
return;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -1883,17 +1901,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask.trunc(InBits);
-
- // If any of the sign extended bits are demanded, we know that the sign
- // bit is demanded. Temporarily set this bit in the mask for our callee.
- if (NewBits.getBoolValue())
- InMask |= InSignBit;
+ APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
bool SignBitKnownZero = KnownZero.isNegative();
@@ -1901,13 +1913,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
assert(!(SignBitKnownZero && SignBitKnownOne) &&
"Sign bit can't be known to be both zero and one!");
- // If the sign bit wasn't actually demanded by our caller, we don't
- // want it set in the KnownZero and KnownOne result values. Reset the
- // mask and reapply it to the result values.
- InMask = Mask.trunc(InBits);
- KnownZero &= InMask;
- KnownOne &= InMask;
-
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -1921,10 +1926,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.trunc(InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
return;
@@ -1932,10 +1936,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask.zext(InBits);
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
@@ -1944,9 +1947,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
- KnownOne, Depth+1);
- KnownZero |= (~InMask) & Mask;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ KnownZero |= (~InMask);
return;
}
case ISD::FGETSIGN:
@@ -1963,8 +1965,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -1972,7 +1973,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if ((KnownZero2 & MaskV) == MaskV) {
unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
// Top bits known zero.
- KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2);
}
}
}
@@ -1983,13 +1984,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- APInt Mask2 = APInt::getLowBitsSet(BitWidth,
- BitWidth - Mask.countLeadingZeros());
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
- ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
@@ -2013,7 +2012,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2028,10 +2027,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// the upper bits are all one.
if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
KnownOne |= ~LowBits;
-
- KnownZero &= Mask;
- KnownOne &= Mask;
-
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
@@ -2041,9 +2036,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- APInt Mask2 = LowBits & Mask;
- KnownZero |= ~LowBits & Mask;
- ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+ KnownZero |= ~LowBits;
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
@@ -2051,16 +2045,13 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- APInt AllOnes = APInt::getAllOnesValue(BitWidth);
- ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
- Depth+1);
- ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
- Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
- KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+ KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
return;
}
case ISD::FrameIndex:
@@ -2080,8 +2071,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
- Depth);
+ TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
return;
}
}
@@ -2205,12 +2195,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If we are subtracting one from a positive number, there is no carry
@@ -2221,8 +2210,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
if (Tmp2 == 1) return 1;
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::SUB:
Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
@@ -2232,11 +2220,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+ ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
- if ((KnownZero | APInt(VTBits, 1)) == Mask)
+ if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
return VTBits;
// If the input is known to be positive (the sign bit is known clear),
@@ -2251,8 +2238,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// is, at worst, one more bit than the inputs.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
- return std::min(Tmp, Tmp2)-1;
- break;
+ return std::min(Tmp, Tmp2)-1;
case ISD::TRUNCATE:
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
@@ -2286,9 +2272,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VTBits);
- ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
} else if (KnownOne.isNegative()) { // sign bit is 1;
@@ -2328,7 +2314,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (NoNaNsFPMath)
+ if (getTarget().Options.NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2423,8 +2409,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::CTPOP:
return getConstant(Val.countPopulation(), VT);
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
return getConstant(Val.countLeadingZeros(), VT);
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
return getConstant(Val.countTrailingZeros(), VT);
}
}
@@ -2440,7 +2428,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
case ISD::FABS:
V.clearSign();
return getConstantFP(V, VT);
- case ISD::FP_ROUND:
case ISD::FP_EXTEND: {
bool ignored;
// This can return overflow, underflow, or inexact; we don't care.
@@ -2561,17 +2548,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
"Vector element count mismatch!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
- OpOpcode == ISD::ANY_EXTEND) {
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
.bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
- else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+ if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
- else
- return Operand.getNode()->getOperand(0);
+ return Operand.getNode()->getOperand(0);
}
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
break;
case ISD::BITCAST:
// Basic sanity checking.
@@ -2601,7 +2589,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
break;
case ISD::FNEG:
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+ if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
Operand.getNode()->getOperand(0));
if (OpOpcode == ISD::FNEG) // --X -> X
@@ -2736,7 +2724,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath) {
+ if (getTarget().Options.UnsafeFPMath) {
if (Opcode == ISD::FADD) {
// 0+x --> x
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
@@ -3005,6 +2993,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
default: break;
}
}
+
+ if (Opcode == ISD::FP_ROUND) {
+ APFloat V = N1CFP->getValueAPF(); // make copy
+ bool ignored;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void)V.convert(*EVTToAPFloatSemantics(VT),
+ APFloat::rmNearestTiesToEven, &ignored);
+ return getConstantFP(V, VT);
+ }
}
// Canonicalize an UNDEF to the RHS, even over a constant.
@@ -3059,7 +3057,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM:
- if (UnsafeFPMath)
+ if (getTarget().Options.UnsafeFPMath)
return N2;
break;
case ISD::MUL:
@@ -3133,16 +3131,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SELECT:
if (N1C) {
if (N1C->getZExtValue())
- return N2; // select true, X, Y -> X
- else
- return N3; // select false, X, Y -> Y
+ return N2; // select true, X, Y -> X
+ return N3; // select false, X, Y -> Y
}
if (N2 == N3) return N2; // select C, X, X -> X
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
- break;
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -3275,8 +3271,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
/// used when a memcpy is turned into a memset when the source is a constant
/// string ptr.
static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
- const TargetLowering &TLI,
- std::string &Str, unsigned Offset) {
+ const TargetLowering &TLI, StringRef Str) {
// Handle vector with all elements zero.
if (Str.empty()) {
if (VT.isInteger())
@@ -3294,15 +3289,18 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
}
assert(!VT.isVector() && "Can't handle vector type here!");
- unsigned NumBits = VT.getSizeInBits();
- unsigned MSB = NumBits / 8;
+ unsigned NumVTBytes = VT.getSizeInBits() / 8;
+ unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
+
uint64_t Val = 0;
- if (TLI.isLittleEndian())
- Offset = Offset + MSB - 1;
- for (unsigned i = 0; i != MSB; ++i) {
- Val = (Val << 8) | (unsigned char)Str[Offset];
- Offset += TLI.isLittleEndian() ? -1 : 1;
+ if (TLI.isLittleEndian()) {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << i*8;
+ } else {
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
}
+
return DAG.getConstant(Val, VT);
}
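
A worked example of the byte packing above, with assumed inputs Str = "abcd" and VT = MVT::i32:

  // little-endian places Str[0] in the low byte:
  uint64_t ValLE = 0x61ULL | 0x62ULL << 8 | 0x63ULL << 16 | 0x64ULL << 24;
  // big-endian places Str[0] in the high byte:
  uint64_t ValBE = 0x61ULL << 24 | 0x62ULL << 16 | 0x63ULL << 8 | 0x64ULL;
  // ValLE == 0x64636261, ValBE == 0x61626364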
@@ -3317,7 +3315,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
/// isMemSrcFromString - Returns true if memcpy source is a string constant.
///
-static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
unsigned SrcDelta = 0;
GlobalAddressSDNode *G = NULL;
if (Src.getOpcode() == ISD::GlobalAddress)
@@ -3331,11 +3329,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
if (!G)
return false;
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
- if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
- return true;
-
- return false;
+ return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false);
}
/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
@@ -3345,7 +3339,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
unsigned Limit, uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -3359,7 +3353,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
// 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
// not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- NonScalarIntSafe, MemcpyStrSrc,
+ IsZeroVal, MemcpyStrSrc,
DAG.getMachineFunction());
if (VT == MVT::Other) {
@@ -3438,7 +3432,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- std::string Str;
+ StringRef Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
@@ -3475,7 +3469,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
- Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
+ Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff), isVol,
@@ -3562,7 +3556,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
SrcPtrInfo.getWithOffset(SrcOff), isVol,
- false, SrcAlign);
+ false, false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3606,11 +3600,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- bool NonScalarIntSafe =
+ bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
- NonScalarIntSafe, false, DAG, TLI))
+ IsZeroVal, false, DAG, TLI))
return SDValue();
if (DstAlignCanChange) {
@@ -3717,8 +3711,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMCPY), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3769,8 +3764,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMMOVE),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
Args, *this, dl);
@@ -3829,8 +3825,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()),
false, false, false, false, 0,
- TLI.getLibcallCallingConv(RTLIB::MEMSET), false,
- /*isReturnValueUsed=*/false,
+ TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ /*isTailCall=*/false,
+ /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
Args, *this, dl);
@@ -4138,8 +4135,9 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
MachinePointerInfo PtrInfo, EVT MemVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isVolatile, bool isNonTemporal, bool isInvariant,
+ unsigned Alignment, const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
@@ -4150,6 +4148,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+ if (isInvariant)
+ Flags |= MachineMemOperand::MOInvariant;
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
@@ -4159,7 +4159,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- TBAAInfo);
+ TBAAInfo, Ranges);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
@@ -4196,7 +4196,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(),
+ MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -4213,10 +4214,13 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment, const MDNode *TBAAInfo) {
+ bool isInvariant, unsigned Alignment,
+ const MDNode *TBAAInfo,
+ const MDNode *Ranges) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment,
+ TBAAInfo, Ranges);
}
SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
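
A sketch of a call through the widened getLoad overload, with assumed operands and the trailing metadata arguments left at their defaults:

  SDValue L = DAG.getLoad(MVT::i32, dl, Chain, Ptr, MachinePointerInfo(),
                          /*isVolatile=*/false, /*isNonTemporal=*/false,
                          /*isInvariant=*/true, /*Alignment=*/4);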
@@ -4226,7 +4230,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment,
TBAAInfo);
}
@@ -4239,8 +4243,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+ LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(),
+ false, LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
@@ -4282,7 +4286,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4349,7 +4353,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal()));
+ MMO->isNonTemporal(), MMO->isInvariant()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4903,6 +4907,20 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return N;
}
+/// UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// the line number information on the merged node, since it is not possible to
+/// preserve the fact that an operation is associated with multiple source
+/// lines. This makes the debugger work better at -O0, where there is a higher
+/// probability of other instructions being associated with that line.
+///
+SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) {
+ DebugLoc NLoc = N->getDebugLoc();
+ if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) {
+ N->setDebugLoc(DebugLoc());
+ }
+ return N;
+}
+
/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
@@ -4924,7 +4942,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
- return ON;
+ return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc());
}
if (!RemoveNodeFromCSEMaps(N))
@@ -5128,8 +5146,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
- return cast<MachineSDNode>(E);
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ return cast<MachineSDNode>(UpdadeDebugLocOnMergedSDNode(E, DL));
+ }
}
// Allocate a new MachineSDNode.
@@ -5290,6 +5309,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (FromN == getRoot())
+ setRoot(To);
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5335,6 +5358,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To, getRoot().getResNo()));
}
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
@@ -5373,6 +5400,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot().getNode())
+ setRoot(SDValue(To[getRoot().getResNo()]));
}
/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
@@ -5431,6 +5462,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
// already exists there, recursively merge the results together.
AddModifiedNodeToCSEMaps(User, &Listener);
}
+
+ // If we just RAUW'd the root, take note.
+ if (From == getRoot())
+ setRoot(To);
}
namespace {
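
The effect of the four root-update hooks added above, sketched with an assumed stand-in replacement value:

  SDValue OldRoot = DAG.getRoot();
  SDValue NewRoot = DAG.getEntryNode();      // stand-in replacement
  DAG.ReplaceAllUsesWith(OldRoot, NewRoot);
  assert(DAG.getRoot() == NewRoot && "RAUW now re-seats the root");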
@@ -5657,7 +5692,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(isNonTemporal() == MMO->isNonTemporal() &&
"Non-temporal encoding error!");
@@ -5670,7 +5705,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal());
+ MMO->isNonTemporal(), MMO->isInvariant());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5846,565 +5881,6 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
}
-std::string SDNode::getOperationName(const SelectionDAG *G) const {
- switch (getOpcode()) {
- default:
- if (getOpcode() < ISD::BUILTIN_OP_END)
- return "<<Unknown DAG Node>>";
- if (isMachineOpcode()) {
- if (G)
- if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
- if (getMachineOpcode() < TII->getNumOpcodes())
- return TII->get(getMachineOpcode()).getName();
- return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
- }
- if (G) {
- const TargetLowering &TLI = G->getTargetLoweringInfo();
- const char *Name = TLI.getTargetNodeName(getOpcode());
- if (Name) return Name;
- return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
- }
- return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
-
-#ifndef NDEBUG
- case ISD::DELETED_NODE:
- return "<<Deleted Node!>>";
-#endif
- case ISD::PREFETCH: return "Prefetch";
- case ISD::MEMBARRIER: return "MemBarrier";
- case ISD::ATOMIC_FENCE: return "AtomicFence";
- case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
- case ISD::ATOMIC_SWAP: return "AtomicSwap";
- case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
- case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
- case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
- case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
- case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
- case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
- case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
- case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
- case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
- case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
- case ISD::ATOMIC_LOAD: return "AtomicLoad";
- case ISD::ATOMIC_STORE: return "AtomicStore";
- case ISD::PCMARKER: return "PCMarker";
- case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
- case ISD::SRCVALUE: return "SrcValue";
- case ISD::MDNODE_SDNODE: return "MDNode";
- case ISD::EntryToken: return "EntryToken";
- case ISD::TokenFactor: return "TokenFactor";
- case ISD::AssertSext: return "AssertSext";
- case ISD::AssertZext: return "AssertZext";
-
- case ISD::BasicBlock: return "BasicBlock";
- case ISD::VALUETYPE: return "ValueType";
- case ISD::Register: return "Register";
-
- case ISD::Constant: return "Constant";
- case ISD::ConstantFP: return "ConstantFP";
- case ISD::GlobalAddress: return "GlobalAddress";
- case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
- case ISD::FrameIndex: return "FrameIndex";
- case ISD::JumpTable: return "JumpTable";
- case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
- case ISD::RETURNADDR: return "RETURNADDR";
- case ISD::FRAMEADDR: return "FRAMEADDR";
- case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
- case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
- case ISD::LSDAADDR: return "LSDAADDR";
- case ISD::EHSELECTION: return "EHSELECTION";
- case ISD::EH_RETURN: return "EH_RETURN";
- case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
- case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
- case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
- case ISD::ConstantPool: return "ConstantPool";
- case ISD::ExternalSymbol: return "ExternalSymbol";
- case ISD::BlockAddress: return "BlockAddress";
- case ISD::INTRINSIC_WO_CHAIN:
- case ISD::INTRINSIC_VOID:
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
- unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
- else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
- return TII->getName(IID);
- llvm_unreachable("Invalid intrinsic ID");
- }
-
- case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
- case ISD::TargetConstant: return "TargetConstant";
- case ISD::TargetConstantFP:return "TargetConstantFP";
- case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
- case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
- case ISD::TargetFrameIndex: return "TargetFrameIndex";
- case ISD::TargetJumpTable: return "TargetJumpTable";
- case ISD::TargetConstantPool: return "TargetConstantPool";
- case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
- case ISD::TargetBlockAddress: return "TargetBlockAddress";
-
- case ISD::CopyToReg: return "CopyToReg";
- case ISD::CopyFromReg: return "CopyFromReg";
- case ISD::UNDEF: return "undef";
- case ISD::MERGE_VALUES: return "merge_values";
- case ISD::INLINEASM: return "inlineasm";
- case ISD::EH_LABEL: return "eh_label";
- case ISD::HANDLENODE: return "handlenode";
-
- // Unary operators
- case ISD::FABS: return "fabs";
- case ISD::FNEG: return "fneg";
- case ISD::FSQRT: return "fsqrt";
- case ISD::FSIN: return "fsin";
- case ISD::FCOS: return "fcos";
- case ISD::FTRUNC: return "ftrunc";
- case ISD::FFLOOR: return "ffloor";
- case ISD::FCEIL: return "fceil";
- case ISD::FRINT: return "frint";
- case ISD::FNEARBYINT: return "fnearbyint";
- case ISD::FEXP: return "fexp";
- case ISD::FEXP2: return "fexp2";
- case ISD::FLOG: return "flog";
- case ISD::FLOG2: return "flog2";
- case ISD::FLOG10: return "flog10";
-
- // Binary operators
- case ISD::ADD: return "add";
- case ISD::SUB: return "sub";
- case ISD::MUL: return "mul";
- case ISD::MULHU: return "mulhu";
- case ISD::MULHS: return "mulhs";
- case ISD::SDIV: return "sdiv";
- case ISD::UDIV: return "udiv";
- case ISD::SREM: return "srem";
- case ISD::UREM: return "urem";
- case ISD::SMUL_LOHI: return "smul_lohi";
- case ISD::UMUL_LOHI: return "umul_lohi";
- case ISD::SDIVREM: return "sdivrem";
- case ISD::UDIVREM: return "udivrem";
- case ISD::AND: return "and";
- case ISD::OR: return "or";
- case ISD::XOR: return "xor";
- case ISD::SHL: return "shl";
- case ISD::SRA: return "sra";
- case ISD::SRL: return "srl";
- case ISD::ROTL: return "rotl";
- case ISD::ROTR: return "rotr";
- case ISD::FADD: return "fadd";
- case ISD::FSUB: return "fsub";
- case ISD::FMUL: return "fmul";
- case ISD::FDIV: return "fdiv";
- case ISD::FMA: return "fma";
- case ISD::FREM: return "frem";
- case ISD::FCOPYSIGN: return "fcopysign";
- case ISD::FGETSIGN: return "fgetsign";
- case ISD::FPOW: return "fpow";
-
- case ISD::FPOWI: return "fpowi";
- case ISD::SETCC: return "setcc";
- case ISD::SELECT: return "select";
- case ISD::VSELECT: return "vselect";
- case ISD::SELECT_CC: return "select_cc";
- case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
- case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
- case ISD::CONCAT_VECTORS: return "concat_vectors";
- case ISD::INSERT_SUBVECTOR: return "insert_subvector";
- case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
- case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
- case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
- case ISD::CARRY_FALSE: return "carry_false";
- case ISD::ADDC: return "addc";
- case ISD::ADDE: return "adde";
- case ISD::SADDO: return "saddo";
- case ISD::UADDO: return "uaddo";
- case ISD::SSUBO: return "ssubo";
- case ISD::USUBO: return "usubo";
- case ISD::SMULO: return "smulo";
- case ISD::UMULO: return "umulo";
- case ISD::SUBC: return "subc";
- case ISD::SUBE: return "sube";
- case ISD::SHL_PARTS: return "shl_parts";
- case ISD::SRA_PARTS: return "sra_parts";
- case ISD::SRL_PARTS: return "srl_parts";
-
- // Conversion operators.
- case ISD::SIGN_EXTEND: return "sign_extend";
- case ISD::ZERO_EXTEND: return "zero_extend";
- case ISD::ANY_EXTEND: return "any_extend";
- case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
- case ISD::TRUNCATE: return "truncate";
- case ISD::FP_ROUND: return "fp_round";
- case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
- case ISD::FP_EXTEND: return "fp_extend";
-
- case ISD::SINT_TO_FP: return "sint_to_fp";
- case ISD::UINT_TO_FP: return "uint_to_fp";
- case ISD::FP_TO_SINT: return "fp_to_sint";
- case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BITCAST: return "bitcast";
- case ISD::FP16_TO_FP32: return "fp16_to_fp32";
- case ISD::FP32_TO_FP16: return "fp32_to_fp16";
-
- case ISD::CONVERT_RNDSAT: {
- switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
- default: llvm_unreachable("Unknown cvt code!");
- case ISD::CVT_FF: return "cvt_ff";
- case ISD::CVT_FS: return "cvt_fs";
- case ISD::CVT_FU: return "cvt_fu";
- case ISD::CVT_SF: return "cvt_sf";
- case ISD::CVT_UF: return "cvt_uf";
- case ISD::CVT_SS: return "cvt_ss";
- case ISD::CVT_SU: return "cvt_su";
- case ISD::CVT_US: return "cvt_us";
- case ISD::CVT_UU: return "cvt_uu";
- }
- }
-
- // Control flow instructions
- case ISD::BR: return "br";
- case ISD::BRIND: return "brind";
- case ISD::BR_JT: return "br_jt";
- case ISD::BRCOND: return "brcond";
- case ISD::BR_CC: return "br_cc";
- case ISD::CALLSEQ_START: return "callseq_start";
- case ISD::CALLSEQ_END: return "callseq_end";
-
- // Other operators
- case ISD::LOAD: return "load";
- case ISD::STORE: return "store";
- case ISD::VAARG: return "vaarg";
- case ISD::VACOPY: return "vacopy";
- case ISD::VAEND: return "vaend";
- case ISD::VASTART: return "vastart";
- case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
- case ISD::EXTRACT_ELEMENT: return "extract_element";
- case ISD::BUILD_PAIR: return "build_pair";
- case ISD::STACKSAVE: return "stacksave";
- case ISD::STACKRESTORE: return "stackrestore";
- case ISD::TRAP: return "trap";
-
- // Bit manipulation
- case ISD::BSWAP: return "bswap";
- case ISD::CTPOP: return "ctpop";
- case ISD::CTTZ: return "cttz";
- case ISD::CTLZ: return "ctlz";
-
- // Trampolines
- case ISD::INIT_TRAMPOLINE: return "init_trampoline";
- case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
-
- case ISD::CONDCODE:
- switch (cast<CondCodeSDNode>(this)->get()) {
- default: llvm_unreachable("Unknown setcc condition!");
- case ISD::SETOEQ: return "setoeq";
- case ISD::SETOGT: return "setogt";
- case ISD::SETOGE: return "setoge";
- case ISD::SETOLT: return "setolt";
- case ISD::SETOLE: return "setole";
- case ISD::SETONE: return "setone";
-
- case ISD::SETO: return "seto";
- case ISD::SETUO: return "setuo";
- case ISD::SETUEQ: return "setue";
- case ISD::SETUGT: return "setugt";
- case ISD::SETUGE: return "setuge";
- case ISD::SETULT: return "setult";
- case ISD::SETULE: return "setule";
- case ISD::SETUNE: return "setune";
-
- case ISD::SETEQ: return "seteq";
- case ISD::SETGT: return "setgt";
- case ISD::SETGE: return "setge";
- case ISD::SETLT: return "setlt";
- case ISD::SETLE: return "setle";
- case ISD::SETNE: return "setne";
- }
- }
-}
-
-const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
- switch (AM) {
- default:
- return "";
- case ISD::PRE_INC:
- return "<pre-inc>";
- case ISD::PRE_DEC:
- return "<pre-dec>";
- case ISD::POST_INC:
- return "<post-inc>";
- case ISD::POST_DEC:
- return "<post-dec>";
- }
-}
-
-std::string ISD::ArgFlagsTy::getArgFlagsString() {
- std::string S = "< ";
-
- if (isZExt())
- S += "zext ";
- if (isSExt())
- S += "sext ";
- if (isInReg())
- S += "inreg ";
- if (isSRet())
- S += "sret ";
- if (isByVal())
- S += "byval ";
- if (isNest())
- S += "nest ";
- if (getByValAlign())
- S += "byval-align:" + utostr(getByValAlign()) + " ";
- if (getOrigAlign())
- S += "orig-align:" + utostr(getOrigAlign()) + " ";
- if (getByValSize())
- S += "byval-size:" + utostr(getByValSize()) + " ";
- return S + ">";
-}
-
-void SDNode::dump() const { dump(0); }
-void SDNode::dump(const SelectionDAG *G) const {
- print(dbgs(), G);
- dbgs() << '\n';
-}
-
-void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
- OS << (void*)this << ": ";
-
- for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
- if (i) OS << ",";
- if (getValueType(i) == MVT::Other)
- OS << "ch";
- else
- OS << getValueType(i).getEVTString();
- }
- OS << " = " << getOperationName(G);
-}
-
-void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
- if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
- if (!MN->memoperands_empty()) {
- OS << "<";
- OS << "Mem:";
- for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
- e = MN->memoperands_end(); i != e; ++i) {
- OS << **i;
- if (llvm::next(i) != e)
- OS << " ";
- }
- OS << ">";
- }
- } else if (const ShuffleVectorSDNode *SVN =
- dyn_cast<ShuffleVectorSDNode>(this)) {
- OS << "<";
- for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
- int Idx = SVN->getMaskElt(i);
- if (i) OS << ",";
- if (Idx < 0)
- OS << "u";
- else
- OS << Idx;
- }
- OS << ">";
- } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
- OS << '<' << CSDN->getAPIntValue() << '>';
- } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
- OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
- OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
- else {
- OS << "<APFloat(";
- CSDN->getValueAPF().bitcastToAPInt().dump();
- OS << ")>";
- }
- } else if (const GlobalAddressSDNode *GADN =
- dyn_cast<GlobalAddressSDNode>(this)) {
- int64_t offset = GADN->getOffset();
- OS << '<';
- WriteAsOperand(OS, GADN->getGlobal());
- OS << '>';
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = GADN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
- OS << "<" << FIDN->getIndex() << ">";
- } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
- OS << "<" << JTDN->getIndex() << ">";
- if (unsigned int TF = JTDN->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
- int offset = CP->getOffset();
- if (CP->isMachineConstantPoolEntry())
- OS << "<" << *CP->getMachineCPVal() << ">";
- else
- OS << "<" << *CP->getConstVal() << ">";
- if (offset > 0)
- OS << " + " << offset;
- else
- OS << " " << offset;
- if (unsigned int TF = CP->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
- OS << "<";
- const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
- if (LBB)
- OS << LBB->getName() << " ";
- OS << (const void*)BBDN->getBasicBlock() << ">";
- } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
- } else if (const ExternalSymbolSDNode *ES =
- dyn_cast<ExternalSymbolSDNode>(this)) {
- OS << "'" << ES->getSymbol() << "'";
- if (unsigned int TF = ES->getTargetFlags())
- OS << " [TF=" << TF << ']';
- } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
- if (M->getValue())
- OS << "<" << M->getValue() << ">";
- else
- OS << "<null>";
- } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
- if (MD->getMD())
- OS << "<" << MD->getMD() << ">";
- else
- OS << "<null>";
- } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
- }
- else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
- OS << "<" << *LD->getMemOperand();
-
- bool doExt = true;
- switch (LD->getExtensionType()) {
- default: doExt = false; break;
- case ISD::EXTLOAD: OS << ", anyext"; break;
- case ISD::SEXTLOAD: OS << ", sext"; break;
- case ISD::ZEXTLOAD: OS << ", zext"; break;
- }
- if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(LD->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
- OS << "<" << *ST->getMemOperand();
-
- if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
-
- const char *AM = getIndexedModeName(ST->getAddressingMode());
- if (*AM)
- OS << ", " << AM;
-
- OS << ">";
- } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
- OS << "<" << *M->getMemOperand() << ">";
- } else if (const BlockAddressSDNode *BA =
- dyn_cast<BlockAddressSDNode>(this)) {
- OS << "<";
- WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
- OS << ", ";
- WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
- OS << ">";
- if (unsigned int TF = BA->getTargetFlags())
- OS << " [TF=" << TF << ']';
- }
-
- if (G)
- if (unsigned Order = G->GetOrdering(this))
- OS << " [ORD=" << Order << ']';
-
- if (getNodeId() != -1)
- OS << " [ID=" << getNodeId() << ']';
-
- DebugLoc dl = getDebugLoc();
- if (G && !dl.isUnknown()) {
- DIScope
- Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
- OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << dl.getLine();
- if (dl.getCol() != 0)
- OS << ':' << dl.getCol();
- }
-}
-
-void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", "; else OS << " ";
- OS << (void*)getOperand(i).getNode();
- if (unsigned RN = getOperand(i).getResNo())
- OS << ":" << RN;
- }
- print_details(OS, G);
-}
-
-static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
- const SelectionDAG *G, unsigned depth,
- unsigned indent) {
- if (depth == 0)
- return;
-
- OS.indent(indent);
-
- N->print(OS, G);
-
- if (depth < 1)
- return;
-
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- // Don't follow chain operands.
- if (N->getOperand(i).getValueType() == MVT::Other)
- continue;
- OS << '\n';
- printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
- }
-}
-
-void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
- unsigned depth) const {
- printrWithDepthHelper(OS, this, G, depth, 0);
-}
-
-void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- printrWithDepth(OS, G, 10);
-}
-
-void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
- printrWithDepth(dbgs(), G, depth);
-}
-
-void SDNode::dumprFull(const SelectionDAG *G) const {
- // Don't print impossibly deep things.
- dumprWithDepth(G, 10);
-}
-
-static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (N->getOperand(i).getNode()->hasOneUse())
- DumpNodes(N->getOperand(i).getNode(), indent+2, G);
- else
- dbgs() << "\n" << std::string(indent+2, ' ')
- << (void*)N->getOperand(i).getNode() << ": <multiple use>";
-
-
- dbgs() << "\n";
- dbgs().indent(indent);
- N->dump(G);
-}
-
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
assert(N->getNumValues() == 1 &&
"Can't unroll a vector with multiple results!");
@@ -6527,20 +6003,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const GlobalValue *GV;
int64_t GVOffset = 0;
if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
- // If GV has specified alignment, then use it. Otherwise, use the preferred
- // alignment.
- unsigned Align = GV->getAlignment();
- if (!Align) {
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->hasInitializer()) {
- const TargetData *TD = TLI.getTargetData();
- Align = TD->getPreferredAlignment(GVar);
- }
- }
- if (!Align)
- Align = TLI.getTargetData()->getABITypeAlignment(GV->getType());
- }
- return MinAlign(Align, GVOffset);
+ unsigned PtrWidth = TLI.getPointerTy().getSizeInBits();
+ APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
+ llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI.getTargetData());
+ unsigned AlignBits = KnownZero.countTrailingOnes();
+ unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
+ if (Align)
+ return MinAlign(Align, GVOffset);
}
// If this is a direct reference to a stack slot, use information about the
@@ -6566,74 +6036,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
return 0;
}
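Editor's note: the rewritten block above infers alignment from the known-zero low bits of the global's address instead of the declared/preferred alignment. A minimal standalone C++ sketch of that arithmetic; MinAlign here mirrors the helper in llvm/Support/MathExtras.h, and the concrete numbers are made up for illustration:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Greatest power of two dividing both A and B, as in MathExtras.h.
static uint64_t MinAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  unsigned AlignBits = 4;  // four trailing bits of the address known zero
  unsigned Align = AlignBits ? 1u << std::min(31u, AlignBits) : 0;  // 16
  int64_t GVOffset = 8;    // the pointer is GV + 8
  assert(MinAlign(Align, GVOffset) == 8);  // the offset reduces alignment
  return 0;
}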
-void SelectionDAG::dump() const {
- dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
-
- for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
- I != E; ++I) {
- const SDNode *N = I;
- if (!N->hasOneUse() && N != getRoot().getNode())
- DumpNodes(N, 2, this);
- }
-
- if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
-
- dbgs() << "\n\n";
-}
-
-void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
- print_types(OS, G);
- print_details(OS, G);
-}
-
-typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
-static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
- const SelectionDAG *G, VisitedSDNodeSet &once) {
- if (!once.insert(N)) // If we've been here before, return now.
- return;
-
- // Dump the current SDNode, but don't end the line yet.
- OS << std::string(indent, ' ');
- N->printr(OS, G);
-
- // Having printed this SDNode, walk the children:
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
-
- if (i) OS << ",";
- OS << " ";
-
- if (child->getNumOperands() == 0) {
- // This child has no grandchildren; print it inline right here.
- child->printr(OS, G);
- once.insert(child);
- } else { // Just the address. FIXME: also print the child's opcode.
- OS << (void*)child;
- if (unsigned RN = N->getOperand(i).getResNo())
- OS << ":" << RN;
- }
- }
-
- OS << "\n";
-
- // Dump children that have grandchildren on their own line(s).
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- const SDNode *child = N->getOperand(i).getNode();
- DumpNodesr(OS, child, indent+2, G, once);
- }
-}
-
-void SDNode::dumpr() const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, 0, once);
-}
-
-void SDNode::dumpr(const SelectionDAG *G) const {
- VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, G, once);
-}
-
-
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 095b4001696f..94cb95804f69 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -41,13 +41,13 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -197,7 +197,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
// FP_ROUND's are always exact here.
if (ValueVT.bitsLT(Val.getValueType()))
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
- DAG.getIntPtrConstant(1));
+ DAG.getTargetConstant(1, TLI.getPointerTy()));
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
@@ -206,7 +206,6 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
- return SDValue();
}
/// getCopyFromParts - Create a value that contains the specified legal parts
@@ -353,10 +352,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts == 1 && "Do not know what to promote to!");
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
@@ -364,10 +366,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
- assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
+ ValueVT.isInteger() &&
"Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ if (PartVT == MVT::x86mmx)
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
}
// The value may have changed - recompute ValueVT.
@@ -813,9 +818,11 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
}
}
-void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
+void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li) {
AA = &aa;
GFI = gfi;
+ LibInfo = li;
TD = DAG.getTarget().getTargetData();
LPadToCallSiteMap.clear();
}
@@ -964,7 +971,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -1054,6 +1061,23 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
return DAG.getMergeValues(&Constants[0], Constants.size(),
getCurDebugLoc());
}
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(C)) {
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+ SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
+ // Add each leaf value from the operand to the Constants list
+ // to form a flattened list of all the values.
+ for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
+ Ops.push_back(SDValue(Val, i));
+ }
+
+ if (isa<ArrayType>(CDS->getType()))
+ return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+ VT, &Ops[0], Ops.size());
+ }
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
@@ -1088,9 +1112,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
// Now that we know the number and type of the elements, get that number of
// elements into the Ops array based on what kind of constant it is.
SmallVector<SDValue, 16> Ops;
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (unsigned i = 0; i != NumElements; ++i)
- Ops.push_back(getValue(CP->getOperand(i)));
+ Ops.push_back(getValue(CV->getOperand(i)));
} else {
assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
EVT EltVT = TLI.getValueType(VecTy->getElementType());
@@ -1126,7 +1150,6 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
llvm_unreachable("Can't get register for value!");
- return SDValue();
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
@@ -1285,8 +1308,8 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
}
/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
-uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
- MachineBasicBlock *Dst) {
+uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
BranchProbabilityInfo *BPI = FuncInfo.BPI;
if (!BPI)
return 0;
@@ -1336,6 +1359,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
Condition = getICmpCondCode(IC->getPredicate());
} else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
Condition = getFCmpCondCode(FC->getPredicate());
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
} else {
Condition = ISD::SETEQ; // silence warning.
llvm_unreachable("Unknown compare instruction");
@@ -1811,8 +1836,8 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
CopyToExportRegsIfNeeded(&I);
// Update successor info
- InvokeMBB->addSuccessor(Return);
- InvokeMBB->addSuccessor(LandingPad);
+ addSuccessorWithWeight(InvokeMBB, Return);
+ addSuccessorWithWeight(InvokeMBB, LandingPad);
// Drop into normal successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
@@ -1820,9 +1845,6 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
DAG.getBasicBlock(Return)));
}
-void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
-}
-
void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
}
@@ -1835,6 +1857,12 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
AddLandingPadInfo(LP, MMI, MBB);
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother to create these DAG nodes.
+ if (TLI.getExceptionPointerRegister() == 0 &&
+ TLI.getExceptionSelectorRegister() == 0)
+ return;
+
SmallVector<EVT, 2> ValueVTs;
ComputeValueVTs(TLI, LP.getType(), ValueVTs);
@@ -2003,7 +2031,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
}
static inline bool areJTsAllowed(const TargetLowering &TLI) {
- return !DisableJumpTables &&
+ return !TLI.getTargetMachine().Options.DisableJumpTables &&
(TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}
@@ -2190,7 +2218,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
- Constant *C = Pivot->Low;
+ const Constant *C = Pivot->Low;
MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
// We know that we branch to the LHS if the Value being switched on is
@@ -2383,14 +2411,14 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
BranchProbabilityInfo *BPI = FuncInfo.BPI;
// Start with "simple" cases
- for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
- BasicBlock *SuccBB = SI.getSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ const BasicBlock *SuccBB = i.getCaseSuccessor();
MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
uint32_t ExtraWeight = BPI ? BPI->getEdgeWeight(SI.getParent(), SuccBB) : 0;
- Cases.push_back(Case(SI.getSuccessorValue(i),
- SI.getSuccessorValue(i),
+ Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(),
SMBB, ExtraWeight));
}
std::sort(Cases.begin(), Cases.end(), CaseCmp());
@@ -2457,7 +2485,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// If there is only the default destination, branch to it if it is not the
// next basic block. Otherwise, just fall through.
- if (SI.getNumCases() == 1) {
+ if (!SI.getNumCases()) {
// Update machine-CFG edges.
// If this is not a fall-through branch, emit the branch.
@@ -2626,6 +2654,8 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Condition = getFCmpCondCode(predicate);
+ if (TM.Options.NoNaNsFPMath)
+ Condition = getFCmpCodeWithoutNaN(Condition);
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
}
@@ -2685,11 +2715,12 @@ void SelectionDAGBuilder::visitFPTrunc(const User &I) {
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
- DestVT, N, DAG.getIntPtrConstant(0)));
+ DestVT, N,
+ DAG.getTargetConstant(0, TLI.getPointerTy())));
}
void SelectionDAGBuilder::visitFPExt(const User &I){
- // FPTrunc is never a no-op cast, no need to check
+ // FPExt is never a no-op cast, no need to check
SDValue N = getValue(I.getOperand(0));
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
@@ -2772,33 +2803,25 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
TLI.getValueType(I.getType()), InVec, InIdx));
}
-// Utility for visitShuffleVector - Returns true if the mask is mask starting
-// from SIndx and increasing to the element length (undefs are allowed).
-static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
- unsigned MaskNumElts = Mask.size();
- for (unsigned i = 0; i != MaskNumElts; ++i)
- if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+// Utility for visitShuffleVector - Return true if every element in Mask,
+// beginning from position Pos and ending in Pos+Size, falls within the
+// specified sequential range [L, L+Size), or is undef.
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
+ unsigned Pos, unsigned Size, int Low) {
+ for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
+ if (Mask[i] >= 0 && Mask[i] != Low)
return false;
return true;
}
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
- SmallVector<int, 8> Mask;
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
- // Convert the ConstantVector mask operand into an array of ints, with -1
- // representing undef values.
- SmallVector<Constant*, 8> MaskElts;
- cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
- unsigned MaskNumElts = MaskElts.size();
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (isa<UndefValue>(MaskElts[i]))
- Mask.push_back(-1);
- else
- Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
- }
-
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
+ unsigned MaskNumElts = Mask.size();
+
EVT VT = TLI.getValueType(I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
@@ -2814,11 +2837,23 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Mask is longer than the source vectors and is a multiple of the source
// vectors. We can use concatenate vector to make the mask and vectors
// lengths match.
- if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
- // The shuffle is concatenating two vectors together.
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
- VT, Src1, Src2));
- return;
+ if (SrcNumElts*2 == MaskNumElts) {
+ // First check for Src1 in low and Src2 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src1, Src2));
+ return;
+ }
+ // Then check for Src2 in low and Src1 in high
+ if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
+ isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
+ // The shuffle is concatenating two vectors together.
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+ VT, Src2, Src1));
+ return;
+ }
}
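Editor's note: a small self-contained check of the two concat patterns this hunk now recognizes, using the same isSequentialInRange test on plain std::vector masks. This is an illustrative model with made-up masks, not the LLVM types:

#include <cassert>
#include <vector>

static bool isSequentialInRange(const std::vector<int> &Mask,
                                unsigned Pos, unsigned Size, int Low) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
    if (Mask[i] >= 0 && Mask[i] != Low)
      return false;  // a defined element falls outside the sequence
  return true;       // all defined elements are in range (undefs are -1)
}

int main() {
  unsigned SrcNumElts = 4;
  std::vector<int> M1 = {0, 1, 2, 3, 4, 5, 6, 7};   // concat(Src1, Src2)
  std::vector<int> M2 = {4, 5, -1, 7, 0, 1, 2, 3};  // concat(Src2, Src1)
  assert(isSequentialInRange(M1, 0, SrcNumElts, 0) &&
         isSequentialInRange(M1, SrcNumElts, SrcNumElts, SrcNumElts));
  assert(isSequentialInRange(M2, 0, SrcNumElts, SrcNumElts) &&
         isSequentialInRange(M2, SrcNumElts, SrcNumElts, 0));
  return 0;
}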
// Pad both vectors with undefs to make them the same length as the mask.
@@ -2843,10 +2878,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx);
- else
- MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+ if (Idx >= (int)SrcNumElts)
+ Idx -= SrcNumElts - MaskNumElts;
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2858,13 +2892,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Analyze the access pattern of the vector to see if we can extract
// two subvectors and do the shuffle. The analysis is done by calculating
// the range of elements the mask access on both vectors.
- int MinRange[2] = { static_cast<int>(SrcNumElts+1),
- static_cast<int>(SrcNumElts+1)};
+ int MinRange[2] = { static_cast<int>(SrcNumElts),
+ static_cast<int>(SrcNumElts)};
int MaxRange[2] = {-1, -1};
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- int Input = 0;
+ unsigned Input = 0;
if (Idx < 0)
continue;
@@ -2880,35 +2914,31 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// Check if the access is smaller than the vector size and can we find
// a reasonable extract index.
- int RangeUse[2] = { 2, 2 }; // 0 = Unused, 1 = Extract, 2 = Can not
- // Extract.
+  int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Cannot
+                                // extract.
int StartIdx[2]; // StartIdx to extract from
- for (int Input=0; Input < 2; ++Input) {
- if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
+ if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
RangeUse[Input] = 0; // Unused
StartIdx[Input] = 0;
- } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
- // Fits within range but we should see if we can find a good
- // start index that is a multiple of the mask length.
- if (MaxRange[Input] < (int)MaskNumElts) {
- RangeUse[Input] = 1; // Extract from beginning of the vector
- StartIdx[Input] = 0;
- } else {
- StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
- if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts <= SrcNumElts)
- RangeUse[Input] = 1; // Extract from a multiple of the mask length.
- }
+ continue;
}
+
+ // Find a good start index that is a multiple of the mask length. Then
+ // see if the rest of the elements are in range.
+ StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+ if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
+ RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
if (RangeUse[0] == 0 && RangeUse[1] == 0) {
setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
return;
}
- else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+ if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
// Extract appropriate subvector and generate a vector shuffle
- for (int Input=0; Input < 2; ++Input) {
+ for (unsigned Input = 0; Input < 2; ++Input) {
SDValue &Src = Input == 0 ? Src1 : Src2;
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
@@ -2921,12 +2951,13 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SmallVector<int, 8> MappedOps;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
- if (Idx < 0)
- MappedOps.push_back(Idx);
- else if (Idx < (int)SrcNumElts)
- MappedOps.push_back(Idx - StartIdx[0]);
- else
- MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+ if (Idx >= 0) {
+ if (Idx < (int)SrcNumElts)
+ Idx -= StartIdx[0];
+ else
+ Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
+ }
+ MappedOps.push_back(Idx);
}
setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
@@ -2942,22 +2973,20 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
EVT PtrVT = TLI.getPointerTy();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- if (Mask[i] < 0) {
- Ops.push_back(DAG.getUNDEF(EltVT));
- } else {
- int Idx = Mask[i];
- SDValue Res;
+ int Idx = Mask[i];
+ SDValue Res;
- if (Idx < (int)SrcNumElts)
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src1, DAG.getConstant(Idx, PtrVT));
- else
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
- EltVT, Src2,
- DAG.getConstant(Idx - SrcNumElts, PtrVT));
+ if (Idx < 0) {
+ Res = DAG.getUNDEF(EltVT);
+ } else {
+ SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
+ if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Ops.push_back(Res);
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+ EltVT, Src, DAG.getConstant(Idx, PtrVT));
}
+
+ Ops.push_back(Res);
}
setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
@@ -3042,7 +3071,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
SDValue N = getValue(I.getOperand(0));
- Type *Ty = I.getOperand(0)->getType();
+ // Note that the pointer operand may be a vector of pointers. Take the scalar
+ // element which holds a pointer.
+ Type *Ty = I.getOperand(0)->getType()->getScalarType();
for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
OI != E; ++OI) {
@@ -3096,7 +3127,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned Amt = ElementSize.logBase2();
IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
N.getValueType(), IdxN,
- DAG.getConstant(Amt, TLI.getPointerTy()));
+ DAG.getConstant(Amt, IdxN.getValueType()));
} else {
SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
@@ -3175,8 +3206,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isInvariant = I.getMetadata("invariant.load") != 0;
unsigned Alignment = I.getAlignment();
const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+ const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -3224,7 +3257,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
- isNonTemporal, Alignment, TBAAInfo);
+ isNonTemporal, isInvariant, Alignment, TBAAInfo,
+ Ranges);
Values[i] = L;
Chains[ChainI] = L.getValue(1);
@@ -3358,7 +3392,7 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
DebugLoc dl = getCurDebugLoc();
ISD::NodeType NT;
switch (I.getOperation()) {
- default: llvm_unreachable("Unknown atomicrmw operation"); return;
+ default: llvm_unreachable("Unknown atomicrmw operation");
case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
@@ -3496,24 +3530,16 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
Info.opc == ISD::INTRINSIC_W_CHAIN)
- Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+ Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
SDValue Op = getValue(I.getArgOperand(i));
- assert(TLI.isTypeLegal(Op.getValueType()) &&
- "Intrinsic uses a non-legal type?");
Ops.push_back(Op);
}
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I.getType(), ValueVTs);
-#ifndef NDEBUG
- for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
- assert(TLI.isTypeLegal(ValueVTs[Val]) &&
- "Intrinsic uses a non-legal type?");
- }
-#endif // NDEBUG
if (HasChain)
ValueVTs.push_back(MVT::Other);
@@ -3556,6 +3582,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
}
setValue(&I, Result);
+ } else {
+ // Assign order to result here. If the intrinsic does not produce a result,
+ // it won't be mapped to a SDNode and visit() will not assign it an order
+ // number.
+ ++SDNodeOrder;
+ AssignOrderingToNode(Result.getNode());
}
}
@@ -3597,17 +3629,6 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) {
return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
}
-// implVisitAluOverflow - Lower arithmetic overflow instrinsics.
-const char *
-SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
- SDValue Op1 = getValue(I.getArgOperand(0));
- SDValue Op2 = getValue(I.getArgOperand(1));
-
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
- setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
- return 0;
-}
-
/// visitExp - Lower an exp intrinsic. Handles the special sequences for
/// limited-precision mode.
void
@@ -4367,9 +4388,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
const SDValue &CFR = Ext.getOperand(0);
if (CFR.getOpcode() == ISD::CopyFromReg)
return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
- else
- if (CFR.getOpcode() == ISD::TRUNCATE)
- return getTruncatedArgReg(CFR);
+ if (CFR.getOpcode() == ISD::TRUNCATE)
+ return getTruncatedArgReg(CFR);
}
return 0;
}
@@ -4398,7 +4418,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
// Some arguments' frame index is recorded during argument lowering.
Offset = FuncInfo.getArgumentFrameIndex(Arg);
if (Offset)
- Reg = TRI->getFrameRegister(MF);
+ Reg = TRI->getFrameRegister(MF);
if (!Reg && N.getNode()) {
if (N.getOpcode() == ISD::CopyFromReg)
@@ -4473,9 +4493,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
- return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+ return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
- return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+ return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
case Intrinsic::memcpy: {
// Assert for address < 256 since we support only user defined address
// spaces.
@@ -4531,8 +4551,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
MDNode *Variable = DI.getVariable();
const Value *Address = DI.getAddress();
- if (!Address || !DIVariable(Variable).Verify())
+ if (!Address || !DIVariable(Variable).Verify()) {
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
+ }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The SDNodeOrder
@@ -4543,7 +4565,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return 0;
}
@@ -4553,11 +4575,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
+ // Parameters are handled specially.
+ bool isParameter =
+ (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
+ isa<Argument>(Address));
+
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
if (isParameter && !AI) {
@@ -4577,7 +4601,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
+ DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
+ DEBUG(Address->dump());
return 0;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
@@ -4599,7 +4625,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
}
}
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
return 0;
@@ -4645,7 +4671,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter.
- DEBUG(dbgs() << "Dropping debug info for " << DI);
+ DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
@@ -4654,8 +4680,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
V = BCI->getOperand(0);
const AllocaInst *AI = dyn_cast<AllocaInst>(V);
// Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
+ if (!AI) {
+ DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
+ DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
return 0;
+ }
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI == FuncInfo.StaticAllocaMap.end())
@@ -4667,43 +4696,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
return 0;
}
- case Intrinsic::eh_exception: {
- // Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBB->isLandingPad() &&
- "Call to eh.exception not in landing pad!");
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[1];
- Ops[0] = DAG.getRoot();
- SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
- setValue(&I, Op);
- DAG.setRoot(Op.getValue(1));
- return 0;
- }
-
- case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (CallMBB->isLandingPad())
- AddCatchInfo(I, &MMI, CallMBB);
- else {
-#ifndef NDEBUG
- FuncInfo.CatchInfoLost.insert(&I);
-#endif
- // FIXME: Mark exception selector register as live in. Hack for PR1508.
- unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBB->addLiveIn(Reg);
- }
-
- // Insert the EHSELECTION instruction.
- SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
- SDValue Ops[2];
- Ops[0] = getValue(I.getArgOperand(0));
- Ops[1] = getRoot();
- SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
- DAG.setRoot(Op.getValue(1));
- setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
- return 0;
- }
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
@@ -4775,11 +4767,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
- case Intrinsic::eh_sjlj_dispatch_setup: {
- DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- getRoot(), getValue(I.getArgOperand(0))));
- return 0;
- }
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
@@ -4841,6 +4828,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, Res);
return 0;
}
+ case Intrinsic::x86_avx_vinsertf128_pd_256:
+ case Intrinsic::x86_avx_vinsertf128_ps_256:
+ case Intrinsic::x86_avx_vinsertf128_si_256:
+ case Intrinsic::x86_avx2_vinserti128: {
+ DebugLoc dl = getCurDebugLoc();
+ EVT DestVT = TLI.getValueType(I.getType());
+ EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
+ uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
+ ElVT.getVectorNumElements();
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getConstant(Idx, MVT::i32));
+ setValue(&I, Res);
+ return 0;
+ }
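Editor's note: in this lowering the immediate's low bit selects the 128-bit half, so the INSERT_SUBVECTOR element index is either 0 or one lane's element count. A sketch of the index math under an assumed <8 x float> destination and <4 x float> lane:

#include <cassert>

int main() {
  unsigned LaneElts = 4;  // elements per 128-bit lane
  for (unsigned Imm = 0; Imm < 8; ++Imm) {
    unsigned Idx = (Imm & 1) * LaneElts;      // only the low bit matters
    assert(Idx == ((Imm & 1) ? LaneElts : 0));
  }
  return 0;
}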
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4852,6 +4855,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: {
ISD::CvtCode Code = ISD::CVT_INVALID;
switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::convertff: Code = ISD::CVT_FF; break;
case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
@@ -4946,14 +4950,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
- setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+ setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
+ dl, Ty, Arg));
return 0;
}
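Editor's note: with this change the cttz/ctlz intrinsics carry an i1 operand saying whether a zero input is undefined, which is what selects CTTZ vs. CTTZ_ZERO_UNDEF above. A plain C++ model of the two cttz variants, not the LLVM API:

#include <cassert>
#include <cstdint>

static unsigned cttz32(uint32_t X, bool ZeroUndef) {
  if (X == 0) {
    assert(!ZeroUndef && "cttz(0) is undefined in zero-undef mode");
    return 32;  // ISD::CTTZ is defined to return the bit width for 0
  }
  unsigned N = 0;
  while (!(X & 1)) { X >>= 1; ++N; }
  return N;
}

int main() {
  assert(cttz32(8, /*ZeroUndef=*/true) == 3);
  assert(cttz32(0, /*ZeroUndef=*/false) == 32);
  return 0;
}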
case Intrinsic::ctpop: {
@@ -5052,7 +5060,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
- return 0;
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
@@ -5064,7 +5071,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::trap: {
- StringRef TrapFuncName = getTrapFunctionName();
+ StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
return 0;
@@ -5073,25 +5080,36 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result =
TLI.LowerCallTo(getRoot(), I.getType(),
false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false, /*isReturnValueUsed=*/true,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
Args, DAG, getCurDebugLoc());
DAG.setRoot(Result.second);
return 0;
}
case Intrinsic::uadd_with_overflow:
- return implVisitAluOverflow(I, ISD::UADDO);
case Intrinsic::sadd_with_overflow:
- return implVisitAluOverflow(I, ISD::SADDO);
case Intrinsic::usub_with_overflow:
- return implVisitAluOverflow(I, ISD::USUBO);
case Intrinsic::ssub_with_overflow:
- return implVisitAluOverflow(I, ISD::SSUBO);
case Intrinsic::umul_with_overflow:
- return implVisitAluOverflow(I, ISD::UMULO);
- case Intrinsic::smul_with_overflow:
- return implVisitAluOverflow(I, ISD::SMULO);
+ case Intrinsic::smul_with_overflow: {
+ ISD::NodeType Op;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
+ case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
+ case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
+ case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
+ case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
+ case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
+ }
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+ return 0;
+ }
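Editor's note: the overflow cases folded together here all produce a two-result node, {value, i1 overflow}, as the VTList above shows. A standalone model of the unsigned-add flavor:

#include <cassert>
#include <cstdint>
#include <utility>

// Model of ISD::UADDO's two results: the wrapped sum and an i1 flag.
static std::pair<uint32_t, bool> uaddo(uint32_t A, uint32_t B) {
  uint32_t R = A + B;
  return {R, R < A};  // unsigned overflow iff the sum wrapped around
}

int main() {
  std::pair<uint32_t, bool> V = uaddo(0xFFFFFFFFu, 1u);
  assert(V.first == 0 && V.second);
  return 0;
}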
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
@@ -5226,7 +5244,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
// If there's a possibility that fast-isel has already selected some amount
// of the current basic block, don't emit a tail call.
- if (isTailCall && EnableFastISel)
+ if (isTailCall && TM.Options.EnableFastISel)
isTailCall = false;
std::pair<SDValue,SDValue> Result =
@@ -5236,6 +5254,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
CS.getCallingConv(),
isTailCall,
+ CS.doesNotReturn(),
!CS.getInstruction()->use_empty(),
Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
@@ -5264,7 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
Add,
MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
- false, false, 1);
+ false, false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -5375,7 +5394,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
- false /*nontemporal*/, 1 /* align=1 */);
+ false /*nontemporal*/,
+ false /*isinvariant*/, 1 /* align=1 */);
if (!ConstantMemory)
Builder.PendingLoads.push_back(LoadVal.getValue(1));
@@ -5470,23 +5490,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- // See if any floating point values are being passed to this function. This is
- // used to emit an undefined reference to fltused on Windows.
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (FT->isVarArg() &&
- !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (po_iterator<Type*> i = po_begin(T), e = po_end(T);
- i != e; ++i) {
- if (!i->isFloatingPointTy()) continue;
- MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
- break;
- }
- }
- }
+ ComputeUsesVAFloatArgument(I, &MMI);
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
@@ -5509,7 +5514,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// can't be a library call.
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
- if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+ if ((LibInfo->has(LibFunc::copysign) && Name == "copysign") ||
+ (LibInfo->has(LibFunc::copysignf) && Name == "copysignf") ||
+ (LibInfo->has(LibFunc::copysignl) && Name == "copysignl")) {
if (I.getNumArgOperands() == 2 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5520,7 +5527,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LHS.getValueType(), LHS, RHS));
return;
}
- } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+ } else if ((LibInfo->has(LibFunc::fabs) && Name == "fabs") ||
+ (LibInfo->has(LibFunc::fabsf) && Name == "fabsf") ||
+ (LibInfo->has(LibFunc::fabsl) && Name == "fabsl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType()) {
@@ -5529,7 +5538,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+ } else if ((LibInfo->has(LibFunc::sin) && Name == "sin") ||
+ (LibInfo->has(LibFunc::sinf) && Name == "sinf") ||
+ (LibInfo->has(LibFunc::sinl) && Name == "sinl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5539,7 +5550,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+ } else if ((LibInfo->has(LibFunc::cos) && Name == "cos") ||
+ (LibInfo->has(LibFunc::cosf) && Name == "cosf") ||
+ (LibInfo->has(LibFunc::cosl) && Name == "cosl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5549,7 +5562,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
- } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+ } else if ((LibInfo->has(LibFunc::sqrt) && Name == "sqrt") ||
+ (LibInfo->has(LibFunc::sqrtf) && Name == "sqrtf") ||
+ (LibInfo->has(LibFunc::sqrtl) && Name == "sqrtl")) {
if (I.getNumArgOperands() == 1 && // Basic sanity checks.
I.getArgOperand(0)->getType()->isFloatingPointTy() &&
I.getType() == I.getArgOperand(0)->getType() &&
@@ -5559,6 +5574,85 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
Tmp.getValueType(), Tmp));
return;
}
+ } else if ((LibInfo->has(LibFunc::floor) && Name == "floor") ||
+ (LibInfo->has(LibFunc::floorf) && Name == "floorf") ||
+ (LibInfo->has(LibFunc::floorl) && Name == "floorl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FFLOOR, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::nearbyint) && Name == "nearbyint") ||
+ (LibInfo->has(LibFunc::nearbyintf) && Name == "nearbyintf") ||
+ (LibInfo->has(LibFunc::nearbyintl) && Name == "nearbyintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FNEARBYINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::ceil) && Name == "ceil") ||
+ (LibInfo->has(LibFunc::ceilf) && Name == "ceilf") ||
+ (LibInfo->has(LibFunc::ceill) && Name == "ceill")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FCEIL, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::rint) && Name == "rint") ||
+ (LibInfo->has(LibFunc::rintf) && Name == "rintf") ||
+ (LibInfo->has(LibFunc::rintl) && Name == "rintl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FRINT, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::trunc) && Name == "trunc") ||
+ (LibInfo->has(LibFunc::truncf) && Name == "truncf") ||
+ (LibInfo->has(LibFunc::truncl) && Name == "truncl")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FTRUNC, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::log2) && Name == "log2") ||
+ (LibInfo->has(LibFunc::log2f) && Name == "log2f") ||
+ (LibInfo->has(LibFunc::log2l) && Name == "log2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FLOG2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
+ } else if ((LibInfo->has(LibFunc::exp2) && Name == "exp2") ||
+ (LibInfo->has(LibFunc::exp2f) && Name == "exp2f") ||
+ (LibInfo->has(LibFunc::exp2l) && Name == "exp2l")) {
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.onlyReadsMemory()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
+ setValue(&I, DAG.getNode(ISD::FEXP2, getCurDebugLoc(),
+ Tmp.getValueType(), Tmp));
+ return;
+ }
} else if (Name == "memcmp") {
if (visitMemCmpCall(I))
return;
@@ -5596,22 +5690,6 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
- /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
- /// busy in OutputRegs/InputRegs.
- void MarkAllocatedRegs(bool isOutReg, bool isInReg,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs,
- const TargetRegisterInfo &TRI) const {
- if (isOutReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
- }
- if (isInReg) {
- for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
- MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
- }
- }
-
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -5659,18 +5737,6 @@ public:
return TLI.getValueType(OpTy, true);
}
-
-private:
- /// MarkRegAndAliases - Mark the specified register and all aliases in the
- /// specified set.
- static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
- const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
- Regs.insert(Reg);
- if (const unsigned *Aliases = TRI.getAliasSet(Reg))
- for (; *Aliases; ++Aliases)
- Regs.insert(*Aliases);
- }
};
typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
@@ -5684,39 +5750,13 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-/// Input and OutputRegs are the set of already allocated physical registers.
///
static void GetRegistersForValue(SelectionDAG &DAG,
const TargetLowering &TLI,
DebugLoc DL,
- SDISelAsmOperandInfo &OpInfo,
- std::set<unsigned> &OutputRegs,
- std::set<unsigned> &InputRegs) {
+ SDISelAsmOperandInfo &OpInfo) {
LLVMContext &Context = *DAG.getContext();
- // Compute whether this value requires an input register, an output register,
- // or both.
- bool isOutReg = false;
- bool isInReg = false;
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- isOutReg = true;
-
- // If there is an input constraint that matches this, we need to reserve
- // the input register so no other inputs allocate to it.
- isInReg = OpInfo.hasMatchingInput();
- break;
- case InlineAsm::isInput:
- isInReg = true;
- isOutReg = false;
- break;
- case InlineAsm::isClobber:
- isOutReg = true;
- isInReg = true;
- break;
- }
-
-
MachineFunction &MF = DAG.getMachineFunction();
SmallVector<unsigned, 4> Regs;
@@ -5790,8 +5830,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
}
OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
}
@@ -5822,8 +5860,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
/// ConstraintOperands - Information about all of the constraints.
SDISelAsmOperandInfoVector ConstraintOperands;
- std::set<unsigned> OutputRegs, InputRegs;
-
TargetLowering::AsmOperandInfoVector
TargetConstraints = TLI.ParseConstraints(CS);
@@ -5956,7 +5992,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// constant pool entry to get its address.
const Value *OpVal = OpInfo.CallOperandVal;
if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal)) {
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
TLI.getPointerTy());
} else {
@@ -5985,8 +6021,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this constraint is for a specific register, allocate it before
// anything else.
if (OpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// Second pass - Loop over all of the operands, assigning virtual or physregs
@@ -5997,8 +6032,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// C_Register operands have already been allocated, Other/Memory don't need
// to be.
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs,
- InputRegs);
+ GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -6052,9 +6086,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the output from the appropriate register. Find a register that
// we can use.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate output reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// If this is an indirect operand, store through the pointer after the
// asm.
@@ -6154,9 +6192,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
- if (Ops.empty())
- report_fatal_error("Invalid operand for inline asm constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (Ops.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "invalid operand for inline asm constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
// Add information to the INLINEASM node to know about this input.
unsigned ResOpType =
@@ -6187,9 +6229,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Don't know how to handle indirect register inputs yet!");
// Copy the input into the appropriate registers.
- if (OpInfo.AssignedRegs.Regs.empty())
- report_fatal_error("Couldn't allocate input reg for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'!");
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ Ctx.emitError(CS.getInstruction(),
+ "couldn't allocate input reg for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ break;
+ }
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
Chain, &Flag);
@@ -6327,7 +6373,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
bool RetSExt, bool RetZExt, bool isVarArg,
bool isInreg, unsigned NumFixedArgs,
CallingConv::ID CallConv, bool isTailCall,
- bool isReturnValueUsed,
+ bool doesNotRet, bool isReturnValueUsed,
SDValue Callee,
ArgListTy &Args, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -6424,7 +6470,7 @@ TargetLowering::LowerCallTo(SDValue Chain, Type *RetTy,
}
SmallVector<SDValue, 4> InVals;
- Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+ Chain = LowerCall(Chain, Callee, CallConv, isVarArg, doesNotRet, isTailCall,
Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
@@ -6493,7 +6539,6 @@ void TargetLowering::LowerOperationWrapper(SDNode *N,
SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("LowerOperation not implemented for this target!");
- return SDValue();
}
void
@@ -6515,10 +6560,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
/// entry block, return true. This includes arguments used by switches, since
/// the switch may expand into multiple basic blocks.
-static bool isOnlyUsedInEntryBlock(const Argument *A) {
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
// With FastISel active, we may be splitting blocks, so force creation
// of virtual registers for all non-dead arguments.
- if (EnableFastISel)
+ if (FastISel)
return A->use_empty();
const BasicBlock *Entry = A->getParent()->begin();
@@ -6708,7 +6753,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SDB->getCurDebugLoc());
SDB->setValue(I, Res);
- if (!EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
if (LoadSDNode *LNode =
dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
if (FrameIndexSDNode *FI =
@@ -6718,7 +6763,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
// general. It's also subtly incompatible with the hacks FastISel
@@ -6729,7 +6774,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
continue;
}
}
- if (!isOnlyUsedInEntryBlock(I)) {
+ if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
FuncInfo->InitializeRegForValue(I);
SDB->CopyToExportRegsIfNeeded(I);
}
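The visitCall hunks above repeat one shape per libm entry point: a call is mapped onto an ISD opcode only when TargetLibraryInfo reports the libcall as available and the call site matches the unary floating-point prototype. A minimal sketch of that shape, written as a hypothetical SelectionDAGBuilder member that is not part of this patch (the patch inlines the checks per function):

    // Sketch only; assumes the LLVM 3.1-era APIs used above. 'Opcode' would be
    // one of ISD::FFLOOR, ISD::FCEIL, ISD::FRINT, ISD::FNEARBYINT, ISD::FTRUNC,
    // ISD::FLOG2, ISD::FEXP2, ...
    bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
                                                  unsigned Opcode) {
      // Basic sanity checks, mirroring the inline hunks; note that the
      // floor/nearbyint/ceil/rint/trunc hunks above omit onlyReadsMemory().
      if (I.getNumArgOperands() != 1 ||
          !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
          I.getType() != I.getArgOperand(0)->getType() ||
          !I.onlyReadsMemory())
        return false;
      SDValue Tmp = getValue(I.getArgOperand(0));
      setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
                               Tmp.getValueType(), Tmp));
      return true;
    }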
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0a21ca3472ca..8393b414926a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -67,11 +67,11 @@ class SIToFPInst;
class StoreInst;
class SwitchInst;
class TargetData;
+class TargetLibraryInfo;
class TargetLowering;
class TruncInst;
class UIToFPInst;
class UnreachableInst;
-class UnwindInst;
class VAArgInst;
class ZExtInst;
@@ -129,13 +129,13 @@ private:
/// Case - A struct to record the Value for a switch case, and the
/// case's target basic block.
struct Case {
- Constant* Low;
- Constant* High;
+ const Constant *Low;
+ const Constant *High;
MachineBasicBlock* BB;
uint32_t ExtraWeight;
Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
- Case(Constant* low, Constant* high, MachineBasicBlock* bb,
+ Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
@@ -294,6 +294,7 @@ public:
SelectionDAG &DAG;
const TargetData *TD;
AliasAnalysis *AA;
+ const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
@@ -338,7 +339,8 @@ public:
HasTailCall(false), Context(dag.getContext()) {
}
- void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+ void init(GCFunctionInfo *gfi, AliasAnalysis &aa,
+ const TargetLibraryInfo *li);
/// clear - Clear out the current SelectionDAG and the associated
/// state and prepare this SelectionDAGBuilder object to be used
@@ -451,7 +453,8 @@ private:
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
- uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ uint32_t getEdgeWeight(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const;
void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
uint32_t Weight = 0);
public:
@@ -471,7 +474,6 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitResume(const ResumeInst &I);
- void visitUnwind(const UnwindInst &I);
void visitBinary(const User &I, unsigned OpCode);
void visitShift(const User &I, unsigned Opcode);
@@ -554,8 +556,6 @@ private:
void visitUserOp2(const Instruction &I) {
llvm_unreachable("UserOp2 should not exist at instruction selection time!");
}
-
- const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
new file mode 100644
index 000000000000..f981afb437b0
--- /dev/null
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -0,0 +1,631 @@
+//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::dump method and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+ switch (getOpcode()) {
+ default:
+ if (getOpcode() < ISD::BUILTIN_OP_END)
+ return "<<Unknown DAG Node>>";
+ if (isMachineOpcode()) {
+ if (G)
+ if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+ if (getMachineOpcode() < TII->getNumOpcodes())
+ return TII->getName(getMachineOpcode());
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+ }
+ if (G) {
+ const TargetLowering &TLI = G->getTargetLoweringInfo();
+ const char *Name = TLI.getTargetNodeName(getOpcode());
+ if (Name) return Name;
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+ }
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+ case ISD::DELETED_NODE: return "<<Deleted Node!>>";
+#endif
+ case ISD::PREFETCH: return "Prefetch";
+ case ISD::MEMBARRIER: return "MemBarrier";
+ case ISD::ATOMIC_FENCE: return "AtomicFence";
+ case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap";
+ case ISD::ATOMIC_SWAP: return "AtomicSwap";
+ case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd";
+ case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub";
+ case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd";
+ case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr";
+ case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor";
+ case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand";
+ case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin";
+ case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
+ case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
+ case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD: return "AtomicLoad";
+ case ISD::ATOMIC_STORE: return "AtomicStore";
+ case ISD::PCMARKER: return "PCMarker";
+ case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+ case ISD::SRCVALUE: return "SrcValue";
+ case ISD::MDNODE_SDNODE: return "MDNode";
+ case ISD::EntryToken: return "EntryToken";
+ case ISD::TokenFactor: return "TokenFactor";
+ case ISD::AssertSext: return "AssertSext";
+ case ISD::AssertZext: return "AssertZext";
+
+ case ISD::BasicBlock: return "BasicBlock";
+ case ISD::VALUETYPE: return "ValueType";
+ case ISD::Register: return "Register";
+ case ISD::RegisterMask: return "RegisterMask";
+ case ISD::Constant: return "Constant";
+ case ISD::ConstantFP: return "ConstantFP";
+ case ISD::GlobalAddress: return "GlobalAddress";
+ case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+ case ISD::FrameIndex: return "FrameIndex";
+ case ISD::JumpTable: return "JumpTable";
+ case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+ case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+ case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+ case ISD::LSDAADDR: return "LSDAADDR";
+ case ISD::EHSELECTION: return "EHSELECTION";
+ case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::ConstantPool: return "ConstantPool";
+ case ISD::ExternalSymbol: return "ExternalSymbol";
+ case ISD::BlockAddress: return "BlockAddress";
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return Intrinsic::getName((Intrinsic::ID)IID);
+ else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
+ return TII->getName(IID);
+ llvm_unreachable("Invalid intrinsic ID");
+ }
+
+ case ISD::BUILD_VECTOR: return "BUILD_VECTOR";
+ case ISD::TargetConstant: return "TargetConstant";
+ case ISD::TargetConstantFP: return "TargetConstantFP";
+ case ISD::TargetGlobalAddress: return "TargetGlobalAddress";
+ case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress";
+ case ISD::TargetFrameIndex: return "TargetFrameIndex";
+ case ISD::TargetJumpTable: return "TargetJumpTable";
+ case ISD::TargetConstantPool: return "TargetConstantPool";
+ case ISD::TargetExternalSymbol: return "TargetExternalSymbol";
+ case ISD::TargetBlockAddress: return "TargetBlockAddress";
+
+ case ISD::CopyToReg: return "CopyToReg";
+ case ISD::CopyFromReg: return "CopyFromReg";
+ case ISD::UNDEF: return "undef";
+ case ISD::MERGE_VALUES: return "merge_values";
+ case ISD::INLINEASM: return "inlineasm";
+ case ISD::EH_LABEL: return "eh_label";
+ case ISD::HANDLENODE: return "handlenode";
+
+ // Unary operators
+ case ISD::FABS: return "fabs";
+ case ISD::FNEG: return "fneg";
+ case ISD::FSQRT: return "fsqrt";
+ case ISD::FSIN: return "fsin";
+ case ISD::FCOS: return "fcos";
+ case ISD::FTRUNC: return "ftrunc";
+ case ISD::FFLOOR: return "ffloor";
+ case ISD::FCEIL: return "fceil";
+ case ISD::FRINT: return "frint";
+ case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
+
+ // Binary operators
+ case ISD::ADD: return "add";
+ case ISD::SUB: return "sub";
+ case ISD::MUL: return "mul";
+ case ISD::MULHU: return "mulhu";
+ case ISD::MULHS: return "mulhs";
+ case ISD::SDIV: return "sdiv";
+ case ISD::UDIV: return "udiv";
+ case ISD::SREM: return "srem";
+ case ISD::UREM: return "urem";
+ case ISD::SMUL_LOHI: return "smul_lohi";
+ case ISD::UMUL_LOHI: return "umul_lohi";
+ case ISD::SDIVREM: return "sdivrem";
+ case ISD::UDIVREM: return "udivrem";
+ case ISD::AND: return "and";
+ case ISD::OR: return "or";
+ case ISD::XOR: return "xor";
+ case ISD::SHL: return "shl";
+ case ISD::SRA: return "sra";
+ case ISD::SRL: return "srl";
+ case ISD::ROTL: return "rotl";
+ case ISD::ROTR: return "rotr";
+ case ISD::FADD: return "fadd";
+ case ISD::FSUB: return "fsub";
+ case ISD::FMUL: return "fmul";
+ case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
+ case ISD::FREM: return "frem";
+ case ISD::FCOPYSIGN: return "fcopysign";
+ case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+
+ case ISD::FPOWI: return "fpowi";
+ case ISD::SETCC: return "setcc";
+ case ISD::SELECT: return "select";
+ case ISD::VSELECT: return "vselect";
+ case ISD::SELECT_CC: return "select_cc";
+ case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
+ case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
+ case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
+ case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
+ case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::CARRY_FALSE: return "carry_false";
+ case ISD::ADDC: return "addc";
+ case ISD::ADDE: return "adde";
+ case ISD::SADDO: return "saddo";
+ case ISD::UADDO: return "uaddo";
+ case ISD::SSUBO: return "ssubo";
+ case ISD::USUBO: return "usubo";
+ case ISD::SMULO: return "smulo";
+ case ISD::UMULO: return "umulo";
+ case ISD::SUBC: return "subc";
+ case ISD::SUBE: return "sube";
+ case ISD::SHL_PARTS: return "shl_parts";
+ case ISD::SRA_PARTS: return "sra_parts";
+ case ISD::SRL_PARTS: return "srl_parts";
+
+ // Conversion operators.
+ case ISD::SIGN_EXTEND: return "sign_extend";
+ case ISD::ZERO_EXTEND: return "zero_extend";
+ case ISD::ANY_EXTEND: return "any_extend";
+ case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg";
+ case ISD::TRUNCATE: return "truncate";
+ case ISD::FP_ROUND: return "fp_round";
+ case ISD::FLT_ROUNDS_: return "flt_rounds";
+ case ISD::FP_ROUND_INREG: return "fp_round_inreg";
+ case ISD::FP_EXTEND: return "fp_extend";
+
+ case ISD::SINT_TO_FP: return "sint_to_fp";
+ case ISD::UINT_TO_FP: return "uint_to_fp";
+ case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::BITCAST: return "bitcast";
+ case ISD::FP16_TO_FP32: return "fp16_to_fp32";
+ case ISD::FP32_TO_FP16: return "fp32_to_fp16";
+
+ case ISD::CONVERT_RNDSAT: {
+ switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) {
+ default: llvm_unreachable("Unknown cvt code!");
+ case ISD::CVT_FF: return "cvt_ff";
+ case ISD::CVT_FS: return "cvt_fs";
+ case ISD::CVT_FU: return "cvt_fu";
+ case ISD::CVT_SF: return "cvt_sf";
+ case ISD::CVT_UF: return "cvt_uf";
+ case ISD::CVT_SS: return "cvt_ss";
+ case ISD::CVT_SU: return "cvt_su";
+ case ISD::CVT_US: return "cvt_us";
+ case ISD::CVT_UU: return "cvt_uu";
+ }
+ }
+
+ // Control flow instructions
+ case ISD::BR: return "br";
+ case ISD::BRIND: return "brind";
+ case ISD::BR_JT: return "br_jt";
+ case ISD::BRCOND: return "brcond";
+ case ISD::BR_CC: return "br_cc";
+ case ISD::CALLSEQ_START: return "callseq_start";
+ case ISD::CALLSEQ_END: return "callseq_end";
+
+ // Other operators
+ case ISD::LOAD: return "load";
+ case ISD::STORE: return "store";
+ case ISD::VAARG: return "vaarg";
+ case ISD::VACOPY: return "vacopy";
+ case ISD::VAEND: return "vaend";
+ case ISD::VASTART: return "vastart";
+ case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc";
+ case ISD::EXTRACT_ELEMENT: return "extract_element";
+ case ISD::BUILD_PAIR: return "build_pair";
+ case ISD::STACKSAVE: return "stacksave";
+ case ISD::STACKRESTORE: return "stackrestore";
+ case ISD::TRAP: return "trap";
+
+ // Bit manipulation
+ case ISD::BSWAP: return "bswap";
+ case ISD::CTPOP: return "ctpop";
+ case ISD::CTTZ: return "cttz";
+ case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef";
+ case ISD::CTLZ: return "ctlz";
+ case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef";
+
+ // Trampolines
+ case ISD::INIT_TRAMPOLINE: return "init_trampoline";
+ case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline";
+
+ case ISD::CONDCODE:
+ switch (cast<CondCodeSDNode>(this)->get()) {
+ default: llvm_unreachable("Unknown setcc condition!");
+ case ISD::SETOEQ: return "setoeq";
+ case ISD::SETOGT: return "setogt";
+ case ISD::SETOGE: return "setoge";
+ case ISD::SETOLT: return "setolt";
+ case ISD::SETOLE: return "setole";
+ case ISD::SETONE: return "setone";
+
+ case ISD::SETO: return "seto";
+ case ISD::SETUO: return "setuo";
+ case ISD::SETUEQ: return "setueq";
+ case ISD::SETUGT: return "setugt";
+ case ISD::SETUGE: return "setuge";
+ case ISD::SETULT: return "setult";
+ case ISD::SETULE: return "setule";
+ case ISD::SETUNE: return "setune";
+
+ case ISD::SETEQ: return "seteq";
+ case ISD::SETGT: return "setgt";
+ case ISD::SETGE: return "setge";
+ case ISD::SETLT: return "setlt";
+ case ISD::SETLE: return "setle";
+ case ISD::SETNE: return "setne";
+
+ case ISD::SETTRUE: return "settrue";
+ case ISD::SETTRUE2: return "settrue2";
+ case ISD::SETFALSE: return "setfalse";
+ case ISD::SETFALSE2: return "setfalse2";
+ }
+ }
+}
+
+const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+ switch (AM) {
+ default: return "";
+ case ISD::PRE_INC: return "<pre-inc>";
+ case ISD::PRE_DEC: return "<pre-dec>";
+ case ISD::POST_INC: return "<post-inc>";
+ case ISD::POST_DEC: return "<post-dec>";
+ }
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+ print(dbgs(), G);
+ dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+ OS << (void*)this << ": ";
+
+ for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+ if (i) OS << ",";
+ if (getValueType(i) == MVT::Other)
+ OS << "ch";
+ else
+ OS << getValueType(i).getEVTString();
+ }
+ OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+ if (!MN->memoperands_empty()) {
+ OS << "<";
+ OS << "Mem:";
+ for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+ e = MN->memoperands_end(); i != e; ++i) {
+ OS << **i;
+ if (llvm::next(i) != e)
+ OS << " ";
+ }
+ OS << ">";
+ }
+ } else if (const ShuffleVectorSDNode *SVN =
+ dyn_cast<ShuffleVectorSDNode>(this)) {
+ OS << "<";
+ for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if (i) OS << ",";
+ if (Idx < 0)
+ OS << "u";
+ else
+ OS << Idx;
+ }
+ OS << ">";
+ } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+ OS << '<' << CSDN->getAPIntValue() << '>';
+ } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+ else {
+ OS << "<APFloat(";
+ CSDN->getValueAPF().bitcastToAPInt().dump();
+ OS << ")>";
+ }
+ } else if (const GlobalAddressSDNode *GADN =
+ dyn_cast<GlobalAddressSDNode>(this)) {
+ int64_t offset = GADN->getOffset();
+ OS << '<';
+ WriteAsOperand(OS, GADN->getGlobal());
+ OS << '>';
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = GADN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+ OS << "<" << FIDN->getIndex() << ">";
+ } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+ OS << "<" << JTDN->getIndex() << ">";
+ if (unsigned int TF = JTDN->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+ int offset = CP->getOffset();
+ if (CP->isMachineConstantPoolEntry())
+ OS << "<" << *CP->getMachineCPVal() << ">";
+ else
+ OS << "<" << *CP->getConstVal() << ">";
+ if (offset > 0)
+ OS << " + " << offset;
+ else
+ OS << " " << offset;
+ if (unsigned int TF = CP->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+ OS << "<";
+ const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+ if (LBB)
+ OS << LBB->getName() << " ";
+ OS << (const void*)BBDN->getBasicBlock() << ">";
+ } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(this)) {
+ OS << "'" << ES->getSymbol() << "'";
+ if (unsigned int TF = ES->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
+ if (M->getValue())
+ OS << "<" << M->getValue() << ">";
+ else
+ OS << "<null>";
+ } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) {
+ if (MD->getMD())
+ OS << "<" << MD->getMD() << ">";
+ else
+ OS << "<null>";
+ } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
+ OS << ":" << N->getVT().getEVTString();
+ }
+ else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
+ OS << "<" << *LD->getMemOperand();
+
+ bool doExt = true;
+ switch (LD->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << LD->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(LD->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) {
+ OS << "<" << *ST->getMemOperand();
+
+ if (ST->isTruncatingStore())
+ OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+
+ const char *AM = getIndexedModeName(ST->getAddressingMode());
+ if (*AM)
+ OS << ", " << AM;
+
+ OS << ">";
+ } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
+ OS << "<" << *M->getMemOperand() << ">";
+ } else if (const BlockAddressSDNode *BA =
+ dyn_cast<BlockAddressSDNode>(this)) {
+ OS << "<";
+ WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false);
+ OS << ", ";
+ WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false);
+ OS << ">";
+ if (unsigned int TF = BA->getTargetFlags())
+ OS << " [TF=" << TF << ']';
+ }
+
+ if (G)
+ if (unsigned Order = G->GetOrdering(this))
+ OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
+}
+
+static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (N->getOperand(i).getNode()->hasOneUse())
+ DumpNodes(N->getOperand(i).getNode(), indent+2, G);
+ else
+ dbgs() << "\n" << std::string(indent+2, ' ')
+ << (void*)N->getOperand(i).getNode() << ": <multiple use>";
+
+ dbgs() << '\n';
+ dbgs().indent(indent);
+ N->dump(G);
+}
+
+void SelectionDAG::dump() const {
+ dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
+
+ for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end();
+ I != E; ++I) {
+ const SDNode *N = I;
+ if (!N->hasOneUse() && N != getRoot().getNode())
+ DumpNodes(N, 2, this);
+ }
+
+ if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
+ dbgs() << "\n\n";
+}
+
+void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ print_details(OS, G);
+}
+
+typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet;
+static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
+ const SelectionDAG *G, VisitedSDNodeSet &once) {
+ if (!once.insert(N)) // If we've been here before, return now.
+ return;
+
+ // Dump the current SDNode, but don't end the line yet.
+ OS.indent(indent);
+ N->printr(OS, G);
+
+ // Having printed this SDNode, walk the children:
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+
+ if (i) OS << ",";
+ OS << " ";
+
+ if (child->getNumOperands() == 0) {
+ // This child has no grandchildren; print it inline right here.
+ child->printr(OS, G);
+ once.insert(child);
+ } else { // Just the address. FIXME: also print the child's opcode.
+ OS << (void*)child;
+ if (unsigned RN = N->getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ }
+
+ OS << "\n";
+
+ // Dump children that have grandchildren on their own line(s).
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ const SDNode *child = N->getOperand(i).getNode();
+ DumpNodesr(OS, child, indent+2, G, once);
+ }
+}
+
+void SDNode::dumpr() const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+ VisitedSDNodeSet once;
+ DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
+ const SelectionDAG *G, unsigned depth,
+ unsigned indent) {
+ if (depth == 0)
+ return;
+
+ OS.indent(indent);
+
+ N->print(OS, G);
+
+ if (depth < 1)
+ return;
+
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ // Don't follow chain operands.
+ if (N->getOperand(i).getValueType() == MVT::Other)
+ continue;
+ OS << '\n';
+ printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2);
+ }
+}
+
+void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
+ unsigned depth) const {
+ printrWithDepthHelper(OS, this, G, depth, 0);
+}
+
+void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ printrWithDepth(OS, G, 10);
+}
+
+void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
+ printrWithDepth(dbgs(), G, depth);
+}
+
+void SDNode::dumprFull(const SelectionDAG *G) const {
+ // Don't print impossibly deep things.
+ dumprWithDepth(G, 10);
+}
+
+void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
+ print_types(OS, G);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (i) OS << ", "; else OS << " ";
+ OS << (void*)getOperand(i).getNode();
+ if (unsigned RN = getOperand(i).getResNo())
+ OS << ":" << RN;
+ }
+ print_details(OS, G);
+}
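The new SelectionDAGDumper.cpp gathers the dump/print entry points into one translation unit without changing their signatures. Typical calls from a debugger or ad-hoc instrumentation, assuming a SelectionDAG named DAG (illustrative only):

    SDNode *N = DAG.getRoot().getNode();
    N->dump(&DAG);              // one node, with target-aware opcode names
    N->dumpr(&DAG);             // node plus transitive operands, each printed once
    N->dumprWithDepth(&DAG, 3); // bounded recursion; chain operands are skipped
    DAG.dump();                 // every node in the DAG, root printed last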
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 68b9146adfe1..605509bd227a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -41,6 +41,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -61,6 +62,80 @@ STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+static cl::opt<bool>
+EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden,
+ cl::desc("Enable extra verbose messages in the \"fast\" "
+ "instruction selector"));
+ // Terminators
+STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret");
+STATISTIC(NumFastIselFailBr,"Fast isel fails on Br");
+STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch");
+STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr");
+STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke");
+STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume");
+STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable");
+
+ // Standard binary operators...
+STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add");
+STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd");
+STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub");
+STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub");
+STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul");
+STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul");
+STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv");
+STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv");
+STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv");
+STATISTIC(NumFastIselFailURem,"Fast isel fails on URem");
+STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem");
+STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem");
+
+ // Logical operators...
+STATISTIC(NumFastIselFailAnd,"Fast isel fails on And");
+STATISTIC(NumFastIselFailOr,"Fast isel fails on Or");
+STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor");
+
+ // Memory instructions...
+STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca");
+STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load");
+STATISTIC(NumFastIselFailStore,"Fast isel fails on Store");
+STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg");
+STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW");
+STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence");
+STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr");
+
+ // Convert instructions...
+STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc");
+STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt");
+STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt");
+STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc");
+STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt");
+STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI");
+STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI");
+STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP");
+STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP");
+STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr");
+STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt");
+STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast");
+
+ // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp");
+STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp");
+STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI");
+STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select");
+STATISTIC(NumFastIselFailCall,"Fast isel fails on Call");
+STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl");
+STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr");
+STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr");
+STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg");
+STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement");
+STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement");
+STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector");
+STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue");
+STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue");
+STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -142,14 +217,15 @@ namespace llvm {
CodeGenOpt::Level OptLevel) {
const TargetLowering &TLI = IS->getTargetLowering();
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None ||
+ TLI.getSchedulingPreference() == Sched::Source)
return createSourceListDAGScheduler(IS, OptLevel);
- if (TLI.getSchedulingPreference() == Sched::Latency)
- return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Hybrid)
return createHybridListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::VLIW)
+ return createVLIWDAGScheduler(IS, OptLevel);
assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
return createILPListDAGScheduler(IS, OptLevel);
@@ -174,12 +250,11 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
llvm_unreachable(0);
- return 0;
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- assert(!MI->getDesc().hasPostISelHook() &&
+ assert(!MI->hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
}
@@ -188,11 +263,13 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
+void SelectionDAGISel::ISelUpdater::anchor() { }
+
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm)),
+ CurDAG(new SelectionDAG(tm, OL)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -200,6 +277,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
+ initializeTargetLibraryInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -213,6 +291,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ AU.addRequired<TargetLibraryInfo>();
if (UseMBPI && OptLevel != CodeGenOpt::None)
AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -258,9 +337,9 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
- assert((!EnableFastISelVerbose || EnableFastISel) &&
+ assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
- assert((!EnableFastISelAbort || EnableFastISel) &&
+ assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort requires -fast-isel");
const Function &Fn = *mf.getFunction();
@@ -270,6 +349,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
+ LibInfo = &getAnalysis<TargetLibraryInfo>();
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
@@ -284,7 +364,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
else
FuncInfo->BPI = 0;
- SDB->init(GFI, *AA);
+ SDB->init(GFI, *AA, LibInfo);
SelectAllBasicBlocks(Fn);
@@ -348,7 +428,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII.get(TargetOpcode::DBG_VALUE))
.addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
.addImm(Offset).addMetadata(Variable);
- EntryMBB->insertAfter(CopyUseMI, NewMI);
+ MachineBasicBlock::iterator Pos = CopyUseMI;
+ EntryMBB->insertAfter(Pos, NewMI);
}
}
}
@@ -374,7 +455,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there is a call to setjmp in the machine function.
- MF->setCallsSetJmp(Fn.callsFunctionThatReturnsTwice());
+ MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
// Replace forward-declared registers with the registers containing
// the desired value.
@@ -427,7 +508,6 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
Worklist.push_back(CurDAG->getRoot().getNode());
- APInt Mask;
APInt KnownZero;
APInt KnownOne;
@@ -458,8 +538,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
- CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
} while (!Worklist.empty());
}
@@ -478,8 +557,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
#endif
{
BlockNumber = FuncInfo->MBB->getNumber();
- BlockName = MF->getFunction()->getNameStr() + ":" +
- FuncInfo->MBB->getBasicBlock()->getNameStr();
+ BlockName = MF->getFunction()->getName().str() + ":" +
+ FuncInfo->MBB->getBasicBlock()->getName().str();
}
DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
<< " '" << BlockName << "'\n"; CurDAG->dump());
@@ -489,7 +568,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in pre-legalize mode.
{
NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(Unrestricted, *AA, OptLevel);
+ CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
@@ -517,7 +596,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize types", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
@@ -542,7 +621,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
@@ -564,7 +643,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-legalize mode.
{
NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
}
DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
@@ -592,7 +671,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Scheduling", GroupName,
TimePassesIsEnabled);
- Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+ Scheduler->Run(CurDAG, FuncInfo->MBB);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
@@ -603,8 +682,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
- FuncInfo->InsertPt = Scheduler->InsertPos;
+ // FuncInfo->InsertPt is passed by reference and set to the end of the
+ // scheduled instructions.
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt);
}
// If the block was split, make sure we update any references that are used to
@@ -693,43 +773,18 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// Assign the call site to the landing pad's begin label.
MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
-
+
const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
// Mark exception register as live in.
- unsigned Reg = TLI.getExceptionAddressRegister();
+ unsigned Reg = TLI.getExceptionPointerRegister();
if (Reg) MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
if (Reg) MBB->addLiveIn(Reg);
-
- // FIXME: Hack around an exception handling flaw (PR1508): the personality
- // function and list of typeids logically belong to the invoke (or, if you
- // like, the basic block containing the invoke), and need to be associated
- // with it in the dwarf exception handling tables. Currently however the
- // information is provided by an intrinsic (eh.selector) that can be moved
- // to unexpected places by the optimizers: if the unwind edge is critical,
- // then breaking it can result in the intrinsics being in the successor of
- // the landing pad, not the landing pad itself. This results
- // in exceptions not being caught because no typeids are associated with
- // the invoke. This may not be the only way things can go wrong, but it
- // is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
-
- if (Br && Br->isUnconditional()) { // Critical edge?
- BasicBlock::const_iterator I, E;
- for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
- if (isa<EHSelectorInst>(I))
- break;
-
- if (I == E)
- // No catch info found - try to extract some from the successor.
- CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
- }
}
/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified
@@ -822,10 +877,90 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
+#ifndef NDEBUG
+// Collect per-Instruction statistics for fast-isel misses. Only the
+// instruction that caused the bail-out is accounted for; instructions
+// earlier in the block are not. Summing the per-instruction stats will
+// therefore not add up to what NumFastIselFailures reports.
+static void collectFailStats(const Instruction *I) {
+ switch (I->getOpcode()) {
+ default: assert(0 && "<Invalid operator>");
+
+ // Terminators
+ case Instruction::Ret: NumFastIselFailRet++; return;
+ case Instruction::Br: NumFastIselFailBr++; return;
+ case Instruction::Switch: NumFastIselFailSwitch++; return;
+ case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return;
+ case Instruction::Invoke: NumFastIselFailInvoke++; return;
+ case Instruction::Resume: NumFastIselFailResume++; return;
+ case Instruction::Unreachable: NumFastIselFailUnreachable++; return;
+
+ // Standard binary operators...
+ case Instruction::Add: NumFastIselFailAdd++; return;
+ case Instruction::FAdd: NumFastIselFailFAdd++; return;
+ case Instruction::Sub: NumFastIselFailSub++; return;
+ case Instruction::FSub: NumFastIselFailFSub++; return;
+ case Instruction::Mul: NumFastIselFailMul++; return;
+ case Instruction::FMul: NumFastIselFailFMul++; return;
+ case Instruction::UDiv: NumFastIselFailUDiv++; return;
+ case Instruction::SDiv: NumFastIselFailSDiv++; return;
+ case Instruction::FDiv: NumFastIselFailFDiv++; return;
+ case Instruction::URem: NumFastIselFailURem++; return;
+ case Instruction::SRem: NumFastIselFailSRem++; return;
+ case Instruction::FRem: NumFastIselFailFRem++; return;
+
+ // Logical operators...
+ case Instruction::And: NumFastIselFailAnd++; return;
+ case Instruction::Or: NumFastIselFailOr++; return;
+ case Instruction::Xor: NumFastIselFailXor++; return;
+
+ // Memory instructions...
+ case Instruction::Alloca: NumFastIselFailAlloca++; return;
+ case Instruction::Load: NumFastIselFailLoad++; return;
+ case Instruction::Store: NumFastIselFailStore++; return;
+ case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return;
+ case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return;
+ case Instruction::Fence: NumFastIselFailFence++; return;
+ case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return;
+
+ // Convert instructions...
+ case Instruction::Trunc: NumFastIselFailTrunc++; return;
+ case Instruction::ZExt: NumFastIselFailZExt++; return;
+ case Instruction::SExt: NumFastIselFailSExt++; return;
+ case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return;
+ case Instruction::FPExt: NumFastIselFailFPExt++; return;
+ case Instruction::FPToUI: NumFastIselFailFPToUI++; return;
+ case Instruction::FPToSI: NumFastIselFailFPToSI++; return;
+ case Instruction::UIToFP: NumFastIselFailUIToFP++; return;
+ case Instruction::SIToFP: NumFastIselFailSIToFP++; return;
+ case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return;
+ case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return;
+ case Instruction::BitCast: NumFastIselFailBitCast++; return;
+
+ // Other instructions...
+ case Instruction::ICmp: NumFastIselFailICmp++; return;
+ case Instruction::FCmp: NumFastIselFailFCmp++; return;
+ case Instruction::PHI: NumFastIselFailPHI++; return;
+ case Instruction::Select: NumFastIselFailSelect++; return;
+ case Instruction::Call: NumFastIselFailCall++; return;
+ case Instruction::Shl: NumFastIselFailShl++; return;
+ case Instruction::LShr: NumFastIselFailLShr++; return;
+ case Instruction::AShr: NumFastIselFailAShr++; return;
+ case Instruction::VAArg: NumFastIselFailVAArg++; return;
+ case Instruction::ExtractElement: NumFastIselFailExtractElement++; return;
+ case Instruction::InsertElement: NumFastIselFailInsertElement++; return;
+ case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return;
+ case Instruction::ExtractValue: NumFastIselFailExtractValue++; return;
+ case Instruction::InsertValue: NumFastIselFailInsertValue++; return;
+ case Instruction::LandingPad: NumFastIselFailLandingPad++; return;
+ }
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
- if (EnableFastISel)
+ if (TM.Options.EnableFastISel)
FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
@@ -894,13 +1029,16 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->setLastLocalValue(0);
}
+ unsigned NumFastIselRemaining = std::distance(Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
const Instruction *Inst = llvm::prior(BI);
// If we no longer require this instruction, skip it.
- if (isFoldedOrDeadInstruction(Inst, FuncInfo))
+ if (isFoldedOrDeadInstruction(Inst, FuncInfo)) {
+ --NumFastIselRemaining;
continue;
+ }
// Bottom-up: reset the insert pos at the top, after any local-value
// instructions.
@@ -908,6 +1046,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(Inst)) {
+ --NumFastIselRemaining;
++NumFastIselSuccess;
// If fast isel succeeded, skip over all the folded instructions, and
// then see if there is a load right before the selected instructions.
@@ -920,15 +1059,23 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) &&
BeforeInst->hasOneUse() &&
- TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS))
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), Inst, FastIS)) {
// If we succeeded, don't re-select the load.
BI = llvm::next(BasicBlock::const_iterator(BeforeInst));
+ --NumFastIselRemaining;
+ ++NumFastIselSuccess;
+ }
continue;
}
+#ifndef NDEBUG
+ if (EnableFastISelVerbose2)
+ collectFailStats(Inst);
+#endif
+
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
- ++NumFastIselFailures;
+
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
Inst->dump();
@@ -943,24 +1090,30 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
bool HadTailCall = false;
SelectBasicBlock(Inst, BI, HadTailCall);
+ // Recompute NumFastIselRemaining as Selection DAG instruction
+ // selection may have handled the call, input args, etc.
+ unsigned RemainingNow = std::distance(Begin, BI);
+ NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
--BI;
break;
}
+ NumFastIselRemaining = RemainingNow;
continue;
}
if (isa<TerminatorInst>(Inst) && !isa<BranchInst>(Inst)) {
// Don't abort, and use a different message for terminator misses.
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed terminator: ";
Inst->dump();
}
} else {
- ++NumFastIselFailures;
+ NumFastIselFailures += NumFastIselRemaining;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
Inst->dump();
@@ -1289,7 +1442,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
APInt NeededMask = DesiredMask & ~ActualMask;
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+ CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
// If all the missing bits in the or are already known to be set, match!
if ((NeededMask & KnownOne) == NeededMask)
@@ -2025,6 +2178,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::EntryToken: // These nodes remain the same.
case ISD::BasicBlock:
case ISD::Register:
+ case ISD::RegisterMask:
//case ISD::VALUETYPE:
//case ISD::CONDCODE:
case ISD::HANDLENODE:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index cd1647b17b9b..6cde05aea82a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -19,7 +19,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -28,7 +27,6 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
using namespace llvm;
namespace llvm {
@@ -148,7 +146,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
false, Title);
#else
errs() << "SelectionDAG::viewGraph is only available in debug builds on "
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 907d8d9da1af..09a2b1f3d7a5 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,31 +36,9 @@ using namespace llvm;
/// - the promotion of vector elements. This feature is disabled by default
/// and only enabled using this flag.
static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden,
+AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
cl::desc("Allow promotion of integer vector element types"));
-namespace llvm {
-TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
- bool isLocal = GV->hasLocalLinkage();
- bool isDeclaration = GV->isDeclaration();
- // FIXME: what should we do for protected and internal visibility?
- // For variables, is internal different from hidden?
- bool isHidden = GV->hasHiddenVisibility();
-
- if (reloc == Reloc::PIC_) {
- if (isLocal || isHidden)
- return TLSModel::LocalDynamic;
- else
- return TLSModel::GeneralDynamic;
- } else {
- if (!isDeclaration || isHidden)
- return TLSModel::LocalExec;
- else
- return TLSModel::InitialExec;
- }
-}
-}
-
/// InitLibcallNames - Set default libcall names.
///
static void InitLibcallNames(const char **Names) {
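
The deleted getTLSModel encoded a small decision table for picking a thread-local-storage access model; this hunk only shows the removal, so presumably the helper moved elsewhere rather than vanishing. A self-contained sketch of the same logic, with a local enum standing in for llvm::TLSModel::Model:

    // Standalone sketch of the decision table the removed helper implemented.
    enum class TLSModel { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };

    TLSModel chooseTLSModel(bool IsPIC, bool HasLocalLinkage, bool IsHidden,
                            bool IsDeclaration) {
      if (IsPIC) {
        // Dynamic models; local or hidden symbols may use the cheaper
        // local-dynamic form because they resolve within this module.
        return (HasLocalLinkage || IsHidden) ? TLSModel::LocalDynamic
                                             : TLSModel::GeneralDynamic;
      }
      // Static relocation model: exec models; a definition in this module
      // (or a hidden symbol) permits local-exec.
      return (!IsDeclaration || IsHidden) ? TLSModel::LocalExec
                                          : TLSModel::InitialExec;
    }
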
@@ -572,21 +550,42 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Expand);
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
// These library functions default to expand.
- setOperationAction(ISD::FLOG , MVT::f64, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG10,MVT::f64, Expand);
- setOperationAction(ISD::FEXP , MVT::f64, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
- setOperationAction(ISD::FLOG , MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10,MVT::f32, Expand);
- setOperationAction(ISD::FEXP , MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f16, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f16, Expand);
+ setOperationAction(ISD::FEXP , MVT::f16, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f16, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f16, Expand);
+ setOperationAction(ISD::FRINT, MVT::f16, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Expand);
+ setOperationAction(ISD::FLOG , MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP , MVT::f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f32, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f32, Expand);
+ setOperationAction(ISD::FRINT, MVT::f32, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG , MVT::f64, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f64, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f64, Expand);
+ setOperationAction(ISD::FEXP , MVT::f64, Expand);
+ setOperationAction(ISD::FEXP2, MVT::f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
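
These setOperationAction calls populate a per-(opcode, value type) action table: the new MVT::f16 rows and the FFLOOR/FNEARBYINT/FCEIL/FRINT/FTRUNC rows all start out Expand, and a target constructor running afterwards may flip individual entries to Legal or Custom. A toy model of that last-writer-wins table (the real implementation is an array indexed by opcode and type, but the behavior is the same):

    #include <cassert>
    #include <map>
    #include <utility>

    enum class Action { Legal, Expand, Custom };
    enum class Op { FFLOOR, FCEIL };
    enum class VT { f16, f32, f64 };

    struct ActionTable {
      std::map<std::pair<Op, VT>, Action> Tbl;
      void set(Op O, VT T, Action A) { Tbl[{O, T}] = A; }
      Action get(Op O, VT T) const {
        auto I = Tbl.find({O, T});
        return I == Tbl.end() ? Action::Legal : I->second;
      }
    };

    int main() {
      ActionTable T;
      T.set(Op::FFLOOR, VT::f16, Action::Expand); // base-class default
      T.set(Op::FFLOOR, VT::f32, Action::Expand); // base-class default
      T.set(Op::FFLOOR, VT::f32, Action::Legal);  // target override wins
      assert(T.get(Op::FFLOOR, VT::f32) == Action::Legal);
      assert(T.get(Op::FFLOOR, VT::f16) == Action::Expand);
    }
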
@@ -610,7 +609,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
- SchedPreferenceInfo = Sched::Latency;
+ SchedPreferenceInfo = Sched::ILP;
JumpBufSize = 0;
JumpBufAlignment = 0;
MinFunctionAlignment = 0;
@@ -1080,8 +1079,12 @@ unsigned TargetLowering::getJumpTableEncoding() const {
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
SelectionDAG &DAG) const {
// If our PIC model is GP relative, use the global offset table as the base.
- if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress)
+ unsigned JTEncoding = getJumpTableEncoding();
+
+ if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
+ (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
+
return Table;
}
@@ -1223,7 +1226,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, just compute the KnownZero/KnownOne bits to
// simplify things downstream.
- TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -1242,8 +1245,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
switch (Op.getOpcode()) {
case ISD::Constant:
// We know all of the bits for a constant!
- KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask;
- KnownZero = ~KnownOne & NewMask;
+ KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
+ KnownZero = ~KnownOne;
return false; // Don't fall through, will infinitely loop.
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
@@ -1253,8 +1256,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1473,9 +1475,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getNode()->getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- if ((APInt::getHighBitsSet(BitWidth,
- BitWidth - InnerVT.getSizeInBits()) &
- DemandedMask) == 0 &&
+ unsigned InnerBits = InnerVT.getSizeInBits();
+ if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT);
if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
@@ -1545,7 +1546,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedMask == 1)
+ if (NewMask == 1)
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
@@ -1588,23 +1589,40 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
case ISD::SIGN_EXTEND_INREG: {
- EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+
+ APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
+ // If we only care about the highest bit, don't bother shifting right.
+ if (MsbMask == DemandedMask) {
+ unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ SDValue InOp = Op.getOperand(0);
+
+ // Compute the correct shift amount type, which must be getShiftAmountTy
+ // for scalar types after legalization.
+ EVT ShiftAmtTy = Op.getValueType();
+ if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
+ ShiftAmtTy = getShiftAmountTy(ShiftAmtTy);
+
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, ShiftAmtTy);
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
+ Op.getValueType(), InOp, ShiftAmt));
+ }
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- EVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarType().getSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
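
The new early-out rests on a bit identity: when only the most significant result bit of a sign_extend_inreg is demanded, that bit is just the sign bit of the narrow type, which a left shift by BitWidth - ExVTBits moves into place with no sign extension at all. A tiny standalone check of the identity for ExVT = i8 inside an i32 (the narrowing cast relies on two's complement, which every mainstream compiler provides):

    #include <cassert>
    #include <cstdint>

    // MSB of sign_extend_inreg(x, i8) in an i32 equals the MSB of x << 24,
    // i.e. a shift by BitWidth(32) - ExVTBits(8); no right shift is needed.
    int main() {
      for (uint32_t x : {0x00u, 0x7Fu, 0x80u, 0xFFu, 0x12345680u}) {
        uint32_t SextInReg = (uint32_t)(int32_t)(int8_t)(x & 0xFF);
        uint32_t Shifted = x << 24;
        assert((SextInReg >> 31) == (Shifted >> 31)); // only the MSB demanded
      }
    }
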
@@ -1622,7 +1640,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
+ TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
@@ -1688,11 +1706,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
- KnownOne |= NewBits;
- KnownZero &= ~NewBits;
+ KnownOne |= NewBits;
+ assert((KnownZero & NewBits) == 0);
} else { // Otherwise, top bits aren't known.
- KnownOne &= ~NewBits;
- KnownZero &= ~NewBits;
+ assert((KnownOne & NewBits) == 0);
+ assert((KnownZero & NewBits) == 0);
}
break;
}
@@ -1783,7 +1801,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (!Op.getOperand(0).getValueType().isVector() &&
+ if (!TLO.LegalOperations() &&
+ !Op.getValueType().isVector() &&
+ !Op.getOperand(0).getValueType().isVector() &&
NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
@@ -1824,7 +1844,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// FALL THROUGH
default:
// Just use ComputeMaskedBits to compute output bits.
- TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+ TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
break;
}
@@ -1840,7 +1860,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -1851,7 +1870,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
Op.getOpcode() == ISD::INTRINSIC_VOID) &&
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
}
/// ComputeNumSignBitsForTargetNode - This method can be implemented by
@@ -1895,9 +1914,8 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// Fall back to ComputeMaskedBits to catch other known cases.
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
(KnownOne.countPopulation() == 1);
}
@@ -2060,7 +2078,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
Lod->getPointerInfo().getWithOffset(bestOffset),
- false, false, NewAlign);
+ false, false, false, NewAlign);
return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
@@ -2393,8 +2411,15 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger())
- return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ if (N0.getValueType().isInteger()) {
+ switch (getBooleanContents(N0.getValueType().isVector())) {
+ case UndefinedBooleanContent:
+ case ZeroOrOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+ case ZeroOrNegativeOneBooleanContent:
+ return DAG.getConstant(ISD::isTrueWhenEqual(Cond) ? -1 : 0, VT);
+ }
+ }
unsigned UOF = ISD::getUnorderedFlavor(Cond);
if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
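
The fold now respects how the target encodes a true setcc result: zero-or-one targets get the constant 1, while zero-or-negative-one targets (typical for vector compares, where true is an all-ones lane) get -1. The same dispatch with local stand-in types:

    #include <cstdint>

    enum class BooleanContent { Undefined, ZeroOrOne, ZeroOrNegativeOne };

    // The constant a fold such as (x == x) -> true should materialize.
    int64_t trueValueFor(BooleanContent BC) {
      switch (BC) {
      case BooleanContent::Undefined:
      case BooleanContent::ZeroOrOne:
        return 1;  // scalar-style boolean
      case BooleanContent::ZeroOrNegativeOne:
        return -1; // all-ones lanes, typical of vector setcc results
      }
      return 0;    // unreachable once the switch covers every enumerator
    }
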
@@ -2428,6 +2453,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // If RHS is a legal immediate value for a compare instruction, we need
+ // to be careful about increasing register pressure needlessly.
+ bool LegalRHSImm = false;
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2462,25 +2491,33 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
}
}
+
+ // Could RHSC fold directly into a compare?
+ if (RHSC->getValueType(0).getSizeInBits() <= 64)
+ LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
// Simplify (X+Z) == X --> Z == 0
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (DAG.isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, N0.getValueType()), Cond);
- else if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ // Don't do this if X is an immediate that can fold into a cmp
+ // instruction and X+Z has other uses. It could be an induction variable
+ // chain, and the transform would increase register pressure.
+ if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
+ if (N0.getOperand(0) == N1)
+ return DAG.getSetCC(dl, VT, N0.getOperand(1),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ if (N0.getOperand(1) == N1) {
+ if (DAG.isCommutativeBinOp(N0.getOpcode()))
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, N0.getValueType()), Cond);
+ else if (N0.getNode()->hasOneUse()) {
+ assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+ // (Z-X) == X --> Z == X<<1
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1,
DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(SH.getNode());
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+ }
}
}
}
@@ -2984,7 +3021,6 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
- default: llvm_unreachable("Unknown constraint type!");
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -2995,6 +3031,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
case TargetLowering::C_Memory:
return 3;
}
+ llvm_unreachable("Invalid constraint type");
}
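
Hoisting llvm_unreachable out of the default: case and past the switch is a recurring LLVM cleanup: with every enumerator covered and no default, the compiler can warn when a new enumerator is added, while the trailing unreachable still documents that corrupt values cannot fall out. The shape in a standalone sketch, using the GCC/Clang builtin that llvm_unreachable wraps:

    enum class Kind { A, B, C };

    unsigned rank(Kind K) {
      switch (K) { // no default: the compiler flags any new enumerator
      case Kind::A: return 0;
      case Kind::B: return 1;
      case Kind::C: return 2;
      }
      __builtin_unreachable(); // plays the role of llvm_unreachable(...)
    }
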
/// Examine constraint type and operand type and determine a weight value.
@@ -3242,8 +3279,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
@@ -3258,10 +3296,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
- if (isOperationLegalOrCustom(ISD::MULHS, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) :
+ isOperationLegalOrCustom(ISD::MULHS, VT))
Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
N->getOperand(0),
DAG.getConstant(magics.m, VT)).getNode(), 1);
@@ -3299,8 +3339,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
- std::vector<SDNode*>* Created) const {
+SDValue TargetLowering::
+BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
@@ -3332,9 +3373,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
- if (isOperationLegalOrCustom(ISD::MULHU, VT))
+ if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) :
+ isOperationLegalOrCustom(ISD::MULHU, VT))
Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT));
- else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+ else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) :
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
DAG.getConstant(magics.m, VT)).getNode(), 1);
else
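
Both builders replace a divide by a constant with a multiply-high plus fixups; the new IsAfterLegalization flag only tightens the legality query so a late DAG combine cannot reintroduce a MULHS/MULHU the legalizer already rejected. For the arithmetic itself, here is a self-contained sketch of signed division by 7 via the usual magic constant; note that right-shifting a negative int is implementation-defined before C++20, though arithmetic on every mainstream compiler:

    #include <cassert>
    #include <cstdint>

    // n / 7 for signed 32-bit n via multiply-high instead of a divide.
    int32_t sdiv7(int32_t n) {
      const int32_t Magic = -1840700269;                  // 0x92492493
      int32_t q = (int32_t)(((int64_t)Magic * n) >> 32);  // MULHS(n, Magic)
      q += n;                 // Magic is negative, so add the numerator back
      q >>= 2;                // post-shift s = 2
      q += (uint32_t)n >> 31; // +1 for negative n: truncate toward zero
      return q;
    }

    int main() {
      for (int32_t n : {-100, -8, -7, -1, 0, 1, 6, 7, 100})
        assert(sdiv7(n) == n / 7);
    }
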
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 2609256c8ffa..0016047a134e 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -116,8 +116,7 @@ namespace {
// Branches and invokes do not escape, only unwind, resume, and return
// do.
TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<UnwindInst>(TI) && !isa<ReturnInst>(TI) &&
- !isa<ResumeInst>(TI))
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
continue;
Builder.SetInsertPoint(TI->getParent(), TI);
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 160f38f69236..21ae2f5e56eb 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -93,6 +93,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
}
AU.addPreserved<MachineLoopInfo>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -124,7 +125,7 @@ MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
}
bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
- return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+ return (MBB && !MBB->empty() && MBB->back().isReturn());
}
// Initialize shrink wrapping DFA sets, called before iterations.
@@ -158,7 +159,7 @@ void PEI::initShrinkWrappingInfo() {
// via --shrink-wrap-func=<funcname>.
#ifndef NDEBUG
if (ShrinkWrapFunc != "") {
- std::string MFName = MF->getFunction()->getNameStr();
+ std::string MFName = MF->getFunction()->getName().str();
ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc);
}
#endif
@@ -1045,7 +1046,7 @@ std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) {
return "";
if (MBB->getBasicBlock())
- return MBB->getBasicBlock()->getNameStr();
+ return MBB->getBasicBlock()->getName().str();
std::ostringstream name;
name << "_MBB_" << MBB->getNumber();
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index ded2459d4278..9a86f32d8f96 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,4 +1,4 @@
-//===- SjLjEHPass.cpp - Eliminate Invoke & Unwind instructions -----------===//
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,21 +29,20 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include <set>
using namespace llvm;
-static cl::opt<bool> DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden,
- cl::desc("Disable the old SjLj EH preparation pass"));
-
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
- class SjLjEHPass : public FunctionPass {
+ class SjLjEHPrepare : public FunctionPass {
const TargetLowering *TLI;
Type *FunctionContextTy;
Constant *RegisterFn;
@@ -54,16 +53,12 @@ namespace {
Constant *StackRestoreFn;
Constant *LSDAAddrFn;
Value *PersonalityFn;
- Constant *SelectorFn;
- Constant *ExceptionFn;
Constant *CallSiteFn;
- Constant *DispatchSetupFn;
Constant *FuncCtxFn;
- Value *CallSite;
- DenseMap<InvokeInst*, BasicBlock*> LPadSuccMap;
+ AllocaInst *FuncCtx;
public:
static char ID; // Pass identification, replacement for typeid
- explicit SjLjEHPass(const TargetLowering *tli = NULL)
+ explicit SjLjEHPrepare(const TargetLowering *tli = NULL)
: FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
@@ -75,28 +70,24 @@ namespace {
private:
bool setupEntryBlockAndCallSites(Function &F);
+ void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal);
Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads);
void lowerIncomingArguments(Function &F);
void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst*> Invokes);
-
- void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
- void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
- SwitchInst *CatchSwitch);
- void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
- void splitLandingPad(InvokeInst *II);
- bool insertSjLjEHSupport(Function &F);
+ void insertCallSiteStore(Instruction *I, int Number);
};
} // end anonymous namespace
-char SjLjEHPass::ID = 0;
+char SjLjEHPrepare::ID = 0;
-// Public Interface To the SjLjEHPass pass.
-FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
- return new SjLjEHPass(TLI);
+// Public Interface To the SjLjEHPrepare pass.
+FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) {
+ return new SjLjEHPrepare(TLI);
}
// doInitialization - Set up declarations and types needed to process
// exceptions.
-bool SjLjEHPass::doInitialization(Module &M) {
+bool SjLjEHPrepare::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -123,11 +114,7 @@ bool SjLjEHPass::doInitialization(Module &M) {
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
- SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
- ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
- DispatchSetupFn
- = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
PersonalityFn = 0;
@@ -136,583 +123,67 @@ bool SjLjEHPass::doInitialization(Module &M) {
/// insertCallSiteStore - Insert a store of the call-site value to the
/// function context
-void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
- Value *CallSite) {
- ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
- Number);
- // Insert a store of the call-site number
- new StoreInst(CallSiteNoC, CallSite, true, I); // volatile
-}
-
-/// splitLandingPad - Split a landing pad. This takes considerable care because
-/// of PHIs and other nasties. The problem is that the jump table needs to jump
-/// to the landing pad block. However, the landing pad block can be jumped to
-/// only by an invoke instruction. So we clone the landingpad instruction into
-/// its own basic block and have the invoke jump there. The landingpad
-/// instruction's basic block's successor is now the target for the jump table.
-///
-/// But because of PHI nodes, we need to create another basic block for the jump
-/// table to jump to. This is definitely a hack, because the values for the PHI
-/// nodes may not be defined on the edge from the jump table. But that's okay,
-/// because the jump table is simply a construct to mimic what is happening in
-/// the CFG. So the values are mysteriously there, even though there is no value
-/// for the PHI from the jump table's edge (hence calling this a hack).
-void SjLjEHPass::splitLandingPad(InvokeInst *II) {
- SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(),
- ".1", ".2", this, NewBBs);
-
- // Create an empty block so that the jump table has something to jump to
- // which doesn't have any PHI nodes.
- BasicBlock *LPad = NewBBs[0];
- BasicBlock *Succ = *succ_begin(LPad);
- BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land",
- LPad->getParent(), Succ);
- LPad->getTerminator()->eraseFromParent();
- BranchInst::Create(JumpTo, LPad);
- BranchInst::Create(Succ, JumpTo);
- LPadSuccMap[II] = JumpTo;
-
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- Value *Val = PN->removeIncomingValue(LPad, false);
- PN->addIncoming(Val, JumpTo);
- }
-}
-
-/// markInvokeCallSite - Insert code to mark the call_site for this invoke
-void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
- Value *CallSite,
- SwitchInst *CatchSwitch) {
- ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo);
- // The runtime comes back to the dispatcher with the call_site - 1 in
- // the context. Odd, but there it is.
- ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
- InvokeNo - 1);
-
- // If the unwind edge has phi nodes, split the edge.
- if (isa<PHINode>(II->getUnwindDest()->begin())) {
- // FIXME: New EH - This if-condition will be always true in the new scheme.
- if (II->getUnwindDest()->isLandingPad())
- splitLandingPad(II);
- else
- SplitCriticalEdge(II, 1, this);
-
- // If there are any phi nodes left, they must have a single predecessor.
- while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
- PN->replaceAllUsesWith(PN->getIncomingValue(0));
- PN->eraseFromParent();
- }
- }
+void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) {
+ IRBuilder<> Builder(I);
- // Insert the store of the call site value
- insertCallSiteStore(II, InvokeNo, CallSite);
-
- // Record the call site value for the back end so it stays associated with
- // the invoke.
- CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
-
- // Add a switch case to our unwind block.
- if (BasicBlock *SuccBB = LPadSuccMap[II]) {
- CatchSwitch->addCase(SwitchValC, SuccBB);
- } else {
- CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
- }
+ // Get a reference to the call_site field.
+ Type *Int32Ty = Type::getInt32Ty(I->getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Idxs[2] = { Zero, One };
+ Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site");
- // We still want this to look like an invoke so we emit the LSDA properly,
- // so we don't transform the invoke into a call here.
+ // Insert a store of the call-site number
+ ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+ Number);
+ Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/);
}
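
insertCallSiteStore now GEPs straight into the shared FuncCtx alloca, and the constant indices used throughout this pass imply a fixed record layout. A sketch of that layout as a plain struct; the indices match the GEPs in this file, but field 0 is an assumption (this file never touches it; a runtime chain pointer is the likely occupant):

    #include <cstdint>

    // Layout inferred from the GEP indices in this pass.
    struct SjLjFunctionContext {
      void    *Prev;        // [0] runtime linkage (assumed)
      int32_t  CallSite;    // [1] written by insertCallSiteStore
      int32_t  Data[4];     // [2] exception value in [0], selector in [1]
      void    *Personality; // [3] personality function
      void    *LSDA;        // [4] language-specific data area
      void    *JBuf[5];     // [5] builtin_setjmp's "five word jbuf"
    };
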
/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
/// we reach blocks we've already seen.
-static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
- if (!LiveBBs.insert(BB).second) return; // already been here.
+static void MarkBlocksLiveIn(BasicBlock *BB,
+ SmallPtrSet<BasicBlock*, 64> &LiveBBs) {
+ if (!LiveBBs.insert(BB)) return; // already been here.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
MarkBlocksLiveIn(*PI, LiveBBs);
}
-/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge
-/// we spill into a stack location, guaranteeing that there is nothing live
-/// across the unwind edge. This process also splits all critical edges
-/// coming out of invoke's.
-/// FIXME: Move this function to a common utility file (Local.cpp?) so
-/// both SjLj and LowerInvoke can use it.
-void SjLjEHPass::
-splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
- // First step, split all critical edges from invoke instructions.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- InvokeInst *II = Invokes[i];
- SplitCriticalEdge(II, 0, this);
-
- // FIXME: New EH - This if-condition will be always true in the new scheme.
- if (II->getUnwindDest()->isLandingPad())
- splitLandingPad(II);
- else
- SplitCriticalEdge(II, 1, this);
-
- assert(!isa<PHINode>(II->getNormalDest()) &&
- !isa<PHINode>(II->getUnwindDest()) &&
- "Critical edge splitting left single entry phi nodes?");
- }
-
- Function *F = Invokes.back()->getParent()->getParent();
-
- // To avoid having to handle incoming arguments specially, we lower each arg
- // to a copy instruction in the entry block. This ensures that the argument
- // value itself cannot be live across the entry block.
- BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
- while (isa<AllocaInst>(AfterAllocaInsertPt) &&
- isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
- ++AfterAllocaInsertPt;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
- AI != E; ++AI) {
- Type *Ty = AI->getType();
- // Aggregate types can't be cast, but are legal argument types, so we have
- // to handle them differently. We use an extract/insert pair as a
- // lightweight method to achieve the same goal.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
- Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
- Instruction *NI = InsertValueInst::Create(AI, EI, 0);
- NI->insertAfter(EI);
- AI->replaceAllUsesWith(NI);
- // Set the operand of the instructions back to the AllocaInst.
- EI->setOperand(0, AI);
- NI->setOperand(0, AI);
- } else {
- // This is always a no-op cast because we're casting AI to AI->getType()
- // so src and destination types are identical. BitCast is the only
- // possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Set the operand of the cast instruction back to the AllocaInst.
- // Normally it's forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However,
- // we're replacing it here with the same value it was constructed with.
- // We do this because the above replaceAllUsesWith() clobbered the
- // operand, but we want this one to remain.
- NC->setOperand(0, AI);
- }
- }
-
- // Finally, scan the code looking for instructions with bad live ranges.
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- // Ignore obvious cases we don't have to handle. In particular, most
- // instructions either have no uses or only have a single use inside the
- // current block. Ignore them quickly.
- Instruction *Inst = II;
- if (Inst->use_empty()) continue;
- if (Inst->hasOneUse() &&
- cast<Instruction>(Inst->use_back())->getParent() == BB &&
- !isa<PHINode>(Inst->use_back())) continue;
-
- // If this is an alloca in the entry block, it's not a real register
- // value.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
- if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
- continue;
-
- // Avoid iterator invalidation by copying users to a temporary vector.
- SmallVector<Instruction*,16> Users;
- for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
- UI != E; ++UI) {
- Instruction *User = cast<Instruction>(*UI);
- if (User->getParent() != BB || isa<PHINode>(User))
- Users.push_back(User);
- }
-
- // Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
- LiveBBs.insert(Inst->getParent());
- while (!Users.empty()) {
- Instruction *U = Users.back();
- Users.pop_back();
-
- if (!isa<PHINode>(U)) {
- MarkBlocksLiveIn(U->getParent(), LiveBBs);
- } else {
- // Uses for a PHI node occur in their predecessor block.
- PHINode *PN = cast<PHINode>(U);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Inst)
- MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
- }
- }
-
- // Now that we know all of the blocks that this thing is live in, see if
- // it includes any of the unwind locations.
- bool NeedsSpill = false;
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
- BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
- if (UnwindBlock != BB && LiveBBs.count(UnwindBlock))
- NeedsSpill = true;
- }
-
- // If we decided we need a spill, do it.
- // FIXME: Spilling this way is overkill, as it forces all uses of
- // the value to be reloaded from the stack slot, even those that aren't
- // in the unwind blocks. We should be more selective.
- if (NeedsSpill) {
- ++NumSpilled;
- DemoteRegToStack(*Inst, true);
- }
- }
-}
-
-/// CreateLandingPadLoad - Load the exception handling values and insert them
-/// into a structure.
-static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr,
- Value *SelAddr,
- BasicBlock::iterator InsertPt) {
- Value *Exn = new LoadInst(ExnAddr, "exn", false,
- InsertPt);
- Type *Ty = Type::getInt8PtrTy(F.getContext());
- Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt);
- Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt);
-
- Ty = StructType::get(Exn->getType(), Sel->getType(), NULL);
- InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty),
- Exn, 0,
- "lpad.val", InsertPt);
- return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt);
-}
-
-/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a
-/// load from the stored values (via CreateLandingPadLoad). This looks through
-/// PHI nodes, and removes them if they are dead.
-static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr,
- Value *SelAddr) {
- if (Inst->use_empty()) return;
-
- while (!Inst->use_empty()) {
- Instruction *I = cast<Instruction>(Inst->use_back());
-
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr);
- if (PN->use_empty()) PN->eraseFromParent();
- continue;
- }
-
- I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I));
- }
-}
-
-bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
- SmallVector<ReturnInst*,16> Returns;
- SmallVector<UnwindInst*,16> Unwinds;
- SmallVector<InvokeInst*,16> Invokes;
-
- // Look through the terminators of the basic blocks to find invokes, returns
- // and unwinds.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
- // Remember all return instructions in case we insert an invoke into this
- // function.
- Returns.push_back(RI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- Invokes.push_back(II);
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- Unwinds.push_back(UI);
- }
- }
-
- NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
-
- // If we don't have any invokes, there's nothing to do.
- if (Invokes.empty()) return false;
-
- // Find the eh.selector.*, eh.exception and alloca calls.
- //
- // Remember any allocas() that aren't in the entry block, as the
- // jmpbuf saved SP will need to be updated for them.
- //
- // We'll use the first eh.selector to determine the right personality
- // function to use. For SJLJ, we always use the same personality for the
- // whole function, not on a per-selector basis.
- // FIXME: That's a bit ugly. Better way?
- SmallVector<CallInst*,16> EH_Selectors;
- SmallVector<CallInst*,16> EH_Exceptions;
- SmallVector<Instruction*,16> JmpbufUpdatePoints;
-
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- // Note: Skip the entry block since there's nothing there that interests
- // us. eh.selector and eh.exception shouldn't ever be there, and we
- // want to disregard any allocas that are there.
- //
- // FIXME: This is awkward. The new EH scheme won't need to skip the entry
- // block.
- if (BB == F.begin()) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(F.begin()->getTerminator())) {
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
- }
-
- continue;
- }
-
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (CI->getCalledFunction() == SelectorFn) {
- if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
- EH_Selectors.push_back(CI);
- } else if (CI->getCalledFunction() == ExceptionFn) {
- EH_Exceptions.push_back(CI);
- } else if (CI->getCalledFunction() == StackRestoreFn) {
- JmpbufUpdatePoints.push_back(CI);
- }
- } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- JmpbufUpdatePoints.push_back(AI);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(I)) {
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn();
- }
- }
- }
-
- // If we don't have any eh.selector calls, we can't determine the personality
- // function. Without a personality function, we can't process exceptions.
- if (!PersonalityFn) return false;
-
- // We have invokes, so we need to add register/unregister calls to get this
- // function onto the global unwind stack.
- //
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge we
- // spill into a stack location, guaranteeing that there is nothing live across
- // the unwind edge. This process also splits all critical edges coming out of
- // invoke's.
- splitLiveRangesAcrossInvokes(Invokes);
-
-
- SmallVector<LandingPadInst*, 16> LandingPads;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
- // FIXME: This will be always non-NULL in the new EH.
- if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst())
- LandingPads.push_back(LPI);
+/// substituteLPadValues - Substitute the values returned by the landingpad
+/// instruction with those returned by the personality function.
+void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal,
+ Value *SelVal) {
+ SmallVector<Value*, 8> UseWorkList(LPI->use_begin(), LPI->use_end());
+ while (!UseWorkList.empty()) {
+ Value *Val = UseWorkList.pop_back_val();
+ ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val);
+ if (!EVI) continue;
+ if (EVI->getNumIndices() != 1) continue;
+ if (*EVI->idx_begin() == 0)
+ EVI->replaceAllUsesWith(ExnVal);
+ else if (*EVI->idx_begin() == 1)
+ EVI->replaceAllUsesWith(SelVal);
+ if (EVI->getNumUses() == 0)
+ EVI->eraseFromParent();
}
+ if (LPI->getNumUses() == 0) return;
- BasicBlock *EntryBB = F.begin();
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be added to the global context list.
- unsigned Align = 4; // FIXME: Should be a TLI check?
- AllocaInst *FunctionContext =
- new AllocaInst(FunctionContextTy, 0, Align,
- "fcn_context", F.begin()->begin());
-
- Value *Idxs[2];
- Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Value *Zero = ConstantInt::get(Int32Ty, 0);
- // We need to also keep around a reference to the call_site field
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->data[1]
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exc_selector_gep",
- EntryBB->getTerminator());
- // The exception value comes back in context->data[0]
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The result of the eh.selector call will be replaced with a reference to
- // the selector value returned in the function context. We leave the selector
- // itself so the EH analysis later can use it.
- for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
- CallInst *I = EH_Selectors[i];
- Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
- I->replaceAllUsesWith(SelectorVal);
- }
-
- // eh.exception calls are replaced with references to the proper location in
- // the context. Unlike eh.selector, the eh.exception calls are removed
- // entirely.
- for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
- CallInst *I = EH_Exceptions[i];
- // Possible for there to be duplicates, so check to make sure the
- // instruction hasn't already been removed.
- if (!I->getParent()) continue;
- Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
- Type *Ty = Type::getInt8PtrTy(F.getContext());
- Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
-
- I->replaceAllUsesWith(Val);
- I->eraseFromParent();
- }
-
- for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
- ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr);
-
- // The entry block changes to have the eh.sjlj.setjmp, with a conditional
- // branch to a dispatch block for non-zero returns. If we return normally,
- // we're not handling an exception and just register the function context and
- // continue.
-
- // Create the dispatch block. The dispatch block is basically a big switch
- // statement that goes to all of the invoke landing pads.
- BasicBlock *DispatchBlock =
- BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
-
- // Insert a load of the callsite in the dispatch block, and a switch on its
- // value. By default, we issue a trap statement.
- BasicBlock *TrapBlock =
- BasicBlock::Create(F.getContext(), "trapbb", &F);
- CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap),
- "", TrapBlock);
- new UnreachableInst(F.getContext(), TrapBlock);
-
- Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
- DispatchBlock);
- SwitchInst *DispatchSwitch =
- SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(),
- DispatchBlock);
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "eh.sjlj.setjmp.cont");
-
- // Populate the Function Context
- // 1. LSDA address
- // 2. Personality function address
- // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
-
- // LSDA address
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
- EntryBB->getTerminator());
- new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
-
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
- Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep",
- EntryBB->getTerminator());
- new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
- EntryBB->getTerminator());
-
- // Save the frame pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *JBufPtr
- = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *FramePtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep",
- EntryBB->getTerminator());
-
- Value *Val = CallInst::Create(FrameAddrFn,
- ConstantInt::get(Int32Ty, 0),
- "fp",
- EntryBB->getTerminator());
- new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
-
- // Save the stack pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *StackPtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep",
- EntryBB->getTerminator());
-
- Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
- new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
-
- // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, JBufPtr,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
- "",
- EntryBB->getTerminator());
-
- // Add a call to dispatch_setup after the setjmp call. This is expanded to any
- // target-specific setup that needs to be done.
- CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator());
+ // There are still some uses of LPI. Construct an aggregate with the exception
+ // values and replace the LPI with that aggregate.
+ Type *LPadType = LPI->getType();
+ Value *LPadVal = UndefValue::get(LPadType);
+ IRBuilder<>
+ Builder(llvm::next(BasicBlock::iterator(cast<Instruction>(SelVal))));
+ LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
+ LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
- // check the return value of the setjmp. non-zero goes to dispatcher.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, DispatchVal, Zero,
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
-
- // Register the function context and make sure it's known to not throw
- CallInst *Register =
- CallInst::Create(RegisterFn, FunctionContext, "",
- ContBlock->getTerminator());
- Register->setDoesNotThrow();
-
- // At this point, we are all set up, update the invoke instructions to mark
- // their call_site values, and fill in the dispatch switch accordingly.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
-
- // Mark call instructions that aren't nounwind as no-action (call_site ==
- // -1). Skip the entry block, as prior to then, no function context has been
- // created for this function and any unexpected exceptions thrown will go
- // directly to the caller's context, which is what we want anyway, so no need
- // to do anything here.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
- for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore calls to the EH builtins (eh.selector, eh.exception)
- Constant *Callee = CI->getCalledFunction();
- if (Callee != SelectorFn && Callee != ExceptionFn
- && !CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1, CallSite);
- }
- }
-
- // Replace all unwinds with a branch to the unwind handler.
- // ??? Should this ever happen with sjlj exceptions?
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(TrapBlock, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
- // Following any allocas not in the entry block, update the saved SP in the
- // jmpbuf to the new value.
- for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
- Instruction *AI = JmpbufUpdatePoints[i];
- Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(AI);
- Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
- StoreStackAddr->insertAfter(StackAddr);
- }
-
- // Finally, for any returns from this function, if this function contains an
- // invoke, add a call to unregister the function context.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i)
- CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
-
- return true;
+ LPI->replaceAllUsesWith(LPadVal);
}
/// setupFunctionContext - Allocate the function context on the stack and fill
/// it with all of the data that we know at this point.
-Value *SjLjEHPass::
+Value *SjLjEHPrepare::
setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
BasicBlock *EntryBB = F.begin();
@@ -721,56 +192,42 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
// because the value needs to be added to the global context list.
unsigned Align =
TLI->getTargetData()->getPrefTypeAlignment(FunctionContextTy);
- AllocaInst *FuncCtx =
+ FuncCtx =
new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin());
// Fill in the function context structure.
- Value *Idxs[2];
Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *Zero = ConstantInt::get(Int32Ty, 0);
Value *One = ConstantInt::get(Int32Ty, 1);
+ Value *Two = ConstantInt::get(Int32Ty, 2);
+ Value *Three = ConstantInt::get(Int32Ty, 3);
+ Value *Four = ConstantInt::get(Int32Ty, 4);
- // Keep around a reference to the call_site field.
- Idxs[0] = Zero;
- Idxs[1] = One;
- CallSite = GetElementPtrInst::Create(FuncCtx, Idxs, "call_site",
- EntryBB->getTerminator());
-
- // Reference the __data field.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FuncCtx, Idxs, "__data",
- EntryBB->getTerminator());
-
- // The exception value comes back in context->__data[0].
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->__data[1].
- Idxs[1] = One;
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs,
- "exn_selector_gep",
- EntryBB->getTerminator());
+ Value *Idxs[2] = { Zero, 0 };
for (unsigned I = 0, E = LPads.size(); I != E; ++I) {
LandingPadInst *LPI = LPads[I];
IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt());
+ // Reference the __data field.
+ Idxs[1] = Two;
+ Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data");
+
+ // The exception values come back in context->__data[0].
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep");
Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Type::getInt8PtrTy(F.getContext()));
- Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- Type *LPadType = LPI->getType();
- Value *LPadVal = UndefValue::get(LPadType);
- LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val");
+ Idxs[1] = One;
+ Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep");
+ Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
- LPI->replaceAllUsesWith(LPadVal);
+ substituteLPadValues(LPI, ExnVal, SelVal);
}
// Personality function
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Idxs[1] = Three;
if (!PersonalityFn)
PersonalityFn = LPads[0]->getPersonalityFn();
Value *PersonalityFieldPtr =
@@ -780,11 +237,11 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
EntryBB->getTerminator());
// LSDA address
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
- EntryBB->getTerminator());
Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
EntryBB->getTerminator());
+ Idxs[1] = Four;
+ Value *LSDAFieldPtr = GetElementPtrInst::Create(FuncCtx, Idxs, "lsda_gep",
+ EntryBB->getTerminator());
new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
return FuncCtx;
@@ -794,7 +251,7 @@ setupFunctionContext(Function &F, ArrayRef<LandingPadInst*> LPads) {
/// specially, we lower each arg to a copy instruction in the entry block. This
/// ensures that the argument value itself cannot be live out of the entry
/// block.
-void SjLjEHPass::lowerIncomingArguments(Function &F) {
+void SjLjEHPrepare::lowerIncomingArguments(Function &F) {
BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin();
while (isa<AllocaInst>(AfterAllocaInsPt) &&
isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize()))
@@ -838,8 +295,8 @@ void SjLjEHPass::lowerIncomingArguments(Function &F) {
/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind
/// edge and spill them.
-void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
- ArrayRef<InvokeInst*> Invokes) {
+void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F,
+ ArrayRef<InvokeInst*> Invokes) {
// Finally, scan the code looking for instructions with bad live ranges.
for (Function::iterator
BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
@@ -870,7 +327,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
}
// Find all of the blocks that this value is live in.
- std::set<BasicBlock*> LiveBBs;
+ SmallPtrSet<BasicBlock*, 64> LiveBBs;
LiveBBs.insert(Inst->getParent());
while (!Users.empty()) {
Instruction *U = Users.back();
@@ -893,7 +350,10 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+ DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around "
+ << UnwindBlock->getName() << "\n");
NeedsSpill = true;
+ break;
}
}
@@ -902,36 +362,60 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F,
// the value to be reloaded from the stack slot, even those that aren't
// in the unwind blocks. We should be more selective.
if (NeedsSpill) {
- ++NumSpilled;
DemoteRegToStack(*Inst, true);
+ ++NumSpilled;
}
}
}
+
+ // Go through the landing pads and remove any PHIs there.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+ BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+ LandingPadInst *LPI = UnwindBlock->getLandingPadInst();
+
+ // Place PHIs into a set to avoid invalidating the iterator.
+ SmallPtrSet<PHINode*, 8> PHIsToDemote;
+ for (BasicBlock::iterator
+ PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN)
+ PHIsToDemote.insert(cast<PHINode>(PN));
+ if (PHIsToDemote.empty()) continue;
+
+ // Demote the PHIs to the stack.
+ for (SmallPtrSet<PHINode*, 8>::iterator
+ I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I)
+ DemotePHIToStack(*I);
+
+ // Move the landingpad instruction back to the top of the landing pad block.
+ LPI->moveBefore(UnwindBlock->begin());
+ }
}
/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling
/// the function context and marking the call sites with the appropriate
/// values. These values are used by the DWARF EH emitter.
-bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
+bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) {
SmallVector<ReturnInst*, 16> Returns;
SmallVector<InvokeInst*, 16> Invokes;
- SmallVector<LandingPadInst*, 16> LPads;
+ SmallSetVector<LandingPadInst*, 16> LPads;
// Look through the terminators of the basic blocks to find invokes.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Invokes.push_back(II);
- LPads.push_back(II->getUnwindDest()->getLandingPadInst());
+ LPads.insert(II->getUnwindDest()->getLandingPadInst());
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
Returns.push_back(RI);
}
if (Invokes.empty()) return false;
+ NumInvokes += Invokes.size();
+
lowerIncomingArguments(F);
lowerAcrossUnwindEdges(F, Invokes);
- Value *FuncCtx = setupFunctionContext(F, LPads);
+ Value *FuncCtx =
+ setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end()));
BasicBlock *EntryBB = F.begin();
Type *Int32Ty = Type::getInt32Ty(F.getContext());
@@ -979,7 +463,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
// At this point, we are all set up, update the invoke instructions to mark
// their call_site values.
for (unsigned I = 0, E = Invokes.size(); I != E; ++I) {
- insertCallSiteStore(Invokes[I], I + 1, CallSite);
+ insertCallSiteStore(Invokes[I], I + 1);
ConstantInt *CallSiteNum =
ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1);
@@ -998,9 +482,9 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (!CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
+ insertCallSiteStore(CI, -1);
} else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) {
- insertCallSiteStore(RI, -1, CallSite);
+ insertCallSiteStore(RI, -1);
}
// Register the function context and make sure it's known to not throw
@@ -1008,6 +492,25 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
EntryBB->getTerminator());
Register->setDoesNotThrow();
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (BB == F.begin())
+ continue;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (CI->getCalledFunction() != StackRestoreFn)
+ continue;
+ } else if (!isa<AllocaInst>(I)) {
+ continue;
+ }
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(I);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
for (unsigned I = 0, E = Returns.size(); I != E; ++I)
@@ -1016,11 +519,7 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) {
return true;
}
-bool SjLjEHPass::runOnFunction(Function &F) {
- bool Res = false;
- if (!DisableOldSjLjEH)
- Res = insertSjLjEHSupport(F);
- else
- Res = setupEntryBlockAndCallSites(F);
+bool SjLjEHPrepare::runOnFunction(Function &F) {
+ bool Res = setupEntryBlockAndCallSites(F);
return Res;
}
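
The SjLjEHPrepare rewrite above reaches every field of the per-function
context through constant GEP indices: the exception pointer and selector are
stored to __data[0] and __data[1], while fields 3 and 4 of the outer struct
hold the personality function and the LSDA address. A minimal sketch of that
addressing pattern, reusing the F, Builder and FuncCtx names from the hunk
(all assumed to be in scope):

    // Sketch only: the constant-index GEP pattern used by the pass.
    // Assumes IRBuilder<> Builder is positioned in the landing pad and
    // Value *FuncCtx points at the function-context alloca.
    Type *Int32Ty = Type::getInt32Ty(F.getContext());
    Value *Zero = ConstantInt::get(Int32Ty, 0);
    Value *One  = ConstantInt::get(Int32Ty, 1);

    Value *Idxs[2] = { Zero, One };              // field 1: __data
    Value *FCData = Builder.CreateGEP(FuncCtx, Idxs, "__data");

    Idxs[1] = Zero;                              // __data[0]: exception
    Value *ExceptionAddr = Builder.CreateGEP(FCData, Idxs, "exception_gep");
    Idxs[1] = One;                               // __data[1]: selector
    Value *SelectorAddr = Builder.CreateGEP(FCData, Idxs, "exn_selector_gep");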
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index ca79cafcf4be..c5bd3a3cae63 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MachineBasicBlock *mbb = &*mbbItr;
// Insert an index for the MBB start.
- SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+ SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block);
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(mi, index += SlotIndex::InstrDist));
// Save this base index in the maps.
- mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+ mi2iMap.insert(std::make_pair(mi, SlotIndex(back(),
+ SlotIndex::Slot_Block)));
++functionSize;
}
@@ -97,14 +98,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index += SlotIndex::InstrDist));
MBBRanges[mbb->getNumber()].first = blockStartIndex;
- MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD);
+ MBBRanges[mbb->getNumber()].second = SlotIndex(back(),
+ SlotIndex::Slot_Block);
idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
}
// Sort the Idx2MBBMap
std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
- DEBUG(dump());
+ DEBUG(mf->print(dbgs(), this));
// And we're done!
return false;
@@ -166,7 +168,7 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
if (isValid())
- os << entry().getIndex() << "LudS"[getSlot()];
+ os << entry().getIndex() << "Berd"[getSlot()];
else
os << "invalid";
}
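
The SlotIndexes hunks follow a renaming of the slot kinds: the old
LOAD/use/def/STORE quadruple becomes Slot_Block, Slot_EarlyClobber,
Slot_Register and Slot_Dead, which is why print() now indexes into "Berd"
rather than "LudS". A small decoder, sketched under the assumption that the
enumerators keep that order (0..3):

    // Sketch: what each slot letter emitted by SlotIndex::print() means.
    static const char *slotKindName(unsigned Slot) {
      switch (Slot) {
      case 0: return "Block";        // 'B': block boundary, live-in values
      case 1: return "EarlyClobber"; // 'e': early-clobber defs
      case 2: return "Register";     // 'r': normal register use/def slot
      case 3: return "Dead";         // 'd': where dead defs end
      }
      return "invalid";
    }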
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index b6bbcd7176dd..4cd22eb60f55 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -11,8 +11,8 @@
#include "Spiller.h"
#include "VirtRegMap.h"
-#include "LiveRangeEdit.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,7 +29,7 @@
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, inline_ };
+ enum SpillerName { trivial, inline_ };
}
static cl::opt<SpillerName>
@@ -37,10 +37,9 @@ spillerOpt("spiller",
cl::desc("Spiller to use: (default: standard)"),
cl::Prefix,
cl::values(clEnumVal(trivial, "trivial spiller"),
- clEnumVal(standard, "default spiller"),
clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
- cl::init(standard));
+ cl::init(trivial));
// Spiller virtual destructor implementation.
Spiller::~Spiller() {}
@@ -73,8 +72,9 @@ protected:
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
- void trivialSpillEverywhere(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &newIntervals) {
+ void trivialSpillEverywhere(LiveRangeEdit& LRE) {
+ LiveInterval* li = &LRE.getParent();
+
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -116,17 +116,14 @@ protected:
}
// Create a new vreg & interval for this instr.
- unsigned newVReg = mri->createVirtualRegister(trc);
- vrm->grow();
- vrm->assignVirt2StackSlot(newVReg, ss);
- LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ LiveInterval *newLI = &LRE.create();
newLI->weight = HUGE_VALF;
// Update the reg operands & kill flags.
for (unsigned i = 0; i < indices.size(); ++i) {
unsigned mopIdx = indices[i];
MachineOperand &mop = mi->getOperand(mopIdx);
- mop.setReg(newVReg);
+ mop.setReg(newLI->reg);
if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
mop.setIsKill(true);
}
@@ -136,33 +133,29 @@ protected:
// Insert reload if necessary.
MachineBasicBlock::iterator miItr(mi);
if (hasUse) {
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc,
tri);
MachineInstr *loadInstr(prior(miItr));
SlotIndex loadIndex =
- lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
- vrm->addSpillSlotUse(ss, loadInstr);
+ lis->InsertMachineInstrInMaps(loadInstr).getRegSlot();
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
+ newLI->getNextValue(loadIndex, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
// Insert store if necessary.
if (hasDef) {
- tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newLI->reg,
true, ss, trc, tri);
MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
- lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
- vrm->addSpillSlotUse(ss, storeInstr);
+ lis->InsertMachineInstrInMaps(storeInstr).getRegSlot();
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
+ newLI->getNextValue(beginIndex, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
-
- newIntervals.push_back(newLI);
}
}
};
@@ -182,60 +175,20 @@ public:
void spill(LiveRangeEdit &LRE) {
// Ignore spillIs - we don't use it.
- trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs());
+ trivialSpillEverywhere(LRE);
}
};
} // end anonymous namespace
-namespace {
-
-/// Falls back on LiveIntervals::addIntervalsForSpills.
-class StandardSpiller : public Spiller {
-protected:
- MachineFunction *mf;
- LiveIntervals *lis;
- LiveStacks *lss;
- MachineLoopInfo *loopInfo;
- VirtRegMap *vrm;
-public:
- StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : mf(&mf),
- lis(&pass.getAnalysis<LiveIntervals>()),
- lss(&pass.getAnalysis<LiveStacks>()),
- loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
- vrm(&vrm) {}
-
- /// Falls back on LiveIntervals::addIntervalsForSpills.
- void spill(LiveRangeEdit &LRE) {
- std::vector<LiveInterval*> added =
- lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(),
- loopInfo, *vrm);
- LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(),
- added.begin(), added.end());
-
- // Update LiveStacks.
- int SS = vrm->getStackSlot(LRE.getReg());
- if (SS == VirtRegMap::NO_STACK_SLOT)
- return;
- const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg());
- LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
- if (!SI.hasAtLeastOneValue())
- SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
- SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0));
- }
-};
-
-} // end anonymous namespace
+void Spiller::anchor() { }
llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
switch (spillerOpt) {
- default: assert(0 && "unknown spiller");
case trivial: return new TrivialSpiller(pass, mf, vrm);
- case standard: return new StandardSpiller(pass, mf, vrm);
case inline_: return createInlineSpiller(pass, mf, vrm);
}
+ llvm_unreachable("Invalid spiller optimization");
}
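
With StandardSpiller gone, createSpiller() only chooses between the trivial
and inline spillers, and the unreachable case moves out of the switch so the
compiler can still warn when a new enumerator is left unhandled. The same
idiom in isolation, as a sketch (llvm_unreachable comes from
llvm/Support/ErrorHandling.h):

    // Sketch of the exhaustive-switch idiom adopted above: no 'default'
    // case, llvm_unreachable() after the switch instead.
    enum SpillerName { trivial, inline_ };

    static const char *spillerDesc(SpillerName S) {
      switch (S) {
      case trivial: return "trivial spiller";
      case inline_: return "inline spiller";
      }
      llvm_unreachable("Invalid spiller kind");
    }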
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 41f1727da439..b7d5beaab1b2 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -22,6 +22,7 @@ namespace llvm {
/// Implementations are utility classes which insert spill or remat code on
/// demand.
class Spiller {
+ virtual void anchor();
public:
virtual ~Spiller() = 0;
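
The anchor() declaration added here pairs with the empty definition in
Spiller.cpp above. One out-of-line virtual member gives the class a home
translation unit, so its vtable and type info are emitted once rather than
in every file that includes the header. The idiom in isolation, with
hypothetical names:

    // Key-function idiom, sketched with hypothetical names.
    // In the header:
    class Base {
      virtual void anchor();   // declared only; defined out of line
    public:
      virtual ~Base();
      virtual void run() = 0;
    };

    // In exactly one .cpp file:
    void Base::anchor() {}     // vtable and RTTI are emitted here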
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 63627800af69..9959f74d5f27 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,10 +14,10 @@
#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
-#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -62,13 +62,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
const MachineBasicBlock *MBB = MF.getBlockNumbered(Num);
const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor();
std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num];
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
// Compute split points on the first call. The pair is independent of the
// current live interval.
if (!LSP.first.isValid()) {
MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator();
if (FirstTerm == MBB->end())
- LSP.first = LIS.getMBBEndIdx(MBB);
+ LSP.first = MBBEnd;
else
LSP.first = LIS.getInstructionIndex(FirstTerm);
@@ -80,7 +81,7 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
I != E;) {
--I;
- if (I->getDesc().isCall()) {
+ if (I->isCall()) {
LSP.second = LIS.getInstructionIndex(I);
break;
}
@@ -89,10 +90,32 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
// If CurLI is live into a landing pad successor, move the last split point
// back to the call that may throw.
- if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad))
- return LSP.second;
- else
+ if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad))
+ return LSP.first;
+
+ // Find the value leaving MBB.
+ const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd);
+ if (!VNI)
+ return LSP.first;
+
+ // If the value leaving MBB was defined after the call in MBB, it can't
+ // really be live-in to the landing pad. This can happen if the landing pad
+ // has a PHI, and this register is undef on the exceptional edge.
+ // <rdar://problem/10664933>
+ if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd)
return LSP.first;
+
+ // Value is properly live-in to the landing pad.
+ // Only allow splits before the call.
+ return LSP.second;
+}
+
+MachineBasicBlock::iterator
+SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) {
+ SlotIndex LSP = getLastSplitPoint(MBB->getNumber());
+ if (LSP == LIS.getMBBEndIdx(MBB))
+ return MBB->end();
+ return LIS.getInstructionFromIndex(LSP);
}
/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
@@ -112,7 +135,7 @@ void SplitAnalysis::analyzeUses() {
I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E;
++I)
if (!I.getOperand().isUndef())
- UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex());
+ UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot());
array_pod_sort(UseSlots.begin(), UseSlots.end());
@@ -328,7 +351,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
- Edit->anyRematerializable(LIS, TII, 0);
+ Edit->anyRematerializable(0);
}
void SplitEditor::dump() const {
@@ -351,7 +374,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
LiveInterval *LI = Edit->get(RegIdx);
// Create a new value.
- VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+ VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator, bool> InsP =
@@ -366,14 +389,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// If the previous value was a simple mapping, add liveness for it now.
if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
SlotIndex Def = OldVNI->def;
- LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI));
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI));
// No longer a simple mapping. Switch to a complex, non-forced mapping.
InsP.first->second = ValueForcePair();
}
// This is a complex mapping, add liveness for VNI
SlotIndex Def = VNI->def;
- LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
return VNI;
}
@@ -393,7 +416,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
SlotIndex Def = VNI->def;
- Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
// Mark as complex mapped, forced.
VFP = ValueForcePair(0, true);
}
@@ -413,33 +436,31 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
// Attempt cheap-as-a-copy rematerialization.
LiveRangeEdit::Remat RM(ParentVNI);
- if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late);
+ if (Edit->canRematerializeAt(RM, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
++NumRemats;
} else {
// Can't remat, just insert a copy from parent.
CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
.addReg(Edit->getReg());
Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late)
- .getDefIndex();
+ .getRegSlot();
++NumCopies;
}
// Define the value in Reg.
- VNInfo *VNI = defValue(RegIdx, ParentVNI, Def);
- VNI->setCopy(CopyMI);
- return VNI;
+ return defValue(RegIdx, ParentVNI, Def);
}
/// Create a new virtual register and live interval.
unsigned SplitEditor::openIntv() {
// Create the complement as index 0.
if (Edit->empty())
- Edit->create(LIS, VRM);
+ Edit->create();
// Create the open interval.
OpenIdx = Edit->size();
- Edit->create(LIS, VRM);
+ Edit->create();
return OpenIdx;
}
@@ -497,7 +518,7 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
}
DEBUG(dbgs() << ": valno " << ParentVNI->id);
VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
- LIS.getLastSplitPoint(Edit->getParent(), &MBB));
+ SA.getLastSplitPointIter(&MBB));
RegAssign.insert(VNI->def, End, OpenIdx);
DEBUG(dump());
return VNI->def;
@@ -586,7 +607,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
assert(OpenIdx && "openIntv not called before overlapIntv");
const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);
- assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) &&
+ assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) &&
"Parent changes value in extended range");
assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
"Range cannot span basic blocks");
@@ -640,7 +661,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) {
DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
} else {
- SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex();
+ SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot();
DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
AssignI.setStop(Kill);
}
@@ -780,7 +801,7 @@ void SplitEditor::hoistCopiesForSize() {
SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot();
Dom.second =
defFromParent(0, ParentVNI, Last, *Dom.first,
- LIS.getLastSplitPoint(Edit->getParent(), Dom.first))->def;
+ SA.getLastSplitPointIter(Dom.first))->def;
}
// Remove redundant back-copies that are now known to be dominated by another
@@ -958,7 +979,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// use the same register as the def, so just do that always.
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (MO.isDef() || MO.isUndef())
- Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex();
+ Idx = Idx.getRegSlot(MO.isEarlyClobber());
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
@@ -981,7 +1002,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
if (!Edit->getParent().liveAt(Idx))
continue;
} else
- Idx = Idx.getUseIndex();
+ Idx = Idx.getRegSlot(true);
getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(),
&MDT, &LIS.getVNInfoAllocator());
@@ -994,8 +1015,8 @@ void SplitEditor::deleteRematVictims() {
LiveInterval *LI = *I;
for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end();
LII != LIE; ++LII) {
- // Dead defs end at the store slot.
- if (LII->end != LII->valno->def.getNextSlot())
+ // Dead defs end at the dead slot.
+ if (LII->end != LII->valno->def.getDeadSlot())
continue;
MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def);
assert(MI && "Missing instruction for dead def");
@@ -1012,7 +1033,7 @@ void SplitEditor::deleteRematVictims() {
if (Dead.empty())
return;
- Edit->eliminateDeadDefs(Dead, LIS, VRM, TII);
+ Edit->eliminateDeadDefs(Dead);
}
void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
@@ -1030,7 +1051,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def);
VNI->setIsPHIDef(ParentVNI->isPHIDef());
- VNI->setCopy(ParentVNI->getCopy());
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
@@ -1049,7 +1069,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
break;
case SM_Speed:
llvm_unreachable("Spill mode 'speed' not implemented yet");
- break;
}
// Transfer the simply mapped values, check if any are skipped.
@@ -1089,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
SmallVector<LiveInterval*, 8> dups;
dups.push_back(li);
for (unsigned j = 1; j != NumComp; ++j)
- dups.push_back(&Edit->create(LIS, VRM));
+ dups.push_back(&Edit->create());
ConEQ.Distribute(&dups[0], MRI);
// The new intervals all map back to i.
if (LRMap)
@@ -1097,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
}
// Calculate spill weight and allocation hints for new intervals.
- Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops);
+ Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops);
assert(!LRMap || LRMap->size() == Edit->size());
}
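
Most of the SplitKit churn is the same SlotIndex migration seen in the
spiller: getDefIndex()/getUseIndex() become getRegSlot()/getRegSlot(true),
and trivial def-only ranges now end at the dead slot instead of the next
slot. A condensed sketch, assuming LI, VNI and Idx as in
SplitEditor::defValue():

    // Sketch of the SlotIndex migration applied throughout this file.
    static void addDeadDefRange(LiveInterval *LI, VNInfo *VNI, SlotIndex Idx) {
      SlotIndex Def = Idx.getRegSlot();      // was Idx.getDefIndex()
      SlotIndex EC  = Idx.getRegSlot(true);  // was Idx.getUseIndex()
      (void)EC;
      // A def with no live users gets a minimal range ending at the
      // dead slot (previously Def.getNextSlot()):
      LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
    }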
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index d8fc2122a3c7..4005a3d5cbbf 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -46,9 +46,6 @@ public:
const MachineLoopInfo &Loops;
const TargetInstrInfo &TII;
- // Sorted slot indexes of using instructions.
- SmallVector<SlotIndex, 8> UseSlots;
-
/// Additional information about basic blocks where the current variable is
/// live. Such a block will look like one of these templates:
///
@@ -85,6 +82,9 @@ private:
// Current live interval.
const LiveInterval *CurLI;
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
+
/// LastSplitPoint - Last legal split point in each basic block in the current
/// function. The first entry is the first terminator, the second entry is the
/// last valid split point for a variable that is live in to a landing pad
@@ -135,7 +135,7 @@ public:
/// getParent - Return the last analyzed interval.
const LiveInterval &getParent() const { return *CurLI; }
- /// getLastSplitPoint - Return that base index of the last valid split point
+ /// getLastSplitPoint - Return the base index of the last valid split point
/// in the basic block numbered Num.
SlotIndex getLastSplitPoint(unsigned Num) {
// Inline the common simple case.
@@ -145,6 +145,9 @@ public:
return computeLastSplitPoint(Num);
}
+ /// getLastSplitPointIter - Returns the last split point as an iterator.
+ MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*);
+
/// isOriginalEndpoint - Return true if the original live range was killed or
/// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
/// and 'use' for an early-clobber def.
@@ -152,6 +155,10 @@ public:
/// splitting.
bool isOriginalEndpoint(SlotIndex Idx) const;
+ /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI.
+ /// This includes both use and def operands, at most one entry per instruction.
+ ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; }
+
/// getUseBlocks - Return an array of BlockInfo objects for the basic blocks
/// where CurLI has uses.
ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; }
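
UseSlots moves from public data to a private member behind the new
getUseSlots() accessor, and getLastSplitPointIter() exposes the last split
point in iterator form. A caller's-eye sketch of the two additions (dbgs()
from llvm/Support/Debug.h; the helper name is hypothetical):

    // Sketch: how a client might consume the two new SplitAnalysis APIs.
    static void dumpSplitInfo(SplitAnalysis &SA, MachineBasicBlock *MBB) {
      // Sorted use/def slots of CurLI, at most one per instruction.
      ArrayRef<SlotIndex> Uses = SA.getUseSlots();
      for (unsigned i = 0, e = Uses.size(); i != e; ++i)
        dbgs() << Uses[i] << ' ';
      dbgs() << '\n';
      // Iterator form of the last legal split point; MBB->end() means
      // the whole block is usable.
      MachineBasicBlock::iterator LSP = SA.getLastSplitPointIter(MBB);
      (void)LSP;
    }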
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
deleted file mode 100644
index 77973b72bbc8..000000000000
--- a/lib/CodeGen/Splitter.cpp
+++ /dev/null
@@ -1,827 +0,0 @@
-//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loopsplitter"
-
-#include "Splitter.h"
-
-#include "llvm/Module.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-using namespace llvm;
-
-char LoopSplitter::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false)
-
-namespace llvm {
-
- class StartSlotComparator {
- public:
- StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
- bool operator()(const MachineBasicBlock *mbb1,
- const MachineBasicBlock *mbb2) const {
- return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
- }
- private:
- LiveIntervals &lis;
- };
-
- class LoopSplit {
- public:
- LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
- : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Cannot split physical registers.");
- }
-
- LiveInterval& getLI() const { return li; }
-
- MachineLoop& getLoop() const { return loop; }
-
- bool isValid() const { return valid; }
-
- bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
-
- void invalidate() { valid = false; }
-
- void splitIncoming() { inSplit = true; }
-
- void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
-
- void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
-
- void apply() {
- assert(valid && "Attempt to apply invalid split.");
- applyIncoming();
- applyOutgoing();
- copyRanges();
- renameInside();
- }
-
- private:
- LoopSplitter &ls;
- LiveInterval &li;
- MachineLoop &loop;
- bool valid, inSplit;
- std::set<MachineLoop::Edge> outSplits;
- std::vector<MachineInstr*> loopInstrs;
-
- LiveInterval *newLI;
- std::map<VNInfo*, VNInfo*> vniMap;
-
- LiveInterval* getNewLI() {
- if (newLI == 0) {
- const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
- unsigned vreg = ls.mri->createVirtualRegister(trc);
- newLI = &ls.lis->getOrCreateInterval(vreg);
- }
- return newLI;
- }
-
- VNInfo* getNewVNI(VNInfo *oldVNI) {
- VNInfo *newVNI = vniMap[oldVNI];
-
- if (newVNI == 0) {
- newVNI = getNewLI()->createValueCopy(oldVNI,
- ls.lis->getVNInfoAllocator());
- vniMap[oldVNI] = newVNI;
- }
-
- return newVNI;
- }
-
- void applyIncoming() {
- if (!inSplit) {
- return;
- }
-
- MachineBasicBlock *preHeader = loop.getLoopPreheader();
- if (preHeader == 0) {
- assert(ls.canInsertPreHeader(loop) &&
- "Can't insert required preheader.");
- preHeader = &ls.insertPreHeader(loop);
- }
-
- LiveRange *preHeaderRange =
- ls.lis->findExitingRange(li, preHeader);
- assert(preHeaderRange != 0 && "Range not live into preheader.");
-
- // Insert the new copy.
- MachineInstr *copy = BuildMI(*preHeader,
- preHeader->getFirstTerminator(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(getNewLI()->reg, RegState::Define)
- .addReg(li.reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
-
- VNInfo *newVal = getNewVNI(preHeaderRange->valno);
- newVal->def = copyDefIdx;
- newVal->setCopy(copy);
- li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
-
- getNewLI()->addRange(LiveRange(copyDefIdx,
- ls.lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
- void applyOutgoing() {
-
- for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
- osEnd = outSplits.end();
- osItr != osEnd; ++osItr) {
- MachineLoop::Edge edge = *osItr;
- MachineBasicBlock *outBlock = edge.second;
- if (ls.isCriticalEdge(edge)) {
- assert(ls.canSplitEdge(edge) && "Unsplitable critical edge.");
- outBlock = &ls.splitEdge(edge, loop);
- }
- LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
- assert(outRange != 0 && "No exiting range?");
-
- MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
- DebugLoc(),
- ls.tii->get(TargetOpcode::COPY))
- .addReg(li.reg, RegState::Define)
- .addReg(getNewLI()->reg, RegState::Kill);
-
- ls.lis->InsertMachineInstrInMaps(copy);
-
- SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
-
- // Blow away output range definition.
- outRange->valno->def = ls.lis->getInvalidIndex();
- li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
-
- SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
- assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal =
- getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
-
- getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
- copyDefIdx, newVal));
-
- }
- }
-
- void copyRange(LiveRange &lr) {
- std::pair<bool, LoopSplitter::SlotPair> lsr =
- ls.getLoopSubRange(lr, loop);
-
- if (!lsr.first)
- return;
-
- LiveRange loopRange(lsr.second.first, lsr.second.second,
- getNewVNI(lr.valno));
-
- li.removeRange(loopRange.start, loopRange.end, true);
-
- getNewLI()->addRange(loopRange);
- }
-
- void copyRanges() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
- if (instr.modifiesRegister(li.reg, 0)) {
- LiveRange *defRange =
- li.getLiveRangeContaining(instrIdx.getDefIndex());
- if (defRange != 0) // May have caught this already.
- copyRange(*defRange);
- }
- if (instr.readsRegister(li.reg, 0)) {
- LiveRange *useRange =
- li.getLiveRangeContaining(instrIdx.getUseIndex());
- if (useRange != 0) { // May have caught this already.
- copyRange(*useRange);
- }
- }
- }
-
- for (MachineLoop::block_iterator bbItr = loop.block_begin(),
- bbEnd = loop.block_end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock &loopBlock = **bbItr;
- LiveRange *enteringRange =
- ls.lis->findEnteringRange(li, &loopBlock);
- if (enteringRange != 0) {
- copyRange(*enteringRange);
- }
- }
- }
-
- void renameInside() {
- for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
- iEnd = loopInstrs.end();
- iItr != iEnd; ++iItr) {
- MachineInstr &instr = **iItr;
- for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
- MachineOperand &mop = instr.getOperand(i);
- if (mop.isReg() && mop.getReg() == li.reg) {
- mop.setReg(getNewLI()->reg);
- }
- }
- }
- }
-
- };
-
- void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<MachineDominatorTree>();
- au.addPreserved<MachineDominatorTree>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- au.addPreservedID(RegisterCoalescerPassID);
- au.addPreserved<CalculateSpillWeights>();
- au.addPreserved<LiveStacks>();
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- au.addPreserved<LiveIntervals>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
-
- bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
-
- mf = &fn;
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
- sis = &getAnalysis<SlotIndexes>();
- lis = &getAnalysis<LiveIntervals>();
- mli = &getAnalysis<MachineLoopInfo>();
- mdt = &getAnalysis<MachineDominatorTree>();
-
- fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
- mf->getFunction()->getName().str();
-
- dbgs() << "Splitting " << mf->getFunction()->getName() << ".";
-
- dumpOddTerminators();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
-// std::deque<MachineLoop*> loops;
-// std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-// dbgs() << "Loops:\n";
-// while (!loops.empty()) {
-// MachineLoop &loop = *loops.front();
-// loops.pop_front();
-// std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
-
-// dumpLoopInfo(loop);
-// }
-
- //lis->dump();
- //exit(0);
-
- // Setup initial intervals.
- for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval *li = liItr->second;
-
- if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
- !lis->intervalIsInOneMBB(*li)) {
- intervals.push_back(li);
- }
- }
-
- processIntervals();
-
- intervals.clear();
-
-// dbgs() << "----------------------------------------\n";
-// lis->dump();
-// dbgs() << "----------------------------------------\n";
-
- dumpOddTerminators();
-
- //exit(1);
-
- return false;
- }
-
- void LoopSplitter::releaseMemory() {
- fqn.clear();
- intervals.clear();
- loopRangeMap.clear();
- }
-
- void LoopSplitter::dumpOddTerminators() {
- for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
- MachineBasicBlock *mbb = &*bbItr;
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- if (tii->AnalyzeBranch(*mbb, a, b, c)) {
- dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
- dbgs() << " Terminators:\n";
- for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
- MachineInstr *instr= &*iItr;
- dbgs() << " " << *instr << "";
- }
- dbgs() << "\n Listed successors: [ ";
- for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
- sItr != sEnd; ++sItr) {
- MachineBasicBlock *succMBB = *sItr;
- dbgs() << succMBB->getNumber() << " ";
- }
- dbgs() << "]\n\n";
- }
- }
- }
-
- void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
- MachineBasicBlock &headerBlock = *loop.getHeader();
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- loop.getExitEdges(exitEdges);
-
- dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
- for (std::vector<MachineBasicBlock*>::const_iterator
- subBlockItr = loop.getBlocks().begin(),
- subBlockEnd = loop.getBlocks().end();
- subBlockItr != subBlockEnd; ++subBlockItr) {
- MachineBasicBlock &subBlock = **subBlockItr;
- dbgs() << "BB#" << subBlock.getNumber() << " ";
- }
- dbgs() << "], Exit edges: [ ";
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge &exitEdge = *exitEdgeItr;
- dbgs() << "(MBB#" << exitEdge.first->getNumber()
- << ", MBB#" << exitEdge.second->getNumber() << ") ";
- }
- dbgs() << "], Sub-Loop Headers: [ ";
- for (MachineLoop::iterator subLoopItr = loop.begin(),
- subLoopEnd = loop.end();
- subLoopItr != subLoopEnd; ++subLoopItr) {
- MachineLoop &subLoop = **subLoopItr;
- MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
- dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
- mbb.updateTerminator();
-
- for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
- miItr != miEnd; ++miItr) {
- if (lis->isNotInMIMap(miItr)) {
- lis->InsertMachineInstrInMaps(miItr);
- }
- }
- }
-
- bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
- MachineBasicBlock *header = loop.getHeader();
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
-
- for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
- pbEnd = header->pred_end();
- pbItr != pbEnd; ++pbItr) {
- MachineBasicBlock *predBlock = *pbItr;
- if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) {
- return false;
- }
- }
-
- MachineFunction::iterator headerItr(header);
- if (headerItr == mf->begin())
- return true;
- MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
- assert(headerLayoutPred != 0 && "Header should have layout pred.");
-
- return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
- assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
-
- MachineBasicBlock &header = *loop.getHeader();
-
- // Save the preds - we'll need to update them once we insert the preheader.
- typedef std::set<MachineBasicBlock*> HeaderPreds;
- HeaderPreds headerPreds;
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (!loop.contains(*predItr))
- headerPreds.insert(*predItr);
- }
-
- assert(!headerPreds.empty() && "No predecessors for header?");
-
- //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader...";
-
- MachineBasicBlock *preHeader =
- mf->CreateMachineBasicBlock(header.getBasicBlock());
-
- assert(preHeader != 0 && "Failed to create pre-header.");
-
- mf->insert(header, preHeader);
-
- for (HeaderPreds::iterator hpItr = headerPreds.begin(),
- hpEnd = headerPreds.end();
- hpItr != hpEnd; ++hpItr) {
- assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
- MachineBasicBlock &hp = **hpItr;
- hp.ReplaceUsesOfBlockWith(&header, preHeader);
- }
- preHeader->addSuccessor(&header);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(preHeader));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(preHeader);
-
- if (MachineLoop *parentLoop = loop.getParentLoop()) {
- assert(parentLoop->getHeader() != loop.getHeader() &&
- "Parent loop has same header?");
- parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (parentLoop != 0) {
- loopRangeMap.erase(parentLoop);
- parentLoop = parentLoop->getParentLoop();
- }
- }
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
-
- // Is this safe for physregs?
- // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
- if (!lis->isLiveInToMBB(li, &header))
- continue;
-
- if (lis->isLiveInToMBB(li, preHeader)) {
- assert(lis->isLiveOutOfMBB(li, preHeader) &&
- "Range terminates in newly added preheader?");
- continue;
- }
-
- bool insertRange = false;
-
- for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
- predEnd = preHeader->pred_end();
- predItr != predEnd; ++predItr) {
- MachineBasicBlock *predMBB = *predItr;
- if (lis->isLiveOutOfMBB(li, predMBB)) {
- insertRange = true;
- break;
- }
- }
-
- if (!insertRange)
- continue;
-
- SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
- lis->getMBBEndIdx(preHeader),
- newVal));
- }
-
-
- //dbgs() << "Dumping SlotIndexes:\n";
- //sis->dump();
-
- //dbgs() << "done. (Added MBB#" << preHeader->getNumber() << ")\n";
-
- return *preHeader;
- }
-
- bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
- assert(edge.first->succ_size() > 1 && "Non-sensical edge.");
- if (edge.second->pred_size() > 1)
- return true;
- return false;
- }
-
- bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
- MachineFunction::iterator outBlockItr(edge.second);
- if (outBlockItr == mf->begin())
- return true;
- MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
- assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
- MachineBasicBlock *a = 0, *b = 0;
- SmallVector<MachineOperand, 4> c;
- return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
- !tii->AnalyzeBranch(*edge.first, a, b, c));
- }
-
- MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
- MachineLoop &loop) {
-
- MachineBasicBlock &inBlock = *edge.first;
- MachineBasicBlock &outBlock = *edge.second;
-
- assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
- "Splitting non-critical edge?");
-
- //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber()
- // << " -> MBB#" << outBlock.getNumber() << ")...";
-
- MachineBasicBlock *splitBlock =
- mf->CreateMachineBasicBlock();
-
- assert(splitBlock != 0 && "Failed to create split block.");
-
- mf->insert(&outBlock, splitBlock);
-
- inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
- splitBlock->addSuccessor(&outBlock);
-
- MachineBasicBlock *oldLayoutPred =
- llvm::prior(MachineFunction::iterator(splitBlock));
- if (oldLayoutPred != 0) {
- updateTerminators(*oldLayoutPred);
- }
-
- lis->InsertMBBInMaps(splitBlock);
-
- loopRangeMap.erase(&loop);
-
- MachineLoop *splitParentLoop = loop.getParentLoop();
- while (splitParentLoop != 0 &&
- !splitParentLoop->contains(&outBlock)) {
- splitParentLoop = splitParentLoop->getParentLoop();
- }
-
- if (splitParentLoop != 0) {
- assert(splitParentLoop->contains(&loop) &&
- "Split-block parent doesn't contain original loop?");
- splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
-
- // Invalidate all parent loop ranges.
- while (splitParentLoop != 0) {
- loopRangeMap.erase(splitParentLoop);
- splitParentLoop = splitParentLoop->getParentLoop();
- }
- }
-
-
- for (LiveIntervals::iterator liItr = lis->begin(),
- liEnd = lis->end();
- liItr != liEnd; ++liItr) {
- LiveInterval &li = *liItr->second;
- bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
- lis->isLiveInToMBB(li, &outBlock);
- if (lis->isLiveInToMBB(li, splitBlock)) {
- if (!intersects) {
- li.removeRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock), true);
- }
- } else if (intersects) {
- SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
- assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
- "PHI def index points at actual instruction.");
- VNInfo *newVal = li.getNextValue(newDefIdx, 0,
- lis->getVNInfoAllocator());
- li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
- lis->getMBBEndIdx(splitBlock),
- newVal));
- }
- }
-
- //dbgs() << "done. (Added MBB#" << splitBlock->getNumber() << ")\n";
-
- return *splitBlock;
- }
-
- LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
- typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
- LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
- if (lrItr == loopRangeMap.end()) {
- LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
- std::copy(loop.block_begin(), loop.block_end(),
- std::inserter(loopMBBs, loopMBBs.begin()));
-
- assert(!loopMBBs.empty() && "No blocks in loop?");
-
- LoopRanges &loopRanges = loopRangeMap[&loop];
- assert(loopRanges.empty() && "Loop encountered but not processed?");
- SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
- loopRanges.push_back(
- std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
- lis->getInvalidIndex()));
- for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
- curBlockEnd = loopMBBs.end();
- curBlockItr != curBlockEnd; ++curBlockItr) {
- SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
- if (newStart != oldEnd) {
- loopRanges.back().second = oldEnd;
- loopRanges.push_back(std::make_pair(newStart,
- lis->getInvalidIndex()));
- }
- oldEnd = lis->getMBBEndIdx(*curBlockItr);
- }
-
- loopRanges.back().second =
- lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
-
- return loopRanges;
- }
- return lrItr->second;
- }
-
- std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
- const LiveRange &lr,
- MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- LoopRanges::iterator lrItr = loopRanges.begin(),
- lrEnd = loopRanges.end();
- while (lrItr != lrEnd && lr.start >= lrItr->second) {
- ++lrItr;
- }
-
- if (lrItr == lrEnd) {
- SlotIndex invalid = lis->getInvalidIndex();
- return std::make_pair(false, SlotPair(invalid, invalid));
- }
-
- SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
- SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
-
- return std::make_pair(true, SlotPair(srStart, srEnd));
- }
-
- void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
- LoopRanges &loopRanges = getLoopRanges(loop);
- dbgs() << "For loop MBB#" << loop.getHeader()->getNumber() << ", subranges are: [ ";
- for (LoopRanges::iterator lrItr = loopRanges.begin(), lrEnd = loopRanges.end();
- lrItr != lrEnd; ++lrItr) {
- dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
- }
- dbgs() << "]\n";
- }
-
- void LoopSplitter::processHeader(LoopSplit &split) {
- MachineBasicBlock &header = *split.getLoop().getHeader();
- //dbgs() << " Processing loop header BB#" << header.getNumber() << "\n";
-
- if (!lis->isLiveInToMBB(split.getLI(), &header))
- return; // Not live in, but nothing wrong so far.
-
- MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
- if (!preHeader) {
-
- if (!canInsertPreHeader(split.getLoop())) {
- split.invalidate();
- return; // Couldn't insert a pre-header. Bail on this interval.
- }
-
- for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
- predEnd = header.pred_end();
- predItr != predEnd; ++predItr) {
- if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
- split.splitIncoming();
- break;
- }
- }
- } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
- split.splitIncoming();
- }
- }
-
- void LoopSplitter::processLoopExits(LoopSplit &split) {
- typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
- ExitEdgesList exitEdges;
- split.getLoop().getExitEdges(exitEdges);
-
- //dbgs() << " Processing loop exits:\n";
-
- for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
- exitEdgeEnd = exitEdges.end();
- exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
- MachineLoop::Edge exitEdge = *exitEdgeItr;
-
- LiveRange *outRange =
- split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
-
- if (outRange != 0) {
- if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
- split.invalidate();
- return;
- }
-
- split.splitOutgoing(exitEdge);
- }
- }
- }
-
- void LoopSplitter::processLoopUses(LoopSplit &split) {
- std::set<MachineInstr*> processed;
-
- for (MachineRegisterInfo::reg_iterator
- rItr = mri->reg_begin(split.getLI().reg),
- rEnd = mri->reg_end();
- rItr != rEnd; ++rItr) {
- MachineInstr &instr = *rItr;
- if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
- split.addLoopInstr(&instr);
- processed.insert(&instr);
- }
- }
-
- //dbgs() << " Rewriting reg" << li.reg << " to reg" << newLI->reg
- // << " in blocks [ ";
- //dbgs() << "]\n";
- }
-
- bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
- assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
- "Attempt to split physical register.");
-
- LoopSplit split(*this, li, loop);
- processHeader(split);
- if (split.isValid())
- processLoopExits(split);
- if (split.isValid())
- processLoopUses(split);
- if (split.isValid() /* && split.isWorthwhile() */) {
- split.apply();
- DEBUG(dbgs() << "Success.\n");
- return true;
- }
- DEBUG(dbgs() << "Failed.\n");
- return false;
- }
-
- void LoopSplitter::processInterval(LiveInterval &li) {
- std::deque<MachineLoop*> loops;
- std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
-
- while (!loops.empty()) {
- MachineLoop &loop = *loops.front();
- loops.pop_front();
- DEBUG(
- dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
- << loop.getHeader()->getNumber() << " ";
- );
- if (!splitOverLoop(li, loop)) {
- // Couldn't split over outer loop, schedule sub-loops to be checked.
- std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
- }
- }
- }
-
- void LoopSplitter::processIntervals() {
- while (!intervals.empty()) {
- LiveInterval &li = *intervals.front();
- intervals.pop_front();
-
- assert(!lis->intervalIsInOneMBB(li) &&
- "Single interval in process worklist.");
-
- processInterval(li);
- }
- }
-
-}
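
The deleted LoopSplitter pass rewrote a virtual register that was live across
a loop into a separate in-loop register, copying into it in the preheader and
back out of it on every exit edge. In outline, the copies it inserted looked
like this (sketch reconstructed from the removed applyIncoming() and
applyOutgoing(); register and block names are hypothetical):

    // Preheader, before the terminator:  NewReg = COPY OldReg
    // Each exit block, at the top:       OldReg = COPY NewReg
    static void insertLoopCopies(MachineBasicBlock *Preheader,
                                 MachineBasicBlock *ExitBlock,
                                 unsigned OldReg, unsigned NewReg,
                                 const TargetInstrInfo *TII,
                                 LiveIntervals *LIS) {
      MachineInstr *In =
        BuildMI(*Preheader, Preheader->getFirstTerminator(), DebugLoc(),
                TII->get(TargetOpcode::COPY))
          .addReg(NewReg, RegState::Define)
          .addReg(OldReg, RegState::Kill);
      MachineInstr *Out =
        BuildMI(*ExitBlock, ExitBlock->begin(), DebugLoc(),
                TII->get(TargetOpcode::COPY))
          .addReg(OldReg, RegState::Define)
          .addReg(NewReg, RegState::Kill);
      LIS->InsertMachineInstrInMaps(In);
      LIS->InsertMachineInstrInMaps(Out);
    }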
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
deleted file mode 100644
index 9fb1b8b30139..000000000000
--- a/lib/CodeGen/Splitter.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SPLITTER_H
-#define LLVM_CODEGEN_SPLITTER_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-#include <deque>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
- class LiveInterval;
- class LiveIntervals;
- struct LiveRange;
- class LoopSplit;
- class MachineDominatorTree;
- class MachineRegisterInfo;
- class SlotIndexes;
- class TargetInstrInfo;
- class VNInfo;
-
- class LoopSplitter : public MachineFunctionPass {
- friend class LoopSplit;
- public:
- static char ID;
-
- LoopSplitter() : MachineFunctionPass(ID) {
- initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &au) const;
-
- virtual bool runOnMachineFunction(MachineFunction &fn);
-
- virtual void releaseMemory();
-
-
- private:
-
- MachineFunction *mf;
- LiveIntervals *lis;
- MachineLoopInfo *mli;
- MachineRegisterInfo *mri;
- MachineDominatorTree *mdt;
- SlotIndexes *sis;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
-
- std::string fqn;
- std::deque<LiveInterval*> intervals;
-
- typedef std::pair<SlotIndex, SlotIndex> SlotPair;
- typedef std::vector<SlotPair> LoopRanges;
- typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
- LoopRangeMap loopRangeMap;
-
- void dumpLoopInfo(MachineLoop &loop);
-
- void dumpOddTerminators();
-
- void updateTerminators(MachineBasicBlock &mbb);
-
- bool canInsertPreHeader(MachineLoop &loop);
- MachineBasicBlock& insertPreHeader(MachineLoop &loop);
-
- bool isCriticalEdge(MachineLoop::Edge &edge);
- bool canSplitEdge(MachineLoop::Edge &edge);
- MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
-
- LoopRanges& getLoopRanges(MachineLoop &loop);
- std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
- MachineLoop &loop);
-
- void dumpLoopRanges(MachineLoop &loop);
-
- void processHeader(LoopSplit &split);
- void processLoopExits(LoopSplit &split);
- void processLoopUses(LoopSplit &split);
-
- bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
-
- void processInterval(LiveInterval &li);
-
- void processIntervals();
- };
-
-}
-
-#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 1f0e5a2711ae..43a6ad8c97a4 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -123,16 +123,11 @@ bool StackProtector::RequiresStackProtector() const {
// protectors.
return true;
- if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
- // We apparently only care about character arrays.
- if (!AT->getElementType()->isIntegerTy(8))
- continue;
-
+ if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
// If an array has more than SSPBufferSize bytes of allocated space,
// then we emit stack protectors.
if (SSPBufferSize <= TD->getTypeAllocSize(AT))
return true;
- }
}
}
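
The StackProtector change drops the requirement that the array be of i8:
any array whose allocation is at least SSPBufferSize bytes now requests a
protector. The resulting check, reconstructed as a standalone sketch (the
helper name is hypothetical; AI, TD and SSPBufferSize are as in
StackProtector::RequiresStackProtector()):

    // Sketch of the predicate after this change: the element type no
    // longer matters, only the allocated size does.
    static bool bigEnoughForProtector(const AllocaInst *AI,
                                      const TargetData *TD,
                                      unsigned SSPBufferSize) {
      if (ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType()))
        if (SSPBufferSize <= TD->getTypeAllocSize(AT))
          return true;
      return false;
    }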
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 57cbe1ba5960..1e940b1d0711 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "stackcoloring"
-#include "VirtRegMap.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
@@ -40,29 +39,17 @@ DisableSharing("no-stack-slot-sharing",
cl::init(false), cl::Hidden,
cl::desc("Suppress slot sharing during stack coloring"));
-static cl::opt<bool>
-ColorWithRegsOpt("color-ss-with-regs",
- cl::init(false), cl::Hidden,
- cl::desc("Color stack slots with free registers"));
-
-
static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
-STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs");
-STATISTIC(NumLoadElim, "Number of loads eliminated");
-STATISTIC(NumStoreElim, "Number of stores eliminated");
STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
bool ColorWithRegs;
LiveStacks* LS;
- VirtRegMap* VRM;
MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
const MachineLoopInfo *loopInfo;
// SSIntervals - Spill slot intervals.
@@ -98,18 +85,12 @@ namespace {
MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
}
- StackSlotColoring(bool RegColor) :
- MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
- initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
- }
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<LiveStacks>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<VirtRegMap>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
@@ -117,9 +98,6 @@ namespace {
}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char* getPassName() const {
- return "Stack Slot Coloring";
- }
private:
void InitializeSlots();
@@ -127,41 +105,23 @@ namespace {
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
bool ColorSlots(MachineFunction &MF);
- bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
- SmallVector<SmallVector<int, 4>, 16> &RevMap,
- BitVector &SlotIsReg);
void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
MachineFunction &MF);
- bool PropagateBackward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg);
- bool PropagateForward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg);
- void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
- unsigned Reg, const TargetRegisterClass *RC,
- SmallSet<unsigned, 4> &Defs,
- MachineFunction &MF);
- bool AllMemRefsCanBeUnfolded(int SS);
bool RemoveDeadStores(MachineBasicBlock* MBB);
};
} // end anonymous namespace
char StackSlotColoring::ID = 0;
+char &llvm::StackSlotColoringID = StackSlotColoring::ID;
INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
"Stack Slot Coloring", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
"Stack Slot Coloring", false, false)
-FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
- return new StackSlotColoring(RegColor);
-}
-
namespace {
// IntervalSorter - Comparison predicate that sort live intervals by
// their weight.
@@ -248,79 +208,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
return false;
}
-/// ColorSlotsWithFreeRegs - If there are any free registers available, try
-/// replacing spill slots references with registers instead.
-bool
-StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
- SmallVector<SmallVector<int, 4>, 16> &RevMap,
- BitVector &SlotIsReg) {
- if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
- return false;
-
- bool Changed = false;
- DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
- for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
- LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
- if (!UsedColors[SS] || li->weight < 20)
- // If the weight is < 20, i.e. two references in a loop with depth 1,
- // don't bother with it.
- continue;
-
- // These slots can share the same registers.
- bool AllColored = true;
- SmallVector<unsigned, 4> ColoredRegs;
- for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
- int RSS = RevMap[SS][j];
- const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
- // If it's not colored to another stack slot, try coloring it
- // to a "free" register.
- if (!RC) {
- AllColored = false;
- continue;
- }
- unsigned Reg = VRM->getFirstUnusedRegister(RC);
- if (!Reg) {
- AllColored = false;
- continue;
- }
- if (!AllMemRefsCanBeUnfolded(RSS)) {
- AllColored = false;
- continue;
- } else {
- DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
- << TRI->getName(Reg) << '\n');
- ColoredRegs.push_back(Reg);
- SlotMapping[RSS] = Reg;
- SlotIsReg.set(RSS);
- Changed = true;
- }
- }
-
- // Register and its sub-registers are no longer free.
- while (!ColoredRegs.empty()) {
- unsigned Reg = ColoredRegs.back();
- ColoredRegs.pop_back();
- VRM->setRegisterUsed(Reg);
- // If reg is a callee-saved register, it will have to be spilled in
- // the prologue.
- MRI->setPhysRegUsed(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- VRM->setRegisterUsed(*AS);
- MRI->setPhysRegUsed(*AS);
- }
- }
- // This spill slot is dead after the rewrites
- if (AllColored) {
- MFI->RemoveStackObject(SS);
- ++NumEliminated;
- }
- }
- DEBUG(dbgs() << '\n');
-
- return Changed;
-}
-
/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
///
int StackSlotColoring::ColorSlot(LiveInterval *li) {
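
ColorSlot, whose body follows in the next hunk, implements a greedy scheme: each spill-slot interval gets the lowest-numbered color whose current occupants it does not overlap. A minimal standalone sketch of that idea, with plain half-open ranges standing in for LiveIntervals (all names here are illustrative, not LLVM API):

#include <cstdio>
#include <vector>

struct Range { int Start, End; };                 // half-open [Start, End)

static bool overlaps(const Range &A, const Range &B) {
  return A.Start < B.End && B.Start < A.End;
}

// Assign each interval the first color with no overlapping occupant.
static std::vector<int> colorSlots(const std::vector<Range> &Intervals) {
  std::vector<std::vector<Range>> Assignments;    // occupants per color
  std::vector<int> Color(Intervals.size());
  for (size_t i = 0; i != Intervals.size(); ++i) {
    size_t C = 0;
    for (; C != Assignments.size(); ++C) {
      bool Clash = false;
      for (const Range &R : Assignments[C])
        if (overlaps(Intervals[i], R)) { Clash = true; break; }
      if (!Clash) break;
    }
    if (C == Assignments.size())
      Assignments.emplace_back();                 // open a fresh color
    Assignments[C].push_back(Intervals[i]);
    Color[i] = (int)C;
  }
  return Color;
}

int main() {
  // Slots 0 and 2 are disjoint in time, so they share color 0.
  std::vector<Range> I = {{0, 10}, {5, 15}, {12, 20}};
  for (int C : colorSlots(I))
    std::printf("%d ", C);                        // prints: 0 1 0
}

The real pass sorts the intervals by weight first, so hot slots pick their colors early.
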
@@ -372,7 +259,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
SmallVector<int, 16> SlotMapping(NumObjs, -1);
SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
- BitVector SlotIsReg(NumObjs);
BitVector UsedColors(NumObjs);
DEBUG(dbgs() << "Color spill slot intervals:\n");
@@ -404,31 +290,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
DEBUG(dbgs() << '\n');
#endif
- // Can we "color" a stack slot with a unused register?
- Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
-
if (!Changed)
return false;
// Rewrite all MO_FrameIndex operands.
SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
- bool isReg = SlotIsReg[SS];
int NewFI = SlotMapping[SS];
- if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+ if (NewFI == -1 || (NewFI == (int)SS))
continue;
- const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
- if (!isReg)
- RewriteInstruction(RefMIs[i], SS, NewFI, MF);
- else {
- // Rewrite to use a register instead.
- unsigned MBBId = RefMIs[i]->getParent()->getNumber();
- SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
- UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
- }
+ RewriteInstruction(RefMIs[i], SS, NewFI, MF);
}
// Delete unused stack slots.
@@ -441,28 +315,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
return true;
}
-/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
-/// spill slot index can be unfolded.
-bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
- SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
- for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
- MachineInstr *MI = RefMIs[i];
- if (TII->isLoadFromStackSlot(MI, SS) ||
- TII->isStoreToStackSlot(MI, SS))
- // Restore and spill will become copies.
- return true;
- if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
- return false;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = MI->getOperand(j);
- if (MO.isFI() && MO.getIndex() != SS)
- // If it uses another frameindex, we can't currently unfold it.
- return false;
- }
- }
- return true;
-}
-
/// RewriteInstruction - Rewrite the specified instruction by replacing
/// references to the old frame index with the new one.
void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
@@ -489,179 +341,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
(*I)->setValue(NewSV);
}
-/// PropagateBackward - Traverse backward and look for the definition of
-/// OldReg. If it can successfully update all of the references with NewReg,
-/// do so and return true.
-bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg) {
- if (MII == MBB->begin())
- return false;
-
- SmallVector<MachineOperand*, 4> Uses;
- SmallVector<MachineOperand*, 4> Refs;
- while (--MII != MBB->begin()) {
- bool FoundDef = false; // Not counting two-address def.
-
- Uses.clear();
- const MCInstrDesc &MCID = MII->getDesc();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (Reg == OldReg) {
- if (MO.isImplicit())
- return false;
-
- // Abort if the use is actually a sub-register def. We don't have enough
- // information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isSubregToReg())
- return false;
-
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
- if (RC && !RC->contains(NewReg))
- return false;
-
- if (MO.isUse()) {
- Uses.push_back(&MO);
- } else {
- Refs.push_back(&MO);
- if (!MII->isRegTiedToUseOperand(i))
- FoundDef = true;
- }
- } else if (TRI->regsOverlap(Reg, NewReg)) {
- return false;
- } else if (TRI->regsOverlap(Reg, OldReg)) {
- if (!MO.isUse() || !MO.isKill())
- return false;
- }
- }
-
- if (FoundDef) {
- // Found non-two-address def. Stop here.
- for (unsigned i = 0, e = Refs.size(); i != e; ++i)
- Refs[i]->setReg(NewReg);
- return true;
- }
-
- // Two-address uses must be updated as well.
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- Refs.push_back(Uses[i]);
- }
- return false;
-}
-
-/// PropagateForward - Traverse forward and look for the kill of OldReg. If
-/// it can successfully update all of the uses with NewReg, do so and
-/// return true.
-bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
- MachineBasicBlock *MBB,
- unsigned OldReg, unsigned NewReg) {
- if (MII == MBB->end())
- return false;
-
- SmallVector<MachineOperand*, 4> Uses;
- while (++MII != MBB->end()) {
- bool FoundKill = false;
- const MCInstrDesc &MCID = MII->getDesc();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
- if (Reg == OldReg) {
- if (MO.isDef() || MO.isImplicit())
- return false;
-
- // Abort if the use is actually a sub-register use. We don't have enough
- // information to figure out if it is really legal.
- if (MO.getSubReg())
- return false;
-
- const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
- if (RC && !RC->contains(NewReg))
- return false;
- if (MO.isKill())
- FoundKill = true;
-
- Uses.push_back(&MO);
- } else if (TRI->regsOverlap(Reg, NewReg) ||
- TRI->regsOverlap(Reg, OldReg))
- return false;
- }
- if (FoundKill) {
- for (unsigned i = 0, e = Uses.size(); i != e; ++i)
- Uses[i]->setReg(NewReg);
- return true;
- }
- }
- return false;
-}
-
-/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
-/// folded memory references and replacing those references with register
-/// references instead.
-void
-StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
- unsigned Reg,
- const TargetRegisterClass *RC,
- SmallSet<unsigned, 4> &Defs,
- MachineFunction &MF) {
- MachineBasicBlock *MBB = MI->getParent();
- if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
- if (PropagateForward(MI, MBB, DstReg, Reg)) {
- DEBUG(dbgs() << "Eliminated load: ");
- DEBUG(MI->dump());
- ++NumLoadElim;
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
- DstReg).addReg(Reg);
- ++NumRegRepl;
- }
-
- if (!Defs.count(Reg)) {
- // If this is the first use of Reg in this MBB and it wasn't previously
- // defined in MBB, add it to livein.
- MBB->addLiveIn(Reg);
- Defs.insert(Reg);
- }
- } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
- if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
- DEBUG(dbgs() << "Eliminated store: ");
- DEBUG(MI->dump());
- ++NumStoreElim;
- } else {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
- .addReg(SrcReg);
- ++NumRegRepl;
- }
-
- // Remember reg has been defined in MBB.
- Defs.insert(Reg);
- } else {
- SmallVector<MachineInstr*, 4> NewMIs;
- bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
- (void)Success; // Silence compiler warning.
- assert(Success && "Failed to unfold!");
- MachineInstr *NewMI = NewMIs[0];
- MBB->insert(MI, NewMI);
- ++NumRegRepl;
-
- if (NewMI->readsRegister(Reg)) {
- if (!Defs.count(Reg))
- // If this is the first use of Reg in this MBB and it wasn't previously
- // defined in MBB, add it to livein.
- MBB->addLiveIn(Reg);
- Defs.insert(Reg);
- }
- }
- MBB->erase(MI);
-}
/// RemoveDeadStores - Scan through a basic block and look for loads followed
/// by stores. If they're both using the same stack slot, then the store is
@@ -679,33 +358,33 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
I != E; ++I) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;
-
+
MachineBasicBlock::iterator NextMI = llvm::next(I);
if (NextMI == MBB->end()) continue;
-
+
int FirstSS, SecondSS;
unsigned LoadReg = 0;
unsigned StoreReg = 0;
if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
-
+
++NumDead;
changed = true;
-
+
if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
++NumDead;
toErase.push_back(I);
}
-
+
toErase.push_back(NextMI);
++I;
}
-
+
for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I)
(*I)->eraseFromParent();
-
+
return changed;
}
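
The pattern RemoveDeadStores eliminates is a reload immediately followed by a spill of the same register to the same slot; the store is then trivially dead. A self-contained model of that scan, with simple records standing in for MachineInstrs (illustrative, not the LLVM types):

#include <cstdio>
#include <vector>

struct Op { bool IsLoad; int Slot; int Reg; };    // load Slot->Reg, or store Reg->Slot

// Mark a store dead when the instruction just before it reloaded the same
// register from the same stack slot.
static std::vector<bool> findDeadStores(const std::vector<Op> &Block) {
  std::vector<bool> Dead(Block.size(), false);
  for (size_t i = 0; i + 1 < Block.size(); ++i) {
    const Op &A = Block[i], &B = Block[i + 1];
    if (A.IsLoad && !B.IsLoad && A.Slot == B.Slot && A.Reg == B.Reg)
      Dead[i + 1] = true;
  }
  return Dead;
}

int main() {
  std::vector<Op> MBB = {{true, 3, 7},            // r7 = load fi#3
                         {false, 3, 7},           // store r7, fi#3  <- dead
                         {false, 4, 7}};          // store r7, fi#4  <- live
  std::vector<bool> D = findDeadStores(MBB);
  for (size_t i = 0; i != D.size(); ++i)
    std::printf("op%zu: %s\n", i, D[i] ? "dead" : "live");
}
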
@@ -713,32 +392,27 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Stack Slot Coloring **********\n"
- << "********** Function: "
+ << "********** Function: "
<< MF.getFunction()->getName() << '\n';
});
MFI = MF.getFrameInfo();
- MRI = &MF.getRegInfo();
TII = MF.getTarget().getInstrInfo();
- TRI = MF.getTarget().getRegisterInfo();
LS = &getAnalysis<LiveStacks>();
- VRM = &getAnalysis<VirtRegMap>();
loopInfo = &getAnalysis<MachineLoopInfo>();
bool Changed = false;
unsigned NumSlots = LS->getNumIntervals();
- if (NumSlots < 2) {
- if (NumSlots == 0 || !VRM->HasUnusedRegisters())
- // Nothing to do!
- return false;
- }
+ if (NumSlots == 0)
+ // Nothing to do!
+ return false;
// If there are calls to setjmp or sigsetjmp, don't perform stack slot
// coloring. The stack could be modified before the longjmp is executed,
// resulting in the wrong value being used afterwards. (See
// <rdar://problem/8007500>.)
- if (MF.callsSetJmp())
+ if (MF.exposesReturnsTwice())
return false;
// Gather spill slot references
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 260cc0ee50a5..c6fdc7382435 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -228,7 +228,6 @@ static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
return &MO;
}
}
- return NULL;
}
bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
@@ -390,12 +389,10 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
MachineOperand *LastUse = findLastUse(MBB, SrcReg);
assert(LastUse);
SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
- SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB));
LastUse->setIsKill(true);
}
- LI->renumber();
-
Allocator.Reset();
RegNodeMap.clear();
PHISrcDefs.clear();
@@ -745,7 +742,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
// Set the phi-def flag for the VN at this PHI.
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot());
assert(DestVNI);
DestVNI->setIsPHIDef(true);
@@ -756,7 +753,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
DestVNI->def = MBBStartIndex;
DestLI.addRange(LiveRange(MBBStartIndex,
- PHIIndex.getDefIndex(),
+ PHIIndex.getRegSlot(),
DestVNI));
return;
}
@@ -779,22 +776,21 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
- CopyInstr,
LI->getVNInfoAllocator());
CopyVNI->setIsPHIDef(true);
CopyLI.addRange(LiveRange(MBBStartIndex,
- DestCopyIndex.getDefIndex(),
+ DestCopyIndex.getRegSlot(),
CopyVNI));
// Adjust DestReg's live interval to adjust for its new definition at
// CopyInstr.
LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
- DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+ DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot());
- VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot());
assert(DestVNI);
- DestVNI->def = DestCopyIndex.getDefIndex();
+ DestVNI->def = DestCopyIndex.getRegSlot();
InsertedDestCopies[CopyReg] = CopyInstr;
}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3a6211a0f3e6..8ebfbcae785b 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -56,10 +56,10 @@ typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
- bool PreRegAlloc;
const TargetInstrInfo *TII;
MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
+ bool PreRegAlloc;
// SSAUpdateVRs - A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
@@ -70,11 +70,10 @@ namespace {
public:
static char ID;
- explicit TailDuplicatePass(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+ explicit TailDuplicatePass() :
+ MachineFunctionPass(ID), PreRegAlloc(false) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Tail Duplication"; }
private:
void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
@@ -118,14 +117,16 @@ namespace {
char TailDuplicatePass::ID = 0;
}
-FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
- return new TailDuplicatePass(PreRegAlloc);
-}
+char &llvm::TailDuplicateID = TailDuplicatePass::ID;
+
+INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication",
+ false, false)
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ PreRegAlloc = MRI->isSSA();
bool MadeChange = false;
while (TailDuplicateBlocks(MF))
@@ -432,7 +433,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
MO.setReg(VI->second);
}
}
- PredBB->insert(PredBB->end(), NewMI);
+ PredBB->insert(PredBB->instr_end(), NewMI);
}
/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
@@ -553,7 +554,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
bool HasIndirectbr = false;
if (!TailBB.empty())
- HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+ HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
MaxDuplicateCount = 20;
@@ -561,22 +562,21 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
- ++I) {
+ for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable())
+ if (I->isNotDuplicable())
return false;
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn())
+ if (PreRegAlloc && I->isReturn())
return false;
// Avoid duplicating calls before register allocation. Calls present a
// barrier to register allocation so duplicating them may end up increasing
// spills.
- if (PreRegAlloc && I->getDesc().isCall())
+ if (PreRegAlloc && I->isCall())
return false;
if (!I->isPHI() && !I->isDebugValue())
@@ -611,7 +611,7 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
++I;
if (I == E)
return true;
- return I->getDesc().isUnconditionalBranch();
+ return I->isUnconditionalBranch();
}
static bool
@@ -778,8 +778,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
- while (I != TailBB->end()) {
+ // Use instr_iterator here to properly handle bundles, e.g.
+ // ARM Thumb2 IT block.
+ MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
+ while (I != TailBB->instr_end()) {
MachineInstr *MI = &*I;
++I;
if (MI->isPHI()) {
@@ -824,7 +826,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
// AnalyzeBranch.
- if (PrevBB->succ_size() == 1 &&
+ if (PrevBB->succ_size() == 1 &&
!TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
@@ -849,6 +851,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
MachineInstr *MI = &*I++;
+ assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
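
The switch to instr_iterator above matters because a bundle, e.g. an ARM Thumb2 IT block, is one top-level node wrapping several real instructions: the plain iterator stops once per bundle, while instr_iterator visits every instruction inside. A toy model of the two traversals, with nested vectors standing in for bundles:

#include <cstdio>
#include <vector>

int main() {
  // Each inner vector is a "bundle"; singletons model unbundled instructions.
  std::vector<std::vector<int>> Block = {{1}, {2, 3, 4}, {5}};

  std::printf("top-level: ");
  for (const auto &Bundle : Block)                // iterator view: 3 stops
    std::printf("[%d] ", Bundle.front());

  std::printf("\nflattened: ");
  for (const auto &Bundle : Block)                // instr_iterator view: 5 stops
    for (int I : Bundle)
      std::printf("%d ", I);
  std::printf("\n");
}
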
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
new file mode 100644
index 000000000000..cadb87815dbe
--- /dev/null
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -0,0 +1,45 @@
+//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <cstdlib>
+using namespace llvm;
+
+TargetFrameLowering::~TargetFrameLowering() {
+}
+
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index. This is the default implementation
+/// which is overridden for some targets.
+int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->getObjectOffset(FI) + MFI->getStackSize() -
+ getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+}
+
+int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg) const {
+ const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
+
+ // By default, assume all frame indices are referenced via whatever
+ // getFrameRegister() says. The target can override this if it's doing
+ // something different.
+ FrameReg = RI->getFrameRegister(MF);
+ return getFrameIndexOffset(MF, FI);
+}
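
The default getFrameIndexOffset is pure arithmetic, so a worked example makes it concrete. A minimal sketch with made-up numbers (no particular target's frame layout is implied):

#include <cstdio>

// Default frame-index displacement: the object's offset within the frame,
// plus the frame size, minus the local-area offset, plus any target-specific
// adjustment.
static int frameIndexOffset(int ObjectOffset, int StackSize,
                            int LocalAreaOffset, int Adjustment) {
  return ObjectOffset + StackSize - LocalAreaOffset + Adjustment;
}

int main() {
  // -8 + 32 - 0 + 0 = 24 bytes from the frame register.
  std::printf("%d\n", frameIndexOffset(-8, 32, 0, 0));
}
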
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index f32678f12b0a..2beb9281e35b 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -77,6 +78,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0;
unsigned Reg1 = MI->getOperand(Idx1).getReg();
unsigned Reg2 = MI->getOperand(Idx2).getReg();
+ unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0;
+ unsigned SubReg1 = MI->getOperand(Idx1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(Idx2).getSubReg();
bool Reg1IsKill = MI->getOperand(Idx1).isKill();
bool Reg2IsKill = MI->getOperand(Idx2).isKill();
// If destination is tied to either of the commuted source register, then
@@ -85,10 +89,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
Reg2IsKill = false;
Reg0 = Reg2;
+ SubReg0 = SubReg2;
} else if (HasDef && Reg0 == Reg2 &&
MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
Reg1IsKill = false;
Reg0 = Reg1;
+ SubReg0 = SubReg1;
}
if (NewMI) {
@@ -97,19 +103,23 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
MachineFunction &MF = *MI->getParent()->getParent();
if (HasDef)
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
- .addReg(Reg2, getKillRegState(Reg2IsKill))
- .addReg(Reg1, getKillRegState(Reg2IsKill));
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead), SubReg0)
+ .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+ .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
else
return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
- .addReg(Reg2, getKillRegState(Reg2IsKill))
- .addReg(Reg1, getKillRegState(Reg2IsKill));
+ .addReg(Reg2, getKillRegState(Reg2IsKill), SubReg2)
+ .addReg(Reg1, getKillRegState(Reg1IsKill), SubReg1);
}
- if (HasDef)
+ if (HasDef) {
MI->getOperand(0).setReg(Reg0);
+ MI->getOperand(0).setSubReg(SubReg0);
+ }
MI->getOperand(Idx2).setReg(Reg1);
MI->getOperand(Idx1).setReg(Reg2);
+ MI->getOperand(Idx2).setSubReg(SubReg1);
+ MI->getOperand(Idx1).setSubReg(SubReg2);
MI->getOperand(Idx2).setIsKill(Reg1IsKill);
MI->getOperand(Idx1).setIsKill(Reg2IsKill);
return MI;
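
Besides threading sub-register indices through, the hunk above fixes a kill-flag bug: both rebuilt source operands previously took Reg2IsKill. The invariant is that register, sub-register index, and kill flag must travel together. A standalone sketch of the corrected swap, with a plain struct standing in for MachineOperand (illustrative only):

#include <cstdio>
#include <utility>

struct Operand { unsigned Reg, SubReg; bool IsKill; };

// Commute two source operands, exchanging registers, sub-register indices,
// and kill flags as a unit so none of the three goes out of sync.
static void commute(Operand &A, Operand &B) {
  std::swap(A.Reg, B.Reg);
  std::swap(A.SubReg, B.SubReg);
  std::swap(A.IsKill, B.IsKill);
}

int main() {
  Operand Src1 = {1, 0, true}, Src2 = {2, 3, false};
  commute(Src1, Src2);
  std::printf("%u:%u:%d  %u:%u:%d\n",
              Src1.Reg, Src1.SubReg, (int)Src1.IsKill,
              Src2.Reg, Src2.SubReg, (int)Src2.IsKill);  // 2:3:0  1:0:1
}
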
@@ -121,6 +131,9 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
if (!MCID.isCommutable())
return false;
@@ -136,11 +149,28 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
}
+bool
+TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator()) return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+ if (!MI->isPredicable())
+ return true;
+ return !isPredicated(MI);
+}
+
+
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
+
+ assert(!MI->isBundle() &&
+ "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+
const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -218,7 +248,7 @@ TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
- assert(!Orig->getDesc().isNotDuplicable() &&
+ assert(!Orig->isNotDuplicable() &&
"Instruction cannot be duplicated");
return MF.CloneMachineInstr(Orig);
}
@@ -288,16 +318,15 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
+ NewMI->mayStore()) &&
"Folded a def to a non-store!");
assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
+ NewMI->mayLoad()) &&
"Folded a use to a non-load!");
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -332,7 +361,7 @@ MachineInstr*
TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
- assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
+ assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!");
#ifndef NDEBUG
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
@@ -360,7 +389,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetMachine &TM = MF.getTarget();
const TargetInstrInfo &TII = *TM.getInstrInfo();
- const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI->getNumOperands() || !MI->getOperand(0).isReg())
@@ -383,10 +411,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const MCInstrDesc &MCID = MI->getDesc();
-
// Avoid instructions obviously unsafe for remat.
- if (MCID.isNotDuplicable() || MCID.mayStore() ||
+ if (MI->isNotDuplicable() || MI->mayStore() ||
MI->hasUnmodeledSideEffects())
return false;
@@ -396,7 +422,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return false;
// Avoid instructions which load from potentially varying memory.
- if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MI->mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -414,19 +440,8 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI.def_empty(Reg))
- return false;
- BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
- if (AllocatableRegs.test(Reg))
+ if (!MRI.isConstantPhysReg(Reg, MF))
return false;
- // Check for a def among the register's aliases too.
- for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- if (!MRI.def_empty(AliasReg))
- return false;
- if (AllocatableRegs.test(AliasReg))
- return false;
- }
} else {
// A physreg def. We can't remat it.
return false;
@@ -457,7 +472,7 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const{
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Don't attempt to schedule around any instruction that defines
@@ -493,3 +508,32 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
return (ScheduleHazardRecognizer *)
new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
+
+int
+TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const {
+ if (!ItinData || ItinData->isEmpty())
+ return -1;
+
+ if (!DefNode->isMachineOpcode())
+ return -1;
+
+ unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
+ if (!UseNode->isMachineOpcode())
+ return ItinData->getOperandCycle(DefClass, DefIdx);
+ unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
+ return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
+}
+
+int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *N) const {
+ if (!ItinData || ItinData->isEmpty())
+ return 1;
+
+ if (!N->isMachineOpcode())
+ return 1;
+
+ return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
+}
+
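
Both new latency queries reduce to table lookups keyed by scheduling class and operand index. A toy model of the def-to-use case, assuming the usual DefCycle - UseCycle + 1 formula (the table is hand-made; a real InstrItineraryData is generated from the target's scheduling description and also models forwarding):

#include <cstdio>

// OperandCycle[SchedClass][OpIdx]: cycle at which a def is produced or a use
// is read. A negative entry would mean the itinerary has no answer.
static const int OperandCycle[2][2] = {{3, 1},    // class 0: def @3, use @1
                                       {5, 2}};   // class 1: def @5, use @2

static int operandLatency(int DefClass, int DefIdx, int UseClass, int UseIdx) {
  int DefCycle = OperandCycle[DefClass][DefIdx];
  int UseCycle = OperandCycle[UseClass][UseIdx];
  if (DefCycle < 0 || UseCycle < 0)
    return -1;
  int Latency = DefCycle - UseCycle + 1;
  return Latency < 1 ? 1 : Latency;               // at least one cycle
}

int main() {
  // Producer in class 1 is ready at cycle 5; the consumer reads at cycle 1,
  // so the def-to-use latency is 5 - 1 + 1 = 5.
  std::printf("%d\n", operandLatency(1, 0, 0, 1));
}
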
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 3848f4d4d4c4..9925185be120 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -17,6 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -53,11 +54,9 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
report_fatal_error("We do not support this DWARF encoding yet!");
case dwarf::DW_EH_PE_absptr:
return Mang->getSymbol(GV);
- break;
case dwarf::DW_EH_PE_pcrel: {
return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
Mang->getSymbol(GV)->getName());
- break;
}
}
}
@@ -78,14 +77,14 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
Flags,
SectionKind::getDataRel(),
0, Label->getName());
+ unsigned Size = TM.getTargetData()->getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(8);
+ Streamer.EmitValueToAlignment(TM.getTargetData()->getPointerABIAlignment());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
- const MCExpr *E = MCConstantExpr::Create(8, getContext());
+ const MCExpr *E = MCConstantExpr::Create(Size, getContext());
Streamer.EmitELFSize(Label, E);
Streamer.EmitLabel(Label);
- unsigned Size = TM.getTargetData()->getPointerSize();
Streamer.EmitSymbolValue(Sym, Size);
}
@@ -189,6 +188,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
static const char *getSectionPrefixForGlobal(SectionKind Kind) {
if (Kind.isText()) return ".text.";
if (Kind.isReadOnly()) return ".rodata.";
+ if (Kind.isBSS()) return ".bss.";
if (Kind.isThreadData()) return ".tdata.";
if (Kind.isThreadBSS()) return ".tbss.";
@@ -217,7 +217,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
if ((GV->isWeakForLinker() || EmitUniquedSection) &&
- !Kind.isCommon() && !Kind.isBSS()) {
+ !Kind.isCommon()) {
const char *Prefix;
Prefix = getSectionPrefixForGlobal(Kind);
@@ -342,10 +342,92 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+const MCSection *
+TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ std::string Name = std::string(".ctors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *
+TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
+ // The default scheme is .ctor / .dtor, so we have to invert the priority
+ // numbering.
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ std::string Name = std::string(".dtors.") + utostr(65535 - Priority);
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
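
As the comments note, the .ctors/.dtors scheme numbers sections in the opposite direction from LLVM's priorities, so the priority is flipped before being embedded in the section name. A quick sketch of that mapping:

#include <cstdio>
#include <string>

static std::string ctorSectionFor(unsigned Priority) {
  if (Priority == 65535)                          // default priority
    return ".ctors";
  return ".ctors." + std::to_string(65535 - Priority);
}

int main() {
  std::printf("%s\n", ctorSectionFor(65535).c_str());  // .ctors
  std::printf("%s\n", ctorSectionFor(101).c_str());    // .ctors.65434
  std::printf("%s\n", ctorSectionFor(200).c_str());    // .ctors.65335
}
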
//===----------------------------------------------------------------------===//
// MachO
//===----------------------------------------------------------------------===//
+/// emitModuleFlags - Emit the module flags that specify the garbage collection
+/// information.
+void TargetLoweringObjectFileMachO::
+emitModuleFlags(MCStreamer &Streamer,
+ ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ Mangler *Mang, const TargetMachine &TM) const {
+ unsigned VersionVal = 0;
+ unsigned GCFlags = 0;
+ StringRef SectionVal;
+
+ for (ArrayRef<Module::ModuleFlagEntry>::iterator
+ i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) {
+ const Module::ModuleFlagEntry &MFE = *i;
+
+ // Ignore flags with 'Require' behavior.
+ if (MFE.Behavior == Module::Require)
+ continue;
+
+ StringRef Key = MFE.Key->getString();
+ Value *Val = MFE.Val;
+
+ if (Key == "Objective-C Image Info Version")
+ VersionVal = cast<ConstantInt>(Val)->getZExtValue();
+ else if (Key == "Objective-C Garbage Collection" ||
+ Key == "Objective-C GC Only")
+ GCFlags |= cast<ConstantInt>(Val)->getZExtValue();
+ else if (Key == "Objective-C Image Info Section")
+ SectionVal = cast<MDString>(Val)->getString();
+ }
+
+ // The section is mandatory. If we don't have it, then we don't have GC info.
+ if (SectionVal.empty()) return;
+
+ StringRef Segment, Section;
+ unsigned TAA = 0, StubSize = 0;
+ bool TAAParsed;
+ std::string ErrorCode =
+ MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section,
+ TAA, TAAParsed, StubSize);
+ if (!ErrorCode.empty())
+ // If invalid, report the error with report_fatal_error.
+ report_fatal_error("Invalid section specifier '" + Section + "': " +
+ ErrorCode + ".");
+
+ // Get the section.
+ const MCSectionMachO *S =
+ getContext().getMachOSection(Segment, Section, TAA, StubSize,
+ SectionKind::getDataNoRel());
+ Streamer.SwitchSection(S);
+ Streamer.EmitLabel(getContext().
+ GetOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO")));
+ Streamer.EmitIntValue(VersionVal, 4);
+ Streamer.EmitIntValue(GCFlags, 4);
+ Streamer.AddBlankLine();
+}
+
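
The flag-folding loop above boils down to matching three known keys and accumulating their values. A simplified standalone version, with (key, value) string pairs standing in for the module-flag metadata; the key names match the hunk, the sample section string is illustrative, and the Mach-O emission itself is omitted:

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

struct ImageInfo { unsigned Version = 0; unsigned GCFlags = 0; std::string Section; };

static ImageInfo
foldFlags(const std::vector<std::pair<std::string, std::string>> &Flags) {
  ImageInfo Info;
  for (const auto &F : Flags) {
    if (F.first == "Objective-C Image Info Version")
      Info.Version = std::stoul(F.second);
    else if (F.first == "Objective-C Garbage Collection" ||
             F.first == "Objective-C GC Only")
      Info.GCFlags |= std::stoul(F.second);       // GC bits accumulate
    else if (F.first == "Objective-C Image Info Section")
      Info.Section = F.second;
  }
  return Info;
}

int main() {
  ImageInfo I = foldFlags({{"Objective-C Image Info Version", "0"},
                           {"Objective-C Garbage Collection", "2"},
                           {"Objective-C Image Info Section",
                            "__DATA,__image_info,regular,no_dead_strip"}});
  if (I.Section.empty())
    return 0;                                     // no section flag, nothing emitted
  std::printf("version=%u gcflags=%u section=%s\n",
              I.Version, I.GCFlags, I.Section.c_str());
}
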
const MCSection *TargetLoweringObjectFileMachO::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
@@ -358,11 +440,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getNameStr() +
- "' has an invalid section specifier '" + GV->getSection()+
- "': " + ErrorCode + ".");
- // Fall back to dropping it into the data section.
- return DataSection;
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' has an invalid section specifier '" +
+ GV->getSection() + "': " + ErrorCode + ".");
}
// Get the section.
@@ -379,9 +459,9 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
// to reject it here.
if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getNameStr() +
- "' section type or attributes does not match previous"
- " section specifier");
+ report_fatal_error("Global variable '" + GV->getName() +
+ "' section type or attributes does not match previous"
+ " section specifier");
}
return S;
@@ -536,9 +616,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
- MachineModuleInfoImpl::StubValueTy &StubSym =
- GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
- MachOMMI.getGVStubEntry(SSym);
+ MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
@@ -568,6 +646,11 @@ getCOFFSectionFlags(SectionKind K) {
COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isThreadLocal())
+ Flags |=
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
else if (K.isReadOnly())
Flags |=
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -594,6 +677,8 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
return ".text$";
if (Kind.isBSS ())
return ".bss$";
+ if (Kind.isThreadLocal())
+ return ".tls$";
if (Kind.isWriteable())
return ".data$";
return ".rdata$";
@@ -603,7 +688,6 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
const MCSection *TargetLoweringObjectFileCOFF::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- assert(!Kind.isThreadLocal() && "Doesn't support TLS");
// If this global is linkonce/weak and the target handles this by emitting it
// into a 'uniqued' section name, create and return the section now.
@@ -624,6 +708,9 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isText())
return getTextSection();
+ if (Kind.isThreadLocal())
+ return getTLSDataSection();
+
return getDataSection();
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
new file mode 100644
index 000000000000..0f59d0169e18
--- /dev/null
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -0,0 +1,52 @@
+//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the methods in the TargetOptions class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+/// DisableFramePointerElim - This returns true if frame pointer elimination
+/// optimization should be disabled for the given machine function.
+bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return MFI->hasCalls();
+ }
+
+ return NoFramePointerElim;
+}
+
+/// LessPreciseFPMAD - This flag returns true when the -enable-fp-mad option
+/// is specified on the command line. When this flag is off (the default), the
+/// code generator is not allowed to generate mad (multiply-add) if the
+/// result is "less precise" than doing those operations individually.
+bool TargetOptions::LessPreciseFPMAD() const {
+ return UnsafeFPMath || LessPreciseFPMADOption;
+}
+
+/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
+/// that the rounding mode of the FPU can change from its default.
+bool TargetOptions::HonorSignDependentRoundingFPMath() const {
+ return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
+}
+
+/// getTrapFunctionName - If this returns a non-empty string, this means isel
+/// should lower Intrinsic::trap to a call to the specified function name
+/// instead of an ISD::TRAP node.
+StringRef TargetOptions::getTrapFunctionName() const {
+ return TrapFuncName;
+}
+
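
DisableFramePointerElim above collapses to a small decision over two flags plus one function property. A tiny sketch with a few worked cases:

#include <cstdio>

// Keep the frame pointer either everywhere (NoFPElim) or only in functions
// that make calls, i.e. non-leaf functions (NoFPElimNonLeaf).
static bool disableFPElim(bool NoFPElim, bool NoFPElimNonLeaf, bool HasCalls) {
  if (NoFPElimNonLeaf && !NoFPElim)
    return HasCalls;
  return NoFPElim;
}

int main() {
  std::printf("%d\n", disableFPElim(false, true, true));   // 1: non-leaf keeps FP
  std::printf("%d\n", disableFPElim(false, true, false));  // 0: leaf may drop it
  std::printf("%d\n", disableFPElim(false, false, true));  // 0: elimination allowed
}
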
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index d87937822280..c30b1333bb2a 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -56,14 +57,18 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
STATISTIC(NumReMats, "Number of instructions re-materialized");
STATISTIC(NumDeletes, "Number of dead instructions deleted");
+STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up");
+STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down");
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
MachineRegisterInfo *MRI;
LiveVariables *LV;
AliasAnalysis *AA;
+ CodeGenOpt::Level OptLevel;
// DistanceMap - Keep track the distance of a MI from the start of the
// current basic block.
@@ -120,6 +125,18 @@ namespace {
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi, unsigned Dist);
+ bool isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool RescheduleMIBelowKill(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+ bool RescheduleKillAboveMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg);
+
bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
@@ -152,7 +169,6 @@ namespace {
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -225,12 +241,12 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// appropriate location, we can try to sink the current instruction
// past it.
if (!KillMI || KillMI->getParent() != MBB || KillMI == MI ||
- KillMI->getDesc().isTerminator())
+ KillMI->isTerminator())
return false;
// If any of the definitions are used by another instruction between the
// position and the kill use, then it's not safe to sink it.
- //
+ //
// FIXME: This can be sped up if there is an easy way to query whether an
// instruction is before or after another instruction. Then we can use
// MachineRegisterInfo def / use instead.
@@ -273,7 +289,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
KillMO->setIsKill(false);
KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
KillMO->setIsKill(true);
-
+
if (LV)
LV->replaceKillInstruction(SavedReg, KillMI, MI);
@@ -319,7 +335,7 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
continue; // Current use.
OtherUse = true;
// There is at least one other use in the MBB that will clobber the
- // register.
+ // register.
if (isTwoAddrUse(UseMI, Reg))
return true;
}
@@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
return false;
}
+/// findLocalKill - Look for an instruction below MI in the MBB that kills the
+/// specified register. Returns null if there is any other use of Reg between
+/// the instructions.
+static
+MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB,
+ MachineInstr *MI, MachineRegisterInfo *MRI,
+ DenseMap<MachineInstr*, unsigned> &DistanceMap) {
+ MachineInstr *KillMI = 0;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI || UseMI->getParent() != MBB)
+ continue;
+ if (DistanceMap.count(UseMI))
+ continue;
+ if (!UI.getOperand().isKill())
+ return 0;
+ if (KillMI)
+ return 0; // -O0 kill markers cannot be trusted?
+ KillMI = UseMI;
+ }
+
+ return KillMI;
+}
+
/// findOnlyInterestingUse - Given a register, if has a single in-basic block
/// use, return the use instruction if it's a copy or a two-address use.
static
@@ -528,6 +570,9 @@ bool
TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
MachineInstr *MI, MachineBasicBlock *MBB,
unsigned Dist) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
// Determine if it's profitable to commute this two address instruction. In
// general, we want no uses between this instruction and the definition of
// the two-address register.
@@ -544,7 +589,7 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
// %reg1029<def> = MOV8rr %reg1028
// %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead>
// insert => %reg1030<def> = MOV8rr %reg1029
- // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
+ // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead>
if (!MI->killsRegister(regC))
return false;
@@ -770,10 +815,9 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayStore() || MCID.isCall())
+ if (MI->mayStore() || MI->isCall())
return false;
- if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
+ if (MI->isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -852,28 +896,316 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
return true;
}
+/// RescheduleMIBelowKill - If there is one other local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the instruction below the kill
+/// instruction in order to eliminate the need for the copy.
+bool
+TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+ if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies; they may be coalesced later.
+ return false;
+
+ if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() ||
+ KillMI->isBranch() || KillMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ if (TII->getInstrLatency(InstrItins, MI) > 1)
+ // FIXME: Needs more sophisticated heuristics.
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef())
+ Defs.insert(MOReg);
+ else {
+ Uses.insert(MOReg);
+ if (MO.isKill() && MOReg != Reg)
+ Kills.insert(MOReg);
+ }
+ }
+
+ // Move the copies connected to MI down as well.
+ MachineBasicBlock::iterator From = MI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) {
+ Defs.insert(To->getOperand(0).getReg());
+ ++To;
+ }
+
+ // Check that the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ ++KillPos;
+ for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isDef()) {
+ if (Uses.count(MOReg))
+ // Physical register use would be clobbered.
+ return false;
+ if (!MO.isDead() && Defs.count(MOReg))
+ // May clobber a physical register def.
+ // FIXME: This may be too conservative. It's OK if the instruction
+ // is sunk completely below the use.
+ return false;
+ } else {
+ if (Defs.count(MOReg))
+ return false;
+ if (MOReg != Reg &&
+ ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg)))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ }
+ }
+ }
+
+ // Move debug info as well.
+ while (From != MBB->begin() && llvm::prior(From)->isDebugValue())
+ --From;
+
+ // Copies following MI may have been moved as well.
+ nmi = To;
+ MBB->splice(KillPos, MBB, From, To);
+ DistanceMap.erase(DI);
+
+ if (LV) {
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ } else {
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ MO.setIsKill(false);
+ }
+ MI->addRegisterKilled(Reg, 0);
+ }
+
+ return true;
+}
+
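
The legality scan above is, at its core, a dependence check over register sets: nothing between the moved instruction and the kill may define a register it touches, and nothing may read a register it defines. A simplified standalone version of that check (plain sets, not MachineOperands; the real pass additionally refuses calls, branches, and side-effecting instructions, and caps the scan length):

#include <cstdio>
#include <set>
#include <vector>

struct Inst { std::set<unsigned> Uses, Defs; };

// MI may be sunk past Window only if no instruction there writes a register
// MI reads or writes, and none reads a register MI writes.
static bool canSinkPast(const Inst &MI, const std::vector<Inst> &Window) {
  for (const Inst &I : Window) {
    for (unsigned R : I.Defs)
      if (MI.Uses.count(R) || MI.Defs.count(R))
        return false;
    for (unsigned R : I.Uses)
      if (MI.Defs.count(R))
        return false;
  }
  return true;
}

int main() {
  Inst MI{{1}, {2}};                              // r2 = op r1
  std::vector<Inst> W1 = {Inst{{3}, {4}}};        // r4 = op r3: independent
  std::vector<Inst> W2 = {Inst{{2}, {5}}};        // r5 = op r2: reads MI's def
  std::printf("%d %d\n", canSinkPast(MI, W1), canSinkPast(MI, W2));  // 1 0
}
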
+/// isDefTooClose - Return true if the re-scheduling will put the given
+/// instruction too close to the defs of its register dependencies.
+bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
+ MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ MachineInstr *DefMI = &*DI;
+ if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike())
+ continue;
+ if (DefMI == MI)
+ return true; // MI is defining something KillMI uses
+ DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(DefMI);
+ if (DDI == DistanceMap.end())
+ return true; // Below MI
+ unsigned DefDist = DDI->second;
+ assert(Dist > DefDist && "Visited def already?");
+ if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
+ return true;
+ }
+ return false;
+}
+
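
isDefTooClose, just above, is a plain slack computation: the kill cannot be hoisted to a point where one of its producers' results is not ready yet. A worked sketch of that test:

#include <cstdio>

// A def recorded at distance DefDist is "too close" to position Dist when its
// latency exceeds the number of instructions separating the two.
static bool defTooClose(unsigned Dist, unsigned DefDist, int Latency) {
  return Latency > (int)(Dist - DefDist);
}

int main() {
  std::printf("%d\n", defTooClose(10, 8, 3));     // 1: 3 > 10-8, not ready
  std::printf("%d\n", defTooClose(10, 5, 3));     // 0: 3 <= 10-5, safe
}
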
+/// RescheduleKillAboveMI - If there is one other local instruction that reads
+/// 'Reg' and it kills 'Reg', consider moving the kill instruction above the
+/// current two-address instruction in order to eliminate the need for the
+/// copy.
+bool
+TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &mi,
+ MachineBasicBlock::iterator &nmi,
+ unsigned Reg) {
+ MachineInstr *MI = &*mi;
+ DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+ if (DI == DistanceMap.end())
+ // Must be created from unfolded load. Don't waste time trying this.
+ return false;
+
+ MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap);
+ if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike())
+ // Don't mess with copies; they may be coalesced later.
+ return false;
+
+ unsigned DstReg;
+ if (isTwoAddrUse(*KillMI, Reg, DstReg))
+ return false;
+
+ bool SeenStore = true;
+ if (!KillMI->isSafeToMove(TII, AA, SeenStore))
+ return false;
+
+ SmallSet<unsigned, 2> Uses;
+ SmallSet<unsigned, 2> Kills;
+ SmallSet<unsigned, 2> Defs;
+ SmallSet<unsigned, 2> LiveDefs;
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MO.isUse()) {
+ if (!MOReg)
+ continue;
+ if (isDefTooClose(MOReg, DI->second, MI, MBB))
+ return false;
+ Uses.insert(MOReg);
+ if (MO.isKill() && MOReg != Reg)
+ Kills.insert(MOReg);
+ } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Defs.insert(MOReg);
+ if (!MO.isDead())
+ LiveDefs.insert(MOReg);
+ }
+ }
+
+ // Check that the reschedule will not break dependencies.
+ unsigned NumVisited = 0;
+ MachineBasicBlock::iterator KillPos = KillMI;
+ for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) {
+ MachineInstr *OtherMI = I;
+ // DBG_VALUE cannot be counted against the limit.
+ if (OtherMI->isDebugValue())
+ continue;
+ if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost.
+ return false;
+ ++NumVisited;
+ if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() ||
+ OtherMI->isBranch() || OtherMI->isTerminator())
+ // Don't move past calls, etc.
+ return false;
+ SmallVector<unsigned, 2> OtherDefs;
+ for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OtherMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (!MOReg)
+ continue;
+ if (MO.isUse()) {
+ if (Defs.count(MOReg))
+ // Moving KillMI can clobber the physical register if the def has
+ // not been seen.
+ return false;
+ if (Kills.count(MOReg))
+ // Don't want to extend other live ranges and update kills.
+ return false;
+ } else {
+ OtherDefs.push_back(MOReg);
+ }
+ }
+
+ for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) {
+ unsigned MOReg = OtherDefs[i];
+ if (Uses.count(MOReg))
+ return false;
+ if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ LiveDefs.count(MOReg))
+ return false;
+ // Physical register def is seen.
+ Defs.erase(MOReg);
+ }
+ }
+
+ // Move the old kill above MI, don't forget to move debug info as well.
+ MachineBasicBlock::iterator InsertPos = mi;
+ while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue())
+ --InsertPos;
+ MachineBasicBlock::iterator From = KillMI;
+ MachineBasicBlock::iterator To = llvm::next(From);
+ while (llvm::prior(From)->isDebugValue())
+ --From;
+ MBB->splice(InsertPos, MBB, From, To);
+
+ nmi = llvm::prior(InsertPos); // Backtrack so we process the moved instr.
+ DistanceMap.erase(DI);
+
+ if (LV) {
+ // Update live variables
+ LV->removeVirtualRegisterKilled(Reg, KillMI);
+ LV->addVirtualRegisterKilled(Reg, MI);
+ } else {
+ for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = KillMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
+ continue;
+ MO.setIsKill(false);
+ }
+ MI->addRegisterKilled(Reg, 0);
+ }
+ return true;
+}
+
/// TryInstructionTransform - For the case where an instruction has a single
/// pair of tied register operands, attempt some transformations that may
/// either eliminate the tied operands or improve the opportunities for
-/// coalescing away the register copy. Returns true if the tied operands
-/// are eliminated altogether.
+/// coalescing away the register copy. Returns true if no copy needs to be
+/// inserted to untie mi's operands (either because they were untied, or
+/// because mi was rescheduled and will be visited again later).
bool TwoAddressInstructionPass::
TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
SmallPtrSet<MachineInstr*, 8> &Processed) {
- const MCInstrDesc &MCID = mi->getDesc();
- unsigned regA = mi->getOperand(DstIdx).getReg();
- unsigned regB = mi->getOperand(SrcIdx).getReg();
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
+ MachineInstr &MI = *mi;
+ unsigned regA = MI.getOperand(DstIdx).getReg();
+ unsigned regB = MI.getOperand(SrcIdx).getReg();
assert(TargetRegisterInfo::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
// If regA is dead and the instruction can be deleted, just delete
// it so it doesn't clobber regB.
- bool regBKilled = isKilled(*mi, regB, MRI, TII);
- if (!regBKilled && mi->getOperand(DstIdx).isDead() &&
+ bool regBKilled = isKilled(MI, regB, MRI, TII);
+ if (!regBKilled && MI.getOperand(DstIdx).isDead() &&
DeleteUnusedInstr(mi, nmi, mbbi, Dist)) {
++NumDeletes;
return true; // Done with this instruction.
@@ -885,20 +1217,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
- TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
+ if (MI.isCommutable() && MI.getNumOperands() >= 3 &&
+ TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
else if (SrcIdx == SrcOp2)
regCIdx = SrcOp1;
if (regCIdx != ~0U) {
- regC = mi->getOperand(regCIdx).getReg();
- if (!regBKilled && isKilled(*mi, regC, MRI, TII))
+ regC = MI.getOperand(regCIdx).getReg();
+ if (!regBKilled && isKilled(MI, regC, MRI, TII))
// If C dies but B does not, swap the B and C operands.
// This makes the live ranges of A and C joinable.
TryCommute = true;
- else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) {
+ else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) {
TryCommute = true;
AggressiveCommute = true;
}
@@ -913,10 +1245,17 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
}
+ // If there is one more use of regB later in the same MBB, consider
+ // re-scheduling this MI below it.
+ if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) {
+ ++NumReSchedDowns;
+ return true;
+ }
+
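For intuition: rewriting "a = op b, c" into two-address form produces "b = op b, c", which clobbers b, so a COPY preserving b is needed whenever b is read again afterwards. Rescheduling MI down to just below b's last use makes b dead at MI and the copy unnecessary. A toy model of that decision, not LLVM code (the Instr struct is invented):

#include <cstdio>
#include <vector>

// One instruction, reduced to the registers it reads.
struct Instr { std::vector<unsigned> Uses; };

// After two-address rewriting, MI destroys its tied source B, so a COPY of B
// is required exactly when some instruction after MI still reads B.
static bool needsCopyOfTiedSource(const std::vector<Instr> &AfterMI, unsigned B) {
  for (const Instr &I : AfterMI)
    for (unsigned R : I.Uses)
      if (R == B)
        return true;
  return false;
}

int main() {
  unsigned B = 2;                        // the tied source register
  std::vector<Instr> AfterMI = {{{2}}};  // one remaining use of B after MI
  std::printf("before rescheduling: copy %s\n",
              needsCopyOfTiedSource(AfterMI, B) ? "needed" : "avoided");
  // Moving MI just below that last use, as RescheduleMIBelowKill does, leaves
  // nothing reading B after MI, so the copy disappears.
  std::printf("after rescheduling:  copy %s\n",
              needsCopyOfTiedSource({}, B) ? "needed" : "avoided");
}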
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
- if (MCID.isConvertibleTo3Addr()) {
+ if (MI.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -928,6 +1267,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
}
+ // If there is one more use of regB later in the same MBB, consider
+ // re-scheduling it before this MI if it's legal.
+ if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) {
+ ++NumReSchedUps;
+ return true;
+ }
+
// If this is an instruction with a load folded into it, try unfolding
// the load, e.g. avoid this:
// movq %rdx, %rcx
@@ -936,11 +1282,11 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// movq (%rax), %rcx
// addq %rdx, %rcx
// because it's preferable to schedule a load than a register copy.
- if (MCID.mayLoad() && !regBKilled) {
+ if (MI.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
- TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
/*UnfoldLoad=*/true,
/*UnfoldStore=*/false,
&LoadRegIndex);
@@ -950,12 +1296,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineFunction &MF = *mbbi->getParent();
// Unfold the load.
- DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI);
const TargetRegisterClass *RC =
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ if (!TII->unfoldMemoryOperand(MF, &MI, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
@@ -986,21 +1332,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// Success, or at least we made an improvement. Keep the unfolded
// instructions and discard the original.
if (LV) {
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() &&
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
- LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]);
else {
assert(NewMIs[1]->killsRegister(MO.getReg()) &&
"Kill missing after load unfold!");
- LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]);
}
}
- } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) {
if (NewMIs[1]->registerDefIsDead(MO.getReg()))
LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
else {
@@ -1013,7 +1359,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
}
- mi->eraseFromParent();
+ MI.eraseFromParent();
mi = NewMIs[1];
if (TransformSuccess)
return true;
@@ -1035,18 +1381,19 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
/// runOnMachineFunction - Reduce two-address instructions to two operands.
///
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "Machine Function\n");
const TargetMachine &TM = MF.getTarget();
MRI = &MF.getRegInfo();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
+ InstrItins = TM.getInstrItineraryData();
LV = getAnalysisIfAvailable<LiveVariables>();
AA = &getAnalysis<AliasAnalysis>();
+ OptLevel = TM.getOptLevel();
bool MadeChange = false;
DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n");
- DEBUG(dbgs() << "********** Function: "
+ DEBUG(dbgs() << "********** Function: "
<< MF.getFunction()->getName() << '\n');
// This pass takes the function out of SSA form.
@@ -1177,7 +1524,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// If it's safe and profitable, remat the definition instead of
// copying it.
if (DefMI &&
- DefMI->getDesc().isAsCheapAsAMove() &&
+ DefMI->isAsCheapAsAMove() &&
DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
@@ -1248,19 +1595,19 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
MadeChange = true;
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
- }
- // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
- if (mi->isInsertSubreg()) {
- // From %reg = INSERT_SUBREG %reg, %subreg, subidx
- // To %reg:subidx = COPY %subreg
- unsigned SubIdx = mi->getOperand(3).getImm();
- mi->RemoveOperand(3);
- assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
- mi->getOperand(0).setSubReg(SubIdx);
- mi->RemoveOperand(1);
- mi->setDesc(TII->get(TargetOpcode::COPY));
- DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
}
// Clear TiedOperands here instead of at the top of the loop
@@ -1298,6 +1645,36 @@ static void UpdateRegSequenceSrcs(unsigned SrcReg,
}
}
+// Find the first def of Reg, assuming all of its defs are in the same basic block.
+static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
+ SmallPtrSet<MachineInstr*, 8> Defs;
+ MachineInstr *First = 0;
+ for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg);
+ MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI))
+ First = MI;
+ if (!First)
+ return 0;
+
+ MachineBasicBlock *MBB = First->getParent();
+ MachineBasicBlock::iterator A = First, B = First;
+ bool Moving;
+ do {
+ Moving = false;
+ if (A != MBB->begin()) {
+ Moving = true;
+ --A;
+ if (Defs.erase(A)) First = A;
+ }
+ if (B != MBB->end()) {
+ Defs.erase(B);
+ ++B;
+ Moving = true;
+ }
+ } while (Moving && !Defs.empty());
+ assert(Defs.empty() && "Instructions outside basic block!");
+ return First;
+}
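findFirstDef walks outward from an arbitrary def instead of scanning the block from the top, so its cost is bounded by the distance between the defs rather than the block size. The same expanding-window idea in a standalone sketch (ints stand in for instructions; the names are made up):

#include <cstdio>
#include <iterator>
#include <list>
#include <set>

// Return the earliest member of Defs in block order by growing a window
// around one known def until every def has been crossed. The last def crossed
// while moving backwards is the first one in block order.
static std::list<int>::iterator
findFirstDef(std::list<int> &Block, std::set<int> Defs,
             std::list<int>::iterator Start) {
  auto First = Start;
  auto A = Start, B = Start;
  Defs.erase(*Start);
  bool Moving;
  do {
    Moving = false;
    if (A != Block.begin()) {
      Moving = true;
      --A;
      if (Defs.erase(*A))    // an earlier def: new candidate for First
        First = A;
    }
    if (B != Block.end()) {
      Defs.erase(*B);        // later defs never move First
      ++B;
      Moving = true;
    }
  } while (Moving && !Defs.empty());
  return First;
}

int main() {
  std::list<int> Block = {10, 11, 12, 13, 14};
  std::set<int> Defs = {11, 13};             // two defs of the register
  auto Start = std::next(Block.begin(), 3);  // begin the walk at 13
  std::printf("first def: %d\n", *findFirstDef(Block, Defs, Start)); // 11
}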
+
/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
/// EXTRACT_SUBREG from the same register and to the same virtual register
/// with different sub-register indices, attempt to combine the
@@ -1380,8 +1757,10 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
CanCoalesce = false;
break;
}
- // Keep track of one of the uses.
- SomeMI = UseMI;
+ // Keep track of one of the uses. Preferably one that has a
+ // <def,undef> flag.
+ if (!SomeMI || UseMI->getOperand(0).isUndef())
+ SomeMI = UseMI;
}
if (!CanCoalesce)
continue;
@@ -1390,7 +1769,9 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
SomeMI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(DstReg, RegState::Define |
+ getUndefRegState(SomeMI->getOperand(0).isUndef()),
+ NewDstSubIdx)
.addReg(SrcReg, 0, NewSrcSubIdx);
// Remove all the old extract instructions.
@@ -1452,26 +1833,30 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
unsigned SubIdx = MI->getOperand(i+1).getImm();
- if (MI->getOperand(i).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
- llvm_unreachable(0);
+ // A DefMI of NULL means the value does not have a vreg in this block,
+ // i.e., it's a physical register or a subreg. In either case we force
+ // a copy to be generated.
+ MachineInstr *DefMI = NULL;
+ if (!MI->getOperand(i).getSubReg() &&
+ !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ DefMI = MRI->getVRegDef(SrcReg);
}
- MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
- if (DefMI->isImplicitDef()) {
+ if (DefMI && DefMI->isImplicitDef()) {
DefMI->eraseFromParent();
continue;
}
IsImpDef = false;
// Remember COPY sources. These might be candidate for coalescing.
- if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
+ if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
RealSrcs.push_back(DefMI->getOperand(1).getReg());
bool isKill = MI->getOperand(i).isKill();
- if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ if (!DefMI || !Seen.insert(SrcReg) ||
+ MI->getParent() != DefMI->getParent() ||
!isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
!TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
MRI->getRegClass(SrcReg), SubIdx)) {
@@ -1504,9 +1889,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
.addReg(DstReg, RegState::Define, SubIdx)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
MI->getOperand(i).setReg(0);
- if (LV && isKill)
+ if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
LV->replaceKillInstruction(SrcReg, MI, CopyMI);
DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
@@ -1519,11 +1904,27 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
}
+ // Set <def,undef> flags on the first DstReg def in the basic block.
+ // It marks the beginning of the live range. All the other defs are
+ // read-modify-write.
+ if (MachineInstr *Def = findFirstDef(DstReg, MRI)) {
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = Def->getOperand(i);
+ if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ MO.setIsUndef();
+ }
+ // Make sure there is a full non-subreg imp-def operand on the
+ // instruction. This shouldn't be necessary, but it seems that at least
+ // RAFast requires it.
+ Def->addRegisterDefined(DstReg, TRI);
+ DEBUG(dbgs() << "First def: " << *Def);
+ }
+
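A rough intuition for the <def,undef> flag, with byte lanes standing in for sub-registers (a toy model only; the real semantics live in the liveness framework):

#include <cstdint>
#include <cstdio>

// Write one 8-bit lane of a 32-bit "super-register". The first write in a
// block carries <def,undef>: no lane is live yet, so the stale contents need
// not be preserved. Every later lane write is a read-modify-write.
static uint32_t writeLane(uint32_t Super, unsigned Lane, uint8_t V, bool Undef) {
  uint32_t Base = Undef ? 0 : Super;  // undef: free to drop the old lanes
  Base &= ~(0xFFu << (Lane * 8));
  return Base | (uint32_t(V) << (Lane * 8));
}

int main() {
  uint32_t R = 0xDEADBEEF;                     // stale contents
  R = writeLane(R, 0, 0x11, /*Undef=*/true);   // first def starts the live range
  R = writeLane(R, 1, 0x22, /*Undef=*/false);  // later def preserves lane 0
  std::printf("%08X\n", R);                    // prints 00002211
}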
if (IsImpDef) {
DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
- MI->RemoveOperand(j);
+ MI->RemoveOperand(j);
} else {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 8a1cdc01c494..3bab93bdc098 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -16,10 +16,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "virtregmap"
+#define DEBUG_TYPE "regalloc"
#include "VirtRegMap.h"
#include "llvm/Function.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -32,12 +31,8 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
#include <algorithm>
using namespace llvm;
@@ -58,34 +53,11 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
TRI = mf.getTarget().getRegisterInfo();
MF = &mf;
- ReMatId = MAX_STACK_SLOT+1;
- LowSpillSlot = HighSpillSlot = NO_STACK_SLOT;
-
Virt2PhysMap.clear();
Virt2StackSlotMap.clear();
- Virt2ReMatIdMap.clear();
Virt2SplitMap.clear();
- Virt2SplitKillMap.clear();
- ReMatMap.clear();
- ImplicitDefed.clear();
- SpillSlotToUsesMap.clear();
- MI2VirtMap.clear();
- SpillPt2VirtMap.clear();
- RestorePt2VirtMap.clear();
- EmergencySpillMap.clear();
- EmergencySpillSlots.clear();
-
- SpillSlotToUsesMap.resize(8);
- ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
-
- allocatableRCRegs.clear();
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- allocatableRCRegs.insert(std::make_pair(*I,
- TRI->getAllocatableSet(mf, *I)));
grow();
-
return false;
}
@@ -93,24 +65,12 @@ void VirtRegMap::grow() {
unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
Virt2PhysMap.resize(NumRegs);
Virt2StackSlotMap.resize(NumRegs);
- Virt2ReMatIdMap.resize(NumRegs);
Virt2SplitMap.resize(NumRegs);
- Virt2SplitKillMap.resize(NumRegs);
- ReMatMap.resize(NumRegs);
- ImplicitDefed.resize(NumRegs);
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- assert(SS >= LowSpillSlot && "Unexpected low spill slot");
- unsigned Idx = SS-LowSpillSlot;
- while (Idx >= SpillSlotToUsesMap.size())
- SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
++NumSpillSlots;
return SS;
}
@@ -144,118 +104,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
Virt2StackSlotMap[virtReg] = SS;
}
-int VirtRegMap::assignVirtReMatId(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
- "attempt to assign re-mat id to already spilled register");
- Virt2ReMatIdMap[virtReg] = ReMatId;
- return ReMatId++;
-}
-
-void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT &&
- "attempt to assign re-mat id to already spilled register");
- Virt2ReMatIdMap[virtReg] = id;
-}
-
-int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
- std::map<const TargetRegisterClass*, int>::iterator I =
- EmergencySpillSlots.find(RC);
- if (I != EmergencySpillSlots.end())
- return I->second;
- return EmergencySpillSlots[RC] = createSpillSlot(RC);
-}
-
-void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
- if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) {
- // If FI < LowSpillSlot, this stack reference was produced by
- // instruction selection and is not a spill
- if (FI >= LowSpillSlot) {
- assert(FI >= 0 && "Spill slot index should not be negative!");
- assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
- && "Invalid spill slot");
- SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI);
- }
- }
-}
-
-void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI,
- MachineInstr *NewMI, ModRef MRInfo) {
- // Move previous memory references folded to new instruction.
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI);
- for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI),
- E = MI2VirtMap.end(); I != E && I->first == OldMI; ) {
- MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second));
- MI2VirtMap.erase(I++);
- }
-
- // add new memory reference
- MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo)));
-}
-
-void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) {
- MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI);
- MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo)));
-}
-
-void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isFI())
- continue;
- int FI = MO.getIndex();
- if (MF->getFrameInfo()->isFixedObjectIndex(FI))
- continue;
- // This stack reference was produced by instruction selection and
- // is not a spill
- if (FI < LowSpillSlot)
- continue;
- assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size()
- && "Invalid spill slot");
- SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI);
- }
- MI2VirtMap.erase(MI);
- SpillPt2VirtMap.erase(MI);
- RestorePt2VirtMap.erase(MI);
- EmergencySpillMap.erase(MI);
-}
-
-/// FindUnusedRegisters - Gather a list of allocatable registers that
-/// have not been allocated to any virtual register.
-bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
- unsigned NumRegs = TRI->getNumRegs();
- UnusedRegs.reset();
- UnusedRegs.resize(NumRegs);
-
- BitVector Used(NumRegs);
- for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
- if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
- Used.set(Virt2PhysMap[Reg]);
- }
-
- BitVector Allocatable = TRI->getAllocatableSet(*MF);
- bool AnyUnused = false;
- for (unsigned Reg = 1; Reg < NumRegs; ++Reg) {
- if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) {
- bool ReallyUnused = true;
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
- if (Used[*AS] || LIs->hasInterval(*AS)) {
- ReallyUnused = false;
- break;
- }
- }
- if (ReallyUnused) {
- AnyUnused = true;
- UnusedRegs.set(Reg);
- }
- }
- }
-
- return AnyUnused;
-}
-
void VirtRegMap::rewrite(SlotIndexes *Indexes) {
DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
<< "********** Function: "
@@ -264,23 +112,32 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
SmallVector<unsigned, 8> SuperDeads;
SmallVector<unsigned, 8> SuperDefs;
SmallVector<unsigned, 8> SuperKills;
+#ifndef NDEBUG
+ BitVector Reserved = TRI->getReservedRegs(*MF);
+#endif
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
DEBUG(MBBI->print(dbgs(), Indexes));
- for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
- MII != MIE;) {
+ for (MachineBasicBlock::instr_iterator
+ MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
MachineInstr *MI = MII;
++MII;
for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
MachineOperand &MO = *MOI;
+
+ // Make sure MRI knows about registers clobbered by regmasks.
+ if (MO.isRegMask())
+ MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
+
if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
unsigned VirtReg = MO.getReg();
unsigned PhysReg = getPhys(VirtReg);
assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+ assert(!Reserved.test(PhysReg) && "Reserved register assignment");
// Preserve semantics of sub-register operands.
if (MO.getSubReg()) {
@@ -332,7 +189,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) {
++NumIdCopies;
if (MI->getNumOperands() == 2) {
DEBUG(dbgs() << "Deleting identity copy.\n");
- RemoveMachineInstrFromMaps(MI);
if (Indexes)
Indexes->removeMachineInstrFromMaps(MI);
// It's safe to erase MI because MII has already been incremented.
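The regmask handling added above folds call-clobber information into MRI. A regmask is a bit vector over physical registers in which a set bit means the register is preserved across the call and a clear bit means it is clobbered; a minimal standalone reader of that layout (the mask values are hypothetical):

#include <cstdint>
#include <cstdio>

// Set bit => preserved, clear bit => clobbered. MachineOperand's
// clobbersPhysReg is the negation of exactly this test.
static bool isPreserved(const uint32_t *Mask, unsigned PhysReg) {
  return Mask[PhysReg / 32] & (1u << (PhysReg % 32));
}

int main() {
  // Mask covering 64 registers: regs 8..15 are call-clobbered.
  const uint32_t Mask[2] = {0xFFFF00FFu, 0xFFFFFFFFu};
  const unsigned Regs[] = {4, 10, 40};
  for (unsigned R : Regs)
    std::printf("r%u: %s\n", R, isPreserved(Mask, R) ? "preserved" : "clobbered");
}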
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 03abff356934..8cac31137e3d 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -18,22 +18,14 @@
#define LLVM_CODEGEN_VIRTREGMAP_H
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include <map>
namespace llvm {
- class LiveIntervals;
class MachineInstr;
class MachineFunction;
class MachineRegisterInfo;
class TargetInstrInfo;
- class TargetRegisterInfo;
class raw_ostream;
class SlotIndexes;
@@ -45,18 +37,12 @@ namespace llvm {
MAX_STACK_SLOT = (1L << 18)-1
};
- enum ModRef { isRef = 1, isMod = 2, isModRef = 3 };
- typedef std::multimap<MachineInstr*,
- std::pair<unsigned, ModRef> > MI2VirtMapTy;
-
private:
MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineFunction *MF;
- DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs;
-
/// Virt2PhysMap - This is a virtual to physical register
/// mapping. Each virtual register is required to have an entry in
/// it; even spilled virtual registers (the register mapped to a
@@ -70,71 +56,10 @@ namespace llvm {
/// at.
IndexedMap<int, VirtReg2IndexFunctor> Virt2StackSlotMap;
- /// Virt2ReMatIdMap - This is virtual register to rematerialization id
- /// mapping. Each spilled virtual register that should be remat'd has an
- /// entry in it which corresponds to the remat id.
- IndexedMap<int, VirtReg2IndexFunctor> Virt2ReMatIdMap;
-
/// Virt2SplitMap - This is virtual register to split virtual register
/// mapping.
IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2SplitMap;
- /// Virt2SplitKillMap - This is split virtual register to its last use
- /// (kill) index mapping.
- IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
-
- /// ReMatMap - This is virtual register to re-materialized instruction
- /// mapping. Each virtual register whose definition is going to be
- /// re-materialized has an entry in it.
- IndexedMap<MachineInstr*, VirtReg2IndexFunctor> ReMatMap;
-
- /// MI2VirtMap - This is MachineInstr to virtual register
- /// mapping. In the case of memory spill code being folded into
- /// instructions, we need to know which virtual register was
- /// read/written by this instruction.
- MI2VirtMapTy MI2VirtMap;
-
- /// SpillPt2VirtMap - This records the virtual registers which should
- /// be spilled right after the MachineInstr due to live interval
- /// splitting.
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >
- SpillPt2VirtMap;
-
- /// RestorePt2VirtMap - This records the virtual registers which should
- /// be restored right before the MachineInstr due to live interval
- /// splitting.
- std::map<MachineInstr*, std::vector<unsigned> > RestorePt2VirtMap;
-
- /// EmergencySpillMap - This records the physical registers that should
- /// be spilled / restored around the MachineInstr since the register
- /// allocator has run out of registers.
- std::map<MachineInstr*, std::vector<unsigned> > EmergencySpillMap;
-
- /// EmergencySpillSlots - This records emergency spill slots used to
- /// spill physical registers when the register allocator runs out of
- /// registers. Ideally only one stack slot is used per function per
- /// register class.
- std::map<const TargetRegisterClass*, int> EmergencySpillSlots;
-
- /// ReMatId - Instead of assigning a stack slot to a to be rematerialized
- /// virtual register, an unique id is being assigned. This keeps track of
- /// the highest id used so far. Note, this starts at (1<<18) to avoid
- /// conflicts with stack slot numbers.
- int ReMatId;
-
- /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes.
- int LowSpillSlot, HighSpillSlot;
-
- /// SpillSlotToUsesMap - Records uses for each register spill slot.
- SmallVector<SmallPtrSet<MachineInstr*, 4>, 8> SpillSlotToUsesMap;
-
- /// ImplicitDefed - One bit for each virtual register. If set it indicates
- /// the register is implicitly defined.
- BitVector ImplicitDefed;
-
- /// UnusedRegs - A list of physical registers that have not been used.
- BitVector UnusedRegs;
-
/// createSpillSlot - Allocate a spill slot for RC from MFI.
unsigned createSpillSlot(const TargetRegisterClass *RC);
@@ -144,11 +69,7 @@ namespace llvm {
public:
static char ID;
VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
- Virt2StackSlotMap(NO_STACK_SLOT),
- Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
- Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
- ReMatId(MAX_STACK_SLOT+1),
- LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { }
+ Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -235,8 +156,7 @@ namespace llvm {
/// @brief returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
bool isAssignedReg(unsigned virtReg) const {
- if (getStackSlot(virtReg) == NO_STACK_SLOT &&
- getReMatId(virtReg) == NO_STACK_SLOT)
+ if (getStackSlot(virtReg) == NO_STACK_SLOT)
return true;
// Split register can be assigned a physical register as well as a
// stack slot or remat id.
@@ -250,13 +170,6 @@ namespace llvm {
return Virt2StackSlotMap[virtReg];
}
- /// @brief returns the rematerialization id mapped to the specified virtual
- /// register
- int getReMatId(unsigned virtReg) const {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- return Virt2ReMatIdMap[virtReg];
- }
-
/// @brief create a mapping for the specified virtual register to
/// the next available stack slot
int assignVirt2StackSlot(unsigned virtReg);
@@ -264,250 +177,6 @@ namespace llvm {
/// the specified stack slot
void assignVirt2StackSlot(unsigned virtReg, int frameIndex);
- /// @brief assign an unique re-materialization id to the specified
- /// virtual register.
- int assignVirtReMatId(unsigned virtReg);
- /// @brief assign an unique re-materialization id to the specified
- /// virtual register.
- void assignVirtReMatId(unsigned virtReg, int id);
-
- /// @brief returns true if the specified virtual register is being
- /// re-materialized.
- bool isReMaterialized(unsigned virtReg) const {
- return ReMatMap[virtReg] != NULL;
- }
-
- /// @brief returns the original machine instruction being re-issued
- /// to re-materialize the specified virtual register.
- MachineInstr *getReMaterializedMI(unsigned virtReg) const {
- return ReMatMap[virtReg];
- }
-
- /// @brief records the specified virtual register will be
- /// re-materialized and the original instruction which will be re-issed
- /// for this purpose. If parameter all is true, then all uses of the
- /// registers are rematerialized and it's safe to delete the definition.
- void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) {
- ReMatMap[virtReg] = def;
- }
-
- /// @brief record the last use (kill) of a split virtual register.
- void addKillPoint(unsigned virtReg, SlotIndex index) {
- Virt2SplitKillMap[virtReg] = index;
- }
-
- SlotIndex getKillPoint(unsigned virtReg) const {
- return Virt2SplitKillMap[virtReg];
- }
-
- /// @brief remove the last use (kill) of a split virtual register.
- void removeKillPoint(unsigned virtReg) {
- Virt2SplitKillMap[virtReg] = SlotIndex();
- }
-
- /// @brief returns true if the specified MachineInstr is a spill point.
- bool isSpillPt(MachineInstr *Pt) const {
- return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end();
- }
-
- /// @brief returns the virtual registers that should be spilled due to
- /// splitting right after the specified MachineInstr.
- std::vector<std::pair<unsigned,bool> > &getSpillPtSpills(MachineInstr *Pt) {
- return SpillPt2VirtMap[Pt];
- }
-
- /// @brief records the specified MachineInstr as a spill point for virtReg.
- void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) {
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
- I = SpillPt2VirtMap.find(Pt);
- if (I != SpillPt2VirtMap.end())
- I->second.push_back(std::make_pair(virtReg, isKill));
- else {
- std::vector<std::pair<unsigned,bool> > Virts;
- Virts.push_back(std::make_pair(virtReg, isKill));
- SpillPt2VirtMap.insert(std::make_pair(Pt, Virts));
- }
- }
-
- /// @brief - transfer spill point information from one instruction to
- /// another.
- void transferSpillPts(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*, std::vector<std::pair<unsigned,bool> > >::iterator
- I = SpillPt2VirtMap.find(Old);
- if (I == SpillPt2VirtMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back().first;
- bool isKill = I->second.back().second;
- I->second.pop_back();
- addSpillPoint(virtReg, isKill, New);
- }
- SpillPt2VirtMap.erase(I);
- }
-
- /// @brief returns true if the specified MachineInstr is a restore point.
- bool isRestorePt(MachineInstr *Pt) const {
- return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end();
- }
-
- /// @brief returns the virtual registers that should be restored due to
- /// splitting right after the specified MachineInstr.
- std::vector<unsigned> &getRestorePtRestores(MachineInstr *Pt) {
- return RestorePt2VirtMap[Pt];
- }
-
- /// @brief records the specified MachineInstr as a restore point for virtReg.
- void addRestorePoint(unsigned virtReg, MachineInstr *Pt) {
- std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
- RestorePt2VirtMap.find(Pt);
- if (I != RestorePt2VirtMap.end())
- I->second.push_back(virtReg);
- else {
- std::vector<unsigned> Virts;
- Virts.push_back(virtReg);
- RestorePt2VirtMap.insert(std::make_pair(Pt, Virts));
- }
- }
-
- /// @brief - transfer restore point information from one instruction to
- /// another.
- void transferRestorePts(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*, std::vector<unsigned> >::iterator I =
- RestorePt2VirtMap.find(Old);
- if (I == RestorePt2VirtMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back();
- I->second.pop_back();
- addRestorePoint(virtReg, New);
- }
- RestorePt2VirtMap.erase(I);
- }
-
- /// @brief records that the specified physical register must be spilled
- /// around the specified machine instr.
- void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) {
- if (EmergencySpillMap.find(MI) != EmergencySpillMap.end())
- EmergencySpillMap[MI].push_back(PhysReg);
- else {
- std::vector<unsigned> PhysRegs;
- PhysRegs.push_back(PhysReg);
- EmergencySpillMap.insert(std::make_pair(MI, PhysRegs));
- }
- }
-
- /// @brief returns true if one or more physical registers must be spilled
- /// around the specified instruction.
- bool hasEmergencySpills(MachineInstr *MI) const {
- return EmergencySpillMap.find(MI) != EmergencySpillMap.end();
- }
-
- /// @brief returns the physical registers to be spilled and restored around
- /// the instruction.
- std::vector<unsigned> &getEmergencySpills(MachineInstr *MI) {
- return EmergencySpillMap[MI];
- }
-
- /// @brief - transfer emergency spill information from one instruction to
- /// another.
- void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) {
- std::map<MachineInstr*,std::vector<unsigned> >::iterator I =
- EmergencySpillMap.find(Old);
- if (I == EmergencySpillMap.end())
- return;
- while (!I->second.empty()) {
- unsigned virtReg = I->second.back();
- I->second.pop_back();
- addEmergencySpill(virtReg, New);
- }
- EmergencySpillMap.erase(I);
- }
-
- /// @brief return or get a emergency spill slot for the register class.
- int getEmergencySpillSlot(const TargetRegisterClass *RC);
-
- /// @brief Return lowest spill slot index.
- int getLowSpillSlot() const {
- return LowSpillSlot;
- }
-
- /// @brief Return highest spill slot index.
- int getHighSpillSlot() const {
- return HighSpillSlot;
- }
-
- /// @brief Records a spill slot use.
- void addSpillSlotUse(int FrameIndex, MachineInstr *MI);
-
- /// @brief Returns true if spill slot has been used.
- bool isSpillSlotUsed(int FrameIndex) const {
- assert(FrameIndex >= 0 && "Spill slot index should not be negative!");
- return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty();
- }
-
- /// @brief Mark the specified register as being implicitly defined.
- void setIsImplicitlyDefined(unsigned VirtReg) {
- ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
- }
-
- /// @brief Returns true if the virtual register is implicitly defined.
- bool isImplicitlyDefined(unsigned VirtReg) const {
- return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
- }
-
- /// @brief Updates information about the specified virtual register's value
- /// folded into newMI machine instruction.
- void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI,
- ModRef MRInfo);
-
- /// @brief Updates information about the specified virtual register's value
- /// folded into the specified machine instruction.
- void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo);
-
- /// @brief returns the virtual registers' values folded in memory
- /// operands of this instruction
- std::pair<MI2VirtMapTy::const_iterator, MI2VirtMapTy::const_iterator>
- getFoldedVirts(MachineInstr* MI) const {
- return MI2VirtMap.equal_range(MI);
- }
-
- /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the
- /// the folded instruction map and spill point map.
- void RemoveMachineInstrFromMaps(MachineInstr *MI);
-
- /// FindUnusedRegisters - Gather a list of allocatable registers that
- /// have not been allocated to any virtual register.
- bool FindUnusedRegisters(LiveIntervals* LIs);
-
- /// HasUnusedRegisters - Return true if there are any allocatable registers
- /// that have not been allocated to any virtual register.
- bool HasUnusedRegisters() const {
- return !UnusedRegs.none();
- }
-
- /// setRegisterUsed - Remember the physical register is now used.
- void setRegisterUsed(unsigned Reg) {
- UnusedRegs.reset(Reg);
- }
-
- /// isRegisterUnused - Return true if the physical register has not been
- /// used.
- bool isRegisterUnused(unsigned Reg) const {
- return UnusedRegs[Reg];
- }
-
- /// getFirstUnusedRegister - Return the first physical register that has not
- /// been used.
- unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) {
- int Reg = UnusedRegs.find_first();
- while (Reg != -1) {
- if (allocatableRCRegs[RC][Reg])
- return (unsigned)Reg;
- Reg = UnusedRegs.find_next(Reg);
- }
- return 0;
- }
-
/// rewrite - Rewrite all instructions in MF to use only physical registers
/// by mapping all virtual register operands to their assigned physical
/// registers.
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
deleted file mode 100644
index a5ec797b27db..000000000000
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ /dev/null
@@ -1,2633 +0,0 @@
-//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "virtregrewriter"
-#include "VirtRegRewriter.h"
-#include "VirtRegMap.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumDSE , "Number of dead stores elided");
-STATISTIC(NumDSS , "Number of dead spill slots removed");
-STATISTIC(NumCommutes, "Number of instructions commuted");
-STATISTIC(NumDRM , "Number of re-materializable defs elided");
-STATISTIC(NumStores , "Number of stores added");
-STATISTIC(NumPSpills , "Number of physical register spills");
-STATISTIC(NumOmitted , "Number of reloads omitted");
-STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
-STATISTIC(NumCopified, "Number of available reloads turned into copies");
-STATISTIC(NumReMats , "Number of re-materialization");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumReused , "Number of values reused");
-STATISTIC(NumDCE , "Number of copies elided");
-STATISTIC(NumSUnfold , "Number of stores unfolded");
-STATISTIC(NumModRefUnfold, "Number of modref unfolded");
-
-namespace {
- enum RewriterName { local, trivial };
-}
-
-static cl::opt<RewriterName>
-RewriterOpt("rewriter",
- cl::desc("Rewriter to use (default=local)"),
- cl::Prefix,
- cl::values(clEnumVal(local, "local rewriter"),
- clEnumVal(trivial, "trivial rewriter"),
- clEnumValEnd),
- cl::init(local));
-
-static cl::opt<bool>
-ScheduleSpills("schedule-spills",
- cl::desc("Schedule spill code"),
- cl::init(false));
-
-VirtRegRewriter::~VirtRegRewriter() {}
-
-/// substitutePhysReg - Replace virtual register in MachineOperand with a
-/// physical register. Do the right thing with the sub-register index.
-/// Note that operands may be added, so the MO reference is no longer valid.
-static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
- const TargetRegisterInfo &TRI) {
- if (MO.getSubReg()) {
- MO.substPhysReg(Reg, TRI);
-
- // Any kill flags apply to the full virtual register, so they also apply to
- // the full physical register.
- // We assume that partial defs have already been decorated with a super-reg
- // <imp-def> operand by LiveIntervals.
- MachineInstr &MI = *MO.getParent();
- if (MO.isUse() && !MO.isUndef() &&
- (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
- MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
- } else {
- MO.setReg(Reg);
- }
-}
-
-namespace {
-
-/// This class is intended for use with the new spilling framework only. It
-/// rewrites vreg def/uses to use the assigned preg, but does not insert any
-/// spill code.
-struct TrivialRewriter : public VirtRegRewriter {
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) {
- DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
- DEBUG(dbgs() << "********** Function: "
- << MF.getFunction()->getName() << '\n');
- DEBUG(dbgs() << "**** Machine Instrs"
- << "(NOTE! Does not include spills and reloads!) ****\n");
- DEBUG(MF.dump());
-
- MachineRegisterInfo *mri = &MF.getRegInfo();
- const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
-
- bool changed = false;
-
- for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
- liItr != liEnd; ++liItr) {
-
- const LiveInterval *li = liItr->second;
- unsigned reg = li->reg;
-
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- if (!li->empty())
- mri->setPhysRegUsed(reg);
- }
- else {
- if (!VRM.hasPhys(reg))
- continue;
- unsigned pReg = VRM.getPhys(reg);
- mri->setPhysRegUsed(pReg);
- // Copy the register use-list before traversing it.
- SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
- for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
- E = mri->reg_end(); I != E; ++I)
- reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
- for (unsigned N=0; N != reglist.size(); ++N)
- substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
- pReg, *tri);
- changed |= !reglist.empty();
- }
- }
-
- DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.dump());
-
- return changed;
- }
-
-};
-
-}
-
-// ************************************************************************ //
-
-namespace {
-
-/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
-/// from top down, keep track of which spill slots or remat are available in
-/// each register.
-///
-/// Note that not all physregs are created equal here. In particular, some
-/// physregs are reloads that we are allowed to clobber or ignore at any time.
-/// Other physregs are values that the register allocated program is using
-/// that we cannot CHANGE, but we can read if we like. We keep track of this
-/// on a per-stack-slot / remat id basis as the low bit in the value of the
-/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
- /// this bit and addAvailable sets it.
-class AvailableSpills {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
-
- // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
- // or remat'ed virtual register values that are still available, due to
- // being loaded or stored to, but not invalidated yet.
- std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
-
- // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
- // indicating which stack slot values are currently held by a physreg. This
- // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
- // physreg is modified.
- std::multimap<unsigned, int> PhysRegsAvailable;
-
- void disallowClobberPhysRegOnly(unsigned PhysReg);
-
- void ClobberPhysRegOnly(unsigned PhysReg);
-public:
- AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
- : TRI(tri), TII(tii) {
- }
-
- /// clear - Reset the state.
- void clear() {
- SpillSlotsOrReMatsAvailable.clear();
- PhysRegsAvailable.clear();
- }
-
- const TargetRegisterInfo *getRegInfo() const { return TRI; }
-
- /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
- /// available in a physical register, return that PhysReg, otherwise
- /// return 0.
- unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
- std::map<int, unsigned>::const_iterator I =
- SpillSlotsOrReMatsAvailable.find(Slot);
- if (I != SpillSlotsOrReMatsAvailable.end()) {
- return I->second >> 1; // Remove the CanClobber bit.
- }
- return 0;
- }
-
- /// addAvailable - Mark that the specified stack slot / remat is available
- /// in the specified physreg. If CanClobber is true, the physreg can be
- /// modified at any time without changing the semantics of the program.
- void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
- // If this stack slot is thought to be available in some other physreg,
- // remove its record.
- ModifyStackSlotOrReMat(SlotOrReMat);
-
- PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
- SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
- (unsigned)CanClobber;
-
- if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Remembering RM#"
- << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
- << (CanClobber ? " canclobber" : "") << "\n");
- }
-
- /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
- /// the value of the specified stackslot register if it desires. The
- /// specified stack slot must be available in a physreg for this query to
- /// make sense.
- bool canClobberPhysRegForSS(int SlotOrReMat) const {
- assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
- "Value not available!");
- return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
- }
-
- /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
- /// physical register where values for some stack slot(s) might be
- /// available.
- bool canClobberPhysReg(unsigned PhysReg) const {
- std::multimap<unsigned, int>::const_iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- I++;
- if (!canClobberPhysRegForSS(SlotOrReMat))
- return false;
- }
- return true;
- }
-
- /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
- /// stackslot register. The register is still available but is no longer
- /// allowed to be modified.
- void disallowClobberPhysReg(unsigned PhysReg);
-
- /// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff that lives in
- /// it and any of its aliases.
- void ClobberPhysReg(unsigned PhysReg);
-
- /// ModifyStackSlotOrReMat - This method is called when the value in a stack
- /// slot changes. This removes information about which register the
- /// previous value for this slot lives in (as the previous value is dead
- /// now).
- void ModifyStackSlotOrReMat(int SlotOrReMat);
-
- /// ClobberSharingStackSlots - When a register mapped to a stack slot changes,
- /// other stack slots sharing the same register are no longer valid.
- void ClobberSharingStackSlots(int StackSlot);
-
- /// AddAvailableRegsToLiveIn - Availability information is being kept coming
- /// into the specified MBB. Add available physical registers as potential
- /// live-in's. If they are reused in the MBB, they will be added to the
- /// live-in set to make register scavenger and post-allocation scheduler.
- void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-};
-
-}
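The CanClobber bit described above was packed into the low bit of the map value, with the physreg in the remaining bits. A small standalone re-creation of that encoding, for illustration only:

#include <cstdio>
#include <map>

int main() {
  // slot or remat id -> (PhysReg << 1) | CanClobber, as in
  // SpillSlotsOrReMatsAvailable.
  std::map<int, unsigned> Avail;
  Avail[7] = (42u << 1) | 1u;   // slot 7 lives in r42 and may be clobbered
  Avail[9] = (13u << 1) | 0u;   // slot 9 lives in r13 and must be kept
  for (const auto &E : Avail)
    std::printf("slot %d: reg=r%u canClobber=%u\n",
                E.first, E.second >> 1, E.second & 1u);
}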
-
-// ************************************************************************ //
-
-// Given a location where a reload of a spilled register or a remat of
-// a constant is to be inserted, attempt to find a safe location to
-// insert the load at an earlier point in the basic-block, to hide
-// latency of the load and to avoid address-generation interlock
-// issues.
-static MachineBasicBlock::iterator
-ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
- MachineBasicBlock::iterator const Begin,
- unsigned PhysReg,
- const TargetRegisterInfo *TRI,
- bool DoReMat,
- int SSorRMId,
- const TargetInstrInfo *TII,
- const MachineFunction &MF)
-{
- if (!ScheduleSpills)
- return InsertLoc;
-
- // Spill backscheduling is of primary interest to addresses, so
- // don't do anything if the register isn't in the register class
- // used for pointers.
-
- const TargetLowering *TL = MF.getTarget().getTargetLowering();
-
- if (!TL->isTypeLegal(TL->getPointerTy()))
- // Believe it or not, this is true on 16-bit targets like PIC16.
- return InsertLoc;
-
- const TargetRegisterClass *ptrRegClass =
- TL->getRegClassFor(TL->getPointerTy());
- if (!ptrRegClass->contains(PhysReg))
- return InsertLoc;
-
- // Scan upwards through the preceding instructions. If an instruction doesn't
- // reference the stack slot or the register we're loading, we can
- // backschedule the reload up past it.
- MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
- while (NewInsertLoc != Begin) {
- MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
- for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
- MachineOperand &Op = Prev->getOperand(i);
- if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
- goto stop;
- }
- if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
- Prev->findRegisterDefOperand(PhysReg))
- goto stop;
- for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
- if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
- Prev->findRegisterDefOperand(*Alias))
- goto stop;
- NewInsertLoc = Prev;
- }
-stop:;
-
- // If we made it to the beginning of the block, turn around and move back
- // down just past any existing reloads. They're likely to be reloads/remats
- // for instructions earlier than what our current reload/remat is for, so
- // they should be scheduled earlier.
- if (NewInsertLoc == Begin) {
- int FrameIdx;
- while (InsertLoc != NewInsertLoc &&
- (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
- TII->isTriviallyReMaterializable(NewInsertLoc)))
- ++NewInsertLoc;
- }
-
- return NewInsertLoc;
-}
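The core of the deleted backscheduler is an upward scan that stops at the first instruction touching the same stack slot or register. A compact standalone model of that loop (the Instr encoding is invented):

#include <cstdio>
#include <vector>

// What each instruction touches; -1 and 0 mean "nothing relevant".
struct Instr { int Slot; unsigned Reg; };

// Walk upward from InsertLoc while the preceding instruction touches neither
// the spill slot being reloaded nor the destination register.
static size_t backschedule(const std::vector<Instr> &Block, size_t InsertLoc,
                           int Slot, unsigned Reg) {
  size_t Loc = InsertLoc;
  while (Loc > 0) {
    const Instr &Prev = Block[Loc - 1];
    if (Prev.Slot == Slot || Prev.Reg == Reg)
      break;
    --Loc;
  }
  return Loc;
}

int main() {
  std::vector<Instr> Block = {{5, 0}, {-1, 0}, {-1, 0}};
  // The reload of slot 5 into r7 hoists from position 3 up to position 1,
  // just below the instruction that touches slot 5.
  std::printf("reload moves from 3 to %zu\n", backschedule(Block, 3, 5, 7));
}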
-
-namespace {
-
-// ReusedOp - For each reused operand, we keep track of a bit of information,
-// in case we need to rollback upon processing a new operand. See comments
-// below.
-struct ReusedOp {
- // The MachineInstr operand that reused an available value.
- unsigned Operand;
-
- // StackSlotOrReMat - The spill slot or remat id of the value being reused.
- unsigned StackSlotOrReMat;
-
- // PhysRegReused - The physical register the value was available in.
- unsigned PhysRegReused;
-
- // AssignedPhysReg - The physreg that was assigned for use by the reload.
- unsigned AssignedPhysReg;
-
- // VirtReg - The virtual register itself.
- unsigned VirtReg;
-
- ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
- unsigned vreg)
- : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
- AssignedPhysReg(apr), VirtReg(vreg) {}
-};
-
-/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
-/// is reused instead of reloaded.
-class ReuseInfo {
- MachineInstr &MI;
- std::vector<ReusedOp> Reuses;
- BitVector PhysRegsClobbered;
-public:
- ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
- PhysRegsClobbered.resize(tri->getNumRegs());
- }
-
- bool hasReuses() const {
- return !Reuses.empty();
- }
-
- /// addReuse - If we choose to reuse a virtual register that is already
- /// available instead of reloading it, remember that we did so.
- void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
- unsigned PhysRegReused, unsigned AssignedPhysReg,
- unsigned VirtReg) {
- // If the reload is to the assigned register anyway, no undo will be
- // required.
- if (PhysRegReused == AssignedPhysReg) return;
-
- // Otherwise, remember this.
- Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
- AssignedPhysReg, VirtReg));
- }
-
- void markClobbered(unsigned PhysReg) {
- PhysRegsClobbered.set(PhysReg);
- }
-
- bool isClobbered(unsigned PhysReg) const {
- return PhysRegsClobbered.test(PhysReg);
- }
-
- /// GetRegForReload - We are about to emit a reload into PhysReg. If there
- /// is some other operand that is using the specified register, either pick
- /// a new register to use, or evict the previous reload and use this reg.
- unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
- MachineFunction &MF, MachineInstr *MI,
- AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- SmallSet<unsigned, 8> &Rejected,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM);
-
- /// GetRegForReload - Helper for the above GetRegForReload(). Add a
- /// 'Rejected' set to remember which registers have been considered and
- /// rejected for the reload. This avoids infinite looping in case like
- /// this:
- /// t1 := op t2, t3
- /// t2 <- assigned r0 for use by the reload but ended up reuse r1
- /// t3 <- assigned r1 for use by the reload but ended up reuse r0
- /// t1 <- desires r1
- /// sees r1 is taken by t2, tries t2's reload register r0
- /// sees r0 is taken by t3, tries t3's reload register r1
- /// sees r1 is taken by t2, tries t2's reload register r0 ...
- unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
- AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM) {
- SmallSet<unsigned, 8> Rejected;
- MachineFunction &MF = *MI->getParent()->getParent();
- const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
- return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
- }
-};
-
-}
-
-// ****************** //
-// Utility Functions //
-// ****************** //
-
-/// findSinglePredSuccessor - Return via reference a vector of machine basic
-/// blocks each of which is a successor of the specified BB and has no other
-/// predecessor.
-static void findSinglePredSuccessor(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineBasicBlock *> &Succs){
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB->pred_size() == 1)
- Succs.push_back(SuccMBB);
- }
-}
-
-/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
-/// but not re-defined and it's being reused. Remove the kill flag for the
-/// register and unset the kill's marker and last kill operand.
-static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
-
- MachineOperand *KillOp = KillOps[Reg];
- KillOp->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOp->getReg();
- if (!RegKills[KReg])
- return;
-
- assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
- "invalid superreg kill flags");
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- // If it's a def of a super-register, its other sub-registers are no
- // longer killed either.
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
-
- assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
- "invalid subreg kill flags");
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
-}
-
-/// ResurrectKill - Invalidate kill info associated with a previous MI. An
-/// optimization may have decided that it's safe to reuse a previously killed
-/// register. If we fail to erase the invalid kill flags, then the register
-/// scavenger may later clobber the register used by this MI. Note that this
-/// must be done even if this MI is being deleted! Consider:
-///
-/// USE $r1 (vreg1) <kill>
-/// ...
-/// $r1(vreg3) = COPY $r1 (vreg2)
-///
-/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
-/// vreg1's only use is a kill. The rewriter doesn't know it should be live
- /// until it rewrites vreg2. At that point it sees that the copy is dead and
-/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
-/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
-/// vreg1 before deleting the copy.
-static void ResurrectKill(MachineInstr &MI, unsigned Reg,
- const TargetRegisterInfo* TRI, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
- ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
- return;
- }
- // No previous kill for this reg. Check for subreg kills as well.
- // d4 =
- // store d4, fi#0
- // ...
- // = s8<kill>
- // ...
- // = d4 <avoiding reload>
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- unsigned SReg = *SR;
- if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
- ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
- }
-}
-
-/// InvalidateKills - MI is going to be deleted. If any of its operands are
-/// marked kill, then invalidate the information.
-static void InvalidateKills(MachineInstr &MI,
- const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- SmallVector<unsigned, 2> *KillRegs = NULL) {
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- if (KillRegs)
- KillRegs->push_back(Reg);
- assert(Reg < KillOps.size());
- if (KillOps[Reg] == &MO) {
- // This operand was the kill, now no longer.
- KillOps[Reg] = NULL;
- RegKills.reset(Reg);
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- if (RegKills[*SR]) {
- assert(KillOps[*SR] == &MO && "bad subreg kill flags");
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- }
- }
- else {
- // This operand may have reused a previously killed reg. Keep it live in
- // case it continues to be used after erasing this instruction.
- ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
- }
- }
-}
-
-/// InvalidateRegDef - If the def operand of the specified def MI is now dead
- /// (since its spill instruction is removed), mark it isDead. Also checks
- /// whether the def MI has other definition operands that are not dead,
- /// returning that result in HasLiveDef.
-static bool InvalidateRegDef(MachineBasicBlock::iterator I,
- MachineInstr &NewDef, unsigned Reg,
- bool &HasLiveDef,
- const TargetRegisterInfo *TRI) {
- // Due to remat, it's possible this reg isn't being reused. That is,
- // the def of this reg (by prev MI) is now dead.
- MachineInstr *DefMI = I;
- MachineOperand *DefOp = NULL;
- for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
- continue;
- if (MO.getReg() == Reg)
- DefOp = &MO;
- else if (!MO.isDead())
- HasLiveDef = true;
- }
- if (!DefOp)
- return false;
-
- bool FoundUse = false, Done = false;
- MachineBasicBlock::iterator E = &NewDef;
- ++I; ++E;
- for (; !Done && I != E; ++I) {
- MachineInstr *NMI = I;
- for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
- MachineOperand &MO = NMI->getOperand(j);
- if (!MO.isReg() || MO.getReg() == 0 ||
- (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
- continue;
- if (MO.isUse())
- FoundUse = true;
- Done = true; // Stop after scanning all the operands of this MI.
- }
- }
- if (!FoundUse) {
- // Def is dead!
- DefOp->setIsDead();
- return true;
- }
- return false;
-}
-
- /// UpdateKills - Track and update kill info. If an MI reads a register that is
-/// marked kill, then it must be due to register reuse. Transfer the kill info
-/// over.
-static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- // These do not affect kill info at all.
- if (MI.isDebugValue())
- return;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef())
- continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- // This operand may have reused a previously killed reg. Keep it live.
- ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
-
- if (MO.isKill()) {
- RegKills.set(Reg);
- KillOps[Reg] = &MO;
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- RegKills.set(*SR);
- KillOps[*SR] = &MO;
- }
- }
- }
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
- continue;
- unsigned Reg = MO.getReg();
- RegKills.reset(Reg);
- KillOps[Reg] = NULL;
- // It also defines (or partially defines) aliases.
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- RegKills.reset(*SR);
- KillOps[*SR] = NULL;
- }
- for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
- RegKills.reset(*SR);
- KillOps[*SR] = NULL;
- }
- }
-}
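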
-
-/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
-///
-static void ReMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MII,
- unsigned DestReg, unsigned Reg,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM) {
- MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
-#ifndef NDEBUG
- const MCInstrDesc &MCID = ReMatDefMI->getDesc();
- assert(MCID.getNumDefs() == 1 &&
- "Don't know how to remat instructions that define > 1 values!");
-#endif
- TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
- MachineInstr *NewMI = prior(MII);
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
- continue;
- assert(MO.isUse());
- unsigned Phys = VRM.getPhys(VirtReg);
- assert(Phys && "Virtual register is not assigned a register?");
- substitutePhysReg(MO, Phys, *TRI);
- }
- ++NumReMats;
-}
-
- /// findSuperReg - Find the super-register in the given register class whose
- /// SubIdx sub-register is SubReg.
-static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
- unsigned SubIdx, const TargetRegisterInfo *TRI) {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
- I != E; ++I) {
- unsigned Reg = *I;
- if (TRI->getSubReg(Reg, SubIdx) == SubReg)
- return Reg;
- }
- return 0;
-}
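
findSuperReg is a brute-force scan over a register class; a self-contained analogue with an invented sub-register table (the numbers below are illustrative, not real target data):

#include <cstdio>

// SubRegTable[Reg][SubIdx-1] gives Reg's sub-register at SubIdx, 0 if none.
// Four registers, two sub-register indices; values are made up.
constexpr unsigned SubRegTable[4][2] = {{0, 0}, {5, 6}, {7, 8}, {9, 0}};

unsigned findSuperReg(unsigned SubReg, unsigned SubIdx) {
  for (unsigned Reg = 0; Reg != 4; ++Reg)       // scan the "class"
    if (SubRegTable[Reg][SubIdx - 1] == SubReg)
      return Reg;
  return 0;                                     // no super-register found
}

int main() {
  std::printf("super-reg of 7 at index 1: %u\n", findSuperReg(7, 1)); // 2
}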
-
-// ******************************** //
-// Available Spills Implementation //
-// ******************************** //
-
-/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
-/// stackslot register. The register is still available but is no longer
- /// allowed to be modified.
-void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
- std::multimap<unsigned, int>::iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- I++;
- assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
- "Bidirectional map mismatch!");
- SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
- DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
- << " copied, it is available for use but can no longer be modified\n");
- }
-}
-
-/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
-/// stackslot register and its aliases. The register and its aliases may
- /// still be available but are no longer allowed to be modified.
-void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
- for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
- disallowClobberPhysRegOnly(*AS);
- disallowClobberPhysRegOnly(PhysReg);
-}
-
-/// ClobberPhysRegOnly - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we think lives in it.
-void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
- std::multimap<unsigned, int>::iterator I =
- PhysRegsAvailable.lower_bound(PhysReg);
- while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
- int SlotOrReMat = I->second;
- PhysRegsAvailable.erase(I++);
- assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
- "Bidirectional map mismatch!");
- SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
- DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
- << " clobbered, invalidating ");
- if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
- else
- DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
- }
-}
-
-/// ClobberPhysReg - This is called when the specified physreg changes
- /// value. We use this to invalidate any info about stuff we think lives in
-/// it and any of its aliases.
-void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
- for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
- ClobberPhysRegOnly(*AS);
- ClobberPhysRegOnly(PhysReg);
-}
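
The asserts and the &= ~1 update above imply how SpillSlotsOrReMatsAvailable packs its values: the mapped unsigned appears to be (PhysReg << 1) | CanClobber. A small sketch of that encoding (an inference from this file's code, not a documented API):

#include <cassert>

unsigned encode(unsigned PhysReg, bool CanClobber) {
  return (PhysReg << 1) | unsigned(CanClobber);
}
unsigned physRegOf(unsigned Packed)  { return Packed >> 1; }
bool     canClobber(unsigned Packed) { return Packed & 1; }

int main() {
  unsigned V = encode(/*PhysReg=*/7, /*CanClobber=*/true);
  assert(physRegOf(V) == 7 && canClobber(V));
  V &= ~1u;                       // disallowClobberPhysRegOnly's update
  assert(physRegOf(V) == 7 && !canClobber(V));
}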
-
-/// AddAvailableRegsToLiveIn - Availability information is being kept coming
-/// into the specified MBB. Add available physical registers as potential
- /// live-ins. If they are reused in the MBB, they will be added to the
- /// live-in set so the register scavenger and the post-allocation scheduler
- /// see correct liveness.
-void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- std::set<unsigned> NotAvailable;
- for (std::multimap<unsigned, int>::iterator
- I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
- I != E; ++I) {
- unsigned Reg = I->first;
- const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
- // FIXME: A temporary workaround. We can't reuse an available value if it's
- // not safe to move defs of the virtual register's class, e.g. the
- // X86::RFP* register classes. Do not add it as a live-in.
- if (!TII->isSafeToMoveRegClassDefs(RC))
- // This is no longer available.
- NotAvailable.insert(Reg);
- else {
- MBB.addLiveIn(Reg);
- if (RegKills[Reg])
- ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
- }
-
- // Skip over the same register.
- std::multimap<unsigned, int>::iterator NI = llvm::next(I);
- while (NI != E && NI->first == Reg) {
- ++I;
- ++NI;
- }
- }
-
- for (std::set<unsigned>::iterator I = NotAvailable.begin(),
- E = NotAvailable.end(); I != E; ++I) {
- ClobberPhysReg(*I);
- for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
- *SubRegs; ++SubRegs)
- ClobberPhysReg(*SubRegs);
- }
-}
-
-/// ModifyStackSlotOrReMat - This method is called when the value in a stack
-/// slot changes. This removes information about which register the previous
-/// value for this slot lives in (as the previous value is dead now).
-void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
- std::map<int, unsigned>::iterator It =
- SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
- if (It == SpillSlotsOrReMatsAvailable.end()) return;
- unsigned Reg = It->second >> 1;
- SpillSlotsOrReMatsAvailable.erase(It);
-
- // This register may hold the value of multiple stack slots; only remove
- // this stack slot from the set of values the register contains.
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
- for (; ; ++I) {
- assert(I != PhysRegsAvailable.end() && I->first == Reg &&
- "Map inverse broken!");
- if (I->second == SlotOrReMat) break;
- }
- PhysRegsAvailable.erase(I);
-}
-
-void AvailableSpills::ClobberSharingStackSlots(int StackSlot) {
- std::map<int, unsigned>::iterator It =
- SpillSlotsOrReMatsAvailable.find(StackSlot);
- if (It == SpillSlotsOrReMatsAvailable.end()) return;
- unsigned Reg = It->second >> 1;
-
- // Erase entries in PhysRegsAvailable for other stack slots.
- std::multimap<unsigned, int>::iterator I = PhysRegsAvailable.lower_bound(Reg);
- while (I != PhysRegsAvailable.end() && I->first == Reg) {
- std::multimap<unsigned, int>::iterator NextI = llvm::next(I);
- if (I->second != StackSlot) {
- DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in "
- << PrintReg(Reg, TRI) << '\n');
- SpillSlotsOrReMatsAvailable.erase(I->second);
- PhysRegsAvailable.erase(I);
- }
- I = NextI;
- }
-}
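
Since one physreg can hold the values of several stack slots at once, the reverse map is a multimap; ClobberSharingStackSlots drops every pairing for the register except the slot being kept. A toy version over plain std maps, ignoring the CanClobber bit sketched earlier (data invented for illustration):

#include <cassert>
#include <initializer_list>
#include <map>

int main() {
  std::multimap<unsigned, int> PhysRegsAvailable;  // physreg -> slots
  std::map<int, unsigned> SlotsAvailable;          // slot -> physreg
  const unsigned Reg = 4;
  for (int SS : {1, 2, 3}) {                       // Reg holds three slots
    PhysRegsAvailable.insert({Reg, SS});
    SlotsAvailable[SS] = Reg;
  }
  const int Keep = 2;                              // slot being redefined
  for (auto I = PhysRegsAvailable.lower_bound(Reg);
       I != PhysRegsAvailable.end() && I->first == Reg;) {
    if (I->second != Keep) {
      SlotsAvailable.erase(I->second);             // drop the sharing slot
      I = PhysRegsAvailable.erase(I);
    } else
      ++I;
  }
  assert(PhysRegsAvailable.count(Reg) == 1 && SlotsAvailable.size() == 1);
}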
-
-// ************************** //
-// Reuse Info Implementation //
-// ************************** //
-
-/// GetRegForReload - We are about to emit a reload into PhysReg. If there
-/// is some other operand that is using the specified register, either pick
-/// a new register to use, or evict the previous reload and use this reg.
-unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
- unsigned PhysReg,
- MachineFunction &MF,
- MachineInstr *MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- SmallSet<unsigned, 8> &Rejected,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- VirtRegMap &VRM) {
- const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
- const TargetRegisterInfo *TRI = Spills.getRegInfo();
-
- if (Reuses.empty()) return PhysReg; // This is most often empty.
-
- for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
- ReusedOp &Op = Reuses[ro];
- // If we find some other reuse that was supposed to use this register
- // exactly for its reload, we can change this reload to use ITS reload
- // register. That is, unless its reload register has already been
- // considered and subsequently rejected because it has also been reused
- // by another operand.
- if (Op.PhysRegReused == PhysReg &&
- Rejected.count(Op.AssignedPhysReg) == 0 &&
- RC->contains(Op.AssignedPhysReg)) {
- // Yup, use the reload register that we didn't use before.
- unsigned NewReg = Op.AssignedPhysReg;
- Rejected.insert(PhysReg);
- return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
- } else {
- // Otherwise, we might also have a problem if a previously reused
- // value aliases the new register. If so, codegen the previous reload
- // and use this one.
- unsigned PRRU = Op.PhysRegReused;
- if (TRI->regsOverlap(PRRU, PhysReg)) {
- // Okay, we found out that an alias of a reused register
- // was used. This isn't good because it means we have
- // to undo a previous reuse.
- MachineBasicBlock *MBB = MI->getParent();
- const TargetRegisterClass *AliasRC =
- MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
-
- // Copy Op out of the vector and remove it, we're going to insert an
- // explicit load for it.
- ReusedOp NewOp = Op;
- Reuses.erase(Reuses.begin()+ro);
-
- // MI may be using only a sub-register of PhysRegUsed.
- unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
- unsigned SubIdx = 0;
- assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
- "A reuse cannot be a virtual register");
- if (PRRU != RealPhysRegUsed) {
- // What was the sub-register index?
- SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
- assert(SubIdx &&
- "Operand physreg is not a sub-register of PhysRegUsed");
- }
-
- // Ok, we're going to try to reload the assigned physreg into the
- // slot that we were supposed to in the first place. However, that
- // register could hold a reuse. Check to see if it conflicts or
- // would prefer us to use a different register.
- unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
- MF, MI, Spills, MaybeDeadStores,
- Rejected, RegKills, KillOps, VRM);
-
- bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
- int SSorRMId = DoReMat
- ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
- DoReMat, SSorRMId, TII, MF);
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
- TRI, VRM);
- } else {
- TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
- NewOp.StackSlotOrReMat, AliasRC, TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
- // Any stores to this stack slot are not dead anymore.
- MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
- ++NumLoads;
- }
- Spills.ClobberPhysReg(NewPhysReg);
- Spills.ClobberPhysReg(NewOp.PhysRegReused);
-
- unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg;
- MI->getOperand(NewOp.Operand).setReg(RReg);
- MI->getOperand(NewOp.Operand).setSubReg(0);
-
- Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
-
- DEBUG(dbgs() << "Reuse undone!\n");
- --NumReused;
-
- // Finally, PhysReg is now available, go ahead and use it.
- return PhysReg;
- }
- }
- }
- return PhysReg;
-}
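
Stripped of the spill machinery, GetRegForReload is a chase through the reuse list with a rejection set that guarantees termination: each recursive step rejects the register it is abandoning, so no register is reconsidered. A minimal model (hypothetical data, no reload emission):

#include <set>
#include <vector>

struct Reuse { unsigned PhysRegReused, AssignedPhysReg; };

unsigned getRegForReload(unsigned PhysReg, const std::vector<Reuse> &Reuses,
                         std::set<unsigned> &Rejected) {
  for (const Reuse &Op : Reuses)
    if (Op.PhysRegReused == PhysReg && !Rejected.count(Op.AssignedPhysReg)) {
      Rejected.insert(PhysReg);            // never come back to PhysReg
      return getRegForReload(Op.AssignedPhysReg, Reuses, Rejected);
    }
  return PhysReg;                          // no conflicting reuse: keep it
}

int main() {
  // r1 was promised to one reuse, whose assigned reg r2 was promised to
  // another; the chase ends at r3.
  std::vector<Reuse> Reuses = {{1, 2}, {2, 3}};
  std::set<unsigned> Rejected;
  return getRegForReload(1, Reuses, Rejected) == 3 ? 0 : 1;
}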
-
-// ************************************************************************ //
-
-/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
-/// stack slot mod/ref. It also checks if it's possible to unfold the
-/// instruction by having it define a specified physical register instead.
-static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI,
- VirtRegMap &VRM) {
- if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
- return false;
-
- bool Found = false;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
- for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
- unsigned VirtReg = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
- if (MR & VirtRegMap::isModRef)
- if (VRM.getStackSlot(VirtReg) == SS) {
- Found = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
- break;
- }
- }
- if (!Found)
- return false;
-
- // Does the instruction use a register that overlaps the scratch register?
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (!VRM.hasPhys(Reg))
- continue;
- Reg = VRM.getPhys(Reg);
- }
- if (TRI->regsOverlap(PhysReg, Reg))
- return false;
- }
- return true;
-}
-
-/// FindFreeRegister - Find a free register of a given register class by looking
-/// at (at most) the last two machine instructions.
-static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
- MachineBasicBlock &MBB,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- BitVector &AllocatableRegs) {
- BitVector Defs(TRI->getNumRegs());
- BitVector Uses(TRI->getNumRegs());
- SmallVector<unsigned, 4> LocalUses;
- SmallVector<unsigned, 4> Kills;
-
- // Take a look at 2 instructions at most.
- unsigned Count = 0;
- while (Count < 2) {
- if (MII == MBB.begin())
- break;
- MachineInstr *PrevMI = prior(MII);
- MII = PrevMI;
-
- if (PrevMI->isDebugValue())
- continue; // Skip over dbg_value instructions.
- ++Count;
-
- for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = PrevMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue;
- unsigned Reg = MO.getReg();
- if (MO.isDef()) {
- Defs.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Defs.set(*AS);
- } else {
- LocalUses.push_back(Reg);
- if (MO.isKill() && AllocatableRegs[Reg])
- Kills.push_back(Reg);
- }
- }
-
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- unsigned Kill = Kills[i];
- if (!Defs[Kill] && !Uses[Kill] &&
- RC->contains(Kill))
- return Kill;
- }
- for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
- unsigned Reg = LocalUses[i];
- Uses.set(Reg);
- for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
- Uses.set(*AS);
- }
- }
-
- return 0;
-}
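
The heuristic above boils down to: walking backwards over a two-instruction window, a register killed by one of those instructions is free at the current point provided nothing in between defines or uses it again. A compact model with an invented instruction encoding (not the MachineOperand walk used above):

#include <vector>

struct Inst { std::vector<unsigned> Defs, Uses, Kills; };

// Window[0] is the instruction just before the current point, Window[1]
// the one before that, mirroring the prior(MII) walk above.
unsigned findFree(const std::vector<Inst> &Window) {
  std::vector<bool> Def(32, false), Use(32, false);
  for (const Inst &I : Window) {
    for (unsigned R : I.Defs) Def[R] = true;
    for (unsigned R : I.Kills)
      if (!Def[R] && !Use[R])
        return R;                // killed and untouched since: free
    for (unsigned R : I.Uses) Use[R] = true;
  }
  return 0;
}

int main() {
  // Nearest instruction uses r5; the one before kills r7. r7 is free.
  std::vector<Inst> Window = {{{}, {5}, {}}, {{}, {7}, {7}}};
  return findFree(Window) == 7 ? 0 : 1;
}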
-
-static
-void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterInfo &TRI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == VirtReg)
- substitutePhysReg(MO, PhysReg, TRI);
- }
-}
-
-namespace {
-
-struct RefSorter {
- bool operator()(const std::pair<MachineInstr*, int> &A,
- const std::pair<MachineInstr*, int> &B) {
- return A.second < B.second;
- }
-};
-
-// ***************************** //
-// Local Spiller Implementation //
-// ***************************** //
-
-class LocalRewriter : public VirtRegRewriter {
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- VirtRegMap *VRM;
- LiveIntervals *LIs;
- BitVector AllocatableRegs;
- DenseMap<MachineInstr*, unsigned> DistanceMap;
- DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
-
- MachineBasicBlock *MBB; // Basic block currently being processed.
-
-public:
-
- bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs);
-
-private:
- void EraseInstr(MachineInstr *MI) {
- VRM->RemoveMachineInstrFromMaps(MI);
- LIs->RemoveMachineInstrFromMaps(MI);
- MI->eraseFromParent();
- }
-
- bool OptimizeByUnfold2(unsigned VirtReg, int SS,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI);
-
- void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- void TransferDeadness(unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool InsertEmergencySpills(MachineInstr *MI);
-
- bool InsertRestores(MachineInstr *MI,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-
- bool InsertSpills(MachineInstr *MI);
-
- void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- ReuseInfo &ReusedOperands,
- std::vector<MachineOperand*> &KillOps);
-
- void RewriteMBB(LiveIntervals *LIs,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps);
-};
-}
-
-bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
- LiveIntervals* lis) {
- MRI = &MF.getRegInfo();
- TRI = MF.getTarget().getRegisterInfo();
- TII = MF.getTarget().getInstrInfo();
- VRM = &vrm;
- LIs = lis;
- AllocatableRegs = TRI->getAllocatableSet(MF);
- DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
- << MF.getFunction()->getName() << "':\n");
- DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
- " reloads!) ****\n");
- DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
-
- // Spills - Keep track of which spilled values are available in physregs
- // so that we can choose to reuse the physregs instead of emitting
- // reloads. This is usually refreshed per basic block.
- AvailableSpills Spills(TRI, TII);
-
- // Keep track of kill information.
- BitVector RegKills(TRI->getNumRegs());
- std::vector<MachineOperand*> KillOps;
- KillOps.resize(TRI->getNumRegs(), NULL);
-
- // SinglePredSuccs - Successor blocks which have a single predecessor.
- SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
- SmallPtrSet<MachineBasicBlock*,16> EarlyVisited;
-
- // Traverse the basic blocks depth first.
- MachineBasicBlock *Entry = MF.begin();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
- for (df_ext_iterator<MachineBasicBlock*,
- SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- MBB = *DFI;
- if (!EarlyVisited.count(MBB))
- RewriteMBB(LIs, Spills, RegKills, KillOps);
-
- // If this MBB is the only predecessor of a successor, keep the
- // availability information and visit it next.
- do {
- // Keep visiting single predecessor successor as long as possible.
- SinglePredSuccs.clear();
- findSinglePredSuccessor(MBB, SinglePredSuccs);
- if (SinglePredSuccs.empty())
- MBB = 0;
- else {
- // FIXME: There may be more than one successor, each of which has MBB
- // as its only predecessor.
- MBB = SinglePredSuccs[0];
- if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
- Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
- RewriteMBB(LIs, Spills, RegKills, KillOps);
- }
- }
- } while (MBB);
-
- // Clear the availability info.
- Spills.clear();
- }
-
- DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
-
- // Mark unused spill slots.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- int SS = VRM->getLowSpillSlot();
- if (SS != VirtRegMap::NO_STACK_SLOT) {
- for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
- SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
- if (!VRM->isSpillSlotUsed(SS)) {
- MFI->RemoveStackObject(SS);
- for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
- MachineInstr *DVMI = DbgValues[j];
- DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
- EraseInstr(DVMI);
- }
- ++NumDSS;
- }
- DbgValues.clear();
- }
- }
- Slot2DbgValues.clear();
-
- return true;
-}
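
The traversal above is worth isolating: after rewriting a block, the rewriter keeps walking into a successor for as long as that successor has exactly one predecessor, carrying spill availability along the chain, and clears the availability everywhere else. A toy CFG walk showing just that control flow (graph and types invented):

#include <set>
#include <vector>

struct Block { std::vector<int> Succs; unsigned NumPreds; };

void visitChain(int BB, const std::vector<Block> &CFG,
                std::set<int> &Visited, std::vector<int> &Order) {
  while (BB != -1) {
    Visited.insert(BB);
    Order.push_back(BB);        // "rewrite" BB with inherited availability
    int Next = -1;
    for (int S : CFG[BB].Succs)
      if (CFG[S].NumPreds == 1 && !Visited.count(S)) {
        Next = S;               // single-pred successor: keep going
        break;
      }
    BB = Next;                  // otherwise the chain (and availability) ends
  }
}

int main() {
  // 0 -> {1, 2}, 1 -> {2}: block 1 has one predecessor, block 2 has two,
  // so the chain from 0 visits 1 and then stops.
  std::vector<Block> CFG = {{{1, 2}, 0}, {{2}, 1}, {{}, 2}};
  std::set<int> Visited;
  std::vector<int> Order;
  visitChain(0, CFG, Visited, Order);
  return (Order.size() == 2 && Order[1] == 1) ? 0 : 1;
}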
-
-/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
-/// a scratch register is available.
-/// xorq %r12<kill>, %r13
-/// addq %rax, -184(%rbp)
-/// addq %r13, -184(%rbp)
-/// ==>
-/// xorq %r12<kill>, %r13
-/// movq -184(%rbp), %r12
-/// addq %rax, %r12
-/// addq %r13, %r12
-/// movq %r12, -184(%rbp)
-bool LocalRewriter::
-OptimizeByUnfold2(unsigned VirtReg, int SS,
- MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- MachineBasicBlock::iterator NextMII = llvm::next(MII);
- // Skip over dbg_value instructions.
- while (NextMII != MBB->end() && NextMII->isDebugValue())
- NextMII = llvm::next(NextMII);
- if (NextMII == MBB->end())
- return false;
-
- if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
- return false;
-
- // Now let's see if the last couple of instructions happen to have freed up
- // a register.
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
- if (!PhysReg)
- return false;
-
- MachineFunction &MF = *MBB->getParent();
- TRI = MF.getTarget().getRegisterInfo();
- MachineInstr &MI = *MII;
- if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
- return false;
-
- // If the next instruction also folds the same SS modref and can be unfolded,
- // then it's worthwhile to issue a load from SS into the free register and
- // then unfold these instructions.
- if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
- return false;
-
- // Back-schedule reloads and remats.
- ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
-
- // Load from SS to the spare physical register.
- TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
- // This invalidates Phys.
- Spills.ClobberPhysReg(PhysReg);
- // Remember it's available.
- Spills.addAvailable(SS, PhysReg);
- MaybeDeadStores[SS] = NULL;
-
- // Unfold current MI.
- SmallVector<MachineInstr*, 4> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
- llvm_unreachable("Unable unfold the load / store folding instruction!");
- assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
- VRM->transferRestorePts(&MI, NewMIs[0]);
- MII = MBB->insert(MII, NewMIs[0]);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- ++NumModRefUnfold;
-
- // Unfold next instructions that fold the same SS.
- do {
- MachineInstr &NextMI = *NextMII;
- NextMII = llvm::next(NextMII);
- NewMIs.clear();
- if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
- llvm_unreachable("Unable unfold the load / store folding instruction!");
- assert(NewMIs.size() == 1);
- AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
- VRM->transferRestorePts(&NextMI, NewMIs[0]);
- MBB->insert(NextMII, NewMIs[0]);
- InvalidateKills(NextMI, TRI, RegKills, KillOps);
- EraseInstr(&NextMI);
- ++NumModRefUnfold;
- // Skip over dbg_value instructions.
- while (NextMII != MBB->end() && NextMII->isDebugValue())
- NextMII = llvm::next(NextMII);
- if (NextMII == MBB->end())
- break;
- } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
-
- // Store the value back into SS.
- TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
- MachineInstr *StoreMI = prior(NextMII);
- VRM->addSpillSlotUse(SS, StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
-
- return true;
-}
-
-/// OptimizeByUnfold - Turn a store folding instruction into a load folding
-/// instruction. e.g.
-/// xorl %edi, %eax
-/// movl %eax, -32(%ebp)
-/// movl -36(%ebp), %eax
-/// orl %eax, -32(%ebp)
-/// ==>
-/// xorl %edi, %eax
-/// orl -36(%ebp), %eax
-/// mov %eax, -32(%ebp)
-/// This enables unfolding optimization for a subsequent instruction which will
-/// also eliminate the newly introduced store instruction.
-bool LocalRewriter::
-OptimizeByUnfold(MachineBasicBlock::iterator &MII,
- std::vector<MachineInstr*> &MaybeDeadStores,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- MachineFunction &MF = *MBB->getParent();
- MachineInstr &MI = *MII;
- unsigned UnfoldedOpc = 0;
- unsigned UnfoldPR = 0;
- unsigned UnfoldVR = 0;
- int FoldedSS = VirtRegMap::NO_STACK_SLOT;
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
- for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
- // Only transform a MI that folds a single register.
- if (UnfoldedOpc)
- return false;
- UnfoldVR = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
- // MI2VirtMap can be updated, which invalidates the iterator.
- // Increment the iterator first.
- ++I;
- if (VRM->isAssignedReg(UnfoldVR))
- continue;
- // If this reference is not a use, any previous store is now dead.
- // Otherwise, the store to this stack slot is not dead anymore.
- FoldedSS = VRM->getStackSlot(UnfoldVR);
- MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
- if (DeadStore && (MR & VirtRegMap::isModRef)) {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
- if (!PhysReg || !DeadStore->readsRegister(PhysReg))
- continue;
- UnfoldPR = PhysReg;
- UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
- false, true);
- }
- }
-
- if (!UnfoldedOpc) {
- if (!UnfoldVR)
- return false;
-
- // Look for other unfolding opportunities.
- return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
- RegKills, KillOps);
- }
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
- continue;
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
- continue;
- if (VRM->isAssignedReg(VirtReg)) {
- unsigned PhysReg = VRM->getPhys(VirtReg);
- if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
- return false;
- } else if (VRM->isReMaterialized(VirtReg))
- continue;
- int SS = VRM->getStackSlot(VirtReg);
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- if (PhysReg) {
- if (TRI->regsOverlap(PhysReg, UnfoldPR))
- return false;
- continue;
- }
- if (VRM->hasPhys(VirtReg)) {
- PhysReg = VRM->getPhys(VirtReg);
- if (!TRI->regsOverlap(PhysReg, UnfoldPR))
- continue;
- }
-
- // Ok, we'll need to reload the value into a register which makes
- // it impossible to perform the store unfolding optimization later.
- // Let's see if it is possible to fold the load if the store is
- // unfolded. This allows us to perform the store unfolding
- // optimization.
- SmallVector<MachineInstr*, 4> NewMIs;
- if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
- assert(NewMIs.size() == 1);
- MachineInstr *NewMI = NewMIs.back();
- MBB->insert(MII, NewMI);
- NewMIs.clear();
- int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
- assert(Idx != -1);
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(Idx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
- NewMI->eraseFromParent();
- if (FoldedMI) {
- VRM->addSpillSlotUse(SS, FoldedMI);
- if (!VRM->hasPhys(UnfoldVR))
- VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
- VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- MII = FoldedMI;
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- return true;
- }
- }
- }
-
- return false;
-}
-
- /// CommuteChangesDestination - We are looking for r0 = op r1, r2, where
- /// SrcReg is r1 and it is tied to r0. Return true if after commuting this
- /// instruction it will be r0 = op r2, r1.
-static bool CommuteChangesDestination(MachineInstr *DefMI,
- const MCInstrDesc &MCID,
- unsigned SrcReg,
- const TargetInstrInfo *TII,
- unsigned &DstIdx) {
- if (MCID.getNumDefs() != 1 || MCID.getNumOperands() != 3)
- return false;
- if (!DefMI->getOperand(1).isReg() ||
- DefMI->getOperand(1).getReg() != SrcReg)
- return false;
- unsigned DefIdx;
- if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
- return false;
- unsigned SrcIdx1, SrcIdx2;
- if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
- return false;
- if (SrcIdx1 == 1 && SrcIdx2 == 2) {
- DstIdx = 2;
- return true;
- }
- return false;
-}
-
-/// CommuteToFoldReload -
-/// Look for
-/// r1 = load fi#1
-/// r1 = op r1, r2<kill>
-/// store r1, fi#1
-///
-/// If op is commutable and r2 is killed, then we can xform these to
-/// r2 = op r2, fi#1
-/// store r2, fi#1
-bool LocalRewriter::
-CommuteToFoldReload(MachineBasicBlock::iterator &MII,
- unsigned VirtReg, unsigned SrcReg, int SS,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps,
- const TargetRegisterInfo *TRI) {
- if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
- return false;
-
- MachineInstr &MI = *MII;
- MachineBasicBlock::iterator DefMII = prior(MII);
- MachineInstr *DefMI = DefMII;
- const MCInstrDesc &MCID = DefMI->getDesc();
- unsigned NewDstIdx;
- if (DefMII != MBB->begin() &&
- MCID.isCommutable() &&
- CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
- MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
- unsigned NewReg = NewDstMO.getReg();
- if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
- return false;
- MachineInstr *ReloadMI = prior(DefMII);
- int FrameIdx;
- unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
- if (DestReg != SrcReg || FrameIdx != SS)
- return false;
- int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
- if (UseIdx == -1)
- return false;
- unsigned DefIdx;
- if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
- return false;
- assert(DefMI->getOperand(DefIdx).isReg() &&
- DefMI->getOperand(DefIdx).getReg() == SrcReg);
-
- // Now commute def instruction.
- MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
- if (!CommutedMI)
- return false;
- MBB->insert(MII, CommutedMI);
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(NewDstIdx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
- // Not needed since foldMemoryOperand returns a new MI.
- CommutedMI->eraseFromParent();
- if (!FoldedMI)
- return false;
-
- VRM->addSpillSlotUse(SS, FoldedMI);
- VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- // Insert new def MI and spill MI.
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
- MII = prior(MII);
- MachineInstr *StoreMI = MII;
- VRM->addSpillSlotUse(SS, StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- MII = FoldedMI; // Update MII to backtrack.
-
- // Delete all 3 old instructions.
- InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
- EraseInstr(ReloadMI);
- InvalidateKills(*DefMI, TRI, RegKills, KillOps);
- EraseInstr(DefMI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
-
- // If NewReg was previously holding the value of some SS, it's now clobbered.
- // This has to be done now because it's a physical register. When this
- // instruction is re-visited, it's ignored.
- Spills.ClobberPhysReg(NewReg);
-
- ++NumCommutes;
- return true;
- }
-
- return false;
-}
-
-/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
-/// the last store to the same slot is now dead. If so, remove the last store.
-void LocalRewriter::
-SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
- int Idx, unsigned PhysReg, int StackSlot,
- const TargetRegisterClass *RC,
- bool isAvailable, MachineInstr *&LastStore,
- AvailableSpills &Spills,
- SmallSet<MachineInstr*, 4> &ReMatDefs,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
- TRI);
- MachineInstr *StoreMI = prior(oldNextMII);
- VRM->addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(dbgs() << "Store:\t" << *StoreMI);
-
- // If there is a dead store to this stack slot, nuke it now.
- if (LastStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
- ++NumDSE;
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
- MachineBasicBlock::iterator PrevMII = LastStore;
- bool CheckDef = PrevMII != MBB->begin();
- if (CheckDef)
- --PrevMII;
- EraseInstr(LastStore);
- if (CheckDef) {
- // Look at defs of killed registers on the store. Mark the defs
- // as dead since the store has been deleted and they aren't
- // being reused.
- for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
- bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
- MachineInstr *DeadDef = PrevMII;
- if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
- // FIXME: This assumes a remat def does not have side effects.
- EraseInstr(DeadDef);
- ++NumDRM;
- }
- }
- }
- }
- }
-
- // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
- // the last of multiple instructions is the actual store.
- LastStore = prior(oldNextMII);
-
- // If the stack slot value was previously available in some other
- // register, change it now. Otherwise, make the register available,
- // in PhysReg.
- Spills.ModifyStackSlotOrReMat(StackSlot);
- Spills.ClobberPhysReg(PhysReg);
- Spills.addAvailable(StackSlot, PhysReg, isAvailable);
- ++NumStores;
-}
-
-/// isSafeToDelete - Return true if this instruction doesn't produce any side
- /// effects and all of its defs are dead.
-static bool isSafeToDelete(MachineInstr &MI) {
- const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
- MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
- MI.isLabel() || MI.isDebugValue() ||
- MI.hasUnmodeledSideEffects())
- return false;
-
- // Technically speaking inline asm without side effects and no defs can still
- // be deleted. But there is so much bad inline asm code out there, we should
- // let them be.
- if (MI.isInlineAsm())
- return false;
-
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.getReg())
- continue;
- if (MO.isDef() && !MO.isDead())
- return false;
- if (MO.isUse() && MO.isKill())
- // FIXME: We can't remove kill markers or else the scavenger will assert.
- // An alternative is to add a ADD pseudo instruction to replace kill
- // markers.
- return false;
- }
- return true;
-}
-
- /// TransferDeadness - An identity copy definition is dead and it's being
-/// removed. Find the last def or use and mark it as dead / kill.
-void LocalRewriter::
-TransferDeadness(unsigned Reg, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- SmallPtrSet<MachineInstr*, 4> Seens;
- SmallVector<std::pair<MachineInstr*, int>,8> Refs;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineInstr *UDMI = &*RI;
- if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
- continue;
- DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
- if (DI == DistanceMap.end())
- continue;
- if (Seens.insert(UDMI))
- Refs.push_back(std::make_pair(UDMI, DI->second));
- }
-
- if (Refs.empty())
- return;
- std::sort(Refs.begin(), Refs.end(), RefSorter());
-
- while (!Refs.empty()) {
- MachineInstr *LastUDMI = Refs.back().first;
- Refs.pop_back();
-
- MachineOperand *LastUD = NULL;
- for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = LastUDMI->getOperand(i);
- if (!MO.isReg() || MO.getReg() != Reg)
- continue;
- if (!LastUD || (LastUD->isUse() && MO.isDef()))
- LastUD = &MO;
- if (LastUDMI->isRegTiedToDefOperand(i))
- break;
- }
- if (LastUD->isDef()) {
- // If the instruction has no side effect, delete it and propagate
- // backward further. Otherwise, mark it dead and we are done.
- if (!isSafeToDelete(*LastUDMI)) {
- LastUD->setIsDead();
- break;
- }
- EraseInstr(LastUDMI);
- } else {
- LastUD->setIsKill();
- RegKills.set(Reg);
- KillOps[Reg] = LastUD;
- break;
- }
- }
-}
-
-/// InsertEmergencySpills - Insert emergency spills before MI if requested by
-/// VRM. Return true if spills were inserted.
-bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
- if (!VRM->hasEmergencySpills(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- SmallSet<int, 4> UsedSS;
- std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
- for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
- unsigned PhysReg = EmSpills[i];
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
- assert(RC && "Unable to determine register class!");
- int SS = VRM->getEmergencySpillSlot(RC);
- if (UsedSS.count(SS))
- llvm_unreachable("Need to spill more than one physical registers!");
- UsedSS.insert(SS);
- TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
- MachineInstr *StoreMI = prior(MII);
- VRM->addSpillSlotUse(SS, StoreMI);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
- TII, *MBB->getParent());
-
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
-
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SS, LoadMI);
- ++NumPSpills;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- return true;
-}
-
- /// InsertRestores - Restore registers before MI if requested by VRM. Return
- /// true if any instructions were inserted.
-bool LocalRewriter::InsertRestores(MachineInstr *MI,
- AvailableSpills &Spills,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (!VRM->isRestorePt(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
- for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
- unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order.
- if (!VRM->getPreSplitReg(VirtReg))
- continue; // Split interval spilled again.
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
-
- // Check if the value being restored is available. If so, it must be
- // from a predecessor BB that falls through into this BB. We do not
- // expect:
- // BB1:
- // r1 = load fi#1
- // ...
- // = r1<kill>
- // ... # r1 not clobbered
- // ...
- // = load fi#1
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
- if (InReg == Phys) {
- // If the value is already available in the expected register, save
- // a reload / remat.
- if (SSorRMId)
- DEBUG(dbgs() << "Reusing RM#"
- << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
- <<" instead of reloading into physreg "
- << TRI->getName(Phys) << '\n');
-
- // Reusing a physreg may resurrect it. But we expect ProcessUses to update
- // the kill flags for the current instruction after processing it.
-
- ++NumOmitted;
- continue;
- } else if (InReg && InReg != Phys) {
- if (SSorRMId)
- DEBUG(dbgs() << "Reusing RM#"
- << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(InReg) << " for " << PrintReg(VirtReg)
- <<" by copying it into physreg "
- << TRI->getName(Phys) << '\n');
-
- // If the reloaded / remat value is available in another register,
- // copy it to the desired register.
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
- *MBB->getParent());
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), Phys)
- .addReg(InReg, RegState::Kill);
-
- // This invalidates Phys.
- Spills.ClobberPhysReg(Phys);
- // Remember it's available.
- Spills.addAvailable(SSorRMId, Phys);
-
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- DEBUG(dbgs() << '\t' << *CopyMI);
- ++NumCopified;
- continue;
- }
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
- *MBB->getParent());
-
- if (VRM->isReMaterialized(VirtReg)) {
- ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
-
- // This invalidates Phys.
- Spills.ClobberPhysReg(Phys);
- // Remember it's available.
- Spills.addAvailable(SSorRMId, Phys);
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(MII));
- }
- return true;
-}
-
-/// InsertSpills - Insert spills after MI if requested by VRM. Return
-/// true if spills were inserted.
-bool LocalRewriter::InsertSpills(MachineInstr *MI) {
- if (!VRM->isSpillPt(MI))
- return false;
- MachineBasicBlock::iterator MII = MI;
- std::vector<std::pair<unsigned,bool> > &SpillRegs =
- VRM->getSpillPtSpills(MI);
- for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
- unsigned VirtReg = SpillRegs[i].first;
- bool isKill = SpillRegs[i].second;
- if (!VRM->getPreSplitReg(VirtReg))
- continue; // Split interval spilled again.
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- unsigned Phys = VRM->getPhys(VirtReg);
- int StackSlot = VRM->getStackSlot(VirtReg);
- MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
- RC, TRI);
- MachineInstr *StoreMI = prior(oldNextMII);
- VRM->addSpillSlotUse(StackSlot, StoreMI);
- DEBUG(dbgs() << "Store:\t" << *StoreMI);
- VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- }
- return true;
-}
-
-
-/// ProcessUses - Process all of MI's spilled operands and all available
-/// operands.
-void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
- std::vector<MachineInstr*> &MaybeDeadStores,
- BitVector &RegKills,
- ReuseInfo &ReusedOperands,
- std::vector<MachineOperand*> &KillOps) {
- // Clear kill info.
- SmallSet<unsigned, 2> KilledMIRegs;
- SmallVector<unsigned, 4> VirtUseOps;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue; // Ignore non-register operands.
-
- unsigned VirtReg = MO.getReg();
-
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
- // Ignore physregs for spilling, but remember that it is used by this
- // function.
- MRI->setPhysRegUsed(VirtReg);
- continue;
- }
-
- // We want to process implicit virtual register uses first.
- if (MO.isImplicit())
- // If the virtual register is implicitly defined, emit an implicit_def
- // before so the scavenger knows it's "defined".
- // FIXME: This is a horrible hack done by the register allocator to
- // remat a definition with a virtual register operand.
- VirtUseOps.insert(VirtUseOps.begin(), i);
- else
- VirtUseOps.push_back(i);
-
- // A partial def causes problems because the same operand both reads and
- // writes the register. This rewriter is designed to rewrite uses and defs
- // separately, so a partial def would already have been rewritten to a
- // physreg by the time we get to processing defs.
- // Add an implicit use operand to model the partial def.
- if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
- MI.findRegisterUseOperandIdx(VirtReg) == -1) {
- VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
- MI.addOperand(MachineOperand::CreateReg(VirtReg,
- false, // isDef
- true)); // isImplicit
- DEBUG(dbgs() << "Partial redef: " << MI);
- }
- }
-
- // Process all of the spilled uses and all non spilled reg references.
- SmallVector<int, 2> PotentialDeadStoreSlots;
- KilledMIRegs.clear();
- for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
- unsigned i = VirtUseOps[j];
- unsigned VirtReg = MI.getOperand(i).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register?");
-
- unsigned SubIdx = MI.getOperand(i).getSubReg();
- if (VRM->isAssignedReg(VirtReg)) {
- // This virtual register was assigned a physreg!
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
- if (MI.getOperand(i).isDef())
- ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MI.getOperand(i), Phys, *TRI);
- if (VRM->isImplicitlyDefined(VirtReg))
- // FIXME: Is this needed?
- BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
- continue;
- }
-
- // This virtual register is now known to be a spilled value.
- if (!MI.getOperand(i).isUse())
- continue; // Handle defs in the loop below (handle use&def here though)
-
- bool AvoidReload = MI.getOperand(i).isUndef();
- // Check if it is defined by an implicit def. It should not be spilled.
- // Note, this is for correctness reasons. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) is not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflict with the live
- // interval of r1025. Now suppose both registers are spilled; you can
- // easily see a situation where both registers are reloaded before the
- // INSERT_SUBREG and both target registers would overlap.
- // the INSERT_SUBREG and both target registers that would overlap.
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- int ReuseSlot = SSorRMId;
-
- // Check to see if this stack slot is available.
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
- // If this is a sub-register use, make sure the reuse register is in the
- // right register class. For example, for x86 not all of the 32-bit
- // registers have accessible sub-registers.
- // Similarly so for EXTRACT_SUBREG. Consider this:
- // EDI = op
- // MOV32_mr fi#1, EDI
- // ...
- // = EXTRACT_SUBREG fi#1
- // fi#1 is available in EDI, but it cannot be reused because it's not in
- // the right register file.
- if (PhysReg && !AvoidReload && SubIdx) {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- if (!RC->contains(PhysReg))
- PhysReg = 0;
- }
-
- if (PhysReg && !AvoidReload) {
- // This spilled operand might be part of a two-address operand. If this
- // is the case, then changing it will necessarily require changing the
- // def part of the instruction as well. However, in some cases, we
- // aren't allowed to modify the reused register. If none of these cases
- // apply, reuse it.
- bool CanReuse = true;
- bool isTied = MI.isRegTiedToDefOperand(i);
- if (isTied) {
- // Okay, we have a two address operand. We can reuse this physreg as
- // long as we are allowed to clobber the value and there isn't an
- // earlier def that has already clobbered the physreg.
- CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg);
- }
- // If this is an asm, and a PhysReg alias is used elsewhere as an
- // earlyclobber operand, we can't also use it as an input.
- if (MI.isInlineAsm()) {
- for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
- MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.isEarlyClobber() &&
- TRI->regsOverlap(MOk.getReg(), PhysReg)) {
- CanReuse = false;
- DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
- << " for " << PrintReg(VirtReg) << ": " << MOk
- << '\n');
- break;
- }
- }
- }
-
- if (CanReuse) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg)
- << " instead of reloading into "
- << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n');
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- // Reusing a physreg may resurrect it. But we expect ProcessUses to
- // update the kill flags for the current instr after processing it.
-
- // The only technical detail we have is that we don't know that
- // PhysReg won't be clobbered by a reloaded stack slot that occurs
- // later in the instruction. In particular, consider 'op V1, V2'.
- // If V1 is available in physreg R0, we would choose to reuse it
- // here, instead of reloading it into the register the allocator
- // indicated (say R1). However, V2 might have to be reloaded
- // later, and it might indicate that it needs to live in R0. When
- // this occurs, we need to have information available that
- // indicates it is safe to use R1 for the reload instead of R0.
- //
- // To further complicate matters, we might conflict with an alias,
- // or R0 and R1 might not be compatible with each other. In this
- // case, we actually insert a reload for V1 in R1, ensuring that
- // we can get at R0 or its alias.
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
- VRM->getPhys(VirtReg), VirtReg);
- if (isTied)
- // Only mark it clobbered if this is a use&def operand.
- ReusedOperands.markClobbered(PhysReg);
- ++NumReused;
-
- if (MI.getOperand(i).isKill() &&
- ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
- // The store of this spilled value is potentially dead, but we
- // won't know for certain until we've confirmed that the re-use
- // above is valid, which means waiting until the other operands
- // are processed. For now we just track the spill slot, we'll
- // remove it after the other operands are processed if valid.
-
- PotentialDeadStoreSlots.push_back(ReuseSlot);
- }
-
- // Mark it isKill if there are no other uses of the same virtual
- // register and it's not a two-address operand. IsKill will be
- // unset if the reg is reused.
- if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
- continue;
- } // CanReuse
-
- // Otherwise we have a situation where we have a two-address instruction
- // whose mod/ref operand needs to be reloaded. This reload is already
- // available in some register "PhysReg", but if we used PhysReg as the
- // operand to our 2-addr instruction, the instruction would modify
- // PhysReg. This isn't cool if something later uses PhysReg and expects
- // to get its initial value.
- //
- // To avoid this problem, and to avoid doing a load right after a store,
- // we emit a copy from PhysReg into the designated register for this
- // operand.
- //
- // This case also applies to an earlyclobber'd PhysReg.
- unsigned DesignatedReg = VRM->getPhys(VirtReg);
- assert(DesignatedReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
- MaybeDeadStores, RegKills, KillOps, *VRM);
-
- // If the mapped designated register is actually the physreg we have
- // incoming, we don't need to insert a dead copy.
- if (DesignatedReg == PhysReg) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
- << " for " << PrintReg(VirtReg)
- << " instead of reloading into same physreg.\n");
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- ReusedOperands.markClobbered(RReg);
- ++NumReused;
- continue;
- }
-
- MRI->setPhysRegUsed(DesignatedReg);
- ReusedOperands.markClobbered(DesignatedReg);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, *MBB->getParent());
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DesignatedReg).addReg(PhysReg);
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- // This invalidates DesignatedReg.
- Spills.ClobberPhysReg(DesignatedReg);
-
- Spills.addAvailable(ReuseSlot, DesignatedReg);
- unsigned RReg =
- SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- ++NumReused;
- continue;
- } // if (PhysReg)
-
- // Otherwise, reload it and remember that we have it.
- PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
- MRI->setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- if (AvoidReload)
- ++NumAvoided;
- else {
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, *MBB->getParent());
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- // This invalidates PhysReg.
- Spills.ClobberPhysReg(PhysReg);
-
- // Any stores to this stack slot are not dead anymore.
- if (!DoReMat)
- MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, PhysReg);
- // Assumes this is the last use. IsKill will be unset if reg is reused
- // unless it's a two-address operand.
- if (!MI.isRegTiedToDefOperand(i) &&
- KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- }
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- }
-
- // Ok - now we can remove stores that have been confirmed dead.
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
- // This was the last use and the spilled value is still available
- // for reuse. That means the spill was unnecessary!
- int PDSSlot = PotentialDeadStoreSlots[j];
- MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
- if (DeadStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- EraseInstr(DeadStore);
- MaybeDeadStores[PDSSlot] = NULL;
- ++NumDSE;
- }
- }
-}
-
-/// RewriteMBB - Keep track of which spills are available even after the
-/// register allocator is done with them. If possible, avoid reloading vregs.
-void
-LocalRewriter::RewriteMBB(LiveIntervals *LIs,
- AvailableSpills &Spills, BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
-
- DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
- << MBB->getName() << "':\n");
-
- MachineFunction &MF = *MBB->getParent();
-
- // MaybeDeadStores - When we need to write a value back into a stack slot,
- // keep track of the inserted store. If the stack slot value is never read
- // (because the value was used from some available register, for example), and
- // subsequently stored to, the original store is dead. This map keeps track
- // of inserted stores that are not used. If we see a subsequent store to the
- // same stack slot, the original store is deleted.
- std::vector<MachineInstr*> MaybeDeadStores;
- MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
-
- // ReMatDefs - These are rematerializable def MIs which are not deleted.
- SmallSet<MachineInstr*, 4> ReMatDefs;
-
- // Keep track of the registers we have already spilled in case there are
- // multiple defs of the same register in MI.
- SmallSet<unsigned, 8> SpilledMIRegs;
-
- RegKills.reset();
- KillOps.clear();
- KillOps.resize(TRI->getNumRegs(), NULL);
-
- DistanceMap.clear();
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ) {
- MachineBasicBlock::iterator NextMII = llvm::next(MII);
-
- if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
- NextMII = llvm::next(MII);
-
- if (InsertEmergencySpills(MII))
- NextMII = llvm::next(MII);
-
- InsertRestores(MII, Spills, RegKills, KillOps);
-
- if (InsertSpills(MII))
- NextMII = llvm::next(MII);
-
- bool Erased = false;
- bool BackTracked = false;
- MachineInstr &MI = *MII;
-
- // Remember DbgValue's which reference stack slots.
- if (MI.isDebugValue() && MI.getOperand(0).isFI())
- Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
-
- /// ReusedOperands - Keep track of operand reuse in case we need to undo
- /// reuse.
- ReuseInfo ReusedOperands(MI, TRI);
-
- ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
-
- DEBUG(dbgs() << '\t' << MI);
-
-
- // If we have folded references to memory operands, make sure we clear all
- // physical registers that may contain the value of the spilled virtual
- // register.
-
- // Copy the folded virts to a small vector, we may change MI2VirtMap.
- SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
- // C++0x FTW!
- for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
- VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
- VRM->getFoldedVirts(&MI);
- FVRange.first != FVRange.second; ++FVRange.first)
- FoldedVirts.push_back(FVRange.first->second);
-
- SmallSet<int, 2> FoldedSS;
- for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
- unsigned VirtReg = FoldedVirts[FVI].first;
- VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
- DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR);
-
- int SS = VRM->getStackSlot(VirtReg);
- if (SS == VirtRegMap::NO_STACK_SLOT)
- continue;
- FoldedSS.insert(SS);
- DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
-
- // If this folded instruction is just a use, check to see if it's a
- // straight load from the virt reg slot.
- if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
- int FrameIdx;
- unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
- if (DestReg && FrameIdx == SS) {
- // If this spill slot is available, turn it into a copy (or nothing)
- // instead of leaving it as a load!
- if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
- DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
- if (DestReg != InReg) {
- MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
- MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY))
- .addReg(DestReg, RegState::Define, DefMO->getSubReg())
- .addReg(InReg, RegState::Kill);
- // Revisit the copy so we make sure to notice the effects of the
- // operation on the destreg (either needing to RA it if it's
- // virtual or needing to clobber any values if it's physical).
- NextMII = CopyMI;
- NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- BackTracked = true;
- } else {
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- // InvalidateKills resurrects any prior kill of the copy's source
- // allowing the source reg to be reused in place of the copy.
- Spills.disallowClobberPhysReg(InReg);
- }
-
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- goto ProcessNextInst;
- }
- } else {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- SmallVector<MachineInstr*, 4> NewMIs;
- if (PhysReg &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
- MBB->insert(MII, NewMIs[0]);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- --NextMII; // backtrack to the unfolded instruction.
- BackTracked = true;
- goto ProcessNextInst;
- }
- }
- }
-
- // If this reference is not a use, any previous store is now dead.
- // Otherwise, the store to this stack slot is not dead anymore.
- MachineInstr* DeadStore = MaybeDeadStores[SS];
- if (DeadStore) {
- bool isDead = !(MR & VirtRegMap::isRef);
- MachineInstr *NewStore = NULL;
- if (MR & VirtRegMap::isModRef) {
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
- SmallVector<MachineInstr*, 4> NewMIs;
- // We can reuse this physreg as long as we are allowed to clobber
- // the value and there isn't an earlier def that has already clobbered
- // the physreg.
- if (PhysReg &&
- !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg) &&
- !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
- MachineOperand *KillOpnd =
- DeadStore->findRegisterUseOperand(PhysReg, true);
- // Note, if the store is storing a sub-register, it's possible the
- // super-register is needed below.
- if (KillOpnd && !KillOpnd->getSubReg() &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){
- MBB->insert(MII, NewMIs[0]);
- NewStore = NewMIs[1];
- MBB->insert(MII, NewStore);
- VRM->addSpillSlotUse(SS, NewStore);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- --NextMII;
- --NextMII; // backtrack to the unfolded instruction.
- BackTracked = true;
- isDead = true;
- ++NumSUnfold;
- }
- }
- }
-
- if (isDead) { // Previous store is dead.
- // If we get here, the store is dead, nuke it now.
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- EraseInstr(DeadStore);
- if (!NewStore)
- ++NumDSE;
- }
-
- MaybeDeadStores[SS] = NULL;
- if (NewStore) {
- // Treat this store as a spill merged into a copy. That makes the
- // stack slot value available.
- VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
- goto ProcessNextInst;
- }
- }
-
- // If the spill slot value is available, and this is a new definition of
- // the value, the value is not available anymore.
- if (MR & VirtRegMap::isMod) {
- // Notice that the value in this stack slot has been modified.
- Spills.ModifyStackSlotOrReMat(SS);
-
- // If this is *just* a mod of the value, check to see if this is just a
- // store to the spill slot (i.e. the spill got merged into the copy). If
- // so, realize that the vreg is available now, and add the store to the
- // MaybeDeadStore info.
- int StackSlot;
- if (!(MR & VirtRegMap::isRef)) {
- if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
- assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- "Src hasn't been allocated yet?");
-
- if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
- Spills, RegKills, KillOps, TRI)) {
- NextMII = llvm::next(MII);
- BackTracked = true;
- goto ProcessNextInst;
- }
-
- // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
- // this as a potentially dead store in case there is a subsequent
- // store into the stack slot without a read from it.
- MaybeDeadStores[StackSlot] = &MI;
-
- // If the stack slot value was previously available in some other
- // register, change it now. Otherwise, make the register
- // available in PhysReg.
- Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
- }
- }
- }
- }
-
- // Process all of the spilled defs.
- SpilledMIRegs.clear();
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!(MO.isReg() && MO.getReg() && MO.isDef()))
- continue;
-
- unsigned VirtReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- // Also check if it's copying from an "undef"; if so, we can't
- // eliminate this or else the undef marker is lost and it will
- // confuse the scavenger. This is extremely rare.
- if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
- MI.getNumOperands() == 2) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
- if (MO.isDead() && !KillRegs.empty()) {
- // Source register or an implicit super/sub-register use is killed.
- assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
- // Last def is now dead.
- TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
- }
- EraseInstr(&MI);
- Erased = true;
- Spills.disallowClobberPhysReg(VirtReg);
- goto ProcessNextInst;
- }
-
- // If it's not a no-op copy, it clobbers the value in the destreg.
- Spills.ClobberPhysReg(VirtReg);
- ReusedOperands.markClobbered(VirtReg);
-
- // Check to see if this instruction is a load from a stack slot into
- // a register. If so, this provides the stack slot value in the reg.
- int FrameIdx;
- if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
- assert(DestReg == VirtReg && "Unknown load situation!");
-
- // If it is a folded reference, then it's not safe to clobber.
- bool Folded = FoldedSS.count(FrameIdx);
- // Otherwise, if it wasn't available, remember that it is now!
- Spills.addAvailable(FrameIdx, DestReg, !Folded);
- goto ProcessNextInst;
- }
-
- continue;
- }
-
- unsigned SubIdx = MO.getSubReg();
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- if (DoReMat)
- ReMatDefs.insert(&MI);
-
- // The only vregs left are stack slot definitions.
- int StackSlot = VRM->getStackSlot(VirtReg);
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
-
- // If this def is part of a two-address operand, make sure to execute
- // the store from the correct physical register.
- unsigned PhysReg;
- unsigned TiedOp;
- if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
- PhysReg = MI.getOperand(TiedOp).getReg();
- if (SubIdx) {
- unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
- assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
- "Can't find corresponding super-register!");
- PhysReg = SuperReg;
- }
- } else {
- PhysReg = VRM->getPhys(VirtReg);
- if (ReusedOperands.isClobbered(PhysReg)) {
- // Another def has taken the assigned physreg. It must have been a
- // use&def which got it due to reuse. Undo the reuse!
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
- }
- }
-
- // If StackSlot is available in a register that also holds other stack
- // slots, clobber those stack slots now.
- Spills.ClobberSharingStackSlots(StackSlot);
-
- assert(PhysReg && "VR not assigned a physical register?");
- MRI->setPhysRegUsed(PhysReg);
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- ReusedOperands.markClobbered(RReg);
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
- MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
- SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
- LastStore, Spills, ReMatDefs, RegKills, KillOps);
- NextMII = llvm::next(MII);
-
- // Check to see if this is a noop copy. If so, eliminate the
- // instruction before considering the dest reg to be changed.
- if (MI.isIdentityCopy()) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- UpdateKills(*LastStore, TRI, RegKills, KillOps);
- goto ProcessNextInst;
- }
- }
- }
- ProcessNextInst:
- // Delete dead instructions without side effects.
- if (!Erased && !BackTracked && isSafeToDelete(MI)) {
- InvalidateKills(MI, TRI, RegKills, KillOps);
- EraseInstr(&MI);
- Erased = true;
- }
- if (!Erased)
- DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
- if (!Erased && !BackTracked) {
- for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
- UpdateKills(*II, TRI, RegKills, KillOps);
- }
- MII = NextMII;
- }
-
-}
-
-llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
- switch (RewriterOpt) {
- default: llvm_unreachable("Unreachable!");
- case local:
- return new LocalRewriter();
- case trivial:
- return new TrivialRewriter();
- }
-}
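The subtlest case in the deleted spiller logic above is a tied (use&def) operand whose reload value is already sitting in some PhysReg: reusing PhysReg directly would let the instruction clobber a value that later code still expects. A hedged, plain-C++ rendering of that hazard (the names are hypothetical; the real fix is the COPY into DesignatedReg emitted above):

    #include <cassert>

    static int reloadSlot() { return 42; } // stand-in for the stack-slot value

    int main() {
      int R0 = reloadSlot();  // the slot's value is already live in R0
      int &Tied = R0;         // two-address: the def is tied to this use
      Tied += 1;              // the instruction destroys R0's old value
      assert(R0 != 42);       // any later reader expecting the old R0 breaks,
                              // hence the copy into a designated register
      return 0;
    }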
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
deleted file mode 100644
index 93474e0d7ff7..000000000000
--- a/lib/CodeGen/VirtRegRewriter.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
-#define LLVM_CODEGEN_VIRTREGREWRITER_H
-
-namespace llvm {
- class LiveIntervals;
- class MachineFunction;
- class VirtRegMap;
-
- /// VirtRegRewriter interface: Implementations of this interface assign
- /// spilled virtual registers to stack slots, rewriting the code.
- struct VirtRegRewriter {
- virtual ~VirtRegRewriter();
- virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
- LiveIntervals* LIs) = 0;
- };
-
- /// createVirtRegRewriter - Create and return a rewriter object, as specified
- /// on the command line.
- VirtRegRewriter* createVirtRegRewriter();
-
-}
-
-#endif
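For reference, the deleted interface was driven by the allocator roughly as follows; a hedged sketch reconstructed only from the declarations above (MF, VRM and LIs are whatever the caller had in hand):

    #include "VirtRegRewriter.h" // the header being deleted here

    void rewrite(llvm::MachineFunction &MF, llvm::VirtRegMap &VRM,
                 llvm::LiveIntervals *LIs) {
      llvm::VirtRegRewriter *RW = llvm::createVirtRegRewriter(); // local/trivial
      RW->runOnMachineFunction(MF, VRM, LIs); // assign slots, rewrite the code
      delete RW;                              // virtual dtor declared above
    }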
diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt
index fdffcb6a77c6..441f1e86dcd8 100644
--- a/lib/DebugInfo/CMakeLists.txt
+++ b/lib/DebugInfo/CMakeLists.txt
@@ -10,7 +10,3 @@ add_llvm_library(LLVMDebugInfo
DWARFDebugLine.cpp
DWARFFormValue.cpp
)
-
-add_llvm_library_dependencies(LLVMDebugInfo
- LLVMSupport
- )
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index e1ac398b1011..dccadc4ea4da 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -165,3 +165,5 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t address) {
return DILineInfo(fileName.c_str(), row.Line, row.Column);
}
+
+void DWARFContextInMemory::anchor() { }
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index 746a4639f277..d2e763a87a45 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -86,6 +86,7 @@ public:
/// DWARFContext. It assumes all content is available in memory and stores
/// pointers to it.
class DWARFContextInMemory : public DWARFContext {
+ virtual void anchor();
StringRef InfoSection;
StringRef AbbrevSection;
StringRef ARangeSection;
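The anchor() added here (declared in the header, defined out of line in DWARFContext.cpp above) is the usual LLVM idiom for pinning a class's vtable to a single translation unit; without at least one out-of-line virtual function, every TU that uses the class may emit its own weak copy of the vtable. A minimal sketch of the idiom, with hypothetical names:

    // Widget.h
    struct Widget {
      virtual ~Widget() {}
      virtual void anchor(); // intentionally trivial; defined in one .cpp only
    };

    // Widget.cpp
    void Widget::anchor() {} // the vtable (and RTTI) now live in this TU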
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp
index a11ae3f2908e..6e6c37e30945 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp
@@ -83,7 +83,7 @@ void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
DWARFAbbreviationDeclarationCollMapConstIter pos;
for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) {
- OS << format("Abbrev table for offset: 0x%8.8x\n", pos->first);
+ OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", pos->first);
pos->second.dump(OS);
}
}
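This hunk, and several below, repair format strings whose conversion did not match the width of the argument: handing a 64-bit value to a plain "%x" is undefined behavior in a varargs call. The <cinttypes> PRI* macros splice the correct conversion into the literal. A minimal, self-contained illustration:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Offset = 0x1122334455667788ULL;
      // printf("0x%8.8x\n", Offset);       // width mismatch: undefined
      printf("0x%8.8" PRIx64 "\n", Offset); // prints 0x1122334455667788
      return 0;
    }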
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h
index 03189b132127..c7c0436866c4 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.h
+++ b/lib/DebugInfo/DWARFDebugAbbrev.h
@@ -25,21 +25,21 @@ typedef DWARFAbbreviationDeclarationColl::const_iterator
DWARFAbbreviationDeclarationCollConstIter;
class DWARFAbbreviationDeclarationSet {
- uint64_t Offset;
+ uint32_t Offset;
uint32_t IdxOffset;
std::vector<DWARFAbbreviationDeclaration> Decls;
public:
DWARFAbbreviationDeclarationSet()
: Offset(0), IdxOffset(0) {}
- DWARFAbbreviationDeclarationSet(uint64_t offset, uint32_t idxOffset)
+ DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset)
: Offset(offset), IdxOffset(idxOffset) {}
void clear() {
IdxOffset = 0;
Decls.clear();
}
- uint64_t getOffset() const { return Offset; }
+ uint32_t getOffset() const { return Offset; }
void dump(raw_ostream &OS) const;
bool extract(DataExtractor data, uint32_t* offset_ptr);
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp
index b0c0354383b9..2efbfd1f92fb 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.cpp
+++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp
@@ -122,8 +122,9 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const {
const uint32_t hex_width = Header.AddrSize * 2;
for (DescriptorConstIter pos = ArangeDescriptors.begin(),
end = ArangeDescriptors.end(); pos != end; ++pos)
- OS << format("[0x%*.*llx -", hex_width, hex_width, pos->Address)
- << format(" 0x%*.*llx)\n", hex_width, hex_width, pos->getEndAddress());
+ OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address)
+ << format(" 0x%*.*" PRIx64 ")\n",
+ hex_width, hex_width, pos->getEndAddress());
}
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index 576d37d7813a..178814535612 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -100,13 +100,14 @@ void DWARFDebugAranges::dump(raw_ostream &OS) const {
const uint32_t num_ranges = getNumRanges();
for (uint32_t i = 0; i < num_ranges; ++i) {
const Range &range = Aranges[i];
- OS << format("0x%8.8x: [0x%8.8llx - 0x%8.8llx)\n", range.Offset,
- (uint64_t)range.LoPC, (uint64_t)range.HiPC());
+ OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
+ range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC());
}
}
void DWARFDebugAranges::Range::dump(raw_ostream &OS) const {
- OS << format("{0x%8.8x}: [0x%8.8llx - 0x%8.8llx)\n", Offset, LoPC, HiPC());
+ OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n",
+ Offset, LoPC, HiPC());
}
void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc,
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index 1b089adbe13b..236db97c44af 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -26,7 +26,7 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS,
uint32_t offset = Offset;
if (debug_info_data.isValidOffset(offset)) {
- uint64_t abbrCode = debug_info_data.getULEB128(&offset);
+ uint32_t abbrCode = debug_info_data.getULEB128(&offset);
OS << format("\n0x%8.8x: ", Offset);
if (abbrCode) {
@@ -203,8 +203,6 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu,
AbbrevDecl = NULL;
return true; // NULL debug tag entry
}
-
- return false;
}
bool
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index aff2e8556729..37b3bcdd96e6 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -23,7 +23,7 @@ class DWARFFormValue;
/// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data.
class DWARFDebugInfoEntryMinimal {
/// Offset within the .debug_info of the start of this entry.
- uint64_t Offset;
+ uint32_t Offset;
/// How many to subtract from "this" to get the parent.
/// If zero this die has no parent.
@@ -52,7 +52,7 @@ public:
uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
bool isNULL() const { return AbbrevDecl == 0; }
- uint64_t getOffset() const { return Offset; }
+ uint32_t getOffset() const { return Offset; }
uint32_t getNumAttributes() const {
return !isNULL() ? AbbrevDecl->getNumAttributes() : 0;
}
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index fe1ef78b026e..117fa31aa86f 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -41,8 +41,9 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
"----------------\n";
for (uint32_t i = 0; i < FileNames.size(); ++i) {
const FileNameEntry& fileEntry = FileNames[i];
- OS << format("file_names[%3u] %4u ", i+1, fileEntry.DirIdx)
- << format("0x%8.8x 0x%8.8x ", fileEntry.ModTime, fileEntry.Length)
+ OS << format("file_names[%3u] %4" PRIu64 " ", i+1, fileEntry.DirIdx)
+ << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ",
+ fileEntry.ModTime, fileEntry.Length)
<< fileEntry.Name << '\n';
}
}
@@ -68,7 +69,7 @@ void DWARFDebugLine::Row::reset(bool default_is_stmt) {
}
void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
- OS << format("0x%16.16llx %6u %6u", Address, Line, Column)
+ OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column)
<< format(" %6u %3u ", File, Isa)
<< (IsStmt ? " is_stmt" : "")
<< (BasicBlock ? " basic_block" : "")
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index 705efe5549b6..ee2a3ab7b789 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -263,12 +263,12 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
bool cu_relative_offset = false;
switch (Form) {
- case DW_FORM_addr: OS << format("0x%016x", uvalue); break;
+ case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break;
case DW_FORM_flag:
- case DW_FORM_data1: OS << format("0x%02x", uvalue); break;
- case DW_FORM_data2: OS << format("0x%04x", uvalue); break;
- case DW_FORM_data4: OS << format("0x%08x", uvalue); break;
- case DW_FORM_data8: OS << format("0x%016x", uvalue); break;
+ case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break;
+ case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break;
+ case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break;
+ case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break;
case DW_FORM_string:
OS << '"';
OS.write_escaped(getAsCString(NULL));
@@ -280,7 +280,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
case DW_FORM_block4:
if (uvalue > 0) {
switch (Form) {
- case DW_FORM_block: OS << format("<0x%llx> ", uvalue); break;
+ case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break;
case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break;
case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break;
case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break;
@@ -314,7 +314,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
break;
}
case DW_FORM_ref_addr:
- OS << format("0x%016x", uvalue);
+ OS << format("0x%016" PRIx64, uvalue);
break;
case DW_FORM_ref1:
cu_relative_offset = true;
@@ -330,11 +330,11 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
break;
case DW_FORM_ref8:
cu_relative_offset = true;
- OS << format("cu + 0x%8.8llx", uvalue);
+ OS << format("cu + 0x%8.8" PRIx64, uvalue);
break;
case DW_FORM_ref_udata:
cu_relative_offset = true;
- OS << format("cu + 0x%llx", uvalue);
+ OS << format("cu + 0x%" PRIx64, uvalue);
break;
// All DW_FORM_indirect attributes should be resolved prior to calling
@@ -348,7 +348,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const {
}
if (cu_relative_offset)
- OS << format(" => {0x%8.8x}", (uvalue + (cu ? cu->getOffset() : 0)));
+ OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0));
}
const char*
diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..210b9f9d356e
--- /dev/null
+++ b/lib/DebugInfo/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/DebugInfo/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = DebugInfo
+parent = Libraries
+required_libraries = Support
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt
index fb14d41e91d2..cb11bfe93c7c 100644
--- a/lib/ExecutionEngine/CMakeLists.txt
+++ b/lib/ExecutionEngine/CMakeLists.txt
@@ -1,17 +1,20 @@
+
+
add_llvm_library(LLVMExecutionEngine
ExecutionEngine.cpp
ExecutionEngineBindings.cpp
TargetSelect.cpp
)
-add_llvm_library_dependencies(LLVMExecutionEngine
- LLVMCore
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(Interpreter)
add_subdirectory(JIT)
add_subdirectory(MCJIT)
add_subdirectory(RuntimeDyld)
+
+if( LLVM_USE_OPROFILE )
+ add_subdirectory(OProfileJIT)
+endif( LLVM_USE_OPROFILE )
+
+if( LLVM_USE_INTEL_JITEVENTS )
+ add_subdirectory(IntelJITEvents)
+endif( LLVM_USE_INTEL_JITEVENTS )
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
new file mode 100644
index 000000000000..1c07c947142f
--- /dev/null
+++ b/lib/ExecutionEngine/EventListenerCommon.h
@@ -0,0 +1,67 @@
+//===-- EventListenerCommon.h - Shared JITEventListener code ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common functionality for JITEventListener implementations
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EVENT_LISTENER_COMMON_H
+#define EVENT_LISTENER_COMMON_H
+
+#include "llvm/Metadata.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace jitprofiling {
+
+class FilenameCache {
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into oprofile. Use an AssertingVH rather than a ValueMap because we
+ // shouldn't be modifying any MDNodes while this map is alive.
+ DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+ DenseMap<AssertingVH<MDNode>, std::string> Paths;
+
+ public:
+ const char *getFilename(MDNode *Scope) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
+ DIScope DIScope(Scope);
+ Filename = DIScope.getFilename();
+ }
+ return Filename.c_str();
+ }
+
+ const char *getFullPath(MDNode *Scope) {
+ std::string &P = Paths[Scope];
+ if (P.empty()) {
+ DIScope DIScope(Scope);
+ StringRef DirName = DIScope.getDirectory();
+ StringRef FileName = DIScope.getFilename();
+ SmallString<256> FullPath;
+ if (DirName != "." && DirName != "") {
+ FullPath = DirName;
+ }
+ if (FileName != "") {
+ sys::path::append(FullPath, FileName);
+ }
+ P = FullPath.str();
+ }
+ return P.c_str();
+ }
+};
+
+} // namespace jitprofiling
+
+} // namespace llvm
+
+#endif //EVENT_LISTENER_COMMON_H
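Both profiling listeners need NUL-terminated strings that remain valid for as long as the profiler may read them; FilenameCache owns the std::strings and keys them by scope MDNode. A hedged usage sketch (pathFor is a made-up helper; Scope would come from a DebugLoc, e.g. Loc.getScope(Ctx)):

    #include "EventListenerCommon.h" // the header added above

    const char *pathFor(llvm::MDNode *Scope,
                        llvm::jitprofiling::FilenameCache &Cache) {
      // The pointer is owned by the cache and stays valid while the
      // cache (and the MDNode) are alive.
      return Cache.getFullPath(Scope);
    }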
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 525877b68900..a744d0c1e798 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
@@ -41,14 +42,12 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
@@ -308,13 +307,12 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
// Should be an array of '{ i32, void ()* }' structs. The first value is
// the init priority, which we ignore.
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (InitList == 0)
return;
- ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer());
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (isa<ConstantAggregateZero>(InitList->getOperand(i)))
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(i));
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ if (CS == 0) continue;
Constant *FP = CS->getOperand(1);
if (FP->isNullValue())
@@ -404,14 +402,15 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
std::string *ErrorStr,
CodeGenOpt::Level OptLevel,
bool GVsWithCode) {
- return EngineBuilder(M)
+ EngineBuilder EB = EngineBuilder(M)
.setEngineKind(ForceInterpreter
? EngineKind::Interpreter
: EngineKind::JIT)
.setErrorStr(ErrorStr)
.setOptLevel(OptLevel)
- .setAllocateGVsWithCode(GVsWithCode)
- .create();
+ .setAllocateGVsWithCode(GVsWithCode);
+
+ return EB.create();
}
/// createJIT - This is the factory method for creating a JIT for the current
@@ -420,7 +419,7 @@ ExecutionEngine *ExecutionEngine::create(Module *M,
ExecutionEngine *ExecutionEngine::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
+ CodeGenOpt::Level OL,
bool GVsWithCode,
Reloc::Model RM,
CodeModel::Model CMM) {
@@ -432,18 +431,25 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
// Use the defaults for extra parameters. Users can use EngineBuilder to
// set them.
- StringRef MArch = "";
- StringRef MCPU = "";
- SmallVector<std::string, 1> MAttrs;
-
- TargetMachine *TM =
- EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs, RM, CMM, ErrorStr);
+ EngineBuilder EB(M);
+ EB.setEngineKind(EngineKind::JIT);
+ EB.setErrorStr(ErrorStr);
+ EB.setRelocationModel(RM);
+ EB.setCodeModel(CMM);
+ EB.setAllocateGVsWithCode(GVsWithCode);
+ EB.setOptLevel(OL);
+ EB.setJITMemoryManager(JMM);
+
+ // TODO: permit custom TargetOptions here
+ TargetMachine *TM = EB.selectTarget();
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
- return ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, GVsWithCode, TM);
+ return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
-ExecutionEngine *EngineBuilder::create() {
+ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
+ OwningPtr<TargetMachine> TheTM(TM); // Take ownership.
+
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -464,21 +470,24 @@ ExecutionEngine *EngineBuilder::create() {
// Unless the interpreter was explicitly selected or the JIT is not linked,
// try making a JIT.
- if (WhichEngine & EngineKind::JIT) {
- if (TargetMachine *TM = EngineBuilder::selectTarget(M, MArch, MCPU, MAttrs,
- RelocModel, CMModel,
- ErrorStr)) {
- if (UseMCJIT && ExecutionEngine::MCJITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel,
- AllocateGVsWithCode, TM);
- if (EE) return EE;
- } else if (ExecutionEngine::JITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel,
- AllocateGVsWithCode, TM);
- if (EE) return EE;
- }
+ if ((WhichEngine & EngineKind::JIT) && TheTM) {
+ Triple TT(M->getTargetTriple());
+ if (!TM->getTarget().hasJIT()) {
+ errs() << "WARNING: This target JIT is not designed for the host"
+ << " you are running. If bad things happen, please choose"
+ << " a different -march switch.\n";
+ }
+
+ if (UseMCJIT && ExecutionEngine::MCJITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::MCJITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
+ } else if (ExecutionEngine::JITCtor) {
+ ExecutionEngine *EE =
+ ExecutionEngine::JITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.take());
+ if (EE) return EE;
}
}
@@ -944,30 +953,47 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) {
DEBUG(dbgs() << "JIT: Initializing " << Addr << " ");
DEBUG(Init->dump());
- if (isa<UndefValue>(Init)) {
+ if (isa<UndefValue>(Init))
return;
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
+
+ if (const ConstantVector *CP = dyn_cast<ConstantVector>(Init)) {
unsigned ElementSize =
getTargetData()->getTypeAllocSize(CP->getType()->getElementType());
for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize);
return;
- } else if (isa<ConstantAggregateZero>(Init)) {
+ }
+
+ if (isa<ConstantAggregateZero>(Init)) {
memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType()));
return;
- } else if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
+ }
+
+ if (const ConstantArray *CPA = dyn_cast<ConstantArray>(Init)) {
unsigned ElementSize =
getTargetData()->getTypeAllocSize(CPA->getType()->getElementType());
for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize);
return;
- } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
+ }
+
+ if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(Init)) {
const StructLayout *SL =
getTargetData()->getStructLayout(cast<StructType>(CPS->getType()));
for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i));
return;
- } else if (Init->getType()->isFirstClassType()) {
+ }
+
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Init)) {
+ // CDS is already laid out in host memory order.
+ StringRef Data = CDS->getRawDataValues();
+ memcpy(Addr, Data.data(), Data.size());
+ return;
+ }
+
+ if (Init->getType()->isFirstClassType()) {
GenericValue Val = getConstantValue(Init);
StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType());
return;
@@ -1123,6 +1149,6 @@ void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES,
void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *,
const GlobalValue *,
const GlobalValue *) {
- assert(false && "The ExecutionEngine doesn't know how to handle a"
- " RAUW on a value it has a global mapping for.");
+ llvm_unreachable("The ExecutionEngine doesn't know how to handle a"
+ " RAUW on a value it has a global mapping for.");
}
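The net effect of the ExecutionEngine.cpp changes: every construction path now funnels through EngineBuilder, selectTarget() has become a member, and create() accepts an externally chosen TargetMachine. A hedged sketch of the resulting flow (API as of this patch; error handling trimmed, and InitializeNativeTarget() is assumed to have run):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"

    llvm::ExecutionEngine *makeJIT(llvm::Module *M, std::string &Err) {
      llvm::EngineBuilder EB(M);
      EB.setEngineKind(llvm::EngineKind::JIT)
        .setErrorStr(&Err)
        .setOptLevel(llvm::CodeGenOpt::Default);
      // selectTarget() is now a member, so the TargetMachine can be
      // inspected before handing ownership to create(TM).
      llvm::TargetMachine *TM = EB.selectTarget();
      return TM ? EB.create(TM) : 0;
    }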
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index f8f1f4a78ee5..75e680ab3612 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -76,9 +76,7 @@ double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) {
return unwrap(GenVal)->DoubleVal;
default:
llvm_unreachable("LLVMGenericValueToFloat supports only float and double.");
- break;
}
- return 0; // Not reached
}
void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
diff --git a/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
new file mode 100644
index 000000000000..7d67d0d8bee1
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/CMakeLists.txt
@@ -0,0 +1,11 @@
+
+include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+set(system_libs
+ ${system_libs}
+ jitprofiling
+ )
+
+add_llvm_library(LLVMIntelJITEvents
+ IntelJITEventListener.cpp
+ )
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
new file mode 100644
index 000000000000..5dfa78f34a33
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -0,0 +1,183 @@
+//===-- IntelJITEventListener.cpp - Tell Intel profiler about JITed code --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object to tell Intel(R) VTune(TM)
+// Amplifier XE 2011 about JITted functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "amplifier-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Support/ValueHandle.h"
+#include "EventListenerCommon.h"
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class IntelJITEventListener : public JITEventListener {
+ typedef DenseMap<void*, unsigned int> MethodIDMap;
+
+ IntelJITEventsWrapper& Wrapper;
+ MethodIDMap MethodIDs;
+ FilenameCache Filenames;
+
+public:
+ IntelJITEventListener(IntelJITEventsWrapper& libraryWrapper)
+ : Wrapper(libraryWrapper) {
+ }
+
+ ~IntelJITEventListener() {
+ }
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details);
+
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+static LineNumberInfo LineStartToIntelJITFormat(
+ uintptr_t StartAddress,
+ uintptr_t Address,
+ DebugLoc Loc) {
+ LineNumberInfo Result;
+
+ Result.Offset = Address - StartAddress;
+ Result.LineNumber = Loc.getLine();
+
+ return Result;
+}
+
+static iJIT_Method_Load FunctionDescToIntelJITFormat(
+ IntelJITEventsWrapper& Wrapper,
+ const char* FnName,
+ uintptr_t FnStart,
+ size_t FnSize) {
+ iJIT_Method_Load Result;
+ memset(&Result, 0, sizeof(iJIT_Method_Load));
+
+ Result.method_id = Wrapper.iJIT_GetNewMethodID();
+ Result.method_name = const_cast<char*>(FnName);
+ Result.method_load_address = reinterpret_cast<void*>(FnStart);
+ Result.method_size = FnSize;
+
+ Result.class_id = 0;
+ Result.class_file_name = NULL;
+ Result.user_data = NULL;
+ Result.user_data_size = 0;
+ Result.env = iJDE_JittingAPI;
+
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void IntelJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const EmittedFunctionDetails &Details) {
+ iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(Wrapper,
+ F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnSize);
+
+ std::vector<LineNumberInfo> LineInfo;
+
+ if (!Details.LineStarts.empty()) {
+ // Now convert the line number information from the address/DebugLoc
+ // format in Details to the offset/lineno in Intel JIT API format.
+
+ LineInfo.reserve(Details.LineStarts.size() + 1);
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(FirstLocScope));
+
+ LineNumberInfo FirstLine;
+ FirstLine.Offset = 0;
+ FirstLine.LineNumber = FunctionDI.getLineNumber();
+ LineInfo.push_back(FirstLine);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator I =
+ Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ // This implementation ignores the DebugLoc filename because the Intel
+ // JIT API does not support multiple source files associated with a single
+ // JIT function.
+ LineInfo.push_back(LineStartToIntelJITFormat(
+ reinterpret_cast<uintptr_t>(FnStart),
+ I->Address,
+ I->Loc));
+
+ // If we have no file name yet for the function, use the filename from
+ // the first instruction that has one
+ if (FunctionMessage.source_file_name == 0) {
+ MDNode *scope = I->Loc.getScope(
+ Details.MF->getFunction()->getContext());
+ FunctionMessage.source_file_name = const_cast<char*>(
+ Filenames.getFullPath(scope));
+ }
+ }
+
+ FunctionMessage.line_number_size = LineInfo.size();
+ FunctionMessage.line_number_table = &*LineInfo.begin();
+ } else {
+ FunctionMessage.line_number_size = 0;
+ FunctionMessage.line_number_table = 0;
+ }
+
+ Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED,
+ &FunctionMessage);
+ MethodIDs[FnStart] = FunctionMessage.method_id;
+}
+
+void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ MethodIDMap::iterator I = MethodIDs.find(FnStart);
+ if (I != MethodIDs.end()) {
+ Wrapper.iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, &I->second);
+ MethodIDs.erase(I);
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createIntelJITEventListener() {
+ static OwningPtr<IntelJITEventsWrapper> JITProfilingWrapper(
+ new IntelJITEventsWrapper);
+ return new IntelJITEventListener(*JITProfilingWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createIntelJITEventListener(
+ IntelJITEventsWrapper* TestImpl) {
+ return new IntelJITEventListener(*TestImpl);
+}
+
+} // namespace llvm
+
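Once built (the CMake and Makefile glue above gates it on LLVM_USE_INTEL_JITEVENTS), the listener attaches to an ExecutionEngine through the existing JITEventListener hook; a hedged usage sketch:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JITEventListener.h"

    void attachVTune(llvm::ExecutionEngine *EE) {
      // NotifyFunctionEmitted / NotifyFreeingMachineCode now fire for
      // every function the JIT emits or frees.
      EE->RegisterJITEventListener(
          llvm::JITEventListener::createIntelJITEventListener());
    }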
diff --git a/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
new file mode 100644
index 000000000000..80d227326441
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/JITProfileAmplifier/LLVMBuild.txt --*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = IntelJITEvents
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/IntelJITEvents/Makefile b/lib/ExecutionEngine/IntelJITEvents/Makefile
new file mode 100644
index 000000000000..ba75ac6f6462
--- /dev/null
+++ b/lib/ExecutionEngine/IntelJITEvents/Makefile
@@ -0,0 +1,17 @@
+##===- lib/ExecutionEngine/JITProfile/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMIntelJITEvents
+
+include $(LEVEL)/Makefile.config
+
+SOURCES := IntelJITEventListener.cpp
+CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR) -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
index 4fb58c2e3783..d331f830b62e 100644
--- a/lib/ExecutionEngine/Interpreter/CMakeLists.txt
+++ b/lib/ExecutionEngine/Interpreter/CMakeLists.txt
@@ -12,14 +12,6 @@ add_llvm_library(LLVMInterpreter
Interpreter.cpp
)
-add_llvm_library_dependencies(LLVMInterpreter
- LLVMCodeGen
- LLVMCore
- LLVMExecutionEngine
- LLVMSupport
- LLVMTarget
- )
-
if( LLVM_ENABLE_FFI )
target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} )
endif()
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 27917da07a2c..af47be9c5b56 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -625,24 +625,6 @@ void Interpreter::visitReturnInst(ReturnInst &I) {
popStackAndReturnValueToCaller(RetTy, Result);
}
-void Interpreter::visitUnwindInst(UnwindInst &I) {
- // Unwind stack
- Instruction *Inst;
- do {
- ECStack.pop_back();
- if (ECStack.empty())
- report_fatal_error("Empty stack during unwind!");
- Inst = ECStack.back().Caller.getInstruction();
- } while (!(Inst && isa<InvokeInst>(Inst)));
-
- // Return from invoke
- ExecutionContext &InvokingSF = ECStack.back();
- InvokingSF.Caller = CallSite();
-
- // Go to exceptional destination BB of invoke instruction
- SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF);
-}
-
void Interpreter::visitUnreachableInst(UnreachableInst &I) {
report_fatal_error("Program executed an 'unreachable' instruction!");
}
@@ -668,12 +650,10 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
// Check to see if any of the cases match...
BasicBlock *Dest = 0;
- unsigned NumCases = I.getNumCases();
- // Skip the first item since that's the default case.
- for (unsigned i = 1; i < NumCases; ++i) {
- GenericValue CaseVal = getOperandValue(I.getCaseValue(i), SF);
+ for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
+ GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
- Dest = cast<BasicBlock>(I.getSuccessor(i));
+ Dest = cast<BasicBlock>(i.getCaseSuccessor());
break;
}
}
@@ -1253,8 +1233,7 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE,
break;
default:
dbgs() << "Unhandled ConstantExpr: " << *CE << "\n";
- llvm_unreachable(0);
- return GenericValue();
+ llvm_unreachable("Unhandled ConstantExpr");
}
return Dest;
}
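The switch-handling hunk above moves the interpreter off raw case indices, where slot 0 was the default and real cases started at 1, onto the then-new SwitchInst::CaseIt iterators, which skip the default case entirely. A hedged sketch of the iteration pattern as this patch uses it:

    #include "llvm/Instructions.h"

    void walkCases(llvm::SwitchInst &SI) {
      for (llvm::SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
           i != e; ++i) {
        llvm::ConstantInt *CV = i.getCaseValue(); // never the default case
        llvm::BasicBlock *BB = i.getCaseSuccessor();
        (void)CV; (void)BB;                       // use as needed
      }
    }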
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 055875c9456a..7a206ebf73d7 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -94,15 +94,16 @@ static ExFunc lookupFunction(const Function *F) {
FunctionType *FT = F->getFunctionType();
for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i)
ExtName += getTypeID(FT->getContainedType(i));
- ExtName + "_" + F->getNameStr();
+ ExtName += "_" + F->getName().str();
sys::ScopedLock Writer(*FunctionsLock);
ExFunc FnPtr = FuncNames[ExtName];
if (FnPtr == 0)
- FnPtr = FuncNames["lle_X_" + F->getNameStr()];
+ FnPtr = FuncNames["lle_X_" + F->getName().str()];
if (FnPtr == 0) // Try calling a generic function... if it exists...
FnPtr = (ExFunc)(intptr_t)
- sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr());
+ sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
+ F->getName().str());
if (FnPtr != 0)
ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
return FnPtr;
@@ -296,14 +297,8 @@ GenericValue Interpreter::callExternalFunction(Function *F,
// Functions "exported" to the running application...
//
-// Visual Studio warns about returning GenericValue in extern "C" linkage
-#ifdef _MSC_VER
- #pragma warning(disable : 4190)
-#endif
-
-extern "C" { // Don't add C++ manglings to llvm mangling :)
-
// void atexit(Function*)
+static
GenericValue lle_X_atexit(FunctionType *FT,
const std::vector<GenericValue> &Args) {
assert(Args.size() == 1);
@@ -314,6 +309,7 @@ GenericValue lle_X_atexit(FunctionType *FT,
}
// void exit(int)
+static
GenericValue lle_X_exit(FunctionType *FT,
const std::vector<GenericValue> &Args) {
TheInterpreter->exitCalled(Args[0]);
@@ -321,6 +317,7 @@ GenericValue lle_X_exit(FunctionType *FT,
}
// void abort(void)
+static
GenericValue lle_X_abort(FunctionType *FT,
const std::vector<GenericValue> &Args) {
//FIXME: should we report or raise here?
@@ -331,6 +328,7 @@ GenericValue lle_X_abort(FunctionType *FT,
// int sprintf(char *, const char *, ...) - a very rough implementation to make
// output useful.
+static
GenericValue lle_X_sprintf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
char *OutputBuffer = (char *)GVTOP(Args[0]);
@@ -408,11 +406,11 @@ GenericValue lle_X_sprintf(FunctionType *FT,
break;
}
}
- return GV;
}
// int printf(const char *, ...) - a very rough implementation to make output
// useful.
+static
GenericValue lle_X_printf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
char Buffer[10000];
@@ -425,6 +423,7 @@ GenericValue lle_X_printf(FunctionType *FT,
}
// int sscanf(const char *format, ...);
+static
GenericValue lle_X_sscanf(FunctionType *FT,
const std::vector<GenericValue> &args) {
assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!");
@@ -440,6 +439,7 @@ GenericValue lle_X_sscanf(FunctionType *FT,
}
// int scanf(const char *format, ...);
+static
GenericValue lle_X_scanf(FunctionType *FT,
const std::vector<GenericValue> &args) {
assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!");
@@ -456,6 +456,7 @@ GenericValue lle_X_scanf(FunctionType *FT,
// int fprintf(FILE *, const char *, ...) - a very rough implementation to make
// output useful.
+static
GenericValue lle_X_fprintf(FunctionType *FT,
const std::vector<GenericValue> &Args) {
assert(Args.size() >= 2);
@@ -469,14 +470,6 @@ GenericValue lle_X_fprintf(FunctionType *FT,
return GV;
}
-} // End extern "C"
-
-// Done with externals; turn the warning back on
-#ifdef _MSC_VER
- #pragma warning(default: 4190)
-#endif
-
-
void Interpreter::initializeExternalFunctions() {
sys::ScopedLock Writer(*FunctionsLock);
FuncNames["lle_X_atexit"] = lle_X_atexit;
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index ee2b4596f38f..28c5775ab468 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -115,6 +115,12 @@ public:
virtual GenericValue runFunction(Function *F,
const std::vector<GenericValue> &ArgValues);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ // FIXME: not implemented.
+ return 0;
+ }
+
/// recompileAndRelinkFunction - For the interpreter, functions are always
/// up-to-date.
///
@@ -165,7 +171,6 @@ public:
void visitCallSite(CallSite CS);
void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
- void visitUnwindInst(UnwindInst &I);
void visitUnreachableInst(UnreachableInst &I);
void visitShl(BinaryOperator &I);
diff --git a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
new file mode 100644
index 000000000000..327b320afe2b
--- /dev/null
+++ b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/Interpreter/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Interpreter
+parent = ExecutionEngine
+required_libraries = CodeGen Core ExecutionEngine Support Target
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index 598e50e79460..52bb38970db9 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -2,19 +2,8 @@
add_definitions(-DENABLE_X86_JIT)
add_llvm_library(LLVMJIT
- Intercept.cpp
JIT.cpp
- JITDebugRegisterer.cpp
JITDwarfEmitter.cpp
JITEmitter.cpp
JITMemoryManager.cpp
- OProfileJITEventListener.cpp
- )
-
-add_llvm_library_dependencies(LLVMJIT
- LLVMCore
- LLVMExecutionEngine
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
)
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
deleted file mode 100644
index 2251a8e6b077..000000000000
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call. This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially. For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *JIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- if (!isSymbolSearchingDisabled()) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke parent's ctors/dtors from generated main()!
- // On Mingw and Cygwin, the symbol __main is resolved to
- // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
- // (and register wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors()
- // is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinal.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // and has an asm specifier, try again without the underscore.
- if (Name[0] == 1 && NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return 0;
-}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index d773009065b5..a942299f3bbd 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineCodeInfo.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetJITInfo.h"
@@ -206,7 +207,6 @@ void DarwinRegisterFrame(void* FrameBegin) {
ExecutionEngine *JIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -216,7 +216,7 @@ ExecutionEngine *JIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
- return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+ return new JIT(M, *TM, *TJ, JMM, GVsWithCode);
} else {
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -268,9 +268,10 @@ extern "C" {
}
JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
- : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
- isAlreadyCodeGenerating(false) {
+ JITMemoryManager *jmm, bool GVsWithCode)
+ : ExecutionEngine(M), TM(tm), TJI(tji),
+ JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()),
+ AllocateGVsWithCode(GVsWithCode), isAlreadyCodeGenerating(false) {
setTargetData(TM.getTargetData());
jitstate = new JITState(M);
@@ -288,7 +289,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -323,6 +324,7 @@ JIT::~JIT() {
AllJits->Remove(this);
delete jitstate;
delete JCE;
+ // JMM is owned by JCE, so we do not need to delete JMM here.
delete &TM;
}
@@ -341,7 +343,7 @@ void JIT::addModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -372,7 +374,7 @@ bool JIT::removeModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -476,7 +478,6 @@ GenericValue JIT::runFunction(Function *F,
case Type::FP128TyID:
case Type::PPC_FP128TyID:
llvm_unreachable("long double not supported yet");
- return rv;
case Type::PointerTyID:
return PTOGV(((void*(*)())(intptr_t)FPtr)());
}
@@ -708,12 +709,32 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
if (I != getBasicBlockAddressMap(locked).end()) {
return I->second;
} else {
- assert(0 && "JIT does not have BB address for address-of-label, was"
- " it eliminated by optimizer?");
- return 0;
+ llvm_unreachable("JIT does not have BB address for address-of-label, was"
+ " it eliminated by optimizer?");
}
}
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = JMM->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
/// Emitter.
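
With Intercept.cpp deleted, the lookup chain in JIT::getPointerToNamedFunction above is: the JIT memory manager first, then any installed LazyFunctionCreator, then a fatal error (or a null return when AbortOnFailure is false). A caller-side sketch, assuming the virtual is reachable through the ExecutionEngine base as the declarations in this patch suggest; resolveOrNull is an illustrative name:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <string>

// Passing AbortOnFailure=false turns the hard error at the end of the
// chain into a plain null return.
void *resolveOrNull(llvm::ExecutionEngine *EE, const std::string &Name) {
  return EE->getPointerToNamedFunction(Name, /*AbortOnFailure=*/false);
}
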
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 92dcb0e99586..2ae155bebf40 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -58,6 +58,7 @@ class JIT : public ExecutionEngine {
TargetMachine &TM; // The current target we are compiling to
TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
JITCodeEmitter *JCE; // JCE object
+ JITMemoryManager *JMM;
std::vector<JITEventListener*> EventListeners;
/// AllocateGVsWithCode - Some applications require that global variables and
@@ -78,8 +79,7 @@ class JIT : public ExecutionEngine {
JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
- JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ JITMemoryManager *JMM, bool AllocateGVsWithCode);
public:
~JIT();
@@ -118,15 +118,15 @@ public:
const std::vector<GenericValue> &ArgValues);
/// getPointerToNamedFunction - This method returns the address of the
- /// specified function by using the dlsym function call. As such it is only
+ /// specified function by using the MemoryManager. As such it is only
/// useful for resolving library symbols, not code generated symbols.
///
/// If AbortOnFailure is false and no function with the given name is
/// found, this function silently returns a null pointer. Otherwise,
/// it prints a message to stderr and aborts.
///
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
// CompilationCallback - Invoked the first time that a call site is found,
// which causes lazy compilation of the target function.
@@ -185,7 +185,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
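
Since createJIT() no longer takes a CodeGenOpt::Level, the optimization level has to arrive already baked into the TargetMachine. A sketch of setting it through EngineBuilder, assuming that builder API of this period; makeJIT is an illustrative wrapper:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JIT.h"  // forces the JIT to link in
#include "llvm/Module.h"
#include <string>

llvm::ExecutionEngine *makeJIT(llvm::Module *M, std::string &Err) {
  // OptLevel now travels with the TargetMachine the builder constructs,
  // rather than through the removed createJIT() parameter.
  return llvm::EngineBuilder(M)
      .setErrorStr(&Err)
      .setEngineKind(llvm::EngineKind::JIT)
      .setOptLevel(llvm::CodeGenOpt::Default)
      .create();
}
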
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
deleted file mode 100644
index e71c20b89fda..000000000000
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ /dev/null
@@ -1,211 +0,0 @@
-//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a JITDebugRegisterer object that is used by the JIT to
-// register debug info with debuggers like GDB.
-//
-//===----------------------------------------------------------------------===//
-
-#include "JITDebugRegisterer.h"
-#include "../../CodeGen/ELF.h"
-#include "../../CodeGen/ELFWriter.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
-#include <string>
-
-namespace llvm {
-
-// This must be kept in sync with gdb/gdb/jit.h .
-extern "C" {
-
- // Debuggers puts a breakpoint in this function.
- LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
-
- // We put information about the JITed function in this global, which the
- // debugger reads. Make sure to specify the version statically, because the
- // debugger checks the version before we can set it during runtime.
- struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
-
-}
-
-namespace {
-
- /// JITDebugLock - Used to serialize all code registration events, since they
- /// modify global variables.
- sys::Mutex JITDebugLock;
-
-}
-
-JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
-
-JITDebugRegisterer::~JITDebugRegisterer() {
- // Free all ELF memory.
- for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
- I != E; ++I) {
- // Call the private method that doesn't update the map so our iterator
- // doesn't break.
- UnregisterFunctionInternal(I);
- }
- FnMap.clear();
-}
-
-std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
- // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
- // API. We don't use the real module because then the ELFWriter would write
- // out unnecessary GlobalValues during finalization.
- LLVMContext Context;
- Module M("", Context);
-
- // Make a buffer for the ELF in memory.
- std::string Buffer;
- raw_string_ostream O(Buffer);
- ELFWriter EW(O, TM);
- EW.doInitialization(M);
-
- // Copy the binary into the .text section. This isn't necessary, but it's
- // useful to be able to disassemble the ELF by hand.
- ELFSection &Text = EW.getTextSection(const_cast<Function *>(F));
- Text.Addr = (uint64_t)I.FnStart;
- // TODO: We could eliminate this copy if we somehow used a pointer/size pair
- // instead of a vector.
- Text.getData().assign(I.FnStart, I.FnEnd);
-
- // Copy the exception handling call frame information into the .eh_frame
- // section. This allows GDB to get a good stack trace, particularly on
- // linux x86_64. Mark this as a PROGBITS section that needs to be loaded
- // into memory at runtime.
- ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC);
- // Pointers in the DWARF EH info are all relative to the EH frame start,
- // which is stored here.
- EH.Addr = (uint64_t)I.EhStart;
- // TODO: We could eliminate this copy if we somehow used a pointer/size pair
- // instead of a vector.
- EH.getData().assign(I.EhStart, I.EhEnd);
-
- // Add this single function to the symbol table, so the debugger prints the
- // name instead of '???'. We give the symbol default global visibility.
- ELFSym *FnSym = ELFSym::getGV(F,
- ELF::STB_GLOBAL,
- ELF::STT_FUNC,
- ELF::STV_DEFAULT);
- FnSym->SectionIdx = Text.SectionIdx;
- FnSym->Size = I.FnEnd - I.FnStart;
- FnSym->Value = 0; // Offset from start of section.
- EW.SymbolList.push_back(FnSym);
-
- EW.doFinalization(M);
- O.flush();
-
- // When trying to debug why GDB isn't getting the debug info right, it's
- // awfully helpful to write the object file to disk so that it can be
- // inspected with readelf and objdump.
- if (JITEmitDebugInfoToDisk) {
- std::string Filename;
- raw_string_ostream O2(Filename);
- O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
- O2.flush();
- std::string Errors;
- raw_fd_ostream O3(Filename.c_str(), Errors);
- O3 << Buffer;
- O3.close();
- }
-
- return Buffer;
-}
-
-void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
- // TODO: Support non-ELF platforms.
- if (!TM.getELFWriterInfo())
- return;
-
- std::string Buffer = MakeELF(F, I);
-
- jit_code_entry *JITCodeEntry = new jit_code_entry();
- JITCodeEntry->symfile_addr = Buffer.c_str();
- JITCodeEntry->symfile_size = Buffer.size();
-
- // Add a mapping from F to the entry and buffer, so we can delete this
- // info later.
- FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
-
- // Acquire the lock and do the registration.
- {
- MutexGuard locked(JITDebugLock);
- __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
-
- // Insert this entry at the head of the list.
- JITCodeEntry->prev_entry = NULL;
- jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
- JITCodeEntry->next_entry = NextEntry;
- if (NextEntry != NULL) {
- NextEntry->prev_entry = JITCodeEntry;
- }
- __jit_debug_descriptor.first_entry = JITCodeEntry;
- __jit_debug_descriptor.relevant_entry = JITCodeEntry;
- __jit_debug_register_code();
- }
-}
-
-void JITDebugRegisterer::UnregisterFunctionInternal(
- RegisteredFunctionsMap::iterator I) {
- jit_code_entry *&JITCodeEntry = I->second.second;
-
- // Acquire the lock and do the unregistration.
- {
- MutexGuard locked(JITDebugLock);
- __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
-
- // Remove the jit_code_entry from the linked list.
- jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
- jit_code_entry *NextEntry = JITCodeEntry->next_entry;
- if (NextEntry) {
- NextEntry->prev_entry = PrevEntry;
- }
- if (PrevEntry) {
- PrevEntry->next_entry = NextEntry;
- } else {
- assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
- __jit_debug_descriptor.first_entry = NextEntry;
- }
-
- // Tell GDB which entry we removed, and unregister the code.
- __jit_debug_descriptor.relevant_entry = JITCodeEntry;
- __jit_debug_register_code();
- }
-
- delete JITCodeEntry;
- JITCodeEntry = NULL;
-
- // Free the ELF file in memory.
- std::string &Buffer = I->second.first;
- Buffer.clear();
-}
-
-void JITDebugRegisterer::UnregisterFunction(const Function *F) {
- // TODO: Support non-ELF platforms.
- if (!TM.getELFWriterInfo())
- return;
-
- RegisteredFunctionsMap::iterator I = FnMap.find(F);
- if (I == FnMap.end()) return;
- UnregisterFunctionInternal(I);
- FnMap.erase(I);
-}
-
-} // end namespace llvm
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
deleted file mode 100644
index dce506bbfefd..000000000000
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
+++ /dev/null
@@ -1,116 +0,0 @@
-//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a JITDebugRegisterer object that is used by the JIT to
-// register debug info with debuggers like GDB.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
-#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/DataTypes.h"
-#include <string>
-
-// This must be kept in sync with gdb/gdb/jit.h .
-extern "C" {
-
- typedef enum {
- JIT_NOACTION = 0,
- JIT_REGISTER_FN,
- JIT_UNREGISTER_FN
- } jit_actions_t;
-
- struct jit_code_entry {
- struct jit_code_entry *next_entry;
- struct jit_code_entry *prev_entry;
- const char *symfile_addr;
- uint64_t symfile_size;
- };
-
- struct jit_descriptor {
- uint32_t version;
- // This should be jit_actions_t, but we want to be specific about the
- // bit-width.
- uint32_t action_flag;
- struct jit_code_entry *relevant_entry;
- struct jit_code_entry *first_entry;
- };
-
-}
-
-namespace llvm {
-
-class ELFSection;
-class Function;
-class TargetMachine;
-
-
-/// This class encapsulates information we want to send to the debugger.
-///
-struct DebugInfo {
- uint8_t *FnStart;
- uint8_t *FnEnd;
- uint8_t *EhStart;
- uint8_t *EhEnd;
-
- DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
-};
-
-typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
- RegisteredFunctionsMap;
-
-/// This class registers debug info for JITed code with an attached debugger.
-/// Without proper debug info, GDB can't do things like source level debugging
-/// or even produce a proper stack trace on linux-x86_64. To use this class,
-/// whenever a function is JITed, create a DebugInfo struct and pass it to the
-/// RegisterFunction method. The method will then do whatever is necessary to
-/// inform the debugger about the JITed function.
-class JITDebugRegisterer {
-
- TargetMachine &TM;
-
- /// FnMap - A map of functions that have been registered to the associated
- /// temporary files. Used for cleanup.
- RegisteredFunctionsMap FnMap;
-
- /// MakeELF - Builds the ELF file in memory and returns a std::string that
- /// contains the ELF.
- std::string MakeELF(const Function *F, DebugInfo &I);
-
-public:
- JITDebugRegisterer(TargetMachine &tm);
-
- /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
- ///
- ~JITDebugRegisterer();
-
- /// RegisterFunction - Register debug info for the given function with an
- /// attached debugger. Clients must call UnregisterFunction on all
- /// registered functions before deleting them to free the associated symbol
- /// file and unregister it from the debugger.
- void RegisterFunction(const Function *F, DebugInfo &I);
-
- /// UnregisterFunction - Unregister the debug info for the given function
- /// from the debugger and free associated memory.
- void UnregisterFunction(const Function *F);
-
-private:
- /// UnregisterFunctionInternal - Unregister the debug info for the given
- /// function from the debugger and delete any temporary files. The private
- /// version of this method does not remove the function from FnMap so that it
- /// can be called while iterating over FnMap.
- void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
-
-};
-
-} // end namespace llvm
-
-#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 8f84ac7b4126..42a136e72d45 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -313,7 +313,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- MayThrow |= MI->getDesc().isCall();
+ MayThrow |= MI->isCall();
continue;
}
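
The one-line change above switches to the convenience predicate on MachineInstr itself; behavior is identical. A sketch of the equivalent query, assuming these forwarding predicates exist on MachineInstr in this tree; mayThrowHere is illustrative:

#include "llvm/CodeGen/MachineInstr.h"

bool mayThrowHere(const llvm::MachineInstr &MI) {
  return !MI.isLabel() && MI.isCall();  // formerly MI.getDesc().isCall()
}
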
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 24020ee6d689..504c8bdffd1b 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "jit"
#include "JIT.h"
-#include "JITDebugRegisterer.h"
#include "JITDwarfEmitter.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Constants.h"
@@ -77,8 +76,8 @@ namespace {
struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
typedef JITResolverState *ExtraData;
static void onRAUW(JITResolverState *, Value *Old, Value *New) {
- assert(false && "The JIT doesn't know how to handle a"
- " RAUW on a value it has emitted.");
+ llvm_unreachable("The JIT doesn't know how to handle a"
+ " RAUW on a value it has emitted.");
}
};
@@ -324,9 +323,6 @@ namespace {
/// DE - The dwarf emitter for the jit.
OwningPtr<JITDwarfEmitter> DE;
- /// DR - The debug registerer for the jit.
- OwningPtr<JITDebugRegisterer> DR;
-
/// LabelLocations - This vector is a mapping from Label ID's to their
/// address.
DenseMap<MCSymbol*, uintptr_t> LabelLocations;
@@ -362,22 +358,22 @@ namespace {
/// Instance of the JIT
JIT *TheJIT;
+ bool JITExceptionHandling;
+
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
: SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
- EmittedFunctions(this), TheJIT(&jit) {
+ EmittedFunctions(this), TheJIT(&jit),
+ JITExceptionHandling(TM.Options.JITExceptionHandling) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
MemMgr->AllocateGOT();
DEBUG(dbgs() << "JIT is managing a GOT\n");
}
- if (JITExceptionHandling || JITEmitDebugInfo) {
+ if (JITExceptionHandling) {
DE.reset(new JITDwarfEmitter(jit));
}
- if (JITEmitDebugInfo) {
- DR.reset(new JITDebugRegisterer(TM));
- }
}
~JITEmitter() {
delete MemMgr;
@@ -968,7 +964,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
}
});
- if (JITExceptionHandling || JITEmitDebugInfo) {
+ if (JITExceptionHandling) {
uintptr_t ActualSize = 0;
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
@@ -983,7 +979,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
EhStart);
MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr,
FrameRegister);
- uint8_t *EhEnd = CurBufferPtr;
BufferBegin = SavedBufferBegin;
BufferEnd = SavedBufferEnd;
CurBufferPtr = SavedCurBufferPtr;
@@ -991,15 +986,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (JITExceptionHandling) {
TheJIT->RegisterTable(F.getFunction(), FrameRegister);
}
-
- if (JITEmitDebugInfo) {
- DebugInfo I;
- I.FnStart = FnStart;
- I.FnEnd = FnEnd;
- I.EhStart = EhStart;
- I.EhEnd = EhEnd;
- DR->RegisterFunction(F.getFunction(), I);
- }
}
if (MMI)
@@ -1037,17 +1023,13 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
EmittedFunctions.erase(Emitted);
}
- if(JITExceptionHandling) {
+ if (JITExceptionHandling) {
TheJIT->DeregisterTable(F);
}
-
- if (JITEmitDebugInfo) {
- DR->UnregisterFunction(F);
- }
}
-void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
if (BufferBegin)
return JITCodeEmitter::allocateSpace(Size, Alignment);
@@ -1059,7 +1041,7 @@ void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
return CurBufferPtr;
}
-void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+void *JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
// Delegate this call through the memory manager.
return MemMgr->allocateGlobal(Size, Alignment);
}
@@ -1179,6 +1161,9 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
}
break;
}
+ case MachineJumpTableInfo::EK_GPRel64BlockAddress:
+ llvm_unreachable(
+ "JT Info emission not implemented for GPRel64BlockAddress yet.");
}
}
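
JITEmitter now snapshots the exception-handling flag from the TargetMachine's options at construction instead of consulting a global, and the JITEmitDebugInfo path is gone along with JITDebugRegisterer. A sketch of flipping that flag on the options a client hands to its TargetMachine, assuming the TM.Options.JITExceptionHandling field named in this patch:

#include "llvm/Target/TargetOptions.h"

// Enable emission of DWARF EH tables for JITed code; the TargetMachine
// built from these options carries the flag through to JITEmitter.
void enableJITEH(llvm::TargetOptions &Opts) {
  Opts.JITExceptionHandling = true;
}
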
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index eec23cec0af9..2d1775c05c10 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -23,10 +23,21 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
#include <vector>
#include <cassert>
#include <climits>
#include <cstring>
+
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
using namespace llvm;
STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
@@ -314,6 +326,11 @@ namespace {
/// should allocate a separate slab.
static const size_t DefaultSizeThreshold;
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the dlsym function call.
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
void AllocateGOT();
// Testing methods.
@@ -441,6 +458,50 @@ namespace {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
+ /// allocateCodeSection - Allocate memory for a code section.
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ // FIXME: Alignment handling.
+ FreeRangeHeader* candidateBlock = FreeMemoryList;
+ FreeRangeHeader* head = FreeMemoryList;
+ FreeRangeHeader* iter = head->Next;
+
+ uintptr_t largest = candidateBlock->BlockSize;
+
+ // Search for the largest free block.
+ while (iter != head) {
+ if (iter->BlockSize > largest) {
+ largest = iter->BlockSize;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ largest = largest - sizeof(MemoryRangeHeader);
+
+ // If this block isn't big enough for the allocation desired, allocate
+ // another block of memory and add it to the free list.
+ if (largest < Size || largest <= FreeRangeHeader::getMinBlockSize()) {
+ DEBUG(dbgs() << "JIT: Allocating another slab of memory for function.");
+ candidateBlock = allocateNewCodeSlab((size_t)Size);
+ }
+
+ // Select this candidate block for allocation
+ CurBlock = candidateBlock;
+
+ // Allocate the entire memory block.
+ FreeMemoryList = candidateBlock->AllocateBlock();
+ // Release the memory at the end of this block that isn't needed.
+ FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size);
+ return (uint8_t *)(CurBlock + 1);
+ }
+
+ /// allocateDataSection - Allocate memory for a data section.
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ }
+
/// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
@@ -713,6 +774,139 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
return true;
}
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+//===----------------------------------------------------------------------===//
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+/* stat functions are redirected to __xstat with a version number. On x86-64,
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke the parent's ctors/dtors from generated main()!
+ // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+ // callee's (e.g. tools/lli's) copy, invoking the wrong duplicated ctors
+ // (and registering the wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors() to be
+ // called before ExecutionEngine::runFunctionAsMain().
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
+
+
+
JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() {
return new DefaultJITMemoryManager();
}
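
External-symbol lookup now lives on the memory manager, so the engines share one implementation and clients can interpose on it. A minimal probe of the default manager, assuming the base-class declaration of getPointerToNamedFunction this patch introduces; "printf" is chosen only as a symbol likely to be present in the process image:

#include "llvm/ADT/OwningPtr.h"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include <cstdio>

int main() {
  llvm::OwningPtr<llvm::JITMemoryManager> MM(
      llvm::JITMemoryManager::CreateDefaultMemManager());
  // With AbortOnFailure=false a miss is reported as null, not a fatal error.
  void *P = MM->getPointerToNamedFunction("printf", /*AbortOnFailure=*/false);
  std::printf("printf resolved to %p\n", P);
  return 0;
}
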
diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt
new file mode 100644
index 000000000000..ca2a56537aab
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = JIT
+parent = ExecutionEngine
+required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
deleted file mode 100644
index 9a9ed6d33484..000000000000
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ /dev/null
@@ -1,192 +0,0 @@
-//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a JITEventListener object that calls into OProfile to tell
-// it about JITted functions. For now, we only record function names and sizes,
-// but eventually we'll also record line number information.
-//
-// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the
-// definition of the interface we're using.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "oprofile-jit-event-listener"
-#include "llvm/Function.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Errno.h"
-#include "llvm/Config/config.h"
-#include <stddef.h>
-using namespace llvm;
-
-#if USE_OPROFILE
-
-#include <opagent.h>
-
-namespace {
-
-class OProfileJITEventListener : public JITEventListener {
- op_agent_t Agent;
-public:
- OProfileJITEventListener();
- ~OProfileJITEventListener();
-
- virtual void NotifyFunctionEmitted(const Function &F,
- void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details);
- virtual void NotifyFreeingMachineCode(void *OldPtr);
-};
-
-OProfileJITEventListener::OProfileJITEventListener()
- : Agent(op_open_agent()) {
- if (Agent == NULL) {
- const std::string err_str = sys::StrError();
- DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
- } else {
- DEBUG(dbgs() << "Connected to OProfile agent.\n");
- }
-}
-
-OProfileJITEventListener::~OProfileJITEventListener() {
- if (Agent != NULL) {
- if (op_close_agent(Agent) == -1) {
- const std::string err_str = sys::StrError();
- DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
- << err_str << "\n");
- } else {
- DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
- }
- }
-}
-
-class FilenameCache {
- // Holds the filename of each Scope, so that we can pass a null-terminated
- // string into oprofile. Use an AssertingVH rather than a ValueMap because we
- // shouldn't be modifying any MDNodes while this map is alive.
- DenseMap<AssertingVH<MDNode>, std::string> Filenames;
-
- public:
- const char *getFilename(MDNode *Scope) {
- std::string &Filename = Filenames[Scope];
- if (Filename.empty()) {
- Filename = DIScope(Scope).getFilename();
- }
- return Filename.c_str();
- }
-};
-
-static debug_line_info LineStartToOProfileFormat(
- const MachineFunction &MF, FilenameCache &Filenames,
- uintptr_t Address, DebugLoc Loc) {
- debug_line_info Result;
- Result.vma = Address;
- Result.lineno = Loc.getLine();
- Result.filename = Filenames.getFilename(
- Loc.getScope(MF.getFunction()->getContext()));
- DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
- << Result.filename << ":" << Result.lineno << "\n");
- return Result;
-}
-
-// Adds the just-emitted function to the symbol table.
-void OProfileJITEventListener::NotifyFunctionEmitted(
- const Function &F, void *FnStart, size_t FnSize,
- const EmittedFunctionDetails &Details) {
- assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
- if (op_write_native_code(Agent, F.getName().data(),
- reinterpret_cast<uint64_t>(FnStart),
- FnStart, FnSize) == -1) {
- DEBUG(dbgs() << "Failed to tell OProfile about native function "
- << F.getName() << " at ["
- << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
- return;
- }
-
- if (!Details.LineStarts.empty()) {
- // Now we convert the line number information from the address/DebugLoc
- // format in Details to the address/filename/lineno format that OProfile
- // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
- // line numbers for addresses above 4G.
- FilenameCache Filenames;
- std::vector<debug_line_info> LineInfo;
- LineInfo.reserve(1 + Details.LineStarts.size());
-
- DebugLoc FirstLoc = Details.LineStarts[0].Loc;
- assert(!FirstLoc.isUnknown()
- && "LineStarts should not contain unknown DebugLocs");
- MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
- DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
- if (FunctionDI.Verify()) {
- // If we have debug info for the function itself, use that as the line
- // number of the first several instructions. Otherwise, after filling
- // LineInfo, we'll adjust the address of the first line number to point at
- // the start of the function.
- debug_line_info line_info;
- line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
- line_info.lineno = FunctionDI.getLineNumber();
- line_info.filename = Filenames.getFilename(FirstLocScope);
- LineInfo.push_back(line_info);
- }
-
- for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
- I = Details.LineStarts.begin(), E = Details.LineStarts.end();
- I != E; ++I) {
- LineInfo.push_back(LineStartToOProfileFormat(
- *Details.MF, Filenames, I->Address, I->Loc));
- }
-
- // In case the function didn't have line info of its own, adjust the first
- // line info's address to include the start of the function.
- LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
-
- if (op_write_debug_line_info(Agent, FnStart,
- LineInfo.size(), &*LineInfo.begin()) == -1) {
- DEBUG(dbgs()
- << "Failed to tell OProfile about line numbers for native function "
- << F.getName() << " at ["
- << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
- }
- }
-}
-
-// Removes the being-deleted function from the symbol table.
-void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
- assert(FnStart && "Invalid function pointer");
- if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
- DEBUG(dbgs()
- << "Failed to tell OProfile about unload of native function at "
- << FnStart << "\n");
- }
-}
-
-} // anonymous namespace.
-
-namespace llvm {
-JITEventListener *createOProfileJITEventListener() {
- return new OProfileJITEventListener;
-}
-}
-
-#else // USE_OPROFILE
-
-namespace llvm {
-// By defining this to return NULL, we can let clients call it unconditionally,
-// even if they haven't configured with the OProfile libraries.
-JITEventListener *createOProfileJITEventListener() {
- return NULL;
-}
-} // namespace llvm
-
-#endif // USE_OPROFILE
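
The OProfile listener leaves the JIT library here; the new ExecutionEngine LLVMBuild file below lists an OProfileJIT subdirectory, where it presumably lands. The public factory keeps its contract of returning NULL when OProfile support isn't configured, so clients can attach it unconditionally. A sketch, assuming the factory is still declared in JITEventListener.h; attachOProfile is illustrative:

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h"

void attachOProfile(llvm::ExecutionEngine *EE) {
  // NULL simply means OProfile support was not built in.
  if (llvm::JITEventListener *L = llvm::createOProfileJITEventListener())
    EE->RegisterJITEventListener(L);
}
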
diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt
new file mode 100644
index 000000000000..1f94a4fb9ecd
--- /dev/null
+++ b/lib/ExecutionEngine/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/ExecutionEngine/LLVMBuild.txt ----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Interpreter JIT MCJIT RuntimeDyld IntelJITEvents OProfileJIT
+
+[component_0]
+type = Library
+name = ExecutionEngine
+parent = Libraries
+required_libraries = Core MC Support Target
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index aae8a1b2c521..fef71768b493 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,12 +1,4 @@
add_llvm_library(LLVMMCJIT
MCJIT.cpp
- Intercept.cpp
- )
-
-add_llvm_library_dependencies(LLVMMCJIT
- LLVMCore
- LLVMExecutionEngine
- LLVMRuntimeDyld
- LLVMSupport
- LLVMTarget
+ MCJITMemoryManager.cpp
)
diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp
deleted file mode 100644
index f83f4282e016..000000000000
--- a/lib/ExecutionEngine/MCJIT/Intercept.cpp
+++ /dev/null
@@ -1,162 +0,0 @@
-//===-- Intercept.cpp - System function interception routines -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// If a function call occurs to an external function, the JIT is designed to use
-// the dynamic loader interface to find a function to call. This is useful for
-// calling system calls and library functions that are not available in LLVM.
-// Some system calls, however, need to be handled specially. For this reason,
-// we intercept some of them here and use our own stubs to handle them.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCJIT.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-using namespace llvm;
-
-// AtExitHandlers - List of functions to call when the program exits,
-// registered with the atexit() library function.
-static std::vector<void (*)()> AtExitHandlers;
-
-/// runAtExitHandlers - Run any functions registered by the program's
-/// calls to atexit(3), which we intercept and store in
-/// AtExitHandlers.
-///
-static void runAtExitHandlers() {
- while (!AtExitHandlers.empty()) {
- void (*Fn)() = AtExitHandlers.back();
- AtExitHandlers.pop_back();
- Fn();
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Function stubs that are invoked instead of certain library calls
-//===----------------------------------------------------------------------===//
-
-// Force the following functions to be linked in to anything that uses the
-// JIT. This is a hack designed to work around the all-too-clever Glibc
-// strategy of making these functions work differently when inlined vs. when
-// not inlined, and hiding their real definitions in a separate archive file
-// that the dynamic linker can't see. For more info, search for
-// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-#if defined(__linux__)
-#if defined(HAVE_SYS_STAT_H)
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-/* stat functions are redirecting to __xstat with a version number. On x86-64
- * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
- * available as an exported symbol, so we have to add it explicitly.
- */
-namespace {
-class StatSymbols {
-public:
- StatSymbols() {
- sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
- sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
- sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
- sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
- sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
- sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
- sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
- sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
- sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
- sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
- }
-};
-}
-static StatSymbols initStatSymbols;
-#endif // __linux__
-
-// jit_exit - Used to intercept the "exit" library call.
-static void jit_exit(int Status) {
- runAtExitHandlers(); // Run atexit handlers...
- exit(Status);
-}
-
-// jit_atexit - Used to intercept the "atexit" library call.
-static int jit_atexit(void (*Fn)()) {
- AtExitHandlers.push_back(Fn); // Take note of atexit handler...
- return 0; // Always successful
-}
-
-static int jit_noop() {
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
-//
-/// getPointerToNamedFunction - This method returns the address of the specified
-/// function by using the dynamic loader interface. As such it is only useful
-/// for resolving library symbols, not code generated symbols.
-///
-void *MCJIT::getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure) {
- if (!isSymbolSearchingDisabled()) {
- // Check to see if this is one of the functions we want to intercept. Note,
- // we cast to intptr_t here to silence a -pedantic warning that complains
- // about casting a function pointer to a normal pointer.
- if (Name == "exit") return (void*)(intptr_t)&jit_exit;
- if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
-
- // We should not invoke parent's ctors/dtors from generated main()!
- // On Mingw and Cygwin, the symbol __main is resolved to
- // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
- // (and register wrong callee's dtors with atexit(3)).
- // We expect ExecutionEngine::runStaticConstructorsDestructors()
- // is called before ExecutionEngine::runFunctionAsMain() is called.
- if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
- const char *NameStr = Name.c_str();
- // If this is an asm specifier, skip the sentinal.
- if (NameStr[0] == 1) ++NameStr;
-
- // If it's an external function, look it up in the process image...
- void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
- if (Ptr) return Ptr;
-
- // If it wasn't found and if it starts with an underscore ('_') character,
- // and has an asm specifier, try again without the underscore.
- if (Name[0] == 1 && NameStr[0] == '_') {
- Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
- if (Ptr) return Ptr;
- }
-
- // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
- // are references to hidden visibility symbols that dlsym cannot resolve.
- // If we have one of these, strip off $LDBLStub and try again.
-#if defined(__APPLE__) && defined(__ppc__)
- if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
- memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
- // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
- // This mirrors logic in libSystemStubs.a.
- std::string Prefix = std::string(Name.begin(), Name.end()-9);
- if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
- return Ptr;
- if (void *Ptr = getPointerToNamedFunction(Prefix, false))
- return Ptr;
- }
-#endif
- }
-
- /// If a LazyFunctionCreator is installed, use it to get/create the function.
- if (LazyFunctionCreator)
- if (void *RP = LazyFunctionCreator(Name))
- return RP;
-
- if (AbortOnFailure) {
- report_fatal_error("Program used external function '"+Name+
- "' which could not be resolved!");
- }
- return 0;
-}
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
new file mode 100644
index 000000000000..90f4d2f75e24
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/MCJIT/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MCJIT
+parent = ExecutionEngine
+required_libraries = Core ExecutionEngine RuntimeDyld Support Target
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 7c8a740dc862..44f89cf78309 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -36,7 +36,6 @@ extern "C" void LLVMLinkInMCJIT() {
ExecutionEngine *MCJIT::createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM) {
// Try to register the program as a source of symbols to resolve against.
@@ -46,8 +45,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo())
- return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), OptLevel,
- GVsWithCode);
+ return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM, M), GVsWithCode);
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
@@ -55,8 +53,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
}
MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode)
+ RTDyldMemoryManager *MM, bool AllocateGVsWithCode)
: ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) {
setTargetData(TM->getTargetData());
@@ -64,7 +61,7 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) {
+ if (TM->addPassesToEmitMC(PM, Ctx, OS, false)) {
report_fatal_error("Target does not support MC emission!");
}
@@ -77,9 +74,9 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
OS.flush();
// Load the object into the dynamic linker.
- // FIXME: It would be nice to avoid making yet another copy.
- MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(),
- Buffer.size()));
+ MemoryBuffer *MB = MemoryBuffer::getMemBuffer(StringRef(Buffer.data(),
+ Buffer.size()),
+ "", false);
if (Dyld.loadObject(MB))
report_fatal_error(Dyld.getErrorString());
// Resolve any relocations.
@@ -88,11 +85,11 @@ MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji,
MCJIT::~MCJIT() {
delete MemMgr;
+ delete TM;
}
void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
report_fatal_error("not yet implemented");
- return 0;
}
void *MCJIT::getPointerToFunction(Function *F) {
@@ -211,12 +208,30 @@ GenericValue MCJIT::runFunction(Function *F,
case Type::FP128TyID:
case Type::PPC_FP128TyID:
llvm_unreachable("long double not supported yet");
- return rv;
case Type::PointerTyID:
return PTOGV(((void*(*)())(intptr_t)FPtr)());
}
}
- assert(0 && "Full-featured argument passing not supported yet!");
- return GenericValue();
+ llvm_unreachable("Full-featured argument passing not supported yet!");
+}
+
+void *MCJIT::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ if (!isSymbolSearchingDisabled() && MemMgr) {
+ void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+ if (ptr)
+ return ptr;
+ }
+
+ /// If a LazyFunctionCreator is installed, use it to get/create the function.
+ if (LazyFunctionCreator)
+ if (void *RP = LazyFunctionCreator(Name))
+ return RP;
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
}
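
The MemoryBuffer change above resolves the old "yet another copy" FIXME: getMemBuffer() with RequiresNullTerminator=false wraps the existing bytes, where getMemBufferCopy() duplicated them. A sketch of the distinction, with wrap as an illustrative helper:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"

// The returned buffer aliases Bytes; the caller must keep Bytes alive for
// the buffer's lifetime, which MCJIT does since it owns the stream storage.
llvm::MemoryBuffer *wrap(llvm::StringRef Bytes) {
  return llvm::MemoryBuffer::getMemBuffer(Bytes, "",
                                          /*RequiresNullTerminator=*/false);
}
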
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index b64c21a97360..2b3df9884eb2 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -24,8 +24,7 @@ namespace llvm {
class MCJIT : public ExecutionEngine {
MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji,
- RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel,
- bool AllocateGVsWithCode);
+ RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode);
TargetMachine *TM;
MCContext *Ctx;
@@ -66,8 +65,17 @@ public:
/// found, this function silently returns a null pointer. Otherwise,
/// it prints a message to stderr and aborts.
///
- void *getPointerToNamedFunction(const std::string &Name,
- bool AbortOnFailure = true);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true);
+
+  /// mapSectionAddress - Map a section to its target address space value.
+ /// Map the address of a JIT section as returned from the memory manager
+ /// to the address in the target process as the running code will see it.
+ /// This is the address which will be used for relocation resolution.
+ virtual void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress) {
+ Dyld.mapSectionAddress(LocalAddress, TargetAddress);
+ }
+
/// @}
/// @name (Private) Registration Interfaces
/// @{
@@ -79,7 +87,6 @@ public:
static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
JITMemoryManager *JMM,
- CodeGenOpt::Level OptLevel,
bool GVsWithCode,
TargetMachine *TM);
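
The mapSectionAddress hook added above is what makes remote-target JITing
possible: code is emitted into host-local buffers but relocated as if it
lived at target addresses. A rough sketch of the intended call sequence,
assuming the base ExecutionEngine exposes the virtual as this header
suggests; RemoteAlloc stands in for whatever transport allocates memory in
the target process and is not part of this patch.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include <stdint.h>

// Purely illustrative stand-in for a remote allocation channel.
static uint64_t RemoteAlloc(uint64_t Size) { (void)Size; return 0x10000; }

static void relocateForTarget(llvm::ExecutionEngine *EE,
                              void *LocalBase, uint64_t SectionSize) {
  uint64_t TargetBase = RemoteAlloc(SectionSize);
  // Relocations referencing this section now resolve against TargetBase
  // instead of the host-side buffer address.
  EE->mapSectionAddress(LocalBase, TargetBase);
}
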
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp
new file mode 100644
index 000000000000..457fe5e3ef06
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.cpp
@@ -0,0 +1,14 @@
+//==-- MCJITMemoryManager.cpp - Definition for the Memory Manager -*-C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJITMemoryManager.h"
+
+using namespace llvm;
+
+void MCJITMemoryManager::anchor() { }
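
The empty anchor() above is LLVM's usual vtable-anchoring idiom: giving a
class one out-of-line virtual member pins its vtable (and RTTI) to a single
object file instead of having the compiler emit weak copies in every
translation unit that includes the header. The general shape of the pattern,
reduced to a generic sketch:

// Header: declare one virtual method with no inline definition.
struct HasAnchor {
  virtual void anchor(); // deliberately out-of-line
  virtual ~HasAnchor() {}
};

// Exactly one .cpp file: provide the (empty) definition. The vtable for
// HasAnchor is emitted here and nowhere else.
void HasAnchor::anchor() {}
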
diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
index 40bc031a0771..a68949aa41c8 100644
--- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
+++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h
@@ -21,45 +21,30 @@ namespace llvm {
// and the RuntimeDyld interface that maps objects, by name, onto their
// matching LLVM IR counterparts in the module(s) being compiled.
class MCJITMemoryManager : public RTDyldMemoryManager {
+ virtual void anchor();
JITMemoryManager *JMM;
// FIXME: Multiple modules.
Module *M;
public:
- MCJITMemoryManager(JITMemoryManager *jmm, Module *m) : JMM(jmm), M(m) {}
+  MCJITMemoryManager(JITMemoryManager *jmm, Module *m) :
+    JMM(jmm ? jmm : JITMemoryManager::CreateDefaultMemManager()), M(m) {}
+ // We own the JMM, so make sure to delete it.
+ ~MCJITMemoryManager() { delete JMM; }
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return JMM->allocateSpace(Size, Alignment);
+ }
- // Allocate ActualSize bytes, or more, for the named function. Return
- // a pointer to the allocated memory and update Size to reflect how much
- // memory was acutally allocated.
- uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) {
- // FIXME: This should really reference the MCAsmInfo to get the global
- // prefix.
- if (Name[0] == '_') ++Name;
- Function *F = M->getFunction(Name);
- // Some ObjC names have a prefixed \01 in the IR. If we failed to find
- // the symbol and it's of the ObjC conventions (starts with "-"), try
- // prepending a \01 and see if we can find it that way.
- if (!F && Name[0] == '-')
- F = M->getFunction((Twine("\1") + Name).str());
- assert(F && "No matching function in JIT IR Module!");
- return JMM->startFunctionBody(F, Size);
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return JMM->allocateSpace(Size, Alignment);
}
- // Mark the end of the function, including how much of the allocated
- // memory was actually used.
- void endFunctionBody(const char *Name, uint8_t *FunctionStart,
- uint8_t *FunctionEnd) {
- // FIXME: This should really reference the MCAsmInfo to get the global
- // prefix.
- if (Name[0] == '_') ++Name;
- Function *F = M->getFunction(Name);
- // Some ObjC names have a prefixed \01 in the IR. If we failed to find
- // the symbol and it's of the ObjC conventions (starts with "-"), try
- // prepending a \01 and see if we can find it that way.
- if (!F && Name[0] == '-')
- F = M->getFunction((Twine("\1") + Name).str());
- assert(F && "No matching function in JIT IR Module!");
- JMM->endFunctionBody(F, FunctionStart, FunctionEnd);
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ return JMM->getPointerToNamedFunction(Name, AbortOnFailure);
}
};
diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile
index 9a649a52cf9e..c26e0ada5bc1 100644
--- a/lib/ExecutionEngine/Makefile
+++ b/lib/ExecutionEngine/Makefile
@@ -8,6 +8,17 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
LIBRARYNAME = LLVMExecutionEngine
+
+include $(LEVEL)/Makefile.config
+
PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld
-include $(LEVEL)/Makefile.common
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+PARALLEL_DIRS += IntelJITEvents
+endif
+
+ifeq ($(USE_OPROFILE), 1)
+PARALLEL_DIRS += OProfileJIT
+endif
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
new file mode 100644
index 000000000000..d585136eb0ac
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+include_directories( ${LLVM_OPROFILE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMOProfileJIT
+ OProfileJITEventListener.cpp
+ OProfileWrapper.cpp
+ )
diff --git a/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
new file mode 100644
index 000000000000..4516dfa2dab2
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/ExecutionEngine/OProfileJIT/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+
+[component_0]
+type = Library
+name = OProfileJIT
+parent = ExecutionEngine
diff --git a/lib/ExecutionEngine/OProfileJIT/Makefile b/lib/ExecutionEngine/OProfileJIT/Makefile
new file mode 100644
index 000000000000..fd3adce26c1f
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/Makefile
@@ -0,0 +1,18 @@
+##===- lib/ExecutionEngine/OProfileJIT/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMOProfileJIT
+
+include $(LEVEL)/Makefile.config
+
+SOURCES += OProfileJITEventListener.cpp \
+ OProfileWrapper.cpp
+CPPFLAGS += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
new file mode 100644
index 000000000000..e6142e3678da
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -0,0 +1,177 @@
+//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that uses OProfileWrapper to tell
+// oprofile about JITted functions, including source line information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "EventListenerCommon.h"
+
+#include <dirent.h>
+#include <fcntl.h>
+
+using namespace llvm;
+using namespace llvm::jitprofiling;
+
+namespace {
+
+class OProfileJITEventListener : public JITEventListener {
+ OProfileWrapper& Wrapper;
+
+ void initialize();
+
+public:
+ OProfileJITEventListener(OProfileWrapper& LibraryWrapper)
+ : Wrapper(LibraryWrapper) {
+ initialize();
+ }
+
+ ~OProfileJITEventListener();
+
+ virtual void NotifyFunctionEmitted(const Function &F,
+ void *FnStart, size_t FnSize,
+ const JITEvent_EmittedFunctionDetails &Details);
+
+ virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+void OProfileJITEventListener::initialize() {
+ if (!Wrapper.op_open_agent()) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Connected to OProfile agent.\n");
+ }
+}
+
+OProfileJITEventListener::~OProfileJITEventListener() {
+ if (Wrapper.isAgentAvailable()) {
+ if (Wrapper.op_close_agent() == -1) {
+ const std::string err_str = sys::StrError();
+ DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
+ << err_str << "\n");
+ } else {
+ DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
+ }
+ }
+}
+
+static debug_line_info LineStartToOProfileFormat(
+ const MachineFunction &MF, FilenameCache &Filenames,
+ uintptr_t Address, DebugLoc Loc) {
+ debug_line_info Result;
+ Result.vma = Address;
+ Result.lineno = Loc.getLine();
+ Result.filename = Filenames.getFilename(
+ Loc.getScope(MF.getFunction()->getContext()));
+ DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+ << Result.filename << ":" << Result.lineno << "\n");
+ return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void OProfileJITEventListener::NotifyFunctionEmitted(
+ const Function &F, void *FnStart, size_t FnSize,
+ const JITEvent_EmittedFunctionDetails &Details) {
+ assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
+ if (Wrapper.op_write_native_code(F.getName().data(),
+ reinterpret_cast<uint64_t>(FnStart),
+ FnStart, FnSize) == -1) {
+ DEBUG(dbgs() << "Failed to tell OProfile about native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ return;
+ }
+
+ if (!Details.LineStarts.empty()) {
+ // Now we convert the line number information from the address/DebugLoc
+ // format in Details to the address/filename/lineno format that OProfile
+ // expects. Note that OProfile 0.9.4 has a bug that causes it to ignore
+ // line numbers for addresses above 4G.
+ FilenameCache Filenames;
+ std::vector<debug_line_info> LineInfo;
+ LineInfo.reserve(1 + Details.LineStarts.size());
+
+ DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+ assert(!FirstLoc.isUnknown()
+ && "LineStarts should not contain unknown DebugLocs");
+ MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+ DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+ if (FunctionDI.Verify()) {
+ // If we have debug info for the function itself, use that as the line
+ // number of the first several instructions. Otherwise, after filling
+ // LineInfo, we'll adjust the address of the first line number to point at
+ // the start of the function.
+ debug_line_info line_info;
+ line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
+ line_info.lineno = FunctionDI.getLineNumber();
+ line_info.filename = Filenames.getFilename(FirstLocScope);
+ LineInfo.push_back(line_info);
+ }
+
+ for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+ I = Details.LineStarts.begin(), E = Details.LineStarts.end();
+ I != E; ++I) {
+ LineInfo.push_back(LineStartToOProfileFormat(
+ *Details.MF, Filenames, I->Address, I->Loc));
+ }
+
+ // In case the function didn't have line info of its own, adjust the first
+ // line info's address to include the start of the function.
+ LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
+
+ if (Wrapper.op_write_debug_line_info(FnStart, LineInfo.size(),
+ &*LineInfo.begin()) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about line numbers for native function "
+ << F.getName() << " at ["
+ << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+ }
+ }
+}
+
+// Removes the being-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+ assert(FnStart && "Invalid function pointer");
+ if (Wrapper.op_unload_native_code(reinterpret_cast<uint64_t>(FnStart)) == -1) {
+ DEBUG(dbgs()
+ << "Failed to tell OProfile about unload of native function at "
+ << FnStart << "\n");
+ }
+}
+
+} // anonymous namespace.
+
+namespace llvm {
+JITEventListener *JITEventListener::createOProfileJITEventListener() {
+ static OwningPtr<OProfileWrapper> JITProfilingWrapper(new OProfileWrapper);
+ return new OProfileJITEventListener(*JITProfilingWrapper);
+}
+
+// for testing
+JITEventListener *JITEventListener::createOProfileJITEventListener(
+ OProfileWrapper* TestImpl) {
+ return new OProfileJITEventListener(*TestImpl);
+}
+
+} // namespace llvm
+
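
Attaching the listener is a single call on the engine. A short sketch,
assuming an already-constructed ExecutionEngine; the listener degrades to a
no-op if the oprofiled daemon is not running or libopagent.so is missing.

#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h"

// Report subsequently JITed functions (and their line info) to OProfile.
void attachOProfile(llvm::ExecutionEngine *EE) {
  EE->RegisterJITEventListener(
      llvm::JITEventListener::createOProfileJITEventListener());
}
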
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
new file mode 100644
index 000000000000..d67f5370b862
--- /dev/null
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -0,0 +1,263 @@
+//===-- OProfileWrapper.cpp - OProfile JIT API Wrapper implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interface in OProfileWrapper.h. It is responsible
+// for loading the opagent dynamic library when the first call to an op_
+// function occurs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+
+#define DEBUG_TYPE "oprofile-wrapper"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/ADT/SmallString.h"
+
+#include <sstream>
+#include <cstring>
+#include <stddef.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+namespace {
+
+// Global mutex to ensure a single thread initializes the OProfile agent.
+llvm::sys::Mutex OProfileInitializationMutex;
+
+} // anonymous namespace
+
+namespace llvm {
+
+OProfileWrapper::OProfileWrapper()
+: Agent(0),
+ OpenAgentFunc(0),
+ CloseAgentFunc(0),
+ WriteNativeCodeFunc(0),
+ WriteDebugLineInfoFunc(0),
+ UnloadNativeCodeFunc(0),
+ MajorVersionFunc(0),
+ MinorVersionFunc(0),
+ IsOProfileRunningFunc(0),
+ Initialized(false) {
+}
+
+bool OProfileWrapper::initialize() {
+ using namespace llvm;
+ using namespace llvm::sys;
+
+ MutexGuard Guard(OProfileInitializationMutex);
+
+ if (Initialized)
+ return OpenAgentFunc != 0;
+
+ Initialized = true;
+
+ // If the oprofile daemon is not running, don't load the opagent library
+ if (!isOProfileRunning()) {
+ DEBUG(dbgs() << "OProfile daemon is not detected.\n");
+ return false;
+ }
+
+ std::string error;
+  if (DynamicLibrary::LoadLibraryPermanently("libopagent.so", &error)) {
+ DEBUG(dbgs()
+ << "OProfile connector library libopagent.so could not be loaded: "
+ << error << "\n");
+ }
+
+ // Get the addresses of the opagent functions
+ OpenAgentFunc = (op_open_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_open_agent");
+ CloseAgentFunc = (op_close_agent_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_close_agent");
+ WriteNativeCodeFunc = (op_write_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_native_code");
+ WriteDebugLineInfoFunc = (op_write_debug_line_info_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_write_debug_line_info");
+ UnloadNativeCodeFunc = (op_unload_native_code_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_unload_native_code");
+ MajorVersionFunc = (op_major_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_major_version");
+  MinorVersionFunc = (op_minor_version_ptr_t)(intptr_t)
+ DynamicLibrary::SearchForAddressOfSymbol("op_minor_version");
+
+ // With missing functions, we can do nothing
+ if (!OpenAgentFunc
+ || !CloseAgentFunc
+ || !WriteNativeCodeFunc
+ || !WriteDebugLineInfoFunc
+ || !UnloadNativeCodeFunc) {
+ OpenAgentFunc = 0;
+ CloseAgentFunc = 0;
+ WriteNativeCodeFunc = 0;
+ WriteDebugLineInfoFunc = 0;
+ UnloadNativeCodeFunc = 0;
+ return false;
+ }
+
+ return true;
+}
+
+bool OProfileWrapper::isOProfileRunning() {
+ if (IsOProfileRunningFunc != 0)
+ return IsOProfileRunningFunc();
+ return checkForOProfileProcEntry();
+}
+
+bool OProfileWrapper::checkForOProfileProcEntry() {
+ DIR* ProcDir;
+
+ ProcDir = opendir("/proc");
+ if (!ProcDir)
+ return false;
+
+ // Walk the /proc tree looking for the oprofile daemon
+ struct dirent* Entry;
+ while (0 != (Entry = readdir(ProcDir))) {
+ if (Entry->d_type == DT_DIR) {
+ // Build a path from the current entry name
+ SmallString<256> CmdLineFName;
+ raw_svector_ostream(CmdLineFName) << "/proc/" << Entry->d_name
+ << "/cmdline";
+
+ // Open the cmdline file
+      int CmdLineFD = open(CmdLineFName.c_str(), O_RDONLY);
+ if (CmdLineFD != -1) {
+ char ExeName[PATH_MAX+1];
+ char* BaseName = 0;
+
+ // Read the cmdline file
+ ssize_t NumRead = read(CmdLineFD, ExeName, PATH_MAX+1);
+ close(CmdLineFD);
+ ssize_t Idx = 0;
+
+ // Find the terminator for the first string
+ while (Idx < NumRead-1 && ExeName[Idx] != 0) {
+ Idx++;
+ }
+
+ // Go back to the last non-null character
+ Idx--;
+
+ // Find the last path separator in the first string
+ while (Idx > 0) {
+ if (ExeName[Idx] == '/') {
+ BaseName = ExeName + Idx + 1;
+ break;
+ }
+ Idx--;
+ }
+
+ // Test this to see if it is the oprofile daemon
+ if (BaseName != 0 && !strcmp("oprofiled", BaseName)) {
+ // If it is, we're done
+ closedir(ProcDir);
+ return true;
+ }
+ }
+ }
+ }
+
+ // We've looked through all the files and didn't find the daemon
+ closedir(ProcDir);
+ return false;
+}
+
+bool OProfileWrapper::op_open_agent() {
+ if (!Initialized)
+ initialize();
+
+ if (OpenAgentFunc != 0) {
+ Agent = OpenAgentFunc();
+ return Agent != 0;
+ }
+
+ return false;
+}
+
+int OProfileWrapper::op_close_agent() {
+ if (!Initialized)
+ initialize();
+
+ int ret = -1;
+ if (Agent && CloseAgentFunc) {
+ ret = CloseAgentFunc(Agent);
+ if (ret == 0) {
+ Agent = 0;
+ }
+ }
+ return ret;
+}
+
+bool OProfileWrapper::isAgentAvailable() {
+ return Agent != 0;
+}
+
+int OProfileWrapper::op_write_native_code(const char* Name,
+ uint64_t Addr,
+ void const* Code,
+ const unsigned int Size) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteNativeCodeFunc)
+ return WriteNativeCodeFunc(Agent, Name, Addr, Code, Size);
+
+ return -1;
+}
+
+int OProfileWrapper::op_write_debug_line_info(
+ void const* Code,
+ size_t NumEntries,
+ struct debug_line_info const* Info) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && WriteDebugLineInfoFunc)
+ return WriteDebugLineInfoFunc(Agent, Code, NumEntries, Info);
+
+ return -1;
+}
+
+int OProfileWrapper::op_major_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MajorVersionFunc)
+ return MajorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_minor_version() {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && MinorVersionFunc)
+ return MinorVersionFunc();
+
+ return -1;
+}
+
+int OProfileWrapper::op_unload_native_code(uint64_t Addr) {
+ if (!Initialized)
+ initialize();
+
+ if (Agent && UnloadNativeCodeFunc)
+ return UnloadNativeCodeFunc(Agent, Addr);
+
+ return -1;
+}
+
+} // namespace llvm
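
The wrapper defers everything until the first op_* call, so clients that
never profile never pay for the libopagent.so dependency. The same
lazy-binding pattern, boiled down to a generic sketch; the library and
symbol names here are hypothetical. Note that LoadLibraryPermanently
returns false on success, which is also why the check in initialize()
above must not be negated.

#include "llvm/Support/DynamicLibrary.h"
#include <stdint.h>
#include <string>

typedef int (*some_fn_ptr_t)(void);
static some_fn_ptr_t SomeFn = 0;
static bool Tried = false;

int callSomeFn() {
  if (!Tried) {
    Tried = true;
    std::string Err;
    // LoadLibraryPermanently returns false on success.
    if (!llvm::sys::DynamicLibrary::LoadLibraryPermanently("libfoo.so", &Err))
      SomeFn = (some_fn_ptr_t)(intptr_t)
          llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("some_fn");
  }
  return SomeFn ? SomeFn() : -1; // mirror the op_* wrappers' failure value
}
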
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index c236d1d9d115..002e63cd3b6b 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -1,9 +1,5 @@
add_llvm_library(LLVMRuntimeDyld
RuntimeDyld.cpp
RuntimeDyldMachO.cpp
- )
-
-add_llvm_library_dependencies(LLVMRuntimeDyld
- LLVMObject
- LLVMSupport
+ RuntimeDyldELF.cpp
)
diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
new file mode 100644
index 000000000000..97dc86129a33
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = RuntimeDyld
+parent = ExecutionEngine
+required_libraries = Object Support
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 33dd70502798..63cec1aca3b1 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1,4 +1,4 @@
-//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,10 @@
#define DEBUG_TYPE "dyld"
#include "RuntimeDyldImpl.h"
+#include "RuntimeDyldELF.h"
+#include "RuntimeDyldMachO.h"
+#include "llvm/Support/Path.h"
+
using namespace llvm;
using namespace llvm::object;
@@ -22,35 +26,383 @@ RuntimeDyldImpl::~RuntimeDyldImpl() {}
namespace llvm {
-void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress) {
- // Allocate memory for the function via the memory manager.
- uintptr_t Size = EndAddress - StartAddress + 1;
- uintptr_t AllocSize = Size;
- uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
- assert(Size >= (uint64_t)(EndAddress - StartAddress + 1) &&
- "Memory manager failed to allocate enough memory!");
- // Copy the function payload into the memory block.
- memcpy(Mem, StartAddress, Size);
- MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
- // Remember where we put it.
- Functions[Name] = sys::MemoryBlock(Mem, Size);
- // Default the assigned address for this symbol to wherever this
- // allocated it.
- SymbolTable[Name] = Mem;
- DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
-}
+namespace {
+  // Helper that calls report_fatal_error if an operation returns an error.
+ error_code Check(error_code Err) {
+ if (Err) {
+ report_fatal_error(Err.message());
+ }
+ return Err;
+ }
+} // end anonymous namespace
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
- // Just iterate over the symbols in our symbol table and assign their
- // addresses.
- StringMap<uint8_t*>::iterator i = SymbolTable.begin();
- StringMap<uint8_t*>::iterator e = SymbolTable.end();
- for (;i != e; ++i)
- reassignSymbolAddress(i->getKey(), i->getValue());
+ // First, resolve relocations associated with external symbols.
+ resolveSymbols();
+
+ // Just iterate over the sections we have and resolve all the relocations
+ // in them. Gross overkill, but it gets the job done.
+ for (int i = 0, e = Sections.size(); i != e; ++i) {
+ reassignSectionAddress(i, Sections[i].LoadAddress);
+ }
}
+void RuntimeDyldImpl::mapSectionAddress(void *LocalAddress,
+ uint64_t TargetAddress) {
+ for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
+ if (Sections[i].Address == LocalAddress) {
+ reassignSectionAddress(i, TargetAddress);
+ return;
+ }
+ }
+ llvm_unreachable("Attempting to remap address of unknown section!");
+}
+
+bool RuntimeDyldImpl::loadObject(const MemoryBuffer *InputBuffer) {
+  // FIXME: ObjectFile doesn't modify the MemoryBuffer; it should take a
+  // const MemoryBuffer as its parameter.
+ OwningPtr<ObjectFile> obj(ObjectFile::createObjectFile(
+ const_cast<MemoryBuffer*>(InputBuffer)));
+ if (!obj)
+ report_fatal_error("Unable to create object image from memory buffer!");
+
+ Arch = (Triple::ArchType)obj->getArch();
+
+ LocalSymbolMap LocalSymbols; // Functions and data symbols from the
+ // object file.
+ ObjSectionToIDMap LocalSections; // Used sections from the object file
+ CommonSymbolMap CommonSymbols; // Common symbols requiring allocation
+ uint64_t CommonSize = 0;
+
+ error_code err;
+ // Parse symbols
+ DEBUG(dbgs() << "Parse symbols:\n");
+ for (symbol_iterator i = obj->begin_symbols(), e = obj->end_symbols();
+ i != e; i.increment(err)) {
+ Check(err);
+ object::SymbolRef::Type SymType;
+ StringRef Name;
+ Check(i->getType(SymType));
+ Check(i->getName(Name));
+
+ uint32_t flags;
+ Check(i->getFlags(flags));
+
+ bool isCommon = flags & SymbolRef::SF_Common;
+ if (isCommon) {
+ // Add the common symbols to a list. We'll allocate them all below.
+ uint64_t Size = 0;
+ Check(i->getSize(Size));
+ CommonSize += Size;
+ CommonSymbols[*i] = Size;
+ } else {
+ if (SymType == object::SymbolRef::ST_Function ||
+ SymType == object::SymbolRef::ST_Data) {
+ uint64_t FileOffset;
+ StringRef sData;
+ section_iterator si = obj->end_sections();
+ Check(i->getFileOffset(FileOffset));
+ Check(i->getSection(si));
+ if (si == obj->end_sections()) continue;
+ Check(si->getContents(sData));
+ const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() +
+ (uintptr_t)FileOffset;
+ uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)sData.begin());
+ unsigned SectionID =
+ findOrEmitSection(*si,
+ SymType == object::SymbolRef::ST_Function,
+ LocalSections);
+ bool isGlobal = flags & SymbolRef::SF_Global;
+ LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
+ DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
+ << " flags: " << flags
+ << " SID: " << SectionID
+ << " Offset: " << format("%p", SectOffset));
+ if (isGlobal)
+ SymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
+ }
+ }
+ DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n");
+ }
+
+ // Allocate common symbols
+ if (CommonSize != 0)
+ emitCommonSymbols(CommonSymbols, CommonSize, LocalSymbols);
+
+  // Parse and process relocations.
+ DEBUG(dbgs() << "Parse relocations:\n");
+ for (section_iterator si = obj->begin_sections(),
+ se = obj->end_sections(); si != se; si.increment(err)) {
+ Check(err);
+ bool isFirstRelocation = true;
+ unsigned SectionID = 0;
+ StubMap Stubs;
+
+ for (relocation_iterator i = si->begin_relocations(),
+ e = si->end_relocations(); i != e; i.increment(err)) {
+ Check(err);
+
+      // If it's the first relocation in this section, find its SectionID.
+ if (isFirstRelocation) {
+ SectionID = findOrEmitSection(*si, true, LocalSections);
+ DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
+ isFirstRelocation = false;
+ }
+
+ ObjRelocationInfo RI;
+ RI.SectionID = SectionID;
+ Check(i->getAdditionalInfo(RI.AdditionalInfo));
+ Check(i->getOffset(RI.Offset));
+ Check(i->getSymbol(RI.Symbol));
+ Check(i->getType(RI.Type));
+
+ DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
+ << " Offset: " << format("%p", (uintptr_t)RI.Offset)
+ << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
+ << "\n");
+ processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+ }
+ }
+ return false;
+}
+
+unsigned RuntimeDyldImpl::emitCommonSymbols(const CommonSymbolMap &Map,
+ uint64_t TotalSize,
+ LocalSymbolMap &LocalSymbols) {
+ // Allocate memory for the section
+ unsigned SectionID = Sections.size();
+ uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
+ SectionID);
+ if (!Addr)
+ report_fatal_error("Unable to allocate memory for common symbols!");
+ uint64_t Offset = 0;
+ Sections.push_back(SectionEntry(Addr, TotalSize, TotalSize, 0));
+ memset(Addr, 0, TotalSize);
+
+ DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << TotalSize
+ << "\n");
+
+ // Assign the address of each symbol
+ for (CommonSymbolMap::const_iterator it = Map.begin(), itEnd = Map.end();
+ it != itEnd; it++) {
+ uint64_t Size = it->second;
+ StringRef Name;
+ it->first.getName(Name);
+ LocalSymbols[Name.data()] = SymbolLoc(SectionID, Offset);
+ Offset += Size;
+ Addr += Size;
+ }
+
+ return SectionID;
+}
+
+unsigned RuntimeDyldImpl::emitSection(const SectionRef &Section,
+ bool IsCode) {
+
+ unsigned StubBufSize = 0,
+ StubSize = getMaxStubSize();
+ error_code err;
+ if (StubSize > 0) {
+ for (relocation_iterator i = Section.begin_relocations(),
+ e = Section.end_relocations(); i != e; i.increment(err), Check(err))
+ StubBufSize += StubSize;
+ }
+ StringRef data;
+ uint64_t Alignment64;
+ Check(Section.getContents(data));
+ Check(Section.getAlignment(Alignment64));
+
+ unsigned Alignment = (unsigned)Alignment64 & 0xffffffffL;
+ bool IsRequired;
+ bool IsVirtual;
+ bool IsZeroInit;
+ uint64_t DataSize;
+ Check(Section.isRequiredForExecution(IsRequired));
+ Check(Section.isVirtual(IsVirtual));
+ Check(Section.isZeroInit(IsZeroInit));
+ Check(Section.getSize(DataSize));
+
+ unsigned Allocate;
+ unsigned SectionID = Sections.size();
+ uint8_t *Addr;
+ const char *pData = 0;
+
+ // Some sections, such as debug info, don't need to be loaded for execution.
+ // Leave those where they are.
+ if (IsRequired) {
+ Allocate = DataSize + StubBufSize;
+ Addr = IsCode
+ ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID)
+ : MemMgr->allocateDataSection(Allocate, Alignment, SectionID);
+ if (!Addr)
+ report_fatal_error("Unable to allocate section memory!");
+
+ // Virtual sections have no data in the object image, so leave pData = 0
+ if (!IsVirtual)
+ pData = data.data();
+
+ // Zero-initialize or copy the data from the image
+ if (IsZeroInit || IsVirtual)
+ memset(Addr, 0, DataSize);
+ else
+ memcpy(Addr, pData, DataSize);
+
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " obj addr: " << format("%p", pData)
+ << " new addr: " << format("%p", Addr)
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ }
+ else {
+ // Even if we didn't load the section, we need to record an entry for it
+ // to handle later processing (and by 'handle' I mean don't do anything
+ // with these sections).
+ Allocate = 0;
+ Addr = 0;
+ DEBUG(dbgs() << "emitSection SectionID: " << SectionID
+ << " obj addr: " << format("%p", data.data())
+ << " new addr: 0"
+ << " DataSize: " << DataSize
+ << " StubBufSize: " << StubBufSize
+ << " Allocate: " << Allocate
+ << "\n");
+ }
+
+  Sections.push_back(SectionEntry(Addr, Allocate, DataSize, (uintptr_t)pData));
+ return SectionID;
+}
+
+unsigned RuntimeDyldImpl::findOrEmitSection(const SectionRef &Section,
+ bool IsCode,
+ ObjSectionToIDMap &LocalSections) {
+
+ unsigned SectionID = 0;
+ ObjSectionToIDMap::iterator i = LocalSections.find(Section);
+ if (i != LocalSections.end())
+ SectionID = i->second;
+ else {
+ SectionID = emitSection(Section, IsCode);
+ LocalSections[Section] = SectionID;
+ }
+ return SectionID;
+}
+
+void RuntimeDyldImpl::AddRelocation(const RelocationValueRef &Value,
+ unsigned SectionID, uintptr_t Offset,
+ uint32_t RelType) {
+ DEBUG(dbgs() << "AddRelocation SymNamePtr: " << format("%p", Value.SymbolName)
+ << " SID: " << Value.SectionID
+ << " Addend: " << format("%p", Value.Addend)
+ << " Offset: " << format("%p", Offset)
+ << " RelType: " << format("%x", RelType)
+ << "\n");
+
+ if (Value.SymbolName == 0) {
+ Relocations[Value.SectionID].push_back(RelocationEntry(
+ SectionID,
+ Offset,
+ RelType,
+ Value.Addend));
+ } else
+ SymbolRelocations[Value.SymbolName].push_back(RelocationEntry(
+ SectionID,
+ Offset,
+ RelType,
+ Value.Addend));
+}
+
+uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
+  // TODO: There is only an ARM far stub now. We should add a Thumb stub,
+  // and stubs for Thumb-to-ARM and ARM-to-Thumb branches.
+ if (Arch == Triple::arm) {
+ uint32_t *StubAddr = (uint32_t*)Addr;
+ *StubAddr = 0xe51ff004; // ldr pc,<label>
+ return (uint8_t*)++StubAddr;
+ }
+ else
+ return Addr;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+  // The address to use for relocation resolution is not necessarily
+  // the address of the local section buffer; we may be targeting a
+  // remote execution environment of some sort. Re-apply any
+  // relocations referencing this section with the given address.
+ //
+ // Addr is a uint64_t because we can't assume the pointer width
+ // of the target is the same as that of the host. Just use a generic
+ // "big enough" type.
+ Sections[SectionID].LoadAddress = Addr;
+ DEBUG(dbgs() << "Resolving relocations Section #" << SectionID
+ << "\t" << format("%p", (uint8_t *)Addr)
+ << "\n");
+ resolveRelocationList(Relocations[SectionID], Addr);
+}
+
+void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
+ uint64_t Value) {
+ // Ignore relocations for sections that were not loaded
+ if (Sections[RE.SectionID].Address != 0) {
+ uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+ DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+ << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+ << " Data: " << RE.Data
+ << " Addend: " << RE.Addend
+ << "\n");
+
+ resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+ Value, RE.Data, RE.Addend);
+ }
+}
+
+void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
+ uint64_t Value) {
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ resolveRelocationEntry(Relocs[i], Value);
+ }
+}
+
+// resolveSymbols - Resolve any relocations to the specified symbols if
+// we know where they live.
+void RuntimeDyldImpl::resolveSymbols() {
+ StringMap<RelocationList>::iterator i = SymbolRelocations.begin(),
+ e = SymbolRelocations.end();
+ for (; i != e; i++) {
+ StringRef Name = i->first();
+ RelocationList &Relocs = i->second;
+ StringMap<SymbolLoc>::const_iterator Loc = SymbolTable.find(Name);
+ if (Loc == SymbolTable.end()) {
+      // This is an external symbol; try to get its address from the
+      // memory manager.
+ uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(),
+ true);
+ DEBUG(dbgs() << "Resolving relocations Name: " << Name
+ << "\t" << format("%p", Addr)
+ << "\n");
+ resolveRelocationList(Relocs, (uintptr_t)Addr);
+ } else {
+ // Change the relocation to be section relative rather than symbol
+ // relative and move it to the resolved relocation list.
+ DEBUG(dbgs() << "Resolving symbol '" << Name << "'\n");
+ for (int i = 0, e = Relocs.size(); i != e; ++i) {
+ RelocationEntry Entry = Relocs[i];
+ Entry.Addend += Loc->second.second;
+ Relocations[Loc->second.first].push_back(Entry);
+ }
+ Relocs.clear();
+ }
+ }
+}
+
+
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
@@ -64,12 +416,36 @@ RuntimeDyld::~RuntimeDyld() {
bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
if (!Dyld) {
- if (RuntimeDyldMachO::isKnownFormat(InputBuffer))
- Dyld = new RuntimeDyldMachO(MM);
- else
- report_fatal_error("Unknown object format!");
+ sys::LLVMFileType type = sys::IdentifyFileType(
+ InputBuffer->getBufferStart(),
+ static_cast<unsigned>(InputBuffer->getBufferSize()));
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType:
+ Dyld = new RuntimeDyldELF(MM);
+ break;
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case sys::Mach_O_DSYMCompanion_FileType:
+ Dyld = new RuntimeDyldMachO(MM);
+ break;
+ case sys::Unknown_FileType:
+ case sys::Bitcode_FileType:
+ case sys::Archive_FileType:
+ case sys::COFF_FileType:
+ report_fatal_error("Incompatible object format!");
+ }
} else {
- if(!Dyld->isCompatibleFormat(InputBuffer))
+ if (!Dyld->isCompatibleFormat(InputBuffer))
report_fatal_error("Incompatible object format!");
}
@@ -84,8 +460,14 @@ void RuntimeDyld::resolveRelocations() {
Dyld->resolveRelocations();
}
-void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
- Dyld->reassignSymbolAddress(Name, Addr);
+void RuntimeDyld::reassignSectionAddress(unsigned SectionID,
+ uint64_t Addr) {
+ Dyld->reassignSectionAddress(SectionID, Addr);
+}
+
+void RuntimeDyld::mapSectionAddress(void *LocalAddress,
+ uint64_t TargetAddress) {
+ Dyld->mapSectionAddress(LocalAddress, TargetAddress);
}
StringRef RuntimeDyld::getErrorString() {
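
Taken together, the reworked interface turns RuntimeDyld's client flow from
symbol-based to section-based. A hedged sketch of the sequence, assuming an
RTDyldMemoryManager *MM, a MemoryBuffer *Buffer holding a relocatable
object, and that the public RuntimeDyld forwards getSymbolAddress as the
impl suggests:

#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"

void loadAndResolve(llvm::RTDyldMemoryManager *MM,
                    llvm::MemoryBuffer *Buffer) {
  llvm::RuntimeDyld Dyld(MM);
  if (Dyld.loadObject(Buffer))       // parses symbols, emits sections
    llvm::report_fatal_error(Dyld.getErrorString());

  // Optional, for remote execution: retarget a section before resolving.
  // Dyld.mapSectionAddress(LocalAddr, TargetAddr);

  Dyld.resolveRelocations();         // apply section and symbol relocations
  void *Entry = Dyld.getSymbolAddress("main"); // SectionID/Offset -> pointer
  (void)Entry;
}
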
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
new file mode 100644
index 000000000000..57fefee5dedc
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -0,0 +1,262 @@
+//===-- RuntimeDyldELF.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of ELF support for the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "RuntimeDyldELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+
+void RuntimeDyldELF::resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Type) {
+ default:
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ case ELF::R_X86_64_64: {
+ uint64_t *Target = (uint64_t*)(LocalAddress);
+ *Target = Value + Addend;
+ break;
+ }
+ case ELF::R_X86_64_32:
+ case ELF::R_X86_64_32S: {
+ Value += Addend;
+ // FIXME: Handle the possibility of this assertion failing
+ assert((Type == ELF::R_X86_64_32 && !(Value & 0xFFFFFFFF00000000ULL)) ||
+ (Type == ELF::R_X86_64_32S &&
+ (Value & 0xFFFFFFFF00000000ULL) == 0xFFFFFFFF00000000ULL));
+ uint32_t TruncatedAddr = (Value & 0xFFFFFFFF);
+ uint32_t *Target = reinterpret_cast<uint32_t*>(LocalAddress);
+ *Target = TruncatedAddr;
+ break;
+ }
+ case ELF::R_X86_64_PC32: {
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+ int64_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+    assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN);
+ int32_t TruncOffset = (RealOffset & 0xFFFFFFFF);
+ *Placeholder = TruncOffset;
+ break;
+ }
+ }
+}
+
+void RuntimeDyldELF::resolveX86Relocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ switch (Type) {
+ case ELF::R_386_32: {
+ uint32_t *Target = (uint32_t*)(LocalAddress);
+ uint32_t Placeholder = *Target;
+ *Target = Placeholder + Value + Addend;
+ break;
+ }
+ case ELF::R_386_PC32: {
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(LocalAddress);
+ uint32_t RealOffset = *Placeholder + Value + Addend - FinalAddress;
+ *Placeholder = RealOffset;
+ break;
+ }
+ default:
+ // There are other relocation types, but it appears these are the
+ // only ones currently used by the LLVM ELF object writer
+ llvm_unreachable("Relocation type not implemented yet!");
+ break;
+ }
+}
+
+void RuntimeDyldELF::resolveARMRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend) {
+ // TODO: Add Thumb relocations.
+ uint32_t* TargetPtr = (uint32_t*)LocalAddress;
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " << LocalAddress
+ << " FinalAddress: " << format("%p",FinalAddress)
+ << " Value: " << format("%x",Value)
+ << " Type: " << format("%x",Type)
+ << " Addend: " << format("%x",Addend)
+ << "\n");
+
+  switch (Type) {
+  default:
+    llvm_unreachable("Relocation type not implemented yet!");
+
+  // Just write the 32-bit value to the relocation address.
+ case ELF::R_ARM_ABS32 :
+ *TargetPtr = Value;
+ break;
+
+  // Write the low 16 bits of the 32-bit value into the movw instruction:
+  // the low 12 bits go in imm12 and the top 4 bits in imm4 (bits 16-19).
+ case ELF::R_ARM_MOVW_ABS_NC :
+ Value = Value & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+  // Write the high 16 bits of the 32-bit value into the movt instruction:
+  // the low 12 bits go in imm12 and the top 4 bits in imm4 (bits 16-19).
+ case ELF::R_ARM_MOVT_ABS :
+ Value = (Value >> 16) & 0xFFFF;
+ *TargetPtr |= Value & 0xFFF;
+ *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ break;
+
+  // Write the 24-bit relative value into the branch instruction.
+ case ELF::R_ARM_PC24 : // Fall through.
+ case ELF::R_ARM_CALL : // Fall through.
+ case ELF::R_ARM_JUMP24 :
+ int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
+ RelValue = (RelValue & 0x03FFFFFC) >> 2;
+ *TargetPtr &= 0xFF000000;
+ *TargetPtr |= RelValue;
+ break;
+ }
+}
+
+void RuntimeDyldELF::resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ switch (Arch) {
+ case Triple::x86_64:
+ resolveX86_64Relocation(LocalAddress, FinalAddress, Value, Type, Addend);
+ break;
+ case Triple::x86:
+ resolveX86Relocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress, (uint32_t)(FinalAddress & 0xffffffffL),
+ (uint32_t)(Value & 0xffffffffL), Type,
+ (uint32_t)(Addend & 0xffffffffL));
+ break;
+ default: llvm_unreachable("Unsupported CPU type!");
+ }
+}
+
+void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols,
+ StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
+ intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
+ RelocationValueRef Value;
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+ DEBUG(dbgs() << "\t\tRelType: " << RelType
+ << " Addend: " << Addend
+ << " TargetName: " << TargetName
+ << "\n");
+  // First, look the symbol up in the object file's local symbols.
+ LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data());
+ if (lsi != Symbols.end()) {
+ Value.SectionID = lsi->second.first;
+ Value.Addend = lsi->second.second;
+ } else {
+    // Then look it up in the global symbol table.
+ StringMap<SymbolLoc>::iterator gsi = SymbolTable.find(TargetName.data());
+ if (gsi != SymbolTable.end()) {
+ Value.SectionID = gsi->second.first;
+ Value.Addend = gsi->second.second;
+ } else {
+ SymbolRef::Type SymType;
+ Symbol.getType(SymType);
+ switch (SymType) {
+ case SymbolRef::ST_Debug: {
+        // TODO: ELF SymbolRef::ST_Debug is currently equal to STT_SECTION,
+        // but that is not obvious and may be changed by other developers.
+        // The best fix is probably to add a new symbol type ST_Section to
+        // SymbolRef and use it here.
+ section_iterator si = Obj.end_sections();
+ Symbol.getSection(si);
+ if (si == Obj.end_sections())
+ llvm_unreachable("Symbol section not found, bad object file format!");
+        DEBUG(dbgs() << "\t\tThis is a section symbol.\n");
+ Value.SectionID = findOrEmitSection((*si), true, ObjSectionToID);
+ Value.Addend = Addend;
+ break;
+ }
+ case SymbolRef::ST_Unknown: {
+ Value.SymbolName = TargetName.data();
+ Value.Addend = Addend;
+ break;
+ }
+ default:
+ llvm_unreachable("Unresolved symbol type!");
+ break;
+ }
+ }
+ }
+ DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID
+ << " Rel.Offset: " << Rel.Offset
+ << "\n");
+ if (Arch == Triple::arm &&
+ (RelType == ELF::R_ARM_PC24 ||
+ RelType == ELF::R_ARM_CALL ||
+ RelType == ELF::R_ARM_JUMP24)) {
+ // This is an ARM branch relocation, need to use a stub function.
+ DEBUG(dbgs() << "\t\tThis is an ARM branch relocation.");
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+
+    // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end()) {
+ resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+ i->second, RelType, 0);
+ DEBUG(dbgs() << " Stub function found\n");
+ } else {
+ // Create a new stub function.
+ DEBUG(dbgs() << " Create a new stub function\n");
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ AddRelocation(Value, Rel.SectionID,
+ StubTargetAddr - Section.Address, ELF::R_ARM_ABS32);
+ resolveRelocation(Target, Section.LoadAddress, (uint64_t)Section.Address +
+ Section.StubOffset, RelType, 0);
+ Section.StubOffset += getMaxStubSize();
+ }
+ } else
+ AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
+}
+
+bool RuntimeDyldELF::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
+ StringRef Magic = InputBuffer->getBuffer().slice(0, ELF::EI_NIDENT);
+ return (memcmp(Magic.data(), ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
+}
+} // namespace llvm
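
To make the R_X86_64_PC32 arithmetic above concrete, here is the same
computation on invented numbers; only the formula comes from the code, the
addresses are hypothetical.

#include <assert.h>
#include <stdint.h>

int main() {
  uint64_t FinalAddress = 0x2000; // target-space address of the 32-bit field
  uint64_t Value        = 0x3000; // resolved address of the referenced symbol
  int64_t  Addend       = -4;     // addend from the relocation entry
  int32_t  Placeholder  = 0;      // bits currently stored in the instruction

  int64_t RealOffset = Placeholder + Value + Addend - FinalAddress;
  assert(RealOffset == 0xFFC);    // PC-relative displacement patched in;
                                  // must fit in a signed 32-bit field,
                                  // hence the assert in the code above.
  return 0;
}
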
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
new file mode 100644
index 000000000000..36566da57a58
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -0,0 +1,62 @@
+//===-- RuntimeDyldELF.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_ELF_H
+#define LLVM_RUNTIME_DYLD_ELF_H
+
+#include "RuntimeDyldImpl.h"
+
+using namespace llvm;
+
+
+namespace llvm {
+class RuntimeDyldELF : public RuntimeDyldImpl {
+protected:
+ void resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ void resolveX86Relocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ void resolveARMRelocation(uint8_t *LocalAddress,
+ uint32_t FinalAddress,
+ uint32_t Value,
+ uint32_t Type,
+ int32_t Addend);
+
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs);
+
+public:
+ RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 7190a3c36fe9..bf678af6ece7 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -1,4 +1,4 @@
-//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,42 +15,128 @@
#define LLVM_RUNTIME_DYLD_IMPL_H
#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Format.h"
+#include <map>
using namespace llvm;
using namespace llvm::object;
namespace llvm {
+
+class SectionEntry {
+public:
+ uint8_t* Address;
+ size_t Size;
+ uint64_t LoadAddress; // For each section, the address it will be
+ // considered to live at for relocations. The same
+ // as the pointer to the above memory block for
+ // hosted JITs.
+  uintptr_t StubOffset;  // Used on architectures that need stub functions
+                         // for far relocations, such as ARM.
+  uintptr_t ObjAddress;  // Section address in the object file; used to
+                         // calculate MachO relocation addends.
+ SectionEntry(uint8_t* address, size_t size, uintptr_t stubOffset,
+ uintptr_t objAddress)
+ : Address(address), Size(size), LoadAddress((uintptr_t)address),
+ StubOffset(stubOffset), ObjAddress(objAddress) {}
+};
+
+class RelocationEntry {
+public:
+ unsigned SectionID; // Section the relocation is contained in.
+ uintptr_t Offset; // Offset into the section for the relocation.
+  uint32_t Data;      // Relocation data, including the relocation type and
+                      // any additional flags and parameters.
+ intptr_t Addend; // Addend encoded in the instruction itself, if any,
+ // plus the offset into the source section for
+ // the symbol once the relocation is resolvable.
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t data, int64_t addend)
+ : SectionID(id), Offset(offset), Data(data), Addend(addend) {}
+};
+
+// Raw relocation data from object file
+class ObjRelocationInfo {
+public:
+ unsigned SectionID;
+ uint64_t Offset;
+ SymbolRef Symbol;
+ uint64_t Type;
+ int64_t AdditionalInfo;
+};
+
+class RelocationValueRef {
+public:
+ unsigned SectionID;
+ intptr_t Addend;
+ const char *SymbolName;
+ RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {}
+
+ inline bool operator==(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0;
+ }
+ inline bool operator <(const RelocationValueRef &Other) const {
+ return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0;
+ }
+};
+
class RuntimeDyldImpl {
protected:
- unsigned CPUType;
- unsigned CPUSubtype;
-
// The MemoryManager to load objects into.
RTDyldMemoryManager *MemMgr;
- // FIXME: This all assumes we're dealing with external symbols for anything
- // explicitly referenced. I.e., we can index by name and things
- // will work out. In practice, this may not be the case, so we
- // should find a way to effectively generalize.
+  // A list of emitted sections.
+ typedef SmallVector<SectionEntry, 64> SectionList;
+ SectionList Sections;
- // For each function, we have a MemoryBlock of it's instruction data.
- StringMap<sys::MemoryBlock> Functions;
+ // Keep a map of sections from object file to the SectionID which
+ // references it.
+ typedef std::map<SectionRef, unsigned> ObjSectionToIDMap;
// Master symbol table. As modules are loaded and external symbols are
- // resolved, their addresses are stored here.
- StringMap<uint8_t*> SymbolTable;
+ // resolved, their addresses are stored here as a SectionID/Offset pair.
+ typedef std::pair<unsigned, uintptr_t> SymbolLoc;
+ StringMap<SymbolLoc> SymbolTable;
+ typedef DenseMap<const char*, SymbolLoc> LocalSymbolMap;
+
+ // Keep a map of common symbols to their sizes
+ typedef std::map<SymbolRef, unsigned> CommonSymbolMap;
+
+ // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (e.g., the JIT re-compiled the function),
+ // the relocations get re-resolved.
+ // The symbol (or section) the relocation is sourced from is the Key
+ // in the relocation list where it's stored.
+ typedef SmallVector<RelocationEntry, 64> RelocationList;
+ // Relocations to sections already loaded. Indexed by SectionID which is the
+  // source of the address. The target where the address will be written is
+ // SectionID/Offset in the relocation itself.
+ DenseMap<unsigned, RelocationList> Relocations;
+ // Relocations to external symbols that are not yet resolved.
+ // Indexed by symbol name.
+ StringMap<RelocationList> SymbolRelocations;
+
+ typedef std::map<RelocationValueRef, uintptr_t> StubMap;
+
+ Triple::ArchType Arch;
+
+ inline unsigned getMaxStubSize() {
+ if (Arch == Triple::arm || Arch == Triple::thumb)
+ return 8; // 32-bit instruction and 32-bit address
+ else
+ return 0;
+ }
bool HasError;
std::string ErrorStr;
@@ -62,25 +148,84 @@ protected:
return true;
}
- void extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress);
+ uint8_t *getSectionAddress(unsigned SectionID) {
+ return (uint8_t*)Sections[SectionID].Address;
+ }
+ /// \brief Emits a section containing common symbols.
+ /// \return SectionID.
+ unsigned emitCommonSymbols(const CommonSymbolMap &Map,
+ uint64_t TotalSize,
+ LocalSymbolMap &Symbols);
+
+ /// \brief Emits section data from the object file to the MemoryManager.
+  /// \param IsCode If true, allocateCodeSection() is used for the emission;
+  ///        otherwise allocateDataSection() is used.
+ /// \return SectionID.
+ unsigned emitSection(const SectionRef &Section, bool IsCode);
+
+  /// \brief Find Section in LocalSections. If the section is not found, emit
+  ///        it and store it in LocalSections.
+  /// \param IsCode If true, allocateCodeSection() is used for the emission;
+  ///        otherwise allocateDataSection() is used.
+ /// \return SectionID.
+ unsigned findOrEmitSection(const SectionRef &Section, bool IsCode,
+ ObjSectionToIDMap &LocalSections);
+
+  /// \brief If Value.SymbolName is null, store the relocation in Relocations;
+  ///        otherwise store it in SymbolRelocations.
+ void AddRelocation(const RelocationValueRef &Value, unsigned SectionID,
+ uintptr_t Offset, uint32_t RelType);
+
+  /// \brief Emits a long-jump stub instruction at Addr.
+  /// \return Pointer to the memory area where the target address is emitted.
+ uint8_t* createStubFunction(uint8_t *Addr);
+
+ /// \brief Resolves relocations from Relocs list with address from Value.
+ void resolveRelocationList(const RelocationList &Relocs, uint64_t Value);
+ void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value);
+
+  /// \brief An object-file-specific relocation resolver.
+ /// \param Address Address to apply the relocation action
+ /// \param Value Target symbol address to apply the relocation action
+ /// \param Type object file specific relocation type
+ /// \param Addend A constant addend used to compute the value to be stored
+ /// into the relocatable field
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) = 0;
+
+  /// \brief Parses an object file relocation and stores it in Relocations
+  ///        or SymbolRelocations, depending on the object file type.
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs) = 0;
+
+ void resolveSymbols();
public:
RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
virtual ~RuntimeDyldImpl();
- virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+ bool loadObject(const MemoryBuffer *InputBuffer);
void *getSymbolAddress(StringRef Name) {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
- return SymbolTable.lookup(Name);
+ if (SymbolTable.find(Name) == SymbolTable.end())
+ return 0;
+ SymbolLoc Loc = SymbolTable.lookup(Name);
+ return getSectionAddress(Loc.first) + Loc.second;
}
void resolveRelocations();
- virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0;
+ void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
+
+ void mapSectionAddress(void *LocalAddress, uint64_t TargetAddress);
// Is the linker in an error state?
bool hasError() { return HasError; }
@@ -92,58 +237,7 @@ public:
StringRef getErrorString() { return ErrorStr; }
virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
-};
-
-
-class RuntimeDyldMachO : public RuntimeDyldImpl {
- // For each symbol, keep a list of relocations based on it. Anytime
- // its address is reassigned (the JIT re-compiled the function, e.g.),
- // the relocations get re-resolved.
- struct RelocationEntry {
- std::string Target; // Object this relocation is contained in.
- uint64_t Offset; // Offset into the object for the relocation.
- uint32_t Data; // Second word of the raw macho relocation entry.
- int64_t Addend; // Addend encoded in the instruction itself, if any.
- bool isResolved; // Has this relocation been resolved previously?
-
- RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
- : Target(t), Offset(offset), Data(data), Addend(addend),
- isResolved(false) {}
- };
- typedef SmallVector<RelocationEntry, 4> RelocationList;
- StringMap<RelocationList> Relocations;
-
- // FIXME: Also keep a map of all the relocations contained in an object. Use
- // this to dynamically answer whether all of the relocations in it have
- // been resolved or not.
-
- bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
- unsigned Type, unsigned Size);
- bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size);
- bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size);
-
- bool loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
- bool loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-
-public:
- RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
- bool loadObject(MemoryBuffer *InputBuffer);
-
- void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
-
- static bool isKnownFormat(const MemoryBuffer *InputBuffer);
-
- bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
- return isKnownFormat(InputBuffer);
- }
};
} // end namespace llvm
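
Worth pausing on the interface change above: symbols are no longer raw pointers but (SectionID, Offset) pairs, so re-basing a section via reassignSectionAddress() implicitly moves every symbol defined in it. A minimal sketch of the lookup, using simplified stand-ins for the real SmallVector/StringMap containers of the class:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>

    // Simplified stand-ins; the class above keeps a vector of SectionEntry
    // and a StringMap<SymbolLoc>.
    struct Section { uint8_t *Address; };
    typedef std::pair<unsigned, uint64_t> SymbolLoc; // (SectionID, Offset)

    void *lookup(const std::map<std::string, SymbolLoc> &SymbolTable,
                 std::map<unsigned, Section> &Sections,
                 const std::string &Name) {
      std::map<std::string, SymbolLoc>::const_iterator I =
          SymbolTable.find(Name);
      if (I == SymbolTable.end())
        return 0;
      // Base + offset: moving the section base moves all of its symbols.
      return Sections[I->second.first].Address + I->second.second;
    }
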
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 623e9b2acca3..1318b4454255 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -1,4 +1,4 @@
-//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -15,73 +15,147 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "RuntimeDyldImpl.h"
+#include "RuntimeDyldMachO.h"
using namespace llvm;
using namespace llvm::object;
namespace llvm {
-bool RuntimeDyldMachO::
-resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
- unsigned Type, unsigned Size) {
+void RuntimeDyldMachO::resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend) {
+ bool isPCRel = (Type >> 24) & 1;
+ unsigned MachoType = (Type >> 28) & 0xf;
+ unsigned Size = 1 << ((Type >> 25) & 3);
+
+ DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress)
+ << " FinalAddress: " << format("%p", FinalAddress)
+ << " Value: " << format("%p", Value)
+ << " Addend: " << Addend
+ << " isPCRel: " << isPCRel
+ << " MachoType: " << MachoType
+ << " Size: " << Size
+ << "\n");
+
// This just dispatches to the proper target specific routine.
- switch (CPUType) {
- default: assert(0 && "Unsupported CPU type!");
- case mach::CTM_x86_64:
- return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size);
- case mach::CTM_ARM:
- return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size);
+ switch (Arch) {
+ default: llvm_unreachable("Unsupported CPU type!");
+ case Triple::x86_64:
+ resolveX86_64Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ case Triple::x86:
+ resolveI386Relocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ case Triple::arm: // Fall through.
+ case Triple::thumb:
+ resolveARMRelocation(LocalAddress,
+ FinalAddress,
+ (uintptr_t)Value,
+ isPCRel,
+ MachoType,
+ Size,
+ Addend);
+ break;
+ }
+}
+
+bool RuntimeDyldMachO::
+resolveI386Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
+ if (isPCRel)
+ Value -= FinalAddress + 4; // see resolveX86_64Relocation
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ uint8_t *p = LocalAddress;
+ uint64_t ValueToWrite = Value + Addend;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)(ValueToWrite & 0xff);
+ ValueToWrite >>= 8;
+ }
+ return false;
+ }
+ case macho::RIT_Difference:
+ case macho::RIT_Generic_LocalDifference:
+ case macho::RIT_Generic_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
}
- llvm_unreachable("");
}
bool RuntimeDyldMachO::
-resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
- bool isPCRel, unsigned Type,
- unsigned Size) {
+resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel)
// FIXME: It seems this value needs to be adjusted by 4 for an effective PC
// address. Is that expected? Only for branches, perhaps?
- Value -= Address + 4;
+ Value -= FinalAddress + 4;
switch(Type) {
default:
llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_X86_64_Signed1:
+ case macho::RIT_X86_64_Signed2:
+ case macho::RIT_X86_64_Signed4:
+ case macho::RIT_X86_64_Signed:
case macho::RIT_X86_64_Unsigned:
case macho::RIT_X86_64_Branch: {
+ Value += Addend;
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
+ uint8_t *p = (uint8_t*)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
}
return false;
}
- case macho::RIT_X86_64_Signed:
case macho::RIT_X86_64_GOTLoad:
case macho::RIT_X86_64_GOT:
case macho::RIT_X86_64_Subtractor:
- case macho::RIT_X86_64_Signed1:
- case macho::RIT_X86_64_Signed2:
- case macho::RIT_X86_64_Signed4:
case macho::RIT_X86_64_TLV:
return Error("Relocation type not implemented yet!");
}
- return false;
}
-bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
- bool isPCRel, unsigned Type,
- unsigned Size) {
+bool RuntimeDyldMachO::
+resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend) {
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
if (isPCRel) {
- Value -= Address;
+ Value -= FinalAddress;
// ARM PCRel relocations have an effective-PC offset of two instructions
// (four bytes in Thumb mode, 8 bytes in ARM mode).
// FIXME: For now, assume ARM mode.
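
Aside: the shifts at the top of resolveRelocation() (and the isExtern test in processRelocationRef() below) unpack one packed 32-bit Mach-O relocation info word. The decoding in one place, matching the bit positions used throughout this patch:

    #include <cstdint>

    struct DecodedReloc {
      uint32_t SymbolNum; // low 24 bits: symbol or section index
      bool IsPCRel;       // bit 24
      unsigned Size;      // bits 25-26 hold log2 of the width in bytes
      bool IsExtern;      // bit 27
      unsigned Type;      // bits 28-31: target-specific relocation kind
    };

    DecodedReloc decodeRelocWord(uint32_t Word1) {
      DecodedReloc R;
      R.SymbolNum = Word1 & 0xffffff;
      R.IsPCRel = (Word1 >> 24) & 1;
      R.Size = 1u << ((Word1 >> 25) & 3);
      R.IsExtern = (Word1 >> 27) & 1;
      R.Type = (Word1 >> 28) & 0xf;
      return R;
    }
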
@@ -92,10 +166,9 @@ bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
default:
llvm_unreachable("Invalid relocation type!");
case macho::RIT_Vanilla: {
- llvm_unreachable("Invalid relocation type!");
// Mask in the target value a byte at a time (we don't have an alignment
// guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
+ uint8_t *p = (uint8_t*)LocalAddress;
for (unsigned i = 0; i < Size; ++i) {
*p++ = (uint8_t)Value;
Value >>= 8;
@@ -105,7 +178,7 @@ bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
case macho::RIT_ARM_Branch24Bit: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
- uint32_t *p = (uint32_t*)Address;
+ uint32_t *p = (uint32_t*)LocalAddress;
// The low two bits of the value are not encoded.
Value >>= 2;
// Mask the value to 24 bits.
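
The or-step that follows falls outside the context shown, but the whole fixup is small enough to state in isolation; the high byte of the instruction word carries the condition and opcode bits, so only the low 24 bits are rewritten. A sketch, not the patch's exact code:

    #include <cstdint>

    // Splice a PC-relative byte offset into an ARM 24-bit branch/BL.
    uint32_t applyBranch24(uint32_t Insn, int64_t Offset) {
      uint32_t Imm24 = (uint32_t)(Offset >> 2) & 0xffffff; // word-aligned
      return (Insn & 0xff000000u) | Imm24;
    }
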
@@ -131,388 +204,84 @@ bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
return false;
}
-bool RuntimeDyldMachO::
-loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
- Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
- if (!SegmentLC)
- return Error("unable to load segment load command");
-
- for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section> Sect;
- Obj->ReadSection(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // FIXME: For the time being, we're only loading text segments.
- if (Sect->Flags != 0x80000400)
- continue;
-
- // Address and names of symbols in the section.
- typedef std::pair<uint64_t, StringRef> SymbolEntry;
- SmallVector<SymbolEntry, 64> Symbols;
- // Index of all the names, in this section or not. Used when we're
- // dealing with relocation entries.
- SmallVector<StringRef, 64> SymbolNames;
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::SymbolTableEntry> STE;
- Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- if (STE->SectionIndex > SegmentLC->NumSections)
- return Error("invalid section index for symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // Just skip symbols not defined in this section.
- if ((unsigned)STE->SectionIndex - 1 != SectNum)
- continue;
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this section.
- continue;
- // Flags == 0x8 marks a thumb function for ARM, which is fine as it
- // doesn't require any special handling here.
- if (STE->Flags != 0x0 && STE->Flags != 0x8)
- continue;
-
- // Remember the symbol.
- Symbols.push_back(SymbolEntry(STE->Value, Name));
-
- DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
- (Sect->Address + STE->Value) << "\n");
- }
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
-
- // If there weren't any functions (odd, but just in case...)
- if (!Symbols.size())
- continue;
-
- // Extract the function data.
- uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
- SegmentLC->FileSize).data();
- for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
- uint64_t StartOffset = Sect->Address + Symbols[i].first;
- uint64_t EndOffset = Symbols[i + 1].first - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
- }
- // The last symbol we do after since the end address is calculated
- // differently because there is no next symbol to reference.
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
- uint64_t EndOffset = Sect->Size - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[Symbols.size()-1].second,
- Base + StartOffset, Base + EndOffset);
-
- // Now extract the relocation information for each function and process it.
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- // Look for the function containing the address. This is used for JIT
- // code, so the number of functions in section is almost always going
- // to be very small (usually just one), so until we have use cases
- // where that's not true, just use a trivial linear search.
- unsigned SymbolNum;
- unsigned NumSymbols = Symbols.size();
- assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
- "No symbol containing relocation!");
- for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
- if (Symbols[SymbolNum + 1].first > Offset)
- break;
- // Adjust the offset to be relative to the symbol.
- Offset -= Symbols[SymbolNum].first;
- // Get the name of the symbol containing the relocation.
- StringRef TargetName = SymbolNames[SymbolNum];
-
- bool isExtern = (RE->Word1 >> 27) & 1;
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- // FIXME: Some targets (ARM) use internal relocations even for
- // externally visible symbols, if the definition is in the same
- // file as the reference. We need to convert those back to by-name
- // references. We can resolve the address based on the section
- // offset and see if we have a symbol at that address. If we do,
- // use that; otherwise, puke.
- if (!isExtern)
- return Error("Internal relocations not supported.");
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- StringRef SourceName = SymbolNames[SourceNum];
-
- // FIXME: Get the relocation addend from the target address.
-
- // Now store the relocation information. Associate it with the source
- // symbol.
- Relocations[SourceName].push_back(RelocationEntry(TargetName,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
- }
- }
- return false;
-}
-
-
-bool RuntimeDyldMachO::
-loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
- Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
- if (!Segment64LC)
- return Error("unable to load segment load command");
-
- for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section64> Sect;
- Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // FIXME: For the time being, we're only loading text segments.
- if (Sect->Flags != 0x80000400)
- continue;
-
- // Address and names of symbols in the section.
- typedef std::pair<uint64_t, StringRef> SymbolEntry;
- SmallVector<SymbolEntry, 64> Symbols;
- // Index of all the names, in this section or not. Used when we're
- // dealing with relocation entries.
- SmallVector<StringRef, 64> SymbolNames;
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::Symbol64TableEntry> STE;
- Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- if (STE->SectionIndex > Segment64LC->NumSections)
- return Error("invalid section index for symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // Just skip symbols not defined in this section.
- if ((unsigned)STE->SectionIndex - 1 != SectNum)
- continue;
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this section.
- continue;
- if (STE->Flags != 0x0)
- continue;
-
- // Remember the symbol.
- Symbols.push_back(SymbolEntry(STE->Value, Name));
-
- DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
- (Sect->Address + STE->Value) << "\n");
+void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols,
+ StubMap &Stubs) {
+
+ uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL);
+ RelocationValueRef Value;
+ SectionEntry &Section = Sections[Rel.SectionID];
+ uint8_t *Target = Section.Address + Rel.Offset;
+
+ bool isExtern = (RelType >> 27) & 1;
+ if (isExtern) {
+ StringRef TargetName;
+ const SymbolRef &Symbol = Rel.Symbol;
+ Symbol.getName(TargetName);
+ // First, look up the symbol in the object file's local symbols.
+ LocalSymbolMap::iterator lsi = Symbols.find(TargetName.data());
+ if (lsi != Symbols.end()) {
+ Value.SectionID = lsi->second.first;
+ Value.Addend = lsi->second.second;
+ } else {
+ // Then try the global symbol table.
+ StringMap<SymbolLoc>::iterator gsi = SymbolTable.find(TargetName.data());
+ if (gsi != SymbolTable.end()) {
+ Value.SectionID = gsi->second.first;
+ Value.Addend = gsi->second.second;
+ } else
+ Value.SymbolName = TargetName.data();
}
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
-
- // If there weren't any functions (odd, but just in case...)
- if (!Symbols.size())
- continue;
-
- // Extract the function data.
- uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
- Segment64LC->FileSize).data();
- for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
- uint64_t StartOffset = Sect->Address + Symbols[i].first;
- uint64_t EndOffset = Symbols[i + 1].first - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ } else {
+ error_code err;
+ uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF);
+ section_iterator si = Obj.begin_sections(),
+ se = Obj.end_sections();
+ for (uint8_t i = 1; i < sectionIndex; i++) {
+ error_code err;
+ si.increment(err);
+ if (si == se)
+ break;
}
- // The last symbol we do after since the end address is calculated
- // differently because there is no next symbol to reference.
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
- uint64_t EndOffset = Sect->Size - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[Symbols.size()-1].second,
- Base + StartOffset, Base + EndOffset);
-
- // Now extract the relocation information for each function and process it.
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- // Look for the function containing the address. This is used for JIT
- // code, so the number of functions in section is almost always going
- // to be very small (usually just one), so until we have use cases
- // where that's not true, just use a trivial linear search.
- unsigned SymbolNum;
- unsigned NumSymbols = Symbols.size();
- assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
- "No symbol containing relocation!");
- for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
- if (Symbols[SymbolNum + 1].first > Offset)
- break;
- // Adjust the offset to be relative to the symbol.
- Offset -= Symbols[SymbolNum].first;
- // Get the name of the symbol containing the relocation.
- StringRef TargetName = SymbolNames[SymbolNum];
-
- bool isExtern = (RE->Word1 >> 27) & 1;
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- if (!isExtern)
- return Error("Internal relocations not supported.");
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- StringRef SourceName = SymbolNames[SourceNum];
-
- // FIXME: Get the relocation addend from the target address.
-
- // Now store the relocation information. Associate it with the source
- // symbol.
- Relocations[SourceName].push_back(RelocationEntry(TargetName,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
+ assert(si != se && "No section containing relocation!");
+ Value.SectionID = findOrEmitSection(*si, true, ObjSectionToID);
+ Value.Addend = *(const intptr_t *)Target;
+ if (Value.Addend) {
+ // The Mach-O addend is an offset from the current section; rebase it to
+ // be an offset from the destination section.
+ Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress;
}
}
- return false;
-}
-
-bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
- // If the linker is in an error state, don't do anything.
- if (hasError())
- return true;
- // Load the Mach-O wrapper object.
- std::string ErrorStr;
- OwningPtr<MachOObject> Obj(
- MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
- if (!Obj)
- return Error("unable to load object: '" + ErrorStr + "'");
-
- // Get the CPU type information from the header.
- const macho::Header &Header = Obj->getHeader();
-
- // FIXME: Error checking that the loaded object is compatible with
- // the system we're running on.
- CPUType = Header.CPUType;
- CPUSubtype = Header.CPUSubtype;
- // Validate that the load commands match what we expect.
- const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
- *DysymtabLCI = 0;
- for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
- const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
- switch (LCI.Command.Type) {
- case macho::LCT_Segment:
- case macho::LCT_Segment64:
- if (SegmentLCI)
- return Error("unexpected input object (multiple segments)");
- SegmentLCI = &LCI;
- break;
- case macho::LCT_Symtab:
- if (SymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- SymtabLCI = &LCI;
- break;
- case macho::LCT_Dysymtab:
- if (DysymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- DysymtabLCI = &LCI;
- break;
- default:
- return Error("unexpected input object (unexpected load command");
+ if (Arch == Triple::arm && RelType == macho::RIT_ARM_Branch24Bit) {
+ // This is an ARM branch relocation; it needs a stub function.
+
+ // Look for an existing stub.
+ StubMap::const_iterator i = Stubs.find(Value);
+ if (i != Stubs.end())
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address + i->second,
+ RelType, 0);
+ else {
+ // Create a new stub function.
+ Stubs[Value] = Section.StubOffset;
+ uint8_t *StubTargetAddr = createStubFunction(Section.Address +
+ Section.StubOffset);
+ AddRelocation(Value, Rel.SectionID, StubTargetAddr - Section.Address,
+ macho::RIT_Vanilla);
+ resolveRelocation(Target, (uint64_t)Target,
+ (uint64_t)Section.Address + Section.StubOffset,
+ RelType, 0);
+ Section.StubOffset += getMaxStubSize();
}
- }
-
- if (!SymtabLCI)
- return Error("no symbol table found in object");
- if (!SegmentLCI)
- return Error("no symbol table found in object");
-
- // Read and register the symbol table data.
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
- Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
- if (!SymtabLC)
- return Error("unable to load symbol table load command");
- Obj->RegisterStringTable(*SymtabLC);
-
- // Read the dynamic link-edit information, if present (not present in static
- // objects).
- if (DysymtabLCI) {
- InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
- Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
- if (!DysymtabLC)
- return Error("unable to load dynamic link-exit load command");
-
- // FIXME: We don't support anything interesting yet.
-// if (DysymtabLC->LocalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: local symbol entries");
-// if (DysymtabLC->ExternalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
-// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
-// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
- }
-
- // Load the segment load command.
- if (SegmentLCI->Command.Type == macho::LCT_Segment) {
- if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- } else {
- if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- }
-
- return false;
+ } else
+ AddRelocation(Value, Rel.SectionID, Rel.Offset, RelType);
}
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
- // Assign the address in our symbol table.
- SymbolTable[Name] = Addr;
-
- RelocationList &Relocs = Relocations[Name];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- RelocationEntry &RE = Relocs[i];
- uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
- bool isPCRel = (RE.Data >> 24) & 1;
- unsigned Type = (RE.Data >> 28) & 0xf;
- unsigned Size = 1 << ((RE.Data >> 25) & 3);
-
- DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
- << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
- << " from '" << Name << " (" << format("%p", Addr) << ")"
- << "(" << (isPCRel ? "pcrel" : "absolute")
- << ", type: " << Type << ", Size: " << Size << ").\n");
-
- resolveRelocation(Target, Addr, isPCRel, Type, Size);
- RE.isResolved = true;
- }
-}
-bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+bool RuntimeDyldMachO::isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
if (Magic == "\xFE\xED\xFA\xCE") return true;
if (Magic == "\xCE\xFA\xED\xFE") return true;
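
The probe above compares the first four bytes against the 32-bit Mach-O magics in both byte orders (any further cases fall outside this hunk). The same check as a standalone helper:

    #include <cstring>

    // True if Buf begins with one of the 32-bit Mach-O magic numbers
    // tested above (big-endian, then little-endian byte order).
    bool looksLikeMachO32(const char *Buf, unsigned Len) {
      if (Len < 4)
        return false;
      return std::memcmp(Buf, "\xFE\xED\xFA\xCE", 4) == 0 ||
             std::memcmp(Buf, "\xCE\xFA\xED\xFE", 4) == 0;
    }
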
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
new file mode 100644
index 000000000000..898b85190e71
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -0,0 +1,70 @@
+//===-- RuntimeDyldMachO.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachO support for MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_MACHO_H
+#define LLVM_RUNTIME_DYLD_MACHO_H
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/Format.h"
+#include "RuntimeDyldImpl.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+
+namespace llvm {
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+protected:
+ bool resolveI386Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveX86_64Relocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+ bool resolveARMRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ bool isPCRel,
+ unsigned Type,
+ unsigned Size,
+ int64_t Addend);
+
+ virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ const ObjectFile &Obj,
+ ObjSectionToIDMap &ObjSectionToID,
+ LocalSymbolMap &Symbols, StubMap &Stubs);
+
+public:
+ virtual void resolveRelocation(uint8_t *LocalAddress,
+ uint64_t FinalAddress,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend);
+
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
+};
+
+} // end namespace llvm
+
+#endif
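
For orientation, a hypothetical driver built only from the methods declared above and inherited from RuntimeDyldImpl; the SectionID and the symbol name are illustrative, and error handling is omitted:

    #include "RuntimeDyldMachO.h" // the header introduced above
    using namespace llvm;

    void *linkForTarget(RuntimeDyldMachO &Dyld, const MemoryBuffer *Obj,
                        uint64_t TargetBase) {
      if (!Dyld.isCompatibleFormat(Obj))
        return 0;
      Dyld.loadObject(Obj);                       // emit sections, queue relocations
      Dyld.reassignSectionAddress(0, TargetBase); // re-base section 0 (illustrative)
      Dyld.resolveRelocations();                  // apply all pending fixups
      return Dyld.getSymbolAddress("_main");      // (SectionID, Offset) -> pointer
    }
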
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 004b8656bf22..42364f9b70f7 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -7,9 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This just asks the TargetRegistry for the appropriate JIT to use, and allows
-// the user to specify a specific one on the commandline with -march=x. Clients
-// should initialize targets prior to calling createJIT.
+// This just asks the TargetRegistry for the appropriate target to use, and
+// allows the user to specify a specific one on the commandline with -march=x,
+// -mcpu=y, and -mattr=a,-b,+c. Clients should initialize targets prior to
+// calling selectTarget().
//
//===----------------------------------------------------------------------===//
@@ -21,21 +22,27 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
+TargetMachine *EngineBuilder::selectTarget() {
+ StringRef MArch = "";
+ StringRef MCPU = "";
+ SmallVector<std::string, 1> MAttrs;
+ Triple TT(M->getTargetTriple());
+
+ return selectTarget(TT, MArch, MCPU, MAttrs);
+}
+
/// selectTarget - Pick a target either via -march or by guessing the native
/// arch. Add any CPU features specified via -mcpu or -mattr.
-TargetMachine *EngineBuilder::selectTarget(Module *Mod,
+TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
StringRef MArch,
StringRef MCPU,
- const SmallVectorImpl<std::string>& MAttrs,
- Reloc::Model RM,
- CodeModel::Model CM,
- std::string *ErrorStr) {
- Triple TheTriple(Mod->getTargetTriple());
+ const SmallVectorImpl<std::string>& MAttrs) {
+ Triple TheTriple(TargetTriple);
if (TheTriple.getTriple().empty())
- TheTriple.setTriple(sys::getHostTriple());
+ TheTriple.setTriple(sys::getDefaultTargetTriple());
// Adjust the triple to match what the user requested.
const Target *TheTarget = 0;
@@ -55,7 +62,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
// Adjust the triple to match (if known), otherwise stick with the
- // module/host triple.
+ // requested/host triple.
Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
if (Type != Triple::UnknownArch)
TheTriple.setArch(Type);
@@ -69,12 +76,6 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
}
}
- if (!TheTarget->hasJIT()) {
- errs() << "WARNING: This target JIT is not designed for the host you are"
- << " running. If bad things happen, please choose a different "
- << "-march switch.\n";
- }
-
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
if (!MAttrs.empty()) {
@@ -87,7 +88,9 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
// Allocate a target...
TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr,
- RM, CM);
+ Options,
+ RelocModel, CMModel,
+ OptLevel);
assert(Target && "Could not allocate target machine!");
return Target;
}
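
A hypothetical caller of the refactored overloads, assuming the header paths of this era of the tree; the no-argument selectTarget() derives the triple from the module, while the explicit overload takes it directly:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/Triple.h"
    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"
    #include "llvm/Support/TargetSelect.h"
    using namespace llvm;

    TargetMachine *pickTargetMachine(Module *M) {
      InitializeNativeTarget(); // clients initialize targets first
      EngineBuilder EB(M);
      SmallVector<std::string, 1> MAttrs; // no extra -mattr features
      return EB.selectTarget(Triple(M->getTargetTriple()),
                             /*MArch=*/"", /*MCPU=*/"", MAttrs);
    }
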
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
new file mode 100644
index 000000000000..e22b8cd406b2
--- /dev/null
+++ b/lib/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/LLVMBuild.txt --------------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore
+
+[component_0]
+type = Group
+name = Libraries
+parent = $ROOT
diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt
index 4d8824bfcb3f..0b6d2f4218e3 100644
--- a/lib/Linker/CMakeLists.txt
+++ b/lib/Linker/CMakeLists.txt
@@ -4,11 +4,3 @@ add_llvm_library(LLVMLinker
LinkModules.cpp
Linker.cpp
)
-
-add_llvm_library_dependencies(LLVMLinker
- LLVMArchive
- LLVMBitReader
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
- )
diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt
new file mode 100644
index 000000000000..2b4c232b8067
--- /dev/null
+++ b/lib/Linker/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Linker/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Linker
+parent = Libraries
+required_libraries = Archive BitReader Core Support TransformUtils
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index 2c4ed7fdc17a..c16d1958cdfb 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -16,7 +16,6 @@
#include "llvm/Module.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Bitcode/Archive.h"
-#include "llvm/Config/config.h"
#include <memory>
#include <set>
using namespace llvm;
@@ -141,7 +140,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
// Find the modules we need to link into the target module. Note that arch
// keeps ownership of these modules and may return the same Module* from a
// subsequent call.
- std::set<Module*> Modules;
+ SmallVector<Module*, 16> Modules;
if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
return error("Cannot find symbols in '" + Filename.str() +
"': " + ErrMsg);
@@ -158,7 +157,7 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
UndefinedSymbols.end());
// Loop over all the Modules that we got back from the archive
- for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end();
+ for (SmallVectorImpl<Module*>::iterator I=Modules.begin(), E=Modules.end();
I != E; ++I) {
// Get the module we must link in.
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 03a962e3be5d..765fcc88235b 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -16,11 +16,16 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <cctype>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -38,11 +43,16 @@ class TypeMapTy : public ValueMapTypeRemapper {
/// case we need to roll back.
SmallVector<Type*, 16> SpeculativeTypes;
- /// DefinitionsToResolve - This is a list of non-opaque structs in the source
- /// module that are mapped to an opaque struct in the destination module.
- SmallVector<StructType*, 16> DefinitionsToResolve;
-public:
+ /// SrcDefinitionsToResolve - This is a list of non-opaque structs in the
+ /// source module that are mapped to an opaque struct in the destination
+ /// module.
+ SmallVector<StructType*, 16> SrcDefinitionsToResolve;
+ /// DstResolvedOpaqueTypes - This is the set of opaque types in the
+ /// destination modules who are getting a body from the source module.
+ SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
+
+public:
/// addTypeMapping - Indicate that the specified type in the destination
/// module is conceptually equivalent to the specified type in the source
/// module.
@@ -58,6 +68,18 @@ public:
FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+ /// dump - Dump out the type map for debugging purposes.
+ void dump() const {
+ for (DenseMap<Type*, Type*>::const_iterator
+ I = MappedTypes.begin(), E = MappedTypes.end(); I != E; ++I) {
+ dbgs() << "TypeMap: ";
+ I->first->dump();
+ dbgs() << " => ";
+ I->second->dump();
+ dbgs() << '\n';
+ }
+ }
+
private:
Type *getImpl(Type *T);
/// remapType - Implement the ValueMapTypeRemapper interface.
@@ -118,11 +140,17 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
return true;
}
- // Mapping a non-opaque source type to an opaque dest. Keep the dest, but
- // fill it in later. This doesn't need to be speculative.
+ // Mapping a non-opaque source type to an opaque dest. If this is the first
+ // type that we're mapping onto this destination type then we succeed. Keep
+ // the dest, but fill it in later. This doesn't need to be speculative. If
+ // this is the second (different) type that we're trying to map onto the
+ // same opaque type then we fail.
if (cast<StructType>(DstTy)->isOpaque()) {
+ // We can only map one source type onto the opaque destination type.
+ if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)))
+ return false;
+ SrcDefinitionsToResolve.push_back(SSTy);
Entry = DstTy;
- DefinitionsToResolve.push_back(SSTy);
return true;
}
}
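
One detail worth calling out: in the LLVM of this era, SmallPtrSet::insert() returns bool, false when the element was already present, so a second source type trying to claim the same opaque destination struct fails the isomorphism check. The guard in isolation:

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/DerivedTypes.h"

    // false => another source type already claimed this opaque struct.
    static bool claimOpaque(llvm::SmallPtrSet<llvm::StructType*, 16> &Claimed,
                            llvm::StructType *DstTy) {
      return Claimed.insert(DstTy);
    }
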
@@ -137,6 +165,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
return false;
+
} else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
return false;
@@ -174,9 +203,9 @@ void TypeMapTy::linkDefinedTypeBodies() {
SmallString<16> TmpName;
// Note that processing entries in this loop (calling 'get') can add new
- // entries to the DefinitionsToResolve vector.
- while (!DefinitionsToResolve.empty()) {
- StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+ // entries to the SrcDefinitionsToResolve vector.
+ while (!SrcDefinitionsToResolve.empty()) {
+ StructType *SrcSTy = SrcDefinitionsToResolve.pop_back_val();
StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
// TypeMap is a many-to-one mapping, if there were multiple types that
@@ -204,16 +233,17 @@ void TypeMapTy::linkDefinedTypeBodies() {
TmpName.clear();
}
}
+
+ DstResolvedOpaqueTypes.clear();
}
-
/// get - Return the mapped type to use for the specified input type from the
/// source module.
Type *TypeMapTy::get(Type *Ty) {
Type *Result = getImpl(Ty);
// If this caused a reference to any struct type, resolve it before returning.
- if (!DefinitionsToResolve.empty())
+ if (!SrcDefinitionsToResolve.empty())
linkDefinedTypeBodies();
return Result;
}
@@ -252,7 +282,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// Otherwise, rebuild a modified type.
switch (Ty->getTypeID()) {
- default: assert(0 && "unknown derived type to remap");
+ default: llvm_unreachable("unknown derived type to remap");
case Type::ArrayTyID:
return *Entry = ArrayType::get(ElementTypes[0],
cast<ArrayType>(Ty)->getNumElements());
@@ -304,12 +334,12 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// Otherwise we create a new type and resolve its body later. This will be
// resolved by the top level of get().
- DefinitionsToResolve.push_back(STy);
- return *Entry = StructType::create(STy->getContext());
+ SrcDefinitionsToResolve.push_back(STy);
+ StructType *DTy = StructType::create(STy->getContext());
+ DstResolvedOpaqueTypes.insert(DTy);
+ return *Entry = DTy;
}
-
-
//===----------------------------------------------------------------------===//
// ModuleLinker implementation.
//===----------------------------------------------------------------------===//
@@ -341,6 +371,9 @@ namespace {
// Set of items not to link in from source.
SmallPtrSet<const Value*, 16> DoNotLinkFromSource;
+ // Vector of functions to lazily link in.
+ std::vector<Function*> LazilyLinkFunctions;
+
public:
std::string ErrorMsg;
@@ -360,7 +393,9 @@ namespace {
/// getLinkageResult - This analyzes the two global values and determines
/// what the result will look like in the destination module.
bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+ GlobalValue::LinkageTypes &LT,
+ GlobalValue::VisibilityTypes &Vis,
+ bool &LinkFromSrc);
/// getLinkedToGlobal - Given a global in the source module, return the
/// global in the destination module that is being linked to, if any.
@@ -384,11 +419,19 @@ namespace {
}
void computeTypeMapping();
+ bool categorizeModuleFlagNodes(const NamedMDNode *ModFlags,
+ DenseMap<MDString*, MDNode*> &ErrorNode,
+ DenseMap<MDString*, MDNode*> &WarningNode,
+ DenseMap<MDString*, MDNode*> &OverrideNode,
+ DenseMap<MDString*,
+ SmallSetVector<MDNode*, 8> > &RequireNodes,
+ SmallSetVector<MDString*, 16> &SeenIDs);
bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
bool linkGlobalProto(GlobalVariable *SrcGV);
bool linkFunctionProto(Function *SrcF);
bool linkAliasProto(GlobalAlias *SrcA);
+ bool linkModuleFlagsMetadata();
void linkAppendingVarInit(const AppendingVarInfo &AVI);
void linkGlobalInits();
@@ -398,8 +441,6 @@ namespace {
};
}
-
-
/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
/// in the symbol table. This is good for all clients except for us. Go
/// through the trouble to force this back.
@@ -421,9 +462,9 @@ static void forceRenaming(GlobalValue *GV, StringRef Name) {
}
}
-/// CopyGVAttributes - copy additional attributes (those not needed to construct
+/// copyGVAttributes - copy additional attributes (those not needed to construct
/// a GlobalValue) from the SrcGV to the DestGV.
-static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
+static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
// Use the maximum alignment, rather than just copying the alignment of SrcGV.
unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
DestGV->copyAttributesFrom(SrcGV);
@@ -432,21 +473,33 @@ static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
forceRenaming(DestGV, SrcGV->getName());
}
+static bool isLessConstraining(GlobalValue::VisibilityTypes a,
+ GlobalValue::VisibilityTypes b) {
+ if (a == GlobalValue::HiddenVisibility)
+ return false;
+ if (b == GlobalValue::HiddenVisibility)
+ return true;
+ if (a == GlobalValue::ProtectedVisibility)
+ return false;
+ if (b == GlobalValue::ProtectedVisibility)
+ return true;
+ return false;
+}
+
/// getLinkageResult - This analyzes the two global values and determines what
/// the result will look like in the destination module. In particular, it
-/// computes the resultant linkage type, computes whether the global in the
-/// source should be copied over to the destination (replacing the existing
-/// one), and computes whether this linkage is an error or not. It also performs
-/// visibility checks: we cannot link together two symbols with different
-/// visibilities.
+/// computes the resultant linkage type and visibility, computes whether the
+/// global in the source should be copied over to the destination (replacing
+/// the existing one), and computes whether this linkage is an error or not.
bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT,
+ GlobalValue::LinkageTypes &LT,
+ GlobalValue::VisibilityTypes &Vis,
bool &LinkFromSrc) {
assert(Dest && "Must have two globals being queried");
assert(!Src->hasLocalLinkage() &&
"If Src has internal linkage, Dest shouldn't be set!");
- bool SrcIsDeclaration = Src->isDeclaration();
+ bool SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable();
bool DestIsDeclaration = Dest->isDeclaration();
if (SrcIsDeclaration) {
@@ -502,13 +555,10 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
"': symbol multiply defined!");
}
- // Check visibility
- if (Src->getVisibility() != Dest->getVisibility() &&
- !SrcIsDeclaration && !DestIsDeclaration &&
- !Src->hasAvailableExternallyLinkage() &&
- !Dest->hasAvailableExternallyLinkage())
- return emitError("Linking globals named '" + Src->getName() +
- "': symbols have different visibilities!");
+ // Compute the visibility. We follow the rules in the System V Application
+ // Binary Interface.
+ Vis = isLessConstraining(Src->getVisibility(), Dest->getVisibility()) ?
+ Dest->getVisibility() : Src->getVisibility();
return false;
}
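
The Vis computation in this hunk reduces to keeping the more constraining of the two visibilities (hidden beats protected beats default). As a free function, assuming isLessConstraining() from above:

    #include "llvm/GlobalValue.h"
    using llvm::GlobalValue;

    static GlobalValue::VisibilityTypes
    mergedVisibility(GlobalValue::VisibilityTypes SrcVis,
                     GlobalValue::VisibilityTypes DstVis) {
      return isLessConstraining(SrcVis, DstVis) ? DstVis : SrcVis;
    }
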
@@ -539,7 +589,54 @@ void ModuleLinker::computeTypeMapping() {
if (GlobalValue *DGV = getLinkedToGlobal(I))
TypeMap.addTypeMapping(DGV->getType(), I->getType());
}
-
+
+ // Incorporate types by name, scanning all the types in the source module.
+ // At this point, the destination module may have a type "%foo = { i32 }" for
+ // example. When the source module got loaded into the same LLVMContext, if
+ // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
+ std::vector<StructType*> SrcStructTypes;
+ SrcM->findUsedStructTypes(SrcStructTypes);
+ SmallPtrSet<StructType*, 32> SrcStructTypesSet(SrcStructTypes.begin(),
+ SrcStructTypes.end());
+
+ std::vector<StructType*> DstStructTypes;
+ DstM->findUsedStructTypes(DstStructTypes);
+ SmallPtrSet<StructType*, 32> DstStructTypesSet(DstStructTypes.begin(),
+ DstStructTypes.end());
+
+ for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) {
+ StructType *ST = SrcStructTypes[i];
+ if (!ST->hasName()) continue;
+
+ // Check to see if there is a dot in the name followed by a digit.
+ size_t DotPos = ST->getName().rfind('.');
+ if (DotPos == 0 || DotPos == StringRef::npos ||
+ ST->getName().back() == '.' || !isdigit(ST->getName()[DotPos+1]))
+ continue;
+
+ // Check to see if the destination module has a struct with the prefix name.
+ if (StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos)))
+ // Don't use it if this actually came from the source module. They're in
+ // the same LLVMContext after all. Also don't use it unless the type is
+ // actually used in the destination module. This can happen in situations
+ // like this:
+ //
+ // Module A Module B
+ // -------- --------
+ // %Z = type { %A } %B = type { %C.1 }
+ // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* }
+ // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] }
+ // %C = type { i8* } %B.3 = type { %C.1 }
+ //
+ // When we link Module B with Module A, the '%B' in Module B is
+ // used. However, that would then use '%C.1'. But when we process '%C.1',
+ // we prefer to take the '%C' version. So we are then left with both
+ // '%C.1' and '%C' being used for the same types. This leads to some
+ // variables using one type and some using the other.
+ if (!SrcStructTypesSet.count(DST) && DstStructTypesSet.count(DST))
+ TypeMap.addTypeMapping(DST, ST);
+ }
+
// Don't bother incorporating aliases, they aren't generally typed well.
// Now that we have discovered all of the type equivalences, get a body for
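
The renamed-type heuristic above keys off the trailing '.<digit>' component that LLVMContext uniquing appends on a name collision. The accept/reject test in isolation (the exact negation of the continue condition in the loop):

    #include <cctype>
    #include "llvm/ADT/StringRef.h"

    // "foo.42" -> true; "foo", "foo." and ".42" -> false.
    static bool hasRenamedSuffix(llvm::StringRef Name) {
      size_t DotPos = Name.rfind('.');
      return DotPos != 0 && DotPos != llvm::StringRef::npos &&
             Name.back() != '.' && isdigit(Name[DotPos + 1]);
    }
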
@@ -590,7 +687,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
DstGV->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
- CopyGVAttributes(NG, DstGV);
+ copyGVAttributes(NG, DstGV);
AppendingVarInfo AVI;
AVI.NewGV = NG;
@@ -615,6 +712,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
/// merge them into the dest module.
bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
GlobalValue *DGV = getLinkedToGlobal(SGV);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
if (DGV) {
// Concatenation of appending linkage variables is magic and handled later.
@@ -624,9 +722,11 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
// Determine whether linkage of these two globals follows the source
// module's definition or the destination module's definition.
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc))
+ if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
return true;
+ NewVisibility = NV;
// If we're not linking from the source, then keep the definition that we
// have.
@@ -636,9 +736,10 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
DGVar->setConstant(true);
- // Set calculated linkage.
+ // Set calculated linkage and visibility.
DGV->setLinkage(NewLinkage);
-
+ DGV->setVisibility(*NewVisibility);
+
// Make sure to remember this mapping.
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
@@ -660,7 +761,9 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
SGV->isThreadLocal(),
SGV->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
- CopyGVAttributes(NewDGV, SGV);
+ copyGVAttributes(NewDGV, SGV);
+ if (NewVisibility)
+ NewDGV->setVisibility(*NewVisibility);
if (DGV) {
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
@@ -676,17 +779,21 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
/// destination module if needed, setting up mapping information.
bool ModuleLinker::linkFunctionProto(Function *SF) {
GlobalValue *DGV = getLinkedToGlobal(SF);
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
if (DGV) {
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+ GlobalValue::VisibilityTypes NV;
+ if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
return true;
-
+ NewVisibility = NV;
+
if (!LinkFromSrc) {
// Set calculated linkage
DGV->setLinkage(NewLinkage);
-
+ DGV->setVisibility(*NewVisibility);
+
// Make sure to remember this mapping.
ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
@@ -702,12 +809,21 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
// bring SF over.
Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
SF->getLinkage(), SF->getName(), DstM);
- CopyGVAttributes(NewDF, SF);
+ copyGVAttributes(NewDF, SF);
+ if (NewVisibility)
+ NewDF->setVisibility(*NewVisibility);
if (DGV) {
// Any uses of DF need to change to NewDF, with cast.
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
DGV->eraseFromParent();
+ } else {
+ // Local, linkonce, or available_externally linkage: do not link the body
+ // now; remember the function so it can be lazily linked in if referenced.
+ if (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
+ SF->hasAvailableExternallyLinkage()) {
+ DoNotLinkFromSource.insert(SF);
+ LazilyLinkFunctions.push_back(SF);
+ }
}
ValueMap[SF] = NewDF;
@@ -718,17 +834,21 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
/// source module.
bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
GlobalValue *DGV = getLinkedToGlobal(SGA);
-
+ llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+
if (DGV) {
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ GlobalValue::VisibilityTypes NV;
bool LinkFromSrc = false;
- if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc))
+ if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc))
return true;
-
+ NewVisibility = NV;
+
if (!LinkFromSrc) {
// Set calculated linkage.
DGV->setLinkage(NewLinkage);
-
+ DGV->setVisibility(*NewVisibility);
+
// Make sure to remember this mapping.
ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
@@ -744,7 +864,9 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
SGA->getLinkage(), SGA->getName(),
/*aliasee*/0, DstM);
- CopyGVAttributes(NewDA, SGA);
+ copyGVAttributes(NewDA, SGA);
+ if (NewVisibility)
+ NewDA->setVisibility(*NewVisibility);
if (DGV) {
// Any uses of DGV need to change to NewDA, with cast.
@@ -756,36 +878,27 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
return false;
}
+static void getArrayElements(Constant *C, SmallVectorImpl<Constant*> &Dest) {
+ unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
+
+ for (unsigned i = 0; i != NumElements; ++i)
+ Dest.push_back(C->getAggregateElement(i));
+}
+
void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
// Merge the initializer.
SmallVector<Constant*, 16> Elements;
- if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- Elements.push_back(I->getOperand(i));
- } else {
- assert(isa<ConstantAggregateZero>(AVI.DstInit));
- ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType());
- Type *EltTy = DstAT->getElementType();
- Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy));
- }
+ getArrayElements(AVI.DstInit, Elements);
Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
- if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- Elements.push_back(I->getOperand(i));
- } else {
- assert(isa<ConstantAggregateZero>(SrcInit));
- ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType());
- Type *EltTy = SrcAT->getElementType();
- Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy));
- }
+ getArrayElements(SrcInit, Elements);
+
ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
}
-
-// linkGlobalInits - Update the initializers in the Dest module now that all
-// globals that may be referenced are in Dest.
+/// linkGlobalInits - Update the initializers in the Dest module now that all
+/// globals that may be referenced are in Dest.
void ModuleLinker::linkGlobalInits() {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = SrcM->global_begin(),
@@ -802,9 +915,9 @@ void ModuleLinker::linkGlobalInits() {
}
}
-// linkFunctionBody - Copy the source function over into the dest function and
-// fix up references to values. At this point we know that Dest is an external
-// function, and that Src is not.
+/// linkFunctionBody - Copy the source function over into the dest function and
+/// fix up references to values. At this point we know that Dest is an external
+/// function, and that Src is not.
void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
@@ -833,7 +946,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
} else {
// Clone the body of the function into the dest function.
SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
- CloneFunctionInto(Dst, Src, ValueMap, false, Returns);
+ CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap);
}
// There is no need to map the arguments anymore.
@@ -843,7 +956,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
}
-
+/// linkAliasBodies - Insert all of the aliases in Src into the Dest module.
void ModuleLinker::linkAliasBodies() {
for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
I != E; ++I) {
@@ -856,11 +969,14 @@ void ModuleLinker::linkAliasBodies() {
}
}
-/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
+/// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest
/// module.
void ModuleLinker::linkNamedMDNodes() {
+ const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
E = SrcM->named_metadata_end(); I != E; ++I) {
+ // Don't link module flags here. Do them separately.
+ if (&*I == SrcModFlags) continue;
NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
// Add Src elements into Dest node.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
@@ -868,10 +984,176 @@ void ModuleLinker::linkNamedMDNodes() {
RF_None, &TypeMap));
}
}
+
+/// categorizeModuleFlagNodes - Categorize the module flags according to their
+/// type: Error, Warning, Override, and Require.
+bool ModuleLinker::
+categorizeModuleFlagNodes(const NamedMDNode *ModFlags,
+ DenseMap<MDString*, MDNode*> &ErrorNode,
+ DenseMap<MDString*, MDNode*> &WarningNode,
+ DenseMap<MDString*, MDNode*> &OverrideNode,
+ DenseMap<MDString*,
+ SmallSetVector<MDNode*, 8> > &RequireNodes,
+ SmallSetVector<MDString*, 16> &SeenIDs) {
+ bool HasErr = false;
+
+ for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
+ MDNode *Op = ModFlags->getOperand(I);
+ assert(Op->getNumOperands() == 3 && "Invalid module flag metadata!");
+ assert(isa<ConstantInt>(Op->getOperand(0)) &&
+ "Module flag's first operand must be an integer!");
+ assert(isa<MDString>(Op->getOperand(1)) &&
+ "Module flag's second operand must be an MDString!");
+
+ ConstantInt *Behavior = cast<ConstantInt>(Op->getOperand(0));
+ MDString *ID = cast<MDString>(Op->getOperand(1));
+ Value *Val = Op->getOperand(2);
+ switch (Behavior->getZExtValue()) {
+ default:
+ assert(false && "Invalid behavior in module flag metadata!");
+ break;
+ case Module::Error: {
+ MDNode *&ErrNode = ErrorNode[ID];
+ if (!ErrNode) ErrNode = Op;
+ if (ErrNode->getOperand(2) != Val)
+ HasErr = emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting values");
+ break;
+ }
+ case Module::Warning: {
+ MDNode *&WarnNode = WarningNode[ID];
+ if (!WarnNode) WarnNode = Op;
+ if (WarnNode->getOperand(2) != Val)
+ errs() << "WARNING: linking module flags '" << ID->getString()
+ << "': IDs have conflicting values\n";
+ break;
+ }
+ case Module::Require: RequireNodes[ID].insert(Op); break;
+ case Module::Override: {
+ MDNode *&OvrNode = OverrideNode[ID];
+ if (!OvrNode) OvrNode = Op;
+ if (OvrNode->getOperand(2) != Val)
+ HasErr = emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting override values");
+ break;
+ }
+ }
+
+ SeenIDs.insert(ID);
+ }
+
+ return HasErr;
+}
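
Each operand categorized above is a three-operand MDNode: a behavior constant (Module::Error, Warning, Require, or Override), an MDString naming the flag, and the flag's value. A hedged sketch of building one with this tree's C++ API (the flag name and value are invented):

  // Equivalent to IR:  !{i32 1, metadata !"wchar_size", i32 4}
  LLVMContext &Ctx = M->getContext();
  Value *Ops[] = {
    ConstantInt::get(Type::getInt32Ty(Ctx), Module::Error),
    MDString::get(Ctx, "wchar_size"),
    ConstantInt::get(Type::getInt32Ty(Ctx), 4)
  };
  M->getOrInsertModuleFlagsMetadata()->addOperand(MDNode::get(Ctx, Ops));
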
+
+/// linkModuleFlagsMetadata - Merge the linker flags in Src into the Dest
+/// module.
+bool ModuleLinker::linkModuleFlagsMetadata() {
+ const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
+ if (!SrcModFlags) return false;
+
+ NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata();
+
+ // If the destination module doesn't have module flags yet, then just copy
+ // over the source module's flags.
+ if (DstModFlags->getNumOperands() == 0) {
+ for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
+ DstModFlags->addOperand(SrcModFlags->getOperand(I));
+
+ return false;
+ }
+
+ bool HasErr = false;
+
+ // Otherwise, we have to merge them based on their behaviors. First,
+ // categorize all of the nodes in the modules' module flags. If an error or
+ // warning occurs, then emit the appropriate message(s).
+ DenseMap<MDString*, MDNode*> ErrorNode;
+ DenseMap<MDString*, MDNode*> WarningNode;
+ DenseMap<MDString*, MDNode*> OverrideNode;
+ DenseMap<MDString*, SmallSetVector<MDNode*, 8> > RequireNodes;
+ SmallSetVector<MDString*, 16> SeenIDs;
+
+ HasErr |= categorizeModuleFlagNodes(SrcModFlags, ErrorNode, WarningNode,
+ OverrideNode, RequireNodes, SeenIDs);
+ HasErr |= categorizeModuleFlagNodes(DstModFlags, ErrorNode, WarningNode,
+ OverrideNode, RequireNodes, SeenIDs);
+
+ // Check that there isn't both an error and warning node for a flag.
+ for (SmallSetVector<MDString*, 16>::iterator
+ I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
+ MDString *ID = *I;
+ if (ErrorNode[ID] && WarningNode[ID])
+ HasErr = emitError("linking module flags '" + ID->getString() +
+ "': IDs have conflicting behaviors");
+ }
+
+ // Early exit if we had an error.
+ if (HasErr) return true;
+
+ // Get the destination's module flags ready for new operands.
+ DstModFlags->dropAllReferences();
+
+ // Add all of the module flags to the destination module.
+ DenseMap<MDString*, SmallVector<MDNode*, 4> > AddedNodes;
+ for (SmallSetVector<MDString*, 16>::iterator
+ I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
+ MDString *ID = *I;
+ if (OverrideNode[ID]) {
+ DstModFlags->addOperand(OverrideNode[ID]);
+ AddedNodes[ID].push_back(OverrideNode[ID]);
+ } else if (ErrorNode[ID]) {
+ DstModFlags->addOperand(ErrorNode[ID]);
+ AddedNodes[ID].push_back(ErrorNode[ID]);
+ } else if (WarningNode[ID]) {
+ DstModFlags->addOperand(WarningNode[ID]);
+ AddedNodes[ID].push_back(WarningNode[ID]);
+ }
+
+ for (SmallSetVector<MDNode*, 8>::iterator
+ II = RequireNodes[ID].begin(), IE = RequireNodes[ID].end();
+ II != IE; ++II)
+ DstModFlags->addOperand(*II);
+ }
+
+ // Now check that all of the requirements have been satisfied.
+ for (SmallSetVector<MDString*, 16>::iterator
+ I = SeenIDs.begin(), E = SeenIDs.end(); I != E; ++I) {
+ MDString *ID = *I;
+ SmallSetVector<MDNode*, 8> &Set = RequireNodes[ID];
+
+ for (SmallSetVector<MDNode*, 8>::iterator
+ II = Set.begin(), IE = Set.end(); II != IE; ++II) {
+ MDNode *Node = *II;
+ assert(isa<MDNode>(Node->getOperand(2)) &&
+ "Module flag's third operand must be an MDNode!");
+ MDNode *Val = cast<MDNode>(Node->getOperand(2));
+
+ MDString *ReqID = cast<MDString>(Val->getOperand(0));
+ Value *ReqVal = Val->getOperand(1);
+
+ bool HasValue = false;
+ for (SmallVectorImpl<MDNode*>::iterator
+ RI = AddedNodes[ReqID].begin(), RE = AddedNodes[ReqID].end();
+ RI != RE; ++RI) {
+ MDNode *ReqNode = *RI;
+ if (ReqNode->getOperand(2) == ReqVal) {
+ HasValue = true;
+ break;
+ }
+ }
+
+ if (!HasValue)
+ HasErr = emitError("linking module flags '" + ReqID->getString() +
+ "': does not have the required value");
+ }
+ }
+
+ return HasErr;
+}
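
The Require check at the end mirrors the node shape: a Require flag's value (operand 2) is itself a two-operand node pairing a flag ID with the value that flag must carry in the fully linked module. A sketch of a requirement node, with invented names (Ctx and ModFlags as in the previous sketch):

  // Roughly: !{i32 3, metadata !"foo-req", metadata !{metadata !"foo", i32 42}}
  // i.e. after linking, module flag "foo" must exist with value i32 42.
  Value *Pair[] = { MDString::get(Ctx, "foo"),
                    ConstantInt::get(Type::getInt32Ty(Ctx), 42) };
  Value *Req[]  = { ConstantInt::get(Type::getInt32Ty(Ctx), Module::Require),
                    MDString::get(Ctx, "foo-req"),
                    MDNode::get(Ctx, Pair) };
  ModFlags->addOperand(MDNode::get(Ctx, Req));
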
bool ModuleLinker::run() {
- assert(DstM && "Null Destination module");
- assert(SrcM && "Null Source Module");
+ assert(DstM && "Null destination module");
+ assert(SrcM && "Null source module");
// Inherit the target data from the source module if the destination module
// doesn't have one already.
@@ -951,7 +1233,6 @@ bool ModuleLinker::run() {
// Link in the function bodies that are defined in the source module into
// DstM.
for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
-
// Skip if not linking from source.
if (DoNotLinkFromSource.count(SF)) continue;
@@ -964,16 +1245,70 @@ bool ModuleLinker::run() {
}
linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ SF->Dematerialize();
}
// Resolve all uses of aliases with aliasees.
linkAliasBodies();
- // Remap all of the named mdnoes in Src into the DstM module. We do this
+ // Remap all of the named MDNodes in Src into the DstM module. We do this
// after linking GlobalValues so that MDNodes that reference GlobalValues
// are properly remapped.
linkNamedMDNodes();
+ // Merge the module flags into the DstM module.
+ if (linkModuleFlagsMetadata())
+ return true;
+
+ // Process vector of lazily linked in functions.
+ bool LinkedInAnyFunctions;
+ do {
+ LinkedInAnyFunctions = false;
+
+ for (std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+
+ Function *SF = *I;
+ Function *DF = cast<Function>(ValueMap[SF]);
+
+ if (!DF->use_empty()) {
+
+ // Materialize if necessary.
+ if (SF->isDeclaration()) {
+ if (!SF->isMaterializable())
+ continue;
+ if (SF->Materialize(&ErrorMsg))
+ return true;
+ }
+
+ // Link in function body.
+ linkFunctionBody(DF, SF);
+ SF->Dematerialize();
+
+ // "Remove" from vector by setting the element to 0.
+ *I = 0;
+
+ // Set flag to indicate we may have more functions to lazily link in
+ // since we linked in a function.
+ LinkedInAnyFunctions = true;
+ }
+ }
+ } while (LinkedInAnyFunctions);
+
+ // Remove any prototypes of functions that were not actually linked in.
+ for (std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
+ if (!*I)
+ continue;
+
+ Function *SF = *I;
+ Function *DF = cast<Function>(ValueMap[SF]);
+ if (DF->use_empty())
+ DF->eraseFromParent();
+ }
+
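
The Materialize()/Dematerialize() calls bracketing linkFunctionBody() come from the lazy bitcode reader: in a lazily loaded module a function stays a declaration until its body is streamed in, and the body can be dropped again once it has been cloned into the destination. A usage sketch under that assumption (names invented):

  std::string Err;
  Module *M = getLazyBitcodeModule(Buf, Ctx, &Err); // bodies not yet read
  Function *F = M->getFunction("callee");
  if (F->isMaterializable() && F->Materialize(&Err)) // pull in the body
    report_fatal_error(Err);
  // ... clone or inspect F's body ...
  F->Dematerialize();                                // drop it again
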
// Now that all of the types from the source are used, resolve any structs
// copied over to the dest that didn't exist there.
TypeMap.linkDefinedTypeBodies();
@@ -985,11 +1320,11 @@ bool ModuleLinker::run() {
// LinkModules entrypoint.
//===----------------------------------------------------------------------===//
-// LinkModules - This function links two modules together, with the resulting
-// left module modified to be the composite of the two input modules. If an
-// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
-// the problem. Upon failure, the Dest module could be in a modified state, and
-// shouldn't be relied on to be consistent.
+/// LinkModules - This function links two modules together, with the resulting
+/// left module modified to be the composite of the two input modules. If an
+/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+/// the problem. Upon failure, the Dest module could be in a modified state,
+/// and shouldn't be relied on to be consistent.
bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
std::string *ErrorMsg) {
ModuleLinker TheLinker(Dest, Src, Mode);
@@ -997,6 +1332,6 @@ bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
return true;
}
-
+
return false;
}
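
A typical call site for this entry point, sketched with the linker modes this tree's Linker.h declares (DestroySource lets the linker cannibalize Src instead of copying it):

  std::string Err;
  if (Linker::LinkModules(Dest, Src, Linker::DestroySource, &Err)) {
    errs() << "link failed: " << Err << "\n";
    // Per the comment above, Dest may be in a half-modified state here.
  }
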
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index 59fbceb6a308..7c6cf4f3dd78 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a4ac1bf60529..f11e686fd104 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -20,7 +20,6 @@ add_llvm_library(LLVMMC
MCInstPrinter.cpp
MCInstrAnalysis.cpp
MCLabel.cpp
- MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCMachObjectTargetWriter.cpp
MCModule.cpp
@@ -45,10 +44,5 @@ add_llvm_library(LLVMMC
WinCOFFStreamer.cpp
)
-add_llvm_library_dependencies(LLVMMC
- LLVMObject
- LLVMSupport
- )
-
add_subdirectory(MCParser)
add_subdirectory(MCDisassembler)
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 3d16de5604f6..9fc33b6b3e5e 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -11,26 +11,26 @@
//
//===----------------------------------------------------------------------===//
-#include "ELFObjectWriter.h"
+#include "MCELF.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-
-#include "../Target/X86/MCTargetDesc/X86FixupKinds.h"
-#include "../Target/ARM/MCTargetDesc/ARMFixupKinds.h"
-#include "../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h"
#include <vector>
using namespace llvm;
@@ -38,6 +38,304 @@ using namespace llvm;
#undef DEBUG_TYPE
#define DEBUG_TYPE "reloc-info"
+namespace {
+class ELFObjectWriter : public MCObjectWriter {
+ protected:
+
+ static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
+ static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
+ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ bool Used, bool Renamed);
+ static bool isLocal(const MCSymbolData &Data, bool isSignature,
+ bool isUsedInReloc);
+ static bool IsELFMetaDataSection(const MCSectionData &SD);
+ static uint64_t DataSectionSize(const MCSectionData &SD);
+ static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+ static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteDataSectionData(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionELF &Section);
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+ /// ELFSymbolData - Helper struct for containing some precomputed
+ /// information on symbols.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+ return true;
+ if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+ return false;
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// The target specific ELF writer instance.
+ llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+ SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+ SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+ DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ bool NeedsGOT;
+
+ bool NeedsSymtabShndx;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+ // This holds the .symtab section index.
+ unsigned SymbolTableIndex;
+
+ unsigned ShstrtabIndex;
+
+
+ const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ // TargetObjectWriter wrappers.
+ const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return TargetObjectWriter->ExplicitRelSym(Asm, Target, F, Fixup, IsPCRel);
+ }
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool hasRelocationAddend() const {
+ return TargetObjectWriter->hasRelocationAddend();
+ }
+ unsigned getEFlags() const {
+ return TargetObjectWriter->getEFlags();
+ }
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return TargetObjectWriter->GetRelocType(Target, Fixup, IsPCRel,
+ IsRelocWithSymbol, Addend);
+ }
+
+
+ public:
+ ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+ raw_ostream &_OS, bool IsLittleEndian)
+ : MCObjectWriter(_OS, IsLittleEndian),
+ TargetObjectWriter(MOTW),
+ NeedsGOT(false), NeedsSymtabShndx(false) {
+ }
+
+ virtual ~ELFObjectWriter();
+
+ void WriteWord(uint64_t W) {
+ if (is64Bit())
+ Write64(W);
+ else
+ Write32(W);
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String8(MCDataFragment &F, uint8_t Value) {
+ char buf[1];
+ buf[0] = Value;
+ F.getContents() += StringRef(buf, 1);
+ }
+
+ void String16(MCDataFragment &F, uint16_t Value) {
+ char buf[2];
+ if (isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ F.getContents() += StringRef(buf, 2);
+ }
+
+ void String32(MCDataFragment &F, uint32_t Value) {
+ char buf[4];
+ if (isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ F.getContents() += StringRef(buf, 4);
+ }
+
+ void String64(MCDataFragment &F, uint64_t Value) {
+ char buf[8];
+ if (isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ F.getContents() += StringRef(buf, 8);
+ }
+
+ void WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections);
+
+ void WriteSymbolEntry(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint32_t shndx,
+ bool Reserved);
+
+ void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+ ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
+ void WriteSymbolTable(MCDataFragment *SymtabF,
+ MCDataFragment *ShndxF,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap);
+
+ virtual void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ // Map from a group section to the signature symbol
+ typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+ // Map from a signature symbol to the group section
+ typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+ // Map from a section to the section with the relocations
+ typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
+ // Map from a section to its offset
+ typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
+
+ /// ComputeSymbolTable - Compute the symbol table data
+ ///
+ /// \param StringTable [out] - The string table data.
+ /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+ /// string table.
+ void ComputeSymbolTable(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ RevGroupMapTy RevGroupMap,
+ unsigned NumRegularSections);
+
+ void ComputeIndexMap(MCAssembler &Asm,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ RelMapTy &RelMap);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
+ const RelMapTy &RelMap);
+
+ void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ // Create the sections that show up in the symbol table. Currently
+ // those are the .note.GNU-stack section and the group sections.
+ void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
+ GroupMapTy &GroupMap,
+ RevGroupMapTy &RevGroupMap,
+ SectionIndexMapTy &SectionIndexMap,
+ const RelMapTy &RelMap);
+
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
+ const MCAsmLayout &Layout,
+ const SectionIndexMapTy &SectionIndexMap,
+ const SectionOffsetMapTy &SectionOffsetMap);
+
+ void ComputeSectionOrder(MCAssembler &Asm,
+ std::vector<const MCSectionELF*> &Sections);
+
+ void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ void WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD);
+
+ virtual bool
+ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
+
+ virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void WriteSection(MCAssembler &Asm,
+ const SectionIndexMapTy &SectionIndexMap,
+ uint32_t GroupSymbolIndex,
+ uint64_t Offset, uint64_t Size, uint64_t Alignment,
+ const MCSectionELF &Section);
+ };
+}
+
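
With the target subclasses gone (see the deletions below), every per-target decision now reaches the writer through the MCELFObjectTargetWriter it owns: is64Bit(), getOSABI(), getEFlags(), GetRelocType(), adjustFixupOffset(), and sortRelocs(). A hedged sketch of what a target supplies instead of subclassing ELFObjectWriter (the target name is invented):

  class FooELFObjectWriter : public MCELFObjectTargetWriter {
  public:
    FooELFObjectWriter(uint8_t OSABI)
      : MCELFObjectTargetWriter(/*Is64Bit=*/false, OSABI, ELF::EM_NONE,
                                /*HasRelocationAddend=*/false) {}
    virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                                  bool IsPCRel, bool IsRelocWithSymbol,
                                  int64_t Addend) const {
      return IsPCRel ? 2u /* a PC-relative reloc enum */
                     : 1u /* an absolute reloc enum */;
    }
  };
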
bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
const MCFixupKindInfo &FKI =
Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind);
@@ -92,11 +390,7 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
// e_ident[EI_OSABI]
- switch (TargetObjectWriter->getOSType()) {
- case Triple::FreeBSD: Write8(ELF::ELFOSABI_FREEBSD); break;
- case Triple::Linux: Write8(ELF::ELFOSABI_LINUX); break;
- default: Write8(ELF::ELFOSABI_NONE); break;
- }
+ Write8(TargetObjectWriter->getOSABI());
Write8(0); // e_ident[EI_ABIVERSION]
WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
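
The deleted Triple switch did not disappear; the equivalent mapping now lives behind MCELFObjectTargetWriter::getOSABI(), which (modulo target additions) computes the same byte:

  static uint8_t getOSABI(Triple::OSType OSType) {
    switch (OSType) {
    case Triple::FreeBSD: return ELF::ELFOSABI_FREEBSD;
    case Triple::Linux:   return ELF::ELFOSABI_LINUX;
    default:              return ELF::ELFOSABI_NONE;
    }
  }
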
@@ -112,7 +406,7 @@ void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize,
sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
// e_flags = whatever the target wants
- WriteEFlags();
+ Write32(getEFlags());
// e_ehsize = ELF header size
Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
@@ -181,7 +475,7 @@ uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
if (const MCExpr *Value = Symbol.getVariableValue()) {
int64_t IntValue;
if (Value->EvaluateAsAbsolute(IntValue, Layout))
- return (uint64_t)IntValue;
+ return (uint64_t)IntValue;
}
}
@@ -277,7 +571,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
MCDataFragment *ShndxF,
const MCAssembler &Asm,
const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap) {
+ const SectionIndexMapTy &SectionIndexMap) {
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
assert(StringTable.size() && "Missing string table");
@@ -306,7 +600,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
Section.getType() == ELF::SHT_SYMTAB_SHNDX)
continue;
WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
- ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
+ ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section),
+ false);
LastLocalSymbolIndex++;
}
@@ -416,7 +711,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// Offset of the symbol in the section
int64_t a = Layout.getSymbolOffset(&SDB);
- // Ofeset of the relocation in the section
+ // Offset of the relocation in the section
int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
Value += b - a;
}
@@ -445,11 +740,16 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
FixedValue = Value;
unsigned Type = GetRelocType(Target, Fixup, IsPCRel,
(RelocSymbol != 0), Addend);
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ if (RelocNeedsGOT(Modifier))
+ NeedsGOT = true;
uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
Fixup.getOffset();
- adjustFixupOffset(Fixup, RelocOffset);
+ // FIXME: no tests cover this. Is adjustFixupOffset dead code?
+ TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset);
if (!hasRelocationAddend())
Addend = 0;
@@ -459,7 +759,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
else
assert(isInt<32>(Addend));
- ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend);
+ ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend, Fixup);
Relocations[Fragment->getParent()].push_back(ERE);
}
@@ -745,8 +1045,10 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
MCDataFragment *F,
const MCSectionData *SD) {
std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
- // sort by the r_offset just like gnu as does
- array_pod_sort(Relocs.begin(), Relocs.end());
+
+ // Sort the relocation entries. Most targets just sort by r_offset, but some
+ // (e.g., MIPS) have additional constraints.
+ TargetObjectWriter->sortRelocs(Asm, Relocs);
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
ELFRelocationEntry entry = Relocs[e - i - 1];
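
Note the ordering contract here: ELFRelocationEntry::operator< (visible in the deleted header below) compares r_offset in descending order, and the loop above walks Relocs[e - i - 1], so entries still land in the file in ascending r_offset order as gnu as emits them. Targets with extra constraints (the comment names MIPS) can override the hook; the default presumably reduces to the old one-liner:

  void MCELFObjectTargetWriter::sortRelocs(
      const MCAssembler &Asm, std::vector<ELFRelocationEntry> &Relocs) {
    array_pod_sort(Relocs.begin(), Relocs.end()); // descending r_offset
  }
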
@@ -1053,14 +1355,10 @@ uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout,
void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCSectionELF &Section) {
- uint64_t FileOff = OS.tell();
const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
- uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment());
+ uint64_t Padding = OffsetToAlignment(OS.tell(), SD.getAlignment());
WriteZeros(Padding);
- FileOff += Padding;
-
- FileOff += GetSectionFileSize(Layout, SD);
if (IsELFMetaDataSection(SD)) {
for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e;
@@ -1070,7 +1368,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
WriteBytes(cast<MCDataFragment>(F).getContents().str());
}
} else {
- Asm.WriteSectionData(&SD, Layout);
+ Asm.writeSectionData(&SD, Layout);
}
}
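
OffsetToAlignment() (from llvm/Support/MathExtras.h) returns the number of pad bytes needed to reach the next boundary — the same arithmetic the deleted FileOff bookkeeping was feeding:

  // OffsetToAlignment(V, A) == (A - V % A) % A, for example:
  uint64_t Pad  = OffsetToAlignment(/*Value=*/10, /*Align=*/8); // == 6
  uint64_t None = OffsetToAlignment(/*Value=*/16, /*Align=*/8); // == 0
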
@@ -1226,16 +1524,13 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
for (unsigned i = 0; i < NumRegularSections + 1; ++i)
WriteDataSectionData(Asm, Layout, *Sections[i]);
- FileOff = OS.tell();
- uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+ uint64_t Padding = OffsetToAlignment(OS.tell(), NaturalAlignment);
WriteZeros(Padding);
// ... then the section header table ...
WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap,
SectionOffsetMap);
- FileOff = OS.tell();
-
// ... and then the remaining sections ...
for (unsigned i = NumRegularSections + 1; i < NumSections; ++i)
WriteDataSectionData(Asm, Layout, *Sections[i]);
@@ -1256,577 +1551,5 @@ ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
raw_ostream &OS,
bool IsLittleEndian) {
- switch (MOTW->getEMachine()) {
- case ELF::EM_386:
- case ELF::EM_X86_64:
- return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_ARM:
- return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_MBLAZE:
- return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_PPC:
- case ELF::EM_PPC64:
- return new PPCELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- case ELF::EM_MIPS:
- return new MipsELFObjectWriter(MOTW, OS, IsLittleEndian); break;
- default: llvm_unreachable("Unsupported architecture"); break;
- }
-}
-
-
-/// START OF SUBCLASSES for ELFObjectWriter
-//===- ARMELFObjectWriter -------------------------------------------===//
-
-ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
-{}
-
-ARMELFObjectWriter::~ARMELFObjectWriter()
-{}
-
-// FIXME: get the real EABI Version from the Triple.
-void ARMELFObjectWriter::WriteEFlags() {
- Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
-}
-
-// In ARM, _MergedGlobals and other most symbols get emitted directly.
-// I.e. not as an offset to a section symbol.
-// This code is an approximation of what ARM/gcc does.
-
-STATISTIC(PCRelCount, "Total number of PIC Relocations");
-STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
-
-const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- const MCSymbol &Symbol = Target.getSymA()->getSymbol();
- bool EmitThisSym = false;
-
- const MCSectionELF &Section =
- static_cast<const MCSectionELF&>(Symbol.getSection());
- bool InNormalSection = true;
- unsigned RelocType = 0;
- RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
-
- DEBUG(
- const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
- MCSymbolRefExpr::VariantKind Kind2;
- Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
- MCSymbolRefExpr::VK_None;
- dbgs() << "considering symbol "
- << Section.getSectionName() << "/"
- << Symbol.getName() << "/"
- << " Rel:" << (unsigned)RelocType
- << " Kind: " << (int)Kind << "/" << (int)Kind2
- << " Tmp:"
- << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
- << Symbol.isVariable() << "/" << Symbol.isTemporary()
- << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
-
- if (IsPCRel) { ++PCRelCount;
- switch (RelocType) {
- default:
- // Most relocation types are emitted as explicit symbols
- InNormalSection =
- StringSwitch<bool>(Section.getSectionName())
- .Case(".data.rel.ro.local", false)
- .Case(".data.rel", false)
- .Case(".bss", false)
- .Default(true);
- EmitThisSym = true;
- break;
- case ELF::R_ARM_ABS32:
- // But things get strange with R_ARM_ABS32
- // In this case, most things that go in .rodata show up
- // as section relative relocations
- InNormalSection =
- StringSwitch<bool>(Section.getSectionName())
- .Case(".data.rel.ro.local", false)
- .Case(".data.rel", false)
- .Case(".rodata", false)
- .Case(".bss", false)
- .Default(true);
- EmitThisSym = false;
- break;
- }
- } else {
- NonPCRelCount++;
- InNormalSection =
- StringSwitch<bool>(Section.getSectionName())
- .Case(".data.rel.ro.local", false)
- .Case(".rodata", false)
- .Case(".data.rel", false)
- .Case(".bss", false)
- .Default(true);
-
- switch (RelocType) {
- default: EmitThisSym = true; break;
- case ELF::R_ARM_ABS32: EmitThisSym = false; break;
- }
- }
-
- if (EmitThisSym)
- return &Symbol;
- if (! Symbol.isTemporary() && InNormalSection) {
- return &Symbol;
- }
- return NULL;
-}
-
-// Need to examine the Fixup when determining whether to
-// emit the relocation as an explicit symbol or as a section relative
-// offset
-unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
-
- unsigned Type = GetRelocTypeInner(Target, Fixup, IsPCRel);
-
- if (RelocNeedsGOT(Modifier))
- NeedsGOT = true;
-
- return Type;
-}
-
-unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
-
- unsigned Type = 0;
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: assert(0 && "Unimplemented");
- case FK_Data_4:
- switch (Modifier) {
- default: llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_ARM_REL32;
- break;
- case MCSymbolRefExpr::VK_ARM_TLSGD:
- assert(0 && "unimplemented");
- break;
- case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
- Type = ELF::R_ARM_TLS_IE32;
- break;
- }
- break;
- case ARM::fixup_arm_uncondbranch:
- switch (Modifier) {
- case MCSymbolRefExpr::VK_ARM_PLT:
- Type = ELF::R_ARM_PLT32;
- break;
- default:
- Type = ELF::R_ARM_CALL;
- break;
- }
- break;
- case ARM::fixup_arm_condbranch:
- Type = ELF::R_ARM_JUMP24;
- break;
- case ARM::fixup_arm_movt_hi16:
- case ARM::fixup_arm_movt_hi16_pcrel:
- Type = ELF::R_ARM_MOVT_PREL;
- break;
- case ARM::fixup_arm_movw_lo16:
- case ARM::fixup_arm_movw_lo16_pcrel:
- Type = ELF::R_ARM_MOVW_PREL_NC;
- break;
- case ARM::fixup_t2_movt_hi16:
- case ARM::fixup_t2_movt_hi16_pcrel:
- Type = ELF::R_ARM_THM_MOVT_PREL;
- break;
- case ARM::fixup_t2_movw_lo16:
- case ARM::fixup_t2_movw_lo16_pcrel:
- Type = ELF::R_ARM_THM_MOVW_PREL_NC;
- break;
- case ARM::fixup_arm_thumb_bl:
- case ARM::fixup_arm_thumb_blx:
- switch (Modifier) {
- case MCSymbolRefExpr::VK_ARM_PLT:
- Type = ELF::R_ARM_THM_CALL;
- break;
- default:
- Type = ELF::R_ARM_NONE;
- break;
- }
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_4:
- switch (Modifier) {
- default: llvm_unreachable("Unsupported Modifier"); break;
- case MCSymbolRefExpr::VK_ARM_GOT:
- Type = ELF::R_ARM_GOT_BREL;
- break;
- case MCSymbolRefExpr::VK_ARM_TLSGD:
- Type = ELF::R_ARM_TLS_GD32;
- break;
- case MCSymbolRefExpr::VK_ARM_TPOFF:
- Type = ELF::R_ARM_TLS_LE32;
- break;
- case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
- Type = ELF::R_ARM_TLS_IE32;
- break;
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_ARM_ABS32;
- break;
- case MCSymbolRefExpr::VK_ARM_GOTOFF:
- Type = ELF::R_ARM_GOTOFF32;
- break;
- }
- break;
- case ARM::fixup_arm_ldst_pcrel_12:
- case ARM::fixup_arm_pcrel_10:
- case ARM::fixup_arm_adr_pcrel_12:
- case ARM::fixup_arm_thumb_bl:
- case ARM::fixup_arm_thumb_cb:
- case ARM::fixup_arm_thumb_cp:
- case ARM::fixup_arm_thumb_br:
- assert(0 && "Unimplemented");
- break;
- case ARM::fixup_arm_uncondbranch:
- Type = ELF::R_ARM_CALL;
- break;
- case ARM::fixup_arm_condbranch:
- Type = ELF::R_ARM_JUMP24;
- break;
- case ARM::fixup_arm_movt_hi16:
- Type = ELF::R_ARM_MOVT_ABS;
- break;
- case ARM::fixup_arm_movw_lo16:
- Type = ELF::R_ARM_MOVW_ABS_NC;
- break;
- case ARM::fixup_t2_movt_hi16:
- Type = ELF::R_ARM_THM_MOVT_ABS;
- break;
- case ARM::fixup_t2_movw_lo16:
- Type = ELF::R_ARM_THM_MOVW_ABS_NC;
- break;
- }
- }
-
- return Type;
-}
-
-//===- PPCELFObjectWriter -------------------------------------------===//
-
-PPCELFObjectWriter::PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
-}
-
-PPCELFObjectWriter::~PPCELFObjectWriter() {
-}
-
-unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // determine the type of the relocation
- unsigned Type;
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default:
- llvm_unreachable("Unimplemented");
- case PPC::fixup_ppc_br24:
- Type = ELF::R_PPC_REL24;
- break;
- case FK_PCRel_4:
- Type = ELF::R_PPC_REL32;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case PPC::fixup_ppc_br24:
- Type = ELF::R_PPC_ADDR24;
- break;
- case PPC::fixup_ppc_brcond14:
- Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?_
- break;
- case PPC::fixup_ppc_ha16:
- Type = ELF::R_PPC_ADDR16_HA;
- break;
- case PPC::fixup_ppc_lo16:
- Type = ELF::R_PPC_ADDR16_LO;
- break;
- case PPC::fixup_ppc_lo14:
- Type = ELF::R_PPC_ADDR14;
- break;
- case FK_Data_4:
- Type = ELF::R_PPC_ADDR32;
- break;
- case FK_Data_2:
- Type = ELF::R_PPC_ADDR16;
- break;
- }
- }
- return Type;
-}
-
-void
-PPCELFObjectWriter::adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
- switch ((unsigned)Fixup.getKind()) {
- case PPC::fixup_ppc_ha16:
- case PPC::fixup_ppc_lo16:
- RelocOffset += 2;
- break;
- default:
- break;
- }
-}
-
-//===- MBlazeELFObjectWriter -------------------------------------------===//
-
-MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {
-}
-
-MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
-}
-
-unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // determine the type of the relocation
- unsigned Type;
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default:
- llvm_unreachable("Unimplemented");
- case FK_PCRel_4:
- Type = ELF::R_MICROBLAZE_64_PCREL;
- break;
- case FK_PCRel_2:
- Type = ELF::R_MICROBLAZE_32_PCREL;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_4:
- Type = ((IsRelocWithSymbol || Addend !=0)
- ? ELF::R_MICROBLAZE_32
- : ELF::R_MICROBLAZE_64);
- break;
- case FK_Data_2:
- Type = ELF::R_MICROBLAZE_32;
- break;
- }
- }
- return Type;
-}
-
-//===- X86ELFObjectWriter -------------------------------------------===//
-
-
-X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
-{}
-
-X86ELFObjectWriter::~X86ELFObjectWriter()
-{}
-
-unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // determine the type of the relocation
-
- MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
- MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
- unsigned Type;
- if (is64Bit()) {
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
- case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
- case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
-
- case FK_PCRel_8:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC64;
- break;
- case X86::reloc_signed_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_riprel_4byte:
- case FK_PCRel_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_X86_64_PLT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_X86_64_GOTTPOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_X86_64_TLSGD;
- break;
- case MCSymbolRefExpr::VK_TLSLD:
- Type = ELF::R_X86_64_TLSLD;
- break;
- }
- break;
- case FK_PCRel_2:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC16;
- break;
- case FK_PCRel_1:
- assert(Modifier == MCSymbolRefExpr::VK_None);
- Type = ELF::R_X86_64_PC8;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
- case FK_Data_8: Type = ELF::R_X86_64_64; break;
- case X86::reloc_signed_4byte:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_X86_64_32S;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_X86_64_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTPCREL:
- Type = ELF::R_X86_64_GOTPCREL;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_X86_64_TPOFF32;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_X86_64_DTPOFF32;
- break;
- }
- break;
- case FK_Data_4:
- Type = ELF::R_X86_64_32;
- break;
- case FK_Data_2: Type = ELF::R_X86_64_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_X86_64_8; break;
- }
- }
- } else {
- if (IsPCRel) {
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_PC32;
- break;
- case MCSymbolRefExpr::VK_PLT:
- Type = ELF::R_386_PLT32;
- break;
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- default: llvm_unreachable("invalid fixup kind!");
-
- case X86::reloc_global_offset_table:
- Type = ELF::R_386_GOTPC;
- break;
-
- // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode
- // instead?
- case X86::reloc_signed_4byte:
- case FK_PCRel_4:
- case FK_Data_4:
- switch (Modifier) {
- default:
- llvm_unreachable("Unimplemented");
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_386_32;
- break;
- case MCSymbolRefExpr::VK_GOT:
- Type = ELF::R_386_GOT32;
- break;
- case MCSymbolRefExpr::VK_GOTOFF:
- Type = ELF::R_386_GOTOFF;
- break;
- case MCSymbolRefExpr::VK_TLSGD:
- Type = ELF::R_386_TLS_GD;
- break;
- case MCSymbolRefExpr::VK_TPOFF:
- Type = ELF::R_386_TLS_LE_32;
- break;
- case MCSymbolRefExpr::VK_INDNTPOFF:
- Type = ELF::R_386_TLS_IE;
- break;
- case MCSymbolRefExpr::VK_NTPOFF:
- Type = ELF::R_386_TLS_LE;
- break;
- case MCSymbolRefExpr::VK_GOTNTPOFF:
- Type = ELF::R_386_TLS_GOTIE;
- break;
- case MCSymbolRefExpr::VK_TLSLDM:
- Type = ELF::R_386_TLS_LDM;
- break;
- case MCSymbolRefExpr::VK_DTPOFF:
- Type = ELF::R_386_TLS_LDO_32;
- break;
- case MCSymbolRefExpr::VK_GOTTPOFF:
- Type = ELF::R_386_TLS_IE_32;
- break;
- }
- break;
- case FK_Data_2: Type = ELF::R_386_16; break;
- case FK_PCRel_1:
- case FK_Data_1: Type = ELF::R_386_8; break;
- }
- }
- }
-
- if (RelocNeedsGOT(Modifier))
- NeedsGOT = true;
-
- return Type;
-}
-
-MipsELFObjectWriter::MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian)
- : ELFObjectWriter(MOTW, _OS, IsLittleEndian) {}
-
-MipsELFObjectWriter::~MipsELFObjectWriter() {}
-
-unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel,
- bool IsRelocWithSymbol,
- int64_t Addend) {
- // tbd
- return 1;
+ return new ELFObjectWriter(MOTW, OS, IsLittleEndian);
}
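
Backends now pair this factory with their own target writer; a sketch of a backend-side factory using the invented FooELFObjectWriter from the earlier sketch:

  MCObjectWriter *createFooELFObjectWriter(raw_ostream &OS, uint8_t OSABI) {
    return createELFObjectWriter(new FooELFObjectWriter(OSABI), OS,
                                 /*IsLittleEndian=*/true);
  }
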
diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h
deleted file mode 100644
index 862b085c76bf..000000000000
--- a/lib/MC/ELFObjectWriter.h
+++ /dev/null
@@ -1,446 +0,0 @@
-//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF object file writer information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_ELFOBJECTWRITER_H
-#define LLVM_MC_ELFOBJECTWRITER_H
-
-#include "MCELF.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
-
-#include <vector>
-
-namespace llvm {
-
-class MCSection;
-class MCDataFragment;
-class MCSectionELF;
-
-class ELFObjectWriter : public MCObjectWriter {
- protected:
-
- static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
- static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
- static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
- static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
- bool Used, bool Renamed);
- static bool isLocal(const MCSymbolData &Data, bool isSignature,
- bool isUsedInReloc);
- static bool IsELFMetaDataSection(const MCSectionData &SD);
- static uint64_t DataSectionSize(const MCSectionData &SD);
- static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
- static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout,
- const MCSectionData &SD);
-
- void WriteDataSectionData(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSectionELF &Section);
-
- /*static bool isFixupKindX86RIPRel(unsigned Kind) {
- return Kind == X86::reloc_riprel_4byte ||
- Kind == X86::reloc_riprel_4byte_movq_load;
- }*/
-
- /// ELFSymbolData - Helper struct for containing some precomputed
- /// information on symbols.
- struct ELFSymbolData {
- MCSymbolData *SymbolData;
- uint64_t StringIndex;
- uint32_t SectionIndex;
-
- // Support lexicographic sorting.
- bool operator<(const ELFSymbolData &RHS) const {
- if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
- return true;
- if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
- return false;
- return SymbolData->getSymbol().getName() <
- RHS.SymbolData->getSymbol().getName();
- }
- };
-
- /// @name Relocation Data
- /// @{
-
- struct ELFRelocationEntry {
- // Make these big enough for both 32-bit and 64-bit
- uint64_t r_offset;
- int Index;
- unsigned Type;
- const MCSymbol *Symbol;
- uint64_t r_addend;
-
- ELFRelocationEntry()
- : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
-
- ELFRelocationEntry(uint64_t RelocOffset, int Idx,
- unsigned RelType, const MCSymbol *Sym,
- uint64_t Addend)
- : r_offset(RelocOffset), Index(Idx), Type(RelType),
- Symbol(Sym), r_addend(Addend) {}
-
- // Support lexicographic sorting.
- bool operator<(const ELFRelocationEntry &RE) const {
- return RE.r_offset < r_offset;
- }
- };
-
- /// The target specific ELF writer instance.
- llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
-
- SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
- SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
- DenseMap<const MCSymbol *, const MCSymbol *> Renames;
-
- llvm::DenseMap<const MCSectionData*,
- std::vector<ELFRelocationEntry> > Relocations;
- DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
-
- /// @}
- /// @name Symbol Table Data
- /// @{
-
- SmallString<256> StringTable;
- std::vector<ELFSymbolData> LocalSymbolData;
- std::vector<ELFSymbolData> ExternalSymbolData;
- std::vector<ELFSymbolData> UndefinedSymbolData;
-
- /// @}
-
- bool NeedsGOT;
-
- bool NeedsSymtabShndx;
-
- // This holds the symbol table index of the last local symbol.
- unsigned LastLocalSymbolIndex;
- // This holds the .strtab section index.
- unsigned StringTableIndex;
- // This holds the .symtab section index.
- unsigned SymbolTableIndex;
-
- unsigned ShstrtabIndex;
-
-
- virtual const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const;
-
- // For arch-specific emission of explicit reloc symbol
- virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- return NULL;
- }
-
- bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
- bool hasRelocationAddend() const {
- return TargetObjectWriter->hasRelocationAddend();
- }
-
- public:
- ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS, bool IsLittleEndian)
- : MCObjectWriter(_OS, IsLittleEndian),
- TargetObjectWriter(MOTW),
- NeedsGOT(false), NeedsSymtabShndx(false){
- }
-
- virtual ~ELFObjectWriter();
-
- void WriteWord(uint64_t W) {
- if (is64Bit())
- Write64(W);
- else
- Write32(W);
- }
-
- void StringLE16(char *buf, uint16_t Value) {
- buf[0] = char(Value >> 0);
- buf[1] = char(Value >> 8);
- }
-
- void StringLE32(char *buf, uint32_t Value) {
- StringLE16(buf, uint16_t(Value >> 0));
- StringLE16(buf + 2, uint16_t(Value >> 16));
- }
-
- void StringLE64(char *buf, uint64_t Value) {
- StringLE32(buf, uint32_t(Value >> 0));
- StringLE32(buf + 4, uint32_t(Value >> 32));
- }
-
- void StringBE16(char *buf ,uint16_t Value) {
- buf[0] = char(Value >> 8);
- buf[1] = char(Value >> 0);
- }
-
- void StringBE32(char *buf, uint32_t Value) {
- StringBE16(buf, uint16_t(Value >> 16));
- StringBE16(buf + 2, uint16_t(Value >> 0));
- }
-
- void StringBE64(char *buf, uint64_t Value) {
- StringBE32(buf, uint32_t(Value >> 32));
- StringBE32(buf + 4, uint32_t(Value >> 0));
- }
-
- void String8(MCDataFragment &F, uint8_t Value) {
- char buf[1];
- buf[0] = Value;
- F.getContents() += StringRef(buf, 1);
- }
-
- void String16(MCDataFragment &F, uint16_t Value) {
- char buf[2];
- if (isLittleEndian())
- StringLE16(buf, Value);
- else
- StringBE16(buf, Value);
- F.getContents() += StringRef(buf, 2);
- }
-
- void String32(MCDataFragment &F, uint32_t Value) {
- char buf[4];
- if (isLittleEndian())
- StringLE32(buf, Value);
- else
- StringBE32(buf, Value);
- F.getContents() += StringRef(buf, 4);
- }
-
- void String64(MCDataFragment &F, uint64_t Value) {
- char buf[8];
- if (isLittleEndian())
- StringLE64(buf, Value);
- else
- StringBE64(buf, Value);
- F.getContents() += StringRef(buf, 8);
- }
-
- virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
-
- /// Default e_flags = 0
- virtual void WriteEFlags() { Write32(0); }
-
- virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
- uint64_t name, uint8_t info,
- uint64_t value, uint64_t size,
- uint8_t other, uint32_t shndx,
- bool Reserved);
-
- virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
- ELFSymbolData &MSD,
- const MCAsmLayout &Layout);
-
- typedef DenseMap<const MCSectionELF*, uint32_t> SectionIndexMapTy;
- virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap);
-
- virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue);
-
- virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
- const MCSymbol *S);
-
- // Map from a group section to the signature symbol
- typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
- // Map from a signature symbol to the group section
- typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
- // Map from a section to the section with the relocations
- typedef DenseMap<const MCSectionELF*, const MCSectionELF*> RelMapTy;
- // Map from a section to its offset
- typedef DenseMap<const MCSectionELF*, uint64_t> SectionOffsetMapTy;
-
- /// ComputeSymbolTable - Compute the symbol table data
- ///
- /// \param StringTable [out] - The string table data.
- /// \param StringIndexMap [out] - Map from symbol names to offsets in the
- /// string table.
- virtual void ComputeSymbolTable(MCAssembler &Asm,
- const SectionIndexMapTy &SectionIndexMap,
- RevGroupMapTy RevGroupMap,
- unsigned NumRegularSections);
-
- virtual void ComputeIndexMap(MCAssembler &Asm,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
- RelMapTy &RelMap);
-
- void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
- const RelMapTy &RelMap);
-
- virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- // Create the sections that show up in the symbol table. Currently
- // those are the .note.GNU-stack section and the group sections.
- virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout,
- GroupMapTy &GroupMap,
- RevGroupMapTy &RevGroupMap,
- SectionIndexMapTy &SectionIndexMap,
- const RelMapTy &RelMap);
-
- virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout);
-
- void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap,
- const MCAsmLayout &Layout,
- const SectionIndexMapTy &SectionIndexMap,
- const SectionOffsetMapTy &SectionOffsetMap);
-
- void ComputeSectionOrder(MCAssembler &Asm,
- std::vector<const MCSectionELF*> &Sections);
-
- virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
- uint64_t Address, uint64_t Offset,
- uint64_t Size, uint32_t Link, uint32_t Info,
- uint64_t Alignment, uint64_t EntrySize);
-
- virtual void WriteRelocationsFragment(const MCAssembler &Asm,
- MCDataFragment *F,
- const MCSectionData *SD);
-
- virtual bool
- IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const;
-
- virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
- virtual void WriteSection(MCAssembler &Asm,
- const SectionIndexMapTy &SectionIndexMap,
- uint32_t GroupSymbolIndex,
- uint64_t Offset, uint64_t Size, uint64_t Alignment,
- const MCSectionELF &Section);
-
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend) = 0;
- virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) { }
- };
-
- //===- X86ELFObjectWriter -------------------------------------------===//
-
- class X86ELFObjectWriter : public ELFObjectWriter {
- public:
- X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~X86ELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- };
-
-
- //===- ARMELFObjectWriter -------------------------------------------===//
-
- class ARMELFObjectWriter : public ELFObjectWriter {
- public:
- // FIXME: MCAssembler can't yet return the Subtarget,
- enum { DefaultEABIVersion = 0x05000000U };
-
- ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~ARMELFObjectWriter();
-
- virtual void WriteEFlags();
- protected:
- virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
- const MCValue &Target,
- const MCFragment &F,
- const MCFixup &Fixup,
- bool IsPCRel) const;
-
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- private:
- unsigned GetRelocTypeInner(const MCValue &Target,
- const MCFixup &Fixup, bool IsPCRel) const;
-
- };
-
- //===- PPCELFObjectWriter -------------------------------------------===//
-
- class PPCELFObjectWriter : public ELFObjectWriter {
- public:
- PPCELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~PPCELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
- };
-
- //===- MBlazeELFObjectWriter -------------------------------------------===//
-
- class MBlazeELFObjectWriter : public ELFObjectWriter {
- public:
- MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~MBlazeELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- };
-
- //===- MipsELFObjectWriter -------------------------------------------===//
-
- class MipsELFObjectWriter : public ELFObjectWriter {
- public:
- MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW,
- raw_ostream &_OS,
- bool IsLittleEndian);
-
- virtual ~MipsELFObjectWriter();
- protected:
- virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel, bool IsRelocWithSymbol,
- int64_t Addend);
- };
-}
-
-#endif
diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt
new file mode 100644
index 000000000000..f35dbe4d5d35
--- /dev/null
+++ b/lib/MC/LLVMBuild.txt
@@ -0,0 +1,25 @@
+;===- ./lib/MC/LLVMBuild.txt -----------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCDisassembler MCParser
+
+[component_0]
+type = Library
+name = MC
+parent = Libraries
+required_libraries = Object Support
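
This file is the LLVMBuild counterpart of the add_llvm_library_dependencies() block removed from lib/MC/CMakeLists.txt above: llvm-build now derives the library dependency edges from required_libraries. A component that links against MC would declare itself the same way (names invented):

  [component_0]
  type = Library
  name = FooCodeGen
  parent = Libraries
  required_libraries = MC Support
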
diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp
index 2c150f456cf6..0b2e4ae7ed07 100644
--- a/lib/MC/MCAsmBackend.cpp
+++ b/lib/MC/MCAsmBackend.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCFixupKindInfo.h"
using namespace llvm;
MCAsmBackend::MCAsmBackend()
@@ -21,14 +22,22 @@ MCAsmBackend::~MCAsmBackend() {
const MCFixupKindInfo &
MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
static const MCFixupKindInfo Builtins[] = {
- { "FK_Data_1", 0, 8, 0 },
- { "FK_Data_2", 0, 16, 0 },
- { "FK_Data_4", 0, 32, 0 },
- { "FK_Data_8", 0, 64, 0 },
- { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_Data_1", 0, 8, 0 },
+ { "FK_Data_2", 0, 16, 0 },
+ { "FK_Data_4", 0, 32, 0 },
+ { "FK_Data_8", 0, 64, 0 },
+ { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+ { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_GPRel_1", 0, 8, 0 },
+ { "FK_GPRel_2", 0, 16, 0 },
+ { "FK_GPRel_4", 0, 32, 0 },
+ { "FK_GPRel_8", 0, 64, 0 },
+ { "FK_SecRel_1", 0, 8, 0 },
+ { "FK_SecRel_2", 0, 16, 0 },
+ { "FK_SecRel_4", 0, 32, 0 },
+ { "FK_SecRel_8", 0, 64, 0 }
};
assert((size_t)Kind < sizeof(Builtins) / sizeof(Builtins[0]) &&
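
The builtin table above pairs each generic MCFixupKind with its name, bit
offset, bit width, and flags; clients reach it through getFixupKindInfo().
A minimal sketch of such a query, assuming an existing MCAsmBackend
reference (the helper itself is illustrative, not part of this patch):

#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
using namespace llvm;

// Sketch: ask the builtin table whether a fixup kind is PC-relative.
static bool isPCRelFixup(const MCAsmBackend &Backend, MCFixupKind Kind) {
  const MCFixupKindInfo &Info = Backend.getFixupKindInfo(Kind);
  return (Info.Flags & MCFixupKindInfo::FKF_IsPCRel) != 0; // FK_PCRel_4: true
}
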
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 95861bc61c27..8286c1dfeae1 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -29,7 +29,6 @@ MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
- StructorOutputOrder = Structors::ReversePriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = false;
LinkerRequiresNonEmptyDwarfLines = false;
MaxInstLength = 4;
@@ -50,6 +49,7 @@ MCAsmInfo::MCAsmInfo() {
AllowQuotesInName = false;
AllowNameToStartWithDigit = false;
AllowPeriodsInName = true;
+ AllowUTF8 = true;
ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t";
@@ -68,6 +68,7 @@ MCAsmInfo::MCAsmInfo() {
AlignDirective = "\t.align\t";
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
+ GPRel64Directive = 0;
GPRel32Directive = 0;
GlobalDirective = "\t.globl\t";
HasSetDirective = true;
@@ -91,6 +92,7 @@ MCAsmInfo::MCAsmInfo() {
DwarfRequiresRelocationForSectionOffset = true;
DwarfSectionOffsetDirective = 0;
DwarfUsesLabelOffsetForRanges = true;
+ DwarfUsesRelocationsForStringPool = true;
DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
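
Each value set here is only a default; a target's MCAsmInfo subclass
overrides the fields it cares about in its own constructor. A hedged sketch
using the three fields this hunk adds (the class name is illustrative;
".gpdword" matches the Mips 64-bit GP-relative directive):

#include "llvm/MC/MCAsmInfo.h"

// Hypothetical target AsmInfo flipping the new defaults; not part of
// this patch.
class MyTargetMCAsmInfo : public llvm::MCAsmInfo {
public:
  MyTargetMCAsmInfo() {
    AllowUTF8 = false;                         // reject UTF-8 in symbol names
    GPRel64Directive = "\t.gpdword\t";         // 64-bit GP-relative data
    DwarfUsesRelocationsForStringPool = false; // emit plain offsets instead
  }
};
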
diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp
index 434d9103a71a..881d99217bda 100644
--- a/lib/MC/MCAsmInfoCOFF.cpp
+++ b/lib/MC/MCAsmInfoCOFF.cpp
@@ -13,9 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfoCOFF.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
+void MCAsmInfoCOFF::anchor() { }
+
MCAsmInfoCOFF::MCAsmInfoCOFF() {
GlobalPrefix = "_";
COMMDirectiveAlignmentIsInBytes = false;
@@ -38,3 +39,15 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
SupportsDataRegions = false;
}
+
+void MCAsmInfoMicrosoft::anchor() { }
+
+MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
+ AllowQuotesInName = true;
+}
+
+void MCAsmInfoGNUCOFF::anchor() { }
+
+MCAsmInfoGNUCOFF::MCAsmInfoGNUCOFF() {
+
+}
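
The empty anchor() definitions above exist only to pin each class's vtable
(and type information) to this translation unit; without one out-of-line
virtual member, the compiler emits a copy of the vtable in every file that
includes the header. The idiom in general form, with an illustrative class:

// In the header: declare a single out-of-line virtual method.
class Example {
  virtual void anchor();
public:
  virtual ~Example() {}
};

// In exactly one .cpp file: define it, giving the vtable a home.
void Example::anchor() {}
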
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index b20e338f7904..c1e26350dc8e 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -18,6 +18,8 @@
#include "llvm/MC/MCStreamer.h"
using namespace llvm;
+void MCAsmInfoDarwin::anchor() { }
+
MCAsmInfoDarwin::MCAsmInfoDarwin() {
// Common settings for all Darwin targets.
// Syntax:
@@ -39,7 +41,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
- StructorOutputOrder = Structors::PriorityOrder;
HasStaticCtorDtorReferenceInStaticMode = true;
CodeBegin = "L$start$code$";
@@ -57,8 +58,9 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HiddenVisibilityAttr = MCSA_PrivateExtern;
HiddenDeclarationVisibilityAttr = MCSA_Invalid;
+
// Doesn't support protected visibility.
- ProtectedVisibilityAttr = MCSA_Global;
+ ProtectedVisibilityAttr = MCSA_Invalid;
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
@@ -66,4 +68,5 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
DwarfRequiresRelocationForSectionOffset = false;
DwarfUsesLabelOffsetForRanges = false;
+ DwarfUsesRelocationsForStringPool = false;
}
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 3fcbb05907bc..11f0f7296337 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/PathV2.h"
#include <cctype>
using namespace llvm;
@@ -50,6 +51,7 @@ private:
unsigned ShowInst : 1;
unsigned UseLoc : 1;
unsigned UseCFI : 1;
+ unsigned UseDwarfDirectory : 1;
enum EHSymbolFlags { EHGlobal = 1,
EHWeakDefinition = 1 << 1,
@@ -59,17 +61,21 @@ private:
bool needsSet(const MCExpr *Value);
void EmitRegisterName(int64_t Register);
+ virtual void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame);
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
public:
MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *printer, MCCodeEmitter *emitter,
MCAsmBackend *asmbackend,
bool showInst)
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) {
+ ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
+ UseDwarfDirectory(useDwarfDirectory) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
@@ -150,6 +156,7 @@ public:
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
virtual void EmitCOFFSymbolType(int Type);
virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
@@ -179,6 +186,8 @@ public:
virtual void EmitSLEB128Value(const MCExpr *Value);
+ virtual void EmitGPRel64Value(const MCExpr *Value);
+
virtual void EmitGPRel32Value(const MCExpr *Value);
@@ -192,19 +201,18 @@ public:
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename);
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
StringRef FileName);
virtual void EmitCFISections(bool EH, bool Debug);
- virtual void EmitCFIStartProc();
- virtual void EmitCFIEndProc();
virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
virtual void EmitCFIDefCfaOffset(int64_t Offset);
virtual void EmitCFIDefCfaRegister(int64_t Register);
@@ -216,6 +224,7 @@ public:
virtual void EmitCFISameValue(int64_t Register);
virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
+ virtual void EmitCFISignalFrame();
virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
virtual void EmitWin64EHEndProc();
@@ -249,7 +258,7 @@ public:
/// indicated by the hasRawTextSupport() predicate.
virtual void EmitRawText(StringRef String);
- virtual void Finish();
+ virtual void FinishImpl();
/// @}
};
@@ -334,7 +343,6 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
- default: assert(0 && "Invalid flag!");
case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break;
case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
case MCAF_Code16: OS << '\t'<< MAI.getCode16Directive(); break;
@@ -386,7 +394,7 @@ void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
switch (Attribute) {
- case MCSA_Invalid: assert(0 && "Invalid symbol attribute");
+ case MCSA_Invalid: llvm_unreachable("Invalid symbol attribute");
case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function
case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC
case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object
@@ -398,7 +406,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
OS << "\t.type\t" << *Symbol << ','
<< ((MAI.getCommentString()[0] != '@') ? '@' : '%');
switch (Attribute) {
- default: assert(0 && "Unknown ELF .type");
+ default: llvm_unreachable("Unknown ELF .type");
case MCSA_ELF_TypeFunction: OS << "function"; break;
case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
case MCSA_ELF_TypeObject: OS << "object"; break;
@@ -465,6 +473,11 @@ void MCAsmStreamer::EndCOFFSymbolDef() {
EmitEOL();
}
+void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+ OS << "\t.secrel32\t" << *Symbol << '\n';
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI.hasDotTypeDotSizeDirective());
OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
@@ -652,6 +665,12 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
EmitEOL();
}
+void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ assert(MAI.getGPRel64Directive() != 0);
+ OS << MAI.getGPRel64Directive() << *Value;
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
assert(MAI.getGPRel32Directive() != 0);
OS << MAI.getGPRel32Directive() << *Value;
@@ -733,11 +752,12 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
1, MaxBytesToEmit);
}
-void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
OS << ".org " << *Offset << ", " << (unsigned) Value;
EmitEOL();
+ return false;
}
@@ -748,13 +768,27 @@ void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
EmitEOL();
}
-bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){
+bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename) {
+ if (!UseDwarfDirectory && !Directory.empty()) {
+ if (sys::path::is_absolute(Filename))
+ return EmitDwarfFileDirective(FileNo, "", Filename);
+
+ SmallString<128> FullPathName = Directory;
+ sys::path::append(FullPathName, Filename);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ }
+
if (UseLoc) {
OS << "\t.file\t" << FileNo << ' ';
+ if (!Directory.empty()) {
+ PrintQuotedString(Directory, OS);
+ OS << ' ';
+ }
PrintQuotedString(Filename, OS);
EmitEOL();
}
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
}
void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -816,21 +850,25 @@ void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
EmitEOL();
}
-void MCAsmStreamer::EmitCFIStartProc() {
- MCStreamer::EmitCFIStartProc();
-
- if (!UseCFI)
+void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+ if (!UseCFI) {
+ RecordProcStart(Frame);
return;
+ }
OS << "\t.cfi_startproc";
EmitEOL();
}
-void MCAsmStreamer::EmitCFIEndProc() {
- MCStreamer::EmitCFIEndProc();
-
- if (!UseCFI)
+void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ if (!UseCFI) {
+ RecordProcEnd(Frame);
return;
+ }
+
+ // Put a dummy non-null value in Frame.End to mark that this frame has been
+ // closed.
+ Frame.End = (MCSymbol *) 1;
OS << "\t.cfi_endproc";
EmitEOL();
@@ -965,6 +1003,16 @@ void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
EmitEOL();
}
+void MCAsmStreamer::EmitCFISignalFrame() {
+ MCStreamer::EmitCFISignalFrame();
+
+ if (!UseCFI)
+ return;
+
+  OS << "\t.cfi_signal_frame";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
MCStreamer::EmitWin64EHStartProc(Symbol);
@@ -1260,10 +1308,16 @@ void MCAsmStreamer::EmitRawText(StringRef String) {
EmitEOL();
}
-void MCAsmStreamer::Finish() {
+void MCAsmStreamer::FinishImpl() {
+ // FIXME: This header is duplicated with MCObjectStreamer
// Dump out the dwarf file & directory tables and line tables.
+ const MCSymbol *LineSectionSymbol = NULL;
if (getContext().hasDwarfFiles() && !UseLoc)
- MCDwarfFileTable::Emit(this);
+ LineSectionSymbol = MCDwarfFileTable::Emit(this);
+
+  // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this, LineSectionSymbol);
if (!UseCFI)
EmitFrames(false);
@@ -1271,9 +1325,9 @@ void MCAsmStreamer::Finish() {
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
- bool useCFI, MCInstPrinter *IP,
- MCCodeEmitter *CE, MCAsmBackend *MAB,
- bool ShowInst) {
+ bool useCFI, bool useDwarfDirectory,
+ MCInstPrinter *IP, MCCodeEmitter *CE,
+ MCAsmBackend *MAB, bool ShowInst) {
return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
- IP, CE, MAB, ShowInst);
+ useDwarfDirectory, IP, CE, MAB, ShowInst);
}
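
Callers of createAsmStreamer() must now pass the extra useDwarfDirectory
flag; when it is false, EmitDwarfFileDirective() folds the directory back
into the file name before printing the .file directive. A hedged sketch of
the updated call (every object is assumed to be constructed elsewhere):

#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/FormattedStream.h"
using namespace llvm;

// Sketch: only the new useDwarfDirectory argument is the point here.
static MCStreamer *makeStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
                                MCInstPrinter *IP, MCCodeEmitter *CE,
                                MCAsmBackend *MAB) {
  return createAsmStreamer(Ctx, OS,
                           /*isVerboseAsm=*/true,
                           /*useLoc=*/true,
                           /*useCFI=*/true,
                           /*useDwarfDirectory=*/false, // dir folded into name
                           IP, CE, MAB,
                           /*ShowInst=*/false);
}
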
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 06c8aec91917..66ba9b81f3aa 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -13,13 +13,13 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
@@ -33,7 +33,7 @@ using namespace llvm;
namespace {
namespace stats {
STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
-STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(evaluateFixup, "Number of evaluated fixups");
STATISTIC(FragmentLayouts, "Number of fragment layouts");
STATISTIC(ObjectBytes, "Number of emitted object file bytes");
STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
@@ -118,7 +118,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
report_fatal_error("unable to evaluate offset to undefined symbol '" +
Target.getSymB()->getSymbol().getName() + "'");
-
+
uint64_t Offset = Target.getConstant();
if (Target.getSymA())
Offset += getSymbolOffset(&Assembler.getSymbolData(
@@ -136,7 +136,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
// The size is the last fragment's end offset.
const MCFragment &F = SD->getFragmentList().back();
- return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F);
+ return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
}
uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
@@ -237,13 +237,13 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return SD->getFragment()->getAtom();
}
-bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
- ++stats::EvaluateFixup;
+ ++stats::evaluateFixup;
if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout))
- report_fatal_error("expected relocatable expression");
+ getContext().FatalError(Fixup.getLoc(), "expected relocatable expression");
bool IsPCRel = Backend.getFixupKindInfo(
Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
@@ -273,13 +273,10 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
Value = Target.getConstant();
- bool IsThumb = false;
if (const MCSymbolRefExpr *A = Target.getSymA()) {
const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
if (Sym.isDefined())
Value += Layout.getSymbolOffset(&getSymbolData(Sym));
- if (isThumbFunc(&Sym))
- IsThumb = true;
}
if (const MCSymbolRefExpr *B = Target.getSymB()) {
const MCSymbol &Sym = B->getSymbol().AliasedSymbol();
@@ -295,24 +292,22 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
if (IsPCRel) {
uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
-
+
// A number of ARM fixups in Thumb mode require that the effective PC
// address be determined as the 32-bit aligned version of the actual offset.
if (ShouldAlignPC) Offset &= ~0x3;
Value -= Offset;
}
- // ARM fixups based from a thumb function address need to have the low
- // bit set. The actual value is always at least 16-bit aligned, so the
- // low bit is normally clear and available for use as an ISA flag for
- // interworking.
- if (IsThumb)
- Value |= 1;
+ // Let the backend adjust the fixup value if necessary, including whether
+ // we need a relocation.
+ Backend.processFixupValue(*this, Layout, Fixup, DF, Target, Value,
+ IsResolved);
return IsResolved;
}
-uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
+uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
switch (F.getKind()) {
case MCFragment::FT_Data:
@@ -355,8 +350,7 @@ uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout,
return cast<MCDwarfCallFrameFragment>(F).getContents().size();
}
- assert(0 && "invalid fragment kind");
- return 0;
+ llvm_unreachable("invalid fragment kind");
}
void MCAsmLayout::LayoutFragment(MCFragment *F) {
@@ -374,7 +368,7 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) {
// Compute fragment offset and size.
uint64_t Offset = 0;
if (Prev)
- Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+ Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev);
F->Offset = Offset;
LastValidFragment[F->getParent()] = F;
@@ -390,7 +384,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
++stats::EmittedFragments;
// FIXME: Embed in fragments instead?
- uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+ uint64_t FragmentSize = Asm.computeFragmentSize(Layout, F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
@@ -412,7 +406,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// bytes left to fill use the Value and ValueSize to fill the rest.
// If we are aligning with nops, ask the target to emit the right data.
if (AF.hasEmitNops()) {
- if (!Asm.getBackend().WriteNopData(Count, OW))
+ if (!Asm.getBackend().writeNopData(Count, OW))
report_fatal_error("unable to write nop sequence of " +
Twine(Count) + " bytes");
break;
@@ -421,8 +415,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// Otherwise, write out in multiples of the value size.
for (uint64_t i = 0; i != Count; ++i) {
switch (AF.getValueSize()) {
- default:
- assert(0 && "Invalid size!");
+ default: llvm_unreachable("Invalid size!");
case 1: OW->Write8 (uint8_t (AF.getValue())); break;
case 2: OW->Write16(uint16_t(AF.getValue())); break;
case 4: OW->Write32(uint32_t(AF.getValue())); break;
@@ -446,8 +439,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
switch (FF.getValueSize()) {
- default:
- assert(0 && "Invalid size!");
+ default: llvm_unreachable("Invalid size!");
case 1: OW->Write8 (uint8_t (FF.getValue())); break;
case 2: OW->Write16(uint16_t(FF.getValue())); break;
case 4: OW->Write32(uint32_t(FF.getValue())); break;
@@ -493,7 +485,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
assert(OW->getStream().tell() - Start == FragmentSize);
}
-void MCAssembler::WriteSectionData(const MCSectionData *SD,
+void MCAssembler::writeSectionData(const MCSectionData *SD,
const MCAsmLayout &Layout) const {
// Ignore virtual sections.
if (SD->getSection().isVirtualSection()) {
@@ -503,8 +495,7 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
for (MCSectionData::const_iterator it = SD->begin(),
ie = SD->end(); it != ie; ++it) {
switch (it->getKind()) {
- default:
- assert(0 && "Invalid fragment in virtual section!");
+ default: llvm_unreachable("Invalid fragment in virtual section!");
case MCFragment::FT_Data: {
// Check that we aren't trying to write a non-zero contents (or fixups)
// into a virtual section. This is to support clients which use standard
@@ -546,13 +537,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
}
-uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+uint64_t MCAssembler::handleFixup(const MCAsmLayout &Layout,
MCFragment &F,
const MCFixup &Fixup) {
// Evaluate the fixup.
MCValue Target;
uint64_t FixedValue;
- if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+ if (!evaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
@@ -592,7 +583,7 @@ void MCAssembler::Finish() {
}
// Layout until everything fits.
- while (LayoutOnce(Layout))
+ while (layoutOnce(Layout))
continue;
DEBUG_WITH_TYPE("mc-dump", {
@@ -600,7 +591,7 @@ void MCAssembler::Finish() {
dump(); });
// Finalize the layout, including fragment lowering.
- FinishLayout(Layout);
+ finishLayout(Layout);
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - final-layout\n--\n";
@@ -621,8 +612,8 @@ void MCAssembler::Finish() {
for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
- getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+ uint64_t FixedValue = handleFixup(Layout, *DF, Fixup);
+ getBackend().applyFixup(Fixup, DF->getContents().data(),
DF->getContents().size(), FixedValue);
}
}
@@ -631,8 +622,8 @@ void MCAssembler::Finish() {
for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
MCFixup &Fixup = *it3;
- uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
- getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+ uint64_t FixedValue = handleFixup(Layout, *IF, Fixup);
+ getBackend().applyFixup(Fixup, IF->getCode().data(),
IF->getCode().size(), FixedValue);
}
}
@@ -645,8 +636,8 @@ void MCAssembler::Finish() {
stats::ObjectBytes += OS.tell() - StartOffset;
}
-bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
- const MCFragment *DF,
+bool MCAssembler::fixupNeedsRelaxation(const MCFixup &Fixup,
+ const MCInstFragment *DF,
const MCAsmLayout &Layout) const {
if (getRelaxAll())
return true;
@@ -654,34 +645,31 @@ bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
// If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
uint64_t Value;
- if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+ if (!evaluateFixup(Layout, Fixup, DF, Target, Value))
return true;
- // Otherwise, relax if the value is too big for a (signed) i8.
- //
- // FIXME: This is target dependent!
- return int64_t(Value) != int64_t(int8_t(Value));
+ return getBackend().fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
-bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+bool MCAssembler::fragmentNeedsRelaxation(const MCInstFragment *IF,
const MCAsmLayout &Layout) const {
// If this inst doesn't ever need relaxation, ignore it. This occurs when we
// are intentionally pushing out inst fragments, or because we relaxed a
// previous instruction to one that doesn't need relaxation.
- if (!getBackend().MayNeedRelaxation(IF->getInst()))
+ if (!getBackend().mayNeedRelaxation(IF->getInst()))
return false;
for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
ie = IF->fixup_end(); it != ie; ++it)
- if (FixupNeedsRelaxation(*it, IF, Layout))
+ if (fixupNeedsRelaxation(*it, IF, Layout))
return true;
return false;
}
-bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
MCInstFragment &IF) {
- if (!FragmentNeedsRelaxation(&IF, Layout))
+ if (!fragmentNeedsRelaxation(&IF, Layout))
return false;
++stats::RelaxedInstructions;
@@ -692,7 +680,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
// Relax the fragment.
MCInst Relaxed;
- getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+ getBackend().relaxInstruction(IF.getInst(), Relaxed);
// Encode the new instruction.
//
@@ -715,7 +703,7 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
return true;
}
-bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
int64_t Value = 0;
uint64_t OldSize = LF.getContents().size();
bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout);
@@ -732,8 +720,8 @@ bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
return OldSize != LF.getContents().size();
}
-bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
- MCDwarfLineAddrFragment &DF) {
+bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
+ MCDwarfLineAddrFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
@@ -749,7 +737,7 @@ bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
MCDwarfCallFrameFragment &DF) {
int64_t AddrDelta = 0;
uint64_t OldSize = DF.getContents().size();
@@ -764,7 +752,7 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return OldSize != Data.size();
}
-bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout,
MCSectionData &SD) {
MCFragment *FirstInvalidFragment = NULL;
// Scan for fragments that need relaxation.
@@ -776,19 +764,19 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
default:
break;
case MCFragment::FT_Inst:
- relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+ relaxedFrag = relaxInstruction(Layout, *cast<MCInstFragment>(it2));
break;
case MCFragment::FT_Dwarf:
- relaxedFrag = RelaxDwarfLineAddr(Layout,
+ relaxedFrag = relaxDwarfLineAddr(Layout,
*cast<MCDwarfLineAddrFragment>(it2));
break;
case MCFragment::FT_DwarfFrame:
relaxedFrag =
- RelaxDwarfCallFrameFragment(Layout,
+ relaxDwarfCallFrameFragment(Layout,
*cast<MCDwarfCallFrameFragment>(it2));
break;
case MCFragment::FT_LEB:
- relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+ relaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(it2));
break;
}
// Update the layout, and remember that we relaxed.
@@ -802,20 +790,20 @@ bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
return false;
}
-bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
++stats::RelaxationSteps;
bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
- while(LayoutSectionOnce(Layout, SD))
+ while(layoutSectionOnce(Layout, SD))
WasRelaxed = true;
}
return WasRelaxed;
}
-void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+void MCAssembler::finishLayout(MCAsmLayout &Layout) {
// The layout is done. Mark every fragment as valid.
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
@@ -975,3 +963,13 @@ void MCAssembler::dump() {
}
OS << "]>\n";
}
+
+// anchors for MC*Fragment vtables
+void MCDataFragment::anchor() { }
+void MCInstFragment::anchor() { }
+void MCAlignFragment::anchor() { }
+void MCFillFragment::anchor() { }
+void MCOrgFragment::anchor() { }
+void MCLEBFragment::anchor() { }
+void MCDwarfLineAddrFragment::anchor() { }
+void MCDwarfCallFrameFragment::anchor() { }
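
With fixupNeedsRelaxation() forwarded to the backend, the generic assembler
no longer hard-codes the signed-i8 test that this hunk deletes. A sketch of
a backend override reproducing the old policy (the class is hypothetical and
its remaining pure-virtual overrides are elided; the signature follows the
call site above):

// Relax when the fixed-up value no longer fits a signed 8-bit field.
bool MyAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                        uint64_t Value,
                                        const MCInstFragment *DF,
                                        const MCAsmLayout &Layout) const {
  return int64_t(Value) != int64_t(int8_t(Value));
}

Real targets instead test per-fixup-kind ranges, which is the point of
pushing the decision down into the backend.
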
diff --git a/lib/MC/MCCodeGenInfo.cpp b/lib/MC/MCCodeGenInfo.cpp
index 236e7de68a8a..d9dcfd0614bc 100644
--- a/lib/MC/MCCodeGenInfo.cpp
+++ b/lib/MC/MCCodeGenInfo.cpp
@@ -15,7 +15,9 @@
#include "llvm/MC/MCCodeGenInfo.h"
using namespace llvm;
-void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM) {
+void MCCodeGenInfo::InitMCCodeGenInfo(Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
RelocationModel = RM;
CMModel = CM;
+ OptLevel = OL;
}
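
A usage sketch for the widened initializer, on an assumed MCCodeGenInfo
instance (PIC relocations, default code model, default optimization level):

#include "llvm/MC/MCCodeGenInfo.h"
using namespace llvm;

// Sketch: record relocation model, code model, and the new opt level.
static void initCGI(MCCodeGenInfo &CGI) {
  CGI.InitMCCodeGenInfo(Reloc::PIC_, CodeModel::Default, CodeGenOpt::Default);
}
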
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 82690ee3b3e9..d3c4fb1d7ca5 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -20,6 +20,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Signals.h"
using namespace llvm;
typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
@@ -28,8 +31,8 @@ typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
- const MCObjectFileInfo *mofi) :
- MAI(mai), MRI(mri), MOFI(mofi),
+ const MCObjectFileInfo *mofi, const SourceMgr *mgr) :
+ SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
Allocator(), Symbols(Allocator), UsedNames(Allocator),
NextUniqueID(0),
CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
@@ -43,6 +46,8 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
SecureLogUsed = false;
DwarfLocSeen = false;
+ GenDwarfForAssembly = false;
+ GenDwarfFileNumber = 0;
}
MCContext::~MCContext() {
@@ -248,7 +253,8 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
/// directory tables. If the file number has already been allocated it is an
/// error and zero is returned and the client reports the error, else the
/// allocated file number is returned. The file numbers may be in any order.
-unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName,
+ unsigned FileNumber) {
// TODO: a FileNumber of zero says to use the next available file number.
// Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
// to not be less than one. This needs to be changed to be not less than zero.
@@ -266,19 +272,23 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
// Get the new MCDwarfFile slot for this FileNumber.
MCDwarfFile *&File = MCDwarfFiles[FileNumber];
- // Separate the directory part from the basename of the FileName.
- std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+ if (Directory.empty()) {
+ // Separate the directory part from the basename of the FileName.
+ std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+ Directory = Slash.second;
+ if (!Directory.empty()) {
+ Directory = Slash.first;
+ FileName = Slash.second;
+ }
+ }
// Find or make an entry in the MCDwarfDirs vector for this Directory.
- StringRef Name;
- unsigned DirIndex;
// Capture directory name.
- if (Slash.second.empty()) {
- Name = Slash.first;
- DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ unsigned DirIndex;
+ if (Directory.empty()) {
+ // For FileNames with no directories a DirIndex of 0 is used.
+ DirIndex = 0;
} else {
- StringRef Directory = Slash.first;
- Name = Slash.second;
DirIndex = 0;
for (unsigned End = MCDwarfDirs.size(); DirIndex < End; DirIndex++) {
if (Directory == MCDwarfDirs[DirIndex])
@@ -291,16 +301,16 @@ unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
}
// The DirIndex is one based, as DirIndex of 0 is used for FileNames with
// no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the
- // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames are
- // stored at MCDwarfFiles[FileNumber].Name .
+ // directory names are stored at MCDwarfDirs[DirIndex-1] where FileNames
+ // are stored at MCDwarfFiles[FileNumber].Name .
DirIndex++;
}
// Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
// vector.
- char *Buf = static_cast<char *>(Allocate(Name.size()));
- memcpy(Buf, Name.data(), Name.size());
- File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+ char *Buf = static_cast<char *>(Allocate(FileName.size()));
+ memcpy(Buf, FileName.data(), FileName.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, FileName.size()), DirIndex);
// return the allocated FileNumber.
return FileNumber;
@@ -314,3 +324,19 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
return MCDwarfFiles[FileNumber] != 0;
}
+
+void MCContext::FatalError(SMLoc Loc, const Twine &Msg) {
+ // If we have a source manager and a location, use it. Otherwise just
+ // use the generic report_fatal_error().
+ if (!SrcMgr || Loc == SMLoc())
+ report_fatal_error(Msg);
+
+ // Use the source manager to print the message.
+ SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg);
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+ exit(1);
+}
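
FatalError() is how MC attaches a source location to a hard error when the
context was built from assembly. A hedged sketch of the call pattern
(assumes Ctx was constructed with a SourceMgr, the new fourth constructor
argument, and that Loc points into one of its buffers):

#include "llvm/MC/MCContext.h"
#include "llvm/Support/SMLoc.h"
using namespace llvm;

// Sketch: prints a located diagnostic, runs interrupt handlers, exits.
// Without a SourceMgr (or with a null Loc) it falls back to
// report_fatal_error().
static void failOn(MCContext &Ctx, SMLoc Loc) {
  Ctx.FatalError(Loc, "expected relocatable expression");
}
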
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
index 4debb288e5ab..5e2cd8387db1 100644
--- a/lib/MC/MCDisassembler/CMakeLists.txt
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -2,29 +2,7 @@ add_llvm_library(LLVMMCDisassembler
Disassembler.cpp
EDDisassembler.cpp
EDInst.cpp
+ EDMain.cpp
EDOperand.cpp
EDToken.cpp
)
-
-add_llvm_library_dependencies(LLVMMCDisassembler
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMTarget
- )
-
-foreach(t ${LLVM_TARGETS_TO_BUILD})
- set(td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t})
- if(EXISTS ${td}/TargetInfo/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Info")
- endif()
- if(EXISTS ${td}/MCTargetDesc/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Desc")
- endif()
- if(EXISTS ${td}/AsmParser/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}AsmParser")
- endif()
- if(EXISTS ${td}/Disassembler/CMakeLists.txt)
- add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Disassembler")
- endif()
-endforeach(t)
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 16e66dc98e74..35f675dc6d1b 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -15,10 +15,13 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class Target;
@@ -36,6 +39,12 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
int TagType, LLVMOpInfoCallback GetOpInfo,
LLVMSymbolLookupCallback SymbolLookUp) {
// Initialize targets and assembly printers/parsers.
+  // FIXME: Clients are responsible for initializing the targets, which would
+  // normally be done by calling the routines in "llvm-c/Target.h", but those
+  // are static inline functions. The current use of LLVMCreateDisasm() is to
+  // dynamically load libLTO with dlopen() and then look the symbols up with
+  // dlsym(). Static inline routines cannot be resolved that way, which is
+  // why the calls to them were added back in this 'C' library API.
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllAsmParsers();
@@ -50,6 +59,9 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
assert(MAI && "Unable to create target asm info!");
+ const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+ assert(MII && "Unable to create target instruction info!");
+
const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName);
assert(MRI && "Unable to create target register info!");
@@ -73,13 +85,13 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
- *MAI, *STI);
+ *MAI, *MII, *MRI, *STI);
assert(IP && "Unable to create instruction printer!");
LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
- Ctx, DisAsm, IP);
+ STI, MII, Ctx, DisAsm, IP);
assert(DC && "Allocation failure!");
return DC;
@@ -170,5 +182,5 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
return Size;
}
}
- return 0;
+ llvm_unreachable("Invalid DecodeStatus!");
}
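
With MCInstrInfo now threaded into the instruction printer, a typical C API
session is unchanged for clients. A hedged round-trip sketch (assumes LLVM
was built with the x86 target; LLVMDisasmDispose() comes from the same
llvm-c/Disassembler.h header as LLVMCreateDisasm()):

#include "llvm-c/Disassembler.h"
#include <stdint.h>
#include <stdio.h>

int main(void) {
  LLVMDisasmContextRef DC =
      LLVMCreateDisasm("x86_64-unknown-unknown", 0, 0, 0, 0);
  if (!DC)
    return 1;                          // target not linked in
  uint8_t Bytes[] = { 0xc3 };          // x86 "ret"
  char Text[64];
  size_t Size =
      LLVMDisasmInstruction(DC, Bytes, sizeof(Bytes), 0, Text, sizeof(Text));
  if (Size)
    printf("%u byte(s):%s\n", (unsigned)Size, Text);
  LLVMDisasmDispose(DC);
  return 0;
}
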
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 238ff7d50025..880a31ad76b9 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -28,7 +28,9 @@ class MCContext;
class MCAsmInfo;
class MCDisassembler;
class MCInstPrinter;
+class MCInstrInfo;
class MCRegisterInfo;
+class MCSubtargetInfo;
class Target;
//
@@ -61,6 +63,10 @@ private:
llvm::OwningPtr<const llvm::MCAsmInfo> MAI;
// The register information for the target architecture.
llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
+ // The subtarget information for the target architecture.
+ llvm::OwningPtr<const llvm::MCSubtargetInfo> MSI;
+ // The instruction information for the target architecture.
+ llvm::OwningPtr<const llvm::MCInstrInfo> MII;
// The assembly context for creating symbols and MCExprs.
llvm::OwningPtr<const llvm::MCContext> Ctx;
// The disassembler for the target architecture.
@@ -78,6 +84,8 @@ public:
LLVMSymbolLookupCallback symbolLookUp,
const Target *theTarget, const MCAsmInfo *mAI,
const MCRegisterInfo *mRI,
+ const MCSubtargetInfo *mSI,
+ const MCInstrInfo *mII,
llvm::MCContext *ctx, const MCDisassembler *disAsm,
MCInstPrinter *iP) : TripleName(tripleName),
DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo),
@@ -85,6 +93,8 @@ public:
CommentStream(CommentsToEmit) {
MAI.reset(mAI);
MRI.reset(mRI);
+ MSI.reset(mSI);
+ MII.reset(mII);
Ctx.reset(ctx);
DisAsm.reset(disAsm);
IP.reset(iP);
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 83362a21f77b..b2672ca3ccba 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -34,10 +35,8 @@
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
using namespace llvm;
-bool EDDisassembler::sInitialized = false;
EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
struct TripleMap {
@@ -49,8 +48,7 @@ static struct TripleMap triplemap[] = {
{ Triple::x86, "i386-unknown-unknown" },
{ Triple::x86_64, "x86_64-unknown-unknown" },
{ Triple::arm, "arm-unknown-unknown" },
- { Triple::thumb, "thumb-unknown-unknown" },
- { Triple::InvalidArch, NULL, }
+ { Triple::thumb, "thumb-unknown-unknown" }
};
/// tripleFromArch - Returns the triple string corresponding to a given architecture,
@@ -77,90 +75,69 @@ static const char *tripleFromArch(Triple::ArchType arch) {
static int getLLVMSyntaxVariant(Triple::ArchType arch,
EDDisassembler::AssemblySyntax syntax) {
switch (syntax) {
- default:
- return -1;
// Mappings below from X86AsmPrinter.cpp
case EDDisassembler::kEDAssemblySyntaxX86ATT:
if (arch == Triple::x86 || arch == Triple::x86_64)
return 0;
- else
- return -1;
+ break;
case EDDisassembler::kEDAssemblySyntaxX86Intel:
if (arch == Triple::x86 || arch == Triple::x86_64)
return 1;
- else
- return -1;
+ break;
case EDDisassembler::kEDAssemblySyntaxARMUAL:
if (arch == Triple::arm || arch == Triple::thumb)
return 0;
- else
- return -1;
+ break;
}
-}
-void EDDisassembler::initialize() {
- if (sInitialized)
- return;
-
- sInitialized = true;
-
- InitializeAllTargetInfos();
- InitializeAllTargetMCs();
- InitializeAllAsmParsers();
- InitializeAllDisassemblers();
+ return -1;
}
-#undef BRINGUP_TARGET
-
EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
AssemblySyntax syntax) {
+ const char *triple = tripleFromArch(arch);
+ return getDisassembler(StringRef(triple), syntax);
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
CPUKey key;
- key.Arch = arch;
+ key.Triple = str.str();
key.Syntax = syntax;
-
+
EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
-
+
if (i != sDisassemblers.end()) {
- return i->second;
- } else {
- EDDisassembler* sdd = new EDDisassembler(key);
- if (!sdd->valid()) {
- delete sdd;
- return NULL;
- }
-
- sDisassemblers[key] = sdd;
-
- return sdd;
+ return i->second;
}
-
- return NULL;
-}
-EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
- AssemblySyntax syntax) {
- return getDisassembler(Triple(str).getArch(), syntax);
+ EDDisassembler *sdd = new EDDisassembler(key);
+ if (!sdd->valid()) {
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd;
+
+ return sdd;
}
EDDisassembler::EDDisassembler(CPUKey &key) :
Valid(false),
HasSemantics(false),
ErrorStream(nulls()),
- Key(key) {
- const char *triple = tripleFromArch(key.Arch);
-
- if (!triple)
- return;
+ Key(key),
+ TgtTriple(key.Triple.c_str()) {
- LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(TgtTriple.getArch(), key.Syntax);
if (LLVMSyntaxVariant < 0)
return;
- std::string tripleString(triple);
+ std::string tripleString(key.Triple);
std::string errorString;
- Tgt = TargetRegistry::lookupTarget(tripleString,
+ Tgt = TargetRegistry::lookupTarget(key.Triple,
errorString);
if (!Tgt)
@@ -189,10 +166,16 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
return;
InstInfos = Disassembler->getEDInfo();
-
+
+ MII.reset(Tgt->createMCInstrInfo());
+
+ if (!MII)
+ return;
+
InstString.reset(new std::string);
InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, *STI));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo,
+ *MII, *MRI, *STI));
if (!InstPrinter)
return;
@@ -279,7 +262,7 @@ void EDDisassembler::initMaps(const MCRegisterInfo &registerInfo) {
RegRMap[registerName] = registerIndex;
}
- switch (Key.Arch) {
+ switch (TgtTriple.getArch()) {
default:
break;
case Triple::x86:
@@ -337,13 +320,9 @@ int EDDisassembler::printInst(std::string &str, MCInst &inst) {
return 0;
}
-static void diag_handler(const SMDiagnostic &diag,
- void *context)
-{
- if (context) {
- EDDisassembler *disassembler = static_cast<EDDisassembler*>(context);
- diag.Print("", disassembler->ErrorStream);
- }
+static void diag_handler(const SMDiagnostic &diag, void *context) {
+ if (context)
+ diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream);
}
int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
@@ -351,7 +330,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
const std::string &str) {
int ret = 0;
- switch (Key.Arch) {
+ switch (TgtTriple.getArch()) {
default:
return -1;
case Triple::x86:
@@ -376,8 +355,7 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
context, *streamer,
*AsmInfo));
- StringRef triple = tripleFromArch(Key.Arch);
- OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
+ OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(Key.Triple.c_str(), "", ""));
OwningPtr<MCTargetAsmParser>
TargetParser(Tgt->createMCAsmParser(*STI, *genericParser));
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 38c22038c510..6f71908d2bcf 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -25,6 +25,7 @@
#include <map>
#include <set>
+#include <string>
#include <vector>
namespace llvm {
@@ -35,8 +36,9 @@ class MCContext;
class MCAsmInfo;
class MCAsmLexer;
class MCDisassembler;
-class MCInstPrinter;
class MCInst;
+class MCInstPrinter;
+class MCInstrInfo;
class MCParsedAsmOperand;
class MCRegisterInfo;
class MCStreamer;
@@ -74,28 +76,26 @@ struct EDDisassembler {
/// pair
struct CPUKey {
/// The architecture type
- llvm::Triple::ArchType Arch;
+ std::string Triple;
/// The assembly syntax
AssemblySyntax Syntax;
/// operator== - Equality operator
bool operator==(const CPUKey &key) const {
- return (Arch == key.Arch &&
+ return (Triple == key.Triple &&
Syntax == key.Syntax);
}
/// operator< - Less-than operator
bool operator<(const CPUKey &key) const {
- return ((Arch < key.Arch) ||
- ((Arch == key.Arch) && Syntax < (key.Syntax)));
+ return ((Triple < key.Triple) ||
+ ((Triple == key.Triple) && Syntax < (key.Syntax)));
}
};
typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
- /// True if the disassembler registry has been initialized; false if not
- static bool sInitialized;
/// A map from disassembler specifications to disassemblers. Populated
/// lazily.
static DisassemblerMap_t sDisassemblers;
@@ -116,9 +116,6 @@ struct EDDisassembler {
static EDDisassembler *getDisassembler(llvm::StringRef str,
AssemblySyntax syntax);
- /// initialize - Initializes the disassembler registry and the LLVM backend
- static void initialize();
-
////////////////////////
// Per-object members //
////////////////////////
@@ -131,14 +128,18 @@ struct EDDisassembler {
/// The stream to write errors to
llvm::raw_ostream &ErrorStream;
- /// The architecture/syntax pair for the current architecture
+ /// The triple/syntax pair for the current architecture
CPUKey Key;
+  /// The Triple for the current architecture
+ Triple TgtTriple;
/// The LLVM target corresponding to the disassembler
const llvm::Target *Tgt;
/// The assembly information for the target architecture
llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
/// The subtarget information for the target architecture
llvm::OwningPtr<const llvm::MCSubtargetInfo> STI;
+ // The instruction information for the target architecture.
+ llvm::OwningPtr<const llvm::MCInstrInfo> MII;
// The register information for the target architecture.
llvm::OwningPtr<const llvm::MCRegisterInfo> MRI;
/// The disassembler for the target architecture
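
Because CPUKey now stores the full triple text instead of just the
architecture, different triples of the same architecture occupy separate
cache slots. A hedged sketch (EDDisassembler.h is this library's internal
header; both triple strings are illustrative):

#include "EDDisassembler.h"
using namespace llvm;

// Sketch: two ARM triples, two distinct cached disassemblers.
static void twoARMVariants() {
  EDDisassembler *Generic = EDDisassembler::getDisassembler(
      StringRef("arm-unknown-unknown"),
      EDDisassembler::kEDAssemblySyntaxARMUAL);
  EDDisassembler *Darwin = EDDisassembler::getDisassembler(
      StringRef("armv7-apple-darwin"),
      EDDisassembler::kEDAssemblySyntaxARMUAL);
  (void)Generic; (void)Darwin; // keyed by string compare, not ArchType
}
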
diff --git a/lib/MC/MCDisassembler/EDMain.cpp b/lib/MC/MCDisassembler/EDMain.cpp
new file mode 100644
index 000000000000..c658717b0249
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDMain.cpp
@@ -0,0 +1,280 @@
+//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the enhanced disassembler's public C API.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+#include "llvm-c/EnhancedDisassembly.h"
+using namespace llvm;
+
+int EDGetDisassembler(EDDisassemblerRef *disassembler,
+ const char *triple,
+ EDAssemblySyntax_t syntax) {
+ EDDisassembler::AssemblySyntax Syntax;
+ switch (syntax) {
+ default: llvm_unreachable("Unknown assembly syntax!");
+ case kEDAssemblySyntaxX86Intel:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
+ break;
+ case kEDAssemblySyntaxX86ATT:
+ Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
+ break;
+ case kEDAssemblySyntaxARMUAL:
+ Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
+ break;
+ }
+
+ EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
+
+ if (!ret)
+ return -1;
+ *disassembler = ret;
+ return 0;
+}
+
+int EDGetRegisterName(const char** regName,
+ EDDisassemblerRef disassembler,
+ unsigned regID) {
+ const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
+ if (!name)
+ return -1;
+ *regName = name;
+ return 0;
+}
+
+int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
+ unsigned regID) {
+ return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
+}
+
+int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
+ unsigned regID) {
+ return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
+}
+
+unsigned int EDCreateInsts(EDInstRef *insts,
+ unsigned int count,
+ EDDisassemblerRef disassembler,
+ ::EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ unsigned int index;
+
+ for (index = 0; index < count; ++index) {
+ EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
+ address, arg);
+
+ if (!inst)
+ return index;
+
+ insts[index] = inst;
+ address += inst->byteSize();
+ }
+
+ return count;
+}
+
+void EDReleaseInst(EDInstRef inst) {
+ delete ((EDInst*)inst);
+}
+
+int EDInstByteSize(EDInstRef inst) {
+ return ((EDInst*)inst)->byteSize();
+}
+
+int EDGetInstString(const char **buf,
+ EDInstRef inst) {
+ return ((EDInst*)inst)->getString(*buf);
+}
+
+int EDInstID(unsigned *instID, EDInstRef inst) {
+ *instID = ((EDInst*)inst)->instID();
+ return 0;
+}
+
+int EDInstIsBranch(EDInstRef inst) {
+ return ((EDInst*)inst)->isBranch();
+}
+
+int EDInstIsMove(EDInstRef inst) {
+ return ((EDInst*)inst)->isMove();
+}
+
+int EDBranchTargetID(EDInstRef inst) {
+ return ((EDInst*)inst)->branchTargetID();
+}
+
+int EDMoveSourceID(EDInstRef inst) {
+ return ((EDInst*)inst)->moveSourceID();
+}
+
+int EDMoveTargetID(EDInstRef inst) {
+ return ((EDInst*)inst)->moveTargetID();
+}
+
+int EDNumTokens(EDInstRef inst) {
+ return ((EDInst*)inst)->numTokens();
+}
+
+int EDGetToken(EDTokenRef *token,
+ EDInstRef inst,
+ int index) {
+ return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
+}
+
+int EDGetTokenString(const char **buf,
+ EDTokenRef token) {
+ return ((EDToken*)token)->getString(*buf);
+}
+
+int EDOperandIndexForToken(EDTokenRef token) {
+ return ((EDToken*)token)->operandID();
+}
+
+int EDTokenIsWhitespace(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
+}
+
+int EDTokenIsPunctuation(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
+}
+
+int EDTokenIsOpcode(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
+}
+
+int EDTokenIsLiteral(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
+}
+
+int EDTokenIsRegister(EDTokenRef token) {
+ return ((EDToken*)token)->type() == EDToken::kTokenRegister;
+}
+
+int EDTokenIsNegativeLiteral(EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
+ return -1;
+
+ return ((EDToken*)token)->literalSign();
+}
+
+int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
+ return -1;
+
+ return ((EDToken*)token)->literalAbsoluteValue(*value);
+}
+
+int EDRegisterTokenValue(unsigned *registerID,
+ EDTokenRef token) {
+ if (((EDToken*)token)->type() != EDToken::kTokenRegister)
+ return -1;
+
+ return ((EDToken*)token)->registerID(*registerID);
+}
+
+int EDNumOperands(EDInstRef inst) {
+ return ((EDInst*)inst)->numOperands();
+}
+
+int EDGetOperand(EDOperandRef *operand,
+ EDInstRef inst,
+ int index) {
+ return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
+}
+
+int EDOperandIsRegister(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isRegister();
+}
+
+int EDOperandIsImmediate(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isImmediate();
+}
+
+int EDOperandIsMemory(EDOperandRef operand) {
+ return ((EDOperand*)operand)->isMemory();
+}
+
+int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isRegister())
+ return -1;
+ *value = ((EDOperand*)operand)->regVal();
+ return 0;
+}
+
+int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
+ if (!((EDOperand*)operand)->isImmediate())
+ return -1;
+ *value = ((EDOperand*)operand)->immediateVal();
+ return 0;
+}
+
+int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ ::EDRegisterReaderCallback regReader, void *arg) {
+ return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
+}
+
+#ifdef __BLOCKS__
+
+struct ByteReaderWrapper {
+ EDByteBlock_t byteBlock;
+};
+
+static int readerWrapperCallback(uint8_t *byte,
+ uint64_t address,
+ void *arg) {
+ struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg;
+ return wrapper->byteBlock(byte, address);
+}
+
+unsigned int EDBlockCreateInsts(EDInstRef *insts,
+ int count,
+ EDDisassemblerRef disassembler,
+ EDByteBlock_t byteBlock,
+ uint64_t address) {
+ struct ByteReaderWrapper wrapper;
+ wrapper.byteBlock = byteBlock;
+
+ return EDCreateInsts(insts,
+ count,
+ disassembler,
+ readerWrapperCallback,
+ address,
+ (void*)&wrapper);
+}
+
+int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
+ EDRegisterBlock_t regBlock) {
+ return ((EDOperand*)operand)->evaluate(*result, regBlock);
+}
+
+int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
+ return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
+}
+
+#else
+
+extern "C" unsigned int EDBlockCreateInsts() {
+ return 0;
+}
+
+extern "C" int EDBlockEvaluateOperand() {
+ return -1;
+}
+
+extern "C" int EDBlockVisitTokens() {
+ return -1;
+}
+
+#endif
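
EDMain.cpp is a thin C shim: each entry point casts the opaque ref back to
the underlying ED class and forwards. A hedged sketch of the reader-callback
path (buffer handling is illustrative and unbounded; zero return values
follow this file's success convention):

#include "llvm-c/EnhancedDisassembly.h"
#include <stdint.h>
#include <stdio.h>

// Byte reader over a caller-supplied buffer; no bounds check in this sketch.
static int readByte(uint8_t *byte, uint64_t address, void *arg) {
  *byte = ((const uint8_t *)arg)[address];
  return 0;
}

static void printFirstInst(const uint8_t *code) {
  EDDisassemblerRef DC;
  if (EDGetDisassembler(&DC, "x86_64-unknown-unknown",
                        kEDAssemblySyntaxX86ATT) != 0)
    return;                                   // triple not available
  EDInstRef Inst;
  if (EDCreateInsts(&Inst, 1, DC, readByte, 0, (void *)code) != 1)
    return;                                   // nothing decoded
  const char *Str;
  if (EDGetInstString(&Str, Inst) == 0)
    printf("%s\n", Str);
  EDReleaseInst(Inst);
}
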
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index 6a4e56ff72c4..48b374659d5e 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -30,8 +30,10 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
MCOpIndex(mcOpIndex) {
unsigned int numMCOperands = 0;
- if (Disassembler.Key.Arch == Triple::x86 ||
- Disassembler.Key.Arch == Triple::x86_64) {
+ Triple::ArchType arch = Disassembler.TgtTriple.getArch();
+
+ if (arch == Triple::x86 ||
+ arch == Triple::x86_64) {
uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
switch (operandType) {
@@ -54,8 +56,8 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
break;
}
}
- else if (Disassembler.Key.Arch == Triple::arm ||
- Disassembler.Key.Arch == Triple::thumb) {
+ else if (arch == Triple::arm ||
+ arch == Triple::thumb) {
uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
switch (operandType) {
@@ -126,7 +128,9 @@ int EDOperand::evaluate(uint64_t &result,
void *arg) {
uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
- switch (Disassembler.Key.Arch) {
+ Triple::ArchType arch = Disassembler.TgtTriple.getArch();
+
+ switch (arch) {
default:
return -1;
case Triple::x86:
@@ -168,7 +172,7 @@ int EDOperand::evaluate(uint64_t &result,
unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
- if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) {
+ if (segmentReg != 0 && arch == Triple::x86_64) {
unsigned fsID = Disassembler.registerIDWithName("FS");
unsigned gsID = Disassembler.registerIDWithName("GS");
@@ -200,7 +204,6 @@ int EDOperand::evaluate(uint64_t &result,
return 0;
}
} // switch (operandType)
- break;
case Triple::arm:
case Triple::thumb:
switch (operandType) {
@@ -236,10 +239,7 @@ int EDOperand::evaluate(uint64_t &result,
return 0;
}
}
- break;
}
-
- return -1;
}
int EDOperand::isRegister() {
diff --git a/lib/MC/MCDisassembler/LLVMBuild.txt b/lib/MC/MCDisassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..d73c6adcbb47
--- /dev/null
+++ b/lib/MC/MCDisassembler/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/MC/MCDisassembler/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MCDisassembler
+parent = MC
+required_libraries = MC MCParser Support
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 4658a3093fab..84a34f1d8735 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -19,10 +19,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
@@ -207,7 +209,7 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
//
// This emits the Dwarf file and the line tables.
//
-void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
+const MCSymbol *MCDwarfFileTable::Emit(MCStreamer *MCOS) {
MCContext &context = MCOS->getContext();
// Switch to the section where the table will be emitted into.
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection());
@@ -320,6 +322,8 @@ void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
// This is the end of the section, so set the value of the symbol at the end
// of this section (that was used in a previous expression).
MCOS->EmitLabel(LineEndSym);
+
+ return LineStartSym;
}
/// Utility function to write the encoding to an object writer.
@@ -372,10 +376,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
// it with DW_LNS_advance_line.
if (Temp >= DWARF2_LINE_RANGE) {
OS << char(dwarf::DW_LNS_advance_line);
- SmallString<32> Tmp;
- raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeSLEB128(LineDelta, OSE);
- OS << OSE.str();
+ MCObjectWriter::EncodeSLEB128(LineDelta, OS);
LineDelta = 0;
Temp = 0 - DWARF2_LINE_BASE;
@@ -411,10 +412,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
// Otherwise use DW_LNS_advance_pc.
OS << char(dwarf::DW_LNS_advance_pc);
- SmallString<32> Tmp;
- raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
- OS << OSE.str();
+ MCObjectWriter::EncodeULEB128(AddrDelta, OS);
if (NeedCopy)
OS << char(dwarf::DW_LNS_copy);
@@ -430,6 +428,349 @@ void MCDwarfFile::dump() const {
print(dbgs());
}
+// Utility function to write a tuple for .debug_abbrev.
+static void EmitAbbrev(MCStreamer *MCOS, uint64_t Name, uint64_t Form) {
+ MCOS->EmitULEB128IntValue(Name);
+ MCOS->EmitULEB128IntValue(Form);
+}
+
+// When generating dwarf for assembly source files this emits the data for
+// the .debug_abbrev section, which contains the abbreviations for three DIEs.
+static void EmitGenDwarfAbbrev(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+
+ // DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string);
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty())
+ EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_producer, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_language, dwarf::DW_FORM_data2);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_label DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_label);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1);
+ EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_file, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_decl_line, dwarf::DW_FORM_data4);
+ EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr);
+ EmitAbbrev(MCOS, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+ MCOS->EmitULEB128IntValue(dwarf::DW_TAG_unspecified_parameters);
+ MCOS->EmitIntValue(dwarf::DW_CHILDREN_no, 1);
+ EmitAbbrev(MCOS, 0, 0);
+
+ // Terminate the abbreviations for this compilation unit.
+ MCOS->EmitIntValue(0, 1);
+}
+
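
The abbreviation stream written above is a sequence of ULEB128-encoded values. A standalone sketch of the encoding (not part of this patch; MCStreamer::EmitULEB128IntValue does the equivalent internally):

#include <cstdint>
#include <vector>

// Encode Value as ULEB128: seven data bits per byte, with the high bit
// set on every byte except the last.
static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

For example, the (DW_AT_stmt_list, DW_FORM_data4) tuple emitted by EmitAbbrev encodes as the two bytes 0x10 0x06, since both constants fit in seven bits.
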
+// When generating dwarf for assembly source files this emits the data for the
+// .debug_aranges section, which contains a header and a table of pairs of
+// PointerSize'ed values for the address and size of section(s) with line table
+// entries (just the default .text in our case), and a terminating pair of zeros.
+static void EmitGenDwarfAranges(MCStreamer *MCOS) {
+ MCContext &context = MCOS->getContext();
+
+ // Create a symbol at the end of the section that the dwarf debugging info
+ // is being generated for; it is used later in the expression that calculates
+ // the size of the section for the table.
+ MCOS->SwitchSection(context.getGenDwarfSection());
+ MCSymbol *SectionEndSym = context.CreateTempSymbol();
+ MCOS->EmitLabel(SectionEndSym);
+ context.setGenDwarfSectionEndSym(SectionEndSym);
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // This will be the length of the .debug_aranges section; first account for
+ // the size of each item in the header (see below where we emit these items).
+ int Length = 4 + 2 + 4 + 1 + 1;
+
+ // Figure the padding after the header before the table of address and size
+ // pairs whose values are PointerSize'ed.
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1));
+ if (Pad == 2 * AddrSize)
+ Pad = 0;
+ Length += Pad;
+
+ // Add the size of the pair of PointerSize'ed values for the address and size
+ // of the one default .text section we have in the table.
+ Length += 2 * AddrSize;
+ // And the pair of terminating zeros.
+ Length += 2 * AddrSize;
+
+ // Emit the header for this section.
+ // The 4 byte length not including the 4 byte value for the length.
+ MCOS->EmitIntValue(Length - 4, 4);
+ // The 2 byte version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+ // The 4 byte offset to the compile unit in the .debug_info from the start
+ // of the .debug_info; it is at the start of that section, so this is zero.
+ MCOS->EmitIntValue(0, 4);
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+ // The 1 byte size of a segment descriptor, we use a value of zero.
+ MCOS->EmitIntValue(0, 1);
+ // Align the header with the padding if needed, before we put out the table.
+ for(int i = 0; i < Pad; i++)
+ MCOS->EmitIntValue(0, 1);
+
+ // Now emit the table of pairs of PointerSize'ed values for the section(s)
+ // address and size, in our case just the one default .text section.
+ const MCExpr *Addr = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ const MCExpr *Size = MakeStartMinusEndExpr(*MCOS,
+ *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0);
+ MCOS->EmitAbsValue(Addr, AddrSize);
+ MCOS->EmitAbsValue(Size, AddrSize);
+
+ // And finally the pair of terminating zeros.
+ MCOS->EmitIntValue(0, AddrSize);
+ MCOS->EmitIntValue(0, AddrSize);
+}
+
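
The padding computed in EmitGenDwarfAranges rounds the header up so the address/size tuples start on a 2*AddrSize boundary. A minimal sketch of that rule, using the header size from above (not part of the patch):

#include <cassert>

static int arangesPad(int HeaderLength, int AddrSize) {
  // Distance from HeaderLength up to the next multiple of 2*AddrSize.
  int Pad = 2 * AddrSize - (HeaderLength & (2 * AddrSize - 1));
  if (Pad == 2 * AddrSize)
    Pad = 0; // header already aligned
  return Pad;
}

int main() {
  // The header above is 4 + 2 + 4 + 1 + 1 = 12 bytes.
  assert(arangesPad(12, 8) == 4); // 64-bit targets: pad 12 up to 16
  assert(arangesPad(12, 4) == 4); // 32-bit targets: pad 12 up to 16
  return 0;
}

The bit-masking form assumes 2*AddrSize is a power of two, which holds for the 4- and 8-byte pointer sizes seen in practice.
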
+// When generating dwarf for assembly source files this emits the data for the
+// .debug_info section, which contains three parts: the header, the compile_unit
+// DIE, and a list of label DIEs.
+static void EmitGenDwarfInfo(MCStreamer *MCOS,
+ const MCSymbol *AbbrevSectionSymbol,
+ const MCSymbol *LineSectionSymbol) {
+ MCContext &context = MCOS->getContext();
+
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+
+ // Create symbols at the start and end of this section; they are used in the
+ // expression that calculates the length in the header.
+ MCSymbol *InfoStart = context.CreateTempSymbol();
+ MCOS->EmitLabel(InfoStart);
+ MCSymbol *InfoEnd = context.CreateTempSymbol();
+
+ // First part: the header.
+
+ // The 4 byte total length of the information for this compilation unit, not
+ // including these 4 bytes.
+ const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
+ MCOS->EmitAbsValue(Length, 4);
+
+ // The 2 byte DWARF version, which is 2.
+ MCOS->EmitIntValue(2, 2);
+
+ // The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev;
+ // it is at the start of that section, so this is zero.
+ if (AbbrevSectionSymbol) {
+ MCOS->EmitSymbolValue(AbbrevSectionSymbol, 4);
+ } else {
+ MCOS->EmitIntValue(0, 4);
+ }
+
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
+ int AddrSize = asmInfo.getPointerSize();
+ // The 1 byte size of an address.
+ MCOS->EmitIntValue(AddrSize, 1);
+
+ // Second part: the compile_unit DIE.
+
+ // The DW_TAG_compile_unit DIE abbrev (1).
+ MCOS->EmitULEB128IntValue(1);
+
+ // DW_AT_stmt_list, a 4 byte offset from the start of the .debug_line
+ // section; the line table is at the start of that section, so this is zero.
+ if (LineSectionSymbol) {
+ MCOS->EmitSymbolValue(LineSectionSymbol, 4);
+ } else {
+ MCOS->EmitIntValue(0, 4);
+ }
+
+ // AT_low_pc, the first address of the default .text section.
+ const MCExpr *Start = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(Start, AddrSize);
+
+ // AT_high_pc, the last address of the default .text section.
+ const MCExpr *End = MCSymbolRefExpr::Create(
+ context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(End, AddrSize);
+
+ // AT_name, the name of the source file. Reconstruct from the first directory
+ // and file table entries.
+ const std::vector<StringRef> &MCDwarfDirs =
+ context.getMCDwarfDirs();
+ if (MCDwarfDirs.size() > 0) {
+ MCOS->EmitBytes(MCDwarfDirs[0], 0);
+ MCOS->EmitBytes("/", 0);
+ }
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ MCOS->getContext().getMCDwarfFiles();
+ MCOS->EmitBytes(MCDwarfFiles[1]->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_comp_dir, the working directory the assembly was done in.
+ llvm::sys::Path CWD = llvm::sys::Path::GetCurrentDirectory();
+ MCOS->EmitBytes(StringRef(CWD.c_str()), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_APPLE_flags, the command line arguments of the assembler tool.
+ StringRef DwarfDebugFlags = context.getDwarfDebugFlags();
+ if (!DwarfDebugFlags.empty()){
+ MCOS->EmitBytes(DwarfDebugFlags, 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+ }
+
+ // AT_producer, the version of the assembler tool.
+ MCOS->EmitBytes(StringRef("llvm-mc (based on LLVM "), 0);
+ MCOS->EmitBytes(StringRef(PACKAGE_VERSION), 0);
+ MCOS->EmitBytes(StringRef(")"), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_language, a 2 byte value. We use DW_LANG_Mips_Assembler as the dwarf2
+ // draft has no standard code for assembler.
+ MCOS->EmitIntValue(dwarf::DW_LANG_Mips_Assembler, 2);
+
+ // Third part: the list of label DIEs.
+
+ // Loop on saved info for dwarf labels and create the DIEs for them.
+ const std::vector<const MCGenDwarfLabelEntry *> &Entries =
+ MCOS->getContext().getMCGenDwarfLabelEntries();
+ for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfLabelEntry *Entry = *it;
+
+ // The DW_TAG_label DIE abbrev (2).
+ MCOS->EmitULEB128IntValue(2);
+
+ // AT_name, the name of the label without any leading underbar.
+ MCOS->EmitBytes(Entry->getName(), 0);
+ MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
+
+ // AT_decl_file, index into the file table.
+ MCOS->EmitIntValue(Entry->getFileNumber(), 4);
+
+ // AT_decl_line, source line number.
+ MCOS->EmitIntValue(Entry->getLineNumber(), 4);
+
+ // AT_low_pc, start address of the label.
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
+ MCSymbolRefExpr::VK_None, context);
+ MCOS->EmitAbsValue(AT_low_pc, AddrSize);
+
+ // DW_AT_prototyped, a one byte flag value of 0 saying we have no prototype.
+ MCOS->EmitIntValue(0, 1);
+
+ // The DW_TAG_unspecified_parameters DIE abbrev (3).
+ MCOS->EmitULEB128IntValue(3);
+
+ // Add the NULL DIE terminating the DW_TAG_label DIE's children.
+ MCOS->EmitIntValue(0, 1);
+ }
+ // Deallocate the MCGenDwarfLabelEntry classes that saved away the info
+ // for the dwarf labels.
+ for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
+ Entries.begin(), ie = Entries.end(); it != ie;
+ ++it) {
+ const MCGenDwarfLabelEntry *Entry = *it;
+ delete Entry;
+ }
+
+ // Add the NULL DIE terminating the Compile Unit DIE's children.
+ MCOS->EmitIntValue(0, 1);
+
+ // Now set the value of the symbol at the end of the info section.
+ MCOS->EmitLabel(InfoEnd);
+}
+
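
The unit length emitted at the top of EmitGenDwarfInfo is a symbol-difference expression resolved at layout time. MakeStartMinusEndExpr, defined earlier in MCDwarf.cpp and not shown in this hunk, builds essentially the following shape; this is a sketch against the MCExpr API of this revision:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

// Build (End - Start - IntVal) as a relocatable MCExpr.
static const llvm::MCExpr *buildLengthExpr(const llvm::MCSymbol *Start,
                                           const llvm::MCSymbol *End,
                                           int IntVal, llvm::MCContext &Ctx) {
  using namespace llvm;
  const MCExpr *Diff = MCBinaryExpr::CreateSub(
      MCSymbolRefExpr::Create(End, Ctx),
      MCSymbolRefExpr::Create(Start, Ctx), Ctx);
  return MCBinaryExpr::CreateSub(Diff,
      MCConstantExpr::Create(IntVal, Ctx), Ctx);
}

Subtracting 4 accounts for the length field itself, which DWARF excludes from the unit length.
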
+//
+// When generating dwarf for assembly source files this emits the Dwarf
+// sections.
+//
+void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) {
+ // Create the dwarf sections in this order (.debug_line already created).
+ MCContext &context = MCOS->getContext();
+ const MCAsmInfo &AsmInfo = context.getAsmInfo();
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfAbbrevSection());
+ MCSymbol *AbbrevSectionSymbol;
+ if (AsmInfo.doesDwarfRequireRelocationForSectionOffset()) {
+ AbbrevSectionSymbol = context.CreateTempSymbol();
+ MCOS->EmitLabel(AbbrevSectionSymbol);
+ } else {
+ AbbrevSectionSymbol = NULL;
+ LineSectionSymbol = NULL;
+ }
+ MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection());
+
+ // If there are no line table entries then do not emit any section contents.
+ if (context.getMCLineSections().empty())
+ return;
+
+ // Output the data for .debug_aranges section.
+ EmitGenDwarfAranges(MCOS);
+
+ // Output the data for .debug_abbrev section.
+ EmitGenDwarfAbbrev(MCOS);
+
+ // Output the data for .debug_info section.
+ EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol);
+}
+
+//
+// When generating dwarf for assembly source files this is called when the
+// symbol for a label is created. If this symbol is not a temporary and is in
+// the section that dwarf is being generated for, save the needed info to create
+// a dwarf label.
+//
+void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
+ SourceMgr &SrcMgr, SMLoc &Loc) {
+ // We won't create dwarf labels for temporary symbols or symbols not in
+ // the default text section.
+ if (Symbol->isTemporary())
+ return;
+ MCContext &context = MCOS->getContext();
+ if (context.getGenDwarfSection() != MCOS->getCurrentSection())
+ return;
+
+ // The dwarf label's name does not have the symbol name's leading
+ // underbar if any.
+ StringRef Name = Symbol->getName();
+ if (Name.startswith("_"))
+ Name = Name.substr(1, Name.size()-1);
+
+ // Get the dwarf file number to be used for the dwarf label.
+ unsigned FileNumber = context.getGenDwarfFileNumber();
+
+ // Finding the line number is the expensive part, which is why we don't pass
+ // it in: for some symbols we won't create a dwarf label.
+ int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer);
+
+ // We create a temporary symbol to use for the AT_high_pc and AT_low_pc
+ // values so that they don't carry things like an ARM thumb bit from the
+ // original symbol, and so won't end up with the low bit set after
+ // relocation.
+ MCSymbol *Label = context.CreateTempSymbol();
+ MCOS->EmitLabel(Label);
+
+ // Create an entry for the info and add it to the other entries.
+ MCGenDwarfLabelEntry *Entry =
+ new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label);
+ MCOS->getContext().addMCGenDwarfLabelEntry(Entry);
+}
+
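
Mapping an SMLoc back to a line number is the costly step mentioned above: it requires finding the owning buffer and then scanning it for newlines. The two-call pattern in isolation (SourceMgr API as of this revision; signatures differ in later LLVM):

#include "llvm/Support/SourceMgr.h"

static unsigned lineOf(llvm::SourceMgr &SrcMgr, llvm::SMLoc Loc) {
  int BufferID = SrcMgr.FindBufferContainingLoc(Loc);
  return SrcMgr.FindLineNumber(Loc, BufferID);
}
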
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
const MCAsmInfo &asmInfo = context.getAsmInfo();
@@ -445,8 +786,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
MCContext &context = streamer.getContext();
unsigned format = symbolEncoding & 0x0f;
switch (format) {
- default:
- assert(0 && "Unknown Encoding");
+ default: llvm_unreachable("Unknown Encoding");
case dwarf::DW_EH_PE_absptr:
case dwarf::DW_EH_PE_signed:
return context.getAsmInfo().getPointerSize();
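
This commit converts many assert(0 && ...) calls to llvm_unreachable. Beyond style, llvm_unreachable is a noreturn marker that tells the compiler the path is dead, so placing it after a switch whose cases all return silences missing-return warnings without a dummy return value. A self-contained sketch:

#include "llvm/Support/ErrorHandling.h"

static unsigned sizeForFormat(unsigned Format) {
  switch (Format) {
  case 0: return 4;
  case 1: return 8;
  }
  llvm_unreachable("unknown format"); // no fallback return needed
}
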
@@ -520,6 +860,7 @@ namespace {
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
+ bool IsSignalFrame,
unsigned lsdaEncoding);
MCSymbol *EmitFDE(MCStreamer &streamer,
const MCSymbol &cieStart,
@@ -536,28 +877,40 @@ namespace {
static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
StringRef Prefix) {
if (Streamer.isVerboseAsm()) {
- const char *EncStr = 0;
+ const char *EncStr;
switch (Encoding) {
- default: EncStr = "<unknown encoding>";
- case dwarf::DW_EH_PE_absptr: EncStr = "absptr";
- case dwarf::DW_EH_PE_omit: EncStr = "omit";
- case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel";
- case dwarf::DW_EH_PE_udata4: EncStr = "udata4";
- case dwarf::DW_EH_PE_udata8: EncStr = "udata8";
- case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4";
- case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4: EncStr = "pcrel udata4";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4: EncStr = "pcrel sdata4";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8: EncStr = "pcrel udata8";
- case dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: EncStr = "pcrel sdata8";
+ default: EncStr = "<unknown encoding>"; break;
+ case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+ case dwarf::DW_EH_PE_omit: EncStr = "omit"; break;
+ case dwarf::DW_EH_PE_pcrel: EncStr = "pcrel"; break;
+ case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+ case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+ case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+ case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+ EncStr = "pcrel udata4";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+ EncStr = "pcrel sdata4";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+ EncStr = "pcrel udata8";
+ break;
+ case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+ EncStr = "pcrel sdata8";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
EncStr = "indirect pcrel udata4";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
EncStr = "indirect pcrel sdata4";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
EncStr = "indirect pcrel udata8";
+ break;
case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
EncStr = "indirect pcrel sdata8";
+ break;
}
Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
@@ -639,11 +992,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
}
return;
}
- case MCCFIInstruction::Remember:
+ case MCCFIInstruction::RememberState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
- case MCCFIInstruction::Restore:
+ case MCCFIInstruction::RestoreState:
if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
@@ -655,6 +1008,19 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
Streamer.EmitULEB128IntValue(Reg);
return;
}
+ case MCCFIInstruction::Restore: {
+ unsigned Reg = Instr.getDestination().getReg();
+ if (VerboseAsm) {
+ Streamer.AddComment("DW_CFA_restore");
+ Streamer.AddComment(Twine("Reg ") + Twine(Reg));
+ }
+ Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
+ return;
+ }
+ case MCCFIInstruction::Escape:
+ if (VerboseAsm) Streamer.AddComment("Escape bytes");
+ Streamer.EmitBytes(Instr.getValues(), 0);
+ return;
}
llvm_unreachable("Unhandled case in switch");
}
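
The new MCCFIInstruction::Restore case relies on DW_CFA_restore being a "primary" DWARF call-frame opcode: the opcode lives in the top two bits (0xc0) and the register number in the low six, so opcode and operand fit in a single byte, which is why the code above emits one byte. A sketch of the byte layout:

#include <cassert>
#include <cstdint>

static uint8_t encodeCFARestore(unsigned Reg) {
  assert(Reg < 64 && "register must fit in the low six bits");
  return 0xc0 | static_cast<uint8_t>(Reg); // dwarf::DW_CFA_restore == 0xc0
}

Registers numbered 64 and up would instead need the extended DW_CFA_restore_extended form, which takes a ULEB128 operand.
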
@@ -738,8 +1104,8 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
// Compact Encoding
Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
- if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") +
- Twine(llvm::utohexstr(Encoding)));
+ if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding: 0x" +
+ Twine::utohexstr(Encoding));
Streamer.EmitIntValue(Encoding, Size);
@@ -766,6 +1132,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
+ bool IsSignalFrame,
unsigned lsdaEncoding) {
MCContext &context = streamer.getContext();
const MCRegisterInfo &MRI = context.getRegisterInfo();
@@ -808,6 +1175,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
if (lsda)
Augmentation += "L";
Augmentation += "R";
+ if (IsSignalFrame)
+ Augmentation += "S";
streamer.EmitBytes(Augmentation.str(), 0);
}
streamer.EmitIntValue(0, 1);
@@ -967,17 +1336,18 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
namespace {
struct CIEKey {
- static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); }
- static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); }
+ static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false); }
+ static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false); }
CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
- unsigned LsdaEncoding_) : Personality(Personality_),
- PersonalityEncoding(PersonalityEncoding_),
- LsdaEncoding(LsdaEncoding_) {
+ unsigned LsdaEncoding_, bool IsSignalFrame_) :
+ Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
+ LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_) {
}
const MCSymbol* Personality;
unsigned PersonalityEncoding;
unsigned LsdaEncoding;
+ bool IsSignalFrame;
};
}
@@ -991,17 +1361,17 @@ namespace llvm {
return CIEKey::getTombstoneKey();
}
static unsigned getHashValue(const CIEKey &Key) {
- FoldingSetNodeID ID;
- ID.AddPointer(Key.Personality);
- ID.AddInteger(Key.PersonalityEncoding);
- ID.AddInteger(Key.LsdaEncoding);
- return ID.ComputeHash();
+ return static_cast<unsigned>(hash_combine(Key.Personality,
+ Key.PersonalityEncoding,
+ Key.LsdaEncoding,
+ Key.IsSignalFrame));
}
static bool isEqual(const CIEKey &LHS,
const CIEKey &RHS) {
return LHS.Personality == RHS.Personality &&
LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
- LHS.LsdaEncoding == RHS.LsdaEncoding;
+ LHS.LsdaEncoding == RHS.LsdaEncoding &&
+ LHS.IsSignalFrame == RHS.IsSignalFrame;
}
};
}
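
getHashValue now uses hash_combine from the newly included llvm/ADT/Hashing.h, which hashes a heterogeneous tuple in a single call instead of accumulating fields into a FoldingSetNodeID. The same pattern in miniature:

#include "llvm/ADT/Hashing.h"

struct Key { const void *Ptr; unsigned Enc; bool Flag; };

static unsigned hashKey(const Key &K) {
  return static_cast<unsigned>(llvm::hash_combine(K.Ptr, K.Enc, K.Flag));
}
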
@@ -1016,12 +1386,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
- // FIXME: This emits both the compact unwind and the old CIE/FDE
- // information. Only one of those is needed.
if (IsEH && MOFI->getCompactUnwindSection())
for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
- if (!Frame.CompactUnwindEncoding)
+ if (Frame.CompactUnwindEncoding)
Emitter.EmitCompactUnwind(Streamer, Frame);
}
@@ -1039,11 +1407,12 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = FrameArray[i];
CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
- Frame.LsdaEncoding);
+ Frame.LsdaEncoding, Frame.IsSignalFrame);
const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
if (!CIEStart)
CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
Frame.PersonalityEncoding, Frame.Lsda,
+ Frame.IsSignalFrame,
Frame.LsdaEncoding);
FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp
index dad2e7ba9878..f9f98e0f730e 100644
--- a/lib/MC/MCELF.cpp
+++ b/lib/MC/MCELF.cpp
@@ -37,7 +37,7 @@ void MCELF::SetType(MCSymbolData &SD, unsigned Type) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift);
SD.setFlags(OtherFlags | (Type << ELF_STT_Shift));
@@ -48,7 +48,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) {
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT ||
Type == ELF::STT_FUNC || Type == ELF::STT_SECTION ||
Type == ELF::STT_FILE || Type == ELF::STT_COMMON ||
- Type == ELF::STT_TLS);
+ Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC);
return Type;
}
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index 12a02a9e9740..171ab4d9bf28 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -7,17 +7,40 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCELFObjectWriter.h"
using namespace llvm;
MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_,
- Triple::OSType OSType_,
+ uint8_t OSABI_,
uint16_t EMachine_,
bool HasRelocationAddend_)
- : OSType(OSType_), EMachine(EMachine_),
+ : OSABI(OSABI_), EMachine(EMachine_),
HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) {
}
-MCELFObjectTargetWriter::~MCELFObjectTargetWriter() {
+/// Default e_flags = 0
+unsigned MCELFObjectTargetWriter::getEFlags() const {
+ return 0;
+}
+
+const MCSymbol *MCELFObjectTargetWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return NULL;
+}
+
+void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup,
+ uint64_t &RelocOffset) {
+}
+
+void
+MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+ // Sort by the r_offset, just like gnu as does.
+ array_pod_sort(Relocs.begin(), Relocs.end());
}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 9ada08ea9530..6c4d0e33a115 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -11,13 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#include "MCELFStreamer.h"
#include "MCELF.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
@@ -29,6 +33,123 @@
using namespace llvm;
+namespace {
+class MCELFStreamer : public MCObjectStreamer {
+public:
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ MCAssembler *Assembler)
+ : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
+
+ ~MCELFStreamer() {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void InitSections();
+ virtual void ChangeSection(const MCSection *Section);
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitThumbFunc(MCSymbol *Func);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ llvm_unreachable("ELF doesn't support this directive");
+ }
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+
+ virtual void EmitFileDirective(StringRef Filename);
+
+ virtual void FinishImpl();
+
+private:
+ virtual void EmitInstToFragment(const MCInst &Inst);
+ virtual void EmitInstToData(const MCInst &Inst);
+
+ void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+ struct LocalCommon {
+ MCSymbolData *SD;
+ uint64_t Size;
+ unsigned ByteAlignment;
+ };
+ std::vector<LocalCommon> LocalCommons;
+
+ SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+ /// @}
+ void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+ SectionKind Kind) {
+ SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+ }
+
+ void SetSectionData() {
+ SetSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionText() {
+ SetSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC, SectionKind::getText());
+ EmitCodeAlignment(4, 0);
+ }
+ void SetSectionBss() {
+ SetSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC, SectionKind::getBSS());
+ EmitCodeAlignment(4, 0);
+ }
+};
+}
+
void MCELFStreamer::InitSections() {
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@@ -61,7 +182,7 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
return;
}
- assert(0 && "invalid assembler flag!");
+ llvm_unreachable("invalid assembler flag!");
}
void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
@@ -130,10 +251,8 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_WeakDefinition:
case MCSA_WeakDefAutoPrivate:
case MCSA_Invalid:
- case MCSA_ELF_TypeIndFunction:
case MCSA_IndirectSymbol:
- assert(0 && "Invalid symbol attribute for ELF!");
- break;
+ llvm_unreachable("Invalid symbol attribute for ELF!");
case MCSA_ELF_TypeGnuUniqueObject:
// Ignore for now.
@@ -162,6 +281,10 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCELF::SetType(SD, ELF::STT_FUNC);
break;
+ case MCSA_ELF_TypeIndFunction:
+ MCELF::SetType(SD, ELF::STT_GNU_IFUNC);
+ break;
+
case MCSA_ELF_TypeObject:
MCELF::SetType(SD, ELF::STT_OBJECT);
break;
@@ -205,10 +328,10 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
if (MCELF::GetBinding(SD) == ELF_STB_Local) {
const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
- ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC,
- SectionKind::getBSS());
+ ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |
+ ELF::SHF_ALLOC,
+ SectionKind::getBSS());
Symbol->setSection(*Section);
struct LocalCommon L = {&SD, Size, ByteAlignment};
@@ -266,6 +389,13 @@ void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
+void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ fixSymbolsInTLSFixups(Value);
+ MCObjectStreamer::EmitValueImpl(Value, Size, AddrSpace);
+}
+
// Add a symbol for the file name of this module. This is the second
// entry in the module's symbol table (the first being the null symbol).
void MCELFStreamer::EmitFileDirective(StringRef Filename) {
@@ -308,6 +438,10 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) {
case MCSymbolRefExpr::VK_ARM_TLSGD:
case MCSymbolRefExpr::VK_ARM_TPOFF:
case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ case MCSymbolRefExpr::VK_Mips_TLSGD:
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL:
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI:
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO:
break;
}
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol());
@@ -349,7 +483,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCELFStreamer::Finish() {
+void MCELFStreamer::FinishImpl() {
EmitFrames(true);
for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
@@ -372,7 +506,7 @@ void MCELFStreamer::Finish() {
SectData.setAlignment(ByteAlignment);
}
- this->MCObjectStreamer::Finish();
+ this->MCObjectStreamer::FinishImpl();
}
MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
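
The net effect of this file's changes: the MCELFStreamer class definition moves out of the now-deleted private header (below) into an anonymous namespace here, so the concrete type has internal linkage and is reachable only through the createELFStreamer factory. The idiom in general form:

struct Base {
  virtual ~Base() {}
  virtual void run() = 0;
};

namespace {
// Internal linkage: this type is invisible to other translation units.
class Impl : public Base {
public:
  virtual void run() { /* implementation detail */ }
};
} // end anonymous namespace

Base *createImpl() { return new Impl(); }
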
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
deleted file mode 100644
index 10bf77580998..000000000000
--- a/lib/MC/MCELFStreamer.h
+++ /dev/null
@@ -1,141 +0,0 @@
-//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file assembles .s files and emits ELF .o object files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCELFSTREAMER_H
-#define LLVM_MC_MCELFSTREAMER_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSectionELF.h"
-
-namespace llvm {
-
-class MCELFStreamer : public MCObjectStreamer {
-public:
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
- MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- MCAssembler *Assembler)
- : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
-
-
- ~MCELFStreamer() {}
-
- /// @name MCStreamer Interface
- /// @{
-
- virtual void InitSections();
- virtual void ChangeSection(const MCSection *Section);
- virtual void EmitLabel(MCSymbol *Symbol);
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
- virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolType(int Type) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EndCOFFSymbolDef() {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setSize(Value);
- }
-
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- unsigned Size = 0, unsigned ByteAlignment = 0) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0);
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0);
-
- virtual void EmitFileDirective(StringRef Filename);
-
- virtual void Finish();
-
-private:
- virtual void EmitInstToFragment(const MCInst &Inst);
- virtual void EmitInstToData(const MCInst &Inst);
-
- void fixSymbolsInTLSFixups(const MCExpr *expr);
-
- struct LocalCommon {
- MCSymbolData *SD;
- uint64_t Size;
- unsigned ByteAlignment;
- };
- std::vector<LocalCommon> LocalCommons;
-
- SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
- /// @}
- void SetSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind) {
- SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
- }
-
- void SetSectionData() {
- SetSection(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionText() {
- SetSection(".text", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR |
- ELF::SHF_ALLOC, SectionKind::getText());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionBss() {
- SetSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC, SectionKind::getBSS());
- EmitCodeAlignment(4, 0);
- }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index da297fb1d95a..78801557af3e 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -14,9 +14,11 @@
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,7 +59,8 @@ void MCExpr::print(raw_ostream &OS) const {
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF ||
SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF ||
- SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF)
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET1)
OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
else if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 &&
@@ -70,7 +73,6 @@ void MCExpr::print(raw_ostream &OS) const {
case MCExpr::Unary: {
const MCUnaryExpr &UE = cast<MCUnaryExpr>(*this);
switch (UE.getOpcode()) {
- default: assert(0 && "Invalid opcode!");
case MCUnaryExpr::LNot: OS << '!'; break;
case MCUnaryExpr::Minus: OS << '-'; break;
case MCUnaryExpr::Not: OS << '~'; break;
@@ -91,7 +93,6 @@ void MCExpr::print(raw_ostream &OS) const {
}
switch (BE.getOpcode()) {
- default: assert(0 && "Invalid opcode!");
case MCBinaryExpr::Add:
// Print "X-42" instead of "X+-42".
if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
@@ -132,7 +133,7 @@ void MCExpr::print(raw_ostream &OS) const {
}
}
- assert(0 && "Invalid expression kind!");
+ llvm_unreachable("Invalid expression kind!");
}
void MCExpr::dump() const {
@@ -171,7 +172,6 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
switch (Kind) {
- default:
case VK_Invalid: return "<<invalid>>";
case VK_None: return "<<none>>";
@@ -189,18 +189,39 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_TPOFF: return "TPOFF";
case VK_DTPOFF: return "DTPOFF";
case VK_TLVP: return "TLVP";
+ case VK_SECREL: return "SECREL";
case VK_ARM_PLT: return "(PLT)";
case VK_ARM_GOT: return "(GOT)";
case VK_ARM_GOTOFF: return "(GOTOFF)";
case VK_ARM_TPOFF: return "(tpoff)";
case VK_ARM_GOTTPOFF: return "(gottpoff)";
case VK_ARM_TLSGD: return "(tlsgd)";
+ case VK_ARM_TARGET1: return "(target1)";
case VK_PPC_TOC: return "toc";
case VK_PPC_DARWIN_HA16: return "ha16";
case VK_PPC_DARWIN_LO16: return "lo16";
case VK_PPC_GAS_HA16: return "ha";
case VK_PPC_GAS_LO16: return "l";
+ case VK_Mips_GPREL: return "GPREL";
+ case VK_Mips_GOT_CALL: return "GOT_CALL";
+ case VK_Mips_GOT16: return "GOT16";
+ case VK_Mips_GOT: return "GOT";
+ case VK_Mips_ABS_HI: return "ABS_HI";
+ case VK_Mips_ABS_LO: return "ABS_LO";
+ case VK_Mips_TLSGD: return "TLSGD";
+ case VK_Mips_TLSLDM: return "TLSLDM";
+ case VK_Mips_DTPREL_HI: return "DTPREL_HI";
+ case VK_Mips_DTPREL_LO: return "DTPREL_LO";
+ case VK_Mips_GOTTPREL: return "GOTTPREL";
+ case VK_Mips_TPREL_HI: return "TPREL_HI";
+ case VK_Mips_TPREL_LO: return "TPREL_LO";
+ case VK_Mips_GPOFF_HI: return "GPOFF_HI";
+ case VK_Mips_GPOFF_LO: return "GPOFF_LO";
+ case VK_Mips_GOT_DISP: return "GOT_DISP";
+ case VK_Mips_GOT_PAGE: return "GOT_PAGE";
+ case VK_Mips_GOT_OFST: return "GOT_OFST";
}
+ llvm_unreachable("Invalid variant kind");
}
MCSymbolRefExpr::VariantKind
@@ -337,6 +358,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
if (Addrs && (&SecA != &SecB))
Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB));
+ // Pointers to Thumb symbols need to have their low-bit set to allow
+ // for interworking.
+ if (Asm->isThumbFunc(&SA))
+ Addend |= 1;
+
// Clear the symbol expr pointers to indicate we have folded these
// operands.
A = B = 0;
@@ -557,8 +583,7 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
}
}
- assert(0 && "Invalid assembly expression kind!");
- return false;
+ llvm_unreachable("Invalid assembly expression kind!");
}
const MCSection *MCExpr::FindAssociatedSection() const {
@@ -599,6 +624,5 @@ const MCSection *MCExpr::FindAssociatedSection() const {
}
}
- assert(0 && "Invalid assembly expression kind!");
- return 0;
+ llvm_unreachable("Invalid assembly expression kind!");
}
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 4cb628b395c3..7bbfd2efa136 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -25,6 +25,8 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << "Imm:" << getImm();
else if (isExpr()) {
OS << "Expr:(" << *getExpr() << ")";
+ } else if (isInst()) {
+ OS << "Inst:(" << *getInst() << ")";
} else
OS << "UNDEFINED";
OS << ">";
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 2317a2891f8b..847bcc0a1604 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -8,8 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -19,11 +21,11 @@ MCInstPrinter::~MCInstPrinter() {
/// getOpcodeName - Return the name of the specified opcode enum (e.g.
/// "MOV32ri") or empty if we can't resolve it.
StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
- return "";
+ return MII.getName(Opcode);
}
void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- assert(0 && "Target should implement this");
+ llvm_unreachable("Target should implement this");
}
void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) {
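
With the MCInstrInfo member (MII) now consulted, getOpcodeName returns the TableGen-generated instruction name instead of an empty string. A usage sketch (assumes a fully constructed printer; the helper name is illustrative):

#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/raw_ostream.h"

static void dumpOpcodeName(const llvm::MCInstPrinter &Printer,
                           const llvm::MCInst &Inst,
                           llvm::raw_ostream &OS) {
  OS << Printer.getOpcodeName(Inst.getOpcode()) << "\n";
}
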
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
deleted file mode 100644
index 3fe8ac72c8ec..000000000000
--- a/lib/MC/MCLoggingStreamer.cpp
+++ /dev/null
@@ -1,250 +0,0 @@
-//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
-
-class MCLoggingStreamer : public MCStreamer {
- llvm::OwningPtr<MCStreamer> Child;
-
- raw_ostream &OS;
-
-public:
- MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS)
- : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {}
-
- void LogCall(const char *Function) {
- OS << Function << "\n";
- }
-
- void LogCall(const char *Function, const Twine &Message) {
- OS << Function << ": " << Message << "\n";
- }
-
- virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); }
-
- virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); }
-
- virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); }
-
- virtual void AddComment(const Twine &T) {
- LogCall("AddComment", T);
- return Child->AddComment(T);
- }
-
- virtual void AddBlankLine() {
- LogCall("AddBlankLine");
- return Child->AddBlankLine();
- }
-
- virtual void ChangeSection(const MCSection *Section) {
- LogCall("ChangeSection");
- return Child->ChangeSection(Section);
- }
-
- virtual void InitSections() {
- LogCall("InitSections");
- return Child->InitSections();
- }
-
- virtual void EmitLabel(MCSymbol *Symbol) {
- LogCall("EmitLabel");
- return Child->EmitLabel(Symbol);
- }
-
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
- LogCall("EmitAssemblerFlag");
- return Child->EmitAssemblerFlag(Flag);
- }
-
- virtual void EmitThumbFunc(MCSymbol *Func) {
- LogCall("EmitThumbFunc");
- return Child->EmitThumbFunc(Func);
- }
-
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- LogCall("EmitAssignment");
- return Child->EmitAssignment(Symbol, Value);
- }
-
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
- LogCall("EmitWeakReference");
- return Child->EmitWeakReference(Alias, Symbol);
- }
-
- virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
- const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) {
- LogCall("EmitDwarfAdvanceLineAddr");
- return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
- PointerSize);
- }
-
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
- LogCall("EmitSymbolAttribute");
- return Child->EmitSymbolAttribute(Symbol, Attribute);
- }
-
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- LogCall("EmitSymbolDesc");
- return Child->EmitSymbolDesc(Symbol, DescValue);
- }
-
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- LogCall("BeginCOFFSymbolDef");
- return Child->BeginCOFFSymbolDef(Symbol);
- }
-
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- LogCall("EmitCOFFSymbolStorageClass");
- return Child->EmitCOFFSymbolStorageClass(StorageClass);
- }
-
- virtual void EmitCOFFSymbolType(int Type) {
- LogCall("EmitCOFFSymbolType");
- return Child->EmitCOFFSymbolType(Type);
- }
-
- virtual void EndCOFFSymbolDef() {
- LogCall("EndCOFFSymbolDef");
- return Child->EndCOFFSymbolDef();
- }
-
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- LogCall("EmitELFSize");
- return Child->EmitELFSize(Symbol, Value);
- }
-
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- LogCall("EmitCommonSymbol");
- return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- LogCall("EmitLocalCommonSymbol");
- return Child->EmitLocalCommonSymbol(Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- unsigned Size = 0, unsigned ByteAlignment = 0) {
- LogCall("EmitZerofill");
- return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) {
- LogCall("EmitTBSSSymbol");
- return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment);
- }
-
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
- LogCall("EmitBytes");
- return Child->EmitBytes(Data, AddrSpace);
- }
-
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace){
- LogCall("EmitValue");
- return Child->EmitValueImpl(Value, Size, AddrSpace);
- }
-
- virtual void EmitULEB128Value(const MCExpr *Value) {
- LogCall("EmitULEB128Value");
- return Child->EmitULEB128Value(Value);
- }
-
- virtual void EmitSLEB128Value(const MCExpr *Value) {
- LogCall("EmitSLEB128Value");
- return Child->EmitSLEB128Value(Value);
- }
-
- virtual void EmitGPRel32Value(const MCExpr *Value) {
- LogCall("EmitGPRel32Value");
- return Child->EmitGPRel32Value(Value);
- }
-
- virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
- unsigned AddrSpace) {
- LogCall("EmitFill");
- return Child->EmitFill(NumBytes, FillValue, AddrSpace);
- }
-
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0) {
- LogCall("EmitValueToAlignment");
- return Child->EmitValueToAlignment(ByteAlignment, Value,
- ValueSize, MaxBytesToEmit);
- }
-
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0) {
- LogCall("EmitCodeAlignment");
- return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit);
- }
-
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) {
- LogCall("EmitValueToOffset");
- return Child->EmitValueToOffset(Offset, Value);
- }
-
- virtual void EmitFileDirective(StringRef Filename) {
- LogCall("EmitFileDirective", "FileName:" + Filename);
- return Child->EmitFileDirective(Filename);
- }
-
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- LogCall("EmitDwarfFileDirective",
- "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
- return Child->EmitDwarfFileDirective(FileNo, Filename);
- }
-
- virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
- unsigned Column, unsigned Flags,
- unsigned Isa, unsigned Discriminator,
- StringRef FileName) {
- LogCall("EmitDwarfLocDirective",
- "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) +
- " Column:" + Twine(Column) + " Flags:" + Twine(Flags) +
- " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator));
- return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags,
- Isa, Discriminator, FileName);
- }
-
- virtual void EmitInstruction(const MCInst &Inst) {
- LogCall("EmitInstruction");
- return Child->EmitInstruction(Inst);
- }
-
- virtual void EmitRawText(StringRef String) {
- LogCall("EmitRawText", "\"" + String + "\"");
- return Child->EmitRawText(String);
- }
-
- virtual void Finish() {
- LogCall("Finish");
- return Child->Finish();
- }
-
-};
-
-} // end anonymous namespace.
-
-MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) {
- return new MCLoggingStreamer(Child, OS);
-}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index aa35815dd19c..bc6cf773217c 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -53,23 +53,23 @@ public:
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitCOFFSymbolType(int Type) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EndCOFFSymbolDef() {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
- assert(0 && "macho doesn't support this directive");
+ llvm_unreachable("macho doesn't support this directive");
}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
@@ -89,7 +89,7 @@ public:
//report_fatal_error("unsupported directive: '.file'");
}
- virtual void Finish();
+ virtual void FinishImpl();
/// @}
};
@@ -140,7 +140,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
// Let the target do whatever target specific stuff it needs to do.
- getAssembler().getBackend().HandleAssemblerFlag(Flag);
+ getAssembler().getBackend().handleAssemblerFlag(Flag);
// Do any generic stuff we need to do.
switch (Flag) {
case MCAF_SyntaxUnified: return; // no-op here.
@@ -150,14 +150,10 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
case MCAF_SubsectionsViaSymbols:
getAssembler().setSubsectionsViaSymbols(true);
return;
- default:
- llvm_unreachable("invalid assembler flag!");
}
}
void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
- // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa.
-
// Remember that the function is a thumb function. Fixup and relocation
// values will need to be adjusted.
getAssembler().setIsThumbFunc(Symbol);
@@ -215,8 +211,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Protected:
case MCSA_Weak:
case MCSA_Local:
- assert(0 && "Invalid symbol attribute for Mach-O!");
- break;
+ llvm_unreachable("Invalid symbol attribute for Mach-O!");
case MCSA_Global:
SD.setExternal(true);
@@ -377,7 +372,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCMachOStreamer::Finish() {
+void MCMachOStreamer::FinishImpl() {
EmitFrames(true);
// We have to set the fragment atom associations so we can relax properly for
@@ -409,7 +404,7 @@ void MCMachOStreamer::Finish() {
}
}
- this->MCObjectStreamer::Finish();
+ this->MCObjectStreamer::FinishImpl();
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB,
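
The Finish-to-FinishImpl renames in this commit fit the non-virtual interface pattern: a public non-virtual MCStreamer::Finish performs shared finalization and then delegates to a virtual hook that each object-file streamer overrides. The base-class side is not in this excerpt, so the exact shared work is an assumption; the shape would be:

class Streamer {
public:
  void Finish() {
    // shared finalization bookkeeping would happen here (assumed)
    FinishImpl();
  }
  virtual ~Streamer() {}
protected:
  virtual void FinishImpl() = 0; // overridden per object-file format
};
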
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index b1d09d945a39..f5631608330c 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -1,4 +1,4 @@
-//===- lib/MC/MCModule.cpp - MCModule implementation --------------------------===//
+//===- lib/MC/MCModule.cpp - MCModule implementation ----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index a6c0adb6793f..7ff2d1bf641b 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -55,6 +55,7 @@ namespace {
virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
virtual void EmitCOFFSymbolType(int Type) {}
virtual void EndCOFFSymbolDef() {}
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol) {}
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -79,11 +80,12 @@ namespace {
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0) {}
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) {}
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) { return false; }
virtual void EmitFileDirective(StringRef Filename) {}
- virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) {
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename) {
return false;
}
virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -92,7 +94,11 @@ namespace {
StringRef FileName) {}
virtual void EmitInstruction(const MCInst &Inst) {}
- virtual void Finish() {}
+ virtual void FinishImpl() {}
+
+ virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+ }
/// @}
};
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index df8b99d2be6c..b22ae331c9a9 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -56,8 +56,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
TLSThreadInitSection
= Ctx->getMachOSection("__DATA", "__thread_init",
- MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
- SectionKind::getDataRel());
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
CStringSection // .cstring
= Ctx->getMachOSection("__TEXT", "__cstring",
@@ -152,6 +152,24 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
SectionKind::getReadOnly());
// Debug Information.
+ DwarfAccelNamesSection =
+ Ctx->getMachOSection("__DWARF", "__apple_names",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfAccelObjCSection =
+ Ctx->getMachOSection("__DWARF", "__apple_objc",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ // 16 character section limit...
+ DwarfAccelNamespaceSection =
+ Ctx->getMachOSection("__DWARF", "__apple_namespac",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+ DwarfAccelTypesSection =
+ Ctx->getMachOSection("__DWARF", "__apple_types",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
+
DwarfAbbrevSection =
Ctx->getMachOSection("__DWARF", "__debug_abbrev",
MCSectionMachO::S_ATTR_DEBUG,
@@ -168,10 +186,6 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
Ctx->getMachOSection("__DWARF", "__debug_frame",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getMachOSection("__DWARF", "__debug_pubnames",
- MCSectionMachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getMachOSection("__DWARF", "__debug_pubtypes",
MCSectionMachO::S_ATTR_DEBUG,
@@ -207,8 +221,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
if (T.getArch() == Triple::x86) {
PersonalityEncoding = (RelocM == Reloc::PIC_)
- ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
@@ -216,8 +230,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
- ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
+ ? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_absptr;
} else if (T.getArch() == Triple::x86_64) {
FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -244,10 +258,22 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
}
}
+ // Solaris requires different flags for .eh_frame than seemingly every other
+ // platform.
+ EHSectionType = ELF::SHT_PROGBITS;
+ EHSectionFlags = ELF::SHF_ALLOC;
+ if (T.getOS() == Triple::Solaris) {
+ if (T.getArch() == Triple::x86_64)
+ EHSectionType = ELF::SHT_X86_64_UNWIND;
+ else
+ EHSectionFlags |= ELF::SHF_WRITE;
+ }
+
+
// ELF
BSSSection =
Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
SectionKind::getBSS());
TextSection =
@@ -347,15 +373,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
DwarfFrameSection =
Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfStrSection =
- Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
- SectionKind::getMetadata());
+ Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS,
+ SectionKind::getMergeable1ByteCString());
DwarfLocSection =
Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
@@ -390,12 +414,22 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- StaticCtorSection =
- Ctx->getCOFFSection(".ctors",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
+ if (T.getOS() == Triple::Win32) {
+ StaticCtorSection =
+ Ctx->getCOFFSection(".CRT$XCU",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticCtorSection =
+ Ctx->getCOFFSection(".ctors",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ }
+
+
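For context on the Win32 branch above: .CRT$XCU is where MSVC-flavored CRTs collect C++ dynamic initializers; at startup the CRT roughly walks the function pointers placed between the .CRT$XCA and .CRT$XCZ sentinels, so a read-only section suffices, whereas the GNU-style .ctors list stays writable. A minimal sketch of how an entry lands there from source (MSVC-specific syntax; names are illustrative):

    // Hedged sketch: register run_early() as a CRT initializer via .CRT$XCU.
    #pragma section(".CRT$XCU", read)
    static void __cdecl run_early(void) { /* initialization work */ }
    __declspec(allocate(".CRT$XCU"))
    static void (__cdecl *run_early_ptr)(void) = run_early;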
StaticDtorSection =
Ctx->getCOFFSection(".dtors",
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -434,11 +468,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
- DwarfPubNamesSection =
- Ctx->getCOFFSection(".debug_pubnames",
- COFF::IMAGE_SCN_MEM_DISCARDABLE |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getMetadata());
DwarfPubTypesSection =
Ctx->getCOFFSection(".debug_pubtypes",
COFF::IMAGE_SCN_MEM_DISCARDABLE |
@@ -488,6 +517,12 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
+ TLSDataSection =
+ Ctx->getCOFFSection(".tls$",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
}
void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
@@ -505,8 +540,12 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
TTypeEncoding = dwarf::DW_EH_PE_absptr;
- EHFrameSection = 0; // Created on demand.
- CompactUnwindSection = 0; // Used only by selected targets.
+ EHFrameSection = 0; // Created on demand.
+ CompactUnwindSection = 0; // Used only by selected targets.
+ DwarfAccelNamesSection = 0; // Used only by selected targets.
+ DwarfAccelObjCSection = 0; // Used only by selected targets.
+ DwarfAccelNamespaceSection = 0; // Used only by selected targets.
+ DwarfAccelTypesSection = 0; // Used only by selected targets.
Triple T(TT);
Triple::ArchType Arch = T.getArch();
@@ -541,8 +580,8 @@ void MCObjectFileInfo::InitEHFrameSection() {
SectionKind::getReadOnly());
else if (Env == IsELF)
EHFrameSection =
- Ctx->getELFSection(".eh_frame", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC,
+ Ctx->getELFSection(".eh_frame", EHSectionType,
+ EHSectionFlags,
SectionKind::getDataRel());
else
EHFrameSection =
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index a04ae0812a3e..bad7cfe38a17 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
-
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
@@ -105,6 +105,14 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
DF->getContents().resize(DF->getContents().size() + Size, 0);
}
+void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcStart(Frame);
+}
+
+void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+ RecordProcEnd(Frame);
+}
+
void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
MCStreamer::EmitLabel(Symbol);
@@ -164,7 +172,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
MCLineEntry::Make(this, getCurrentSection());
// If this instruction doesn't need relaxation, just emit it as data.
- if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ if (!getAssembler().getBackend().mayNeedRelaxation(Inst)) {
EmitInstToData(Inst);
return;
}
@@ -173,9 +181,9 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
// possible and emit it as data.
if (getAssembler().getRelaxAll()) {
MCInst Relaxed;
- getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
- while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
- getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ getAssembler().getBackend().relaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().mayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().relaxInstruction(Relaxed, Relaxed);
EmitInstToData(Relaxed);
return;
}
@@ -224,12 +232,12 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData());
}
-void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
+bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
int64_t Res;
if (Offset->EvaluateAsAbsolute(Res, getAssembler())) {
new MCOrgFragment(*Offset, Value, getCurrentSectionData());
- return;
+ return false;
}
MCSymbol *CurrentPos = getContext().CreateTempSymbol();
@@ -241,14 +249,30 @@ void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset,
MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext());
if (!Delta->EvaluateAsAbsolute(Res, getAssembler()))
- report_fatal_error("expected assembly-time absolute expression");
+ return true;
EmitFill(Res, Value, 0);
+ return false;
}
-void MCObjectStreamer::Finish() {
+// Associate GPRel32 fixup with data and resize data area
+void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ Value,
+ FK_GPRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
+void MCObjectStreamer::FinishImpl() {
// Dump out the dwarf file & directory tables and line tables.
+ const MCSymbol *LineSectionSymbol = NULL;
if (getContext().hasDwarfFiles())
- MCDwarfFileTable::Emit(this);
+ LineSectionSymbol = MCDwarfFileTable::Emit(this);
+
+ // If we are generating dwarf for assembly source files, dump out the sections.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfInfo::Emit(this, LineSectionSymbol);
getAssembler().Finish();
}
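The EmitValueToOffset change above turns the old report_fatal_error into a bool result so the caller (the .org handling in AsmParser) can issue a proper diagnostic. The arithmetic is simply current-position-to-target fill; a minimal stand-alone model (names hypothetical, not the LLVM API):

    #include <cstdint>
    #include <vector>

    // Sketch: resolve ".org Target, Fill" against the current offset.
    // Returns true when Target is not an assembly-time constant, mirroring
    // the new contract; the parser turns that into
    // "expected assembly-time absolute expression".
    bool emitValueToOffset(std::vector<uint8_t> &Section, bool TargetKnown,
                           uint64_t Target, uint8_t Fill) {
      if (!TargetKnown)
        return true;
      while (Section.size() < Target)  // e.g. at 0x40, ".org 0x100, 0x90"
        Section.push_back(Fill);       // appends 0xC0 fill bytes
      return false;
    }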
diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp
index efe9f68ee22b..030f24793c55 100644
--- a/lib/MC/MCObjectWriter.cpp
+++ b/lib/MC/MCObjectWriter.cpp
@@ -33,14 +33,22 @@ void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
}
/// Utility function to encode a ULEB128 value.
-void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS,
+ unsigned Padding) {
do {
uint8_t Byte = Value & 0x7f;
Value >>= 7;
- if (Value != 0)
+ if (Value != 0 || Padding != 0)
Byte |= 0x80; // Mark this byte to show that more bytes will follow.
OS << char(Byte);
} while (Value != 0);
+
+ // Pad with 0x80 and emit a null byte at the end.
+ if (Padding != 0) {
+ for (; Padding != 1; --Padding)
+ OS << '\x80';
+ OS << '\x00';
+ }
}
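The new Padding parameter fixes the encoded length of a ULEB128 independent of its value: extra 0x80 continuation bytes followed by a final 0x00 still decode to the same number, which lets a value be emitted before its final size is known. A stand-alone sketch mirroring the loop above:

    #include <cstdint>
    #include <string>

    // Padded ULEB128: Padding extra bytes are appended as 0x80 continuations
    // terminated by a 0x00, so the encoded length is fixed regardless of Value.
    std::string encodeULEB128(uint64_t Value, unsigned Padding = 0) {
      std::string Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0 || Padding != 0)
          Byte |= 0x80;              // more bytes follow
        Out.push_back(char(Byte));
      } while (Value != 0);
      if (Padding != 0) {
        for (; Padding != 1; --Padding)
          Out.push_back('\x80');     // padding continuation bytes
        Out.push_back('\x00');       // terminator
      }
      return Out;
    }
    // encodeULEB128(5, 2) yields {0x85, 0x80, 0x00}, which still decodes to 5.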
bool
@@ -60,6 +68,8 @@ MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
const MCSymbolData &DataA = Asm.getSymbolData(SA);
const MCSymbolData &DataB = Asm.getSymbolData(SB);
+ if(!DataA.getFragment() || !DataB.getFragment())
+ return false;
return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
*DataB.getFragment(),
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 16487579abfc..2d61cac62585 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -30,6 +29,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
@@ -122,6 +122,9 @@ private:
int64_t CppHashLineNumber;
SMLoc CppHashLoc;
+ /// AssemblerDialect. ~0U means unset; use the value provided by MAI.
+ unsigned AssemblerDialect;
+
public:
AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI);
@@ -143,9 +146,20 @@ public:
virtual MCAsmLexer &getLexer() { return Lexer; }
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
+ virtual unsigned getAssemblerDialect() {
+ if (AssemblerDialect == ~0U)
+ return MAI.getAssemblerDialect();
+ else
+ return AssemblerDialect;
+ }
+ virtual void setAssemblerDialect(unsigned i) {
+ AssemblerDialect = i;
+ }
- virtual bool Warning(SMLoc L, const Twine &Msg);
- virtual bool Error(SMLoc L, const Twine &Msg);
+ virtual bool Warning(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
+ virtual bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>());
const AsmToken &Lex();
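The new AssemblerDialect member uses ~0U as an "unset" sentinel, as the getter above shows: an explicit setAssemblerDialect() wins, otherwise the MCAsmInfo default applies. The pattern in isolation (a sketch; 0 is a valid dialect index, so an out-of-range marker stands in for a separate flag):

    unsigned Dialect = ~0U; // unset until setAssemblerDialect() is called
    unsigned effectiveDialect(unsigned MAIDefault) {
      return Dialect == ~0U ? MAIDefault : Dialect;
    }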
@@ -171,14 +185,17 @@ private:
void HandleMacroExit();
void PrintMacroInstantiations();
- void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
- bool ShowLine = true) const {
- SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
+ void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) const {
+ SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
bool EnterIncludeFile(const std::string &Filename);
+ /// ProcessIncbinFile - Process the specified file for the .incbin directive.
+ /// This returns true on failure.
+ bool ProcessIncbinFile(const std::string &Filename);
/// \brief Reset the current lexer position to that given by \arg Loc. The
/// current token is not set; clients should ensure Lex() is called
@@ -225,6 +242,7 @@ private:
bool ParseDirectiveAbort(); // ".abort"
bool ParseDirectiveInclude(); // ".include"
+ bool ParseDirectiveIncbin(); // ".incbin"
bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
// ".ifdef" or ".ifndef", depending on expect_defined
@@ -295,6 +313,12 @@ public:
&GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
AddDirectiveHandler<
&GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIRestore>(".cfi_restore");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFIEscape>(".cfi_escape");
+ AddDirectiveHandler<
+ &GenericAsmParser::ParseDirectiveCFISignalFrame>(".cfi_signal_frame");
// Macro directives.
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
@@ -328,6 +352,9 @@ public:
bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIRestore(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFIEscape(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveCFISignalFrame(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
@@ -352,7 +379,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
- CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0) {
+ CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
+ AssemblerDialect(~0U) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -395,21 +423,21 @@ void AsmParser::PrintMacroInstantiations() {
// Print the active macro instantiation stack.
for (std::vector<MacroInstantiation*>::const_reverse_iterator
it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
- PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
- "note");
+ PrintMessage((*it)->InstantiationLoc, SourceMgr::DK_Note,
+ "while in macro instantiation");
}
-bool AsmParser::Warning(SMLoc L, const Twine &Msg) {
+bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
if (FatalAssemblerWarnings)
- return Error(L, Msg);
- PrintMessage(L, Msg, "warning");
+ return Error(L, Msg, Ranges);
+ PrintMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
PrintMacroInstantiations();
return false;
}
-bool AsmParser::Error(SMLoc L, const Twine &Msg) {
+bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
HadError = true;
- PrintMessage(L, Msg, "error");
+ PrintMessage(L, SourceMgr::DK_Error, Msg, Ranges);
PrintMacroInstantiations();
return true;
}
@@ -427,6 +455,21 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
return false;
}
+/// Process the specified .incbin file by searching for it in the include paths,
+/// then emitting the byte contents of the file to the streamer. This
+/// returns true on failure.
+bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
+ std::string IncludedFile;
+ int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
+ if (NewBuf == -1)
+ return true;
+
+ // Pick up the bytes from the file and emit them.
+ getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(),
+ DEFAULT_ADDRSPACE);
+ return false;
+}
+
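Behaviorally, '.incbin "file"' splices the file's raw bytes into the current section at the point of the directive, after the same include-path search as '.include'. A rough stand-alone equivalent of the emission step (plain iostreams instead of SourceMgr/MCStreamer; illustrative only):

    #include <fstream>
    #include <iterator>
    #include <vector>

    // Read a file's bytes verbatim, as .incbin hands them to EmitBytes().
    std::vector<char> incbinBytes(const char *Path) {
      std::ifstream In(Path, std::ios::binary);
      return std::vector<char>(std::istreambuf_iterator<char>(In),
                               std::istreambuf_iterator<char>());
    }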
void AsmParser::JumpToLoc(SMLoc Loc) {
CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
@@ -462,6 +505,17 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
HadError = false;
AsmCond StartingCondState = TheCondState;
+ // If we are generating dwarf for assembly source files, save the initial text
+ // section and generate a .file directive.
+ if (getContext().getGenDwarfForAssembly()) {
+ getContext().setGenDwarfSection(getStreamer().getCurrentSection());
+ MCSymbol *SectionStartSym = getContext().CreateTempSymbol();
+ getStreamer().EmitLabel(SectionStartSym);
+ getContext().setGenDwarfSectionStartSym(SectionStartSym);
+ getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(),
+ StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier());
+ }
+
// While we have input, parse each statement.
while (Lexer.isNot(AsmToken::Eof)) {
if (!ParseStatement()) continue;
@@ -501,8 +555,9 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// FIXME: We would really like to refer back to where the symbol was
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
- PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
- Sym->getName() + "' not defined", "error", false);
+ PrintMessage(getLexer().getLoc(), SourceMgr::DK_Error,
+ "assembler local symbol '" + Sym->getName() +
+ "' not defined");
}
}
@@ -749,8 +804,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E,
}
}
- assert(0 && "Invalid expression kind!");
- return 0;
+ llvm_unreachable("Invalid expression kind!");
}
/// ParseExpression - Parse an expression and return it.
@@ -787,7 +841,6 @@ bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
if (!ModifiedRes) {
return TokError("invalid modifier '" + getTok().getIdentifier() +
"' (no symbols present)");
- return true;
}
Res = ModifiedRes;
@@ -1036,6 +1089,12 @@ bool AsmParser::ParseStatement() {
// Emit the label.
Out.EmitLabel(Sym);
+ // If we are generating dwarf for assembly source files, then gather the
+ // info to make a dwarf label entry for this label if needed.
+ if (getContext().getGenDwarfForAssembly())
+ MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(),
+ IDLoc);
+
// Consume any end of statement token, if present, to avoid spurious
// AddBlankLine calls().
if (Lexer.is(AsmToken::EndOfStatement)) {
@@ -1163,6 +1222,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveAbort();
if (IDVal == ".include")
return ParseDirectiveInclude();
+ if (IDVal == ".incbin")
+ return ParseDirectiveIncbin();
if (IDVal == ".code16")
return TokError(Twine(IDVal) + " not supported yet");
@@ -1205,7 +1266,19 @@ bool AsmParser::ParseStatement() {
}
OS << "]";
- PrintMessage(IDLoc, OS.str(), "note");
+ PrintMessage(IDLoc, SourceMgr::DK_Note, OS.str());
+ }
+
+ // If we are generating dwarf for assembly source files and the current
+ // section is the initial text section, then generate a .loc directive for
+ // the instruction.
+ if (!HadError && getContext().getGenDwarfForAssembly() &&
+ getContext().getGenDwarfSection() == getStreamer().getCurrentSection() ) {
+ getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(),
+ SrcMgr.FindLineNumber(IDLoc, CurBuffer),
+ 0, DWARF2_LINE_DEFAULT_IS_STMT ?
+ DWARF2_FLAG_IS_STMT : 0, 0, 0,
+ StringRef());
}
// If parsing succeeded, match the instruction.
@@ -1294,7 +1367,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
else
- Diag.Print(0, OS);
+ Diag.print(0, OS);
return;
}
@@ -1309,19 +1382,15 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
int LineNo = Parser->CppHashLineNumber - 1 +
(DiagLocLineNo - CppHashLocLineNo);
- SMDiagnostic NewDiag(*Diag.getSourceMgr(),
- Diag.getLoc(),
- Filename,
- LineNo,
- Diag.getColumnNo(),
- Diag.getMessage(),
- Diag.getLineContents(),
- Diag.getShowLine());
+ SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(),
+ Filename, LineNo, Diag.getColumnNo(),
+ Diag.getKind(), Diag.getMessage(),
+ Diag.getLineContents(), Diag.getRanges());
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
else
- NewDiag.Print(0, OS);
+ NewDiag.print(0, OS);
}
bool AsmParser::expandMacro(SmallString<256> &Buf, StringRef Body,
@@ -1458,6 +1527,11 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
}
Lex();
}
+ // If there weren't any arguments, erase the token vector so everything
+ // else knows that. Leaving around the vestigial empty token list confuses
+ // things.
+ if (MacroArguments.size() == 1 && MacroArguments.back().empty())
+ MacroArguments.clear();
// Macro instantiation is lexical, unfortunately. We construct a new buffer
// to hold the macro body with substitutions.
@@ -1495,23 +1569,27 @@ void AsmParser::HandleMacroExit() {
ActiveMacros.pop_back();
}
-static void MarkUsed(const MCExpr *Value) {
+static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) {
switch (Value->getKind()) {
- case MCExpr::Binary:
- MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
- MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value);
+ return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS());
break;
+ }
case MCExpr::Target:
case MCExpr::Constant:
- break;
+ return false;
case MCExpr::SymbolRef: {
- static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
- break;
+ const MCSymbol &S = static_cast<const MCSymbolRefExpr*>(Value)->getSymbol();
+ if (S.isVariable())
+ return IsUsedIn(Sym, S.getVariableValue());
+ return &S == Sym;
}
case MCExpr::Unary:
- MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
- break;
+ return IsUsedIn(Sym, static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
}
+
+ llvm_unreachable("Unknown expr kind!");
}
bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
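The IsUsedIn walk above exists so that ParseAssignment (next hunk) can reject definitions like "a = a + 1" while still permitting forward chains such as "a = b" followed by "b = c": at the time 'a' is assigned, 'b' is merely an undefined symbol, not a variable whose value could be expanded. A toy model of the cycle check (simplified; not the LLVM MCExpr API):

    #include <map>
    #include <string>
    #include <vector>

    struct Expr {                       // toy expression: a symbol or children
      std::string Sym;                  // non-empty for a symbol reference
      std::vector<const Expr*> Kids;    // sub-expressions otherwise
    };
    using VarMap = std::map<std::string, const Expr*>; // resolved variables

    static bool isUsedIn(const std::string &Name, const Expr &E,
                         const VarMap &Vars) {
      if (!E.Sym.empty()) {
        VarMap::const_iterator It = Vars.find(E.Sym);
        if (It != Vars.end())           // variable: chase its definition
          return isUsedIn(Name, *It->second, Vars);
        return E.Sym == Name;           // plain symbol: direct self-reference?
      }
      for (const Expr *K : E.Kids)
        if (isUsedIn(Name, *K, Vars))
          return true;
      return false;
    }
    // isUsedIn("a", parse("a + 1"), {}) -> true => "Recursive use of 'a'",
    // where parse() is a hypothetical helper for building the toy tree.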
@@ -1522,7 +1600,9 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
if (ParseExpression(Value))
return true;
- MarkUsed(Value);
+ // Note: we don't count b as used in "a = b". This is to allow
+ // a = b
+ // b = c
if (Lexer.isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in assignment");
@@ -1544,8 +1624,12 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+ if (IsUsedIn(Sym, Value))
+ return Error(EqualLoc, "Recursive use of '" + Name + "'");
+ else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
; // Allow redefinitions of undefined symbols only used in directives.
+ else if (Sym->isVariable() && !Sym->isUsed() && allow_redef)
+ ; // Allow redefinitions of variables that haven't yet been used.
else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef))
return Error(EqualLoc, "redefinition of '" + Name + "'");
else if (!Sym->isVariable())
@@ -1912,6 +1996,7 @@ bool AsmParser::ParseDirectiveOrg() {
CheckForValidSection();
const MCExpr *Offset;
+ SMLoc Loc = getTok().getLoc();
if (ParseExpression(Offset))
return true;
@@ -1931,9 +2016,11 @@ bool AsmParser::ParseDirectiveOrg() {
Lex();
- // FIXME: Only limited forms of relocatable expressions are accepted here, it
- // has to be relative to the current section.
- getStreamer().EmitValueToOffset(Offset, FillExpr);
+ // Only limited forms of relocatable expressions are accepted here; the
+ // offset has to be relative to the current section. The streamer will return
+ // 'true' if the expression wasn't evaluatable.
+ if (getStreamer().EmitValueToOffset(Offset, FillExpr))
+ return Error(Loc, "expected assembly-time absolute expression");
return false;
}
@@ -2178,6 +2265,31 @@ bool AsmParser::ParseDirectiveInclude() {
return false;
}
+/// ParseDirectiveIncbin
+/// ::= .incbin "filename"
+bool AsmParser::ParseDirectiveIncbin() {
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.incbin' directive");
+
+ std::string Filename = getTok().getString();
+ SMLoc IncbinLoc = getLexer().getLoc();
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.incbin' directive");
+
+ // Strip the quotes.
+ Filename = Filename.substr(1, Filename.size()-2);
+
+ // Attempt to process the included file.
+ if (ProcessIncbinFile(Filename)) {
+ Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
+ return true;
+ }
+
+ return false;
+}
+
/// ParseDirectiveIf
/// ::= .if expression
bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
@@ -2305,7 +2417,8 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
}
/// ParseDirectiveFile
-/// ::= .file [number] string
+/// ::= .file [number] filename
+/// ::= .file number directory filename
bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
@@ -2321,17 +2434,35 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::String))
return TokError("unexpected token in '.file' directive");
- StringRef Filename = getTok().getString();
- Filename = Filename.substr(1, Filename.size()-2);
+ // Usually the directory and filename appear together; otherwise this is just the directory.
+ StringRef Path = getTok().getString();
+ Path = Path.substr(1, Path.size()-2);
Lex();
+ StringRef Directory;
+ StringRef Filename;
+ if (getLexer().is(AsmToken::String)) {
+ if (FileNumber == -1)
+ return TokError("explicit path specified, but no file number");
+ Filename = getTok().getString();
+ Filename = Filename.substr(1, Filename.size()-2);
+ Directory = Path;
+ Lex();
+ } else {
+ Filename = Path;
+ }
+
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.file' directive");
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
else {
- if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename))
+ if (getContext().getGenDwarfForAssembly() == true)
+ Error(DirectiveLoc, "input can't have .file dwarf directives when -g is "
+ "used to generate dwarf debug info for assembly code");
+
+ if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename))
Error(FileNumberLoc, "file number already allocated");
}
@@ -2719,6 +2850,56 @@ bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal,
return false;
}
+/// ParseDirectiveCFIRestore
+/// ::= .cfi_restore register
+bool GenericAsmParser::ParseDirectiveCFIRestore(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ int64_t Register = 0;
+ if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc))
+ return true;
+
+ getStreamer().EmitCFIRestore(Register);
+
+ return false;
+}
+
+/// ParseDirectiveCFIEscape
+/// ::= .cfi_escape expression[,...]
+bool GenericAsmParser::ParseDirectiveCFIEscape(StringRef IDVal,
+ SMLoc DirectiveLoc) {
+ std::string Values;
+ int64_t CurrValue;
+ if (getParser().ParseAbsoluteExpression(CurrValue))
+ return true;
+
+ Values.push_back((uint8_t)CurrValue);
+
+ while (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(CurrValue))
+ return true;
+
+ Values.push_back((uint8_t)CurrValue);
+ }
+
+ getStreamer().EmitCFIEscape(Values);
+ return false;
+}
+
+/// ParseDirectiveCFISignalFrame
+/// ::= .cfi_signal_frame
+bool GenericAsmParser::ParseDirectiveCFISignalFrame(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ getStreamer().EmitCFISignalFrame();
+
+ return false;
+}
+
/// ParseDirectiveMacrosOnOff
/// ::= .macros_on
/// ::= .macros_off
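Of the three new CFI directives, .cfi_escape is the open-ended one: each absolute expression becomes one raw byte, and the concatenated string is spliced verbatim into the FDE's call-frame instruction stream. For example, ".cfi_escape 0x2e, 0x10" emits the GNU extension opcode DW_CFA_GNU_args_size with an argument size of 16. A sketch of the byte collection the parser performs:

    #include <cstdint>
    #include <initializer_list>
    #include <string>

    // Collect .cfi_escape operands as raw bytes for EmitCFIEscape().
    std::string cfiEscapeBytes(std::initializer_list<int64_t> Operands) {
      std::string Values;
      for (int64_t V : Operands)
        Values.push_back(static_cast<char>(static_cast<uint8_t>(V)));
      return Values;   // e.g. {0x2e, 0x10} -> "\x2e\x10"
    }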
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index 299d28168948..222f237bfd64 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMMCParser
MCAsmParserExtension.cpp
MCTargetAsmParser.cpp
)
-
-add_llvm_library_dependencies(LLVMMCParser
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 185b5168bd6c..c4cdc3c9f96f 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -45,6 +45,7 @@ class COFFAsmParser : public MCAsmParserExtension {
AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+ AddDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32");
// Win64 EH directives.
AddDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>(
@@ -102,6 +103,7 @@ class COFFAsmParser : public MCAsmParserExtension {
bool ParseDirectiveScl(StringRef, SMLoc);
bool ParseDirectiveType(StringRef, SMLoc);
bool ParseDirectiveEndef(StringRef, SMLoc);
+ bool ParseDirectiveSecRel32(StringRef, SMLoc);
// Win64 EH directives.
bool ParseSEHDirectiveStartProc(StringRef, SMLoc);
@@ -217,6 +219,21 @@ bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
return false;
}
+bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
+ StringRef SymbolID;
+ if (getParser().ParseIdentifier(SymbolID))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID);
+
+ Lex();
+ getStreamer().EmitCOFFSecRel32(Symbol);
+ return false;
+}
+
bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) {
StringRef SymbolID;
if (getParser().ParseIdentifier(SymbolID))
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d89112645cd8..ffc400b203f9 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -476,6 +476,7 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
.Case("common", MCSA_ELF_TypeCommon)
.Case("notype", MCSA_ELF_TypeNoType)
.Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+ .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction)
.Default(MCSA_Invalid);
if (Attr == MCSA_Invalid)
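The new gnu_indirect_function keyword maps to the ELF STT_GNU_IFUNC symbol type: the symbol's value is a resolver the dynamic linker runs at bind time to pick the real implementation. At the source level this corresponds to the GCC/Clang ifunc attribute (ELF-only; names are illustrative):

    extern "C" {
    int entry_impl(void) { return 42; }              // one implementation
    void *entry_resolver(void) {                     // runs at load time,
      return (void *)entry_impl;                     // e.g. after a CPUID check
    }
    int entry(void) __attribute__((ifunc("entry_resolver")));
    }
    // The assembler then sees: .type entry, @gnu_indirect_function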
diff --git a/lib/MC/MCParser/LLVMBuild.txt b/lib/MC/MCParser/LLVMBuild.txt
new file mode 100644
index 000000000000..bcb0febf3323
--- /dev/null
+++ b/lib/MC/MCParser/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/MC/MCParser/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MCParser
+parent = MC
+required_libraries = MC Support
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index dceece78ba10..3a3ff147117e 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -25,3 +25,7 @@ SMLoc MCAsmLexer::getLoc() const {
SMLoc AsmToken::getLoc() const {
return SMLoc::getFromPointer(Str.data());
}
+
+SMLoc AsmToken::getEndLoc() const {
+ return SMLoc::getFromPointer(Str.data() + Str.size() - 1);
+}
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 5239ec753e77..3a825f03b776 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -33,8 +33,8 @@ const AsmToken &MCAsmParser::getTok() {
return getLexer().getTok();
}
-bool MCAsmParser::TokError(const Twine &Msg) {
- Error(getLexer().getLoc(), Msg);
+bool MCAsmParser::TokError(const Twine &Msg, ArrayRef<SMRange> Ranges) {
+ Error(getLexer().getLoc(), Msg, Ranges);
return true;
}
diff --git a/lib/MC/MCPureStreamer.cpp b/lib/MC/MCPureStreamer.cpp
index 086c9229bcdc..a770c974380a 100644
--- a/lib/MC/MCPureStreamer.cpp
+++ b/lib/MC/MCPureStreamer.cpp
@@ -46,9 +46,9 @@ public:
unsigned MaxBytesToEmit = 0);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
- virtual void Finish();
+ virtual void FinishImpl();
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
@@ -93,9 +93,9 @@ public:
virtual void EmitFileDirective(StringRef Filename) {
report_fatal_error("unsupported directive in pure streamer");
}
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename) {
report_fatal_error("unsupported directive in pure streamer");
- return false;
}
/// @}
@@ -184,9 +184,10 @@ void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment,
getCurrentSectionData()->setAlignment(ByteAlignment);
}
-void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
+bool MCPureStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+ return false;
}
void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) {
@@ -223,10 +224,10 @@ void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
DF->getContents().append(Code.begin(), Code.end());
}
-void MCPureStreamer::Finish() {
+void MCPureStreamer::FinishImpl() {
// FIXME: Handle DWARF tables?
- this->MCObjectStreamer::Finish();
+ this->MCObjectStreamer::FinishImpl();
}
MCStreamer *llvm::createPureStreamer(MCContext &Context, MCAsmBackend &MAB,
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 3afa22b0d0be..43e62ff89a0f 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -16,7 +16,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include <cstdlib>
using namespace llvm;
@@ -94,17 +93,18 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
/// EmitULEB128Value - Special case of EmitULEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
-void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
- SmallString<32> Tmp;
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace,
+ unsigned Padding) {
+ SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeULEB128(Value, OSE);
+ MCObjectWriter::EncodeULEB128(Value, OSE, Padding);
EmitBytes(OSE.str(), AddrSpace);
}
/// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the
/// client having to pass in a MCExpr for constant integers.
void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) {
- SmallString<32> Tmp;
+ SmallString<128> Tmp;
raw_svector_ostream OSE(Tmp);
MCObjectWriter::EncodeSLEB128(Value, OSE);
EmitBytes(OSE.str(), AddrSpace);
@@ -128,6 +128,10 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
AddrSpace);
}
+void MCStreamer::EmitGPRel64Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
void MCStreamer::EmitGPRel32Value(const MCExpr *Value) {
report_fatal_error("unsupported directive in streamer");
}
@@ -142,8 +146,9 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
}
bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Directory,
StringRef Filename) {
- return getContext().GetDwarfFile(Filename, FileNo) == 0;
+ return getContext().GetDwarfFile(Directory, Filename, FileNo) == 0;
}
void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
@@ -186,9 +191,8 @@ void MCStreamer::EmitDataRegion() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getDataBeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getDataBeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = Data;
@@ -202,9 +206,8 @@ void MCStreamer::EmitCodeRegion() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getCodeBeginLabelName()) +
- utostr(UniqueCodeBeginSuffix++));
+ MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getCodeBeginLabelName() +
+ Twine(UniqueCodeBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = Code;
@@ -218,9 +221,9 @@ void MCStreamer::EmitJumpTable8Region() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getJumpTable8BeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym =
+ Context.GetOrCreateSymbol(MAI.getJumpTable8BeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = JumpTable8;
@@ -234,9 +237,9 @@ void MCStreamer::EmitJumpTable16Region() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getJumpTable16BeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym =
+ Context.GetOrCreateSymbol(MAI.getJumpTable16BeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = JumpTable16;
@@ -251,9 +254,9 @@ void MCStreamer::EmitJumpTable32Region() {
if (!MAI.getSupportsDataRegions()) return;
// Generate a unique symbol name.
- MCSymbol *NewSym = Context.GetOrCreateSymbol(
- Twine(MAI.getJumpTable32BeginLabelName()) +
- utostr(UniqueDataBeginSuffix++));
+ MCSymbol *NewSym =
+ Context.GetOrCreateSymbol(MAI.getJumpTable32BeginLabelName() +
+ Twine(UniqueDataBeginSuffix++));
EmitLabel(NewSym);
RegionIndicator = JumpTable32;
@@ -277,8 +280,17 @@ void MCStreamer::EmitCFIStartProc() {
report_fatal_error("Starting a frame before finishing the previous one!");
MCDwarfFrameInfo Frame;
- Frame.Function = LastSymbol;
+ EmitCFIStartProcImpl(Frame);
+ FrameInfos.push_back(Frame);
+ RegionIndicator = Code;
+}
+
+void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
+}
+
+void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) {
+ Frame.Function = LastSymbol;
// If the function is externally visible, we need to create a local
// symbol to avoid relocations.
StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix();
@@ -288,16 +300,20 @@ void MCStreamer::EmitCFIStartProc() {
Frame.Begin = getContext().CreateTempSymbol();
EmitLabel(Frame.Begin);
}
-
- FrameInfos.push_back(Frame);
- RegionIndicator = Code;
}
void MCStreamer::EmitCFIEndProc() {
EnsureValidFrame();
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
- CurFrame->End = getContext().CreateTempSymbol();
- EmitLabel(CurFrame->End);
+ EmitCFIEndProcImpl(*CurFrame);
+}
+
+void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
+}
+
+void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) {
+ Frame.End = getContext().CreateTempSymbol();
+ EmitLabel(Frame.End);
}
void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
@@ -386,7 +402,7 @@ void MCStreamer::EmitCFIRememberState() {
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::RememberState, Label);
CurFrame->Instructions.push_back(Instruction);
}
@@ -396,7 +412,7 @@ void MCStreamer::EmitCFIRestoreState() {
MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
MCSymbol *Label = getContext().CreateTempSymbol();
EmitLabel(Label);
- MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::RestoreState, Label);
CurFrame->Instructions.push_back(Instruction);
}
@@ -409,6 +425,30 @@ void MCStreamer::EmitCFISameValue(int64_t Register) {
CurFrame->Instructions.push_back(Instruction);
}
+void MCStreamer::EmitCFIRestore(int64_t Register) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label, Register);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFIEscape(StringRef Values) {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ MCSymbol *Label = getContext().CreateTempSymbol();
+ EmitLabel(Label);
+ MCCFIInstruction Instruction(MCCFIInstruction::Escape, Label, Values);
+ CurFrame->Instructions.push_back(Instruction);
+}
+
+void MCStreamer::EmitCFISignalFrame() {
+ EnsureValidFrame();
+ MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo();
+ CurFrame->IsSignalFrame = true;
+}
+
void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) {
W64UnwindInfos.push_back(Frame);
CurrentW64UnwindInfo = W64UnwindInfos.back();
@@ -559,6 +599,10 @@ void MCStreamer::EmitWin64EHEndProlog() {
EmitLabel(CurFrame->PrologEnd);
}
+void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+ llvm_unreachable("This file format doesn't support this directive");
+}
+
void MCStreamer::EmitFnStart() {
errs() << "Not implemented yet\n";
abort();
@@ -631,3 +675,10 @@ void MCStreamer::EmitW64Tables() {
MCWin64EHUnwindEmitter::Emit(*this);
}
+
+void MCStreamer::Finish() {
+ if (!FrameInfos.empty() && !FrameInfos.back().End)
+ report_fatal_error("Unfinished frame!");
+
+ FinishImpl();
+}
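The Finish()/FinishImpl() split introduced across the streamers is the non-virtual-interface pattern: the base class owns the "no unfinished frame" invariant, and subclasses override only the finalization hook. Reduced to its skeleton (a sketch, not the MCStreamer interface):

    #include <cassert>

    struct StreamerBase {
      void Finish() {                 // public and non-virtual: check first,
        assert(framesClosed());       // then delegate to the hook
        FinishImpl();
      }
      virtual ~StreamerBase() {}
    protected:
      bool framesClosed() const { return true; /* placeholder invariant */ }
    private:
      virtual void FinishImpl() = 0;  // per-format finalization
    };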
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index c2fad1674aa4..e013e77f58af 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -54,17 +54,14 @@ const MCSymbol &MCSymbol::AliasedSymbol() const {
void MCSymbol::setVariableValue(const MCExpr *Value) {
assert(!IsUsed && "Cannot set a variable that has already been used.");
assert(Value && "Invalid variable value!");
- assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
- "Invalid redefinition!");
this->Value = Value;
// Variables should always be marked as in the same "section" as the value.
const MCSection *Section = Value->FindAssociatedSection();
- if (Section) {
+ if (Section)
setSection(*Section);
- } else {
+ else
setUndefined();
- }
}
void MCSymbol::print(raw_ostream &OS) const {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a9219ad29c65..8e4066c894ba 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -8,13 +8,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
@@ -584,9 +584,16 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// requires the compiler to use .set to absolutize the differences between
// symbols which the compiler knows to be assembly time constants, so we
// don't need to worry about considering symbol differences fully resolved.
+ //
+ // If the file isn't using sub-sections-via-symbols, we can make the
+ // same assumptions about any symbol that we normally make about
+ // assembler locals.
if (!Asm.getBackend().hasReliableSymbolDifference()) {
- if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ if (!SA.isInSection() || &SecA != &SecB ||
+ (!SA.isTemporary() &&
+ FB.getAtom() != Asm.getSymbolData(SA).getFragment()->getAtom() &&
+ Asm.getSubsectionsViaSymbols()))
return false;
return true;
}
@@ -628,7 +635,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
}
void MachObjectWriter::WriteObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+ const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
@@ -731,7 +738,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
- Asm.WriteSectionData(it, Layout);
+ Asm.writeSectionData(it, Layout);
uint64_t Pad = getPaddingSize(it, Layout);
for (unsigned int i = 0; i < Pad; ++i)
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 348cd4c9ab1b..be4157994c68 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -13,8 +13,8 @@
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -114,7 +114,7 @@ void SubtargetFeatures::AddFeature(const StringRef String,
// Don't add empty features
if (!String.empty()) {
// Convert to lowercase, prepend flag and add to vector
- Features.push_back(PrependFlag(LowercaseString(String), IsEnabled));
+ Features.push_back(PrependFlag(String.lower(), IsEnabled));
}
}
@@ -154,21 +154,19 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
// Print the CPU table.
errs() << "Available CPUs for this target:\n\n";
for (size_t i = 0; i != CPUTableSize; i++)
- errs() << " " << CPUTable[i].Key
- << std::string(MaxCPULen - std::strlen(CPUTable[i].Key), ' ')
- << " - " << CPUTable[i].Desc << ".\n";
- errs() << "\n";
-
+ errs() << format(" %-*s - %s.\n",
+ MaxCPULen, CPUTable[i].Key, CPUTable[i].Desc);
+ errs() << '\n';
+
// Print the Feature table.
errs() << "Available features for this target:\n\n";
for (size_t i = 0; i != FeatTableSize; i++)
- errs() << " " << FeatTable[i].Key
- << std::string(MaxFeatLen - std::strlen(FeatTable[i].Key), ' ')
- << " - " << FeatTable[i].Desc << ".\n";
- errs() << "\n";
-
+ errs() << format(" %-*s - %s.\n",
+ MaxFeatLen, FeatTable[i].Key, FeatTable[i].Desc);
+ errs() << '\n';
+
errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
- << "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
+ "For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
std::exit(1);
}
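The rewritten Help() leans on printf-style width specifiers instead of hand-built padding strings: in " %-*s - %s.\n" the '*' consumes an int argument as the field width and '-' left-justifies, so every key is padded to the longest one. The same idea in plain stdio (values are illustrative):

    #include <cstdio>

    int main() {
      const int Width = 12;            // stand-in for MaxCPULen
      std::printf("  %-*s - %s.\n", Width, "generic", "Select the generic CPU");
      std::printf("  %-*s - %s.\n", Width, "core2", "Select the core2 CPU");
      // both descriptions start in the same column
    }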
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index b15e225fc2a3..f706cac8d36c 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -22,8 +22,10 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -33,8 +35,6 @@
#include "llvm/Support/TimeValue.h"
-#include "../Target/X86/MCTargetDesc/X86FixupKinds.h"
-
#include <cstdio>
using namespace llvm;
@@ -128,8 +128,9 @@ public:
typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
typedef DenseMap<MCSection const *, COFFSection *> section_map;
+ llvm::OwningPtr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
+
// Root level file contents.
- bool Is64Bit;
COFF::header Header;
sections Sections;
symbols Symbols;
@@ -139,7 +140,7 @@ public:
section_map SectionMap;
symbol_map SymbolMap;
- WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+ WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
~WinCOFFObjectWriter();
COFFSymbol *createSymbol(StringRef Name);
@@ -281,6 +282,7 @@ StringTable::StringTable() {
// The string table data begins with the length of the entire string table
// including the length header. Allocate space for this header.
Data.resize(4);
+ update_length();
}
size_t StringTable::size() const {
@@ -313,13 +315,13 @@ size_t StringTable::insert(llvm::StringRef String) {
//------------------------------------------------------------------------------
// WinCOFFObjectWriter class implementation
-WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS)
: MCObjectWriter(OS, true)
- , Is64Bit(is64Bit) {
+ , TargetObjectWriter(MOTW) {
memset(&Header, 0, sizeof(Header));
- Is64Bit ? Header.Machine = COFF::IMAGE_FILE_MACHINE_AMD64
- : Header.Machine = COFF::IMAGE_FILE_MACHINE_I386;
+ Header.Machine = TargetObjectWriter->getMachine();
}
WinCOFFObjectWriter::~WinCOFFObjectWriter() {
@@ -694,30 +696,13 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
if (CrossSection)
FixupKind = FK_PCRel_4;
- switch (FixupKind) {
- case FK_PCRel_4:
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
- : COFF::IMAGE_REL_I386_REL32;
- // FIXME: Can anyone explain what this does other than adjust for the size
- // of the offset?
+ Reloc.Data.Type = TargetObjectWriter->getRelocType(FixupKind);
+
+ // FIXME: Can anyone explain what this does other than adjust for the size
+ // of the offset?
+ if (Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32 ||
+ Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32)
FixedValue += 4;
- break;
- case FK_Data_4:
- case X86::reloc_signed_4byte:
- Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
- : COFF::IMAGE_REL_I386_DIR32;
- break;
- case FK_Data_8:
- if (Is64Bit)
- Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
- else
- llvm_unreachable("unsupported relocation type");
- break;
- default:
- llvm_unreachable("unsupported relocation type");
- }
coff_section->Relocations.push_back(Reloc);
}
@@ -798,9 +783,22 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
}
if (Sec->Relocations.size() > 0) {
- Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ bool RelocationsOverflow = Sec->Relocations.size() >= 0xffff;
+
+ if (RelocationsOverflow) {
+ // Signal overflow by setting NumberOfRelocations to the max value. The
+ // actual count is found in reloc #0. Microsoft tools understand this.
+ Sec->Header.NumberOfRelocations = 0xffff;
+ } else {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ }
Sec->Header.PointerToRelocations = offset;
+ if (RelocationsOverflow) {
+ // Reloc #0 will contain actual count, so make room for it.
+ offset += COFF::RelocationSize;
+ }
+
offset += COFF::RelocationSize * Sec->Relocations.size();
for (relocations::iterator cr = Sec->Relocations.begin(),
@@ -835,8 +833,12 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
MCAssembler::const_iterator j, je;
for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
- if ((*i)->Number != -1)
+ if ((*i)->Number != -1) {
+ if ((*i)->Relocations.size() >= 0xffff) {
+ (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ }
WriteSectionHeader((*i)->Header);
+ }
for (i = Sections.begin(), ie = Sections.end(),
j = Asm.begin(), je = Asm.end();
@@ -849,13 +851,23 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
assert(OS.tell() == (*i)->Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- Asm.WriteSectionData(j, Layout);
+ Asm.writeSectionData(j, Layout);
}
if ((*i)->Relocations.size() > 0) {
assert(OS.tell() == (*i)->Header.PointerToRelocations &&
"Section::PointerToRelocations is insane!");
+ if ((*i)->Relocations.size() >= 0xffff) {
+ // In case of overflow, write the actual relocation count as the first
+ // relocation, including the synthetic reloc itself (+ 1).
+ COFF::relocation r;
+ r.VirtualAddress = (*i)->Relocations.size() + 1;
+ r.SymbolTableIndex = 0;
+ r.Type = 0;
+ WriteRelocation(r);
+ }
+
for (relocations::const_iterator k = (*i)->Relocations.begin(),
ke = (*i)->Relocations.end();
k != ke; k++) {
@@ -877,11 +889,16 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
}
+MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) :
+ Machine(Machine_) {
+}
+
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
namespace llvm {
- MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
- return new WinCOFFObjectWriter(OS, is64Bit);
+ MCObjectWriter *createWinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
+ raw_ostream &OS) {
+ return new WinCOFFObjectWriter(MOTW, OS);
}
}
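A worked example of the relocation-overflow scheme above: NumberOfRelocations is a 16-bit field, so a section with 70000 relocations stores 0xffff in the header, sets IMAGE_SCN_LNK_NRELOC_OVFL in its characteristics, and writes the real count plus one (for the synthetic entry itself) into the VirtualAddress field of relocation #0. A minimal model of that bookkeeping (a sketch; field names are stand-ins for the COFF structures):

    #include <cstddef>
    #include <cstdint>

    struct RelocHeaderFixup {
      uint16_t NumberOfRelocations;
      uint32_t Reloc0VirtualAddress; // 0 when there is no synthetic entry
      bool OverflowFlag;             // IMAGE_SCN_LNK_NRELOC_OVFL
    };

    RelocHeaderFixup describe(std::size_t NumRelocs) {
      RelocHeaderFixup R;
      R.OverflowFlag = NumRelocs >= 0xffff;
      if (R.OverflowFlag) {
        R.NumberOfRelocations = 0xffff;      // sentinel in the section header
        R.Reloc0VirtualAddress = static_cast<uint32_t>(NumRelocs + 1);
      } else {
        R.NumberOfRelocations = static_cast<uint16_t>(NumRelocs);
        R.Reloc0VirtualAddress = 0;          // no synthetic entry
      }
      return R;
    }
    // describe(70000) -> {0xffff, 70001, true}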
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 7409daf39085..67dc649d4913 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -25,13 +25,13 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCWin64EH.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
@@ -60,6 +60,7 @@ public:
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
virtual void EmitCOFFSymbolType(int Type);
virtual void EndCOFFSymbolDef();
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
@@ -77,7 +78,7 @@ public:
virtual void EmitFileDirective(StringRef Filename);
virtual void EmitInstruction(const MCInst &Instruction);
virtual void EmitWin64EHHandlerData();
- virtual void Finish();
+ virtual void FinishImpl();
private:
virtual void EmitInstToFragment(const MCInst &Inst) {
@@ -251,7 +252,6 @@ void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
default:
llvm_unreachable("unsupported attribute");
- break;
}
}
@@ -293,6 +293,16 @@ void WinCOFFStreamer::EndCOFFSymbolDef() {
CurSymbol = NULL;
}
+void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol)
+{
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ MCSymbolRefExpr::Create (Symbol, getContext ()),
+ FK_SecRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
llvm_unreachable("not implemented");
}
@@ -389,9 +399,9 @@ void WinCOFFStreamer::EmitWin64EHHandlerData() {
MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
}
-void WinCOFFStreamer::Finish() {
+void WinCOFFStreamer::FinishImpl() {
EmitW64Tables();
- MCObjectStreamer::Finish();
+ MCObjectStreamer::FinishImpl();
}
namespace llvm
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index e2eaff53c1f1..c5f15bafcfba 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -13,14 +13,15 @@
#include "llvm/Object/Archive.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace object;
-namespace {
-const StringRef Magic = "!<arch>\n";
+static const char *Magic = "!<arch>\n";
+namespace {
struct ArchiveMemberHeader {
char Name[16];
char LastModified[12];
@@ -32,7 +33,11 @@ struct ArchiveMemberHeader {
///! Get the name without looking up long names.
StringRef getName() const {
- char EndCond = Name[0] == '/' ? ' ' : '/';
+ char EndCond;
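+ // Special names ("/", "//", "#1/<n>", "#_LLVM_SYM_TAB_#") are padded with
+ // spaces; regular member names are terminated by '/'.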
+ if (Name[0] == '/' || Name[0] == '#')
+ EndCond = ' ';
+ else
+ EndCond = '/';
StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond);
if (end == StringRef::npos)
end = sizeof(Name);
@@ -47,12 +52,30 @@ struct ArchiveMemberHeader {
return ret.getZExtValue();
}
};
+}
-const ArchiveMemberHeader *ToHeader(const char *base) {
+static const ArchiveMemberHeader *ToHeader(const char *base) {
return reinterpret_cast<const ArchiveMemberHeader *>(base);
}
+
+
+static bool isInternalMember(const ArchiveMemberHeader &amh) {
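+ // Archive metadata members: the symbol table "/", the long-name string
+ // table "//", and LLVM's own symbol table "#_LLVM_SYM_TAB_#".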
+ const char *internals[] = {
+ "/",
+ "//",
+ "#_LLVM_SYM_TAB_#"
+ };
+
+ StringRef name = amh.getName();
+ for (std::size_t i = 0; i < sizeof(internals) / sizeof(*internals); ++i) {
+ if (name == internals[i])
+ return true;
+ }
+ return false;
}
+void Archive::anchor() { }
+
Archive::Child Archive::Child::getNext() const {
size_t SpaceToSkip = sizeof(ArchiveMemberHeader) +
ToHeader(Data.data())->getSize();
@@ -101,6 +124,11 @@ error_code Archive::Child::getName(StringRef &Result) const {
return object_error::parse_failed;
Result = addr;
return object_error::success;
+ } else if (name.startswith("#1/")) {
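+ // BSD-style long name: "#1/<n>" means the real name is stored in the
+ // first <n> bytes of the member data.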
+ APInt name_size;
+ name.substr(3).getAsInteger(10, name_size);
+ Result = Data.substr(sizeof(ArchiveMemberHeader), name_size.getZExtValue());
+ return object_error::success;
}
// It's a simple name.
if (name[name.size() - 1] == '/')
@@ -111,14 +139,27 @@ error_code Archive::Child::getName(StringRef &Result) const {
}
uint64_t Archive::Child::getSize() const {
- return ToHeader(Data.data())->getSize();
+ uint64_t size = ToHeader(Data.data())->getSize();
+ // Don't include attached name.
+ StringRef name = ToHeader(Data.data())->getName();
+ if (name.startswith("#1/")) {
+ APInt name_size;
+ name.substr(3).getAsInteger(10, name_size);
+ size -= name_size.getZExtValue();
+ }
+ return size;
}
MemoryBuffer *Archive::Child::getBuffer() const {
StringRef name;
if (getName(name)) return NULL;
- return MemoryBuffer::getMemBuffer(Data.substr(sizeof(ArchiveMemberHeader),
- getSize()),
+ int size = sizeof(ArchiveMemberHeader);
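+ // BSD-style long names sit at the front of the member data; skip past
+ // them to reach the actual contents.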
+ if (name.startswith("#1/")) {
+ APInt name_size;
+ name.substr(3).getAsInteger(10, name_size);
+ size += name_size.getZExtValue();
+ }
+ return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()),
name,
false);
}
@@ -133,8 +174,7 @@ error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result) const {
}
Archive::Archive(MemoryBuffer *source, error_code &ec)
- : Binary(Binary::isArchive, source)
- , StringTable(Child(this, StringRef(0, 0))) {
+ : Binary(Binary::ID_Archive, source) {
// Check for sufficient magic.
if (!source || source->getBufferSize()
< (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive.
@@ -143,30 +183,90 @@ Archive::Archive(MemoryBuffer *source, error_code &ec)
return;
}
- // Get the string table. It's the 3rd member.
- child_iterator StrTable = begin_children();
+ // Get the special members.
+ child_iterator i = begin_children(false);
child_iterator e = end_children();
- for (int i = 0; StrTable != e && i < 2; ++StrTable, ++i) {}
- // Check to see if there were 3 members, or the 3rd member wasn't named "//".
- StringRef name;
- if (StrTable != e && !StrTable->getName(name) && name == "//")
- StringTable = StrTable;
+ if (i != e) ++i; // Nobody cares about the first member.
+ if (i != e) {
+ SymbolTable = i;
+ ++i;
+ }
+ if (i != e) {
+ StringTable = i;
+ }
ec = object_error::success;
}
-Archive::child_iterator Archive::begin_children() const {
- const char *Loc = Data->getBufferStart() + Magic.size();
+Archive::child_iterator Archive::begin_children(bool skip_internal) const {
+ const char *Loc = Data->getBufferStart() + strlen(Magic);
size_t Size = sizeof(ArchiveMemberHeader) +
ToHeader(Loc)->getSize();
- return Child(this, StringRef(Loc, Size));
+ Child c(this, StringRef(Loc, Size));
+ // Skip internals at the beginning of an archive.
+ if (skip_internal && isInternalMember(*ToHeader(Loc)))
+ return c.getNext();
+ return c;
}
Archive::child_iterator Archive::end_children() const {
return Child(this, StringRef(0, 0));
}
-namespace llvm {
+error_code Archive::Symbol::getName(StringRef &Result) const {
+ Result =
+ StringRef(Parent->SymbolTable->getBuffer()->getBufferStart() + StringIndex);
+ return object_error::success;
+}
+
+error_code Archive::Symbol::getMember(child_iterator &Result) const {
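+ // The symbol table member's data is laid out as:
+ //   ulittle32_t member_count;
+ //   ulittle32_t offsets[member_count];   // file offsets of the members
+ //   ulittle32_t symbol_count;
+ //   ulittle16_t indices[symbol_count];   // 1-based indices into offsets
+ //   char strings[];                      // null-terminated symbol names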
+ const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart();
+ uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ const char *offsets = buf + 4;
+ buf += 4 + (member_count * 4); // Skip offsets.
+ const char *indices = buf + 4;
+
+ uint16_t offsetindex =
+ *(reinterpret_cast<const support::ulittle16_t*>(indices)
+ + SymbolIndex);
+
+ uint32_t offset = *(reinterpret_cast<const support::ulittle32_t*>(offsets)
+ + (offsetindex - 1));
-} // end namespace llvm
+ const char *Loc = Parent->getData().begin() + offset;
+ size_t Size = sizeof(ArchiveMemberHeader) +
+ ToHeader(Loc)->getSize();
+ Result = Child(Parent, StringRef(Loc, Size));
+
+ return object_error::success;
+}
+
+Archive::Symbol Archive::Symbol::getNext() const {
+ Symbol t(*this);
+ // Advance StringIndex past the terminating null of the current name.
+ t.StringIndex =
+ Parent->SymbolTable->getBuffer()->getBuffer().find('\0', t.StringIndex) + 1;
+ ++t.SymbolIndex;
+ return t;
+}
+
+Archive::symbol_iterator Archive::begin_symbols() const {
+ const char *buf = SymbolTable->getBuffer()->getBufferStart();
+ uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (symbol_count * 2); // Skip indices.
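+ // The null-terminated symbol names start here.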
+ uint32_t string_start_offset =
+ buf - SymbolTable->getBuffer()->getBufferStart();
+ return symbol_iterator(Symbol(this, 0, string_start_offset));
+}
+
+Archive::symbol_iterator Archive::end_symbols() const {
+ const char *buf = SymbolTable->getBuffer()->getBufferStart();
+ uint32_t member_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ buf += 4 + (member_count * 4); // Skip offsets.
+ uint32_t symbol_count = *reinterpret_cast<const support::ulittle32_t*>(buf);
+ return symbol_iterator(
+ Symbol(this, symbol_count, 0));
+}
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 86eb51a01646..c20fc0cc399d 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -9,8 +9,3 @@ add_llvm_library(LLVMObject
Object.cpp
ObjectFile.cpp
)
-
-add_llvm_library_dependencies(LLVMObject
- LLVMCore
- LLVMSupport
- )
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 750c34d12a18..bd27a56e73b9 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/COFF.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
@@ -98,24 +99,10 @@ error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
StringRef &Result) const {
const coff_symbol *symb = toSymb(Symb);
- // Check for string table entry. First 4 bytes are 0.
- if (symb->Name.Offset.Zeroes == 0) {
- uint32_t Offset = symb->Name.Offset.Offset;
- if (error_code ec = getString(Offset, Result))
- return ec;
- return object_error::success;
- }
-
- if (symb->Name.ShortName[7] == 0)
- // Null terminated, let ::strlen figure out the length.
- Result = StringRef(symb->Name.ShortName);
- else
- // Not null terminated, use all 8 bytes.
- Result = StringRef(symb->Name.ShortName, 8);
- return object_error::success;
+ return getSymbolName(symb, Result);
}
-error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
+error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb,
uint64_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
const coff_section *Section = NULL;
@@ -127,7 +114,7 @@ error_code COFFObjectFile::getSymbolOffset(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = Section->VirtualAddress + symb->Value;
+ Result = Section->PointerToRawData + symb->Value;
else
Result = symb->Value;
return object_error::success;
@@ -145,23 +132,21 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
if (Type == 'U' || Type == 'w')
Result = UnknownAddressOrSize;
else if (Section)
- Result = reinterpret_cast<uintptr_t>(base() +
- Section->PointerToRawData +
- symb->Value);
+ Result = Section->VirtualAddress + symb->Value;
else
- Result = reinterpret_cast<uintptr_t>(base() + symb->Value);
+ Result = symb->Value;
return object_error::success;
}
error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
- SymbolRef::SymbolType &Result) const {
+ SymbolRef::Type &Result) const {
const coff_symbol *symb = toSymb(Symb);
Result = SymbolRef::ST_Other;
if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) {
- Result = SymbolRef::ST_External;
+ Result = SymbolRef::ST_Unknown;
} else {
- if (symb->Type.ComplexType == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
+ if (symb->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) {
Result = SymbolRef::ST_Function;
} else {
char Type;
@@ -175,10 +160,27 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Symb,
return object_error::success;
}
-error_code COFFObjectFile::isSymbolGlobal(DataRefImpl Symb,
- bool &Result) const {
+error_code COFFObjectFile::getSymbolFlags(DataRefImpl Symb,
+ uint32_t &Result) const {
const coff_symbol *symb = toSymb(Symb);
- Result = (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ Result = SymbolRef::SF_None;
+
+ // TODO: Correctly set SF_FormatSpecific, SF_ThreadLocal, SF_Common
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL &&
+ symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED)
+ Result |= SymbolRef::SF_Undefined;
+
+ // TODO: These are certainly too restrictive.
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
+ Result |= SymbolRef::SF_Global;
+
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
+ Result |= SymbolRef::SF_Weak;
+
+ if (symb->SectionNumber == COFF::IMAGE_SYM_ABSOLUTE)
+ Result |= SymbolRef::SF_Absolute;
+
return object_error::success;
}
@@ -233,7 +235,9 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
Result = 'w';
return object_error::success; // Don't do ::toupper.
- } else
+ } else if (symb->Value != 0) // Check for common symbols.
+ ret = 'c';
+ else
ret = 'u';
break;
case COFF::IMAGE_SYM_ABSOLUTE:
@@ -269,9 +273,18 @@ error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
return object_error::success;
}
-error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb,
- bool &Result) const {
- Result = false;
+error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ if (symb->SectionNumber <= COFF::IMAGE_SYM_UNDEFINED)
+ Result = end_sections();
+ else {
+ const coff_section *sec = 0;
+ if (error_code ec = getSection(symb->SectionNumber, sec)) return ec;
+ DataRefImpl Sec;
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = section_iterator(SectionRef(Sec, this));
+ }
return object_error::success;
}
@@ -287,24 +300,7 @@ error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
StringRef &Result) const {
const coff_section *sec = toSec(Sec);
- StringRef name;
- if (sec->Name[7] == 0)
- // Null terminated, let ::strlen figure out the length.
- name = sec->Name;
- else
- // Not null terminated, use all 8 bytes.
- name = StringRef(sec->Name, 8);
-
- // Check for string table entry. First byte is '/'.
- if (name[0] == '/') {
- uint32_t Offset;
- name.substr(1).getAsInteger(10, Offset);
- if (error_code ec = getString(Offset, name))
- return ec;
- }
-
- Result = name;
- return object_error::success;
+ return getSectionName(sec, Result);
}
error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
@@ -324,16 +320,10 @@ error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
StringRef &Result) const {
const coff_section *sec = toSec(Sec);
- // The only thing that we need to verify is that the contents is contained
- // within the file bounds. We don't need to make sure it doesn't cover other
- // data, as there's nothing that says that is not allowed.
- uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
- uintptr_t con_end = con_start + sec->SizeOfRawData;
- if (con_end >= uintptr_t(Data->getBufferEnd()))
- return object_error::parse_failed;
- Result = StringRef(reinterpret_cast<const char*>(con_start),
- sec->SizeOfRawData);
- return object_error::success;
+ ArrayRef<uint8_t> Res;
+ error_code EC = getSectionContents(sec, Res);
+ Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size());
+ return EC;
}
error_code COFFObjectFile::getSectionAlignment(DataRefImpl Sec,
@@ -366,12 +356,33 @@ error_code COFFObjectFile::isSectionBSS(DataRefImpl Sec,
return object_error::success;
}
+error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = true;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
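+ // Sections holding only uninitialized data occupy no space in the file.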
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = false;
+ return object_error::success;
+}
+
error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
const coff_section *sec = toSec(Sec);
const coff_symbol *symb = toSymb(Symb);
- const coff_section *symb_sec;
+ const coff_section *symb_sec = 0;
if (error_code ec = getSection(symb->SectionNumber, symb_sec)) return ec;
if (symb_sec == sec)
Result = true;
@@ -383,7 +394,6 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
const coff_section *sec = toSec(Sec);
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(ret));
if (sec->NumberOfRelocations == 0)
ret.p = 0;
else
@@ -395,7 +405,6 @@ relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
const coff_section *sec = toSec(Sec);
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(ret));
if (sec->NumberOfRelocations == 0)
ret.p = 0;
else
@@ -408,7 +417,12 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
}
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
- : ObjectFile(Binary::isCOFF, Object, ec) {
+ : ObjectFile(Binary::ID_COFF, Object, ec)
+ , Header(0)
+ , SectionTable(0)
+ , SymbolTable(0)
+ , StringTable(0)
+ , StringTableSize(0) {
// Check that we at least have enough room for a header.
if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
@@ -421,7 +435,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
// PE/COFF, seek through MS-DOS compatibility stub and 4-byte
// PE signature to find 'normal' COFF header.
if (!checkSize(Data, ec, 0x3c + 8)) return;
- HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+ HeaderStart = *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
// Check the PE header. ("PE\0\0")
if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) {
ec = object_error::parse_failed;
@@ -443,28 +457,30 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
Header->NumberOfSections * sizeof(coff_section)))
return;
- SymbolTable =
- reinterpret_cast<const coff_symbol *>(base()
- + Header->PointerToSymbolTable);
- if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
- Header->NumberOfSymbols * sizeof(coff_symbol)))
- return;
+ if (Header->PointerToSymbolTable != 0) {
+ SymbolTable =
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
- // Find string table.
- StringTable = reinterpret_cast<const char *>(base())
- + Header->PointerToSymbolTable
- + Header->NumberOfSymbols * sizeof(coff_symbol);
- if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
- return;
+ // Find string table.
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
- StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
- if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
- return;
- // Check that the string table is null terminated if has any in it.
- if (StringTableSize < 4
- || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
- ec = object_error::parse_failed;
- return;
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if it has anything in it.
+ if (StringTableSize < 4
+ || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+ ec = object_error::parse_failed;
+ return;
+ }
}
ec = object_error::success;
@@ -472,7 +488,6 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
symbol_iterator COFFObjectFile::begin_symbols() const {
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SymbolTable);
return symbol_iterator(SymbolRef(ret, this));
}
@@ -480,21 +495,44 @@ symbol_iterator COFFObjectFile::begin_symbols() const {
symbol_iterator COFFObjectFile::end_symbols() const {
// The symbol table ends where the string table begins.
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(StringTable);
return symbol_iterator(SymbolRef(ret, this));
}
+symbol_iterator COFFObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in COFFObjectFile");
+}
+
+symbol_iterator COFFObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in COFFObjectFile");
+}
+
+library_iterator COFFObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
+}
+
+library_iterator COFFObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Libraries needed unimplemented in COFFObjectFile");
+}
+
+StringRef COFFObjectFile::getLoadName() const {
+ // COFF does not have this field.
+ return "";
+}
+
+
section_iterator COFFObjectFile::begin_sections() const {
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable);
return section_iterator(SectionRef(ret, this));
}
section_iterator COFFObjectFile::end_sections() const {
DataRefImpl ret;
- std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
return section_iterator(SectionRef(ret, this));
}
@@ -525,6 +563,11 @@ unsigned COFFObjectFile::getArch() const {
}
}
+error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
+ Res = Header;
+ return object_error::success;
+}
+
error_code COFFObjectFile::getSection(int32_t index,
const coff_section *&Result) const {
// Check for special index values.
@@ -553,13 +596,69 @@ error_code COFFObjectFile::getString(uint32_t offset,
error_code COFFObjectFile::getSymbol(uint32_t index,
const coff_symbol *&Result) const {
- if (index > 0 && index < Header->NumberOfSymbols)
+ if (index < Header->NumberOfSymbols)
Result = SymbolTable + index;
else
return object_error::parse_failed;
return object_error::success;
}
+error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
+ StringRef &Res) const {
+ // Check for string table entry. First 4 bytes are 0.
+ if (symbol->Name.Offset.Zeroes == 0) {
+ uint32_t Offset = symbol->Name.Offset.Offset;
+ if (error_code ec = getString(Offset, Res))
+ return ec;
+ return object_error::success;
+ }
+
+ if (symbol->Name.ShortName[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Res = StringRef(symbol->Name.ShortName);
+ else
+ // Not null terminated, use all 8 bytes.
+ Res = StringRef(symbol->Name.ShortName, 8);
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionName(const coff_section *Sec,
+ StringRef &Res) const {
+ StringRef Name;
+ if (Sec->Name[7] == 0)
+ // Null terminated, let ::strlen figure out the length.
+ Name = Sec->Name;
+ else
+ // Not null terminated, use all 8 bytes.
+ Name = StringRef(Sec->Name, 8);
+
+ // Check for string table entry. First byte is '/'.
+ if (Name[0] == '/') {
+ uint32_t Offset;
+ if (Name.substr(1).getAsInteger(10, Offset))
+ return object_error::parse_failed;
+ if (error_code ec = getString(Offset, Name))
+ return ec;
+ }
+
+ Res = Name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionContents(const coff_section *Sec,
+ ArrayRef<uint8_t> &Res) const {
+ // The only thing that we need to verify is that the contents are contained
+ // within the file bounds. We don't need to make sure they don't cover other
+ // data, as there's nothing that says that is not allowed.
+ uintptr_t ConStart = uintptr_t(base()) + Sec->PointerToRawData;
+ uintptr_t ConEnd = ConStart + Sec->SizeOfRawData;
+ if (ConEnd > uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Res = ArrayRef<uint8_t>(reinterpret_cast<const unsigned char*>(ConStart),
+ Sec->SizeOfRawData);
+ return object_error::success;
+}
+
const coff_relocation *COFFObjectFile::toRel(DataRefImpl Rel) const {
return reinterpret_cast<const coff_relocation*>(Rel.p);
}
@@ -575,6 +674,11 @@ error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel,
Res = toRel(Rel)->VirtualAddress;
return object_error::success;
}
+error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ Res = toRel(Rel)->VirtualAddress;
+ return object_error::success;
+}
error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
SymbolRef &Res) const {
const coff_relocation* R = toRel(Rel);
@@ -584,7 +688,7 @@ error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
return object_error::success;
}
error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const {
+ uint64_t &Res) const {
const coff_relocation* R = toRel(Rel);
Res = R->Type;
return object_error::success;
@@ -658,7 +762,6 @@ error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
const coff_symbol *symb = 0;
if (error_code ec = getSymbol(reloc->SymbolTableIndex, symb)) return ec;
DataRefImpl sym;
- ::memset(&sym, 0, sizeof(sym));
sym.p = reinterpret_cast<uintptr_t>(symb);
StringRef symname;
if (error_code ec = getSymbolName(sym, symname)) return ec;
@@ -666,6 +769,16 @@ error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
return object_error::success;
}
+error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Result) const {
+ report_fatal_error("getLibraryNext not implemented in COFFObjectFile");
+}
+
+error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Result) const {
+ report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
+}
+
namespace llvm {
ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index 257d08cadff3..ab5f8108af13 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -7,1405 +7,44 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the ELFObjectFile class.
+// Part of the ELFObjectFile class implementation.
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <limits>
-#include <utility>
+#include "llvm/Object/ELF.h"
-using namespace llvm;
-using namespace object;
-
-// Templates to choose Elf_Addr and Elf_Off depending on is64Bits.
-namespace {
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelperCommon {
- typedef support::detail::packed_endian_specific_integral
- <uint16_t, target_endianness, support::aligned> Elf_Half;
- typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Word;
- typedef support::detail::packed_endian_specific_integral
- <int32_t, target_endianness, support::aligned> Elf_Sword;
- typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Xword;
- typedef support::detail::packed_endian_specific_integral
- <int64_t, target_endianness, support::aligned> Elf_Sxword;
-};
-}
-
-namespace {
-template<support::endianness target_endianness, bool is64Bits>
-struct ELFDataTypeTypedefHelper;
-
-/// ELF 32bit types.
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelper<target_endianness, false>
- : ELFDataTypeTypedefHelperCommon<target_endianness> {
- typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Addr;
- typedef support::detail::packed_endian_specific_integral
- <uint32_t, target_endianness, support::aligned> Elf_Off;
-};
-
-/// ELF 64bit types.
-template<support::endianness target_endianness>
-struct ELFDataTypeTypedefHelper<target_endianness, true>
- : ELFDataTypeTypedefHelperCommon<target_endianness>{
- typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Addr;
- typedef support::detail::packed_endian_specific_integral
- <uint64_t, target_endianness, support::aligned> Elf_Off;
-};
-}
-
-// I really don't like doing this, but the alternative is copypasta.
-#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \
-typedef typename \
- ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword;
-
- // Section header.
-namespace {
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Shdr_Base;
-
-template<support::endianness target_endianness>
-struct Elf_Shdr_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Word sh_name; // Section name (index into string table)
- Elf_Word sh_type; // Section type (SHT_*)
- Elf_Word sh_flags; // Section flags (SHF_*)
- Elf_Addr sh_addr; // Address where section is to be loaded
- Elf_Off sh_offset; // File offset of section data, in bytes
- Elf_Word sh_size; // Size of section, in bytes
- Elf_Word sh_link; // Section type-specific header table index link
- Elf_Word sh_info; // Section type-specific extra information
- Elf_Word sh_addralign;// Section address alignment
- Elf_Word sh_entsize; // Size of records contained within the section
-};
-
-template<support::endianness target_endianness>
-struct Elf_Shdr_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Word sh_name; // Section name (index into string table)
- Elf_Word sh_type; // Section type (SHT_*)
- Elf_Xword sh_flags; // Section flags (SHF_*)
- Elf_Addr sh_addr; // Address where section is to be loaded
- Elf_Off sh_offset; // File offset of section data, in bytes
- Elf_Xword sh_size; // Size of section, in bytes
- Elf_Word sh_link; // Section type-specific header table index link
- Elf_Word sh_info; // Section type-specific extra information
- Elf_Xword sh_addralign;// Section address alignment
- Elf_Xword sh_entsize; // Size of records contained within the section
-};
-
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> {
- using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize;
- using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size;
-
- /// @brief Get the number of entities this section contains if it has any.
- unsigned getEntityCount() const {
- if (sh_entsize == 0)
- return 0;
- return sh_size / sh_entsize;
- }
-};
-}
-
-namespace {
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Sym_Base;
-
-template<support::endianness target_endianness>
-struct Elf_Sym_Base<target_endianness, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Word st_name; // Symbol name (index into string table)
- Elf_Addr st_value; // Value or address associated with the symbol
- Elf_Word st_size; // Size of the symbol
- unsigned char st_info; // Symbol's type and binding attributes
- unsigned char st_other; // Must be zero; reserved
- Elf_Half st_shndx; // Which section (header table index) it's defined in
-};
-
-template<support::endianness target_endianness>
-struct Elf_Sym_Base<target_endianness, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Word st_name; // Symbol name (index into string table)
- unsigned char st_info; // Symbol's type and binding attributes
- unsigned char st_other; // Must be zero; reserved
- Elf_Half st_shndx; // Which section (header table index) it's defined in
- Elf_Addr st_value; // Value or address associated with the symbol
- Elf_Xword st_size; // Size of the symbol
-};
-
-template<support::endianness target_endianness, bool is64Bits>
-struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> {
- using Elf_Sym_Base<target_endianness, is64Bits>::st_info;
-
- // These accessors and mutators correspond to the ELF32_ST_BIND,
- // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
- unsigned char getBinding() const { return st_info >> 4; }
- unsigned char getType() const { return st_info & 0x0f; }
- void setBinding(unsigned char b) { setBindingAndType(b, getType()); }
- void setType(unsigned char t) { setBindingAndType(getBinding(), t); }
- void setBindingAndType(unsigned char b, unsigned char t) {
- st_info = (b << 4) + (t & 0x0f);
- }
-};
-}
-
-namespace {
-template<support::endianness target_endianness, bool is64Bits, bool isRela>
-struct Elf_Rel_Base;
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, false, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Word r_info; // Symbol table index and type of relocation to apply
-};
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, true, false> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Xword r_info; // Symbol table index and type of relocation to apply
-};
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, false, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Word r_info; // Symbol table index and type of relocation to apply
- Elf_Sword r_addend; // Compute value for relocatable field by adding this
-};
-
-template<support::endianness target_endianness>
-struct Elf_Rel_Base<target_endianness, true, true> {
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
- Elf_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf_Xword r_info; // Symbol table index and type of relocation to apply
- Elf_Sxword r_addend; // Compute value for relocatable field by adding this.
-};
-
-template<support::endianness target_endianness, bool is64Bits, bool isRela>
-struct Elf_Rel_Impl;
-
-template<support::endianness target_endianness, bool isRela>
-struct Elf_Rel_Impl<target_endianness, true, isRela>
- : Elf_Rel_Base<target_endianness, true, isRela> {
- using Elf_Rel_Base<target_endianness, true, isRela>::r_info;
- LLVM_ELF_IMPORT_TYPES(target_endianness, true)
-
- // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
- // and ELF64_R_INFO macros defined in the ELF specification:
- uint64_t getSymbol() const { return (r_info >> 32); }
- unsigned char getType() const {
- return (unsigned char) (r_info & 0xffffffffL);
- }
- void setSymbol(uint64_t s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(uint64_t s, unsigned char t) {
- r_info = (s << 32) + (t&0xffffffffL);
- }
-};
-
-template<support::endianness target_endianness, bool isRela>
-struct Elf_Rel_Impl<target_endianness, false, isRela>
- : Elf_Rel_Base<target_endianness, false, isRela> {
- using Elf_Rel_Base<target_endianness, false, isRela>::r_info;
- LLVM_ELF_IMPORT_TYPES(target_endianness, false)
-
- // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
- // and ELF32_R_INFO macros defined in the ELF specification:
- uint32_t getSymbol() const { return (r_info >> 8); }
- unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
- void setSymbol(uint32_t s) { setSymbolAndType(s, getType()); }
- void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
- void setSymbolAndType(uint32_t s, unsigned char t) {
- r_info = (s << 8) + t;
- }
-};
+namespace llvm {
-}
+using namespace object;
namespace {
-template<support::endianness target_endianness, bool is64Bits>
-class ELFObjectFile : public ObjectFile {
- LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits)
-
- typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr;
- typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, false> Elf_Rel;
- typedef Elf_Rel_Impl<target_endianness, is64Bits, true> Elf_Rela;
-
- struct Elf_Ehdr {
- unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes
- Elf_Half e_type; // Type of file (see ET_*)
- Elf_Half e_machine; // Required architecture for this file (see EM_*)
- Elf_Word e_version; // Must be equal to 1
- Elf_Addr e_entry; // Address to jump to in order to start program
- Elf_Off e_phoff; // Program header table's file offset, in bytes
- Elf_Off e_shoff; // Section header table's file offset, in bytes
- Elf_Word e_flags; // Processor-specific flags
- Elf_Half e_ehsize; // Size of ELF header, in bytes
- Elf_Half e_phentsize;// Size of an entry in the program header table
- Elf_Half e_phnum; // Number of entries in the program header table
- Elf_Half e_shentsize;// Size of an entry in the section header table
- Elf_Half e_shnum; // Number of entries in the section header table
- Elf_Half e_shstrndx; // Section header table index of section name
- // string table
- bool checkMagic() const {
- return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0;
- }
- unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
- unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
- };
-
- typedef SmallVector<const Elf_Shdr*, 1> Sections_t;
- typedef DenseMap<unsigned, unsigned> IndexMap_t;
- typedef DenseMap<const Elf_Shdr*, SmallVector<uint32_t, 1> > RelocMap_t;
-
- const Elf_Ehdr *Header;
- const Elf_Shdr *SectionHeaderTable;
- const Elf_Shdr *dot_shstrtab_sec; // Section header string table.
- const Elf_Shdr *dot_strtab_sec; // Symbol header string table.
- Sections_t SymbolTableSections;
- IndexMap_t SymbolTableSectionsIndexMap;
- DenseMap<const Elf_Sym*, ELF::Elf64_Word> ExtendedSymbolTable;
-
- /// @brief Map sections to an array of relocation sections that reference
- /// them sorted by section index.
- RelocMap_t SectionRelocMap;
-
- /// @brief Get the relocation section that contains \a Rel.
- const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
- return getSection(Rel.w.b);
- }
-
- void validateSymbol(DataRefImpl Symb) const;
- bool isRelocationHasAddend(DataRefImpl Rel) const;
- template<typename T>
- const T *getEntry(uint16_t Section, uint32_t Entry) const;
- template<typename T>
- const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
- const Elf_Sym *getSymbol(DataRefImpl Symb) const;
- const Elf_Shdr *getSection(DataRefImpl index) const;
- const Elf_Shdr *getSection(uint32_t index) const;
- const Elf_Rel *getRel(DataRefImpl Rel) const;
- const Elf_Rela *getRela(DataRefImpl Rela) const;
- const char *getString(uint32_t section, uint32_t offset) const;
- const char *getString(const Elf_Shdr *section, uint32_t offset) const;
- error_code getSymbolName(const Elf_Sym *Symb, StringRef &Res) const;
-
-protected:
- virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
- virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
- virtual error_code getSymbolOffset(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
- virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
- virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
- virtual error_code isSymbolGlobal(DataRefImpl Symb, bool &Res) const;
- virtual error_code getSymbolType(DataRefImpl Symb, SymbolRef::SymbolType &Res) const;
-
- virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
- virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
- virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
- virtual error_code getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const;
- virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
- virtual error_code isSectionData(DataRefImpl Sec, bool &Res) const;
- virtual error_code isSectionBSS(DataRefImpl Sec, bool &Res) const;
- virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
- bool &Result) const;
- virtual relocation_iterator getSectionRelBegin(DataRefImpl Sec) const;
- virtual relocation_iterator getSectionRelEnd(DataRefImpl Sec) const;
-
- virtual error_code getRelocationNext(DataRefImpl Rel,
- RelocationRef &Res) const;
- virtual error_code getRelocationAddress(DataRefImpl Rel,
- uint64_t &Res) const;
- virtual error_code getRelocationSymbol(DataRefImpl Rel,
- SymbolRef &Res) const;
- virtual error_code getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const;
- virtual error_code getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const;
- virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel,
- int64_t &Res) const;
- virtual error_code getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const;
-
-public:
- ELFObjectFile(MemoryBuffer *Object, error_code &ec);
- virtual symbol_iterator begin_symbols() const;
- virtual symbol_iterator end_symbols() const;
- virtual section_iterator begin_sections() const;
- virtual section_iterator end_sections() const;
-
- virtual uint8_t getBytesInAddress() const;
- virtual StringRef getFileFormatName() const;
- virtual unsigned getArch() const;
-
- uint64_t getNumSections() const;
- uint64_t getStringTableIndex() const;
- ELF::Elf64_Word getSymbolTableIndex(const Elf_Sym *symb) const;
- const Elf_Shdr *getSection(const Elf_Sym *symb) const;
-};
-} // end namespace
-
-template<support::endianness target_endianness, bool is64Bits>
-void ELFObjectFile<target_endianness, is64Bits>
- ::validateSymbol(DataRefImpl Symb) const {
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
- // FIXME: We really need to do proper error handling in the case of an invalid
- // input file. Because we don't use exceptions, I think we'll just pass
- // an error object around.
- if (!( symb
- && SymbolTableSection
- && symb >= (const Elf_Sym*)(base()
- + SymbolTableSection->sh_offset)
- && symb < (const Elf_Sym*)(base()
- + SymbolTableSection->sh_offset
- + SymbolTableSection->sh_size)))
- // FIXME: Proper error handling.
- report_fatal_error("Symb must point to a valid symbol!");
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNext(DataRefImpl Symb,
- SymbolRef &Result) const {
- validateSymbol(Symb);
- const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
-
- ++Symb.d.a;
- // Check to see if we are at the end of this symbol table.
- if (Symb.d.a >= SymbolTableSection->getEntityCount()) {
- // We are at the end. If there are other symbol tables, jump to them.
- ++Symb.d.b;
- Symb.d.a = 1; // The 0th symbol in ELF is fake.
- // Otherwise return the terminator.
- if (Symb.d.b >= SymbolTableSections.size()) {
- Symb.d.a = std::numeric_limits<uint32_t>::max();
- Symb.d.b = std::numeric_limits<uint32_t>::max();
- }
- }
-
- Result = SymbolRef(Symb, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(DataRefImpl Symb,
- StringRef &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- return getSymbolName(symb, Result);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-ELF::Elf64_Word ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolTableIndex(const Elf_Sym *symb) const {
- if (symb->st_shndx == ELF::SHN_XINDEX)
- return ExtendedSymbolTable.lookup(symb);
- return symb->st_shndx;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>
- ::getSection(const Elf_Sym *symb) const {
- if (symb->st_shndx == ELF::SHN_XINDEX)
- return getSection(ExtendedSymbolTable.lookup(symb));
- if (symb->st_shndx >= ELF::SHN_LORESERVE)
- return 0;
- return getSection(symb->st_shndx);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolOffset(DataRefImpl Symb,
- uint64_t &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section;
- switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_COMMON:
- // Undefined symbols have no address yet.
- case ELF::SHN_UNDEF:
- Result = UnknownAddressOrSize;
- return object_error::success;
- case ELF::SHN_ABS:
- Result = symb->st_value;
- return object_error::success;
- default: Section = getSection(symb);
- }
-
- switch (symb->getType()) {
- case ELF::STT_SECTION:
- Result = Section ? Section->sh_addr : UnknownAddressOrSize;
- return object_error::success;
- case ELF::STT_FUNC:
- case ELF::STT_OBJECT:
- case ELF::STT_NOTYPE:
- Result = symb->st_value;
- return object_error::success;
- default:
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolAddress(DataRefImpl Symb,
- uint64_t &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section;
- switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_COMMON: // Fall through.
- // Undefined symbols have no address yet.
- case ELF::SHN_UNDEF:
- Result = UnknownAddressOrSize;
- return object_error::success;
- case ELF::SHN_ABS:
- Result = reinterpret_cast<uintptr_t>(base()+symb->st_value);
- return object_error::success;
- default: Section = getSection(symb);
+ std::pair<unsigned char, unsigned char>
+ getElfArchType(MemoryBuffer *Object) {
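+ // Peek at e_ident: EI_CLASS selects 32- vs 64-bit, EI_DATA the byte order.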
+ if (Object->getBufferSize() < ELF::EI_NIDENT)
+ return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
+ return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
+ , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
}
- const uint8_t* addr = base();
- if (Section)
- addr += Section->sh_offset;
- switch (symb->getType()) {
- case ELF::STT_SECTION:
- Result = reinterpret_cast<uintptr_t>(addr);
- return object_error::success;
- case ELF::STT_FUNC: // Fall through.
- case ELF::STT_OBJECT: // Fall through.
- case ELF::STT_NOTYPE:
- addr += symb->st_value;
- Result = reinterpret_cast<uintptr_t>(addr);
- return object_error::success;
- default:
- Result = UnknownAddressOrSize;
- return object_error::success;
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolSize(DataRefImpl Symb,
- uint64_t &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- if (symb->st_size == 0)
- Result = UnknownAddressOrSize;
- Result = symb->st_size;
- return object_error::success;
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNMTypeChar(DataRefImpl Symb,
- char &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
- const Elf_Shdr *Section = getSection(symb);
-
- char ret = '?';
+// Creates an in-memory object file from a buffer: createELFObjectFile(Buffer).
+ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
+ std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ error_code ec;
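+ // Instantiate the matching template specialization for the file's
+ // (class, endianness) pair.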
- if (Section) {
- switch (Section->sh_type) {
- case ELF::SHT_PROGBITS:
- case ELF::SHT_DYNAMIC:
- switch (Section->sh_flags) {
- case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR):
- ret = 't'; break;
- case (ELF::SHF_ALLOC | ELF::SHF_WRITE):
- ret = 'd'; break;
- case ELF::SHF_ALLOC:
- case (ELF::SHF_ALLOC | ELF::SHF_MERGE):
- case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS):
- ret = 'r'; break;
- }
- break;
- case ELF::SHT_NOBITS: ret = 'b';
- }
+ if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
+ return new ELFObjectFile<support::little, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, false>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
+ return new ELFObjectFile<support::big, true>(Object, ec);
+ else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) {
+ ELFObjectFile<support::little, true> *result =
+ new ELFObjectFile<support::little, true>(Object, ec);
+ return result;
}
- switch (getSymbolTableIndex(symb)) {
- case ELF::SHN_UNDEF:
- if (ret == '?')
- ret = 'U';
- break;
- case ELF::SHN_ABS: ret = 'a'; break;
- case ELF::SHN_COMMON: ret = 'c'; break;
- }
-
- switch (symb->getBinding()) {
- case ELF::STB_GLOBAL: ret = ::toupper(ret); break;
- case ELF::STB_WEAK:
- if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF)
- ret = 'w';
- else
- if (symb->getType() == ELF::STT_OBJECT)
- ret = 'V';
- else
- ret = 'W';
- }
-
- if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
- StringRef name;
- if (error_code ec = getSymbolName(Symb, name))
- return ec;
- Result = StringSwitch<char>(name)
- .StartsWith(".debug", 'N')
- .StartsWith(".note", 'n')
- .Default('?');
- return object_error::success;
- }
-
- Result = ret;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolType(DataRefImpl Symb,
- SymbolRef::SymbolType &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
-
- if (getSymbolTableIndex(symb) == ELF::SHN_UNDEF) {
- Result = SymbolRef::ST_External;
- return object_error::success;
- }
-
- switch (symb->getType()) {
- case ELF::STT_FUNC:
- Result = SymbolRef::ST_Function;
- break;
- case ELF::STT_OBJECT:
- Result = SymbolRef::ST_Data;
- break;
- default:
- Result = SymbolRef::ST_Other;
- break;
- }
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSymbolGlobal(DataRefImpl Symb,
- bool &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
-
- Result = symb->getBinding() == ELF::STB_GLOBAL;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSymbolInternal(DataRefImpl Symb,
- bool &Result) const {
- validateSymbol(Symb);
- const Elf_Sym *symb = getSymbol(Symb);
-
- if ( symb->getType() == ELF::STT_FILE
- || symb->getType() == ELF::STT_SECTION)
- Result = true;
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
- const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
- sec += Header->e_shentsize;
- Sec.p = reinterpret_cast<intptr_t>(sec);
- Result = SectionRef(Sec, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionName(DataRefImpl Sec,
- StringRef &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAddress(DataRefImpl Sec,
- uint64_t &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = sec->sh_addr;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionSize(DataRefImpl Sec,
- uint64_t &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = sec->sh_size;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionContents(DataRefImpl Sec,
- StringRef &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- const char *start = (const char*)base() + sec->sh_offset;
- Result = StringRef(start, sec->sh_size);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAlignment(DataRefImpl Sec,
- uint64_t &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- Result = sec->sh_addralign;
- return object_error::success;
+ report_fatal_error("Buffer is not an ELF object file!");
}
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionText(DataRefImpl Sec,
- bool &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- if (sec->sh_flags & ELF::SHF_EXECINSTR)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionData(DataRefImpl Sec,
- bool &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
- && sec->sh_type == ELF::SHT_PROGBITS)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::isSectionBSS(DataRefImpl Sec,
- bool &Result) const {
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- if (sec->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE)
- && sec->sh_type == ELF::SHT_NOBITS)
- Result = true;
- else
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::sectionContainsSymbol(DataRefImpl Sec,
- DataRefImpl Symb,
- bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-relocation_iterator ELFObjectFile<target_endianness, is64Bits>
- ::getSectionRelBegin(DataRefImpl Sec) const {
- DataRefImpl RelData;
- memset(&RelData, 0, sizeof(RelData));
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
- if (sec != 0 && ittr != SectionRelocMap.end()) {
- RelData.w.a = getSection(ittr->second[0])->sh_info;
- RelData.w.b = ittr->second[0];
- RelData.w.c = 0;
- }
- return relocation_iterator(RelocationRef(RelData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-relocation_iterator ELFObjectFile<target_endianness, is64Bits>
- ::getSectionRelEnd(DataRefImpl Sec) const {
- DataRefImpl RelData;
- memset(&RelData, 0, sizeof(RelData));
- const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- typename RelocMap_t::const_iterator ittr = SectionRelocMap.find(sec);
- if (sec != 0 && ittr != SectionRelocMap.end()) {
- // Get the index of the last relocation section for this section.
- std::size_t relocsecindex = ittr->second[ittr->second.size() - 1];
- const Elf_Shdr *relocsec = getSection(relocsecindex);
- RelData.w.a = relocsec->sh_info;
- RelData.w.b = relocsecindex;
- RelData.w.c = relocsec->sh_size / relocsec->sh_entsize;
- }
- return relocation_iterator(RelocationRef(RelData, this));
-}
-
-// Relocations
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationNext(DataRefImpl Rel,
- RelocationRef &Result) const {
- ++Rel.w.c;
- const Elf_Shdr *relocsec = getSection(Rel.w.b);
- if (Rel.w.c >= (relocsec->sh_size / relocsec->sh_entsize)) {
- // We have reached the end of the relocations for this section. See if there
- // is another relocation section.
- typename RelocMap_t::mapped_type relocseclist =
- SectionRelocMap.lookup(getSection(Rel.w.a));
-
- // Do a binary search for the current reloc section index (which must be
- // present). Then get the next one.
- typename RelocMap_t::mapped_type::const_iterator loc =
- std::lower_bound(relocseclist.begin(), relocseclist.end(), Rel.w.b);
- ++loc;
-
- // If there is no next one, don't do anything. The ++Rel.w.c above sets Rel
- // to the end iterator.
- if (loc != relocseclist.end()) {
- Rel.w.b = *loc;
- Rel.w.a = 0;
- }
- }
- Result = RelocationRef(Rel, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationSymbol(DataRefImpl Rel,
- SymbolRef &Result) const {
- uint32_t symbolIdx;
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- symbolIdx = getRel(Rel)->getSymbol();
- break;
- }
- case ELF::SHT_RELA : {
- symbolIdx = getRela(Rel)->getSymbol();
- break;
- }
- }
- DataRefImpl SymbolData;
- IndexMap_t::const_iterator it = SymbolTableSectionsIndexMap.find(sec->sh_link);
- if (it == SymbolTableSectionsIndexMap.end())
- report_fatal_error("Relocation symbol table not found!");
- SymbolData.d.a = symbolIdx;
- SymbolData.d.b = it->second;
- Result = SymbolRef(SymbolData, this);
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationAddress(DataRefImpl Rel,
- uint64_t &Result) const {
- uint64_t offset;
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- offset = getRel(Rel)->r_offset;
- break;
- }
- case ELF::SHT_RELA : {
- offset = getRela(Rel)->r_offset;
- break;
- }
- }
-
- Result = offset;
- return object_error::success;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationType(DataRefImpl Rel,
- uint32_t &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- Result = getRel(Rel)->getType();
- break;
- }
- case ELF::SHT_RELA : {
- Result = getRela(Rel)->getType();
- break;
- }
- }
- return object_error::success;
-}
-
-#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \
- case ELF::enum: res = #enum; break;
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationTypeName(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- uint8_t type;
- StringRef res;
- switch (sec->sh_type) {
- default :
- return object_error::parse_failed;
- case ELF::SHT_REL : {
- type = getRel(Rel)->getType();
- break;
- }
- case ELF::SHT_RELA : {
- type = getRela(Rel)->getType();
- break;
- }
- }
- switch (Header->e_machine) {
- case ELF::EM_X86_64:
- switch (type) {
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC);
- default:
- res = "Unknown";
- }
- break;
- case ELF::EM_386:
- switch (type) {
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC);
- LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE);
- default:
- res = "Unknown";
- }
- break;
- default:
- res = "Unknown";
- }
- Result.append(res.begin(), res.end());
- return object_error::success;
-}
-
-#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME
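
Each use of LLVM_ELF_SWITCH_RELOC_TYPE_NAME above expands to a case label that stringizes the enumerator, e.g. `case ELF::R_X86_64_PC32: res = "R_X86_64_PC32"; break;`. A stripped-down, self-contained analogue of the pattern (enumerator names hypothetical):

    #include <cstdio>

    #define SWITCH_NAME(e) case e: name = #e; break;

    enum Reloc { R_NONE, R_PC32 };

    static const char *relocName(Reloc r) {
      const char *name = "Unknown";
      switch (r) {
        SWITCH_NAME(R_NONE)
        SWITCH_NAME(R_PC32)
      }
      return name;
    }

    int main() { std::printf("%s\n", relocName(R_PC32)); } // prints R_PC32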
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationAdditionalInfo(DataRefImpl Rel,
- int64_t &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- switch (sec->sh_type) {
- default :
- report_fatal_error("Invalid section type in Rel!");
- case ELF::SHT_REL : {
- Result = 0;
- return object_error::success;
- }
- case ELF::SHT_RELA : {
- Result = getRela(Rel)->r_addend;
- return object_error::success;
- }
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getRelocationValueString(DataRefImpl Rel,
- SmallVectorImpl<char> &Result) const {
- const Elf_Shdr *sec = getSection(Rel.w.b);
- uint8_t type;
- StringRef res;
- int64_t addend = 0;
- uint16_t symbol_index = 0;
- switch (sec->sh_type) {
- default :
- return object_error::parse_failed;
- case ELF::SHT_REL : {
- type = getRel(Rel)->getType();
- symbol_index = getRel(Rel)->getSymbol();
- // TODO: Read implicit addend from section data.
- break;
- }
- case ELF::SHT_RELA : {
- type = getRela(Rel)->getType();
- symbol_index = getRela(Rel)->getSymbol();
- addend = getRela(Rel)->r_addend;
- break;
- }
- }
- const Elf_Sym *symb = getEntry<Elf_Sym>(sec->sh_link, symbol_index);
- StringRef symname;
- if (error_code ec = getSymbolName(symb, symname))
- return ec;
- switch (Header->e_machine) {
- case ELF::EM_X86_64:
- switch (type) {
- case ELF::R_X86_64_32S:
- res = symname;
- break;
- case ELF::R_X86_64_PC32: {
- std::string fmtbuf;
- raw_string_ostream fmt(fmtbuf);
- fmt << symname << (addend < 0 ? "" : "+") << addend << "-P";
- fmt.flush();
- Result.append(fmtbuf.begin(), fmtbuf.end());
- }
- break;
- default:
- res = "Unknown";
- }
- break;
- default:
- res = "Unknown";
- }
- if (Result.empty())
- Result.append(res.begin(), res.end());
- return object_error::success;
-}
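
On the TODO above: SHT_REL relocations store their addend implicitly in the bytes being relocated, not in the relocation record, so producing a value string for them would mean reading the target section. A hedged sketch, assuming a SectionData pointer for the section that r_offset indexes into (both names hypothetical) and ignoring endianness:

    #include <cstdint>
    #include <cstring>

    // Sketch: recover the implicit addend of a 32-bit REL relocation from
    // the section contents at the relocation's offset.
    static int32_t readImplicitAddend32(const uint8_t *SectionData,
                                        uint64_t r_offset) {
      int32_t addend;
      std::memcpy(&addend, SectionData + r_offset, sizeof(addend));
      return addend;
    }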
-
-template<support::endianness target_endianness, bool is64Bits>
-ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
- , error_code &ec)
- : ObjectFile(Binary::isELF, Object, ec)
- , SectionHeaderTable(0)
- , dot_shstrtab_sec(0)
- , dot_strtab_sec(0) {
- Header = reinterpret_cast<const Elf_Ehdr *>(base());
-
- if (Header->e_shoff == 0)
- return;
-
- SectionHeaderTable =
- reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
- uint64_t SectionTableSize = getNumSections() * Header->e_shentsize;
- if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
- <= base() + Data->getBufferSize()))
- // FIXME: Proper error handling.
- report_fatal_error("Section table goes past end of file!");
-
-
- // To find the symbol tables we walk the section table to find SHT_SYMTAB.
- const Elf_Shdr* SymbolTableSectionHeaderIndex = 0;
- const Elf_Shdr* sh = reinterpret_cast<const Elf_Shdr*>(SectionHeaderTable);
- for (uint64_t i = 0, e = getNumSections(); i != e; ++i) {
- if (sh->sh_type == ELF::SHT_SYMTAB_SHNDX) {
- if (SymbolTableSectionHeaderIndex)
- // FIXME: Proper error handling.
- report_fatal_error("More than one .symtab_shndx!");
- SymbolTableSectionHeaderIndex = sh;
- }
- if (sh->sh_type == ELF::SHT_SYMTAB) {
- SymbolTableSectionsIndexMap[i] = SymbolTableSections.size();
- SymbolTableSections.push_back(sh);
- }
- if (sh->sh_type == ELF::SHT_REL || sh->sh_type == ELF::SHT_RELA) {
- SectionRelocMap[getSection(sh->sh_info)].push_back(i);
- }
- ++sh;
- }
-
- // Sort section relocation lists by index.
- for (typename RelocMap_t::iterator i = SectionRelocMap.begin(),
- e = SectionRelocMap.end(); i != e; ++i) {
- std::sort(i->second.begin(), i->second.end());
- }
-
- // Get string table sections.
- dot_shstrtab_sec = getSection(getStringTableIndex());
- if (dot_shstrtab_sec) {
-    // Verify that the last byte in the string table is a null terminator.
- if (((const char*)base() + dot_shstrtab_sec->sh_offset)
- [dot_shstrtab_sec->sh_size - 1] != 0)
- // FIXME: Proper error handling.
- report_fatal_error("String table must end with a null terminator!");
- }
-
- // Merge this into the above loop.
- for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable),
- *e = i + getNumSections() * Header->e_shentsize;
- i != e; i += Header->e_shentsize) {
- const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i);
- if (sh->sh_type == ELF::SHT_STRTAB) {
- StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name));
- if (SectionName == ".strtab") {
- if (dot_strtab_sec != 0)
- // FIXME: Proper error handling.
- report_fatal_error("Already found section named .strtab!");
- dot_strtab_sec = sh;
- const char *dot_strtab = (const char*)base() + sh->sh_offset;
- if (dot_strtab[sh->sh_size - 1] != 0)
- // FIXME: Proper error handling.
- report_fatal_error("String table must end with a null terminator!");
- }
- }
- }
-
- // Build symbol name side-mapping if there is one.
- if (SymbolTableSectionHeaderIndex) {
- const Elf_Word *ShndxTable = reinterpret_cast<const Elf_Word*>(base() +
- SymbolTableSectionHeaderIndex->sh_offset);
- error_code ec;
- for (symbol_iterator si = begin_symbols(),
- se = end_symbols(); si != se; si.increment(ec)) {
- if (ec)
- report_fatal_error("Fewer extended symbol table entries than symbols!");
- if (*ShndxTable != ELF::SHN_UNDEF)
- ExtendedSymbolTable[getSymbol(si->getRawDataRefImpl())] = *ShndxTable;
- ++ShndxTable;
- }
- }
-}
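
One detail worth noting in the constructor: the first pass strides with ++sh, i.e. by sizeof(Elf_Shdr), while the second pass strides by Header->e_shentsize. The byte-stride form is the defensive one, since ELF permits e_shentsize to be larger than the in-memory struct. A sketch of that indexing, with hypothetical names:

    #include <cstdint>

    // Sketch: index section header Index using the file-declared entry size
    // rather than sizeof(Shdr).
    template <typename Shdr>
    static const Shdr *sectionHeaderAt(const uint8_t *Table, uint16_t EntSize,
                                       uint64_t Index) {
      return reinterpret_cast<const Shdr *>(Table + Index * EntSize);
    }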
-
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_symbols() const {
- DataRefImpl SymbolData;
- memset(&SymbolData, 0, sizeof(SymbolData));
- if (SymbolTableSections.size() == 0) {
- SymbolData.d.a = std::numeric_limits<uint32_t>::max();
- SymbolData.d.b = std::numeric_limits<uint32_t>::max();
- } else {
- SymbolData.d.a = 1; // The 0th symbol in ELF is fake.
- SymbolData.d.b = 0;
- }
- return symbol_iterator(SymbolRef(SymbolData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-symbol_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_symbols() const {
- DataRefImpl SymbolData;
- memset(&SymbolData, 0, sizeof(SymbolData));
- SymbolData.d.a = std::numeric_limits<uint32_t>::max();
- SymbolData.d.b = std::numeric_limits<uint32_t>::max();
- return symbol_iterator(SymbolRef(SymbolData, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::begin_sections() const {
- DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
- ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
- return section_iterator(SectionRef(ret, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-section_iterator ELFObjectFile<target_endianness, is64Bits>
- ::end_sections() const {
- DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
- ret.p = reinterpret_cast<intptr_t>(base()
- + Header->e_shoff
- + (Header->e_shentsize*getNumSections()));
- return section_iterator(SectionRef(ret, this));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const {
- return is64Bits ? 8 : 4;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getFileFormatName() const {
- switch(Header->e_ident[ELF::EI_CLASS]) {
- case ELF::ELFCLASS32:
- switch(Header->e_machine) {
- case ELF::EM_386:
- return "ELF32-i386";
- case ELF::EM_X86_64:
- return "ELF32-x86-64";
- case ELF::EM_ARM:
- return "ELF32-arm";
- default:
- return "ELF32-unknown";
- }
- case ELF::ELFCLASS64:
- switch(Header->e_machine) {
- case ELF::EM_386:
- return "ELF64-i386";
- case ELF::EM_X86_64:
- return "ELF64-x86-64";
- default:
- return "ELF64-unknown";
- }
- default:
- // FIXME: Proper error handling.
- report_fatal_error("Invalid ELFCLASS!");
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const {
- switch(Header->e_machine) {
- case ELF::EM_386:
- return Triple::x86;
- case ELF::EM_X86_64:
- return Triple::x86_64;
- case ELF::EM_ARM:
- return Triple::arm;
- default:
- return Triple::UnknownArch;
- }
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>::getNumSections() const {
- if (Header->e_shnum == ELF::SHN_UNDEF)
- return SectionHeaderTable->sh_size;
- return Header->e_shnum;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-uint64_t
-ELFObjectFile<target_endianness, is64Bits>::getStringTableIndex() const {
- if (Header->e_shnum == ELF::SHN_UNDEF) {
- if (Header->e_shstrndx == ELF::SHN_HIRESERVE)
- return SectionHeaderTable->sh_link;
- if (Header->e_shstrndx >= getNumSections())
- return 0;
- }
- return Header->e_shstrndx;
-}
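
getNumSections and getStringTableIndex together implement ELF extended section numbering: when the 16-bit header fields overflow, e_shnum is left as SHN_UNDEF and the real count is escaped into sh_size of section header 0, and likewise the real string-table index goes into its sh_link (SHN_HIRESERVE here is numerically the same 0xffff escape value as SHN_XINDEX). A compact sketch of the rule, with simplified struct fields:

    #include <cstdint>

    // Sketch of extended numbering (fields simplified): out-of-range counts
    // and indices are stored in section header 0.
    struct MiniEhdr { uint16_t e_shnum, e_shstrndx; };
    struct MiniShdr { uint64_t sh_size; uint32_t sh_link; };

    static uint64_t numSections(const MiniEhdr &H, const MiniShdr &Shdr0) {
      return H.e_shnum == 0 ? Shdr0.sh_size : H.e_shnum;        // 0 == SHN_UNDEF
    }

    static uint32_t strTabIndex(const MiniEhdr &H, const MiniShdr &Shdr0) {
      return H.e_shstrndx == 0xffff ? Shdr0.sh_link : H.e_shstrndx; // SHN_XINDEX
    }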
-
-
-template<support::endianness target_endianness, bool is64Bits>
-template<typename T>
-inline const T *
-ELFObjectFile<target_endianness, is64Bits>::getEntry(uint16_t Section,
- uint32_t Entry) const {
- return getEntry<T>(getSection(Section), Entry);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-template<typename T>
-inline const T *
-ELFObjectFile<target_endianness, is64Bits>::getEntry(const Elf_Shdr * Section,
- uint32_t Entry) const {
- return reinterpret_cast<const T *>(
- base()
- + Section->sh_offset
- + (Entry * Section->sh_entsize));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
-ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
- return getEntry<Elf_Sym>(SymbolTableSections[Symb.d.b], Symb.d.a);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rel *
-ELFObjectFile<target_endianness, is64Bits>::getRel(DataRefImpl Rel) const {
- return getEntry<Elf_Rel>(Rel.w.b, Rel.w.c);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Rela *
-ELFObjectFile<target_endianness, is64Bits>::getRela(DataRefImpl Rela) const {
- return getEntry<Elf_Rela>(Rela.w.b, Rela.w.c);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const {
- const Elf_Shdr *sec = getSection(Symb.d.b);
-  if (sec->sh_type != ELF::SHT_SYMTAB && sec->sh_type != ELF::SHT_DYNSYM)
- // FIXME: Proper error handling.
- report_fatal_error("Invalid symbol table section!");
- return sec;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr *
-ELFObjectFile<target_endianness, is64Bits>::getSection(uint32_t index) const {
- if (index == 0)
- return 0;
- if (!SectionHeaderTable || index >= getNumSections())
- // FIXME: Proper error handling.
- report_fatal_error("Invalid section index!");
-
- return reinterpret_cast<const Elf_Shdr *>(
- reinterpret_cast<const char *>(SectionHeaderTable)
- + (index * Header->e_shentsize));
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(uint32_t section,
- ELF::Elf32_Word offset) const {
- return getString(getSection(section), offset);
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-const char *ELFObjectFile<target_endianness, is64Bits>
- ::getString(const Elf_Shdr *section,
- ELF::Elf32_Word offset) const {
- assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
- if (offset >= section->sh_size)
- // FIXME: Proper error handling.
- report_fatal_error("Symbol name offset outside of string table!");
- return (const char *)base() + section->sh_offset + offset;
-}
-
-template<support::endianness target_endianness, bool is64Bits>
-error_code ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(const Elf_Sym *symb,
- StringRef &Result) const {
- if (symb->st_name == 0) {
- const Elf_Shdr *section = getSection(symb);
- if (!section)
- Result = "";
- else
- Result = getString(dot_shstrtab_sec, section->sh_name);
- return object_error::success;
- }
-
- // Use the default symbol table name section.
- Result = getString(dot_strtab_sec, symb->st_name);
- return object_error::success;
-}
-
-// EI_CLASS, EI_DATA.
-static std::pair<unsigned char, unsigned char>
-getElfArchType(MemoryBuffer *Object) {
- if (Object->getBufferSize() < ELF::EI_NIDENT)
- return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE);
- return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS]
- , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]);
-}
-
-namespace llvm {
-
- ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
- std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
- error_code ec;
- if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, false>(Object, ec);
- else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, false>(Object, ec);
- else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, true>(Object, ec);
- else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, true>(Object, ec);
- // FIXME: Proper error handling.
- report_fatal_error("Not an ELF object file!");
- }
-
} // end namespace llvm
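
A usage sketch for the factory above, using the same MemoryBuffer/OwningPtr API that ObjectFile::createObjectFile in lib/Object/ObjectFile.cpp uses (error handling abbreviated):

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Object/ObjectFile.h"
    #include "llvm/Support/MemoryBuffer.h"

    using namespace llvm;
    using namespace llvm::object;

    // Sketch: map a file and hand the buffer to the ELF factory, which
    // takes ownership of it.
    static ObjectFile *openELF(StringRef Path) {
      OwningPtr<MemoryBuffer> File;
      if (MemoryBuffer::getFile(Path, File))
        return 0;
      return ObjectFile::createELFObjectFile(File.take());
    }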
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
new file mode 100644
index 000000000000..69610f991fdc
--- /dev/null
+++ b/lib/Object/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Object/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Object
+parent = Libraries
+required_libraries = Support
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 9cdac8681ddd..b7e5cdcd6b84 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -10,11 +10,12 @@
#include "llvm/Object/MachOObject.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/SwapByteOrder.h"
using namespace llvm;
using namespace llvm::object;
@@ -359,25 +360,13 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
void MachOObject::ReadULEB128s(uint64_t Index,
SmallVectorImpl<uint64_t> &Out) const {
- const char *ptr = Buffer->getBufferStart() + Index;
+ DataExtractor extractor(Buffer->getBuffer(), true, 0);
+
+ uint32_t offset = Index;
uint64_t data = 0;
- uint64_t delta = 0;
- uint32_t shift = 0;
- while (true) {
- assert(ptr < Buffer->getBufferEnd() && "index out of bounds");
- assert(shift < 64 && "too big for uint64_t");
-
- uint8_t byte = *ptr++;
- delta |= ((byte & 0x7F) << shift);
- shift += 7;
- if (byte < 0x80) {
- if (delta == 0)
- break;
- data += delta;
- Out.push_back(data);
- delta = 0;
- shift = 0;
- }
+ while (uint64_t delta = extractor.getULEB128(&offset)) {
+ data += delta;
+ Out.push_back(data);
}
}
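
The rewrite above delegates the byte-level decoding to DataExtractor::getULEB128. For reference, a self-contained decoder for a single ULEB128 value, equivalent to what the deleted loop did per iteration: each byte contributes seven low-order bits, and a clear high bit marks the last byte.

    #include <cstdint>

    // Sketch: decode one unsigned LEB128 value starting at p; if n is
    // non-null, report how many bytes were consumed.
    static uint64_t decodeULEB128(const uint8_t *p, unsigned *n = 0) {
      const uint8_t *start = p;
      uint64_t value = 0;
      unsigned shift = 0;
      do {
        value |= uint64_t(*p & 0x7f) << shift;
        shift += 7;
      } while (*p++ & 0x80);
      if (n)
        *n = unsigned(p - start);
      return value;
    }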
@@ -393,7 +382,7 @@ void MachOObject::printHeader(raw_ostream &O) const {
O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
O << "('flag', " << Header.Flags << ")\n";
-
+
// Print extended header if 64-bit.
if (is64Bit())
O << "('reserved', " << Header64Ext.Reserved << ")\n";
@@ -403,6 +392,6 @@ void MachOObject::print(raw_ostream &O) const {
O << "Header:\n";
printHeader(O);
O << "Load Commands:\n";
-
+
O << "Buffer:\n";
}
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 507df5865eb1..3bcda1700b8c 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
@@ -29,11 +30,10 @@ namespace object {
MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO,
error_code &ec)
- : ObjectFile(Binary::isMachO, Object, ec),
+ : ObjectFile(Binary::ID_MachO, Object, ec),
MachOObj(MOO),
RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {
DataRefImpl DRI;
- DRI.d.a = DRI.d.b = 0;
moveToNextSection(DRI);
uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
while (DRI.d.a < LoadCommandCount) {
@@ -124,23 +124,27 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::getSymbolOffset(DataRefImpl DRI,
- uint64_t &Result) const {
- uint64_t SectionOffset;
- uint8_t SectionIndex;
+error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl DRI,
+ uint64_t &Result) const {
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section64> Section;
+ getSection64(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(DRI, Entry);
Result = Entry->Value;
- SectionIndex = Entry->SectionIndex;
+ if (Entry->SectionIndex) {
+ InMemoryStruct<macho::Section> Section;
+ getSection(Sections[Entry->SectionIndex-1], Section);
+ Result += Section->Offset - Section->Address;
+ }
}
- getSectionAddress(Sections[SectionIndex-1], SectionOffset);
- Result -= SectionOffset;
return object_error::success;
}
@@ -161,7 +165,74 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
uint64_t &Result) const {
- Result = UnknownAddressOrSize;
+ uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands;
+ uint64_t BeginOffset;
+ uint64_t EndOffset = 0;
+ uint8_t SectionIndex;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Entry->Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+    // Unfortunately, symbols are unsorted, so we need to touch all
+    // symbols in this load command.
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbol64TableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ BeginOffset = Entry->Value;
+ SectionIndex = Entry->SectionIndex;
+ if (!SectionIndex) {
+ uint32_t flags = SymbolRef::SF_None;
+ getSymbolFlags(DRI, flags);
+ if (flags & SymbolRef::SF_Common)
+ Result = Entry->Value;
+ else
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+    // Unfortunately, symbols are unsorted, so we need to touch all
+    // symbols in this load command.
+ DRI.d.b = 0;
+ uint32_t Command = DRI.d.a;
+ while (Command == DRI.d.a) {
+ moveToNextSymbol(DRI);
+ if (DRI.d.a < LoadCommandCount) {
+ getSymbolTableEntry(DRI, Entry);
+ if (Entry->SectionIndex == SectionIndex && Entry->Value > BeginOffset)
+ if (!EndOffset || Entry->Value < EndOffset)
+ EndOffset = Entry->Value;
+ }
+ DRI.d.b++;
+ }
+ }
+ if (!EndOffset) {
+ uint64_t Size;
+ getSectionSize(Sections[SectionIndex-1], Size);
+ getSectionAddress(Sections[SectionIndex-1], EndOffset);
+ EndOffset += Size;
+ }
+ Result = EndOffset - BeginOffset;
return object_error::success;
}
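
Stated abstractly, the size rule above is: a symbol extends to the smallest symbol value in the same section that is strictly greater than its own, or to the end of the section if there is none. A sketch over plain values (inputs hypothetical):

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Sketch: distance from Begin to the next symbol boundary in the same
    // section, falling back to the section end.
    static uint64_t symbolSize(uint64_t Begin, uint64_t SectionEnd,
                               const std::vector<uint64_t> &SameSection) {
      uint64_t End = 0;
      for (std::size_t i = 0, e = SameSection.size(); i != e; ++i)
        if (SameSection[i] > Begin && (!End || SameSection[i] < End))
          End = SameSection[i];
      if (!End)
        End = SectionEnd;
      return End - Begin;
    }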
@@ -200,36 +271,69 @@ error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
return object_error::success;
}
-error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
- bool &Result) const {
+error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI,
+ uint32_t &Result) const {
+ uint16_t MachOFlags;
+ uint8_t MachOType;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(DRI, Entry);
- Result = Entry->Flags & macho::STF_StabsEntryMask;
+ MachOFlags = Entry->Flags;
+ MachOType = Entry->Type;
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(DRI, Entry);
- Result = Entry->Flags & macho::STF_StabsEntryMask;
+ MachOFlags = Entry->Flags;
+ MachOType = Entry->Type;
}
+
+ // TODO: Correctly set SF_ThreadLocal
+ Result = SymbolRef::SF_None;
+
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ Result |= SymbolRef::SF_Undefined;
+
+ if (MachOFlags & macho::STF_StabsEntryMask)
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ if (MachOType & MachO::NlistMaskExternal) {
+ Result |= SymbolRef::SF_Global;
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined)
+ Result |= SymbolRef::SF_Common;
+ }
+
+ if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef))
+ Result |= SymbolRef::SF_Weak;
+
+ if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute)
+ Result |= SymbolRef::SF_Absolute;
+
return object_error::success;
}
-error_code MachOObjectFile::isSymbolGlobal(DataRefImpl Symb, bool &Res) const {
-
+error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
+ section_iterator &Res) const {
+ uint8_t index;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
getSymbol64TableEntry(Symb, Entry);
- Res = Entry->Type & MachO::NlistMaskExternal;
+ index = Entry->SectionIndex;
} else {
InMemoryStruct<macho::SymbolTableEntry> Entry;
getSymbolTableEntry(Symb, Entry);
- Res = Entry->Type & MachO::NlistMaskExternal;
+ index = Entry->SectionIndex;
}
+
+ if (index == 0)
+ Res = end_sections();
+ else
+ Res = section_iterator(SectionRef(Sections[index-1], this));
+
return object_error::success;
}
error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
- SymbolRef::SymbolType &Res) const {
+ SymbolRef::Type &Res) const {
uint8_t n_type;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Symbol64TableEntry> Entry;
@@ -243,12 +347,14 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
Res = SymbolRef::ST_Other;
// If this is a STAB debugging symbol, we can do nothing more.
- if (n_type & MachO::NlistMaskStab)
+ if (n_type & MachO::NlistMaskStab) {
+ Res = SymbolRef::ST_Debug;
return object_error::success;
+ }
switch (n_type & MachO::NlistMaskType) {
case MachO::NListTypeUndefined :
- Res = SymbolRef::ST_External;
+ Res = SymbolRef::ST_Unknown;
break;
case MachO::NListTypeSection :
Res = SymbolRef::ST_Function;
@@ -261,7 +367,6 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb,
symbol_iterator MachOObjectFile::begin_symbols() const {
// DRI.d.a = segment number; DRI.d.b = symbol index.
DataRefImpl DRI;
- DRI.d.a = DRI.d.b = 0;
moveToNextSymbol(DRI);
return symbol_iterator(SymbolRef(DRI, this));
}
@@ -269,10 +374,33 @@ symbol_iterator MachOObjectFile::begin_symbols() const {
symbol_iterator MachOObjectFile::end_symbols() const {
DataRefImpl DRI;
DRI.d.a = MachOObj->getHeader().NumLoadCommands;
- DRI.d.b = 0;
return symbol_iterator(SymbolRef(DRI, this));
}
+symbol_iterator MachOObjectFile::begin_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+symbol_iterator MachOObjectFile::end_dynamic_symbols() const {
+ // TODO: implement
+ report_fatal_error("Dynamic symbols unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::begin_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+library_iterator MachOObjectFile::end_libraries_needed() const {
+ // TODO: implement
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+StringRef MachOObjectFile::getLoadName() const {
+ // TODO: Implement
+ report_fatal_error("get_load_name() unimplemented in MachOObjectFile");
+}
/*===-- Sections ----------------------------------------------------------===*/
@@ -451,12 +579,43 @@ error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI,
return object_error::success;
}
+error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = true;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec,
+ bool &Result) const {
+ // FIXME: Unimplemented
+ Result = false;
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionZeroInit(DataRefImpl DRI,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = (Sect->Flags & MachO::SectionTypeZeroFill ||
+ Sect->Flags & MachO::SectionTypeZeroFillLarge);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = (Sect->Flags & MachO::SectionTypeZeroFill ||
+ Sect->Flags & MachO::SectionTypeZeroFillLarge);
+ }
+
+ return object_error::success;
+}
+
error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
DataRefImpl Symb,
bool &Result) const {
- SymbolRef::SymbolType ST;
+ SymbolRef::Type ST;
getSymbolType(Symb, ST);
- if (ST == SymbolRef::ST_External) {
+ if (ST == SymbolRef::ST_Unknown) {
Result = false;
return object_error::success;
}
@@ -483,7 +642,6 @@ error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
DataRefImpl ret;
- ret.d.a = 0;
ret.d.b = getSectionIndex(Sec);
return relocation_iterator(RelocationRef(ret, this));
}
@@ -506,7 +664,6 @@ relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
section_iterator MachOObjectFile::begin_sections() const {
DataRefImpl DRI;
- DRI.d.a = DRI.d.b = 0;
moveToNextSection(DRI);
return section_iterator(SectionRef(DRI, this));
}
@@ -514,7 +671,6 @@ section_iterator MachOObjectFile::begin_sections() const {
section_iterator MachOObjectFile::end_sections() const {
DataRefImpl DRI;
DRI.d.a = MachOObj->getHeader().NumLoadCommands;
- DRI.d.b = 0;
return section_iterator(SectionRef(DRI, this));
}
@@ -543,19 +699,43 @@ error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel,
}
error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel,
uint64_t &Res) const {
- const uint8_t* sectAddress = base();
+ const uint8_t* sectAddress = 0;
if (MachOObj->is64Bit()) {
InMemoryStruct<macho::Section64> Sect;
getSection64(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Offset;
+ sectAddress += Sect->Address;
} else {
InMemoryStruct<macho::Section> Sect;
getSection(Sections[Rel.d.b], Sect);
- sectAddress += Sect->Offset;
+ sectAddress += Sect->Address;
}
InMemoryStruct<macho::RelocationEntry> RE;
getRelocation(Rel, RE);
- Res = reinterpret_cast<uintptr_t>(sectAddress + RE->Word0);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ uint64_t RelAddr = 0;
+ if (isScattered)
+ RelAddr = RE->Word0 & 0xFFFFFF;
+ else
+ RelAddr = RE->Word0;
+
+ Res = reinterpret_cast<uintptr_t>(sectAddress + RelAddr);
+ return object_error::success;
+}
+error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
+ uint64_t &Res) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ if (isScattered)
+ Res = RE->Word0 & 0xFFFFFF;
+ else
+ Res = RE->Word0;
return object_error::success;
}
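
The same field extraction recurs throughout the routines below; collected in one place, the bit layout the code assumes is (a sketch, mirroring the masks used here):

    #include <cstdint>

    // Sketch of the Mach-O relocation words as used above. Scattered
    // entries keep type/pcrel/address in Word0; plain entries keep
    // type/pcrel in Word1 and the raw offset in Word0.
    static unsigned scatteredType(uint32_t Word0)  { return (Word0 >> 24) & 0xF; }
    static bool     scatteredPCRel(uint32_t Word0) { return (Word0 >> 30) & 1; }
    static uint32_t scatteredAddr(uint32_t Word0)  { return Word0 & 0xFFFFFF; }
    static unsigned plainType(uint32_t Word1)      { return (Word1 >> 28) & 0xF; }
    static bool     plainPCRel(uint32_t Word1)     { return (Word1 >> 24) & 1; }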
error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
@@ -566,7 +746,6 @@ error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
bool isExtern = (RE->Word1 >> 27) & 1;
DataRefImpl Sym;
- Sym.d.a = Sym.d.b = 0;
moveToNextSymbol(Sym);
if (isExtern) {
for (unsigned i = 0; i < SymbolIdx; i++) {
@@ -580,14 +759,112 @@ error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel,
return object_error::success;
}
error_code MachOObjectFile::getRelocationType(DataRefImpl Rel,
- uint32_t &Res) const {
+ uint64_t &Res) const {
InMemoryStruct<macho::RelocationEntry> RE;
getRelocation(Rel, RE);
- Res = RE->Word1;
+ Res = RE->Word0;
+ Res <<= 32;
+ Res |= RE->Word1;
return object_error::success;
}
error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
+ // TODO: Support scattered relocations.
+ StringRef res;
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ unsigned r_type;
+ if (isScattered)
+ r_type = (RE->Word0 >> 24) & 0xF;
+ else
+ r_type = (RE->Word1 >> 28) & 0xF;
+
+ switch (Arch) {
+ case Triple::x86: {
+ const char* Table[] = {
+ "GENERIC_RELOC_VANILLA",
+ "GENERIC_RELOC_PAIR",
+ "GENERIC_RELOC_SECTDIFF",
+ "GENERIC_RELOC_PB_LA_PTR",
+ "GENERIC_RELOC_LOCAL_SECTDIFF",
+ "GENERIC_RELOC_TLV" };
+
+    if (r_type > 5)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::x86_64: {
+ const char* Table[] = {
+ "X86_64_RELOC_UNSIGNED",
+ "X86_64_RELOC_SIGNED",
+ "X86_64_RELOC_BRANCH",
+ "X86_64_RELOC_GOT_LOAD",
+ "X86_64_RELOC_GOT",
+ "X86_64_RELOC_SUBTRACTOR",
+ "X86_64_RELOC_SIGNED_1",
+ "X86_64_RELOC_SIGNED_2",
+ "X86_64_RELOC_SIGNED_4",
+ "X86_64_RELOC_TLV" };
+
+ if (r_type > 9)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::arm: {
+ const char* Table[] = {
+ "ARM_RELOC_VANILLA",
+ "ARM_RELOC_PAIR",
+ "ARM_RELOC_SECTDIFF",
+ "ARM_RELOC_LOCAL_SECTDIFF",
+ "ARM_RELOC_PB_LA_PTR",
+ "ARM_RELOC_BR24",
+ "ARM_THUMB_RELOC_BR22",
+ "ARM_THUMB_32BIT_BRANCH",
+ "ARM_RELOC_HALF",
+ "ARM_RELOC_HALF_SECTDIFF" };
+
+ if (r_type > 9)
+ res = "Unknown";
+ else
+ res = Table[r_type];
+ break;
+ }
+ case Triple::ppc: {
+ const char* Table[] = {
+ "PPC_RELOC_VANILLA",
+ "PPC_RELOC_PAIR",
+ "PPC_RELOC_BR14",
+ "PPC_RELOC_BR24",
+ "PPC_RELOC_HI16",
+ "PPC_RELOC_LO16",
+ "PPC_RELOC_HA16",
+ "PPC_RELOC_LO14",
+ "PPC_RELOC_SECTDIFF",
+ "PPC_RELOC_PB_LA_PTR",
+ "PPC_RELOC_HI16_SECTDIFF",
+ "PPC_RELOC_LO16_SECTDIFF",
+ "PPC_RELOC_HA16_SECTDIFF",
+ "PPC_RELOC_JBSR",
+ "PPC_RELOC_LO14_SECTDIFF",
+ "PPC_RELOC_LOCAL_SECTDIFF" };
+
+ res = Table[r_type];
+ break;
+ }
+ case Triple::UnknownArch:
+ res = "Unknown";
+ break;
+ }
+ Result.append(res.begin(), res.end());
return object_error::success;
}
error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
@@ -611,11 +888,356 @@ error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
}
return object_error::success;
}
+
+// Helper to advance a section or symbol iterator multiple increments at a time.
+template<class T>
+error_code advance(T &it, size_t Val) {
+ error_code ec;
+ while (Val--) {
+ it.increment(ec);
+ }
+ return ec;
+}
+
+template<class T>
+void advanceTo(T &it, size_t Val) {
+ if (error_code ec = advance(it, Val))
+ report_fatal_error(ec.message());
+}
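
printRelocationTargetName below uses these helpers to turn a flat symbol or section number into an iterator; the pattern, as a sketch:

    // Sketch: resolve a flat symbol index to an iterator using the helpers
    // above (Obj assumed to be a valid MachOObjectFile).
    static symbol_iterator symbolAt(const MachOObjectFile &Obj, size_t Index) {
      symbol_iterator SI = Obj.begin_symbols();
      advanceTo(SI, Index);
      return SI;
    }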
+
+void MachOObjectFile::printRelocationTargetName(
+ InMemoryStruct<macho::RelocationEntry>& RE,
+ raw_string_ostream &fmt) const {
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+  // The target of a scattered relocation is an address. In the interest of
+ // generating pretty output, scan through the symbol table looking for a
+ // symbol that aligns with that address. If we find one, print it.
+ // Otherwise, we just print the hex address of the target.
+ if (isScattered) {
+ uint32_t Val = RE->Word1;
+
+ error_code ec;
+ for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE;
+ SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ // If we couldn't find a symbol that this relocation refers to, try
+ // to find a section beginning instead.
+ for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE;
+ SI.increment(ec)) {
+ if (ec) report_fatal_error(ec.message());
+
+ uint64_t Addr;
+ StringRef Name;
+
+ if ((ec = SI->getAddress(Addr)))
+ report_fatal_error(ec.message());
+ if (Addr != Val) continue;
+ if ((ec = SI->getName(Name)))
+ report_fatal_error(ec.message());
+ fmt << Name;
+ return;
+ }
+
+ fmt << format("0x%x", Val);
+ return;
+ }
+
+ StringRef S;
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ uint32_t Val = RE->Word1 & 0xFFFFFF;
+
+ if (isExtern) {
+ symbol_iterator SI = begin_symbols();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ } else {
+ section_iterator SI = begin_sections();
+ advanceTo(SI, Val);
+ SI->getName(S);
+ }
+
+ fmt << S;
+}
+
error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+
+ unsigned Type;
+ if (isScattered)
+ Type = (RE->Word0 >> 24) & 0xF;
+ else
+ Type = (RE->Word1 >> 28) & 0xF;
+
+ bool isPCRel;
+ if (isScattered)
+ isPCRel = ((RE->Word0 >> 30) & 1);
+ else
+ isPCRel = ((RE->Word1 >> 24) & 1);
+
+ // Determine any addends that should be displayed with the relocation.
+ // These require decoding the relocation type, which is triple-specific.
+
+ // X86_64 has entirely custom relocation types.
+ if (Arch == Triple::x86_64) {
+ bool isPCRel = ((RE->Word1 >> 24) & 1);
+
+ switch (Type) {
+ case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD
+ case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT
+ printRelocationTargetName(RE, fmt);
+ fmt << "@GOT";
+ if (isPCRel) fmt << "PCREL";
+ break;
+ }
+ case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // X86_64_SUBTRACTOR must be followed by a relocation of type
+ // X86_64_RELOC_UNSIGNED.
+ // NOTE: Scattered relocations don't exist on x86_64.
+ unsigned RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 0)
+ report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
+ "X86_64_RELOC_SUBTRACTOR.");
+
+      // The X86_64_RELOC_UNSIGNED contains the minuend symbol;
+      // X86_64_RELOC_SUBTRACTOR contains the subtrahend.
+ printRelocationTargetName(RENext, fmt);
+ fmt << "-";
+ printRelocationTargetName(RE, fmt);
+      break;
+    }
+ case macho::RIT_X86_64_TLV:
+ printRelocationTargetName(RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel) fmt << "P";
+ break;
+ case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1
+ printRelocationTargetName(RE, fmt);
+ fmt << "-1";
+ break;
+ case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2
+ printRelocationTargetName(RE, fmt);
+ fmt << "-2";
+ break;
+ case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4
+ printRelocationTargetName(RE, fmt);
+ fmt << "-4";
+ break;
+ default:
+ printRelocationTargetName(RE, fmt);
+ break;
+ }
+  // X86 and ARM share some relocation types.
+ } else if (Arch == Triple::x86 || Arch == Triple::arm) {
+ // Generic relocation types...
+ switch (Type) {
+ case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info
+ return object_error::success;
+ case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+      // X86 sect diffs must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 1)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_SECTDIFF.");
+
+ printRelocationTargetName(RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ break;
+ }
+ }
+
+ if (Arch == Triple::x86) {
+ // All X86 relocations that need special printing were already
+ // handled in the generic code.
+ switch (Type) {
+ case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+        // X86 sect diffs must be followed by a relocation of type
+ // GENERIC_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+ if (RType != 1)
+ report_fatal_error("Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_LOCAL_SECTDIFF.");
+
+ printRelocationTargetName(RE, fmt);
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ break;
+ }
+ case macho::RIT_Generic_TLV: {
+ printRelocationTargetName(RE, fmt);
+ fmt << "@TLV";
+ if (isPCRel) fmt << "P";
+ break;
+ }
+ default:
+ printRelocationTargetName(RE, fmt);
+ }
+ } else { // ARM-specific relocations
+ switch (Type) {
+ case macho::RIT_ARM_Half: // ARM_RELOC_HALF
+ case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF
+ // Half relocations steal a bit from the length field to encode
+ // whether this is an upper16 or a lower16 relocation.
+ bool isUpper;
+ if (isScattered)
+ isUpper = (RE->Word0 >> 28) & 1;
+ else
+ isUpper = (RE->Word1 >> 25) & 1;
+
+ if (isUpper)
+ fmt << ":upper16:(";
+ else
+ fmt << ":lower16:(";
+ printRelocationTargetName(RE, fmt);
+
+ InMemoryStruct<macho::RelocationEntry> RENext;
+ DataRefImpl RelNext = Rel;
+ RelNext.d.a++;
+ getRelocation(RelNext, RENext);
+
+ // ARM half relocs must be followed by a relocation of type
+ // ARM_RELOC_PAIR.
+ bool isNextScattered = (Arch != Triple::x86_64) &&
+ (RENext->Word0 & macho::RF_Scattered);
+ unsigned RType;
+ if (isNextScattered)
+ RType = (RENext->Word0 >> 24) & 0xF;
+ else
+ RType = (RENext->Word1 >> 28) & 0xF;
+
+ if (RType != 1)
+ report_fatal_error("Expected ARM_RELOC_PAIR after "
+ "GENERIC_RELOC_HALF");
+
+        // NOTE: Half of the target virtual address is stashed in the
+ // address field of the secondary relocation, but we can't reverse
+ // engineer the constant offset from it without decoding the movw/movt
+ // instruction to find the other half in its immediate field.
+
+ // ARM_RELOC_HALF_SECTDIFF encodes the second section in the
+ // symbol/section pointer of the follow-on relocation.
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ fmt << "-";
+ printRelocationTargetName(RENext, fmt);
+ }
+
+ fmt << ")";
+ break;
+ }
+ default: {
+ printRelocationTargetName(RE, fmt);
+ }
+ }
+ }
+ } else
+ printRelocationTargetName(RE, fmt);
+
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel,
+ bool &Result) const {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ getRelocation(Rel, RE);
+
+ unsigned Arch = getArch();
+ bool isScattered = (Arch != Triple::x86_64) &&
+ (RE->Word0 & macho::RF_Scattered);
+ unsigned Type;
+ if (isScattered)
+ Type = (RE->Word0 >> 24) & 0xF;
+ else
+ Type = (RE->Word1 >> 28) & 0xF;
+
+ Result = false;
+
+ // On arches that use the generic relocations, GENERIC_RELOC_PAIR
+ // is always hidden.
+ if (Arch == Triple::x86 || Arch == Triple::arm) {
+ if (Type == macho::RIT_Pair) Result = true;
+ } else if (Arch == Triple::x86_64) {
+ // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows
+    // an X86_64_RELOC_SUBTRACTOR.
+ if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) {
+ DataRefImpl RelPrev = Rel;
+ RelPrev.d.a--;
+ InMemoryStruct<macho::RelocationEntry> REPrev;
+ getRelocation(RelPrev, REPrev);
+
+ unsigned PrevType = (REPrev->Word1 >> 28) & 0xF;
+
+ if (PrevType == macho::RIT_X86_64_Subtractor) Result = true;
+ }
+ }
+
return object_error::success;
}
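
A typical consumer skips hidden entries while walking a section's relocations. A sketch, assuming the RelocationRef wrapper exposes this hook as getHidden in the usual accessor style:

    // Sketch: visit only the relocations a disassembler should print.
    static void forEachVisibleReloc(const SectionRef &Sec) {
      error_code ec;
      for (relocation_iterator RI = Sec.begin_relocations(),
                               RE = Sec.end_relocations();
           RI != RE; RI.increment(ec)) {
        if (ec)
          break;
        bool Hidden = false;
        RI->getHidden(Hidden); // assumed wrapper over the method above
        if (Hidden)
          continue;
        // ... handle *RI ...
      }
    }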
+error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData,
+ LibraryRef &Res) const {
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
+ StringRef &Res) const {
+ report_fatal_error("Needed libraries unimplemented in MachOObjectFile");
+}
+
+
/*===-- Miscellaneous -----------------------------------------------------===*/
uint8_t MachOObjectFile::getBytesInAddress() const {
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 2ea8db978670..f061ea7cebed 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -18,6 +18,7 @@
using namespace llvm;
using namespace object;
+// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
return wrap(ObjectFile::createObjectFile(unwrap(MemBuf)));
}
@@ -26,6 +27,7 @@ void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) {
delete unwrap(ObjectFile);
}
+// ObjectFile Section iterators
LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) {
section_iterator SI = unwrap(ObjectFile)->begin_sections();
return wrap(new section_iterator(SI));
@@ -46,6 +48,34 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
}
+void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect,
+ LLVMSymbolIteratorRef Sym) {
+ if (error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect)))
+ report_fatal_error(ec.message());
+}
+
+// ObjectFile Symbol iterators
+LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile) {
+ symbol_iterator SI = unwrap(ObjectFile)->begin_symbols();
+ return wrap(new symbol_iterator(SI));
+}
+
+void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile,
+ LLVMSymbolIteratorRef SI) {
+ return (*unwrap(SI) == unwrap(ObjectFile)->end_symbols()) ? 1 : 0;
+}
+
+void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSymbol failed: " + ec.message());
+}
+
+// SectionRef accessors
const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
StringRef ret;
if (error_code ec = (*unwrap(SI))->getName(ret))
@@ -66,3 +96,123 @@ const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
report_fatal_error(ec.message());
return ret.data();
}
+
+uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI,
+ LLVMSymbolIteratorRef Sym) {
+ bool ret;
+ if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// Section Relocation iterators
+LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) {
+ relocation_iterator SI = (*unwrap(Section))->begin_relocations();
+ return wrap(new relocation_iterator(SI));
+}
+
+void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) {
+ delete unwrap(SI);
+}
+
+LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section,
+ LLVMRelocationIteratorRef SI) {
+ return (*unwrap(SI) == (*unwrap(Section))->end_relocations()) ? 1 : 0;
+}
+
+void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) {
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextRelocation failed: " +
+ ec.message());
+}
+
+
+// SymbolRef accessors
+const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) {
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
+}
+
+uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// RelocationRef accessors
+uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getAddress(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getOffset(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) {
+ SymbolRef ret;
+ if (error_code ec = (*unwrap(RI))->getSymbol(ret))
+ report_fatal_error(ec.message());
+
+ return wrap(new symbol_iterator(ret));
+}
+
+uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) {
+ uint64_t ret;
+ if (error_code ec = (*unwrap(RI))->getType(ret))
+ report_fatal_error(ec.message());
+ return ret;
+}
+
+// NOTE: Caller takes ownership of returned string.
+const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) {
+ SmallVector<char, 0> ret;
+ if (error_code ec = (*unwrap(RI))->getTypeName(ret))
+ report_fatal_error(ec.message());
+
+  char *str = static_cast<char*>(malloc(ret.size() + 1));
+  std::copy(ret.begin(), ret.end(), str);
+  str[ret.size()] = '\0';
+  return str;
+}
+
+// NOTE: Caller takes ownership of returned string.
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) {
+ SmallVector<char, 0> ret;
+ if (error_code ec = (*unwrap(RI))->getValueString(ret))
+ report_fatal_error(ec.message());
+
+  char *str = static_cast<char*>(malloc(ret.size() + 1));
+  std::copy(ret.begin(), ret.end(), str);
+  str[ret.size()] = '\0';
+  return str;
+}
+
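
Per the ownership notes above, the two string accessors return heap buffers the caller must free. A usage sketch from C++ (the iterator is assumed valid and not at end):

    #include "llvm-c/Object.h"
    #include <cstdio>
    #include <cstdlib>

    // Sketch: print and release a caller-owned relocation type name.
    static void printTypeName(LLVMRelocationIteratorRef RI) {
      const char *name = LLVMGetRelocationTypeName(RI);
      std::printf("%s\n", name);
      std::free(const_cast<char *>(name));
    }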
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 69d8ed0e5e9a..b14df9af64f4 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -21,6 +21,8 @@
using namespace llvm;
using namespace object;
+void ObjectFile::anchor() { }
+
ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
: Binary(Type, source) {
}
@@ -56,7 +58,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) {
OwningPtr<MemoryBuffer> File;
- if (error_code ec = MemoryBuffer::getFile(ObjectPath, File))
+ if (MemoryBuffer::getFile(ObjectPath, File))
return NULL;
return createObjectFile(File.take());
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index f2388944929b..409d4fbd0ae5 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -14,8 +14,9 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <limits.h>
@@ -1150,9 +1151,6 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
assert(lost_fraction != lfExactlyZero);
switch (rounding_mode) {
- default:
- llvm_unreachable(0);
-
case rmNearestTiesToAway:
return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
@@ -1175,6 +1173,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
case rmTowardNegative:
return sign == true;
}
+ llvm_unreachable("Invalid rounding mode found");
}
APFloat::opStatus
@@ -1854,20 +1853,33 @@ APFloat::convert(const fltSemantics &toSemantics,
lostFraction lostFraction;
unsigned int newPartCount, oldPartCount;
opStatus fs;
+ int shift;
+ const fltSemantics &fromSemantics = *semantics;
- assertArithmeticOK(*semantics);
+ assertArithmeticOK(fromSemantics);
assertArithmeticOK(toSemantics);
lostFraction = lfExactlyZero;
newPartCount = partCountForBits(toSemantics.precision + 1);
oldPartCount = partCount();
+ shift = toSemantics.precision - fromSemantics.precision;
- /* Handle storage complications. If our new form is wider,
- re-allocate our bit pattern into wider storage. If it is
- narrower, we ignore the excess parts, but if narrowing to a
- single part we need to free the old storage.
- Be careful not to reference significandParts for zeroes
- and infinities, since it aborts. */
+ bool X86SpecialNan = false;
+ if (&fromSemantics == &APFloat::x87DoubleExtended &&
+ &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
+ (!(*significandParts() & 0x8000000000000000ULL) ||
+ !(*significandParts() & 0x4000000000000000ULL))) {
+ // x86 has some unusual NaNs which cannot be represented in any other
+ // format; note them here.
+ X86SpecialNan = true;
+ }
+
+ // If this is a truncation, perform the shift before we narrow the storage.
+ if (shift < 0 && (category==fcNormal || category==fcNaN))
+ lostFraction = shiftRight(significandParts(), oldPartCount, -shift);
+
+  // Fix the storage so it can hold the new value.
if (newPartCount > oldPartCount) {
+ // The new type requires more storage; make it available.
integerPart *newParts;
newParts = new integerPart[newPartCount];
APInt::tcSet(newParts, 0, newPartCount);
@@ -1875,61 +1887,36 @@ APFloat::convert(const fltSemantics &toSemantics,
APInt::tcAssign(newParts, significandParts(), oldPartCount);
freeSignificand();
significand.parts = newParts;
- } else if (newPartCount < oldPartCount) {
- /* Capture any lost fraction through truncation of parts so we get
- correct rounding whilst normalizing. */
- if (category==fcNormal)
- lostFraction = lostFractionThroughTruncation
- (significandParts(), oldPartCount, toSemantics.precision);
- if (newPartCount == 1) {
- integerPart newPart = 0;
- if (category==fcNormal || category==fcNaN)
- newPart = significandParts()[0];
- freeSignificand();
- significand.part = newPart;
- }
+ } else if (newPartCount == 1 && oldPartCount != 1) {
+ // Switch to built-in storage for a single part.
+ integerPart newPart = 0;
+ if (category==fcNormal || category==fcNaN)
+ newPart = significandParts()[0];
+ freeSignificand();
+ significand.part = newPart;
}
+ // Now that we have the right storage, switch the semantics.
+ semantics = &toSemantics;
+
+ // If this is an extension, perform the shift now that the storage is
+ // available.
+ if (shift > 0 && (category==fcNormal || category==fcNaN))
+ APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+
if (category == fcNormal) {
- /* Re-interpret our bit-pattern. */
- exponent += toSemantics.precision - semantics->precision;
- semantics = &toSemantics;
fs = normalize(rounding_mode, lostFraction);
*losesInfo = (fs != opOK);
} else if (category == fcNaN) {
- int shift = toSemantics.precision - semantics->precision;
- // Do this now so significandParts gets the right answer
- const fltSemantics *oldSemantics = semantics;
- semantics = &toSemantics;
- *losesInfo = false;
- // No normalization here, just truncate
- if (shift>0)
- APInt::tcShiftLeft(significandParts(), newPartCount, shift);
- else if (shift < 0) {
- unsigned ushift = -shift;
- // Figure out if we are losing information. This happens
- // if are shifting out something other than 0s, or if the x87 long
- // double input did not have its integer bit set (pseudo-NaN), or if the
- // x87 long double input did not have its QNan bit set (because the x87
- // hardware sets this bit when converting a lower-precision NaN to
- // x87 long double).
- if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
- *losesInfo = true;
- if (oldSemantics == &APFloat::x87DoubleExtended &&
- (!(*significandParts() & 0x8000000000000000ULL) ||
- !(*significandParts() & 0x4000000000000000ULL)))
- *losesInfo = true;
- APInt::tcShiftRight(significandParts(), newPartCount, ushift);
- }
+ *losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
// does not give you back the same bits. This is dubious, and we
// don't currently do it. You're really supposed to get
// an invalid operation signal at runtime, but nobody does that.
fs = opOK;
} else {
- semantics = &toSemantics;
- fs = opOK;
*losesInfo = false;
+ fs = opOK;
}
return fs;
@@ -2695,21 +2682,19 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
return writeSignedDecimal (dst, exponent);
}
-// For good performance it is desirable for different APFloats
-// to produce different integers.
-uint32_t
-APFloat::getHashValue() const
-{
- if (category==fcZero) return sign<<8 | semantics->precision ;
- else if (category==fcInfinity) return sign<<9 | semantics->precision;
- else if (category==fcNaN) return 1<<10 | semantics->precision;
- else {
- uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
- const integerPart* p = significandParts();
- for (int i=partCount(); i>0; i--, p++)
- hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
- return hash;
- }
+hash_code llvm::hash_value(const APFloat &Arg) {
+ if (Arg.category != APFloat::fcNormal)
+ return hash_combine((uint8_t)Arg.category,
+ // NaN has no sign; fix it at zero.
+ Arg.isNaN() ? (uint8_t)0 : (uint8_t)Arg.sign,
+ Arg.semantics->precision);
+
+ // Normal floats need their exponent and significand hashed.
+ return hash_combine((uint8_t)Arg.category, (uint8_t)Arg.sign,
+ Arg.semantics->precision, Arg.exponent,
+ hash_combine_range(
+ Arg.significandParts(),
+ Arg.significandParts() + Arg.partCount()));
}
// Conversion from APFloat to/from host float/double. It may eventually be
@@ -3354,7 +3339,7 @@ namespace {
// Rounding down is just a truncation, except we also want to drop
// trailing zeros from the new result.
if (buffer[FirstSignificant - 1] < '5') {
- while (buffer[FirstSignificant] == '0')
+ while (FirstSignificant < N && buffer[FirstSignificant] == '0')
FirstSignificant++;
exp += FirstSignificant;
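
The rewritten APFloat hash above trades the hand-rolled mixer for the hash_combine family from the new hashing infrastructure. A minimal sketch of the same pattern on a toy type, assuming only llvm/ADT/Hashing.h; Fixed and its fields are illustrative, not from the patch:

    #include "llvm/ADT/Hashing.h"
    #include <stdint.h>

    struct Fixed {
      uint8_t Category;
      uint8_t Sign;
      unsigned Precision;
    };

    // One combiner call folds every distinguishing field into the hash,
    // mirroring how hash_value(const APFloat&) treats non-normal values.
    llvm::hash_code hash_value(const Fixed &F) {
      return llvm::hash_combine(F.Category, F.Sign, F.Precision);
    }
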
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 3774c5223c46..9b81fe776a61 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -14,9 +14,10 @@
#define DEBUG_TYPE "apint"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -456,16 +457,6 @@ APInt APInt::XorSlowCase(const APInt& RHS) const {
return APInt(val, getBitWidth()).clearUnusedBits();
}
-bool APInt::operator !() const {
- if (isSingleWord())
- return !VAL;
-
- for (unsigned i = 0; i < getNumWords(); ++i)
- if (pVal[i])
- return false;
- return true;
-}
-
APInt APInt::operator*(const APInt& RHS) const {
assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
if (isSingleWord())
@@ -493,12 +484,6 @@ APInt APInt::operator-(const APInt& RHS) const {
return Result.clearUnusedBits();
}
-bool APInt::operator[](unsigned bitPosition) const {
- assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
- return (maskBit(bitPosition) &
- (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
-}
-
bool APInt::EqualSlowCase(const APInt& RHS) const {
// Get some facts about the number of bits used in the two operands.
unsigned n1 = getActiveBits();
@@ -675,93 +660,11 @@ unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
}
}
-// From http://www.burtleburtle.net, byBob Jenkins.
-// When targeting x86, both GCC and LLVM seem to recognize this as a
-// rotate instruction.
-#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
-
-// From http://www.burtleburtle.net, by Bob Jenkins.
-#define mix(a,b,c) \
- { \
- a -= c; a ^= rot(c, 4); c += b; \
- b -= a; b ^= rot(a, 6); a += c; \
- c -= b; c ^= rot(b, 8); b += a; \
- a -= c; a ^= rot(c,16); c += b; \
- b -= a; b ^= rot(a,19); a += c; \
- c -= b; c ^= rot(b, 4); b += a; \
- }
-
-// From http://www.burtleburtle.net, by Bob Jenkins.
-#define final(a,b,c) \
- { \
- c ^= b; c -= rot(b,14); \
- a ^= c; a -= rot(c,11); \
- b ^= a; b -= rot(a,25); \
- c ^= b; c -= rot(b,16); \
- a ^= c; a -= rot(c,4); \
- b ^= a; b -= rot(a,14); \
- c ^= b; c -= rot(b,24); \
- }
-
-// hashword() was adapted from http://www.burtleburtle.net, by Bob
-// Jenkins. k is a pointer to an array of uint32_t values; length is
-// the length of the key, in 32-bit chunks. This version only handles
-// keys that are a multiple of 32 bits in size.
-static inline uint32_t hashword(const uint64_t *k64, size_t length)
-{
- const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
- uint32_t a,b,c;
-
- /* Set up the internal state */
- a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
-
- /*------------------------------------------------- handle most of the key */
- while (length > 3) {
- a += k[0];
- b += k[1];
- c += k[2];
- mix(a,b,c);
- length -= 3;
- k += 3;
- }
-
- /*------------------------------------------- handle the last 3 uint32_t's */
- switch (length) { /* all the case statements fall through */
- case 3 : c+=k[2];
- case 2 : b+=k[1];
- case 1 : a+=k[0];
- final(a,b,c);
- case 0: /* case 0: nothing left to add */
- break;
- }
- /*------------------------------------------------------ report the result */
- return c;
-}
-
-// hashword8() was adapted from http://www.burtleburtle.net, by Bob
-// Jenkins. This computes a 32-bit hash from one 64-bit word. When
-// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
-// function into about 35 instructions when inlined.
-static inline uint32_t hashword8(const uint64_t k64)
-{
- uint32_t a,b,c;
- a = b = c = 0xdeadbeef + 4;
- b += k64 >> 32;
- a += k64 & 0xffffffff;
- final(a,b,c);
- return c;
-}
-#undef final
-#undef mix
-#undef rot
+hash_code llvm::hash_value(const APInt &Arg) {
+ if (Arg.isSingleWord())
+ return hash_combine(Arg.VAL);
-uint64_t APInt::getHashValue() const {
- uint64_t hash;
- if (isSingleWord())
- hash = hashword8(VAL);
- else
- hash = hashword(pVal, getNumWords()*2);
- return hash;
+ return hash_combine_range(Arg.pVal, Arg.pVal + Arg.getNumWords());
}
/// HiBits - This function returns the high "numBits" bits of this APInt.
@@ -803,20 +706,9 @@ unsigned APInt::countLeadingZerosSlowCase() const {
return Count;
}
-static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) {
- unsigned Count = 0;
- if (skip)
- V <<= skip;
- while (V && (V & (1ULL << 63))) {
- Count++;
- V <<= 1;
- }
- return Count;
-}
-
unsigned APInt::countLeadingOnes() const {
if (isSingleWord())
- return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth);
+ return CountLeadingOnes_64(VAL << (APINT_BITS_PER_WORD - BitWidth));
unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
unsigned shift;
@@ -827,13 +719,13 @@ unsigned APInt::countLeadingOnes() const {
shift = APINT_BITS_PER_WORD - highWordBits;
}
int i = getNumWords() - 1;
- unsigned Count = countLeadingOnes_64(pVal[i], shift);
+ unsigned Count = CountLeadingOnes_64(pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
if (pVal[i] == -1ULL)
Count += APINT_BITS_PER_WORD;
else {
- Count += countLeadingOnes_64(pVal[i], 0);
+ Count += CountLeadingOnes_64(pVal[i]);
break;
}
}
@@ -870,30 +762,43 @@ unsigned APInt::countPopulationSlowCase() const {
return Count;
}
+/// Perform a logical right-shift from Src to Dst, which must be equal or
+/// non-overlapping, of Words words, by Shift, which must be less than 64.
+static void lshrNear(uint64_t *Dst, uint64_t *Src, unsigned Words,
+ unsigned Shift) {
+ uint64_t Carry = 0;
+ for (int I = Words - 1; I >= 0; --I) {
+ uint64_t Tmp = Src[I];
+ Dst[I] = (Tmp >> Shift) | Carry;
+ Carry = Tmp << (64 - Shift);
+ }
+}
+
APInt APInt::byteSwap() const {
assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
if (BitWidth == 16)
return APInt(BitWidth, ByteSwap_16(uint16_t(VAL)));
- else if (BitWidth == 32)
+ if (BitWidth == 32)
return APInt(BitWidth, ByteSwap_32(unsigned(VAL)));
- else if (BitWidth == 48) {
+ if (BitWidth == 48) {
unsigned Tmp1 = unsigned(VAL >> 16);
Tmp1 = ByteSwap_32(Tmp1);
uint16_t Tmp2 = uint16_t(VAL);
Tmp2 = ByteSwap_16(Tmp2);
return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
- } else if (BitWidth == 64)
+ }
+ if (BitWidth == 64)
return APInt(BitWidth, ByteSwap_64(VAL));
- else {
- APInt Result(BitWidth, 0);
- char *pByte = (char*)Result.pVal;
- for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) {
- char Tmp = pByte[i];
- pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i];
- pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp;
- }
- return Result;
+
+ APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
+ for (unsigned I = 0, N = getNumWords(); I != N; ++I)
+ Result.pVal[I] = ByteSwap_64(pVal[N - I - 1]);
+ if (Result.BitWidth != BitWidth) {
+ lshrNear(Result.pVal, Result.pVal, getNumWords(),
+ Result.BitWidth - BitWidth);
+ Result.BitWidth = BitWidth;
}
+ return Result;
}
APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1,
@@ -1110,6 +1015,18 @@ APInt APInt::sextOrTrunc(unsigned width) const {
return *this;
}
+APInt APInt::zextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return zext(width);
+ return *this;
+}
+
+APInt APInt::sextOrSelf(unsigned width) const {
+ if (BitWidth < width)
+ return sext(width);
+ return *this;
+}
+
/// Arithmetic right-shift this APInt by shiftAmt.
/// @brief Arithmetic right-shift function.
APInt APInt::ashr(const APInt &shiftAmt) const {
@@ -1209,7 +1126,7 @@ APInt APInt::lshr(const APInt &shiftAmt) const {
/// @brief Logical right-shift function.
APInt APInt::lshr(unsigned shiftAmt) const {
if (isSingleWord()) {
- if (shiftAmt == BitWidth)
+ if (shiftAmt >= BitWidth)
return APInt(BitWidth, 0);
else
return APInt(BitWidth, this->VAL >> shiftAmt);
@@ -1232,11 +1149,7 @@ APInt APInt::lshr(unsigned shiftAmt) const {
// If we are shifting less than a word, compute the shift with a simple carry
if (shiftAmt < APINT_BITS_PER_WORD) {
- uint64_t carry = 0;
- for (int i = getNumWords()-1; i >= 0; --i) {
- val[i] = (pVal[i] >> shiftAmt) | carry;
- carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt);
- }
+ lshrNear(val, pVal, getNumWords(), shiftAmt);
return APInt(val, BitWidth).clearUnusedBits();
}
@@ -1329,14 +1242,10 @@ APInt APInt::rotl(const APInt &rotateAmt) const {
}
APInt APInt::rotl(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
- // Don't get too fancy, just use existing shift/or facilities
- APInt hi(*this);
- APInt lo(*this);
- hi.shl(rotateAmt);
- lo.lshr(BitWidth - rotateAmt);
- return hi | lo;
+ return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
}
APInt APInt::rotr(const APInt &rotateAmt) const {
@@ -1344,14 +1253,10 @@ APInt APInt::rotr(const APInt &rotateAmt) const {
}
APInt APInt::rotr(unsigned rotateAmt) const {
+ rotateAmt %= BitWidth;
if (rotateAmt == 0)
return *this;
- // Don't get too fancy, just use existing shift/or facilities
- APInt hi(*this);
- APInt lo(*this);
- lo.lshr(rotateAmt);
- hi.shl(BitWidth - rotateAmt);
- return hi | lo;
+ return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
}
// Square Root - this method computes and returns the square root of "this".
@@ -1431,15 +1336,11 @@ APInt APInt::sqrt() const {
APInt nextSquare((x_old + 1) * (x_old +1));
if (this->ult(square))
return x_old;
- else if (this->ule(nextSquare)) {
- APInt midpoint((nextSquare - square).udiv(two));
- APInt offset(*this - square);
- if (offset.ult(midpoint))
- return x_old;
- else
- return x_old + 1;
- } else
- llvm_unreachable("Error in APInt::sqrt computation");
+ assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
+ APInt midpoint((nextSquare - square).udiv(two));
+ APInt offset(*this - square);
+ if (offset.ult(midpoint))
+ return x_old;
return x_old + 1;
}
@@ -2184,7 +2085,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
bool Signed, bool formatAsCLiteral) const {
assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
Radix == 36) &&
- "Radix should be 2, 8, 10, or 16!");
+ "Radix should be 2, 8, 10, 16, or 36!");
const char *Prefix = "";
if (formatAsCLiteral) {
@@ -2197,9 +2098,13 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
case 8:
Prefix = "0";
break;
+ case 10:
+ break; // No prefix
case 16:
Prefix = "0x";
break;
+ default:
+ llvm_unreachable("Invalid radix!");
}
}
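
The simplified rotates above reduce the amount modulo BitWidth up front, so the underlying shl/lshr never see a shift of BitWidth or more. A small worked check under that reading, with illustrative values:

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    static void rotlCheck() {
      llvm::APInt V(8, 0x81);                    // bit pattern 10000001
      assert(V.rotl(1) == llvm::APInt(8, 0x03)); // top bit wraps to bit 0
      assert(V.rotl(9) == V.rotl(1));            // 9 % 8 == 1
      assert(V.rotl(8) == V);                    // full rotation is a no-op
    }
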
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 215b0f249d96..b8978302e746 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -22,8 +22,8 @@ namespace llvm {
BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold,
SlabAllocator &allocator)
- : SlabSize(size), SizeThreshold(threshold), Allocator(allocator),
- CurSlab(0), BytesAllocated(0) { }
+ : SlabSize(size), SizeThreshold(std::min(size, threshold)),
+ Allocator(allocator), CurSlab(0), BytesAllocated(0) { }
BumpPtrAllocator::~BumpPtrAllocator() {
DeallocateSlabs(CurSlab);
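
Clamping SizeThreshold to the slab size keeps the invariant that any request at or below the threshold actually fits in a slab; larger requests are routed to a dedicated allocation. A minimal sketch of the boundary case, assuming the two-argument constructor with a default slab allocator; the sizes are illustrative:

    #include "llvm/Support/Allocator.h"

    static void thresholdClamp() {
      // The requested threshold (8192) exceeds the slab size (4096); the
      // constructor clamps it to 4096, so this 6000-byte request takes
      // the big-allocation path instead of overflowing a slab.
      llvm::BumpPtrAllocator A(4096, 8192);
      void *P = A.Allocate(6000, 8);
      (void)P;
    }
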
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 94760cc069fc..3001f6c468aa 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Atomic.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
using namespace llvm;
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
index a63bf83f2039..84a993e3e5b6 100644
--- a/lib/Support/BlockFrequency.cpp
+++ b/lib/Support/BlockFrequency.cpp
@@ -70,8 +70,13 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
assert(n <= d && "Probability must be less or equal to 1.");
- // If we can overflow use 96-bit operations.
- if (n > 0 && Frequency > UINT64_MAX / n) {
+ // Calculate Frequency * n.
+ uint64_t mulLo = (Frequency & UINT32_MAX) * n;
+ uint64_t mulHi = (Frequency >> 32) * n;
+ uint64_t mulRes = (mulHi << 32) + mulLo;
+
+ // If there was overflow use 96-bit operations.
+ if (mulHi > UINT32_MAX || mulRes < mulLo) {
// 96-bit value represented as W[1]:W[0].
uint64_t W[2];
@@ -82,8 +87,7 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
return *this;
}
- Frequency *= n;
- Frequency /= d;
+ Frequency = mulRes / d;
return *this;
}
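
The new test above detects 64-bit overflow of Frequency * n without a wider type: the product is assembled from 32-bit halves, and it overflows exactly when the high half exceeds 32 bits or the final addition wraps. A standalone restatement of that predicate, for illustration only:

    #include <stdint.h>

    // True iff Freq * N does not fit in 64 bits, i.e. the 96-bit
    // fallback path is required.
    static bool mulOverflows64(uint64_t Freq, uint32_t N) {
      uint64_t MulLo = (Freq & UINT32_MAX) * N; // low half times N
      uint64_t MulHi = (Freq >> 32) * N;        // high half times N
      uint64_t MulRes = (MulHi << 32) + MulLo;
      return MulHi > UINT32_MAX  // high product alone needs > 64 bits
          || MulRes < MulLo;     // the addition wrapped around
    }
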
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index 49d04ed83653..e8b83e59802d 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -13,24 +13,17 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-BranchProbability::BranchProbability(uint32_t n, uint32_t d) {
- assert(d > 0 && "Denomiator cannot be 0!");
- assert(n <= d && "Probability cannot be bigger than 1!");
- N = n;
- D = d;
-}
-
void BranchProbability::print(raw_ostream &OS) const {
- OS << N << " / " << D << " = " << ((double)N / D);
+ OS << N << " / " << D << " = " << format("%g%%", ((double)N / D) * 100.0);
}
void BranchProbability::dump() const {
- print(dbgs());
- dbgs() << "\n";
+ dbgs() << *this << '\n';
}
namespace llvm {
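
With the format() call above, probabilities now print as percentages. A minimal sketch of the resulting output, assuming the two-argument constructor remains available from the header after its out-of-line definition was removed:

    #include "llvm/Support/BranchProbability.h"
    #include "llvm/Support/raw_ostream.h"

    static void printProb() {
      llvm::BranchProbability P(1, 3);
      P.print(llvm::errs()); // prints: 1 / 3 = 33.3333%
      llvm::errs() << '\n';
    }
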
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 63a833c38046..9b3b6c801dd0 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMSupport
ConstantRange.cpp
CrashRecoveryContext.cpp
DataExtractor.cpp
+ DataStream.cpp
Debug.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
@@ -25,10 +26,14 @@ add_llvm_library(LLVMSupport
FoldingSet.cpp
FormattedStream.cpp
GraphWriter.cpp
+ Hashing.cpp
IntEqClasses.cpp
IntervalMap.cpp
+ IntrusiveRefCntPtr.cpp
IsInf.cpp
IsNAN.cpp
+ JSONParser.cpp
+ LockFileManager.cpp
ManagedStatic.cpp
MemoryBuffer.cpp
MemoryObject.cpp
@@ -39,6 +44,7 @@ add_llvm_library(LLVMSupport
SmallVector.cpp
SourceMgr.cpp
Statistic.cpp
+ StreamableMemoryObject.cpp
StringExtras.cpp
StringMap.cpp
StringPool.cpp
@@ -48,6 +54,7 @@ add_llvm_library(LLVMSupport
ToolOutputFile.cpp
Triple.cpp
Twine.cpp
+ YAMLParser.cpp
raw_os_ostream.cpp
raw_ostream.cpp
regcomp.c
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 238adcce0a12..e6fdf16a82de 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -57,6 +57,9 @@ TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
} } // end namespace llvm::cl
+void GenericOptionValue::anchor() {}
+void OptionValue<boolOrDefault>::anchor() {}
+void OptionValue<std::string>::anchor() {}
void Option::anchor() {}
void basic_parser_impl::anchor() {}
void parser<bool>::anchor() {}
@@ -263,8 +266,8 @@ static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos,
/// and a null value (StringRef()). The latter is accepted for arguments that
/// don't allow a value (-foo); the former is rejected (-foo=).
static inline bool ProvideOption(Option *Handler, StringRef ArgName,
- StringRef Value, int argc, char **argv,
- int &i) {
+ StringRef Value, int argc,
+ const char *const *argv, int &i) {
// Is this a multi-argument option?
unsigned NumAdditionalVals = Handler->getNumAdditionalVals();
@@ -289,12 +292,6 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
break;
case ValueOptional:
break;
-
- default:
- errs() << ProgramName
- << ": Bad ValueMask flag! CommandLine usage error:"
- << Handler->getValueExpectedFlag() << "\n";
- llvm_unreachable(0);
}
// If this isn't a multi-arg option, just run the handler.
@@ -498,10 +495,10 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
/// ExpandResponseFiles - Copy the contents of argv into newArgv,
/// substituting the contents of the response files for the arguments
/// of type @file.
-static void ExpandResponseFiles(unsigned argc, char** argv,
+static void ExpandResponseFiles(unsigned argc, const char*const* argv,
std::vector<char*>& newArgv) {
for (unsigned i = 1; i != argc; ++i) {
- char *arg = argv[i];
+ const char *arg = argv[i];
if (arg[0] == '@') {
sys::PathWithStatus respFile(++arg);
@@ -531,7 +528,7 @@ static void ExpandResponseFiles(unsigned argc, char** argv,
}
}
-void cl::ParseCommandLineOptions(int argc, char **argv,
+void cl::ParseCommandLineOptions(int argc, const char * const *argv,
const char *Overview, bool ReadResponseFiles) {
// Process all registered options.
SmallVector<Option*, 4> PositionalOpts;
@@ -885,7 +882,6 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName,
case OneOrMore:
case ZeroOrMore:
case ConsumeAfter: break;
- default: return error("bad num occurrences flag value!");
}
return handleOccurrence(pos, ArgName, Value);
@@ -1195,7 +1191,7 @@ printOptionNoValue(const Option &O, size_t GlobalWidth) const {
static int OptNameCompare(const void *LHS, const void *RHS) {
typedef std::pair<const char *, Option*> pair_ty;
- return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first);
+ return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
}
// Copy Options into a vector so we can sort them as we like.
@@ -1349,7 +1345,7 @@ class VersionPrinter {
public:
void print() {
raw_ostream &OS = outs();
- OS << "Low Level Virtual Machine (http://llvm.org/):\n"
+ OS << "LLVM (http://llvm.org/):\n"
<< " " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
#ifdef LLVM_VERSION_INFO
OS << LLVM_VERSION_INFO;
@@ -1369,7 +1365,7 @@ public:
#if (ENABLE_TIMESTAMPS == 1)
<< " Built " << __DATE__ << " (" << __TIME__ << ").\n"
#endif
- << " Host: " << sys::getHostTriple() << '\n'
+ << " Default target: " << sys::getDefaultTargetTriple() << '\n'
<< " Host CPU: " << CPU << '\n';
}
void operator=(bool OptionWasSpecified) {
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index c29cb53fb9c5..5206cf1f9b8c 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -55,7 +55,7 @@ ConstantRange ConstantRange::makeICmpRegion(unsigned Pred,
uint32_t W = CR.getBitWidth();
switch (Pred) {
- default: assert(0 && "Invalid ICmp predicate to makeICmpRegion()");
+ default: llvm_unreachable("Invalid ICmp predicate to makeICmpRegion()");
case CmpInst::ICMP_EQ:
return CR;
case CmpInst::ICMP_NE:
@@ -161,8 +161,7 @@ APInt ConstantRange::getSetSize() const {
APInt ConstantRange::getUnsignedMax() const {
if (isFullSet() || isWrappedSet())
return APInt::getMaxValue(getBitWidth());
- else
- return getUpper() - 1;
+ return getUpper() - 1;
}
/// getUnsignedMin - Return the smallest unsigned value contained in the
@@ -171,8 +170,7 @@ APInt ConstantRange::getUnsignedMax() const {
APInt ConstantRange::getUnsignedMin() const {
if (isFullSet() || (isWrappedSet() && getUpper() != 0))
return APInt::getMinValue(getBitWidth());
- else
- return getLower();
+ return getLower();
}
/// getSignedMax - Return the largest signed value contained in the
@@ -183,14 +181,11 @@ APInt ConstantRange::getSignedMax() const {
if (!isWrappedSet()) {
if (getLower().sle(getUpper() - 1))
return getUpper() - 1;
- else
- return SignedMax;
- } else {
- if (getLower().isNegative() == getUpper().isNegative())
- return SignedMax;
- else
- return getUpper() - 1;
+ return SignedMax;
}
+ if (getLower().isNegative() == getUpper().isNegative())
+ return SignedMax;
+ return getUpper() - 1;
}
/// getSignedMin - Return the smallest signed value contained in the
@@ -201,18 +196,13 @@ APInt ConstantRange::getSignedMin() const {
if (!isWrappedSet()) {
if (getLower().sle(getUpper() - 1))
return getLower();
- else
+ return SignedMin;
+ }
+ if ((getUpper() - 1).slt(getLower())) {
+ if (getUpper() != SignedMin)
return SignedMin;
- } else {
- if ((getUpper() - 1).slt(getLower())) {
- if (getUpper() != SignedMin)
- return SignedMin;
- else
- return getLower();
- } else {
- return getLower();
- }
}
+ return getLower();
}
/// contains - Return true if the specified value is in the set.
@@ -223,8 +213,7 @@ bool ConstantRange::contains(const APInt &V) const {
if (!isWrappedSet())
return Lower.ule(V) && V.ult(Upper);
- else
- return Lower.ule(V) || V.ult(Upper);
+ return Lower.ule(V) || V.ult(Upper);
}
/// contains - Return true if the argument is a subset of this range.
@@ -284,15 +273,14 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
return ConstantRange(CR.Lower, Upper);
return CR;
- } else {
- if (Upper.ult(CR.Upper))
- return *this;
+ }
+ if (Upper.ult(CR.Upper))
+ return *this;
- if (Lower.ult(CR.Upper))
- return ConstantRange(Lower, CR.Upper);
+ if (Lower.ult(CR.Upper))
+ return ConstantRange(Lower, CR.Upper);
- return ConstantRange(getBitWidth(), false);
- }
+ return ConstantRange(getBitWidth(), false);
}
if (isWrappedSet() && !CR.isWrappedSet()) {
@@ -305,9 +293,9 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if (getSetSize().ult(CR.getSetSize()))
return *this;
- else
- return CR;
- } else if (CR.Lower.ult(Lower)) {
+ return CR;
+ }
+ if (CR.Lower.ult(Lower)) {
if (CR.Upper.ule(Lower))
return ConstantRange(getBitWidth(), false);
@@ -320,15 +308,15 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
if (CR.Lower.ult(Upper)) {
if (getSetSize().ult(CR.getSetSize()))
return *this;
- else
- return CR;
+ return CR;
}
if (CR.Lower.ult(Lower))
return ConstantRange(Lower, CR.Upper);
return CR;
- } else if (CR.Upper.ult(Lower)) {
+ }
+ if (CR.Upper.ult(Lower)) {
if (CR.Lower.ult(Lower))
return *this;
@@ -336,8 +324,7 @@ ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const {
}
if (getSetSize().ult(CR.getSetSize()))
return *this;
- else
- return CR;
+ return CR;
}
@@ -362,8 +349,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
if (d1.ult(d2))
return ConstantRange(Lower, CR.Upper);
- else
- return ConstantRange(CR.Lower, Upper);
+ return ConstantRange(CR.Lower, Upper);
}
APInt L = Lower, U = Upper;
@@ -396,8 +382,7 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper;
if (d1.ult(d2))
return ConstantRange(Lower, CR.Upper);
- else
- return ConstantRange(CR.Lower, Upper);
+ return ConstantRange(CR.Lower, Upper);
}
// ----U L----- : this
@@ -407,13 +392,11 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
// ------U L---- : this
// L-----U : CR
- if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower))
- return ConstantRange(Lower, CR.Upper);
+ assert(CR.Lower.ult(Upper) && CR.Upper.ult(Lower) &&
+ "ConstantRange::unionWith missed a case with one range wrapped");
+ return ConstantRange(Lower, CR.Upper);
}
- assert(isWrappedSet() && CR.isWrappedSet() &&
- "ConstantRange::unionWith missed wrapped union unwrapped case");
-
// ------U L---- and ------U L---- : this
// -U L----------- and ------------U L : CR
if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper))
@@ -466,10 +449,8 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const {
/// correspond to the possible range of values as if the source range had been
/// truncated to the specified type.
ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
- unsigned SrcTySize = getBitWidth();
- assert(SrcTySize > DstTySize && "Not a value truncation");
- APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
- if (isFullSet() || getSetSize().ugt(Size))
+ assert(getBitWidth() > DstTySize && "Not a value truncation");
+ if (isFullSet() || getSetSize().getActiveBits() > DstTySize)
return ConstantRange(DstTySize, /*isFullSet=*/true);
return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize));
@@ -481,10 +462,9 @@ ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
if (SrcTySize > DstTySize)
return truncate(DstTySize);
- else if (SrcTySize < DstTySize)
+ if (SrcTySize < DstTySize)
return zeroExtend(DstTySize);
- else
- return *this;
+ return *this;
}
/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
@@ -493,10 +473,9 @@ ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
unsigned SrcTySize = getBitWidth();
if (SrcTySize > DstTySize)
return truncate(DstTySize);
- else if (SrcTySize < DstTySize)
+ if (SrcTySize < DstTySize)
return signExtend(DstTySize);
- else
- return *this;
+ return *this;
}
ConstantRange
@@ -675,11 +654,10 @@ ConstantRange::lshr(const ConstantRange &Other) const {
}
ConstantRange ConstantRange::inverse() const {
- if (isFullSet()) {
+ if (isFullSet())
return ConstantRange(getBitWidth(), /*isFullSet=*/false);
- } else if (isEmptySet()) {
+ if (isEmptySet())
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- }
return ConstantRange(Upper, Lower);
}
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 263114c06f98..e2af0bc17655 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -165,7 +165,6 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo)
// Note that we don't actually get here because HandleCrash calls
// longjmp, which means the HandleCrash function never returns.
llvm_unreachable("Handled the crash, should have longjmp'ed out of here");
- return EXCEPTION_CONTINUE_SEARCH;
}
// Because the Enable and Disable calls are static, it means that
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 814566494d30..1e89c6ad2ff2 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -350,6 +350,9 @@ DAGDeltaAlgorithmImpl::Run() {
return Required;
}
+void DAGDeltaAlgorithm::anchor() {
+}
+
DAGDeltaAlgorithm::changeset_ty
DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
const std::vector<edge_ty> &Dependencies) {
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index b946c1df8363..dc21155a0624 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -75,7 +75,7 @@ uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
uint32_t *DataExtractor::getU32(uint32_t *offset_ptr, uint32_t *dst,
uint32_t count) const {
return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
- Data.data());;
+ Data.data());
}
uint64_t DataExtractor::getU64(uint32_t *offset_ptr) const {
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
new file mode 100644
index 000000000000..94d14a5e36b0
--- /dev/null
+++ b/lib/Support/DataStream.cpp
@@ -0,0 +1,98 @@
+//===--- llvm/Support/DataStream.cpp - Lazy streamed data -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements DataStreamer, which fetches bytes of Data from
+// a stream source. It provides support for streaming (lazy reading) of
+// bitcode. An example implementation of streaming from a file or stdin
+// is included.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "Data-stream"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/DataStream.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/system_error.h"
+#include <string>
+#include <cerrno>
+#include <cstdio>
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+#include <fcntl.h>
+using namespace llvm;
+
+// Interface goals:
+// * StreamableMemoryObject doesn't care about complexities like using
+// threads/async callbacks to actually overlap download+compile
+// * Don't want to duplicate Data in memory
+// * Don't need to know total Data len in advance
+// Non-goals:
+// StreamableMemoryObject already has random access so this interface only does
+// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
+// Data here in addition to MemoryObject). This also means that if we want
+// to be able to free Data, BitstreamBytes/BitcodeReader will implement it
+
+STATISTIC(NumStreamFetches, "Number of calls to Data stream fetch");
+
+namespace llvm {
+DataStreamer::~DataStreamer() {}
+}
+
+namespace {
+
+// Very simple stream backed by a file. Mostly useful for stdin and debugging;
+// actual file access is probably still best done with mmap.
+class DataFileStreamer : public DataStreamer {
+ int Fd;
+public:
+ DataFileStreamer() : Fd(0) {}
+ virtual ~DataFileStreamer() {
+ close(Fd);
+ }
+ virtual size_t GetBytes(unsigned char *buf, size_t len) {
+ NumStreamFetches++;
+ return read(Fd, buf, len);
+ }
+
+ error_code OpenFile(const std::string &Filename) {
+ if (Filename == "-") {
+ Fd = 0;
+ sys::Program::ChangeStdinToBinary();
+ return error_code::success();
+ }
+
+ int OpenFlags = O_RDONLY;
+#ifdef O_BINARY
+ OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
+#endif
+ Fd = ::open(Filename.c_str(), OpenFlags);
+ if (Fd == -1)
+ return error_code(errno, posix_category());
+ return error_code::success();
+ }
+};
+
+}
+
+namespace llvm {
+DataStreamer *getDataFileStreamer(const std::string &Filename,
+ std::string *StrError) {
+ DataFileStreamer *s = new DataFileStreamer();
+ if (error_code e = s->OpenFile(Filename)) {
+ *StrError = std::string("Could not open ") + Filename + ": " +
+ e.message() + "\n";
+ return NULL;
+ }
+ return s;
+}
+
+}
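
A minimal sketch of driving the streamer above, assuming the getDataFileStreamer entry point and DataStreamer::GetBytes as declared in llvm/Support/DataStream.h; dumpFirstChunk is a hypothetical helper:

    #include "llvm/Support/DataStream.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static bool dumpFirstChunk(const std::string &Path) {
      std::string Err;
      llvm::DataStreamer *S = llvm::getDataFileStreamer(Path, &Err);
      if (!S) {
        llvm::errs() << Err;
        return false;
      }
      unsigned char Buf[64];
      size_t Got = S->GetBytes(Buf, sizeof(Buf)); // in-order only, no seeking
      llvm::errs() << "read " << Got << " bytes\n";
      delete S;
      return true;
    }
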
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 95a9550f9663..5c59a3ef8ef3 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -95,6 +95,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
return "DW_TAG_GNU_template_parameter_pack";
case DW_TAG_GNU_formal_parameter_pack:
return "DW_TAG_GNU_formal_parameter_pack";
+ case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property";
}
return 0;
}
@@ -245,6 +246,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter";
case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter";
case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
+ case DW_AT_APPLE_property: return "DW_AT_APPLE_property";
case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
}
return 0;
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 4c8c0c63ffc4..f9e9cf036608 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -200,7 +200,6 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
// Now its safe to mmap the files into memory because both files
// have a non-zero size.
- error_code ec;
OwningPtr<MemoryBuffer> F1;
if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
if (Error)
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 17b827132f57..c6282c6ab2ab 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -29,24 +30,7 @@ using namespace llvm;
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
/// used to lookup the node in the FoldingSetImpl.
unsigned FoldingSetNodeIDRef::ComputeHash() const {
- // This is adapted from SuperFastHash by Paul Hsieh.
- unsigned Hash = static_cast<unsigned>(Size);
- for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
- unsigned Data = *BP;
- Hash += Data & 0xFFFF;
- unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
- Hash = (Hash << 16) ^ Tmp;
- Hash += Hash >> 11;
- }
-
- // Force "avalanching" of final 127 bits.
- Hash ^= Hash << 3;
- Hash += Hash >> 5;
- Hash ^= Hash << 4;
- Hash += Hash >> 17;
- Hash ^= Hash << 25;
- Hash += Hash >> 6;
- return Hash;
+ return static_cast<unsigned>(hash_combine_range(Data, Data+Size));
}
bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
@@ -281,15 +265,15 @@ void FoldingSetImpl::GrowHashTable() {
FoldingSetImpl::Node
*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
-
- void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
+ unsigned IDHash = ID.ComputeHash();
+ void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
void *Probe = *Bucket;
InsertPos = 0;
FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
- if (NodeEquals(NodeInBucket, ID, TempID))
+ if (NodeEquals(NodeInBucket, ID, IDHash, TempID))
return NodeInBucket;
TempID.clear();
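
ComputeHash above now hands the node's scratch words straight to hash_combine_range, and FindNodeOrInsertPos hashes once and reuses the value through NodeEquals. A minimal sketch of the first point, assuming only llvm/ADT/Hashing.h:

    #include "llvm/ADT/Hashing.h"

    // Hash a run of 32-bit words the way FoldingSetNodeIDRef now does:
    // one range combine, truncated to unsigned.
    static unsigned hashWords(const unsigned *Data, unsigned Size) {
      return static_cast<unsigned>(
          llvm::hash_combine_range(Data, Data + Size));
    }
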
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 0dba28a2530c..32126ec39eba 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -11,12 +11,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
using namespace llvm;
+static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
+ cl::desc("Execute graph viewer in the background. Creates tmp file litter."));
+
std::string llvm::DOT::EscapeString(const std::string &Label) {
std::string Str(Label);
for (unsigned i = 0; i != Str.length(); ++i)
@@ -49,10 +53,28 @@ std::string llvm::DOT::EscapeString(const std::string &Label) {
return Str;
}
-
+// Execute the graph viewer. Return true if successful.
+static bool LLVM_ATTRIBUTE_UNUSED
+ExecGraphViewer(const sys::Path &ExecPath, std::vector<const char*> &args,
+ const sys::Path &Filename, bool wait, std::string &ErrMsg) {
+ if (wait) {
+ if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) {
+ errs() << "Error: " << ErrMsg << "\n";
+ return false;
+ }
+ Filename.eraseFromDisk();
+ errs() << " done. \n";
+ } else {
+ sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg);
+ errs() << "Remember to erase graph file: " << Filename.str() << "\n";
+ }
+ return true;
+}
void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
GraphProgram::Name program) {
+ wait &= !ViewBackground;
std::string ErrMsg;
#if HAVE_GRAPHVIZ
sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
@@ -61,14 +83,10 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(Graphviz.c_str());
args.push_back(Filename.c_str());
args.push_back(0);
-
+
errs() << "Running 'Graphviz' program... ";
- if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
return;
- }
- Filename.eraseFromDisk();
- errs() << " done. \n";
#elif HAVE_XDOT_PY
std::vector<const char*> args;
@@ -83,17 +101,12 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
default: errs() << "Unknown graph layout name; using default.\n";
}
-
+
args.push_back(0);
errs() << "Running 'xdot.py' program... ";
- if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
- &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg))
return;
- }
- Filename.eraseFromDisk();
- errs() << " done. \n";
#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
HAVE_TWOPI || HAVE_CIRCO))
@@ -150,14 +163,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back("-o");
args.push_back(PSFilename.c_str());
args.push_back(0);
-
+
errs() << "Running '" << prog.str() << "' program... ";
- if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
+ if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg))
return;
- }
- errs() << " done. \n";
sys::Path gv(LLVM_PATH_GV);
args.clear();
@@ -165,19 +175,11 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(PSFilename.c_str());
args.push_back("--spartan");
args.push_back(0);
-
+
ErrMsg.clear();
- if (wait) {
- if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg))
- errs() << "Error: " << ErrMsg << "\n";
- Filename.eraseFromDisk();
- PSFilename.eraseFromDisk();
- }
- else {
- sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg);
- errs() << "Remember to erase graph files: " << Filename.str() << " "
- << PSFilename.str() << "\n";
- }
+ if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg))
+ return;
+
#elif HAVE_DOTTY
sys::Path dotty(LLVM_PATH_DOTTY);
@@ -185,16 +187,13 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.push_back(dotty.c_str());
args.push_back(Filename.c_str());
args.push_back(0);
-
- errs() << "Running 'dotty' program... ";
- if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) {
- errs() << "Error: " << ErrMsg << "\n";
- } else {
+
// Dotty spawns another app and doesn't wait until it returns
#if defined (__MINGW32__) || defined (_WINDOWS)
- return;
+ wait = false;
#endif
- Filename.eraseFromDisk();
- }
+ errs() << "Running 'dotty' program... ";
+ if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg))
+ return;
#endif
}
diff --git a/lib/Support/Hashing.cpp b/lib/Support/Hashing.cpp
new file mode 100644
index 000000000000..c69efb7c3cc9
--- /dev/null
+++ b/lib/Support/Hashing.cpp
@@ -0,0 +1,29 @@
+//===-------------- lib/Support/Hashing.cpp -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides implementation bits for the LLVM common hashing
+// infrastructure. Documentation and most of the other information is in the
+// header file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Hashing.h"
+
+using namespace llvm;
+
+// Provide a definition and static initializer for the fixed seed. This
+// initializer should always be zero to ensure its value can never appear to be
+// non-zero, even during dynamic initialization.
+size_t llvm::hashing::detail::fixed_seed_override = 0;
+
+// Implement the function for forced setting of the fixed seed.
+// FIXME: Use atomic operations here so that there is no data race.
+void llvm::set_fixed_execution_hash_seed(size_t fixed_value) {
+ hashing::detail::fixed_seed_override = fixed_value;
+}
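
The override above exists so tests can pin hashing behavior; by default hash values may vary from one execution to the next. A minimal sketch of opting in, assuming the declaration in llvm/ADT/Hashing.h:

    #include "llvm/ADT/Hashing.h"

    static void pinHashSeed() {
      // For test reproducibility only: with a non-zero fixed seed,
      // hash_combine results become stable across separate runs.
      llvm::set_fixed_execution_hash_seed(42);
      (void)llvm::hash_combine(1, 2, 3); // now deterministic run-to-run
    }
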
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index a19e4b41189b..0f0696438ca6 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -61,6 +61,8 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
*rECX = registers[2];
*rEDX = registers[3];
return false;
+ #else
+ return true;
#endif
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
@@ -87,9 +89,14 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
mov dword ptr [esi],edx
}
return false;
+// pedantic #else returns to appease -Wunreachable-code (so we don't generate
+// postprocessed code that looks like "return true; return false;")
+ #else
+ return true;
#endif
-#endif
+#else
return true;
+#endif
}
static void DetectX86FamilyModel(unsigned EAX, unsigned &Family,
@@ -298,6 +305,10 @@ std::string sys::getHostCPUName() {
}
case 16:
return "amdfam10";
+ case 20:
+ return "btver1";
+ case 21:
+ return "bdver1";
default:
return "generic";
}
diff --git a/lib/Support/IntrusiveRefCntPtr.cpp b/lib/Support/IntrusiveRefCntPtr.cpp
new file mode 100644
index 000000000000..a8b45593ae70
--- /dev/null
+++ b/lib/Support/IntrusiveRefCntPtr.cpp
@@ -0,0 +1,14 @@
+//== IntrusiveRefCntPtr.cpp - Smart Refcounting Pointer ----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+
+using namespace llvm;
+
+void RefCountedBaseVPTR::anchor() { }
diff --git a/lib/Support/JSONParser.cpp b/lib/Support/JSONParser.cpp
new file mode 100644
index 000000000000..5dfcf297a7ea
--- /dev/null
+++ b/lib/Support/JSONParser.cpp
@@ -0,0 +1,302 @@
+//===--- JSONParser.cpp - Simple JSON parser ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a JSON parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/JSONParser.h"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+
+JSONParser::JSONParser(StringRef Input, SourceMgr *SM)
+ : SM(SM), Failed(false) {
+ InputBuffer = MemoryBuffer::getMemBuffer(Input, "JSON");
+ SM->AddNewSourceBuffer(InputBuffer, SMLoc());
+ End = InputBuffer->getBuffer().end();
+ Position = InputBuffer->getBuffer().begin();
+}
+
+JSONValue *JSONParser::parseRoot() {
+ if (Position != InputBuffer->getBuffer().begin())
+ report_fatal_error("Cannot reuse JSONParser.");
+ if (isWhitespace())
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("'[' or '{' at start of JSON text"))
+ return 0;
+ switch (*Position) {
+ case '[':
+ return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
+ case '{':
+ return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
+ default:
+ setExpectedError("'[' or '{' at start of JSON text", *Position);
+ return 0;
+ }
+}
+
+bool JSONParser::validate() {
+ JSONValue *Root = parseRoot();
+ if (Root == NULL) {
+ return false;
+ }
+ return skip(*Root);
+}
+
+bool JSONParser::skip(const JSONAtom &Atom) {
+ switch(Atom.getKind()) {
+ case JSONAtom::JK_Array:
+ case JSONAtom::JK_Object:
+ return skipContainer(*cast<JSONContainer>(&Atom));
+ case JSONAtom::JK_String:
+ return true;
+ case JSONAtom::JK_KeyValuePair:
+ return skip(*cast<JSONKeyValuePair>(&Atom)->Value);
+ }
+ llvm_unreachable("Impossible enum value.");
+}
+
+// Sets the current error to:
+// "expected <Expected>, but found <Found>".
+void JSONParser::setExpectedError(StringRef Expected, StringRef Found) {
+ SM->PrintMessage(SMLoc::getFromPointer(Position), SourceMgr::DK_Error,
+ "expected " + Expected + ", but found " + Found + ".", ArrayRef<SMRange>());
+ Failed = true;
+}
+
+// Sets the current error to:
+// "expected <Expected>, but found <Found>".
+void JSONParser::setExpectedError(StringRef Expected, char Found) {
+ setExpectedError(Expected, ("'" + StringRef(&Found, 1) + "'").str());
+}
+
+// If there is no character available, returns true and sets the current error
+// to: "expected <Expected>, but found EOF.".
+bool JSONParser::errorIfAtEndOfFile(StringRef Expected) {
+ if (Position == End) {
+ setExpectedError(Expected, "EOF");
+ return true;
+ }
+ return false;
+}
+
+// If the current character is not C, sets the current error to:
+// "expected 'C', but found <current character>".
+bool JSONParser::errorIfNotAt(char C, StringRef Message) {
+ if (*Position != C) {
+ std::string Expected =
+ ("'" + StringRef(&C, 1) + "' " + Message).str();
+ if (Position == End)
+ setExpectedError(Expected, "EOF");
+ else
+ setExpectedError(Expected, *Position);
+ return true;
+ }
+ return false;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns whether a character at 'Position' was escaped with a leading '\'.
+// 'First' specifies the position of the first character in the string.
+static bool wasEscaped(StringRef::iterator First,
+ StringRef::iterator Position) {
+ assert(Position - 1 >= First);
+ StringRef::iterator I = Position - 1;
+ // We calculate the number of consecutive '\'s before the current position
+ // by iterating backwards through our string.
+ while (I >= First && *I == '\\') --I;
+ // (Position - 1 - I) now contains the number of '\'s before the current
+ // position. If it is odd, the character at 'Position' was escaped.
+ return (Position - 1 - I) % 2 == 1;
+}
+
+// Parses a JSONString, assuming that the current position is on a quote.
+JSONString *JSONParser::parseString() {
+ assert(Position != End);
+ assert(!isWhitespace());
+ if (errorIfNotAt('"', "at start of string"))
+ return 0;
+ StringRef::iterator First = Position + 1;
+
+ // Benchmarking shows that this loop is the hot path of the application with
+ // about 2/3rd of the runtime cycles. Since escaped quotes are not the common
+ // case, and multiple escaped backslashes before escaped quotes are very rare,
+ // we pessimize this case to achieve a smaller inner loop in the common case.
+ // We're doing that by having a quick inner loop that just scans for the next
+ // quote. Once we find the quote we check the last character to see whether
+ // the quote might have been escaped. If the last character is not a '\', we
+ // know the quote was not escaped and have thus found the end of the string.
+ // If the immediately preceding character was a '\', we have to scan backwards
+ // to see whether the previous character was actually an escaped backslash, or
+ // an escape character for the quote. If we find that the current quote was
+ // escaped, we continue parsing for the next quote and repeat.
+ // This optimization yields roughly a 30% performance improvement.
+ do {
+ // Step over the current quote.
+ ++Position;
+ // Find the next quote.
+ while (Position != End && *Position != '"')
+ ++Position;
+ if (errorIfAtEndOfFile("'\"' at end of string"))
+ return 0;
+ // Repeat until the previous character was not a '\' or was an escaped
+ // backslash.
+ } while (*(Position - 1) == '\\' && wasEscaped(First, Position));
+
+ return new (ValueAllocator.Allocate<JSONString>())
+ JSONString(StringRef(First, Position - First));
+}
+
+
+// Advances the position to the next non-whitespace position.
+void JSONParser::nextNonWhitespace() {
+ do {
+ ++Position;
+ } while (isWhitespace());
+}
+
+// Checks if there is a whitespace character at the current position.
+bool JSONParser::isWhitespace() {
+ return *Position == ' ' || *Position == '\t' ||
+ *Position == '\n' || *Position == '\r';
+}
+
+bool JSONParser::failed() const {
+ return Failed;
+}
+
+// Parses a JSONValue, assuming that the current position is at the first
+// character of the value.
+JSONValue *JSONParser::parseValue() {
+ assert(Position != End);
+ assert(!isWhitespace());
+ switch (*Position) {
+ case '[':
+ return new (ValueAllocator.Allocate<JSONArray>(1)) JSONArray(this);
+ case '{':
+ return new (ValueAllocator.Allocate<JSONObject>(1)) JSONObject(this);
+ case '"':
+ return parseString();
+ default:
+ setExpectedError("'[', '{' or '\"' at start of value", *Position);
+ return 0;
+ }
+}
+
+// Parses a JSONKeyValuePair, assuming that the current position is at the first
+// character of the key, value pair.
+JSONKeyValuePair *JSONParser::parseKeyValuePair() {
+ assert(Position != End);
+ assert(!isWhitespace());
+
+ JSONString *Key = parseString();
+ if (Key == 0)
+ return 0;
+
+ nextNonWhitespace();
+ if (errorIfNotAt(':', "between key and value"))
+ return 0;
+
+ nextNonWhitespace();
+ const JSONValue *Value = parseValue();
+ if (Value == 0)
+ return 0;
+
+ return new (ValueAllocator.Allocate<JSONKeyValuePair>(1))
+ JSONKeyValuePair(Key, Value);
+}
+
+/// \brief Parses the first element of a JSON array or object, or detects
+/// that the container is empty.
+///
+/// The method assumes that the current position is before the first character
+/// of the element, with possible white space in between. When successful, it
+/// returns the new position after parsing the element. Otherwise, if there is
+/// no next value, it returns a default constructed StringRef::iterator.
+StringRef::iterator JSONParser::parseFirstElement(JSONAtom::Kind ContainerKind,
+ char StartChar, char EndChar,
+ const JSONAtom *&Element) {
+ assert(*Position == StartChar);
+ Element = 0;
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("value or end of container at start of container"))
+ return StringRef::iterator();
+
+ if (*Position == EndChar)
+ return StringRef::iterator();
+
+ Element = parseElement(ContainerKind);
+ if (Element == 0)
+ return StringRef::iterator();
+
+ return Position;
+}
+
+/// \brief Parses the next element of a JSON array or object, or detects
+/// the end of the container.
+///
+/// The method assumes that the current position is before the ',' which
+/// separates the next element from the current element. When successful, it
+/// returns the new position after parsing the element. Otherwise, if there is
+/// no next value, it returns a default constructed StringRef::iterator.
+StringRef::iterator JSONParser::parseNextElement(JSONAtom::Kind ContainerKind,
+ char EndChar,
+ const JSONAtom *&Element) {
+ Element = 0;
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("',' or end of container for next element"))
+ return StringRef::iterator();
+
+ if (*Position == ',') {
+ nextNonWhitespace();
+ if (errorIfAtEndOfFile("element in container"))
+ return StringRef::iterator();
+
+ Element = parseElement(ContainerKind);
+ if (Element == 0)
+ return StringRef::iterator();
+
+ return Position;
+ } else if (*Position == EndChar) {
+ return StringRef::iterator();
+ } else {
+ setExpectedError("',' or end of container for next element", *Position);
+ return StringRef::iterator();
+ }
+}
+
+const JSONAtom *JSONParser::parseElement(JSONAtom::Kind ContainerKind) {
+ switch (ContainerKind) {
+ case JSONAtom::JK_Array:
+ return parseValue();
+ case JSONAtom::JK_Object:
+ return parseKeyValuePair();
+ default:
+ llvm_unreachable("Impossible code path");
+ }
+}
+
+bool JSONParser::skipContainer(const JSONContainer &Container) {
+ for (JSONContainer::AtomIterator I = Container.atom_current(),
+ E = Container.atom_end();
+ I != E; ++I) {
+ assert(*I != 0);
+ if (!skip(**I))
+ return false;
+ }
+ return !failed();
+}
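
A minimal sketch of exercising the parser above for validation only, assuming the JSONParser(StringRef, SourceMgr*) constructor defined in this file; isValidJSON is a hypothetical helper:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/JSONParser.h"
    #include "llvm/Support/SourceMgr.h"

    static bool isValidJSON(llvm::StringRef Text) {
      llvm::SourceMgr SM;                // receives parse diagnostics
      llvm::JSONParser Parser(Text, &SM);
      return Parser.validate();          // parseRoot() + skip(), as above
    }
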
diff --git a/lib/Support/LLVMBuild.txt b/lib/Support/LLVMBuild.txt
new file mode 100644
index 000000000000..5b88be0203e5
--- /dev/null
+++ b/lib/Support/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./lib/Support/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Support
+parent = Libraries
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
new file mode 100644
index 000000000000..64404a1a8e77
--- /dev/null
+++ b/lib/Support/LockFileManager.cpp
@@ -0,0 +1,216 @@
+//===--- LockFileManager.cpp - File-level Locking Utility -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/LockFileManager.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <fstream>
+#include <sys/types.h>
+#include <sys/stat.h>
+#if LLVM_ON_WIN32
+#include <windows.h>
+#endif
+#if LLVM_ON_UNIX
+#include <unistd.h>
+#endif
+using namespace llvm;
+
+/// \brief Attempt to read the lock file with the given name, if it exists.
+///
+/// \param LockFileName The name of the lock file to read.
+///
+/// \returns The host name and process ID of the process that owns this lock
+/// file, if the lock file exists and its owner is still alive.
+Optional<std::pair<std::string, int> >
+LockFileManager::readLockFile(StringRef LockFileName) {
+ // Check whether the lock file exists. If not, clearly there's nothing
+ // to read, so we just return.
+ bool Exists = false;
+ if (sys::fs::exists(LockFileName, Exists) || !Exists)
+ return Optional<std::pair<std::string, int> >();
+
+ // Read the owning host and PID out of the lock file. If it appears that the
+ // owning process is dead, the lock file is invalid.
+ int PID = 0;
+ std::string Hostname;
+ std::ifstream Input(LockFileName.str().c_str());
+ if (Input >> Hostname >> PID && PID > 0 &&
+ processStillExecuting(Hostname, PID))
+ return std::make_pair(Hostname, PID);
+
+ // Delete the lock file. It's invalid anyway.
+ bool Existed;
+ sys::fs::remove(LockFileName, Existed);
+ return Optional<std::pair<std::string, int> >();
+}
+
+bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) {
+#if LLVM_ON_UNIX
+ char MyHostname[256];
+ MyHostname[255] = 0;
+ MyHostname[0] = 0;
+ gethostname(MyHostname, 255);
+ // Check whether the process is dead. If so, we're done.
+ if (MyHostname == Hostname && getsid(PID) == -1 && errno == ESRCH)
+ return false;
+#endif
+
+ return true;
+}
+
+LockFileManager::LockFileManager(StringRef FileName)
+{
+ LockFileName = FileName;
+ LockFileName += ".lock";
+
+ // If the lock file already exists, don't bother to try to create our own
+ // lock file; it won't work anyway. Just figure out who owns this lock file.
+ if ((Owner = readLockFile(LockFileName)))
+ return;
+
+ // Create a lock file that is unique to this instance.
+ UniqueLockFileName = LockFileName;
+ UniqueLockFileName += "-%%%%%%%%";
+ int UniqueLockFileID;
+ if (error_code EC
+ = sys::fs::unique_file(UniqueLockFileName.str(),
+ UniqueLockFileID,
+ UniqueLockFileName,
+ /*makeAbsolute=*/false)) {
+ Error = EC;
+ return;
+ }
+
+ // Write our process ID to our unique lock file.
+ {
+ raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true);
+
+#if LLVM_ON_UNIX
+ // FIXME: move getpid() call into LLVM
+ char hostname[256];
+ hostname[255] = 0;
+ hostname[0] = 0;
+ gethostname(hostname, 255);
+ Out << hostname << ' ' << getpid();
+#else
+ Out << "localhost 1";
+#endif
+ Out.close();
+
+ if (Out.has_error()) {
+ // We failed to write out PID, so make up an excuse, remove the
+ // unique lock file, and fail.
+ Error = make_error_code(errc::no_space_on_device);
+ bool Existed;
+ sys::fs::remove(UniqueLockFileName.c_str(), Existed);
+ return;
+ }
+ }
+
+ // Create a hard link from the lock file name. If this succeeds, we're done.
+ error_code EC
+ = sys::fs::create_hard_link(UniqueLockFileName.str(),
+ LockFileName.str());
+ if (EC == errc::success)
+ return;
+
+ // Creating the hard link failed.
+
+#ifdef LLVM_ON_UNIX
+ // The creation of the hard link may appear to fail, but if stat'ing the
+ // unique file returns a link count of 2, then we can still declare success.
+ struct stat StatBuf;
+ if (stat(UniqueLockFileName.c_str(), &StatBuf) == 0 &&
+ StatBuf.st_nlink == 2)
+ return;
+#endif
+
+ // Someone else managed to create the lock file first. Wipe out our unique
+ // lock file (it's useless now) and read the process ID from the lock file.
+ bool Existed;
+ sys::fs::remove(UniqueLockFileName.str(), Existed);
+ if ((Owner = readLockFile(LockFileName)))
+ return;
+
+ // There is a lock file that nobody owns; try to clean it up and report
+ // an error.
+ sys::fs::remove(LockFileName.str(), Existed);
+ Error = EC;
+}
+
+LockFileManager::LockFileState LockFileManager::getState() const {
+ if (Owner)
+ return LFS_Shared;
+
+ if (Error)
+ return LFS_Error;
+
+ return LFS_Owned;
+}
+
+LockFileManager::~LockFileManager() {
+ if (getState() != LFS_Owned)
+ return;
+
+ // Since we own the lock, remove the lock file and our own unique lock file.
+ bool Existed;
+ sys::fs::remove(LockFileName.str(), Existed);
+ sys::fs::remove(UniqueLockFileName.str(), Existed);
+}
+
+void LockFileManager::waitForUnlock() {
+ if (getState() != LFS_Shared)
+ return;
+
+#if LLVM_ON_WIN32
+ unsigned long Interval = 1;
+#else
+ struct timespec Interval;
+ Interval.tv_sec = 0;
+ Interval.tv_nsec = 1000000;
+#endif
+ // Don't wait more than an hour for the file to appear.
+ const unsigned MaxSeconds = 3600;
+ do {
+ // Sleep for the designated interval, to allow the owning process time to
+ // finish up and remove the lock file.
+ // FIXME: Should we hook in to system APIs to get a notification when the
+ // lock file is deleted?
+#if LLVM_ON_WIN32
+ Sleep(Interval);
+#else
+ nanosleep(&Interval, NULL);
+#endif
+ // If the file no longer exists, we're done.
+ bool Exists = false;
+ if (!sys::fs::exists(LockFileName.str(), Exists) && !Exists)
+ return;
+
+ if (!processStillExecuting((*Owner).first, (*Owner).second))
+ return;
+
+ // Exponentially increase the time we wait for the lock to be removed.
+#if LLVM_ON_WIN32
+ Interval *= 2;
+#else
+ Interval.tv_sec *= 2;
+ Interval.tv_nsec *= 2;
+ if (Interval.tv_nsec >= 1000000000) {
+ ++Interval.tv_sec;
+ Interval.tv_nsec -= 1000000000;
+ }
+#endif
+ } while (
+#if LLVM_ON_WIN32
+ Interval < MaxSeconds * 1000
+#else
+ Interval.tv_sec < (time_t)MaxSeconds
+#endif
+ );
+
+ // Give up.
+}
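
The core of the constructor above is the hard-link trick: the lock is acquired
by atomically linking a fully written unique file to the shared lock name. A
minimal POSIX-only sketch of just that step (hypothetical names, not the
LockFileManager API):

    #include <string>
    #include <sys/stat.h>
    #include <unistd.h>

    // Try to acquire "Lock" by hard-linking an already-written unique file to
    // it. link(2) is atomic on POSIX, so exactly one contender can win.
    static bool tryAcquire(const std::string &Unique, const std::string &Lock) {
      if (::link(Unique.c_str(), Lock.c_str()) == 0)
        return true;
      // On some NFS setups link() can report failure even though it worked;
      // a link count of 2 on the unique file means we actually own the lock.
      struct stat SB;
      return ::stat(Unique.c_str(), &SB) == 0 && SB.st_nlink == 2;
    }
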
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index c767c15e71c9..098cccb68df5 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -27,8 +27,15 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
if (Ptr == 0) {
void* tmp = Creator ? Creator() : 0;
+ TsanHappensBefore(this);
sys::MemoryFence();
+
+ // This write is racy against the first read in the ManagedStatic
+ // accessors. The race is benign because it does a second read after a
+ // memory fence, at which point it isn't possible to get a partial value.
+ TsanIgnoreWritesBegin();
Ptr = tmp;
+ TsanIgnoreWritesEnd();
DeleterFn = Deleter;
// Add to list of managed statics.
@@ -72,4 +79,3 @@ void llvm::llvm_shutdown() {
if (llvm_is_multithreaded()) llvm_stop_multithreaded();
}
-
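
The TsanHappensBefore/TsanIgnoreWrites annotations above describe a
double-checked initialization whose first read is deliberately racy and whose
second read happens after a fence. Expressed with C++11 atomics rather than
the explicit sys::MemoryFence used here, the same pattern looks roughly like
this (a sketch, not the ManagedStatic implementation):

    #include <atomic>
    #include <mutex>

    static std::atomic<void*> Ptr(nullptr);
    static std::mutex InitLock;

    void *lazyGet(void *(*Create)()) {
      void *P = Ptr.load(std::memory_order_acquire); // benign racy fast path
      if (!P) {
        std::lock_guard<std::mutex> Guard(InitLock);
        P = Ptr.load(std::memory_order_relaxed);     // re-check under the lock
        if (!P) {
          P = Create();
          Ptr.store(P, std::memory_order_release);   // pairs with the acquire
        }
      }
      return P;
    }
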
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 0771af5fee07..16e5c7a9f72b 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Errno.h"
#include "llvm/Support/Path.h"
@@ -29,15 +30,12 @@
#include <sys/stat.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
-#include <sys/uio.h>
#else
#include <io.h>
#endif
#include <fcntl.h>
using namespace llvm;
-namespace { const llvm::error_code success; }
-
//===----------------------------------------------------------------------===//
// MemoryBuffer implementation itself.
//===----------------------------------------------------------------------===//
@@ -306,7 +304,17 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
RealMapOffset)) {
result.reset(GetNamedBuffer<MemoryBufferMMapFile>(
StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator));
- return success;
+
+ if (RequiresNullTerminator && result->getBufferEnd()[0] != '\0') {
+ // There could be a race that resulted in the file being larger than the
+ // FileSize passed by the caller. We already have an assertion for this in
+ // MemoryBuffer::init(), but we also need a runtime guarantee that the
+ // buffer is null-terminated here, so make a copy that appends a null
+ // terminator.
+ result.reset(MemoryBuffer::getMemBufferCopy(result->getBuffer(),
+ Filename));
+ }
+ return error_code::success();
}
}
@@ -321,29 +329,35 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
char *BufPtr = const_cast<char*>(SB->getBufferStart());
size_t BytesLeft = MapSize;
+#ifndef HAVE_PREAD
if (lseek(FD, Offset, SEEK_SET) == -1)
return error_code(errno, posix_category());
+#endif
while (BytesLeft) {
+#ifdef HAVE_PREAD
+ ssize_t NumRead = ::pread(FD, BufPtr, BytesLeft, MapSize-BytesLeft+Offset);
+#else
ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
+#endif
if (NumRead == -1) {
if (errno == EINTR)
continue;
// Error while reading.
return error_code(errno, posix_category());
- } else if (NumRead == 0) {
- // We hit EOF early, truncate and terminate buffer.
- Buf->BufferEnd = BufPtr;
- *BufPtr = 0;
- result.swap(SB);
- return success;
+ }
+ if (NumRead == 0) {
+ assert(0 && "We got an inaccurate FileSize value or fstat reported an "
+ "invalid file size.");
+ *BufPtr = '\0'; // null-terminate at the actual size.
+ break;
}
BytesLeft -= NumRead;
BufPtr += NumRead;
}
result.swap(SB);
- return success;
+ return error_code::success();
}
//===----------------------------------------------------------------------===//
@@ -372,5 +386,5 @@ error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) {
} while (ReadBytes != 0);
result.reset(getMemBufferCopy(Buffer, "<stdin>"));
- return success;
+ return error_code::success();
}
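
The pread path added above avoids mutating the descriptor's file offset, which
matters when several readers share the same FD. A self-contained sketch of the
same EINTR-safe loop (hypothetical helper, not an LLVM API):

    #include <cerrno>
    #include <unistd.h>

    // Reads up to Size bytes at Offset without touching FD's file position.
    // Returns the number of bytes read, or -1 on a real I/O error.
    static ssize_t readFully(int FD, char *Buf, size_t Size, off_t Offset) {
      size_t Left = Size;
      while (Left) {
        ssize_t N = ::pread(FD, Buf, Left, Offset + (off_t)(Size - Left));
        if (N == -1) {
          if (errno == EINTR) continue;  // interrupted, retry
          return -1;
        }
        if (N == 0) break;               // EOF: file shorter than expected
        Left -= (size_t)N;
        Buf += N;
      }
      return (ssize_t)(Size - Left);
    }
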
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 8874e943f4c2..da5baab4be46 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
@@ -40,109 +40,80 @@ bool MutexImpl::tryacquire() { return true; }
namespace llvm {
using namespace sys;
-
-// This variable is useful for situations where the pthread library has been
-// compiled with weak linkage for its interface symbols. This allows the
-// threading support to be turned off by simply not linking against -lpthread.
-// In that situation, the value of pthread_mutex_init will be 0 and
-// consequently pthread_enabled will be false. In such situations, all the
-// pthread operations become no-ops and the functions all return false. If
-// pthread_mutex_init does have an address, then mutex support is enabled.
-// Note: all LLVM tools will link against -lpthread if its available since it
-// is configured into the LIBS variable.
-// Note: this line of code generates a warning if pthread_mutex_init is not
-// declared with weak linkage. It's safe to ignore the warning.
-static const bool pthread_enabled = true;
-
// Construct a Mutex using pthread calls
MutexImpl::MutexImpl( bool recursive)
: data_(0)
{
- if (pthread_enabled)
- {
- // Declare the pthread_mutex data structures
- pthread_mutex_t* mutex =
- static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
- pthread_mutexattr_t attr;
-
- // Initialize the mutex attributes
- int errorcode = pthread_mutexattr_init(&attr);
- assert(errorcode == 0);
-
- // Initialize the mutex as a recursive mutex, if requested, or normal
- // otherwise.
- int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
- errorcode = pthread_mutexattr_settype(&attr, kind);
- assert(errorcode == 0);
+ // Declare the pthread_mutex data structures
+ pthread_mutex_t* mutex =
+ static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t)));
+ pthread_mutexattr_t attr;
+
+ // Initialize the mutex attributes
+ int errorcode = pthread_mutexattr_init(&attr);
+ assert(errorcode == 0); (void)errorcode;
+
+ // Initialize the mutex as a recursive mutex, if requested, or normal
+ // otherwise.
+ int kind = ( recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL );
+ errorcode = pthread_mutexattr_settype(&attr, kind);
+ assert(errorcode == 0);
#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
- // Make it a process local mutex
- errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
- assert(errorcode == 0);
+ // Make it a process local mutex
+ errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+ assert(errorcode == 0);
#endif
- // Initialize the mutex
- errorcode = pthread_mutex_init(mutex, &attr);
- assert(errorcode == 0);
+ // Initialize the mutex
+ errorcode = pthread_mutex_init(mutex, &attr);
+ assert(errorcode == 0);
- // Destroy the attributes
- errorcode = pthread_mutexattr_destroy(&attr);
- assert(errorcode == 0);
+ // Destroy the attributes
+ errorcode = pthread_mutexattr_destroy(&attr);
+ assert(errorcode == 0);
- // Assign the data member
- data_ = mutex;
- }
+ // Assign the data member
+ data_ = mutex;
}
// Destruct a Mutex
MutexImpl::~MutexImpl()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
- pthread_mutex_destroy(mutex);
- free(mutex);
- }
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+ pthread_mutex_destroy(mutex);
+ free(mutex);
}
bool
MutexImpl::acquire()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
-
- int errorcode = pthread_mutex_lock(mutex);
- return errorcode == 0;
- } else return false;
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_lock(mutex);
+ return errorcode == 0;
}
bool
MutexImpl::release()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
-
- int errorcode = pthread_mutex_unlock(mutex);
- return errorcode == 0;
- } else return false;
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_unlock(mutex);
+ return errorcode == 0;
}
bool
MutexImpl::tryacquire()
{
- if (pthread_enabled)
- {
- pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
-
- int errorcode = pthread_mutex_trylock(mutex);
- return errorcode == 0;
- } else return false;
+ pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
+ assert(mutex != 0);
+
+ int errorcode = pthread_mutex_trylock(mutex);
+ return errorcode == 0;
}
}
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index e5b7cd3bfbc2..dcddeda977d1 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -38,16 +38,6 @@ bool Path::operator<(const Path& that) const {
return path < that.path;
}
-Path
-Path::GetLLVMConfigDir() {
- Path result;
-#ifdef LLVM_ETCDIR
- if (result.set(LLVM_ETCDIR))
- return result;
-#endif
- return GetLLVMDefaultConfigDir();
-}
-
LLVMFileType
sys::IdentifyFileType(const char *magic, unsigned length) {
assert(magic && "Invalid magic number string");
@@ -100,7 +90,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 0xCF: {
uint16_t type = 0;
if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
- magic[2] == char(0xFA) &&
+ magic[2] == char(0xFA) &&
(magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
/* Native endian */
if (length >= 16) type = magic[14] << 8 | magic[15];
@@ -162,31 +152,31 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
bool
Path::isArchive() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Archive_FileType;
+ return type == fs::file_magic::archive;
}
bool
Path::isDynamicLibrary() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
switch (type) {
default: return false;
- case Mach_O_FixedVirtualMemorySharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case ELF_SharedObject_FileType:
- case COFF_FileType: return true;
+ case fs::file_magic::macho_fixed_virtual_memory_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib:
+ case fs::file_magic::macho_dynamically_linked_shared_lib_stub:
+ case fs::file_magic::elf_shared_object:
+ case fs::file_magic::pecoff_executable: return true;
}
}
bool
Path::isObjectFile() const {
- LLVMFileType type;
- if (fs::identify_magic(str(), type) || type == Unknown_FileType)
+ fs::file_magic type;
+ if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown)
return false;
return true;
}
@@ -222,10 +212,10 @@ Path::appendSuffix(StringRef suffix) {
bool
Path::isBitcodeFile() const {
- LLVMFileType type;
+ fs::file_magic type;
if (fs::identify_magic(str(), type))
return false;
- return type == Bitcode_FileType;
+ return type == fs::file_magic::bitcode;
}
bool Path::hasMagicNumber(StringRef Magic) const {
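
With this change, Path's file-type predicates become thin wrappers over the
richer fs::file_magic enumeration. A hypothetical caller using the new
interface directly might look like this (a sketch; it assumes only the names
visible in this patch):

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"

    static bool isSharedLibrary(const llvm::Twine &Path) {
      llvm::sys::fs::file_magic Magic;
      if (llvm::sys::fs::identify_magic(Path, Magic))
        return false;  // treat I/O errors as "not a shared library"
      return Magic == llvm::sys::fs::file_magic::elf_shared_object;
    }
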
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index bebe442e2478..e2a69a650db8 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/PathV2.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include <cctype>
#include <cstdio>
@@ -23,15 +24,13 @@ namespace {
using llvm::sys::path::is_separator;
#ifdef LLVM_ON_WIN32
- const StringRef separators = "\\/";
- const char prefered_separator = '\\';
+ const char *separators = "\\/";
+ const char prefered_separator = '\\';
#else
- const StringRef separators = "/";
- const char prefered_separator = '/';
+ const char separators = '/';
+ const char prefered_separator = '/';
#endif
- const llvm::error_code success;
-
StringRef find_first_component(StringRef path) {
// Look for this first component in the following order.
// * empty (in this case we return an empty string)
@@ -347,7 +346,7 @@ const StringRef root_directory(StringRef path) {
const StringRef relative_path(StringRef path) {
StringRef root = root_path(path);
- return root.substr(root.size());
+ return path.substr(root.size());
}
void append(SmallVectorImpl<char> &path, const Twine &a,
@@ -492,7 +491,7 @@ bool is_separator(char value) {
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
-
+
// Check whether the temporary directory is specified by an environment
// variable.
const char *EnvironmentVariable;
@@ -505,7 +504,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
return;
}
-
+
// Fall back to a system default.
const char *DefaultResult;
#ifdef LLVM_ON_WIN32
@@ -519,7 +518,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
#endif
result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
-
+
bool has_root_name(const Twine &path) {
SmallString<128> path_storage;
StringRef p = path.toStringRef(path_storage);
@@ -601,12 +600,16 @@ namespace fs {
error_code make_absolute(SmallVectorImpl<char> &path) {
StringRef p(path.data(), path.size());
- bool rootName = path::has_root_name(p),
- rootDirectory = path::has_root_directory(p);
+ bool rootDirectory = path::has_root_directory(p),
+#ifdef LLVM_ON_WIN32
+ rootName = path::has_root_name(p);
+#else
+ rootName = true;
+#endif
// Already absolute.
if (rootName && rootDirectory)
- return success;
+ return error_code::success();
// All of the following conditions will need the current directory.
SmallString<128> current_dir;
@@ -618,7 +621,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
path::append(current_dir, p);
// Set path to the result.
path.swap(current_dir);
- return success;
+ return error_code::success();
}
if (!rootName && rootDirectory) {
@@ -627,7 +630,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
path::append(curDirRootName, p);
// Set path to the result.
path.swap(curDirRootName);
- return success;
+ return error_code::success();
}
if (rootName && !rootDirectory) {
@@ -639,7 +642,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) {
SmallString<128> res;
path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath);
path.swap(res);
- return success;
+ return error_code::success();
}
llvm_unreachable("All rootName and rootDirectory combinations should have "
@@ -651,12 +654,13 @@ error_code create_directories(const Twine &path, bool &existed) {
StringRef p = path.toStringRef(path_storage);
StringRef parent = path::parent_path(p);
- bool parent_exists;
+ if (!parent.empty()) {
+ bool parent_exists;
+ if (error_code ec = fs::exists(parent, parent_exists)) return ec;
- if (error_code ec = fs::exists(parent, parent_exists)) return ec;
-
- if (!parent_exists)
- if (error_code ec = create_directories(parent, existed)) return ec;
+ if (!parent_exists)
+ if (error_code ec = create_directories(parent, existed)) return ec;
+ }
return create_directory(p, existed);
}
@@ -678,7 +682,7 @@ error_code is_directory(const Twine &path, bool &result) {
if (error_code ec = status(path, st))
return ec;
result = is_directory(st);
- return success;
+ return error_code::success();
}
bool is_regular_file(file_status status) {
@@ -690,7 +694,7 @@ error_code is_regular_file(const Twine &path, bool &result) {
if (error_code ec = status(path, st))
return ec;
result = is_regular_file(st);
- return success;
+ return error_code::success();
}
bool is_symlink(file_status status) {
@@ -702,7 +706,7 @@ error_code is_symlink(const Twine &path, bool &result) {
if (error_code ec = status(path, st))
return ec;
result = is_symlink(st);
- return success;
+ return error_code::success();
}
bool is_other(file_status status) {
@@ -729,23 +733,134 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) {
if (ec == errc::value_too_large) {
// Magic.size() > file_size(Path).
result = false;
- return success;
+ return error_code::success();
}
return ec;
}
result = Magic == Buffer;
- return success;
+ return error_code::success();
+}
+
+/// @brief Identify the magic in magic.
+file_magic identify_magic(StringRef magic) {
+ switch ((unsigned char)magic[0]) {
+ case 0xDE: // 0x0B17C0DE = BC wrapper
+ if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
+ magic[3] == (char)0x0B)
+ return file_magic::bitcode;
+ break;
+ case 'B':
+ if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
+ return file_magic::bitcode;
+ break;
+ case '!':
+ if (magic.size() >= 8)
+ if (memcmp(magic.data(),"!<arch>\n",8) == 0)
+ return file_magic::archive;
+ break;
+
+ case '\177':
+ if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
+ if (magic.size() >= 18 && magic[17] == 0)
+ switch (magic[16]) {
+ default: break;
+ case 1: return file_magic::elf_relocatable;
+ case 2: return file_magic::elf_executable;
+ case 3: return file_magic::elf_shared_object;
+ case 4: return file_magic::elf_core;
+ }
+ }
+ break;
+
+ case 0xCA:
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ magic[3] == char(0xBE)) {
+ // This is complicated by an overlap with Java class files.
+ // See the Mach-O section in /usr/share/file/magic for details.
+ if (magic.size() >= 8 && magic[7] < 43)
+ // FIXME: Universal Binary of any type.
+ return file_magic::macho_dynamically_linked_shared_lib;
+ }
+ break;
+
+ // The two magic numbers for mach-o are:
+ // 0xfeedface - 32-bit mach-o
+ // 0xfeedfacf - 64-bit mach-o
+ case 0xFE:
+ case 0xCE:
+ case 0xCF: {
+ uint16_t type = 0;
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ magic[2] == char(0xFA) &&
+ (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
+ /* Native endian */
+ if (magic.size() >= 16) type = magic[14] << 8 | magic[15];
+ } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
+ magic[1] == char(0xFA) && magic[2] == char(0xED) &&
+ magic[3] == char(0xFE)) {
+ /* Reverse endian */
+ if (magic.size() >= 14) type = magic[13] << 8 | magic[12];
+ }
+ switch (type) {
+ default: break;
+ case 1: return file_magic::macho_object;
+ case 2: return file_magic::macho_executable;
+ case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
+ case 4: return file_magic::macho_core;
+ case 5: return file_magic::macho_preload_executabl;
+ case 6: return file_magic::macho_dynamically_linked_shared_lib;
+ case 7: return file_magic::macho_dynamic_linker;
+ case 8: return file_magic::macho_bundle;
+ case 9: return file_magic::macho_dynamically_linked_shared_lib_stub;
+ case 10: return file_magic::macho_dsym_companion;
+ }
+ break;
+ }
+ case 0xF0: // PowerPC Windows
+ case 0x83: // Alpha 32-bit
+ case 0x84: // Alpha 64-bit
+ case 0x66: // MIPS R4000 Windows
+ case 0x50: // mc68K
+ case 0x4c: // 80386 Windows
+ if (magic[1] == 0x01)
+ return file_magic::coff_object;
+
+ case 0x90: // PA-RISC Windows
+ case 0x68: // mc68K Windows
+ if (magic[1] == 0x02)
+ return file_magic::coff_object;
+ break;
+
+ case 0x4d: // Possible MS-DOS stub on Windows PE file
+ if (magic[1] == 0x5a) {
+ uint32_t off =
+ *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
+ // PE/COFF file, either EXE or DLL.
+ if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0)
+ return file_magic::pecoff_executable;
+ }
+ break;
+
+ case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86))
+ return file_magic::coff_object;
+ break;
+
+ default:
+ break;
+ }
+ return file_magic::unknown;
}
-error_code identify_magic(const Twine &path, LLVMFileType &result) {
+error_code identify_magic(const Twine &path, file_magic &result) {
SmallString<32> Magic;
error_code ec = get_magic(path, Magic.capacity(), Magic);
if (ec && ec != errc::value_too_large)
return ec;
- result = IdentifyFileType(Magic.data(), Magic.size());
- return success;
+ result = identify_magic(Magic);
+ return error_code::success();
}
namespace {
@@ -753,7 +868,9 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
if (ft == file_type::directory_file) {
// This code would be a lot better with exceptions ;/.
error_code ec;
- for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+ directory_iterator i(path, ec);
+ if (ec) return ec;
+ for (directory_iterator e; i != e; i.increment(ec)) {
if (ec) return ec;
file_status st;
if (error_code ec = i->status(st)) return ec;
@@ -770,7 +887,7 @@ error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
++count;
}
- return success;
+ return error_code::success();
}
} // end unnamed namespace
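
Among the fixes above, the one-character change in relative_path is easy to
overlook: the old code took the substring of the root itself, so it returned
an empty string for every input. A small check against the fixed behavior (a
sketch assuming the POSIX path grammar):

    #include "llvm/Support/PathV2.h"
    #include <cassert>

    static void checkRelativePath() {
      // Old code: root_path("/usr/lib/foo.so") is "/", and "/".substr(1) is "".
      // The fixed code takes the substring of the full path instead.
      assert(llvm::sys::path::relative_path("/usr/lib/foo.so") == "usr/lib/foo.so");
      assert(llvm::sys::path::relative_path("foo/bar") == "foo/bar"); // no root
    }
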
diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp
index 01860b082d62..75bc282d9bd4 100644
--- a/lib/Support/Program.cpp
+++ b/lib/Support/Program.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Program.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
using namespace llvm;
using namespace sys;
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index d0b1e10b56fb..6a34f2d08524 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -20,7 +20,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
@@ -42,107 +42,75 @@ bool RWMutexImpl::writer_release() { return true; }
namespace llvm {
using namespace sys;
-
-// This variable is useful for situations where the pthread library has been
-// compiled with weak linkage for its interface symbols. This allows the
-// threading support to be turned off by simply not linking against -lpthread.
-// In that situation, the value of pthread_mutex_init will be 0 and
-// consequently pthread_enabled will be false. In such situations, all the
-// pthread operations become no-ops and the functions all return false. If
-// pthread_rwlock_init does have an address, then rwlock support is enabled.
-// Note: all LLVM tools will link against -lpthread if its available since it
-// is configured into the LIBS variable.
-// Note: this line of code generates a warning if pthread_rwlock_init is not
-// declared with weak linkage. It's safe to ignore the warning.
-static const bool pthread_enabled = true;
-
// Construct a RWMutex using pthread calls
RWMutexImpl::RWMutexImpl()
: data_(0)
{
- if (pthread_enabled)
- {
- // Declare the pthread_rwlock data structures
- pthread_rwlock_t* rwlock =
- static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
#ifdef __APPLE__
- // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
- bzero(rwlock, sizeof(pthread_rwlock_t));
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
#endif
- // Initialize the rwlock
- int errorcode = pthread_rwlock_init(rwlock, NULL);
- (void)errorcode;
- assert(errorcode == 0);
+ // Initialize the rwlock
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
+ assert(errorcode == 0);
- // Assign the data member
- data_ = rwlock;
- }
+ // Assign the data member
+ data_ = rwlock;
}
// Destruct a RWMutex
RWMutexImpl::~RWMutexImpl()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
- pthread_rwlock_destroy(rwlock);
- free(rwlock);
- }
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
}
bool
RWMutexImpl::reader_acquire()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_rdlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
}
bool
RWMutexImpl::reader_release()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_unlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
}
bool
RWMutexImpl::writer_acquire()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_wrlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
}
bool
RWMutexImpl::writer_release()
{
- if (pthread_enabled)
- {
- pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
-
- int errorcode = pthread_rwlock_unlock(rwlock);
- return errorcode == 0;
- } else return false;
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
}
}
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 997ce0b74cd2..68d9c29411f0 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/MathExtras.h"
+#include <algorithm>
#include <cstdlib>
using namespace llvm;
@@ -223,6 +224,56 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
NumTombstones = RHS.NumTombstones;
}
+void SmallPtrSetImpl::swap(SmallPtrSetImpl &RHS) {
+ if (this == &RHS) return;
+
+ // We can only avoid copying elements if neither set is small.
+ if (!this->isSmall() && !RHS.isSmall()) {
+ std::swap(this->CurArray, RHS.CurArray);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->NumTombstones, RHS.NumTombstones);
+ return;
+ }
+
+ // FIXME: From here on we assume that both sets have the same small size.
+
+ // If only RHS is small, copy the small elements into LHS and move the pointer
+ // from LHS to RHS.
+ if (!this->isSmall() && RHS.isSmall()) {
+ std::copy(RHS.SmallArray, RHS.SmallArray+RHS.CurArraySize,
+ this->SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+ std::swap(this->CurArraySize, RHS.CurArraySize);
+ RHS.CurArray = this->CurArray;
+ RHS.NumTombstones = this->NumTombstones;
+ this->CurArray = this->SmallArray;
+ this->NumTombstones = 0;
+ return;
+ }
+
+ // If only LHS is small, copy the small elements into RHS and move the pointer
+ // from RHS to LHS.
+ if (this->isSmall() && !RHS.isSmall()) {
+ std::copy(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(RHS.NumElements, this->NumElements);
+ std::swap(RHS.CurArraySize, this->CurArraySize);
+ this->CurArray = RHS.CurArray;
+ this->NumTombstones = RHS.NumTombstones;
+ RHS.CurArray = RHS.SmallArray;
+ RHS.NumTombstones = 0;
+ return;
+ }
+
+ // Both are small, just swap the small elements.
+ assert(this->isSmall() && RHS.isSmall());
+ assert(this->CurArraySize == RHS.CurArraySize);
+ std::swap_ranges(this->SmallArray, this->SmallArray+this->CurArraySize,
+ RHS.SmallArray);
+ std::swap(this->NumElements, RHS.NumElements);
+}
+
SmallPtrSetImpl::~SmallPtrSetImpl() {
if (!isSmall())
free(CurArray);
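
The new swap handles three layouts: both sets on the heap (pointer swap),
mixed small/heap (copy the inline array across and hand over the heap
pointer), and both inline (std::swap_ranges). Usage is what you would expect;
this sketch assumes the matching SmallPtrSet::swap wrapper added in the header
alongside this change:

    #include "llvm/ADT/SmallPtrSet.h"

    static void swapDemo(int *A, int *B) {
      llvm::SmallPtrSet<int*, 4> X, Y;
      X.insert(A);
      Y.insert(B);
      X.swap(Y);  // both-small case: swap_ranges over the inline arrays
      // X now holds B and Y holds A, with no heap allocation involved.
    }
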
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index de042a9f53c8..bbe36b260b9d 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -140,8 +140,9 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
///
/// @param Type - If non-null, the kind of message (e.g., "error") which is
/// prefixed to the message.
-SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
- const char *Type, bool ShowLine) const {
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg,
+ ArrayRef<SMRange> Ranges) const {
// First thing to do: find the current buffer containing the specified
// location.
@@ -156,33 +157,48 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
LineStart[-1] != '\n' && LineStart[-1] != '\r')
--LineStart;
- std::string LineStr;
- if (ShowLine) {
- // Get the end of the line.
- const char *LineEnd = Loc.getPointer();
- while (LineEnd != CurMB->getBufferEnd() &&
- LineEnd[0] != '\n' && LineEnd[0] != '\r')
- ++LineEnd;
- LineStr = std::string(LineStart, LineEnd);
- }
-
- std::string PrintedMsg;
- raw_string_ostream OS(PrintedMsg);
- if (Type)
- OS << Type << ": ";
- OS << Msg;
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ while (LineEnd != CurMB->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ std::string LineStr(LineStart, LineEnd);
+ // Convert any ranges to column ranges that only intersect the line of the
+ // location.
+ SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+ SMRange R = Ranges[i];
+ if (!R.isValid()) continue;
+
+ // If the line doesn't contain any part of the range, then ignore it.
+ if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
+ continue;
+
+ // Ignore pieces of the range that go onto other lines.
+ if (R.Start.getPointer() < LineStart)
+ R.Start = SMLoc::getFromPointer(LineStart);
+ if (R.End.getPointer() > LineEnd)
+ R.End = SMLoc::getFromPointer(LineEnd);
+
+ // Translate from SMLoc ranges to column ranges.
+ ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
+ R.End.getPointer()-LineStart));
+ }
+
return SMDiagnostic(*this, Loc,
CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
- Loc.getPointer()-LineStart, OS.str(),
- LineStr, ShowLine);
+ Loc.getPointer()-LineStart, Kind, Msg.str(),
+ LineStr, ColRanges);
}
-void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
- const char *Type, bool ShowLine) const {
+void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
+ const Twine &Msg, ArrayRef<SMRange> Ranges) const {
+ SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges);
+
// Report the message with the diagnostic handler if present.
if (DiagHandler) {
- DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
+ DiagHandler(Diagnostic, DiagContext);
return;
}
@@ -192,14 +208,24 @@ void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
assert(CurBuf != -1 && "Invalid or unspecified location!");
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
- GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
+ Diagnostic.print(0, OS);
}
//===----------------------------------------------------------------------===//
// SMDiagnostic Implementation
//===----------------------------------------------------------------------===//
-void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
+SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
+ int Line, int Col, SourceMgr::DiagKind Kind,
+ const std::string &Msg,
+ const std::string &LineStr,
+ ArrayRef<std::pair<unsigned,unsigned> > Ranges)
+ : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
+ Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()) {
+}
+
+
+void SMDiagnostic::print(const char *ProgName, raw_ostream &S) const {
if (ProgName && ProgName[0])
S << ProgName << ": ";
@@ -217,16 +243,71 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
S << ": ";
}
+ switch (Kind) {
+ case SourceMgr::DK_Error: S << "error: "; break;
+ case SourceMgr::DK_Warning: S << "warning: "; break;
+ case SourceMgr::DK_Note: S << "note: "; break;
+ }
+
S << Message << '\n';
- if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
- S << LineContents << '\n';
+ if (LineNo == -1 || ColumnNo == -1)
+ return;
- // Print out spaces/tabs before the caret.
- for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
- S << (LineContents[i] == '\t' ? '\t' : ' ');
- S << "^\n";
+ // Build the line with the caret and ranges.
+ std::string CaretLine(LineContents.size()+1, ' ');
+
+ // Expand any ranges.
+ for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
+ std::pair<unsigned, unsigned> R = Ranges[r];
+ for (unsigned i = R.first,
+ e = std::min(R.second, (unsigned)LineContents.size())+1; i != e; ++i)
+ CaretLine[i] = '~';
+ }
+
+ // Finally, plop on the caret.
+ if (unsigned(ColumnNo) <= LineContents.size())
+ CaretLine[ColumnNo] = '^';
+ else
+ CaretLine[LineContents.size()] = '^';
+
+ // ... and remove trailing whitespace so the output doesn't wrap for it. We
+ // know that the line isn't completely empty because it has the caret in it at
+ // least.
+ CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
+
+ // Print out the source line one character at a time, so we can expand tabs.
+ for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
+ if (LineContents[i] != '\t') {
+ S << LineContents[i];
+ ++OutCol;
+ continue;
+ }
+
+ // If we have a tab, emit at least one space, then round up to 8 columns.
+ do {
+ S << ' ';
+ ++OutCol;
+ } while (OutCol & 7);
+ }
+ S << '\n';
+
+ // Print out the caret line, matching tabs in the source line.
+ for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
+ if (i >= LineContents.size() || LineContents[i] != '\t') {
+ S << CaretLine[i];
+ ++OutCol;
+ continue;
+ }
+
+ // Okay, we have a tab. Insert the appropriate number of characters.
+ do {
+ S << CaretLine[i];
+ ++OutCol;
+ } while (OutCol & 7);
}
+
+ S << '\n';
}
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 1e733d92e610..d8a6ad35ba9c 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
@@ -72,9 +73,12 @@ void Statistic::RegisterStatistic() {
if (Enabled)
StatInfo->addStatistic(this);
+ TsanHappensBefore(this);
sys::MemoryFence();
// Remember we have been registered.
+ TsanIgnoreWritesBegin();
Initialized = true;
+ TsanIgnoreWritesEnd();
}
}
@@ -126,13 +130,11 @@ void llvm::PrintStatistics(raw_ostream &OS) {
<< "===" << std::string(73, '-') << "===\n\n";
// Print all of the statistics.
- for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
- std::string CountStr = utostr(Stats.Stats[i]->getValue());
- OS << std::string(MaxValLen-CountStr.size(), ' ')
- << CountStr << " " << Stats.Stats[i]->getName()
- << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
- << " - " << Stats.Stats[i]->getDesc() << "\n";
- }
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i)
+ OS << format("%*u %-*s - %s\n",
+ MaxValLen, Stats.Stats[i]->getValue(),
+ MaxNameLen, Stats.Stats[i]->getName(),
+ Stats.Stats[i]->getDesc());
OS << '\n'; // Flush the output stream.
OS.flush();
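
format's '*' width specifiers replace the manual space-padding the old loop
did by hand: '%*u' right-justifies the value in MaxValLen columns and '%-*s'
left-justifies the name in MaxNameLen columns. A minimal sketch with made-up
widths and values:

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    static void printOneStat(llvm::raw_ostream &OS) {
      OS << llvm::format("%*u %-*s - %s\n", 6, 128u, 12, "instcombine",
                         "Number of insts combined");
      // Prints: "   128 instcombine  - Number of insts combined"
    }
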
diff --git a/lib/Support/StreamableMemoryObject.cpp b/lib/Support/StreamableMemoryObject.cpp
new file mode 100644
index 000000000000..c23f07b8fc3c
--- /dev/null
+++ b/lib/Support/StreamableMemoryObject.cpp
@@ -0,0 +1,140 @@
+//===- StreamableMemoryObject.cpp - Streamable data interface -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/StreamableMemoryObject.h"
+#include <cassert>
+#include <cstring>
+
+
+using namespace llvm;
+
+namespace {
+
+class RawMemoryObject : public StreamableMemoryObject {
+public:
+ RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
+ FirstChar(Start), LastChar(End) {
+ assert(LastChar > FirstChar && "Invalid start/end range");
+ }
+
+ virtual uint64_t getBase() const { return 0; }
+ virtual uint64_t getExtent() const { return LastChar - FirstChar; }
+ virtual int readByte(uint64_t address, uint8_t* ptr) const;
+ virtual int readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const;
+ virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const;
+ virtual bool isValidAddress(uint64_t address) const {
+ return validAddress(address);
+ }
+ virtual bool isObjectEnd(uint64_t address) const {return objectEnd(address);}
+
+private:
+ const uint8_t* const FirstChar;
+ const uint8_t* const LastChar;
+
+ // These are implemented as inline functions here to avoid multiple virtual
+ // calls per public function
+ bool validAddress(uint64_t address) const {
+ return static_cast<ptrdiff_t>(address) < LastChar - FirstChar;
+ }
+ bool objectEnd(uint64_t address) const {
+ return static_cast<ptrdiff_t>(address) == LastChar - FirstChar;
+ }
+
+ RawMemoryObject(const RawMemoryObject&); // DO NOT IMPLEMENT
+ void operator=(const RawMemoryObject&); // DO NOT IMPLEMENT
+};
+
+int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
+ if (!validAddress(address)) return -1;
+ *ptr = *((uint8_t *)(uintptr_t)(address + FirstChar));
+ return 0;
+}
+
+int RawMemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ if (!validAddress(address) || !validAddress(address + size - 1)) return -1;
+ memcpy(buf, (uint8_t *)(uintptr_t)(address + FirstChar), size);
+ if (copied) *copied = size;
+ return size;
+}
+
+const uint8_t *RawMemoryObject::getPointer(uint64_t address,
+ uint64_t size) const {
+ return FirstChar + address;
+}
+} // anonymous namespace
+
+namespace llvm {
+// If the bitcode has a header, then its size is known, and we don't have to
+// block until we actually want to read it.
+bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
+ if (ObjectSize && address < ObjectSize) return true;
+ return fetchToPos(address);
+}
+
+bool StreamingMemoryObject::isObjectEnd(uint64_t address) const {
+ if (ObjectSize) return address == ObjectSize;
+ fetchToPos(address);
+ return address == ObjectSize && address != 0;
+}
+
+uint64_t StreamingMemoryObject::getExtent() const {
+ if (ObjectSize) return ObjectSize;
+ size_t pos = BytesRead + kChunkSize;
+ // keep fetching until we run out of bytes
+ while (fetchToPos(pos)) pos += kChunkSize;
+ return ObjectSize;
+}
+
+int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) const {
+ if (!fetchToPos(address)) return -1;
+ *ptr = Bytes[address + BytesSkipped];
+ return 0;
+}
+
+int StreamingMemoryObject::readBytes(uint64_t address,
+ uint64_t size,
+ uint8_t* buf,
+ uint64_t* copied) const {
+ if (!fetchToPos(address + size - 1)) return -1;
+ memcpy(buf, &Bytes[address + BytesSkipped], size);
+ if (copied) *copied = size;
+ return 0;
+}
+
+bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
+ if (BytesRead < s) return true;
+ BytesSkipped = s;
+ BytesRead -= s;
+ return false;
+}
+
+void StreamingMemoryObject::setKnownObjectSize(size_t size) {
+ ObjectSize = size;
+ Bytes.reserve(size);
+}
+
+StreamableMemoryObject *getNonStreamedMemoryObject(
+ const unsigned char *Start, const unsigned char *End) {
+ return new RawMemoryObject(Start, End);
+}
+
+StreamableMemoryObject::~StreamableMemoryObject() { }
+
+StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) :
+ Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0),
+ ObjectSize(0), EOFReached(false) {
+ BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize);
+}
+}
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index 49c5ac4252c8..d77ad7f55a18 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -57,24 +57,3 @@ void llvm::SplitString(StringRef Source,
S = getToken(S.second, Delimiters);
}
}
-
-void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
- StringRef Separators, int MaxSplit,
- bool KeepEmpty) const {
- StringRef rest = *this;
-
- // rest.data() is used to distinguish cases like "a," that splits into
- // "a" + "" and "a" that splits into "a" + 0.
- for (int splits = 0;
- rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
- ++splits) {
- std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
-
- if (p.first.size() != 0 || KeepEmpty)
- A.push_back(p.first);
- rest = p.second;
- }
- // If we have a tail left, add it.
- if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
- A.push_back(rest);
-}
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index a1ac512fa244..c131fe07f48d 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -39,11 +39,13 @@ void StringMapImpl::init(unsigned InitSize) {
NumItems = 0;
NumTombstones = 0;
- TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
-
+ TheTable = (StringMapEntryBase **)calloc(NumBuckets+1,
+ sizeof(StringMapEntryBase **) +
+ sizeof(unsigned));
+
// Allocate one extra bucket, set it to look filled so the iterators stop at
// end.
- TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+ TheTable[NumBuckets] = (StringMapEntryBase*)2;
}
@@ -60,29 +62,29 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
}
unsigned FullHashValue = HashString(Name);
unsigned BucketNo = FullHashValue & (HTSize-1);
-
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
unsigned ProbeAmt = 1;
int FirstTombstone = -1;
while (1) {
- ItemBucket &Bucket = TheTable[BucketNo];
- StringMapEntryBase *BucketItem = Bucket.Item;
+ StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return it.
if (BucketItem == 0) {
// If we found a tombstone, we want to reuse the tombstone instead of an
// empty bucket. This reduces probing.
if (FirstTombstone != -1) {
- TheTable[FirstTombstone].FullHashValue = FullHashValue;
+ HashTable[FirstTombstone] = FullHashValue;
return FirstTombstone;
}
- Bucket.FullHashValue = FullHashValue;
+ HashTable[BucketNo] = FullHashValue;
return BucketNo;
}
if (BucketItem == getTombstoneVal()) {
// Skip over tombstones. However, remember the first one we see.
if (FirstTombstone == -1) FirstTombstone = BucketNo;
- } else if (Bucket.FullHashValue == FullHashValue) {
+ } else if (HashTable[BucketNo] == FullHashValue) {
// If the full hash value matches, check deeply for a match. The common
// case here is that we are only looking at the buckets (for item info
// being non-null and for the full hash value) not at the items. This
@@ -115,18 +117,18 @@ int StringMapImpl::FindKey(StringRef Key) const {
if (HTSize == 0) return -1; // Really empty table?
unsigned FullHashValue = HashString(Key);
unsigned BucketNo = FullHashValue & (HTSize-1);
-
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
+
unsigned ProbeAmt = 1;
while (1) {
- ItemBucket &Bucket = TheTable[BucketNo];
- StringMapEntryBase *BucketItem = Bucket.Item;
+ StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return.
if (BucketItem == 0)
return -1;
if (BucketItem == getTombstoneVal()) {
// Ignore tombstones.
- } else if (Bucket.FullHashValue == FullHashValue) {
+ } else if (HashTable[BucketNo] == FullHashValue) {
// If the full hash value matches, check deeply for a match. The common
// case here is that we are only looking at the buckets (for item info
// being non-null and for the full hash value) not at the items. This
@@ -165,8 +167,8 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return 0;
- StringMapEntryBase *Result = TheTable[Bucket].Item;
- TheTable[Bucket].Item = getTombstoneVal();
+ StringMapEntryBase *Result = TheTable[Bucket];
+ TheTable[Bucket] = getTombstoneVal();
--NumItems;
++NumTombstones;
assert(NumItems + NumTombstones <= NumBuckets);
@@ -180,6 +182,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
/// the appropriate mod-of-hashtable-size.
void StringMapImpl::RehashTable() {
unsigned NewSize;
+ unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
// If the hash table is now more than 3/4 full, or if fewer than 1/8 of
// the buckets are empty (meaning that many are filled with tombstones),
@@ -194,19 +197,23 @@ void StringMapImpl::RehashTable() {
// Allocate one extra bucket which will always be non-empty. This allows the
// iterators to stop at end.
- ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket));
- NewTableArray[NewSize].Item = (StringMapEntryBase*)2;
-
+ StringMapEntryBase **NewTableArray =
+ (StringMapEntryBase **)calloc(NewSize+1, sizeof(StringMapEntryBase *) +
+ sizeof(unsigned));
+ unsigned *NewHashArray = (unsigned *)(NewTableArray + NewSize + 1);
+ NewTableArray[NewSize] = (StringMapEntryBase*)2;
+
// Rehash all the items into their new buckets. Luckily :) we already have
// the hash values available, so we don't have to rehash any strings.
- for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) {
- if (IB->Item && IB->Item != getTombstoneVal()) {
+ for (unsigned I = 0, E = NumBuckets; I != E; ++I) {
+ StringMapEntryBase *Bucket = TheTable[I];
+ if (Bucket && Bucket != getTombstoneVal()) {
// Fast case, bucket available.
- unsigned FullHash = IB->FullHashValue;
+ unsigned FullHash = HashTable[I];
unsigned NewBucket = FullHash & (NewSize-1);
- if (NewTableArray[NewBucket].Item == 0) {
- NewTableArray[FullHash & (NewSize-1)].Item = IB->Item;
- NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash;
+ if (NewTableArray[NewBucket] == 0) {
+ NewTableArray[FullHash & (NewSize-1)] = Bucket;
+ NewHashArray[FullHash & (NewSize-1)] = FullHash;
continue;
}
@@ -214,11 +221,11 @@ void StringMapImpl::RehashTable() {
unsigned ProbeSize = 1;
do {
NewBucket = (NewBucket + ProbeSize++) & (NewSize-1);
- } while (NewTableArray[NewBucket].Item);
+ } while (NewTableArray[NewBucket]);
// Finally found a slot. Fill it in.
- NewTableArray[NewBucket].Item = IB->Item;
- NewTableArray[NewBucket].FullHashValue = FullHash;
+ NewTableArray[NewBucket] = Bucket;
+ NewHashArray[NewBucket] = FullHash;
}
}
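
The rewrite replaces the array of {Item, FullHashValue} buckets with one
allocation holding all the entry pointers (plus the sentinel) followed by all
the cached hash values, recovered by pointer arithmetic as in this sketch of
the layout (hypothetical helper):

    #include "llvm/ADT/StringMap.h"

    // Block layout for NumBuckets == N:
    //   [ Entry*_0 ... Entry*_{N-1} | sentinel Entry* | hash_0 ... hash_{N-1} ]
    static unsigned *hashArrayFor(llvm::StringMapEntryBase **Table,
                                  unsigned NumBuckets) {
      return reinterpret_cast<unsigned *>(Table + NumBuckets + 1);
    }
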
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index b5b4f9476026..abe570f6df4b 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -10,6 +10,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/edit_distance.h"
#include <bitset>
using namespace llvm;
@@ -25,6 +27,12 @@ static char ascii_tolower(char x) {
return x;
}
+static char ascii_toupper(char x) {
+ if (x >= 'a' && x <= 'z')
+ return x - 'a' + 'A';
+ return x;
+}
+
static bool ascii_isdigit(char x) {
return x >= '0' && x <= '9';
}
@@ -78,56 +86,29 @@ int StringRef::compare_numeric(StringRef RHS) const {
unsigned StringRef::edit_distance(llvm::StringRef Other,
bool AllowReplacements,
unsigned MaxEditDistance) {
- // The algorithm implemented below is the "classic"
- // dynamic-programming algorithm for computing the Levenshtein
- // distance, which is described here:
- //
- // http://en.wikipedia.org/wiki/Levenshtein_distance
- //
- // Although the algorithm is typically described using an m x n
- // array, only two rows are used at a time, so this implemenation
- // just keeps two separate vectors for those two rows.
- size_type m = size();
- size_type n = Other.size();
-
- const unsigned SmallBufferSize = 64;
- unsigned SmallBuffer[SmallBufferSize];
- llvm::OwningArrayPtr<unsigned> Allocated;
- unsigned *previous = SmallBuffer;
- if (2*(n + 1) > SmallBufferSize) {
- previous = new unsigned [2*(n+1)];
- Allocated.reset(previous);
- }
- unsigned *current = previous + (n + 1);
-
- for (unsigned i = 0; i <= n; ++i)
- previous[i] = i;
-
- for (size_type y = 1; y <= m; ++y) {
- current[0] = y;
- unsigned BestThisRow = current[0];
-
- for (size_type x = 1; x <= n; ++x) {
- if (AllowReplacements) {
- current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
- min(current[x-1], previous[x])+1);
- }
- else {
- if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
- else current[x] = min(current[x-1], previous[x]) + 1;
- }
- BestThisRow = min(BestThisRow, current[x]);
- }
+ return llvm::ComputeEditDistance(
+ llvm::ArrayRef<char>(data(), size()),
+ llvm::ArrayRef<char>(Other.data(), Other.size()),
+ AllowReplacements, MaxEditDistance);
+}
- if (MaxEditDistance && BestThisRow > MaxEditDistance)
- return MaxEditDistance + 1;
+//===----------------------------------------------------------------------===//
+// String Operations
+//===----------------------------------------------------------------------===//
- unsigned *tmp = current;
- current = previous;
- previous = tmp;
+std::string StringRef::lower() const {
+ std::string Result(size(), char());
+ for (size_type i = 0, e = size(); i != e; ++i) {
+ Result[i] = ascii_tolower(Data[i]);
}
+ return Result;
+}
- unsigned Result = previous[n];
+std::string StringRef::upper() const {
+ std::string Result(size(), char());
+ for (size_type i = 0, e = size(); i != e; ++i) {
+ Result[i] = ascii_toupper(Data[i]);
+ }
return Result;
}
@@ -144,9 +125,35 @@ size_t StringRef::find(StringRef Str, size_t From) const {
size_t N = Str.size();
if (N > Length)
return npos;
- for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
- if (substr(i, N).equals(Str))
- return i;
+
+ // For short haystacks or unsupported needles, fall back to the naive algorithm.
+ if (Length < 16 || N > 255 || N == 0) {
+ for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i)
+ if (substr(i, N).equals(Str))
+ return i;
+ return npos;
+ }
+
+ if (From >= Length)
+ return npos;
+
+ // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
+ uint8_t BadCharSkip[256];
+ std::memset(BadCharSkip, N, 256);
+ for (unsigned i = 0; i != N-1; ++i)
+ BadCharSkip[(uint8_t)Str[i]] = N-1-i;
+
+ unsigned Len = Length-From, Pos = From;
+ while (Len >= N) {
+ if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
+ return Pos;
+
+ // Otherwise skip the appropriate number of bytes.
+ uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
+ Len -= Skip;
+ Pos += Skip;
+ }
+
return npos;
}
@@ -223,6 +230,27 @@ StringRef::size_type StringRef::find_last_of(StringRef Chars,
return npos;
}
+void StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+ // rest.data() is used to distinguish cases like "a," that splits into
+ // "a" + "" and "a" that splits into "a" + 0.
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<StringRef, StringRef> p = rest.split(Separators);
+
+ if (KeepEmpty || p.first.size() != 0)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
+
//===----------------------------------------------------------------------===//
// Helpful Algorithms
//===----------------------------------------------------------------------===//
@@ -257,8 +285,8 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
/// GetAsUnsignedInteger - Workhorse method that converts an integer character
/// sequence of radix up to 36 to an unsigned long long value.
-static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
- unsigned long long &Result) {
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
// Autosense radix if not specified.
if (Radix == 0)
Radix = GetAutoSenseRadix(Str);
@@ -298,17 +326,13 @@ static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
return false;
}
-bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const {
- return GetAsUnsignedInteger(*this, Radix, Result);
-}
-
-
-bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+ long long &Result) {
unsigned long long ULLVal;
// Handle positive strings first.
- if (empty() || front() != '-') {
- if (GetAsUnsignedInteger(*this, Radix, ULLVal) ||
+ if (Str.empty() || Str.front() != '-') {
+ if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
// Check for value so large it overflows a signed value.
(long long)ULLVal < 0)
return true;
@@ -317,7 +341,7 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
}
// Get the positive part of the value.
- if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) ||
+ if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
// Reject values so large they'd overflow as negative signed, but allow
// "-0". This negates the unsigned so that the negative isn't undefined
// on signed overflow.
@@ -328,24 +352,6 @@ bool StringRef::getAsInteger(unsigned Radix, long long &Result) const {
return false;
}
-bool StringRef::getAsInteger(unsigned Radix, int &Result) const {
- long long Val;
- if (getAsInteger(Radix, Val) ||
- (int)Val != Val)
- return true;
- Result = Val;
- return false;
-}
-
-bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
- unsigned long long Val;
- if (getAsInteger(Radix, Val) ||
- (unsigned)Val != Val)
- return true;
- Result = Val;
- return false;
-}
-
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
StringRef Str = *this;
@@ -420,3 +426,9 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
return false;
}
+
+
+// Implementation of StringRef hashing.
+hash_code llvm::hash_value(StringRef S) {
+ return hash_combine_range(S.begin(), S.end());
+}
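The per-width getAsInteger overloads removed above all funnel through the two free functions now, and hashing delegates to the generic hash_combine_range. A short usage sketch (note the true-on-error return convention):

    #include "llvm/ADT/Hashing.h"
    #include "llvm/ADT/StringRef.h"

    bool parseDemo() {
      long long V;
      // Radix 0 auto-senses a "0x"/"0b"/leading-0 prefix via GetAutoSenseRadix.
      if (llvm::getAsSignedInteger("-0x20", 0, V))
        return false; // true means overflow or a malformed string
      // V == -32 here.
      llvm::hash_code H = llvm::hash_value(llvm::StringRef("-0x20"));
      (void)H; // equal strings hash equal, but only within one process run
      return true;
    }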
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index 7497bfe035c6..53c8d84e7d45 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -84,7 +84,7 @@ void TargetRegistry::RegisterTarget(Target &T,
}
const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
- const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error);
+ const Target *TheTarget = lookupTarget(sys::getDefaultTargetTriple(), Error);
if (TheTarget && !TheTarget->hasJIT()) {
Error = "No JIT compatible target available for this host";
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index fdb251c0a36b..08b12b658bea 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -19,7 +19,7 @@
//=== independent code.
//===----------------------------------------------------------------------===//
-#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 8f0bb93eb4d1..7483225fdfb0 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -24,7 +24,7 @@ static bool multithreaded_mode = false;
static sys::Mutex* global_lock = 0;
bool llvm::llvm_start_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(!multithreaded_mode && "Already multithreaded!");
multithreaded_mode = true;
global_lock = new sys::Mutex(true);
@@ -39,7 +39,7 @@ bool llvm::llvm_start_multithreaded() {
}
void llvm::llvm_stop_multithreaded() {
-#if ENABLE_THREADS != 0
+#if LLVM_ENABLE_THREADS != 0
assert(multithreaded_mode && "Not currently multithreaded!");
// We fence here to ensure that all threaded operations are complete BEFORE we
@@ -63,7 +63,7 @@ void llvm::llvm_release_global_lock() {
if (multithreaded_mode) global_lock->release();
}
-#if ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
+#if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H)
#include <pthread.h>
struct ThreadInfo {
@@ -102,7 +102,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
error:
::pthread_attr_destroy(&Attr);
}
-#elif ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
+#elif LLVM_ENABLE_THREADS!=0 && defined(LLVM_ON_WIN32)
#include "Windows/Windows.h"
#include <process.h>
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index a9ed5eecfa7e..598e8ad6a1a5 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -168,10 +168,8 @@ void Timer::stopTimer() {
static void printVal(double Val, double Total, raw_ostream &OS) {
if (Total < 1e-7) // Avoid dividing by zero.
OS << " ----- ";
- else {
- OS << " " << format("%7.4f", Val) << " (";
- OS << format("%5.1f", Val*100/Total) << "%)";
- }
+ else
+ OS << format(" %7.4f (%5.1f%%)", Val, Val*100/Total);
}
void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
@@ -186,7 +184,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
OS << " ";
if (Total.getMemUsed())
- OS << format("%9lld", (long long)getMemUsed()) << " ";
+ OS << format("%9" PRId64 " ", (int64_t)getMemUsed());
}
@@ -332,11 +330,9 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
// If this is not a collection of ungrouped times, print the total time.
// Ungrouped timers don't really make sense to add up. We still print the
// TOTAL line to make the percentages make sense.
- if (this != DefaultTimerGroup) {
- OS << " Total Execution Time: ";
- OS << format("%5.4f", Total.getProcessTime()) << " seconds (";
- OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
- }
+ if (this != DefaultTimerGroup)
+ OS << format(" Total Execution Time: %5.4f seconds (%5.4f wall clock)\n",
+ Total.getProcessTime(), Total.getWallTime());
OS << '\n';
if (Total.getUserTime())
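The Timer changes are mechanical: one format() call with a combined format string replaces a chain of '<<' pieces. A sketch of the idiom; format() returns a lazy object that raw_ostream expands, so no intermediate std::string is built:

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void printShare(double Val, double Total, llvm::raw_ostream &OS) {
      if (Total < 1e-7) // avoid dividing by zero, as printVal above does
        OS << "  -----  ";
      else
        OS << llvm::format("  %7.4f (%5.1f%%)", Val, Val * 100 / Total);
    }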
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c61af372d79c..44a1b38d98d1 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -9,19 +9,19 @@
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
const char *Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
- case InvalidArch: return "<invalid>";
case UnknownArch: return "unknown";
- case alpha: return "alpha";
case arm: return "arm";
- case bfin: return "bfin";
case cellspu: return "cellspu";
+ case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
case mips64: return "mips64";
@@ -29,9 +29,9 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case msp430: return "msp430";
case ppc64: return "powerpc64";
case ppc: return "powerpc";
+ case r600: return "r600";
case sparc: return "sparc";
case sparcv9: return "sparcv9";
- case systemz: return "s390x";
case tce: return "tce";
case thumb: return "thumb";
case x86: return "i386";
@@ -44,7 +44,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case amdil: return "amdil";
}
- return "<invalid>";
+ llvm_unreachable("Invalid ArchType!");
}
const char *Triple::getArchTypePrefix(ArchType Kind) {
@@ -52,13 +52,9 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
default:
return 0;
- case alpha: return "alpha";
-
case arm:
case thumb: return "arm";
- case bfin: return "bfin";
-
case cellspu: return "spu";
case ppc64:
@@ -66,6 +62,10 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case mblaze: return "mblaze";
+ case hexagon: return "hexagon";
+
+ case r600: return "r600";
+
case sparcv9:
case sparc: return "sparc";
@@ -88,9 +88,11 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
case Apple: return "apple";
case PC: return "pc";
case SCEI: return "scei";
+ case BGP: return "bgp";
+ case BGQ: return "bgq";
}
- return "<invalid>";
+ llvm_unreachable("Invalid VendorType!");
}
const char *Triple::getOSTypeName(OSType Kind) {
@@ -110,83 +112,59 @@ const char *Triple::getOSTypeName(OSType Kind) {
case MinGW32: return "mingw32";
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
- case Psp: return "psp";
case Solaris: return "solaris";
case Win32: return "win32";
case Haiku: return "haiku";
case Minix: return "minix";
case RTEMS: return "rtems";
case NativeClient: return "nacl";
+ case CNK: return "cnk";
}
- return "<invalid>";
+ llvm_unreachable("Invalid OSType");
}
const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
case GNU: return "gnu";
+ case GNUEABIHF: return "gnueabihf";
case GNUEABI: return "gnueabi";
case EABI: return "eabi";
case MachO: return "macho";
+ case ANDROIDEABI: return "androideabi";
}
- return "<invalid>";
+ llvm_unreachable("Invalid EnvironmentType!");
}
Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
- if (Name == "alpha")
- return alpha;
- if (Name == "arm")
- return arm;
- if (Name == "bfin")
- return bfin;
- if (Name == "cellspu")
- return cellspu;
- if (Name == "mips")
- return mips;
- if (Name == "mipsel")
- return mipsel;
- if (Name == "mips64")
- return mips64;
- if (Name == "mips64el")
- return mips64el;
- if (Name == "msp430")
- return msp430;
- if (Name == "ppc64")
- return ppc64;
- if (Name == "ppc32")
- return ppc;
- if (Name == "ppc")
- return ppc;
- if (Name == "mblaze")
- return mblaze;
- if (Name == "sparc")
- return sparc;
- if (Name == "sparcv9")
- return sparcv9;
- if (Name == "systemz")
- return systemz;
- if (Name == "tce")
- return tce;
- if (Name == "thumb")
- return thumb;
- if (Name == "x86")
- return x86;
- if (Name == "x86-64")
- return x86_64;
- if (Name == "xcore")
- return xcore;
- if (Name == "ptx32")
- return ptx32;
- if (Name == "ptx64")
- return ptx64;
- if (Name == "le32")
- return le32;
- if (Name == "amdil")
- return amdil;
-
- return UnknownArch;
+ return StringSwitch<Triple::ArchType>(Name)
+ .Case("arm", arm)
+ .Case("cellspu", cellspu)
+ .Case("mips", mips)
+ .Case("mipsel", mipsel)
+ .Case("mips64", mips64)
+ .Case("mips64el", mips64el)
+ .Case("msp430", msp430)
+ .Case("ppc64", ppc64)
+ .Case("ppc32", ppc)
+ .Case("ppc", ppc)
+ .Case("mblaze", mblaze)
+ .Case("r600", r600)
+ .Case("hexagon", hexagon)
+ .Case("sparc", sparc)
+ .Case("sparcv9", sparcv9)
+ .Case("tce", tce)
+ .Case("thumb", thumb)
+ .Case("x86", x86)
+ .Case("x86-64", x86_64)
+ .Case("xcore", xcore)
+ .Case("ptx32", ptx32)
+ .Case("ptx64", ptx64)
+ .Case("le32", le32)
+ .Case("amdil", amdil)
+ .Default(UnknownArch);
}
Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
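StringSwitch builds a chain of lazily evaluated comparisons and is the idiomatic replacement for these if/else ladders. A minimal sketch with a hypothetical enum, showing the three matchers the rewritten Triple parsers rely on:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"

    enum Fruit { Apple, Citrus, UnknownFruit }; // hypothetical example enum

    Fruit parseFruit(llvm::StringRef Name) {
      return llvm::StringSwitch<Fruit>(Name)
          .Case("apple", Apple)              // exact match
          .Cases("orange", "lemon", Citrus)  // several spellings, one value
          .StartsWith("mandarin", Citrus)    // prefix match, as parseOS uses
          .Default(UnknownFruit);            // nothing matched
    }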
@@ -202,36 +180,22 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
// This code must be kept in sync with Clang's Darwin specific argument
// translation.
- if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" ||
- Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" ||
- Str == "ppc7450" || Str == "ppc970")
- return Triple::ppc;
-
- if (Str == "ppc64")
- return Triple::ppc64;
-
- if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" ||
- Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" ||
- Str == "pentIIm5" || Str == "pentium4")
- return Triple::x86;
-
- if (Str == "x86_64")
- return Triple::x86_64;
-
- // This is derived from the driver driver.
- if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" ||
- Str == "armv6" || Str == "armv7" || Str == "armv7f" || Str == "armv7k" ||
- Str == "armv7s")
- return Triple::arm;
-
- if (Str == "ptx32")
- return Triple::ptx32;
- if (Str == "ptx64")
- return Triple::ptx64;
- if (Str == "amdil")
- return Triple::amdil;
-
- return Triple::UnknownArch;
+ return StringSwitch<ArchType>(Str)
+ .Cases("ppc", "ppc601", "ppc603", "ppc604", "ppc604e", Triple::ppc)
+ .Cases("ppc750", "ppc7400", "ppc7450", "ppc970", Triple::ppc)
+ .Case("ppc64", Triple::ppc64)
+ .Cases("i386", "i486", "i486SX", "i586", "i686", Triple::x86)
+ .Cases("pentium", "pentpro", "pentIIm3", "pentIIm5", "pentium4",
+ Triple::x86)
+ .Case("x86_64", Triple::x86_64)
+ // This is derived from the driver driver.
+ .Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
+ .Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
+ .Case("r600", Triple::r600)
+ .Case("ptx32", Triple::ptx32)
+ .Case("ptx64", Triple::ptx64)
+ .Case("amdil", Triple::amdil)
+ .Default(Triple::UnknownArch);
}
// Returns architecture name that is understood by the target assembler.
@@ -239,188 +203,150 @@ const char *Triple::getArchNameForAssembler() {
if (!isOSDarwin() && getVendor() != Triple::Apple)
return NULL;
- StringRef Str = getArchName();
- if (Str == "i386")
- return "i386";
- if (Str == "x86_64")
- return "x86_64";
- if (Str == "powerpc")
- return "ppc";
- if (Str == "powerpc64")
- return "ppc64";
- if (Str == "mblaze" || Str == "microblaze")
- return "mblaze";
- if (Str == "arm")
- return "arm";
- if (Str == "armv4t" || Str == "thumbv4t")
- return "armv4t";
- if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5"
- || Str == "thumbv5e")
- return "armv5";
- if (Str == "armv6" || Str == "thumbv6")
- return "armv6";
- if (Str == "armv7" || Str == "thumbv7")
- return "armv7";
- if (Str == "ptx32")
- return "ptx32";
- if (Str == "ptx64")
- return "ptx64";
- if (Str == "le32")
- return "le32";
- if (Str == "amdil")
- return "amdil";
- return NULL;
+ return StringSwitch<const char*>(getArchName())
+ .Case("i386", "i386")
+ .Case("x86_64", "x86_64")
+ .Case("powerpc", "ppc")
+ .Case("powerpc64", "ppc64")
+ .Cases("mblaze", "microblaze", "mblaze")
+ .Case("arm", "arm")
+ .Cases("armv4t", "thumbv4t", "armv4t")
+ .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5")
+ .Cases("armv6", "thumbv6", "armv6")
+ .Cases("armv7", "thumbv7", "armv7")
+ .Case("r600", "r600")
+ .Case("ptx32", "ptx32")
+ .Case("ptx64", "ptx64")
+ .Case("le32", "le32")
+ .Case("amdil", "amdil")
+ .Default(NULL);
}
-//
+static Triple::ArchType parseArch(StringRef ArchName) {
+ return StringSwitch<Triple::ArchType>(ArchName)
+ .Cases("i386", "i486", "i586", "i686", Triple::x86)
+ // FIXME: Do we need to support these?
+ .Cases("i786", "i886", "i986", Triple::x86)
+ .Cases("amd64", "x86_64", Triple::x86_64)
+ .Case("powerpc", Triple::ppc)
+ .Cases("powerpc64", "ppu", Triple::ppc64)
+ .Case("mblaze", Triple::mblaze)
+ .Cases("arm", "xscale", Triple::arm)
+ // FIXME: It would be good to replace these with explicit names for all the
+ // various suffixes supported.
+ .StartsWith("armv", Triple::arm)
+ .Case("thumb", Triple::thumb)
+ .StartsWith("thumbv", Triple::thumb)
+ .Cases("spu", "cellspu", Triple::cellspu)
+ .Case("msp430", Triple::msp430)
+ .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
+ .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
+ .Cases("mips64", "mips64eb", Triple::mips64)
+ .Case("mips64el", Triple::mips64el)
+ .Case("r600", Triple::r600)
+ .Case("hexagon", Triple::hexagon)
+ .Case("sparc", Triple::sparc)
+ .Case("sparcv9", Triple::sparcv9)
+ .Case("tce", Triple::tce)
+ .Case("xcore", Triple::xcore)
+ .Case("ptx32", Triple::ptx32)
+ .Case("ptx64", Triple::ptx64)
+ .Case("le32", Triple::le32)
+ .Case("amdil", Triple::amdil)
+ .Default(Triple::UnknownArch);
+}
-Triple::ArchType Triple::ParseArch(StringRef ArchName) {
- if (ArchName.size() == 4 && ArchName[0] == 'i' &&
- ArchName[2] == '8' && ArchName[3] == '6' &&
- ArchName[1] - '3' < 6) // i[3-9]86
- return x86;
- else if (ArchName == "amd64" || ArchName == "x86_64")
- return x86_64;
- else if (ArchName == "bfin")
- return bfin;
- else if (ArchName == "powerpc")
- return ppc;
- else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
- return ppc64;
- else if (ArchName == "mblaze")
- return mblaze;
- else if (ArchName == "arm" ||
- ArchName.startswith("armv") ||
- ArchName == "xscale")
- return arm;
- else if (ArchName == "thumb" ||
- ArchName.startswith("thumbv"))
- return thumb;
- else if (ArchName.startswith("alpha"))
- return alpha;
- else if (ArchName == "spu" || ArchName == "cellspu")
- return cellspu;
- else if (ArchName == "msp430")
- return msp430;
- else if (ArchName == "mips" || ArchName == "mipseb" ||
- ArchName == "mipsallegrex")
- return mips;
- else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
- ArchName == "psp")
- return mipsel;
- else if (ArchName == "mips64" || ArchName == "mips64eb")
- return mips64;
- else if (ArchName == "mips64el")
- return mips64el;
- else if (ArchName == "sparc")
- return sparc;
- else if (ArchName == "sparcv9")
- return sparcv9;
- else if (ArchName == "s390x")
- return systemz;
- else if (ArchName == "tce")
- return tce;
- else if (ArchName == "xcore")
- return xcore;
- else if (ArchName == "ptx32")
- return ptx32;
- else if (ArchName == "ptx64")
- return ptx64;
- else if (ArchName == "le32")
- return le32;
- else if (ArchName == "amdil")
- return amdil;
- else
- return UnknownArch;
+static Triple::VendorType parseVendor(StringRef VendorName) {
+ return StringSwitch<Triple::VendorType>(VendorName)
+ .Case("apple", Triple::Apple)
+ .Case("pc", Triple::PC)
+ .Case("scei", Triple::SCEI)
+ .Case("bgp", Triple::BGP)
+ .Case("bgq", Triple::BGQ)
+ .Default(Triple::UnknownVendor);
}
-Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
- if (VendorName == "apple")
- return Apple;
- else if (VendorName == "pc")
- return PC;
- else if (VendorName == "scei")
- return SCEI;
- else
- return UnknownVendor;
-}
-
-Triple::OSType Triple::ParseOS(StringRef OSName) {
- if (OSName.startswith("auroraux"))
- return AuroraUX;
- else if (OSName.startswith("cygwin"))
- return Cygwin;
- else if (OSName.startswith("darwin"))
- return Darwin;
- else if (OSName.startswith("dragonfly"))
- return DragonFly;
- else if (OSName.startswith("freebsd"))
- return FreeBSD;
- else if (OSName.startswith("ios"))
- return IOS;
- else if (OSName.startswith("kfreebsd"))
- return KFreeBSD;
- else if (OSName.startswith("linux"))
- return Linux;
- else if (OSName.startswith("lv2"))
- return Lv2;
- else if (OSName.startswith("macosx"))
- return MacOSX;
- else if (OSName.startswith("mingw32"))
- return MinGW32;
- else if (OSName.startswith("netbsd"))
- return NetBSD;
- else if (OSName.startswith("openbsd"))
- return OpenBSD;
- else if (OSName.startswith("psp"))
- return Psp;
- else if (OSName.startswith("solaris"))
- return Solaris;
- else if (OSName.startswith("win32"))
- return Win32;
- else if (OSName.startswith("haiku"))
- return Haiku;
- else if (OSName.startswith("minix"))
- return Minix;
- else if (OSName.startswith("rtems"))
- return RTEMS;
- else if (OSName.startswith("nacl"))
- return NativeClient;
- else
- return UnknownOS;
-}
-
-Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) {
- if (EnvironmentName.startswith("eabi"))
- return EABI;
- else if (EnvironmentName.startswith("gnueabi"))
- return GNUEABI;
- else if (EnvironmentName.startswith("gnu"))
- return GNU;
- else if (EnvironmentName.startswith("macho"))
- return MachO;
- else
- return UnknownEnvironment;
+static Triple::OSType parseOS(StringRef OSName) {
+ return StringSwitch<Triple::OSType>(OSName)
+ .StartsWith("auroraux", Triple::AuroraUX)
+ .StartsWith("cygwin", Triple::Cygwin)
+ .StartsWith("darwin", Triple::Darwin)
+ .StartsWith("dragonfly", Triple::DragonFly)
+ .StartsWith("freebsd", Triple::FreeBSD)
+ .StartsWith("ios", Triple::IOS)
+ .StartsWith("kfreebsd", Triple::KFreeBSD)
+ .StartsWith("linux", Triple::Linux)
+ .StartsWith("lv2", Triple::Lv2)
+ .StartsWith("macosx", Triple::MacOSX)
+ .StartsWith("mingw32", Triple::MinGW32)
+ .StartsWith("netbsd", Triple::NetBSD)
+ .StartsWith("openbsd", Triple::OpenBSD)
+ .StartsWith("solaris", Triple::Solaris)
+ .StartsWith("win32", Triple::Win32)
+ .StartsWith("haiku", Triple::Haiku)
+ .StartsWith("minix", Triple::Minix)
+ .StartsWith("rtems", Triple::RTEMS)
+ .StartsWith("nacl", Triple::NativeClient)
+ .StartsWith("cnk", Triple::CNK)
+ .Default(Triple::UnknownOS);
}
-void Triple::Parse() const {
- assert(!isInitialized() && "Invalid parse call.");
+static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
+ return StringSwitch<Triple::EnvironmentType>(EnvironmentName)
+ .StartsWith("eabi", Triple::EABI)
+ .StartsWith("gnueabihf", Triple::GNUEABIHF)
+ .StartsWith("gnueabi", Triple::GNUEABI)
+ .StartsWith("gnu", Triple::GNU)
+ .StartsWith("macho", Triple::MachO)
+ .StartsWith("androideabi", Triple::ANDROIDEABI)
+ .Default(Triple::UnknownEnvironment);
+}
- Arch = ParseArch(getArchName());
- Vendor = ParseVendor(getVendorName());
- OS = ParseOS(getOSName());
- Environment = ParseEnvironment(getEnvironmentName());
+/// \brief Construct a triple from the string representation provided.
+///
+/// This stores the string representation and parses the various pieces into
+/// enum members.
+Triple::Triple(const Twine &Str)
+ : Data(Str.str()),
+ Arch(parseArch(getArchName())),
+ Vendor(parseVendor(getVendorName())),
+ OS(parseOS(getOSName())),
+ Environment(parseEnvironment(getEnvironmentName())) {
+}
- assert(isInitialized() && "Failed to initialize!");
+/// \brief Construct a triple from string representations of the architecture,
+/// vendor, and OS.
+///
+/// This joins each argument into a canonical string representation and parses
+/// them into enum members. It leaves the environment unknown and omits it from
+/// the string representation.
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr)
+ : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr).str()),
+ Arch(parseArch(ArchStr.str())),
+ Vendor(parseVendor(VendorStr.str())),
+ OS(parseOS(OSStr.str())),
+ Environment() {
+}
+
+/// \brief Construct a triple from string representations of the architecture,
+/// vendor, OS, and environment.
+///
+/// This joins each argument into a canonical string representation and parses
+/// them into enum members.
+Triple::Triple(const Twine &ArchStr, const Twine &VendorStr, const Twine &OSStr,
+ const Twine &EnvironmentStr)
+ : Data((ArchStr + Twine('-') + VendorStr + Twine('-') + OSStr + Twine('-') +
+ EnvironmentStr).str()),
+ Arch(parseArch(ArchStr.str())),
+ Vendor(parseVendor(VendorStr.str())),
+ OS(parseOS(OSStr.str())),
+ Environment(parseEnvironment(EnvironmentStr.str())) {
}
std::string Triple::normalize(StringRef Str) {
// Parse into components.
SmallVector<StringRef, 4> Components;
- for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) {
- Last = Str.find('-', First);
- Components.push_back(Str.slice(First, Last));
- }
+ Str.split(Components, "-");
// If the first component corresponds to a known architecture, preferentially
// use it for the architecture. If the second component corresponds to a
@@ -429,16 +355,16 @@ std::string Triple::normalize(StringRef Str) {
// valid os.
ArchType Arch = UnknownArch;
if (Components.size() > 0)
- Arch = ParseArch(Components[0]);
+ Arch = parseArch(Components[0]);
VendorType Vendor = UnknownVendor;
if (Components.size() > 1)
- Vendor = ParseVendor(Components[1]);
+ Vendor = parseVendor(Components[1]);
OSType OS = UnknownOS;
if (Components.size() > 2)
- OS = ParseOS(Components[2]);
+ OS = parseOS(Components[2]);
EnvironmentType Environment = UnknownEnvironment;
if (Components.size() > 3)
- Environment = ParseEnvironment(Components[3]);
+ Environment = parseEnvironment(Components[3]);
// Note which components are already in their final position. These will not
// be moved.
@@ -464,22 +390,21 @@ std::string Triple::normalize(StringRef Str) {
bool Valid = false;
StringRef Comp = Components[Idx];
switch (Pos) {
- default:
- assert(false && "unexpected component type!");
+ default: llvm_unreachable("unexpected component type!");
case 0:
- Arch = ParseArch(Comp);
+ Arch = parseArch(Comp);
Valid = Arch != UnknownArch;
break;
case 1:
- Vendor = ParseVendor(Comp);
+ Vendor = parseVendor(Comp);
Valid = Vendor != UnknownVendor;
break;
case 2:
- OS = ParseOS(Comp);
+ OS = parseOS(Comp);
Valid = OS != UnknownOS;
break;
case 3:
- Environment = ParseEnvironment(Comp);
+ Environment = parseEnvironment(Comp);
Valid = Environment != UnknownEnvironment;
break;
}
@@ -500,7 +425,8 @@ std::string Triple::normalize(StringRef Str) {
// components to the right.
for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
// Skip over any fixed components.
- while (i < array_lengthof(Found) && Found[i]) ++i;
+ while (i < array_lengthof(Found) && Found[i])
+ ++i;
// Place the component at the new position, getting the component
// that was at this position - it will be moved right.
std::swap(CurrentComponent, Components[i]);
@@ -528,7 +454,8 @@ std::string Triple::normalize(StringRef Str) {
Components.push_back(CurrentComponent);
// Advance Idx to the component's new position.
- while (++Idx < array_lengthof(Found) && Found[Idx]) {}
+ while (++Idx < array_lengthof(Found) && Found[Idx])
+ ;
} while (Idx < Pos); // Add more until the final position is reached.
}
assert(Pos < Components.size() && Components[Pos] == Comp &&
@@ -618,9 +545,47 @@ void Triple::getOSVersion(unsigned &Major, unsigned &Minor,
}
}
+bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
+ unsigned &Micro) const {
+ getOSVersion(Major, Minor, Micro);
+
+ switch (getOS()) {
+ default: llvm_unreachable("unexpected OS for Darwin triple");
+ case Darwin:
+ // Default to darwin8, i.e., MacOSX 10.4.
+ if (Major == 0)
+ Major = 8;
+ // Darwin version numbers are skewed from OS X versions.
+ if (Major < 4)
+ return false;
+ Micro = 0;
+ Minor = Major - 4;
+ Major = 10;
+ break;
+ case MacOSX:
+ // Default to 10.4.
+ if (Major == 0) {
+ Major = 10;
+ Minor = 4;
+ }
+ if (Major != 10)
+ return false;
+ break;
+ case IOS:
+ // Ignore the version from the triple. This is only handled because the
+ // clang driver combines OS X and IOS support into a common Darwin
+ // toolchain that wants to know the OS X version number even when targeting
+ // IOS.
+ Major = 10;
+ Minor = 4;
+ Micro = 0;
+ break;
+ }
+ return true;
+}
+
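The Darwin case above encodes the usual version skew: the darwin kernel major is the Mac OS X 10.x minor plus four. A tiny sketch of just that mapping:

    // darwinN corresponds to Mac OS X 10.(N-4):
    // darwin8 -> 10.4, darwin9 -> 10.5, darwin11 -> 10.7, ...
    bool darwinToOSX(unsigned DarwinMajor, unsigned &Major, unsigned &Minor) {
      if (DarwinMajor < 4)
        return false; // predates any 10.x release the mapping covers
      Major = 10;
      Minor = DarwinMajor - 4;
      return true;
    }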
void Triple::setTriple(const Twine &Str) {
- Data = Str.str();
- Arch = InvalidArch;
+ *this = Triple(Str);
}
void Triple::setArch(ArchType Kind) {
@@ -670,3 +635,126 @@ void Triple::setEnvironmentName(StringRef Str) {
void Triple::setOSAndEnvironmentName(StringRef Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
+
+static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
+ switch (Arch) {
+ case llvm::Triple::UnknownArch:
+ return 0;
+
+ case llvm::Triple::msp430:
+ return 16;
+
+ case llvm::Triple::amdil:
+ case llvm::Triple::arm:
+ case llvm::Triple::cellspu:
+ case llvm::Triple::hexagon:
+ case llvm::Triple::le32:
+ case llvm::Triple::mblaze:
+ case llvm::Triple::mips:
+ case llvm::Triple::mipsel:
+ case llvm::Triple::ppc:
+ case llvm::Triple::ptx32:
+ case llvm::Triple::r600:
+ case llvm::Triple::sparc:
+ case llvm::Triple::tce:
+ case llvm::Triple::thumb:
+ case llvm::Triple::x86:
+ case llvm::Triple::xcore:
+ return 32;
+
+ case llvm::Triple::mips64:
+ case llvm::Triple::mips64el:
+ case llvm::Triple::ppc64:
+ case llvm::Triple::ptx64:
+ case llvm::Triple::sparcv9:
+ case llvm::Triple::x86_64:
+ return 64;
+ }
+ llvm_unreachable("Invalid architecture value");
+}
+
+bool Triple::isArch64Bit() const {
+ return getArchPointerBitWidth(getArch()) == 64;
+}
+
+bool Triple::isArch32Bit() const {
+ return getArchPointerBitWidth(getArch()) == 32;
+}
+
+bool Triple::isArch16Bit() const {
+ return getArchPointerBitWidth(getArch()) == 16;
+}
+
+Triple Triple::get32BitArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::msp430:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::amdil:
+ case Triple::arm:
+ case Triple::cellspu:
+ case Triple::hexagon:
+ case Triple::le32:
+ case Triple::mblaze:
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::ppc:
+ case Triple::ptx32:
+ case Triple::r600:
+ case Triple::sparc:
+ case Triple::tce:
+ case Triple::thumb:
+ case Triple::x86:
+ case Triple::xcore:
+ // Already 32-bit.
+ break;
+
+ case Triple::mips64: T.setArch(Triple::mips); break;
+ case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::ppc64: T.setArch(Triple::ppc); break;
+ case Triple::ptx64: T.setArch(Triple::ptx32); break;
+ case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::x86_64: T.setArch(Triple::x86); break;
+ }
+ return T;
+}
+
+Triple Triple::get64BitArchVariant() const {
+ Triple T(*this);
+ switch (getArch()) {
+ case Triple::UnknownArch:
+ case Triple::amdil:
+ case Triple::arm:
+ case Triple::cellspu:
+ case Triple::hexagon:
+ case Triple::le32:
+ case Triple::mblaze:
+ case Triple::msp430:
+ case Triple::r600:
+ case Triple::tce:
+ case Triple::thumb:
+ case Triple::xcore:
+ T.setArch(UnknownArch);
+ break;
+
+ case Triple::mips64:
+ case Triple::mips64el:
+ case Triple::ppc64:
+ case Triple::ptx64:
+ case Triple::sparcv9:
+ case Triple::x86_64:
+ // Already 64-bit.
+ break;
+
+ case Triple::mips: T.setArch(Triple::mips64); break;
+ case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::ppc: T.setArch(Triple::ppc64); break;
+ case Triple::ptx32: T.setArch(Triple::ptx64); break;
+ case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::x86: T.setArch(Triple::x86_64); break;
+ }
+ return T;
+}
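Taken together, the new constructors and the pointer-width helpers make Triple usable as an immediate value. A short usage sketch against the API as changed in this file:

    #include "llvm/ADT/Triple.h"
    #include <string>

    void tripleDemo() {
      // The Twine constructor parses eagerly; there is no lazy Parse() call
      // or InvalidArch sentinel left.
      llvm::Triple T("x86_64-apple-darwin11");
      bool Is64 = T.isArch64Bit();                // true: x86_64 is 64-bit
      llvm::Triple T32 = T.get32BitArchVariant(); // i386-apple-darwin11
      // normalize() shifts recognized components into arch-vendor-os order,
      // leaving vacated slots empty, e.g. "i386-mingw32" -> "i386--mingw32".
      std::string Canon = llvm::Triple::normalize("i386-mingw32");
      (void)Is64; (void)T32; (void)Canon;
    }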
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index dda3ce2c6f97..726e2fbcf056 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -35,14 +35,11 @@ static std::string getOSVersion() {
return info.release;
}
-std::string sys::getHostTriple() {
- // FIXME: Derive directly instead of relying on the autoconf generated
- // variable.
+std::string sys::getDefaultTargetTriple() {
+ StringRef TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE);
+ std::pair<StringRef, StringRef> ArchSplit = TargetTripleString.split('-');
- StringRef HostTripleString(LLVM_HOSTTRIPLE);
- std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
-
- // Normalize the arch, since the host triple may not actually match the host.
+ // Normalize the arch, since the configured triple may not use the canonical spelling.
std::string Arch = ArchSplit.first;
std::string Triple(Arch);
@@ -55,7 +52,7 @@ std::string sys::getHostTriple() {
Triple[1] = '3';
// On darwin, we want to update the version to match that of the
- // host.
+ // target.
std::string::size_type DarwinDashIdx = Triple.find("-darwin");
if (DarwinDashIdx != std::string::npos) {
Triple.resize(DarwinDashIdx + strlen("-darwin"));
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 85c7c4022f48..ddc1e0f9cec8 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -60,6 +60,11 @@
#include <mach-o/dyld.h>
#endif
+// For GNU Hurd
+#if defined(__GNU__) && !defined(MAXPATHLEN)
+# define MAXPATHLEN 4096
+#endif
+
// Put in a hack for Cygwin which falsely reports that the mkdtemp function
// is available when it is not.
#ifdef __CYGWIN__
@@ -235,11 +240,6 @@ Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
}
Path
-Path::GetLLVMDefaultConfigDir() {
- return Path("/etc/llvm/");
-}
-
-Path
Path::GetUserHomeDirectory() {
const char* home = getenv("HOME");
Path result;
@@ -261,7 +261,7 @@ Path::GetCurrentDirectory() {
}
#if defined(__FreeBSD__) || defined (__NetBSD__) || \
- defined(__OpenBSD__) || defined(__minix)
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
@@ -313,7 +313,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
free(pv);
return (NULL);
}
-#endif // __FreeBSD__ || __NetBSD__
+#endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
@@ -330,7 +330,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
return Path(link_path);
}
#elif defined(__FreeBSD__) || defined (__NetBSD__) || \
- defined(__OpenBSD__) || defined(__minix)
+ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index bbbc344661be..edb101efb0f6 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -46,6 +46,11 @@
#include <limits.h>
#endif
+// For GNU Hurd
+#if defined(__GNU__) && !defined(PATH_MAX)
+# define PATH_MAX 4096
+#endif
+
using namespace llvm;
namespace {
@@ -87,7 +92,7 @@ namespace {
result.clear();
StringRef d(dir);
result.append(d.begin(), d.end());
- return success;
+ return error_code::success();
}
}
@@ -96,7 +101,12 @@ namespace sys {
namespace fs {
error_code current_path(SmallVectorImpl<char> &result) {
+#ifdef MAXPATHLEN
result.reserve(MAXPATHLEN);
+#else
+// For GNU Hurd
+ result.reserve(1024);
+#endif
while (true) {
if (::getcwd(result.data(), result.capacity()) == 0) {
@@ -110,7 +120,7 @@ error_code current_path(SmallVectorImpl<char> &result) {
}
result.set_size(strlen(result.data()));
- return success;
+ return error_code::success();
}
error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
@@ -169,7 +179,7 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
if (sz_read < 0)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code create_directory(const Twine &path, bool &existed) {
@@ -183,7 +193,7 @@ error_code create_directory(const Twine &path, bool &existed) {
} else
existed = false;
- return success;
+ return error_code::success();
}
error_code create_hard_link(const Twine &to, const Twine &from) {
@@ -196,7 +206,7 @@ error_code create_hard_link(const Twine &to, const Twine &from) {
if (::link(t.begin(), f.begin()) == -1)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code create_symlink(const Twine &to, const Twine &from) {
@@ -209,7 +219,7 @@ error_code create_symlink(const Twine &to, const Twine &from) {
if (::symlink(t.begin(), f.begin()) == -1)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code remove(const Twine &path, bool &existed) {
@@ -223,7 +233,7 @@ error_code remove(const Twine &path, bool &existed) {
} else
existed = true;
- return success;
+ return error_code::success();
}
error_code rename(const Twine &from, const Twine &to) {
@@ -245,7 +255,7 @@ error_code rename(const Twine &from, const Twine &to) {
return error_code(errno, system_category());
}
- return success;
+ return error_code::success();
}
error_code resize_file(const Twine &path, uint64_t size) {
@@ -255,7 +265,7 @@ error_code resize_file(const Twine &path, uint64_t size) {
if (::truncate(p.begin(), size) == -1)
return error_code(errno, system_category());
- return success;
+ return error_code::success();
}
error_code exists(const Twine &path, bool &result) {
@@ -270,32 +280,21 @@ error_code exists(const Twine &path, bool &result) {
} else
result = true;
- return success;
+ return error_code::success();
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toNullTerminatedStringRef(a_storage);
- StringRef b = B.toNullTerminatedStringRef(b_storage);
-
- struct stat stat_a, stat_b;
- int error_b = ::stat(b.begin(), &stat_b);
- int error_a = ::stat(a.begin(), &stat_a);
-
- // If both are invalid, it's an error. If only one is, the result is false.
- if (error_a != 0 || error_b != 0) {
- if (error_a == error_b)
- return error_code(errno, system_category());
- result = false;
- } else {
- result =
- stat_a.st_dev == stat_b.st_dev &&
- stat_a.st_ino == stat_b.st_ino;
- }
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.st_dev == B.st_dev &&
+ A.st_ino == B.st_ino;
+}
- return success;
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
+ return error_code::success();
}
error_code file_size(const Twine &path, uint64_t &result) {
@@ -309,7 +308,7 @@ error_code file_size(const Twine &path, uint64_t &result) {
return make_error_code(errc::operation_not_permitted);
result = status.st_size;
- return success;
+ return error_code::success();
}
error_code status(const Twine &path, file_status &result) {
@@ -341,7 +340,10 @@ error_code status(const Twine &path, file_status &result) {
else
result = file_status(file_type::type_unknown);
- return success;
+ result.st_dev = status.st_dev;
+ result.st_ino = status.st_ino;
+
+ return error_code::success();
}
error_code unique_file(const Twine &model, int &result_fd,
@@ -436,10 +438,11 @@ rety_open_create:
result_path.append(d.begin(), d.end());
result_fd = RandomFD;
- return success;
+ return error_code::success();
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
if (directory == 0)
@@ -452,15 +455,15 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
return directory_iterator_increment(it);
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle)
::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
it.IterationHandle = 0;
it.CurrentEntry = directory_entry();
- return success;
+ return error_code::success();
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
errno = 0;
dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
if (cur_dir == 0 && errno != 0) {
@@ -474,7 +477,7 @@ error_code directory_iterator_increment(directory_iterator& it) {
} else
return directory_iterator_destruct(it);
- return success;
+ return error_code::success();
}
error_code get_magic(const Twine &path, uint32_t len,
@@ -505,7 +508,7 @@ error_code get_magic(const Twine &path, uint32_t len,
}
std::fclose(file);
result.set_size(len);
- return success;
+ return error_code::success();
}
} // end namespace fs
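The recurring change in this file is the spelling error_code::success() for the old bare success constant; the equivalent() rewrite also separates the stat() calls from the comparison, so callers already holding file_status values can compare without re-statting. A hedged usage sketch of the Twine overload:

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/system_error.h"

    bool sameFile(const llvm::Twine &A, const llvm::Twine &B) {
      bool Result = false;
      // Returns a success error_code when both paths could be stat()ed;
      // Result then says whether they name the same device/inode pair.
      if (llvm::error_code ec = llvm::sys::fs::equivalent(A, B, Result))
        return false; // treat stat failure as "not the same file"
      return Result;
    }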
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index da440fd48f3d..2d7fd384e8bb 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -136,7 +136,7 @@ int Process::GetCurrentGroupId() {
return getgid();
}
-#ifdef HAVE_MACH_MACH_H
+#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
#include <mach/mach.h>
#endif
@@ -150,7 +150,7 @@ void Process::PreventCoreFiles() {
setrlimit(RLIMIT_CORE, &rlim);
#endif
-#ifdef HAVE_MACH_MACH_H
+#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
// Disable crash reporting on Mac OS X 10.0-10.4
// get information about the original set of exception ports for the task
@@ -293,7 +293,3 @@ const char *Process::OutputBold(bool bg) {
const char *Process::ResetColor() {
return "\033[0m";
}
-
-void Process::SetWorkingDirectory(std::string Path) {
- ::chdir(Path.c_str());
-}
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 346baf1744dc..e5990d06ecc2 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -412,19 +412,19 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
// Do nothing, as Unix doesn't differentiate between text and binary.
- return false;
+ return make_error_code(errc::success);
}
}
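Callers of these helpers migrate from the old true-on-error bool to the error_code convention. A small sketch, assuming the helpers remain static members of sys::Program as in this tree:

    #include "llvm/Support/Program.h"
    #include "llvm/Support/system_error.h"

    bool setBinaryStreams() {
      // On Unix these are no-ops that report success; only the Windows
      // implementations can actually fail (_setmode returning -1).
      if (llvm::error_code ec = llvm::sys::Program::ChangeStdinToBinary())
        return false;
      if (llvm::error_code ec = llvm::sys::Program::ChangeStdoutToBinary())
        return false;
      return true;
    }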
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index e286869e775d..c9ec9fce9aa1 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -30,6 +30,10 @@
#include <dlfcn.h>
#include <cxxabi.h>
#endif
+#if HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
using namespace llvm;
static RETSIGTYPE SignalHandler(int Sig); // defined below.
@@ -261,6 +265,22 @@ static void PrintStackTrace(void *) {
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
void llvm::sys::PrintStackTraceOnErrorSignal() {
AddSignalHandler(PrintStackTrace, 0);
+
+#if defined(__APPLE__)
+ // Environment variable to disable any kind of crash dialog.
+ if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
+ mach_port_t self = mach_task_self();
+
+ exception_mask_t mask = EXC_MASK_CRASH;
+
+ kern_return_t ret = task_set_exception_ports(self,
+ mask,
+ MACH_PORT_NULL,
+ EXCEPTION_STATE_IDENTITY | MACH_EXCEPTION_CODES,
+ THREAD_STATE_NONE);
+ (void)ret;
+ }
+#endif
}
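LLVM_DISABLE_CRASH_REPORT is now honored on both platforms (it replaces LLVM_DISABLE_CRT_DEBUG on Windows, further below). A sketch of a host tool opting out before installing the handler; setenv is POSIX-only and used here for brevity:

    #include <cstdlib>
    #include "llvm/Support/Signals.h"

    int main() {
      // Must happen before PrintStackTraceOnErrorSignal(), since that is
      // where the Mach exception ports (or the CRT report hook) are set up.
      setenv("LLVM_DISABLE_CRASH_REPORT", "1", /*overwrite=*/1);
      llvm::sys::PrintStackTraceOnErrorSignal();
      return 0;
    }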
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp
index 703448524ed9..2b250a357758 100644
--- a/lib/Support/Valgrind.cpp
+++ b/lib/Support/Valgrind.cpp
@@ -52,3 +52,16 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
}
#endif // !HAVE_VALGRIND_VALGRIND_H
+
+#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
+// These functions require no implementation; tsan just looks at the arguments
+// they're called with.
+extern "C" {
+void AnnotateHappensBefore(const char *file, int line,
+ const volatile void *cv) {}
+void AnnotateHappensAfter(const char *file, int line,
+ const volatile void *cv) {}
+void AnnotateIgnoreWritesBegin(const char *file, int line) {}
+void AnnotateIgnoreWritesEnd(const char *file, int line) {}
+}
+#endif
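The empty bodies are intentional: ThreadSanitizer instruments the call sites, so only the function names and arguments matter. A sketch of annotating a hand-rolled publish flag; the declarations are repeated so the snippet stands alone, though real callers would go through LLVM's own wrapper macros rather than call these directly:

    extern "C" void AnnotateHappensBefore(const char *file, int line,
                                          const volatile void *cv);
    extern "C" void AnnotateHappensAfter(const char *file, int line,
                                         const volatile void *cv);

    static int Payload;
    static volatile int Ready; // flag the two threads synchronize on

    void producer() {
      Payload = 42;
      AnnotateHappensBefore(__FILE__, __LINE__, &Ready); // edge starts here
      Ready = 1;
    }

    void consumer() {
      while (!Ready) { /* spin */ }
      AnnotateHappensAfter(__FILE__, __LINE__, &Ready);  // ...and ends here
      (void)Payload; // tsan treats this read as ordered after the write
    }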
diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc
index 733830e82f08..2e6d6f190370 100644
--- a/lib/Support/Windows/Host.inc
+++ b/lib/Support/Windows/Host.inc
@@ -17,7 +17,6 @@
using namespace llvm;
-std::string sys::getHostTriple() {
- // FIXME: Adapt to running version.
- return LLVM_HOSTTRIPLE;
+std::string sys::getDefaultTargetTriple() {
+ return LLVM_DEFAULT_TARGET_TRIPLE;
}
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 42a92f9c6dfe..d8dc5226ccee 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -66,29 +66,20 @@ Path::operator=(StringRef that) {
return *this;
}
-// push_back 0 on create, and pop_back on delete.
-struct ScopedNullTerminator {
- std::string &str;
- ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
- ~ScopedNullTerminator() {
- // str.pop_back(); But wait, C++03 doesn't have this...
- assert(!str.empty() && str[str.size() - 1] == 0
- && "Null char not present!");
- str.resize(str.size() - 1);
- }
-};
-
bool
Path::isValid() const {
if (path.empty())
return false;
+ size_t len = path.size();
+ // If there is a null character, it and all its successors are ignored.
+ size_t pos = path.find_first_of('\0');
+ if (pos != std::string::npos)
+ len = pos;
+
// If there is a colon, it must be the second character, preceded by a letter
// and followed by something.
- size_t len = path.size();
- // This code assumes that path is null terminated, so make sure it is.
- ScopedNullTerminator snt(path);
- size_t pos = path.rfind(':',len);
+ pos = path.rfind(':',len);
size_t rootslash = 0;
if (pos != std::string::npos) {
if (pos != 1 || !isalpha(path[0]) || len < 3)
@@ -118,13 +109,13 @@ Path::isValid() const {
for (pos = 0; pos < len; ++pos) {
// A component may not end in a space.
if (path[pos] == ' ') {
- if (path[pos+1] == '/' || path[pos+1] == '\0')
+ if (pos+1 == len || path[pos+1] == '/' || path[pos+1] == '\0')
return false;
}
// A component may not end in a period.
if (path[pos] == '.') {
- if (path[pos+1] == '/' || path[pos+1] == '\0') {
+ if (pos+1 == len || path[pos+1] == '/') {
// Unless it is the pseudo-directory "."...
if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
return true;
@@ -286,14 +277,6 @@ Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
}
Path
-Path::GetLLVMDefaultConfigDir() {
- Path ret = GetUserHomeDirectory();
- if (!ret.appendComponent(".llvm"))
- assert(0 && "Failed to append .llvm");
- return ret;
-}
-
-Path
Path::GetUserHomeDirectory() {
char buff[MAX_PATH];
HRESULT res = SHGetFolderPathA(NULL,
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index bc597b2dcc89..e9ce5d9097a3 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -17,7 +17,6 @@
//===----------------------------------------------------------------------===//
#include "Windows.h"
-#include <wincrypt.h>
#include <fcntl.h>
#include <io.h>
#include <sys/stat.h>
@@ -63,7 +62,7 @@ namespace {
utf16.push_back(0);
utf16.pop_back();
- return success;
+ return error_code::success();
}
error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len,
@@ -93,7 +92,7 @@ namespace {
utf8.push_back(0);
utf8.pop_back();
- return success;
+ return error_code::success();
}
error_code TempDir(SmallVectorImpl<wchar_t> &result) {
@@ -109,17 +108,9 @@ namespace {
}
result.set_size(len);
- return success;
+ return error_code::success();
}
- // Forwarder for ScopedHandle.
- BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) {
- return ::CryptReleaseContext(Provider, 0);
- }
-
- typedef ScopedHandle<HCRYPTPROV, uintptr_t(-1),
- BOOL (WINAPI*)(HCRYPTPROV), CryptReleaseContext>
- ScopedCryptContext;
bool is_separator(const wchar_t value) {
switch (value) {
case L'\\':
@@ -176,7 +167,7 @@ retry_cur_dir:
if (len == 0)
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
@@ -199,7 +190,7 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
if (res == 0)
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code create_directory(const Twine &path, bool &existed) {
@@ -219,7 +210,7 @@ error_code create_directory(const Twine &path, bool &existed) {
} else
existed = false;
- return success;
+ return error_code::success();
}
error_code create_hard_link(const Twine &to, const Twine &from) {
@@ -238,7 +229,7 @@ error_code create_hard_link(const Twine &to, const Twine &from) {
if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL))
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code create_symlink(const Twine &to, const Twine &from) {
@@ -261,7 +252,7 @@ error_code create_symlink(const Twine &to, const Twine &from) {
if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0))
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code remove(const Twine &path, bool &existed) {
@@ -294,7 +285,7 @@ error_code remove(const Twine &path, bool &existed) {
existed = true;
}
- return success;
+ return error_code::success();
}
error_code rename(const Twine &from, const Twine &to) {
@@ -314,7 +305,7 @@ error_code rename(const Twine &from, const Twine &to) {
MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
return windows_error(::GetLastError());
- return success;
+ return error_code::success();
}
error_code resize_file(const Twine &path, uint64_t size) {
@@ -356,72 +347,26 @@ error_code exists(const Twine &path, bool &result) {
result = false;
} else
result = true;
- return success;
+ return error_code::success();
}
-error_code equivalent(const Twine &A, const Twine &B, bool &result) {
- // Get arguments.
- SmallString<128> a_storage;
- SmallString<128> b_storage;
- StringRef a = A.toStringRef(a_storage);
- StringRef b = B.toStringRef(b_storage);
-
- // Convert to utf-16.
- SmallVector<wchar_t, 128> wide_a;
- SmallVector<wchar_t, 128> wide_b;
- if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec;
- if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec;
-
- AutoHandle HandleB(
- ::CreateFileW(wide_b.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- AutoHandle HandleA(
- ::CreateFileW(wide_a.begin(),
- 0,
- FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
- 0,
- OPEN_EXISTING,
- FILE_FLAG_BACKUP_SEMANTICS,
- 0));
-
- // If both handles are invalid, it's an error.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE)
- return windows_error(::GetLastError());
-
- // If only one is invalid, it's false.
- if (HandleA == INVALID_HANDLE_VALUE &&
- HandleB == INVALID_HANDLE_VALUE) {
- result = false;
- return success;
- }
-
- // Get file information.
- BY_HANDLE_FILE_INFORMATION InfoA, InfoB;
- if (!::GetFileInformationByHandle(HandleA, &InfoA))
- return windows_error(::GetLastError());
- if (!::GetFileInformationByHandle(HandleB, &InfoB))
- return windows_error(::GetLastError());
+bool equivalent(file_status A, file_status B) {
+ assert(status_known(A) && status_known(B));
+ return A.FileIndexHigh == B.FileIndexHigh &&
+ A.FileIndexLow == B.FileIndexLow &&
+ A.FileSizeHigh == B.FileSizeHigh &&
+ A.FileSizeLow == B.FileSizeLow &&
+ A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
+ A.LastWriteTimeLow == B.LastWriteTimeLow &&
+ A.VolumeSerialNumber == B.VolumeSerialNumber;
+}
- // See if it's all the same.
- result =
- InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber &&
- InfoA.nFileIndexHigh == InfoB.nFileIndexHigh &&
- InfoA.nFileIndexLow == InfoB.nFileIndexLow &&
- InfoA.nFileSizeHigh == InfoB.nFileSizeHigh &&
- InfoA.nFileSizeLow == InfoB.nFileSizeLow &&
- InfoA.ftLastWriteTime.dwLowDateTime ==
- InfoB.ftLastWriteTime.dwLowDateTime &&
- InfoA.ftLastWriteTime.dwHighDateTime ==
- InfoB.ftLastWriteTime.dwHighDateTime;
-
- return success;
+error_code equivalent(const Twine &A, const Twine &B, bool &result) {
+ file_status fsA, fsB;
+ if (error_code ec = status(A, fsA)) return ec;
+ if (error_code ec = status(B, fsB)) return ec;
+ result = equivalent(fsA, fsB);
+ return error_code::success();
}
error_code file_size(const Twine &path, uint64_t &result) {
@@ -442,7 +387,7 @@ error_code file_size(const Twine &path, uint64_t &result) {
(uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8))
+ FileData.nFileSizeLow;
- return success;
+ return error_code::success();
}
static bool isReservedName(StringRef path) {
@@ -475,11 +420,10 @@ error_code status(const Twine &path, file_status &result) {
StringRef path8 = path.toStringRef(path_storage);
if (isReservedName(path8)) {
result = file_status(file_type::character_file);
- return success;
+ return error_code::success();
}
- if (error_code ec = UTF8ToUTF16(path8,
- path_utf16))
+ if (error_code ec = UTF8ToUTF16(path8, path_utf16))
return ec;
DWORD attr = ::GetFileAttributesW(path_utf16.begin());
@@ -488,7 +432,7 @@ error_code status(const Twine &path, file_status &result) {
// Handle reparse points.
if (attr & FILE_ATTRIBUTE_REPARSE_POINT) {
- AutoHandle h(
+ ScopedFileHandle h(
::CreateFileW(path_utf16.begin(),
0, // Attributes only.
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -496,16 +440,37 @@ error_code status(const Twine &path, file_status &result) {
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS,
0));
- if (h == INVALID_HANDLE_VALUE)
+ if (!h)
goto handle_status_error;
}
if (attr & FILE_ATTRIBUTE_DIRECTORY)
result = file_status(file_type::directory_file);
- else
+ else {
result = file_status(file_type::regular_file);
+ ScopedFileHandle h(
+ ::CreateFileW(path_utf16.begin(),
+ 0, // Attributes only.
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
+ NULL,
+ OPEN_EXISTING,
+ FILE_FLAG_BACKUP_SEMANTICS,
+ 0));
+ if (!h)
+ goto handle_status_error;
+ BY_HANDLE_FILE_INFORMATION Info;
+ if (!::GetFileInformationByHandle(h, &Info))
+ goto handle_status_error;
+ result.FileIndexHigh = Info.nFileIndexHigh;
+ result.FileIndexLow = Info.nFileIndexLow;
+ result.FileSizeHigh = Info.nFileSizeHigh;
+ result.FileSizeLow = Info.nFileSizeLow;
+ result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime;
+ result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime;
+ result.VolumeSerialNumber = Info.dwVolumeSerialNumber;
+ }
- return success;
+ return error_code::success();
handle_status_error:
error_code ec = windows_error(::GetLastError());
@@ -519,7 +484,7 @@ handle_status_error:
return ec;
}
- return success;
+ return error_code::success();
}
error_code unique_file(const Twine &model, int &result_fd,
@@ -535,7 +500,7 @@ error_code unique_file(const Twine &model, int &result_fd,
if (makeAbsolute) {
// Make model absolute by prepending a temp directory if it's not already.
bool absolute = path::is_absolute(m);
-
+
if (!absolute) {
SmallVector<wchar_t, 64> temp_dir;
if (error_code ec = TempDir(temp_dir)) return ec;
@@ -646,7 +611,7 @@ retry_create_file:
}
result_fd = fd;
- return success;
+ return error_code::success();
}
error_code get_magic(const Twine &path, uint32_t len,
@@ -688,10 +653,11 @@ error_code get_magic(const Twine &path, uint32_t len,
}
result.set_size(len);
- return success;
+ return error_code::success();
}
-error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+error_code detail::directory_iterator_construct(detail::DirIterState &it,
+ StringRef path){
SmallVector<wchar_t, 128> path_utf16;
if (error_code ec = UTF8ToUTF16(path,
@@ -722,7 +688,7 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
} else
FilenameLen = ::wcslen(FirstFind.cFileName);
@@ -739,25 +705,25 @@ error_code directory_iterator_construct(directory_iterator &it, StringRef path){
path::append(directory_entry_path, directory_entry_name_utf8.str());
it.CurrentEntry = directory_entry(directory_entry_path.str());
- return success;
+ return error_code::success();
}
-error_code directory_iterator_destruct(directory_iterator& it) {
+error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
if (it.IterationHandle != 0)
// Closes the handle if it's valid.
ScopedFindHandle close(HANDLE(it.IterationHandle));
it.IterationHandle = 0;
it.CurrentEntry = directory_entry();
- return success;
+ return error_code::success();
}
-error_code directory_iterator_increment(directory_iterator& it) {
+error_code detail::directory_iterator_increment(detail::DirIterState &it) {
WIN32_FIND_DATAW FindData;
if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) {
error_code ec = windows_error(::GetLastError());
// Check for end.
if (ec == windows_error::no_more_files)
- return directory_iterator_destruct(it);
+ return detail::directory_iterator_destruct(it);
return ec;
}
@@ -774,7 +740,7 @@ error_code directory_iterator_increment(directory_iterator& it) {
return ec;
it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8));
- return success;
+ return error_code::success();
}
} // end namespace fs
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index fe54eb1a7972..913b0734ddc9 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -220,8 +220,4 @@ const char *Process::ResetColor() {
return 0;
}
-void Process::SetWorkingDirectory(std::string Path) {
- ::_chdir(Path.c_str());
-}
-
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index e486e6ec2381..80ccaa6ea6b1 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -299,14 +299,14 @@ Program::Execute(const Path& path,
Data_ = wpi;
// Make sure these get closed no matter what.
- AutoHandle hThread(pi.hThread);
+ ScopedCommonHandle hThread(pi.hThread);
// Assign the process to a job if a memory limit is defined.
- AutoHandle hJob(0);
+ ScopedJobHandle hJob;
if (memoryLimit != 0) {
hJob = CreateJobObject(0, 0);
bool success = false;
- if (hJob != 0) {
+ if (hJob) {
JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli;
memset(&jeli, 0, sizeof(jeli));
jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
@@ -367,7 +367,17 @@ Program::Wait(const Path &path,
return -2;
}
- return status;
+ if (!status)
+ return 0;
+
+ // Pass exception codes with severity 10 (warning) or 11 (error) back to
+ // the caller as a negative value.
+ if ((status & 0xBFFF0000U) == 0x80000000U)
+ return (int)status;
+
+ if (status & 0xFF)
+ return status & 0x7FFFFFFF;
+
+ return 1;
}
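For context: the block above folds the raw Windows status into Wait's int result. A minimal sketch of how a caller could decode that value (illustrative only; describeWaitResult is hypothetical and not part of this patch):

    // Decode the result of the mapping above (sketch, not in the patch).
    static const char *describeWaitResult(int r) {
      if (r == 0)
        return "child exited cleanly";
      if (r < 0)
        return "exception: r is the NTSTATUS code (severity 10/11) cast to int";
      return "nonzero exit code (low bits of the status)";
    }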
bool
@@ -387,19 +397,25 @@ Program::Kill(std::string* ErrMsg) {
return false;
}
-bool Program::ChangeStdinToBinary(){
+error_code Program::ChangeStdinToBinary(){
int result = _setmode( _fileno(stdin), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStdoutToBinary(){
+error_code Program::ChangeStdoutToBinary(){
int result = _setmode( _fileno(stdout), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
-bool Program::ChangeStderrToBinary(){
+error_code Program::ChangeStderrToBinary(){
int result = _setmode( _fileno(stderr), _O_BINARY );
- return result == -1;
+ if (result == -1)
+ return error_code(errno, generic_category());
+ return make_error_code(errc::success);
}
}
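With these signatures, callers check an error_code rather than a bool whose polarity was easy to invert. A sketch of the new calling convention (assuming a caller with llvm::errs() available; not part of this patch):

    // Sketch: fail fast if stdout can't be switched to binary mode.
    if (error_code ec = Program::ChangeStdoutToBinary()) {
      errs() << "error: " << ec.message() << "\n";
      return 1;
    }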
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 0d4b8a26b023..38308f6abd85 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -239,7 +239,7 @@ static void RegisterHandler() {
SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
// Environment variable to disable any kind of crash dialog.
- if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+ if (getenv("LLVM_DISABLE_CRASH_REPORT")) {
#ifdef _MSC_VER
_CrtSetReportHook(CRTReportHook);
#endif
@@ -446,7 +446,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
}
if (ExitOnUnhandledExceptions)
- _exit(-3);
+ _exit(ep->ExceptionRecord->ExceptionCode);
// Allow dialog box to pop up allowing choice to start debugger.
if (OldFilter)
diff --git a/lib/Support/Windows/Windows.h b/lib/Support/Windows/Windows.h
index 67b6f015114f..5c1da0d617aa 100644
--- a/lib/Support/Windows/Windows.h
+++ b/lib/Support/Windows/Windows.h
@@ -26,6 +26,7 @@
#include "llvm/Config/config.h" // Get build system configuration settings
#include <windows.h>
+#include <wincrypt.h>
#include <shlobj.h>
#include <cassert>
#include <string>
@@ -41,70 +42,99 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
return true;
}
-class AutoHandle {
- HANDLE handle;
+template <typename HandleTraits>
+class ScopedHandle {
+ typedef typename HandleTraits::handle_type handle_type;
+ handle_type Handle;
+ ScopedHandle(const ScopedHandle &other); // = delete;
+ void operator=(const ScopedHandle &other); // = delete;
public:
- AutoHandle(HANDLE h) : handle(h) {}
+ ScopedHandle()
+ : Handle(HandleTraits::GetInvalid()) {}
+
+ explicit ScopedHandle(handle_type h)
+ : Handle(h) {}
- ~AutoHandle() {
- if (handle)
- CloseHandle(handle);
+ ~ScopedHandle() {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
}
- operator HANDLE() {
- return handle;
+ handle_type take() {
+ handle_type t = Handle;
+ Handle = HandleTraits::GetInvalid();
+ return t;
}
- AutoHandle &operator=(HANDLE h) {
- handle = h;
+ ScopedHandle &operator=(handle_type h) {
+ if (HandleTraits::IsValid(Handle))
+ HandleTraits::Close(Handle);
+ Handle = h;
return *this;
}
+
+ // True if Handle is valid.
+ operator bool() const {
+ return HandleTraits::IsValid(Handle);
+ }
+
+ operator handle_type() const {
+ return Handle;
+ }
};
-template <class HandleType, uintptr_t InvalidHandle,
- class DeleterType, DeleterType D>
-class ScopedHandle {
- HandleType Handle;
+struct CommonHandleTraits {
+ typedef HANDLE handle_type;
-public:
- ScopedHandle() : Handle(InvalidHandle) {}
- ScopedHandle(HandleType handle) : Handle(handle) {}
+ static handle_type GetInvalid() {
+ return INVALID_HANDLE_VALUE;
+ }
- ~ScopedHandle() {
- if (Handle != HandleType(InvalidHandle))
- D(Handle);
+ static void Close(handle_type h) {
+ ::CloseHandle(h);
}
- HandleType take() {
- HandleType temp = Handle;
- Handle = HandleType(InvalidHandle);
- return temp;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- operator HandleType() const { return Handle; }
+struct JobHandleTraits : CommonHandleTraits {
+ static handle_type GetInvalid() {
+ return NULL;
+ }
+};
- ScopedHandle &operator=(HandleType handle) {
- Handle = handle;
- return *this;
+struct CryptContextTraits : CommonHandleTraits {
+ typedef HCRYPTPROV handle_type;
+
+ static handle_type GetInvalid() {
+ return 0;
}
- typedef void (*unspecified_bool_type)();
- static void unspecified_bool_true() {}
+ static void Close(handle_type h) {
+ ::CryptReleaseContext(h, 0);
+ }
- // True if Handle is valid.
- operator unspecified_bool_type() const {
- return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true;
+ static bool IsValid(handle_type h) {
+ return h != GetInvalid();
}
+};
- bool operator!() const {
- return Handle == HandleType(InvalidHandle);
+struct FindHandleTraits : CommonHandleTraits {
+ static void Close(handle_type h) {
+ ::FindClose(h);
}
};
-typedef ScopedHandle<HANDLE, uintptr_t(-1),
- BOOL (WINAPI*)(HANDLE), ::FindClose>
- ScopedFindHandle;
+struct FileHandleTraits : CommonHandleTraits {};
+
+typedef ScopedHandle<CommonHandleTraits> ScopedCommonHandle;
+typedef ScopedHandle<FileHandleTraits> ScopedFileHandle;
+typedef ScopedHandle<CryptContextTraits> ScopedCryptContext;
+typedef ScopedHandle<FindHandleTraits> ScopedFindHandle;
+typedef ScopedHandle<JobHandleTraits> ScopedJobHandle;
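The traits pattern replaces the old four-parameter ScopedHandle template: each handle family only supplies its invalid value and close routine, and ScopedHandle does the rest. A sketch of extending it (RegKeyTraits is hypothetical and not part of this patch):

    // Hypothetical traits for HKEY handles, same shape as CommonHandleTraits.
    struct RegKeyTraits {
      typedef HKEY handle_type;
      static handle_type GetInvalid() { return NULL; }
      static void Close(handle_type h) { ::RegCloseKey(h); }
      static bool IsValid(handle_type h) { return h != GetInvalid(); }
    };
    typedef ScopedHandle<RegKeyTraits> ScopedRegKey;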
namespace llvm {
template <class T>
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
new file mode 100644
index 000000000000..330519f3019d
--- /dev/null
+++ b/lib/Support/YAMLParser.cpp
@@ -0,0 +1,2117 @@
+//===--- YAMLParser.cpp - Simple YAML parser ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a YAML parser.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/YAMLParser.h"
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+
+using namespace llvm;
+using namespace yaml;
+
+enum UnicodeEncodingForm {
+ UEF_UTF32_LE, ///< UTF-32 Little Endian
+ UEF_UTF32_BE, ///< UTF-32 Big Endian
+ UEF_UTF16_LE, ///< UTF-16 Little Endian
+ UEF_UTF16_BE, ///< UTF-16 Big Endian
+ UEF_UTF8, ///< UTF-8 or ASCII.
+ UEF_Unknown ///< Not a valid Unicode encoding.
+};
+
+/// EncodingInfo - Holds the encoding type and length of the byte order mark if
+/// it exists. Length is in {0, 2, 3, 4}.
+typedef std::pair<UnicodeEncodingForm, unsigned> EncodingInfo;
+
+/// getUnicodeEncoding - Reads up to the first 4 bytes to determine the Unicode
+/// encoding form of \a Input.
+///
+/// @param Input A string of length 0 or more.
+/// @returns An EncodingInfo indicating the Unicode encoding form of the input
+/// and how long the byte order mark is if one exists.
+static EncodingInfo getUnicodeEncoding(StringRef Input) {
+ if (Input.size() == 0)
+ return std::make_pair(UEF_Unknown, 0);
+
+ switch (uint8_t(Input[0])) {
+ case 0x00:
+ if (Input.size() >= 4) {
+ if ( Input[1] == 0
+ && uint8_t(Input[2]) == 0xFE
+ && uint8_t(Input[3]) == 0xFF)
+ return std::make_pair(UEF_UTF32_BE, 4);
+ if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
+ return std::make_pair(UEF_UTF32_BE, 0);
+ }
+
+ if (Input.size() >= 2 && Input[1] != 0)
+ return std::make_pair(UEF_UTF16_BE, 0);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFF:
+ if ( Input.size() >= 4
+ && uint8_t(Input[1]) == 0xFE
+ && Input[2] == 0
+ && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 4);
+
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFE)
+ return std::make_pair(UEF_UTF16_LE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xFE:
+ if (Input.size() >= 2 && uint8_t(Input[1]) == 0xFF)
+ return std::make_pair(UEF_UTF16_BE, 2);
+ return std::make_pair(UEF_Unknown, 0);
+ case 0xEF:
+ if ( Input.size() >= 3
+ && uint8_t(Input[1]) == 0xBB
+ && uint8_t(Input[2]) == 0xBF)
+ return std::make_pair(UEF_UTF8, 3);
+ return std::make_pair(UEF_Unknown, 0);
+ }
+
+ // It could still be UTF-32 or UTF-16.
+ if (Input.size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
+ return std::make_pair(UEF_UTF32_LE, 0);
+
+ if (Input.size() >= 2 && Input[1] == 0)
+ return std::make_pair(UEF_UTF16_LE, 0);
+
+ return std::make_pair(UEF_UTF8, 0);
+}
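For example, a buffer starting 0xFE 0xFF is reported as (UEF_UTF16_BE, 2), while plain ASCII text falls through to (UEF_UTF8, 0). A usage sketch (Buffer is a hypothetical StringRef, not part of this patch):

    // Sketch: classify the input and drop the byte order mark, if any.
    EncodingInfo EI = getUnicodeEncoding(Buffer);
    StringRef Body = Buffer.substr(EI.second);
    bool NeedsConversion = EI.first != UEF_UTF8 && EI.first != UEF_Unknown;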
+
+namespace llvm {
+namespace yaml {
+/// Token - A single YAML token.
+struct Token : ilist_node<Token> {
+ enum TokenKind {
+ TK_Error, // Uninitialized token.
+ TK_StreamStart,
+ TK_StreamEnd,
+ TK_VersionDirective,
+ TK_TagDirective,
+ TK_DocumentStart,
+ TK_DocumentEnd,
+ TK_BlockEntry,
+ TK_BlockEnd,
+ TK_BlockSequenceStart,
+ TK_BlockMappingStart,
+ TK_FlowEntry,
+ TK_FlowSequenceStart,
+ TK_FlowSequenceEnd,
+ TK_FlowMappingStart,
+ TK_FlowMappingEnd,
+ TK_Key,
+ TK_Value,
+ TK_Scalar,
+ TK_Alias,
+ TK_Anchor,
+ TK_Tag
+ } Kind;
+
+ /// A string of length 0 or more whose begin() points to the logical location
+ /// of the token in the input.
+ StringRef Range;
+
+ Token() : Kind(TK_Error) {}
+};
+}
+}
+
+namespace llvm {
+template<>
+struct ilist_sentinel_traits<Token> {
+ Token *createSentinel() const {
+ return &Sentinel;
+ }
+ static void destroySentinel(Token*) {}
+
+ Token *provideInitialHead() const { return createSentinel(); }
+ Token *ensureHead(Token*) const { return createSentinel(); }
+ static void noteHead(Token*, Token*) {}
+
+private:
+ mutable Token Sentinel;
+};
+
+template<>
+struct ilist_node_traits<Token> {
+ Token *createNode(const Token &V) {
+ return new (Alloc.Allocate<Token>()) Token(V);
+ }
+ static void deleteNode(Token *V) {}
+
+ void addNodeToList(Token *) {}
+ void removeNodeFromList(Token *) {}
+ void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
+ ilist_iterator<Token> /*first*/,
+ ilist_iterator<Token> /*last*/) {}
+
+ BumpPtrAllocator Alloc;
+};
+}
+
+typedef ilist<Token> TokenQueueT;
+
+namespace {
+/// @brief This struct is used to track simple keys.
+///
+/// Simple keys are handled by creating an entry in SimpleKeys for each Token
+/// which could legally be the start of a simple key. When peekNext is called,
+/// if the Token To be returned is referenced by a SimpleKey, we continue
+/// tokenizing until that potential simple key has either been found to not be
+/// a simple key (we moved on to the next line or went further than 1024 chars).
+/// Or when we run into a Value, and then insert a Key token (and possibly
+/// others) before the SimpleKey's Tok.
+struct SimpleKey {
+ TokenQueueT::iterator Tok;
+ unsigned Column;
+ unsigned Line;
+ unsigned FlowLevel;
+ bool IsRequired;
+
+ bool operator==(const SimpleKey &Other) const {
+ return Tok == Other.Tok;
+ }
+};
+}
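Concretely, in the document "a: 1" the scalar a is a simple key: no Key token precedes it, so the scanner records a candidate and inserts the Key token retroactively on reaching the ':'. With an explicit '?' no candidate is needed. As C++ test strings (illustrative, not part of this patch):

    // Two equivalent mappings; the first exercises simple-key detection.
    static const char ImplicitKey[] = "a: 1\n";      // 'a' is a simple key.
    static const char ExplicitKey[] = "? a\n: 1\n";  // Key marked with '?'.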
+
+/// @brief The Unicode scalar value of a UTF-8 minimal well-formed code unit
+/// subsequence and the subsequence's length in code units (uint8_t).
+/// A length of 0 represents an error.
+typedef std::pair<uint32_t, unsigned> UTF8Decoded;
+
+static UTF8Decoded decodeUTF8(StringRef Range) {
+ StringRef::iterator Position = Range.begin();
+ StringRef::iterator End = Range.end();
+ // 1 byte: [0x00, 0x7f]
+ // Bit pattern: 0xxxxxxx
+ if ((*Position & 0x80) == 0) {
+ return std::make_pair(*Position, 1);
+ }
+ // 2 bytes: [0x80, 0x7ff]
+ // Bit pattern: 110xxxxx 10xxxxxx
+ if (Position + 1 != End &&
+ ((*Position & 0xE0) == 0xC0) &&
+ ((*(Position + 1) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x1F) << 6) |
+ (*(Position + 1) & 0x3F);
+ if (codepoint >= 0x80)
+ return std::make_pair(codepoint, 2);
+ }
+ // 3 bytes: [0x8000, 0xffff]
+ // Bit pattern: 1110xxxx 10xxxxxx 10xxxxxx
+ if (Position + 2 != End &&
+ ((*Position & 0xF0) == 0xE0) &&
+ ((*(Position + 1) & 0xC0) == 0x80) &&
+ ((*(Position + 2) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x0F) << 12) |
+ ((*(Position + 1) & 0x3F) << 6) |
+ (*(Position + 2) & 0x3F);
+ // Codepoints between 0xD800 and 0xDFFF are invalid, as
+ // they are high / low surrogate halves used by UTF-16.
+ if (codepoint >= 0x800 &&
+ (codepoint < 0xD800 || codepoint > 0xDFFF))
+ return std::make_pair(codepoint, 3);
+ }
+ // 4 bytes: [0x10000, 0x10FFFF]
+ // Bit pattern: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ if (Position + 3 != End &&
+ ((*Position & 0xF8) == 0xF0) &&
+ ((*(Position + 1) & 0xC0) == 0x80) &&
+ ((*(Position + 2) & 0xC0) == 0x80) &&
+ ((*(Position + 3) & 0xC0) == 0x80)) {
+ uint32_t codepoint = ((*Position & 0x07) << 18) |
+ ((*(Position + 1) & 0x3F) << 12) |
+ ((*(Position + 2) & 0x3F) << 6) |
+ (*(Position + 3) & 0x3F);
+ if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
+ return std::make_pair(codepoint, 4);
+ }
+ return std::make_pair(0, 0);
+}
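For example, the two-byte sequence 0xC3 0xA9 decodes to (0xE9, 2), while a truncated or overlong sequence yields (0, 0). A sketch of a decode loop built on this (Input is a hypothetical StringRef, not part of this patch):

    // Sketch: decode one code point, substituting U+FFFD on error.
    UTF8Decoded D = decodeUTF8(Input);
    uint32_t Scalar = D.second ? D.first : 0xFFFD;
    Input = Input.substr(D.second ? D.second : 1);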
+
+namespace llvm {
+namespace yaml {
+/// @brief Scans YAML tokens from a MemoryBuffer.
+class Scanner {
+public:
+ Scanner(const StringRef Input, SourceMgr &SM);
+
+ /// @brief Parse the next token and return it without popping it.
+ Token &peekNext();
+
+ /// @brief Parse the next token and pop it from the queue.
+ Token getNext();
+
+ void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ SM.PrintMessage(Loc, Kind, Message, Ranges);
+ }
+
+ void setError(const Twine &Message, StringRef::iterator Position) {
+ if (Position >= End)
+ Position = End - 1;
+
+ // Don't print out more errors after the first one we encounter. The rest
+ // are just the result of the first, and have no meaning.
+ if (!Failed)
+ printError(SMLoc::getFromPointer(Position), SourceMgr::DK_Error, Message);
+ Failed = true;
+ }
+
+ void setError(const Twine &Message) {
+ setError(Message, Current);
+ }
+
+ /// @brief Returns true if an error occurred while parsing.
+ bool failed() {
+ return Failed;
+ }
+
+private:
+ StringRef currentInput() {
+ return StringRef(Current, End - Current);
+ }
+
+ /// @brief Decode a UTF-8 minimal well-formed code unit subsequence starting
+ /// at \a Position.
+ ///
+ /// If the UTF-8 code units starting at Position do not form a well-formed
+ /// code unit subsequence, then the Unicode scalar value is 0, and the length
+ /// is 0.
+ UTF8Decoded decodeUTF8(StringRef::iterator Position) {
+ return ::decodeUTF8(StringRef(Position, End - Position));
+ }
+
+ // The following functions are based on the grammar rules in the YAML spec.
+ // The style of the function names is meant to closely match how they are
+ // written in the spec. The number within the [] is the number of the
+ // grammar rule in the spec.
+ //
+ // See 4.2 [Production Naming Conventions] for the meaning of the prefixes.
+ //
+ // c-
+ // A production starting and ending with a special character.
+ // b-
+ // A production matching a single line break.
+ // nb-
+ // A production starting and ending with a non-break character.
+ // s-
+ // A production starting and ending with a white space character.
+ // ns-
+ // A production starting and ending with a non-space character.
+ // l-
+ // A production matching complete line(s).
+
+ /// @brief Skip a single nb-char[27] starting at Position.
+ ///
+ /// A nb-char is 0x9 | [0x20-0x7E] | 0x85 | [0xA0-0xD7FF] | [0xE000-0xFEFE]
+ /// | [0xFF00-0xFFFD] | [0x10000-0x10FFFF]
+ ///
+ /// @returns The code unit after the nb-char, or Position if it's not an
+ /// nb-char.
+ StringRef::iterator skip_nb_char(StringRef::iterator Position);
+
+ /// @brief Skip a single b-break[28] starting at Position.
+ ///
+ /// A b-break is 0xD 0xA | 0xD | 0xA
+ ///
+ /// @returns The code unit after the b-break, or Position if it's not a
+ /// b-break.
+ StringRef::iterator skip_b_break(StringRef::iterator Position);
+
+ /// @brief Skip a single s-white[33] starting at Position.
+ ///
+ /// A s-white is 0x20 | 0x9
+ ///
+ /// @returns The code unit after the s-white, or Position if it's not a
+ /// s-white.
+ StringRef::iterator skip_s_white(StringRef::iterator Position);
+
+ /// @brief Skip a single ns-char[34] starting at Position.
+ ///
+ /// A ns-char is nb-char - s-white
+ ///
+ /// @returns The code unit after the ns-char, or Position if it's not a
+ /// ns-char.
+ StringRef::iterator skip_ns_char(StringRef::iterator Position);
+
+ typedef StringRef::iterator (Scanner::*SkipWhileFunc)(StringRef::iterator);
+ /// @brief Skip minimal well-formed code unit subsequences until Func
+ /// returns its input.
+ ///
+ /// @returns The code unit after the last minimal well-formed code unit
+ /// subsequence that Func accepted.
+ StringRef::iterator skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position);
+
+ /// @brief Scan ns-uri-char[39]s starting at Cur.
+ ///
+ /// This updates Cur and Column while scanning.
+ ///
+ /// @returns A StringRef starting at Cur which covers the longest contiguous
+ /// sequence of ns-uri-char.
+ StringRef scan_ns_uri_char();
+
+ /// @brief Scan ns-plain-one-line[133] starting at \a Cur.
+ StringRef scan_ns_plain_one_line();
+
+ /// @brief Consume a minimal well-formed code unit subsequence starting at
+ /// \a Cur. Return false if it is not the same Unicode scalar value as
+ /// \a Expected. This updates \a Column.
+ bool consume(uint32_t Expected);
+
+ /// @brief Skip \a Distance UTF-8 code units. Updates \a Cur and \a Column.
+ void skip(uint32_t Distance);
+
+ /// @brief Return true if the minimal well-formed code unit subsequence at
+ /// \a Position is whitespace or a new line.
+ bool isBlankOrBreak(StringRef::iterator Position);
+
+ /// @brief If IsSimpleKeyAllowed, create and push_back a new SimpleKey.
+ void saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired);
+
+ /// @brief Remove simple keys that can no longer be valid simple keys.
+ ///
+ /// Invalid simple keys are not on the current line or are further than 1024
+ /// columns back.
+ void removeStaleSimpleKeyCandidates();
+
+ /// @brief Remove all simple keys on FlowLevel \a Level.
+ void removeSimpleKeyCandidatesOnFlowLevel(unsigned Level);
+
+ /// @brief Unroll indentation in \a Indents back to \a Col. Creates BlockEnd
+ /// tokens if needed.
+ bool unrollIndent(int ToColumn);
+
+ /// @brief Increase indent to \a Col. Creates \a Kind token at \a InsertPoint
+ /// if needed.
+ bool rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint);
+
+ /// @brief Skip whitespace and comments until the start of the next token.
+ void scanToNextToken();
+
+ /// @brief Must be the first token generated.
+ bool scanStreamStart();
+
+ /// @brief Generate tokens needed to close out the stream.
+ bool scanStreamEnd();
+
+ /// @brief Scan a %BLAH directive.
+ bool scanDirective();
+
+ /// @brief Scan a ... or ---.
+ bool scanDocumentIndicator(bool IsStart);
+
+ /// @brief Scan a [ or { and generate the proper flow collection start token.
+ bool scanFlowCollectionStart(bool IsSequence);
+
+ /// @brief Scan a ] or } and generate the proper flow collection end token.
+ bool scanFlowCollectionEnd(bool IsSequence);
+
+ /// @brief Scan the , that separates entries in a flow collection.
+ bool scanFlowEntry();
+
+ /// @brief Scan the - that starts block sequence entries.
+ bool scanBlockEntry();
+
+ /// @brief Scan an explicit ? indicating a key.
+ bool scanKey();
+
+ /// @brief Scan an explicit : indicating a value.
+ bool scanValue();
+
+ /// @brief Scan a quoted scalar.
+ bool scanFlowScalar(bool IsDoubleQuoted);
+
+ /// @brief Scan an unquoted scalar.
+ bool scanPlainScalar();
+
+ /// @brief Scan an Alias or Anchor starting with * or &.
+ bool scanAliasOrAnchor(bool IsAlias);
+
+ /// @brief Scan a block scalar starting with | or >.
+ bool scanBlockScalar(bool IsLiteral);
+
+ /// @brief Scan a tag of the form !stuff.
+ bool scanTag();
+
+ /// @brief Dispatch to the next scanning function based on \a *Cur.
+ bool fetchMoreTokens();
+
+ /// @brief The SourceMgr used for diagnostics and buffer management.
+ SourceMgr &SM;
+
+ /// @brief The original input.
+ MemoryBuffer *InputBuffer;
+
+ /// @brief The current position of the scanner.
+ StringRef::iterator Current;
+
+ /// @brief The end of the input (one past the last character).
+ StringRef::iterator End;
+
+ /// @brief Current YAML indentation level in spaces.
+ int Indent;
+
+ /// @brief Current column number in Unicode code points.
+ unsigned Column;
+
+ /// @brief Current line number.
+ unsigned Line;
+
+ /// @brief How deep we are in flow style containers. 0 means at block level.
+ unsigned FlowLevel;
+
+ /// @brief Are we at the start of the stream?
+ bool IsStartOfStream;
+
+ /// @brief Can the next token be the start of a simple key?
+ bool IsSimpleKeyAllowed;
+
+ /// @brief Is the next token required to start a simple key?
+ bool IsSimpleKeyRequired;
+
+ /// @brief True if an error has occurred.
+ bool Failed;
+
+ /// @brief Queue of tokens. This is required to queue up tokens while looking
+ /// for the end of a simple key, and for cases where a single character can
+ /// produce multiple tokens (e.g. BlockEnd).
+ TokenQueueT TokenQueue;
+
+ /// @brief Indentation levels.
+ SmallVector<int, 4> Indents;
+
+ /// @brief Potential simple keys.
+ SmallVector<SimpleKey, 4> SimpleKeys;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+/// encodeUTF8 - Encode \a UnicodeScalarValue in UTF-8 and append it to result.
+static void encodeUTF8( uint32_t UnicodeScalarValue
+ , SmallVectorImpl<char> &Result) {
+ if (UnicodeScalarValue <= 0x7F) {
+ Result.push_back(UnicodeScalarValue & 0x7F);
+ } else if (UnicodeScalarValue <= 0x7FF) {
+ uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
+ uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ } else if (UnicodeScalarValue <= 0xFFFF) {
+ uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ } else if (UnicodeScalarValue <= 0x10FFFF) {
+ uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
+ uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
+ uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
+ uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
+ Result.push_back(FirstByte);
+ Result.push_back(SecondByte);
+ Result.push_back(ThirdByte);
+ Result.push_back(FourthByte);
+ }
+}
+
+bool yaml::dumpTokens(StringRef Input, raw_ostream &OS) {
+ SourceMgr SM;
+ Scanner scanner(Input, SM);
+ while (true) {
+ Token T = scanner.getNext();
+ switch (T.Kind) {
+ case Token::TK_StreamStart:
+ OS << "Stream-Start: ";
+ break;
+ case Token::TK_StreamEnd:
+ OS << "Stream-End: ";
+ break;
+ case Token::TK_VersionDirective:
+ OS << "Version-Directive: ";
+ break;
+ case Token::TK_TagDirective:
+ OS << "Tag-Directive: ";
+ break;
+ case Token::TK_DocumentStart:
+ OS << "Document-Start: ";
+ break;
+ case Token::TK_DocumentEnd:
+ OS << "Document-End: ";
+ break;
+ case Token::TK_BlockEntry:
+ OS << "Block-Entry: ";
+ break;
+ case Token::TK_BlockEnd:
+ OS << "Block-End: ";
+ break;
+ case Token::TK_BlockSequenceStart:
+ OS << "Block-Sequence-Start: ";
+ break;
+ case Token::TK_BlockMappingStart:
+ OS << "Block-Mapping-Start: ";
+ break;
+ case Token::TK_FlowEntry:
+ OS << "Flow-Entry: ";
+ break;
+ case Token::TK_FlowSequenceStart:
+ OS << "Flow-Sequence-Start: ";
+ break;
+ case Token::TK_FlowSequenceEnd:
+ OS << "Flow-Sequence-End: ";
+ break;
+ case Token::TK_FlowMappingStart:
+ OS << "Flow-Mapping-Start: ";
+ break;
+ case Token::TK_FlowMappingEnd:
+ OS << "Flow-Mapping-End: ";
+ break;
+ case Token::TK_Key:
+ OS << "Key: ";
+ break;
+ case Token::TK_Value:
+ OS << "Value: ";
+ break;
+ case Token::TK_Scalar:
+ OS << "Scalar: ";
+ break;
+ case Token::TK_Alias:
+ OS << "Alias: ";
+ break;
+ case Token::TK_Anchor:
+ OS << "Anchor: ";
+ break;
+ case Token::TK_Tag:
+ OS << "Tag: ";
+ break;
+ case Token::TK_Error:
+ break;
+ }
+ OS << T.Range << "\n";
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
+
+bool yaml::scanTokens(StringRef Input) {
+ llvm::SourceMgr SM;
+ llvm::yaml::Scanner scanner(Input, SM);
+ for (;;) {
+ llvm::yaml::Token T = scanner.getNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ break;
+ else if (T.Kind == Token::TK_Error)
+ return false;
+ }
+ return true;
+}
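Both entry points drive the scanner over a whole stream: dumpTokens prints each token's kind and source range, while scanTokens only reports success. A usage sketch, e.g. from a test driver (illustrative, not part of this patch):

    // Sketch: dump the token stream of a small document to stderr.
    if (!yaml::dumpTokens("key: [a, b]\n", llvm::errs()))
      llvm::errs() << "tokenization failed\n";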
+
+std::string yaml::escape(StringRef Input) {
+ std::string EscapedInput;
+ for (StringRef::iterator i = Input.begin(), e = Input.end(); i != e; ++i) {
+ if (*i == '\\')
+ EscapedInput += "\\\\";
+ else if (*i == '"')
+ EscapedInput += "\\\"";
+ else if (*i == 0)
+ EscapedInput += "\\0";
+ else if (*i == 0x07)
+ EscapedInput += "\\a";
+ else if (*i == 0x08)
+ EscapedInput += "\\b";
+ else if (*i == 0x09)
+ EscapedInput += "\\t";
+ else if (*i == 0x0A)
+ EscapedInput += "\\n";
+ else if (*i == 0x0B)
+ EscapedInput += "\\v";
+ else if (*i == 0x0C)
+ EscapedInput += "\\f";
+ else if (*i == 0x0D)
+ EscapedInput += "\\r";
+ else if (*i == 0x1B)
+ EscapedInput += "\\e";
+ else if (*i >= 0 && *i < 0x20) { // Control characters not handled above.
+ std::string HexStr = utohexstr(*i);
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ } else if (*i & 0x80) { // UTF-8 multiple code unit subsequence.
+ UTF8Decoded UnicodeScalarValue
+ = decodeUTF8(StringRef(i, Input.end() - i));
+ if (UnicodeScalarValue.second == 0) {
+ // Found invalid char.
+ SmallString<4> Val;
+ encodeUTF8(0xFFFD, Val);
+ EscapedInput.insert(EscapedInput.end(), Val.begin(), Val.end());
+ // FIXME: Error reporting.
+ return EscapedInput;
+ }
+ if (UnicodeScalarValue.first == 0x85)
+ EscapedInput += "\\N";
+ else if (UnicodeScalarValue.first == 0xA0)
+ EscapedInput += "\\_";
+ else if (UnicodeScalarValue.first == 0x2028)
+ EscapedInput += "\\L";
+ else if (UnicodeScalarValue.first == 0x2029)
+ EscapedInput += "\\P";
+ else {
+ std::string HexStr = utohexstr(UnicodeScalarValue.first);
+ if (HexStr.size() <= 2)
+ EscapedInput += "\\x" + std::string(2 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 4)
+ EscapedInput += "\\u" + std::string(4 - HexStr.size(), '0') + HexStr;
+ else if (HexStr.size() <= 8)
+ EscapedInput += "\\U" + std::string(8 - HexStr.size(), '0') + HexStr;
+ }
+ i += UnicodeScalarValue.second - 1;
+ } else
+ EscapedInput.push_back(*i);
+ }
+ return EscapedInput;
+}
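So escaping a value containing a real tab replaces the tab with the two characters '\' and 't', and an invalid UTF-8 byte is replaced with U+FFFD before returning early. A sketch of quoting a scalar for output (Value is a hypothetical StringRef, not part of this patch):

    // Sketch: emit a scalar as a double-quoted YAML string.
    std::string Quoted = "\"" + yaml::escape(Value) + "\"";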
+
+Scanner::Scanner(StringRef Input, SourceMgr &sm)
+ : SM(sm)
+ , Indent(-1)
+ , Column(0)
+ , Line(0)
+ , FlowLevel(0)
+ , IsStartOfStream(true)
+ , IsSimpleKeyAllowed(true)
+ , IsSimpleKeyRequired(false)
+ , Failed(false) {
+ InputBuffer = MemoryBuffer::getMemBuffer(Input, "YAML");
+ SM.AddNewSourceBuffer(InputBuffer, SMLoc());
+ Current = InputBuffer->getBufferStart();
+ End = InputBuffer->getBufferEnd();
+}
+
+Token &Scanner::peekNext() {
+ // If the current token is a possible simple key, keep parsing until we
+ // can confirm.
+ bool NeedMore = false;
+ while (true) {
+ if (TokenQueue.empty() || NeedMore) {
+ if (!fetchMoreTokens()) {
+ TokenQueue.clear();
+ TokenQueue.push_back(Token());
+ return TokenQueue.front();
+ }
+ }
+ assert(!TokenQueue.empty() &&
+ "fetchMoreTokens lied about getting tokens!");
+
+ removeStaleSimpleKeyCandidates();
+ SimpleKey SK;
+ SK.Tok = TokenQueue.front();
+ if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
+ == SimpleKeys.end())
+ break;
+ else
+ NeedMore = true;
+ }
+ return TokenQueue.front();
+}
+
+Token Scanner::getNext() {
+ Token Ret = peekNext();
+ // TokenQueue can be empty if there was an error getting the next token.
+ if (!TokenQueue.empty())
+ TokenQueue.pop_front();
+
+ // There cannot be any referenced Tokens if the TokenQueue is empty, so do a
+ // quick deallocation of them all.
+ if (TokenQueue.empty()) {
+ TokenQueue.Alloc.Reset();
+ }
+
+ return Ret;
+}
+
+StringRef::iterator Scanner::skip_nb_char(StringRef::iterator Position) {
+ // Check 7 bit c-printable - b-char.
+ if ( *Position == 0x09
+ || (*Position >= 0x20 && *Position <= 0x7E))
+ return Position + 1;
+
+ // Check for valid UTF-8.
+ if (uint8_t(*Position) & 0x80) {
+ UTF8Decoded u8d = decodeUTF8(Position);
+ if ( u8d.second != 0
+ && u8d.first != 0xFEFF
+ && ( u8d.first == 0x85
+ || ( u8d.first >= 0xA0
+ && u8d.first <= 0xD7FF)
+ || ( u8d.first >= 0xE000
+ && u8d.first <= 0xFFFD)
+ || ( u8d.first >= 0x10000
+ && u8d.first <= 0x10FFFF)))
+ return Position + u8d.second;
+ }
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_b_break(StringRef::iterator Position) {
+ if (*Position == 0x0D) {
+ if (Position + 1 != End && *(Position + 1) == 0x0A)
+ return Position + 2;
+ return Position + 1;
+ }
+
+ if (*Position == 0x0A)
+ return Position + 1;
+ return Position;
+}
+
+
+StringRef::iterator Scanner::skip_s_white(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position + 1;
+ return Position;
+}
+
+StringRef::iterator Scanner::skip_ns_char(StringRef::iterator Position) {
+ if (Position == End)
+ return Position;
+ if (*Position == ' ' || *Position == '\t')
+ return Position;
+ return skip_nb_char(Position);
+}
+
+StringRef::iterator Scanner::skip_while( SkipWhileFunc Func
+ , StringRef::iterator Position) {
+ while (true) {
+ StringRef::iterator i = (this->*Func)(Position);
+ if (i == Position)
+ break;
+ Position = i;
+ }
+ return Position;
+}
+
+static bool is_ns_hex_digit(const char C) {
+ return (C >= '0' && C <= '9')
+ || (C >= 'a' && C <= 'f')
+ || (C >= 'A' && C <= 'F');
+}
+
+static bool is_ns_word_char(const char C) {
+ return C == '-'
+ || (C >= 'a' && C <= 'z')
+ || (C >= 'A' && C <= 'Z');
+}
+
+StringRef Scanner::scan_ns_uri_char() {
+ StringRef::iterator Start = Current;
+ while (true) {
+ if (Current == End)
+ break;
+ if (( *Current == '%'
+ && Current + 2 < End
+ && is_ns_hex_digit(*(Current + 1))
+ && is_ns_hex_digit(*(Current + 2)))
+ || is_ns_word_char(*Current)
+ || StringRef(Current, 1).find_first_of("#;/?:@&=+$,_.!~*'()[]")
+ != StringRef::npos) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ }
+ return StringRef(Start, Current - Start);
+}
+
+StringRef Scanner::scan_ns_plain_one_line() {
+ StringRef::iterator start = Current;
+ // The first character must already be verified.
+ ++Current;
+ while (true) {
+ if (Current == End) {
+ break;
+ } else if (*Current == ':') {
+ // Check if the next character is a ns-char.
+ if (Current + 1 == End)
+ break;
+ StringRef::iterator i = skip_ns_char(Current + 1);
+ if (Current + 1 != i) {
+ Current = i;
+ Column += 2; // Consume both the ':' and ns-char.
+ } else
+ break;
+ } else if (*Current == '#') {
+ // Check if the previous character was a ns-char.
+ // The & 0x80 check catches the trailing byte of a UTF-8 multi-byte code
+ // unit subsequence, which always counts as a ns-char.
+ if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
+ ++Current;
+ ++Column;
+ } else
+ break;
+ } else {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ return StringRef(start, Current - start);
+}
+
+bool Scanner::consume(uint32_t Expected) {
+ if (Expected >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (Current == End)
+ return false;
+ if (uint8_t(*Current) >= 0x80)
+ report_fatal_error("Not dealing with this yet");
+ if (uint8_t(*Current) == Expected) {
+ ++Current;
+ ++Column;
+ return true;
+ }
+ return false;
+}
+
+void Scanner::skip(uint32_t Distance) {
+ Current += Distance;
+ Column += Distance;
+}
+
+bool Scanner::isBlankOrBreak(StringRef::iterator Position) {
+ if (Position == End)
+ return false;
+ if ( *Position == ' ' || *Position == '\t'
+ || *Position == '\r' || *Position == '\n')
+ return true;
+ return false;
+}
+
+void Scanner::saveSimpleKeyCandidate( TokenQueueT::iterator Tok
+ , unsigned AtColumn
+ , bool IsRequired) {
+ if (IsSimpleKeyAllowed) {
+ SimpleKey SK;
+ SK.Tok = Tok;
+ SK.Line = Line;
+ SK.Column = AtColumn;
+ SK.IsRequired = IsRequired;
+ SK.FlowLevel = FlowLevel;
+ SimpleKeys.push_back(SK);
+ }
+}
+
+void Scanner::removeStaleSimpleKeyCandidates() {
+ for (SmallVectorImpl<SimpleKey>::iterator i = SimpleKeys.begin();
+ i != SimpleKeys.end();) {
+ if (i->Line != Line || i->Column + 1024 < Column) {
+ if (i->IsRequired)
+ setError( "Could not find expected : for simple key"
+ , i->Tok->Range.begin());
+ i = SimpleKeys.erase(i);
+ } else
+ ++i;
+ }
+}
+
+void Scanner::removeSimpleKeyCandidatesOnFlowLevel(unsigned Level) {
+ if (!SimpleKeys.empty() && (SimpleKeys.end() - 1)->FlowLevel == Level)
+ SimpleKeys.pop_back();
+}
+
+bool Scanner::unrollIndent(int ToColumn) {
+ Token T;
+ // Indentation is ignored in flow.
+ if (FlowLevel != 0)
+ return true;
+
+ while (Indent > ToColumn) {
+ T.Kind = Token::TK_BlockEnd;
+ T.Range = StringRef(Current, 1);
+ TokenQueue.push_back(T);
+ Indent = Indents.pop_back_val();
+ }
+
+ return true;
+}
+
+bool Scanner::rollIndent( int ToColumn
+ , Token::TokenKind Kind
+ , TokenQueueT::iterator InsertPoint) {
+ if (FlowLevel)
+ return true;
+ if (Indent < ToColumn) {
+ Indents.push_back(Indent);
+ Indent = ToColumn;
+
+ Token T;
+ T.Kind = Kind;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.insert(InsertPoint, T);
+ }
+ return true;
+}
+
+void Scanner::scanToNextToken() {
+ while (true) {
+ while (*Current == ' ' || *Current == '\t') {
+ skip(1);
+ }
+
+ // Skip comment.
+ if (*Current == '#') {
+ while (true) {
+ // This may skip more than one byte, thus Column is only incremented
+ // for code points.
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+
+ // Skip EOL.
+ StringRef::iterator i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Line;
+ Column = 0;
+ // New lines may start a simple key.
+ if (!FlowLevel)
+ IsSimpleKeyAllowed = true;
+ }
+}
+
+bool Scanner::scanStreamStart() {
+ IsStartOfStream = false;
+
+ EncodingInfo EI = getUnicodeEncoding(currentInput());
+
+ Token T;
+ T.Kind = Token::TK_StreamStart;
+ T.Range = StringRef(Current, EI.second);
+ TokenQueue.push_back(T);
+ Current += EI.second;
+ return true;
+}
+
+bool Scanner::scanStreamEnd() {
+ // Force an ending new line if one isn't present.
+ if (Column != 0) {
+ Column = 0;
+ ++Line;
+ }
+
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = Token::TK_StreamEnd;
+ T.Range = StringRef(Current, 0);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanDirective() {
+ // Reset the indentation level.
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ StringRef::iterator Start = Current;
+ consume('%');
+ StringRef::iterator NameStart = Current;
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ StringRef Name(NameStart, Current - NameStart);
+ Current = skip_while(&Scanner::skip_s_white, Current);
+
+ if (Name == "YAML") {
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ Token T;
+ T.Kind = Token::TK_VersionDirective;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+ }
+ return false;
+}
+
+bool Scanner::scanDocumentIndicator(bool IsStart) {
+ unrollIndent(-1);
+ SimpleKeys.clear();
+ IsSimpleKeyAllowed = false;
+
+ Token T;
+ T.Kind = IsStart ? Token::TK_DocumentStart : Token::TK_DocumentEnd;
+ T.Range = StringRef(Current, 3);
+ skip(3);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanFlowCollectionStart(bool IsSequence) {
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceStart
+ : Token::TK_FlowMappingStart;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+
+ // [ and { may begin a simple key.
+ saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false);
+
+ // And may also be followed by a simple key.
+ IsSimpleKeyAllowed = true;
+ ++FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowCollectionEnd(bool IsSequence) {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = false;
+ Token T;
+ T.Kind = IsSequence ? Token::TK_FlowSequenceEnd
+ : Token::TK_FlowMappingEnd;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ if (FlowLevel)
+ --FlowLevel;
+ return true;
+}
+
+bool Scanner::scanFlowEntry() {
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_FlowEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanBlockEntry() {
+ rollIndent(Column, Token::TK_BlockSequenceStart, TokenQueue.end());
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = true;
+ Token T;
+ T.Kind = Token::TK_BlockEntry;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanKey() {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+
+ removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
+ IsSimpleKeyAllowed = !FlowLevel;
+
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanValue() {
+ // If the previous token could have been a simple key, insert the key token
+ // into the token queue.
+ if (!SimpleKeys.empty()) {
+ SimpleKey SK = SimpleKeys.pop_back_val();
+ Token T;
+ T.Kind = Token::TK_Key;
+ T.Range = SK.Tok->Range;
+ TokenQueueT::iterator i, e;
+ for (i = TokenQueue.begin(), e = TokenQueue.end(); i != e; ++i) {
+ if (i == SK.Tok)
+ break;
+ }
+ assert(i != e && "SimpleKey not in token queue!");
+ i = TokenQueue.insert(i, T);
+
+ // We may also need to add a Block-Mapping-Start token.
+ rollIndent(SK.Column, Token::TK_BlockMappingStart, i);
+
+ IsSimpleKeyAllowed = false;
+ } else {
+ if (!FlowLevel)
+ rollIndent(Column, Token::TK_BlockMappingStart, TokenQueue.end());
+ IsSimpleKeyAllowed = !FlowLevel;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Value;
+ T.Range = StringRef(Current, 1);
+ skip(1);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+// Forbidding inlining improves performance by roughly 20%.
+// FIXME: Remove once llvm optimizes this to the faster version without hints.
+LLVM_ATTRIBUTE_NOINLINE static bool
+wasEscaped(StringRef::iterator First, StringRef::iterator Position);
+
+// Returns whether a character at 'Position' was escaped with a leading '\'.
+// 'First' specifies the position of the first character in the string.
+static bool wasEscaped(StringRef::iterator First,
+ StringRef::iterator Position) {
+ assert(Position - 1 >= First);
+ StringRef::iterator I = Position - 1;
+ // We calculate the number of consecutive '\'s before the current position
+ // by iterating backwards through our string.
+ while (I >= First && *I == '\\') --I;
+ // (Position - 1 - I) now contains the number of '\'s before the current
+ // position. If it is odd, the character at 'Position' was escaped.
+ return (Position - 1 - I) % 2 == 1;
+}
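For instance, a closing quote preceded by two backslashes (an even count) is not escaped and terminates the scalar, while one preceded by a single backslash is escaped and scanning continues. In code (Str and QuotePos are hypothetical, not part of this patch):

    // Sketch: decide whether a '"' at QuotePos terminates the scalar.
    // Start + 1 skips the opening quote, as in scanFlowScalar below.
    bool Terminates = !wasEscaped(Str.begin() + 1, QuotePos);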
+
+bool Scanner::scanFlowScalar(bool IsDoubleQuoted) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ if (IsDoubleQuoted) {
+ do {
+ ++Current;
+ while (Current != End && *Current != '"')
+ ++Current;
+ // Repeat until the previous character was not a '\' or was an escaped
+ // backslash.
+ } while (*(Current - 1) == '\\' && wasEscaped(Start + 1, Current));
+ } else {
+ skip(1);
+ while (true) {
+ // Skip a ' followed by another '.
+ if (Current + 1 < End && *Current == '\'' && *(Current + 1) == '\'') {
+ skip(2);
+ continue;
+ } else if (*Current == '\'')
+ break;
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ i = skip_b_break(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ Column = 0;
+ ++Line;
+ } else {
+ if (i == End)
+ break;
+ Current = i;
+ ++Column;
+ }
+ }
+ }
+ skip(1); // Skip ending quote.
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanPlainScalar() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ unsigned LeadingBlanks = 0;
+ assert(Indent >= -1 && "Indent must be >= -1 !");
+ unsigned indent = static_cast<unsigned>(Indent + 1);
+ while (true) {
+ if (*Current == '#')
+ break;
+
+ while (!isBlankOrBreak(Current)) {
+ if ( FlowLevel && *Current == ':'
+ && !(isBlankOrBreak(Current + 1) || *(Current + 1) == ',')) {
+ setError("Found unexpected ':' while scanning a plain scalar", Current);
+ return false;
+ }
+
+ // Check for the end of the plain scalar.
+ if ( (*Current == ':' && isBlankOrBreak(Current + 1))
+ || ( FlowLevel
+ && (StringRef(Current, 1).find_first_of(",:?[]{}")
+ != StringRef::npos)))
+ break;
+
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ // Are we at the end?
+ if (!isBlankOrBreak(Current))
+ break;
+
+ // Eat blanks.
+ StringRef::iterator Tmp = Current;
+ while (isBlankOrBreak(Tmp)) {
+ StringRef::iterator i = skip_s_white(Tmp);
+ if (i != Tmp) {
+ if (LeadingBlanks && (Column < indent) && *Tmp == '\t') {
+ setError("Found invalid tab character in indentation", Tmp);
+ return false;
+ }
+ Tmp = i;
+ ++Column;
+ } else {
+ i = skip_b_break(Tmp);
+ if (!LeadingBlanks)
+ LeadingBlanks = 1;
+ Tmp = i;
+ Column = 0;
+ ++Line;
+ }
+ }
+
+ if (!FlowLevel && Column < indent)
+ break;
+
+ Current = Tmp;
+ }
+ if (Start == Current) {
+ setError("Got empty plain scalar", Start);
+ return false;
+ }
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Plain scalars can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanAliasOrAnchor(bool IsAlias) {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1);
+ while(true) {
+ if ( *Current == '[' || *Current == ']'
+ || *Current == '{' || *Current == '}'
+ || *Current == ','
+ || *Current == ':')
+ break;
+ StringRef::iterator i = skip_ns_char(Current);
+ if (i == Current)
+ break;
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty alias or anchor", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = IsAlias ? Token::TK_Alias : Token::TK_Anchor;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Alias and anchors can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::scanBlockScalar(bool IsLiteral) {
+ StringRef::iterator Start = Current;
+ skip(1); // Eat | or >
+ while(true) {
+ StringRef::iterator i = skip_nb_char(Current);
+ if (i == Current) {
+ if (Column == 0)
+ break;
+ i = skip_b_break(Current);
+ if (i != Current) {
+ // We got a line break.
+ Column = 0;
+ ++Line;
+ Current = i;
+ continue;
+ } else {
+ // There was an error, which should already have been printed out.
+ return false;
+ }
+ }
+ Current = i;
+ ++Column;
+ }
+
+ if (Start == Current) {
+ setError("Got empty block scalar", Start);
+ return false;
+ }
+
+ Token T;
+ T.Kind = Token::TK_Scalar;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+ return true;
+}
+
+bool Scanner::scanTag() {
+ StringRef::iterator Start = Current;
+ unsigned ColStart = Column;
+ skip(1); // Eat !.
+ if (Current == End || isBlankOrBreak(Current)) {
+ // An empty tag.
+ } else if (*Current == '<') {
+ skip(1);
+ scan_ns_uri_char();
+ if (!consume('>'))
+ return false;
+ } else {
+ // FIXME: Actually parse the c-ns-shorthand-tag rule.
+ Current = skip_while(&Scanner::skip_ns_char, Current);
+ }
+
+ Token T;
+ T.Kind = Token::TK_Tag;
+ T.Range = StringRef(Start, Current - Start);
+ TokenQueue.push_back(T);
+
+ // Tags can be simple keys.
+ saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false);
+
+ IsSimpleKeyAllowed = false;
+
+ return true;
+}
+
+bool Scanner::fetchMoreTokens() {
+ if (IsStartOfStream)
+ return scanStreamStart();
+
+ scanToNextToken();
+
+ if (Current == End)
+ return scanStreamEnd();
+
+ removeStaleSimpleKeyCandidates();
+
+ unrollIndent(Column);
+
+ if (Column == 0 && *Current == '%')
+ return scanDirective();
+
+ if (Column == 0 && Current + 4 <= End
+ && *Current == '-'
+ && *(Current + 1) == '-'
+ && *(Current + 2) == '-'
+ && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+ return scanDocumentIndicator(true);
+
+ if (Column == 0 && Current + 4 <= End
+ && *Current == '.'
+ && *(Current + 1) == '.'
+ && *(Current + 2) == '.'
+ && (Current + 3 == End || isBlankOrBreak(Current + 3)))
+ return scanDocumentIndicator(false);
+
+ if (*Current == '[')
+ return scanFlowCollectionStart(true);
+
+ if (*Current == '{')
+ return scanFlowCollectionStart(false);
+
+ if (*Current == ']')
+ return scanFlowCollectionEnd(true);
+
+ if (*Current == '}')
+ return scanFlowCollectionEnd(false);
+
+ if (*Current == ',')
+ return scanFlowEntry();
+
+ if (*Current == '-' && isBlankOrBreak(Current + 1))
+ return scanBlockEntry();
+
+ if (*Current == '?' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ return scanKey();
+
+ if (*Current == ':' && (FlowLevel || isBlankOrBreak(Current + 1)))
+ return scanValue();
+
+ if (*Current == '*')
+ return scanAliasOrAnchor(true);
+
+ if (*Current == '&')
+ return scanAliasOrAnchor(false);
+
+ if (*Current == '!')
+ return scanTag();
+
+ if (*Current == '|' && !FlowLevel)
+ return scanBlockScalar(true);
+
+ if (*Current == '>' && !FlowLevel)
+ return scanBlockScalar(false);
+
+ if (*Current == '\'')
+ return scanFlowScalar(false);
+
+ if (*Current == '"')
+ return scanFlowScalar(true);
+
+ // Get a plain scalar.
+ StringRef FirstChar(Current, 1);
+ if (!(isBlankOrBreak(Current)
+ || FirstChar.find_first_of("-?:,[]{}#&*!|>'\"%@`") != StringRef::npos)
+ || (*Current == '-' && !isBlankOrBreak(Current + 1))
+ || (!FlowLevel && (*Current == '?' || *Current == ':')
+ && isBlankOrBreak(Current + 1))
+ || (!FlowLevel && *Current == ':'
+ && Current + 2 < End
+ && *(Current + 1) == ':'
+ && !isBlankOrBreak(Current + 2)))
+ return scanPlainScalar();
+
+ setError("Unrecognized character while tokenizing.");
+ return false;
+}
+
+Stream::Stream(StringRef Input, SourceMgr &SM)
+ : scanner(new Scanner(Input, SM))
+ , CurrentDoc(0) {}
+
+Stream::~Stream() {}
+
+bool Stream::failed() { return scanner->failed(); }
+
+void Stream::printError(Node *N, const Twine &Msg) {
+ SmallVector<SMRange, 1> Ranges;
+ Ranges.push_back(N->getSourceRange());
+ scanner->printError( N->getSourceRange().Start
+ , SourceMgr::DK_Error
+ , Msg
+ , Ranges);
+}
+
+void Stream::handleYAMLDirective(const Token &t) {
+ // TODO: Ensure version is 1.x.
+}
+
+document_iterator Stream::begin() {
+ if (CurrentDoc)
+ report_fatal_error("Can only iterate over the stream once");
+
+ // Skip Stream-Start.
+ scanner->getNext();
+
+ CurrentDoc.reset(new Document(*this));
+ return document_iterator(CurrentDoc);
+}
+
+document_iterator Stream::end() {
+ return document_iterator();
+}
+
+void Stream::skip() {
+ for (document_iterator i = begin(), e = end(); i != e; ++i)
+ i->skip();
+}
+
+Node::Node(unsigned int Type, OwningPtr<Document> &D, StringRef A)
+ : Doc(D)
+ , TypeID(Type)
+ , Anchor(A) {
+ SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin());
+ SourceRange = SMRange(Start, Start);
+}
+
+Token &Node::peekNext() {
+ return Doc->peekNext();
+}
+
+Token Node::getNext() {
+ return Doc->getNext();
+}
+
+Node *Node::parseBlockNode() {
+ return Doc->parseBlockNode();
+}
+
+BumpPtrAllocator &Node::getAllocator() {
+ return Doc->NodeAllocator;
+}
+
+void Node::setError(const Twine &Msg, Token &Tok) const {
+ Doc->setError(Msg, Tok);
+}
+
+bool Node::failed() const {
+ return Doc->failed();
+}
+
+
+
+StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
+ // TODO: Handle newlines properly. We need to remove leading whitespace.
+ if (Value[0] == '"') { // Double quoted.
+ // Pull off the leading and trailing "s.
+ StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+ // Search for characters that would require unescaping the value.
+ StringRef::size_type i = UnquotedValue.find_first_of("\\\r\n");
+ if (i != StringRef::npos)
+ return unescapeDoubleQuoted(UnquotedValue, i, Storage);
+ return UnquotedValue;
+ } else if (Value[0] == '\'') { // Single quoted.
+ // Pull off the leading and trailing 's.
+ StringRef UnquotedValue = Value.substr(1, Value.size() - 2);
+ StringRef::size_type i = UnquotedValue.find('\'');
+ if (i != StringRef::npos) {
+ // We're going to need Storage.
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ for (; i != StringRef::npos; i = UnquotedValue.find('\'')) {
+ StringRef Valid(UnquotedValue.begin(), i);
+ Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ Storage.push_back('\'');
+ UnquotedValue = UnquotedValue.substr(i + 2);
+ }
+ Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ return StringRef(Storage.begin(), Storage.size());
+ }
+ return UnquotedValue;
+ }
+ // Plain or block.
+ size_t trimtrail = Value.rfind(' ');
+ return Value.drop_back(
+ trimtrail == StringRef::npos ? 0 : Value.size() - trimtrail);
+}
+
+StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
+ , StringRef::size_type i
+ , SmallVectorImpl<char> &Storage)
+ const {
+ // Use Storage to build proper value.
+ Storage.clear();
+ Storage.reserve(UnquotedValue.size());
+ for (; i != StringRef::npos; i = UnquotedValue.find_first_of("\\\r\n")) {
+ // Insert all previous chars into Storage.
+ StringRef Valid(UnquotedValue.begin(), i);
+ Storage.insert(Storage.end(), Valid.begin(), Valid.end());
+ // Chop off inserted chars.
+ UnquotedValue = UnquotedValue.substr(i);
+
+ assert(!UnquotedValue.empty() && "Can't be empty!");
+
+ // Parse escape or line break.
+ switch (UnquotedValue[0]) {
+ case '\r':
+ case '\n':
+ Storage.push_back('\n');
+ if ( UnquotedValue.size() > 1
+ && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+ UnquotedValue = UnquotedValue.substr(1);
+ UnquotedValue = UnquotedValue.substr(1);
+ break;
+ default:
+ if (UnquotedValue.size() == 1)
+ // TODO: Report error.
+ break;
+ UnquotedValue = UnquotedValue.substr(1);
+ switch (UnquotedValue[0]) {
+ default: {
+ Token T;
+ T.Range = StringRef(UnquotedValue.begin(), 1);
+ setError("Unrecognized escape code!", T);
+ return "";
+ }
+ case '\r':
+ case '\n':
+ // Remove the new line.
+ if ( UnquotedValue.size() > 1
+ && (UnquotedValue[1] == '\r' || UnquotedValue[1] == '\n'))
+ UnquotedValue = UnquotedValue.substr(1);
+ // If this was just a single byte newline, it will get skipped
+ // below.
+ break;
+ case '0':
+ Storage.push_back(0x00);
+ break;
+ case 'a':
+ Storage.push_back(0x07);
+ break;
+ case 'b':
+ Storage.push_back(0x08);
+ break;
+ case 't':
+ case 0x09:
+ Storage.push_back(0x09);
+ break;
+ case 'n':
+ Storage.push_back(0x0A);
+ break;
+ case 'v':
+ Storage.push_back(0x0B);
+ break;
+ case 'f':
+ Storage.push_back(0x0C);
+ break;
+ case 'r':
+ Storage.push_back(0x0D);
+ break;
+ case 'e':
+ Storage.push_back(0x1B);
+ break;
+ case ' ':
+ Storage.push_back(0x20);
+ break;
+ case '"':
+ Storage.push_back(0x22);
+ break;
+ case '/':
+ Storage.push_back(0x2F);
+ break;
+ case '\\':
+ Storage.push_back(0x5C);
+ break;
+ case 'N':
+ encodeUTF8(0x85, Storage);
+ break;
+ case '_':
+ encodeUTF8(0xA0, Storage);
+ break;
+ case 'L':
+ encodeUTF8(0x2028, Storage);
+ break;
+ case 'P':
+ encodeUTF8(0x2029, Storage);
+ break;
+ case 'x': {
+ if (UnquotedValue.size() < 3)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ UnquotedValue.substr(1, 2).getAsInteger(16, UnicodeScalarValue);
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(2);
+ break;
+ }
+ case 'u': {
+ if (UnquotedValue.size() < 5)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ UnquotedValue.substr(1, 4).getAsInteger(16, UnicodeScalarValue);
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(4);
+ break;
+ }
+ case 'U': {
+ if (UnquotedValue.size() < 9)
+ // TODO: Report error.
+ break;
+ unsigned int UnicodeScalarValue;
+ UnquotedValue.substr(1, 8).getAsInteger(16, UnicodeScalarValue);
+ encodeUTF8(UnicodeScalarValue, Storage);
+ UnquotedValue = UnquotedValue.substr(8);
+ break;
+ }
+ }
+ UnquotedValue = UnquotedValue.substr(1);
+ }
+ }
+ Storage.insert(Storage.end(), UnquotedValue.begin(), UnquotedValue.end());
+ return StringRef(Storage.begin(), Storage.size());
+}
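For instance, the double-quoted body a\x41\nb unescapes to 'a', 'A', a real newline, then 'b', with Storage used as scratch space. A usage sketch (SN is a hypothetical ScalarNode*, not part of this patch):

    // Sketch: unescaping is reached through getValue(), which strips the
    // quotes and delegates here when a '\', '\r', or '\n' is present.
    SmallString<32> Storage;
    StringRef V = SN->getValue(Storage);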
+
+Node *KeyValueNode::getKey() {
+ if (Key)
+ return Key;
+ // Handle implicit null keys.
+ {
+ Token &t = peekNext();
+ if ( t.Kind == Token::TK_BlockEnd
+ || t.Kind == Token::TK_Value
+ || t.Kind == Token::TK_Error) {
+ return Key = new (getAllocator()) NullNode(Doc);
+ }
+ if (t.Kind == Token::TK_Key)
+ getNext(); // skip TK_Key.
+ }
+
+ // Handle explicit null keys.
+ Token &t = peekNext();
+ if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Value) {
+ return Key = new (getAllocator()) NullNode(Doc);
+ }
+
+ // We've got a normal key.
+ return Key = parseBlockNode();
+}
+
+Node *KeyValueNode::getValue() {
+ if (Value)
+ return Value;
+ getKey()->skip();
+ if (failed())
+ return Value = new (getAllocator()) NullNode(Doc);
+
+ // Handle implicit null values.
+ {
+ Token &t = peekNext();
+ if ( t.Kind == Token::TK_BlockEnd
+ || t.Kind == Token::TK_FlowMappingEnd
+ || t.Kind == Token::TK_Key
+ || t.Kind == Token::TK_FlowEntry
+ || t.Kind == Token::TK_Error) {
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
+ if (t.Kind != Token::TK_Value) {
+ setError("Unexpected token in Key Value.", t);
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+ getNext(); // skip TK_Value.
+ }
+
+ // Handle explicit null values.
+ Token &t = peekNext();
+ if (t.Kind == Token::TK_BlockEnd || t.Kind == Token::TK_Key) {
+ return Value = new (getAllocator()) NullNode(Doc);
+ }
+
+ // We got a normal value.
+ return Value = parseBlockNode();
+}
+
+void MappingNode::increment() {
+ if (failed()) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ if (CurrentEntry) {
+ CurrentEntry->skip();
+ if (Type == MT_Inline) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ }
+ Token T = peekNext();
+ if (T.Kind == Token::TK_Key || T.Kind == Token::TK_Scalar) {
+ // KeyValueNode eats the TK_Key. That way it can detect null keys.
+ CurrentEntry = new (getAllocator()) KeyValueNode(Doc);
+ } else if (Type == MT_Block) {
+ switch (T.Kind) {
+ case Token::TK_BlockEnd:
+ getNext();
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError("Unexpected token. Expected Key or Block End", T);
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else {
+ switch (T.Kind) {
+ case Token::TK_FlowEntry:
+ // Eat the flow entry and recurse.
+ getNext();
+ return increment();
+ case Token::TK_FlowMappingEnd:
+ getNext();
+ case Token::TK_Error:
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
+ "Mapping End."
+ , T);
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ }
+}
+
+void SequenceNode::increment() {
+ if (failed()) {
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ return;
+ }
+ if (CurrentEntry)
+ CurrentEntry->skip();
+ Token T = peekNext();
+ if (SeqType == ST_Block) {
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ getNext();
+ CurrentEntry = parseBlockNode();
+ if (CurrentEntry == 0) { // An error occurred.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ break;
+ case Token::TK_BlockEnd:
+ getNext();
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ setError( "Unexpected token. Expected Block Entry or Block End."
+ , T);
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else if (SeqType == ST_Indentless) {
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ getNext();
+ CurrentEntry = parseBlockNode();
+ if (CurrentEntry == 0) { // An error occurred.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ break;
+ default:
+ case Token::TK_Error:
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ }
+ } else if (SeqType == ST_Flow) {
+ switch (T.Kind) {
+ case Token::TK_FlowEntry:
+ // Eat the flow entry and recurse.
+ getNext();
+ WasPreviousTokenFlowEntry = true;
+ return increment();
+ case Token::TK_FlowSequenceEnd:
+ getNext();
+ case Token::TK_Error:
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ case Token::TK_StreamEnd:
+ case Token::TK_DocumentEnd:
+ case Token::TK_DocumentStart:
+ setError("Could not find closing ]!", T);
+ // Set this to end iterator.
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ default:
+ if (!WasPreviousTokenFlowEntry) {
+ setError("Expected , between entries!", T);
+ IsAtEnd = true;
+ CurrentEntry = 0;
+ break;
+ }
+ // Otherwise it must be a flow entry.
+ CurrentEntry = parseBlockNode();
+ if (!CurrentEntry) {
+ IsAtEnd = true;
+ }
+ WasPreviousTokenFlowEntry = false;
+ break;
+ }
+ }
+}
+
+Document::Document(Stream &S) : stream(S), Root(0) {
+ if (parseDirectives())
+ expectToken(Token::TK_DocumentStart);
+ Token &T = peekNext();
+ if (T.Kind == Token::TK_DocumentStart)
+ getNext();
+}
+
+bool Document::skip() {
+ if (stream.scanner->failed())
+ return false;
+ if (!Root)
+ getRoot();
+ Root->skip();
+ Token &T = peekNext();
+ if (T.Kind == Token::TK_StreamEnd)
+ return false;
+ if (T.Kind == Token::TK_DocumentEnd) {
+ getNext();
+ return skip();
+ }
+ return true;
+}
+
+Token &Document::peekNext() {
+ return stream.scanner->peekNext();
+}
+
+Token Document::getNext() {
+ return stream.scanner->getNext();
+}
+
+void Document::setError(const Twine &Message, Token &Location) const {
+ stream.scanner->setError(Message, Location.Range.begin());
+}
+
+bool Document::failed() const {
+ return stream.scanner->failed();
+}
+
+Node *Document::parseBlockNode() {
+ Token T = peekNext();
+ // Handle properties.
+ Token AnchorInfo;
+parse_property:
+ switch (T.Kind) {
+ case Token::TK_Alias:
+ getNext();
+ return new (NodeAllocator) AliasNode(stream.CurrentDoc, T.Range.substr(1));
+ case Token::TK_Anchor:
+ if (AnchorInfo.Kind == Token::TK_Anchor) {
+ setError("Already encountered an anchor for this node!", T);
+ return 0;
+ }
+ AnchorInfo = getNext(); // Consume TK_Anchor.
+ T = peekNext();
+ goto parse_property;
+ case Token::TK_Tag:
+ getNext(); // Skip TK_Tag.
+ T = peekNext();
+ goto parse_property;
+ default:
+ break;
+ }
+
+ switch (T.Kind) {
+ case Token::TK_BlockEntry:
+ // We got an unindented BlockEntry sequence. This is not terminated with
+ // a BlockEnd.
+ // Don't eat the TK_BlockEntry, SequenceNode needs it.
+ return new (NodeAllocator) SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Indentless);
+ case Token::TK_BlockSequenceStart:
+ getNext();
+ return new (NodeAllocator)
+ SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Block);
+ case Token::TK_BlockMappingStart:
+ getNext();
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Block);
+ case Token::TK_FlowSequenceStart:
+ getNext();
+ return new (NodeAllocator)
+ SequenceNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , SequenceNode::ST_Flow);
+ case Token::TK_FlowMappingStart:
+ getNext();
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Flow);
+ case Token::TK_Scalar:
+ getNext();
+ return new (NodeAllocator)
+ ScalarNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , T.Range);
+ case Token::TK_Key:
+ // Don't eat the TK_Key, KeyValueNode expects it.
+ return new (NodeAllocator)
+ MappingNode( stream.CurrentDoc
+ , AnchorInfo.Range.substr(1)
+ , MappingNode::MT_Inline);
+ case Token::TK_DocumentStart:
+ case Token::TK_DocumentEnd:
+ case Token::TK_StreamEnd:
+ default:
+ // TODO: Properly handle tags. "[!!str ]" should resolve to !!str "", not
+ // !!null null.
+ return new (NodeAllocator) NullNode(stream.CurrentDoc);
+ case Token::TK_Error:
+ return 0;
+ }
+ llvm_unreachable("Control flow shouldn't reach here.");
+ return 0;
+}
+
+bool Document::parseDirectives() {
+ bool isDirective = false;
+ while (true) {
+ Token T = peekNext();
+ if (T.Kind == Token::TK_TagDirective) {
+ handleTagDirective(getNext());
+ isDirective = true;
+ } else if (T.Kind == Token::TK_VersionDirective) {
+ stream.handleYAMLDirective(getNext());
+ isDirective = true;
+ } else
+ break;
+ }
+ return isDirective;
+}
+
+bool Document::expectToken(int TK) {
+ Token T = getNext();
+ if (T.Kind != TK) {
+ setError("Unexpected token", T);
+ return false;
+ }
+ return true;
+}
+
+OwningPtr<Document> document_iterator::NullDoc;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 4927e9a7b9d4..72d3986f41dd 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -20,6 +20,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/system_error.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt
index 0db41346911d..82f72b03eefd 100644
--- a/lib/TableGen/CMakeLists.txt
+++ b/lib/TableGen/CMakeLists.txt
@@ -6,11 +6,8 @@ add_llvm_library(LLVMTableGen
Error.cpp
Main.cpp
Record.cpp
+ TableGenAction.cpp
TableGenBackend.cpp
TGLexer.cpp
TGParser.cpp
)
-
-add_llvm_library_dependencies(LLVMTableGen
- LLVMSupport
- )
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index 5b2cbbfec4b5..5071ee77ac43 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -21,11 +21,11 @@ namespace llvm {
SourceMgr SrcMgr;
void PrintError(SMLoc ErrorLoc, const Twine &Msg) {
- SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
+ SrcMgr.PrintMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
}
void PrintError(const char *Loc, const Twine &Msg) {
- SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg);
}
void PrintError(const Twine &Msg) {
diff --git a/lib/TableGen/LLVMBuild.txt b/lib/TableGen/LLVMBuild.txt
new file mode 100644
index 000000000000..54cedfd5918b
--- /dev/null
+++ b/lib/TableGen/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/TableGen/LLVMBuild.txt -----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = TableGen
+parent = Libraries
+required_libraries = Support
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index b7c51cae953c..93eed24b8dc7 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/Format.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
@@ -29,6 +30,8 @@ using namespace llvm;
// std::string wrapper for DenseMap purposes
//===----------------------------------------------------------------------===//
+namespace llvm {
+
/// TableGenStringKey - This is a wrapper for std::string suitable for
/// using as a key to a DenseMap. Because there isn't a particularly
/// good way to indicate tombstone or empty keys for strings, we want
@@ -43,14 +46,16 @@ public:
TableGenStringKey(const char *str) : data(str) {}
const std::string &str() const { return data; }
-
+
+ friend hash_code hash_value(const TableGenStringKey &Value) {
+ using llvm::hash_value;
+ return hash_value(Value.str());
+ }
private:
std::string data;
};
/// Specialize DenseMapInfo for TableGenStringKey.
-namespace llvm {
-
template<> struct DenseMapInfo<TableGenStringKey> {
static inline TableGenStringKey getEmptyKey() {
TableGenStringKey Empty("<<<EMPTY KEY>>>");
@@ -61,7 +66,8 @@ template<> struct DenseMapInfo<TableGenStringKey> {
return Tombstone;
}
static unsigned getHashValue(const TableGenStringKey& Val) {
- return HashString(Val.str());
+ using llvm::hash_value;
+ return hash_value(Val);
}
static bool isEqual(const TableGenStringKey& LHS,
const TableGenStringKey& RHS) {
@@ -69,7 +75,7 @@ template<> struct DenseMapInfo<TableGenStringKey> {
}
};
-}
+} // namespace llvm
//===----------------------------------------------------------------------===//
// Type implementations
@@ -78,9 +84,9 @@ template<> struct DenseMapInfo<TableGenStringKey> {
BitRecTy BitRecTy::Shared;
IntRecTy IntRecTy::Shared;
StringRecTy StringRecTy::Shared;
-CodeRecTy CodeRecTy::Shared;
DagRecTy DagRecTy::Shared;
+void RecTy::anchor() { }
void RecTy::dump() const { print(errs()); }
ListRecTy *RecTy::getListTy() {
@@ -315,12 +321,6 @@ Init *ListRecTy::convertValue(TypedInit *TI) {
return 0;
}
-Init *CodeRecTy::convertValue(TypedInit *TI) {
- if (TI->getType()->typeIsConvertibleTo(this))
- return TI;
- return 0;
-}
-
Init *DagRecTy::convertValue(TypedInit *TI) {
if (TI->getType()->typeIsConvertibleTo(this))
return TI;
@@ -444,13 +444,18 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
// Initializer implementations
//===----------------------------------------------------------------------===//
+void Init::anchor() { }
void Init::dump() const { return print(errs()); }
+void UnsetInit::anchor() { }
+
UnsetInit *UnsetInit::get() {
static UnsetInit TheInit;
return &TheInit;
}
+void BitInit::anchor() { }
+
BitInit *BitInit::get(bool V) {
static BitInit True(true);
static BitInit False(false);
@@ -565,7 +570,9 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
return BitsInit::get(NewBits);
}
-StringInit *StringInit::get(const std::string &V) {
+void StringInit::anchor() { }
+
+StringInit *StringInit::get(StringRef V) {
typedef StringMap<StringInit *> Pool;
static Pool ThePool;
@@ -574,15 +581,6 @@ StringInit *StringInit::get(const std::string &V) {
return I;
}
-CodeInit *CodeInit::get(const std::string &V) {
- typedef StringMap<CodeInit *> Pool;
- static Pool ThePool;
-
- CodeInit *&I = ThePool[V];
- if (!I) I = new CodeInit(V);
- return I;
-}
-
static void ProfileListInit(FoldingSetNodeID &ID,
ArrayRef<Init *> Range,
RecTy *EltTy) {
@@ -735,7 +733,6 @@ UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) {
Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- default: assert(0 && "Unknown unop");
case CAST: {
if (getType()->getAsString() == "string") {
StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
@@ -747,6 +744,11 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (LHSd) {
return StringInit::get(LHSd->getDef()->getName());
}
+
+ IntInit *LHSi = dynamic_cast<IntInit*>(LHS);
+ if (LHSi) {
+ return StringInit::get(LHSi->getAsString());
+ }
} else {
StringInit *LHSs = dynamic_cast<StringInit*>(LHS);
if (LHSs) {
@@ -760,7 +762,9 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
return VarInit::get(Name, RV->getType());
}
- std::string TemplateArgName = CurRec->getName()+":"+Name;
+ Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name,
+ ":");
+
if (CurRec->isTemplateArg(TemplateArgName)) {
const RecordVal *RV = CurRec->getValue(TemplateArgName);
assert(RV && "Template arg doesn't exist??");
@@ -773,7 +777,8 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
if (CurMultiClass) {
- std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name, "::");
+
if (CurMultiClass->Rec.isTemplateArg(MCName)) {
const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
assert(RV && "Template arg doesn't exist??");
@@ -885,7 +890,6 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs,
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- default: assert(0 && "Unknown binop");
case CONCAT: {
DagInit *LHSs = dynamic_cast<DagInit*>(LHS);
DagInit *RHSs = dynamic_cast<DagInit*>(RHS);
@@ -944,7 +948,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
int64_t LHSv = LHSi->getValue(), RHSv = RHSi->getValue();
int64_t Result;
switch (getOpcode()) {
- default: assert(0 && "Bad opcode!");
+ default: llvm_unreachable("Bad opcode!");
case SHL: Result = LHSv << RHSv; break;
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
@@ -1134,7 +1138,6 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
- default: assert(0 && "Unknown binop");
case SUBST: {
DefInit *LHSd = dynamic_cast<DefInit*>(LHS);
VarInit *LHSv = dynamic_cast<VarInit*>(LHS);
@@ -1298,7 +1301,12 @@ TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
VarInit *VarInit::get(const std::string &VN, RecTy *T) {
- typedef std::pair<RecTy *, TableGenStringKey> Key;
+ Init *Value = StringInit::get(VN);
+ return VarInit::get(Value, T);
+}
+
+VarInit *VarInit::get(Init *VN, RecTy *T) {
+ typedef std::pair<RecTy *, Init *> Key;
typedef DenseMap<Key, VarInit *> Pool;
static Pool ThePool;
@@ -1309,12 +1317,19 @@ VarInit *VarInit::get(const std::string &VN, RecTy *T) {
return I;
}
+const std::string &VarInit::getName() const {
+ StringInit *NameString =
+ dynamic_cast<StringInit *>(getNameInit());
+ assert(NameString && "VarInit name is not a string!");
+ return NameString->getValue();
+}
+
Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV,
unsigned Bit) const {
- if (R.isTemplateArg(getName())) return 0;
- if (IRV && IRV->getName() != getName()) return 0;
+ if (R.isTemplateArg(getNameInit())) return 0;
+ if (IRV && IRV->getNameInit() != getNameInit()) return 0;
- RecordVal *RV = R.getValue(getName());
+ RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
assert(dynamic_cast<BitsInit*>(RV->getValue()));
BitsInit *BI = (BitsInit*)RV->getValue();
@@ -1333,10 +1348,10 @@ Init *VarInit::resolveBitReference(Record &R, const RecordVal *IRV,
Init *VarInit::resolveListElementReference(Record &R,
const RecordVal *IRV,
unsigned Elt) const {
- if (R.isTemplateArg(getName())) return 0;
- if (IRV && IRV->getName() != getName()) return 0;
+ if (R.isTemplateArg(getNameInit())) return 0;
+ if (IRV && IRV->getNameInit() != getNameInit()) return 0;
- RecordVal *RV = R.getValue(getName());
+ RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
ListInit *LI = dynamic_cast<ListInit*>(RV->getValue());
if (!LI) {
@@ -1659,7 +1674,7 @@ void RecordVal::dump() const { errs() << *this; }
void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
if (getPrefix()) OS << "field ";
- OS << *getType() << " " << getName();
+ OS << *getType() << " " << getNameInitAsString();
if (getValue())
OS << " = " << *getValue();
@@ -1669,13 +1684,22 @@ void RecordVal::print(raw_ostream &OS, bool PrintSem) const {
unsigned Record::LastID = 0;
+void Record::init() {
+ checkName();
+
+ // Every record potentially has a def at the top. This value is
+ // replaced with the top-level def name at instantiation time.
+ RecordVal DN("NAME", StringRecTy::get(), 0);
+ addValue(DN);
+}
+
void Record::checkName() {
// Ensure the record name has string type.
const TypedInit *TypedName = dynamic_cast<const TypedInit *>(Name);
assert(TypedName && "Record name is not typed!");
RecTy *Type = TypedName->getType();
if (dynamic_cast<StringRecTy *>(Type) == 0) {
- llvm_unreachable("Record name is not a string!");
+ throw "Record name is not a string!";
}
}
@@ -1695,20 +1719,13 @@ const std::string &Record::getName() const {
void Record::setName(Init *NewName) {
if (TrackedRecords.getDef(Name->getAsUnquotedString()) == this) {
TrackedRecords.removeDef(Name->getAsUnquotedString());
- Name = NewName;
TrackedRecords.addDef(this);
- } else {
+ } else if (TrackedRecords.getClass(Name->getAsUnquotedString()) == this) {
TrackedRecords.removeClass(Name->getAsUnquotedString());
- Name = NewName;
TrackedRecords.addClass(this);
- }
+ } // Otherwise this isn't yet registered.
+ Name = NewName;
checkName();
- // Since the Init for the name was changed, see if we can resolve
- // any of it using members of the Record.
- Init *ComputedName = Name->resolveReferences(*this, 0);
- if (ComputedName != Name) {
- setName(ComputedName);
- }
// DO NOT resolve record values to the name at this point because
// there might be default values for arguments of this def. Those
// arguments might not have been resolved yet so we don't want to
@@ -1731,17 +1748,25 @@ void Record::setName(const std::string &Name) {
/// references.
void Record::resolveReferencesTo(const RecordVal *RV) {
for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+ if (RV == &Values[i]) // Skip resolve the same field as the given one
+ continue;
if (Init *V = Values[i].getValue())
Values[i].setValue(V->resolveReferences(*this, RV));
}
+ Init *OldName = getNameInit();
+ Init *NewName = Name->resolveReferences(*this, RV);
+ if (NewName != OldName) {
+ // Re-register with RecordKeeper.
+ setName(NewName);
+ }
}
void Record::dump() const { errs() << *this; }
raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
- OS << R.getName();
+ OS << R.getNameInitAsString();
- const std::vector<std::string> &TArgs = R.getTemplateArgs();
+ const std::vector<Init *> &TArgs = R.getTemplateArgs();
if (!TArgs.empty()) {
OS << "<";
for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
@@ -1758,7 +1783,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
if (!SC.empty()) {
OS << "\t//";
for (unsigned i = 0, e = SC.size(); i != e; ++i)
- OS << " " << SC[i]->getName();
+ OS << " " << SC[i]->getNameInitAsString();
}
OS << "\n";
@@ -1954,18 +1979,6 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const {
"' does not have a dag initializer!";
}
-std::string Record::getValueAsCode(StringRef FieldName) const {
- const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
- throw "Record `" + getName() + "' does not have a field named `" +
- FieldName.str() + "'!\n";
-
- if (CodeInit *CI = dynamic_cast<CodeInit*>(R->getValue()))
- return CI->getValue();
- throw "Record `" + getName() + "', field `" + FieldName.str() +
- "' does not have a code initializer!";
-}
-
void MultiClass::dump() const {
errs() << "Record:\n";
@@ -2017,3 +2030,39 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
return Defs;
}
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
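+/// E.g. name X in record Foo becomes "Foo:X" with Scoper ":", or
+/// "M::Foo:X" when inside multiclass M.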
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ Init *Name, const std::string &Scoper) {
+ RecTy *Type = dynamic_cast<TypedInit *>(Name)->getType();
+
+ BinOpInit *NewName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurRec.getNameInit(),
+ StringInit::get(Scoper),
+ Type)->Fold(&CurRec, CurMultiClass),
+ Name,
+ Type);
+
+ if (CurMultiClass && Scoper != "::") {
+ NewName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ CurMultiClass->Rec.getNameInit(),
+ StringInit::get("::"),
+ Type)->Fold(&CurRec, CurMultiClass),
+ NewName->Fold(&CurRec, CurMultiClass),
+ Type);
+ }
+
+ return NewName->Fold(&CurRec, CurMultiClass);
+}
+
+/// QualifyName - Return an Init with a qualifier prefix referring
+/// to CurRec's name.
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ const std::string &Name,
+ const std::string &Scoper) {
+ return QualifyName(CurRec, CurMultiClass, StringInit::get(Name), Scoper);
+}
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 8c1b4290548d..ff322e74fba2 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -15,7 +15,6 @@
#include "llvm/TableGen/Error.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Config/config.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include <cctype>
@@ -23,6 +22,9 @@
#include <cstdlib>
#include <cstring>
#include <cerrno>
+
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+
using namespace llvm;
TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
@@ -80,6 +82,10 @@ int TGLexer::getNextChar() {
}
}
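+/// peekNextChar - Return the character Index characters ahead of the
+/// current position without consuming any input.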
+int TGLexer::peekNextChar(int Index) {
+ return *(CurPtr + Index);
+}
+
tgtok::TokKind TGLexer::LexToken() {
TokStart = CurPtr;
// This always consumes at least one character.
@@ -87,10 +93,10 @@ tgtok::TokKind TGLexer::LexToken() {
switch (CurChar) {
default:
- // Handle letters: [a-zA-Z_#]
- if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(CurChar) || CurChar == '_')
return LexIdentifier();
-
+
// Unknown character, emit an error.
return ReturnError(TokStart, "Unexpected character");
case EOF: return tgtok::Eof;
@@ -107,6 +113,7 @@ tgtok::TokKind TGLexer::LexToken() {
case ')': return tgtok::r_paren;
case '=': return tgtok::equal;
case '?': return tgtok::question;
+ case '#': return tgtok::paste;
case 0:
case ' ':
@@ -128,8 +135,44 @@ tgtok::TokKind TGLexer::LexToken() {
return LexToken();
case '-': case '+':
case '0': case '1': case '2': case '3': case '4': case '5': case '6':
- case '7': case '8': case '9':
+ case '7': case '8': case '9': {
+ int NextChar = 0;
+ if (isdigit(CurChar)) {
+ // Allow identifiers to start with a number if it is followed by
+ // an identifier. This can happen with paste operations like
+ // foo#8i.
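+ // Scan past the leading digits and look at what follows: e.g. "8i"
+ // lexes as an identifier, while "0x8" and "0b1" lex as numbers.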
+ int i = 0;
+ do {
+ NextChar = peekNextChar(i++);
+ } while (isdigit(NextChar));
+
+ if (NextChar == 'x' || NextChar == 'b') {
+ // If this is [0-9]b[01] or [0-9]x[0-9a-fA-F], this is most
+ // likely a number.
+ int NextNextChar = peekNextChar(i);
+ switch (NextNextChar) {
+ default:
+ break;
+ case '0': case '1':
+ if (NextChar == 'b')
+ return LexNumber();
+ // Fallthrough
+ case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ if (NextChar == 'x')
+ return LexNumber();
+ break;
+ }
+ }
+ }
+
+ if (isalpha(NextChar) || NextChar == '_')
+ return LexIdentifier();
+
return LexNumber();
+ }
case '"': return LexString();
case '$': return LexVarName();
case '[': return LexBracket();
@@ -210,8 +253,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
const char *IdentStart = TokStart;
// Match the rest of the identifier regex: [0-9a-zA-Z_#]*
- while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
- *CurPtr == '#')
+ while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
++CurPtr;
// Check to see if this identifier is a keyword.
@@ -232,6 +274,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
.Case("dag", tgtok::Dag)
.Case("class", tgtok::Class)
.Case("def", tgtok::Def)
+ .Case("foreach", tgtok::Foreach)
.Case("defm", tgtok::Defm)
.Case("multiclass", tgtok::MultiClass)
.Case("field", tgtok::Field)
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index 84d328b12d97..8a850b5cec8e 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -39,9 +39,10 @@ namespace tgtok {
colon, semi, // : ;
comma, period, // , .
equal, question, // = ?
-
+ paste, // #
+
// Keywords.
- Bit, Bits, Class, Code, Dag, Def, Defm, Field, In, Int, Let, List,
+ Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
MultiClass, String,
// !keywords.
@@ -109,6 +110,7 @@ private:
tgtok::TokKind ReturnError(const char *Loc, const Twine &Msg);
int getNextChar();
+ int peekNextChar(int Index);
void SkipBCPLComment();
bool SkipCComment();
tgtok::TokKind LexIdentifier();
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index e7f00baf4931..04c4fc158ff7 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -64,7 +64,7 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
if (CurRec == 0)
CurRec = &CurMultiClass->Rec;
- if (RecordVal *ERV = CurRec->getValue(RV.getName())) {
+ if (RecordVal *ERV = CurRec->getValue(RV.getNameInit())) {
// The value already exists in the class, treat this as a set.
if (ERV->setValue(RV.getValue()))
return Error(Loc, "New definition of '" + RV.getName() + "' of type '" +
@@ -79,7 +79,7 @@ bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
/// SetValue -
/// Return true on error, false on success.
-bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
+bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V) {
if (!V) return false;
@@ -87,13 +87,14 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
RecordVal *RV = CurRec->getValue(ValName);
if (RV == 0)
- return Error(Loc, "Value '" + ValName + "' unknown!");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ + "' unknown!");
// Do not allow assignments like 'X = X'. This will just cause infinite loops
// in the resolution machinery.
if (BitList.empty())
if (VarInit *VI = dynamic_cast<VarInit*>(V))
- if (VI->getName() == ValName)
+ if (VI->getNameInit() == ValName)
return false;
// If we are assigning to a subset of the bits in the value... then we must be
@@ -103,7 +104,8 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
if (!BitList.empty()) {
BitsInit *CurVal = dynamic_cast<BitsInit*>(RV->getValue());
if (CurVal == 0)
- return Error(Loc, "Value '" + ValName + "' is not a bits type");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ + "' is not a bits type");
// Convert the incoming value to a bits type of the appropriate size...
Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size()));
@@ -123,7 +125,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
unsigned Bit = BitList[i];
if (NewBits[Bit])
return Error(Loc, "Cannot set bit #" + utostr(Bit) + " of value '" +
- ValName + "' more than once");
+ ValName->getAsUnquotedString() + "' more than once");
NewBits[Bit] = BInit->getBit(i);
}
@@ -135,9 +137,10 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, const std::string &ValName,
}
if (RV->setValue(V))
- return Error(Loc, "Value '" + ValName + "' of type '" +
- RV->getType()->getAsString() +
- "' is incompatible with initializer '" + V->getAsString() +"'");
+ return Error(Loc, "Value '" + ValName->getAsUnquotedString() + "' of type '"
+ + RV->getType()->getAsString() +
+ "' is incompatible with initializer '" + V->getAsString()
+ + "'");
return false;
}
@@ -151,7 +154,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
if (AddValue(CurRec, SubClass.RefLoc, Vals[i]))
return true;
- const std::vector<std::string> &TArgs = SC->getTemplateArgs();
+ const std::vector<Init *> &TArgs = SC->getTemplateArgs();
// Ensure that an appropriate number of template arguments are specified.
if (TArgs.size() < SubClass.TemplateArgs.size())
@@ -174,8 +177,8 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClass.RefLoc,"Value not specified for template argument #"
- + utostr(i) + " (" + TArgs[i] + ") of subclass '" +
- SC->getName() + "'!");
+ + utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ + ") of subclass '" + SC->getNameInitAsString() + "'!");
}
}
@@ -230,7 +233,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
CurMC->DefPrototypes.push_back(NewDef);
}
- const std::vector<std::string> &SMCTArgs = SMC->Rec.getTemplateArgs();
+ const std::vector<Init *> &SMCTArgs = SMC->Rec.getTemplateArgs();
// Ensure that an appropriate number of template arguments are
// specified.
@@ -278,14 +281,121 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
} else if (!CurRec->getValue(SMCTArgs[i])->getValue()->isComplete()) {
return Error(SubMultiClass.RefLoc,
"Value not specified for template argument #"
- + utostr(i) + " (" + SMCTArgs[i] + ") of subclass '" +
- SMC->Rec.getName() + "'!");
+ + utostr(i) + " (" + SMCTArgs[i]->getAsUnquotedString()
+ + ") of subclass '" + SMC->Rec.getNameInitAsString() + "'!");
}
}
return false;
}
+/// ProcessForeachDefs - Given a record, apply all of the variable
+/// values in all surrounding foreach loops, creating new records for
+/// each combination of values.
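+///
+/// E.g. the nested loops
+///
+///   foreach i = [0, 1] in
+///     foreach j = [0, 1] in
+///       def rec#i#j;
+///
+/// instantiate rec00, rec01, rec10 and rec11.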
+bool TGParser::ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc) {
+ // We want to instantiate a new copy of CurRec for each combination
+ // of nested loop iterator values. We don't want to instantiate
+ // any copies until we have values for each loop iterator.
+ IterSet IterVals;
+ for (LoopVector::iterator Loop = Loops.begin(), LoopEnd = Loops.end();
+ Loop != LoopEnd;
+ ++Loop) {
+ // Process this loop.
+ if (ProcessForeachDefs(CurRec, CurMultiClass, Loc,
+ IterVals, *Loop, Loop+1)) {
+ Error(Loc,
+ "Could not process loops for def " + CurRec->getNameInitAsString());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// ProcessForeachDefs - Given a record, a loop and a loop iterator,
+/// apply each of the variable values in this loop and then process
+/// subloops.
+bool TGParser::ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc, IterSet &IterVals,
+ ForeachLoop &CurLoop,
+ LoopVector::iterator NextLoop) {
+ Init *IterVar = CurLoop.IterVar;
+ ListInit *List = dynamic_cast<ListInit *>(CurLoop.ListValue);
+
+ if (List == 0) {
+ Error(Loc, "Loop list is not a list");
+ return true;
+ }
+
+ // Process each value.
+ for (int64_t i = 0; i < List->getSize(); ++i) {
+ Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i);
+ IterVals.push_back(IterRecord(IterVar, ItemVal));
+
+ if (IterVals.size() == Loops.size()) {
+ // Ok, we have all of the iterator values for this point in the
+ // iteration space. Instantiate a new record to reflect this
+ // combination of values.
+ Record *IterRec = new Record(*CurRec);
+
+ // Set the iterator values now.
+ for (IterSet::iterator i = IterVals.begin(), iend = IterVals.end();
+ i != iend;
+ ++i) {
+ VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar);
+ if (IterVar == 0) {
+ Error(Loc, "foreach iterator is unresolved");
+ return true;
+ }
+
+ TypedInit *IVal = dynamic_cast<TypedInit *>(i->IterValue);
+ if (IVal == 0) {
+ Error(Loc, "foreach iterator value is untyped");
+ return true;
+ }
+
+ IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
+
+ if (SetValue(IterRec, Loc, IterVar->getName(),
+ std::vector<unsigned>(), IVal)) {
+ Error(Loc, "when instantiating this def");
+ return true;
+ }
+
+ // Resolve it next.
+ IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getName()));
+
+ // Remove it.
+ IterRec->removeValue(IterVar->getName());
+ }
+
+ if (Records.getDef(IterRec->getNameInitAsString())) {
+ Error(Loc, "def already exists: " + IterRec->getNameInitAsString());
+ return true;
+ }
+
+ Records.addDef(IterRec);
+ IterRec->resolveReferences();
+ }
+
+ if (NextLoop != Loops.end()) {
+ // Process nested loops.
+ if (ProcessForeachDefs(CurRec, CurMultiClass, Loc, IterVals, *NextLoop,
+ NextLoop+1)) {
+ Error(Loc,
+ "Could not process loops for def " +
+ CurRec->getNameInitAsString());
+ return true;
+ }
+ }
+
+ // We're done with this iterator.
+ IterVals.pop_back();
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Parser Code
//===----------------------------------------------------------------------===//
@@ -293,7 +403,8 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
/// isObjectStart - Return true if this is a valid first token for an Object.
static bool isObjectStart(tgtok::TokKind K) {
return K == tgtok::Class || K == tgtok::Def ||
- K == tgtok::Defm || K == tgtok::Let || K == tgtok::MultiClass;
+ K == tgtok::Defm || K == tgtok::Let ||
+ K == tgtok::MultiClass || K == tgtok::Foreach;
}
static std::string GetNewAnonymousName() {
@@ -303,18 +414,39 @@ static std::string GetNewAnonymousName() {
/// ParseObjectName - If an object name is specified, return it. Otherwise,
/// return an anonymous name.
-/// ObjectName ::= ID
+/// ObjectName ::= Value [ '#' Value ]*
/// ObjectName ::= /*empty*/
///
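+/// A pasted name such as rec#i concatenates its parts once the operands
+/// are resolved, which lets foreach bodies generate unique def names.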
-std::string TGParser::ParseObjectName() {
- if (Lex.getCode() != tgtok::Id)
- return GetNewAnonymousName();
+Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ // These are all of the tokens that can begin an object body.
+ // Some of these can also begin values but we disallow those cases
+ // because they are unlikely to be useful.
+ return StringInit::get(GetNewAnonymousName());
+ default:
+ break;
+ }
- std::string Ret = Lex.getCurStrVal();
- Lex.Lex();
- return Ret;
-}
+ Record *CurRec = 0;
+ if (CurMultiClass)
+ CurRec = &CurMultiClass->Rec;
+
+ RecTy *Type = 0;
+ if (CurRec) {
+ const TypedInit *CurRecName =
+ dynamic_cast<const TypedInit *>(CurRec->getNameInit());
+ if (!CurRecName) {
+ TokError("Record name is not typed!");
+ return 0;
+ }
+ Type = CurRecName->getType();
+ }
+ return ParseValue(CurRec, Type, ParseNameMode);
+}
/// ParseClassID - Parse and resolve a reference to a class name. This returns
/// null on error.
@@ -570,11 +702,11 @@ bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
/// ParseType - Parse and return a tblgen type. This returns null on error.
///
/// Type ::= STRING // string type
+/// Type ::= CODE // code type
/// Type ::= BIT // bit type
/// Type ::= BITS '<' INTVAL '>' // bits<x> type
/// Type ::= INT // int type
/// Type ::= LIST '<' Type '>' // list<x> type
-/// Type ::= CODE // code type
/// Type ::= DAG // dag type
/// Type ::= ClassID // Record Type
///
@@ -582,9 +714,9 @@ RecTy *TGParser::ParseType() {
switch (Lex.getCode()) {
default: TokError("Unknown token when expecting a type"); return 0;
case tgtok::String: Lex.Lex(); return StringRecTy::get();
+ case tgtok::Code: Lex.Lex(); return StringRecTy::get();
case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
case tgtok::Int: Lex.Lex(); return IntRecTy::get();
- case tgtok::Code: Lex.Lex(); return CodeRecTy::get();
case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
case tgtok::Id:
if (Record *R = ParseClassID()) return RecordRecTy::get(R);
@@ -633,7 +765,7 @@ RecTy *TGParser::ParseType() {
/// IDValue ::= ID [multiclass template argument]
/// IDValue ::= ID [def name]
///
-Init *TGParser::ParseIDValue(Record *CurRec) {
+Init *TGParser::ParseIDValue(Record *CurRec, IDParseMode Mode) {
assert(Lex.getCode() == tgtok::Id && "Expected ID in ParseIDValue");
std::string Name = Lex.getCurStrVal();
SMLoc Loc = Lex.getLoc();
@@ -644,14 +776,17 @@ Init *TGParser::ParseIDValue(Record *CurRec) {
/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
/// has already been read.
Init *TGParser::ParseIDValue(Record *CurRec,
- const std::string &Name, SMLoc NameLoc) {
+ const std::string &Name, SMLoc NameLoc,
+ IDParseMode Mode) {
if (CurRec) {
if (const RecordVal *RV = CurRec->getValue(Name))
return VarInit::get(Name, RV->getType());
- std::string TemplateArgName = CurRec->getName()+":"+Name;
+ Init *TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name, ":");
+
if (CurMultiClass)
- TemplateArgName = CurMultiClass->Rec.getName()+"::"+TemplateArgName;
+ TemplateArgName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
if (CurRec->isTemplateArg(TemplateArgName)) {
const RecordVal *RV = CurRec->getValue(TemplateArgName);
@@ -661,7 +796,9 @@ Init *TGParser::ParseIDValue(Record *CurRec,
}
if (CurMultiClass) {
- std::string MCName = CurMultiClass->Rec.getName()+"::"+Name;
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
+ "::");
+
if (CurMultiClass->Rec.isTemplateArg(MCName)) {
const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
assert(RV && "Template arg doesn't exist??");
@@ -669,11 +806,27 @@ Init *TGParser::ParseIDValue(Record *CurRec,
}
}
+ // If this is in a foreach loop, make sure it's not a loop iterator
+ for (LoopVector::iterator i = Loops.begin(), iend = Loops.end();
+ i != iend;
+ ++i) {
+ VarInit *IterVar = dynamic_cast<VarInit *>(i->IterVar);
+ if (IterVar && IterVar->getName() == Name)
+ return IterVar;
+ }
+
+ if (Mode == ParseNameMode)
+ return StringInit::get(Name);
+
if (Record *D = Records.getDef(Name))
return DefInit::get(D);
- Error(NameLoc, "Variable not defined: '" + Name + "'");
- return 0;
+ if (Mode == ParseValueMode) {
+ Error(NameLoc, "Variable not defined: '" + Name + "'");
+ return 0;
+ }
+
+ return StringInit::get(Name);
}
/// ParseOperation - Parse an operator. This returns null on error.
@@ -685,7 +838,6 @@ Init *TGParser::ParseOperation(Record *CurRec) {
default:
TokError("unknown operation");
return 0;
- break;
case tgtok::XHead:
case tgtok::XTail:
case tgtok::XEmpty:
@@ -694,7 +846,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
RecTy *Type = 0;
switch (Lex.getCode()) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XCast:
Lex.Lex(); // eat the operation
Code = UnOpInit::CAST;
@@ -810,7 +962,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
RecTy *Type = 0;
switch (OpTok) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
@@ -874,7 +1026,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
tgtok::TokKind LexCode = Lex.getCode();
Lex.Lex(); // eat the operation
switch (LexCode) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XIf:
Code = TernOpInit::IF;
break;
@@ -919,7 +1071,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
Lex.Lex(); // eat the ')'
switch (LexCode) {
- default: assert(0 && "Unhandled code!");
+ default: llvm_unreachable("Unhandled code!");
case tgtok::XIf: {
// FIXME: The `!if' operator doesn't handle non-TypedInit well at
// all. This can be made much more robust.
@@ -989,8 +1141,6 @@ Init *TGParser::ParseOperation(Record *CurRec) {
CurMultiClass);
}
}
- TokError("could not parse operation");
- return 0;
}
/// ParseOperatorType - Parse a type for an operator. This returns
@@ -1041,10 +1191,16 @@ RecTy *TGParser::ParseOperatorType() {
/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
///
-Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
+Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
+ IDParseMode Mode) {
Init *R = 0;
switch (Lex.getCode()) {
default: TokError("Unknown token when parsing a value"); break;
+ case tgtok::paste:
+ // This is a leading paste operation. This is deprecated but
+ // still exists in some .td files. Ignore it.
+ Lex.Lex(); // Skip '#'.
+ return ParseSimpleValue(CurRec, ItemType, Mode);
case tgtok::IntVal: R = IntInit::get(Lex.getCurIntVal()); Lex.Lex(); break;
case tgtok::StrVal: {
std::string Val = Lex.getCurStrVal();
@@ -1060,7 +1216,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
break;
}
case tgtok::CodeFragment:
- R = CodeInit::get(Lex.getCurStrVal());
+ R = StringInit::get(Lex.getCurStrVal());
Lex.Lex();
break;
case tgtok::question:
@@ -1071,7 +1227,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
SMLoc NameLoc = Lex.getLoc();
std::string Name = Lex.getCurStrVal();
if (Lex.Lex() != tgtok::less) // consume the Id.
- return ParseIDValue(CurRec, Name, NameLoc); // Value ::= IDValue
+ return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
// Value ::= ID '<' ValueListNE '>'
if (Lex.Lex() == tgtok::greater) {
@@ -1305,8 +1461,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType) {
/// ValueSuffix ::= '[' BitList ']'
/// ValueSuffix ::= '.' ID
///
-Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
- Init *Result = ParseSimpleValue(CurRec, ItemType);
+Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
+ Init *Result = ParseSimpleValue(CurRec, ItemType, Mode);
if (Result == 0) return 0;
// Parse the suffixes now if present.
@@ -1314,6 +1470,10 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
switch (Lex.getCode()) {
default: return Result;
case tgtok::l_brace: {
+ if (Mode == ParseNameMode || Mode == ParseForeachMode)
+ // This is the beginning of the object body.
+ return Result;
+
SMLoc CurlyLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
std::vector<unsigned> Ranges = ParseRangeList();
@@ -1368,6 +1528,56 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType) {
Result = FieldInit::get(Result, Lex.getCurStrVal());
Lex.Lex(); // eat field name
break;
+
+ case tgtok::paste:
+ SMLoc PasteLoc = Lex.getLoc();
+
+ // Create a !strconcat() operation, first casting each operand to
+ // a string if necessary.
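+ // E.g. Prefix # Suffix folds to
+ // !strconcat(!cast<string>(Prefix), !cast<string>(Suffix)).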
+
+ TypedInit *LHS = dynamic_cast<TypedInit *>(Result);
+ if (!LHS) {
+ Error(PasteLoc, "LHS of paste is not typed!");
+ return 0;
+ }
+
+ if (LHS->getType() != StringRecTy::get()) {
+ LHS = UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get());
+ }
+
+ TypedInit *RHS = 0;
+
+ Lex.Lex(); // Eat the '#'.
+ switch (Lex.getCode()) {
+ case tgtok::colon:
+ case tgtok::semi:
+ case tgtok::l_brace:
+ // These are all of the tokens that can begin an object body.
+ // Some of these can also begin values but we disallow those cases
+ // because they are unlikely to be useful.
+
+ // Trailing paste, concat with an empty string.
+ RHS = StringInit::get("");
+ break;
+
+ default:
+ Init *RHSResult = ParseValue(CurRec, ItemType, ParseNameMode);
+ RHS = dynamic_cast<TypedInit *>(RHSResult);
+ if (!RHS) {
+ Error(PasteLoc, "RHS of paste is not typed!");
+ return 0;
+ }
+
+ if (RHS->getType() != StringRecTy::get()) {
+ RHS = UnOpInit::get(UnOpInit::CAST, RHS, StringRecTy::get());
+ }
+
+ break;
+ }
+
+ Result = BinOpInit::get(BinOpInit::STRCONCAT, LHS, RHS,
+ StringRecTy::get())->Fold(CurRec, CurMultiClass);
+ break;
}
}
}
@@ -1417,7 +1627,11 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
RecTy *ItemType = EltTy;
unsigned int ArgN = 0;
if (ArgsRec != 0 && EltTy == 0) {
- const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+ const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
+ if (!TArgs.size()) {
+ TokError("template argument provided to non-template class");
+ return std::vector<Init*>();
+ }
const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
if (!RV) {
errs() << "Cannot find template arg " << ArgN << " (" << TArgs[ArgN]
@@ -1434,7 +1648,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
Lex.Lex(); // Eat the comma
if (ArgsRec != 0 && EltTy == 0) {
- const std::vector<std::string> &TArgs = ArgsRec->getTemplateArgs();
+ const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
if (ArgN >= TArgs.size()) {
TokError("too many template arguments");
return std::vector<Init*>();
@@ -1462,37 +1676,38 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
///
/// Declaration ::= FIELD? Type ID ('=' Value)?
///
-std::string TGParser::ParseDeclaration(Record *CurRec,
+Init *TGParser::ParseDeclaration(Record *CurRec,
bool ParsingTemplateArgs) {
// Read the field prefix if present.
bool HasField = Lex.getCode() == tgtok::Field;
if (HasField) Lex.Lex();
RecTy *Type = ParseType();
- if (Type == 0) return "";
+ if (Type == 0) return 0;
if (Lex.getCode() != tgtok::Id) {
TokError("Expected identifier in declaration");
- return "";
+ return 0;
}
SMLoc IdLoc = Lex.getLoc();
- std::string DeclName = Lex.getCurStrVal();
+ Init *DeclName = StringInit::get(Lex.getCurStrVal());
Lex.Lex();
if (ParsingTemplateArgs) {
if (CurRec) {
- DeclName = CurRec->getName() + ":" + DeclName;
+ DeclName = QualifyName(*CurRec, CurMultiClass, DeclName, ":");
} else {
assert(CurMultiClass);
}
if (CurMultiClass)
- DeclName = CurMultiClass->Rec.getName() + "::" + DeclName;
+ DeclName = QualifyName(CurMultiClass->Rec, CurMultiClass, DeclName,
+ "::");
}
// Add the value.
if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField)))
- return "";
+ return 0;
// If a value is present, parse it.
if (Lex.getCode() == tgtok::equal) {
@@ -1501,12 +1716,56 @@ std::string TGParser::ParseDeclaration(Record *CurRec,
Init *Val = ParseValue(CurRec, Type);
if (Val == 0 ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
- return "";
+ return 0;
}
return DeclName;
}
+/// ParseForeachDeclaration - Read a foreach declaration, returning
+/// the name of the declared object or a NULL Init on error. Return
+/// the parsed list initializer through ForeachListValue.
+///
+/// ForeachDeclaration ::= ID '=' Value
+///
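+/// E.g. for "foreach i = [1, 2, 3] in ...", this consumes "i = [1, 2, 3]"
+/// and returns the iterator variable i bound to that list.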
+Init *TGParser::ParseForeachDeclaration(Init *&ForeachListValue) {
+ if (Lex.getCode() != tgtok::Id) {
+ TokError("Expected identifier in foreach declaration");
+ return 0;
+ }
+
+ Init *DeclName = StringInit::get(Lex.getCurStrVal());
+ Lex.Lex();
+
+ // If a value is present, parse it.
+ if (Lex.getCode() != tgtok::equal) {
+ TokError("Expected '=' in foreach declaration");
+ return 0;
+ }
+ Lex.Lex(); // Eat the '='
+
+ // Expect a list initializer.
+ ForeachListValue = ParseValue(0, 0, ParseForeachMode);
+
+ TypedInit *TypedList = dynamic_cast<TypedInit *>(ForeachListValue);
+ if (TypedList == 0) {
+ TokError("Value list is untyped");
+ return 0;
+ }
+
+ RecTy *ValueType = TypedList->getType();
+ ListRecTy *ListType = dynamic_cast<ListRecTy *>(ValueType);
+ if (ListType == 0) {
+ TokError("Value list is not of list type");
+ return 0;
+ }
+
+ RecTy *IterType = ListType->getElementType();
+ VarInit *IterVar = VarInit::get(DeclName, IterType);
+
+ return IterVar;
+}
+
/// ParseTemplateArgList - Read a template argument list, which is a non-empty
/// sequence of template-declarations in <>'s. If CurRec is non-null, these are
/// template args for a def, which may or may not be in a multiclass. If null,
@@ -1521,8 +1780,8 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
Record *TheRecToAddTo = CurRec ? CurRec : &CurMultiClass->Rec;
// Read the first declaration.
- std::string TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
- if (TemplArg.empty())
+ Init *TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
+ if (TemplArg == 0)
return true;
TheRecToAddTo->addTemplateArg(TemplArg);
@@ -1532,7 +1791,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
// Read the following declarations.
TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
- if (TemplArg.empty())
+ if (TemplArg == 0)
return true;
TheRecToAddTo->addTemplateArg(TemplArg);
}
@@ -1550,7 +1809,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
bool TGParser::ParseBodyItem(Record *CurRec) {
if (Lex.getCode() != tgtok::Let) {
- if (ParseDeclaration(CurRec, false).empty())
+ if (ParseDeclaration(CurRec, false) == 0)
return true;
if (Lex.getCode() != tgtok::semi)
@@ -1671,22 +1930,24 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
Lex.Lex(); // Eat the 'def' token.
// Parse ObjectName and make a record for it.
- Record *CurRec = new Record(ParseObjectName(), DefLoc, Records);
+ Record *CurRec = new Record(ParseObjectName(CurMultiClass), DefLoc, Records);
if (!CurMultiClass) {
// Top-level def definition.
// Ensure redefinition doesn't happen.
- if (Records.getDef(CurRec->getName())) {
- Error(DefLoc, "def '" + CurRec->getName() + "' already defined");
+ if (Records.getDef(CurRec->getNameInitAsString())) {
+ Error(DefLoc, "def '" + CurRec->getNameInitAsString()
+ + "' already defined");
return true;
}
Records.addDef(CurRec);
} else {
// Otherwise, a def inside a multiclass, add it to the multiclass.
for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size(); i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName()) {
- Error(DefLoc, "def '" + CurRec->getName() +
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit()) {
+ Error(DefLoc, "def '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
return true;
}
@@ -1707,7 +1968,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
if (CurMultiClass) {
// Copy the template arguments for the multiclass into the def.
- const std::vector<std::string> &TArgs =
+ const std::vector<Init *> &TArgs =
CurMultiClass->Rec.getTemplateArgs();
for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
@@ -1717,6 +1978,63 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
}
}
+ if (ProcessForeachDefs(CurRec, CurMultiClass, DefLoc)) {
+ Error(DefLoc,
+ "Could not process loops for def" + CurRec->getNameInitAsString());
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseForeach - Parse a foreach statement, instantiating its body for
+/// each iterator value. This returns true on error.
+///
+/// Foreach ::= FOREACH Declaration IN '{' ObjectList '}'
+/// Foreach ::= FOREACH Declaration IN Object
+///
+bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
+ assert(Lex.getCode() == tgtok::Foreach && "Unknown tok");
+ Lex.Lex(); // Eat the 'foreach' token.
+
+ // Make a temporary object to record items associated with the for
+ // loop.
+ Init *ListValue = 0;
+ Init *IterName = ParseForeachDeclaration(ListValue);
+ if (IterName == 0)
+ return TokError("expected declaration in for");
+
+ if (Lex.getCode() != tgtok::In)
+ return TokError("Unknown tok");
+ Lex.Lex(); // Eat the in
+
+ // Create a loop object and remember it.
+ Loops.push_back(ForeachLoop(IterName, ListValue));
+
+ if (Lex.getCode() != tgtok::l_brace) {
+ // FOREACH Declaration IN Object
+ if (ParseObject(CurMultiClass))
+ return true;
+ } else {
+ SMLoc BraceLoc = Lex.getLoc();
+ // Otherwise, this is a group foreach.
+ Lex.Lex(); // eat the '{'.
+
+ // Parse the object list.
+ if (ParseObjectList(CurMultiClass))
+ return true;
+
+ if (Lex.getCode() != tgtok::r_brace) {
+ TokError("expected '}' at end of foreach command");
+ return Error(BraceLoc, "to match this '{'");
+ }
+ Lex.Lex(); // Eat the }
+ }
+
+ // We've processed everything in this loop.
+ Loops.pop_back();
+
return false;
}
@@ -1734,10 +2052,11 @@ bool TGParser::ParseClass() {
Record *CurRec = Records.getClass(Lex.getCurStrVal());
if (CurRec) {
// If the body was previously defined, this is an error.
- if (!CurRec->getValues().empty() ||
+ if (CurRec->getValues().size() > 1 || // Account for NAME.
!CurRec->getSuperClasses().empty() ||
!CurRec->getTemplateArgs().empty())
- return TokError("Class '" + CurRec->getName() + "' already defined");
+ return TokError("Class '" + CurRec->getNameInitAsString()
+ + "' already defined");
} else {
// If this is the first reference to this class, create and add it.
CurRec = new Record(Lex.getCurStrVal(), Lex.getLoc(), Records);
@@ -1909,6 +2228,7 @@ bool TGParser::ParseMultiClass() {
case tgtok::Let:
case tgtok::Def:
case tgtok::Defm:
+ case tgtok::Foreach:
if (ParseObject(CurMultiClass))
return true;
break;
@@ -1924,23 +2244,31 @@ bool TGParser::ParseMultiClass() {
Record *TGParser::
InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
- const std::string &DefmPrefix,
+ Init *DefmPrefix,
SMLoc DefmPrefixLoc) {
+ // We need to preserve DefProto so it can be reused for later
+ // instantiations, so create a new Record to inherit from it.
+
// Add in the defm name. If the defm prefix is empty, give each
// instantiated def a unique name. Otherwise, if "#NAME#" exists in the
// name, substitute the prefix for #NAME#. Otherwise, use the defm name
// as a prefix.
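+ //
+ // E.g. "defm FOO : M<...>;" instantiates a prototype named "bar" as
+ // FOObar, and a prototype named "bar_#NAME#_x" as bar_FOO_x.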
- std::string DefName = DefProto->getName();
- if (DefmPrefix.empty()) {
- DefName = GetNewAnonymousName();
- } else {
- std::string::size_type idx = DefName.find("#NAME#");
- if (idx != std::string::npos) {
- DefName.replace(idx, 6, DefmPrefix);
- } else {
- // Add the suffix to the defm name to get the new name.
- DefName = DefmPrefix + DefName;
- }
+
+ if (DefmPrefix == 0)
+ DefmPrefix = StringInit::get(GetNewAnonymousName());
+
+ Init *DefName = DefProto->getNameInit();
+
+ StringInit *DefNameString = dynamic_cast<StringInit *>(DefName);
+
+ if (DefNameString != 0) {
+ // We have a fully expanded string so there are no operators to
+ // resolve. We should concatenate the given prefix and name.
+ DefName =
+ BinOpInit::get(BinOpInit::STRCONCAT,
+ UnOpInit::get(UnOpInit::CAST, DefmPrefix,
+ StringRecTy::get())->Fold(DefProto, &MC),
+ DefName, StringRecTy::get())->Fold(DefProto, &MC);
}
Record *CurRec = new Record(DefName, DefmPrefixLoc, Records);
@@ -1950,6 +2278,41 @@ InstantiateMulticlassDef(MultiClass &MC,
Ref.Rec = DefProto;
AddSubClass(CurRec, Ref);
+ if (DefNameString == 0) {
+ // We must resolve references to NAME.
+ if (SetValue(CurRec, Ref.RefLoc, "NAME", std::vector<unsigned>(),
+ DefmPrefix)) {
+ Error(DefmPrefixLoc, "Could not resolve "
+ + CurRec->getNameInitAsString() + ":NAME to '"
+ + DefmPrefix->getAsUnquotedString() + "'");
+ return 0;
+ }
+
+ RecordVal *DefNameRV = CurRec->getValue("NAME");
+ CurRec->resolveReferencesTo(DefNameRV);
+ }
+
+ if (!CurMultiClass) {
+ // We do this after resolving NAME because before resolution, many
+ // multiclass defs will have the same name expression. If we are
+ // currently in a multiclass, it means this defm appears inside a
+ // multiclass and its name won't be fully resolvable until we see
+ // the top-level defm. Therefore, we don't add this to the
+ // RecordKeeper at this point. If we did we could get duplicate
+ // defs as more than one probably refers to NAME or some other
+ // common internal placeholder.
+
+ // Ensure redefinition doesn't happen.
+ if (Records.getDef(CurRec->getNameInitAsString())) {
+ Error(DefmPrefixLoc, "def '" + CurRec->getNameInitAsString() +
+ "' already defined, instantiating defm with subdef '" +
+ DefProto->getNameInitAsString() + "'");
+ return 0;
+ }
+
+ Records.addDef(CurRec);
+ }
+
return CurRec;
}
@@ -1957,7 +2320,7 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
Record *CurRec,
SMLoc DefmPrefixLoc,
SMLoc SubClassLoc,
- const std::vector<std::string> &TArgs,
+ const std::vector<Init *> &TArgs,
std::vector<Init *> &TemplateVals,
bool DeleteArgs) {
// Loop over all of the template arguments, setting them to the specified
@@ -1979,8 +2342,9 @@ bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC,
} else if (!CurRec->getValue(TArgs[i])->getValue()->isComplete()) {
return Error(SubClassLoc, "value not specified for template argument #"+
- utostr(i) + " (" + TArgs[i] + ") of multiclassclass '" +
- MC.Rec.getName() + "'");
+ utostr(i) + " (" + TArgs[i]->getAsUnquotedString()
+ + ") of multiclassclass '" + MC.Rec.getNameInitAsString()
+ + "'");
}
}
return false;
@@ -1997,25 +2361,20 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
LetStack[i][j].Bits, LetStack[i][j].Value))
return Error(DefmPrefixLoc, "when instantiating this defm");
- // Ensure redefinition doesn't happen.
- if (Records.getDef(CurRec->getName()))
- return Error(DefmPrefixLoc, "def '" + CurRec->getName() +
- "' already defined, instantiating defm with subdef '" +
- DefProto->getName() + "'");
-
// Don't create a top level definition for defm inside multiclasses,
// instead, only update the prototypes and bind the template args
// with the new created definition.
if (CurMultiClass) {
for (unsigned i = 0, e = CurMultiClass->DefPrototypes.size();
i != e; ++i)
- if (CurMultiClass->DefPrototypes[i]->getName() == CurRec->getName())
- return Error(DefmPrefixLoc, "defm '" + CurRec->getName() +
+ if (CurMultiClass->DefPrototypes[i]->getNameInit()
+ == CurRec->getNameInit())
+ return Error(DefmPrefixLoc, "defm '" + CurRec->getNameInitAsString() +
"' already defined in this multiclass!");
CurMultiClass->DefPrototypes.push_back(CurRec);
// Copy the template arguments for the multiclass into the new def.
- const std::vector<std::string> &TA =
+ const std::vector<Init *> &TA =
CurMultiClass->Rec.getTemplateArgs();
for (unsigned i = 0, e = TA.size(); i != e; ++i) {
@@ -2023,8 +2382,6 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
assert(RV && "Template arg doesn't exist?");
CurRec->addValue(*RV);
}
- } else {
- Records.addDef(CurRec);
}
return false;
@@ -2037,10 +2394,10 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
- std::string DefmPrefix;
+ Init *DefmPrefix = 0;
+
if (Lex.Lex() == tgtok::Id) { // eat the defm.
- DefmPrefix = Lex.getCurStrVal();
- Lex.Lex(); // Eat the defm prefix.
+ DefmPrefix = ParseObjectName(CurMultiClass);
}
SMLoc DefmPrefixLoc = Lex.getLoc();
@@ -2070,7 +2427,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
std::vector<Init*> &TemplateVals = Ref.TemplateArgs;
// Verify that the correct number of template arguments were specified.
- const std::vector<std::string> &TArgs = MC->Rec.getTemplateArgs();
+ const std::vector<Init *> &TArgs = MC->Rec.getTemplateArgs();
if (TArgs.size() < TemplateVals.size())
return Error(SubClassLoc,
"more template args specified than multiclass expects");
@@ -2080,6 +2437,8 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
Record *DefProto = MC->DefPrototypes[i];
 Record *CurRec = InstantiateMulticlassDef(*MC, DefProto, DefmPrefix,
                                           DefmPrefixLoc);
+ if (!CurRec)
+ return true;
if (ResolveMulticlassDefArgs(*MC, CurRec, DefmPrefixLoc, SubClassLoc,
TArgs, TemplateVals, true/*Delete args*/))
@@ -2165,6 +2524,7 @@ bool TGParser::ParseObject(MultiClass *MC) {
return TokError("Expected class, def, defm, multiclass or let definition");
case tgtok::Let: return ParseTopLevelLet(MC);
case tgtok::Def: return ParseDef(MC);
+ case tgtok::Foreach: return ParseForeach(MC);
case tgtok::Defm: return ParseDefm(MC);
case tgtok::Class: return ParseClass();
case tgtok::MultiClass: return ParseMultiClass();
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index db8a62029746..b8e7cb1929bb 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -14,6 +14,7 @@
#ifndef TGPARSER_H
#define TGPARSER_H
+#include "llvm/TableGen/Record.h"
#include "TGLexer.h"
#include "llvm/TableGen/Error.h"
#include "llvm/ADT/Twine.h"
@@ -41,17 +42,44 @@ namespace llvm {
}
};
+ /// ForeachLoop - Record the iteration state associated with a foreach
+ /// loop. This is used to instantiate items in the loop body.
+ struct ForeachLoop {
+ Init *IterVar;
+ Init *ListValue;
+
+ ForeachLoop(Init *IVar, Init *LValue) : IterVar(IVar), ListValue(LValue) {}
+ };
+
class TGParser {
TGLexer Lex;
std::vector<std::vector<LetRecord> > LetStack;
std::map<std::string, MultiClass*> MultiClasses;
+ /// Loops - Keep track of any foreach loops we are within.
+ ///
+ typedef std::vector<ForeachLoop> LoopVector;
+ LoopVector Loops;
+
/// CurMultiClass - If we are parsing a 'multiclass' definition, this is the
/// current value.
MultiClass *CurMultiClass;
// Record tracker
RecordKeeper &Records;
+
+ // A "named boolean" indicating how to parse identifiers. Usually
+ // identifiers map to some existing object but in special cases
+ // (e.g. parsing def names) no such object exists yet because we are
+ // in the middle of creating it. For those situations, allow the
+ // parser to ignore missing object errors.
+ enum IDParseMode {
+ ParseValueMode, // We are parsing a value we expect to look up.
+ ParseNameMode, // We are parsing a name of an object that does not yet
+ // exist.
+ ParseForeachMode // We are parsing a foreach init.
+ };
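
A tiny sketch of why the parser needs these modes; the helper below is a hypothetical mirror of the enum, not the real ParseIDValue API:

  // In value mode an unknown identifier is an error; in name mode it is
  // accepted as the name of an object that is still being created.
  enum class IdMode { Value, Name };

  const char *classifyIdentifier(bool ObjectExists, IdMode Mode) {
    if (ObjectExists)
      return "resolved reference";              // ParseValueMode succeeds
    return Mode == IdMode::Name ? "name of a def being created"
                                : "error: undefined identifier";
  }
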
+
public:
TGParser(SourceMgr &SrcMgr, RecordKeeper &records) :
Lex(SrcMgr), CurMultiClass(0), Records(records) {}
@@ -70,14 +98,36 @@ public:
const std::vector<std::string> &getDependencies() const {
return Lex.getDependencies();
}
+
private: // Semantic analysis methods.
bool AddValue(Record *TheRec, SMLoc Loc, const RecordVal &RV);
- bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
+ bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V);
+ bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
+ const std::vector<unsigned> &BitList, Init *V) {
+ return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V);
+ }
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,
SubMultiClassReference &SubMultiClass);
+ // IterRecord: Map an iterator name to a value.
+ struct IterRecord {
+ Init *IterVar;
+ Init *IterValue;
+ IterRecord(Init *Var, Init *Val) : IterVar(Var), IterValue(Val) {}
+ };
+
+ // IterSet: The set of all iterator values at some point in the
+ // iteration space.
+ typedef std::vector<IterRecord> IterSet;
+
+ bool ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc);
+ bool ProcessForeachDefs(Record *CurRec, MultiClass *CurMultiClass,
+ SMLoc Loc, IterSet &IterVals, ForeachLoop &CurLoop,
+ LoopVector::iterator NextLoop);
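
A minimal sketch of the recursion these two overloads suggest: each loop level binds its iterator to every element of its list before recursing, so the innermost call sees one complete IterSet per point of the iteration space. All names here are illustrative:

  #include <functional>
  #include <string>
  #include <utility>
  #include <vector>

  using IterVals = std::vector<std::pair<std::string, int>>; // (iterator, value)
  struct Loop { std::string IterVar; std::vector<int> ListValue; };

  void forEachPoint(const std::vector<Loop> &Loops, unsigned Level,
                    IterVals &Vals,
                    const std::function<void(const IterVals &)> &Instantiate) {
    if (Level == Loops.size()) {
      Instantiate(Vals); // all iterators bound; instantiate one def
      return;
    }
    for (int V : Loops[Level].ListValue) {
      Vals.emplace_back(Loops[Level].IterVar, V);
      forEachPoint(Loops, Level + 1, Vals, Instantiate);
      Vals.pop_back();
    }
  }

  // Two nested loops, i in [0,1] and j in [10,20], call Instantiate four
  // times, once per (i, j) combination.
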
+
private: // Parser methods.
bool ParseObjectList(MultiClass *MC = 0);
bool ParseObject(MultiClass *MC);
@@ -85,13 +135,13 @@ private: // Parser methods.
bool ParseMultiClass();
Record *InstantiateMulticlassDef(MultiClass &MC,
Record *DefProto,
- const std::string &DefmPrefix,
+ Init *DefmPrefix,
SMLoc DefmPrefixLoc);
bool ResolveMulticlassDefArgs(MultiClass &MC,
Record *DefProto,
SMLoc DefmPrefixLoc,
SMLoc SubClassLoc,
- const std::vector<std::string> &TArgs,
+ const std::vector<Init *> &TArgs,
std::vector<Init *> &TemplateVals,
bool DeleteArgs);
bool ResolveMulticlassDef(MultiClass &MC,
@@ -100,6 +150,7 @@ private: // Parser methods.
SMLoc DefmPrefixLoc);
bool ParseDefm(MultiClass *CurMultiClass);
bool ParseDef(MultiClass *CurMultiClass);
+ bool ParseForeach(MultiClass *CurMultiClass);
bool ParseTopLevelLet(MultiClass *CurMultiClass);
std::vector<LetRecord> ParseLetList();
@@ -108,15 +159,19 @@ private: // Parser methods.
bool ParseBodyItem(Record *CurRec);
bool ParseTemplateArgList(Record *CurRec);
- std::string ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs);
+ Init *ParseDeclaration(Record *CurRec, bool ParsingTemplateArgs);
+ Init *ParseForeachDeclaration(Init *&ForeachListValue);
SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm);
SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC);
- Init *ParseIDValue(Record *CurRec);
- Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc);
- Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0);
- Init *ParseValue(Record *CurRec, RecTy *ItemType = 0);
+ Init *ParseIDValue(Record *CurRec, IDParseMode Mode = ParseValueMode);
+ Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc,
+ IDParseMode Mode = ParseValueMode);
+ Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0,
+ IDParseMode Mode = ParseValueMode);
+ Init *ParseValue(Record *CurRec, RecTy *ItemType = 0,
+ IDParseMode Mode = ParseValueMode);
std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0, RecTy *EltTy = 0);
std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
@@ -126,7 +181,7 @@ private: // Parser methods.
RecTy *ParseType();
Init *ParseOperation(Record *CurRec);
RecTy *ParseOperatorType();
- std::string ParseObjectName();
+ Init *ParseObjectName(MultiClass *CurMultiClass);
Record *ParseClassID();
MultiClass *ParseMultiClassID();
Record *ParseDefmID();
diff --git a/lib/TableGen/TableGenAction.cpp b/lib/TableGen/TableGenAction.cpp
new file mode 100644
index 000000000000..54e508309457
--- /dev/null
+++ b/lib/TableGen/TableGenAction.cpp
@@ -0,0 +1,15 @@
+//===- TableGenAction.cpp - defines TableGenAction --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/TableGenAction.h"
+
+using namespace llvm;
+
+void TableGenAction::anchor() { }
+
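
For context, an out-of-line anchor() like the one above is a common way to pin a class's vtable to a single translation unit; the isolated sketch below shows the idiom under that assumption:

  // Header: the key function is declared but not defined inline, so the
  // compiler emits the vtable only where anchor() is defined.
  struct Action {
    virtual ~Action() {}
    virtual void anchor(); // intentionally out of line
  };

  // Exactly one .cpp file provides the definition:
  void Action::anchor() {}
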
diff --git a/lib/TableGen/TableGenBackend.cpp b/lib/TableGen/TableGenBackend.cpp
index 29588db324cf..09bcc7a5b53e 100644
--- a/lib/TableGen/TableGenBackend.cpp
+++ b/lib/TableGen/TableGenBackend.cpp
@@ -15,7 +15,9 @@
#include "llvm/TableGen/Record.h"
using namespace llvm;
-void TableGenBackend::EmitSourceFileHeader(const std::string &Desc,
+void TableGenBackend::anchor() { }
+
+void TableGenBackend::EmitSourceFileHeader(StringRef Desc,
raw_ostream &OS) const {
OS << "//===- TableGen'erated file -------------------------------------*-"
" C++ -*-===//\n//\n// " << Desc << "\n//\n// Automatically generate"
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 16d0da3b8ac7..2a1e8e4d3079 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -1,4 +1,4 @@
-//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//===-- ARM.h - Top-level interface for ARM representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,7 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 5c727ad6e343..9b0cb0c9e575 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//===-- ARM.td - Describe the ARM Target Machine -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@ include "llvm/Target/Target.td"
def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
"Thumb mode">;
-def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
- "Native client mode">;
-
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
@@ -35,6 +32,9 @@ def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
"Enable VFP3 instructions",
[FeatureVFP2]>;
+def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true",
+ "Enable VFP4 instructions",
+ [FeatureVFP3]>;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable NEON instructions",
[FeatureVFP3]>;
@@ -86,6 +86,11 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+// Some processors perform return stack prediction. CodeGen should avoid
+// issuing "normal" call instructions to callees which do not return.
+def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true",
+ "Has return address stack">;
+
/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
"Supports v7 DSP instructions in Thumb2">;
@@ -201,13 +206,14 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
// V7a Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2]>;
+ FeatureDSPThumb2, FeatureHasRAS]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2]>;
+ FeatureDSPThumb2, FeatureHasRAS]>;
def : Processor<"cortex-a9-mp", CortexA9Itineraries,
[ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
- FeatureDSPThumb2, FeatureMP]>;
+ FeatureDSPThumb2, FeatureMP,
+ FeatureHasRAS]>;
// V7M Processors.
def : ProcNoItin<"cortex-m3", [HasV7Ops,
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index ea3319fb0e03..410790a7baa0 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -13,10 +13,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
-#include "ARM.h"
#include "ARMAsmPrinter.h"
+#include "ARM.h"
#include "ARMBuildAttrs.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
@@ -35,7 +34,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectStreamer.h"
@@ -44,10 +42,7 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -85,15 +80,15 @@ namespace {
void EmitTextAttribute(unsigned Attribute, StringRef String) {
switch (Attribute) {
+ default: llvm_unreachable("Unsupported Text attribute in ASM Mode");
case ARMBuildAttrs::CPU_name:
- Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String));
+ Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower());
break;
/* GAS requires .fpu to be emitted regardless of EABI attribute */
case ARMBuildAttrs::Advanced_SIMD_arch:
case ARMBuildAttrs::VFP_arch:
- Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String));
+ Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower());
break;
- default: assert(0 && "Unsupported Text attribute in ASM Mode"); break;
}
}
void Finish() { }
@@ -197,15 +192,14 @@ namespace {
AttributeItemType item = Contents[i];
Streamer.EmitULEB128IntValue(item.Tag, 0);
switch (item.Type) {
+ default: llvm_unreachable("Invalid attribute type");
case AttributeItemType::NumericAttribute:
Streamer.EmitULEB128IntValue(item.IntValue, 0);
break;
case AttributeItemType::TextAttribute:
- Streamer.EmitBytes(UppercaseString(item.StringValue), 0);
+ Streamer.EmitBytes(item.StringValue.upper(), 0);
Streamer.EmitIntValue(0, 1); // '\0'
break;
- default:
- assert(0 && "Invalid attribute type");
}
}
@@ -300,6 +294,22 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitLabel(CurrentFnSym);
}
+void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
+ uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+ assert(Size && "C++ constructor pointer had zero size!");
+
+ const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts());
+ assert(GV && "C++ constructor pointer was not a GlobalValue!");
+
+ const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV),
+ (Subtarget->isTargetDarwin()
+ ? MCSymbolRefExpr::VK_None
+ : MCSymbolRefExpr::VK_ARM_TARGET1),
+ OutContext);
+
+ OutStreamer.EmitValue(E, Size);
+}
+
/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
@@ -316,8 +326,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned TF = MO.getTargetFlags();
switch (MO.getType()) {
- default:
- assert(0 && "<unknown operand type>");
+ default: llvm_unreachable("<unknown operand type>");
case MachineOperand::MO_Register: {
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
@@ -494,11 +503,21 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
return false;
}
- // These modifiers are not yet supported.
- case 'p': // The high single-precision register of a VFP double-precision
- // register.
case 'e': // The low doubleword register of a NEON quad register.
- case 'f': // The high doubleword register of a NEON quad register.
+ case 'f': { // The high doubleword register of a NEON quad register.
+ if (!MI->getOperand(OpNum).isReg())
+ return true;
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (!ARM::QPRRegClass.contains(Reg))
+ return true;
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ?
+ ARM::dsub_0 : ARM::dsub_1);
+ O << ARMInstPrinter::getRegisterName(SubReg);
+ return false;
+ }
+
+ // These modifiers are not yet supported.
case 'h': // A range of VFP/NEON registers suitable for VLD1/VST1.
case 'H': // The highest-numbered register of a pair.
return true;
@@ -576,10 +595,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified);
// Emit ARM Build Attributes
- if (Subtarget->isTargetELF()) {
-
+ if (Subtarget->isTargetELF())
emitAttributes();
- }
}
@@ -710,15 +727,26 @@ void ARMAsmPrinter::emitAttributes() {
if (Subtarget->hasNEON() && emitFPU) {
 /* NEON is not exactly a VFP architecture, but GAS emits one of
- * neon/vfpv3/vfpv2 for .fpu parameters */
- AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
+ * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */
+ if (Subtarget->hasVFP4())
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
+ "neon-vfpv4");
+ else
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon");
/* If emitted for NEON, omit from VFP below, since you can have both
* NEON and VFP in build attributes but only one .fpu */
emitFPU = false;
}
+ /* VFPv4 + .fpu */
+ if (Subtarget->hasVFP4()) {
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
+ ARMBuildAttrs::AllowFPv4A);
+ if (emitFPU)
+ AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4");
+
/* VFPv3 + .fpu */
- if (Subtarget->hasVFP3()) {
+ } else if (Subtarget->hasVFP3()) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch,
ARMBuildAttrs::AllowFPv3A);
if (emitFPU)
@@ -740,14 +768,14 @@ void ARMAsmPrinter::emitAttributes() {
}
// Signal various FP modes.
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::Allowed);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
ARMBuildAttrs::Allowed);
}
- if (NoInfsFPMath && NoNaNsFPMath)
+ if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model,
ARMBuildAttrs::Allowed);
else
@@ -760,7 +788,7 @@ void ARMAsmPrinter::emitAttributes() {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1);
// Hard float. Use both S and D registers and conform to AAPCS-VFP.
- if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) {
+ if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3);
AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1);
}
@@ -808,7 +836,6 @@ static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
static MCSymbolRefExpr::VariantKind
getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
case ARMCP::TLSGD: return MCSymbolRefExpr::VK_ARM_TLSGD;
case ARMCP::TPOFF: return MCSymbolRefExpr::VK_ARM_TPOFF;
@@ -816,7 +843,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
case ARMCP::GOT: return MCSymbolRefExpr::VK_ARM_GOT;
case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_ARM_GOTOFF;
}
- return MCSymbolRefExpr::VK_None;
+ llvm_unreachable("Invalid ARMCPModifier!");
}
MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
@@ -1070,7 +1097,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
// Try to figure out the unwinding opcode out of src / dst regs.
- if (MI->getDesc().mayStore()) {
+ if (MI->mayStore()) {
// Register saves.
assert(DstReg == ARM::SP &&
"Only stack pointer as a destination reg is supported");
@@ -1084,7 +1111,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::tPUSH:
// Special case here: no src & dst reg, but two extra imp ops.
StartOp = 2; NumOffset = 2;
@@ -1099,6 +1126,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
break;
case ARM::STR_PRE_IMM:
case ARM::STR_PRE_REG:
+ case ARM::t2STR_PRE:
assert(MI->getOperand(2).getReg() == ARM::SP &&
"Only stack pointer as a source reg is supported");
RegList.push_back(SrcReg);
@@ -1112,14 +1140,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
switch (Opc) {
default:
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
case ARM::MOVr:
+ case ARM::tMOVr:
Offset = 0;
break;
case ARM::ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
+ case ARM::t2SUBri:
Offset = MI->getOperand(2).getImm();
break;
case ARM::tSUBspi:
@@ -1157,16 +1187,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
OutStreamer.EmitPad(Offset);
} else {
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
} else if (DstReg == ARM::SP) {
// FIXME: .movsp goes here
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
else {
MI->dump();
- assert(0 && "Unsupported opcode for unwinding information");
+ llvm_unreachable("Unsupported opcode for unwinding information");
}
}
}
@@ -1195,7 +1225,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Check for manual lowerings.
unsigned Opc = MI->getOpcode();
switch (Opc) {
- case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
+ case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass");
case ARM::DBG_VALUE: {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
SmallString<128> TmpStr;
@@ -1237,7 +1267,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
- case ARM::BXr9_CALL:
case ARM::BX_CALL: {
{
MCInst TmpInst;
@@ -1259,7 +1288,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::tBXr9_CALL:
case ARM::tBX_CALL: {
{
MCInst TmpInst;
@@ -1282,7 +1310,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::BMOVPCRXr9_CALL:
case ARM::BMOVPCRX_CALL: {
{
MCInst TmpInst;
@@ -1310,6 +1337,58 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
+ case ARM::BMOVPCB_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ // Add 's' bit operand (always reg0 for this)
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::Bcc);
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
+ case ARM::t2BMOVPCB_CALL: {
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tMOVr);
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
+ TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2B);
+ const GlobalValue *GV = MI->getOperand(0).getGlobal();
+ MCSymbol *GVSym = Mang->getSymbol(GV);
+ const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext);
+ TmpInst.addOperand(MCOperand::CreateExpr(GVSymExpr));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
+ OutStreamer.EmitInstruction(TmpInst);
+ }
+ return;
+ }
case ARM::MOVi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel: {
MCInst TmpInst;
@@ -1482,11 +1561,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
/// in the function. The first operand is the ID# for this instruction, the
/// second is the index into the MachineConstantPool that this is, the third
/// is the size in bytes of this constant pool entry.
+ /// The required alignment is specified on the basic block holding this MI.
unsigned LabelId = (unsigned)MI->getOperand(0).getImm();
unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex();
- EmitAlignment(2);
-
// Mark the constant pool entry as data if we're not already in a data
// region.
OutStreamer.EmitDataRegion();
@@ -1898,10 +1976,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::tLDRr);
+ TmpInst.setOpcode(ARM::tLDRi);
TmpInst.addOperand(MCOperand::CreateReg(ARM::R7));
TmpInst.addOperand(MCOperand::CreateReg(SrcReg));
- TmpInst.addOperand(MCOperand::CreateReg(0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
TmpInst.addOperand(MCOperand::CreateReg(0));
@@ -1935,4 +2013,3 @@ extern "C" void LLVMInitializeARMAsmPrinter() {
RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
}
-
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 7741fc4b34e8..af3f75a0e892 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -73,6 +73,7 @@ public:
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ void EmitXXStructor(const Constant *CV);
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
@@ -106,7 +107,7 @@ public:
if (!Subtarget->isTargetDarwin())
return 0;
return Subtarget->isThumb() ?
- llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm;
}
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 408edfc20d4c..c6280f819a4f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -28,7 +28,6 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
@@ -47,7 +46,7 @@ EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden,
+WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
cl::desc("Widen ARM vmovs to vmovd when possible"));
/// ARM_MLxEntry - Record information about MLA / MLS instructions.
@@ -147,7 +146,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- bool isLoad = !MCID.mayStore();
+ bool isLoad = !MI->mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -157,9 +156,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned OffImm = MI->getOperand(NumOps-2).getImm();
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
switch (AddrMode) {
- default:
- assert(false && "Unknown indexed op!");
- return NULL;
+ default: llvm_unreachable("Unknown indexed op!");
case ARMII::AddrMode2: {
bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
unsigned Amt = ARM_AM::getAM2Offset(OffImm);
@@ -440,6 +437,22 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
+ if (MI->isBundle()) {
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ int PIdx = I->findFirstPredOperandIdx();
+ if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
+ return true;
+ }
+ return false;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
+}
+
bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
@@ -490,15 +503,11 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
- // FIXME: This confuses implicit_def with optional CPSR def.
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
- return false;
-
bool Found = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
+ (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
Pred.push_back(MO);
Found = true;
}
@@ -511,11 +520,10 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return false;
- if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -544,83 +552,95 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
if (MCID.getSize())
return MCID.getSize();
- // If this machine instr is an inline asm, measure it.
- if (MI->getOpcode() == ARM::INLINEASM)
- return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
- if (MI->isLabel())
- return 0;
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
unsigned Opc = MI->getOpcode();
- switch (Opc) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::PROLOG_LABEL:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::DBG_VALUE:
- return 0;
- case ARM::MOVi16_ga_pcrel:
- case ARM::MOVTi16_ga_pcrel:
- case ARM::t2MOVi16_ga_pcrel:
- case ARM::t2MOVTi16_ga_pcrel:
- return 4;
- case ARM::MOVi32imm:
- case ARM::t2MOVi32imm:
- return 8;
- case ARM::CONSTPOOL_ENTRY:
- // If this machine instr is a constant pool entry, its size is recorded as
- // operand #2.
- return MI->getOperand(2).getImm();
- case ARM::Int_eh_sjlj_longjmp:
- return 16;
- case ARM::tInt_eh_sjlj_longjmp:
- return 10;
- case ARM::Int_eh_sjlj_setjmp:
- case ARM::Int_eh_sjlj_setjmp_nofp:
- return 20;
- case ARM::tInt_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp:
- case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 12;
- case ARM::BR_JTr:
- case ARM::BR_JTm:
- case ARM::BR_JTadd:
- case ARM::tBR_JTr:
- case ARM::t2BR_JT:
- case ARM::t2TBB_JT:
- case ARM::t2TBH_JT: {
- // These are jumptable branches, i.e. a branch followed by an inlined
- // jumptable. The size is 4 + 4 * number of entries. For TBB, each
- // entry is one byte; TBH two byte each.
- unsigned EntrySize = (Opc == ARM::t2TBB_JT)
- ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
- unsigned NumOps = MCID.getNumOperands();
- MachineOperand JTOP =
- MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
- unsigned JTI = JTOP.getIndex();
- const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- assert(MJTI != 0);
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- assert(JTI < JT.size());
- // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
- // 4 aligned. The assembler / linker may add 2 byte padding just before
- // the JT entries. The size does not include this padding; the
- // constant islands pass does separate bookkeeping for it.
- // FIXME: If we know the size of the function is less than (1 << 16) *2
- // bytes, we can use 16-bit entries instead. Then there won't be an
- // alignment issue.
- unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
- unsigned NumEntries = getNumJTEntries(JT, JTI);
- if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
- // Make sure the instruction that follows TBB is 2-byte aligned.
- // FIXME: Constant island pass should insert an "ALIGN" instruction
- // instead.
- ++NumEntries;
- return NumEntries * EntrySize + InstSize;
- }
- default:
- // Otherwise, pseudo-instruction sizes are zero.
- return 0;
- }
- return 0; // Not reached
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ case TargetOpcode::BUNDLE:
+ return getInstBundleLength(MI);
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; TBH entries are two bytes each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
+ unsigned NumOps = MCID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) *2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+}
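
Working the jump-table size computation above through with concrete numbers (a sketch, not part of the patch): for five entries, TBB pads the odd count to six one-byte entries, TBH uses two-byte entries, and t2BR_JT uses four-byte entries after a 2-byte branch:

  unsigned jumpTableSize(unsigned NumEntries, unsigned EntrySize,
                         unsigned InstSize, bool IsTBB) {
    // TBB pads an odd number of one-byte entries to an even count so the
    // following instruction stays 2-byte aligned.
    if (IsTBB && (NumEntries & 1))
      ++NumEntries;
    return NumEntries * EntrySize + InstSize;
  }

  // jumpTableSize(5, 1, 4, true)  == 10  // t2TBB_JT
  // jumpTableSize(5, 2, 4, false) == 14  // t2TBH_JT
  // jumpTableSize(5, 4, 2, false) == 22  // t2BR_JT
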
+
+unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += GetInstSizeInBytes(&*I);
+ }
+ return Size;
}
void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -660,29 +680,51 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- // Generate instructions for VMOVQQ and VMOVQQQQ pseudos in place.
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg) ||
- ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
+ // Handle register classes that require multiple instructions.
+ unsigned BeginIdx = 0;
+ unsigned SubRegs = 0;
+ unsigned Spacing = 1;
+
+ // Use VORRq when possible.
+ if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
+ // Fall back to VMOVD.
+ else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
+ else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
+ else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
+
+ else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
+ else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
+ else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;
+
+ if (Opc) {
const TargetRegisterInfo *TRI = &getRegisterInfo();
- assert(ARM::qsub_0 + 3 == ARM::qsub_3 && "Expected contiguous enum.");
- unsigned EndSubReg = ARM::QQPRRegClass.contains(DestReg, SrcReg) ?
- ARM::qsub_1 : ARM::qsub_3;
- for (unsigned i = ARM::qsub_0, e = EndSubReg + 1; i != e; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, i);
- unsigned Src = TRI->getSubReg(SrcReg, i);
- MachineInstrBuilder Mov =
- AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VORRq))
- .addReg(Dst, RegState::Define)
- .addReg(Src, getKillRegState(KillSrc))
- .addReg(Src, getKillRegState(KillSrc)));
- if (i == EndSubReg) {
- Mov->addRegisterDefined(DestReg, TRI);
- if (KillSrc)
- Mov->addRegisterKilled(SrcReg, TRI);
- }
+ MachineInstrBuilder Mov;
+ for (unsigned i = 0; i != SubRegs; ++i) {
+ unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
+ unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing);
+ assert(Dst && Src && "Bad sub-register");
+ Mov = AddDefaultPred(BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
+ .addReg(Src));
+ // VORR takes two source operands.
+ if (Opc == ARM::VORRq)
+ Mov.addReg(Src);
}
+ // Add implicit super-register defs and kills to the last instruction.
+ Mov->addRegisterDefined(DestReg, TRI);
+ if (KillSrc)
+ Mov->addRegisterKilled(SrcReg, TRI);
return;
}
+
llvm_unreachable("Impossible reg-to-reg copy");
}
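
The BeginIdx/SubRegs/Spacing scheme above decomposes a wide copy into per-D-register moves; a sketch of the index arithmetic, with illustrative names:

  #include <cstdio>

  // For a spaced triple (SubRegs = 3, Spacing = 2) starting at dsub_0,
  // the copied sub-register indices are dsub_0, dsub_2 and dsub_4.
  void listCopiedSubRegs(unsigned BeginIdx, unsigned SubRegs,
                         unsigned Spacing) {
    for (unsigned i = 0; i != SubRegs; ++i)
      std::printf("copy sub-register index %u\n", BeginIdx + i * Spacing);
  }
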
@@ -710,8 +752,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
Align);
@@ -738,9 +779,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::QPRRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo))
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ // Use aligned spills if the stack can be realigned.
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
.addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
@@ -825,7 +867,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::VST1q64Pseudo:
+ case ARM::VST1q64:
if (MI->getOperand(0).isFI() &&
MI->getOperand(2).getSubReg() == 0) {
FrameIndex = MI->getOperand(0).getIndex();
@@ -847,7 +889,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
void ARMBaseInstrInfo::
@@ -862,7 +904,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
Align);
@@ -887,9 +929,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Unknown reg class!");
break;
case 16:
- if (ARM::QPRRegClass.hasSubClassEq(RC)) {
- if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg)
+ if (ARM::DPairRegClass.hasSubClassEq(RC)) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
.addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
@@ -911,11 +953,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
}
} else
llvm_unreachable("Unknown reg class!");
@@ -926,15 +969,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
.addFrameIndex(FI))
.addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
- MIB.addReg(DestReg, RegState::Define | RegState::Implicit);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
+ if (TargetRegisterInfo::isPhysicalRegister(DestReg))
+ MIB.addReg(DestReg, RegState::ImplicitDefine);
} else
llvm_unreachable("Unknown reg class!");
break;
@@ -971,7 +1015,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::VLD1q64Pseudo:
+ case ARM::VLD1q64:
if (MI->getOperand(1).isFI() &&
MI->getOperand(0).getSubReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
@@ -993,7 +1037,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const {
const MachineMemOperand *Dummy;
- return MI->getDesc().mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
+ return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{
@@ -1359,7 +1403,7 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
// Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
+ if (MI->isTerminator() || MI->isLabel())
return true;
// Treat the start of the IT block as a scheduling boundary, but schedule
@@ -1380,7 +1424,10 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
- if (MI->definesRegister(ARM::SP))
+ // Calls don't actually change the stack pointer, even if they have imp-defs.
+ // No ARM calling conventions change the stack pointer. (X86 calling
+ // conventions sometimes do).
+ if (!MI->isCall() && MI->definesRegister(ARM::SP))
return true;
return false;
@@ -1445,15 +1492,37 @@ llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
int llvm::getMatchingCondBranchOpcode(int Opc) {
if (Opc == ARM::B)
return ARM::Bcc;
- else if (Opc == ARM::tB)
+ if (Opc == ARM::tB)
return ARM::tBcc;
- else if (Opc == ARM::t2B)
- return ARM::t2Bcc;
+ if (Opc == ARM::t2B)
+ return ARM::t2Bcc;
llvm_unreachable("Unknown unconditional branch opcode!");
- return 0;
}
+/// commuteInstruction - Handle commutable instructions.
+MachineInstr *
+ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ switch (MI->getOpcode()) {
+ case ARM::MOVCCr:
+ case ARM::t2MOVCCr: {
+ // MOVCC can be commuted by inverting the condition.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
+ // MOVCC AL can't be inverted. Shouldn't happen.
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return NULL;
+ MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+ if (!MI)
+ return NULL;
+ // After swapping the MOVCC operands, also invert the condition.
+ MI->getOperand(MI->findFirstPredOperandIdx())
+ .setImm(ARMCC::getOppositeCondition(CC));
+ return MI;
+ }
+ }
+ return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+}
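
The legality of this commute rests on the select identity select(cc, a, b) == select(!cc, b, a). A standalone check of that identity, with the condition inversion stubbed out for a two-condition subset:

  #include <cassert>

  enum Cond { EQ, NE };                        // illustrative subset of ARMCC
  Cond invert(Cond C) { return C == EQ ? NE : EQ; }

  int select(Cond C, bool Flags, int TrueVal, int FalseVal) {
    bool Taken = (C == EQ) ? Flags : !Flags;
    return Taken ? TrueVal : FalseVal;
  }

  int main() {
    // Swapping operands and inverting the condition preserves the result.
    for (int Flags = 0; Flags < 2; ++Flags)
      assert(select(EQ, Flags, 1, 2) == select(invert(EQ), Flags, 2, 1));
  }
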
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
/// instruction is encoded with an 'S' bit is determined by the optional CPSR
@@ -1478,7 +1547,6 @@ static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
{ARM::SUBSrsr, ARM::SUBrsr},
{ARM::RSBSri, ARM::RSBri},
- {ARM::RSBSrr, ARM::RSBrr},
{ARM::RSBSrsi, ARM::RSBrsi},
{ARM::RSBSrsr, ARM::RSBrsr},
@@ -1625,7 +1693,6 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
}
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
Offset += InstrOffs * Scale;
@@ -1765,8 +1832,7 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
// Check that CPSR isn't set between the comparison instruction and the one we
// want to change.
- MachineBasicBlock::const_iterator I = CmpInstr, E = MI,
- B = MI->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr,E = MI, B = MI->getParent()->begin();
// Early exit if CmpInstr is at the beginning of the BB.
if (I == B) return false;
@@ -1777,6 +1843,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR))
+ return false;
if (!MO.isReg()) continue;
// This instruction modifies or uses CPSR after the one we want to
@@ -1838,6 +1906,10 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
for (unsigned IO = 0, EO = Instr.getNumOperands();
!isSafe && IO != EO; ++IO) {
const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
+ isSafe = true;
+ break;
+ }
if (!MO.isReg() || MO.getReg() != ARM::CPSR)
continue;
if (MO.isDef()) {
@@ -1889,6 +1961,25 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
if (!MRI->hasOneNonDBGUse(Reg))
return false;
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
+ if (DefMCID.hasOptionalDef()) {
+ unsigned NumOps = DefMCID.getNumOperands();
+ const MachineOperand &MO = DefMI->getOperand(NumOps-1);
+ if (MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If DefMI defines CPSR and it is not dead, it's obviously not safe
+ // to delete DefMI.
+ return false;
+ }
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+ if (UseMCID.hasOptionalDef()) {
+ unsigned NumOps = UseMCID.getNumOperands();
+ if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
+ // If the instruction sets the flag, do not attempt this optimization
+ // since it may change the semantics of the code.
+ return false;
+ }
+
unsigned UseOpc = UseMI->getOpcode();
unsigned NewUseOpc = 0;
uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
@@ -1960,7 +2051,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
bool isKill = UseMI->getOperand(OpIdx).isKill();
unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
- *UseMI, UseMI->getDebugLoc(),
+ UseMI, UseMI->getDebugLoc(),
get(NewUseOpc), NewReg)
.addReg(Reg1, getKillRegState(isKill))
.addImm(SOImmValV1)));
@@ -1988,7 +2079,6 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
switch (Opc) {
default:
llvm_unreachable("Unexpected multi-uops instruction!");
- break;
case ARM::VLDMQIA:
case ARM::VSTMQIA:
return 2;
@@ -2335,6 +2425,59 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return UseCycle;
}
+static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &DefIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_iterator I = MI; ++I;
+ MachineBasicBlock::const_instr_iterator II =
+ llvm::prior(I.getInstrIterator());
+ assert(II->isInsideBundle() && "Empty bundle?");
+
+ int Idx = -1;
+ while (II->isInsideBundle()) {
+ Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ if (Idx != -1)
+ break;
+ --II;
+ ++Dist;
+ }
+
+ assert(Idx != -1 && "Cannot find bundled definition!");
+ DefIdx = Idx;
+ return II;
+}
+
+static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
+ const MachineInstr *MI, unsigned Reg,
+ unsigned &UseIdx, unsigned &Dist) {
+ Dist = 0;
+
+ MachineBasicBlock::const_instr_iterator II = MI; ++II;
+ assert(II->isInsideBundle() && "Empty bundle?");
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+
+ // FIXME: This doesn't properly handle multiple uses.
+ int Idx = -1;
+ while (II != E && II->isInsideBundle()) {
+ Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
+ if (Idx != -1)
+ break;
+ if (II->getOpcode() != ARM::t2IT)
+ ++Dist;
+ ++II;
+ }
+
+ if (Idx == -1) {
+ Dist = 0;
+ return 0;
+ }
+
+ UseIdx = Idx;
+ return II;
+}
+
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
@@ -2343,35 +2486,77 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefMCID.mayLoad() ? 3 : 1;
+ return DefMI->mayLoad() ? 3 : 1;
- const MCInstrDesc &UseMCID = UseMI->getDesc();
+ const MCInstrDesc *DefMCID = &DefMI->getDesc();
+ const MCInstrDesc *UseMCID = &UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
- if (DefMO.getReg() == ARM::CPSR) {
+ unsigned Reg = DefMO.getReg();
+ if (Reg == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
return Subtarget.isCortexA9() ? 1 : 20;
}
// CPSR set and branch can be paired in the same cycle.
- if (UseMCID.isBranch())
+ if (UseMI->isBranch())
return 0;
+
+ // Otherwise it takes the instruction latency (generally one).
+ int Latency = getInstrLatency(ItinData, DefMI);
+
+ // For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to
+ // its uses. Instructions which are otherwise scheduled between them may
+ // incur a code size penalty (not able to use the CPSR setting 16-bit
+ // instructions).
+ if (Latency > 0 && Subtarget.isThumb2()) {
+ const MachineFunction *MF = DefMI->getParent()->getParent();
+ if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ --Latency;
+ }
+ return Latency;
}
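
A sketch of the size-oriented bias just described, under the assumption stated in the comment (Thumb2, optimizing for size):

  int cpsrLatencyForSize(int Latency, bool IsThumb2, bool OptSize) {
    // Under -Os on Thumb2, report one cycle less so the scheduler keeps a
    // CPSR def next to its use and 16-bit encodings stay available.
    if (Latency > 0 && IsThumb2 && OptSize)
      --Latency;
    return Latency;
  }
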
unsigned DefAlign = DefMI->hasOneMemOperand()
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
- UseMCID, UseIdx, UseAlign);
+
+ unsigned DefAdj = 0;
+ if (DefMI->isBundle()) {
+ DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
+ if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
+ DefMI->isRegSequence() || DefMI->isImplicitDef())
+ return 1;
+ DefMCID = &DefMI->getDesc();
+ }
+ unsigned UseAdj = 0;
+ if (UseMI->isBundle()) {
+ unsigned NewUseIdx;
+ const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
+ Reg, NewUseIdx, UseAdj);
+ if (NewUseMI) {
+ UseMI = NewUseMI;
+ UseIdx = NewUseIdx;
+ UseMCID = &UseMI->getDesc();
+ }
+ }
+
+ int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
+ *UseMCID, UseIdx, UseAlign);
+ int Adj = DefAdj + UseAdj;
+ if (Adj) {
+ Latency -= (int)(DefAdj + UseAdj);
+ if (Latency < 1)
+ return 1;
+ }
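
A quick numeric check of the adjustment above (sketch): if the itinerary reports a latency of 3 and the def and use are each one slot deep inside their bundles, the effective bundle-to-bundle latency is 3 - (1 + 1) = 1, clamped to a minimum of 1:

  int adjustBundleLatency(int Latency, unsigned DefAdj, unsigned UseAdj) {
    if (DefAdj + UseAdj) {
      Latency -= (int)(DefAdj + UseAdj);
      if (Latency < 1)
        return 1;
    }
    return Latency;
  }

  // adjustBundleLatency(3, 1, 1) == 1
  // adjustBundleLatency(4, 1, 0) == 3
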
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2396,28 +2581,38 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefMCID.getOpcode()) {
+ switch (DefMCID->getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
case ARM::VLD1q32:
case ARM::VLD1q64:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
case ARM::VLD2q8:
case ARM::VLD2q16:
case ARM::VLD2q32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2425,7 +2620,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
- case ARM::VLD1d64T_UPD:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
case ARM::VLD3q8_UPD:
case ARM::VLD3q16_UPD:
case ARM::VLD3q32_UPD:
@@ -2436,22 +2632,29 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d8_UPD:
case ARM::VLD4d16_UPD:
case ARM::VLD4d32_UPD:
- case ARM::VLD1d64Q_UPD:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
case ARM::VLD4q8_UPD:
case ARM::VLD4q16_UPD:
case ARM::VLD4q32_UPD:
case ARM::VLD1DUPq8:
case ARM::VLD1DUPq16:
case ARM::VLD1DUPq32:
- case ARM::VLD1DUPq8_UPD:
- case ARM::VLD1DUPq16_UPD:
- case ARM::VLD1DUPq32_UPD:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
case ARM::VLD2DUPd8:
case ARM::VLD2DUPd16:
case ARM::VLD2DUPd32:
- case ARM::VLD2DUPd8_UPD:
- case ARM::VLD2DUPd16_UPD:
- case ARM::VLD2DUPd32_UPD:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8:
case ARM::VLD4DUPd16:
case ARM::VLD4DUPd32:
@@ -2559,26 +2762,36 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (DefAlign < 8 && Subtarget.isCortexA9())
switch (DefMCID.getOpcode()) {
default: break;
- case ARM::VLD1q8Pseudo:
- case ARM::VLD1q16Pseudo:
- case ARM::VLD1q32Pseudo:
- case ARM::VLD1q64Pseudo:
- case ARM::VLD1q8Pseudo_UPD:
- case ARM::VLD1q16Pseudo_UPD:
- case ARM::VLD1q32Pseudo_UPD:
- case ARM::VLD1q64Pseudo_UPD:
- case ARM::VLD2d8Pseudo:
- case ARM::VLD2d16Pseudo:
- case ARM::VLD2d32Pseudo:
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD2d8:
+ case ARM::VLD2d16:
+ case ARM::VLD2d32:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -2586,7 +2799,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
- case ARM::VLD1d64TPseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
@@ -2603,7 +2815,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
- case ARM::VLD1d64QPseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
@@ -2613,18 +2824,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8Pseudo_UPD:
- case ARM::VLD2DUPd16Pseudo_UPD:
- case ARM::VLD2DUPd32Pseudo_UPD:
+ case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16:
+ case ARM::VLD1DUPq32:
+ case ARM::VLD1DUPq8wb_fixed:
+ case ARM::VLD1DUPq16wb_fixed:
+ case ARM::VLD1DUPq32wb_fixed:
+ case ARM::VLD1DUPq8wb_register:
+ case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16:
+ case ARM::VLD2DUPd32:
+ case ARM::VLD2DUPd8wb_fixed:
+ case ARM::VLD2DUPd16wb_fixed:
+ case ARM::VLD2DUPd32wb_fixed:
+ case ARM::VLD2DUPd8wb_register:
+ case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_register:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
@@ -2666,6 +2883,19 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return Latency;
}
+unsigned
+ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const {
+ unsigned Reg = DefMI->getOperand(DefIdx).getReg();
+ if (DepMI->readsRegister(Reg, &getRegisterInfo()) || !isPredicated(DepMI))
+ return 1;
+
+ // If the second MI is predicated, then there is an implicit use dependency.
+ return getOperandLatency(ItinData, DefMI, DefIdx, DepMI,
+ DepMI->getNumOperands());
+}
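A standalone sketch of the output-latency rule just added, using plain flags in place of the MachineInstr queries: a write-after-write dependency normally costs one cycle, but a predicated second writer implicitly reads the register it may leave unchanged, so the defining instruction's full operand latency applies.

#include <cassert>

unsigned outputLatency(bool DepReadsReg, bool DepIsPredicated,
                       unsigned OperandLatency) {
  if (DepReadsReg || !DepIsPredicated)
    return 1;
  return OperandLatency; // implicit use through the predicate
}

int main() {
  assert(outputLatency(false, false, 3) == 1); // plain WAW: one cycle
  assert(outputLatency(false, true, 3) == 3);  // predicated: full latency
}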
+
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -2676,10 +2906,21 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
+ if (MI->isBundle()) {
+ int Latency = 0;
+ MachineBasicBlock::const_instr_iterator I = MI;
+ MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ if (I->getOpcode() != ARM::t2IT)
+ Latency += getInstrLatency(ItinData, I, PredCost);
+ }
+ return Latency;
+ }
+
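The bundle case above, restated as a self-contained sketch (Inst is a stand-in for MachineInstr): a bundle's latency is the sum of its members' latencies, and the t2IT marker contributes nothing.

#include <vector>

struct Inst { bool IsIT; int Latency; };

int bundleLatency(const std::vector<Inst> &Bundle) {
  int Latency = 0;
  for (unsigned i = 0, e = Bundle.size(); i != e; ++i)
    if (!Bundle[i].IsIT)
      Latency += Bundle[i].Latency;
  return Latency;
}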
const MCInstrDesc &MCID = MI->getDesc();
unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)))
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
@@ -2828,3 +3069,7 @@ ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
// This will go before any implicit ops.
AddDefaultPred(MachineInstrBuilder(MI).addOperand(MI->getOperand(1)));
}
+
+bool ARMBaseInstrInfo::hasNOP() const {
+ return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
+}
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0f9f32179a31..2fe85072a330 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
+//===-- ARMBaseInstrInfo.h - ARM Base Instruction Information ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,6 +35,9 @@ protected:
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
public:
+ // Return whether the target has an explicit NOP encoding.
+ bool hasNOP() const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -69,10 +72,7 @@ public:
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
// Predication support.
- bool isPredicated(const MachineInstr *MI) const {
- int PIdx = MI->findFirstPredOperandIdx();
- return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
- }
+ bool isPredicated(const MachineInstr *MI) const;
ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
int PIdx = MI->findFirstPredOperandIdx();
@@ -139,6 +139,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
+ MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
+
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1,
const MachineRegisterInfo *MRI) const;
@@ -213,12 +215,18 @@ public:
SDNode *DefNode, unsigned DefIdx,
SDNode *UseNode, unsigned UseIdx) const;
+ virtual unsigned getOutputLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *DepMI) const;
+
/// VFP/NEON execution domains.
std::pair<uint16_t, uint16_t>
getExecutionDomain(const MachineInstr *MI) const;
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
private:
+ unsigned getInstBundleLength(const MachineInstr *MI) const;
+
int getVLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
unsigned DefClass,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 154f1f8ff997..3907f7535260 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.cpp - ARM Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMBaseRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMBaseRegisterInfo.h"
#include "ARMFrameLowering.h"
-#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -61,41 +60,14 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
BasePtr(ARM::R6) {
}
-const unsigned*
+const uint16_t*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- bool ghcCall = false;
-
- if (MF) {
- const Function *F = MF->getFunction();
- ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
- }
-
- static const unsigned CalleeSavedRegs[] = {
- ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8,
- ARM::R7, ARM::R6, ARM::R5, ARM::R4,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned DarwinCalleeSavedRegs[] = {
- // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved
- // register.
- ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
- ARM::R11, ARM::R10, ARM::R8,
-
- ARM::D15, ARM::D14, ARM::D13, ARM::D12,
- ARM::D11, ARM::D10, ARM::D9, ARM::D8,
- 0
- };
-
- static const unsigned GhcCalleeSavedRegs[] = {
- 0
- };
+ return (STI.isTargetIOS()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList;
+}
- return ghcCall ? GhcCalleeSavedRegs :
- STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
+const uint32_t*
+ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const {
+ return (STI.isTargetIOS()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
BitVector ARMBaseRegisterInfo::
@@ -148,104 +120,6 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
return false;
}
-const TargetRegisterClass *
-ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned SubIdx) const {
- switch (SubIdx) {
- default: return 0;
- case ARM::ssub_0:
- case ARM::ssub_1:
- case ARM::ssub_2:
- case ARM::ssub_3: {
- // S sub-registers.
- if (A->getSize() == 8) {
- if (B == &ARM::SPR_8RegClass)
- return &ARM::DPR_8RegClass;
- assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
- if (A == &ARM::DPR_8RegClass)
- return A;
- return &ARM::DPR_VFP2RegClass;
- }
-
- if (A->getSize() == 16) {
- if (B == &ARM::SPR_8RegClass)
- return &ARM::QPR_8RegClass;
- return &ARM::QPR_VFP2RegClass;
- }
-
- if (A->getSize() == 32) {
- if (B == &ARM::SPR_8RegClass)
- return 0; // Do not allow coalescing!
- return &ARM::QQPR_VFP2RegClass;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- return 0; // Do not allow coalescing!
- }
- case ARM::dsub_0:
- case ARM::dsub_1:
- case ARM::dsub_2:
- case ARM::dsub_3: {
- // D sub-registers.
- if (A->getSize() == 16) {
- if (B == &ARM::DPR_VFP2RegClass)
- return &ARM::QPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- if (A->getSize() == 32) {
- if (B == &ARM::DPR_VFP2RegClass)
- return &ARM::QQPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- if (B != &ARM::DPRRegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
- case ARM::dsub_4:
- case ARM::dsub_5:
- case ARM::dsub_6:
- case ARM::dsub_7: {
- // D sub-registers of QQQQ registers.
- if (A->getSize() == 64 && B == &ARM::DPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
-
- case ARM::qsub_0:
- case ARM::qsub_1: {
- // Q sub-registers.
- if (A->getSize() == 32) {
- if (B == &ARM::QPR_VFP2RegClass)
- return &ARM::QQPR_VFP2RegClass;
- if (B == &ARM::QPR_8RegClass)
- return 0; // Do not allow coalescing!
- return A;
- }
-
- assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
- if (B == &ARM::QPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
- case ARM::qsub_2:
- case ARM::qsub_3: {
- // Q sub-registers of QQQQ registers.
- if (A->getSize() == 64 && B == &ARM::QPRRegClass)
- return A;
- return 0; // Do not allow coalescing!
- }
- }
- return 0;
-}
-
bool
ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
@@ -416,7 +290,7 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
/// getRawAllocationOrder - Returns the register allocation order for a
/// specified register class with a target-dependent hint.
-ArrayRef<unsigned>
+ArrayRef<uint16_t>
ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const {
@@ -425,71 +299,71 @@ ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
// of register pairs.
// No FP, R9 is available.
- static const unsigned GPREven1[] = {
+ static const uint16_t GPREven1[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
ARM::R9, ARM::R11
};
- static const unsigned GPROdd1[] = {
+ static const uint16_t GPROdd1[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
ARM::R8, ARM::R10
};
// FP is R7, R9 is available.
- static const unsigned GPREven2[] = {
+ static const uint16_t GPREven2[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
ARM::R9, ARM::R11
};
- static const unsigned GPROdd2[] = {
+ static const uint16_t GPROdd2[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
ARM::R8, ARM::R10
};
// FP is R11, R9 is available.
- static const unsigned GPREven3[] = {
+ static const uint16_t GPREven3[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
ARM::R9
};
- static const unsigned GPROdd3[] = {
+ static const uint16_t GPROdd3[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
ARM::R8
};
// No FP, R9 is not available.
- static const unsigned GPREven4[] = {
+ static const uint16_t GPREven4[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
ARM::R11
};
- static const unsigned GPROdd4[] = {
+ static const uint16_t GPROdd4[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
ARM::R10
};
// FP is R7, R9 is not available.
- static const unsigned GPREven5[] = {
+ static const uint16_t GPREven5[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R10,
ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
ARM::R11
};
- static const unsigned GPROdd5[] = {
+ static const uint16_t GPROdd5[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R11,
ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
ARM::R10
};
// FP is R11, R9 is not available.
- static const unsigned GPREven6[] = {
+ static const uint16_t GPREven6[] = {
ARM::R0, ARM::R2, ARM::R4, ARM::R6,
ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
};
- static const unsigned GPROdd6[] = {
+ static const uint16_t GPROdd6[] = {
ARM::R1, ARM::R3, ARM::R5, ARM::R7,
ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
};
@@ -610,11 +484,15 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const {
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
if (!EnableBasePointer)
return false;
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ // When outgoing call frames are so large that we adjust the stack pointer
+ // around the call, we can no longer use the stack pointer to reach the
+ // emergency spill slot.
+ if (needsStackRealignment(MF) && !TFI->hasReservedCallFrame(MF))
return true;
// Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
@@ -638,14 +516,29 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
}
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// We can't realign the stack if:
// 1. Dynamic stack realignment is explicitly disabled,
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
// 3. There are VLAs in the function and the base pointer is disabled.
- return (RealignStack && !AFI->isThumb1OnlyFunction() &&
- (!MFI->hasVarSizedObjects() || EnableBasePointer));
+ if (!MF.getTarget().Options.RealignStack)
+ return false;
+ if (AFI->isThumb1OnlyFunction())
+ return false;
+ // Stack realignment requires a frame pointer. If we already started
+ // register allocation with frame pointer elimination, it is too late now.
+ if (!MRI->canReserveReg(FramePtr))
+ return false;
+ // We may also need a base pointer if there are dynamic allocas or stack
+ // pointer adjustments around calls.
+ if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF))
+ return true;
+ if (!EnableBasePointer)
+ return false;
+ // A base pointer is required and allowed. Check that it isn't too late to
+ // reserve it.
+ return MRI->canReserveReg(BasePtr);
}
bool ARMBaseRegisterInfo::
@@ -653,7 +546,7 @@ needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
- bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
@@ -662,7 +555,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
@@ -679,12 +572,10 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
@@ -892,7 +783,7 @@ int64_t ARMBaseRegisterInfo::
getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- int64_t InstrOffs = 0;;
+ int64_t InstrOffs = 0;
int Scale = 1;
unsigned ImmIdx = 0;
switch (AddrMode) {
@@ -933,7 +824,6 @@ getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
}
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
return InstrOffs * Scale;
@@ -1129,7 +1019,6 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
break;
default:
llvm_unreachable("Unsupported addressing mode!");
- break;
}
Offset += getFrameIndexInstrOffset(MI, i);
@@ -1171,6 +1060,21 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ assert(TFI->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index fee17ff3c1ca..af7935147e48 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//===-- ARMBaseRegisterInfo.h - ARM Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,7 +35,7 @@ namespace ARMRI {
/// isARMArea1Register - Returns true if the register is a low register (r0-r7)
/// or a stack/pc register that we should push/pop.
-static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea1Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case R0: case R1: case R2: case R3:
@@ -43,25 +43,25 @@ static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) {
case LR: case SP: case PC:
return true;
case R8: case R9: case R10: case R11:
- // For darwin we want r7 and lr to be next to each other.
- return !isDarwin;
+ // For iOS we want r7 and lr to be next to each other.
+ return !isIOS;
default:
return false;
}
}
-static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea2Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case R8: case R9: case R10: case R11:
- // Darwin has this second area.
- return isDarwin;
+ // iOS has this second area.
+ return isIOS;
default:
return false;
}
}
-static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) {
+static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
using namespace ARM;
switch (Reg) {
case D15: case D14: case D13: case D12:
@@ -94,17 +94,11 @@ protected:
public:
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
- /// getMatchingSuperRegClass - Return a subclass of the specified register
- /// class A so that each register in it has a sub-register of the
- /// specified sub-register index which is in the specified register class B.
- virtual const TargetRegisterClass *
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const;
-
/// canCombineSubRegIndices - Given a register class and a list of
/// subregister indices, return true if it's possible to combine the
/// subregister indices into one that corresponds to a larger
@@ -125,7 +119,7 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
unsigned HintType, unsigned HintReg,
const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMBuildAttrs.h b/lib/Target/ARM/ARMBuildAttrs.h
index 69eddf03ec94..11bd6a4a8dbc 100644
--- a/lib/Target/ARM/ARMBuildAttrs.h
+++ b/lib/Target/ARM/ARMBuildAttrs.h
@@ -1,4 +1,4 @@
-//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//===-- ARMBuildAttrs.h - ARM Build Attributes ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index ff7db1ff62ed..0bd1c3ee2feb 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -1,4 +1,4 @@
-//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//=== ARMCallingConv.h - ARM Custom Calling Convention Routines -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,13 +15,12 @@
#ifndef ARMCALLINGCONV_H
#define ARMCALLINGCONV_H
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
-#include "ARM.h"
namespace llvm {
@@ -29,7 +28,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -72,9 +71,9 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
- static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -118,8 +117,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
- static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
+ static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 47b2e9829834..d33364bb2871 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//===-- ARMCallingConv.td - Calling Conventions for ARM ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,7 +25,7 @@ def CC_ARM_APCS : CallingConv<[
// Handles byval parameters.
CCIfByVal<CCPassByVal<4, 4>>,
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
@@ -43,6 +43,7 @@ def CC_ARM_APCS : CallingConv<[
]>;
def RetCC_ARM_APCS : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
// Handle all vector types as either f64 or v2f64.
@@ -82,25 +83,6 @@ def RetFastCC_ARM_APCS : CallingConv<[
CCDelegateTo<RetCC_ARM_APCS>
]>;
-//===----------------------------------------------------------------------===//
-// ARM APCS Calling Convention for GHC
-//===----------------------------------------------------------------------===//
-
-def CC_ARM_APCS_GHC : CallingConv<[
- // Handle all vector types as either f64 or v2f64.
- CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
-
- CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
- CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
- CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
-
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, SpLim
- CCIfType<[i32], CCAssignToReg<[R4, R5, R6, R7, R8, R9, R10, R11]>>
-]>;
//===----------------------------------------------------------------------===//
// ARM AAPCS (EABI) Calling Convention, common parts
@@ -108,7 +90,7 @@ def CC_ARM_APCS_GHC : CallingConv<[
def CC_ARM_AAPCS_Common : CallingConv<[
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
// i64/f64 is passed in even pairs of GPRs
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
@@ -125,6 +107,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
]>;
def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
]>;
@@ -181,3 +164,14 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
+ (sequence "D%u", 15, 8))>;
+
+// iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register.
+// Also save R7-R4 first to match the stack frame fixed spill areas.
+def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
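Hand-expanding the set arithmetic (an illustration only; TableGen emits the real CSR_iOS_SaveList) reproduces the DarwinCalleeSavedRegs order deleted from ARMBaseRegisterInfo.cpp above: the explicit LR, R7-R4 come first, then the remainder of CSR_AAPCS with R9 filtered out and duplicates dropped.

// Assumes the ARM register enumerators from the generated headers.
static const uint16_t CSR_iOS_SaveList_sketch[] = {
  ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4,
  ARM::R11, ARM::R10, ARM::R8,
  ARM::D15, ARM::D14, ARM::D13, ARM::D12,
  ARM::D11, ARM::D10, ARM::D9, ARM::D8, 0
};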
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 4148d4ab10e9..32ef345c058f 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -15,7 +15,7 @@
#define DEBUG_TYPE "jit"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
@@ -46,7 +46,7 @@ namespace {
class ARMCodeEmitter : public MachineFunctionPass {
ARMJITInfo *JTI;
- const ARMInstrInfo *II;
+ const ARMBaseInstrInfo *II;
const TargetData *TD;
const ARMSubtarget *Subtarget;
TargetMachine &TM;
@@ -66,7 +66,7 @@ namespace {
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
: MachineFunctionPass(ID), JTI(0),
- II((const ARMInstrInfo *)tm.getInstrInfo()),
+ II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
@@ -74,7 +74,7 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
bool runOnMachineFunction(MachineFunction &MF);
@@ -189,6 +189,8 @@ namespace {
unsigned Op) const { return 0; }
unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
+ unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+ const { return 0; }
unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
const { return 0; }
unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
@@ -366,9 +368,9 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
MF.getTarget().getRelocationModel() != Reloc::Static) &&
"JIT relocation model must be set to static or default!");
- JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
- II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
- TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+ JTI = ((ARMBaseTargetMachine &)MF.getTarget()).getJITInfo();
+ II = (const ARMBaseInstrInfo *)MF.getTarget().getInstrInfo();
+ TD = MF.getTarget().getTargetData();
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
MJTEs = 0;
@@ -386,7 +388,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -406,7 +408,6 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
}
- return 0;
}
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
@@ -532,7 +533,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
llvm_unreachable("Unhandled instruction encoding format!");
- break;
}
case ARMII::MiscFrm:
if (MI.getOpcode() == ARM::LEApcrelJT) {
@@ -541,7 +541,6 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
break;
}
llvm_unreachable("Unhandled instruction encoding!");
- break;
case ARMII::Pseudo:
emitPseudoInstruction(MI);
break;
@@ -837,9 +836,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
case ARM::BX_CALL:
- case ARM::BMOVPCRX_CALL:
- case ARM::BXr9_CALL:
- case ARM::BMOVPCRXr9_CALL: {
+ case ARM::BMOVPCRX_CALL: {
// First emit mov lr, pc
unsigned Binary = 0x01a0e00f;
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 3e3a4134c704..fc35c7cb0200 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,16 +16,17 @@
#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMInstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -51,6 +52,44 @@ static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
cl::desc("Adjust basic block layout to better use TB[BH]"));
+// FIXME: This option should be removed once it has received sufficient testing.
+static cl::opt<bool>
+AlignConstantIslands("arm-align-constant-islands", cl::Hidden, cl::init(true),
+ cl::desc("Align constant islands in code"));
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// WorstCaseAlign - Assuming only the low KnownBits bits in Offset are exact,
+/// add padding such that:
+///
+/// 1. The result is aligned to 1 << LogAlign.
+///
+/// 2. No other value of the unknown bits would require more padding.
+///
+/// This may add more padding than is required to satisfy just one of the
+/// constraints. It is necessary to compute alignment this way to guarantee
+/// that we don't underestimate the padding before an aligned block. If the
+/// real padding before a block is larger than we think, constant pool entries
+/// may go out of range.
+static inline unsigned WorstCaseAlign(unsigned Offset, unsigned LogAlign,
+ unsigned KnownBits) {
+ // Add the worst possible padding that the unknown bits could cause.
+ Offset += UnknownPadding(LogAlign, KnownBits);
+
+ // Then align the result.
+ return RoundUpToAlignment(Offset, 1u << LogAlign);
+}
+
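The two helpers in a runnable form, with RoundUpToAlignment replaced by a local equivalent so the sketch stands alone:

#include <cassert>

static unsigned roundUp(unsigned X, unsigned Align) {
  return (X + Align - 1) / Align * Align;
}
static unsigned unknownPadding(unsigned LogAlign, unsigned KnownBits) {
  return KnownBits < LogAlign ? (1u << LogAlign) - (1u << KnownBits) : 0;
}
static unsigned worstCaseAlign(unsigned Offset, unsigned LogAlign,
                               unsigned KnownBits) {
  // First assume the unknown bits take their worst value, then align.
  return roundUp(Offset + unknownPadding(LogAlign, KnownBits),
                 1u << LogAlign);
}

int main() {
  // 8-byte alignment (LogAlign = 3) with only 2 known low bits: up to
  // 8 - 4 = 4 bytes of offset may be unaccounted for.
  assert(unknownPadding(3, 2) == 4);
  // Offset 20 in the worst case becomes 24, which is already 8-aligned.
  assert(worstCaseAlign(20, 3, 2) == 24);
}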
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
/// requires constant pool entries to be scattered among the instructions
@@ -64,16 +103,70 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
- /// by MBB Number. The two-byte pads required for Thumb alignment are
- /// counted as part of the following block (i.e., the offset and size for
- /// a padded block will both be ==2 mod 4).
- std::vector<unsigned> BBSizes;
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ return Unalign ? Unalign : KnownBits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return WorstCaseAlign(PO, LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+ /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+ };
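A worked postOffset value using the worstCaseAlign sketch above:

unsigned examplePostOffset() {
  // A block at Offset 16 with Size 2, a 4-byte aligned terminator
  // (PostAlign = 2), and one exactly-known low offset bit: the worst-case
  // padding is 4 - 2 = 2 bytes, and 20 is already 4-byte aligned.
  return worstCaseAlign(16 + 2, /*LogAlign=*/2, /*KnownBits=*/1); // == 20
}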
- /// BBOffsets - the offset of each MBB in bytes, starting from 0.
- /// The two-byte pads required for Thumb alignment are counted as part of
- /// the following block.
- std::vector<unsigned> BBOffsets;
+ std::vector<BasicBlockInfo> BBInfo;
/// WaterList - A sorted list of basic blocks where islands could be placed
/// (i.e. blocks that don't fall through to the following block, due
@@ -102,14 +195,24 @@ namespace {
MachineInstr *MI;
MachineInstr *CPEMI;
MachineBasicBlock *HighWaterMark;
+ private:
unsigned MaxDisp;
+ public:
bool NegOk;
bool IsSoImm;
+ bool KnownAlignment;
CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
bool neg, bool soimm)
- : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
+ KnownAlignment(false) {
HighWaterMark = CPEMI->getParent();
}
+ /// getMaxDisp - Returns the maximum displacement supported by MI.
+ /// Correct for unknown alignment.
+ /// Conservatively subtract 2 bytes to handle weird alignment effects.
+ unsigned getMaxDisp() const {
+ return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
+ }
};
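getMaxDisp in isolation; 1020 is an assumed displacement chosen for illustration, not a value taken from this patch:

unsigned effectiveMaxDisp(unsigned MaxDisp, bool KnownAlignment) {
  return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
}
// effectiveMaxDisp(1020, true)  == 1018  (alignment proven)
// effectiveMaxDisp(1020, false) == 1016  (extra safety margin)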
/// CPUsers - Keep track of all of the machine instructions that use various
@@ -162,10 +265,9 @@ namespace {
/// the branch fix up pass.
bool HasFarJump;
- /// HasInlineAsm - True if the function contains inline assembly.
- bool HasInlineAsm;
-
- const ARMInstrInfo *TII;
+ MachineFunction *MF;
+ MachineConstantPool *MCP;
+ const ARMBaseInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -182,85 +284,100 @@ namespace {
}
private:
- void DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs);
+ void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
- void JumpTableFunctionScan(MachineFunction &MF);
- void InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs);
- MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
- void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
- void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
- bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
- int LookForExistingCPEntry(CPUser& U, unsigned UserOffset);
- bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter);
- void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ unsigned getCPELogAlign(const MachineInstr *CPEMI);
+ void scanFunctionJumpTables();
+ void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
+ void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void adjustBBOffsetsAfter(MachineBasicBlock *BB);
+ bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
+ int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
+ bool findAvailableWater(CPUser&U, unsigned UserOffset,
+ water_iterator &WaterIter);
+ void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
MachineBasicBlock *&NewMBB);
- bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
- void RemoveDeadCPEMI(MachineInstr *CPEMI);
- bool RemoveUnusedCPEntries();
- bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
- MachineInstr *CPEMI, unsigned Disp, bool NegOk,
- bool DoDump = false);
- bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
- CPUser &U);
- bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
- unsigned Disp, bool NegativeOK, bool IsSoImm = false);
- bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
- bool UndoLRSpillRestore();
- bool OptimizeThumb2Instructions(MachineFunction &MF);
- bool OptimizeThumb2Branches(MachineFunction &MF);
- bool ReorderThumb2JumpTables(MachineFunction &MF);
- bool OptimizeThumb2JumpTables(MachineFunction &MF);
- MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ bool handleConstantPoolUser(unsigned CPUserIndex);
+ void removeDeadCPEMI(MachineInstr *CPEMI);
+ bool removeUnusedCPEntries();
+ bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U, unsigned &Growth);
+ bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool fixupImmediateBr(ImmBranch &Br);
+ bool fixupConditionalBr(ImmBranch &Br);
+ bool fixupUnconditionalBr(ImmBranch &Br);
+ bool undoLRSpillRestore();
+ bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
+ bool optimizeThumb2Instructions();
+ bool optimizeThumb2Branches();
+ bool reorderThumb2JumpTables();
+ bool optimizeThumb2JumpTables();
+ MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
- unsigned GetOffsetOf(MachineInstr *MI) const;
+ void computeBlockSize(MachineBasicBlock *MBB);
+ unsigned getOffsetOf(MachineInstr *MI) const;
+ unsigned getUserOffset(CPUser&) const;
void dumpBBs();
- void verify(MachineFunction &MF);
+ void verify();
+
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
+ const CPUser &U) {
+ return isOffsetInRange(UserOffset, TrialOffset,
+ U.getMaxDisp(), U.NegOk, U.IsSoImm);
+ }
};
char ARMConstantIslands::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of islands
-void ARMConstantIslands::verify(MachineFunction &MF) {
- assert(BBOffsets.size() == BBSizes.size());
- for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
- assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
- if (!isThumb)
- return;
+void ARMConstantIslands::verify() {
#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() &&
- MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned MBBId = MBB->getNumber();
- assert(HasInlineAsm ||
- (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
- (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
- }
+ unsigned Align = MBB->getAlignment();
+ unsigned MBBId = MBB->getNumber();
+ assert(BBInfo[MBBId].Offset % (1u << Align) == 0);
+ assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
}
+ DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
CPUser &U = CPUsers[i];
- unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
- unsigned CPEOffset = GetOffsetOf(U.CPEMI);
- unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
- UserOffset - CPEOffset;
- assert(Disp <= U.MaxDisp || "Constant pool entry out of range!");
+ unsigned UserOffset = getUserOffset(U);
+ // Verify offset using the real max displacement without the safety
+ // adjustment.
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
+ /* DoDump = */ true)) {
+ DEBUG(dbgs() << "OK\n");
+ continue;
+ }
+ DEBUG(dbgs() << "Out of range.\n");
+ dumpBBs();
+ DEBUG(MF->dump());
+ llvm_unreachable("Constant pool entry out of range!");
}
#endif
}
/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
- for (unsigned J = 0, E = BBOffsets.size(); J !=E; ++J) {
- DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
- << " size " << BBSizes[J] << "\n");
- }
+ DEBUG({
+ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
+ const BasicBlockInfo &BBI = BBInfo[J];
+ dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
+ << " kb=" << unsigned(BBI.KnownBits)
+ << " ua=" << unsigned(BBI.Unalign)
+ << " pa=" << unsigned(BBI.PostAlign)
+ << format(" size=%#x\n", BBInfo[J].Size);
+ }
+ });
}
/// createARMConstantIslandPass - returns an instance of the constpool
@@ -269,34 +386,41 @@ FunctionPass *llvm::createARMConstantIslandPass() {
return new ARMConstantIslands();
}
-bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
- MachineConstantPool &MCP = *MF.getConstantPool();
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ MCP = mf.getConstantPool();
- TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
- AFI = MF.getInfo<ARMFunctionInfo>();
- STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+ DEBUG(dbgs() << "***** ARMConstantIslands: "
+ << MCP->getConstants().size() << " CP entries, aligned to "
+ << MCP->getConstantPoolAlignment() << " bytes *****\n");
+
+ TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo();
+ AFI = MF->getInfo<ARMFunctionInfo>();
+ STI = &MF->getTarget().getSubtarget<ARMSubtarget>();
isThumb = AFI->isThumbFunction();
isThumb1 = AFI->isThumb1OnlyFunction();
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
- HasInlineAsm = false;
+
+ // This pass invalidates liveness information when it splits basic blocks.
+ MF->getRegInfo().invalidateLiveness();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
if (isThumb2 && AdjustJumpTableBlocks) {
- JumpTableFunctionScan(MF);
- MadeChange |= ReorderThumb2JumpTables(MF);
+ scanFunctionJumpTables();
+ MadeChange |= reorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
T2JumpTables.clear();
// Blocks may have shifted around. Keep the numbering up to date.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
}
// Thumb1 functions containing constant pools get 4-byte alignment.
@@ -304,16 +428,13 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// ARM and Thumb2 functions need to be 4-byte aligned.
if (!isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
+ MF->EnsureAlignment(2); // 2 = log2(4)
// Perform the initial placement of the constant pool entries. To start with,
// we put them all at the end of the function.
std::vector<MachineInstr*> CPEMIs;
- if (!MCP.isEmpty()) {
- DoInitialPlacement(MF, CPEMIs);
- if (isThumb1)
- MF.EnsureAlignment(2); // 2 = log2(4)
- }
+ if (!MCP->isEmpty())
+ doInitialPlacement(CPEMIs);
/// The next UID to take is the first unused one.
AFI->initPICLabelUId(CPEMIs.size());
@@ -321,34 +442,36 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Do the initial scan of the function, building up information about the
// sizes of each block, the location of all the water, and finding all of the
// constant pool users.
- InitialFunctionScan(MF, CPEMIs);
+ initializeFunctionInfo(CPEMIs);
CPEMIs.clear();
DEBUG(dumpBBs());
/// Remove dead constant pool entries.
- MadeChange |= RemoveUnusedCPEntries();
+ MadeChange |= removeUnusedCPEntries();
// Iteratively place constant pool entries and fix up branches until there
// is no change.
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
+ DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
bool CPChange = false;
for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
- CPChange |= HandleConstantPoolUser(MF, i);
+ CPChange |= handleConstantPoolUser(i);
if (CPChange && ++NoCPIters > 30)
- llvm_unreachable("Constant Island pass failed to converge!");
+ report_fatal_error("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
// Clear NewWaterList now. If we split a block for branches, it should
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
+ DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
bool BRChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ BRChange |= fixupImmediateBr(ImmBranches[i]);
if (BRChange && ++NoBRIters > 30)
- llvm_unreachable("Branch Fix Up pass failed to converge!");
+ report_fatal_error("Branch Fix Up pass failed to converge!");
DEBUG(dumpBBs());
if (!CPChange && !BRChange)
@@ -358,15 +481,15 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Shrink 32-bit Thumb2 branch, load, and store instructions.
if (isThumb2 && !STI->prefers32BitThumb())
- MadeChange |= OptimizeThumb2Instructions(MF);
+ MadeChange |= optimizeThumb2Instructions();
// After a while, this might be made debug-only, but it is not expensive.
- verify(MF);
+ verify();
// If LR has been forced spilled and no far jump (i.e. BL) has been issued,
// undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
- MadeChange |= UndoLRSpillRestore();
+ MadeChange |= undoLRSpillRestore();
// Save the mapping between original and cloned constpool entries.
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
@@ -376,10 +499,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
}
- DEBUG(errs() << '\n'; dumpBBs());
+ DEBUG(dbgs() << '\n'; dumpBBs());
- BBSizes.clear();
- BBOffsets.clear();
+ BBInfo.clear();
WaterList.clear();
CPUsers.clear();
CPEntries.clear();
@@ -390,39 +512,68 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
-/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// doInitialPlacement - Perform the initial placement of the constant pool
/// entries. To start with, we put them all at the end of the function.
-void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
- std::vector<MachineInstr*> &CPEMIs) {
+void
+ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
// Create the basic block to hold the CPE's.
- MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
- MF.push_back(BB);
+ MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
+ MF->push_back(BB);
+
+ // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
+ unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());
+
+ // Mark the basic block as required by the const-pool.
+ // If AlignConstantIslands isn't set, use 4-byte alignment for everything.
+ BB->setAlignment(AlignConstantIslands ? MaxAlign : 2);
+
+ // The function needs to be as aligned as the basic blocks. The linker may
+ // move functions around based on their alignment.
+ MF->EnsureAlignment(BB->getAlignment());
+
+ // Order the entries in BB by descending alignment. That ensures correct
+ // alignment of all entries as long as BB is sufficiently aligned. Keep
+ // track of the insertion point for each alignment. We are going to bucket
+ // sort the entries as they are created.
+ SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());
// Add all of the constants from the constant pool to the end block, use an
// identity mapping of CPI's to CPE's.
- const std::vector<MachineConstantPoolEntry> &CPs =
- MF.getConstantPool()->getConstants();
+ const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();
- const TargetData &TD = *MF.getTarget().getTargetData();
+ const TargetData &TD = *MF->getTarget().getTargetData();
for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
- // Verify that all constant pool entries are a multiple of 4 bytes. If not,
- // we would have to pad them out or something so that instructions stay
- // aligned.
- assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ assert(Size >= 4 && "Too small constant pool entry");
+ unsigned Align = CPs[i].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid alignment");
+ // Verify that all constant pool entries are a multiple of their alignment.
+ // If not, we would have to pad them out so that instructions stay aligned.
+ assert((Size % Align) == 0 && "CP Entry not a multiple of its alignment!");
+
+ // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
+ unsigned LogAlign = Log2_32(Align);
+ MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
MachineInstr *CPEMI =
- BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(i).addConstantPoolIndex(i).addImm(Size);
CPEMIs.push_back(CPEMI);
+ // Ensure that future entries with higher alignment get inserted before
+ // CPEMI. This is bucket sort with iterators.
+ for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
+ if (InsPoint[a] == InsAt)
+ InsPoint[a] = CPEMI;
+
// Add a new CPEntry, but no corresponding CPUser yet.
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
++NumCPEs;
- DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
- << "\n");
+ DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
+ << Size << ", align = " << Align <<'\n');
}
+ DEBUG(BB->dump());
}
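
The "bucket sort with iterators" trick above is worth isolating. The following is a minimal, self-contained sketch of the same idea; the Entry type and insertSorted helper are illustrative stand-ins, not part of the patch. One remembered insertion point per alignment level keeps the list in descending-alignment order without ever re-sorting.

#include <list>
#include <vector>

struct Entry { unsigned LogAlign; };  // hypothetical stand-in for a CPE

// InsPoint must be initialized to Pool.end() for every alignment level.
void insertSorted(std::list<Entry> &Pool,
                  std::vector<std::list<Entry>::iterator> &InsPoint,
                  const Entry &E, unsigned MaxLogAlign) {
  // Insert before all existing entries with a smaller alignment.
  std::list<Entry>::iterator InsAt = InsPoint[E.LogAlign];
  std::list<Entry>::iterator It = Pool.insert(InsAt, E);
  // Future entries with higher alignment must land before the new entry.
  for (unsigned a = E.LogAlign + 1; a <= MaxLogAlign; ++a)
    if (InsPoint[a] == InsAt)
      InsPoint[a] = It;
}
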
/// BBHasFallthrough - Return true if the specified basic block can fall through
@@ -458,41 +609,61 @@ ARMConstantIslands::CPEntry
return NULL;
}
-/// JumpTableFunctionScan - Do a scan of the function, building up
+/// getCPELogAlign - Returns the required alignment of the constant pool entry
+/// represented by CPEMI. Alignment is measured in log2(bytes) units.
+unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
+ assert(CPEMI && CPEMI->getOpcode() == ARM::CONSTPOOL_ENTRY);
+
+ // Everything is 4-byte aligned unless AlignConstantIslands is set.
+ if (!AlignConstantIslands)
+ return 2;
+
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
+ unsigned Align = MCP->getConstants()[CPI].getAlignment();
+ assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
+ return Log2_32(Align);
+}
+
+/// scanFunctionJumpTables - Do a scan of the function, recording the
/// locations of all the Thumb2 jump table branches.
-void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+void ARMConstantIslands::scanFunctionJumpTables() {
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I)
- if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
T2JumpTables.push_back(I);
}
}
-/// InitialFunctionScan - Do the initial scan of the function, building up
+/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
-void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
- const std::vector<MachineInstr*> &CPEMIs) {
- // First thing, see if the function has any inline assembly in it. If so,
- // we have to be conservative about alignment assumptions, as we don't
- // know for sure the size of any instructions in the inline assembly.
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I)
- if (I->getOpcode() == ARM::INLINEASM)
- HasInlineAsm = true;
- }
+void ARMConstantIslands::
+initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
+ BBInfo.clear();
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
+ computeBlockSize(I);
+
+ // The known bits of the entry block offset are determined by the function
+ // alignment.
+ BBInfo.front().KnownBits = MF->getAlignment();
+
+ // Compute block offsets and known bits.
+ adjustBBOffsetsAfter(MF->begin());
// Now go back through the instructions and build up our data structures.
- unsigned Offset = 0;
- for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
MBBI != E; ++MBBI) {
MachineBasicBlock &MBB = *MBBI;
@@ -501,16 +672,13 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
if (!BBHasFallthrough(&MBB))
WaterList.push_back(&MBB);
- unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (I->isDebugValue())
continue;
- // Add instruction size to MBBSize.
- MBBSize += TII->GetInstSizeInBytes(I);
int Opc = I->getOpcode();
- if (I->getDesc().isBranch()) {
+ if (I->isBranch()) {
bool isCond = false;
unsigned Bits = 0;
unsigned Scale = 1;
@@ -518,18 +686,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
continue; // Ignore other JT branches
- case ARM::tBR_JTr:
- // A Thumb1 table jump may involve padding; for the offsets to
- // be right, functions containing these must be 4-byte aligned.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr + 2
- // is aligned. That is held in Offset+MBBSize, which already has
- // 2 added in for the size of the mov pc instruction.
- MF.EnsureAlignment(2U);
- if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
- // FIXME: Add a pseudo ALIGN instruction instead.
- MBBSize += 2; // padding
- continue; // Does not get an entry in ImmBranches
case ARM::t2BR_JT:
T2JumpTables.push_back(I);
continue; // Does not get an entry in ImmBranches
@@ -589,7 +745,6 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
switch (Opc) {
default:
llvm_unreachable("Unknown addressing mode for CP reference!");
- break;
// Taking the address of a CP entry.
case ARM::LEApcrel:
@@ -647,45 +802,53 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
break;
}
}
+ }
+}
+
+/// computeBlockSize - Compute the size and some alignment information for MBB.
+/// This function updates BBInfo directly.
+void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
+ BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ BBI.Size += TII->GetInstSizeInBytes(I);
+ // For inline asm, GetInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I->isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(I))
+ BBI.Unalign = 1;
+ }
- // In thumb mode, if this block is a constpool island, we may need padding
- // so it's aligned on 4 byte boundary.
- if (isThumb &&
- !MBB.empty() &&
- MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- ((Offset%4) != 0 || HasInlineAsm))
- MBBSize += 2;
-
- BBSizes.push_back(MBBSize);
- BBOffsets.push_back(Offset);
- Offset += MBBSize;
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->EnsureAlignment(2);
}
}
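
A note on what Unalign encodes, since computeBlockSize only ever sets it: when a block contains inline asm or a shrinkable Thumb2 instruction, its size is only known to be a multiple of 1 << Unalign, so at most Unalign low bits of any later offset stay reliable. The real BasicBlockInfo is defined earlier in this file; the helper below is a hedged sketch of how such a field would plausibly feed the known-bits computation, for illustration only.

unsigned internalKnownBitsSketch(unsigned KnownBits, unsigned Unalign) {
  // An uncertain size caps the offset bits we can trust inside the block.
  return Unalign ? Unalign : KnownBits;
}
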
-/// GetOffsetOf - Return the current offset of the specified machine instruction
+/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function. This offset changes as stuff is moved
/// around inside the function.
-unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
// The offset is composed of two things: the sum of the sizes of all MBB's
// before this instruction's block, and the offset from the start of the block
// it is in.
- unsigned Offset = BBOffsets[MBB->getNumber()];
-
- // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
- // alignment padding, and compensate if so.
- if (isThumb &&
- MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
- (Offset%4 != 0 || HasInlineAsm))
- Offset += 2;
+ unsigned Offset = BBInfo[MBB->getNumber()].Offset;
// Sum instructions before MI in MBB.
- for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- if (&*I == MI) return Offset;
Offset += TII->GetInstSizeInBytes(I);
}
+ return Offset;
}
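
The offset computation is simple enough to mirror in a few lines. This standalone sketch (hypothetical names, plain integers in place of machine instructions) shows the two components: the cached block start plus the sizes of the instructions preceding MI.

#include <vector>

unsigned offsetOfSketch(unsigned BlockOffset,                 // BBInfo[...].Offset
                        const std::vector<unsigned> &InsnSizes,
                        unsigned MIIndex) {
  unsigned Offset = BlockOffset;
  for (unsigned i = 0; i != MIIndex; ++i)
    Offset += InsnSizes[i];                 // TII->GetInstSizeInBytes(I)
  return Offset;
}
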
/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
@@ -695,19 +858,16 @@ static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
return LHS->getNumber() < RHS->getNumber();
}
-/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers. Renumber the blocks
/// and update the arrays that parallel this numbering.
-void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
// Renumber the MBB's to keep them consecutive.
NewBB->getParent()->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add NewMBB as having
// available water after it.
@@ -721,15 +881,14 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch. Update data structures and renumber blocks to
/// account for this change and return the newly created block.
-MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
- MachineFunction &MF = *OrigBB->getParent();
// Create a new MBB for the code after the OrigBB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Splice the instructions starting with MI over to NewBB.
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
@@ -747,31 +906,19 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
- while (!OrigBB->succ_empty()) {
- MachineBasicBlock *Succ = *OrigBB->succ_begin();
- OrigBB->removeSuccessor(Succ);
- NewBB->addSuccessor(Succ);
-
- // This pass should be run after register allocation, so there should be no
- // PHI nodes to update.
- assert((Succ->empty() || !Succ->begin()->isPHI())
- && "PHI nodes should be eliminated by now!");
- }
+ NewBB->transferSuccessors(OrigBB);
// OrigBB branches to NewBB.
OrigBB->addSuccessor(NewBB);
// Update internal data structures to account for the newly inserted MBB.
- // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // This is almost the same as updateForInsertedWaterBlock, except that
// the Water goes after OrigBB, not NewBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
- // Insert a size into BBSizes to align it properly with the (newly
+ // Insert an entry into BBInfo to align it properly with the (newly
// renumbered) block numbers.
- BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
-
- // Likewise for BBOffsets.
- BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+ BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
// Next, update WaterList. Specifically, we need to add OrigBB as having
// available water after it (but not if it's already there, which happens
@@ -787,86 +934,56 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- unsigned OrigBBI = OrigBB->getNumber();
- unsigned NewBBI = NewBB->getNumber();
-
- int delta = isThumb1 ? 2 : 4;
-
// Figure out how large OrigBB is. As the first half of the original
// block, it cannot contain a tablejump. The size includes
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- unsigned OrigBBSize = 0;
- for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
- I != E; ++I)
- OrigBBSize += TII->GetInstSizeInBytes(I);
- BBSizes[OrigBBI] = OrigBBSize;
-
- // ...and adjust BBOffsets for NewBB accordingly.
- BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ computeBlockSize(OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
- // Set the size of NewBB in BBSizes. It does not include any padding now.
- BBSizes[NewBBI] = NewBBSize;
-
- MachineInstr* ThumbJTMI = prior(NewBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- // We've added another 2-byte instruction before this tablejump, which
- // means we will always need padding if we didn't before, and vice versa.
-
- // The original offset of the jump instruction was:
- unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
- if (OrigOffset%4 == 0) {
- // We had padding before and now we don't. No net change in code size.
- delta = 0;
- } else {
- // We didn't have padding before and now we do.
- BBSizes[NewBBI] += 2;
- delta = 4;
- }
- }
+ computeBlockSize(NewBB);
// All BBOffsets following these blocks must be modified.
- if (delta)
- AdjustBBOffsetsAfter(NewBB, delta);
+ adjustBBOffsetsAfter(OrigBB);
return NewBB;
}
-/// OffsetIsInRange - Checks whether UserOffset (the location of a constant pool
+/// getUserOffset - Compute the offset of U.MI as seen by the hardware
+/// displacement computation. Update U.KnownAlignment to match its current
+/// basic block location.
+unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
+ unsigned UserOffset = getOffsetOf(U.MI);
+ const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
+ unsigned KnownBits = BBI.internalKnownBits();
+
+ // The value read from PC is offset from the actual instruction address.
+ UserOffset += (isThumb ? 4 : 8);
+
+ // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
+ // Make sure U.getMaxDisp() returns a constrained range.
+ U.KnownAlignment = (KnownBits >= 2);
+
+ // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // For unknown alignments, getMaxDisp() constrains the range instead.
+ if (isThumb && U.KnownAlignment)
+ UserOffset &= ~3u;
+
+ return UserOffset;
+}
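
As a hedged numeric illustration of the PC bias handled above (the helper name and values are assumptions): ARM reads PC as the instruction address plus 8, Thumb plus 4, and Thumb drops the low bits when the alignment is known.

unsigned userOffsetSketch(unsigned InsnAddr, bool IsThumb, bool KnownAlign) {
  unsigned Off = InsnAddr + (IsThumb ? 4 : 8);  // hardware PC bias
  if (IsThumb && KnownAlign)
    Off &= ~3u;  // offsets == 2 (mod 4) are rounded down by the hardware
  return Off;
}
// e.g. a Thumb user at 0x102 gets (0x102 + 4) & ~3u == 0x104.
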
+
+/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
-bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+/// UserOffset is computed by getUserOffset above to include PC adjustments. If
+/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
+/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
+bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
unsigned TrialOffset, unsigned MaxDisp,
bool NegativeOK, bool IsSoImm) {
- // On Thumb offsets==2 mod 4 are rounded down by the hardware for
- // purposes of the displacement computation; compensate for that here.
- // Effectively, the valid range of displacements is 2 bytes smaller for such
- // references.
- unsigned TotalAdj = 0;
- if (isThumb && UserOffset%4 !=0) {
- UserOffset -= 2;
- TotalAdj = 2;
- }
- // CPEs will be rounded up to a multiple of 4.
- if (isThumb && TrialOffset%4 != 0) {
- TrialOffset += 2;
- TotalAdj += 2;
- }
-
- // In Thumb2 mode, later branch adjustments can shift instructions up and
- // cause alignment change. In the worst case scenario this can cause the
- // user's effective address to be subtracted by 2 and the CPE's address to
- // be plus 2.
- if (isThumb2 && TotalAdj != 4)
- MaxDisp -= (4 - TotalAdj);
-
if (UserOffset <= TrialOffset) {
// User before the Trial.
if (TrialOffset - UserOffset <= MaxDisp)
@@ -880,40 +997,71 @@ bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
return false;
}
-/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
-
-bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
- MachineBasicBlock* Water, CPUser &U) {
- unsigned MaxDisp = U.MaxDisp;
- unsigned CPEOffset = BBOffsets[Water->getNumber()] +
- BBSizes[Water->getNumber()];
-
- // If the CPE is to be inserted before the instruction, that will raise
- // the offset of the instruction.
- if (CPEOffset < UserOffset)
- UserOffset += U.CPEMI->getOperand(2).getImm();
-
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+///
+/// Compute how much the function will grow by inserting a CPE after Water.
+bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U,
+ unsigned &Growth) {
+ unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
+ unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
+ unsigned NextBlockOffset, NextBlockAlignment;
+ MachineFunction::const_iterator NextBlock = Water;
+ if (++NextBlock == MF->end()) {
+ NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
+ NextBlockAlignment = 0;
+ } else {
+ NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
+ NextBlockAlignment = NextBlock->getAlignment();
+ }
+ unsigned Size = U.CPEMI->getOperand(2).getImm();
+ unsigned CPEEnd = CPEOffset + Size;
+
+ // The CPE may be able to hide in the alignment padding before the next
+ // block. It may also cause more padding to be required if it is more aligned
+ // than the next block.
+ if (CPEEnd > NextBlockOffset) {
+ Growth = CPEEnd - NextBlockOffset;
+ // Compute the padding that would go at the end of the CPE to align the next
+ // block.
+ Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction. Also account for unknown alignment padding
+ // in blocks between CPE and the user.
+ if (CPEOffset < UserOffset)
+ UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
+ } else
+ // CPE fits in existing padding.
+ Growth = 0;
+
+ return isOffsetInRange(UserOffset, CPEOffset, U);
}
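
A worked example of the growth computation above, with assumed numbers: the water block's padded end is 0x3c, the next block starts at 0x40 and is 8-byte aligned, and the CPE is 8 bytes.

unsigned growthExample() {
  unsigned CPEOffset = 0x3c, Size = 8, NextBlockOffset = 0x40;
  unsigned NextBlockLogAlign = 3;           // next block is 8-byte aligned
  unsigned CPEEnd = CPEOffset + Size;       // 0x44, past the old block start
  unsigned Growth = 0;
  if (CPEEnd > NextBlockOffset) {
    Growth = CPEEnd - NextBlockOffset;      // 4 bytes of overlap
    // Re-pad the tail so the next block stays aligned (0x44 -> 0x48); this
    // mirrors OffsetToAlignment for power-of-two alignments.
    Growth += (0u - CPEEnd) & ((1u << NextBlockLogAlign) - 1);
  }
  return Growth;                            // 8 bytes total
}
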
-/// CPEIsInRange - Returns true if the distance between specific MI and
+/// isCPEntryInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
-bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
MachineInstr *CPEMI, unsigned MaxDisp,
bool NegOk, bool DoDump) {
- unsigned CPEOffset = GetOffsetOf(CPEMI);
- assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+ unsigned CPEOffset = getOffsetOf(CPEMI);
+ assert(CPEOffset % 4 == 0 && "Misaligned CPE");
if (DoDump) {
- DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
- << " max delta=" << MaxDisp
- << " insn address=" << UserOffset
- << " CPE address=" << CPEOffset
- << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ DEBUG({
+ unsigned Block = MI->getParent()->getNumber();
+ const BasicBlockInfo &BBI = BBInfo[Block];
+ dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << format(" insn address=%#x", UserOffset)
+ << " in BB#" << Block << ": "
+ << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
+ << format("CPE address=%#x offset=%+d: ", CPEOffset,
+ int(CPEOffset-UserOffset));
+ });
}
- return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+ return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}
#ifndef NDEBUG
@@ -933,69 +1081,40 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
}
#endif // NDEBUG
-void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
- int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
- for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
- i < e; ++i) {
- BBOffsets[i] += delta;
- // If some existing blocks have padding, adjust the padding as needed, a
- // bit tricky. delta can be negative so don't use % on that.
- if (!isThumb)
- continue;
- MachineBasicBlock *MBB = MBBI;
- if (!MBB->empty() && !HasInlineAsm) {
- // Constant pool entries require padding.
- if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
- unsigned OldOffset = BBOffsets[i] - delta;
- if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- }
- }
- // Thumb1 jump tables require padding. They should be at the end;
- // following unconditional branches are removed by AnalyzeBranch.
- // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
- // table entries. So this code checks whether offset of tBR_JTr
- // is aligned; if it is, the offset of the jump table following the
- // instruction will not be aligned, and we need padding.
- MachineInstr *ThumbJTMI = prior(MBB->end());
- if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
- unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
- unsigned OldMIOffset = NewMIOffset - delta;
- if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
- // remove existing padding
- BBSizes[i] -= 2;
- delta -= 2;
- } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
- // add new padding
- BBSizes[i] += 2;
- delta += 2;
- }
- }
- if (delta==0)
- return;
- }
- MBBI = llvm::next(MBBI);
+void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
+ unsigned BBNum = BB->getNumber();
+ for (unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
+ unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
+ unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
+
+ // This is where block i begins. Stop if the offset is already correct,
+ // and we have updated at least 2 blocks, the maximum number of blocks
+ // a caller changes before invoking this function.
+ if (i > BBNum + 2 &&
+ BBInfo[i].Offset == Offset &&
+ BBInfo[i].KnownBits == KnownBits)
+ break;
+
+ BBInfo[i].Offset = Offset;
+ BBInfo[i].KnownBits = KnownBits;
}
}
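
The loop above leans on BasicBlockInfo's postOffset/postKnownBits, which are defined earlier in the file. Below is a self-contained sketch of plausible semantics; the names and exact formulas are assumptions for illustration, not the patch's code.

#include <algorithm>

struct BlockInfoSketch {
  unsigned Offset;     // worst-case start address of the block
  unsigned Size;       // code bytes in the block
  unsigned KnownBits;  // low bits of Offset known to be zero

  // Worst-case start of the next block if it needs 2^LogAlign alignment:
  // add the padding the unknown low bits could force, then round up.
  unsigned postOffset(unsigned LogAlign) const {
    unsigned PO = Offset + Size;
    if (!LogAlign)
      return PO;
    unsigned Align = 1u << LogAlign;
    if (KnownBits < LogAlign)                // unknown bits may force padding
      PO += Align - (1u << KnownBits);
    return (PO + Align - 1) & ~(Align - 1);  // then round up, worst case
  }

  // Aligning to 2^LogAlign guarantees at least LogAlign known zero bits.
  unsigned postKnownBits(unsigned LogAlign) const {
    return std::max(LogAlign, KnownBits);
  }
};
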
-/// DecrementOldEntry - find the constant pool entry with index CPI
+/// decrementCPEReferenceCount - find the constant pool entry with index CPI
/// and instruction CPEMI, and decrement its refcount. If the refcount
/// becomes 0 remove the entry and instruction. Returns true if we removed
/// the entry, false if we didn't.
-bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
+ MachineInstr *CPEMI) {
// Find the old entry. Eliminate it if it is no longer used.
CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
- RemoveDeadCPEMI(CPEMI);
+ removeDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
--NumCPEs;
return true;
@@ -1009,14 +1128,15 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
/// 0 = no existing entry found
/// 1 = entry found, and there were no code insertions or deletions
/// 2 = entry found, and there were code insertions or deletions
-int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
+int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
{
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
// Check to see if the CPE is already in-range.
- if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
- DEBUG(errs() << "In range\n");
+ if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
+ true)) {
+ DEBUG(dbgs() << "In range\n");
return 1;
}
@@ -1030,8 +1150,9 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
// Removing CPEs can leave empty entries, skip
if (CPEs[i].CPEMI == NULL)
continue;
- if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
- DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
+ U.NegOk)) {
+ DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
<< CPEs[i].CPI << "\n");
// Point the CPUser node to the replacement
U.CPEMI = CPEs[i].CPEMI;
@@ -1045,7 +1166,7 @@ int ARMConstantIslands::LookForExistingCPEntry(CPUser& U, unsigned UserOffset)
CPEs[i].RefCount++;
// ...and the original. If we didn't remove the old entry, none of the
// addresses changed, so we don't need another pass.
- return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
}
}
return 0;
@@ -1066,7 +1187,7 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
return ((1<<23)-1)*4;
}
-/// LookForWater - Look for an existing entry in the WaterList in which
+/// findAvailableWater - Look for an existing entry in the WaterList in which
/// we can place the CPE referenced from U so it's within range of U's MI.
/// Returns true if found, false if not. If it returns true, WaterIter
/// is set to the WaterList entry. For Thumb, prefer water that will not
@@ -1074,15 +1195,14 @@ static inline unsigned getUnconditionalBrDisp(int Opc) {
/// terminates, the CPE location for a particular CPUser is only allowed to
/// move to a lower address, so search backward from the end of the list and
/// prefer the first water that is in range.
-bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
water_iterator &WaterIter) {
if (WaterList.empty())
return false;
- bool FoundWaterThatWouldPad = false;
- water_iterator IPThatWouldPad;
- for (water_iterator IP = prior(WaterList.end()),
- B = WaterList.begin();; --IP) {
+ unsigned BestGrowth = ~0u;
+ for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();;
+ --IP) {
MachineBasicBlock* WaterBB = *IP;
// Check if water is in range and is either at a lower address than the
// current "high water mark" or a new water block that was created since
@@ -1092,166 +1212,186 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
// should be relatively uncommon and when it does happen, we want to be
// sure to take advantage of it for all the CPEs near that block, so that
// we don't insert more branches than necessary.
- if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ unsigned Growth;
+ if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
(WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
- NewWaterList.count(WaterBB))) {
- unsigned WBBId = WaterBB->getNumber();
- if (isThumb &&
- (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
- // This is valid Water, but would introduce padding. Remember
- // it in case we don't find any Water that doesn't do this.
- if (!FoundWaterThatWouldPad) {
- FoundWaterThatWouldPad = true;
- IPThatWouldPad = IP;
- }
- } else {
- WaterIter = IP;
+ NewWaterList.count(WaterBB)) && Growth < BestGrowth) {
+ // This is the least amount of required padding seen so far.
+ BestGrowth = Growth;
+ WaterIter = IP;
+ DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
+ << " Growth=" << Growth << '\n');
+
+ // Keep looking unless it is perfect.
+ if (BestGrowth == 0)
return true;
- }
}
if (IP == B)
break;
}
- if (FoundWaterThatWouldPad) {
- WaterIter = IPThatWouldPad;
- return true;
- }
- return false;
+ return BestGrowth != ~0u;
}
-/// CreateNewWater - No existing WaterList entry will work for
+/// createNewWater - No existing WaterList entry will work for
/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
/// block is used if in range, and the conditional branch munged so control
/// flow is correct. Otherwise the block is split to create a hole with an
/// unconditional branch around it. In either case NewMBB is set to a
/// block following which the new island can be inserted (the WaterList
/// is not adjusted).
-void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned UserOffset,
MachineBasicBlock *&NewMBB) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPELogAlign = getCPELogAlign(CPEMI);
MachineBasicBlock *UserMBB = UserMI->getParent();
- unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
- BBSizes[UserMBB->getNumber()];
- assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]);
+ const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
// If the block does not end in an unconditional branch already, and if the
// end of the block is within range, make new water there. (The addition
// below is for the unconditional branch we will be adding: 4 bytes on ARM +
- // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for
- // inside OffsetIsInRange.
- if (BBHasFallthrough(UserMBB) &&
- OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4),
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- DEBUG(errs() << "Split at end of block\n");
- if (&UserMBB->back() == UserMI)
- assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
- // Add an unconditional branch from UserMBB to fallthrough block.
- // Record it for branch lengthening; this new branch will not get out of
- // range, but if the preceding conditional branch is out of range, the
- // targets will be exchanged, and the altered branch may be out of
- // range, so the machinery has to know about it.
- int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
- if (!isThumb)
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
- else
- BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
- .addImm(ARMCC::AL).addReg(0);
- unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
- ImmBranches.push_back(ImmBranch(&UserMBB->back(),
- MaxDisp, false, UncondBr));
- int delta = isThumb1 ? 2 : 4;
- BBSizes[UserMBB->getNumber()] += delta;
- AdjustBBOffsetsAfter(UserMBB, delta);
- } else {
- // What a big block. Find a place within the block to split it.
- // This is a little tricky on Thumb1 since instructions are 2 bytes
- // and constant pool entries are 4 bytes: if instruction I references
- // island CPE, and instruction I+1 references CPE', it will
- // not work well to put CPE as far forward as possible, since then
- // CPE' cannot immediately follow it (that location is 2 bytes
- // farther away from I+1 than CPE was from I) and we'd need to create
- // a new island. So, we make a first guess, then walk through the
- // instructions between the one currently being looked at and the
- // possible insertion point, and make sure any other instructions
- // that reference CPEs will be able to use the same island area;
- // if not, we back up the insertion point.
-
- // The 4 in the following is for the unconditional branch we'll be
- // inserting (allows for long branch on Thumb1). Alignment of the
- // island is handled inside OffsetIsInRange.
- unsigned BaseInsertOffset = UserOffset + U.MaxDisp -4;
- // This could point off the end of the block if we've already got
- // constant pool entries following this block; only the last one is
- // in the water list. Back past any possible branches (allow for a
- // conditional and a maximally long unconditional).
- if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
- BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
- (isThumb1 ? 6 : 8);
- unsigned EndInsertOffset = BaseInsertOffset +
- CPEMI->getOperand(2).getImm();
- MachineBasicBlock::iterator MI = UserMI;
- ++MI;
- unsigned CPUIndex = CPUserIndex+1;
- unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
- Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
- CPUser &U = CPUsers[CPUIndex];
- if (!OffsetIsInRange(Offset, EndInsertOffset,
- U.MaxDisp, U.NegOk, U.IsSoImm)) {
- BaseInsertOffset -= (isThumb1 ? 2 : 4);
- EndInsertOffset -= (isThumb1 ? 2 : 4);
- }
- // This is overly conservative, as we don't account for CPEMIs
- // being reused within the block, but it doesn't matter much.
- EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
- CPUIndex++;
- }
+ // Thumb2, 2 on Thumb1.
+ if (BBHasFallthrough(UserMBB)) {
+ // Size of branch to insert.
+ unsigned Delta = isThumb1 ? 2 : 4;
+ // End of UserBlock after adding a branch.
+ unsigned UserBlockEnd = UserBBI.postOffset() + Delta;
+ // Compute the offset where the CPE will begin.
+ unsigned CPEOffset = WorstCaseAlign(UserBlockEnd, CPELogAlign,
+ UserBBI.postKnownBits());
+
+ if (isOffsetInRange(UserOffset, CPEOffset, U)) {
+ DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
+ << format(", expected CPE offset %#x\n", CPEOffset));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block. Record
+ // it for branch lengthening; this new branch will not get out of range,
+ // but if the preceding conditional branch is out of range, the targets
+ // will be exchanged, and the altered branch may be out of range, so the
+ // machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ if (!isThumb)
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ else
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
+ .addImm(ARMCC::AL).addReg(0);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ BBInfo[UserMBB->getNumber()].Size += Delta;
+ adjustBBOffsetsAfter(UserMBB);
+ return;
+ }
+ }
- // Remember the last IT instruction.
- if (MI->getOpcode() == ARM::t2IT)
- LastIT = MI;
+ // What a big block. Find a place within the block to split it. This is a
+ // little tricky on Thumb1 since instructions are 2 bytes and constant pool
+ // entries are 4 bytes: if instruction I references island CPE, and
+ // instruction I+1 references CPE', it will not work well to put CPE as far
+ // forward as possible, since then CPE' cannot immediately follow it (that
+ // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
+ // need to create a new island. So, we make a first guess, then walk through
+ // the instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions that
+ // reference CPEs will be able to use the same island area; if not, we back
+ // up the insertion point.
+
+ // Try to split the block so it's fully aligned. Compute the latest split
+ // point where we can add a 4-byte branch instruction, and then use
+ // WorstCaseAlign to pad the offset out to LogAlign.
+ unsigned LogAlign = MF->getAlignment();
+ assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
+ unsigned KnownBits = UserBBI.internalKnownBits();
+ unsigned UPad = UnknownPadding(LogAlign, KnownBits);
+ unsigned BaseInsertOffset = UserOffset + U.getMaxDisp();
+ DEBUG(dbgs() << format("Split in middle of big block before %#x",
+ BaseInsertOffset));
+
+ // Account for alignment and unknown padding.
+ BaseInsertOffset &= ~((1u << LogAlign) - 1);
+ BaseInsertOffset -= UPad;
+
+ // The 4 in the following is for the unconditional branch we'll be inserting
+ // (allows for long branch on Thumb1). Alignment of the island is handled
+ // inside isOffsetInRange.
+ BaseInsertOffset -= 4;
+
+ DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
+ << " la=" << LogAlign
+ << " kb=" << KnownBits
+ << " up=" << UPad << '\n');
+
+ // This could point off the end of the block if we've already got constant
+ // pool entries following this block; only the last one is in the water list.
+ // Back past any possible branches (allow for a conditional and a maximally
+ // long unconditional).
+ if (BaseInsertOffset >= BBInfo[UserMBB->getNumber()+1].Offset)
+ BaseInsertOffset = BBInfo[UserMBB->getNumber()+1].Offset -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset =
+ WorstCaseAlign(BaseInsertOffset + 4, LogAlign, KnownBits) +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
+ // Shift the insertion point by one unit of alignment so it is within reach.
+ BaseInsertOffset -= 1u << LogAlign;
+ EndInsertOffset -= 1u << LogAlign;
+ }
+ // This is overly conservative, as we don't account for CPEMIs being
+ // reused within the block, but it doesn't matter much. Also assume CPEs
+ // are added in order with alignment padding. We may eventually be able
+ // to pack the aligned CPEs better.
+ EndInsertOffset = RoundUpToAlignment(EndInsertOffset,
+ 1u << getCPELogAlign(U.CPEMI)) +
+ U.CPEMI->getOperand(2).getImm();
+ CPUIndex++;
}
- DEBUG(errs() << "Split in middle of big block\n");
- --MI;
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ --MI;
- // Avoid splitting an IT block.
- if (LastIT) {
- unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
- if (CC != ARMCC::AL)
- MI = LastIT;
- }
- NewMBB = SplitBlockBeforeInstr(MI);
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
}
+ NewMBB = splitBlockBeforeInstr(MI);
}
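
UPad above comes from the UnknownPadding helper added elsewhere in this patch; a hedged reconstruction of its likely shape follows. With KnownBits low offset bits known to be zero, each bit in [KnownBits, LogAlign) may force padding, and the worst case is their sum.

unsigned unknownPaddingSketch(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);  // e.g. (2, 0) -> 3 bytes
  return 0;  // offset already known to be sufficiently aligned
}
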
-/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// handleConstantPoolUser - Analyze the specified user, checking to see if it
/// is out-of-range. If so, pick up the constant pool value and move it some
/// place in-range. Return true if we changed any addresses (thus must run
/// another pass of branch lengthening), false otherwise.
-bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
- unsigned CPUserIndex) {
+bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
CPUser &U = CPUsers[CPUserIndex];
MachineInstr *UserMI = U.MI;
MachineInstr *CPEMI = U.CPEMI;
unsigned CPI = CPEMI->getOperand(1).getIndex();
unsigned Size = CPEMI->getOperand(2).getImm();
- // Compute this only once, it's expensive. The 4 or 8 is the value the
- // hardware keeps in the PC.
- unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+ // Compute this only once, it's expensive.
+ unsigned UserOffset = getUserOffset(U);
// See if the current entry is within range, or there is a clone of it
// in range.
- int result = LookForExistingCPEntry(U, UserOffset);
+ int result = findInRangeCPEntry(U, UserOffset);
if (result==1) return false;
else if (result==2) return true;
@@ -1260,11 +1400,11 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
unsigned ID = AFI->createPICLabelUId();
// Look for water where we can place this CPE.
- MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
MachineBasicBlock *NewMBB;
water_iterator IP;
- if (LookForWater(U, UserOffset, IP)) {
- DEBUG(errs() << "found water in range\n");
+ if (findAvailableWater(U, UserOffset, IP)) {
+ DEBUG(dbgs() << "Found water in range\n");
MachineBasicBlock *WaterBB = *IP;
// If the original WaterList entry was "new water" on this iteration,
@@ -1279,10 +1419,10 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
} else {
// No water found.
- DEBUG(errs() << "No water found\n");
- CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+ DEBUG(dbgs() << "No water found\n");
+ createNewWater(CPUserIndex, UserOffset, NewMBB);
- // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // splitBlockBeforeInstr adds to WaterList, which is important when it is
// called while handling branches so that the water will be seen on the
// next iteration for constant pools, but in this context, we don't want
// it. Check for this so it will be removed from the WaterList.
@@ -1304,13 +1444,13 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
WaterList.erase(IP);
// Okay, we know we can put an island before NewMBB now, do it!
- MF.insert(NewMBB, NewIsland);
+ MF->insert(NewMBB, NewIsland);
// Update internal data structures to account for the newly inserted MBB.
- UpdateForInsertedWaterBlock(NewIsland);
+ updateForInsertedWaterBlock(NewIsland);
// Decrement the old entry, and remove it if refcount becomes 0.
- DecrementOldEntry(CPI, CPEMI);
+ decrementCPEReferenceCount(CPI, CPEMI);
// Now that we have an island to add the CPE to, clone the original CPE and
// add it to the island.
@@ -1320,13 +1460,12 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
++NumCPEs;
- BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
- // Compensate for .align 2 in thumb mode.
- if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
- Size += 2;
+ // Mark the basic block as aligned as required by the const-pool entry.
+ NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
+
// Increase the size of the island block to account for the new entry.
- BBSizes[NewIsland->getNumber()] += Size;
- AdjustBBOffsetsAfter(NewIsland, Size);
+ BBInfo[NewIsland->getNumber()].Size += Size;
+ adjustBBOffsetsAfter(llvm::prior(MachineFunction::iterator(NewIsland)));
// Finally, change the CPI in the instruction operand to be ID.
for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1335,31 +1474,30 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
break;
}
- DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
- << '\t' << *UserMI);
+ DEBUG(dbgs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
return true;
}
-/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
/// sizes and offsets of impacted basic blocks.
-void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
MachineBasicBlock *CPEBB = CPEMI->getParent();
unsigned Size = CPEMI->getOperand(2).getImm();
CPEMI->eraseFromParent();
- BBSizes[CPEBB->getNumber()] -= Size;
+ BBInfo[CPEBB->getNumber()].Size -= Size;
// All succeeding offsets have the current size value added in, fix this.
if (CPEBB->empty()) {
- // In thumb1 mode, the size of island may be padded by two to compensate for
- // the alignment requirement. Then it will now be 2 when the block is
- // empty, so fix this.
- // All succeeding offsets have the current size value added in, fix this.
- if (BBSizes[CPEBB->getNumber()] != 0) {
- Size += BBSizes[CPEBB->getNumber()];
- BBSizes[CPEBB->getNumber()] = 0;
- }
- }
- AdjustBBOffsetsAfter(CPEBB, -Size);
+ BBInfo[CPEBB->getNumber()].Size = 0;
+
+ // This block no longer needs to be aligned. <rdar://problem/10534709>.
+ CPEBB->setAlignment(0);
+ } else
+ // Entries are sorted by descending alignment, so realign from the front.
+ CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
+
+ adjustBBOffsetsAfter(CPEBB);
// An island has only one predecessor BB and one successor BB. Check if
// this BB's predecessor jumps directly to this BB's successor. This
// shouldn't happen currently.
@@ -1367,15 +1505,15 @@ void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
// FIXME: remove the empty blocks after all the work is done?
}
-/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// removeUnusedCPEntries - Remove constant pool entries whose refcounts
/// are zero.
-bool ARMConstantIslands::RemoveUnusedCPEntries() {
+bool ARMConstantIslands::removeUnusedCPEntries() {
bool MadeChange = false;
for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
std::vector<CPEntry> &CPEs = CPEntries[i];
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
- RemoveDeadCPEMI(CPEs[j].CPEMI);
+ removeDeadCPEMI(CPEs[j].CPEMI);
CPEs[j].CPEMI = NULL;
MadeChange = true;
}
@@ -1384,18 +1522,18 @@ bool ARMConstantIslands::RemoveUnusedCPEntries() {
return MadeChange;
}
-/// BBIsInRange - Returns true if the distance between specific MI and
+/// isBBInRange - Returns true if the distance between specific MI and
/// specific BB can fit in MI's displacement field.
-bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
+bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
unsigned MaxDisp) {
unsigned PCAdj = isThumb ? 4 : 8;
- unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned BrOffset = getOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
- DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
<< " from BB#" << MI->getParent()->getNumber()
<< " max delta=" << MaxDisp
- << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " from " << getOffsetOf(MI) << " to " << DestOffset
<< " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
if (BrOffset <= DestOffset) {
@@ -1409,50 +1547,50 @@ bool ARMConstantIslands::BBIsInRange(MachineInstr *MI,MachineBasicBlock *DestBB,
return false;
}
-/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// fixupImmediateBr - Fix up an immediate branch whose destination is too far
/// away to fit in its displacement field.
-bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
// Check to see if the DestBB is already in-range.
- if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ if (isBBInRange(MI, DestBB, Br.MaxDisp))
return false;
if (!Br.isCond)
- return FixUpUnconditionalBr(MF, Br);
- return FixUpConditionalBr(MF, Br);
+ return fixupUnconditionalBr(Br);
+ return fixupConditionalBr(Br);
}
-/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
/// too far away to fit in its displacement field. If the LR register has been
/// spilled in the epilogue, then we can use BL to implement a far jump.
/// Otherwise, add an intermediate branch instruction to a branch.
bool
-ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *MBB = MI->getParent();
if (!isThumb1)
- llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+ llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
// Use BL to implement far jump.
Br.MaxDisp = (1 << 21) * 2;
MI->setDesc(TII->get(ARM::tBfar));
- BBSizes[MBB->getNumber()] += 2;
- AdjustBBOffsetsAfter(MBB, 2);
+ BBInfo[MBB->getNumber()].Size += 2;
+ adjustBBOffsetsAfter(MBB);
HasFarJump = true;
++NumUBrFixed;
- DEBUG(errs() << " Changed B to long jump " << *MI);
+ DEBUG(dbgs() << " Changed B to long jump " << *MI);
return true;
}
-/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// fixupConditionalBr - Fix up a conditional branch whose destination is too
/// far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
bool
-ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
MachineInstr *MI = Br.MI;
MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
@@ -1486,8 +1624,8 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// bne L2
// b L1
MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
- if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
- DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
MI->getOperand(0).setMBB(NewDest);
@@ -1498,19 +1636,17 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
}
if (NeedSplit) {
- SplitBlockBeforeInstr(MI);
+ splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
- BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
- AdjustBBOffsetsAfter(SplitBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
- // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
}
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
- DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
<< NextBB->getNumber() << "\n");
@@ -1519,30 +1655,27 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
MI->eraseFromParent();
-
- // The net size change is an addition of one unconditional branch.
- int delta = TII->GetInstSizeInBytes(&MBB->back());
- AdjustBBOffsetsAfter(MBB, delta);
+ adjustBBOffsetsAfter(MBB);
return true;
}
-/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spills
+/// undoLRSpillRestore - Remove Thumb push / pop instructions that only spills
/// LR / restores LR to pc. FIXME: This is done here because it's only possible
/// to do this if tBfar is not used.
-bool ARMConstantIslands::UndoLRSpillRestore() {
+bool ARMConstantIslands::undoLRSpillRestore() {
bool MadeChange = false;
for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
MachineInstr *MI = PushPopMIs[i];
@@ -1561,7 +1694,26 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
return MadeChange;
}
-bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+bool
+ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
// Shrink ADR and LDR from constantpool.
@@ -1592,25 +1744,31 @@ bool ARMConstantIslands::OptimizeThumb2Instructions(MachineFunction &MF) {
if (!NewOpc)
continue;
- unsigned UserOffset = GetOffsetOf(U.MI) + 4;
+ unsigned UserOffset = getUserOffset(U);
unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
+
+ // Be conservative with inline asm.
+ if (!U.KnownAlignment)
+ MaxOffs -= 2;
+
// FIXME: Check if offset is multiple of scale if scale is not 4.
- if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
+ DEBUG(dbgs() << "Shrink: " << *U.MI);
U.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = U.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumT2CPShrunk;
MadeChange = true;
}
}
- MadeChange |= OptimizeThumb2Branches(MF);
- MadeChange |= OptimizeThumb2JumpTables(MF);
+ MadeChange |= optimizeThumb2Branches();
+ MadeChange |= optimizeThumb2JumpTables();
return MadeChange;
}
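
A worked instance of the conservative range above (the opcode parameters are assumed): shrinking a t2LDRpci to the Thumb1 form gives an 8-bit immediate scaled by 4, and an unknown (mod 4) user alignment costs 2 bytes of slack.

unsigned conservativeMaxOffs(bool KnownAlignment) {
  unsigned Bits = 8, Scale = 4;                   // assumed tLDRpci encoding
  unsigned MaxOffs = ((1u << Bits) - 1) * Scale;  // 255 * 4 == 1020 bytes
  if (!KnownAlignment)
    MaxOffs -= 2;                                 // unknown mod-4 alignment
  return MaxOffs;                                 // 1020 or 1018
}
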
-bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
+bool ARMConstantIslands::optimizeThumb2Branches() {
bool MadeChange = false;
for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) {
@@ -1636,11 +1794,12 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (NewOpc) {
unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
- if (BBIsInRange(Br.MI, DestBB, MaxOffs)) {
+ if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
+ DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
Br.MI->setDesc(TII->get(NewOpc));
MachineBasicBlock *MBB = Br.MI->getParent();
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumT2BrShrunk;
MadeChange = true;
}
@@ -1650,9 +1809,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
if (Opcode != ARM::tBcc)
continue;
+ // If the conditional branch doesn't kill CPSR, then CPSR can be live out
+ // so this transformation is not safe.
+ if (!Br.MI->killsRegister(ARM::CPSR))
+ continue;
+
NewOpc = 0;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
if (Pred == ARMCC::EQ)
NewOpc = ARM::tCBZ;
else if (Pred == ARMCC::NE)
@@ -1662,27 +1826,28 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
// Check if the distance is within 126. Subtract 2 from the starting
// offset because the cmp will be eliminated.
- unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2;
- unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+ unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
+ unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
MachineBasicBlock::iterator CmpMI = Br.MI;
if (CmpMI != Br.MI->getParent()->begin()) {
--CmpMI;
if (CmpMI->getOpcode() == ARM::tCMPi8) {
unsigned Reg = CmpMI->getOperand(0).getReg();
- Pred = llvm::getInstrPredicate(CmpMI, PredReg);
+ Pred = getInstrPredicate(CmpMI, PredReg);
if (Pred == ARMCC::AL &&
CmpMI->getOperand(1).getImm() == 0 &&
isARMLowRegister(Reg)) {
MachineBasicBlock *MBB = Br.MI->getParent();
+ DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
MachineInstr *NewBR =
BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
.addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags());
CmpMI->eraseFromParent();
Br.MI->eraseFromParent();
Br.MI = NewBR;
- BBSizes[MBB->getNumber()] -= 2;
- AdjustBBOffsetsAfter(MBB, -2);
+ BBInfo[MBB->getNumber()].Size -= 2;
+ adjustBBOffsetsAfter(MBB);
++NumCBZ;
MadeChange = true;
}
@@ -1694,14 +1859,14 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
return MadeChange;
}
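The cmp-plus-branch fold above spreads its legality conditions through the loop; collected into one hedged predicate (the helper name is invented, the individual tests are the ones visible above; the preceding tCMPi8 must additionally compare Reg against #0 under no predicate):

  // Sketch: when "cmp rN, #0" followed by "beq/bne label" may become tCBZ/tCBNZ.
  static bool canFoldToCBZ(unsigned BrOffset, unsigned DestOffset,
                           ARMCC::CondCodes Pred, unsigned Reg,
                           bool BranchKillsCPSR) {
    return BranchKillsCPSR &&                    // otherwise CPSR may be live out
           (Pred == ARMCC::EQ || Pred == ARMCC::NE) &&
           isARMLowRegister(Reg) &&              // tCBZ/tCBNZ only encode r0-r7
           BrOffset < DestOffset &&              // forward branches only
           DestOffset - BrOffset <= 126;         // range after the cmp is removed
  }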
-/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
+/// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jump tables when possible.
-bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::optimizeThumb2JumpTables() {
bool MadeChange = false;
// FIXME: After the tables are shrunk, can we get rid of some of the
// constantpool tables?
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1709,18 +1874,18 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
bool ByteOk = true;
bool HalfWordOk = true;
- unsigned JTOffset = GetOffsetOf(MI) + 4;
+ unsigned JTOffset = getOffsetOf(MI) + 4;
const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
MachineBasicBlock *MBB = JTBBs[j];
- unsigned DstOffset = BBOffsets[MBB->getNumber()];
+ unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
// A negative offset is not OK. FIXME: We should change the BB layout to
// make sure all the branches are forward.
if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
@@ -1791,11 +1956,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
+ DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI
+ << " lea: " << *LeaMI);
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc))
.addReg(IdxReg, getKillRegState(IdxRegKill))
.addJumpTableIndex(JTI, JTOP.getTargetFlags())
.addImm(MI->getOperand(JTOpIdx+1).getImm());
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
// FIXME: Insert an "ALIGN" instruction to ensure the next instruction
// is 2-byte aligned. For now, asm printer will fix it up.
unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
@@ -1808,8 +1976,8 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
MI->eraseFromParent();
int delta = OrigSize - NewSize;
- BBSizes[MBB->getNumber()] -= delta;
- AdjustBBOffsetsAfter(MBB, -delta);
+ BBInfo[MBB->getNumber()].Size -= delta;
+ adjustBBOffsetsAfter(MBB);
++NumTBs;
MadeChange = true;
@@ -1819,12 +1987,12 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
return MadeChange;
}
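tbb stores byte-sized table entries and tbh halfword-sized ones, each scaled by 2, so the feasibility scan above reduces to two range checks; the tbh bound here is an assumption based on the symmetric test that the hunk context truncates:

  // Sketch: both forms require non-negative (forward) spans from the table.
  static bool fitsTBB(unsigned JTOffset, unsigned DstOffset) {
    return DstOffset >= JTOffset && DstOffset - JTOffset <= ((1u << 8) - 1) * 2;
  }
  static bool fitsTBH(unsigned JTOffset, unsigned DstOffset) {
    return DstOffset >= JTOffset && DstOffset - JTOffset <= ((1u << 16) - 1) * 2;
  }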
-/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
/// jump tables always branch forwards, since that's what tbb and tbh need.
-bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+bool ARMConstantIslands::reorderThumb2JumpTables() {
bool MadeChange = false;
- MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
if (MJTI == 0) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -1832,7 +2000,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineInstr *MI = T2JumpTables[i];
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
- unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
+ unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1850,7 +2018,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
// The destination precedes the switch. Try to move the block forward
// so we have a positive offset.
MachineBasicBlock *NewBB =
- AdjustJTTargetBlockForward(MBB, MI->getParent());
+ adjustJTTargetBlockForward(MBB, MI->getParent());
if (NewBB)
MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
MadeChange = true;
@@ -1862,10 +2030,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
}
MachineBasicBlock *ARMConstantIslands::
-AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
-{
- MachineFunction &MF = *BB->getParent();
-
+adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
@@ -1882,22 +2047,22 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
// If the block ends in an unconditional branch, move it. The prior block
// has to have an analyzable terminator for us to move this one. Be paranoid
// and make sure we're not trying to move the entry block of the function.
- if (!B && Cond.empty() && BB != MF.begin() &&
+ if (!B && Cond.empty() && BB != MF->begin() &&
!TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
BB->moveAfter(JTBB);
OldPrior->updateTerminator();
BB->updateTerminator();
// Update numbering to account for the block being moved.
- MF.RenumberBlocks();
+ MF->RenumberBlocks();
++NumJTMoved;
return NULL;
}
// Create a new MBB for the code after the jump BB.
MachineBasicBlock *NewBB =
- MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
MachineFunction::iterator MBBI = JTBB; ++MBBI;
- MF.insert(MBBI, NewBB);
+ MF->insert(MBBI, NewBB);
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
@@ -1907,7 +2072,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
.addImm(ARMCC::AL).addReg(0);
// Update internal data structures to account for the newly inserted MBB.
- MF.RenumberBlocks(NewBB);
+ MF->RenumberBlocks(NewBB);
// Update the CFG.
NewBB->addSuccessor(BB);
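The recurring mechanical change in this file -- BBSizes[n]/BBOffsets[n] becoming BBInfo[n].Size/.Offset, and adjustBBOffsetsAfter losing its delta argument -- means later block offsets are recomputed from block sizes rather than patched by a signed delta. A minimal sketch of that recomputation, ignoring the alignment padding the real pass must also account for:

  // Sketch: after BBInfo[BBNum].Size changes, refresh all following offsets.
  static void recomputeOffsetsAfter(std::vector<BasicBlockInfo> &BBInfo,
                                    unsigned BBNum) {
    for (unsigned i = BBNum + 1, e = BBInfo.size(); i != e; ++i)
      BBInfo[i].Offset = BBInfo[i - 1].Offset + BBInfo[i - 1].Size;
  }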
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index aadfd4779db3..fa3226e37eb9 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -1,4 +1,4 @@
-//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===//
+//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -48,7 +48,6 @@ ARMConstantPoolValue::~ARMConstantPoolValue() {}
const char *ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
- default: llvm_unreachable("Unknown modifier!");
// FIXME: Are these case sensitive? It'd be nice to lower-case all the
// strings if that's legal.
case ARMCP::no_modifier: return "none";
@@ -58,12 +57,12 @@ const char *ARMConstantPoolValue::getModifierText() const {
case ARMCP::GOTTPOFF: return "gottpoff";
case ARMCP::TPOFF: return "tpoff";
}
+ llvm_unreachable("Unknown modifier!");
}
int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) {
- assert(false && "Shouldn't be calling this directly!");
- return -1;
+ llvm_unreachable("Shouldn't be calling this directly!");
}
void
@@ -315,5 +314,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) {
}
void ARMConstantPoolMBB::print(raw_ostream &O) const {
+ O << "BB#" << MBB->getNumber();
ARMConstantPoolValue::print(O);
}
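The getModifierText rewrite above follows a general LLVM idiom: drop the default case, handle every enumerator, and put llvm_unreachable after the switch, so compilers that warn on covered-switch defaults stay quiet while a newly added enumerator triggers a missing-case warning. Schematically, with a stand-in enum:

  // Sketch of the idiom; Color is an illustrative enum, not from the patch.
  enum Color { Red, Green };
  static const char *name(Color C) {
    switch (C) {
    case Red:   return "red";
    case Green: return "green";
    }
    llvm_unreachable("Unknown color!");
  }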
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 0d0def32b7d8..6b98d446b003 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -1,4 +1,4 @@
-//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===//
+//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
index 51e68b4553ff..f671317d0948 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -41,43 +41,38 @@ unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
case ARM::reloc_arm_machine_cp_entry:
case ARM::reloc_arm_jt_base:
case ARM::reloc_arm_pic_jt:
- assert(0 && "unsupported ARM relocation type"); break;
-
- case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break;
- case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break;
- case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break;
+ llvm_unreachable("unsupported ARM relocation type");
+
+ case ARM::reloc_arm_branch: return ELF::R_ARM_CALL;
+ case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS;
+ case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC;
default:
- llvm_unreachable("unknown ARM relocation type"); break;
+ llvm_unreachable("unknown ARM relocation type");
}
- return 0;
}
long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
long int Modifier) const {
- assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getDefaultAddendForRelTy() not "
+ "implemented");
}
unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
- assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getRelocationTySize() not implemented");
}
bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
- assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented");
- return 1;
+ llvm_unreachable("ARMELFWriterInfo::isPCRelativeRel() not implemented");
}
unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
- assert(0 &&
- "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
+ "implemented");
}
long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- assert(0 &&
- "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented");
- return 0;
+ llvm_unreachable("ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not "
+ "implemented");
}
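The same cleanup repeats through ARMELFWriterInfo: assert(0) compiles away in release builds and therefore needed a dummy return value, while llvm_unreachable is annotated as non-returning, so the dead "return 0;"/"return 1;" lines can simply be deleted. In miniature:

  // Before: assert(0 && "not implemented"); return 0;  // dummy value required
  // After: no return needed past a noreturn call.
  static unsigned notImplemented() {
    llvm_unreachable("not implemented");
  }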
diff --git a/lib/Target/ARM/ARMELFWriterInfo.h b/lib/Target/ARM/ARMELFWriterInfo.h
index 1c4e5329ac61..6a84f8ac4235 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.h
+++ b/lib/Target/ARM/ARMELFWriterInfo.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetELFWriterInfo.h"
namespace llvm {
+ class TargetMachine;
class ARMELFWriterInfo : public TargetELFWriterInfo {
public:
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 7872cb90f4e7..5fc0360528cc 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -1,4 +1,4 @@
-//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,7 +19,6 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -61,7 +60,7 @@ namespace {
void ExpandVST(MachineBasicBlock::iterator &MBBI);
void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs);
+ unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
};
@@ -99,13 +98,20 @@ namespace {
// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
- unsigned PseudoOpc;
- unsigned RealOpc;
+ uint16_t PseudoOpc;
+ uint16_t RealOpc;
bool IsLoad;
- bool HasWriteBack;
+ bool isUpdating;
+ bool hasWritebackOperand;
NEONRegSpacing RegSpacing;
unsigned char NumRegs; // D registers loaded or stored
unsigned char RegElts; // elements per D register; used for lane ops
+ // FIXME: Temporary flag to denote whether the real instruction takes
+ // a single register (like the encoding) or all of the registers in
+ // the list (like the asm syntax and the isel DAG). When all definitions
+ // are converted to take only the single encoded register, this will
+ // go away.
+ bool copyAllListRegs;
// Comparison methods for binary search of the table.
bool operator<(const NEONLdStTableEntry &TE) const {
@@ -122,243 +128,203 @@ namespace {
}
static const NEONLdStTableEntry NEONLdStTable[] = {
-{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4},
-{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4},
-{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2},
-{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2},
-{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8},
-{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8},
-
-{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 },
-{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 },
-{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 },
-{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 },
-
-{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 },
-{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 },
-{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 },
-{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 },
-
-{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 },
-{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 },
-{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 },
-
-{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4},
-{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4},
-{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2},
-{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2},
-{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8},
-{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8},
-
-{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 },
-{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 },
-{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 },
-{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 },
-{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 },
-
-{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 },
-{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 },
-{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 },
-{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 },
-{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 },
-{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 },
-
-{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 },
-
-{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4},
-{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4},
-{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2},
-{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2},
-{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8},
-{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8},
-
-{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 },
-{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 },
-{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 },
-{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 },
-{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 },
-{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 },
-{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 },
-{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 },
-{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 },
-{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 },
-
-{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 },
-{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 },
-{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 },
-{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 },
-{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 },
-{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 },
-
-{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 },
-{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 },
-{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 },
-{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 },
-{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 },
-{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 },
-{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 },
-{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 },
-{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 },
-
-{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4},
-{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4},
-{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2},
-{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2},
-{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8},
-{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8},
-
-{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 },
-{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 },
-{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 },
-{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 },
-{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 },
-
-{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 },
-{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 },
-{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 },
-{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 },
-{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 },
-{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 },
-
-{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 },
-{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 },
-{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 },
-{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 },
-{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 },
-{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 },
-{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 },
-{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 },
-{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 },
-
-{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 },
-{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 },
-{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 },
-{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 },
-{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 },
-{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 },
-
-{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 },
-{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 },
-{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 },
-{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 },
-
-{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 },
-{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 },
-{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 },
-
-{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 },
-{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4},
-{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4},
-{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2},
-{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2},
-
-{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 },
-{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 },
-{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 },
-{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 },
-{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 },
-{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 },
-
-{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 },
-
-{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 },
-{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 },
-{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 },
-{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 },
-{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 },
-{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 },
-{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4},
-{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4},
-{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2},
-{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2},
-
-{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 },
-{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 },
-{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 },
-{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 },
-{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 },
-{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 },
-
-{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 },
-{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 },
-{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 },
-{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 },
-{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 },
-{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 },
-{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 },
-{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 },
-{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 },
-
-{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 },
-{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4},
-{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4},
-{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2},
-{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2},
-
-{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 },
-{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 },
-{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 },
-{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 },
-{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 },
-{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 },
-
-{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 },
-{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 },
-{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 },
-{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 },
-{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 },
-{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 },
-{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 },
-{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 }
+{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
+
+{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
+{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
+{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
+
+{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
+{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
+{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
+{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
+
+{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+
+{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
+{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
+{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
+{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
+
+{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+
+{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
+
+{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
+{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
+{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
+{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
+
+{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
+{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
+{ ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
+
+{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
+{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
+{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
+{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
+{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
+{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
+
+{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
+{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
+{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
+{ ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
+
+{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
+{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
+{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
+
+{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
+{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
+{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
+{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
+
+{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
+{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
+{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
+{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
+
+{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
+{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
+{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
+
+{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
+{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
+{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
+{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
+
+{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
+{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
+{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
+{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
};
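LookupNEONLdSt below relies on this table staying sorted by PseudoOpc. One plausible shape for that lookup, using the struct's operator< with std::lower_bound; the actual implementation may differ in detail:

  // Sketch: binary-search the sorted table for a pseudo opcode.
  static const NEONLdStTableEntry *lookupSketch(unsigned Opcode) {
    NEONLdStTableEntry Key = {};           // only PseudoOpc participates in <
    Key.PseudoOpc = Opcode;
    const NEONLdStTableEntry *Begin = NEONLdStTable;
    const NEONLdStTableEntry *End = NEONLdStTable + array_lengthof(NEONLdStTable);
    const NEONLdStTableEntry *I = std::lower_bound(Begin, End, Key);
    return (I != End && I->PseudoOpc == Opcode) ? I : 0;
  }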
/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
- unsigned NumEntries = array_lengthof(NEONLdStTable);
+ const unsigned NumEntries = array_lengthof(NEONLdStTable);
#ifndef NDEBUG
// Make sure the table is sorted.
@@ -422,21 +388,22 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 2)
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 3)
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// For an instruction writing double-spaced subregs, the pseudo instruction
@@ -481,24 +448,26 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(TableEntry->RealOpc));
unsigned OpIdx = 0;
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
bool SrcIsKill = MI.getOperand(OpIdx).isKill();
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
+ MIB.addReg(D0);
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1);
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
MIB.addReg(D2);
- if (NumRegs > 3)
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
MIB.addReg(D3);
// Copy the predicate operands.
@@ -558,14 +527,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
}
- if (TableEntry->HasWriteBack)
+ if (TableEntry->isUpdating)
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the addrmode6 operands.
MIB.addOperand(MI.getOperand(OpIdx++));
MIB.addOperand(MI.getOperand(OpIdx++));
// Copy the am6offset operand.
- if (TableEntry->HasWriteBack)
+ if (TableEntry->hasWritebackOperand)
MIB.addOperand(MI.getOperand(OpIdx++));
// Grab the super-register source.
@@ -599,13 +568,15 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ // Transfer memoperands.
+ MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
}
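The setMemRefs calls this patch adds all serve one purpose: when a pseudo is expanded into a real instruction, its memory operands (alignment, volatility, alias information) must be carried over, or later passes treat the replacement as an unknown memory access. The distilled pattern:

  // Sketch: build the replacement, copy the memory operand list, then erase.
  MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
  MI.eraseFromParent();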
/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
- unsigned Opc, bool IsExt, unsigned NumRegs) {
+ unsigned Opc, bool IsExt) {
MachineInstr &MI = *MBBI;
MachineBasicBlock &MBB = *MI.getParent();
@@ -621,11 +592,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned SrcReg = MI.getOperand(OpIdx++).getReg();
unsigned D0, D1, D2, D3;
GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0).addReg(D1);
- if (NumRegs > 2)
- MIB.addReg(D2);
- if (NumRegs > 3)
- MIB.addReg(D3);
+ MIB.addReg(D0);
// Copy the other source register operand.
MIB.addOperand(MI.getOperand(OpIdx++));
@@ -645,7 +612,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
@@ -809,7 +776,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM::Int_eh_sjlj_dispatchsetup: {
+ case ARM::Int_eh_sjlj_dispatchsetup:
+ case ARM::Int_eh_sjlj_dispatchsetup_nofp:
+ case ARM::tInt_eh_sjlj_dispatchsetup: {
MachineFunction &MF = *MI.getParent()->getParent();
const ARMBaseInstrInfo *AII =
static_cast<const ARMBaseInstrInfo*>(TII);
@@ -824,15 +793,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
"base pointer without frame pointer?");
if (AFI->isThumb2Function()) {
- llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+ emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
} else if (AFI->isThumbFunction()) {
- llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *TII, RI);
+ emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, *TII, RI);
} else {
- llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0,
- *TII);
+ emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+ FramePtr, -NumBytes, ARMCC::AL, 0,
+ *TII);
}
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
@@ -996,6 +965,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
@@ -1026,6 +996,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MIB->addRegisterKilled(SrcReg, TRI, true);
TransferImpOps(MI, MIB, MIB);
+ MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI.eraseFromParent();
return true;
}
@@ -1057,26 +1028,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::VLD1q8Pseudo:
- case ARM::VLD1q16Pseudo:
- case ARM::VLD1q32Pseudo:
- case ARM::VLD1q64Pseudo:
- case ARM::VLD1q8Pseudo_UPD:
- case ARM::VLD1q16Pseudo_UPD:
- case ARM::VLD1q32Pseudo_UPD:
- case ARM::VLD1q64Pseudo_UPD:
- case ARM::VLD2d8Pseudo:
- case ARM::VLD2d16Pseudo:
- case ARM::VLD2d32Pseudo:
case ARM::VLD2q8Pseudo:
case ARM::VLD2q16Pseudo:
case ARM::VLD2q32Pseudo:
- case ARM::VLD2d8Pseudo_UPD:
- case ARM::VLD2d16Pseudo_UPD:
- case ARM::VLD2d32Pseudo_UPD:
- case ARM::VLD2q8Pseudo_UPD:
- case ARM::VLD2q16Pseudo_UPD:
- case ARM::VLD2q32Pseudo_UPD:
+ case ARM::VLD2q8PseudoWB_fixed:
+ case ARM::VLD2q16PseudoWB_fixed:
+ case ARM::VLD2q32PseudoWB_fixed:
+ case ARM::VLD2q8PseudoWB_register:
+ case ARM::VLD2q16PseudoWB_register:
+ case ARM::VLD2q32PseudoWB_register:
case ARM::VLD3d8Pseudo:
case ARM::VLD3d16Pseudo:
case ARM::VLD3d32Pseudo:
@@ -1084,7 +1044,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD3d8Pseudo_UPD:
case ARM::VLD3d16Pseudo_UPD:
case ARM::VLD3d32Pseudo_UPD:
- case ARM::VLD1d64TPseudo_UPD:
case ARM::VLD3q8Pseudo_UPD:
case ARM::VLD3q16Pseudo_UPD:
case ARM::VLD3q32Pseudo_UPD:
@@ -1101,7 +1060,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4d8Pseudo_UPD:
case ARM::VLD4d16Pseudo_UPD:
case ARM::VLD4d32Pseudo_UPD:
- case ARM::VLD1d64QPseudo_UPD:
case ARM::VLD4q8Pseudo_UPD:
case ARM::VLD4q16Pseudo_UPD:
case ARM::VLD4q32Pseudo_UPD:
@@ -1111,18 +1069,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4q8oddPseudo_UPD:
case ARM::VLD4q16oddPseudo_UPD:
case ARM::VLD4q32oddPseudo_UPD:
- case ARM::VLD1DUPq8Pseudo:
- case ARM::VLD1DUPq16Pseudo:
- case ARM::VLD1DUPq32Pseudo:
- case ARM::VLD1DUPq8Pseudo_UPD:
- case ARM::VLD1DUPq16Pseudo_UPD:
- case ARM::VLD1DUPq32Pseudo_UPD:
- case ARM::VLD2DUPd8Pseudo:
- case ARM::VLD2DUPd16Pseudo:
- case ARM::VLD2DUPd32Pseudo:
- case ARM::VLD2DUPd8Pseudo_UPD:
- case ARM::VLD2DUPd16Pseudo_UPD:
- case ARM::VLD2DUPd32Pseudo_UPD:
case ARM::VLD3DUPd8Pseudo:
case ARM::VLD3DUPd16Pseudo:
case ARM::VLD3DUPd32Pseudo:
@@ -1138,26 +1084,15 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandVLD(MBBI);
return true;
- case ARM::VST1q8Pseudo:
- case ARM::VST1q16Pseudo:
- case ARM::VST1q32Pseudo:
- case ARM::VST1q64Pseudo:
- case ARM::VST1q8Pseudo_UPD:
- case ARM::VST1q16Pseudo_UPD:
- case ARM::VST1q32Pseudo_UPD:
- case ARM::VST1q64Pseudo_UPD:
- case ARM::VST2d8Pseudo:
- case ARM::VST2d16Pseudo:
- case ARM::VST2d32Pseudo:
case ARM::VST2q8Pseudo:
case ARM::VST2q16Pseudo:
case ARM::VST2q32Pseudo:
- case ARM::VST2d8Pseudo_UPD:
- case ARM::VST2d16Pseudo_UPD:
- case ARM::VST2d32Pseudo_UPD:
- case ARM::VST2q8Pseudo_UPD:
- case ARM::VST2q16Pseudo_UPD:
- case ARM::VST2q32Pseudo_UPD:
+ case ARM::VST2q8PseudoWB_fixed:
+ case ARM::VST2q16PseudoWB_fixed:
+ case ARM::VST2q32PseudoWB_fixed:
+ case ARM::VST2q8PseudoWB_register:
+ case ARM::VST2q16PseudoWB_register:
+ case ARM::VST2q32PseudoWB_register:
case ARM::VST3d8Pseudo:
case ARM::VST3d16Pseudo:
case ARM::VST3d32Pseudo:
@@ -1165,7 +1100,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST3d8Pseudo_UPD:
case ARM::VST3d16Pseudo_UPD:
case ARM::VST3d32Pseudo_UPD:
- case ARM::VST1d64TPseudo_UPD:
+ case ARM::VST1d64TPseudoWB_fixed:
+ case ARM::VST1d64TPseudoWB_register:
case ARM::VST3q8Pseudo_UPD:
case ARM::VST3q16Pseudo_UPD:
case ARM::VST3q32Pseudo_UPD:
@@ -1182,7 +1118,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VST4d8Pseudo_UPD:
case ARM::VST4d16Pseudo_UPD:
case ARM::VST4d32Pseudo_UPD:
- case ARM::VST1d64QPseudo_UPD:
+ case ARM::VST1d64QPseudoWB_fixed:
+ case ARM::VST1d64QPseudoWB_register:
case ARM::VST4q8Pseudo_UPD:
case ARM::VST4q16Pseudo_UPD:
case ARM::VST4q32Pseudo_UPD:
@@ -1270,15 +1207,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandLaneOp(MBBI);
return true;
- case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true;
- case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true;
- case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true;
- case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true;
- case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true;
- case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true;
+ case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
+ case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
+ case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
+ case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
}
-
- return false;
}
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index dc8e54ddf957..2e1eaca85b5b 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,7 +16,6 @@
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
-#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
@@ -37,7 +36,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -90,7 +88,7 @@ class ARMFastISel : public FastISel {
ARMFunctionInfo *AFI;
// Convenience variables to avoid some queries.
- bool isThumb;
+ bool isThumb2;
LLVMContext *Context;
public:
@@ -101,7 +99,7 @@ class ARMFastISel : public FastISel {
TLI(*TM.getTargetLowering()) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
- isThumb = AFI->isThumbFunction();
+ isThumb2 = AFI->isThumbFunction();
Context = &funcInfo.Fn->getContext();
}
@@ -148,6 +146,8 @@ class ARMFastISel : public FastISel {
virtual bool TargetSelectInstruction(const Instruction *I);
virtual unsigned TargetMaterializeConstant(const Constant *C);
virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+ virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
#include "ARMGenFastISel.inc"
@@ -156,27 +156,40 @@ class ARMFastISel : public FastISel {
bool SelectLoad(const Instruction *I);
bool SelectStore(const Instruction *I);
bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
bool SelectCmp(const Instruction *I);
bool SelectFPExt(const Instruction *I);
bool SelectFPTrunc(const Instruction *I);
- bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
- bool SelectSIToFP(const Instruction *I);
- bool SelectFPToSI(const Instruction *I);
- bool SelectSDiv(const Instruction *I);
- bool SelectSRem(const Instruction *I);
- bool SelectCall(const Instruction *I);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectIToFP(const Instruction *I, bool isSigned);
+ bool SelectFPToI(const Instruction *I, bool isSigned);
+ bool SelectDiv(const Instruction *I, bool isSigned);
+ bool SelectRem(const Instruction *I, bool isSigned);
+ bool SelectCall(const Instruction *I, const char *IntrMemName);
+ bool SelectIntrinsicCall(const IntrinsicInst &I);
bool SelectSelect(const Instruction *I);
bool SelectRet(const Instruction *I);
- bool SelectIntCast(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
// Utility routines.
private:
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
- bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
- bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+ bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment = 0, bool isZExt = true,
+ bool allocReg = true);
+
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
bool ARMComputeAddress(const Value *Obj, Address &Addr);
- void ARMSimplifyAddress(Address &Addr, EVT VT);
+ void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
+ bool ARMIsMemCpySmall(uint64_t Len);
+ bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
+ unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
unsigned ARMMaterializeInt(const Constant *C, EVT VT);
unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
@@ -186,8 +199,6 @@ class ARMFastISel : public FastISel {
// Call handling routines.
private:
- bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
- unsigned &ResultReg);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
SmallVectorImpl<unsigned> &ArgRegs,
@@ -208,7 +219,7 @@ class ARMFastISel : public FastISel {
const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
void AddLoadStoreOperands(EVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags);
+ unsigned Flags, bool useAM3);
};
} // end anonymous namespace
@@ -219,8 +230,7 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasOptionalDef())
+ if (!MI->hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -290,10 +300,10 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -310,11 +320,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill));
@@ -333,12 +343,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addReg(Op2, Op2IsKill * RegState::Kill));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
@@ -357,11 +367,11 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm));
@@ -379,11 +389,11 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm));
@@ -402,12 +412,12 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
@@ -425,10 +435,10 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(Imm));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -444,10 +454,10 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
- if (II.getNumDefs() >= 1)
+ if (II.getNumDefs() >= 1) {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addImm(Imm1).addImm(Imm2));
- else {
+ } else {
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addImm(Imm1).addImm(Imm2));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
@@ -464,9 +474,10 @@ unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
+
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
- DL, TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
return ResultReg;
}
@@ -477,7 +488,7 @@ unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::VMOVRS), MoveReg)
+ TII.get(ARM::VMOVSR), MoveReg)
.addReg(SrcReg));
return MoveReg;
}
@@ -487,7 +498,7 @@ unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::VMOVSR), MoveReg)
+ TII.get(ARM::VMOVRS), MoveReg)
.addReg(SrcReg));
return MoveReg;
}
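The two hunks above swap move opcodes that were reversed: the suffix encodes
the operand order, so the helper that moves into an FP register must use
VMOVSR, and the one that moves into a core register must use VMOVRS.

    // VMOVSR:  vmov s0, r0   @ core register -> single-precision register
    // VMOVRS:  vmov r0, s0   @ single-precision register -> core register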
@@ -541,22 +552,42 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
- // For now 32-bit only.
- if (VT != MVT::i32) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return false;
// If we can do this in a single instruction without a constant pool entry
// do so now.
const ConstantInt *CI = cast<ConstantInt>(C);
- if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
- unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+ if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
+ unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(Opc), DestReg)
- .addImm(CI->getSExtValue()));
- return DestReg;
+ TII.get(Opc), ImmReg)
+ .addImm(CI->getZExtValue()));
+ return ImmReg;
}
+ // Use MVN to emit negative constants.
+ if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
+ unsigned Imm = (unsigned)~(CI->getSExtValue());
+ bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ if (UseImm) {
+ unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
+ unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ImmReg)
+ .addImm(Imm));
+ return ImmReg;
+ }
+ }
+
+ // Load from constant pool. For now 32-bit only.
+ if (VT != MVT::i32)
+ return false;
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
// MachineConstantPool wants an explicit alignment.
unsigned Align = TD.getPrefTypeAlignment(C->getType());
if (Align == 0) {
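A worked example for the MVN path added above: to materialize -7, the code
computes Imm = ~(-7) = 6, which fits an ARM modified immediate, and emits

    // mvn r0, #6   @ r0 = ~6 = 0xFFFFFFF9 = -7

avoiding the constant-pool load that the fallback path below would emit.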
@@ -565,7 +596,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
}
unsigned Idx = MCP.getConstantPoolIndex(C, Align);
- if (isThumb)
+ if (isThumb2)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRpci), DestReg)
.addConstantPoolIndex(Idx));
@@ -586,44 +617,69 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
Reloc::Model RelocM = TM.getRelocationModel();
// TODO: Need more magic for ARM PIC.
- if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
-
- // MachineConstantPool wants an explicit alignment.
- unsigned Align = TD.getPrefTypeAlignment(GV->getType());
- if (Align == 0) {
- // TODO: Figure out if this is correct.
- Align = TD.getTypeAllocSize(GV->getType());
- }
+ if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0;
- // Grab index.
- unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
- unsigned Id = AFI->createPICLabelUId();
- ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
- ARMCP::CPValue,
- PCAdj);
- unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
-
- // Load value.
- MachineInstrBuilder MIB;
unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb) {
- unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
- .addConstantPoolIndex(Idx);
- if (RelocM == Reloc::PIC_)
- MIB.addImm(Id);
+
+ // Use movw+movt when possible; it avoids constant pool entries.

+ // Darwin targets don't support movt with Reloc::Static, see
+ // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support
+ // static movt relocations.
+ if (Subtarget->useMovt() &&
+ Subtarget->isTargetDarwin() == (RelocM != Reloc::Static)) {
+ unsigned Opc;
+ switch (RelocM) {
+ case Reloc::PIC_:
+ Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
+ break;
+ case Reloc::DynamicNoPIC:
+ Opc = isThumb2 ? ARM::t2MOV_ga_dyn : ARM::MOV_ga_dyn;
+ break;
+ default:
+ Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
+ break;
+ }
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+ DestReg).addGlobalAddress(GV));
} else {
- // The extra immediate is for addrmode2.
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
- DestReg)
- .addConstantPoolIndex(Idx)
- .addImm(0);
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(GV->getType());
+ }
+
+ // Grab index.
+ unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
+ (Subtarget->isThumb() ? 4 : 8);
+ unsigned Id = AFI->createPICLabelUId();
+ ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
+ ARMCP::CPValue,
+ PCAdj);
+ unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+ // Load value.
+ MachineInstrBuilder MIB;
+ if (isThumb2) {
+ unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx);
+ if (RelocM == Reloc::PIC_)
+ MIB.addImm(Id);
+ } else {
+ // The extra immediate is for addrmode2.
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+ DestReg)
+ .addConstantPoolIndex(Idx)
+ .addImm(0);
+ }
+ AddOptionalDefs(MIB);
}
- AddOptionalDefs(MIB);
if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
+ MachineInstrBuilder MIB;
unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
- if (isThumb)
+ if (isThumb2)
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::t2LDRi12), NewDestReg)
.addReg(DestReg)
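With useMovt, the global's address is synthesized inline instead of loaded
from the constant pool. In the static case, MOVi32imm later expands to the
movw/movt pair; roughly:

    // movw r0, :lower16:sym   @ r0 = low 16 bits, high bits cleared
    // movt r0, :upper16:sym   @ r0[31:16] = high 16 bits

The PIC and DynamicNoPIC opcodes are pc-relative variants of the same idea.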
@@ -656,6 +712,8 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
return 0;
}
+// TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
+
unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// Don't handle dynamic allocas.
if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
@@ -669,10 +727,10 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
// This will get lowered later into the correct offsets and registers
// via rewriteXFrameIndex.
if (SI != FuncInfo.StaticAllocaMap.end()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+ const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(SI->second)
.addImm(0));
@@ -699,7 +757,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// If this is a type than can be sign or zero-extended to a basic operation
// go ahead and accept it now.
- if (VT == MVT::i8 || VT == MVT::i16)
+ if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
return true;
return false;
@@ -813,35 +871,33 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
}
}
- // Materialize the global variable's address into a reg which can
- // then be used later to load the variable.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
- unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
- if (Tmp == 0) return false;
-
- Addr.Base.Reg = Tmp;
- return true;
- }
-
// Try to get this in a register if nothing else has worked.
if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
return Addr.Base.Reg != 0;
}
-void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
bool needsLowering = false;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Unhandled load/store type!");
+ default: llvm_unreachable("Unhandled load/store type!");
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
- // Integer loads/stores handle 12-bit offsets.
- needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ if (!useAM3) {
+ // Integer loads/stores handle 12-bit offsets.
+ needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+ // Handle negative offsets.
+ if (needsLowering && isThumb2)
+ needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
+ Addr.Offset > -256);
+ } else {
+ // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
+ needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
+ }
break;
case MVT::f32:
case MVT::f64:
@@ -854,11 +910,11 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
// put the alloca address into a register, set the base type back to
// register and continue. This should almost never happen.
if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
- TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
- ARM::GPRRegisterClass;
+ const TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass
+ : ARM::GPRRegisterClass;
unsigned ResultReg = createResultReg(RC);
- unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+ unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
.addFrameIndex(Addr.Base.FI)
.addImm(0));
@@ -877,7 +933,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
const MachineInstrBuilder &MIB,
- unsigned Flags) {
+ unsigned Flags, bool useAM3) {
// addrmode5 output depends on the selection dag addressing dividing the
// offset by 4 that it then later multiplies. Do this here as well.
if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
@@ -897,60 +953,127 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
// Now add the rest of the operands.
MIB.addFrameIndex(FI);
- // ARM halfword load/stores need an additional operand.
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
-
- MIB.addImm(Addr.Offset);
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
MIB.addMemOperand(MMO);
} else {
// Now add the rest of the operands.
MIB.addReg(Addr.Base.Reg);
- // ARM halfword load/stores need an additional operand.
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
-
- MIB.addImm(Addr.Offset);
+ // ARM halfword load/stores and signed byte loads need an additional
+ // operand.
+ if (useAM3) {
+ signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
+ MIB.addReg(0);
+ MIB.addImm(Imm);
+ } else {
+ MIB.addImm(Addr.Offset);
+ }
}
AddOptionalDefs(MIB);
}
-bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
-
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr,
+ unsigned Alignment, bool isZExt, bool allocReg) {
assert(VT.isSimple() && "Non-simple types are invalid here!");
unsigned Opc;
- TargetRegisterClass *RC;
+ bool useAM3 = false;
+ bool needVMOV = false;
+ const TargetRegisterClass *RC;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
- case MVT::i16:
- Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+ case MVT::i1:
+ case MVT::i8:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
+ else
+ Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
+ } else {
+ if (isZExt) {
+ Opc = ARM::LDRBi12;
+ } else {
+ Opc = ARM::LDRSB;
+ useAM3 = true;
+ }
+ }
RC = ARM::GPRRegisterClass;
break;
- case MVT::i8:
- Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+ case MVT::i16:
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
+ else
+ Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
+ } else {
+ Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
+ useAM3 = true;
+ }
RC = ARM::GPRRegisterClass;
break;
case MVT::i32:
- Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ Opc = ARM::t2LDRi8;
+ else
+ Opc = ARM::t2LDRi12;
+ } else {
+ Opc = ARM::LDRi12;
+ }
RC = ARM::GPRRegisterClass;
break;
case MVT::f32:
- Opc = ARM::VLDRS;
- RC = TLI.getRegClassFor(VT);
+ if (!Subtarget->hasVFP2()) return false;
+ // Unaligned loads need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ needVMOV = true;
+ VT = MVT::i32;
+ Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
+ RC = ARM::GPRRegisterClass;
+ } else {
+ Opc = ARM::VLDRS;
+ RC = TLI.getRegClassFor(VT);
+ }
break;
case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned loads need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
Opc = ARM::VLDRD;
RC = TLI.getRegClassFor(VT);
break;
}
// Simplify this down to something we can handle.
- ARMSimplifyAddress(Addr, VT);
+ ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
- ResultReg = createResultReg(RC);
+ if (allocReg)
+ ResultReg = createResultReg(RC);
+ assert (ResultReg > 255 && "Expected an allocated virtual register.");
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg);
- AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
+
+ // If we had an unaligned load of a float we've converted it to a regular
+ // load. Now we must move from the GPR to the FP register.
+ if (needVMOV) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVSR), MoveReg)
+ .addReg(ResultReg));
+ ResultReg = MoveReg;
+ }
return true;
}
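Two details in the rewritten ARMEmitLoad are easy to miss. First, useAM3
selects the ARM addressing mode whose immediate is only 8 bits wide;
AddLoadStoreOperands packs the sign into bit 8 of the operand, so an offset
of -12 becomes 0x100 | 12 = 0x10C, i.e. "subtract 12":

    // ldrsh r0, [r1, #-12]   @ encoded as imm8 = 12 with the add bit clear

Second, an unaligned f32 load is legalized as an integer load followed by a
VMOVSR into the FP register, since VLDRS requires word alignment.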
@@ -969,51 +1092,92 @@ bool ARMFastISel::SelectLoad(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
unsigned ResultReg;
- if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+ if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
+ return false;
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
unsigned StrOpc;
+ bool useAM3 = false;
switch (VT.getSimpleVT().SimpleTy) {
// This is mostly going to be Neon/vector support.
default: return false;
case MVT::i1: {
- unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
+ unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass :
ARM::GPRRegisterClass);
- unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
SrcReg = Res;
} // Fallthrough here.
case MVT::i8:
- StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRBi8;
+ else
+ StrOpc = ARM::t2STRBi12;
+ } else {
+ StrOpc = ARM::STRBi12;
+ }
break;
case MVT::i16:
- StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRHi8;
+ else
+ StrOpc = ARM::t2STRHi12;
+ } else {
+ StrOpc = ARM::STRH;
+ useAM3 = true;
+ }
break;
case MVT::i32:
- StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+ if (isThumb2) {
+ if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
+ StrOpc = ARM::t2STRi8;
+ else
+ StrOpc = ARM::t2STRi12;
+ } else {
+ StrOpc = ARM::STRi12;
+ }
break;
case MVT::f32:
if (!Subtarget->hasVFP2()) return false;
- StrOpc = ARM::VSTRS;
+ // Unaligned stores need special handling. Floats require word-alignment.
+ if (Alignment && Alignment < 4) {
+ unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::VMOVRS), MoveReg)
+ .addReg(SrcReg));
+ SrcReg = MoveReg;
+ VT = MVT::i32;
+ StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
+ } else {
+ StrOpc = ARM::VSTRS;
+ }
break;
case MVT::f64:
if (!Subtarget->hasVFP2()) return false;
+ // FIXME: Unaligned stores need special handling. Doublewords require
+ // word-alignment.
+ if (Alignment && Alignment < 4)
+ return false;
+
StrOpc = ARM::VSTRD;
break;
}
// Simplify this down to something we can handle.
- ARMSimplifyAddress(Addr, VT);
+ ARMSimplifyAddress(Addr, VT, useAM3);
// Create the base instruction, then add the operands.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(StrOpc))
- .addReg(SrcReg, getKillRegState(true));
- AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
+ .addReg(SrcReg);
+ AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
return true;
}
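Two notes on ARMEmitStore. An i1 store first masks the source, roughly
"and rD, rS, #1", since only bit 0 of an i1 value is defined. And an
unaligned f32 store mirrors the load case: the value is moved to a core
register and stored as an integer, because VSTRS requires word alignment.

    // vmov r3, s0           @ VMOVRS
    // str  r3, [r0, #off]   @ ordinary i32 store

The store also stops force-marking SrcReg as killed, presumably because the
source register can remain live (the memcpy path below reuses it).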
@@ -1039,7 +1203,8 @@ bool ARMFastISel::SelectStore(const Instruction *I) {
if (!ARMComputeAddress(I->getOperand(1), Addr))
return false;
- if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+ if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
return true;
}
@@ -1099,30 +1264,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// If we can, avoid recomputing the compare - redoing it could lead to wonky
// behavior.
- // TODO: Factor this out.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- MVT SourceVT;
- Type *Ty = CI->getOperand(0)->getType();
- if (CI->hasOneUse() && (CI->getParent() == I->getParent())
- && isTypeLegal(Ty, SourceVT)) {
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
- if (isFloat && !Subtarget->hasVFP2())
- return false;
-
- unsigned CmpOpc;
- switch (SourceVT.SimpleTy) {
- default: return false;
- // TODO: Verify compares.
- case MVT::f32:
- CmpOpc = ARM::VCMPES;
- break;
- case MVT::f64:
- CmpOpc = ARM::VCMPED;
- break;
- case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- break;
- }
+ if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// Get the compare predicate.
// Try to take advantage of fallthrough opportunities.
@@ -1137,23 +1280,11 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// We may not handle every CC for now.
if (ARMPred == ARMCC::AL) return false;
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
-
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
-
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
-
- // For floating point we need to move the result to a comparison register
- // that we can then use for branches.
- if (isFloat)
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(ARM::FMSTAT)));
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
@@ -1164,7 +1295,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
MVT SourceVT;
if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
(isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
- unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
unsigned OpReg = getRegForValue(TI->getOperand(0));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TstOpc))
@@ -1176,7 +1307,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
CCMode = ARMCC::EQ;
}
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
@@ -1184,6 +1315,12 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
FuncInfo.MBB->addSuccessor(TBB);
return true;
}
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
}
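The new else-if above folds branches whose condition is a constant: for

    // br i1 false, label %T, label %F

Imm is 0, so the only live successor is FBB and a single unconditional
branch is emitted; FastEmitBranch even omits it when the target is the
fall-through block. No compare or CPSR test is needed.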
unsigned CmpReg = getRegForValue(BI->getCondition());
@@ -1196,7 +1333,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
- unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
.addReg(CmpReg).addImm(1));
@@ -1206,7 +1343,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
CCMode = ARMCC::EQ;
}
- unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+ unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
.addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
FastEmitBranch(FBB, DL);
@@ -1214,70 +1351,155 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectCmp(const Instruction *I) {
- const CmpInst *CI = cast<CmpInst>(I);
+bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0) return false;
- MVT VT;
- Type *Ty = CI->getOperand(0)->getType();
- if (!isTypeLegal(Ty, VT))
- return false;
+ unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(AddrReg));
+ return true;
+}
- bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt) {
+ Type *Ty = Src1Value->getType();
+ EVT SrcVT = TLI.getValueType(Ty, true);
+ if (!SrcVT.isSimple()) return false;
+
+ bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
if (isFloat && !Subtarget->hasVFP2())
return false;
+ // Check to see if the 2nd operand is a constant that we can encode directly
+ // in the compare.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ // FIXME: At -O0 we don't have anything that canonicalizes operand order.
+ // Thus, Src1Value may be a ConstantInt, but we're missing it.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
+ if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
+ SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
+ // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
+ // than a cmn, because there is no way to represent 2147483648 as a
+ // signed 32-bit int.
+ if (Imm < 0 && Imm != (int)0x80000000) {
+ isNegativeImm = true;
+ Imm = -Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+ } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
+ if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
+ if (ConstFP->isZero() && !ConstFP->isNegative())
+ UseImm = true;
+ }
+
unsigned CmpOpc;
- unsigned CondReg;
- switch (VT.SimpleTy) {
+ bool isICmp = true;
+ bool needsExt = false;
+ switch (SrcVT.getSimpleVT().SimpleTy) {
default: return false;
// TODO: Verify compares.
case MVT::f32:
- CmpOpc = ARM::VCMPES;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
break;
case MVT::f64:
- CmpOpc = ARM::VCMPED;
- CondReg = ARM::FPSCR;
+ isICmp = false;
+ CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ needsExt = true;
+ // Intentional fall-through.
case MVT::i32:
- CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
- CondReg = ARM::CPSR;
+ if (isThumb2) {
+ if (!UseImm)
+ CmpOpc = ARM::t2CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri;
+ } else {
+ if (!UseImm)
+ CmpOpc = ARM::CMPrr;
+ else
+ CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri;
+ }
break;
}
- // Get the compare predicate.
- ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+ unsigned SrcReg1 = getRegForValue(Src1Value);
+ if (SrcReg1 == 0) return false;
- // We may not handle every CC for now.
- if (ARMPred == ARMCC::AL) return false;
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(Src2Value);
+ if (SrcReg2 == 0) return false;
+ }
- unsigned Arg1 = getRegForValue(CI->getOperand(0));
- if (Arg1 == 0) return false;
+ // We have i1, i8, or i16, we need to either zero extend or sign extend.
+ if (needsExt) {
+ SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
+ if (SrcReg1 == 0) return false;
+ if (!UseImm) {
+ SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
+ if (SrcReg2 == 0) return false;
+ }
+ }
- unsigned Arg2 = getRegForValue(CI->getOperand(1));
- if (Arg2 == 0) return false;
+ if (!UseImm) {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CmpOpc))
+ .addReg(SrcReg1).addReg(SrcReg2));
+ } else {
+ MachineInstrBuilder MIB;
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+ .addReg(SrcReg1);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(Arg1).addReg(Arg2));
+ // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
+ if (isICmp)
+ MIB.addImm(Imm);
+ AddOptionalDefs(MIB);
+ }
// For floating point we need to move the result to a comparison register
// that we can then use for branches.
- if (isFloat)
+ if (Ty->isFloatTy() || Ty->isDoubleTy())
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::FMSTAT)));
+ return true;
+}
+
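The immediate handling above leans on the cmp/cmn duality: cmn computes
Rn + imm and sets the same flags as comparing Rn against -imm. So for

    // icmp slt i32 %x, -5   =>   cmn r0, #5   (no encodable "cmp r0, #-5")

the code flips the sign and switches to the CMNzri/t2CMNzri opcodes. The
0x80000000 guard exists because -INT_MIN is not representable, so INT_MIN
keeps using a plain cmp with the original immediate.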
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+ const CmpInst *CI = cast<CmpInst>(I);
+
+ // Get the compare predicate.
+ ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+ // We may not handle every CC for now.
+ if (ARMPred == ARMCC::AL) return false;
+
+ // Emit the compare.
+ if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
+ return false;
// Now set a register based on the comparison. Explicitly set the predicates
// here.
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
- TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
- : ARM::GPRRegisterClass;
+ unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ const TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass
+ : ARM::GPRRegisterClass;
unsigned DestReg = createResultReg(RC);
- Constant *Zero
- = ConstantInt::get(Type::getInt32Ty(*Context), 0);
+ Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
unsigned ZeroReg = TargetMaterializeConstant(Zero);
+ // ARMEmitCmp emits an FMSTAT when necessary, so it's always safe to use CPSR.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
.addReg(ZeroReg).addImm(1)
- .addImm(ARMPred).addReg(CondReg);
+ .addImm(ARMPred).addReg(ARM::CPSR);
UpdateValueMap(I, DestReg);
return true;
@@ -1321,7 +1543,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectSIToFP(const Instruction *I) {
+bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
if (!Subtarget->hasVFP2()) return false;
@@ -1330,21 +1552,30 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
if (!isTypeLegal(Ty, DstVT))
return false;
- // FIXME: Handle sign-extension where necessary.
- if (!I->getOperand(0)->getType()->isIntegerTy(32))
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
return false;
- unsigned Op = getRegForValue(I->getOperand(0));
- if (Op == 0) return false;
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0) return false;
+
+ // Handle sign-extension.
+ if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
+ EVT DestVT = MVT::i32;
+ SrcReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT,
+ /*isZExt*/!isSigned);
+ if (SrcReg == 0) return false;
+ }
// The conversion routine works on fp-reg to fp-reg and the operand above
// was an integer, move it to the fp registers if possible.
- unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
+ unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
if (FP == 0) return false;
unsigned Opc;
- if (Ty->isFloatTy()) Opc = ARM::VSITOS;
- else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
+ if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
+ else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
else return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
@@ -1355,7 +1586,7 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) {
return true;
}
-bool ARMFastISel::SelectFPToSI(const Instruction *I) {
+bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
// Make sure we have VFP.
if (!Subtarget->hasVFP2()) return false;
@@ -1369,11 +1600,11 @@ bool ARMFastISel::SelectFPToSI(const Instruction *I) {
unsigned Opc;
Type *OpTy = I->getOperand(0)->getType();
- if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
- else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
+ if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
+ else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
else return false;
- // f64->s32 or f32->s32 both need an intermediate f32 reg.
+ // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
ResultReg)
@@ -1401,22 +1632,54 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
if (CondReg == 0) return false;
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- unsigned Op2Reg = getRegForValue(I->getOperand(2));
- if (Op2Reg == 0) return false;
- unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
+ // Check to see if we can use an immediate in the conditional move.
+ int Imm = 0;
+ bool UseImm = false;
+ bool isNegativeImm = false;
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
+ assert (VT == MVT::i32 && "Expecting an i32.");
+ Imm = (int)ConstInt->getValue().getZExtValue();
+ if (Imm < 0) {
+ isNegativeImm = true;
+ Imm = ~Imm;
+ }
+ UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
+ (ARM_AM::getSOImmVal(Imm) != -1);
+ }
+
+ unsigned Op2Reg = 0;
+ if (!UseImm) {
+ Op2Reg = getRegForValue(I->getOperand(2));
+ if (Op2Reg == 0) return false;
+ }
+
+ unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
- .addReg(CondReg).addImm(1));
+ .addReg(CondReg).addImm(0));
+
+ unsigned MovCCOpc;
+ if (!UseImm) {
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
+ } else {
+ if (!isNegativeImm) {
+ MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
+ } else {
+ MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
+ }
+ }
unsigned ResultReg = createResultReg(RC);
- unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
- .addReg(Op1Reg).addReg(Op2Reg)
- .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
+ .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR);
UpdateValueMap(I, ResultReg);
return true;
}
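The immediate path in SelectSelect mirrors the MVN trick used for constant
materialization: a negative operand is stored complemented and emitted with
MVNCC, whose write is the bitwise NOT of the immediate. A sketch for
"select i1 %c, i32 %a, i32 -5", with hypothetical register assignments:

    // cmp   rC, #0          @ was "tst rC, #1" before this patch
    //                       @ rD is tied to rA, the true operand
    // mvneq rD, #4          @ rD = ~4 = -5 when %c == 0

Note the operand order and predicates in the register case were adjusted to
match the new compare against zero.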
-bool ARMFastISel::SelectSDiv(const Instruction *I) {
+bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
MVT VT;
Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
@@ -1430,21 +1693,21 @@ bool ARMFastISel::SelectSDiv(const Instruction *I) {
// Otherwise emit a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
- LC = RTLIB::SDIV_I8;
+ LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
else if (VT == MVT::i16)
- LC = RTLIB::SDIV_I16;
+ LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
else if (VT == MVT::i32)
- LC = RTLIB::SDIV_I32;
+ LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
else if (VT == MVT::i64)
- LC = RTLIB::SDIV_I64;
+ LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
else if (VT == MVT::i128)
- LC = RTLIB::SDIV_I128;
+ LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
return ARMEmitLibcall(I, LC);
}
-bool ARMFastISel::SelectSRem(const Instruction *I) {
+bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
MVT VT;
Type *Ty = I->getType();
if (!isTypeLegal(Ty, VT))
@@ -1452,21 +1715,59 @@ bool ARMFastISel::SelectSRem(const Instruction *I) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i8)
- LC = RTLIB::SREM_I8;
+ LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
else if (VT == MVT::i16)
- LC = RTLIB::SREM_I16;
+ LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
else if (VT == MVT::i32)
- LC = RTLIB::SREM_I32;
+ LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
else if (VT == MVT::i64)
- LC = RTLIB::SREM_I64;
+ LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
else if (VT == MVT::i128)
- LC = RTLIB::SREM_I128;
+ LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
return ARMEmitLibcall(I, LC);
}
-bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
+bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ return false;
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
+ break;
+ case ISD::OR:
+ Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
+ break;
+ case ISD::SUB:
+ Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // TODO: Often the 2nd operand is an immediate, which can be encoded directly
+ // in the instruction, rather than materializing the value in a register.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2));
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
EVT VT = TLI.getValueType(I->getType(), true);
// We can get here in the case when we want to use NEON for our fp
@@ -1478,12 +1779,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
if (isFloat && !Subtarget->hasVFP2())
return false;
- unsigned Op1 = getRegForValue(I->getOperand(0));
- if (Op1 == 0) return false;
-
- unsigned Op2 = getRegForValue(I->getOperand(1));
- if (Op2 == 0) return false;
-
unsigned Opc;
bool is64bit = VT == MVT::f64 || VT == MVT::i64;
switch (ISDOpcode) {
@@ -1498,6 +1793,12 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
Opc = is64bit ? ARM::VMULD : ARM::VMULS;
break;
}
+ unsigned Op1 = getRegForValue(I->getOperand(0));
+ if (Op1 == 0) return false;
+
+ unsigned Op2 = getRegForValue(I->getOperand(1));
+ if (Op2 == 0) return false;
+
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg)
@@ -1508,18 +1809,6 @@ bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
// Call Handling Code
-bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
- EVT SrcVT, unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
- Src, /*TODO: Kill=*/false);
-
- if (RR != 0) {
- ResultReg = RR;
- return true;
- } else
- return false;
-}
-
// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
@@ -1536,7 +1825,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard)
+ TM.Options.FloatABIType == FloatABI::Hard)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
@@ -1548,11 +1837,6 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
- case CallingConv::GHC:
- if (Return)
- llvm_unreachable("Can't return in GHC call convention");
- else
- return CC_ARM_APCS_GHC;
}
}
@@ -1567,6 +1851,48 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));
+ // Check that we can handle all of the arguments. If we can't, then bail out
+ // now before we add code to the MBB.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // We don't handle NEON/vector parameters yet.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
+ return false;
+
+ // Check that we can handle the arg in its assigned location.
+ if (VA.isRegLoc() && !VA.needsCustom()) {
+ continue;
+ } else if (VA.needsCustom()) {
+ // TODO: We need custom lowering for vector (v2f64) args.
+ if (VA.getLocVT() != MVT::f64 ||
+ // TODO: Only handle register args for now.
+ !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
+ return false;
+ } else {
+ switch (static_cast<EVT>(ArgVT).getSimpleVT().SimpleTy) {
+ default:
+ return false;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2())
+ return false;
+ break;
+ }
+ }
+ }
+
+ // At this point, we are able to handle the call's arguments in fast isel.
+
// Get a count of how many bytes are to be pushed on the stack.
NumBytes = CCInfo.getNextStackOffset();
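The new loop above validates every argument before any instruction is
emitted; the checks that used to sit inside the emission loop below become
asserts. A minimal sketch of the pattern (names hypothetical):

    // bool emitCall(ArrayRef<Arg> Args) {
    //   for (const Arg &A : Args)
    //     if (!canHandle(A)) return false;  // nothing emitted yet
    //   for (const Arg &A : Args)
    //     emit(A);                          // may assert, never bails
    //   return true;
    // }

Bailing out after BuildMI calls would leave a half-built call sequence in
the block, so the early returns have to happen first.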
@@ -1582,41 +1908,26 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
unsigned Arg = ArgRegs[VA.getValNo()];
MVT ArgVT = ArgVTs[VA.getValNo()];
- // We don't handle NEON/vector parameters yet.
- if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
- return false;
+ assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
+ "We don't handle NEON/vector parameters yet.");
// Handle arg promotion, etc.
switch (VA.getLocInfo()) {
case CCValAssign::Full: break;
case CCValAssign::SExt: {
- bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
+ assert (Arg != 0 && "Failed to emit a sext");
+ ArgVT = DestVT;
break;
}
+ case CCValAssign::AExt:
+ // Intentional fall-through. Handle AExt and ZExt.
case CCValAssign::ZExt: {
- bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
- Emitted = true;
- ArgVT = VA.getLocVT();
- break;
- }
- case CCValAssign::AExt: {
- bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
- if (!Emitted)
- Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
- Arg, ArgVT, Arg);
-
- assert(Emitted && "Failed to emit a aext!"); (void)Emitted;
- ArgVT = VA.getLocVT();
+ MVT DestVT = VA.getLocVT();
+ Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
+ assert (Arg != 0 && "Failed to emit a zext");
+ ArgVT = DestVT;
break;
}
case CCValAssign::BCvt: {
@@ -1634,16 +1945,17 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
if (VA.isRegLoc() && !VA.needsCustom()) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
VA.getLocReg())
- .addReg(Arg);
+ .addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else if (VA.needsCustom()) {
// TODO: We need custom lowering for vector (v2f64) args.
- if (VA.getLocVT() != MVT::f64) return false;
+ assert(VA.getLocVT() == MVT::f64 &&
+ "Custom lowering for v2f64 args not available");
CCValAssign &NextVA = ArgLocs[++i];
- // TODO: Only handle register args for now.
- if(!(VA.isRegLoc() && NextVA.isRegLoc())) return false;
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "We only handle register args!");
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVRRD), VA.getLocReg())
@@ -1659,9 +1971,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
Addr.Base.Reg = ARM::SP;
Addr.Offset = VA.getLocMemOffset();
- if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
+ bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
+ assert(EmitRet && "Could not emit a store for argument!");
}
}
+
return true;
}
@@ -1685,7 +1999,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
// For this move we copy into two registers and then move into the
// double fp reg we want.
EVT DestVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
unsigned ResultReg = createResultReg(DstRC);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(ARM::VMOVDRR), ResultReg)
@@ -1700,7 +2014,12 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
} else {
assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
EVT CopyVT = RVLocs[0].getValVT();
- TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
+
+ // Special handling for extended integers.
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
+ CopyVT = MVT::i32;
+
+ const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
unsigned ResultReg = createResultReg(DstRC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1753,13 +2072,26 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
// Only handle register returns for now.
if (!VA.isRegLoc())
return false;
- // TODO: For now, don't try to handle cases where getLocInfo()
- // says Full but the types don't match.
- if (TLI.getValueType(RV->getType()) != VA.getValVT())
- return false;
- // Make the copy.
unsigned SrcReg = Reg + VA.getValNo();
+ EVT RVVT = TLI.getValueType(RV->getType());
+ EVT DestVT = VA.getValVT();
+ // Special handling for extended integers.
+ if (RVVT != DestVT) {
+ if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
+ return false;
+
+ assert(DestVT == MVT::i32 && "ARM should always ext to i32");
+
+ // Perform extension if flagged as either zext or sext. Otherwise, do
+ // nothing.
+ if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
+ SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
+ if (SrcReg == 0) return false;
+ }
+ }
+
+ // Make the copy.
unsigned DstReg = VA.getLocReg();
const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
// Avoid a cross-class copy. This is very unlikely.
@@ -1772,20 +2104,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
MRI.addLiveOut(VA.getLocReg());
}
- unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
+ unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(RetOpc)));
return true;
}
unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
-
- // Darwin needs the r9 versions of the opcodes.
- bool isDarwin = Subtarget->isTargetDarwin();
- if (isThumb) {
- return isDarwin ? ARM::tBLr9 : ARM::tBL;
+ if (isThumb2) {
+ return ARM::tBL;
} else {
- return isDarwin ? ARM::BLr9 : ARM::BL;
+ return ARM::BL;
}
}
@@ -1844,11 +2173,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLr9 for darwin, BL otherwise.
- // TODO: Turn this into the table of arm call ops.
+ // Issue the call.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(NULL);
- if(isThumb)
+ if (isThumb2)
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(CallOpc)))
@@ -1863,6 +2191,10 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -1873,12 +2205,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
return true;
}
-bool ARMFastISel::SelectCall(const Instruction *I) {
+bool ARMFastISel::SelectCall(const Instruction *I,
+ const char *IntrMemName = 0) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
- // Can't handle inline asm or worry about intrinsics yet.
- if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee)) return false;
// Only handle global variable Callees.
const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
@@ -1902,7 +2235,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
MVT RetVT;
if (RetTy->isVoidTy())
RetVT = MVT::isVoid;
- else if (!isTypeLegal(RetTy, RetVT))
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8 && RetVT != MVT::i1)
return false;
// TODO: For now if we have long calls specified we don't handle the call.
@@ -1913,16 +2247,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
SmallVector<unsigned, 8> ArgRegs;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgRegs.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgRegs.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- unsigned Arg = getRegForValue(*i);
+ // If we're lowering a memory intrinsic instead of a regular call, skip the
+ // last two arguments, which shouldn't be passed to the underlying function.
+ if (IntrMemName && e-i <= 2)
+ break;
- if (Arg == 0)
- return false;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1930,7 +2266,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
- // FIXME: Only handle *easy* calls for now.
+ // FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
@@ -1939,8 +2275,14 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
Type *ArgTy = (*i)->getType();
MVT ArgVT;
- if (!isTypeLegal(ArgTy, ArgVT))
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
+ ArgVT != MVT::i1)
return false;
+
+ unsigned Arg = getRegForValue(*i);
+ if (Arg == 0)
+ return false;
+
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
@@ -1956,26 +2298,38 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
return false;
- // Issue the call, BLr9 for darwin, BL otherwise.
- // TODO: Turn this into the table of arm call ops.
+ // Issue the call.
MachineInstrBuilder MIB;
unsigned CallOpc = ARMSelectCallOp(GV);
// Explicitly adding the predicate here.
- if(isThumb)
+ if(isThumb2) {
// Explicitly adding the predicate here.
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc)))
- .addGlobalAddress(GV, 0, 0);
- else
- // Explicitly adding the predicate here.
- MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(CallOpc))
- .addGlobalAddress(GV, 0, 0));
-
+ TII.get(CallOpc)));
+ if (!IntrMemName)
+ MIB.addGlobalAddress(GV, 0, 0);
+ else
+ MIB.addExternalSymbol(IntrMemName, 0);
+ } else {
+ if (!IntrMemName)
+ // Explicitly adding the predicate here.
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, 0));
+ else
+ MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(CallOpc))
+ .addExternalSymbol(IntrMemName, 0));
+ }
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
// Finish off the call including any return values.
SmallVector<unsigned, 4> UsedRegs;
if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;
@@ -1984,83 +2338,187 @@ bool ARMFastISel::SelectCall(const Instruction *I) {
static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
return true;
+}
+bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
+ return Len <= 16;
}
-bool ARMFastISel::SelectIntCast(const Instruction *I) {
- // On ARM, in general, integer casts don't involve legal types; this code
- // handles promotable integers. The high bits for a type smaller than
- // the register size are assumed to be undefined.
- Type *DestTy = I->getType();
- Value *Op = I->getOperand(0);
- Type *SrcTy = Op->getType();
+bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
+ uint64_t Len) {
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ if (!ARMIsMemCpySmall(Len))
+ return false;
- EVT SrcVT, DestVT;
- SrcVT = TLI.getValueType(SrcTy, true);
- DestVT = TLI.getValueType(DestTy, true);
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
- if (isa<TruncInst>(I)) {
- if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
- return false;
- if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
+ bool RV;
+ unsigned ResultReg;
+ RV = ARMEmitLoad(VT, ResultReg, Src);
+ assert(RV && "Should be able to handle this load.");
+ RV = ARMEmitStore(VT, ResultReg, Dest);
+ assert(RV && "Should be able to handle this store.");
+ (void)RV;
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ Dest.Offset += Size;
+ Src.Offset += Size;
+ }
+
+ return true;
+}
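
The expansion above greedily picks the widest integer access that still fits, so a 7-byte copy becomes one i32, one i16, and one i8 load/store pair. A standalone sketch of the same chunking, assuming the 16-byte cap; std::memcpy stands in for the ARMEmitLoad/ARMEmitStore pair, and the helper names are made up:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static bool isMemCpySmall(uint64_t Len) { return Len <= 16; }

    static bool tryEmitSmallMemCpy(uint8_t *Dst, const uint8_t *Src,
                                   uint64_t Len) {
      if (!isMemCpySmall(Len))
        return false;                // large copies stay on the libcall path
      while (Len) {
        unsigned Size;
        if (Len >= 4)      Size = 4; // an i32 load/store pair
        else if (Len >= 2) Size = 2; // an i16 pair
        else               Size = 1; // an i8 pair
        std::memcpy(Dst, Src, Size); // stands in for ARMEmitLoad + ARMEmitStore
        Dst += Size; Src += Size; Len -= Size;
      }
      return true;
    }

    int main() {
      uint8_t In[7] = {1, 2, 3, 4, 5, 6, 7}, Out[7] = {0};
      assert(tryEmitSmallMemCpy(Out, In, 7)); // expands to i32 + i16 + i8
      std::printf("%u\n", (unsigned)Out[6]);  // 7
      return 0;
    }
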
+
+bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+ // FIXME: Handle more intrinsics.
+ switch (I.getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ const MemTransferInst &MTI = cast<MemTransferInst>(I);
+ // Don't handle volatile.
+ if (MTI.isVolatile())
return false;
- unsigned SrcReg = getRegForValue(Op);
- if (!SrcReg) return false;
+ // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
+ // we would emit dead code because we don't currently handle memmoves.
+ bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
+ if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
+ // Small memcpy's are common enough that we want to do them without a call
+ // if possible.
+ uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
+ if (ARMIsMemCpySmall(Len)) {
+ Address Dest, Src;
+ if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
+ !ARMComputeAddress(MTI.getRawSource(), Src))
+ return false;
+ if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+ return true;
+ }
+ }
+
+ if (!MTI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
+ return false;
- // Because the high bits are undefined, a truncate doesn't generate
- // any code.
- UpdateValueMap(I, SrcReg);
- return true;
+ const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
+ return SelectCall(&I, IntrMemName);
}
- if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ case Intrinsic::memset: {
+ const MemSetInst &MSI = cast<MemSetInst>(I);
+ // Don't handle volatile.
+ if (MSI.isVolatile())
+ return false;
+
+ if (!MSI.getLength()->getType()->isIntegerTy(32))
+ return false;
+
+ if (MSI.getDestAddressSpace() > 255)
+ return false;
+
+ return SelectCall(&I, "memset");
+ }
+ }
+}
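
Taken together, the intrinsic path above makes a three-way decision: reject, expand inline, or fall back to a plain libcall through SelectCall. A simplified standalone sketch of that gating (all parameter names are stand-ins, the real code reads these properties off the IR intrinsic, and a failed inline attempt can still fall through to the libcall):

    #include <cstdio>

    enum Lowering { Reject, InlineExpand, LibCall };

    static Lowering classifyMemTransfer(bool IsVolatile, bool IsMemCpy,
                                        bool LenIsI32, bool LenIsConst,
                                        unsigned Len, unsigned SrcAS,
                                        unsigned DstAS) {
      if (IsVolatile)
        return Reject;                    // volatile transfers are not handled
      if (IsMemCpy && LenIsConst && Len <= 16)
        return InlineExpand;              // small memcpy: inline loads/stores
      if (!LenIsI32)
        return Reject;                    // length must be a 32-bit integer
      if (SrcAS > 255 || DstAS > 255)
        return Reject;                    // unusual address spaces bail out
      return LibCall;                     // becomes a call to memcpy/memmove
    }

    int main() {
      std::printf("%d\n",
                  classifyMemTransfer(false, true, true, true, 8, 0, 0));  // 1
      std::printf("%d\n",
                  classifyMemTransfer(false, true, true, true, 64, 0, 0)); // 2
      return 0;
    }
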
+
+bool ARMFastISel::SelectTrunc(const Instruction *I) {
+ // The high bits for a type smaller than the register size are assumed to be
+ // undefined.
+ Value *Op = I->getOperand(0);
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(Op->getType(), true);
+ DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
+ return false;
+ if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
return false;
+ unsigned SrcReg = getRegForValue(Op);
+ if (!SrcReg) return false;
+
+ // Because the high bits are undefined, a truncate doesn't generate
+ // any code.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
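
The reason a truncate is free here: the narrower value keeps living in the same 32-bit register, and nothing promises anything about the high bits. A tiny standalone illustration of why reusing the register is sound:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Wide = 0xDEADBEEF;
      // Conceptually "the same register": only the low 8 bits are meaningful.
      uint8_t Narrow = static_cast<uint8_t>(Wide);
      assert(Narrow == 0xEF);
      return 0;
    }
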
+
+unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
+ bool isZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return 0;
+
unsigned Opc;
- bool isZext = isa<ZExtInst>(I);
bool isBoolZext = false;
- if (!SrcVT.isSimple())
- return false;
+ if (!SrcVT.isSimple()) return 0;
switch (SrcVT.getSimpleVT().SimpleTy) {
- default: return false;
+ default: return 0;
case MVT::i16:
- if (!Subtarget->hasV6Ops()) return false;
- if (isZext)
- Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
else
- Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
+ Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
break;
case MVT::i8:
- if (!Subtarget->hasV6Ops()) return false;
- if (isZext)
- Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
+ if (!Subtarget->hasV6Ops()) return 0;
+ if (isZExt)
+ Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
else
- Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
+ Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
break;
case MVT::i1:
- if (isZext) {
- Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+ if (isZExt) {
+ Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
isBoolZext = true;
break;
}
- return false;
+ return 0;
}
- // FIXME: We could save an instruction in many cases by special-casing
- // load instructions.
- unsigned SrcReg = getRegForValue(Op);
- if (!SrcReg) return false;
-
- unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
.addReg(SrcReg);
if (isBoolZext)
MIB.addImm(1);
else
MIB.addImm(0);
AddOptionalDefs(MIB);
- UpdateValueMap(I, DestReg);
+ return ResultReg;
+}
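
The opcode selection above forms a small table keyed on source width, signedness, and instruction set; only i1 is special, where zero-extension is done with an AND against 1 and sign-extension is rejected. A name-level sketch of the same table (mnemonic strings instead of opcode numbers; the hasV6Ops check for i16/i8 is omitted):

    #include <cstdio>

    // Returns the mnemonic the code above would pick, or "" when rejected.
    static const char *extOpcode(unsigned SrcBits, bool isZExt, bool isThumb2) {
      switch (SrcBits) {
      default: return "";
      case 16: return isZExt ? (isThumb2 ? "t2UXTH" : "UXTH")
                             : (isThumb2 ? "t2SXTH" : "SXTH");
      case 8:  return isZExt ? (isThumb2 ? "t2UXTB" : "UXTB")
                             : (isThumb2 ? "t2SXTB" : "SXTB");
      case 1:  // i1 can only be zero-extended, via an AND with immediate 1
        return isZExt ? (isThumb2 ? "t2ANDri" : "ANDri") : "";
      }
    }

    int main() {
      std::printf("%s\n", extOpcode(16, true, true));    // t2UXTH
      std::printf("[%s]\n", extOpcode(1, false, false)); // [] (rejected)
      return 0;
    }
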
+
+bool ARMFastISel::SelectIntExt(const Instruction *I) {
+ // On ARM, in general, integer casts don't involve legal types; this code
+ // handles promotable integers.
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ EVT SrcVT, DestVT;
+ SrcVT = TLI.getValueType(SrcTy, true);
+ DestVT = TLI.getValueType(DestTy, true);
+
+ bool isZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
+ if (ResultReg == 0) return false;
+ UpdateValueMap(I, ResultReg);
return true;
}
@@ -2074,6 +2532,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
return SelectStore(I);
case Instruction::Br:
return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
case Instruction::ICmp:
case Instruction::FCmp:
return SelectCmp(I);
@@ -2082,42 +2542,105 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
case Instruction::FPTrunc:
return SelectFPTrunc(I);
case Instruction::SIToFP:
- return SelectSIToFP(I);
+ return SelectIToFP(I, /*isSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*isSigned*/ false);
case Instruction::FPToSI:
- return SelectFPToSI(I);
+ return SelectFPToI(I, /*isSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*isSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
case Instruction::FAdd:
- return SelectBinaryOp(I, ISD::FADD);
+ return SelectBinaryFPOp(I, ISD::FADD);
case Instruction::FSub:
- return SelectBinaryOp(I, ISD::FSUB);
+ return SelectBinaryFPOp(I, ISD::FSUB);
case Instruction::FMul:
- return SelectBinaryOp(I, ISD::FMUL);
+ return SelectBinaryFPOp(I, ISD::FMUL);
case Instruction::SDiv:
- return SelectSDiv(I);
+ return SelectDiv(I, /*isSigned*/ true);
+ case Instruction::UDiv:
+ return SelectDiv(I, /*isSigned*/ false);
case Instruction::SRem:
- return SelectSRem(I);
+ return SelectRem(I, /*isSigned*/ true);
+ case Instruction::URem:
+ return SelectRem(I, /*isSigned*/ false);
case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return SelectIntrinsicCall(*II);
return SelectCall(I);
case Instruction::Select:
return SelectSelect(I);
case Instruction::Ret:
return SelectRet(I);
case Instruction::Trunc:
+ return SelectTrunc(I);
case Instruction::ZExt:
case Instruction::SExt:
- return SelectIntCast(I);
+ return SelectIntExt(I);
default: break;
}
return false;
}
+/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+/// vreg is being provided by the specified load instruction. If possible,
+/// try to fold the load as an operand to the instruction, returning true if
+/// successful.
+bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ //   ldrb r1, [r0]          ldrb r1, [r0]
+ //   uxtb r2, r1      =>
+ //   mov  r3, r2            mov  r3, r1
+ bool isZExt = true;
+ switch(MI->getOpcode()) {
+ default: return false;
+ case ARM::SXTH:
+ case ARM::t2SXTH:
+ isZExt = false;
+ case ARM::UXTH:
+ case ARM::t2UXTH:
+ if (VT != MVT::i16)
+ return false;
+ break;
+ case ARM::SXTB:
+ case ARM::t2SXTB:
+ isZExt = false;
+ case ARM::UXTB:
+ case ARM::t2UXTB:
+ if (VT != MVT::i8)
+ return false;
+ break;
+ }
+ // See if we can handle this address.
+ Address Addr;
+ if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+ if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
+ return false;
+ MI->eraseFromParent();
+ return true;
+}
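
The switch above leans on deliberate fall-through: each signed case clears the zero-extend flag and then shares the width check with its unsigned sibling. A compilable sketch of just that classification (the enum stands in for the real opcode numbers):

    #include <cassert>
    #include <cstdio>

    enum Opc { SXTH, t2SXTH, UXTH, t2UXTH, SXTB, t2SXTB, UXTB, t2UXTB, Other };

    // True if an extend of this kind can be folded into a load of Bits bits;
    // isZExt reports whether the folded load should zero-extend.
    static bool classifyExtend(Opc O, unsigned Bits, bool &isZExt) {
      isZExt = true;
      switch (O) {
      default:
        return false;
      case SXTH: case t2SXTH:
        isZExt = false;       // deliberate fall-through to the width check
      case UXTH: case t2UXTH:
        return Bits == 16;
      case SXTB: case t2SXTB:
        isZExt = false;       // deliberate fall-through
      case UXTB: case t2UXTB:
        return Bits == 8;
      }
    }

    int main() {
      bool Z;
      assert(classifyExtend(t2SXTH, 16, Z) && !Z);
      assert(!classifyExtend(UXTB, 16, Z)); // width mismatch: no fold
      std::printf("ok\n");
      return 0;
    }
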
+
namespace llvm {
- llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
- // Completely untested on non-darwin.
+ FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ // Completely untested on non-iOS.
const TargetMachine &TM = funcInfo.MF->getTarget();
// Darwin and thumb1 only for now.
const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
- if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
+ if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only() &&
!DisableARMFastISel)
return new ARMFastISel(funcInfo);
return 0;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 412751b84d41..402ecb0c5ffd 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====//
+//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,31 +15,40 @@
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
+ cl::desc("Align ARM NEON spills in prolog and epilog"));
+
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs);
+
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetDarwin())
+ // iOS requires FP not to be clobbered for backtracing purposes.
+ if (STI.isTargetIOS())
return true;
const MachineFrameInfo *MFI = MF.getFrameInfo();
// Always eliminate non-leaf frame pointers.
- return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
+ return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->hasCalls()) ||
RegInfo->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -72,7 +81,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
@@ -81,7 +90,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
- const unsigned *CSRegs) {
+ const uint16_t *CSRegs) {
// Integer spill area is handled with "pop".
if (MI->getOpcode() == ARM::LDMIA_RET ||
MI->getOpcode() == ARM::t2LDMIA_RET ||
@@ -140,10 +149,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// belongs to which callee-save spill areas.
unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
int FramePtrSpillFI = 0;
-
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
- return;
+ int D8SpillFI = 0;
// Allocate the vararg register save area. This is not counted in NumBytes.
if (VARegSaveSize)
@@ -177,7 +183,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
+ if (STI.isTargetIOS()) {
AFI->addGPRCalleeSavedArea2Frame(FI);
GPRCS2Size += 4;
} else {
@@ -186,8 +192,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
break;
default:
- AFI->addDPRCalleeSavedAreaFrame(FI);
- DPRCSSize += 8;
+ // This is a DPR. Exclude the aligned DPRCS2 spills.
+ if (Reg == ARM::D8)
+ D8SpillFI = FI;
+ if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) {
+ AFI->addDPRCalleeSavedAreaFrame(FI);
+ DPRCSSize += 8;
+ }
}
}
@@ -195,8 +206,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
if (GPRCS1Size > 0) MBBI++;
// Set FP to point to the stack slot that contains the previous FP.
- // For Darwin, FP is R7, which has now been stored in spill area 1.
- // Otherwise, if this is not Darwin, all the callee-saved registers go
+ // For iOS, FP is R7, which has now been stored in spill area 1.
+ // Otherwise, if this is not iOS, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
bool HasFP = hasFP(MF);
@@ -232,7 +243,17 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MBBI++;
}
- NumBytes = DPRCSOffset;
+ // Move past the aligned DPRCS2 area.
+ if (AFI->getNumAlignedDPRCS2Regs() > 0) {
+ MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+ // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
+ // leaves the stack pointer pointing to the DPRCS2 area.
+ //
+ // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
+ NumBytes += MFI->getObjectOffset(D8SpillFI);
+ } else
+ NumBytes = DPRCSOffset;
+
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -259,7 +280,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// If we need dynamic stack realignment, do it here. Be paranoid and make
// sure if we also have VLAs, we have a base pointer for frame access.
- if (RegInfo->needsStackRealignment(MF)) {
+ // If aligned NEON registers were spilled, the stack has already been
+ // realigned.
+ if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
@@ -315,8 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->getDesc().isReturn() &&
- "Can only insert epilog into returning blocks");
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -332,16 +354,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
- // All calls are tail calls in GHC calling conv, and functions have no prologue/epilogue.
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
- return;
-
if (!AFI->hasStackFrame()) {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
--MBBI;
@@ -365,7 +383,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
ARMCC::AL, 0, TII);
else {
// It's not possible to restore SP from FP in a single instruction.
- // For Darwin, this looks like:
+ // For iOS, this looks like:
// mov sp, r7
// sub sp, #24
// This is bad, if an interrupt is taken after the mov, sp is in an
@@ -404,17 +422,16 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
}
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri) {
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
// Jump to label or value in register.
- if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) {
- unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi)
- ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd)
- : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND);
+ if (RetOpcode == ARM::TCRETURNdi) {
+ unsigned TCOpcode = STI.isThumb() ?
+ (STI.isTargetIOS() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
+ ARM::TAILJMPd;
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
if (JumpTarget.isGlobal())
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
@@ -431,10 +448,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
addReg(JumpTarget.getReg(), RegState::Kill);
- } else if (RetOpcode == ARM::TCRETURNriND) {
- BuildMI(MBB, MBBI, dl,
- TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)).
- addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = prior(MBBI);
@@ -481,6 +494,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
return Offset - AFI->getDPRCalleeSavedAreaOffset();
+ // SP can move around if there are allocas. We may also lose track of SP
+ // when emergency spilling inside a non-reserved call frame setup.
+ bool hasMovingSP = !hasReservedCallFrame(MF);
+
// When dynamically realigning the stack, use the frame pointer for
// parameters, and the stack/base pointer for locals.
if (RegInfo->needsStackRealignment(MF)) {
@@ -488,7 +505,7 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
if (isFixed) {
FrameReg = RegInfo->getFrameRegister(MF);
Offset = FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
+ } else if (hasMovingSP) {
assert(RegInfo->hasBasePointer(MF) &&
"VLAs and dynamic stack alignment, but missing base pointer!");
FrameReg = RegInfo->getBaseRegister();
@@ -500,11 +517,10 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
if (hasFP(MF) && AFI->hasStackFrame()) {
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs (and thus the SP isn't reliable as a base).
- if (isFixed || (MFI->hasVarSizedObjects() &&
- !RegInfo->hasBasePointer(MF))) {
+ if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
- } else if (MFI->hasVarSizedObjects()) {
+ } else if (hasMovingSP) {
assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
if (AFI->isThumb2Function()) {
// Try to use the frame pointer if we can, else use the base pointer
@@ -551,6 +567,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned StmOpc, unsigned StrOpc,
bool NoGap,
bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
@@ -564,7 +581,11 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // D-registers in the aligned area DPRCS2 are NOT spilled here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
// Add the callee-saved register as live-in unless it's LR and
// @llvm.returnaddress is called. If LR is returned for
@@ -614,16 +635,15 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
const std::vector<CalleeSavedInfo> &CSI,
unsigned LdmOpc, unsigned LdrOpc,
bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool)) const {
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
- RetOpcode == ARM::TCRETURNdiND ||
- RetOpcode == ARM::TCRETURNri ||
- RetOpcode == ARM::TCRETURNriND);
+ RetOpcode == ARM::TCRETURNri);
SmallVector<unsigned, 4> Regs;
unsigned i = CSI.size();
@@ -632,7 +652,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.isTargetDarwin())) continue;
+ if (!(Func)(Reg, STI.isTargetIOS())) continue;
+
+ // The aligned reloads from area DPRCS2 are not inserted here.
+ if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
+ continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) {
Reg = ARM::PC;
@@ -686,6 +710,247 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
}
}
+/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. Also insert stack realignment code and leave the stack
+/// pointer pointing to the d8 spill slot.
+static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ // Mark the D-register spill slots as properly aligned. Since MFI computes
+ // stack slot layout backwards, this can actually mean that the d-reg stack
+ // slot offsets can be wrong. The offset for d8 will always be correct.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned DNum = CSI[i].getReg() - ARM::D8;
+ if (DNum >= 8)
+ continue;
+ int FI = CSI[i].getFrameIdx();
+ // The even-numbered registers will be 16-byte aligned, the odd-numbered
+ // registers will be 8-byte aligned.
+ MFI.setObjectAlignment(FI, DNum % 2 ? 8 : 16);
+
+ // The stack slot for D8 needs to be maximally aligned because this is
+ // actually the point where we align the stack pointer. MachineFrameInfo
+ // computes all offsets relative to the incoming stack pointer which is a
+ // bit weird when realigning the stack. Any extra padding for this
+ // over-alignment is not realized because the code inserted below adjusts
+ // the stack pointer by numregs * 8 before aligning the stack pointer.
+ if (DNum == 0)
+ MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
+ }
+
+ // Move the stack pointer to the d8 spill slot, and align it at the same
+ // time. Leave the stack slot address in the scratch register r4.
+ //
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ //
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+ AFI->setShouldRestoreSPFromFP(true);
+
+ // sub r4, sp, #numregs * 8
+ // The immediate is <= 64, so it doesn't need any special encoding.
+ unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::SP)
+ .addImm(8 * NumAlignedDPRCS2Regs)));
+
+ // bic r4, r4, #align-1
+ Opc = isThumb ? ARM::t2BICri : ARM::BICri;
+ unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign - 1)));
+
+ // mov sp, r4
+ // The stack pointer must be adjusted before spilling anything, otherwise
+ // the stack slots could be clobbered by an interrupt handler.
+ // Leave r4 live, it is used below.
+ Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
+ .addReg(ARM::R4);
+ MIB = AddDefaultPred(MIB);
+ if (!isThumb)
+ AddDefaultCC(MIB);
+
+ // Now spill NumAlignedDPRCS2Regs registers starting from d8.
+ // r4 holds the stack slot address.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
+ // The writeback is only needed when emitting two vst1.64 instructions.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed),
+ ARM::R4)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be spilled.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
+ .addReg(ARM::R4).addImm(16).addReg(NextReg)
+ .addReg(SupReg, RegState::ImplicitKill));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vst1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ MBB.addLiveIn(SupReg);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
+ .addReg(ARM::R4).addImm(16).addReg(SupReg));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vstr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs) {
+ MBB.addLiveIn(NextReg);
+ // vstr.64 uses addrmode5 which has an offset scale of 4.
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
+ .addReg(NextReg)
+ .addReg(ARM::R4).addImm((NextReg-R4BaseReg)*2));
+ }
+
+ // The last spill instruction inserted should kill the scratch register r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
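
The spill sequence is chosen greedily: a four-register vst1.64 with writeback first (only when at least six registers remain, so the writeback is actually needed), then a plain four-register vst1.64, a two-register one, and finally a lone vstr for an odd leftover. A standalone sketch that prints the plan for each possible count (assuming n is in [1,8], which is all the d8-d15 area allows):

    #include <cstdio>

    static void planSpills(unsigned N) {
      std::printf("n=%u:", N);
      if (N >= 6) { std::printf(" vst1.64 {4 d-regs} (writeback)"); N -= 4; }
      if (N >= 4) { std::printf(" vst1.64 {4 d-regs}"); N -= 4; }
      if (N >= 2) { std::printf(" vst1.64 {2 d-regs}"); N -= 2; }
      if (N)      { std::printf(" vstr.64 {1 d-reg}"); }
      std::printf("\n");
    }

    int main() {
      for (unsigned n = 1; n <= 8; ++n)
        planSpills(n);
      return 0;
    }
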
+
+/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
+/// iterator to the following instruction.
+static MachineBasicBlock::iterator
+skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs) {
+ // sub r4, sp, #numregs * 8
+ // bic r4, r4, #align - 1
+ // mov sp, r4
+ ++MI; ++MI; ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+
+ // These switches all fall through.
+ switch(NumAlignedDPRCS2Regs) {
+ case 7:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ default:
+ ++MI;
+ assert(MI->mayStore() && "Expecting spill instruction");
+ case 1:
+ case 2:
+ case 4:
+ assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+ ++MI;
+ }
+ return MI;
+}
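
The fall-through switch above is just counting: three setup instructions, plus however many store instructions the plan in emitAlignedDPRCS2Spills produced for this register count. A standalone consistency check of the two (again assuming n in [1,8]):

    #include <cassert>

    static unsigned numSpillStores(unsigned N) {
      unsigned Count = 0;
      if (N >= 6) { ++Count; N -= 4; } // vst1.64 x4 with writeback
      if (N >= 4) { ++Count; N -= 4; } // vst1.64 x4
      if (N >= 2) { ++Count; N -= 2; } // vst1.64 x2
      if (N) ++Count;                  // vstr.64
      return Count;
    }

    static unsigned skippedStores(unsigned N) {
      // Mirrors the switch: 7 -> 3; 1, 2, 4 -> 1; everything else -> 2.
      if (N == 7) return 3;
      if (N == 1 || N == 2 || N == 4) return 1;
      return 2;
    }

    int main() {
      for (unsigned n = 1; n <= 8; ++n)
        assert(numSpillStores(n) == skippedStores(n));
      return 0;
    }
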
+
+/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
+/// starting from d8. These instructions are assumed to execute while the
+/// stack is still aligned, unlike the code inserted by emitPopInst.
+static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned NumAlignedDPRCS2Regs,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Find the frame index assigned to d8.
+ int D8SpillFI = 0;
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i)
+ if (CSI[i].getReg() == ARM::D8) {
+ D8SpillFI = CSI[i].getFrameIdx();
+ break;
+ }
+
+ // Materialize the address of the d8 spill slot into the scratch register r4.
+ // This can be fairly complicated if the stack frame is large, so just use
+ // the normal frame index elimination mechanism to do it. This code runs as
+ // the initial part of the epilog where the stack and base pointers haven't
+ // been changed yet.
+ bool isThumb = AFI->isThumbFunction();
+ assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
+
+ unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+ .addFrameIndex(D8SpillFI).addImm(0)));
+
+ // Now restore NumAlignedDPRCS2Regs registers starting from d8.
+ unsigned NextReg = ARM::D8;
+
+ // 16-byte aligned vld1.64 with 4 d-regs and writeback.
+ if (NumAlignedDPRCS2Regs >= 6) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
+ .addReg(ARM::R4, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // We won't modify r4 beyond this point. It currently points to the next
+ // register to be reloaded.
+ unsigned R4BaseReg = NextReg;
+
+ // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
+ if (NumAlignedDPRCS2Regs >= 4) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QQPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
+ .addReg(ARM::R4).addImm(16)
+ .addReg(SupReg, RegState::ImplicitDefine));
+ NextReg += 4;
+ NumAlignedDPRCS2Regs -= 4;
+ }
+
+ // 16-byte aligned vld1.64 with 2 d-regs.
+ if (NumAlignedDPRCS2Regs >= 2) {
+ unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
+ ARM::QPRRegisterClass);
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
+ .addReg(ARM::R4).addImm(16));
+ NextReg += 2;
+ NumAlignedDPRCS2Regs -= 2;
+ }
+
+ // Finally, use a vanilla vldr.64 for the remaining odd register.
+ if (NumAlignedDPRCS2Regs)
+ AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
+ .addReg(ARM::R4).addImm(2*(NextReg-R4BaseReg)));
+
+ // Last reload kills r4.
+ llvm::prior(MI)->addRegisterKilled(ARM::R4, TRI);
+}
+
bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -700,12 +965,19 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned PushOneOpc = AFI->isThumbFunction() ?
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
- emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
+ emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
- MachineInstr::FrameSetup);
+ NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
+
+ // The code above does not insert spill code for the aligned DPRCS2 registers.
+ // The stack realignment code will be inserted between the push instructions
+ // and these spills.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
return true;
}
@@ -720,15 +992,22 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+ unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+
+ // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
+ // registers. Do that here instead.
+ if (NumAlignedDPRCS2Regs)
+ emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
unsigned FltOpc = ARM::VLDMDIA_UPD;
- emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register);
+ emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
+ NumAlignedDPRCS2Regs);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea2Register);
+ &isARMArea2Register, 0);
emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
- &isARMArea1Register);
+ &isARMArea1Register, 0);
return true;
}
@@ -852,6 +1131,55 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
return Limit;
}
+// In functions that realign the stack, it can be an advantage to spill the
+// callee-saved vector registers after realigning the stack. The vst1 and vld1
+// instructions take alignment hints that can improve performance.
+//
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
+ if (!SpillAlignedNEONRegs)
+ return;
+
+ // Naked functions don't spill callee-saved registers.
+ if (MF.getFunction()->hasFnAttr(Attribute::Naked))
+ return;
+
+ // We are planning to use NEON instructions vst1 / vld1.
+ if (!MF.getTarget().getSubtarget<ARMSubtarget>().hasNEON())
+ return;
+
+ // Don't bother if the default stack alignment is sufficiently high.
+ if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8)
+ return;
+
+ // Aligned spills require stack realignment.
+ const ARMBaseRegisterInfo *RegInfo =
+ static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (!RegInfo->canRealignStack(MF))
+ return;
+
+ // We always spill contiguous d-registers starting from d8. Count how many
+ // need spilling. The register allocator will almost always use the
+ // callee-saved registers in order, but it can happen that there are holes in
+ // the range. Registers above the hole will be spilled to the standard DPRCS
+ // area.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ unsigned NumSpills = 0;
+ for (; NumSpills < 8; ++NumSpills)
+ if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ break;
+
+ // Don't do this for just one d-register. It's not worth it.
+ if (NumSpills < 2)
+ return;
+
+ // Spill the first NumSpills D-registers after realigning the stack.
+ MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
+
+ // A scratch register is required for the vst1 / vld1 instructions.
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+}
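
The counting loop above only takes a contiguous run starting at d8: a hole in the range cuts the run short, everything past it falls back to the ordinary DPRCS spill area, and a run of one is not worth realigning for. A standalone sketch of that policy (the bool array stands in for MRI.isPhysRegOrOverlapUsed on d8..d15):

    #include <cstdio>

    static unsigned countAlignedSpills(const bool Used[8]) {
      unsigned N = 0;
      while (N < 8 && Used[N])
        ++N;                 // registers beyond the first hole go to DPRCS
      return N < 2 ? 0 : N;  // a single d-reg is not worth the realignment
    }

    int main() {
      //                 d8    d9    d10    d11   d12    d13    d14    d15
      bool Used[8] = {true, true, true, false, true, false, false, false};
      std::printf("%u\n", countAlignedSpills(Used)); // 3: the d11 hole stops it
      return 0;
    }
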
+
void
ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -898,28 +1226,22 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUsed(ARM::R4);
}
+ // See if we can spill vector registers to aligned stack.
+ checkNumAlignedDPRCS2Regs(MF);
+
// Spill the BasePtr if it's used.
if (RegInfo->hasBasePointer(MF))
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegUsed(Reg)) {
+ if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
- } else {
- // Check alias registers too.
- for (const unsigned *Aliases =
- RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) {
- if (MF.getRegInfo().isPhysRegUsed(*Aliases)) {
- Spilled = true;
- CanEliminateFrame = false;
- }
- }
}
if (!ARM::GPRRegisterClass->contains(Reg))
@@ -928,7 +1250,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.isTargetDarwin()) {
+ if (!STI.isTargetIOS()) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -948,7 +1270,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
break;
}
} else {
- if (!STI.isTargetDarwin()) {
+ if (!STI.isTargetIOS()) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 61bb8afa40f2..a1c2b93562c9 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -63,12 +63,13 @@ public:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
unsigned StrOpc, bool NoGap,
- bool(*Func)(unsigned, bool),
+ bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs,
unsigned MIFlags = 0) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
- bool(*Func)(unsigned, bool)) const;
+ bool(*Func)(unsigned, bool),
+ unsigned NumAlignedDPRCS2Regs) const;
};
} // End llvm namespace
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
deleted file mode 100644
index 5f863ea241ca..000000000000
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ /dev/null
@@ -1,219 +0,0 @@
-//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This pass merges globals with internal linkage into one. This way all the
-// globals which were merged into a biggest one can be addressed using offsets
-// from the same base pointer (no need for separate base pointer for each of the
-// global). Such a transformation can significantly reduce the register pressure
-// when many globals are involved.
-//
-// For example, consider the code which touches several global variables at
-// once:
-//
-// static int foo[N], bar[N], baz[N];
-//
-// for (i = 0; i < N; ++i) {
-// foo[i] = bar[i] * baz[i];
-// }
-//
-// On ARM the addresses of 3 arrays should be kept in the registers, thus
-// this code has quite large register pressure (loop body):
-//
-// ldr r1, [r5], #4
-// ldr r2, [r6], #4
-// mul r1, r2, r1
-// str r1, [r0], #4
-//
-// Pass converts the code to something like:
-//
-// static struct {
-// int foo[N];
-// int bar[N];
-// int baz[N];
-// } merged;
-//
-// for (i = 0; i < N; ++i) {
-// merged.foo[i] = merged.bar[i] * merged.baz[i];
-// }
-//
-// and in ARM code this becomes:
-//
-// ldr r0, [r5, #40]
-// ldr r1, [r5, #80]
-// mul r0, r1, r0
-// str r0, [r5], #4
-//
-// note that we saved 2 registers here almostly "for free".
-// ===---------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-global-merge"
-#include "ARM.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Attributes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-using namespace llvm;
-
-namespace {
- class ARMGlobalMerge : public FunctionPass {
- /// TLI - Keep a pointer of a TargetLowering to consult for determining
- /// target type sizes.
- const TargetLowering *TLI;
-
- bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst) const;
-
- public:
- static char ID; // Pass identification, replacement for typeid.
- explicit ARMGlobalMerge(const TargetLowering *tli)
- : FunctionPass(ID), TLI(tli) {}
-
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
-
- const char *getPassName() const {
- return "Merge internal globals";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- FunctionPass::getAnalysisUsage(AU);
- }
-
- struct GlobalCmp {
- const TargetData *TD;
-
- GlobalCmp(const TargetData *td) : TD(td) { }
-
- bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
- Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
- Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
-
- return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
- }
- };
- };
-} // end anonymous namespace
-
-char ARMGlobalMerge::ID = 0;
-
-bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
- Module &M, bool isConst) const {
- const TargetData *TD = TLI->getTargetData();
-
- // FIXME: Infer the maximum possible offset depending on the actual users
- // (these max offsets are different for the users inside Thumb or ARM
- // functions)
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
-
- // FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
-
- Type *Int32Ty = Type::getInt32Ty(M.getContext());
-
- for (size_t i = 0, e = Globals.size(); i != e; ) {
- size_t j = 0;
- uint64_t MergedSize = 0;
- std::vector<Type*> Tys;
- std::vector<Constant*> Inits;
- for (j = i; j != e; ++j) {
- Type *Ty = Globals[j]->getType()->getElementType();
- MergedSize += TD->getTypeAllocSize(Ty);
- if (MergedSize > MaxOffset) {
- break;
- }
- Tys.push_back(Ty);
- Inits.push_back(Globals[j]->getInitializer());
- }
-
- StructType *MergedTy = StructType::get(M.getContext(), Tys);
- Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
- GlobalValue::InternalLinkage,
- MergedInit, "_MergedGlobals");
- for (size_t k = i; k < j; ++k) {
- Constant *Idx[2] = {
- ConstantInt::get(Int32Ty, 0),
- ConstantInt::get(Int32Ty, k-i)
- };
- Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
- Globals[k]->replaceAllUsesWith(GEP);
- Globals[k]->eraseFromParent();
- }
- i = j;
- }
-
- return true;
-}
-
-
-bool ARMGlobalMerge::doInitialization(Module &M) {
- SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
- const TargetData *TD = TLI->getTargetData();
- unsigned MaxOffset = TLI->getMaximalGlobalOffset();
- bool Changed = false;
-
- // Grab all non-const globals.
- for (Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- // Merge is safe for "normal" internal globals only
- if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
- continue;
-
- // Ignore fancy-aligned globals for now.
- unsigned Alignment = I->getAlignment();
- Type *Ty = I->getType()->getElementType();
- if (Alignment > TD->getABITypeAlignment(Ty))
- continue;
-
- // Ignore all 'special' globals.
- if (I->getName().startswith("llvm.") ||
- I->getName().startswith(".llvm."))
- continue;
-
- if (TD->getTypeAllocSize(Ty) < MaxOffset) {
- const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
- if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
- BSSGlobals.push_back(I);
- else if (I->isConstant())
- ConstGlobals.push_back(I);
- else
- Globals.push_back(I);
- }
- }
-
- if (Globals.size() > 1)
- Changed |= doMerge(Globals, M, false);
- if (BSSGlobals.size() > 1)
- Changed |= doMerge(BSSGlobals, M, false);
-
- // FIXME: This currently breaks the EH processing due to way how the
- // typeinfo detection works. We might want to detect the TIs and ignore
- // them in the future.
- // if (ConstGlobals.size() > 1)
- // Changed |= doMerge(ConstGlobals, M, true);
-
- return Changed;
-}
-
-bool ARMGlobalMerge::runOnFunction(Function &F) {
- return false;
-}
-
-FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
- return new ARMGlobalMerge(tli);
-}
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 787f6a279187..a5fd15b6bb97 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -21,7 +21,7 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -38,9 +38,6 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
if (!MI->isDebugValue()) {
- if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1])
- return Hazard;
-
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause 4 cycle stall.
const MCInstrDesc &MCID = MI->getDesc();
@@ -48,9 +45,9 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *DefMI = LastMI;
const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastMCID.isBarrier() &&
+ if (!LastMI->isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ !(STI.isCortexA9() && (LastMI->mayLoad() || LastMI->mayStore())) &&
(LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
@@ -76,30 +73,11 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
void ARMHazardRecognizer::Reset() {
LastMI = 0;
FpMLxStalls = 0;
- ITBlockSize = 0;
ScoreboardHazardRecognizer::Reset();
}
void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
MachineInstr *MI = SU->getInstr();
- unsigned Opcode = MI->getOpcode();
- if (ITBlockSize) {
- --ITBlockSize;
- } else if (Opcode == ARM::t2IT) {
- unsigned Mask = MI->getOperand(1).getImm();
- unsigned NumTZ = CountTrailingZeros_32(Mask);
- assert(NumTZ <= 3 && "Invalid IT mask!");
- ITBlockSize = 4 - NumTZ;
- MachineBasicBlock::iterator I = MI;
- for (unsigned i = 0; i < ITBlockSize; ++i) {
- // Advance to the next instruction, skipping any dbg_value instructions.
- do {
- ++I;
- } while (I->isDebugValue());
- ITBlockMIs[ITBlockSize-1-i] = &*I;
- }
- }
-
if (!MI->isDebugValue()) {
LastMI = MI;
FpMLxStalls = 0;
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index 2bc218d8566b..98bfc4cf0cc5 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -23,6 +23,10 @@ class ARMBaseRegisterInfo;
class ARMSubtarget;
class MachineInstr;
+/// ARMHazardRecognizer handles special constraints that are not expressed in
+/// the scheduling itinerary. This is only used during postRA scheduling. The
+/// ARM preRA scheduler uses an unspecialized instance of the
+/// ScoreboardHazardRecognizer.
class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
const ARMBaseInstrInfo &TII;
const ARMBaseRegisterInfo &TRI;
@@ -30,8 +34,6 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer {
MachineInstr *LastMI;
unsigned FpMLxStalls;
- unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
- MachineInstr *ITBlockMIs[4];
public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
@@ -40,7 +42,7 @@ public:
const ARMSubtarget &sti,
const ScheduleDAG *DAG) :
ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii),
- TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {}
+ TRI(tri), STI(sti), LastMI(0) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void Reset();
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 5ee009c04c5b..1eafbbc8f64d 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -244,6 +244,7 @@ private:
/// SelectCMOVOp - Select CMOV instructions for ARM.
SDNode *SelectCMOVOp(SDNode *N);
+ SDNode *SelectConditionalOp(SDNode *N);
SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
@@ -923,7 +924,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
// The maximum alignment is equal to the memory size being referenced.
unsigned LSNAlign = LSN->getAlignment();
unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8;
- if (LSNAlign > MemSize && MemSize > 1)
+ if (LSNAlign >= MemSize && MemSize > 1)
Alignment = MemSize;
} else {
// All other uses of addrmode6 are for intrinsics. For now just record
@@ -1549,6 +1550,52 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
return CurDAG->getTargetConstant(Alignment, MVT::i32);
}
+// Get the register stride update opcode of a VLD/VST instruction that
+// is otherwise equivalent to the given fixed stride updating instruction.
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
+ switch (Opc) {
+ default: break;
+ case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
+ case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
+ case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
+ case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
+ case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
+ case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
+ case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
+ case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
+
+ case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
+ case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
+ case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
+ case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
+ case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
+ case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
+ case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
+ case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
+ case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
+ case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
+
+ case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
+ case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
+ case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
+ case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
+ case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
+ case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
+
+ case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
+ case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
+ case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
+ case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
+ case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
+ case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
+
+ case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
+ case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
+ case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
+ }
+ return Opc; // If not one we handle, return it unchanged.
+}
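
The table is mechanical: every entry swaps a "_fixed" writeback opcode for its "_register" twin, and anything unlisted is returned unchanged. A name-level sketch of the same idea (operating on opcode names rather than the real enum values):

    #include <cstdio>
    #include <string>

    static std::string toRegisterUpdate(std::string Name) {
      const std::string Suffix = "_fixed";
      if (Name.size() >= Suffix.size() &&
          Name.compare(Name.size() - Suffix.size(), Suffix.size(), Suffix) == 0)
        Name.replace(Name.size() - Suffix.size(), Suffix.size(), "_register");
      return Name; // unhandled opcodes come back unchanged
    }

    int main() {
      std::printf("%s\n", toRegisterUpdate("VLD1d8wb_fixed").c_str());
      std::printf("%s\n", toRegisterUpdate("VLD1d64Qwb").c_str()); // unchanged
      return 0;
    }
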
+
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
@@ -1612,7 +1659,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if ((NumVecs == 1 || NumVecs == 2) && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs != 1 && NumVecs != 2 && Opc != ARM::VLD1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -1754,7 +1809,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
Ops.push_back(Align);
if (isUpdating) {
SDValue Inc = N->getOperand(AddrOpIdx + 1);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
+ // case entirely when the rest are updated to that form, too.
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ // We use a VST1 for v1i64 even if the pseudo says vst2/3/4, so
+ // check for that explicitly too. Horribly hacky, but temporary.
+ if ((NumVecs > 2 && Opc != ARM::VST1q64wb_fixed) ||
+ !isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
}
Ops.push_back(SrcReg);
Ops.push_back(Pred);
@@ -1977,8 +2040,14 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (isUpdating) {
+ // Fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
- Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+ if (!isa<ConstantSDNode>(Inc.getNode()))
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
}
Ops.push_back(Pred);
Ops.push_back(Reg0);
@@ -2116,7 +2185,6 @@ SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
case ARM_AM::ror: Opc = ARM::t2MOVCCror; break;
default:
llvm_unreachable("Unknown so_reg opcode!");
- break;
}
SDValue SOShImm =
CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32);
@@ -2227,9 +2295,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
// Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc)
// Pattern complexity = 18 cost = 1 size = 0
- SDValue CPTmp0;
- SDValue CPTmp1;
- SDValue CPTmp2;
if (Subtarget->isThumb()) {
SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal,
CCVal, CCR, InFlag);
@@ -2286,8 +2351,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag };
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Illegal conditional move type!");
- break;
+ default: llvm_unreachable("Illegal conditional move type!");
case MVT::i32:
Opc = Subtarget->isThumb()
? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo)
@@ -2303,6 +2367,115 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+SDNode *ARMDAGToDAGISel::SelectConditionalOp(SDNode *N) {
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ ARMCC::CondCodes CCVal =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ SDValue CCR = N->getOperand(3);
+ assert(CCR.getOpcode() == ISD::Register);
+ SDValue InFlag = N->getOperand(4);
+ SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ if (Subtarget->isThumb()) {
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCrs; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCrs; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCrs; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+ }
+
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (T) {
+ unsigned TrueImm = T->getZExtValue();
+ if (is_t2_so_imm(TrueImm)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCri; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCri; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCri; break;
+ }
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+ }
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::t2ANDCCrr; break;
+ case ARMISD::COR: Opc = ARM::t2ORRCCrr; break;
+ case ARMISD::CXOR: Opc = ARM::t2EORCCrr; break;
+ }
+ SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+
+ SDValue CPTmp0;
+ SDValue CPTmp1;
+ SDValue CPTmp2;
+ if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrsi; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrsi; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrsi; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 7);
+ }
+
+ if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrsr; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrsr; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrsr; break;
+ }
+ SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 8);
+ }
+
+ ConstantSDNode *T = dyn_cast<ConstantSDNode>(TrueVal);
+ if (T) {
+ unsigned TrueImm = T->getZExtValue();
+ if (is_so_imm(TrueImm)) {
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCri; break;
+ case ARMISD::COR: Opc = ARM::ORRCCri; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCri; break;
+ }
+ SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32);
+ SDValue Ops[] = { FalseVal, True, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+ }
+ }
+
+ unsigned Opc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ARMISD::CAND: Opc = ARM::ANDCCrr; break;
+ case ARMISD::COR: Opc = ARM::ORRCCrr; break;
+ case ARMISD::CXOR: Opc = ARM::EORCCrr; break;
+ }
+ SDValue Ops[] = { FalseVal, TrueVal, CC, CCR, Reg0, InFlag };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 6);
+}
+
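SelectConditionalOp maps the new ARMISD::CAND/COR/CXOR nodes onto the
predicated ANDCC/ORRCC/EORCC instruction families, preferring the
shifter-operand or immediate variants when the true operand fits one. An
illustrative source pattern that can give rise to such a node (the exact
combine trigger is an assumption here, not something this function checks):

    int f(int x, int mask, int p) {
      // "p ? (x & mask) : x" is the kind of select that can fold into a
      // single conditional AND, e.g. on Thumb2:
      //   cmp   r2, #0
      //   it    ne
      //   andne r0, r0, r1
      return p ? (x & mask) : x;
    }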
/// Target-specific DAG combining for ISD::XOR.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
@@ -2316,7 +2489,6 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
SDValue XORSrc0 = N->getOperand(0);
SDValue XORSrc1 = N->getOperand(1);
- DebugLoc DL = N->getDebugLoc();
EVT VT = N->getValueType(0);
if (DisableARMIntABS)
@@ -2641,6 +2813,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV:
return SelectCMOVOp(N);
+ case ARMISD::CAND:
+ case ARMISD::COR:
+ case ARMISD::CXOR:
+ return SelectConditionalOp(N);
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -2649,7 +2825,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VZIPd32; break;
+ // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VZIPq8; break;
case MVT::v8i16: Opc = ARM::VZIPq16; break;
case MVT::v4f32:
@@ -2668,7 +2845,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = ARM::VUZPd32; break;
+ // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+ case MVT::v2i32: Opc = ARM::VTRNd32; break;
case MVT::v16i8: Opc = ARM::VUZPq8; break;
case MVT::v8i16: Opc = ARM::VUZPq16; break;
case MVT::v4f32:
@@ -2715,8 +2893,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo,
- ARM::VLD2DUPd32Pseudo };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32 };
return SelectVLDDup(N, false, 2, Opcodes);
}
@@ -2733,8 +2911,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD2DUP_UPD: {
- unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD,
- ARM::VLD2DUPd32Pseudo_UPD };
+ unsigned Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed,
+ ARM::VLD2DUPd32wb_fixed };
return SelectVLDDup(N, true, 2, Opcodes);
}
@@ -2751,24 +2929,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VLD1_UPD: {
- unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD,
- ARM::VLD1d32_UPD, ARM::VLD1d64_UPD };
- unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD,
- ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed,
+ ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed };
+ unsigned QOpcodes[] = { ARM::VLD1q8wb_fixed,
+ ARM::VLD1q16wb_fixed,
+ ARM::VLD1q32wb_fixed,
+ ARM::VLD1q64wb_fixed };
return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD2_UPD: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD,
- ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD,
- ARM::VLD2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VLD2d8wb_fixed,
+ ARM::VLD2d16wb_fixed,
+ ARM::VLD2d32wb_fixed,
+ ARM::VLD1q64wb_fixed};
+ unsigned QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
+ ARM::VLD2q16PseudoWB_fixed,
+ ARM::VLD2q32PseudoWB_fixed };
return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VLD3_UPD: {
unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD,
- ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD };
+ ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
ARM::VLD3q16Pseudo_UPD,
ARM::VLD3q32Pseudo_UPD };
@@ -2780,7 +2963,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VLD4_UPD: {
unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD,
- ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD };
+ ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64wb_fixed};
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
ARM::VLD4q16Pseudo_UPD,
ARM::VLD4q32Pseudo_UPD };
@@ -2815,24 +2998,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::VST1_UPD: {
- unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD,
- ARM::VST1d32_UPD, ARM::VST1d64_UPD };
- unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD,
- ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed,
+ ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed };
+ unsigned QOpcodes[] = { ARM::VST1q8wb_fixed,
+ ARM::VST1q16wb_fixed,
+ ARM::VST1q32wb_fixed,
+ ARM::VST1q64wb_fixed };
return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST2_UPD: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD,
- ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD };
- unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD,
- ARM::VST2q32Pseudo_UPD };
+ unsigned DOpcodes[] = { ARM::VST2d8wb_fixed,
+ ARM::VST2d16wb_fixed,
+ ARM::VST2d32wb_fixed,
+ ARM::VST1q64wb_fixed};
+ unsigned QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
+ ARM::VST2q16PseudoWB_fixed,
+ ARM::VST2q32PseudoWB_fixed };
return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
}
case ARMISD::VST3_UPD: {
unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD,
- ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD };
+ ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudoWB_fixed };
unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
ARM::VST3q16Pseudo_UPD,
ARM::VST3q32Pseudo_UPD };
@@ -2844,7 +3032,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::VST4_UPD: {
unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD,
- ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD };
+ ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudoWB_fixed };
unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
ARM::VST4q16Pseudo_UPD,
ARM::VST4q32Pseudo_UPD };
@@ -2993,14 +3181,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
- ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64};
return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
- ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
@@ -3054,14 +3242,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
- ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
- ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
+ ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
@@ -3122,14 +3310,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
case Intrinsic::arm_neon_vtbl2:
- return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
case Intrinsic::arm_neon_vtbl3:
return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
case Intrinsic::arm_neon_vtbl4:
return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
case Intrinsic::arm_neon_vtbx2:
- return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
case Intrinsic::arm_neon_vtbx3:
return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
case Intrinsic::arm_neon_vtbx4:
@@ -3163,7 +3351,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops.push_back(N->getOperand(2));
Ops.push_back(getAL(CurDAG)); // Predicate
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
- return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT,
+ return CurDAG->getMachineNode(ARM::VTBL2, dl, VT,
Ops.data(), Ops.size());
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f60d177a920b..a103c94cede4 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-isel"
+#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
-#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
-#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
@@ -40,18 +39,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <sstream>
using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -73,7 +69,7 @@ ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-namespace llvm {
+namespace {
class ARMCCState : public CCState {
public:
ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
@@ -89,7 +85,7 @@ namespace llvm {
}
// The APCS parameter registers.
-static const unsigned GPRArgRegs[] = {
+static const uint16_t GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -108,8 +104,14 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
EVT ElemTy = VT.getVectorElementType();
if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- if (ElemTy != MVT::i32) {
+ if (ElemTy == MVT::i32) {
+ setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+ } else {
setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
@@ -121,18 +123,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction(VT.getSimpleVT(),
- (MVT::SimpleValueType)InnerVT, Expand);
}
- setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -263,7 +259,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SRL_I128, 0);
setLibcallName(RTLIB::SRA_I128, 0);
- if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
// Double-precision floating-point arithmetic helper functions
// RTABI chapter 4.1.2, Table 2
setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
@@ -388,8 +384,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Long long helper functions
// RTABI chapter 4.2, Table 9
setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
- setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
@@ -405,21 +399,28 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
+ setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
+ setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
// Memory operations
// RTABI chapter 4.3.4
setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
+ setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
+ setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
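Moving the 64-bit division libcalls into this block also gives them an
explicit AAPCS calling convention. Note that __aeabi_ldivmod returns both
results at once (quotient in r0:r1, remainder in r2:r3 under the RTABI), so a
single call can serve a paired divide and remainder. Illustrative only:

    // long long q = a / b;  ->  bl __aeabi_ldivmod   @ quotient in r0:r1
    // long long r = a % b;  ->  same call; remainder already in r2:r3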
@@ -433,7 +434,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
else
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
if (!Subtarget->isFPOnlySP())
addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
@@ -441,6 +443,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
+
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
addDRTypeForNEON(MVT::v8i8);
@@ -457,13 +472,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// v2f64 is legal so that QR subregs can be extracted as f64 elements, but
// neither Neon nor VFP support any arithmetic operations on it.
+ // The same goes for v4f32, but keep in mind that vadd, vsub and vmul are
+ // natively supported for v4f32.
setOperationAction(ISD::FADD, MVT::v2f64, Expand);
setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ // FIXME: Code duplication: FDIV and FREM are always expanded, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
setOperationAction(ISD::FREM, MVT::v2f64, Expand);
+ // FIXME: Create a unittest. In other words, find a case where "copysign"
+ // appears in the DAG with vector operands.
setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
+ // FIXME: Code duplication: SETCC has custom operation action, see
+ // ARMTargetLowering::addTypeForNEON method for details.
setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FNEG and for FABS.
setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
setOperationAction(ISD::FABS, MVT::v2f64, Expand);
setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
@@ -476,13 +501,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
+ // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
- setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
+ setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
@@ -498,9 +533,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
- // a destination type that is wider than the source.
+ // a destination type that is wider than the source; nor does it have
+ // a FP_TO_[SU]INT instruction with a destination that is narrower than
+ // the source.
setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
setTargetDAGCombine(ISD::INTRINSIC_VOID);
setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
@@ -519,6 +558,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::FP_TO_UINT);
setTargetDAGCombine(ISD::FDIV);
+
+ // It is legal to extload from v4i8 to v4i16 or v4i32.
+ MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
+ MVT::v4i16, MVT::v2i16,
+ MVT::v2i32};
+ for (unsigned i = 0; i < 6; ++i) {
+ setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
+ setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
+ }
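Marking these extending loads legal lets a widening vector load select
directly instead of being scalarized. A sketch of the kind of IR that benefits
(assumed; the exact instruction sequence depends on alignment and lane
handling):

    // %b = load <4 x i8>* %p
    // %w = sext <4 x i8> %b to <4 x i32>
    // With SEXTLOAD legal on v4i8, this can lower to one narrow load plus
    // vmovl.s8 / vmovl.s16 widening moves rather than four scalar loads.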
}
computeRegisterProperties();
@@ -576,6 +625,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ // These just redirect to CTTZ and CTLZ on ARM.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand);
+
// Only ARMv6 has BSWAP.
if (!Subtarget->hasV6Ops())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -606,10 +659,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setExceptionPointerRegister(ARM::R0);
- setExceptionSelectorRegister(ARM::R1);
+
+ if (!Subtarget->isTargetDarwin()) {
+ // Non-Darwin platforms may return values in these registers via the
+ // personality function.
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setExceptionPointerRegister(ARM::R0);
+ setExceptionSelectorRegister(ARM::R1);
+ }
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
// ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
@@ -664,7 +722,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BITCAST, MVT::i64, Custom);
@@ -676,7 +735,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetDarwin()) {
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
}
@@ -703,18 +761,21 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
+ !Subtarget->isThumb1Only()) {
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
}
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
+ if (!Subtarget->hasVFP4()) {
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ }
// Various VFP goodness
- if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
+ if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
if (Subtarget->hasVFP2()) {
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
@@ -735,20 +796,27 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
- if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
- setTargetDAGCombine(ISD::OR);
- if (Subtarget->hasNEON())
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) {
setTargetDAGCombine(ISD::AND);
+ setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
+ }
+
+ if (Subtarget->hasV6Ops())
+ setTargetDAGCombine(ISD::SRL);
setStackPointerRegisterToSaveRestore(ARM::SP);
- if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
+ !Subtarget->hasVFP2())
setSchedulingPreference(Sched::RegPressure);
else
setSchedulingPreference(Sched::Hybrid);
//// temporary - rewrite interface to use type
maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
+ maxStoresPerMemset = 16;
+ maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
// On ARM arguments smaller than 4 bytes are extended, so all arguments
// are at least 4 bytes aligned.
@@ -828,7 +896,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
+
case ARMISD::CMOV: return "ARMISD::CMOV";
+ case ARMISD::CAND: return "ARMISD::CAND";
+ case ARMISD::COR: return "ARMISD::COR";
+ case ARMISD::CXOR: return "ARMISD::CXOR";
case ARMISD::RBIT: return "ARMISD::RBIT";
@@ -851,7 +923,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
- case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";
case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
@@ -899,6 +970,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
+ case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -949,7 +1021,7 @@ EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
-TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+const TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
// Map v4i64 to QQ registers but do not make the type legal. Similarly map
// v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
// load / store 4 to 8 consecutive D registers.
@@ -984,7 +1056,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (VT.isFloatingPoint() || VT.isVector())
- return Sched::Latency;
+ return Sched::ILP;
}
if (!N->isMachineOpcode())
@@ -999,7 +1071,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
if (!Itins->isEmpty() &&
Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
- return Sched::Latency;
+ return Sched::ILP;
return Sched::RegPressure;
}
@@ -1081,18 +1153,19 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
if (!Subtarget->isAAPCS_ABI())
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
else if (Subtarget->hasVFP2() &&
- FloatABIType == FloatABI::Hard && !isVarArg)
+ getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
+ !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
}
case CallingConv::ARM_AAPCS_VFP:
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ if (!isVarArg)
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ // Fallthrough
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
- case CallingConv::GHC:
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
}
@@ -1215,7 +1288,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1334,7 +1407,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
@@ -1350,12 +1423,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
MVT::i32);
- // TODO: Disable AlwaysInline when it becomes possible
- // to emit a nested call sequence.
MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
Flags.getByValAlign(),
/*isVolatile=*/false,
- /*AlwaysInline=*/true,
+ /*AlwaysInline=*/false,
MachinePointerInfo(0),
MachinePointerInfo(0)));
@@ -1429,7 +1500,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
@@ -1444,7 +1515,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
@@ -1465,7 +1536,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1494,7 +1565,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
@@ -1513,12 +1584,20 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Subtarget->isThumb()) {
if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
+ else if (doesNotRet && isDirect && !isARMFunc &&
+ Subtarget->hasRAS() && !Subtarget->isThumb1Only())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
} else {
- CallOpc = (isDirect || Subtarget->hasV5TOps())
- ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
- : ARMISD::CALL_NOLINK;
+ if (!isDirect && !Subtarget->hasV5TOps()) {
+ CallOpc = ARMISD::CALL_NOLINK;
+ } else if (doesNotRet && isDirect && Subtarget->hasRAS())
+ // "mov lr, pc; b _foo" to avoid confusing the RSP
+ CallOpc = ARMISD::CALL_NOLINK;
+ else
+ CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
}
std::vector<SDValue> Ops;
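The doesNotRet path deliberately avoids BL for calls that never return: BL
pushes a return address onto the hardware return stack that is never popped,
which skews the return stack predictor. A sketch of the intended effect for a
direct noreturn callee (illustrative):

    // bl  _abort        @ would leave an unbalanced return-stack entry
    // mov lr, pc        @ instead: manual link register update...
    // b   _abort        @ ...plus a plain branch the predictor ignores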
@@ -1531,6 +1610,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
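The register-mask operand is a compact description of which physical registers
survive the call, replacing per-register implicit operands. The exact contents
come from the calling-convention tables; under AAPCS the preserved set is
roughly r4-r11 (r9 is platform-dependent), sp, and d8-d15, so values live in
those registers need not be spilled around the call. Sketch (restating the
added lines above):

    // The mask is a bit vector over all physical registers; a set bit means
    // "preserved across this call".
    const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
    Ops.push_back(DAG.getRegisterMask(Mask));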
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -1558,7 +1643,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
void
-llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
+ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
unsigned reg = State->AllocateReg(GPRArgRegs, 4);
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
@@ -1588,7 +1673,7 @@ llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
- const ARMInstrInfo *TII) {
+ const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
@@ -1723,8 +1808,7 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// the caller's fixed stack objects.
MachineFrameInfo *MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const ARMInstrInfo *TII =
- ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
@@ -1852,63 +1936,72 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return result;
}
-bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
- unsigned NumCopies = 0;
- SDNode* Copies[2];
- SDNode *Use = *N->use_begin();
- if (Use->getOpcode() == ISD::CopyToReg) {
- Copies[NumCopies++] = Use;
- } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
+ SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
- for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
+ SmallPtrSet<SDNode*, 2> Copies;
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
UI != UE; ++UI) {
if (UI->getOpcode() != ISD::CopyToReg)
return false;
- Copies[UI.getUse().getResNo()] = *UI;
- ++NumCopies;
+ Copies.insert(*UI);
}
- } else if (Use->getOpcode() == ISD::BITCAST) {
+ if (Copies.size() > 2)
+ return false;
+
+ for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
+ UI != UE; ++UI) {
+ SDValue UseChain = UI->getOperand(0);
+ if (Copies.count(UseChain.getNode()))
+ // Second CopyToReg
+ Copy = *UI;
+ else
+ // First CopyToReg
+ TCChain = UseChain;
+ }
+ } else if (Copy->getOpcode() == ISD::BITCAST) {
// f32 returned in a single GPR.
- if (!Use->hasNUsesOfValue(1, 0))
+ if (!Copy->hasOneUse())
return false;
- Use = *Use->use_begin();
- if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+ Copy = *Copy->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
- Copies[NumCopies++] = Use;
+ Chain = Copy->getOperand(0);
} else {
return false;
}
- if (NumCopies != 1 && NumCopies != 2)
- return false;
-
bool HasRet = false;
- for (unsigned i = 0; i < NumCopies; ++i) {
- SDNode *Copy = Copies[i];
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- SDNode *Use = *UI;
- if (Use == Copies[0] || Use == Copies[1])
- continue;
- return false;
- }
- if (UI->getOpcode() != ARMISD::RET_FLAG)
- return false;
- HasRet = true;
- }
+ for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+ UI != UE; ++UI) {
+ if (UI->getOpcode() != ARMISD::RET_FLAG)
+ return false;
+ HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
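The rewrite above threads the chain that a tail call would need (TCChain) out
through the reference parameter instead of merely answering yes or no. The f64
shape being recognized, sketched from the code (names illustrative):

    //   v  = VMOVRRD f64            ; f64 split into two i32 halves
    //   c1 = CopyToReg chain, R0    ; first half; incoming chain -> TCChain
    //   c2 = CopyToReg c1,    R1    ; second half, chained on the first
    // If every use of the last copy is RET_FLAG, N is used only by the
    // return, and Chain is rewired to TCChain so the call can become a
    // tail call.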
bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARMTailCalls)
+ if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
return false;
if (!CI->isTailCall())
@@ -1965,7 +2058,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
if (RelocM == Reloc::Static)
return Result;
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1989,7 +2082,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Argument.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2005,7 +2098,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
@@ -2037,7 +2131,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
Chain = Offset.getValue(1);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2045,7 +2139,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
} else {
// local exec model
ARMConstantPoolValue *CPV =
@@ -2054,7 +2148,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
// The address of the thread local variable is the add of the thread
@@ -2092,13 +2186,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Result.getValue(1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
if (!UseGOTOFF)
Result = DAG.getLoad(PtrVT, dl, Chain, Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
return Result;
}
@@ -2115,7 +2210,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
}
}
@@ -2128,7 +2223,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- // FIXME: Enable this for static codegen when tool issues are fixed.
+ // FIXME: Enable this for static codegen when tool issues are fixed. Also
+ // update ARMFastISel::ARMMaterializeGV.
if (Subtarget->useMovt() && RelocM != Reloc::Static) {
++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
@@ -2143,7 +2239,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
DAG.getTargetGlobalAddress(GV, dl, PtrVT));
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(),
+ false, false, false, 0);
return Result;
}
@@ -2163,7 +2260,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
@@ -2173,7 +2270,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
- false, false, 0);
+ false, false, false, 0);
return Result;
}
@@ -2195,20 +2292,12 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
SDValue
-ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
- const {
- DebugLoc dl = Op.getDebugLoc();
- return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
- Op.getOperand(0), Op.getOperand(1));
-}
-
-SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
SDValue Val = DAG.getConstant(0, MVT::i32);
@@ -2253,7 +2342,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
SDValue Result =
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
MachinePointerInfo::getConstantPool(),
- false, false, 0);
+ false, false, false, 0);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -2366,7 +2455,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
@@ -2385,7 +2474,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
} else {
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
@@ -2452,7 +2541,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
SmallVector<SDValue, 4> MemOps;
for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = ARM::tGPRRegisterClass;
else
@@ -2521,7 +2610,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
} else {
ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
Chain, DAG, dl);
@@ -2535,7 +2624,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
} else {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::f32)
RC = ARM::SPRRegisterClass;
@@ -2612,7 +2701,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
lastInsIndex = index;
}
@@ -2777,6 +2866,11 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
+ // undefined bits before doing a full-word comparison with zero.
+ Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
+ DAG.getConstant(1, Cond.getValueType()));
+
return DAG.getSelectCC(dl, Cond,
DAG.getConstant(0, Cond.getValueType()),
SelectTrue, SelectFalse, ISD::SETNE);
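Because ARM declares UndefinedBooleanContent, only bit 0 of a lowered i1
carries meaning; the AND added above normalizes the condition before it is
compared against zero as a full word. A worked case (the garbage bits are
assumed):

    // Cond = 0xFFFFFFFE   (bit 0 clear, upper 31 bits undefined garbage)
    // Without the mask: Cond != 0 holds, wrongly selecting SelectTrue.
    // With the mask:    (Cond & 1) == 0, so SETNE correctly yields
    // SelectFalse.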
@@ -2847,7 +2941,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
llvm_unreachable("Unknown VFP cmp argument!");
}
@@ -2866,7 +2960,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
Ld->getChain(), Ptr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
EVT PtrType = Ptr.getValueType();
unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
@@ -2876,7 +2970,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
Ld->getChain(), NewPtr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
- NewAlign);
+ Ld->isInvariant(), NewAlign);
return;
}
@@ -2894,12 +2988,11 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(4);
DebugLoc dl = Op.getDebugLoc();
- bool SeenZero = false;
- if (canChangeToInt(LHS, SeenZero, Subtarget) &&
- canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case since
- // we only care about equality comparisons.
- (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+ bool LHSSeenZero = false;
+ bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
+ bool RHSSeenZero = false;
+ bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
+ if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
// If unsafe fp math optimization is enabled and there are no other uses of
// the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
@@ -2908,10 +3001,13 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
else if (CC == ISD::SETUNE)
CC = ISD::SETNE;
+ SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
SDValue ARMcc;
if (LHS.getValueType() == MVT::f32) {
- LHS = bitcastf32Toi32(LHS, DAG);
- RHS = bitcastf32Toi32(RHS, DAG);
+ LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(LHS, DAG), Mask);
+ RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
+ bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
@@ -2922,6 +3018,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
SDValue RHS1, RHS2;
expandf64Toi32(LHS, DAG, LHS1, LHS2);
expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
+ RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
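The 0x7fffffff mask clears the IEEE-754 sign bit before the integer compare,
which is what keeps the transformation correct for signed zeros:

    // +0.0f bit pattern: 0x00000000
    // -0.0f bit pattern: 0x80000000
    // A raw integer compare calls these unequal, yet (+0.0f == -0.0f) is
    // true; masking off bit 31 (of the high word, for f64) makes the
    // integer and floating-point answers agree for the zero-comparison case.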
@@ -2950,7 +3048,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- if (UnsafeFPMath &&
+ if (getTargetMachine().Options.UnsafeFPMath &&
(CC == ISD::SETEQ || CC == ISD::SETOEQ ||
CC == ISD::SETNE || CC == ISD::SETUNE)) {
SDValue Result = OptimizeVFPBrcond(Op, DAG);
@@ -3000,25 +3098,48 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(),
- false, false, 0);
+ false, false, false, 0);
Chain = Addr.getValue(1);
Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
} else {
Addr = DAG.getLoad(PTy, dl, Chain, Addr,
- MachinePointerInfo::getJumpTable(), false, false, 0);
+ MachinePointerInfo::getJumpTable(),
+ false, false, false, 0);
Chain = Addr.getValue(1);
return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
}
}
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op.getValueType().getVectorElementType() == MVT::i32) {
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
+ "Invalid type for custom lowering!");
+ if (VT != MVT::v4i16)
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
+}
+
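A concrete flow through the function above for a conversion this targets
(instruction names are the expected NEON lowering, not checked here):

    // fptoui <4 x float> %v to <4 x i16>
    //   step 1: FP_TO_UINT v4f32 -> v4i32   @ single vcvt.u32.f32 q0, q0
    //   step 2: TRUNCATE  v4i32 -> v4i16    @ vmovn.i32 d0, q0
    // i32 <-> f32 element conversions return Op unchanged (legal as-is);
    // every other combination is unrolled to scalar conversions.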
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ if (VT.isVector())
+ return LowerVectorFP_TO_INT(Op, DAG);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::FP_TO_SINT:
Opc = ARMISD::FTOSI;
break;
@@ -3034,6 +3155,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
+ if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
+ if (VT.getVectorElementType() == MVT::f32)
+ return Op;
+ return DAG.UnrollVectorOp(Op.getNode());
+ }
+
assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
"Invalid type for custom lowering!");
if (VT != MVT::v4f32)
@@ -3042,8 +3169,7 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
unsigned CastOpc;
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
CastOpc = ISD::SIGN_EXTEND;
Opc = ISD::SINT_TO_FP;
@@ -3067,8 +3193,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
switch (Op.getOpcode()) {
- default:
- assert(0 && "Invalid opcode!");
+ default: llvm_unreachable("Invalid opcode!");
case ISD::SINT_TO_FP:
Opc = ARMISD::SITOF;
break;
@@ -3176,7 +3301,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
SDValue Offset = DAG.getConstant(4, MVT::i32);
return DAG.getLoad(VT, dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Return LR, which contains the return address. Mark it an implicit live-in.
@@ -3197,7 +3322,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -3442,7 +3567,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
if (Op.getOperand(1).getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
- default: llvm_unreachable("Illegal FP comparison"); break;
+ default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
case ISD::SETNE: Invert = true; // Fallthrough
case ISD::SETOEQ:
@@ -3481,7 +3606,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
} else {
// Integer comparisons.
switch (SetCCOpcode) {
- default: llvm_unreachable("Illegal integer comparison"); break;
+ default: llvm_unreachable("Illegal integer comparison");
case ISD::SETNE: Invert = true;
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETLT: Swap = true;
@@ -3688,14 +3813,65 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
default:
llvm_unreachable("unexpected size for isNEONModifiedImm");
- return SDValue();
}
unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
-static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const {
+ if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
+ return SDValue();
+
+ ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ assert(Op.getValueType() == MVT::f32 &&
+ "ConstantFP custom lowering should only occur for f32.");
+
+ // Try splatting with a VMOV.f32...
+ APFloat FPVal = CFP->getValueAPF();
+ int ImmVal = ARM_AM::getFP32Imm(FPVal);
+ if (ImmVal != -1) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
+ NewVal);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // If that fails, try a VMOV.i32
+ EVT VMovVT;
+ unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
+ SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
+ VMOVModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
+ NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ // Finally, try a VMVN.i32
+ NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
+ VMVNModImm);
+ if (NewVal != SDValue()) {
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
+ SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
+ VecConstant);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
+ DAG.getConstant(0, MVT::i32));
+ }
+
+ return SDValue();
+}
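// A rough standalone re-derivation of the first test above (assumed here;
// ARM_AM::getFP32Imm is the authoritative check): an f32 is a VMOV.f32
// candidate when it is +/-(1 + m/16) * 2^e with a 4-bit mantissa m and an
// exponent e in [-3, 4] -- the AdvSIMD/VFP imm8 format.
#include <cassert>
#include <cstdint>
#include <cstring>

static bool isVMOVFP32Candidate(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  int32_t Exp = (int32_t)((Bits >> 23) & 0xff) - 127; // unbiased exponent
  uint32_t Mant = Bits & 0x7fffff;                    // 23-bit mantissa
  return Exp >= -3 && Exp <= 4 && (Mant & 0x7ffff) == 0;
}

int main() {
  assert(isVMOVFP32Candidate(1.0f) && isVMOVFP32Candidate(-0.5f));
  assert(!isVMOVFP32Candidate(0.1f)); // falls through to the VMOV.i32 attempt
  return 0;
}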
+
+static bool isVEXTMask(ArrayRef<int> M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
@@ -3734,8 +3910,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVREVMask - Check if a vector shuffle corresponds to a VREV
/// instruction with the specified blocksize. (The order of the elements
/// within each block of the vector is reversed.)
-static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned BlockSize) {
+static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
@@ -3761,15 +3936,14 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) {
+static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// We can handle <8 x i8> vector shuffles. If the index in the mask is out of
// range, then 0 is placed into the resulting vector. So pretty much any mask
// of 8 elements can work here.
return VT == MVT::v8i8 && M.size() == 8;
}
-static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3787,8 +3961,7 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
-static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3803,8 +3976,7 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3827,8 +3999,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
-static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3852,8 +4023,7 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3878,8 +4048,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
-static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
- unsigned &WhichResult) {
+static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
if (EltSz == 64)
return false;
@@ -3955,6 +4124,15 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}
+
+ // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
+ if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
+ int ImmVal = ARM_AM::getFP32Imm(SplatBits);
+ if (ImmVal != -1) {
+ SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
+ return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
+ }
+ }
}
}
@@ -4302,7 +4480,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
}
static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
- SmallVectorImpl<int> &ShuffleMask,
+ ArrayRef<int> ShuffleMask,
SelectionDAG &DAG) {
// Check to see if we can use the VTBL instruction.
SDValue V1 = Op.getOperand(0);
@@ -4310,7 +4488,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
DebugLoc DL = Op.getDebugLoc();
SmallVector<SDValue, 8> VTBLMask;
- for (SmallVectorImpl<int>::iterator
+ for (ArrayRef<int>::iterator
I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
@@ -4330,7 +4508,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- SmallVector<int, 8> ShuffleMask;
// Convert shuffles that are directly supported on NEON to target-specific
// DAG nodes, instead of keeping them as shuffles and matching them again
@@ -4338,7 +4515,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of inconsistencies between legalization and selection.
// FIXME: floating-point vectors should be canonicalized to integer vectors
// of the same type so that they get CSEd properly.
- SVN->getMask(ShuffleMask);
+ ArrayRef<int> ShuffleMask = SVN->getMask();
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
if (EltSize <= 32) {
@@ -4347,9 +4524,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If this is undef splat, generate it via "just" vdup, if possible.
if (Lane == -1) Lane = 0;
+ // Test if V1 is a SCALAR_TO_VECTOR.
if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
}
+ // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
+ // (and probably will turn into a SCALAR_TO_VECTOR once legalization
+ // reaches it).
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(0))) {
+ bool IsScalarToVector = true;
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
+ IsScalarToVector = false;
+ break;
+ }
+ if (IsScalarToVector)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
DAG.getConstant(Lane, MVT::i32));
}
@@ -4450,6 +4642,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ // INSERT_VECTOR_ELT is legal only for immediate indexes.
+ SDValue Lane = Op.getOperand(2);
+ if (!isa<ConstantSDNode>(Lane))
+ return SDValue();
+
+ return Op;
+}
+
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// EXTRACT_VECTOR_ELT is legal only for immediate indexes.
SDValue Lane = Op.getOperand(1);
@@ -4526,11 +4727,10 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
- int64_t SExtVal = C->getSExtValue();
- if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+ if (!isIntN(HalfSize, C->getSExtValue()))
return false;
} else {
- if ((C->getZExtValue() >> HalfSize) != 0)
+ if (!isUIntN(HalfSize, C->getZExtValue()))
return false;
}
continue;
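// The replaced shift comparisons and isIntN/isUIntN compute the same "fits
// in half the element width" predicate. A standalone restatement (plain
// C++, not the llvm/Support/MathExtras helpers):
#include <cassert>
#include <cstdint>

static bool fitsSigned(unsigned N, int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}
static bool fitsUnsigned(unsigned N, uint64_t V) { return (V >> N) == 0; }

int main() {
  assert(fitsSigned(8, 127) && fitsSigned(8, -128) && !fitsSigned(8, 128));
  assert(fitsUnsigned(8, 255) && !fitsUnsigned(8, 256));
  return 0;
}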
@@ -4569,7 +4769,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
// Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
// have been legalized as a BITCAST from v4i32.
if (N->getOpcode() == ISD::BITCAST) {
@@ -4874,7 +5075,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
- default: assert(0 && "Invalid code");
+ default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = ARMISD::ADDC; break;
case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
case ISD::SUBC: Opc = ARMISD::SUBC; break;
@@ -4959,7 +5160,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
- case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
@@ -4971,8 +5171,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::SETCC: return LowerVSETCC(Op, DAG);
+ case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -4986,7 +5188,6 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
}
- return SDValue();
}
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -4998,7 +5199,6 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
- break;
case ISD::BITCAST:
Res = ExpandBITCAST(N, DAG);
break;
@@ -5194,7 +5394,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
@@ -5304,7 +5504,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned scratch = MRI.createVirtualRegister(TRC);
unsigned scratch2 = MRI.createVirtualRegister(TRC);
@@ -5414,7 +5614,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
- TargetRegisterClass *TRC =
+ const TargetRegisterClass *TRC =
isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass;
unsigned storesuccess = MRI.createVirtualRegister(TRC);
@@ -5500,52 +5700,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
-/// EmitBasePointerRecalculation - For functions using a base pointer, we
-/// rematerialize it (via the frame pointer).
-void ARMTargetLowering::
-EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
- MachineFunction &MF = *MI->getParent()->getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
-
- if (!RI.hasBasePointer(MF)) return;
-
- MachineBasicBlock::iterator MBBI = MI;
-
- int32_t NumBytes = AFI->getFramePtrSpillOffset();
- unsigned FramePtr = RI.getFrameRegister(MF);
- assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
- "Base pointer without frame pointer?");
-
- if (AFI->isThumb2Function())
- llvm::emitT2RegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
- else if (AFI->isThumbFunction())
- llvm::emitThumbRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, *AII, RI);
- else
- llvm::emitARMRegPlusImmediate(*MBB, MBBI, MI->getDebugLoc(), ARM::R6,
- FramePtr, -NumBytes, ARMCC::AL, 0, *AII);
-
- if (!RI.needsStackRealignment(MF)) return;
-
- // If there's dynamic realignment, adjust for it.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
- assert(!AFI->isThumb1OnlyFunction());
-
- // Emit bic r6, r6, MaxAlign
- unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
- AddDefaultCC(
- AddDefaultPred(
- BuildMI(*MBB, MBBI, MI->getDebugLoc(), TII->get(bicOpc), ARM::R6)
- .addReg(ARM::R6, RegState::Kill)
- .addImm(MaxAlign - 1)));
-}
-
/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
/// registers the function context.
void ARMTargetLowering::
@@ -5580,8 +5734,6 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore, 4, 4);
- EmitBasePointerRecalculation(MI, MBB, DispatchBB);
-
// Load the address of the dispatch MBB into the jump buffer.
if (isThumb2) {
// Incoming value: jbuf
@@ -5683,7 +5835,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
unsigned MaxCSNum = 0;
MachineModuleInfo &MMI = MF->getMMI();
- for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; ++BB) {
+ for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
+ ++BB) {
if (!BB->isLandingPad()) continue;
// FIXME: We should assert that the EH_LABEL is the first MI in the landing
@@ -5741,12 +5894,10 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
DispatchBB->addSuccessor(DispContBB);
- // Insert and renumber MBBs.
- MachineBasicBlock *Last = &MF->back();
+ // Insert the MBBs.
MF->insert(MF->end(), DispatchBB);
MF->insert(MF->end(), DispContBB);
MF->insert(MF->end(), TrapBB);
- MF->RenumberBlocks(Last);
// Insert code into the entry block that creates and registers the function
// context.
@@ -5757,35 +5908,63 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineMemOperand::MOLoad |
MachineMemOperand::MOVolatile, 4, 4);
+ if (AFI->isThumb1OnlyFunction())
+ BuildMI(DispatchBB, dl, TII->get(ARM::tInt_eh_sjlj_dispatchsetup));
+ else if (!Subtarget->hasVFP2())
+ BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup_nofp));
+ else
+ BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
+
+ unsigned NumLPads = LPadList.size();
if (Subtarget->isThumb2()) {
unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
+ .addReg(NewVReg1)
+ .addImm(LPadList.size()));
+ } else {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ }
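// Worked example of the movw/movt split above: when the landing-pad count
// does not fit a compare immediate, the low half is materialized with
// t2MOVi16 and, only when nonzero, the high half with t2MOVTi16.
#include <cassert>

int main() {
  unsigned NumLPads = 70000;       // 0x11170; too large for t2CMPri
  unsigned Lo = NumLPads & 0xFFFF; // movw -> 0x1170
  unsigned Hi = NumLPads >> 16;    // movt -> 0x1 (skipped when zero)
  assert(((Hi << 16) | Lo) == NumLPads);
  return 0;
}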
+
BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg2)
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
.addJumpTableIndex(MJTI)
.addImm(UId));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg3)
- .addReg(NewVReg2, RegState::Kill)
+ BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
+ .addReg(NewVReg3, RegState::Kill)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
- .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4, RegState::Kill)
.addReg(NewVReg1)
.addJumpTableIndex(MJTI)
.addImm(UId);
@@ -5796,9 +5975,30 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addImm(1)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
+ .addReg(NewVReg1)
+ .addReg(VReg1));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
@@ -5847,38 +6047,77 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
.addFrameIndex(FI)
.addImm(4)
.addMemOperand(FIMMOLd));
- AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
- .addReg(NewVReg1)
- .addImm(LPadList.size()));
+
+ if (NumLPads < 256) {
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
+ .addReg(NewVReg1)
+ .addImm(NumLPads));
+ } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
+ .addImm(NumLPads & 0xFFFF));
+
+ unsigned VReg2 = VReg1;
+ if ((NumLPads & 0xFFFF0000) != 0) {
+ VReg2 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
+ .addReg(VReg1)
+ .addImm(NumLPads >> 16));
+ }
+
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg2));
+ } else {
+ MachineConstantPool *ConstantPool = MF->getConstantPool();
+ Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
+ const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = getTargetData()->getPrefTypeAlignment(Int32Ty);
+ if (Align == 0)
+ Align = getTargetData()->getTypeAllocSize(C->getType());
+ unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
+
+ unsigned VReg1 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
+ .addReg(VReg1, RegState::Define)
+ .addConstantPoolIndex(Idx)
+ .addImm(0));
+ AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
+ .addReg(NewVReg1)
+ .addReg(VReg1, RegState::Kill));
+ }
+
BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
.addMBB(TrapBB)
.addImm(ARMCC::HI)
.addReg(ARM::CPSR);
- unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
AddDefaultCC(
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg2)
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
.addReg(NewVReg1)
.addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
- unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
- AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg3)
+ unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
.addJumpTableIndex(MJTI)
.addImm(UId));
MachineMemOperand *JTMMOLd =
MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
MachineMemOperand::MOLoad, 4, 4);
- unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
+ unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
AddDefaultPred(
- BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg4)
- .addReg(NewVReg2, RegState::Kill)
- .addReg(NewVReg3)
+ BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
+ .addReg(NewVReg3, RegState::Kill)
+ .addReg(NewVReg4)
.addImm(0)
.addMemOperand(JTMMOLd));
BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
- .addReg(NewVReg4, RegState::Kill)
- .addReg(NewVReg3)
+ .addReg(NewVReg5, RegState::Kill)
+ .addReg(NewVReg4)
.addJumpTableIndex(MJTI)
.addImm(UId);
}
@@ -5893,21 +6132,24 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
PrevMBB = CurMBB;
}
+ // N.B. the order the invoke BBs are processed in doesn't matter here.
const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
- const unsigned *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
MachineBasicBlock *BB = *I;
// Remove the landing pad successor from the invoke block and replace it
// with the new dispatch block.
- for (MachineBasicBlock::succ_iterator
- SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SMBB = *SI;
+ SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
+ BB->succ_end());
+ while (!Successors.empty()) {
+ MachineBasicBlock *SMBB = Successors.pop_back_val();
if (SMBB->isLandingPad()) {
BB->removeSuccessor(SMBB);
- SMBB->setIsLandingPad(false);
+ MBBLPads.push_back(SMBB);
}
}
@@ -5919,7 +6161,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
// executed.
for (MachineBasicBlock::reverse_iterator
II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
- if (!II->getDesc().isCall()) continue;
+ if (!II->isCall()) continue;
DenseMap<unsigned, bool> DefRegs;
for (MachineInstr::mop_iterator
@@ -5932,15 +6174,31 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
MachineInstrBuilder MIB(&*II);
for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
- if (!TRC->contains(SavedRegs[i])) continue;
- if (!DefRegs[SavedRegs[i]])
- MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead);
+ unsigned Reg = SavedRegs[i];
+ if (Subtarget->isThumb2() &&
+ !ARM::tGPRRegisterClass->contains(Reg) &&
+ !ARM::hGPRRegisterClass->contains(Reg))
+ continue;
+ else if (Subtarget->isThumb1Only() &&
+ !ARM::tGPRRegisterClass->contains(Reg))
+ continue;
+ else if (!Subtarget->isThumb() &&
+ !ARM::GPRRegisterClass->contains(Reg))
+ continue;
+ if (!DefRegs[Reg])
+ MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
}
break;
}
}
+ // Mark all former landing pads as non-landing pads. The dispatch is the only
+ // landing pad now.
+ for (SmallVectorImpl<MachineBasicBlock*>::iterator
+ I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
+ (*I)->setIsLandingPad(false);
+
// The instruction is gone now.
MI->eraseFromParent();
@@ -6222,20 +6480,28 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
}
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ EmitSjLjDispatchBlock(MI, BB);
+ return BB;
+
case ARM::ABS:
case ARM::t2ABS: {
// To insert an ABS instruction, we have to insert the
// diamond control-flow pattern. The incoming instruction knows the
// source vreg to test against 0, the destination vreg to set,
// the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
+ // true/false values to select between, and a branch opcode to use.
// It transforms
// V1 = ABS V0
// into
// V2 = MOVS V0
// BCC (branch to SinkBB if V0 >= 0)
// RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
- // SinkBB: V1 = PHI(V2, V3)
+ // SinkBB: V1 = PHI(V2, V3)
const BasicBlock *LLVM_BB = BB->getBasicBlock();
MachineFunction::iterator BBI = BB;
++BBI;
@@ -6276,19 +6542,19 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(ARM::CPSR, RegState::Define);
// insert a bcc with opposite CC to ARMCC::MI at the end of BB
- BuildMI(BB, dl,
+ BuildMI(BB, dl,
TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
.addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
// insert rsbri in RSBBB
// Note: BCC and rsbri will be converted into predicated rsbmi
// by if-conversion pass
- BuildMI(*RSBBB, RSBBB->begin(), dl,
+ BuildMI(*RSBBB, RSBBB->begin(), dl,
TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
.addReg(NewMovDstReg, RegState::Kill)
.addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- // insert PHI in SinkBB,
+ // insert PHI in SinkBB,
// reuse ABSDstReg to not change uses of ABS instruction
BuildMI(*SinkBB, SinkBB->begin(), dl,
TII->get(ARM::PHI), ABSDstReg)
@@ -6296,7 +6562,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(NewMovDstReg).addMBB(BB);
// remove ABS instruction
- MI->eraseFromParent();
+ MI->eraseFromParent();
// return last added BB
return SinkBB;
@@ -6306,32 +6572,40 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
SDNode *Node) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.hasPostISelHook()) {
+ if (!MI->hasPostISelHook()) {
assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
"Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
return;
}
+ const MCInstrDesc *MCID = &MI->getDesc();
// Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
// RSC. Coming out of isel, they have an implicit CPSR def, but the optional
// operand is still set to noreg. If needed, set the optional operand's
// register to CPSR, and remove the redundant implicit def.
//
- // e.g. ADCS (...opt:%noreg, CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
+ // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
// Rename pseudo opcodes.
unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
if (NewOpc) {
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
- MI->setDesc(TII->get(NewOpc));
+ MCID = &TII->get(NewOpc);
+
+ assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
+ "converted opcode should be the same except for cc_out");
+
+ MI->setDesc(*MCID);
+
+ // Add the optional cc_out operand
+ MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
}
- unsigned ccOutIdx = MCID.getNumOperands() - 1;
+ unsigned ccOutIdx = MCID->getNumOperands() - 1;
// Any ARM instruction that sets the 's' bit should specify an optional
// "cc_out" operand in the last operand position.
- if (!MCID.hasOptionalDef() || !MCID.OpInfo[ccOutIdx].isOptionalDef()) {
+ if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
assert(!NewOpc && "Optional cc_out operand required");
return;
}
@@ -6339,7 +6613,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
// since we already have an optional CPSR def.
bool definesCPSR = false;
bool deadCPSR = false;
- for (unsigned i = MCID.getNumOperands(), e = MI->getNumOperands();
+ for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
@@ -6513,7 +6787,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
default:
- assert(0 && "Invalid vector element type for padd optimization.");
+ llvm_unreachable("Invalid vector element type for padd optimization.");
}
SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
@@ -6632,41 +6906,115 @@ static SDValue PerformMULCombine(SDNode *N,
if (!C)
return SDValue();
- uint64_t MulAmt = C->getZExtValue();
+ int64_t MulAmt = C->getSExtValue();
unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
DebugLoc DL = N->getDebugLoc();
SDValue Res;
MulAmt >>= ShiftAmt;
- if (isPowerOf2_32(MulAmt - 1)) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- Res = DAG.getNode(ISD::ADD, DL, VT,
- V, DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt-1),
- MVT::i32)));
- } else if (isPowerOf2_32(MulAmt + 1)) {
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- Res = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::SHL, DL, VT,
- V, DAG.getConstant(Log2_32(MulAmt+1),
- MVT::i32)),
- V);
- } else
- return SDValue();
+
+ if (MulAmt >= 0) {
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt - 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmt + 1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+ } else {
+ uint64_t MulAmtAbs = -MulAmt;
+ if (isPowerOf2_32(MulAmtAbs + 1)) {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs + 1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V,
+ DAG.getConstant(Log2_32(MulAmtAbs-1),
+ MVT::i32)));
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getConstant(0, MVT::i32), Res);
+
+ } else
+ return SDValue();
+ }
if (ShiftAmt != 0)
- Res = DAG.getNode(ISD::SHL, DL, VT, Res,
- DAG.getConstant(ShiftAmt, MVT::i32));
+ Res = DAG.getNode(ISD::SHL, DL, VT,
+ Res, DAG.getConstant(ShiftAmt, MVT::i32));
// Do not add new nodes to DAG combiner worklist.
DCI.CombineTo(N, Res, false);
return SDValue();
}
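// A standalone check of the shift/add identities the combine now applies to
// negative multipliers as well (plain C++ mirroring the DAG nodes built
// above; the unsigned cast merely keeps the left shift well-defined for
// negative inputs):
#include <cassert>
#include <cstdint>

static int32_t shl(int32_t X, unsigned N) {
  return (int32_t)((uint32_t)X << N);
}

int main() {
  for (int32_t X : {0, 1, -5, 1234}) {
    assert(shl(X, 3) + X == 9 * X);        // (mul x, 2^N + 1)  => add(shl, x)
    assert(shl(X, 3) - X == 7 * X);        // (mul x, 2^N - 1)  => sub(shl, x)
    assert(X - shl(X, 3) == -7 * X);       // (mul x, -(2^N-1)) => sub(x, shl)
    assert(0 - (shl(X, 3) + X) == -9 * X); // (mul x, -(2^N+1)) => sub(0, add)
  }
  return 0;
}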
+static bool isCMOVWithZeroOrAllOnesLHS(SDValue N, bool AllOnes) {
+ if (N.getOpcode() != ARMISD::CMOV || !N.getNode()->hasOneUse())
+ return false;
+
+ SDValue FalseVal = N.getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(FalseVal);
+ if (!C)
+ return false;
+ if (AllOnes)
+ return C->isAllOnesValue();
+ return C->isNullValue();
+}
+
+/// formConditionalOp - Combine an operation with a conditional move operand
+/// to form a conditional op. e.g. (or x, (cmov 0, y, cond)) => (or.cond x, y)
+/// (and x, (cmov -1, y, cond)) => (and.cond x, y)
+static SDValue formConditionalOp(SDNode *N, SelectionDAG &DAG,
+ bool Commutable) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ bool isAND = N->getOpcode() == ISD::AND;
+ bool isCand = isCMOVWithZeroOrAllOnesLHS(N1, isAND);
+ if (!isCand && Commutable) {
+ isCand = isCMOVWithZeroOrAllOnesLHS(N0, isAND);
+ if (isCand)
+ std::swap(N0, N1);
+ }
+ if (!isCand)
+ return SDValue();
+
+ unsigned Opc = 0;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected node");
+ case ISD::AND: Opc = ARMISD::CAND; break;
+ case ISD::OR: Opc = ARMISD::COR; break;
+ case ISD::XOR: Opc = ARMISD::CXOR; break;
+ }
+ return DAG.getNode(Opc, N->getDebugLoc(), N->getValueType(0), N0,
+ N1.getOperand(1), N1.getOperand(2), N1.getOperand(3),
+ N1.getOperand(4));
+}
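// The fold is sound because the false arm of the CMOV is the identity
// element of the bitwise op (0 for or/xor, all-ones for and), which is
// exactly what isCMOVWithZeroOrAllOnesLHS requires. A standalone
// restatement with C++ selects standing in for the CMOV:
#include <cassert>

int main() {
  for (int X : {0, 5, -1})
    for (int Y : {0, 3, -7})
      for (bool Cond : {false, true}) {
        assert((X | (Cond ? Y : 0)) == (Cond ? (X | Y) : X));
        assert((X ^ (Cond ? Y : 0)) == (Cond ? (X ^ Y) : X));
        assert((X & (Cond ? Y : -1)) == (Cond ? (X & Y) : X));
      }
  return 0;
}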
+
static SDValue PerformANDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
// Attempt to use immediate-form VBIC
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
@@ -6697,6 +7045,13 @@ static SDValue PerformANDCombine(SDNode *N,
}
}
+ if (!Subtarget->isThumb1Only()) {
+ // (and x, (cmov -1, y, cond)) => (and.cond x, y)
+ SDValue CAND = formConditionalOp(N, DAG, true);
+ if (CAND.getNode())
+ return CAND;
+ }
+
return SDValue();
}
@@ -6733,6 +7088,13 @@ static SDValue PerformORCombine(SDNode *N,
}
}
+ if (!Subtarget->isThumb1Only()) {
+ // (or x, (cmov 0, y, cond)) => (or.cond x, y)
+ SDValue COR = formConditionalOp(N, DAG, true);
+ if (COR.getNode())
+ return COR;
+ }
+
SDValue N0 = N->getOperand(0);
if (N0.getOpcode() != ISD::AND)
return SDValue();
@@ -6881,6 +7243,25 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformXORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ if (!Subtarget->isThumb1Only()) {
+ // (xor x, (cmov 0, y, cond)) => (xor.cond x, y)
+ SDValue CXOR = formConditionalOp(N, DAG, true);
+ if (CXOR.getNode())
+ return CXOR;
+ }
+
+ return SDValue();
+}
+
/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
@@ -6926,13 +7307,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue BasePtr = LD->getBasePtr();
SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(), LD->getAlignment());
+ LD->isNonTemporal(), LD->isInvariant(),
+ LD->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, MVT::i32));
SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
LD->getPointerInfo(), LD->isVolatile(),
- LD->isNonTemporal(),
+ LD->isNonTemporal(), LD->isInvariant(),
std::min(4U, LD->getAlignment() / 2));
DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
@@ -6967,15 +7349,99 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
/// ISD::STORE.
static SDValue PerformSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // Bitcast an i64 store extracted from a vector to f64.
- // Otherwise, the i64 value will be legalized to a pair of i32 values.
StoreSDNode *St = cast<StoreSDNode>(N);
+ if (St->isVolatile())
+ return SDValue();
+
+ // Optimize trunc store (of multiple scalars) to shuffle and store. First,
+ // pack all of the elements in one place. Next, store to memory in fewer
+ // chunks.
SDValue StVal = St->getValue();
- if (!ISD::isNormalStore(St) || St->isVolatile())
+ EVT VT = StVal.getValueType();
+ if (St->isTruncatingStore() && VT.isVector()) {
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT StVT = St->getMemoryVT();
+ unsigned NumElems = VT.getVectorNumElements();
+ assert(StVT != VT && "Cannot truncate to the same type");
+ unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+
+ // From, To sizes and ElemCount must be pow of two
+ if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
+
+ // We are going to use the original vector elt for storing.
+ // Accumulated smaller vector elements must be a multiple of the store size.
+ if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
+
+ unsigned SizeRatio = FromEltSz / ToEltSz;
+ assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
+
+ // Create a type on which we perform the shuffle.
+ EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
+ NumElems*SizeRatio);
+ assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
+
+ DebugLoc DL = St->getDebugLoc();
+ SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
+ SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
+ for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
+
+ // Can't shuffle using an illegal type.
+ if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
+
+ SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
+ DAG.getUNDEF(WideVec.getValueType()),
+ ShuffleVec.data());
+ // At this point all of the data is stored at the bottom of the
+ // register. We now need to save it to mem.
+
+ // Find the largest store unit
+ MVT StoreType = MVT::i8;
+ for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
+ tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
+ MVT Tp = (MVT::SimpleValueType)tp;
+ if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
+ StoreType = Tp;
+ }
+ // Didn't find a legal store type.
+ if (!TLI.isTypeLegal(StoreType))
+ return SDValue();
+
+ // Bitcast the original vector into a vector of store-size units
+ EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
+ StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
+ assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
+ SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
+ SmallVector<SDValue, 8> Chains;
+ SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
+ TLI.getPointerTy());
+ SDValue BasePtr = St->getBasePtr();
+
+ // Perform one or more big stores into memory.
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
+ for (unsigned I = 0; I < E; I++) {
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ StoreType, ShuffWide,
+ DAG.getIntPtrConstant(I));
+ SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), St->getAlignment());
+ BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
+ Increment);
+ Chains.push_back(Ch);
+ }
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
+ Chains.size());
+ }
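// A standalone little-endian model of the rewrite above (plain C++; the
// real code does this with a VECTOR_SHUFFLE and wide stores): packing the
// low byte of each <4 x i32> lane and doing one i32 store leaves the same
// bytes in memory as four narrow truncating stores.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Src[4] = {0x11aa, 0x22bb, 0x33cc, 0x44dd};
  uint8_t Naive[4], Fused[4];
  for (int I = 0; I < 4; ++I)
    Naive[I] = (uint8_t)Src[I];                     // four narrow stores
  uint32_t Packed = 0;
  for (int I = 0; I < 4; ++I)                       // the shuffle: lane I's
    Packed |= (uint32_t)(uint8_t)Src[I] << (8 * I); // low byte -> byte I
  std::memcpy(Fused, &Packed, 4);                   // one wide store
  assert(std::memcmp(Naive, Fused, 4) == 0);
  return 0;
}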
+
+ if (!ISD::isNormalStore(St))
return SDValue();
+ // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
+ // ARM stores of arguments in the same cache line.
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
- StVal.getNode()->hasOneUse() && !St->isVolatile()) {
+ StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
DebugLoc DL = St->getDebugLoc();
SDValue BasePtr = St->getBasePtr();
@@ -6996,6 +7462,8 @@ static SDValue PerformSTORECombine(SDNode *N,
StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
+ // Bitcast an i64 store extracted from a vector to f64.
+ // Otherwise, the i64 value will be legalized to a pair of i32 values.
SelectionDAG &DAG = DCI.DAG;
DebugLoc dl = StVal.getDebugLoc();
SDValue IntVec = StVal.getOperand(0);
@@ -7177,7 +7645,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
if (isIntrinsic) {
unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntNo) {
- default: assert(0 && "unexpected intrinsic for Neon base update");
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD;
NumVecs = 1; break;
case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD;
@@ -7210,7 +7678,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
} else {
isLaneOp = true;
switch (N->getOpcode()) {
- default: assert(0 && "unexpected opcode for Neon base update");
+ default: llvm_unreachable("unexpected opcode for Neon base update");
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
@@ -7703,6 +8171,18 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
+ if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
+ // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
+ // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
+ SDValue N1 = N->getOperand(1);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+ SDValue N0 = N->getOperand(0);
+ if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
+ DAG.MaskedValueIsZero(N0.getOperand(0),
+ APInt::getHighBitsSet(32, 16)))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
+ }
+ }
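// The identity behind the rev16 combine above: when the high 16 bits of x
// are zero, the low 16 bits of bswap(x) are zero, so a logical shift right
// by 16 and a rotate right by 16 agree. Standalone check (GCC/Clang builtin
// for the byte swap):
#include <cassert>
#include <cstdint>

static uint32_t rotr16(uint32_t X) { return (X >> 16) | (X << 16); }

int main() {
  for (uint32_t X : {0x0000abcdu, 0x00001234u, 0x000000ffu}) {
    uint32_t Swapped = __builtin_bswap32(X);
    assert((Swapped >> 16) == rotr16(Swapped));
  }
  return 0;
}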
// Nothing to be done for scalar shifts.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -7824,7 +8304,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return -0, so vmin can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
@@ -7846,7 +8326,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
// will return +0, so vmax can only be used for unsafe math or if one of
// the operands is known to be nonzero.
if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
- !UnsafeFPMath &&
+ !DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
@@ -7906,8 +8386,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
- DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -7931,7 +8410,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
- case ISD::AND: return PerformANDCombine(N, DCI);
+ case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
+ case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
case ARMISD::BFI: return PerformBFICombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
@@ -8001,6 +8481,41 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
}
}
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if (IsZeroVal &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+ return MVT::v4i32;
+ } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+ return MVT::v2i32;
+ }
+ }
+
+ // Lower to i32/i16 if the size permits.
+ if (Size >= 4) {
+ return MVT::i32;
+ } else if (Size >= 2) {
+ return MVT::i16;
+ }
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
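// A rough restatement of the selection ladder above (illustrative names,
// not an LLVM interface): NEON vector types for large, suitably aligned
// zero-fills, then i32/i16, then MVT::Other to defer to generic lowering.
#include <cstdio>

static const char *pickMemOpType(unsigned long long Size, unsigned DstAlign,
                                 unsigned SrcAlign, bool IsZeroVal,
                                 bool HasNEON) {
  auto Aligned = [&](unsigned A) {
    return (SrcAlign == 0 || SrcAlign % A == 0) &&
           (DstAlign == 0 || DstAlign % A == 0);
  };
  if (IsZeroVal && HasNEON) {
    if (Aligned(16) && Size >= 16) return "v4i32";
    if (Aligned(8) && Size >= 8) return "v2i32";
  }
  if (Size >= 4) return "i32";
  if (Size >= 2) return "i16";
  return "Other";
}

int main() {
  std::printf("%s\n", pickMemOpType(32, 16, 16, true, true)); // v4i32
  std::printf("%s\n", pickMemOpType(6, 4, 4, false, true));   // i32
  return 0;
}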
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;
@@ -8188,7 +8703,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
if (Scale & 1) return false;
return isPowerOf2_32(Scale);
}
- break;
}
return true;
}
@@ -8198,10 +8712,12 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// a register against the immediate without having to materialize the
/// immediate into a register.
bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ // Thumb2 and ARM modes can use cmn for negative immediates.
if (!Subtarget->isThumb())
- return ARM_AM::getSOImmVal(Imm) != -1;
+ return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
if (Subtarget->isThumb2())
- return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
+ // Thumb1 doesn't have cmn, and only 8-bit immediates.
return Imm >= 0 && Imm <= 255;
}
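// Why testing abs64(Imm) is enough here: a compare against a negative
// immediate can be emitted as cmn (compare negative) of the absolute value,
// and cmp r, #-n sets flags from the same r + n that cmn r, #n computes.
// Standalone sketch of that arithmetic:
#include <cassert>

int main() {
  for (int R : {-10, 0, 3, 100}) {
    const int N = 5;
    assert(R - (-N) == R + N); // cmp r, #-5 and cmn r, #5 see the same value
  }
  return 0;
}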
@@ -8388,22 +8904,20 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
}
void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), Mask,
- KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
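// The CMOV rule in words: a bit of the result is known only when it is
// known, with the same value, on both sides -- hence the two `&=` merges
// above. A tiny standalone model with 8-bit masks:
#include <cassert>
#include <cstdint>

int main() {
  // Suppose the LHS is known to be exactly 0b0100 and the RHS 0b0110.
  uint8_t KnownOneLHS = 0x04, KnownZeroLHS = (uint8_t)~0x04;
  uint8_t KnownOneRHS = 0x06, KnownZeroRHS = (uint8_t)~0x06;
  uint8_t KnownOne = KnownOneLHS & KnownOneRHS;
  uint8_t KnownZero = KnownZeroLHS & KnownZeroRHS;
  assert(KnownOne == 0x04);                          // bit 2 still known-one
  assert(!(KnownZero & 0x02) && !(KnownOne & 0x02)); // bit 1 now unknown
  return 0;
}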
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 5da9b27fca68..352d98001ddc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -15,6 +15,7 @@
#ifndef ARMISELLOWERING_H
#define ARMISELLOWERING_H
+#include "ARM.h"
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -56,7 +57,11 @@ namespace llvm {
CMPFP, // ARM VFP compare instruction, sets FPSCR.
CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR.
FMSTAT, // ARM fmstat instruction.
+
CMOV, // ARM conditional move instructions.
+ CAND, // ARM conditional and instructions.
+ COR, // ARM conditional or instructions.
+ CXOR, // ARM conditional xor instructions.
BCC_i64,
@@ -81,7 +86,6 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
- EH_SJLJ_DISPATCHSETUP, // SjLj exception handling dispatch setup.
TC_RETURN, // Tail call return pseudo.
@@ -146,6 +150,9 @@ namespace llvm {
VMOVIMM,
VMVNIMM,
+ // Vector move f32 immediate:
+ VMOVFPIMM,
+
// Vector duplicate:
VDUP,
VDUPLANE,
@@ -266,9 +273,14 @@ namespace llvm {
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
- /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsZeroVal,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -303,7 +315,6 @@ namespace llvm {
SelectionDAG &DAG) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -338,7 +349,7 @@ namespace llvm {
/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
- virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+ virtual const TargetRegisterClass *getRegClassFor(EVT VT) const;
/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
@@ -402,7 +413,6 @@ namespace llvm {
ISD::ArgFlagsTy Flags) const;
SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -424,6 +434,8 @@ namespace llvm {
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantFP(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const;
@@ -452,7 +464,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -481,7 +493,7 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
@@ -512,9 +524,6 @@ namespace llvm {
bool signExtend,
ARMCC::CondCodes Cond) const;
- void EmitBasePointerRecalculation(MachineInstr *MI, MachineBasicBlock *MBB,
- MachineBasicBlock *DispatchBB) const;
-
void SetupEntryBlockForSjLj(MachineInstr *MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB, int FI) const;
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 7cbc9111dec3..1d38bcf9e843 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,4 +1,4 @@
-//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//===-- ARMInstrFormats.td - ARM Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -174,7 +174,7 @@ def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
// ARM special operands for disassembly only.
//
-def SetEndAsmOperand : AsmOperandClass {
+def SetEndAsmOperand : ImmAsmOperand {
let Name = "SetEndImm";
let ParserMethod = "parseSetEndImm";
}
@@ -201,21 +201,29 @@ def msr_mask : Operand<i32> {
// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0>
// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
// 64 64 - <imm> is encoded in imm6<5:0>
+def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
def shr_imm8 : Operand<i32> {
let EncoderMethod = "getShiftRight8Imm";
let DecoderMethod = "DecodeShiftRight8Imm";
+ let ParserMatchClass = shr_imm8_asm_operand;
}
+def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
def shr_imm16 : Operand<i32> {
let EncoderMethod = "getShiftRight16Imm";
let DecoderMethod = "DecodeShiftRight16Imm";
+ let ParserMatchClass = shr_imm16_asm_operand;
}
+def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
def shr_imm32 : Operand<i32> {
let EncoderMethod = "getShiftRight32Imm";
let DecoderMethod = "DecodeShiftRight32Imm";
+ let ParserMatchClass = shr_imm32_asm_operand;
}
+def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
def shr_imm64 : Operand<i32> {
let EncoderMethod = "getShiftRight64Imm";
let DecoderMethod = "DecodeShiftRight64Imm";
+ let ParserMatchClass = shr_imm64_asm_operand;
}
//===----------------------------------------------------------------------===//
@@ -231,6 +239,14 @@ class VFP2InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP2]>;
class VFP3InstAlias<string Asm, dag Result, bit Emit = 0b1>
: InstAlias<Asm, Result, Emit>, Requires<[HasVFP3]>;
+class NEONInstAlias<string Asm, dag Result, bit Emit = 0b1>
+ : InstAlias<Asm, Result, Emit>, Requires<[HasNEON]>;
+
+
+class VFP2MnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasVFP2]>;
+class NEONMnemonicAlias<string src, string dst> : MnemonicAlias<src, dst>,
+ Requires<[HasNEON]>;
//===----------------------------------------------------------------------===//
// ARM Instruction templates.
@@ -274,6 +290,14 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
class Encoding {
field bits<32> Inst;
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
+ field bits<32> Unpredictable = 0;
+ // SoftFail is the generic name for this field, but we alias it so
+ // as to make it more obvious what it means in ARM-land.
+ field bits<32> SoftFail = Unpredictable;
}
class InstARM<AddrMode am, int sz, IndexMode im,
@@ -290,6 +314,32 @@ class InstThumb<AddrMode am, int sz, IndexMode im,
let DecoderNamespace = "Thumb";
}
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class AsmPseudoInst<string asm, dag iops>
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
+ "", NoItinerary> {
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let isCodeGenOnly = 0; // So we get asm matcher for it.
+ let AsmString = asm;
+ let isPseudo = 1;
+}
+
+class ARMAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsARM]>;
+class tAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb]>;
+class t2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[IsThumb2]>;
+class VFP2AsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasVFP2]>;
+class NEONAsmPseudo<string asm, dag iops> : AsmPseudoInst<asm, iops>,
+ Requires<[HasNEON]>;
+
+// Pseudo instructions for the code generator.
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
: InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
GenericDomain, "", itin> {
@@ -481,6 +531,8 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
let Inst{15-12} = Rt;
let Inst{11-4} = 0b00001001;
let Inst{3-0} = Rt2;
+
+ let DecoderMethod = "DecodeSwap";
}
// addrmode1 instructions
@@ -792,7 +844,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
}
// PKH instructions
-def PKHLSLAsmOperand : AsmOperandClass {
+def PKHLSLAsmOperand : ImmAsmOperand {
let Name = "PKHLSLImm";
let ParserMethod = "parsePKHLSLImm";
}
@@ -1550,8 +1602,11 @@ class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
dag oops, dag iops, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
: AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
+ bits<5> fbits;
// size (fixed-point number): sx == 0 ? 16 : 32
let Inst{7} = op5; // sx
+ let Inst{5} = fbits{0};
+ let Inst{3-0} = fbits{4-1};
}
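
The 5-bit fbits field lands in the encoding non-contiguously: the low bit goes to Inst{5} and the upper four bits to Inst{3-0}. A standalone C++ rendering of just that packing (packFBits is a hypothetical name):

#include <cstdint>

// fbits{0} -> Inst{5}, fbits{4:1} -> Inst{3:0}, per the lets above.
uint32_t packFBits(uint32_t Inst, unsigned FBits) {
  Inst &= ~((1u << 5) | 0xFu);  // clear Inst{5} and Inst{3-0}
  Inst |= (FBits & 1u) << 5;    // fbits{0}   -> Inst{5}
  Inst |= (FBits >> 1) & 0xFu;  // fbits{4:1} -> Inst{3:0}
  return Inst;
}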
// VFP conversion instructions, if no NEON
@@ -1963,3 +2018,54 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasNEON,UseNEONForFP];
}
+
+// VFP/NEON instruction aliases for type suffixes.
+class VFPDataTypeInstAlias<string opc, string dt, string asm, dag Result> :
+ InstAlias<!strconcat(opc, dt, "\t", asm), Result>, Requires<[HasVFP2]>;
+
+multiclass VFPDTAnyInstAlias<string opc, string asm, dag Result> {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+
+multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result> {
+ let Predicates = [HasNEON] in {
+ def : VFPDataTypeInstAlias<opc, ".8", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".16", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".32", asm, Result>;
+ def : VFPDataTypeInstAlias<opc, ".64", asm, Result>;
+}
+}
+
+// The same alias classes using AsmPseudo instead, for the more complex
+// cases in NEON that InstAlias can't quite handle.
+// Note that we can't use anonymous defm references here like we can
+// above, as we care about the ultimate instruction enum names generated,
+// unlike for InstAlias defs.
+class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> :
+ AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>;
+
+// Data type suffix token aliases. Implements Table A7-3 in the ARM ARM.
+def : TokenAlias<".s8", ".i8">;
+def : TokenAlias<".u8", ".i8">;
+def : TokenAlias<".s16", ".i16">;
+def : TokenAlias<".u16", ".i16">;
+def : TokenAlias<".s32", ".i32">;
+def : TokenAlias<".u32", ".i32">;
+def : TokenAlias<".s64", ".i64">;
+def : TokenAlias<".u64", ".i64">;
+
+def : TokenAlias<".i8", ".8">;
+def : TokenAlias<".i16", ".16">;
+def : TokenAlias<".i32", ".32">;
+def : TokenAlias<".i64", ".64">;
+
+def : TokenAlias<".p8", ".8">;
+def : TokenAlias<".p16", ".16">;
+
+def : TokenAlias<".f32", ".32">;
+def : TokenAlias<".f64", ".64">;
+def : TokenAlias<".f", ".f32">;
+def : TokenAlias<".d", ".f64">;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 48da03f63bb9..b8f607eb4c55 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//===-- ARMInstrInfo.cpp - ARM Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,12 +21,29 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
using namespace llvm;
ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ if (hasNOP()) {
+ NopInst.setOpcode(ARM::NOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ } else {
+ NopInst.setOpcode(ARM::MOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R0));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+ }
+}
+
unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
switch (Opc) {
default: break;
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index f2c7bdc31be9..5d3e059b7038 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===//
+//===-- ARMInstrInfo.h - ARM Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,10 @@
#ifndef ARMINSTRUCTIONINFO_H
#define ARMINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
+#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
-#include "ARM.h"
namespace llvm {
class ARMSubtarget;
@@ -28,6 +27,9 @@ class ARMInstrInfo : public ARMBaseInstrInfo {
public:
explicit ARMInstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
  // Return the non-pre/post incrementing version of 'Opc'. Return 0
  // if there is no such opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 2cf0f09ffc64..3caaa2366123 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -58,8 +58,6 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
@@ -143,9 +141,6 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP",
- SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>;
-
def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
@@ -184,6 +179,8 @@ def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
AssemblerPredicate<"FeatureVFP2">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
AssemblerPredicate<"FeatureVFP3">;
+def HasVFP4 : Predicate<"Subtarget->hasVFP4()">,
+ AssemblerPredicate<"FeatureVFP4">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">,
@@ -211,16 +208,20 @@ def IsARClass : Predicate<"!Subtarget->isMClass()">,
AssemblerPredicate<"!FeatureMClass">;
def IsARM : Predicate<"!Subtarget->isThumb()">,
AssemblerPredicate<"!ModeThumb">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"ModeNaCl">;
+def IsIOS : Predicate<"Subtarget->isTargetIOS()">;
+def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+// Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
+// But only select them if more precision in FP computation is allowed.
+def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision">;
+def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4()">;
+
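The precision caveat exists because a fused multiply-accumulate rounds once where separate multiply and add round twice, so fusing can change results. A standalone demonstration (build with contraction disabled, e.g. -ffp-contract=off, so the unfused expression really rounds twice):

#include <cmath>
#include <cstdio>

int main() {
  float a = 1.0f + 1.0f / 4096.0f;       // exactly 1 + 2^-12
  float unfused = a * a - 1.0f;          // product rounded, then subtracted
  float fused = std::fmaf(a, a, -1.0f);  // a*a - 1 with a single rounding
  // Prints two different values: only the fused form keeps the 2^-24 term.
  std::printf("unfused=%a fused=%a\n", (double)unfused, (double)fused);
  return 0;
}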
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -244,25 +245,28 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
}]>;
-/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
-def imm1_15 : ImmLeaf<i32, [{
- return (int32_t)Imm >= 1 && (int32_t)Imm < 16;
-}]>;
-
/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
def imm16_31 : ImmLeaf<i32, [{
return (int32_t)Imm >= 16 && (int32_t)Imm < 32;
}]>;
-def so_imm_neg :
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_neg_XFORM>;
+def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; }
+def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getSOImmVal(Value) != -1;
+ }], so_imm_neg_XFORM> {
+ let ParserMatchClass = so_imm_neg_asmoperand;
+}
-def so_imm_not :
- PatLeaf<(imm), [{
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use so_imm.
+def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; }
+def so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
- }], so_imm_not_XFORM>;
+ }], so_imm_not_XFORM> {
+ let ParserMatchClass = so_imm_not_asmoperand;
+}
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
@@ -279,14 +283,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 - An immediate is in the range [0.65535].
-def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
-def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 65536;
-}]> {
- let ParserMatchClass = Imm0_65535AsmOperand;
-}
-
class BinOpWithFlagFrag<dag res> :
PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
@@ -321,6 +317,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// Operand Definitions.
//
+// Immediate operands with a shared generic asm render method.
+class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; }
+
// Branch target.
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
@@ -352,13 +351,11 @@ def bltarget : Operand<i32> {
// Call target for ARM. Handles conditional/unconditional
// FIXME: rename bl_target to t2_bltarget?
def bl_target : Operand<i32> {
- // Encoded the same as branch targets.
- let EncoderMethod = "getARMBranchTargetOpValue";
+ let EncoderMethod = "getARMBLTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
def blx_target : Operand<i32> {
- // Encoded the same as branch targets.
let EncoderMethod = "getARMBLXTargetOpValue";
let OperandType = "OPERAND_PCREL";
}
@@ -475,6 +472,7 @@ def shift_so_reg_reg : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegRegOpValue";
let PrintMethod = "printSORegRegOperand";
let DecoderMethod = "DecodeSORegRegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
let MIOperandInfo = (ops GPR, GPR, i32imm);
}
@@ -485,13 +483,14 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm
let EncoderMethod = "getSORegImmOpValue";
let PrintMethod = "printSORegImmOperand";
let DecoderMethod = "DecodeSORegImmOperand";
+ let ParserMatchClass = ShiftedImmAsmOperand;
let MIOperandInfo = (ops GPR, i32imm);
}
// so_imm - Match a 32-bit shifter_operand immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits.
-def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; }
+def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; }
def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
@@ -515,16 +514,60 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
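
For reference, the encodability test behind getSOImmVal (used here, and applied to ~V and -V by so_imm_not and so_imm_neg above) amounts to trying every even rotation; a minimal standalone sketch (the real helper also returns the packed rot/imm8 value, or -1):

#include <cstdint>

// True iff V is an 8-bit value rotated right by an even amount, i.e.
// representable as a shifter_operand immediate.
bool isSOImmEncodable(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if (((V << Rot) | (V >> ((32 - Rot) & 31))) <= 0xFFu)
      return true;
  return false;
}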
+/// imm0_1 predicate - Immediate in the range [0,1].
+def Imm0_1AsmOperand: ImmAsmOperand { let Name = "Imm0_1"; }
+def imm0_1 : Operand<i32> { let ParserMatchClass = Imm0_1AsmOperand; }
+
+/// imm0_3 predicate - Immediate in the range [0,3].
+def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; }
+def imm0_3 : Operand<i32> { let ParserMatchClass = Imm0_3AsmOperand; }
+
/// imm0_7 predicate - Immediate in the range [0,7].
-def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; }
def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 8;
}]> {
let ParserMatchClass = Imm0_7AsmOperand;
}
+/// imm8 predicate - Immediate is exactly 8.
+def Imm8AsmOperand: ImmAsmOperand { let Name = "Imm8"; }
+def imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 8; }]> {
+ let ParserMatchClass = Imm8AsmOperand;
+}
+
+/// imm16 predicate - Immediate is exactly 16.
+def Imm16AsmOperand: ImmAsmOperand { let Name = "Imm16"; }
+def imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 16; }]> {
+ let ParserMatchClass = Imm16AsmOperand;
+}
+
+/// imm32 predicate - Immediate is exactly 32.
+def Imm32AsmOperand: ImmAsmOperand { let Name = "Imm32"; }
+def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> {
+ let ParserMatchClass = Imm32AsmOperand;
+}
+
+/// imm1_7 predicate - Immediate in the range [1,7].
+def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; }
+def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> {
+ let ParserMatchClass = Imm1_7AsmOperand;
+}
+
+/// imm1_15 predicate - Immediate in the range [1,15].
+def Imm1_15AsmOperand: ImmAsmOperand { let Name = "Imm1_15"; }
+def imm1_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 16; }]> {
+ let ParserMatchClass = Imm1_15AsmOperand;
+}
+
+/// imm1_31 predicate - Immediate in the range [1,31].
+def Imm1_31AsmOperand: ImmAsmOperand { let Name = "Imm1_31"; }
+def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> {
+ let ParserMatchClass = Imm1_31AsmOperand;
+}
+
/// imm0_15 predicate - Immediate in the range [0,15].
-def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def Imm0_15AsmOperand: ImmAsmOperand { let Name = "Imm0_15"; }
def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 16;
}]> {
@@ -532,33 +575,57 @@ def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
}
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
-def Imm0_31AsmOperand: AsmOperandClass { let Name = "Imm0_31"; }
+def Imm0_31AsmOperand: ImmAsmOperand { let Name = "Imm0_31"; }
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
}]> {
let ParserMatchClass = Imm0_31AsmOperand;
}
+/// imm0_32 predicate - True if the 32-bit immediate is in the range [0,32].
+def Imm0_32AsmOperand: ImmAsmOperand { let Name = "Imm0_32"; }
+def imm0_32 : Operand<i32>, ImmLeaf<i32, [{
+  return Imm >= 0 && Imm < 33;
+}]> {
+ let ParserMatchClass = Imm0_32AsmOperand;
+}
+
+/// imm0_63 predicate - True if the 32-bit immediate is in the range [0,63].
+def Imm0_63AsmOperand: ImmAsmOperand { let Name = "Imm0_63"; }
+def imm0_63 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 64;
+}]> {
+ let ParserMatchClass = Imm0_63AsmOperand;
+}
+
/// imm0_255 predicate - Immediate in the range [0,255].
-def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; }
+def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; }
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
+/// imm0_65535 - An immediate is in the range [0,65535].
+def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
+
// imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference
// a relocatable expression.
//
// FIXME: This really needs a Thumb version separate from the ARM version.
// While the range is the same, and can thus use the same match class,
// the encoding is different so it should have a different encoder method.
-def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; }
+def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; }
def imm0_65535_expr : Operand<i32> {
let EncoderMethod = "getHiLo16ImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
/// imm24b - True if the 32-bit immediate is encodable in 24 bits.
-def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; }
+def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; }
def imm24b : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm <= 0xffffff;
}]> {
@@ -572,6 +639,7 @@ def BitfieldAsmOperand : AsmOperandClass {
let Name = "Bitfield";
let ParserMethod = "parseBitfield";
}
+
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
return ARM::isBitFieldInvertedMask(N->getZExtValue());
@@ -670,7 +738,7 @@ def postidx_reg : Operand<i32> {
let DecoderMethod = "DecodePostIdxReg";
let PrintMethod = "printPostIdxRegOperand";
let ParserMatchClass = PostIdxRegAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
@@ -699,7 +767,7 @@ def am2offset_reg : Operand<i32>,
let PrintMethod = "printAddrMode2OffsetOperand";
// When using this for assembly, it's always as a post-index offset.
let ParserMatchClass = PostIdxRegShiftedAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
// FIXME: am2offset_imm should only need the immediate, not the GPR. Having
@@ -711,7 +779,7 @@ def am2offset_imm : Operand<i32>,
let EncoderMethod = "getAddrMode2OffsetOpValue";
let PrintMethod = "printAddrMode2OffsetOperand";
let ParserMatchClass = AM2OffsetImmAsmOperand;
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops GPRnopc, i32imm);
}
@@ -799,6 +867,9 @@ def addrmode6dup : Operand<i32>,
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, i32imm);
let EncoderMethod = "getAddrMode6DupAddressOpValue";
+ // FIXME: This is close, but not quite right. The alignment specifier is
+ // different.
+ let ParserMatchClass = AddrMode6AsmOperand;
}
// addrmodepc := pc + reg
@@ -1041,69 +1112,58 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc,
}
-/// AsI1_rbin_s_is - Same as AsI1_rbin_s_is except it sets 's' bit by default.
+/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
///
/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass AsI1_rbin_s_is<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
-
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm",
- [/* pattern left blank */]>;
-
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift, GPR:$Rn))]>;
-
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift, GPR:$Rn))]> {
- bits<4> Rd;
- bits<4> Rn;
- bits<12> shift;
- let Inst{25} = 0;
- let Inst{19-16} = Rn;
- let Inst{15-12} = Rd;
- let Inst{11-8} = shift{11-8};
- let Inst{7} = 0;
- let Inst{6-5} = shift{6-5};
- let Inst{4} = 1;
- let Inst{3-0} = shift{3-0};
+/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
+
+ def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, iir,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]> {
+ let isCommutable = Commutable;
}
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_imm:$shift))]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode GPR:$Rn,
+ so_reg_reg:$shift))]>;
}
}
-/// AsI1_bin_s_irs - Same as AsI1_bin_irs except it sets the 's' bit by default.
-///
-/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass AsI1_bin_s_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
- def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm,
- iii, opc, "\t$Rd, $Rn, $imm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>;
- def rr : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm,
- iir, opc, "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, GPR:$Rm))]>;
- def rsi : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_imm:$shift))]>;
+/// AsI1_rbin_s_is - Same as AsI1_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
+ def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>;
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift), DPSoRegRegFrm,
- iis, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift))]>;
+ def rsi : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_imm:$shift,
+ GPR:$Rn))]>;
+
+ def rsr : ARMPseudoInst<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_reg:$shift, pred:$p),
+ 4, iis,
+ [(set GPR:$Rd, CPSR, (opnode so_reg_reg:$shift,
+ GPR:$Rn))]>;
}
}
@@ -1272,10 +1332,10 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{4} = 0;
let Inst{3-0} = shift{3-0};
}
- def rsr : AsI1<opcod, (outs GPR:$Rd),
- (ins GPR:$Rn, so_reg_reg:$shift),
+ def rsr : AsI1<opcod, (outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift),
DPSoRegRegFrm, IIC_iALUsr, opc, "\t$Rd, $Rn, $shift",
- [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_reg_reg:$shift, CPSR))]>,
+ [(set GPRnopc:$Rd, CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift, CPSR))]>,
Requires<[IsARM]> {
bits<4> Rd;
bits<4> Rn;
@@ -1309,7 +1369,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
cc_out:$s)>,
Requires<[IsARM]>;
def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
- (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPR:$Rdn, GPR:$Rdn,
+ (!cast<Instruction>(!strconcat(baseOpc, "rsr")) GPRnopc:$Rdn, GPRnopc:$Rdn,
so_reg_reg:$shift, pred:$p,
cc_out:$s)>,
Requires<[IsARM]>;
@@ -1550,7 +1610,7 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
}
// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops.
-// (These psuedos use a hand-written selection code).
+// (These pseudos use hand-written selection code.)
let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in {
def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
(ins GPR:$addr, GPR:$src1, GPR:$src2),
@@ -1652,7 +1712,7 @@ class CPS<dag iops, string asm_ops>
let Inst{27-20} = 0b00010000;
let Inst{19-18} = imod;
let Inst{17} = M; // Enabled if mode is set;
- let Inst{16} = 0;
+ let Inst{16-9} = 0b00000000;
let Inst{8-6} = iflags;
let Inst{5} = 0;
let Inst{4-0} = mode;
@@ -1839,20 +1899,17 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
}
}
-// All calls clobber the non-callee saved registers. SP is marked as
-// a use to prevent stack-pointer assignments that appear immediately
-// before calls from potentially appearing dead.
+// SP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead.
let isCall = 1,
- // On non-Darwin platforms R9 is callee-saved.
// FIXME: Do we really need a non-predicated version? If so, it should
// at least be a pseudo instruction expanding to the predicated version
// at MC lowering time.
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ Requires<[IsARM]> {
let Inst{31-28} = 0b1110;
bits<24> func;
let Inst{23-0} = func;
@@ -1862,7 +1919,7 @@ let isCall = 1,
def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops),
IIC_Br, "bl", "\t$func",
[(ARMcall_pred tglobaladdr:$func)]>,
- Requires<[IsARM, IsNotDarwin]> {
+ Requires<[IsARM]> {
bits<24> func;
let Inst{23-0} = func;
let DecoderMethod = "DecodeBranchImmInstruction";
@@ -1872,7 +1929,7 @@ let isCall = 1,
def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx\t$func",
[(ARMcall GPR:$func)]>,
- Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ Requires<[IsARM, HasV5T]> {
bits<4> func;
let Inst{31-4} = 0b1110000100101111111111110011;
let Inst{3-0} = func;
@@ -1881,7 +1938,7 @@ let isCall = 1,
def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm,
IIC_Br, "blx", "\t$func",
[(ARMcall_pred GPR:$func)]>,
- Requires<[IsARM, HasV5T, IsNotDarwin]> {
+ Requires<[IsARM, HasV5T]> {
bits<4> func;
let Inst{27-4} = 0b000100101111111111110011;
let Inst{3-0} = func;
@@ -1891,55 +1948,19 @@ let isCall = 1,
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsNotDarwin]>;
+ Requires<[IsARM, HasV4T]>;
// ARMv4
def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsNotDarwin]>;
-}
-
-let isCall = 1,
- // On Darwin R9 is call-clobbered.
- // R7 is marked as a use to prevent frame-pointer assignments from being
- // moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [R7, SP] in {
- def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
- Requires<[IsARM, IsDarwin]>;
-
- def BLr9_pred : ARMPseudoExpand<(outs),
- (ins bl_target:$func, pred:$p, variable_ops),
- 4, IIC_Br,
- [(ARMcall_pred tglobaladdr:$func)],
- (BL_pred bl_target:$func, pred:$p)>,
- Requires<[IsARM, IsDarwin]>;
-
- // ARMv5T and above
- def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall GPR:$func)],
- (BLX GPR:$func)>,
- Requires<[IsARM, HasV5T, IsDarwin]>;
-
- def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
- 4, IIC_Br,
- [(ARMcall_pred GPR:$func)],
- (BLX_pred GPR:$func, pred:$p)>,
- Requires<[IsARM, HasV5T, IsDarwin]>;
+ Requires<[IsARM, NoV4T]>;
- // ARMv4T
- // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
- def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, HasV4T, IsDarwin]>;
-
- // ARMv4
- def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsARM, NoV4T, IsDarwin]>;
+  // Use "mov lr, pc; b" if the callee is marked noreturn, to avoid confusing
+  // the return stack predictor.
+ def BMOVPCB_CALL : ARMPseudoInst<(outs),
+ (ins bl_target:$func, variable_ops),
+ 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsARM]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -2006,47 +2027,22 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
// Tail calls.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsDarwin]>;
-
- def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsDarwin]>;
-
- }
-
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>;
- def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>;
- def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
- 4, IIC_Br, [],
- (Bcc br_target:$dst, (ops 14, zero_reg))>,
- Requires<[IsARM, IsNotDarwin]>;
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM]>;
- def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (BX GPR:$dst)>,
- Requires<[IsARM, IsNotDarwin]>;
- }
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM]>;
}
// Secure Monitor Call is a system instruction.
@@ -2145,7 +2141,7 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> {
}
//===----------------------------------------------------------------------===//
-// Load / store Instructions.
+// Load / Store Instructions.
//
// Load
@@ -2197,9 +2193,10 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
}
// Indexed loads
-multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_ldridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin,
+ (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2211,7 +2208,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
}
def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
- (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin,
+ (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
@@ -2225,7 +2222,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_reg:$offset),
- IndexModePost, LdFrm, itin,
+ IndexModePost, LdFrm, iir,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2242,7 +2239,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
(ins addr_offset_none:$addr, am2offset_imm:$offset),
- IndexModePost, LdFrm, itin,
+ IndexModePost, LdFrm, iii,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2260,8 +2257,10 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
}
let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
-defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+// FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or
+// IIC_iLoad_siu depending on whether the offset register is shifted.
+defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>;
}
multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> {
@@ -2416,7 +2415,7 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{3-0} = offset{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackImm";
}
- def r : AI3ldstidxT<op, 1, (outs GPR:$Rt, GPR:$base_wb),
+ def r : AI3ldstidxT<op, 1, (outs GPRnopc:$Rt, GPRnopc:$base_wb),
(ins addr_offset_none:$addr, postidx_reg:$Rm),
IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, opc,
"\t$Rt, $addr, $Rm", "$addr.base = $base_wb", []> {
@@ -2424,8 +2423,10 @@ multiclass AI3ldrT<bits<4> op, string opc> {
let Inst{23} = Rm{4};
let Inst{22} = 0;
let Inst{11-8} = 0;
+ let Unpredictable{11-8} = 0b1111;
let Inst{3-0} = Rm{3-0};
let AsmMatchConverter = "cvtLdExtTWriteBackReg";
+ let DecoderMethod = "DecodeLDR";
}
}
@@ -2451,10 +2452,11 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr),
}
// Indexed stores
-multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
+multiclass AI2_stridx<bit isByte, string opc,
+ InstrItinClass iii, InstrItinClass iir> {
def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre,
- StFrm, itin,
+ StFrm, iii,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 0;
@@ -2467,7 +2469,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb),
(ins GPR:$Rt, ldst_so_reg:$addr),
- IndexModePre, StFrm, itin,
+ IndexModePre, StFrm, iir,
opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
bits<17> addr;
let Inst{25} = 1;
@@ -2480,7 +2482,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
}
def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
- IndexModePost, StFrm, itin,
+ IndexModePost, StFrm, iir,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2497,7 +2499,7 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset),
- IndexModePost, StFrm, itin,
+ IndexModePost, StFrm, iii,
opc, "\t$Rt, $addr, $offset",
"$addr.base = $Rn_wb", []> {
// {12} isAdd
@@ -2514,8 +2516,10 @@ multiclass AI2_stridx<bit isByte, string opc, InstrItinClass itin> {
}
let mayStore = 1, neverHasSideEffects = 1 in {
-defm STR : AI2_stridx<0, "str", IIC_iStore_ru>;
-defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>;
+// FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or
+// IIC_iStore_siu depending on whether the offset register is shifted.
+defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>;
+defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>;
}
def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr,
@@ -2745,23 +2749,25 @@ defm STRHT : AI3strT<0b1011, "strht">;
// Load / store multiple Instructions.
//
-multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
  // IA is the default, so no need for an explicit suffix on the
  // mnemonic here. The suffix-less form is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2770,16 +2776,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "da${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "da${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b00; // Decrement After
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2788,16 +2796,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def DB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "db${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def DB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "db${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2806,16 +2816,18 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IB :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "ib${p}\t$Rn, $regs", sfx), "", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
}
def IB_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "ib${p}\t$Rn!, $regs", sfx), "$Rn = $wb", []> {
let Inst{24-23} = 0b11; // Increment Before
+ let Inst{22} = P_bit;
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -2826,10 +2838,12 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
let neverHasSideEffects = 1 in {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
-defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>;
+defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
-defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
+defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
} // neverHasSideEffects
@@ -2843,6 +2857,16 @@ def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
(LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
+defm sysLDM : arm_ldst_mult<"ldm", " ^", 1, 1, LdStMulFrm, IIC_iLoad_m,
+ IIC_iLoad_mu>;
+
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m,
+ IIC_iStore_mu>;
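
Since sfx is appended after the register list, these defs yield the user-register forms, e.g. "ldmdb${p}\t$Rn, $regs ^". A trivial C++ mirror of the !strconcat calls above (buildLdStMultAsm is hypothetical):

#include <string>

// Mirrors the !strconcat above for the non-writeback forms: sfx trails
// the register list (" ^" for the sysLDM/sysSTM user-register variants).
std::string buildLdStMultAsm(const std::string &Asm, const std::string &Mode,
                             const std::string &Sfx) {
  return Asm + Mode + "${p}\t$Rn, $regs" + Sfx;
}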
+
//===----------------------------------------------------------------------===//
// Move Instructions.
//
@@ -2860,7 +2884,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr,
let Inst{15-12} = Rd;
}
-def : ARMInstAlias<"movs${p} $Rd, $Rm",
+def : ARMInstAlias<"movs${p} $Rd, $Rm",
(MOVr GPR:$Rd, GPR:$Rm, pred:$p, CPSR)>;
// A version for the smaller set of tail call registers.
@@ -3080,20 +3104,18 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
// ADD and SUB with 's' bit set.
//
-// Currently, t2ADDS/t2SUBS are pseudo opcodes that exist only in the
-// selection DAG. They are "lowered" to real t2ADD/t2SUB opcodes by
+// Currently, ADDS/SUBS are pseudo opcodes that exist only in the
+// selection DAG. They are "lowered" to real ADD/SUB opcodes by
// AdjustInstrPostInstrSelection where we determine whether or not to
// set the "s" bit based on CPSR liveness.
//
-// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
+// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm ADDS : AsI1_bin_s_irs<0b0100, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm SUBS : AsI1_bin_s_irs<0b0010, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
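
The post-isel conversion described in the comment boils down to checking whether the pseudo's CPSR def survives. A toy standalone model (names hypothetical; the real logic lives in AdjustInstrPostInstrSelection and works on MachineInstrs):

#include <cstdio>

enum OptionalCCOut { NoRegister, CPSROut }; // models the cc_out operand

struct LoweredOp {
  const char *Mnemonic;
  OptionalCCOut CCOut;
};

// Dead CPSR def -> plain opcode with cc_out = noreg; live def -> the
// flag-setting "s" form with cc_out = CPSR.
LoweredOp lowerADDSPseudo(bool CPSRDefIsDead) {
  return CPSRDefIsDead ? LoweredOp{"add", NoRegister}
                       : LoweredOp{"adds", CPSROut};
}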
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpWithFlagFrag<(ARMadde node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3108,9 +3130,8 @@ defm RSB : AsI1_rbin_irs <0b0011, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm RSBS : AsI1_rbin_s_is<0b0011, "rsb",
- IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm RSBS : AsI1_rbin_s_is<IIC_iALUi, IIC_iALUr, IIC_iALUsr,
+ BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
defm RSC : AI1_rsc_irs<0b0111, "rsc",
BinOpWithFlagFrag<(ARMsube node:$LHS, node:$RHS, node:$FLAG)>,
@@ -3153,6 +3174,8 @@ class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
let Inst{19-16} = Rn;
let Inst{15-12} = Rd;
let Inst{3-0} = Rm;
+
+ let Unpredictable{11-8} = 0b1111;
}
// Saturating add/subtract
@@ -3445,19 +3468,20 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// property. Remove them when it's possible to add those properties
// on an individual MachineInstr, not just an instruction description.
let isCommutable = 1 in {
-def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+def MUL : AsMul1I32<0b0000000, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b0000;
+ let Unpredictable{15-12} = 0b1111;
}
let Constraints = "@earlyclobber $Rd" in
-def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+def MULv5: ARMPseudoExpand<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm,
pred:$p, cc_out:$s),
4, IIC_iMUL32,
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
- (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
+ (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
}
@@ -3952,10 +3976,13 @@ def BCCZi64 : PseudoInst<(outs),
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_reg_imm:$shift, pred:$p),
4, IIC_iCMOVsr,
@@ -3996,8 +4023,44 @@ def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
+
+// Conditional instructions
+multiclass AsI1_bincc_irs<Instruction iri, Instruction irr, Instruction irsi,
+ Instruction irsr,
+ InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis> {
+ def ri : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri GPR:$Rd, GPR:$Rn, so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rr : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsi : ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irsi GPR:$Rd, GPR:$Rn, so_reg_imm:$shift, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ def rsr : ARMPseudoExpand<(outs GPRnopc:$Rd),
+ (ins GPRnopc:$Rn, so_reg_reg:$shift, pred:$p, cc_out:$s),
+ 4, iis, [],
+                 (irsr GPRnopc:$Rd, GPRnopc:$Rn, so_reg_reg:$shift,
+                       pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+}
+
+defm ANDCC : AsI1_bincc_irs<ANDri, ANDrr, ANDrsi, ANDrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm ORRCC : AsI1_bincc_irs<ORRri, ORRrr, ORRrsi, ORRrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+defm EORCC : AsI1_bincc_irs<EORri, EORrr, EORrsi, EORrsr,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsr>;
+
} // neverHasSideEffects
+
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
@@ -4076,10 +4139,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
@@ -4106,10 +4169,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I16 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_ADD_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
[(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
@@ -4136,10 +4199,10 @@ let usesCustomInserter = 1 in {
[(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMIN_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_LOAD_UMAX_I32 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary,
- [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>;
+ [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>;
def ATOMIC_SWAP_I8 : PseudoInst<
(outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
@@ -4185,14 +4248,14 @@ def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>;
def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>;
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
def STREXD : AIstrex<0b01, (outs GPR:$Rd),
(ins GPR:$Rt, GPR:$Rt2, addr_offset_none:$addr),
NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []> {
let DecoderMethod = "DecodeDoubleRegStore";
}
+}
+
def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>,
Requires<[IsARM, HasV7]> {
@@ -4451,10 +4514,16 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -4488,10 +4557,16 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(outs GPR:$Rt),
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>;
+def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+ (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
@@ -4635,7 +4710,8 @@ let isCall = 1,
// no encoding information is necessary.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- QQQQ0, QQQQ1, QQQQ2, QQQQ3 ], hasSideEffects = 1, isBarrier = 1 in {
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
@@ -4644,31 +4720,37 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1 in {
+ hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
NoItinerary,
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
Defs = [ R7, LR, SP ] in {
def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
NoItinerary,
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsARM, IsDarwin]>;
+ Requires<[IsARM, IsIOS]>;
}
-// eh.sjlj.dispatchsetup pseudo-instruction.
-// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// eh.sjlj.dispatchsetup pseudo-instructions.
+// These pseudos are used for both ARM and Thumb2. Any differences are
// handled when the pseudo is expanded (which happens before any passes
// that need the instruction size).
-let isBarrier = 1, hasSideEffects = 1 in
-def Int_eh_sjlj_dispatchsetup :
- PseudoInst<(outs), (ins GPR:$src), NoItinerary,
- [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
- Requires<[IsDarwin]>;
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15 ],
+ isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
+
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
+ isBarrier = 1 in
+def Int_eh_sjlj_dispatchsetup_nofp : PseudoInst<(outs), (ins), NoItinerary, []>;
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -4725,30 +4807,15 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// TODO: add,sub,and, 3-instr forms?
-// Tail calls
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
-
-def : ARMPat<(ARMtcret tcGPR:$dst),
- (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
-
-def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
- (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+// Tail calls. These patterns also apply to Thumb mode.
+def : Pat<(ARMtcret tcGPR:$dst), (TCRETURNri tcGPR:$dst)>;
+def : Pat<(ARMtcret (i32 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>;
+def : Pat<(ARMtcret (i32 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>;
// Direct calls
-def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
- Requires<[IsARM, IsNotDarwin]>;
-def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
- Requires<[IsARM, IsDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>;
+def : ARMPat<(ARMcall_nolink texternalsym:$func),
+ (BMOVPCB_CALL texternalsym:$func)>;
// zextload i1 -> zextload i8
def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
@@ -4992,13 +5059,113 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">;
// USAX == USUBADDX
def : MnemonicAlias<"usubaddx", "usax">;
-// LDRSBT/LDRHT/LDRSHT post-index offset if optional.
-// Note that the write-back output register is a dummy operand for MC (it's
-// only meaningful for codegen), so we just pass zero here.
-// FIXME: tblgen not cooperating with argument conversions.
-//def : InstAlias<"ldrsbt${p} $Rt, $addr",
-// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>;
-//def : InstAlias<"ldrht${p} $Rt, $addr",
-// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
-//def : InstAlias<"ldrsht${p} $Rt, $addr",
-// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>;
+// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : ARMInstAlias<"mov${s}${p} $Rd, $imm",
+ (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
+ (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>;
+// Same for AND <--> BIC
+def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
+ (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
+ (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+
+// Likewise, "add Rd, so_imm_neg" -> sub
+def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"add${s}${p} $Rd, $imm",
+ (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via so_imm_neg
+def : ARMInstAlias<"cmp${p} $Rd, $imm",
+ (CMNzri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
+def : ARMInstAlias<"cmn${p} $Rd, $imm",
+ (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>;
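+// For example, "cmp r0, #-1" assembles as "cmn r0, #1", which sets the same
+// flags via an add-based compare (illustrative).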
+
+// The shifter forms of the MOV instruction are aliased to the ASR, LSL,
+// LSR, ROR, and RRX instructions.
+// FIXME: We need C++ parser hooks to map the alias to the MOV
+// encoding. It seems we should be able to do that sort of thing
+// in tblgen, but it could get ugly.
+def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p,
+ cc_out:$s)>;
+def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm",
+ (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p,
+ cc_out:$s)>;
+def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>;
+def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSRr : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def LSLr : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def RORr : ARMAsmPseudo<"ror${s}${p} $Rd, $Rn, $Rm",
+ (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+// The shifter instructions also support a two-operand form.
+def : ARMInstAlias<"asr${s}${p} $Rm, $imm",
+ (ASRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rm, $imm",
+ (LSRi GPR:$Rm, GPR:$Rm, imm0_32:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rm, $imm",
+ (LSLi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rm, $imm",
+ (RORi GPR:$Rm, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>;
+def : ARMInstAlias<"asr${s}${p} $Rn, $Rm",
+ (ASRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsr${s}${p} $Rn, $Rm",
+ (LSRr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"lsl${s}${p} $Rn, $Rm",
+ (LSLr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
+def : ARMInstAlias<"ror${s}${p} $Rn, $Rm",
+ (RORr GPRnopc:$Rn, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p,
+ cc_out:$s)>;
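+// For example, "lsl r0, r1, #2" is the UAL spelling of the pre-UAL
+// "mov r0, r1, lsl #2"; per the FIXME above, both are intended to map to
+// the same MOV encoding once the parser hooks exist (illustrative syntax).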
+
+
+// The 'mul' instruction can be specified with only two operands.
+def : ARMInstAlias<"mul${s}${p} $Rn, $Rm",
+ (MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p, cc_out:$s)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : ARMInstAlias<"neg${s}${p} $Rd, $Rm",
+ (RSBri GPR:$Rd, GPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// Pre-v6, 'mov r0, r0' was used as a NOP encoding.
+def : InstAlias<"nop${p}", (MOVr R0, R0, pred:$p, zero_reg)>,
+ Requires<[IsARM, NoV6]>;
+
+// UMULL/SMULL are available on all arches, but the instruction definitions
+// need different constraints pre-v6. Use these aliases for assembly parsing
+// on pre-v6.
+def : InstAlias<"smull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm",
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
+// 'it' blocks in ARM mode just validate the predicates. The IT instruction
+// itself is discarded.
+def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>;
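+// For example, in ARM mode "it eq" followed by "addeq r0, r0, #1" is
+// accepted; the predicate is validated as the note above describes, and the
+// IT contributes no encoding of its own (illustrative).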
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7aad18695bb3..c7219a60f6c3 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -1,4 +1,4 @@
-//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===//
+//===-- ARMInstrNEON.td - NEON support for ARM -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,45 @@
//===----------------------------------------------------------------------===//
// NEON-specific Operands.
//===----------------------------------------------------------------------===//
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+}
+
+def nImmSplatI8AsmOperand : AsmOperandClass { let Name = "NEONi8splat"; }
+def nImmSplatI8 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI8AsmOperand;
+}
+def nImmSplatI16AsmOperand : AsmOperandClass { let Name = "NEONi16splat"; }
+def nImmSplatI16 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI16AsmOperand;
+}
+def nImmSplatI32AsmOperand : AsmOperandClass { let Name = "NEONi32splat"; }
+def nImmSplatI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI32AsmOperand;
+}
+def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; }
+def nImmVMOVI32 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperand;
+}
+def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
+def nImmVMOVI32Neg : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32NegAsmOperand;
+}
+def nImmVMOVF32 : Operand<i32> {
+ let PrintMethod = "printFPImmOperand";
+ let ParserMatchClass = FPImmOperand;
+}
+def nImmSplatI64AsmOperand : AsmOperandClass { let Name = "NEONi64splat"; }
+def nImmSplatI64 : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmSplatI64AsmOperand;
+}
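+// The nImm* operands above cover the NEON "modified immediate" forms; for
+// example, "vmov.i32 q0, #1" splats the 8-bit value 1 into each 32-bit lane
+// (illustrative; the particular form is selected by the cmode encoding bits).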
+
def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; }
def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; }
def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; }
@@ -40,6 +79,326 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{
let MIOperandInfo = (ops i32imm);
}
+// Register list of one D register.
+def VecListOneDAsmOperand : AsmOperandClass {
+ let Name = "VecListOneD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> {
+ let ParserMatchClass = VecListOneDAsmOperand;
+}
+// Register list of two sequential D registers.
+def VecListDPairAsmOperand : AsmOperandClass {
+ let Name = "VecListDPair";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPair : RegisterOperand<DPair, "printVectorListTwo"> {
+ let ParserMatchClass = VecListDPairAsmOperand;
+}
+// Register list of three sequential D registers.
+def VecListThreeDAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeD : RegisterOperand<DPR, "printVectorListThree"> {
+ let ParserMatchClass = VecListThreeDAsmOperand;
+}
+// Register list of four sequential D registers.
+def VecListFourDAsmOperand : AsmOperandClass {
+ let Name = "VecListFourD";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourD : RegisterOperand<DPR, "printVectorListFour"> {
+ let ParserMatchClass = VecListFourDAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers).
+def VecListDPairSpacedAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpaced";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpaced : RegisterOperand<DPair, "printVectorListTwoSpaced"> {
+ let ParserMatchClass = VecListDPairSpacedAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three Q registers).
+def VecListThreeQAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQ : RegisterOperand<DPR, "printVectorListThreeSpaced"> {
+ let ParserMatchClass = VecListThreeQAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four Q registers).
+def VecListFourQAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQ";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQ : RegisterOperand<DPR, "printVectorListFourSpaced"> {
+ let ParserMatchClass = VecListFourQAsmOperand;
+}
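+// For example, "vld1.32 {d0, d1}, [r0]" parses its braced register list as
+// a VecListDPair operand via parseVectorList (a sketch of intended usage).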
+
+// Register list of one D register, with "all lanes" subscripting.
+def VecListOneDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListOneDAllLanes : RegisterOperand<DPR, "printVectorListOneAllLanes"> {
+ let ParserMatchClass = VecListOneDAllLanesAsmOperand;
+}
+// Register list of two D registers, with "all lanes" subscripting.
+def VecListDPairAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoAllLanes"> {
+ let ParserMatchClass = VecListDPairAllLanesAsmOperand;
+}
+// Register list of two D registers spaced by 2 (two sequential Q registers),
+// with "all lanes" subscripting.
+def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListDPairSpacedAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+ "printVectorListTwoSpacedAllLanes"> {
+ let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
+}
+// Register list of three D registers, with "all lanes" subscripting.
+def VecListThreeDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeDAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeAllLanes"> {
+ let ParserMatchClass = VecListThreeDAllLanesAsmOperand;
+}
+// Register list of three D registers spaced by 2 (three sequential Q
+// registers), with "all lanes" subscripting.
+def VecListThreeQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListThreeQAllLanes : RegisterOperand<DPR,
+ "printVectorListThreeSpacedAllLanes"> {
+ let ParserMatchClass = VecListThreeQAllLanesAsmOperand;
+}
+// Register list of four D registers, with "all lanes" subscripting.
+def VecListFourDAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourDAllLanes : RegisterOperand<DPR, "printVectorListFourAllLanes"> {
+ let ParserMatchClass = VecListFourDAllLanesAsmOperand;
+}
+// Register list of four D registers spaced by 2 (four sequential Q
+// registers), with "all lanes" subscripting.
+def VecListFourQAllLanesAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQAllLanes";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListOperands";
+}
+def VecListFourQAllLanes : RegisterOperand<DPR,
+ "printVectorListFourSpacedAllLanes"> {
+ let ParserMatchClass = VecListFourQAllLanesAsmOperand;
+}
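+// For example, "vld2.8 {d0[], d1[]}, [r0]" parses its list as a
+// VecListDPairAllLanes operand (illustrative).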
+
+
+// Register list of one D register, with byte lane subscripting.
+def VecListOneDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListOneDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListOneDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListOneDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListOneDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListOneDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of two D registers with byte lane subscripting.
+def VecListTwoDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListTwoDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of two Q registers with half-word lane subscripting.
+def VecListTwoQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListTwoQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListTwoQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListTwoQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListTwoQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+
+// Register list of three D registers with byte lane subscripting.
+def VecListThreeDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListThreeDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of three Q registers with half-word lane subscripting.
+def VecListThreeQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListThreeQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListThreeQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListThreeQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListThreeQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+
+// Register list of four D registers with byte lane subscripting.
+def VecListFourDByteIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDByteIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDByteIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDByteIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with half-word lane subscripting.
+def VecListFourDHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourDWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourDWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourDWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourDWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// Register list of four Q registers with half-word lane subscripting.
+def VecListFourQHWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQHWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQHWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQHWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
+// ...with word lane subscripting.
+def VecListFourQWordIndexAsmOperand : AsmOperandClass {
+ let Name = "VecListFourQWordIndexed";
+ let ParserMethod = "parseVectorList";
+ let RenderMethod = "addVecListIndexedOperands";
+}
+def VecListFourQWordIndexed : Operand<i32> {
+ let ParserMatchClass = VecListFourQWordIndexAsmOperand;
+ let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
+}
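+// For example, "vld2.32 {d0[1], d1[1]}, [r0]" parses its list as a
+// VecListTwoDWordIndexed operand, rendering the base D register and the
+// lane index (illustrative).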
+
+
//===----------------------------------------------------------------------===//
// NEON-specific DAG Nodes.
//===----------------------------------------------------------------------===//
@@ -103,6 +462,7 @@ def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+def NEONvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
@@ -164,30 +524,22 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
}]>;
//===----------------------------------------------------------------------===//
-// NEON operand definitions
-//===----------------------------------------------------------------------===//
-
-def nModImm : Operand<i32> {
- let PrintMethod = "printNEONModImmOperand";
-}
-
-//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
// Use VLDM to load a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
def VLDMQIA
- : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+ : PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
- [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+ [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
- : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+ : PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
- [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+ [(store (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -197,12 +549,31 @@ class VLDQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
class VLDQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">;
class VLDQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
+class VLDQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr), itin,
+ "$addr.addr = $wb">;
+class VLDQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset), itin,
+ "$addr.addr = $wb">;
+
+
class VLDQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,
"$src = $dst">;
@@ -215,17 +586,17 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd),
+ : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1,
- "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
class VLD1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2),
+ : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x2,
- "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
@@ -241,59 +612,82 @@ def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
-def VLD1q8Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q16Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q32Pseudo : VLDQPseudo<IIC_VLD1x2>;
-def VLD1q64Pseudo : VLDQPseudo<IIC_VLD1x2>;
-
// ...with address register writeback:
-class VLD1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u,
- "vld1", Dt, "\\{$Vd\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u,
- "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">;
-def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">;
-def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">;
-def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">;
-
-def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">;
-def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">;
-def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">;
-def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">;
-
-def VLD1q8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
-def VLD1q64Pseudo_UPD : VLDQWBPseudo<IIC_VLD1x2u>;
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
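+// The _fixed forms encode Rm == 0b1101, i.e. post-increment by the transfer
+// size, as in "vld1.8 {d0}, [r0]!"; the _register forms take an explicit
+// increment register, as in "vld1.8 {d0}, [r0], r2" (illustrative syntax).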
-// ...with 3 registers (some of these are only for the disassembler):
+// ...with 3 registers
class VLD1D3<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+ : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> {
+ "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDInstruction";
}
-class VLD1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
@@ -301,31 +695,40 @@ def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
-def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">;
-def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">;
-def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">;
-def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">;
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
-def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-def VLD1d64TPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x3u>;
+def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
-// ...with 4 registers (some of these are only for the disassembler):
+// ...with 4 registers
class VLD1D4<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
(ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+ "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-class VLD1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt,
- "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb",
- []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
@@ -333,91 +736,80 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
-def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">;
-def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">;
-def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">;
-def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">;
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
-def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
-def VLD1d64QPseudo_UPD : VLDQQWBPseudo<IIC_VLD1x4u>;
+def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
-class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2),
- (ins addrmode6:$Rn), IIC_VLD2,
- "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$Rn), IIC_VLD2x2,
- "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> {
+class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDInstruction";
}
-def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">;
-def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">;
-def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
-def VLD2q8 : VLD2Q<{0,0,?,?}, "8">;
-def VLD2q16 : VLD2Q<{0,1,?,?}, "16">;
-def VLD2q32 : VLD2Q<{1,0,?,?}, "32">;
-
-def VLD2d8Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2d16Pseudo : VLDQPseudo<IIC_VLD2>;
-def VLD2d32Pseudo : VLDQPseudo<IIC_VLD2>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
-class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u,
- "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
-}
-class VLD2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b10, 0b0011, op7_4,
- (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u,
- "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVLDInstruction";
+multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy, InstrItinClass itin> {
+ def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn), itin,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVLDInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">;
-def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">;
-def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
-def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">;
-def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">;
-def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
-def VLD2d8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
-def VLD2d32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2u>;
+def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
+def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
+def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
-def VLD2q8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-def VLD2q32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD2x2u>;
-
-// ...with double-spaced registers (for disassembly only):
-def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">;
-def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">;
-def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">;
-def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">;
-def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">;
-def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">;
+// ...with double-spaced registers
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
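+// For example, "vld2.8 {d0, d2}, [r0]" selects the double-spaced VLD2b8 form
+// via its VecListDPairSpaced operand (illustrative).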
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -601,12 +993,11 @@ def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> {
}
def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> {
let Inst{7-6} = lane{1-0};
- let Inst{4} = Rn{4};
+ let Inst{5-4} = Rn{5-4};
}
def VLD1LNd32 : VLD1LN32<0b1000, {?,0,?,?}, "32", v2i32, load> {
let Inst{7} = lane{0};
- let Inst{5} = Rn{4};
- let Inst{4} = Rn{4};
+ let Inst{5-4} = Rn{5-4};
}
def VLD1LNq8Pseudo : VLD1QLNPseudo<v16i8, extloadi8>;
@@ -776,7 +1167,7 @@ def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
}
def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo<IIC_VLD3lnu>;
@@ -787,7 +1178,7 @@ def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
}
def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD3lnu>;
@@ -802,7 +1193,7 @@ class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn",
"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> {
let Rm = 0b1111;
- let Inst{4} = Rn{4};
+ let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD4LN";
}
@@ -813,7 +1204,7 @@ def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -826,7 +1217,7 @@ def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -854,7 +1245,7 @@ def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -866,7 +1257,7 @@ def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> {
let Inst{7-6} = lane{1-0};
}
def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> {
- let Inst{7} = lane{0};
+ let Inst{7} = lane{0};
let Inst{5} = Rn{5};
}
@@ -877,117 +1268,142 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
// VLD1DUP : Vector Load (single element to all lanes)
class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
- IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
- [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
+ (ins addrmode6dup:$Rn),
+ IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListOneDAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
- let Pattern = [(set QPR:$dst,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
-}
-
def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
-def VLD1DUPq8Pseudo : VLD1QDUPPseudo<v16i8, extloadi8>;
-def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
-def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
-
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
- (VLD1DUPq32Pseudo addrmode6:$addr)>;
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-class VLD1QDUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+ : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD1dup,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld1", Dt, "$Vd, $Rn", "",
+ [(set VecListDPairAllLanes:$Vd,
+ (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8">;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+ (VLD1DUPq32 addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
-class VLD1DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListOneDAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-class VLD1QDUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
- "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD1DupInstruction";
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1100, op7_4,
+ (outs VecListDPairAllLanes:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ "vld1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD1DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD1DUPd8_UPD : VLD1DUPWB<{0,0,0,0}, "8">;
-def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
-def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
-
-def VLD1DUPq8_UPD : VLD1QDUPWB<{0,0,1,0}, "8">;
-def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
-def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
-def VLD1DUPq8Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
-def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
-class VLD2DUP<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+ : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
(ins addrmode6dup:$Rn), IIC_VLD2dup,
- "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+ "vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8">;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
-
-def VLD2DUPd8Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
-def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
-// ...with double-spaced registers (not used for codegen):
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8">;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+// ...with double-spaced registers
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
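+// For example, "vld2.16 {d0[], d2[]}, [r0]" selects the double-spaced
+// VLD2DUPd16x2 form (illustrative).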
// ...with address register writeback:
-class VLD2DUPWB<bits<4> op7_4, string Dt>
- : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
- "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVLD2DupInstruction";
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
+ def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbFixed";
+ }
+ def _register : NLdSt<1, 0b10, 0b1101, op7_4,
+ (outs VdTy:$Vd, GPR:$wb),
+ (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ "vld2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVLD2DupInstruction";
+ let AsmMatchConverter = "cvtVLDwbRegister";
+ }
}
-def VLD2DUPd8_UPD : VLD2DUPWB<{0,0,0,0}, "8">;
-def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
-def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
-
-def VLD2DUPd8x2_UPD : VLD2DUPWB<{0,0,1,0}, "8">;
-def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
-def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
-def VLD2DUPd8Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
-def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
-def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
@@ -1008,9 +1424,9 @@ def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
// ...with double-spaced registers (not used for codegen):
-def VLD3DUPd8x2 : VLD3DUP<{0,0,1,?}, "8">;
-def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
-def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt>
@@ -1026,9 +1442,9 @@ def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
-def VLD3DUPd8x2_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
-def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
-def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
@@ -1054,9 +1470,9 @@ def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
// ...with double-spaced registers (not used for codegen):
-def VLD4DUPd8x2 : VLD4DUP<{0,0,1,?}, "8">;
-def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
-def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
@@ -1073,9 +1489,9 @@ def VLD4DUPd8_UPD : VLD4DUPWB<{0,0,0,0}, "8">;
def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
-def VLD4DUPd8x2_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
-def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
-def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
@@ -1093,12 +1509,29 @@ class VSTQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin,
+ "$addr.addr = $wb">;
class VSTQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
class VSTQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
"$addr.addr = $wb">;
+class VSTQQWBfixedPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+class VSTQQWBregisterPseudo<InstrItinClass itin>
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, rGPR:$offset, QQPR:$src), itin,
+ "$addr.addr = $wb">;
+
class VSTQQQQPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
class VSTQQQQWBPseudo<InstrItinClass itin>
@@ -1108,16 +1541,15 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
- IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
+ IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
class VST1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b1010,op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
- "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
+ IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
@@ -1133,185 +1565,233 @@ def VST1q16 : VST1Q<{0,1,?,?}, "16">;
def VST1q32 : VST1Q<{1,0,?,?}, "32">;
def VST1q64 : VST1Q<{1,1,?,?}, "64">;
-def VST1q8Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
-def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
-
// ...with address register writeback:
-class VST1DWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
- "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1DWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ IIC_VLD1u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{4} = Rn{4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST1QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ IIC_VLD1x2u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">;
-def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
-def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
-def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
-
-def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">;
-def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
-def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
-def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
-def VST1q8Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
-def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
-// ...with 3 registers (some of these are only for the disassembler):
+// ...with 3 registers
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+ (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D3WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3),
- IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{4} = Rn{4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ IIC_VLD1x3u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
-def VST1d8T_UPD : VST1D3WB<{0,0,0,?}, "8">;
-def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
-def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
-def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
-def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
-def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x3u>;
+def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
-// ...with 4 registers (some of these are only for the disassembler):
+// ...with 4 registers
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+ (ins addrmode6:$Rn, VecListFourD:$Vd),
+ IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-class VST1D4WB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
- "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1x4u,
+ "vst1", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
-def VST1d8Q_UPD : VST1D4WB<{0,0,?,?}, "8">;
-def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
-def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
-def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
-def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
-def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudoWB_fixed : VSTQQWBPseudo<IIC_VST1x4u>;
+def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
-class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
- IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
- let Rm = 0b1111;
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
-}
-class VST2Q<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
- "", []> {
+class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ InstrItinClass itin>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVSTInstruction";
}
-def VST2d8 : VST2D<0b1000, {0,0,?,?}, "8">;
-def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
-def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
-def VST2q8 : VST2Q<{0,0,?,?}, "8">;
-def VST2q16 : VST2Q<{0,1,?,?}, "16">;
-def VST2q32 : VST2Q<{1,0,?,?}, "32">;
-
-def VST2d8Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
-def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
-class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
- IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
+ RegisterOperand VdTy> {
+ def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-class VST2QWB<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
- DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
- "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
- "$Rn.addr = $wb", []> {
- let Inst{5-4} = Rn{5-4};
- let DecoderMethod = "DecodeVSTInstruction";
+multiclass VST2QWB<bits<4> op7_4, string Dt> {
+ def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn!",
+ "$Rn.addr = $wb", []> {
+ let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbFixed";
+ }
+ def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+ (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ IIC_VLD1u,
+ "vst2", Dt, "$Vd, $Rn, $Rm",
+ "$Rn.addr = $wb", []> {
+ let Inst{5-4} = Rn{5-4};
+ let DecoderMethod = "DecodeVSTInstruction";
+ let AsmMatchConverter = "cvtVSTwbRegister";
+ }
}
-def VST2d8_UPD : VST2DWB<0b1000, {0,0,?,?}, "8">;
-def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
-def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
-def VST2q8_UPD : VST2QWB<{0,0,?,?}, "8">;
-def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
-def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
-def VST2d8Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
-def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
+def VST2q8PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
+def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
-def VST2q8Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
-
-// ...with double-spaced registers (for disassembly only):
-def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">;
-def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
-def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
-def VST2b8_UPD : VST2DWB<0b1001, {0,0,?,?}, "8">;
-def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
-def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+// ...with double-spaced registers
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
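+// VecListDPairSpaced matches an every-other-register pair, e.g.
+// "vst2.8 {d0, d2}, [r0]" (registers illustrative).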
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1458,20 +1938,11 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin>
// VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
- : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
- IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
- let Rm = 0b1111;
- let DecoderMethod = "DecodeVST1LN";
-}
-class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
+ PatFrag StoreOp, SDNode ExtractOp, Operand AddrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
- (ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
+ (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> {
let Rm = 0b1111;
let DecoderMethod = "DecodeVST1LN";
}
@@ -1482,16 +1953,17 @@ class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
-def VST1LNd32 : VST1LN32<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt,
+ addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
@@ -1507,14 +1979,14 @@ def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
// ...with address register writeback:
class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
- PatFrag StoreOp, SDNode ExtractOp>
+ PatFrag StoreOp, SDNode ExtractOp, Operand AdrMode>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, am6offset:$Rm,
+ (ins AdrMode:$Rn, am6offset:$Rm,
DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
"\\{$Vd[$lane]\\}, $Rn$Rm",
"$Rn.addr = $wb",
[(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
- addrmode6:$Rn, am6offset:$Rm))]> {
+ AdrMode:$Rn, am6offset:$Rm))]> {
let DecoderMethod = "DecodeVST1LN";
}
class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
@@ -1524,16 +1996,16 @@ class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
}
def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-5} = lane{2-0};
}
def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
- NEONvgetlaneu> {
+ NEONvgetlaneu, addrmode6> {
let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5};
}
def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
- extractelt> {
+ extractelt, addrmode6oneL32> {
let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4};
}
@@ -1585,10 +2057,10 @@ def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
// ...with address register writeback:
class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
- "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
- "$addr.addr = $wb", []> {
+ (ins addrmode6:$Rn, am6offset:$Rm,
+ DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+ "\\{$Vd[$lane], $src2[$lane]\\}, $Rn$Rm",
+ "$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVST2LN";
}
@@ -1914,8 +2386,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
@@ -1924,8 +2396,8 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane","",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
@@ -1954,8 +2426,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -1965,8 +2437,8 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -1987,8 +2459,8 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
@@ -1998,8 +2470,8 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
- (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (Ty DPR:$Vd),
(Ty (IntOp (Ty DPR:$Vn),
(Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
@@ -2028,8 +2500,8 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -2040,8 +2512,8 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$Vn),
(ResTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -2073,9 +2545,9 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
@@ -2086,9 +2558,9 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3VLane16<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$Vd),
- (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (Ty DPR:$Vd),
(Ty (ShOp (Ty DPR:$src1),
(Ty (MulOp DPR:$Vn,
@@ -2108,9 +2580,9 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDPatternOperator MulOp, SDPatternOperator ShOp>
: N3VLane32<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
@@ -2122,9 +2594,9 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3VLane16<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$Vn,
@@ -2182,9 +2654,9 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
@@ -2194,9 +2666,9 @@ class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set QPR:$Vd,
(OpNode (TyQ QPR:$src1),
(TyQ (MulOp (TyD DPR:$Vn),
@@ -2230,9 +2702,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
@@ -2243,9 +2715,9 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$Vd),
- (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+ (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
NVMulSLFrm, itin,
- OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+ OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "$src1 = $Vd",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (ResTy QPR:$src1),
(OpTy DPR:$Vn),
@@ -2277,8 +2749,8 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_VFP2:$Vm),imm:$lane)))))]>;
@@ -2286,8 +2758,8 @@ class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, SDNode OpNode>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set QPR:$Vd,
(TyQ (OpNode (TyD DPR:$Vn),
(TyD (NEONvduplane (TyD DPR_8:$Vm), imm:$lane)))))]>;
@@ -2332,8 +2804,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane32<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
@@ -2342,8 +2814,8 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3VLane16<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
- NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+ (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, VectorIndex16:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm$lane", "",
[(set (ResTy QPR:$Vd),
(ResTy (IntOp (OpTy DPR:$Vn),
(OpTy (NEONvduplane (OpTy DPR_8:$Vm),
@@ -2417,9 +2889,9 @@ class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Long shift by immediate.
class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), N2RegVShLFrm,
+ (outs QPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), N2RegVShLFrm,
IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
[(set QPR:$Vd, (ResTy (OpNode (OpTy DPR:$Vm),
(i32 imm:$SIMM))))]>;
@@ -2649,14 +3121,11 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v4i32, v4i32, OpNode, Commutable>;
}
-multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
- def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v4i16, ShOp>;
- def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
- v2i32, ShOp>;
- def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v4i16, ShOp>;
- def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, SDNode ShOp> {
+ def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, "i16", v4i16, ShOp>;
+ def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, "i32", v2i32, ShOp>;
+ def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, "i16", v8i16, v4i16, ShOp>;
+ def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, "i32",
v4i32, v2i32, ShOp>;
}
@@ -3165,7 +3634,7 @@ multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode> {
+ string baseOpc, SDNode OpNode> {
// 64-bit vector types.
def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
@@ -3199,6 +3668,33 @@ multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
+
+ // Aliases for two-operand forms (source and dest regs the same).
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v2i32"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v1i64"))
+ DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "8 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "16 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "32 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+ def : NEONInstAlias<!strconcat(OpcodeStr, "${p}.", Dt, "64 $Vdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "v2i64"))
+ QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
}
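+// e.g. for baseOpc == "VSHRs" these aliases let "vshr.s8 d0, #1" assemble as
+// the canonical "vshr.s8 d0, d0, #1" (operands illustrative).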
// Neon Shift-Accumulate vector operations,
@@ -3321,15 +3817,15 @@ multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
bit op4, string OpcodeStr, string Dt, SDNode OpNode> {
def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> {
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, imm1_7, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> {
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, imm1_15, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> {
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, imm1_31, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
}
@@ -3418,7 +3914,7 @@ def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", mul>;
def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
v2f32, fmul>;
@@ -3509,10 +4005,10 @@ defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
v2f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
v4f32, fmul_su, fadd_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -3567,10 +4063,10 @@ defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
v2f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
v4f32, fmul_su, fsub_mlx>,
- Requires<[HasNEON, UseFPVMLx]>;
+ Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -3619,6 +4115,37 @@ defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
"vqdmlsl", "s", int_arm_neon_vqdmlsl>;
defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>;
+// Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations.
+def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
+ v2f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
+ v4f32, fmul_su, fadd_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Fused Vector Multiply Subtract (floating-point)
+def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
+ v2f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
+ v4f32, fmul_su, fsub_mlx>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+ (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+ (VFMAfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v2f32 (fma (fneg DPR:$Vn), DPR:$Vm, DPR:$src1)),
+ (VFMSfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)),
+ (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasVFP4]>;
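+// e.g. IR fma(n, m, acc) selects "vfma.f32 Dacc, Dn, Dm", and negating the
+// first multiplicand selects "vfms.f32" instead (value names illustrative).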
+
// Vector Subtract Operations.
// VSUB : Vector Subtract (integer and floating-point)
@@ -3741,7 +4268,7 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3750,7 +4277,7 @@ def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
}
def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3759,7 +4286,7 @@ def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1,
}
def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3768,7 +4295,7 @@ def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
}
def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3790,7 +4317,7 @@ def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(vnotq QPR:$Vm))))]>;
def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI16:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3799,7 +4326,7 @@ def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1,
}
def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
- (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
+ (outs DPR:$Vd), (ins nImmSplatI32:$SIMM, DPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
@@ -3808,7 +4335,7 @@ def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1,
}
def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI16:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3817,7 +4344,7 @@ def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1,
}
def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1,
- (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
+ (outs QPR:$Vd), (ins nImmSplatI32:$SIMM, QPR:$src),
IIC_VMOVImm,
"vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
@@ -3842,28 +4369,28 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd),
let isReMaterializable = 1 in {
def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmvn", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmvn", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
@@ -3912,12 +4439,12 @@ def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
@@ -3926,12 +4453,12 @@ def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VBINiD,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VBINiQ,
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
// for equivalent operations with different register constraints; it just
@@ -4119,8 +4646,10 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>;
-defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>;
+defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
+ NEONvshrs>;
+defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
+ NEONvshru>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -4129,18 +4658,18 @@ defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
- ValueType OpTy, SDNode OpNode>
+ ValueType OpTy, Operand ImmTy, SDNode OpNode>
: N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
- ResTy, OpTy, OpNode> {
+ ResTy, OpTy, ImmTy, OpNode> {
let Inst{21-16} = op21_16;
let DecoderMethod = "DecodeVSHLMaxInstruction";
}
def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
- v8i16, v8i8, NEONvshlli>;
+ v8i16, v8i8, imm8, NEONvshlli>;
def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
- v4i32, v4i16, NEONvshlli>;
+ v4i32, v4i16, imm16, NEONvshlli>;
def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
- v2i64, v2i32, NEONvshlli>;
+ v2i64, v2i32, imm32, NEONvshlli>;
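+// These encodings exist only for a shift equal to the source element width,
+// e.g. "vshll.i8 q8, d16, #8" (registers illustrative).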
// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
@@ -4154,8 +4683,10 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
"vrshl", "u", int_arm_neon_vrshiftu>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>;
-defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>;
+defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", "VRSHRs",
+ NEONvrshrs>;
+defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", "VRSHRu",
+ NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -4298,13 +4829,15 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
-// Vector Swap -- for disassembly only.
+// Vector Swap
def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
- (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
- "vswp", "$Vd, $Vm", "", []>;
+ (outs DPR:$Vd, DPR:$Vm), (ins DPR:$in1, DPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
- (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
- "vswp", "$Vd, $Vm", "", []>;
+ (outs QPR:$Vd, QPR:$Vm), (ins QPR:$in1, QPR:$in2),
+ NoItinerary, "vswp", "$Vd, $Vm", "$in1 = $Vd, $in2 = $Vm",
+ []>;
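+// Both registers are read and written, e.g. "vswp d0, d1" exchanges the
+// contents of d0 and d1; the tied operands expose both updates to the
+// register allocator.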
// Vector Move Operations.
@@ -4318,89 +4851,98 @@ def : InstAlias<"vmov${p} $Vd, $Vm",
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI16:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> {
let Inst{9} = SIMM{9};
}
def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmVMOVI32:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> {
let Inst{11-8} = SIMM{11-8};
}
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd),
- (ins nModImm:$SIMM), IIC_VMOVImm,
+ (ins nImmSplatI64:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
+
+def VMOVv2f32 : N1ModImm<1, 0b000, 0b1111, 0, 0, 0, 1, (outs DPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set DPR:$Vd, (v2f32 (NEONvmovFPImm timm:$SIMM)))]>;
+def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
+ (ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
+ "vmov", "f32", "$Vd, $SIMM", "",
+ [(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "s8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "s16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex8:$lane),
+ IIC_VMOVSI, "vmov", "u8", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1},
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex16:$lane),
+ IIC_VMOVSI, "vmov", "u16", "$R, $V$lane",
[(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00,
- (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane),
- IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]",
+ (outs GPR:$R), (ins DPR:$V, VectorIndex32:$lane),
+ IIC_VMOVSI, "vmov", "32", "$R, $V$lane",
[(set GPR:$R, (extractelt (v2i32 DPR:$V),
imm:$lane))]> {
let Inst{21} = lane{0};
@@ -4442,24 +4984,24 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2),
let Constraints = "$src1 = $V" in {
def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "8", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex8:$lane),
+ IIC_VMOVISL, "vmov", "8", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v8i8 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{2};
let Inst{6-5} = lane{1-0};
}
def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "16", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex16:$lane),
+ IIC_VMOVISL, "vmov", "16", "$V$lane, $R",
[(set DPR:$V, (vector_insert (v4i16 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{1};
let Inst{6} = lane{0};
}
def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V),
- (ins DPR:$src1, GPR:$R, nohash_imm:$lane),
- IIC_VMOVISL, "vmov", "32", "$V[$lane], $R",
+ (ins DPR:$src1, GPR:$R, VectorIndex32:$lane),
+ IIC_VMOVISL, "vmov", "32", "$V$lane, $R",
[(set DPR:$V, (insertelt (v2i32 DPR:$src1),
GPR:$R, imm:$lane))]> {
let Inst{21} = lane{0};
@@ -4627,6 +5169,9 @@ defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
// VMOVL : Vector Lengthening Move
defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
+def : Pat<(v8i16 (anyext (v8i8 DPR:$Vm))), (VMOVLuv8i16 DPR:$Vm)>;
+def : Pat<(v4i32 (anyext (v4i16 DPR:$Vm))), (VMOVLuv4i32 DPR:$Vm)>;
+def : Pat<(v2i64 (anyext (v2i32 DPR:$Vm))), (VMOVLuv2i64 DPR:$Vm)>;
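+// An any-extend has no sign requirement, so the unsigned VMOVL forms are
+// reused, e.g. an anyext from v8i8 emits "vmovl.u8".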
// Vector Conversions.
@@ -4650,6 +5195,7 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
v4f32, v4i32, uint_to_fp>;
// VCVT : Vector Convert Between Floating-Point and Fixed-Point.
+let DecoderMethod = "DecodeVCVTD" in {
def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4658,7 +5204,9 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+}
+let DecoderMethod = "DecodeVCVTQ" in {
def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
@@ -4667,6 +5215,7 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+}
// VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -4759,34 +5308,34 @@ def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
// VEXT : Vector Extract
-class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins DPR:$Vn, DPR:$Vm, immTy:$index), NVExtFrm,
IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
- (Ty DPR:$Vm), imm:$index)))]> {
+ (Ty DPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
- (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+ (ins QPR:$Vn, QPR:$Vm, imm0_15:$index), NVExtFrm,
IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
[(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
- (Ty QPR:$Vm), imm:$index)))]> {
+ (Ty QPR:$Vm), imm:$index)))]> {
bits<4> index;
let Inst{11-8} = index{3-0};
}
-def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> {
let Inst{11-8} = index{3-0};
}
-def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
@@ -4795,17 +5344,21 @@ def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
(i32 imm:$index))),
(VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
-def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+def VEXTq8 : VEXTq<"vext", "8", v16i8, imm0_15> {
let Inst{11-8} = index{3-0};
}
-def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+def VEXTq16 : VEXTq<"vext", "16", v8i16, imm0_7> {
let Inst{11-9} = index{2-0};
let Inst{8} = 0b0;
}
-def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
+def VEXTq64 : VEXTq<"vext", "64", v2i64, imm0_1> {
+ let Inst{11} = index{0};
+ let Inst{10-8} = 0b000;
+}
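+// e.g. "vext.64 q0, q1, q2, #1" produces the high double of q1 followed by
+// the low double of q2 (registers illustrative).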
def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
(v4f32 QPR:$Vm),
(i32 imm:$index))),
@@ -4825,7 +5378,9 @@ def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
-def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vuzp${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
@@ -4835,7 +5390,9 @@ def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
-def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
+def : NEONInstAlias<"vzip${p}.32 $Dd, $Dm",
+ (VTRNd32 DPR:$Dd, DPR:$Dm, pred:$p)>;
def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
@@ -4847,27 +5404,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
let DecoderMethod = "DecodeTBLInstruction" in {
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
- "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
- [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+ (ins VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "",
+ [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 VecListOneD:$Vn, DPR:$Vm)))]>;
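+// e.g. "vtbl.8 d0, {d16}, d17": each byte of d17 indexes the d16 table;
+// out-of-range indices yield 0 (vtbx instead leaves those bytes unchanged).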
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+ (ins VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+ (ins VecListThreeD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
- (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+ (ins VecListFourD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+ "vtbl", "8", "$Vd, $Vn, $Vm", "", []>;
} // hasExtraSrcRegAllocReq = 1
-def VTBL2Pseudo
- : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
def VTBL3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
def VTBL4Pseudo
@@ -4876,31 +5431,28 @@ def VTBL4Pseudo
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
- "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+ (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd",
[(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
- DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+ DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+ (ins DPR:$orig, VecListDPair:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+ "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
- (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+ (ins DPR:$orig, VecListThreeD:$Vn, DPR:$Vm),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
def VTBX4
- : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
- "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+ : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd),
+ (ins DPR:$orig, VecListFourD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+ "vtbx", "8", "$Vd, $Vn, $Vm",
"$orig = $Vd", []>;
} // hasExtraSrcRegAllocReq = 1
-def VTBX2Pseudo
- : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
- IIC_VTBX2, "$orig = $dst", []>;
def VTBX3Pseudo
: PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
IIC_VTBX3, "$orig = $dst", []>;
@@ -4950,9 +5502,13 @@ def : N3VSPat<fadd, VADDfd>;
def : N3VSPat<fsub, VSUBfd>;
def : N3VSPat<fmul, VMULfd>;
def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
- Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+ Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fadd, VFMAfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
+def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
+ Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
def : N2VSPat<fabs, VABSfd>;
def : N2VSPat<fneg, VNEGfd>;
def : N3VSPat<NEONfmax, VMAXfd>;
@@ -5028,3 +5584,1448 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+
+// Vector lengthening move with load, matching extending loads.
+
+// extload, zextload and sextload for a standard lengthening load. Example:
+// Lengthen_Single<"8", "i16", "i8"> = Pat<(v8i16 (extloadvi8 addrmode5:$addr)),
+// (VMOVLuv8i16 (VLDRD addrmode5:$addr))>;
+multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLuv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (!cast<Instruction>("VMOVLsv" # DestLanes # DestTy)
+ (VLDRD addrmode5:$addr))>;
+}
+
+// extload, zextload and sextload for a lengthening load which only uses
+// half the lanes available. Example:
+// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
+// Pat<(v4i16 (extloadvi8 addrmode5:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)>;
+multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
+ string InsnLanes, string InsnTy> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)>;
+}
+
+// extload, zextload and sextload for a lengthening load followed by another
+// lengthening load, to quadruple the initial element width.
+// Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0> =
+// Pat<(v4i32 (extloadvi8 addrmode5:$addr)),
+// (EXTRACT_SUBREG (VMOVLuv4i32
+// (EXTRACT_SUBREG (VMOVLuv8i16 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+// (VLDRS addrmode5:$addr),
+// ssub_0)),
+// dsub_0)),
+// qsub_0)>;
+multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy,
+ string Insn1Lanes, string Insn1Ty, string Insn2Lanes,
+ string Insn2Ty, SubRegIndex RegType> {
+ def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("extloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("zextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+ def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
+ (!cast<PatFrag>("sextloadv" # SrcTy) addrmode5:$addr)),
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
+ (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr),
+ ssub_0)), dsub_0)),
+ RegType)>;
+}
+
+defm : Lengthen_Single<"8", "i16", "i8">; // v8i8 -> v8i16
+defm : Lengthen_Single<"4", "i32", "i16">; // v4i16 -> v4i32
+defm : Lengthen_Single<"2", "i64", "i32">; // v2i32 -> v2i64
+
+defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16
+defm : Lengthen_HalfSingle<"2", "i16", "i8", "8", "i16">; // v2i8 -> v2i16
+defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32
+
+// Double lengthening - v4i8 -> v4i16 -> v4i32
+defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32", qsub_0>;
+// v2i8 -> v2i16 -> v2i32
+defm : Lengthen_Double<"2", "i32", "i8", "8", "i16", "4", "i32", dsub_0>;
+// v2i16 -> v2i32 -> v2i64
+defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64", qsub_0>;
+
+// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
+def : Pat<(v2i64 (extloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (zextloadvi8 addrmode5:$addr)),
+ (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
+def : Pat<(v2i64 (sextloadvi8 addrmode5:$addr)),
+ (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)), (VLDRS addrmode5:$addr), ssub_0)),
+ dsub_0)), dsub_0))>;
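+// Roughly, the sext variant above selects to (registers illustrative):
+//   vldr      s0, [...]
+//   vmovl.s8  q8, d0
+//   vmovl.s16 q8, d16
+//   vmovl.s32 q8, d16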
+
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+def : VFP2InstAlias<"fmdhr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 1, pred:$p)>;
+def : VFP2InstAlias<"fmdlr${p} $Dd, $Rn",
+ (VSETLNi32 DPR:$Dd, GPR:$Rn, 0, pred:$p)>;
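+// E.g. the pre-UAL "fmdhr d0, r1" is accepted for "vmov.32 d0[1], r1".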
+
+
+// VADD two-operand aliases.
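+// E.g. "vadd.i32 q0, q1" is shorthand for "vadd.i32 q0, q0, q1".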
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.i8 $Vdn, $Vm",
+ (VADDv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i16 $Vdn, $Vm",
+ (VADDv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i32 $Vdn, $Vm",
+ (VADDv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.i64 $Vdn, $Vm",
+ (VADDv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vadd${p}.f32 $Vdn, $Vm",
+ (VADDfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSUB two-operand aliases.
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.i8 $Vdn, $Vm",
+ (VSUBv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i16 $Vdn, $Vm",
+ (VSUBv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i32 $Vdn, $Vm",
+ (VSUBv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.i64 $Vdn, $Vm",
+ (VSUBv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vsub${p}.f32 $Vdn, $Vm",
+ (VSUBfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VADDW two-operand aliases.
+def : NEONInstAlias<"vaddw${p}.s8 $Vdn, $Vm",
+ (VADDWsv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s16 $Vdn, $Vm",
+ (VADDWsv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.s32 $Vdn, $Vm",
+ (VADDWsv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u8 $Vdn, $Vm",
+ (VADDWuv8i16 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u16 $Vdn, $Vm",
+ (VADDWuv4i32 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vaddw${p}.u32 $Vdn, $Vm",
+ (VADDWuv2i64 QPR:$Vdn, QPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// VAND/VBIC/VEOR/VORR accept but do not require a type suffix.
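+// E.g. "vand.i16 d0, d1, d2" is accepted as equivalent to "vand d0, d1, d2".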
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm",
+ (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbic${p}", "$Vd, $Vn, $Vm",
+ (VBICq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm",
+ (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+// ... two-operand aliases
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vand${p} $Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vbic${p} $Vdn, $Vm",
+ (VBICq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"veor${p} $Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vorr${p} $Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vand${p}", "$Vdn, $Vm",
+ (VANDq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"veor${p}", "$Vdn, $Vm",
+ (VEORq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
+ (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VMUL two-operand aliases.
+def : NEONInstAlias<"vmul${p}.p8 $Qdn, $Qm",
+ (VMULpq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Qdn, $Qm",
+ (VMULv16i8 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Qm",
+ (VMULv8i16 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Qm",
+ (VMULv4i32 QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.p8 $Ddn, $Dm",
+ (VMULpd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i8 $Ddn, $Dm",
+ (VMULv8i8 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm",
+ (VMULv4i16 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm",
+ (VMULv2i32 DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Qm",
+ (VMULfq QPR:$Qdn, QPR:$Qdn, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm",
+ (VMULfd DPR:$Ddn, DPR:$Ddn, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i16 $Ddn, $Dm$lane",
+ (VMULslv4i16 DPR:$Ddn, DPR:$Ddn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i16 $Qdn, $Dm$lane",
+ (VMULslv8i16 QPR:$Qdn, QPR:$Qdn, DPR_8:$Dm,
+ VectorIndex16:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.i32 $Ddn, $Dm$lane",
+ (VMULslv2i32 DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.i32 $Qdn, $Dm$lane",
+ (VMULslv4i32 QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+def : NEONInstAlias<"vmul${p}.f32 $Ddn, $Dm$lane",
+ (VMULslfd DPR:$Ddn, DPR:$Ddn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+def : NEONInstAlias<"vmul${p}.f32 $Qdn, $Dm$lane",
+ (VMULslfq QPR:$Qdn, QPR:$Qdn, DPR_VFP2:$Dm,
+ VectorIndex32:$lane, pred:$p)>;
+
+// VQADD (register) two-operand aliases.
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqadd${p}.s8 $Vdn, $Vm",
+ (VQADDsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s16 $Vdn, $Vm",
+ (VQADDsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s32 $Vdn, $Vm",
+ (VQADDsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.s64 $Vdn, $Vm",
+ (VQADDsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u8 $Vdn, $Vm",
+ (VQADDuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u16 $Vdn, $Vm",
+ (VQADDuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u32 $Vdn, $Vm",
+ (VQADDuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqadd${p}.u64 $Vdn, $Vm",
+ (VQADDuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHL (immediate) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv8i8 DPR:$Vdn, DPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv4i16 DPR:$Vdn, DPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv2i32 DPR:$Vdn, DPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv1i64 DPR:$Vdn, DPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.i8 $Vdn, $imm",
+ (VSHLiv16i8 QPR:$Vdn, QPR:$Vdn, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i16 $Vdn, $imm",
+ (VSHLiv8i16 QPR:$Vdn, QPR:$Vdn, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i32 $Vdn, $imm",
+ (VSHLiv4i32 QPR:$Vdn, QPR:$Vdn, imm0_31:$imm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.i64 $Vdn, $imm",
+ (VSHLiv2i64 QPR:$Vdn, QPR:$Vdn, imm0_63:$imm, pred:$p)>;
+
+// VSHL (register) two-operand aliases.
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv1i64 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vshl${p}.s8 $Vdn, $Vm",
+ (VSHLsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s16 $Vdn, $Vm",
+ (VSHLsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s32 $Vdn, $Vm",
+ (VSHLsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.s64 $Vdn, $Vm",
+ (VSHLsv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u8 $Vdn, $Vm",
+ (VSHLuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u16 $Vdn, $Vm",
+ (VSHLuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u32 $Vdn, $Vm",
+ (VSHLuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vshl${p}.u64 $Vdn, $Vm",
+ (VSHLuv2i64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// VSHR (immediate) two-operand aliases.
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.s8 $Vdn, $imm",
+ (VSHRsv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s16 $Vdn, $imm",
+ (VSHRsv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s32 $Vdn, $imm",
+ (VSHRsv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.s64 $Vdn, $imm",
+ (VSHRsv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv8i8 DPR:$Vdn, DPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv4i16 DPR:$Vdn, DPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv2i32 DPR:$Vdn, DPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv1i64 DPR:$Vdn, DPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vshr${p}.u8 $Vdn, $imm",
+ (VSHRuv16i8 QPR:$Vdn, QPR:$Vdn, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u16 $Vdn, $imm",
+ (VSHRuv8i16 QPR:$Vdn, QPR:$Vdn, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u32 $Vdn, $imm",
+ (VSHRuv4i32 QPR:$Vdn, QPR:$Vdn, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vshr${p}.u64 $Vdn, $imm",
+ (VSHRuv2i64 QPR:$Vdn, QPR:$Vdn, shr_imm64:$imm, pred:$p)>;
+
+// VLD1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
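+// E.g. "vld1.8 {d16[2]}, [r0]"; the parser rewrites these pseudos to the real
+// VLD1LN* instructions once the lane index is known.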
+def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST1 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST1LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST1LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST1LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST2 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST2LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST2LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
+ (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST2LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD3DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
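+// E.g. "vld3.8 {d16, d17, d18}, [r0]" is matched by these pseudos and later
+// expanded to the real VLD3 instructions.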
+def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST3 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST3LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST3 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST3dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
+ (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+def VST3dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST3qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListThreeQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VLD4 all-lanes pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4DUPdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+def VLD4DUPdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4DUPqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VLD4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VLD4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VLD4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VLD4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VLD4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VST4 single-lane pseudo-instructions. These need special handling for
+// the lane index that an InstAlias can't handle, so we use these instead.
+def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4LNdWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNqWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+def VST4LNdWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNdWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4LNqWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+
+// VST4 multiple structure pseudo-instructions. These need special handling for
+// the vector operands that the normal instructions don't yet model.
+// FIXME: Remove these when the register classes and instructions are updated.
+def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+
+def VST4dWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4qWB_fixed_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
+ (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+def VST4dWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4dWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourD:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_8 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_16 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+def VST4qWB_register_Asm_32 :
+ NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
+ (ins VecListFourQ:$list, addrmode6:$addr,
+ rGPR:$Rm, pred:$p)>;
+
+// VMOV takes an optional datatype suffix
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
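+// E.g. "vmov q0, q1" assembles as "vorr q0, q1, q1".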
+
+// VCLE (register) is an assembler alias for VCGE w/ the operands reversed.
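+// E.g. "vcle.s16 d0, d1, d2" assembles as "vcge.s16 d0, d2, d1".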
+// D-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm",
+ (VCGEsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Dd, $Dn, $Dm",
+ (VCGEsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Dd, $Dn, $Dm",
+ (VCGEsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Dd, $Dn, $Dm",
+ (VCGEuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Dd, $Dn, $Dm",
+ (VCGEuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
+ (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
+ (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
+ (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s16 $Qd, $Qn, $Qm",
+ (VCGEsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.s32 $Qd, $Qn, $Qm",
+ (VCGEsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u8 $Qd, $Qn, $Qm",
+ (VCGEuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u16 $Qd, $Qn, $Qm",
+ (VCGEuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
+ (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
+ (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
+// D-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Dd, $Dn, $Dm",
+ (VCGTsv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Dd, $Dn, $Dm",
+ (VCGTsv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Dd, $Dn, $Dm",
+ (VCGTsv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Dd, $Dn, $Dm",
+ (VCGTuv8i8 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Dd, $Dn, $Dm",
+ (VCGTuv4i16 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
+ (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
+ (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+// Q-register versions.
+def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm",
+ (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s16 $Qd, $Qn, $Qm",
+ (VCGTsv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.s32 $Qd, $Qn, $Qm",
+ (VCGTsv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u8 $Qd, $Qn, $Qm",
+ (VCGTuv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u16 $Qd, $Qn, $Qm",
+ (VCGTuv8i16 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm",
+ (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm",
+ (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+
+// Two-operand variants for VEXT
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTd8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTd16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTd32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, imm0_1:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vext${p}.8 $Vdn, $Vm, $imm",
+ (VEXTq8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_15:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.16 $Vdn, $Vm, $imm",
+ (VEXTq16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_7:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.32 $Vdn, $Vm, $imm",
+ (VEXTq32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_3:$imm, pred:$p)>;
+def : NEONInstAlias<"vext${p}.64 $Vdn, $Vm, $imm",
+ (VEXTq64 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, imm0_1:$imm, pred:$p)>;
+
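A minimal sketch of what the two-operand VEXT aliases accept (illustrative only, arbitrary registers):

// Illustrative sketch only: the destination doubles as the first source.
// vext.8  d0, d1, #3    is encoded as   vext.8  d0, d0, d1, #3
// vext.32 q2, q3, #1    is encoded as   vext.32 q2, q2, q3, #1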
+// Two-operand variants for VQDMULH
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vqdmulh${p}.s16 $Vdn, $Vm",
+ (VQDMULHv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vqdmulh${p}.s32 $Vdn, $Vm",
+ (VQDMULHv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VMAX.
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
+ (VMAXsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
+ (VMAXsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
+ (VMAXsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
+ (VMAXuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
+ (VMAXuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
+ (VMAXuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
+ (VMAXfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vmax${p}.s8 $Vdn, $Vm",
+ (VMAXsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s16 $Vdn, $Vm",
+ (VMAXsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.s32 $Vdn, $Vm",
+ (VMAXsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u8 $Vdn, $Vm",
+ (VMAXuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u16 $Vdn, $Vm",
+ (VMAXuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.u32 $Vdn, $Vm",
+ (VMAXuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmax${p}.f32 $Vdn, $Vm",
+ (VMAXfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VMIN.
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
+ (VMINsv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
+ (VMINsv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
+ (VMINsv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
+ (VMINuv8i8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
+ (VMINuv4i16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
+ (VMINuv2i32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
+ (VMINfd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+def : NEONInstAlias<"vmin${p}.s8 $Vdn, $Vm",
+ (VMINsv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s16 $Vdn, $Vm",
+ (VMINsv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.s32 $Vdn, $Vm",
+ (VMINsv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u8 $Vdn, $Vm",
+ (VMINuv16i8 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u16 $Vdn, $Vm",
+ (VMINuv8i16 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.u32 $Vdn, $Vm",
+ (VMINuv4i32 QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmin${p}.f32 $Vdn, $Vm",
+ (VMINfq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VPADD.
+def : NEONInstAlias<"vpadd${p}.i8 $Vdn, $Vm",
+ (VPADDi8 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.i16 $Vdn, $Vm",
+ (VPADDi16 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.i32 $Vdn, $Vm",
+ (VPADDi32 DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+def : NEONInstAlias<"vpadd${p}.f32 $Vdn, $Vm",
+ (VPADDf DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>;
+
+// Two-operand variants for VSRA.
+// Signed.

+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
+ (VSRAsv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
+ (VSRAsv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
+ (VSRAsv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
+ (VSRAsv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsra${p}.s8 $Vdm, $imm",
+ (VSRAsv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s16 $Vdm, $imm",
+ (VSRAsv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s32 $Vdm, $imm",
+ (VSRAsv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.s64 $Vdm, $imm",
+ (VSRAsv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Unsigned.
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
+ (VSRAuv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
+ (VSRAuv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
+ (VSRAuv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
+ (VSRAuv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsra${p}.u8 $Vdm, $imm",
+ (VSRAuv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u16 $Vdm, $imm",
+ (VSRAuv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u32 $Vdm, $imm",
+ (VSRAuv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsra${p}.u64 $Vdm, $imm",
+ (VSRAuv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Two-operand variants for VSRI.
+def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
+ (VSRIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
+ (VSRIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
+ (VSRIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
+ (VSRIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsri${p}.8 $Vdm, $imm",
+ (VSRIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.16 $Vdm, $imm",
+ (VSRIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.32 $Vdm, $imm",
+ (VSRIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsri${p}.64 $Vdm, $imm",
+ (VSRIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// Two-operand variants for VSLI.
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
+ (VSLIv8i8 DPR:$Vdm, DPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
+ (VSLIv4i16 DPR:$Vdm, DPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
+ (VSLIv2i32 DPR:$Vdm, DPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
+ (VSLIv1i64 DPR:$Vdm, DPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+def : NEONInstAlias<"vsli${p}.8 $Vdm, $imm",
+ (VSLIv16i8 QPR:$Vdm, QPR:$Vdm, shr_imm8:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.16 $Vdm, $imm",
+ (VSLIv8i16 QPR:$Vdm, QPR:$Vdm, shr_imm16:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.32 $Vdm, $imm",
+ (VSLIv4i32 QPR:$Vdm, QPR:$Vdm, shr_imm32:$imm, pred:$p)>;
+def : NEONInstAlias<"vsli${p}.64 $Vdm, $imm",
+ (VSLIv2i64 QPR:$Vdm, QPR:$Vdm, shr_imm64:$imm, pred:$p)>;
+
+// VSWP allows, but does not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPd DPR:$Vd, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm",
+ (VSWPq QPR:$Vd, QPR:$Vm, pred:$p)>;
+
+// VBIF, VBIT, and VBSL allow, but do not require, a type suffix.
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbif${p}", "$Vd, $Vn, $Vm",
+ (VBIFq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbit${p}", "$Vd, $Vn, $Vm",
+ (VBITq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+defm : NEONDTAnyInstAlias<"vbsl${p}", "$Vd, $Vn, $Vm",
+ (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>;
+
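For example (illustrative only, arbitrary registers), NEONDTAnyInstAlias lets any data-type suffix, or none, match the single untyped encoding:

// Illustrative sketch only: all of these assemble to the same VSWPd encoding.
// vswp     d0, d1
// vswp.8   d0, d1
// vswp.u32 d0, d1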
+// "vmov Rd, #-imm" can be handled via "vmvn".
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $imm",
+ (VMVNv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32Neg:$imm, pred:$p)>;
+
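A worked instance of the complemented-immediate trick above (hypothetical values): an i32 pattern VMOV cannot encode directly may still be the bitwise NOT of an encodable one.

// Illustrative sketch only: 0xFFFFFFFE is not a valid VMOV.i32 immediate,
// but its complement 0x00000001 is, so the alias flips the instruction:
// vmov.i32 d0, #0xFFFFFFFE   is encoded as   vmvn.i32 d0, #0x00000001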
+// 'gas' compatibility aliases for quad-word instructions. Strictly speaking,
+// these should be restricted to just the Q-register variants, but the
+// register classes are enough to match correctly regardless, so we keep it
+// simple and just use MnemonicAlias.
+def : NEONMnemonicAlias<"vbicq", "vbic">;
+def : NEONMnemonicAlias<"vandq", "vand">;
+def : NEONMnemonicAlias<"veorq", "veor">;
+def : NEONMnemonicAlias<"vorrq", "vorr">;
+
+def : NEONMnemonicAlias<"vmovq", "vmov">;
+def : NEONMnemonicAlias<"vmvnq", "vmvn">;
+// Explicit versions for floating point so that the FPImm variants get
+// handled early. The parser gets confused otherwise.
+def : NEONMnemonicAlias<"vmovq.f32", "vmov.f32">;
+def : NEONMnemonicAlias<"vmovq.f64", "vmov.f64">;
+
+def : NEONMnemonicAlias<"vaddq", "vadd">;
+def : NEONMnemonicAlias<"vsubq", "vsub">;
+
+def : NEONMnemonicAlias<"vminq", "vmin">;
+def : NEONMnemonicAlias<"vmaxq", "vmax">;
+
+def : NEONMnemonicAlias<"vmulq", "vmul">;
+
+def : NEONMnemonicAlias<"vabsq", "vabs">;
+
+def : NEONMnemonicAlias<"vshlq", "vshl">;
+def : NEONMnemonicAlias<"vshrq", "vshr">;
+
+def : NEONMnemonicAlias<"vcvtq", "vcvt">;
+
+def : NEONMnemonicAlias<"vcleq", "vcle">;
+def : NEONMnemonicAlias<"vceqq", "vceq">;
+
+def : NEONMnemonicAlias<"vzipq", "vzip">;
+def : NEONMnemonicAlias<"vswpq", "vswp">;
+
+def : NEONMnemonicAlias<"vrecpeq.f32", "vrecpe.f32">;
+def : NEONMnemonicAlias<"vrecpeq.u32", "vrecpe.u32">;
+
+
+// Alias for loading floating-point immediates that aren't representable
+// using the vmov.f32 encoding but whose bit pattern is representable
+// using the .i32 encoding.
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv4i32 QPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.f32 $Vd, $imm",
+ (VMOVv2i32 DPR:$Vd, nImmVMOVI32:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index cedb54799db0..6335229d3c2a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//===-- ARMInstrThumb.td - Thumb support for ARM -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -91,6 +91,12 @@ def t_imm0_508s4 : Operand<i32> {
let ParserMatchClass = t_imm0_508s4_asmoperand;
let OperandType = "OPERAND_IMMEDIATE";
}
+// Alias use only, so no printer is necessary.
+def t_imm0_508s4_neg_asmoperand: AsmOperandClass { let Name = "Imm0_508s4Neg"; }
+def t_imm0_508s4_neg : Operand<i32> {
+ let ParserMatchClass = t_imm0_508s4_neg_asmoperand;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
// Define Thumb specific addressing modes.
@@ -345,6 +351,11 @@ def tSUBspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm),
let DecoderMethod = "DecodeThumbAddSPImm";
}
+def : tInstAlias<"add${p} sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+def : tInstAlias<"add${p} sp, sp, $imm",
+ (tSUBspi SP, t_imm0_508s4_neg:$imm, pred:$p)>;
+
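Sketch of what the new negative-immediate SP aliases accept (illustrative values):

// Illustrative sketch only: negative SP adjustments fold into tSUBspi.
// add sp, sp, #-32   is encoded as   sub sp, sp, #32
// add sp, #-8        is encoded as   sub sp, sp, #8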
// Can optionally specify SP as a three operand instruction.
def : tInstAlias<"add${p} sp, sp, $imm",
(tADDspi SP, t_imm0_508s4:$imm, pred:$p)>;
@@ -387,6 +398,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
bits<4> Rm;
let Inst{6-3} = Rm;
let Inst{2-0} = 0b000;
+ let Unpredictable{2-0} = 0b111;
}
}
@@ -404,15 +416,13 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// prevent stack-pointer assignments that appear immediately before calls from
// potentially appearing dead.
let isCall = 1,
- // On non-Darwin platforms R9 is callee-saved.
- Defs = [R0, R1, R2, R3, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [SP] in {
+ Defs = [LR], Uses = [SP] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
(outs), (ins pred:$p, t_bltarget:$func, variable_ops), IIC_Br,
"bl${p}\t$func",
[(ARMtcall tglobaladdr:$func)]>,
- Requires<[IsThumb, IsNotDarwin]> {
+ Requires<[IsThumb]> {
bits<22> func;
let Inst{26} = func{21};
let Inst{25-16} = func{20-11};
@@ -426,7 +436,7 @@ let isCall = 1,
(outs), (ins pred:$p, t_blxtarget:$func, variable_ops), IIC_Br,
"blx${p}\t$func",
[(ARMcall tglobaladdr:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]> {
+ Requires<[IsThumb, HasV5T]> {
bits<21> func;
let Inst{25-16} = func{20-11};
let Inst{13} = 1;
@@ -439,7 +449,7 @@ let isCall = 1,
def tBLXr : TI<(outs), (ins pred:$p, GPR:$func, variable_ops), IIC_Br,
"blx${p}\t$func",
[(ARMtcall GPR:$func)]>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>,
+ Requires<[IsThumb, HasV5T]>,
T1Special<{1,1,1,?}> { // A6.2.3 & A8.6.24;
bits<4> func;
let Inst{6-3} = func;
@@ -450,38 +460,7 @@ let isCall = 1,
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
-}
-
-let isCall = 1,
- // On Darwin R9 is call-clobbered.
- // R7 is marked as a use to prevent frame-pointer assignments from being
- // moved above / below calls.
- Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
- Uses = [R7, SP] in {
- // Also used for Thumb2
- def tBLr9 : tPseudoExpand<(outs), (ins pred:$p, t_bltarget:$func, variable_ops),
- 4, IIC_Br, [(ARMtcall tglobaladdr:$func)],
- (tBL pred:$p, t_bltarget:$func)>,
- Requires<[IsThumb, IsDarwin]>;
-
- // ARMv5T and above, also used for Thumb2
- def tBLXi_r9 : tPseudoExpand<(outs), (ins pred:$p, t_blxtarget:$func, variable_ops),
- 4, IIC_Br, [(ARMcall tglobaladdr:$func)],
- (tBLXi pred:$p, t_blxtarget:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
-
- // Also used for Thumb2
- def tBLXr_r9 : tPseudoExpand<(outs), (ins pred:$p, GPR:$func, variable_ops),
- 2, IIC_Br, [(ARMtcall GPR:$func)],
- (tBLXr pred:$p, GPR:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
-
- // ARMv4T
- def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- 4, IIC_Br,
- [(ARMcall_nolink tGPR:$func)]>,
- Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
+ Requires<[IsThumb, IsThumb1Only]>;
}
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
@@ -523,28 +502,22 @@ let isBranch = 1, isTerminator = 1 in
// Tail calls
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
- // on Darwin), so it's in ARMInstrThumb2.td.
+ // IOS versions.
+ let Uses = [SP] in {
def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
4, IIC_Br, [],
(tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
}
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
+ // tTAILJMPd: IOS version uses a Thumb2 branch (no Thumb1 tail calls
+ // on IOS), so it's in ARMInstrThumb2.td.
+ // Non-IOS version:
+ let Uses = [SP] in {
def tTAILJMPdND : tPseudoExpand<(outs),
(ins t_brtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
(tB t_brtarget:$dst, pred:$p)>,
- Requires<[IsThumb, IsNotDarwin]>;
- def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
- 4, IIC_Br, [],
- (tBX GPR:$dst, (ops 14, zero_reg))>,
- Requires<[IsThumb, IsNotDarwin]>;
+ Requires<[IsThumb, IsNotIOS]>;
}
}
@@ -652,7 +625,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
}
// Load tconstpool
-// FIXME: Use ldr.n to work around a Darwin assembler bug.
+// FIXME: Use ldr.n to work around a darwin assembler bug.
let canFoldAsLoad = 1, isReMaterializable = 1, isCodeGenOnly = 1 in
def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
"ldr", ".n\t$Rt, $addr",
@@ -666,10 +639,9 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
}
// FIXME: Remove this entry when the above ldr.n workaround is fixed.
-// For disassembly use only.
-def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
- "ldr", "\t$Rt, $addr",
- [/* disassembly only */]>,
+// For assembly/disassembly use only.
+def tLDRpciASM : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
+ "ldr", "\t$Rt, $addr", []>,
T1Encoding<{0,1,0,0,1,?}> {
// A6.2 & A8.6.59
bits<3> Rt;
@@ -1131,9 +1103,6 @@ def tRSB : // A8.6.141
"rsb", "\t$Rd, $Rn, #0",
[(set tGPR:$Rd, (ineg tGPR:$Rn))]>;
-def : tInstAlias<"neg${s}${p} $Rd, $Rm",
- (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
-
// Subtract with carry register
let Uses = [CPSR] in
def tSBC : // A8.6.151
@@ -1259,19 +1228,24 @@ def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
// preserve all of the callee-saved registers, which is exactly what we want.
// $val is a scratch register for our use.
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
-// FIXME: Non-Darwin version(s)
+// FIXME: Non-IOS version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb, IsIOS]>;
+
+let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
+ isBarrier = 1 in
+def tInt_eh_sjlj_dispatchsetup : PseudoInst<(outs), (ins), NoItinerary, []>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -1309,20 +1283,14 @@ def : T1Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Direct calls
def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>,
- Requires<[IsThumb, IsNotDarwin]>;
-def : T1Pat<(ARMtcall texternalsym:$func), (tBLr9 texternalsym:$func)>,
- Requires<[IsThumb, IsDarwin]>;
+ Requires<[IsThumb]>;
def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi_r9 texternalsym:$func)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// Indirect calls to ARM routines
def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsNotDarwin]>;
-def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr_r9 GPR:$dst)>,
- Requires<[IsThumb, HasV5T, IsDarwin]>;
+ Requires<[IsThumb, HasV5T]>;
// zextload i1 -> zextload i8
def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
@@ -1434,3 +1402,16 @@ def : InstAlias<"nop", (tMOVr R8, R8, 14, 0)>,Requires<[IsThumb, IsThumb1Only]>;
// nothing).
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : tInstAlias<"neg${s}${p} $Rd, $Rm",
+ (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>;
+
+
+// Implied destination operand forms for shifts.
+def : tInstAlias<"lsl${s}${p} $Rdm, $imm",
+ (tLSLri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm0_31:$imm, pred:$p)>;
+def : tInstAlias<"lsr${s}${p} $Rdm, $imm",
+ (tLSRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
+def : tInstAlias<"asr${s}${p} $Rdm, $imm",
+ (tASRri tGPR:$Rdm, cc_out:$s, tGPR:$Rdm, imm_sr:$imm, pred:$p)>;
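Illustrative mapping for the implied-destination shift aliases (arbitrary register and shift amounts, not part of the patch):

// Illustrative sketch only: the destination doubles as the shifted source.
// lsls r1, #3    is encoded as   lsls r1, r1, #3
// asrs r2, #5    is encoded as   asrs r2, r2, #5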
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 05dcc8993969..e6fb9d5f01eb 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1,4 +1,4 @@
-//===- ARMInstrThumb2.td - Thumb2 support for ARM -------------------------===//
+//===-- ARMInstrThumb2.td - Thumb2 support for ARM ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -65,7 +65,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
-def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
+def t2_so_imm_asmoperand : ImmAsmOperand { let Name = "T2SOImm"; }
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
@@ -76,26 +76,39 @@ def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
-def t2_so_imm_not : Operand<i32>,
- PatLeaf<(imm), [{
+// Note: this pattern doesn't require an encoder method and such, as it's
+// only used on aliases (Pat<> and InstAlias<>). The actual encoding
+// is handled by the destination instructions, which use t2_so_imm.
+def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; }
+def t2_so_imm_not : Operand<i32>, PatLeaf<(imm), [{
return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_not_XFORM>;
+}], t2_so_imm_not_XFORM> {
+ let ParserMatchClass = t2_so_imm_not_asmoperand;
+}
// t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm.
-def t2_so_imm_neg : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1;
-}], t2_so_imm_neg_XFORM>;
+def t2_so_imm_neg_asmoperand : AsmOperandClass { let Name = "T2SOImmNeg"; }
+def t2_so_imm_neg : Operand<i32>, PatLeaf<(imm), [{
+ int64_t Value = -(int)N->getZExtValue();
+ return Value && ARM_AM::getT2SOImmVal(Value) != -1;
+}], t2_so_imm_neg_XFORM> {
+ let ParserMatchClass = t2_so_imm_neg_asmoperand;
+}
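A hedged sanity check on the new t2_so_imm_neg predicate: the negation must itself be a valid Thumb2 modified immediate, and zero is excluded by the 'Value &&' guard (presumably so a plain #0 keeps matching the add forms).

// Illustrative sketch only: values accepted by t2_so_imm_neg.
// #-1    -> negation 1 is a valid T2 modified immediate      -> matches
// #-256  -> negation 256 (0x100) is also valid               -> matches
// #0     -> rejected outright by the 'Value &&' guard        -> no match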
/// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
-def imm0_4095 : Operand<i32>,
- ImmLeaf<i32, [{
+def imm0_4095_asmoperand: ImmAsmOperand { let Name = "Imm0_4095"; }
+def imm0_4095 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 4096;
-}]>;
+}]> {
+ let ParserMatchClass = imm0_4095_asmoperand;
+}
-def imm0_4095_neg : PatLeaf<(i32 imm), [{
+def imm0_4095_neg_asmoperand: AsmOperandClass { let Name = "Imm0_4095Neg"; }
+def imm0_4095_neg : Operand<i32>, PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 4096;
-}], imm_neg_XFORM>;
+}], imm_neg_XFORM> {
+ let ParserMatchClass = imm0_4095_neg_asmoperand;
+}
def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
@@ -129,6 +142,12 @@ def t2ldrlabel : Operand<i32> {
let PrintMethod = "printT2LdrLabelOperand";
}
+def t2ldr_pcrel_imm12_asmoperand : AsmOperandClass {let Name = "MemPCRelImm12";}
+def t2ldr_pcrel_imm12 : Operand<i32> {
+ let ParserMatchClass = t2ldr_pcrel_imm12_asmoperand;
+ // Used only by the assembler pseudo-instructions; it maps to t2ldrlabel,
+ // so it doesn't need encoder or print methods of its own.
+}
// ADR instruction labels.
def t2adrlabel : Operand<i32> {
@@ -545,6 +564,11 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
PatFrag opnode, string baseOpc, bit Commutable = 0> :
T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w"> {
+ // Assembler aliases w/ the ".w" suffix.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rd, $Rn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rd, rGPR:$Rn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
// Assembler aliases w/o the ".w" suffix.
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rd, $Rn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rd, rGPR:$Rn,
@@ -556,6 +580,10 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
cc_out:$s)>;
// and with the optional destination operand, too.
+ def : t2InstAlias<!strconcat(opc, "${s}${p}.w", " $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
def : t2InstAlias<!strconcat(opc, "${s}${p}", " $Rdn, $Rm"),
(!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
rGPR:$Rm, pred:$p,
@@ -608,25 +636,48 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
///
/// These opcodes will be converted to the real non-S opcodes by
/// AdjustInstrPostInstrSelection after giving them an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass T2I_bin_s_irs<bits<4> opcod, string opc,
- InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<InstrItinClass iii, InstrItinClass iir,
+ InstrItinClass iis, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm), iii,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_imm:$imm))]>;
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, iii,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_imm:$imm))]>;
// register
- def rr : T2sThreeReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, rGPR:$Rm), iir,
- opc, ".w\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, rGPR:$Rm))]>;
+ def rr : t2PseudoInst<(outs rGPR:$Rd), (ins GPRnopc:$Rn, rGPR:$Rm, pred:$p),
+ 4, iir,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ rGPR:$Rm))]> {
+ let isCommutable = Commutable;
+ }
// shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm), iis,
- opc, ".w\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, CPSR, (opnode GPR:$Rn, t2_so_reg:$ShiftedRm))]>;
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, iis,
+ [(set rGPR:$Rd, CPSR, (opnode GPRnopc:$Rn,
+ t2_so_reg:$ShiftedRm))]>;
+}
+}
+
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs, except selection DAG
+/// operands are reversed.
+let hasPostISelHook = 1, Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<PatFrag opnode> {
+ // shifted imm
+ def ri : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iALUi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm,
+ GPRnopc:$Rn))]>;
+ // shifted register
+ def rs : t2PseudoInst<(outs rGPR:$Rd),
+ (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p),
+ 4, IIC_iALUsi,
+ [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm,
+ GPRnopc:$Rn))]>;
}
}
@@ -735,26 +786,6 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_irs except sets 's' bit and the register
-/// version is not needed since this is only for codegen.
-///
-/// These opcodes will be converted to the real non-S opcodes by
-/// AdjustInstrPostInstrSelection after giving then an optional CPSR operand.
-let hasPostISelHook = 1, isCodeGenOnly = 1, isPseudo = 1, Defs = [CPSR] in {
-multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
- // shifted imm
- def ri : T2sTwoRegImm<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi,
- opc, ".w\t$Rd, $Rn, $imm",
- [(set rGPR:$Rd, CPSR, (opnode t2_so_imm:$imm, rGPR:$Rn))]>;
- // shifted register
- def rs : T2sTwoRegShiftedReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, opc, "\t$Rd, $Rn, $ShiftedRm",
- [(set rGPR:$Rd, CPSR, (opnode t2_so_reg:$ShiftedRm, rGPR:$Rn))]>;
-}
-}
-
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, Operand ty, PatFrag opnode,
@@ -930,7 +961,8 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc,
let DecoderMethod = "DecodeT2LoadShift";
}
- // FIXME: Is the pci variant actually needed?
+ // The pci variant is very similar to i12, but supports negative offsets
+ // from the PC.
def pci : T2Ipc <(outs target:$Rt), (ins t2ldrlabel:$addr), iii,
opc, ".w\t$Rt, $addr",
[(set target:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> {
@@ -1315,14 +1347,16 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
+
+let mayStore = 1, neverHasSideEffects = 1 in {
def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, t2addrmode_imm8:$addr),
+ (ins GPRnopc:$Rt, t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iStore_iu,
"str", "\t$Rt, $addr!",
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
@@ -1343,15 +1377,16 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb),
"$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> {
let AsmMatchConverter = "cvtStWriteBackRegT2AddrModeImm8";
}
+} // mayStore = 1, neverHasSideEffects = 1
def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb),
- (ins rGPR:$Rt, addr_offset_none:$Rn,
+ (ins GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset),
AddrModeT2_i8, IndexModePost, IIC_iStore_iu,
"str", "\t$Rt, $Rn$offset",
"$Rn = $Rn_wb,@earlyclobber $Rn_wb",
[(set GPRnopc:$Rn_wb,
- (post_store rGPR:$Rt, addr_offset_none:$Rn,
+ (post_store GPRnopc:$Rt, addr_offset_none:$Rn,
t2am_imm8_offset:$offset))]>;
def t2STRH_POST : T2Ipostldst<0, 0b01, 0, 0, (outs GPRnopc:$Rn_wb),
@@ -1398,7 +1433,6 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
(pre_truncsti16 rGPR:$Rt, GPRnopc:$Rn, t2am_imm8_offset:$offset))]>;
}
-
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
@@ -1455,7 +1489,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb),
"$addr.base = $wb", []>;
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
-// data/instruction access. These are for disassembly only.
+// data/instruction access.
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
@@ -1513,6 +1547,10 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> {
let DecoderMethod = "DecodeT2LoadShift";
}
+ // FIXME: We should have a separate 'pci' variant here. As-is we represent
+ // it via the i12 variant, which it's related to, but that means we can
+ // represent negative immediates, which aren't legal for anything except
+ // the 'pci' case (Rn == 15).
}
defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>;
@@ -1689,6 +1727,8 @@ def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr,
let Inst{14-12} = 0b000;
let Inst{7-4} = 0b0000;
}
+def : t2InstAlias<"mov${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
+ pred:$p, zero_reg)>;
def : t2InstAlias<"movs${p}.w $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
pred:$p, CPSR)>;
def : t2InstAlias<"movs${p} $Rd, $Rm", (t2MOVr GPRnopc:$Rd, GPR:$Rm,
@@ -1837,11 +1877,9 @@ defm t2SUB : T2I_bin_ii12rs<0b101, "sub",
// FIXME: Eliminate t2ADDS/t2SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
-defm t2ADDS : T2I_bin_s_irs <0b1000, "add",
- IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+defm t2ADDS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(ARMaddc node:$LHS, node:$RHS)>, 1>;
-defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
- IIC_iALUi, IIC_iALUr, IIC_iALUsi,
+defm t2SUBS : T2I_bin_s_irs <IIC_iALUi, IIC_iALUr, IIC_iALUsi,
BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
let hasPostISelHook = 1 in {
@@ -1857,8 +1895,7 @@ defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
// FIXME: Eliminate them if we can write def : Pat patterns which defines
// CPSR and the implicit def of CPSR is not needed.
-defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
- BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <BinOpFrag<(ARMsubc node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
// The assume-no-carry-in form uses the negation of the input since add/sub
@@ -2840,6 +2877,8 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
+
+let isCommutable = 1 in
def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, pred:$p),
4, IIC_iCMOVr,
@@ -2883,7 +2922,7 @@ def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst),
let isMoveImm = 1 in
def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mvn", ".w\t$Rd, $imm",
+ IIC_iCMOVi, "mvn", "\t$Rd, $imm",
[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm,
imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd"> {
@@ -2922,6 +2961,35 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
} // isCodeGenOnly = 1
+
+multiclass T2I_bincc_irs<Instruction iri, Instruction irr, Instruction irs,
+ InstrItinClass iii, InstrItinClass iir, InstrItinClass iis> {
+ // shifted imm
+ def ri : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s),
+ 4, iii, [],
+ (iri rGPR:$Rd, rGPR:$Rn, t2_so_imm:$imm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ // register
+ def rr : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s),
+ 4, iir, [],
+ (irr rGPR:$Rd, rGPR:$Rn, rGPR:$Rm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+ // shifted register
+ def rs : t2PseudoExpand<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s),
+ 4, iis, [],
+ (irs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>,
+ RegConstraint<"$Rn = $Rd">;
+} // T2I_bincc_irs
+
+defm t2ANDCC : T2I_bincc_irs<t2ANDri, t2ANDrr, t2ANDrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2ORRCC : T2I_bincc_irs<t2ORRri, t2ORRrr, t2ORRrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
+defm t2EORCC : T2I_bincc_irs<t2EORri, t2EORrr, t2EORrs,
+ IIC_iBITi, IIC_iBITr, IIC_iBITsi>;
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -3043,9 +3111,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt,
let Inst{11-8} = Rd;
let Inst{7-0} = addr{7-0};
}
-}
-
-let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
+let hasExtraSrcRegAllocReq = 1 in
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr),
AddrModeNone, 4, NoItinerary,
@@ -3054,6 +3120,7 @@ def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
bits<4> Rt2;
let Inst{11-8} = Rt2;
}
+}
def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
Requires<[IsThumb2, HasV7]> {
@@ -3081,8 +3148,9 @@ def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "", []>,
// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR,
- QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ Q0, Q1, Q2, Q3, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
@@ -3091,7 +3159,8 @@ let Defs =
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
- hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
+ hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
+ usesCustomInserter = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
@@ -3128,6 +3197,7 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{13} = target{17};
let Inst{21-16} = target{16-11};
let Inst{10-0} = target{10-0};
+ let DecoderMethod = "DecodeT2BInstruction";
}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
@@ -3195,19 +3265,32 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let DecoderMethod = "DecodeThumb2BCCInstruction";
}
-// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// Tail calls. The IOS version of thumb tail calls uses a t2 branch, so
// it goes here.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin version.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in
+ // IOS version.
+ let Uses = [SP] in
def tTAILJMPd: tPseudoExpand<(outs),
(ins uncondbrtarget:$dst, pred:$p, variable_ops),
4, IIC_Br, [],
(t2B uncondbrtarget:$dst, pred:$p)>,
- Requires<[IsThumb2, IsDarwin]>;
+ Requires<[IsThumb2, IsIOS]>;
}
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+ // mov lr, pc; b if callee is marked noreturn to avoid confusing the
+ // return stack predictor.
+ def t2BMOVPCB_CALL : tPseudoInst<(outs),
+ (ins t_bltarget:$func, variable_ops),
+ 6, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>,
+ Requires<[IsThumb]>;
+}
+
+// Direct calls
+def : T2Pat<(ARMcall_nolink texternalsym:$func),
+ (t2BMOVPCB_CALL texternalsym:$func)>,
+ Requires<[IsThumb]>;
+
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
@@ -3430,7 +3513,7 @@ def t2LDRpci_pic : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr, pclabel:$cp),
imm:$cp))]>,
Requires<[IsThumb2]>;
-// Pseudo isntruction that combines movs + predicated rsbmi
+// Pseudo instruction that combines movs + predicated rsbmi
// to implement integer ABS
let usesCustomInserter = 1, Defs = [CPSR] in {
def t2ABS : PseudoInst<(outs rGPR:$dst), (ins rGPR:$src),
@@ -3667,20 +3750,32 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
+def : t2InstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
/* from coprocessor to ARM core register */
def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
(outs GPR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2), []>;
+def : t2InstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
+ (t2MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
+ c_imm:$CRm, 0)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
@@ -3851,6 +3946,29 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm",
(t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $Rm",
+ (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm",
+ (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
+
+// add w/ negative immediates is just a sub.
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
+ (t2SUBri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095_neg:$imm, pred:$p)>;
+def : t2InstAlias<"add${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm_neg:$imm, pred:$p,
+ cc_out:$s)>;
+def : t2InstAlias<"add${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095_neg:$imm, pred:$p)>;
+
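Hedged example of the add-with-negative-immediate aliases above (arbitrary registers, exact encoding chosen by the matcher):

// Illustrative sketch only: negative add immediates fold into a sub.
// add  r0, r1, #-3   is encoded as   sub  r0, r1, #3
// adds r2, #-4       is encoded as   subs r2, r2, #4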
// Aliases for SUB without the ".w" optional width specifier.
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm",
@@ -3862,6 +3980,18 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm",
(t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+// ... and with the destination and source register combined.
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm",
+ (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${p} $Rdn, $imm",
+ (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>;
+def : t2InstAlias<"sub${s}${p}.w $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $Rm",
+ (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm",
+ (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
// Alias for compares without the ".w" optional width specifier.
def : t2InstAlias<"cmn${p} $Rn, $Rm",
@@ -3900,7 +4030,20 @@ def : t2InstAlias<"ldrsb${p} $Rt, $addr",
def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
-// Alias for MVN without the ".w" optional width specifier.
+def : t2InstAlias<"ldr${p} $Rt, $addr",
+ (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p} $Rt, $addr",
+ (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p} $Rt, $addr",
+ (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p} $Rt, $addr",
+ (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p} $Rt, $addr",
+ (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
+
+// Aliases for MVN with and without the ".w" optional width specifier.
+def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"mvn${s}${p} $Rd, $Rm",
(t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>;
def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm",
@@ -3921,6 +4064,30 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>;
+// STMIA/STMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"stm${p} $Rn, $regs",
+ (t2STMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stm${p} $Rn!, $regs",
+ (t2STMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMIA/LDMIA_UPD aliases w/o the optional .w suffix
+def : t2InstAlias<"ldm${p} $Rn, $regs",
+ (t2LDMIA GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldm${p} $Rn!, $regs",
+ (t2LDMIA_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// STMDB/STMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"stmdb${p}.w $Rn, $regs",
+ (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"stmdb${p}.w $Rn!, $regs",
+ (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
+// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix
+def : t2InstAlias<"ldmdb${p}.w $Rn, $regs",
+ (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>;
+def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs",
+ (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>;
+
// Alias for REV/REV16/REVSH without the ".w" optional width specifier.
def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>;
def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>;
@@ -4016,3 +4183,87 @@ def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot",
(t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
def : t2InstAlias<"sxth${p} $Rd, $Rm$rot",
(t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>;
+
+
+// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like
+// for isel.
+def : t2InstAlias<"mov${p} $Rd, $imm",
+ (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+def : t2InstAlias<"mvn${p} $Rd, $imm",
+ (t2MOVi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>;
+// Same for AND <--> BIC
+def : t2InstAlias<"bic${s}${p} $Rd, $Rn, $imm",
+ (t2ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"bic${s}${p} $Rdn, $imm",
+ (t2ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rd, $Rn, $imm",
+ (t2BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"and${s}${p} $Rdn, $imm",
+ (t2BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm,
+ pred:$p, cc_out:$s)>;
+// Likewise, "add Rd, t2_so_imm_neg" -> sub
+def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+def : t2InstAlias<"add${s}${p} $Rd, $imm",
+ (t2SUBri GPRnopc:$Rd, GPRnopc:$Rd, t2_so_imm_neg:$imm,
+ pred:$p, cc_out:$s)>;
+// Same for CMP <--> CMN via t2_so_imm_neg
+def : t2InstAlias<"cmp${p} $Rd, $imm",
+ (t2CMNzri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+def : t2InstAlias<"cmn${p} $Rd, $imm",
+ (t2CMPri rGPR:$Rd, t2_so_imm_neg:$imm, pred:$p)>;
+
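Worked examples for the complement (MOV/MVN) and negation (CMP/CMN) aliases above, with illustrative values only:

// Illustrative sketch only.
// mov r0, #0xFFFFFF00   -> not a valid T2 modified immediate, but its
//                          complement 0xFF is, so:   mvn r0, #0xFF
// cmp r0, #-2           -> r0 - (-2) == r0 + 2, so:  cmn r0, #2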
+
+// Wide 'mul' encoding can be specified with only two operands.
+def : t2InstAlias<"mul${p} $Rn, $Rm",
+ (t2MUL rGPR:$Rn, rGPR:$Rm, rGPR:$Rn, pred:$p)>;
+
+// "neg" is and alias for "rsb rd, rn, #0"
+def : t2InstAlias<"neg${s}${p} $Rd, $Rm",
+ (t2RSBri rGPR:$Rd, rGPR:$Rm, 0, pred:$p, cc_out:$s)>;
+
+// MOV so_reg assembler pseudos. InstAlias isn't expressive enough for
+// these, unfortunately.
+def t2MOVsi: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+def t2MOVSsi: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, t2_so_reg:$shift, pred:$p)>;
+
+def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift",
+ (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>;
+
+// ADR w/o the .w suffix
+def : t2InstAlias<"adr${p} $Rd, $addr",
+ (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>;
+
+// LDR(literal) w/ alternate [pc, #imm] syntax.
+def t2LDRpcrel : t2AsmPseudo<"ldr${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRBpcrel : t2AsmPseudo<"ldrb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRHpcrel : t2AsmPseudo<"ldrh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSBpcrel : t2AsmPseudo<"ldrsb${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def t2LDRSHpcrel : t2AsmPseudo<"ldrsh${p} $Rt, $addr",
+ (ins GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+// Version w/ the .w suffix.
+def : t2InstAlias<"ldr${p}.w $Rt, $addr",
+ (t2LDRpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrb${p}.w $Rt, $addr",
+ (t2LDRBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrh${p}.w $Rt, $addr",
+ (t2LDRHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsb${p}.w $Rt, $addr",
+ (t2LDRSBpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+def : t2InstAlias<"ldrsh${p}.w $Rt, $addr",
+ (t2LDRSHpcrel GPRnopc:$Rt, t2ldr_pcrel_imm12:$addr, pred:$p)>;
+
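Sketch of the alternate literal syntax these AsmPseudos accept (hypothetical offsets):

// Illustrative sketch only: explicit [pc, #imm] syntax for LDR (literal).
// ldr  r0, [pc, #32]    accepted via t2LDRpcrel and emitted as LDR (literal)
// ldrb r1, [pc, #-16]   negative offsets map to the pci (literal) variant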
+def : t2InstAlias<"add${p} $Rd, pc, $imm",
+ (t2ADR rGPR:$Rd, imm0_4095:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e746cf20d032..3600b889d6f1 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -1,4 +1,4 @@
-//===- ARMInstrVFP.td - VFP support for ARM ----------------*- tablegen -*-===//
+//===-- ARMInstrVFP.td - VFP support for ARM ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -61,6 +61,22 @@ def vfp_f64imm : Operand<f64>,
let ParserMatchClass = FPImmOperand;
}
+// The VCVT to/from fixed-point instructions encode the 'fbits' operand
+// (the number of fixed bits) differently than it appears in the assembly
+// source. It's encoded as "Size - fbits" where Size is the size of the
+// fixed-point representation (32 or 16) and fbits is the value appearing
+// in the assembly source, an integer in [0,16] or (0,32], depending on size.
+def fbits32_asm_operand : AsmOperandClass { let Name = "FBits32"; }
+def fbits32 : Operand<i32> {
+ let PrintMethod = "printFBits32";
+ let ParserMatchClass = fbits32_asm_operand;
+}
+
+def fbits16_asm_operand : AsmOperandClass { let Name = "FBits16"; }
+def fbits16 : Operand<i32> {
+ let PrintMethod = "printFBits16";
+ let ParserMatchClass = fbits16_asm_operand;
+}
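A worked instance of the "Size - fbits" encoding described above (values illustrative):

// Illustrative sketch only: encoded field = Size - fbits.
// vcvt.s32.f32 s0, s0, #16   -> 32-bit size, fbits 16 -> field = 32 - 16 = 16
// vcvt.f32.s16 s0, s0, #8    -> 16-bit size, fbits 8  -> field = 16 - 8  = 8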
//===----------------------------------------------------------------------===//
// Load / store Instructions.
@@ -69,11 +85,11 @@ def vfp_f64imm : Operand<f64>,
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr),
- IIC_fpLoad64, "vldr", ".64\t$Dd, $addr",
+ IIC_fpLoad64, "vldr", "\t$Dd, $addr",
[(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>;
def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
- IIC_fpLoad32, "vldr", ".32\t$Sd, $addr",
+ IIC_fpLoad32, "vldr", "\t$Sd, $addr",
[(set SPR:$Sd, (load addrmode5:$addr))]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
@@ -83,11 +99,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr),
} // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr),
- IIC_fpStore64, "vstr", ".64\t$Dd, $addr",
+ IIC_fpStore64, "vstr", "\t$Dd, $addr",
[(store (f64 DPR:$Dd), addrmode5:$addr)]>;
def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr),
- IIC_fpStore32, "vstr", ".32\t$Sd, $addr",
+ IIC_fpStore32, "vstr", "\t$Sd, $addr",
[(store SPR:$Sd, addrmode5:$addr)]> {
// Some single precision VFP instructions may be executed on both NEON and VFP
// pipelines.
@@ -190,6 +206,14 @@ def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
Requires<[HasVFP2]>;
def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
Requires<[HasVFP2]>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpush${p}", "$r",
+ (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>;
+defm : VFPDTAnyInstAlias<"vpop${p}", "$r",
+ (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>;
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -270,7 +294,7 @@ def : Pat<(fmul (fneg SPR:$a), SPR:$b),
(VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
// These are encoded as unary instructions.
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPED : ADuI<0b11101, 0b11, 0b0100, 0b11, 0,
(outs), (ins DPR:$Dd, DPR:$Dm),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, $Dm",
@@ -299,7 +323,7 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
//===----------------------------------------------------------------------===//
// FP Unary Operations.
@@ -319,7 +343,7 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0,
let D = VFPNeonA8Domain;
}
-let Defs = [FPSCR] in {
+let Defs = [FPSCR_NZCV] in {
def VCMPEZD : ADuI<0b11101, 0b11, 0b0101, 0b11, 0,
(outs), (ins DPR:$Dd),
IIC_fpCMP64, "vcmpe", ".f64\t$Dd, #0",
@@ -360,7 +384,7 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0,
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-} // Defs = [FPSCR]
+} // Defs = [FPSCR_NZCV]
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
@@ -790,127 +814,131 @@ def VTOUIRS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
// S32 (U=0, sx=1) -> SL
// U32 (U=1, sx=1) -> UL
-// FIXME: Marking these as codegen only seems wrong. They are real
-// instructions(?)
-let Constraints = "$a = $dst", isCodeGenOnly = 1 in {
+let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
-def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+// Single Precision register
+class AVConv1XInsS_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{0};
+ let Inst{15-12} = dst{4-1};
+}
+
+// Double Precision register
+class AVConv1XInsD_Encode<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : AVConv1XI<op1, op2, op3, op4, op5, oops, iops, itin, opc, asm, pattern> {
+ bits<5> dst;
+ // if dp_operation then UInt(D:Vd) else UInt(Vd:D);
+ let Inst{22} = dst{4};
+ let Inst{15-12} = dst{3-0};
+}
+
+def VTOSHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOUHS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOSLS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1110, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VTOULS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1111, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOSHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s16.f64\t$dst, $a, $fbits", []>;
-def VTOUHD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOUHD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u16.f64\t$dst, $a, $fbits", []>;
-def VTOSLD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOSLD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1110, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a, $fbits", []>;
-def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VTOULD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1111, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits", []>;
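+// Assembler view of the defs above, as a sketch:
+//   vcvt.s16.f32  s0, s0, #8    @ f32 -> s16 fixed point, 8 fraction bits
+//   vcvt.u32.f64  d2, d2, #16   @ f64 -> u32 fixed point, 16 fraction bits
+// The source register is tied to the destination by the $a = $dst constraint.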
// Fixed-Point to FP:
-def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VSHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VUHTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 0,
+ (outs SPR:$dst), (ins SPR:$a, fbits16:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VSLTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1010, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1,
- (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
- IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]> {
+def VULTOS : AVConv1XInsS_Encode<0b11101, 0b11, 0b1011, 0b1010, 1,
+ (outs SPR:$dst), (ins SPR:$a, fbits32:$fbits),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", []> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
}
-def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VSHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s16\t$dst, $a, $fbits", []>;
-def VUHTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 0,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VUHTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 0,
+ (outs DPR:$dst), (ins DPR:$a, fbits16:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u16\t$dst, $a, $fbits", []>;
-def VSLTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VSLTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1010, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a, $fbits", []>;
-def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
- (outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
- IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
- [/* For disassembly only; pattern left blank */]>;
+def VULTOD : AVConv1XInsD_Encode<0b11101, 0b11, 0b1011, 0b1011, 1,
+ (outs DPR:$dst), (ins DPR:$a, fbits32:$fbits),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits", []>;
-} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
+} // End of 'let Constraints = "$a = $dst" in'
//===----------------------------------------------------------------------===//
// FP Multiply-Accumulate Operations.
@@ -922,7 +950,7 @@ def VMLAD : ADbI<0b11100, 0b00, 0, 0,
[(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -930,7 +958,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -938,10 +966,10 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>;
def VMLSD : ADbI<0b11100, 0b00, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -949,7 +977,7 @@ def VMLSD : ADbI<0b11100, 0b00, 1, 0,
[(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -957,7 +985,7 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
[(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -965,10 +993,10 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0,
def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
(VMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -976,7 +1004,7 @@ def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
[(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
@@ -984,7 +1012,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
[(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -992,10 +1020,10 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -1003,14 +1031,14 @@ def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
[(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
(f64 DPR:$Ddin)))]>,
RegConstraint<"$Ddin = $Dd">,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
(outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm",
[(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
RegConstraint<"$Sdin = $Sd">,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> {
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]> {
// Some single precision VFP instructions may be executed on both NEON and
// VFP pipelines on A8.
let D = VFPNeonA8Domain;
@@ -1018,11 +1046,172 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
(VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>,
- Requires<[HasVFP2,UseFPVMLx]>;
+ Requires<[HasVFP2,UseFPVMLx,DontUseFusedMAC]>;
def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
(VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>,
- Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>;
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
+//===----------------------------------------------------------------------===//
+// Fused FP Multiply-Accumulate Operations.
+//
+def VFMAD : ADbI<0b11101, 0b10, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMAS : ASbIn<0b11101, 0b10, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// fma(x, y, z) -> (vfma z, x, y): the addend binds to the tied accumulator.
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, DPR:$Ddin)),
+ (VFMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, SPR:$Sdin)),
+ (VFMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
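+// e.g. "%r = call double @llvm.fma.f64(double %x, double %y, double %z)"
+// selects VFMAD whenever VFP4 is available, with no UseFusedMAC check:
+// the intrinsic itself demands a fused multiply-add.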
+
+def VFMSD : ADbI<0b11101, 0b10, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFMSS : ASbIn<0b11101, 0b10, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VFMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)),
+ (VFMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fma (fneg x), y, z) -> (vfms z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, (fneg y), z) -> (vfms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
+ (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
+ (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnma", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMAS : ASbI<0b11101, 0b01, 1, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnma", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)),
+ SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
+ (VFNMAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin),
+ (VFNMAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma x, y, z)) -> (vfnma z, x, y)
+def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
+def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+
+def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
+ (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
+ IIC_fpFMAC64, "vfnms", ".f64\t$Dd, $Dn, $Dm",
+ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm),
+ (f64 DPR:$Ddin)))]>,
+ RegConstraint<"$Ddin = $Dd">,
+ Requires<[HasVFP4,UseFusedMAC]>;
+
+def VFNMSS : ASbI<0b11101, 0b01, 0, 0,
+ (outs SPR:$Sd), (ins SPR:$Sdin, SPR:$Sn, SPR:$Sm),
+ IIC_fpFMAC32, "vfnms", ".f32\t$Sd, $Sn, $Sm",
+ [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>,
+ RegConstraint<"$Sdin = $Sd">,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]> {
+ // Some single precision VFP instructions may be executed on both NEON and
+ // VFP pipelines.
+}
+
+def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin),
+ (VFNMSD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP4,UseFusedMAC]>;
+def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
+ (VFNMSS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP4,DontUseNEONForFP,UseFusedMAC]>;
+
+// Match @llvm.fma.* intrinsics
+// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
+def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
+// (fma x, y, (fneg z)) -> (vfnms z, x, y)
+def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
+ (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
+ Requires<[HasVFP4]>;
+def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
+ (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
+ Requires<[HasVFP4]>;
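+// For reference, the identities the fused patterns above rely on, with z the
+// tied accumulator:
+//   vfma:   z + x*y =  fma(x, y, z)
+//   vfms:   z - x*y =  fma(-x, y, z) = fma(x, -y, z)
+//   vfnma: -z - x*y = -fma(x, y, z)  = fma(-x, y, -z)
+//   vfnms:  x*y - z = -fma(-x, y, z) = fma(x, y, -z)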
//===----------------------------------------------------------------------===//
// FP Conditional moves.
@@ -1063,9 +1252,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
// APSR is the application level alias of CPSR. This moves the FPSCR N, Z, C, V
// flags to APSR.
-let Defs = [CPSR], Uses = [FPSCR], Rt = 0b1111 /* apsr_nzcv */ in
+let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in
def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins),
- "vmrs", "\tapsr_nzcv, fpscr", [(arm_fmstat)]>;
+ "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>;
// Application level FPSCR -> GPR
let hasSideEffects = 1, Uses = [FPSCR] in
@@ -1079,6 +1268,10 @@ let Uses = [FPSCR] in {
"vmrs", "\t$Rt, fpexc", []>;
def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins),
"vmrs", "\t$Rt, fpsid", []>;
+ def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr0", []>;
+ def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins),
+ "vmrs", "\t$Rt, mvfr1", []>;
}
//===----------------------------------------------------------------------===//
@@ -1160,6 +1353,115 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm),
//===----------------------------------------------------------------------===//
// Assembler aliases.
//
+// A few mnemonic aliases for pre-unified assembly syntax. We don't guarantee
+// to support them all, but supporting at least some of the basics is
+// good to be friendly.
+def : VFP2MnemonicAlias<"flds", "vldr">;
+def : VFP2MnemonicAlias<"fldd", "vldr">;
+def : VFP2MnemonicAlias<"fmrs", "vmov">;
+def : VFP2MnemonicAlias<"fmsr", "vmov">;
+def : VFP2MnemonicAlias<"fsqrts", "vsqrt">;
+def : VFP2MnemonicAlias<"fsqrtd", "vsqrt">;
+def : VFP2MnemonicAlias<"fadds", "vadd.f32">;
+def : VFP2MnemonicAlias<"faddd", "vadd.f64">;
+def : VFP2MnemonicAlias<"fmrdd", "vmov">;
+def : VFP2MnemonicAlias<"fmrds", "vmov">;
+def : VFP2MnemonicAlias<"fmrrd", "vmov">;
+def : VFP2MnemonicAlias<"fmdrr", "vmov">;
+def : VFP2MnemonicAlias<"fmuls", "vmul.f32">;
+def : VFP2MnemonicAlias<"fmuld", "vmul.f64">;
+def : VFP2MnemonicAlias<"fnegs", "vneg.f32">;
+def : VFP2MnemonicAlias<"fnegd", "vneg.f64">;
+def : VFP2MnemonicAlias<"ftosizd", "vcvt.s32.f64">;
+def : VFP2MnemonicAlias<"ftosid", "vcvtr.s32.f64">;
+def : VFP2MnemonicAlias<"ftosizs", "vcvt.s32.f32">;
+def : VFP2MnemonicAlias<"ftosis", "vcvtr.s32.f32">;
+def : VFP2MnemonicAlias<"ftouizd", "vcvt.u32.f64">;
+def : VFP2MnemonicAlias<"ftouid", "vcvtr.u32.f64">;
+def : VFP2MnemonicAlias<"ftouizs", "vcvt.u32.f32">;
+def : VFP2MnemonicAlias<"ftouis", "vcvtr.u32.f32">;
+def : VFP2MnemonicAlias<"fsitod", "vcvt.f64.s32">;
+def : VFP2MnemonicAlias<"fsitos", "vcvt.f32.s32">;
+def : VFP2MnemonicAlias<"fuitod", "vcvt.f64.u32">;
+def : VFP2MnemonicAlias<"fuitos", "vcvt.f32.u32">;
+def : VFP2MnemonicAlias<"fsts", "vstr">;
+def : VFP2MnemonicAlias<"fstd", "vstr">;
+def : VFP2MnemonicAlias<"fmacd", "vmla.f64">;
+def : VFP2MnemonicAlias<"fmacs", "vmla.f32">;
+def : VFP2MnemonicAlias<"fcpys", "vmov.f32">;
+def : VFP2MnemonicAlias<"fcpyd", "vmov.f64">;
+def : VFP2MnemonicAlias<"fcmps", "vcmp.f32">;
+def : VFP2MnemonicAlias<"fcmpd", "vcmp.f64">;
+def : VFP2MnemonicAlias<"fdivs", "vdiv.f32">;
+def : VFP2MnemonicAlias<"fdivd", "vdiv.f64">;
+def : VFP2MnemonicAlias<"fmrx", "vmrs">;
+def : VFP2MnemonicAlias<"fmxr", "vmsr">;
+
+// Be friendly and accept the old form of zero-compare
+def : VFP2InstAlias<"fcmpzd${p} $val", (VCMPZD DPR:$val, pred:$p)>;
+def : VFP2InstAlias<"fcmpzs${p} $val", (VCMPZS SPR:$val, pred:$p)>;
-def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>;
+def : VFP2InstAlias<"fadds${p} $Sd, $Sn, $Sm",
+ (VADDS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"faddd${p} $Dd, $Dn, $Dm",
+ (VADDD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"fsubs${p} $Sd, $Sn, $Sm",
+ (VSUBS SPR:$Sd, SPR:$Sn, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"fsubd${p} $Dd, $Dn, $Dm",
+ (VSUBD DPR:$Dd, DPR:$Dn, DPR:$Dm, pred:$p)>;
+
+// No need for the size suffix on VSQRT. It's implied by the register classes.
+def : VFP2InstAlias<"vsqrt${p} $Sd, $Sm", (VSQRTS SPR:$Sd, SPR:$Sm, pred:$p)>;
+def : VFP2InstAlias<"vsqrt${p} $Dd, $Dm", (VSQRTD DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+// VLDR/VSTR accept an optional type suffix.
+def : VFP2InstAlias<"vldr${p}.32 $Sd, $addr",
+ (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.32 $Sd, $addr",
+ (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vldr${p}.64 $Dd, $addr",
+ (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+def : VFP2InstAlias<"vstr${p}.64 $Dd, $addr",
+ (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>;
+
+// VMUL has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vmul${p}.f64 $Dn, $Dm",
+ (VMULD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vmul${p}.f32 $Sn, $Sm",
+ (VMULS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VADD has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vadd${p}.f64 $Dn, $Dm",
+ (VADDD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vadd${p}.f32 $Sn, $Sm",
+ (VADDS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
+// VSUB has a two-operand form (implied destination operand)
+def : VFP2InstAlias<"vsub${p}.f64 $Dn, $Dm",
+ (VSUBD DPR:$Dn, DPR:$Dn, DPR:$Dm, pred:$p)>;
+def : VFP2InstAlias<"vsub${p}.f32 $Sn, $Sm",
+ (VSUBS SPR:$Sn, SPR:$Sn, SPR:$Sm, pred:$p)>;
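+// e.g. "vadd.f32 s0, s1" is accepted as shorthand for "vadd.f32 s0, s0, s1".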
+
+// VMOV can accept an optional data type suffix of 32 bits or less.
+def : VFP2InstAlias<"vmov${p}.8 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Rt, $Sn",
+ (VMOVRS GPR:$Rt, SPR:$Sn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.8 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.16 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.32 $Sn, $Rt",
+ (VMOVSR SPR:$Sn, GPR:$Rt, pred:$p)>;
+
+def : VFP2InstAlias<"vmov${p}.f64 $Rt, $Rt2, $Dn",
+ (VMOVRRD GPR:$Rt, GPR:$Rt2, DPR:$Dn, pred:$p)>;
+def : VFP2InstAlias<"vmov${p}.f64 $Dn, $Rt, $Rt2",
+ (VMOVDRR DPR:$Dn, GPR:$Rt, GPR:$Rt2, pred:$p)>;
+
+// VMOVS doesn't need the .f32 to disambiguate from the NEON encoding the way
+// VMOVD does.
+def : VFP2InstAlias<"vmov${p} $Sd, $Sm",
+ (VMOVS SPR:$Sd, SPR:$Sm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 45b7e48d0cfb..98930ccdb194 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -13,7 +13,7 @@
#define DEBUG_TYPE "jit"
#include "ARMJITInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
@@ -61,7 +61,7 @@ extern "C" {
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
- "fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
// pass it as the argument to the C part of the callback
@@ -85,7 +85,7 @@ extern "C" {
//
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
- "fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
+ "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
//
// We need to exchange the values in slots 0 and 1 so we can
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 2f9792813d32..792818442724 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -1,4 +1,4 @@
-//===- ARMJITInfo.h - ARM implementation of the JIT interface --*- C++ -*-===//
+//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index faa8ba76845e..9ef2ace29cff 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
+//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,8 +15,8 @@
#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "ARMMachineFunctionInfo.h"
-#include "ARMRegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -32,6 +32,8 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -62,6 +64,7 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
RegScavenger *RS;
bool isThumb2;
@@ -90,7 +93,9 @@ namespace {
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
- DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+ DebugLoc dl,
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs);
void MergeOpsUpdate(MachineBasicBlock &MBB,
MemOpQueue &MemOps,
unsigned memOpsBegin,
@@ -141,7 +146,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::LDMDB;
case ARM_AM::ib: return ARM::LDMIB;
}
- break;
case ARM::STRi12:
++NumSTMGened;
switch (Mode) {
@@ -151,7 +155,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
- break;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
@@ -160,7 +163,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::t2LDMIA;
case ARM_AM::db: return ARM::t2LDMDB;
}
- break;
case ARM::t2STRi8:
case ARM::t2STRi12:
++NumSTMGened;
@@ -169,7 +171,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::t2STMIA;
case ARM_AM::db: return ARM::t2STMDB;
}
- break;
case ARM::VLDRS:
++NumVLDMGened;
switch (Mode) {
@@ -177,7 +178,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VLDMSIA;
case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
}
- break;
case ARM::VSTRS:
++NumVSTMGened;
switch (Mode) {
@@ -185,7 +185,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VSTMSIA;
case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
}
- break;
case ARM::VLDRD:
++NumVLDMGened;
switch (Mode) {
@@ -193,7 +192,6 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VLDMDIA;
case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
}
- break;
case ARM::VSTRD:
++NumVSTMGened;
switch (Mode) {
@@ -201,10 +199,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::ia: return ARM::VSTMDIA;
case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
}
- break;
}
-
- return 0;
}
namespace llvm {
@@ -259,8 +254,6 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::STMIB_UPD:
return ARM_AM::ib;
}
-
- return ARM_AM::bad_am_submode;
}
} // end namespace ARM_AM
@@ -291,7 +284,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
int Offset, unsigned Base, bool BaseKill,
int Opcode, ARMCC::CondCodes Pred,
unsigned PredReg, unsigned Scratch, DebugLoc dl,
- SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
+ ArrayRef<std::pair<unsigned, bool> > Regs,
+ ArrayRef<unsigned> ImpDefs) {
// Only a single register to load / store. Don't bother.
unsigned NumRegs = Regs.size();
if (NumRegs <= 1)
@@ -359,6 +353,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
+ // Add implicit defs for super-registers.
+ for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
+ MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
+
return true;
}
@@ -393,19 +391,29 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
}
SmallVector<std::pair<unsigned, bool>, 8> Regs;
+ SmallVector<unsigned, 8> ImpDefs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
unsigned Reg = memOps[i].Reg;
// If we are inserting the merged operation after an operation that
// uses the same register, make sure to transfer any kill flag.
bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
+
+ // Collect any implicit defs of super-registers. They must be preserved.
+ for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
+ if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
+ continue;
+ unsigned DefReg = MO->getReg();
+ if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
+ ImpDefs.push_back(DefReg);
+ }
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
- Pred, PredReg, Scratch, dl, Regs))
+ Pred, PredReg, Scratch, dl, Regs, ImpDefs))
return;
// Merge succeeded, update records.
@@ -506,50 +514,84 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
return;
}
-static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg){
+static bool definesCPSR(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
+ // If the instruction has a live CPSR def, then it's not safe to fold it
+ // into load / store.
+ return true;
+ }
+
+ return false;
+}
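+// e.g. a flag-setting "subs r0, r0, #4" (SUBri with a live CPSR def) returns
+// true here and is rejected by the matchers below.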
+
+static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
- if (MI->getOpcode() != ARM::t2SUBri &&
- MI->getOpcode() != ARM::tSUBspi &&
- MI->getOpcode() != ARM::SUBri)
- return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2SUBri:
+ case ARM::SUBri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tSUBspi:
+ break;
+ }
// Make sure the offset fits in 8 bits.
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
- return (MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg);
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
}
-static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
- unsigned Bytes, unsigned Limit,
- ARMCC::CondCodes Pred, unsigned PredReg){
+static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
+ unsigned Bytes, unsigned Limit,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
unsigned MyPredReg = 0;
if (!MI)
return false;
- if (MI->getOpcode() != ARM::t2ADDri &&
- MI->getOpcode() != ARM::tADDspi &&
- MI->getOpcode() != ARM::ADDri)
- return false;
+
+ bool CheckCPSRDef = false;
+ switch (MI->getOpcode()) {
+ default: return false;
+ case ARM::t2ADDri:
+ case ARM::ADDri:
+ CheckCPSRDef = true;
+ // fallthrough
+ case ARM::tADDspi:
+ break;
+ }
if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
- return (MI->getOperand(0).getReg() == Base &&
- MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
- llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
- MyPredReg == PredReg);
+ if (!(MI->getOperand(0).getReg() == Base &&
+ MI->getOperand(1).getReg() == Base &&
+ (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ getInstrPredicate(MI, MyPredReg) == Pred &&
+ MyPredReg == PredReg))
+ return false;
+
+ return CheckCPSRDef ? !definesCPSR(MI) : true;
}
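+// For example, "add r4, r4, #16" following "ldmia r4, {r0-r3}" matches here
+// (Bytes == 16) and can be folded into "ldmia r4!, {r0-r3}".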
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
@@ -603,7 +645,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::da: return ARM::LDMDA_UPD;
case ARM_AM::db: return ARM::LDMDB_UPD;
}
- break;
case ARM::STMIA:
case ARM::STMDA:
case ARM::STMDB:
@@ -615,7 +656,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::da: return ARM::STMDA_UPD;
case ARM_AM::db: return ARM::STMDB_UPD;
}
- break;
case ARM::t2LDMIA:
case ARM::t2LDMDB:
switch (Mode) {
@@ -623,7 +663,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::ia: return ARM::t2LDMIA_UPD;
case ARM_AM::db: return ARM::t2LDMDB_UPD;
}
- break;
case ARM::t2STMIA:
case ARM::t2STMDB:
switch (Mode) {
@@ -631,38 +670,31 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
case ARM_AM::ia: return ARM::t2STMIA_UPD;
case ARM_AM::db: return ARM::t2STMDB_UPD;
}
- break;
case ARM::VLDMSIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMSIA_UPD;
case ARM_AM::db: return ARM::VLDMSDB_UPD;
}
- break;
case ARM::VLDMDIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VLDMDIA_UPD;
case ARM_AM::db: return ARM::VLDMDDB_UPD;
}
- break;
case ARM::VSTMSIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMSIA_UPD;
case ARM_AM::db: return ARM::VSTMSDB_UPD;
}
- break;
case ARM::VSTMDIA:
switch (Mode) {
default: llvm_unreachable("Unhandled submode!");
case ARM_AM::ia: return ARM::VSTMDIA_UPD;
case ARM_AM::db: return ARM::VSTMDDB_UPD;
}
- break;
}
-
- return 0;
}
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
@@ -686,7 +718,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
bool BaseKill = MI->getOperand(0).isKill();
unsigned Bytes = getLSMultipleTransferSize(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
@@ -783,7 +815,6 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
return ARM::t2STR_PRE;
default: llvm_unreachable("Unhandled opcode!");
}
- return 0;
}
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
@@ -809,7 +840,6 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
return ARM::t2STR_POST;
default: llvm_unreachable("Unhandled opcode!");
}
- return 0;
}
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
@@ -841,7 +871,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
bool DoMerge = false;
ARM_AM::AddrOpc AddSub = ARM_AM::add;
unsigned NewOpc = 0;
@@ -1071,11 +1101,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned Opcode = MI->getOpcode();
if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
unsigned EvenReg = MI->getOperand(0).getReg();
unsigned OddReg = MI->getOperand(1).getReg();
unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
- if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ // ARM erratum 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
+ if (!Errata602117 &&
+ ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
return false;
MachineBasicBlock::iterator NewBBI = MBBI;
@@ -1087,15 +1123,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
bool OddDeadKill = isLd ?
MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
bool OddUndef = MI->getOperand(1).isUndef();
- const MachineOperand &BaseOp = MI->getOperand(2);
- unsigned BaseReg = BaseOp.getReg();
bool BaseKill = BaseOp.isKill();
bool BaseUndef = BaseOp.isUndef();
bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
int OffImm = getMemoryOpOffset(MI);
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
if (OddRegNum > EvenRegNum && OffImm == 0) {
// Ascending register numbers and no offset. It's safe to change it to a
@@ -1126,6 +1160,11 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
unsigned NewOpc = (isLd)
? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
: (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
+ // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
+ // so adjust and use t2LDRi12 here for that.
+ unsigned NewOpc2 = (isLd)
+ ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
+ : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
DebugLoc dl = MBBI->getDebugLoc();
// If this is a load and base register is killed, it may have been
// re-defed by the load, make sure the first load does not clobber it.
@@ -1133,11 +1172,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
(BaseKill || OffKill) &&
(TRI->regsOverlap(EvenReg, BaseReg))) {
assert(!TRI->regsOverlap(OddReg, BaseReg));
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
+ if (isT2 && NewOpc == ARM::t2LDRi8 && OffImm+4 >= 0)
+ NewOpc = ARM::t2LDRi12;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
@@ -1150,12 +1191,16 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenDeadKill = false;
OddDeadKill = true;
}
+ // Never kill the base register in the first instruction.
+ // <rdar://problem/11101911>
+ if (EvenReg == BaseReg)
+ EvenDeadKill = false;
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, false, OffUndef,
Pred, PredReg, TII, isT2);
NewBBI = llvm::prior(MBBI);
- InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
@@ -1206,7 +1251,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
- ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
+ ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
@@ -1380,6 +1425,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
+ STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
@@ -1464,19 +1510,18 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const MCInstrDesc &MCID = I->getDesc();
- if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
+ if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && MCID.mayStore())
+ if (isLd && I->mayStore())
return false;
if (!isLd) {
- if (MCID.mayLoad())
+ if (I->mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (MCID.mayStore())
+ if (I->mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1498,6 +1543,23 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
return AddedRegPressure.size() <= MemRegs.size() * 2;
}
+
+/// Copy the memory operands of Op0 and Op1 into a new array assigned to MI.
+static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
+ MachineInstr *Op1) {
+ assert(MI->memoperands_empty() && "expected a new machineinstr");
+ size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
+ + (Op1->memoperands_end() - Op1->memoperands_begin());
+
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
+ MachineSDNode::mmo_iterator MemEnd =
+ std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
+ MemEnd =
+ std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
+ MI->setMemRefs(MemBegin, MemEnd);
+}
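+// e.g. when "ldr r0, [r2]" and "ldr r1, [r2, #4]" are merged into an LDRD
+// below, keeping both memory operands preserves the alias information of the
+// two original loads.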
+
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl,
@@ -1565,7 +1627,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (EvenReg == OddReg)
return false;
BaseReg = Op0->getOperand(1).getReg();
- Pred = llvm::getInstrPredicate(Op0, PredReg);
+ Pred = getInstrPredicate(Op0, PredReg);
dl = Op0->getDebugLoc();
return true;
}
@@ -1615,8 +1677,9 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
LastOp = Op;
}
- unsigned Opcode = Op->getOpcode();
- if (LastOpcode && Opcode != LastOpcode)
+ unsigned LSMOpcode
+ = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
+ if (LastOpcode && LSMOpcode != LastOpcode)
break;
int Offset = getMemoryOpOffset(Op);
@@ -1627,7 +1690,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
LastOffset = Offset;
LastBytes = Bytes;
- LastOpcode = Opcode;
+ LastOpcode = LSMOpcode;
if (++NumMove == 8) // FIXME: Tune this limit.
break;
}
@@ -1692,6 +1755,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumLDRDFormed;
} else {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
@@ -1704,6 +1769,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
if (!isT2)
MIB.addReg(0);
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
+ concatenateMemOperands(MIB, Op0, Op1);
+ DEBUG(dbgs() << "Formed " << *MIB << "\n");
++NumSTRDFormed;
}
MBB->erase(Op0);
@@ -1745,8 +1812,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isCall() || MCID.isTerminator()) {
+ if (MI->isCall() || MI->isTerminator()) {
// Stop at barriers.
++MBBI;
break;
@@ -1758,7 +1824,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
- if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
continue;
int Opc = MI->getOpcode();
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index daa126def401..e2ac9a466ed8 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -31,8 +31,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
switch (MO.getTargetFlags()) {
- default:
- assert(0 && "Unknown target flag on symbol operand");
+ default: llvm_unreachable("Unknown target flag on symbol operand");
case 0:
break;
case ARMII::MO_LO16:
@@ -67,9 +66,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOperand &MCOp) {
switch (MO.getType()) {
- default:
- assert(0 && "unknown operand type");
- return false;
+ default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all non-CPSR implicit register operands.
if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
@@ -107,6 +104,9 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
break;
}
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ return false;
}
return true;
}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..af445e2f35aa
--- /dev/null
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void ARMFunctionInfo::anchor() { }
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 138f0c262271..f1c8fc84816e 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//====- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===//
+//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
/// ARMFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private ARM-specific information for each MachineFunction.
class ARMFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
/// isThumb - True if this function is compiled under Thumb mode.
/// Used to initialize Align, so must precede it.
@@ -63,6 +64,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPR callee-saved (2) : r8, r10, r11
/// --------------------------------------------
/// DPR callee-saved : d8 - d15
+ ///
+ /// Also see NumAlignedDPRCS2Regs below. Not all D-regs need to go in area 3.
+ /// Some may be spilled after the stack has been realigned.
unsigned GPRCS1Offset;
unsigned GPRCS2Offset;
unsigned DPRCSOffset;
@@ -79,6 +83,15 @@ class ARMFunctionInfo : public MachineFunctionInfo {
BitVector GPRCS2Frames;
BitVector DPRCSFrames;
+ /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in
+ /// the aligned portion of the stack frame. This is always a contiguous
+ /// sequence of D-registers starting from d8.
+ ///
+ /// We do not keep track of the frame indices used for these registers - they
+ /// behave like any other frame index in the aligned stack frame. These
+ /// registers also aren't included in DPRCSSize above.
+ unsigned NumAlignedDPRCS2Regs;
+
/// JumpTableUId - Unique id for jumptables.
///
unsigned JumpTableUId;
@@ -104,6 +117,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
+ NumAlignedDPRCS2Regs(0),
JumpTableUId(0), PICLabelUId(0),
VarArgsFrameIndex(0), HasITBlocks(false) {}
@@ -137,6 +151,9 @@ public:
unsigned getFramePtrSpillOffset() const { return FramePtrSpillOffset; }
void setFramePtrSpillOffset(unsigned o) { FramePtrSpillOffset = o; }
+ unsigned getNumAlignedDPRCS2Regs() const { return NumAlignedDPRCS2Regs; }
+ void setNumAlignedDPRCS2Regs(unsigned n) { NumAlignedDPRCS2Regs = n; }
+
unsigned getGPRCalleeSavedArea1Offset() const { return GPRCS1Offset; }
unsigned getGPRCalleeSavedArea2Offset() const { return GPRCS2Offset; }
unsigned getDPRCalleeSavedAreaOffset() const { return DPRCSOffset; }
diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h
index 18e162000602..efa22fbed9f7 100644
--- a/lib/Target/ARM/ARMPerfectShuffle.h
+++ b/lib/Target/ARM/ARMPerfectShuffle.h
@@ -1,4 +1,4 @@
-//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table ------------------===//
+//===-- ARMPerfectShuffle.h - NEON Perfect Shuffle Table --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 1cba1ba591ef..6f3819afd0a5 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.cpp - ARM Register Information -----------*- C++ -*-===//
+//===-- ARMRegisterInfo.cpp - ARM Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,11 +11,13 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMRegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
-#include "ARMRegisterInfo.h"
using namespace llvm;
+void ARMRegisterInfo::anchor() { }
+
ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMBaseRegisterInfo(tii, sti) {
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 8edfb9a2057f..8a248425c33c 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.h - ARM Register Information Impl --------*- C++ -*-===//
+//===-- ARMRegisterInfo.h - ARM Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,15 +15,15 @@
#define ARMREGISTERINFO_H
#include "ARM.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
+ virtual void anchor();
public:
ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
};
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 036822d18ad2..1466e983f3be 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
+//===-- ARMRegisterInfo.td - ARM Register defs -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,8 @@ class ARMReg<bits<4> num, string n, list<Register> subregs = []> : Register<n> {
field bits<4> Num;
let Namespace = "ARM";
let SubRegs = subregs;
+ // All bits of ARM registers with sub-registers are covered by sub-registers.
+ let CoveredBySubRegs = 1;
}
class ARMFReg<bits<6> num, string n> : Register<n> {
@@ -25,28 +27,30 @@ class ARMFReg<bits<6> num, string n> : Register<n> {
// Subregister indices.
let Namespace = "ARM" in {
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+
// Note: Code depends on these having consecutive numbers.
-def ssub_0 : SubRegIndex;
-def ssub_1 : SubRegIndex;
-def ssub_2 : SubRegIndex; // In a Q reg.
-def ssub_3 : SubRegIndex;
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex<[qqsub_1, qsub_0]>;
+def qsub_3 : SubRegIndex<[qqsub_1, qsub_1]>;
def dsub_0 : SubRegIndex;
def dsub_1 : SubRegIndex;
-def dsub_2 : SubRegIndex;
-def dsub_3 : SubRegIndex;
-def dsub_4 : SubRegIndex;
-def dsub_5 : SubRegIndex;
-def dsub_6 : SubRegIndex;
-def dsub_7 : SubRegIndex;
+def dsub_2 : SubRegIndex<[qsub_1, dsub_0]>;
+def dsub_3 : SubRegIndex<[qsub_1, dsub_1]>;
+def dsub_4 : SubRegIndex<[qsub_2, dsub_0]>;
+def dsub_5 : SubRegIndex<[qsub_2, dsub_1]>;
+def dsub_6 : SubRegIndex<[qsub_3, dsub_0]>;
+def dsub_7 : SubRegIndex<[qsub_3, dsub_1]>;
-def qsub_0 : SubRegIndex;
-def qsub_1 : SubRegIndex;
-def qsub_2 : SubRegIndex;
-def qsub_3 : SubRegIndex;
-
-def qqsub_0 : SubRegIndex;
-def qqsub_1 : SubRegIndex;
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex<[dsub_1, ssub_0]>;
+def ssub_3 : SubRegIndex<[dsub_1, ssub_1]>;
+// Let TableGen synthesize the remaining 12 ssub_* indices.
+// We don't need to name them.
}
// Integer registers
@@ -127,9 +131,7 @@ def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>;
def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>;
// Advanced SIMD (NEON) defines 16 quad-word aliases
-let SubRegIndices = [dsub_0, dsub_1],
- CompositeIndices = [(ssub_2 dsub_1, ssub_0),
- (ssub_3 dsub_1, ssub_1)] in {
+let SubRegIndices = [dsub_0, dsub_1] in {
def Q0 : ARMReg< 0, "q0", [D0, D1]>;
def Q1 : ARMReg< 1, "q1", [D2, D3]>;
def Q2 : ARMReg< 2, "q2", [D4, D5]>;
@@ -150,45 +152,22 @@ def Q14 : ARMReg<14, "q14", [D28, D29]>;
def Q15 : ARMReg<15, "q15", [D30, D31]>;
}
-// Pseudo 256-bit registers to represent pairs of Q registers. These should
-// never be present in the emitted code.
-// These are used for NEON load / store instructions, e.g., vld4, vst3.
-// NOTE: It's possible to define more QQ registers since technically the
-// starting D register number doesn't have to be multiple of 4, e.g.,
-// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
-// stuff very messy.
-let SubRegIndices = [qsub_0, qsub_1],
- CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
-def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
-def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
-def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
-def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
-def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
-def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
-def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
-def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
-}
-
-// Pseudo 512-bit registers to represent four consecutive Q registers.
-let SubRegIndices = [qqsub_0, qqsub_1],
- CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
- (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
- (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
-def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
-def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
-def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
-def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
-}
-
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-def APSR : ARMReg<1, "apsr">;
-def SPSR : ARMReg<2, "spsr">;
-def FPSCR : ARMReg<3, "fpscr">;
-def ITSTATE : ARMReg<4, "itstate">;
+// We model fpscr with two registers: FPSCR models the control bits and will be
+// reserved. FPSCR_NZCV models the flag bits and will be unreserved.
+def CPSR : ARMReg<0, "cpsr">;
+def APSR : ARMReg<1, "apsr">;
+def SPSR : ARMReg<2, "spsr">;
+def FPSCR : ARMReg<3, "fpscr">;
+def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
+ let Aliases = [FPSCR];
+}
+def ITSTATE : ARMReg<4, "itstate">;
// Special Registers - only available in privileged mode.
def FPSID : ARMReg<0, "fpsid">;
+def MVFR1 : ARMReg<6, "mvfr1">;
+def MVFR0 : ARMReg<7, "mvfr0">;
def FPEXC : ARMReg<8, "fpexc">;
// Register classes.
@@ -261,6 +240,12 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
}];
}
+// Condition code registers.
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+ let isAllocatable = 0;
+}
+
// Scalar single precision floating point register class..
def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
@@ -316,37 +301,100 @@ def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
(DPR_8 dsub_0, dsub_1)];
}
+// Pseudo-registers representing odd-even pairs of D registers. The even-odd
+// pairs are already represented by the Q registers.
+// These are needed by NEON instructions requiring two consecutive D registers.
+// There is no D31_D0 register as that is always an UNPREDICTABLE encoding.
+def TuplesOE2D : RegisterTuples<[dsub_0, dsub_1],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2)]>;
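+// i.e. the odd-even pairs D1_D2, D3_D4, ..., D29_D30; the even-odd pairs are
+// the existing Q registers.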
+
+// Register class representing a pair of consecutive D registers.
+// Use the Q registers for the even-odd pairs.
+def DPair : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (interleave QPR, TuplesOE2D)> {
+ // Allocate starting at non-VFP2 registers D16-D31 first.
+ // Prefer even-odd pairs as they are easier to copy.
+ let AltOrders = [(add (rotl QPR, 8), (rotl DPair, 16))];
+ let AltOrderSelect = [{ return 1; }];
+}
+
+// Pseudo-registers representing 3 consecutive D registers.
+def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 1),
+ (shl DPR, 2)]>;
+
+// 3 consecutive D registers.
+def DTriple : RegisterClass<"ARM", [untyped], 64, (add Tuples3D)> {
+  let Size = 192; // 3 x 64 bits; there is no predefined type of that size.
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], [(shl QPR, 0), (shl QPR, 1)]>;
+
// Pseudo 256-bit vector register class to model pairs of Q registers
// (4 consecutive D registers).
-def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (add Tuples2Q)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
(QPR qsub_0, qsub_1)];
// Allocate non-VFP2 aliases first.
- let AltOrders = [(rotl QQPR, 4)];
+ let AltOrders = [(rotl QQPR, 8)];
let AltOrderSelect = [{ return 1; }];
}
-// Subset of QQPR that have 32-bit SPR subregs.
-def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
- let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
- (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
- (QPR_VFP2 qsub_0, qsub_1)];
+// Tuples of 4 D regs that aren't also a pair of Q regs.
+def TuplesOE4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
+ [(decimate (shl DPR, 1), 2),
+ (decimate (shl DPR, 2), 2),
+ (decimate (shl DPR, 3), 2),
+ (decimate (shl DPR, 4), 2)]>;
-}
+// 4 consecutive D registers.
+def DQuad : RegisterClass<"ARM", [v4i64], 256,
+ (interleave Tuples2Q, TuplesOE4D)>;
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+def Tuples2QQ : RegisterTuples<[qqsub_0, qqsub_1],
+ [(shl QQPR, 0), (shl QQPR, 2)]>;
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
// (8 consecutive D registers).
-def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (add Tuples2QQ)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
dsub_4, dsub_5, dsub_6, dsub_7),
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
// Allocate non-VFP2 aliases first.
- let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrders = [(rotl QQQQPR, 8)];
let AltOrderSelect = [{ return 1; }];
}
-// Condition code registers.
-def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
- let CopyCost = -1; // Don't allow copying of status registers.
- let isAllocatable = 0;
+
+// Pseudo-registers representing 2-spaced consecutive D registers.
+def Tuples2DSpc : RegisterTuples<[dsub_0, dsub_2],
+ [(shl DPR, 0),
+ (shl DPR, 2)]>;
+
+// Spaced pairs of D registers.
+def DPairSpc : RegisterClass<"ARM", [v2i64], 64, (add Tuples2DSpc)>;
+
+def Tuples3DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4)]>;
+
+// Spaced triples of D registers.
+def DTripleSpc : RegisterClass<"ARM", [untyped], 64, (add Tuples3DSpc)> {
+  let Size = 192; // 3 x 64 bits; there is no predefined type of that size.
}
+
+def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
+ [(shl DPR, 0),
+ (shl DPR, 2),
+ (shl DPR, 4),
+ (shl DPR, 6)]>;
+
+// Spaced quads of D registers.
+def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples4DSpc)>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 86e7206f2cc6..21877fd9af37 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -1,4 +1,4 @@
-//===- ARMRelocations.h - ARM Code Relocations ------------------*- C++ -*-===//
+//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index 958c5c647013..45486fd0b6dd 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -1,10 +1,10 @@
-//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
-//
+//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -118,6 +118,8 @@ def IIC_fpMUL32 : InstrItinClass;
def IIC_fpMUL64 : InstrItinClass;
def IIC_fpMAC32 : InstrItinClass;
def IIC_fpMAC64 : InstrItinClass;
+def IIC_fpFMAC32 : InstrItinClass;
+def IIC_fpFMAC64 : InstrItinClass;
def IIC_fpDIV32 : InstrItinClass;
def IIC_fpDIV64 : InstrItinClass;
def IIC_fpSQRT32 : InstrItinClass;
@@ -208,6 +210,8 @@ def IIC_VPERMQ : InstrItinClass;
def IIC_VPERMQ3 : InstrItinClass;
def IIC_VMACD : InstrItinClass;
def IIC_VMACQ : InstrItinClass;
+def IIC_VFMACD : InstrItinClass;
+def IIC_VFMACQ : InstrItinClass;
def IIC_VRECSD : InstrItinClass;
def IIC_VRECSQ : InstrItinClass;
def IIC_VCNTiD : InstrItinClass;
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index 8d86c01dc741..8b1fb9386ad5 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -324,6 +324,15 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<19, [A8_NPipe], 0>,
InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<19, [A8_NPipe], 0>,
+ InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<20, [A8_NPipe], 0>,
@@ -860,6 +869,16 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
//
+  // Double-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
+ //
+  // Quad-register Fused FP Multiply-Accumulate
+  // The result is written in N9, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 10 for those cases.
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
+ InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
+ //
// Double-register Reciprocal Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 49fedf63f8bc..0d710cc1acee 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -604,6 +604,22 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<2, [A9_NPipe]>],
[9, 1, 1, 1]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<9, [A9_DRegsN], 0, Reserved>,
+ InstrStage<1, [A9_NPipe]>],
+ [8, 1, 1, 1]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsVFP], 0, Required>,
+ InstrStage<10, [A9_DRegsN], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [9, 1, 1, 1]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
@@ -1697,6 +1713,26 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<4, [A9_NPipe]>],
[8, 4, 2, 1]>,
//
+  // Double-register Fused FP Multiply-Accumulate
+ InstrItinData<IIC_VFMACD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 7 cycles
+ InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<2, [A9_NPipe]>],
+ [6, 3, 2, 1]>,
+ //
+  // Quad-register Fused FP Multiply-Accumulate
+  // The result is written in N9, but that is relative to the last cycle of a
+  // multi-cycle itinerary, so we use 10 for those cases.
+ InstrItinData<IIC_VFMACQ, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
+ InstrStage<1, [A9_MUX0], 0>,
+ InstrStage<1, [A9_DRegsN], 0, Required>,
+ // Extra latency cycles since wbck is 9 cycles
+ InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
+ InstrStage<4, [A9_NPipe]>],
+ [8, 4, 2, 1]>,
+ //
// Double-register Reciprocal Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>,
InstrStage<1, [A9_MUX0], 0>,
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index c1880a72fff3..0ace9bc1796d 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -1,10 +1,10 @@
-//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- ARMScheduleV6.td - ARM v6 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM v6 processors.
@@ -243,6 +243,12 @@ def ARMV6Itineraries : ProcessorItineraries<
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
//
+ // Single-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC32, [InstrStage<1, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
+ // Double-precision Fused FP MAC
+ InstrItinData<IIC_fpFMAC64, [InstrStage<2, [V6_Pipe]>], [9, 2, 2, 2]>,
+ //
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<15, [V6_Pipe]>], [20, 2, 2]>,
//
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index a3a3d58841db..e2530d07e237 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -67,7 +67,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, 0);
+ false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -105,7 +105,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
DAG.getConstant(SrcOff, MVT::i32)),
- SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff),
+ false, false, false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
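
A hedged reading of the extra 'false' threaded through these calls: SelectionDAG::getLoad gained an isInvariant flag in this period, sitting between isNonTemporal and the alignment, so the memcpy expansion now spells out all three memory flags explicitly. With the parameters named (Addr here stands for the ISD::ADD node built just above):

SDValue Load = DAG.getLoad(VT, dl, Chain, Addr,
                           SrcPtrInfo.getWithOffset(SrcOff),
                           /*isVolatile=*/false,
                           /*isNonTemporal=*/false,
                           /*isInvariant=*/false,
                           /*Alignment=*/0);
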
@@ -144,8 +145,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
- // Use default for non AAPCS subtargets
- if (!Subtarget->isAAPCS_ABI())
+  // Use the default for non-AAPCS (or Darwin) subtargets.
+ if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin())
return SDValue();
const ARMTargetLowering &TLI =
@@ -188,6 +189,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
0, // number of fixed arguments
TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
false, // is tail call
+ false, // does not return
false, // is return val used
DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()), // callee
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 247d6be59ad4..e247b76ad43b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -1,4 +1,4 @@
-//===-- ARMSubtarget.cpp - ARM Subtarget Information ------------*- C++ -*-===//
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
+//===-- ARMSubtarget.cpp - ARM Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,7 +16,6 @@
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -47,13 +46,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
, HasV7Ops(false)
, HasVFPv2(false)
, HasVFPv3(false)
+ , HasVFPv4(false)
, HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
, InThumbMode(false)
- , InNaClMode(false)
, HasThumb2(false)
, IsMClass(false)
, NoARM(false)
@@ -104,18 +103,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
computeIssueWidth();
if (TT.find("eabi") != std::string::npos)
+    // FIXME: We might want to separate AAPCS and EABI. Some systems, e.g.
+    // Darwin-EABI, conform to AAPCS but not to the rest of EABI.
TargetABI = ARM_ABI_AAPCS;
if (isAAPCS_ABI())
stackAlignment = 8;
- if (!isTargetDarwin())
+ if (!isTargetIOS())
UseMovt = hasV6T2Ops();
else {
IsR9Reserved = ReserveR9 | !HasV6Ops;
UseMovt = DarwinUseMOVT && hasV6T2Ops();
- const Triple &T = getTargetTriple();
- SupportsTailCall = T.getOS() == Triple::IOS && !T.isOSVersionLT(5, 0);
+ SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0);
}
if (!isThumb() || hasThumb2())
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index b63e1085fb83..e72b06fa3fcc 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -1,4 +1,4 @@
-//=====---- ARMSubtarget.h - Define Subtarget for the ARM -----*- C++ -*--====//
+//===-- ARMSubtarget.h - Define Subtarget for the ARM ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -45,10 +45,11 @@ protected:
bool HasV6T2Ops;
bool HasV7Ops;
- /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
- /// supported.
+ /// HasVFPv2, HasVFPv3, HasVFPv4, HasNEON - Specify what
+ /// floating point ISAs are supported.
bool HasVFPv2;
bool HasVFPv3;
+ bool HasVFPv4;
bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
@@ -70,9 +71,6 @@ protected:
/// InThumbMode - True if compiling for Thumb, false for ARM.
bool InThumbMode;
- /// InNaClMode - True if targeting Native Client
- bool InNaClMode;
-
/// HasThumb2 - True if Thumb2 instructions are supported.
bool HasThumb2;
@@ -126,6 +124,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate;
+  /// HasRAS - Some processors perform return stack prediction. CodeGen should
+  /// avoid issuing "normal" call instructions to callees that do not return.
+ bool HasRAS;
+
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension;
@@ -194,11 +196,13 @@ protected:
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+ bool isCortexM3() const { return CPUString == "cortex-m3"; }
bool hasARMOps() const { return !NoARM; }
bool hasVFP2() const { return HasVFPv2; }
bool hasVFP3() const { return HasVFPv3; }
+ bool hasVFP4() const { return HasVFPv4; }
bool hasNEON() const { return HasNEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
@@ -212,6 +216,7 @@ protected:
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool hasRAS() const { return HasRAS; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasThumb2DSP() const { return Thumb2DSP; }
@@ -220,6 +225,7 @@ protected:
const Triple &getTargetTriple() const { return TargetTriple; }
+ bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetNaCl() const {
return TargetTriple.getOS() == Triple::NativeClient;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 96b1e89b0df0..047efc23a4ea 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -20,6 +20,7 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
static cl::opt<bool>
@@ -33,24 +34,32 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
}
+
/// TargetMachine ctor - Create an ARM architecture model.
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
// Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Soft;
}
+void ARMTargetMachine::anchor() { }
+
ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM), InstrInfo(Subtarget),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32-S32") :
@@ -68,10 +77,14 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
"support ARM mode execution!");
}
+void ThumbTargetMachine::anchor() { }
+
ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : ARMBaseTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
@@ -94,37 +107,62 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
}
-bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && EnableGlobalMerge)
- PM.add(createARMGlobalMergePass(getTargetLowering()));
+namespace {
+/// ARM Code Generator Pass Configuration Options.
+class ARMPassConfig : public TargetPassConfig {
+public:
+ ARMPassConfig(ARMBaseTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ ARMBaseTargetMachine &getARMTargetMachine() const {
+ return getTM<ARMBaseTargetMachine>();
+ }
+
+ const ARMSubtarget &getARMSubtarget() const {
+ return *getARMTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addPreISel();
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new ARMPassConfig(this, PM);
+}
+
+bool ARMPassConfig::addPreISel() {
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge)
+ PM.add(createGlobalMergePass(TM->getTargetLowering()));
return false;
}
-bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createARMISelDag(*this, OptLevel));
+bool ARMPassConfig::addInstSelector() {
+ PM.add(createARMISelDag(getARMTargetMachine(), getOptLevel()));
return false;
}
-bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMPassConfig::addPreRegAlloc() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
- if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9())
+ if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
PM.add(createMLxExpansionPass());
return true;
}
-bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool ARMPassConfig::addPreSched2() {
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only()) {
PM.add(createARMLoadStoreOptimizationPass());
- if (Subtarget.hasNEON())
+ printAndVerify("After ARM load / store optimizer");
+ }
+ if (getARMSubtarget().hasNEON())
PM.add(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
@@ -132,27 +170,31 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- if (OptLevel != CodeGenOpt::None) {
- if (!Subtarget.isThumb1Only())
- PM.add(createIfConverterPass());
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (!getARMSubtarget().isThumb1Only())
+ addPass(IfConverterID);
}
- if (Subtarget.isThumb2())
+ if (getARMSubtarget().isThumb2())
PM.add(createThumb2ITBlockPass());
return true;
}
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
- PM.add(createThumb2SizeReductionPass());
+bool ARMPassConfig::addPreEmitPass() {
+ if (getARMSubtarget().isThumb2()) {
+ if (!getARMSubtarget().prefers32BitThumb())
+ PM.add(createThumb2SizeReductionPass());
+
+      // The constant island pass works on unbundled instructions.
+ addPass(UnpackMachineBundlesID);
+ }
PM.add(createARMConstantIslandPass());
+
return true;
}
bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// Machine code emitter pass for ARM.
PM.add(createARMJITCodeEmitterPass(*this, JCE));
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c8c601c30171..abcdb24c0c69 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,9 @@ private:
public:
ARMBaseTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -50,18 +52,15 @@ public:
}
// Pass Pipeline Configuration
- virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- JITCodeEmitter &MCE);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);
};
/// ARMTargetMachine - ARM target machine.
///
class ARMTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
ARMInstrInfo InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMELFWriterInfo ELFWriterInfo;
@@ -71,7 +70,9 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
public:
ARMTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -100,6 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
/// Thumb-1 and Thumb-2.
///
class ThumbTargetMachine : public ARMBaseTargetMachine {
+ virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
@@ -111,7 +113,9 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 19defa1b5196..a5ea1c202e2c 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
using namespace dwarf;
@@ -24,8 +25,9 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ isAAPCS_ABI = TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI();
- if (TM.getSubtarget<ARMSubtarget>().isAAPCS_ABI()) {
+ if (isAAPCS_ABI) {
StaticCtorSection =
getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
ELF::SHF_WRITE |
@@ -45,3 +47,33 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
0,
SectionKind::getMetadata());
}
+
+const MCSection *
+ARMElfTargetObjectFile::getStaticCtorSection(unsigned Priority) const {
+ if (!isAAPCS_ABI)
+ return TargetLoweringObjectFileELF::getStaticCtorSection(Priority);
+
+ if (Priority == 65535)
+ return StaticCtorSection;
+
+ // Emit ctors in priority order.
+ std::string Name = std::string(".init_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_INIT_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
+
+const MCSection *
+ARMElfTargetObjectFile::getStaticDtorSection(unsigned Priority) const {
+ if (!isAAPCS_ABI)
+ return TargetLoweringObjectFileELF::getStaticDtorSection(Priority);
+
+ if (Priority == 65535)
+ return StaticDtorSection;
+
+ // Emit dtors in priority order.
+ std::string Name = std::string(".fini_array.") + utostr(Priority);
+ return getContext().getELFSection(Name, ELF::SHT_FINI_ARRAY,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE,
+ SectionKind::getDataRel());
+}
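
Illustration of what these hooks buy (not part of the patch; early_setup, normal_setup, and the priority value are made up for the example): on an AAPCS target, a prioritized constructor lands in a numbered ".init_array" section, which the linker sorts ahead of the default-priority entries in plain ".init_array".

// Emitted into ".init_array.101" by the hook above.
__attribute__((constructor(101)))
static void early_setup(void) { /* runs before default-priority ctors */ }

// Default priority is 65535, so this one stays in plain ".init_array".
__attribute__((constructor))
static void normal_setup(void) {}
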
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index c6a7261439d7..ff210604148d 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -20,6 +20,7 @@ class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
protected:
const MCSection *AttributesSection;
+ bool isAAPCS_ABI;
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
@@ -31,6 +32,9 @@ public:
virtual const MCSection *getAttributesSection() const {
return AttributesSection;
}
+
+  const MCSection *getStaticCtorSection(unsigned Priority) const;
+  const MCSection *getStaticDtorSection(unsigned Priority) const;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index 14d35ba54654..fda8536fcf6b 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -17,9 +17,6 @@
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include <string>
@@ -107,11 +104,9 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() {
SetError(Lexer->getErrLoc(), Lexer->getErr());
break;
case AsmToken::Identifier: {
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
+ std::string lowerCase = lexedToken.getString().lower();
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lowerCase);
// Check for register aliases.
// r13 -> sp
// r14 -> lr
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 24f15b4694ff..e55a7dad45db 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -40,9 +39,15 @@ namespace {
class ARMOperand;
+enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
+
class ARMAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+  // Map of register aliases defined via the .req directive.
+ StringMap<unsigned> RegisterReqs;
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
@@ -91,9 +96,14 @@ class ARMAsmParser : public MCTargetAsmParser {
unsigned &ShiftAmount);
bool parseDirectiveWord(unsigned Size, SMLoc L);
bool parseDirectiveThumb(SMLoc L);
+ bool parseDirectiveARM(SMLoc L);
bool parseDirectiveThumbFunc(SMLoc L);
bool parseDirectiveCode(SMLoc L);
bool parseDirectiveSyntax(SMLoc L);
+ bool parseDirectiveReq(StringRef Name, SMLoc L);
+ bool parseDirectiveUnreq(SMLoc L);
+ bool parseDirectiveArch(SMLoc L);
+ bool parseDirectiveEabiAttr(SMLoc L);
StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode,
bool &CarrySetting, unsigned &ProcessorIMod,
@@ -161,6 +171,8 @@ class ARMAsmParser : public MCTargetAsmParser {
OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&);
OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&);
+ OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index);
// Asm Match Converter Methods
bool cvtT2LdrdPre(MCInst &Inst, unsigned Opcode,
@@ -197,10 +209,18 @@ class ARMAsmParser : public MCTargetAsmParser {
const SmallVectorImpl<MCParsedAsmOperand*> &);
bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
+ bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &);
bool validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
- void processInstruction(MCInst &Inst,
+ bool processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
bool shouldOmitCCOutOperand(StringRef Mnemonic,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -217,6 +237,9 @@ public:
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
+ // Cache the MCRegisterInfo.
+ MRI = &getContext().getRegisterInfo();
+
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -251,7 +274,6 @@ class ARMOperand : public MCParsedAsmOperand {
k_CoprocReg,
k_CoprocOption,
k_Immediate,
- k_FPImmediate,
k_MemBarrierOpt,
k_Memory,
k_PostIndexRegister,
@@ -262,6 +284,9 @@ class ARMOperand : public MCParsedAsmOperand {
k_RegisterList,
k_DPRRegisterList,
k_SPRRegisterList,
+ k_VectorList,
+ k_VectorListAllLanes,
+ k_VectorListIndexed,
k_ShiftedRegister,
k_ShiftedImmediate,
k_ShifterImmediate,
@@ -311,6 +336,14 @@ class ARMOperand : public MCParsedAsmOperand {
unsigned RegNum;
} Reg;
+ // A vector register list is a sequential list of 1 to 4 registers.
+ struct {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned LaneIndex;
+ bool isDoubleSpaced;
+ } VectorList;
+
struct {
unsigned Val;
} VectorIndex;
@@ -319,10 +352,6 @@ class ARMOperand : public MCParsedAsmOperand {
const MCExpr *Val;
} Imm;
- struct {
- unsigned Val; // encoded 8-bit representation
- } FPImm;
-
/// Combined record for all forms of ARM address expressions.
struct {
unsigned BaseRegNum;
@@ -333,7 +362,7 @@ class ARMOperand : public MCParsedAsmOperand {
ARM_AM::ShiftOpc ShiftType; // Shift type for OffsetReg
unsigned ShiftImm; // shift for OffsetReg.
unsigned Alignment; // 0 = no alignment specified
- // n = alignment in bytes (8, 16, or 32)
+ // n = alignment in bytes (2, 4, 8, 16, or 32)
unsigned isNegative : 1; // Negated OffsetReg? (~'U' bit)
} Memory;
@@ -393,6 +422,11 @@ public:
case k_SPRRegisterList:
Registers = o.Registers;
break;
+ case k_VectorList:
+ case k_VectorListAllLanes:
+ case k_VectorListIndexed:
+ VectorList = o.VectorList;
+ break;
case k_CoprocNum:
case k_CoprocReg:
Cop = o.Cop;
@@ -403,9 +437,6 @@ public:
case k_Immediate:
Imm = o.Imm;
break;
- case k_FPImmediate:
- FPImm = o.FPImm;
- break;
case k_MemBarrierOpt:
MBOpt = o.MBOpt;
break;
@@ -474,15 +505,10 @@ public:
}
const MCExpr *getImm() const {
- assert(Kind == k_Immediate && "Invalid access!");
+ assert(isImm() && "Invalid access!");
return Imm.Val;
}
- unsigned getFPImm() const {
- assert(Kind == k_FPImmediate && "Invalid access!");
- return FPImm.Val;
- }
-
unsigned getVectorIndex() const {
assert(Kind == k_VectorIndex && "Invalid access!");
return VectorIndex.Val;
@@ -511,90 +537,219 @@ public:
bool isITMask() const { return Kind == k_ITCondMask; }
bool isITCondCode() const { return Kind == k_CondCode; }
bool isImm() const { return Kind == k_Immediate; }
- bool isFPImm() const { return Kind == k_FPImmediate; }
+ bool isFPImm() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ return Val != -1;
+ }
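
VFP's 8-bit immediate packs a sign, a 3-bit exponent, and a 4-bit fraction, so only a small grid of values qualifies; everything else has to come from a literal pool. A minimal sketch of the predicate's behavior (assuming the usual in-tree headers; isEncodableFPImm is an illustrative name):

#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"

static bool isEncodableFPImm(float F) {
  // getFP32Imm returns the 8-bit encoding, or -1 if F is not representable.
  return llvm::ARM_AM::getFP32Imm(llvm::APInt(32, llvm::FloatToBits(F))) != -1;
}
// isEncodableFPImm(2.0f) -> true   (a valid "vmov.f32 sN, #2.0")
// isEncodableFPImm(0.3f) -> false  (falls back to a literal-pool load)
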
+ bool isFBits16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value <= 16;
+ }
+ bool isFBits32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 1 && Value <= 32;
+ }
bool isImm8s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= -1020 && Value <= 1020;
}
bool isImm0_1020s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= 0 && Value <= 1020;
}
bool isImm0_508s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ((Value & 3) == 0) && Value >= 0 && Value <= 508;
}
+ bool isImm0_508s4Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+    // Explicitly exclude zero; we want that to use the normal 0_508 version.
+ return ((Value & 3) == 0) && Value > 0 && Value <= 508;
+ }
bool isImm0_255() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 256;
}
+ bool isImm0_4095() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4096;
+ }
+ bool isImm0_4095Neg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = -CE->getValue();
+ return Value > 0 && Value < 4096;
+ }
+ bool isImm0_1() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 2;
+ }
+ bool isImm0_3() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 4;
+ }
bool isImm0_7() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 8;
}
bool isImm0_15() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 16;
}
bool isImm0_31() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
+ bool isImm0_63() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 64;
+ }
+ bool isImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 8;
+ }
+ bool isImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 16;
+ }
+ bool isImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value == 32;
+ }
+ bool isShrImm8() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 8;
+ }
+ bool isShrImm16() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 16;
+ }
+ bool isShrImm32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 32;
+ }
+ bool isShrImm64() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value <= 64;
+ }
+ bool isImm1_7() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 8;
+ }
+ bool isImm1_15() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 16;
+ }
+ bool isImm1_31() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value > 0 && Value < 32;
+ }
bool isImm1_16() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 17;
}
bool isImm1_32() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 33;
}
+ bool isImm0_32() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 33;
+ }
bool isImm0_65535() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 65536;
}
bool isImm0_65535Expr() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// If it's not a constant expression, it'll generate a fixup and be
// handled later.
@@ -603,56 +758,81 @@ public:
return Value >= 0 && Value < 65536;
}
bool isImm24bit() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value <= 0xffffff;
}
bool isImmThumbSR() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value < 33;
}
bool isPKHLSLImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value >= 0 && Value < 32;
}
bool isPKHASRImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return Value > 0 && Value <= 32;
}
bool isARMSOImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ARM_AM::getSOImmVal(Value) != -1;
}
+ bool isARMSOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getSOImmVal(~Value) != -1;
+ }
+ bool isARMSOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getSOImmVal(Value) == -1 &&
+ ARM_AM::getSOImmVal(-Value) != -1;
+ }
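
Worked example of why the Not/Neg variants exist: 0xFFFFFF00 has too many set bits to be a rotated-8-bit "so_imm", but its complement is 0xFF, so "mov r0, #0xffffff00" can still be assembled as "mvn r0, #0xff"; the matcher uses isARMSOImmNot to pick the MVN form. A standalone check using the same helper (the function name is illustrative):

#include "MCTargetDesc/ARMAddressingModes.h"
#include <cassert>
#include <cstdint>

static void soImmComplementExample() {
  uint32_t Value = 0xFFFFFF00u;
  assert(llvm::ARM_AM::getSOImmVal(Value) == -1);  // MOV cannot encode it
  assert(llvm::ARM_AM::getSOImmVal(~Value) != -1); // MVN can: ~Value == 0xFF
}
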
bool isT2SOImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
return ARM_AM::getT2SOImmVal(Value) != -1;
}
+ bool isT2SOImmNot() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(~Value) != -1;
+ }
+ bool isT2SOImmNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // Only use this when not representable as a plain so_imm.
+ return ARM_AM::getT2SOImmVal(Value) == -1 &&
+ ARM_AM::getT2SOImmVal(-Value) != -1;
+ }
bool isSetEndImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Value = CE->getValue();
@@ -672,7 +852,7 @@ public:
bool isBitfield() const { return Kind == k_BitfieldDescriptor; }
bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; }
bool isPostIdxReg() const {
- return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift;
+    return Kind == k_PostIndexRegister &&
+           PostIdxReg.ShiftTy == ARM_AM::no_shift;
}
bool isMemNoOffset(bool alignOK = false) const {
if (!isMemory())
@@ -681,6 +861,17 @@ public:
return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
(alignOK || Memory.Alignment == 0);
}
+ bool isMemPCRelImm12() const {
+ if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
+ return false;
+ // Base register must be PC.
+ if (Memory.BaseRegNum != ARM::PC)
+ return false;
+ // Immediate offset in range [-4095, 4095].
+ if (!Memory.OffsetImm) return true;
+ int64_t Val = Memory.OffsetImm->getValue();
+ return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
+ }
bool isAlignedMemory() const {
return isMemNoOffset(true);
}
@@ -694,8 +885,7 @@ public:
return Val > -4096 && Val < 4096;
}
bool isAM2OffsetImm() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
// Immediate offset in range [-4095, 4095].
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
@@ -703,6 +893,11 @@ public:
return Val > -4096 && Val < 4096;
}
bool isAddrMode3() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.Alignment != 0) return false;
// No shifts are legal for AM3.
if (Memory.ShiftType != ARM_AM::no_shift) return false;
@@ -726,6 +921,11 @@ public:
return (Val > -256 && Val < 256) || Val == INT32_MIN;
}
bool isAddrMode5() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.Alignment != 0) return false;
// Check for register offset.
if (Memory.OffsetRegNum) return false;
@@ -733,7 +933,7 @@ public:
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) ||
- Val == INT32_MIN;
+ Val == INT32_MIN;
}
bool isMemTBB() const {
if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative ||
@@ -810,6 +1010,11 @@ public:
return Val >= 0 && Val <= 1020 && (Val % 4) == 0;
}
bool isMemImm8s4Offset() const {
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
+ return true;
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset a multiple of 4 in range [-1020, 1020].
@@ -828,6 +1033,8 @@ public:
bool isMemImm8Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
// Immediate offset in range [-255, 255].
if (!Memory.OffsetImm) return true;
int64_t Val = Memory.OffsetImm->getValue();
@@ -844,18 +1051,14 @@ public:
bool isMemNegImm8Offset() const {
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
+ // Base reg of PC isn't allowed for these encodings.
+ if (Memory.BaseRegNum == ARM::PC) return false;
// Immediate offset in range [-255, -1].
- if (!Memory.OffsetImm) return true;
+ if (!Memory.OffsetImm) return false;
int64_t Val = Memory.OffsetImm->getValue();
- return Val > -256 && Val < 0;
+ return (Val == INT32_MIN) || (Val > -256 && Val < 0);
}
bool isMemUImm12Offset() const {
- // If we have an immediate that's not a constant, treat it as a label
- // reference needing a fixup. If it is a constant, it's something else
- // and we reject it.
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
- return true;
-
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
return false;
// Immediate offset in range [0, 4095].
@@ -867,7 +1070,7 @@ public:
// If we have an immediate that's not a constant, treat it as a label
// reference needing a fixup. If it is a constant, it's something else
// and we reject it.
- if (Kind == k_Immediate && !isa<MCConstantExpr>(getImm()))
+ if (isImm() && !isa<MCConstantExpr>(getImm()))
return true;
if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -878,16 +1081,14 @@ public:
return (Val > -4096 && Val < 4096) || (Val == INT32_MIN);
}
bool isPostIdxImm8() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
return (Val > -256 && Val < 256) || (Val == INT32_MIN);
}
bool isPostIdxImm8s4() const {
- if (Kind != k_Immediate)
- return false;
+ if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
int64_t Val = CE->getValue();
@@ -898,6 +1099,188 @@ public:
bool isMSRMask() const { return Kind == k_MSRMask; }
bool isProcIFlags() const { return Kind == k_ProcIFlags; }
+ // NEON operands.
+ bool isSingleSpacedVectorList() const {
+ return Kind == k_VectorList && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorList() const {
+ return Kind == k_VectorList && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPair() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourD() const {
+ if (!isSingleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListDPairSpaced() const {
+ if (isSingleSpacedVectorList()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairSpcRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListThreeQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourQ() const {
+ if (!isDoubleSpacedVectorList()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorAllLanes() const {
+ return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 1;
+ }
+
+ bool isVecListDPairAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return (ARMMCRegisterClasses[ARM::DPairRegClassID]
+ .contains(VectorList.RegNum));
+ }
+
+ bool isVecListDPairSpacedAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 2;
+ }
+
+ bool isVecListThreeDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListThreeQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 3;
+ }
+
+ bool isVecListFourDAllLanes() const {
+ if (!isSingleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isVecListFourQAllLanes() const {
+ if (!isDoubleSpacedVectorAllLanes()) return false;
+ return VectorList.Count == 4;
+ }
+
+ bool isSingleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced;
+ }
+ bool isDoubleSpacedVectorIndexed() const {
+ return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced;
+ }
+ bool isVecListOneDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListOneDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListOneDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 1 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListTwoDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListTwoQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListTwoDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 2 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListThreeDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListThreeQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListThreeDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 3 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourDByteIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 7;
+ }
+
+ bool isVecListFourDHWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourQWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
+ bool isVecListFourQHWordIndexed() const {
+ if (!isDoubleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 3;
+ }
+
+ bool isVecListFourDWordIndexed() const {
+ if (!isSingleSpacedVectorIndexed()) return false;
+ return VectorList.Count == 4 && VectorList.LaneIndex <= 1;
+ }
+
bool isVectorIndex8() const {
if (Kind != k_VectorIndex) return false;
return VectorIndex.Val < 8;
@@ -911,7 +1294,82 @@ public:
return VectorIndex.Val < 2;
}
+ bool isNEONi8splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+    // i8 value splatted across 8 bytes. The immediate is just the 8-bit
+    // value.
+ return Value >= 0 && Value < 256;
+ }
+
+ bool isNEONi16splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i16 value in the range [0,255] or [0x0100, 0xff00]
+ return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00);
+ }
+
+ bool isNEONi32splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000);
+ }
+ bool isNEONi32vmov() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X;
+ // for VMOV/VMVN only, the 0x__ff and 0x__ffff forms are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+ bool isNEONi32vmovNeg() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ int64_t Value = ~CE->getValue();
+ // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X;
+ // for VMOV/VMVN only, the 0x__ff and 0x__ffff forms are also accepted.
+ return (Value >= 0 && Value < 256) ||
+ (Value >= 0x0100 && Value <= 0xff00) ||
+ (Value >= 0x010000 && Value <= 0xff0000) ||
+ (Value >= 0x01000000 && Value <= 0xff000000) ||
+ (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) ||
+ (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff);
+ }
+
+ bool isNEONi64splat() const {
+ if (!isImm()) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
+ // i64 value with each byte being either 0 or 0xff.
+ for (unsigned i = 0; i < 8; ++i)
+ if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) return false;
+ return true;
+ }
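+
+ // Worked example (illustrative, not from this patch): 0x00ff00ff00ff00ff
+ // passes the byte-wise check above, while 0x00ff00ff00ff00fe fails because
+ // its low byte is neither 0x00 nor 0xff.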
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
@@ -967,7 +1425,8 @@ public:
void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!");
+ assert(isRegShiftedReg() &&
+ "addRegShiftedRegOperands() on non RegShiftedReg!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg));
Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg));
Inst.addOperand(MCOperand::CreateImm(
@@ -976,7 +1435,8 @@ public:
void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
- assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!");
+ assert(isRegShiftedImm() &&
+ "addRegShiftedImmOperands() on non RegShiftedImm!");
Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg));
Inst.addOperand(MCOperand::CreateImm(
ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm)));
@@ -1026,9 +1486,23 @@ public:
addExpr(Inst, getImm());
}
+ void addFBits16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(16 - CE->getValue()));
+ }
+
+ void addFBits32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(32 - CE->getValue()));
+ }
+
void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ int Val = ARM_AM::getFP32Imm(APInt(32, CE->getValue()));
+ Inst.addOperand(MCOperand::CreateImm(Val));
}
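+
+ // For illustration (values assumed from the VFPv3 8-bit immediate format,
+ // not taken from this patch): ARM_AM::getFP32Imm() maps the bit pattern of
+ // a representable constant to its imm8 encoding, e.g. #1.0 (0x3f800000)
+ // encodes as 0x70, and unrepresentable values yield -1.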
void addImm8s4Operands(MCInst &Inst, unsigned N) const {
@@ -1047,32 +1521,20 @@ public:
Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
}
- void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
+ void addImm0_508s4NegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate is scaled by four in the encoding and is stored
// in the MCInst as such. Lop off the low two bits here.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
+ Inst.addOperand(MCOperand::CreateImm(-(CE->getValue() / 4)));
}
- void addImm0_255Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_7Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_15Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ void addImm0_508s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The immediate is scaled by four in the encoding and is stored
+ // in the MCInst as such. Lop off the low two bits here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4));
}
void addImm1_16Operands(MCInst &Inst, unsigned N) const {
@@ -1091,21 +1553,6 @@ public:
Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
}
- void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
- void addImm24bitOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addImmThumbSROperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The constant encodes as the immediate, except for 32, which encodes as
@@ -1115,11 +1562,6 @@ public:
Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm)));
}
- void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addPKHASRImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// An ASR value of 32 encodes as 0, so that's how we want to add it to
@@ -1129,19 +1571,44 @@ public:
Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 0 : Val));
}
- void addARMSOImmOperands(MCInst &Inst, unsigned N) const {
+ void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually a t2_so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
}
- void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ void addT2SOImmNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually a t2_so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
- void addSetEndImmOperands(MCInst &Inst, unsigned N) const {
+ void addImm0_4095NegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ // The operand is actually an imm0_4095, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
+ }
+
+ void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its bitwise
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(~CE->getValue()));
+ }
+
+ void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The operand is actually a so_imm, but we have its
+ // negation in the assembly source, so twiddle it here.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(-CE->getValue()));
}
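+
+ // For illustration: these *Not/*Neg variants support aliases that accept a
+ // complemented or negated immediate (e.g. matching "add r0, r1, #-2" via
+ // the SUB encoding with #2); the alias selection itself is assumed to live
+ // in the .td patterns, not in this patch.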
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
@@ -1154,6 +1621,14 @@ public:
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
}
+ void addMemPCRelImm12Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ int32_t Imm = Memory.OffsetImm->getValue();
+ // FIXME: Handle #-0
+ if (Imm == INT32_MIN) Imm = 0;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
+
void addAlignedMemoryOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
@@ -1196,6 +1671,16 @@ public:
void addAddrMode3Operands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateReg(0));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
if (!Memory.OffsetRegNum) {
ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1237,6 +1722,15 @@ public:
void addAddrMode5Operands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
// The lower two bits are always zero and as such are not encoded.
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0;
ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add;
@@ -1250,6 +1744,15 @@ public:
void addMemImm8s4OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
+ // If we have an immediate that's not a constant, treat it as a label
+ // reference needing a fixup. If it is a constant, it's something else
+ // and we reject it.
+ if (isImm()) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(0));
+ return;
+ }
+
int64_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -1281,7 +1784,7 @@ public:
void addMemUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If this is an immediate, it's a label reference.
- if (Kind == k_Immediate) {
+ if (isImm()) {
addExpr(Inst, getImm());
Inst.addOperand(MCOperand::CreateImm(0));
return;
@@ -1296,7 +1799,7 @@ public:
void addMemImm12OffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands!");
// If this is an immediate, it's a label reference.
- if (Kind == k_Immediate) {
+ if (isImm()) {
addExpr(Inst, getImm());
Inst.addOperand(MCOperand::CreateImm(0));
return;
@@ -1322,8 +1825,9 @@ public:
void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
- unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
- Memory.ShiftImm, Memory.ShiftType);
+ unsigned Val =
+ ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add,
+ Memory.ShiftImm, Memory.ShiftType);
Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum));
Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -1420,6 +1924,17 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
+ void addVecListOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ }
+
+ void addVecListIndexedOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ Inst.addOperand(MCOperand::CreateImm(VectorList.LaneIndex));
+ }
+
void addVectorIndex8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
@@ -1435,6 +1950,80 @@ public:
Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
+ void addNEONi8splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ // Mask in that this is an i8 splat.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue() | 0xe00));
+ }
+
+ void addNEONi16splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256)
+ Value = (Value >> 8) | 0xa00;
+ else
+ Value |= 0x800;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
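+
+ // Worked example (illustrative): #0x1200 takes the high-byte path and
+ // becomes (0x1200 >> 8) | 0xa00 = 0xa12, while #0x12 becomes
+ // 0x12 | 0x800 = 0x812.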
+
+ void addNEONi32splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xff00)
+ Value = (Value >> 8) | 0x200;
+ else if (Value > 0xffff && Value <= 0xff0000)
+ Value = (Value >> 16) | 0x400;
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
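+
+ // Worked example (illustrative): #0xab0000 becomes
+ // (0xab0000 >> 16) | 0x400 = 0x4ab; a plain byte value such as #0xab is
+ // passed through unchanged.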
+
+ void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
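+
+ // Worked example (illustrative): #0x12ff has its low byte set, so it
+ // becomes (0x12ff >> 8) | 0xc00 = 0xc12, whereas #0x1200 becomes
+ // (0x1200 >> 8) | 0x200 = 0x212.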
+
+ void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = ~CE->getValue();
+ if (Value >= 256 && Value <= 0xffff)
+ Value = (Value >> 8) | ((Value & 0xff) ? 0xc00 : 0x200);
+ else if (Value > 0xffff && Value <= 0xffffff)
+ Value = (Value >> 16) | ((Value & 0xff) ? 0xd00 : 0x400);
+ else if (Value > 0xffffff)
+ Value = (Value >> 24) | 0x600;
+ Inst.addOperand(MCOperand::CreateImm(Value));
+ }
+
+ void addNEONi64splatOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ unsigned Imm = 0;
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
+ Imm |= (Value & 1) << i;
+ }
+ Inst.addOperand(MCOperand::CreateImm(Imm | 0x1e00));
+ }
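+
+ // Worked example (illustrative): for #0x00ff00ff00ff00ff the loop gathers
+ // one bit per byte, low byte first, giving Imm = 0b01010101 = 0x55, which
+ // is emitted as 0x55 | 0x1e00 = 0x1e55.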
+
virtual void print(raw_ostream &OS) const;
static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) {
@@ -1579,6 +2168,43 @@ public:
return Op;
}
+ static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced, SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorList);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListAllLanes);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
+ static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count,
+ unsigned Index,
+ bool isDoubleSpaced,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(k_VectorListIndexed);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.LaneIndex = Index;
+ Op->VectorList.isDoubleSpaced = isDoubleSpaced;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
MCContext &Ctx) {
ARMOperand *Op = new ARMOperand(k_VectorIndex);
@@ -1596,14 +2222,6 @@ public:
return Op;
}
- static ARMOperand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- ARMOperand *Op = new ARMOperand(k_FPImmediate);
- Op->FPImm.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
static ARMOperand *CreateMem(unsigned BaseRegNum,
const MCConstantExpr *OffsetImm,
unsigned OffsetRegNum,
@@ -1668,10 +2286,6 @@ public:
void ARMOperand::print(raw_ostream &OS) const {
switch (Kind) {
- case k_FPImmediate:
- OS << "<fpimm " << getFPImm() << "(" << ARM_AM::getFPImmFloat(getFPImm())
- << ") >";
- break;
case k_CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
break;
@@ -1679,9 +2293,10 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << "<ccout " << getReg() << ">";
break;
case k_ITCondMask: {
- static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)",
- "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)",
- "(tee)", "(eee)" };
+ static const char *MaskStr[] = {
+ "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)",
+ "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)"
+ };
assert((ITMask.Mask & 0xf) == ITMask.Mask);
OS << "<it-mask " << MaskStr[ITMask.Mask] << ">";
break;
@@ -1735,18 +2350,15 @@ void ARMOperand::print(raw_ostream &OS) const {
break;
case k_ShiftedRegister:
OS << "<so_reg_reg "
- << RegShiftedReg.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedReg.ShiftImm))
- << ", " << RegShiftedReg.ShiftReg << ", "
- << ARM_AM::getSORegOffset(RegShiftedReg.ShiftImm)
- << ">";
+ << RegShiftedReg.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedReg.ShiftTy)
+ << " " << RegShiftedReg.ShiftReg << ">";
break;
case k_ShiftedImmediate:
OS << "<so_reg_imm "
- << RegShiftedImm.SrcReg
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(RegShiftedImm.ShiftImm))
- << ", " << ARM_AM::getSORegOffset(RegShiftedImm.ShiftImm)
- << ">";
+ << RegShiftedImm.SrcReg << " "
+ << ARM_AM::getShiftOpcStr(RegShiftedImm.ShiftTy)
+ << " #" << RegShiftedImm.ShiftImm << ">";
break;
case k_RotateImmediate:
OS << "<ror " << " #" << (RotImm.Imm * 8) << ">";
@@ -1770,6 +2382,18 @@ void ARMOperand::print(raw_ostream &OS) const {
OS << ">";
break;
}
+ case k_VectorList:
+ OS << "<vector_list " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListAllLanes:
+ OS << "<vector_list(all lanes) " << VectorList.Count << " * "
+ << VectorList.RegNum << ">";
+ break;
+ case k_VectorListIndexed:
+ OS << "<vector_list(lane " << VectorList.LaneIndex << ") "
+ << VectorList.Count << " * " << VectorList.RegNum << ">";
+ break;
case k_Token:
OS << "'" << getToken() << "'";
break;
@@ -1788,7 +2412,9 @@ static unsigned MatchRegisterName(StringRef Name);
bool ARMAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
RegNo = tryParseRegister();
+ EndLoc = Parser.getTok().getLoc();
return (RegNo == (unsigned)-1);
}
@@ -1801,10 +2427,7 @@ int ARMAsmParser::tryParseRegister() {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier)) return -1;
- // FIXME: Validate register for the current architecture; we have to do
- // validation later, so maybe there is no need for this here.
- std::string upperCase = Tok.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
+ std::string lowerCase = Tok.getString().lower();
unsigned RegNum = MatchRegisterName(lowerCase);
if (!RegNum) {
RegNum = StringSwitch<unsigned>(lowerCase)
@@ -1812,44 +2435,38 @@ int ARMAsmParser::tryParseRegister() {
.Case("r14", ARM::LR)
.Case("r15", ARM::PC)
.Case("ip", ARM::R12)
+ // Additional register name aliases for 'gas' compatibility.
+ .Case("a1", ARM::R0)
+ .Case("a2", ARM::R1)
+ .Case("a3", ARM::R2)
+ .Case("a4", ARM::R3)
+ .Case("v1", ARM::R4)
+ .Case("v2", ARM::R5)
+ .Case("v3", ARM::R6)
+ .Case("v4", ARM::R7)
+ .Case("v5", ARM::R8)
+ .Case("v6", ARM::R9)
+ .Case("v7", ARM::R10)
+ .Case("v8", ARM::R11)
+ .Case("sb", ARM::R9)
+ .Case("sl", ARM::R10)
+ .Case("fp", ARM::R11)
.Default(0);
}
- if (!RegNum) return -1;
+ if (!RegNum) {
+ // Check for aliases registered via .req. Canonicalize to lower case.
+ // That's more consistent since register names are case insensitive, and
+ // it's how the original entry was passed in from MC/MCParser/AsmParser.
+ StringMap<unsigned>::const_iterator Entry = RegisterReqs.find(lowerCase);
+ // If no match, return failure.
+ if (Entry == RegisterReqs.end())
+ return -1;
+ Parser.Lex(); // Eat identifier token.
+ return Entry->getValue();
+ }
Parser.Lex(); // Eat identifier token.
-#if 0
- // Also check for an index operand. This is only legal for vector registers,
- // but that'll get caught OK in operand matching, so we don't need to
- // explicitly filter everything else out here.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = Parser.getTok().getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const MCExpr *ImmVal;
- SMLoc ExprLoc = Parser.getTok().getLoc();
- if (getParser().ParseExpression(ImmVal))
- return MatchOperand_ParseFail;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
-
- SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
-
- Parser.Lex(); // Eat right bracket token.
-
- Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(),
- SIdx, E,
- getContext()));
- }
-#endif
-
return RegNum;
}
@@ -1864,9 +2481,9 @@ int ARMAsmParser::tryParseShiftRegister(
const AsmToken &Tok = Parser.getTok();
assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
- std::string upperCase = Tok.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
+ std::string lowerCase = Tok.getString().lower();
ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+ .Case("asl", ARM_AM::lsl)
.Case("lsl", ARM_AM::lsl)
.Case("lsr", ARM_AM::lsr)
.Case("asr", ARM_AM::asr)
@@ -1895,7 +2512,8 @@ int ARMAsmParser::tryParseShiftRegister(
ShiftReg = SrcReg;
} else {
// Figure out if this is shifted by a constant or a register (for non-RRX).
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
const MCExpr *ShiftExpr = 0;
@@ -1919,6 +2537,10 @@ int ARMAsmParser::tryParseShiftRegister(
Error(ImmLoc, "immediate shift value out of range");
return -1;
}
+ // Shift by zero is a nop; always send it through as lsl
+ // ('as' compatibility).
+ if (Imm == 0)
+ ShiftTy = ARM_AM::lsl;
} else if (Parser.getTok().is(AsmToken::Identifier)) {
ShiftReg = tryParseRegister();
SMLoc L = Parser.getTok().getLoc();
@@ -1976,20 +2598,15 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat left bracket token.
const MCExpr *ImmVal;
- SMLoc ExprLoc = Parser.getTok().getLoc();
if (getParser().ParseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return true;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
SMLoc E = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(E, "']' expected");
Parser.Lex(); // Eat right bracket token.
@@ -2008,7 +2625,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
// Use the same layout as the tablegen'erated register name matcher. Ugly,
// but efficient.
switch (Name.size()) {
- default: break;
+ default: return -1;
case 2:
if (Name[0] != CoprocOp)
return -1;
@@ -2025,7 +2642,6 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '8': return 8;
case '9': return 9;
}
- break;
case 3:
if (Name[0] != CoprocOp || Name[1] != '1')
return -1;
@@ -2038,10 +2654,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
case '4': return 14;
case '5': return 15;
}
- break;
}
-
- return -1;
}
/// parseITCondCode - Try to parse a condition code for an IT instruction.
@@ -2161,7 +2774,7 @@ static unsigned getNextRegister(unsigned Reg) {
if (!ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
return Reg + 1;
switch(Reg) {
- default: assert(0 && "Invalid GPR number!");
+ default: llvm_unreachable("Invalid GPR number!");
case ARM::R0: return ARM::R1; case ARM::R1: return ARM::R2;
case ARM::R2: return ARM::R3; case ARM::R3: return ARM::R4;
case ARM::R4: return ARM::R5; case ARM::R5: return ARM::R6;
@@ -2173,6 +2786,29 @@ static unsigned getNextRegister(unsigned Reg) {
}
}
+// Return the low D sub-register of a given Q register.
+static unsigned getDRegFromQReg(unsigned QReg) {
+ switch (QReg) {
+ default: llvm_unreachable("expected a Q register!");
+ case ARM::Q0: return ARM::D0;
+ case ARM::Q1: return ARM::D2;
+ case ARM::Q2: return ARM::D4;
+ case ARM::Q3: return ARM::D6;
+ case ARM::Q4: return ARM::D8;
+ case ARM::Q5: return ARM::D10;
+ case ARM::Q6: return ARM::D12;
+ case ARM::Q7: return ARM::D14;
+ case ARM::Q8: return ARM::D16;
+ case ARM::Q9: return ARM::D18;
+ case ARM::Q10: return ARM::D20;
+ case ARM::Q11: return ARM::D22;
+ case ARM::Q12: return ARM::D24;
+ case ARM::Q13: return ARM::D26;
+ case ARM::Q14: return ARM::D28;
+ case ARM::Q15: return ARM::D30;
+ }
+}
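+
+// For example, getDRegFromQReg(ARM::Q3) returns ARM::D6, so a Q3 operand
+// below expands to the D6/D7 pair.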
+
/// Parse a register list.
bool ARMAsmParser::
parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -2188,7 +2824,17 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Reg == -1)
return Error(RegLoc, "register expected");
- MCRegisterClass *RC;
+ // The reglist instructions have at most 16 registers, so reserve
+ // space for that many.
+ SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
+
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ ++Reg;
+ }
+ const MCRegisterClass *RC;
if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
RC = &ARMMCRegisterClasses[ARM::GPRRegClassID];
else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg))
@@ -2198,10 +2844,7 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
else
return Error(RegLoc, "invalid register in register list");
- // The reglist instructions have at most 16 registers, so reserve
- // space for that many.
- SmallVector<std::pair<unsigned, SMLoc>, 16> Registers;
- // Store the first register.
+ // Store the register.
Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
// This starts immediately after the first register token in the list,
@@ -2210,11 +2853,14 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the minus.
SMLoc EndLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
if (EndReg == -1)
return Error(EndLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
// If the register is the same as the start reg, there's nothing
// more to do.
if (Reg == EndReg)
@@ -2236,15 +2882,31 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the comma.
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
+ const AsmToken RegTok = Parser.getTok();
Reg = tryParseRegister();
if (Reg == -1)
return Error(RegLoc, "register expected");
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ bool isQReg = false;
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ isQReg = true;
+ }
// The register must be in the same register class as the first.
if (!RC->contains(Reg))
return Error(RegLoc, "invalid register in register list");
// List must be monotonically increasing.
- if (getARMRegisterNumbering(Reg) <= getARMRegisterNumbering(OldReg))
- return Error(RegLoc, "register list not in ascending order");
+ if (getARMRegisterNumbering(Reg) < getARMRegisterNumbering(OldReg)) {
+ if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg))
+ Warning(RegLoc, "register list not in ascending order");
+ else
+ return Error(RegLoc, "register list not in ascending order");
+ }
+ if (getARMRegisterNumbering(Reg) == getARMRegisterNumbering(OldReg)) {
+ Warning(RegLoc, "duplicated register (" + RegTok.getString() +
+ ") in register list");
+ continue;
+ }
// VFP register lists must also be contiguous.
// It's OK to use the enumeration values directly here, as the
// VFP register classes have the enum sorted properly.
@@ -2252,6 +2914,8 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Reg != OldReg + 1)
return Error(RegLoc, "non-contiguous register range");
Registers.push_back(std::pair<unsigned, SMLoc>(Reg, RegLoc));
+ if (isQReg)
+ Registers.push_back(std::pair<unsigned, SMLoc>(++Reg, RegLoc));
}
SMLoc E = Parser.getTok().getLoc();
@@ -2259,10 +2923,319 @@ parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return Error(E, "'}' expected");
Parser.Lex(); // Eat '}' token.
+ // Push the register list operand.
Operands.push_back(ARMOperand::CreateRegList(Registers, S, E));
+
+ // The ARM system instruction variants for LDM/STM have a '^' token here.
+ if (Parser.getTok().is(AsmToken::Caret)) {
+ Operands.push_back(ARMOperand::CreateToken("^",Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat '^' token.
+ }
+
return false;
}
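+
+// Illustrative inputs accepted above: "{r0-r3, r12, lr}" for a GPR list, and
+// "{q0, q1}", which is interpreted as the contiguous D0-D3 range via
+// getDRegFromQReg().
+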
+// Helper function to parse the lane index for vector lists.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index) {
+ Index = 0; // Always return a defined index value.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ Parser.Lex(); // Eat the '['.
+ if (Parser.getTok().is(AsmToken::RBrac)) {
+ // "Dn[]" is the 'all lanes' syntax.
+ LaneKind = AllLanes;
+ Parser.Lex(); // Eat the ']'.
+ return MatchOperand_Success;
+ }
+
+ // There's an optional '#' token here. Normally there wouldn't be, but
+ // inline assembly puts one in, and it's friendly to accept that.
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat the '#'
+
+ const MCExpr *LaneIndex;
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getParser().ParseExpression(LaneIndex)) {
+ Error(Loc, "illegal expression");
+ return MatchOperand_ParseFail;
+ }
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex);
+ if (!CE) {
+ Error(Loc, "lane index must be empty or an integer");
+ return MatchOperand_ParseFail;
+ }
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(Parser.getTok().getLoc(), "']' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the ']'.
+ int64_t Val = CE->getValue();
+
+ // FIXME: Make this range check context sensitive for .8, .16, .32.
+ if (Val < 0 || Val > 7) {
+ Error(Parser.getTok().getLoc(), "lane index out of range");
+ return MatchOperand_ParseFail;
+ }
+ Index = Val;
+ LaneKind = IndexedLane;
+ return MatchOperand_Success;
+ }
+ LaneKind = NoLanes;
+ return MatchOperand_Success;
+}
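+
+// Illustrative forms (assumed): "[]" yields AllLanes, "[2]" yields
+// IndexedLane with Index == 2, and anything not starting with '[' yields
+// NoLanes with no tokens consumed.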
+
+// Parse a vector register list.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ VectorLaneTy LaneKind;
+ unsigned LaneIndex;
+ SMLoc S = Parser.getTok().getLoc();
+ // As an extension (to match gas), support a plain D register or Q register
+ // (without enclosing curly braces) as a single- or double-entry list,
+ // respectively.
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ int Reg = tryParseRegister();
+ if (Reg == -1)
+ return MatchOperand_NoMatch;
+ SMLoc E = Parser.getTok().getLoc();
+ if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 1, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 1, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 1,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ Reg = getDRegFromQReg(Reg);
+ OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex);
+ if (Res != MatchOperand_Success)
+ return Res;
+ switch (LaneKind) {
+ case NoLanes:
+ E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorList(Reg, 2, false, S, E));
+ break;
+ case AllLanes:
+ E = Parser.getTok().getLoc();
+ Reg = MRI->getMatchingSuperReg(Reg, ARM::dsub_0,
+ &ARMMCRegisterClasses[ARM::DPairRegClassID]);
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(Reg, 2, false,
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(Reg, 2,
+ LaneIndex,
+ false, S, E));
+ break;
+ }
+ return MatchOperand_Success;
+ }
+ Error(S, "vector register expected");
+ return MatchOperand_ParseFail;
+ }
+
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return MatchOperand_NoMatch;
+
+ Parser.Lex(); // Eat '{' token.
+ SMLoc RegLoc = Parser.getTok().getLoc();
+
+ int Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ unsigned Count = 1;
+ int Spacing = 0;
+ unsigned FirstReg = Reg;
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ FirstReg = Reg = getDRegFromQReg(Reg);
+ Spacing = 1; // Double spacing requires explicit D registers; otherwise
+ // it's ambiguous with a four-register single-spaced list.
+ ++Reg;
+ ++Count;
+ }
+ if (parseVectorLane(LaneKind, LaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+
+ while (Parser.getTok().is(AsmToken::Comma) ||
+ Parser.getTok().is(AsmToken::Minus)) {
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat the minus.
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ int EndReg = tryParseRegister();
+ if (EndReg == -1) {
+ Error(EndLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Allow Q regs and just interpret them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
+ EndReg = getDRegFromQReg(EndReg) + 1;
+ // If the register is the same as the start reg, there's nothing
+ // more to do.
+ if (Reg == EndReg)
+ continue;
+ // The register must be in the same register class as the first.
+ if (!ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)) {
+ Error(EndLoc, "invalid register in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Ranges must go from low to high.
+ if (Reg > EndReg) {
+ Error(EndLoc, "bad range in register list");
+ return MatchOperand_ParseFail;
+ }
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ EndLoc = Parser.getTok().getLoc();
+
+ // Add all the registers in the range to the register list.
+ Count += EndReg - Reg;
+ Reg = EndReg;
+ continue;
+ }
+ Parser.Lex(); // Eat the comma.
+ RegLoc = Parser.getTok().getLoc();
+ int OldReg = Reg;
+ Reg = tryParseRegister();
+ if (Reg == -1) {
+ Error(RegLoc, "register expected");
+ return MatchOperand_ParseFail;
+ }
+ // Vector register lists must be contiguous.
+ // It's OK to use the enumeration values directly here, as the
+ // VFP register classes have the enum sorted properly.
+ //
+ // The list is of D registers, but we also allow Q regs and just interpret
+ // them as the two D sub-registers.
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
+ if (!Spacing)
+ Spacing = 1; // Register range implies a single spaced list.
+ else if (Spacing == 2) {
+ Error(RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
+ return MatchOperand_ParseFail;
+ }
+ Reg = getDRegFromQReg(Reg);
+ if (Reg != OldReg + 1) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Reg;
+ Count += 2;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ continue;
+ }
+ // Normal D register.
+ // Figure out the register spacing (single or double) of the list if
+ // we don't know it already.
+ if (!Spacing)
+ Spacing = 1 + (Reg == OldReg + 2);
+
+ // Just check that it's contiguous and keep going.
+ if (Reg != OldReg + Spacing) {
+ Error(RegLoc, "non-contiguous register range");
+ return MatchOperand_ParseFail;
+ }
+ ++Count;
+ // Parse the lane specifier if present.
+ VectorLaneTy NextLaneKind;
+ unsigned NextLaneIndex;
+ SMLoc EndLoc = Parser.getTok().getLoc();
+ if (parseVectorLane(NextLaneKind, NextLaneIndex) != MatchOperand_Success)
+ return MatchOperand_ParseFail;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
+ Error(EndLoc, "mismatched lane index in register list");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ SMLoc E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly)) {
+ Error(E, "'}' expected");
+ return MatchOperand_ParseFail;
+ }
+ Parser.Lex(); // Eat '}' token.
+
+ switch (LaneKind) {
+ case NoLanes:
+ // Two-register operands are represented with the composite
+ // register classes, so convert to the matching super-register.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+
+ Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count,
+ (Spacing == 2), S, E));
+ break;
+ case AllLanes:
+ // Two-register operands are represented with the composite
+ // register classes, so convert to the matching super-register.
+ if (Count == 2) {
+ const MCRegisterClass *RC = (Spacing == 1) ?
+ &ARMMCRegisterClasses[ARM::DPairRegClassID] :
+ &ARMMCRegisterClasses[ARM::DPairSpcRegClassID];
+ FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC);
+ }
+ Operands.push_back(ARMOperand::CreateVectorListAllLanes(FirstReg, Count,
+ (Spacing == 2),
+ S, E));
+ break;
+ case IndexedLane:
+ Operands.push_back(ARMOperand::CreateVectorListIndexed(FirstReg, Count,
+ LaneIndex,
+ (Spacing == 2),
+ S, E));
+ break;
+ }
+ return MatchOperand_Success;
+}
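+
+// Illustrative inputs (assumed): "{d0, d1}" gives a single-spaced pair
+// (rewritten to its DPair super-register), "{d0, d2}" gives a double-spaced
+// pair (Spacing == 2), "{d0[1], d1[1]}" gives an indexed list, and a bare
+// "q1" is shorthand for "{d2, d3}".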
+
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
@@ -2337,7 +3310,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (isMClass()) {
// See ARMv6-M 10.1.1
- unsigned FlagsVal = StringSwitch<unsigned>(Mask)
+ std::string Name = Mask.lower();
+ unsigned FlagsVal = StringSwitch<unsigned>(Name)
.Case("apsr", 0)
.Case("iapsr", 1)
.Case("eapsr", 2)
@@ -2353,14 +3327,14 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
.Case("faultmask", 19)
.Case("control", 20)
.Default(~0U);
-
+
if (FlagsVal == ~0U)
return MatchOperand_NoMatch;
if (!hasV7Ops() && FlagsVal >= 17 && FlagsVal <= 19)
// basepri, basepri_max and faultmask only valid for V7m.
return MatchOperand_NoMatch;
-
+
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
return MatchOperand_Success;
@@ -2369,7 +3343,7 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
size_t Start = 0, Next = Mask.find('_');
StringRef Flags = "";
- std::string SpecReg = LowercaseString(Mask.slice(Start, Next));
+ std::string SpecReg = Mask.slice(Start, Next).lower();
if (Next != StringRef::npos)
Flags = Mask.slice(Next+1, Mask.size());
@@ -2392,7 +3366,8 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
FlagsVal = 8; // No flag
}
} else if (SpecReg == "cpsr" || SpecReg == "spsr") {
- if (Flags == "all") // cpsr_all is an alias for cpsr_fc
+ // cpsr_all is an alias for cpsr_fc, as is plain cpsr.
+ if (Flags == "all" || Flags == "")
Flags = "fc";
for (int i = 0, e = Flags.size(); i != e; ++i) {
unsigned Flag = StringSwitch<unsigned>(Flags.substr(i, 1))
@@ -2411,9 +3386,13 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
} else // No match for special register.
return MatchOperand_NoMatch;
- // Special register without flags are equivalent to "fc" flags.
- if (!FlagsVal)
- FlagsVal = 0x9;
+ // Special register without flags is NOT equivalent to "fc" flags.
+ // NOTE: This is a divergence from gas' behavior. Uncommenting the following
+ // two lines would enable gas compatibility at the expense of breaking
+ // round-tripping.
+ //
+ // if (!FlagsVal)
+ // FlagsVal = 0x9;
// Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1)
if (SpecReg == "spsr")
@@ -2433,8 +3412,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
return MatchOperand_ParseFail;
}
StringRef ShiftName = Tok.getString();
- std::string LowerOp = LowercaseString(Op);
- std::string UpperOp = UppercaseString(Op);
+ std::string LowerOp = Op.lower();
+ std::string UpperOp = Op.upper();
if (ShiftName != LowerOp && ShiftName != UpperOp) {
Error(Parser.getTok().getLoc(), Op + " operand expected.");
return MatchOperand_ParseFail;
@@ -2442,7 +3421,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op,
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2520,7 +3500,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2580,7 +3561,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2617,7 +3599,8 @@ ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2649,7 +3632,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_ParseFail;
}
Parser.Lex(); // Eat hash token.
- if (Parser.getTok().isNot(AsmToken::Hash)) {
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar)) {
Error(Parser.getTok().getLoc(), "'#' expected");
return MatchOperand_ParseFail;
}
@@ -2743,7 +3727,8 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Tok.getLoc();
// Do immediates first, as we always parse those if we have a '#'.
- if (Parser.getTok().is(AsmToken::Hash)) {
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat the '#'.
// Explicitly look for a '-', as we need to encode negative zero
// differently.
@@ -3082,18 +4067,80 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode,
}
((ARMOperand*)Operands[3])->addRegOperands(Inst, 1);
((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1);
- ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1);
- // If we have a three-operand form, use that, else the second source operand
- // is just the destination operand again.
- if (Operands.size() == 6)
- ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
- else
- Inst.addOperand(Inst.getOperand(0));
+ // If we have a three-operand form, make sure to set Rn to be the operand
+ // that isn't the same as Rd.
+ unsigned RegOp = 4;
+ if (Operands.size() == 6 &&
+ ((ARMOperand*)Operands[4])->getReg() ==
+ ((ARMOperand*)Operands[3])->getReg())
+ RegOp = 5;
+ ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1);
+ Inst.addOperand(Inst.getOperand(0));
((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2);
return true;
}
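+
+// For illustration: for "muls r0, r0, r1" the destination matches the first
+// source, so RegOp picks Operands[5] (r1) as Rn, and Rd is reused as Rm via
+// Inst.getOperand(0), matching the Thumb MULS constraint Rd == Rm.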
+bool ARMAsmParser::
+cvtVLDwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVLDwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Vd
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVSTwbFixed(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
+
+bool ARMAsmParser::
+cvtVSTwbRegister(MCInst &Inst, unsigned Opcode,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Create a writeback register dummy placeholder.
+ Inst.addOperand(MCOperand::CreateImm(0));
+ // Vn
+ ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2);
+ // Vm
+ ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1);
+ // Vt
+ ((ARMOperand*)Operands[3])->addVecListOperands(Inst, 1);
+ // pred
+ ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+ return true;
+}
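+
+// Illustrative source forms these converters handle (syntax assumed, not
+// shown in this patch):
+//   vld1.8 {d0}, [r0]!       -> cvtVLDwbFixed
+//   vld1.8 {d0}, [r0], r2    -> cvtVLDwbRegister
+//   vst1.8 {d0}, [r0]!       -> cvtVSTwbFixed
+//   vst1.8 {d0}, [r0], r2    -> cvtVSTwbRegister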
+
/// Parse an ARM memory expression; return false on success, true on error.
/// The first token must be a '[' when called.
bool ARMAsmParser::
@@ -3153,7 +4200,10 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
unsigned Align = 0;
switch (CE->getValue()) {
default:
- return Error(E, "alignment specifier must be 64, 128, or 256 bits");
+ return Error(E,
+ "alignment specifier must be 16, 32, 64, 128, or 256 bits");
+ case 16: Align = 2; break;
+ case 32: Align = 4; break;
case 64: Align = 8; break;
case 128: Align = 16; break;
case 256: Align = 32; break;
@@ -3182,9 +4232,13 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
}
// If we have a '#', it's an immediate offset, else assume it's a register
- // offset.
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat the '#'.
+ // offset. Be friendly and also accept a plain integer (without a leading
+ // hash) for gas compatibility.
+ if (Parser.getTok().is(AsmToken::Hash) ||
+ Parser.getTok().is(AsmToken::Dollar) ||
+ Parser.getTok().is(AsmToken::Integer)) {
+ if (Parser.getTok().isNot(AsmToken::Integer))
+ Parser.Lex(); // Eat the '#'.
E = Parser.getTok().getLoc();
bool isNegative = getParser().getTok().is(AsmToken::Minus);
@@ -3281,7 +4335,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef ShiftName = Tok.getString();
- if (ShiftName == "lsl" || ShiftName == "LSL")
+ if (ShiftName == "lsl" || ShiftName == "LSL" ||
+ ShiftName == "asl" || ShiftName == "ASL")
St = ARM_AM::lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
St = ARM_AM::lsr;
@@ -3301,7 +4356,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
Loc = Parser.getTok().getLoc();
// A '#' and a shift amount.
const AsmToken &HashTok = Parser.getTok();
- if (HashTok.isNot(AsmToken::Hash))
+ if (HashTok.isNot(AsmToken::Hash) &&
+ HashTok.isNot(AsmToken::Dollar))
return Error(HashTok.getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
@@ -3328,10 +4384,36 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
/// parseFPImm - A floating point immediate expression operand.
ARMAsmParser::OperandMatchResultTy ARMAsmParser::
parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Anything that can accept a floating point constant as an operand
+ // needs to go through here, as the regular ParseExpression is
+ // integer only.
+ //
+ // This routine still creates a generic Immediate operand, containing
+ // a bitcast of the 64-bit floating point value. The various operands
+ // that accept floats can check whether the value is valid for them
+ // via the standard is*() predicates.
+
SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Hash))
+ if (Parser.getTok().isNot(AsmToken::Hash) &&
+ Parser.getTok().isNot(AsmToken::Dollar))
return MatchOperand_NoMatch;
+
+ // Disambiguate the VMOV forms that can accept an FP immediate.
+ // vmov.f32 <sreg>, #imm
+ // vmov.f64 <dreg>, #imm
+ // vmov.f32 <dreg>, #imm @ vector f32x2
+ // vmov.f32 <qreg>, #imm @ vector f32x4
+ //
+ // There are also the NEON VMOV instructions which expect an
+ // integer constant. Make sure we don't try to parse an FPImm
+ // for these:
+ // vmov.i{8|16|32|64} <dreg|qreg>, #imm
+ ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]);
+ if (!TyOp->isToken() || (TyOp->getToken() != ".f32" &&
+ TyOp->getToken() != ".f64"))
+ return MatchOperand_NoMatch;
+
Parser.Lex(); // Eat the '#'.
// Handle negation, as that still comes through as a separate token.
@@ -3341,34 +4423,39 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
Parser.Lex();
}
const AsmToken &Tok = Parser.getTok();
+ SMLoc Loc = Tok.getLoc();
if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
// If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
- int Val = ARM_AM::getFP64Imm(APInt(64, IntVal));
+ IntVal ^= (uint64_t)isNegative << 31;
Parser.Lex(); // Eat the token.
- if (Val == -1) {
- TokError("floating point value out of range");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(IntVal, getContext()),
+ S, Parser.getTok().getLoc()));
return MatchOperand_Success;
}
+ // Also handle plain integers. Instructions which allow floating point
+ // immediates also allow a raw encoded 8-bit value.
if (Tok.is(AsmToken::Integer)) {
int64_t Val = Tok.getIntVal();
Parser.Lex(); // Eat the token.
if (Val > 255 || Val < 0) {
- TokError("encoded floating point value out of range");
+ Error(Loc, "encoded floating point value out of range");
return MatchOperand_ParseFail;
}
- Operands.push_back(ARMOperand::CreateFPImm(Val, S, getContext()));
+ double RealVal = ARM_AM::getFPImmFloat(Val);
+ Val = APFloat(APFloat::IEEEdouble, RealVal).bitcastToAPInt().getZExtValue();
+ Operands.push_back(ARMOperand::CreateImm(
+ MCConstantExpr::Create(Val, getContext()), S,
+ Parser.getTok().getLoc()));
return MatchOperand_Success;
}
- TokError("invalid floating point immediate");
+ Error(Loc, "invalid floating point immediate");
return MatchOperand_ParseFail;
}
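+
+// For illustration (operand examples assumed, not from this patch):
+// "vmov.f32 s0, #0.5" takes the Real path above, storing the f32 bit pattern
+// in a plain immediate operand, while "vmov.f32 s0, #112" takes the integer
+// path, decoding the raw imm8 value 112 (0x70) back to 1.0 first.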
+
/// Parse an ARM instruction operand. For now this parses the operand
/// regardless of the mnemonic.
bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -3391,7 +4478,6 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
case AsmToken::Identifier: {
- // If this is VMRS, check for the apsr_nzcv operand.
if (!tryParseRegisterWithWriteBack(Operands))
return false;
int Res = tryParseShiftRegister(Operands);
@@ -3399,17 +4485,21 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return false;
else if (Res == -1) // irrecoverable error
return true;
- if (Mnemonic == "vmrs" && Parser.getTok().getString() == "apsr_nzcv") {
+ // If this is VMRS, check for the apsr_nzcv operand.
+ if (Mnemonic == "vmrs" &&
+ Parser.getTok().getString().equals_lower("apsr_nzcv")) {
S = Parser.getTok().getLoc();
Parser.Lex();
- Operands.push_back(ARMOperand::CreateToken("apsr_nzcv", S));
+ Operands.push_back(ARMOperand::CreateToken("APSR_nzcv", S));
return false;
}
// Fall through for the Identifier case that is not a register or a
// special name.
}
+ case AsmToken::LParen: // parenthesized expressions like (_strcmp-4)
case AsmToken::Integer: // things like 1f and 2b as branch targets
+ case AsmToken::String: // quoted label names.
case AsmToken::Dot: { // . as a branch target
// This was not a register so parse other operands that start with an
// identifier (like labels) as expressions and create them as immediates.
@@ -3425,6 +4515,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return parseMemory(Operands);
case AsmToken::LCurly:
return parseRegisterList(Operands);
+ case AsmToken::Dollar:
case AsmToken::Hash: {
// #42 -> immediate.
// TODO: ":lower16:" and ":upper16:" modifiers after # before immediate
@@ -3435,13 +4526,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
if (getParser().ParseExpression(ImmVal))
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!CE) {
- Error(S, "constant expression expected");
- return MatchOperand_ParseFail;
+ if (CE) {
+ int32_t Val = CE->getValue();
+ if (isNegative && Val == 0)
+ ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
}
- int32_t Val = CE->getValue();
- if (isNegative && Val == 0)
- ImmVal = MCConstantExpr::Create(INT32_MIN, getContext());
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E));
return false;
@@ -3524,7 +4613,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "vcge" || Mnemonic == "vclt" || Mnemonic == "vacgt" ||
Mnemonic == "vcgt" || Mnemonic == "vcle" || Mnemonic == "smlal" ||
Mnemonic == "umaal" || Mnemonic == "umlal" || Mnemonic == "vabal" ||
- Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal")
+ Mnemonic == "vmlal" || Mnemonic == "vpadal" || Mnemonic == "vqdmlal" ||
+ Mnemonic == "fmuls")
return Mnemonic;
// First, split out any predication code. Ignore mnemonics we know aren't
@@ -3565,7 +4655,11 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic,
Mnemonic == "mrs" || Mnemonic == "smmls" || Mnemonic == "vabs" ||
Mnemonic == "vcls" || Mnemonic == "vmls" || Mnemonic == "vmrs" ||
Mnemonic == "vnmls" || Mnemonic == "vqabs" || Mnemonic == "vrecps" ||
- Mnemonic == "vrsqrts" || Mnemonic == "srs" ||
+ Mnemonic == "vrsqrts" || Mnemonic == "srs" || Mnemonic == "flds" ||
+ Mnemonic == "fmrs" || Mnemonic == "fsqrts" || Mnemonic == "fsubs" ||
+ Mnemonic == "fsts" || Mnemonic == "fcpys" || Mnemonic == "fdivs" ||
+ Mnemonic == "fmuls" || Mnemonic == "fcmps" || Mnemonic == "fcmpzs" ||
+ Mnemonic == "vfms" || Mnemonic == "vfnms" ||
(Mnemonic == "movs" && isThumb()))) {
Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 1);
CarrySetting = true;
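// Worked example of the carry-setting split above (a sketch, not code from
// the parser): "adds" is not in the exclusion list, so it becomes
// Mnemonic = "add" with CarrySetting set. "fmuls", "flds" and friends are
// excluded because their trailing 's' denotes single precision in the
// pre-UAL VFP mnemonics rather than a flag-setting suffix.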
@@ -3609,6 +4703,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic == "orr" || Mnemonic == "mvn" ||
Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
Mnemonic == "sbc" || Mnemonic == "eor" || Mnemonic == "neg" ||
+ Mnemonic == "vfm" || Mnemonic == "vfnm" ||
(!isThumb() && (Mnemonic == "smull" || Mnemonic == "mov" ||
Mnemonic == "mla" || Mnemonic == "smlal" ||
Mnemonic == "umlal" || Mnemonic == "umull"))) {
@@ -3677,7 +4772,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP &&
static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
- (static_cast<ARMOperand*>(Operands[5])->isReg() ||
+ ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) ||
static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4()))
return true;
// For Thumb2, add/sub immediate does not have a cc_out operand for the
@@ -3694,9 +4789,11 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
//
// If either register is a high reg, it's either one of the SP
// variants (handled above) or a 32-bit encoding, so we just
- // check against T3.
+ // check against T3. If the second register is the PC, this is an
+ // alternate form of ADR, which uses encoding T4, so check for that too.
if ((!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg())) &&
+ static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC &&
static_cast<ARMOperand*>(Operands[5])->isT2SOImm())
return false;
// If both registers are low, we're in an IT block, and the immediate is
@@ -3726,6 +4823,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
// remove the cc_out operand.
(!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
!isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) ||
!inITBlock() ||
(static_cast<ARMOperand*>(Operands[3])->getReg() !=
static_cast<ARMOperand*>(Operands[5])->getReg() &&
@@ -3733,6 +4831,20 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand*>(Operands[4])->getReg())))
return true;
+ // Also check the 'mul' syntax variant that doesn't specify an explicit
+ // destination register.
+ if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 &&
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
+ // If the registers aren't low regs or the cc_out operand is zero
+ // outside of an IT block, we have to use the 32-bit encoding, so
+ // remove the cc_out operand.
+ (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) ||
+ !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) ||
+ !inITBlock()))
+ return true;
+
// Register-register 'add/sub' for thumb does not have a cc_out operand
@@ -3744,15 +4856,52 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
(Operands.size() == 5 || Operands.size() == 6) &&
static_cast<ARMOperand*>(Operands[3])->isReg() &&
static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP &&
- static_cast<ARMOperand*>(Operands[1])->getReg() == 0)
+ static_cast<ARMOperand*>(Operands[1])->getReg() == 0 &&
+ (static_cast<ARMOperand*>(Operands[4])->isImm() ||
+ (Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[5])->isImm())))
return true;
return false;
}
+static bool isDataTypeToken(StringRef Tok) {
+ return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" ||
+ Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" ||
+ Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" ||
+ Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" ||
+ Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" ||
+ Tok == ".f" || Tok == ".d";
+}
+
+// FIXME: This bit should probably be handled via an explicit match class
+// in the .td files that matches the suffix instead of having it be
+// a literal string token the way it is now.
+static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) {
+ return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm");
+}
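// Example of how the two helpers combine (a sketch): for "vldm.32" the
// suffix ".32" passes isDataTypeToken() and the mnemonic matches
// doesIgnoreDataTypeSuffix(), so ParseInstruction() below drops the suffix
// entirely; for "vld1.32" the suffix survives and is pushed as an operand
// token that the matcher checks.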
+
+static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features);
/// Parse an ARM instruction mnemonic followed by its operands.
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // Apply mnemonic aliases before doing anything else, as the destination
+ // mnemonic may include suffixes and we want to handle them normally.
+ // The generic tblgen'erated code does this later, at the start of
+ // MatchInstructionImpl(), but that's too late for aliases that include
+ // any sort of suffix.
+ unsigned AvailableFeatures = getAvailableFeatures();
+ applyMnemonicAliases(Name, AvailableFeatures);
+
+ // First check for the ARM-specific .req directive.
+ if (Parser.getTok().is(AsmToken::Identifier) &&
+ Parser.getTok().getIdentifier() == ".req") {
+ parseDirectiveReq(Name, NameLoc);
+ // We always return 'error' for this, as we're done with this
+ // statement and don't need to match the 'instruction'.
+ return true;
+ }
+
// Create the leading tokens for the mnemonic, split by '.' characters.
size_t Start = 0, Next = Name.find('.');
StringRef Mnemonic = Name.slice(Start, Next);
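// Sketch of the split performed here using only StringRef primitives (the
// mnemonic "vld4.8" is an assumed example, not taken from the code above):
//   Name = "vld4.8";  Next = Name.find('.');           // Next == 4
//   Mnemonic = Name.slice(0, Next);                    // "vld4"
//   ExtraToken = Name.slice(Next, Name.find('.', 5));  // ".8"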
@@ -3854,9 +5003,12 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
Next = Name.find('.', Start + 1);
StringRef ExtraToken = Name.slice(Start, Next);
- // For now, we're only parsing Thumb1 (for the most part), so
- // just ignore ".n" qualifiers. We'll use them to restrict
- // matching when we do Thumb2.
+ // Some NEON instructions have an optional datatype suffix that is
+ // completely ignored. Check for that.
+ if (isDataTypeToken(ExtraToken) &&
+ doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken))
+ continue;
+
if (ExtraToken != ".n") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc));
@@ -3941,12 +5093,21 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
// Similarly, the Thumb1 "RSB" instruction has a literal "#0" on the
- // end. Convert it to a token here.
+ // end. Convert it to a token here. Take care not to convert those
+ // that should hit the Thumb2 encoding.
if (Mnemonic == "rsb" && isThumb() && Operands.size() == 6 &&
+ static_cast<ARMOperand*>(Operands[3])->isReg() &&
+ static_cast<ARMOperand*>(Operands[4])->isReg() &&
static_cast<ARMOperand*>(Operands[5])->isImm()) {
ARMOperand *Op = static_cast<ARMOperand*>(Operands[5]);
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (CE && CE->getValue() == 0) {
+ if (CE && CE->getValue() == 0 &&
+ (isThumbOne() ||
+ // The cc_out operand matches the IT block.
+ ((inITBlock() != CarrySetting) &&
+ // Neither register operand is a high register.
+ (isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) &&
+ isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()))))){
Operands.erase(Operands.begin() + 5);
Operands.push_back(ARMOperand::CreateToken("#0", Op->getStartLoc()));
delete Op;
@@ -3990,9 +5151,9 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) {
// the ARMInsts array) instead. Getting that here requires awkward
// API changes, though. Better way?
namespace llvm {
-extern MCInstrDesc ARMInsts[];
+extern const MCInstrDesc ARMInsts[];
}
-static MCInstrDesc &getInstDesc(unsigned Opcode) {
+static const MCInstrDesc &getInstDesc(unsigned Opcode) {
return ARMInsts[Opcode];
}
@@ -4000,13 +5161,14 @@ static MCInstrDesc &getInstDesc(unsigned Opcode) {
bool ARMAsmParser::
validateInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode());
SMLoc Loc = Operands[0]->getStartLoc();
// Check the IT block state first.
- // NOTE: In Thumb mode, the BKPT instruction has the interesting property of
- // being allowed in IT blocks, but not being predicable. It just always
+ // NOTE: The BKPT instruction has the interesting property of being
+ // allowed in IT blocks, but not being predicable. It just always
// executes.
- if (inITBlock() && Inst.getOpcode() != ARM::tBKPT) {
+ if (inITBlock() && Inst.getOpcode() != ARM::tBKPT &&
+ Inst.getOpcode() != ARM::BKPT) {
unsigned bit = 1;
if (ITState.FirstCond)
ITState.FirstCond = false;
@@ -4115,16 +5277,21 @@ validateInstruction(MCInst &Inst,
"in register list");
break;
}
+ // Like for ldm/stm, push and pop have high-register handling versions in
+ // Thumb2, so only issue a diagnostic for Thumb1. The instructions will be
+ // switched to the t2 encodings in processInstruction() if necessary.
case ARM::tPOP: {
bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase))
+ if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) &&
+ !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or pc");
break;
}
case ARM::tPUSH: {
bool listContainsBase;
- if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase))
+ if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) &&
+ !isThumbTwo())
return Error(Operands[2]->getStartLoc(),
"registers must be in range r0-r7 or lr");
break;
@@ -4141,10 +5308,1553 @@ validateInstruction(MCInst &Inst,
return false;
}
-void ARMAsmParser::
+static unsigned getRealVSTOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VST1LN
+ case ARM::VST1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdWB_register_Asm_8: Spacing = 1; return ARM::VST1LNd8_UPD;
+ case ARM::VST1LNdWB_register_Asm_16: Spacing = 1; return ARM::VST1LNd16_UPD;
+ case ARM::VST1LNdWB_register_Asm_32: Spacing = 1; return ARM::VST1LNd32_UPD;
+ case ARM::VST1LNdAsm_8: Spacing = 1; return ARM::VST1LNd8;
+ case ARM::VST1LNdAsm_16: Spacing = 1; return ARM::VST1LNd16;
+ case ARM::VST1LNdAsm_32: Spacing = 1; return ARM::VST1LNd32;
+
+ // VST2LN
+ case ARM::VST2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdWB_register_Asm_8: Spacing = 1; return ARM::VST2LNd8_UPD;
+ case ARM::VST2LNdWB_register_Asm_16: Spacing = 1; return ARM::VST2LNd16_UPD;
+ case ARM::VST2LNdWB_register_Asm_32: Spacing = 1; return ARM::VST2LNd32_UPD;
+ case ARM::VST2LNqWB_register_Asm_16: Spacing = 2; return ARM::VST2LNq16_UPD;
+ case ARM::VST2LNqWB_register_Asm_32: Spacing = 2; return ARM::VST2LNq32_UPD;
+
+ case ARM::VST2LNdAsm_8: Spacing = 1; return ARM::VST2LNd8;
+ case ARM::VST2LNdAsm_16: Spacing = 1; return ARM::VST2LNd16;
+ case ARM::VST2LNdAsm_32: Spacing = 1; return ARM::VST2LNd32;
+ case ARM::VST2LNqAsm_16: Spacing = 2; return ARM::VST2LNq16;
+ case ARM::VST2LNqAsm_32: Spacing = 2; return ARM::VST2LNq32;
+
+ // VST3LN
+ case ARM::VST3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdWB_register_Asm_8: Spacing = 1; return ARM::VST3LNd8_UPD;
+ case ARM::VST3LNdWB_register_Asm_16: Spacing = 1; return ARM::VST3LNd16_UPD;
+ case ARM::VST3LNdWB_register_Asm_32: Spacing = 1; return ARM::VST3LNd32_UPD;
+ case ARM::VST3LNqWB_register_Asm_16: Spacing = 2; return ARM::VST3LNq16_UPD;
+ case ARM::VST3LNqWB_register_Asm_32: Spacing = 2; return ARM::VST3LNq32_UPD;
+ case ARM::VST3LNdAsm_8: Spacing = 1; return ARM::VST3LNd8;
+ case ARM::VST3LNdAsm_16: Spacing = 1; return ARM::VST3LNd16;
+ case ARM::VST3LNdAsm_32: Spacing = 1; return ARM::VST3LNd32;
+ case ARM::VST3LNqAsm_16: Spacing = 2; return ARM::VST3LNq16;
+ case ARM::VST3LNqAsm_32: Spacing = 2; return ARM::VST3LNq32;
+
+ // VST3
+ case ARM::VST3dWB_fixed_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_fixed_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_fixed_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_fixed_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_fixed_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_fixed_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dWB_register_Asm_8: Spacing = 1; return ARM::VST3d8_UPD;
+ case ARM::VST3dWB_register_Asm_16: Spacing = 1; return ARM::VST3d16_UPD;
+ case ARM::VST3dWB_register_Asm_32: Spacing = 1; return ARM::VST3d32_UPD;
+ case ARM::VST3qWB_register_Asm_8: Spacing = 2; return ARM::VST3q8_UPD;
+ case ARM::VST3qWB_register_Asm_16: Spacing = 2; return ARM::VST3q16_UPD;
+ case ARM::VST3qWB_register_Asm_32: Spacing = 2; return ARM::VST3q32_UPD;
+ case ARM::VST3dAsm_8: Spacing = 1; return ARM::VST3d8;
+ case ARM::VST3dAsm_16: Spacing = 1; return ARM::VST3d16;
+ case ARM::VST3dAsm_32: Spacing = 1; return ARM::VST3d32;
+ case ARM::VST3qAsm_8: Spacing = 2; return ARM::VST3q8;
+ case ARM::VST3qAsm_16: Spacing = 2; return ARM::VST3q16;
+ case ARM::VST3qAsm_32: Spacing = 2; return ARM::VST3q32;
+
+ // VST4LN
+ case ARM::VST4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdWB_register_Asm_8: Spacing = 1; return ARM::VST4LNd8_UPD;
+ case ARM::VST4LNdWB_register_Asm_16: Spacing = 1; return ARM::VST4LNd16_UPD;
+ case ARM::VST4LNdWB_register_Asm_32: Spacing = 1; return ARM::VST4LNd32_UPD;
+ case ARM::VST4LNqWB_register_Asm_16: Spacing = 2; return ARM::VST4LNq16_UPD;
+ case ARM::VST4LNqWB_register_Asm_32: Spacing = 2; return ARM::VST4LNq32_UPD;
+ case ARM::VST4LNdAsm_8: Spacing = 1; return ARM::VST4LNd8;
+ case ARM::VST4LNdAsm_16: Spacing = 1; return ARM::VST4LNd16;
+ case ARM::VST4LNdAsm_32: Spacing = 1; return ARM::VST4LNd32;
+ case ARM::VST4LNqAsm_16: Spacing = 2; return ARM::VST4LNq16;
+ case ARM::VST4LNqAsm_32: Spacing = 2; return ARM::VST4LNq32;
+
+ // VST4
+ case ARM::VST4dWB_fixed_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_fixed_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_fixed_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_fixed_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_fixed_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_fixed_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dWB_register_Asm_8: Spacing = 1; return ARM::VST4d8_UPD;
+ case ARM::VST4dWB_register_Asm_16: Spacing = 1; return ARM::VST4d16_UPD;
+ case ARM::VST4dWB_register_Asm_32: Spacing = 1; return ARM::VST4d32_UPD;
+ case ARM::VST4qWB_register_Asm_8: Spacing = 2; return ARM::VST4q8_UPD;
+ case ARM::VST4qWB_register_Asm_16: Spacing = 2; return ARM::VST4q16_UPD;
+ case ARM::VST4qWB_register_Asm_32: Spacing = 2; return ARM::VST4q32_UPD;
+ case ARM::VST4dAsm_8: Spacing = 1; return ARM::VST4d8;
+ case ARM::VST4dAsm_16: Spacing = 1; return ARM::VST4d16;
+ case ARM::VST4dAsm_32: Spacing = 1; return ARM::VST4d32;
+ case ARM::VST4qAsm_8: Spacing = 2; return ARM::VST4q8;
+ case ARM::VST4qAsm_16: Spacing = 2; return ARM::VST4q16;
+ case ARM::VST4qAsm_32: Spacing = 2; return ARM::VST4q32;
+ }
+}
+
+static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
+ switch(Opc) {
+ default: llvm_unreachable("unexpected opcode!");
+ // VLD1LN
+ case ARM::VLD1LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD1LNd8_UPD;
+ case ARM::VLD1LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD1LNd16_UPD;
+ case ARM::VLD1LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD1LNd32_UPD;
+ case ARM::VLD1LNdAsm_8: Spacing = 1; return ARM::VLD1LNd8;
+ case ARM::VLD1LNdAsm_16: Spacing = 1; return ARM::VLD1LNd16;
+ case ARM::VLD1LNdAsm_32: Spacing = 1; return ARM::VLD1LNd32;
+
+ // VLD2LN
+ case ARM::VLD2LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD2LNd8_UPD;
+ case ARM::VLD2LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD2LNd16_UPD;
+ case ARM::VLD2LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD2LNd32_UPD;
+ case ARM::VLD2LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD2LNq16_UPD;
+ case ARM::VLD2LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD2LNq32_UPD;
+ case ARM::VLD2LNdAsm_8: Spacing = 1; return ARM::VLD2LNd8;
+ case ARM::VLD2LNdAsm_16: Spacing = 1; return ARM::VLD2LNd16;
+ case ARM::VLD2LNdAsm_32: Spacing = 1; return ARM::VLD2LNd32;
+ case ARM::VLD2LNqAsm_16: Spacing = 2; return ARM::VLD2LNq16;
+ case ARM::VLD2LNqAsm_32: Spacing = 2; return ARM::VLD2LNq32;
+
+ // VLD3DUP
+ case ARM::VLD3DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
+ case ARM::VLD3DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD3DUPq8_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
+ case ARM::VLD3DUPdAsm_8: Spacing = 1; return ARM::VLD3DUPd8;
+ case ARM::VLD3DUPdAsm_16: Spacing = 1; return ARM::VLD3DUPd16;
+ case ARM::VLD3DUPdAsm_32: Spacing = 1; return ARM::VLD3DUPd32;
+ case ARM::VLD3DUPqAsm_8: Spacing = 2; return ARM::VLD3DUPq8;
+ case ARM::VLD3DUPqAsm_16: Spacing = 2; return ARM::VLD3DUPq16;
+ case ARM::VLD3DUPqAsm_32: Spacing = 2; return ARM::VLD3DUPq32;
+
+ // VLD3LN
+ case ARM::VLD3LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD3LNd8_UPD;
+ case ARM::VLD3LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD3LNd16_UPD;
+ case ARM::VLD3LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD3LNd32_UPD;
+ case ARM::VLD3LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD3LNq16_UPD;
+ case ARM::VLD3LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD3LNq32_UPD;
+ case ARM::VLD3LNdAsm_8: Spacing = 1; return ARM::VLD3LNd8;
+ case ARM::VLD3LNdAsm_16: Spacing = 1; return ARM::VLD3LNd16;
+ case ARM::VLD3LNdAsm_32: Spacing = 1; return ARM::VLD3LNd32;
+ case ARM::VLD3LNqAsm_16: Spacing = 2; return ARM::VLD3LNq16;
+ case ARM::VLD3LNqAsm_32: Spacing = 2; return ARM::VLD3LNq32;
+
+ // VLD3
+ case ARM::VLD3dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dWB_register_Asm_8: Spacing = 1; return ARM::VLD3d8_UPD;
+ case ARM::VLD3dWB_register_Asm_16: Spacing = 1; return ARM::VLD3d16_UPD;
+ case ARM::VLD3dWB_register_Asm_32: Spacing = 1; return ARM::VLD3d32_UPD;
+ case ARM::VLD3qWB_register_Asm_8: Spacing = 2; return ARM::VLD3q8_UPD;
+ case ARM::VLD3qWB_register_Asm_16: Spacing = 2; return ARM::VLD3q16_UPD;
+ case ARM::VLD3qWB_register_Asm_32: Spacing = 2; return ARM::VLD3q32_UPD;
+ case ARM::VLD3dAsm_8: Spacing = 1; return ARM::VLD3d8;
+ case ARM::VLD3dAsm_16: Spacing = 1; return ARM::VLD3d16;
+ case ARM::VLD3dAsm_32: Spacing = 1; return ARM::VLD3d32;
+ case ARM::VLD3qAsm_8: Spacing = 2; return ARM::VLD3q8;
+ case ARM::VLD3qAsm_16: Spacing = 2; return ARM::VLD3q16;
+ case ARM::VLD3qAsm_32: Spacing = 2; return ARM::VLD3q32;
+
+ // VLD4LN
+ case ARM::VLD4LNdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdWB_register_Asm_8: Spacing = 1; return ARM::VLD4LNd8_UPD;
+ case ARM::VLD4LNdWB_register_Asm_16: Spacing = 1; return ARM::VLD4LNd16_UPD;
+ case ARM::VLD4LNdWB_register_Asm_32: Spacing = 1; return ARM::VLD4LNd32_UPD;
+ case ARM::VLD4LNqWB_register_Asm_16: Spacing = 2; return ARM::VLD4LNq16_UPD;
+ case ARM::VLD4LNqWB_register_Asm_32: Spacing = 2; return ARM::VLD4LNq32_UPD;
+ case ARM::VLD4LNdAsm_8: Spacing = 1; return ARM::VLD4LNd8;
+ case ARM::VLD4LNdAsm_16: Spacing = 1; return ARM::VLD4LNd16;
+ case ARM::VLD4LNdAsm_32: Spacing = 1; return ARM::VLD4LNd32;
+ case ARM::VLD4LNqAsm_16: Spacing = 2; return ARM::VLD4LNq16;
+ case ARM::VLD4LNqAsm_32: Spacing = 2; return ARM::VLD4LNq32;
+
+ // VLD4DUP
+ case ARM::VLD4DUPdWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD4DUPd8_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD4DUPd16_UPD;
+ case ARM::VLD4DUPdWB_register_Asm_32: Spacing = 1; return ARM::VLD4DUPd32_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_8: Spacing = 2; return ARM::VLD4DUPq8_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_16: Spacing = 2; return ARM::VLD4DUPq16_UPD;
+ case ARM::VLD4DUPqWB_register_Asm_32: Spacing = 2; return ARM::VLD4DUPq32_UPD;
+ case ARM::VLD4DUPdAsm_8: Spacing = 1; return ARM::VLD4DUPd8;
+ case ARM::VLD4DUPdAsm_16: Spacing = 1; return ARM::VLD4DUPd16;
+ case ARM::VLD4DUPdAsm_32: Spacing = 1; return ARM::VLD4DUPd32;
+ case ARM::VLD4DUPqAsm_8: Spacing = 2; return ARM::VLD4DUPq8;
+ case ARM::VLD4DUPqAsm_16: Spacing = 2; return ARM::VLD4DUPq16;
+ case ARM::VLD4DUPqAsm_32: Spacing = 2; return ARM::VLD4DUPq32;
+
+ // VLD4
+ case ARM::VLD4dWB_fixed_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_fixed_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_fixed_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_fixed_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_fixed_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_fixed_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dWB_register_Asm_8: Spacing = 1; return ARM::VLD4d8_UPD;
+ case ARM::VLD4dWB_register_Asm_16: Spacing = 1; return ARM::VLD4d16_UPD;
+ case ARM::VLD4dWB_register_Asm_32: Spacing = 1; return ARM::VLD4d32_UPD;
+ case ARM::VLD4qWB_register_Asm_8: Spacing = 2; return ARM::VLD4q8_UPD;
+ case ARM::VLD4qWB_register_Asm_16: Spacing = 2; return ARM::VLD4q16_UPD;
+ case ARM::VLD4qWB_register_Asm_32: Spacing = 2; return ARM::VLD4q32_UPD;
+ case ARM::VLD4dAsm_8: Spacing = 1; return ARM::VLD4d8;
+ case ARM::VLD4dAsm_16: Spacing = 1; return ARM::VLD4d16;
+ case ARM::VLD4dAsm_32: Spacing = 1; return ARM::VLD4d32;
+ case ARM::VLD4qAsm_8: Spacing = 2; return ARM::VLD4q8;
+ case ARM::VLD4qAsm_16: Spacing = 2; return ARM::VLD4q16;
+ case ARM::VLD4qAsm_32: Spacing = 2; return ARM::VLD4q32;
+ }
+}
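// How the Spacing value from getRealVSTOpcode()/getRealVLDOpcode() is used
// below: the expanded MCInst names the structure registers as Vd,
// Vd + Spacing, Vd + 2*Spacing, ..., so Spacing == 1 selects consecutive
// D registers (d0,d1,d2) and Spacing == 2 the even/odd-spaced form behind
// the Q-register aliases (d0,d2,d4). A sketch of that pattern (hypothetical
// helper, not part of the parser):
static void addStructRegsSketch(MCInst &TmpInst, const MCOperand &Vd,
                                unsigned Spacing, unsigned NumRegs) {
  for (unsigned i = 0; i != NumRegs; ++i)
    TmpInst.addOperand(MCOperand::CreateReg(Vd.getReg() + i * Spacing));
}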
+
+bool ARMAsmParser::
processInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
switch (Inst.getOpcode()) {
+ // Aliases for alternate PC+imm syntax of LDR instructions.
+ case ARM::t2LDRpcrel:
+ Inst.setOpcode(ARM::t2LDRpci);
+ return true;
+ case ARM::t2LDRBpcrel:
+ Inst.setOpcode(ARM::t2LDRBpci);
+ return true;
+ case ARM::t2LDRHpcrel:
+ Inst.setOpcode(ARM::t2LDRHpci);
+ return true;
+ case ARM::t2LDRSBpcrel:
+ Inst.setOpcode(ARM::t2LDRSBpci);
+ return true;
+ case ARM::t2LDRSHpcrel:
+ Inst.setOpcode(ARM::t2LDRSHpci);
+ return true;
+ // Handle NEON VST complex aliases.
+ case ARM::VST1LNdWB_register_Asm_8:
+ case ARM::VST1LNdWB_register_Asm_16:
+ case ARM::VST1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
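// Operand-order sketch for the rewrite above, as implied by the indices and
// comments (Asm pseudo-instruction layout vs. the real instruction):
//   Asm pseudo: Vd, lane, Rn, align, Rm, cc, cc-reg
//   real inst:  Rn_wb, Rn, align, Rm, Vd, lane, cc, cc-reg
// The same reshuffle, with extra Vd + Spacing registers spliced in, repeats
// for the VST2/3/4 and VLD cases that follow.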
+
+ case ARM::VST2LNdWB_register_Asm_8:
+ case ARM::VST2LNdWB_register_Asm_16:
+ case ARM::VST2LNdWB_register_Asm_32:
+ case ARM::VST2LNqWB_register_Asm_16:
+ case ARM::VST2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_register_Asm_8:
+ case ARM::VST3LNdWB_register_Asm_16:
+ case ARM::VST3LNdWB_register_Asm_32:
+ case ARM::VST3LNqWB_register_Asm_16:
+ case ARM::VST3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_register_Asm_8:
+ case ARM::VST4LNdWB_register_Asm_16:
+ case ARM::VST4LNdWB_register_Asm_32:
+ case ARM::VST4LNqWB_register_Asm_16:
+ case ARM::VST4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdWB_fixed_Asm_8:
+ case ARM::VST1LNdWB_fixed_Asm_16:
+ case ARM::VST1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdWB_fixed_Asm_8:
+ case ARM::VST2LNdWB_fixed_Asm_16:
+ case ARM::VST2LNdWB_fixed_Asm_32:
+ case ARM::VST2LNqWB_fixed_Asm_16:
+ case ARM::VST2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdWB_fixed_Asm_8:
+ case ARM::VST3LNdWB_fixed_Asm_16:
+ case ARM::VST3LNdWB_fixed_Asm_32:
+ case ARM::VST3LNqWB_fixed_Asm_16:
+ case ARM::VST3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdWB_fixed_Asm_8:
+ case ARM::VST4LNdWB_fixed_Asm_16:
+ case ARM::VST4LNdWB_fixed_Asm_32:
+ case ARM::VST4LNqWB_fixed_Asm_16:
+ case ARM::VST4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST1LNdAsm_8:
+ case ARM::VST1LNdAsm_16:
+ case ARM::VST1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST2LNdAsm_8:
+ case ARM::VST2LNdAsm_16:
+ case ARM::VST2LNdAsm_32:
+ case ARM::VST2LNqAsm_16:
+ case ARM::VST2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3LNdAsm_8:
+ case ARM::VST3LNdAsm_16:
+ case ARM::VST3LNdAsm_32:
+ case ARM::VST3LNqAsm_16:
+ case ARM::VST3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4LNdAsm_8:
+ case ARM::VST4LNdAsm_16:
+ case ARM::VST4LNdAsm_32:
+ case ARM::VST4LNqAsm_16:
+ case ARM::VST4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle NEON VLD complex aliases.
+ case ARM::VLD1LNdWB_register_Asm_8:
+ case ARM::VLD1LNdWB_register_Asm_16:
+ case ARM::VLD1LNdWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_register_Asm_8:
+ case ARM::VLD2LNdWB_register_Asm_16:
+ case ARM::VLD2LNdWB_register_Asm_32:
+ case ARM::VLD2LNqWB_register_Asm_16:
+ case ARM::VLD2LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_register_Asm_8:
+ case ARM::VLD3LNdWB_register_Asm_16:
+ case ARM::VLD3LNdWB_register_Asm_32:
+ case ARM::VLD3LNqWB_register_Asm_16:
+ case ARM::VLD3LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_register_Asm_8:
+ case ARM::VLD4LNdWB_register_Asm_16:
+ case ARM::VLD4LNdWB_register_Asm_32:
+ case ARM::VLD4LNqWB_register_Asm_16:
+ case ARM::VLD4LNqWB_register_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(4)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(5)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdWB_fixed_Asm_8:
+ case ARM::VLD1LNdWB_fixed_Asm_16:
+ case ARM::VLD1LNdWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdWB_fixed_Asm_8:
+ case ARM::VLD2LNdWB_fixed_Asm_16:
+ case ARM::VLD2LNdWB_fixed_Asm_32:
+ case ARM::VLD2LNqWB_fixed_Asm_16:
+ case ARM::VLD2LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdWB_fixed_Asm_8:
+ case ARM::VLD3LNdWB_fixed_Asm_16:
+ case ARM::VLD3LNdWB_fixed_Asm_32:
+ case ARM::VLD3LNqWB_fixed_Asm_16:
+ case ARM::VLD3LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdWB_fixed_Asm_8:
+ case ARM::VLD4LNdWB_fixed_Asm_16:
+ case ARM::VLD4LNdWB_fixed_Asm_32:
+ case ARM::VLD4LNqWB_fixed_Asm_16:
+ case ARM::VLD4LNqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD1LNdAsm_8:
+ case ARM::VLD1LNdAsm_16:
+ case ARM::VLD1LNdAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD2LNdAsm_8:
+ case ARM::VLD2LNdAsm_16:
+ case ARM::VLD2LNdAsm_32:
+ case ARM::VLD2LNqAsm_16:
+ case ARM::VLD2LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3LNdAsm_8:
+ case ARM::VLD3LNdAsm_16:
+ case ARM::VLD3LNdAsm_32:
+ case ARM::VLD3LNqAsm_16:
+ case ARM::VLD3LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4LNdAsm_8:
+ case ARM::VLD4LNdAsm_16:
+ case ARM::VLD4LNdAsm_32:
+ case ARM::VLD4LNqAsm_16:
+ case ARM::VLD4LNqAsm_32: {
+ MCInst TmpInst;
+ // Shuffle the operands around so the lane index operand is in the
+ // right place.
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(2)); // Rn
+ TmpInst.addOperand(Inst.getOperand(3)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Tied operand src (== Vd)
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // lane
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3DUP single 3-element structure to all lanes instructions.
+ case ARM::VLD3DUPdAsm_8:
+ case ARM::VLD3DUPdAsm_16:
+ case ARM::VLD3DUPdAsm_32:
+ case ARM::VLD3DUPqAsm_8:
+ case ARM::VLD3DUPqAsm_16:
+ case ARM::VLD3DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_fixed_Asm_8:
+ case ARM::VLD3DUPdWB_fixed_Asm_16:
+ case ARM::VLD3DUPdWB_fixed_Asm_32:
+ case ARM::VLD3DUPqWB_fixed_Asm_8:
+ case ARM::VLD3DUPqWB_fixed_Asm_16:
+ case ARM::VLD3DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3DUPdWB_register_Asm_8:
+ case ARM::VLD3DUPdWB_register_Asm_16:
+ case ARM::VLD3DUPdWB_register_Asm_32:
+ case ARM::VLD3DUPqWB_register_Asm_8:
+ case ARM::VLD3DUPqWB_register_Asm_16:
+ case ARM::VLD3DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD3 multiple 3-element structure instructions.
+ case ARM::VLD3dAsm_8:
+ case ARM::VLD3dAsm_16:
+ case ARM::VLD3dAsm_32:
+ case ARM::VLD3qAsm_8:
+ case ARM::VLD3qAsm_16:
+ case ARM::VLD3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_fixed_Asm_8:
+ case ARM::VLD3dWB_fixed_Asm_16:
+ case ARM::VLD3dWB_fixed_Asm_32:
+ case ARM::VLD3qWB_fixed_Asm_8:
+ case ARM::VLD3qWB_fixed_Asm_16:
+ case ARM::VLD3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD3dWB_register_Asm_8:
+ case ARM::VLD3dWB_register_Asm_16:
+ case ARM::VLD3dWB_register_Asm_32:
+ case ARM::VLD3qWB_register_Asm_8:
+ case ARM::VLD3qWB_register_Asm_16:
+ case ARM::VLD3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4DUP single 4-element structure to all lanes instructions.
+ case ARM::VLD4DUPdAsm_8:
+ case ARM::VLD4DUPdAsm_16:
+ case ARM::VLD4DUPdAsm_32:
+ case ARM::VLD4DUPqAsm_8:
+ case ARM::VLD4DUPqAsm_16:
+ case ARM::VLD4DUPqAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
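+ // Illustrative: the all-lanes syntax "vld4.8 {d0[], d1[], d2[], d3[]}, [r0]"
+ // lands here; four D registers are synthesized from Vd plus multiples of
+ // Spacing, mirroring the three-register VLD3 cases above.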
+
+ case ARM::VLD4DUPdWB_fixed_Asm_8:
+ case ARM::VLD4DUPdWB_fixed_Asm_16:
+ case ARM::VLD4DUPdWB_fixed_Asm_32:
+ case ARM::VLD4DUPqWB_fixed_Asm_8:
+ case ARM::VLD4DUPqWB_fixed_Asm_16:
+ case ARM::VLD4DUPqWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4DUPdWB_register_Asm_8:
+ case ARM::VLD4DUPdWB_register_Asm_16:
+ case ARM::VLD4DUPdWB_register_Asm_32:
+ case ARM::VLD4DUPqWB_register_Asm_8:
+ case ARM::VLD4DUPqWB_register_Asm_16:
+ case ARM::VLD4DUPqWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VLD4 multiple 4-element structure instructions.
+ case ARM::VLD4dAsm_8:
+ case ARM::VLD4dAsm_16:
+ case ARM::VLD4dAsm_32:
+ case ARM::VLD4qAsm_8:
+ case ARM::VLD4qAsm_16:
+ case ARM::VLD4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_fixed_Asm_8:
+ case ARM::VLD4dWB_fixed_Asm_16:
+ case ARM::VLD4dWB_fixed_Asm_32:
+ case ARM::VLD4qWB_fixed_Asm_8:
+ case ARM::VLD4qWB_fixed_Asm_16:
+ case ARM::VLD4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VLD4dWB_register_Asm_8:
+ case ARM::VLD4dWB_register_Asm_16:
+ case ARM::VLD4dWB_register_Asm_32:
+ case ARM::VLD4qWB_register_Asm_8:
+ case ARM::VLD4qWB_register_Asm_16:
+ case ARM::VLD4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVLDOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST3 multiple 3-element structure instructions.
+ case ARM::VST3dAsm_8:
+ case ARM::VST3dAsm_16:
+ case ARM::VST3dAsm_32:
+ case ARM::VST3qAsm_8:
+ case ARM::VST3qAsm_16:
+ case ARM::VST3qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
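+ // Illustrative: "vst3.8 {d0, d1, d2}, [r0]" takes this path. Note the
+ // operand order differs from the VLD cases: for stores, the address
+ // operands (Rn, alignment) come before the source register list.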
+
+ case ARM::VST3dWB_fixed_Asm_8:
+ case ARM::VST3dWB_fixed_Asm_16:
+ case ARM::VST3dWB_fixed_Asm_32:
+ case ARM::VST3qWB_fixed_Asm_8:
+ case ARM::VST3qWB_fixed_Asm_16:
+ case ARM::VST3qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST3dWB_register_Asm_8:
+ case ARM::VST3dWB_register_Asm_16:
+ case ARM::VST3dWB_register_Asm_32:
+ case ARM::VST3qWB_register_Asm_8:
+ case ARM::VST3qWB_register_Asm_16:
+ case ARM::VST3qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // VST4 multiple 4-element structure instructions.
+ case ARM::VST4dAsm_8:
+ case ARM::VST4dAsm_16:
+ case ARM::VST4dAsm_32:
+ case ARM::VST4qAsm_8:
+ case ARM::VST4qAsm_16:
+ case ARM::VST4qAsm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_fixed_Asm_8:
+ case ARM::VST4dWB_fixed_Asm_16:
+ case ARM::VST4dWB_fixed_Asm_32:
+ case ARM::VST4qWB_fixed_Asm_8:
+ case ARM::VST4qWB_fixed_Asm_16:
+ case ARM::VST4qWB_fixed_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(MCOperand::CreateReg(0)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+
+ case ARM::VST4dWB_register_Asm_8:
+ case ARM::VST4dWB_register_Asm_16:
+ case ARM::VST4dWB_register_Asm_32:
+ case ARM::VST4qWB_register_Asm_8:
+ case ARM::VST4qWB_register_Asm_16:
+ case ARM::VST4qWB_register_Asm_32: {
+ MCInst TmpInst;
+ unsigned Spacing;
+ TmpInst.setOpcode(getRealVSTOpcode(Inst.getOpcode(), Spacing));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn_wb == tied Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // alignment
+ TmpInst.addOperand(Inst.getOperand(3)); // Rm
+ TmpInst.addOperand(Inst.getOperand(0)); // Vd
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 2));
+ TmpInst.addOperand(MCOperand::CreateReg(Inst.getOperand(0).getReg() +
+ Spacing * 3));
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+
+ // Handle encoding choice for the shift-immediate instructions.
+ case ARM::t2LSLri:
+ case ARM::t2LSRri:
+ case ARM::t2ASRri: {
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) &&
+ !(static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) {
+ unsigned NewOpc;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode");
+ case ARM::t2LSLri: NewOpc = ARM::tLSLri; break;
+ case ARM::t2LSRri: NewOpc = ARM::tLSRri; break;
+ case ARM::t2ASRri: NewOpc = ARM::tASRri; break;
+ }
+ // The Thumb1 operands aren't in the same order. Awesome, eh?
+ MCInst TmpInst;
+ TmpInst.setOpcode(NewOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
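+ // Illustrative: outside an IT block, "lsls r0, r0, #2" satisfies all the
+ // checks above and narrows from the 32-bit t2LSLri to the 16-bit tLSLri;
+ // spelling it "lsls.w r0, r0, #2" keeps the wide encoding because of the
+ // ".w" token check.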
+
+ // Handle the Thumb2 mode MOV complex aliases.
+ case ARM::t2MOVsr:
+ case ARM::t2MOVSsr: {
+ // Which instruction to expand to depends on the CCOut operand and,
+ // when the register operands are low registers, on whether we're in
+ // an IT block.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ isARMLowRegister(Inst.getOperand(2).getReg()) &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(3).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRrr : ARM::t2ASRrr; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRrr : ARM::t2LSRrr; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLrr : ARM::t2LSLrr; break;
+ case ARM_AM::ror: newOpc = isNarrow ? ARM::tROR : ARM::t2RORrr; break;
+ }
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(Inst.getOperand(4)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(5));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsr ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
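+ // Illustrative, for the t2MOVsr/t2MOVSsr expansion above: outside an IT
+ // block, "movs r0, r0, lsl r1" narrows to the 16-bit tLSLrr (CPSR becomes
+ // the CCOut operand); "mov r0, r1, ror r2" is not narrowable (Rd != Rn)
+ // and expands to the wide t2RORrr.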
+ case ARM::t2MOVsi:
+ case ARM::t2MOVSsi: {
+ // Which instruction to expand to depends on the CCOut operand and,
+ // when the register operands are low registers, on whether we're in
+ // an IT block.
+ bool isNarrow = false;
+ if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
+ isARMLowRegister(Inst.getOperand(1).getReg()) &&
+ inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi))
+ isNarrow = true;
+ MCInst TmpInst;
+ unsigned newOpc;
+ switch(ARM_AM::getSORegShOp(Inst.getOperand(2).getImm())) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM_AM::asr: newOpc = isNarrow ? ARM::tASRri : ARM::t2ASRri; break;
+ case ARM_AM::lsr: newOpc = isNarrow ? ARM::tLSRri : ARM::t2LSRri; break;
+ case ARM_AM::lsl: newOpc = isNarrow ? ARM::tLSLri : ARM::t2LSLri; break;
+ case ARM_AM::ror: newOpc = ARM::t2RORri; isNarrow = false; break;
+ case ARM_AM::rrx: isNarrow = false; newOpc = ARM::t2RRX; break;
+ }
+ unsigned Amount = ARM_AM::getSORegOffset(Inst.getOperand(2).getImm());
+ if (Amount == 32) Amount = 0;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ if (isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (newOpc != ARM::t2RRX)
+ TmpInst.addOperand(MCOperand::CreateImm(Amount));
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ if (!isNarrow)
+ TmpInst.addOperand(MCOperand::CreateReg(
+ Inst.getOpcode() == ARM::t2MOVSsi ? ARM::CPSR : 0));
+ Inst = TmpInst;
+ return true;
+ }
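+ // Illustrative, for the t2MOVsi expansion above: "mov r0, r1, lsl #4"
+ // becomes t2LSLri (or the 16-bit tLSLri when narrowable); asr/lsr by #32
+ // is encoded with an immediate of 0, and "mov r0, r1, rrx" maps to t2RRX,
+ // which takes no shift-amount operand.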
+ // Handle the ARM mode MOV complex aliases.
+ case ARM::ASRr:
+ case ARM::LSRr:
+ case ARM::LSLr:
+ case ARM::RORr: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRr: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRr: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLr: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORr: ShiftTy = ARM_AM::ror; break;
+ }
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsr);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // Rm
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::ASRi:
+ case ARM::LSRi:
+ case ARM::LSLi:
+ case ARM::RORi: {
+ ARM_AM::ShiftOpc ShiftTy;
+ switch(Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ASRi: ShiftTy = ARM_AM::asr; break;
+ case ARM::LSRi: ShiftTy = ARM_AM::lsr; break;
+ case ARM::LSLi: ShiftTy = ARM_AM::lsl; break;
+ case ARM::RORi: ShiftTy = ARM_AM::ror; break;
+ }
+ // A shift by zero is a plain MOVr, not a MOVsi.
+ unsigned Amt = Inst.getOperand(2).getImm();
+ unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi;
+ unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt);
+ MCInst TmpInst;
+ TmpInst.setOpcode(Opc);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ if (Opc == ARM::MOVsi)
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::RRXi: {
+ unsigned Shifter = ARM_AM::getSORegOpc(ARM_AM::rrx, 0);
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVsi);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rd
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4)); // cc_out
+ Inst = TmpInst;
+ return true;
+ }
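+ // Illustrative examples for the ARM-mode aliases above:
+ //   asr r0, r1, r2   ->  mov r0, r1, asr r2   (MOVsr)
+ //   lsl r0, r1, #0   ->  mov r0, r1           (plain MOVr)
+ //   rrx r0, r1       ->  mov r0, r1, rrx      (MOVsi)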
+ case ARM::t2LDMIA_UPD: {
+ // If this is a load of a single register, then we should use
+ // a post-indexed LDR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDR_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STMDB_UPD: {
+ // If this is a store of a single register, then we should use
+ // a pre-indexed STR instruction instead, per the ARM ARM.
+ if (Inst.getNumOperands() != 5)
+ return false;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STR_PRE);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(MCOperand::CreateImm(-4));
+ TmpInst.addOperand(Inst.getOperand(2)); // CondCode
+ TmpInst.addOperand(Inst.getOperand(3));
+ Inst = TmpInst;
+ return true;
+ }
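+ // Illustrative, for the two cases above: "ldmia r1!, {r0}" becomes
+ // "ldr r0, [r1], #4" and "stmdb sp!, {r0}" becomes "str r0, [sp, #-4]!",
+ // matching the ARM ARM's preferred single-register forms.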
case ARM::LDMIA_UPD:
// If this is a load of a single register via a 'pop', then we should use
// a post-indexed LDR instruction instead, per the ARM ARM.
@@ -4160,6 +6870,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2)); // CondCode
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
case ARM::STMDB_UPD:
@@ -4178,41 +6889,117 @@ processInstruction(MCInst &Inst,
Inst = TmpInst;
}
break;
+ case ARM::t2ADDri12:
+ // If the immediate fits for encoding T3 (t2ADDri) and the generic "add"
+ // mnemonic was used (not "addw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2ADDri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
+ case ARM::t2SUBri12:
+ // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub"
+ // mnemonic was used (not "subw"), encoding T3 is preferred.
+ if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" ||
+ ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1)
+ break;
+ Inst.setOpcode(ARM::t2SUBri);
+ Inst.addOperand(MCOperand::CreateReg(0)); // cc_out
+ break;
case ARM::tADDi8:
// If the immediate is in the range 0-7, we want tADDi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tADDi3);
+ return true;
+ }
break;
case ARM::tSUBi8:
// If the immediate is in the range 0-7, we want tSUBi3 iff Rd was
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6)
+ if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tSUBi3);
+ return true;
+ }
break;
+ case ARM::t2ADDri:
+ case ARM::t2SUBri: {
+ // If the destination and first source operand are the same, and the
+ // flags are compatible with the current IT status, use encoding T2
+ // instead of T3, for compatibility with the system 'as'. Make sure
+ // the wide encoding wasn't explicitly requested.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ !isARMLowRegister(Inst.getOperand(0).getReg()) ||
+ (unsigned)Inst.getOperand(2).getImm() > 255 ||
+ ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) ||
+ (inITBlock() && Inst.getOperand(5).getReg() != 0)) ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ?
+ ARM::tADDi8 : ARM::tSUBi8);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
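+ // Illustrative, for the t2ADDri/t2SUBri narrowing above: inside an IT
+ // block, "add r0, r0, #1" narrows to the 16-bit tADDi8; an immediate over
+ // 255, a ".w" suffix, or mismatched Rd/Rn keeps the 32-bit encoding.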
+ case ARM::t2ADDrr: {
+ // If the destination and first source operand are the same, and
+ // there's no setting of the flags, use encoding T2 instead of T3.
+ // Note that this is only for ADD, not SUB. This mirrors the system
+ // 'as' behaviour. Make sure the wide encoding wasn't explicit.
+ if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() ||
+ Inst.getOperand(5).getReg() != 0 ||
+ (static_cast<ARMOperand*>(Operands[3])->isToken() &&
+ static_cast<ARMOperand*>(Operands[3])->getToken() == ".w"))
+ break;
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::tADDhirr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ Inst = TmpInst;
+ return true;
+ }
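+ // Illustrative, for the t2ADDrr narrowing above: "add r0, r0, r8" (no
+ // flag setting) narrows to tADDhirr, the 16-bit ADD that accepts high
+ // registers; "add.w" suppresses the narrowing.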
case ARM::tB:
// A Thumb conditional branch outside of an IT block is a tBcc.
- if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
Inst.setOpcode(ARM::tBcc);
+ return true;
+ }
break;
case ARM::t2B:
// A Thumb2 conditional branch outside of an IT block is a t2Bcc.
- if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock())
+ if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) {
Inst.setOpcode(ARM::t2Bcc);
+ return true;
+ }
break;
case ARM::t2Bcc:
// If the conditional is AL or we're in an IT block, we really want t2B.
- if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock())
+ if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) {
Inst.setOpcode(ARM::t2B);
+ return true;
+ }
break;
case ARM::tBcc:
// If the conditional is AL, we really want tB.
- if (Inst.getOperand(1).getImm() == ARMCC::AL)
+ if (Inst.getOperand(1).getImm() == ARMCC::AL) {
Inst.setOpcode(ARM::tB);
+ return true;
+ }
break;
case ARM::tLDMIA: {
// If the register list contains any high registers, or if the writeback
@@ -4235,6 +7022,7 @@ processInstruction(MCInst &Inst,
if (hasWritebackToken)
Inst.insert(Inst.begin(),
MCOperand::CreateReg(Inst.getOperand(0).getReg()));
+ return true;
}
break;
}
@@ -4248,14 +7036,40 @@ processInstruction(MCInst &Inst,
// 16-bit encoding isn't sufficient. Switch to the 32-bit version.
assert (isThumbTwo());
Inst.setOpcode(ARM::t2STMIA_UPD);
+ return true;
}
break;
}
+ case ARM::tPOP: {
+ bool listContainsBase;
+ // If the register list contains any high registers, we need to use
+ // the 32-bit encoding instead when in Thumb2. Otherwise, this should
+ // already have been flagged as an error in validateInstruction().
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2LDMIA_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
+ case ARM::tPUSH: {
+ bool listContainsBase;
+ if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase))
+ return false;
+ assert (isThumbTwo());
+ Inst.setOpcode(ARM::t2STMDB_UPD);
+ // Add the base register and writeback operands.
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP));
+ return true;
+ }
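+ // Illustrative, for the tPOP/tPUSH widening above: "pop {r8}" can't use
+ // the 16-bit tPOP (high register), so it becomes t2LDMIA_UPD with SP as
+ // base and writeback; a later iteration of the processInstruction loop
+ // may then turn the single-register form into a post-indexed LDR.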
case ARM::t2MOVi: {
// If we can use the 16-bit encoding and the user didn't explicitly
// request the 32-bit variant, transform it here.
if (isARMLowRegister(Inst.getOperand(0).getReg()) &&
- Inst.getOperand(1).getImm() <= 255 &&
+ (unsigned)Inst.getOperand(1).getImm() <= 255 &&
((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL &&
Inst.getOperand(4).getReg() == ARM::CPSR) ||
(inITBlock() && Inst.getOperand(4).getReg() == 0)) &&
@@ -4270,6 +7084,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
}
@@ -4290,6 +7105,7 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(2));
TmpInst.addOperand(Inst.getOperand(3));
Inst = TmpInst;
+ return true;
}
break;
}
@@ -4320,9 +7136,61 @@ processInstruction(MCInst &Inst,
TmpInst.addOperand(Inst.getOperand(3));
TmpInst.addOperand(Inst.getOperand(4));
Inst = TmpInst;
+ return true;
}
break;
}
+ case ARM::MOVsi: {
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ if (ARM_AM::getSORegOffset(Inst.getOperand(2).getImm()) == 0) {
+ // Shifting by zero is accepted as a vanilla 'MOVr'
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::MOVr);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(3));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ANDrsi:
+ case ARM::ORRrsi:
+ case ARM::EORrsi:
+ case ARM::BICrsi:
+ case ARM::SUBrsi:
+ case ARM::ADDrsi: {
+ unsigned newOpc;
+ ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(3).getImm());
+ if (SOpc == ARM_AM::rrx) return false;
+ switch (Inst.getOpcode()) {
+ default: llvm_unreachable("unexpected opcode!");
+ case ARM::ANDrsi: newOpc = ARM::ANDrr; break;
+ case ARM::ORRrsi: newOpc = ARM::ORRrr; break;
+ case ARM::EORrsi: newOpc = ARM::EORrr; break;
+ case ARM::BICrsi: newOpc = ARM::BICrr; break;
+ case ARM::SUBrsi: newOpc = ARM::SUBrr; break;
+ case ARM::ADDrsi: newOpc = ARM::ADDrr; break;
+ }
+ // If the shift is by zero, use the non-shifted instruction definition.
+ if (ARM_AM::getSORegOffset(Inst.getOperand(3).getImm()) == 0) {
+ MCInst TmpInst;
+ TmpInst.setOpcode(newOpc);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(4));
+ TmpInst.addOperand(Inst.getOperand(5));
+ TmpInst.addOperand(Inst.getOperand(6));
+ Inst = TmpInst;
+ return true;
+ }
+ return false;
+ }
+ case ARM::ITasm:
case ARM::t2IT: {
// The mask bits for all but the first condition are represented as
// the low bit of the condition code value implies 't'. We currently
@@ -4352,13 +7220,14 @@ processInstruction(MCInst &Inst,
break;
}
}
+ return false;
}
unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// 16-bit thumb arithmetic instructions either require or preclude the 'S'
// suffix depending on whether they're in an IT block or not.
unsigned Opc = Inst.getOpcode();
- MCInstrDesc &MCID = getInstDesc(Opc);
+ const MCInstrDesc &MCID = getInstDesc(Opc);
if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
assert(MCID.hasOptionalDef() &&
"optionally flag setting instruction missing optional def operand");
@@ -4417,14 +7286,23 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// Some instructions need post-processing to, for example, tweak which
- // encoding is selected.
- processInstruction(Inst, Operands);
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other. E.g.,
+ // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2LDR_POST(sp,r8)
+ while (processInstruction(Inst, Operands))
+ ;
// Only move forward at the very end so that everything in validate
// and process gets a consistent answer about whether we're in an IT
// block.
forwardITPosition();
+ // ITasm is an ARM mode pseudo-instruction that just sets the ITblock and
+ // doesn't actually encode.
+ if (Inst.getOpcode() == ARM::ITasm)
+ return false;
+
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -4458,7 +7336,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
/// parseDirective parses the arm specific directives
@@ -4468,12 +7345,20 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveWord(4, DirectiveID.getLoc());
else if (IDVal == ".thumb")
return parseDirectiveThumb(DirectiveID.getLoc());
+ else if (IDVal == ".arm")
+ return parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
return parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
return parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
return parseDirectiveSyntax(DirectiveID.getLoc());
+ else if (IDVal == ".unreq")
+ return parseDirectiveUnreq(DirectiveID.getLoc());
+ else if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
return true;
}
@@ -4509,9 +7394,22 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
return Error(L, "unexpected token in directive");
Parser.Lex();
- // TODO: set thumb mode
- // TODO: tell the MC streamer the mode
- // getParser().getStreamer().Emit???();
+ if (!isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ return false;
+}
+
+/// parseDirectiveARM
+/// ::= .arm
+bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+
+ if (isThumb())
+ SwitchMode();
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
@@ -4521,24 +7419,33 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
const MCAsmInfo &MAI = getParser().getStreamer().getContext().getAsmInfo();
bool isMachO = MAI.hasSubsectionsViaSymbols();
StringRef Name;
+ bool needFuncName = true;
- // Darwin asm has function name after .thumb_func direction
+ // Darwin asm has (optionally) function name after .thumb_func directive
// ELF doesn't
if (isMachO) {
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
- return Error(L, "unexpected token in .thumb_func directive");
- Name = Tok.getString();
- Parser.Lex(); // Consume the identifier token.
+ if (Tok.isNot(AsmToken::EndOfStatement)) {
+ if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String))
+ return Error(L, "unexpected token in .thumb_func directive");
+ Name = Tok.getIdentifier();
+ Parser.Lex(); // Consume the identifier token.
+ needFuncName = false;
+ }
}
if (getLexer().isNot(AsmToken::EndOfStatement))
return Error(L, "unexpected token in directive");
- Parser.Lex();
+
+ // Eat the end of statement and any blank lines that follow.
+ while (getLexer().is(AsmToken::EndOfStatement))
+ Parser.Lex();
// FIXME: assuming function name will be the line following .thumb_func
- if (!isMachO) {
- Name = Parser.getTok().getString();
+ // We really should be checking the next symbol definition even if there's
+ // stuff in between.
+ if (needFuncName) {
+ Name = Parser.getTok().getIdentifier();
}
// Mark symbol as a thumb symbol.
@@ -4601,6 +7508,57 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
return false;
}
+/// parseDirectiveReq
+/// ::= name .req registername
+bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
+ Parser.Lex(); // Eat the '.req' token.
+ unsigned Reg;
+ SMLoc SRegLoc, ERegLoc;
+ if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
+ Parser.EatToEndOfStatement();
+ return Error(SRegLoc, "register name expected");
+ }
+
+ // Shouldn't be anything else.
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
+ Parser.EatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected input in .req directive.");
+ }
+
+ Parser.Lex(); // Consume the EndOfStatement
+
+ if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg)
+ return Error(SRegLoc, "redefinition of '" + Name +
+ "' does not match original.");
+
+ return false;
+}
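+
+// Illustrative usage (an assumed example, not from this patch's tests):
+//   fp .req r11
+//   mov fp, sp
+//   .unreq fp
+// The GetOrCreateValue check above rejects rebinding an existing name to a
+// different register unless it is first removed with .unreq.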
+
+/// parseDirectiveUnreq
+/// ::= .unreq registername
+bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Parser.EatToEndOfStatement();
+ return Error(L, "unexpected input in .unreq directive.");
+ }
+ RegisterReqs.erase(Parser.getTok().getIdentifier());
+ Parser.Lex(); // Eat the identifier.
+ return false;
+}
+
+/// parseDirectiveArch
+/// ::= .arch token
+bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
+ return true;
+}
+
+/// parseDirectiveEabiAttr
+/// ::= .eabi_attribute int, int
+bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
+ return true;
+}
+
extern "C" void LLVMInitializeARMAsmLexer();
/// Force static initialization.
diff --git a/lib/Target/ARM/AsmParser/CMakeLists.txt b/lib/Target/ARM/AsmParser/CMakeLists.txt
index 3f5ad39debc3..e24a1b17867a 100644
--- a/lib/Target/ARM/AsmParser/CMakeLists.txt
+++ b/lib/Target/ARM/AsmParser/CMakeLists.txt
@@ -6,11 +6,3 @@ add_llvm_library(LLVMARMAsmParser
)
add_dependencies(LLVMARMAsmParser ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmParser
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..f0184b675dac
--- /dev/null
+++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMAsmParser
+parent = ARM
+required_libraries = ARMDesc ARMInfo MC MCParser Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index f045e839a616..9a2aab5304ab 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,18 +1,18 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
-llvm_tablegen(ARMGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(ARMGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(ARMGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-llvm_tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
-llvm_tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(ARMGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(ARMGenFastISel.inc -gen-fast-isel)
-llvm_tablegen(ARMGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
-llvm_tablegen(ARMGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
+tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel)
+tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
@@ -26,7 +26,6 @@ add_llvm_target(ARMCodeGen
ARMExpandPseudoInsts.cpp
ARMFastISel.cpp
ARMFrameLowering.cpp
- ARMGlobalMerge.cpp
ARMHazardRecognizer.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
@@ -34,6 +33,7 @@ add_llvm_target(ARMCodeGen
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCInstLower.cpp
+ ARMMachineFunctionInfo.cpp
ARMRegisterInfo.cpp
ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
@@ -49,22 +49,8 @@ add_llvm_target(ARMCodeGen
Thumb2SizeReduction.cpp
)
-add_llvm_library_dependencies(LLVMARMCodeGen
- LLVMARMAsmPrinter
- LLVMARMDesc
- LLVMARMInfo
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-# workaround for hanging compilation on MSVC10
-if( MSVC_VERSION EQUAL 1600 )
+# workaround for hanging compilation on MSVC9, 10
+if( MSVC_VERSION EQUAL 1600 OR MSVC_VERSION EQUAL 1500 )
set_property(
SOURCE ARMISelLowering.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 8f2f813b676a..2f504b756b1b 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===//
+//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,17 +9,16 @@
#define DEBUG_TYPE "arm-disassembler"
-#include "ARM.h"
-#include "ARMRegisterInfo.h"
-#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMMCExpr.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,7 +51,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
};
@@ -77,7 +76,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
mutable std::vector<unsigned> ITBlock;
DecodeStatus AddThumbPredicate(MCInst&) const;
@@ -97,224 +96,236 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
Out = In;
return false;
}
- return false;
+ llvm_unreachable("Invalid DecodeStatus!");
}
// Forward declare these because the autogenerated code will reference them.
// Definitions are further down.
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeGPRnopcRegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst,
+static DecodeStatus DecodeDPR_VFP2RegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
+ const void *Decoder);
-static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst,
+static DecodeStatus DecodeAddrMode2IdxInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode3Instruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeAddrMode3Instruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst & Inst,
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst & Inst,
unsigned Insn,
uint64_t Adddress,
const void *Decoder);
-static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBranchImmInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeBranchImmInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeNEONModImmInstruction(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeNEONModImmInstruction(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Val,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbTableBranch(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBCCTargetOperand(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeThumbBCCTargetOperand(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LDRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2LDRDPreInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2STRDPreInstruction(llvm::MCInst &Inst,unsigned Insn,
+static DecodeStatus DecodeT2STRDPreInstruction(MCInst &Inst,unsigned Insn,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
-
-
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
#include "ARMGenInstrInfo.inc"
#include "ARMGenEDInfo.inc"
@@ -327,11 +338,11 @@ static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtarge
return new ThumbDisassembler(STI);
}
-EDInstInfo *ARMDisassembler::getEDInfo() const {
+const EDInstInfo *ARMDisassembler::getEDInfo() const {
return instInfoARM;
}
-EDInstInfo *ThumbDisassembler::getEDInfo() const {
+const EDInstInfo *ThumbDisassembler::getEDInfo() const {
return instInfoARM;
}
@@ -415,7 +426,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
namespace llvm {
-extern MCInstrDesc ARMInsts[];
+extern const MCInstrDesc ARMInsts[];
}
/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
@@ -435,40 +446,38 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MCInst &MI, const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
- if (!getOpInfo)
- return false;
-
struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
void *DisInfo = Dis->getDisInfoBlock();
- if (!getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
- if (isBranch) {
- LLVMSymbolLookupCallback SymbolLookUp =
- Dis->getLLVMSymbolLookupCallback();
- if (SymbolLookUp) {
- uint64_t ReferenceType;
- ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
- const char *ReferenceName;
- const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
- &ReferenceName);
- if (Name) {
- SymbolicOp.AddSymbol.Name = Name;
- SymbolicOp.AddSymbol.Present = true;
- SymbolicOp.Value = 0;
- }
- else {
- SymbolicOp.Value = Value;
- }
- if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
- (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
- }
- else {
- return false;
- }
- }
- else {
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ // Clear the SymbolicOp.Value set above, along with all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
}
+ // For branches, always create an MCExpr so it prints as a hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
}
MCContext *Ctx = Dis->getMCContext();
@@ -527,8 +536,8 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx)));
else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None)
MI.addOperand(MCOperand::CreateExpr(Expr));
- else
- assert(0 && "bad SymbolicOp.VariantKind");
+ else
+ llvm_unreachable("bad SymbolicOp.VariantKind");
return true;
}
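// A minimal standalone sketch (hypothetical names, not the C API) of the
// decision order the rewritten lookup block in tryAddingSymbolicOperand
// implements: the OpInfo callback gets first crack at the operand; if it is
// absent or declines, the symbol lookup callback is consulted; only branch
// targets may fall back to a bare value, still wrapped in an MCExpr so it
// prints as a hex address.
enum class SymbolicResult { FromOpInfo, FromLookup, BranchValue, None };

static SymbolicResult classifyOperand(bool hasOpInfo, bool opInfoHit,
                                      bool hasLookup, bool lookupFoundName,
                                      bool isBranch) {
  if (hasOpInfo && opInfoHit)
    return SymbolicResult::FromOpInfo;   // client supplied the operand info
  if (!hasLookup)
    return SymbolicResult::None;         // no callback left to consult
  if (lookupFoundName)
    return SymbolicResult::FromLookup;   // attach the symbol by name
  if (isBranch)
    return SymbolicResult::BranchValue;  // keep the raw target value
  return SymbolicResult::None;           // non-branch without a symbol
}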
@@ -543,7 +552,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
/// a literal 'C' string if the referenced address of the literal pool's entry
/// is an address into a section with 'C' string literals.
static void tryAddingPcLoadReferenceComment(uint64_t Address, int Value,
- const void *Decoder) {
+ const void *Decoder) {
const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
if (SymbolLookUp) {
@@ -841,14 +850,14 @@ extern "C" void LLVMInitializeARMDisassembler() {
createThumbDisassembler);
}
-static const unsigned GPRDecoderTable[] = {
+static const uint16_t GPRDecoderTable[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
ARM::R8, ARM::R9, ARM::R10, ARM::R11,
ARM::R12, ARM::SP, ARM::LR, ARM::PC
};
-static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
@@ -859,20 +868,26 @@ static DecodeStatus DecodeGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeGPRnopcRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo == 15) return MCDisassembler::Fail;
- return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
+ DecodeStatus S = MCDisassembler::Success;
+
+ if (RegNo == 15)
+ S = MCDisassembler::SoftFail;
+
+ Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder));
+
+ return S;
}
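// A self-contained sketch of the Fail/SoftFail/Success lattice that Check()
// threads through these decoders: Success keeps the current status, SoftFail
// downgrades it (UNPREDICTABLE but still decodable, as in the PC case of
// DecodeGPRnopcRegisterClass above), and Fail aborts decoding. The enum
// values mirror llvm::MCDisassembler::DecodeStatus; the helper name is
// hypothetical.
#include <algorithm>

enum DecodeStatusSketch { FailS = 0, SoftFailS = 1, SuccessS = 3 };

static bool checkSketch(DecodeStatusSketch &Out, DecodeStatusSketch In) {
  Out = std::min(Out, In);  // keep the worst status observed so far
  return Out != FailS;      // callers bail out only on a hard Fail
}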
-static DecodeStatus DecodetGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
unsigned Register = 0;
switch (RegNo) {
@@ -902,13 +917,13 @@ static DecodeStatus DecodetcGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecoderGPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo == 13 || RegNo == 15) return MCDisassembler::Fail;
return DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned SPRDecoderTable[] = {
+static const uint16_t SPRDecoderTable[] = {
ARM::S0, ARM::S1, ARM::S2, ARM::S3,
ARM::S4, ARM::S5, ARM::S6, ARM::S7,
ARM::S8, ARM::S9, ARM::S10, ARM::S11,
@@ -919,7 +934,7 @@ static const unsigned SPRDecoderTable[] = {
ARM::S28, ARM::S29, ARM::S30, ARM::S31
};
-static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -929,7 +944,7 @@ static DecodeStatus DecodeSPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static const unsigned DPRDecoderTable[] = {
+static const uint16_t DPRDecoderTable[] = {
ARM::D0, ARM::D1, ARM::D2, ARM::D3,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10, ARM::D11,
@@ -940,7 +955,7 @@ static const unsigned DPRDecoderTable[] = {
ARM::D28, ARM::D29, ARM::D30, ARM::D31
};
-static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -950,7 +965,7 @@ static DecodeStatus DecodeDPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeDPR_8RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 7)
return MCDisassembler::Fail;
@@ -958,14 +973,14 @@ static DecodeStatus DecodeDPR_8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
}
static DecodeStatus
-DecodeDPR_VFP2RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+DecodeDPR_VFP2RegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 15)
return MCDisassembler::Fail;
return DecodeDPRRegisterClass(Inst, RegNo, Address, Decoder);
}
-static const unsigned QPRDecoderTable[] = {
+static const uint16_t QPRDecoderTable[] = {
ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7,
ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
@@ -973,7 +988,7 @@ static const unsigned QPRDecoderTable[] = {
};
-static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeQPRRegisterClass(MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
@@ -984,7 +999,49 @@ static DecodeStatus DecodeQPRRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
+static const uint16_t DPairDecoderTable[] = {
+ ARM::Q0, ARM::D1_D2, ARM::Q1, ARM::D3_D4, ARM::Q2, ARM::D5_D6,
+ ARM::Q3, ARM::D7_D8, ARM::Q4, ARM::D9_D10, ARM::Q5, ARM::D11_D12,
+ ARM::Q6, ARM::D13_D14, ARM::Q7, ARM::D15_D16, ARM::Q8, ARM::D17_D18,
+ ARM::Q9, ARM::D19_D20, ARM::Q10, ARM::D21_D22, ARM::Q11, ARM::D23_D24,
+ ARM::Q12, ARM::D25_D26, ARM::Q13, ARM::D27_D28, ARM::Q14, ARM::D29_D30,
+ ARM::Q15
+};
+
+static DecodeStatus DecodeDPairRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ if (RegNo > 30)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
+
+static const uint16_t DPairSpacedDecoderTable[] = {
+ ARM::D0_D2, ARM::D1_D3, ARM::D2_D4, ARM::D3_D5,
+ ARM::D4_D6, ARM::D5_D7, ARM::D6_D8, ARM::D7_D9,
+ ARM::D8_D10, ARM::D9_D11, ARM::D10_D12, ARM::D11_D13,
+ ARM::D12_D14, ARM::D13_D15, ARM::D14_D16, ARM::D15_D17,
+ ARM::D16_D18, ARM::D17_D19, ARM::D18_D20, ARM::D19_D21,
+ ARM::D20_D22, ARM::D21_D23, ARM::D22_D24, ARM::D23_D25,
+ ARM::D24_D26, ARM::D25_D27, ARM::D26_D28, ARM::D27_D29,
+ ARM::D28_D30, ARM::D29_D31
+};
+
+static DecodeStatus DecodeDPairSpacedRegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 29)
+ return MCDisassembler::Fail;
+
+ unsigned Register = DPairSpacedDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return MCDisassembler::Success;
+}
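// A quick illustration (hypothetical helper) of the table layout above: an
// even RegNo names an adjacent pair that aliases a whole Q register
// (Q(RegNo/2)), an odd RegNo names the pseudo register D<n>_D<n+1>, and the
// spaced table always yields D<n>_D<n+2>, which is why only indices 0-30 and
// 0-29 are accepted by the two decoders.
#include <cstdio>

static void printDPair(unsigned RegNo) {
  if (RegNo % 2 == 0)
    std::printf("d%u/d%u -> q%u\n", RegNo, RegNo + 1, RegNo / 2);
  else
    std::printf("d%u/d%u -> D%u_D%u\n", RegNo, RegNo + 1, RegNo, RegNo + 1);
}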
+
+static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xF) return MCDisassembler::Fail;
// AL predicate is not allowed on Thumb1 branches.
@@ -998,7 +1055,7 @@ static DecodeStatus DecodePredicateOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val)
Inst.addOperand(MCOperand::CreateReg(ARM::CPSR));
@@ -1007,7 +1064,7 @@ static DecodeStatus DecodeCCOutOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
uint32_t imm = Val & 0xFF;
uint32_t rot = (Val & 0xF00) >> 7;
@@ -1016,7 +1073,7 @@ static DecodeStatus DecodeSOImmOperand(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1053,7 +1110,7 @@ static DecodeStatus DecodeSORegImmOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegRegOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1088,7 +1145,7 @@ static DecodeStatus DecodeSORegRegOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1123,7 +1180,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1140,7 +1197,7 @@ static DecodeStatus DecodeSPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1157,7 +1214,7 @@ static DecodeStatus DecodeDPRRegListOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
// This operand encodes a mask of contiguous zeros between a specified MSB
// and LSB. To decode it, we create the mask of all bits MSB-and-lower,
@@ -1178,7 +1235,7 @@ static DecodeStatus DecodeBitfieldMaskOperand(llvm::MCInst &Inst, unsigned Val,
return S;
}
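// A worked, standalone version of the mask construction described in the
// comment above: XOR-ing the MSB-and-lower mask with the LSB-and-lower mask
// leaves ones exactly on [msb, lsb], and inverting that gives the final
// operand. For msb = 11, lsb = 4 the result is 0xFFFFF00F.
#include <cstdint>

static uint32_t bitfieldMask(unsigned msb, unsigned lsb) {
  uint32_t msbMask = (msb == 31) ? 0xFFFFFFFFu : ((1u << (msb + 1)) - 1);
  uint32_t lsbMask = (1u << lsb) - 1;  // bits strictly below the field
  return ~(msbMask ^ lsbMask);         // zeros exactly on [msb, lsb]
}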
-static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1234,16 +1291,6 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
- unsigned P = fieldFromInstruction32(Insn, 24, 1);
- unsigned W = fieldFromInstruction32(Insn, 21, 1);
-
- bool writeback = (P == 0) || (W == 1);
- unsigned idx_mode = 0;
- if (P && writeback)
- idx_mode = ARMII::IndexModePre;
- else if (!P && writeback)
- idx_mode = ARMII::IndexModePost;
-
switch (Inst.getOpcode()) {
case ARM::t2LDC2_OFFSET:
case ARM::t2LDC2L_OFFSET:
@@ -1333,7 +1380,7 @@ static DecodeStatus DecodeCopMemInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeAddrMode2IdxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1436,7 +1483,7 @@ DecodeAddrMode2IdxInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeSORegMemOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1477,7 +1524,7 @@ static DecodeStatus DecodeSORegMemOperand(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1490,6 +1537,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
unsigned pred = fieldFromInstruction32(Insn, 28, 4);
unsigned W = fieldFromInstruction32(Insn, 21, 1);
unsigned P = fieldFromInstruction32(Insn, 24, 1);
+ unsigned Rt2 = Rt + 1;
bool writeback = (W == 1) | (P == 0);
@@ -1501,7 +1549,86 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
case ARM::LDRD:
case ARM::LDRD_PRE:
case ARM::LDRD_POST:
- if (Rt & 0x1) return MCDisassembler::Fail;
+ if (Rt & 0x1) S = MCDisassembler::SoftFail;
+ break;
+ default:
+ break;
+ }
+ switch (Inst.getOpcode()) {
+ case ARM::STRD:
+ case ARM::STRD_PRE:
+ case ARM::STRD_POST:
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+
+ if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && fieldFromInstruction32(Insn, 8, 4))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::STRH:
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRD:
+ case ARM::LDRD_PRE:
+ case ARM::LDRD_POST:
+ if (type && Rn == 15) {
+ if (Rt2 == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (P == 0 && W == 1)
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt2 == 15 || Rm == 15 || Rm == Rt || Rm == Rt2))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && Rn == 15)
+ S = MCDisassembler::SoftFail;
+ if (writeback && (Rn == Rt || Rn == Rt2))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRH:
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ if (type && Rn == 15) {
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && Rm == 15)
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
+ break;
+ case ARM::LDRSH:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+ if (type && Rn == 15) {
+ if (Rt == 15)
+ S = MCDisassembler::SoftFail;
+ break;
+ }
+ if (type && (Rt == 15 || (writeback && Rn == Rt)))
+ S = MCDisassembler::SoftFail;
+ if (!type && (Rt == 15 || Rm == 15))
+ S = MCDisassembler::SoftFail;
+ if (!type && writeback && (Rn == 15 || Rn == Rt))
+ S = MCDisassembler::SoftFail;
break;
default:
break;
@@ -1588,7 +1715,7 @@ DecodeAddrMode3Instruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
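// The writeback hazard test applied case by case above, distilled into one
// predicate (a sketch, not a helper in this file): with P == 0 (post-indexed)
// or W == 1, the base register is written back, so a base that aliases PC or
// a transfer register makes the encoding UNPREDICTABLE -- reported as
// SoftFail so the instruction still disassembles.
static bool isWritebackHazard(unsigned P, unsigned W, unsigned Rn,
                              unsigned Rt, unsigned Rt2) {
  bool writeback = (P == 0) || (W == 1);
  return writeback && (Rn == 15 || Rn == Rt || Rn == Rt2);
}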
-static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeRFEInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1617,7 +1744,7 @@ static DecodeStatus DecodeRFEInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
+static DecodeStatus DecodeMemMultipleWritebackInstruction(MCInst &Inst,
unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1702,7 +1829,7 @@ static DecodeStatus DecodeMemMultipleWritebackInstruction(llvm::MCInst &Inst,
return S;
}
-static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeCPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction32(Insn, 18, 2);
unsigned M = fieldFromInstruction32(Insn, 17, 1);
@@ -1742,7 +1869,7 @@ static DecodeStatus DecodeCPSInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2CPSInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction32(Insn, 9, 2);
unsigned M = fieldFromInstruction32(Insn, 8, 1);
@@ -1782,7 +1909,7 @@ static DecodeStatus DecodeT2CPSInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2MOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1806,7 +1933,7 @@ static DecodeStatus DecodeT2MOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeArmMOVTWInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1832,7 +1959,7 @@ static DecodeStatus DecodeArmMOVTWInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSMLAInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1860,7 +1987,7 @@ static DecodeStatus DecodeSMLAInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrModeImm12Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1880,7 +2007,7 @@ static DecodeStatus DecodeAddrModeImm12Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode5Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1899,13 +2026,28 @@ static DecodeStatus DecodeAddrMode5Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeAddrMode7Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode7Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
return DecodeGPRRegisterClass(Inst, Val, Address, Decoder);
}
static DecodeStatus
-DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2BInstruction(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+ unsigned imm = (fieldFromInstruction32(Insn, 0, 11) << 0) |
+ (fieldFromInstruction32(Insn, 11, 1) << 18) |
+ (fieldFromInstruction32(Insn, 13, 1) << 17) |
+ (fieldFromInstruction32(Insn, 16, 6) << 11) |
+ (fieldFromInstruction32(Insn, 26, 1) << 19);
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<21>(imm<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(imm << 1)));
+ return S;
+}
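// A standalone recomputation of the branch-target arithmetic above, assuming
// the encoding T3 field layout (S at bit 26, J1 at bit 13, J2 at bit 11,
// imm6 at bits 16-21, imm11 at bits 0-10). SignExtend32 mirrors
// llvm/Support/MathExtras.h; the helper names are hypothetical.
#include <cstdint>

static inline uint32_t field(uint32_t insn, unsigned start, unsigned len) {
  return (insn >> start) & ((1u << len) - 1);
}

template <unsigned B> static inline int32_t signExtend32(uint32_t x) {
  return int32_t(x << (32 - B)) >> (32 - B);
}

static int64_t t2BTarget(uint32_t insn, uint64_t addr) {
  uint32_t imm = (field(insn, 0, 11) << 0)  |  // imm11 -> imm[10:0]
                 (field(insn, 11, 1) << 18) |  // J2    -> imm[18]
                 (field(insn, 13, 1) << 17) |  // J1    -> imm[17]
                 (field(insn, 16, 6) << 11) |  // imm6  -> imm[16:11]
                 (field(insn, 26, 1) << 19);   // S     -> imm[19], the sign
  // Halfword-scale, sign-extend from bit 20, and add the Thumb PC bias of 4.
  return int64_t(addr) + signExtend32<21>(imm << 1) + 4;
}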
+
+static DecodeStatus
+DecodeBranchImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1915,12 +2057,14 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
if (pred == 0xF) {
Inst.setOpcode(ARM::BLXi);
imm |= fieldFromInstruction32(Insn, 24, 1) << 1;
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
return S;
}
- if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8, true,
- 4, Inst, Decoder))
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<26>(imm) + 8,
+ true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<26>(imm)));
if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
return MCDisassembler::Fail;
@@ -1929,13 +2073,7 @@ DecodeBranchImmInstruction(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVCVTImmOperand(llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Val));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeAddrMode6Operand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1952,7 +2090,7 @@ static DecodeStatus DecodeAddrMode6Operand(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -1964,47 +2102,38 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
// First output register
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: case ARM::VLD1q8:
+ case ARM::VLD1q16wb_fixed: case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_fixed: case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_fixed: case ARM::VLD1q64wb_register:
+ case ARM::VLD1q8wb_fixed: case ARM::VLD1q8wb_register:
+ case ARM::VLD2d16: case ARM::VLD2d32: case ARM::VLD2d8:
+ case ARM::VLD2d16wb_fixed: case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_fixed: case ARM::VLD2d32wb_register:
+ case ARM::VLD2d8wb_fixed: case ARM::VLD2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2b16:
+ case ARM::VLD2b32:
+ case ARM::VLD2b8:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
// Second output register
switch (Inst.getOpcode()) {
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VLD1d8T:
- case ARM::VLD1d16T:
- case ARM::VLD1d32T:
- case ARM::VLD1d64T:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2020,12 +2149,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VLD2b8:
- case ARM::VLD2b16:
- case ARM::VLD2b32:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
case ARM::VLD3q8:
case ARM::VLD3q16:
case ARM::VLD3q32:
@@ -2046,28 +2169,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third output register
switch(Inst.getOpcode()) {
- case ARM::VLD1d8T:
- case ARM::VLD1d16T:
- case ARM::VLD1d32T:
- case ARM::VLD1d64T:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
@@ -2104,20 +2205,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth output register
switch (Inst.getOpcode()) {
- case ARM::VLD1d8Q:
- case ARM::VLD1d16Q:
- case ARM::VLD1d32Q:
- case ARM::VLD1d64Q:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
case ARM::VLD4d8:
case ARM::VLD4d16:
case ARM::VLD4d32:
@@ -2142,31 +2229,58 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
// Writeback operand
switch (Inst.getOpcode()) {
- case ARM::VLD1d8_UPD:
- case ARM::VLD1d16_UPD:
- case ARM::VLD1d32_UPD:
- case ARM::VLD1d64_UPD:
- case ARM::VLD1q8_UPD:
- case ARM::VLD1q16_UPD:
- case ARM::VLD1q32_UPD:
- case ARM::VLD1q64_UPD:
- case ARM::VLD1d8T_UPD:
- case ARM::VLD1d16T_UPD:
- case ARM::VLD1d32T_UPD:
- case ARM::VLD1d64T_UPD:
- case ARM::VLD1d8Q_UPD:
- case ARM::VLD1d16Q_UPD:
- case ARM::VLD1d32Q_UPD:
- case ARM::VLD1d64Q_UPD:
- case ARM::VLD2d8_UPD:
- case ARM::VLD2d16_UPD:
- case ARM::VLD2d32_UPD:
- case ARM::VLD2q8_UPD:
- case ARM::VLD2q16_UPD:
- case ARM::VLD2q32_UPD:
- case ARM::VLD2b8_UPD:
- case ARM::VLD2b16_UPD:
- case ARM::VLD2b32_UPD:
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d8Twb_register:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d16Twb_register:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d32Twb_register:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d64Twb_register:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d8Qwb_register:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d16Qwb_register:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d32Qwb_register:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d64Qwb_register:
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ case ARM::VLD2d8wb_register:
+ case ARM::VLD2d16wb_register:
+ case ARM::VLD2d32wb_register:
+ case ARM::VLD2q8wb_register:
+ case ARM::VLD2q16wb_register:
+ case ARM::VLD2q32wb_register:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2b8wb_register:
+ case ARM::VLD2b16wb_register:
+ case ARM::VLD2b32wb_register:
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VLD3d8_UPD:
case ARM::VLD3d16_UPD:
case ARM::VLD3d32_UPD:
@@ -2191,17 +2305,66 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
// AddrMode6 Offset (register)
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ default:
+ // The instructions below have been updated to have an explicit
+ // am6offset operand split between fixed and register offset. For those
+ // not yet updated, we need to add an additional reg0 operand for the
+ // fixed variant.
+ //
+ // The fixed offset encodes as Rm == 0xd, so we check for that.
+ if (Rm == 0xd) {
+ Inst.addOperand(MCOperand::CreateReg(0));
+ break;
+ }
+ // Fall through to handle the register offset variant.
+ case ARM::VLD1d8wb_fixed:
+ case ARM::VLD1d16wb_fixed:
+ case ARM::VLD1d32wb_fixed:
+ case ARM::VLD1d64wb_fixed:
+ case ARM::VLD1d8Twb_fixed:
+ case ARM::VLD1d16Twb_fixed:
+ case ARM::VLD1d32Twb_fixed:
+ case ARM::VLD1d64Twb_fixed:
+ case ARM::VLD1d8Qwb_fixed:
+ case ARM::VLD1d16Qwb_fixed:
+ case ARM::VLD1d32Qwb_fixed:
+ case ARM::VLD1d64Qwb_fixed:
+ case ARM::VLD1d8wb_register:
+ case ARM::VLD1d16wb_register:
+ case ARM::VLD1d32wb_register:
+ case ARM::VLD1d64wb_register:
+ case ARM::VLD1q8wb_fixed:
+ case ARM::VLD1q16wb_fixed:
+ case ARM::VLD1q32wb_fixed:
+ case ARM::VLD1q64wb_fixed:
+ case ARM::VLD1q8wb_register:
+ case ARM::VLD1q16wb_register:
+ case ARM::VLD1q32wb_register:
+ case ARM::VLD1q64wb_register:
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2d8wb_fixed:
+ case ARM::VLD2d16wb_fixed:
+ case ARM::VLD2d32wb_fixed:
+ case ARM::VLD2b8wb_fixed:
+ case ARM::VLD2b16wb_fixed:
+ case ARM::VLD2b32wb_fixed:
+ case ARM::VLD2q8wb_fixed:
+ case ARM::VLD2q16wb_fixed:
+ case ARM::VLD2q32wb_fixed:
+ break;
}
return S;
}
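// The AddrMode6 post-increment convention used throughout the VLD/VST
// decoders, as a tiny classifier (hypothetical helper): Rm == 0xF is the
// no-writeback form, Rm == 0xD is writeback by a fixed stride (the access
// size), and any other value is writeback by the GPR Rm, which is the only
// case that adds a register operand to the instruction.
enum class Am6Offset { None, Fixed, Register };

static Am6Offset classifyAm6(unsigned Rm) {
  if (Rm == 0xF) return Am6Offset::None;
  if (Rm == 0xD) return Am6Offset::Fixed;
  return Am6Offset::Register;
}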
-static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2214,31 +2377,60 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Writeback Operand
switch (Inst.getOpcode()) {
- case ARM::VST1d8_UPD:
- case ARM::VST1d16_UPD:
- case ARM::VST1d32_UPD:
- case ARM::VST1d64_UPD:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1d8wb_register:
+ case ARM::VST1d16wb_register:
+ case ARM::VST1d32wb_register:
+ case ARM::VST1d64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Twb_register:
+ case ARM::VST1d16Twb_register:
+ case ARM::VST1d32Twb_register:
+ case ARM::VST1d64Twb_register:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST1d8Qwb_register:
+ case ARM::VST1d16Qwb_register:
+ case ARM::VST1d32Qwb_register:
+ case ARM::VST1d64Qwb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d8wb_register:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2q8wb_register:
+ case ARM::VST2q16wb_register:
+ case ARM::VST2q32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b8wb_register:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_register:
+ if (Rm == 0xF)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(0));
+ break;
case ARM::VST3d8_UPD:
case ARM::VST3d16_UPD:
case ARM::VST3d32_UPD:
@@ -2263,55 +2455,89 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
// AddrMode6 Offset (register)
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ default:
+ if (Rm == 0xD)
+ Inst.addOperand(MCOperand::CreateReg(0));
+ else if (Rm != 0xF) {
+ if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
+ break;
+ case ARM::VST1d8wb_fixed:
+ case ARM::VST1d16wb_fixed:
+ case ARM::VST1d32wb_fixed:
+ case ARM::VST1d64wb_fixed:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1d8Twb_fixed:
+ case ARM::VST1d16Twb_fixed:
+ case ARM::VST1d32Twb_fixed:
+ case ARM::VST1d64Twb_fixed:
+ case ARM::VST1d8Qwb_fixed:
+ case ARM::VST1d16Qwb_fixed:
+ case ARM::VST1d32Qwb_fixed:
+ case ARM::VST1d64Qwb_fixed:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2q8wb_fixed:
+ case ARM::VST2q16wb_fixed:
+ case ARM::VST2q32wb_fixed:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b32wb_fixed:
+ break;
}
+
// First input register
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
+ case ARM::VST1q8:
+ case ARM::VST1q16wb_fixed:
+ case ARM::VST1q16wb_register:
+ case ARM::VST1q32wb_fixed:
+ case ARM::VST1q32wb_register:
+ case ARM::VST1q64wb_fixed:
+ case ARM::VST1q64wb_register:
+ case ARM::VST1q8wb_fixed:
+ case ARM::VST1q8wb_register:
+ case ARM::VST2d16:
+ case ARM::VST2d32:
+ case ARM::VST2d8:
+ case ARM::VST2d16wb_fixed:
+ case ARM::VST2d16wb_register:
+ case ARM::VST2d32wb_fixed:
+ case ARM::VST2d32wb_register:
+ case ARM::VST2d8wb_fixed:
+ case ARM::VST2d8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VST2b16:
+ case ARM::VST2b32:
+ case ARM::VST2b8:
+ case ARM::VST2b16wb_fixed:
+ case ARM::VST2b16wb_register:
+ case ARM::VST2b32wb_fixed:
+ case ARM::VST2b32wb_register:
+ case ARM::VST2b8wb_fixed:
+ case ARM::VST2b8wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ }
// Second input register
switch (Inst.getOpcode()) {
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST1q8_UPD:
- case ARM::VST1q16_UPD:
- case ARM::VST1q32_UPD:
- case ARM::VST1q64_UPD:
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
- case ARM::VST2d8_UPD:
- case ARM::VST2d16_UPD:
- case ARM::VST2d32_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2327,12 +2553,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
return MCDisassembler::Fail;
break;
- case ARM::VST2b8:
- case ARM::VST2b16:
- case ARM::VST2b32:
- case ARM::VST2b8_UPD:
- case ARM::VST2b16_UPD:
- case ARM::VST2b32_UPD:
case ARM::VST3q8:
case ARM::VST3q16:
case ARM::VST3q32:
@@ -2354,28 +2574,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Third input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8T:
- case ARM::VST1d16T:
- case ARM::VST1d32T:
- case ARM::VST1d64T:
- case ARM::VST1d8T_UPD:
- case ARM::VST1d16T_UPD:
- case ARM::VST1d32T_UPD:
- case ARM::VST1d64T_UPD:
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
@@ -2412,20 +2610,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
// Fourth input register
switch (Inst.getOpcode()) {
- case ARM::VST1d8Q:
- case ARM::VST1d16Q:
- case ARM::VST1d32Q:
- case ARM::VST1d64Q:
- case ARM::VST1d8Q_UPD:
- case ARM::VST1d16Q_UPD:
- case ARM::VST1d32Q_UPD:
- case ARM::VST1d64Q_UPD:
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- case ARM::VST2q8_UPD:
- case ARM::VST2q16_UPD:
- case ARM::VST2q32_UPD:
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
@@ -2451,7 +2635,7 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2461,15 +2645,21 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = fieldFromInstruction32(Insn, 6, 2);
- unsigned regs = fieldFromInstruction32(Insn, 5, 1) + 1;
align *= (1 << size);
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (regs == 2) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD1DUPq16: case ARM::VLD1DUPq32: case ARM::VLD1DUPq8:
+ case ARM::VLD1DUPq16wb_fixed: case ARM::VLD1DUPq16wb_register:
+ case ARM::VLD1DUPq32wb_fixed: case ARM::VLD1DUPq32wb_register:
+ case ARM::VLD1DUPq8wb_fixed: case ARM::VLD1DUPq8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
if (Rm != 0xF) {
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
@@ -2480,17 +2670,17 @@ static DecodeStatus DecodeVLD1DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
- if (Rm == 0xD)
- Inst.addOperand(MCOperand::CreateReg(0));
- else if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
- return MCDisassembler::Fail;
- }
+ // The fixed offset post-increment encodes Rm == 0xd. The no-writeback
+ // variant encodes Rm == 0xf. Anything else is a register offset post-
+ // increment and we need to add the register operand to the instruction.
+ if (Rm != 0xD && Rm != 0xF &&
+ !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
return S;
}
-static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2500,18 +2690,33 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned align = fieldFromInstruction32(Insn, 4, 1);
unsigned size = 1 << fieldFromInstruction32(Insn, 6, 2);
- unsigned inc = fieldFromInstruction32(Insn, 5, 1) + 1;
+ unsigned pred = fieldFromInstruction32(Insn, 22, 4);
align *= 2*size;
- if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+inc)%32, Address, Decoder)))
- return MCDisassembler::Fail;
- if (Rm != 0xF) {
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
+ switch (Inst.getOpcode()) {
+ case ARM::VLD2DUPd16: case ARM::VLD2DUPd32: case ARM::VLD2DUPd8:
+ case ARM::VLD2DUPd16wb_fixed: case ARM::VLD2DUPd16wb_register:
+ case ARM::VLD2DUPd32wb_fixed: case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPd8wb_fixed: case ARM::VLD2DUPd8wb_register:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ case ARM::VLD2DUPd16x2: case ARM::VLD2DUPd32x2: case ARM::VLD2DUPd8x2:
+ case ARM::VLD2DUPd16x2wb_fixed: case ARM::VLD2DUPd16x2wb_register:
+ case ARM::VLD2DUPd32x2wb_fixed: case ARM::VLD2DUPd32x2wb_register:
+ case ARM::VLD2DUPd8x2wb_fixed: case ARM::VLD2DUPd8x2wb_register:
+ if (!Check(S, DecodeDPairSpacedRegisterClass(Inst, Rd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
+ break;
}
+ if (Rm != 0xF)
+ Inst.addOperand(MCOperand::CreateImm(0));
+
if (!Check(S, DecodeGPRRegisterClass(Inst, Rn, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(align));
@@ -2523,10 +2728,13 @@ static DecodeStatus DecodeVLD2DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail;
}
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
return S;
}
-static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2561,7 +2769,7 @@ static DecodeStatus DecodeVLD3DupInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2614,7 +2822,7 @@ static DecodeStatus DecodeVLD4DupInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeNEONModImmInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2659,7 +2867,7 @@ DecodeNEONModImmInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVSHLMaxInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2678,31 +2886,31 @@ static DecodeStatus DecodeVSHLMaxInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeShiftRight8Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight8Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(8 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight16Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight16Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(16 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight32Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight32Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(32 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeShiftRight64Imm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeShiftRight64Imm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateImm(64 - Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeTBLInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2713,7 +2921,6 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
Rm |= fieldFromInstruction32(Insn, 5, 1) << 4;
unsigned op = fieldFromInstruction32(Insn, 6, 1);
- unsigned length = fieldFromInstruction32(Insn, 8, 2) + 1;
if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder)))
return MCDisassembler::Fail;
@@ -2722,9 +2929,15 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return MCDisassembler::Fail; // Writeback
}
- for (unsigned i = 0; i < length; ++i) {
- if (!Check(S, DecodeDPRRegisterClass(Inst, (Rn+i)%32, Address, Decoder)))
- return MCDisassembler::Fail;
+ switch (Inst.getOpcode()) {
+ case ARM::VTBL2:
+ case ARM::VTBX2:
+ if (!Check(S, DecodeDPairRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ break;
+ default:
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
}
if (!Check(S, DecodeDPRRegisterClass(Inst, Rm, Address, Decoder)))
@@ -2733,7 +2946,7 @@ static DecodeStatus DecodeTBLInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2757,25 +2970,31 @@ static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn,
return S;
}
-static DecodeStatus DecodeThumbBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<12>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<12>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2BROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2BROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<21>(Val)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbCmpBROperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbCmpBROperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<7>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<7>(Val << 1)));
return MCDisassembler::Success;
}
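// A worked instance of the target arithmetic shared by the three branch
// operand decoders above, using the 12-bit case (DecodeThumbBROperand). The
// sign-extension helper restates llvm::SignExtend32; the function name is
// hypothetical.
#include <cstdint>

template <unsigned B> static inline int32_t sext32(uint32_t x) {
  return int32_t(x << (32 - B)) >> (32 - B);
}

static uint64_t thumbBRTarget(uint64_t addr, uint32_t imm11) {
  return addr + sext32<12>(imm11 << 1) + 4;  // halfword offset + Thumb PC bias
}
// thumbBRTarget(0x8000, 0x7FE) == 0x8000: imm11 = 0x7FE encodes offset -4,
// which cancels the PC bias -- a branch to self.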
-static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeRR(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2790,7 +3009,7 @@ static DecodeStatus DecodeThumbAddrModeRR(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeIS(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2804,7 +3023,7 @@ static DecodeStatus DecodeThumbAddrModeIS(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModePC(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned imm = Val << 2;
@@ -2814,7 +3033,7 @@ static DecodeStatus DecodeThumbAddrModePC(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbAddrModeSP(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
Inst.addOperand(MCOperand::CreateReg(ARM::SP));
Inst.addOperand(MCOperand::CreateImm(Val));
@@ -2822,7 +3041,7 @@ static DecodeStatus DecodeThumbAddrModeSP(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2839,7 +3058,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2894,7 +3113,7 @@ static DecodeStatus DecodeT2LoadShift(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8S4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
int imm = Val & 0xFF;
if (!(Val & 0x100)) imm *= -1;
@@ -2903,7 +3122,7 @@ static DecodeStatus DecodeT2Imm8S4(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
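// The U:imm8 operand decoded by DecodeT2Imm8S4 above, as a standalone
// sketch: the low 8 bits are a magnitude scaled by 4 and bit 8 is the
// add/subtract flag, so 0x1FF decodes to +1020 and 0x0FF to -1020.
static int decodeImm8S4(unsigned Val) {
  int imm = Val & 0xFF;  // 8-bit magnitude
  if (!(Val & 0x100))    // U == 0 means subtract
    imm = -imm;
  return imm * 4;        // scale to a word offset
}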
-static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8s4(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2918,7 +3137,7 @@ static DecodeStatus DecodeT2AddrModeImm8s4(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm0_1020s4(MCInst &Inst,unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2933,7 +3152,7 @@ static DecodeStatus DecodeT2AddrModeImm0_1020s4(llvm::MCInst &Inst,unsigned Val,
return S;
}
-static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2Imm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
int imm = Val & 0xFF;
if (Val == 0)
@@ -2946,7 +3165,7 @@ static DecodeStatus DecodeT2Imm8(llvm::MCInst &Inst, unsigned Val,
}
-static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm8(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -2977,7 +3196,7 @@ static DecodeStatus DecodeT2AddrModeImm8(llvm::MCInst &Inst, unsigned Val,
return S;
}
-static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeT2LdStPre(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3007,7 +3226,7 @@ static DecodeStatus DecodeT2LdStPre(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3022,7 +3241,7 @@ static DecodeStatus DecodeT2AddrModeImm12(llvm::MCInst &Inst, unsigned Val,
}
-static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imm = fieldFromInstruction16(Insn, 0, 7);
@@ -3033,7 +3252,7 @@ static DecodeStatus DecodeThumbAddSPImm(llvm::MCInst &Inst, uint16_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbAddSPReg(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3058,7 +3277,7 @@ static DecodeStatus DecodeThumbAddSPReg(llvm::MCInst &Inst, uint16_t Insn,
return S;
}
-static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
+static DecodeStatus DecodeThumbCPS(MCInst &Inst, uint16_t Insn,
uint64_t Address, const void *Decoder) {
unsigned imod = fieldFromInstruction16(Insn, 4, 1) | 0x2;
unsigned flags = fieldFromInstruction16(Insn, 0, 3);
@@ -3069,29 +3288,29 @@ static DecodeStatus DecodeThumbCPS(llvm::MCInst &Inst, uint16_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodePostIdxReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rm = fieldFromInstruction32(Insn, 0, 4);
unsigned add = fieldFromInstruction32(Insn, 4, 1);
- if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder)))
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rm, Address, Decoder)))
return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(add));
return S;
}
-static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLXOffset(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
- if (!tryAddingSymbolicOperand(Address,
+ if (!tryAddingSymbolicOperand(Address,
(Address & ~2u) + SignExtend32<22>(Val << 1) + 4,
true, 4, Inst, Decoder))
Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}
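A note for readers tracing the BLX arithmetic above: the target is the instruction's address forced down to a 4-byte boundary (Address & ~2u), plus the sign-extended halfword offset, plus the 4-byte PC bias. A minimal standalone sketch of the same computation (plain C++; Val here is a hypothetical encoded offset, and signExtend32 mirrors llvm::SignExtend32):

#include <cstdint>
#include <cstdio>

// Sign-extend the low B bits of x (mirrors llvm::SignExtend32<B>).
template <unsigned B> static int32_t signExtend32(uint32_t x) {
  return (int32_t)(x << (32 - B)) >> (32 - B);
}

int main() {
  uint64_t Address = 0x8002;  // a Thumb BLX at a 2-byte-aligned PC
  uint32_t Val = 0x100;       // hypothetical 21-bit halfword offset field
  uint64_t Target = (Address & ~2u) + signExtend32<22>(Val << 1) + 4;
  printf("0x%llx\n", (unsigned long long)Target);  // 0x8000 + 0x200 + 4 = 0x8204
  return 0;
}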
-static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeCoprocessor(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (Val == 0xA || Val == 0xB)
return MCDisassembler::Fail;
@@ -3101,7 +3320,7 @@ static DecodeStatus DecodeCoprocessor(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
+DecodeThumbTableBranch(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3117,7 +3336,7 @@ DecodeThumbTableBranch(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeThumb2BCCInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3159,7 +3378,7 @@ DecodeThumb2BCCInstruction(llvm::MCInst &Inst, unsigned Insn,
// Decode a shifted immediate operand. These basically consist
// of an 8-bit value, and a 4-bit directive that specifies either
// a splat operation or a rotation.
-static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
unsigned ctrl = fieldFromInstruction32(Val, 10, 2);
if (ctrl == 0) {
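For reference, the expansion this decoder inverts is the ARM ARM's ThumbExpandImm pseudocode: a ctrl of 0 splats the 8-bit value across the word in one of four patterns, anything else rotates an 8-bit value whose top bit is forced to 1. A hedged C++ rendering of that pseudocode (the field layout below is the architectural imm12, not necessarily the decoder's internal Val packing):

#include <cassert>
#include <cstdint>

// Expand a Thumb-2 modified-immediate field to its 32-bit value.
static uint32_t thumbExpandImm(uint32_t imm12) {
  assert(imm12 < 0x1000 && "imm12 is a 12-bit field");
  uint32_t imm8 = imm12 & 0xFF;
  if ((imm12 & 0xC00) == 0) {  // splat directives
    uint32_t splat[4] = {
      imm8,                                               // 00 00 00 XY
      (imm8 << 16) | imm8,                                // 00 XY 00 XY
      (imm8 << 24) | (imm8 << 8),                         // XY 00 XY 00
      (imm8 << 24) | (imm8 << 16) | (imm8 << 8) | imm8};  // XY XY XY XY
    return splat[(imm12 >> 8) & 3];
  }
  // Rotation: 1bcdefgh rotated right by imm12<11:7>, which is >= 8 here.
  uint32_t unrot = 0x80 | (imm12 & 0x7F);
  uint32_t rot = (imm12 >> 7) & 0x1F;
  return (unrot >> rot) | (unrot << (32 - rot));
}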
@@ -3191,19 +3410,23 @@ static DecodeStatus DecodeT2SOImm(llvm::MCInst &Inst, unsigned Val,
}
static DecodeStatus
-DecodeThumbBCCTargetOperand(llvm::MCInst &Inst, unsigned Val,
+DecodeThumbBCCTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- Inst.addOperand(MCOperand::CreateImm(Val << 1));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<8>(Val<<1) + 4,
+ true, 2, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<8>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeThumbBLTargetOperand(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeThumbBLTargetOperand(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder){
- Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
+ if (!tryAddingSymbolicOperand(Address, Address + SignExtend32<22>(Val<<1) + 4,
+ true, 4, Inst, Decoder))
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1)));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeMemBarrierOption(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
switch (Val) {
default:
@@ -3223,14 +3446,14 @@ static DecodeStatus DecodeMemBarrierOption(llvm::MCInst &Inst, unsigned Val,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeMSRMask(llvm::MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder) {
if (!Val) return MCDisassembler::Fail;
Inst.addOperand(MCOperand::CreateImm(Val));
return MCDisassembler::Success;
}
-static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3253,7 +3476,7 @@ static DecodeStatus DecodeDoubleRegLoad(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder){
DecodeStatus S = MCDisassembler::Success;
@@ -3280,7 +3503,7 @@ static DecodeStatus DecodeDoubleRegStore(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3305,7 +3528,7 @@ static DecodeStatus DecodeLDRPreImm(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeLDRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3333,7 +3556,7 @@ static DecodeStatus DecodeLDRPreReg(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreImm(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3358,7 +3581,7 @@ static DecodeStatus DecodeSTRPreImm(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeSTRPreReg(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3383,7 +3606,7 @@ static DecodeStatus DecodeSTRPreReg(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3442,7 +3665,7 @@ static DecodeStatus DecodeVLD1LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST1LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3500,7 +3723,7 @@ static DecodeStatus DecodeVST1LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3567,7 +3790,7 @@ static DecodeStatus DecodeVLD2LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST2LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3631,7 +3854,7 @@ static DecodeStatus DecodeVST2LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3701,7 +3924,7 @@ static DecodeStatus DecodeVLD3LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST3LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3765,7 +3988,7 @@ static DecodeStatus DecodeVST3LN(llvm::MCInst &Inst, unsigned Insn,
}
-static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVLD4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3839,7 +4062,7 @@ static DecodeStatus DecodeVLD4LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVST4LN(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -3904,7 +4127,7 @@ static DecodeStatus DecodeVST4LN(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVSRR(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
@@ -3930,7 +4153,7 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeVMOVRRS(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
@@ -3956,7 +4179,7 @@ static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeIT(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
unsigned pred = fieldFromInstruction16(Insn, 4, 4);
@@ -3983,7 +4206,7 @@ static DecodeStatus DecodeIT(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2LDRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4020,7 +4243,7 @@ DecodeT2LDRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
}
static DecodeStatus
-DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
+DecodeT2STRDPreInstruction(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4054,7 +4277,7 @@ DecodeT2STRDPreInstruction(llvm::MCInst &Inst, unsigned Insn,
return S;
}
-static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
+static DecodeStatus DecodeT2Adr(MCInst &Inst, uint32_t Insn,
uint64_t Address, const void *Decoder) {
unsigned sign1 = fieldFromInstruction32(Insn, 21, 1);
unsigned sign2 = fieldFromInstruction32(Insn, 23, 1);
@@ -4069,7 +4292,7 @@ static DecodeStatus DecodeT2Adr(llvm::MCInst &Inst, uint32_t Insn,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
+static DecodeStatus DecodeT2ShifterImmOperand(MCInst &Inst, uint32_t Val,
uint64_t Address,
const void *Decoder) {
DecodeStatus S = MCDisassembler::Success;
@@ -4080,3 +4303,109 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val,
return S;
}
+static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Rt = fieldFromInstruction32(Insn, 12, 4);
+ unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4);
+ unsigned Rn = fieldFromInstruction32(Insn, 16, 4);
+ unsigned pred = fieldFromInstruction32(Insn, 28, 4);
+
+ if (pred == 0xF)
+ return DecodeCPSInstruction(Inst, Insn, Address, Decoder);
+
+ DecodeStatus S = MCDisassembler::Success;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
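Every decoder above leans on the same Check(S, ...) idiom: Fail aborts decoding immediately, SoftFail (decodable but UNPREDICTABLE) downgrades the running status without stopping, and Success leaves it untouched. A self-contained restatement of the idiom (the standalone names are hypothetical; the real enum lives on MCDisassembler):

#include <algorithm>

enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3 };

// Fold a sub-decoder's status into the running one; false means hard failure.
static bool check(DecodeStatus &Out, DecodeStatus In) {
  Out = std::min(Out, In);  // ordered: Fail < SoftFail < Success
  return In != Fail;
}

A typical decoder body then reads: DecodeStatus S = Success; if (!check(S, DecodeSomeOperand(...))) return Fail; ...; return S;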
+static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv2f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv2f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
+
+static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Vd = (fieldFromInstruction32(Insn, 12, 4) << 0);
+ Vd |= (fieldFromInstruction32(Insn, 22, 1) << 4);
+ unsigned Vm = (fieldFromInstruction32(Insn, 0, 4) << 0);
+ Vm |= (fieldFromInstruction32(Insn, 5, 1) << 4);
+ unsigned imm = fieldFromInstruction32(Insn, 16, 6);
+ unsigned cmode = fieldFromInstruction32(Insn, 8, 4);
+
+ DecodeStatus S = MCDisassembler::Success;
+
+ // VMOVv4f32 is ambiguous with these decodings.
+ if (!(imm & 0x38) && cmode == 0xF) {
+ Inst.setOpcode(ARM::VMOVv4f32);
+ return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder);
+ }
+
+ if (!(imm & 0x20)) Check(S, MCDisassembler::SoftFail);
+
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vd, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeQPRRegisterClass(Inst, Vm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::CreateImm(64 - imm));
+
+ return S;
+}
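The immediate these two decoders build is the fixed-point fraction-bit count: the instruction stores 64 - fbits in a 6-bit field, so the decoder emits 64 - imm, and encodings with bit 5 clear (imm < 32) are marked SoftFail, decodable but suspect, rather than rejected outright. A small sketch of the extraction (the instruction word is hypothetical; field mirrors fieldFromInstruction32):

#include <cstdint>
#include <cstdio>

static uint32_t field(uint32_t insn, unsigned lo, unsigned n) {
  return (insn >> lo) & ((1u << n) - 1);
}

int main() {
  uint32_t insn = 48u << 16;                // hypothetical word, imm<21:16> = 48
  uint32_t imm = field(insn, 16, 6);        // 48
  printf("#%u fraction bits\n", 64 - imm);  // prints "#16 fraction bits"
  return 0;
}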
+
+static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val,
+ uint64_t Address, const void *Decoder) {
+ DecodeStatus S = MCDisassembler::Success;
+
+ unsigned Rn = fieldFromInstruction32(Val, 16, 4);
+ unsigned Rt = fieldFromInstruction32(Val, 12, 4);
+ unsigned Rm = fieldFromInstruction32(Val, 0, 4);
+ Rm |= (fieldFromInstruction32(Val, 23, 1) << 4);
+ unsigned Cond = fieldFromInstruction32(Val, 28, 4);
+
+ if (fieldFromInstruction32(Val, 8, 4) != 0 || Rn == Rt)
+ S = MCDisassembler::SoftFail;
+
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodeAddrMode7Operand(Inst, Rn, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePostIdxReg(Inst, Rm, Address, Decoder)))
+ return MCDisassembler::Fail;
+ if (!Check(S, DecodePredicateOperand(Inst, Cond, Address, Decoder)))
+ return MCDisassembler::Fail;
+
+ return S;
+}
+
diff --git a/lib/Target/ARM/Disassembler/CMakeLists.txt b/lib/Target/ARM/Disassembler/CMakeLists.txt
index da87751150e8..9de6e5c511bd 100644
--- a/lib/Target/ARM/Disassembler/CMakeLists.txt
+++ b/lib/Target/ARM/Disassembler/CMakeLists.txt
@@ -11,11 +11,3 @@ set_property(
)
endif()
add_dependencies(LLVMARMDisassembler ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMDisassembler
- LLVMARMCodeGen
- LLVMARMDesc
- LLVMARMInfo
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..52d833893270
--- /dev/null
+++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMDisassembler
+parent = ARM
+required_libraries = ARMDesc ARMInfo MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index ccdac3ebeb47..b3eeafe08314 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -18,11 +18,11 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "ARMGenAsmWriter.inc"
/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
@@ -36,16 +36,14 @@ static unsigned translateShiftImm(unsigned imm) {
ARMInstPrinter::ARMInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI) {
+ MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
-StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void ARMInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
@@ -101,7 +99,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// A8.6.123 PUSH
if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print PUSH if there are at least two registers in the list.
O << '\t' << "push";
printPredicateOperand(MI, 2, O);
if (Opcode == ARM::t2STMDB_UPD)
@@ -122,7 +122,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
// A8.6.122 POP
if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) &&
- MI->getOperand(0).getReg() == ARM::SP) {
+ MI->getOperand(0).getReg() == ARM::SP &&
+ MI->getNumOperands() > 5) {
+ // Should only print POP if there are at least two registers in the list.
O << '\t' << "pop";
printPredicateOperand(MI, 2, O);
if (Opcode == ARM::t2LDMIA_UPD)
@@ -250,7 +252,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum,
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (ShOpc == ARM_AM::rrx)
return;
-
+
O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
}
@@ -433,6 +435,12 @@ void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op,
void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(Op);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, Op, O);
+ return;
+ }
+
const MCOperand &MO3 = MI->getOperand(Op+2);
unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm());
@@ -636,7 +644,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (getAvailableFeatures() & ARM::FeatureMClass) {
switch (Op.getImm()) {
- default: assert(0 && "Unexpected mask value!");
+ default: llvm_unreachable("Unexpected mask value!");
case 0: O << "apsr"; return;
case 1: O << "iapsr"; return;
case 2: O << "eapsr"; return;
@@ -659,12 +667,11 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) {
O << "APSR_";
switch (Mask) {
- default: assert(0);
+ default: llvm_unreachable("Unexpected mask value!");
case 4: O << "g"; return;
case 8: O << "nzcvq"; return;
case 12: O << "nzcvqg"; return;
}
- llvm_unreachable("Unexpected mask value!");
}
if (SpecRegRBit)
@@ -684,7 +691,10 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
- if (CC != ARMCC::AL)
+ // Handle the undefined CC value of 15 here when printing so we don't abort().
+ if ((unsigned)CC == 15)
+ O << "<und>";
+ else if (CC != ARMCC::AL)
O << ARMCondCodeToString(CC);
}
@@ -882,6 +892,11 @@ void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI,
const MCOperand &MO1 = MI->getOperand(OpNum);
const MCOperand &MO2 = MI->getOperand(OpNum+1);
+ if (!MO1.isReg()) { // For label symbolic references.
+ printOperand(MI, OpNum, O);
+ return;
+ }
+
O << "[" << getRegisterName(MO1.getReg());
int32_t OffImm = (int32_t)MO2.getImm() / 4;
@@ -963,7 +978,8 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
unsigned EncodedImm = MI->getOperand(OpNum).getImm();
unsigned EltBits;
uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
- O << "#0x" << utohexstr(Val);
+ O << "#0x";
+ O.write_hex(Val);
}
void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
@@ -986,7 +1002,153 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum,
}
}
+void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << 16 - MI->getOperand(OpNum).getImm();
+}
+
+void ARMInstPrinter::printFBits32(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "#" << 32 - MI->getOperand(OpNum).getImm();
+}
+
void ARMInstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
+
+void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}";
+}
+
+void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << ", " << getRegisterName(Reg1) << "}";
+}
+
+void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}";
+}
+
+void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}";
+}
+
+void ARMInstPrinter::printVectorListOneAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_1);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListTwoSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ unsigned Reg0 = MRI.getSubReg(Reg, ARM::dsub_0);
+ unsigned Reg1 = MRI.getSubReg(Reg, ARM::dsub_2);
+ O << "{" << getRegisterName(Reg0) << "[], " << getRegisterName(Reg1) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListFourSpacedAllLanes(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "[], "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "[]}";
+}
+
+void ARMInstPrinter::printVectorListThreeSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << "}";
+}
+
+void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ // Normally, it's not safe to use register enum values directly with
+ // addition to get the next register, but for VFP registers, the
+ // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 4) << ", "
+ << getRegisterName(MI->getOperand(OpNum).getReg() + 6) << "}";
+}
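All of these list printers exploit the property called out in the comments: D-register enum values are consecutive, so Reg + 2 names every other D register. A standalone illustration of the resulting syntax (plain C++; regName stands in for getRegisterName):

#include <cstdio>
#include <string>

static std::string regName(unsigned DRegNo) {
  return "d" + std::to_string(DRegNo);
}

// Print an all-lanes, even-spaced NEON list such as {d0[], d2[], d4[]}.
static void printThreeSpacedAllLanes(unsigned First) {
  printf("{%s[], %s[], %s[]}\n", regName(First).c_str(),
         regName(First + 2).c_str(), regName(First + 4).c_str());
}

int main() { printThreeSpacedAllLanes(0); return 0; }  // {d0[], d2[], d4[]}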
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index 5c2173fcde62..8acb7eef019b 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===//
+//===- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,12 @@ class MCOperand;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+ ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- static const char *getInstructionName(unsigned Opcode);
-
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
@@ -128,7 +126,33 @@ public:
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printFBits32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListTwoSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorListOneAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListTwoSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpacedAllLanes(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListThreeSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
+ void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
};
} // end namespace llvm
diff --git a/lib/Target/ARM/InstPrinter/CMakeLists.txt b/lib/Target/ARM/InstPrinter/CMakeLists.txt
index fa0b4957ccde..e2d4819b4b4a 100644
--- a/lib/Target/ARM/InstPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/InstPrinter/CMakeLists.txt
@@ -5,8 +5,3 @@ add_llvm_library(LLVMARMAsmPrinter
)
add_dependencies(LLVMARMAsmPrinter ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..6f4fa365358c
--- /dev/null
+++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMAsmPrinter
+parent = ARM
+required_libraries = MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt
new file mode 100644
index 000000000000..fd4b3a33de1a
--- /dev/null
+++ b/lib/Target/ARM/LLVMBuild.txt
@@ -0,0 +1,35 @@
+;===- ./lib/Target/ARM/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = ARM
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = ARMCodeGen
+parent = ARM
+required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 9982fa68a578..62473b2bfdee 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -1,4 +1,4 @@
-//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===//
+//===-- ARMAddressingModes.h - ARM Addressing Modes -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,7 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
@@ -43,7 +44,7 @@ namespace ARM_AM {
static inline const char *getShiftOpcStr(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return "asr";
case ARM_AM::lsl: return "lsl";
case ARM_AM::lsr: return "lsr";
@@ -54,7 +55,7 @@ namespace ARM_AM {
static inline unsigned getShiftOpcEncoding(ShiftOpc Op) {
switch (Op) {
- default: assert(0 && "Unknown shift opc!");
+ default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::asr: return 2;
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -72,7 +73,7 @@ namespace ARM_AM {
static inline const char *getAMSubModeStr(AMSubMode Mode) {
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::ia: return "ia";
case ARM_AM::ib: return "ib";
case ARM_AM::da: return "da";
@@ -569,7 +570,7 @@ namespace ARM_AM {
}
EltBits = 64;
} else {
- assert(false && "Unsupported NEON immediate");
+ llvm_unreachable("Unsupported NEON immediate");
}
return Val;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index c31c5e6b8452..d10bfc104a3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -11,17 +11,18 @@
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMFixupKinds.h"
#include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
@@ -31,8 +32,8 @@ using namespace llvm;
namespace {
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
- ARMELFObjectWriter(Triple::OSType OSType)
- : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM,
+ ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM,
/*HasRelocationAddend*/ false) {}
};
@@ -60,15 +61,16 @@ public:
// ARMFixupKinds.h.
//
// Name Offset (bits) Size (bits) Flags
-{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
-{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel |
MCFixupKindInfo::FKF_IsAlignedDownTo32Bits},
{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
@@ -76,6 +78,9 @@ public:
{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
+{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
@@ -100,13 +105,50 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ /// processFixupValue - Target hook to process the literal value of a fixup
+ /// if necessary.
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ const MCSymbolRefExpr *A = Target.getSymA();
+ // Some fixups to thumb function symbols need the low bit (thumb bit)
+ // twiddled.
+ if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
+ (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ if (A) {
+ const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
+ if (Asm.isThumbFunc(&Sym))
+ Value |= 1;
+ }
+ }
+ // We must always generate a relocation for BL/BLX instructions if we have
+ // a symbol to reference, as the linker relies on knowing the destination
+ // symbol's thumb-ness to get interworking right.
+ if (A && ((unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_blx ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_uncondbl ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_condbl))
+ IsResolved = false;
+ }
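The first half of processFixupValue is the interworking rule: a resolved address of a Thumb function must carry a set low bit so a BX/BLX-style transfer stays in Thumb state, while the exempted fixups (pc-relative loads, adr, constant pool) compute data addresses that the stray bit would corrupt. Stripped to its core (a hedged sketch with hypothetical names):

#include <cstdint>

// Apply the Thumb interworking bit to a resolved fixup value.
static uint64_t adjustForInterworking(uint64_t Value, bool TargetIsThumbFunc,
                                      bool FixupAddressesData) {
  if (TargetIsThumbFunc && !FixupAddressesData)
    Value |= 1;  // the low bit of a target address selects Thumb state
  return Value;
}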
+
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
- void HandleAssemblerFlag(MCAssemblerFlag Flag) {
+ void handleAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
default: break;
case MCAF_Code16:
@@ -124,21 +166,81 @@ public:
};
} // end anonymous namespace
-bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
- // FIXME: Thumb targets, different move constant targets..
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default: return Op;
+ case ARM::tBcc: return ARM::t2Bcc;
+ case ARM::tLDRpciASM: return ARM::t2LDRpci;
+ case ARM::tADR: return ARM::t2ADR;
+ case ARM::tB: return ARM::t2B;
+ }
+}
+
+bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ if (getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode())
+ return true;
return false;
}
-void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented");
- return;
+bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ switch ((unsigned)Fixup.getKind()) {
+ case ARM::fixup_arm_thumb_br: {
+ // Relaxing tB to t2B. tB has a signed 12-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i12.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 2046 || Offset < -2048;
+ }
+ case ARM::fixup_arm_thumb_bcc: {
+ // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the
+ // low bit being an implied zero. There's an implied +4 offset for the
+ // branch, so we adjust the other way here to determine what's
+ // encodable.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 254 || Offset < -256;
+ }
+ case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_cp: {
+ // If the immediate is negative, greater than 1020, or not a multiple
+ // of four, the wide version of the instruction must be used.
+ int64_t Offset = int64_t(Value) - 4;
+ return Offset > 1020 || Offset < 0 || Offset & 3;
+ }
+ }
+ llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!");
+}
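Each case above has the same shape: undo the implied PC+4 bias, then test whether the signed byte displacement still fits the narrow encoding. Isolated for the tB case (a sketch, not the backend API):

#include <cstdint>

// Does a tB (narrow unconditional branch) need relaxing to t2B?
static bool tBNeedsRelaxation(uint64_t Value) {
  int64_t Offset = (int64_t)Value - 4;     // strip the implied PC+4
  return Offset > 2046 || Offset < -2048;  // outside the halfword-aligned 12-bit range
}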
+
+void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
+
+ // Sanity check w/ diagnostic if we get here w/ a bogus instruction.
+ if (RelaxedOp == Inst.getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ Inst.dump_pretty(OS);
+ OS << "\n";
+ report_fatal_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ // The instructions we're relaxing have (so far) the same operands.
+ // We just need to update to the proper opcode.
+ Res = Inst;
+ Res.setOpcode(RelaxedOp);
}
-bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8
const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP
const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0
- const uint32_t ARMv6T2_NopEncoding = 0xe3207800; // NOP
+ const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP
if (isThumb()) {
const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding
: Thumb1_16bitNopEncoding;
@@ -269,6 +371,9 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
// These values don't encode the low two bits since they're always zero.
// Offset by 8 just as above.
return 0xffffff & ((Value - 8) >> 2);
@@ -359,6 +464,19 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
return ((Value - 4) >> 1) & 0xff;
+ case ARM::fixup_arm_pcrel_10_unscaled: {
+ Value = Value - 8; // ARM fixups offset by an additional word and don't
+ // need to adjust for the half-word ordering.
+ bool isAdd = true;
+ if ((int64_t)Value < 0) {
+ Value = -Value;
+ isAdd = false;
+ }
+ // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8].
+ assert ((Value < 256) && "Out of range pc-relative fixup value!");
+ Value = (Value & 0xf) | ((Value & 0xf0) << 4);
+ return Value | (isAdd << 23);
+ }
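The nibble splitting above is the addrmode3 immediate layout: low nibble in bits [3:0], high nibble in bits [11:8], and the add/subtract choice in bit 23. The inverse direction, as a hedged standalone encoder:

#include <cassert>
#include <cstdint>

// Encode a signed byte offset into the addrmode3 immediate fields.
static uint32_t encodeAM3Imm(int64_t Offset) {
  bool isAdd = Offset >= 0;
  uint64_t V = isAdd ? (uint64_t)Offset : (uint64_t)(-Offset);
  assert(V < 256 && "addrmode3 offsets are 8-bit");
  return (uint32_t)((V & 0xf) | ((V & 0xf0) << 4)) | ((uint32_t)isAdd << 23);
}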
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
@@ -376,8 +494,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
assert ((Value < 256) && "Out of range pc-relative fixup value!");
Value |= isAdd << 23;
- // Same addressing mode as fixup_arm_pcrel_10,
- // but with 16-bit halfwords swapped.
+ // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
+ // swapped.
if (Kind == ARM::fixup_t2_pcrel_10) {
uint32_t swapped = (Value & 0xFFFF0000) >> 16;
swapped |= (Value & 0x0000FFFF) << 16;
@@ -395,22 +513,21 @@ namespace {
// ELF is an ELF of course...
class ELFARMAsmBackend : public ARMAsmBackend {
public:
- Triple::OSType OSType;
+ uint8_t OSABI;
ELFARMAsmBackend(const Target &T, const StringRef TT,
- Triple::OSType _OSType)
- : ARMAsmBackend(T, TT), OSType(_OSType) { }
+ uint8_t _OSABI)
+ : ARMAsmBackend(T, TT), OSABI(_OSABI) { }
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS,
- /*IsLittleEndian*/ true);
+ return createARMELFObjectWriter(OS, OSABI);
}
};
// FIXME: Raise this to share code between Darwin and ELF.
-void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void ELFARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = 4; // FIXME: 2 for Thumb
Value = adjustFixupValue(Fixup.getKind(), Value);
@@ -439,7 +556,7 @@ public:
Subtype);
}
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -464,9 +581,13 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_arm_thumb_cb:
return 2;
+ case ARM::fixup_arm_pcrel_10_unscaled:
case ARM::fixup_arm_ldst_pcrel_12:
case ARM::fixup_arm_pcrel_10:
case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
return 3;
@@ -491,7 +612,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
-void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void DarwinARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
Value = adjustFixupValue(Fixup.getKind(), Value);
@@ -527,5 +648,6 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on ARM");
- return new ELFARMAsmBackend(T, TT, Triple(TT).getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, OSABI);
}
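With this change the object-writer layer receives a raw ELF e_ident[EI_OSABI] byte instead of a Triple::OSType, keeping the OS-to-ABI mapping in one helper. That helper looks roughly like the following (a sketch of MCELFObjectTargetWriter::getOSABI as of this era; the exact case list may differ):

#include "llvm/ADT/Triple.h"   // Triple::OSType
#include "llvm/Support/ELF.h"  // ELF::ELFOSABI_* values
using namespace llvm;

static uint8_t osTypeToOSABI(Triple::OSType OSType) {
  switch (OSType) {
  case Triple::FreeBSD: return ELF::ELFOSABI_FREEBSD;
  case Triple::Linux:   return ELF::ELFOSABI_LINUX;
  default:              return ELF::ELFOSABI_NONE;
  }
}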
diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index ec4b6ffcfe83..ae11be888137 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -67,7 +67,6 @@ namespace ARMCC {
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
- default: llvm_unreachable("Unknown condition code");
case ARMCC::EQ: return "eq";
case ARMCC::NE: return "ne";
case ARMCC::HS: return "hs";
@@ -84,6 +83,7 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
case ARMCC::LE: return "le";
case ARMCC::AL: return "al";
}
+ llvm_unreachable("Unknown condition code");
}
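This hunk (and the matching ones above) adopts the covered-switch idiom: drop the default case so -Wswitch can flag a newly added enumerator at compile time, and place llvm_unreachable after the switch to satisfy control-flow analysis. In miniature:

#include "llvm/Support/ErrorHandling.h"  // llvm_unreachable

enum Dir { North, South };

static const char *dirName(Dir D) {
  switch (D) {  // no default: -Wswitch catches any missing case
  case North: return "north";
  case South: return "south";
  }
  llvm_unreachable("covered switch fell through");
}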
namespace ARM_PROC {
@@ -185,6 +185,39 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) {
case S29: case D29: return 29;
case S30: case D30: return 30;
case S31: case D31: return 31;
+
+ // Composite registers use the regnum of the first register in the list.
+ /* Q0 */ case D0_D2: return 0;
+ case D1_D2: case D1_D3: return 1;
+ /* Q1 */ case D2_D4: return 2;
+ case D3_D4: case D3_D5: return 3;
+ /* Q2 */ case D4_D6: return 4;
+ case D5_D6: case D5_D7: return 5;
+ /* Q3 */ case D6_D8: return 6;
+ case D7_D8: case D7_D9: return 7;
+ /* Q4 */ case D8_D10: return 8;
+ case D9_D10: case D9_D11: return 9;
+ /* Q5 */ case D10_D12: return 10;
+ case D11_D12: case D11_D13: return 11;
+ /* Q6 */ case D12_D14: return 12;
+ case D13_D14: case D13_D15: return 13;
+ /* Q7 */ case D14_D16: return 14;
+ case D15_D16: case D15_D17: return 15;
+ /* Q8 */ case D16_D18: return 16;
+ case D17_D18: case D17_D19: return 17;
+ /* Q9 */ case D18_D20: return 18;
+ case D19_D20: case D19_D21: return 19;
+ /* Q10 */ case D20_D22: return 20;
+ case D21_D22: case D21_D23: return 21;
+ /* Q11 */ case D22_D24: return 22;
+ case D23_D24: case D23_D25: return 23;
+ /* Q12 */ case D24_D26: return 24;
+ case D25_D26: case D25_D27: return 25;
+ /* Q13 */ case D26_D28: return 26;
+ case D27_D28: case D27_D29: return 27;
+ /* Q14 */ case D28_D30: return 28;
+ case D29_D30: case D29_D31: return 29;
+ /* Q15 */
}
}
@@ -237,7 +270,6 @@ namespace ARMII {
inline static const char *AddrModeToString(AddrMode addrmode) {
switch (addrmode) {
- default: llvm_unreachable("Unknown memory operation");
case AddrModeNone: return "AddrModeNone";
case AddrMode1: return "AddrMode1";
case AddrMode2: return "AddrMode2";
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
new file mode 100644
index 000000000000..aa649badaf82
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -0,0 +1,283 @@
+//===-- ARMELFObjectWriter.cpp - ARM ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMELFObjectWriter : public MCELFObjectTargetWriter {
+ enum { DefaultEABIVersion = 0x05000000U };
+ unsigned GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+
+ public:
+ ARMELFObjectWriter(uint8_t OSABI);
+
+ virtual ~ARMELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual unsigned getEFlags() const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ };
+}
+
+ARMELFObjectWriter::ARMELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI,
+ ELF::EM_ARM,
+ /*HasRelocationAddend*/ false) {}
+
+ARMELFObjectWriter::~ARMELFObjectWriter() {}
+
+// FIXME: get the real EABI Version from the Triple.
+unsigned ARMELFObjectWriter::getEFlags() const {
+ return ELF::EF_ARM_EABIMASK & DefaultEABIVersion;
+}
+
+// In ARM, _MergedGlobals and most other symbols get emitted directly.
+// I.e. not as an offset to a section symbol.
+// This code is an approximation of what ARM/gcc does.
+
+STATISTIC(PCRelCount, "Total number of PIC Relocations");
+STATISTIC(NonPCRelCount, "Total number of non-PIC relocations");
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol();
+ bool EmitThisSym = false;
+
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(Symbol.getSection());
+ bool InNormalSection = true;
+ unsigned RelocType = 0;
+ RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel);
+
+ DEBUG(
+ const MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+ MCSymbolRefExpr::VariantKind Kind2;
+ Kind2 = Target.getSymB() ? Target.getSymB()->getKind() :
+ MCSymbolRefExpr::VK_None;
+ dbgs() << "considering symbol "
+ << Section.getSectionName() << "/"
+ << Symbol.getName() << "/"
+ << " Rel:" << (unsigned)RelocType
+ << " Kind: " << (int)Kind << "/" << (int)Kind2
+ << " Tmp:"
+ << Symbol.isAbsolute() << "/" << Symbol.isDefined() << "/"
+ << Symbol.isVariable() << "/" << Symbol.isTemporary()
+ << " Counts:" << PCRelCount << "/" << NonPCRelCount << "\n");
+
+ if (IsPCRel) {
+ ++PCRelCount;
+ switch (RelocType) {
+ default:
+ // Most relocation types are emitted as explicit symbols
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = true;
+ break;
+ case ELF::R_ARM_ABS32:
+ // But things get strange with R_ARM_ABS32
+ // In this case, most things that go in .rodata show up
+ // as section relative relocations
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".data.rel", false)
+ .Case(".rodata", false)
+ .Case(".bss", false)
+ .Default(true);
+ EmitThisSym = false;
+ break;
+ }
+ } else {
+ NonPCRelCount++;
+ InNormalSection =
+ StringSwitch<bool>(Section.getSectionName())
+ .Case(".data.rel.ro.local", false)
+ .Case(".rodata", false)
+ .Case(".data.rel", false)
+ .Case(".bss", false)
+ .Default(true);
+
+ switch (RelocType) {
+ default: EmitThisSym = true; break;
+ case ELF::R_ARM_ABS32: EmitThisSym = false; break;
+ }
+ }
+
+ if (EmitThisSym)
+ return &Symbol;
+ if (! Symbol.isTemporary() && InNormalSection) {
+ return &Symbol;
+ }
+ return NULL;
+}
+
+// Need to examine the Fixup when determining whether to
+// emit the relocation as an explicit symbol or as a section relative
+// offset
+unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ return GetRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ unsigned Type = 0;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("Unimplemented");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_REL32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ llvm_unreachable("unimplemented");
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_blx:
+ case ARM::fixup_arm_uncondbranch:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_ARM_PLT:
+ Type = ELF::R_ARM_PLT32;
+ break;
+ default:
+ Type = ELF::R_ARM_CALL;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ Type = ELF::R_ARM_MOVT_PREL;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ Type = ELF::R_ARM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Type = ELF::R_ARM_THM_MOVT_PREL;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ Type = ELF::R_ARM_THM_MOVW_PREL_NC;
+ break;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ Type = ELF::R_ARM_THM_CALL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_ARM_GOT:
+ Type = ELF::R_ARM_GOT_BREL;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TLSGD:
+ Type = ELF::R_ARM_TLS_GD32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TPOFF:
+ Type = ELF::R_ARM_TLS_LE32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTTPOFF:
+ Type = ELF::R_ARM_TLS_IE32;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_ABS32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_GOTOFF:
+ Type = ELF::R_ARM_GOTOFF32;
+ break;
+ case MCSymbolRefExpr::VK_ARM_TARGET1:
+ Type = ELF::R_ARM_TARGET1;
+ break;
+ }
+ break;
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_cp:
+ case ARM::fixup_arm_thumb_br:
+ llvm_unreachable("Unimplemented");
+ case ARM::fixup_arm_uncondbranch:
+ Type = ELF::R_ARM_CALL;
+ break;
+ case ARM::fixup_arm_condbranch:
+ Type = ELF::R_ARM_JUMP24;
+ break;
+ case ARM::fixup_arm_movt_hi16:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case ARM::fixup_arm_movw_lo16:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ Type = ELF::R_ARM_THM_MOVT_ABS;
+ break;
+ case ARM::fixup_t2_movw_lo16:
+ Type = ELF::R_ARM_THM_MOVW_ABS_NC;
+ break;
+ }
+ }
+
+ return Type;
+}
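
A few worked mappings, read straight off the switch above (GNU as syntax
shown for illustration; the GOT/PLT spellings are the standard GNU as
modifiers):

  .word foo        ->  FK_Data_4, VK_None, non-PC-rel   =>  R_ARM_ABS32
  .word foo - .    ->  FK_Data_4, VK_None, PC-rel       =>  R_ARM_REL32
  .word foo(GOT)   ->  FK_Data_4, VK_ARM_GOT            =>  R_ARM_GOT_BREL
  bl foo           ->  fixup_arm_uncondbl, no modifier  =>  R_ARM_CALL
  bl foo(PLT)      ->  fixup_arm_uncondbl, VK_ARM_PLT   =>  R_ARM_PLT32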
+
+MCObjectWriter *llvm::createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
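
The ELF flavour of the ARM asm backend is expected to call this factory from
its createObjectWriter hook; a minimal sketch, assuming the surrounding
ARMAsmBackend code (class and member names are assumptions, not shown in
this hunk):

// Sketch only: ELFARMAsmBackend and its OSABI member are assumed here.
MCObjectWriter *ELFARMAsmBackend::createObjectWriter(raw_ostream &OS) const {
  return createARMELFObjectWriter(OS, OSABI);
}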
diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 350c92decdce..0085feb82069 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -1,4 +1,4 @@
-//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===//
+//===-- ARMFixupKinds.h - ARM Specific Fixup Entries ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,6 +23,9 @@ enum Fixups {
// the 16-bit halfwords reordered.
fixup_t2_ldst_pcrel_12,
+ // fixup_arm_pcrel_10_unscaled - 10-bit PC relative relocation for symbol
+ // addresses used in LDRD/LDRH/LDRB/etc. instructions. All bits are encoded.
+ fixup_arm_pcrel_10_unscaled,
// fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses
// used in VFP instructions where the lower 2 bits are not encoded
// (so it's encoded as an 8-bit immediate).
@@ -56,6 +59,25 @@ enum Fixups {
// fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions.
fixup_arm_thumb_br,
+ // The following fixups handle the ARM BL instructions. These can be
+ // conditionalised; however, the ARM ELF ABI requires a different relocation
+ // in that case: R_ARM_JUMP24 instead of R_ARM_CALL. The difference is that
+ // R_ARM_CALL is allowed to change the instruction to a BLX inline, which has
+ // no conditional version; R_ARM_JUMP24 would have to insert a veneer.
+ //
+ // MachO does not draw a distinction between the two cases, so it will treat
+ // fixup_arm_uncondbl and fixup_arm_condbl as identical fixups.
+
+ // fixup_arm_uncondbl - Fixup for unconditional ARM BL instructions.
+ fixup_arm_uncondbl,
+
+ // fixup_arm_condbl - Fixup for ARM BL instructions with nontrivial
+ // conditionalisation.
+ fixup_arm_condbl,
+
+ // fixup_arm_blx - Fixup for ARM BLX instructions.
+ fixup_arm_blx,
+
// fixup_arm_thumb_bl - Fixup for Thumb BL instructions.
fixup_arm_thumb_bl,
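
To make the R_ARM_CALL/R_ARM_JUMP24 distinction above concrete (ARM assembly,
illustration only):

  bl   callee   @ fixup_arm_uncondbl -> R_ARM_CALL: the linker may rewrite
                @   the BL to a BLX for an interworking call.
  bleq callee   @ fixup_arm_condbl -> R_ARM_JUMP24: BLX has no conditional
                @   form, so an interworking call needs a veneer instead.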
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 1c109e015280..03e8d5f83ae7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMCAsmInfo.cpp - ARM asm properties -------------------*- C++ -*-===//
+//===-- ARMMCAsmInfo.cpp - ARM asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -48,6 +48,8 @@ static const char *const arm_asm_table[] = {
0,0
};
+void ARMMCAsmInfoDarwin::anchor() { }
+
ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
AsmTransCBE = arm_asm_table;
Data64bitsDirective = 0;
@@ -61,6 +63,8 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin() {
ExceptionsType = ExceptionHandling::SjLj;
}
+void ARMELFMCAsmInfo::anchor() { }
+
ARMELFMCAsmInfo::ARMELFMCAsmInfo() {
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 90f7822ea580..f0b289c6f3b6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- ARMMCAsmInfo.h - ARM asm properties -------------*- C++ -*--====//
+//===-- ARMMCAsmInfo.h - ARM asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,11 +18,15 @@
namespace llvm {
- struct ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit ARMMCAsmInfoDarwin();
};
- struct ARMELFMCAsmInfo : public MCAsmInfo {
+ class ARMELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit ARMELFMCAsmInfo();
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 865c3e22b842..10d1c48876ed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -64,7 +64,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI,
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -118,8 +118,10 @@ public:
/// branch target.
uint32_t getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const;
+ uint32_t getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const;
uint32_t getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ SmallVectorImpl<MCFixup> &Fixups) const;
/// getAdrLabelOpValue - Return encoding info for 12-bit immediate
/// ADR label target.
@@ -166,7 +168,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const {
ARM_AM::AMSubMode Mode = (ARM_AM::AMSubMode)MI.getOperand(OpIdx).getImm();
switch (Mode) {
- default: assert(0 && "Unknown addressing sub-mode!");
+ default: llvm_unreachable("Unknown addressing sub-mode!");
case ARM_AM::da: return 0;
case ARM_AM::ia: return 1;
case ARM_AM::db: return 2;
@@ -177,7 +179,6 @@ public:
///
unsigned getShiftOp(ARM_AM::ShiftOpc ShOpc) const {
switch (ShOpc) {
- default: llvm_unreachable("Unknown shift opc!");
case ARM_AM::no_shift:
case ARM_AM::lsl: return 0;
case ARM_AM::lsr: return 1;
@@ -185,7 +186,7 @@ public:
case ARM_AM::ror:
case ARM_AM::rrx: return 3;
}
- return 0;
+ llvm_unreachable("Invalid ShiftOpc!");
}
/// getAddrMode2OpValue - Return encoding for addrmode2 operands.
@@ -423,7 +424,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
llvm_unreachable("Unable to encode MCOperand!");
- return 0;
}
/// getAddrModeImmOpValue - Return encoding info for 'reg +/- imm' operand.
@@ -466,7 +466,7 @@ static uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected branch target type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
// All of the information is in the fixup.
return 0;
@@ -594,17 +594,26 @@ getARMBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
}
uint32_t ARMMCCodeEmitter::
-getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+getARMBLTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand MO = MI.getOperand(OpIdx);
if (MO.isExpr()) {
if (HasConditionalBranch(MI))
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_condbranch, Fixups);
- return ::getBranchTargetOpValue(MI, OpIdx,
- ARM::fixup_arm_uncondbranch, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx,
+ ARM::fixup_arm_condbl, Fixups);
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_uncondbl, Fixups);
}
+ return MO.getImm() >> 2;
+}
+
+uint32_t ARMMCCodeEmitter::
+getARMBLXTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand MO = MI.getOperand(OpIdx);
+ if (MO.isExpr())
+ return ::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_arm_blx, Fixups);
+
return MO.getImm() >> 1;
}
@@ -718,12 +727,13 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
else
Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
Reg = ARM::PC;
int32_t Offset = MO.getImm();
+ // FIXME: Handle #-0.
if (Offset < 0) {
Offset *= -1;
isAdd = false;
@@ -791,8 +801,8 @@ getT2AddrModeImm8s4OpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else
@@ -833,7 +843,7 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
// but this is good enough for now.
static bool EvaluateAsPCRel(const MCExpr *Expr) {
switch (Expr->getKind()) {
- default: assert(0 && "Unexpected expression type");
+ default: llvm_unreachable("Unexpected expression type");
case MCExpr::SymbolRef: return false;
case MCExpr::Binary: return true;
}
@@ -857,7 +867,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
MCFixupKind Kind;
switch (ARM16Expr->getKind()) {
- default: assert(0 && "Unsupported ARMFixup");
+ default: llvm_unreachable("Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
if (!isTargetDarwin() && EvaluateAsPCRel(E))
Kind = MCFixupKind(isThumb2()
@@ -879,12 +889,11 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
: ARM::fixup_arm_movw_lo16);
break;
}
- Fixups.push_back(MCFixup::Create(0, E, Kind));
+ Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
};
llvm_unreachable("Unsupported MCExpr type in MCOperand!");
- return 0;
}
uint32_t ARMMCCodeEmitter::
@@ -993,6 +1002,19 @@ getAddrMode3OpValue(const MCInst &MI, unsigned OpIdx,
const MCOperand &MO = MI.getOperand(OpIdx);
const MCOperand &MO1 = MI.getOperand(OpIdx+1);
const MCOperand &MO2 = MI.getOperand(OpIdx+2);
+
+ // If the first operand isn't a register, we have a label reference.
+ if (!MO.isReg()) {
+ unsigned Rn = getARMRegisterNumbering(ARM::PC); // Rn is PC.
+
+ assert(MO.isExpr() && "Unexpected machine operand type!");
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = MCFixupKind(ARM::fixup_arm_pcrel_10_unscaled);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
+
+ ++MCNumCPRelocations;
+ return (Rn << 9) | (1 << 13);
+ }
unsigned Rn = getARMRegisterNumbering(MO.getReg());
unsigned Imm = MO2.getImm();
bool isAdd = ARM_AM::getAM3Op(Imm) == ARM_AM::add;
@@ -1066,7 +1088,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
else
Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
++MCNumCPRelocations;
} else {
@@ -1312,8 +1334,8 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
// LDM/STM:
// {15-0} = Bitfield of GPRs.
unsigned Reg = MI.getOperand(Op).getReg();
- bool SPRRegs = llvm::ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
- bool DPRRegs = llvm::ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
+ bool SPRRegs = ARMMCRegisterClasses[ARM::SPRRegClassID].contains(Reg);
+ bool DPRRegs = ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg);
unsigned Binary = 0;
@@ -1372,11 +1394,11 @@ getAddrMode6OneLane32AddressOpValue(const MCInst &MI, unsigned Op,
switch (Imm.getImm()) {
default: break;
- case 2:
- case 4:
case 8:
- case 16: Align = 0x00; break;
- case 32: Align = 0x03; break;
+ case 16:
+ case 32: // Default '0' value for invalid alignments of 8, 16, 32 bytes.
+ case 2: Align = 0x00; break;
+ case 4: Align = 0x03; break;
}
return RegNo | (Align << 4);
@@ -1412,7 +1434,7 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(Op);
if (MO.getReg() == 0) return 0x0D;
- return MO.getReg();
+ return getARMRegisterNumbering(MO.getReg());
}
unsigned ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 2727ba8c8aa5..22e14a2281de 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -21,7 +21,7 @@ ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
void ARMMCExpr::PrintImpl(raw_ostream &OS) const {
switch (Kind) {
- default: assert(0 && "Invalid kind!");
+ default: llvm_unreachable("Invalid kind!");
case VK_ARM_HI16: OS << ":upper16:"; break;
case VK_ARM_LO16: OS << ":lower16:"; break;
}
@@ -45,8 +45,7 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
switch (Value->getKind()) {
case MCExpr::Target:
- assert(0 && "Can't handle nested target expr!");
- break;
+ llvm_unreachable("Can't handle nested target expr!");
case MCExpr::Constant:
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 0a2e883deb1d..a727e087d291 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -1,4 +1,4 @@
-//===-- ARMMCExpr.h - ARM specific MC expression classes ------------------===//
+//===-- ARMMCExpr.h - ARM specific MC expression classes --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index a55c41075d40..e3512cda3ae3 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -89,14 +89,6 @@ std::string ARM_MC::ParseARMTriple(StringRef TT) {
ARMArchFeature += ",+thumb-mode";
}
- Triple TheTriple(TT);
- if (TheTriple.getOS() == Triple::NativeClient) {
- if (ARMArchFeature.empty())
- ARMArchFeature = "+nacl-mode";
- else
- ARMArchFeature += ",+nacl-mode";
- }
-
return ARMArchFeature;
}
@@ -137,14 +129,15 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
Triple TheTriple(TT);
// Default relocation model on Darwin is PIC, not DynamicNoPIC.
RM = TheTriple.isOSDarwin() ? Reloc::PIC_ : Reloc::DynamicNoPIC;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -158,22 +151,23 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
Triple TheTriple(TT);
if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ return createMachOStreamer(Ctx, MAB, OS, Emitter, false);
if (TheTriple.isOSWindows()) {
llvm_unreachable("ARM does not support Windows COFF format");
- return NULL;
}
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ return createELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(MAI, STI);
+ return new ARMInstPrinter(MAI, MII, MRI, STI);
return 0;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index 9b3d3bd32183..88472d7ffc3f 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -46,6 +46,10 @@ MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createARMAsmBackend(const Target &T, StringRef TT);
+/// createARMELFObjectWriter - Construct an ARM ELF object writer.
+MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI);
+
/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
bool Is64Bit,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 352c73e84df1..8057cb6687a6 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -13,9 +13,11 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,12 +34,12 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter {
MCValue Target,
unsigned Log2Size,
uint64_t &FixedValue);
- void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
public:
ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
@@ -80,6 +82,9 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
case ARM::fixup_arm_adr_pcrel_12:
case ARM::fixup_arm_condbranch:
case ARM::fixup_arm_uncondbranch:
+ case ARM::fixup_arm_uncondbl:
+ case ARM::fixup_arm_condbl:
+ case ARM::fixup_arm_blx:
RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
// Report as 'long', even though that is not quite accurate.
Log2Size = llvm::Log2_32(4);
@@ -98,34 +103,47 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
Log2Size = llvm::Log2_32(4);
return true;
+ // movw/movt r_type relocations always have a pair following them and use
+ // the r_length bits differently. The encoding of the r_length is as
+ // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 1;
+ return true;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_HalfDifference);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 3;
return true;
case ARM::fixup_arm_movw_lo16:
case ARM::fixup_arm_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ Log2Size = 0;
+ return true;
case ARM::fixup_t2_movw_lo16:
case ARM::fixup_t2_movw_lo16_pcrel:
RelocType = unsigned(macho::RIT_ARM_Half);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ Log2Size = 2;
return true;
}
}
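
In other words, the four Log2Size values used for RIT_ARM_Half are just the
two flag bits packed together; a small sketch (hypothetical helper name):

// Hypothetical helper restating the r_length encoding described above:
// bit 0: 0 = :lower16: (movw), 1 = :upper16: (movt)
// bit 1: 0 = ARM encoding,     1 = Thumb encoding
static unsigned halfRelocLog2Size(bool IsThumb, bool IsMovt) {
  return (unsigned(IsThumb) << 1) | unsigned(IsMovt);
}
// arm movw -> 0, arm movt -> 1, thumb movw -> 2, thumb movt -> 3,
// matching the cases in getARMFixupKindMachOInfo above.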
void ARMMachObjectWriter::
-RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue) {
+RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Type = macho::RIT_ARM_Half;
@@ -135,7 +153,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
@@ -148,7 +167,8 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
@@ -178,9 +198,16 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
case ARM::fixup_arm_movt_hi16:
case ARM::fixup_arm_movt_hi16_pcrel:
MovtBit = 1;
+ // The thumb bit shouldn't be set in the 'other-half' bit of the
+ // relocation, but it will be set in FixedValue if the base symbol
+ // is a thumb function. Clear it out here.
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movt_hi16_pcrel:
+ if (A_SD->getFlags() & SF_ThumbFunc)
+ FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
@@ -189,7 +216,6 @@ RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
break;
}
-
if (Type == macho::RIT_ARM_HalfDifference) {
uint32_t OtherHalf = MovtBit
? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
@@ -233,7 +259,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
@@ -245,7 +272,8 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "symbol '" + B->getSymbol().getName() +
"' can not be undefined in a subtraction expression");
// Select the appropriate difference relocation type.
@@ -287,19 +315,21 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
unsigned Log2Size;
unsigned RelocType = macho::RIT_Vanilla;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
- report_fatal_error("unknown ARM fixup kind!");
- return;
- }
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size))
+ // If we failed to get fixup kind info, there is no legal relocation type
+ // for this fixup kind: it is expected always to be resolvable at assembly
+ // time and never to need a relocation.
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ "unsupported relocation on symbol");
// If this is a difference or a defined symbol plus an offset, then we need a
// scattered relocation entry. Differences always require scattered
// relocations.
if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half ||
- RelocType == macho::RIT_ARM_HalfDifference)
- return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
+ if (RelocType == macho::RIT_ARM_Half)
+ return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment,
+ Fixup, Target, FixedValue);
return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
Target, Log2Size, FixedValue);
}
@@ -374,6 +404,30 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
+
+ // Even when it's not a scattered relocation, movw/movt always uses
+ // a PAIR relocation.
+ if (Type == macho::RIT_ARM_Half) {
+ // The other-half value only gets populated for the movt relocation.
+ uint32_t Value = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ Value = FixedValue;
+ break;
+ }
+ macho::RelocationEntry MREPair;
+ MREPair.Word0 = Value;
+ MREPair.Word1 = ((0xffffff) |
+ (Log2Size << 25) |
+ (macho::RIT_Pair << 28));
+
+ Writer->addRelocation(Fragment->getParent(), MREPair);
+ }
+
Writer->addRelocation(Fragment->getParent(), MRE);
}
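
For reference, the Word1 of the PAIR entry built above packs its fields as
follows; a sketch derived directly from the expression in the code:

#include <cstdint>

// Sketch of the RIT_Pair Word1 layout used above: the low 24 bits (the
// r_address field) hold all ones, followed by r_length and r_type.
static uint32_t pairWord1(uint32_t Log2Size) {
  const uint32_t RITPair = 1;  // macho::RIT_Pair (assumed value)
  return 0xffffffu             // r_address placeholder: all ones
       | (Log2Size << 25)      // r_length (movw/movt encoding, see above)
       | (RITPair << 28);      // r_type = PAIR
}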
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index adc37cbf582e..256599412e8b 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,19 +1,13 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
+ ARMELFObjectWriter.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMMCTargetDesc.cpp
ARMMachObjectWriter.cpp
)
add_dependencies(LLVMARMDesc ARMCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
-
-add_llvm_library_dependencies(LLVMARMDesc
- LLVMARMInfo
- LLVMARMAsmPrinter
- LLVMMC
- LLVMSupport
- )
diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..2a7fe6188b50
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMDesc
+parent = ARM
+required_libraries = ARMAsmPrinter ARMInfo MC Support
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 2df00538b39f..28998361c7a0 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -1,4 +1,4 @@
-//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ----------=//
+//===-- MLxExpansionPass.cpp - Expand MLx instrs to avoid hazards ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -139,7 +139,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
const MCInstrDesc &MCID = MI->getDesc();
unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
- if (MCID.mayStore())
+ if (MI->mayStore())
return false;
unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
@@ -222,14 +222,14 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
+ MIB = BuildMI(MBB, MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -274,7 +274,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
}
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.isBarrier()) {
+ if (MI->isBarrier()) {
clearStack();
Skip = 0;
++MII;
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 2f6842e8cb60..3eddda812f84 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -501,11 +501,6 @@ those operations and the ARMv6 scalar versions.
//===---------------------------------------------------------------------===//
-ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
-ARMInstrInfo::commuteInstruction() to support it.
-
-//===---------------------------------------------------------------------===//
-
Split out LDR (literal) from normal ARM LDR instruction. Also consider splitting
LDR into imm12 and so_reg forms. This allows us to clean up some code. e.g.
ARMLoadStoreOptimizer does not need to look at LDR (literal) and LDR (so_reg)
@@ -699,3 +694,19 @@ test is equality test so it's more a conditional move rather than a select:
Currently this is a ARM specific dag combine. We probably should make it into a
target-neutral one.
+
+//===---------------------------------------------------------------------===//
+
+Optimize unnecessary checks for zero with __builtin_clz/ctz. Those builtins
+are specified to be undefined at zero, so portable code must check for zero
+and handle it as a special case. That is unnecessary on ARM where those
+operations are implemented in a way that is well-defined for zero. For
+example:
+
+int f(int x) { return x ? __builtin_clz(x) : sizeof(int)*8; }
+
+should just be implemented with a CLZ instruction. Since there are other
+targets, e.g., PPC, that share this behavior, it would be best to implement
+this in a target-independent way: we should probably fold that (when using
+"undefined at zero" semantics) to set the "defined at zero" bit and have
+the code generator expand out the right code.
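
A minimal sketch of the source-level pattern, relying on the ARM behavior
noted above (CLZ is architecturally defined to return 32 for a zero input):

// Both functions should lower to a single CLZ on ARM once the fold described
// above exists; the guard in the first is only needed for portability.
int clz_guarded(unsigned x) {
  return x ? __builtin_clz(x) : 32;  // portable: guards the undefined case
}
int clz_raw(unsigned x) {
  return __builtin_clz(x);           // valid only for x != 0 per the builtin
}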
diff --git a/lib/Target/ARM/TargetInfo/CMakeLists.txt b/lib/Target/ARM/TargetInfo/CMakeLists.txt
index 8b38b136ce64..533e747894ca 100644
--- a/lib/Target/ARM/TargetInfo/CMakeLists.txt
+++ b/lib/Target/ARM/TargetInfo/CMakeLists.txt
@@ -5,9 +5,3 @@ add_llvm_library(LLVMARMInfo
)
add_dependencies(LLVMARMInfo ARMCommonTableGen)
-
-add_llvm_library_dependencies(LLVMARMInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..a07a94047d4e
--- /dev/null
+++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/ARM/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = ARMInfo
+parent = ARM
+required_libraries = MC Support Target
+add_to_library_groups = ARM
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index d8481778c0da..edd73c20c0be 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -1,4 +1,4 @@
-//======- Thumb1FrameLowering.cpp - Thumb1 Frame Information ---*- C++ -*-====//
+//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Thumb1FrameLowering.h"
-#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -101,7 +100,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
case ARM::R11:
if (Reg == FramePtr)
FramePtrSpillFI = FI;
- if (STI.isTargetDarwin()) {
+ if (STI.isTargetIOS()) {
AFI->addGPRCalleeSavedArea2Frame(FI);
GPRCS2Size += 4;
} else {
@@ -175,14 +174,14 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
+static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
for (unsigned i = 0; CSRegs[i]; ++i)
if (Reg == CSRegs[i])
return true;
return false;
}
-static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
+static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
@@ -214,7 +213,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
int NumBytes = (int)MFI->getStackSize();
- const unsigned *CSRegs = RegInfo->getCalleeSavedRegs();
+ const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
@@ -278,8 +277,11 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
- AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill));
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(MIB);
+ MIB->copyImplicitOps(&*MBBI);
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
@@ -350,6 +352,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
+ MIB->copyImplicitOps(&*MI);
MI = MBB.erase(MI);
}
MIB.addReg(Reg, getDefRegState(true));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 218311d78d30..e03e75815c7e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
+//===-- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,14 +13,11 @@
#include "Thumb1InstrInfo.h"
#include "ARM.h"
-#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/SmallVector.h"
-#include "Thumb1InstrInfo.h"
+#include "llvm/MC/MCInst.h"
using namespace llvm;
@@ -28,6 +25,15 @@ Thumb1InstrInfo::Thumb1InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb1InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tMOVr);
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateReg(ARM::R8));
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
@@ -60,8 +66,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -89,8 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 17ef2f758ef4..36af20492d4e 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
+//===-- Thumb1InstrInfo.h - Thumb-1 Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,8 @@
#ifndef THUMB1INSTRUCTIONINFO_H
#define THUMB1INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb1RegisterInfo.h"
namespace llvm {
@@ -27,6 +26,9 @@ class Thumb1InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb1InstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index e8ed482a66fa..ef77bbd21a4e 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
+//===-- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,12 +12,11 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb1RegisterInfo.h"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb1InstrInfo.h"
-#include "Thumb1RegisterInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/CommandLine.h"
@@ -570,6 +570,11 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// If this instruction affects R12, adjust our restore point.
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = II->getOperand(i);
+ if (MO.isRegMask() && MO.clobbersPhysReg(ARM::R12)) {
+ UseMI = II;
+ done = true;
+ break;
+ }
if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue;
@@ -624,6 +629,21 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
FrameReg = BasePtr;
}
+ // PEI::scavengeFrameVirtualRegs() cannot accurately track SPAdj because the
+ // call frame setup/destroy instructions have already been eliminated. That
+ // means the stack pointer cannot be used to access the emergency spill slot
+ // when !hasReservedCallFrame().
+#ifndef NDEBUG
+ if (RS && FrameReg == ARM::SP && FrameIndex == RS->getScavengingFrameIndex()){
+ assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions without a reserved call frame");
+ assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ "Cannot use SP to access the emergency spill slot in "
+ "functions with variable sized frame objects");
+ }
+#endif // NDEBUG
+
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
@@ -643,14 +663,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
removeOperands(MI, PIdx);
- if (Desc.mayLoad()) {
+ if (MI.mayLoad()) {
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
@@ -673,7 +692,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use [reg, reg] addrmode. Replace the immediate operand w/ the frame
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
- } else if (Desc.mayStore()) {
+ } else if (MI.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
@@ -695,11 +714,11 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// register. The offset is already handled in the vreg value.
MI.getOperand(i+1).ChangeToRegister(FrameReg, false, false, false);
} else {
- assert(false && "Unexpected opcode!");
+ llvm_unreachable("Unexpected opcode!");
}
// Add predicate back if it's needed.
- if (MI.getDesc().isPredicable()) {
+ if (MI.isPredicable()) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9060e59e5980..69718424e735 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB1REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index b6274007b237..ecb4c2f0e5da 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb-2 IT blocks ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,7 @@
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,7 +76,7 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
unsigned Reg = LocalUses[i];
Uses.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Uses.insert(*Subreg);
}
@@ -83,7 +84,7 @@ static void TrackDefUses(MachineInstr *MI,
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Reg = LocalDefs[i];
Defs.insert(Reg);
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ for (const uint16_t *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
Defs.insert(*Subreg);
if (Reg == ARM::CPSR)
@@ -141,7 +142,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
// rsb r2, 0
//
const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.hasOptionalDef() &&
+ if (MI->hasOptionalDef() &&
MI->getOperand(MCID.getNumOperands() - 1).getReg() == ARM::CPSR)
return false;
@@ -153,7 +154,7 @@ Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
++I;
if (I != E) {
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(I, NPredReg);
if (NCC == CC || NCC == OCC)
return true;
}
@@ -170,7 +171,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -198,7 +199,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
for (; MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ (!MI->isBranch() && !MI->isReturn()) ; ++MBBI) {
if (MBBI->isDebugValue())
continue;
@@ -206,7 +207,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
MI = NMI;
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ ARMCC::CondCodes NCC = getITInstrPredicate(NMI, NPredReg);
if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
// Add implicit use of ITSTATE.
@@ -237,6 +238,10 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
// Last instruction in IT block kills ITSTATE.
LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+ // Finalize the bundle.
+ MachineBasicBlock::instr_iterator LI = LastITMI;
+ finalizeBundle(MBB, InsertPos.getInstrIterator(), llvm::next(LI));
+
Modified = true;
++NumITs;
}
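
The mask construction in the loop above can be shown in isolation; a sketch
assuming the Mask/Pos initialization that happens outside this hunk:

// Sketch of the IT mask accumulation above (initial Mask = 0, Pos = 3 are
// assumed; they are set up before the loop shown in this hunk). Each
// instruction that joins the block contributes one then/else bit, filling
// the mask from bit 3 downward.
static unsigned addITSlot(unsigned Mask, unsigned &Pos, unsigned NCC) {
  Mask |= (NCC & 1) << Pos;  // ARM cond codes pair up in the low bit, so
  --Pos;                     // this records then (== CC) vs else (== OCC)
  return Mask;               // at most three slots follow the first instr
}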
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index cf040c822de9..8ab486b0c1bf 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
+//===-- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,13 +15,11 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -35,6 +33,13 @@ Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
+/// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+void Thumb2InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(ARM::tNOP);
+ NopInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ NopInst.addOperand(MCOperand::CreateReg(0));
+}
+
unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
// FIXME
return 0;
@@ -53,7 +58,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If the first instruction of Tail is predicated, we may have to update
// the IT instruction.
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(Tail, PredReg);
MachineBasicBlock::iterator MBBI = Tail;
if (CC != ARMCC::AL)
// Expecting at least the t2IT instruction before it.
@@ -101,7 +106,7 @@ Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
}
unsigned PredReg = 0;
- return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+ return getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
}
void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -130,8 +135,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -158,8 +162,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
@@ -570,7 +573,7 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
return;
unsigned PredReg = 0;
- ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ ARMCC::CondCodes CC = getInstrPredicate(UseMI, PredReg);
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
return;
@@ -586,10 +589,10 @@ Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
continue;
MachineInstr *NMI = &*MBBI;
- ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ ARMCC::CondCodes NCC = getInstrPredicate(NMI, PredReg);
if (!(NCC == CC || NCC == OCC) ||
NMI->modifiesRegister(SrcReg, &TRI) ||
- NMI->definesRegister(ARM::CPSR))
+ NMI->modifiesRegister(ARM::CPSR, &TRI))
break;
if (++NumInsts == 4)
// Too many in a row!
@@ -607,5 +610,5 @@ llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
return ARMCC::AL;
- return llvm::getInstrPredicate(MI, PredReg);
+ return getInstrPredicate(MI, PredReg);
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index f2637d7fbcab..0911f8a597ce 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
+//===-- Thumb2InstrInfo.h - Thumb-2 Instruction Information -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,8 @@
#ifndef THUMB2INSTRUCTIONINFO_H
#define THUMB2INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "ARM.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
#include "Thumb2RegisterInfo.h"
namespace llvm {
@@ -28,6 +27,9 @@ class Thumb2InstrInfo : public ARMBaseInstrInfo {
public:
explicit Thumb2InstrInfo(const ARMSubtarget &STI);
+ /// getNoopForMachoTarget - Return the noop instruction to use for a noop.
+ void getNoopForMachoTarget(MCInst &NopInst) const;
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 355c3bf0352c..29a87d016227 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.cpp - Thumb-2 Register Information ----*- C++ -*-===//
+//===-- Thumb2RegisterInfo.cpp - Thumb-2 Register Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,10 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "Thumb2RegisterInfo.h"
#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
-#include "Thumb2RegisterInfo.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index 824378aeab4e..6b397e869683 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -16,13 +16,12 @@
#define THUMB2REGISTERINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
+#include "ARMBaseRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
class ARMSubtarget;
class ARMBaseInstrInfo;
- class Type;
struct Thumb2RegisterInfo : public ARMBaseRegisterInfo {
public:
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 89a155c5a7f5..b5a397e61685 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -39,9 +39,9 @@ namespace {
/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow ones.
struct ReduceEntry {
- unsigned WideOpc; // Wide opcode
- unsigned NarrowOpc1; // Narrow opcode to transform to
- unsigned NarrowOpc2; // Narrow opcode when it's two-address
+ uint16_t WideOpc; // Wide opcode
+ uint16_t NarrowOpc1; // Narrow opcode to transform to
+ uint16_t NarrowOpc2; // Narrow opcode when it's two-address
uint8_t Imm1Limit; // Limit of immediate field (bits)
uint8_t Imm2Limit; // Limit of immediate field when it's two-address
unsigned LowRegs1 : 1; // Only possible if low-registers are used
@@ -146,7 +146,8 @@ namespace {
/// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
DenseMap<unsigned, unsigned> ReduceOpcodeMap;
- bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+ bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+ bool IsSelfLoop);
bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
bool is2Addr, ARMCC::CondCodes Pred,
@@ -157,19 +158,21 @@ namespace {
bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry, bool LiveCPSR,
- MachineInstr *CPSRDef);
+ MachineInstr *CPSRDef, bool IsSelfLoop);
/// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
/// instruction.
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef);
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
/// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
/// non-two-address instruction.
bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef);
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop);
/// ReduceMBB - Reduce width of instructions in the specified basic block.
bool ReduceMBB(MachineBasicBlock &MBB);
@@ -186,7 +189,7 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
}
static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
- for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
+ for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
bool
-Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
- if (!Def || !STI->avoidCPSRPartialUpdate())
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use,
+ bool FirstInSelfLoop) {
+ // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder).
+ if (!STI->avoidCPSRPartialUpdate())
return false;
+ if (!Def)
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ return FirstInSelfLoop;
+
SmallSet<unsigned, 2> Defs;
for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = Def->getOperand(i);
@@ -442,7 +452,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
// Add the 16-bit load / store instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));
if (!isLdStMul) {
MIB.addOperand(MI->getOperand(0));
MIB.addOperand(MI->getOperand(1));
@@ -468,7 +478,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumLdSts;
return true;
}
@@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
unsigned Opc = MI->getOpcode();
if (Opc == ARM::t2ADDri) {
// If the source register is SP, try to reduce to tADDrSPi, otherwise
// it's a normal reduce.
if (MI->getOperand(1).getReg() != ARM::SP) {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
// Try to reduce to tADDrSPi.
unsigned Imm = MI->getOperand(2).getImm();
@@ -502,7 +513,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
return false;
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(ARM::tADDrSPi))
.addOperand(MI->getOperand(0))
.addOperand(MI->getOperand(1))
@@ -514,7 +525,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -522,8 +533,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const MCInstrDesc &MCID = MI->getDesc();
- if (MCID.mayLoad() || MCID.mayStore())
+ if (MI->mayLoad() || MI->mayStore())
return ReduceLoadStore(MBB, MI, Entry);
switch (Opc) {
@@ -535,12 +545,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
switch (Opc) {
default: break;
case ARM::t2ADDSri: {
- if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
// fallthrough
}
case ARM::t2ADDSrr:
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
}
break;
@@ -552,13 +562,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2UXTB:
case ARM::t2UXTH:
if (MI->getOperand(2).getImm() == 0)
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break;
case ARM::t2MOVi16:
// Can convert only 'pure' immediate operands, not immediates obtained as
// globals' addresses.
if (MI->getOperand(1).isImm())
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
break;
case ARM::t2CMPrr: {
// Try to reduce to the lo-reg only version first. Why there are two
@@ -568,9 +578,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
// source insn opcode. So for now, we hack a local entry record to use.
static const ReduceEntry NarrowEntry =
{ ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
- if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
+ if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop))
return true;
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop);
}
}
return false;
@@ -579,14 +589,32 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1) {
+ // t2MUL is "special". The tied source operand is second, not first.
+ if (MI->getOpcode() == ARM::t2MUL) {
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ // Early exit if the regs aren't all low regs.
+ if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
+ || !isARMLowRegister(Reg2))
+ return false;
+ if (Reg0 != Reg2) {
+ // If the other operand also isn't the same as the destination, we
+ // can't reduce.
+ if (Reg1 != Reg0)
+ return false;
+ // Try to commute the operands to make it a 2-address instruction.
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
+ } else if (Reg0 != Reg1) {
// Try to commute the operands to make it a 2-address instruction.
unsigned CommOpIdx1, CommOpIdx2;
if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
@@ -637,12 +665,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI))
+ canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -666,7 +694,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++Num2Addrs;
return true;
}
@@ -674,7 +702,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
- bool LiveCPSR, MachineInstr *CPSRDef) {
+ bool LiveCPSR, MachineInstr *CPSRDef,
+ bool IsSelfLoop) {
if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
return false;
@@ -727,12 +756,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which have the 's' bit set.
if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
- canAddPseudoFlagDep(CPSRDef, MI))
+ canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
if (NewMCID.hasOptionalDef()) {
if (HasCC)
@@ -772,7 +801,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
- MBB.erase(MI);
+ MBB.erase_instr(MI);
++NumNarrows;
return true;
}
@@ -817,13 +846,22 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
MachineInstr *CPSRDef = 0;
+ MachineInstr *BundleMI = 0;
- MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMII;
+ // If this BB loops back to itself, conservatively avoid narrowing the
+ // first instruction that does partial flag update.
+ bool IsSelfLoop = MBB.isSuccessor(&MBB);
+ MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMII;
for (; MII != E; MII = NextMII) {
NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
+ if (MI->isBundle()) {
+ BundleMI = MI;
+ continue;
+ }
+
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
unsigned Opcode = MI->getOpcode();
@@ -832,9 +870,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
const ReduceEntry &Entry = ReduceTable[OPI->second];
// Ignore "special" cases for now.
if (Entry.Special) {
- if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
goto ProcessNext;
@@ -842,31 +880,46 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Try to transform to a 16-bit two-address instruction.
if (Entry.NarrowOpc2 &&
- ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
goto ProcessNext;
}
// Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 &&
- ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
+ ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) {
Modified = true;
- MachineBasicBlock::iterator I = prior(NextMII);
+ MachineBasicBlock::instr_iterator I = prior(NextMII);
MI = &*I;
}
}
ProcessNext:
+ if (NextMII != E && MI->isInsideBundle() && !NextMII->isInsideBundle()) {
+ // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
+ // marker is only on the BUNDLE instruction. Process the BUNDLE
+ // instruction as we finish with the bundled instruction to work around
+ // the inconsistency.
+ if (BundleMI->killsRegister(ARM::CPSR))
+ LiveCPSR = false;
+ MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
+ if (MO && !MO->isDead())
+ LiveCPSR = true;
+ }
+
bool DefCPSR = false;
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
- if (MI->getDesc().isCall())
+ if (MI->isCall()) {
// Calls don't really set CPSR.
CPSRDef = 0;
- else if (DefCPSR)
+ IsSelfLoop = false;
+ } else if (DefCPSR) {
// This is the last CPSR defining instruction.
CPSRDef = MI;
+ IsSelfLoop = false;
+ }
}
return Modified;
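
[Editor's note] The heart of the Thumb2SizeReduction change above is the new IsSelfLoop conservatism threaded into canAddPseudoFlagDep: when no CPSR-defining instruction has been seen yet in the block, a block that branches back to itself may still carry a flag def in from the previous iteration, so the first flag-setting narrowing is suppressed. Below is a minimal standalone sketch of that decision rule, not the pass itself; the real function is gated on avoidCPSRPartialUpdate() and, when a def exists, checks whether the use actually reads a register the def writes.

// Simplified model of the rule added above -- not the LLVM pass itself.
#include <cstdio>

// Returns true when narrowing to a flag-setting 16-bit encoding could add a
// false dependency on a partial CPSR update.
static bool couldAddPseudoFlagDep(bool haveCPSRDefInBlock, bool isSelfLoop) {
  if (!haveCPSRDefInBlock)
    // No def seen yet: only a self-loop can carry one in from the previous
    // iteration, so be conservative exactly then.
    return isSelfLoop;
  // A def exists earlier in the block; assume (pessimistically, unlike the
  // real register-overlap check) that a later flag reader depends on it.
  return true;
}

int main() {
  std::printf("%d\n", couldAddPseudoFlagDep(false, true));  // 1: first insn in a self-loop
  std::printf("%d\n", couldAddPseudoFlagDep(false, false)); // 0: straight-line block, safe
  std::printf("%d\n", couldAddPseudoFlagDep(true, false));  // 1: earlier CPSR def present
}
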
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
deleted file mode 100644
index 6ffaf45f4ed1..000000000000
--- a/lib/Target/Alpha/Alpha.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Alpha back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TARGET_ALPHA_H
-#define TARGET_ALPHA_H
-
-#include "MCTargetDesc/AlphaMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- namespace Alpha {
- // These describe LDAx
-
- static const int IMM_LOW = -32768;
- static const int IMM_HIGH = 32767;
- static const int IMM_MULT = 65536;
- }
-
- class AlphaTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM);
- FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM);
- FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM,
- JITCodeEmitter &JCE);
- FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
- FunctionPass *createAlphaBranchSelectionPass();
-
-} // end namespace llvm;
-
-#endif
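
[Editor's note] The IMM_LOW/IMM_HIGH/IMM_MULT constants deleted above describe the reach of Alpha's LDAH/LDA pair: a signed 16-bit immediate scaled by 65536, plus a second signed 16-bit immediate. A small self-contained range check, mirroring the IMM_FULLHIGH/IMM_FULLLOW bounds the deleted instruction selector computed from the same constants (copied here so the snippet stands alone):

#include <cstdint>
#include <cstdio>

static const int64_t IMM_LOW = -32768, IMM_HIGH = 32767, IMM_MULT = 65536;

// True when v is reachable as LDAH(hi); LDA(lo), i.e. v == hi*65536 + lo
// with hi and lo both signed 16-bit values.
static bool fitsLdahLda(int64_t v) {
  return v >= IMM_LOW + IMM_LOW * IMM_MULT &&   // -2147516416
         v <= IMM_HIGH + IMM_HIGH * IMM_MULT;   //  2147450879
}

int main() {
  std::printf("%d %d\n", fitsLdahLda(2147450879), fitsLdahLda(INT64_C(1) << 40)); // 1 0
}
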
diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td
deleted file mode 100644
index ae79c2e4b70e..000000000000
--- a/lib/Target/Alpha/Alpha.td
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Get the target-independent interfaces which we are implementing...
-//
-include "llvm/Target/Target.td"
-
-//Alpha is little endian
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features
-//===----------------------------------------------------------------------===//
-
-def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true",
- "Enable CIX extensions">;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "AlphaRegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Description
-//===----------------------------------------------------------------------===//
-
-include "AlphaCallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Schedule Description
-//===----------------------------------------------------------------------===//
-
-include "AlphaSchedule.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "AlphaInstrInfo.td"
-
-def AlphaInstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
-// Alpha Processor Definitions
-//===----------------------------------------------------------------------===//
-
-def : Processor<"generic", Alpha21264Itineraries, []>;
-def : Processor<"ev6" , Alpha21264Itineraries, []>;
-def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>;
-
-//===----------------------------------------------------------------------===//
-// The Alpha Target
-//===----------------------------------------------------------------------===//
-
-
-def Alpha : Target {
- // Pull in Instruction Info:
- let InstructionSet = AlphaInstrInfo;
-}
diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AlphaAsmPrinter.cpp
deleted file mode 100644
index 5dce06ac86a5..000000000000
--- a/lib/Target/Alpha/AlphaAsmPrinter.cpp
+++ /dev/null
@@ -1,166 +0,0 @@
-//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format Alpha assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- struct AlphaAsmPrinter : public AsmPrinter {
- /// Unique incrementer for label values for referencing Global values.
- ///
-
- explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : AsmPrinter(tm, Streamer) {}
-
- virtual const char *getPassName() const {
- return "Alpha Assembly Printer";
- }
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- static const char *getRegisterName(unsigned RegNo);
-
- void printOp(const MachineOperand &MO, raw_ostream &O);
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd();
- void EmitStartOfAsmFile(Module &M);
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O);
- };
-} // end of anonymous namespace
-
-#include "AlphaGenAsmWriter.inc"
-
-void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- if (MO.isReg()) {
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- "Not physreg??");
- O << getRegisterName(MO.getReg());
- } else if (MO.isImm()) {
- O << MO.getImm();
- assert(MO.getImm() < (1 << 30));
- } else {
- printOp(MO, O);
- }
-}
-
-
-void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
- return;
-
- case MachineOperand::MO_Immediate:
- assert(0 && "printOp() does not handle immediate values");
- return;
-
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
-
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
- << MO.getIndex();
- return;
-
- case MachineOperand::MO_ExternalSymbol:
- O << MO.getSymbolName();
- return;
-
- case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
- return;
-
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- return;
-
- default:
- O << "<unknown operand type: " << MO.getType() << ">";
- return;
- }
-}
-
-/// EmitFunctionBodyStart - Targets can override this to emit stuff before
-/// the first basic block in the function.
-void AlphaAsmPrinter::EmitFunctionBodyStart() {
- OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName()));
-}
-
-/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
-/// the last basic block in the function.
-void AlphaAsmPrinter::EmitFunctionBodyEnd() {
- OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName()));
-}
-
-void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) {
- OutStreamer.EmitRawText(StringRef("\t.arch ev6"));
- OutStreamer.EmitRawText(StringRef("\t.set noat"));
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- printOperand(MI, OpNo, O);
- return false;
-}
-
-bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
- O << "0(";
- printOperand(MI, OpNo, O);
- O << ")";
- return false;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeAlphaAsmPrinter() {
- RegisterAsmPrinter<AlphaAsmPrinter> X(TheAlphaTarget);
-}
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
deleted file mode 100644
index 376811709536..000000000000
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- AlphaBranchSelector.cpp - Convert Pseudo branches ---------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Replace Pseudo COND_BRANCH_* with their appropriate real branch
-// Simplified version of the PPC Branch Selector
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCAsmInfo.h"
-using namespace llvm;
-
-namespace {
- struct AlphaBSel : public MachineFunctionPass {
- static char ID;
- AlphaBSel() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual const char *getPassName() const {
- return "Alpha Branch Selection";
- }
- };
- char AlphaBSel::ID = 0;
-}
-
-/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection
-/// Pass
-///
-FunctionPass *llvm::createAlphaBranchSelectionPass() {
- return new AlphaBSel();
-}
-
-bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) {
-
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock *MBB = MFI;
-
- for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
- MBBI != EE; ++MBBI) {
- if (MBBI->getOpcode() == Alpha::COND_BRANCH_I ||
- MBBI->getOpcode() == Alpha::COND_BRANCH_F) {
-
- // condbranch operands:
- // 0. bc opcode
- // 1. reg
- // 2. target MBB
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm()));
- }
- }
- }
-
- return true;
-}
-
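
[Editor's note] The deleted branch selector relied on a small encoding trick: the COND_BRANCH_I/COND_BRANCH_F pseudos carry the real branch opcode as their first (immediate) operand, so "selection" is just swapping in that descriptor. A toy mock of the idea, using plain structs and made-up opcode values rather than MachineInstr:

#include <cstdio>

enum Opcode { COND_BRANCH_I = 1000, BEQ = 1, BNE = 2 }; // values illustrative

struct Branch {
  Opcode opc;
  int realOpc; // operand 0 of the pseudo: the bc opcode to use
};

static void selectBranch(Branch &b) {
  if (b.opc == COND_BRANCH_I)
    b.opc = static_cast<Opcode>(b.realOpc); // same move as MBBI->setDesc(...)
}

int main() {
  Branch b = {COND_BRANCH_I, BNE};
  selectBranch(b);
  std::printf("selected opcode: %d\n", b.opc); // 2 (BNE)
}
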
diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td
deleted file mode 100644
index bde8819f46e4..000000000000
--- a/lib/Target/Alpha/AlphaCallingConv.td
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for Alpha architecture.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Alpha Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-def RetCC_Alpha : CallingConv<[
- // i64 is returned in register R0
- // R1 is an llvm extension, I don't know what gcc does
- CCIfType<[i64], CCAssignToReg<[R0,R1]>>,
-
- // f32 / f64 are returned in F0/F1
- CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>>
-]>;
-
-//===----------------------------------------------------------------------===//
-// Alpha Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CC_Alpha : CallingConv<[
- // The first 6 arguments are passed in registers, whether integer or
- // floating-point
- CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21],
- [F16, F17, F18, F19, F20, F21]>>,
-
- CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21],
- [R16, R17, R18, R19, R20, R21]>>,
-
- // Stack slots are 8 bytes in size and 8-byte aligned.
- CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
-]>;
diff --git a/lib/Target/Alpha/AlphaFrameLowering.cpp b/lib/Target/Alpha/AlphaFrameLowering.cpp
deleted file mode 100644
index 690cd1da9c1d..000000000000
--- a/lib/Target/Alpha/AlphaFrameLowering.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaFrameLowering.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/ADT/Twine.h"
-
-using namespace llvm;
-
-static long getUpper16(long l) {
- long y = l / Alpha::IMM_MULT;
- if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
- ++y;
- return y;
-}
-
-static long getLower16(long l) {
- long h = getUpper16(l);
- return l - h * Alpha::IMM_MULT;
-}
-
-// hasFP - Return true if the specified function should have a dedicated frame
-// pointer register. This is true if the function has variable sized allocas or
-// if frame pointer elimination is disabled.
-//
-bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasVarSizedObjects();
-}
-
-void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-
- DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc());
- bool FP = hasFP(MF);
-
- // Handle GOP offset
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
- .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29)
- .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist);
-
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT))
- .addGlobalAddress(MF.getFunction());
-
- // Get the number of bytes to allocate from the FrameInfo
- long NumBytes = MFI->getStackSize();
-
- if (FP)
- NumBytes += 8; //reserve space for the old FP
-
- // Do we need to allocate space on the stack?
- if (NumBytes == 0) return;
-
- unsigned Align = getStackAlignment();
- NumBytes = (NumBytes+Align-1)/Align*Align;
-
- // Update frame info to pretend that this is part of the stack...
- MFI->setStackSize(NumBytes);
-
- // adjust stack pointer: r30 -= numbytes
- NumBytes = -NumBytes;
- if (NumBytes >= Alpha::IMM_LOW) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
- .addReg(Alpha::R30);
- } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
- .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
- .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
- } else {
- report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
- }
-
- // Now if we need to, save the old FP and set the new
- if (FP) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ))
- .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30);
- // This must be the last instr in the prolog
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15)
- .addReg(Alpha::R30).addReg(Alpha::R30);
- }
-
-}
-
-void AlphaFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
-
- assert((MBBI->getOpcode() == Alpha::RETDAG ||
- MBBI->getOpcode() == Alpha::RETDAGp)
- && "Can only insert epilog into returning blocks");
- DebugLoc dl = MBBI->getDebugLoc();
-
- bool FP = hasFP(MF);
-
- // Get the number of bytes allocated from the FrameInfo...
- long NumBytes = MFI->getStackSize();
-
- //now if we need to, restore the old FP
- if (FP) {
- //copy the FP into the SP (discards allocas)
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15)
- .addReg(Alpha::R15);
- //restore the FP
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15)
- .addImm(0).addReg(Alpha::R15);
- }
-
- if (NumBytes != 0) {
- if (NumBytes <= Alpha::IMM_HIGH) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes)
- .addReg(Alpha::R30);
- } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) {
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30)
- .addImm(getUpper16(NumBytes)).addReg(Alpha::R30);
- BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30)
- .addImm(getLower16(NumBytes)).addReg(Alpha::R30);
- } else {
- report_fatal_error("Too big a stack frame at " + Twine(NumBytes));
- }
- }
-}
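
[Editor's note] The prologue/epilogue above leans on getUpper16/getLower16 to split a stack adjustment into an LDAH (scaled by 65536) plus an LDA, with the rounding step keeping the low half inside the signed 16-bit range. A standalone copy of the deleted helpers with a worked example:

#include <cassert>
#include <cstdio>

static const long IMM_MULT = 65536, IMM_HIGH = 32767;

static long getUpper16(long l) {
  long y = l / IMM_MULT;
  if (l % IMM_MULT > IMM_HIGH) // round up so the remainder stays signed-16-bit
    ++y;
  return y;
}

static long getLower16(long l) { return l - getUpper16(l) * IMM_MULT; }

int main() {
  long n = -70000; // e.g. "adjust sp by -70000" in the prologue
  long hi = getUpper16(n), lo = getLower16(n);
  assert(hi * IMM_MULT + lo == n);
  std::printf("LDAH #%ld then LDA #%ld\n", hi, lo); // LDAH #-1 then LDA #-4464
}
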
diff --git a/lib/Target/Alpha/AlphaFrameLowering.h b/lib/Target/Alpha/AlphaFrameLowering.h
deleted file mode 100644
index ebd9e1bac190..000000000000
--- a/lib/Target/Alpha/AlphaFrameLowering.h
+++ /dev/null
@@ -1,43 +0,0 @@
-//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHA_FRAMEINFO_H
-#define ALPHA_FRAMEINFO_H
-
-#include "Alpha.h"
-#include "AlphaSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
- class AlphaSubtarget;
-
-class AlphaFrameLowering : public TargetFrameLowering {
- const AlphaSubtarget &STI;
- // FIXME: This should end in MachineFunctionInfo, not here!
- mutable int curgpdist;
-public:
- explicit AlphaFrameLowering(const AlphaSubtarget &sti)
- : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) {
- }
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool hasFP(const MachineFunction &MF) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
deleted file mode 100644
index f877c65cd61f..000000000000
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ /dev/null
@@ -1,425 +0,0 @@
-//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a pattern matching instruction selector for Alpha,
-// converting from a legalized dag to an Alpha dag.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-using namespace llvm;
-
-namespace {
-
- //===--------------------------------------------------------------------===//
- /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine
- /// instructions for SelectionDAG operations.
- class AlphaDAGToDAGISel : public SelectionDAGISel {
- static const int64_t IMM_LOW = -32768;
- static const int64_t IMM_HIGH = 32767;
- static const int64_t IMM_MULT = 65536;
- static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT;
- static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT;
-
- static int64_t get_ldah16(int64_t x) {
- int64_t y = x / IMM_MULT;
- if (x % IMM_MULT > IMM_HIGH)
- ++y;
- return y;
- }
-
- static int64_t get_lda16(int64_t x) {
- return x - get_ldah16(x) * IMM_MULT;
- }
-
- /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot
- /// instruction (if not, return 0). Note that this code accepts partial
- /// zap masks. For example (and LHS, 1) is a valid zap, as long as we know
- /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are
- /// in checking mode. If LHS is null, we assume that the mask has already
- /// been validated before.
- uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const {
- uint64_t BitsToCheck = 0;
- unsigned Result = 0;
- for (unsigned i = 0; i != 8; ++i) {
- if (((Constant >> 8*i) & 0xFF) == 0) {
- // nothing to do.
- } else {
- Result |= 1 << i;
- if (((Constant >> 8*i) & 0xFF) == 0xFF) {
- // If the entire byte is set, zapnot the byte.
- } else if (LHS.getNode() == 0) {
- // Otherwise, if the mask was previously validated, we know it's okay
- // to zapnot this entire byte even though all the bits aren't set.
- } else {
- // Otherwise we don't know that it's okay to zapnot this entire
- // byte. Only do this iff we can prove that the missing bits are
- // already null, so the bytezap doesn't need to really null them.
- BitsToCheck |= ~Constant & (0xFFULL << 8*i);
- }
- }
- }
-
- // If there are missing bits in a byte (for example, X & 0xEF00), check to
- // see if the missing bits (0x1000) are already known zero; if not, the zap
- // isn't okay to do, as it won't clear all the required bits.
- if (BitsToCheck &&
- !CurDAG->MaskedValueIsZero(LHS,
- APInt(LHS.getValueSizeInBits(),
- BitsToCheck)))
- return 0;
-
- return Result;
- }
-
- static uint64_t get_zapImm(uint64_t x) {
- unsigned build = 0;
- for(int i = 0; i != 8; ++i) {
- if ((x & 0x00FF) == 0x00FF)
- build |= 1 << i;
- else if ((x & 0x00FF) != 0)
- return 0;
- x >>= 8;
- }
- return build;
- }
-
-
- static uint64_t getNearPower2(uint64_t x) {
- if (!x) return 0;
- unsigned at = CountLeadingZeros_64(x);
- uint64_t complow = 1ULL << (63 - at);
- uint64_t comphigh = complow << 1;
- if (x - complow <= comphigh - x)
- return complow;
- else
- return comphigh;
- }
-
- static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) {
- uint64_t y = getNearPower2(x);
- if (swap)
- return (y - x) == r;
- else
- return (x - y) == r;
- }
-
- public:
- explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM)
- : SelectionDAGISel(TM)
- {}
-
- /// getI64Imm - Return a target constant with the specified value, of type
- /// i64.
- inline SDValue getI64Imm(int64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i64);
- }
-
- // Select - Convert the specified operand from a target-independent to a
- // target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
-
- virtual const char *getPassName() const {
- return "Alpha DAG->DAG Pattern Instruction Selection";
- }
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- SDValue Op0;
- switch (ConstraintCode) {
- default: return true;
- case 'm': // memory
- Op0 = Op;
- break;
- }
-
- OutOps.push_back(Op0);
- return false;
- }
-
-// Include the pieces autogenerated from the target description.
-#include "AlphaGenDAGISel.inc"
-
-private:
- /// getTargetMachine - Return a reference to the TargetMachine, casted
- /// to the target-specific type.
- const AlphaTargetMachine &getTargetMachine() {
- return static_cast<const AlphaTargetMachine &>(TM);
- }
-
- /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
- /// to the target-specific type.
- const AlphaInstrInfo *getInstrInfo() {
- return getTargetMachine().getInstrInfo();
- }
-
- SDNode *getGlobalBaseReg();
- SDNode *getGlobalRetAddr();
- void SelectCALL(SDNode *Op);
-
- };
-}
-
-/// getGlobalBaseReg - Output the instructions required to put the
-/// GOT address into a register.
-///
-SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
- return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
-}
-
-/// getGlobalRetAddr - Grab the return address.
-///
-SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
- unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF);
- return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
-}
-
-// Select - Convert the specified operand from a target-independent to a
-// target-specific node if it hasn't already been changed.
-SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
- return NULL; // Already selected.
- DebugLoc dl = N->getDebugLoc();
-
- switch (N->getOpcode()) {
- default: break;
- case AlphaISD::CALL:
- SelectCALL(N);
- return NULL;
-
- case ISD::FrameIndex: {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64,
- CurDAG->getTargetFrameIndex(FI, MVT::i32),
- getI64Imm(0));
- }
- case ISD::GLOBAL_OFFSET_TABLE:
- return getGlobalBaseReg();
- case AlphaISD::GlobalRetAddr:
- return getGlobalRetAddr();
-
- case AlphaISD::DivCall: {
- SDValue Chain = CurDAG->getEntryNode();
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue N2 = N->getOperand(2);
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1,
- SDValue(0,0));
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2,
- Chain.getValue(1));
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0,
- Chain.getValue(1));
- SDNode *CNode =
- CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue,
- Chain, Chain.getValue(1));
- Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64,
- SDValue(CNode, 1));
- return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain);
- }
-
- case ISD::READCYCLECOUNTER: {
- SDValue Chain = N->getOperand(0);
- return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other,
- Chain);
- }
-
- case ISD::Constant: {
- uint64_t uval = cast<ConstantSDNode>(N)->getZExtValue();
-
- if (uval == 0) {
- SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Alpha::R31, MVT::i64);
- ReplaceUses(SDValue(N, 0), Result);
- return NULL;
- }
-
- int64_t val = (int64_t)uval;
- int32_t val32 = (int32_t)val;
- if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT &&
- val >= IMM_LOW + IMM_LOW * IMM_MULT)
- break; //(LDAH (LDA))
- if ((uval >> 32) == 0 && //empty upper bits
- val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT)
- // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true
- break; //(zext (LDAH (LDA)))
- //Else use the constant pool
- ConstantInt *C = ConstantInt::get(
- Type::getInt64Ty(*CurDAG->getContext()), uval);
- SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64);
- SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI,
- SDValue(getGlobalBaseReg(), 0));
- return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other,
- CPI, SDValue(Tmp, 0), CurDAG->getEntryNode());
- }
- case ISD::TargetConstantFP:
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
- bool isDouble = N->getValueType(0) == MVT::f64;
- EVT T = isDouble ? MVT::f64 : MVT::f32;
- if (CN->getValueAPF().isPosZero()) {
- return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS,
- T, CurDAG->getRegister(Alpha::F31, T),
- CurDAG->getRegister(Alpha::F31, T));
- } else if (CN->getValueAPF().isNegZero()) {
- return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYSNT : Alpha::CPYSNS,
- T, CurDAG->getRegister(Alpha::F31, T),
- CurDAG->getRegister(Alpha::F31, T));
- } else {
- report_fatal_error("Unhandled FP constant type");
- }
- break;
- }
-
- case ISD::SETCC:
- if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
-
- unsigned Opc = Alpha::WTF;
- bool rev = false;
- bool inv = false;
- switch(CC) {
- default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!");
- case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ:
- Opc = Alpha::CMPTEQ; break;
- case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT:
- Opc = Alpha::CMPTLT; break;
- case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE:
- Opc = Alpha::CMPTLE; break;
- case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT:
- Opc = Alpha::CMPTLT; rev = true; break;
- case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE:
- Opc = Alpha::CMPTLE; rev = true; break;
- case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE:
- Opc = Alpha::CMPTEQ; inv = true; break;
- case ISD::SETO:
- Opc = Alpha::CMPTUN; inv = true; break;
- case ISD::SETUO:
- Opc = Alpha::CMPTUN; break;
- };
- SDValue tmp1 = N->getOperand(rev?1:0);
- SDValue tmp2 = N->getOperand(rev?0:1);
- SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2);
- if (inv)
- cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl,
- MVT::f64, SDValue(cmp, 0),
- CurDAG->getRegister(Alpha::F31, MVT::f64));
- switch(CC) {
- case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE:
- case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE:
- {
- SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64,
- tmp1, tmp2);
- cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64,
- SDValue(cmp2, 0), SDValue(cmp, 0));
- break;
- }
- default: break;
- }
-
- SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl,
- MVT::i64, SDValue(cmp, 0));
- return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64,
- CurDAG->getRegister(Alpha::R31, MVT::i64),
- SDValue(LD,0));
- }
- break;
-
- case ISD::AND: {
- ConstantSDNode* SC = NULL;
- ConstantSDNode* MC = NULL;
- if (N->getOperand(0).getOpcode() == ISD::SRL &&
- (MC = dyn_cast<ConstantSDNode>(N->getOperand(1))) &&
- (SC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1)))) {
- uint64_t sval = SC->getZExtValue();
- uint64_t mval = MC->getZExtValue();
- // If the result is a zap, let the autogened stuff handle it.
- if (get_zapImm(N->getOperand(0), mval))
- break;
- // given mask X, and shift S, we want to see if there is any zap in the
- // mask if we play around with the bottom S bits
- uint64_t dontcare = (~0ULL) >> (64 - sval);
- uint64_t mask = mval << sval;
-
- if (get_zapImm(mask | dontcare))
- mask = mask | dontcare;
-
- if (get_zapImm(mask)) {
- SDValue Z =
- SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64,
- N->getOperand(0).getOperand(0),
- getI64Imm(get_zapImm(mask))), 0);
- return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z,
- getI64Imm(sval));
- }
- }
- break;
- }
-
- }
-
- return SelectCode(N);
-}
-
-void AlphaDAGToDAGISel::SelectCALL(SDNode *N) {
- //TODO: add flag stuff to prevent nondeterministic breakage!
-
- SDValue Chain = N->getOperand(0);
- SDValue Addr = N->getOperand(1);
- SDValue InFlag = N->getOperand(N->getNumOperands() - 1);
- DebugLoc dl = N->getDebugLoc();
-
- if (Addr.getOpcode() == AlphaISD::GPRelLo) {
- SDValue GOT = SDValue(getGlobalBaseReg(), 0);
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag);
- InFlag = Chain.getValue(1);
- Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other,
- MVT::Glue, Addr.getOperand(0),
- Chain, InFlag), 0);
- } else {
- Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag);
- InFlag = Chain.getValue(1);
- Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other,
- MVT::Glue, Chain, InFlag), 0);
- }
- InFlag = Chain.getValue(1);
-
- ReplaceUses(SDValue(N, 0), Chain);
- ReplaceUses(SDValue(N, 1), InFlag);
-}
-
-
-/// createAlphaISelDag - This pass converts a legalized DAG into a
-/// Alpha-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) {
- return new AlphaDAGToDAGISel(TM);
-}
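
[Editor's note] The zap-mask logic deleted above deserves a worked example: a 64-bit constant is a valid ZAPNOT immediate exactly when every byte is 0x00 or 0xFF, and the 8-bit result has one bit per fully set byte. Standalone copy of the simple (non-partial-mask) get_zapImm overload:

#include <cstdint>
#include <cstdio>

static uint64_t zapImm(uint64_t x) {
  unsigned build = 0;
  for (int i = 0; i != 8; ++i) {
    if ((x & 0xFF) == 0xFF)
      build |= 1u << i;     // byte i fully set: keep it
    else if ((x & 0xFF) != 0)
      return 0;             // partially set byte: not a zap mask
    x >>= 8;
  }
  return build;
}

int main() {
  // Bytes FF,00,FF,FF (LSB first) -> mask bits 0,2,3 -> 0xd.
  std::printf("%#llx\n", (unsigned long long)zapImm(0x00000000FFFF00FFULL));
  // Byte 2 is 0xEF (partially set) -> 0, i.e. not representable as zapnot.
  std::printf("%#llx\n", (unsigned long long)zapImm(0x0000000000EF00FFULL));
}
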
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
deleted file mode 100644
index 3057eb8c57fb..000000000000
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ /dev/null
@@ -1,962 +0,0 @@
-//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the AlphaISelLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaISelLowering.h"
-#include "AlphaTargetMachine.h"
-#include "AlphaMachineFunctionInfo.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-/// AddLiveIn - This helper function adds the specified physical register to the
-/// MachineFunction as a live in value. It also creates a corresponding virtual
-/// register for it.
-static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
- TargetRegisterClass *RC) {
- assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(PReg, VReg);
- return VReg;
-}
-
-AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
- // Set up the TargetLowering object.
- //I am having problems with shr n i8 1
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
-
- addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass);
- addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass);
- addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass);
-
- // We want to custom lower some of our intrinsics.
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- // setOperationAction(ISD::BRIND, MVT::Other, Expand);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
-
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
-
- if (!TM.getSubtarget<AlphaSubtarget>().hasCT()) {
- setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
- setOperationAction(ISD::CTLZ , MVT::i64 , Expand);
- }
- setOperationAction(ISD::BSWAP , MVT::i64, Expand);
- setOperationAction(ISD::ROTL , MVT::i64, Expand);
- setOperationAction(ISD::ROTR , MVT::i64, Expand);
-
- setOperationAction(ISD::SREM , MVT::i64, Custom);
- setOperationAction(ISD::UREM , MVT::i64, Custom);
- setOperationAction(ISD::SDIV , MVT::i64, Custom);
- setOperationAction(ISD::UDIV , MVT::i64, Custom);
-
- setOperationAction(ISD::ADDC , MVT::i64, Expand);
- setOperationAction(ISD::ADDE , MVT::i64, Expand);
- setOperationAction(ISD::SUBC , MVT::i64, Expand);
- setOperationAction(ISD::SUBE , MVT::i64, Expand);
-
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
-
- // We don't support sin/cos/sqrt/pow
- setOperationAction(ISD::FSIN , MVT::f64, Expand);
- setOperationAction(ISD::FCOS , MVT::f64, Expand);
- setOperationAction(ISD::FSIN , MVT::f32, Expand);
- setOperationAction(ISD::FCOS , MVT::f32, Expand);
-
- setOperationAction(ISD::FSQRT, MVT::f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::f32, Expand);
-
- setOperationAction(ISD::FPOW , MVT::f32, Expand);
- setOperationAction(ISD::FPOW , MVT::f64, Expand);
-
- setOperationAction(ISD::FMA, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
-
- setOperationAction(ISD::SETCC, MVT::f32, Promote);
-
- setOperationAction(ISD::BITCAST, MVT::f32, Promote);
-
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
-
- // Not implemented yet.
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-
- // We want to legalize GlobalAddress and ConstantPool and
- // ExternalSymbols nodes into the appropriate instructions to
- // materialize the address.
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
- setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
-
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::i32, Custom);
-
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
-
- setStackPointerRegisterToSaveRestore(Alpha::R30);
-
- setJumpBufSize(272);
- setJumpBufAlignment(16);
-
- setMinFunctionAlignment(4);
-
- setInsertFencesForAtomic(true);
-
- computeRegisterProperties();
-}
-
-EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const {
- return MVT::i64;
-}
-
-const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case AlphaISD::CVTQT_: return "Alpha::CVTQT_";
- case AlphaISD::CVTQS_: return "Alpha::CVTQS_";
- case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_";
- case AlphaISD::GPRelHi: return "Alpha::GPRelHi";
- case AlphaISD::GPRelLo: return "Alpha::GPRelLo";
- case AlphaISD::RelLit: return "Alpha::RelLit";
- case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr";
- case AlphaISD::CALL: return "Alpha::CALL";
- case AlphaISD::DivCall: return "Alpha::DivCall";
- case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG";
- case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I";
- case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F";
- }
-}
-
-static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- // FIXME there isn't really any debug info here
- DebugLoc dl = Op.getDebugLoc();
-
- SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi);
- return Lo;
-}
-
-//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/
-//AA-PY8AC-TET1_html/callCH3.html#BLOCK21
-
-//For now, just use variable size stack frame format
-
-//In a standard call, the first six items are passed in registers $16
-//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details
-//of argument-to-register correspondence.) The remaining items are
-//collected in a memory argument list that is a naturally aligned
-//array of quadwords. In a standard call, this list, if present, must
-//be passed at 0(SP).
-//7 ... n 0(SP) ... (n-7)*8(SP)
-
-// //#define FP $15
-// //#define RA $26
-// //#define PV $27
-// //#define GP $29
-// //#define SP $30
-
-#include "AlphaGenCallingConv.inc"
-
-SDValue
-AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // Alpha target does not yet support tail call optimization.
- isTailCall = false;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallOperands(Outs, CC_Alpha);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
- getPointerTy(), true));
-
- SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
- SmallVector<SDValue, 12> MemOpChains;
- SDValue StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- SDValue Arg = OutVals[i];
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- }
-
- // Arguments that can be passed in registers must be kept in the RegsToPass
- // vector
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
-
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64);
-
- SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- StackPtr,
- DAG.getIntPtrConstant(VA.getLocMemOffset()));
-
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),false, false, 0));
- }
- }
-
- // Transform all store nodes into one single node because all store nodes are
- // independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag is
- // necessary since all emitted instructions must be stuck together.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy(), true),
- DAG.getConstant(0, getPointerTy(), true),
- InFlag);
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
- Ins, dl, DAG, InVals);
-}
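As a sketch of what the lowering above produces for a simple all-integer call (the
register assignments come from CC_Alpha, and the stack offsets match the argument
table in the header comment; the signature is hypothetical, for illustration only):

    long callee(long a, long b, long c, long d, long e, long f,
                long g, long h);
    // a..f -> $16..$21 (Alpha::R16..R21), via the isRegLoc() path;
    // g -> 0(SP) and h -> 8(SP), via the isMemLoc() stores off R30.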
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-///
-SDValue
-AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
-
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- VA.getLocVT(), InFlag).getValue(1);
- SDValue RetValue = Chain.getValue(0);
- InFlag = Chain.getValue(2);
-
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
-
- InVals.push_back(RetValue);
- }
-
- return Chain;
-}
-
-SDValue
-AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
-
- unsigned args_int[] = {
- Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21};
- unsigned args_float[] = {
- Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21};
-
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
- SDValue argt;
- EVT ObjectVT = Ins[ArgNo].VT;
- SDValue ArgVal;
-
- if (ArgNo < 6) {
- switch (ObjectVT.getSimpleVT().SimpleTy) {
-      default:
-        llvm_unreachable("Invalid value type!");
- case MVT::f64:
- args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
- &Alpha::F8RCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
- break;
- case MVT::f32:
- args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo],
- &Alpha::F4RCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT);
- break;
- case MVT::i64:
- args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo],
- &Alpha::GPRCRegClass);
- ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64);
- break;
- }
-    } else { // More arguments than registers; these arrive on the stack.
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
-
-      // Create the SelectionDAG nodes corresponding to a load
-      // from this parameter.
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i64);
- ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
- }
- InVals.push_back(ArgVal);
- }
-
-  // If the function takes a variable number of arguments, copy all of the
-  // argument registers to the stack.
- if (isVarArg) {
- FuncInfo->setVarArgsOffset(Ins.size() * 8);
- std::vector<SDValue> LS;
- for (int i = 0; i < 6; ++i) {
- if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
- args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
- SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
- int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
- if (i == 0) FuncInfo->setVarArgsBase(FI);
- SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
- false, false, 0));
-
- if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
- args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
- argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
-      FI = MFI->CreateFixedObject(8, -8 * (12 - i), true);
- SDFI = DAG.getFrameIndex(FI, MVT::i64);
- LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(),
- false, false, 0));
- }
-
-    // Set up a token factor with all of the stack traffic.
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size());
- }
-
- return Chain;
-}
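A sketch of the register save area this builds for a varargs function, derived
from the fixed-object offsets passed to CreateFixedObject above:

    // frame offset -96 .. -49 : $f16..$f21 spill slots (FP varargs)
    // frame offset -48 ..  -1 : $16..$21 spill slots (integer varargs)
    // frame offset   0 and up : arguments 7..n, pushed by the caller
    // VarArgsBase   = frame index of the first integer slot (offset -48)
    // VarArgsOffset = 8 * (number of named formal arguments)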
-
-SDValue
-AlphaTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
- DAG.getNode(AlphaISD::GlobalRetAddr,
- DebugLoc(), MVT::i64),
- SDValue());
- switch (Outs.size()) {
- default:
- llvm_unreachable("Do not know how to return this many arguments!");
- case 0:
- break;
- //return SDValue(); // ret void is legal
- case 1: {
- EVT ArgVT = Outs[0].VT;
- unsigned ArgReg;
- if (ArgVT.isInteger())
- ArgReg = Alpha::R0;
- else {
- assert(ArgVT.isFloatingPoint());
- ArgReg = Alpha::F0;
- }
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
- OutVals[0], Copy.getValue(1));
- if (DAG.getMachineFunction().getRegInfo().liveout_empty())
- DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
- break;
- }
- case 2: {
- EVT ArgVT = Outs[0].VT;
- unsigned ArgReg1, ArgReg2;
- if (ArgVT.isInteger()) {
- ArgReg1 = Alpha::R0;
- ArgReg2 = Alpha::R1;
- } else {
- assert(ArgVT.isFloatingPoint());
- ArgReg1 = Alpha::F0;
- ArgReg2 = Alpha::F1;
- }
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
- OutVals[0], Copy.getValue(1));
- if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
- DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
- == DAG.getMachineFunction().getRegInfo().liveout_end())
- DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
- Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
- OutVals[1], Copy.getValue(1));
- if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
- DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
- == DAG.getMachineFunction().getRegInfo().liveout_end())
- DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2);
- break;
- }
- }
- return DAG.getNode(AlphaISD::RET_FLAG, dl,
- MVT::Other, Copy, Copy.getValue(1));
-}
-
-void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
- SDValue &DataPtr,
- SelectionDAG &DAG) const {
- Chain = N->getOperand(0);
- SDValue VAListP = N->getOperand(1);
- const Value *VAListS = cast<SrcValueSDNode>(N->getOperand(2))->getValue();
- DebugLoc dl = N->getDebugLoc();
-
- SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP,
- MachinePointerInfo(VAListS),
- false, false, 0);
- SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
- DAG.getConstant(8, MVT::i64));
- SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
- Tmp, MachinePointerInfo(),
- MVT::i32, false, false, 0);
- DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
- if (N->getValueType(0).isFloatingPoint())
- {
-      // If this is an FP value and Offset < 6*8, subtract 6*8 from DataPtr so
-      // that it points into the FP register save area instead.
- SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr,
- DAG.getConstant(8*6, MVT::i64));
- SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset,
- DAG.getConstant(8*6, MVT::i64), ISD::SETLT);
- DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr);
- }
-
- SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset,
- DAG.getConstant(8, MVT::i64));
- Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp,
- MachinePointerInfo(),
- MVT::i32, false, false, 0);
-}
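Conceptually, the va_list being updated looks like this (a sketch; the struct and
field names are illustrative, not taken from the source):

    struct alpha_va_list {   // what VAListP points at
      char *base;            // i64 at offset 0: start of the int save area
      int   offset;          // i32 at offset 8: bytes of arguments consumed
    };
    // next = base + offset;
    // if (FP argument && offset < 6*8) next -= 6*8;  // FP area sits lower
    // offset += 8;  // written back by the truncating store above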
-
-/// LowerOperation - Provide custom lowering hooks for some operations.
-///
-SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Wasn't expecting to be able to lower this!");
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
-
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- switch (IntNo) {
- default: break; // Don't custom lower most intrinsics.
- case Intrinsic::alpha_umulh:
- return DAG.getNode(ISD::MULHU, dl, MVT::i64,
- Op.getOperand(1), Op.getOperand(2));
-    }
-    break; // Don't fall through into SRL_PARTS for unhandled intrinsics.
-  }
-
- case ISD::SRL_PARTS: {
- SDValue ShOpLo = Op.getOperand(0);
- SDValue ShOpHi = Op.getOperand(1);
- SDValue ShAmt = Op.getOperand(2);
- SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(64, MVT::i64), ShAmt);
- SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm,
- DAG.getConstant(0, MVT::i64), ISD::SETLE);
- // if 64 - shAmt <= 0
- SDValue Hi_Neg = DAG.getConstant(0, MVT::i64);
- SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(0, MVT::i64), bm);
- SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg);
- // else
- SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm);
- SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt);
- SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt);
- Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries);
- // Merge
- SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos);
- SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos);
- SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
- }
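The same computation as scalar C++, a minimal sketch assuming a shift amount in
[1, 127] (amt == 0 would make the shift by bm undefined in C++, which is why this
is only a model of the DAG expansion above):

    #include <cstdint>

    // Scalar model of the SRL_PARTS lowering above.
    static void srl_parts(uint64_t lo, uint64_t hi, unsigned amt,
                          uint64_t &LoOut, uint64_t &HiOut) {
      int bm = 64 - (int)amt;          // bits of hi that stay in hi
      if (bm <= 0) {                   // amt >= 64: only hi contributes
        HiOut = 0;
        LoOut = hi >> -bm;             // i.e. hi >> (amt - 64)
      } else {                         // 0 < amt < 64
        uint64_t carries = hi << bm;   // hi bits carried into lo
        HiOut = hi >> amt;
        LoOut = (lo >> amt) | carries;
      }
    }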
-  // case ISD::SRA_PARTS:
-  // case ISD::SHL_PARTS:
-
- case ISD::SINT_TO_FP: {
- assert(Op.getOperand(0).getValueType() == MVT::i64 &&
- "Unhandled SINT_TO_FP type in custom expander!");
- SDValue LD;
- bool isDouble = Op.getValueType() == MVT::f64;
- LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
-    SDValue FP = DAG.getNode(isDouble ? AlphaISD::CVTQT_ : AlphaISD::CVTQS_, dl,
-                             isDouble ? MVT::f64 : MVT::f32, LD);
- return FP;
- }
- case ISD::FP_TO_SINT: {
- bool isDouble = Op.getOperand(0).getValueType() == MVT::f64;
- SDValue src = Op.getOperand(0);
-
- if (!isDouble) //Promote
- src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src);
-
- src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src);
-
- return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src);
- }
- case ISD::ConstantPool: {
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CP->getConstVal();
- SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment());
- // FIXME there isn't really any debug info here
-
- SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi);
- return Lo;
- }
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Alpha.");
- case ISD::GlobalAddress: {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
- GSDN->getOffset());
- // FIXME there isn't really any debug info here
-
- // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
- // && !GV->hasLinkOnceLinkage()) {
- if (GV->hasLocalLinkage()) {
- SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi);
- return Lo;
- } else
- return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA,
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- }
- case ISD::ExternalSymbol: {
- return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64,
- DAG.getTargetExternalSymbol(cast<ExternalSymbolSDNode>(Op)
- ->getSymbol(), MVT::i64),
- DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
- }
-
- case ISD::UREM:
- case ISD::SREM:
-    // Expand only in the constant case.
- if (Op.getOperand(1).getOpcode() == ISD::Constant) {
- EVT VT = Op.getNode()->getValueType(0);
- SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ?
- BuildUDIV(Op.getNode(), DAG, NULL) :
- BuildSDIV(Op.getNode(), DAG, NULL);
- Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1));
- Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1);
- return Tmp1;
- }
- //fall through
- case ISD::SDIV:
- case ISD::UDIV:
- if (Op.getValueType().isInteger()) {
- if (Op.getOperand(1).getOpcode() == ISD::Constant)
- return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL)
- : BuildUDIV(Op.getNode(), DAG, NULL);
- const char* opstr = 0;
- switch (Op.getOpcode()) {
- case ISD::UREM: opstr = "__remqu"; break;
- case ISD::SREM: opstr = "__remq"; break;
- case ISD::UDIV: opstr = "__divqu"; break;
- case ISD::SDIV: opstr = "__divq"; break;
- }
- SDValue Tmp1 = Op.getOperand(0),
- Tmp2 = Op.getOperand(1),
- Addr = DAG.getExternalSymbol(opstr, MVT::i64);
- return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2);
- }
- break;
-
- case ISD::VAARG: {
- SDValue Chain, DataPtr;
- LowerVAARG(Op.getNode(), Chain, DataPtr, DAG);
-
- SDValue Result;
- if (Op.getValueType() == MVT::i32)
- Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
- MachinePointerInfo(), MVT::i32, false, false, 0);
- else
- Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr,
- MachinePointerInfo(),
- false, false, 0);
- return Result;
- }
- case ISD::VACOPY: {
- SDValue Chain = Op.getOperand(0);
- SDValue DestP = Op.getOperand(1);
- SDValue SrcP = Op.getOperand(2);
- const Value *DestS = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
- const Value *SrcS = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-
- SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP,
- MachinePointerInfo(SrcS),
- false, false, 0);
- SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP,
- MachinePointerInfo(DestS),
- false, false, 0);
- SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
- DAG.getConstant(8, MVT::i64));
- Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
- NP, MachinePointerInfo(), MVT::i32, false, false, 0);
- SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
- DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD,
- MachinePointerInfo(), MVT::i32,
- false, false, 0);
- }
- case ISD::VASTART: {
- MachineFunction &MF = DAG.getMachineFunction();
- AlphaMachineFunctionInfo *FuncInfo = MF.getInfo<AlphaMachineFunctionInfo>();
-
- SDValue Chain = Op.getOperand(0);
- SDValue VAListP = Op.getOperand(1);
- const Value *VAListS = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
-
- // vastart stores the address of the VarArgsBase and VarArgsOffset
- SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64);
- SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP,
- MachinePointerInfo(VAListS), false, false, 0);
- SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
- DAG.getConstant(8, MVT::i64));
- return DAG.getTruncStore(S1, dl,
- DAG.getConstant(FuncInfo->getVarArgsOffset(),
- MVT::i64),
- SA2, MachinePointerInfo(),
- MVT::i32, false, false, 0);
- }
- case ISD::RETURNADDR:
- return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64);
- //FIXME: implement
- case ISD::FRAMEADDR: break;
- }
-
- return SDValue();
-}
-
-void AlphaTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const {
- DebugLoc dl = N->getDebugLoc();
- assert(N->getValueType(0) == MVT::i32 &&
- N->getOpcode() == ISD::VAARG &&
- "Unknown node to custom promote!");
-
- SDValue Chain, DataPtr;
- LowerVAARG(N, Chain, DataPtr, DAG);
- SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr,
- MachinePointerInfo(),
- false, false, 0);
- Results.push_back(Res);
- Results.push_back(SDValue(Res.getNode(), 1));
-}
-
-
-//Inline Asm
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-AlphaTargetLowering::ConstraintType
-AlphaTargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- default: break;
- case 'f':
- case 'r':
- return C_RegisterClass;
- }
- }
- return TargetLowering::getConstraintType(Constraint);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-AlphaTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
- case 'f':
- weight = CW_Register;
- break;
- }
- return weight;
-}
-
-/// Given a register class constraint, like 'r', if this corresponds directly
-/// to an LLVM register class, return a register of 0 and the register class
-/// pointer.
-std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
-{
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- case 'r':
- return std::make_pair(0U, Alpha::GPRCRegisterClass);
- case 'f':
- return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
- std::make_pair(0U, Alpha::F4RCRegisterClass);
- }
- }
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
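For reference, a sketch of how these constraints appear in user code, written as
GNU extended asm with ordinary Alpha mnemonics ('r' selects a GPR, 'f' an FP
register; F8RC for f64, F4RC for f32):

    long add_ints(long x, long y) {
      long r;
      asm("addq %1,%2,%0" : "=r"(r) : "r"(x), "r"(y));  // 'r' -> GPRC
      return r;
    }
    double add_fps(double a, double b) {
      double r;
      asm("addt %1,%2,%0" : "=f"(r) : "f"(a), "f"(b));  // 'f' -> F8RC (f64)
      return r;
    }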
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-MachineBasicBlock *
-AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- assert((MI->getOpcode() == Alpha::CAS32 ||
- MI->getOpcode() == Alpha::CAS64 ||
- MI->getOpcode() == Alpha::LAS32 ||
- MI->getOpcode() == Alpha::LAS64 ||
- MI->getOpcode() == Alpha::SWAP32 ||
- MI->getOpcode() == Alpha::SWAP64) &&
- "Unexpected instr type to insert");
-
- bool is32 = MI->getOpcode() == Alpha::CAS32 ||
- MI->getOpcode() == Alpha::LAS32 ||
- MI->getOpcode() == Alpha::SWAP32;
-
-  // Load-locked/store-conditional sequences for atomic ops all take the same
-  // form:
-  //   start:
-  //     ll
-  //     do stuff (maybe branch to exit)
-  //     sc
-  //     test sc and maybe branch back to start
-  //   exit:
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- DebugLoc dl = MI->getDebugLoc();
- MachineFunction::iterator It = BB;
- ++It;
-
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-
- sinkMBB->splice(sinkMBB->begin(), thisMBB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- thisMBB->end());
- sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
-
- F->insert(It, llscMBB);
- F->insert(It, sinkMBB);
-
- BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB);
-
- unsigned reg_res = MI->getOperand(0).getReg(),
- reg_ptr = MI->getOperand(1).getReg(),
- reg_v2 = MI->getOperand(2).getReg(),
- reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
-
- BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L),
- reg_res).addImm(0).addReg(reg_ptr);
- switch (MI->getOpcode()) {
- case Alpha::CAS32:
- case Alpha::CAS64: {
- unsigned reg_cmp
- = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp)
- .addReg(reg_v2).addReg(reg_res);
- BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
- .addImm(0).addReg(reg_cmp).addMBB(sinkMBB);
- BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
- .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg());
- break;
- }
- case Alpha::LAS32:
- case Alpha::LAS64: {
-    BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store)
- .addReg(reg_res).addReg(reg_v2);
- break;
- }
- case Alpha::SWAP32:
- case Alpha::SWAP64: {
- BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store)
- .addReg(reg_v2).addReg(reg_v2);
- break;
- }
- }
- BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store)
- .addReg(reg_store).addImm(0).addReg(reg_ptr);
- BuildMI(llscMBB, dl, TII->get(Alpha::BEQ))
- .addImm(0).addReg(reg_store).addMBB(llscMBB);
- BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB);
-
- thisMBB->addSuccessor(llscMBB);
- llscMBB->addSuccessor(llscMBB);
- llscMBB->addSuccessor(sinkMBB);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
-
- return sinkMBB;
-}
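As a sketch, the sequence this inserter emits for LAS64 (atomic load-add of a
64-bit value), with block labels matching llscMBB and sinkMBB above:

    //   llsc:
    //     ldq_l $res, 0($ptr)       ; load-locked
    //     addq  $res, $v2, $store   ; perform the operation
    //     stq_c $store, 0($ptr)     ; store-conditional; $store <- 0 on failure
    //     beq   $store, llsc        ; reservation lost, retry
    //     br    sink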
-
-bool
-AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The Alpha target isn't yet aware of offsets.
- return false;
-}
-
-bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (VT != MVT::f32 && VT != MVT::f64)
- return false;
- // +0.0 F31
- // +0.0f F31
- // -0.0 -F31
- // -0.0f -F31
- return Imm.isZero() || Imm.isNegZero();
-}
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
deleted file mode 100644
index 80f8efaea5d2..000000000000
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ /dev/null
@@ -1,142 +0,0 @@
-//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Alpha uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
-#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
-
-#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "Alpha.h"
-
-namespace llvm {
-
- namespace AlphaISD {
- enum NodeType {
-      // Start the numbering where the builtin ops and target ops leave off.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-      // These correspond to the identically named instructions.
- CVTQT_, CVTQS_, CVTTQ_,
-
- /// GPRelHi/GPRelLo - These represent the high and low 16-bit
- /// parts of a global address respectively.
- GPRelHi, GPRelLo,
-
-      /// RelLit - Literal relocation of a global.
- RelLit,
-
- /// GlobalRetAddr - used to restore the return address
- GlobalRetAddr,
-
- /// CALL - Normal call.
- CALL,
-
- /// DIVCALL - used for special library calls for div and rem
- DivCall,
-
- /// return flag operand
- RET_FLAG,
-
- /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This
- /// corresponds to the COND_BRANCH pseudo instruction.
- /// *PRC is the input register to compare to zero,
- /// OPC is the branch opcode to use (e.g. Alpha::BEQ),
- /// DESTBB is the destination block to branch to, and INFLAG is
- /// an optional input flag argument.
- COND_BRANCH_I, COND_BRANCH_F
-
- };
- }
-
- class AlphaTargetLowering : public TargetLowering {
- public:
- explicit AlphaTargetLowering(TargetMachine &TM);
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
-
- /// getSetCCResultType - Get the SETCC result ValueType
- virtual EVT getSetCCResultType(EVT VT) const;
-
- /// LowerOperation - Provide custom lowering hooks for some operations.
- ///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- /// ReplaceNodeResults - Replace the results of node with an illegal result
- /// type with new values built out of custom code.
- ///
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
-
- // Friendly names for dumps
- const char *getTargetNodeName(unsigned Opcode) const;
-
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- ConstraintType getConstraintType(const std::string &Constraint) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
-
- /// isFPImmLegal - Returns true if the target can instruction select the
- /// specified FP immediate natively. If false, the legalizer will
- /// materialize the FP immediate as a load from a constant pool.
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-
- private:
- // Helpers for custom lowering.
- void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr,
- SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
- };
-}
-
-#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
deleted file mode 100644
index 6f4ebf279643..000000000000
--- a/lib/Target/Alpha/AlphaInstrFormats.td
+++ /dev/null
@@ -1,268 +0,0 @@
-//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-// Instruction formats, following section 3.3 of the Alpha Architecture
-// Handbook:
-//   3.3.1 Memory
-//   3.3.2 Branch
-//   3.3.3 Operate
-//   3.3.4 Floating-point
-//   3.3.5 PALcode
-
-def u8imm : Operand<i64>;
-def s14imm : Operand<i64>;
-def s16imm : Operand<i64>;
-def s21imm : Operand<i64>;
-def s64imm : Operand<i64>;
-def u64imm : Operand<i64>;
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-// Alpha instruction baseline
-class InstAlpha<bits<6> op, string asmstr, InstrItinClass itin> : Instruction {
- field bits<32> Inst;
- let Namespace = "Alpha";
- let AsmString = asmstr;
- let Inst{31-26} = op;
- let Itinerary = itin;
-}
-
-
-// 3.3.1 Memory instruction format
-class MForm<bits<6> opcode, bit load, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let canFoldAsLoad = load;
-  let Defs = [R28]; // We may use this for frame index calculations, so reserve it here.
-
- bits<5> Ra;
- bits<16> disp;
- bits<5> Rb;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-0} = disp;
-}
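The field assignments above produce the standard Alpha memory-format encoding,
sketched below:

    //  31      26 25    21 20    16 15               0
    // +----------+--------+--------+------------------+
    // |  opcode  |   Ra   |   Rb   |       disp       |
    // +----------+--------+--------+------------------+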
-class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- bits<5> Ra;
-
- let OutOperandList = (outs GPRC:$RA);
- let InOperandList = (ins);
- let Inst{25-21} = Ra;
- let Inst{20-16} = 0;
- let Inst{15-0} = fc;
-}
-class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let OutOperandList = (outs);
- let InOperandList = (ins);
- let Inst{25-21} = 0;
- let Inst{20-16} = 0;
- let Inst{15-0} = fc;
-}
-
-class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- bits<5> Ra;
- bits<5> Rb;
- bits<14> disp;
-
- let OutOperandList = (outs);
- let InOperandList = OL;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-14} = TB;
- let Inst{13-0} = disp;
-}
-class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern=pattern;
- bits<5> Ra;
- bits<5> Rb;
- bits<14> disp;
-
- let OutOperandList = (outs);
- let InOperandList = OL;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-14} = TB;
- let Inst{13-0} = disp;
-}
-
-// 3.3.2 Branch instruction format
-def target : Operand<OtherVT> {}
-
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let OutOperandList = (outs);
- let InOperandList = OL;
- bits<64> Opc; //dummy
- bits<5> Ra;
- bits<21> disp;
-
- let Inst{25-21} = Ra;
- let Inst{20-0} = disp;
-}
-}
-
-let isBranch = 1, isTerminator = 1 in
-class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs);
- let InOperandList = (ins target:$DISP);
- bits<5> Ra;
- bits<21> disp;
-
- let Inst{25-21} = Ra;
- let Inst{20-0} = disp;
-}
-
-// 3.3.3 Operate instruction format
-class OForm<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RC);
- let InOperandList = (ins GPRC:$RA, GPRC:$RB);
-
- bits<5> Rc;
- bits<5> Ra;
- bits<5> Rb;
- bits<7> Function = fun;
-
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-13} = 0;
- let Inst{12} = 0;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-class OForm2<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RC);
- let InOperandList = (ins GPRC:$RB);
-
- bits<5> Rc;
- bits<5> Rb;
- bits<7> Function = fun;
-
- let Inst{25-21} = 31;
- let Inst{20-16} = Rb;
- let Inst{15-13} = 0;
- let Inst{12} = 0;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RDEST);
- let InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE);
- let Constraints = "$RFALSE = $RDEST";
- let DisableEncoding = "$RFALSE";
-
- bits<5> Rc;
- bits<5> Ra;
- bits<5> Rb;
- bits<7> Function = fun;
-
-// let Constraints = "$RFALSE = $RDEST";
- let Inst{25-21} = Ra;
- let Inst{20-16} = Rb;
- let Inst{15-13} = 0;
- let Inst{12} = 0;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-
-class OFormL<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RC);
- let InOperandList = (ins GPRC:$RA, u8imm:$L);
-
- bits<5> Rc;
- bits<5> Ra;
- bits<8> LIT;
- bits<7> Function = fun;
-
- let Inst{25-21} = Ra;
- let Inst{20-13} = LIT;
- let Inst{12} = 1;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
- let OutOperandList = (outs GPRC:$RDEST);
- let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE);
- let Constraints = "$RFALSE = $RDEST";
- let DisableEncoding = "$RFALSE";
-
- bits<5> Rc;
- bits<5> Ra;
- bits<8> LIT;
- bits<7> Function = fun;
-
-// let Constraints = "$RFALSE = $RDEST";
- let Inst{25-21} = Ra;
- let Inst{20-13} = LIT;
- let Inst{12} = 1;
- let Inst{11-5} = Function;
- let Inst{4-0} = Rc;
-}
-
-// 3.3.4 Floating-point operate format
-class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let Pattern = pattern;
-
- bits<5> Fc;
- bits<5> Fa;
- bits<5> Fb;
- bits<11> Function = fun;
-
- let Inst{25-21} = Fa;
- let Inst{20-16} = Fb;
- let Inst{15-5} = Function;
- let Inst{4-0} = Fc;
-}
-
-// 3.3.5 PALcode instruction format
-class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin>
- : InstAlpha<opcode, asmstr, itin> {
- let OutOperandList = (outs);
- let InOperandList = OL;
- bits<26> Function;
-
- let Inst{25-0} = Function;
-}
-
-
-// Pseudo instructions.
-class PseudoInstAlpha<dag OOL, dag IOL, string nm, list<dag> pattern, InstrItinClass itin>
- : InstAlpha<0, nm, itin> {
- let OutOperandList = OOL;
- let InOperandList = IOL;
- let Pattern = pattern;
-
-}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
deleted file mode 100644
index 8df2ed75f625..000000000000
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ /dev/null
@@ -1,382 +0,0 @@
-//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaInstrInfo.h"
-#include "AlphaMachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-
-#define GET_INSTRINFO_CTOR
-#include "AlphaGenInstrInfo.inc"
-using namespace llvm;
-
-AlphaInstrInfo::AlphaInstrInfo()
- : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
- RI(*this) {
-}
-
-
-unsigned
-AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- case Alpha::LDL:
- case Alpha::LDQ:
- case Alpha::LDBU:
- case Alpha::LDWU:
- case Alpha::LDS:
- case Alpha::LDT:
- if (MI->getOperand(1).isFI()) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned
-AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- case Alpha::STL:
- case Alpha::STQ:
- case Alpha::STB:
- case Alpha::STW:
- case Alpha::STS:
- case Alpha::STT:
- if (MI->getOperand(1).isFI()) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-static bool isAlphaIntCondCode(unsigned Opcode) {
- switch (Opcode) {
- case Alpha::BEQ:
- case Alpha::BNE:
- case Alpha::BGE:
- case Alpha::BGT:
- case Alpha::BLE:
- case Alpha::BLT:
- case Alpha::BLBC:
- case Alpha::BLBS:
- return true;
- default:
- return false;
- }
-}
-
-unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 2 || Cond.size() == 0) &&
- "Alpha branch conditions have two components!");
-
- // One-way branch.
- if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch
- BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
- else // Conditional branch
- if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- else
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- return 1;
- }
-
- // Two-way Conditional Branch.
- if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- else
- BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
- .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
- return 2;
-}
-
-void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- } else {
- llvm_unreachable("Attempt to copy register that is not GPR or FPR");
- }
-}
-
-void
-AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- //cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
- // << FrameIdx << "\n";
- //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- if (RC == Alpha::F4RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::STS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::F8RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::STT))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::GPRCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::STQ))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else
- llvm_unreachable("Unhandled register class");
-}
-
-void
-AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- //cerr << "Trying to load " << getPrettyName(DestReg) << " to "
- // << FrameIdx << "\n";
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- if (RC == Alpha::F4RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg)
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::F8RCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg)
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else if (RC == Alpha::GPRCRegisterClass)
- BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg)
- .addFrameIndex(FrameIdx).addReg(Alpha::F31);
- else
- llvm_unreachable("Unhandled register class");
-}
-
-static unsigned AlphaRevCondCode(unsigned Opcode) {
- switch (Opcode) {
- case Alpha::BEQ: return Alpha::BNE;
- case Alpha::BNE: return Alpha::BEQ;
- case Alpha::BGE: return Alpha::BLT;
- case Alpha::BGT: return Alpha::BLE;
- case Alpha::BLE: return Alpha::BGT;
- case Alpha::BLT: return Alpha::BGE;
- case Alpha::BLBC: return Alpha::BLBS;
- case Alpha::BLBS: return Alpha::BLBC;
- case Alpha::FBEQ: return Alpha::FBNE;
- case Alpha::FBNE: return Alpha::FBEQ;
- case Alpha::FBGE: return Alpha::FBLT;
- case Alpha::FBGT: return Alpha::FBLE;
- case Alpha::FBLE: return Alpha::FBGT;
- case Alpha::FBLT: return Alpha::FBGE;
- default:
- llvm_unreachable("Unknown opcode");
- }
- return 0; // Not reached
-}
-
-// Branch analysis.
-bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
-                                   MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (LastInst->getOpcode() == Alpha::BR) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I ||
- LastInst->getOpcode() == Alpha::COND_BRANCH_F) {
- // Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(2).getMBB();
- Cond.push_back(LastInst->getOperand(0));
- Cond.push_back(LastInst->getOperand(1));
- return false;
- }
- // Otherwise, don't know what this is.
- return true;
- }
-
- // Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() &&
- isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it.
- if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I ||
- SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) &&
- LastInst->getOpcode() == Alpha::BR) {
- TBB = SecondLastInst->getOperand(2).getMBB();
- Cond.push_back(SecondLastInst->getOperand(0));
- Cond.push_back(SecondLastInst->getOperand(1));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two Alpha::BRs, handle it. The second one is not
- // executed, so remove it.
- if (SecondLastInst->getOpcode() == Alpha::BR &&
- LastInst->getOpcode() == Alpha::BR) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
-
-unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (I->getOpcode() != Alpha::BR &&
- I->getOpcode() != Alpha::COND_BRANCH_I &&
- I->getOpcode() != Alpha::COND_BRANCH_F)
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin()) return 1;
- --I;
- if (I->getOpcode() != Alpha::COND_BRANCH_I &&
- I->getOpcode() != Alpha::COND_BRANCH_F)
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
-}
-
-void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- DebugLoc DL;
- BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
-}
-
-bool AlphaInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
- Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm()));
- return false;
-}
-
-/// getGlobalBaseReg - Return a virtual register initialized with the
-/// global base register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
- unsigned GlobalBaseReg = AlphaFI->getGlobalBaseReg();
- if (GlobalBaseReg != 0)
- return GlobalBaseReg;
-
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
- GlobalBaseReg).addReg(Alpha::R29);
- RegInfo.addLiveIn(Alpha::R29);
-
- AlphaFI->setGlobalBaseReg(GlobalBaseReg);
- return GlobalBaseReg;
-}
-
-/// getGlobalRetAddr - Return a virtual register initialized with the
-/// global return address register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
- AlphaMachineFunctionInfo *AlphaFI = MF->getInfo<AlphaMachineFunctionInfo>();
- unsigned GlobalRetAddr = AlphaFI->getGlobalRetAddr();
- if (GlobalRetAddr != 0)
- return GlobalRetAddr;
-
- // Insert the set of GlobalRetAddr into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
- GlobalRetAddr).addReg(Alpha::R26);
- RegInfo.addLiveIn(Alpha::R26);
-
- AlphaFI->setGlobalRetAddr(GlobalRetAddr);
- return GlobalRetAddr;
-}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
deleted file mode 100644
index 337a85cdf22d..000000000000
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAINSTRUCTIONINFO_H
-#define ALPHAINSTRUCTIONINFO_H
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "AlphaRegisterInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "AlphaGenInstrInfo.inc"
-
-namespace llvm {
-
-class AlphaInstrInfo : public AlphaGenInstrInfo {
- const AlphaRegisterInfo RI;
-public:
- AlphaInstrInfo();
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
-  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- /// getGlobalBaseReg - Return a virtual register initialized with the
-  /// global base register value. Output instructions required to
- /// initialize the register in the function entry block, if necessary.
- ///
- unsigned getGlobalBaseReg(MachineFunction *MF) const;
-
- /// getGlobalRetAddr - Return a virtual register initialized with the
-  /// global return address register value. Output instructions required to
- /// initialize the register in the function entry block, if necessary.
- ///
- unsigned getGlobalRetAddr(MachineFunction *MF) const;
-};
-
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
deleted file mode 100644
index c8c9377c3d8d..000000000000
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ /dev/null
@@ -1,1159 +0,0 @@
-//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-include "AlphaInstrFormats.td"
-
-//********************
-//Custom DAG Nodes
-//********************
-
-def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [
- SDTCisFP<1>, SDTCisFP<0>
-]>;
-def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>;
-def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>;
-def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>;
-def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>;
-def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>;
-def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>;
-
-def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-// These are target-independent nodes, but have target-specific formats.
-def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>;
-def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>,
- SDTCisVT<1, i64> ]>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-//********************
-// Patterns for matching
-//********************
-def invX : SDNodeXForm<imm, [{ //invert
- return getI64Imm(~N->getZExtValue());
-}]>;
-def negX : SDNodeXForm<imm, [{ //negate
- return getI64Imm(~N->getZExtValue() + 1);
-}]>;
-def SExt32 : SDNodeXForm<imm, [{ //signed extend int to long
- return getI64Imm(((int64_t)N->getZExtValue() << 32) >> 32);
-}]>;
-def SExt16 : SDNodeXForm<imm, [{ //signed extend int to long
- return getI64Imm(((int64_t)N->getZExtValue() << 48) >> 48);
-}]>;
-def LL16 : SDNodeXForm<imm, [{ //lda part of constant
- return getI64Imm(get_lda16(N->getZExtValue()));
-}]>;
-def LH16 : SDNodeXForm<imm, [{ //ldah part of constant (or more if too big)
- return getI64Imm(get_ldah16(N->getZExtValue()));
-}]>;
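LL16 and LH16 split a constant into an lda/ldah pair. A sketch of the arithmetic
(the behavior of get_lda16/get_ldah16 is stated here as an assumption; the
helpers themselves live with the instruction selector):

    //   low  = (int16_t)C          // lda part: sign-extended low 16 bits
    //   high = (C - low) >> 16     // ldah part; compensates for sign extension
    //   so (high << 16) + low == C for constants reachable this way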
-def iZAPX : SDNodeXForm<and, [{ // get imm to ZAPi
- ConstantSDNode *RHS = cast<ConstantSDNode>(N->getOperand(1));
- return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue()));
-}]>;
-def nearP2X : SDNodeXForm<imm, [{
- return getI64Imm(Log2_64(getNearPower2((uint64_t)N->getZExtValue())));
-}]>;
-def nearP2RemX : SDNodeXForm<imm, [{
- uint64_t x =
- abs64(N->getZExtValue() - getNearPower2((uint64_t)N->getZExtValue()));
- return getI64Imm(Log2_64(x));
-}]>;
-
-def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field
- return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
-}]>;
-def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field
- return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue();
-}], invX>;
-def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field
- return ((uint64_t)~N->getZExtValue() + 1) ==
- (uint8_t)((uint64_t)~N->getZExtValue() + 1);
-}], negX>;
-def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field
- return ((int64_t)N->getZExtValue() << 48) >> 48 ==
- (int64_t)N->getZExtValue();
-}]>;
-def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field
- return ((int64_t)N->getZExtValue() << 48) >> 48 ==
- ((int64_t)N->getZExtValue() << 32) >> 32;
-}], SExt16>;
-
-def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
- ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!RHS) return 0;
- uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
- return build != 0;
-}]>;
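Background for iZAPX and zappat (a sketch of the ISA fact they rely on): Alpha's
zapnot takes an 8-bit literal in which a set bit i preserves byte i of the source
and a clear bit zeroes it, so an AND whose mask is built entirely from 0x00 and
0xff bytes can be selected as a single zapnot; get_zapImm computes that byte mask
and returns 0 when none exists.

    //   and $r, 0x00000000ffffffff  ==>  zapnot $r, 0x0f   (keep bytes 0..3)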
-
-def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0
- (void)N; // silence warning.
- return true;
-}]>;
-
-def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>;
-def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>;
-def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>;
-def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>;
-def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>;
-def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>;
-def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>;
-def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>;
-def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>;
-def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>;
-
-def immRemP2n : PatLeaf<(imm), [{
- return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) -
- N->getZExtValue());
-}]>;
-def immRemP2 : PatLeaf<(imm), [{
- return isPowerOf2_64(N->getZExtValue() -
- getNearPower2((uint64_t)N->getZExtValue()));
-}]>;
-def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi
- int64_t d = abs64((int64_t)N->getZExtValue() -
- (int64_t)getNearPower2((uint64_t)N->getZExtValue()));
- if (isPowerOf2_64(d)) return false;
- switch (d) {
- case 1: case 3: case 5: return false;
- default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue();
- };
-}]>;
-
-def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>;
-def add4 : PatFrag<(ops node:$op1, node:$op2),
- (add (shl node:$op1, 2), node:$op2)>;
-def sub4 : PatFrag<(ops node:$op1, node:$op2),
- (sub (shl node:$op1, 2), node:$op2)>;
-def add8 : PatFrag<(ops node:$op1, node:$op2),
- (add (shl node:$op1, 3), node:$op2)>;
-def sub8 : PatFrag<(ops node:$op1, node:$op2),
- (sub (shl node:$op1, 3), node:$op2)>;
-class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
-class CmpOpFrag<dag res> : PatFrag<(ops node:$R), res>;
-
-//Pseudo ops for selection
-
-def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>;
-
-let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in {
-def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt),
- "; ADJUP $amt",
- [(callseq_start timm:$amt)], s_pseudo>;
-def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2),
- "; ADJDOWN $amt1",
- [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>;
-}
-
-def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>;
-def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>;
-def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m),
- "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>;
-
-
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
-def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>;
-
-def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-
-def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "",
- [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>;
-}
-
-//***********************
-//Real instructions
-//***********************
-
-//Operation Form:
-
-//conditional moves, int
-
-multiclass cmov_inst<bits<7> fun, string asmstr, PatFrag OpNode> {
-def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
- [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>;
-def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"),
- [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>;
-}
-
-defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>;
-defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>;
-defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>;
-defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>;
-defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>;
-defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>;
-defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>;
-defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>;
-
-//General pattern for cmov
-def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2),
- (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>;
-def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2),
- (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>;
-
-//Invert sense when we can for constants:
-def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE),
- (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>;
-
-multiclass all_inst<bits<6> opc, bits<7> funl, bits<7> funq,
- string asmstr, PatFrag OpNode, InstrItinClass itin> {
- def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"),
- [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>;
- def Li : OFormL<opc, funl, !strconcat(asmstr, "l $RA,$L,$RC"),
- [(set GPRC:$RC, (intop (OpNode GPRC:$RA, immUExt8:$L)))], itin>;
- def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
- def Qi : OFormL<opc, funq, !strconcat(asmstr, "q $RA,$L,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
-}
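-//Each all_inst expands to four defs: the L (longword) variants wrap the
-//result in intop (sext_inreg i32), matching the Alpha convention that
-//32-bit values live sign-extended in 64-bit registers, while the Q
-//(quadword) variants are plain 64-bit ops; the r/i suffixes pick a
-//register or 8-bit unsigned immediate for the second operand.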
-
-defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>;
-defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>;
-defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>;
-defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>;
-defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>;
-defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>;
-defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>;
-//Const cases since legalize does sub x, int -> add x, inv(int) + 1
-def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>;
-def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>;
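-//e.g. (sub x, 5) comes out of legalize as (add x, -5); immUExt8neg
-//(defined with the other leaves) matches the -5 because its magnitude
-//fits in 8 unsigned bits, so the patterns above emit "subq x,5,r"
-//instead of materializing -5 in a register.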
-
-multiclass log_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
-def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>;
-def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8:$L))], itin>;
-}
-multiclass inv_inst<bits<6> opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> {
-def r : OForm<opc, fun, !strconcat(asmstr, " $RA,$RB,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, (not GPRC:$RB)))], itin>;
-def i : OFormL<opc, fun, !strconcat(asmstr, " $RA,$L,$RC"),
- [(set GPRC:$RC, (OpNode GPRC:$RA, immUExt8inv:$L))], itin>;
-}
-
-defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>;
-defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>;
-defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>;
-defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>;
-defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>;
-defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>;
-
-defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>;
-defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>;
-defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>;
-defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>;
-
-def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC",
- [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>;
-def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC",
- [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>;
-def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC",
- [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>;
-def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC",
- [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>;
-def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC",
- [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>;
-def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC",
- [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 4294967295))], s_ishf>;
-def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC",
- [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>;
-def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC",
- [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>;
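-//The EXT patterns above match the expanded dag directly: extbl shifts
-//$RA right by 8*$RB bits ($RB holds a byte offset) and keeps the low
-//byte, so e.g. "extbl a,3,r" extracts byte 3 of a; extwl/extll do the
-//same for 16- and 32-bit fields.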
-
-//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low
-//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high
-//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high
-//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low
-//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high
-//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high
-//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low
-//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low
-//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high
-//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high
-//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low
-
-//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low
-//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low
-//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high
-//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high
-//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low
-//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low
-//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high
-//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high
-//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low
-//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low
-//def INSWH : OForm< 0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high
-//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high
-//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low
-//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low
-
-//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low
-//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low
-//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high
-//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high
-//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low
-//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low
-//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high
-//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high
-//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low
-//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low
-//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high
-//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high
-//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low
-//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low
-
-def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>;
-
-// Define the pattern that produces ZAPNOTi.
-def : Pat<(zappat:$imm GPRC:$RA),
- (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>;
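-//zapnot keeps the bytes of $RA selected by the 8-bit mask $L and zeroes
-//the rest; zappat/iZAPX (built on get_zapImm above) recover that byte
-//mask from a 64-bit AND mask, e.g. and x,0x00ff00ff becomes zapnot x,5
-//(mask 0b00000101 keeps bytes 0 and 2).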
-
-
-//Comparison, int
-//This uses only a fraction of what cmpbge can do, but it still saves something
-def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC",
- [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>;
-def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC",
- [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>;
-def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC",
- [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC",
- [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC",
- [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC",
- [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC",
- [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC",
- [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC",
- [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC",
- [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>;
-def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC",
- [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>;
-def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC",
- [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>;
-
-//Patterns for unsupported int comparisons
-def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>;
-def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>;
-
-def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>;
-def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>;
-def : Pat<(setuge immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>;
-def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>;
-def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>;
-
-def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
-def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
-
-def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
-def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>;
-
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
- def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
- def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
-}
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in
-def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0",
- [(brind GPRC:$RS)], s_jsr>; //Jump
-
-let isCall = 1, Ra = 26,
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
- R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
- F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in {
- def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine
-}
-let isCall = 1, Ra = 26, Rb = 27, disp = 0,
- Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19,
- R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
- F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in {
- def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine
-}
-
-let isCall = 1, Ra = 23, Rb = 27, disp = 0,
- Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in
- def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem
-
-
-def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return
-
-
-let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)",
- [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)",
- [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)",
- [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)",
- [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>;
-def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>;
-}
-
-
-let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in {
-def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)",
- [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow",
- [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)",
- [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STWr : MForm<0x0D, 0, "stw $RA,$DISP($RB)\t\t!gprellow",
- [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)",
- [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow",
- [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)",
- [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>;
-def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow",
- [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>;
-}
-
-//Load address
-let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)",
- [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>;
-def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow",
- [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address
-def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)",
- [], s_lda>; //Load address high
-def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh",
- [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high
-}
-
-let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in {
-def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)",
- [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
-def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow",
- [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
-}
-let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)",
- [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
-def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow",
- [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
-}
-let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in {
-def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)",
- [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>;
-def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow",
- [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>;
-}
-let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in {
-def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)",
- [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>;
-def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow",
- [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>;
-}
-
-
-//constpool rels
-def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDQr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDLr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDBUr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDWUr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)),
- (LDAr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)),
- (LDAHr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDSr tconstpool:$DISP, GPRC:$RB)>;
-def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))),
- (LDTr tconstpool:$DISP, GPRC:$RB)>;
-
-//jumptable rels
-def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)),
- (LDAHr tjumptable:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)),
- (LDAr tjumptable:$DISP, GPRC:$RB)>;
-
-
-//misc ext patterns
-def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))),
- (LDBU immSExt16:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))),
- (LDWU immSExt16:$DISP, GPRC:$RB)>;
-def : Pat<(i64 (extloadi32 (add GPRC:$RB, immSExt16:$DISP))),
- (LDL immSExt16:$DISP, GPRC:$RB)>;
-
-//0 disp patterns
-def : Pat<(i64 (load GPRC:$addr)),
- (LDQ 0, GPRC:$addr)>;
-def : Pat<(f64 (load GPRC:$addr)),
- (LDT 0, GPRC:$addr)>;
-def : Pat<(f32 (load GPRC:$addr)),
- (LDS 0, GPRC:$addr)>;
-def : Pat<(i64 (sextloadi32 GPRC:$addr)),
- (LDL 0, GPRC:$addr)>;
-def : Pat<(i64 (zextloadi16 GPRC:$addr)),
- (LDWU 0, GPRC:$addr)>;
-def : Pat<(i64 (zextloadi8 GPRC:$addr)),
- (LDBU 0, GPRC:$addr)>;
-def : Pat<(i64 (extloadi8 GPRC:$addr)),
- (LDBU 0, GPRC:$addr)>;
-def : Pat<(i64 (extloadi16 GPRC:$addr)),
- (LDWU 0, GPRC:$addr)>;
-def : Pat<(i64 (extloadi32 GPRC:$addr)),
- (LDL 0, GPRC:$addr)>;
-
-def : Pat<(store GPRC:$DATA, GPRC:$addr),
- (STQ GPRC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(store F8RC:$DATA, GPRC:$addr),
- (STT F8RC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(store F4RC:$DATA, GPRC:$addr),
- (STS F4RC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr),
- (STL GPRC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr),
- (STW GPRC:$DATA, 0, GPRC:$addr)>;
-def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr),
- (STB GPRC:$DATA, 0, GPRC:$addr)>;
-
-
-//Load address, relocated gpdist form
-let OutOperandList = (outs GPRC:$RA),
- InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM),
- mayLoad = 1 in {
-def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address
-def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address high
-}
-
-//Load quad, relocated literal form
-let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in
-def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal",
- [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>;
-def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB),
- (LDQl texternalsym:$ext, GPRC:$RB)>;
-
-let OutOperandList = (outs GPRC:$RR),
- InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB),
- Constraints = "$RA = $RR",
- DisableEncoding = "$RR" in {
-def STQ_C : MForm<0x2F, 0, "stq_c $RA,$DISP($RB)", [], s_ist>;
-def STL_C : MForm<0x2E, 0, "stl_c $RA,$DISP($RB)", [], s_ist>;
-}
-let OutOperandList = (outs GPRC:$RA),
- InOperandList = (ins s64imm:$DISP, GPRC:$RB),
- mayLoad = 1 in {
-def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>;
-def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>;
-}
-
-def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter
-def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
-def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
-
-def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
- (WMB)>;
-def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
- (MB)>;
-
-def : Pat<(atomic_fence (imm), (imm)), (MB)>;
-
-//Basic Floating point ops
-
-//Floats
-
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
-def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC",
- [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>;
-
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in {
-def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>;
-def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>;
-def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>;
-def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC",
- [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>;
-
-def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>;
-def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
-def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>;
-}
-
-//Doubles
-
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC",
- [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>;
-
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in {
-def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>;
-def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>;
-def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>;
-def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC",
- [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>;
-
-def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>;
-def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
-def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>;
-
-def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>;
-def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>;
-def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>;
-def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>;
-// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>;
-}
-
-//More CPYS forms:
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in {
-def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>;
-def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>;
-}
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in {
-def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC",
- [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>;
-def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent
-def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
- [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>;
-}
-
-//conditional moves, floats
-let OutOperandList = (outs F4RC:$RDEST),
- InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
- Constraints = "$RTRUE = $RDEST" in {
-def FCMOVEQS : FPForm<0x17, 0x02A,
- "fcmoveq $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if = zero
-def FCMOVGES : FPForm<0x17, 0x02D,
- "fcmovge $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if >= zero
-def FCMOVGTS : FPForm<0x17, 0x02F,
- "fcmovgt $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if > zero
-def FCMOVLES : FPForm<0x17, 0x02E,
- "fcmovle $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if <= zero
-def FCMOVLTS : FPForm<0x17, 0x02C,
- "fcmovlt $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; // FCMOVE if < zero
-def FCMOVNES : FPForm<0x17, 0x02B,
- "fcmovne $RCOND,$RTRUE,$RDEST",
- [], s_fcmov>; //FCMOVE if != zero
-}
-//conditional moves, doubles
-let OutOperandList = (outs F8RC:$RDEST),
- InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
- Constraints = "$RTRUE = $RDEST" in {
-def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
-}
-
-//misc FP selects
-//Select double
-
-def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf),
- (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-
-//Select single
-def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>;
-
-def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>;
-
-def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf),
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>;
-
-
-
-let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in
-def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC",
- [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating
-let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in
-def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC",
- [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
-def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC",
- [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in
-def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC",
- [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move
-
-
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC",
- [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>;
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC",
- [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>;
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC",
- [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>;
-let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in
-def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC",
- [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>;
-let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in
-def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC",
- [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>;
-
-def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf),
- (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>;
-def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf),
- (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>;
-
-/////////////////////////////////////////////////////////
-//Branching
-/////////////////////////////////////////////////////////
-class br_icc<bits<6> opc, string asmstr>
- : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst),
- !strconcat(asmstr, " $R,$dst"), s_icbr>;
-class br_fcc<bits<6> opc, string asmstr>
- : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst),
- !strconcat(asmstr, " $R,$dst"), s_fbr>;
-
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-let Ra = 31, isBarrier = 1 in
-def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
-
-def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
- "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst",
- s_icbr>;
-def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst),
- "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst",
- s_fbr>;
-//Branches, int
-def BEQ : br_icc<0x39, "beq">;
-def BGE : br_icc<0x3E, "bge">;
-def BGT : br_icc<0x3F, "bgt">;
-def BLBC : br_icc<0x38, "blbc">;
-def BLBS : br_icc<0x3C, "blbs">;
-def BLE : br_icc<0x3B, "ble">;
-def BLT : br_icc<0x3A, "blt">;
-def BNE : br_icc<0x3D, "bne">;
-
-//Branches, float
-def FBEQ : br_fcc<0x31, "fbeq">;
-def FBGE : br_fcc<0x36, "fbge">;
-def FBGT : br_fcc<0x37, "fbgt">;
-def FBLE : br_fcc<0x33, "fble">;
-def FBLT : br_fcc<0x32, "fblt">;
-def FBNE : br_fcc<0x35, "fbne">;
-}
-
-//An ugly trick to get the branch opcode as an immediate we can use
-def immBRCond : SDNodeXForm<imm, [{
- switch((uint64_t)N->getZExtValue()) {
- default: assert(0 && "Unknown branch type");
- case 0: return getI64Imm(Alpha::BEQ);
- case 1: return getI64Imm(Alpha::BNE);
- case 2: return getI64Imm(Alpha::BGE);
- case 3: return getI64Imm(Alpha::BGT);
- case 4: return getI64Imm(Alpha::BLE);
- case 5: return getI64Imm(Alpha::BLT);
- case 6: return getI64Imm(Alpha::BLBS);
- case 7: return getI64Imm(Alpha::BLBC);
- case 20: return getI64Imm(Alpha::FBEQ);
- case 21: return getI64Imm(Alpha::FBNE);
- case 22: return getI64Imm(Alpha::FBGE);
- case 23: return getI64Imm(Alpha::FBGT);
- case 24: return getI64Imm(Alpha::FBLE);
- case 25: return getI64Imm(Alpha::FBLT);
- }
-}]>;
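-//So selection is two-step: the brcond patterns below emit a
-//COND_BRANCH_I/F pseudo carrying one of these small codes, and
-//immBRCond turns the code into the real branch opcode (e.g. 0 becomes
-//Alpha::BEQ) so the pseudo can later be expanded into the real
-//conditional branch.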
-
-//Int cond patterns
-def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP),
- (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP),
- (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
-
-def : Pat<(brcond GPRC:$RA, bb:$DISP),
- (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP),
- (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP),
- (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>;
-
-//FP cond patterns
-def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 22), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>;
-def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>;
-
-
-def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-
-def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>;
-
-def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>;
-
-
-def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>;
-
-def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
-def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP),
- (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>;
-
-//End Branches
-
-//S_floating : IEEE Single
-//T_floating : IEEE Double
-
-//Unused instructions
-//Mnemonic Format Opcode Description
-//CALL_PAL Pcd 00 Trap to PALcode
-//ECB Mfc 18.E800 Evict cache block
-//EXCB Mfc 18.0400 Exception barrier
-//FETCH Mfc 18.8000 Prefetch data
-//FETCH_M Mfc 18.A000 Prefetch data, modify intent
-//LDQ_U Mem 0B Load unaligned quadword
-//MB Mfc 18.4000 Memory barrier
-//STQ_U Mem 0F Store unaligned quadword
-//TRAPB Mfc 18.0000 Trap barrier
-//WH64 Mfc 18.F800 Write hint - 64 bytes
-//WMB Mfc 18.4400 Write memory barrier
-//MF_FPCR F-P 17.025 Move from FPCR
-//MT_FPCR F-P 17.024 Move to FPCR
-//These are in the Multimedia extensions, so let's not use them yet
-//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum
-//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum
-//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum
-//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum
-//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum
-//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum
-//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum
-//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum
-//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error
-//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes
-//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes
-//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords
-//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words
-//CVTLQ F-P 17.010 Convert longword to quadword
-//CVTQL F-P 17.030 Convert quadword to longword
-
-
-//Constant handling
-
-def immConst2Part : PatLeaf<(imm), [{
- //true if imm fits in an LDAH/LDA pair
- int64_t val = (int64_t)N->getZExtValue();
- return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW);
-}]>;
-def immConst2PartInt : PatLeaf<(imm), [{
- //true if imm fits in an LDAH/LDA pair with zeroext
- uint64_t uval = N->getZExtValue();
- int32_t val32 = (int32_t)uval;
- return ((uval >> 32) == 0 && //empty upper bits
- val32 <= IMM_FULLHIGH);
-// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True
-}], SExt32>;
-
-def : Pat<(i64 immConst2Part:$imm),
- (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>;
-
-def : Pat<(i64 immSExt16:$imm),
- (LDA immSExt16:$imm, R31)>;
-
-def : Pat<(i64 immSExt16int:$imm),
- (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>;
-def : Pat<(i64 immConst2PartInt:$imm),
- (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))),
- (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>;
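-//e.g. 0x12348000 does not fit in 16 bits, so it is built as
-//  ldah r,0x1235($31)  ;0x1235*65536 = 0x12350000
-//  lda  r,-0x8000(r)   ;plus sign-extended low 16 -> 0x12348000
-//with a trailing "zapnot r,15" when the result must stay zero-extended
-//to 32 bits (the immConst2PartInt case above).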
-
-
-//TODO: I want to just define these like this!
-//def : Pat<(i64 0),
-// (R31)>;
-//def : Pat<(f64 0.0),
-// (F31)>;
-//def : Pat<(f64 -0.0),
-// (CPYSNT F31, F31)>;
-//def : Pat<(f32 0.0),
-// (F31)>;
-//def : Pat<(f32 -0.0),
-// (CPYSNS F31, F31)>;
-
-//Misc Patterns:
-
-def : Pat<(sext_inreg GPRC:$RB, i32),
- (ADDLi GPRC:$RB, 0)>;
-
-def : Pat<(fabs F8RC:$RB),
- (CPYST F31, F8RC:$RB)>;
-def : Pat<(fabs F4RC:$RB),
- (CPYSS F31, F4RC:$RB)>;
-def : Pat<(fneg F8RC:$RB),
- (CPYSNT F8RC:$RB, F8RC:$RB)>;
-def : Pat<(fneg F4RC:$RB),
- (CPYSNS F4RC:$RB, F4RC:$RB)>;
-
-def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)),
- (CPYSNS F4RC:$B, F4RC:$A)>;
-def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)),
- (CPYSNT F8RC:$B, F8RC:$A)>;
-def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)),
- (CPYSNSt F8RC:$B, F4RC:$A)>;
-def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)),
- (CPYSNTs F4RC:$B, F8RC:$A)>;
-
-//Yes, signed multiply high is ugly
-def : Pat<(mulhs GPRC:$RA, GPRC:$RB),
- (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA),
- (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>;
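-//This uses the identity mulhs(a,b) = umulh(a,b) - (a<0 ? b : 0) - (b<0 ? a : 0):
-//each CMOVGEr above yields one of the correction terms ($31 reads as
-//zero), fixing up the unsigned high part for negative operands.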
-
-//Stupid crazy arithmetic stuff:
-let AddedComplexity = 1 in {
-def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>;
-
-//slight tree expansion if we are multiplying near a power of 2
-//n is above a power of 2
-def : Pat<(mul GPRC:$RA, immRem1:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>;
-def : Pat<(mul GPRC:$RA, immRem2:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
-def : Pat<(mul GPRC:$RA, immRem3:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
-def : Pat<(mul GPRC:$RA, immRem4:$imm),
- (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>;
-def : Pat<(mul GPRC:$RA, immRem5:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
-def : Pat<(mul GPRC:$RA, immRemP2:$imm),
- (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>;
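-//e.g. mul x,33 hits immRem1 (33 = 2^5 + 1) and becomes (x << 5) + x,
-//and mul x,40 hits immRemP2 (40 = 2^5 + 2^3) and becomes
-//(x << 5) + (x << 3); nearP2X/nearP2RemX compute the shift amounts
-//from the immediate.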
-
-//n is below a power of 2
-//FIXME: figure out why something is truncating the imm to 32 bits
-// this will fix 2007-11-27-mulneg3
-//def : Pat<(mul GPRC:$RA, immRem1n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>;
-//def : Pat<(mul GPRC:$RA, immRem2n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>;
-//def : Pat<(mul GPRC:$RA, immRem3n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>;
-//def : Pat<(mul GPRC:$RA, immRem4n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>;
-//def : Pat<(mul GPRC:$RA, immRem5n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>;
-//def : Pat<(mul GPRC:$RA, immRemP2n:$imm),
-// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>;
-} //Added complexity
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
deleted file mode 100644
index 85fbfd1affe2..000000000000
--- a/lib/Target/Alpha/AlphaLLRP.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Here we check for potential replay traps introduced by the spiller.
-// We also align some branch targets if we can do so for free.
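-// On early Alpha implementations, two memory operations in the same
-// aligned four-instruction fetch block that hit the same address can
-// stall in a replay trap, and the spiller's back-to-back stack
-// references are a common source.  This pass pads such sequences with
-// nops (bis $31,$31,$31) so the second access lands in the next fetch
-// block.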
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "alpha-nops"
-#include "Alpha.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-using namespace llvm;
-
-STATISTIC(nopintro, "Number of nops inserted");
-STATISTIC(nopalign, "Number of nops inserted for alignment");
-
-namespace {
- cl::opt<bool>
- AlignAll("alpha-align-all", cl::Hidden,
- cl::desc("Align all blocks"));
-
- struct AlphaLLRPPass : public MachineFunctionPass {
- /// Target machine description which we query for reg. names, data
- /// layout, etc.
- ///
- AlphaTargetMachine &TM;
-
- static char ID;
- AlphaLLRPPass(AlphaTargetMachine &tm)
- : MachineFunctionPass(ID), TM(tm) { }
-
- virtual const char *getPassName() const {
- return "Alpha NOP inserter";
- }
-
- bool runOnMachineFunction(MachineFunction &F) {
- const TargetInstrInfo *TII = F.getTarget().getInstrInfo();
- bool Changed = false;
- MachineInstr* prev[3] = {0,0,0};
- DebugLoc dl;
- unsigned count = 0;
- for (MachineFunction::iterator FI = F.begin(), FE = F.end();
- FI != FE; ++FI) {
- MachineBasicBlock& MBB = *FI;
- bool ub = false;
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- if (count%4 == 0)
- prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary
- ++count;
- MachineInstr *MI = I++;
- switch (MI->getOpcode()) {
- case Alpha::LDQ: case Alpha::LDL:
- case Alpha::LDWU: case Alpha::LDBU:
- case Alpha::LDT: case Alpha::LDS:
- case Alpha::STQ: case Alpha::STL:
- case Alpha::STW: case Alpha::STB:
- case Alpha::STT: case Alpha::STS:
- if (MI->getOperand(2).getReg() == Alpha::R30) {
- if (prev[0] &&
- prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&&
- prev[0]->getOperand(1).getImm() == MI->getOperand(1).getImm()){
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
- Changed = true; nopintro += 1;
- count += 1;
- } else if (prev[1]
- && prev[1]->getOperand(2).getReg() ==
- MI->getOperand(2).getReg()
- && prev[1]->getOperand(1).getImm() ==
- MI->getOperand(1).getImm()) {
- prev[0] = prev[2];
- prev[1] = prev[2] = 0;
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31)
- .addReg(Alpha::R31);
- Changed = true; nopintro += 2;
- count += 2;
- } else if (prev[2]
- && prev[2]->getOperand(2).getReg() ==
- MI->getOperand(2).getReg()
- && prev[2]->getOperand(1).getImm() ==
- MI->getOperand(1).getImm()) {
- prev[0] = prev[1] = prev[2] = 0;
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- Changed = true; nopintro += 3;
- count += 3;
- }
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = MI;
- break;
- }
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- break;
- case Alpha::ALTENT:
- case Alpha::MEMLABEL:
- case Alpha::PCLABEL:
- --count;
- break;
- case Alpha::BR:
- case Alpha::JMP:
- ub = true;
- //fall through
- default:
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- break;
- }
- }
- if (ub || AlignAll) {
- //we can align stuff for free at this point
- while (count % 4) {
- BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31)
- .addReg(Alpha::R31).addReg(Alpha::R31);
- ++count;
- ++nopalign;
- prev[0] = prev[1];
- prev[1] = prev[2];
- prev[2] = 0;
- }
- }
- }
- return Changed;
- }
- };
- char AlphaLLRPPass::ID = 0;
-} // end of anonymous namespace
-
-FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) {
- return new AlphaLLRPPass(tm);
-}
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
deleted file mode 100644
index 186738c20c70..000000000000
--- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===- AlphaMachineFunctionInfo.h - Alpha machine function info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares Alpha-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAMACHINEFUNCTIONINFO_H
-#define ALPHAMACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// AlphaMachineFunctionInfo - This class is derived from MachineFunctionInfo
-/// and contains private Alpha target-specific information for each
-/// MachineFunction.
-class AlphaMachineFunctionInfo : public MachineFunctionInfo {
- /// GlobalBaseReg - keeps track of the virtual register initialized for
- /// use as the global base register. This is used for PIC in some PIC
- /// relocation models.
- unsigned GlobalBaseReg;
-
- /// GlobalRetAddr - keeps track of the virtual register initialized for
- /// the return address value.
- unsigned GlobalRetAddr;
-
- /// VarArgsOffset - The offset to the first vaarg.
- int VarArgsOffset;
- /// VarArgsBase - The base FrameIndex of the vararg area.
- int VarArgsBase;
-
-public:
- AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0),
- VarArgsOffset(0), VarArgsBase(0) {}
-
- explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
- GlobalRetAddr(0),
- VarArgsOffset(0),
- VarArgsBase(0) {}
-
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
-
- unsigned getGlobalRetAddr() const { return GlobalRetAddr; }
- void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; }
-
- int getVarArgsOffset() const { return VarArgsOffset; }
- void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; }
-
- int getVarArgsBase() const { return VarArgsBase; }
- void setVarArgsBase(int Base) { VarArgsBase = Base; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
deleted file mode 100644
index 8b6230fa2a24..000000000000
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ /dev/null
@@ -1,199 +0,0 @@
-//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "reginfo"
-#include "Alpha.h"
-#include "AlphaRegisterInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include <cstdlib>
-
-#define GET_REGINFO_TARGET_DESC
-#include "AlphaGenRegisterInfo.inc"
-
-using namespace llvm;
-
-AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
- : AlphaGenRegisterInfo(Alpha::R26), TII(tii) {
-}
-
-static long getUpper16(long l) {
- long y = l / Alpha::IMM_MULT;
- if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH)
- ++y;
- return y;
-}
-
-static long getLower16(long l) {
- long h = getUpper16(l);
- return l - h * Alpha::IMM_MULT;
-}
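-
-// Worked example (assuming IMM_MULT == 65536 and IMM_HIGH == 32767 as in
-// Alpha.h): for l = 40000, 40000 % 65536 = 40000 > 32767, so getUpper16
-// rounds up to 1 and getLower16(40000) = 40000 - 65536 = -25536.  LDAH adds
-// 1 << 16 and LDA adds -25536, reconstructing 40000 from two signed 16-bit
-// immediates.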
-
-const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
- static const unsigned CalleeSavedRegs[] = {
- Alpha::R9, Alpha::R10,
- Alpha::R11, Alpha::R12,
- Alpha::R13, Alpha::R14,
- Alpha::F2, Alpha::F3,
- Alpha::F4, Alpha::F5,
- Alpha::F6, Alpha::F7,
- Alpha::F8, Alpha::F9, 0
- };
- return CalleeSavedRegs;
-}
-
-BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
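- // R15 is the frame pointer, R29 the global pointer, R30 the stack
- // pointer, and R31 the hardwired zero register.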
- Reserved.set(Alpha::R15);
- Reserved.set(Alpha::R29);
- Reserved.set(Alpha::R30);
- Reserved.set(Alpha::R31);
- return Reserved;
-}
-
-//===----------------------------------------------------------------------===//
-// Stack Frame Processing methods
-//===----------------------------------------------------------------------===//
-
-void AlphaRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (TFI->hasFP(MF)) {
- // If we have a frame pointer, turn the adjcallstackdown instruction into
- // an 'LDA R30, -<amt>(R30)' and the adjcallstackup instruction into
- // 'LDA R30, <amt>(R30)'.
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount+Align-1)/Align*Align;
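- // e.g. Amount = 36 with Align = 16 becomes (36+15)/16*16 = 48.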
-
- MachineInstr *New;
- if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) {
- New = BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
- .addImm(-Amount).addReg(Alpha::R30);
- } else {
- assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP);
- New = BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30)
- .addImm(Amount).addReg(Alpha::R30);
- }
-
- // Replace the pseudo instruction with a new instruction...
- MBB.insert(I, New);
- }
- }
-
- MBB.erase(I);
-}
-
-// Alpha has a slightly unusual stack layout:
-//   args
-//   <- incoming SP
-//   fixed locals (spills, callee-saved registers, etc.)
-//   <- FP
-//   variable-sized locals
-//   <- SP
-
-void
-AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- bool FP = TFI->hasFP(MF);
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
-
- // Rewrite the operand to use the base register: R30 (SP) or R15 (FP).
- MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false);
-
- // Start from the frame object's offset; the total stack size is added below.
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
-
- DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n");
-
- Offset += MF.getFrameInfo()->getStackSize();
-
- DEBUG(errs() << "Corrected Offset " << Offset
- << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n");
-
- if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) {
- DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: "
- << Offset << "\n");
- // The offset does not fit in a signed 16-bit immediate, so materialize
- // its high half in R28 and rebase the original instruction off R28
- // instead of SP/FP.  First fix up the original instruction:
- MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false);
- MI.getOperand(i).ChangeToImmediate(getLower16(Offset));
- // ...then insert the LDAH that computes the high half:
- MachineInstr *nMI = BuildMI(MF, MI.getDebugLoc(),
- TII.get(Alpha::LDAH), Alpha::R28)
- .addImm(getUpper16(Offset)).addReg(FP ? Alpha::R15 : Alpha::R30);
- MBB.insert(II, nMI);
- } else {
- MI.getOperand(i).ChangeToImmediate(Offset);
- }
-}
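-
-// For illustration: with Offset = 40000 (outside the signed 16-bit range)
-// and no frame pointer, a load through a frame index is rewritten roughly as
-//   LDAH $28, 1($30)       ; $28 = SP + 65536
-//   LDQ  $x, -25536($28)   ; 65536 - 25536 = 40000
-// where $x and the LDQ opcode stand in for the original instruction.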
-
-unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30;
-}
-
-unsigned AlphaRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
- return 0;
-}
-
-unsigned AlphaRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
- return 0;
-}
-
-std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
-{
- std::string s(AlphaRegDesc[reg].Name);
- return s;
-}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
deleted file mode 100644
index e35be273c7cd..000000000000
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ /dev/null
@@ -1,56 +0,0 @@
-//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Alpha implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAREGISTERINFO_H
-#define ALPHAREGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "AlphaGenRegisterInfo.inc"
-
-namespace llvm {
-
-class TargetInstrInfo;
-class Type;
-
-struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
- const TargetInstrInfo &TII;
-
- AlphaRegisterInfo(const TargetInstrInfo &tii);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- // Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
- static std::string getPrettyName(unsigned reg);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
deleted file mode 100644
index 32120d750413..000000000000
--- a/lib/Target/Alpha/AlphaRegisterInfo.td
+++ /dev/null
@@ -1,133 +0,0 @@
-//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Alpha register set.
-//
-//===----------------------------------------------------------------------===//
-
-class AlphaReg<string n> : Register<n> {
- field bits<5> Num;
- let Namespace = "Alpha";
-}
-
-// We identify all our registers with a 5-bit ID, for consistency's sake.
-
-// GPR - One of the 32 64-bit general-purpose registers
-class GPR<bits<5> num, string n> : AlphaReg<n> {
- let Num = num;
-}
-
-// FPR - One of the 32 64-bit floating-point registers
-class FPR<bits<5> num, string n> : AlphaReg<n> {
- let Num = num;
-}
-
-// Conventional register aliases:
-//   FP = $15, RA = $26, PV = $27, GP = $29, SP = $30
-
-// General-purpose registers
-def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>;
-def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>;
-def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>;
-def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>;
-def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>;
-def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>;
-def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>;
-def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>;
-def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>;
-def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>;
-def R10 : GPR<10, "$10">, DwarfRegNum<[10]>;
-def R11 : GPR<11, "$11">, DwarfRegNum<[11]>;
-def R12 : GPR<12, "$12">, DwarfRegNum<[12]>;
-def R13 : GPR<13, "$13">, DwarfRegNum<[13]>;
-def R14 : GPR<14, "$14">, DwarfRegNum<[14]>;
-def R15 : GPR<15, "$15">, DwarfRegNum<[15]>;
-def R16 : GPR<16, "$16">, DwarfRegNum<[16]>;
-def R17 : GPR<17, "$17">, DwarfRegNum<[17]>;
-def R18 : GPR<18, "$18">, DwarfRegNum<[18]>;
-def R19 : GPR<19, "$19">, DwarfRegNum<[19]>;
-def R20 : GPR<20, "$20">, DwarfRegNum<[20]>;
-def R21 : GPR<21, "$21">, DwarfRegNum<[21]>;
-def R22 : GPR<22, "$22">, DwarfRegNum<[22]>;
-def R23 : GPR<23, "$23">, DwarfRegNum<[23]>;
-def R24 : GPR<24, "$24">, DwarfRegNum<[24]>;
-def R25 : GPR<25, "$25">, DwarfRegNum<[25]>;
-def R26 : GPR<26, "$26">, DwarfRegNum<[26]>;
-def R27 : GPR<27, "$27">, DwarfRegNum<[27]>;
-def R28 : GPR<28, "$28">, DwarfRegNum<[28]>;
-def R29 : GPR<29, "$29">, DwarfRegNum<[29]>;
-def R30 : GPR<30, "$30">, DwarfRegNum<[30]>;
-def R31 : GPR<31, "$31">, DwarfRegNum<[31]>;
-
-// Floating-point registers
-def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>;
-def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>;
-def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>;
-def F3 : FPR< 3, "$f3">, DwarfRegNum<[36]>;
-def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>;
-def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>;
-def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>;
-def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>;
-def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>;
-def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>;
-def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>;
-def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>;
-def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>;
-def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>;
-def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>;
-def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>;
-def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>;
-def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>;
-def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>;
-def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>;
-def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>;
-def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>;
-def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>;
-def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>;
-def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>;
-def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>;
-def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>;
-def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>;
-def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>;
-def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>;
-def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>;
-def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
-
-// Note: $28 is undefined after any and all calls.
-
-/// Register classes
-def GPRC : RegisterClass<"Alpha", [i64], 64, (add
- // Volatile
- R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
- R23, R24, R25, R28,
- // Special meaning, but still volatile
- R27, // procedure address
- R26, // return address
- R29, // global offset table address
- // Non-volatile
- R9, R10, R11, R12, R13, R14,
- // Not allocatable: R15 (FP), R30 (SP), R31 (zero)
- R15, R30, R31)>;
-
-def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
- // Callee-saved:
- F2, F3, F4, F5, F6, F7, F8, F9,
- F31)>; // F31 is the zero register
-
-def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h
deleted file mode 100644
index 4c92045d4696..000000000000
--- a/lib/Target/Alpha/AlphaRelocations.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Alpha target-specific relocation types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHARELOCATIONS_H
-#define ALPHARELOCATIONS_H
-
-#include "llvm/CodeGen/MachineRelocation.h"
-
-namespace llvm {
- namespace Alpha {
- enum RelocationType {
- reloc_literal,
- reloc_gprellow,
- reloc_gprelhigh,
- reloc_gpdist,
- reloc_bsr
- };
- }
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td
deleted file mode 100644
index 3703dd4fa9f6..000000000000
--- a/lib/Target/Alpha/AlphaSchedule.td
+++ /dev/null
@@ -1,85 +0,0 @@
-//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// This is Table 2-2 from the Alpha 21264 Compiler Writer's Guide,
-// with some modifications.
-
-// Pipelines (functional units)
-
-def L0 : FuncUnit;
-def L1 : FuncUnit;
-def FST0 : FuncUnit;
-def FST1 : FuncUnit;
-def U0 : FuncUnit;
-def U1 : FuncUnit;
-def FA : FuncUnit;
-def FM : FuncUnit;
-
-def s_ild : InstrItinClass;
-def s_fld : InstrItinClass;
-def s_ist : InstrItinClass;
-def s_fst : InstrItinClass;
-def s_lda : InstrItinClass;
-def s_rpcc : InstrItinClass;
-def s_rx : InstrItinClass;
-def s_mxpr : InstrItinClass;
-def s_icbr : InstrItinClass;
-def s_ubr : InstrItinClass;
-def s_jsr : InstrItinClass;
-def s_iadd : InstrItinClass;
-def s_ilog : InstrItinClass;
-def s_ishf : InstrItinClass;
-def s_cmov : InstrItinClass;
-def s_imul : InstrItinClass;
-def s_imisc : InstrItinClass;
-def s_fbr : InstrItinClass;
-def s_fadd : InstrItinClass;
-def s_fmul : InstrItinClass;
-def s_fcmov : InstrItinClass;
-def s_fdivt : InstrItinClass;
-def s_fdivs : InstrItinClass;
-def s_fsqrts: InstrItinClass;
-def s_fsqrtt: InstrItinClass;
-def s_ftoi : InstrItinClass;
-def s_itof : InstrItinClass;
-def s_pseudo : InstrItinClass;
-
-// Table 2-4: Instruction Class Latency in Cycles,
-// with some modifications.
-
-def Alpha21264Itineraries : ProcessorItineraries<
- [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [
- InstrItinData<s_ild , [InstrStage<3, [L0, L1]>]>,
- InstrItinData<s_fld , [InstrStage<4, [L0, L1]>]>,
- InstrItinData<s_ist , [InstrStage<0, [L0, L1]>]>,
- InstrItinData<s_fst , [InstrStage<0, [FST0, FST1, L0, L1]>]>,
- InstrItinData<s_lda , [InstrStage<1, [L0, L1, U0, U1]>]>,
- InstrItinData<s_rpcc , [InstrStage<1, [L1]>]>,
- InstrItinData<s_rx , [InstrStage<1, [L1]>]>,
- InstrItinData<s_mxpr , [InstrStage<1, [L0, L1]>]>,
- InstrItinData<s_icbr , [InstrStage<0, [U0, U1]>]>,
- InstrItinData<s_ubr , [InstrStage<3, [U0, U1]>]>,
- InstrItinData<s_jsr , [InstrStage<3, [L0]>]>,
- InstrItinData<s_iadd , [InstrStage<1, [L0, U0, L1, U1]>]>,
- InstrItinData<s_ilog , [InstrStage<1, [L0, U0, L1, U1]>]>,
- InstrItinData<s_ishf , [InstrStage<1, [U0, U1]>]>,
- InstrItinData<s_cmov , [InstrStage<1, [L0, U0, L1, U1]>]>,
- InstrItinData<s_imul , [InstrStage<7, [U1]>]>,
- InstrItinData<s_imisc , [InstrStage<3, [U0]>]>,
- InstrItinData<s_fbr , [InstrStage<0, [FA]>]>,
- InstrItinData<s_fadd , [InstrStage<6, [FA]>]>,
- InstrItinData<s_fmul , [InstrStage<6, [FM]>]>,
- InstrItinData<s_fcmov , [InstrStage<6, [FA]>]>,
- InstrItinData<s_fdivs , [InstrStage<12, [FA]>]>,
- InstrItinData<s_fdivt , [InstrStage<15, [FA]>]>,
- InstrItinData<s_fsqrts , [InstrStage<18, [FA]>]>,
- InstrItinData<s_fsqrtt , [InstrStage<33, [FA]>]>,
- InstrItinData<s_ftoi , [InstrStage<3, [FST0, FST1, L0, L1]>]>,
- InstrItinData<s_itof , [InstrStage<4, [L0, L1]>]>
-]>;
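-
-// Instruction definitions elsewhere in the backend attach one of these
-// classes as their itinerary; for example (illustrative), an integer add
-// would name s_iadd and thus get a 1-cycle latency on any of L0/L1/U0/U1.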
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
deleted file mode 100644
index f1958fe6b5ad..000000000000
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the AlphaSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "alpha-selectiondag-info"
-#include "AlphaTargetMachine.h"
-using namespace llvm;
-
-AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
-}
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
deleted file mode 100644
index 3405cc0cdedd..000000000000
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Alpha subclass of TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHASELECTIONDAGINFO_H
-#define ALPHASELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class AlphaTargetMachine;
-
-class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
- ~AlphaSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
deleted file mode 100644
index bd55ce9e315a..000000000000
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Alpha-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaSubtarget.h"
-#include "Alpha.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "AlphaGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
- : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
- std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-
- // Initialize scheduling itinerary for the specified CPU.
- InstrItins = getInstrItineraryForCPU(CPUName);
-}
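-
-// Illustrative construction, assuming the "cix" feature from Alpha.td maps
-// onto HasCT:
-//   AlphaSubtarget ST("alpha-unknown-linux-gnu", "ev6", "+cix");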
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
deleted file mode 100644
index 70b311683f8b..000000000000
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Alpha-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHASUBTARGET_H
-#define ALPHASUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "AlphaGenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
-class AlphaSubtarget : public AlphaGenSubtargetInfo {
-protected:
-
- bool HasCT;
-
- InstrItineraryData InstrItins;
-
-public:
- /// This constructor initializes the data members to match those
- /// of the specified triple.
- ///
- AlphaSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses the features string, setting the
- /// specified subtarget options.  The definition of this function is
- /// auto-generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool hasCT() const { return HasCT; }
-};
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
deleted file mode 100644
index fc9a6771a900..000000000000
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Alpha-specific subclass of TargetMachine.
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "AlphaTargetMachine.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-extern "C" void LLVMInitializeAlphaTarget() {
- // Register the target.
- RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
-}
-
-AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- DataLayout("e-f128:128:128-n64"),
- FrameLowering(Subtarget),
- Subtarget(TT, CPU, FS),
- TLInfo(*this),
- TSInfo(*this) {
-}
-
-//===----------------------------------------------------------------------===//
-// Pass Pipeline Configuration
-//===----------------------------------------------------------------------===//
-
-bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createAlphaISelDag(*this));
- return false;
-}
-bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // Must run branch selection immediately preceding the asm printer
- PM.add(createAlphaBranchSelectionPass());
- PM.add(createAlphaLLRPPass(*this));
- return false;
-}
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
deleted file mode 100644
index 48bb948a7945..000000000000
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ /dev/null
@@ -1,66 +0,0 @@
-//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Alpha-specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHA_TARGETMACHINE_H
-#define ALPHA_TARGETMACHINE_H
-
-#include "AlphaInstrInfo.h"
-#include "AlphaISelLowering.h"
-#include "AlphaFrameLowering.h"
-#include "AlphaSelectionDAGInfo.h"
-#include "AlphaSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
-class GlobalValue;
-
-class AlphaTargetMachine : public LLVMTargetMachine {
- const TargetData DataLayout; // Calculates type size & alignment
- AlphaInstrInfo InstrInfo;
- AlphaFrameLowering FrameLowering;
- AlphaSubtarget Subtarget;
- AlphaTargetLowering TLInfo;
- AlphaSelectionDAGInfo TSInfo;
-
-public:
- AlphaTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
- virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual const AlphaRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
- virtual const AlphaTargetLowering* getTargetLowering() const {
- return &TLInfo;
- }
- virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
-
- // Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
deleted file mode 100644
index a6d551618b9f..000000000000
--- a/lib/Target/Alpha/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS Alpha.td)
-
-llvm_tablegen(AlphaGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(AlphaGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(AlphaGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget)
-add_public_tablegen_target(AlphaCommonTableGen)
-
-add_llvm_target(AlphaCodeGen
- AlphaAsmPrinter.cpp
- AlphaBranchSelector.cpp
- AlphaInstrInfo.cpp
- AlphaISelDAGToDAG.cpp
- AlphaISelLowering.cpp
- AlphaFrameLowering.cpp
- AlphaLLRP.cpp
- AlphaRegisterInfo.cpp
- AlphaSubtarget.cpp
- AlphaTargetMachine.cpp
- AlphaSelectionDAGInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMAlphaCodeGen
- LLVMAlphaDesc
- LLVMAlphaInfo
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
deleted file mode 100644
index a35e8846e072..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definitions of the AlphaMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaMCAsmInfo.h"
-using namespace llvm;
-
-AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
- AlignmentIsInBytes = false;
- PrivateGlobalPrefix = "$";
- GPRel32Directive = ".gprel32";
- WeakRefDirective = "\t.weak\t";
- HasSetDirective = false;
-}
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
deleted file mode 100644
index 837844bd29a9..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the AlphaMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHATARGETASMINFO_H
-#define ALPHATARGETASMINFO_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
-
- struct AlphaMCAsmInfo : public MCAsmInfo {
- explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
- };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
deleted file mode 100644
index 4ad021ca6761..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Alpha specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AlphaMCTargetDesc.h"
-#include "AlphaMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "AlphaGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "AlphaGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "AlphaGenRegisterInfo.inc"
-
-using namespace llvm;
-
-
-static MCInstrInfo *createAlphaMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitAlphaMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitAlphaMCRegisterInfo(X, Alpha::R26);
- return X;
-}
-
-static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
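- // Note that the requested relocation model RM is ignored below: Alpha
- // code generation here is always position-independent.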
- X->InitMCCodeGenInfo(Reloc::PIC_, CM);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeAlphaTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget,
- createAlphaMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
- createAlphaMCSubtargetInfo);
-}
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
deleted file mode 100644
index b0619e6cb011..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Alpha specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ALPHAMCTARGETDESC_H
-#define ALPHAMCTARGETDESC_H
-
-namespace llvm {
-class MCSubtargetInfo;
-class Target;
-class StringRef;
-
-extern Target TheAlphaTarget;
-
-} // End llvm namespace
-
-// Defines symbolic names for Alpha registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "AlphaGenRegisterInfo.inc"
-
-// Defines symbolic names for the Alpha instructions.
-//
-#define GET_INSTRINFO_ENUM
-#include "AlphaGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "AlphaGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index f745ecbdb67f..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-add_llvm_library(LLVMAlphaDesc
- AlphaMCTargetDesc.cpp
- AlphaMCAsmInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMAlphaDesc
- LLVMAlphaInfo
- LLVMMC
- )
-
-add_dependencies(LLVMAlphaDesc AlphaCommonTableGen)
diff --git a/lib/Target/Alpha/MCTargetDesc/Makefile b/lib/Target/Alpha/MCTargetDesc/Makefile
deleted file mode 100644
index d55175fa69dc..000000000000
--- a/lib/Target/Alpha/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Alpha/MCTargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMAlphaDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
deleted file mode 100644
index f48847a0627d..000000000000
--- a/lib/Target/Alpha/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMAlphaCodeGen
-TARGET = Alpha
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \
- AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
- AlphaGenCallingConv.inc AlphaGenSubtargetInfo.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt
deleted file mode 100644
index cc170e313030..000000000000
--- a/lib/Target/Alpha/README.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-***
-
-add gcc builtins for alpha instructions
-
-
-***
-
-custom expand byteswap into nifty
-extract/insert/mask byte/word/longword/quadword low/high
-sequences
-
-***
-
-see if any of the extract/insert/mask operations can be added
-
-***
-
-match more interesting things for cmovlbc cmovlbs (move if low bit clear/set)
-
-***
-
-lower srem and urem
-
-remq(i,j): i - (j * divq(i,j)) if j != 0
-remqu(i,j): i - (j * divqu(i,j)) if j != 0
-reml(i,j): i - (j * divl(i,j)) if j != 0
-remlu(i,j): i - (j * divlu(i,j)) if j != 0
-
-***
-
-add crazy vector instructions (MVI):
-
-(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word
-PKWB, UNPKBW pack/unpack word to byte
-PKLB UNPKBL pack/unpack long to byte
-PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b))
-
-cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions)
-
-this has some good examples for other operations that can be synthesised well
-from these rather meager vector ops (such as saturating add).
-http://www.alphalinux.org/docs/MVI-full.html
diff --git a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
deleted file mode 100644
index bdc69e788bcc..000000000000
--- a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Alpha.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-llvm::Target llvm::TheAlphaTarget;
-
-extern "C" void LLVMInitializeAlphaTargetInfo() {
- RegisterTarget<Triple::alpha, /*HasJIT=*/true>
- X(TheAlphaTarget, "alpha", "Alpha [experimental]");
-}
diff --git a/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/lib/Target/Alpha/TargetInfo/CMakeLists.txt
deleted file mode 100644
index cac3178b789d..000000000000
--- a/lib/Target/Alpha/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMAlphaInfo
- AlphaTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMAlphaInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
-add_dependencies(LLVMAlphaInfo AlphaCommonTableGen)
diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Alpha/TargetInfo/Makefile
deleted file mode 100644
index de01d7f8e8ef..000000000000
--- a/lib/Target/Alpha/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/Alpha/TargetInfo/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMAlphaInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h
deleted file mode 100644
index a00ff4cc3275..000000000000
--- a/lib/Target/Blackfin/Blackfin.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// Blackfin back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TARGET_BLACKFIN_H
-#define TARGET_BLACKFIN_H
-
-#include "MCTargetDesc/BlackfinMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
- class FunctionPass;
- class BlackfinTargetMachine;
-
- FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/Blackfin.td b/lib/Target/Blackfin/Blackfin.td
deleted file mode 100644
index cd90962a9540..000000000000
--- a/lib/Target/Blackfin/Blackfin.td
+++ /dev/null
@@ -1,202 +0,0 @@
-//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Blackfin target machine.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// Blackfin Subtarget features.
-//===----------------------------------------------------------------------===//
-
-def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true",
- "Build for SDRAM">;
-
-def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true",
- "Assume instruction cache lookaside buffers are enabled at runtime">;
-
-//===----------------------------------------------------------------------===//
-// Bugs in the silicon become workarounds in the compiler.
-// See http://www.analog.com/ for the full list of IC anomalies.
-//===----------------------------------------------------------------------===//
-
-def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true",
- "Work around 05000074 - "
- "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">;
-
-def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true",
- "Work around 05000244 - "
- "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">;
-
-def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true",
- "Work around 05000245 - "
- "Access in the Shadow of a Conditional Branch">;
-
-def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true",
- "Work around 05000257 - "
- "Interrupt/Exception During Short Hardware Loop">;
-
-def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true",
- "Work around 05000283 - "
- "System MMR Write Is Stalled Indefinitely when Killed">;
-
-def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true",
- "Work around 05000312 - "
- "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">;
-
-def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly",
- "wa_killed_mmr", "true",
- "Work around 05000315 - "
- "Killed System MMR Write Completes Erroneously on Next System MMR Access">;
-
-def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true",
- "Work around 05000371 - "
- "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">;
-
-def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true",
- "Work around 05000426 - "
- "Speculative Fetches of Indirect-Pointer Instructions">;
-
-//===----------------------------------------------------------------------===//
-// Register File, Calling Conv, Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "BlackfinRegisterInfo.td"
-include "BlackfinCallingConv.td"
-include "BlackfinIntrinsics.td"
-include "BlackfinInstrInfo.td"
-
-def BlackfinInstrInfo : InstrInfo {}
-
-//===----------------------------------------------------------------------===//
-// Blackfin processors supported.
-//===----------------------------------------------------------------------===//
-
-class Proc<string Name, string Suffix, list<SubtargetFeature> Features>
- : Processor<!strconcat(Name, Suffix), NoItineraries, Features>;
-
-def : Proc<"generic", "", []>;
-
-multiclass Core<string Name,string Suffix,
- list<SubtargetFeature> Features> {
- def : Proc<Name, Suffix, Features>;
- def : Proc<Name, "", Features>;
- def : Proc<Name, "-none", []>;
-}
-
-multiclass CoreEdinburgh<string Name>
- : Core<Name, "-0.6", [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS]> {
- def : Proc<Name, "-0.5",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS]>;
- def : Proc<Name, "-0.4",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS]>;
- def : Proc<Name, "-0.3",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS]>;
-}
-multiclass CoreBraemar<string Name>
- : Core<Name, "-0.3",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]> {
- def : Proc<Name, "-0.2",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreStirling<string Name>
- : Core<Name, "-0.5", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.4",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-0.3",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreMoab<string Name>
- : Core<Name, "-0.3", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
- def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-0.0",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_SPECLD, WA_LCREGS, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreTeton<string Name>
- : Core<Name, "-0.5",
- [WA_MI_SHIFT, WA_SPECLD, WA_MMR_STALL, WA_LCREGS, WA_KILLED_MMR,
- WA_RETS, WA_IND_CALL]> {
- def : Proc<Name, "-0.3",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any",
- [WA_MI_SHIFT, WA_CSYNC, WA_SPECLD, WA_HWLOOP, WA_MMR_STALL, WA_LCREGS,
- WA_KILLED_MMR, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreKookaburra<string Name>
- : Core<Name, "-0.2", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
- def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_RETS, WA_IND_CALL]>;
-}
-multiclass CoreMockingbird<string Name>
- : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
- def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
-}
-multiclass CoreBrodie<string Name>
- : Core<Name, "-0.1", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]> {
- def : Proc<Name, "-0.0", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
- def : Proc<Name, "-any", [WA_MI_SHIFT, WA_SPECLD, WA_IND_CALL]>;
-}
-
-defm BF512 : CoreBrodie<"bf512">;
-defm BF514 : CoreBrodie<"bf514">;
-defm BF516 : CoreBrodie<"bf516">;
-defm BF518 : CoreBrodie<"bf518">;
-defm BF522 : CoreMockingbird<"bf522">;
-defm BF523 : CoreKookaburra<"bf523">;
-defm BF524 : CoreMockingbird<"bf524">;
-defm BF525 : CoreKookaburra<"bf525">;
-defm BF526 : CoreMockingbird<"bf526">;
-defm BF527 : CoreKookaburra<"bf527">;
-defm BF531 : CoreEdinburgh<"bf531">;
-defm BF532 : CoreEdinburgh<"bf532">;
-defm BF533 : CoreEdinburgh<"bf533">;
-defm BF534 : CoreBraemar<"bf534">;
-defm BF536 : CoreBraemar<"bf536">;
-defm BF537 : CoreBraemar<"bf537">;
-defm BF538 : CoreStirling<"bf538">;
-defm BF539 : CoreStirling<"bf539">;
-defm BF542 : CoreMoab<"bf542">;
-defm BF544 : CoreMoab<"bf544">;
-defm BF548 : CoreMoab<"bf548">;
-defm BF549 : CoreMoab<"bf549">;
-defm BF561 : CoreTeton<"bf561">;
-
-//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
-//===----------------------------------------------------------------------===//
-
-def Blackfin : Target {
- // Pull in Instruction Info:
- let InstructionSet = BlackfinInstrInfo;
-}
diff --git a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
deleted file mode 100644
index ed9844e1bee4..000000000000
--- a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "Blackfin.h"
-#include "BlackfinInstrInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class BlackfinAsmPrinter : public AsmPrinter {
- public:
- BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "Blackfin Assembly Printer";
- }
-
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd.
- static const char *getRegisterName(unsigned RegNo);
-
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- };
-} // end of anonymous namespace
-
-#include "BlackfinGenAsmWriter.inc"
-
-extern "C" void LLVMInitializeBlackfinAsmPrinter() {
- RegisterAsmPrinter<BlackfinAsmPrinter> X(TheBlackfinTarget);
-}
-
-void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(opNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Register:
- assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- "Virtual registers should be already mapped!");
- O << getRegisterName(MO.getReg());
- break;
-
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- break;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
- printOffset(MO.getOffset(), O);
- break;
- case MachineOperand::MO_ExternalSymbol:
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_"
- << MO.getIndex();
- break;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
- << '_' << MO.getIndex();
- break;
- default:
- llvm_unreachable("<unknown operand type>");
- break;
- }
-}
-
-void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum,
- raw_ostream &O) {
- printOperand(MI, opNum, O);
-
- if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0)
- return;
-
- O << " + ";
- printOperand(MI, opNum+1, O);
-}
-
-/// PrintAsmOperand - Print out an operand for an inline asm expression.
-///
-bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0) return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default: return true; // Unknown modifier.
- case 'r':
- break;
- }
- }
-
- printOperand(MI, OpNo, O);
-
- return false;
-}
-
-bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo,
- unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier
-
- O << '[';
- printOperand(MI, OpNo, O);
- O << ']';
-
- return false;
-}
diff --git a/lib/Target/Blackfin/BlackfinCallingConv.td b/lib/Target/Blackfin/BlackfinCallingConv.td
deleted file mode 100644
index 0abc84c3c405..000000000000
--- a/lib/Target/Blackfin/BlackfinCallingConv.td
+++ /dev/null
@@ -1,30 +0,0 @@
-//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This describes the calling conventions for the Blackfin architecture.
-//
-//===----------------------------------------------------------------------===//
-
-// Blackfin C Calling convention.
-def CC_Blackfin : CallingConv<[
- CCIfType<[i16], CCPromoteToType<i32>>,
- CCIfSRet<CCAssignToReg<[P0]>>,
- CCAssignToReg<[R0, R1, R2]>,
- CCAssignToStack<4, 4>
-]>;
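-
-// Worked example (illustrative): for a call f(short a, int b, int c, int d),
-// 'a' is promoted to i32 and assigned R0, 'b' and 'c' take R1 and R2, and
-// 'd' spills to a 4-byte aligned stack slot.  An sret pointer, if present,
-// is claimed by P0 before the R0-R2 assignment is considered.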
-
-//===----------------------------------------------------------------------===//
-// Return Value Calling Conventions
-//===----------------------------------------------------------------------===//
-
-// Blackfin C return-value convention.
-def RetCC_Blackfin : CallingConv<[
- CCIfType<[i16], CCPromoteToType<i32>>,
- CCAssignToReg<[R0, R1]>
-]>;
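-
-// e.g. an i16 return value is promoted to i32 and comes back in R0; a
-// 64-bit result would occupy both R0 and R1 (illustrative).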
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/lib/Target/Blackfin/BlackfinFrameLowering.cpp
deleted file mode 100644
index 0b0984d2f777..000000000000
--- a/lib/Target/Blackfin/BlackfinFrameLowering.cpp
+++ /dev/null
@@ -1,130 +0,0 @@
-//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinFrameLowering.h"
-#include "BlackfinInstrInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetOptions.h"
-
-using namespace llvm;
-
-
-// hasFP - Return true if the specified function should have a dedicated
-// frame pointer register.  This is true if the function has variable-sized
-// allocas, adjusts its stack, or if frame pointer elimination is disabled.
-bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) ||
- MFI->adjustsStack() || MFI->hasVarSizedObjects();
-}
-
-// Always reserve a call frame.  We don't have enough registers to adjust SP.
-bool BlackfinFrameLowering::
-hasReservedCallFrame(const MachineFunction &MF) const {
- return true;
-}
-
-// Emit a prologue that sets up a stack frame.
-// On function entry, R0-R2 and P0 may hold arguments.
-// R3, P1, and P2 may be used as scratch registers.
-void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const BlackfinRegisterInfo *RegInfo =
- static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const BlackfinInstrInfo &TII =
- *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
-
- DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- int FrameSize = MFI->getStackSize();
- if (FrameSize%4) {
- FrameSize = (FrameSize+3) & ~3;
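- // e.g. a 10-byte frame becomes 12, keeping SP 4-byte aligned.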
- MFI->setStackSize(FrameSize);
- }
-
- if (!hasFP(MF)) {
- assert(!MFI->adjustsStack() &&
- "FP elimination on a non-leaf function is not supported");
- RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
- return;
- }
-
- // emit a LINK instruction
- if (FrameSize <= 0x3ffff) {
- BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize);
- return;
- }
-
- // Frame is too big, do a manual LINK:
- // [--SP] = RETS;
- // [--SP] = FP;
- // FP = SP;
- // P1 = -FrameSize;
- // SP = SP + P1;
- BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
- .addReg(BF::RETS, RegState::Kill);
- BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH))
- .addReg(BF::FP, RegState::Kill);
- BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP)
- .addReg(BF::SP);
- RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP)
- .addReg(BF::SP, RegState::Kill)
- .addReg(BF::P1, RegState::Kill);
-}
-
-void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const BlackfinRegisterInfo *RegInfo =
- static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const BlackfinInstrInfo &TII =
- *static_cast<const BlackfinInstrInfo*>(MF.getTarget().getInstrInfo());
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- DebugLoc dl = MBBI->getDebugLoc();
-
- int FrameSize = MFI->getStackSize();
- assert(FrameSize%4 == 0 && "Misaligned frame size");
-
- if (!hasFP(MF)) {
- assert(!MFI->adjustsStack() &&
- "FP elimination on a non-leaf function is not supported");
- RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
- return;
- }
-
- // emit an UNLINK instruction
- BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK));
-}
-
-void BlackfinFrameLowering::
-processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const BlackfinRegisterInfo *RegInfo =
- static_cast<const BlackfinRegisterInfo*>(MF.getTarget().getRegisterInfo());
- const TargetRegisterClass *RC = BF::DPRegisterClass;
-
- if (RegInfo->requiresRegisterScavenging(MF)) {
- // Reserve a slot close to SP or frame pointer.
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
-}
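
The (FrameSize+3) & ~3 idiom in emitPrologue above rounds the frame size up
to the next multiple of 4. A standalone sketch of that arithmetic (plain
C++, no LLVM types; alignTo4 is an illustrative name):

    #include <cassert>

    // Round a byte count up to the next multiple of 4 by adding 3 and
    // clearing the two low bits, exactly as emitPrologue does.
    static int alignTo4(int frameSize) {
      return (frameSize + 3) & ~3;
    }

    int main() {
      assert(alignTo4(0) == 0);
      assert(alignTo4(5) == 8);  // 5 rounds up to 8
      assert(alignTo4(8) == 8);  // multiples of 4 are unchanged
    }
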
diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.h b/lib/Target/Blackfin/BlackfinFrameLowering.h
deleted file mode 100644
index 169aa8e3011d..000000000000
--- a/lib/Target/Blackfin/BlackfinFrameLowering.h
+++ /dev/null
@@ -1,47 +0,0 @@
-//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFIN_FRAMEINFO_H
-#define BLACKFIN_FRAMEINFO_H
-
-#include "Blackfin.h"
-#include "BlackfinSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
- class BlackfinSubtarget;
-
-class BlackfinFrameLowering : public TargetFrameLowering {
-protected:
- const BlackfinSubtarget &STI;
-
-public:
- explicit BlackfinFrameLowering(const BlackfinSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) {
- }
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
deleted file mode 100644
index 215ca43ea338..000000000000
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the Blackfin target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Blackfin.h"
-#include "BlackfinTargetMachine.h"
-#include "BlackfinRegisterInfo.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-/// BlackfinDAGToDAGISel - Blackfin-specific code to select Blackfin machine
-/// instructions for SelectionDAG operations.
-namespace {
- class BlackfinDAGToDAGISel : public SelectionDAGISel {
- /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we
- /// can make the right decision when generating code for different targets.
- //const BlackfinSubtarget &Subtarget;
- public:
- BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel) {}
-
- virtual void PostprocessISelDAG();
-
- virtual const char *getPassName() const {
- return "Blackfin DAG->DAG Pattern Instruction Selection";
- }
-
- // Include the pieces autogenerated from the target description.
-#include "BlackfinGenDAGISel.inc"
-
- private:
- SDNode *Select(SDNode *N);
- bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset);
-
- // Walk the DAG after instruction selection, fixing register class issues.
- void FixRegisterClasses(SelectionDAG &DAG);
-
- const BlackfinInstrInfo &getInstrInfo() {
- return *static_cast<const BlackfinTargetMachine&>(TM).getInstrInfo();
- }
- const BlackfinRegisterInfo *getRegisterInfo() {
- return static_cast<const BlackfinTargetMachine&>(TM).getRegisterInfo();
- }
- };
-} // end anonymous namespace
-
-FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new BlackfinDAGToDAGISel(TM, OptLevel);
-}
-
-void BlackfinDAGToDAGISel::PostprocessISelDAG() {
- FixRegisterClasses(*CurDAG);
-}
-
-SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode())
- return NULL; // Already selected.
-
- switch (N->getOpcode()) {
- default: break;
- case ISD::FrameIndex: {
- // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or ADDpp
- // SP, Px
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32);
- return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI,
- CurDAG->getTargetConstant(0, MVT::i32));
- }
- }
-
- return SelectCode(N);
-}
-
-bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr,
- SDValue &Base,
- SDValue &Offset) {
- FrameIndexSDNode *FIN = 0;
- if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
- }
- if (Addr.getOpcode() == ISD::ADD) {
- ConstantSDNode *CN = 0;
- if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) &&
- (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) &&
- (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
- // Constant positive word offset from frame index
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
- Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
- return true;
- }
- }
- return false;
-}
-
-static inline bool isCC(const TargetRegisterClass *RC) {
- return BF::AnyCCRegClass.hasSubClassEq(RC);
-}
-
-static inline bool isDCC(const TargetRegisterClass *RC) {
- return BF::DRegClass.hasSubClassEq(RC) || isCC(RC);
-}
-
-static void UpdateNodeOperand(SelectionDAG &DAG,
- SDNode *N,
- unsigned Num,
- SDValue Val) {
- SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
- ops[Num] = Val;
- SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
- DAG.ReplaceAllUsesWith(N, New);
-}
-
-// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
-// choosing the proper register classes.
-void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
- const BlackfinInstrInfo &TII = getInstrInfo();
- const BlackfinRegisterInfo *TRI = getRegisterInfo();
- DAG.AssignTopologicalOrder();
- HandleSDNode Dummy(DAG.getRoot());
-
- for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin();
- NI != DAG.allnodes_end(); ++NI) {
- if (NI->use_empty() || !NI->isMachineOpcode())
- continue;
- const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
- for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
- if (!UI->isMachineOpcode())
- continue;
-
- if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
- continue;
- const TargetRegisterClass *DefRC =
- TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
-
- const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
- if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
- continue;
- const TargetRegisterClass *UseRC =
- TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
- if (!DefRC || !UseRC)
- continue;
- // We cannot copy CC <-> !(CC/D)
- if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) {
- SDNode *Copy =
- DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
- NI->getDebugLoc(),
- MVT::i32,
- UI.getUse().get(),
- DAG.getTargetConstant(BF::DRegClassID, MVT::i32));
- UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0));
- }
- }
- }
- DAG.setRoot(Dummy.getValue());
-}
-
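FixRegisterClasses above inserts a COPY_TO_REGCLASS bridge only when one
side of a def/use edge is a CC register and the other side is neither D nor
CC. A minimal standalone model of that legality check (hypothetical
RegClass enum standing in for LLVM's register classes; Other represents any
non-D/CC class such as P):

    #include <cassert>

    enum RegClass { D, CC, Other };

    // Mirrors isDCC() in the deleted file: D and CC are mutually copyable.
    static bool isDCC(RegClass rc) { return rc == D || rc == CC; }

    // A bridge copy is needed when CC meets a class outside the D/CC family.
    static bool needsCopy(RegClass def, RegClass use) {
      return (def == CC && !isDCC(use)) || (use == CC && !isDCC(def));
    }

    int main() {
      assert(!needsCopy(CC, D));     // CC -> D is a legal direct copy
      assert(!needsCopy(D, D));      // D -> D needs no bridge
      assert(needsCopy(CC, Other));  // CC -> e.g. P must go through D
    }
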
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
deleted file mode 100644
index 7d4c45fdf665..000000000000
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ /dev/null
@@ -1,645 +0,0 @@
-//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the interfaces that Blackfin uses to lower LLVM code
-// into a selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinISelLowering.h"
-#include "BlackfinTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/ADT/VectorExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinGenCallingConv.inc"
-
-//===----------------------------------------------------------------------===//
-// TargetLowering Implementation
-//===----------------------------------------------------------------------===//
-
-BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- setStackPointerRegisterToSaveRestore(BF::SP);
- setIntDivIsCheap(false);
-
- // Set up the legal register classes.
- addRegisterClass(MVT::i32, BF::DRegisterClass);
- addRegisterClass(MVT::i16, BF::D16RegisterClass);
-
- computeRegisterProperties();
-
- // Blackfin doesn't have i1 loads or stores
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
-
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::Other, Expand);
-
- // i16 registers don't do much
- setOperationAction(ISD::AND, MVT::i16, Promote);
- setOperationAction(ISD::OR, MVT::i16, Promote);
- setOperationAction(ISD::XOR, MVT::i16, Promote);
- setOperationAction(ISD::CTPOP, MVT::i16, Promote);
- // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote
- // immediately.
- setOperationAction(ISD::CTLZ, MVT::i16, Promote);
- setOperationAction(ISD::CTTZ, MVT::i16, Promote);
- setOperationAction(ISD::SETCC, MVT::i16, Promote);
-
- // Blackfin has no division
- setOperationAction(ISD::SDIV, MVT::i16, Expand);
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i16, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i16, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i16, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::MULHU, MVT::i32, Expand);
- setOperationAction(ISD::MULHS, MVT::i32, Expand);
-
- // No carry-in operations.
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
-
- // Blackfin has no intrinsics for these particular operations.
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- setOperationAction(ISD::BSWAP, MVT::i32, Expand);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- // i32 has native CTPOP, but not CTLZ/CTTZ
- setOperationAction(ISD::CTLZ, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
-
- // READCYCLECOUNTER needs special type legalization.
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
-
- setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
-
- // Use the default implementation.
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
-
- setMinFunctionAlignment(2);
-}
-
-const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default: return 0;
- case BFISD::CALL: return "BFISD::CALL";
- case BFISD::RET_FLAG: return "BFISD::RET_FLAG";
- case BFISD::Wrapper: return "BFISD::Wrapper";
- }
-}
-
-EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const {
- // SETCC always sets the CC register. Technically that is an i1 register, but
- // that type is not legal, so we treat it as an i32 register.
- return MVT::i32;
-}
-
-SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-
- Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
- return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
-}
-
-SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
- int JTI = cast<JumpTableSDNode>(Op)->getIndex();
-
- Op = DAG.getTargetJumpTable(JTI, MVT::i32);
- return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
-}
-
-SDValue
-BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
- CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin);
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- if (VA.isRegLoc()) {
- EVT RegVT = VA.getLocVT();
- TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ?
- BF::PRegisterClass : BF::DRegisterClass;
- assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState");
- assert(RC->hasType(RegVT) && "Unexpected regclass in CCState");
-
- unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);
- MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
-
- // If this is an 8 or 16-bit value, it is really passed promoted to 32
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
- InVals.push_back(ArgValue);
- } else {
- assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
- unsigned ObjSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
- MachinePointerInfo(),
- false, false, 0));
- }
- }
-
- return Chain;
-}
-
-SDValue
-BlackfinTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- // CCValAssign - represent the assignment of the return value to locations.
- SmallVector<CCValAssign, 16> RVLocs;
-
- // CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
-
-  // Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Opi = OutVals[i];
-
- // Expand to i32 if necessary
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi);
- break;
- case CCValAssign::ZExt:
- Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi);
- break;
- case CCValAssign::AExt:
- Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi);
- break;
- }
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue());
- // Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode()) {
- return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
- } else {
- return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain);
- }
-}
-
-SDValue
-BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // Blackfin target does not yet support tail call optimization.
- isTailCall = false;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), ArgLocs, *DAG.getContext());
- CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space
- CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin);
-
- // Get the size of the outgoing arguments stack space requirement.
- unsigned ArgsSize = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true));
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- SmallVector<SDValue, 8> MemOpChains;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- }
-
-    // Arguments that are passed in registers must be kept in the
-    // RegsToPass vector.
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
- int Offset = VA.getLocMemOffset();
- assert(Offset%4 == 0 && "Unaligned LocMemOffset");
- assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type");
- SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32);
- SDValue OffsetN = DAG.getIntPtrConstant(Offset);
- OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN);
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN,
- MachinePointerInfo(),false, false, 0));
- }
- }
-
-  // Merge all of the store nodes into a single TokenFactor node, since
-  // the stores are independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token
- // chain and flag operands which copy the outgoing args into registers.
-  // The InFlag is necessary since all emitted instructions must be
- // stuck together.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- // Likewise ExternalSymbol -> TargetExternalSymbol.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
-
- std::vector<EVT> NodeTys;
- NodeTys.push_back(MVT::Other); // Returns a chain
- NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
- SDValue Ops[] = { Chain, Callee, InFlag };
- Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops,
- InFlag.getNode() ? 3 : 2);
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
- DAG.getIntPtrConstant(0, true), InFlag);
- InFlag = Chain.getValue(1);
-
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- DAG.getTarget(), RVLocs, *DAG.getContext());
-
- RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &RV = RVLocs[i];
- unsigned Reg = RV.getLocReg();
-
- Chain = DAG.getCopyFromReg(Chain, dl, Reg,
- RVLocs[i].getLocVT(), InFlag);
- SDValue Val = Chain.getValue(0);
- InFlag = Chain.getValue(2);
- Chain = Chain.getValue(1);
-
- // Callee is responsible for extending any i16 return values.
- switch (RV.getLocInfo()) {
- case CCValAssign::SExt:
- Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val,
- DAG.getValueType(RV.getValVT()));
- break;
- case CCValAssign::ZExt:
- Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val,
- DAG.getValueType(RV.getValVT()));
- break;
- default:
- break;
- }
-
- // Truncate to valtype
- if (RV.getLocInfo() != CCValAssign::Full)
- Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val);
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-// Expansion of ADDE / SUBE. This is a bit involved since Blackfin doesn't have
-// add-with-carry instructions.
-SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const {
- // Operands: lhs, rhs, carry-in (AC0 flag)
- // Results: sum, carry-out (AC0 flag)
- DebugLoc dl = Op.getDebugLoc();
-
- unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB;
-
- // zext incoming carry flag in AC0 to 32 bits
- SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
- /* flag= */ Op.getOperand(2));
- CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32,
- SDValue(CarryIn, 0));
-
- // Add operands, produce sum and carry flag
- SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
- Op.getOperand(0), Op.getOperand(1));
-
- // Store intermediate carry from Sum
- SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32,
- /* flag= */ SDValue(Sum, 1));
-
- // Add incoming carry, again producing an output flag
- Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue,
- SDValue(Sum, 0), SDValue(CarryIn, 0));
-
- // Update AC0 with the intermediate carry, producing a flag.
- SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue,
- SDValue(Carry1, 0));
-
- // Compose (i32, flag) pair
- SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) };
- return DAG.getMergeValues(ops, 2, dl);
-}
-
-SDValue BlackfinTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default:
- Op.getNode()->dump();
- llvm_unreachable("Should not custom lower this!");
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress:
- llvm_unreachable("TLS not implemented for Blackfin.");
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- // Frame & Return address. Currently unimplemented
- case ISD::FRAMEADDR: return SDValue();
- case ISD::RETURNADDR: return SDValue();
- case ISD::ADDE:
- case ISD::SUBE: return LowerADDE(Op, DAG);
- }
-}
-
-void
-BlackfinTargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- DebugLoc dl = N->getDebugLoc();
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Do not know how to custom type legalize this operation!");
- return;
- case ISD::READCYCLECOUNTER: {
- // The low part of the cycle counter is in CYCLES, the high part in
- // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read
- // CYCLES2 last.
- SDValue TheChain = N->getOperand(0);
- SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32);
- SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32);
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi));
- // Outgoing chain. If we were to use the chain from lo instead, it would be
- // possible to entirely eliminate the CYCLES2 read in (i32 (trunc
- // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2
- // read beyond the next CYCLES read, leading to invalid results.
- Results.push_back(hi.getValue(1));
- return;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Blackfin Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-BlackfinTargetLowering::ConstraintType
-BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() != 1)
- return TargetLowering::getConstraintType(Constraint);
-
- switch (Constraint[0]) {
- // Standard constraints
- case 'r':
- return C_RegisterClass;
-
- // Blackfin-specific constraints
- case 'a':
- case 'd':
- case 'z':
- case 'D':
- case 'W':
- case 'e':
- case 'b':
- case 'v':
- case 'f':
- case 'c':
- case 't':
- case 'u':
- case 'k':
- case 'x':
- case 'y':
- case 'w':
- return C_RegisterClass;
- case 'A':
- case 'B':
- case 'C':
- case 'Z':
- case 'Y':
- return C_Register;
- }
-
- // Not implemented: q0-q7, qA. Use {R2} etc instead
-
- return TargetLowering::getConstraintType(Constraint);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-BlackfinTargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
-
- // Blackfin-specific constraints
- case 'a':
- case 'd':
- case 'z':
- case 'D':
- case 'W':
- case 'e':
- case 'b':
- case 'v':
- case 'f':
- case 'c':
- case 't':
- case 'u':
- case 'k':
- case 'x':
- case 'y':
- case 'w':
- return CW_Register;
- case 'A':
- case 'B':
- case 'C':
- case 'Z':
- case 'Y':
- return CW_SpecificReg;
- }
- return weight;
-}
-
-/// getRegForInlineAsmConstraint - Return register no and class for a C_Register
-/// constraint.
-std::pair<unsigned, const TargetRegisterClass*> BlackfinTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
- typedef std::pair<unsigned, const TargetRegisterClass*> Pair;
- using namespace BF;
-
- if (Constraint.size() != 1)
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-
- switch (Constraint[0]) {
- // Standard constraints
- case 'r':
- return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass);
-
- // Blackfin-specific constraints
- case 'a': return Pair(0U, PRegisterClass);
- case 'd': return Pair(0U, DRegisterClass);
- case 'e': return Pair(0U, AccuRegisterClass);
- case 'A': return Pair(A0, AccuRegisterClass);
- case 'B': return Pair(A1, AccuRegisterClass);
- case 'b': return Pair(0U, IRegisterClass);
- case 'v': return Pair(0U, BRegisterClass);
- case 'f': return Pair(0U, MRegisterClass);
- case 'C': return Pair(CC, JustCCRegisterClass);
- case 'x': return Pair(0U, GRRegisterClass);
- case 'w': return Pair(0U, ALLRegisterClass);
- case 'Z': return Pair(P3, PRegisterClass);
- case 'Y': return Pair(P1, PRegisterClass);
- case 'z': return Pair(0U, zConsRegisterClass);
- case 'D': return Pair(0U, DConsRegisterClass);
- case 'W': return Pair(0U, WConsRegisterClass);
- case 'c': return Pair(0U, cConsRegisterClass);
- case 't': return Pair(0U, tConsRegisterClass);
- case 'u': return Pair(0U, uConsRegisterClass);
- case 'k': return Pair(0U, kConsRegisterClass);
- case 'y': return Pair(0U, yConsRegisterClass);
- }
-
- // Not implemented: q0-q7, qA. Use {R2} etc instead.
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
-
-bool BlackfinTargetLowering::
-isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The Blackfin target isn't yet aware of offsets.
- return false;
-}
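
LowerADDE above synthesizes add-with-carry from two plain adds, OR-ing the
two intermediate AC0 carry flags to form the outgoing carry. A plain-C++
sketch of the same carry reconstruction (no LLVM types; addWithCarry is an
illustrative name):

    #include <cassert>
    #include <cstdint>

    static uint32_t addWithCarry(uint32_t a, uint32_t b, bool carryIn,
                                 bool &carryOut) {
      uint32_t partial = a + b;
      bool c1 = partial < a;       // carry out of the first add
      uint32_t sum = partial + carryIn;
      bool c2 = sum < partial;     // carry out of adding the incoming carry
      carryOut = c1 | c2;          // at most one of the two can be set
      return sum;
    }

    int main() {
      bool c;
      assert(addWithCarry(0xffffffffu, 0, true, c) == 0 && c);
      assert(addWithCarry(1, 2, false, c) == 3 && !c);
    }
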
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
deleted file mode 100644
index 90908baaae9d..000000000000
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ /dev/null
@@ -1,83 +0,0 @@
-//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that Blackfin uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFIN_ISELLOWERING_H
-#define BLACKFIN_ISELLOWERING_H
-
-#include "llvm/Target/TargetLowering.h"
-#include "Blackfin.h"
-
-namespace llvm {
-
- namespace BFISD {
- enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- CALL, // A call instruction.
- RET_FLAG, // Return with a flag operand.
- Wrapper // Address wrapper
- };
- }
-
- class BlackfinTargetLowering : public TargetLowering {
- public:
- BlackfinTargetLowering(TargetMachine &TM);
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; }
- virtual EVT getSetCCResultType(EVT VT) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
-
- ConstraintType getConstraintType(const std::string &Constraint) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- const char *getTargetNodeName(unsigned Opcode) const;
-
- private:
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
- };
-} // end namespace llvm
-
-#endif // BLACKFIN_ISELLOWERING_H
diff --git a/lib/Target/Blackfin/BlackfinInstrFormats.td b/lib/Target/Blackfin/BlackfinInstrFormats.td
deleted file mode 100644
index d8e6e252e787..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrFormats.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-class InstBfin<dag outs, dag ins, string asmstr, list<dag> pattern>
- : Instruction {
- field bits<32> Inst;
-
- let Namespace = "BF";
-
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let AsmString = asmstr;
- let Pattern = pattern;
-}
-
-// Single-word (16-bit) instructions
-class F1<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstBfin<outs, ins, asmstr, pattern> {
-}
-
-// Double-word (32-bit) instructions
-class F2<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstBfin<outs, ins, asmstr, pattern> {
-}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
deleted file mode 100644
index c06a919708d6..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ /dev/null
@@ -1,256 +0,0 @@
-//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinInstrInfo.h"
-#include "BlackfinSubtarget.h"
-#include "Blackfin.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_CTOR
-#include "BlackfinGenInstrInfo.inc"
-
-using namespace llvm;
-
-BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
- : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
- RI(ST, *this),
- Subtarget(ST) {}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case BF::LOAD32fi:
- case BF::LOAD16fi:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the stack slot stored to. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case BF::STORE32fi:
- case BF::STORE16fi:
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned BlackfinInstrInfo::
-InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 1 || Cond.size() == 0) &&
- "Branch conditions have one component!");
-
- if (Cond.empty()) {
- // Unconditional branch?
- assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB);
- return 1;
- }
-
- // Conditional branch.
- llvm_unreachable("Implement conditional branches!");
-}
-
-void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (BF::D16RegClass.contains(DestReg, SrcReg)) {
- BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0);
- return;
- }
-
- if (BF::DRegClass.contains(DestReg)) {
- if (SrcReg == BF::NCC) {
- BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
- return;
- }
- if (SrcReg == BF::CC) {
- BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- }
-
- if (BF::DRegClass.contains(SrcReg)) {
- if (DestReg == BF::NCC) {
- BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
- return;
- }
- if (DestReg == BF::CC) {
- BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- }
-
-
- if (DestReg == BF::NCC && SrcReg == BF::CC) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (DestReg == BF::CC && SrcReg == BF::NCC) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- llvm_unreachable("Bad reg-to-reg copy");
-}
-
-static bool inClass(const TargetRegisterClass &Test,
- unsigned Reg,
- const TargetRegisterClass *RC) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return Test.contains(Reg);
- else
- return Test.hasSubClassEq(RC);
-}
-
-void
-BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned SrcReg,
- bool isKill,
- int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
-
- if (inClass(BF::DPRegClass, SrcReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::STORE32fi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::D16RegClass, SrcReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::STORE16fi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::AnyCCRegClass, SrcReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::STORE8fi))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+
- RC->getName()).c_str());
-}
-
-void BlackfinInstrInfo::
-storeRegToAddr(MachineFunction &MF,
- unsigned SrcReg,
- bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- llvm_unreachable("storeRegToAddr not implemented");
-}
-
-void
-BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- if (inClass(BF::DPRegClass, DestReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::D16RegClass, DestReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg)
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- if (inClass(BF::AnyCCRegClass, DestReg, RC)) {
- BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg)
- .addFrameIndex(FI)
- .addImm(0);
- return;
- }
-
- llvm_unreachable("Cannot load regclass from stack slot");
-}
-
-void BlackfinInstrInfo::
-loadRegFromAddr(MachineFunction &MF,
- unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const {
- llvm_unreachable("loadRegFromAddr not implemented");
-}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
deleted file mode 100644
index d22ddf0d7313..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ /dev/null
@@ -1,81 +0,0 @@
-//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFININSTRUCTIONINFO_H
-#define BLACKFININSTRUCTIONINFO_H
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "BlackfinRegisterInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "BlackfinGenInstrInfo.inc"
-
-namespace llvm {
-
- class BlackfinInstrInfo : public BlackfinGenInstrInfo {
- const BlackfinRegisterInfo RI;
- const BlackfinSubtarget& Subtarget;
- public:
- explicit BlackfinInstrInfo(BlackfinSubtarget &ST);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual unsigned
- InsertBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void storeRegToAddr(MachineFunction &MF,
- unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
deleted file mode 100644
index 5b59d7769c7e..000000000000
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ /dev/null
@@ -1,862 +0,0 @@
-//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the Blackfin instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction format superclass
-//===----------------------------------------------------------------------===//
-
-include "BlackfinInstrFormats.td"
-
-// These are target-independent nodes, but have target-specific formats.
-def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
- SDTCisVT<1, i32> ]>;
-
-def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-
-def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-
-def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>;
-
-//===----------------------------------------------------------------------===//
-// Transformations
-//===----------------------------------------------------------------------===//
-
-def trailingZeros_xform : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingZeros(),
- MVT::i32);
-}]>;
-
-def trailingOnes_xform : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getAPIntValue().countTrailingOnes(),
- MVT::i32);
-}]>;
-
-def LO16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((unsigned short)N->getZExtValue(), MVT::i16);
-}]>;
-
-def HI16 : SDNodeXForm<imm, [{
- // Transformation function: shift the immediate value down into the low bits.
- return CurDAG->getTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16);
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Immediates
-//===----------------------------------------------------------------------===//
-
-def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
-def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
-def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
-def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
-
-def uimm5m2 : PatLeaf<(imm), [{
- uint64_t value = N->getZExtValue();
- return value % 2 == 0 && isUInt<5>(value);
-}]>;
-
-def uimm6m4 : PatLeaf<(imm), [{
- uint64_t value = N->getZExtValue();
- return value % 4 == 0 && isUInt<6>(value);
-}]>;
-
-def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
-def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
-def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
-
-def ximm16 : PatLeaf<(imm), [{
- int64_t value = N->getSExtValue();
- return value < (1<<16) && value >= -(1<<15);
-}]>;
-
-def imm17m2 : PatLeaf<(imm), [{
- int64_t value = N->getSExtValue();
- return value % 2 == 0 && isInt<17>(value);
-}]>;
-
-def imm18m4 : PatLeaf<(imm), [{
- int64_t value = N->getSExtValue();
- return value % 4 == 0 && isInt<18>(value);
-}]>;
-
-// 32-bit bitmask transformed to a bit number
-def uimm5mask : Operand<i32>, PatLeaf<(imm), [{
- return isPowerOf2_32(N->getZExtValue());
-}], trailingZeros_xform>;
-
-// 32-bit inverse bitmask transformed to a bit number
-def uimm5imask : Operand<i32>, PatLeaf<(imm), [{
- return isPowerOf2_32(~N->getZExtValue());
-}], trailingOnes_xform>;
-
-//===----------------------------------------------------------------------===//
-// Operands
-//===----------------------------------------------------------------------===//
-
-def calltarget : Operand<iPTR>;
-
-def brtarget : Operand<OtherVT>;
-
-// Addressing modes
-def ADDRspii : ComplexPattern<i32, 2, "SelectADDRspii", [add, frameindex], []>;
-
-// Address operands
-def MEMii : Operand<i32> {
- let PrintMethod = "printMemoryOperand";
- let MIOperandInfo = (ops i32imm, i32imm);
-}
-
-//===----------------------------------------------------------------------===//
-// Instructions
-//===----------------------------------------------------------------------===//
-
-// Pseudo instructions.
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstBfin<outs, ins, asmstr, pattern>;
-
-let Defs = [SP], Uses = [SP] in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- "${:comment}ADJCALLSTACKDOWN $amt",
- [(BfinCallseqStart timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- "${:comment}ADJCALLSTACKUP $amt1 $amt2",
- [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Table C-9. Program Flow Control Instructions
-//===----------------------------------------------------------------------===//
-
-let isBranch = 1, isTerminator = 1 in {
-
-let isIndirectBranch = 1 in
-def JUMPp : F1<(outs), (ins P:$target),
- "JUMP ($target);",
- [(brind P:$target)]>;
-
-// TODO JUMP (PC-P)
-
-// NOTE: assembler chooses between JUMP.S and JUMP.L
-def JUMPa : F1<(outs), (ins brtarget:$target),
- "jump $target;",
- [(br bb:$target)]>;
-
-def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target),
- "if $cc jump $target;",
- [(brcond AnyCC:$cc, bb:$target)]>;
-}
-
-let isCall = 1,
- Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in {
-def CALLa: F1<(outs), (ins calltarget:$func, variable_ops),
- "call $func;", []>;
-def CALLp: F1<(outs), (ins P:$func, variable_ops),
- "call ($func);", [(BfinCall P:$func)]>;
-}
-
-let isReturn = 1,
- isTerminator = 1,
- isBarrier = 1,
- Uses = [RETS] in
-def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
-
-//===----------------------------------------------------------------------===//
-// Table C-10. Load / Store Instructions
-//===----------------------------------------------------------------------===//
-
-// Immediate constant loads
-
-// sext immediate, i32 D/P regs
-def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src),
- "$dst = $src (x);",
- [(set DP:$dst, imm7:$src)]>;
-
-// zext immediate, i32 reg groups 0-3
-def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src),
- "$dst = $src (z);",
- [(set GR:$dst, uimm16:$src)]>;
-
-// sext immediate, i32 reg groups 0-3
-def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src),
- "$dst = $src (x);",
- [(set GR:$dst, imm16:$src)]>;
-
-// Pseudo-instruction for loading a general 32-bit constant.
-def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src),
- "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);",
- [(set GR:$dst, imm:$src)]>;
-
-def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src),
- "$dst.h = $src; $dst.l = $src;", []>;
-
-
-// 16-bit immediate, i16 reg groups 0-3
-def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src),
- "$dst = $src;", []>;
-
-def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)),
- (LOAD32sym tglobaladdr:$addr)>;
-
-def : Pat<(BfinWrapper (i32 tjumptable:$addr)),
- (LOAD32sym tjumptable:$addr)>;
-
-// We cannot copy from GR16 to D16, and codegen wants to insert copies if we
-// emit GR16 instructions. As a hack, we use this fake instruction instead.
-def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src),
- "$dst = $src;",
- [(set D16:$dst, ximm16:$src)]>;
-
-// Memory loads with patterns
-
-def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr),
- "$dst = [$ptr];",
- [(set DP:$dst, (load P:$ptr))]>;
-
-// Pseudo-instruction for loading a stack slot
-def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem),
- "${:comment}FI $dst = [$mem];",
- [(set DP:$dst, (load ADDRspii:$mem))]>;
-
-// Note: Expands to multiple insns
-def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem),
- "${:comment}FI $dst = [$mem];",
- [(set D16:$dst, (load ADDRspii:$mem))]>;
-
-// Pseudo-instruction for loading a stack slot, used for AnyCC regs.
-// Replaced with Load D + CC=D
-def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem),
- "${:comment}FI $dst = B[$mem];",
- [(set AnyCC:$dst, (load ADDRspii:$mem))]>;
-
-def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = [$ptr + $off];",
- [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>;
-
-def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = [$ptr + $off];",
- [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>;
-
-def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = W[$ptr] (z);",
- [(set D:$dst, (zextloadi16 P:$ptr))]>;
-
-def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>;
-
-def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (z);",
- [(set D:$dst, (zextloadi16 (add P:$ptr,
- uimm5m2:$off)))]>;
-
-def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))),
- (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>;
-
-def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (z);",
- [(set D:$dst,
- (zextloadi16 (add P:$ptr, imm17m2:$off)))]>;
-
-def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))),
- (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>;
-
-def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = w[$ptr] (x);",
- [(set D:$dst, (sextloadi16 P:$ptr))]>;
-
-def LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (x);",
- [(set D:$dst,
- (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>;
-
-def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = w[$ptr + $off] (x);",
- [(set D:$dst,
- (sextloadi16 (add P:$ptr, imm17m2:$off)))]>;
-
-def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr),
- "$dst = w[$ptr];",
- [(set D16:$dst, (load PI:$ptr))]>;
-
-def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = B[$ptr] (z);",
- [(set D:$dst, (zextloadi8 P:$ptr))]>;
-
-def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
-def : Pat<(i16 (extloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
-def : Pat<(i16 (zextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
-
-def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = b[$ptr + $off] (z);",
- [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>;
-
-def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
- (LOAD32p_imm16_8z P:$ptr, imm:$off)>;
-def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
- (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- lo16)>;
-def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
- (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- lo16)>;
-
-def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
- "$dst = b[$ptr] (x);",
- [(set D:$dst, (sextloadi8 P:$ptr))]>;
-
-def : Pat<(i16 (sextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
-
-def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
- "$dst = b[$ptr + $off] (x);",
- [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>;
-
-def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
- (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
- lo16)>;
-// Memory loads without patterns
-
-let mayLoad = 1 in {
-
-multiclass LOAD_incdec<RegisterClass drc, RegisterClass prc,
- string mem="", string suf=";"> {
- def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
- !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>;
- def _dec : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr),
- !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>;
-}
-multiclass LOAD_incdecpost<RegisterClass drc, RegisterClass prc,
- string mem="", string suf=";">
- : LOAD_incdec<drc, prc, mem, suf> {
- def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off),
- !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>;
-}
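-// Illustrative expansion of the multiclasses above: the LOAD8z32p defm
-// below produces an _inc variant roughly equivalent to
-//   def LOAD8z32p_inc : F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr),
-//                          "$dst = b[$ptr++] (z);", []>;
-// where !subst replaces "M" with the "b" prefix and the suffix supplies
-// the " (z);" extension mode.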
-
-defm LOAD32p: LOAD_incdec<DP, P>;
-defm LOAD32i: LOAD_incdec<D, I>;
-defm LOAD8z32p: LOAD_incdec<D, P, "b", " (z);">;
-defm LOAD8s32p: LOAD_incdec<D, P, "b", " (x);">;
-defm LOADhi: LOAD_incdec<D16, I, "w">;
-defm LOAD16z32p: LOAD_incdecpost<D, P, "w", " (z);">;
-defm LOAD16s32p: LOAD_incdecpost<D, P, "w", " (x);">;
-
-def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
- "$dst = [$ptr ++ $off];", []>;
-
-// Note: $fp MUST be FP
-def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off),
- "$dst = [$fp - $off];", []>;
-
-def LOAD32i: F1<(outs D:$dst), (ins I:$ptr),
- "$dst = [$ptr];", []>;
-def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off),
- "$dst = [$ptr ++ $off];", []>;
-
-def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off),
- "$dst = w[$ptr ++ $off];", []>;
-
-}
-
-// Memory stores with patterns
-def STORE32p: F1<(outs), (ins DP:$val, P:$ptr),
- "[$ptr] = $val;",
- [(store DP:$val, P:$ptr)]>;
-
-// Pseudo-instructions for storing to a stack slot
-def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem),
- "${:comment}FI [$mem] = $val;",
- [(store DP:$val, ADDRspii:$mem)]>;
-
-// Note: This stack-storing pseudo-instruction is expanded to multiple insns
-def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem),
- "${:comment}FI [$mem] = $val;",
- [(store D16:$val, ADDRspii:$mem)]>;
-
-// Pseudo-instruction for storing an AnyCC register to a stack slot.
-// Replaced with D=CC + STORE byte
-def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem),
- "${:comment}FI b[$mem] = $val;",
- [(store AnyCC:$val, ADDRspii:$mem)]>;
-
-def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
- "[$ptr + $off] = $val;",
- [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>;
-
-def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off),
- "[$ptr + $off] = $val;",
- [(store DP:$val, (add P:$ptr, imm18m4:$off))]>;
-
-def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr),
- "w[$ptr] = $val;",
- [(store D16:$val, PI:$ptr)]>;
-
-def STORE8p: F1<(outs), (ins D:$val, P:$ptr),
- "b[$ptr] = $val;",
- [(truncstorei8 D:$val, P:$ptr)]>;
-
-def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off),
- "b[$ptr + $off] = $val;",
- [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>;
-
-let Constraints = "$ptr = $ptr_wb" in {
-
-multiclass STORE_incdec<RegisterClass drc, RegisterClass prc,
- int off=4, string pre=""> {
- def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
- !strconcat(pre, "[$ptr++] = $val;"),
- [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>;
- def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr),
- !strconcat(pre, "[$ptr--] = $val;"),
- [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr,
- (ineg off)))]>;
-}
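-// Illustrative expansion: the STORE8p defm below (inside the
-// "$ptr = $ptr_wb" constraint) yields an _inc variant roughly equivalent to
-//   def STORE8p_inc : F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr),
-//                        "b[$ptr++] = $val;",
-//                        [(set P:$ptr_wb, (post_store D:$val, P:$ptr, 1))]>;
-// i.e. a post-incrementing byte store that writes the updated pointer back.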
-
-defm STORE32p: STORE_incdec<DP, P>;
-defm STORE16i: STORE_incdec<D16, I, 2, "w">;
-defm STORE8p: STORE_incdec<D, P, 1, "b">;
-
-def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off),
- "[$ptr ++ $off] = $val;",
- [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>;
-
-def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off),
- "w[$ptr ++ $off] = $val;",
- [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>;
-}
-
-// Memory stores without patterns
-
-let mayStore = 1 in {
-
-// Note: only works for $fp == FP
-def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off),
- "[$fp - $off] = $val;", []>;
-
-def STORE32i: F1<(outs), (ins D:$val, I:$ptr),
- "[$ptr] = $val;", []>;
-
-def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
- "[$ptr++] = $val;", []>;
-
-def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr),
- "[$ptr--] = $val;", []>;
-
-def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
- "[$ptr ++ $off] = $val;", []>;
-}
-
-def : Pat<(truncstorei16 D:$val, PI:$ptr),
- (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- lo16), PI:$ptr)>;
-
-def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
- (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- hi16), PI:$ptr)>;
-
-def : Pat<(truncstorei8 D16L:$val, P:$ptr),
- (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- (i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
- lo16),
- P:$ptr)>;
-
-//===----------------------------------------------------------------------===//
-// Table C-11. Move Instructions.
-//===----------------------------------------------------------------------===//
-
-def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
- "$dst = $src;",
- []>;
-
-let Constraints = "$src1 = $dst" in
-def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
- "if $cc $dst = $src2;",
- [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
-
-let Defs = [AZ, AN, AC0, V] in {
-def MOVEzext: F1<(outs D:$dst), (ins D16L:$src),
- "$dst = $src (z);",
- [(set D:$dst, (zext D16L:$src))]>;
-
-def MOVEsext: F1<(outs D:$dst), (ins D16L:$src),
- "$dst = $src (x);",
- [(set D:$dst, (sext D16L:$src))]>;
-
-def MOVEzext8: F1<(outs D:$dst), (ins D:$src),
- "$dst = $src.b (z);",
- [(set D:$dst, (and D:$src, 0xff))]>;
-
-def MOVEsext8: F1<(outs D:$dst), (ins D:$src),
- "$dst = $src.b (x);",
- [(set D:$dst, (sext_inreg D:$src, i8))]>;
-
-}
-
-def : Pat<(sext_inreg D16L:$src, i8),
- (EXTRACT_SUBREG (MOVEsext8
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- D16L:$src,
- lo16)),
- lo16)>;
-
-def : Pat<(sext_inreg D:$src, i16),
- (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
-
-def : Pat<(and D:$src, 0xffff),
- (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
-
-def : Pat<(i32 (anyext D16L:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- (i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
- lo16)>;
-
-// TODO Dreg = Dreg_byte (X/Z)
-
-// TODO Accumulator moves
-
-//===----------------------------------------------------------------------===//
-// Table C-12. Stack Control Instructions
-//===----------------------------------------------------------------------===//
-
-let Uses = [SP], Defs = [SP] in {
-def PUSH: F1<(outs), (ins ALL:$src),
- "[--sp] = $src;", []> { let mayStore = 1; }
-
-// NOTE: POP does not work for DP regs, use LOAD instead
-def POP: F1<(outs ALL:$dst), (ins),
- "$dst = [sp++];", []> { let mayLoad = 1; }
-}
-
-// TODO: push/pop multiple
-
-def LINK: F2<(outs), (ins i32imm:$amount),
- "link $amount;", []>;
-
-def UNLINK: F2<(outs), (ins),
- "unlink;", []>;
-
-//===----------------------------------------------------------------------===//
-// Table C-13. Control Code Bit Management Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass SETCC<PatFrag opnode, PatFrag invnode, string cond, string suf=";"> {
- def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- [(set JustCC:$cc, (opnode D:$a, D:$b))]>;
-
- def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>;
-
- def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- []>;
-
- def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b),
- !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf),
- [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>;
-}
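-// For example, the SETEQ defm below expands its dd form to roughly
-//   def SETEQdd : F1<(outs JustCC:$cc), (ins D:$a, D:$b),
-//                    "cc = $a == $b;",
-//                    [(set JustCC:$cc, (seteq D:$a, D:$b))]>;
-// while the unsigned variants (SETULT, SETULE) append the " (iu);" suffix.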
-
-defm SETEQ : SETCC<seteq, setne, "==">;
-defm SETLT : SETCC<setlt, setge, "<">;
-defm SETLE : SETCC<setle, setgt, "<=">;
-defm SETULT : SETCC<setult, setuge, "<", " (iu);">;
-defm SETULE : SETCC<setule, setugt, "<=", " (iu);">;
-
-def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b),
- "cc = $a == $b;",
- [(set NotCC:$cc, (setne D:$a, D:$b))]>;
-
-def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>;
-def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>;
-def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>;
-def : Pat<(setuge D:$a, D:$b), (SETULEdd D:$b, D:$a)>;
-
-// TODO: compare pointer for P-P comparisons
-// TODO: compare accumulator
-
-let Defs = [AC0] in
-def OR_ac0_cc : F1<(outs), (ins JustCC:$cc),
- "ac0 \\|= cc;", []>;
-
-let Uses = [AC0] in
-def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins),
- "cc = ac0;", []>;
-
-def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb),
- "cc = !cc;", []>;
-
-def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
- "cc = !cc;", []>;
-
-def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
- "$dst = $cc;", []>;
-
-def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
- "$dst = cc;", []>;
-
-def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src),
- "cc = $src;",
- [(set AnyCC:$cc, (setne D:$src, 0))]>;
-
-//===----------------------------------------------------------------------===//
-// Table C-14. Logical Operations Instructions
-//===----------------------------------------------------------------------===//
-
-def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 & $src2;",
- [(set D:$dst, (and D:$src1, D:$src2))]>;
-
-def NOT: F1<(outs D:$dst), (ins D:$src),
- "$dst = ~$src;",
- [(set D:$dst, (not D:$src))]>;
-
-def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 \\| $src2;",
- [(set D:$dst, (or D:$src1, D:$src2))]>;
-
-def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 ^ $src2;",
- [(set D:$dst, (xor D:$src1, D:$src2))]>;
-
-// missing: BXOR, BXORSHIFT
-
-//===----------------------------------------------------------------------===//
-// Table C-15. Bit Operations Instructions
-//===----------------------------------------------------------------------===//
-
-let Constraints = "$src1 = $dst" in {
-def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
- "bitclr($dst, $src2);",
- [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
-
-def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
- "bitset($dst, $src2);",
- [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>;
-
-def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2),
- "bittgl($dst, $src2);",
- [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>;
-}
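-// Operand sketch (inferred from the patterns above): uimm5mask matches an
-// immediate of the form (1 << n), so setting bit 3 matches an OR with 0x8,
-// while uimm5imask matches the complement ~(1 << n), so clearing bit 3
-// matches an AND with 0xfffffff7.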
-
-def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
- "cc = bittst($src1, $src2);",
- [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2),
- (i32 0)))]>;
-
-def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2),
- "cc = !bittst($src1, $src2);",
- [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2),
- (i32 0)))]>;
-
-// TODO: DEPOSIT, EXTRACT, BITMUX
-
-def ONES: F2<(outs D16L:$dst), (ins D:$src),
- "$dst = ones $src;",
- [(set D16L:$dst, (trunc (ctpop D:$src)))]>;
-
-def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>;
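-// E.g. ctpop(0xF0F0F0F0) = 16: ONES writes the count to a 16-bit low half
-// and the pattern above widens it back to i32 with MOVEzext.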
-
-//===----------------------------------------------------------------------===//
-// Table C-16. Shift / Rotate Instructions
-//===----------------------------------------------------------------------===//
-
-multiclass SHIFT32<SDNode opnode, string ops> {
- def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount),
- !subst("XX", ops, "$dst XX= $amount;"),
- [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>;
- def r : F1<(outs D:$dst), (ins D:$src, D:$amount),
- !subst("XX", ops, "$dst XX= $amount;"),
- [(set D:$dst, (opnode D:$src, D:$amount))]>;
-}
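-// Illustrative expansion: the SRA defm below yields SRAi ("$dst >>>= $amount;"
-// with a uimm5 count) and SRAr (the register-amount form); SRL and SLL
-// substitute ">>" and "<<" respectively.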
-
-let Defs = [AZ, AN, V, VS],
- Constraints = "$src = $dst" in {
-defm SRA : SHIFT32<sra, ">>>">;
-defm SRL : SHIFT32<srl, ">>">;
-defm SLL : SHIFT32<shl, "<<">;
-}
-
-// TODO: automatic switching between 2-addr and 3-addr (?)
-
-let Defs = [AZ, AN, V, VS] in {
-def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
- "$dst = lshift $src by $amount;",
- [(set D:$dst, (shl D:$src, D16L:$amount))]>;
-
-// Arithmetic left shift saturates on overflow.
-def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount),
- "$dst = ashift $src by $amount;",
- [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>;
-
-def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
- "$dst = $src >>> $amount;",
- [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>;
-
-def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
- "$dst = $src >> $amount;",
- [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>;
-
-// Arithmetic left shift saturates on overflow.
-def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
- "$dst = ashift $src by $amount;",
- [(set D16:$dst, (sra D16:$src, (ineg D16L:$amount)))]>;
-
-def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount),
- "$dst = $src << $amount;",
- [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>;
-
-def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount),
- "$dst = lshift $src by $amount;",
- [(set D16:$dst, (shl D16:$src, D16L:$amount))]>;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Table C-17. Arithmetic Operations Instructions
-//===----------------------------------------------------------------------===//
-
-// TODO: ABS
-
-let Defs = [AZ, AN, AC0, V, VS] in {
-
-def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 + $src2;",
- [(set D:$dst, (add D:$src1, D:$src2))]>;
-
-def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 + $src2;",
- [(set D16:$dst, (add D16:$src1, D16:$src2))]>;
-
-let Constraints = "$src1 = $dst" in
-def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
- "$dst += $src2;",
- [(set D:$dst, (add D:$src1, imm7:$src2))]>;
-
-def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 - $src2;",
- [(set D:$dst, (sub D:$src1, D:$src2))]>;
-
-def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 - $src2;",
- [(set D16:$dst, (sub D16:$src1, D16:$src2))]>;
-
-}
-
-def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>;
-def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>;
-
-let Defs = [AZ, AN, V, VS] in
-def NEG: F1<(outs D:$dst), (ins D:$src),
- "$dst = -$src;",
- [(set D:$dst, (ineg D:$src))]>;
-
-// No pattern; having two i32 = i32 + i32 patterns would confuse isel.
-def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
- "$dst = $src1 + $src2;", []>;
-
-let Constraints = "$src1 = $dst" in
-def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
- "$dst += $src2;", []>;
-
-let Defs = [AZ, AN, V] in
-def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2),
- "$dst = $src1 + $src2 (rnd20);", []>;
-
-let Defs = [V, VS] in {
-def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (is);",
- [(set D16:$dst, (mul D16:$src1, D16:$src2))]>;
-
-def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (ih);",
- [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>;
-
-def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (is);",
- [(set D:$dst, (mul (sext D16:$src1), (sext D16:$src2)))]>;
-
-def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
- "$dst = $src1 * $src2 (is);",
- [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>;
-}
-
-let Constraints = "$src1 = $dst" in
-def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst *= $src2;",
- [(set D:$dst, (mul D:$src1, D:$src2))]>;
-
-//===----------------------------------------------------------------------===//
-// Table C-18. External Event Management Instructions
-//===----------------------------------------------------------------------===//
-
-def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>;
-def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>;
-def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>;
-def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>;
-def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>;
-def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>;
-def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>;
-def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>;
-def NOP : F1<(outs), (ins), "nop;", []>;
-def MNOP : F2<(outs), (ins), "mnop;", []>;
-def ABORT : F1<(outs), (ins), "abort;", []>;
-
-//===----------------------------------------------------------------------===//
-// Table C-19. Cache Control Instructions
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Table C-20. Video Pixel Operations Instructions
-//===----------------------------------------------------------------------===//
-
-def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = align8($src1, $src2);",
- [(set D:$dst, (or (shl D:$src1, (i32 24)),
- (srl D:$src2, (i32 8))))]>;
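-// Worked example (illustrative values): with $src1 = 0xAABBCCDD and
-// $src2 = 0x11223344, align8 produces 0xDD112233 -- the low byte of $src1
-// followed by the high three bytes of $src2, matching the shl/srl pattern.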
-
-def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = align16($src1, $src2);",
- [(set D:$dst, (or (shl D:$src1, (i32 16)),
- (srl D:$src2, (i32 16))))]>;
-
-def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2),
- "$dst = align16($src1, $src2);",
- [(set D:$dst, (or (shl D:$src1, (i32 8)),
- (srl D:$src2, (i32 24))))]>;
-
-def DISALGNEXCPT : F2<(outs), (ins), "disalgnexcpt;", []>;
-
-// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA,
-// BYTEPACK, BYTEUNPACK
-
-// Table C-21. Vector Operations Instructions
-
-// Patterns
-def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
- (CALLa tglobaladdr:$dst)>;
-def : Pat<(BfinCall (i32 texternalsym:$dst)),
- (CALLa texternalsym:$dst)>;
-def : Pat<(i16 (trunc D:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
deleted file mode 100644
index 71356768dd5d..000000000000
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of TargetIntrinsicInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinIntrinsicInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstring>
-
-using namespace llvm;
-
-namespace bfinIntrinsic {
-
- enum ID {
- last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1,
-#define GET_INTRINSIC_ENUM_VALUES
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_ENUM_VALUES
- , num_bfin_intrinsics
- };
-
-}
-
-std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys,
- unsigned numTys) const {
- static const char *const names[] = {
-#define GET_INTRINSIC_NAME_TABLE
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_NAME_TABLE
- };
-
- assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
- if (IntrID < Intrinsic::num_intrinsics)
- return std::string(); // Not a Blackfin intrinsic; return an empty name.
- assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID");
-
- std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
- return Result;
-}
-
-unsigned
-BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
- if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l'
- || Name[2] != 'v' || Name[3] != 'm')
- return 0; // All intrinsics start with 'llvm.'
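- // E.g. a lookup of "llvm.bfin.csync" passes this prefix check and is then
- // resolved by the tblgen-generated recognizer included below.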
-
-#define GET_FUNCTION_RECOGNIZER
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_FUNCTION_RECOGNIZER
- return 0;
-}
-
-bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
- // Overload Table
- const bool OTable[] = {
-#define GET_INTRINSIC_OVERLOAD_TABLE
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_OVERLOAD_TABLE
- };
- if (IntrID == 0)
- return false;
- else
- return OTable[IntrID - Intrinsic::num_intrinsics];
-}
-
-/// This defines the "getAttributes(ID id)" method.
-#define GET_INTRINSIC_ATTRIBUTES
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_ATTRIBUTES
-
-static FunctionType *getType(LLVMContext &Context, unsigned id) {
- Type *ResultTy = NULL;
- std::vector<Type*> ArgTys;
- bool IsVarArg = false;
-
-#define GET_INTRINSIC_GENERATOR
-#include "BlackfinGenIntrinsics.inc"
-#undef GET_INTRINSIC_GENERATOR
-
- return FunctionType::get(ResultTy, ArgTys, IsVarArg);
-}
-
-Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
- Type **Tys,
- unsigned numTy) const {
- assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded");
- AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID);
- return cast<Function>(M->getOrInsertFunction(getName(IntrID),
- getType(M->getContext(), IntrID),
- AList));
-}
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
deleted file mode 100644
index f05db5ad7cd9..000000000000
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h
+++ /dev/null
@@ -1,32 +0,0 @@
-//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of TargetIntrinsicInfo.
-//
-//===----------------------------------------------------------------------===//
-#ifndef BLACKFININTRINSICS_H
-#define BLACKFININTRINSICS_H
-
-#include "llvm/Target/TargetIntrinsicInfo.h"
-
-namespace llvm {
-
- class BlackfinIntrinsicInfo : public TargetIntrinsicInfo {
- public:
- std::string getName(unsigned IntrID, Type **Tys = 0,
- unsigned numTys = 0) const;
- unsigned lookupName(const char *Name, unsigned Len) const;
- bool isOverloaded(unsigned IID) const;
- Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0,
- unsigned numTys = 0) const;
- };
-
-}
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td
deleted file mode 100644
index ce21b082376f..000000000000
--- a/lib/Target/Blackfin/BlackfinIntrinsics.td
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines all of the Blackfin-specific intrinsics.
-//
-//===----------------------------------------------------------------------===//
-
-let TargetPrefix = "bfin", isTarget = 1 in {
-
-//===----------------------------------------------------------------------===//
-// Core synchronisation etc.
-//
-// These intrinsics have side effects. Each represents a single instruction,
-// but workarounds may be required depending on the CPU.
-
-// Execute csync instruction with workarounds
-def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
- Intrinsic<[]>;
-
-// Execute ssync instruction with workarounds
-def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
- Intrinsic<[]>;
-
-// Execute idle instruction with workarounds
-def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
- Intrinsic<[]>;
-
-}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
deleted file mode 100644
index 0d415c5f342b..000000000000
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ /dev/null
@@ -1,344 +0,0 @@
-//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Blackfin.h"
-#include "BlackfinRegisterInfo.h"
-#include "BlackfinSubtarget.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "BlackfinGenRegisterInfo.inc"
-
-using namespace llvm;
-
-BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
- const TargetInstrInfo &tii)
- : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {}
-
-const unsigned*
-BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- using namespace BF;
- static const unsigned CalleeSavedRegs[] = {
- FP,
- R4, R5, R6, R7,
- P3, P4, P5,
- 0 };
- return CalleeSavedRegs;
-}
-
-BitVector
-BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- using namespace BF;
- BitVector Reserved(getNumRegs());
- Reserved.set(AZ);
- Reserved.set(AN);
- Reserved.set(AQ);
- Reserved.set(AC0);
- Reserved.set(AC1);
- Reserved.set(AV0);
- Reserved.set(AV0S);
- Reserved.set(AV1);
- Reserved.set(AV1S);
- Reserved.set(V);
- Reserved.set(VS);
- Reserved.set(CYCLES).set(CYCLES2);
- Reserved.set(L0);
- Reserved.set(L1);
- Reserved.set(L2);
- Reserved.set(L3);
- Reserved.set(SP);
- Reserved.set(RETS);
- if (TFI->hasFP(MF))
- Reserved.set(FP);
- return Reserved;
-}
-
-bool BlackfinRegisterInfo::
-requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
-
-// Emit instructions to add delta to D/P register. ScratchReg must be of the
-// same class as Reg (P).
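-// E.g. (illustrative): delta = -40 fits isInt<7> and becomes a single
-// ADDpp_imm7 ("Reg += -40"); delta = 100000 is first materialized into
-// ScratchReg with loadConstant and then added with ADDpp (or ADD for D regs).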
-void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- unsigned ScratchReg,
- int delta) const {
- if (!delta)
- return;
- if (isInt<7>(delta)) {
- BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg)
- .addReg(Reg) // No kill on two-addr operand
- .addImm(delta);
- return;
- }
-
- // We must load delta into ScratchReg and add that.
- loadConstant(MBB, I, DL, ScratchReg, delta);
- if (BF::PRegClass.contains(Reg)) {
- assert(BF::PRegClass.contains(ScratchReg) &&
- "ScratchReg must be a P register");
- BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg)
- .addReg(Reg, RegState::Kill)
- .addReg(ScratchReg, RegState::Kill);
- } else {
- assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register");
- assert(BF::DRegClass.contains(ScratchReg) &&
- "ScratchReg must be a D register");
- BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg)
- .addReg(Reg, RegState::Kill)
- .addReg(ScratchReg, RegState::Kill);
- }
-}
-
-// Emit instructions to load a constant into D/P register
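-// Illustrative size tiers: 5 fits imm7 -> LOADimm7; 40000 fits uimm16 ->
-// LOADuimm16; -300 fits imm16 -> LOADimm16; 0x12345678 falls through to
-// the two LOAD16i half-register loads emitted below.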
-void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- int value) const {
- if (isInt<7>(value)) {
- BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value);
- return;
- }
-
- if (isUInt<16>(value)) {
- BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
- return;
- }
-
- if (isInt<16>(value)) {
- BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value);
- return;
- }
-
- // We must split into halves
- BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
- .addImm((value >> 16) & 0xffff)
- .addReg(Reg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
- .addImm(value & 0xffff)
- .addReg(Reg, RegState::ImplicitKill)
- .addReg(Reg, RegState::ImplicitDefine);
-}
-
-void BlackfinRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (!TFI->hasReservedCallFrame(MF)) {
- int64_t Amount = I->getOperand(0).getImm();
- if (Amount != 0) {
- assert(Amount%4 == 0 && "Unaligned call frame size");
- if (I->getOpcode() == BF::ADJCALLSTACKDOWN) {
- adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount);
- } else {
- assert(I->getOpcode() == BF::ADJCALLSTACKUP &&
- "Unknown call frame pseudo instruction");
- adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount);
- }
- }
- }
- MBB.erase(I);
-}
-
-/// findScratchRegister - Find a 'free' register. Try for a call-clobbered
-/// register first and then a spilled callee-saved register if that fails.
-static unsigned findScratchRegister(MachineBasicBlock::iterator II,
- RegScavenger *RS,
- const TargetRegisterClass *RC,
- int SPAdj) {
- assert(RS && "Register scavenging must be on");
- unsigned Reg = RS->FindUnusedReg(RC);
- if (Reg == 0)
- Reg = RS->scavengeRegister(RC, II, SPAdj);
- return Reg;
-}
-
-void
-BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned FIPos;
- for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) {
- assert(FIPos < MI.getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
- }
- int FrameIndex = MI.getOperand(FIPos).getIndex();
- assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm());
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex)
- + MI.getOperand(FIPos+1).getImm();
- unsigned BaseReg = BF::FP;
- if (TFI->hasFP(MF)) {
- assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer");
- } else {
- BaseReg = BF::SP;
- Offset += MF.getFrameInfo()->getStackSize() + SPAdj;
- }
-
- bool isStore = false;
-
- switch (MI.getOpcode()) {
- case BF::STORE32fi:
- isStore = true;
- // FALLTHROUGH into the shared 32-bit load/store handling below.
- case BF::LOAD32fi: {
- assert(Offset%4 == 0 && "Unaligned i32 stack access");
- assert(FIPos==1 && "Bad frame index operand");
- MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
- MI.getOperand(FIPos+1).setImm(Offset);
- if (isUInt<6>(Offset)) {
- MI.setDesc(TII.get(isStore
- ? BF::STORE32p_uimm6m4
- : BF::LOAD32p_uimm6m4));
- return;
- }
- if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
- MI.setDesc(TII.get(isStore
- ? BF::STORE32fp_nimm7m4
- : BF::LOAD32fp_nimm7m4));
- MI.getOperand(FIPos+1).setImm(-Offset);
- return;
- }
- if (isInt<18>(Offset)) {
- MI.setDesc(TII.get(isStore
- ? BF::STORE32p_imm18m4
- : BF::LOAD32p_imm18m4));
- return;
- }
- // Use RegScavenger to calculate proper offset...
- MI.dump();
- llvm_unreachable("Stack frame offset too big");
- break;
- }
- case BF::ADDpp: {
- assert(MI.getOperand(0).isReg() && "ADD instruction needs a register");
- unsigned DestReg = MI.getOperand(0).getReg();
- // We need to produce a stack offset in a P register. We emit:
- //   P0 = offset;
- //   P0 = BaseReg + P0;
- assert(FIPos==1 && "Bad frame index operand");
- loadConstant(MBB, II, DL, DestReg, Offset);
- MI.getOperand(1).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(2).ChangeToRegister(BaseReg, false);
- break;
- }
- case BF::STORE16fi:
- isStore = true;
- // FALLTHROUGH into the shared 16-bit load/store handling below.
- case BF::LOAD16fi: {
- assert(Offset%2 == 0 && "Unaligned i16 stack access");
- assert(FIPos==1 && "Bad frame index operand");
- // We need a P register to use as an address
- unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj);
- assert(ScratchReg && "Could not scavenge register");
- loadConstant(MBB, II, DL, ScratchReg, Offset);
- BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg)
- .addReg(ScratchReg, RegState::Kill)
- .addReg(BaseReg);
- MI.setDesc(TII.get(isStore ? BF::STORE16pi : BF::LOAD16pi));
- MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true);
- MI.RemoveOperand(2);
- break;
- }
- case BF::STORE8fi: {
- // This is an AnyCC spill; we need a scratch register.
- assert(FIPos==1 && "Bad frame index operand");
- MachineOperand SpillReg = MI.getOperand(0);
- unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
- assert(ScratchReg && "Could not scavenge register");
- if (SpillReg.getReg()==BF::NCC) {
- BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg)
- .addOperand(SpillReg);
- BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg)
- .addReg(ScratchReg).addImm(0);
- } else {
- BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg)
- .addOperand(SpillReg);
- }
- // STORE D
- MI.setDesc(TII.get(BF::STORE8p_imm16));
- MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true);
- MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
- MI.getOperand(FIPos+1).setImm(Offset);
- break;
- }
- case BF::LOAD8fi: {
- // This is an AnyCC restore; we need a scratch register.
- assert(FIPos==1 && "Bad frame index operand");
- MachineOperand SpillReg = MI.getOperand(0);
- unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj);
- assert(ScratchReg && "Could not scavenge register");
- MI.setDesc(TII.get(BF::LOAD32p_imm16_8z));
- MI.getOperand(0).ChangeToRegister(ScratchReg, true);
- MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
- MI.getOperand(FIPos+1).setImm(Offset);
- ++II;
- if (SpillReg.getReg()==BF::CC) {
- // CC = D
- BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC)
- .addReg(ScratchReg, RegState::Kill);
- } else {
- // Restore NCC (CC = D==0)
- BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(0);
- }
- break;
- }
- default:
- llvm_unreachable("Cannot eliminate frame index");
- break;
- }
-}
-
-unsigned
-BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- return TFI->hasFP(MF) ? BF::FP : BF::SP;
-}
-
-unsigned BlackfinRegisterInfo::getEHExceptionRegister() const {
- llvm_unreachable("What is the exception register");
- return 0;
-}
-
-unsigned BlackfinRegisterInfo::getEHHandlerRegister() const {
- llvm_unreachable("What is the exception handler register");
- return 0;
-}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
deleted file mode 100644
index 6ac22af793e0..000000000000
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ /dev/null
@@ -1,77 +0,0 @@
-//===- BlackfinRegisterInfo.h - Blackfin Register Information --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the Blackfin implementation of the TargetRegisterInfo
-// class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINREGISTERINFO_H
-#define BLACKFINREGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "BlackfinGenRegisterInfo.inc"
-
-namespace llvm {
-
- class BlackfinSubtarget;
- class TargetInstrInfo;
- class Type;
-
- struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
- BlackfinSubtarget &Subtarget;
- const TargetInstrInfo &TII;
-
- BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- // getSubReg implemented by tablegen
-
- const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const {
- return &BF::PRegClass;
- }
-
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-
- // Utility functions
- void adjustRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- unsigned ScratchReg,
- int delta) const;
- void loadConstant(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL,
- unsigned Reg,
- int value) const;
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
deleted file mode 100644
index 1c42205eb780..000000000000
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ /dev/null
@@ -1,277 +0,0 @@
-//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Declarations that describe the Blackfin register file
-//===----------------------------------------------------------------------===//
-
-// Subregister indices:
-//   lo16: .L (low 16 bits)
-//   hi16: .H (high 16 bits)
-//   lo32: .W (low 32 bits of a 40-bit accumulator)
-//   hi32: .X (extension paired with .W in a 40-bit accumulator)
-let Namespace = "BF" in {
-def lo16 : SubRegIndex;
-def hi16 : SubRegIndex;
-def lo32 : SubRegIndex;
-def hi32 : SubRegIndex;
-}
-
-// Registers are identified with 3-bit group and 3-bit ID numbers.
-class BlackfinReg<string n> : Register<n> {
- field bits<3> Group;
- field bits<3> Num;
- let Namespace = "BF";
-}
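-// For example (from the defs below): R0 is group 0, num 0; P5 is group 1,
-// num 5; SP and FP occupy pointer-group slots 6 and 7.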
-
-// Rc - 1-bit registers
-class Rc<bits<5> bitno, string n> : BlackfinReg<n> {
- field bits<5> BitNum = bitno;
-}
-
-// Rs - 16-bit integer registers
-class Rs<bits<3> group, bits<3> num, bits<1> hi, string n> : BlackfinReg<n> {
- let Group = group;
- let Num = num;
- field bits<1> High = hi;
-}
-
-// Ri - 32-bit integer registers with subregs
-class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
- let Group = group;
- let Num = num;
-}
-
-// Ra 40-bit accumulator registers
-class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
- let SubRegs = subs;
- let SubRegIndices = [hi32, lo32];
- let Group = 4;
- let Num = num;
-}
-
-// Two halves of 32-bit register
-multiclass Rss<bits<3> group, bits<3> num, string n> {
- def H : Rs<group, num, 1, !strconcat(n, ".h")>;
- def L : Rs<group, num, 0, !strconcat(n, ".l")>;
-}
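-// Illustrative expansion: "defm R0 : Rss<0, 0, "r0">" below defines the
-// halves R0H ("r0.h") and R0L ("r0.l"), which the R0 Rii def then lists
-// as its subregisters.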
-
-// Rii - 32-bit integer registers with subregs
-class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
- : BlackfinReg<n> {
- let SubRegs = subs;
- let SubRegIndices = [hi16, lo16];
- let Group = group;
- let Num = num;
-}
-
-// Status bits are all part of ASTAT
-def AZ : Rc<0, "az">;
-def AN : Rc<1, "an">;
-def CC : Rc<5, "cc">, DwarfRegNum<[34]>;
-def NCC : Rc<5, "!cc"> { let Aliases = [CC]; }
-def AQ : Rc<6, "aq">;
-def AC0 : Rc<12, "ac0">;
-def AC1 : Rc<13, "ac1">;
-def AV0 : Rc<16, "av0">;
-def AV0S : Rc<17, "av0s">;
-def AV1 : Rc<18, "av1">;
-def AV1S : Rc<19, "av1s">;
-def V : Rc<24, "v">;
-def VS : Rc<25, "vs">;
-// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD
-
-// Group 0: Integer registers
-defm R0 : Rss<0, 0, "r0">;
-def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>;
-defm R1 : Rss<0, 1, "r1">;
-def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>;
-defm R2 : Rss<0, 2, "r2">;
-def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>;
-defm R3 : Rss<0, 3, "r3">;
-def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>;
-defm R4 : Rss<0, 4, "r4">;
-def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>;
-defm R5 : Rss<0, 5, "r5">;
-def R5 : Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>;
-defm R6 : Rss<0, 6, "r6">;
-def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>;
-defm R7 : Rss<0, 7, "r7">;
-def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>;
-
-// Group 1: Pointer registers
-defm P0 : Rss<1, 0, "p0">;
-def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>;
-defm P1 : Rss<1, 1, "p1">;
-def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>;
-defm P2 : Rss<1, 2, "p2">;
-def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>;
-defm P3 : Rss<1, 3, "p3">;
-def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>;
-defm P4 : Rss<1, 4, "p4">;
-def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>;
-defm P5 : Rss<1, 5, "p5">;
-def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>;
-defm SP : Rss<1, 6, "sp">;
-def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>;
-defm FP : Rss<1, 7, "fp">;
-def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>;
-
-// Group 2: Index registers
-defm I0 : Rss<2, 0, "i0">;
-def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>;
-defm I1 : Rss<2, 1, "i1">;
-def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>;
-defm I2 : Rss<2, 2, "i2">;
-def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>;
-defm I3 : Rss<2, 3, "i3">;
-def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>;
-defm M0 : Rss<2, 4, "m0">;
-def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>;
-defm M1 : Rss<2, 5, "m1">;
-def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>;
-defm M2 : Rss<2, 6, "m2">;
-def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>;
-defm M3 : Rss<2, 7, "m3">;
-def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>;
-
-// Group 3: Cyclic indexing registers
-defm B0 : Rss<3, 0, "b0">;
-def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>;
-defm B1 : Rss<3, 1, "b1">;
-def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>;
-defm B2 : Rss<3, 2, "b2">;
-def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>;
-defm B3 : Rss<3, 3, "b3">;
-def B3 : Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>;
-defm L0 : Rss<3, 4, "l0">;
-def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>;
-defm L1 : Rss<3, 5, "l1">;
-def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>;
-defm L2 : Rss<3, 6, "l2">;
-def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>;
-defm L3 : Rss<3, 7, "l3">;
-def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>;
-
-// Accumulators
-def A0X : Ri <4, 0, "a0.x">;
-defm A0 : Rss<4, 1, "a0">;
-def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>;
-def A0 : Ra <0, "a0", [A0X, A0W]>;
-
-def A1X : Ri <4, 2, "a1.x">;
-defm A1 : Rss<4, 3, "a1">;
-def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>;
-def A1 : Ra <2, "a1", [A1X, A1W]>;
-
-def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>;
-def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>;
-def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>;
-def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
-def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
-
-def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
- let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
-}
-
-def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
-def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>;
-def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>;
-def SYSCFG : Ri<7, 2, "syscfg">;
-def CYCLES : Ri<6, 6, "cycles">;
-def CYCLES2 : Ri<6, 7, "cycles2">;
-
-// Hardware loops
-def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>;
-def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>;
-def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>;
-def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
-def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
-def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
-
-// Register classes.
-def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
-
-def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
-
-def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
-
-def P16L : RegisterClass<"BF", [i16], 16,
- (add (sequence "P%uL", 0, 5), SPL, FPL)>;
-
-def P16H : RegisterClass<"BF", [i16], 16,
- (add (sequence "P%uH", 0, 5), SPH, FPH)>;
-
-def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
-
-def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
-
-def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
-
-def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
-
-def GR16 : RegisterClass<"BF", [i16], 16,
- (add DP16,
- I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
- M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
- B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
- L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
-
-def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
- let SubRegClasses = [(D16L lo16), (D16H hi16)];
-}
-
-def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
- let SubRegClasses = [(P16L lo16), (P16H hi16)];
-}
-
-def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
- let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
-}
-
-def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
-def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
-def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
-def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
-
-def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
-
-def ALL : RegisterClass<"BF", [i32], 32,
- (add GR,
- A0X, A0W, A1X, A1W, ASTAT, RETS,
- LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
- USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
-
-def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
-
-// We are going to pretend that CC and !CC are 32-bit registers, even though
-// they can only hold 1 bit.
-let CopyCost = -1, Size = 8 in {
-def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
-def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
-def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
-def StatBit : RegisterClass<"BF", [i1], 8,
- (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
-}
-
-// Should be i40, but that isn't defined. It is not a legal type yet anyway.
-def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
-
-// Register classes to match inline asm constraints.
-def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
-def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
-def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
-def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
- B0, B1, B2, B3,
- L0, L1, L2, L3)>;
-def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
-def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
-def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
-def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
- RETE, ASTAT, SEQSTAT,
- USP)>;
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
deleted file mode 100644
index a21f696a62eb..000000000000
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the BlackfinSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "blackfin-selectiondag-info"
-#include "BlackfinTargetMachine.h"
-using namespace llvm;
-
-BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
- const BlackfinTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
-}
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
deleted file mode 100644
index f1ce3482f90f..000000000000
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the Blackfin subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINSELECTIONDAGINFO_H
-#define BLACKFINSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class BlackfinTargetMachine;
-
-class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
- ~BlackfinSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp
deleted file mode 100644
index 0bdce09177ed..000000000000
--- a/lib/Target/Blackfin/BlackfinSubtarget.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//===- BlackfinSubtarget.cpp - BLACKFIN Subtarget Information -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Blackfin-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinSubtarget.h"
-#include "Blackfin.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "BlackfinGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS)
- : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
- icplb(false),
- wa_mi_shift(false),
- wa_csync(false),
- wa_specld(false),
- wa_mmr_stall(false),
- wa_lcregs(false),
- wa_hwloop(false),
- wa_ind_call(false),
- wa_killed_mmr(false),
- wa_rets(false)
-{
- std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "generic";
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-}
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h
deleted file mode 100644
index 1a01a81116d6..000000000000
--- a/lib/Target/Blackfin/BlackfinSubtarget.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Blackfin-specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFIN_SUBTARGET_H
-#define BLACKFIN_SUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "BlackfinGenSubtargetInfo.inc"
-
-namespace llvm {
-class StringRef;
-
- class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
- bool sdram;
- bool icplb;
- bool wa_mi_shift;
- bool wa_csync;
- bool wa_specld;
- bool wa_mmr_stall;
- bool wa_lcregs;
- bool wa_hwloop;
- bool wa_ind_call;
- bool wa_killed_mmr;
- bool wa_rets;
- public:
- BlackfinSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- };
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
deleted file mode 100644
index a4ae46b90fa0..000000000000
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Blackfin-specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinTargetMachine.h"
-#include "Blackfin.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-extern "C" void LLVMInitializeBlackfinTarget() {
- RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
-}
-
-BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
- StringRef TT,
- StringRef CPU,
- StringRef FS,
- Reloc::Model RM,
- CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- DataLayout("e-p:32:32-i64:32-f64:32-n32"),
- Subtarget(TT, CPU, FS),
- TLInfo(*this),
- TSInfo(*this),
- InstrInfo(Subtarget),
- FrameLowering(Subtarget) {
-}
-
-bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createBlackfinISelDag(*this, OptLevel));
- return false;
-}
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
deleted file mode 100644
index c85337fe237f..000000000000
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the Blackfin specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINTARGETMACHINE_H
-#define BLACKFINTARGETMACHINE_H
-
-#include "BlackfinInstrInfo.h"
-#include "BlackfinIntrinsicInfo.h"
-#include "BlackfinISelLowering.h"
-#include "BlackfinFrameLowering.h"
-#include "BlackfinSubtarget.h"
-#include "BlackfinSelectionDAGInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
- class BlackfinTargetMachine : public LLVMTargetMachine {
- const TargetData DataLayout;
- BlackfinSubtarget Subtarget;
- BlackfinTargetLowering TLInfo;
- BlackfinSelectionDAGInfo TSInfo;
- BlackfinInstrInfo InstrInfo;
- BlackfinFrameLowering FrameLowering;
- BlackfinIntrinsicInfo IntrinsicInfo;
- public:
- BlackfinTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
- virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const BlackfinSubtarget *getSubtargetImpl() const {
- return &Subtarget;
- }
- virtual const BlackfinRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
- virtual const BlackfinTargetLowering* getTargetLowering() const {
- return &TLInfo;
- }
- virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
- virtual const TargetData *getTargetData() const { return &DataLayout; }
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- const TargetIntrinsicInfo *getIntrinsicInfo() const {
- return &IntrinsicInfo;
- }
- };
-
-} // end namespace llvm
-
-#endif
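
The virtual getters in this header are the only way the rest of codegen
reaches the Blackfin hooks; generic passes never name BlackfinInstrInfo or
BlackfinRegisterInfo directly. A sketch of a hypothetical machine pass making
that indirection explicit (3.0-era CodeGen headers assumed):

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetMachine.h"

  namespace {
  struct TouchTargetHooks : public llvm::MachineFunctionPass {
    static char ID;
    TouchTargetHooks() : llvm::MachineFunctionPass(ID) {}
    virtual bool runOnMachineFunction(llvm::MachineFunction &MF) {
      // Only the abstract interface is visible here; target overrides
      // such as the ones above are reached through virtual dispatch.
      const llvm::TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
      (void)TII;
      return false; // nothing modified
    }
  };
  char TouchTargetHooks::ID = 0;
  }
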
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
deleted file mode 100644
index 94d05fbf8878..000000000000
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS Blackfin.td)
-
-llvm_tablegen(BlackfinGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(BlackfinGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
-add_public_tablegen_target(BlackfinCommonTableGen)
-
-add_llvm_target(BlackfinCodeGen
- BlackfinAsmPrinter.cpp
- BlackfinInstrInfo.cpp
- BlackfinIntrinsicInfo.cpp
- BlackfinISelDAGToDAG.cpp
- BlackfinISelLowering.cpp
- BlackfinFrameLowering.cpp
- BlackfinRegisterInfo.cpp
- BlackfinSubtarget.cpp
- BlackfinTargetMachine.cpp
- BlackfinSelectionDAGInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMBlackfinCodeGen
- LLVMAsmPrinter
- LLVMBlackfinDesc
- LLVMBlackfinInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
deleted file mode 100644
index 5b9d4a29794e..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definitions of the BlackfinMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinMCAsmInfo.h"
-
-using namespace llvm;
-
-BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
- GlobalPrefix = "_";
- CommentString = "//";
- HasSetDirective = false;
-}
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
deleted file mode 100644
index c372aa247e04..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the BlackfinMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINTARGETASMINFO_H
-#define BLACKFINTARGETASMINFO_H
-
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
-
- struct BlackfinMCAsmInfo : public MCAsmInfo {
- explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
- };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
deleted file mode 100644
index 272e3c2bbb75..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Blackfin specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "BlackfinMCTargetDesc.h"
-#include "BlackfinMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "BlackfinGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "BlackfinGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "BlackfinGenRegisterInfo.inc"
-
-using namespace llvm;
-
-
-static MCInstrInfo *createBlackfinMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitBlackfinMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitBlackfinMCRegisterInfo(X, BF::RETS);
- return X;
-}
-
-static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
- return X;
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeBlackfinTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget,
- createBlackfinMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
- createBlackfinMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget,
- createBlackfinMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
- createBlackfinMCSubtargetInfo);
-}
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
deleted file mode 100644
index 5bffe94fc582..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides Blackfin specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef BLACKFINMCTARGETDESC_H
-#define BLACKFINMCTARGETDESC_H
-
-namespace llvm {
-class MCSubtargetInfo;
-class Target;
-class StringRef;
-
-extern Target TheBlackfinTarget;
-
-} // End llvm namespace
-
-// Defines symbolic names for Blackfin registers. This defines a mapping from
-// register name to register number.
-#define GET_REGINFO_ENUM
-#include "BlackfinGenRegisterInfo.inc"
-
-// Defines symbolic names for the Blackfin instructions.
-#define GET_INSTRINFO_ENUM
-#include "BlackfinGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "BlackfinGenSubtargetInfo.inc"
-
-#endif
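
The GET_*_ENUM includes above and the GET_*_MC_DESC includes in
BlackfinMCTargetDesc.cpp are the standard tablegen idiom: each generated .inc
file carries several sections, and a consumer selects one by defining the
matching guard macro before the #include. Schematically, with an illustrative
FooGen file name:

  // In a header: pull in just the register enum.
  #define GET_REGINFO_ENUM
  #include "FooGenRegisterInfo.inc"

  // In exactly one .cpp file: pull in the descriptor tables and the
  // InitFooMCRegisterInfo() helper that the factory functions call.
  #define GET_REGINFO_MC_DESC
  #include "FooGenRegisterInfo.inc"
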
diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 73315d852cbd..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-add_llvm_library(LLVMBlackfinDesc
- BlackfinMCTargetDesc.cpp
- BlackfinMCAsmInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMBlackfinDesc
- LLVMBlackfinInfo
- LLVMMC
- )
-
-add_dependencies(LLVMBlackfinDesc BlackfinCommonTableGen)
diff --git a/lib/Target/Blackfin/MCTargetDesc/Makefile b/lib/Target/Blackfin/MCTargetDesc/Makefile
deleted file mode 100644
index 6b26101f4473..000000000000
--- a/lib/Target/Blackfin/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/Blackfin/MCTargetDesc/Makefile -----------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMBlackfinDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
deleted file mode 100644
index 756ac6bcd8a0..000000000000
--- a/lib/Target/Blackfin/Makefile
+++ /dev/null
@@ -1,23 +0,0 @@
-##===- lib/Target/Blackfin/Makefile ------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMBlackfinCodeGen
-TARGET = Blackfin
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \
- BlackfinGenAsmWriter.inc \
- BlackfinGenDAGISel.inc BlackfinGenSubtargetInfo.inc \
- BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/Blackfin/README.txt b/lib/Target/Blackfin/README.txt
deleted file mode 100644
index b4c8227cd645..000000000000
--- a/lib/Target/Blackfin/README.txt
+++ /dev/null
@@ -1,244 +0,0 @@
-//===-- README.txt - Notes for Blackfin Target ------------------*- org -*-===//
-
-* Condition codes
-** DONE Problem with asymmetric SETCC operations
-The instruction
-
- CC = R0 < 2
-
-is not symmetric - there is no R0 > 2 instruction. On the other hand, IF CC
-JUMP can take both CC and !CC as a condition. We cannot pattern-match (brcond
-(not cc), target); the DAG optimizer removes that kind of thing.
-
-This is handled by creating a pseudo-register NCC that aliases CC. Register
-classes JustCC and NotCC are used to control the inversion of CC.
-
-** DONE CC as an i32 register
-The AnyCC register class pretends to hold i32 values. It can only represent the
-values 0 and 1, but we can copy to and from the D class. This hack makes it
-possible to represent the setcc instruction without having i1 as a legal type.
-
-In most cases, the CC register is set by a "CC = .." or BITTST instruction, and
-then used in a conditional branch or move. The code generator thinks it is
-moving 32 bits, but the value stays in CC. In other cases, the result of a
-comparison is actually used as an i32 number, and CC will be copied to a D
-register.
-
-* Stack frames
-** TODO Use Push/Pop instructions
-We should use the push/pop instructions when saving callee-saved
-registers. They are smaller, and we may even use push multiple instructions.
-
-** TODO requiresRegisterScavenging
-We need more intelligence in determining when the scavenger is needed. We
-should keep track of:
-- Spilling D16 registers
-- Spilling AnyCC registers
-
-* Assembler
-** TODO Implement PrintGlobalVariable
-** TODO Remove LOAD32sym
-It's a hack combining two instructions by concatenation.
-
-* Inline Assembly
-
-These are the GCC constraints from bfin/constraints.md:
-
-| Code | Register class | LLVM |
-|-------+-------------------------------------------+------|
-| a | P | C |
-| d | D | C |
-| z | Call clobbered P (P0, P1, P2) | X |
-| D | EvenD | X |
-| W | OddD | X |
-| e | Accu | C |
-| A | A0 | S |
-| B | A1 | S |
-| b | I | C |
-| v | B | C |
-| f | M | C |
-| c | Circular I, B, L | X |
-| C | JustCC | S |
-| t | LoopTop | X |
-| u | LoopBottom | X |
-| k | LoopCount | X |
-| x | GR | C |
-| y | RET*, ASTAT, SEQSTAT, USP | X |
-| w | ALL | C |
-| Z | The FD-PIC GOT pointer (P3) | S |
-| Y | The FD-PIC function pointer register (P1) | S |
-| q0-q7 | R0-R7 individually | |
-| qA | P0 | |
-|-------+-------------------------------------------+------|
-| Code | Constant | |
-|-------+-------------------------------------------+------|
-| J | 1<<N, N<32 | |
-| Ks3 | imm3 | |
-| Ku3 | uimm3 | |
-| Ks4 | imm4 | |
-| Ku4 | uimm4 | |
-| Ks5 | imm5 | |
-| Ku5 | uimm5 | |
-| Ks7 | imm7 | |
-| KN7 | -imm7 | |
-| Ksh | imm16 | |
-| Kuh | uimm16 | |
-| L | ~(1<<N) | |
-| M1 | 0xff | |
-| M2 | 0xffff | |
-| P0-P4 | 0-4 | |
-| PA | Macflag, not M | |
-| PB | Macflag, only M | |
-| Q | Symbol | |
-
-** TODO Support all register classes
-* DAG combiner
-** Create test case for each Illegal SETCC case
-The DAG combiner may sometimes produce illegal i16 SETCC instructions.
-
-*** TODO SETCC (ctlz x), 5) == const
-*** TODO SETCC (and load, const) == const
-*** DONE SETCC (zext x) == const
-*** TODO SETCC (sext x) == const
-
-* Instruction selection
-** TODO Better immediate constants
-Like ARM, build constants as small imm + shift.
-
-** TODO Implement cycle counter
-We have CYCLES and CYCLES2 registers, but the readcyclecounter intrinsic wants
-to return i64, and the code generator doesn't know how to legalize that.
-
-** TODO Instruction alternatives
-Some instructions come in different variants, for example:
-
- D = D + D
- P = P + P
-
-Cross combinations are not allowed:
-
- P = D + D (bad)
-
-Similarly for the subreg pseudo-instructions:
-
- D16L = EXTRACT_SUBREG D16, bfin_subreg_lo16
- P16L = EXTRACT_SUBREG P16, bfin_subreg_lo16
-
-We want to take advantage of the alternative instructions. This could be done by
-changing the DAG after instruction selection.
-
-
-** Multipatterns for load/store
-We should try to identify multipatterns for load and store instructions. The
-available instruction matrix is a bit irregular.
-
-Loads:
-
-| Addr | D | P | D 16z | D 16s | D16 | D 8z | D 8s |
-|------------+---+---+-------+-------+-----+------+------|
-| P | * | * | * | * | * | * | * |
-| P++ | * | * | * | * | | * | * |
-| P-- | * | * | * | * | | * | * |
-| P+uimm5m2 | | | * | * | | | |
-| P+uimm6m4 | * | * | | | | | |
-| P+imm16 | | | | | | * | * |
-| P+imm17m2 | | | * | * | | | |
-| P+imm18m4 | * | * | | | | | |
-| P++P | * | | * | * | * | | |
-| FP-uimm7m4 | * | * | | | | | |
-| I | * | | | | * | | |
-| I++ | * | | | | * | | |
-| I-- | * | | | | * | | |
-| I++M | * | | | | | | |
-
-Stores:
-
-| Addr | D | P | D16H | D16L | D 8 |
-|------------+---+---+------+------+-----|
-| P | * | * | * | * | * |
-| P++ | * | * | | * | * |
-| P-- | * | * | | * | * |
-| P+uimm5m2 | | | | * | |
-| P+uimm6m4 | * | * | | | |
-| P+imm16 | | | | | * |
-| P+imm17m2 | | | | * | |
-| P+imm18m4 | * | * | | | |
-| P++P | * | | * | * | |
-| FP-uimm7m4 | * | * | | | |
-| I | * | | * | * | |
-| I++ | * | | * | * | |
-| I-- | * | | * | * | |
-| I++M | * | | | | |
-
-* Workarounds and features
-Blackfin CPUs have bugs. Each model comes in a number of silicon revisions with
-different bugs. We learn about the CPU model from the -mcpu switch.
-
-** Interpretation of -mcpu value
-- -mcpu=bf527 refers to the latest known BF527 revision
-- -mcpu=bf527-0.2 refers to silicon rev. 0.2
-- -mcpu=bf527-any refers to all known revisions
-- -mcpu=bf527-none disables all workarounds
-
-The -mcpu setting affects the __SILICON_REVISION__ macro and enabled workarounds:
-
-| -mcpu | __SILICON_REVISION__ | Workarounds |
-|------------+----------------------+--------------------|
-| bf527 | Def Latest | Specific to latest |
-| bf527-1.3 | Def 0x0103 | Specific to 1.3 |
-| bf527-any | Def 0xffff | All bf527-x.y |
-| bf527-none | Undefined | None |
-
-These are the known cores and revisions:
-
-| Core | Silicon | Processors |
-|-------------+--------------------+-------------------------|
-| Edinburgh | 0.3, 0.4, 0.5, 0.6 | BF531 BF532 BF533 |
-| Braemar | 0.2, 0.3 | BF534 BF536 BF537 |
-| Stirling | 0.3, 0.4, 0.5 | BF538 BF539 |
-| Moab | 0.0, 0.1, 0.2 | BF542 BF544 BF548 BF549 |
-| Teton | 0.3, 0.5 | BF561 |
-| Kookaburra | 0.0, 0.1, 0.2 | BF523 BF525 BF527 |
-| Mockingbird | 0.0, 0.1 | BF522 BF524 BF526 |
-| Brodie | 0.0, 0.1 | BF512 BF514 BF516 BF518 |
-
-
-** Compiler implemented workarounds
-Most workarounds are implemented in header files and source code using the
-__ADSPBF527__ macros. A few workarounds require compiler support.
-
-| Anomaly | Macro | GCC Switch |
-|----------+--------------------------------+------------------|
-| Any | __WORKAROUNDS_ENABLED | |
-| 05000074 | WA_05000074 | |
-| 05000244 | __WORKAROUND_SPECULATIVE_SYNCS | -mcsync-anomaly |
-| 05000245 | __WORKAROUND_SPECULATIVE_LOADS | -mspecld-anomaly |
-| 05000257 | WA_05000257 | |
-| 05000283 | WA_05000283 | |
-| 05000312 | WA_LOAD_LCREGS | |
-| 05000315 | WA_05000315 | |
-| 05000371 | __WORKAROUND_RETS | |
-| 05000426 | __WORKAROUND_INDIRECT_CALLS | Not -micplb |
-
-** GCC feature switches
-| Switch | Description |
-|---------------------------+----------------------------------------|
-| -msim | Use simulator runtime |
-| -momit-leaf-frame-pointer | Omit frame pointer for leaf functions |
-| -mlow64k | |
-| -mcsync-anomaly | |
-| -mspecld-anomaly | |
-| -mid-shared-library | |
-| -mleaf-id-shared-library | |
-| -mshared-library-id= | |
-| -msep-data | Enable separate data segment |
-| -mlong-calls | Use indirect calls |
-| -mfast-fp | |
-| -mfdpic | |
-| -minline-plt | |
-| -mstack-check-l1 | Do stack checking in L1 scratch memory |
-| -mmulticore | Enable multicore support |
-| -mcorea | Build for Core A |
-| -mcoreb | Build for Core B |
-| -msdram | Build for SDRAM |
-| -micplb | Assume ICPLBs are enabled at runtime. |
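
The -mcpu interpretation described above amounts to splitting the value at the
first dash into a core name and an optional revision tag. A stand-alone sketch
of that split (names and the "latest" encoding are illustrative, not the
backend's):

  #include <cstdio>
  #include <string>

  struct CpuSpec {
    std::string Core;     // e.g. "bf527"
    std::string Revision; // "latest", "any", "none", or "x.y"
  };

  static CpuSpec parseMCpu(const std::string &Arg) {
    CpuSpec S;
    std::string::size_type Dash = Arg.find('-');
    if (Dash == std::string::npos) {
      S.Core = Arg;
      S.Revision = "latest"; // bare "bf527" means the latest known revision
    } else {
      S.Core = Arg.substr(0, Dash);
      S.Revision = Arg.substr(Dash + 1); // "0.2", "any", or "none"
    }
    return S;
  }

  int main() {
    CpuSpec S = parseMCpu("bf527-0.2");
    std::printf("core=%s rev=%s\n", S.Core.c_str(), S.Revision.c_str());
    return 0;
  }
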
diff --git a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp b/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
deleted file mode 100644
index 57f1d3e95fbf..000000000000
--- a/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- BlackfinTargetInfo.cpp - Blackfin Target Implementation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "Blackfin.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-
-using namespace llvm;
-
-Target llvm::TheBlackfinTarget;
-
-extern "C" void LLVMInitializeBlackfinTargetInfo() {
- RegisterTarget<Triple::bfin> X(TheBlackfinTarget, "bfin",
- "Analog Devices Blackfin [experimental]");
-}
diff --git a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 771f092eb062..000000000000
--- a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMBlackfinInfo
- BlackfinTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMBlackfinInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
-add_dependencies(LLVMBlackfinInfo BlackfinCommonTableGen)
diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Blackfin/TargetInfo/Makefile
deleted file mode 100644
index c49cfbe69077..000000000000
--- a/lib/Target/Blackfin/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/Blackfin/TargetInfo/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMBlackfinInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
deleted file mode 100644
index 69d8c46a5024..000000000000
--- a/lib/Target/CBackend/CBackend.cpp
+++ /dev/null
@@ -1,3617 +0,0 @@
-//===-- CBackend.cpp - Library for converting LLVM code to C --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This library converts LLVM code to C code, compilable by GCC and other C
-// compilers.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CTargetMachine.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/ConstantsScanner.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Config/config.h"
-#include <algorithm>
-// Some MS header decided to define setjmp as _setjmp; undo this for this file.
-#ifdef _MSC_VER
-#undef setjmp
-#endif
-using namespace llvm;
-
-extern "C" void LLVMInitializeCBackendTarget() {
- // Register the target.
- RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
-}
-
-namespace {
- class CBEMCAsmInfo : public MCAsmInfo {
- public:
- CBEMCAsmInfo() {
- GlobalPrefix = "";
- PrivateGlobalPrefix = "";
- }
- };
-
- /// CWriter - This class is the main chunk of code that converts an LLVM
- /// module to a C translation unit.
- class CWriter : public FunctionPass, public InstVisitor<CWriter> {
- formatted_raw_ostream &Out;
- IntrinsicLowering *IL;
- Mangler *Mang;
- LoopInfo *LI;
- const Module *TheModule;
- const MCAsmInfo* TAsm;
- const MCRegisterInfo *MRI;
- const MCObjectFileInfo *MOFI;
- MCContext *TCtx;
- const TargetData* TD;
-
- std::map<const ConstantFP *, unsigned> FPConstantMap;
- std::set<Function*> intrinsicPrototypesAlreadyGenerated;
- std::set<const Argument*> ByValParams;
- unsigned FPCounter;
- unsigned OpaqueCounter;
- DenseMap<const Value*, unsigned> AnonValueNumbers;
- unsigned NextAnonValueNumber;
-
- /// UnnamedStructIDs - This contains a unique ID for each struct that is
- /// either anonymous or has no name.
- DenseMap<StructType*, unsigned> UnnamedStructIDs;
-
- public:
- static char ID;
- explicit CWriter(formatted_raw_ostream &o)
- : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
- TheModule(0), TAsm(0), MRI(0), MOFI(0), TCtx(0), TD(0),
- OpaqueCounter(0), NextAnonValueNumber(0) {
- initializeLoopInfoPass(*PassRegistry::getPassRegistry());
- FPCounter = 0;
- }
-
- virtual const char *getPassName() const { return "C backend"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
- }
-
- virtual bool doInitialization(Module &M);
-
- bool runOnFunction(Function &F) {
- // Do not codegen any 'available_externally' functions at all; they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- return false;
-
- LI = &getAnalysis<LoopInfo>();
-
- // Get rid of intrinsics we can't handle.
- lowerIntrinsics(F);
-
- // Output all floating point constants that cannot be printed accurately.
- printFloatingPointConstants(F);
-
- printFunction(F);
- return false;
- }
-
- virtual bool doFinalization(Module &M) {
- // Free memory...
- delete IL;
- delete TD;
- delete Mang;
- delete TCtx;
- delete TAsm;
- delete MRI;
- delete MOFI;
- FPConstantMap.clear();
- ByValParams.clear();
- intrinsicPrototypesAlreadyGenerated.clear();
- UnnamedStructIDs.clear();
- return false;
- }
-
- raw_ostream &printType(raw_ostream &Out, Type *Ty,
- bool isSigned = false,
- const std::string &VariableName = "",
- bool IgnoreName = false,
- const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(raw_ostream &Out, Type *Ty,
- bool isSigned,
- const std::string &NameSoFar = "");
-
- void printStructReturnPointerFunctionType(raw_ostream &Out,
- const AttrListPtr &PAL,
- PointerType *Ty);
-
- std::string getStructName(StructType *ST);
-
- /// writeOperandDeref - Print the result of dereferencing the specified
- /// operand with '*'. This is equivalent to printing '*' then using
- /// writeOperand, but avoids excess syntax in some cases.
- void writeOperandDeref(Value *Operand) {
- if (isAddressExposed(Operand)) {
- // Already something with an address exposed.
- writeOperandInternal(Operand);
- } else {
- Out << "*(";
- writeOperand(Operand);
- Out << ")";
- }
- }
-
- void writeOperand(Value *Operand, bool Static = false);
- void writeInstComputationInline(Instruction &I);
- void writeOperandInternal(Value *Operand, bool Static = false);
- void writeOperandWithCast(Value* Operand, unsigned Opcode);
- void writeOperandWithCast(Value* Operand, const ICmpInst &I);
- bool writeInstructionCast(const Instruction &I);
-
- void writeMemoryAccess(Value *Operand, Type *OperandType,
- bool IsVolatile, unsigned Alignment);
-
- private :
- std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
-
- void lowerIntrinsics(Function &F);
- /// Prints the definition of the intrinsic function F. Supports the
- /// intrinsics which need to be explicitly defined in the CBackend.
- void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
-
- void printModuleTypes();
- void printContainedStructs(Type *Ty, SmallPtrSet<Type *, 16> &);
- void printFloatingPointConstants(Function &F);
- void printFloatingPointConstants(const Constant *C);
- void printFunctionSignature(const Function *F, bool Prototype);
-
- void printFunction(Function &);
- void printBasicBlock(BasicBlock *BB);
- void printLoop(Loop *L);
-
- void printCast(unsigned opcode, Type *SrcTy, Type *DstTy);
- void printConstant(Constant *CPV, bool Static);
- void printConstantWithCast(Constant *CPV, unsigned Opcode);
- bool printConstExprCast(const ConstantExpr *CE, bool Static);
- void printConstantArray(ConstantArray *CPA, bool Static);
- void printConstantVector(ConstantVector *CV, bool Static);
-
- /// isAddressExposed - Return true if the specified value's name needs to
- /// have its address taken in order to get a C value of the correct type.
- /// This happens for global variables, byval parameters, and direct allocas.
- bool isAddressExposed(const Value *V) const {
- if (const Argument *A = dyn_cast<Argument>(V))
- return ByValParams.count(A);
- return isa<GlobalVariable>(V) || isDirectAlloca(V);
- }
-
- // isInlinableInst - Attempt to inline instructions into their uses to build
- // trees as much as possible. To do this, we have to consistently decide
- // what is acceptable to inline, so that variable declarations don't get
- // printed and an extra copy of the expr is not emitted.
- //
- static bool isInlinableInst(const Instruction &I) {
- // Always inline cmp instructions, even if they are shared by multiple
- // expressions. GCC generates horrible code if we don't.
- if (isa<CmpInst>(I))
- return true;
-
- // Must be an expression, must be used exactly once. If it is dead, we
- // emit it inline where it would go.
- if (I.getType() == Type::getVoidTy(I.getContext()) || !I.hasOneUse() ||
- isa<TerminatorInst>(I) || isa<CallInst>(I) || isa<PHINode>(I) ||
- isa<LoadInst>(I) || isa<VAArgInst>(I) || isa<InsertElementInst>(I) ||
- isa<InsertValueInst>(I))
- // Don't inline a load across a store or other bad things!
- return false;
-
- // Must not be used in inline asm, extractelement, or shufflevector.
- if (I.hasOneUse()) {
- const Instruction &User = cast<Instruction>(*I.use_back());
- if (isInlineAsm(User) || isa<ExtractElementInst>(User) ||
- isa<ShuffleVectorInst>(User))
- return false;
- }
-
- // Only inline an instruction if its use is in the same BB as the instruction.
- return I.getParent() == cast<Instruction>(I.use_back())->getParent();
- }
-
- // isDirectAlloca - Define fixed sized allocas in the entry block as direct
- // variables which are accessed with the & operator. This causes GCC to
- // generate significantly better code than emitting alloca calls directly.
- //
- static const AllocaInst *isDirectAlloca(const Value *V) {
- const AllocaInst *AI = dyn_cast<AllocaInst>(V);
- if (!AI) return 0;
- if (AI->isArrayAllocation())
- return 0; // FIXME: we can also inline fixed size array allocas!
- if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
- return 0;
- return AI;
- }
-
- // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
- static bool isInlineAsm(const Instruction& I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&I))
- return isa<InlineAsm>(CI->getCalledValue());
- return false;
- }
-
- // Instruction visitation functions
- friend class InstVisitor<CWriter>;
-
- void visitReturnInst(ReturnInst &I);
- void visitBranchInst(BranchInst &I);
- void visitSwitchInst(SwitchInst &I);
- void visitIndirectBrInst(IndirectBrInst &I);
- void visitInvokeInst(InvokeInst &I) {
- llvm_unreachable("Lowerinvoke pass didn't work!");
- }
- void visitUnwindInst(UnwindInst &I) {
- llvm_unreachable("Lowerinvoke pass didn't work!");
- }
- void visitResumeInst(ResumeInst &I) {
- llvm_unreachable("DwarfEHPrepare pass didn't work!");
- }
- void visitUnreachableInst(UnreachableInst &I);
-
- void visitPHINode(PHINode &I);
- void visitBinaryOperator(Instruction &I);
- void visitICmpInst(ICmpInst &I);
- void visitFCmpInst(FCmpInst &I);
-
- void visitCastInst (CastInst &I);
- void visitSelectInst(SelectInst &I);
- void visitCallInst (CallInst &I);
- void visitInlineAsm(CallInst &I);
- bool visitBuiltinCall(CallInst &I, Intrinsic::ID ID, bool &WroteCallee);
-
- void visitAllocaInst(AllocaInst &I);
- void visitLoadInst (LoadInst &I);
- void visitStoreInst (StoreInst &I);
- void visitGetElementPtrInst(GetElementPtrInst &I);
- void visitVAArgInst (VAArgInst &I);
-
- void visitInsertElementInst(InsertElementInst &I);
- void visitExtractElementInst(ExtractElementInst &I);
- void visitShuffleVectorInst(ShuffleVectorInst &SVI);
-
- void visitInsertValueInst(InsertValueInst &I);
- void visitExtractValueInst(ExtractValueInst &I);
-
- void visitInstruction(Instruction &I) {
-#ifndef NDEBUG
- errs() << "C Writer does not know about " << I;
-#endif
- llvm_unreachable(0);
- }
-
- void outputLValue(Instruction *I) {
- Out << " " << GetValueName(I) << " = ";
- }
-
- bool isGotoCodeNecessary(BasicBlock *From, BasicBlock *To);
- void printPHICopiesForSuccessor(BasicBlock *CurBlock,
- BasicBlock *Successor, unsigned Indent);
- void printBranchToBlock(BasicBlock *CurBlock, BasicBlock *SuccBlock,
- unsigned Indent);
- void printGEPExpression(Value *Ptr, gep_type_iterator I,
- gep_type_iterator E, bool Static);
-
- std::string GetValueName(const Value *Operand);
- };
-}
-
-char CWriter::ID = 0;
-
-
-
-static std::string CBEMangle(const std::string &S) {
- std::string Result;
-
- for (unsigned i = 0, e = S.size(); i != e; ++i)
- if (isalnum(S[i]) || S[i] == '_') {
- Result += S[i];
- } else {
- Result += '_';
- Result += 'A'+(S[i]&15);
- Result += 'A'+((S[i]>>4)&15);
- Result += '_';
- }
- return Result;
-}
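
CBEMangle maps every character outside [A-Za-z0-9_] to a four-character _XY_
sequence, where X and Y encode the low and high nibbles as letters; note that
'_' itself passes through unchanged, so the encoding is not collision-free. A
throwaway driver (it would have to live in this file, since the function is
static) showing the effect on a dotted name:

  #include <cstdio>

  int main() {
    // '.' is 0x2E: low nibble 0xE -> 'O', high nibble 0x2 -> 'C'.
    std::printf("%s\n", CBEMangle("l_foo.bar").c_str()); // l_foo_OC_bar
    return 0;
  }
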
-
-std::string CWriter::getStructName(StructType *ST) {
- if (!ST->isLiteral() && !ST->getName().empty())
- return CBEMangle("l_"+ST->getName().str());
-
- return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
-}
-
-
-/// printStructReturnPointerFunctionType - This is like printType for a struct
-/// return type, except that, instead of printing the type as void (*)(Struct*, ...),
-/// it prints it as "Struct (*)(...)" for struct-return functions.
-void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
- const AttrListPtr &PAL,
- PointerType *TheTy) {
- FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
- std::string tstr;
- raw_string_ostream FunctionInnards(tstr);
- FunctionInnards << " (*) (";
- bool PrintedType = false;
-
- FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
- Type *RetTy = cast<PointerType>(*I)->getElementType();
- unsigned Idx = 1;
- for (++I, ++Idx; I != E; ++I, ++Idx) {
- if (PrintedType)
- FunctionInnards << ", ";
- Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(ArgTy->isPointerTy());
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
- PrintedType = true;
- }
- if (FTy->isVarArg()) {
- if (!PrintedType)
- FunctionInnards << " int"; //dummy argument for empty vararg functions
- FunctionInnards << ", ...";
- } else if (!PrintedType) {
- FunctionInnards << "void";
- }
- FunctionInnards << ')';
- printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
-}
-
-raw_ostream &
-CWriter::printSimpleType(raw_ostream &Out, Type *Ty, bool isSigned,
- const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
- "Invalid type for printSimpleType");
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return Out << "void " << NameSoFar;
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits == 1)
- return Out << "bool " << NameSoFar;
- else if (NumBits <= 8)
- return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
- else if (NumBits <= 16)
- return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
- else if (NumBits <= 32)
- return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
- else if (NumBits <= 64)
- return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
- else {
- assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
- return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
- }
- }
- case Type::FloatTyID: return Out << "float " << NameSoFar;
- case Type::DoubleTyID: return Out << "double " << NameSoFar;
- // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
- // present matches host 'long double'.
- case Type::X86_FP80TyID:
- case Type::PPC_FP128TyID:
- case Type::FP128TyID: return Out << "long double " << NameSoFar;
-
- case Type::X86_MMXTyID:
- return printSimpleType(Out, Type::getInt32Ty(Ty->getContext()), isSigned,
- " __attribute__((vector_size(64))) " + NameSoFar);
-
- case Type::VectorTyID: {
- VectorType *VTy = cast<VectorType>(Ty);
- return printSimpleType(Out, VTy->getElementType(), isSigned,
- " __attribute__((vector_size(" +
- utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
- }
-
- default:
-#ifndef NDEBUG
- errs() << "Unknown primitive type: " << *Ty << "\n";
-#endif
- llvm_unreachable(0);
- }
-}
-
-// Given the Type* and the variable name, this prints out the variable
-// declaration.
-//
-raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
- bool isSigned, const std::string &NameSoFar,
- bool IgnoreName, const AttrListPtr &PAL) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
- printSimpleType(Out, Ty, isSigned, NameSoFar);
- return Out;
- }
-
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- FunctionType *FTy = cast<FunctionType>(Ty);
- std::string tstr;
- raw_string_ostream FunctionInnards(tstr);
- FunctionInnards << " (" << NameSoFar << ") (";
- unsigned Idx = 1;
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(ArgTy->isPointerTy());
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- if (I != FTy->param_begin())
- FunctionInnards << ", ";
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
- ++Idx;
- }
- if (FTy->isVarArg()) {
- if (!FTy->getNumParams())
- FunctionInnards << " int"; //dummy argument for empty vararg functions
- FunctionInnards << ", ...";
- } else if (!FTy->getNumParams()) {
- FunctionInnards << "void";
- }
- FunctionInnards << ')';
- printType(Out, FTy->getReturnType(),
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
- return Out;
- }
- case Type::StructTyID: {
- StructType *STy = cast<StructType>(Ty);
-
- // Check to see if the type is named.
- if (!IgnoreName)
- return Out << getStructName(STy) << ' ' << NameSoFar;
-
- Out << NameSoFar + " {\n";
- unsigned Idx = 0;
- for (StructType::element_iterator I = STy->element_begin(),
- E = STy->element_end(); I != E; ++I) {
- Out << " ";
- printType(Out, *I, false, "field" + utostr(Idx++));
- Out << ";\n";
- }
- Out << '}';
- if (STy->isPacked())
- Out << " __attribute__ ((packed))";
- return Out;
- }
-
- case Type::PointerTyID: {
- PointerType *PTy = cast<PointerType>(Ty);
- std::string ptrName = "*" + NameSoFar;
-
- if (PTy->getElementType()->isArrayTy() ||
- PTy->getElementType()->isVectorTy())
- ptrName = "(" + ptrName + ")";
-
- if (!PAL.isEmpty())
- // Must be a function ptr cast!
- return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
- return printType(Out, PTy->getElementType(), false, ptrName);
- }
-
- case Type::ArrayTyID: {
- ArrayType *ATy = cast<ArrayType>(Ty);
- unsigned NumElements = ATy->getNumElements();
- if (NumElements == 0) NumElements = 1;
- // Arrays are wrapped in structs to allow them to have normal
- // value semantics (avoiding the array "decay").
- Out << NameSoFar << " { ";
- printType(Out, ATy->getElementType(), false,
- "array[" + utostr(NumElements) + "]");
- return Out << "; }";
- }
-
- default:
- llvm_unreachable("Unhandled case in getTypeProps!");
- }
-
- return Out;
-}
-
-void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
-
- // As a special case, print the array as a string if it is an array of
- // ubytes or an array of sbytes with positive values.
- //
- Type *ETy = CPA->getType()->getElementType();
- bool isString = (ETy == Type::getInt8Ty(CPA->getContext()));
-
- // Make sure the last character is a null char, as automatically added by C
- if (isString && (CPA->getNumOperands() == 0 ||
- !cast<Constant>(*(CPA->op_end()-1))->isNullValue()))
- isString = false;
-
- if (isString) {
- Out << '\"';
- // Keep track of whether the last number was a hexadecimal escape.
- bool LastWasHex = false;
-
- // Do not include the last character, which we know is null
- for (unsigned i = 0, e = CPA->getNumOperands()-1; i != e; ++i) {
- unsigned char C = cast<ConstantInt>(CPA->getOperand(i))->getZExtValue();
-
- // Print it out literally if it is a printable character. The only thing
- // to be careful about is when the last letter output was a hex escape
- // code, in which case we have to be careful not to print out hex digits
- // explicitly (the C compiler thinks it is a continuation of the previous
- // character, sheesh...)
- //
- if (isprint(C) && (!LastWasHex || !isxdigit(C))) {
- LastWasHex = false;
- if (C == '"' || C == '\\')
- Out << "\\" << (char)C;
- else
- Out << (char)C;
- } else {
- LastWasHex = false;
- switch (C) {
- case '\n': Out << "\\n"; break;
- case '\t': Out << "\\t"; break;
- case '\r': Out << "\\r"; break;
- case '\v': Out << "\\v"; break;
- case '\a': Out << "\\a"; break;
- case '\"': Out << "\\\""; break;
- case '\'': Out << "\\\'"; break;
- default:
- Out << "\\x";
- Out << (char)(( C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'));
- Out << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
- LastWasHex = true;
- break;
- }
- }
- }
- Out << '\"';
- } else {
- Out << '{';
- if (CPA->getNumOperands()) {
- Out << ' ';
- printConstant(cast<Constant>(CPA->getOperand(0)), Static);
- for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
- Out << ", ";
- printConstant(cast<Constant>(CPA->getOperand(i)), Static);
- }
- }
- Out << " }";
- }
-}
-
-void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
- Out << '{';
- if (CP->getNumOperands()) {
- Out << ' ';
- printConstant(cast<Constant>(CP->getOperand(0)), Static);
- for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
- Out << ", ";
- printConstant(cast<Constant>(CP->getOperand(i)), Static);
- }
- }
- Out << " }";
-}
-
-// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
-// textually as a double (rather than as a reference to a stack-allocated
-// variable). We decide this by converting CFP to a string and back into a
-// double, and then checking whether the conversion results in a bit-equal
-// double to the original value of CFP. This depends on us and the target C
-// compiler agreeing on the conversion process (which is pretty likely since we
-// only deal in IEEE FP).
-//
-static bool isFPCSafeToPrint(const ConstantFP *CFP) {
- bool ignored;
- // Do long doubles in hex for now.
- if (CFP->getType() != Type::getFloatTy(CFP->getContext()) &&
- CFP->getType() != Type::getDoubleTy(CFP->getContext()))
- return false;
- APFloat APF = APFloat(CFP->getValueAPF()); // copy
- if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
-#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
- char Buffer[100];
- sprintf(Buffer, "%a", APF.convertToDouble());
- if (!strncmp(Buffer, "0x", 2) ||
- !strncmp(Buffer, "-0x", 3) ||
- !strncmp(Buffer, "+0x", 3))
- return APF.bitwiseIsEqual(APFloat(atof(Buffer)));
- return false;
-#else
- std::string StrVal = ftostr(APF);
-
- while (StrVal[0] == ' ')
- StrVal.erase(StrVal.begin());
-
- // Check to make sure that the stringized number is not some string like "Inf"
- // or NaN. Check that the string matches the "[-+]?[0-9]" regex.
- if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9')))
- // Reparse stringized version!
- return APF.bitwiseIsEqual(APFloat(atof(StrVal.c_str())));
- return false;
-#endif
-}
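
The check above can be tried in isolation. A minimal stand-alone version of
the same round-trip idea (using strtod and a bitwise compare rather than atof
and APFloat, so it is a sketch of the technique, not the code above), valid on
hosts whose printf supports "%a" as the HAVE_PRINTF_A guard tests:

  #include <cstdio>
  #include <cstdlib>
  #include <cstring>

  // Returns true if V survives a "%a" print and a reparse bit-for-bit.
  static bool roundTripsInHex(double V) {
    char Buffer[100];
    std::sprintf(Buffer, "%a", V);
    double Back = std::strtod(Buffer, 0);
    return std::memcmp(&V, &Back, sizeof(double)) == 0;
  }

  int main() {
    std::printf("0.1 round-trips: %d\n", roundTripsInHex(0.1));
    return 0;
  }
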
-
-/// Print out the casting for a cast operation. This emits the double cast
-/// needed for conversion to the destination type, when one is required.
-/// @brief Print a cast
-void CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
- // Print the destination type cast
- switch (opc) {
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::IntToPtr:
- case Instruction::Trunc:
- case Instruction::BitCast:
- case Instruction::FPExt:
- case Instruction::FPTrunc: // For these the DstTy sign doesn't matter
- Out << '(';
- printType(Out, DstTy);
- Out << ')';
- break;
- case Instruction::ZExt:
- case Instruction::PtrToInt:
- case Instruction::FPToUI: // For these, make sure we get an unsigned dest
- Out << '(';
- printSimpleType(Out, DstTy, false);
- Out << ')';
- break;
- case Instruction::SExt:
- case Instruction::FPToSI: // For these, make sure we get a signed dest
- Out << '(';
- printSimpleType(Out, DstTy, true);
- Out << ')';
- break;
- default:
- llvm_unreachable("Invalid cast opcode");
- }
-
- // Print the source type cast
- switch (opc) {
- case Instruction::UIToFP:
- case Instruction::ZExt:
- Out << '(';
- printSimpleType(Out, SrcTy, false);
- Out << ')';
- break;
- case Instruction::SIToFP:
- case Instruction::SExt:
- Out << '(';
- printSimpleType(Out, SrcTy, true);
- Out << ')';
- break;
- case Instruction::IntToPtr:
- case Instruction::PtrToInt:
- // Avoid "cast to pointer from integer of different size" warnings
- Out << "(unsigned long)";
- break;
- case Instruction::Trunc:
- case Instruction::BitCast:
- case Instruction::FPExt:
- case Instruction::FPTrunc:
- case Instruction::FPToSI:
- case Instruction::FPToUI:
- break; // These don't need a source cast.
- default:
- llvm_unreachable("Invalid cast opcode");
- break;
- }
-}
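
The reason printCast emits a cast on the source side as well is that C
conversion semantics depend on the signedness of the operand, not of the
result. A stand-alone illustration (mine, not CBE output) with the same i8 bit
pattern converted both ways:

  #include <cstdio>

  int main() {
    char Bits = (char)0xFF;
    std::printf("%f\n", (double)(signed char)Bits);   // SIToFP-style: -1.0
    std::printf("%f\n", (double)(unsigned char)Bits); // UIToFP-style: 255.0
    return 0;
  }
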
-
-// printConstant - The LLVM Constant to C Constant converter.
-void CWriter::printConstant(Constant *CPV, bool Static) {
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
- switch (CE->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- Out << "(";
- printCast(CE->getOpcode(), CE->getOperand(0)->getType(), CE->getType());
- if (CE->getOpcode() == Instruction::SExt &&
- CE->getOperand(0)->getType() == Type::getInt1Ty(CPV->getContext())) {
- // Make sure we really sext from bool here by subtracting from 0
- Out << "0-";
- }
- printConstant(CE->getOperand(0), Static);
- if (CE->getType() == Type::getInt1Ty(CPV->getContext()) &&
- (CE->getOpcode() == Instruction::Trunc ||
- CE->getOpcode() == Instruction::FPToUI ||
- CE->getOpcode() == Instruction::FPToSI ||
- CE->getOpcode() == Instruction::PtrToInt)) {
- // Make sure we really truncate to bool here by anding with 1
- Out << "&1u";
- }
- Out << ')';
- return;
-
- case Instruction::GetElementPtr:
- Out << "(";
- printGEPExpression(CE->getOperand(0), gep_type_begin(CPV),
- gep_type_end(CPV), Static);
- Out << ")";
- return;
- case Instruction::Select:
- Out << '(';
- printConstant(CE->getOperand(0), Static);
- Out << '?';
- printConstant(CE->getOperand(1), Static);
- Out << ':';
- printConstant(CE->getOperand(2), Static);
- Out << ')';
- return;
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::ICmp:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- {
- Out << '(';
- bool NeedsClosingParens = printConstExprCast(CE, Static);
- printConstantWithCast(CE->getOperand(0), CE->getOpcode());
- switch (CE->getOpcode()) {
- case Instruction::Add:
- case Instruction::FAdd: Out << " + "; break;
- case Instruction::Sub:
- case Instruction::FSub: Out << " - "; break;
- case Instruction::Mul:
- case Instruction::FMul: Out << " * "; break;
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem: Out << " % "; break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv: Out << " / "; break;
- case Instruction::And: Out << " & "; break;
- case Instruction::Or: Out << " | "; break;
- case Instruction::Xor: Out << " ^ "; break;
- case Instruction::Shl: Out << " << "; break;
- case Instruction::LShr:
- case Instruction::AShr: Out << " >> "; break;
- case Instruction::ICmp:
- switch (CE->getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << " == "; break;
- case ICmpInst::ICMP_NE: Out << " != "; break;
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_ULT: Out << " < "; break;
- case ICmpInst::ICMP_SLE:
- case ICmpInst::ICMP_ULE: Out << " <= "; break;
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_UGT: Out << " > "; break;
- case ICmpInst::ICMP_SGE:
- case ICmpInst::ICMP_UGE: Out << " >= "; break;
- default: llvm_unreachable("Illegal ICmp predicate");
- }
- break;
- default: llvm_unreachable("Illegal opcode here!");
- }
- printConstantWithCast(CE->getOperand(1), CE->getOpcode());
- if (NeedsClosingParens)
- Out << "))";
- Out << ')';
- return;
- }
- case Instruction::FCmp: {
- Out << '(';
- bool NeedsClosingParens = printConstExprCast(CE, Static);
- if (CE->getPredicate() == FCmpInst::FCMP_FALSE)
- Out << "0";
- else if (CE->getPredicate() == FCmpInst::FCMP_TRUE)
- Out << "1";
- else {
- const char* op = 0;
- switch (CE->getPredicate()) {
- default: llvm_unreachable("Illegal FCmp predicate");
- case FCmpInst::FCMP_ORD: op = "ord"; break;
- case FCmpInst::FCMP_UNO: op = "uno"; break;
- case FCmpInst::FCMP_UEQ: op = "ueq"; break;
- case FCmpInst::FCMP_UNE: op = "une"; break;
- case FCmpInst::FCMP_ULT: op = "ult"; break;
- case FCmpInst::FCMP_ULE: op = "ule"; break;
- case FCmpInst::FCMP_UGT: op = "ugt"; break;
- case FCmpInst::FCMP_UGE: op = "uge"; break;
- case FCmpInst::FCMP_OEQ: op = "oeq"; break;
- case FCmpInst::FCMP_ONE: op = "one"; break;
- case FCmpInst::FCMP_OLT: op = "olt"; break;
- case FCmpInst::FCMP_OLE: op = "ole"; break;
- case FCmpInst::FCMP_OGT: op = "ogt"; break;
- case FCmpInst::FCMP_OGE: op = "oge"; break;
- }
- Out << "llvm_fcmp_" << op << "(";
- printConstantWithCast(CE->getOperand(0), CE->getOpcode());
- Out << ", ";
- printConstantWithCast(CE->getOperand(1), CE->getOpcode());
- Out << ")";
- }
- if (NeedsClosingParens)
- Out << "))";
- Out << ')';
- return;
- }
- default:
-#ifndef NDEBUG
- errs() << "CWriter Error: Unhandled constant expression: "
- << *CE << "\n";
-#endif
- llvm_unreachable(0);
- }
- } else if (isa<UndefValue>(CPV) && CPV->getType()->isSingleValueType()) {
- Out << "((";
- printType(Out, CPV->getType()); // sign doesn't matter
- Out << ")/*UNDEF*/";
- if (!CPV->getType()->isVectorTy()) {
- Out << "0)";
- } else {
- Out << "{})";
- }
- return;
- }
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
- Type* Ty = CI->getType();
- if (Ty == Type::getInt1Ty(CPV->getContext()))
- Out << (CI->getZExtValue() ? '1' : '0');
- else if (Ty == Type::getInt32Ty(CPV->getContext()))
- Out << CI->getZExtValue() << 'u';
- else if (Ty->getPrimitiveSizeInBits() > 32)
- Out << CI->getZExtValue() << "ull";
- else {
- Out << "((";
- printSimpleType(Out, Ty, false) << ')';
- if (CI->isMinValue(true))
- Out << CI->getZExtValue() << 'u';
- else
- Out << CI->getSExtValue();
- Out << ')';
- }
- return;
- }
-
- switch (CPV->getType()->getTypeID()) {
- case Type::FloatTyID:
- case Type::DoubleTyID:
- case Type::X86_FP80TyID:
- case Type::PPC_FP128TyID:
- case Type::FP128TyID: {
- ConstantFP *FPC = cast<ConstantFP>(CPV);
- std::map<const ConstantFP*, unsigned>::iterator I = FPConstantMap.find(FPC);
- if (I != FPConstantMap.end()) {
- // Because of FP precision problems we must load from a stack allocated
- // value that holds the value in hex.
- Out << "(*(" << (FPC->getType() == Type::getFloatTy(CPV->getContext()) ?
- "float" :
- FPC->getType() == Type::getDoubleTy(CPV->getContext()) ?
- "double" :
- "long double")
- << "*)&FPConstant" << I->second << ')';
- } else {
- double V;
- if (FPC->getType() == Type::getFloatTy(CPV->getContext()))
- V = FPC->getValueAPF().convertToFloat();
- else if (FPC->getType() == Type::getDoubleTy(CPV->getContext()))
- V = FPC->getValueAPF().convertToDouble();
- else {
- // Long double. Convert the number to double, discarding precision.
- // This is not awesome, but it at least makes the CBE output somewhat
- // useful.
- APFloat Tmp = FPC->getValueAPF();
- bool LosesInfo;
- Tmp.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
- V = Tmp.convertToDouble();
- }
-
- if (IsNAN(V)) {
- // The value is NaN
-
- // FIXME the actual NaN bits should be emitted.
- // The prefix for a quiet NaN is 0x7FF8. For a signalling NaN,
- // it's 0x7ff4.
- const unsigned long QuietNaN = 0x7ff8UL;
- //const unsigned long SignalNaN = 0x7ff4UL;
-
- // We need to grab the first part of the FP #
- char Buffer[100];
-
- uint64_t ll = DoubleToBits(V);
- sprintf(Buffer, "0x%llx", static_cast<long long>(ll));
-
- std::string Num(&Buffer[0], &Buffer[6]);
- unsigned long Val = strtoul(Num.c_str(), 0, 16);
-
- if (FPC->getType() == Type::getFloatTy(FPC->getContext()))
- Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "F(\""
- << Buffer << "\") /*nan*/ ";
- else
- Out << "LLVM_NAN" << (Val == QuietNaN ? "" : "S") << "(\""
- << Buffer << "\") /*nan*/ ";
- } else if (IsInf(V)) {
- // The value is Inf
- if (V < 0) Out << '-';
- Out << "LLVM_INF" <<
- (FPC->getType() == Type::getFloatTy(FPC->getContext()) ? "F" : "")
- << " /*inf*/ ";
- } else {
- std::string Num;
-#if HAVE_PRINTF_A && ENABLE_CBE_PRINTF_A
- // Print out the constant as a floating point number.
- char Buffer[100];
- sprintf(Buffer, "%a", V);
- Num = Buffer;
-#else
- Num = ftostr(FPC->getValueAPF());
-#endif
- Out << Num;
- }
- }
- break;
- }
-
- case Type::ArrayTyID:
- // Use C99 compound expression literal initializer syntax.
- if (!Static) {
- Out << "(";
- printType(Out, CPV->getType());
- Out << ")";
- }
- Out << "{ "; // Arrays are wrapped in struct types.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
- printConstantArray(CA, Static);
- } else {
- assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- ArrayType *AT = cast<ArrayType>(CPV->getType());
- Out << '{';
- if (AT->getNumElements()) {
- Out << ' ';
- Constant *CZ = Constant::getNullValue(AT->getElementType());
- printConstant(CZ, Static);
- for (unsigned i = 1, e = AT->getNumElements(); i != e; ++i) {
- Out << ", ";
- printConstant(CZ, Static);
- }
- }
- Out << " }";
- }
- Out << " }"; // Arrays are wrapped in struct types.
- break;
-
- case Type::VectorTyID:
- // Use C99 compound expression literal initializer syntax.
- if (!Static) {
- Out << "(";
- printType(Out, CPV->getType());
- Out << ")";
- }
- if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
- printConstantVector(CV, Static);
- } else {
- assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
- VectorType *VT = cast<VectorType>(CPV->getType());
- Out << "{ ";
- Constant *CZ = Constant::getNullValue(VT->getElementType());
- printConstant(CZ, Static);
- for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
- Out << ", ";
- printConstant(CZ, Static);
- }
- Out << " }";
- }
- break;
-
- case Type::StructTyID:
- // Use C99 compound expression literal initializer syntax.
- if (!Static) {
- Out << "(";
- printType(Out, CPV->getType());
- Out << ")";
- }
- if (isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV)) {
- StructType *ST = cast<StructType>(CPV->getType());
- Out << '{';
- if (ST->getNumElements()) {
- Out << ' ';
- printConstant(Constant::getNullValue(ST->getElementType(0)), Static);
- for (unsigned i = 1, e = ST->getNumElements(); i != e; ++i) {
- Out << ", ";
- printConstant(Constant::getNullValue(ST->getElementType(i)), Static);
- }
- }
- Out << " }";
- } else {
- Out << '{';
- if (CPV->getNumOperands()) {
- Out << ' ';
- printConstant(cast<Constant>(CPV->getOperand(0)), Static);
- for (unsigned i = 1, e = CPV->getNumOperands(); i != e; ++i) {
- Out << ", ";
- printConstant(cast<Constant>(CPV->getOperand(i)), Static);
- }
- }
- Out << " }";
- }
- break;
-
- case Type::PointerTyID:
- if (isa<ConstantPointerNull>(CPV)) {
- Out << "((";
- printType(Out, CPV->getType()); // sign doesn't matter
- Out << ")/*NULL*/0)";
- break;
- } else if (GlobalValue *GV = dyn_cast<GlobalValue>(CPV)) {
- writeOperand(GV, Static);
- break;
- }
- // FALL THROUGH
- default:
-#ifndef NDEBUG
- errs() << "Unknown constant type: " << *CPV << "\n";
-#endif
- llvm_unreachable(0);
- }
-}
-
-// Some constant expressions need to be casted back to the original types
-// because their operands were casted to the expected type. This function takes
-// care of detecting that case and printing the cast for the ConstantExpr.
-bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
- bool NeedsExplicitCast = false;
- Type *Ty = CE->getOperand(0)->getType();
- bool TypeIsSigned = false;
- switch (CE->getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::URem:
- case Instruction::UDiv: NeedsExplicitCast = true; break;
- case Instruction::AShr:
- case Instruction::SRem:
- case Instruction::SDiv: NeedsExplicitCast = true; TypeIsSigned = true; break;
- case Instruction::SExt:
- Ty = CE->getType();
- NeedsExplicitCast = true;
- TypeIsSigned = true;
- break;
- case Instruction::ZExt:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- Ty = CE->getType();
- NeedsExplicitCast = true;
- break;
- default: break;
- }
- if (NeedsExplicitCast) {
- Out << "((";
- if (Ty->isIntegerTy() && Ty != Type::getInt1Ty(Ty->getContext()))
- printSimpleType(Out, Ty, TypeIsSigned);
- else
- printType(Out, Ty); // not integer, sign doesn't matter
- Out << ")(";
- }
- return NeedsExplicitCast;
-}
-
-// Print a constant assuming that it is the operand for a given Opcode. The
-// opcodes that care about sign need to cast their operands to the expected
-// type before the operation proceeds. This function does the casting.
-void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
-
- // Extract the operand's type; we'll need it below.
- Type* OpTy = CPV->getType();
-
- // Indicate whether to do the cast or not.
- bool shouldCast = false;
- bool typeIsSigned = false;
-
- // Based on the Opcode for which this Constant is being written, determine
- // the new type to which the operand should be casted by setting the value
- // of OpTy. If we change OpTy, also set shouldCast to true so it gets
- // casted below.
- switch (Opcode) {
- default:
- // for most instructions, it doesn't matter
- break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::UDiv:
- case Instruction::URem:
- shouldCast = true;
- break;
- case Instruction::AShr:
- case Instruction::SDiv:
- case Instruction::SRem:
- shouldCast = true;
- typeIsSigned = true;
- break;
- }
-
- // Write out the casted constant if we should, otherwise just write the
- // operand.
- if (shouldCast) {
- Out << "((";
- printSimpleType(Out, OpTy, typeIsSigned);
- Out << ")";
- printConstant(CPV, false);
- Out << ")";
- } else
- printConstant(CPV, false);
-}
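-
-// Illustrative sketch (the exact spelling comes from printSimpleType and
-// printConstant, and the names here are abbreviated): the i32 constant
-// operand of an "sdiv" is emitted as "((int)C)", while the same operand of a
-// "udiv" or "lshr" is emitted as "((unsigned int)C)", so the C-level
-// arithmetic matches the signedness LLVM assigns to the operation.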
-
-std::string CWriter::GetValueName(const Value *Operand) {
-
- // Resolve potential alias.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
- if (const Value *V = GA->resolveAliasedGlobal(false))
- Operand = V;
- }
-
- // Mangle globals with the standard mangler interface for LLC compatibility.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
- SmallString<128> Str;
- Mang->getNameWithPrefix(Str, GV, false);
- return CBEMangle(Str.str().str());
- }
-
- std::string Name = Operand->getName();
-
- if (Name.empty()) { // Assign unique names to local temporaries.
- unsigned &No = AnonValueNumbers[Operand];
- if (No == 0)
- No = ++NextAnonValueNumber;
- Name = "tmp__" + utostr(No);
- }
-
- std::string VarName;
- VarName.reserve(Name.capacity());
-
- for (std::string::iterator I = Name.begin(), E = Name.end();
- I != E; ++I) {
- char ch = *I;
-
- if (!((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
- (ch >= '0' && ch <= '9') || ch == '_')) {
- char buffer[5];
- // Print as unsigned char so a high-bit (negative) char cannot expand to
- // more than two hex digits and overflow the buffer.
- sprintf(buffer, "_%x_", (unsigned char)ch);
- VarName += buffer;
- } else
- VarName += ch;
- }
-
- return "llvm_cbe_" + VarName;
-}
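-
-// Examples, derived from the rules above: a value named "foo" becomes
-// "llvm_cbe_foo"; a name containing other characters, such as "foo.bar",
-// becomes "llvm_cbe_foo_2e_bar" (each illegal character is spelled
-// "_<hex>_"); an unnamed temporary is numbered, e.g. "llvm_cbe_tmp__1".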
-
-/// writeInstComputationInline - Emit the computation for the specified
-/// instruction inline, with no destination provided.
-void CWriter::writeInstComputationInline(Instruction &I) {
- // We can't currently support integer types other than 1, 8, 16, 32, 64.
- // Validate this.
- Type *Ty = I.getType();
- if (Ty->isIntegerTy() && (Ty!=Type::getInt1Ty(I.getContext()) &&
- Ty!=Type::getInt8Ty(I.getContext()) &&
- Ty!=Type::getInt16Ty(I.getContext()) &&
- Ty!=Type::getInt32Ty(I.getContext()) &&
- Ty!=Type::getInt64Ty(I.getContext()))) {
- report_fatal_error("The C backend does not currently support integer "
- "types of widths other than 1, 8, 16, 32, 64.\n"
- "This is being tracked as PR 4158.");
- }
-
- // If this is a non-trivial bool computation, make sure to truncate down to
- // a 1 bit value. This is important because we want "add i1 x, y" to return
- // "0" when x and y are true, not "2" for example.
- bool NeedBoolTrunc = false;
- if (I.getType() == Type::getInt1Ty(I.getContext()) &&
- !isa<ICmpInst>(I) && !isa<FCmpInst>(I))
- NeedBoolTrunc = true;
-
- if (NeedBoolTrunc)
- Out << "((";
-
- visit(I);
-
- if (NeedBoolTrunc)
- Out << ")&1)";
-}
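-
-// For example, "add i1 %x, %y" is emitted roughly as "(((x + y))&1)" (names
-// abbreviated), so the result stays a 0/1 value even though C performs the
-// addition in a promoted integer type.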
-
-
-void CWriter::writeOperandInternal(Value *Operand, bool Static) {
- if (Instruction *I = dyn_cast<Instruction>(Operand))
- // Should we inline this instruction to build a tree?
- if (isInlinableInst(*I) && !isDirectAlloca(I)) {
- Out << '(';
- writeInstComputationInline(*I);
- Out << ')';
- return;
- }
-
- Constant* CPV = dyn_cast<Constant>(Operand);
-
- if (CPV && !isa<GlobalValue>(CPV))
- printConstant(CPV, Static);
- else
- Out << GetValueName(Operand);
-}
-
-void CWriter::writeOperand(Value *Operand, bool Static) {
- bool isAddressImplicit = isAddressExposed(Operand);
- if (isAddressImplicit)
- Out << "(&"; // Global variables are referenced as their addresses by llvm
-
- writeOperandInternal(Operand, Static);
-
- if (isAddressImplicit)
- Out << ')';
-}
-
-// Some instructions need to have their result value casted back to the
-// original types because their operands were casted to the expected type.
-// This function takes care of detecting that case and printing the cast
-// for the Instruction.
-bool CWriter::writeInstructionCast(const Instruction &I) {
- Type *Ty = I.getOperand(0)->getType();
- switch (I.getOpcode()) {
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::URem:
- case Instruction::UDiv:
- Out << "((";
- printSimpleType(Out, Ty, false);
- Out << ")(";
- return true;
- case Instruction::AShr:
- case Instruction::SRem:
- case Instruction::SDiv:
- Out << "((";
- printSimpleType(Out, Ty, true);
- Out << ")(";
- return true;
- default: break;
- }
- return false;
-}
-
-// Write the operand with a cast to another type based on the Opcode being used.
-// This will be used in cases where an instruction has specific type
-// requirements (usually signedness) for its operands.
-void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
-
- // Extract the operand's type; we'll need it below.
- Type* OpTy = Operand->getType();
-
- // Indicate whether to do the cast or not.
- bool shouldCast = false;
-
- // Indicate whether the cast should be to a signed type or not.
- bool castIsSigned = false;
-
- // Based on the Opcode for which this Operand is being written, determine
- // the new type to which the operand should be casted by setting the value
- // of OpTy. If we change OpTy, also set shouldCast to true.
- switch (Opcode) {
- default:
- // for most instructions, it doesn't matter
- break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Mul:
- // We need to cast integer arithmetic so that it is always performed
- // as unsigned, to avoid undefined behavior on overflow.
- case Instruction::LShr:
- case Instruction::UDiv:
- case Instruction::URem: // Cast to unsigned first
- shouldCast = true;
- castIsSigned = false;
- break;
- case Instruction::GetElementPtr:
- case Instruction::AShr:
- case Instruction::SDiv:
- case Instruction::SRem: // Cast to signed first
- shouldCast = true;
- castIsSigned = true;
- break;
- }
-
- // Write out the casted operand if we should, otherwise just write the
- // operand.
- if (shouldCast) {
- Out << "((";
- printSimpleType(Out, OpTy, castIsSigned);
- Out << ")";
- writeOperand(Operand);
- Out << ")";
- } else
- writeOperand(Operand);
-}
-
-// Write the operand with a cast to another type based on the icmp predicate
-// being used.
-void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
- // This has to do a cast to ensure the operand has the right signedness.
- // Also, if the operand is a pointer, we cast it to an integer before the
- // comparison, both to get the signedness right and so that the C compiler
- // cannot optimize away comparisons like "p < NULL" (p may, for example,
- // hold an integer value).
- bool shouldCast = Cmp.isRelational();
-
- // Write out the casted operand if we should, otherwise just write the
- // operand.
- if (!shouldCast) {
- writeOperand(Operand);
- return;
- }
-
- // Should this be a signed comparison? If so, convert to signed.
- bool castIsSigned = Cmp.isSigned();
-
- // If the operand was a pointer, convert to a large integer type.
- Type* OpTy = Operand->getType();
- if (OpTy->isPointerTy())
- OpTy = TD->getIntPtrType(Operand->getContext());
-
- Out << "((";
- printSimpleType(Out, OpTy, castIsSigned);
- Out << ")";
- writeOperand(Operand);
- Out << ")";
-}
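-
-// For example, under "icmp ult i8* %p, %q" each pointer operand is emitted
-// as "((T)p)" where T is the target's unsigned intptr type (e.g. unsigned
-// long on an LP64 target), turning the pointer comparison into a
-// well-defined unsigned integer comparison in C.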
-
-// generateCompilerSpecificCode - This is where we add conditional compilation
-// directives to cater to specific compilers as needed.
-//
-static void generateCompilerSpecificCode(formatted_raw_ostream& Out,
- const TargetData *TD) {
- // Alloca is hard to get, and we don't want to include stdlib.h here.
- Out << "/* get a declaration for alloca */\n"
- << "#if defined(__CYGWIN__) || defined(__MINGW32__)\n"
- << "#define alloca(x) __builtin_alloca((x))\n"
- << "#define _alloca(x) __builtin_alloca((x))\n"
- << "#elif defined(__APPLE__)\n"
- << "extern void *__builtin_alloca(unsigned long);\n"
- << "#define alloca(x) __builtin_alloca(x)\n"
- << "#define longjmp _longjmp\n"
- << "#define setjmp _setjmp\n"
- << "#elif defined(__sun__)\n"
- << "#if defined(__sparcv9)\n"
- << "extern void *__builtin_alloca(unsigned long);\n"
- << "#else\n"
- << "extern void *__builtin_alloca(unsigned int);\n"
- << "#endif\n"
- << "#define alloca(x) __builtin_alloca(x)\n"
- << "#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__arm__)\n"
- << "#define alloca(x) __builtin_alloca(x)\n"
- << "#elif defined(_MSC_VER)\n"
- << "#define inline _inline\n"
- << "#define alloca(x) _alloca(x)\n"
- << "#else\n"
- << "#include <alloca.h>\n"
- << "#endif\n\n";
-
- // We output GCC specific attributes to preserve 'linkonce'ness on globals.
- // If we aren't being compiled with GCC, just drop these attributes.
- Out << "#ifndef __GNUC__ /* Can only support \"linkonce\" vars with GCC */\n"
- << "#define __attribute__(X)\n"
- << "#endif\n\n";
-
- // On Mac OS X, "external weak" is spelled "__attribute__((weak_import))".
- Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
- << "#define __EXTERNAL_WEAK__ __attribute__((weak_import))\n"
- << "#elif defined(__GNUC__)\n"
- << "#define __EXTERNAL_WEAK__ __attribute__((weak))\n"
- << "#else\n"
- << "#define __EXTERNAL_WEAK__\n"
- << "#endif\n\n";
-
- // For now, turn off the weak linkage attribute on Mac OS X. (See above.)
- Out << "#if defined(__GNUC__) && defined(__APPLE_CC__)\n"
- << "#define __ATTRIBUTE_WEAK__\n"
- << "#elif defined(__GNUC__)\n"
- << "#define __ATTRIBUTE_WEAK__ __attribute__((weak))\n"
- << "#else\n"
- << "#define __ATTRIBUTE_WEAK__\n"
- << "#endif\n\n";
-
- // Add hidden visibility support. FIXME: APPLE_CC?
- Out << "#if defined(__GNUC__)\n"
- << "#define __HIDDEN__ __attribute__((visibility(\"hidden\")))\n"
- << "#endif\n\n";
-
- // Define NaN and Inf as GCC builtins if using GCC, as 0 otherwise
- // From the GCC documentation:
- //
- // double __builtin_nan (const char *str)
- //
- // This is an implementation of the ISO C99 function nan.
- //
- // Since ISO C99 defines this function in terms of strtod, which we do
- // not implement, a description of the parsing is in order. The string is
- // parsed as by strtol; that is, the base is recognized by leading 0 or
- // 0x prefixes. The number parsed is placed in the significand such that
- // the least significant bit of the number is at the least significant
- // bit of the significand. The number is truncated to fit the significand
- // field provided. The significand is forced to be a quiet NaN.
- //
- // This function, if given a string literal, is evaluated early enough
- // that it is considered a compile-time constant.
- //
- // float __builtin_nanf (const char *str)
- //
- // Similar to __builtin_nan, except the return type is float.
- //
- // double __builtin_inf (void)
- //
- // Similar to __builtin_huge_val, except a warning is generated if the
- // target floating-point format does not support infinities. This
- // function is suitable for implementing the ISO C99 macro INFINITY.
- //
- // float __builtin_inff (void)
- //
- // Similar to __builtin_inf, except the return type is float.
- Out << "#ifdef __GNUC__\n"
- << "#define LLVM_NAN(NanStr) __builtin_nan(NanStr) /* Double */\n"
- << "#define LLVM_NANF(NanStr) __builtin_nanf(NanStr) /* Float */\n"
- << "#define LLVM_NANS(NanStr) __builtin_nans(NanStr) /* Double */\n"
- << "#define LLVM_NANSF(NanStr) __builtin_nansf(NanStr) /* Float */\n"
- << "#define LLVM_INF __builtin_inf() /* Double */\n"
- << "#define LLVM_INFF __builtin_inff() /* Float */\n"
- << "#define LLVM_PREFETCH(addr,rw,locality) "
- "__builtin_prefetch(addr,rw,locality)\n"
- << "#define __ATTRIBUTE_CTOR__ __attribute__((constructor))\n"
- << "#define __ATTRIBUTE_DTOR__ __attribute__((destructor))\n"
- << "#define LLVM_ASM __asm__\n"
- << "#else\n"
- << "#define LLVM_NAN(NanStr) ((double)0.0) /* Double */\n"
- << "#define LLVM_NANF(NanStr) 0.0F /* Float */\n"
- << "#define LLVM_NANS(NanStr) ((double)0.0) /* Double */\n"
- << "#define LLVM_NANSF(NanStr) 0.0F /* Float */\n"
- << "#define LLVM_INF ((double)0.0) /* Double */\n"
- << "#define LLVM_INFF 0.0F /* Float */\n"
- << "#define LLVM_PREFETCH(addr,rw,locality) /* PREFETCH */\n"
- << "#define __ATTRIBUTE_CTOR__\n"
- << "#define __ATTRIBUTE_DTOR__\n"
- << "#define LLVM_ASM(X)\n"
- << "#endif\n\n";
-
- Out << "#if __GNUC__ < 4 /* Old GCC's, or compilers not GCC */ \n"
- << "#define __builtin_stack_save() 0 /* not implemented */\n"
- << "#define __builtin_stack_restore(X) /* noop */\n"
- << "#endif\n\n";
-
- // Output typedefs for 128-bit integers. If these are needed with a
- // 32-bit target or with a C compiler that doesn't support mode(TI),
- // more drastic measures will be needed.
- Out << "#if __GNUC__ && __LP64__ /* 128-bit integer types */\n"
- << "typedef int __attribute__((mode(TI))) llvmInt128;\n"
- << "typedef unsigned __attribute__((mode(TI))) llvmUInt128;\n"
- << "#endif\n\n";
-
- // Output target-specific code that should be inserted into main.
- Out << "#define CODE_FOR_MAIN() /* Any target-specific code for main()*/\n";
-}
-
-/// FindStaticTors - Given a static ctor/dtor list, unpack its contents into
-/// the StaticTors set.
-static void FindStaticTors(GlobalVariable *GV, std::set<Function*> &StaticTors){
- ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return;
-
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
- if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-
- if (CS->getOperand(1)->isNullValue())
- return; // Found a null terminator, exit printing.
- Constant *FP = CS->getOperand(1);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
- if (CE->isCast())
- FP = CE->getOperand(0);
- if (Function *F = dyn_cast<Function>(FP))
- StaticTors.insert(F);
- }
-}
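-
-// The initializer walked above has the standard ctor-list shape, e.g.:
-//
-//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
-//                        [{ i32, void ()* } { i32 65535, void ()* @init }]
-//
-// Each element is a (priority, function) pair; only the function operand is
-// inspected here, looking through a possible cast.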
-
-enum SpecialGlobalClass {
- NotSpecial = 0,
- GlobalCtors, GlobalDtors,
- NotPrinted
-};
-
-/// getGlobalVariableClass - If this is a global that is specially recognized
-/// by LLVM, return a code that indicates how we should handle it.
-static SpecialGlobalClass getGlobalVariableClass(const GlobalVariable *GV) {
- // If this is a global ctors/dtors list, handle it now.
- if (GV->hasAppendingLinkage() && GV->use_empty()) {
- if (GV->getName() == "llvm.global_ctors")
- return GlobalCtors;
- else if (GV->getName() == "llvm.global_dtors")
- return GlobalDtors;
- }
-
- // Otherwise, if it is other metadata, don't print it. This catches things
- // like debug information.
- if (GV->getSection() == "llvm.metadata")
- return NotPrinted;
-
- return NotSpecial;
-}
-
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const char *Str, unsigned Length,
- raw_ostream &Out) {
- for (unsigned i = 0; i != Length; ++i) {
- unsigned char C = Str[i];
- if (isprint(C) && C != '\\' && C != '"')
- Out << C;
- else if (C == '\\')
- Out << "\\\\";
- else if (C == '\"')
- Out << "\\\"";
- else if (C == '\t')
- Out << "\\t";
- else
- Out << "\\x" << hexdigit(C >> 4) << hexdigit(C & 0x0F);
- }
-}
-
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const std::string &Str, raw_ostream &Out) {
- PrintEscapedString(Str.c_str(), Str.size(), Out);
-}
-
-bool CWriter::doInitialization(Module &M) {
- FunctionPass::doInitialization(M);
-
- // Initialize
- TheModule = &M;
-
- TD = new TargetData(&M);
- IL = new IntrinsicLowering(*TD);
- IL->AddPrototypes(M);
-
-#if 0
- std::string Triple = TheModule->getTargetTriple();
- if (Triple.empty())
- Triple = llvm::sys::getHostTriple();
-
- std::string E;
- if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TAsm = Match->createMCAsmInfo(Triple);
-#endif
- TAsm = new CBEMCAsmInfo();
- MRI = new MCRegisterInfo();
- TCtx = new MCContext(*TAsm, *MRI, NULL);
- Mang = new Mangler(*TCtx, *TD);
-
- // Keep track of which functions are static ctors/dtors so they can have
- // an attribute added to their prototypes.
- std::set<Function*> StaticCtors, StaticDtors;
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- switch (getGlobalVariableClass(I)) {
- default: break;
- case GlobalCtors:
- FindStaticTors(I, StaticCtors);
- break;
- case GlobalDtors:
- FindStaticTors(I, StaticDtors);
- break;
- }
- }
-
- // get declaration for alloca
- Out << "/* Provide Declarations */\n";
- Out << "#include <stdarg.h>\n"; // Varargs support
- Out << "#include <setjmp.h>\n"; // Unwind support
- Out << "#include <limits.h>\n"; // With overflow intrinsics support.
- generateCompilerSpecificCode(Out, TD);
-
- // Provide a definition for `bool' if not compiling with a C++ compiler.
- Out << "\n"
- << "#ifndef __cplusplus\ntypedef unsigned char bool;\n#endif\n"
-
- << "\n\n/* Support for floating point constants */\n"
- << "typedef unsigned long long ConstantDoubleTy;\n"
- << "typedef unsigned int ConstantFloatTy;\n"
- << "typedef struct { unsigned long long f1; unsigned short f2; "
- "unsigned short pad[3]; } ConstantFP80Ty;\n"
- // This is used for both kinds of 128-bit long double; meaning differs.
- << "typedef struct { unsigned long long f1; unsigned long long f2; }"
- " ConstantFP128Ty;\n"
- << "\n\n/* Global Declarations */\n";
-
- // First output all the declarations for the program, because C requires
- // functions and globals to be declared before they are used.
- //
- if (!M.getModuleInlineAsm().empty()) {
- Out << "/* Module asm statements */\n"
- << "asm(";
-
- // Split the string into lines, to make it easier to read the .ll file.
- std::string Asm = M.getModuleInlineAsm();
- size_t CurPos = 0;
- size_t NewLine = Asm.find_first_of('\n', CurPos);
- while (NewLine != std::string::npos) {
- // We found a newline, print the portion of the asm string from the
- // last newline up to this newline.
- Out << "\"";
- PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine),
- Out);
- Out << "\\n\"\n";
- CurPos = NewLine+1;
- NewLine = Asm.find_first_of('\n', CurPos);
- }
- Out << "\"";
- PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out);
- Out << "\");\n"
- << "/* End Module asm statements */\n";
- }
-
- // Emit the bitcast-helper union and declarations for all struct types.
- printModuleTypes();
-
- // Global variable declarations...
- if (!M.global_empty()) {
- Out << "\n/* External Global Variable Declarations */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
-
- if (I->hasExternalLinkage() || I->hasExternalWeakLinkage() ||
- I->hasCommonLinkage())
- Out << "extern ";
- else if (I->hasDLLImportLinkage())
- Out << "__declspec(dllimport) ";
- else
- continue; // Internal Global
-
- // Thread Local Storage
- if (I->isThreadLocal())
- Out << "__thread ";
-
- printType(Out, I->getType()->getElementType(), false, GetValueName(I));
-
- if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- Out << ";\n";
- }
- }
-
- // Function declarations
- Out << "\n/* Function Declarations */\n";
- Out << "double fmod(double, double);\n"; // Support for FP rem
- Out << "float fmodf(float, float);\n";
- Out << "long double fmodl(long double, long double);\n";
-
- // Store the intrinsics which will be declared/defined below.
- SmallVector<const Function*, 8> intrinsicsToDefine;
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- // Don't print declarations for intrinsic functions.
- // Store the used intrinsics, which need to be explicitly defined.
- if (I->isIntrinsic()) {
- switch (I->getIntrinsicID()) {
- default:
- break;
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- intrinsicsToDefine.push_back(I);
- break;
- }
- continue;
- }
-
- if (I->getName() == "setjmp" ||
- I->getName() == "longjmp" || I->getName() == "_setjmp")
- continue;
-
- if (I->hasExternalWeakLinkage())
- Out << "extern ";
- printFunctionSignature(I, true);
- if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- if (StaticCtors.count(I))
- Out << " __ATTRIBUTE_CTOR__";
- if (StaticDtors.count(I))
- Out << " __ATTRIBUTE_DTOR__";
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
-
- // A name beginning with '\01' means "use this name verbatim"; emit it as
- // an asm label so the C compiler does not mangle it further.
- if (I->hasName() && I->getName()[0] == 1)
- Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
-
- Out << ";\n";
- }
-
- // Output the global variable declarations
- if (!M.global_empty()) {
- Out << "\n\n/* Global Variable Declarations */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration()) {
- // Ignore special globals, such as debug info.
- if (getGlobalVariableClass(I))
- continue;
-
- if (I->hasLocalLinkage())
- Out << "static ";
- else
- Out << "extern ";
-
- // Thread Local Storage
- if (I->isThreadLocal())
- Out << "__thread ";
-
- printType(Out, I->getType()->getElementType(), false,
- GetValueName(I));
-
- if (I->hasLinkOnceLinkage())
- Out << " __attribute__((common))";
- else if (I->hasCommonLinkage()) // FIXME is this right?
- Out << " __ATTRIBUTE_WEAK__";
- else if (I->hasWeakLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- else if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
- Out << ";\n";
- }
- }
-
- // Output the global variable definitions and contents...
- if (!M.global_empty()) {
- Out << "\n\n/* Global Variable Definitions and Initialization */\n";
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- if (!I->isDeclaration()) {
- // Ignore special globals, such as debug info.
- if (getGlobalVariableClass(I))
- continue;
-
- if (I->hasLocalLinkage())
- Out << "static ";
- else if (I->hasDLLImportLinkage())
- Out << "__declspec(dllimport) ";
- else if (I->hasDLLExportLinkage())
- Out << "__declspec(dllexport) ";
-
- // Thread Local Storage
- if (I->isThreadLocal())
- Out << "__thread ";
-
- printType(Out, I->getType()->getElementType(), false,
- GetValueName(I));
- if (I->hasLinkOnceLinkage())
- Out << " __attribute__((common))";
- else if (I->hasWeakLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- else if (I->hasCommonLinkage())
- Out << " __ATTRIBUTE_WEAK__";
-
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
-
- // If the initializer is not null, emit the initializer. If it is null,
- // we try to avoid emitting large amounts of zeros. The problem with
- // this, however, occurs when the variable has weak linkage. In this
- // case, the assembler will complain about the variable being both weak
- // and common, so we disable this optimization.
- // FIXME common linkage should avoid this problem.
- if (!I->getInitializer()->isNullValue()) {
- Out << " = " ;
- writeOperand(I->getInitializer(), true);
- } else if (I->hasWeakLinkage()) {
- // We have to specify an initializer, but it doesn't have to be
- // complete. If the value is an aggregate, print out { 0 }, and let
- // the compiler figure out the rest of the zeros.
- Out << " = " ;
- if (I->getInitializer()->getType()->isStructTy() ||
- I->getInitializer()->getType()->isVectorTy()) {
- Out << "{ 0 }";
- } else if (I->getInitializer()->getType()->isArrayTy()) {
- // As with structs and vectors, but with an extra set of braces
- // because arrays are wrapped in structs.
- Out << "{ { 0 } }";
- } else {
- // Just print it out normally.
- writeOperand(I->getInitializer(), true);
- }
- }
- Out << ";\n";
- }
- }
-
- if (!M.empty())
- Out << "\n\n/* Function Bodies */\n";
-
- // Emit some helper functions for dealing with the FCmp instruction's
- // predicates.
- Out << "static inline int llvm_fcmp_ord(double X, double Y) { ";
- Out << "return X == X && Y == Y; }\n";
- Out << "static inline int llvm_fcmp_uno(double X, double Y) { ";
- Out << "return X != X || Y != Y; }\n";
- Out << "static inline int llvm_fcmp_ueq(double X, double Y) { ";
- Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_une(double X, double Y) { ";
- Out << "return X != Y; }\n";
- Out << "static inline int llvm_fcmp_ult(double X, double Y) { ";
- Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_ugt(double X, double Y) { ";
- Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_ule(double X, double Y) { ";
- Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_uge(double X, double Y) { ";
- Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
- Out << "static inline int llvm_fcmp_oeq(double X, double Y) { ";
- Out << "return X == Y ; }\n";
- Out << "static inline int llvm_fcmp_one(double X, double Y) { ";
- Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
- Out << "static inline int llvm_fcmp_olt(double X, double Y) { ";
- Out << "return X < Y ; }\n";
- Out << "static inline int llvm_fcmp_ogt(double X, double Y) { ";
- Out << "return X > Y ; }\n";
- Out << "static inline int llvm_fcmp_ole(double X, double Y) { ";
- Out << "return X <= Y ; }\n";
- Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
- Out << "return X >= Y ; }\n";
-
- // Emit definitions of the intrinsics.
- for (SmallVector<const Function*, 8>::const_iterator
- I = intrinsicsToDefine.begin(),
- E = intrinsicsToDefine.end(); I != E; ++I) {
- printIntrinsicDefinition(**I, Out);
- }
-
- return false;
-}
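-
-// After doInitialization the emitted file therefore has this overall shape
-// (a sketch assembled from the section headers above, not verbatim output):
-//
-//   /* Provide Declarations */            includes + compiler-specific glue
-//   /* Support for floating point constants */   ConstantDoubleTy et al.
-//   /* Global Declarations */             module asm, if any
-//   /* Structure forward decls */ and /* Structure contents */
-//   /* External Global Variable Declarations */
-//   /* Function Declarations */
-//   /* Global Variable Declarations */ and
-//   /* Global Variable Definitions and Initialization */
-//   /* Function Bodies */                 llvm_fcmp_* helpers, intrinsics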
-
-
-/// Output all floating point constants that cannot be printed accurately...
-void CWriter::printFloatingPointConstants(Function &F) {
- // Scan the function for floating point constants. If any FP constant is
- // used in the function, we redirect it through a static table entry so
- // that we do not depend on the precision of the printed form, unless the
- // printed form preserves precision.
- //
- for (constant_iterator I = constant_begin(&F), E = constant_end(&F);
- I != E; ++I)
- printFloatingPointConstants(*I);
-
- Out << '\n';
-}
-
-void CWriter::printFloatingPointConstants(const Constant *C) {
- // If this is a constant expression, recursively check for constant fp values.
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- printFloatingPointConstants(CE->getOperand(i));
- return;
- }
-
- // Otherwise, check for a FP constant that we need to print.
- const ConstantFP *FPC = dyn_cast<ConstantFP>(C);
- if (FPC == 0 ||
- // Do not put in FPConstantMap if safe.
- isFPCSafeToPrint(FPC) ||
- // Already printed this constant?
- FPConstantMap.count(FPC))
- return;
-
- FPConstantMap[FPC] = FPCounter; // Number the FP constants
-
- if (FPC->getType() == Type::getDoubleTy(FPC->getContext())) {
- double Val = FPC->getValueAPF().convertToDouble();
- uint64_t i = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
- Out << "static const ConstantDoubleTy FPConstant" << FPCounter++
- << " = 0x" << utohexstr(i)
- << "ULL; /* " << Val << " */\n";
- } else if (FPC->getType() == Type::getFloatTy(FPC->getContext())) {
- float Val = FPC->getValueAPF().convertToFloat();
- uint32_t i = (uint32_t)FPC->getValueAPF().bitcastToAPInt().
- getZExtValue();
- Out << "static const ConstantFloatTy FPConstant" << FPCounter++
- << " = 0x" << utohexstr(i)
- << "U; /* " << Val << " */\n";
- } else if (FPC->getType() == Type::getX86_FP80Ty(FPC->getContext())) {
- // api needed to prevent premature destruction
- APInt api = FPC->getValueAPF().bitcastToAPInt();
- const uint64_t *p = api.getRawData();
- Out << "static const ConstantFP80Ty FPConstant" << FPCounter++
- << " = { 0x" << utohexstr(p[0])
- << "ULL, 0x" << utohexstr((uint16_t)p[1]) << ",{0,0,0}"
- << "}; /* Long double constant */\n";
- } else if (FPC->getType() == Type::getPPC_FP128Ty(FPC->getContext()) ||
- FPC->getType() == Type::getFP128Ty(FPC->getContext())) {
- APInt api = FPC->getValueAPF().bitcastToAPInt();
- const uint64_t *p = api.getRawData();
- Out << "static const ConstantFP128Ty FPConstant" << FPCounter++
- << " = { 0x"
- << utohexstr(p[0]) << ", 0x" << utohexstr(p[1])
- << "}; /* Long double constant */\n";
-
- } else {
- llvm_unreachable("Unknown float type!");
- }
-}
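-
-// For example, a use of the double constant pi is redirected through a table
-// entry like (sketch):
-//
-//   static const ConstantDoubleTy FPConstant0 = 0x400921FB54442D18ULL; /* pi */
-//
-// so the exact bit pattern survives rather than a decimal round-trip.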
-
-
-/// printModuleTypes - Emit the llvmBitCastUnion helper and a declaration for
-/// every struct type used in the module...
-///
-void CWriter::printModuleTypes() {
- Out << "/* Helper union for bitcasts */\n";
- Out << "typedef union {\n";
- Out << " unsigned int Int32;\n";
- Out << " unsigned long long Int64;\n";
- Out << " float Float;\n";
- Out << " double Double;\n";
- Out << "} llvmBitCastUnion;\n";
-
- // Get all of the struct types used in the module.
- std::vector<StructType*> StructTypes;
- TheModule->findUsedStructTypes(StructTypes);
-
- if (StructTypes.empty()) return;
-
- Out << "/* Structure forward decls */\n";
-
- unsigned NextTypeID = 0;
-
- // If any of them are missing names, add a unique ID to UnnamedStructIDs.
- // Print out forward declarations for structure types.
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
- StructType *ST = StructTypes[i];
-
- if (ST->isLiteral() || ST->getName().empty())
- UnnamedStructIDs[ST] = NextTypeID++;
-
- std::string Name = getStructName(ST);
-
- Out << "typedef struct " << Name << ' ' << Name << ";\n";
- }
-
- Out << '\n';
-
- // Keep track of which structures have been printed so far.
- SmallPtrSet<Type *, 16> StructPrinted;
-
- // Loop over all structures, recursing into each so that a structure's
- // dependencies are printed before the structure itself.
- //
- Out << "/* Structure contents */\n";
- for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
- if (StructTypes[i]->isStructTy())
- // Only print out used types!
- printContainedStructs(StructTypes[i], StructPrinted);
-}
-
-// Recursively print every struct that this one depends on, then print
-// this struct itself; each struct is emitted at most once.
-//
-// TODO: Make this work properly with vector types
-//
-void CWriter::printContainedStructs(Type *Ty,
- SmallPtrSet<Type *, 16> &StructPrinted) {
- // Don't walk through pointers.
- if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
- return;
-
- // Print all contained types first.
- for (Type::subtype_iterator I = Ty->subtype_begin(),
- E = Ty->subtype_end(); I != E; ++I)
- printContainedStructs(*I, StructPrinted);
-
- if (StructType *ST = dyn_cast<StructType>(Ty)) {
- // Check to see if we have already printed this struct.
- if (!StructPrinted.insert(Ty)) return;
-
- // Print structure type out.
- printType(Out, ST, false, getStructName(ST), true);
- Out << ";\n\n";
- }
-}
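-
-// For a module using a struct type %Pair, the two passes in printModuleTypes
-// and the printer above thus produce (struct/field names hypothetical):
-//
-//   typedef struct Pair Pair;   /* forward declaration */
-//   struct Pair { ... };        /* contents, dependencies printed first */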
-
-void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
- /// isStructReturn - Should this function actually return a struct by-value?
- bool isStructReturn = F->hasStructRetAttr();
-
- if (F->hasLocalLinkage()) Out << "static ";
- if (F->hasDLLImportLinkage()) Out << "__declspec(dllimport) ";
- if (F->hasDLLExportLinkage()) Out << "__declspec(dllexport) ";
- switch (F->getCallingConv()) {
- case CallingConv::X86_StdCall:
- Out << "__attribute__((stdcall)) ";
- break;
- case CallingConv::X86_FastCall:
- Out << "__attribute__((fastcall)) ";
- break;
- case CallingConv::X86_ThisCall:
- Out << "__attribute__((thiscall)) ";
- break;
- default:
- break;
- }
-
- // Loop over the arguments, printing them...
- FunctionType *FT = cast<FunctionType>(F->getFunctionType());
- const AttrListPtr &PAL = F->getAttributes();
-
- std::string tstr;
- raw_string_ostream FunctionInnards(tstr);
-
- // Print out the name...
- FunctionInnards << GetValueName(F) << '(';
-
- bool PrintedArg = false;
- if (!F->isDeclaration()) {
- if (!F->arg_empty()) {
- Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
- unsigned Idx = 1;
-
- // If this is a struct-return function, don't print the hidden
- // struct-return argument.
- if (isStructReturn) {
- assert(I != E && "Invalid struct return function!");
- ++I;
- ++Idx;
- }
-
- std::string ArgName;
- for (; I != E; ++I) {
- if (PrintedArg) FunctionInnards << ", ";
- if (I->hasName() || !Prototype)
- ArgName = GetValueName(I);
- else
- ArgName = "";
- Type *ArgTy = I->getType();
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- ByValParams.insert(I);
- }
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt),
- ArgName);
- PrintedArg = true;
- ++Idx;
- }
- }
- } else {
- // Loop over the arguments, printing them.
- FunctionType::param_iterator I = FT->param_begin(), E = FT->param_end();
- unsigned Idx = 1;
-
- // If this is a struct-return function, don't print the hidden
- // struct-return argument.
- if (isStructReturn) {
- assert(I != E && "Invalid struct return function!");
- ++I;
- ++Idx;
- }
-
- for (; I != E; ++I) {
- if (PrintedArg) FunctionInnards << ", ";
- Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(ArgTy->isPointerTy());
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt));
- PrintedArg = true;
- ++Idx;
- }
- }
-
- if (!PrintedArg && FT->isVarArg()) {
- FunctionInnards << "int vararg_dummy_arg";
- PrintedArg = true;
- }
-
- // Finish printing arguments... if this is a vararg function, print the ...,
- // unless there are no known types, in which case, we just emit ().
- //
- if (FT->isVarArg() && PrintedArg) {
- FunctionInnards << ",..."; // Output varargs portion of signature!
- } else if (!FT->isVarArg() && !PrintedArg) {
- FunctionInnards << "void"; // ret() -> ret(void) in C.
- }
- FunctionInnards << ')';
-
- // Get the return type for the function.
- Type *RetTy;
- if (!isStructReturn)
- RetTy = F->getReturnType();
- else {
- // If this is a struct-return function, print the struct-return type.
- RetTy = cast<PointerType>(FT->getParamType(0))->getElementType();
- }
-
- // Print out the return type and the signature built above.
- printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt),
- FunctionInnards.str());
-}
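-
-// Example (a sketch; the exact spelling depends on printType): for
-// "define signext i8 @f(i32 %x, ...)" this prints roughly
-//
-//   signed char llvm_cbe_f(unsigned int llvm_cbe_x,...)
-//
-// with the return type wrapped around the argument list accumulated in
-// FunctionInnards.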
-
-static inline bool isFPIntBitCast(const Instruction &I) {
- if (!isa<BitCastInst>(I))
- return false;
- Type *SrcTy = I.getOperand(0)->getType();
- Type *DstTy = I.getType();
- return (SrcTy->isFloatingPointTy() && DstTy->isIntegerTy()) ||
- (DstTy->isFloatingPointTy() && SrcTy->isIntegerTy());
-}
-
-void CWriter::printFunction(Function &F) {
- /// isStructReturn - Should this function actually return a struct by-value?
- bool isStructReturn = F.hasStructRetAttr();
-
- printFunctionSignature(&F, false);
- Out << " {\n";
-
- // If this is a struct return function, handle the result with magic.
- if (isStructReturn) {
- Type *StructTy =
- cast<PointerType>(F.arg_begin()->getType())->getElementType();
- Out << " ";
- printType(Out, StructTy, false, "StructReturn");
- Out << "; /* Struct return temporary */\n";
-
- Out << " ";
- printType(Out, F.arg_begin()->getType(), false,
- GetValueName(F.arg_begin()));
- Out << " = &StructReturn;\n";
- }
-
- bool PrintedVar = false;
-
- // print local variable information for the function
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
- if (const AllocaInst *AI = isDirectAlloca(&*I)) {
- Out << " ";
- printType(Out, AI->getAllocatedType(), false, GetValueName(AI));
- Out << "; /* Address-exposed local */\n";
- PrintedVar = true;
- } else if (I->getType() != Type::getVoidTy(F.getContext()) &&
- !isInlinableInst(*I)) {
- Out << " ";
- printType(Out, I->getType(), false, GetValueName(&*I));
- Out << ";\n";
-
- if (isa<PHINode>(*I)) { // Print out PHI node temporaries as well...
- Out << " ";
- printType(Out, I->getType(), false,
- GetValueName(&*I)+"__PHI_TEMPORARY");
- Out << ";\n";
- }
- PrintedVar = true;
- }
- // We need a temporary for the BitCast to use so it can pluck a value out
- // of a union to do the BitCast. This is separate from the need for a
- // variable to hold the result of the BitCast.
- if (isFPIntBitCast(*I)) {
- Out << " llvmBitCastUnion " << GetValueName(&*I)
- << "__BITCAST_TEMPORARY;\n";
- PrintedVar = true;
- }
- }
-
- if (PrintedVar)
- Out << '\n';
-
- if (F.hasExternalLinkage() && F.getName() == "main")
- Out << " CODE_FOR_MAIN();\n";
-
- // print the basic blocks
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (Loop *L = LI->getLoopFor(BB)) {
- if (L->getHeader() == BB && L->getParentLoop() == 0)
- printLoop(L);
- } else {
- printBasicBlock(BB);
- }
- }
-
- Out << "}\n\n";
-}
-
-void CWriter::printLoop(Loop *L) {
- Out << " do { /* Syntactic loop '" << L->getHeader()->getName()
- << "' to make GCC happy */\n";
- for (unsigned i = 0, e = L->getBlocks().size(); i != e; ++i) {
- BasicBlock *BB = L->getBlocks()[i];
- Loop *BBLoop = LI->getLoopFor(BB);
- if (BBLoop == L)
- printBasicBlock(BB);
- else if (BB == BBLoop->getHeader() && BBLoop->getParentLoop() == L)
- printLoop(BBLoop);
- }
- Out << " } while (1); /* end of syntactic loop '"
- << L->getHeader()->getName() << "' */\n";
-}
-
-void CWriter::printBasicBlock(BasicBlock *BB) {
-
- // Only print the label for the basic block if some predecessor needs a
- // goto to reach it. We check predecessors rather than the raw use list
- // because PHI nodes also use basic blocks but do not require a label to
- // be generated.
- //
- bool NeedsLabel = false;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if (isGotoCodeNecessary(*PI, BB)) {
- NeedsLabel = true;
- break;
- }
-
- if (NeedsLabel) Out << GetValueName(BB) << ":\n";
-
- // Output all of the instructions in the basic block...
- for (BasicBlock::iterator II = BB->begin(), E = --BB->end(); II != E;
- ++II) {
- if (!isInlinableInst(*II) && !isDirectAlloca(II)) {
- if (II->getType() != Type::getVoidTy(BB->getContext()) &&
- !isInlineAsm(*II))
- outputLValue(II);
- else
- Out << " ";
- writeInstComputationInline(*II);
- Out << ";\n";
- }
- }
-
- // Don't emit prefix or suffix for the terminator.
- visit(*BB->getTerminator());
-}
-
-
-// Specific Instruction type classes... note that all of the casts are
-// necessary because we use the instruction classes as opaque types...
-//
-void CWriter::visitReturnInst(ReturnInst &I) {
- // If this is a struct return function, return the temporary struct.
- bool isStructReturn = I.getParent()->getParent()->hasStructRetAttr();
-
- if (isStructReturn) {
- Out << " return StructReturn;\n";
- return;
- }
-
- // Don't output a void return if this is the last basic block in the function
- // (Note that "!X == 1" would parse as "(!X) == 1", so spell this "!=".)
- if (I.getNumOperands() == 0 &&
- &*--I.getParent()->getParent()->end() == I.getParent() &&
- I.getParent()->size() != 1) {
- return;
- }
-
- Out << " return";
- if (I.getNumOperands()) {
- Out << ' ';
- writeOperand(I.getOperand(0));
- }
- Out << ";\n";
-}
-
-void CWriter::visitSwitchInst(SwitchInst &SI) {
-
- Value* Cond = SI.getCondition();
-
- Out << " switch (";
- writeOperand(Cond);
- Out << ") {\n default:\n";
- printPHICopiesForSuccessor (SI.getParent(), SI.getDefaultDest(), 2);
- printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
- Out << ";\n";
-
- unsigned NumCases = SI.getNumCases();
- // Skip the first item since that's the default case.
- for (unsigned i = 1; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
- BasicBlock* Succ = SI.getSuccessor(i);
- Out << " case ";
- writeOperand(CaseVal);
- Out << ":\n";
- printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
- printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
- Out << " break;\n";
- }
-
- Out << " }\n";
-}
-
-void CWriter::visitIndirectBrInst(IndirectBrInst &IBI) {
- Out << " goto *(void*)(";
- writeOperand(IBI.getOperand(0));
- Out << ");\n";
-}
-
-void CWriter::visitUnreachableInst(UnreachableInst &I) {
- Out << " /*UNREACHABLE*/;\n";
-}
-
-bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
- /// FIXME: This should be re-enabled once it is made safe with respect to
- /// loop reordering!
- return true;
-
- if (llvm::next(Function::iterator(From)) != Function::iterator(To))
- return true; // Not the direct successor, we need a goto.
-
- //isa<SwitchInst>(From->getTerminator())
-
- if (LI->getLoopFor(From) != LI->getLoopFor(To))
- return true;
- return false;
-}
-
-void CWriter::printPHICopiesForSuccessor (BasicBlock *CurBlock,
- BasicBlock *Successor,
- unsigned Indent) {
- for (BasicBlock::iterator I = Successor->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- // Now we have to do the printing.
- Value *IV = PN->getIncomingValueForBlock(CurBlock);
- if (!isa<UndefValue>(IV)) {
- Out << std::string(Indent, ' ');
- Out << " " << GetValueName(I) << "__PHI_TEMPORARY = ";
- writeOperand(IV);
- Out << "; /* for PHI node */\n";
- }
- }
-}
-
-void CWriter::printBranchToBlock(BasicBlock *CurBB, BasicBlock *Succ,
- unsigned Indent) {
- if (isGotoCodeNecessary(CurBB, Succ)) {
- Out << std::string(Indent, ' ') << " goto ";
- writeOperand(Succ);
- Out << ";\n";
- }
-}
-
-// Branch instruction printing - Avoid printing out a branch to a basic block
-// that immediately succeeds the current one.
-//
-void CWriter::visitBranchInst(BranchInst &I) {
-
- if (I.isConditional()) {
- if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(0))) {
- Out << " if (";
- writeOperand(I.getCondition());
- Out << ") {\n";
-
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(0), 2);
-
- if (isGotoCodeNecessary(I.getParent(), I.getSuccessor(1))) {
- Out << " } else {\n";
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
- }
- } else {
- // First goto not necessary, assume second one is...
- Out << " if (!";
- writeOperand(I.getCondition());
- Out << ") {\n";
-
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(1), 2);
- printBranchToBlock(I.getParent(), I.getSuccessor(1), 2);
- }
-
- Out << " }\n";
- } else {
- printPHICopiesForSuccessor (I.getParent(), I.getSuccessor(0), 0);
- printBranchToBlock(I.getParent(), I.getSuccessor(0), 0);
- }
- Out << "\n";
-}
-
-// PHI nodes get copied into temporary values at the end of predecessor basic
-// blocks. We now need to copy these temporary values into the REAL value for
-// the PHI.
-void CWriter::visitPHINode(PHINode &I) {
- writeOperand(&I);
- Out << "__PHI_TEMPORARY";
-}
-
-
-void CWriter::visitBinaryOperator(Instruction &I) {
- // binary instructions, shift instructions, setCond instructions.
- assert(!I.getType()->isPointerTy());
-
- // We must cast the results of binary operations which might be promoted.
- bool needsCast = false;
- if ((I.getType() == Type::getInt8Ty(I.getContext())) ||
- (I.getType() == Type::getInt16Ty(I.getContext()))
- || (I.getType() == Type::getFloatTy(I.getContext()))) {
- needsCast = true;
- Out << "((";
- printType(Out, I.getType(), false);
- Out << ")(";
- }
-
- // If this is a negation operation, print it out as such. For FP, we don't
- // want to print "-0.0 - X".
- if (BinaryOperator::isNeg(&I)) {
- Out << "-(";
- writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
- Out << ")";
- } else if (BinaryOperator::isFNeg(&I)) {
- Out << "-(";
- writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
- Out << ")";
- } else if (I.getOpcode() == Instruction::FRem) {
- // Output a call to fmod/fmodf instead of emitting a%b
- if (I.getType() == Type::getFloatTy(I.getContext()))
- Out << "fmodf(";
- else if (I.getType() == Type::getDoubleTy(I.getContext()))
- Out << "fmod(";
- else // all 3 flavors of long double
- Out << "fmodl(";
- writeOperand(I.getOperand(0));
- Out << ", ";
- writeOperand(I.getOperand(1));
- Out << ")";
- } else {
-
- // Write out the cast of the instruction's value back to the proper type
- // if necessary.
- bool NeedsClosingParens = writeInstructionCast(I);
-
- // Certain instructions require the operand to be forced to a specific type
- // so we use writeOperandWithCast here instead of writeOperand. Similarly
- // below for operand 1
- writeOperandWithCast(I.getOperand(0), I.getOpcode());
-
- switch (I.getOpcode()) {
- case Instruction::Add:
- case Instruction::FAdd: Out << " + "; break;
- case Instruction::Sub:
- case Instruction::FSub: Out << " - "; break;
- case Instruction::Mul:
- case Instruction::FMul: Out << " * "; break;
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem: Out << " % "; break;
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv: Out << " / "; break;
- case Instruction::And: Out << " & "; break;
- case Instruction::Or: Out << " | "; break;
- case Instruction::Xor: Out << " ^ "; break;
- case Instruction::Shl : Out << " << "; break;
- case Instruction::LShr:
- case Instruction::AShr: Out << " >> "; break;
- default:
-#ifndef NDEBUG
- errs() << "Invalid operator type!" << I;
-#endif
- llvm_unreachable(0);
- }
-
- writeOperandWithCast(I.getOperand(1), I.getOpcode());
- if (NeedsClosingParens)
- Out << "))";
- }
-
- if (needsCast) {
- Out << "))";
- }
-}
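-
-// Putting the pieces together, "add i8 %a, %b" comes out roughly as (names
-// abbreviated):
-//
-//   ((unsigned char)(((unsigned char)(((unsigned char)a) + ((unsigned char)b)))))
-//
-// The outer cast re-truncates the promoted result; the inner casts force the
-// arithmetic to be unsigned, where overflow is well defined in C.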
-
-void CWriter::visitICmpInst(ICmpInst &I) {
- // We must cast the results of icmp which might be promoted.
- // (Currently never set; kept for symmetry with visitBinaryOperator.)
- bool needsCast = false;
-
- // Write out the cast of the instruction's value back to the proper type
- // if necessary.
- bool NeedsClosingParens = writeInstructionCast(I);
-
- // Certain icmp predicates require the operand to be forced to a specific type
- // so we use writeOperandWithCast here instead of writeOperand. Similarly
- // below for operand 1
- writeOperandWithCast(I.getOperand(0), I);
-
- switch (I.getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << " == "; break;
- case ICmpInst::ICMP_NE: Out << " != "; break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE: Out << " <= "; break;
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE: Out << " >= "; break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: Out << " < "; break;
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT: Out << " > "; break;
- default:
-#ifndef NDEBUG
- errs() << "Invalid icmp predicate!" << I;
-#endif
- llvm_unreachable(0);
- }
-
- writeOperandWithCast(I.getOperand(1), I);
- if (NeedsClosingParens)
- Out << "))";
-
- if (needsCast) {
- Out << "))";
- }
-}
-
-void CWriter::visitFCmpInst(FCmpInst &I) {
- if (I.getPredicate() == FCmpInst::FCMP_FALSE) {
- Out << "0";
- return;
- }
- if (I.getPredicate() == FCmpInst::FCMP_TRUE) {
- Out << "1";
- return;
- }
-
- const char* op = 0;
- switch (I.getPredicate()) {
- default: llvm_unreachable("Illegal FCmp predicate");
- case FCmpInst::FCMP_ORD: op = "ord"; break;
- case FCmpInst::FCMP_UNO: op = "uno"; break;
- case FCmpInst::FCMP_UEQ: op = "ueq"; break;
- case FCmpInst::FCMP_UNE: op = "une"; break;
- case FCmpInst::FCMP_ULT: op = "ult"; break;
- case FCmpInst::FCMP_ULE: op = "ule"; break;
- case FCmpInst::FCMP_UGT: op = "ugt"; break;
- case FCmpInst::FCMP_UGE: op = "uge"; break;
- case FCmpInst::FCMP_OEQ: op = "oeq"; break;
- case FCmpInst::FCMP_ONE: op = "one"; break;
- case FCmpInst::FCMP_OLT: op = "olt"; break;
- case FCmpInst::FCMP_OLE: op = "ole"; break;
- case FCmpInst::FCMP_OGT: op = "ogt"; break;
- case FCmpInst::FCMP_OGE: op = "oge"; break;
- }
-
- Out << "llvm_fcmp_" << op << "(";
- // Write the first operand
- writeOperand(I.getOperand(0));
- Out << ", ";
- // Write the second operand
- writeOperand(I.getOperand(1));
- Out << ")";
-}
-
-static const char *getFloatBitCastField(Type *Ty) {
- switch (Ty->getTypeID()) {
- default: llvm_unreachable("Invalid Type");
- case Type::FloatTyID: return "Float";
- case Type::DoubleTyID: return "Double";
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits <= 32)
- return "Int32";
- else
- return "Int64";
- }
- }
-}
-
-void CWriter::visitCastInst(CastInst &I) {
- Type *DstTy = I.getType();
- Type *SrcTy = I.getOperand(0)->getType();
- if (isFPIntBitCast(I)) {
- Out << '(';
- // These int<->float and long<->double casts need to be handled specially
- Out << GetValueName(&I) << "__BITCAST_TEMPORARY."
- << getFloatBitCastField(I.getOperand(0)->getType()) << " = ";
- writeOperand(I.getOperand(0));
- Out << ", " << GetValueName(&I) << "__BITCAST_TEMPORARY."
- << getFloatBitCastField(I.getType());
- Out << ')';
- return;
- }
-
- Out << '(';
- printCast(I.getOpcode(), SrcTy, DstTy);
-
- // Make a sext from i1 work by subtracting the i1 from 0 (an int).
- if (SrcTy == Type::getInt1Ty(I.getContext()) &&
- I.getOpcode() == Instruction::SExt)
- Out << "0-";
-
- writeOperand(I.getOperand(0));
-
- if (DstTy == Type::getInt1Ty(I.getContext()) &&
- (I.getOpcode() == Instruction::Trunc ||
- I.getOpcode() == Instruction::FPToUI ||
- I.getOpcode() == Instruction::FPToSI ||
- I.getOpcode() == Instruction::PtrToInt)) {
- // Make sure we really get a trunc to bool by anding the operand with 1
- Out << "&1u";
- }
- Out << ')';
-}
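-
-// An FP<->int bitcast thus becomes a comma expression through the helper
-// union; e.g. "bitcast float %x to i32" is emitted as (name abbreviated):
-//
-//   (t__BITCAST_TEMPORARY.Float = x, t__BITCAST_TEMPORARY.Int32)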
-
-void CWriter::visitSelectInst(SelectInst &I) {
- Out << "((";
- writeOperand(I.getCondition());
- Out << ") ? (";
- writeOperand(I.getTrueValue());
- Out << ") : (";
- writeOperand(I.getFalseValue());
- Out << "))";
-}
-
-// Prints the limits.h macro name (or literal value) for the max or min of an
-// integer type.
-static void printLimitValue(IntegerType &Ty, bool isSigned, bool isMax,
- raw_ostream &Out) {
- const char* type;
- const char* sprefix = "";
-
- unsigned NumBits = Ty.getBitWidth();
- if (NumBits <= 8) {
- type = "CHAR";
- sprefix = "S";
- } else if (NumBits <= 16) {
- type = "SHRT";
- } else if (NumBits <= 32) {
- type = "INT";
- } else if (NumBits <= 64) {
- type = "LLONG";
- } else {
- llvm_unreachable("Bit widths > 64 not implemented yet");
- }
-
- if (isSigned)
- Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
- else if (isMax)
- Out << "U" << type << "_MAX";
- else
- Out << "0"; // The minimum of any unsigned type is zero.
-}
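-
-// Examples: (i32, signed, max) prints "INT_MAX"; (i8, signed, min) prints
-// "SCHAR_MIN"; (i16, unsigned, max) prints "USHRT_MAX"; the minimum of any
-// unsigned type is printed as "0".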
-
-#ifndef NDEBUG
-static bool isSupportedIntegerSize(IntegerType &T) {
- return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
- T.getBitWidth() == 32 || T.getBitWidth() == 64;
-}
-#endif
-
-void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
- FunctionType *funT = F.getFunctionType();
- Type *retT = F.getReturnType();
- IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
-
- assert(isSupportedIntegerSize(*elemT) &&
- "CBackend does not support arbitrary size integers.");
- assert(cast<StructType>(retT)->getElementType(0) == elemT &&
- elemT == funT->getParamType(0) && funT->getNumParams() == 2);
-
- switch (F.getIntrinsicID()) {
- default:
- llvm_unreachable("Unsupported Intrinsic.");
- case Intrinsic::uadd_with_overflow:
- // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
- // Rty r;
- // r.field0 = a + b;
- // r.field1 = (r.field0 < a);
- // return r;
- // }
- Out << "static inline ";
- printType(Out, retT);
- Out << GetValueName(&F);
- Out << "(";
- printSimpleType(Out, elemT, false);
- Out << "a,";
- printSimpleType(Out, elemT, false);
- Out << "b) {\n ";
- printType(Out, retT);
- Out << "r;\n";
- Out << " r.field0 = a + b;\n";
- Out << " r.field1 = (r.field0 < a);\n";
- Out << " return r;\n}\n";
- break;
-
- case Intrinsic::sadd_with_overflow:
- // static inline Rty sadd_ixx(ixx a, ixx b) {
- // Rty r;
- // r.field1 = (b > 0 && a > XX_MAX - b) ||
- // (b < 0 && a < XX_MIN - b);
- // r.field0 = r.field1 ? 0 : a + b;
- // return r;
- // }
- Out << "static ";
- printType(Out, retT);
- Out << GetValueName(&F);
- Out << "(";
- printSimpleType(Out, elemT, true);
- Out << "a,";
- printSimpleType(Out, elemT, true);
- Out << "b) {\n ";
- printType(Out, retT);
- Out << "r;\n";
- Out << " r.field1 = (b > 0 && a > ";
- printLimitValue(*elemT, true, true, Out);
- Out << " - b) || (b < 0 && a < ";
- printLimitValue(*elemT, true, false, Out);
- Out << " - b);\n";
- Out << " r.field0 = r.field1 ? 0 : a + b;\n";
- Out << " return r;\n}\n";
- break;
- }
-}
-
-void CWriter::lowerIntrinsics(Function &F) {
- // This is used to keep track of intrinsics that get generated to a lowered
- // function. We must generate the prototypes before the function body which
- // will only be expanded on first use (by the loop below).
- std::vector<Function*> prototypesToGen;
-
- // Examine all the instructions in this function to find the intrinsics that
- // need to be lowered.
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; )
- if (CallInst *CI = dyn_cast<CallInst>(I++))
- if (Function *F = CI->getCalledFunction())
- switch (F->getIntrinsicID()) {
- case Intrinsic::not_intrinsic:
- case Intrinsic::vastart:
- case Intrinsic::vacopy:
- case Intrinsic::vaend:
- case Intrinsic::returnaddress:
- case Intrinsic::frameaddress:
- case Intrinsic::setjmp:
- case Intrinsic::longjmp:
- case Intrinsic::prefetch:
- case Intrinsic::powi:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_cmp_ps:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_cmp_pd:
- case Intrinsic::ppc_altivec_lvsl:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- // We directly implement these intrinsics
- break;
- default:
- // If this is an intrinsic that directly corresponds to a GCC
- // builtin, we handle it.
- const char *BuiltinName = "";
-#define GET_GCC_BUILTIN_NAME
-#include "llvm/Intrinsics.gen"
-#undef GET_GCC_BUILTIN_NAME
- // If we handle it, don't lower it.
- if (BuiltinName[0]) break;
-
- // All other intrinsic calls we must lower.
- Instruction *Before = 0;
- if (CI != &BB->front())
- Before = prior(BasicBlock::iterator(CI));
-
- IL->LowerIntrinsicCall(CI);
- if (Before) { // Move iterator to instruction after call
- I = Before; ++I;
- } else {
- I = BB->begin();
- }
- // If the intrinsic got lowered to another call, and that call has
- // a definition then we need to make sure its prototype is emitted
- // before any calls to it.
- if (CallInst *Call = dyn_cast<CallInst>(I))
- if (Function *NewF = Call->getCalledFunction())
- if (!NewF->isDeclaration())
- prototypesToGen.push_back(NewF);
-
- break;
- }
-
- // We may have collected some prototypes to emit in the loop above.
-  // Emit them now, before the function that uses them is emitted.  But be
-  // careful not to emit them twice.
- std::vector<Function*>::iterator I = prototypesToGen.begin();
- std::vector<Function*>::iterator E = prototypesToGen.end();
- for ( ; I != E; ++I) {
- if (intrinsicPrototypesAlreadyGenerated.insert(*I).second) {
- Out << '\n';
- printFunctionSignature(*I, true);
- Out << ";\n";
- }
- }
-}
-
-void CWriter::visitCallInst(CallInst &I) {
- if (isa<InlineAsm>(I.getCalledValue()))
- return visitInlineAsm(I);
-
- bool WroteCallee = false;
-
- // Handle intrinsic function calls first...
- if (Function *F = I.getCalledFunction())
- if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
- if (visitBuiltinCall(I, ID, WroteCallee))
- return;
-
- Value *Callee = I.getCalledValue();
-
- PointerType *PTy = cast<PointerType>(Callee->getType());
- FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
-
- // If this is a call to a struct-return function, assign to the first
- // parameter instead of passing it to the call.
- const AttrListPtr &PAL = I.getAttributes();
- bool hasByVal = I.hasByValArgument();
- bool isStructRet = I.hasStructRetAttr();
- if (isStructRet) {
- writeOperandDeref(I.getArgOperand(0));
- Out << " = ";
- }
-
- if (I.isTailCall()) Out << " /*tail*/ ";
-
- if (!WroteCallee) {
- // If this is an indirect call to a struct return function, we need to cast
- // the pointer. Ditto for indirect calls with byval arguments.
- bool NeedsCast = (hasByVal || isStructRet) && !isa<Function>(Callee);
-
-    // GCC is a real PITA.  It does not permit codegening casts of functions to
-    // function pointers if they are in a call (it generates a trap instruction
-    // instead!).  We work around this by inserting a cast to void* in between
-    // the function and the function pointer cast.  Unfortunately, we can't
-    // just form the constant expression here, because the folder will
-    // immediately nuke it.
-    //
-    // Note, finally, that this is completely unsafe.  ANSI C does not
-    // guarantee that void* and function pointers have the same size. :( To
-    // deal with this in the common case, we handle casts where the number of
-    // arguments passed matches exactly.
-    //
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Callee))
- if (CE->isCast())
- if (Function *RF = dyn_cast<Function>(CE->getOperand(0))) {
- NeedsCast = true;
- Callee = RF;
- }
-
- if (NeedsCast) {
- // Ok, just cast the pointer type.
- Out << "((";
- if (isStructRet)
- printStructReturnPointerFunctionType(Out, PAL,
- cast<PointerType>(I.getCalledValue()->getType()));
- else if (hasByVal)
- printType(Out, I.getCalledValue()->getType(), false, "", true, PAL);
- else
- printType(Out, I.getCalledValue()->getType());
- Out << ")(void*)";
- }
- writeOperand(Callee);
- if (NeedsCast) Out << ')';
- }
-
- Out << '(';
-
- bool PrintedArg = false;
-  if (FTy->isVarArg() && !FTy->getNumParams()) {
- Out << "0 /*dummy arg*/";
- PrintedArg = true;
- }
-
- unsigned NumDeclaredParams = FTy->getNumParams();
- CallSite CS(&I);
- CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- unsigned ArgNo = 0;
- if (isStructRet) { // Skip struct return argument.
- ++AI;
- ++ArgNo;
- }
-
- for (; AI != AE; ++AI, ++ArgNo) {
- if (PrintedArg) Out << ", ";
- if (ArgNo < NumDeclaredParams &&
- (*AI)->getType() != FTy->getParamType(ArgNo)) {
- Out << '(';
- printType(Out, FTy->getParamType(ArgNo),
- /*isSigned=*/PAL.paramHasAttr(ArgNo+1, Attribute::SExt));
- Out << ')';
- }
- // Check if the argument is expected to be passed by value.
- if (I.paramHasAttr(ArgNo+1, Attribute::ByVal))
- writeOperandDeref(*AI);
- else
- writeOperand(*AI);
- PrintedArg = true;
- }
- Out << ')';
-}
-
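For an indirect struct-return call the printed expression ends up shaped like this sketch, with the (void*) hop from the workaround described above (all identifiers hypothetical):

struct l_pair { int field0; int field1; };

int l_call_indirect(void *callee_ptr) {
  struct l_pair sret_tmp;
  /* sret: assign into the first argument's pointee; the (void*) cast
     sits between the pointer and the function-pointer cast */
  sret_tmp = ((struct l_pair (*)(int))(void*)callee_ptr)(42);
  return sret_tmp.field0;
}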
-/// visitBuiltinCall - Handle the call to the specified builtin.  Returns true
-/// if the entire call is handled, false if it wasn't handled, and optionally
-/// sets 'WroteCallee' if the callee has already been printed out.
-bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
- bool &WroteCallee) {
- switch (ID) {
- default: {
- // If this is an intrinsic that directly corresponds to a GCC
- // builtin, we emit it here.
- const char *BuiltinName = "";
- Function *F = I.getCalledFunction();
-#define GET_GCC_BUILTIN_NAME
-#include "llvm/Intrinsics.gen"
-#undef GET_GCC_BUILTIN_NAME
- assert(BuiltinName[0] && "Unknown LLVM intrinsic!");
-
- Out << BuiltinName;
- WroteCallee = true;
- return false;
- }
- case Intrinsic::vastart:
- Out << "0; ";
- Out << "va_start(*(va_list*)";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- // Output the last argument to the enclosing function.
- if (I.getParent()->getParent()->arg_empty())
- Out << "vararg_dummy_arg";
- else
- writeOperand(--I.getParent()->getParent()->arg_end());
- Out << ')';
- return true;
- case Intrinsic::vaend:
- if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
- Out << "0; va_end(*(va_list*)";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- } else {
- Out << "va_end(*(va_list*)0)";
- }
- return true;
- case Intrinsic::vacopy:
- Out << "0; ";
- Out << "va_copy(*(va_list*)";
- writeOperand(I.getArgOperand(0));
- Out << ", *(va_list*)";
- writeOperand(I.getArgOperand(1));
- Out << ')';
- return true;
- case Intrinsic::returnaddress:
- Out << "__builtin_return_address(";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- return true;
- case Intrinsic::frameaddress:
- Out << "__builtin_frame_address(";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- return true;
- case Intrinsic::powi:
- Out << "__builtin_powi(";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ')';
- return true;
- case Intrinsic::setjmp:
- Out << "setjmp(*(jmp_buf*)";
- writeOperand(I.getArgOperand(0));
- Out << ')';
- return true;
- case Intrinsic::longjmp:
- Out << "longjmp(*(jmp_buf*)";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ')';
- return true;
- case Intrinsic::prefetch:
- Out << "LLVM_PREFETCH((const void *)";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ", ";
- writeOperand(I.getArgOperand(2));
- Out << ")";
- return true;
- case Intrinsic::stacksave:
- // Emit this as: Val = 0; *((void**)&Val) = __builtin_stack_save()
- // to work around GCC bugs (see PR1809).
- Out << "0; *((void**)&" << GetValueName(&I)
- << ") = __builtin_stack_save()";
- return true;
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse_cmp_ps:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse2_cmp_pd:
- Out << '(';
- printType(Out, I.getType());
- Out << ')';
- // Multiple GCC builtins multiplex onto this intrinsic.
- switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
- default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
- case 0: Out << "__builtin_ia32_cmpeq"; break;
- case 1: Out << "__builtin_ia32_cmplt"; break;
- case 2: Out << "__builtin_ia32_cmple"; break;
- case 3: Out << "__builtin_ia32_cmpunord"; break;
- case 4: Out << "__builtin_ia32_cmpneq"; break;
- case 5: Out << "__builtin_ia32_cmpnlt"; break;
- case 6: Out << "__builtin_ia32_cmpnle"; break;
- case 7: Out << "__builtin_ia32_cmpord"; break;
- }
- if (ID == Intrinsic::x86_sse_cmp_ps || ID == Intrinsic::x86_sse2_cmp_pd)
- Out << 'p';
- else
- Out << 's';
- if (ID == Intrinsic::x86_sse_cmp_ss || ID == Intrinsic::x86_sse_cmp_ps)
- Out << 's';
- else
- Out << 'd';
-
- Out << "(";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ")";
- return true;
- case Intrinsic::ppc_altivec_lvsl:
- Out << '(';
- printType(Out, I.getType());
- Out << ')';
- Out << "__builtin_altivec_lvsl(0, (void*)";
- writeOperand(I.getArgOperand(0));
- Out << ")";
- return true;
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::sadd_with_overflow:
- Out << GetValueName(I.getCalledFunction()) << "(";
- writeOperand(I.getArgOperand(0));
- Out << ", ";
- writeOperand(I.getArgOperand(1));
- Out << ")";
- return true;
- }
-}
-
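As a worked example of the va_* expansions above, a vararg function printed by the writer contains code like this sketch; the leading "0;" keeps each expansion a valid expression statement where a call result was expected (names hypothetical):

#include <stdarg.h>

int l_sum(int n, ...) {
  va_list tmp;
  int total = 0;
  0; va_start(*(va_list*)&tmp, n);             /* llvm.va_start */
  while (n--) total += va_arg(*(va_list*)&tmp, int);
  0; va_end(*(va_list*)&tmp);                  /* llvm.va_end   */
  return total;
}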
-// This converts the llvm constraint string to something gcc is expecting.
-// TODO: work out platform-independent constraints and factor those out
-//       of the per-target tables; handle multiple constraint codes.
-std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
- assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
-
- // Grab the translation table from MCAsmInfo if it exists.
- const MCAsmInfo *TargetAsm;
- std::string Triple = TheModule->getTargetTriple();
- if (Triple.empty())
- Triple = llvm::sys::getHostTriple();
-
- std::string E;
- if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TargetAsm = Match->createMCAsmInfo(Triple);
- else
- return c.Codes[0];
-
- const char *const *table = TargetAsm->getAsmCBE();
-
- // Search the translation table if it exists.
- for (int i = 0; table && table[i]; i += 2)
- if (c.Codes[0] == table[i]) {
- delete TargetAsm;
- return table[i+1];
- }
-
- // Default is identity.
- delete TargetAsm;
- return c.Codes[0];
-}
-
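The table from getAsmCBE is a flat array of {llvm code, gcc code} string pairs terminated by a null entry; a minimal C sketch of the same walk, with hypothetical table contents:

#include <string.h>

static const char *const l_asm_cbe[] = { "r", "r", "m", "m", 0 };

const char *l_translate(const char *code) {
  for (int i = 0; l_asm_cbe[i]; i += 2)
    if (strcmp(l_asm_cbe[i], code) == 0)
      return l_asm_cbe[i + 1];
  return code;  /* default is identity */
}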
-// TODO: import logic from AsmPrinter.cpp
-static std::string gccifyAsm(std::string asmstr) {
- for (std::string::size_type i = 0; i != asmstr.size(); ++i)
- if (asmstr[i] == '\n')
- asmstr.replace(i, 1, "\\n");
- else if (asmstr[i] == '\t')
- asmstr.replace(i, 1, "\\t");
- else if (asmstr[i] == '$') {
- if (asmstr[i + 1] == '{') {
- std::string::size_type a = asmstr.find_first_of(':', i + 1);
- std::string::size_type b = asmstr.find_first_of('}', i + 1);
- std::string n = "%" +
- asmstr.substr(a + 1, b - a - 1) +
- asmstr.substr(i + 2, a - i - 2);
- asmstr.replace(i, b - i + 1, n);
- i += n.size() - 1;
- } else
- asmstr.replace(i, 1, "%");
- }
-    else if (asmstr[i] == '%') { // escape literal '%' for gcc
-      asmstr.replace(i, 1, "%%");
-      ++i;
-    }
-
- return asmstr;
-}
-
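Worked example of the rewrites above, as C literals (input assumed): "$N" becomes "%N", "${N:m}" becomes "%mN", a literal '%' is doubled, and newlines/tabs are escaped.

const char *llvm_form = "mov $1, ${0:w}";  /* LLVM asm string */
const char *gcc_form  = "mov %1, %w0";     /* after gccifyAsm */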
-// TODO: assumptions about what consumes arguments from the call are likely
-//       wrong; handle commutativity.
-void CWriter::visitInlineAsm(CallInst &CI) {
- InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
- InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
-
- std::vector<std::pair<Value*, int> > ResultVals;
-  if (CI.getType() == Type::getVoidTy(CI.getContext()))
-    ;   // No result values to collect.
- else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
- ResultVals.push_back(std::make_pair(&CI, (int)i));
- } else {
- ResultVals.push_back(std::make_pair(&CI, -1));
- }
-
- // Fix up the asm string for gcc and emit it.
- Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
- Out << " :";
-
- unsigned ValueCount = 0;
- bool IsFirst = true;
-
- // Convert over all the output constraints.
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
-
- if (I->Type != InlineAsm::isOutput) {
- ++ValueCount;
- continue; // Ignore non-output constraints.
- }
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
-    if (!IsFirst)
-      Out << ", ";
-    IsFirst = false;  // Clear unconditionally so later entries get commas.
-
- // Unpack the dest.
- Value *DestVal;
- int DestValNo = -1;
-
- if (ValueCount < ResultVals.size()) {
- DestVal = ResultVals[ValueCount].first;
- DestValNo = ResultVals[ValueCount].second;
- } else
- DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
-
- if (I->isEarlyClobber)
- C = "&"+C;
-
- Out << "\"=" << C << "\"(" << GetValueName(DestVal);
- if (DestValNo != -1)
- Out << ".field" << DestValNo; // Multiple retvals.
- Out << ")";
- ++ValueCount;
- }
-
- // Convert over all the input constraints.
- Out << "\n :";
- IsFirst = true;
- ValueCount = 0;
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
- if (I->Type != InlineAsm::isInput) {
- ++ValueCount;
- continue; // Ignore non-input constraints.
- }
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
-    if (!IsFirst)
-      Out << ", ";
-    IsFirst = false;  // Clear unconditionally so later entries get commas.
-
- assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
- Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
-
- Out << "\"" << C << "\"(";
- if (!I->isIndirect)
- writeOperand(SrcVal);
- else
- writeOperandDeref(SrcVal);
- Out << ")";
- }
-
- // Convert over the clobber constraints.
- IsFirst = true;
- for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
- E = Constraints.end(); I != E; ++I) {
- if (I->Type != InlineAsm::isClobber)
-      continue;  // Ignore non-clobber constraints.
-
- assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
- std::string C = InterpretASMConstraint(*I);
- if (C.empty()) continue;
-
-    if (!IsFirst)
-      Out << ", ";
-    IsFirst = false;  // Clear unconditionally so later entries get commas.
-
- Out << '\"' << C << '"';
- }
-
- Out << ")";
-}
-
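Assembled from the three constraint sections, the emitted statement has roughly this shape (asm text, constraint letters, and names hypothetical):

int l_demo(int x) {
  int y;
  __asm__ volatile ("mov %1, %0"
          :"=r"(y)
          :"r"(x)
          );
  return y;
}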
-void CWriter::visitAllocaInst(AllocaInst &I) {
- Out << '(';
- printType(Out, I.getType());
- Out << ") alloca(sizeof(";
- printType(Out, I.getType()->getElementType());
- Out << ')';
- if (I.isArrayAllocation()) {
- Out << " * " ;
- writeOperand(I.getOperand(0));
- }
- Out << ')';
-}
-
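For an array alloca the element size is multiplied by the count operand, as in this sketch (alloca() and its header are platform-specific assumptions; names hypothetical):

#include <alloca.h>

void l_demo(unsigned n) {
  unsigned *p = (unsigned*) alloca(sizeof(unsigned) * n);
  p[0] = 0;
}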
-void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
- gep_type_iterator E, bool Static) {
-
- // If there are no indices, just print out the pointer.
- if (I == E) {
- writeOperand(Ptr);
- return;
- }
-
- // Find out if the last index is into a vector. If so, we have to print this
- // specially. Since vectors can't have elements of indexable type, only the
-  // last index could possibly index into a vector.
- VectorType *LastIndexIsVector = 0;
- {
- for (gep_type_iterator TmpI = I; TmpI != E; ++TmpI)
- LastIndexIsVector = dyn_cast<VectorType>(*TmpI);
- }
-
- Out << "(";
-
- // If the last index is into a vector, we can't print it as &a[i][j] because
- // we can't index into a vector with j in GCC. Instead, emit this as
- // (((float*)&a[i])+j)
- if (LastIndexIsVector) {
- Out << "((";
- printType(Out, PointerType::getUnqual(LastIndexIsVector->getElementType()));
- Out << ")(";
- }
-
- Out << '&';
-
-  // If the first index is 0 (very typical), we can do a number of
-  // simplifications to clean up the code.
- Value *FirstOp = I.getOperand();
- if (!isa<Constant>(FirstOp) || !cast<Constant>(FirstOp)->isNullValue()) {
- // First index isn't simple, print it the hard way.
- writeOperand(Ptr);
- } else {
- ++I; // Skip the zero index.
-
-      // Okay, emit the first operand.  If Ptr is something that is already
-      // address-exposed, like a global, avoid emitting (&foo)[0]; just emit
-      // foo instead.
- if (isAddressExposed(Ptr)) {
- writeOperandInternal(Ptr, Static);
- } else if (I != E && (*I)->isStructTy()) {
- // If we didn't already emit the first operand, see if we can print it as
- // P->f instead of "P[0].f"
- writeOperand(Ptr);
- Out << "->field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- ++I; // eat the struct index as well.
- } else {
- // Instead of emitting P[0][1], emit (*P)[1], which is more idiomatic.
- Out << "(*";
- writeOperand(Ptr);
- Out << ")";
- }
- }
-
- for (; I != E; ++I) {
- if ((*I)->isStructTy()) {
- Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- } else if ((*I)->isArrayTy()) {
- Out << ".array[";
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << ']';
- } else if (!(*I)->isVectorTy()) {
- Out << '[';
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << ']';
- } else {
- // If the last index is into a vector, then print it out as "+j)". This
- // works with the 'LastIndexIsVector' code above.
- if (isa<Constant>(I.getOperand()) &&
- cast<Constant>(I.getOperand())->isNullValue()) {
- Out << "))"; // avoid "+0".
- } else {
- Out << ")+(";
- writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
- Out << "))";
- }
- }
- }
- Out << ")";
-}
-
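Sketches of the main GEP shapes produced above (struct and field names hypothetical):

struct l_S { int field0; int field1; };
struct l_A { int array[4]; };

int *l_geps(struct l_S *P, struct l_A *A, int i) {
  int *f = &P->field1;      /* zero first index + struct field */
  (void)f;
  return &(*A).array[i];    /* zero first index + array index  */
}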
-void CWriter::writeMemoryAccess(Value *Operand, Type *OperandType,
- bool IsVolatile, unsigned Alignment) {
-
- bool IsUnaligned = Alignment &&
- Alignment < TD->getABITypeAlignment(OperandType);
-
- if (!IsUnaligned)
- Out << '*';
- if (IsVolatile || IsUnaligned) {
- Out << "((";
- if (IsUnaligned)
- Out << "struct __attribute__ ((packed, aligned(" << Alignment << "))) {";
- printType(Out, OperandType, false, IsUnaligned ? "data" : "volatile*");
- if (IsUnaligned) {
- Out << "; } ";
- if (IsVolatile) Out << "volatile ";
- Out << "*";
- }
- Out << ")";
- }
-
- writeOperand(Operand);
-
- if (IsVolatile || IsUnaligned) {
- Out << ')';
- if (IsUnaligned)
- Out << "->data";
- }
-}
-
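The unaligned path wraps the pointee in a packed one-field struct so the access happens at the reduced alignment; a sketch for a 1-aligned volatile load (GCC attribute extension; names hypothetical):

unsigned l_load1(void *p) {
  return ((struct __attribute__ ((packed, aligned(1))) {
            unsigned int data; } volatile *)p)->data;
}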
-void CWriter::visitLoadInst(LoadInst &I) {
- writeMemoryAccess(I.getOperand(0), I.getType(), I.isVolatile(),
- I.getAlignment());
-}
-
-void CWriter::visitStoreInst(StoreInst &I) {
- writeMemoryAccess(I.getPointerOperand(), I.getOperand(0)->getType(),
- I.isVolatile(), I.getAlignment());
- Out << " = ";
- Value *Operand = I.getOperand(0);
- Constant *BitMask = 0;
- if (IntegerType* ITy = dyn_cast<IntegerType>(Operand->getType()))
- if (!ITy->isPowerOf2ByteWidth())
-      // We have a bit width that doesn't match an even power-of-2 byte
-      // size.  Consequently we must mask the value with the type's bit mask.
- BitMask = ConstantInt::get(ITy, ITy->getBitMask());
- if (BitMask)
- Out << "((";
- writeOperand(Operand);
- if (BitMask) {
- Out << ") & ";
- printConstant(BitMask, false);
- Out << ")";
- }
-}
-
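For example, a store of a hypothetical i24 kept in an unsigned int is masked down to the declared width:

void l_store_i24(unsigned *slot, unsigned v) {
  *slot = ((v) & 0xFFFFFFu);  /* keep only the type's 24 bits */
}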
-void CWriter::visitGetElementPtrInst(GetElementPtrInst &I) {
- printGEPExpression(I.getPointerOperand(), gep_type_begin(I),
- gep_type_end(I), false);
-}
-
-void CWriter::visitVAArgInst(VAArgInst &I) {
- Out << "va_arg(*(va_list*)";
- writeOperand(I.getOperand(0));
- Out << ", ";
- printType(Out, I.getType());
- Out << ");\n ";
-}
-
-void CWriter::visitInsertElementInst(InsertElementInst &I) {
- Type *EltTy = I.getType()->getElementType();
- writeOperand(I.getOperand(0));
- Out << ";\n ";
- Out << "((";
- printType(Out, PointerType::getUnqual(EltTy));
- Out << ")(&" << GetValueName(&I) << "))[";
- writeOperand(I.getOperand(2));
- Out << "] = (";
- writeOperand(I.getOperand(1));
- Out << ")";
-}
-
-void CWriter::visitExtractElementInst(ExtractElementInst &I) {
- // We know that our operand is not inlined.
- Out << "((";
- Type *EltTy =
- cast<VectorType>(I.getOperand(0)->getType())->getElementType();
- printType(Out, PointerType::getUnqual(EltTy));
- Out << ")(&" << GetValueName(I.getOperand(0)) << "))[";
- writeOperand(I.getOperand(1));
- Out << "]";
-}
-
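Both element accessors pun the vector's address into an element pointer; a sketch using the GCC vector extension (type name hypothetical):

typedef int l_v4i __attribute__((vector_size(16)));

int l_get(l_v4i v, int i) {
  return ((int*)(&v))[i];
}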
-void CWriter::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
- Out << "(";
- printType(Out, SVI.getType());
- Out << "){ ";
- VectorType *VT = SVI.getType();
- unsigned NumElts = VT->getNumElements();
- Type *EltTy = VT->getElementType();
-
- for (unsigned i = 0; i != NumElts; ++i) {
- if (i) Out << ", ";
- int SrcVal = SVI.getMaskValue(i);
- if ((unsigned)SrcVal >= NumElts*2) {
- Out << " 0/*undef*/ ";
- } else {
- Value *Op = SVI.getOperand((unsigned)SrcVal >= NumElts);
- if (isa<Instruction>(Op)) {
- // Do an extractelement of this value from the appropriate input.
- Out << "((";
- printType(Out, PointerType::getUnqual(EltTy));
- Out << ")(&" << GetValueName(Op)
- << "))[" << (SrcVal & (NumElts-1)) << "]";
- } else if (isa<ConstantAggregateZero>(Op) || isa<UndefValue>(Op)) {
- Out << "0";
- } else {
- printConstant(cast<ConstantVector>(Op)->getOperand(SrcVal &
- (NumElts-1)),
- false);
- }
- }
- }
- Out << "}";
-}
-
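A shuffle then becomes a compound literal whose elements are such extractions from either input; a sketch with an assumed mask of <0, 5, 2, undef>:

typedef int l_v4 __attribute__((vector_size(16)));

l_v4 l_shuf(l_v4 a, l_v4 b) {
  return (l_v4){ ((int*)(&a))[0], ((int*)(&b))[1],
                 ((int*)(&a))[2], 0/*undef*/ };
}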
-void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
- // Start by copying the entire aggregate value into the result variable.
- writeOperand(IVI.getOperand(0));
- Out << ";\n ";
-
- // Then do the insert to update the field.
- Out << GetValueName(&IVI);
- for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
- i != e; ++i) {
- Type *IndexedTy =
- ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
- makeArrayRef(b, i+1));
- if (IndexedTy->isArrayTy())
- Out << ".array[" << *i << "]";
- else
- Out << ".field" << *i;
- }
- Out << " = ";
- writeOperand(IVI.getOperand(1));
-}
-
-void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
- Out << "(";
- if (isa<UndefValue>(EVI.getOperand(0))) {
- Out << "(";
- printType(Out, EVI.getType());
- Out << ") 0/*UNDEF*/";
- } else {
- Out << GetValueName(EVI.getOperand(0));
- for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
- i != e; ++i) {
- Type *IndexedTy =
- ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
- makeArrayRef(b, i+1));
- if (IndexedTy->isArrayTy())
- Out << ".array[" << *i << "]";
- else
- Out << ".field" << *i;
- }
- }
- Out << ")";
-}
-
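Since aggregates are plain structs in the output, insertvalue copies the aggregate and then updates one field, and extractvalue is a parenthesized field read; a sketch (names hypothetical):

struct l_agg { unsigned field0; unsigned char field1; };

unsigned char l_ovf(struct l_agg in) {
  struct l_agg tmp = in;   /* insertvalue starts from a full copy */
  tmp.field1 = 1;          /* ...then updates the indexed field   */
  return (tmp.field1);     /* extractvalue reads a field          */
}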
-//===----------------------------------------------------------------------===//
-// External Interface declaration
-//===----------------------------------------------------------------------===//
-
-bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
-
- PM.add(createGCLoweringPass());
- PM.add(createLowerInvokePass());
- PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
- PM.add(new CWriter(o));
- PM.add(createGCInfoDeleter());
- return false;
-}
diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt
deleted file mode 100644
index 96ae49f01fc0..000000000000
--- a/lib/Target/CBackend/CMakeLists.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-add_llvm_target(CBackend
- CBackend.cpp
- )
-
-add_llvm_library_dependencies(LLVMCBackend
- LLVMAnalysis
- LLVMCBackendInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
-
-add_subdirectory(TargetInfo)
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
deleted file mode 100644
index 4f1ca974ded9..000000000000
--- a/lib/Target/CBackend/CTargetMachine.h
+++ /dev/null
@@ -1,42 +0,0 @@
-//===-- CTargetMachine.h - TargetMachine for the C backend ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the TargetMachine that is used by the C backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef CTARGETMACHINE_H
-#define CTARGETMACHINE_H
-
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
-
-namespace llvm {
-
-struct CTargetMachine : public TargetMachine {
- CTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, TT, CPU, FS) {}
-
- virtual bool addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
-
- virtual const TargetData *getTargetData() const { return 0; }
-};
-
-extern Target TheCBackendTarget;
-
-} // End llvm namespace
-
-
-#endif
diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile
deleted file mode 100644
index 621948a9f4ac..000000000000
--- a/lib/Target/CBackend/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/CBackend/Makefile ------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMCBackend
-DIRS = TargetInfo
-
-include $(LEVEL)/Makefile.common
-
-CompileCommonOpts += -Wno-format
diff --git a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp b/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
deleted file mode 100644
index e8274ff9ce5a..000000000000
--- a/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- CBackendTargetInfo.cpp - CBackend Target Implementation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "CTargetMachine.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheCBackendTarget;
-
-extern "C" void LLVMInitializeCBackendTargetInfo() {
- RegisterTarget<> X(TheCBackendTarget, "c", "C backend");
-}
-
-extern "C" void LLVMInitializeCBackendTargetMC() {}
diff --git a/lib/Target/CBackend/TargetInfo/CMakeLists.txt b/lib/Target/CBackend/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 8e616bebd532..000000000000
--- a/lib/Target/CBackend/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMCBackendInfo
- CBackendTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMCBackendInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
diff --git a/lib/Target/CBackend/TargetInfo/Makefile b/lib/Target/CBackend/TargetInfo/Makefile
deleted file mode 100644
index d4d5e15b40bb..000000000000
--- a/lib/Target/CBackend/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/CBackend/TargetInfo/Makefile -------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMCBackendInfo
-
-# Hack: we need to include the 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 030f8089abf7..5913a9c4ccdd 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -3,58 +3,18 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetData.cpp
TargetELFWriterInfo.cpp
- TargetFrameLowering.cpp
TargetInstrInfo.cpp
TargetIntrinsicInfo.cpp
+ TargetJITInfo.cpp
TargetLibraryInfo.cpp
TargetLoweringObjectFile.cpp
TargetMachine.cpp
+ TargetMachineC.cpp
TargetRegisterInfo.cpp
TargetSubtargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMTarget
- LLVMCore
- LLVMMC
- LLVMSupport
- )
-
-set(LLVM_ENUM_ASM_PRINTERS "")
-set(LLVM_ENUM_ASM_PARSERS "")
-set(LLVM_ENUM_DISASSEMBLERS "")
foreach(t ${LLVM_TARGETS_TO_BUILD})
message(STATUS "Targeting ${t}")
add_subdirectory(${t})
- set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} )
- file(GLOB asmp_file "${td}/*AsmPrinter.cpp")
- if( asmp_file )
- set(LLVM_ENUM_ASM_PRINTERS
- "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n")
- endif()
- if( EXISTS ${td}/AsmParser/CMakeLists.txt )
- set(LLVM_ENUM_ASM_PARSERS
- "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n")
- endif()
- if( EXISTS ${td}/Disassembler/CMakeLists.txt )
- set(LLVM_ENUM_DISASSEMBLERS
- "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n")
- endif()
-endforeach(t)
-
-# Produce llvm/Config/AsmPrinters.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def
- )
-
-# Produce llvm/Config/AsmParsers.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def
- )
-
-# Produce llvm/Config/Disassemblers.def
-configure_file(
- ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in
- ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def
- )
+endforeach()
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index 158fb3eacce3..cf4f796ec2fb 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -1,12 +1,12 @@
set(LLVM_TARGET_DEFINITIONS SPU.td)
-llvm_tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(SPUGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(SPUGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(SPUGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(SPUGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(SPUGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(SPUGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv)
add_public_tablegen_target(CellSPUCommonTableGen)
add_llvm_target(CellSPUCodeGen
@@ -16,6 +16,7 @@ add_llvm_target(CellSPUCodeGen
SPUISelDAGToDAG.cpp
SPUISelLowering.cpp
SPUFrameLowering.cpp
+ SPUMachineFunction.cpp
SPURegisterInfo.cpp
SPUSubtarget.cpp
SPUTargetMachine.cpp
@@ -23,17 +24,5 @@ add_llvm_target(CellSPUCodeGen
SPUNopFiller.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUCodeGen
- LLVMAsmPrinter
- LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/CellSDKIntrinsics.td b/lib/Target/CellSPU/CellSDKIntrinsics.td
index 9468aee067a3..cdb4099ffbca 100644
--- a/lib/Target/CellSPU/CellSDKIntrinsics.td
+++ b/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -1,5 +1,5 @@
//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt
new file mode 100644
index 000000000000..277620bf4e59
--- /dev/null
+++ b/lib/Target/CellSPU/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = CellSPU
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = CellSPUCodeGen
+parent = CellSPU
+required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target
+add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
index d41fe934e2c5..0027bdbf6ca1 100644
--- a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -3,9 +3,4 @@ add_llvm_library(LLVMCellSPUDesc
SPUMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUDesc
- LLVMCellSPUInfo
- LLVMMC
- )
-
add_dependencies(LLVMCellSPUDesc CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..71e5bbc629ca
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CellSPUDesc
+parent = CellSPU
+required_libraries = CellSPUInfo MC
+add_to_library_groups = CellSPU
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
index 8c1176a9d028..4bad37eacaf7 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -14,6 +14,8 @@
#include "SPUMCAsmInfo.h"
using namespace llvm;
+void SPULinuxMCAsmInfo::anchor() { }
+
SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
index 7f850d347f56..f786147b9267 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
@@ -20,7 +20,9 @@
namespace llvm {
class Target;
- struct SPULinuxMCAsmInfo : public MCAsmInfo {
+ class SPULinuxMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
index d5af2a88aed1..8450e2c6634c 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,6 +18,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -62,11 +63,12 @@ static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
// For the time being, use static relocations, since there's really no
// support for PIC yet.
- X->InitMCCodeGenInfo(Reloc::Static, CM);
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
return X;
}
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
index a3717b0bfc10..d26449e8908f 100644
--- a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -15,9 +15,7 @@
#define SPUMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheCellSPUTarget;
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index b51fbc7a5197..c660131706cb 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -1,4 +1,4 @@
-//===-- SPU.h - Top-level interface for Cell SPU Target ----------*- C++ -*-==//
+//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPU.td b/lib/Target/CellSPU/SPU.td
index 8327fe03d7f8..e835b9cac8e1 100644
--- a/lib/Target/CellSPU/SPU.td
+++ b/lib/Target/CellSPU/SPU.td
@@ -1,5 +1,5 @@
-//===- SPU.td - Describe the STI Cell SPU Target Machine ----*- tablegen -*-===//
-//
+//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
index 3031fda54381..e051e047333a 100644
--- a/lib/Target/CellSPU/SPU128InstrInfo.td
+++ b/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -1,9 +1,9 @@
-//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===//
//
// Cell SPU 128-bit operations
//
//===----------------------------------------------------------------------===//
-
+
// zext 32->128: Zero extend 32-bit to 128-bit
def : Pat<(i128 (zext R32C:$rSrc)),
(ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index f340edfb0f86..bea33b5362d2 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -1,4 +1,4 @@
-//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
+//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
//
// Cell SPU 64-bit operations
//
diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp
index 90b5270a9dae..14021fef05d9 100644
--- a/lib/Target/CellSPU/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -------=//
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -248,7 +248,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
switch (MO.getType()) {
case MachineOperand::MO_Immediate:
report_fatal_error("printOp() does not handle immediate values");
- return;
case MachineOperand::MO_MachineBasicBlock:
O << *MO.getMBB()->getSymbol();
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index 04fa2ae866d6..9f9692bf67fe 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -1,10 +1,10 @@
//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the STI Cell SPU architecture.
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 093f99f28711..fac806e1b0ea 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUFrameLowering.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUInstrInfo.h"
#include "llvm/Function.h"
@@ -47,7 +47,8 @@ bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->getStackSize() &&
- (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects());
+ (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects());
}
diff --git a/lib/Target/CellSPU/SPUFrameLowering.h b/lib/Target/CellSPU/SPUFrameLowering.h
index b837f2cf94e1..11c52818dd9c 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.h
+++ b/lib/Target/CellSPU/SPUFrameLowering.h
@@ -1,4 +1,4 @@
-//=====-- SPUFrameLowering.h - SPU Frame Lowering stuff -*- C++ -*----========//
+//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index a297d036f03e..c27caeae7d45 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -22,7 +22,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
@@ -91,8 +90,6 @@ namespace {
short s_val = (short) i_val;
return i_val == s_val;
}
-
- return false;
}
//! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
@@ -216,7 +213,7 @@ namespace {
HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
CurDAG->getEntryNode(), CGPoolOffset,
MachinePointerInfo::getConstantPool(),
- false, false, Alignment));
+ false, false, false, Alignment));
CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
return N;
@@ -287,8 +284,8 @@ namespace {
llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
#else
SelectAddrIdxOnly(Op, Op, Op0, Op1);
-#endif
break;
+#endif
}
OutOps.push_back(Op0);
@@ -327,7 +324,7 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
Base = CurDAG->getTargetConstant( val , MVT::i32);
Index = Zero;
- return true; break;
+ return true;
case ISD::ConstantPool:
case ISD::GlobalAddress:
report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
@@ -579,22 +576,16 @@ SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
switch( VT.SimpleTy ) {
case MVT::i8:
return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
- break;
case MVT::i16:
return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
- break;
case MVT::i32:
return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
- break;
case MVT::f32:
return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
- break;
case MVT::i64:
return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
- break;
case MVT::i128:
return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
- break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -602,11 +593,10 @@ SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
case MVT::v2i64:
case MVT::v2f64:
return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
- break;
default:
assert( false && "add a new case here" );
+ return SDValue();
}
- return SDValue();
}
//! Convert the operand from a target-independent to a target-specific node
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ac33111f74ae..062374127e2f 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -27,19 +27,14 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <map>
using namespace llvm;
-// Used in getTargetNodeName() below
namespace {
- std::map<unsigned, const char *> node_names;
-
// Byte offset of the preferred slot (counted from the MSB)
int prefslotOffset(EVT VT) {
int retval=0;
@@ -84,8 +79,9 @@ namespace {
Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
- /*isReturnValueUsed=*/true,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
Callee, Args, DAG, Op.getDebugLoc());
return CallInfo.first;
@@ -296,12 +292,22 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i64, Expand);
setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
setOperationAction(ISD::CTLZ , MVT::i8, Promote);
setOperationAction(ISD::CTLZ , MVT::i16, Promote);
setOperationAction(ISD::CTLZ , MVT::i32, Legal);
setOperationAction(ISD::CTLZ , MVT::i64, Expand);
setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
// SPU has a version of select that implements (a&~c)|(b&c), just like
// select ought to work:
@@ -424,6 +430,13 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
+ // Expand all trunc stores
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
+
// Custom lower build_vector, constant pool spills, insert and
// extract vector elements:
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -434,6 +447,8 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+
setOperationAction(ISD::AND, MVT::v16i8, Custom);
setOperationAction(ISD::OR, MVT::v16i8, Custom);
setOperationAction(ISD::XOR, MVT::v16i8, Custom);
@@ -462,40 +477,34 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setSchedulingPreference(Sched::RegPressure);
}
-const char *
-SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
-{
- if (node_names.empty()) {
- node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
- node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
- node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
- node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
- node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
- node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
- node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
- node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
- node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
- node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
- node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
- node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
- node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
- node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
- node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
- node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
- node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
- "SPUISD::ROTBYTES_LEFT_BITS";
- node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
- node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
- node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
- node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
- node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
- }
-
- std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
-
- return ((i != node_names.end()) ? i->second : 0);
+const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
+ case SPUISD::Hi: return "SPUISD::Hi";
+ case SPUISD::Lo: return "SPUISD::Lo";
+ case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
+ case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
+ case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
+ case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
+ case SPUISD::CALL: return "SPUISD::CALL";
+ case SPUISD::SHUFB: return "SPUISD::SHUFB";
+ case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
+ case SPUISD::CNTB: return "SPUISD::CNTB";
+ case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
+ case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
+ case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
+ case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
+ case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
+ case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
+ case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
+ case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
+ case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
+ case SPUISD::SELB: return "SPUISD::SELB";
+ case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
+ case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
+ case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
+ }
}
//===----------------------------------------------------------------------===//
@@ -658,7 +667,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Do the load as a i128 to allow possible shifting
SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
lowMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), 16);
+ LN->isVolatile(), LN->isNonTemporal(), false, 16);
// When the size is not greater than alignment we get all data with just
// one load
@@ -695,7 +704,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
basePtr,
DAG.getConstant(16, PtrVT)),
highMemPtr,
- LN->isVolatile(), LN->isNonTemporal(), 16);
+ LN->isVolatile(), LN->isNonTemporal(), false,
+ 16);
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
high.getValue(1));
@@ -850,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
// Load the lower part of the memory to which to store.
SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
- lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
// if we don't need to store over the 16 byte boundary, one store suffices
if (alignment >= StVT.getSizeInBits()/8) {
@@ -950,7 +961,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
DAG.getConstant( 16, PtrVT)),
highMemPtr,
- SN->isVolatile(), SN->isNonTemporal(), 16);
+ SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
hi.getValue(1));
@@ -1017,7 +1029,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
llvm_unreachable("LowerConstantPool: Relocation model other than static"
" not supported.");
- return SDValue();
}
//! Alternate entry point for generating the address of a constant pool entry
@@ -1048,7 +1059,6 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
llvm_unreachable("LowerJumpTable: Relocation model other than static"
" not supported.");
- return SDValue();
}
static SDValue
@@ -1076,8 +1086,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
"not supported.");
/*NOTREACHED*/
}
-
- return SDValue();
}
//! Custom lower double precision floating point constants
@@ -1185,7 +1193,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
ArgOffset += StackSlotSize;
}
@@ -1198,7 +1206,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
// FIXME: we should be able to query the argument registers from
// tablegen generated code.
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
@@ -1212,7 +1220,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
};
// size of ArgRegs array
- unsigned NumArgRegs = 77;
+ const unsigned NumArgRegs = 77;
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
@@ -1257,7 +1265,7 @@ static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1675,7 +1683,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(Value32, MVT::i32);
return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
- break;
}
case MVT::v2f64: {
uint64_t f64val = uint64_t(SplatBits);
@@ -1685,7 +1692,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(f64val, MVT::i64);
return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
- break;
}
case MVT::v16i8: {
// 8-bit constants have to be expanded to 16-bits
@@ -1712,8 +1718,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
}
-
- return SDValue();
}
/*!
@@ -1743,9 +1747,11 @@ SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
// Both upper and lower are special, lower to a constant pool load:
if (lower_special && upper_special) {
- SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
- return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
- SplatValCN, SplatValCN);
+ SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
+ SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ UpperVal, LowerVal, UpperVal, LowerVal);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
}
SDValue LO32;
@@ -1985,8 +1991,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
}
}
-
- return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
@@ -2020,8 +2024,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
int elt_byte = EltNo * VT.getSizeInBits() / 8;
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: {
prefslot_begin = prefslot_end = 3;
break;
@@ -2199,8 +2202,6 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
switch (Opc) {
default:
llvm_unreachable("Unhandled i8 math operator");
- /*NOTREACHED*/
- break;
case ISD::ADD: {
// 8-bit addition: Promote the arguments up to 16-bits and truncate
// the result:
@@ -2285,11 +2286,8 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
DAG.getNode(Opc, dl, MVT::i16, N0, N1));
- break;
}
}
-
- return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
@@ -2354,8 +2352,7 @@ static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
switch (VT.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: {
SDValue N = Op.getOperand(0);
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
@@ -3161,7 +3158,6 @@ SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -3227,7 +3223,7 @@ bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
return (V > -(1 << 18) && V < (1 << 18) - 1);
}
-bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
return false;
}
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index aa4a1687278a..e3db7b2f1fbc 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -15,9 +15,9 @@
#ifndef SPU_ISELLOWERING_H
#define SPU_ISELLOWERING_H
+#include "SPU.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "SPU.h"
namespace llvm {
namespace SPUISD {
@@ -121,7 +121,6 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -162,7 +161,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
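
Two TargetLowering hook signatures change in this header: computeMaskedBitsForTargetNode drops the demanded-bits Mask parameter (implementations now compute known bits for the whole value), and LowerCall gains a doesNotRet flag so targets can tailor lowering for noreturn callees. A declaration-only sketch of the post-change override shapes, assuming the TargetLowering interface of this era (MyTargetLowering is a placeholder):

    #include "llvm/Target/TargetLowering.h"

    namespace llvm {
    class MyTargetLowering : public TargetLowering {
      // Known-bits hook: no Mask parameter any more.
      virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const;

      // LowerCall now also sees whether the callee is known not to return.
      virtual SDValue LowerCall(SDValue Chain, SDValue Callee,
                                CallingConv::ID CallConv, bool isVarArg,
                                bool doesNotRet, bool &isTailCall,
                                const SmallVectorImpl<ISD::OutputArg> &Outs,
                                const SmallVectorImpl<SDValue> &OutVals,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
                                DebugLoc dl, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const;
    };
    }
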
diff --git a/lib/Target/CellSPU/SPUInstrBuilder.h b/lib/Target/CellSPU/SPUInstrBuilder.h
index 5e268f8767c2..b495537fc2c8 100644
--- a/lib/Target/CellSPU/SPUInstrBuilder.h
+++ b/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -1,4 +1,4 @@
-//==-- SPUInstrBuilder.h - Aides for building Cell SPU insts -----*- C++ -*-==//
+//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td
index bdbe2552dcdd..cd3f42214345 100644
--- a/lib/Target/CellSPU/SPUInstrFormats.td
+++ b/lib/Target/CellSPU/SPUInstrFormats.td
@@ -1,10 +1,10 @@
-//==== SPUInstrFormats.td - Cell SPU Instruction Formats ---*- tablegen -*-===//
-//
+//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 007bc0e02c7e..759923d7bb42 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- SPUInstrInfo.cpp - Cell SPU Instruction Information ----------------===//
+//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index bc1ba71f7a45..85e5821aefa1 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -1,4 +1,4 @@
-//===- SPUInstrInfo.h - Cell SPU Instruction Information --------*- C++ -*-===//
+//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,8 +15,8 @@
#define SPU_INSTRUCTIONINFO_H
#include "SPU.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SPUGenInstrInfo.inc"
diff --git a/lib/Target/CellSPU/SPUMachineFunction.cpp b/lib/Target/CellSPU/SPUMachineFunction.cpp
new file mode 100644
index 000000000000..3e948d071d63
--- /dev/null
+++ b/lib/Target/CellSPU/SPUMachineFunction.cpp
@@ -0,0 +1,14 @@
+//==-- SPUMachineFunction.cpp - Private data used for CellSPU ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMachineFunction.h"
+
+using namespace llvm;
+
+void SPUFunctionInfo::anchor() { }
diff --git a/lib/Target/CellSPU/SPUMachineFunction.h b/lib/Target/CellSPU/SPUMachineFunction.h
index 3ef3ccbcaaee..399684bb0887 100644
--- a/lib/Target/CellSPU/SPUMachineFunction.h
+++ b/lib/Target/CellSPU/SPUMachineFunction.h
@@ -21,7 +21,8 @@ namespace llvm {
/// SPUFunctionInfo - Cell SPU target-specific information for each
/// MachineFunction
class SPUFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
+
/// UsesLR - Indicates whether LR is used in the current function.
///
bool UsesLR;
diff --git a/lib/Target/CellSPU/SPUMathInstr.td b/lib/Target/CellSPU/SPUMathInstr.td
index ed7129e33291..9a5c3976afbe 100644
--- a/lib/Target/CellSPU/SPUMathInstr.td
+++ b/lib/Target/CellSPU/SPUMathInstr.td
@@ -1,4 +1,4 @@
-//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
+//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===//
//
// Cell SPU math operations
//
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index a6e621f36b35..a47e9ef0167c 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -1,4 +1,4 @@
-//===- SPUNodes.td - Specialized SelectionDAG nodes used for CellSPU ------===//
+//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUNopFiller.cpp b/lib/Target/CellSPU/SPUNopFiller.cpp
index e2bd2d7f4100..7c58041e3b84 100644
--- a/lib/Target/CellSPU/SPUNopFiller.cpp
+++ b/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -1,4 +1,4 @@
-//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines---===//
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 96cde51709ec..6f8deef5530f 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -1,10 +1,10 @@
-//===- SPUOperands.td - Cell SPU Instruction Operands ------*- tablegen -*-===//
-//
+//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
// Cell SPU Instruction Operands:
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index bbac6fd0be54..1b2da5f50c81 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- SPURegisterInfo.cpp - Cell SPU Register Information ----------------===//
+//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
-#include "SPU.h"
#include "SPURegisterInfo.h"
+#include "SPU.h"
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
@@ -197,11 +197,11 @@ SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
return &SPU::R32CRegClass;
}
-const unsigned *
+const uint16_t *
SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
{
// Cell ABI calling convention
- static const unsigned SPU_CalleeSaveRegs[] = {
+ static const uint16_t SPU_CalleeSaveRegs[] = {
SPU::R80, SPU::R81, SPU::R82, SPU::R83,
SPU::R84, SPU::R85, SPU::R86, SPU::R87,
SPU::R88, SPU::R89, SPU::R90, SPU::R91,
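
getCalleeSavedRegs now returns const uint16_t* instead of const unsigned*: physical register numbers always fit in 16 bits, so the static callee-saved tables in every target shrink by half. The resulting hook shape, sketched with illustrative numbers (in this era the list is terminated by a 0 entry):

    #include <cstdint>

    // 16-bit entries are enough for physical register numbers.
    static const uint16_t CalleeSavedRegs[] = {
      80, 81, 82, 83, // stand-ins for SPU::R80..R83 and so on
      0               // zero terminator, per the convention of this era
    };

    const uint16_t *getCalleeSavedRegsSketch() { return CalleeSavedRegs; }
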
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index b7818a47abd7..e5ab22422502 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SPURegisterInfo.h - Cell SPU Register Information Impl ----*- C++ -*-==//
+//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -57,7 +57,7 @@ namespace llvm {
}
//! Return the array of callee-saved registers
- virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
+ virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
//! Allow for scavenging, so we can get scratch registers when needed.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index e16f51ff0e02..f27b042edd63 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -1,10 +1,10 @@
-//===- SPURegisterInfo.td - The Cell SPU Register File -----*- tablegen -*-===//
-//
+//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
//
diff --git a/lib/Target/CellSPU/SPUSchedule.td b/lib/Target/CellSPU/SPUSchedule.td
index 9cd3c2327df0..9ccd0844e48e 100644
--- a/lib/Target/CellSPU/SPUSchedule.td
+++ b/lib/Target/CellSPU/SPUSchedule.td
@@ -1,10 +1,10 @@
-//===- SPUSchedule.td - Cell Scheduling Definitions --------*- tablegen -*-===//
-//
+//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 43335abf0ac2..eec2d250be7f 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- SPUSubtarget.cpp - STI Cell SPU Subtarget Information --------------===//
+//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,6 @@
#include "SPU.h"
#include "SPURegisterInfo.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 93a7f6e36501..21f6b25bf256 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -11,17 +11,16 @@
//
//===----------------------------------------------------------------------===//
-#include "SPU.h"
#include "SPUTargetMachine.h"
+#include "SPU.h"
#include "llvm/PassManager.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-extern "C" void LLVMInitializeCellSPUTarget() {
+extern "C" void LLVMInitializeCellSPUTarget() {
// Register the target.
RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
}
@@ -34,8 +33,10 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
@@ -49,16 +50,34 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool SPUTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+namespace {
+/// SPU Code Generator Pass Configuration Options.
+class SPUPassConfig : public TargetPassConfig {
+public:
+ SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SPUTargetMachine &getSPUTargetMachine() const {
+ return getTM<SPUTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SPUPassConfig(this, PM);
+}
+
+bool SPUPassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createSPUISelDag(*this));
+ PM.add(createSPUISelDag(getSPUTargetMachine()));
return false;
}
// passes to run just before printing the assembly
-bool SPUTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool SPUPassConfig::addPreEmitPass() {
// load the TCE instruction scheduler, if available via
// loaded plugins
typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
@@ -69,6 +88,6 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
PM.add(schedulerCreator("cellspu"));
//align instructions with nops/lnops for dual issue
- PM.add(createSPUNopFillerPass(*this));
+ PM.add(createSPUNopFillerPass(getSPUTargetMachine()));
return true;
}
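
This file shows the core of the TargetPassConfig migration: instead of overriding addInstSelector(PassManagerBase&, CodeGenOpt::Level) and friends directly on the TargetMachine, a target now returns a TargetPassConfig subclass from createPassConfig, and the hooks lose their parameters because the pass manager and optimization level live in the config. The optimization level itself now arrives through the new CodeGenOpt::Level parameter of the TargetMachine constructor, as the hunk above shows. A condensed sketch of the pattern, assuming the TargetPassConfig interface of this era; MyTargetMachine and createMyISelDag are hypothetical stand-ins for a target's own machine class and instruction-selector factory:

    #include "llvm/CodeGen/Passes.h" // declares TargetPassConfig (assumed path)

    namespace {
    class MyPassConfig : public llvm::TargetPassConfig {
    public:
      MyPassConfig(MyTargetMachine *TM, llvm::PassManagerBase &PM)
          : TargetPassConfig(TM, PM) {}

      virtual bool addInstSelector() {
        // PM and the opt level are members of the config now.
        PM.add(createMyISelDag(getTM<MyTargetMachine>()));
        return false;
      }
    };
    } // namespace

    llvm::TargetPassConfig *
    MyTargetMachine::createPassConfig(llvm::PassManagerBase &PM) {
      return new MyPassConfig(this, PM);
    }
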
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index fffe77cabba3..3e5d38c919c1 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU ----*- C++ -*-=//
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
-class TargetFrameLowering;
/// SPUTargetMachine
///
@@ -39,8 +36,9 @@ class SPUTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
@@ -60,7 +58,7 @@ public:
return NULL;
}
- virtual const SPUTargetLowering *getTargetLowering() const {
+ virtual const SPUTargetLowering *getTargetLowering() const {
return &TLInfo;
}
@@ -71,7 +69,7 @@ public:
virtual const SPURegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
-
+
virtual const TargetData *getTargetData() const {
return &DataLayout;
}
@@ -79,11 +77,9 @@ public:
virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
-
+
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // end namespace llvm
diff --git a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
index 3f2d6b09adad..6a98f95db664 100644
--- a/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CellSPU/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMCellSPUInfo
CellSPUTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMCellSPUInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMCellSPUInfo CellSPUCommonTableGen)
diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..6937e705ff7f
--- /dev/null
+++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CellSPUInfo
+parent = CellSPU
+required_libraries = MC Support Target
+add_to_library_groups = CellSPU
diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt
index 95b6058243dc..515e1dd7e39f 100644
--- a/lib/Target/CppBackend/CMakeLists.txt
+++ b/lib/Target/CppBackend/CMakeLists.txt
@@ -1,12 +1,5 @@
-add_llvm_target(CppBackend
+add_llvm_target(CppBackendCodeGen
CPPBackend.cpp
)
-add_llvm_library_dependencies(LLVMCppBackend
- LLVMCore
- LLVMCppBackendInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 394ea2bfea02..69f0ff87eda0 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -33,8 +33,9 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include <algorithm>
-#include <set>
+#include <cstdio>
#include <map>
+#include <set>
using namespace llvm;
static cl::opt<std::string>
@@ -189,13 +190,24 @@ static std::string getTypePrefix(Type *Ty) {
case Type::VectorTyID: return "packed_";
default: return "other_";
}
- return "unknown_";
}
void CppWriter::error(const std::string& msg) {
report_fatal_error(msg);
}
+static inline std::string ftostr(const APFloat& V) {
+ std::string Buf;
+ if (&V.getSemantics() == &APFloat::IEEEdouble) {
+ raw_string_ostream(Buf) << V.convertToDouble();
+ return Buf;
+ } else if (&V.getSemantics() == &APFloat::IEEEsingle) {
+ raw_string_ostream(Buf) << (double)V.convertToFloat();
+ return Buf;
+ }
+ return "<unknown format in ftostr>"; // error
+}
+
// printCFP - Print a floating point constant .. very carefully :)
// This makes sure that conversion to/from floating yields the same binary
// result so that we don't lose precision.
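
The new ftostr helper checks the APFloat's semantics before calling convertToDouble/convertToFloat, since those accessors are only valid for the matching format. A short usage sketch under that assumption:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>
    using namespace llvm;

    std::string floatToStringSketch(const APFloat &V) {
      std::string Buf;
      raw_string_ostream OS(Buf);
      if (&V.getSemantics() == &APFloat::IEEEdouble)
        OS << V.convertToDouble();        // semantics match: safe
      else if (&V.getSemantics() == &APFloat::IEEEsingle)
        OS << (double)V.convertToFloat(); // widen for printing
      else
        OS << "<unsupported float semantics>";
      return OS.str();
    }
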
@@ -301,7 +313,6 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
switch (VisType) {
- default: llvm_unreachable("Unknown GVar visibility");
case GlobalValue::DefaultVisibility:
Out << "GlobalValue::DefaultVisibility";
break;
@@ -443,7 +454,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
unsigned index = PAL.getSlot(i).Index;
Attributes attrs = PAL.getSlot(i).Attrs;
- Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = Attribute::None ";
#define HANDLE_ATTR(X) \
if (attrs & Attribute::X) \
Out << " | Attribute::" #X; \
@@ -678,11 +689,6 @@ void CppWriter::printConstant(const Constant *CV) {
std::string constName(getCppName(CV));
std::string typeName(getCppName(CV->getType()));
- if (isa<GlobalValue>(CV)) {
- // Skip variables and functions, we emit them elsewhere
- return;
- }
-
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
std::string constValue = CI->getValue().toString(10, true);
Out << "ConstantInt* " << constName
@@ -700,38 +706,17 @@ void CppWriter::printConstant(const Constant *CV) {
printCFP(CFP);
Out << ";";
} else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
- if (CA->isString() &&
- CA->getType()->getElementType() ==
- Type::getInt8Ty(CA->getContext())) {
- Out << "Constant* " << constName <<
- " = ConstantArray::get(mod->getContext(), \"";
- std::string tmp = CA->getAsString();
- bool nullTerminate = false;
- if (tmp[tmp.length()-1] == 0) {
- tmp.erase(tmp.length()-1);
- nullTerminate = true;
- }
- printEscapedString(tmp);
- // Determine if we want null termination or not.
- if (nullTerminate)
- Out << "\", true"; // Indicate that the null terminator should be
- // added.
- else
- Out << "\", false";// No null terminator
- Out << ");";
- } else {
- Out << "std::vector<Constant*> " << constName << "_elems;";
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CA->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CA->getOperand(i)); // recurse to print operands
+ Out << constName << "_elems.push_back("
+ << getCppName(CA->getOperand(i)) << ");";
nl(Out);
- unsigned N = CA->getNumOperands();
- for (unsigned i = 0; i < N; ++i) {
- printConstant(CA->getOperand(i)); // recurse to print operands
- Out << constName << "_elems.push_back("
- << getCppName(CA->getOperand(i)) << ");";
- nl(Out);
- }
- Out << "Constant* " << constName << " = ConstantArray::get("
- << typeName << ", " << constName << "_elems);";
}
+ Out << "Constant* " << constName << " = ConstantArray::get("
+ << typeName << ", " << constName << "_elems);";
} else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
Out << "std::vector<Constant*> " << constName << "_fields;";
nl(Out);
@@ -744,14 +729,14 @@ void CppWriter::printConstant(const Constant *CV) {
}
Out << "Constant* " << constName << " = ConstantStruct::get("
<< typeName << ", " << constName << "_fields);";
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ } else if (const ConstantVector *CVec = dyn_cast<ConstantVector>(CV)) {
Out << "std::vector<Constant*> " << constName << "_elems;";
nl(Out);
- unsigned N = CP->getNumOperands();
+ unsigned N = CVec->getNumOperands();
for (unsigned i = 0; i < N; ++i) {
- printConstant(CP->getOperand(i));
+ printConstant(CVec->getOperand(i));
Out << constName << "_elems.push_back("
- << getCppName(CP->getOperand(i)) << ");";
+ << getCppName(CVec->getOperand(i)) << ");";
nl(Out);
}
Out << "Constant* " << constName << " = ConstantVector::get("
@@ -759,6 +744,41 @@ void CppWriter::printConstant(const Constant *CV) {
} else if (isa<UndefValue>(CV)) {
Out << "UndefValue* " << constName << " = UndefValue::get("
<< typeName << ");";
+ } else if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(CV)) {
+ if (CDS->isString()) {
+ Out << "Constant *" << constName <<
+ " = ConstantDataArray::getString(mod->getContext(), \"";
+ StringRef Str = CDS->getAsString();
+ bool nullTerminate = false;
+ if (Str.back() == 0) {
+ Str = Str.drop_back();
+ nullTerminate = true;
+ }
+ printEscapedString(Str);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true);";
+ else
+ Out << "\", false);";// No null terminator
+ } else {
+ // TODO: Could generate more efficient code by emitting CDS calls instead.
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ for (unsigned i = 0; i != CDS->getNumElements(); ++i) {
+ Constant *Elt = CDS->getElementAsConstant(i);
+ printConstant(Elt);
+ Out << constName << "_elems.push_back(" << getCppName(Elt) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName;
+
+ if (isa<ArrayType>(CDS->getType()))
+ Out << " = ConstantArray::get(";
+ else
+ Out << " = ConstantVector::get(";
+ Out << typeName << ", " << constName << "_elems);";
+ }
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
if (CE->getOpcode() == Instruction::GetElementPtr) {
Out << "std::vector<Constant*> " << constName << "_indices;";
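
The new ConstantDataSequential branch handles the compact constant representation introduced around this release: ConstantDataArray/ConstantDataVector store arrays and vectors of simple int/fp constants out-of-line instead of as operand lists, with getString/getAsString covering the common string case. A minimal example of creating one (header paths are those of this era):

    #include "llvm/Constants.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    Constant *makeHello(LLVMContext &Ctx) {
      // The boolean controls whether a trailing NUL is appended, which is
      // exactly the nullTerminate distinction the printer makes above.
      return ConstantDataArray::getString(Ctx, "hello", /*AddNull=*/true);
    }
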
@@ -1083,10 +1103,10 @@ void CppWriter::printInstruction(const Instruction *I,
<< getOpName(SI->getDefaultDest()) << ", "
<< SI->getNumCases() << ", " << bbname << ");";
nl(Out);
- unsigned NumCases = SI->getNumCases();
- for (unsigned i = 1; i < NumCases; ++i) {
- const ConstantInt* CaseVal = SI->getCaseValue(i);
- const BasicBlock* BB = SI->getSuccessor(i);
+ for (SwitchInst::ConstCaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ const ConstantInt* CaseVal = i.getCaseValue();
+ const BasicBlock *BB = i.getCaseSuccessor();
Out << iName << "->addCase("
<< getOpName(CaseVal) << ", "
<< getOpName(BB) << ");";
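
SwitchInst case access also changed from raw indices (where index 0 used to be the default destination, a classic off-by-one trap) to an explicit case iterator, which is the idiom the loop above adopts. A sketch of the iterator API as used here:

    #include "llvm/Instructions.h" // era-appropriate header path
    using namespace llvm;

    void visitCases(const SwitchInst *SI) {
      for (SwitchInst::ConstCaseIt I = SI->case_begin(), E = SI->case_end();
           I != E; ++I) {
        const ConstantInt *Val = I.getCaseValue();     // the case constant
        const BasicBlock *Succ = I.getCaseSuccessor(); // its destination
        (void)Val; (void)Succ; // e.g. record or print the pair
      }
    }
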
@@ -1135,11 +1155,6 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
break;
}
- case Instruction::Unwind: {
- Out << "new UnwindInst("
- << bbname << ");";
- break;
- }
case Instruction::Unreachable: {
Out << "new UnreachableInst("
<< "mod->getContext(), "
@@ -1354,7 +1369,7 @@ void CppWriter::printInstruction(const Instruction *I,
case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
case Instruction::BitCast: Out << "BitCastInst"; break;
- default: assert(0 && "Unreachable"); break;
+ default: llvm_unreachable("Unreachable");
}
Out << "(" << opNames[0] << ", "
<< getCppName(cst->getType()) << ", \"";
@@ -2049,8 +2064,6 @@ bool CppWriter::runOnModule(Module &M) {
fname = "makeLLVMType";
printType(fname,tgtname);
break;
- default:
- error("Invalid generation option");
}
return false;
@@ -2065,7 +2078,6 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 287e53727139..92bca6c3c770 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,14 +23,14 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : TargetMachine(T, TT, CPU, FS) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : TargetMachine(T, TT, CPU, FS, Options) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt
new file mode 100644
index 000000000000..122b5e7502fc
--- /dev/null
+++ b/lib/Target/CppBackend/LLVMBuild.txt
@@ -0,0 +1,31 @@
+;===- ./lib/Target/CppBackend/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TargetInfo
+
+[component_0]
+type = TargetGroup
+name = CppBackend
+parent = Target
+
+[component_1]
+type = Library
+name = CppBackendCodeGen
+parent = CppBackend
+required_libraries = Core CppBackendInfo Support Target
+add_to_library_groups = CppBackend
diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile
index d75f4e872265..efc7463fda3d 100644
--- a/lib/Target/CppBackend/Makefile
+++ b/lib/Target/CppBackend/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMCppBackend
+LIBRARYNAME = LLVMCppBackendCodeGen
DIRS = TargetInfo
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
index 7165d8fdf2cd..f82d72e378cb 100644
--- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
+++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt
@@ -3,8 +3,3 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMCppBackendInfo
CppBackendTargetInfo.cpp
)
-
-add_llvm_library_dependencies(LLVMCppBackendInfo
- LLVMMC
- LLVMTarget
- )
diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..d4dfc3ef0406
--- /dev/null
+++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/CppBackend/TargetInfo/LLVMBuild.txt ---------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = CppBackendInfo
+parent = CppBackend
+required_libraries = MC Support Target
+add_to_library_groups = CppBackend
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
new file mode 100644
index 000000000000..8a49cd8105a7
--- /dev/null
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -0,0 +1,37 @@
+set(LLVM_TARGET_DEFINITIONS Hexagon.td)
+
+tablegen(LLVM HexagonGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM HexagonGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
+add_public_tablegen_target(HexagonCommonTableGen)
+
+add_llvm_target(HexagonCodeGen
+ HexagonAsmPrinter.cpp
+ HexagonCFGOptimizer.cpp
+ HexagonCallingConvLower.cpp
+ HexagonExpandPredSpillCode.cpp
+ HexagonFrameLowering.cpp
+ HexagonHardwareLoops.cpp
+ HexagonISelDAGToDAG.cpp
+ HexagonISelLowering.cpp
+ HexagonInstrInfo.cpp
+ HexagonMCInstLower.cpp
+ HexagonPeephole.cpp
+ HexagonRegisterInfo.cpp
+ HexagonRemoveSZExtArgs.cpp
+ HexagonSelectionDAGInfo.cpp
+ HexagonSplitTFRCondSets.cpp
+ HexagonSubtarget.cpp
+ HexagonTargetMachine.cpp
+ HexagonTargetObjectFile.cpp
+ HexagonVLIWPacketizer.cpp
+)
+
+add_subdirectory(TargetInfo)
+add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
+
diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h
new file mode 100644
index 000000000000..43858b9624f1
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.h
@@ -0,0 +1,74 @@
+//=-- Hexagon.h - Top-level interface for Hexagon representation --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Hexagon back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_Hexagon_H
+#define TARGET_Hexagon_H
+
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class FunctionPass;
+ class TargetMachine;
+ class MachineInstr;
+ class MCInst;
+ class HexagonAsmPrinter;
+ class HexagonTargetMachine;
+ class raw_ostream;
+
+ FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonDelaySlotFillerPass(TargetMachine &TM);
+ FunctionPass *createHexagonFPMoverPass(TargetMachine &TM);
+ FunctionPass *createHexagonRemoveExtendOps(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonCFGOptimizer(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonSplitTFRCondSets(HexagonTargetMachine &TM);
+ FunctionPass *createHexagonExpandPredSpillCode(HexagonTargetMachine &TM);
+
+ FunctionPass *createHexagonHardwareLoops();
+ FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonFixupHwLoops();
+ FunctionPass *createHexagonPacketizer();
+
+/* TODO: object output.
+ MCCodeEmitter *createHexagonMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
+*/
+/* TODO: assembler input.
+ TargetAsmBackend *createHexagonAsmBackend(const Target &, const std::string &);
+*/
+ void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI,
+ HexagonAsmPrinter &AP);
+} // end namespace llvm;
+
+#define Hexagon_POINTER_SIZE 4
+
+#define Hexagon_PointerSize (Hexagon_POINTER_SIZE)
+#define Hexagon_PointerSize_Bits (Hexagon_POINTER_SIZE * 8)
+#define Hexagon_WordSize Hexagon_PointerSize
+#define Hexagon_WordSize_Bits Hexagon_PointerSize_Bits
+
+// allocframe saves LR and FP on stack before allocating
+// a new stack frame. This takes 8 bytes.
+#define HEXAGON_LRFP_SIZE 8
+
+// Normal instruction size (in bytes).
+#define HEXAGON_INSTR_SIZE 4
+
+// Maximum number of words in a packet (in instructions).
+#define HEXAGON_PACKET_SIZE 4
+
+#endif
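
The macros at the bottom of Hexagon.h capture the VLIW geometry the backend assumes: 4-byte instructions, at most 4 instructions per packet, and an 8-byte LR/FP save area pushed by allocframe. A trivial composition, as a hypothetical helper rather than anything in the source:

    #include "Hexagon.h" // the header added above

    // Upper bound, in bytes, of one fully packed Hexagon packet.
    static const unsigned MaxPacketBytes =
        HEXAGON_PACKET_SIZE * HEXAGON_INSTR_SIZE; // 4 * 4 = 16
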
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
new file mode 100644
index 000000000000..4a50d1609308
--- /dev/null
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -0,0 +1,72 @@
+//===-- Hexagon.td - Describe the Hexagon Target Machine --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Subtarget features.
+//===----------------------------------------------------------------------===//
+
+// Hexagon Architectures
+def ArchV2 : SubtargetFeature<"v2", "HexagonArchVersion", "V2",
+ "Hexagon v2">;
+def ArchV3 : SubtargetFeature<"v3", "HexagonArchVersion", "V3",
+ "Hexagon v3">;
+def ArchV4 : SubtargetFeature<"v4", "HexagonArchVersion", "V4",
+ "Hexagon v4">;
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+include "HexagonSchedule.td"
+include "HexagonRegisterInfo.td"
+include "HexagonCallingConv.td"
+include "HexagonInstrInfo.td"
+include "HexagonIntrinsics.td"
+include "HexagonIntrinsicsDerived.td"
+
+def HexagonInstrInfo : InstrInfo;
+
+//===----------------------------------------------------------------------===//
+// Hexagon processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, ProcessorItineraries Itin,
+ list<SubtargetFeature> Features>
+ : Processor<Name, Itin, Features>;
+
+def : Proc<"hexagonv2", HexagonItineraries, [ArchV2]>;
+def : Proc<"hexagonv3", HexagonItineraries, [ArchV2, ArchV3]>;
+def : Proc<"hexagonv4", HexagonItinerariesV4, [ArchV2, ArchV3, ArchV4]>;
+
+// Hexagon Uses the MC printer for assembler output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def HexagonAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Declare the target which we are implementing
+//===----------------------------------------------------------------------===//
+
+def Hexagon : Target {
+ // Pull in Instruction Info:
+ let InstructionSet = HexagonInstrInfo;
+
+ let AssemblyWriters = [HexagonAsmWriter];
+}
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
new file mode 100644
index 000000000000..2cc8b814a022
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -0,0 +1,313 @@
+//===-- HexagonAsmPrinter.cpp - Print machine instrs to Hexagon assembly --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Hexagon assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonMCInst.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "InstPrinter/HexagonInstPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include <map>
+
+using namespace llvm;
+
+static cl::opt<bool> AlignCalls(
+ "hexagon-align-calls", cl::Hidden, cl::init(true),
+ cl::desc("Insert falign after call instruction for Hexagon target"));
+
+void HexagonAsmPrinter::EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV) const {
+ // For basic block level alignment, use ".falign".
+ if (!GV) {
+ OutStreamer.EmitRawText(StringRef("\t.falign"));
+ return;
+ }
+
+ AsmPrinter::EmitAlignment(NumBits, GV);
+}
+
+void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ default: llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_Register:
+ O << HexagonInstPrinter::getRegisterName(MO.getReg());
+ return;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << *GetJTISymbol(MO.getIndex());
+ // FIXME: PIC relocation model.
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << *GetCPISymbol(MO.getIndex());
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // Computing the address of a global symbol, not calling it.
+ O << *Mang->getSymbol(MO.getGlobal());
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+}
+
+//
+// isBlockOnlyReachableByFallthrough - We need to override this since the
+// default AsmPrinter does not print labels for any basic block that
+// is only reachable by a fall-through. That works in all cases except
+// when the basic block is reachable by a fall-through and also via an
+// indirect branch through a jump table; in that case, the jump table
+// would contain a label that AsmPrinter never defines.
+//
+bool HexagonAsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+ if (MBB->hasAddressTaken()) {
+ return false;
+ }
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &OS) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'c': // Don't print "$" before a global var name or constant.
+ // Hexagon never has a prefix.
+ printOperand(MI, OpNo, OS);
+ return false;
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ OS << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, OS);
+ return false;
+}
+
+bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &Base = MI->getOperand(OpNo);
+ const MachineOperand &Offset = MI->getOperand(OpNo+1);
+
+ if (Base.isReg())
+ printOperand(MI, OpNo, O);
+ else
+ llvm_unreachable("Unimplemented");
+
+ if (Offset.isImm()) {
+ if (Offset.getImm())
+ O << " + #" << Offset.getImm();
+ }
+ else
+ llvm_unreachable("Unimplemented");
+
+ return false;
+}
+
+void HexagonAsmPrinter::printPredicateOperand(const MachineInstr *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ llvm_unreachable("Unimplemented");
+}
+
+
+/// EmitInstruction -- Print out a single Hexagon MI to the current
+/// output stream.
+///
+void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (MI->isBundle()) {
+ std::vector<const MachineInstr*> BundleMIs;
+
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_instr_iterator MII = MI;
+ ++MII;
+ unsigned int IgnoreCount = 0;
+ while (MII != MBB->end() && MII->isInsideBundle()) {
+ const MachineInstr *MInst = MII;
+ if (MInst->getOpcode() == TargetOpcode::DBG_VALUE ||
+ MInst->getOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ IgnoreCount++;
+ ++MII;
+ continue;
+ }
+ //BundleMIs.push_back(&*MII);
+ BundleMIs.push_back(MInst);
+ ++MII;
+ }
+ unsigned Size = BundleMIs.size();
+ assert((Size+IgnoreCount) == MI->getBundleSize() && "Corrupt Bundle!");
+ for (unsigned Index = 0; Index < Size; Index++) {
+ HexagonMCInst MCI;
+ MCI.setStartPacket(Index == 0);
+ MCI.setEndPacket(Index == (Size-1));
+
+ HexagonLowerToMC(BundleMIs[Index], MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+ }
+ else {
+ HexagonMCInst MCI;
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+ MCI.setStartPacket(true);
+ MCI.setEndPacket(true);
+ }
+ HexagonLowerToMC(MI, MCI, *this);
+ OutStreamer.EmitInstruction(MCI);
+ }
+
+ return;
+}
+
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \n or \0.
+// static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+// for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+// Name != E; ++Name)
+// if (isprint(*Name))
+// OS << *Name;
+// }
+
+
+void HexagonAsmPrinter::printAddrModeBasePlusOffset(const MachineInstr *MI,
+ int OpNo, raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << HexagonInstPrinter::getRegisterName(MO1.getReg())
+ << " + #"
+ << MO2.getImm();
+}
+
+
+void HexagonAsmPrinter::printGlobalOperand(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_GlobalAddress) &&
+ "Expecting global address");
+
+ O << *Mang->getSymbol(MO.getGlobal());
+ if (MO.getOffset() != 0) {
+ O << " + ";
+ O << MO.getOffset();
+ }
+}
+
+void HexagonAsmPrinter::printJumpTable(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_JumpTableIndex) &&
+ "Expecting jump table index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetJTISymbol(MO.getIndex());
+}
+
+void HexagonAsmPrinter::printConstantPool(const MachineInstr *MI, int OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ assert( (MO.getType() == MachineOperand::MO_ConstantPoolIndex) &&
+ "Expecting constant pool index");
+
+ // Hexagon_TODO: Do we need name mangling?
+ O << *GetCPISymbol(MO.getIndex());
+}
+
+static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ if (SyntaxVariant == 0)
+ return(new HexagonInstPrinter(MAI, MII, MRI));
+ else
+ return NULL;
+}
+
+extern "C" void LLVMInitializeHexagonAsmPrinter() {
+ RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ createHexagonMCInstPrinter);
+}
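
EmitInstruction above is where packets become visible to the MC layer: a bundle header marks a packet, the const_instr_iterator walk collects the bundled members while skipping DBG_VALUE and IMPLICIT_DEF, and setStartPacket/setEndPacket flag the first and last survivors so the printer can emit the packet braces. A condensed sketch of the same walk, assuming the bundle iterators and header paths of this era:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Target/TargetOpcodes.h"
    #include <vector>
    using namespace llvm;

    // Collect the printable members of the bundle headed by MI.
    std::vector<const MachineInstr*> collectBundle(const MachineInstr *MI) {
      std::vector<const MachineInstr*> Members;
      const MachineBasicBlock *MBB = MI->getParent();
      MachineBasicBlock::const_instr_iterator I = MI;
      for (++I; I != MBB->instr_end() && I->isInsideBundle(); ++I)
        if (I->getOpcode() != TargetOpcode::DBG_VALUE &&
            I->getOpcode() != TargetOpcode::IMPLICIT_DEF)
          Members.push_back(&*I);
      return Members;
    }
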
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
new file mode 100755
index 000000000000..bc2af636124c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -0,0 +1,165 @@
+//===-- HexagonAsmPrinter.h - Print machine code to an Hexagon .s file ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hexagon Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONASMPRINTER_H
+#define HEXAGONASMPRINTER_H
+
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ class HexagonAsmPrinter : public AsmPrinter {
+ const HexagonSubtarget *Subtarget;
+
+ public:
+ explicit HexagonAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<HexagonSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Hexagon Assembly Printer";
+ }
+
+ bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+ virtual void EmitInstruction(const MachineInstr *MI);
+ virtual void EmitAlignment(unsigned NumBits,
+ const GlobalValue *GV = 0) const;
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &OS);
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description. This method returns true if the
+ /// machine instruction was sufficiently described to print it, otherwise it
+ /// returns false.
+ void printInstruction(const MachineInstr *MI, raw_ostream &O);
+
+ // void printMachineInstruction(const MachineInstr *MI);
+ void printOp(const MachineOperand &MO, raw_ostream &O);
+
+ /// printRegister - Print register according to target requirements.
+ ///
+ void printRegister(const MachineOperand &MO, bool R0AsZero,
+ raw_ostream &O) {
+ unsigned RegNo = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(RegNo) && "Not physreg??");
+ O << getRegisterName(RegNo);
+ }
+
+ void printImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << value;
+ }
+
+ void printNegImmOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int value = MI->getOperand(OpNo).getImm();
+ O << -value;
+ }
+
+ void printMEMriOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << " + #"
+ << (int) MO2.getImm();
+ }
+
+ void printFrameIndexOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MachineOperand &MO1 = MI->getOperand(OpNo);
+ const MachineOperand &MO2 = MI->getOperand(OpNo+1);
+
+ O << getRegisterName(MO1.getReg())
+ << ", #"
+ << MO2.getImm();
+ }
+
+ void printBranchOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print $+8, an eight byte displacement from the PC.
+ if (MI->getOperand(OpNo).isImm()) {
+ O << "$+" << MI->getOperand(OpNo).getImm()*4;
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printAbsAddrOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#HI(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printImmOperand(MI, OpNo, O);
+ }
+ else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ O << "#LO(";
+ if (MI->getOperand(OpNo).isImm()) {
+ printImmOperand(MI, OpNo, O);
+ }
+ else {
+ printOp(MI->getOperand(OpNo), O);
+ }
+ O << ")";
+ }
+
+ void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O);
+
+#if 0
+ void printModuleLevelGV(const GlobalVariable* GVar, raw_ostream &O);
+#endif
+
+ void printAddrModeBasePlusOffset(const MachineInstr *MI, int OpNo,
+ raw_ostream &O);
+
+ void printGlobalOperand(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printJumpTable(const MachineInstr *MI, int OpNo, raw_ostream &O);
+ void printConstantPool(const MachineInstr *MI, int OpNo, raw_ostream &O);
+
+ static const char *getRegisterName(unsigned RegNo);
+
+#if 0
+ void EmitStartOfAsmFile(Module &M);
+#endif
+ };
+
+} // end of llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
new file mode 100644
index 000000000000..9bca9e070709
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -0,0 +1,235 @@
+//===-- HexagonCFGOptimizer.cpp - CFG optimizations -----------------------===//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon_cfg"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonCFGOptimizer : public MachineFunctionPass {
+
+private:
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ void InvertAndChangeJumpTarget(MachineInstr*, MachineBasicBlock*);
+
+ public:
+ static char ID;
+ HexagonCFGOptimizer(HexagonTargetMachine& TM) : MachineFunctionPass(ID),
+ QTM(TM),
+ QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon CFG Optimizer";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonCFGOptimizer::ID = 0;
+
+static bool IsConditionalBranch(int Opc) {
+ return (Opc == Hexagon::JMP_c) || (Opc == Hexagon::JMP_cNot)
+ || (Opc == Hexagon::JMP_cdnPt) || (Opc == Hexagon::JMP_cdnNotPt);
+}
+
+
+static bool IsUnconditionalJump(int Opc) {
+ return (Opc == Hexagon::JMP);
+}
+
+
+void
+HexagonCFGOptimizer::InvertAndChangeJumpTarget(MachineInstr* MI,
+ MachineBasicBlock* NewTarget) {
+ const HexagonInstrInfo *QII = QTM.getInstrInfo();
+ int NewOpcode = 0;
+ switch(MI->getOpcode()) {
+ case Hexagon::JMP_c:
+ NewOpcode = Hexagon::JMP_cNot;
+ break;
+
+ case Hexagon::JMP_cNot:
+ NewOpcode = Hexagon::JMP_c;
+ break;
+
+ case Hexagon::JMP_cdnPt:
+ NewOpcode = Hexagon::JMP_cdnNotPt;
+ break;
+
+ case Hexagon::JMP_cdnNotPt:
+ NewOpcode = Hexagon::JMP_cdnPt;
+ break;
+
+ default:
+ llvm_unreachable("Cannot handle this case");
+ }
+
+ MI->setDesc(QII->get(NewOpcode));
+ MI->getOperand(1).setMBB(NewTarget);
+}
+
+
+bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+
+ // Traverse the basic block.
+ MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
+ if (MII != MBB->end()) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (IsConditionalBranch(Opc)) {
+
+ //
+ // (Case 1) Transform the code if the following condition occurs:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...next block in layout is BB3...
+ // BB3: ...
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ //
+ // (Case 2) A variation occurs when BB3 contains a JMP to BB4:
+ // BB1: if (p0) jump BB3
+ // ...falls-through to BB2 ...
+ // BB2: jump BB4
+ // ...other basic blocks ...
+ // BB4:
+ // ...not a fall-thru
+ // BB3: ...
+ // jump BB4
+ //
+ // Transform this to:
+ // BB1: if (!p0) jump BB4
+ // Remove BB2
+ // BB3: ...
+ // BB4: ...
+ //
+ unsigned NumSuccs = MBB->succ_size();
+ MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
+ MachineBasicBlock* FirstSucc = *SI;
+ MachineBasicBlock* SecondSucc = *(++SI);
+ MachineBasicBlock* LayoutSucc = NULL;
+ MachineBasicBlock* JumpAroundTarget = NULL;
+
+ if (MBB->isLayoutSuccessor(FirstSucc)) {
+ LayoutSucc = FirstSucc;
+ JumpAroundTarget = SecondSucc;
+ } else if (MBB->isLayoutSuccessor(SecondSucc)) {
+ LayoutSucc = SecondSucc;
+ JumpAroundTarget = FirstSucc;
+ } else {
+ // Odd case...cannot handle.
+ }
+
+ // The target of the unconditional branch must be JumpAroundTarget.
+ // TODO: If not, we should not invert the unconditional branch.
+ MachineBasicBlock* CondBranchTarget = NULL;
+ if ((MI->getOpcode() == Hexagon::JMP_c) ||
+ (MI->getOpcode() == Hexagon::JMP_cNot)) {
+ CondBranchTarget = MI->getOperand(1).getMBB();
+ }
+
+ if (!LayoutSucc || (CondBranchTarget != JumpAroundTarget)) {
+ continue;
+ }
+
+ if ((NumSuccs == 2) && LayoutSucc && (LayoutSucc->pred_size() == 1)) {
+
+ // Ensure that BB2 has one instruction -- an unconditional jump.
+ if ((LayoutSucc->size() == 1) &&
+ IsUnconditionalJump(LayoutSucc->front().getOpcode())) {
+ MachineBasicBlock* UncondTarget =
+ LayoutSucc->front().getOperand(0).getMBB();
+ // Check if the layout successor of BB2 is BB3.
+ bool case1 = LayoutSucc->isLayoutSuccessor(JumpAroundTarget);
+ bool case2 = JumpAroundTarget->isSuccessor(UncondTarget) &&
+ JumpAroundTarget->size() >= 1 &&
+ IsUnconditionalJump(JumpAroundTarget->back().getOpcode()) &&
+ JumpAroundTarget->pred_size() == 1 &&
+ JumpAroundTarget->succ_size() == 1;
+
+ if (case1 || case2) {
+ InvertAndChangeJumpTarget(MI, UncondTarget);
+ MBB->removeSuccessor(JumpAroundTarget);
+ MBB->addSuccessor(UncondTarget);
+
+ // Remove the unconditional branch in LayoutSucc.
+ LayoutSucc->erase(LayoutSucc->begin());
+ LayoutSucc->removeSuccessor(UncondTarget);
+ LayoutSucc->addSuccessor(JumpAroundTarget);
+
+ // This code performs the conversion for case 2, which moves
+ // the block to the fall-thru case (BB3 in the code above).
+ if (case2 && !case1) {
+ JumpAroundTarget->moveAfter(LayoutSucc);
+ // Only move a block if it doesn't have a fall-through;
+ // otherwise the CFG will be incorrect.
+ if (!UncondTarget->canFallThrough()) {
+ UncondTarget->moveAfter(JumpAroundTarget);
+ }
+ }
+
+ //
+ // Correct the live-in information, which the post-RA scheduler
+ // uses: the live-ins of LayoutSucc become the values live-in to
+ // JumpAroundTarget.
+ //
+ std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
+ LayoutSucc->livein_end());
+ std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
+ JumpAroundTarget->livein_end());
+ for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
+ LayoutSucc->removeLiveIn(OrigLiveIn[i]);
+ }
+ for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
+ LayoutSucc->addLiveIn(NewLiveIn[i]);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+}
+
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonCFGOptimizer(HexagonTargetMachine &TM) {
+ return new HexagonCFGOptimizer(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConv.td b/lib/Target/Hexagon/HexagonCallingConv.td
new file mode 100644
index 000000000000..bd9608bdb0f4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConv.td
@@ -0,0 +1,35 @@
+//===- HexagonCallingConv.td - Calling Conventions Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the Hexagon architectures.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// Hexagon 32-bit C return-value convention.
+def RetCC_Hexagon32 : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
+
+// Hexagon 32-bit C Calling convention.
+def CC_Hexagon32 : CallingConv<[
+ // All arguments get passed in integer registers if there is space.
+ CCIfType<[i32, i16, i8], CCAssignToReg<[R0, R1, R2, R3, R4, R5]>>,
+ CCIfType<[i64], CCAssignToReg<[D0, D1, D2]>>,
+
+ // Alternatively, they are assigned to the stack in 4-byte aligned units.
+ CCAssignToStack<4, 4>
+]>;
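+
+// Illustrative example (not part of the convention tables above): under
+// CC_Hexagon32, a call f(int a, long long b) would typically assign a to
+// R0 and b to D1, since allocating R0 also reserves its alias D0. Once
+// the six words of register space are used up, further arguments land in
+// 4-byte aligned stack slots.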
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
new file mode 100644
index 000000000000..46c20e9972b4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -0,0 +1,207 @@
+//===-- HexagonCallingConvLower.cpp - Calling Convention lowering ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon_CCState class, used for lowering and
+// implementing calling conventions. It is adapted from the machine-independent
+// version of the class (CCState), but additionally handles calls to varargs
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonCallingConvLower.h"
+#include "Hexagon.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
+ const TargetMachine &tm,
+ SmallVector<CCValAssign, 16> &locs,
+ LLVMContext &c)
+ : CallingConv(CC), IsVarArg(isVarArg), TM(tm),
+ TRI(*TM.getRegisterInfo()), Locs(locs), Context(c) {
+ // No stack is used.
+ StackOffset = 0;
+
+ UsedRegs.resize((TRI.getNumRegs()+31)/32);
+}
+
+// HandleByVal - Allocate a stack slot large enough to pass an argument by
+// value. The size and alignment information of the argument is encoded in its
+// parameter attribute.
+void Hexagon_CCState::HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign,
+ ISD::ArgFlagsTy ArgFlags) {
+ unsigned Align = ArgFlags.getByValAlign();
+ unsigned Size = ArgFlags.getByValSize();
+ if (MinSize > (int)Size)
+ Size = MinSize;
+ if (MinAlign > (int)Align)
+ Align = MinAlign;
+ unsigned Offset = AllocateStack(Size, Align);
+
+ addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset,
+ LocVT.getSimpleVT(), LocInfo));
+}
+
+/// MarkAllocated - Mark a register and all of its aliases as allocated.
+void Hexagon_CCState::MarkAllocated(unsigned Reg) {
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+
+ if (const uint16_t *RegAliases = TRI.getAliasSet(Reg))
+ for (; (Reg = *RegAliases); ++RegAliases)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
+}
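+
+// Illustrative: for Reg = 35, MarkAllocated sets bit 3 of UsedRegs[1]
+// (35/32 == 1 and 35&31 == 3), and then does the same for each alias of
+// register 35.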
+
+/// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+/// incorporating info about the formals into this state.
+void
+Hexagon_CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+ unsigned NumArgs = Ins.size();
+ unsigned i = 0;
+
+ // If the function returns a small struct in registers, skip
+ // over the first (dummy) argument.
+ if (SretValueInRegs != 0) {
+ ++i;
+ }
+
+
+ for (; i != NumArgs; ++i) {
+ EVT ArgVT = Ins[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, 0, 0, false)) {
+ dbgs() << "Formal argument #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+/// incorporating info about the result values into this state.
+void
+Hexagon_CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ // For Hexagon, return small structures in registers.
+ if (SretValueInRegs != 0) {
+ if (SretValueInRegs <= 32) {
+ unsigned Reg = Hexagon::R0;
+ addLoc(CCValAssign::getReg(0, MVT::i32, Reg, MVT::i32,
+ CCValAssign::Full));
+ return;
+ }
+ if (SretValueInRegs <= 64) {
+ unsigned Reg = Hexagon::D0;
+ addLoc(CCValAssign::getReg(0, MVT::i64, Reg, MVT::i64,
+ CCValAssign::Full));
+ return;
+ }
+ }
+
+
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this, -1, -1, false)){
+ dbgs() << "Return operand #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+
+/// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+/// about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg>
+ &Outs,
+ Hexagon_CCAssignFn Fn,
+ int NonVarArgsParams,
+ unsigned SretValueSize) {
+ unsigned NumOps = Outs.size();
+
+ unsigned i = 0;
+ // If the called function returns a small struct in registers, skip
+ // the first actual parameter. We do not want to pass a pointer to
+ // the stack location.
+ if (SretValueSize != 0) {
+ ++i;
+ }
+
+ for (; i != NumOps; ++i) {
+ EVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this,
+ NonVarArgsParams, i+1, false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallOperands - Same as above except it takes vectors of types
+/// and argument flags.
+void
+Hexagon_CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn) {
+ unsigned NumOps = ArgVTs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ EVT ArgVT = ArgVTs[i];
+ ISD::ArgFlagsTy ArgFlags = Flags[i];
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this, -1, -1,
+ false)) {
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << ArgVT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+/// incorporating info about the passed values into this state.
+void
+Hexagon_CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn,
+ unsigned SretValueInRegs) {
+
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ EVT VT = Ins[i].VT;
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this, -1, -1, false)) {
+ dbgs() << "Call result #" << i << " has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+ }
+}
+
+/// AnalyzeCallResult - Same as above except it's specialized for calls which
+/// produce a single value.
+void Hexagon_CCState::AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn) {
+ if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this, -1, -1,
+ false)) {
+ dbgs() << "Call result has unhandled type "
+ << VT.getEVTString() << "\n";
+ abort();
+ }
+}
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
new file mode 100644
index 000000000000..1f601e87ad68
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -0,0 +1,189 @@
+//===-- HexagonCallingConvLower.h - Calling Conventions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon_CCState class, used for lowering
+// and implementing calling conventions. It is adapted from the
+// target-independent version, but additionally handles calls to varargs
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+//
+// Need to handle varargs.
+//
+namespace llvm {
+ class TargetRegisterInfo;
+ class TargetMachine;
+ class Hexagon_CCState;
+ class SDNode;
+
+
+/// Hexagon_CCAssignFn - This function assigns a location for Val, updating
+/// State to reflect the change.
+typedef bool Hexagon_CCAssignFn(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+/// Hexagon_CCState - This class holds information needed while lowering
+/// arguments and return values. It captures which registers are already
+/// assigned and which stack slots are used, and provides accessors to
+/// allocate these values.
+class Hexagon_CCState {
+ CallingConv::ID CallingConv;
+ bool IsVarArg;
+ const TargetMachine &TM;
+ const TargetRegisterInfo &TRI;
+ SmallVector<CCValAssign, 16> &Locs;
+ LLVMContext &Context;
+
+ unsigned StackOffset;
+ SmallVector<uint32_t, 16> UsedRegs;
+public:
+ Hexagon_CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+ SmallVector<CCValAssign, 16> &locs, LLVMContext &c);
+
+ void addLoc(const CCValAssign &V) {
+ Locs.push_back(V);
+ }
+
+ LLVMContext &getContext() const { return Context; }
+ const TargetMachine &getTarget() const { return TM; }
+ unsigned getCallingConv() const { return CallingConv; }
+ bool isVarArg() const { return IsVarArg; }
+
+ unsigned getNextStackOffset() const { return StackOffset; }
+
+ /// isAllocated - Return true if the specified register (or an alias) is
+ /// allocated.
+ bool isAllocated(unsigned Reg) const {
+ return UsedRegs[Reg/32] & (1 << (Reg&31));
+ }
+
+ /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
+ /// incorporating info about the formals into this state.
+ void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
+ /// incorporating info about the result values into this state.
+ void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
+ /// about the passed values into this state.
+ void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ Hexagon_CCAssignFn Fn, int NonVarArgsParams,
+ unsigned SretValueSize);
+
+ /// AnalyzeCallOperands - Same as above except it takes vectors of types
+ /// and argument flags.
+ void AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+ Hexagon_CCAssignFn Fn);
+
+ /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
+ /// incorporating info about the passed values into this state.
+ void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+ Hexagon_CCAssignFn Fn, unsigned SretValueInRegs);
+
+ /// AnalyzeCallResult - Same as above except it's specialized for calls which
+ /// produce a single value.
+ void AnalyzeCallResult(EVT VT, Hexagon_CCAssignFn Fn);
+
+ /// getFirstUnallocated - Return the first unallocated register in the set, or
+ /// NumRegs if they are all allocated.
+ unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+ for (unsigned i = 0; i != NumRegs; ++i)
+ if (!isAllocated(Regs[i]))
+ return i;
+ return NumRegs;
+ }
+
+ /// AllocateReg - Attempt to allocate one register. If it is not available,
+ /// return zero. Otherwise, return the register, marking it and any aliases
+ /// as allocated.
+ unsigned AllocateReg(unsigned Reg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with extra register to be shadowed.
+ unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+ if (isAllocated(Reg)) return 0;
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateReg - Attempt to allocate one of the specified registers. If none
+ /// are available, return zero. Otherwise, return the first one available,
+ /// marking it and any aliases as allocated.
+ unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc];
+ MarkAllocated(Reg);
+ return Reg;
+ }
+
+ /// Version of AllocateReg with list of registers to be shadowed.
+ unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+ unsigned NumRegs) {
+ unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+ if (FirstUnalloc == NumRegs)
+ return 0; // Didn't find the reg.
+
+ // Mark the register and any aliases as allocated.
+ unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+ MarkAllocated(Reg);
+ MarkAllocated(ShadowReg);
+ return Reg;
+ }
+
+ /// AllocateStack - Allocate a chunk of stack space with the specified size
+ /// and alignment.
+ unsigned AllocateStack(unsigned Size, unsigned Align) {
+ assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
+ StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+ unsigned Result = StackOffset;
+ StackOffset += Size;
+ return Result;
+ }
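+
+ // Worked example (illustrative): with StackOffset == 6, a call to
+ // AllocateStack(8, 4) first rounds the offset up to 8, returns 8 as the
+ // slot offset, and leaves StackOffset == 16 for the next allocation.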
+
+ // HandleByVal - Allocate a stack slot large enough to pass an argument by
+ // value. The size and alignment information of the argument is encoded in its
+ // parameter attribute.
+ void HandleByVal(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+ /// MarkAllocated - Mark a register and all of its aliases as allocated.
+ void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
new file mode 100644
index 000000000000..210047446034
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -0,0 +1,177 @@
+//===-- HexagonExpandPredSpillCode.cpp - Expand Predicate Spill Code ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The Hexagon processor has no instructions that load or store predicate
+// registers directly, so when a predicate register must be spilled, a
+// general-purpose register must be found and the value copied between it
+// and the predicate register. This code currently does not use the
+// register scavenger mechanism available in the allocator. Two registers
+// are reserved to allow spilling/restoring predicate registers: one holds
+// the predicate value, and the other is used when stack frame offsets are
+// too large.
+//
+//===----------------------------------------------------------------------===//
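+//
+// For example (illustrative, using the reserved registers named below), a
+// spill of predicate register p0,
+//   STriw_pred [r30], #offset, p0
+// is expanded, when the offset fits a store instruction, into:
+//   HEXAGON_RESERVED_REG_2 = TFR_RsPd p0   (copy p0 to a general register)
+//   STriw [r30], #offset, HEXAGON_RESERVED_REG_2
+//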
+
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+
+namespace {
+
+class HexagonExpandPredSpillCode : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonExpandPredSpillCode(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Expand Predicate Spill Code";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonExpandPredSpillCode::ID = 0;
+
+
+bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
+
+ const HexagonInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::STriw_pred) {
+ // STriw_pred [R30], ofst, SrcReg;
+ unsigned FP = MI->getOperand(0).getReg();
+ assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(1).isImm() && "Not an offset");
+ int Offset = MI->getOperand(1).getImm();
+ int SrcReg = MI->getOperand(2).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(SrcReg) &&
+ "Not a predicate register");
+ if (!TII->isValidOffset(Hexagon::STriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP).addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw))
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0)
+ .addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_RsPd),
+ HEXAGON_RESERVED_REG_2).addReg(SrcReg);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::STriw)).
+ addReg(FP).addImm(Offset).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::LDriw_pred) {
+ // DstReg = LDriw_pred [R30], ofst.
+ int DstReg = MI->getOperand(0).getReg();
+ assert(Hexagon::PredRegsRegClass.contains(DstReg) &&
+ "Not a predicate register");
+ unsigned FP = MI->getOperand(1).getReg();
+ assert(FP == QTM.getRegisterInfo()->getFrameRegister() &&
+ "Not a Frame Pointer, Nor a Spill Slot");
+ assert(MI->getOperand(2).isImm() && "Not an offset");
+ int Offset = MI->getOperand(2).getImm();
+ if (!TII->isValidOffset(Hexagon::LDriw, Offset)) {
+ if (!TII->isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(),
+ TII->get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_rr),
+ HEXAGON_RESERVED_REG_1)
+ .addReg(FP)
+ .addReg(HEXAGON_RESERVED_REG_1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::ADD_ri),
+ HEXAGON_RESERVED_REG_1).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2)
+ .addReg(HEXAGON_RESERVED_REG_1)
+ .addImm(0);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ } else {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::LDriw),
+ HEXAGON_RESERVED_REG_2).addReg(FP).addImm(Offset);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_PdRs),
+ DstReg).addReg(HEXAGON_RESERVED_REG_2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonExpandPredSpillCode(HexagonTargetMachine &TM) {
+ return new HexagonExpandPredSpillCode(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
new file mode 100644
index 000000000000..e8a692406e08
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -0,0 +1,332 @@
+//===-- HexagonFrameLowering.cpp - Define frame lowering ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonFrameLowering.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableDeallocRet(
+ "disable-hexagon-dealloc-ret",
+ cl::Hidden,
+ cl::desc("Disable Dealloc Return for Hexagon target"));
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void HexagonFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = RoundUpToAlignment(maxCallFrameSize, TargetAlign);
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = RoundUpToAlignment(FrameSize, TargetAlign);
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
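+
+// Worked example (illustrative numbers): with a 20-byte stack size, a
+// 24-byte maximum call frame, and 8-byte target alignment, the total is
+// 44 bytes, which is then rounded up to a 48-byte frame.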
+
+
+void HexagonFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const HexagonRegisterInfo *QRI =
+ static_cast<const HexagonRegisterInfo *>(MF.getTarget().getRegisterInfo());
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+ determineFrameLayout(MF);
+
+ // Check if frame moves are needed for EH.
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ !MF.getFunction()->needsUnwindTableEntry();
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ int NumBytes = (int) MFI->getStackSize();
+
+ // LLVM expects allocframe not to be the first instruction in the
+ // basic block.
+ MachineBasicBlock::iterator InsertPt = MBB.begin();
+
+ //
+ // ALLOCA adjust regs. Iterate over ADJDYNALLOC nodes and change the offset.
+ //
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ const std::vector<MachineInstr*>& AdjustRegs =
+ FuncInfo->getAllocaAdjustInsts();
+ for (std::vector<MachineInstr*>::const_iterator i = AdjustRegs.begin(),
+ e = AdjustRegs.end();
+ i != e; ++i) {
+ MachineInstr* MI = *i;
+ assert((MI->getOpcode() == Hexagon::ADJDYNALLOC) &&
+ "Expected adjust alloca node");
+
+ MachineOperand& MO = MI->getOperand(2);
+ assert(MO.isImm() && "Expected immediate");
+ MO.setImm(MFI->getMaxCallFrameSize());
+ }
+
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ if (needsFrameMoves) {
+ // Advance CFA. DW_CFA_def_cfa
+ unsigned FPReg = QRI->getFrameRegister();
+ unsigned RAReg = QRI->getRARegister();
+
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, -8);
+ Moves.push_back(MachineMove(0, Dst, Src));
+
+ // R31 = (R31 - #4)
+ MachineLocation LRDst(RAReg, -4);
+ MachineLocation LRSrc(RAReg);
+ Moves.push_back(MachineMove(0, LRDst, LRSrc));
+
+ // R30 = (R30 - #8)
+ MachineLocation SPDst(FPReg, -8);
+ MachineLocation SPSrc(FPReg);
+ Moves.push_back(MachineMove(0, SPDst, SPSrc));
+ }
+
+ //
+ // Only insert ALLOCFRAME if we need to.
+ //
+ if (hasFP(MF)) {
+ // Check for overflow.
+ // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
+ const int ALLOCFRAME_MAX = 16384;
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ if (NumBytes >= ALLOCFRAME_MAX) {
+ // Emit allocframe(#0).
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(0);
+
+ // Subtract offset from frame pointer.
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::CONST32_Int_Real),
+ HEXAGON_RESERVED_REG_1).addImm(NumBytes);
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::SUB_rr),
+ QRI->getStackRegister()).
+ addReg(QRI->getStackRegister()).
+ addReg(HEXAGON_RESERVED_REG_1);
+ } else {
+ BuildMI(MBB, InsertPt, dl, TII.get(Hexagon::ALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+// Returns true if MBB contains a machine instruction that indicates a
+// tail call in the block.
+bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ unsigned RetOpcode = MBBI->getOpcode();
+
+ return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
+
+void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ DebugLoc dl = MBBI->getDebugLoc();
+ //
+ // Only insert deallocframe if we need to.
+ //
+ if (hasFP(MF)) {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineBasicBlock::iterator MBBI_end = MBB.end();
+ //
+ // For Hexagon, we don't need the frame size.
+ //
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI->getStackSize();
+
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher
+ // versions.
+ if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPR
+ && !DisableDeallocRet) {
+ // Remove jumpr node.
+ MBB.erase(MBBI);
+ // Add dealloc_return.
+ BuildMI(MBB, MBBI_end, dl, TII.get(Hexagon::DEALLOC_RET_V4))
+ .addImm(NumBytes);
+ } else { // Add deallocframe for V2 and V3.
+ BuildMI(MBB, MBBI, dl, TII.get(Hexagon::DEALLOCFRAME)).addImm(NumBytes);
+ }
+ }
+}
+
+bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ return (MFI->hasCalls() || (MFI->getStackSize() > 0) ||
+ FuncInfo->hasClobberLR() );
+}
+
+bool
+HexagonFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word stores if we spill contiguous callee-saved
+ // registers. For instance, we cannot use double-word stores if we spill
+ // r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word store.
+ //
+ const uint16_t* SuperReg = TRI->getSuperRegisters(Reg);
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblStore = false;
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblStore = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblStore) {
+ TII.storeRegToStackSlot(MBB, MI, SuperReg[0], true,
+ CSI[i+1].getFrameIdx(), SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word store.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RC,
+ TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+
+bool HexagonFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ MachineFunction *MF = MBB.getParent();
+ const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
+
+ if (CSI.empty()) {
+ return false;
+ }
+
+ // We can only use double-word loads if contiguous callee-saved registers
+ // were spilled. For instance, we cannot use double-word loads if we
+ // spilled r24, r26, and r27.
+ // Hexagon_TODO: We can try to double-word align odd registers for -O2 and
+ // above.
+ bool ContiguousRegs = true;
+
+ for (unsigned i = 0; i < CSI.size(); ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ //
+ // Check if we can use a double-word load.
+ //
+ const uint16_t* SuperReg = TRI->getSuperRegisters(Reg);
+ const TargetRegisterClass* SuperRegClass = 0;
+
+ // Assume that there is exactly one superreg.
+ assert(SuperReg[0] && !SuperReg[1] && "Expected exactly one superreg");
+ bool CanUseDblLoad = false;
+ if (ContiguousRegs && (i < CSI.size()-1)) {
+ const uint16_t* SuperRegNext = TRI->getSuperRegisters(CSI[i+1].getReg());
+ assert(SuperRegNext[0] && !SuperRegNext[1] &&
+ "Expected exactly one superreg");
+ SuperRegClass = TRI->getMinimalPhysRegClass(SuperReg[0]);
+ CanUseDblLoad = (SuperRegNext[0] == SuperReg[0]);
+ }
+
+
+ if (CanUseDblLoad) {
+ TII.loadRegFromStackSlot(MBB, MI, SuperReg[0], CSI[i+1].getFrameIdx(),
+ SuperRegClass, TRI);
+ MBB.addLiveIn(SuperReg[0]);
+ ++i;
+ } else {
+ // Cannot use a double-word load.
+ ContiguousRegs = false;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
+ MBB.addLiveIn(Reg);
+ }
+ }
+ return true;
+}
+
+int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ return MF.getFrameInfo()->getObjectOffset(FI);
+}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
new file mode 100644
index 000000000000..ad87f11e2457
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -0,0 +1,50 @@
+//=- HexagonFrameLowering.h - Define frame lowering for Hexagon --*- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_FRAMEINFO_H
+#define HEXAGON_FRAMEINFO_H
+
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+
+class HexagonFrameLowering : public TargetFrameLowering {
+private:
+ const HexagonSubtarget &STI;
+ void determineFrameLayout(MachineFunction &MF) const;
+
+public:
+ explicit HexagonFrameLowering(const HexagonSubtarget &sti)
+ : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) {
+ }
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ virtual bool
+ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ virtual bool
+ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool hasFP(const MachineFunction &MF) const;
+ bool hasTailCall(MachineBasicBlock &MBB) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
new file mode 100644
index 000000000000..57772a514d55
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -0,0 +1,644 @@
+//===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the Hexagon hardware
+// loop instruction. The hardware loop can perform loop branches with a
+// zero-cycle overhead.
+//
+// The pattern that defines the induction variable can change depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+// The pattern detected by this phase is due to running Strength Reduction.
+//
+// Criteria for hardware loops:
+// - Countable loops (with an induction variable used for the trip count)
+// - Assumes loops are normalized by IndVarSimplify
+// - Try inner-most loops first
+// - No nested hardware loops.
+// - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
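+//
+// For example (illustrative), a counting loop such as
+//   for (i = 0; i < n; ++i) sum += a[i];
+// can be emitted as "loop0(start, count)" with the loop body ending in an
+// ":endloop0" packet, instead of a compare-and-branch pair.
+//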
+
+#define DEBUG_TYPE "hwloops"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+ class CountValue;
+ struct HexagonHardwareLoops : public MachineFunctionPass {
+ MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+
+ HexagonHardwareLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loops"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+ /// induction variable.
+ /// Should be defined in MachineLoop. Based upon version in class Loop.
+ const MachineInstr *getCanonicalInductionVariable(MachineLoop *L) const;
+
+ /// getTripCount - Return a loop-invariant LLVM register indicating the
+ /// number of times the loop will be executed. If the trip count cannot
+ /// be determined, this returns null.
+ CountValue *getTripCount(MachineLoop *L) const;
+
+ /// isInductionOperation - Return true if the instruction matches the
+ /// pattern for an operation that defines an induction variable.
+ bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
+
+ /// isInvalidLoopOperation - Return true if the instruction is not valid
+ /// within a hardware loop.
+ bool isInvalidLoopOperation(const MachineInstr *MI) const;
+
+ /// containsInvalidInstruction - Return true if the loop contains an
+ /// instruction that inhibits using the hardware loop.
+ bool containsInvalidInstruction(MachineLoop *L) const;
+
+ /// convertToHardwareLoop - Given a loop, check if we can convert it to a
+ /// hardware loop. If so, then perform the conversion and return true.
+ bool convertToHardwareLoop(MachineLoop *L);
+
+ };
+
+ char HexagonHardwareLoops::ID = 0;
+
+
+ // CountValue class - Abstraction for the trip count of a loop. A
+ // smaller version of the MachineOperand class without the concern
+ // of changing the operand representation.
+ class CountValue {
+ public:
+ enum CountValueType {
+ CV_Register,
+ CV_Immediate
+ };
+ private:
+ CountValueType Kind;
+ union Values {
+ unsigned RegNum;
+ int64_t ImmVal;
+ Values(unsigned r) : RegNum(r) {}
+ Values(int64_t i) : ImmVal(i) {}
+ } Contents;
+ bool isNegative;
+
+ public:
+ CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
+ isNegative(neg) {}
+ explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
+ isNegative(i < 0) {}
+ CountValueType getType() const { return Kind; }
+ bool isReg() const { return Kind == CV_Register; }
+ bool isImm() const { return Kind == CV_Immediate; }
+ bool isNeg() const { return isNegative; }
+
+ unsigned getReg() const {
+ assert(isReg() && "Wrong CountValue accessor");
+ return Contents.RegNum;
+ }
+ void setReg(unsigned Val) {
+ Contents.RegNum = Val;
+ }
+ int64_t getImm() const {
+ assert(isImm() && "Wrong CountValue accessor");
+ if (isNegative) {
+ return -Contents.ImmVal;
+ }
+ return Contents.ImmVal;
+ }
+ void setImm(int64_t Val) {
+ Contents.ImmVal = Val;
+ }
+
+ void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
+ if (isReg()) { OS << PrintReg(getReg()); }
+ if (isImm()) { OS << getImm(); }
+ }
+ };
+
+ struct HexagonFixupHwLoops : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ HexagonFixupHwLoops() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ /// Maximum distance between the loop instr and the basic block.
+ /// Just an estimate.
+ static const unsigned MAX_LOOP_DISTANCE = 200;
+
+ /// fixupLoopInstrs - Check the offset between each loop instruction and
+ /// the loop basic block to determine if we can use the LOOP instruction
+ /// or if we need to set the LC/SA registers explicitly.
+ bool fixupLoopInstrs(MachineFunction &MF);
+
+ /// convertLoopInstr - Add the instruction to set the LC and SA registers
+ /// explicitly.
+ void convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS);
+
+ };
+
+ char HexagonFixupHwLoops::ID = 0;
+
+} // end anonymous namespace
+
+
+/// isHardwareLoop - Returns true if the instruction is a hardware loop
+/// instruction.
+static bool isHardwareLoop(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::LOOP0_r ||
+ MI->getOpcode() == Hexagon::LOOP0_i;
+}
+
+/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
+/// instruction with an immediate operand.
+static bool isCompareEqualsImm(const MachineInstr *MI) {
+ return MI->getOpcode() == Hexagon::CMPEQri;
+}
+
+
+/// createHexagonHardwareLoops - Factory for creating
+/// the hardware loop phase.
+FunctionPass *llvm::createHexagonHardwareLoops() {
+ return new HexagonHardwareLoops();
+}
+
+
+bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
+
+ bool Changed = false;
+
+ // Get the loop information.
+ MLI = &getAnalysis<MachineLoopInfo>();
+ // Get the register information.
+ MRI = &MF.getRegInfo();
+ // Get the target-specific instruction info.
+ TII = MF.getTarget().getInstrInfo();
+
+ for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ I != E; ++I) {
+ MachineLoop *L = *I;
+ if (!L->getParentLoop()) {
+ Changed |= convertToHardwareLoop(L);
+ }
+ }
+
+ return Changed;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable. We check for a simple recurrence pattern - an
+/// integer recurrence that decrements by one each time through the loop and
+/// ends at zero. If so, return the phi node that corresponds to it.
+///
+/// Based upon the similar code in LoopInfo except this code is specific to
+/// the machine.
+/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
+///
+const MachineInstr *
+HexagonHardwareLoops::getCanonicalInductionVariable(MachineLoop *L) const {
+ MachineBasicBlock *TopMBB = L->getTopBlock();
+ MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
+ assert(PI != TopMBB->pred_end() &&
+ "Loop must have more than one incoming edge!");
+ MachineBasicBlock *Backedge = *PI++;
+ if (PI == TopMBB->pred_end()) return 0; // dead loop
+ MachineBasicBlock *Incoming = *PI++;
+ if (PI != TopMBB->pred_end()) return 0; // multiple backedges?
+
+ // Make sure there is exactly one incoming edge and one backedge, and
+ // determine which is which.
+ if (L->contains(Incoming)) {
+ if (L->contains(Backedge))
+ return 0;
+ std::swap(Incoming, Backedge);
+ } else if (!L->contains(Backedge))
+ return 0;
+
+ // Loop over all of the PHI nodes, looking for a canonical induction variable:
+ // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
+ // - The recurrence comes from the backedge.
+ // - The definition is an induction operation.
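+ //
+ // For example (machine-level, illustrative):
+ //   %vreg1 = PHI %vreg2, <BB#1>, %vreg3, <BB#2>
+ //   %vreg3 = ADD_ri %vreg1, -1   (defined in the loop, via the backedge)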
+ for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
+ I != E && I->isPHI(); ++I) {
+ const MachineInstr *MPhi = &*I;
+ unsigned DefReg = MPhi->getOperand(0).getReg();
+ for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+ // Check each operand for the value from the backedge.
+ MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
+ if (L->contains(MBB)) { // Operand comes from the backedge.
+ // Check if the definition is an induction operation.
+ const MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
+ if (isInductionOperation(DI, DefReg)) {
+ return MPhi;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the
+/// number of times the loop will be executed. The trip count can
+/// be either a register or a constant value. If the trip-count
+/// cannot be determined, this returns null.
+///
+/// We find the trip count from the phi instruction that defines the
+/// induction variable. We follow the links to the CMP instruction
+/// to get the trip count.
+///
+/// Based upon getTripCount in LoopInfo.
+///
+CountValue *HexagonHardwareLoops::getTripCount(MachineLoop *L) const {
+ // Check that the loop has an induction variable.
+ const MachineInstr *IV_Inst = getCanonicalInductionVariable(L);
+ if (IV_Inst == 0) return 0;
+
+ // Canonical loops will end with a 'cmpeq_ri IV, Imm':
+ // if Imm is 0, get the count from the PHI operand;
+ // if Imm is -M, then M is the count;
+ // otherwise, Imm is the count.
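+ //
+ // For example (illustrative): for a loop ending in 'cmpeq_ri %iv, 0'
+ // whose induction variable steps by -1 from an initial register %n, the
+ // trip count is the register %n itself.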
+ const MachineOperand *IV_Opnd;
+ const MachineOperand *InitialValue;
+ if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
+ InitialValue = &IV_Inst->getOperand(1);
+ IV_Opnd = &IV_Inst->getOperand(3);
+ } else {
+ InitialValue = &IV_Inst->getOperand(3);
+ IV_Opnd = &IV_Inst->getOperand(1);
+ }
+
+ // Look for the cmp instruction to determine if we
+ // can get a useful trip count. The trip count can
+ // be either a register or an immediate. The location
+ // of the value depends upon the type (reg or imm).
+ while ((IV_Opnd = IV_Opnd->getNextOperandForReg())) {
+ const MachineInstr *MI = IV_Opnd->getParent();
+ if (L->contains(MI) && isCompareEqualsImm(MI)) {
+ const MachineOperand &MO = MI->getOperand(2);
+ assert(MO.isImm() && "IV Cmp Operand should be an immediate");
+ int64_t ImmVal = MO.getImm();
+
+ const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
+ assert(L->contains(IV_DefInstr->getParent()) &&
+ "IV definition should occur in the loop");
+ int64_t iv_value = IV_DefInstr->getOperand(2).getImm();
+
+ if (ImmVal == 0) {
+ // Make sure the induction variable changes by one on each iteration.
+ if (iv_value != 1 && iv_value != -1) {
+ return 0;
+ }
+ return new CountValue(InitialValue->getReg(), iv_value > 0);
+ } else {
+ assert(InitialValue->isReg() && "Expecting register for init value");
+ const MachineInstr *DefInstr = MRI->getVRegDef(InitialValue->getReg());
+ if (DefInstr && DefInstr->getOpcode() == Hexagon::TFRI) {
+ int64_t count = ImmVal - DefInstr->getOperand(1).getImm();
+ if ((count % iv_value) != 0) {
+ return 0;
+ }
+ return new CountValue(count/iv_value);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/// isInductionOperation - Return true if the operation matches the
+/// pattern that defines an induction variable:
+/// add iv, c
+///
+bool
+HexagonHardwareLoops::isInductionOperation(const MachineInstr *MI,
+ unsigned IVReg) const {
+ return (MI->getOpcode() ==
+ Hexagon::ADD_ri && MI->getOperand(1).getReg() == IVReg);
+}
+
+/// isInvalidLoopOperation - Return true if the operation is invalid within
+/// a hardware loop.
+bool
+HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
+
+ // A call is not allowed because the callee may use a hardware loop.
+ if (MI->getDesc().isCall()) {
+ return true;
+ }
+ // Do not allow nested hardware loops.
+ if (isHardwareLoop(MI)) {
+ return true;
+ }
+ // Check if the instruction defines a hardware loop register.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() &&
+ (MO.getReg() == Hexagon::LC0 || MO.getReg() == Hexagon::LC1 ||
+ MO.getReg() == Hexagon::SA0 || MO.getReg() == Hexagon::SA1)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/// containsInvalidInstruction - Return true if the loop contains
+/// an instruction that inhibits the use of the hardware loop function.
+///
+bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
+ const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = Blocks[i];
+ for (MachineBasicBlock::iterator
+ MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
+ const MachineInstr *MI = &*MII;
+ if (isInvalidLoopOperation(MI)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// convertToHardwareLoop - Check if the loop is a candidate for
+/// converting to a hardware loop. If so, then perform the
+/// transformation.
+///
+/// This function works on innermost loops first. A loop can be
+/// converted if it is a counting loop whose trip count is either a
+/// register value or an immediate.
+///
+/// The code makes several assumptions about the representation
+/// of the loop in LLVM.
+bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
+ bool Changed = false;
+ // Process nested loops first.
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ Changed |= convertToHardwareLoop(*I);
+ }
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (Changed) {
+ return Changed;
+ }
+ // Are we able to determine the trip count for the loop?
+ CountValue *TripCount = getTripCount(L);
+ if (TripCount == 0) {
+ return false;
+ }
+ // Does the loop contain any invalid instructions?
+ if (containsInvalidInstruction(L)) {
+ return false;
+ }
+ MachineBasicBlock *Preheader = L->getLoopPreheader();
+ // No preheader means there's no place for the loop instruction.
+ if (Preheader == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
+
+ MachineBasicBlock *LastMBB = L->getExitingBlock();
+ // Don't generate hw loop if the loop has more than one exit.
+ if (LastMBB == 0) {
+ return false;
+ }
+ MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+
+ // Determine the loop start.
+ MachineBasicBlock *LoopStart = L->getTopBlock();
+ if (L->getLoopLatch() != LastMBB) {
+ // When the exit and latch are not the same, use the latch block as the
+ // start.
+ // The loop start address is used only after the first iteration, and the
+ // loop latch may contain instructions that must execute after the first
+ // iteration.
+ LoopStart = L->getLoopLatch();
+ // Make sure the latch is a successor of the exit, otherwise it won't work.
+ if (!LastMBB->isSuccessor(LoopStart)) {
+ return false;
+ }
+ }
+
+ // Convert the loop to a hardware loop
+ DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+
+ if (TripCount->isReg()) {
+ // Create a copy of the loop count register.
+ MachineFunction *MF = LastMBB->getParent();
+ const TargetRegisterClass *RC =
+ MF->getRegInfo().getRegClass(TripCount->getReg());
+ unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
+ if (TripCount->isNeg()) {
+ unsigned CountReg1 = CountReg;
+ CountReg = MF->getRegInfo().createVirtualRegister(RC);
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
+ }
+
+ // Add the Loop instruction to the beginning of the loop.
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
+ } else {
+ assert(TripCount->isImm() && "Expecting immediate value for trip count");
+ // Add the Loop immediate instruction to the beginning of the loop.
+ int64_t CountImm = TripCount->getImm();
+ BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+ TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
+ }
+
+ // Make sure the loop start always has a reference in the CFG. We need to
+ // create a BlockAddress operand; for this mechanism to work, both the
+ // MachineBasicBlock and BasicBlock objects need the flag set.
+ LoopStart->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
+
+ // Replace the loop branch with an endloop instruction.
+ DebugLoc dl = LastI->getDebugLoc();
+ BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+
+ // The loop ends with either:
+ // - a conditional branch followed by an unconditional branch, or
+ // - a conditional branch to the loop start.
+ if (LastI->getOpcode() == Hexagon::JMP_c ||
+ LastI->getOpcode() == Hexagon::JMP_cNot) {
+ // Delete the conditional branch and, if needed, add an unconditional
+ // branch out of the loop.
+ MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
+ LastI = LastMBB->erase(LastI);
+ if (!L->contains(BranchTarget)) {
+ if (LastI != LastMBB->end()) {
+ TII->RemoveBranch(*LastMBB);
+ }
+ SmallVector<MachineOperand, 0> Cond;
+ TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+ }
+ } else {
+ // Conditional branch to loop start; just delete it.
+ LastMBB->erase(LastI);
+ }
+ delete TripCount;
+
+ ++NumHWLoops;
+ return true;
+}
+
+/// createHexagonFixupHwLoops - Factory for creating the hardware loop
+/// phase.
+FunctionPass *llvm::createHexagonFixupHwLoops() {
+ return new HexagonFixupHwLoops();
+}
+
+bool HexagonFixupHwLoops::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "****** Hexagon Hardware Loop Fixup ******\n");
+
+ bool Changed = fixupLoopInstrs(MF);
+ return Changed;
+}
+
+/// fixupLoopInstrs - For Hexagon, if the loop label is too far from the
+/// loop instruction, then we need to set the LC0 and SA0 registers
+/// explicitly instead of using LOOP(start,count). This function
+/// checks the distance and generates register assignments if needed.
+///
+/// This function makes two passes over the basic blocks. The first
+/// pass computes the offset of the basic block from the start.
+/// The second pass checks all the loop instructions.
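+///
+/// Worked example (illustrative numbers): with offsets estimated at 4
+/// bytes per instruction, a LOOP0 instruction at offset 1200 whose start
+/// block begins at offset 800 is 400 bytes away; since that exceeds
+/// MAX_LOOP_DISTANCE (200), it is rewritten to set LC0/SA0 explicitly.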
+bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
+
+ // Offset of the current instruction from the start.
+ unsigned InstOffset = 0;
+ // Map from each basic block to the offset of its first instruction.
+ DenseMap<MachineBasicBlock*, unsigned> BlockToInstOffset;
+
+ // First pass - compute the offset of each basic block.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ BlockToInstOffset[MBB] = InstOffset;
+ InstOffset += (MBB->size() * 4);
+ }
+
+ // Second pass - check each loop instruction to see if it needs to
+ // be converted.
+ InstOffset = 0;
+ bool Changed = false;
+ RegScavenger RS;
+
+ // Loop over all the basic blocks.
+ for (MachineFunction::iterator MBB = MF.begin(), MBBe = MF.end();
+ MBB != MBBe; ++MBB) {
+ InstOffset = BlockToInstOffset[MBB];
+ RS.enterBasicBlock(MBB);
+
+ // Loop over all the instructions.
+ MachineBasicBlock::iterator MIE = MBB->end();
+ MachineBasicBlock::iterator MII = MBB->begin();
+ while (MII != MIE) {
+ if (isHardwareLoop(MII)) {
+ RS.forward(MII);
+ assert(MII->getOperand(0).isMBB() &&
+ "Expect a basic block as loop operand");
+ int diff = InstOffset - BlockToInstOffset[MII->getOperand(0).getMBB()];
+ diff = (diff > 0 ? diff : -diff);
+ if ((unsigned)diff > MAX_LOOP_DISTANCE) {
+ // Convert to explicitly setting LC0 and SA0.
+ convertLoopInstr(MF, MII, RS);
+ MII = MBB->erase(MII);
+ Changed = true;
+ } else {
+ ++MII;
+ }
+ } else {
+ ++MII;
+ }
+ InstOffset += 4;
+ }
+ }
+
+ return Changed;
+}
+
+/// convertLoopInstr - Convert a loop instruction to a sequence of
+/// instructions that set the LC0 and SA0 registers explicitly.
+void HexagonFixupHwLoops::convertLoopInstr(MachineFunction &MF,
+ MachineBasicBlock::iterator &MII,
+ RegScavenger &RS) {
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ MachineBasicBlock *MBB = MII->getParent();
+ DebugLoc DL = MII->getDebugLoc();
+ unsigned Scratch = RS.scavengeRegister(Hexagon::IntRegsRegisterClass, MII, 0);
+
+ // First, set the LC0 with the trip count.
+ if (MII->getOperand(1).isReg()) {
+ // Trip count is a register.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(MII->getOperand(1).getReg());
+ } else {
+ // Trip count is an immediate.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFRI), Scratch)
+ .addImm(MII->getOperand(1).getImm());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::LC0)
+ .addReg(Scratch);
+ }
+ // Then, set the SA0 with the loop start address.
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::CONST32_Label), Scratch)
+ .addMBB(MII->getOperand(0).getMBB());
+ BuildMI(*MBB, MII, DL, TII->get(Hexagon::TFCR), Hexagon::SA0).addReg(Scratch);
+}
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
new file mode 100644
index 000000000000..9df965efc14b
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -0,0 +1,1485 @@
+//===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the Hexagon target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-isel"
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===--------------------------------------------------------------------===//
+/// HexagonDAGToDAGISel - Hexagon specific code to select Hexagon machine
+/// instructions for SelectionDAG operations.
+///
+namespace {
+class HexagonDAGToDAGISel : public SelectionDAGISel {
+ /// Subtarget - Keep a pointer to the Hexagon Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const HexagonSubtarget &Subtarget;
+
+ // Keep a reference to HexagonTargetMachine.
+ HexagonTargetMachine& TM;
+ const HexagonInstrInfo *TII;
+
+public:
+ explicit HexagonDAGToDAGISel(HexagonTargetMachine &targetmachine)
+ : SelectionDAGISel(targetmachine),
+ Subtarget(targetmachine.getSubtarget<HexagonSubtarget>()),
+ TM(targetmachine),
+ TII(static_cast<const HexagonInstrInfo*>(TM.getInstrInfo())) {
+
+ }
+
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern Selectors.
+ bool SelectADDRri(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriS11_2(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectMEMriS11_2(SDValue& Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriS11_3(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRrr(SDValue &Addr, SDValue &Base, SDValue &Offset);
+ bool SelectADDRriU6_0(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
+ bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
+
+ virtual const char *getPassName() const {
+ return "Hexagon DAG->DAG Pattern Instruction Selection";
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
+ bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
+
+ SDNode *SelectLoad(SDNode *N);
+ SDNode *SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl);
+ SDNode *SelectIndexedLoadZeroExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectIndexedLoadSignExtend64(LoadSDNode *LD, unsigned Opcode,
+ DebugLoc dl);
+ SDNode *SelectBaseOffsetStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectIndexedStore(StoreSDNode *ST, DebugLoc dl);
+ SDNode *SelectStore(SDNode *N);
+ SDNode *SelectSHL(SDNode *N);
+ SDNode *SelectSelect(SDNode *N);
+ SDNode *SelectTruncate(SDNode *N);
+ SDNode *SelectMul(SDNode *N);
+ SDNode *SelectZeroExtend(SDNode *N);
+ SDNode *SelectIntrinsicWOChain(SDNode *N);
+ SDNode *SelectConstant(SDNode *N);
+ SDNode *SelectAdd(SDNode *N);
+
+ // Include the pieces autogenerated from the target description.
+#include "HexagonGenDAGISel.inc"
+};
+} // end anonymous namespace
+
+
+/// createHexagonISelDag - This pass converts a legalized DAG into a
+/// Hexagon-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM) {
+ return new HexagonDAGToDAGISel(TM);
+}
+
+static bool IsS11_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11 predicate - True if the immediate fits in an 11-bit sign-extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}
+
+
+static bool IsS11_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_1 predicate - True if the immediate fits in an 11-bit sign-extended
+ // field shifted left by 1 (a multiple of 2).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}
+
+
+static bool IsS11_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_2 predicate - True if the immediate fits in an 11-bit sign-extended
+ // field shifted left by 2 (a multiple of 4).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}
+
+
+static bool IsS11_3_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // s11_3 predicate - True if the immediate fits in an 11-bit sign-extended
+ // field shifted left by 3 (a multiple of 8).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}
+
+
+static bool IsU6_0_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6 predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}
+
+
+static bool IsU6_1_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_1 predicate - True if the immediate fits in a 6-bit unsigned
+ // field shifted left by 1 (a multiple of 2).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}
+
+
+static bool IsU6_2_Offset(SDNode * S) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+ // u6_2 predicate - True if the immediate fits in a 6-bit unsigned
+ // field shifted left by 2 (a multiple of 4).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}
+
+
+// Intrinsics that return a predicate.
+static unsigned doesIntrinsicReturnPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_cmpeq:
+ case Intrinsic::hexagon_C2_cmpgt:
+ case Intrinsic::hexagon_C2_cmpgtu:
+ case Intrinsic::hexagon_C2_cmpgtup:
+ case Intrinsic::hexagon_C2_cmpgtp:
+ case Intrinsic::hexagon_C2_cmpeqp:
+ case Intrinsic::hexagon_C2_bitsset:
+ case Intrinsic::hexagon_C2_bitsclr:
+ case Intrinsic::hexagon_C2_cmpeqi:
+ case Intrinsic::hexagon_C2_cmpgti:
+ case Intrinsic::hexagon_C2_cmpgtui:
+ case Intrinsic::hexagon_C2_cmpgei:
+ case Intrinsic::hexagon_C2_cmpgeui:
+ case Intrinsic::hexagon_C2_cmplt:
+ case Intrinsic::hexagon_C2_cmpltu:
+ case Intrinsic::hexagon_C2_bitsclri:
+ case Intrinsic::hexagon_C2_and:
+ case Intrinsic::hexagon_C2_or:
+ case Intrinsic::hexagon_C2_xor:
+ case Intrinsic::hexagon_C2_andn:
+ case Intrinsic::hexagon_C2_not:
+ case Intrinsic::hexagon_C2_orn:
+ case Intrinsic::hexagon_C2_pxfer_map:
+ case Intrinsic::hexagon_C2_any8:
+ case Intrinsic::hexagon_C2_all8:
+ case Intrinsic::hexagon_A2_vcmpbeq:
+ case Intrinsic::hexagon_A2_vcmpbgtu:
+ case Intrinsic::hexagon_A2_vcmpheq:
+ case Intrinsic::hexagon_A2_vcmphgt:
+ case Intrinsic::hexagon_A2_vcmphgtu:
+ case Intrinsic::hexagon_A2_vcmpweq:
+ case Intrinsic::hexagon_A2_vcmpwgt:
+ case Intrinsic::hexagon_A2_vcmpwgtu:
+ case Intrinsic::hexagon_C2_tfrrp:
+ case Intrinsic::hexagon_S2_tstbit_i:
+ case Intrinsic::hexagon_S2_tstbit_r:
+ return 1;
+ }
+}
+
+
+// Intrinsics that have predicate operands.
+static unsigned doesIntrinsicContainPredicate(unsigned ID)
+{
+ switch (ID) {
+ default:
+ return 0;
+ case Intrinsic::hexagon_C2_tfrpr:
+ return Hexagon::TFR_RsPd;
+ case Intrinsic::hexagon_C2_and:
+ return Hexagon::AND_pp;
+ case Intrinsic::hexagon_C2_xor:
+ return Hexagon::XOR_pp;
+ case Intrinsic::hexagon_C2_or:
+ return Hexagon::OR_pp;
+ case Intrinsic::hexagon_C2_not:
+ return Hexagon::NOT_p;
+ case Intrinsic::hexagon_C2_any8:
+ return Hexagon::ANY_pp;
+ case Intrinsic::hexagon_C2_all8:
+ return Hexagon::ALL_pp;
+ case Intrinsic::hexagon_C2_vitpack:
+ return Hexagon::VITPACK_pp;
+ case Intrinsic::hexagon_C2_mask:
+ return Hexagon::MASK_p;
+ case Intrinsic::hexagon_C2_mux:
+ return Hexagon::MUX_rr;
+
+ // Mapping hexagon_C2_muxir to MUX_pri. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxir:
+ return Hexagon::MUX_ri;
+
+ // Mapping hexagon_C2_muxri to MUX_pir. This is pretty weird - but
+ // that's how it's mapped in q6protos.h.
+ case Intrinsic::hexagon_C2_muxri:
+ return Hexagon::MUX_ir;
+
+ case Intrinsic::hexagon_C2_muxii:
+ return Hexagon::MUX_ii;
+ case Intrinsic::hexagon_C2_vmux:
+ return Hexagon::VMUX_prr64;
+ case Intrinsic::hexagon_S2_valignrb:
+ return Hexagon::VALIGN_rrp;
+ case Intrinsic::hexagon_S2_vsplicerb:
+ return Hexagon::VSPLICE_rrp;
+ }
+}
+
+
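+// OffsetFitsS11 - True if Offset fits in a signed 11-bit field scaled by
+// the access size. For example, an i32 access admits byte offsets that are
+// multiples of 4 in the range [-4096, 4092].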
+static bool OffsetFitsS11(EVT MemType, int64_t Offset) {
+ if (MemType == MVT::i64 && isShiftedInt<11,3>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i32 && isShiftedInt<11,2>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i16 && isShiftedInt<11,1>(Offset)) {
+ return true;
+ }
+ if (MemType == MVT::i8 && isInt<11>(Offset)) {
+ return true;
+ }
+ return false;
+}
+
+
+//
+// Try to lower loads of GlobalAddresses into base+offset loads. Custom
+// lowering for GlobalAddress nodes has already turned them into a
+// CONST32.
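+//
+// For example (a sketch), a load of @g+8 becomes:
+//   Rb = CONST32_set(@g)
+//   Rd = LDriw_indexed(Rb, #8)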
+//
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDNode* Const32 = LD->getBasePtr().getNode();
+ unsigned Opcode = 0;
+
+ if (Const32->getOpcode() == HexagonISD::CONST32 &&
+ ISD::isNormalLoad(LD)) {
+ SDValue Base = Const32->getOperand(0);
+ EVT LoadedVT = LD->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(LoadedVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+ // Figure out the base + offset opcode.
+ if (LoadedVT == MVT::i64) Opcode = Hexagon::LDrid_indexed;
+ else if (LoadedVT == MVT::i32) Opcode = Hexagon::LDriw_indexed;
+ else if (LoadedVT == MVT::i16) Opcode = Hexagon::LDrih_indexed;
+ else if (LoadedVT == MVT::i8) Opcode = Hexagon::LDrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ // Build indexed load.
+ SDValue TargetConstOff = CurDAG->getTargetConstant(Offset, PointerTy);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other,
+ SDValue(NewBase,0),
+ TargetConstOff,
+ Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(LD, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConst = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
+ MVT::Other, Base, TargetConst,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl, MVT::i64,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
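+ // The offset is not a valid auto-increment: load with a zero offset and
+ // compute the post-incremented address with a separate ADD_ri.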
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::SXTW, dl,
+ MVT::i64, SDValue(Result_1, 0));
+ SDNode* Result_3 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl,
+ MVT::i32, Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_2, 0),
+ SDValue(Result_3, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_2;
+ }
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
+ unsigned Opcode,
+ DebugLoc dl)
+{
+ SDValue Chain = LD->getChain();
+ EVT LoadedVT = LD->getMemoryVT();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ SDValue N1 = LD->getOperand(1);
+ SDValue CPTmpN1_0;
+ SDValue CPTmpN1_1;
+ if (SelectADDRriS11_2(N1, CPTmpN1_0, CPTmpN1_1) &&
+ N1.getNode()->getValueType(0) == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0),
+ SDValue(Result_1, 1),
+ SDValue(Result_1, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ // Generate an indirect load.
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other,
+ Base, TargetConst0, Chain);
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2,0),
+ SDValue(Result_1,0));
+ // Add offset to base.
+ SDNode* Result_4 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_3, 0), // Load value.
+ SDValue(Result_4, 0), // New address.
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_3;
+ }
+
+ return SelectCode(LD);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, DebugLoc dl) {
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Offset = LD->getOffset();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT LoadedVT = LD->getMemoryVT();
+ unsigned Opcode = 0;
+
+ // Check for zero ext loads.
+ bool zextval = (LD->getExtensionType() == ISD::ZEXTLOAD);
+
+ // Figure out the opcode.
+ if (LoadedVT == MVT::i64) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDrid;
+ else
+ Opcode = Hexagon::LDrid;
+ } else if (LoadedVT == MVT::i32) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = Hexagon::POST_LDriw;
+ else
+ Opcode = Hexagon::LDriw;
+ } else if (LoadedVT == MVT::i16) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriuh : Hexagon::POST_LDrih;
+ else
+ Opcode = zextval ? Hexagon::LDriuh : Hexagon::LDrih;
+ } else if (LoadedVT == MVT::i8) {
+ if (TII->isValidAutoIncImm(LoadedVT, Val))
+ Opcode = zextval ? Hexagon::POST_LDriub : Hexagon::POST_LDrib;
+ else
+ Opcode = zextval ? Hexagon::LDriub : Hexagon::LDrib;
+ } else
+ assert (0 && "unknown memory type");
+
+ // For zero ext i64 loads, we need to add combine instructions.
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::ZEXTLOAD) {
+ return SelectIndexedLoadZeroExtend64(LD, Opcode, dl);
+ }
+ if (LD->getValueType(0) == MVT::i64 &&
+ LD->getExtensionType() == ISD::SEXTLOAD) {
+ // Handle sign ext i64 loads.
+ return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
+ }
+ if (TII->isValidAutoIncImm(LoadedVT, Val)) {
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::i32, MVT::Other, Base,
+ TargetConstVal, Chain);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result, 0),
+ SDValue(Result, 1),
+ SDValue(Result, 2)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result;
+ } else {
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl,
+ LD->getValueType(0),
+ MVT::Other, Base, TargetConst0,
+ Chain);
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base, TargetConstVal,
+ SDValue(Result_1, 1));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+ const SDValue Froms[] = { SDValue(LD, 0),
+ SDValue(LD, 1),
+ SDValue(LD, 2)
+ };
+ const SDValue Tos[] = { SDValue(Result_1, 0),
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 1)
+ };
+ ReplaceUses(Froms, Tos, 3);
+ return Result_1;
+ }
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
+ SDNode *result;
+ DebugLoc dl = N->getDebugLoc();
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Handle indexed loads.
+ if (AM != ISD::UNINDEXED) {
+ result = SelectIndexedLoad(LD, dl);
+ } else {
+ result = SelectBaseOffsetLoad(LD, dl);
+ }
+
+ return result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDValue Base = ST->getBasePtr();
+ SDValue Offset = ST->getOffset();
+ SDValue Value = ST->getValue();
+ SDNode *OffsetNode = Offset.getNode();
+ // Get the constant value.
+ int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
+ EVT StoredVT = ST->getMemoryVT();
+
+ // Offset value must be within representable range
+ // and must have correct alignment properties.
+ if (TII->isValidAutoIncImm(StoredVT, Val)) {
+ SDValue Ops[] = { Value, Base,
+ CurDAG->getTargetConstant(Val, MVT::i32), Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the post-increment version of the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::POST_STdri;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::POST_STwri;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::POST_SThri;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::POST_STbri;
+ else assert (0 && "unknown memory type");
+
+ // Build the post-increment store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(ST, Result);
+ ReplaceUses(SDValue(ST,1), SDValue(Result,1));
+ return Result;
+ }
+
+ // Note: Order of operands matches the def of instruction:
+ // def STrid : STInst<(outs), (ins MEMri:$addr, DoubleRegs:$src1), ...
+ // and it differs from POST_ST*, for instance.
+ SDValue Ops[] = { Base, CurDAG->getTargetConstant(0, MVT::i32), Value,
+ Chain};
+ unsigned Opcode = 0;
+
+ // Figure out the opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib;
+ else assert (0 && "unknown memory type");
+
+ // Build regular store.
+ SDValue TargetConstVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ SDNode* Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops,
+ 4);
+ // Build the split increment instruction.
+ SDNode* Result_2 = CurDAG->getMachineNode(Hexagon::ADD_ri, dl, MVT::i32,
+ Base,
+ TargetConstVal,
+ SDValue(Result_1, 0));
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result_1)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(ST,0), SDValue(Result_2,0));
+ ReplaceUses(SDValue(ST,1), SDValue(Result_1,0));
+ return Result_2;
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectBaseOffsetStore(StoreSDNode *ST,
+ DebugLoc dl) {
+ SDValue Chain = ST->getChain();
+ SDNode* Const32 = ST->getBasePtr().getNode();
+ SDValue Value = ST->getValue();
+ unsigned Opcode = 0;
+
+ // Try to lower stores of GlobalAddresses into indexed stores. Custom
+ // lowering for GlobalAddress nodes has already turned them into a
+ // CONST32. Avoid truncating stores for the moment; post-inc stores do
+ // the same. There doesn't appear to be a reason for this restriction,
+ // so a bug will be filed to fix it.
+ if ((Const32->getOpcode() == HexagonISD::CONST32) &&
+ !(Value.getValueType() == MVT::i64 && ST->isTruncatingStore())) {
+ SDValue Base = Const32->getOperand(0);
+ if (Base.getOpcode() == ISD::TargetGlobalAddress) {
+ EVT StoredVT = ST->getMemoryVT();
+ int64_t Offset = cast<GlobalAddressSDNode>(Base)->getOffset();
+ if (Offset != 0 && OffsetFitsS11(StoredVT, Offset)) {
+ MVT PointerTy = TLI.getPointerTy();
+ const GlobalValue* GV =
+ cast<GlobalAddressSDNode>(Base)->getGlobal();
+ SDValue TargAddr =
+ CurDAG->getTargetGlobalAddress(GV, dl, PointerTy, 0);
+ SDNode* NewBase = CurDAG->getMachineNode(Hexagon::CONST32_set,
+ dl, PointerTy,
+ TargAddr);
+
+ // Figure out the base + offset opcode.
+ if (StoredVT == MVT::i64) Opcode = Hexagon::STrid_indexed;
+ else if (StoredVT == MVT::i32) Opcode = Hexagon::STriw_indexed;
+ else if (StoredVT == MVT::i16) Opcode = Hexagon::STrih_indexed;
+ else if (StoredVT == MVT::i8) Opcode = Hexagon::STrib_indexed;
+ else assert (0 && "unknown memory type");
+
+ SDValue Ops[] = {SDValue(NewBase,0),
+ CurDAG->getTargetConstant(Offset,PointerTy),
+ Value, Chain};
+ // Build the indexed store.
+ SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
+ MVT::Other, Ops, 4);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(ST, Result);
+ return Result;
+ }
+ }
+ }
+
+ return SelectCode(ST);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectStore(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ ISD::MemIndexedMode AM = ST->getAddressingMode();
+
+ // Handle indexed stores.
+ if (AM != ISD::UNINDEXED) {
+ return SelectIndexedStore(ST, dl);
+ }
+
+ return SelectBaseOffsetStore(ST, dl);
+}
+
+SDNode *HexagonDAGToDAGISel::SelectMul(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ //
+ // --- match with the following ---
+ //
+ // %mul.i = mpy (%tmp1, %add)
+ //
+
+ if (N->getValueType(0) == MVT::i64) {
+ // An i64 signed multiply of sign-extended i32 operands.
+ SDValue MulOp0 = N->getOperand(0);
+ SDValue MulOp1 = N->getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload.
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(), TargetConst0,
+ Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY64, dl, MVT::i64,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectSelect(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() == ISD::SETCC) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ SDValue N000 = N00.getOperand(0);
+ SDValue N001 = N00.getOperand(1);
+ if (cast<VTSDNode>(N001)->getVT() == MVT::i16) {
+ SDValue N01 = N0.getOperand(1);
+ SDValue N02 = N0.getOperand(2);
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other),IntRegs:i32:$src1, SETLT:Other),IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MAXh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETLT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MAXw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+
+ // Pattern: (select:i32 (setcc:i1 (sext_inreg:i32 IntRegs:i32:$src2,
+ // i16:Other), IntRegs:i32:$src1, SETGT:Other), IntRegs:i32:$src1,
+ // IntRegs:i32:$src2)
+ // Emits: (MINh_rr:i32 IntRegs:i32:$src1, IntRegs:i32:$src2)
+ // Pattern complexity = 9 cost = 1 size = 0.
+ if (cast<CondCodeSDNode>(N02)->get() == ISD::SETGT) {
+ SDValue N1 = N->getOperand(1);
+ if (N01 == N1) {
+ SDValue N2 = N->getOperand(2);
+ if (N000 == N2 &&
+ N0.getNode()->getValueType(N0.getResNo()) == MVT::i1 &&
+ N00.getNode()->getValueType(N00.getResNo()) == MVT::i32) {
+ SDNode *SextNode = CurDAG->getMachineNode(Hexagon::SXTH, dl,
+ MVT::i32, N000);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MINw_rr, dl,
+ MVT::i32,
+ SDValue(SextNode, 0),
+ N1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+SDNode *HexagonDAGToDAGISel::SelectTruncate(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDValue Shift = N->getOperand(0);
+
+ //
+ // %conv.i = sext i32 %tmp1 to i64
+ // %conv2.i = sext i32 %add to i64
+ // %mul.i = mul nsw i64 %conv2.i, %conv.i
+ // %shr5.i = lshr i64 %mul.i, 32
+ // %conv3.i = trunc i64 %shr5.i to i32
+ //
+ // --- match with the following ---
+ //
+ // %conv3.i = mpy (%tmp1, %add)
+ //
+ // Trunc to i32.
+ if (N->getValueType(0) == MVT::i32) {
+ // Trunc from i64.
+ if (Shift.getNode()->getValueType(0) == MVT::i64) {
+ // Trunc child is logical shift right.
+ if (Shift.getOpcode() != ISD::SRL) {
+ return SelectCode(N);
+ }
+
+ SDValue ShiftOp0 = Shift.getOperand(0);
+ SDValue ShiftOp1 = Shift.getOperand(1);
+
+ // The shift amount must be the constant 32.
+ if (ShiftOp1.getOpcode() != ISD::Constant) {
+ return SelectCode(N);
+ }
+
+ int32_t ShiftConst =
+ cast<ConstantSDNode>(ShiftOp1.getNode())->getSExtValue();
+ if (ShiftConst != 32) {
+ return SelectCode(N);
+ }
+
+ // The shifted value must be an i64 signed multiply.
+ SDValue Mul = ShiftOp0;
+ if (Mul.getOpcode() != ISD::MUL) {
+ return SelectCode(N);
+ }
+
+ SDValue MulOp0 = Mul.getOperand(0);
+ SDValue MulOp1 = Mul.getOperand(1);
+
+ SDValue OP0;
+ SDValue OP1;
+
+ // Handle sign_extend and sextload
+ if (MulOp0.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext0 = MulOp0.getOperand(0);
+ if (Sext0.getNode()->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ OP0 = Sext0;
+ } else if (MulOp0.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP0 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Same goes for the second operand.
+ if (MulOp1.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue Sext1 = MulOp1.getOperand(0);
+ if (Sext1.getNode()->getValueType(0) != MVT::i32)
+ return SelectCode(N);
+
+ OP1 = Sext1;
+ } else if (MulOp1.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
+ if (LD->getMemoryVT() != MVT::i32 ||
+ LD->getExtensionType() != ISD::SEXTLOAD ||
+ LD->getAddressingMode() != ISD::UNINDEXED) {
+ return SelectCode(N);
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ OP1 = SDValue (CurDAG->getMachineNode(Hexagon::LDriw, dl, MVT::i32,
+ MVT::Other,
+ LD->getBasePtr(),
+ TargetConst0, Chain), 0);
+ } else {
+ return SelectCode(N);
+ }
+
+ // Generate a mpy instruction.
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::MPY, dl, MVT::i32,
+ OP0, OP1);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
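+//
+// Fold a left shift of a constant multiply into a single
+// multiply-by-immediate when the combined constant fits in a signed 9-bit
+// field, e.g. (shl (mul x, c), s) -> mpyi(x, c << s). Also handles the
+// negated form (shl (sub 0, (shl x, c2)), c1) -> mpyi(x, -(1 << (c1+c2))).
+//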
+SDNode *HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i32) {
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+ // RHS is const.
+ if (Shl_1.getOpcode() == ISD::Constant) {
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (Mul_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t MulConst =
+ cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
+ int32_t ValConst = MulConst << ShlConst;
+ SDValue Val = CurDAG->getTargetConstant(ValConst,
+ MVT::i32);
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+
+ }
+ } else if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (Sub_0.getOpcode() == ISD::Constant) {
+ int32_t SubConst =
+ cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
+ if (SubConst == 0) {
+ if (Sub_1.getOpcode() == ISD::SHL) {
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (Shl2_1.getOpcode() == ISD::Constant) {
+ int32_t ShlConst =
+ cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
+ int32_t Shl2Const =
+ cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
+ int32_t ValConst = 1 << (ShlConst+Shl2Const);
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, MVT::i32);
+ if (ConstantSDNode *CN =
+ dyn_cast<ConstantSDNode>(Val.getNode()))
+ if (isInt<9>(CN->getSExtValue())) {
+ SDNode* Result =
+ CurDAG->getMachineNode(Hexagon::MPYI_ri, dl, MVT::i32,
+ Shl2_0, Val);
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// If a zero_extend follows an intrinsic in the DAG (meaning the result of
+// the intrinsic is a predicate), convert the zero_extend to a transfer
+// instruction.
+//
+// The zero_extend is lowered to a transfer here; otherwise it would be
+// converted into a MUX, because predicate registers are defined as 1 bit
+// in the compiler while the architecture defines them as 8-bit registers.
+// We want to preserve all of the lower 8 bits, not just the LSB.
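+//
+// For example (a sketch):
+//   Rd  = TFR_RsPd(Pd)           // i32 result.
+//   Rz  = TFRI(#0)
+//   Rdd = COMBINE_rr(Rz, Rd)     // i64 result: high word zeroed.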
+//
+SDNode *HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ SDNode *IsIntrinsic = N->getOperand(0).getNode();
+ if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+ unsigned ID =
+ cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ if (doesIntrinsicReturnPredicate(ID)) {
+ // Now we need to differentiate target data types.
+ if (N->getValueType(0) == MVT::i64) {
+ // Convert the zero_extend to Rs = Pd followed by COMBINE_rr(0,Rs).
+ SDValue TargetConst0 = CurDAG->getTargetConstant(0, MVT::i32);
+ SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::TFRI, dl,
+ MVT::i32,
+ TargetConst0);
+ SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::COMBINE_rr, dl,
+ MVT::i64, MVT::Other,
+ SDValue(Result_2, 0),
+ SDValue(Result_1, 0));
+ ReplaceUses(N, Result_3);
+ return Result_3;
+ }
+ if (N->getValueType(0) == MVT::i32) {
+ // Convert the zero_extend to Rs = Pd
+ SDNode* RsPd = CurDAG->getMachineNode(Hexagon::TFR_RsPd, dl,
+ MVT::i32,
+ SDValue(IsIntrinsic, 0));
+ ReplaceUses(N, RsPd);
+ return RsPd;
+ }
+ llvm_unreachable("Unexpected value type");
+ }
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Check for intrinsics that have predicate registers as operand(s) and
+// lower them to the corresponding machine instructions.
+//
+SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ unsigned ID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ unsigned IntrinsicWithPred = doesIntrinsicContainPredicate(ID);
+
+ // We are concerned with only those intrinsics that have predicate registers
+ // as at least one of the operands.
+ if (IntrinsicWithPred) {
+ SmallVector<SDValue, 8> Ops;
+ const MCInstrDesc &MCID = TII->get(IntrinsicWithPred);
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ // Iterate over all the operands of the intrinsic.
+ // For PredRegs, do the transfer.
+ // For Double/Int Regs, just preserve the value.
+ // For immediates, lower them.
+ for (unsigned i = 1; i < N->getNumOperands(); ++i) {
+ SDNode *Arg = N->getOperand(i).getNode();
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
+
+ if (RC == Hexagon::IntRegsRegisterClass ||
+ RC == Hexagon::DoubleRegsRegisterClass) {
+ Ops.push_back(SDValue(Arg, 0));
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ // Do the transfer.
+ SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(Arg, 0));
+ Ops.push_back(SDValue(PdRs,0));
+ } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) {
+ // This is an immediate operand. Lower it here, making sure that we do
+ // have a ConstantSDNode for the immediate value.
+ int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
+ SDValue SDVal = CurDAG->getTargetConstant(Val, MVT::i32);
+ Ops.push_back(SDVal);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+ }
+ EVT ReturnValueVT = N->getValueType(0);
+ SDNode *Result = CurDAG->getMachineNode(IntrinsicWithPred, dl,
+ ReturnValueVT,
+ Ops.data(), Ops.size());
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ return SelectCode(N);
+}
+
+
+//
+// Map predicate true (encoded as -1 in LLVM) to an XOR.
+//
+SDNode *HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) == MVT::i1) {
+ SDNode* Result;
+ int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ if (Val == -1) {
+ // Create the IntReg = #0 node.
+ SDNode* IntRegTFR =
+ CurDAG->getMachineNode(Hexagon::TFRI, dl, MVT::i32,
+ CurDAG->getTargetConstant(0, MVT::i32));
+
+ // Pd = IntReg
+ SDNode* Pd = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
+ SDValue(IntRegTFR, 0));
+
+ // not(Pd)
+ SDNode* NotPd = CurDAG->getMachineNode(Hexagon::NOT_p, dl, MVT::i1,
+ SDValue(Pd, 0));
+
+ // xor(not(Pd))
+ Result = CurDAG->getMachineNode(Hexagon::XOR_pp, dl, MVT::i1,
+ SDValue(Pd, 0), SDValue(NotPd, 0));
+
+ // We have just built:
+ //   Rs = #0
+ //   Pd = Rs                  // Pd is false.
+ //   Pd = xor(Pd, not(Pd))    // Yields true (-1).
+
+ ReplaceUses(N, Result);
+ return Result;
+ }
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Map an add whose first operand is an asr onto an accumulating shift:
+// add(asr(Rs, Rt), Rx) -> Rx += asr(Rs, Rt).
+//
+SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
+ DebugLoc dl = N->getDebugLoc();
+ if (N->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+ // Identify nodes of the form: add(asr(...)).
+ SDNode* Src1 = N->getOperand(0).getNode();
+ if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
+ || Src1->getValueType(0) != MVT::i32) {
+ return SelectCode(N);
+ }
+
+ // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
+ // Rd and Rd' are assigned to the same register.
+ SDNode* Result = CurDAG->getMachineNode(Hexagon::ASR_rr_acc, dl, MVT::i32,
+ N->getOperand(1),
+ Src1->getOperand(0),
+ Src1->getOperand(1));
+ ReplaceUses(N, Result);
+
+ return Result;
+}
+
+
+SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+
+ switch (N->getOpcode()) {
+ case ISD::Constant:
+ return SelectConstant(N);
+
+ case ISD::ADD:
+ return SelectAdd(N);
+
+ case ISD::SHL:
+ return SelectSHL(N);
+
+ case ISD::LOAD:
+ return SelectLoad(N);
+
+ case ISD::STORE:
+ return SelectStore(N);
+
+ case ISD::SELECT:
+ return SelectSelect(N);
+
+ case ISD::TRUNCATE:
+ return SelectTruncate(N);
+
+ case ISD::MUL:
+ return SelectMul(N);
+
+ case ISD::ZERO_EXTEND:
+ return SelectZeroExtend(N);
+
+ case ISD::INTRINSIC_WO_CHAIN:
+ return SelectIntrinsicWOChain(N);
+ }
+
+ return SelectCode(N);
+}
+
+
+//
+// Hexagon_TODO: Five functions for ADDRri?! Surely there must be a better way
+// to define these instructions.
+//
+bool HexagonDAGToDAGISel::SelectADDRri(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_0(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_0_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_1(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_1_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriU6_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsU6_2_Offset(Offset.getNode()));
+}
+
+
+bool HexagonDAGToDAGISel::SelectMEMriS11_2(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ // Both the ISD::ADD and non-ADD forms reduce to the same reg+imm
+ // selection.
+ return SelectADDRriS11_2(Addr, Base, Offset);
+}
+
+
+bool HexagonDAGToDAGISel::SelectADDRriS11_3(SDValue& Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+ }
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return (IsS11_3_Offset(Offset.getNode()));
+}
+
+bool HexagonDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1,
+ SDValue &R2) {
+ if (Addr.getOpcode() == ISD::FrameIndex) return false;
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
+ R1 = Addr.getOperand(0);
+ R2 = Addr.getOperand(1);
+ return true;
+ }
+
+ R1 = Addr;
+
+ return true;
+}
+
+
+// Handle the generic address case. It is accessed from inline asm =m
+// constraints, which could have any kind of pointer.
+bool HexagonDAGToDAGISel::SelectAddr(SDNode *Op, SDValue Addr,
+ SDValue &Base, SDValue &Offset) {
+ if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // Direct calls.
+
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
+
+
+bool HexagonDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+
+ switch (ConstraintCode) {
+ case 'o': // Offsetable.
+ case 'v': // Not offsetable.
+ default: return true;
+ case 'm': // Memory.
+ if (!SelectAddr(Op.getNode(), Op, Op0, Op1))
+ return true;
+ break;
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
new file mode 100644
index 000000000000..d6da0d0911b9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -0,0 +1,1496 @@
+//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that Hexagon uses to lower LLVM code
+// into a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonISelLowering.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+const unsigned Hexagon_MAX_RET_SIZE = 64;
+
+static cl::opt<bool>
+EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+ cl::desc("Control jump table emission on Hexagon target"));
+
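+// Number of named arguments for the call currently being lowered; set in
+// LowerCall and consulted by CC_Hexagon_VarArg. (Note: this is file-scope
+// mutable state.)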
+int NumNamedVarArgParams = -1;
+
+// Implement calling convention for Hexagon.
+static bool
+CC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
+
+static bool
+CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ // NumNamedVarArgParams cannot be zero for a vararg function.
+ assert((NumNamedVarArgParams > 0) &&
+        "NumNamedVarArgParams must be bigger than zero.");
+
+ if ( (int)ValNo < NumNamedVarArgParams ) {
+ // Deal with named arguments.
+ return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
+ }
+
+ // Deal with un-named arguments.
+ unsigned ofst;
+ if (ArgFlags.isByVal()) {
+ // If pass-by-value, the size allocated on stack is decided
+ // by ArgFlags.getByValSize(), not by the size of LocVT.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ ofst = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i32) {
+ ofst = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ if (LocVT == MVT::i64) {
+ ofst = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ return false;
+ }
+ llvm_unreachable("unhandled parameter type");
+}
+
+
+static bool
+CC_Hexagon (unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (ArgFlags.isByVal()) {
+ // Passed on stack.
+ assert ((ArgFlags.getByValSize() > 8) &&
+ "ByValSize must be bigger than 8 bytes");
+ unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+ }
+
+ if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+
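+// 32-bit values are passed in R0-R5; anything beyond that goes to the
+// stack with 4-byte alignment.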
+static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ static const uint16_t RegList[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ static const uint16_t RegList1[] = {
+ Hexagon::D1, Hexagon::D2
+ };
+ static const uint16_t RegList2[] = {
+ Hexagon::R1, Hexagon::R3
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+
+ if (LocVT == MVT::i1 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::i16) {
+ LocVT = MVT::i32;
+ ValVT = MVT::i32;
+ if (ArgFlags.isSExt())
+ LocInfo = CCValAssign::SExt;
+ else if (ArgFlags.isZExt())
+ LocInfo = CCValAssign::ZExt;
+ else
+ LocInfo = CCValAssign::AExt;
+ }
+
+ if (LocVT == MVT::i32) {
+ if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ if (LocVT == MVT::i64) {
+ if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+ return false;
+ }
+
+ return true; // CC didn't match.
+}
+
+static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+ if (LocVT == MVT::i32) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::R0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ if (LocVT == MVT::i64) {
+ if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ unsigned Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return false;
+}
+
+SDValue
+HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
+const {
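+  // No target-specific lowering is performed here; returning an empty
+  // SDValue indicates that no custom lowering was done.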
+ return SDValue();
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter. Sometimes what we are copying is the end of a
+/// larger object, the part that does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ DebugLoc dl) {
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+
+// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
+// passed by value, the function prototype is modified to return void and
+// the value is stored in memory pointed to by a pointer passed by the caller.
+SDValue
+HexagonTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+/// LowerCallResult - Lower the result values of an ISD::CALL into the
+/// appropriate copies out of the appropriate physical registers. This
+/// assumes that Chain/InFlag are the input chain/flag to use, and that
+/// TheCall is the call being lowered. Returns an SDNode with the same number
+/// of values as the ISD::CALL.
+SDValue
+HexagonTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl,
+ RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+/// LowerCall - Function arguments are copied from virtual registers to
+/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
+SDValue
+HexagonTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Check for varargs.
+ NumNamedVarArgParams = -1;
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
+ {
+ const Function* CalleeFn = NULL;
+ Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
+ if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
+ {
+      // A vararg function must declare at least one named parameter, so a
+      // "vararg" callee with zero parameters must really be an undeclared
+      // function. Do not assume varargs in that case.
+ if (CalleeFn->isVarArg() &&
+ CalleeFn->getFunctionType()->getNumParams() != 0) {
+ NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
+ }
+ }
+ }
+
+ if (NumNamedVarArgParams > 0)
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ else
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
+
+  if (isTailCall) {
+ bool StructAttrFlag =
+ DAG.getMachineFunction().getFunction()->hasStructRetAttr();
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet,
+ StructAttrFlag,
+ Outs, OutVals, Ins, DAG);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i){
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isMemLoc()) {
+ isTailCall = false;
+ break;
+ }
+ }
+ if (isTailCall) {
+      DEBUG(dbgs() << "Eligible for Tail Call\n");
+    } else {
+      DEBUG(dbgs() <<
+        "Argument must be passed on stack. Not eligible for Tail Call\n");
+ }
+ }
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ SDValue StackPtr =
+ DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(),
+ getPointerTy());
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ // Loc info must be one of Full, SExt, ZExt, or AExt.
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isMemLoc()) {
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getConstant(LocMemOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff);
+
+ if (Flags.isByVal()) {
+        // The argument is a struct passed by value. According to LLVM, "Arg"
+        // is a pointer.
+ MemOpChains.push_back(CreateCopyOfByValArgument(Arg, PtrOff, Chain,
+ Flags, DAG, dl));
+ } else {
+        // The argument is not passed by value. "Arg" is a built-in type. It
+        // is not a pointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),false, false,
+ 0));
+ }
+ continue;
+ }
+
+ // Arguments that can be passed on register must be kept at RegsToPass
+ // vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
+ MemOpChains.size());
+ }
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
+ getPointerTy(), true));
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // glued together.
+ SDValue InFlag;
+ if (!isTailCall) {
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ }
+
+ // For tail calls lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+ //
+    // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+    InFlag = SDValue();
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (flag_aligned_memcpy) {
+ const char *MemcpyName =
+ "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ Callee =
+ DAG.getTargetExternalSymbol(MemcpyName, getPointerTy());
+ flag_aligned_memcpy = false;
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ if (isTailCall)
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ InVals, OutVals, Callee);
+}
+
+static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
+ bool isSEXTLoad, SDValue &Base,
+ SDValue &Offset, bool &isInc,
+ SelectionDAG &DAG) {
+ if (Ptr->getOpcode() != ISD::ADD)
+ return false;
+
+ if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+ isInc = (Ptr->getOpcode() == ISD::ADD);
+ Base = Ptr->getOperand(0);
+ Offset = Ptr->getOperand(1);
+ // Ensure that Offset is a constant.
+ return (isa<ConstantSDNode>(Offset));
+ }
+
+ return false;
+}
+
+// TODO: Put this function along with the other isS* functions in
+// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
+// functions defined in HexagonImmediates.td.
+static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
+ ConstantSDNode *N = cast<ConstantSDNode>(S);
+
+  // immS4 predicate - True if the immediate, after dropping its ShiftAmount
+  // low bits, fits in a 4-bit sign extended field, and the dropped bits are
+  // zero (i.e. the offset is a multiple of the access size).
+  int64_t v = (int64_t)N->getSExtValue();
+  int64_t m = 0;
+  if (ShiftAmount > 0) {
+    // The scale is 2^ShiftAmount, not ShiftAmount itself.
+    m = v % (1LL << ShiftAmount);
+    v = v >> ShiftAmount;
+  }
+ return (v <= 7) && (v >= -8) && (m == 0);
+}
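+
+// For example, for a 32-bit access ShiftAmount is 2, so the offsets accepted
+// above are exactly the multiples of 4 in the range [-32, 28] (-8*4 .. 7*4).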
+
+/// getPostIndexedAddressParts - Returns true if this node can be combined
+/// with a load / store to form a post-indexed load / store; the base
+/// pointer, offset, and addressing mode are returned by reference.
+bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const
+{
+ EVT VT;
+ SDValue Ptr;
+ bool isSEXTLoad = false;
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ bool isInc = false;
+ bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+ isInc, DAG);
+ // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
+ int ShiftAmount = VT.getSizeInBits() / 16;
+ if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
+ AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+ }
+
+ return false;
+}
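+
+// For illustration, a loop body such as "x = *p++" over an i32 array can
+// have its load and pointer increment folded into one post-incremented
+// access, e.g. "r1 = memw(r0++#4)" (assembly shown only as an informal
+// sketch).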
+
+SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDNode *Node = Op.getNode();
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+ switch (Node->getOpcode()) {
+ case ISD::INLINEASM: {
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ if (FuncInfo->hasClobberLR())
+ break;
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default: llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_Mem: {
+ for (; NumVals; --NumVals, ++i) {}
+ break;
+ }
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg =
+ cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+
+              // Check whether the clobbered register is LR.
+ if (Reg == TM.getRegisterInfo()->getRARegister()) {
+ FuncInfo->setHasClobberLR(true);
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ }
+ } // Node->getOpcode
+ return Op;
+}
+
+
+//
+// Taken from the XCore backend.
+//
+SDValue HexagonTargetLowering::
+LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ // Mark all jump table targets as address taken.
+ const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ MBB->setHasAddressTaken();
+ // This line is needed to set the hasAddressTaken flag on the BasicBlock
+ // object.
+ BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
+ }
+
+ SDValue JumpTableBase = DAG.getNode(HexagonISD::WrapperJT, dl,
+ getPointerTy(), TargetJT);
+ SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(2, MVT::i32));
+ SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
+ ShiftIndex);
+ SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
+ MachinePointerInfo(), false, false, false,
+ 0);
+ return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
+}
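+
+// Sketch of the computation above: for a branch through entry "Index" of a
+// table of 32-bit targets, the target is loaded from
+// JumpTableBase + (Index << 2) and BR_JT branches to the loaded address.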
+
+SDValue
+HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ unsigned SPReg = getStackPointerRegisterToSaveRestore();
+
+ // Get a reference to the stack pointer.
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SPReg, MVT::i32);
+
+ // Subtract the dynamic size from the actual stack size to
+ // obtain the new stack size.
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+
+ //
+ // For Hexagon, the outgoing memory arguments area should be on top of the
+ // alloca area on the stack i.e., the outgoing memory arguments should be
+ // at a lower address than the alloca area. Move the alloca area down the
+ // stack by adding back the space reserved for outgoing arguments to SP
+ // here.
+ //
+ // We do not know what the size of the outgoing args is at this point.
+ // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
+ // stack pointer. We patch this instruction with the correct, known
+ // offset in emitPrologue().
+ //
+ // Use a placeholder immediate (zero) for now. This will be patched up
+ // by emitPrologue().
+ SDValue ArgAdjust = DAG.getNode(HexagonISD::ADJDYNALLOC, dl,
+ MVT::i32,
+ Sub,
+ DAG.getConstant(0, MVT::i32));
+
+ // The Sub result contains the new stack start address, so it
+ // must be placed in the stack pointer register.
+ SDValue CopyChain = DAG.getCopyToReg(Chain, dl,
+ TM.getRegisterInfo()->getStackRegister(),
+ Sub);
+
+ SDValue Ops[2] = { ArgAdjust, CopyChain };
+ return DAG.getMergeValues(Ops, 2, dl);
+}
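+
+// Worked example (sizes illustrative): for "alloca(24)" this computes
+// Sub = SP - 24, installs Sub as the new stack pointer, and returns
+// ADJDYNALLOC(Sub, #0); emitPrologue() later rewrites the #0 into the size
+// of the outgoing-argument area so the returned pointer ends up above it.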
+
+SDValue
+HexagonTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const
+ SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF.getInfo<HexagonMachineFunctionInfo>();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+
+  // For LLVM, when a struct is returned by value (>8 bytes), the first
+  // argument is a pointer that points to the location on the caller's
+  // stack where the return value will be stored. For Hexagon, that address
+  // is passed only when the struct size is larger than 8 bytes; otherwise
+  // no address is passed into the callee and the callee returns the result
+  // directly through R0/R1.
+
+ SmallVector<SDValue, 4> MemOps;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ unsigned ObjSize;
+ unsigned StackLocation;
+ int FI;
+
+ if ( (VA.isRegLoc() && !Flags.isByVal())
+ || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
+      // Arguments passed in registers:
+      // 1. int, long long, and ptr args that get allocated in a register.
+      // 2. Large structs that get a register to hold their address.
+ EVT RegVT = VA.getLocVT();
+ if (RegVT == MVT::i8 || RegVT == MVT::i16 || RegVT == MVT::i32) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::IntRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else if (RegVT == MVT::i64) {
+ unsigned VReg =
+ RegInfo.createVirtualRegister(Hexagon::DoubleRegsRegisterClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ } else {
+        llvm_unreachable("Unexpected register argument type");
+ }
+ } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
+ assert (0 && "ByValSize must be bigger than 8 bytes");
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ if (Flags.isByVal()) {
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ ObjSize = Flags.getByValSize();
+ } else {
+ ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
+ }
+
+ StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ // Create the frame index object for this incoming parameter...
+ FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+
+      // Create the SelectionDAG nodes corresponding to a load
+      // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
+
+ if (Flags.isByVal()) {
+ // If it's a pass-by-value aggregate, then do not dereference the stack
+ // location. Instead, we should generate a reference to the stack
+ // location.
+ InVals.push_back(FIN);
+ } else {
+ InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
+ MachinePointerInfo(), false, false,
+ false, 0));
+ }
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
+ MemOps.size());
+
+ if (isVarArg) {
+ // This will point to the next argument passed via stack.
+ int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
+ FuncInfo->setVarArgsFrameIndex(FrameIndex);
+ }
+
+ return Chain;
+}
+
+SDValue
+HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ // VASTART stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ MachineFunction &MF = DAG.getMachineFunction();
+ HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
+ SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), Op.getDebugLoc(), Addr,
+ Op.getOperand(1), MachinePointerInfo(SV), false,
+ false, 0);
+}
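+
+// On Hexagon a va_list is a single pointer, so "va_start(ap, last)" lowers
+// to one store: the frame-index address of the first anonymous stack
+// argument (VarArgsFrameIndex, set up in LowerFormalArguments) is written
+// into "ap".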
+
+SDValue
+HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDNode* OpNode = Op.getNode();
+
+  // ISD::SELECT_CC compares operands 0 and 1 and selects between operands
+  // 2 and 3.
+  SDValue Cond = DAG.getNode(ISD::SETCC, Op.getDebugLoc(), MVT::i1,
+                             Op.getOperand(0), Op.getOperand(1),
+                             Op.getOperand(4));
+  return DAG.getNode(ISD::SELECT, Op.getDebugLoc(), OpNode->getValueType(0),
+                     Cond, Op.getOperand(2),
+                     Op.getOperand(3));
+}
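+
+// E.g. "select_cc a, b, x, y, setlt" becomes "setcc a, b, setlt" feeding a
+// plain "select cond, x, y", which instruction selection can then match
+// directly (e.g. onto a mux).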
+
+SDValue
+HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
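+
+// Illustration: __builtin_return_address(0) lowers to a copy out of LR,
+// while a nonzero depth walks the frame chain via LowerFRAMEADDR and loads
+// the saved return address at offset 4 from the selected frame pointer.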
+
+SDValue
+HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
+ const HexagonRegisterInfo *TRI = TM.getRegisterInfo();
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ TRI->getFrameRegister(), VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ return FrameAddr;
+}
+
+
+SDValue HexagonTargetLowering::LowerMEMBARRIER(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
+ SelectionDAG& DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
+}
+
+
+SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+ DebugLoc dl = Op.getDebugLoc();
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
+
+ HexagonTargetObjectFile &TLOF =
+ (HexagonTargetObjectFile&)getObjFileLowering();
+ if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
+ return DAG.getNode(HexagonISD::CONST32_GP, dl, getPointerTy(), Result);
+ }
+
+ return DAG.getNode(HexagonISD::CONST32, dl, getPointerTy(), Result);
+}
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
+HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
+ &targetmachine)
+ : TargetLowering(targetmachine, new HexagonTargetObjectFile()),
+ TM(targetmachine) {
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, Hexagon::IntRegsRegisterClass);
+ addRegisterClass(MVT::i64, Hexagon::DoubleRegsRegisterClass);
+
+ addRegisterClass(MVT::i1, Hexagon::PredRegsRegisterClass);
+
+ computeRegisterProperties();
+
+ // Align loop entry
+ setPrefLoopAlignment(4);
+
+ // Limits for inline expansion of memcpy/memmove
+ maxStoresPerMemcpy = 6;
+ maxStoresPerMemmove = 6;
+
+ //
+ // Library calls for unsupported operations
+ //
+ setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2");
+
+ setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
+ setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf");
+
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi");
+ setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
+
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi");
+ setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
+
+ setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf");
+ setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi");
+ setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
+ setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi");
+ setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
+
+ setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
+ setOperationAction(ISD::FDIV, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
+ setOperationAction(ISD::FDIV, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2");
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
+ setOperationAction(ISD::FADD, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3");
+ setOperationAction(ISD::FADD, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi");
+ setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf");
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2");
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2");
+ setCondCodeAction(ISD::SETOGT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2");
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2");
+ setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+
+ setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
+ setOperationAction(ISD::FMUL, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3");
+  setOperationAction(ISD::FMUL, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2");
+ setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2");
+
+ setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
+  setOperationAction(ISD::FSUB, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3");
+  setOperationAction(ISD::FSUB, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2");
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2");
+ setCondCodeAction(ISD::SETO, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2");
+ setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
+
+ setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETO, MVT::f32, Expand);
+
+ setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2");
+ setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
+
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal);
+
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // Turn FP extload into load/fextend.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  // Hexagon has no i1 sign-extending load; expand it.
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ // Turn FP truncstore into trunc + store.
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // Custom legalize GlobalAddress nodes into CONST32.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i8, Custom);
+  // Expand TRUNCATE nodes on i64.
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Expand);
+
+ // Hexagon doesn't have sext_inreg, replace them with shl/sra.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+
+ // Hexagon has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ // Expand fp<->uint.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ // Hexagon has no select or setcc: expand to SELECT_CC.
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+
+ // Lower SELECT_CC to SETCC and SELECT.
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+  // This is a workaround documented in DAGCombiner.cpp:2892; we don't
+  // support SELECT_CC on every type.
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ if (EmitJumpTables) {
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ } else {
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ }
+
+ setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL , MVT::i32, Expand);
+ setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Expand);
+
+ if (TM.getSubtargetImpl()->isSubtargetV2()) {
+ setExceptionPointerRegister(Hexagon::R20);
+ setExceptionSelectorRegister(Hexagon::R21);
+ } else {
+ setExceptionPointerRegister(Hexagon::R0);
+ setExceptionSelectorRegister(Hexagon::R1);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::INLINEASM , MVT::Other, Custom);
+
+ setMinFunctionAlignment(2);
+
+ // Needed for DYNAMIC_STACKALLOC expansion.
+ unsigned StackRegister = TM.getRegisterInfo()->getStackRegister();
+ setStackPointerRegisterToSaveRestore(StackRegister);
+ setSchedulingPreference(Sched::VLIW);
+}
+
+
+const char*
+HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case HexagonISD::CONST32: return "HexagonISD::CONST32";
+ case HexagonISD::ADJDYNALLOC: return "HexagonISD::ADJDYNALLOC";
+ case HexagonISD::CMPICC: return "HexagonISD::CMPICC";
+ case HexagonISD::CMPFCC: return "HexagonISD::CMPFCC";
+ case HexagonISD::BRICC: return "HexagonISD::BRICC";
+ case HexagonISD::BRFCC: return "HexagonISD::BRFCC";
+ case HexagonISD::SELECT_ICC: return "HexagonISD::SELECT_ICC";
+ case HexagonISD::SELECT_FCC: return "HexagonISD::SELECT_FCC";
+ case HexagonISD::Hi: return "HexagonISD::Hi";
+ case HexagonISD::Lo: return "HexagonISD::Lo";
+ case HexagonISD::FTOI: return "HexagonISD::FTOI";
+ case HexagonISD::ITOF: return "HexagonISD::ITOF";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::BR_JT: return "HexagonISD::BR_JT";
+ case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
+ }
+}
+
+bool
+HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ EVT MTy1 = EVT::getEVT(Ty1);
+ EVT MTy2 = EVT::getEVT(Ty2);
+ if (!MTy1.isSimple() || !MTy2.isSimple()) {
+ return false;
+ }
+ return ((MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32));
+}
+
+bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isSimple() || !VT2.isSimple()) {
+ return false;
+ }
+ return ((VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32));
+}
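+
+// E.g. truncating an i64 held in a double register to i32 is free: the
+// result is simply the low 32-bit subregister (R0 for D0), so no
+// instruction is needed.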
+
+SDValue
+HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Should not custom lower this!");
+ // Frame & Return address. Currently unimplemented.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ llvm_unreachable("TLS not implemented for Hexagon.");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
+ case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
+ case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
+ case ISD::VASTART: return LowerVASTART(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
+
+ case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
+
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Hexagon Scheduler Hooks
+//===----------------------------------------------------------------------===//
+MachineBasicBlock *
+HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB)
+const {
+ switch (MI->getOpcode()) {
+ case Hexagon::ADJDYNALLOC: {
+ MachineFunction *MF = BB->getParent();
+ HexagonMachineFunctionInfo *FuncInfo =
+ MF->getInfo<HexagonMachineFunctionInfo>();
+ FuncInfo->addAllocaAdjustInst(MI);
+ return BB;
+ }
+ default: llvm_unreachable("Unexpected instr type to insert");
+ } // switch
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+std::pair<unsigned, const TargetRegisterClass*>
+HexagonTargetLowering::getRegForInlineAsmConstraint(const
+ std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r': // R0-R31
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i32:
+ case MVT::i16:
+ case MVT::i8:
+ return std::make_pair(0U, Hexagon::IntRegsRegisterClass);
+ case MVT::i64:
+ return std::make_pair(0U, Hexagon::DoubleRegsRegisterClass);
+ }
+ default:
+ llvm_unreachable("Unknown asm register class");
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented by
+/// AM is legal for this target, for a load/store of the specified type.
+bool HexagonTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+  // Allow only base offsets that fit in a signed 14-bit field
+  // (|offset| < 1 << 13); larger offsets cannot be encoded.
+ if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1) {
+ return false;
+ }
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV) {
+ return false;
+ }
+
+ int Scale = AM.Scale;
+ if (Scale < 0) Scale = -Scale;
+ switch (Scale) {
+ case 0: // No scale reg, "r+i", "r", or just "i".
+ break;
+ default: // No scaled addressing mode.
+ return false;
+ }
+ return true;
+}
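+
+// Examples of the policy above: "r1 + #100" is accepted (in-range offset,
+// no scale); "r1 + r2 << 2" is rejected (scaled index); any global-based
+// address is rejected (AM.BaseGV).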
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return Imm >= -512 && Imm <= 511;
+}
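+
+// E.g. a compare such as "p0 = cmp.gt(r0, #200)" can encode its immediate
+// directly, while a compare against 512 falls outside the signed range
+// [-512, 511] and the constant must first be materialized in a register.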
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
+ SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // ***************************************************************************
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes.
+ // ***************************************************************************
+
+ // If this is a tail call via a function pointer, then don't do it!
+ if (!(dyn_cast<GlobalAddressSDNode>(Callee))
+ && !(dyn_cast<ExternalSymbolSDNode>(Callee))) {
+ return false;
+ }
+
+ // Do not optimize if the calling conventions do not match.
+ if (!CCMatch)
+ return false;
+
+ // Do not tail call optimize vararg calls.
+ if (isVarArg)
+ return false;
+
+ // Also avoid tail call optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+  // In addition to the cases above, we also disable tail call optimization
+  // if the calling convention decides that at least one outgoing argument
+  // needs to go on the stack. We cannot check that here because at this
+  // point that information is not available.
+ return true;
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
new file mode 100644
index 000000000000..4208bcb2fdca
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -0,0 +1,162 @@
+//===-- HexagonISelLowering.h - Hexagon DAG Lowering Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Hexagon uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_ISELLOWERING_H
+#define Hexagon_ISELLOWERING_H
+
+#include "Hexagon.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+
+namespace llvm {
+ namespace HexagonISD {
+ enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ CONST32,
+ CONST32_GP, // For marking data present in GP.
+ SETCC,
+ ADJDYNALLOC,
+ ARGEXTEND,
+
+ CMPICC, // Compare two GPR operands, set icc.
+ CMPFCC, // Compare two FP operands, set fcc.
+ BRICC, // Branch to dest on icc condition
+ BRFCC, // Branch to dest on fcc condition
+ SELECT_ICC, // Select between two values using the current ICC flags.
+ SELECT_FCC, // Select between two values using the current FCC flags.
+
+ Hi, Lo, // Hi/Lo operations, typically on a global address.
+
+ FTOI, // FP to Int within a FP register.
+ ITOF, // Int to FP within a FP register.
+
+ CALL, // A call instruction.
+ RET_FLAG, // Return with a flag operand.
+ BR_JT, // Jump table.
+ BARRIER, // Memory barrier.
+ WrapperJT,
+ TC_RETURN
+ };
+ }
+
+ class HexagonTargetLowering : public TargetLowering {
+ int VarArgsFrameOffset; // Frame offset to start of varargs area.
+
+ bool CanReturnSmallStruct(const Function* CalleeFn,
+ unsigned& RetSize) const;
+
+ public:
+ HexagonTargetMachine &TM;
+ explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine);
+
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
+ virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDValue Callee) const;
+
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual MachineBasicBlock
+ *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
+
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ virtual EVT getSetCCResultType(EVT VT) const {
+ return MVT::i1;
+ }
+
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ // Intrinsics
+ virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ /// The type may be VoidTy, in which case only return true if the addressing
+ /// mode is legal for a load/store of any legal type.
+ /// TODO: Handle pre/postinc as well.
+ virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ };
+} // end namespace llvm
+
+#endif // Hexagon_ISELLOWERING_H
diff --git a/lib/Target/Hexagon/HexagonImmediates.td b/lib/Target/Hexagon/HexagonImmediates.td
new file mode 100644
index 000000000000..e78bb790ae7d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonImmediates.td
@@ -0,0 +1,508 @@
+//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// From IA64's InstrInfo file
+def s32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s16Imm : Operand<i32> {
+ let PrintMethod = "printImmOperand";
+}
+
+def s12Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s8Imm64 : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def s4_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u64Imm : Operand<i64> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u32Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u16_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u11_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u10Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u9Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u7Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_0Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u6_3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u5Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u4Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u3Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u2Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def u1Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def n8Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def m6Imm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printImmOperand";
+}
+
+def nOneImm : Operand<i32> {
+ // For now, we use a generic print function for all operands.
+ let PrintMethod = "printNOneImmOperand";
+}
+
+//
+// Immediate predicates
+//
+def s32ImmPred : PatLeaf<(i32 imm), [{
+  // s32ImmPred predicate - True if the immediate fits in a 32-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<32>(v);
+}]>;
+
+def s32_24ImmPred : PatLeaf<(i32 imm), [{
+ // s32_24ImmPred predicate - True if the immediate fits in a 32-bit sign
+ // extended field that is a multiple of 0x1000000.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<32,24>(v);
+}]>;
+
+def s32_16s8ImmPred : PatLeaf<(i32 imm), [{
+  // s32_16s8ImmPred predicate - True if the immediate is a multiple of
+  // 0x10000 and the shifted value fits in a 24-bit sign extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<24,16>(v);
+}]>;
+
+def s16ImmPred : PatLeaf<(i32 imm), [{
+ // immS16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<16>(v);
+}]>;
+
+
+def s13ImmPred : PatLeaf<(i32 imm), [{
+ // immS13 predicate - True if the immediate fits in a 13-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<13>(v);
+}]>;
+
+
+def s12ImmPred : PatLeaf<(i32 imm), [{
+  // s12ImmPred predicate - True if the immediate fits in a 12-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<12>(v);
+}]>;
+
+def s11_0ImmPred : PatLeaf<(i32 imm), [{
+  // s11_0ImmPred predicate - True if the immediate fits in an 11-bit sign
+  // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<11>(v);
+}]>;
+
+
+def s11_1ImmPred : PatLeaf<(i32 imm), [{
+ // s11_1ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,1>(v);
+}]>;
+
+
+def s11_2ImmPred : PatLeaf<(i32 imm), [{
+ // s11_2ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,2>(v);
+}]>;
+
+
+def s11_3ImmPred : PatLeaf<(i32 imm), [{
+ // s11_3ImmPred predicate - True if the immediate fits in an 11-bit sign
+ // extended field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<11,3>(v);
+}]>;
+
+
+def s10ImmPred : PatLeaf<(i32 imm), [{
+ // s10ImmPred predicate - True if the immediate fits in a 10-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<10>(v);
+}]>;
+
+
+def s9ImmPred : PatLeaf<(i32 imm), [{
+ // s9ImmPred predicate - True if the immediate fits in a 9-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<9>(v);
+}]>;
+
+
+def s8ImmPred : PatLeaf<(i32 imm), [{
+ // s8ImmPred predicate - True if the immediate fits in an 8-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s8Imm64Pred : PatLeaf<(i64 imm), [{
+ // s8Imm64Pred predicate - True if the 64-bit immediate fits in an 8-bit sign
+ // extended field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<8>(v);
+}]>;
+
+
+def s6ImmPred : PatLeaf<(i32 imm), [{
+ // s6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+
+def s4_0ImmPred : PatLeaf<(i32 imm), [{
+ // s4_0ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<4>(v);
+}]>;
+
+
+def s4_1ImmPred : PatLeaf<(i32 imm), [{
+ // s4_1ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,1>(v);
+}]>;
+
+
+def s4_2ImmPred : PatLeaf<(i32 imm), [{
+ // s4_2ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,2>(v);
+}]>;
+
+
+def s4_3ImmPred : PatLeaf<(i32 imm), [{
+ // s4_3ImmPred predicate - True if the immediate fits in a 4-bit sign extended
+ // field that is a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<4,3>(v);
+}]>;
+
+
+def u64ImmPred : PatLeaf<(i64 imm), [{
+ // u64ImmPred predicate - Always true; any 64-bit immediate is accepted.
+ // Adding "N ||" to suppress gcc unused-parameter warning.
+ return (N || true);
+}]>;
+
+def u32ImmPred : PatLeaf<(i32 imm), [{
+ // u32ImmPred predicate - True if the immediate fits in a 32-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<32>(v);
+}]>;
+
+def u16ImmPred : PatLeaf<(i32 imm), [{
+ // u16ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<16>(v);
+}]>;
+
+def u16_s8ImmPred : PatLeaf<(i32 imm), [{
+ // u16_s8ImmPred predicate - True if the immediate fits in a 16-bit unsigned
+ // field that is a multiple of 0x100 (a u16 value shifted left by 8).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<16,8>(v);
+}]>;
+
+def u9ImmPred : PatLeaf<(i32 imm), [{
+ // u9ImmPred predicate - True if the immediate fits in a 9-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<9>(v);
+}]>;
+
+
+def u8ImmPred : PatLeaf<(i32 imm), [{
+ // u8ImmPred predicate - True if the immediate fits in an 8-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<8>(v);
+}]>;
+
+def u7ImmPred : PatLeaf<(i32 imm), [{
+ // u7ImmPred predicate - True if the immediate fits in a 7-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<7>(v);
+}]>;
+
+
+def u6ImmPred : PatLeaf<(i32 imm), [{
+ // u6ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_0ImmPred : PatLeaf<(i32 imm), [{
+ // u6_0ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field. Same as u6ImmPred.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<6>(v);
+}]>;
+
+def u6_1ImmPred : PatLeaf<(i32 imm), [{
+ // u6_1ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 1-bit aligned - a multiple of 2.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,1>(v);
+}]>;
+
+def u6_2ImmPred : PatLeaf<(i32 imm), [{
+ // u6_2ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 2-bit aligned - a multiple of 4.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,2>(v);
+}]>;
+
+def u6_3ImmPred : PatLeaf<(i32 imm), [{
+ // u6_3ImmPred predicate - True if the immediate fits in a 6-bit unsigned
+ // field that is 3-bit aligned - a multiple of 8.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedUInt<6,3>(v);
+}]>;
+
+def u5ImmPred : PatLeaf<(i32 imm), [{
+ // u5ImmPred predicate - True if the immediate fits in a 5-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<5>(v);
+}]>;
+
+
+def u3ImmPred : PatLeaf<(i32 imm), [{
+ // u3ImmPred predicate - True if the immediate fits in a 3-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<3>(v);
+}]>;
+
+
+def u2ImmPred : PatLeaf<(i32 imm), [{
+ // u2ImmPred predicate - True if the immediate fits in a 2-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<2>(v);
+}]>;
+
+
+def u1ImmPred : PatLeaf<(i1 imm), [{
+ // u1ImmPred predicate - True if the immediate fits in a 1-bit unsigned
+ // field.
+ int64_t v = (int64_t)N->getSExtValue();
+ return isUInt<1>(v);
+}]>;
+
+def m6ImmPred : PatLeaf<(i32 imm), [{
+ // m6ImmPred predicate - True if the immediate fits in a 6-bit sign extended
+ // field (intended for negative offsets; the sign itself is not checked here).
+ int64_t v = (int64_t)N->getSExtValue();
+ return isInt<6>(v);
+}]>;
+
+// nN means negative integers in the range [-(2^N - 1), 0].
+def n8ImmPred : PatLeaf<(i32 imm), [{
+ // n8ImmPred predicate - True if the immediate is a negative number in the
+ // range [-255, 0].
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-255 <= v && v <= 0);
+}]>;
+
+def nOneImmPred : PatLeaf<(i32 imm), [{
+ // nOneImmPred predicate - True if the immediate is -1.
+ int64_t v = (int64_t)N->getSExtValue();
+ return (-1 == v);
+}]>;
+
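+// Worked example of the isShiftedInt<N,S> checks used above (semantics per
+// llvm/Support/MathExtras.h: the value must be a multiple of 2^S and the
+// value shifted right by S must fit in an N-bit signed field). For
+// s4_2ImmPred, isShiftedInt<4,2> accepts exactly {-32, -28, ..., 24, 28}:
+//   v = 28 -> 28 % 4 == 0 and isInt<4>(28 >> 2 == 7) -> true
+//   v = 30 -> 30 % 4 != 0 -> false
+//   v = 32 -> isInt<4>(32 >> 2 == 8) is false (isInt<4> range is [-8, 7])
+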
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
new file mode 100644
index 000000000000..48f0f01bb4cf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -0,0 +1,308 @@
+//==- HexagonInstrFormats.td - Hexagon Instruction Formats --*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Flags +
+//
+// *** Must match HexagonBaseInfo.h ***
+//===----------------------------------------------------------------------===//
+
+class Type<bits<5> t> {
+ bits<5> Value = t;
+}
+def TypePSEUDO : Type<0>;
+def TypeALU32 : Type<1>;
+def TypeCR : Type<2>;
+def TypeJR : Type<3>;
+def TypeJ : Type<4>;
+def TypeLD : Type<5>;
+def TypeST : Type<6>;
+def TypeSYSTEM : Type<7>;
+def TypeXTYPE : Type<8>;
+def TypeMARKER : Type<31>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Declaration +
+//===----------------------------------------------------------------------===//
+
+class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr, InstrItinClass itin, Type type> : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "Hexagon";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Constraints = cstr;
+ let Itinerary = itin;
+
+ // *** Must match HexagonBaseInfo.h ***
+ Type HexagonType = type;
+ let TSFlags{4-0} = HexagonType.Value;
+ bits<1> isHexagonSolo = 0;
+ let TSFlags{5} = isHexagonSolo;
+
+ // Predicated instructions.
+ bits<1> isPredicated = 0;
+ let TSFlags{6} = isPredicated;
+
+ // *** The code above must match HexagonBaseInfo.h ***
+}
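+
+// Example encoding implied by the bit assignments above: a predicated TypeLD
+// instruction has HexagonType.Value = 5, isHexagonSolo = 0, isPredicated = 1,
+// so TSFlags = (1 << 6) | 5 = 0b1000101. Any decoder in HexagonBaseInfo.h
+// must read the same layout.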
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes Definitions +
+//===----------------------------------------------------------------------===//
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", LD, TypeLD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+ let mayLoad = 1;
+}
+
+// LD Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+ let mayLoad = 1;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ST, TypeST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+ let mayStore = 1;
+}
+
+// SYSTEM Instruction Class in V4 can take SLOT0 only.
+// In V2/V3 we used ST for this, but in V4 ST can take SLOT0 or SLOT1.
+class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", SYS, TypeSYSTEM> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// ST Instruction Class in V2/V3 can take SLOT0 only.
+// ST Instruction Class in V4 can take SLOT0 & SLOT1.
+// Definition of the instruction class CHANGED from V2/V3 to V4.
+class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+ let mayStore = 1;
+}
+
+// ALU32 Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class ALU32Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU32, TypeALU32> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// ALU64 Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
+class ALU64Type<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", ALU64, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm16;
+ bits<16> imm16_2;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", M, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// M Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
+class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", S, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// S Instruction Class in V2/V3.
+// XTYPE Instruction Class in V4.
+// Definition of the instruction class NOT CHANGED.
+// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
+class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+}
+
+// J Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", J, TypeJ> {
+ bits<16> imm16;
+}
+
+// JR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class JRType<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", JR, TypeJR> {
+ bits<5> rs;
+ bits<5> pu; // Predicate register
+}
+
+// CR Instruction Class in V2/V3/V4.
+// Definition of the instruction class NOT CHANGED.
+class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", CR, TypeCR> {
+ bits<5> rs;
+ bits<10> imm10;
+}
+
+class Marker<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MARKER, TypeMARKER> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PSEUDO, TypePSEUDO> {
+ let isCodeGenOnly = 1;
+ let isPseudo = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes Definitions -
+//===----------------------------------------------------------------------===//
+
+
+//
+// ALU32 patterns.
+//
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU32Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+//
+// ALU64 patterns.
+//
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+}
+
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : ALU64Type<outs, ins, asmstr, pattern> {
+ let rt{0-4} = 0;
+}
+
+// J Type Instructions.
+class JInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JType<outs, ins, asmstr, pattern> {
+}
+
+// JR type Instructions.
+class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : JRType<outs, ins, asmstr, pattern> {
+}
+
+
+// Post increment ST Instruction.
+class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : STInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+ let mayStore = 1;
+}
+
+// Post increment LD Instruction.
+class LDInstPI<dag outs, dag ins, string asmstr, list<dag> pattern, string cstr>
+ : LDInstPost<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+ let mayLoad = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instruction Format Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
new file mode 100644
index 000000000000..49741a3d1b20
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -0,0 +1,67 @@
+//==- HexagonInstrFormatsV4.td - Hexagon Instruction Formats -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+// *** Must match HexagonBaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeMEMOP : Type<9>;
+def TypeNV : Type<10>;
+def TypePREFIX : Type<30>;
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+//
+// NV type instructions.
+//
+class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", NV_V4, TypeNV> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<13> imm13;
+}
+
+// Definition of Post increment new value store.
+class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<13> imm13;
+}
+
+// Post increment ST Instruction.
+class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern,
+ string cstr>
+ : NVInstPost_V4<outs, ins, asmstr, pattern, cstr> {
+ let rt{0-4} = 0;
+}
+
+class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", MEM_V4, TypeMEMOP> {
+ bits<5> rd;
+ bits<5> rs;
+ bits<6> imm6;
+}
+
+class Immext<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX> {
+ let isCodeGenOnly = 1;
+
+ bits<26> imm26;
+}
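+
+// Immext appears to model the V4 constant-extender prefix word; see
+// isExtendable()/isExtended() in HexagonInstrInfo.cpp for the opcodes that
+// can take or already carry one.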
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
new file mode 100644
index 000000000000..8685ec192c7e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -0,0 +1,2732 @@
+//===-- HexagonInstrInfo.cpp - Hexagon Instruction Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/MathExtras.h"
+#define GET_INSTRINFO_CTOR
+#include "HexagonGenInstrInfo.inc"
+#include "HexagonGenDFAPacketizer.inc"
+
+using namespace llvm;
+
+///
+/// Constants for Hexagon instructions.
+///
+const int Hexagon_MEMW_OFFSET_MAX = 4095;
+const int Hexagon_MEMW_OFFSET_MIN = -4096;
+const int Hexagon_MEMD_OFFSET_MAX = 8191;
+const int Hexagon_MEMD_OFFSET_MIN = -8192;
+const int Hexagon_MEMH_OFFSET_MAX = 2047;
+const int Hexagon_MEMH_OFFSET_MIN = -2048;
+const int Hexagon_MEMB_OFFSET_MAX = 1023;
+const int Hexagon_MEMB_OFFSET_MIN = -1024;
+const int Hexagon_ADDI_OFFSET_MAX = 32767;
+const int Hexagon_ADDI_OFFSET_MIN = -32768;
+const int Hexagon_MEMD_AUTOINC_MAX = 56;
+const int Hexagon_MEMD_AUTOINC_MIN = -64;
+const int Hexagon_MEMW_AUTOINC_MAX = 28;
+const int Hexagon_MEMW_AUTOINC_MIN = -32;
+const int Hexagon_MEMH_AUTOINC_MAX = 14;
+const int Hexagon_MEMH_AUTOINC_MIN = -16;
+const int Hexagon_MEMB_AUTOINC_MAX = 7;
+const int Hexagon_MEMB_AUTOINC_MIN = -8;
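+
+// Note: each [MIN, MAX] pair above is exactly the range of an N-bit signed
+// two's-complement field, e.g. [-4096, 4095] is the isInt<13> range and
+// [-32768, 32767] is the isInt<16> range (see llvm/Support/MathExtras.h).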
+
+
+
+HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
+ : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
+ RI(ST, *this), Subtarget(ST) {
+}
+
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::LDriw:
+ case Hexagon::LDrid:
+ case Hexagon::LDrih:
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
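+
+// Illustration of the match above: a frame-slot load whose operand 0 is the
+// destination register, operand 1 the immediate offset (required to be 0
+// here), and operand 2 a frame index fi#2 sets FrameIndex to 2 and returns
+// the destination register.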
+
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::STriw:
+ case Hexagon::STrid:
+ case Hexagon::STrih:
+ case Hexagon::STrib:
+ if (MI->getOperand(2).isFI() &&
+ MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+unsigned
+HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const{
+
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_c;
+
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ int regPos = 0;
+ // Check if ReverseBranchCondition has asked to reverse this branch.
+ // If we want to reverse the branch an odd number of times, we want
+ // JMP_cNot.
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ BccOpc = Hexagon::JMP_cNot;
+ regPos = 1;
+ }
+
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Due to a bug in TailMerging/CFG Optimization, we need special-case
+ // handling of a predicated jump followed by an unconditional jump;
+ // otherwise, Tail Merging and CFG Optimization go
+ // into an infinite loop.
+ MachineBasicBlock *NewTBB, *NewFBB;
+ SmallVector<MachineOperand, 4> Cond;
+ MachineInstr *Term = MBB.getFirstTerminator();
+ if (isPredicated(Term) && !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond,
+ false)) {
+ MachineBasicBlock *NextBB =
+ llvm::next(MachineFunction::iterator(&MBB));
+ if (NewTBB == NextBB) {
+ ReverseBranchCondition(Cond);
+ RemoveBranch(MBB);
+ return InsertBranch(MBB, TBB, 0, Cond, DL);
+ }
+ }
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ } else {
+ BuildMI(&MBB, DL,
+ get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ }
+ return 1;
+ }
+
+ BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[regPos].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+ return 2;
+}
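+
+// Shape of the emitted code (from the cases above): when both TBB and FBB
+// are given, the pair "JMP_c p, TBB; JMP FBB" is emitted and 2 is returned;
+// with no FBB a single JMP or JMP_c is emitted and 1 is returned.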
+
+
+bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ FBB = NULL;
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+
+ // A basic block may look like this:
+ //
+ // [ insn
+ // EH_LABEL
+ // insn
+ // insn
+ // insn
+ // EH_LABEL
+ // insn ]
+ //
+ // It has two successors but does not have a terminator; we don't know
+ // how to handle it.
+ do {
+ --I;
+ if (I->isEHLabel())
+ return true;
+ } while (I != MBB.begin());
+
+ I = MBB.end();
+ --I;
+
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_c) {
+ // Block ends with fall-through true condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ if (LastInst->getOpcode() == Hexagon::JMP_cNot) {
+ // Block ends with fall-through false condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with Hexagon::BRCOND and Hexagon::JMP, handle it.
+ if (((SecondLastInst->getOpcode() == Hexagon::BRCOND) ||
+ (SecondLastInst->getOpcode() == Hexagon::JMP_c)) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with Hexagon::JMP_cNot and Hexagon::JMP, handle it.
+ if ((SecondLastInst->getOpcode() == Hexagon::JMP_cNot) &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two Hexagon::JMPs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == Hexagon::JMP &&
+ LastInst->getOpcode() == Hexagon::JMP) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+
+unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ int BOpc = Hexagon::JMP;
+ int BccOpc = Hexagon::JMP_c;
+ int BccOpcNot = Hexagon::JMP_cNot;
+
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc &&
+ I->getOpcode() != BccOpcNot)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != BccOpc && I->getOpcode() != BccOpcNot)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+ // Map Pd = Ps to Pd = or(Ps, Ps).
+ BuildMI(MBB, I, DL, get(Hexagon::OR_pp),
+ DestReg).addReg(SrcReg).addReg(SrcReg);
+ return;
+ }
+ if (Hexagon::DoubleRegsRegClass.contains(DestReg, SrcReg)) {
+ // We can have an overlap between single and double reg: r1:0 = r0.
+ if (SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+ // r1:0 = r0
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ } else {
+ // r1:0 = r1 or no overlap.
+ BuildMI(MBB, I, DL, get(Hexagon::TFR), (RI.getSubReg(DestReg,
+ Hexagon::subreg_loreg))).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Hexagon::TFRI), (RI.getSubReg(DestReg,
+ Hexagon::subreg_hireg))).addImm(0);
+ }
+ return;
+ }
+ if (Hexagon::CRRegsRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Hexagon::TFCR), DestReg).addReg(SrcReg);
+ return;
+ }
+
+ llvm_unreachable("Unimplemented");
+}
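+
+// Examples of the mappings above: an IntRegs copy becomes a plain TFR, a
+// DoubleRegs copy a TFR_64, and a predicate copy Pd = Ps is emitted as
+// Pd = or(Ps, Ps).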
+
+
+void HexagonInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (Hexagon::IntRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::DoubleRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STrid))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else if (Hexagon::PredRegsRegisterClass->hasSubClassEq(RC)) {
+ BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+ .addFrameIndex(FI).addImm(0)
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Unimplemented");
+ }
+}
+
+
+void HexagonInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+ bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL = MBB.findDebugLoc(I);
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FI),
+ Align);
+
+ if (RC == Hexagon::IntRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::DoubleRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDrid), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else if (RC == Hexagon::PredRegsRegisterClass) {
+ BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ } else {
+ llvm_unreachable("Can't store this register to stack slot");
+ }
+}
+
+
+void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
+ llvm_unreachable("Unimplemented");
+}
+
+
+MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FI) const {
+ // Hexagon_TODO: Implement.
+ return 0;
+}
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetRegisterClass *TRC;
+ if (VT == MVT::i1) {
+ TRC = Hexagon::PredRegsRegisterClass;
+ } else if (VT == MVT::i32) {
+ TRC = Hexagon::IntRegsRegisterClass;
+ } else if (VT == MVT::i64) {
+ TRC = Hexagon::DoubleRegsRegisterClass;
+ } else {
+ llvm_unreachable("Cannot handle this register class");
+ }
+
+ unsigned NewReg = RegInfo.createVirtualRegister(TRC);
+ return NewReg;
+}
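+
+// Typical use (illustrative): "unsigned VReg = createVR(&MF, MVT::i32);"
+// returns a fresh virtual register constrained to IntRegsRegisterClass.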
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return true;
+
+ // TFR_FI
+ case Hexagon::TFR_FI:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+ switch(MI->getOpcode()) {
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+
+ // V4 absolute set addressing.
+ case Hexagon::LDrid_abs_setimm_V4:
+ case Hexagon::LDriw_abs_setimm_V4:
+ case Hexagon::LDrih_abs_setimm_V4:
+ case Hexagon::LDrib_abs_setimm_V4:
+ case Hexagon::LDriuh_abs_setimm_V4:
+ case Hexagon::LDriub_abs_setimm_V4:
+
+ case Hexagon::STrid_abs_setimm_V4:
+ case Hexagon::STrib_abs_setimm_V4:
+ case Hexagon::STrih_abs_setimm_V4:
+ case Hexagon::STriw_abs_setimm_V4:
+
+ // V4 global address load.
+ case Hexagon::LDrid_GP_cPt_V4 :
+ case Hexagon::LDrid_GP_cNotPt_V4 :
+ case Hexagon::LDrid_GP_cdnPt_V4 :
+ case Hexagon::LDrid_GP_cdnNotPt_V4 :
+ case Hexagon::LDrib_GP_cPt_V4 :
+ case Hexagon::LDrib_GP_cNotPt_V4 :
+ case Hexagon::LDrib_GP_cdnPt_V4 :
+ case Hexagon::LDrib_GP_cdnNotPt_V4 :
+ case Hexagon::LDriub_GP_cPt_V4 :
+ case Hexagon::LDriub_GP_cNotPt_V4 :
+ case Hexagon::LDriub_GP_cdnPt_V4 :
+ case Hexagon::LDriub_GP_cdnNotPt_V4 :
+ case Hexagon::LDrih_GP_cPt_V4 :
+ case Hexagon::LDrih_GP_cNotPt_V4 :
+ case Hexagon::LDrih_GP_cdnPt_V4 :
+ case Hexagon::LDrih_GP_cdnNotPt_V4 :
+ case Hexagon::LDriuh_GP_cPt_V4 :
+ case Hexagon::LDriuh_GP_cNotPt_V4 :
+ case Hexagon::LDriuh_GP_cdnPt_V4 :
+ case Hexagon::LDriuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDriw_GP_cPt_V4 :
+ case Hexagon::LDriw_GP_cNotPt_V4 :
+ case Hexagon::LDriw_GP_cdnPt_V4 :
+ case Hexagon::LDriw_GP_cdnNotPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+
+ // V4 global address store.
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cdnPt_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cdnPt_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cdnPt_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+
+ // V4 predicated global address new value store.
+ case Hexagon::STrib_GP_cPt_nv_V4 :
+ case Hexagon::STrib_GP_cNotPt_nv_V4 :
+ case Hexagon::STrib_GP_cdnPt_nv_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_GP_cPt_nv_V4 :
+ case Hexagon::STrih_GP_cNotPt_nv_V4 :
+ case Hexagon::STrih_GP_cdnPt_nv_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_GP_cPt_nv_V4 :
+ case Hexagon::STriw_GP_cNotPt_nv_V4 :
+ case Hexagon::STriw_GP_cdnPt_nv_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ case Hexagon::STb_GP_cdnPt_nv_V4 :
+ case Hexagon::STb_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ case Hexagon::STh_GP_cdnPt_nv_V4 :
+ case Hexagon::STh_GP_cdnNotPt_nv_V4 :
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ case Hexagon::STw_GP_cdnPt_nv_V4 :
+ case Hexagon::STw_GP_cdnNotPt_nv_V4 :
+
+ // TFR_FI
+ case Hexagon::TFR_FI_immext_V4:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+
+ // JMP_EQri - with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+
+ // JMP_GTri - with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return true;
+
+ default:
+ return false;
+ }
+ return false;
+}
+
+unsigned HexagonInstrInfo::getImmExtForm(const MachineInstr* MI) const {
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown type of instruction");
+
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return Hexagon::JMP_EQriPt_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ return Hexagon::JMP_EQriNotPt_ie_nv_V4;
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ return Hexagon::JMP_EQriPnt_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_ie_nv_V4;
+
+ // JMP_EQri -- with -1
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ return Hexagon::JMP_EQriPtneg_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_ie_nv_V4;
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ return Hexagon::JMP_EQriPntneg_ie_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_ie_nv_V4;
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return Hexagon::JMP_EQrrPt_ie_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ return Hexagon::JMP_EQrrPnt_ie_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_ie_nv_V4;
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_nv_V4:
+ return Hexagon::JMP_GTriPt_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ return Hexagon::JMP_GTriNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ return Hexagon::JMP_GTriPnt_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_ie_nv_V4;
+
+ // JMP_GTri -- with -1
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ return Hexagon::JMP_GTriPtneg_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_ie_nv_V4;
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ return Hexagon::JMP_GTriPntneg_ie_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_ie_nv_V4;
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ return Hexagon::JMP_GTrrPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ return Hexagon::JMP_GTrrPnt_ie_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_ie_nv_V4;
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_ie_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4;
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ return Hexagon::JMP_GTUriPt_ie_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ return Hexagon::JMP_GTUriPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_ie_nv_V4;
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ return Hexagon::JMP_GTUrrPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_ie_nv_V4;
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_ie_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4;
+
+ case Hexagon::TFR_FI:
+ return Hexagon::TFR_FI_immext_V4;
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ llvm_unreachable("Needs implementing");
+ }
+}
+
+unsigned HexagonInstrInfo::getNormalBranchForm(const MachineInstr* MI) const {
+ switch(MI->getOpcode()) {
+ default: llvm_unreachable("Unknown type of jump instruction");
+
+ // JMP_EQri
+ case Hexagon::JMP_EQriPt_ie_nv_V4:
+ return Hexagon::JMP_EQriPt_nv_V4;
+ case Hexagon::JMP_EQriNotPt_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::JMP_EQriPnt_ie_nv_V4:
+ return Hexagon::JMP_EQriPnt_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_nv_V4;
+
+ // JMP_EQri -- with -1
+ case Hexagon::JMP_EQriPtneg_ie_nv_V4:
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_nv_V4;
+ case Hexagon::JMP_EQriPntneg_ie_nv_V4:
+ return Hexagon::JMP_EQriPntneg_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_ie_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_nv_V4;
+
+ // JMP_EQrr
+ case Hexagon::JMP_EQrrPt_ie_nv_V4:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQrrPnt_ie_nv_V4:
+ return Hexagon::JMP_EQrrPnt_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_nv_V4;
+
+ // JMP_GTri
+ case Hexagon::JMP_GTriPt_ie_nv_V4:
+ return Hexagon::JMP_GTriPt_nv_V4;
+ case Hexagon::JMP_GTriNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPt_nv_V4;
+ case Hexagon::JMP_GTriPnt_ie_nv_V4:
+ return Hexagon::JMP_GTriPnt_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_nv_V4;
+
+ // JMP_GTri -- with -1
+ case Hexagon::JMP_GTriPtneg_ie_nv_V4:
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_nv_V4;
+ case Hexagon::JMP_GTriPntneg_ie_nv_V4:
+ return Hexagon::JMP_GTriPntneg_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_ie_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_nv_V4;
+
+ // JMP_GTrr
+ case Hexagon::JMP_GTrrPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrPt_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_nv_V4;
+ case Hexagon::JMP_GTrrPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrPnt_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_nv_V4;
+
+ // JMP_GTrrdn
+ case Hexagon::JMP_GTrrdnPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTrrdnPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
+
+ // JMP_GTUri
+ case Hexagon::JMP_GTUriPt_ie_nv_V4:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_nv_V4;
+ case Hexagon::JMP_GTUriPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUriPnt_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_nv_V4;
+
+ // JMP_GTUrr
+ case Hexagon::JMP_GTUrrPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_nv_V4;
+
+ // JMP_GTUrrdn
+ case Hexagon::JMP_GTUrrdnPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_ie_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
+ }
+}
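+
+// Note: getNormalBranchForm is the inverse of getImmExtForm for the
+// new-value jump opcodes; it maps each immediate-extended _ie_nv_V4 opcode
+// back to its base _nv_V4 form.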
+
+
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+
+ // Store Byte
+ case Hexagon::STrib_nv_V4:
+ case Hexagon::STrib_indexed_nv_V4:
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ case Hexagon::STrib_shl_nv_V4:
+ case Hexagon::STrib_GP_nv_V4:
+ case Hexagon::STb_GP_nv_V4:
+ case Hexagon::POST_STbri_nv_V4:
+ case Hexagon::STrib_cPt_nv_V4:
+ case Hexagon::STrib_cdnPt_nv_V4:
+ case Hexagon::STrib_cNotPt_nv_V4:
+ case Hexagon::STrib_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STbri_cPt_nv_V4:
+ case Hexagon::POST_STbri_cdnPt_nv_V4:
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_GP_cPt_nv_V4:
+ case Hexagon::STrib_GP_cNotPt_nv_V4:
+ case Hexagon::STrib_GP_cdnPt_nv_V4:
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_abs_nv_V4:
+ case Hexagon::STrib_abs_cPt_nv_V4:
+ case Hexagon::STrib_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_nv_V4:
+ case Hexagon::STrib_imm_abs_cPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrib_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Halfword
+ case Hexagon::STrih_nv_V4:
+ case Hexagon::STrih_indexed_nv_V4:
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ case Hexagon::STrih_shl_nv_V4:
+ case Hexagon::STrih_GP_nv_V4:
+ case Hexagon::STh_GP_nv_V4:
+ case Hexagon::POST_SThri_nv_V4:
+ case Hexagon::STrih_cPt_nv_V4:
+ case Hexagon::STrih_cdnPt_nv_V4:
+ case Hexagon::STrih_cNotPt_nv_V4:
+ case Hexagon::STrih_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_SThri_cPt_nv_V4:
+ case Hexagon::POST_SThri_cdnPt_nv_V4:
+ case Hexagon::POST_SThri_cNotPt_nv_V4:
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_GP_cPt_nv_V4:
+ case Hexagon::STrih_GP_cNotPt_nv_V4:
+ case Hexagon::STrih_GP_cdnPt_nv_V4:
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_abs_nv_V4:
+ case Hexagon::STrih_abs_cPt_nv_V4:
+ case Hexagon::STrih_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_abs_cdnNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_nv_V4:
+ case Hexagon::STrih_imm_abs_cPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STrih_imm_abs_cdnNotPt_nv_V4:
+
+ // Store Word
+ case Hexagon::STriw_nv_V4:
+ case Hexagon::STriw_indexed_nv_V4:
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ case Hexagon::STriw_shl_nv_V4:
+ case Hexagon::STriw_GP_nv_V4:
+ case Hexagon::STw_GP_nv_V4:
+ case Hexagon::POST_STwri_nv_V4:
+ case Hexagon::STriw_cPt_nv_V4:
+ case Hexagon::STriw_cdnPt_nv_V4:
+ case Hexagon::STriw_cNotPt_nv_V4:
+ case Hexagon::STriw_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::POST_STwri_cPt_nv_V4:
+ case Hexagon::POST_STwri_cdnPt_nv_V4:
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_GP_cPt_nv_V4:
+ case Hexagon::STriw_GP_cNotPt_nv_V4:
+ case Hexagon::STriw_GP_cdnPt_nv_V4:
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_abs_nv_V4:
+ case Hexagon::STriw_abs_cPt_nv_V4:
+ case Hexagon::STriw_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_abs_cdnNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_nv_V4:
+ case Hexagon::STriw_imm_abs_cPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cNotPt_nv_V4:
+ case Hexagon::STriw_imm_abs_cdnNotPt_nv_V4:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
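+// Returns true if MI is a post-increment load or store, i.e. one that
+// writes the incremented address back into its base register.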
+bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const {
+ switch (MI->getOpcode())
+ {
+ // Load Byte
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDrib_cPt:
+ case Hexagon::POST_LDrib_cNotPt:
+ case Hexagon::POST_LDrib_cdnPt_V4:
+ case Hexagon::POST_LDrib_cdnNotPt_V4:
+
+ // Load unsigned byte
+ case Hexagon::POST_LDriub:
+ case Hexagon::POST_LDriub_cPt:
+ case Hexagon::POST_LDriub_cNotPt:
+ case Hexagon::POST_LDriub_cdnPt_V4:
+ case Hexagon::POST_LDriub_cdnNotPt_V4:
+
+ // Load halfword
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDrih_cPt:
+ case Hexagon::POST_LDrih_cNotPt:
+ case Hexagon::POST_LDrih_cdnPt_V4:
+ case Hexagon::POST_LDrih_cdnNotPt_V4:
+
+ // Load unsigned halfword
+ case Hexagon::POST_LDriuh:
+ case Hexagon::POST_LDriuh_cPt:
+ case Hexagon::POST_LDriuh_cNotPt:
+ case Hexagon::POST_LDriuh_cdnPt_V4:
+ case Hexagon::POST_LDriuh_cdnNotPt_V4:
+
+ // Load word
+ case Hexagon::POST_LDriw:
+ case Hexagon::POST_LDriw_cPt:
+ case Hexagon::POST_LDriw_cNotPt:
+ case Hexagon::POST_LDriw_cdnPt_V4:
+ case Hexagon::POST_LDriw_cdnNotPt_V4:
+
+ // Load double word
+ case Hexagon::POST_LDrid:
+ case Hexagon::POST_LDrid_cPt:
+ case Hexagon::POST_LDrid_cNotPt:
+ case Hexagon::POST_LDrid_cdnPt_V4:
+ case Hexagon::POST_LDrid_cdnNotPt_V4:
+
+ // Store byte
+ case Hexagon::POST_STbri:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+
+ // Store halfword
+ case Hexagon::POST_SThri:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+
+ // Store word
+ case Hexagon::POST_STwri:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+
+ // Store double word
+ case Hexagon::POST_STdri:
+ case Hexagon::POST_STdri_cPt:
+ case Hexagon::POST_STdri_cNotPt:
+ case Hexagon::POST_STdri_cdnPt_V4:
+ case Hexagon::POST_STdri_cdnNotPt_V4:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
+ return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4;
+}
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+ bool isPred = MI->getDesc().isPredicable();
+
+ if (!isPred)
+ return false;
+
+ const int Opc = MI->getOpcode();
+
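+  // The predicated forms of these instructions have narrower immediate
+  // fields than their unconditional counterparts, so predicability also
+  // depends on the immediate operand fitting the predicated encoding. For
+  // example, isShiftedUInt<6,3> accepts only multiples of 8 in [0, 8*63].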
+ switch(Opc) {
+ case Hexagon::TFRI:
+ return isInt<12>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrid:
+ case Hexagon::STrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(1).getImm());
+
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_nv_V4:
+ return isShiftedUInt<6,2>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_nv_V4:
+ return isShiftedUInt<6,1>(MI->getOperand(1).getImm());
+
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_nv_V4:
+ return isUInt<6>(MI->getOperand(1).getImm());
+
+ case Hexagon::LDrid:
+ case Hexagon::LDrid_indexed:
+ return isShiftedUInt<6,3>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDriw:
+ case Hexagon::LDriw_indexed:
+ return isShiftedUInt<6,2>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::LDrih_indexed:
+ case Hexagon::LDriuh_indexed:
+ return isShiftedUInt<6,1>(MI->getOperand(2).getImm());
+
+ case Hexagon::LDrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDrib_indexed:
+ case Hexagon::LDriub_indexed:
+ return isUInt<6>(MI->getOperand(2).getImm());
+
+ case Hexagon::POST_LDrid:
+ return isShiftedInt<4,3>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDriw:
+ return isShiftedInt<4,2>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrih:
+ case Hexagon::POST_LDriuh:
+ return isShiftedInt<4,1>(MI->getOperand(3).getImm());
+
+ case Hexagon::POST_LDrib:
+ case Hexagon::POST_LDriub:
+ return isInt<4>(MI->getOperand(3).getImm());
+
+ case Hexagon::STrib_imm_V4:
+ case Hexagon::STrih_imm_V4:
+ case Hexagon::STriw_imm_V4:
+ return (isUInt<6>(MI->getOperand(1).getImm()) &&
+ isInt<6>(MI->getOperand(2).getImm()));
+
+ case Hexagon::ADD_ri:
+ return isInt<8>(MI->getOperand(2).getImm());
+
+ case Hexagon::ASLH:
+ case Hexagon::ASRH:
+ case Hexagon::SXTB:
+ case Hexagon::SXTH:
+ case Hexagon::ZXTB:
+ case Hexagon::ZXTH:
+ return Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+ case Hexagon::JMPR:
+ return false;
+ }
+
+ return true;
+}
+
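+// Returns the opcode that performs the same operation under the opposite
+// predicate sense (cPt <-> cNotPt). Every predicated opcode handled here
+// must be listed together with its complement.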
+unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
+ switch(Opc) {
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cNotPt;
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cNotPt;
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cNot;
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_c;
+
+ case Hexagon::ADD_ri_cPt:
+ return Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_ri_cNotPt:
+ return Hexagon::ADD_ri_cPt;
+
+ case Hexagon::ADD_rr_cPt:
+ return Hexagon::ADD_rr_cNotPt;
+ case Hexagon::ADD_rr_cNotPt:
+ return Hexagon::ADD_rr_cPt;
+
+ case Hexagon::XOR_rr_cPt:
+ return Hexagon::XOR_rr_cNotPt;
+ case Hexagon::XOR_rr_cNotPt:
+ return Hexagon::XOR_rr_cPt;
+
+ case Hexagon::AND_rr_cPt:
+ return Hexagon::AND_rr_cNotPt;
+ case Hexagon::AND_rr_cNotPt:
+ return Hexagon::AND_rr_cPt;
+
+ case Hexagon::OR_rr_cPt:
+ return Hexagon::OR_rr_cNotPt;
+ case Hexagon::OR_rr_cNotPt:
+ return Hexagon::OR_rr_cPt;
+
+ case Hexagon::SUB_rr_cPt:
+ return Hexagon::SUB_rr_cNotPt;
+ case Hexagon::SUB_rr_cNotPt:
+ return Hexagon::SUB_rr_cPt;
+
+ case Hexagon::COMBINE_rr_cPt:
+ return Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::COMBINE_rr_cNotPt:
+ return Hexagon::COMBINE_rr_cPt;
+
+ case Hexagon::ASLH_cPt_V4:
+ return Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASLH_cNotPt_V4:
+ return Hexagon::ASLH_cPt_V4;
+
+ case Hexagon::ASRH_cPt_V4:
+ return Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::ASRH_cNotPt_V4:
+ return Hexagon::ASRH_cPt_V4;
+
+ case Hexagon::SXTB_cPt_V4:
+ return Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTB_cNotPt_V4:
+ return Hexagon::SXTB_cPt_V4;
+
+ case Hexagon::SXTH_cPt_V4:
+ return Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::SXTH_cNotPt_V4:
+ return Hexagon::SXTH_cPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4:
+ return Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4:
+ return Hexagon::ZXTB_cPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4:
+ return Hexagon::ZXTH_cNotPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4:
+ return Hexagon::ZXTH_cPt_V4;
+
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cNotPt;
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_cPt_V4:
+ return Hexagon::LDrid_indexed_cNotPt_V4;
+ case Hexagon::LDrid_indexed_cNotPt_V4:
+ return Hexagon::LDrid_indexed_cPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cPt_V4:
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_cPt_V4:
+ return Hexagon::LDrib_indexed_cNotPt_V4;
+ case Hexagon::LDrib_indexed_cNotPt_V4:
+ return Hexagon::LDrib_indexed_cPt_V4;
+
+ case Hexagon::LDriub_indexed_cPt_V4:
+ return Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDriub_indexed_cNotPt_V4:
+ return Hexagon::LDriub_indexed_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4:
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4:
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_cPt_V4:
+ return Hexagon::LDrih_indexed_cNotPt_V4;
+ case Hexagon::LDrih_indexed_cNotPt_V4:
+ return Hexagon::LDrih_indexed_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_cPt_V4:
+ return Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_cNotPt_V4:
+ return Hexagon::LDriuh_indexed_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4:
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4:
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4:
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_cPt_V4:
+ return Hexagon::LDriw_indexed_cNotPt_V4;
+ case Hexagon::LDriw_indexed_cNotPt_V4:
+ return Hexagon::LDriw_indexed_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4:
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4:
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ // Byte.
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cNotPt;
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_imm_cPt_V4:
+ return Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_imm_cNotPt_V4:
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ // Halfword.
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cNotPt;
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_imm_cPt_V4:
+ return Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_imm_cNotPt_V4:
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ // Word.
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cNotPt;
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4:
+ return Hexagon::STriw_imm_cNotPt_V4;
+ case Hexagon::STriw_imm_cNotPt_V4:
+ return Hexagon::STriw_imm_cPt_V4;
+
+ // Double word.
+ case Hexagon::POST_STdri_cPt:
+ return Hexagon::POST_STdri_cNotPt;
+ case Hexagon::POST_STdri_cNotPt:
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::STrid_cPt:
+ return Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_cNotPt:
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_indexed_cPt:
+ return Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_cNotPt:
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4:
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+ case Hexagon::STrid_indexed_shl_cNotPt_V4:
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ // Load.
+ case Hexagon::LDrid_cPt:
+ return Hexagon::LDrid_cNotPt;
+ case Hexagon::LDrid_cNotPt:
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDriw_cPt:
+ return Hexagon::LDriw_cNotPt;
+ case Hexagon::LDriw_cNotPt:
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDrih_cPt:
+ return Hexagon::LDrih_cNotPt;
+ case Hexagon::LDrih_cNotPt:
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDriuh_cPt:
+ return Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDriuh_cNotPt:
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDrib_cPt:
+ return Hexagon::LDrib_cNotPt;
+ case Hexagon::LDrib_cNotPt:
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDriub_cPt:
+ return Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriub_cNotPt:
+ return Hexagon::LDriub_cPt;
+
+ // Load Indexed.
+ case Hexagon::LDrid_indexed_cPt:
+ return Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDrid_indexed_cNotPt:
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cPt:
+ return Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDriw_indexed_cNotPt:
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cPt:
+ return Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDrih_indexed_cNotPt:
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cPt:
+ return Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed_cNotPt:
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cPt:
+ return Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDrib_indexed_cNotPt:
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cPt:
+ return Hexagon::LDriub_indexed_cNotPt;
+ case Hexagon::LDriub_indexed_cNotPt:
+ return Hexagon::LDriub_indexed_cPt;
+
+ // Post Inc Load.
+ case Hexagon::POST_LDrid_cPt:
+ return Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDrid_cNotPt:
+ return Hexagon::POST_LDrid_cPt;
+
+ case Hexagon::POST_LDriw_cPt:
+ return Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDriw_cNotPt:
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDrih_cPt:
+ return Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDrih_cNotPt:
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDriuh_cPt:
+ return Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDriuh_cNotPt:
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDrib_cPt:
+ return Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDrib_cNotPt:
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDriub_cPt:
+ return Hexagon::POST_LDriub_cNotPt;
+ case Hexagon::POST_LDriub_cNotPt:
+ return Hexagon::POST_LDriub_cPt;
+
+ // Dealloc_return.
+ case Hexagon::DEALLOC_RET_cPt_V4:
+ return Hexagon::DEALLOC_RET_cNotPt_V4;
+ case Hexagon::DEALLOC_RET_cNotPt_V4:
+ return Hexagon::DEALLOC_RET_cPt_V4;
+
+ // New Value Jump.
+ // JMPEQ_ri - with -1.
+ case Hexagon::JMP_EQriPtneg_nv_V4:
+ return Hexagon::JMP_EQriNotPtneg_nv_V4;
+ case Hexagon::JMP_EQriNotPtneg_nv_V4:
+ return Hexagon::JMP_EQriPtneg_nv_V4;
+
+ case Hexagon::JMP_EQriPntneg_nv_V4:
+ return Hexagon::JMP_EQriNotPntneg_nv_V4;
+ case Hexagon::JMP_EQriNotPntneg_nv_V4:
+ return Hexagon::JMP_EQriPntneg_nv_V4;
+
+ // JMPEQ_ri.
+ case Hexagon::JMP_EQriPt_nv_V4:
+ return Hexagon::JMP_EQriNotPt_nv_V4;
+ case Hexagon::JMP_EQriNotPt_nv_V4:
+ return Hexagon::JMP_EQriPt_nv_V4;
+
+ case Hexagon::JMP_EQriPnt_nv_V4:
+ return Hexagon::JMP_EQriNotPnt_nv_V4;
+ case Hexagon::JMP_EQriNotPnt_nv_V4:
+ return Hexagon::JMP_EQriPnt_nv_V4;
+
+ // JMPEQ_rr.
+ case Hexagon::JMP_EQrrPt_nv_V4:
+ return Hexagon::JMP_EQrrNotPt_nv_V4;
+ case Hexagon::JMP_EQrrNotPt_nv_V4:
+ return Hexagon::JMP_EQrrPt_nv_V4;
+
+ case Hexagon::JMP_EQrrPnt_nv_V4:
+ return Hexagon::JMP_EQrrNotPnt_nv_V4;
+ case Hexagon::JMP_EQrrNotPnt_nv_V4:
+ return Hexagon::JMP_EQrrPnt_nv_V4;
+
+ // JMPGT_ri - with -1.
+ case Hexagon::JMP_GTriPtneg_nv_V4:
+ return Hexagon::JMP_GTriNotPtneg_nv_V4;
+ case Hexagon::JMP_GTriNotPtneg_nv_V4:
+ return Hexagon::JMP_GTriPtneg_nv_V4;
+
+ case Hexagon::JMP_GTriPntneg_nv_V4:
+ return Hexagon::JMP_GTriNotPntneg_nv_V4;
+ case Hexagon::JMP_GTriNotPntneg_nv_V4:
+ return Hexagon::JMP_GTriPntneg_nv_V4;
+
+ // JMPGT_ri.
+ case Hexagon::JMP_GTriPt_nv_V4:
+ return Hexagon::JMP_GTriNotPt_nv_V4;
+ case Hexagon::JMP_GTriNotPt_nv_V4:
+ return Hexagon::JMP_GTriPt_nv_V4;
+
+ case Hexagon::JMP_GTriPnt_nv_V4:
+ return Hexagon::JMP_GTriNotPnt_nv_V4;
+ case Hexagon::JMP_GTriNotPnt_nv_V4:
+ return Hexagon::JMP_GTriPnt_nv_V4;
+
+ // JMPGT_rr.
+ case Hexagon::JMP_GTrrPt_nv_V4:
+ return Hexagon::JMP_GTrrNotPt_nv_V4;
+ case Hexagon::JMP_GTrrNotPt_nv_V4:
+ return Hexagon::JMP_GTrrPt_nv_V4;
+
+ case Hexagon::JMP_GTrrPnt_nv_V4:
+ return Hexagon::JMP_GTrrNotPnt_nv_V4;
+ case Hexagon::JMP_GTrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrPnt_nv_V4;
+
+ // JMPGT_rrdn.
+ case Hexagon::JMP_GTrrdnPt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTrrdnPt_nv_V4;
+
+ case Hexagon::JMP_GTrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnNotPnt_nv_V4;
+ case Hexagon::JMP_GTrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTrrdnPnt_nv_V4;
+
+ // JMPGTU_ri.
+ case Hexagon::JMP_GTUriPt_nv_V4:
+ return Hexagon::JMP_GTUriNotPt_nv_V4;
+ case Hexagon::JMP_GTUriNotPt_nv_V4:
+ return Hexagon::JMP_GTUriPt_nv_V4;
+
+ case Hexagon::JMP_GTUriPnt_nv_V4:
+ return Hexagon::JMP_GTUriNotPnt_nv_V4;
+ case Hexagon::JMP_GTUriNotPnt_nv_V4:
+ return Hexagon::JMP_GTUriPnt_nv_V4;
+
+ // JMPGTU_rr.
+ case Hexagon::JMP_GTUrrPt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrPt_nv_V4;
+
+ case Hexagon::JMP_GTUrrPnt_nv_V4:
+ return Hexagon::JMP_GTUrrNotPnt_nv_V4;
+ case Hexagon::JMP_GTUrrNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrPnt_nv_V4;
+
+ // JMPGTU_rrdn.
+ case Hexagon::JMP_GTUrrdnPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPt_nv_V4;
+
+ case Hexagon::JMP_GTUrrdnPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnNotPnt_nv_V4;
+ case Hexagon::JMP_GTUrrdnNotPnt_nv_V4:
+ return Hexagon::JMP_GTUrrdnPnt_nv_V4;
+
+ default:
+ llvm_unreachable("Unexpected predicated instruction");
+ }
+}
+
+
+int HexagonInstrInfo::
+getMatchingCondBranchOpcode(int Opc, bool invertPredicate) const {
+ switch(Opc) {
+ case Hexagon::TFR:
+ return !invertPredicate ? Hexagon::TFR_cPt :
+ Hexagon::TFR_cNotPt;
+ case Hexagon::TFRI:
+ return !invertPredicate ? Hexagon::TFRI_cPt :
+ Hexagon::TFRI_cNotPt;
+ case Hexagon::JMP:
+ return !invertPredicate ? Hexagon::JMP_c :
+ Hexagon::JMP_cNot;
+ case Hexagon::ADD_ri:
+ return !invertPredicate ? Hexagon::ADD_ri_cPt :
+ Hexagon::ADD_ri_cNotPt;
+ case Hexagon::ADD_rr:
+ return !invertPredicate ? Hexagon::ADD_rr_cPt :
+ Hexagon::ADD_rr_cNotPt;
+ case Hexagon::XOR_rr:
+ return !invertPredicate ? Hexagon::XOR_rr_cPt :
+ Hexagon::XOR_rr_cNotPt;
+ case Hexagon::AND_rr:
+ return !invertPredicate ? Hexagon::AND_rr_cPt :
+ Hexagon::AND_rr_cNotPt;
+ case Hexagon::OR_rr:
+ return !invertPredicate ? Hexagon::OR_rr_cPt :
+ Hexagon::OR_rr_cNotPt;
+ case Hexagon::SUB_rr:
+ return !invertPredicate ? Hexagon::SUB_rr_cPt :
+ Hexagon::SUB_rr_cNotPt;
+ case Hexagon::COMBINE_rr:
+ return !invertPredicate ? Hexagon::COMBINE_rr_cPt :
+ Hexagon::COMBINE_rr_cNotPt;
+ case Hexagon::ASLH:
+ return !invertPredicate ? Hexagon::ASLH_cPt_V4 :
+ Hexagon::ASLH_cNotPt_V4;
+ case Hexagon::ASRH:
+ return !invertPredicate ? Hexagon::ASRH_cPt_V4 :
+ Hexagon::ASRH_cNotPt_V4;
+ case Hexagon::SXTB:
+ return !invertPredicate ? Hexagon::SXTB_cPt_V4 :
+ Hexagon::SXTB_cNotPt_V4;
+ case Hexagon::SXTH:
+ return !invertPredicate ? Hexagon::SXTH_cPt_V4 :
+ Hexagon::SXTH_cNotPt_V4;
+ case Hexagon::ZXTB:
+ return !invertPredicate ? Hexagon::ZXTB_cPt_V4 :
+ Hexagon::ZXTB_cNotPt_V4;
+ case Hexagon::ZXTH:
+ return !invertPredicate ? Hexagon::ZXTH_cPt_V4 :
+ Hexagon::ZXTH_cNotPt_V4;
+
+ case Hexagon::JMPR:
+ return !invertPredicate ? Hexagon::JMPR_cPt :
+ Hexagon::JMPR_cNotPt;
+
+ // V4 indexed+scaled load.
+ case Hexagon::LDrid_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt_V4 :
+ Hexagon::LDrid_indexed_cNotPt_V4;
+ case Hexagon::LDrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrid_indexed_shl_cPt_V4 :
+ Hexagon::LDrid_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrib_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt_V4 :
+ Hexagon::LDrib_indexed_cNotPt_V4;
+ case Hexagon::LDriub_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt_V4 :
+ Hexagon::LDriub_indexed_cNotPt_V4;
+ case Hexagon::LDrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrib_indexed_shl_cPt_V4 :
+ Hexagon::LDrib_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriub_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriub_indexed_shl_cPt_V4 :
+ Hexagon::LDriub_indexed_shl_cNotPt_V4;
+ case Hexagon::LDrih_indexed_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt_V4 :
+ Hexagon::LDrih_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt_V4 :
+ Hexagon::LDriuh_indexed_cNotPt_V4;
+ case Hexagon::LDrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDrih_indexed_shl_cPt_V4 :
+ Hexagon::LDrih_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriuh_ae_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+ case Hexagon::LDriw_indexed_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt_V4 :
+ Hexagon::LDriw_indexed_cNotPt_V4;
+ case Hexagon::LDriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::LDriw_indexed_shl_cPt_V4 :
+ Hexagon::LDriw_indexed_shl_cNotPt_V4;
+ // Byte.
+ case Hexagon::POST_STbri:
+ return !invertPredicate ? Hexagon::POST_STbri_cPt :
+ Hexagon::POST_STbri_cNotPt;
+ case Hexagon::STrib:
+ return !invertPredicate ? Hexagon::STrib_cPt :
+ Hexagon::STrib_cNotPt;
+ case Hexagon::STrib_indexed:
+ return !invertPredicate ? Hexagon::STrib_indexed_cPt :
+ Hexagon::STrib_indexed_cNotPt;
+ case Hexagon::STrib_imm_V4:
+ return !invertPredicate ? Hexagon::STrib_imm_cPt_V4 :
+ Hexagon::STrib_imm_cNotPt_V4;
+ case Hexagon::STrib_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrib_indexed_shl_cPt_V4 :
+ Hexagon::STrib_indexed_shl_cNotPt_V4;
+ // Halfword.
+ case Hexagon::POST_SThri:
+ return !invertPredicate ? Hexagon::POST_SThri_cPt :
+ Hexagon::POST_SThri_cNotPt;
+ case Hexagon::STrih:
+ return !invertPredicate ? Hexagon::STrih_cPt :
+ Hexagon::STrih_cNotPt;
+ case Hexagon::STrih_indexed:
+ return !invertPredicate ? Hexagon::STrih_indexed_cPt :
+ Hexagon::STrih_indexed_cNotPt;
+ case Hexagon::STrih_imm_V4:
+ return !invertPredicate ? Hexagon::STrih_imm_cPt_V4 :
+ Hexagon::STrih_imm_cNotPt_V4;
+ case Hexagon::STrih_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrih_indexed_shl_cPt_V4 :
+ Hexagon::STrih_indexed_shl_cNotPt_V4;
+ // Word.
+ case Hexagon::POST_STwri:
+ return !invertPredicate ? Hexagon::POST_STwri_cPt :
+ Hexagon::POST_STwri_cNotPt;
+ case Hexagon::STriw:
+ return !invertPredicate ? Hexagon::STriw_cPt :
+ Hexagon::STriw_cNotPt;
+ case Hexagon::STriw_indexed:
+ return !invertPredicate ? Hexagon::STriw_indexed_cPt :
+ Hexagon::STriw_indexed_cNotPt;
+ case Hexagon::STriw_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STriw_indexed_shl_cPt_V4 :
+ Hexagon::STriw_indexed_shl_cNotPt_V4;
+ case Hexagon::STriw_imm_V4:
+ return !invertPredicate ? Hexagon::STriw_imm_cPt_V4 :
+ Hexagon::STriw_imm_cNotPt_V4;
+ // Double word.
+ case Hexagon::POST_STdri:
+ return !invertPredicate ? Hexagon::POST_STdri_cPt :
+ Hexagon::POST_STdri_cNotPt;
+ case Hexagon::STrid:
+ return !invertPredicate ? Hexagon::STrid_cPt :
+ Hexagon::STrid_cNotPt;
+ case Hexagon::STrid_indexed:
+ return !invertPredicate ? Hexagon::STrid_indexed_cPt :
+ Hexagon::STrid_indexed_cNotPt;
+ case Hexagon::STrid_indexed_shl_V4:
+ return !invertPredicate ? Hexagon::STrid_indexed_shl_cPt_V4 :
+ Hexagon::STrid_indexed_shl_cNotPt_V4;
+ // Load.
+ case Hexagon::LDrid:
+ return !invertPredicate ? Hexagon::LDrid_cPt :
+ Hexagon::LDrid_cNotPt;
+ case Hexagon::LDriw:
+ return !invertPredicate ? Hexagon::LDriw_cPt :
+ Hexagon::LDriw_cNotPt;
+ case Hexagon::LDrih:
+ return !invertPredicate ? Hexagon::LDrih_cPt :
+ Hexagon::LDrih_cNotPt;
+ case Hexagon::LDriuh:
+ return !invertPredicate ? Hexagon::LDriuh_cPt :
+ Hexagon::LDriuh_cNotPt;
+ case Hexagon::LDrib:
+ return !invertPredicate ? Hexagon::LDrib_cPt :
+ Hexagon::LDrib_cNotPt;
+ case Hexagon::LDriub:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ case Hexagon::LDriubit:
+ return !invertPredicate ? Hexagon::LDriub_cPt :
+ Hexagon::LDriub_cNotPt;
+ // Load Indexed.
+ case Hexagon::LDrid_indexed:
+ return !invertPredicate ? Hexagon::LDrid_indexed_cPt :
+ Hexagon::LDrid_indexed_cNotPt;
+ case Hexagon::LDriw_indexed:
+ return !invertPredicate ? Hexagon::LDriw_indexed_cPt :
+ Hexagon::LDriw_indexed_cNotPt;
+ case Hexagon::LDrih_indexed:
+ return !invertPredicate ? Hexagon::LDrih_indexed_cPt :
+ Hexagon::LDrih_indexed_cNotPt;
+ case Hexagon::LDriuh_indexed:
+ return !invertPredicate ? Hexagon::LDriuh_indexed_cPt :
+ Hexagon::LDriuh_indexed_cNotPt;
+ case Hexagon::LDrib_indexed:
+ return !invertPredicate ? Hexagon::LDrib_indexed_cPt :
+ Hexagon::LDrib_indexed_cNotPt;
+ case Hexagon::LDriub_indexed:
+ return !invertPredicate ? Hexagon::LDriub_indexed_cPt :
+ Hexagon::LDriub_indexed_cNotPt;
+ // Post Increment Load.
+ case Hexagon::POST_LDrid:
+ return !invertPredicate ? Hexagon::POST_LDrid_cPt :
+ Hexagon::POST_LDrid_cNotPt;
+ case Hexagon::POST_LDriw:
+ return !invertPredicate ? Hexagon::POST_LDriw_cPt :
+ Hexagon::POST_LDriw_cNotPt;
+ case Hexagon::POST_LDrih:
+ return !invertPredicate ? Hexagon::POST_LDrih_cPt :
+ Hexagon::POST_LDrih_cNotPt;
+ case Hexagon::POST_LDriuh:
+ return !invertPredicate ? Hexagon::POST_LDriuh_cPt :
+ Hexagon::POST_LDriuh_cNotPt;
+ case Hexagon::POST_LDrib:
+ return !invertPredicate ? Hexagon::POST_LDrib_cPt :
+ Hexagon::POST_LDrib_cNotPt;
+ case Hexagon::POST_LDriub:
+ return !invertPredicate ? Hexagon::POST_LDriub_cPt :
+ Hexagon::POST_LDriub_cNotPt;
+ // DEALLOC_RETURN.
+ case Hexagon::DEALLOC_RET_V4:
+ return !invertPredicate ? Hexagon::DEALLOC_RET_cPt_V4 :
+ Hexagon::DEALLOC_RET_cNotPt_V4;
+ }
+ llvm_unreachable("Unexpected predicable instruction");
+}
+
+
+bool HexagonInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ int Opc = MI->getOpcode();
+ assert (isPredicable(MI) && "Expected predicable instruction");
+ bool invertJump = (!Cond.empty() && Cond[0].isImm() &&
+ (Cond[0].getImm() == 0));
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc, invertJump)));
+ //
+ // This assumes that the predicate is always the first operand
+ // in the set of inputs.
+ //
+ MI->addOperand(MI->getOperand(MI->getNumOperands()-1));
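+  // The addOperand call above grew the operand list by duplicating the last
+  // operand. Now shift each explicit input one slot to the right, stopping
+  // at the first explicit def, to vacate a slot for the predicate register.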
+ int oper;
+ for (oper = MI->getNumOperands() - 3; oper >= 0; --oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if ((MO.isReg() && !MO.isUse() && !MO.isImplicit())) {
+ break;
+ }
+
+ if (MO.isReg()) {
+ MI->getOperand(oper+1).ChangeToRegister(MO.getReg(), MO.isDef(),
+ MO.isImplicit(), MO.isKill(),
+ MO.isDead(), MO.isUndef(),
+ MO.isDebug());
+ } else if (MO.isImm()) {
+ MI->getOperand(oper+1).ChangeToImmediate(MO.getImm());
+ } else {
+ llvm_unreachable("Unexpected operand type");
+ }
+ }
+
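+  // Cond[0] carries the branch sense (an immediate 0 means inverted), so
+  // the predicate register is Cond[1] for an inverted jump and Cond[0]
+  // otherwise. Write it into the slot vacated by the loop above.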
+ int regPos = invertJump ? 1 : 0;
+ MachineOperand PredMO = Cond[regPos];
+ MI->getOperand(oper+1).ChangeToRegister(PredMO.getReg(), PredMO.isDef(),
+ PredMO.isImplicit(), PredMO.isKill(),
+ PredMO.isDead(), PredMO.isUndef(),
+ PredMO.isDebug());
+
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool
+HexagonInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles,
+ unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles,
+ unsigned ExtraFCycles,
+ const BranchProbability &Probability) const {
+ return true;
+}
+
+
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+ const uint64_t F = MI->getDesc().TSFlags;
+
+ return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
+}
+
+
+bool
+HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
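+  // Scan the operands for a def of a register in the predicate register
+  // class; the first one found is reported back through Pred.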
+ for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+ MachineOperand MO = MI->getOperand(oper);
+ if (MO.isReg() && MO.isDef()) {
+ const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg());
+ if (RC == Hexagon::PredRegsRegisterClass) {
+ Pred.push_back(MO);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
+bool
+HexagonInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ // TODO: Fix this
+ return false;
+}
+
+
+//
+// We indicate that we want to reverse the branch by
+// inserting a 0 at the beginning of the Cond vector.
+//
+bool HexagonInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ if (!Cond.empty() && Cond[0].isImm() && Cond[0].getImm() == 0) {
+ Cond.erase(Cond.begin());
+ } else {
+ Cond.insert(Cond.begin(), MachineOperand::CreateImm(0));
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
+ const BranchProbability &Probability) const {
+ return (NumInstrs <= 4);
+}
+
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::DEALLOC_RET_V4 :
+ case Hexagon::DEALLOC_RET_cPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotPt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPnt_V4 :
+ case Hexagon::DEALLOC_RET_cdnPt_V4 :
+ case Hexagon::DEALLOC_RET_cNotdnPt_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isValidOffset(const int Opcode, const int Offset) const {
+  // This function checks whether "Offset" is within the valid range for the
+  // given "Opcode". When it is not, an "ADD_ri" is inserted to compute the
+  // final address; for that reason the function may assume that "Offset"
+  // already has the correct alignment.
+
+ switch(Opcode) {
+
+ case Hexagon::LDriw:
+ case Hexagon::STriw:
+ assert((Offset % 4 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMW_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMW_OFFSET_MAX);
+
+ case Hexagon::LDrid:
+ case Hexagon::STrid:
+ assert((Offset % 8 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMD_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMD_OFFSET_MAX);
+
+ case Hexagon::LDrih:
+ case Hexagon::LDriuh:
+ case Hexagon::STrih:
+ case Hexagon::LDrih_ae:
+ assert((Offset % 2 == 0) && "Offset has incorrect alignment");
+ return (Offset >= Hexagon_MEMH_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMH_OFFSET_MAX);
+
+ case Hexagon::LDrib:
+ case Hexagon::STrib:
+ case Hexagon::LDriub:
+ case Hexagon::LDriubit:
+ case Hexagon::LDrib_ae:
+ case Hexagon::LDriub_ae:
+ return (Offset >= Hexagon_MEMB_OFFSET_MIN) &&
+ (Offset <= Hexagon_MEMB_OFFSET_MAX);
+
+ case Hexagon::ADD_ri:
+ case Hexagon::TFR_FI:
+ return (Offset >= Hexagon_ADDI_OFFSET_MIN) &&
+ (Offset <= Hexagon_ADDI_OFFSET_MAX);
+
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ assert ((Offset % 4) == 0 && "MEMOPw offset is not aligned correctly." );
+ return (0 <= Offset && Offset <= 255);
+
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ assert ((Offset % 2) == 0 && "MEMOPh offset is not aligned correctly." );
+ return (0 <= Offset && Offset <= 127);
+
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return (0 <= Offset && Offset <= 63);
+
+  // STriw_pred and LDriw_pred are pseudo operations, so they have to accept
+  // an offset of any size. A later pass knows how to handle them.
+ case Hexagon::STriw_pred:
+ case Hexagon::LDriw_pred:
+ return true;
+
+ // INLINEASM is very special.
+ case Hexagon::INLINEASM:
+ return true;
+ }
+
+ llvm_unreachable("No offset range is defined for this opcode. "
+ "Please define it in the above switch statement!");
+}
+
+
+//
+// Check if the Offset is a valid auto-inc imm by Load/Store Type.
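+// The increment is encoded as a signed 4-bit field scaled by the access
+// size (matching the isShiftedInt<4,N> checks in isPredicable above), so
+// the offset must also be a multiple of the access size.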
+//
+bool HexagonInstrInfo::
+isValidAutoIncImm(const EVT VT, const int Offset) const {
+
+ if (VT == MVT::i64) {
+ return (Offset >= Hexagon_MEMD_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMD_AUTOINC_MAX &&
+ (Offset & 0x7) == 0);
+ }
+ if (VT == MVT::i32) {
+ return (Offset >= Hexagon_MEMW_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMW_AUTOINC_MAX &&
+ (Offset & 0x3) == 0);
+ }
+ if (VT == MVT::i16) {
+ return (Offset >= Hexagon_MEMH_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMH_AUTOINC_MAX &&
+ (Offset & 0x1) == 0);
+ }
+ if (VT == MVT::i8) {
+ return (Offset >= Hexagon_MEMB_AUTOINC_MIN &&
+ Offset <= Hexagon_MEMB_AUTOINC_MAX);
+ }
+ llvm_unreachable("Not an auto-inc opc!");
+}
+
+
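+// Returns true if MI is one of the V4 "mem-op" instructions, which apply an
+// add/sub/and/or directly to a value in memory (a read-modify-write).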
+bool HexagonInstrInfo::
+isMemOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::MEMw_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMw_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDi_MEM_V4 :
+ case Hexagon::MEMw_SUBi_MEM_V4 :
+ case Hexagon::MEMw_ADDr_MEM_V4 :
+ case Hexagon::MEMw_SUBr_MEM_V4 :
+ case Hexagon::MEMw_ANDr_MEM_V4 :
+ case Hexagon::MEMw_ORr_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMh_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDi_MEM_V4 :
+ case Hexagon::MEMh_SUBi_MEM_V4 :
+ case Hexagon::MEMh_ADDr_MEM_V4 :
+ case Hexagon::MEMh_SUBr_MEM_V4 :
+ case Hexagon::MEMh_ANDr_MEM_V4 :
+ case Hexagon::MEMh_ORr_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDi_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBi_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_SUBr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ANDr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ORr_indexed_MEM_V4 :
+ case Hexagon::MEMb_ADDSUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDi_MEM_V4 :
+ case Hexagon::MEMb_SUBi_MEM_V4 :
+ case Hexagon::MEMb_ADDr_MEM_V4 :
+ case Hexagon::MEMb_SUBr_MEM_V4 :
+ case Hexagon::MEMb_ANDr_MEM_V4 :
+ case Hexagon::MEMb_ORr_MEM_V4 :
+ return true;
+ }
+ return false;
+}
+
+
+bool HexagonInstrInfo::
+isSpillPredRegOp(const MachineInstr *MI) const {
+ switch (MI->getOpcode())
+ {
+ case Hexagon::STriw_pred :
+ case Hexagon::LDriw_pred :
+ return true;
+ }
+ return false;
+}
+
+bool HexagonInstrInfo::
+isConditionalTransfer (const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::ADD_ri_cPt:
+ case Hexagon::ADD_ri_cNotPt:
+ case Hexagon::ADD_rr_cPt:
+ case Hexagon::ADD_rr_cNotPt:
+ case Hexagon::XOR_rr_cPt:
+ case Hexagon::XOR_rr_cNotPt:
+ case Hexagon::AND_rr_cPt:
+ case Hexagon::AND_rr_cNotPt:
+ case Hexagon::OR_rr_cPt:
+ case Hexagon::OR_rr_cNotPt:
+ case Hexagon::SUB_rr_cPt:
+ case Hexagon::SUB_rr_cNotPt:
+ case Hexagon::COMBINE_rr_cPt:
+ case Hexagon::COMBINE_rr_cNotPt:
+ return true;
+ case Hexagon::ASLH_cPt_V4:
+ case Hexagon::ASLH_cNotPt_V4:
+ case Hexagon::ASRH_cPt_V4:
+ case Hexagon::ASRH_cNotPt_V4:
+ case Hexagon::SXTB_cPt_V4:
+ case Hexagon::SXTB_cNotPt_V4:
+ case Hexagon::SXTH_cPt_V4:
+ case Hexagon::SXTH_cNotPt_V4:
+ case Hexagon::ZXTB_cPt_V4:
+ case Hexagon::ZXTB_cNotPt_V4:
+ case Hexagon::ZXTH_cPt_V4:
+ case Hexagon::ZXTH_cNotPt_V4:
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+
+ default:
+ return false;
+ }
+}
+
+bool HexagonInstrInfo::
+isConditionalLoad (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ return true;
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return QRI.Subtarget.getHexagonArchVersion() == HexagonSubtarget::V4;
+ default:
+ return false;
+ }
+}
+
+// Returns true if an instruction is a conditional store.
+//
+// Note: it doesn't include conditional new-value stores, as they can't be
+// converted to .new predicate form.
+//
+// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
+// ^ ^
+// / \ (not OK. it will cause new-value store to be
+// / X conditional on p0.new while R2 producer is
+// / \ on p0)
+// / \.
+// p.new store p.old NV store
+// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new]
+// ^ ^
+// \ /
+// \ /
+// \ /
+// p.old store
+// [if (p0)memw(R0+#0)=R2]
+//
+// The diagram above shows the steps involved in converting a predicated
+// store instruction into its .new predicated new-value form.
+//
+// The following set of instructions further explains the scenario where
+// conditional new-value store becomes invalid when promoted to .new predicate
+// form.
+//
+// { 1) if (p0) r0 = add(r1, r2)
+// 2) p0 = cmp.eq(r3, #0) }
+//
+// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with
+// the first two instructions, because in instr 1 r0 is conditional on the
+// old value of p0, while its use in instr 3 is conditional on the p0
+// modified by instr 2; that is not valid for new-value stores.
+bool HexagonInstrInfo::
+isConditionalStore (const MachineInstr* MI) const {
+ const HexagonRegisterInfo& QRI = getRegisterInfo();
+ switch (MI->getOpcode())
+ {
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cNotPt :
+ return QRI.Subtarget.hasV4TOps();
+
+ // V4 global address store before promoting to dot new.
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return QRI.Subtarget.hasV4TOps();
+
+  // Predicated new-value stores (i.e. if (p0) memw(..)=r0.new) are excluded
+  // from the "Conditional Store" list because a predicated new-value store
+  // would NOT be promoted to a double dot-new store; see the diagram below.
+  // This function returns true for those stores that are predicated but not
+  // yet promoted to predicate dot-new instructions.
+ //
+ // +---------------------+
+ // /-----| if (p0) memw(..)=r0 |---------\~
+ // || +---------------------+ ||
+ // promote || /\ /\ || promote
+ // || /||\ /||\ ||
+ // \||/ demote || \||/
+ // \/ || || \/
+ // +-------------------------+ || +-------------------------+
+ // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new |
+ // +-------------------------+ || +-------------------------+
+ // || || ||
+ // || demote \||/
+ // promote || \/ NOT possible
+ // || || /\~
+ // \||/ || /||\~
+ // \/ || ||
+ // +-----------------------------+
+ // | if (p0.new) memw(..)=r0.new |
+ // +-----------------------------+
+ // Double Dot New Store
+ //
+
+ default:
+ return false;
+
+ }
+}
+
+
+
+DFAPacketizer *HexagonInstrInfo::
+CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return TM->getSubtarget<HexagonGenSubtargetInfo>().createDFAPacketizer(II);
+}
+
+bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+  // Debug info is never a scheduling boundary. Being explicit about this
+  // matters because of the terminator handling below: otherwise a dbg_value
+  // followed by a terminator would make the terminator look like a
+  // scheduling hazard, which is wrong. The boundary should be the actual
+  // instruction preceding the dbg_value instruction(s), just as it is when
+  // debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel() || MI->isInlineAsm())
+ return true;
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
new file mode 100644
index 000000000000..6a45871b67e2
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -0,0 +1,185 @@
+//===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonINSTRUCTIONINFO_H
+#define HexagonINSTRUCTIONINFO_H
+
+#include "HexagonRegisterInfo.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+
+#define GET_INSTRINFO_HEADER
+#include "HexagonGenInstrInfo.inc"
+
+namespace llvm {
+
+class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RI;
+ const HexagonSubtarget& Subtarget;
+public:
+ explicit HexagonInstrInfo(HexagonSubtarget &ST);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stored stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+ unsigned createVR(MachineFunction* MF, MVT VT) const;
+
+ virtual bool isPredicable(MachineInstr *MI) const;
+ virtual bool
+ PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const;
+
+ virtual bool isPredicated(const MachineInstr *MI) const;
+ virtual bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const;
+ virtual bool
+ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+ virtual bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool
+ isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ const BranchProbability &Probability) const;
+
+ virtual DFAPacketizer*
+ CreateTargetScheduleState(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+ bool isValidOffset(const int Opcode, const int Offset) const;
+ bool isValidAutoIncImm(const EVT VT, const int Offset) const;
+ bool isMemOp(const MachineInstr *MI) const;
+ bool isSpillPredRegOp(const MachineInstr *MI) const;
+ bool isU6_3Immediate(const int value) const;
+ bool isU6_2Immediate(const int value) const;
+ bool isU6_1Immediate(const int value) const;
+ bool isU6_0Immediate(const int value) const;
+ bool isS4_3Immediate(const int value) const;
+ bool isS4_2Immediate(const int value) const;
+ bool isS4_1Immediate(const int value) const;
+ bool isS4_0Immediate(const int value) const;
+ bool isS12_Immediate(const int value) const;
+ bool isU6_Immediate(const int value) const;
+ bool isS8_Immediate(const int value) const;
+ bool isS6_Immediate(const int value) const;
+
+ bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const;
+ bool isConditionalTransfer(const MachineInstr* MI) const;
+ bool isConditionalALU32 (const MachineInstr* MI) const;
+ bool isConditionalLoad (const MachineInstr* MI) const;
+ bool isConditionalStore(const MachineInstr* MI) const;
+ bool isDeallocRet(const MachineInstr *MI) const;
+ unsigned getInvertedPredicatedOpcode(const int Opc) const;
+ bool isExtendable(const MachineInstr* MI) const;
+ bool isExtended(const MachineInstr* MI) const;
+ bool isPostIncrement(const MachineInstr* MI) const;
+ bool isNewValueStore(const MachineInstr* MI) const;
+ bool isNewValueJump(const MachineInstr* MI) const;
+ unsigned getImmExtForm(const MachineInstr* MI) const;
+ unsigned getNormalBranchForm(const MachineInstr* MI) const;
+
+private:
+ int getMatchingCondBranchOpcode(int Opc, bool sense) const;
+
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
new file mode 100644
index 000000000000..fd5adef0f63f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -0,0 +1,3052 @@
+//==- HexagonInstrInfo.td - Target Description for Hexagon -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormats.td"
+include "HexagonImmediates.td"
+
+//===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget.hasV4TOps()">;
+def UseMEMOP : Predicate<"Subtarget.useMemOps()">;
+
+// Addressing modes.
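+// By convention the suffix names the immediate constraint; e.g. ADDRriS11_2
+// matches a base register plus a signed 11-bit immediate scaled by 4.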
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
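+
+// Note on naming: Hexagon's #s11:3 / #u6:2 immediate syntax denotes a signed
+// 11-bit (resp. unsigned 6-bit) field scaled by 2^3 (resp. 2^2), so suffixes
+// like S11_3 or U6_2 above select offsets that are multiples of 8, 4, 2 or 1
+// according to the final digit.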
+
+// Address operands.
+def MEMrr : Operand<i32> {
+ let PrintMethod = "printMEMrrOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+ let PrintMethod = "printMEMriOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+ let PrintMethod = "printFrameIndexOperand";
+ let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in
+ def globaladdress : Operand<i32>;
+
+let PrintMethod = "printJumpTable" in
+ def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+// Multi-class for logical operators.
+multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
+ def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$b, $c)")),
+ [(set IntRegs:$dst, (OpNode s10Imm:$b, IntRegs:$c))]>;
+}
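+
+// For illustration only (no such defm appears in this file): a hypothetical
+//   defm AND_ : ALU32_rr_ri<"and", and>;
+// would expand to two records, AND_rr and AND_ri, roughly equivalent to
+//   def AND_rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+//                         "$dst = and($b, $c)", ...>;
+//   def AND_ri : ALU32_ri<(outs IntRegs:$dst), (ins s10Imm:$b, IntRegs:$c),
+//                         "$dst = and(#$b, $c)", ...>;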
+
+// Multi-class for compare ops.
+let isCompare = 1 in {
+multiclass CMP64_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$b, DoubleRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode DoubleRegs:$b, DoubleRegs:$c))]>;
+}
+multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+}
+
+multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s10ImmPred:$c))]>;
+}
+
+multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
+ def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_u9<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, u9ImmPred:$c))]>;
+}
+
+multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
+ def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+ [(set PredRegs:$dst, (OpNode IntRegs:$b, s8ImmPred:$c))]>;
+}
+}
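+
+// These multiclasses are instantiated in the compare sections below; for
+// example, "defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;" expands to
+// CMPEQrr and CMPEQri, each writing a predicate register that the mux and
+// conditional-jump instructions can then consume.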
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// http://qualnet.qualcomm.com/~erich/v1/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v2/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v3/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v4/htmldocs/index.html
+// http://qualnet.qualcomm.com/~erich/v5/htmldocs/index.html
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+let isPredicable = 1 in
+def ADD_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def ADD_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (add IntRegs:$src1, s16ImmPred:$src2))]>;
+
+// Logical operations.
+let isPredicable = 1 in
+def XOR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set IntRegs:$dst, (xor IntRegs:$src1, IntRegs:$src2))]>;
+
+let isPredicable = 1 in
+def AND_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, IntRegs:$src2))]>;
+
+def OR_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = or($src1, #$src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, s8ImmPred:$src2))]>;
+
+def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1),
+ "$dst = not($src1)",
+ [(set IntRegs:$dst, (not IntRegs:$src1))]>;
+
+def AND_ri : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s10Imm:$src2),
+ "$dst = and($src1, #$src2)",
+ [(set IntRegs:$dst, (and IntRegs:$src1, s10ImmPred:$src2))]>;
+
+let isPredicable = 1 in
+def OR_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set IntRegs:$dst, (or IntRegs:$src1, IntRegs:$src2))]>;
+
+// Negate.
+def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = neg($src1)",
+ [(set IntRegs:$dst, (ineg IntRegs:$src1))]>;
+// Nop.
+let neverHasSideEffects = 1 in
+def NOP : ALU32_rr<(outs), (ins),
+ "nop",
+ []>;
+
+// Subtract.
+let isPredicable = 1 in
+def SUB_rr : ALU32_rr<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, IntRegs:$src2))]>;
+
+// Transfer immediate.
+let isReMaterializable = 1, isPredicable = 1 in
+def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ "$dst = #$src1",
+ [(set IntRegs:$dst, s16ImmPred:$src1)]>;
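+
+// isReMaterializable lets the register allocator recompute the immediate at a
+// use point instead of spilling and reloading it.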
+
+// Transfer register.
+let neverHasSideEffects = 1, isPredicable = 1 in
+def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+
+// Transfer control register.
+let neverHasSideEffects = 1 in
+def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = combine($src1, $src2)",
+ []>;
+
+// Mux.
+def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
+ DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ "$dst = vmux($src1, $src2, $src3)",
+ []>;
+
+def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst = mux($src1, $src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ "$dst = mux($src1, #$src2, $src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s8ImmPred:$src2, IntRegs:$src3))]>;
+
+def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ s8ImmPred:$src3))]>;
+
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+ s8Imm:$src3),
+ "$dst = mux($src1, #$src2, #$src3)",
+ [(set IntRegs:$dst, (select PredRegs:$src1, s8ImmPred:$src2,
+ s8ImmPred:$src3))]>;
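+
+// Taken together with the compare patterns, a conditional expression such as
+// (a > b) ? x : y would typically select to a cmp.gt producing a predicate
+// register, followed by one of the mux forms above.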
+
+// Shift halfword.
+let isPredicable = 1 in
+def ASLH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = aslh($src1)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, (i32 16)))]>;
+
+let isPredicable = 1 in
+def ASRH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = asrh($src1)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, (i32 16)))]>;
+
+// Sign extend.
+let isPredicable = 1 in
+def SXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtb($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i8))]>;
+
+let isPredicable = 1 in
+def SXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxth($src1)",
+ [(set IntRegs:$dst, (sext_inreg IntRegs:$src1, i16))]>;
+
+// Zero extend.
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTB : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxtb($src1)",
+ []>;
+
+let isPredicable = 1, neverHasSideEffects = 1 in
+def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = zxth($src1)",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PRED +
+//===----------------------------------------------------------------------===//
+
+// Conditional add.
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cdnPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if ($src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_ri_cdnNotPt : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s16Imm:$src3),
+ "if (!$src1.new) $dst = add($src2, #$src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = add($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ADD_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = add($src2, $src3)",
+ []>;
+
+
+// Conditional combine.
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = combine($src2, $src3)",
+ []>;
+
+// Conditional logical operations.
+
+let isPredicated = 1 in
+def XOR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def XOR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def XOR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def XOR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = xor($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def AND_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = and($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = or($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = or($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = or($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def OR_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = or($src2, $src3)",
+ []>;
+
+
+// Conditional subtract.
+
+let isPredicated = 1 in
+def SUB_rr_cPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst = sub($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def SUB_rr_cNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst = sub($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def SUB_rr_cdnPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+let isPredicated = 1 in
+def SUB_rr_cdnNotPt : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst = sub($src2, $src3)",
+ []>;
+
+
+// Conditional transfer.
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
+ "if ($src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if ($src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2),
+ "if (!$src1.new) $dst = $src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if ($src1.new) $dst = #$src2",
+ []>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ s12Imm:$src2),
+ "if (!$src1.new) $dst = #$src2",
+ []>;
+
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u9<"cmp.geu", setuge>;
+//===----------------------------------------------------------------------===//
+// ALU32/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU32/VH +
+//===----------------------------------------------------------------------===//
+// Vector add halfwords
+
+// Vector average halfwords
+
+// Vector subtract halfwords
+//===----------------------------------------------------------------------===//
+// ALU32/VH -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+// Add.
+def ADD64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = add($src1, $src2)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Add halfword.
+
+// Compare.
+defm CMPEHexagon4 : CMP64_rr<"cmp.eq", seteq>;
+defm CMPGT64 : CMP64_rr<"cmp.gt", setgt>;
+defm CMPGTU64 : CMP64_rr<"cmp.gtu", setugt>;
+
+// Logical operations.
+def AND_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set DoubleRegs:$dst, (and DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+def OR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set DoubleRegs:$dst, (or DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+def XOR_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set DoubleRegs:$dst, (xor DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Maximum.
+def MAXw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setlt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Minimum.
+def MINw_rr : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set IntRegs:$dst, (select (i1 (setgt IntRegs:$src2,
+ IntRegs:$src1)),
+ IntRegs:$src1, IntRegs:$src2))]>;
+
+// Subtract.
+def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = sub($src1, $src2)",
+ [(set DoubleRegs:$dst, (sub DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+// Subtract halfword.
+
+// Transfer register.
+let neverHasSideEffects = 1 in
+def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = $src1",
+ []>;
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/BIT +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/BIT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/PERM +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VB +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VH +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/VW +
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+// ALU64/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CR +
+//===----------------------------------------------------------------------===//
+// Logical reductions on predicates.
+
+// Looping instructions.
+
+// Pipelined looping instructions.
+
+// Logical operations on predicates.
+def AND_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = and($src1, $src2)",
+ [(set PredRegs:$dst, (and PredRegs:$src1, PredRegs:$src2))]>;
+
+let neverHasSideEffects = 1 in
+def AND_pnotp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = and($src1, !$src2)",
+ []>;
+
+def ANY_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = any8($src1)",
+ []>;
+
+def ALL_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = all8($src1)",
+ []>;
+
+def VITPACK_pp : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ PredRegs:$src2),
+ "$dst = vitpack($src1, $src2)",
+ []>;
+
+def VALIGN_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = valignb($src1, $src2, $src3)",
+ []>;
+
+def VSPLICE_rrp : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ PredRegs:$src3),
+ "$dst = vspliceb($src1, $src2, $src3)",
+ []>;
+
+def MASK_p : SInst<(outs DoubleRegs:$dst), (ins PredRegs:$src1),
+ "$dst = mask($src1)",
+ []>;
+
+def NOT_p : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1),
+ "$dst = not($src1)",
+ [(set PredRegs:$dst, (not PredRegs:$src1))]>;
+
+def OR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = or($src1, $src2)",
+ [(set PredRegs:$dst, (or PredRegs:$src1, PredRegs:$src2))]>;
+
+def XOR_pp : SInst<(outs PredRegs:$dst), (ins PredRegs:$src1, PredRegs:$src2),
+ "$dst = xor($src1, $src2)",
+ [(set PredRegs:$dst, (xor PredRegs:$src1, PredRegs:$src2))]>;
+
+
+// User control register transfer.
+//===----------------------------------------------------------------------===//
+// CR -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Jump to address.
+let isBranch = 1, isTerminator=1, isBarrier = 1, isPredicable = 1 in {
+ def JMP : JInst< (outs),
+ (ins brtarget:$offset),
+ "jump $offset",
+ [(br bb:$offset)]>;
+}
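+
+// isTerminator marks JMP as ending its basic block, and isBarrier records
+// that control never falls through past it.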
+
+// if (p0) jump
+let isBranch = 1, isTerminator=1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_c : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src) jump $offset",
+ [(brcond PredRegs:$src, bb:$offset)]>;
+}
+
+// if (!p0) jump
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cNot : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src) jump $offset",
+ []>;
+}
+
+let isTerminator = 1, isBranch = 1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def BRCOND : JInst < (outs), (ins PredRegs:$pred, brtarget:$dst),
+ "if ($pred) jump $dst",
+ []>;
+}
+
+// Jump to address conditioned on new predicate.
+// if (p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:t $offset",
+ []>;
+}
+
+// if (!p0) jump:t
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnNotPt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:t $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if ($src.new) jump:nt $offset",
+ []>;
+}
+
+// Not taken.
+let isBranch = 1, isTerminator=1, neverHasSideEffects = 1, Defs = [PC],
+ isPredicated = 1 in {
+ def JMP_cdnNotPnt : JInst< (outs),
+ (ins PredRegs:$src, brtarget:$offset),
+ "if (!$src.new) jump:nt $offset",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR: JRInst<(outs), (ins),
+ "jumpr r31",
+ [(retflag)]>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if ($src1) jumpr r31",
+ []>;
+}
+
+// Jump to address from register.
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cNotPt: JRInst<(outs), (ins PredRegs:$src1),
+ "if (!$src1) jumpr r31",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+///
+/// Make sure that in a post-increment load, the first operand is always the
+/// post-increment operand.
+///
+// Load doubleword.
+let isPredicable = 1 in
+def LDrid : LDInst<(outs DoubleRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memd($addr)",
+ [(set DoubleRegs:$dst, (load ADDRriS11_3:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s11_3Imm:$offset),
+ "$dst=memd($src1+#$offset)",
+ [(set DoubleRegs:$dst, (load (add IntRegs:$src1,
+ s11_3ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid : LDInstPI<(outs DoubleRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memd($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load doubleword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memd($addr)",
+ []>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrid_cNotPt : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memd($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if ($src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_indexed_cdnNotPt : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
+ "if (!$src1.new) $dst=memd($src2+#$src3)",
+ []>;
+
+
+// Load byte.
+let isPredicable = 1 in
+def LDrib : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (sextloadi8 ADDRriS11_0:$addr))]>;
+
+def LDrib_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memb($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+// Indexed load byte.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrib_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+
+// Indexed load byte any-extend.
+let AddedComplexity = 20 in
+def LDrib_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memb($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memb($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrib_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memb($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memb($src2+#$src3)",
+ []>;
+
+
+// Load halfword.
+let isPredicable = 1 in
+def LDrih : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (sextloadi16 ADDRriS11_1:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDrih_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (sextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+def LDrih_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+let AddedComplexity = 20 in
+def LDrih_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>;
+
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDrih_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memh($src2+#$src3)",
+ []>;
+
+// Load unsigned byte.
+let isPredicable = 1 in
+def LDriub : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi8 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1 in
+def LDriubit : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (zextloadi1 ADDRriS11_0:$addr))]>;
+
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriub_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let AddedComplexity = 20 in
+def LDriubit_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi1 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+def LDriub_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memub($addr)",
+ [(set IntRegs:$dst, (extloadi8 ADDRriS11_0:$addr))]>;
+
+
+let AddedComplexity = 20 in
+def LDriub_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_0Imm:$offset),
+ "$dst=memub($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi8 (add IntRegs:$src1,
+ s11_0ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memub($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned byte conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriub_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memub($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if ($src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
+ "if (!$src1.new) $dst = memub($src2+#$src3)",
+ []>;
+
+// Load unsigned halfword.
+let isPredicable = 1 in
+def LDriuh : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (zextloadi16 ADDRriS11_1:$addr))]>;
+
+// Indexed load unsigned halfword.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (zextloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))]>;
+
+def LDriuh_ae : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr),
+ "$dst = memuh($addr)",
+ [(set IntRegs:$dst, (extloadi16 ADDRriS11_1:$addr))]>;
+
+
+// Indexed load unsigned halfword any-extend.
+let AddedComplexity = 20 in
+def LDriuh_ae_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_1Imm:$offset),
+ "$dst=memuh($src1+#$offset)",
+ [(set IntRegs:$dst, (extloadi16 (add IntRegs:$src1,
+ s11_1ImmPred:$offset)))] >;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memuh($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load unsigned halfword conditionally.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriuh_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memuh($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if ($src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
+ "if (!$src1.new) $dst = memuh($src2+#$src3)",
+ []>;
+
+
+// Load word.
+let isPredicable = 1 in
+def LDriw : LDInst<(outs IntRegs:$dst),
+ (ins MEMri:$addr), "$dst = memw($addr)",
+ [(set IntRegs:$dst, (load ADDRriS11_2:$addr))]>;
+
+// Load predicate.
+let mayLoad = 1, Defs = [R10,R11] in
+def LDriw_pred : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>;
+
+// Indexed load.
+let isPredicable = 1, AddedComplexity = 20 in
+def LDriw_indexed : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s11_2Imm:$offset),
+ "$dst=memw($src1+#$offset)",
+ [(set IntRegs:$dst, (load (add IntRegs:$src1,
+ s11_2ImmPred:$offset)))]>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>;
+
+let isPredicable = 1, mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw : LDInstPI<(outs IntRegs:$dst, IntRegs:$dst2),
+ (ins IntRegs:$src1, s4Imm:$offset),
+ "$dst = memw($src1++#$offset)",
+ [],
+ "$src1 = $dst2">;
+
+// Load word conditionally.
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1 in
+def POST_LDriw_cNotPt : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if ($src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, MEMri:$addr),
+ "if (!$src1.new) $dst = memw($addr)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if ($src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_indexed_cdnNotPt : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
+ "if (!$src1.new) $dst=memw($src2+#$src3)",
+ []>;
+
+// Deallocate stack frame.
+let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
+ def DEALLOCFRAME : LDInst<(outs), (ins i32imm:$amt1),
+ "deallocframe",
+ []>;
+}
+
+// Load and unpack bytes to halfwords.
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH +
+//===----------------------------------------------------------------------===//
+// Multiply and use lower result.
+// Rd=+mpyi(Rs,#u8)
+def MPYI_riu : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst =+ mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, u8ImmPred:$src2))]>;
+
+// Rd=-mpyi(Rs,#u8)
+def MPYI_rin : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, n8Imm:$src2),
+ "$dst =- mpyi($src1, #$src2)",
+ [(set IntRegs:$dst,
+ (mul IntRegs:$src1, n8ImmPred:$src2))]>;
+
+// Rd=mpyi(Rs,#m9)
+// s9 is NOT the same as m9, but it works so far.
+// The assembler maps to either Rd=+mpyi(Rs,#u8) or Rd=-mpyi(Rs,#u8)
+// depending on the value of m9. See Arch Spec.
+def MPYI_ri : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Imm:$src2),
+ "$dst = mpyi($src1, #$src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, s9ImmPred:$src2))]>;
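+
+// For example, "r0 = mpyi(r1, #-3)" would be emitted by the assembler as
+// "r0 =- mpyi(r1, #3)", while a non-negative multiplier keeps the "=+" form.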
+
+// Rd=mpyi(Rs,Rt)
+def MPYI : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyi($src1, $src2)",
+ [(set IntRegs:$dst, (mul IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rx+=mpyi(Rs,#u8)
+def MPYI_acc_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst += mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, u8ImmPred:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx+=mpyi(Rs,Rt)
+def MPYI_acc_rr : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyi($src2, $src3)",
+ [(set IntRegs:$dst,
+ (add (mul IntRegs:$src2, IntRegs:$src3), IntRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rx-=mpyi(Rs,#u8)
+def MPYI_sub_ri : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u8Imm:$src3),
+ "$dst -= mpyi($src2, #$src3)",
+ [(set IntRegs:$dst,
+ (sub IntRegs:$src1, (mul IntRegs:$src2, u8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Multiply and use upper result.
+// Rd=mpy(Rs,Rt.H):<<1:rnd:sat
+// Rd=mpy(Rs,Rt.L):<<1:rnd:sat
+// Rd=mpy(Rs,Rt)
+def MPY : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst, (mulhs IntRegs:$src1, IntRegs:$src2))]>;
+
+// Rd=mpy(Rs,Rt):rnd
+// Rd=mpyu(Rs,Rt)
+def MPYU : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set IntRegs:$dst, (mulhu IntRegs:$src1, IntRegs:$src2))]>;
+
+// Multiply and use full result.
+// Rdd=mpyu(Rs,Rt)
+def MPYU64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpyu($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (anyext IntRegs:$src1)),
+ (i64 (anyext IntRegs:$src2))))]>;
+
+// Rdd=mpy(Rs,Rt)
+def MPY64 : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set DoubleRegs:$dst, (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2))))]>;
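+
+// For example, mpyu(0xffffffff, 0x2) produces the full 64-bit product
+// 0x1fffffffe in Rdd, whereas the 32-bit MPYU above would return only the
+// upper word, 0x1.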
+
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+// Rxx+=mpy(Rs,Rt)
+def MPY64_acc : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (add (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3))),
+ DoubleRegs:$src1))],
+ "$src1 = $dst">;
+
+// Rxx-=mpy(Rs,Rt)
+def MPY64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst -= mpy($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (sext IntRegs:$src2)), (i64 (sext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+// Rxx[+-]=mpyu(Rs,Rt)
+// Rxx+=mpyu(Rs,Rt)
+def MPYU64_acc : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst, (add (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3))),
+ DoubleRegs:$src1))],"$src1 = $dst">;
+
+// Rxx-=mpyu(Rs,Rt)
+def MPYU64_sub : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst += mpyu($src2, $src3)",
+ [(set DoubleRegs:$dst,
+ (sub DoubleRegs:$src1,
+ (mul (i64 (anyext IntRegs:$src2)),
+ (i64 (anyext IntRegs:$src3)))))],
+ "$src1 = $dst">;
+
+
+def ADDrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst += add($src2, $src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, IntRegs:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def ADDri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst += add($src2, #$src3)",
+ [(set IntRegs:$dst, (add (add IntRegs:$src2, s8ImmPred:$src3),
+ IntRegs:$src1))],
+ "$src1 = $dst">;
+
+def SUBrr_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, IntRegs:$src3),
+ "$dst -= add($src2, $src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1, (add IntRegs:$src2,
+ IntRegs:$src3)))],
+ "$src1 = $dst">;
+
+def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
+ IntRegs:$src2, s8Imm:$src3),
+ "$dst -= add($src2, #$src3)",
+ [(set IntRegs:$dst, (sub IntRegs:$src1,
+ (add IntRegs:$src2, s8ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/MPYS -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VB +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VB -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MTYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// MTYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. Make sure that in a post-increment store, the zeroth operand is always
+///    the post-increment operand.
+/// 2. Make sure that the store value operand (Rt/Rtt) in a store is always
+///    the last operand.
+///
+// Store doubleword.
+let isPredicable = 1 in
+def STrid : STInst<(outs),
+ (ins MEMri:$addr, DoubleRegs:$src1),
+ "memd($addr) = $src1",
+ [(store DoubleRegs:$src1, ADDRriS11_3:$addr)]>;
+
+// Indexed store doubleword.
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_3Imm:$src2, DoubleRegs:$src3),
+ "memd($src1+#$src2) = $src3",
+ [(store DoubleRegs:$src3,
+ (add IntRegs:$src1, s11_3ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STdri : STInstPI<(outs IntRegs:$dst),
+ (ins DoubleRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memd($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store DoubleRegs:$src1, IntRegs:$src2, s4_3ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store doubleword conditionally.
+// if ([!]Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1) memd($addr) = $src2",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1) memd($addr) = $src2",
+ []>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1) memd($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1 in
+def POST_STdri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">;
+
+
+// Store byte.
+// memb(Rs+#s11:0)=Rt
+let isPredicable = 1 in
+def STrib : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1",
+ [(truncstorei8 IntRegs:$src1, ADDRriS11_0:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3",
+ [(truncstorei8 IntRegs:$src3, (add IntRegs:$src1,
+ s11_0ImmPred:$src2))]>;
+
+// memb(gp+#u16:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>;
+
+// memb(Rx++#s4:0)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ s4Imm:$offset),
+ "memb($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti8 IntRegs:$src1, IntRegs:$src2,
+ s4_0ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store byte conditionally.
+// if ([!]Pv) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2",
+ []>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STbri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STbri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store halfword.
+// memh(Rs+#s11:1)=Rt
+let isPredicable = 1 in
+def STrih : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1",
+ [(truncstorei16 IntRegs:$src1, ADDRriS11_1:$addr)]>;
+
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3",
+ [(truncstorei16 IntRegs:$src3, (add IntRegs:$src1,
+ s11_1ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>;
+
+// memh(Rx++#s4:1)=Rt.H
+// memh(Rx++#s4:1)=Rt
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memh($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_truncsti16 IntRegs:$src1, IntRegs:$src2,
+ s4_1ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store halfword conditionally.
+// if ([!]Pv) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2",
+ []>;
+
+// if (Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_SThri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_SThri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+// Store word.
+// Store predicate.
+let Defs = [R10,R11] in
+def STriw_pred : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>;
+
+// memw(Rs+#s11:2)=Rt
+let isPredicable = 1 in
+def STriw : STInst<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1",
+ [(store IntRegs:$src1, ADDRriS11_2:$addr)]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed : STInst<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3",
+ [(store IntRegs:$src3, (add IntRegs:$src1, s11_2ImmPred:$src2))]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>;
+
+let hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri : STInstPI<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4Imm:$offset),
+ "memw($src2++#$offset) = $src1",
+ [(set IntRegs:$dst,
+ (post_store IntRegs:$src1, IntRegs:$src2, s4_2ImmPred:$offset))],
+ "$src2 = $dst">;
+
+// Store word conditionally.
+// if ([!]Pv) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2",
+ []>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_indexed_cNotPt : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4",
+ []>;
+
+// if ([!]Pv) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STwri_cPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1, isPredicated = 1 in
+def POST_STwri_cNotPt : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">;
+
+
+
+// Allocate stack frame.
+let Defs = [R29, R30], Uses = [R31, R30], neverHasSideEffects = 1 in {
+ def ALLOCFRAME : STInst<(outs),
+ (ins i32imm:$amt),
+ "allocframe(#$amt)",
+ []>;
+}
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/ALU +
+//===----------------------------------------------------------------------===//
+// Logical NOT.
+def NOT_rr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+ "$dst = not($src1)",
+ [(set DoubleRegs:$dst, (not DoubleRegs:$src1))]>;
+
+
+// Sign extend word to doubleword.
+def SXTW : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ "$dst = sxtw($src1)",
+ [(set DoubleRegs:$dst, (sext IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/BIT +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/BIT -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/COMPLEX -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PERM +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/PRED +
+//===----------------------------------------------------------------------===//
+// Predicate transfer.
+let neverHasSideEffects = 1 in
+def TFR_RsPd : SInst<(outs IntRegs:$dst), (ins PredRegs:$src1),
+ "$dst = $src1 // Should almost never emit this",
+ []>;
+
+def TFR_PdRs : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1 // Should almost never emit!",
+ [(set PredRegs:$dst, (trunc IntRegs:$src1))]>;
+//===----------------------------------------------------------------------===//
+// STYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+// Shift by immediate.
+def ASR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def ASRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = asr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def ASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = asl($src1, #$src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSR_ri : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, u5ImmPred:$src2))]>;
+
+def LSRd_ri : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ "$dst = lsr($src1, #$src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, u6ImmPred:$src2))]>;
+
+def LSRd_ri_acc : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2,
+ u6Imm:$src3),
+ "$dst += lsr($src2, #$src3)",
+ [(set DoubleRegs:$dst, (add DoubleRegs:$src1,
+ (srl DoubleRegs:$src2,
+ u6ImmPred:$src3)))],
+ "$src1 = $dst">;
+
+// Shift by immediate and accumulate.
+def ASR_rr_acc : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "$dst += asr($src2, $src3)",
+ [], "$src1 = $dst">;
+
+// Shift by immediate and add.
+def ADDASL : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ u3Imm:$src3),
+ "$dst = addasl($src1, $src2, #$src3)",
+ [(set IntRegs:$dst, (add IntRegs:$src1,
+ (shl IntRegs:$src2,
+ u3ImmPred:$src3)))]>;
+
+// Shift by register.
+def ASL_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asl($src1, $src2)",
+ [(set IntRegs:$dst, (shl IntRegs:$src1, IntRegs:$src2))]>;
+
+def ASR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set IntRegs:$dst, (sra IntRegs:$src1, IntRegs:$src2))]>;
+
+
+def LSR_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set IntRegs:$dst, (srl IntRegs:$src1, IntRegs:$src2))]>;
+
+def LSLd : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ "$dst = lsl($src1, $src2)",
+ [(set DoubleRegs:$dst, (shl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def ASRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = asr($src1, $src2)",
+ [(set DoubleRegs:$dst, (sra DoubleRegs:$src1, IntRegs:$src2))]>;
+
+def LSRd_rr : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ IntRegs:$src2),
+ "$dst = lsr($src1, $src2)",
+ [(set DoubleRegs:$dst, (srl DoubleRegs:$src1, IntRegs:$src2))]>;
+
+//===----------------------------------------------------------------------===//
+// STYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VH +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VH -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// STYPE/VW +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// STYPE/VW -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER +
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+// SYSTEM/SUPER -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER +
+//===----------------------------------------------------------------------===//
+def SDHexagonBARRIER: SDTypeProfile<0, 0, []>;
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDHexagonBARRIER,
+ [SDNPHasChain]>;
+
+let hasSideEffects = 1 in
+def BARRIER : STInst<(outs), (ins),
+ "barrier",
+ [(HexagonBARRIER)]>;
+
+//===----------------------------------------------------------------------===//
+// SYSTEM/USER -
+//===----------------------------------------------------------------------===//
+
+// TFRI64 - assembly mapped.
+let isReMaterializable = 1 in
+def TFRI64 : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+ "$dst = #$src1",
+ [(set DoubleRegs:$dst, s8Imm64Pred:$src1)]>;
+
+// Pseudo instruction to encode a set of conditional transfers.
+// This instruction is used instead of a mux and trades off code size
+// for performance. We perform this transformation optimistically, in
+// the hope that these instructions get promoted to dot-new transfers.
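+// For illustration: r2 = select(p0, r3, r4) is encoded as one of the
+// TFR_condset pseudos below and later expanded, e.g. into a pair of
+// predicated transfers:
+//   if (p0) r2 = r3
+//   if (!p0) r2 = r4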
+let AddedComplexity = 100 in
+def TFR_condset_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
+ IntRegs:$src2,
+ IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ri : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (select PredRegs:$src1, IntRegs:$src2, s12ImmPred:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ir : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, IntRegs:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst,
+ (select PredRegs:$src1, s12ImmPred:$src2, IntRegs:$src3))]>;
+
+let AddedComplexity = 100 in
+def TFR_condset_ii : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, s12Imm:$src2, s12Imm:$src3),
+ "Error; should not emit",
+ [(set IntRegs:$dst, (select PredRegs:$src1,
+ s12ImmPred:$src2,
+ s12ImmPred:$src3))]>;
+
+// Generate frameindex addresses.
+let isReMaterializable = 1 in
+def TFR_FI : ALU32_ri<(outs IntRegs:$dst), (ins FrameIndex:$src1),
+ "$dst = add($src1)",
+ [(set IntRegs:$dst, ADDRri:$src1)]>;
+
+//
+// CR-type instructions.
+//
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_i : CRInst<(outs), (ins brtarget:$offset, u10Imm:$src2),
+ "loop0($offset, #$src2)",
+ []>;
+}
+
+let neverHasSideEffects = 1, Defs = [SA0, LC0] in {
+def LOOP0_r : CRInst<(outs), (ins brtarget:$offset, IntRegs:$src2),
+ "loop0($offset, $src2)",
+ []>;
+}
+
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1,
+ Defs = [PC, LC0], Uses = [SA0, LC0] in {
+def ENDLOOP0 : CRInst<(outs), (ins brtarget:$offset),
+ ":endloop0",
+ []>;
+}
+
+// Support for generating global addresses.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// This pattern is incorrect. When we add small data, we should change
+// this pattern to use memw(#foo).
+let isMoveImm = 1 in
+def CONST32 : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_set_jt : LDInst<(outs IntRegs:$dst), (ins jumptablebase:$jt),
+ "$dst = CONST32(#$jt)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32 tjumptable:$jt))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32GP_set : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst,
+ (HexagonCONST32_GP tglobaladdr:$global))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Int_Real : LDInst<(outs IntRegs:$dst), (ins i32imm:$global),
+ "$dst = CONST32(#$global)",
+ [(set IntRegs:$dst, imm:$global) ]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST32_Label : LDInst<(outs IntRegs:$dst), (ins bblabel:$label),
+ "$dst = CONST32($label)",
+ [(set IntRegs:$dst, (HexagonCONST32 bbl:$label))]>;
+
+let isReMaterializable = 1, isMoveImm = 1 in
+def CONST64_Int_Real : LDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
+ "$dst = CONST64(#$global)",
+ [(set DoubleRegs:$dst, imm:$global) ]>;
+
+def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins),
+ "$dst = xor($dst, $dst)",
+ [(set PredRegs:$dst, 0)]>;
+
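+// Multiply 32 x 32 -> 64 signed and use the upper 32 bits of the result,
+// i.e. mulhs for i32, as the pattern below makes explicit.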
+def MPY_trsext : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = mpy($src1, $src2)",
+ [(set IntRegs:$dst,
+ (trunc (i64 (srl (i64 (mul (i64 (sext IntRegs:$src1)),
+ (i64 (sext IntRegs:$src2)))),
+ (i32 32)))))]>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def call : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
+// an optional glue input, and variable arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+let Defs = [R29, R30], Uses = [R31, R30, R29] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ "Should never be emitted",
+ [(callseq_start timm:$amt)]>;
+}
+
+let Defs = [R29, R30, R31], Uses = [R29] in {
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ "Should never be emitted",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALL : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>;
+}
+
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLR : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>;
+ }
+
+// Tail Calls.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtg : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNtext : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // TAILCALL", []>;
+}
+
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10,
+ R22, R23, R28, R31, P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def TCRETURNR : JInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "jumpr $dst // TAILCALL", []>;
+}
+// Map call instruction.
+def : Pat<(call IntRegs:$dst),
+ (CALLR IntRegs:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>, Requires<[HasV2TOnly]>;
+def : Pat<(call texternalsym:$dst),
+ (CALL texternalsym:$dst)>, Requires<[HasV2TOnly]>;
+// Tail calls.
+def : Pat<(HexagonTCRet tglobaladdr:$dst),
+ (TCRETURNtg tglobaladdr:$dst)>;
+def : Pat<(HexagonTCRet texternalsym:$dst),
+ (TCRETURNtext texternalsym:$dst)>;
+def : Pat<(HexagonTCRet IntRegs:$dst),
+ (TCRETURNR IntRegs:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1).
+def : Pat <(and IntRegs:$src1, 65535),
+ (ZXTH IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def : Pat <(and IntRegs:$src1, 255),
+ (ZXTB IntRegs:$src1)>;
+
+// Map add(p1, true) to p1 = not(p1).
+// add(p1, false) should never be produced; if it is, it must be mapped
+// to a NOP.
+def : Pat <(add PredRegs:$src1, -1),
+ (NOT_p PredRegs:$src1)>;
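+// (In i1 arithmetic -1 is "true" and addition is xor, so p1 + true == !p1.)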
+
+// Map p0 = setlt(r0, r1); r2 = mux(p0, r3, r4) =>
+// p0 = cmp.lt(r0, r1); r2 = mux(p0, r4, r3) (operands swapped to match
+// CMPLTrr).
+def : Pat <(select (i1 (setlt IntRegs:$src1, IntRegs:$src2)), IntRegs:$src3,
+ IntRegs:$src4),
+ (TFR_condset_rr (CMPLTrr IntRegs:$src1, IntRegs:$src2), IntRegs:$src4,
+ IntRegs:$src3)>, Requires<[HasV2TOnly]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def : Pat <(select (not PredRegs:$src1), s8ImmPred:$src2, s8ImmPred:$src3),
+ (TFR_condset_ii PredRegs:$src1, s8ImmPred:$src3, s8ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def : Pat <(brcond (not PredRegs:$src1), bb:$offset),
+ (JMP_cNot PredRegs:$src1, bb:$offset)>;
+
+// Map from p2 = pnot(p2); p1 = and(p0, p2) => p1 = and(p0, !p2).
+def : Pat <(and PredRegs:$src1, (not PredRegs:$src2)),
+ (AND_pnotp PredRegs:$src1, PredRegs:$src2)>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP tglobaladdr:$global, u16ImmPred:$offset, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store DoubleRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STrid_GP tglobaladdr:$global, 0, DoubleRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(store IntRegs:$src1, (HexagonCONST32_GP tglobaladdr:$global)),
+ (STriw_GP tglobaladdr:$global, 0, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei16 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP tglobaladdr:$global, u16ImmPred:$offset, IntRegs:$src1)>;
+
+// Map from store(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(truncstorei8 IntRegs:$src1,
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, IntRegs:$src1)>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriw_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memw(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(load (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDw_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (LDrid_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memd(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (LDd_GP tglobaladdr:$global)>;
+
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress + 0), Pd = Rd.
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (TFR_PdRs (LDrib_GP tglobaladdr:$global, 0))>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrih_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDrih_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memuh(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriuh_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memuh(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriuh_GP tglobaladdr:$global, 0)>;
+// Map from load(globaladdress + x) -> memub(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDriub_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memub(#foo + 0).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDriub_GP tglobaladdr:$global, 0)>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (LDrib_GP tglobaladdr:$global, u16ImmPred:$offset)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memb(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memub(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
+
+// When the interprocedural global variable optimizer realizes that a
+// certain global variable takes only two constant values, it shrinks the
+// global to a boolean. Catch those loads here with the following three
+// patterns.
+let AddedComplexity = 100 in
+def : Pat <(extloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(sextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDb_GP tglobaladdr:$global)>;
+
+let AddedComplexity = 100 in
+def : Pat <(zextloadi1 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDub_GP tglobaladdr:$global)>;
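+// For instance (an illustrative sketch): if globalopt rewrites an i8 global
+// that only ever holds 0 or 1 into an i1 global, the resulting
+// (zextloadi1 @g) is matched above and lowered to a memub load.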
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(extloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(sextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDh_GP tglobaladdr:$global)>;
+
+// Map from load(globaladdress) -> memuh(#foo).
+let AddedComplexity = 100 in
+def : Pat <(zextloadi16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (LDuh_GP tglobaladdr:$global)>;
+
+// Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned.
+def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)),
+ (AND_rr (LDrib ADDRriS11_0:$addr), (TFRI 0x1))>;
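+// memb sign-extends and bits 7:1 of the loaded byte may hold garbage, so
+// the and with #1 keeps only bit 0.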
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = SXTW(Rss.lo).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i32)),
+ (i64 (SXTW (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = SXTW(SXTH(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i16)),
+ (i64 (SXTW (SXTH (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = SXTW(SXTB(Rss.lo)).
+def : Pat <(i64 (sext_inreg DoubleRegs:$src1, i8)),
+ (i64 (SXTW (SXTB (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg))))>;
+
+// We want to avoid emitting pnots as much as possible.
+// Map a brcond with an unsupported setcc to a JMP_cNot.
+def : Pat <(brcond (i1 (setne IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_cNot (CMPEQrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne IntRegs:$src1, s10ImmPred:$src2)), bb:$offset),
+ (JMP_cNot (CMPEQri IntRegs:$src1, s10ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 -1))), bb:$offset),
+ (JMP_cNot PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne PredRegs:$src1, (i1 0))), bb:$offset),
+ (JMP_c PredRegs:$src1, bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, s8ImmPred:$src2)), bb:$offset),
+ (JMP_cNot (CMPGEri IntRegs:$src1, s8ImmPred:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setlt IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_c (CMPLTrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule IntRegs:$src1, IntRegs:$src2)), bb:$offset),
+ (JMP_cNot (CMPGTUrr IntRegs:$src1, IntRegs:$src2), bb:$offset)>;
+
+def : Pat <(brcond (i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ bb:$offset),
+ (JMP_cNot (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes, so emulate them with combines.
+def : Pat <(select PredRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ (COMBINE_rr
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_hireg)),
+ (MUX_rr PredRegs:$src1,
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src3, subreg_loreg)))>;
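+// Conceptually: rdd = select(p0, rss, rtt) becomes
+//   rdd.hi = mux(p0, rss.hi, rtt.hi); rdd.lo = mux(p0, rss.lo, rtt.lo)
+// with the two halves recombined by combine(hi, lo).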
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def : Pat <(select PredRegs:$src1, PredRegs:$src2, PredRegs:$src3),
+ (OR_pp (AND_pp PredRegs:$src1, PredRegs:$src2),
+ (AND_pp (NOT_p PredRegs:$src1), PredRegs:$src3))>;
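+// (When B1 is true the first conjunct yields B2; when B1 is false the
+// second yields B3, so the identity holds for all i1 values.)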
+
+// Map Pd = load(addr) -> Rs = load(addr); Pd = Rs.
+def : Pat<(i1 (load ADDRriS11_2:$addr)),
+ (i1 (TFR_PdRs (i32 (LDrib ADDRriS11_2:$addr))))>;
+
+// Map for truncating from i64 to i32.
+def : Pat<(i32 (trunc DoubleRegs:$src)),
+ (i32 (EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))>;
+
+// Map for truncating from i64 to i1.
+def : Pat<(i1 (trunc DoubleRegs:$src)),
+ (i1 (TFR_PdRs (i32(EXTRACT_SUBREG DoubleRegs:$src, subreg_loreg))))>;
+
+// Map memb(Rs) = Rdd -> memb(Rs) = Rt.
+def : Pat<(truncstorei8 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrib ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memh(Rs) = Rdd -> memh(Rs) = Rt.
+def : Pat<(truncstorei16 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STrih ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map memw(Rs) = Rdd -> memw(Rs) = Rt.
+def : Pat<(truncstorei32 DoubleRegs:$src, ADDRriS11_0:$addr),
+ (STriw ADDRriS11_0:$addr, (i32 (EXTRACT_SUBREG DoubleRegs:$src,
+ subreg_loreg)))>;
+
+// Map from i1 = constant<-1>; memb(addr) = i1 -> r0 = 1; memb(addr) = r0.
+def : Pat<(store (i1 -1), ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (TFRI 1))>;
+
+let AddedComplexity = 100 in
+// Map from i1 = constant<-1>; memw(CONST32(#foo)) = i1 -> r0 = 1;
+// memw(#foo) = r0
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP tglobaladdr:$global, (TFRI 1))>;
+
+// Map from memb(Rs) = Pd -> Rt = mux(Pd, #1, #0); store Rt.
+def : Pat<(store PredRegs:$src1, ADDRriS11_2:$addr),
+ (STrib ADDRriS11_2:$addr, (i32 (MUX_ii PredRegs:$src1, 1, 0)) )>;
+
+// Map Rdd = anyext(Rs) -> Rdd = sxtw(Rs).
+// Hexagon_TODO: We could probably use combine, but that would cost two
+// instructions. Is there a better way to do this?
+def : Pat<(i64 (anyext IntRegs:$src1)),
+ (i64 (SXTW IntRegs:$src1))>;
+
+// Map cmple -> cmpgt.
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_p (CMPGTri IntRegs:$src1, s10ImmPred:$src2)))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p (CMPGTrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def : Pat<(i1 (setle DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p (CMPGT64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, s10ImmPred:$src2)),
+ (i1 (NOT_p(i1 (CMPEQri IntRegs:$src1, s10ImmPred:$src2))))>;
+
+// Map cmpne(Rs, Rt) -> !cmpeq(Rs, Rt).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPEQrr IntRegs:$src1, IntRegs:$src2))))>;
+
+// Convert setne back to xor for Hexagon, since we compute with predicate
+// registers.
+def : Pat <(i1 (setne PredRegs:$src1, PredRegs:$src2)),
+ (i1 (XOR_pp PredRegs:$src1, PredRegs:$src2))>;
+
+// Map cmpne(Rss, Rtt) -> !cmpeq(Rss, Rtt).
+// rs != rt -> !(rs == rt).
+def : Pat <(i1 (setne DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPEHexagon4rr DoubleRegs:$src1, DoubleRegs:$src2))))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rt, Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPGTrr IntRegs:$src2, IntRegs:$src1))))>;
+
+def : Pat <(i1 (setge IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (CMPGEri IntRegs:$src1, s8ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def : Pat <(i1 (setge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p(i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// rs < rt -> !(rs >= rt).
+def : Pat <(i1 (setlt IntRegs:$src1, s8ImmPred:$src2)),
+ (i1 (NOT_p (CMPGEri IntRegs:$src1, s8ImmPred:$src2)))>;
+
+// Map cmplt(Rs, Rt) -> cmplt(Rs, Rt).
+// rs < rt -> rs < rt. Let assembler map it.
+def : Pat <(i1 (setlt IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPLTrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map cmplt(Rss, Rtt) -> cmpgt(Rtt, Rss).
+// rss < rtt -> (rtt > rss).
+def : Pat <(i1 (setlt DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGT64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpltu(Rs, Rt) -> cmpgtu(Rt, Rs).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult IntRegs:$src1, IntRegs:$src2)),
+ (i1 (CMPGTUrr IntRegs:$src2, IntRegs:$src1))>;
+
+// Map from cmpltu(Rss, Rtt) -> cmpgtu(Rtt, Rss).
+// rs < rt -> rt > rs.
+def : Pat <(i1 (setult DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p (CMPGTUrr IntRegs:$src2, IntRegs:$src1)))>;
+
+// Map from Rs >= Rt -> !(Rt > Rs).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setuge DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src2, DoubleRegs:$src1)))>;
+
+// Map from cmpleu(Rs, Rt) -> !cmpgtu(Rs, Rt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule IntRegs:$src1, IntRegs:$src2)),
+ (i1 (NOT_p (CMPGTUrr IntRegs:$src1, IntRegs:$src2)))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// Map from (Rs <= Rt) -> !(Rs > Rt).
+def : Pat <(i1 (setule DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i1 (NOT_p (CMPGTU64rr DoubleRegs:$src1, DoubleRegs:$src2)))>;
+
+// Sign extends.
+// i1 -> i32
+def : Pat <(i32 (sext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, -1, 0))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (sextloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert any-extended load back to load and sign extend.
+// i8 -> i64
+def: Pat <(i64 (extloadi8 ADDRriS11_0:$src1)),
+ (i64 (SXTW (LDrib ADDRriS11_0:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i16 -> i64
+def: Pat <(i64 (sextloadi16 ADDRriS11_1:$src1)),
+ (i64 (SXTW (LDrih ADDRriS11_1:$src1)))>;
+
+// Convert sign-extended load back to load and sign extend.
+// i32 -> i64
+def: Pat <(i64 (sextloadi32 ADDRriS11_2:$src1)),
+ (i64 (SXTW (LDriw ADDRriS11_2:$src1)))>;
+
+
+// Zero extends.
+// i1 -> i32
+def : Pat <(i32 (zext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// i1 -> i64
+def : Pat <(i64 (zext PredRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (MUX_ii PredRegs:$src1, 1, 0)))>;
+
+// i32 -> i64
+def : Pat <(i64 (zext IntRegs:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), IntRegs:$src1))>;
+
+// i8 -> i64
+def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
+
+// i16 -> i64
+def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
+
+// i32 -> i64
+def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
+ (i32 (LDriw ADDRriS11_0:$src1))>;
+
+// Map from Rd = anyext(Pd) to Rd = mux(Pd, #1, #0).
+def : Pat <(i32 (anyext PredRegs:$src1)),
+ (i32 (MUX_ii PredRegs:$src1, 1, 0))>;
+
+// Map from Rdd = anyext(Pd) to Rdd = sxtw(mux(Pd, #1, #0)).
+def : Pat <(i64 (anyext PredRegs:$src1)),
+ (i64 (SXTW (i32 (MUX_ii PredRegs:$src1, 1, 0))))>;
+
+
+// Any extended 64-bit load.
+// anyext i32 -> i64
+def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
+
+// anyext i16 -> i64.
+def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
+ (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPYU64_acc(MPYU64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
+
+// Multiply 64-bit signed and use upper result.
+def : Pat <(mulhs DoubleRegs:$src1, DoubleRegs:$src2),
+ (MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG
+ (LSRd_ri(MPY64_acc(MPY64_acc(COMBINE_rr (TFRI 0),
+ (EXTRACT_SUBREG (LSRd_ri(MPYU64
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ 32) ,subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1,
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2,
+ subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ 32),subreg_loreg)),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)
+ )>;
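+// Both patterns implement the schoolbook decomposition: with each 64-bit
+// operand split into 32-bit halves (a = aH:aL, b = bH:bL),
+//   a*b = (aH*bH << 64) + ((aH*bL + aL*bH) << 32) + aL*bL
+// so the upper 64 bits come out as
+//   hi64(a*b) = aH*bH + ((aH*bL + aL*bH + hi32(aL*bL)) >> 32).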
+
+// Hexagon specific ISD nodes.
+def SDTHexagonADJDYNALLOC : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>]>;
+def Hexagon_ADJDYNALLOC : SDNode<"HexagonISD::ADJDYNALLOC",
+ SDTHexagonADJDYNALLOC>;
+// Needed to tag these instructions for stack layout.
+let usesCustomInserter = 1 in
+def ADJDYNALLOC : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1,
+ s16Imm:$src2),
+ "$dst = add($src1, #$src2)",
+ [(set IntRegs:$dst, (Hexagon_ADJDYNALLOC IntRegs:$src1,
+ s16ImmPred:$src2))]>;
+
+def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, []>;
+def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
+def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
+ "$dst = $src1",
+ [(set IntRegs:$dst, (Hexagon_ARGEXTEND IntRegs:$src1))]>;
+
+let AddedComplexity = 100 in
+def : Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND IntRegs:$src1), i16)),
+ (TFR IntRegs:$src1)>;
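+// The pattern above folds the redundant extension: ARGEXTEND marks a value
+// that is presumably already properly extended on entry (an assumption
+// about the calling convention), so sext_inreg to i16 reduces to a plain
+// register transfer.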
+
+
+def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : JRInst<(outs), (ins IntRegs:$src),
+ "jumpr $src",
+ [(HexagonBR_JT IntRegs:$src)]>;
+def HexagonWrapperJT: SDNode<"HexagonISD::WrapperJT", SDTIntUnaryOp>;
+
+def : Pat<(HexagonWrapperJT tjumptable:$dst),
+ (CONST32_set_jt tjumptable:$dst)>;
+
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV3.td"
+
+//===----------------------------------------------------------------------===//
+// V3 Instructions -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrInfoV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Instructions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV3.td b/lib/Target/Hexagon/HexagonInstrInfoV3.td
new file mode 100644
index 000000000000..2bd6770efd7d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -0,0 +1,137 @@
+//=- HexagonInstrInfoV3.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// J +
+//===----------------------------------------------------------------------===//
+// Call subroutine.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLv3 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst", []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// J -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// JR +
+//===----------------------------------------------------------------------===//
+// Call subroutine from register.
+let isCall = 1, neverHasSideEffects = 1,
+ Defs = [D0, D1, D2, D3, D4, D5, D6, D7, R28, R31,
+ P0, P1, P2, P3, LC0, LC1, SA0, SA1] in {
+ def CALLRv3 : JRInst<(outs), (ins IntRegs:$dst, variable_ops),
+ "callr $dst",
+ []>, Requires<[HasV3TOnly]>;
+ }
+
+
+// Jump to address from register.
+// if (p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:t r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnNotPt_V3: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:t $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// Not taken.
+// if (p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+// if (!p?.new) jumpr:nt r?
+let isReturn = 1, isTerminator = 1, isBarrier = 1,
+ Defs = [PC], Uses = [R31] in {
+ def JMPR_cdnNotPnt: JRInst<(outs), (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) jumpr:nt $src2",
+ []>, Requires<[HasV3T]>;
+}
+
+//===----------------------------------------------------------------------===//
+// JR -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU +
+//===----------------------------------------------------------------------===//
+
+let AddedComplexity = 200 in
+def MAXw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = max($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setlt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
+Requires<[HasV3T]>;
+
+let AddedComplexity = 200 in
+def MINw_dd : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ "$dst = min($src2, $src1)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (select (i1 (setgt (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src1))),
+ (i64 DoubleRegs:$src1),
+ (i64 DoubleRegs:$src2))))]>,
+Requires<[HasV3T]>;
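+// In both definitions the select encodes the standard comparison form:
+// choosing $src1 when $src2 < $src1 yields max, and choosing $src1 when
+// $src2 > $src1 yields min.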
+
+//===----------------------------------------------------------------------===//
+// ALU64/ALU -
+//===----------------------------------------------------------------------===//
+
+
+
+
+//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
+// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>, Requires<[HasV3T]>;
+
+
+// Map call instructions.
+def : Pat<(call (i32 IntRegs:$dst)),
+ (CALLRv3 (i32 IntRegs:$dst))>, Requires<[HasV3T]>;
+def : Pat<(call tglobaladdr:$dst),
+ (CALLv3 tglobaladdr:$dst)>, Requires<[HasV3T]>;
+def : Pat<(call texternalsym:$dst),
+ (CALLv3 texternalsym:$dst)>, Requires<[HasV3T]>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
new file mode 100644
index 000000000000..f507e4f37c18
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -0,0 +1,5746 @@
+//=- HexagonInstrInfoV4.td - Target Desc. for Hexagon Target -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V4 instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def IMMEXT : Immext<(outs), (ins),
+ "##immext //should never emit this",
+ []>,
+ Requires<[HasV4T]>;
+
+// The Hexagon V4 architecture spec defines the following instruction classes:
+// LD, ST, ALU32, XTYPE, J, JR, MEMOP, NV, CR, and SYSTEM (SYSTEM is not
+// implemented in the compiler).
+
+// LD Instructions:
+// ========================================
+// Loads (8/16/32/64 bit)
+// Deallocframe
+
+// ST Instructions:
+// ========================================
+// Stores (8/16/32/64 bit)
+// Allocframe
+
+// ALU32 Instructions:
+// ========================================
+// Arithmetic / Logical (32 bit)
+// Vector Halfword
+
+// XTYPE Instructions (32/64 bit):
+// ========================================
+// Arithmetic, Logical, Bit Manipulation
+// Multiply (Integer, Fractional, Complex)
+// Permute / Vector Permute Operations
+// Predicate Operations
+// Shift / Shift with Add/Sub/Logical
+// Vector Byte ALU
+// Vector Halfword (ALU, Shift, Multiply)
+// Vector Word (ALU, Shift)
+
+// J Instructions:
+// ========================================
+// Jump/Call PC-relative
+
+// JR Instructions:
+// ========================================
+// Jump/Call Register
+
+// MEMOP Instructions:
+// ========================================
+// Operation on memory (8/16/32 bit)
+
+// NV Instructions:
+// ========================================
+// New-value Jumps
+// New-value Stores
+
+// CR Instructions:
+// ========================================
+// Control-Register Transfers
+// Hardware Loop Setup
+// Predicate Logicals & Reductions
+
+// SYSTEM Instructions (not implemented in the compiler):
+// ========================================
+// Prefetch
+// Cache Maintenance
+// Bus Operations
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 +
+//===----------------------------------------------------------------------===//
+
+// Shift halfword.
+
+let isPredicated = 1 in
+def ASLH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASLH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASLH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASLH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = aslh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def ASRH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = asrh($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Sign extend.
+
+let isPredicated = 1 in
+def SXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicated = 1 in
+def SXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicated = 1 in
+def SXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = sxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Zero extend.
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTB_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxtb($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cdnPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if ($src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+let neverHasSideEffects = 1, isPredicated = 1 in
+def ZXTH_cdnNotPt_V4 : ALU32_rr<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2),
+ "if (!$src1.new) $dst = zxth($src2)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Generate frame index addresses.
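+// (The "##" in the mnemonic below marks a constant-extended 32-bit offset.)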
+let neverHasSideEffects = 1, isReMaterializable = 1 in
+def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s32Imm:$offset),
+ "$dst = add($src1, ##$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// ALU32 -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// ALU32/PERM +
+//===----------------------------------------------------------------------===//
+
+// Combine
+// Rdd=combine(Rs, #s8)
+let neverHasSideEffects = 1 in
+def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = combine($src1, #$src2)",
+ []>,
+ Requires<[HasV4T]>;
+// Rdd=combine(#s8, Rs)
+let neverHasSideEffects = 1 in
+def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+ (ins s8Imm:$src1, IntRegs:$src2),
+ "$dst = combine(#$src1, $src2)",
+ []>,
+ Requires<[HasV4T]>;
+//===----------------------------------------------------------------------===//
+// ALU32/PERM -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// LD +
+//===----------------------------------------------------------------------===//
+//
+// These absolute-set addressing mode instructions take an immediate as an
+// operand. The patterns are duplicated below to take a global address.
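+//
+// For example, "$dst1 = memw($dst2=#$addr)" loads from the absolute address
+// #$addr and also writes that address into $dst2.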
+
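+// Rdd=memd(Re=#U6)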
+let neverHasSideEffects = 1 in
+def LDrid_abs_setimm_V4 : LDInst<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memd($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrib_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memb($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDrih_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memh($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriub_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memub($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriuh_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memuh($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=#U6)
+let neverHasSideEffects = 1 in
+def LDriw_abs_setimm_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins u6Imm:$addr),
+ "$dst1 = memw($dst2=#$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// The following patterns are defined for absolute-set addressing mode
+// instructions that take a global address as an operand.
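+// Rdd=memd(Re=##global)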
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_abs_set_V4 : LDInst<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memd($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Re=##global)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memb($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Re=##global)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memub(Re=##global)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memub($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memuh(Re=##global)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memuh($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Re=##global)
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_abs_set_V4 : LDInst<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins globaladdress:$addr),
+ "$dst1 = memw($dst2=##$addr)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Load doubleword.
+//
+// Make sure that in a post-increment load, the first operand is always the
+// post-increment operand.
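+// (In the POST_LD* definitions below, the "$src2 = $dst2" constraint ties
+// the incremented base register to its output.)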
+//
+// Rdd=memd(Rs+Rt<<#u2)
+// Special-case pattern for an indexed load without an offset, which is easier
+// to match. The AddedComplexity of this pattern is kept lower than that of the
+// base+offset load; the generic version with an offset/shift below uses a
+// higher value, so instruction selection tries it first when a shift is
+// present. A similar approach is taken for all other base+index loads.
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memd($src1+$src2<<#0)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memd($src1+$src2<<#$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (i64 (load (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load doubleword conditionally.
+// if ([!]Pv[.new]) Rdd=memd(Rs+Rt<<#u2)
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrid_indexed_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rdd=memd(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrid_indexed_shl_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rdd=memd(Rt<<#u2+#U6)
+
+//// Load byte.
+// Rd=memb(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrib_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memb($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memub($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi8 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memb($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memub($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi8 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load byte conditionally.
+// if ([!]Pv[.new]) Rd=memb(Rs+Rt<<#u2)
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrib_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrib_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned byte conditionally.
+// if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriub_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriub_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memb(Rt<<#u2+#U6)
+
+//// Load halfword
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDrih_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memh($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memuh($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi16 (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memh($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (sextloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (zextloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memuh($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (extloadi16 (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load halfword conditionally.
+// if ([!]Pv[.new]) Rd=memh(Rs+Rt<<#u2)
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDrih_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDrih_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+//// Load unsigned halfword conditionally.
+// if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriuh_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memh(Rt<<#u2+#U6)
+
+//// Load word.
+// Load predicate: Fix for bug 5279.
+let mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_pred_V4 : LDInst<(outs PredRegs:$dst),
+ (ins MEMri:$addr),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
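+// LDriw_pred_V4 is a pseudo for reloading a predicate register through
+// memory; it is expanded before emission (hence the placeholder mnemonic).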
+
+// Rd=memw(Re=#U6)
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 10, isPredicable = 1 in
+def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst=memw($src1+$src2<<#0)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (load (add (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)))))]>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rs+Rt<<#u2)
+let AddedComplexity = 40, isPredicable = 1 in
+def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+ "$dst=memw($src1+$src2<<#$offset)",
+ [(set (i32 IntRegs:$dst),
+ (i32 (load (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$offset)))))]>,
+ Requires<[HasV4T]>;
+
+//// Load word conditionally.
+// if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if ($src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 15, isPredicated = 1 in
+def LDriw_indexed_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#0)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rd=memw(Rs+Rt<<#u2)
+let mayLoad = 1, AddedComplexity = 45, isPredicated = 1 in
+def LDriw_indexed_shl_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
+ u2Imm:$offset),
+ "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+// Rd=memw(Rt<<#u2+#U6)
+
+
+// Post-increment load, predicated, dot-new.
+
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cdnPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if ($src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrid_cdnNotPt_V4 : LDInstPI<(outs DoubleRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_3Imm:$src3),
+ "if (!$src1.new) $dst1 = memd($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrib_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memb($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDrih_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if ($src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriub_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_0Imm:$src3),
+ "if (!$src1.new) $dst1 = memub($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if ($src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriuh_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_1Imm:$src3),
+ "if (!$src1.new) $dst1 = memuh($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cdnPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if ($src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, hasCtrlDep = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def POST_LDriw_cdnNotPt_V4 : LDInstPI<(outs IntRegs:$dst1, IntRegs:$dst2),
+ (ins PredRegs:$src1, IntRegs:$src2, s4_2Imm:$src3),
+ "if (!$src1.new) $dst1 = memw($src2++#$src3)",
+ [],
+ "$src2 = $dst2">,
+ Requires<[HasV4T]>;
+
+// Load from a global address plus immediate offset.
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDrid_GP_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memd(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrid_GP_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memd(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDrib_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memb(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrib_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memb(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDriub_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memub(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriub_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memub(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDrih_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memh(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDrih_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDriuh_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memuh(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriuh_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memuh(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDriw_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global, u16Imm:$offset),
+ "$dst=memw(#$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if ($src1.new) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDriw_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset),
+ "if (!$src1.new) $dst=memw(##$global+$offset)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDd_GP_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memd(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rtt=memd(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDd_GP_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memd(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDb_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memb(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rt=memb(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDb_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memb(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDub_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memub(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rt=memub(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDub_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memub(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDh_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memh(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rt=memh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDh_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDuh_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memuh(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) Rt=memuh(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDuh_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memuh(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayLoad = 1, neverHasSideEffects = 1 in
+def LDw_GP_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$global),
+ "$dst=memw(#$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if ($src1.new) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if (!Pv.new) Rt=memw(##global)
+let mayLoad = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def LDw_GP_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$global),
+ "if (!$src1.new) $dst=memw(##$global)",
+ []>,
+ Requires<[HasV4T]>;
+
+
+
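+// Atomic (monotonic) loads of a global map onto the ordinary GP-relative
+// loads; naturally aligned Hexagon loads are assumed to be atomic.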
+def : Pat <(atomic_load_64 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_32 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_16 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_8 (HexagonCONST32_GP tglobaladdr:$global)),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i64 (LDd_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
+let AddedComplexity = 100 in
+def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i1 (TFR_PdRs (i32 (LDb_GP_V4 tglobaladdr:$global))))>,
+ Requires<[HasV4T]>;
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the three i1 load patterns below
+// (extloadi1, sextloadi1, zextloadi1).
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDb_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi1 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memub(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDub_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memuh(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDuh_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (HexagonCONST32_GP tglobaladdr:$global))),
+ (i32 (LDw_GP_V4 tglobaladdr:$global))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+def : Pat <(atomic_load_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i64 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i64 (LDrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memub(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriub_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (extloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (sextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+
+// Map from load(globaladdress + x) -> memuh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (zextloadi16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriuh_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+// Map from load(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset))),
+ (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// LD -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ST +
+//===----------------------------------------------------------------------===//
+///
+/// Assumptions: ****** DO NOT IGNORE ********
+/// 1. In a post-increment store, the zeroth operand is always the
+///    post-increment operand.
+/// 2. The store value operand (Rt/Rtt) in a store is always the last operand.
+///
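+/// For example, in STrid_indexed_shl_V4 below the stored value $src4 is the
+/// last input operand, and in POST_STdri_cdnPt_V4 the updated base register
+/// is the zeroth output operand.
+///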
+
+// memd(Re=#U6)=Rtt
+def STrid_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, u6Imm:$src2),
+ "memd($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=#U6)=Rs
+def STrib_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memb($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=#U6)=Rs
+def STrih_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memh($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=#U6)=Rs
+def STriw_abs_setimm_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, u6Imm:$src2),
+ "memw($dst1=#$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(Re=##global)=Rtt
+def STrid_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins DoubleRegs:$src1, globaladdress:$src2),
+ "memd($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Re=##global)=Rs
+def STrib_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memb($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Re=##global)=Rs
+def STrih_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memh($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Re=##global)=Rs
+def STriw_abs_set_V4 : STInst<(outs IntRegs:$dst1),
+ (ins IntRegs:$src1, globaladdress:$src2),
+ "memw($dst1=##$src2) = $src1",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrid_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
+ "memd($src1+$src2<<#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memd(Ru<<#u2+#U6)=Rtt
+let AddedComplexity = 10 in
+def STrid_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, DoubleRegs:$src4),
+ "memd($src1<<#$src2+#$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memd(Rx++#s4:3)=Rtt
+// memd(Rx++#s4:3:circ(Mu))=Rtt
+// memd(Rx++I:circ(Mu))=Rtt
+// memd(Rx++Mu)=Rtt
+// memd(Rx++Mu:brev)=Rtt
+// memd(gp+#u16:3)=Rtt
+
+// Store doubleword conditionally.
+// if ([!]Pv[.new]) memd(#u6)=Rtt
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if ($src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
+ "if (!$src1.new) memd($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(Rs+#u6:3)=Rtt
+// if (Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if ($src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rs+#u6:3)=Rtt
+// if (!Pv.new) memd(Rs+#u6:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
+ DoubleRegs:$src4),
+ "if (!$src1.new) memd($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
+// if (Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrid_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ DoubleRegs:$src5),
+ "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
+// if (Pv) memd(Rx++#s4:3)=Rtt
+// if (Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if ($src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(Rx++#s4:3)=Rtt
+// if (!Pv.new) memd(Rx++#s4:3)=Rtt
+let AddedComplexity = 10, mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def POST_STdri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, DoubleRegs:$src2, IntRegs:$src3,
+ s4_3Imm:$offset),
+ "if (!$src1.new) memd($src3++#$offset) = $src2",
+ [],
+ "$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store byte.
+// memb(Rs+#u6:0)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
+ "memb($src1+#$src2) = #$src3",
+ [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_0ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4",
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrib_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei8 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Rt
+// memb(Rx++I:circ(Mu))=Rt
+// memb(Rx++Mu)=Rt
+// memb(Rx++Mu:brev)=Rt
+// memb(gp+#u16:0)=Rt
+
+
+// Store byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Rt
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
+// if (Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Rt
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Rt
+// if (Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
+// if (Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
+// if (Pv) memb(Rx++#s4:0)=Rt
+// if (Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Rt
+// if (!Pv.new) memb(Rx++#s4:0)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store halfword.
+// TODO: needs to be implemented
+// memh(Re=#U6)=Rt.H
+// memh(Rs+#s11:1)=Rt.H
+// memh(Rs+#u6:1)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
+ "memh($src1+#$src2) = #$src3",
+ [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Rt.H
+// TODO: needs to be implemented.
+
+// memh(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4",
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Rt.H
+// memh(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STrih_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4",
+ [(truncstorei16 (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Rt.H
+// memh(Rx++#s4:1:circ(Mu))=Rt
+// memh(Rx++I:circ(Mu))=Rt.H
+// memh(Rx++I:circ(Mu))=Rt
+// memh(Rx++Mu)=Rt.H
+// memh(Rx++Mu)=Rt
+// memh(Rx++Mu:brev)=Rt.H
+// memh(Rx++Mu:brev)=Rt
+// memh(gp+#u16:1)=Rt
+// if ([!]Pv[.new]) memh(#u6)=Rt.H
+// if ([!]Pv[.new]) memh(#u6)=Rt
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
+// if (Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
+// TODO: needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
+// if (Pv) memh(Rs+#u6:1)=Rt
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Rt
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
+// if (Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
+// TODO: Needs to be implemented.
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt
+// if (Pv) memh(Rx++#s4:1)=Rt
+// if (Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Rt
+// if (!Pv.new) memh(Rx++#s4:1)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store word.
+// memw(Re=#U6)=Rt
+// TODO: Needs to be implemented.
+
+// Store predicate:
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_pred_V4 : STInst<(outs),
+ (ins MEMri:$addr, PredRegs:$src1),
+ "Error; should not emit",
+ []>,
+ Requires<[HasV4T]>;
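+
+// (A note on the placeholder string above: predicate spills are expected to
+// be expanded before emission, e.g. into a predicate-to-integer transfer
+// followed by a memw store, so this pseudo should never reach the assembly
+// printer. This is an inference from the "Error; should not emit" string.)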
+
+
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_imm_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
+ "memw($src1+#$src2) = #$src3",
+ [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1),
+ u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Rt
+let AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4",
+ [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1),
+ (shl (i32 IntRegs:$src2),
+ u2ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Rt
+let AddedComplexity = 10 in
+def STriw_shl_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4",
+ [(store (i32 IntRegs:$src4),
+ (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
+ u6ImmPred:$src3))]>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Rt
+// memw(Rx++#s4:2:circ(Mu))=Rt
+// memw(Rx++I:circ(Mu))=Rt
+// memw(Rx++Mu)=Rt
+// memw(Rx++Mu:brev)=Rt
+// memw(gp+#u16:2)=Rt
+
+
+// Store word conditionally.
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
+// if (Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if ($src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=#S6
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_imm_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = #$src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Rt
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Rt
+// if (Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Rt
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
+// if (Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
+// if (Pv) memw(Rx++#s4:2)=Rt
+// if (Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Rt
+// if (!Pv.new) memw(Rx++#s4:2)=Rt
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnNotPt_V4 : STInstPI<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store to global address.
+
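+// In the assembly strings below, "#$global" is an absolute address and
+// "##$global" a constant-extended one: the "##" marker requests an immediate
+// extender word so the full 32-bit address fits in the packet (an informal
+// summary; see the Hexagon manual). For example:
+//   memw(##foo+8) = r2
+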
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STrid_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, DoubleRegs:$src),
+ "memd(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if ($src1) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if (!$src1) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if ($src1.new) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrid_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrib_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memb(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STrih_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memh(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STriw_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memw(##$global+$offset) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memd(#global)=Rtt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STd_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, DoubleRegs:$src),
+ "memd(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if ($src1) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if (!$src1) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if ($src1.new) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memd(##global) = Rtt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STd_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(#global)=Rt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STb_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memb(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(#global)=Rt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STh_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memh(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(#global)=Rt
+let isPredicable = 1, mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_V4 : STInst<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(##global) = Rt
+let mayStore = 1, neverHasSideEffects = 1, isPredicated = 1 in
+def STw_GP_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memw(##$global) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
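+// Atomic store selection: the patterns below map atomic_store nodes onto
+// the ordinary global-address stores above. This relies on naturally
+// aligned Hexagon stores being single-copy atomic (a lowering assumption
+// of this backend; no separate atomic store instruction is involved).
+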
+// 64-bit atomic store
+def : Pat <(atomic_store_64 (HexagonCONST32_GP tglobaladdr:$global),
+ (i64 DoubleRegs:$src1)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memd(#foo)
+let AddedComplexity = 100 in
+def : Pat <(store (i64 DoubleRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STd_GP_V4 tglobaladdr:$global, (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// 8-bit atomic store
+def : Pat < (atomic_store_8 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memb(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
+// to "r0 = 1; memw(#foo) = r0"
+let AddedComplexity = 100 in
+def : Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STb_GP_V4 tglobaladdr:$global, (TFRI 1))>,
+ Requires<[HasV4T]>;
+
+// 16-bit atomic store
+def : Pat<(atomic_store_16 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memh(#foo)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (HexagonCONST32_GP tglobaladdr:$global)),
+ (STh_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// 32-bit atomic store
+def : Pat<(atomic_store_32 (HexagonCONST32_GP tglobaladdr:$global),
+ (i32 IntRegs:$src1)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress) -> memw(#foo)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (STw_GP_V4 tglobaladdr:$global, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_64 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i64 DoubleRegs:$src1)),
+ (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_32 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_16 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+def : Pat<(atomic_store_8 (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset),
+ (i32 IntRegs:$src1)),
+ (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memd(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i64 DoubleRegs:$src1), (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrid_GP_V4 tglobaladdr:$global, u16ImmPred:$offset,
+ (i64 DoubleRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memb(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrib_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memh(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STrih_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+// Map from store(globaladdress + x) -> memw(#foo + x)
+let AddedComplexity = 100 in
+def : Pat<(store (i32 IntRegs:$src1),
+ (add (HexagonCONST32_GP tglobaladdr:$global),
+ u16ImmPred:$offset)),
+ (STriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset, (i32 IntRegs:$src1))>,
+ Requires<[HasV4T]>;
+
+
+
+//===----------------------------------------------------------------------===//
+// ST -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NV/ST +
+//===----------------------------------------------------------------------===//
+
+// Store new-value byte.
+
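+// A new-value (.new) store writes a register value produced by another
+// instruction in the same packet; the assembler marks the stored source
+// with ".new", e.g. (illustrative):
+//   {
+//     r2 = add(r3, #1)
+//     memb(r0+#0) = r2.new
+//   }
+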
+// memb(Re=#U6)=Nt.new
+// memb(Rs+#s11:0)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memb($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrib_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
+ "memb($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memb($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrib_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memb($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STbri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_0Imm:$offset),
+ "memb($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memb(Rx++#s4:0:circ(Mu))=Nt.new
+// memb(Rx++I:circ(Mu))=Nt.new
+// memb(Rx++Mu)=Nt.new
+// memb(Rx++Mu:brev)=Nt.new
+
+// memb(gp+#u16:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memb(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memb(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memb(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// Store new-value byte conditionally.
+// if ([!]Pv[.new]) memb(#u6)=Nt.new
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memb($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memb($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
+// if (Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if ($src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(Rx++#s4:0)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STbri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_0Imm:$offset),
+ "if (!$src1.new) memb($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword.
+// memh(Re=#U6)=Nt.new
+// memh(Rs+#s11:1)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
+ "memh($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STrih_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
+ "memh($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memh($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STrih_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memh($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_SThri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_1Imm:$offset),
+ "memh($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memh(Rx++#s4:1:circ(Mu))=Nt.new
+// memh(Rx++I:circ(Mu))=Nt.new
+// memh(Rx++Mu)=Nt.new
+// memh(Rx++Mu:brev)=Nt.new
+
+// memh(gp+#u16:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memh(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memh(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memh(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// Store new-value halfword conditionally.
+
+// if ([!]Pv[.new]) memh(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memh(Rs+#u6:1)=Nt.new
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memh($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memh($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memh(Rx++#s4:1)=Nt.new
+// if (Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if ($src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(Rx++#s4:1)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_SThri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_1Imm:$offset),
+ "if (!$src1.new) memh($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Store new-value word.
+
+// memw(Re=#U6)=Nt.new
+// memw(Rs+#s11:2)=Nt.new
+let mayStore = 1, isPredicable = 1 in
+def STriw_nv_V4 : NVInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$src1),
+ "memw($addr) = $src1.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, isPredicable = 1 in
+def STriw_indexed_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
+ "memw($src1+#$src2) = $src3.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
+def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
+ "memw($src1+$src2<<#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Ru<<#u2+#U6)=Nt.new
+let mayStore = 1, AddedComplexity = 10 in
+def STriw_shl_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, u6Imm:$src3, IntRegs:$src4),
+ "memw($src1<<#$src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1, isPredicable = 1 in
+def POST_STwri_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s4_2Imm:$offset),
+ "memw($src2++#$offset) = $src1.new",
+ [],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// memw(Rx++#s4:2:circ(Mu))=Nt.new
+// memw(Rx++I:circ(Mu))=Nt.new
+// memw(Rx++Mu)=Nt.new
+// memw(Rx++Mu:brev)=Nt.new
+// memw(gp+#u16:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
+ "memw(#$global+$offset) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// memw(#global)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_nv_V4 : NVInst_V4<(outs),
+ (ins globaladdress:$global, IntRegs:$src),
+ "memw(#$global) = $src.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// Store new-value word conditionally.
+
+// if ([!]Pv[.new]) memw(#u6)=Nt.new
+
+// if ([!]Pv[.new]) memw(Rs+#u6:2)=Nt.new
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if ($src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
+ "if (!$src1.new) memw($addr) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if ($src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
+let mayStore = 1, neverHasSideEffects = 1,
+ isPredicated = 1 in
+def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
+ "if (!$src1.new) memw($src2+#$src3) = $src4.new",
+ []>,
+ Requires<[HasV4T]>;
+
+
+// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
+// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
+let mayStore = 1, AddedComplexity = 10,
+ isPredicated = 1 in
+def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+ IntRegs:$src5),
+ "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
+// if (Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if ($src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(Rx++#s4:2)=Nt.new
+let mayStore = 1, hasCtrlDep = 1,
+ isPredicated = 1 in
+def POST_STwri_cdnNotPt_nv_V4 : NVInstPI_V4<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, s4_2Imm:$offset),
+ "if (!$src1.new) memw($src3++#$offset) = $src2.new",
+ [],"$src3 = $dst">,
+ Requires<[HasV4T]>;
+
+
+
+// if (Pv) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memb(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STb_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memb(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memh(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STh_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memh(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (Pv.new) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if ($src1.new) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+// if (!Pv.new) memw(##global) = Nt.new
+let mayStore = 1, neverHasSideEffects = 1 in
+def STw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, IntRegs:$src2),
+ "if (!$src1.new) memw(##$global) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
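+// if ([!]Pv[.new]) mem[bhw](##global+#u16) = Nt.new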
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrib_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memb(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STrih_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memh(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cdnPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if ($src1.new) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+let mayStore = 1, neverHasSideEffects = 1 in
+def STriw_GP_cdnNotPt_nv_V4 : NVInst_V4<(outs),
+ (ins PredRegs:$src1, globaladdress:$global, u16Imm:$offset,
+ IntRegs:$src2),
+ "if (!$src1.new) memw(##$global+$offset) = $src2.new",
+ []>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// NV/ST -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// NV/J +
+//===----------------------------------------------------------------------===//
+
+multiclass NVJ_type_basic_reg<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, $src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, $src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
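+// For example, NVJ_type_basic_reg<"", "cmp.gt", "t"> produces the assembly
+// string "if (cmp.gt($src1.new, $src2)) jump:t $offset".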
+
+multiclass NVJ_type_basic_2ndDotNew<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1, $src2.new)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1, $src2.new)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_imm<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_neg<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, nOneImm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+multiclass NVJ_type_basic_tstbit<string NotStr, string OpcStr, string TakenStr> {
+ def _ie_nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _nv_V4 : NVInst_V4<(outs),
+ (ins IntRegs:$src1, u1Imm:$src2, brtarget:$offset),
+ !strconcat("if (", !strconcat(NotStr, !strconcat(OpcStr,
+ !strconcat("($src1.new, #$src2)) jump:",
+ !strconcat(TakenStr, " $offset"))))),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Multiclass for regular dot-new of the 1st operand register.
+multiclass NVJ_type_br_pred_reg<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_reg<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_reg<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for dot new of 2nd operand register.
+multiclass NVJ_type_br_pred_2ndDotNew<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_2ndDotNew<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for 2nd operand immediate, including -1.
+multiclass NVJ_type_br_pred_imm<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
+ defm Ptneg : NVJ_type_basic_neg<NotStr, OpcStr, "t">;
+ defm Pntneg : NVJ_type_basic_neg<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for 2nd operand immediate, excluding -1.
+multiclass NVJ_type_br_pred_imm_only<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_imm<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_imm<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for tstbit, where 2nd operand is always #0.
+multiclass NVJ_type_br_pred_tstbit<string NotStr, string OpcStr> {
+ defm Pt : NVJ_type_basic_tstbit<NotStr, OpcStr, "t">;
+ defm Pnt : NVJ_type_basic_tstbit<NotStr, OpcStr, "nt">;
+}
+
+// Multiclass for GT.
+multiclass NVJ_type_rr_ri<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
+ defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+}
+
+// Multiclass for EQ.
+multiclass NVJ_type_rr_ri_no_2ndDotNew<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm<"", OpcStr>;
+}
+
+// Multiclass for GTU.
+multiclass NVJ_type_rr_ri_no_nOne<string OpcStr> {
+ defm rrNot : NVJ_type_br_pred_reg<"!", OpcStr>;
+ defm rr : NVJ_type_br_pred_reg<"", OpcStr>;
+ defm rrdnNot : NVJ_type_br_pred_2ndDotNew<"!", OpcStr>;
+ defm rrdn : NVJ_type_br_pred_2ndDotNew<"", OpcStr>;
+ defm riNot : NVJ_type_br_pred_imm_only<"!", OpcStr>;
+ defm ri : NVJ_type_br_pred_imm_only<"", OpcStr>;
+}
+
+// Multiclass for tstbit.
+multiclass NVJ_type_r0<string OpcStr> {
+ defm r0Not : NVJ_type_br_pred_tstbit<"!", OpcStr>;
+ defm r0 : NVJ_type_br_pred_tstbit<"", OpcStr>;
+}
+
+// Base Multiclass for New Value Jump.
+multiclass NVJ_type {
+ defm GT : NVJ_type_rr_ri<"cmp.gt">;
+ defm EQ : NVJ_type_rr_ri_no_2ndDotNew<"cmp.eq">;
+ defm GTU : NVJ_type_rr_ri_no_nOne<"cmp.gtu">;
+ defm TSTBIT : NVJ_type_r0<"tstbit">;
+}
+
+let isBranch = 1, isTerminator = 1, neverHasSideEffects = 1, Defs = [PC] in {
+ defm JMP_ : NVJ_type;
+}
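+// The defm above concatenates JMP_ with GT/EQ/GTU/TSTBIT, the rr/ri/r0
+// (and Not/dn) variants, Pt/Pnt, and the _nv_V4 suffixes, producing opcodes
+// such as JMP_GTrrPt_nv_V4 and JMP_GTrrPt_ie_nv_V4.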
+
+//===----------------------------------------------------------------------===//
+// NV/J -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU +
+//===----------------------------------------------------------------------===//
+
+// Add and accumulate.
+// Rd=add(Rs,add(Ru,#s6))
+def ADDr_ADDri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ "$dst = add($src1, add($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (add (i32 IntRegs:$src2),
+ s6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDr_SUBri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (sub s6ImmPred:$src2,
+ (i32 IntRegs:$src3))))]>,
+ Requires<[HasV4T]>;
+
+// Generates the same instruction as ADDr_SUBri_V4 but matches a different
+// pattern.
+// Rd=add(Rs,sub(#s6,Ru))
+def ADDri_SUBr_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, sub(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (add (i32 IntRegs:$src1), s6ImmPred:$src2),
+ (i32 IntRegs:$src3)))]>,
+ Requires<[HasV4T]>;
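+// The two patterns are equivalent since
+// add(Rs, sub(#s6, Ru)) == sub(add(Rs, #s6), Ru).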
+
+
+// Add or subtract doublewords with carry.
+//TODO:
+// Rdd=add(Rss,Rtt,Px):carry
+//TODO:
+// Rdd=sub(Rss,Rtt,Px):carry
+
+
+// Logical doublewords.
+// Rdd=and(Rtt,~Rss)
+def ANDd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = and($src1, ~$src2)",
+ [(set (i64 DoubleRegs:$dst), (and (i64 DoubleRegs:$src1),
+ (not (i64 DoubleRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+// Rdd=or(Rtt,~Rss)
+def ORd_NOTd_V4 : MInst<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ "$dst = or($src1, ~$src2)",
+ [(set (i64 DoubleRegs:$dst),
+ (or (i64 DoubleRegs:$src1), (not (i64 DoubleRegs:$src2))))]>,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical doublewords.
+// Rxx^=xor(Rss,Rtt)
+def XORd_XORdd: MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, DoubleRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (xor (i64 DoubleRegs:$src2),
+ (i64 DoubleRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Logical-logical words.
+// Rx=or(Ru,and(Rx,#s10))
+def ORr_ANDri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst = or($src1, and($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,Rt)
+// Rx&=and(Rs,Rt)
+def ANDr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,Rt)
+def ORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,Rt)
+def XORr_ANDrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=and(Rs,~Rt)
+// Rx&=and(Rs,~Rt)
+def ANDr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,~Rt)
+def ORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=and(Rs,~Rt)
+def XORr_ANDr_NOTr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= and($src2, ~$src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ (not (i32 IntRegs:$src3)))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=or(Rs,Rt)
+// Rx&=or(Rs,Rt)
+def ANDr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,Rt)
+def ORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=or(Rs,Rt)
+def XORr_ORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= or($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (xor (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx[&|^]=xor(Rs,Rt)
+// Rx&=xor(Rs,Rt)
+def ANDr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst &= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+ (and (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=xor(Rs,Rt)
+def ORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst |= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+                           (or (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx^=xor(Rs,Rt)
+def XORr_XORrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, IntRegs:$src3),
+ "$dst ^= xor($src2, $src3)",
+ [(set (i32 IntRegs:$dst),
+                           (xor (i32 IntRegs:$src1), (xor (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=and(Rs,#s10)
+def ORr_ANDri2_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= and($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+ (or (i32 IntRegs:$src1), (and (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx|=or(Rs,#s10)
+def ORr_ORri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs: $src2, s10Imm:$src3),
+ "$dst |= or($src2, #$src3)",
+ [(set (i32 IntRegs:$dst),
+                  (or (i32 IntRegs:$src1), (or (i32 IntRegs:$src2),
+ s10ImmPred:$src3)))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Modulo wrap
+// Rd=modwrap(Rs,Rt)
+// Round
+// Rd=cround(Rs,#u5)
+// Rd=cround(Rs,Rt)
+// Rd=round(Rs,#u5)[:sat]
+// Rd=round(Rs,Rt)[:sat]
+// Vector reduce add unsigned halfwords
+// Rd=vraddh(Rss,Rtt)
+// Vector add bytes
+// Rdd=vaddb(Rss,Rtt)
+// Vector conditional negate
+// Rdd=vcnegh(Rss,Rt)
+// Rxx+=vrcnegh(Rss,Rt)
+// Vector maximum bytes
+// Rdd=vmaxb(Rtt,Rss)
+// Vector reduce maximum halfwords
+// Rxx=vrmaxh(Rss,Ru)
+// Rxx=vrmaxuh(Rss,Ru)
+// Vector reduce maximum words
+// Rxx=vrmaxuw(Rss,Ru)
+// Rxx=vrmaxw(Rss,Ru)
+// Vector minimum bytes
+// Rdd=vminb(Rtt,Rss)
+// Vector reduce minimum halfwords
+// Rxx=vrminh(Rss,Ru)
+// Rxx=vrminuh(Rss,Ru)
+// Vector reduce minimum words
+// Rxx=vrminuw(Rss,Ru)
+// Rxx=vrminw(Rss,Ru)
+// Vector subtract bytes
+// Rdd=vsubb(Rss,Rtt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/ALU -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY +
+//===----------------------------------------------------------------------===//
+
+// Multiply and use lower result.
+// Rd=add(#u6,mpyi(Rs,#U6))
+def ADDi_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add(#$src1, mpyi($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), u6ImmPred:$src3),
+ u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(#u6,mpyi(Rs,Rt))
+
+def ADDi_MPYrr_V4 : MInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add(#$src1, mpyi($src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
+ u6ImmPred:$src1))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(#u6:2,Rs))
+def ADDr_MPYir_V4 : MInst<(outs IntRegs:$dst),
+            (ins IntRegs:$src1, u6_2Imm:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi(#$src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3),
+ u6_2ImmPred:$src2)))]>,
+ Requires<[HasV4T]>;
+
+// Rd=add(Ru,mpyi(Rs,#u6))
+def ADDr_MPYri_V4 : MInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, u6Imm:$src3),
+ "$dst = add($src1, mpyi($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ u6ImmPred:$src3)))]>,
+ Requires<[HasV4T]>;
+
+// Rx=add(Ru,mpyi(Rx,Rs))
+def ADDr_MPYrr_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2, IntRegs:$src3),
+ "$dst = add($src1, mpyi($src2, $src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Polynomial multiply words
+// Rdd=pmpyw(Rs,Rt)
+// Rxx^=pmpyw(Rs,Rt)
+
+// Vector reduce multiply word by signed half (32x16)
+// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
+// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
+// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
+
+// Multiply and use upper result
+// Rd=mpy(Rs,Rt.H):<<1:sat
+// Rd=mpy(Rs,Rt.L):<<1:sat
+// Rd=mpy(Rs,Rt):<<1
+// Rd=mpy(Rs,Rt):<<1:sat
+// Rd=mpysu(Rs,Rt)
+// Rx+=mpy(Rs,Rt):<<1:sat
+// Rx-=mpy(Rs,Rt):<<1:sat
+
+// Vector multiply bytes
+// Rdd=vmpybsu(Rs,Rt)
+// Rdd=vmpybu(Rs,Rt)
+// Rxx+=vmpybsu(Rs,Rt)
+// Rxx+=vmpybu(Rs,Rt)
+
+// Vector polynomial multiply halfwords
+// Rdd=vpmpyh(Rs,Rt)
+// Rxx^=vpmpyh(Rs,Rt)
+
+//===----------------------------------------------------------------------===//
+// XTYPE/MPY -
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT +
+//===----------------------------------------------------------------------===//
+
+// Shift by immediate and accumulate.
+// Rx=add(#u8,asl(Rx,#U5))
+def ADDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
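+// The "$src2 = $dst" constraint ties Rx: the accumulator operand and the
+// result are the same register.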
+
+// Rx=add(#u8,lsr(Rx,#U5))
+def ADDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = add(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (add (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,asl(Rx,#U5))
+def SUBi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=sub(#u8,lsr(Rx,#U5))
+def SUBi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = sub(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (sub (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Shift by immediate and logical.
+// Rx=and(#u8,asl(Rx,#U5))
+def ANDi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (and (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=and(#u8,lsr(Rx,#U5))
+def ANDi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = and(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (and (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=or(#u8,asl(Rx,#U5))
+let AddedComplexity = 30 in
+def ORi_ASLri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, asl($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (shl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rx=or(#u8,lsr(Rx,#U5))
+let AddedComplexity = 30 in
+def ORi_LSRri_V4 : MInst_acc<(outs IntRegs:$dst),
+ (ins u8Imm:$src1, IntRegs:$src2, u5Imm:$src3),
+ "$dst = or(#$src1, lsr($src2, #$src3))",
+ [(set (i32 IntRegs:$dst),
+ (or (srl (i32 IntRegs:$src2), u5ImmPred:$src3),
+ u8ImmPred:$src1))],
+ "$src2 = $dst">,
+ Requires<[HasV4T]>;
+
+
+// Shift by register.
+// Rd=lsl(#s6,Rt)
+def LSLi_V4 : MInst<(outs IntRegs:$dst), (ins s6Imm:$src1, IntRegs:$src2),
+ "$dst = lsl(#$src1, $src2)",
+ [(set (i32 IntRegs:$dst), (shl s6ImmPred:$src1,
+ (i32 IntRegs:$src2)))]>,
+ Requires<[HasV4T]>;
+
+
+// Shift by register and logical.
+// Rxx^=asl(Rss,Rt)
+def ASLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asl($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rxx^=asr(Rss,Rt)
+def ASRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= asr($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (sra (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rxx^=lsl(Rss,Rt)
+def LSLd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsl($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst), (xor (i64 DoubleRegs:$src1),
+ (shl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+// Rxx^=lsr(Rss,Rt)
+def LSRd_rr_xor_V4 : MInst_acc<(outs DoubleRegs:$dst),
+ (ins DoubleRegs:$src1, DoubleRegs:$src2, IntRegs:$src3),
+ "$dst ^= lsr($src2, $src3)",
+ [(set (i64 DoubleRegs:$dst),
+ (xor (i64 DoubleRegs:$src1), (srl (i64 DoubleRegs:$src2),
+ (i32 IntRegs:$src3))))],
+ "$src1 = $dst">,
+ Requires<[HasV4T]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/SHIFT -
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word, Half, Byte
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Word
+//
+// Implemented:
+// MEMw_ADDi_indexed_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_indexed_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_indexed_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_indexed_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_indexed_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_indexed_V4 : memw(Rs+#u6:2)|=Rt
+// MEMw_ADDi_V4 : memw(Rs+#u6:2)+=#U5
+// MEMw_SUBi_V4 : memw(Rs+#u6:2)-=#U5
+// MEMw_ADDr_V4 : memw(Rs+#u6:2)+=Rt
+// MEMw_SUBr_V4 : memw(Rs+#u6:2)-=Rt
+// MEMw_CLRr_V4 : memw(Rs+#u6:2)&=Rt
+// MEMw_SETr_V4 : memw(Rs+#u6:2)|=Rt
+//
+// Not implemented:
+// MEMw_CLRi_indexed_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_indexed_V4 : memw(Rs+#u6:2)=setbit(#U5)
+// MEMw_CLRi_V4 : memw(Rs+#u6:2)=clrbit(#U5)
+// MEMw_SETi_V4 : memw(Rs+#u6:2)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMw_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMw_ADDi_indexed_V4 and
+// MEMw_SUBi_indexed_V4; a later pass will change it
+// to the corresponding instruction.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
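+// A reasonable reading (the split itself is not shown here): the later pass
+// rewrites this to MEMw_ADDi_indexed_MEM_V4 when the m6 addend is
+// non-negative, and to MEMw_SUBi_indexed_MEM_V4 with the negated value when
+// it is negative.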
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$addend),
+ "memw($base+#$offset) += #$addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, u5Imm:$subend),
+ "memw($base+#$offset) -= #$subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$addend),
+ "memw($base+#$offset) += $addend",
+ [(store (add (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$subend),
+ "memw($base+#$offset) -= $subend",
+ [(store (sub (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$andend),
+ "memw($base+#$offset) &= $andend",
+ [(store (and (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_2Imm:$offset, IntRegs:$orend),
+ "memw($base+#$offset) |= $orend",
+ [(store (or (load (add (i32 IntRegs:$base), u6_2ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_2ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMw_ADDSUBi_V4:
+// Pseudo operation for MEMw_ADDi_V4 and MEMw_SUBi_V4;
+// a later pass will change it to the corresponding instruction.
+let AddedComplexity = 30 in
+def MEMw_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(store (add (load ADDRriU6_2:$addr), m6ImmPred:$addend),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += #U5
+let AddedComplexity = 30 in
+def MEMw_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memw($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= #U5
+let AddedComplexity = 30 in
+def MEMw_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memw($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) += Rt
+let AddedComplexity = 30 in
+def MEMw_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memw($addr) += $addend",
+ [(store (add (load ADDRriU6_2:$addr), (i32 IntRegs:$addend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) -= Rt
+let AddedComplexity = 30 in
+def MEMw_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memw($addr) -= $subend",
+ [(store (sub (load ADDRriU6_2:$addr), (i32 IntRegs:$subend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) &= Rt
+let AddedComplexity = 30 in
+def MEMw_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memw($addr) &= $andend",
+ [(store (and (load ADDRriU6_2:$addr), (i32 IntRegs:$andend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memw(Rs+#u6:2) |= Rt
+let AddedComplexity = 30 in
+def MEMw_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memw($addr) |= $orend",
+ [(store (or (load ADDRriU6_2:$addr), (i32 IntRegs:$orend)),
+ ADDRriU6_2:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Halfword
+//
+// Implemented:
+// MEMh_ADDi_indexed_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_indexed_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_indexed_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_indexed_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_indexed_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_indexed_V4 : memh(Rs+#u6:1)|=Rt
+// MEMh_ADDi_V4 : memh(Rs+#u6:1)+=#U5
+// MEMh_SUBi_V4 : memh(Rs+#u6:1)-=#U5
+// MEMh_ADDr_V4 : memh(Rs+#u6:1)+=Rt
+// MEMh_SUBr_V4 : memh(Rs+#u6:1)-=Rt
+// MEMh_CLRr_V4 : memh(Rs+#u6:1)&=Rt
+// MEMh_SETr_V4 : memh(Rs+#u6:1)|=Rt
+//
+// Not implemented:
+// MEMh_CLRi_indexed_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_indexed_V4 : memh(Rs+#u6:1)=setbit(#U5)
+// MEMh_CLRi_V4 : memh(Rs+#u6:1)=clrbit(#U5)
+// MEMh_SETi_V4 : memh(Rs+#u6:1)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMh_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMh_ADDi_indexed_V4 and
+// MEMh_SUBi_indexed_V4; a later pass will change it
+// to the corresponding instruction.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$addend),
+ "memh($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, u5Imm:$subend),
+ "memh($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$addend),
+ "memh($base+#$offset) += $addend",
+ [(truncstorei16 (add (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$subend),
+ "memh($base+#$offset) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$andend),
+            "memh($base+#$offset) &= $andend",
+ [(truncstorei16 (and (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_1Imm:$offset, IntRegs:$orend),
+ "memh($base+#$offset) |= $orend",
+ [(truncstorei16 (or (sextloadi16 (add (i32 IntRegs:$base),
+ u6_1ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_1ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMh_ADDSUBi_V4:
+// Pseudo operation for MEMh_ADDi_V4 and MEMh_SUBi_V4;
+// a later pass will change it to the corresponding instruction.
+let AddedComplexity = 30 in
+def MEMh_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ m6ImmPred:$addend), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += #U5
+let AddedComplexity = 30 in
+def MEMh_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memh($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= #U5
+let AddedComplexity = 30 in
+def MEMh_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memh($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) += Rt
+let AddedComplexity = 30 in
+def MEMh_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memh($addr) += $addend",
+ [(truncstorei16 (add (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$addend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) -= Rt
+let AddedComplexity = 30 in
+def MEMh_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memh($addr) -= $subend",
+ [(truncstorei16 (sub (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$subend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) &= Rt
+let AddedComplexity = 30 in
+def MEMh_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memh($addr) &= $andend",
+ [(truncstorei16 (and (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$andend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memh(Rs+#u6:1) |= Rt
+let AddedComplexity = 30 in
+def MEMh_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memh($addr) |= $orend",
+ [(truncstorei16 (or (sextloadi16 ADDRriU6_1:$addr),
+ (i32 IntRegs:$orend)), ADDRriU6_1:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP: Byte
+//
+// Implemented:
+// MEMb_ADDi_indexed_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_indexed_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_indexed_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_indexed_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_indexed_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_indexed_V4 : memb(Rs+#u6:0)|=Rt
+// MEMb_ADDi_V4 : memb(Rs+#u6:0)+=#U5
+// MEMb_SUBi_V4 : memb(Rs+#u6:0)-=#U5
+// MEMb_ADDr_V4 : memb(Rs+#u6:0)+=Rt
+// MEMb_SUBr_V4 : memb(Rs+#u6:0)-=Rt
+// MEMb_CLRr_V4 : memb(Rs+#u6:0)&=Rt
+// MEMb_SETr_V4 : memb(Rs+#u6:0)|=Rt
+//
+// Not implemented:
+// MEMb_CLRi_indexed_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_indexed_V4 : memb(Rs+#u6:0)=setbit(#U5)
+// MEMb_CLRi_V4 : memb(Rs+#u6:0)=clrbit(#U5)
+// MEMb_SETi_V4 : memb(Rs+#u6:0)=setbit(#U5)
+//===----------------------------------------------------------------------===//
+
+
+// MEMb_ADDSUBi_indexed_V4:
+// Pseudo operation for MEMb_ADDi_indexed_V4 and
+// MEMb_SUBi_indexed_V4; a later pass will change it
+// to the corresponding instruction.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ m6ImmPred:$addend),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$addend),
+ "memb($base+#$offset) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, u5Imm:$subend),
+ "memb($base+#$offset) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$addend),
+ "memb($base+#$offset) += $addend",
+ [(truncstorei8 (add (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$subend),
+ "memb($base+#$offset) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$subend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$andend),
+            "memb($base+#$offset) &= $andend",
+ [(truncstorei8 (and (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$andend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_indexed_MEM_V4 : MEMInst_V4<(outs),
+ (ins IntRegs:$base, u6_0Imm:$offset, IntRegs:$orend),
+ "memb($base+#$offset) |= $orend",
+ [(truncstorei8 (or (sextloadi8 (add (i32 IntRegs:$base),
+ u6_0ImmPred:$offset)),
+ (i32 IntRegs:$orend)),
+ (add (i32 IntRegs:$base), u6_0ImmPred:$offset))]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// MEMb_ADDSUBi_V4:
+// Pseudo operation for MEMb_ADDi_V4 and MEMb_SUBi_V4;
+// a later pass will change it to the corresponding instruction.
+let AddedComplexity = 30 in
+def MEMb_ADDSUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, m6Imm:$addend),
+ "Error; should not emit",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ m6ImmPred:$addend), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += #U5
+let AddedComplexity = 30 in
+def MEMb_ADDi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$addend),
+ "memb($addr) += $addend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= #U5
+let AddedComplexity = 30 in
+def MEMb_SUBi_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, u5Imm:$subend),
+ "memb($addr) -= $subend",
+ []>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) += Rt
+let AddedComplexity = 30 in
+def MEMb_ADDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$addend),
+ "memb($addr) += $addend",
+ [(truncstorei8 (add (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$addend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) -= Rt
+let AddedComplexity = 30 in
+def MEMb_SUBr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$subend),
+ "memb($addr) -= $subend",
+ [(truncstorei8 (sub (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$subend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) &= Rt
+let AddedComplexity = 30 in
+def MEMb_ANDr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$andend),
+ "memb($addr) &= $andend",
+ [(truncstorei8 (and (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$andend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+// memb(Rs+#u6:0) |= Rt
+let AddedComplexity = 30 in
+def MEMb_ORr_MEM_V4 : MEMInst_V4<(outs),
+ (ins MEMri:$addr, IntRegs:$orend),
+ "memb($addr) |= $orend",
+ [(truncstorei8 (or (sextloadi8 ADDRriU6_0:$addr),
+ (i32 IntRegs:$orend)), ADDRriU6_0:$addr)]>,
+ Requires<[HasV4T, UseMEMOP]>;
+
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED +
+//===----------------------------------------------------------------------===//
+
+// Hexagon V4 only supports these flavors of byte/half compare instructions:
+// EQ/GT/GTU. Other flavors like GE/GEU/LT/LTU/LE/LEU are not supported by
+// the hardware. However, the compiler can still implement them by combining
+// the implemented patterns appropriately, as sketched below.
+// Implemented patterns: EQ/GT/GTU.
+// Missing patterns: GE/GEU/LT/LTU/LE/LEU.
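+// For example, the missing GE can be derived from the implemented GT by
+// inverting the swapped compare: Rs >= Rt is !(Rt > Rs), i.e. the negation
+// of cmp.gt(Rt, Rs). LT/LE and the unsigned flavors follow analogously.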
+
+// The following instruction is not extended because doing so generates
+// incorrect code for negative numbers.
+// Pd=cmpb.eq(Rs,#u8)
+
+let isCompare = 1 in
+def CMPbEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u8Imm:$src2),
+ "$dst = cmpb.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (i32 IntRegs:$src1), 255), u8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_ubub_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)), 255), 0))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPbEQrr_sbsb_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
+ Requires<[HasV4T]>;
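+// The _ubub variant compares the low bytes zero-extended (mask with 255);
+// the _sbsb variant compares them sign-extended (shift left by 24). For an
+// equality test the two are interchangeable, and both select cmpb.eq.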
+
+/* Incorrect Pattern -- immediate should be right shifted before being
+used in the cmpb.gt instruction.
+// Pd=cmpb.gt(Rs,#s8)
+let isCompare = 1 in
+def CMPbGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmpb.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setgt (shl (i32 IntRegs:$src1), (i32 24)),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+*/
+
+// Pd=cmpb.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 24)),
+ (shl (i32 IntRegs:$src2), (i32 24))))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPbGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmpb.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Pd=cmpb.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPbGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmpb.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 255),
+ (and (i32 IntRegs:$src2), 255)))]>,
+ Requires<[HasV4T]>;
+
+// The following instruction is not extended because doing so generates
+// incorrect code for negative numbers.
+
+// Signed half compare(.eq) ri.
+// Pd=cmph.eq(Rs,#s8)
+let isCompare = 1 in
+def CMPhEQri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmph.eq($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (i32 IntRegs:$src1), 65535),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 1: xor + and, then compare:
+// r0=xor(r0,r1)
+// r0=and(r0,#0xffff)
+// p0=cmp.eq(r0,#0)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_xor_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst), (seteq (and (xor (i32 IntRegs:$src1),
+ (i32 IntRegs:$src2)),
+ 65535), 0))]>,
+ Requires<[HasV4T]>;
+
+// Signed half compare(.eq) rr.
+// Case 2: shift left 16 bits then compare:
+// r0=asl(r0,16)
+// r1=asl(r1,16)
+// p0=cmp.eq(r0,r1)
+// Pd=cmph.eq(Rs,Rt)
+let isCompare = 1 in
+def CMPhEQrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.eq($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (seteq (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+/* Incorrect Pattern -- immediate should be right shifted before being
+used in the cmph.gt instruction.
+// Signed half compare(.gt) ri.
+// Pd=cmph.gt(Rs,#s8)
+
+let isCompare = 1 in
+def CMPhGTri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, s8Imm:$src2),
+ "$dst = cmph.gt($src1, #$src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ s8ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+*/
+
+// Signed half compare(.gt) rr.
+// Pd=cmph.gt(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTrr_shl_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gt($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setgt (shl (i32 IntRegs:$src1), (i32 16)),
+ (shl (i32 IntRegs:$src2), (i32 16))))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare rr (.gtu).
+// Pd=cmph.gtu(Rs,Rt)
+let isCompare = 1 in
+def CMPhGTUrr_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, IntRegs:$src2),
+ "$dst = cmph.gtu($src1, $src2)",
+ [(set (i1 PredRegs:$dst),
+ (setugt (and (i32 IntRegs:$src1), 65535),
+ (and (i32 IntRegs:$src2), 65535)))]>,
+ Requires<[HasV4T]>;
+
+// Unsigned half compare ri (.gtu).
+// Pd=cmph.gtu(Rs,#u7)
+let isCompare = 1 in
+def CMPhGTUri_V4 : MInst<(outs PredRegs:$dst),
+ (ins IntRegs:$src1, u7Imm:$src2),
+ "$dst = cmph.gtu($src1, #$src2)",
+ [(set (i1 PredRegs:$dst), (setugt (and (i32 IntRegs:$src1), 65535),
+ u7ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+//===----------------------------------------------------------------------===//
+// XTYPE/PRED -
+//===----------------------------------------------------------------------===//
+
+// Deallocate frame and return.
+// dealloc_return
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicable = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1 in {
+ def DEALLOC_RET_V4 : NVInst_V4<(outs), (ins i32imm:$amt1),
+ "dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
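+// On Hexagon, R29 is the stack pointer, R30 the frame pointer, and R31 the
+// link register; dealloc_return restores them from the frame, hence the
+// Defs/Uses lists above.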
+
+// Restore registers and dealloc return function call.
+let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_RET_JMP_V4 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "jump $dst // Restore_and_dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Restore registers and dealloc frame before a tail call.
+let isCall = 1, isBarrier = 1,
+ Defs = [R29, R30, R31, PC] in {
+ def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+ "call $dst // Restore_and_dealloc_before_tailcall",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// Save registers function call.
+let isCall = 1, isBarrier = 1,
+ Uses = [R29, R31] in {
+ def SAVE_REGISTERS_CALL_V4 : JInst<(outs), (ins calltarget:$dst, variable_ops),
+         "call $dst // Save_callee_saved_registers",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1, i32imm:$amt1),
+ "if ($src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps) dealloc_return
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1) dealloc_return",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:nt
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotdnPnt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:nt",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if ($src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+// if (!Ps.new) dealloc_return:t
+let isReturn = 1, isTerminator = 1,
+ Defs = [R29, R30, R31, PC], Uses = [R29, R31], neverHasSideEffects = 1,
+ isPredicated = 1 in {
+ def DEALLOC_RET_cNotdnPt_V4 : NVInst_V4<(outs), (ins PredRegs:$src1,
+ i32imm:$amt1),
+ "if (!$src1.new) dealloc_return:t",
+ []>,
+ Requires<[HasV4T]>;
+}
+
+
+// Load/Store with absolute addressing mode
+// memw(#u6)=Rt
+
+multiclass ST_abs<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, IntRegs:$src),
+ !strconcat(OpcStr, "(##$absaddr) = $src"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if ($src1) ", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if (!$src1) ", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if ($src1.new) ", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if (!$src1.new) ", !strconcat(OpcStr, "(##$absaddr) = $src2")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_nv_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, IntRegs:$src),
+ !strconcat(OpcStr, "(##$absaddr) = $src.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if ($src1) ", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if (!$src1) ", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if ($src1.new) ", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, IntRegs:$src2),
+            !strconcat("if (!$src1.new) ", !strconcat(OpcStr, "(##$absaddr) = $src2.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+let AddedComplexity = 30, isPredicable = 1 in
+def STrid_abs_V4 : STInst<(outs),
+ (ins globaladdress:$absaddr, DoubleRegs:$src),
+ "memd(##$absaddr) = $src",
+ [(store (i64 DoubleRegs:$src), (HexagonCONST32 tglobaladdr:$absaddr))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if ($src1) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if (!$src1) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if ($src1.new) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def STrid_abs_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, globaladdress:$absaddr, DoubleRegs:$src2),
+ "if (!$src1.new) memd(##$absaddr) = $src2",
+ []>,
+ Requires<[HasV4T]>;
+
+defm STrib : ST_abs<"memb">;
+defm STrih : ST_abs<"memh">;
+defm STriw : ST_abs<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrib_abs_V4 tglobaladdr:$absaddr, IntRegs:$src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STrih_abs_V4 tglobaladdr:$absaddr, IntRegs:$src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(store (i32 IntRegs:$src1), (HexagonCONST32 tglobaladdr:$absaddr)),
+ (STriw_abs_V4 tglobaladdr:$absaddr, IntRegs:$src1)>;
+
+
+multiclass LD_abs<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : LDInst<(outs IntRegs:$dst),
+ (ins globaladdress:$absaddr),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if ($src1) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if (!$src1) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if ($src1.new) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ !strconcat("if (!$src1.new) $dst = ", !strconcat(OpcStr, "(##$absaddr)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+let AddedComplexity = 30 in
+def LDrid_abs_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins globaladdress:$absaddr),
+ "$dst = memd(##$absaddr)",
+ [(set (i64 DoubleRegs:$dst), (load (HexagonCONST32 tglobaladdr:$absaddr)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if ($src1) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if (!$src1) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cdnPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if ($src1.new) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 30, isPredicated = 1 in
+def LDrid_abs_cdnNotPt_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$absaddr),
+ "if (!$src1.new) $dst = memd(##$absaddr)",
+ []>,
+ Requires<[HasV4T]>;
+
+defm LDrib : LD_abs<"memb">;
+defm LDriub : LD_abs<"memub">;
+defm LDrih : LD_abs<"memh">;
+defm LDriuh : LD_abs<"memuh">;
+defm LDriw : LD_abs<"memw">;
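+
+// For example, the "memw" instance above yields LDriw_abs_V4 with the syntax
+// "$dst = memw(##$absaddr)", plus the four predicated variants declared in
+// LD_abs.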
+
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriw_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (sextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrib_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (zextloadi8 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriub_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (sextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDrih_abs_V4 tglobaladdr:$absaddr)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (zextloadi16 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (LDriuh_abs_V4 tglobaladdr:$absaddr)>;
+
+// Transfer global address into a register
+let AddedComplexity = 50, isMoveImm = 1, isReMaterializable = 1 in
+def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$src1),
+ "$dst = ##$src1",
+ [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if ($src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if (!$src1) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if ($src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, neverHasSideEffects = 1, isPredicated = 1 in
+def TFRI_cdnNotPt_V4 : ALU32_ri<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, globaladdress:$src2),
+ "if (!$src1.new) $dst = ##$src2",
+ []>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 50, Predicates = [HasV4T] in
+def : Pat<(HexagonCONST32_GP tglobaladdr:$src1),
+ (TFRI_V4 tglobaladdr:$src1)>;
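+
+// Illustrative selection (hypothetical global @g): a HexagonCONST32_GP node
+// for @g is rewritten to "r0 = ##g" via TFRI_V4 under the pattern above.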
+
+
+// Load - Indirect with long offset: These instructions take a global address
+// as an operand.
+let AddedComplexity = 10 in
+def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ "$dst=memd($src1<<#$src2+##$offset)",
+ [(set (i64 DoubleRegs:$dst),
+ (load (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset))))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass LD_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : LDInst<(outs IntRegs:$dst),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset),
+ !strconcat("$dst = ", !strconcat(OpcStr, "($src1<<#$src2+##$offset)")),
+ [(set IntRegs:$dst,
+ (i32 (OpNode (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$offset)))))]>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_ind : LD_indirect_lo<"memb", sextloadi8>;
+defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>;
+defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>;
+defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>;
+defm LDriw_ind : LD_indirect_lo<"memw", load>;
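+
+// Illustrative match (hypothetical source "x = g[i];" for an i32 array g):
+// a load of (add (shl i, 2), ##g) selects LDriw_ind_lo_V4, emitting
+//   r0 = memw(r1<<#2+##g)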
+
+// Store - Indirect with long offset: These instructions take a global address
+// as an operand.
+let AddedComplexity = 10 in
+def STrid_ind_lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ DoubleRegs:$src4),
+ "memd($src1<<#$src2+##$src3) = $src4",
+ [(store (i64 DoubleRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+
+let AddedComplexity = 10 in
+multiclass ST_indirect_lo<string OpcStr, PatFrag OpNode> {
+ def _lo_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3,
+ IntRegs:$src4),
+ !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"),
+ [(OpNode (i32 IntRegs:$src4),
+ (add (shl IntRegs:$src1, u2ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))]>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>;
+defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>;
+defm STriw_ind : ST_indirect_lo<"memw", store>;
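+
+// Illustrative match (hypothetical source "g[i] = x;"): a store to
+// (add (shl i, 2), ##g) selects STriw_ind_lo_V4, emitting
+//   memw(r1<<#2+##g) = r0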
+
+// Store - absolute addressing mode: These instructions take a constant
+// value as the extended operand.
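+//
+// For example, "defm STriw_imm : ST_absimm<"memw">;" (defined below) yields
+// the base form "memw(#$src1) = $src2" for a u6 absolute address, plus
+// predicated and new-value variants.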
+multiclass ST_absimm<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : STInst<(outs),
+ (ins u6Imm:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(#$src1) = $src2"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)", !strconcat(OpcStr, "(#$src2) = $src3")),
+ []>,
+ Requires<[HasV4T]>;
+
+ def _abs_nv_V4 : STInst<(outs),
+ (ins u6Imm:$src1, IntRegs:$src2),
+ !strconcat(OpcStr, "(#$src1) = $src2.new"),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if ($src1.new)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_nv_V4 : STInst<(outs),
+ (ins PredRegs:$src1, u6Imm:$src2, IntRegs:$src3),
+ !strconcat("if (!$src1.new)", !strconcat(OpcStr, "(#$src2) = $src3.new")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+defm STrib_imm : ST_absimm<"memb">;
+defm STrih_imm : ST_absimm<"memh">;
+defm STriw_imm : ST_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STrib_imm_abs_V4 u6ImmPred:$src2, IntRegs:$src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STrih_imm_abs_V4 u6ImmPred:$src2, IntRegs:$src1)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(store (i32 IntRegs:$src1), u6ImmPred:$src2),
+ (STriw_imm_abs_V4 u6ImmPred:$src2, IntRegs:$src1)>;
+
+
+// Load - absolute addressing mode: These instructions take a constant
+// value as the extended operand.
+
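+// For example, "defm LDriw_imm : LD_absimm<"memw">;" (defined below) yields
+// "$dst = memw(#$src)" for a u6 absolute address, plus the four predicated
+// variants.
+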
+multiclass LD_absimm<string OpcStr> {
+ let isPredicable = 1 in
+ def _abs_V4 : LDInst<(outs IntRegs:$dst),
+ (ins u6Imm:$src),
+ !strconcat("$dst = ", !strconcat(OpcStr, "(#$src)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if ($src1) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if (!$src1) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if ($src1.new) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+
+ let isPredicated = 1 in
+ def _abs_cdnNotPt_V4 : LDInst<(outs IntRegs:$dst),
+ (ins PredRegs:$src1, u6Imm:$src2),
+ !strconcat("if (!$src1.new) $dst = ", !strconcat(OpcStr, "(#$src2)")),
+ []>,
+ Requires<[HasV4T]>;
+}
+
+defm LDrib_imm : LD_absimm<"memb">;
+defm LDriub_imm : LD_absimm<"memub">;
+defm LDrih_imm : LD_absimm<"memh">;
+defm LDriuh_imm : LD_absimm<"memuh">;
+defm LDriw_imm : LD_absimm<"memw">;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (load u6ImmPred:$src)),
+ (LDriw_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (sextloadi8 u6ImmPred:$src)),
+ (LDrib_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (zextloadi8 u6ImmPred:$src)),
+ (LDriub_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (sextloadi16 u6ImmPred:$src)),
+ (LDrih_imm_abs_V4 u6ImmPred:$src)>;
+
+let Predicates = [HasV4T], AddedComplexity = 30 in
+def : Pat<(i32 (zextloadi16 u6ImmPred:$src)),
+ (LDriuh_imm_abs_V4 u6ImmPred:$src)>;
+
+
+// Indexed store word - stores a global address as the value.
+// memw(Rs+#u6:2) = ##global
+let AddedComplexity = 10 in
+def STriw_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_2Imm:$src2, globaladdress:$src3),
+ "memw($src1+#$src2) = ##$src3",
+ [(store (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_2ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
+
+
+// Indexed store halfword - stores a global address as the value.
+// memh(Rs+#u6:1) = ##global
+let AddedComplexity = 10 in
+def STrih_offset_ext_V4 : STInst<(outs),
+ (ins IntRegs:$src1, u6_1Imm:$src2, globaladdress:$src3),
+ "memh($src1+#$src2) = ##$src3",
+ [(truncstorei16 (HexagonCONST32 tglobaladdr:$src3),
+ (add IntRegs:$src1, u6_1ImmPred:$src2))]>,
+ Requires<[HasV4T]>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td
new file mode 100644
index 000000000000..b15e293fdfb4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -0,0 +1,3462 @@
+//===-- HexagonIntrinsics.td - Instruction intrinsics ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V2 Architecture
+// Application-Level Specification
+// 80-V9418-8 Rev. B
+// March 4, 2008
+//===----------------------------------------------------------------------===//
+
+//
+// ALU 32 types.
+//
+
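+// Naming convention: the leading token names the destination register class
+// (si = IntRegs, di = DoubleRegs, qi = PredRegs) and the trailing tokens name
+// the operand types (registers or sN/uN immediates). Intrinsic definitions
+// later in this file instantiate these classes, e.g. (hypothetical instance):
+//   def Hexagon_A2_add : si_ALU32_sisi<"add", int_hexagon_A2_add>;
+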
+class qi_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_qisisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qis8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class si_ALU32_qisis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_ALU32_qis8s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2, s8Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_sat<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sisi_rnd<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU32_sis16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s16Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s10si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s10Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set IntRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class si_lo_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.l = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_hi_ALU32_siu16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, u16Imm:$src2),
+ !strconcat("$dst.h = ", !strconcat(opc , "#$src2")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_s16<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins s16Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set IntRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU32_s8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "#$src1")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1))]>;
+
+class di_ALU64_di<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_ALU32_si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU32_si_tfr<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+//
+// ALU 64 types.
+//
+
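+// As with the ALU32 classes, each shape is instantiated per intrinsic, e.g.
+// (hypothetical instance):
+//   def Hexagon_A2_addsat : si_ALU64_sisi_sat<"add", int_hexagon_A2_addsat>;
+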
+class si_ALU64_si_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_sidi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_qididi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2,
+ DoubleRegs:$src3))]>;
+
+class di_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU64_didi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_rnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_ALU64_didi_crnd_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):crnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class qi_ALU64_didi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs PredRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_ALU64_sisi<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_l16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_sat_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat:<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_hl<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_h16_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<16")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_lh<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_ll<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_ALU64_sisi_sat<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+//
+// SInst classes.
+//
+
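+// Shift/permute (S-type) shapes; the _acc/_nac/_and/_or/_xor variants map to
+// the "+=", "-=", "&=", "|=" and "^=" asm forms below. E.g. (hypothetical
+// instance):
+//   def Hexagon_S2_lsr_i_r : si_SInst_siu5<"lsr", int_hexagon_S2_lsr_i_r>;
+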
+class qi_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qi_pxfer<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class qi_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, !$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class si_SInst_di_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src))]>;
+
+class di_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_didi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1),
+ !strconcat("$dst = ", !strconcat(opc , "($src1)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1))]>;
+
+class si_SInst_sisiu3<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_diu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_disi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_sidi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_SInst_disisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_siu6<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_si_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class di_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "($src)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_si<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src),
+ !strconcat("$dst = ", !strconcat(opc , "$src")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src))]>;
+
+class si_SInst_sisi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_SInst_diu6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5_rnd<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_SInst_siu5u5<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2, imm:$src3))]>;
+
+class si_SInst_sisisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5u5<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisidi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6u6<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2, u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class di_SInst_dididi<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_diu6u6<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2,
+ u6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, imm:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiqi<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, $src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ IntRegs:$src3))]>;
+
+class di_SInst_didiu3<string opc, Intrinsic IntID>
+ : SInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2,
+ u3Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2, #$src3)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2,
+ imm:$src3))]>;
+
+class di_SInst_didisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisisi_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+
+class si_SInst_sisiu5_and<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst &= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_or<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst |= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_xor<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class si_SInst_sisiu5_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u5Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_acc<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1, imm:$src2))],
+ "$dst2 = $dst">;
+
+class di_SInst_didiu6_nac<string opc, Intrinsic IntID>
+ : SInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ u6Imm:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ imm:$src2))],
+ "$dst2 = $dst">;
+
+
+//
+// MInst classes.
+//
+
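+// Multiply (M-type) shapes; .H/.L select register halves, :<<1 shifts the
+// result, and :rnd/:sat mark rounding/saturation, mirroring the asm strings
+// below. E.g. (hypothetical instance):
+//   def Hexagon_M2_mpy_hh_s0 :
+//     si_MInst_sisi_hh<"mpy", int_hexagon_M2_mpy_hh_s0>;
+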
+class di_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):rnd")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_disisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_s8s8<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins s8Imm:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, imm:$src2))]>;
+
+class si_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+
+class si_MInst_sisi_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_up<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class di_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_SInst_sisi_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_l_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_h_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_s1_rnd_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2*):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
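+// The _acc/_nac/_xacc variants below are the accumulating forms
+// ("+=", "-=", "^="): the extra $dst2 input is tied to $dst through
+// the "$dst2 = $dst" constraint string, so the accumulator is read
+// and written in the same register.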
+class si_MInst_sisisi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ IntRegs:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisis8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ s8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu4u5<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ u4Imm:$src2, u5Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, #$src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ imm:$src2, imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst += ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisiu8_nac<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src2,
+ u8Imm:$src3),
+ !strconcat("$dst -= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src2,
+ imm:$src3))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_acc_sat_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_hl_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_lh_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_nac_ll_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs IntRegs:$dst), (ins IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_sat_conj<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2*):sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_sisi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1,
+ DoubleRegs:$src2))]>;
+
+class si_MInst_didi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_didi_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.H, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.H):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.H):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1.L, $src2.L):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.L, $src2.L):<<1:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc ,
+ "($src1.H, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_hl_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_lh_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_sat_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_sisi_rnd_ll_s1<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1:rnd")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_dididi_acc_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2):sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_dididi_acc_conj<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1, $src2*)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.H, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.H)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ", !strconcat(opc , "($src1.L, $src2.L)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_hl_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.H, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_lh_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.H):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_nac_ll_s1<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst -= ",
+ !strconcat(opc , "($src1.L, $src2.L):<<1")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, IntRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class di_MInst_disi_s1_sat<string opc, Intrinsic IntID>
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class di_MInst_didisi_acc_s1_sat<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ IntRegs:$src2),
+ !strconcat("$dst += ",
+ !strconcat(opc , "($src1, $src2):<<1:sat")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2,
+ DoubleRegs:$src1,
+ IntRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_disi_s1_rnd_sat<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ",
+ !strconcat(opc , "($src1, $src2):<<1:rnd:sat")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, IntRegs:$src2))]>;
+
+class si_MInst_didi<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+
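+// As a worked example of how these wrappers expand: the definition
+//   di_MInst_didi<"vabsdiffh", int_hexagon_M2_vabsdiffh>
+// (used by Hexagon_M2_vabsdiffh below) produces the assembler string
+// "$dst = vabsdiffh($src1, $src2)" and a selection pattern that lowers
+// the int_hexagon_M2_vabsdiffh intrinsic to this instruction.
+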
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Add.
+def Hexagon_A2_add:
+ si_ALU32_sisi <"add", int_hexagon_A2_add>;
+def Hexagon_A2_addi:
+ si_ALU32_sis16 <"add", int_hexagon_A2_addi>;
+
+// ALU32 / ALU / Logical operations.
+def Hexagon_A2_and:
+ si_ALU32_sisi <"and", int_hexagon_A2_and>;
+def Hexagon_A2_andir:
+ si_ALU32_sis10 <"and", int_hexagon_A2_andir>;
+def Hexagon_A2_not:
+ si_ALU32_si <"not", int_hexagon_A2_not>;
+def Hexagon_A2_or:
+ si_ALU32_sisi <"or", int_hexagon_A2_or>;
+def Hexagon_A2_orir:
+ si_ALU32_sis10 <"or", int_hexagon_A2_orir>;
+def Hexagon_A2_xor:
+ si_ALU32_sisi <"xor", int_hexagon_A2_xor>;
+
+// ALU32 / ALU / Negate.
+def Hexagon_A2_neg:
+ si_ALU32_si <"neg", int_hexagon_A2_neg>;
+
+// ALU32 / ALU / Subtract.
+def Hexagon_A2_sub:
+ si_ALU32_sisi <"sub", int_hexagon_A2_sub>;
+def Hexagon_A2_subri:
+ si_ALU32_s10si <"sub", int_hexagon_A2_subri>;
+
+// ALU32 / ALU / Transfer Immediate.
+def Hexagon_A2_tfril:
+ si_lo_ALU32_siu16 <"", int_hexagon_A2_tfril>;
+def Hexagon_A2_tfrih:
+ si_hi_ALU32_siu16 <"", int_hexagon_A2_tfrih>;
+def Hexagon_A2_tfrsi:
+ si_ALU32_s16 <"", int_hexagon_A2_tfrsi>;
+def Hexagon_A2_tfrpi:
+ di_ALU32_s8 <"", int_hexagon_A2_tfrpi>;
+
+// ALU32 / ALU / Transfer Register.
+def Hexagon_A2_tfr:
+ si_ALU32_si_tfr <"", int_hexagon_A2_tfr>;
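+// The empty opc strings in the transfer definitions above presumably
+// leave the full assembler syntax to the instantiated classes, which
+// are defined earlier in this file.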
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine.
+def Hexagon_A2_combinew:
+ di_ALU32_sisi <"combine", int_hexagon_A2_combinew>;
+def Hexagon_A2_combine_hh:
+ si_MInst_sisi_hh <"combine", int_hexagon_A2_combine_hh>;
+def Hexagon_A2_combine_lh:
+ si_MInst_sisi_lh <"combine", int_hexagon_A2_combine_lh>;
+def Hexagon_A2_combine_hl:
+ si_MInst_sisi_hl <"combine", int_hexagon_A2_combine_hl>;
+def Hexagon_A2_combine_ll:
+ si_MInst_sisi_ll <"combine", int_hexagon_A2_combine_ll>;
+def Hexagon_A2_combineii:
+ di_MInst_s8s8 <"combine", int_hexagon_A2_combineii>;
+
+// ALU32 / PERM / Mux.
+def Hexagon_C2_mux:
+ si_ALU32_qisisi <"mux", int_hexagon_C2_mux>;
+def Hexagon_C2_muxri:
+ si_ALU32_qis8si <"mux", int_hexagon_C2_muxri>;
+def Hexagon_C2_muxir:
+ si_ALU32_qisis8 <"mux", int_hexagon_C2_muxir>;
+def Hexagon_C2_muxii:
+ si_ALU32_qis8s8 <"mux", int_hexagon_C2_muxii>;
+
+// ALU32 / PERM / Shift halfword.
+def Hexagon_A2_aslh:
+ si_ALU32_si <"aslh", int_hexagon_A2_aslh>;
+def Hexagon_A2_asrh:
+ si_ALU32_si <"asrh", int_hexagon_A2_asrh>;
+def SI_to_SXTHI_asrh:
+ si_ALU32_si <"asrh", int_hexagon_SI_to_SXTHI_asrh>;
+
+// ALU32 / PERM / Sign/zero extend.
+def Hexagon_A2_sxth:
+ si_ALU32_si <"sxth", int_hexagon_A2_sxth>;
+def Hexagon_A2_sxtb:
+ si_ALU32_si <"sxtb", int_hexagon_A2_sxtb>;
+def Hexagon_A2_zxth:
+ si_ALU32_si <"zxth", int_hexagon_A2_zxth>;
+def Hexagon_A2_zxtb:
+ si_ALU32_si <"zxtb", int_hexagon_A2_zxtb>;
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Compare.
+def Hexagon_C2_cmpeq:
+ qi_ALU32_sisi <"cmp.eq", int_hexagon_C2_cmpeq>;
+def Hexagon_C2_cmpeqi:
+ qi_ALU32_sis10 <"cmp.eq", int_hexagon_C2_cmpeqi>;
+def Hexagon_C2_cmpgei:
+ qi_ALU32_sis8 <"cmp.ge", int_hexagon_C2_cmpgei>;
+def Hexagon_C2_cmpgeui:
+ qi_ALU32_siu8 <"cmp.geu", int_hexagon_C2_cmpgeui>;
+def Hexagon_C2_cmpgt:
+ qi_ALU32_sisi <"cmp.gt", int_hexagon_C2_cmpgt>;
+def Hexagon_C2_cmpgti:
+ qi_ALU32_sis10 <"cmp.gt", int_hexagon_C2_cmpgti>;
+def Hexagon_C2_cmpgtu:
+ qi_ALU32_sisi <"cmp.gtu", int_hexagon_C2_cmpgtu>;
+def Hexagon_C2_cmpgtui:
+ qi_ALU32_siu9 <"cmp.gtu", int_hexagon_C2_cmpgtui>;
+def Hexagon_C2_cmplt:
+ qi_ALU32_sisi <"cmp.lt", int_hexagon_C2_cmplt>;
+def Hexagon_C2_cmpltu:
+ qi_ALU32_sisi <"cmp.ltu", int_hexagon_C2_cmpltu>;
+
+/********************************************************************
+* ALU32/VH *
+*********************************************************************/
+
+// ALU32 / VH / Vector add halfwords.
+// Rd32=vadd[u]h(Rs32,Rt32)[:sat]
+def Hexagon_A2_svaddh:
+ si_ALU32_sisi <"vaddh", int_hexagon_A2_svaddh>;
+def Hexagon_A2_svaddhs:
+ si_ALU32_sisi_sat <"vaddh", int_hexagon_A2_svaddhs>;
+def Hexagon_A2_svadduhs:
+ si_ALU32_sisi_sat <"vadduh", int_hexagon_A2_svadduhs>;
+
+// ALU32 / VH / Vector average halfwords.
+def Hexagon_A2_svavgh:
+ si_ALU32_sisi <"vavgh", int_hexagon_A2_svavgh>;
+def Hexagon_A2_svavghs:
+ si_ALU32_sisi_rnd <"vavgh", int_hexagon_A2_svavghs>;
+def Hexagon_A2_svnavgh:
+ si_ALU32_sisi <"vnavgh", int_hexagon_A2_svnavgh>;
+
+// ALU32 / VH / Vector subtract halfwords.
+def Hexagon_A2_svsubh:
+ si_ALU32_sisi <"vsubh", int_hexagon_A2_svsubh>;
+def Hexagon_A2_svsubhs:
+ si_ALU32_sisi_sat <"vsubh", int_hexagon_A2_svsubhs>;
+def Hexagon_A2_svsubuhs:
+ si_ALU32_sisi_sat <"vsubuh", int_hexagon_A2_svsubuhs>;
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addp:
+ di_ALU64_didi <"add", int_hexagon_A2_addp>;
+def Hexagon_A2_addsat:
+ si_ALU64_sisi_sat <"add", int_hexagon_A2_addsat>;
+
+// ALU64 / ALU / Add halfword.
+// Even though the definition says hl, the semantics are lh, so DON'T
+// change the class "si_ALU64_sisi_l16_lh" that it inherits from.
+def Hexagon_A2_addh_l16_hl:
+ si_ALU64_sisi_l16_lh <"add", int_hexagon_A2_addh_l16_hl>;
+def Hexagon_A2_addh_l16_ll:
+ si_ALU64_sisi_l16_ll <"add", int_hexagon_A2_addh_l16_ll>;
+
+def Hexagon_A2_addh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"add", int_hexagon_A2_addh_l16_sat_hl>;
+def Hexagon_A2_addh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"add", int_hexagon_A2_addh_l16_sat_ll>;
+
+def Hexagon_A2_addh_h16_hh:
+ si_ALU64_sisi_h16_hh <"add", int_hexagon_A2_addh_h16_hh>;
+def Hexagon_A2_addh_h16_hl:
+ si_ALU64_sisi_h16_hl <"add", int_hexagon_A2_addh_h16_hl>;
+def Hexagon_A2_addh_h16_lh:
+ si_ALU64_sisi_h16_lh <"add", int_hexagon_A2_addh_h16_lh>;
+def Hexagon_A2_addh_h16_ll:
+ si_ALU64_sisi_h16_ll <"add", int_hexagon_A2_addh_h16_ll>;
+
+def Hexagon_A2_addh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"add", int_hexagon_A2_addh_h16_sat_hh>;
+def Hexagon_A2_addh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"add", int_hexagon_A2_addh_h16_sat_hl>;
+def Hexagon_A2_addh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"add", int_hexagon_A2_addh_h16_sat_lh>;
+def Hexagon_A2_addh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"add", int_hexagon_A2_addh_h16_sat_ll>;
+
+// ALU64 / ALU / Compare.
+def Hexagon_C2_cmpeqp:
+ qi_ALU64_didi <"cmp.eq", int_hexagon_C2_cmpeqp>;
+def Hexagon_C2_cmpgtp:
+ qi_ALU64_didi <"cmp.gt", int_hexagon_C2_cmpgtp>;
+def Hexagon_C2_cmpgtup:
+ qi_ALU64_didi <"cmp.gtu", int_hexagon_C2_cmpgtup>;
+
+// ALU64 / ALU / Logical operations.
+def Hexagon_A2_andp:
+ di_ALU64_didi <"and", int_hexagon_A2_andp>;
+def Hexagon_A2_orp:
+ di_ALU64_didi <"or", int_hexagon_A2_orp>;
+def Hexagon_A2_xorp:
+ di_ALU64_didi <"xor", int_hexagon_A2_xorp>;
+
+// ALU64 / ALU / Maximum.
+def Hexagon_A2_max:
+ si_ALU64_sisi <"max", int_hexagon_A2_max>;
+def Hexagon_A2_maxu:
+ si_ALU64_sisi <"maxu", int_hexagon_A2_maxu>;
+
+// ALU64 / ALU / Minimum.
+def Hexagon_A2_min:
+ si_ALU64_sisi <"min", int_hexagon_A2_min>;
+def Hexagon_A2_minu:
+ si_ALU64_sisi <"minu", int_hexagon_A2_minu>;
+
+// ALU64 / ALU / Subtract.
+def Hexagon_A2_subp:
+ di_ALU64_didi <"sub", int_hexagon_A2_subp>;
+def Hexagon_A2_subsat:
+ si_ALU64_sisi_sat <"sub", int_hexagon_A2_subsat>;
+
+// ALU64 / ALU / Subtract halfword.
+// Even though the definition says hl, the semantics are lh, so DON'T
+// change the class "si_ALU64_sisi_l16_lh" that it inherits from.
+def Hexagon_A2_subh_l16_hl:
+ si_ALU64_sisi_l16_lh <"sub", int_hexagon_A2_subh_l16_hl>;
+def Hexagon_A2_subh_l16_ll:
+ si_ALU64_sisi_l16_ll <"sub", int_hexagon_A2_subh_l16_ll>;
+
+def Hexagon_A2_subh_l16_sat_hl:
+ si_ALU64_sisi_l16_sat_lh <"sub", int_hexagon_A2_subh_l16_sat_hl>;
+def Hexagon_A2_subh_l16_sat_ll:
+ si_ALU64_sisi_l16_sat_ll <"sub", int_hexagon_A2_subh_l16_sat_ll>;
+
+def Hexagon_A2_subh_h16_hh:
+ si_ALU64_sisi_h16_hh <"sub", int_hexagon_A2_subh_h16_hh>;
+def Hexagon_A2_subh_h16_hl:
+ si_ALU64_sisi_h16_hl <"sub", int_hexagon_A2_subh_h16_hl>;
+def Hexagon_A2_subh_h16_lh:
+ si_ALU64_sisi_h16_lh <"sub", int_hexagon_A2_subh_h16_lh>;
+def Hexagon_A2_subh_h16_ll:
+ si_ALU64_sisi_h16_ll <"sub", int_hexagon_A2_subh_h16_ll>;
+
+def Hexagon_A2_subh_h16_sat_hh:
+ si_ALU64_sisi_h16_sat_hh <"sub", int_hexagon_A2_subh_h16_sat_hh>;
+def Hexagon_A2_subh_h16_sat_hl:
+ si_ALU64_sisi_h16_sat_hl <"sub", int_hexagon_A2_subh_h16_sat_hl>;
+def Hexagon_A2_subh_h16_sat_lh:
+ si_ALU64_sisi_h16_sat_lh <"sub", int_hexagon_A2_subh_h16_sat_lh>;
+def Hexagon_A2_subh_h16_sat_ll:
+ si_ALU64_sisi_h16_sat_ll <"sub", int_hexagon_A2_subh_h16_sat_ll>;
+
+// ALU64 / ALU / Transfer register.
+def Hexagon_A2_tfrp:
+ di_ALU64_di <"", int_hexagon_A2_tfrp>;
+
+/********************************************************************
+* ALU64/BIT *
+*********************************************************************/
+
+// ALU64 / BIT / Masked parity.
+def Hexagon_S2_parityp:
+ si_ALU64_didi <"parity", int_hexagon_S2_parityp>;
+
+/********************************************************************
+* ALU64/PERM *
+*********************************************************************/
+
+// ALU64 / PERM / Vector pack high and low halfwords.
+def Hexagon_S2_packhl:
+ di_ALU64_sisi <"packhl", int_hexagon_S2_packhl>;
+
+/********************************************************************
+* ALU64/VB *
+*********************************************************************/
+
+// ALU64 / VB / Vector add unsigned bytes.
+def Hexagon_A2_vaddub:
+ di_ALU64_didi <"vaddub", int_hexagon_A2_vaddub>;
+def Hexagon_A2_vaddubs:
+ di_ALU64_didi_sat <"vaddub", int_hexagon_A2_vaddubs>;
+
+// ALU64 / VB / Vector average unsigned bytes.
+def Hexagon_A2_vavgub:
+ di_ALU64_didi <"vavgub", int_hexagon_A2_vavgub>;
+def Hexagon_A2_vavgubr:
+ di_ALU64_didi_rnd <"vavgub", int_hexagon_A2_vavgubr>;
+
+// ALU64 / VB / Vector compare unsigned bytes.
+def Hexagon_A2_vcmpbeq:
+ qi_ALU64_didi <"vcmpb.eq", int_hexagon_A2_vcmpbeq>;
+def Hexagon_A2_vcmpbgtu:
+ qi_ALU64_didi <"vcmpb.gtu",int_hexagon_A2_vcmpbgtu>;
+
+// ALU64 / VB / Vector maximum/minimum unsigned bytes.
+def Hexagon_A2_vmaxub:
+ di_ALU64_didi <"vmaxub", int_hexagon_A2_vmaxub>;
+def Hexagon_A2_vminub:
+ di_ALU64_didi <"vminub", int_hexagon_A2_vminub>;
+
+// ALU64 / VB / Vector subtract unsigned bytes.
+def Hexagon_A2_vsubub:
+ di_ALU64_didi <"vsubub", int_hexagon_A2_vsubub>;
+def Hexagon_A2_vsububs:
+ di_ALU64_didi_sat <"vsubub", int_hexagon_A2_vsububs>;
+
+// ALU64 / VB / Vector mux.
+def Hexagon_C2_vmux:
+ di_ALU64_qididi <"vmux", int_hexagon_C2_vmux>;
+
+
+/********************************************************************
+* ALU64/VH *
+*********************************************************************/
+
+// ALU64 / VH / Vector add halfwords.
+// Rdd64=vadd[u]h(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddh:
+ di_ALU64_didi <"vaddh", int_hexagon_A2_vaddh>;
+def Hexagon_A2_vaddhs:
+ di_ALU64_didi_sat <"vaddh", int_hexagon_A2_vaddhs>;
+def Hexagon_A2_vadduhs:
+ di_ALU64_didi_sat <"vadduh", int_hexagon_A2_vadduhs>;
+
+// ALU64 / VH / Vector average halfwords.
+// Rdd64=v[n]avg[u]h(Rss64,Rtt64)[:rnd|:crnd][:sat]
+def Hexagon_A2_vavgh:
+ di_ALU64_didi <"vavgh", int_hexagon_A2_vavgh>;
+def Hexagon_A2_vavghcr:
+ di_ALU64_didi_crnd <"vavgh", int_hexagon_A2_vavghcr>;
+def Hexagon_A2_vavghr:
+ di_ALU64_didi_rnd <"vavgh", int_hexagon_A2_vavghr>;
+def Hexagon_A2_vavguh:
+ di_ALU64_didi <"vavguh", int_hexagon_A2_vavguh>;
+def Hexagon_A2_vavguhr:
+ di_ALU64_didi_rnd <"vavguh", int_hexagon_A2_vavguhr>;
+def Hexagon_A2_vnavgh:
+ di_ALU64_didi <"vnavgh", int_hexagon_A2_vnavgh>;
+def Hexagon_A2_vnavghcr:
+ di_ALU64_didi_crnd_sat <"vnavgh", int_hexagon_A2_vnavghcr>;
+def Hexagon_A2_vnavghr:
+ di_ALU64_didi_rnd_sat <"vnavgh", int_hexagon_A2_vnavghr>;
+
+// ALU64 / VH / Vector compare halfwords.
+def Hexagon_A2_vcmpheq:
+ qi_ALU64_didi <"vcmph.eq", int_hexagon_A2_vcmpheq>;
+def Hexagon_A2_vcmphgt:
+ qi_ALU64_didi <"vcmph.gt", int_hexagon_A2_vcmphgt>;
+def Hexagon_A2_vcmphgtu:
+ qi_ALU64_didi <"vcmph.gtu",int_hexagon_A2_vcmphgtu>;
+
+// ALU64 / VH / Vector maximum halfwords.
+def Hexagon_A2_vmaxh:
+ di_ALU64_didi <"vmaxh", int_hexagon_A2_vmaxh>;
+def Hexagon_A2_vmaxuh:
+ di_ALU64_didi <"vmaxuh", int_hexagon_A2_vmaxuh>;
+
+// ALU64 / VH / Vector minimum halfwords.
+def Hexagon_A2_vminh:
+ di_ALU64_didi <"vminh", int_hexagon_A2_vminh>;
+def Hexagon_A2_vminuh:
+ di_ALU64_didi <"vminuh", int_hexagon_A2_vminuh>;
+
+// ALU64 / VH / Vector subtract halfwords.
+def Hexagon_A2_vsubh:
+ di_ALU64_didi <"vsubh", int_hexagon_A2_vsubh>;
+def Hexagon_A2_vsubhs:
+ di_ALU64_didi_sat <"vsubh", int_hexagon_A2_vsubhs>;
+def Hexagon_A2_vsubuhs:
+ di_ALU64_didi_sat <"vsubuh", int_hexagon_A2_vsubuhs>;
+
+
+/********************************************************************
+* ALU64/VW *
+*********************************************************************/
+
+// ALU64 / VW / Vector add words.
+// Rdd64=vaddw(Rss64,Rtt64)[:sat]
+def Hexagon_A2_vaddw:
+ di_ALU64_didi <"vaddw", int_hexagon_A2_vaddw>;
+def Hexagon_A2_vaddws:
+ di_ALU64_didi_sat <"vaddw", int_hexagon_A2_vaddws>;
+
+// ALU64 / VW / Vector average words.
+def Hexagon_A2_vavguw:
+ di_ALU64_didi <"vavguw", int_hexagon_A2_vavguw>;
+def Hexagon_A2_vavguwr:
+ di_ALU64_didi_rnd <"vavguw", int_hexagon_A2_vavguwr>;
+def Hexagon_A2_vavgw:
+ di_ALU64_didi <"vavgw", int_hexagon_A2_vavgw>;
+def Hexagon_A2_vavgwcr:
+ di_ALU64_didi_crnd <"vavgw", int_hexagon_A2_vavgwcr>;
+def Hexagon_A2_vavgwr:
+ di_ALU64_didi_rnd <"vavgw", int_hexagon_A2_vavgwr>;
+def Hexagon_A2_vnavgw:
+ di_ALU64_didi <"vnavgw", int_hexagon_A2_vnavgw>;
+def Hexagon_A2_vnavgwcr:
+ di_ALU64_didi_crnd_sat <"vnavgw", int_hexagon_A2_vnavgwcr>;
+def Hexagon_A2_vnavgwr:
+ di_ALU64_didi_rnd_sat <"vnavgw", int_hexagon_A2_vnavgwr>;
+
+// ALU64 / VW / Vector compare words.
+def Hexagon_A2_vcmpweq:
+ qi_ALU64_didi <"vcmpw.eq", int_hexagon_A2_vcmpweq>;
+def Hexagon_A2_vcmpwgt:
+ qi_ALU64_didi <"vcmpw.gt", int_hexagon_A2_vcmpwgt>;
+def Hexagon_A2_vcmpwgtu:
+ qi_ALU64_didi <"vcmpw.gtu",int_hexagon_A2_vcmpwgtu>;
+
+// ALU64 / VW / Vector maximum words.
+def Hexagon_A2_vmaxw:
+ di_ALU64_didi <"vmaxw", int_hexagon_A2_vmaxw>;
+def Hexagon_A2_vmaxuw:
+ di_ALU64_didi <"vmaxuw", int_hexagon_A2_vmaxuw>;
+
+// ALU64 / VW / Vector minimum words.
+def Hexagon_A2_vminw:
+ di_ALU64_didi <"vminw", int_hexagon_A2_vminw>;
+def Hexagon_A2_vminuw:
+ di_ALU64_didi <"vminuw", int_hexagon_A2_vminuw>;
+
+// ALU64 / VW / Vector subtract words.
+def Hexagon_A2_vsubw:
+ di_ALU64_didi <"vsubw", int_hexagon_A2_vsubw>;
+def Hexagon_A2_vsubws:
+ di_ALU64_didi_sat <"vsubw", int_hexagon_A2_vsubws>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Logical reductions on predicates.
+def Hexagon_C2_all8:
+ qi_SInst_qi <"all8", int_hexagon_C2_all8>;
+def Hexagon_C2_any8:
+ qi_SInst_qi <"any8", int_hexagon_C2_any8>;
+
+// CR / Logical operations on predicates.
+def Hexagon_C2_pxfer_map:
+ qi_SInst_qi_pxfer <"", int_hexagon_C2_pxfer_map>;
+def Hexagon_C2_and:
+ qi_SInst_qiqi <"and", int_hexagon_C2_and>;
+def Hexagon_C2_andn:
+ qi_SInst_qiqi_neg <"and", int_hexagon_C2_andn>;
+def Hexagon_C2_not:
+ qi_SInst_qi <"not", int_hexagon_C2_not>;
+def Hexagon_C2_or:
+ qi_SInst_qiqi <"or", int_hexagon_C2_or>;
+def Hexagon_C2_orn:
+ qi_SInst_qiqi_neg <"or", int_hexagon_C2_orn>;
+def Hexagon_C2_xor:
+ qi_SInst_qiqi <"xor", int_hexagon_C2_xor>;
+
+
+/********************************************************************
+* MTYPE/ALU *
+*********************************************************************/
+
+// MTYPE / ALU / Add and accumulate.
+def Hexagon_M2_acci:
+ si_MInst_sisisi_acc <"add", int_hexagon_M2_acci>;
+def Hexagon_M2_accii:
+ si_MInst_sisis8_acc <"add", int_hexagon_M2_accii>;
+def Hexagon_M2_nacci:
+ si_MInst_sisisi_nac <"add", int_hexagon_M2_nacci>;
+def Hexagon_M2_naccii:
+ si_MInst_sisis8_nac <"add", int_hexagon_M2_naccii>;
+
+// MTYPE / ALU / Subtract and accumulate.
+def Hexagon_M2_subacc:
+ si_MInst_sisisi_acc <"sub", int_hexagon_M2_subacc>;
+
+// MTYPE / ALU / Vector absolute difference.
+def Hexagon_M2_vabsdiffh:
+ di_MInst_didi <"vabsdiffh",int_hexagon_M2_vabsdiffh>;
+def Hexagon_M2_vabsdiffw:
+ di_MInst_didi <"vabsdiffw",int_hexagon_M2_vabsdiffw>;
+
+// MTYPE / ALU / XOR with destination (xor-accumulate).
+def Hexagon_M2_xor_xacc:
+ si_MInst_sisisi_xacc <"xor", int_hexagon_M2_xor_xacc>;
+
+
+/********************************************************************
+* MTYPE/COMPLEX *
+*********************************************************************/
+
+// MTYPE / COMPLEX / Complex multiply.
+// Rdd[+-]=cmpy(Rs,Rt[*])[:<<1]:sat
+def Hexagon_M2_cmpys_s1:
+ di_MInst_sisi_s1_sat <"cmpy", int_hexagon_M2_cmpys_s1>;
+def Hexagon_M2_cmpys_s0:
+ di_MInst_sisi_sat <"cmpy", int_hexagon_M2_cmpys_s0>;
+def Hexagon_M2_cmpysc_s1:
+ di_MInst_sisi_s1_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s1>;
+def Hexagon_M2_cmpysc_s0:
+ di_MInst_sisi_sat_conj <"cmpy", int_hexagon_M2_cmpysc_s0>;
+
+def Hexagon_M2_cmacs_s1:
+ di_MInst_disisi_acc_s1_sat <"cmpy", int_hexagon_M2_cmacs_s1>;
+def Hexagon_M2_cmacs_s0:
+ di_MInst_disisi_acc_sat <"cmpy", int_hexagon_M2_cmacs_s0>;
+def Hexagon_M2_cmacsc_s1:
+ di_MInst_disisi_acc_s1_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s1>;
+def Hexagon_M2_cmacsc_s0:
+ di_MInst_disisi_acc_sat_conj <"cmpy", int_hexagon_M2_cmacsc_s0>;
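+// For instance, Hexagon_M2_cmacs_s1 above instantiates
+// di_MInst_disisi_acc_s1_sat (defined above), which emits
+// "$dst += cmpy($src1, $src2):<<1:sat" with $dst tied to the
+// accumulator input $dst2.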
+
+def Hexagon_M2_cnacs_s1:
+ di_MInst_disisi_nac_s1_sat <"cmpy", int_hexagon_M2_cnacs_s1>;
+def Hexagon_M2_cnacs_s0:
+ di_MInst_disisi_nac_sat <"cmpy", int_hexagon_M2_cnacs_s0>;
+def Hexagon_M2_cnacsc_s1:
+ di_MInst_disisi_nac_s1_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s1>;
+def Hexagon_M2_cnacsc_s0:
+ di_MInst_disisi_nac_sat_conj <"cmpy", int_hexagon_M2_cnacsc_s0>;
+
+// MTYPE / COMPLEX / Complex multiply real or imaginary.
+def Hexagon_M2_cmpyr_s0:
+ di_MInst_sisi <"cmpyr", int_hexagon_M2_cmpyr_s0>;
+def Hexagon_M2_cmacr_s0:
+ di_MInst_disisi_acc <"cmpyr", int_hexagon_M2_cmacr_s0>;
+
+def Hexagon_M2_cmpyi_s0:
+ di_MInst_sisi <"cmpyi", int_hexagon_M2_cmpyi_s0>;
+def Hexagon_M2_cmaci_s0:
+ di_MInst_disisi_acc <"cmpyi", int_hexagon_M2_cmaci_s0>;
+
+// MTYPE / COMPLEX / Complex multiply with round and pack.
+// Rd32=cmpy(Rs32,Rt32[*])[:<<1]:rnd:sat
+def Hexagon_M2_cmpyrs_s0:
+ si_MInst_sisi_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s0>;
+def Hexagon_M2_cmpyrs_s1:
+ si_MInst_sisi_s1_rnd_sat <"cmpy", int_hexagon_M2_cmpyrs_s1>;
+
+def Hexagon_M2_cmpyrsc_s0:
+ si_MInst_sisi_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s0>;
+def Hexagon_M2_cmpyrsc_s1:
+ si_MInst_sisi_s1_rnd_sat_conj <"cmpy", int_hexagon_M2_cmpyrsc_s1>;
+
+// MTYPE / COMPLEX / Vector complex multiply real or imaginary.
+def Hexagon_M2_vcmpy_s0_sat_i:
+ di_MInst_didi_sat <"vcmpyi", int_hexagon_M2_vcmpy_s0_sat_i>;
+def Hexagon_M2_vcmpy_s1_sat_i:
+ di_MInst_didi_s1_sat <"vcmpyi", int_hexagon_M2_vcmpy_s1_sat_i>;
+
+def Hexagon_M2_vcmpy_s0_sat_r:
+ di_MInst_didi_sat <"vcmpyr", int_hexagon_M2_vcmpy_s0_sat_r>;
+def Hexagon_M2_vcmpy_s1_sat_r:
+ di_MInst_didi_s1_sat <"vcmpyr", int_hexagon_M2_vcmpy_s1_sat_r>;
+
+def Hexagon_M2_vcmac_s0_sat_i:
+ di_MInst_dididi_acc_sat <"vcmpyi", int_hexagon_M2_vcmac_s0_sat_i>;
+def Hexagon_M2_vcmac_s0_sat_r:
+ di_MInst_dididi_acc_sat <"vcmpyr", int_hexagon_M2_vcmac_s0_sat_r>;
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpyi_s0:
+ di_MInst_didi <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0>;
+def Hexagon_M2_vrcmpyr_s0:
+ di_MInst_didi <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0>;
+
+def Hexagon_M2_vrcmpyi_s0c:
+ di_MInst_didi_conj <"vrcmpyi", int_hexagon_M2_vrcmpyi_s0c>;
+def Hexagon_M2_vrcmpyr_s0c:
+ di_MInst_didi_conj <"vrcmpyr", int_hexagon_M2_vrcmpyr_s0c>;
+
+def Hexagon_M2_vrcmaci_s0:
+ di_MInst_dididi_acc <"vrcmpyi", int_hexagon_M2_vrcmaci_s0>;
+def Hexagon_M2_vrcmacr_s0:
+ di_MInst_dididi_acc <"vrcmpyr", int_hexagon_M2_vrcmacr_s0>;
+
+def Hexagon_M2_vrcmaci_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyi", int_hexagon_M2_vrcmaci_s0c>;
+def Hexagon_M2_vrcmacr_s0c:
+ di_MInst_dididi_acc_conj <"vrcmpyr", int_hexagon_M2_vrcmacr_s0c>;
+
+
+/********************************************************************
+* MTYPE/MPYH *
+*********************************************************************/
+
+// MTYPE / MPYH / Multiply and use lower result.
+//def Hexagon_M2_mpysmi:
+// si_MInst_sim9 <"mpyi", int_hexagon_M2_mpysmi>;
+def Hexagon_M2_mpyi:
+ si_MInst_sisi <"mpyi", int_hexagon_M2_mpyi>;
+def Hexagon_M2_mpyui:
+ si_MInst_sisi <"mpyui", int_hexagon_M2_mpyui>;
+def Hexagon_M2_macsip:
+ si_MInst_sisiu8_acc <"mpyi", int_hexagon_M2_macsip>;
+def Hexagon_M2_maci:
+ si_MInst_sisisi_acc <"mpyi", int_hexagon_M2_maci>;
+def Hexagon_M2_macsin:
+ si_MInst_sisiu8_nac <"mpyi", int_hexagon_M2_macsin>;
+
+// MTYPE / MPYH / Multiply word by half (32x16).
+// Rdd[+]=vmpywoh(Rss,Rtt)[:<<1][:rnd][:sat]
+// Rdd[+]=vmpyweh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyl_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs1>;
+def Hexagon_M2_mmpyl_s1:
+ di_MInst_didi_s1_sat <"vmpyweh", int_hexagon_M2_mmpyl_s1>;
+def Hexagon_M2_mmpyl_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweh", int_hexagon_M2_mmpyl_rs0>;
+def Hexagon_M2_mmpyl_s0:
+ di_MInst_didi_sat <"vmpyweh", int_hexagon_M2_mmpyl_s0>;
+def Hexagon_M2_mmpyh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs1>;
+def Hexagon_M2_mmpyh_s1:
+ di_MInst_didi_s1_sat <"vmpywoh", int_hexagon_M2_mmpyh_s1>;
+def Hexagon_M2_mmpyh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywoh", int_hexagon_M2_mmpyh_rs0>;
+def Hexagon_M2_mmpyh_s0:
+ di_MInst_didi_sat <"vmpywoh", int_hexagon_M2_mmpyh_s0>;
+def Hexagon_M2_mmacls_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs1>;
+def Hexagon_M2_mmacls_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweh", int_hexagon_M2_mmacls_s1>;
+def Hexagon_M2_mmacls_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweh", int_hexagon_M2_mmacls_rs0>;
+def Hexagon_M2_mmacls_s0:
+ di_MInst_dididi_acc_sat <"vmpyweh", int_hexagon_M2_mmacls_s0>;
+def Hexagon_M2_mmachs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs1>;
+def Hexagon_M2_mmachs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywoh", int_hexagon_M2_mmachs_s1>;
+def Hexagon_M2_mmachs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywoh", int_hexagon_M2_mmachs_rs0>;
+def Hexagon_M2_mmachs_s0:
+ di_MInst_dididi_acc_sat <"vmpywoh", int_hexagon_M2_mmachs_s0>;
+
+// MTYPE / MPYH / Multiply word by unsigned half (32x16).
+// Rdd[+]=vmpywouh(Rss,Rtt)[:<<1][:rnd][:sat]
+// Rdd[+]=vmpyweuh(Rss,Rtt)[:<<1][:rnd][:sat]
+def Hexagon_M2_mmpyul_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs1>;
+def Hexagon_M2_mmpyul_s1:
+ di_MInst_didi_s1_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s1>;
+def Hexagon_M2_mmpyul_rs0:
+ di_MInst_didi_rnd_sat <"vmpyweuh", int_hexagon_M2_mmpyul_rs0>;
+def Hexagon_M2_mmpyul_s0:
+ di_MInst_didi_sat <"vmpyweuh", int_hexagon_M2_mmpyul_s0>;
+def Hexagon_M2_mmpyuh_rs1:
+ di_MInst_didi_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs1>;
+def Hexagon_M2_mmpyuh_s1:
+ di_MInst_didi_s1_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s1>;
+def Hexagon_M2_mmpyuh_rs0:
+ di_MInst_didi_rnd_sat <"vmpywouh", int_hexagon_M2_mmpyuh_rs0>;
+def Hexagon_M2_mmpyuh_s0:
+ di_MInst_didi_sat <"vmpywouh", int_hexagon_M2_mmpyuh_s0>;
+def Hexagon_M2_mmaculs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs1>;
+def Hexagon_M2_mmaculs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s1>;
+def Hexagon_M2_mmaculs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpyweuh", int_hexagon_M2_mmaculs_rs0>;
+def Hexagon_M2_mmaculs_s0:
+ di_MInst_dididi_acc_sat <"vmpyweuh", int_hexagon_M2_mmaculs_s0>;
+def Hexagon_M2_mmacuhs_rs1:
+ di_MInst_dididi_acc_s1_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs1>;
+def Hexagon_M2_mmacuhs_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s1>;
+def Hexagon_M2_mmacuhs_rs0:
+ di_MInst_dididi_acc_rnd_sat <"vmpywouh", int_hexagon_M2_mmacuhs_rs0>;
+def Hexagon_M2_mmacuhs_s0:
+ di_MInst_dididi_acc_sat <"vmpywouh", int_hexagon_M2_mmacuhs_s0>;
+
+// MTYPE / MPYH / Multiply and use upper result.
+def Hexagon_M2_hmmpyh_rs1:
+ si_MInst_sisi_h_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyh_rs1>;
+def Hexagon_M2_hmmpyl_rs1:
+ si_MInst_sisi_l_s1_rnd_sat <"mpy", int_hexagon_M2_hmmpyl_rs1>;
+def Hexagon_M2_mpy_up:
+ si_MInst_sisi <"mpy", int_hexagon_M2_mpy_up>;
+def Hexagon_M2_dpmpyss_rnd_s0:
+ si_MInst_sisi_rnd <"mpy", int_hexagon_M2_dpmpyss_rnd_s0>;
+def Hexagon_M2_mpyu_up:
+ si_MInst_sisi <"mpyu", int_hexagon_M2_mpyu_up>;
+
+// MTYPE / MPYH / Multiply and use full result.
+def Hexagon_M2_dpmpyuu_s0:
+ di_MInst_sisi <"mpyu", int_hexagon_M2_dpmpyuu_s0>;
+def Hexagon_M2_dpmpyuu_acc_s0:
+ di_MInst_disisi_acc <"mpyu", int_hexagon_M2_dpmpyuu_acc_s0>;
+def Hexagon_M2_dpmpyuu_nac_s0:
+ di_MInst_disisi_nac <"mpyu", int_hexagon_M2_dpmpyuu_nac_s0>;
+def Hexagon_M2_dpmpyss_s0:
+ di_MInst_sisi <"mpy", int_hexagon_M2_dpmpyss_s0>;
+def Hexagon_M2_dpmpyss_acc_s0:
+ di_MInst_disisi_acc <"mpy", int_hexagon_M2_dpmpyss_acc_s0>;
+def Hexagon_M2_dpmpyss_nac_s0:
+ di_MInst_disisi_nac <"mpy", int_hexagon_M2_dpmpyss_nac_s0>;
+
+
+/********************************************************************
+* MTYPE/MPYS *
+*********************************************************************/
+
+// MTYPE / MPYS / Scalar 16x16 multiply signed.
+//Rd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd][:sat]
+def Hexagon_M2_mpy_hh_s0:
+ si_MInst_sisi_hh <"mpy", int_hexagon_M2_mpy_hh_s0>;
+def Hexagon_M2_mpy_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpy_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s1:
+ si_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_rnd_hh_s1:
+ si_MInst_sisi_sat_rnd_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s1>;
+def Hexagon_M2_mpy_sat_hh_s1:
+ si_MInst_sisi_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_sat_hh_s1>;
+def Hexagon_M2_mpy_rnd_hh_s0:
+ si_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpy_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_rnd_hh_s0:
+ si_MInst_sisi_sat_rnd_hh <"mpy", int_hexagon_M2_mpy_sat_rnd_hh_s0>;
+def Hexagon_M2_mpy_sat_hh_s0:
+ si_MInst_sisi_sat_hh <"mpy", int_hexagon_M2_mpy_sat_hh_s0>;
+
+def Hexagon_M2_mpy_hl_s0:
+ si_MInst_sisi_hl <"mpy", int_hexagon_M2_mpy_hl_s0>;
+def Hexagon_M2_mpy_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpy_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s1:
+ si_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_rnd_hl_s1:
+ si_MInst_sisi_sat_rnd_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s1>;
+def Hexagon_M2_mpy_sat_hl_s1:
+ si_MInst_sisi_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_sat_hl_s1>;
+def Hexagon_M2_mpy_rnd_hl_s0:
+ si_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpy_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_rnd_hl_s0:
+ si_MInst_sisi_sat_rnd_hl <"mpy", int_hexagon_M2_mpy_sat_rnd_hl_s0>;
+def Hexagon_M2_mpy_sat_hl_s0:
+ si_MInst_sisi_sat_hl <"mpy", int_hexagon_M2_mpy_sat_hl_s0>;
+
+def Hexagon_M2_mpy_lh_s0:
+ si_MInst_sisi_lh <"mpy", int_hexagon_M2_mpy_lh_s0>;
+def Hexagon_M2_mpy_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpy_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s1:
+ si_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_rnd_lh_s1:
+ si_MInst_sisi_sat_rnd_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s1>;
+def Hexagon_M2_mpy_sat_lh_s1:
+ si_MInst_sisi_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_sat_lh_s1>;
+def Hexagon_M2_mpy_rnd_lh_s0:
+ si_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpy_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_rnd_lh_s0:
+ si_MInst_sisi_sat_rnd_lh <"mpy", int_hexagon_M2_mpy_sat_rnd_lh_s0>;
+def Hexagon_M2_mpy_sat_lh_s0:
+ si_MInst_sisi_sat_lh <"mpy", int_hexagon_M2_mpy_sat_lh_s0>;
+
+def Hexagon_M2_mpy_ll_s0:
+ si_MInst_sisi_ll <"mpy", int_hexagon_M2_mpy_ll_s0>;
+def Hexagon_M2_mpy_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpy_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s1:
+ si_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_rnd_ll_s1:
+ si_MInst_sisi_sat_rnd_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s1>;
+def Hexagon_M2_mpy_sat_ll_s1:
+ si_MInst_sisi_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_sat_ll_s1>;
+def Hexagon_M2_mpy_rnd_ll_s0:
+ si_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpy_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_rnd_ll_s0:
+ si_MInst_sisi_sat_rnd_ll <"mpy", int_hexagon_M2_mpy_sat_rnd_ll_s0>;
+def Hexagon_M2_mpy_sat_ll_s0:
+ si_MInst_sisi_sat_ll <"mpy", int_hexagon_M2_mpy_sat_ll_s0>;
+
+//Rdd=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:rnd]
+def Hexagon_M2_mpyd_hh_s0:
+ di_MInst_sisi_hh <"mpy", int_hexagon_M2_mpyd_hh_s0>;
+def Hexagon_M2_mpyd_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpy", int_hexagon_M2_mpyd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s1:
+ di_MInst_sisi_rnd_hh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hh_s1>;
+def Hexagon_M2_mpyd_rnd_hh_s0:
+ di_MInst_sisi_rnd_hh <"mpy", int_hexagon_M2_mpyd_rnd_hh_s0>;
+
+def Hexagon_M2_mpyd_hl_s0:
+ di_MInst_sisi_hl <"mpy", int_hexagon_M2_mpyd_hl_s0>;
+def Hexagon_M2_mpyd_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpy", int_hexagon_M2_mpyd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s1:
+ di_MInst_sisi_rnd_hl_s1 <"mpy", int_hexagon_M2_mpyd_rnd_hl_s1>;
+def Hexagon_M2_mpyd_rnd_hl_s0:
+ di_MInst_sisi_rnd_hl <"mpy", int_hexagon_M2_mpyd_rnd_hl_s0>;
+
+def Hexagon_M2_mpyd_lh_s0:
+ di_MInst_sisi_lh <"mpy", int_hexagon_M2_mpyd_lh_s0>;
+def Hexagon_M2_mpyd_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpy", int_hexagon_M2_mpyd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s1:
+ di_MInst_sisi_rnd_lh_s1 <"mpy", int_hexagon_M2_mpyd_rnd_lh_s1>;
+def Hexagon_M2_mpyd_rnd_lh_s0:
+ di_MInst_sisi_rnd_lh <"mpy", int_hexagon_M2_mpyd_rnd_lh_s0>;
+
+def Hexagon_M2_mpyd_ll_s0:
+ di_MInst_sisi_ll <"mpy", int_hexagon_M2_mpyd_ll_s0>;
+def Hexagon_M2_mpyd_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpy", int_hexagon_M2_mpyd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s1:
+ di_MInst_sisi_rnd_ll_s1 <"mpy", int_hexagon_M2_mpyd_rnd_ll_s1>;
+def Hexagon_M2_mpyd_rnd_ll_s0:
+ di_MInst_sisi_rnd_ll <"mpy", int_hexagon_M2_mpyd_rnd_ll_s0>;
+
+//Rx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpy", int_hexagon_M2_mpy_acc_hh_s0>;
+def Hexagon_M2_mpy_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s1:
+ si_MInst_sisisi_acc_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s1>;
+def Hexagon_M2_mpy_acc_sat_hh_s0:
+ si_MInst_sisisi_acc_sat_hh <"mpy", int_hexagon_M2_mpy_acc_sat_hh_s0>;
+
+def Hexagon_M2_mpy_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpy", int_hexagon_M2_mpy_acc_hl_s0>;
+def Hexagon_M2_mpy_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s1:
+ si_MInst_sisisi_acc_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s1>;
+def Hexagon_M2_mpy_acc_sat_hl_s0:
+ si_MInst_sisisi_acc_sat_hl <"mpy", int_hexagon_M2_mpy_acc_sat_hl_s0>;
+
+def Hexagon_M2_mpy_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpy", int_hexagon_M2_mpy_acc_lh_s0>;
+def Hexagon_M2_mpy_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s1:
+ si_MInst_sisisi_acc_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s1>;
+def Hexagon_M2_mpy_acc_sat_lh_s0:
+ si_MInst_sisisi_acc_sat_lh <"mpy", int_hexagon_M2_mpy_acc_sat_lh_s0>;
+
+def Hexagon_M2_mpy_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpy", int_hexagon_M2_mpy_acc_ll_s0>;
+def Hexagon_M2_mpy_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s1:
+ si_MInst_sisisi_acc_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s1>;
+def Hexagon_M2_mpy_acc_sat_ll_s0:
+ si_MInst_sisisi_acc_sat_ll <"mpy", int_hexagon_M2_mpy_acc_sat_ll_s0>;
+
+//Rx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1][:sat]
+def Hexagon_M2_mpy_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpy", int_hexagon_M2_mpy_nac_hh_s0>;
+def Hexagon_M2_mpy_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s1:
+ si_MInst_sisisi_nac_sat_hh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s1>;
+def Hexagon_M2_mpy_nac_sat_hh_s0:
+ si_MInst_sisisi_nac_sat_hh <"mpy", int_hexagon_M2_mpy_nac_sat_hh_s0>;
+
+def Hexagon_M2_mpy_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpy", int_hexagon_M2_mpy_nac_hl_s0>;
+def Hexagon_M2_mpy_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s1:
+ si_MInst_sisisi_nac_sat_hl_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s1>;
+def Hexagon_M2_mpy_nac_sat_hl_s0:
+ si_MInst_sisisi_nac_sat_hl <"mpy", int_hexagon_M2_mpy_nac_sat_hl_s0>;
+
+def Hexagon_M2_mpy_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpy", int_hexagon_M2_mpy_nac_lh_s0>;
+def Hexagon_M2_mpy_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s1:
+ si_MInst_sisisi_nac_sat_lh_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s1>;
+def Hexagon_M2_mpy_nac_sat_lh_s0:
+ si_MInst_sisisi_nac_sat_lh <"mpy", int_hexagon_M2_mpy_nac_sat_lh_s0>;
+
+def Hexagon_M2_mpy_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpy", int_hexagon_M2_mpy_nac_ll_s0>;
+def Hexagon_M2_mpy_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s1:
+ si_MInst_sisisi_nac_sat_ll_s1 <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s1>;
+def Hexagon_M2_mpy_nac_sat_ll_s0:
+ si_MInst_sisisi_nac_sat_ll <"mpy", int_hexagon_M2_mpy_nac_sat_ll_s0>;
+
+//Rxx+=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpy", int_hexagon_M2_mpyd_acc_hh_s0>;
+def Hexagon_M2_mpyd_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpy", int_hexagon_M2_mpyd_acc_hh_s1>;
+
+def Hexagon_M2_mpyd_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpy", int_hexagon_M2_mpyd_acc_hl_s0>;
+def Hexagon_M2_mpyd_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpy", int_hexagon_M2_mpyd_acc_hl_s1>;
+
+def Hexagon_M2_mpyd_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpy", int_hexagon_M2_mpyd_acc_lh_s0>;
+def Hexagon_M2_mpyd_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpy", int_hexagon_M2_mpyd_acc_lh_s1>;
+
+def Hexagon_M2_mpyd_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpy", int_hexagon_M2_mpyd_acc_ll_s0>;
+def Hexagon_M2_mpyd_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpy", int_hexagon_M2_mpyd_acc_ll_s1>;
+
+//Rxx-=mpy(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyd_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpy", int_hexagon_M2_mpyd_nac_hh_s0>;
+def Hexagon_M2_mpyd_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpy", int_hexagon_M2_mpyd_nac_hh_s1>;
+
+def Hexagon_M2_mpyd_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpy", int_hexagon_M2_mpyd_nac_hl_s0>;
+def Hexagon_M2_mpyd_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpy", int_hexagon_M2_mpyd_nac_hl_s1>;
+
+def Hexagon_M2_mpyd_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpy", int_hexagon_M2_mpyd_nac_lh_s0>;
+def Hexagon_M2_mpyd_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpy", int_hexagon_M2_mpyd_nac_lh_s1>;
+
+def Hexagon_M2_mpyd_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpy", int_hexagon_M2_mpyd_nac_ll_s0>;
+def Hexagon_M2_mpyd_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpy", int_hexagon_M2_mpyd_nac_ll_s1>;
+
+// MTYPE / MPYS / Scalar 16x16 multiply unsigned.
+//Rd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_hh_s0:
+ si_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyu_hh_s0>;
+def Hexagon_M2_mpyu_hh_s1:
+ si_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyu_hh_s1>;
+def Hexagon_M2_mpyu_hl_s0:
+ si_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyu_hl_s0>;
+def Hexagon_M2_mpyu_hl_s1:
+ si_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyu_hl_s1>;
+def Hexagon_M2_mpyu_lh_s0:
+ si_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyu_lh_s0>;
+def Hexagon_M2_mpyu_lh_s1:
+ si_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyu_lh_s1>;
+def Hexagon_M2_mpyu_ll_s0:
+ si_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyu_ll_s0>;
+def Hexagon_M2_mpyu_ll_s1:
+ si_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyu_ll_s1>;
+
+//Rdd=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_hh_s0:
+ di_MInst_sisi_hh <"mpyu", int_hexagon_M2_mpyud_hh_s0>;
+def Hexagon_M2_mpyud_hh_s1:
+ di_MInst_sisi_hh_s1 <"mpyu", int_hexagon_M2_mpyud_hh_s1>;
+def Hexagon_M2_mpyud_hl_s0:
+ di_MInst_sisi_hl <"mpyu", int_hexagon_M2_mpyud_hl_s0>;
+def Hexagon_M2_mpyud_hl_s1:
+ di_MInst_sisi_hl_s1 <"mpyu", int_hexagon_M2_mpyud_hl_s1>;
+def Hexagon_M2_mpyud_lh_s0:
+ di_MInst_sisi_lh <"mpyu", int_hexagon_M2_mpyud_lh_s0>;
+def Hexagon_M2_mpyud_lh_s1:
+ di_MInst_sisi_lh_s1 <"mpyu", int_hexagon_M2_mpyud_lh_s1>;
+def Hexagon_M2_mpyud_ll_s0:
+ di_MInst_sisi_ll <"mpyu", int_hexagon_M2_mpyud_ll_s0>;
+def Hexagon_M2_mpyud_ll_s1:
+ di_MInst_sisi_ll_s1 <"mpyu", int_hexagon_M2_mpyud_ll_s1>;
+
+//Rd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_acc_hh_s0:
+ si_MInst_sisisi_acc_hh <"mpyu", int_hexagon_M2_mpyu_acc_hh_s0>;
+def Hexagon_M2_mpyu_acc_hh_s1:
+ si_MInst_sisisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hh_s1>;
+def Hexagon_M2_mpyu_acc_hl_s0:
+ si_MInst_sisisi_acc_hl <"mpyu", int_hexagon_M2_mpyu_acc_hl_s0>;
+def Hexagon_M2_mpyu_acc_hl_s1:
+ si_MInst_sisisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyu_acc_hl_s1>;
+def Hexagon_M2_mpyu_acc_lh_s0:
+ si_MInst_sisisi_acc_lh <"mpyu", int_hexagon_M2_mpyu_acc_lh_s0>;
+def Hexagon_M2_mpyu_acc_lh_s1:
+ si_MInst_sisisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyu_acc_lh_s1>;
+def Hexagon_M2_mpyu_acc_ll_s0:
+ si_MInst_sisisi_acc_ll <"mpyu", int_hexagon_M2_mpyu_acc_ll_s0>;
+def Hexagon_M2_mpyu_acc_ll_s1:
+ si_MInst_sisisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyu_acc_ll_s1>;
+
+//Rd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyu_nac_hh_s0:
+ si_MInst_sisisi_nac_hh <"mpyu", int_hexagon_M2_mpyu_nac_hh_s0>;
+def Hexagon_M2_mpyu_nac_hh_s1:
+ si_MInst_sisisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hh_s1>;
+def Hexagon_M2_mpyu_nac_hl_s0:
+ si_MInst_sisisi_nac_hl <"mpyu", int_hexagon_M2_mpyu_nac_hl_s0>;
+def Hexagon_M2_mpyu_nac_hl_s1:
+ si_MInst_sisisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyu_nac_hl_s1>;
+def Hexagon_M2_mpyu_nac_lh_s0:
+ si_MInst_sisisi_nac_lh <"mpyu", int_hexagon_M2_mpyu_nac_lh_s0>;
+def Hexagon_M2_mpyu_nac_lh_s1:
+ si_MInst_sisisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyu_nac_lh_s1>;
+def Hexagon_M2_mpyu_nac_ll_s0:
+ si_MInst_sisisi_nac_ll <"mpyu", int_hexagon_M2_mpyu_nac_ll_s0>;
+def Hexagon_M2_mpyu_nac_ll_s1:
+ si_MInst_sisisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyu_nac_ll_s1>;
+
+//Rdd+=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_acc_hh_s0:
+ di_MInst_disisi_acc_hh <"mpyu", int_hexagon_M2_mpyud_acc_hh_s0>;
+def Hexagon_M2_mpyud_acc_hh_s1:
+ di_MInst_disisi_acc_hh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hh_s1>;
+def Hexagon_M2_mpyud_acc_hl_s0:
+ di_MInst_disisi_acc_hl <"mpyu", int_hexagon_M2_mpyud_acc_hl_s0>;
+def Hexagon_M2_mpyud_acc_hl_s1:
+ di_MInst_disisi_acc_hl_s1 <"mpyu", int_hexagon_M2_mpyud_acc_hl_s1>;
+def Hexagon_M2_mpyud_acc_lh_s0:
+ di_MInst_disisi_acc_lh <"mpyu", int_hexagon_M2_mpyud_acc_lh_s0>;
+def Hexagon_M2_mpyud_acc_lh_s1:
+ di_MInst_disisi_acc_lh_s1 <"mpyu", int_hexagon_M2_mpyud_acc_lh_s1>;
+def Hexagon_M2_mpyud_acc_ll_s0:
+ di_MInst_disisi_acc_ll <"mpyu", int_hexagon_M2_mpyud_acc_ll_s0>;
+def Hexagon_M2_mpyud_acc_ll_s1:
+ di_MInst_disisi_acc_ll_s1 <"mpyu", int_hexagon_M2_mpyud_acc_ll_s1>;
+
+//Rdd-=mpyu(Rs.[H|L],Rt.[H|L])[:<<0|:<<1]
+def Hexagon_M2_mpyud_nac_hh_s0:
+ di_MInst_disisi_nac_hh <"mpyu", int_hexagon_M2_mpyud_nac_hh_s0>;
+def Hexagon_M2_mpyud_nac_hh_s1:
+ di_MInst_disisi_nac_hh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hh_s1>;
+def Hexagon_M2_mpyud_nac_hl_s0:
+ di_MInst_disisi_nac_hl <"mpyu", int_hexagon_M2_mpyud_nac_hl_s0>;
+def Hexagon_M2_mpyud_nac_hl_s1:
+ di_MInst_disisi_nac_hl_s1 <"mpyu", int_hexagon_M2_mpyud_nac_hl_s1>;
+def Hexagon_M2_mpyud_nac_lh_s0:
+ di_MInst_disisi_nac_lh <"mpyu", int_hexagon_M2_mpyud_nac_lh_s0>;
+def Hexagon_M2_mpyud_nac_lh_s1:
+ di_MInst_disisi_nac_lh_s1 <"mpyu", int_hexagon_M2_mpyud_nac_lh_s1>;
+def Hexagon_M2_mpyud_nac_ll_s0:
+ di_MInst_disisi_nac_ll <"mpyu", int_hexagon_M2_mpyud_nac_ll_s0>;
+def Hexagon_M2_mpyud_nac_ll_s1:
+ di_MInst_disisi_nac_ll_s1 <"mpyu", int_hexagon_M2_mpyud_nac_ll_s1>;
+
+
+/********************************************************************
+* MTYPE/VB *
+*********************************************************************/
+
+// MTYPE / VB / Vector reduce add unsigned bytes.
+def Hexagon_A2_vraddub:
+ di_MInst_didi <"vraddub", int_hexagon_A2_vraddub>;
+def Hexagon_A2_vraddub_acc:
+ di_MInst_dididi_acc <"vraddub", int_hexagon_A2_vraddub_acc>;
+
+// MTYPE / VB / Vector sum of absolute differences unsigned bytes.
+def Hexagon_A2_vrsadub:
+ di_MInst_didi <"vrsadub", int_hexagon_A2_vrsadub>;
+def Hexagon_A2_vrsadub_acc:
+ di_MInst_dididi_acc <"vrsadub", int_hexagon_A2_vrsadub_acc>;
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector dual multiply.
+def Hexagon_M2_vdmpys_s1:
+ di_MInst_didi_s1_sat <"vdmpy", int_hexagon_M2_vdmpys_s1>;
+def Hexagon_M2_vdmpys_s0:
+ di_MInst_didi_sat <"vdmpy", int_hexagon_M2_vdmpys_s0>;
+def Hexagon_M2_vdmacs_s1:
+ di_MInst_dididi_acc_s1_sat <"vdmpy", int_hexagon_M2_vdmacs_s1>;
+def Hexagon_M2_vdmacs_s0:
+ di_MInst_dididi_acc_sat <"vdmpy", int_hexagon_M2_vdmacs_s0>;
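+
+// Sketch of the dual-multiply semantics (an assumption based on the Hexagon
+// Programmer's Reference, not on this patch): each 32-bit lane of the result
+// is the sum of two adjacent 16x16 products,
+//   Rdd.w[i] = sat(Rss.h[2i]*Rtt.h[2i] + Rss.h[2i+1]*Rtt.h[2i+1])
+// with the "_s1" variants shifting each product left by one before the add.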
+
+// MTYPE / VH / Vector dual multiply with round and pack.
+def Hexagon_M2_vdmpyrs_s0:
+ si_MInst_didi_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s0>;
+def Hexagon_M2_vdmpyrs_s1:
+ si_MInst_didi_s1_rnd_sat <"vdmpy", int_hexagon_M2_vdmpyrs_s1>;
+
+// MTYPE / VH / Vector multiply even halfwords.
+def Hexagon_M2_vmpy2es_s1:
+ di_MInst_didi_s1_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s1>;
+def Hexagon_M2_vmpy2es_s0:
+ di_MInst_didi_sat <"vmpyeh", int_hexagon_M2_vmpy2es_s0>;
+def Hexagon_M2_vmac2es:
+ di_MInst_dididi_acc <"vmpyeh", int_hexagon_M2_vmac2es>;
+def Hexagon_M2_vmac2es_s1:
+ di_MInst_dididi_acc_s1_sat <"vmpyeh", int_hexagon_M2_vmac2es_s1>;
+def Hexagon_M2_vmac2es_s0:
+ di_MInst_dididi_acc_sat <"vmpyeh", int_hexagon_M2_vmac2es_s0>;
+
+// MTYPE / VH / Vector multiply halfwords.
+def Hexagon_M2_vmpy2s_s0:
+ di_MInst_sisi_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0>;
+def Hexagon_M2_vmpy2s_s1:
+ di_MInst_sisi_s1_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1>;
+def Hexagon_M2_vmac2:
+ di_MInst_disisi_acc <"vmpyh", int_hexagon_M2_vmac2>;
+def Hexagon_M2_vmac2s_s0:
+ di_MInst_disisi_acc_sat <"vmpyh", int_hexagon_M2_vmac2s_s0>;
+def Hexagon_M2_vmac2s_s1:
+ di_MInst_disisi_acc_s1_sat <"vmpyh", int_hexagon_M2_vmac2s_s1>;
+
+// MTYPE / VH / Vector multiply halfwords with round and pack.
+def Hexagon_M2_vmpy2s_s0pack:
+ si_MInst_sisi_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s0pack>;
+def Hexagon_M2_vmpy2s_s1pack:
+ si_MInst_sisi_s1_rnd_sat <"vmpyh", int_hexagon_M2_vmpy2s_s1pack>;
+
+// MTYPE / VH / Vector reduce multiply halfwords.
+// Rdd64[+]=vrmpyh(Rss64,Rtt64)
+def Hexagon_M2_vrmpy_s0:
+ di_MInst_didi <"vrmpyh", int_hexagon_M2_vrmpy_s0>;
+def Hexagon_M2_vrmac_s0:
+ di_MInst_dididi_acc <"vrmpyh", int_hexagon_M2_vrmac_s0>;
+
+
+/********************************************************************
+* STYPE/ALU *
+*********************************************************************/
+
+// STYPE / ALU / Absolute value.
+def Hexagon_A2_abs:
+ si_SInst_si <"abs", int_hexagon_A2_abs>;
+def Hexagon_A2_absp:
+ di_SInst_di <"abs", int_hexagon_A2_absp>;
+def Hexagon_A2_abssat:
+ si_SInst_si_sat <"abs", int_hexagon_A2_abssat>;
+
+// STYPE / ALU / Negate.
+def Hexagon_A2_negp:
+ di_SInst_di <"neg", int_hexagon_A2_negp>;
+def Hexagon_A2_negsat:
+ si_SInst_si_sat <"neg", int_hexagon_A2_negsat>;
+
+// STYPE / ALU / Logical Not.
+def Hexagon_A2_notp:
+ di_SInst_di <"not", int_hexagon_A2_notp>;
+
+// STYPE / ALU / Sign extend word to doubleword.
+def Hexagon_A2_sxtw:
+ di_SInst_si <"sxtw", int_hexagon_A2_sxtw>;
+
+
+/********************************************************************
+* STYPE/BIT *
+*********************************************************************/
+
+// STYPE / BIT / Count leading.
+def Hexagon_S2_cl0:
+ si_SInst_si <"cl0", int_hexagon_S2_cl0>;
+def Hexagon_S2_cl0p:
+ si_SInst_di <"cl0", int_hexagon_S2_cl0p>;
+def Hexagon_S2_cl1:
+ si_SInst_si <"cl1", int_hexagon_S2_cl1>;
+def Hexagon_S2_cl1p:
+ si_SInst_di <"cl1", int_hexagon_S2_cl1p>;
+def Hexagon_S2_clb:
+ si_SInst_si <"clb", int_hexagon_S2_clb>;
+def Hexagon_S2_clbp:
+ si_SInst_di <"clb", int_hexagon_S2_clbp>;
+def Hexagon_S2_clbnorm:
+ si_SInst_si <"normamt", int_hexagon_S2_clbnorm>;
+
+// STYPE / BIT / Count trailing.
+def Hexagon_S2_ct0:
+ si_SInst_si <"ct0", int_hexagon_S2_ct0>;
+def Hexagon_S2_ct1:
+ si_SInst_si <"ct1", int_hexagon_S2_ct1>;
+
+// STYPE / BIT / Compare bit mask.
+def HEXAGON_C2_bitsclr:
+ qi_SInst_sisi <"bitsclr", int_hexagon_C2_bitsclr>;
+def HEXAGON_C2_bitsclri:
+ qi_SInst_siu6 <"bitsclr", int_hexagon_C2_bitsclri>;
+def HEXAGON_C2_bitsset:
+ qi_SInst_sisi <"bitsset", int_hexagon_C2_bitsset>;
+
+// STYPE / BIT / Extract unsigned.
+// Rd[d][32/64]=extractu(Rs[s],Rt[t],[imm])
+def Hexagon_S2_extractu:
+ si_SInst_siu5u5 <"extractu",int_hexagon_S2_extractu>;
+def Hexagon_S2_extractu_rp:
+ si_SInst_sidi <"extractu",int_hexagon_S2_extractu_rp>;
+def Hexagon_S2_extractup:
+ di_SInst_diu6u6 <"extractu",int_hexagon_S2_extractup>;
+def Hexagon_S2_extractup_rp:
+ di_SInst_didi <"extractu",int_hexagon_S2_extractup_rp>;
+
+// STYPE / BIT / Insert bitfield.
+def HEXAGON_S2_insert:
+ si_SInst_sisiu5u5 <"insert", int_hexagon_S2_insert>;
+def HEXAGON_S2_insert_rp:
+ si_SInst_sisidi <"insert", int_hexagon_S2_insert_rp>;
+def HEXAGON_S2_insertp:
+ di_SInst_didiu6u6 <"insert", int_hexagon_S2_insertp>;
+def HEXAGON_S2_insertp_rp:
+ di_SInst_dididi <"insert", int_hexagon_S2_insertp_rp>;
+
+// STYPE / BIT / Interleave/deinterleave.
+def HEXAGON_S2_interleave:
+ di_SInst_di <"interleave", int_hexagon_S2_interleave>;
+def HEXAGON_S2_deinterleave:
+ di_SInst_di <"deinterleave", int_hexagon_S2_deinterleave>;
+
+// STYPE / BIT / Linear feedback-shift iteration.
+def HEXAGON_S2_lfsp:
+ di_SInst_didi <"lfs", int_hexagon_S2_lfsp>;
+
+// STYPE / BIT / Bit reverse.
+def HEXAGON_S2_brev:
+ si_SInst_si <"brev", int_hexagon_S2_brev>;
+
+// STYPE / BIT / Set/Clear/Toggle Bit.
+def Hexagon_S2_setbit_i:
+ si_SInst_siu5 <"setbit", int_hexagon_S2_setbit_i>;
+def Hexagon_S2_togglebit_i:
+ si_SInst_siu5 <"togglebit", int_hexagon_S2_togglebit_i>;
+def Hexagon_S2_clrbit_i:
+ si_SInst_siu5 <"clrbit", int_hexagon_S2_clrbit_i>;
+def Hexagon_S2_setbit_r:
+ si_SInst_sisi <"setbit", int_hexagon_S2_setbit_r>;
+def Hexagon_S2_togglebit_r:
+ si_SInst_sisi <"togglebit", int_hexagon_S2_togglebit_r>;
+def Hexagon_S2_clrbit_r:
+ si_SInst_sisi <"clrbit", int_hexagon_S2_clrbit_r>;
+
+// STYPE / BIT / Test Bit.
+def Hexagon_S2_tstbit_i:
+ qi_SInst_siu5 <"tstbit", int_hexagon_S2_tstbit_i>;
+def Hexagon_S2_tstbit_r:
+ qi_SInst_sisi <"tstbit", int_hexagon_S2_tstbit_r>;
+
+
+/********************************************************************
+* STYPE/COMPLEX *
+*********************************************************************/
+
+// STYPE / COMPLEX / Vector Complex conjugate.
+def Hexagon_A2_vconj:
+ di_SInst_di_sat <"vconj", int_hexagon_A2_vconj>;
+
+// STYPE / COMPLEX / Vector Complex rotate.
+def Hexagon_S2_vcrotate:
+ di_SInst_disi <"vcrotate",int_hexagon_S2_vcrotate>;
+
+
+/********************************************************************
+* STYPE/PERM *
+*********************************************************************/
+
+// STYPE / PERM / Saturate.
+def Hexagon_A2_sat:
+ si_SInst_di <"sat", int_hexagon_A2_sat>;
+def Hexagon_A2_satb:
+ si_SInst_si <"satb", int_hexagon_A2_satb>;
+def Hexagon_A2_sath:
+ si_SInst_si <"sath", int_hexagon_A2_sath>;
+def Hexagon_A2_satub:
+ si_SInst_si <"satub", int_hexagon_A2_satub>;
+def Hexagon_A2_satuh:
+ si_SInst_si <"satuh", int_hexagon_A2_satuh>;
+
+// STYPE / PERM / Swizzle bytes.
+def Hexagon_A2_swiz:
+ si_SInst_si <"swiz", int_hexagon_A2_swiz>;
+
+// STYPE / PERM / Vector align.
+// Needs custom lowering.
+def Hexagon_S2_valignib:
+ di_SInst_didiu3 <"valignb", int_hexagon_S2_valignib>;
+def Hexagon_S2_valignrb:
+ di_SInst_didiqi <"valignb", int_hexagon_S2_valignrb>;
+
+// STYPE / PERM / Vector round and pack.
+def Hexagon_S2_vrndpackwh:
+ si_SInst_di <"vrndwh", int_hexagon_S2_vrndpackwh>;
+def Hexagon_S2_vrndpackwhs:
+ si_SInst_di_sat <"vrndwh", int_hexagon_S2_vrndpackwhs>;
+
+// STYPE / PERM / Vector saturate and pack.
+def Hexagon_S2_svsathb:
+ si_SInst_si <"vsathb", int_hexagon_S2_svsathb>;
+def Hexagon_S2_vsathb:
+ si_SInst_di <"vsathb", int_hexagon_S2_vsathb>;
+def Hexagon_S2_svsathub:
+ si_SInst_si <"vsathub", int_hexagon_S2_svsathub>;
+def Hexagon_S2_vsathub:
+ si_SInst_di <"vsathub", int_hexagon_S2_vsathub>;
+def Hexagon_S2_vsatwh:
+ si_SInst_di <"vsatwh", int_hexagon_S2_vsatwh>;
+def Hexagon_S2_vsatwuh:
+ si_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh>;
+
+// STYPE / PERM / Vector saturate without pack.
+def Hexagon_S2_vsathb_nopack:
+ di_SInst_di <"vsathb", int_hexagon_S2_vsathb_nopack>;
+def Hexagon_S2_vsathub_nopack:
+ di_SInst_di <"vsathub", int_hexagon_S2_vsathub_nopack>;
+def Hexagon_S2_vsatwh_nopack:
+ di_SInst_di <"vsatwh", int_hexagon_S2_vsatwh_nopack>;
+def Hexagon_S2_vsatwuh_nopack:
+ di_SInst_di <"vsatwuh", int_hexagon_S2_vsatwuh_nopack>;
+
+// STYPE / PERM / Vector shuffle.
+def Hexagon_S2_shuffeb:
+ di_SInst_didi <"shuffeb", int_hexagon_S2_shuffeb>;
+def Hexagon_S2_shuffeh:
+ di_SInst_didi <"shuffeh", int_hexagon_S2_shuffeh>;
+def Hexagon_S2_shuffob:
+ di_SInst_didi <"shuffob", int_hexagon_S2_shuffob>;
+def Hexagon_S2_shuffoh:
+ di_SInst_didi <"shuffoh", int_hexagon_S2_shuffoh>;
+
+// STYPE / PERM / Vector splat bytes.
+def Hexagon_S2_vsplatrb:
+ si_SInst_si <"vsplatb", int_hexagon_S2_vsplatrb>;
+
+// STYPE / PERM / Vector splat halfwords.
+def Hexagon_S2_vsplatrh:
+ di_SInst_si <"vsplath", int_hexagon_S2_vsplatrh>;
+
+// STYPE / PERM / Vector splice.
+def HEXAGON_S2_vsplicerb:
+ di_SInst_didiqi <"vspliceb",int_hexagon_S2_vsplicerb>;
+def HEXAGON_S2_vspliceib:
+ di_SInst_didiu3 <"vspliceb",int_hexagon_S2_vspliceib>;
+
+// STYPE / PERM / Sign extend.
+def Hexagon_S2_vsxtbh:
+ di_SInst_si <"vsxtbh", int_hexagon_S2_vsxtbh>;
+def Hexagon_S2_vsxthw:
+ di_SInst_si <"vsxthw", int_hexagon_S2_vsxthw>;
+
+// STYPE / PERM / Truncate.
+def Hexagon_S2_vtrunehb:
+ si_SInst_di <"vtrunehb",int_hexagon_S2_vtrunehb>;
+def Hexagon_S2_vtrunohb:
+ si_SInst_di <"vtrunohb",int_hexagon_S2_vtrunohb>;
+def Hexagon_S2_vtrunewh:
+ di_SInst_didi <"vtrunewh",int_hexagon_S2_vtrunewh>;
+def Hexagon_S2_vtrunowh:
+ di_SInst_didi <"vtrunowh",int_hexagon_S2_vtrunowh>;
+
+// STYPE / PERM / Zero extend.
+def Hexagon_S2_vzxtbh:
+ di_SInst_si <"vzxtbh", int_hexagon_S2_vzxtbh>;
+def Hexagon_S2_vzxthw:
+ di_SInst_si <"vzxthw", int_hexagon_S2_vzxthw>;
+
+
+/********************************************************************
+* STYPE/PRED *
+*********************************************************************/
+
+// STYPE / PRED / Mask generate from predicate.
+def Hexagon_C2_mask:
+ di_SInst_qi <"mask", int_hexagon_C2_mask>;
+
+// STYPE / PRED / Predicate transfer.
+def Hexagon_C2_tfrpr:
+ si_SInst_qi <"", int_hexagon_C2_tfrpr>;
+def Hexagon_C2_tfrrp:
+ qi_SInst_si <"", int_hexagon_C2_tfrrp>;
+
+// STYPE / PRED / Viterbi pack even and odd predicate bits.
+def Hexagon_C2_vitpack:
+ si_SInst_qiqi <"vitpack",int_hexagon_C2_vitpack>;
+
+
+/********************************************************************
+* STYPE/SHIFT *
+*********************************************************************/
+
+// STYPE / SHIFT / Shift by immediate.
+def Hexagon_S2_asl_i_r:
+ si_SInst_siu5 <"asl", int_hexagon_S2_asl_i_r>;
+def Hexagon_S2_asr_i_r:
+ si_SInst_siu5 <"asr", int_hexagon_S2_asr_i_r>;
+def Hexagon_S2_lsr_i_r:
+ si_SInst_siu5 <"lsr", int_hexagon_S2_lsr_i_r>;
+def Hexagon_S2_asl_i_p:
+ di_SInst_diu6 <"asl", int_hexagon_S2_asl_i_p>;
+def Hexagon_S2_asr_i_p:
+ di_SInst_diu6 <"asr", int_hexagon_S2_asr_i_p>;
+def Hexagon_S2_lsr_i_p:
+ di_SInst_diu6 <"lsr", int_hexagon_S2_lsr_i_p>;
+
+// STYPE / SHIFT / Shift by immediate and accumulate.
+def Hexagon_S2_asl_i_r_acc:
+ si_SInst_sisiu5_acc <"asl", int_hexagon_S2_asl_i_r_acc>;
+def Hexagon_S2_asr_i_r_acc:
+ si_SInst_sisiu5_acc <"asr", int_hexagon_S2_asr_i_r_acc>;
+def Hexagon_S2_lsr_i_r_acc:
+ si_SInst_sisiu5_acc <"lsr", int_hexagon_S2_lsr_i_r_acc>;
+def Hexagon_S2_asl_i_r_nac:
+ si_SInst_sisiu5_nac <"asl", int_hexagon_S2_asl_i_r_nac>;
+def Hexagon_S2_asr_i_r_nac:
+ si_SInst_sisiu5_nac <"asr", int_hexagon_S2_asr_i_r_nac>;
+def Hexagon_S2_lsr_i_r_nac:
+ si_SInst_sisiu5_nac <"lsr", int_hexagon_S2_lsr_i_r_nac>;
+def Hexagon_S2_asl_i_p_acc:
+ di_SInst_didiu6_acc <"asl", int_hexagon_S2_asl_i_p_acc>;
+def Hexagon_S2_asr_i_p_acc:
+ di_SInst_didiu6_acc <"asr", int_hexagon_S2_asr_i_p_acc>;
+def Hexagon_S2_lsr_i_p_acc:
+ di_SInst_didiu6_acc <"lsr", int_hexagon_S2_lsr_i_p_acc>;
+def Hexagon_S2_asl_i_p_nac:
+ di_SInst_didiu6_nac <"asl", int_hexagon_S2_asl_i_p_nac>;
+def Hexagon_S2_asr_i_p_nac:
+ di_SInst_didiu6_nac <"asr", int_hexagon_S2_asr_i_p_nac>;
+def Hexagon_S2_lsr_i_p_nac:
+ di_SInst_didiu6_nac <"lsr", int_hexagon_S2_lsr_i_p_nac>;
+
+// STYPE / SHIFT / Shift by immediate and add.
+def Hexagon_S2_addasl_rrri:
+ si_SInst_sisiu3 <"addasl", int_hexagon_S2_addasl_rrri>;
+
+// STYPE / SHIFT / Shift by immediate and logical.
+def Hexagon_S2_asl_i_r_and:
+ si_SInst_sisiu5_and <"asl", int_hexagon_S2_asl_i_r_and>;
+def Hexagon_S2_asr_i_r_and:
+ si_SInst_sisiu5_and <"asr", int_hexagon_S2_asr_i_r_and>;
+def Hexagon_S2_lsr_i_r_and:
+ si_SInst_sisiu5_and <"lsr", int_hexagon_S2_lsr_i_r_and>;
+
+def Hexagon_S2_asl_i_r_xacc:
+ si_SInst_sisiu5_xor <"asl", int_hexagon_S2_asl_i_r_xacc>;
+def Hexagon_S2_lsr_i_r_xacc:
+ si_SInst_sisiu5_xor <"lsr", int_hexagon_S2_lsr_i_r_xacc>;
+
+def Hexagon_S2_asl_i_r_or:
+ si_SInst_sisiu5_or <"asl", int_hexagon_S2_asl_i_r_or>;
+def Hexagon_S2_asr_i_r_or:
+ si_SInst_sisiu5_or <"asr", int_hexagon_S2_asr_i_r_or>;
+def Hexagon_S2_lsr_i_r_or:
+ si_SInst_sisiu5_or <"lsr", int_hexagon_S2_lsr_i_r_or>;
+
+def Hexagon_S2_asl_i_p_and:
+ di_SInst_didiu6_and <"asl", int_hexagon_S2_asl_i_p_and>;
+def Hexagon_S2_asr_i_p_and:
+ di_SInst_didiu6_and <"asr", int_hexagon_S2_asr_i_p_and>;
+def Hexagon_S2_lsr_i_p_and:
+ di_SInst_didiu6_and <"lsr", int_hexagon_S2_lsr_i_p_and>;
+
+def Hexagon_S2_asl_i_p_xacc:
+ di_SInst_didiu6_xor <"asl", int_hexagon_S2_asl_i_p_xacc>;
+def Hexagon_S2_lsr_i_p_xacc:
+ di_SInst_didiu6_xor <"lsr", int_hexagon_S2_lsr_i_p_xacc>;
+
+def Hexagon_S2_asl_i_p_or:
+ di_SInst_didiu6_or <"asl", int_hexagon_S2_asl_i_p_or>;
+def Hexagon_S2_asr_i_p_or:
+ di_SInst_didiu6_or <"asr", int_hexagon_S2_asr_i_p_or>;
+def Hexagon_S2_lsr_i_p_or:
+ di_SInst_didiu6_or <"lsr", int_hexagon_S2_lsr_i_p_or>;
+
+// STYPE / SHIFT / Shift right by immediate with rounding.
+def Hexagon_S2_asr_i_r_rnd:
+ si_SInst_siu5_rnd <"asr", int_hexagon_S2_asr_i_r_rnd>;
+def Hexagon_S2_asr_i_r_rnd_goodsyntax:
+ si_SInst_siu5 <"asrrnd", int_hexagon_S2_asr_i_r_rnd_goodsyntax>;
+
+// STYPE / SHIFT / Shift left by immediate with saturation.
+def Hexagon_S2_asl_i_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_i_r_sat>;
+
+// STYPE / SHIFT / Shift by register.
+def Hexagon_S2_asl_r_r:
+ si_SInst_sisi <"asl", int_hexagon_S2_asl_r_r>;
+def Hexagon_S2_asr_r_r:
+ si_SInst_sisi <"asr", int_hexagon_S2_asr_r_r>;
+def Hexagon_S2_lsl_r_r:
+ si_SInst_sisi <"lsl", int_hexagon_S2_lsl_r_r>;
+def Hexagon_S2_lsr_r_r:
+ si_SInst_sisi <"lsr", int_hexagon_S2_lsr_r_r>;
+def Hexagon_S2_asl_r_p:
+ di_SInst_disi <"asl", int_hexagon_S2_asl_r_p>;
+def Hexagon_S2_asr_r_p:
+ di_SInst_disi <"asr", int_hexagon_S2_asr_r_p>;
+def Hexagon_S2_lsl_r_p:
+ di_SInst_disi <"lsl", int_hexagon_S2_lsl_r_p>;
+def Hexagon_S2_lsr_r_p:
+ di_SInst_disi <"lsr", int_hexagon_S2_lsr_r_p>;
+
+// STYPE / SHIFT / Shift by register and accumulate.
+def Hexagon_S2_asl_r_r_acc:
+ si_SInst_sisisi_acc <"asl", int_hexagon_S2_asl_r_r_acc>;
+def Hexagon_S2_asr_r_r_acc:
+ si_SInst_sisisi_acc <"asr", int_hexagon_S2_asr_r_r_acc>;
+def Hexagon_S2_lsl_r_r_acc:
+ si_SInst_sisisi_acc <"lsl", int_hexagon_S2_lsl_r_r_acc>;
+def Hexagon_S2_lsr_r_r_acc:
+ si_SInst_sisisi_acc <"lsr", int_hexagon_S2_lsr_r_r_acc>;
+def Hexagon_S2_asl_r_p_acc:
+ di_SInst_didisi_acc <"asl", int_hexagon_S2_asl_r_p_acc>;
+def Hexagon_S2_asr_r_p_acc:
+ di_SInst_didisi_acc <"asr", int_hexagon_S2_asr_r_p_acc>;
+def Hexagon_S2_lsl_r_p_acc:
+ di_SInst_didisi_acc <"lsl", int_hexagon_S2_lsl_r_p_acc>;
+def Hexagon_S2_lsr_r_p_acc:
+ di_SInst_didisi_acc <"lsr", int_hexagon_S2_lsr_r_p_acc>;
+
+def Hexagon_S2_asl_r_r_nac:
+ si_SInst_sisisi_nac <"asl", int_hexagon_S2_asl_r_r_nac>;
+def Hexagon_S2_asr_r_r_nac:
+ si_SInst_sisisi_nac <"asr", int_hexagon_S2_asr_r_r_nac>;
+def Hexagon_S2_lsl_r_r_nac:
+ si_SInst_sisisi_nac <"lsl", int_hexagon_S2_lsl_r_r_nac>;
+def Hexagon_S2_lsr_r_r_nac:
+ si_SInst_sisisi_nac <"lsr", int_hexagon_S2_lsr_r_r_nac>;
+def Hexagon_S2_asl_r_p_nac:
+ di_SInst_didisi_nac <"asl", int_hexagon_S2_asl_r_p_nac>;
+def Hexagon_S2_asr_r_p_nac:
+ di_SInst_didisi_nac <"asr", int_hexagon_S2_asr_r_p_nac>;
+def Hexagon_S2_lsl_r_p_nac:
+ di_SInst_didisi_nac <"lsl", int_hexagon_S2_lsl_r_p_nac>;
+def Hexagon_S2_lsr_r_p_nac:
+ di_SInst_didisi_nac <"lsr", int_hexagon_S2_lsr_r_p_nac>;
+
+// STYPE / SHIFT / Shift by register and logical.
+def Hexagon_S2_asl_r_r_and:
+ si_SInst_sisisi_and <"asl", int_hexagon_S2_asl_r_r_and>;
+def Hexagon_S2_asr_r_r_and:
+ si_SInst_sisisi_and <"asr", int_hexagon_S2_asr_r_r_and>;
+def Hexagon_S2_lsl_r_r_and:
+ si_SInst_sisisi_and <"lsl", int_hexagon_S2_lsl_r_r_and>;
+def Hexagon_S2_lsr_r_r_and:
+ si_SInst_sisisi_and <"lsr", int_hexagon_S2_lsr_r_r_and>;
+
+def Hexagon_S2_asl_r_r_or:
+ si_SInst_sisisi_or <"asl", int_hexagon_S2_asl_r_r_or>;
+def Hexagon_S2_asr_r_r_or:
+ si_SInst_sisisi_or <"asr", int_hexagon_S2_asr_r_r_or>;
+def Hexagon_S2_lsl_r_r_or:
+ si_SInst_sisisi_or <"lsl", int_hexagon_S2_lsl_r_r_or>;
+def Hexagon_S2_lsr_r_r_or:
+ si_SInst_sisisi_or <"lsr", int_hexagon_S2_lsr_r_r_or>;
+
+def Hexagon_S2_asl_r_p_and:
+ di_SInst_didisi_and <"asl", int_hexagon_S2_asl_r_p_and>;
+def Hexagon_S2_asr_r_p_and:
+ di_SInst_didisi_and <"asr", int_hexagon_S2_asr_r_p_and>;
+def Hexagon_S2_lsl_r_p_and:
+ di_SInst_didisi_and <"lsl", int_hexagon_S2_lsl_r_p_and>;
+def Hexagon_S2_lsr_r_p_and:
+ di_SInst_didisi_and <"lsr", int_hexagon_S2_lsr_r_p_and>;
+
+def Hexagon_S2_asl_r_p_or:
+ di_SInst_didisi_or <"asl", int_hexagon_S2_asl_r_p_or>;
+def Hexagon_S2_asr_r_p_or:
+ di_SInst_didisi_or <"asr", int_hexagon_S2_asr_r_p_or>;
+def Hexagon_S2_lsl_r_p_or:
+ di_SInst_didisi_or <"lsl", int_hexagon_S2_lsl_r_p_or>;
+def Hexagon_S2_lsr_r_p_or:
+ di_SInst_didisi_or <"lsr", int_hexagon_S2_lsr_r_p_or>;
+
+// STYPE / SHIFT / Shift by register with saturation.
+def Hexagon_S2_asl_r_r_sat:
+ si_SInst_sisi_sat <"asl", int_hexagon_S2_asl_r_r_sat>;
+def Hexagon_S2_asr_r_r_sat:
+ si_SInst_sisi_sat <"asr", int_hexagon_S2_asr_r_r_sat>;
+
+// STYPE / SHIFT / Table Index.
+def HEXAGON_S2_tableidxb_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxb",int_hexagon_S2_tableidxb_goodsyntax>;
+def HEXAGON_S2_tableidxd_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxd",int_hexagon_S2_tableidxd_goodsyntax>;
+def HEXAGON_S2_tableidxh_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxh",int_hexagon_S2_tableidxh_goodsyntax>;
+def HEXAGON_S2_tableidxw_goodsyntax:
+ si_MInst_sisiu4u5 <"tableidxw",int_hexagon_S2_tableidxw_goodsyntax>;
+
+
+/********************************************************************
+* STYPE/VH *
+*********************************************************************/
+
+// STYPE / VH / Vector absolute value halfwords.
+// Rdd64=vabsh(Rss64)
+def Hexagon_A2_vabsh:
+ di_SInst_di <"vabsh", int_hexagon_A2_vabsh>;
+def Hexagon_A2_vabshsat:
+ di_SInst_di_sat <"vabsh", int_hexagon_A2_vabshsat>;
+
+// STYPE / VH / Vector shift halfwords by immediate.
+// Rdd64=v[asl/asr/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_i_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_i_vh>;
+def Hexagon_S2_asr_i_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_i_vh>;
+def Hexagon_S2_lsr_i_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_i_vh>;
+
+// STYPE / VH / Vector shift halfwords by register.
+// Rdd64=v[asl/asr/lsl/lsr]h(Rss64,Rt32)
+def Hexagon_S2_asl_r_vh:
+ di_SInst_disi <"vaslh", int_hexagon_S2_asl_r_vh>;
+def Hexagon_S2_asr_r_vh:
+ di_SInst_disi <"vasrh", int_hexagon_S2_asr_r_vh>;
+def Hexagon_S2_lsl_r_vh:
+ di_SInst_disi <"vlslh", int_hexagon_S2_lsl_r_vh>;
+def Hexagon_S2_lsr_r_vh:
+ di_SInst_disi <"vlsrh", int_hexagon_S2_lsr_r_vh>;
+
+
+/********************************************************************
+* STYPE/VW *
+*********************************************************************/
+
+// STYPE / VW / Vector absolute value words.
+def Hexagon_A2_vabsw:
+ di_SInst_di <"vabsw", int_hexagon_A2_vabsw>;
+def Hexagon_A2_vabswsat:
+ di_SInst_di_sat <"vabsw", int_hexagon_A2_vabswsat>;
+
+// STYPE / VW / Vector shift words by immediate.
+// Rdd64=v[asl/asr/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_i_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_i_vw>;
+def Hexagon_S2_asr_i_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_i_vw>;
+def Hexagon_S2_lsr_i_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_i_vw>;
+
+// STYPE / VW / Vector shift words by register.
+// Rdd64=v[asl/asr/lsl/lsr]w(Rss64,Rt32)
+def Hexagon_S2_asl_r_vw:
+ di_SInst_disi <"vaslw", int_hexagon_S2_asl_r_vw>;
+def Hexagon_S2_asr_r_vw:
+ di_SInst_disi <"vasrw", int_hexagon_S2_asr_r_vw>;
+def Hexagon_S2_lsl_r_vw:
+ di_SInst_disi <"vlslw", int_hexagon_S2_lsl_r_vw>;
+def Hexagon_S2_lsr_r_vw:
+ di_SInst_disi <"vlsrw", int_hexagon_S2_lsr_r_vw>;
+
+// STYPE / VW / Vector shift words with truncate and pack.
+def Hexagon_S2_asr_r_svw_trun:
+ si_SInst_disi <"vasrw", int_hexagon_S2_asr_r_svw_trun>;
+def Hexagon_S2_asr_i_svw_trun:
+ si_SInst_diu5 <"vasrw", int_hexagon_S2_asr_i_svw_trun>;
+
+include "HexagonIntrinsicsV3.td"
+include "HexagonIntrinsicsV4.td"
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
new file mode 100644
index 000000000000..68eaf68480e0
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -0,0 +1,29 @@
+//===-- HexagonIntrinsicsDerived.td - Derived intrinsics ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Multiply two 64-bit values and use the lower 64 bits of the result.
+//
+// Optimized using the multiply-accumulate intrinsics.
+//
+def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
+ (COMBINE_rr
+ (Hexagon_M2_maci
+ (Hexagon_M2_maci (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_hireg)),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src1, subreg_hireg)),
+ (EXTRACT_SUBREG (MPYU64 (EXTRACT_SUBREG DoubleRegs:$src1, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$src2, subreg_loreg)),
+ subreg_loreg))>;
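+
+// The pattern above is schoolbook 64x64->64 multiplication built from 32-bit
+// pieces (a sketch of the algebra, assuming maci(acc,a,b) = acc + a*b and
+// MPYU64 = full unsigned 32x32->64 multiply): with a = a_hi:a_lo and
+// b = b_hi:b_lo,
+//   lo32(a*b) = lo32(a_lo*b_lo)
+//   hi32(a*b) = hi32(a_lo*b_lo) + a_lo*b_hi + a_hi*b_lo   (mod 2^32)
+// The two nested maci defs fold in the cross products, and COMBINE_rr packs
+// the two words back into a DoubleRegs result.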
+
+
+
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV3.td b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
new file mode 100644
index 000000000000..2a54e62d20ae
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV3.td
@@ -0,0 +1,50 @@
+//=- HexagonIntrinsicsV3.td - Target Description for Hexagon -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V3 Compiler Intrinsics in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+// MTYPE / COMPLEX / Vector reduce complex multiply real or imaginary.
+def Hexagon_M2_vrcmpys_s1:
+ di_MInst_disi_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1>;
+def Hexagon_M2_vrcmpys_acc_s1:
+ di_MInst_didisi_acc_s1_sat <"vrcmpys", int_hexagon_M2_vrcmpys_acc_s1>;
+def Hexagon_M2_vrcmpys_s1rp:
+ si_MInst_disi_s1_rnd_sat <"vrcmpys", int_hexagon_M2_vrcmpys_s1rp>;
+
+
+
+
+/********************************************************************
+* MTYPE/VH *
+*********************************************************************/
+
+// MTYPE / VH / Vector reduce add unsigned halfwords.
+def Hexagon_M2_vradduh:
+ si_MInst_didi <"vradduh", int_hexagon_M2_vradduh>;
+
+
+/********************************************************************
+* ALU64/ALU *
+*********************************************************************/
+
+// ALU64 / ALU / Add.
+def Hexagon_A2_addsp:
+ di_ALU64_sidi <"add", int_hexagon_A2_addsp>;
+def Hexagon_A2_addpsat:
+ di_ALU64_didi <"add", int_hexagon_A2_addpsat>;
+
+def Hexagon_A2_maxp:
+ di_ALU64_didi <"max", int_hexagon_A2_maxp>;
+def Hexagon_A2_maxup:
+ di_ALU64_didi <"maxu", int_hexagon_A2_maxup>;
diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
new file mode 100644
index 000000000000..dd28ebb57231
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -0,0 +1,369 @@
+//===- HexagonIntrinsicsV4.td - V4 Instruction intrinsics --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is populated based on the following specs:
+// Hexagon V4 Architecture Extensions
+// Application-Level Specification
+// 80-V9418-12 Rev. A
+// June 15, 2010
+
+
+//
+// ALU 32 types.
+//
+
+class si_ALU32_sisi_not<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_s8si<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins s8Imm:$src1, IntRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "(#$src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID imm:$src1, IntRegs:$src2))]>;
+
+class di_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set DoubleRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_neg_ALU32_sis10<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, s10Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class qi_neg_ALU32_siu9<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$src1, u9Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_neg_ALU32_sisi<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class si_neg_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+class si_ALU32_sis8<string opc, Intrinsic IntID>
+ : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1, s8Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
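+
+// Naming convention for these wrapper classes (an inference from the bodies
+// above, mirroring the V1/V2 wrappers): <result>_<itin>_<operands>, where
+// "si" is a 32-bit IntRegs value, "di" a 64-bit DoubleRegs pair, "qi" a
+// PredRegs predicate, and "s8"/"s10"/"u9" immediates of that width and
+// signedness; a "neg"/"not" infix marks the "!"- or "~"-negated form. For
+// example, qi_neg_ALU32_siu9 assembles as "$dst = !opc($src1, #$src2)".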
+
+
+//
+// SInst Classes.
+//
+class qi_neg_SInst_qiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+ !strconcat("$dst = !", !strconcat(opc , "($src1, $src2)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2))]>;
+
+class qi_SInst_qi_andqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_andqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi_neg<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, !$src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class qi_SInst_qi_orqiqi<string opc, Intrinsic IntID>
+ : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, or($src2, $src3)")),
+ [(set PredRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_si_addsis6<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s6Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, add($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_SInst_si_subs6si<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s6Imm:$src2, IntRegs:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, sub(#$src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2,
+ IntRegs:$src3))]>;
+
+class di_ALU64_didi_neg<string opc, Intrinsic IntID>
+ : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, DoubleRegs:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, ~$src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$src1, DoubleRegs:$src2))]>;
+
+class di_MInst_dididi_xacc<string opc, Intrinsic IntID>
+ : MInst_acc<(outs DoubleRegs:$dst), (ins DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src1, $src2)")),
+ [(set DoubleRegs:$dst, (IntID DoubleRegs:$dst2, DoubleRegs:$src1,
+ DoubleRegs:$src2))],
+ "$dst2 = $dst">;
+
+class si_MInst_sisisi_and<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_andn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst &= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_andi<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst = ", !strconcat(opc ,
+ "($src1, and($src2, #$src3))")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_xor<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_xorn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst ^= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_sisis10_or<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2, s10Imm:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, #$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ imm:$src3))]>;
+
+class si_MInst_sisisi_or<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, $src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_MInst_sisisi_orn<string opc, Intrinsic IntID>
+ : MInst<(outs IntRegs:$dst), (ins IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3),
+ !strconcat("$dst |= ", !strconcat(opc , "($src2, ~$src3)")),
+ [(set IntRegs:$dst, (IntID IntRegs:$dst1, IntRegs:$src2,
+ IntRegs:$src3))]>;
+
+class si_SInst_siu5_sat<string opc, Intrinsic IntID>
+ : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+ !strconcat("$dst = ", !strconcat(opc , "($src1, #$src2):sat")),
+ [(set IntRegs:$dst, (IntID IntRegs:$src1, imm:$src2))]>;
+
+
+/********************************************************************
+* ALU32/ALU *
+*********************************************************************/
+
+// ALU32 / ALU / Logical Operations.
+def Hexagon_A4_orn : si_ALU32_sisi_not <"or", int_hexagon_A4_orn>;
+def Hexagon_A4_andn : si_ALU32_sisi_not <"and", int_hexagon_A4_andn>;
+
+
+/********************************************************************
+* ALU32/PERM *
+*********************************************************************/
+
+// ALU32 / PERM / Combine Words Into Doublewords.
+def Hexagon_A4_combineir : di_ALU32_s8si <"combine", int_hexagon_A4_combineir>;
+def Hexagon_A4_combineri : di_ALU32_sis8 <"combine", int_hexagon_A4_combineri>;
+
+
+/********************************************************************
+* ALU32/PRED *
+*********************************************************************/
+
+// ALU32 / PRED / Conditional Shift Halfword.
+// ALU32 / PRED / Conditional Sign Extend.
+// ALU32 / PRED / Conditional Zero Extend.
+// ALU32 / PRED / Compare.
+def Hexagon_C4_cmpneq : qi_neg_ALU32_sisi <"cmp.eq", int_hexagon_C4_cmpneq>;
+def Hexagon_C4_cmpneqi : qi_neg_ALU32_sis10 <"cmp.eq", int_hexagon_C4_cmpneqi>;
+def Hexagon_C4_cmplte : qi_neg_ALU32_sisi <"cmp.gt", int_hexagon_C4_cmplte>;
+def Hexagon_C4_cmpltei : qi_neg_ALU32_sis10 <"cmp.gt", int_hexagon_C4_cmpltei>;
+def Hexagon_C4_cmplteu : qi_neg_ALU32_sisi <"cmp.gtu",int_hexagon_C4_cmplteu>;
+def Hexagon_C4_cmplteui: qi_neg_ALU32_siu9 <"cmp.gtu",int_hexagon_C4_cmplteui>;
+
+// ALU32 / PRED / Compare To General Register.
+def Hexagon_A4_rcmpneq : si_neg_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpneq>;
+def Hexagon_A4_rcmpneqi: si_neg_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpneqi>;
+def Hexagon_A4_rcmpeq : si_ALU32_sisi <"cmp.eq", int_hexagon_A4_rcmpeq>;
+def Hexagon_A4_rcmpeqi : si_ALU32_sis8 <"cmp.eq", int_hexagon_A4_rcmpeqi>;
+
+
+/********************************************************************
+* CR *
+*********************************************************************/
+
+// CR / Corner Detection Acceleration.
+def Hexagon_C4_fastcorner9:
+ qi_SInst_qiqi<"fastcorner9", int_hexagon_C4_fastcorner9>;
+def Hexagon_C4_fastcorner9_not:
+ qi_neg_SInst_qiqi<"fastcorner9",int_hexagon_C4_fastcorner9_not>;
+
+// CR / Logical Operations On Predicates.
+def Hexagon_C4_and_andn:
+ qi_SInst_qi_andqiqi_neg <"and", int_hexagon_C4_and_andn>;
+def Hexagon_C4_and_and:
+ qi_SInst_qi_andqiqi <"and", int_hexagon_C4_and_and>;
+def Hexagon_C4_and_orn:
+ qi_SInst_qi_orqiqi_neg <"and", int_hexagon_C4_and_orn>;
+def Hexagon_C4_and_or:
+ qi_SInst_qi_orqiqi <"and", int_hexagon_C4_and_or>;
+def Hexagon_C4_or_andn:
+ qi_SInst_qi_andqiqi_neg <"or", int_hexagon_C4_or_andn>;
+def Hexagon_C4_or_and:
+ qi_SInst_qi_andqiqi <"or", int_hexagon_C4_or_and>;
+def Hexagon_C4_or_orn:
+ qi_SInst_qi_orqiqi_neg <"or", int_hexagon_C4_or_orn>;
+def Hexagon_C4_or_or:
+ qi_SInst_qi_orqiqi <"or", int_hexagon_C4_or_or>;
+
+
+/********************************************************************
+* XTYPE/ALU *
+*********************************************************************/
+
+// XTYPE / ALU / Add And Accumulate.
+def Hexagon_S4_addaddi:
+ si_SInst_si_addsis6 <"add", int_hexagon_S4_addaddi>;
+def Hexagon_S4_subaddi:
+ si_SInst_si_subs6si <"add", int_hexagon_S4_subaddi>;
+
+// XTYPE / ALU / Logical Doublewords.
+def Hexagon_S4_andnp:
+ di_ALU64_didi_neg <"and", int_hexagon_A4_andnp>;
+def Hexagon_S4_ornp:
+ di_ALU64_didi_neg <"or", int_hexagon_A4_ornp>;
+
+// XTYPE / ALU / Logical-logical Doublewords.
+def Hexagon_M4_xor_xacc:
+ di_MInst_dididi_xacc <"xor", int_hexagon_M4_xor_xacc>;
+
+// XTYPE / ALU / Logical-logical Words.
+def HEXAGON_M4_and_and:
+ si_MInst_sisisi_and <"and", int_hexagon_M4_and_and>;
+def HEXAGON_M4_and_or:
+ si_MInst_sisisi_and <"or", int_hexagon_M4_and_or>;
+def HEXAGON_M4_and_xor:
+ si_MInst_sisisi_and <"xor", int_hexagon_M4_and_xor>;
+def HEXAGON_M4_and_andn:
+ si_MInst_sisisi_andn <"and", int_hexagon_M4_and_andn>;
+def HEXAGON_M4_xor_and:
+ si_MInst_sisisi_xor <"and", int_hexagon_M4_xor_and>;
+def HEXAGON_M4_xor_or:
+ si_MInst_sisisi_xor <"or", int_hexagon_M4_xor_or>;
+def HEXAGON_M4_xor_andn:
+ si_MInst_sisisi_xorn <"and", int_hexagon_M4_xor_andn>;
+def HEXAGON_M4_or_and:
+ si_MInst_sisisi_or <"and", int_hexagon_M4_or_and>;
+def HEXAGON_M4_or_or:
+ si_MInst_sisisi_or <"or", int_hexagon_M4_or_or>;
+def HEXAGON_M4_or_xor:
+ si_MInst_sisisi_or <"xor", int_hexagon_M4_or_xor>;
+def HEXAGON_M4_or_andn:
+ si_MInst_sisisi_orn <"and", int_hexagon_M4_or_andn>;
+def HEXAGON_S4_or_andix:
+ si_SInst_sisis10_andi <"or", int_hexagon_S4_or_andix>;
+def HEXAGON_S4_or_andi:
+ si_SInst_sisis10_or <"and", int_hexagon_S4_or_andi>;
+def HEXAGON_S4_or_ori:
+ si_SInst_sisis10_or <"or", int_hexagon_S4_or_ori>;
+
+// XTYPE / ALU / Modulo wrap.
+def HEXAGON_A4_modwrapu:
+ si_ALU64_sisi <"modwrap", int_hexagon_A4_modwrapu>;
+
+// XTYPE / ALU / Round.
+def HEXAGON_A4_cround_ri:
+ si_SInst_siu5 <"cround", int_hexagon_A4_cround_ri>;
+def HEXAGON_A4_cround_rr:
+ si_SInst_sisi <"cround", int_hexagon_A4_cround_rr>;
+def HEXAGON_A4_round_ri:
+ si_SInst_siu5 <"round", int_hexagon_A4_round_ri>;
+def HEXAGON_A4_round_rr:
+ si_SInst_sisi <"round", int_hexagon_A4_round_rr>;
+def HEXAGON_A4_round_ri_sat:
+ si_SInst_siu5_sat <"round", int_hexagon_A4_round_ri_sat>;
+def HEXAGON_A4_round_rr_sat:
+ si_SInst_sisi_sat <"round", int_hexagon_A4_round_rr_sat>;
+
+// XTYPE / ALU / Vector reduce add unsigned halfwords.
+// XTYPE / ALU / Vector add bytes.
+// XTYPE / ALU / Vector conditional negate.
+// XTYPE / ALU / Vector maximum bytes.
+// XTYPE / ALU / Vector reduce maximum halfwords.
+// XTYPE / ALU / Vector reduce maximum words.
+// XTYPE / ALU / Vector minimum bytes.
+// XTYPE / ALU / Vector reduce minimum halfwords.
+// XTYPE / ALU / Vector reduce minimum words.
+// XTYPE / ALU / Vector subtract bytes.
+
+
+/********************************************************************
+* XTYPE/BIT *
+*********************************************************************/
+
+// XTYPE / BIT / Count leading.
+// XTYPE / BIT / Count trailing.
+// XTYPE / BIT / Extract bitfield.
+// XTYPE / BIT / Masked parity.
+// XTYPE / BIT / Bit reverse.
+// XTYPE / BIT / Split bitfield.
+
+
+/********************************************************************
+* XTYPE/COMPLEX *
+*********************************************************************/
+
+// XTYPE / COMPLEX / Complex add/sub halfwords.
+// XTYPE / COMPLEX / Complex add/sub words.
+// XTYPE / COMPLEX / Complex multiply 32x16.
+// XTYPE / COMPLEX / Vector reduce complex rotate.
+
+
+/********************************************************************
+* XTYPE/MPY *
+*********************************************************************/
+
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
new file mode 100644
index 000000000000..16ea7cf6ed7f
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMCInst.h
@@ -0,0 +1,41 @@
+//===- HexagonMCInst.h - Hexagon sub-class of MCInst ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class extends MCInst to allow some VLIW annotation.
+//
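+// A minimal usage sketch (hypothetical; assumes MI points to a
+// MachineInstr already known to open its VLIW packet):
+//
+//   HexagonMCInst MCI;
+//   MCI.setOpcode(MI->getOpcode());
+//   MCI.setMI(MI);             // remember the originating MachineInstr
+//   MCI.setStartPacket(true);  // MI is the first instruction of a packet
+//   MCI.setEndPacket(false);
+//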
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCINST_H
+#define HEXAGONMCINST_H
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/CodeGen/MachineInstr.h"
+
+namespace llvm {
+ class HexagonMCInst: public MCInst {
+ // Packet start and end markers
+ unsigned startPacket: 1, endPacket: 1;
+ const MachineInstr *MachineI;
+ public:
+ explicit HexagonMCInst(): MCInst(),
+ startPacket(0), endPacket(0), MachineI(0) {}
+
+ const MachineInstr* getMI() const { return MachineI; }
+
+ void setMI(const MachineInstr *MI) { MachineI = MI; }
+
+ bool isStartPacket() const { return startPacket; }
+ bool isEndPacket() const { return endPacket; }
+
+ void setStartPacket(bool yes) { startPacket = yes; }
+ void setEndPacket(bool yes) { endPacket = yes; }
+ };
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
new file mode 100644
index 000000000000..70bddcc76a59
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -0,0 +1,93 @@
+//===- HexagonMCInstLower.cpp - Convert Hexagon MachineInstr to an MCInst -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Hexagon MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
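+// Build an MCOperand holding a symbolic reference. Any constant offset
+// carried by the MachineOperand is folded in, so a global "foo" with
+// offset 8 lowers to the expression (foo + 8); jump-table references
+// deliberately ignore the offset.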
+static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol,
+ HexagonAsmPrinter& Printer) {
+ MCContext &MC = Printer.OutContext;
+ const MCExpr *ME;
+
+ ME = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, MC);
+
+ if (!MO.isJTI() && MO.getOffset())
+ ME = MCBinaryExpr::CreateAdd(ME, MCConstantExpr::Create(MO.getOffset(), MC),
+ MC);
+
+ return (MCOperand::CreateExpr(ME));
+}
+
+// Create an MCInst from a MachineInstr
+void llvm::HexagonLowerToMC(const MachineInstr* MI, MCInst& MCI,
+ HexagonAsmPrinter& AP) {
+ MCI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCO;
+
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCO = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ // FP immediates are used only when setting GPRs, so they may be dealt
+ // with like regular immediates from this point on.
+ MCO = MCOperand::CreateImm(*Val.bitcastToAPInt().getRawData());
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCO = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCO = MCOperand::CreateExpr
+ (MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(),
+ AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCO = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCO = GetSymbolRef(MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()),
+ AP);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCO = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCO = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCO = GetSymbolRef(MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
+ break;
+ }
+
+ MCI.addOperand(MCO);
+ }
+}
diff --git a/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
new file mode 100644
index 000000000000..0318c519e453
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -0,0 +1,75 @@
+//=- HexagonMachineFunctionInfo.h - Hexagon machine function info -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMACHINEFUNCTIONINFO_H
+#define HexagonMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+ namespace Hexagon {
+ const unsigned int StartPacket = 0x1;
+ const unsigned int EndPacket = 0x2;
+ }
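+
+// PacketInfo in HexagonMachineFunctionInfo maps an instruction to an OR of
+// the flags above; an instruction that forms a packet by itself carries
+// both StartPacket and EndPacket at once.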
+
+
+/// Hexagon target-specific information for each MachineFunction.
+class HexagonMachineFunctionInfo : public MachineFunctionInfo {
+ // SRetReturnReg - Some subtargets require that sret lowering includes
+ // returning the value of the returned struct in a register. This field
+ // holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+ std::vector<MachineInstr*> AllocaAdjustInsts;
+ int VarArgsFrameIndex;
+ bool HasClobberLR;
+
+ std::map<const MachineInstr*, unsigned> PacketInfo;
+
+
+public:
+ HexagonMachineFunctionInfo() : SRetReturnReg(0), HasClobberLR(0) {}
+
+ HexagonMachineFunctionInfo(MachineFunction &MF) : SRetReturnReg(0),
+ HasClobberLR(0) {}
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ void addAllocaAdjustInst(MachineInstr* MI) {
+ AllocaAdjustInsts.push_back(MI);
+ }
+ const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
+ return AllocaAdjustInsts;
+ }
+
+ void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
+ int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
+
+ void setStartPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::StartPacket;
+ }
+ void setEndPacket(MachineInstr* MI) {
+ PacketInfo[MI] |= Hexagon::EndPacket;
+ }
+ bool isStartPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::StartPacket));
+ }
+ bool isEndPacket(const MachineInstr* MI) const {
+ return (PacketInfo.count(MI) &&
+ (PacketInfo.find(MI)->second & Hexagon::EndPacket));
+ }
+ void setHasClobberLR(bool v) { HasClobberLR = v; }
+ bool hasClobberLR() const { return HasClobberLR; }
+
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
new file mode 100644
index 000000000000..55cbc094a2ad
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -0,0 +1,288 @@
+//===-- HexagonPeephole.cpp - Hexagon Peephole Optimizations --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This peephole pass optimizes the following cases.
+// 1. Redundant sign extends. Transform the pattern
+// %vreg170<def> = SXTW %vreg166
+// ...
+// %vreg176<def> = COPY %vreg170:subreg_loreg
+//
+// into
+// %vreg176<def> = COPY %vreg166
+//
+// 2. Redundant negation of predicates. Transform
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2
+// ...
+// %vreg16<def> = NOT_p %vreg15<kill>
+// ...
+// JMP_c %vreg16<kill>, <BB#1>, %PC<imp-def,dead>
+//
+// into
+// %vreg15<def> = CMPGTrr %vreg6, %vreg2
+// ...
+// JMP_cNot %vreg15<kill>, <BB#1>, %PC<imp-def,dead>
+//
+// Note: This pass only makes instructions like
+// %vreg170<def> = SXTW %vreg166 or %vreg16<def> = NOT_p %vreg15<kill>
+// redundant; it relies on a dead-code removal pass, such as DCE or DIE,
+// to actually eliminate them.
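+//
+// A sketch of the bookkeeping used below: PeepholeMap[DstReg] = SrcReg
+// records that DstReg is a sign-extended (or negated) copy of SrcReg;
+// later COPY and predicated users look their source operand up in the map
+// and rewrite it to the original register.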
+
+
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-peephole"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm>
+
+using namespace llvm;
+
+static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Peephole Optimization"));
+
+static cl::opt<int>
+DbgPNPCount("pnp-count", cl::init(-1), cl::Hidden,
+ cl::desc("Maximum number of P=NOT(P) to be optimized"));
+
+static cl::opt<bool> DisablePNotP("disable-hexagon-pnotp",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of PNotP"));
+
+static cl::opt<bool> DisableOptSZExt("disable-hexagon-optszext",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Disable Optimization of Sign/Zero Extends"));
+
+namespace {
+ struct HexagonPeephole : public MachineFunctionPass {
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+ const MachineRegisterInfo *MRI;
+
+ public:
+ static char ID;
+ HexagonPeephole() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Hexagon optimize redundant zero and size extends";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ private:
+ void ChangeOpInto(MachineOperand &Dst, MachineOperand &Src);
+ };
+}
+
+char HexagonPeephole::ID = 0;
+
+bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
+
+ QII = static_cast<const HexagonInstrInfo *>(MF.getTarget().
+ getInstrInfo());
+ QRI = static_cast<const HexagonRegisterInfo *>(MF.getTarget().
+ getRegisterInfo());
+ MRI = &MF.getRegInfo();
+
+ DenseMap<unsigned, unsigned> PeepholeMap;
+
+ if (DisableHexagonPeephole) return false;
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ PeepholeMap.clear();
+
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ // Look for sign extends:
+ // %vreg170<def> = SXTW %vreg166
+ if (!DisableOptSZExt && MI->getOpcode() == Hexagon::SXTW) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = SXTW %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for P=NOT(P).
+ if (!DisablePNotP &&
+ (MI->getOpcode() == Hexagon::NOT_p)) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ // Just handle virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Map the following:
+ // %vreg170<def> = NOT_xx %vreg166
+ // PeepholeMap[170] = vreg166
+ PeepholeMap[DstReg] = SrcReg;
+ }
+ }
+
+ // Look for copy:
+ // %vreg176<def> = COPY %vreg170:subreg_loreg
+ if (!DisableOptSZExt && MI->isCopy()) {
+ assert (MI->getNumOperands() == 2);
+ MachineOperand &Dst = MI->getOperand(0);
+ MachineOperand &Src = MI->getOperand(1);
+
+ // Make sure we are copying the lower 32 bits.
+ if (Src.getSubReg() != Hexagon::subreg_loreg)
+ continue;
+
+ unsigned DstReg = Dst.getReg();
+ unsigned SrcReg = Src.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(SrcReg)) {
+ // Change the 1st operand.
+ MI->RemoveOperand(1);
+ MI->addOperand(MachineOperand::CreateReg(PeepholeSrc, false));
+ }
+ }
+ }
+
+ // Look for Predicated instructions.
+ if (!DisablePNotP) {
+ bool Done = false;
+ if (QII->isPredicated(MI)) {
+ MachineOperand &Op0 = MI->getOperand(0);
+ unsigned Reg0 = Op0.getReg();
+ const TargetRegisterClass *RC0 = MRI->getRegClass(Reg0);
+ if (RC0->getID() == Hexagon::PredRegsRegClassID) {
+ // Handle instructions that have a predicate register in op0
+ // (most cases of predicable instructions).
+ if (TargetRegisterInfo::isVirtualRegister(Reg0)) {
+ // Try to find in the map.
+ if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
+ // Change the first operand and flip the opcode.
+ MI->getOperand(0).setReg(PeepholeSrc);
+ int NewOp = QII->getInvertedPredicatedOpcode(MI->getOpcode());
+ MI->setDesc(QII->get(NewOp));
+ Done = true;
+ }
+ }
+ }
+ }
+
+ if (!Done) {
+ // Handle special instructions.
+ unsigned Op = MI->getOpcode();
+ unsigned NewOp = 0;
+ unsigned PR = 1, S1 = 2, S2 = 3; // Operand indices.
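+
+ // When the predicate operand is a recorded NOT(P), substitute the
+ // original P, flip the opcode to its operand-swapped twin (e.g.
+ // MUX_ri <-> MUX_ir), and exchange the two source operands.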
+
+ switch (Op) {
+ case Hexagon::TFR_condset_rr:
+ case Hexagon::TFR_condset_ii:
+ case Hexagon::MUX_ii:
+ case Hexagon::MUX_rr:
+ NewOp = Op;
+ break;
+ case Hexagon::TFR_condset_ri:
+ NewOp = Hexagon::TFR_condset_ir;
+ break;
+ case Hexagon::TFR_condset_ir:
+ NewOp = Hexagon::TFR_condset_ri;
+ break;
+ case Hexagon::MUX_ri:
+ NewOp = Hexagon::MUX_ir;
+ break;
+ case Hexagon::MUX_ir:
+ NewOp = Hexagon::MUX_ri;
+ break;
+ }
+ if (NewOp) {
+ unsigned PSrc = MI->getOperand(PR).getReg();
+ if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
+ MI->getOperand(PR).setReg(POrig);
+ MI->setDesc(QII->get(NewOp));
+ // Swap operands S1 and S2.
+ MachineOperand Op1 = MI->getOperand(S1);
+ MachineOperand Op2 = MI->getOperand(S2);
+ ChangeOpInto(MI->getOperand(S1), Op2);
+ ChangeOpInto(MI->getOperand(S2), Op1);
+ }
+ } // if (NewOp)
+ } // if (!Done)
+
+ } // if (!DisablePNotP)
+
+ } // Instruction
+ } // Basic Block
+ return true;
+}
+
+void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
+ assert (&Dst != &Src && "Cannot duplicate into itself");
+ switch (Dst.getType()) {
+ case MachineOperand::MO_Register:
+ if (Src.isReg()) {
+ Dst.setReg(Src.getReg());
+ } else if (Src.isImm()) {
+ Dst.ChangeToImmediate(Src.getImm());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ case MachineOperand::MO_Immediate:
+ if (Src.isImm()) {
+ Dst.setImm(Src.getImm());
+ } else if (Src.isReg()) {
+ Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
+ Src.isKill(), Src.isDead(), Src.isUndef(),
+ Src.isDebug());
+ } else {
+ llvm_unreachable("Unexpected src operand type");
+ }
+ break;
+
+ default:
+ llvm_unreachable("Unexpected dst operand type");
+ break;
+ }
+}
+
+FunctionPass *llvm::createHexagonPeephole() {
+ return new HexagonPeephole();
+}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
new file mode 100644
index 000000000000..2a9de9232915
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -0,0 +1,315 @@
+//===-- HexagonRegisterInfo.cpp - Hexagon Register Information ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRegisterInfo.h"
+#include "Hexagon.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+
+HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st,
+ const HexagonInstrInfo &tii)
+ : HexagonGenRegisterInfo(Hexagon::R31),
+ Subtarget(st),
+ TII(tii) {
+}
+
+const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
+ *MF)
+ const {
+ static const uint16_t CalleeSavedRegsV2[] = {
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+ static const uint16_t CalleeSavedRegsV3[] = {
+ Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
+ Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
+ Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V1:
+ break;
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegsV2;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ return CalleeSavedRegsV3;
+ }
+ llvm_unreachable("Callee saved registers requested for unknown architecture "
+ "version");
+}
+
+BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
+ const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(HEXAGON_RESERVED_REG_1);
+ Reserved.set(HEXAGON_RESERVED_REG_2);
+ Reserved.set(Hexagon::R29);
+ Reserved.set(Hexagon::R30);
+ Reserved.set(Hexagon::R31);
+ Reserved.set(Hexagon::D14);
+ Reserved.set(Hexagon::D15);
+ Reserved.set(Hexagon::LC0);
+ Reserved.set(Hexagon::LC1);
+ Reserved.set(Hexagon::SA0);
+ Reserved.set(Hexagon::SA1);
+ return Reserved;
+}
+
+
+const TargetRegisterClass* const*
+HexagonRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRegClassesV2[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+ static const TargetRegisterClass * const CalleeSavedRegClassesV3[] = {
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ &Hexagon::IntRegsRegClass, &Hexagon::IntRegsRegClass,
+ };
+
+ switch(Subtarget.getHexagonArchVersion()) {
+ case HexagonSubtarget::V1:
+ break;
+ case HexagonSubtarget::V2:
+ return CalleeSavedRegClassesV2;
+ case HexagonSubtarget::V3:
+ case HexagonSubtarget::V4:
+ return CalleeSavedRegClassesV3;
+ }
+ llvm_unreachable("Callee saved register classes requested for unknown "
+ "architecture version");
+}
+
+void HexagonRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ MachineInstr &MI = *I;
+
+ if (MI.getOpcode() == Hexagon::ADJCALLSTACKDOWN) {
+ // Hexagon_TODO: add code
+ } else if (MI.getOpcode() == Hexagon::ADJCALLSTACKUP) {
+ // Hexagon_TODO: add code
+ } else {
+ llvm_unreachable("Cannot handle this call frame pseudo instruction");
+ }
+ MBB.erase(I);
+}
+
+void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+
+ //
+ // Hexagon_TODO: Do we need to enforce this for Hexagon?
+ assert(SPAdj == 0 && "Unexpected");
+
+
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+
+ // Addressable stack objects are accessed using neg. offsets from %fp.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ unsigned FrameReg = getFrameRegister(MF);
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasFP(MF)) {
+ // We will not reserve space on the stack for the lr and fp registers.
+ Offset -= 2 * Hexagon_WordSize;
+ }
+
+ const unsigned FrameSize = MFI.getStackSize();
+
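+ // Strategy: if the object is reachable with a valid offset from the stack
+ // pointer, use SP directly; otherwise fall back to the frame pointer,
+ // materializing out-of-range offsets into a scratch register (the
+ // reserved register for stores and mem-ops, the destination register
+ // itself for loads).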
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset)) &&
+ !TII.isSpillPredRegOp(&MI)) {
+ // Replace frame index with a stack pointer reference.
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else {
+ // Replace frame index with a frame pointer reference.
+ if (!TII.isValidOffset(MI.getOpcode(), Offset)) {
+
+ // If the offset overflows, then correct it.
+ //
+ // For loads, we do not need a reserved register
+ // r0 = memw(r30 + #10000) to:
+ //
+ // r0 = add(r30, #10000)
+ // r0 = memw(r0)
+ if ( (MI.getOpcode() == Hexagon::LDriw) ||
+ (MI.getOpcode() == Hexagon::LDrid) ||
+ (MI.getOpcode() == Hexagon::LDrih) ||
+ (MI.getOpcode() == Hexagon::LDriuh) ||
+ (MI.getOpcode() == Hexagon::LDrib) ||
+ (MI.getOpcode() == Hexagon::LDriub) ) {
+ unsigned dstReg = (MI.getOpcode() == Hexagon::LDrid) ?
+ *getSubRegisters(MI.getOperand(0).getReg()) :
+ MI.getOperand(0).getReg();
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ dstReg).addReg(FrameReg).addImm(Offset);
+ }
+
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if ((MI.getOpcode() == Hexagon::STriw) ||
+ (MI.getOpcode() == Hexagon::STrid) ||
+ (MI.getOpcode() == Hexagon::STrih) ||
+ (MI.getOpcode() == Hexagon::STrib)) {
+ // For stores, we need a reserved register. Change
+ // memw(r30 + #10000) = r0 to:
+ //
+ // rs = add(r30, #10000);
+ // memw(rs) = r0
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+
+ // Check if offset can fit in addi.
+ if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ }
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else if (TII.isMemOp(&MI)) {
+ unsigned resReg = HEXAGON_RESERVED_REG_1;
+ if (!MFI.hasVarSizedObjects() &&
+ TII.isValidOffset(MI.getOpcode(), (FrameSize+Offset))) {
+ MI.getOperand(i).ChangeToRegister(getStackRegister(), false, false,
+ true);
+ MI.getOperand(i+1).ChangeToImmediate(FrameSize+Offset);
+ } else if (!TII.isValidOffset(Hexagon::ADD_ri, Offset)) {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), resReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ resReg).addReg(FrameReg).addReg(resReg);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ } else {
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_ri),
+ resReg).addReg(FrameReg).addImm(Offset);
+ MI.getOperand(i).ChangeToRegister(resReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ unsigned dstReg = MI.getOperand(0).getReg();
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::CONST32_Int_Real), dstReg).addImm(Offset);
+ BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+ TII.get(Hexagon::ADD_rr),
+ dstReg).addReg(FrameReg).addReg(dstReg);
+ // Can we delete MI??? r2 = add (r2, #0).
+ MI.getOperand(i).ChangeToRegister(dstReg, false, false, true);
+ MI.getOperand(i+1).ChangeToImmediate(0);
+ }
+ } else {
+ // If the offset is small enough to fit in the immediate field, directly
+ // encode it.
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ }
+ }
+
+}
+
+unsigned HexagonRegisterInfo::getRARegister() const {
+ return Hexagon::R31;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction
+ &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (TFI->hasFP(MF)) {
+ return Hexagon::R30;
+ }
+
+ return Hexagon::R29;
+}
+
+unsigned HexagonRegisterInfo::getFrameRegister() const {
+ return Hexagon::R30;
+}
+
+unsigned HexagonRegisterInfo::getStackRegister() const {
+ return Hexagon::R29;
+}
+
+void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
+ &Moves) const
+{
+ // VirtualFP = (R30 + #0).
+ unsigned FPReg = getFrameRegister();
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(FPReg, 0);
+ Moves.push_back(MachineMove(0, Dst, Src));
+}
+
+unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+}
+
+unsigned HexagonRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+}
+
+#define GET_REGINFO_TARGET_DESC
+#include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
new file mode 100644
index 000000000000..6cf727bc027d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -0,0 +1,90 @@
+//==- HexagonRegisterInfo.h - Hexagon Register Information Impl --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Hexagon implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonREGISTERINFO_H
+#define HexagonREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/MC/MachineLocation.h"
+
+#define GET_REGINFO_HEADER
+#include "HexagonGenRegisterInfo.inc"
+
+//
+// We try not to hard-code the reserved registers in our code,
+// so the following two macros were defined. However, there
+// are still a few places where R10 and R11 are hard-wired.
+// See below. If, in the future, we decide to change the reserved
+// registers, don't forget to update the following places:
+//
+// 1. the "Defs" set of STriw_pred in HexagonInstrInfo.td
+// 2. the "Defs" set of LDri_pred in HexagonInstrInfo.td
+// 3. the definition of "IntRegs" in HexagonRegisterInfo.td
+// 4. the definition of "DoubleRegs" in HexagonRegisterInfo.td
+//
+#define HEXAGON_RESERVED_REG_1 Hexagon::R10
+#define HEXAGON_RESERVED_REG_2 Hexagon::R11
+
+namespace llvm {
+
+class HexagonSubtarget;
+class HexagonInstrInfo;
+class Type;
+
+struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
+ HexagonSubtarget &Subtarget;
+ const HexagonInstrInfo &TII;
+
+ HexagonRegisterInfo(HexagonSubtarget &st, const HexagonInstrInfo &tii);
+
+ /// Code Generation virtual methods...
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+
+ const TargetRegisterClass* const* getCalleeSavedRegClasses(
+ const MachineFunction *MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
+
+ /// determineFrameLayout - Determine the size of the frame and maximum call
+ /// frame size.
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// requiresRegisterScavenging - returns true since we may need scavenging for
+ /// a temporary register when generating hardware loop instructions.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+ }
+
+ // Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister() const;
+ void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+ unsigned getStackRegister() const;
+
+ // Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td
new file mode 100644
index 000000000000..d44eae3602c9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -0,0 +1,167 @@
+//===-- HexagonRegisterInfo.td - Hexagon Register defs -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the Hexagon register file.
+//===----------------------------------------------------------------------===//
+
+let Namespace = "Hexagon" in {
+
+ class HexagonReg<string n> : Register<n> {
+ field bits<5> Num;
+ }
+
+ class HexagonDoubleReg<string n, list<Register> subregs> :
+ RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ }
+
+ // Registers are identified with 5-bit ID numbers.
+ // Ri - 32-bit integer registers.
+ class Ri<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rf - 32-bit floating-point registers.
+ class Rf<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+
+ // Rd - 64-bit registers.
+ class Rd<bits<5> num, string n, list<Register> subregs> :
+ HexagonDoubleReg<n, subregs> {
+ let Num = num;
+ let SubRegs = subregs;
+ }
+
+ // Rp - predicate registers
+ class Rp<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rc - control registers
+ class Rc<bits<5> num, string n> : HexagonReg<n> {
+ let Num = num;
+ }
+
+ // Rj - aliased integer registers
+ class Rj<string n, Ri R>: HexagonReg<n> {
+ let Num = R.Num;
+ let Aliases = [R];
+ }
+
+ def subreg_loreg : SubRegIndex;
+ def subreg_hireg : SubRegIndex;
+
+ // Integer registers.
+ def R0 : Ri< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : Ri< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : Ri< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : Ri< 3, "r3">, DwarfRegNum<[3]>;
+ def R4 : Ri< 4, "r4">, DwarfRegNum<[4]>;
+ def R5 : Ri< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : Ri< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : Ri< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : Ri< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : Ri< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : Ri<10, "r10">, DwarfRegNum<[10]>;
+ def R11 : Ri<11, "r11">, DwarfRegNum<[11]>;
+ def R12 : Ri<12, "r12">, DwarfRegNum<[12]>;
+ def R13 : Ri<13, "r13">, DwarfRegNum<[13]>;
+ def R14 : Ri<14, "r14">, DwarfRegNum<[14]>;
+ def R15 : Ri<15, "r15">, DwarfRegNum<[15]>;
+ def R16 : Ri<16, "r16">, DwarfRegNum<[16]>;
+ def R17 : Ri<17, "r17">, DwarfRegNum<[17]>;
+ def R18 : Ri<18, "r18">, DwarfRegNum<[18]>;
+ def R19 : Ri<19, "r19">, DwarfRegNum<[19]>;
+ def R20 : Ri<20, "r20">, DwarfRegNum<[20]>;
+ def R21 : Ri<21, "r21">, DwarfRegNum<[21]>;
+ def R22 : Ri<22, "r22">, DwarfRegNum<[22]>;
+ def R23 : Ri<23, "r23">, DwarfRegNum<[23]>;
+ def R24 : Ri<24, "r24">, DwarfRegNum<[24]>;
+ def R25 : Ri<25, "r25">, DwarfRegNum<[25]>;
+ def R26 : Ri<26, "r26">, DwarfRegNum<[26]>;
+ def R27 : Ri<27, "r27">, DwarfRegNum<[27]>;
+ def R28 : Ri<28, "r28">, DwarfRegNum<[28]>;
+ def R29 : Ri<29, "r29">, DwarfRegNum<[29]>;
+ def R30 : Ri<30, "r30">, DwarfRegNum<[30]>;
+ def R31 : Ri<31, "r31">, DwarfRegNum<[31]>;
+
+ def SP : Rj<"sp", R29>, DwarfRegNum<[29]>;
+ def FP : Rj<"fp", R30>, DwarfRegNum<[30]>;
+ def LR : Rj<"lr", R31>, DwarfRegNum<[31]>;
+
+ // Aliases of the R* registers used to hold 64-bit int values (doubles).
+ let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31]>, DwarfRegNum<[62]>;
+ }
+
+ // Predicate registers.
+ def P0 : Rp<0, "p0">, DwarfRegNum<[63]>;
+ def P1 : Rp<1, "p1">, DwarfRegNum<[64]>;
+ def P2 : Rp<2, "p2">, DwarfRegNum<[65]>;
+ def P3 : Rp<3, "p3">, DwarfRegNum<[66]>;
+
+ // Control registers.
+ def SA0 : Rc<0, "sa0">, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0">, DwarfRegNum<[68]>;
+
+ def SA1 : Rc<2, "sa1">, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1">, DwarfRegNum<[70]>;
+
+ def PC : Rc<9, "pc">, DwarfRegNum<[32]>; // is the Dwarf number correct?
+ def GP : Rc<11, "gp">, DwarfRegNum<[33]>; // is the Dwarf number correct?
+}
+
+// Register classes.
+//
+// FIXME: the register order should be defined in terms of the preferred
+// allocation order...
+//
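+// R10, R11, R29, R30 and R31 are listed last, presumably because they are
+// reserved or special-purpose registers (see getReservedRegs); the same
+// appears to hold for D5, D14 and D15, which alias them.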
+def IntRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add (sequence "R%u", 0, 9),
+ (sequence "R%u", 12, 28),
+ R10, R11, R29, R30, R31)> {
+}
+
+
+
+def DoubleRegs : RegisterClass<"Hexagon", [i64], 64,
+ (add (sequence "D%u", 0, 4),
+ (sequence "D%u", 6, 13), D5, D14, D15)> {
+ let SubRegClasses = [(IntRegs subreg_loreg, subreg_hireg)];
+}
+
+
+def PredRegs : RegisterClass<"Hexagon", [i1], 32, (add (sequence "P%u", 0, 3))>
+{
+ let Size = 32;
+}
+
+def CRRegs : RegisterClass<"Hexagon", [i32], 32,
+ (add (sequence "LC%u", 0, 1),
+ (sequence "SA%u", 0, 1), PC, GP)> {
+ let Size = 32;
+}
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
new file mode 100644
index 000000000000..66a00e12dd09
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -0,0 +1,82 @@
+//===- HexagonRemoveSZExtArgs.cpp - Remove unnecessary argument sign extends =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass that removes sign extends for function parameters. These parameters
+// are already sign-extended by the caller according to Hexagon's ABI.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+namespace {
+ struct HexagonRemoveExtendArgs : public FunctionPass {
+ public:
+ static char ID;
+ HexagonRemoveExtendArgs() : FunctionPass(ID) {}
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Remove sign extends";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineFunctionAnalysis>();
+ AU.addPreserved<MachineFunctionAnalysis>();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char HexagonRemoveExtendArgs::ID = 0;
+RegisterPass<HexagonRemoveExtendArgs> X("reargs",
+ "Remove Sign and Zero Extends for Args"
+ );
+
+
+
+bool HexagonRemoveExtendArgs::runOnFunction(Function &F) {
+ unsigned Idx = 1;
+ for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE;
+ ++AI, ++Idx) {
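+ // Hexagon's ABI guarantees that the caller sign-extends any argument
+ // marked 'signext', so a sign extend of such an argument inside the
+ // callee is redundant; re-create it at the entry block, where later
+ // dead-code passes can eliminate it.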
+ if (F.paramHasAttr(Idx, Attribute::SExt)) {
+ Argument* Arg = AI;
+ if (!isa<PointerType>(Arg->getType())) {
+ for (Instruction::use_iterator UI = Arg->use_begin();
+ UI != Arg->use_end();) {
+ if (isa<SExtInst>(*UI)) {
+ Instruction* Use = cast<Instruction>(*UI);
+ SExtInst* SI = new SExtInst(Arg, Use->getType());
+ assert (EVT::getEVT(SI->getType()) ==
+ (EVT::getEVT(Use->getType())));
+ ++UI;
+ Use->replaceAllUsesWith(SI);
+ Instruction* First = F.getEntryBlock().begin();
+ SI->insertBefore(First);
+ Use->eraseFromParent();
+ } else {
+ ++UI;
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+
+
+FunctionPass *llvm::createHexagonRemoveExtendOps(HexagonTargetMachine &TM) {
+ return new HexagonRemoveExtendArgs();
+}
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
new file mode 100644
index 000000000000..c4887963895c
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -0,0 +1,54 @@
+//===- HexagonSchedule.td - Hexagon Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Functional Units
+def LUNIT : FuncUnit;
+def LSUNIT : FuncUnit;
+def MUNIT : FuncUnit;
+def SUNIT : FuncUnit;
+
+// Itinerary classes
+def ALU32 : InstrItinClass;
+def ALU64 : InstrItinClass;
+def CR : InstrItinClass;
+def J : InstrItinClass;
+def JR : InstrItinClass;
+def LD : InstrItinClass;
+def M : InstrItinClass;
+def ST : InstrItinClass;
+def S : InstrItinClass;
+def SYS : InstrItinClass;
+def MARKER : InstrItinClass;
+def PSEUDO : InstrItinClass;
+
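+// Each InstrItinData entry below maps an itinerary class to the functional
+// units that may issue it: e.g. an ALU64 instruction occupies either the
+// M unit or the S unit for one cycle.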
+def HexagonItineraries :
+ ProcessorItineraries<[LUNIT, LSUNIT, MUNIT, SUNIT], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
+ InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
+ InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
+ InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
+ InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
+ InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
+ InstrItinData<MARKER , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info +
+//===----------------------------------------------------------------------===//
+
+include "HexagonScheduleV4.td"
+
+//===----------------------------------------------------------------------===//
+// V4 Machine Info -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
new file mode 100644
index 000000000000..1d82dbb90e91
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -0,0 +1,59 @@
+//=-HexagonScheduleV4.td - HexagonV4 Scheduling Definitions --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// There are four SLOTS (four parallel pipelines) in the Hexagon V4 machine.
+// This file describes that machine information.
+
+//
+// |===========|==================================================|
+// | PIPELINE | Instruction Classes |
+// |===========|==================================================|
+// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM |
+// |-----------|--------------------------------------------------|
+// | SLOT1 | LD ST ALU32 |
+// |-----------|--------------------------------------------------|
+// | SLOT2 | XTYPE ALU32 J JR |
+// |-----------|--------------------------------------------------|
+// | SLOT3 | XTYPE ALU32 J CR |
+// |===========|==================================================|
+
+// Functional Units.
+def SLOT0 : FuncUnit;
+def SLOT1 : FuncUnit;
+def SLOT2 : FuncUnit;
+def SLOT3 : FuncUnit;
+
+// Itinerary classes.
+def NV_V4 : InstrItinClass;
+def MEM_V4 : InstrItinClass;
+// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def PREFIX : InstrItinClass;
+
+def HexagonItinerariesV4 :
+ ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3], [], [
+ InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
+ InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<MARKER , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>
+ ]>;
+
+//===----------------------------------------------------------------------===//
+// Hexagon V4 Resource Definitions -
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonSelectCCInfo.td b/lib/Target/Hexagon/HexagonSelectCCInfo.td
new file mode 100644
index 000000000000..d8feb89c0ab5
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectCCInfo.td
@@ -0,0 +1,121 @@
+//===-- HexagonSelectCCInfo.td - Selectcc mappings ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+//
+// selectcc mappings.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGT)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGT)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLT)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs,
+ (ADD_ri IntRegs:$rhs, -1)))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETLE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETULE)),
+ (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
+//
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETGE)),
+ (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETUGE)),
+ (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+
+//
+// selectcc mappings for predicate comparisons.
+//
+// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
+// pt = not(p1 xor p2)
+// Rd = mux(pt, true_val, false_val)
+// and similarly for SETNE
+//
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETNE)),
+ (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
+ IntRegs:$fval))>;
+
+def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
+ IntRegs:$fval, SETEQ)),
+ (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
+ IntRegs:$tval, IntRegs:$fval))>;
+
+
+//
+// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
+// MUX64 operation, so use this instead:
+// selectcc(Rss, Rdd, tval, fval, cond) ->
+// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
+// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
+
+// setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETGT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
+
+
+// setlt-64 -> setgt-64.
+def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
+ DoubleRegs:$fval, SETLT)),
+ (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
+ (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
+ (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
+ (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
+ (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..a52c604505b8
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -0,0 +1,46 @@
+//===-- HexagonSelectionDAGInfo.cpp - Hexagon SelectionDAG Info -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+#include "HexagonTargetMachine.h"
+using namespace llvm;
+
+bool llvm::flag_aligned_memcpy;
+
+HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
+ &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() {
+}
+
+SDValue
+HexagonSelectionDAGInfo::
+EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const {
+ flag_aligned_memcpy = false;
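+ // Mark the memcpy as "aligned" only when the alignment is a multiple of
+ // four bytes and the length is a known constant greater than 32 bytes
+ // and a multiple of eight; the actual lowering of that case is assumed
+ // to happen elsewhere, since this hook still returns the null SDValue.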
+ if ((Align & 0x3) == 0) {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (ConstantSize) {
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if ((SizeVal > 32) && ((SizeVal % 8) == 0))
+ flag_aligned_memcpy = true;
+ }
+ }
+
+ return SDValue();
+}
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
new file mode 100644
index 000000000000..0673e4d35472
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -0,0 +1,40 @@
+//===-- HexagonSelectionDAGInfo.h - Hexagon SelectionDAG Info ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Hexagon subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonSELECTIONDAGINFO_H
+#define HexagonSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class HexagonTargetMachine;
+
+class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
+ ~HexagonSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
new file mode 100644
index 000000000000..d10c9f2d5242
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -0,0 +1,129 @@
+//===-- HexagonSplitTFRCondSets.cpp - split TFR condsets into xfers -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//
+//===----------------------------------------------------------------------===//
+// This pass tries to provide opportunities for better optimization of muxes.
+// The default code generated for something like: flag = (a == b) ? 1 : 3;
+// would be:
+//
+// {p0 = cmp.eq(r0,r1)}
+// {r3 = mux(p0,#1,#3)}
+//
+// This requires two packets. If we use .new predicated immediate transfers,
+// then we can do this in a single packet, e.g.:
+//
+// {p0 = cmp.eq(r0,r1)
+// if (p0.new) r3 = #1
+// if (!p0.new) r3 = #3}
+//
+// Note that the conditional assignments are not generated in .new form here.
+// We optimistically assume that they will be formed later.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "xfer"
+#include "HexagonTargetMachine.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+namespace {
+
+class HexagonSplitTFRCondSets : public MachineFunctionPass {
+ HexagonTargetMachine& QTM;
+ const HexagonSubtarget &QST;
+
+ public:
+ static char ID;
+ HexagonSplitTFRCondSets(HexagonTargetMachine& TM) :
+ MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+
+ const char *getPassName() const {
+ return "Hexagon Split TFRCondSets";
+ }
+ bool runOnMachineFunction(MachineFunction &Fn);
+};
+
+
+char HexagonSplitTFRCondSets::ID = 0;
+
+
+bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
+
+ const TargetInstrInfo *TII = QTM.getInstrInfo();
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
+ MBBb != MBBe; ++MBBb) {
+ MachineBasicBlock* MBB = MBBb;
+ // Traverse the basic block.
+ for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end();
+ ++MII) {
+ MachineInstr *MI = MII;
+ int Opc = MI->getOpcode();
+ if (Opc == Hexagon::TFR_condset_rr) {
+
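+ // Operand layout for TFR_condset_rr: 0 = dest, 1 = predicate,
+ // 2 = source if true, 3 = source if false. Split it into a
+ // predicated-true and a predicated-false register transfer.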
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(2).getReg();
+ int SrcReg2 = MI->getOperand(3).getReg();
+
+ // Minor optimization: do not emit the predicated copy if the source
+ // and the destination are the same register.
+ if (DestReg != SrcReg1) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg1);
+ }
+ if (DestReg != SrcReg2) {
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFR_cNotPt),
+ DestReg).addReg(MI->getOperand(1).getReg()).addReg(SrcReg2);
+ }
+ MII = MBB->erase(MI);
+ --MII;
+ } else if (Opc == Hexagon::TFR_condset_ii) {
+ int DestReg = MI->getOperand(0).getReg();
+ int SrcReg1 = MI->getOperand(1).getReg();
+ int Immed1 = MI->getOperand(2).getImm();
+ int Immed2 = MI->getOperand(3).getImm();
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cPt),
+ DestReg).addReg(SrcReg1).addImm(Immed1);
+ BuildMI(*MBB, MII, MI->getDebugLoc(), TII->get(Hexagon::TFRI_cNotPt),
+ DestReg).addReg(SrcReg1).addImm(Immed2);
+ MII = MBB->erase(MI);
+ --MII;
+ }
+ }
+ }
+
+ return true;
+}
+
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonSplitTFRCondSets(HexagonTargetMachine &TM) {
+ return new HexagonSplitTFRCondSets(TM);
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
new file mode 100644
index 000000000000..654d33626edf
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -0,0 +1,62 @@
+//===-- HexagonSubtarget.cpp - Hexagon Subtarget Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonSubtarget.h"
+#include "Hexagon.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+static cl::opt<bool>
+EnableV3("enable-hexagon-v3", cl::Hidden,
+ cl::desc("Enable Hexagon V3 instructions."));
+
+static cl::opt<bool>
+EnableMemOps(
+ "enable-hexagon-memops",
+ cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed,
+ cl::desc("Generate V4 MEMOP in code generation for Hexagon target"));
+
+HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS):
+ HexagonGenSubtargetInfo(TT, CPU, FS),
+ HexagonArchVersion(V1),
+ CPUString(CPU.str()) {
+ ParseSubtargetFeatures(CPU, FS);
+
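+ // ParseSubtargetFeatures (generated by TableGen) sets HexagonArchVersion
+ // from the CPU and feature strings; anything outside V2-V4 is rejected
+ // below.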
+ switch(HexagonArchVersion) {
+ case HexagonSubtarget::V2:
+ break;
+ case HexagonSubtarget::V3:
+ EnableV3 = true;
+ break;
+ case HexagonSubtarget::V4:
+ break;
+ default:
+ llvm_unreachable("Unknown Architecture Version.");
+ }
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
+
+ // Max issue per cycle == bundle width.
+ InstrItins.IssueWidth = 4;
+
+ if (EnableMemOps)
+ UseMemOps = true;
+ else
+ UseMemOps = false;
+}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
new file mode 100644
index 000000000000..3079086986d9
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -0,0 +1,74 @@
+//===-- HexagonSubtarget.h - Define Subtarget for the Hexagon ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef Hexagon_SUBTARGET_H
+#define Hexagon_SUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "HexagonGenSubtargetInfo.inc"
+
+#define Hexagon_SMALL_DATA_THRESHOLD 8
+
+namespace llvm {
+
+class HexagonSubtarget : public HexagonGenSubtargetInfo {
+
+ bool UseMemOps;
+
+public:
+ enum HexagonArchEnum {
+ V1, V2, V3, V4
+ };
+
+ HexagonArchEnum HexagonArchVersion;
+ std::string CPUString;
+ InstrItineraryData InstrItins;
+
+public:
+ HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+
+ /// getInstrItineraryData - Return the instruction itineraries based on
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ bool hasV2TOps () const { return HexagonArchVersion >= V2; }
+ bool hasV2TOpsOnly () const { return HexagonArchVersion == V2; }
+ bool hasV3TOps () const { return HexagonArchVersion >= V3; }
+ bool hasV3TOpsOnly () const { return HexagonArchVersion == V3; }
+ bool hasV4TOps () const { return HexagonArchVersion >= V4; }
+ bool useMemOps () const { return HexagonArchVersion >= V4 && UseMemOps; }
+
+ bool isSubtargetV2() const { return HexagonArchVersion == V2;}
+ const std::string &getCPUString () const { return CPUString; }
+
+ // Threshold for small data section
+ unsigned getSmallDataThreshold() const {
+ return Hexagon_SMALL_DATA_THRESHOLD;
+ }
+ const HexagonArchEnum &getHexagonArchVersion() const {
+ return HexagonArchVersion;
+ }
+};
+
+} // end namespace llvm
+
+#endif
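Backend code is expected to key transformations off these predicates rather than comparing HexagonArchVersion directly. A short sketch of the intended usage (the helper names below are hypothetical, not part of the backend):

    #include "HexagonSubtarget.h"
    using namespace llvm;

    // Hypothetical helper: gate V4-only codegen on the subtarget predicates.
    static bool canUseMemOps(const HexagonSubtarget &ST) {
      // useMemOps() already folds hasV4TOps() together with the
      // -enable-hexagon-memops command-line flag.
      return ST.useMemOps();
    }

    // Hypothetical helper: candidates for .sdata/.sbss are non-empty
    // objects no larger than the small-data threshold (8 bytes here).
    static bool fitsInSmallData(const HexagonSubtarget &ST, unsigned Size) {
      return Size != 0 && Size <= ST.getSmallDataThreshold();
    }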
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
new file mode 100644
index 000000000000..411325bf963e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -0,0 +1,145 @@
+//===-- HexagonTargetMachine.cpp - Define TargetMachine for Hexagon -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about Hexagon target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "Hexagon.h"
+#include "HexagonISelLowering.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DisableHardwareLoops("disable-hexagon-hwloops", cl::Hidden,
+ cl::desc("Disable Hardware Loops for Hexagon target"));
+
+/// HexagonTargetMachineModule - Note that this is used on hosts that
+/// cannot link in a library unless there are references into the
+/// library. In particular, it seems that it is not possible to get
+/// things to work on Win32 without this. Though it is unused, do not
+/// remove it.
+extern "C" int HexagonTargetMachineModule;
+int HexagonTargetMachineModule = 0;
+
+extern "C" void LLVMInitializeHexagonTarget() {
+ // Register the target.
+ RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+}
+
+
+/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
+///
+/// Hexagon_TODO: Do I need an aggregate alignment?
+///
+HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ DataLayout("e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-a0:0"),
+ Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
+ TSInfo(*this),
+ FrameLowering(Subtarget),
+ InstrItins(&Subtarget.getInstrItineraryData()) {
+ setMCUseCFI(false);
+}
+
+// addPassesForOptimizations - Allow the backend (target) to add
+// target-independent optimization passes to the pass manager.
+bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
+
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopSimplifyPass());
+ PM.add(createDeadCodeEliminationPass());
+ PM.add(createConstantPropagationPass());
+ PM.add(createLoopUnrollPass());
+ PM.add(createLoopStrengthReducePass(getTargetLowering()));
+ return true;
+}
+
+namespace {
+/// Hexagon Code Generator Pass Configuration Options.
+class HexagonPassConfig : public TargetPassConfig {
+public:
+ HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ HexagonTargetMachine &getHexagonTargetMachine() const {
+ return getTM<HexagonTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new HexagonPassConfig(this, PM);
+}
+
+bool HexagonPassConfig::addInstSelector() {
+ PM.add(createHexagonRemoveExtendOps(getHexagonTargetMachine()));
+ PM.add(createHexagonISelDag(getHexagonTargetMachine()));
+ PM.add(createHexagonPeephole());
+ return false;
+}
+
+
+bool HexagonPassConfig::addPreRegAlloc() {
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonHardwareLoops());
+ }
+
+ return false;
+}
+
+bool HexagonPassConfig::addPostRegAlloc() {
+ PM.add(createHexagonCFGOptimizer(getHexagonTargetMachine()));
+ return true;
+}
+
+
+bool HexagonPassConfig::addPreSched2() {
+ addPass(IfConverterID);
+ return true;
+}
+
+bool HexagonPassConfig::addPreEmitPass() {
+
+ if (!DisableHardwareLoops) {
+ PM.add(createHexagonFixupHwLoops());
+ }
+
+ // Expand Spill code for predicate registers.
+ PM.add(createHexagonExpandPredSpillCode(getHexagonTargetMachine()));
+
+ // Split up TFRcondsets into conditional transfers.
+ PM.add(createHexagonSplitTFRCondSets(getHexagonTargetMachine()));
+
+ // Create Packets.
+ PM.add(createHexagonPacketizer());
+
+ return false;
+}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
new file mode 100644
index 000000000000..0336965d11f1
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -0,0 +1,83 @@
+//=-- HexagonTargetMachine.h - Define TargetMachine for Hexagon ---*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Hexagon specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETMACHINE_H
+#define HexagonTARGETMACHINE_H
+
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonISelLowering.h"
+#include "HexagonSelectionDAGInfo.h"
+#include "HexagonFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+class Module;
+
+class HexagonTargetMachine : public LLVMTargetMachine {
+ const TargetData DataLayout; // Calculates type size & alignment.
+ HexagonSubtarget Subtarget;
+ HexagonInstrInfo InstrInfo;
+ HexagonTargetLowering TLInfo;
+ HexagonSelectionDAGInfo TSInfo;
+ HexagonFrameLowering FrameLowering;
+ const InstrItineraryData* InstrItins;
+
+public:
+ HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ virtual const HexagonInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const HexagonSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const InstrItineraryData* getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+
+ virtual const HexagonTargetLowering* getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const HexagonFrameLowering* getFrameLowering() const {
+ return &FrameLowering;
+ }
+
+ virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const TargetData *getTargetData() const { return &DataLayout; }
+ static unsigned getModuleMatchQuality(const Module &M);
+
+ // Pass Pipeline Configuration.
+ virtual bool addPassesForOptimizations(PassManagerBase &PM);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+extern bool flag_aligned_memcpy;
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
new file mode 100644
index 000000000000..32cc70958638
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -0,0 +1,94 @@
+//===-- HexagonTargetObjectFile.cpp - Hexagon asm properties --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the HexagonTargetObjectFile properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetObjectFile.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<int> SmallDataThreshold("hexagon-small-data-threshold",
+ cl::init(8), cl::Hidden);
+
+void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getContext().getELFSection(".sdata", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ SmallBSSSection =
+ getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getBSS());
+}
+
+// sdata/sbss support taken largely from the MIPS Backend.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= (uint64_t)SmallDataThreshold;
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // If the primary definition of this global value is outside the current
+ // translation unit or the global value is available for inspection but not
+ // emission, then do nothing.
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ // Otherwise, check whether GV belongs in sdata/sbss, based on the section
+ // kind it would normally receive from getKindForGlobal(GV, TM).
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global value should be
+/// placed into small data/bss section.
+bool HexagonTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) {
+ Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+ }
+
+ return false;
+}
+
+const MCSection *HexagonTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
+ Mang, TM);
+}
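With the default -hexagon-small-data-threshold of 8, a 4-byte global is placed in .sdata/.sbss while a 16-byte one falls through to the regular ELF section selection. A standalone sketch of the size test, mirroring IsInSmallSection above (the main() is illustrative only):

    #include <cstdint>
    #include <cstdio>

    // Mirrors IsInSmallSection(): sizes in the range (0, Threshold].
    static bool isInSmallSection(uint64_t Size, uint64_t Threshold = 8) {
      return Size > 0 && Size <= Threshold;
    }

    int main() {
      std::printf("4 bytes:  %d\n", isInSmallSection(4));  // 1 -> .sdata/.sbss
      std::printf("16 bytes: %d\n", isInSmallSection(16)); // 0 -> regular sections
      return 0;
    }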
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.h b/lib/Target/Hexagon/HexagonTargetObjectFile.h
new file mode 100644
index 000000000000..693345081ee3
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -0,0 +1,40 @@
+//===-- HexagonTargetObjectFile.h - Hexagon asm properties ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonTARGETOBJECTFILE_H
+#define HexagonTARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/MC/MCSectionELF.h"
+
+namespace llvm {
+
+ class HexagonTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSectionELF *SmallDataSection;
+ const MCSectionELF *SmallBSSSection;
+ public:
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection* SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
new file mode 100644
index 000000000000..c6e7bd1f53d6
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -0,0 +1,3642 @@
+//===----- HexagonPacketizer.cpp - VLIW Packetizer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a simple VLIW packetizer using a DFA. The packetizer works
+// on machine basic blocks. For each instruction I in BB, the packetizer
+// consults the DFA to see if machine resources are available to execute I. If
+// so, the packetizer checks if I depends on any instruction J in the current
+// packet. If no dependency is found, I is added to the current packet and the
+// machine resources are marked as taken. If any dependency is found, a target
+// API call is made to try to prune the dependence.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "packets"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonMachineFunctionInfo.h"
+
+#include <map>
+
+using namespace llvm;
+
+namespace {
+ class HexagonPacketizer : public MachineFunctionPass {
+
+ public:
+ static char ID;
+ HexagonPacketizer() : MachineFunctionPass(ID) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ const char *getPassName() const {
+ return "Hexagon Packetizer";
+ }
+
+ bool runOnMachineFunction(MachineFunction &Fn);
+ };
+ char HexagonPacketizer::ID = 0;
+
+ class HexagonPacketizerList : public VLIWPacketizerList {
+
+ private:
+
+ // Has the instruction been promoted to a dot-new instruction.
+ bool PromotedToDotNew;
+
+ // Has the instruction been glued to allocframe.
+ bool GlueAllocframeStore;
+
+ // Has the feeder instruction been glued to new value jump.
+ bool GlueToNewValueJump;
+
+ // Check if there is a dependence between some instruction already in this
+ // packet and this instruction.
+ bool Dependence;
+
+ // Only check for dependence if there are resources available to
+ // schedule this instruction.
+ bool FoundSequentialDependence;
+
+ public:
+ // Ctor.
+ HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT);
+
+ // initPacketizerState - initialize some internal flags.
+ void initPacketizerState();
+
+ // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+ bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ // isSoloInstruction - Return true if instruction MI cannot be packetized
+ // with any other instruction, which means that MI itself is a packet.
+ bool isSoloInstruction(MachineInstr *MI);
+
+ // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+ // together.
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ);
+
+ // isLegalToPruneDependencies - Is it legal to prune a dependence between
+ // SUI and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ);
+
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI);
+ private:
+ bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
+ bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToDotNew(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC);
+ bool CanPromoteToNewValue(MachineInstr* MI, SUnit* PacketSU,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII);
+ bool CanPromoteToNewValueStore(MachineInstr* MI, MachineInstr* PacketMI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool DemoteToDotOld(MachineInstr* MI);
+ bool ArePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2,
+ std::map <MachineInstr*, SUnit*> MIToSUnit);
+ bool RestrictingDepExistInPacket(MachineInstr*,
+ unsigned, std::map <MachineInstr*, SUnit*>);
+ bool isNewifiable(MachineInstr* MI);
+ bool isCondInst(MachineInstr* MI);
+ bool IsNewifyStore (MachineInstr* MI);
+ bool tryAllocateResourcesForConstExt(MachineInstr* MI);
+ bool canReserveResourcesForConstExt(MachineInstr *MI);
+ void reserveResourcesForConstExt(MachineInstr* MI);
+ bool isNewValueInst(MachineInstr* MI);
+ bool isDotNewInst(MachineInstr* MI);
+ };
+}
+
+// HexagonPacketizerList Ctor.
+HexagonPacketizerList::HexagonPacketizerList(
+ MachineFunction &MF, MachineLoopInfo &MLI,MachineDominatorTree &MDT)
+ : VLIWPacketizerList(MF, MLI, MDT, true) {
+}
+
+bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) {
+ const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
+ // Instantiate the packetizer.
+ HexagonPacketizerList Packetizer(Fn, MLI, MDT);
+
+ // DFA state table should not be empty.
+ assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+
+ //
+ // Loop over all basic blocks and remove KILL pseudo-instructions.
+ // These instructions confuse the dependence analysis. Consider:
+ // D0 = ... (Insn 0)
+ // R0 = KILL R0, D0 (Insn 1)
+ // R0 = ... (Insn 2)
+ // Here, Insn 1 will result in the dependence graph not emitting an output
+ // dependence between Insn 0 and Insn 2. This can lead to incorrect
+ // packetization.
+ //
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ MachineBasicBlock::iterator End = MBB->end();
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != End) {
+ if (MI->isKill()) {
+ MachineBasicBlock::iterator DeleteMI = MI;
+ ++MI;
+ MBB->erase(DeleteMI);
+ End = MBB->end();
+ continue;
+ }
+ ++MI;
+ }
+ }
+
+ // Loop over all of the basic blocks.
+ for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+ MBB != MBBe; ++MBB) {
+ // Find scheduling regions and schedule / packetize each region.
+ unsigned RemainingCount = MBB->size();
+ for (MachineBasicBlock::iterator RegionEnd = MBB->end();
+ RegionEnd != MBB->begin();) {
+ // The next region starts above the previous region. Look backward in the
+ // instruction stream until we find the nearest boundary.
+ MachineBasicBlock::iterator I = RegionEnd;
+ for (; I != MBB->begin(); --I, --RemainingCount) {
+ if (TII->isSchedulingBoundary(llvm::prior(I), MBB, Fn))
+ break;
+ }
+ I = MBB->begin();
+
+ // Skip empty scheduling regions.
+ if (I == RegionEnd) {
+ RegionEnd = llvm::prior(RegionEnd);
+ --RemainingCount;
+ continue;
+ }
+ // Skip regions with one instruction.
+ if (I == llvm::prior(RegionEnd)) {
+ RegionEnd = llvm::prior(RegionEnd);
+ continue;
+ }
+
+ Packetizer.PacketizeMIs(MBB, I, RegionEnd);
+ RegionEnd = I;
+ }
+ }
+
+ return true;
+}
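PacketizeMIs applies the greedy, DFA-gated algorithm described in the file header to each region found above. A self-contained toy of that core loop, with the resource tracker and the dependence check reduced to stand-ins (all names below are hypothetical simplifications of ResourceTracker and isLegalToPacketizeTogether):

    #include <vector>

    struct Insn { int Id; };

    // Stand-in for the DFA resource query; the real tracker models
    // functional-unit availability within the current packet.
    static bool resourcesAvailable(const std::vector<Insn> &Packet,
                                   const Insn &I) {
      return Packet.size() < 4;  // pretend bundle width is the only resource
    }

    // Stand-in for the dependence check against the current packet.
    static bool dependsOnPacket(const std::vector<Insn> &Packet,
                                const Insn &I) {
      return false;  // assume all dependences were pruned
    }

    // Greedy packetization: add I to the current packet when resources allow
    // and no dependence remains; otherwise end the packet and start a new one.
    static std::vector<std::vector<Insn> >
    packetize(const std::vector<Insn> &Region) {
      std::vector<std::vector<Insn> > Packets(1);
      for (unsigned i = 0, e = Region.size(); i != e; ++i) {
        const Insn &I = Region[i];
        if (!resourcesAvailable(Packets.back(), I) ||
            dependsOnPacket(Packets.back(), I))
          Packets.push_back(std::vector<Insn>());
        Packets.back().push_back(I);
      }
      return Packets;
    }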
+
+
+static bool IsIndirectCall(MachineInstr* MI) {
+ return ((MI->getOpcode() == Hexagon::CALLR) ||
+ (MI->getOpcode() == Hexagon::CALLRv3));
+}
+
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+ QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ llvm_unreachable("cannot reserve resources for constant extender.");
+ }
+ return;
+}
+
+bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ assert(QII->isExtended(MI) &&
+ "Should only be called for constant extended instructions");
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *PseudoMI = MF->CreateMachineInstr(QII->get(Hexagon::IMMEXT),
+ MI->getDebugLoc());
+ bool CanReserve = ResourceTracker->canReserveResources(PseudoMI);
+ MF->DeleteMachineInstr(PseudoMI);
+ return CanReserve;
+}
+
+// Allocate resources (i.e. 4 bytes) for a constant extender. Return true if
+// the allocation succeeds, false otherwise.
+bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ MachineInstr *PseudoMI = MI->getParent()->getParent()->CreateMachineInstr(
+ QII->get(Hexagon::IMMEXT), MI->getDebugLoc());
+
+ if (ResourceTracker->canReserveResources(PseudoMI)) {
+ ResourceTracker->reserveResources(PseudoMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return true;
+ } else {
+ MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI);
+ return false;
+ }
+}
+
+
+bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI,
+ SDep::Kind DepType,
+ unsigned DepReg) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+
+ // Check for lr dependence
+ if (DepReg == QRI->getRARegister()) {
+ return true;
+ }
+
+ if (QII->isDeallocRet(MI)) {
+ if (DepReg == QRI->getFrameRegister() ||
+ DepReg == QRI->getStackRegister())
+ return true;
+ }
+
+ // Check if this is a predicate dependence
+ const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg);
+ if (RC == Hexagon::PredRegsRegisterClass) {
+ return true;
+ }
+
+ //
+ // Lastly, check for an operand used in an indirect call.
+ // If we had an attribute for checking whether an instruction is an indirect
+ // call, we could have avoided this relatively brittle implementation of
+ // IsIndirectCall().
+ //
+ // This assumes that the first operand of the CALLr is the function address.
+ //
+ if (IsIndirectCall(MI) && (DepType == SDep::Data)) {
+ MachineOperand MO = MI->getOperand(0);
+ if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool IsRegDependence(const SDep::Kind DepType) {
+ return (DepType == SDep::Data || DepType == SDep::Anti ||
+ DepType == SDep::Output);
+}
+
+static bool IsDirectJump(MachineInstr* MI) {
+ return (MI->getOpcode() == Hexagon::JMP);
+}
+
+static bool IsSchedBarrier(MachineInstr* MI) {
+ switch (MI->getOpcode()) {
+ case Hexagon::BARRIER:
+ return true;
+ }
+ return false;
+}
+
+static bool IsControlFlow(MachineInstr* MI) {
+ return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
+}
+
+bool HexagonPacketizerList::isNewValueInst(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ if (QII->isNewValueJump(MI))
+ return true;
+
+ if (QII->isNewValueStore(MI))
+ return true;
+
+ return false;
+}
+
+// Returns true if an instruction can be promoted to a new-value store. It
+// always returns false for V2 and V3, since new-value stores require V4. The
+// switch below lists all the conditional and unconditional stores that can be
+// promoted to new-value stores.
+
+bool HexagonPacketizerList::IsNewifyStore (MachineInstr* MI) {
+ const HexagonRegisterInfo *QRI =
+ (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ switch (MI->getOpcode()) {
+ // store byte
+ case Hexagon::STrib:
+ case Hexagon::STrib_indexed:
+ case Hexagon::STrib_indexed_shl_V4:
+ case Hexagon::STrib_shl_V4:
+ case Hexagon::STrib_GP_V4:
+ case Hexagon::STb_GP_V4:
+ case Hexagon::POST_STbri:
+ case Hexagon::STrib_cPt:
+ case Hexagon::STrib_cdnPt_V4:
+ case Hexagon::STrib_cNotPt:
+ case Hexagon::STrib_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_cPt:
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ case Hexagon::STrib_indexed_cNotPt:
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STbri_cPt:
+ case Hexagon::POST_STbri_cdnPt_V4:
+ case Hexagon::POST_STbri_cNotPt:
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ case Hexagon::STb_GP_cPt_V4:
+ case Hexagon::STb_GP_cNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cPt_V4:
+ case Hexagon::STrib_GP_cNotPt_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+
+ // store halfword
+ case Hexagon::STrih:
+ case Hexagon::STrih_indexed:
+ case Hexagon::STrih_indexed_shl_V4:
+ case Hexagon::STrih_shl_V4:
+ case Hexagon::STrih_GP_V4:
+ case Hexagon::STh_GP_V4:
+ case Hexagon::POST_SThri:
+ case Hexagon::STrih_cPt:
+ case Hexagon::STrih_cdnPt_V4:
+ case Hexagon::STrih_cNotPt:
+ case Hexagon::STrih_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_cPt:
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ case Hexagon::STrih_indexed_cNotPt:
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_SThri_cPt:
+ case Hexagon::POST_SThri_cdnPt_V4:
+ case Hexagon::POST_SThri_cNotPt:
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ case Hexagon::STh_GP_cPt_V4:
+ case Hexagon::STh_GP_cNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cPt_V4:
+ case Hexagon::STrih_GP_cNotPt_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+
+ // store word
+ case Hexagon::STriw:
+ case Hexagon::STriw_indexed:
+ case Hexagon::STriw_indexed_shl_V4:
+ case Hexagon::STriw_shl_V4:
+ case Hexagon::STriw_GP_V4:
+ case Hexagon::STw_GP_V4:
+ case Hexagon::POST_STwri:
+ case Hexagon::STriw_cPt:
+ case Hexagon::STriw_cdnPt_V4:
+ case Hexagon::STriw_cNotPt:
+ case Hexagon::STriw_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_cPt:
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ case Hexagon::STriw_indexed_cNotPt:
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ case Hexagon::POST_STwri_cPt:
+ case Hexagon::POST_STwri_cdnPt_V4:
+ case Hexagon::POST_STwri_cNotPt:
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ case Hexagon::STw_GP_cPt_V4:
+ case Hexagon::STw_GP_cNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cPt_V4:
+ case Hexagon::STriw_GP_cNotPt_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ return QRI->Subtarget.hasV4TOps();
+ }
+ return false;
+}
+
+static bool IsLoopN(MachineInstr *MI) {
+ return (MI->getOpcode() == Hexagon::LOOP0_i ||
+ MI->getOpcode() == Hexagon::LOOP0_r);
+}
+
+/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a
+/// callee-saved register.
+static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ unsigned CalleeSavedReg = *CSR;
+ if (MI->modifiesRegister(CalleeSavedReg, TRI))
+ return true;
+ }
+ return false;
+}
+
+// Return the new-value store opcode for a given store opcode.
+static int GetDotNewOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+
+ // store new value byte
+ case Hexagon::STrib:
+ return Hexagon::STrib_nv_V4;
+
+ case Hexagon::STrib_indexed:
+ return Hexagon::STrib_indexed_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_V4:
+ return Hexagon::STrib_indexed_shl_nv_V4;
+
+ case Hexagon::STrib_shl_V4:
+ return Hexagon::STrib_shl_nv_V4;
+
+ case Hexagon::STrib_GP_V4:
+ return Hexagon::STrib_GP_nv_V4;
+
+ case Hexagon::STb_GP_V4:
+ return Hexagon::STb_GP_nv_V4;
+
+ case Hexagon::POST_STbri:
+ return Hexagon::POST_STbri_nv_V4;
+
+ case Hexagon::STrib_cPt:
+ return Hexagon::STrib_cPt_nv_V4;
+
+ case Hexagon::STrib_cdnPt_V4:
+ return Hexagon::STrib_cdnPt_nv_V4;
+
+ case Hexagon::STrib_cNotPt:
+ return Hexagon::STrib_cNotPt_nv_V4;
+
+ case Hexagon::STrib_cdnNotPt_V4:
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt:
+ return Hexagon::STrib_indexed_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnPt_V4:
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cNotPt:
+ return Hexagon::STrib_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4:
+ return Hexagon::STrib_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt:
+ return Hexagon::POST_STbri_cPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnPt_V4:
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STbri_cNotPt:
+ return Hexagon::POST_STbri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cdnNotPt_V4:
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_V4:
+ return Hexagon::STb_GP_cPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4:
+ return Hexagon::STb_GP_cNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnPt_V4:
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cPt_V4:
+ return Hexagon::STrib_GP_cPt_nv_V4;
+
+ case Hexagon::STrib_GP_cNotPt_V4:
+ return Hexagon::STrib_GP_cNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cdnPt_V4:
+ return Hexagon::STrib_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ return Hexagon::STrib_GP_cdnNotPt_nv_V4;
+
+ // store new value halfword
+ case Hexagon::STrih:
+ return Hexagon::STrih_nv_V4;
+
+ case Hexagon::STrih_indexed:
+ return Hexagon::STrih_indexed_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_V4:
+ return Hexagon::STrih_indexed_shl_nv_V4;
+
+ case Hexagon::STrih_shl_V4:
+ return Hexagon::STrih_shl_nv_V4;
+
+ case Hexagon::STrih_GP_V4:
+ return Hexagon::STrih_GP_nv_V4;
+
+ case Hexagon::STh_GP_V4:
+ return Hexagon::STh_GP_nv_V4;
+
+ case Hexagon::POST_SThri:
+ return Hexagon::POST_SThri_nv_V4;
+
+ case Hexagon::STrih_cPt:
+ return Hexagon::STrih_cPt_nv_V4;
+
+ case Hexagon::STrih_cdnPt_V4:
+ return Hexagon::STrih_cdnPt_nv_V4;
+
+ case Hexagon::STrih_cNotPt:
+ return Hexagon::STrih_cNotPt_nv_V4;
+
+ case Hexagon::STrih_cdnNotPt_V4:
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt:
+ return Hexagon::STrih_indexed_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnPt_V4:
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cNotPt:
+ return Hexagon::STrih_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4:
+ return Hexagon::STrih_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt:
+ return Hexagon::POST_SThri_cPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnPt_V4:
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+
+ case Hexagon::POST_SThri_cNotPt:
+ return Hexagon::POST_SThri_cNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cdnNotPt_V4:
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_V4:
+ return Hexagon::STh_GP_cPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4:
+ return Hexagon::STh_GP_cNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnPt_V4:
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cPt_V4:
+ return Hexagon::STrih_GP_cPt_nv_V4;
+
+ case Hexagon::STrih_GP_cNotPt_V4:
+ return Hexagon::STrih_GP_cNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cdnPt_V4:
+ return Hexagon::STrih_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ return Hexagon::STrih_GP_cdnNotPt_nv_V4;
+
+ // store new value word
+ case Hexagon::STriw:
+ return Hexagon::STriw_nv_V4;
+
+ case Hexagon::STriw_indexed:
+ return Hexagon::STriw_indexed_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_V4:
+ return Hexagon::STriw_indexed_shl_nv_V4;
+
+ case Hexagon::STriw_shl_V4:
+ return Hexagon::STriw_shl_nv_V4;
+
+ case Hexagon::STriw_GP_V4:
+ return Hexagon::STriw_GP_nv_V4;
+
+ case Hexagon::STw_GP_V4:
+ return Hexagon::STw_GP_nv_V4;
+
+ case Hexagon::POST_STwri:
+ return Hexagon::POST_STwri_nv_V4;
+
+ case Hexagon::STriw_cPt:
+ return Hexagon::STriw_cPt_nv_V4;
+
+ case Hexagon::STriw_cdnPt_V4:
+ return Hexagon::STriw_cdnPt_nv_V4;
+
+ case Hexagon::STriw_cNotPt:
+ return Hexagon::STriw_cNotPt_nv_V4;
+
+ case Hexagon::STriw_cdnNotPt_V4:
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt:
+ return Hexagon::STriw_indexed_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnPt_V4:
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cNotPt:
+ return Hexagon::STriw_indexed_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4:
+ return Hexagon::STriw_indexed_shl_cPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4:
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt:
+ return Hexagon::POST_STwri_cPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnPt_V4:
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+
+ case Hexagon::POST_STwri_cNotPt:
+ return Hexagon::POST_STwri_cNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cdnNotPt_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_V4:
+ return Hexagon::STw_GP_cPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4:
+ return Hexagon::STw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnPt_V4:
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cPt_V4:
+ return Hexagon::STriw_GP_cPt_nv_V4;
+
+ case Hexagon::STriw_GP_cNotPt_V4:
+ return Hexagon::STriw_GP_cNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cdnPt_V4:
+ return Hexagon::STriw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ return Hexagon::STriw_GP_cdnNotPt_nv_V4;
+ }
+}
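In assembly terms, the mapping above rewrites a store to take its data from the instruction that produces it within the same packet; schematically (using the same notation as the comments later in this file):

    { r1 = add(r2, r3)
      memw(r0+#0) = r1.new }   // STriw_nv_V4; the .old form is memw(r0+#0) = r1 (STriw)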
+
+// Return the .new predicated version of an instruction's opcode.
+static int GetDotNewPredOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .new type");
+
+ // Conditional stores
+ // Store byte conditionally
+ case Hexagon::STrib_cPt :
+ return Hexagon::STrib_cdnPt_V4;
+
+ case Hexagon::STrib_cNotPt :
+ return Hexagon::STrib_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_cPt :
+ return Hexagon::STrib_indexed_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_cNotPt :
+ return Hexagon::STrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrib_imm_cPt_V4 :
+ return Hexagon::STrib_imm_cdnPt_V4;
+
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ return Hexagon::STrib_imm_cdnNotPt_V4;
+
+ case Hexagon::POST_STbri_cPt :
+ return Hexagon::POST_STbri_cdnPt_V4;
+
+ case Hexagon::POST_STbri_cNotPt :
+ return Hexagon::POST_STbri_cdnNotPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::STb_GP_cPt_V4 :
+ return Hexagon::STb_GP_cdnPt_V4;
+
+ case Hexagon::STb_GP_cNotPt_V4 :
+ return Hexagon::STb_GP_cdnNotPt_V4;
+
+ case Hexagon::STrib_GP_cPt_V4 :
+ return Hexagon::STrib_GP_cdnPt_V4;
+
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ return Hexagon::STrib_GP_cdnNotPt_V4;
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_cPt :
+ return Hexagon::STrid_cdnPt_V4;
+
+ case Hexagon::STrid_cNotPt :
+ return Hexagon::STrid_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_cPt :
+ return Hexagon::STrid_indexed_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_cNotPt :
+ return Hexagon::STrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STdri_cPt :
+ return Hexagon::POST_STdri_cdnPt_V4;
+
+ case Hexagon::POST_STdri_cNotPt :
+ return Hexagon::POST_STdri_cdnNotPt_V4;
+
+ case Hexagon::STd_GP_cPt_V4 :
+ return Hexagon::STd_GP_cdnPt_V4;
+
+ case Hexagon::STd_GP_cNotPt_V4 :
+ return Hexagon::STd_GP_cdnNotPt_V4;
+
+ case Hexagon::STrid_GP_cPt_V4 :
+ return Hexagon::STrid_GP_cdnPt_V4;
+
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ return Hexagon::STrid_GP_cdnNotPt_V4;
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cPt :
+ return Hexagon::STrih_cdnPt_V4;
+
+ case Hexagon::STrih_cNotPt :
+ return Hexagon::STrih_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_cPt :
+ return Hexagon::STrih_indexed_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_cNotPt :
+ return Hexagon::STrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::STrih_imm_cPt_V4 :
+ return Hexagon::STrih_imm_cdnPt_V4;
+
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ return Hexagon::STrih_imm_cdnNotPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_SThri_cPt :
+ return Hexagon::POST_SThri_cdnPt_V4;
+
+ case Hexagon::POST_SThri_cNotPt :
+ return Hexagon::POST_SThri_cdnNotPt_V4;
+
+ case Hexagon::STh_GP_cPt_V4 :
+ return Hexagon::STh_GP_cdnPt_V4;
+
+ case Hexagon::STh_GP_cNotPt_V4 :
+ return Hexagon::STh_GP_cdnNotPt_V4;
+
+ case Hexagon::STrih_GP_cPt_V4 :
+ return Hexagon::STrih_GP_cdnPt_V4;
+
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ return Hexagon::STrih_GP_cdnNotPt_V4;
+
+ // Store word conditionally
+ case Hexagon::STriw_cPt :
+ return Hexagon::STriw_cdnPt_V4;
+
+ case Hexagon::STriw_cNotPt :
+ return Hexagon::STriw_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_cPt :
+ return Hexagon::STriw_indexed_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_cNotPt :
+ return Hexagon::STriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::STriw_imm_cPt_V4 :
+ return Hexagon::STriw_imm_cdnPt_V4;
+
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ return Hexagon::STriw_imm_cdnNotPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::POST_STwri_cPt :
+ return Hexagon::POST_STwri_cdnPt_V4;
+
+ case Hexagon::POST_STwri_cNotPt :
+ return Hexagon::POST_STwri_cdnNotPt_V4;
+
+ case Hexagon::STw_GP_cPt_V4 :
+ return Hexagon::STw_GP_cdnPt_V4;
+
+ case Hexagon::STw_GP_cNotPt_V4 :
+ return Hexagon::STw_GP_cdnNotPt_V4;
+
+ case Hexagon::STriw_GP_cPt_V4 :
+ return Hexagon::STriw_GP_cdnPt_V4;
+
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ return Hexagon::STriw_GP_cdnNotPt_V4;
+
+ // Conditional Jumps
+ case Hexagon::JMP_c:
+ return Hexagon::JMP_cdnPt;
+
+ case Hexagon::JMP_cNot:
+ return Hexagon::JMP_cdnNotPt;
+
+ case Hexagon::JMPR_cPt:
+ return Hexagon::JMPR_cdnPt_V3;
+
+ case Hexagon::JMPR_cNotPt:
+ return Hexagon::JMPR_cdnNotPt_V3;
+
+ // Conditional Transfers
+ case Hexagon::TFR_cPt:
+ return Hexagon::TFR_cdnPt;
+
+ case Hexagon::TFR_cNotPt:
+ return Hexagon::TFR_cdnNotPt;
+
+ case Hexagon::TFRI_cPt:
+ return Hexagon::TFRI_cdnPt;
+
+ case Hexagon::TFRI_cNotPt:
+ return Hexagon::TFRI_cdnNotPt;
+
+ // Load double word
+ case Hexagon::LDrid_cPt :
+ return Hexagon::LDrid_cdnPt;
+
+ case Hexagon::LDrid_cNotPt :
+ return Hexagon::LDrid_cdnNotPt;
+
+ case Hexagon::LDrid_indexed_cPt :
+ return Hexagon::LDrid_indexed_cdnPt;
+
+ case Hexagon::LDrid_indexed_cNotPt :
+ return Hexagon::LDrid_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrid_cPt :
+ return Hexagon::POST_LDrid_cdnPt_V4;
+
+ case Hexagon::POST_LDrid_cNotPt :
+ return Hexagon::POST_LDrid_cdnNotPt_V4;
+
+ // Load word
+ case Hexagon::LDriw_cPt :
+ return Hexagon::LDriw_cdnPt;
+
+ case Hexagon::LDriw_cNotPt :
+ return Hexagon::LDriw_cdnNotPt;
+
+ case Hexagon::LDriw_indexed_cPt :
+ return Hexagon::LDriw_indexed_cdnPt;
+
+ case Hexagon::LDriw_indexed_cNotPt :
+ return Hexagon::LDriw_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriw_cPt :
+ return Hexagon::POST_LDriw_cdnPt_V4;
+
+ case Hexagon::POST_LDriw_cNotPt :
+ return Hexagon::POST_LDriw_cdnNotPt_V4;
+
+ // Load halfword
+ case Hexagon::LDrih_cPt :
+ return Hexagon::LDrih_cdnPt;
+
+ case Hexagon::LDrih_cNotPt :
+ return Hexagon::LDrih_cdnNotPt;
+
+ case Hexagon::LDrih_indexed_cPt :
+ return Hexagon::LDrih_indexed_cdnPt;
+
+ case Hexagon::LDrih_indexed_cNotPt :
+ return Hexagon::LDrih_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrih_cPt :
+ return Hexagon::POST_LDrih_cdnPt_V4;
+
+ case Hexagon::POST_LDrih_cNotPt :
+ return Hexagon::POST_LDrih_cdnNotPt_V4;
+
+ // Load byte
+ case Hexagon::LDrib_cPt :
+ return Hexagon::LDrib_cdnPt;
+
+ case Hexagon::LDrib_cNotPt :
+ return Hexagon::LDrib_cdnNotPt;
+
+ case Hexagon::LDrib_indexed_cPt :
+ return Hexagon::LDrib_indexed_cdnPt;
+
+ case Hexagon::LDrib_indexed_cNotPt :
+ return Hexagon::LDrib_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDrib_cPt :
+ return Hexagon::POST_LDrib_cdnPt_V4;
+
+ case Hexagon::POST_LDrib_cNotPt :
+ return Hexagon::POST_LDrib_cdnNotPt_V4;
+
+ // Load unsigned halfword
+ case Hexagon::LDriuh_cPt :
+ return Hexagon::LDriuh_cdnPt;
+
+ case Hexagon::LDriuh_cNotPt :
+ return Hexagon::LDriuh_cdnNotPt;
+
+ case Hexagon::LDriuh_indexed_cPt :
+ return Hexagon::LDriuh_indexed_cdnPt;
+
+ case Hexagon::LDriuh_indexed_cNotPt :
+ return Hexagon::LDriuh_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriuh_cPt :
+ return Hexagon::POST_LDriuh_cdnPt_V4;
+
+ case Hexagon::POST_LDriuh_cNotPt :
+ return Hexagon::POST_LDriuh_cdnNotPt_V4;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cPt :
+ return Hexagon::LDriub_cdnPt;
+
+ case Hexagon::LDriub_cNotPt :
+ return Hexagon::LDriub_cdnNotPt;
+
+ case Hexagon::LDriub_indexed_cPt :
+ return Hexagon::LDriub_indexed_cdnPt;
+
+ case Hexagon::LDriub_indexed_cNotPt :
+ return Hexagon::LDriub_indexed_cdnNotPt;
+
+ case Hexagon::POST_LDriub_cPt :
+ return Hexagon::POST_LDriub_cdnPt_V4;
+
+ case Hexagon::POST_LDriub_cNotPt :
+ return Hexagon::POST_LDriub_cdnNotPt_V4;
+
+ // V4 indexed+scaled load
+
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ return Hexagon::LDrid_indexed_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ return Hexagon::LDrib_indexed_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ return Hexagon::LDriub_indexed_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ return Hexagon::LDrih_indexed_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ return Hexagon::LDriuh_indexed_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ return Hexagon::LDriw_indexed_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_cdnNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cdnNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cPt_V4:
+ return Hexagon::LDd_GP_cdnPt_V4;
+
+ case Hexagon::LDd_GP_cNotPt_V4:
+ return Hexagon::LDd_GP_cdnNotPt_V4;
+
+ case Hexagon::LDb_GP_cPt_V4:
+ return Hexagon::LDb_GP_cdnPt_V4;
+
+ case Hexagon::LDb_GP_cNotPt_V4:
+ return Hexagon::LDb_GP_cdnNotPt_V4;
+
+ case Hexagon::LDub_GP_cPt_V4:
+ return Hexagon::LDub_GP_cdnPt_V4;
+
+ case Hexagon::LDub_GP_cNotPt_V4:
+ return Hexagon::LDub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDh_GP_cPt_V4:
+ return Hexagon::LDh_GP_cdnPt_V4;
+
+ case Hexagon::LDh_GP_cNotPt_V4:
+ return Hexagon::LDh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDuh_GP_cPt_V4:
+ return Hexagon::LDuh_GP_cdnPt_V4;
+
+ case Hexagon::LDuh_GP_cNotPt_V4:
+ return Hexagon::LDuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDw_GP_cPt_V4:
+ return Hexagon::LDw_GP_cdnPt_V4;
+
+ case Hexagon::LDw_GP_cNotPt_V4:
+ return Hexagon::LDw_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrid_GP_cPt_V4:
+ return Hexagon::LDrid_GP_cdnPt_V4;
+
+ case Hexagon::LDrid_GP_cNotPt_V4:
+ return Hexagon::LDrid_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrib_GP_cPt_V4:
+ return Hexagon::LDrib_GP_cdnPt_V4;
+
+ case Hexagon::LDrib_GP_cNotPt_V4:
+ return Hexagon::LDrib_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriub_GP_cPt_V4:
+ return Hexagon::LDriub_GP_cdnPt_V4;
+
+ case Hexagon::LDriub_GP_cNotPt_V4:
+ return Hexagon::LDriub_GP_cdnNotPt_V4;
+
+ case Hexagon::LDrih_GP_cPt_V4:
+ return Hexagon::LDrih_GP_cdnPt_V4;
+
+ case Hexagon::LDrih_GP_cNotPt_V4:
+ return Hexagon::LDrih_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriuh_GP_cPt_V4:
+ return Hexagon::LDriuh_GP_cdnPt_V4;
+
+ case Hexagon::LDriuh_GP_cNotPt_V4:
+ return Hexagon::LDriuh_GP_cdnNotPt_V4;
+
+ case Hexagon::LDriw_GP_cPt_V4:
+ return Hexagon::LDriw_GP_cdnPt_V4;
+
+ case Hexagon::LDriw_GP_cNotPt_V4:
+ return Hexagon::LDriw_GP_cdnNotPt_V4;
+
+ // Conditional store new-value byte
+ case Hexagon::STrib_cPt_nv_V4 :
+ return Hexagon::STrib_cdnPt_nv_V4;
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ return Hexagon::STrib_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnPt_nv_V4;
+ case Hexagon::POST_STbri_cNotPt_nv_V4 :
+ return Hexagon::POST_STbri_cdnNotPt_nv_V4;
+
+ case Hexagon::STb_GP_cPt_nv_V4 :
+ return Hexagon::STb_GP_cdnPt_nv_V4;
+
+ case Hexagon::STb_GP_cNotPt_nv_V4 :
+ return Hexagon::STb_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrib_GP_cPt_nv_V4 :
+ return Hexagon::STrib_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrib_GP_cNotPt_nv_V4 :
+ return Hexagon::STrib_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value halfword
+ case Hexagon::STrih_cPt_nv_V4 :
+ return Hexagon::STrih_cdnPt_nv_V4;
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ return Hexagon::STrih_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnPt_nv_V4;
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ return Hexagon::POST_SThri_cdnNotPt_nv_V4;
+
+ case Hexagon::STh_GP_cPt_nv_V4 :
+ return Hexagon::STh_GP_cdnPt_nv_V4;
+
+ case Hexagon::STh_GP_cNotPt_nv_V4 :
+ return Hexagon::STh_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STrih_GP_cPt_nv_V4 :
+ return Hexagon::STrih_GP_cdnPt_nv_V4;
+
+ case Hexagon::STrih_GP_cNotPt_nv_V4 :
+ return Hexagon::STrih_GP_cdnNotPt_nv_V4;
+
+ // Conditional store new-value word
+ case Hexagon::STriw_cPt_nv_V4 :
+ return Hexagon::STriw_cdnPt_nv_V4;
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ return Hexagon::STriw_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnPt_nv_V4;
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ return Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4;
+
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ return Hexagon::POST_STwri_cdnPt_nv_V4;
+ case Hexagon::POST_STwri_cNotPt_nv_V4:
+ return Hexagon::POST_STwri_cdnNotPt_nv_V4;
+
+ case Hexagon::STw_GP_cPt_nv_V4 :
+ return Hexagon::STw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STw_GP_cNotPt_nv_V4 :
+ return Hexagon::STw_GP_cdnNotPt_nv_V4;
+
+ case Hexagon::STriw_GP_cPt_nv_V4 :
+ return Hexagon::STriw_GP_cdnPt_nv_V4;
+
+ case Hexagon::STriw_GP_cNotPt_nv_V4 :
+ return Hexagon::STriw_GP_cdnNotPt_nv_V4;
+
+ // Conditional add
+ case Hexagon::ADD_ri_cPt :
+ return Hexagon::ADD_ri_cdnPt;
+ case Hexagon::ADD_ri_cNotPt :
+ return Hexagon::ADD_ri_cdnNotPt;
+
+ case Hexagon::ADD_rr_cPt :
+ return Hexagon::ADD_rr_cdnPt;
+ case Hexagon::ADD_rr_cNotPt :
+ return Hexagon::ADD_rr_cdnNotPt;
+
+ // Conditional logical Operations
+ case Hexagon::XOR_rr_cPt :
+ return Hexagon::XOR_rr_cdnPt;
+ case Hexagon::XOR_rr_cNotPt :
+ return Hexagon::XOR_rr_cdnNotPt;
+
+ case Hexagon::AND_rr_cPt :
+ return Hexagon::AND_rr_cdnPt;
+ case Hexagon::AND_rr_cNotPt :
+ return Hexagon::AND_rr_cdnNotPt;
+
+ case Hexagon::OR_rr_cPt :
+ return Hexagon::OR_rr_cdnPt;
+ case Hexagon::OR_rr_cNotPt :
+ return Hexagon::OR_rr_cdnNotPt;
+
+ // Conditional Subtract
+ case Hexagon::SUB_rr_cPt :
+ return Hexagon::SUB_rr_cdnPt;
+ case Hexagon::SUB_rr_cNotPt :
+ return Hexagon::SUB_rr_cdnNotPt;
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cPt :
+ return Hexagon::COMBINE_rr_cdnPt;
+ case Hexagon::COMBINE_rr_cNotPt :
+ return Hexagon::COMBINE_rr_cdnNotPt;
+
+ case Hexagon::ASLH_cPt_V4 :
+ return Hexagon::ASLH_cdnPt_V4;
+ case Hexagon::ASLH_cNotPt_V4 :
+ return Hexagon::ASLH_cdnNotPt_V4;
+
+ case Hexagon::ASRH_cPt_V4 :
+ return Hexagon::ASRH_cdnPt_V4;
+ case Hexagon::ASRH_cNotPt_V4 :
+ return Hexagon::ASRH_cdnNotPt_V4;
+
+ case Hexagon::SXTB_cPt_V4 :
+ return Hexagon::SXTB_cdnPt_V4;
+ case Hexagon::SXTB_cNotPt_V4 :
+ return Hexagon::SXTB_cdnNotPt_V4;
+
+ case Hexagon::SXTH_cPt_V4 :
+ return Hexagon::SXTH_cdnPt_V4;
+ case Hexagon::SXTH_cNotPt_V4 :
+ return Hexagon::SXTH_cdnNotPt_V4;
+
+ case Hexagon::ZXTB_cPt_V4 :
+ return Hexagon::ZXTB_cdnPt_V4;
+ case Hexagon::ZXTB_cNotPt_V4 :
+ return Hexagon::ZXTB_cdnNotPt_V4;
+
+ case Hexagon::ZXTH_cPt_V4 :
+ return Hexagon::ZXTH_cdnPt_V4;
+ case Hexagon::ZXTH_cNotPt_V4 :
+ return Hexagon::ZXTH_cdnNotPt_V4;
+ }
+}
+
+// Returns true if an instruction can be promoted to a .new predicated form
+// or a new-value store.
+bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) {
+ return isCondInst(MI) || IsNewifyStore(MI);
+}
+
+bool HexagonPacketizerList::isCondInst (MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ const MCInstrDesc& TID = MI->getDesc();
+ // bug 5670: until that is fixed,
+ // this portion is disabled.
+ if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
+ || QII->isConditionalTransfer(MI)
+ || QII->isConditionalALU32(MI)
+ || QII->isConditionalLoad(MI)
+ || QII->isConditionalStore(MI)) {
+ return true;
+ }
+ return false;
+}
+
+
+// Promote an instruction to its .new form. At this point we have already
+// called CanPromoteToDotNew and made sure that it can indeed be promoted.
+bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
+ SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC) {
+
+ assert (DepType == SDep::Data);
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ int NewOpcode;
+ if (RC == Hexagon::PredRegsRegisterClass)
+ NewOpcode = GetDotNewPredOp(MI->getOpcode());
+ else
+ NewOpcode = GetDotNewOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+
+ return true;
+}
+
+// Returns the most basic instruction for the .new predicated instructions and
+// new-value stores.
+// For example, all of the following instructions will be converted back to the
+// same instruction:
+// 1) if (p0.new) memw(R0+#0) = R1.new --->
+// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1
+// 3) if (p0.new) memw(R0+#0) = R1 --->
+//
+// To understand the translation of instruction 1 to its original form, consider
+// a packet with 3 instructions.
+// { p0 = cmp.eq(R0,R1)
+// if (p0.new) R2 = add(R3, R4)
+// R5 = add (R3, R1)
+// }
+// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet
+//
+// This instruction can be part of the previous packet only if both p0 and R2
+// are promoted to .new values. This promotion happens in steps: first the
+// predicate register is promoted to .new, and in the next iteration R2 is
+// promoted. Therefore, in case of a dependence check failure (due to R5)
+// during the next iteration, it should be converted back to its most basic
+// form.
+
+static int GetDotOldOp(const int opc) {
+ switch (opc) {
+ default: llvm_unreachable("Unknown .old type");
+
+ case Hexagon::TFR_cdnPt:
+ return Hexagon::TFR_cPt;
+
+ case Hexagon::TFR_cdnNotPt:
+ return Hexagon::TFR_cNotPt;
+
+ case Hexagon::TFRI_cdnPt:
+ return Hexagon::TFRI_cPt;
+
+ case Hexagon::TFRI_cdnNotPt:
+ return Hexagon::TFRI_cNotPt;
+
+ case Hexagon::JMP_cdnPt:
+ return Hexagon::JMP_c;
+
+ case Hexagon::JMP_cdnNotPt:
+ return Hexagon::JMP_cNot;
+
+ case Hexagon::JMPR_cdnPt_V3:
+ return Hexagon::JMPR_cPt;
+
+ case Hexagon::JMPR_cdnNotPt_V3:
+ return Hexagon::JMPR_cNotPt;
+
+ // Load double word
+
+ case Hexagon::LDrid_cdnPt :
+ return Hexagon::LDrid_cPt;
+
+ case Hexagon::LDrid_cdnNotPt :
+ return Hexagon::LDrid_cNotPt;
+
+ case Hexagon::LDrid_indexed_cdnPt :
+ return Hexagon::LDrid_indexed_cPt;
+
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ return Hexagon::LDrid_indexed_cNotPt;
+
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ return Hexagon::POST_LDrid_cPt;
+
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ return Hexagon::POST_LDrid_cNotPt;
+
+ // Load word
+
+ case Hexagon::LDriw_cdnPt :
+ return Hexagon::LDriw_cPt;
+
+ case Hexagon::LDriw_cdnNotPt :
+ return Hexagon::LDriw_cNotPt;
+
+ case Hexagon::LDriw_indexed_cdnPt :
+ return Hexagon::LDriw_indexed_cPt;
+
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ return Hexagon::LDriw_indexed_cNotPt;
+
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ return Hexagon::POST_LDriw_cPt;
+
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ return Hexagon::POST_LDriw_cNotPt;
+
+ // Load half
+
+ case Hexagon::LDrih_cdnPt :
+ return Hexagon::LDrih_cPt;
+
+ case Hexagon::LDrih_cdnNotPt :
+ return Hexagon::LDrih_cNotPt;
+
+ case Hexagon::LDrih_indexed_cdnPt :
+ return Hexagon::LDrih_indexed_cPt;
+
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ return Hexagon::LDrih_indexed_cNotPt;
+
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ return Hexagon::POST_LDrih_cPt;
+
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ return Hexagon::POST_LDrih_cNotPt;
+
+ // Load byte
+
+ case Hexagon::LDrib_cdnPt :
+ return Hexagon::LDrib_cPt;
+
+ case Hexagon::LDrib_cdnNotPt :
+ return Hexagon::LDrib_cNotPt;
+
+ case Hexagon::LDrib_indexed_cdnPt :
+ return Hexagon::LDrib_indexed_cPt;
+
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ return Hexagon::LDrib_indexed_cNotPt;
+
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ return Hexagon::POST_LDrib_cPt;
+
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ return Hexagon::POST_LDrib_cNotPt;
+
+ // Load unsigned half
+
+ case Hexagon::LDriuh_cdnPt :
+ return Hexagon::LDriuh_cPt;
+
+ case Hexagon::LDriuh_cdnNotPt :
+ return Hexagon::LDriuh_cNotPt;
+
+ case Hexagon::LDriuh_indexed_cdnPt :
+ return Hexagon::LDriuh_indexed_cPt;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ return Hexagon::LDriuh_indexed_cNotPt;
+
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ return Hexagon::POST_LDriuh_cPt;
+
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ return Hexagon::POST_LDriuh_cNotPt;
+
+ // Load unsigned byte
+ case Hexagon::LDriub_cdnPt :
+ return Hexagon::LDriub_cPt;
+
+ case Hexagon::LDriub_cdnNotPt :
+ return Hexagon::LDriub_cNotPt;
+
+ case Hexagon::LDriub_indexed_cdnPt :
+ return Hexagon::LDriub_indexed_cPt;
+
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ return Hexagon::LDriub_indexed_cNotPt;
+
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ return Hexagon::POST_LDriub_cPt;
+
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ return Hexagon::POST_LDriub_cNotPt;
+
+ // V4 indexed+scaled Load
+
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_cPt_V4;
+
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_cNotPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_cPt_V4;
+
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_cNotPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_cPt_V4;
+
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_cNotPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriub_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_cPt_V4;
+
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_cNotPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cPt_V4;
+
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_cNotPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriuh_indexed_shl_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_cPt_V4;
+
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_cNotPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cPt_V4;
+
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::LDriw_indexed_shl_cNotPt_V4;
+
+ // V4 global address load
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ return Hexagon::LDd_GP_cPt_V4;
+
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ return Hexagon::LDd_GP_cNotPt_V4;
+
+ case Hexagon::LDb_GP_cdnPt_V4:
+ return Hexagon::LDb_GP_cPt_V4;
+
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ return Hexagon::LDb_GP_cNotPt_V4;
+
+ case Hexagon::LDub_GP_cdnPt_V4:
+ return Hexagon::LDub_GP_cPt_V4;
+
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ return Hexagon::LDub_GP_cNotPt_V4;
+
+ case Hexagon::LDh_GP_cdnPt_V4:
+ return Hexagon::LDh_GP_cPt_V4;
+
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ return Hexagon::LDh_GP_cNotPt_V4;
+
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ return Hexagon::LDuh_GP_cPt_V4;
+
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ return Hexagon::LDuh_GP_cNotPt_V4;
+
+ case Hexagon::LDw_GP_cdnPt_V4:
+ return Hexagon::LDw_GP_cPt_V4;
+
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ return Hexagon::LDw_GP_cNotPt_V4;
+
+ case Hexagon::LDrid_GP_cdnPt_V4:
+ return Hexagon::LDrid_GP_cPt_V4;
+
+ case Hexagon::LDrid_GP_cdnNotPt_V4:
+ return Hexagon::LDrid_GP_cNotPt_V4;
+
+ case Hexagon::LDrib_GP_cdnPt_V4:
+ return Hexagon::LDrib_GP_cPt_V4;
+
+ case Hexagon::LDrib_GP_cdnNotPt_V4:
+ return Hexagon::LDrib_GP_cNotPt_V4;
+
+ case Hexagon::LDriub_GP_cdnPt_V4:
+ return Hexagon::LDriub_GP_cPt_V4;
+
+ case Hexagon::LDriub_GP_cdnNotPt_V4:
+ return Hexagon::LDriub_GP_cNotPt_V4;
+
+ case Hexagon::LDrih_GP_cdnPt_V4:
+ return Hexagon::LDrih_GP_cPt_V4;
+
+ case Hexagon::LDrih_GP_cdnNotPt_V4:
+ return Hexagon::LDrih_GP_cNotPt_V4;
+
+ case Hexagon::LDriuh_GP_cdnPt_V4:
+ return Hexagon::LDriuh_GP_cPt_V4;
+
+ case Hexagon::LDriuh_GP_cdnNotPt_V4:
+ return Hexagon::LDriuh_GP_cNotPt_V4;
+
+ case Hexagon::LDriw_GP_cdnPt_V4:
+ return Hexagon::LDriw_GP_cPt_V4;
+
+ case Hexagon::LDriw_GP_cdnNotPt_V4:
+ return Hexagon::LDriw_GP_cNotPt_V4;
+
+ // Conditional add
+
+ case Hexagon::ADD_ri_cdnPt :
+ return Hexagon::ADD_ri_cPt;
+ case Hexagon::ADD_ri_cdnNotPt :
+ return Hexagon::ADD_ri_cNotPt;
+
+ case Hexagon::ADD_rr_cdnPt :
+ return Hexagon::ADD_rr_cPt;
+ case Hexagon::ADD_rr_cdnNotPt:
+ return Hexagon::ADD_rr_cNotPt;
+
+  // Conditional logical operations
+
+ case Hexagon::XOR_rr_cdnPt :
+ return Hexagon::XOR_rr_cPt;
+ case Hexagon::XOR_rr_cdnNotPt :
+ return Hexagon::XOR_rr_cNotPt;
+
+ case Hexagon::AND_rr_cdnPt :
+ return Hexagon::AND_rr_cPt;
+ case Hexagon::AND_rr_cdnNotPt :
+ return Hexagon::AND_rr_cNotPt;
+
+ case Hexagon::OR_rr_cdnPt :
+ return Hexagon::OR_rr_cPt;
+ case Hexagon::OR_rr_cdnNotPt :
+ return Hexagon::OR_rr_cNotPt;
+
+ // Conditional Subtract
+
+ case Hexagon::SUB_rr_cdnPt :
+ return Hexagon::SUB_rr_cPt;
+ case Hexagon::SUB_rr_cdnNotPt :
+ return Hexagon::SUB_rr_cNotPt;
+
+ // Conditional combine
+
+ case Hexagon::COMBINE_rr_cdnPt :
+ return Hexagon::COMBINE_rr_cPt;
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ return Hexagon::COMBINE_rr_cNotPt;
+
+  // Conditional shift operations
+
+ case Hexagon::ASLH_cdnPt_V4 :
+ return Hexagon::ASLH_cPt_V4;
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ return Hexagon::ASLH_cNotPt_V4;
+
+ case Hexagon::ASRH_cdnPt_V4 :
+ return Hexagon::ASRH_cPt_V4;
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ return Hexagon::ASRH_cNotPt_V4;
+
+ case Hexagon::SXTB_cdnPt_V4 :
+ return Hexagon::SXTB_cPt_V4;
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ return Hexagon::SXTB_cNotPt_V4;
+
+ case Hexagon::SXTH_cdnPt_V4 :
+ return Hexagon::SXTH_cPt_V4;
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ return Hexagon::SXTH_cNotPt_V4;
+
+ case Hexagon::ZXTB_cdnPt_V4 :
+ return Hexagon::ZXTB_cPt_V4;
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ return Hexagon::ZXTB_cNotPt_V4;
+
+ case Hexagon::ZXTH_cdnPt_V4 :
+ return Hexagon::ZXTH_cPt_V4;
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+ return Hexagon::ZXTH_cNotPt_V4;
+
+ // Store byte
+
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ return Hexagon::STrib_imm_cPt_V4;
+
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ return Hexagon::STrib_imm_cNotPt_V4;
+
+ case Hexagon::STrib_cdnPt_nv_V4 :
+ case Hexagon::STrib_cPt_nv_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ return Hexagon::STrib_cPt;
+
+ case Hexagon::STrib_cdnNotPt_nv_V4 :
+ case Hexagon::STrib_cNotPt_nv_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ return Hexagon::STrib_cNotPt;
+
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnPt_nv_V4 :
+ return Hexagon::STrib_indexed_cPt;
+
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_nv_V4 :
+ return Hexagon::STrib_indexed_cNotPt;
+
+ case Hexagon::STrib_indexed_shl_cdnPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrib_indexed_shl_cPt_V4;
+
+ case Hexagon::STrib_indexed_shl_cdnNotPt_nv_V4:
+ case Hexagon::STrib_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrib_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STbri_cdnPt_nv_V4 :
+ case Hexagon::POST_STbri_cPt_nv_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ return Hexagon::POST_STbri_cPt;
+
+ case Hexagon::POST_STbri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STbri_cNotPt_nv_V4:
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ return Hexagon::POST_STbri_cNotPt;
+
+ case Hexagon::STb_GP_cdnPt_nv_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cPt_nv_V4:
+ return Hexagon::STb_GP_cPt_V4;
+
+ case Hexagon::STb_GP_cdnNotPt_nv_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cNotPt_nv_V4:
+ return Hexagon::STb_GP_cNotPt_V4;
+
+ case Hexagon::STrib_GP_cdnPt_nv_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cPt_nv_V4:
+ return Hexagon::STrib_GP_cPt_V4;
+
+ case Hexagon::STrib_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cNotPt_nv_V4:
+ return Hexagon::STrib_GP_cNotPt_V4;
+
+ // Store new-value byte - unconditional
+ case Hexagon::STrib_nv_V4:
+ return Hexagon::STrib;
+
+ case Hexagon::STrib_indexed_nv_V4:
+ return Hexagon::STrib_indexed;
+
+ case Hexagon::STrib_indexed_shl_nv_V4:
+ return Hexagon::STrib_indexed_shl_V4;
+
+ case Hexagon::STrib_shl_nv_V4:
+ return Hexagon::STrib_shl_V4;
+
+ case Hexagon::STrib_GP_nv_V4:
+ return Hexagon::STrib_GP_V4;
+
+ case Hexagon::STb_GP_nv_V4:
+ return Hexagon::STb_GP_V4;
+
+ case Hexagon::POST_STbri_nv_V4:
+ return Hexagon::POST_STbri;
+
+ // Store halfword
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ return Hexagon::STrih_imm_cPt_V4;
+
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ return Hexagon::STrih_imm_cNotPt_V4;
+
+ case Hexagon::STrih_cdnPt_nv_V4 :
+ case Hexagon::STrih_cPt_nv_V4 :
+ case Hexagon::STrih_cdnPt_V4 :
+ return Hexagon::STrih_cPt;
+
+ case Hexagon::STrih_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_cNotPt_nv_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ return Hexagon::STrih_cNotPt;
+
+ case Hexagon::STrih_indexed_cdnPt_nv_V4:
+ case Hexagon::STrih_indexed_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ return Hexagon::STrih_indexed_cPt;
+
+ case Hexagon::STrih_indexed_cdnNotPt_nv_V4:
+ case Hexagon::STrih_indexed_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_cNotPt;
+
+ case Hexagon::STrih_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrih_indexed_shl_cPt_V4;
+
+ case Hexagon::STrih_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrih_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_SThri_cdnPt_nv_V4 :
+ case Hexagon::POST_SThri_cPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ return Hexagon::POST_SThri_cPt;
+
+ case Hexagon::POST_SThri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cNotPt_nv_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ return Hexagon::POST_SThri_cNotPt;
+
+ case Hexagon::STh_GP_cdnPt_nv_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cPt_nv_V4:
+ return Hexagon::STh_GP_cPt_V4;
+
+ case Hexagon::STh_GP_cdnNotPt_nv_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cNotPt_nv_V4:
+ return Hexagon::STh_GP_cNotPt_V4;
+
+ case Hexagon::STrih_GP_cdnPt_nv_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cPt_nv_V4:
+ return Hexagon::STrih_GP_cPt_V4;
+
+ case Hexagon::STrih_GP_cdnNotPt_nv_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cNotPt_nv_V4:
+ return Hexagon::STrih_GP_cNotPt_V4;
+
+ // Store new-value halfword - unconditional
+
+ case Hexagon::STrih_nv_V4:
+ return Hexagon::STrih;
+
+ case Hexagon::STrih_indexed_nv_V4:
+ return Hexagon::STrih_indexed;
+
+ case Hexagon::STrih_indexed_shl_nv_V4:
+ return Hexagon::STrih_indexed_shl_V4;
+
+ case Hexagon::STrih_shl_nv_V4:
+ return Hexagon::STrih_shl_V4;
+
+ case Hexagon::STrih_GP_nv_V4:
+ return Hexagon::STrih_GP_V4;
+
+ case Hexagon::STh_GP_nv_V4:
+ return Hexagon::STh_GP_V4;
+
+ case Hexagon::POST_SThri_nv_V4:
+ return Hexagon::POST_SThri;
+
+ // Store word
+
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ return Hexagon::STriw_imm_cPt_V4;
+
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ return Hexagon::STriw_imm_cNotPt_V4;
+
+ case Hexagon::STriw_cdnPt_nv_V4 :
+ case Hexagon::STriw_cPt_nv_V4 :
+ case Hexagon::STriw_cdnPt_V4 :
+ return Hexagon::STriw_cPt;
+
+ case Hexagon::STriw_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_cNotPt_nv_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ return Hexagon::STriw_cNotPt;
+
+ case Hexagon::STriw_indexed_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ return Hexagon::STriw_indexed_cPt;
+
+ case Hexagon::STriw_indexed_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_cNotPt;
+
+ case Hexagon::STriw_indexed_shl_cdnPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ return Hexagon::STriw_indexed_shl_cPt_V4;
+
+ case Hexagon::STriw_indexed_shl_cdnNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_nv_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STriw_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STwri_cdnPt_nv_V4 :
+ case Hexagon::POST_STwri_cPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ return Hexagon::POST_STwri_cPt;
+
+ case Hexagon::POST_STwri_cdnNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cNotPt_nv_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ return Hexagon::POST_STwri_cNotPt;
+
+ case Hexagon::STw_GP_cdnPt_nv_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cPt_nv_V4:
+ return Hexagon::STw_GP_cPt_V4;
+
+ case Hexagon::STw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cNotPt_nv_V4:
+ return Hexagon::STw_GP_cNotPt_V4;
+
+ case Hexagon::STriw_GP_cdnPt_nv_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cPt_nv_V4:
+ return Hexagon::STriw_GP_cPt_V4;
+
+ case Hexagon::STriw_GP_cdnNotPt_nv_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cNotPt_nv_V4:
+ return Hexagon::STriw_GP_cNotPt_V4;
+
+ // Store new-value word - unconditional
+
+ case Hexagon::STriw_nv_V4:
+ return Hexagon::STriw;
+
+ case Hexagon::STriw_indexed_nv_V4:
+ return Hexagon::STriw_indexed;
+
+ case Hexagon::STriw_indexed_shl_nv_V4:
+ return Hexagon::STriw_indexed_shl_V4;
+
+ case Hexagon::STriw_shl_nv_V4:
+ return Hexagon::STriw_shl_V4;
+
+ case Hexagon::STriw_GP_nv_V4:
+ return Hexagon::STriw_GP_V4;
+
+ case Hexagon::STw_GP_nv_V4:
+ return Hexagon::STw_GP_V4;
+
+ case Hexagon::POST_STwri_nv_V4:
+ return Hexagon::POST_STwri;
+
+ // Store doubleword
+
+ case Hexagon::STrid_cdnPt_V4 :
+ return Hexagon::STrid_cPt;
+
+ case Hexagon::STrid_cdnNotPt_V4 :
+ return Hexagon::STrid_cNotPt;
+
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ return Hexagon::STrid_indexed_cPt;
+
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_cNotPt;
+
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ return Hexagon::STrid_indexed_shl_cPt_V4;
+
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ return Hexagon::STrid_indexed_shl_cNotPt_V4;
+
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ return Hexagon::POST_STdri_cPt;
+
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ return Hexagon::POST_STdri_cNotPt;
+
+ case Hexagon::STd_GP_cdnPt_V4 :
+ return Hexagon::STd_GP_cPt_V4;
+
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ return Hexagon::STd_GP_cNotPt_V4;
+
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ return Hexagon::STrid_GP_cPt_V4;
+
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ return Hexagon::STrid_GP_cNotPt_V4;
+ }
+}
+
+bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotOldOp(MI->getOpcode());
+ MI->setDesc(QII->get(NewOpcode));
+ return true;
+}
+
+// Returns true if an instruction is predicated on p0 and false if it's
+// predicated on !p0.
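+// For example (illustrative): TFR_cPt ("if (p0) rd = rs") returns true,
+// while TFR_cNotPt ("if (!p0) rd = rs") returns false.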
+
+static bool GetPredicateSense(MachineInstr* MI,
+ const HexagonInstrInfo *QII) {
+
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::STrib_cPt :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cPt :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STbri_cPt :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::STrih_cPt :
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cPt :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_SThri_cPt :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::STriw_cPt :
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cPt :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STwri_cPt :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::STrib_imm_cPt_V4 :
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrid_cPt :
+ case Hexagon::STrid_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cPt :
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::POST_STdri_cPt :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::STrih_imm_cPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::JMP_cdnPt :
+ case Hexagon::LDrid_cPt :
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_indexed_cPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::POST_LDrid_cPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::LDriw_cPt :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_indexed_cPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::POST_LDriw_cPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::LDrih_cPt :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_indexed_cPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::POST_LDrih_cPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::LDrib_cPt :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_indexed_cPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::POST_LDrib_cPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::LDriuh_cPt :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_indexed_cPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::POST_LDriuh_cPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::LDriub_cPt :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_indexed_cPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::POST_LDriub_cPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cPt_V4 :
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::ADD_ri_cPt :
+ case Hexagon::ADD_ri_cdnPt :
+ case Hexagon::ADD_rr_cPt :
+ case Hexagon::ADD_rr_cdnPt :
+ case Hexagon::XOR_rr_cPt :
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::AND_rr_cPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::OR_rr_cPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::SUB_rr_cPt :
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cPt :
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::ASLH_cPt_V4 :
+ case Hexagon::ASLH_cdnPt_V4 :
+ case Hexagon::ASRH_cPt_V4 :
+ case Hexagon::ASRH_cdnPt_V4 :
+ case Hexagon::SXTB_cPt_V4 :
+ case Hexagon::SXTB_cdnPt_V4 :
+ case Hexagon::SXTH_cPt_V4 :
+ case Hexagon::SXTH_cdnPt_V4 :
+ case Hexagon::ZXTB_cPt_V4 :
+ case Hexagon::ZXTB_cdnPt_V4 :
+ case Hexagon::ZXTH_cPt_V4 :
+ case Hexagon::ZXTH_cdnPt_V4 :
+ case Hexagon::LDrid_GP_cPt_V4 :
+ case Hexagon::LDrib_GP_cPt_V4 :
+ case Hexagon::LDriub_GP_cPt_V4 :
+ case Hexagon::LDrih_GP_cPt_V4 :
+ case Hexagon::LDriuh_GP_cPt_V4 :
+ case Hexagon::LDriw_GP_cPt_V4 :
+ case Hexagon::LDd_GP_cPt_V4 :
+ case Hexagon::LDb_GP_cPt_V4 :
+ case Hexagon::LDub_GP_cPt_V4 :
+ case Hexagon::LDh_GP_cPt_V4 :
+ case Hexagon::LDuh_GP_cPt_V4 :
+ case Hexagon::LDw_GP_cPt_V4 :
+ case Hexagon::STrid_GP_cPt_V4 :
+ case Hexagon::STrib_GP_cPt_V4 :
+ case Hexagon::STrih_GP_cPt_V4 :
+ case Hexagon::STriw_GP_cPt_V4 :
+ case Hexagon::STd_GP_cPt_V4 :
+ case Hexagon::STb_GP_cPt_V4 :
+ case Hexagon::STh_GP_cPt_V4 :
+ case Hexagon::STw_GP_cPt_V4 :
+ case Hexagon::LDrid_GP_cdnPt_V4 :
+ case Hexagon::LDrib_GP_cdnPt_V4 :
+ case Hexagon::LDriub_GP_cdnPt_V4 :
+ case Hexagon::LDrih_GP_cdnPt_V4 :
+ case Hexagon::LDriuh_GP_cdnPt_V4 :
+ case Hexagon::LDriw_GP_cdnPt_V4 :
+ case Hexagon::LDd_GP_cdnPt_V4 :
+ case Hexagon::LDb_GP_cdnPt_V4 :
+ case Hexagon::LDub_GP_cdnPt_V4 :
+ case Hexagon::LDh_GP_cdnPt_V4 :
+ case Hexagon::LDuh_GP_cdnPt_V4 :
+ case Hexagon::LDw_GP_cdnPt_V4 :
+ case Hexagon::STrid_GP_cdnPt_V4 :
+ case Hexagon::STrib_GP_cdnPt_V4 :
+ case Hexagon::STrih_GP_cdnPt_V4 :
+ case Hexagon::STriw_GP_cdnPt_V4 :
+ case Hexagon::STd_GP_cdnPt_V4 :
+ case Hexagon::STb_GP_cdnPt_V4 :
+ case Hexagon::STh_GP_cdnPt_V4 :
+ case Hexagon::STw_GP_cdnPt_V4 :
+ return true;
+
+ case Hexagon::TFR_cNotPt:
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFRI_cNotPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::STrib_cNotPt :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cNotPt :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cNotPt :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrih_cNotPt :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cNotPt :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cNotPt :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+ case Hexagon::STriw_cNotPt :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cNotPt :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cNotPt :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+ case Hexagon::STrib_imm_cNotPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrid_cNotPt :
+ case Hexagon::STrid_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_cNotPt :
+ case Hexagon::STrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cNotPt :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cNotPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cNotPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::JMP_cdnNotPt :
+ case Hexagon::LDrid_cNotPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cNotPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cNotPt :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cNotPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cNotPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cNotPt :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cNotPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cNotPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cNotPt :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cNotPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cNotPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cNotPt :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cNotPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cNotPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cNotPt :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cNotPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cNotPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cNotPt :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::ADD_ri_cNotPt :
+ case Hexagon::ADD_ri_cdnNotPt :
+ case Hexagon::ADD_rr_cNotPt :
+ case Hexagon::ADD_rr_cdnNotPt :
+ case Hexagon::XOR_rr_cNotPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cNotPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cNotPt :
+ case Hexagon::OR_rr_cdnNotPt :
+ case Hexagon::SUB_rr_cNotPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+ case Hexagon::COMBINE_rr_cNotPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+ case Hexagon::ASLH_cNotPt_V4 :
+ case Hexagon::ASLH_cdnNotPt_V4 :
+ case Hexagon::ASRH_cNotPt_V4 :
+ case Hexagon::ASRH_cdnNotPt_V4 :
+ case Hexagon::SXTB_cNotPt_V4 :
+ case Hexagon::SXTB_cdnNotPt_V4 :
+ case Hexagon::SXTH_cNotPt_V4 :
+ case Hexagon::SXTH_cdnNotPt_V4 :
+ case Hexagon::ZXTB_cNotPt_V4 :
+ case Hexagon::ZXTB_cdnNotPt_V4 :
+ case Hexagon::ZXTH_cNotPt_V4 :
+ case Hexagon::ZXTH_cdnNotPt_V4 :
+
+ case Hexagon::LDrid_GP_cNotPt_V4 :
+ case Hexagon::LDrib_GP_cNotPt_V4 :
+ case Hexagon::LDriub_GP_cNotPt_V4 :
+ case Hexagon::LDrih_GP_cNotPt_V4 :
+ case Hexagon::LDriuh_GP_cNotPt_V4 :
+ case Hexagon::LDriw_GP_cNotPt_V4 :
+ case Hexagon::LDd_GP_cNotPt_V4 :
+ case Hexagon::LDb_GP_cNotPt_V4 :
+ case Hexagon::LDub_GP_cNotPt_V4 :
+ case Hexagon::LDh_GP_cNotPt_V4 :
+ case Hexagon::LDuh_GP_cNotPt_V4 :
+ case Hexagon::LDw_GP_cNotPt_V4 :
+ case Hexagon::STrid_GP_cNotPt_V4 :
+ case Hexagon::STrib_GP_cNotPt_V4 :
+ case Hexagon::STrih_GP_cNotPt_V4 :
+ case Hexagon::STriw_GP_cNotPt_V4 :
+ case Hexagon::STd_GP_cNotPt_V4 :
+ case Hexagon::STb_GP_cNotPt_V4 :
+ case Hexagon::STh_GP_cNotPt_V4 :
+ case Hexagon::STw_GP_cNotPt_V4 :
+ case Hexagon::LDrid_GP_cdnNotPt_V4 :
+ case Hexagon::LDrib_GP_cdnNotPt_V4 :
+ case Hexagon::LDriub_GP_cdnNotPt_V4 :
+ case Hexagon::LDrih_GP_cdnNotPt_V4 :
+ case Hexagon::LDriuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDriw_GP_cdnNotPt_V4 :
+ case Hexagon::LDd_GP_cdnNotPt_V4 :
+ case Hexagon::LDb_GP_cdnNotPt_V4 :
+ case Hexagon::LDub_GP_cdnNotPt_V4 :
+ case Hexagon::LDh_GP_cdnNotPt_V4 :
+ case Hexagon::LDuh_GP_cdnNotPt_V4 :
+ case Hexagon::LDw_GP_cdnNotPt_V4 :
+ case Hexagon::STrid_GP_cdnNotPt_V4 :
+ case Hexagon::STrib_GP_cdnNotPt_V4 :
+ case Hexagon::STrih_GP_cdnNotPt_V4 :
+ case Hexagon::STriw_GP_cdnNotPt_V4 :
+ case Hexagon::STd_GP_cdnNotPt_V4 :
+ case Hexagon::STb_GP_cdnNotPt_V4 :
+ case Hexagon::STh_GP_cdnNotPt_V4 :
+ case Hexagon::STw_GP_cdnNotPt_V4 :
+ return false;
+
+ default:
+ assert (false && "Unknown predicate sense of the instruction");
+ }
+ // return *some value* to avoid compiler warning
+ return false;
+}
+
+bool HexagonPacketizerList::isDotNewInst(MachineInstr* MI) {
+ if (isNewValueInst(MI))
+ return true;
+
+ switch (MI->getOpcode()) {
+ case Hexagon::TFR_cdnNotPt:
+ case Hexagon::TFR_cdnPt:
+ case Hexagon::TFRI_cdnNotPt:
+ case Hexagon::TFRI_cdnPt:
+ case Hexagon::LDrid_cdnPt :
+ case Hexagon::LDrid_cdnNotPt :
+ case Hexagon::LDrid_indexed_cdnPt :
+ case Hexagon::LDrid_indexed_cdnNotPt :
+ case Hexagon::POST_LDrid_cdnPt_V4 :
+ case Hexagon::POST_LDrid_cdnNotPt_V4 :
+ case Hexagon::LDriw_cdnPt :
+ case Hexagon::LDriw_cdnNotPt :
+ case Hexagon::LDriw_indexed_cdnPt :
+ case Hexagon::LDriw_indexed_cdnNotPt :
+ case Hexagon::POST_LDriw_cdnPt_V4 :
+ case Hexagon::POST_LDriw_cdnNotPt_V4 :
+ case Hexagon::LDrih_cdnPt :
+ case Hexagon::LDrih_cdnNotPt :
+ case Hexagon::LDrih_indexed_cdnPt :
+ case Hexagon::LDrih_indexed_cdnNotPt :
+ case Hexagon::POST_LDrih_cdnPt_V4 :
+ case Hexagon::POST_LDrih_cdnNotPt_V4 :
+ case Hexagon::LDrib_cdnPt :
+ case Hexagon::LDrib_cdnNotPt :
+ case Hexagon::LDrib_indexed_cdnPt :
+ case Hexagon::LDrib_indexed_cdnNotPt :
+ case Hexagon::POST_LDrib_cdnPt_V4 :
+ case Hexagon::POST_LDrib_cdnNotPt_V4 :
+ case Hexagon::LDriuh_cdnPt :
+ case Hexagon::LDriuh_cdnNotPt :
+ case Hexagon::LDriuh_indexed_cdnPt :
+ case Hexagon::LDriuh_indexed_cdnNotPt :
+ case Hexagon::POST_LDriuh_cdnPt_V4 :
+ case Hexagon::POST_LDriuh_cdnNotPt_V4 :
+ case Hexagon::LDriub_cdnPt :
+ case Hexagon::LDriub_cdnNotPt :
+ case Hexagon::LDriub_indexed_cdnPt :
+ case Hexagon::LDriub_indexed_cdnNotPt :
+ case Hexagon::POST_LDriub_cdnPt_V4 :
+ case Hexagon::POST_LDriub_cdnNotPt_V4 :
+
+ case Hexagon::LDrid_indexed_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrib_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriub_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_cdnNotPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriuh_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_cdnNotPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::LDriw_indexed_shl_cdnNotPt_V4 :
+
+  // Conditional add
+ case Hexagon::ADD_ri_cdnPt:
+ case Hexagon::ADD_ri_cdnNotPt:
+ case Hexagon::ADD_rr_cdnPt:
+ case Hexagon::ADD_rr_cdnNotPt:
+
+ // Conditional logical operations
+ case Hexagon::XOR_rr_cdnPt :
+ case Hexagon::XOR_rr_cdnNotPt :
+ case Hexagon::AND_rr_cdnPt :
+ case Hexagon::AND_rr_cdnNotPt :
+ case Hexagon::OR_rr_cdnPt :
+ case Hexagon::OR_rr_cdnNotPt :
+
+  // Conditional subtract
+ case Hexagon::SUB_rr_cdnPt :
+ case Hexagon::SUB_rr_cdnNotPt :
+
+ // Conditional combine
+ case Hexagon::COMBINE_rr_cdnPt :
+ case Hexagon::COMBINE_rr_cdnNotPt :
+
+ // Conditional shift operations
+ case Hexagon::ASLH_cdnPt_V4:
+ case Hexagon::ASLH_cdnNotPt_V4:
+ case Hexagon::ASRH_cdnPt_V4:
+ case Hexagon::ASRH_cdnNotPt_V4:
+ case Hexagon::SXTB_cdnPt_V4:
+ case Hexagon::SXTB_cdnNotPt_V4:
+ case Hexagon::SXTH_cdnPt_V4:
+ case Hexagon::SXTH_cdnNotPt_V4:
+ case Hexagon::ZXTB_cdnPt_V4:
+ case Hexagon::ZXTB_cdnNotPt_V4:
+ case Hexagon::ZXTH_cdnPt_V4:
+ case Hexagon::ZXTH_cdnNotPt_V4:
+
+ // Conditional stores
+ case Hexagon::STrib_imm_cdnPt_V4 :
+ case Hexagon::STrib_imm_cdnNotPt_V4 :
+ case Hexagon::STrib_cdnPt_V4 :
+ case Hexagon::STrib_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_cdnPt_V4 :
+ case Hexagon::STrib_indexed_cdnNotPt_V4 :
+ case Hexagon::POST_STbri_cdnPt_V4 :
+ case Hexagon::POST_STbri_cdnNotPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrib_indexed_shl_cdnNotPt_V4 :
+
+ // Store doubleword conditionally
+ case Hexagon::STrid_indexed_cdnPt_V4 :
+ case Hexagon::STrid_indexed_cdnNotPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrid_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STdri_cdnPt_V4 :
+ case Hexagon::POST_STdri_cdnNotPt_V4 :
+
+ // Store halfword conditionally
+ case Hexagon::STrih_cdnPt_V4 :
+ case Hexagon::STrih_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_cdnPt_V4 :
+ case Hexagon::STrih_indexed_cdnNotPt_V4 :
+ case Hexagon::STrih_imm_cdnPt_V4 :
+ case Hexagon::STrih_imm_cdnNotPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnPt_V4 :
+ case Hexagon::STrih_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_SThri_cdnPt_V4 :
+ case Hexagon::POST_SThri_cdnNotPt_V4 :
+
+ // Store word conditionally
+ case Hexagon::STriw_cdnPt_V4 :
+ case Hexagon::STriw_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_cdnPt_V4 :
+ case Hexagon::STriw_indexed_cdnNotPt_V4 :
+ case Hexagon::STriw_imm_cdnPt_V4 :
+ case Hexagon::STriw_imm_cdnNotPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnPt_V4 :
+ case Hexagon::STriw_indexed_shl_cdnNotPt_V4 :
+ case Hexagon::POST_STwri_cdnPt_V4 :
+ case Hexagon::POST_STwri_cdnNotPt_V4 :
+
+ case Hexagon::LDd_GP_cdnPt_V4:
+ case Hexagon::LDd_GP_cdnNotPt_V4:
+ case Hexagon::LDb_GP_cdnPt_V4:
+ case Hexagon::LDb_GP_cdnNotPt_V4:
+ case Hexagon::LDub_GP_cdnPt_V4:
+ case Hexagon::LDub_GP_cdnNotPt_V4:
+ case Hexagon::LDh_GP_cdnPt_V4:
+ case Hexagon::LDh_GP_cdnNotPt_V4:
+ case Hexagon::LDuh_GP_cdnPt_V4:
+ case Hexagon::LDuh_GP_cdnNotPt_V4:
+ case Hexagon::LDw_GP_cdnPt_V4:
+ case Hexagon::LDw_GP_cdnNotPt_V4:
+ case Hexagon::LDrid_GP_cdnPt_V4:
+ case Hexagon::LDrid_GP_cdnNotPt_V4:
+ case Hexagon::LDrib_GP_cdnPt_V4:
+ case Hexagon::LDrib_GP_cdnNotPt_V4:
+ case Hexagon::LDriub_GP_cdnPt_V4:
+ case Hexagon::LDriub_GP_cdnNotPt_V4:
+ case Hexagon::LDrih_GP_cdnPt_V4:
+ case Hexagon::LDrih_GP_cdnNotPt_V4:
+ case Hexagon::LDriuh_GP_cdnPt_V4:
+ case Hexagon::LDriuh_GP_cdnNotPt_V4:
+ case Hexagon::LDriw_GP_cdnPt_V4:
+ case Hexagon::LDriw_GP_cdnNotPt_V4:
+
+ case Hexagon::STrid_GP_cdnPt_V4:
+ case Hexagon::STrid_GP_cdnNotPt_V4:
+ case Hexagon::STrib_GP_cdnPt_V4:
+ case Hexagon::STrib_GP_cdnNotPt_V4:
+ case Hexagon::STrih_GP_cdnPt_V4:
+ case Hexagon::STrih_GP_cdnNotPt_V4:
+ case Hexagon::STriw_GP_cdnPt_V4:
+ case Hexagon::STriw_GP_cdnNotPt_V4:
+ case Hexagon::STd_GP_cdnPt_V4:
+ case Hexagon::STd_GP_cdnNotPt_V4:
+ case Hexagon::STb_GP_cdnPt_V4:
+ case Hexagon::STb_GP_cdnNotPt_V4:
+ case Hexagon::STh_GP_cdnPt_V4:
+ case Hexagon::STh_GP_cdnNotPt_V4:
+ case Hexagon::STw_GP_cdnPt_V4:
+ case Hexagon::STw_GP_cdnNotPt_V4:
+
+ return true;
+ }
+ return false;
+}
+
+static MachineOperand& GetPostIncrementOperand(MachineInstr *MI,
+ const HexagonInstrInfo *QII) {
+ assert(QII->isPostIncrement(MI) && "Not a post increment operation.");
+#ifndef NDEBUG
+  // A post-increment operation implies a duplicated register: it appears both
+  // as a def and as a use. Use a dense map to find the duplicate in the
+  // operand list. Caution: DenseMap initializes with a minimum of 64 buckets,
+  // whereas there are at most 5 operands in a post-increment instruction.
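+  // For example (illustrative): in "r3 = memw(r2++#4)", r2 is both defined
+  // (incremented) and used (as the address), so r2 is the post-increment
+  // operand.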
+ DenseMap<unsigned, unsigned> DefRegsSet;
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isDef()) {
+ DefRegsSet[MI->getOperand(opNum).getReg()] = 1;
+ }
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++)
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).isUse()) {
+ if (DefRegsSet[MI->getOperand(opNum).getReg()]) {
+ return MI->getOperand(opNum);
+ }
+ }
+#else
+ if (MI->getDesc().mayLoad()) {
+ // The 2nd operand is always the post increment operand in load.
+ assert(MI->getOperand(1).isReg() &&
+ "Post increment operand has be to a register.");
+ return (MI->getOperand(1));
+ }
+ if (MI->getDesc().mayStore()) {
+ // The 1st operand is always the post increment operand in store.
+ assert(MI->getOperand(0).isReg() &&
+ "Post increment operand has be to a register.");
+ return (MI->getOperand(0));
+ }
+#endif
+  // We should never get here.
+ llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
+}
+
+// Get the value being stored.
+static MachineOperand& GetStoreValueOperand(MachineInstr *MI) {
+  // The value being stored is always the last operand.
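+  // For example (illustrative): in "memw(r1+#4) = r2", r2 is the last
+  // operand and is the value being stored.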
+ return (MI->getOperand(MI->getNumOperands()-1));
+}
+
+// Can this store be a new-value store?
+// The following restrictions must be respected when converting a store into
+// a new-value store (see the illustrative example below).
+// 1. If an instruction uses auto-increment, its address register cannot
+// be a new-value register. Arch Spec 5.4.2.1
+// 2. If an instruction uses absolute-set addressing mode,
+// its address register cannot be a new-value register.
+// Arch Spec 5.4.2.1. TODO: This check is not enabled, as
+// absolute-set addressing mode patterns are not implemented.
+// 3. If an instruction produces a 64-bit result, its registers cannot be used
+// as new-value registers. Arch Spec 5.4.2.2.
+// 4. If the instruction that sets a new-value register is conditional, then
+// the instruction that uses the new-value register must also be conditional,
+// and both must always have their predicates evaluate identically.
+// Arch Spec 5.4.2.3.
+// 5. There is an implied restriction that a packet cannot have another store
+// if it contains a new-value store; as a corollary, if a packet already
+// contains a store, a new-value store cannot be added.
+// Arch Spec: 3.4.4.2
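+// For example (illustrative of restriction 4), the pair
+//   if (p0) r2 = add(r3, r4)
+//   if (p0) memw(r5+#0) = r2.new
+// is legal only because the producer and the store are predicated on the
+// same sense of the same predicate register.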
+bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
+ MachineInstr *PacketMI, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit)
+{
+ // Make sure we are looking at the store
+ if (!IsNewifyStore(MI))
+ return false;
+
+  // Make sure the dependency is on the value being stored; only then can the
+  // store be newified.
+ if (GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() != DepReg)
+ return false;
+
+  const HexagonRegisterInfo* QRI =
+      (const HexagonRegisterInfo *) TM.getRegisterInfo();
+  const MCInstrDesc& MCID = PacketMI->getDesc();
+
+  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+  // The first operand is always the result.
+  const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI);
+
+  // If there is already a store in the packet, a new-value store cannot be
+  // added. Arch Spec 3.4.4.2.
+ for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+        // If mayStore were set on ALLOCFRAME and DEALLOCFRAME,
+        // these explicit opcode checks would not be needed.
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::DEALLOCFRAME)
+ return false;
+ }
+
+ if (PacketRC == Hexagon::DoubleRegsRegisterClass) {
+    // New-value store constraint: double registers cannot feed a new-value
+    // store. Arch Spec 5.4.2.2.
+ return false;
+ }
+
+  // Make sure it is NOT the post-increment register that we are going to
+  // newify.
+ if (QII->isPostIncrement(MI) &&
+ MI->getDesc().mayStore() &&
+ GetPostIncrementOperand(MI, QII).getReg() == DepReg) {
+ return false;
+ }
+
+ if (QII->isPostIncrement(PacketMI) &&
+ PacketMI->getDesc().mayLoad() &&
+ GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) {
+    // If the source is a post-increment or absolute-set addressing load,
+    // it cannot feed a new-value store. Arch Spec 5.4.2.1. For example:
+    //   r3 = memw(r2++#4)
+    //   memw(r30 + #-1404) = r2.new  -> cannot be a new-value store
+ return false;
+ }
+
+  // If the instruction that feeds the store is predicated, the new-value
+  // store must also be predicated.
+ if (QII->isPredicated(PacketMI)) {
+ if (!QII->isPredicated(MI))
+ return false;
+
+ // Check to make sure that they both will have their predicates
+ // evaluate identically
+ unsigned predRegNumSrc;
+ unsigned predRegNumDst;
+ const TargetRegisterClass* predRegClass;
+
+    // Get the predicate register used in the source instruction.
+    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+      if (PacketMI->getOperand(opNum).isReg()) {
+        predRegNumSrc = PacketMI->getOperand(opNum).getReg();
+        predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc);
+        if (predRegClass == Hexagon::PredRegsRegisterClass) {
+          break;
+        }
+      }
+    }
+ assert ((predRegClass == Hexagon::PredRegsRegisterClass ) &&
+ ("predicate register not found in a predicated PacketMI instruction"));
+
+    // Get the predicate register used in the new-value store instruction.
+    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+      if (MI->getOperand(opNum).isReg()) {
+        predRegNumDst = MI->getOperand(opNum).getReg();
+        predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst);
+        if (predRegClass == Hexagon::PredRegsRegisterClass) {
+          break;
+        }
+      }
+    }
+ assert ((predRegClass == Hexagon::PredRegsRegisterClass ) &&
+ ("predicate register not found in a predicated MI instruction"));
+
+ // New-value register producer and user (store) need to satisfy these
+ // constraints:
+ // 1) Both instructions should be predicated on the same register.
+ // 2) If producer of the new-value register is .new predicated then store
+ // should also be .new predicated and if producer is not .new predicated
+ // then store should not be .new predicated.
+    // 3) Both the new-value register producer and user should have the same
+    // predicate sense, i.e., either both negated or both non-negated.
+
+ if (( predRegNumDst != predRegNumSrc) ||
+ isDotNewInst(PacketMI) != isDotNewInst(MI) ||
+ GetPredicateSense(MI, QII) != GetPredicateSense(PacketMI, QII)) {
+ return false;
+ }
+ }
+
+  // Make sure that, other than the new-value register, no other register of
+  // the store instruction has been modified in the same packet. Predicate
+  // registers can be modified, but they should not be modified between the
+  // producer and the store instruction, as that would make the two
+  // conditional on different values. We already know this to be true for all
+  // the instructions before and including PacketMI. However, we need to
+  // perform the check for the remaining instructions in the packet.
+
+ std::vector<MachineInstr*>::iterator VI;
+ std::vector<MachineInstr*>::iterator VE;
+ unsigned StartCheck = 0;
+
+ for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end();
+ (VI != VE); ++VI) {
+ SUnit* TempSU = MIToSUnit[*VI];
+ MachineInstr* TempMI = TempSU->getInstr();
+
+    // The following condition is true for all the instructions until PacketMI
+    // is reached (StartCheck is set to 0 before the for loop).
+    // The StartCheck flag is 1 for all the instructions after PacketMI.
+ if (TempMI != PacketMI && !StartCheck) // start processing only after
+ continue; // encountering PacketMI
+
+ StartCheck = 1;
+ if (TempMI == PacketMI) // We don't want to check PacketMI for dependence
+ continue;
+
+ for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(), QRI))
+ return false;
+ }
+ }
+
+  // Make sure that for non-post-increment stores:
+  // 1. The register being stored (DepReg) is not also used as an address
+  // operand of the store. This handles V4 base+index registers.
+  // The following store cannot be dot-new:
+  // Eg. r0 = add(r0, #3)
+  //     memw(r1+r0<<#2) = r0
+ if (!QII->isPostIncrement(MI) &&
+ GetStoreValueOperand(MI).isReg() &&
+ GetStoreValueOperand(MI).getReg() == DepReg) {
+ for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+ if (MI->getOperand(opNum).isReg() &&
+ MI->getOperand(opNum).getReg() == DepReg) {
+ return false;
+ }
+ }
+    // 2. If the data definition comes from an implicit definition of the
+    // register, do not newify the store. Eg.
+    // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
+    // STrih_indexed %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
+ for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
+ if (PacketMI->getOperand(opNum).isReg() &&
+ PacketMI->getOperand(opNum).getReg() == DepReg &&
+ PacketMI->getOperand(opNum).isDef() &&
+ PacketMI->getOperand(opNum).isImplicit()) {
+ return false;
+ }
+ }
+ }
+
+  // Can be a dot-new store.
+ return true;
+}
+
+// Can this MI be promoted to either a new-value store
+// or a new-value jump?
+bool HexagonPacketizerList::CanPromoteToNewValue( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII)
+{
+
+  const HexagonRegisterInfo* QRI =
+      (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ if (!QRI->Subtarget.hasV4TOps() ||
+ !IsNewifyStore(MI))
+ return false;
+
+ MachineInstr *PacketMI = PacketSU->getInstr();
+
+  // Check to see if the store can be newified.
+ if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+ return true;
+
+  // Checking whether the compare/jump can be newified is done in a separate
+  // pass; there is no need to check it here.
+ return false;
+}
+
+// Check to see if an instruction can be dot new
+// There are three kinds.
+// 1. dot new on predicate - V2/V3/V4
+// 2. dot new on stores NV/ST - V4
+// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
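+// For example (illustrative): "if (p0.new) r0 = add(r1, r2)" is kind 1,
+// and "memw(r0+#0) = r1.new" is kind 2.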
+bool HexagonPacketizerList::CanPromoteToDotNew( MachineInstr *MI,
+ SUnit *PacketSU, unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit,
+ MachineBasicBlock::iterator &MII,
+ const TargetRegisterClass* RC )
+{
+  // Already a dot-new instruction.
+ if (isDotNewInst(MI) && !IsNewifyStore(MI))
+ return false;
+
+ if (!isNewifiable(MI))
+ return false;
+
+ // predicate .new
+ if (RC == Hexagon::PredRegsRegisterClass && isCondInst(MI))
+ return true;
+ else if (RC != Hexagon::PredRegsRegisterClass &&
+ !IsNewifyStore(MI)) // MI is not a new-value store
+ return false;
+ else {
+ // Create a dot new machine instruction to see if resources can be
+ // allocated. If not, bail out now.
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ int NewOpcode = GetDotNewOp(MI->getOpcode());
+ const MCInstrDesc &desc = QII->get(NewOpcode);
+ DebugLoc dl;
+    MachineInstr *NewMI =
+        MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
+ bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+ MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
+
+ if (!ResourcesAvailable)
+ return false;
+
+    // New-value store only; new-value jumps are generated in a separate pass.
+ if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Go through the packet instructions and search for an anti-dependency
+// between them and DepReg from MI.
+// Consider this case:
+// Trying to add
+// a) %R1<def> = TFRI_cNotPt %P3, 2
+// to this packet:
+// {
+// b) %P0<def> = OR_pp %P3<kill>, %P0<kill>
+// c) %P3<def> = TFR_PdRs %R23
+// d) %R1<def> = TFRI_cdnPt %P3, 4
+// }
+// The P3 from a) and d) will be complements after
+// a)'s P3 is converted to .new form.
+// The anti-dependency between c) and b) is irrelevant for this case.
+bool HexagonPacketizerList::RestrictingDepExistInPacket (MachineInstr* MI,
+ unsigned DepReg,
+ std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ SUnit* PacketSUDep = MIToSUnit[MI];
+
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+    // We only care about dependencies on predicated instructions.
+    if (!QII->isPredicated(*VIN)) continue;
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // Look at dependencies between current members of the packet
+ // and predicate defining instruction MI.
+ // Make sure that dependency is on the exact register
+ // we care about.
+ if (PacketSU->isSucc(PacketSUDep)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+ if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) &&
+ (PacketSU->Succs[i].getKind() == SDep::Anti) &&
+ (PacketSU->Succs[i].getReg() == DepReg)) {
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+
+// Given two predicated instructions, this function detects whether
+// the predicates are complements.
+bool HexagonPacketizerList::ArePredicatesComplements (MachineInstr* MI1,
+ MachineInstr* MI2, std::map <MachineInstr*, SUnit*> MIToSUnit) {
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+ // Currently can only reason about conditional transfers
+ if (!QII->isConditionalTransfer(MI1) || !QII->isConditionalTransfer(MI2)) {
+ return false;
+ }
+
+ // Scheduling unit for candidate
+ SUnit* SU = MIToSUnit[MI1];
+
+ // One corner case deals with the following scenario:
+ // Trying to add
+ // a) %R24<def> = TFR_cPt %P0, %R25
+ // to this packet:
+ //
+ // {
+ // b) %R25<def> = TFR_cNotPt %P0, %R24
+ // c) %P0<def> = CMPEQri %R26, 1
+ // }
+ //
+  // On a general check, a) and b) are complements, but the
+  // presence of c) will convert a) to .new form, after which
+  // it is no longer a complement.
+  // We attempt to detect this by analyzing the existing
+  // dependencies in the packet.
+
+ // Analyze relationships between all existing members of the packet.
+  // Look for an anti-dependency on the same predicate register
+  // as the one used in the candidate.
+ for (std::vector<MachineInstr*>::iterator VIN = CurrentPacketMIs.begin(),
+ VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) {
+
+ // Scheduling Unit for current insn in the packet
+ SUnit* PacketSU = MIToSUnit[*VIN];
+
+ // If this instruction in the packet is succeeded by the candidate...
+ if (PacketSU->isSucc(SU)) {
+ for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
+        // The corner case exists when there is a true data
+        // dependency between the candidate and one of the current
+        // packet members, the dependency is on a predicate register,
+        // and an anti-dependency on the same predicate register
+        // already exists in the packet.
+ if (PacketSU->Succs[i].getSUnit() == SU &&
+ Hexagon::PredRegsRegisterClass->contains(
+ PacketSU->Succs[i].getReg()) &&
+ PacketSU->Succs[i].getKind() == SDep::Data &&
+            // Here *VIN is known to be a predicate-setting instruction
+            // with a true data dependency to the candidate on the
+            // register we care about - c) in the above example.
+            // Now we need to see if there is an anti-dependency
+            // from c) to any other instruction in the
+            // same packet on the predicate register of interest.
+ RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(),
+ MIToSUnit)) {
+ return false;
+ }
+ }
+ }
+ }
+
+  // If the above case does not apply, check the regular
+  // complement condition:
+  // the predicate registers must be the same and
+  // the predicate senses must differ.
+  // We also need to differentiate .old vs. .new:
+  // !p0 is not complementary to p0.new.
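+  // For example (illustrative):
+  //   if (p0) r24 = r25      and  if (!p0) r25 = r24  -> complements
+  //   if (p0.new) r24 = r25  and  if (!p0) r25 = r24  -> not complements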
+ return ((MI1->getOperand(1).getReg() == MI2->getOperand(1).getReg()) &&
+ (GetPredicateSense(MI1, QII) != GetPredicateSense(MI2, QII)) &&
+ (isDotNewInst(MI1) == isDotNewInst(MI2)));
+}
+
+// initPacketizerState - Initialize packetizer flags
+void HexagonPacketizerList::initPacketizerState() {
+
+ Dependence = false;
+ PromotedToDotNew = false;
+ GlueToNewValueJump = false;
+ GlueAllocframeStore = false;
+ FoundSequentialDependence = false;
+
+ return;
+}
+
+// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (MI->isDebugValue())
+ return true;
+
+  // We must print out inline assembly, so it is not ignored.
+ if (MI->isInlineAsm())
+ return false;
+
+ // We check if MI has any functional units mapped to it.
+ // If it doesn't, we ignore the instruction.
+ const MCInstrDesc& TID = MI->getDesc();
+ unsigned SchedClass = TID.getSchedClass();
+  const InstrStage* IS =
+      ResourceTracker->getInstrItins()->beginStage(SchedClass);
+ unsigned FuncUnits = IS->getUnits();
+ return !FuncUnits;
+}
+
+// isSoloInstruction - Returns true for instructions that must be
+// scheduled in their own packet.
+bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) {
+
+ if (MI->isInlineAsm())
+ return true;
+
+ if (MI->isEHLabel())
+ return true;
+
+ // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
+ // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
+ // They must not be grouped with other instructions in a packet.
+ if (IsSchedBarrier(MI))
+ return true;
+
+ return false;
+}
+
+// isLegalToPacketizeTogether:
+// SUI is the current instruction that is outside of the current packet.
+// SUJ is the current instruction inside the current packet against which
+// SUI will be packetized.
+bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ MachineInstr *J = SUJ->getInstr();
+ assert(I && J && "Unable to packetize null instruction!");
+
+ const MCInstrDesc &MCIDI = I->getDesc();
+ const MCInstrDesc &MCIDJ = J->getDesc();
+
+ MachineBasicBlock::iterator II = I;
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ const HexagonRegisterInfo* QRI = (const HexagonRegisterInfo *) TM.getRegisterInfo();
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ // Inline asm cannot go in the packet.
+ if (I->getOpcode() == Hexagon::INLINEASM)
+ llvm_unreachable("Should not meet inline asm here!");
+
+ if (isSoloInstruction(I))
+ llvm_unreachable("Should not meet solo instr here!");
+
+ // A save callee-save register function call can only be in a packet
+ // with instructions that don't write to the callee-save registers.
+ if ((QII->isSaveCalleeSavedRegsCall(I) &&
+ DoesModifyCalleeSavedReg(J, QRI)) ||
+ (QII->isSaveCalleeSavedRegsCall(J) &&
+ DoesModifyCalleeSavedReg(I, QRI))) {
+ Dependence = true;
+ return false;
+ }
+
+ // Two control flow instructions cannot go in the same packet.
+ if (IsControlFlow(I) && IsControlFlow(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ // A LoopN instruction cannot appear in the same packet as a jump or call.
+ if (IsLoopN(I) && ( IsDirectJump(J)
+ || MCIDJ.isCall()
+ || QII->isDeallocRet(J))) {
+ Dependence = true;
+ return false;
+ }
+ if (IsLoopN(J) && ( IsDirectJump(I)
+ || MCIDI.isCall()
+ || QII->isDeallocRet(I))) {
+ Dependence = true;
+ return false;
+ }
+
+ // dealloc_return cannot appear in the same packet as a conditional or
+ // unconditional jump.
+ if (QII->isDeallocRet(I) && ( MCIDJ.isBranch()
+ || MCIDJ.isCall()
+ || MCIDJ.isBarrier())) {
+ Dependence = true;
+ return false;
+ }
+
+
+  // V4 allows dual stores, but does not allow a second store if the
+  // first store is not in SLOT0. New value store, new value jump,
+  // dealloc_return and memop always take SLOT0.
+  // Arch spec 3.4.4.2
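+  // Illustrative packets (assumed assembly syntax, not from this patch):
+  //   { memw(r0) = r2; memw(r1) = r3 }      // OK on V4: dual store
+  //   { memw(r0) = r2; memw(r1) = r3.new }  // rejected below: a new
+  //                                         // value store must own SLOT0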
+ if (QRI->Subtarget.hasV4TOps()) {
+
+ if (MCIDI.mayStore() && MCIDJ.mayStore() && isNewValueInst(J)) {
+ Dependence = true;
+ return false;
+ }
+
+ if ( (QII->isMemOp(J) && MCIDI.mayStore())
+ || (MCIDJ.mayStore() && QII->isMemOp(I))
+ || (QII->isMemOp(J) && QII->isMemOp(I))) {
+ Dependence = true;
+ return false;
+ }
+
+    // A dealloc_return cannot share a packet with a store.
+ if (MCIDJ.mayStore() && QII->isDeallocRet(I)){
+ Dependence = true;
+ return false;
+ }
+
+    // If an instruction feeds a new value jump, glue it.
+ MachineBasicBlock::iterator NextMII = I;
+ ++NextMII;
+ MachineInstr *NextMI = NextMII;
+
+ if (QII->isNewValueJump(NextMI)) {
+
+ bool secondRegMatch = false;
+ bool maintainNewValueJump = false;
+
+ if (NextMI->getOperand(1).isReg() &&
+ I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+ secondRegMatch = true;
+ maintainNewValueJump = true;
+ }
+
+ if (!secondRegMatch &&
+ I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
+ maintainNewValueJump = true;
+ }
+
+ for (std::vector<MachineInstr*>::iterator
+ VI = CurrentPacketMIs.begin(),
+ VE = CurrentPacketMIs.end();
+ (VI != VE && maintainNewValueJump); ++VI) {
+ SUnit* PacketSU = MIToSUnit[*VI];
+
+        // NVJ cannot be part of the dual jump - Arch Spec: section 7.8
+ if (PacketSU->getInstr()->getDesc().isCall()) {
+ Dependence = true;
+ break;
+ }
+ // Validate
+ // 1. Packet does not have a store in it.
+ // 2. If the first operand of the nvj is newified, and the second
+ // operand is also a reg, it (second reg) is not defined in
+ // the same packet.
+ // 3. If the second operand of the nvj is newified, (which means
+ // first operand is also a reg), first reg is not defined in
+ // the same packet.
+ if (PacketSU->getInstr()->getDesc().mayStore() ||
+ PacketSU->getInstr()->getOpcode() == Hexagon::ALLOCFRAME ||
+ // Check #2.
+ (!secondRegMatch && NextMI->getOperand(1).isReg() &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(1).getReg(), QRI)) ||
+ // Check #3.
+ (secondRegMatch &&
+ PacketSU->getInstr()->modifiesRegister(
+ NextMI->getOperand(0).getReg(), QRI))) {
+ Dependence = true;
+ break;
+ }
+ }
+ if (!Dependence)
+ GlueToNewValueJump = true;
+ else
+ return false;
+ }
+ }
+
+ if (SUJ->isSucc(SUI)) {
+ for (unsigned i = 0;
+ (i < SUJ->Succs.size()) && !FoundSequentialDependence;
+ ++i) {
+
+ if (SUJ->Succs[i].getSUnit() != SUI) {
+ continue;
+ }
+
+ SDep::Kind DepType = SUJ->Succs[i].getKind();
+
+ // For direct calls:
+ // Ignore register dependences for call instructions for
+ // packetization purposes except for those due to r31 and
+ // predicate registers.
+ //
+ // For indirect calls:
+ // Same as direct calls + check for true dependences to the register
+ // used in the indirect call.
+ //
+ // We completely ignore Order dependences for call instructions
+ //
+ // For returns:
+ // Ignore register dependences for return instructions like jumpr,
+ // dealloc return unless we have dependencies on the explicit uses
+ // of the registers used by jumpr (like r31) or dealloc return
+ // (like r29 or r30).
+ //
+      // TODO: Currently, jumpr handles only the return of r31, so the
+      // following logic (specifically IsCallDependent) works fine.
+      // Once jumpr is enabled for registers other than r31, the last
+      // part, where it handles the indirect call of that
+      // (IsCallDependent) function, needs to be reworked. Bug 6216 is
+      // opened for this.
+ //
+ unsigned DepReg;
+ const TargetRegisterClass* RC;
+ if (DepType == SDep::Data) {
+ DepReg = SUJ->Succs[i].getReg();
+ RC = QRI->getMinimalPhysRegClass(DepReg);
+ }
+ if ((MCIDI.isCall() || MCIDI.isReturn()) &&
+ (!IsRegDependence(DepType) ||
+ !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
+ /* do nothing */
+ }
+
+ // For instructions that can be promoted to dot-new, try to promote.
+ else if ((DepType == SDep::Data) &&
+ CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) &&
+ PromoteToDotNew(I, DepType, II, RC)) {
+ PromotedToDotNew = true;
+ /* do nothing */
+ }
+
+ else if ((DepType == SDep::Data) &&
+ (QII->isNewValueJump(I))) {
+ /* do nothing */
+ }
+
+ // For predicated instructions, if the predicates are complements
+ // then there can be no dependence.
+ else if (QII->isPredicated(I) &&
+ QII->isPredicated(J) &&
+ ArePredicatesComplements(I, J, MIToSUnit)) {
+ /* do nothing */
+
+ }
+ else if (IsDirectJump(I) &&
+ !MCIDJ.isBranch() &&
+ !MCIDJ.isCall() &&
+ (DepType == SDep::Order)) {
+ // Ignore Order dependences between unconditional direct branches
+ // and non-control-flow instructions
+ /* do nothing */
+ }
+ else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) &&
+ (DepType != SDep::Output)) {
+ // Ignore all dependences for jumps except for true and output
+ // dependences
+ /* do nothing */
+ }
+
+      // Ignore output dependences due to superregs. We can
+      // write to two different subregisters of R1:0, for instance,
+      // in the same cycle.
+      //
+      // If neither I nor J defines DepReg, then this is a
+      // superfluous output dependence. The dependence must be of the
+      // form:
+      //   R0 = ...
+      //   R1 = ...
+      // and there is an output dependence between the two instructions
+      // with
+      //   DepReg = D0.
+      // We want to ignore these dependences.
+      // Ideally, the dependence constructor should annotate such
+      // dependences. We can then avoid this relatively expensive check.
+      //
+ else if (DepType == SDep::Output) {
+ // DepReg is the register that's responsible for the dependence.
+ unsigned DepReg = SUJ->Succs[i].getReg();
+
+        // Check if I or J really defines DepReg.
+ if (I->definesRegister(DepReg) ||
+ J->definesRegister(DepReg)) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ // We ignore Order dependences for
+ // 1. Two loads unless they are volatile.
+ // 2. Two stores in V4 unless they are volatile.
+ else if ((DepType == SDep::Order) &&
+ !I->hasVolatileMemoryRef() &&
+ !J->hasVolatileMemoryRef()) {
+ if (QRI->Subtarget.hasV4TOps() &&
+ // hexagonv4 allows dual store.
+ MCIDI.mayStore() && MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+        // store followed by store -- not OK on V2
+ // store followed by load -- not OK on all (OK if addresses
+ // are not aliased)
+ // load followed by store -- OK on all
+ // load followed by load -- OK on all
+ else if ( !MCIDJ.mayStore()) {
+ /* do nothing */
+ }
+ else {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+      // For V4, special case ALLOCFRAME. Even though there is a dependence
+      // between ALLOCFRAME and the subsequent store, allow them to be
+      // packetized in the same packet. This implies that the store is using
+      // the caller's SP. Hence, the offset needs to be updated accordingly.
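+      // Worked example (values assumed, not from this patch): with
+      // FrameSize = 16 and HEXAGON_LRFP_SIZE = 8, a store of the form
+      // memw(r29 + #28) glued to allocframe is rewritten below to use
+      // #28 - (16 + 8) = #4, the equivalent offset from the caller's SP.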
+ else if (DepType == SDep::Data
+ && QRI->Subtarget.hasV4TOps()
+ && J->getOpcode() == Hexagon::ALLOCFRAME
+ && (I->getOpcode() == Hexagon::STrid
+ || I->getOpcode() == Hexagon::STriw
+ || I->getOpcode() == Hexagon::STrib)
+ && I->getOperand(0).getReg() == QRI->getStackRegister()
+ && QII->isValidOffset(I->getOpcode(),
+ I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE)))
+ {
+ GlueAllocframeStore = true;
+        // Since this store is to be glued with allocframe in the same
+        // packet, it will use the SP of the previous stack frame, i.e.
+        // the caller's SP. Therefore, we need to recalculate the offset
+        // to account for this change.
+ I->getOperand(1).setImm(I->getOperand(1).getImm() -
+ (FrameSize + HEXAGON_LRFP_SIZE));
+ }
+
+ //
+ // Skip over anti-dependences. Two instructions that are
+ // anti-dependent can share a packet
+ //
+ else if (DepType != SDep::Anti) {
+ FoundSequentialDependence = true;
+ break;
+ }
+ }
+
+ if (FoundSequentialDependence) {
+ Dependence = true;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// isLegalToPruneDependencies
+bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
+ MachineInstr *I = SUI->getInstr();
+ assert(I && SUJ->getInstr() && "Unable to packetize null instruction!");
+
+ const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+
+ if (Dependence) {
+
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old.
+ if (PromotedToDotNew) {
+ DemoteToDotOld(I);
+ }
+
+    // Check if the instruction (must be a store) was glued with an allocframe
+    // instruction. If so, restore its offset to its original value, i.e. use
+    // the current SP instead of the caller's SP.
+ if (GlueAllocframeStore) {
+ I->getOperand(1).setImm(I->getOperand(1).getImm() +
+ FrameSize + HEXAGON_LRFP_SIZE);
+ }
+
+ return false;
+ }
+ return true;
+}
+
+MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr *MI) {
+
+ MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock *MBB = MI->getParent();
+
+ const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+
+ if (GlueToNewValueJump) {
+
+ ++MII;
+ MachineInstr *nvjMI = MII;
+ assert(ResourceTracker->canReserveResources(MI));
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(MI) &&
+ !tryAllocateResourcesForConstExt(MI)) {
+ endPacket(MBB, MI);
+ ResourceTracker->reserveResources(MI);
+ assert(canReserveResourcesForConstExt(MI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(MI);
+ // Reserve resources for new value jump constant extender.
+      assert(canReserveResourcesForConstExt(nvjMI) &&
+ "Ensure that there is a slot");
+ reserveResourcesForConstExt(nvjMI);
+ assert(ResourceTracker->canReserveResources(nvjMI) &&
+ "Ensure that there is a slot");
+
+    } else if ( // An extended instruction takes two slots in the packet.
+      // Try to reserve and allocate 4 bytes in the current packet first.
+ (QII->isExtended(nvjMI)
+ && (!tryAllocateResourcesForConstExt(nvjMI)
+ || !ResourceTracker->canReserveResources(nvjMI)))
+      || // For a non-extended instruction, no need to allocate an extra 4 bytes.
+ (!QII->isExtended(nvjMI) && !ResourceTracker->canReserveResources(nvjMI)))
+ {
+ endPacket(MBB, MI);
+ // A new and empty packet starts.
+      // We are sure that the resource requirements can be satisfied.
+      // Therefore, there is no need to call "canReserveResources" again.
+ ResourceTracker->reserveResources(MI);
+ if (QII->isExtended(nvjMI))
+ reserveResourcesForConstExt(nvjMI);
+ }
+ // Here, we are sure that "reserveResources" would succeed.
+ ResourceTracker->reserveResources(nvjMI);
+ CurrentPacketMIs.push_back(MI);
+ CurrentPacketMIs.push_back(nvjMI);
+ } else {
+ if ( QII->isExtended(MI)
+ && ( !tryAllocateResourcesForConstExt(MI)
+ || !ResourceTracker->canReserveResources(MI)))
+ {
+ endPacket(MBB, MI);
+ // Check if the instruction was promoted to a dot-new. If so, demote it
+ // back into a dot-old
+ if (PromotedToDotNew) {
+ DemoteToDotOld(MI);
+ }
+ reserveResourcesForConstExt(MI);
+ }
+    // If "MI" is not an extended insn,
+    // the resource availability has already been checked.
+ ResourceTracker->reserveResources(MI);
+ CurrentPacketMIs.push_back(MI);
+ }
+ return MII;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonPacketizer() {
+ return new HexagonPacketizer();
+}
+
diff --git a/lib/Target/Hexagon/HexagonVarargsCallingConvention.h b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
new file mode 100644
index 000000000000..9305c2702fa4
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVarargsCallingConvention.h
@@ -0,0 +1,141 @@
+//===-- HexagonVarargsCallingConvention.h - Calling Conventions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the functions that assign locations to outgoing function
+// arguments. Adapted from the target independent version, but this handles
+// calls to varargs functions.
+//
+//===----------------------------------------------------------------------===//
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem);
+
+
+static bool CC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+ unsigned ByValSize = 0;
+ if (ArgFlags.isByVal() &&
+ ((ByValSize = ArgFlags.getByValSize()) >
+ (MVT(MVT::i64).getSizeInBits() / 8))) {
+ ForceMem = true;
+ }
+
+
+  // Only assign registers for named (non-varargs) arguments.
+ if ( !ForceMem && ((NonVarArgsParams == -1) || (CurrentParam <=
+ NonVarArgsParams))) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::i16 ||
+ LocVT == MVT::i8 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+  // If it's passed by value, then we need the size of the aggregate, not of
+  // the pointer.
+ if (ArgFlags.isByVal()) {
+ Size = ByValSize;
+
+ // Hexagon_TODO: Get the alignment of the contained type here.
+ Alignment = 8;
+ }
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
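+// Illustrative effect (assumed call, not from this patch): for a varargs
+// callee such as printf(fmt, i32 a, double d), the named argument fmt is
+// assigned from R0-R5 above, while the anonymous arguments a and d fail
+// the CurrentParam <= NonVarArgsParams test and are placed on the stack
+// with the alignment computed above.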
+
+
+static bool RetCC_Hexagon32_VarArgs(unsigned ValNo, EVT ValVT,
+ EVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags,
+ Hexagon_CCState &State,
+ int NonVarArgsParams,
+ int CurrentParam,
+ bool ForceMem) {
+
+ if (LocVT == MVT::i32 ||
+ LocVT == MVT::f32) {
+ static const unsigned RegList1[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
+ Hexagon::R5
+ };
+ if (unsigned Reg = State.AllocateReg(RegList1, 6)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::i64 ||
+ LocVT == MVT::f64) {
+ static const unsigned RegList2[] = {
+ Hexagon::D0, Hexagon::D1, Hexagon::D2
+ };
+ if (unsigned Reg = State.AllocateReg(RegList2, 3)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT.getSimpleVT(), Reg,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+ }
+ }
+
+ const Type* ArgTy = LocVT.getTypeForEVT(State.getContext());
+ unsigned Alignment =
+ State.getTarget().getTargetData()->getABITypeAlignment(ArgTy);
+ unsigned Size =
+ State.getTarget().getTargetData()->getTypeSizeInBits(ArgTy) / 8;
+
+ unsigned Offset3 = State.AllocateStack(Size, Alignment);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT.getSimpleVT(), Offset3,
+ LocVT.getSimpleVT(), LocInfo));
+ return false;
+}
diff --git a/lib/Target/Hexagon/InstPrinter/CMakeLists.txt b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
new file mode 100644
index 000000000000..cb106a884432
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMHexagonAsmPrinter
+ HexagonInstPrinter.cpp
+ )
+
+add_dependencies(LLVMHexagonAsmPrinter HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
new file mode 100644
index 000000000000..75d6bfb0813a
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -0,0 +1,198 @@
+//===- HexagonInstPrinter.cpp - Convert Hexagon MCInst to assembly syntax -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "Hexagon.h"
+#include "HexagonAsmPrinter.h"
+#include "HexagonInstPrinter.h"
+#include "HexagonMCInst.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "HexagonGenAsmWriter.inc"
+
+StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return MII.getName(Opcode);
+}
+
+StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
+}
+
+void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ printInst((const HexagonMCInst*)(MI), O, Annot);
+}
+
+void HexagonInstPrinter::printInst(const HexagonMCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ const char packetPadding[] = " ";
+ const char startPacket = '{',
+ endPacket = '}';
+ // TODO: add outer HW loop when it's supported too.
+ if (MI->getOpcode() == Hexagon::ENDLOOP0) {
+    // Ending a hardware loop is different from ending a regular packet.
+ assert(MI->isEndPacket() && "Loop end must also end the packet");
+
+ if (MI->isStartPacket()) {
+ // There must be a packet to end a loop.
+ // FIXME: when shuffling is always run, this shouldn't be needed.
+ HexagonMCInst Nop;
+ StringRef NoAnnot;
+
+ Nop.setOpcode (Hexagon::NOP);
+ Nop.setStartPacket (MI->isStartPacket());
+ printInst (&Nop, O, NoAnnot);
+ }
+
+ // Close the packet.
+ if (MI->isEndPacket())
+ O << packetPadding << endPacket;
+
+ printInstruction(MI, O);
+ }
+ else {
+ // Prefix the insn opening the packet.
+ if (MI->isStartPacket())
+ O << packetPadding << startPacket << '\n';
+
+ printInstruction(MI, O);
+
+ // Suffix the insn closing the packet.
+ if (MI->isEndPacket())
+ // Suffix the packet in a new line always, since the GNU assembler has
+ // issues with a closing brace on the same line as CONST{32,64}.
+ O << '\n' << packetPadding << endPacket;
+ }
+
+ printAnnotation(O, Annot);
+}
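+// Illustrative output (assumed syntax, not from this patch): a two-insn
+// packet prints as
+//   {
+//     r0 = add(r1, r2)
+//     memw(r29 + #0) = r0.new
+//   }
+// while ENDLOOP0 closes the brace first and then prints itself, so a
+// hardware loop end appears after the closing brace of its packet.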
+
+void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if(MO.isExpr()) {
+ O << *MO.getExpr();
+ } else if(MO.isImm()) {
+ printImmOperand(MI, OpNo, O);
+ } else {
+ assert(false && "Unknown operand");
+ }
+}
+
+void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -MI->getOperand(OpNo).getImm();
+}
+
+void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ O << -1;
+}
+
+void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO0 = MI->getOperand(OpNo);
+ const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+
+ O << getRegisterName(MO0.getReg());
+ O << " + #" << MO1.getImm();
+}
+
+void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ const MCOperand& MO0 = MI->getOperand(OpNo);
+ const MCOperand& MO1 = MI->getOperand(OpNo + 1);
+
+ O << getRegisterName(MO0.getReg()) << ", #" << MO1.getImm();
+}
+
+void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+ assert(MI->getOperand(OpNo).isExpr() && "Expecting expression");
+
+ printOperand(MI, OpNo, O);
+}
+
+void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+  // Branches can take an immediate operand. This is used by the branch
+  // selection pass to print $+8, an eight byte displacement from the PC.
+  assert(false && "Unknown branch operand.");
+}
+
+void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) const {
+}
+
+void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, bool hi) const {
+ const MCOperand& MO = MI->getOperand(OpNo);
+
+ O << '#' << (hi? "HI": "LO") << '(';
+ if (MO.isImm()) {
+ O << '#';
+ printOperand(MI, OpNo, O);
+ } else {
+ assert("Unknown symbol operand");
+ printOperand(MI, OpNo, O);
+ }
+ O << ')';
+}
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
new file mode 100644
index 000000000000..3ce7dfcbdbe2
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -0,0 +1,75 @@
+//===-- HexagonInstPrinter.h - Convert Hexagon MCInst to assembly syntax --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Hexagon MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONINSTPRINTER_H
+#define HEXAGONINSTPRINTER_H
+
+#include "HexagonMCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+ class HexagonInstPrinter : public MCInstPrinter {
+ public:
+ explicit HexagonInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ StringRef getRegName(unsigned RegNo) const;
+ static const char *getRegisterName(unsigned RegNo);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+ void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O)
+ const;
+ void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printConstantPool(const MCInst *MI, unsigned OpNo, raw_ostream &O) const;
+
+ void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
+ { printSymbol(MI, OpNo, O, true); }
+ void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const
+ { printSymbol(MI, OpNo, O, false); }
+
+ bool isConstExtended(const MCInst *MI) const;
+ protected:
+ void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi)
+ const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..8678401feee4
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonAsmPrinter
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/InstPrinter/Makefile b/lib/Target/Hexagon/InstPrinter/Makefile
new file mode 100644
index 000000000000..20331d8807ec
--- /dev/null
+++ b/lib/Target/Hexagon/InstPrinter/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Hexagon/InstPrinter/Makefile ----------------------------===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonAsmPrinter
+
+# Hack: we need to include 'main' Hexagon target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt
new file mode 100644
index 000000000000..c6d419a91058
--- /dev/null
+++ b/lib/Target/Hexagon/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Hexagon/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = Hexagon
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = HexagonCodeGen
+parent = Hexagon
+required_libraries = AsmPrinter CodeGen Core HexagonAsmPrinter HexagonDesc HexagonInfo MC SelectionDAG Support Target
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 000000000000..c9c5a6eadf88
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMHexagonDesc
+ HexagonMCAsmInfo.cpp
+ HexagonMCTargetDesc.cpp
+ )
+
+add_dependencies(LLVMHexagonDesc HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
new file mode 100644
index 000000000000..7221e906342e
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -0,0 +1,70 @@
+//===-- HexagonBaseInfo.h - Top level definitions for Hexagon --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the Hexagon target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc..
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONBASEINFO_H
+#define HEXAGONBASEINFO_H
+
+namespace llvm {
+
+/// HexagonII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace HexagonII {
+ // *** The code below must match HexagonInstrFormat*.td *** //
+
+ // Insn types.
+ // *** Must match HexagonInstrFormat*.td ***
+ enum Type {
+ TypePSEUDO = 0,
+ TypeALU32 = 1,
+ TypeCR = 2,
+ TypeJR = 3,
+ TypeJ = 4,
+ TypeLD = 5,
+ TypeST = 6,
+ TypeSYSTEM = 7,
+ TypeXTYPE = 8,
+ TypeMEMOP = 9,
+ TypeNV = 10,
+ TypePREFIX = 30, // Such as extenders.
+ TypeMARKER = 31 // Such as end of a HW loop.
+ };
+
+
+
+ // MCInstrDesc TSFlags
+ // *** Must match HexagonInstrFormat*.td ***
+ enum {
+ // This 5-bit field describes the insn type.
+ TypePos = 0,
+ TypeMask = 0x1f,
+
+ // Solo instructions.
+ SoloPos = 5,
+ SoloMask = 0x1,
+
+ // Predicated instructions.
+ PredicatedPos = 6,
+ PredicatedMask = 0x1
+ };
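+  // A minimal decoding sketch (editorial assumption; the helpers that do
+  // this live elsewhere in the backend):
+  //   uint64_t F = MI->getDesc().TSFlags;
+  //   HexagonII::Type T = static_cast<HexagonII::Type>(
+  //       (F >> HexagonII::TypePos) & HexagonII::TypeMask);
+  //   bool Pred = (F >> HexagonII::PredicatedPos) &
+  //               HexagonII::PredicatedMask;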
+
+ // *** The code above must match HexagonInstrFormat*.td *** //
+
+} // End namespace HexagonII.
+
+} // End namespace llvm.
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
new file mode 100644
index 000000000000..d6e6c36af5de
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -0,0 +1,36 @@
+//===-- HexagonMCAsmInfo.cpp - Hexagon asm properties ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the HexagonMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCAsmInfo.h"
+
+using namespace llvm;
+
+HexagonMCAsmInfo::HexagonMCAsmInfo(const Target &T, StringRef TT) {
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0; // Hexagon has no 64-bit data directive.
+ CommentString = "//";
+ HasLEB128 = true;
+
+ PrivateGlobalPrefix = ".L";
+ LCOMMDirectiveType = LCOMM::ByteAlignment;
+ InlineAsmStart = "# InlineAsm Start";
+ InlineAsmEnd = "# InlineAsm End";
+ ZeroDirective = "\t.space\t";
+ AscizDirective = "\t.string\t";
+ WeakRefDirective = "\t.weak\t";
+
+ UsesELFSectionDirectiveForBSS = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
new file mode 100644
index 000000000000..d336cd5be917
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- HexagonTargetAsmInfo.h - Hexagon asm properties --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the HexagonMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HexagonMCASMINFO_H
+#define HexagonMCASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class HexagonMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit HexagonMCAsmInfo(const Target &T, StringRef TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
new file mode 100644
index 000000000000..3cfa4fddd87c
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -0,0 +1,95 @@
+//===-- HexagonMCTargetDesc.cpp - Hexagon Target Descriptions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCTargetDesc.h"
+#include "HexagonMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "HexagonGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "HexagonGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createHexagonMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitHexagonMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createHexagonMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitHexagonMCRegisterInfo(X, Hexagon::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createHexagonMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitHexagonMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createHexagonMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new HexagonMCAsmInfo(T, TT);
+
+ // VirtualFP = (R30 + #0).
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(Hexagon::R30, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createHexagonMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeHexagonTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheHexagonTarget,
+ createHexagonMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget, createHexagonMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ createHexagonMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ createHexagonMCSubtargetInfo);
+}
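+// Usage note (editorial, not from this patch): a client typically reaches
+// this hook via llvm::InitializeAllTargetMCs(); after registration the
+// TargetRegistry invokes the createHexagonMC* factories lazily whenever
+// Hexagon MC objects are requested.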
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
new file mode 100644
index 000000000000..2238b1ae5f35
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- HexagonMCTargetDesc.h - Hexagon Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Hexagon specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCTARGETDESC_H
+#define HEXAGONMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+
+extern Target TheHexagonTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Hexagon registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "HexagonGenRegisterInfo.inc"
+
+// Defines symbolic names for the Hexagon instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "HexagonGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "HexagonGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..73c7e016f939
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonDesc
+parent = Hexagon
+required_libraries = HexagonInfo MC
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/MCTargetDesc/Makefile b/lib/Target/Hexagon/MCTargetDesc/Makefile
new file mode 100644
index 000000000000..885be2ddbd88
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Hexagon/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile
new file mode 100644
index 000000000000..dc387c549a1d
--- /dev/null
+++ b/lib/Target/Hexagon/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/Hexagon/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMHexagonCodeGen
+TARGET = Hexagon
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = HexagonGenRegisterInfo.inc \
+ HexagonGenInstrInfo.inc \
+ HexagonGenAsmWriter.inc \
+ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \
+ HexagonGenCallingConv.inc \
+ HexagonGenDFAPacketizer.inc
+
+DIRS = InstPrinter TargetInfo MCTargetDesc
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Hexagon/TargetInfo/CMakeLists.txt b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..5b04a30d26c2
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/CMakeLists.txt
@@ -0,0 +1,8 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonInfo
+ HexagonTargetInfo.cpp
+ )
+
+add_dependencies(LLVMHexagonInfo HexagonCommonTableGen)
diff --git a/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
new file mode 100644
index 000000000000..7aa5dd3b8980
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- HexagonTargetInfo.cpp - Hexagon Target Implementation ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Hexagon.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheHexagonTarget;
+
+extern "C" void LLVMInitializeHexagonTargetInfo() {
+  RegisterTarget<Triple::hexagon, /*HasJIT=*/false>
+    X(TheHexagonTarget, "hexagon", "Hexagon");
+}
diff --git a/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..7b87be3e05a8
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Hexagon/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = HexagonInfo
+parent = Hexagon
+required_libraries = MC Support
+add_to_library_groups = Hexagon
diff --git a/lib/Target/Hexagon/TargetInfo/Makefile b/lib/Target/Hexagon/TargetInfo/Makefile
new file mode 100644
index 000000000000..494cca112249
--- /dev/null
+++ b/lib/Target/Hexagon/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Hexagon/TargetInfo/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMHexagonInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
new file mode 100644
index 000000000000..8ec5673470fc
--- /dev/null
+++ b/lib/Target/LLVMBuild.txt
@@ -0,0 +1,56 @@
+;===- ./lib/Target/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 Mips PTX PowerPC Sparc X86 XCore
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with the best execution engine (the native JIT, if available, or the
+; interpreter).
+[component_0]
+type = LibraryGroup
+name = Engine
+parent = Libraries
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with the configured native target, if any.
+[component_1]
+type = LibraryGroup
+name = Native
+parent = Libraries
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with the configured native code generator, if any.
+[component_2]
+type = LibraryGroup
+name = NativeCodeGen
+parent = Libraries
+
+; The component for the actual target library itself.
+[component_3]
+type = Library
+name = Target
+parent = Libraries
+required_libraries = Core MC Support
+
+; This is a special group whose required libraries are extended (by llvm-build)
+; with every built target, which makes it easy for tools to include every
+; target.
+[component_4]
+type = LibraryGroup
+name = all-targets
+parent = Libraries
diff --git a/lib/Target/MBlaze/AsmParser/CMakeLists.txt b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
index ec8f52a92cb1..813767ba6d65 100644
--- a/lib/Target/MBlaze/AsmParser/CMakeLists.txt
+++ b/lib/Target/MBlaze/AsmParser/CMakeLists.txt
@@ -6,11 +6,4 @@ add_llvm_library(LLVMMBlazeAsmParser
MBlazeAsmParser.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmParser
- LLVMMBlazeInfo
- LLVMMC
- LLVMMCParser
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmParser MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..b10189a9dd97
--- /dev/null
+++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/AsmParser/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeAsmParser
+parent = MBlaze
+required_libraries = MBlazeInfo MC MCParser Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 2d357bb9674d..59a1ed97d3d4 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -9,10 +9,6 @@
#include "MCTargetDesc/MBlazeBaseInfo.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -100,11 +96,7 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() {
return AsmToken(lexedToken);
case AsmToken::Identifier:
{
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
-
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lexedToken.getString().lower());
if (regID) {
return AsmToken(AsmToken::Register,
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 97d311c15107..38fb0e87fdb4 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -18,9 +18,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -347,7 +345,6 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
llvm_unreachable("Implement any new match types added!");
- return true;
}
MBlazeOperand *MBlazeAsmParser::
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 47b0db2cb2d0..bf1deef491c9 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -1,15 +1,15 @@
set(LLVM_TARGET_DEFINITIONS MBlaze.td)
-llvm_tablegen(MBlazeGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(MBlazeGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
-llvm_tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM MBlazeGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MBlazeGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM MBlazeGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM MBlazeGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM MBlazeGenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
+tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(MBlazeCommonTableGen)
add_llvm_target(MBlazeCodeGen
@@ -18,6 +18,7 @@ add_llvm_target(MBlazeCodeGen
MBlazeISelDAGToDAG.cpp
MBlazeISelLowering.cpp
MBlazeFrameLowering.cpp
+ MBlazeMachineFunction.cpp
MBlazeRegisterInfo.cpp
MBlazeSubtarget.cpp
MBlazeTargetMachine.cpp
@@ -29,19 +30,6 @@ add_llvm_target(MBlazeCodeGen
MBlazeELFWriterInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMBlazeAsmPrinter
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
index 112c64c02638..be2dce156d56 100644
--- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt
+++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt
@@ -13,12 +13,4 @@ set_property(
)
endif()
-add_llvm_library_dependencies(LLVMMBlazeDisassembler
- LLVMMBlazeCodeGen
- LLVMMBlazeDesc
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeDisassembler MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..28dd9dc98da6
--- /dev/null
+++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/Disassembler/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeDisassembler
+parent = MBlaze
+required_libraries = MBlazeDesc MBlazeInfo MC Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index fd761f1ca8c1..6b958c85eebf 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeDisassembler.cpp - Disassembler for MicroBlaze ----*- C++ -*-===//
+//===-- MBlazeDisassembler.cpp - Disassembler for MicroBlaze -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#include "MBlaze.h"
-#include "MBlazeInstrInfo.h"
#include "MBlazeDisassembler.h"
#include "llvm/MC/EDInstInfo.h"
#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
@@ -30,14 +29,14 @@
#include "MBlazeGenEDInfo.inc"
namespace llvm {
-extern MCInstrDesc MBlazeInsts[];
+extern const MCInstrDesc MBlazeInsts[];
}
using namespace llvm;
-const unsigned UNSUPPORTED = -1;
+const uint16_t UNSUPPORTED = -1;
-static unsigned mblazeBinary2Opcode[] = {
+static const uint16_t mblazeBinary2Opcode[] = {
MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03
MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07
MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B
@@ -124,6 +123,7 @@ static unsigned decodeSEXT(uint32_t insn) {
case 0x41: return MBlaze::SRL;
case 0x21: return MBlaze::SRC;
case 0x01: return MBlaze::SRA;
+ case 0xE0: return MBlaze::CLZ;
}
}
@@ -177,6 +177,13 @@ static unsigned decodeBR(uint32_t insn) {
}
static unsigned decodeBRI(uint32_t insn) {
+ switch (insn&0x3FFFFFF) {
+ default: break;
+ case 0x0020004: return MBlaze::IDMEMBAR;
+ case 0x0220004: return MBlaze::DMEMBAR;
+ case 0x0420004: return MBlaze::IMEMBAR;
+ }
+
switch ((insn>>16)&0x1F) {
default: return UNSUPPORTED;
case 0x00: return MBlaze::BRI;
@@ -485,7 +492,7 @@ static unsigned getOPCODE(uint32_t insn) {
}
}
-EDInstInfo *MBlazeDisassembler::getEDInfo() const {
+const EDInstInfo *MBlazeDisassembler::getEDInfo() const {
return instInfoMBlaze;
}
@@ -532,6 +539,9 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
default:
return Fail;
+ case MBlazeII::FC:
+ break;
+
case MBlazeII::FRRRR:
if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED)
return Fail;
@@ -548,6 +558,13 @@ MCDisassembler::DecodeStatus MBlazeDisassembler::getInstruction(MCInst &instr,
instr.addOperand(MCOperand::CreateReg(RB));
break;
+ case MBlazeII::FRR:
+ if (RD == UNSUPPORTED || RA == UNSUPPORTED)
+ return Fail;
+ instr.addOperand(MCOperand::CreateReg(RD));
+ instr.addOperand(MCOperand::CreateReg(RA));
+ break;
+
case MBlazeII::FRI:
switch (opcode) {
default:
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
index 0ac0d89efbe7..5c4ae3b1ace8 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
@@ -1,4 +1,4 @@
-//===- MBlazeDisassembler.h - Disassembler for MicroBlaze ------*- C++ -*-===//
+//===-- MBlazeDisassembler.h - Disassembler for MicroBlaze -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,8 +17,6 @@
#include "llvm/MC/MCDisassembler.h"
-struct InternalInstruction;
-
namespace llvm {
class MCInst;
@@ -48,7 +46,7 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
};
} // namespace llvm
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index aff0b3d992d4..586e2d3eefc3 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,9 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
MBlazeInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..3a21a0560aef
--- /dev/null
+++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/InstPrinter/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeAsmPrinter
+parent = MBlaze
+required_libraries = MC Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 570ab08a07aa..51ba7c359a1b 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MBLazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax ----===//
+//= MBlazeInstPrinter.h - Convert MBlaze MCInst to assembly syntax -*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
@@ -21,15 +21,15 @@ namespace llvm {
class MBlazeInstPrinter : public MCInstPrinter {
public:
- MBlazeInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ MBlazeInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
const char *Modifier = 0);
diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt
new file mode 100644
index 000000000000..0b290076a4e9
--- /dev/null
+++ b/lib/Target/MBlaze/LLVMBuild.txt
@@ -0,0 +1,34 @@
+;===- ./lib/Target/MBlaze/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = MBlaze
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+
+[component_1]
+type = Library
+name = MBlazeCodeGen
+parent = MBlaze
+required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 1245658d29ba..b4edff0709e6 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -1,4 +1,4 @@
-//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
+//===-- MBlaze.td - Describe the MBlaze Target Machine -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 97bd083fdd15..55fffe3ebfa7 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -38,8 +38,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -119,7 +117,7 @@ namespace {
static void printHex32(unsigned int Value, raw_ostream &O) {
O << "0x";
for (int i = 7; i >= 0; i--)
- O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+ O.write_hex((Value & (0xF << (i*4))) >> (i*4));
}
// Create a bitmask with all callee saved registers for CPU or Floating Point
@@ -311,9 +309,9 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
// Force static initialization.
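
printHex32 now pushes one nibble at a time through raw_ostream::write_hex instead of building a string with utohexstr. The same digit walk in standalone form (plain C stdio here, since raw_ostream is not assumed):

#include <cstdint>
#include <cstdio>

// Emit exactly eight hex digits, most significant nibble first, as the
// loop in printHex32 above does.
static void printHex32Sketch(uint32_t Value) {
  std::printf("0x");
  for (int i = 7; i >= 0; i--)
    std::printf("%x", (Value >> (i * 4)) & 0xF);
}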
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index c07570a487b9..19e787d8622d 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -29,13 +29,11 @@ using namespace llvm;
STATISTIC(FilledSlots, "Number of delay slots filled");
-namespace llvm {
-cl::opt<bool> DisableDelaySlotFiller(
+static cl::opt<bool> MBDisableDelaySlotFiller(
"disable-mblaze-delay-filler",
cl::init(false),
cl::desc("Disable the MBlaze delay slot filter."),
cl::Hidden);
-}
namespace {
struct Filler : public MachineFunctionPass {
@@ -109,7 +107,6 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// Hazard check
MachineBasicBlock::iterator a = candidate;
MachineBasicBlock::iterator b = slot;
- MCInstrDesc desc = candidate->getDesc();
// MBB layout:-
// candidate := a0 = operation(a1, a2)
@@ -123,7 +120,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// 4. b0 is one or more of {a1, a2}
// 5. a accesses memory, and the middle bit
// contains a store operation.
- bool a_is_memory = desc.mayLoad() || desc.mayStore();
+ bool a_is_memory = candidate->mayLoad() || candidate->mayStore();
// Determine the number of operands in the slot instruction and in the
// candidate instruction.
@@ -156,7 +153,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
}
// Check hazard type 5
- if (a_is_memory && m->getDesc().mayStore())
+ if (a_is_memory && m->mayStore())
return true;
}
@@ -183,8 +180,8 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
if (candidate == MBB.begin())
return false;
- MCInstrDesc brdesc = (--candidate)->getDesc();
- return (brdesc.hasDelaySlot());
+ --candidate;
+ return (candidate->hasDelaySlot());
}
static bool hasUnknownSideEffects(MachineBasicBlock::iterator &I) {
@@ -211,9 +208,8 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
break;
--I;
- MCInstrDesc desc = I->getDesc();
- if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
- desc.isCall() || desc.isReturn() || desc.isBarrier() ||
+ if (I->hasDelaySlot() || I->isBranch() || isDelayFiller(MBB,I) ||
+ I->isCall() || I->isReturn() || I->isBarrier() ||
hasUnknownSideEffects(I))
break;
@@ -232,11 +228,11 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
- if (!DisableDelaySlotFiller)
+ if (!MBDisableDelaySlotFiller)
D = findDelayInstr(MBB,I);
++FilledSlots;
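
The filler now queries MachineInstr directly (hasDelaySlot(), mayLoad(), mayStore()) rather than copying an MCInstrDesc first. A compressed sketch of the hazard-type-5 test in that style, with a hypothetical Instr type standing in for MachineInstr:

struct Instr {
  bool HasDelaySlot, MayLoad, MayStore;
  bool hasDelaySlot() const { return HasDelaySlot; }
  bool mayLoad() const { return MayLoad; }
  bool mayStore() const { return MayStore; }
};

// Hazard type 5: a memory-touching candidate must not be hoisted past a
// store sitting between it and the delay slot.
static bool delayHasHazardSketch(const Instr &Candidate, const Instr &Mid) {
  bool CandidateIsMemory = Candidate.mayLoad() || Candidate.mayStore();
  return CandidateIsMemory && Mid.mayStore();
}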
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
index 3f26ed15b284..e3c7236d1141 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -41,7 +41,6 @@ unsigned MBlazeELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
default:
llvm_unreachable("unknown mblaze machine relocation type");
}
- return 0;
}
long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
@@ -54,7 +53,6 @@ long int MBlazeELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
default:
llvm_unreachable("unknown mblaze relocation type");
}
- return 0;
}
unsigned MBlazeELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
@@ -102,10 +100,8 @@ unsigned MBlazeELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
long int MBlazeELFWriterInfo::computeRelocation(unsigned SymOffset,
unsigned RelOffset,
unsigned RelTy) const {
- if (RelTy == ELF::R_MICROBLAZE_32_PCREL || ELF::R_MICROBLAZE_64_PCREL)
- return SymOffset - (RelOffset + 4);
- else
- assert("computeRelocation unknown for this relocation type");
-
- return 0;
+ assert((RelTy == ELF::R_MICROBLAZE_32_PCREL ||
+ RelTy == ELF::R_MICROBLAZE_64_PCREL) &&
+ "computeRelocation unknown for this relocation type");
+ return SymOffset - (RelOffset + 4);
}
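
The old guard in computeRelocation compared RelTy only against the first constant; the second operand of || was a bare enumerator, hence always nonzero and always true (and the old `assert("...")` on a string literal could never fire either). A two-line illustration of the precedence trap the rewrite removes, with made-up constants:

#include <cassert>

enum { R_PCREL_32 = 3, R_PCREL_64 = 5 };

int main() {
  unsigned RelTy = 7;                                  // neither constant
  bool buggy = (RelTy == R_PCREL_32 || R_PCREL_64);    // always true
  bool fixed = (RelTy == R_PCREL_32 || RelTy == R_PCREL_64);
  assert(buggy && !fixed);
  return 0;
}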
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.h b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
index 63bfc0da745a..a314eb76ea46 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.h
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetELFWriterInfo.h"
namespace llvm {
+ class TargetMachine;
class MBlazeELFWriterInfo : public TargetELFWriterInfo {
public:
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index f28d5a77d49c..d2f14a5c53b7 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeFrameLowering.cpp - MBlaze Frame Information ------*- C++ -*-====//
+//===-- MBlazeFrameLowering.cpp - MBlaze Frame Information ---------------====//
//
// The LLVM Compiler Infrastructure
//
@@ -32,13 +32,11 @@
using namespace llvm;
-namespace llvm {
- cl::opt<bool> DisableStackAdjust(
- "disable-mblaze-stack-adjust",
- cl::init(false),
- cl::desc("Disable MBlaze stack layout adjustment."),
- cl::Hidden);
-}
+static cl::opt<bool> MBDisableStackAdjust(
+ "disable-mblaze-stack-adjust",
+ cl::init(false),
+ cl::desc("Disable MBlaze stack layout adjustment."),
+ cl::Hidden);
static void replaceFrameIndexes(MachineFunction &MF,
SmallVector<std::pair<int,int64_t>, 16> &FR) {
@@ -85,7 +83,7 @@ static void replaceFrameIndexes(MachineFunction &MF,
//===----------------------------------------------------------------------===//
static void analyzeFrameIndexes(MachineFunction &MF) {
- if (DisableStackAdjust) return;
+ if (MBDisableStackAdjust) return;
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
@@ -213,13 +211,13 @@ static void analyzeFrameIndexes(MachineFunction &MF) {
static void interruptFrameLayout(MachineFunction &MF) {
const Function *F = MF.getFunction();
- llvm::CallingConv::ID CallConv = F->getCallingConv();
+ CallingConv::ID CallConv = F->getCallingConv();
// If this function is not using either the interrupt_handler
// calling convention or the save_volatiles calling convention
// then we don't need to do any additional frame layout.
- if (CallConv != llvm::CallingConv::MBLAZE_INTR &&
- CallConv != llvm::CallingConv::MBLAZE_SVOL)
+ if (CallConv != CallingConv::MBLAZE_INTR &&
+ CallConv != CallingConv::MBLAZE_SVOL)
return;
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -230,7 +228,7 @@ static void interruptFrameLayout(MachineFunction &MF) {
// Determine if the calling convention is the interrupt_handler
// calling convention. Some pieces of the prologue and epilogue
// only need to be emitted if we are lowering an interrupt handler.
- bool isIntr = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ bool isIntr = CallConv == CallingConv::MBLAZE_INTR;
// Determine where to put prologue and epilogue additions
MachineBasicBlock &MENT = MF.front();
@@ -336,7 +334,8 @@ int MBlazeFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI)
// if frame pointer elimination is disabled.
bool MBlazeFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects();
}
void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -348,8 +347,8 @@ void MBlazeFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
- bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
// Determine the correct frame layout
determineFrameLayout(MF);
@@ -394,8 +393,8 @@ void MBlazeFrameLowering::emitEpilogue(MachineFunction &MF,
DebugLoc dl = MBBI->getDebugLoc();
- llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
- bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
// Get the FI's where RA and FP are saved.
int FPOffset = MBlazeFI->getFPStackOffset();
@@ -432,8 +431,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
- llvm::CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
- bool requiresRA = CallConv == llvm::CallingConv::MBLAZE_INTR;
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool requiresRA = CallConv == CallingConv::MBLAZE_INTR;
if (MFI->adjustsStack() || requiresRA) {
MBlazeFI->setRAStackOffset(0);
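
Moving the flag out of namespace llvm into a static cl::opt gives it internal linkage, so an identically named option object in another backend can no longer collide at link time; the MBDisable* rename keeps the in-source identifier unique as well. The declaration pattern, assuming llvm/Support/CommandLine.h is on the include path:

#include "llvm/Support/CommandLine.h"

// static => internal linkage: this object is private to the file, so other
// translation units may define their own flag objects freely.
static llvm::cl::opt<bool> MBDisableStackAdjust(
    "disable-mblaze-stack-adjust",
    llvm::cl::init(false),
    llvm::cl::desc("Disable MBlaze stack layout adjustment."),
    llvm::cl::Hidden);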
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.h b/lib/Target/MBlaze/MBlazeFrameLowering.h
index 8be15bfb857d..01e6578a352f 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.h
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.h
@@ -15,11 +15,10 @@
#define MBLAZE_FRAMEINFO_H
#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
- class MBlazeSubtarget;
+class MBlazeSubtarget;
class MBlazeFrameLowering : public TargetFrameLowering {
protected:
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 8ec548f1437d..edfc3355691f 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -167,7 +167,9 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -214,7 +216,7 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB)
const {
switch (MI->getOpcode()) {
- default: assert(false && "Unexpected instr type to insert");
+ default: llvm_unreachable("Unexpected instr type to insert");
case MBlaze::ShiftRL:
case MBlaze::ShiftRA:
@@ -600,7 +602,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue MBlazeTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("TLS not implemented for MicroBlaze.");
- return SDValue(); // Not reached
}
SDValue MBlazeTargetLowering::
@@ -656,7 +657,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
MBlaze::R5, MBlaze::R6, MBlaze::R7,
MBlaze::R8, MBlaze::R9, MBlaze::R10
};
@@ -681,7 +682,7 @@ static bool CC_MBlaze_AssignReg(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
/// TODO: isVarArg, isTailCall.
SDValue MBlazeTargetLowering::
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
- bool isVarArg, bool &isTailCall,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -895,7 +896,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
if (VA.isRegLoc()) {
MVT RegVT = VA.getLocVT();
ArgRegEnd = VA.getLocReg();
- TargetRegisterClass *RC = 0;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = MBlaze::GPRRegisterClass;
@@ -951,7 +952,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -963,7 +964,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
StackPtr = DAG.getRegister(StackReg, getPointerTy());
// The last register argument that must be saved is MBlaze::R10
- TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
+ const TargetRegisterClass *RC = MBlaze::GPRRegisterClass;
unsigned Begin = getMBlazeRegisterNumbering(MBlaze::R5);
unsigned Start = getMBlazeRegisterNumbering(ArgRegEnd+1);
@@ -1045,10 +1046,10 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// If this function is using the interrupt_handler calling convention
// then use "rtid r14, 0" otherwise use "rtsd r15, 8"
- unsigned Ret = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
- : MBlazeISD::Ret;
- unsigned Reg = (CallConv == llvm::CallingConv::MBLAZE_INTR) ? MBlaze::R14
- : MBlaze::R15;
+ unsigned Ret = (CallConv == CallingConv::MBLAZE_INTR) ? MBlazeISD::IRet
+ : MBlazeISD::Ret;
+ unsigned Reg = (CallConv == CallingConv::MBLAZE_INTR) ? MBlaze::R14
+ : MBlaze::R15;
SDValue DReg = DAG.getRegister(Reg, MVT::i32);
if (Flag.getNode())
@@ -1079,7 +1080,6 @@ getConstraintType(const std::string &Constraint) const
case 'y':
case 'f':
return C_RegisterClass;
- break;
}
}
return TargetLowering::getConstraintType(Constraint);
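
CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF are the count-leading/trailing-zeros variants whose result is undefined for a zero input (the __builtin_clz/__builtin_ctz contract); a target with no native counter must mark them Expand alongside the defined-at-zero forms, as above. A sketch of the semantic split:

#include <cstdint>

// Defined for every input: returns 32 for x == 0, the ISD::CTLZ contract.
static unsigned clz32(uint32_t x) {
  unsigned n = 0;
  for (uint32_t m = 0x80000000u; m != 0 && (x & m) == 0; m >>= 1)
    ++n;
  return n;
}

// ISD::CTLZ_ZERO_UNDEF: callers guarantee x != 0, which lets hardware
// count instructions be used without a zero guard.
static unsigned clz32ZeroUndef(uint32_t x) {
  return clz32(x); // any result would be acceptable for x == 0
}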
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 8b49bc3de0cc..6a79fc126702 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -15,11 +15,11 @@
#ifndef MBlazeISELLOWERING_H
#define MBlazeISELLOWERING_H
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "MBlaze.h"
-#include "MBlazeSubtarget.h"
namespace llvm {
namespace MBlazeCC {
@@ -134,7 +134,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index 4acdcfdd772c..3f145938728c 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
+//===-- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index 3082a7e227f8..91b69de05102 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
+//===-- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 54f605f989a3..e40432a1b9a9 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
+//===-- MBlazeInstrFormats.td - MB Instruction defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,6 +35,7 @@ def FRIR : Format<17>; // RSUBI
def FRRRR : Format<18>; // RSUB, FRSUB
def FRI : Format<19>; // RSUB, FRSUB
def FC : Format<20>; // NOP
+def FRR : Format<21>; // CLZ
//===----------------------------------------------------------------------===//
// Describe MBlaze instructions format
@@ -202,3 +203,26 @@ class MSR<bits<6> op, bits<6> flags, dag outs, dag ins, string asmstr,
let Inst{11-16} = flags;
let Inst{17-31} = imm15;
}
+
+//===----------------------------------------------------------------------===//
+// TCLZ instruction class in MBlaze : <|opcode|rd|ra|flags16|>
+//===----------------------------------------------------------------------===//
+class TCLZ<bits<6> op, bits<16> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FRR, outs, ins, asmstr, pattern, itin> {
+ bits<5> rd;
+ bits<5> ra;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// MBAR instruction class in MBlaze : <|opcode|flags26|>
+//===----------------------------------------------------------------------===//
+class MBAR<bits<6> op, bits<26> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<op, FC, outs, ins, asmstr, pattern, itin> {
+ let Inst{6-31} = flags;
+}
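
In the TCLZ class above, rd occupies bits 6-10, ra bits 11-15, and the 16-bit flags field bits 16-31, with bit 0 as the most significant bit in MBlaze's numbering. A sketch that assembles the word for `clz rd, ra` from those fields (a hypothetical helper; the real encoder is generated by TableGen):

#include <cstdint>

// Field positions from the MSB end translate to shifts from the LSB end:
// opcode bits 0-5 => <<26, rd bits 6-10 => <<21, ra bits 11-15 => <<16,
// and flags fill bits 16-31. Opcode and flags values come from the CLZ def.
static uint32_t encodeCLZ(unsigned rd, unsigned ra) {
  const uint32_t Opcode = 0x24, Flags = 0x00E0;
  return (Opcode << 26) | ((rd & 0x1F) << 21) | ((ra & 0x1F) << 16) | Flags;
}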
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 7ae05b367cba..db71434443bf 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//===-- MBlazeInstrInfo.cpp - MBlaze Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index 7174405a49d9..5252147b48e6 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//===-- MBlazeInstrInfo.h - MBlaze Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +15,9 @@
#define MBLAZEINSTRUCTIONINFO_H
#include "MBlaze.h"
+#include "MBlazeRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MBlazeRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 1d8c987a0856..02a21574f493 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
+//===-- MBlazeInstrInfo.td - MBlaze Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -594,9 +594,18 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1,
//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
- def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
+ def NOP : MBlazeInst<0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>;
}
+let Predicates=[HasPatCmp] in {
+ def CLZ : TCLZ<0x24, 0x00E0, (outs GPR:$dst), (ins GPR:$src),
+ "clz $dst, $src", [], IIC_ALU>;
+}
+
+def IMEMBAR : MBAR<0x2E, 0x0420004, (outs), (ins), "mbar 2", [], IIC_ALU>;
+def DMEMBAR : MBAR<0x2E, 0x0220004, (outs), (ins), "mbar 1", [], IIC_ALU>;
+def IDMEMBAR : MBAR<0x2E, 0x0020004, (outs), (ins), "mbar 0", [], IIC_ALU>;
+
let usesCustomInserter = 1 in {
def Select_CC : MBlazePseudo<(outs GPR:$dst),
(ins GPR:$T, GPR:$F, GPR:$CMP, i32imm:$CC), // F T reversed
@@ -751,6 +760,56 @@ def : Pat<(sra GPR:$L, GPR:$R), (ShiftRA GPR:$L, GPR:$R)>;
def : Pat<(srl GPR:$L, GPR:$R), (ShiftRL GPR:$L, GPR:$R)>;
// SET_CC operations
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 1)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 2)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$L, 6)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(setcc (i32 GPR:$L), (i32 0), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETEQ),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 1)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETNE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 2)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETLE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0), GPR:$R, 6)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULT),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETUGE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(setcc (i32 0), (i32 GPR:$R), SETULE),
+ (Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
+ (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ),
(Select_CC (ADDIK (i32 R0), 1), (ADDIK (i32 R0), 0),
(CMP GPR:$R, GPR:$L), 1)>;
@@ -787,6 +846,68 @@ def : Pat<(select (i32 GPR:$C), (i32 GPR:$T), (i32 GPR:$F)),
(Select_CC GPR:$T, GPR:$F, GPR:$C, 2)>;
// SELECT_CC
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 1)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 2)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$L, 6)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 3)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 4)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 5)>;
+def : Pat<(selectcc (i32 GPR:$L), (i32 0),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU (i32 R0), GPR:$L), 6)>;
+
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETEQ),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 1)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETNE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 2)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLT),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETGE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETLE),
+ (Select_CC GPR:$T, GPR:$F, GPR:$R, 6)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 3)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULT),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 4)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETUGE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 5)>;
+def : Pat<(selectcc (i32 0), (i32 GPR:$R),
+ (i32 GPR:$T), (i32 GPR:$F), SETULE),
+ (Select_CC GPR:$T, GPR:$F, (CMPU GPR:$R, (i32 R0)), 6)>;
+
def : Pat<(selectcc (i32 GPR:$L), (i32 GPR:$R),
(i32 GPR:$T), (i32 GPR:$F), SETEQ),
(Select_CC GPR:$T, GPR:$F, (CMP GPR:$R, GPR:$L), 1)>;
@@ -827,6 +948,48 @@ def : Pat<(br bb:$T), (BRID bb:$T)>;
def : Pat<(brind GPR:$T), (BRAD GPR:$T)>;
// BRCOND instructions
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETEQ), bb:$T),
+ (BEQID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETNE), bb:$T),
+ (BNEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGT), bb:$T),
+ (BGTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLT), bb:$T),
+ (BLTID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETGE), bb:$T),
+ (BGEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETLE), bb:$T),
+ (BLEID GPR:$L, bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGT), bb:$T),
+ (BGTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULT), bb:$T),
+ (BLTID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETUGE), bb:$T),
+ (BGEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+def : Pat<(brcond (setcc (i32 GPR:$L), (i32 0), SETULE), bb:$T),
+ (BLEID (CMPU (i32 R0), GPR:$L), bb:$T)>;
+
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETEQ), bb:$T),
+ (BEQID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETNE), bb:$T),
+ (BNEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGT), bb:$T),
+ (BGTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLT), bb:$T),
+ (BLTID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETGE), bb:$T),
+ (BGEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETLE), bb:$T),
+ (BLEID GPR:$R, bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGT), bb:$T),
+ (BGTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULT), bb:$T),
+ (BLTID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETUGE), bb:$T),
+ (BGEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+def : Pat<(brcond (setcc (i32 0), (i32 GPR:$R), SETULE), bb:$T),
+ (BLEID (CMPU GPR:$R, (i32 R0)), bb:$T)>;
+
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETEQ), bb:$T),
(BEQID (CMP GPR:$R, GPR:$L), bb:$T)>;
def : Pat<(brcond (setcc (i32 GPR:$L), (i32 GPR:$R), SETNE), bb:$T),
@@ -869,11 +1032,11 @@ def : Pat<(store (i32 GPR:$dst), xaddr:$addr), (SW GPR:$dst, xaddr:$addr)>;
def : Pat<(load xaddr:$addr), (i32 (LW xaddr:$addr))>;
// 16-bit load and store
-def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$addr), (SH GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei16 (i32 GPR:$dst), xaddr:$ad), (SH GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi16 xaddr:$addr), (i32 (LHU xaddr:$addr))>;
// 8-bit load and store
-def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$addr), (SB GPR:$dst, xaddr:$addr)>;
+def : Pat<(truncstorei8 (i32 GPR:$dst), xaddr:$ad), (SB GPR:$dst, xaddr:$ad)>;
def : Pat<(zextloadi8 xaddr:$addr), (i32 (LBU xaddr:$addr))>;
// Peepholes
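
The new pattern blocks peel off comparisons where one side is a literal zero: MBlaze's conditional branches (BEQID, BGTID, ...) already test a single register against zero, so the signed forms can branch on the register directly and skip the CMP, while the unsigned orderings still need a CMPU against R0, the hard-wired zero register. An illustrative selection sketch (the opcode strings are for exposition only):

// For `x <signed cc> 0` the branch tests x itself; the general
// two-register form must materialize the comparison first.
static const char *selectBranchAgainstZero(int cc, bool isUnsigned) {
  if (isUnsigned)
    return "cmpu t, r0, x ; b<cc>id t, target"; // still needs a compare
  switch (cc) {
  case 0:  return "beqid x, target"; // x == 0
  case 1:  return "bneid x, target"; // x != 0
  case 2:  return "bgtid x, target"; // x >  0
  default: return "b<cc>id x, target";
  }
}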
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index ea81dd63d195..91aaf940e626 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information -00-------*- C++ -*-===//
+//===-- MBlazeIntrinsicInfo.cpp - Intrinsic Information -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstring>
using namespace llvm;
@@ -73,16 +74,13 @@ lookupGCCName(const char *Name) const {
}
bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
- // Overload Table
- const bool OTable[] = {
+ if (IntrID == 0)
+ return false;
+
+ unsigned id = IntrID - Intrinsic::num_intrinsics + 1;
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "MBlazeGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
- };
- if (IntrID == 0)
- return false;
- else
- return OTable[IntrID - Intrinsic::num_intrinsics];
}
/// This defines the "getAttributes(ID id)" method.
@@ -92,7 +90,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
static FunctionType *getType(LLVMContext &Context, unsigned id) {
Type *ResultTy = NULL;
- std::vector<Type*> ArgTys;
+ SmallVector<Type*, 8> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
index 80760d87e00a..34f379230def 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -1,4 +1,4 @@
-//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//===-- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 278afbefc165..b5dc59547bbf 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -1,4 +1,4 @@
-//===- IntrinsicsMBlaze.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//===-- IntrinsicsMBlaze.td - Defines MBlaze intrinsics ----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index a7e400b1d1a4..6b9f42ec91a6 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MBLazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
+//===-- MBlazeMCInstLower.cpp - Convert MBlaze MachineInstr to an MCInst---===//
//
// The LLVM Compiler Infrastructure
//
@@ -85,9 +85,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
MCSymbol *MBlazeMCInstLower::
GetBlockAddressSymbol(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
- default:
- assert(0 && "Unknown target flag on GV operand");
-
+ default: llvm_unreachable("Unknown target flag on GV operand");
case 0: break;
}
@@ -150,7 +148,7 @@ void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
break;
- case MachineOperand::MO_FPImmediate:
+ case MachineOperand::MO_FPImmediate: {
bool ignored;
APFloat FVal = MO.getFPImm()->getValueAPF();
FVal.convert(APFloat::IEEEsingle, APFloat::rmTowardZero, &ignored);
@@ -160,6 +158,9 @@ void MBlazeMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateImm(Val);
break;
}
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
OutMI.addOperand(MCOp);
}
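
The MO_FPImmediate case converts the constant to IEEE single and hands its raw 32-bit pattern to MCOperand::CreateImm. The same bit extraction in standalone form, assuming the host float is IEEE-754 binary32:

#include <cstdint>
#include <cstring>

// memcpy reinterprets the float's storage without the undefined behavior
// of a pointer-cast type pun; the result is the integer immediate the
// lowering above emits.
static uint32_t floatBits(float f) {
  static_assert(sizeof(uint32_t) == sizeof(float), "needs 32-bit float");
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}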
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.h b/lib/Target/MBlaze/MBlazeMCInstLower.h
index 92196f220225..7b97744ea933 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.h
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//===-- MBlazeMCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.cpp b/lib/Target/MBlaze/MBlazeMachineFunction.cpp
new file mode 100644
index 000000000000..2217b5477d6b
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.cpp
@@ -0,0 +1,14 @@
+//===-- MBlazeMachineFunctionInfo.cpp - Private data ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMachineFunction.h"
+
+using namespace llvm;
+
+void MBlazeFunctionInfo::anchor() { }
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index df395094282f..95cc5077cc16 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -1,4 +1,4 @@
-//===-- MBlazeMachineFunctionInfo.h - Private data ----------------*- C++ -*-=//
+//===-- MBlazeMachineFunctionInfo.h - Private data --------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,7 +16,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -25,8 +24,8 @@ namespace llvm {
/// MBlazeFunctionInfo - This class is derived from MachineFunction private
/// MBlaze target-specific information for each MachineFunction.
class MBlazeFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
-private:
/// Holds for each function where on the stack the Frame Pointer must be
/// saved. This is used on Prologue and Epilogue to emit FP save/restore
int FPStackOffset;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 9788ba9e6021..46f5207a90ba 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -== -*- C++ -*-===//
+//===-- MBlazeRegisterInfo.cpp - MBlaze Register Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,9 @@
#define DEBUG_TYPE "mblaze-frame-info"
+#include "MBlazeRegisterInfo.h"
#include "MBlaze.h"
#include "MBlazeSubtarget.h"
-#include "MBlazeRegisterInfo.h"
#include "MBlazeMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
@@ -54,10 +54,10 @@ unsigned MBlazeRegisterInfo::getPICCallReg() {
//===----------------------------------------------------------------------===//
/// MBlaze Callee Saved Registers
-const unsigned* MBlazeRegisterInfo::
+const uint16_t* MBlazeRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const {
// MBlaze callee-save register range is R20 - R31
- static const unsigned CalleeSavedRegs[] = {
+ static const uint16_t CalleeSavedRegs[] = {
MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
@@ -184,10 +184,8 @@ unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 7e4b269cb887..1d5116293516 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//===-- MBlazeRegisterInfo.h - MBlaze Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -46,7 +46,7 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
static unsigned getPICCallReg();
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 13c46ba1ecba..64cae5cff85d 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
+//===-- MBlazeRegisterInfo.td - MBlaze Register defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeRelocations.h b/lib/Target/MBlaze/MBlazeRelocations.h
index c298eda2195f..6387ee23ec9b 100644
--- a/lib/Target/MBlaze/MBlazeRelocations.h
+++ b/lib/Target/MBlaze/MBlazeRelocations.h
@@ -1,4 +1,4 @@
-//===- MBlazeRelocations.h - MBlaze Code Relocations ------------*- C++ -*-===//
+//===-- MBlazeRelocations.h - MBlaze Code Relocations -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 4662f25ceb12..4a3ae5fc1470 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
+//===-- MBlazeSchedule.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td
index ccbf99dbd3a2..20257a60a0fa 100644
--- a/lib/Target/MBlaze/MBlazeSchedule3.td
+++ b/lib/Target/MBlaze/MBlazeSchedule3.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//===-- MBlazeSchedule3.td - MBlaze Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td
index fa88766fdb18..ab53b424ded3 100644
--- a/lib/Target/MBlaze/MBlazeSchedule5.td
+++ b/lib/Target/MBlaze/MBlazeSchedule5.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===//
+//===-- MBlazeSchedule5.td - MBlaze Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index 7e5667f55c15..d12d14245ea3 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//===-- MBlazeSubtarget.cpp - MBlaze Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index 43b0197ad5aa..eb375046f218 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//===-- MBlazeSubtarget.h - Define Subtarget for the MBlaze ----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 7bff53ef8717..dd7de9bff36b 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MBlaze.h"
#include "MBlazeTargetMachine.h"
+#include "MBlaze.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/FormattedStream.h"
@@ -33,30 +33,49 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// an easier handling.
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
- InstrItins(Subtarget.getInstrItineraryData()) {
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+namespace {
+/// MBlaze Code Generator Pass Configuration Options.
+class MBlazePassConfig : public TargetPassConfig {
+public:
+ MBlazePassConfig(MBlazeTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MBlazeTargetMachine &getMBlazeTargetMachine() const {
+ return getTM<MBlazeTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MBlazeTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MBlazePassConfig(this, PM);
}
// Install an instruction selector pass using
// the ISelDag to gen MBlaze code.
-bool MBlazeTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createMBlazeISelDag(*this));
+bool MBlazePassConfig::addInstSelector() {
+ PM.add(createMBlazeISelDag(getMBlazeTargetMachine()));
return false;
}
// Implemented by targets that want to run passes immediately before
// machine code is emitted. Return true if -print-machineinstrs should
// print out the code after the passes.
-bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createMBlazeDelaySlotFillerPass(*this));
+bool MBlazePassConfig::addPreEmitPass() {
+ PM.add(createMBlazeDelaySlotFillerPass(getMBlazeTargetMachine()));
return true;
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index c1bc08aeb505..1647a2169210 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze --- C++ ---===//
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -43,7 +43,9 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
@@ -77,8 +79,7 @@ namespace llvm {
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level Opt);
- virtual bool addPreEmitPass(PassManagerBase &PM,CodeGenOpt::Level Opt);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // End llvm namespace
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
index 37871b6916c9..36134a69387c 100644
--- a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -3,13 +3,7 @@ add_llvm_library(LLVMMBlazeDesc
MBlazeMCAsmInfo.cpp
MBlazeMCCodeEmitter.cpp
MBlazeMCTargetDesc.cpp
- )
-
-add_llvm_library_dependencies(LLVMMBlazeDesc
- LLVMMBlazeAsmPrinter
- LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
+ MBlazeELFObjectWriter.cpp
)
add_dependencies(LLVMMBlazeDesc MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..4982f0f17218
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeDesc
+parent = MBlaze
+required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 08f7d46a58f9..f383fecdc25a 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
static unsigned getFixupKindSize(unsigned Kind) {
switch (Kind) {
- default: assert(0 && "invalid fixup kind!");
+ default: llvm_unreachable("invalid fixup kind!");
case FK_Data_1: return 1;
case FK_PCRel_2:
case FK_Data_2: return 2;
@@ -39,12 +39,6 @@ static unsigned getFixupKindSize(unsigned Kind) {
namespace {
-class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- MBlazeELFObjectWriter(Triple::OSType OSType)
- : MCELFObjectTargetWriter(/*is64Bit*/ false, OSType, ELF::EM_MBLAZE,
- /*HasRelocationAddend*/ true) {}
-};
class MBlazeAsmBackend : public MCAsmBackend {
public:
@@ -56,11 +50,16 @@ public:
return 2;
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
unsigned getPointerSize() const {
return 4;
@@ -76,7 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
-bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+bool MBlazeAsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
return false;
@@ -87,12 +86,24 @@ bool MBlazeAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExprOrImm;
}
-void MBlazeAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+bool MBlazeAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: Is this right? It's what the "generic" code was doing before,
+ // but is X86 specific. Is it actually true for MBlaze also, or was it
+ // just close enough to not be a big deal?
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+void MBlazeAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res = Inst;
Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
}
-bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool MBlazeAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
if ((Count % 4) != 0)
return false;
@@ -106,20 +117,19 @@ bool MBlazeAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFMBlazeAsmBackend : public MBlazeAsmBackend {
public:
- Triple::OSType OSType;
- ELFMBlazeAsmBackend(const Target &T, Triple::OSType _OSType)
- : MBlazeAsmBackend(T), OSType(_OSType) { }
+ uint8_t OSABI;
+ ELFMBlazeAsmBackend(const Target &T, uint8_t _OSABI)
+ : MBlazeAsmBackend(T), OSABI(_OSABI) { }
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const;
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new MBlazeELFObjectWriter(OSType), OS,
- /*IsLittleEndian*/ false);
+ return createMBlazeELFObjectWriter(OS, OSABI);
}
};
-void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data,
+void ELFMBlazeAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value) const {
unsigned Size = getFixupKindSize(Fixup.getKind());
@@ -155,5 +165,6 @@ MCAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
assert(0 && "Windows not supported on MBlaze");
- return new ELFMBlazeAsmBackend(T, TheTriple.getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFMBlazeAsmBackend(T, OSABI);
}
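
fixupNeedsRelaxation keeps the check the generic relaxation code used before the hook existed: relax whenever the value no longer fits a signed 8-bit immediate (the FIXME above flags that range as inherited from X86). The truncate-and-sign-extend round trip it relies on:

#include <cstdint>

// A value fits a signed i8 exactly when truncating to int8_t and widening
// back reproduces it -- the test applied in fixupNeedsRelaxation above.
static bool fitsSignedI8(int64_t Value) {
  return Value == int64_t(int8_t(Value));
}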
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
index 776dbc4d8678..437026e7bbc0 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
@@ -51,6 +51,7 @@ namespace MBlazeII {
FRRRR,
FRI,
FC,
+ FRR,
FormMask = 63
//===------------------------------------------------------------------===//
@@ -95,7 +96,6 @@ static inline bool isSpecialMBlazeRegister(unsigned Reg) {
default:
return false;
}
- return false; // Not reached
}
/// getMBlazeRegisterNumbering - Given the enum value for some register, e.g.
@@ -160,7 +160,6 @@ static inline unsigned getMBlazeRegisterNumbering(unsigned RegEnum) {
case MBlaze::RPVR11 : return 0x200B;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
}
/// getRegisterFromNumbering - Given the enum value for some register, e.g.
@@ -201,7 +200,6 @@ static inline unsigned getMBlazeRegisterFromNumbering(unsigned Reg) {
case 31 : return MBlaze::R31;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
}
static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
@@ -232,7 +230,6 @@ static inline unsigned getSpecialMBlazeRegisterFromNumbering(unsigned Reg) {
case 0x200B : return MBlaze::RPVR11;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
}
} // end namespace llvm;
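Several hunks in this file (and throughout the patch) delete a trailing "// Not reached" return or replace assert(0) with llvm_unreachable. The returns are dead weight because llvm_unreachable expands to a noreturn call; a rough standalone sketch of the mechanism (names hypothetical, not LLVM's macro):

#include <cstdio>
#include <cstdlib>

// Minimal stand-in for llvm_unreachable: report and abort, and tell the
// compiler control never returns, so code after a call is unreachable.
#if defined(__GNUC__)
__attribute__((noreturn))
#endif
static void unreachable_sketch(const char *Msg) {
  std::fprintf(stderr, "UNREACHABLE executed: %s\n", Msg);
  std::abort();
}

static unsigned lookup(unsigned Reg) {
  switch (Reg) {
  case 0: return 42;
  default: unreachable_sketch("Unknown register number!");
  }
  // No "return 0; // Not reached" needed: the compiler knows the
  // default case cannot fall through.
}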
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
new file mode 100644
index 000000000000..2824b3c35cf1
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
@@ -0,0 +1,77 @@
+//===-- MBlazeELFObjectWriter.cpp - MBlaze ELF Writer ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class MBlazeELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ MBlazeELFObjectWriter(uint8_t OSABI);
+
+ virtual ~MBlazeELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ };
+}
+
+MBlazeELFObjectWriter::MBlazeELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_MBLAZE,
+ /*HasRelocationAddend*/ false) {}
+
+MBlazeELFObjectWriter::~MBlazeELFObjectWriter() {
+}
+
+unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // Determine the type of the relocation.
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case FK_PCRel_4:
+ Type = ELF::R_MICROBLAZE_64_PCREL;
+ break;
+ case FK_PCRel_2:
+ Type = ELF::R_MICROBLAZE_32_PCREL;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ((IsRelocWithSymbol || Addend != 0)
+ ? ELF::R_MICROBLAZE_32
+ : ELF::R_MICROBLAZE_64);
+ break;
+ case FK_Data_2:
+ Type = ELF::R_MICROBLAZE_32;
+ break;
+ }
+ }
+ return Type;
+}
+
+
+
+MCObjectWriter *llvm::createMBlazeELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new MBlazeELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/ false);
+}
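The new writer takes a single OSABI byte instead of a Triple::OSType. As a hedged sketch of the plumbing (LLVM 3.1-era headers assumed; osabiForTriple is an illustrative helper, not part of the patch), the asm-backend factory derives that byte once from the triple and threads it through:

#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCELFObjectWriter.h"

// Fold the target triple's OS into the ELF header's OSABI byte up front,
// so the object writer no longer needs the full Triple::OSType.
static uint8_t osabiForTriple(llvm::StringRef TT) {
  llvm::Triple TheTriple(TT);
  return llvm::MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
}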
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
index 0d88466bb300..8231f07dfa80 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -14,6 +14,8 @@
#include "MBlazeMCAsmInfo.h"
using namespace llvm;
+void MBlazeMCAsmInfo::anchor() { }
+
MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
IsLittleEndian = false;
StackGrowsUp = false;
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
index e68dd58b016b..977f9a6866d7 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//===-- MBlazeMCAsmInfo.h - MBlaze asm properties --------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,13 @@
#ifndef MBLAZETARGETASMINFO_H
#define MBLAZETARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
class MBlazeMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
public:
explicit MBlazeMCAsmInfo();
};
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index 1514557bf00b..c9b16368ecc5 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -43,7 +43,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI) const;
+ uint64_t getBinaryCodeForInstr(const MCInst &MI) const;
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
@@ -54,8 +54,8 @@ public:
static unsigned GetMBlazeRegNum(const MCOperand &MO) {
// FIXME: getMBlazeRegisterNumbering() is sufficient?
- assert(0 && "MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet implemented.");
- return 0;
+ llvm_unreachable("MBlazeMCCodeEmitter::GetMBlazeRegNum() not yet "
+ "implemented.");
}
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
@@ -109,17 +109,14 @@ unsigned MBlazeMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCOperand &MO) const {
if (MO.isReg())
return getMBlazeRegisterNumbering(MO.getReg());
- else if (MO.isImm())
+ if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
- else if (MO.isExpr())
- return 0; // The relocation has already been recorded at this point.
- else {
+ if (MO.isExpr())
+ return 0; // The relocation has already been recorded at this point.
#ifndef NDEBUG
- errs() << MO;
+ errs() << MO;
#endif
- llvm_unreachable(0);
- }
- return 0;
+ llvm_unreachable(0);
}
void MBlazeMCCodeEmitter::
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 43ae281519c2..9a7549b0e7cf 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -62,13 +62,14 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createMBlazeMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default)
RM = Reloc::Static;
if (CM == CodeModel::Default)
CM = CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -82,12 +83,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (TheTriple.isOSDarwin()) {
llvm_unreachable("MBlaze does not support Darwin MACH-O format");
- return NULL;
}
if (TheTriple.isOSWindows()) {
llvm_unreachable("MBlaze does not support Windows COFF format");
- return NULL;
}
return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
@@ -96,9 +95,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new MBlazeInstPrinter(MAI);
+ return new MBlazeInstPrinter(MAI, MII, MRI);
return 0;
}
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
index deff5cb078f9..ae82c32a5f26 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -14,24 +14,28 @@
#ifndef MBLAZEMCTARGETDESC_H
#define MBLAZEMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class MCAsmBackend;
class MCContext;
class MCCodeEmitter;
class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class Target;
class StringRef;
-class formatted_raw_ostream;
+class raw_ostream;
extern Target TheMBlazeTarget;
MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-
+
MCAsmBackend *createMBlazeAsmBackend(const Target &T, StringRef TT);
+MCObjectWriter *createMBlazeELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
} // End llvm namespace
// Defines symbolic names for MBlaze registers. This defines a mapping from
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
index 93fce58883ed..b554d9b15e45 100644
--- a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -5,10 +5,4 @@ add_llvm_library(LLVMMBlazeInfo
MBlazeTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMBlazeInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMBlazeInfo MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..ba7ee5d69188
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MBlaze/TargetInfo/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MBlazeInfo
+parent = MBlaze
+required_libraries = MC Support Target
+add_to_library_groups = MBlaze
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 0952b76aefab..a8f9b52746ad 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -1,11 +1,11 @@
set(LLVM_TARGET_DEFINITIONS MSP430.td)
-llvm_tablegen(MSP430GenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(MSP430GenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(MSP430GenCallingConv.inc -gen-callingconv)
-llvm_tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM MSP430GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MSP430GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM MSP430GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM MSP430GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MSP430GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM MSP430GenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(MSP430CommonTableGen)
add_llvm_target(MSP430CodeGen
@@ -14,6 +14,7 @@ add_llvm_target(MSP430CodeGen
MSP430ISelLowering.cpp
MSP430InstrInfo.cpp
MSP430FrameLowering.cpp
+ MSP430MachineFunctionInfo.cpp
MSP430RegisterInfo.cpp
MSP430Subtarget.cpp
MSP430TargetMachine.cpp
@@ -22,19 +23,6 @@ add_llvm_target(MSP430CodeGen
MSP430MCInstLower.cpp
)
-add_llvm_library_dependencies(LLVMMSP430CodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Desc
- LLVMMSP430Info
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index ce39d9517efe..64ac994b7f47 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
MSP430InstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMSP430AsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..37b8c2537fb4
--- /dev/null
+++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MSP430/InstPrinter/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MSP430AsmPrinter
+parent = MSP430
+required_libraries = MC Support
+add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 5d6c6ad93dbe..0930c453e954 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -92,7 +92,6 @@ void MSP430InstPrinter::printCCOperand(const MCInst *MI, unsigned OpNo,
switch (CC) {
default:
llvm_unreachable("Unsupported CC code");
- break;
case MSP430CC::COND_E:
O << "eq";
break;
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index a1984a8aec19..d32eb3a21a37 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax ----===//
+//= MSP430InstPrinter.h - Convert MSP430 MCInst to assembly syntax -*- C++ -*-//
//
// The LLVM Compiler Infrastructure
//
@@ -21,8 +21,9 @@ namespace llvm {
class MSP430InstPrinter : public MCInstPrinter {
public:
- MSP430InstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt
new file mode 100644
index 000000000000..51d9702ac560
--- /dev/null
+++ b/lib/Target/MSP430/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/MSP430/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = MSP430
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = MSP430CodeGen
+parent = MSP430
+required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target
+add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
index 04bd03e49460..adc95c52014e 100644
--- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMMSP430Desc
MSP430MCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Desc
- LLVMMC
- LLVMMSP430AsmPrinter
- LLVMMSP430Info
- )
-
add_dependencies(LLVMMSP430Desc MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..3319d9363e16
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MSP430Desc
+parent = MSP430
+required_libraries = MC MSP430AsmPrinter MSP430Info Support Target
+add_to_library_groups = MSP430
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index ad7d380b5631..2e328cb5d6ac 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -12,8 +12,11 @@
//===----------------------------------------------------------------------===//
#include "MSP430MCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
+void MSP430MCAsmInfo::anchor() { }
+
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
PointerSize = 2;
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index f3138a22022d..e5c2fc283b17 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- MSP430MCAsmInfo.h - MSP430 asm properties -----------*- C++ -*--====//
+//===-- MSP430MCAsmInfo.h - MSP430 asm properties --------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,15 @@
#ifndef MSP430TARGETASMINFO_H
#define MSP430TARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
- struct MSP430MCAsmInfo : public MCAsmInfo {
+ class MSP430MCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index fda70b81dc8c..c455f6bc24f2 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -51,18 +51,21 @@ static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createMSP430MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new MSP430InstPrinter(MAI);
+ return new MSP430InstPrinter(MAI, MII, MRI);
return 0;
}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 35f259076441..7f3505ca5514 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -15,9 +15,7 @@
#define MSP430MCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheMSP430Target;
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 5cc5e6e3d7c9..c6796b3789ad 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -1,4 +1,4 @@
-//===- MSP430.td - Describe the MSP430 Target Machine ---------*- tblgen -*-==//
+//===-- MSP430.td - Describe the MSP430 Target Machine -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 883654943b64..1d1094bc339d 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -65,7 +65,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
- default: assert(0 && "Not implemented yet!");
+ default: llvm_unreachable("Not implemented yet!");
case MachineOperand::MO_Register:
O << MSP430InstPrinter::getRegisterName(MO.getReg());
return;
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index bd644435c76f..bdeb0c590f2d 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430BranchSelector.cpp - Emit long conditional branches--*- C++ -*-=//
+//===-- MSP430BranchSelector.cpp - Emit long conditional branches ---------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index c99f4ab6c2f9..61d7f2bf4766 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -1,4 +1,4 @@
-//======-- MSP430FrameLowering.cpp - MSP430 Frame Information -------=========//
+//===-- MSP430FrameLowering.cpp - MSP430 Frame Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,7 +29,7 @@ using namespace llvm;
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
MF.getFrameInfo()->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
@@ -140,7 +140,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->getDesc().isTerminator())
+ if (Opc != MSP430::POP16r && !PI->isTerminator())
break;
--MBBI;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index dc374315171f..071a2f7de2c8 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -37,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
typedef enum {
@@ -80,7 +78,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
- setSchedulingPreference(Sched::Latency);
// We have post-incremented loads / stores.
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
@@ -124,8 +121,12 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::CTTZ, MVT::i8, Expand);
setOperationAction(ISD::CTTZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTLZ, MVT::i8, Expand);
setOperationAction(ISD::CTLZ, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
setOperationAction(ISD::CTPOP, MVT::i8, Expand);
setOperationAction(ISD::CTPOP, MVT::i16, Expand);
@@ -193,7 +194,6 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op,
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
- return SDValue();
}
}
@@ -259,19 +259,16 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
case CallingConv::Fast:
return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
- if (Ins.empty())
- return Chain;
- else {
+ if (Ins.empty())
+ return Chain;
report_fatal_error("ISRs cannot have arguments");
- return SDValue();
- }
}
}
SDValue
MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -289,7 +286,6 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Outs, OutVals, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
report_fatal_error("ISRs cannot be called directly");
- return SDValue();
}
}
@@ -372,7 +368,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, MVT::i16);
InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -390,10 +386,8 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
SmallVector<CCValAssign, 16> RVLocs;
// ISRs cannot return any value.
- if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty())
report_fatal_error("ISRs cannot return any value");
- return SDValue();
- }
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -599,8 +593,7 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
// Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
switch (Opc) {
- default:
- assert(0 && "Invalid shift opcode!");
+ default: llvm_unreachable("Invalid shift opcode!");
case ISD::SHL:
return DAG.getNode(MSP430ISD::SHL, dl,
VT, N->getOperand(0), N->getOperand(1));
@@ -651,7 +644,7 @@ SDValue MSP430TargetLowering::LowerExternalSymbol(SDValue Op,
const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
- return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);;
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
}
SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
@@ -660,7 +653,7 @@ SDValue MSP430TargetLowering::LowerBlockAddress(SDValue Op,
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true);
- return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);;
+ return DAG.getNode(MSP430ISD::Wrapper, dl, getPointerTy(), Result);
}
static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
@@ -908,13 +901,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -930,7 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -1028,8 +1021,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
unsigned Opc;
const TargetRegisterClass * RC;
switch (MI->getOpcode()) {
- default:
- assert(0 && "Invalid shift opcode!");
+ default: llvm_unreachable("Invalid shift opcode!");
case MSP430::Shl8:
Opc = MSP430::SHL8r1;
RC = MSP430::GR8RegisterClass;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 237f60435736..e372f00bf324 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -1,4 +1,4 @@
-//==-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ------*- C++ -*-==//
+//===-- MSP430ISelLowering.h - MSP430 DAG Lowering Interface ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -152,8 +152,8 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/MSP430/MSP430InstrFormats.td b/lib/Target/MSP430/MSP430InstrFormats.td
index 73aef1facc0f..a9e87dad0cd8 100644
--- a/lib/Target/MSP430/MSP430InstrFormats.td
+++ b/lib/Target/MSP430/MSP430InstrFormats.td
@@ -1,4 +1,4 @@
-//===- MSP430InstrFormats.td - MSP430 Instruction Formats-----*- tblgen -*-===//
+//===-- MSP430InstrFormats.td - MSP430 Instruction Formats -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index ffd43183c5d9..c03ba470af27 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- MSP430InstrInfo.cpp - MSP430 Instruction Information ---------------===//
+//===-- MSP430InstrInfo.cpp - MSP430 Instruction Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430InstrInfo.h"
+#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -43,8 +42,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -72,8 +70,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -133,9 +130,7 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
switch (CC) {
- default:
- assert(0 && "Invalid branch condition!");
- break;
+ default: llvm_unreachable("Invalid branch condition!");
case MSP430CC::COND_E:
CC = MSP430CC::COND_NE;
break;
@@ -161,13 +156,12 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -192,7 +186,7 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled
// by this analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Cannot handle indirect branches.
@@ -301,8 +295,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (Desc.TSFlags & MSP430II::SizeMask) {
default:
switch (Desc.getOpcode()) {
- default:
- assert(0 && "Unknown instruction size!");
+ default: llvm_unreachable("Unknown instruction size!");
case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
@@ -318,8 +311,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
}
case MSP430II::SizeSpecial:
switch (MI->getOpcode()) {
- default:
- assert(0 && "Unknown instruction size!");
+ default: llvm_unreachable("Unknown instruction size!");
case MSP430::SAR8r1c:
case MSP430::SAR16r1c:
return 4;
@@ -331,6 +323,4 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case MSP430II::Size6Bytes:
return 6;
}
-
- return 6;
}
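The isUnpredicatedTerminator and AnalyzeBranch hunks above swap MI->getDesc().isX() for MI->isX(). This is a pure convenience-API migration: MachineInstr gained thin forwarders to its MCInstrDesc flags, roughly as below (an illustrative stand-in, not LLVM's actual class):

#include "llvm/MC/MCInstrDesc.h"

// Illustrative shape of the forwarders; behavior is identical to calling
// getDesc().isTerminator() etc. directly.
class MachineInstrSketch {
  const llvm::MCInstrDesc *Desc;
public:
  bool isTerminator() const { return Desc->isTerminator(); }
  bool isBranch() const     { return Desc->isBranch(); }
  bool isBarrier() const    { return Desc->isBarrier(); }
  bool isPredicable() const { return Desc->isPredicable(); }
};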
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 90013f5c2e70..04f339bdd608 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -1,4 +1,4 @@
-//===- MSP430InstrInfo.h - MSP430 Instruction Information -------*- C++ -*-===//
+//===-- MSP430InstrInfo.h - MSP430 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,8 +14,8 @@
#ifndef LLVM_TARGET_MSP430INSTRINFO_H
#define LLVM_TARGET_MSP430INSTRINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "MSP430RegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "MSP430GenInstrInfo.inc"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 59cb59873ab7..4348dd5e54e6 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -1,4 +1,4 @@
-//===- MSP430InstrInfo.td - MSP430 Instruction defs -----------*- tblgen-*-===//
+//===-- MSP430InstrInfo.td - MSP430 Instruction defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index d1d9a1158635..b1773fba7e92 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst---===//
+//===-- MSP430MCInstLower.cpp - Convert MSP430 MachineInstr to an MCInst --===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,7 +39,7 @@ GetGlobalAddressSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetExternalSymbolSymbol(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
+ default: llvm_unreachable("Unknown target flag on GV operand");
case 0: break;
}
@@ -81,7 +81,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
MCSymbol *MSP430MCInstLower::
GetBlockAddressSymbol(const MachineOperand &MO) const {
switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
+ default: llvm_unreachable("Unknown target flag on GV operand");
case 0: break;
}
@@ -116,7 +116,7 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
switch (MO.getType()) {
default:
MI->dump();
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
@@ -143,6 +143,9 @@ void MSP430MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/MSP430/MSP430MCInstLower.h b/lib/Target/MSP430/MSP430MCInstLower.h
index e937696406fe..24151e2b8ea1 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/MSP430MCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//===-- MSP430MCInstLower.h - Lower MachineInstr to MCInst ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,6 @@
namespace llvm {
class AsmPrinter;
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..0f7539908458
--- /dev/null
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void MSP430MachineFunctionInfo::anchor() { }
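The anchor() methods this patch adds to MSP430MCAsmInfo, MSP430Subtarget, MSP430MachineFunctionInfo, and the MBlaze classes all serve one purpose: giving each polymorphic class one out-of-line virtual function so its vtable is emitted in a single object file rather than weakly in every translation unit. A self-contained sketch of the idiom:

// Widget.h (illustrative)
class Widget {
  virtual void anchor();   // never called; exists only to pin the vtable
public:
  virtual ~Widget() {}
  virtual int value() const { return 0; }
};

// Widget.cpp -- because anchor() is the first non-inline virtual function,
// this is the only TU that emits Widget's vtable and type info.
void Widget::anchor() {}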
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 383fd2e9821c..632d6dee275f 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -21,6 +21,8 @@ namespace llvm {
/// MSP430MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private MSP430 target-specific information for each MachineFunction.
class MSP430MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
/// CalleeSavedFrameSize - Size of the callee-saved register portion of the
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 9049c4bf8f65..51ec71ace525 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- MSP430RegisterInfo.cpp - MSP430 Register Information ---------------===//
+//===-- MSP430RegisterInfo.cpp - MSP430 Register Information --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,9 @@
#define DEBUG_TYPE "msp430-reg-info"
+#include "MSP430RegisterInfo.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
-#include "MSP430RegisterInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -38,27 +38,27 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
StackAlign = TM.getFrameLowering()->getStackAlignment();
}
-const unsigned*
+const uint16_t*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
const Function* F = MF->getFunction();
- static const unsigned CalleeSavedRegs[] = {
+ static const uint16_t CalleeSavedRegs[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
- static const unsigned CalleeSavedRegsFP[] = {
+ static const uint16_t CalleeSavedRegsFP[] = {
MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
- static const unsigned CalleeSavedRegsIntr[] = {
+ static const uint16_t CalleeSavedRegsIntr[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
0
};
- static const unsigned CalleeSavedRegsIntrFP[] = {
+ static const uint16_t CalleeSavedRegsIntrFP[] = {
MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 10a3d5320636..82ee4997392c 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- MSP430RegisterInfo.h - MSP430 Register Information Impl --*- C++ -*-===//
+//===-- MSP430RegisterInfo.h - MSP430 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -36,18 +36,11 @@ public:
MSP430RegisterInfo(MSP430TargetMachine &tm, const TargetInstrInfo &tii);
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
- const TargetRegisterClass *
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const {
- // No sub-classes makes this really easy.
- return A;
- }
-
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index d1c2e3f7915c..3f2eb8ccef10 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MSP430RegisterInfo.td - MSP430 Register defs ----------*- tblgen -*-===//
+//===-- MSP430RegisterInfo.td - MSP430 Register defs -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 3ee14d9f7a83..edeaf34676bd 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -1,4 +1,4 @@
-//===- MSP430Subtarget.cpp - MSP430 Subtarget Information ---------*- C++ -*-=//
+//===-- MSP430Subtarget.cpp - MSP430 Subtarget Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,8 @@
using namespace llvm;
+void MSP430Subtarget::anchor() { }
+
MSP430Subtarget::MSP430Subtarget(const std::string &TT,
const std::string &CPU,
const std::string &FS) :
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 1ce5f11fe1bb..4d8792eede7f 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -1,4 +1,4 @@
-//====-- MSP430Subtarget.h - Define Subtarget for the MSP430 ---*- C++ -*--===//
+//===-- MSP430Subtarget.h - Define Subtarget for the MSP430 ----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,16 +15,16 @@
#define LLVM_TARGET_MSP430_SUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "MSP430GenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
class StringRef;
class MSP430Subtarget : public MSP430GenSubtargetInfo {
+ virtual void anchor();
bool ExtendedInsts;
public:
/// This constructor initializes the data members to match that
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 4dd893326e3f..9f2eda13d7fd 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MSP430.h"
#include "MSP430TargetMachine.h"
+#include "MSP430.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -28,24 +28,43 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
StringRef TT,
StringRef CPU,
StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameLowering(Subtarget) { }
+namespace {
+/// MSP430 Code Generator Pass Configuration Options.
+class MSP430PassConfig : public TargetPassConfig {
+public:
+ MSP430PassConfig(MSP430TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MSP430TargetMachine &getMSP430TargetMachine() const {
+ return getTM<MSP430TargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MSP430TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MSP430PassConfig(this, PM);
+}
-bool MSP430TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MSP430PassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createMSP430ISelDag(*this, OptLevel));
+ PM.add(createMSP430ISelDag(getMSP430TargetMachine(), getOptLevel()));
return false;
}
-bool MSP430TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool MSP430PassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
PM.add(createMSP430BranchSelectionPass());
return false;
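The constructor now threads TargetOptions and a CodeGenOpt::Level down from the caller, and per-function pass setup moves into a TargetPassConfig subclass. On the caller side the change looks roughly like this (a sketch against the LLVM 3.1-era registry API; makeTM is a hypothetical helper, and error handling is omitted):

#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

llvm::TargetMachine *makeTM(const llvm::Target &T, llvm::StringRef TT) {
  llvm::TargetOptions Options;  // e.g. Options.NoFramePointerElim = true;
  return T.createTargetMachine(TT, /*CPU=*/"", /*Features=*/"", Options,
                               llvm::Reloc::Default,
                               llvm::CodeModel::Default,
                               llvm::CodeGenOpt::Default);
}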
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index eb483dc8706f..f54146b3e338 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -1,4 +1,4 @@
-//==-- MSP430TargetMachine.h - Define TargetMachine for MSP430 ---*- C++ -*-==//
+//===-- MSP430TargetMachine.h - Define TargetMachine for MSP430 -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -39,8 +39,9 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
@@ -61,8 +62,7 @@ public:
return &TSInfo;
}
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // MSP430TargetMachine.
} // end namespace llvm
diff --git a/lib/Target/MSP430/TargetInfo/CMakeLists.txt b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
index 1526946af5fd..f6b40eab31b6 100644
--- a/lib/Target/MSP430/TargetInfo/CMakeLists.txt
+++ b/lib/Target/MSP430/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMSP430Info
MSP430TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMSP430Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMSP430Info MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..deafc2d2f558
--- /dev/null
+++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/MSP430/TargetInfo/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MSP430Info
+parent = MSP430
+required_libraries = MC Support Target
+add_to_library_groups = MSP430
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 53ad155f376c..786a0c5ed187 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -22,12 +22,13 @@
#include "llvm/ADT/Twine.h"
using namespace llvm;
-static bool isAcceptableChar(char C, bool AllowPeriod) {
+static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
if ((C < 'a' || C > 'z') &&
(C < 'A' || C > 'Z') &&
(C < '0' || C > '9') &&
C != '_' && C != '$' && C != '@' &&
- !(AllowPeriod && C == '.'))
+ !(AllowPeriod && C == '.') &&
+ !(AllowUTF8 && (C & 0x80)))
return false;
return true;
}
@@ -56,8 +57,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
// If any of the characters in the string is an unacceptable character, force
// quotes.
bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ bool AllowUTF8 = MAI.doesAllowUTF8();
for (unsigned i = 0, e = Str.size(); i != e; ++i)
- if (!isAcceptableChar(Str[i], AllowPeriod))
+ if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
return true;
return false;
}
@@ -74,8 +76,9 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
}
bool AllowPeriod = MAI.doesAllowPeriodsInName();
+ bool AllowUTF8 = MAI.doesAllowUTF8();
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- if (!isAcceptableChar(Str[i], AllowPeriod))
+ if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
MangleLetter(OutName, Str[i]);
else
OutName.push_back(Str[i]);
diff --git a/lib/Target/Mips/AsmParser/CMakeLists.txt b/lib/Target/Mips/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..ac21c259fb44
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsAsmParser
+ MipsAsmParser.cpp
+ )
+
diff --git a/lib/Target/Mips/AsmParser/LLVMBuild.txt b/lib/Target/Mips/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..e7ca243d0e7f
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/AsmParser/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsAsmParser
+parent = Mips
+required_libraries = MC MCParser Support MipsDesc MipsInfo
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/AsmParser/Makefile b/lib/Target/Mips/AsmParser/Makefile
new file mode 100644
index 000000000000..679acee9fe72
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/Mips/AsmParser/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmParser
+
+# Hack: we need to include the 'main' Mips target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
new file mode 100644
index 000000000000..58b559025757
--- /dev/null
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -0,0 +1,66 @@
+//===-- MipsAsmParser.cpp - Parse Mips assembly to MCInst instructions ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+namespace {
+class MipsAsmParser : public MCTargetAsmParser {
+ bool MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out);
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+
+ bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+
+ bool ParseDirective(AsmToken DirectiveID);
+
+public:
+ MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : MCTargetAsmParser() {
+ }
+
+};
+}
+
+bool MipsAsmParser::
+MatchAndEmitInstruction(SMLoc IDLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCStreamer &Out) {
+ return true;
+}
+
+bool MipsAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ return true;
+}
+
+bool MipsAsmParser::
+ParseInstruction(StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ return true;
+}
+
+bool MipsAsmParser::
+ParseDirective(AsmToken DirectiveID) {
+ return true;
+}
+
+extern "C" void LLVMInitializeMipsAsmParser() {
+ RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
+ RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
+ RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target);
+ RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget);
+}
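Note the contract here: MCTargetAsmParser hooks return true on failure, so this initial parser cleanly rejects every instruction, register, and directive; the patch only wires up registration. Once linked in, the target registry can already hand the parser out (a sketch; the triple string is illustrative):

#include "llvm/Support/TargetRegistry.h"
#include <string>

const llvm::Target *findMipsTarget() {
  std::string Err;
  // Returns the TheMipsTarget registered above; a real parser body can
  // then be filled in behind the same lookup.
  return llvm::TargetRegistry::lookupTarget("mips-unknown-linux-gnu", Err);
}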
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index 71391f322725..13d17e4e5293 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -1,15 +1,17 @@
set(LLVM_TARGET_DEFINITIONS Mips.td)
-llvm_tablegen(MipsGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(MipsGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(MipsGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(MipsGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(MipsGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(MipsGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(MipsCommonTableGen)
add_llvm_target(MipsCodeGen
+ MipsAnalyzeImmediate.cpp
MipsAsmPrinter.cpp
MipsCodeEmitter.cpp
MipsDelaySlotFiller.cpp
@@ -21,7 +23,7 @@ add_llvm_target(MipsCodeGen
MipsISelLowering.cpp
MipsFrameLowering.cpp
MipsMCInstLower.cpp
- MipsMCSymbolRefExpr.cpp
+ MipsMachineFunction.cpp
MipsRegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
@@ -29,19 +31,7 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsDesc
- LLVMMipsInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
+add_subdirectory(AsmParser)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index c45b35df8c11..3e9fbf1c5566 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
MipsInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMMipsAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..317057b913b1
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/InstPrinter/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsAsmPrinter
+parent = Mips
+required_libraries = MC Support
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile
index 74872a48b974..f07f3ed381ee 100644
--- a/lib/Target/Mips/InstPrinter/Makefile
+++ b/lib/Target/Mips/InstPrinter/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Target/Mips/AsmPrinter/Makefile --------------*- Makefile -*-===##
+##===- lib/Target/Mips/InstPrinter/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 3dafc6134488..6886b1745240 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -13,14 +13,15 @@
#define DEBUG_TYPE "asm-printer"
#include "MipsInstPrinter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "MipsGenAsmWriter.inc"
const char* Mips::MipsFCCToString(Mips::CondCode CC) {
@@ -61,12 +62,8 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) {
llvm_unreachable("Impossible condition code!");
}
-StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << '$' << LowercaseString(getRegisterName(RegNo));
+ OS << '$' << StringRef(getRegisterName(RegNo)).lower();
}
void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -75,6 +72,59 @@ void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printAnnotation(O, Annot);
}
+static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
+ int Offset = 0;
+ const MCSymbolRefExpr *SRE;
+
+ if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr)) {
+ SRE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+ assert(SRE && CE && "Binary expression must be sym+const.");
+ Offset = CE->getValue();
+ }
+ else if (!(SRE = dyn_cast<MCSymbolRefExpr>(Expr)))
+    llvm_unreachable("Unexpected MCExpr type.");
+
+ MCSymbolRefExpr::VariantKind Kind = SRE->getKind();
+
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case MCSymbolRefExpr::VK_None: break;
+ case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT16: OS << "%got("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break;
+ case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break;
+ case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM: OS << "%tlsldm("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI: OS << "%dtprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO: OS << "%dtprel_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break;
+ case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
+ }
+
+ OS << SRE->getSymbol();
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if ((Kind == MCSymbolRefExpr::VK_Mips_GPOFF_HI) ||
+ (Kind == MCSymbolRefExpr::VK_Mips_GPOFF_LO))
+ OS << ")))";
+ else if (Kind != MCSymbolRefExpr::VK_None)
+ OS << ')';
+}
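As a point of reference, the formatting printExpr produces for a sym+const operand can be sketched standalone. This is an illustrative toy, not code from the patch; the name renderHi and the single-modifier %hi case are assumptions:

    // Hypothetical sketch of the "%hi(sym+offset)" rendering done by printExpr.
    #include <cstdint>
    #include <iostream>
    #include <string>

    static std::string renderHi(const std::string &Sym, int64_t Offset) {
      std::string S = "%hi(" + Sym;
      if (Offset)                      // offsets print as sym+N or sym-N
        S += (Offset > 0 ? "+" : "") + std::to_string(Offset);
      return S + ")";                  // one closing paren for a single modifier
    }

    int main() {
      std::cout << renderHi("foo", 8) << "\n";   // prints: %hi(foo+8)
      std::cout << renderHi("bar", -4) << "\n";  // prints: %hi(bar-4)
    }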
+
void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
@@ -82,14 +132,14 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
printRegName(O, Op.getReg());
return;
}
-
+
if (Op.isImm()) {
O << Op.getImm();
return;
}
-
+
assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
+ printExpr(Op.getExpr(), O);
}
void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 5c1116538c61..76b839b2127f 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===//
+//=== MipsInstPrinter.h - Convert Mips MCInst to assembly syntax -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -18,7 +18,7 @@
namespace llvm {
// These enumeration declarations were originally in MipsInstrInfo.h but
// had to be moved here to avoid circular dependencies between
-// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
namespace Mips {
// Mips Branch Codes
enum FPBranchCode {
@@ -77,17 +77,17 @@ class TargetMachine;
class MipsInstPrinter : public MCInstPrinter {
public:
- MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
-
+ MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
- static const char *getInstructionName(unsigned Opcode);
static const char *getRegisterName(unsigned RegNo);
-
- virtual StringRef getOpcodeName(unsigned Opcode) const;
+
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
-
+
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt
new file mode 100644
index 000000000000..abbed8c90fc8
--- /dev/null
+++ b/lib/Target/Mips/LLVMBuild.txt
@@ -0,0 +1,34 @@
+;===- ./lib/Target/Mips/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = Mips
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = MipsCodeGen
+parent = Mips
+required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
index 2ceb5c95746b..fa231507a2ef 100644
--- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -3,13 +3,7 @@ add_llvm_library(LLVMMipsDesc
MipsMCAsmInfo.cpp
MipsMCCodeEmitter.cpp
MipsMCTargetDesc.cpp
- )
-
-add_llvm_library_dependencies(LLVMMipsDesc
- LLVMMC
- LLVMMipsAsmPrinter
- LLVMMipsInfo
- LLVMSupport
+ MipsELFObjectWriter.cpp
)
add_dependencies(LLVMMipsDesc MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..29f5da691180
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/MCTargetDesc/LLVMBuild.txt -------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsDesc
+parent = Mips
+required_libraries = MC MipsAsmPrinter MipsInfo Support
+add_to_library_groups = Mips
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index f190ec42c776..e79be3363623 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -1,44 +1,172 @@
+//===-- MipsAsmBackend.cpp - Mips Asm Backend ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MipsAsmBackend and MipsELFObjectWriter classes.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "MipsBaseInfo.h"
+#include "MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCTargetDesc.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Object/MachOFormat.h"
-#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
-namespace {
-class MipsELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
- bool HasRelocationAddend)
- : MCELFObjectTargetWriter(is64Bit, OSType, EMachine,
- HasRelocationAddend) {}
-};
+// Prepare the fixup value for placement in the instruction's target field.
+static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+
+ // Add/subtract and shift
+ switch (Kind) {
+ default:
+ return 0;
+ case FK_GPRel_4:
+ case FK_Data_4:
+ case Mips::fixup_Mips_LO16:
+ break;
+ case Mips::fixup_Mips_PC16:
+ // So far we are only using this type for branches.
+    // Branch displacements are measured from the instruction following the
+    // branch, so the value is reduced by one instruction size (4 bytes).
+    Value -= 4;
+    // The displacement is then divided by 4, giving an 18-bit address range.
+ Value >>= 2;
+ break;
+ case Mips::fixup_Mips_26:
+ // So far we are only using this type for jumps.
+    // The displacement is divided by 4, giving a 28-bit address range.
+ Value >>= 2;
+ break;
+ case Mips::fixup_Mips_HI16:
+ case Mips::fixup_Mips_GOT_Local:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ Value = ((Value + 0x8000) >> 16) & 0xffff;
+ break;
+ }
+
+ return Value;
+}
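The +0x8000 bias in the HI16 case is the standard hi/lo split: the matching LO16 half is sign-extended when it is added back, so the high half must round up whenever bit 15 of the value is set. A minimal sketch of the invariant this relies on (assumed hardware semantics, not code from the patch):

    // Verifies (hi << 16) + sext(lo) == value for the biased hi/lo split.
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Value = 0x12349876;            // bit 15 set, forcing the carry
      uint16_t Hi = (Value + 0x8000) >> 16;   // biased high half: 0x1235
      int16_t Lo = Value & 0xffff;            // low half, sign-extends to -0x678a
      assert(((uint32_t)Hi << 16) + (int32_t)Lo == Value);
      return 0;
    }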
+namespace {
class MipsAsmBackend : public MCAsmBackend {
+ Triple::OSType OSType;
+ bool IsLittle; // Big or little endian
+ bool Is64Bit; // 32 or 64 bit words
+
public:
- MipsAsmBackend(const Target &T)
- : MCAsmBackend() {}
+ MipsAsmBackend(const Target &T, Triple::OSType _OSType,
+ bool _isLittle, bool _is64Bit)
+ :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {}
- unsigned getNumFixupKinds() const {
- return 1; //tbd
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit);
}
/// applyFixup - Apply the \arg Value for the given \arg Fixup into the provided
/// data fragment, at the offset specified by the fixup and following the
/// fixup kind as appropriate.
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
+ MCFixupKind Kind = Fixup.getKind();
+ Value = adjustFixupValue((unsigned)Kind, Value);
+ int64_t SymOffset = MipsGetSymAndOffset(Fixup).second;
+
+ if (!Value && !SymOffset)
+ return; // Doesn't change encoding.
+
+ // Where do we start in the object
+ unsigned Offset = Fixup.getOffset();
+ // Number of bytes we need to fixup
+ unsigned NumBytes = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+    // Full size in bytes of the fixed-up word; used to index big-endian bytes.
+ unsigned FullSize;
+
+ switch ((unsigned)Kind) {
+ case Mips::fixup_Mips_16:
+ FullSize = 2;
+ break;
+ case Mips::fixup_Mips_64:
+ FullSize = 8;
+ break;
+ default:
+ FullSize = 4;
+ break;
+ }
+
+ // Grab current value, if any, from bits.
+ uint64_t CurVal = 0;
+
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ CurVal |= (uint64_t)((uint8_t)Data[Offset + Idx]) << (i*8);
+ }
+
+ uint64_t Mask = ((uint64_t)(-1) >> (64 - getFixupKindInfo(Kind).TargetSize));
+ CurVal |= (Value + SymOffset) & Mask;
+
+ // Write out the fixed up bytes back to the code/data bits.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittle ? i : (FullSize - 1 - i);
+ Data[Offset + Idx] = (uint8_t)((CurVal >> (i*8)) & 0xff);
+ }
+ }
+
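The gather/patch/scatter loop above can be illustrated in isolation. A minimal sketch, simplified to a fixed 4-byte field with no symbol offset; the function name and test values are hypothetical:

    // Endian-aware read-modify-write of a 32-bit fixup field, as applyFixup does.
    #include <cassert>
    #include <cstdint>

    static void patch32(uint8_t *Data, uint32_t Value, bool IsLittle) {
      uint32_t Cur = 0;
      for (unsigned i = 0; i != 4; ++i) {     // gather the existing bytes
        unsigned Idx = IsLittle ? i : 3 - i;
        Cur |= (uint32_t)Data[Idx] << (i * 8);
      }
      Cur |= Value;                           // OR in the fixed-up value
      for (unsigned i = 0; i != 4; ++i) {     // scatter the bytes back
        unsigned Idx = IsLittle ? i : 3 - i;
        Data[Idx] = (Cur >> (i * 8)) & 0xff;
      }
    }

    int main() {
      uint8_t Word[4] = {0x0c, 0x00, 0x00, 0x00}; // opcode byte, empty target
      patch32(Word, 0x123456, /*IsLittle=*/false); // big-endian patch
      assert(Word[1] == 0x12 && Word[2] == 0x34 && Word[3] == 0x56);
      return 0;
    }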
+ unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
+      // This table *must* be in the same order as the fixup_* kinds in
+ // MipsFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_Mips_16", 0, 16, 0 },
+ { "fixup_Mips_32", 0, 32, 0 },
+ { "fixup_Mips_REL32", 0, 32, 0 },
+ { "fixup_Mips_26", 0, 26, 0 },
+ { "fixup_Mips_HI16", 0, 16, 0 },
+ { "fixup_Mips_LO16", 0, 16, 0 },
+ { "fixup_Mips_GPREL16", 0, 16, 0 },
+ { "fixup_Mips_LITERAL", 0, 16, 0 },
+ { "fixup_Mips_GOT_Global", 0, 16, 0 },
+ { "fixup_Mips_GOT_Local", 0, 16, 0 },
+ { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_CALL16", 0, 16, 0 },
+ { "fixup_Mips_GPREL32", 0, 32, 0 },
+ { "fixup_Mips_SHIFT5", 6, 5, 0 },
+ { "fixup_Mips_SHIFT6", 6, 5, 0 },
+ { "fixup_Mips_64", 0, 64, 0 },
+ { "fixup_Mips_TLSGD", 0, 16, 0 },
+ { "fixup_Mips_GOTTPREL", 0, 16, 0 },
+ { "fixup_Mips_TPREL_HI", 0, 16, 0 },
+ { "fixup_Mips_TPREL_LO", 0, 16, 0 },
+ { "fixup_Mips_TLSLDM", 0, 16, 0 },
+ { "fixup_Mips_DTPREL_HI", 0, 16, 0 },
+ { "fixup_Mips_DTPREL_LO", 0, 16, 0 },
+ { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return Infos[Kind - FirstTargetFixupKind];
}
/// @name Target Relaxation Interfaces
@@ -48,70 +176,62 @@ public:
/// relaxation.
///
/// \param Inst - The instruction to test.
- bool MayNeedRelaxation(const MCInst &Inst) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const {
return false;
}
- /// RelaxInstruction - Relax the instruction in the given fragment to the next
- /// wider instruction.
+ /// fixupNeedsRelaxation - Target specific predicate for whether a given
+ /// fixup requires the associated instruction to be relaxed.
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME.
+ assert(0 && "RelaxInstruction() unimplemented");
+ return false;
+ }
+
+  /// relaxInstruction - Relax the instruction in the given fragment
+ /// to the next wider instruction.
///
- /// \param Inst - The instruction to relax, which may be the same as the
- /// output.
+ /// \param Inst - The instruction to relax, which may be the same
+ /// as the output.
/// \param Res [output] - On return, the relaxed instruction.
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
}
-
+
/// @}
- /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
- /// output. If the target cannot generate such a sequence, it should return an
- /// error.
+  /// writeNopData - Write an (optimal) nop sequence of Count bytes
+ /// to the given output. If the target cannot generate such a sequence,
+ /// it should return an error.
///
/// \return - True on success.
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
- return false;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ return true;
}
-};
+}; // class MipsAsmBackend
-class MipsEB_AsmBackend : public MipsAsmBackend {
-public:
- Triple::OSType OSType;
-
- MipsEB_AsmBackend(const Target &T, Triple::OSType _OSType)
- : MipsAsmBackend(T), OSType(_OSType) {}
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ false);
- }
-
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
- }
-};
-
-class MipsEL_AsmBackend : public MipsAsmBackend {
-public:
- Triple::OSType OSType;
-
- MipsEL_AsmBackend(const Target &T, Triple::OSType _OSType)
- : MipsAsmBackend(T), OSType(_OSType) {}
+} // namespace
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ true);
- }
+// MCAsmBackend
+MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/true, /*Is64Bit*/false);
+}
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false);
- }
-};
+MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/false, /*Is64Bit*/false);
}
-MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
+MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/true, /*Is64Bit*/true);
+}
- // just return little endian for now
- //
- return new MipsEL_AsmBackend(T, Triple(TT).getOS());
+MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT) {
+ return new MipsAsmBackend(T, Triple(TT).getOS(),
+ /*IsLittle*/false, /*Is64Bit*/true);
}
+
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index f7a6fa949091..fb1c5ce6b6c6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- MipsBaseInfo.h - Top level definitions for ARM ------- --*- C++ -*-===//
+//===-- MipsBaseInfo.h - Top level definitions for MIPS MC ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,103 @@
#ifndef MIPSBASEINFO_H
#define MIPSBASEINFO_H
+#include "MipsFixupKinds.h"
#include "MipsMCTargetDesc.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
+
+/// MipsII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MipsII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // Mips Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_GOT16 - Represents the offset into the global offset table at which
+    /// the address of the relocation entry's symbol resides during execution.
+ MO_GOT16,
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HI,
+ MO_ABS_LO,
+
+ /// MO_TLSGD - Represents the offset into the global offset table at which
+    // the module ID and TLS block offset reside during execution (General
+ // Dynamic TLS).
+ MO_TLSGD,
+
+ /// MO_TLSLDM - Represents the offset into the global offset table at which
+    // the module ID and TLS block offset reside during execution (Local
+ // Dynamic TLS).
+ MO_TLSLDM,
+ MO_DTPREL_HI,
+ MO_DTPREL_LO,
+
+ /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
+ // Exec TLS).
+ MO_GOTTPREL,
+
+ /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
+ // the thread pointer (Local Exec TLS).
+ MO_TPREL_HI,
+ MO_TPREL_LO,
+
+ // N32/64 Flags.
+ MO_GPOFF_HI,
+ MO_GPOFF_LO,
+ MO_GOT_DISP,
+ MO_GOT_PAGE,
+ MO_GOT_OFST
+ };
+
+ enum {
+ //===------------------------------------------------------------------===//
+ // Instruction encodings. These are the standard/most common forms for
+ // Mips instructions.
+ //
+
+ // Pseudo - This represents an instruction that is a pseudo instruction
+ // or one that has not been implemented yet. It is illegal to code generate
+ // it, but tolerated for intermediate implementation stages.
+ Pseudo = 0,
+
+ /// FrmR - This form is for instructions of the format R.
+ FrmR = 1,
+ /// FrmI - This form is for instructions of the format I.
+ FrmI = 2,
+ /// FrmJ - This form is for instructions of the format J.
+ FrmJ = 3,
+ /// FrmFR - This form is for instructions of the format FR.
+ FrmFR = 4,
+ /// FrmFI - This form is for instructions of the format FI.
+ FrmFI = 5,
+ /// FrmOther - This form is for instructions that have no specific format.
+ FrmOther = 6,
+
+ FormMask = 15
+ };
+}
+
+
/// getMipsRegisterNumbering - Given the enum value for some register,
/// return the number that it corresponds to.
inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
@@ -98,15 +190,43 @@ inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
case Mips::D14:
return 28;
case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
+ case Mips::HWR29:
return 29;
case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
- case Mips::D15:
+ case Mips::D15:
return 30;
case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
return 31;
default: llvm_unreachable("Unknown register number!");
}
- return 0; // Not reached
+}
+
+inline static std::pair<const MCSymbolRefExpr*, int64_t>
+MipsGetSymAndOffset(const MCFixup &Fixup) {
+ MCFixupKind FixupKind = Fixup.getKind();
+
+ if ((FixupKind < FirstTargetFixupKind) ||
+ (FixupKind >= MCFixupKind(Mips::LastTargetFixupKind)))
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ const MCExpr *Expr = Fixup.getValue();
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Expr);
+ const MCExpr *LHS = BE->getLHS();
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(BE->getRHS());
+
+ if ((LHS->getKind() != MCExpr::SymbolRef) || !CE)
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ return std::make_pair(cast<MCSymbolRefExpr>(LHS), CE->getValue());
+ }
+
+ if (Kind != MCExpr::SymbolRef)
+ return std::make_pair((const MCSymbolRefExpr*)0, (int64_t)0);
+
+ return std::make_pair(cast<MCSymbolRefExpr>(Expr), 0);
}
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
new file mode 100644
index 000000000000..2091bec50082
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -0,0 +1,249 @@
+//===-- MipsELFObjectWriter.cpp - Mips ELF Writer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <list>
+
+using namespace llvm;
+
+namespace {
+ struct RelEntry {
+ RelEntry(const ELFRelocationEntry &R, const MCSymbol *S, int64_t O) :
+ Reloc(R), Sym(S), Offset(O) {}
+ ELFRelocationEntry Reloc;
+ const MCSymbol *Sym;
+ int64_t Offset;
+ };
+
+ typedef std::list<RelEntry> RelLs;
+ typedef RelLs::iterator RelLsIter;
+
+ class MipsELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI);
+
+ virtual ~MipsELFObjectWriter();
+
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual unsigned getEFlags() const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ virtual void sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs);
+ };
+}
+
+MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(_is64Bit, OSABI, ELF::EM_MIPS,
+ /*HasRelocationAddend*/ false) {}
+
+MipsELFObjectWriter::~MipsELFObjectWriter() {}
+
+// FIXME: get the real EABI Version from the Subtarget class.
+unsigned MipsELFObjectWriter::getEFlags() const {
+
+ // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static)
+ unsigned Flag = ELF::EF_MIPS_NOREORDER;
+
+ if (is64Bit())
+ Flag |= ELF::EF_MIPS_ARCH_64R2;
+ else
+ Flag |= ELF::EF_MIPS_ARCH_32R2;
+ return Flag;
+}
+
+const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0.");
+ const MCSymbol &Sym = Target.getSymA()->getSymbol().AliasedSymbol();
+
+ if (Sym.getSection().getKind().isMergeableCString() ||
+ Sym.getSection().getKind().isMergeableConst())
+ return &Sym;
+
+ return NULL;
+}
+
+unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // determine the type of the relocation
+ unsigned Type = (unsigned)ELF::R_MIPS_NONE;
+ unsigned Kind = (unsigned)Fixup.getKind();
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_Data_4:
+ Type = ELF::R_MIPS_32;
+ break;
+ case FK_GPRel_4:
+ Type = ELF::R_MIPS_GPREL32;
+ break;
+ case Mips::fixup_Mips_GPREL16:
+ Type = ELF::R_MIPS_GPREL16;
+ break;
+ case Mips::fixup_Mips_26:
+ Type = ELF::R_MIPS_26;
+ break;
+ case Mips::fixup_Mips_CALL16:
+ Type = ELF::R_MIPS_CALL16;
+ break;
+ case Mips::fixup_Mips_GOT_Global:
+ case Mips::fixup_Mips_GOT_Local:
+ Type = ELF::R_MIPS_GOT16;
+ break;
+ case Mips::fixup_Mips_HI16:
+ Type = ELF::R_MIPS_HI16;
+ break;
+ case Mips::fixup_Mips_LO16:
+ Type = ELF::R_MIPS_LO16;
+ break;
+ case Mips::fixup_Mips_TLSGD:
+ Type = ELF::R_MIPS_TLS_GD;
+ break;
+ case Mips::fixup_Mips_GOTTPREL:
+ Type = ELF::R_MIPS_TLS_GOTTPREL;
+ break;
+ case Mips::fixup_Mips_TPREL_HI:
+ Type = ELF::R_MIPS_TLS_TPREL_HI16;
+ break;
+ case Mips::fixup_Mips_TPREL_LO:
+ Type = ELF::R_MIPS_TLS_TPREL_LO16;
+ break;
+ case Mips::fixup_Mips_TLSLDM:
+ Type = ELF::R_MIPS_TLS_LDM;
+ break;
+ case Mips::fixup_Mips_DTPREL_HI:
+ Type = ELF::R_MIPS_TLS_DTPREL_HI16;
+ break;
+ case Mips::fixup_Mips_DTPREL_LO:
+ Type = ELF::R_MIPS_TLS_DTPREL_LO16;
+ break;
+ case Mips::fixup_Mips_Branch_PCRel:
+ case Mips::fixup_Mips_PC16:
+ Type = ELF::R_MIPS_PC16;
+ break;
+ }
+
+ return Type;
+}
+
+// Return true if R is either a GOT16 against a local symbol or HI16.
+static bool NeedsMatchingLo(const MCAssembler &Asm, const RelEntry &R) {
+ if (!R.Sym)
+ return false;
+
+ MCSymbolData &SD = Asm.getSymbolData(R.Sym->AliasedSymbol());
+
+ return ((R.Reloc.Type == ELF::R_MIPS_GOT16) && !SD.isExternal()) ||
+ (R.Reloc.Type == ELF::R_MIPS_HI16);
+}
+
+static bool HasMatchingLo(const MCAssembler &Asm, RelLsIter I, RelLsIter Last) {
+ if (I == Last)
+ return false;
+
+ RelLsIter Hi = I++;
+
+ return (I->Reloc.Type == ELF::R_MIPS_LO16) && (Hi->Sym == I->Sym) &&
+ (Hi->Offset == I->Offset);
+}
+
+static bool HasSameSymbol(const RelEntry &R0, const RelEntry &R1) {
+ return R0.Sym == R1.Sym;
+}
+
+static int CompareOffset(const RelEntry &R0, const RelEntry &R1) {
+ return (R0.Offset > R1.Offset) ? 1 : ((R0.Offset == R1.Offset) ? 0 : -1);
+}
+
+void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
+ std::vector<ELFRelocationEntry> &Relocs) {
+  // Call the default function first. Relocations are sorted in descending
+ // order of r_offset.
+ MCELFObjectTargetWriter::sortRelocs(Asm, Relocs);
+
+ RelLs RelocLs;
+ std::vector<RelLsIter> Unmatched;
+
+ // Fill RelocLs. Traverse Relocs backwards so that relocations in RelocLs
+ // are in ascending order of r_offset.
+ for (std::vector<ELFRelocationEntry>::reverse_iterator R = Relocs.rbegin();
+ R != Relocs.rend(); ++R) {
+ std::pair<const MCSymbolRefExpr*, int64_t> P =
+ MipsGetSymAndOffset(*R->Fixup);
+ RelocLs.push_back(RelEntry(*R, P.first ? &P.first->getSymbol() : 0,
+ P.second));
+ }
+
+ // Get list of unmatched HI16 and GOT16.
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R)
+ if (NeedsMatchingLo(Asm, *R) && !HasMatchingLo(Asm, R, --RelocLs.end()))
+ Unmatched.push_back(R);
+
+ // Insert unmatched HI16 and GOT16 immediately before their matching LO16.
+ for (std::vector<RelLsIter>::iterator U = Unmatched.begin();
+ U != Unmatched.end(); ++U) {
+ RelLsIter LoPos = RelocLs.end(), HiPos = *U;
+ bool MatchedLo = false;
+
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R) {
+ if ((R->Reloc.Type == ELF::R_MIPS_LO16) && HasSameSymbol(*HiPos, *R) &&
+ (CompareOffset(*R, *HiPos) >= 0) &&
+ ((LoPos == RelocLs.end()) || ((CompareOffset(*R, *LoPos) < 0)) ||
+ (!MatchedLo && !CompareOffset(*R, *LoPos))))
+ LoPos = R;
+
+ MatchedLo = NeedsMatchingLo(Asm, *R) &&
+ HasMatchingLo(Asm, R, --RelocLs.end());
+ }
+
+ // If a matching LoPos was found, move HiPos and insert it before LoPos.
+ // Make the offsets of HiPos and LoPos match.
+ if (LoPos != RelocLs.end()) {
+ HiPos->Offset = LoPos->Offset;
+ RelocLs.insert(LoPos, *HiPos);
+ RelocLs.erase(HiPos);
+ }
+ }
+
+ // Put the sorted list back in reverse order.
+ assert(Relocs.size() == RelocLs.size());
+ unsigned I = RelocLs.size();
+
+ for (RelLsIter R = RelocLs.begin(); R != RelocLs.end(); ++R)
+ Relocs[--I] = R->Reloc;
+}
+
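The effect of the pairing pass is easiest to see on a small hypothetical list (relocation types only, shown in ascending r_offset; x and y are distinct symbols):

    before: HI16(x)  HI16(y)  LO16(y)  LO16(x)
    after:  HI16(y)  LO16(y)  HI16(x)  LO16(x)

HI16(y) is already immediately followed by its matching LO16, so it stays put; HI16(x) is unmatched, so it is moved to sit directly before LO16(x) and its offset is adjusted to match, which is what the HiPos/LoPos bookkeeping above implements.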
+MCObjectWriter *llvm::createMipsELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian,
+ bool Is64Bit) {
+ MCELFObjectTargetWriter *MOTW = new MipsELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 8b099eab91fd..9b76eda861dc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -1,7 +1,4 @@
-#ifndef LLVM_Mips_MipsFIXUPKINDS_H
-#define LLVM_Mips_MipsFIXUPKINDS_H
-
-//===-- Mips/MipsFixupKinds.h - Mips Specific Fixup Entries --------*- C++ -*-===//
+//===-- MipsFixupKinds.h - Mips Specific Fixup Entries ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -10,81 +7,100 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_MIPS_MIPSFIXUPKINDS_H
+#define LLVM_MIPS_MIPSFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
namespace llvm {
namespace Mips {
- enum Fixups {
- // fixup_Mips_xxx - R_MIPS_NONE
- fixup_Mips_NONE = FirstTargetFixupKind,
+  // Although most of the current fixup types map to a unique relocation,
+  // a single relocation can have more than one fixup type, so fixups must
+  // be uniquely named.
+  //
+  // This table *must* be in the same order as
+ // MCFixupKindInfo Infos[Mips::NumTargetFixupKinds]
+ // in MipsAsmBackend.cpp.
+ //
+ enum Fixups {
+ // Branch fixups resulting in R_MIPS_16.
+ fixup_Mips_16 = FirstTargetFixupKind,
- // fixup_Mips_xxx - R_MIPS_16.
- fixup_Mips_16,
+ // Pure 32 bit data fixup resulting in - R_MIPS_32.
+ fixup_Mips_32,
- // fixup_Mips_xxx - R_MIPS_32.
- fixup_Mips_32,
+ // Full 32 bit data relative data fixup resulting in - R_MIPS_REL32.
+ fixup_Mips_REL32,
- // fixup_Mips_xxx - R_MIPS_REL32.
- fixup_Mips_REL32,
+ // Jump 26 bit fixup resulting in - R_MIPS_26.
+ fixup_Mips_26,
- // fixup_Mips_xxx - R_MIPS_26.
- fixup_Mips_26,
+ // Pure upper 16 bit fixup resulting in - R_MIPS_HI16.
+ fixup_Mips_HI16,
- // fixup_Mips_xxx - R_MIPS_HI16.
- fixup_Mips_HI16,
+ // Pure lower 16 bit fixup resulting in - R_MIPS_LO16.
+ fixup_Mips_LO16,
- // fixup_Mips_xxx - R_MIPS_LO16.
- fixup_Mips_LO16,
+    // 16 bit fixup for GP offset resulting in - R_MIPS_GPREL16.
+ fixup_Mips_GPREL16,
- // fixup_Mips_xxx - R_MIPS_GPREL16.
- fixup_Mips_GPREL16,
+ // 16 bit literal fixup resulting in - R_MIPS_LITERAL.
+ fixup_Mips_LITERAL,
- // fixup_Mips_xxx - R_MIPS_LITERAL.
- fixup_Mips_LITERAL,
+ // Global symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Global,
- // fixup_Mips_xxx - R_MIPS_GOT16.
- fixup_Mips_GOT16,
+ // Local symbol fixup resulting in - R_MIPS_GOT16.
+ fixup_Mips_GOT_Local,
- // fixup_Mips_xxx - R_MIPS_PC16.
- fixup_Mips_PC16,
+ // PC relative branch fixup resulting in - R_MIPS_PC16.
+ fixup_Mips_PC16,
- // fixup_Mips_xxx - R_MIPS_CALL16.
- fixup_Mips_CALL16,
+ // resulting in - R_MIPS_CALL16.
+ fixup_Mips_CALL16,
- // fixup_Mips_xxx - R_MIPS_GPREL32.
- fixup_Mips_GPREL32,
+ // resulting in - R_MIPS_GPREL32.
+ fixup_Mips_GPREL32,
- // fixup_Mips_xxx - R_MIPS_SHIFT5.
- fixup_Mips_SHIFT5,
+ // resulting in - R_MIPS_SHIFT5.
+ fixup_Mips_SHIFT5,
- // fixup_Mips_xxx - R_MIPS_SHIFT6.
- fixup_Mips_SHIFT6,
+ // resulting in - R_MIPS_SHIFT6.
+ fixup_Mips_SHIFT6,
- // fixup_Mips_xxx - R_MIPS_64.
- fixup_Mips_64,
+ // Pure 64 bit data fixup resulting in - R_MIPS_64.
+ fixup_Mips_64,
- // fixup_Mips_xxx - R_MIPS_TLS_GD.
- fixup_Mips_TLSGD,
+ // resulting in - R_MIPS_TLS_GD.
+ fixup_Mips_TLSGD,
- // fixup_Mips_xxx - R_MIPS_TLS_GOTTPREL.
- fixup_Mips_GOTTPREL,
+ // resulting in - R_MIPS_TLS_GOTTPREL.
+ fixup_Mips_GOTTPREL,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_HI16.
- fixup_Mips_TPREL_HI,
+ // resulting in - R_MIPS_TLS_TPREL_HI16.
+ fixup_Mips_TPREL_HI,
- // fixup_Mips_xxx - R_MIPS_TLS_TPREL_LO16.
- fixup_Mips_TPREL_LO,
+ // resulting in - R_MIPS_TLS_TPREL_LO16.
+ fixup_Mips_TPREL_LO,
- // fixup_Mips_xxx - yyy. // This should become R_MIPS_PC16
- fixup_Mips_Branch_PCRel,
+ // resulting in - R_MIPS_TLS_LDM.
+ fixup_Mips_TLSLDM,
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
- };
-} // namespace llvm
+ // resulting in - R_MIPS_TLS_DTPREL_HI16.
+ fixup_Mips_DTPREL_HI,
+
+ // resulting in - R_MIPS_TLS_DTPREL_LO16.
+ fixup_Mips_DTPREL_LO,
+
+ // PC relative branch fixup resulting in - R_MIPS_PC16
+ fixup_Mips_Branch_PCRel,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+ };
} // namespace Mips
+} // namespace llvm
-#endif /* LLVM_Mips_MipsFIXUPKINDS_H */
+#endif // LLVM_MIPS_MIPSFIXUPKINDS_H
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 71ae80498995..9d67aa1856e3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- MipsMCAsmInfo.cpp - Mips asm properties ---------------------------===//
+//===-- MipsMCAsmInfo.cpp - Mips Asm Properties ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,6 +16,8 @@
using namespace llvm;
+void MipsMCAsmInfo::anchor() { }
+
MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
if ((TheTriple.getArch() == Triple::mips) ||
@@ -25,11 +27,12 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
- Data64bitsDirective = 0;
+ Data64bitsDirective = "\t.8byte\t";
PrivateGlobalPrefix = "$";
CommentString = "#";
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
+ GPRel64Directive = "\t.gpdword\t";
WeakRefDirective = "\t.weak\t";
SupportsDebugInformation = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 41b719207b7b..e1d878936f31 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- MipsMCAsmInfo.h - Mips asm properties ---------------*- C++ -*--====//
+//===-- MipsMCAsmInfo.h - Mips Asm Info ------------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,14 @@
#ifndef MIPSTARGETASMINFO_H
#define MIPSTARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
class MipsMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
public:
explicit MipsMCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index d66de23ba115..27954b174ed9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- MipsMCCodeEmitter.cpp - Convert Mips code to machine code ---------===//
+//===-- MipsMCCodeEmitter.cpp - Convert Mips Code to Machine Code ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,16 +12,18 @@
//===----------------------------------------------------------------------===//
//
#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsFixupKinds.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
-#include "MCTargetDesc/MipsMCTargetDesc.h"
using namespace llvm;
@@ -31,22 +33,252 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT
const MCInstrInfo &MCII;
const MCSubtargetInfo &STI;
+ MCContext &Ctx;
+ bool IsLittleEndian;
public:
MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : MCII(mcii), STI(sti) {}
+ MCContext &ctx, bool IsLittle) :
+ MCII(mcii), STI(sti) , Ctx(ctx), IsLittleEndian(IsLittle) {}
~MipsMCCodeEmitter() {}
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
+ void EmitByte(unsigned char C, raw_ostream &OS) const {
+ OS << (char)C;
}
+
+ void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const {
+    // Output the instruction encoding in the target's byte order.
+ for (unsigned i = 0; i < Size; ++i) {
+ unsigned Shift = IsLittleEndian ? i * 8 : (Size - 1 - i) * 8;
+ EmitByte((Val >> Shift) & 0xff, OS);
+ }
+ }
+
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+  // getJumpTargetOpValue - Return binary encoding of the jump
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ // getBranchTargetOpValue - Return binary encoding of the branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+  // getMachineOpValue - Return binary encoding of operand. If the machine
+ // operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
+ unsigned getMemEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+
}; // class MipsMCCodeEmitter
} // namespace
-MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new MipsMCCodeEmitter(MCII, STI, Ctx);
+MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx)
+{
+ return new MipsMCCodeEmitter(MCII, STI, Ctx, false);
+}
+
+MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx)
+{
+ return new MipsMCCodeEmitter(MCII, STI, Ctx, true);
+}
+
+/// EncodeInstruction - Emit the instruction.
+/// The instruction size is currently fixed at 4 bytes.
+void MipsMCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const
+{
+ uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
+
+ // Check for unimplemented opcodes.
+ // Unfortunately in MIPS both NOT and SLL will come in with Binary == 0
+ // so we have to special check for them.
+ unsigned Opcode = MI.getOpcode();
+ if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary)
+ llvm_unreachable("unimplemented opcode in EncodeInstruction()");
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded and shouldn't be here
+ // in the first place!
+ if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo)
+ llvm_unreachable("Pseudo opcode found in EncodeInstruction()");
+
+ // For now all instructions are 4 bytes
+ int Size = 4; // FIXME: Have Desc.getSize() return the correct value!
+
+ EmitInstruction(Binary, Size, OS);
}
+
+/// getBranchTargetOpValue - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_Mips_PC16)));
+ return 0;
+}
+
+/// getJumpTargetOpValue - Return binary encoding of the jump
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getJumpTargetOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_Mips_26)));
+ return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ unsigned RegNo = getMipsRegisterNumbering(Reg);
+ return RegNo;
+ } else if (MO.isImm()) {
+ return static_cast<unsigned>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ }
+
+ // MO must be an Expr.
+ assert(MO.isExpr());
+
+ const MCExpr *Expr = MO.getExpr();
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ Expr = static_cast<const MCBinaryExpr*>(Expr)->getLHS();
+ Kind = Expr->getKind();
+ }
+
+ assert (Kind == MCExpr::SymbolRef);
+
+ Mips::Fixups FixupKind;
+
+ switch(cast<MCSymbolRefExpr>(Expr)->getKind()) {
+ case MCSymbolRefExpr::VK_Mips_GPREL:
+ FixupKind = Mips::fixup_Mips_GPREL16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT_CALL:
+ FixupKind = Mips::fixup_Mips_CALL16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT16:
+ FixupKind = Mips::fixup_Mips_GOT_Global;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOT:
+ FixupKind = Mips::fixup_Mips_GOT_Local;
+ break;
+ case MCSymbolRefExpr::VK_Mips_ABS_HI:
+ FixupKind = Mips::fixup_Mips_HI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_ABS_LO:
+ FixupKind = Mips::fixup_Mips_LO16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TLSGD:
+ FixupKind = Mips::fixup_Mips_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TLSLDM:
+ FixupKind = Mips::fixup_Mips_TLSLDM;
+ break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_HI:
+ FixupKind = Mips::fixup_Mips_DTPREL_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_DTPREL_LO:
+ FixupKind = Mips::fixup_Mips_DTPREL_LO;
+ break;
+ case MCSymbolRefExpr::VK_Mips_GOTTPREL:
+ FixupKind = Mips::fixup_Mips_GOTTPREL;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_HI:
+ FixupKind = Mips::fixup_Mips_TPREL_HI;
+ break;
+ case MCSymbolRefExpr::VK_Mips_TPREL_LO:
+ FixupKind = Mips::fixup_Mips_TPREL_LO;
+ break;
+ default:
+    llvm_unreachable("Unknown symbol reference kind!");
+ } // switch
+
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), MCFixupKind(FixupKind)));
+
+ // All of the information is in the fixup.
+ return 0;
+}
+
+/// getMemEncoding - Return binary encoding of memory related operand.
+/// If the offset operand requires relocation, record the relocation.
+unsigned
+MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
+ assert(MI.getOperand(OpNo).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups) << 16;
+ unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups);
+
+ return (OffBits & 0xFFFF) | RegBits;
+}
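A standalone sketch of that packing, with hypothetical values (register 29 is $sp in the numbering used by getMipsRegisterNumbering):

    // Memory operand encoding: base register in bits 20-16, offset in bits 15-0.
    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned Base = 29;                       // $sp
      int16_t Offset = -8;                      // signed 16-bit displacement
      uint32_t Enc = ((uint32_t)Base << 16) | (uint16_t)Offset;
      assert(Enc == 0x001dfff8);
      return 0;
    }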
+
+unsigned
+MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+ return SizeEncoding - 1;
+}
+
+// FIXME: should be called getMSBEncoding
+//
+unsigned
+MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ assert(MI.getOperand(OpNo-1).isImm());
+ assert(MI.getOperand(OpNo).isImm());
+ unsigned Position = getMachineOpValue(MI, MI.getOperand(OpNo-1), Fixups);
+ unsigned Size = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups);
+
+ return Position + Size - 1;
+}
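For a concrete (hypothetical) field, the two encodings work out as follows: extracting or inserting a 10-bit field starting at bit 6 stores size-1 = 9 in the EXT size field and pos+size-1 = 15 (the field's most significant bit) in the INS msb field. A one-line check:

    // EXT encodes size-1; INS encodes pos+size-1 (the msb of the field).
    #include <cassert>

    int main() {
      unsigned Pos = 6, Size = 10;
      assert(Size - 1 == 9);        // getSizeExtEncoding
      assert(Pos + Size - 1 == 15); // getSizeInsEncoding
      return 0;
    }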
+
+#include "MipsGenMCCodeEmitter.inc"
+
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 1f9e3ddf13c8..3c544f6aec90 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MipsMCTargetDesc.h"
#include "MipsMCAsmInfo.h"
+#include "MipsMCTargetDesc.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/MCCodeGenInfo.h"
@@ -20,6 +20,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -63,19 +64,24 @@ static MCAsmInfo *createMipsMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createMipsMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- if (RM == Reloc::Default)
+ if (CM == CodeModel::JITDefault)
+ RM = Reloc::Static;
+ else if (RM == Reloc::Default)
RM = Reloc::PIC_;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
- return new MipsInstPrinter(MAI);
+ return new MipsInstPrinter(MAI, MII, MRI);
}
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
@@ -110,7 +116,8 @@ extern "C" void LLVMInitializeMipsTargetMC() {
TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheMipselTarget, createMipsMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheMips64Target, createMipsMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget, createMipsMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheMips64elTarget,
+ createMipsMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheMipsTarget, createMipsMCRegisterInfo);
@@ -120,25 +127,31 @@ extern "C" void LLVMInitializeMipsTargetMC() {
createMipsMCRegisterInfo);
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget,
+ createMipsMCCodeEmitterEB);
TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget,
- createMipsMCCodeEmitter);
+ createMipsMCCodeEmitterEL);
TargetRegistry::RegisterMCCodeEmitter(TheMips64Target,
- createMipsMCCodeEmitter);
+ createMipsMCCodeEmitterEB);
TargetRegistry::RegisterMCCodeEmitter(TheMips64elTarget,
- createMipsMCCodeEmitter);
+ createMipsMCCodeEmitterEL);
// Register the object streamer.
TargetRegistry::RegisterMCObjectStreamer(TheMipsTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheMipselTarget, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheMips64Target, createMCStreamer);
- TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheMips64elTarget,
+ createMCStreamer);
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheMipselTarget, createMipsAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, createMipsAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
+ createMipsAsmBackendEB32);
+ TargetRegistry::RegisterMCAsmBackend(TheMipselTarget,
+ createMipsAsmBackendEL32);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64Target,
+ createMipsAsmBackendEB64);
+ TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
+ createMipsAsmBackendEL64);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 7a0042ad889e..547ccddd78ea 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -14,25 +14,40 @@
#ifndef MIPSMCTARGETDESC_H
#define MIPSMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class MCAsmBackend;
-class MCInstrInfo;
class MCCodeEmitter;
class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class StringRef;
class Target;
+class raw_ostream;
extern Target TheMipsTarget;
extern Target TheMipselTarget;
extern Target TheMips64Target;
extern Target TheMips64elTarget;
-MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createMipsAsmBackendEB32(const Target &T, StringRef TT);
+MCAsmBackend *createMipsAsmBackendEL32(const Target &T, StringRef TT);
+MCAsmBackend *createMipsAsmBackendEB64(const Target &T, StringRef TT);
+MCAsmBackend *createMipsAsmBackendEL64(const Target &T, StringRef TT);
-MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT);
+MCObjectWriter *createMipsELFObjectWriter(raw_ostream &OS,
+ uint8_t OSABI,
+ bool IsLittleEndian,
+ bool Is64Bit);
} // End llvm namespace
// Defines symbolic names for Mips registers. This defines a mapping from
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index d72693c0940d..168635c96beb 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -15,9 +15,9 @@ TARGET = Mips
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
- MipsGenSubtargetInfo.inc
+ MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = InstPrinter AsmParser TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index bacecf20b920..bafadc8f25f6 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -21,8 +21,6 @@
namespace llvm {
class MipsTargetMachine;
class FunctionPass;
- class MachineCodeEmitter;
- class formatted_raw_ostream;
FunctionPass *createMipsISelDag(MipsTargetMachine &TM);
FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM);
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 39c2c164f664..cbebe84a1805 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,4 +1,4 @@
-//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
+//===-- Mips.td - Describe the Mips Target Machine ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -63,7 +63,7 @@ def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
[FeatureCondMov, FeatureBitCount]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
- [FeatureMips32, FeatureSEInReg]>;
+ [FeatureMips32, FeatureSEInReg, FeatureSwap]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
[FeatureGP64Bit, FeatureFP64Bit,
@@ -79,9 +79,9 @@ def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
-def : Proc<"mips32r1", [FeatureMips32]>;
-def : Proc<"4ke", [FeatureMips32r2]>;
-def : Proc<"mips64r1", [FeatureMips64]>;
+def : Proc<"mips32", [FeatureMips32]>;
+def : Proc<"mips32r2", [FeatureMips32r2]>;
+def : Proc<"mips64", [FeatureMips64]>;
def : Proc<"mips64r2", [FeatureMips64r2]>;
def MipsAsmWriter : AsmWriter {
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 9758f4bb8907..427e8d97ad9c 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -25,68 +25,48 @@ def uimm16_64 : Operand<i64> {
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() - 32);
+ return getImm(N, (unsigned)N->getZExtValue() - 32);
}]>;
-// imm32_63 predicate - True if imm is in range [32, 63].
-def imm32_63 : ImmLeaf<i64,
- [{return (int32_t)Imm >= 32 && (int32_t)Imm < 64;}],
- Subtract32>;
+// shamt must fit in 6 bits.
+def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
// Shifts
-class LogicR_shift_rotate_imm64<bits<6> func, bits<5> _rs, string instr_asm,
- SDNode OpNode, PatFrag PF>:
- FR<0x00, func, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt, shamt_64:$shamt),
- !strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set CPU64Regs:$rd, (OpNode CPU64Regs:$rt, (i64 PF:$shamt)))],
- IIAlu> {
- let rs = _rs;
-}
-
-class LogicR_shift_rotate_reg64<bits<6> func, bits<5> _shamt, string instr_asm,
- SDNode OpNode>:
- FR<0x00, func, (outs CPU64Regs:$rd), (ins CPU64Regs:$rs, CPU64Regs:$rt),
- !strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set CPU64Regs:$rd, (OpNode CPU64Regs:$rt, CPU64Regs:$rs))], IIAlu> {
- let shamt = _shamt;
-}
+// 64-bit shift instructions.
+class shift_rotate_imm64<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode>:
+ shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt6, shamt,
+ CPU64Regs>;
// Mul, Div
-let rd = 0, shamt = 0, Defs = [HI64, LO64] in {
- let isCommutable = 1 in
- class Mul64<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPU64Regs:$rs, CPU64Regs:$rt),
- !strconcat(instr_asm, "\t$rs, $rt"), [], itin>;
-
- class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPU64Regs:$rs, CPU64Regs:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op CPU64Regs:$rs, CPU64Regs:$rt)], itin>;
+class Mult64<bits<6> func, string instr_asm, InstrItinClass itin>:
+ Mult<func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
+class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ Div<op, func, instr_asm, itin, CPU64Regs, [HI64, LO64]>;
+
+multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
+ def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>, Requires<[IsN64]>;
}
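+// Note on the #NAME#/_P8 pairs (background, not from the patch itself):
+// #NAME# expands to the defm name and is selected when pointers are 32
+// bits wide (NotN64); the _P8 variant switches the pointer register class
+// to CPU64Regs for the N64 ABI, where pointers are 64 bits wide.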
-// Move from Hi/Lo
-let shamt = 0 in {
-let rs = 0, rt = 0 in
-class MoveFromLOHI64<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs CPU64Regs:$rd), (ins),
- !strconcat(instr_asm, "\t$rd"), [], IIHiLo>;
-
-let rt = 0, rd = 0 in
-class MoveToLOHI64<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs), (ins CPU64Regs:$rs),
- !strconcat(instr_asm, "\t$rs"), [], IIHiLo>;
+multiclass AtomicCmpSwap64<PatFrag Op, string Width> {
+ def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
+ Requires<[IsN64]>;
}
-// Count Leading Ones/Zeros in Word
-class CountLeading64<bits<6> func, string instr_asm, list<dag> pattern>:
- FR<0x1c, func, (outs CPU64Regs:$rd), (ins CPU64Regs:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
- Requires<[HasBitCount]> {
- let shamt = 0;
- let rt = rd;
+let usesCustomInserter = 1, Predicates = [HasMips64] in {
+ defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
+ defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
+ defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64<atomic_load_and_64, "load_and_64">;
+ defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64<atomic_load_or_64, "load_or_64">;
+ defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64<atomic_load_xor_64, "load_xor_64">;
+ defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64<atomic_load_nand_64, "load_nand_64">;
+ defm ATOMIC_SWAP_I64 : Atomic2Ops64<atomic_swap_64, "swap_64">;
+ defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64<atomic_cmp_swap_64, "64">;
}
//===----------------------------------------------------------------------===//
@@ -101,6 +81,7 @@ def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>;
def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>;
def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>;
def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>;
+def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>;
/// Arithmetic Instructions (3-Operand, R-Type)
def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>;
@@ -113,26 +94,21 @@ def XOR64 : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPU64Regs, 1>;
def NOR64 : LogicNOR<0x00, 0x27, "nor", CPU64Regs>;
/// Shift Instructions
-def DSLL : LogicR_shift_rotate_imm64<0x38, 0x00, "dsll", shl, immZExt5>;
-def DSRL : LogicR_shift_rotate_imm64<0x3a, 0x00, "dsrl", srl, immZExt5>;
-def DSRA : LogicR_shift_rotate_imm64<0x3b, 0x00, "dsra", sra, immZExt5>;
-def DSLL32 : LogicR_shift_rotate_imm64<0x3c, 0x00, "dsll32", shl, imm32_63>;
-def DSRL32 : LogicR_shift_rotate_imm64<0x3e, 0x00, "dsrl32", srl, imm32_63>;
-def DSRA32 : LogicR_shift_rotate_imm64<0x3f, 0x00, "dsra32", sra, imm32_63>;
-def DSLLV : LogicR_shift_rotate_reg64<0x24, 0x00, "dsllv", shl>;
-def DSRLV : LogicR_shift_rotate_reg64<0x26, 0x00, "dsrlv", srl>;
-def DSRAV : LogicR_shift_rotate_reg64<0x27, 0x00, "dsrav", sra>;
+def DSLL : shift_rotate_imm64<0x38, 0x00, "dsll", shl>;
+def DSRL : shift_rotate_imm64<0x3a, 0x00, "dsrl", srl>;
+def DSRA : shift_rotate_imm64<0x3b, 0x00, "dsra", sra>;
+def DSLLV : shift_rotate_reg<0x24, 0x00, "dsllv", shl, CPU64Regs>;
+def DSRLV : shift_rotate_reg<0x26, 0x00, "dsrlv", srl, CPU64Regs>;
+def DSRAV : shift_rotate_reg<0x27, 0x00, "dsrav", sra, CPU64Regs>;
// Rotate Instructions
let Predicates = [HasMips64r2] in {
- def DROTR : LogicR_shift_rotate_imm64<0x3a, 0x01, "drotr", rotr, immZExt5>;
- def DROTR32 : LogicR_shift_rotate_imm64<0x3e, 0x01, "drotr32", rotr,
- imm32_63>;
- def DROTRV : LogicR_shift_rotate_reg64<0x16, 0x01, "drotrv", rotr>;
+ def DROTR : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
+ def DROTRV : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
}
/// Load and Store Instructions
-/// aligned
+/// aligned
defm LB64 : LoadM64<0x20, "lb", sextloadi8>;
defm LBu64 : LoadM64<0x24, "lbu", zextloadi8>;
defm LH64 : LoadM64<0x21, "lh", sextloadi16_a>;
@@ -154,7 +130,14 @@ defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>;
defm ULD : LoadM64<0x37, "uld", load_u, 1>;
defm USD : StoreM64<0x3f, "usd", store_u, 1>;
+/// Load-linked, Store-conditional
+def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>;
+def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]>;
+def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>;
+def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>;
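+// lld/scd above are the 64-bit load-linked/store-conditional pair; the
+// atomic pseudos defined earlier (usesCustomInserter = 1) are expanded
+// into ll/sc retry loops by the custom inserter.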
+
/// Jump and Branch Instructions
+def JR64 : JumpFR<0x00, 0x08, "jr", CPU64Regs>;
def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>;
def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>;
def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>;
@@ -162,46 +145,104 @@ def BGTZ64 : CBranchZero<0x07, 0, "bgtz", setgt, CPU64Regs>;
def BLEZ64 : CBranchZero<0x07, 0, "blez", setle, CPU64Regs>;
def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
+def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>;
+
/// Multiply and Divide Instructions.
-def DMULT : Mul64<0x1c, "dmult", IIImul>;
-def DMULTu : Mul64<0x1d, "dmultu", IIImul>;
+def DMULT : Mult64<0x1c, "dmult", IIImul>;
+def DMULTu : Mult64<0x1d, "dmultu", IIImul>;
def DSDIV : Div64<MipsDivRem, 0x1e, "ddiv", IIIdiv>;
def DUDIV : Div64<MipsDivRemU, 0x1f, "ddivu", IIIdiv>;
-let Defs = [HI64] in
- def MTHI64 : MoveToLOHI64<0x11, "mthi">;
-let Defs = [LO64] in
- def MTLO64 : MoveToLOHI64<0x13, "mtlo">;
+def MTHI64 : MoveToLOHI<0x11, "mthi", CPU64Regs, [HI64]>;
+def MTLO64 : MoveToLOHI<0x13, "mtlo", CPU64Regs, [LO64]>;
+def MFHI64 : MoveFromLOHI<0x10, "mfhi", CPU64Regs, [HI64]>;
+def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>;
-let Uses = [HI64] in
- def MFHI64 : MoveFromLOHI64<0x10, "mfhi">;
-let Uses = [LO64] in
- def MFLO64 : MoveFromLOHI64<0x12, "mflo">;
+/// Sign Ext In Register Instructions.
+def SEB64 : SignExtInReg<0x10, "seb", i8, CPU64Regs>;
+def SEH64 : SignExtInReg<0x18, "seh", i16, CPU64Regs>;
/// Count Leading
-def DCLZ : CountLeading64<0x24, "dclz",
- [(set CPU64Regs:$rd, (ctlz CPU64Regs:$rs))]>;
-def DCLO : CountLeading64<0x25, "dclo",
- [(set CPU64Regs:$rd, (ctlz (not CPU64Regs:$rs)))]>;
+def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>;
+def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>;
+
+/// Double Word Swap Bytes/HalfWords
+def DSBH : SubwordSwap<0x24, 0x2, "dsbh", CPU64Regs>;
+def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
+
+def LEA_ADDiu64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
+
+let Uses = [SP_64] in
+def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
+ Requires<[IsN64]>;
+
+def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
+
+def DEXT : ExtBase<3, "dext", CPU64Regs>;
+def DINS : InsBase<7, "dins", CPU64Regs>;
+
+def DSLL64_32 : FR<0x3c, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "dsll\t$rd, $rt, 32", [], IIAlu>;
+
+def SLL64_32 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPURegs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
+def SLL64_64 : FR<0x0, 0x00, (outs CPU64Regs:$rd), (ins CPU64Regs:$rt),
+ "sll\t$rd, $rt, 0", [], IIAlu>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
-// Small immediates
-def : Pat<(i64 immSExt16:$in),
- (DADDiu ZERO_64, imm:$in)>;
-def : Pat<(i64 immZExt16:$in),
- (ORi64 ZERO_64, imm:$in)>;
-
-// zextloadi32_u
-def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>,
- Requires<[IsN64]>;
-def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>,
- Requires<[NotN64]>;
+// extended loads
+let Predicates = [NotN64] in {
+ def : Pat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+ def : Pat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+ def : Pat<(i64 (extloadi16_a addr:$src)), (LH64 addr:$src)>;
+ def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64 addr:$src)>;
+ def : Pat<(i64 (extloadi32_a addr:$src)), (LW64 addr:$src)>;
+ def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64 addr:$src)>;
+ def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>;
+}
+let Predicates = [IsN64] in {
+ def : Pat<(i64 (extloadi1 addr:$src)), (LB64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi8 addr:$src)), (LB64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi16_a addr:$src)), (LH64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi16_u addr:$src)), (ULH64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi32_a addr:$src)), (LW64_P8 addr:$src)>;
+ def : Pat<(i64 (extloadi32_u addr:$src)), (ULW64_P8 addr:$src)>;
+ def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>;
+}
// hi/lo relocs
-def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : Pat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
+def : Pat<(MipsHi tblockaddress:$in), (LUi64 tblockaddress:$in)>;
+def : Pat<(MipsHi tjumptable:$in), (LUi64 tjumptable:$in)>;
+def : Pat<(MipsHi tconstpool:$in), (LUi64 tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi64 tglobaltlsaddr:$in)>;
+
+def : Pat<(MipsLo tglobaladdr:$in), (DADDiu ZERO_64, tglobaladdr:$in)>;
+def : Pat<(MipsLo tblockaddress:$in), (DADDiu ZERO_64, tblockaddress:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (DADDiu ZERO_64, tjumptable:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (DADDiu ZERO_64, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (DADDiu ZERO_64, tglobaltlsaddr:$in)>;
+
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaladdr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaladdr:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tblockaddress:$lo)),
+ (DADDiu CPU64Regs:$hi, tblockaddress:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tjumptable:$lo)),
+ (DADDiu CPU64Regs:$hi, tjumptable:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tconstpool:$lo)),
+ (DADDiu CPU64Regs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPU64Regs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (DADDiu CPU64Regs:$hi, tglobaltlsaddr:$lo)>;
+
+def : WrapperPat<tglobaladdr, DADDiu, CPU64Regs>;
+def : WrapperPat<tconstpool, DADDiu, CPU64Regs>;
+def : WrapperPat<texternalsym, DADDiu, CPU64Regs>;
+def : WrapperPat<tblockaddress, DADDiu, CPU64Regs>;
+def : WrapperPat<tjumptable, DADDiu, CPU64Regs>;
+def : WrapperPat<tglobaltlsaddr, DADDiu, CPU64Regs>;
defm : BrcondPats<CPU64Regs, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
@@ -212,3 +253,21 @@ defm : SetlePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
+
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>;
+
+// truncate
+def : Pat<(i32 (trunc CPU64Regs:$src)),
+ (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>;
+
+// 32-to-64-bit extension
+def : Pat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
+def : Pat<(i64 (zext CPURegs:$src)), (DSRL (DSLL64_32 CPURegs:$src), 32)>;
+def : Pat<(i64 (sext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
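+// Background on the patterns above: on MIPS64, 32-bit operations such as
+// "sll $rd, $rt, 0" sign-extend their 32-bit result into the full 64-bit
+// register, so a single SLL64_32 implements sext (and suffices for
+// anyext); zext instead clears the upper word with the dsll/dsrl-by-32
+// pair.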
+
+// Sign extend in register
+def : Pat<(i64 (sext_inreg CPU64Regs:$src, i32)), (SLL64_64 CPU64Regs:$src)>;
+
+// bswap pattern
+def : Pat<(bswap CPU64Regs:$rt), (DSHD (DSBH CPU64Regs:$rt))>;
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
new file mode 100644
index 000000000000..dc8fbd0d0370
--- /dev/null
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -0,0 +1,153 @@
+//===-- MipsAnalyzeImmediate.cpp - Analyze Immediates ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "MipsAnalyzeImmediate.h"
+#include "Mips.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+
+MipsAnalyzeImmediate::Inst::Inst(unsigned O, unsigned I) : Opc(O), ImmOpnd(I) {}
+
+// Add I to the instruction sequences.
+void MipsAnalyzeImmediate::AddInstr(InstSeqLs &SeqLs, const Inst &I) {
+ // Add an instruction sequence consisting of just I.
+ if (SeqLs.empty()) {
+ SeqLs.push_back(InstSeq(1, I));
+ return;
+ }
+
+ for (InstSeqLs::iterator Iter = SeqLs.begin(); Iter != SeqLs.end(); ++Iter)
+ Iter->push_back(I);
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ GetInstSeqLs((Imm + 0x8000ULL) & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ADDiu, Imm & 0xffffULL));
+}
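+
+// The "+ 0x8000" in GetInstSeqLsADDiu above compensates for ADDiu
+// sign-extending its 16-bit operand: when bit 15 of Imm is set, the final
+// ADDiu effectively subtracts 0x10000, so the recursive call must build a
+// high part that is 0x10000 larger.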
+
+void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ GetInstSeqLs(Imm & 0xffffffffffff0000ULL, RemSize, SeqLs);
+ AddInstr(SeqLs, Inst(ORi, Imm & 0xffffULL));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ unsigned Shamt = CountTrailingZeros_64(Imm);
+ GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
+ AddInstr(SeqLs, Inst(SLL, Shamt));
+}
+
+void MipsAnalyzeImmediate::GetInstSeqLs(uint64_t Imm, unsigned RemSize,
+ InstSeqLs &SeqLs) {
+ uint64_t MaskedImm = Imm & (0xffffffffffffffffULL >> (64 - Size));
+
+ // Do nothing if Imm is 0.
+ if (!MaskedImm)
+ return;
+
+ // A single ADDiu will do if RemSize <= 16.
+ if (RemSize <= 16) {
+ AddInstr(SeqLs, Inst(ADDiu, MaskedImm));
+ return;
+ }
+
+ // Shift if the lower 16 bits are cleared.
+ if (!(Imm & 0xffff)) {
+ GetInstSeqLsSLL(Imm, RemSize, SeqLs);
+ return;
+ }
+
+ GetInstSeqLsADDiu(Imm, RemSize, SeqLs);
+
+ // If bit 15 is cleared, it doesn't make a difference whether the last
+ // instruction is an ADDiu or ORi. In that case, do not call GetInstSeqLsORi.
+ if (Imm & 0x8000) {
+ InstSeqLs SeqLsORi;
+ GetInstSeqLsORi(Imm, RemSize, SeqLsORi);
+ SeqLs.insert(SeqLs.end(), SeqLsORi.begin(), SeqLsORi.end());
+ }
+}
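+
+// A worked trace for illustration: analyzing Imm = 0x12345678 with
+// Size = 32 first yields the candidate sequence
+//   ADDiu 0x048d; SLL 18; ADDiu 0x5678
+// and ReplaceADDiuSLLWithLUi below folds the leading ADDiu/SLL pair
+// (0x048d << 2 == 0x1234) into
+//   LUi 0x1234; ADDiu 0x5678
+// which materializes the constant in two instructions.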
+
+// Replace an ADDiu & SLL pair with a LUi.
+// e.g. the following two instructions
+// ADDiu 0x0111
+// SLL 18
+// are replaced with
+// LUi 0x444
+void MipsAnalyzeImmediate::ReplaceADDiuSLLWithLUi(InstSeq &Seq) {
+ // Check if the first two instructions are ADDiu and SLL and the shift amount
+ // is at least 16.
+ if ((Seq.size() < 2) || (Seq[0].Opc != ADDiu) ||
+ (Seq[1].Opc != SLL) || (Seq[1].ImmOpnd < 16))
+ return;
+
+ // Sign-extend and shift the operand of ADDiu and see if it still fits in
+ // 16 bits.
+ int64_t Imm = SignExtend64<16>(Seq[0].ImmOpnd);
+ int64_t ShiftedImm = Imm << (Seq[1].ImmOpnd - 16);
+
+ if (!isInt<16>(ShiftedImm))
+ return;
+
+ // Replace the first instruction and erase the second.
+ Seq[0].Opc = LUi;
+ Seq[0].ImmOpnd = (unsigned)(ShiftedImm & 0xffff);
+ Seq.erase(Seq.begin() + 1);
+}
+
+void MipsAnalyzeImmediate::GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts) {
+ InstSeqLs::iterator ShortestSeq = SeqLs.end();
+ // The length of an instruction sequence is at most 7.
+ unsigned ShortestLength = 8;
+
+ for (InstSeqLs::iterator S = SeqLs.begin(); S != SeqLs.end(); ++S) {
+ ReplaceADDiuSLLWithLUi(*S);
+ assert(S->size() <= 7);
+
+ if (S->size() < ShortestLength) {
+ ShortestSeq = S;
+ ShortestLength = S->size();
+ }
+ }
+
+ Insts.clear();
+ Insts.append(ShortestSeq->begin(), ShortestSeq->end());
+}
+
+const MipsAnalyzeImmediate::InstSeq
+&MipsAnalyzeImmediate::Analyze(uint64_t Imm, unsigned Size,
+ bool LastInstrIsADDiu) {
+ this->Size = Size;
+
+ if (Size == 32) {
+ ADDiu = Mips::ADDiu;
+ ORi = Mips::ORi;
+ SLL = Mips::SLL;
+ LUi = Mips::LUi;
+ } else {
+ ADDiu = Mips::DADDiu;
+ ORi = Mips::ORi64;
+ SLL = Mips::DSLL;
+ LUi = Mips::LUi64;
+ }
+
+ InstSeqLs SeqLs;
+
+ // Get the list of instruction sequences.
+ if (LastInstrIsADDiu || !Imm)
+ GetInstSeqLsADDiu(Imm, Size, SeqLs);
+ else
+ GetInstSeqLs(Imm, Size, SeqLs);
+
+ // Set Insts to the shortest instruction sequence.
+ GetShortestSeq(SeqLs, Insts);
+
+ return Insts;
+}
diff --git a/lib/Target/Mips/MipsAnalyzeImmediate.h b/lib/Target/Mips/MipsAnalyzeImmediate.h
new file mode 100644
index 000000000000..a094ddae45de
--- /dev/null
+++ b/lib/Target/Mips/MipsAnalyzeImmediate.h
@@ -0,0 +1,63 @@
+//===-- MipsAnalyzeImmediate.h - Analyze Immediates ------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MIPS_ANALYZE_IMMEDIATE_H
+#define MIPS_ANALYZE_IMMEDIATE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+ class MipsAnalyzeImmediate {
+ public:
+ struct Inst {
+ unsigned Opc, ImmOpnd;
+ Inst(unsigned Opc, unsigned ImmOpnd);
+ };
+ typedef SmallVector<Inst, 7> InstSeq;
+
+ /// Analyze - Get an instruction sequence to load immediate Imm. The last
+ /// instruction in the sequence must be an ADDiu if LastInstrIsADDiu is
+ /// true.
+ const InstSeq &Analyze(uint64_t Imm, unsigned Size, bool LastInstrIsADDiu);
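+
+ /// Illustrative use (the call site shown here is hypothetical):
+ /// MipsAnalyzeImmediate AnalyzeImm;
+ /// const InstSeq &Seq = AnalyzeImm.Analyze(0x12345678, 32, false);
+ /// yields {LUi 0x1234, ADDiu 0x5678}; callers emit one machine
+ /// instruction per entry, using the Opc and ImmOpnd of each Inst.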
+ private:
+ typedef SmallVector<InstSeq, 5> InstSeqLs;
+
+ /// AddInstr - Add I to all instruction sequences in SeqLs.
+ void AddInstr(InstSeqLs &SeqLs, const Inst &I);
+
+ /// GetInstSeqLsADDiu - Get instruction sequences which end with an ADDiu to
+ /// load immediate Imm.
+ void GetInstSeqLsADDiu(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLsORi - Get instruction sequences which end with an ORi to
+ /// load immediate Imm.
+ void GetInstSeqLsORi(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLsSLL - Get instruction sequences which end with a SLL to
+ /// load immediate Imm.
+ void GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// GetInstSeqLs - Get instruction sequences to load immediate Imm.
+ void GetInstSeqLs(uint64_t Imm, unsigned RemSize, InstSeqLs &SeqLs);
+
+ /// ReplaceADDiuSLLWithLUi - Replace an ADDiu & SLL pair with a LUi.
+ void ReplaceADDiuSLLWithLUi(InstSeq &Seq);
+
+ /// GetShortestSeq - Find the shortest instruction sequence in SeqLs and
+ /// return it in Insts.
+ void GetShortestSeq(InstSeqLs &SeqLs, InstSeq &Insts);
+
+ unsigned Size;
+ unsigned ADDiu, ORi, SLL, LUi;
+ InstSeq Insts;
+ };
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 0e826812d076..8206cfc15704 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- MipsAsmPrinter.cpp - Mips LLVM assembly writer --------------------===//
+//===-- MipsAsmPrinter.cpp - Mips LLVM Assembly Printer -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,10 +16,12 @@
#include "MipsAsmPrinter.h"
#include "Mips.h"
#include "MipsInstrInfo.h"
-#include "MipsMachineFunction.h"
-#include "MipsMCInstLower.h"
-#include "MipsMCSymbolRefExpr.h"
#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -27,55 +29,125 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Instructions.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Analysis/DebugInfo.h"
using namespace llvm;
-static bool isUnalignedLoadStore(unsigned Opc) {
- return Opc == Mips::ULW || Opc == Mips::ULH || Opc == Mips::ULHu ||
- Opc == Mips::USW || Opc == Mips::USH ||
- Opc == Mips::ULW_P8 || Opc == Mips::ULH_P8 || Opc == Mips::ULHu_P8 ||
- Opc == Mips::USW_P8 || Opc == Mips::USH_P8;
+void MipsAsmPrinter::EmitInstrWithMacroNoAT(const MachineInstr *MI) {
+ MCInst TmpInst;
+
+ MCInstLowering.Lower(MI, TmpInst);
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitInstruction(TmpInst);
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+}
+
+bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ MipsFI = MF.getInfo<MipsFunctionInfo>();
+ AsmPrinter::runOnMachineFunction(MF);
+ return true;
}
void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
-
if (MI->isDebugValue()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
PrintDebugValueComment(MI, OS);
return;
}
- MipsMCInstLower MCInstLowering(Mang, *MF, *this);
unsigned Opc = MI->getOpcode();
MCInst TmpInst0;
- MCInstLowering.Lower(MI, TmpInst0);
-
- // Enclose unaligned load or store with .macro & .nomacro directives.
- if (isUnalignedLoadStore(Opc)) {
- MCInst Directive;
- Directive.setOpcode(Mips::MACRO);
- OutStreamer.EmitInstruction(Directive);
- OutStreamer.EmitInstruction(TmpInst0);
- Directive.setOpcode(Mips::NOMACRO);
- OutStreamer.EmitInstruction(Directive);
+ SmallVector<MCInst, 4> MCInsts;
+
+ switch (Opc) {
+ case Mips::ULW:
+ case Mips::ULH:
+ case Mips::ULHu:
+ case Mips::USW:
+ case Mips::USH:
+ case Mips::ULW_P8:
+ case Mips::ULH_P8:
+ case Mips::ULHu_P8:
+ case Mips::USW_P8:
+ case Mips::USH_P8:
+ case Mips::ULD:
+ case Mips::ULW64:
+ case Mips::ULH64:
+ case Mips::ULHu64:
+ case Mips::USD:
+ case Mips::USW64:
+ case Mips::USH64:
+ case Mips::ULD_P8:
+ case Mips::ULW64_P8:
+ case Mips::ULH64_P8:
+ case Mips::ULHu64_P8:
+ case Mips::USD_P8:
+ case Mips::USW64_P8:
+ case Mips::USH64_P8: {
+ if (OutStreamer.hasRawTextSupport()) {
+ EmitInstrWithMacroNoAT(MI);
+ return;
+ }
+
+ MCInstLowering.LowerUnalignedLoadStore(MI, MCInsts);
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+
return;
}
+ case Mips::CPRESTORE: {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isImm() && "CPRESTORE's operand must be an immediate.");
+ int64_t Offset = MO.getImm();
+
+ if (OutStreamer.hasRawTextSupport()) {
+ if (!isInt<16>(Offset)) {
+ EmitInstrWithMacroNoAT(MI);
+ return;
+ }
+ } else {
+ MCInstLowering.LowerCPRESTORE(Offset, MCInsts);
+
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+ return;
+ }
+
+ break;
+ }
+ case Mips::SETGP01: {
+ MCInstLowering.LowerSETGP01(MI, MCInsts);
+
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+
+ return;
+ }
+ default:
+ break;
+ }
+
+ MCInstLowering.Lower(MI, TmpInst0);
OutStreamer.EmitInstruction(TmpInst0);
}
@@ -138,7 +210,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
if (Mips::CPURegsRegisterClass->contains(Reg))
break;
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMipsRegisterNumbering(Reg);
if (Mips::AFGR64RegisterClass->contains(Reg)) {
FPUBitmask |= (3 << RegNum);
CSFPRegsSize += AFGR64RegSize;
@@ -153,7 +225,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
// Set CPU Bitmask.
for (; i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ unsigned RegNum = getMipsRegisterNumbering(Reg);
CPUBitmask |= (1 << RegNum);
}
@@ -177,7 +249,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) {
O << "0x";
for (int i = 7; i >= 0; i--)
- O << utohexstr((Value & (0xF << (i*4))) >> (i*4));
+ O.write_hex((Value & (0xF << (i*4))) >> (i*4));
}
//===----------------------------------------------------------------------===//
@@ -192,10 +264,11 @@ void MipsAsmPrinter::emitFrameDirective() {
unsigned returnReg = RI.getRARegister();
unsigned stackSize = MF->getFrameInfo()->getStackSize();
- OutStreamer.EmitRawText("\t.frame\t$" +
- Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.frame\t$" +
+ StringRef(MipsInstPrinter::getRegisterName(stackReg)).lower() +
"," + Twine(stackSize) + ",$" +
- Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
+ StringRef(MipsInstPrinter::getRegisterName(returnReg)).lower());
}
/// Emit Set directives.
@@ -205,27 +278,49 @@ const char *MipsAsmPrinter::getCurrentABIString() const {
case MipsSubtarget::N32: return "abiN32";
case MipsSubtarget::N64: return "abi64";
case MipsSubtarget::EABI: return "eabi32"; // TODO: handle eabi64
- default: break;
+ default: llvm_unreachable("Unknown Mips ABI");
}
-
- llvm_unreachable("Unknown Mips ABI");
- return NULL;
}
void MipsAsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName()));
OutStreamer.EmitLabel(CurrentFnSym);
}
/// EmitFunctionBodyStart - Targets can override this to emit stuff before
/// the first basic block in the function.
void MipsAsmPrinter::EmitFunctionBodyStart() {
+ MCInstLowering.Initialize(Mang, &MF->getContext());
+
emitFrameDirective();
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printSavedRegsBitmask(OS);
- OutStreamer.EmitRawText(OS.str());
+ bool EmitCPLoad = (MF->getTarget().getRelocationModel() == Reloc::PIC_) &&
+ Subtarget->isABI_O32() && MipsFI->globalBaseRegSet() &&
+ MipsFI->globalBaseRegFixed();
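+ // Background: .cpload expands to the standard O32 PIC prologue that
+ // materializes $gp from $t9 ($25), the register holding the callee's
+ // address at an indirect call site; it is wanted only when the function
+ // actually uses a fixed global base register.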
+
+ if (OutStreamer.hasRawTextSupport()) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printSavedRegsBitmask(OS);
+ OutStreamer.EmitRawText(OS.str());
+
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoreorder"));
+
+ // Emit .cpload directive if needed.
+ if (EmitCPLoad)
+ OutStreamer.EmitRawText(StringRef("\t.cpload\t$25"));
+
+ OutStreamer.EmitRawText(StringRef("\t.set\tnomacro"));
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tnoat"));
+ } else if (EmitCPLoad) {
+ SmallVector<MCInst, 4> MCInsts;
+ MCInstLowering.LowerCPLOAD(MCInsts);
+ for (SmallVector<MCInst, 4>::iterator I = MCInsts.begin();
+ I != MCInsts.end(); ++I)
+ OutStreamer.EmitInstruction(*I);
+ }
}
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
@@ -234,11 +329,15 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
// There are instructions for these macros, but they must
// always be at the function end, and we can't emit them
// earlier without breaking the BB logic.
- OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
- OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
- OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
-}
+ if (OutStreamer.hasRawTextSupport()) {
+ if (MipsFI->getEmitNOAT())
+ OutStreamer.EmitRawText(StringRef("\t.set\tat"));
+ OutStreamer.EmitRawText(StringRef("\t.set\tmacro"));
+ OutStreamer.EmitRawText(StringRef("\t.set\treorder"));
+ OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName()));
+ }
+}
/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
/// exactly one predecessor and the control transfer mechanism between
@@ -262,24 +361,24 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
// If there isn't exactly one predecessor, it can't be a fall through.
MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
++PI2;
-
+
if (PI2 != MBB->pred_end())
- return false;
+ return false;
// The predecessor has to be immediately before this block.
if (!Pred->isLayoutSuccessor(MBB))
return false;
-
+
// If the block is completely empty, then it definitely does fall through.
if (Pred->empty())
return true;
-
+
// Otherwise, check the last instruction.
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ;
+ while (I != Pred->begin() && !(--I)->isTerminator()) ;
- return !I->getDesc().isBarrier();
+ return !I->isBarrier();
}
// Print out an operand for an inline asm expression.
@@ -300,7 +399,7 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
-
+
const MachineOperand &MO = MI->getOperand(OpNum);
assert(MO.isReg() && "unexpected inline asm memory operand");
O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
@@ -335,7 +434,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
switch (MO.getType()) {
case MachineOperand::MO_Register:
O << '$'
- << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
+ << StringRef(MipsInstPrinter::getRegisterName(MO.getReg())).lower();
break;
case MachineOperand::MO_Immediate:
@@ -420,18 +519,23 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// FIXME: Use SwitchSection.
// Tell the assembler which ABI we are using
- OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString()));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText("\t.section .mdebug." +
+ Twine(getCurrentABIString()));
// TODO: handle O64 ABI
- if (Subtarget->isABI_EABI()) {
- if (Subtarget->isGP32bit())
- OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
- else
- OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ if (OutStreamer.hasRawTextSupport()) {
+ if (Subtarget->isABI_EABI()) {
+ if (Subtarget->isGP32bit())
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32"));
+ else
+ OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64"));
+ }
}
// return to previous section
- OutStreamer.EmitRawText(StringRef("\t.previous"));
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t.previous"));
}
MachineLocation
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 16461ff1fbb0..562bf9ce0092 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//===-- MipsAsmPrinter.h - Mips LLVM Assembly Printer ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#ifndef MIPSASMPRINTER_H
#define MIPSASMPRINTER_H
+#include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
#include "MipsSubtarget.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Compiler.h"
@@ -22,16 +24,22 @@
namespace llvm {
class MCStreamer;
class MachineInstr;
-class raw_ostream;
class MachineBasicBlock;
class Module;
+class raw_ostream;
class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
- const MipsSubtarget *Subtarget;
-
+
+ void EmitInstrWithMacroNoAT(const MachineInstr *MI);
+
public:
+
+ const MipsSubtarget *Subtarget;
+ const MipsFunctionInfo *MipsFI;
+ MipsMCInstLower MCInstLowering;
+
explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
+ : AsmPrinter(TM, Streamer), MCInstLowering(*this) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
}
@@ -39,6 +47,8 @@ public:
return "Mips Assembly Printer";
}
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
void EmitInstruction(const MachineInstr *MI);
void printSavedRegsBitmask(raw_ostream &O);
void printHex32(unsigned int Value, raw_ostream &O);
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 0ae4ef6fbad4..4b7e1d37662c 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,4 +1,4 @@
-//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
+//===-- MipsCallingConv.td - Calling Conventions for Mips --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -35,12 +35,18 @@ def RetCC_MipsO32 : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_MipsN : CallingConv<[
- // FIXME: Handle byval, complex and float double parameters.
+ // Handles byval parameters.
+ CCIfByVal<CCCustom<"CC_Mips64Byval">>,
- // Promote i8/i16/i32 arguments to i64.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
// Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToRegWithShadow<[A0, A1, A2, A3,
+ T0, T1, T2, T3],
+ [F12, F13, F14, F15,
+ F16, F17, F18, F19]>>,
+
CCIfType<[i64], CCAssignToRegWithShadow<[A0_64, A1_64, A2_64, A3_64,
T0_64, T1_64, T2_64, T3_64],
[D12_64, D13_64, D14_64, D15_64,
@@ -59,13 +65,30 @@ def CC_MipsN : CallingConv<[
T0_64, T1_64, T2_64, T3_64]>>,
// All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
- CCIfType<[i64, f64], CCAssignToStack<8, 8>>,
- CCIfType<[f32], CCAssignToStack<4, 8>>
+ CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
-def RetCC_MipsN : CallingConv<[
- // FIXME: Handle complex and float double return values.
+// N32/64 variable arguments.
+// All arguments are passed in integer registers.
+def CC_MipsN_VarArg : CallingConv<[
+ // Handles byval parameters.
+ CCIfByVal<CCCustom<"CC_Mips64Byval">>,
+
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ CCIfType<[i32, f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>,
+
+ CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64,
+ T0_64, T1_64, T2_64, T3_64]>>,
+
+ // All stack parameter slots become 64-bit doublewords and are 8-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 8>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+def RetCC_MipsN : CallingConv<[
// i32 are returned in registers V0, V1
CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
@@ -137,3 +160,20 @@ def RetCC_Mips : CallingConv<[
CCIfSubtarget<"isABI_N64()", CCDelegateTo<RetCC_MipsN>>,
CCDelegateTo<RetCC_MipsO32>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
+def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
+ (sequence "S%u", 7, 0))>;
+
+def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
+ D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
+ (sequence "S%u_64", 7, 0))>;
+
+def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
+ GP_64, (sequence "S%u_64", 7, 0))>;
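+
+// Note: (sequence "S%u", 7, 0) expands to the inclusive register range
+// S7, S6, ..., S0, so each list above spells out the full callee-saved
+// set for its ABI.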
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 23fabe315cf5..7d819026da96 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- Mips/MipsCodeEmitter.cpp - Convert Mips code to machine code -----===//
+//===-- Mips/MipsCodeEmitter.cpp - Convert Mips Code to Machine Code ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,18 +18,20 @@
#include "MipsRelocations.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/PassManager.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -37,8 +39,6 @@
#include <iomanip>
#endif
-#include "llvm/CodeGen/MachineOperand.h"
-
using namespace llvm;
STATISTIC(NumEmitted, "Number of machine instructions emitted");
@@ -66,9 +66,9 @@ class MipsCodeEmitter : public MachineFunctionPass {
public:
MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) :
MachineFunctionPass(ID), JTI(0),
- II((const MipsInstrInfo *) tm.getInstrInfo()),
- TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
+ II((const MipsInstrInfo *) tm.getInstrInfo()),
+ TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_) {
}
bool runOnMachineFunction(MachineFunction &MF);
@@ -80,7 +80,7 @@ class MipsCodeEmitter : public MachineFunctionPass {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
void emitInstruction(const MachineInstr &MI);
@@ -91,7 +91,7 @@ class MipsCodeEmitter : public MachineFunctionPass {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const;
+ bool MayNeedFarStub) const;
void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
@@ -105,9 +105,22 @@ class MipsCodeEmitter : public MachineFunctionPass {
unsigned getRelocation(const MachineInstr &MI,
const MachineOperand &MO) const;
+ unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getBranchTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const;
unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ int emitULW(const MachineInstr &MI);
+ int emitUSW(const MachineInstr &MI);
+ int emitULH(const MachineInstr &MI);
+ int emitULHu(const MachineInstr &MI);
+ int emitUSH(const MachineInstr &MI);
+
+ void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
+ int Offset) const;
};
}
@@ -132,7 +145,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB){
MCE.StartMachineBasicBlock(MBB);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I)
emitInstruction(*I);
}
@@ -149,30 +162,50 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI,
if (Form == MipsII::FrmJ)
return Mips::reloc_mips_26;
if ((Form == MipsII::FrmI || Form == MipsII::FrmFI)
- && MI.getDesc().isBranch())
- return Mips::reloc_mips_branch;
+ && MI.isBranch())
+ return Mips::reloc_mips_pc16;
if (Form == MipsII::FrmI && MI.getOpcode() == Mips::LUi)
return Mips::reloc_mips_hi;
return Mips::reloc_mips_lo;
}
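+
+// In short: FrmJ instructions get 26-bit jump relocations (reloc_mips_26),
+// conditional branches get signed 16-bit PC-relative ones
+// (reloc_mips_pc16), LUi gets the high half of an address (reloc_mips_hi),
+// and everything else defaults to the low half (reloc_mips_lo).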
+unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ MachineOperand MO = MI.getOperand(OpNo);
+ if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ else
+ llvm_unreachable("Unexpected jump target operand kind.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ MachineOperand MO = MI.getOperand(OpNo);
+ emitMachineBasicBlock(MO.getMBB(), getRelocation(MI, MO));
+ return 0;
+}
+
unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
+ unsigned OpNo) const {
// Base register is encoded in bits 20-16, offset is encoded in bits 15-0.
assert(MI.getOperand(OpNo).isReg());
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16;
- return
- (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
+ return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits;
}
unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
+ unsigned OpNo) const {
// size is encoded as size-1.
return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
}
unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
- unsigned OpNo) const {
+ unsigned OpNo) const {
// size is encoded as pos+size-1.
return getMachineOpValue(MI, MI.getOperand(OpNo-1)) +
getMachineOpValue(MI, MI.getOperand(OpNo)) - 1;
@@ -181,14 +214,20 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
- const MachineOperand &MO) const {
+ const MachineOperand &MO) const {
if (MO.isReg())
- return MipsRegisterInfo::getRegisterNumbering(MO.getReg());
+ return getMipsRegisterNumbering(MO.getReg());
else if (MO.isImm())
return static_cast<unsigned>(MO.getImm());
- else if (MO.isGlobal())
- emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
- else if (MO.isSymbol())
+ else if (MO.isGlobal()) {
+ if (MI.getOpcode() == Mips::ULW || MI.getOpcode() == Mips::USW ||
+ MI.getOpcode() == Mips::ULH || MI.getOpcode() == Mips::ULHu)
+ emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 4);
+ else if (MI.getOpcode() == Mips::USH)
+ emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 8);
+ else
+ emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true);
+ } else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO));
else if (MO.isCPI())
emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO));
@@ -202,9 +241,18 @@ unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
}
void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
- bool MayNeedFarStub) const {
+ bool MayNeedFarStub) const {
MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- const_cast<GlobalValue *>(GV), 0, MayNeedFarStub));
+ const_cast<GlobalValue *>(GV), 0,
+ MayNeedFarStub));
+}
+
+void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV,
+ unsigned Reloc, int Offset) const {
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+ const_cast<GlobalValue *>(GV), 0, false));
+ MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset() + Offset,
+ Reloc, const_cast<GlobalValue *>(GV), 0, false));
}
void MipsCodeEmitter::
@@ -225,11 +273,108 @@ emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
}
void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
- unsigned Reloc) const {
+ unsigned Reloc) const {
MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
Reloc, BB));
}
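+
+// The emitU* helpers below hand-encode standard MIPS32 instruction words:
+//   I-type: opcode(31:26) | base(25:21) | rt(20:16) | offset(15:0)
+//   R-type: opcode(31:26) | rs(25:21) | rt(20:16) | rd(15:11) |
+//           shamt(10:6) | funct(5:0)
+// In emitUSW, for instance, opcode 0x2e is swr and 0x2a is swl, so the
+// pair stores a word across a possibly-unaligned boundary.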
+int MipsCodeEmitter::emitUSW(const MachineInstr &MI) {
+ unsigned src = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ // swr src, offset(base)
+ // swl src, offset+3(base)
+ MCE.emitWordLE(
+ (0x2e << 26) | (base << 21) | (src << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x2a << 26) | (base << 21) | (src << 16) | ((offset+3) & 0xffff));
+ return 2;
+}
+
+int MipsCodeEmitter::emitULW(const MachineInstr &MI) {
+ unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ if (dst != base) {
+ // lwr dst, offset(base)
+ // lwl dst, offset+3(base)
+ MCE.emitWordLE(
+ (0x26 << 26) | (base << 21) | (dst << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x22 << 26) | (base << 21) | (dst << 16) | ((offset+3) & 0xffff));
+ return 2;
+ } else {
+ // lwr at, offset(base)
+ // lwl at, offset+3(base)
+ // addu dst, at, $zero
+ MCE.emitWordLE(
+ (0x26 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x22 << 26) | (base << 21) | (at << 16) | ((offset+3) & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (at << 21) | (0x0 << 16) | (dst << 11) | (0x0 << 6) | 0x21);
+ return 3;
+ }
+}
+
+int MipsCodeEmitter::emitUSH(const MachineInstr &MI) {
+ unsigned src = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ // sb src, offset(base)
+ // srl at,src,8
+ // sb at, offset+1(base)
+ MCE.emitWordLE(
+ (0x28 << 26) | (base << 21) | (src << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (0x0 << 21) | (src << 16) | (at << 11) | (0x8 << 6) | 0x2);
+ MCE.emitWordLE(
+ (0x28 << 26) | (base << 21) | (at << 16) | ((offset+1) & 0xffff));
+ return 3;
+}
+
+int MipsCodeEmitter::emitULH(const MachineInstr &MI) {
+ unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ // lbu at, offset(base)
+ // lb dst, offset+1(base)
+ // sll dst,dst,8
+ // or dst,dst,at
+ MCE.emitWordLE(
+ (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x20 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0);
+ MCE.emitWordLE(
+ (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25);
+ return 4;
+}
+
+int MipsCodeEmitter::emitULHu(const MachineInstr &MI) {
+ unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
+ unsigned base = getMachineOpValue(MI, MI.getOperand(1));
+ unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
+ unsigned at = 1;
+ // lbu at, offset(base)
+ // lbu dst, offset+1(base)
+ // sll dst,dst,8
+ // or dst,dst,at
+ MCE.emitWordLE(
+ (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
+ MCE.emitWordLE(
+ (0x24 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff));
+ MCE.emitWordLE(
+ (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0);
+ MCE.emitWordLE(
+ (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25);
+ return 4;
+}
+
void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
@@ -239,11 +384,27 @@ void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
return;
- ++NumEmitted; // Keep track of the # of mi's emitted
switch (MI.getOpcode()) {
+ case Mips::USW:
+ NumEmitted += emitUSW(MI);
+ break;
+ case Mips::ULW:
+ NumEmitted += emitULW(MI);
+ break;
+ case Mips::ULH:
+ NumEmitted += emitULH(MI);
+ break;
+ case Mips::ULHu:
+ NumEmitted += emitULHu(MI);
+ break;
+ case Mips::USH:
+ NumEmitted += emitUSH(MI);
+ break;
+
default:
emitWordLE(getBinaryCodeForInstr(MI));
+ ++NumEmitted; // Keep track of the # of mi's emitted
break;
}
@@ -259,7 +420,7 @@ void MipsCodeEmitter::emitWordLE(unsigned Word) {
/// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
/// code to the specified MCE object.
FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM,
- JITCodeEmitter &JCE) {
+ JITCodeEmitter &JCE) {
return new MipsCodeEmitter(TM, JCE);
}
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
new file mode 100644
index 000000000000..075a3e807b1f
--- /dev/null
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -0,0 +1,194 @@
+//===-- MipsCondMov.td - Describe Mips Conditional Moves --*- tablegen -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the Conditional Moves implementation.
+//
+//===----------------------------------------------------------------------===//
+
+// Conditional moves:
+// These instructions are expanded in
+// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
+// conditional move instructions.
+// cond:int, data:int
+class CondMovIntInt<RegisterClass CRC, RegisterClass DRC, bits<6> funct,
+ string instr_asm> :
+ FR<0, funct, (outs DRC:$rd), (ins DRC:$rs, CRC:$rt, DRC:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
+ let shamt = 0;
+ let Constraints = "$F = $rd";
+}
+
+// cond:int, data:float
+class CondMovIntFP<RegisterClass CRC, RegisterClass DRC, bits<5> fmt,
+ bits<6> func, string instr_asm> :
+ FFR<0x11, func, fmt, (outs DRC:$fd), (ins DRC:$fs, CRC:$rt, DRC:$F),
+ !strconcat(instr_asm, "\t$fd, $fs, $rt"), []> {
+ bits<5> rt;
+ let ft = rt;
+ let Constraints = "$F = $fd";
+}
+
+// cond:float, data:int
+class CondMovFPInt<RegisterClass RC, SDNode cmov, bits<1> tf,
+ string instr_asm> :
+ FCMOV<tf, (outs RC:$rd), (ins RC:$rs, RC:$F),
+ !strconcat(instr_asm, "\t$rd, $rs, $$fcc0"),
+ [(set RC:$rd, (cmov RC:$rs, RC:$F))]> {
+ let cc = 0;
+ let Uses = [FCR31];
+ let Constraints = "$F = $rd";
+}
+
+// cond:float, data:float
+class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
+ string instr_asm> :
+ FFCMOV<fmt, tf, (outs RC:$fd), (ins RC:$fs, RC:$F),
+ !strconcat(instr_asm, "\t$fd, $fs, $$fcc0"),
+ [(set RC:$fd, (cmov RC:$fs, RC:$F))]> {
+ let cc = 0;
+ let Uses = [FCR31];
+ let Constraints = "$F = $fd";
+}
+
+// select patterns
+multiclass MovzPats0<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction SLTOp,
+ Instruction SLTuOp, Instruction SLTiOp,
+ Instruction SLTiuOp> {
+ def : Pat<(select (i32 (setge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setuge CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setuge CRC:$lhs, immSExt16:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTiuOp CRC:$lhs, immSExt16:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (setle CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+ def : Pat<(select (i32 (setule CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (SLTuOp CRC:$rhs, CRC:$lhs), DRC:$F)>;
+}
+
+multiclass MovzPats1<RegisterClass CRC, RegisterClass DRC,
+ Instruction MOVZInst, Instruction XOROp> {
+ def : Pat<(select (i32 (seteq CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select (i32 (seteq CRC:$lhs, 0)), DRC:$T, DRC:$F),
+ (MOVZInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
+ Instruction XOROp> {
+ def : Pat<(select (i32 (setne CRC:$lhs, CRC:$rhs)), DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, (XOROp CRC:$lhs, CRC:$rhs), DRC:$F)>;
+ def : Pat<(select CRC:$cond, DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$cond, DRC:$F)>;
+ def : Pat<(select (i32 (setne CRC:$lhs, 0)),DRC:$T, DRC:$F),
+ (MOVNInst DRC:$T, CRC:$lhs, DRC:$F)>;
+}
+
+// Instantiation of instructions.
+def MOVZ_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
+let Predicates = [HasMips64] in {
+ def MOVZ_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
+ def MOVZ_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz">;
+ def MOVZ_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0a, "movz">;
+}
+
+def MOVN_I_I : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
+let Predicates = [HasMips64] in {
+ def MOVN_I_I64 : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
+ def MOVN_I64_I : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn">;
+ def MOVN_I64_I64 : CondMovIntInt<CPU64Regs, CPU64Regs, 0x0b, "movn">;
+}
+
+def MOVZ_I_S : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
+def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
+ Requires<[HasMips64]>;
+
+def MOVN_I_S : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
+def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
+ Requires<[HasMips64]>;
+
+let Predicates = [NotFP64bit] in {
+ def MOVZ_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
+ def MOVN_I_D32 : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
+}
+let Predicates = [IsFP64bit] in {
+ def MOVZ_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
+ def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d">;
+ def MOVN_I_D64 : CondMovIntFP<CPURegs, FGR64, 17, 19, "movn.d">;
+ def MOVN_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 19, "movn.d">;
+}
+
+def MOVT_I : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
+def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
+ Requires<[HasMips64]>;
+
+def MOVF_I : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
+def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
+ Requires<[HasMips64]>;
+
+def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
+def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
+
+let Predicates = [NotFP64bit] in {
+ def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+let Predicates = [IsFP64bit] in {
+ def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
+ def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+}
+
+// Instantiation of conditional move patterns.
+defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
+defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
+let Predicates = [HasMips64] in {
+ defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats0<CPU64Regs, CPU64Regs, MOVZ_I_I64, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPURegs, CPU64Regs, MOVZ_I_I64, XOR>;
+ defm : MovzPats1<CPU64Regs, CPURegs, MOVZ_I64_I, XOR64>;
+ defm : MovzPats1<CPU64Regs, CPU64Regs, MOVZ_I64_I64, XOR64>;
+}
+
+defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
+let Predicates = [HasMips64] in {
+ defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
+ defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
+ defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
+}
+
+defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
+defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
+defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
+let Predicates = [HasMips64] in {
+ defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
+ defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
+}
+
+let Predicates = [NotFP64bit] in {
+ defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
+ defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
+}
+let Predicates = [IsFP64bit] in {
+ defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
+ defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
+ SLTiu64>;
+ defm : MovzPats1<CPURegs, FGR64, MOVZ_I_D64, XOR>;
+ defm : MovzPats1<CPU64Regs, FGR64, MOVZ_I64_D64, XOR64>;
+ defm : MovnPats<CPURegs, FGR64, MOVN_I_D64, XOR>;
+ defm : MovnPats<CPU64Regs, FGR64, MOVN_I64_D64, XOR64>;
+}
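
The MovzPats1 and MovnPats multiclasses above lean on one identity: xor
yields zero exactly when its operands are equal, so an equality select
becomes a conditional move keyed on (lhs xor rhs). A minimal host-side
sketch of the semantics the patterns encode (selectEq is illustrative,
not an LLVM API):

    #include <cstdint>

    // movz d, t, c:  d = (c == 0) ? t : d   (conditional move on zero)
    // movn d, t, c:  d = (c != 0) ? t : d   (conditional move on non-zero)
    // select (seteq a, b), T, F lowers to movz on condition (a ^ b),
    // because a ^ b == 0 iff a == b (the XOROp operand of MovzPats1).
    uint32_t selectEq(uint32_t a, uint32_t b, uint32_t T, uint32_t F) {
      uint32_t cond = a ^ b; // XOROp
      uint32_t d = F;        // start from the false value
      if (cond == 0)         // movz: overwrite only when cond is zero
        d = T;
      return d;
    }
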
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index be3b7a02ec31..debf2f1b85c1 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -1,4 +1,4 @@
-//===-- DelaySlotFiller.cpp - Mips delay slot filler ---------------------===//
+//===-- MipsDelaySlotFiller.cpp - Mips Delay Slot Filler ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -96,7 +96,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
LastFiller = MBB.end();
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
++FilledSlots;
Changed = true;
@@ -105,8 +105,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
if (EnableDelaySlotFiller && findDelayInstr(MBB, I, D)) {
MBB.splice(llvm::next(I), &MBB, D);
++UsefulSlots;
- }
- else
+ } else
BuildMI(MBB, llvm::next(I), I->getDebugLoc(), TII->get(Mips::NOP));
// Record the filler instruction that filled the delay slot.
@@ -146,7 +145,7 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
|| I->isInlineAsm()
|| I->isLabel()
|| FI == LastFiller
- || I->getDesc().isPseudo()
+ || I->isPseudo()
//
// Should not allow:
// ERET, DERET or WAIT, PAUSE. Need to add these to instruction
@@ -167,23 +166,21 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
}
bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
- bool &sawLoad,
- bool &sawStore,
+ bool &sawLoad, bool &sawStore,
SmallSet<unsigned, 32> &RegDefs,
SmallSet<unsigned, 32> &RegUses) {
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- MCInstrDesc MCID = candidate->getDesc();
// Loads or stores cannot be moved past a store to the delay slot
- // and stores cannot be moved past a load.
- if (MCID.mayLoad()) {
+ // and stores cannot be moved past a load.
+ if (candidate->mayLoad()) {
if (sawStore)
return true;
sawLoad = true;
}
- if (MCID.mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -191,7 +188,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
return true;
}
- assert((!MCID.isCall() && !MCID.isReturn()) &&
+ assert((!candidate->isCall() && !candidate->isReturn()) &&
"Cannot put calls or returns in delay slot.");
for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
@@ -221,11 +218,11 @@ void Filler::insertDefsUses(MachineBasicBlock::iterator MI,
SmallSet<unsigned, 32>& RegUses) {
// If MI is a call or return, just examine the explicit non-variadic operands.
MCInstrDesc MCID = MI->getDesc();
- unsigned e = MCID.isCall() || MCID.isReturn() ? MCID.getNumOperands() :
- MI->getNumOperands();
-
- // Add RA to RegDefs to prevent users of RA from going into delay slot.
- if (MCID.isCall())
+ unsigned e = MI->isCall() || MI->isReturn() ? MCID.getNumOperands() :
+ MI->getNumOperands();
+
+ // Add RA to RegDefs to prevent users of RA from going into delay slot.
+ if (MI->isCall())
RegDefs.insert(Mips::RA);
for (unsigned i = 0; i != e; ++i) {
@@ -247,7 +244,7 @@ bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg) {
if (RegSet.count(Reg))
return true;
// check Aliased Registers
- for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
*Alias; ++Alias)
if (RegSet.count(*Alias))
return true;
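
The memory-ordering rule delayHasHazard enforces is compact: a load may
not move past a store, and a store may not move past any earlier memory
access. A standalone sketch of that bookkeeping, assuming the two flags
mirror sawLoad/sawStore above (illustrative, not the in-tree interface):

    // State accumulated while scanning instructions between the branch
    // and a candidate delay-slot filler.
    struct HazardState { bool sawLoad = false; bool sawStore = false; };

    // True if an instruction with these properties would be reordered
    // illegally with the memory operations already scanned.
    bool memHazard(HazardState &S, bool mayLoad, bool mayStore) {
      if (mayLoad) {
        if (S.sawStore) return true;  // a load cannot pass a store
        S.sawLoad = true;
      }
      if (mayStore) {
        if (S.sawStore || S.sawLoad) return true;  // a store passes nothing
        S.sawStore = true;
      }
      return false;
    }
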
diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp
index 03d922fe7cd6..119d1a824688 100644
--- a/lib/Target/Mips/MipsEmitGPRestore.cpp
+++ b/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -1,4 +1,4 @@
-//===-- MipsEmitGPRestore.cpp - Emit GP restore instruction----------------===//
+//===-- MipsEmitGPRestore.cpp - Emit GP Restore Instruction ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -44,11 +44,14 @@ namespace {
} // end of anonymous namespace
bool Inserter::runOnMachineFunction(MachineFunction &F) {
- if (TM.getRelocationModel() != Reloc::PIC_)
+ MipsFunctionInfo *MipsFI = F.getInfo<MipsFunctionInfo>();
+
+ if ((TM.getRelocationModel() != Reloc::PIC_) ||
+ (!MipsFI->globalBaseRegFixed()))
return false;
bool Changed = false;
- int FI = F.getInfo<MipsFunctionInfo>()->getGPFI();
+ int FI = MipsFI->getGPFI();
for (MachineFunction::iterator MFI = F.begin(), MFE = F.end();
MFI != MFE; ++MFI) {
@@ -60,7 +63,7 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
if (MBB.isLandingPad()) {
// Find EH_LABEL first.
for (; I->getOpcode() != TargetOpcode::EH_LABEL; ++I) ;
-
+
// Insert lw.
++I;
DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
@@ -81,7 +84,7 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
.addImm(0);
Changed = true;
}
- }
+ }
return Changed;
}
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index a622258a4dcb..baeae97a4f52 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -1,4 +1,4 @@
-//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===//
+//===-- MipsExpandPseudo.cpp - Expand Pseudo Instructions ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -64,16 +64,22 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
const MCInstrDesc& MCid = I->getDesc();
switch(MCid.getOpcode()) {
- default:
+ default:
++I;
continue;
+ case Mips::SETGP2:
+ // Convert "setgp2 $globalreg, $t9" to "addu $globalreg, $v0, $t9"
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(Mips::ADDu),
+ I->getOperand(0).getReg())
+ .addReg(Mips::V0).addReg(I->getOperand(1).getReg());
+ break;
case Mips::BuildPairF64:
ExpandBuildPairF64(MBB, I);
break;
case Mips::ExtractElementF64:
ExpandExtractElementF64(MBB, I);
break;
- }
+ }
// delete original instr
MBB.erase(I++);
@@ -84,12 +90,12 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
}
void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
- MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator I) {
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
- const unsigned* SubReg =
+ const uint16_t* SubReg =
TM.getRegisterInfo()->getSubRegisters(DstReg);
// mtc1 Lo, $fp
@@ -105,12 +111,12 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
unsigned N = I->getOperand(2).getImm();
const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
DebugLoc dl = I->getDebugLoc();
- const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
+ const uint16_t* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N));
}
-/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo
+/// createMipsExpandPseudoPass - Returns a pass that expands pseudo
/// instrs into real instrs
FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) {
return new MipsExpandPseudo(tm);
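
ExpandBuildPairF64 and ExpandExtractElementF64 shuttle a 64-bit value
between two 32-bit GPRs and the subregisters of a double FPR via mtc1
and mfc1. A host-side model of the value flow, assuming the low half
lives in subregister 0 (illustrative only):

    #include <cstdint>
    #include <cstring>

    // BuildPairF64: pack (lo, hi) GPR halves into one f64 value.
    double buildPairF64(uint32_t lo, uint32_t hi) {
      uint64_t bits = (uint64_t(hi) << 32) | lo;
      double d;
      std::memcpy(&d, &bits, sizeof d);
      return d;
    }

    // ExtractElementF64: read half n (0 = low, 1 = high) back to a GPR.
    uint32_t extractElementF64(double d, unsigned n) {
      uint64_t bits;
      std::memcpy(&bits, &d, sizeof bits);
      return uint32_t(bits >> (32 * n));
    }
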
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 22d1e47b1a2b..f8ea3d0321d2 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- MipsFrameLowering.cpp - Mips Frame Information ------*- C++ -*-====//
+//===-- MipsFrameLowering.cpp - Mips Frame Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "MipsFrameLowering.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsInstrInfo.h"
#include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -84,55 +86,44 @@ using namespace llvm;
// if frame pointer elimination is disabled.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()
- || MFI->isFrameAddressTaken();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
}
bool MipsFrameLowering::targetHandlesStackFrameRounding() const {
return true;
}
-static unsigned AlignOffset(unsigned Offset, unsigned Align) {
- return (Offset + Align - 1) / Align * Align;
-}
-
-// expand pair of register and immediate if the immediate doesn't fit in the
-// 16-bit offset field.
-// e.g.
-// if OrigImm = 0x10000, OrigReg = $sp:
-// generate the following sequence of instrs:
-// lui $at, hi(0x10000)
-// addu $at, $sp, $at
-//
-// (NewReg, NewImm) = ($at, lo(Ox10000))
-// return true
-static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm,
- unsigned& NewReg, int& NewImm,
- MachineBasicBlock& MBB,
- MachineBasicBlock::iterator I) {
- // OrigImm fits in the 16-bit field
- if (OrigImm < 0x8000 && OrigImm >= -0x8000) {
- NewReg = OrigReg;
- NewImm = OrigImm;
- return false;
- }
+// Build an instruction sequence to load an immediate that is too large to fit
+// in a 16-bit field and add the result to Reg.
+static void expandLargeImm(unsigned Reg, int64_t Imm, bool IsN64,
+ const MipsInstrInfo &TII, MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator II, DebugLoc DL) {
+ unsigned LUi = IsN64 ? Mips::LUi64 : Mips::LUi;
+ unsigned ADDu = IsN64 ? Mips::DADDu : Mips::ADDu;
+ unsigned ZEROReg = IsN64 ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = IsN64 ? Mips::AT_64 : Mips::AT;
+ MipsAnalyzeImmediate AnalyzeImm;
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, IsN64 ? 64 : 32, false /* LastInstrIsADDiu */);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
- MachineFunction* MF = MBB.getParent();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- DebugLoc DL = I->getDebugLoc();
- int ImmLo = (short)(OrigImm & 0xffff);
- int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) +
- ((OrigImm & 0x8000) != 0);
-
- // FIXME: change this when mips goes MC".
- BuildMI(MBB, I, DL, TII->get(Mips::NOAT));
- BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
- BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg)
- .addReg(Mips::AT);
- NewReg = Mips::AT;
- NewImm = ImmLo;
+ // Build the remaining instructions in Seq.
+ for (++Inst; Inst != Seq.end(); ++Inst)
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
- return true;
+ BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(Reg).addReg(ATReg);
}
void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -146,29 +137,41 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
- unsigned NewReg = 0;
- int NewImm = 0;
- bool ATUsed;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
// First, compute final stack size.
unsigned RegSize = STI.isGP32bit() ? 4 : 8;
unsigned StackAlign = getStackAlignment();
- unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
+ unsigned LocalVarAreaOffset = MipsFI->needGPSaveRestore() ?
(MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) :
MipsFI->getMaxCallFrameSize();
- unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) +
- AlignOffset(MFI->getStackSize(), StackAlign);
+ uint64_t StackSize = RoundUpToAlignment(LocalVarAreaOffset, StackAlign) +
+ RoundUpToAlignment(MFI->getStackSize(), StackAlign);
// Update stack size
- MFI->setStackSize(StackSize);
-
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER));
-
- // TODO: check need from GP here.
- if (isPIC && STI.isABI_O32())
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD))
- .addReg(RegInfo->getPICCallReg());
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
+ MFI->setStackSize(StackSize);
+
+ // Emit instructions that set the global base register if the target ABI is
+ // O32.
+ if (isPIC && MipsFI->globalBaseRegSet() && STI.isABI_O32() &&
+ !MipsFI->globalBaseRegFixed()) {
+ // See MipsInstrInfo.td for explanation.
+ MachineBasicBlock *NewEntry = MF.CreateMachineBasicBlock();
+ MF.insert(&MBB, NewEntry);
+ NewEntry->addSuccessor(&MBB);
+
+ // Copy live in registers.
+ for (MachineBasicBlock::livein_iterator R = MBB.livein_begin();
+ R != MBB.livein_end(); ++R)
+ NewEntry->addLiveIn(*R);
+
+ BuildMI(*NewEntry, NewEntry->begin(), dl, TII.get(Mips::SETGP01),
+ Mips::V0);
+ }
// No need to allocate space on the stack.
if (StackSize == 0 && !MFI->adjustsStack()) return;
@@ -177,15 +180,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
std::vector<MachineMove> &Moves = MMI.getFrameMoves();
MachineLocation DstML, SrcML;
- // Adjust stack : addi sp, sp, (-imm)
- ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB,
- MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(NewReg).addImm(NewImm);
-
- // FIXME: change this when mips goes MC".
- if (ATUsed)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
+ // Adjust stack.
+ if (isInt<16>(-StackSize)) // addiu sp, sp, (-stacksize)
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(-StackSize);
+ else { // Expand an immediate that doesn't fit in 16 bits.
+ MipsFI->setEmitNOAT();
+ expandLargeImm(SP, -StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
+ }
// emit ".cfi_def_cfa_offset StackSize"
MCSymbol *AdjustSPLabel = MMI.getContext().CreateTempSymbol();
@@ -202,13 +203,13 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// register to the stack.
for (unsigned i = 0; i < CSI.size(); ++i)
++MBBI;
-
+
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
MCSymbol *CSLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl,
TII.get(TargetOpcode::PROLOG_LABEL)).addSym(CSLabel);
-
+
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I) {
int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
@@ -217,7 +218,7 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// If Reg is a double precision register, emit two cfa_offsets,
// one for each of the paired single precision registers.
if (Mips::AFGR64RegisterClass->contains(Reg)) {
- const unsigned *SubRegs = RegInfo->getSubRegisters(Reg);
+ const uint16_t *SubRegs = RegInfo->getSubRegisters(Reg);
MachineLocation DstML0(MachineLocation::VirtualFP, Offset);
MachineLocation DstML1(MachineLocation::VirtualFP, Offset + 4);
MachineLocation SrcML0(*SubRegs);
@@ -236,19 +237,18 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
Moves.push_back(MachineMove(CSLabel, DstML, SrcML));
}
}
- }
+ }
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
- // Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP)
- .addReg(Mips::SP).addReg(Mips::ZERO);
+ // Insert instruction "move $fp, $sp" at this location.
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
- // emit ".cfi_def_cfa_register $fp"
+ // emit ".cfi_def_cfa_register $fp"
MCSymbol *SetFPLabel = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, dl,
TII.get(TargetOpcode::PROLOG_LABEL)).addSym(SetFPLabel);
- DstML = MachineLocation(Mips::FP);
+ DstML = MachineLocation(FP);
SrcML = MachineLocation(MachineLocation::VirtualFP);
Moves.push_back(MachineMove(SetFPLabel, DstML, SrcML));
}
@@ -256,12 +256,8 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const {
// Restore GP from the saved stack location
if (MipsFI->needGPSaveRestore()) {
unsigned Offset = MFI->getObjectOffset(MipsFI->getGPFI());
- BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset);
-
- if (Offset >= 0x8000) {
- BuildMI(MBB, llvm::prior(MBBI), dl, TII.get(Mips::MACRO));
- BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO));
- }
+ BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)).addImm(Offset)
+ .addReg(Mips::GP);
}
}
@@ -272,59 +268,59 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF,
const MipsInstrInfo &TII =
*static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
-
- // Get the number of bytes from FrameInfo
- unsigned StackSize = MFI->getStackSize();
-
- unsigned NewReg = 0;
- int NewImm = 0;
- bool ATUsed = false;
+ unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP;
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
+ unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu;
// if framepointer enabled, restore the stack pointer.
if (hasFP(MF)) {
// Find the first instruction that restores a callee-saved register.
MachineBasicBlock::iterator I = MBBI;
-
+
for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
--I;
// Insert instruction "move $sp, $fp" at this location.
- BuildMI(MBB, I, dl, TII.get(Mips::ADDu), Mips::SP)
- .addReg(Mips::FP).addReg(Mips::ZERO);
+ BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO);
}
- // adjust stack : insert addi sp, sp, (imm)
- if (StackSize) {
- ATUsed = expandRegLargeImmPair(Mips::SP, StackSize, NewReg, NewImm, MBB,
- MBBI);
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP)
- .addReg(NewReg).addImm(NewImm);
+ // Get the number of bytes from FrameInfo
+ uint64_t StackSize = MFI->getStackSize();
- // FIXME: change this when mips goes MC".
- if (ATUsed)
- BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO));
- }
+ if (!StackSize)
+ return;
+
+ // Adjust stack.
+ if (isInt<16>(StackSize)) // addiu sp, sp, stacksize
+ BuildMI(MBB, MBBI, dl, TII.get(ADDiu), SP).addReg(SP).addImm(StackSize);
+ else // Expand an immediate that doesn't fit in 16 bits.
+ expandLargeImm(SP, StackSize, STI.isABI_N64(), TII, MBB, MBBI, dl);
}
void MipsFrameLowering::
processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineRegisterInfo& MRI = MF.getRegInfo();
+ unsigned FP = STI.isABI_N64() ? Mips::FP_64 : Mips::FP;
// FIXME: remove this code if register allocator can correctly mark
// $fp and $ra used or unused.
// Mark $fp and $ra as used or unused.
if (hasFP(MF))
- MRI.setPhysRegUsed(Mips::FP);
+ MRI.setPhysRegUsed(FP);
- // The register allocator might determine $ra is used after seeing
+ // The register allocator might determine $ra is used after seeing
// instruction "jr $ra", but we do not want PrologEpilogInserter to insert
// instructions to save/restore $ra unless there is a function call.
// To correct this, $ra is explicitly marked unused if there is no
// function call.
if (MF.getFrameInfo()->hasCalls())
MRI.setPhysRegUsed(Mips::RA);
- else
+ else {
MRI.setPhysRegUnused(Mips::RA);
+ MRI.setPhysRegUnused(Mips::RA_64);
+ }
}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index c24975614c8d..bd1d89f04bc1 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -1,4 +1,4 @@
-//==--- MipsFrameLowering.h - Define frame lowering for Mips --*- C++ -*---===//
+//===-- MipsFrameLowering.h - Define frame lowering for Mips ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 9c831ede9dbf..f0651c61311b 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -1,4 +1,4 @@
-//===-- MipsISelDAGToDAG.cpp - A dag to dag inst selector for Mips --------===//
+//===-- MipsISelDAGToDAG.cpp - A DAG to DAG Inst Selector for Mips --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,12 @@
#define DEBUG_TYPE "mips-isel"
#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsMachineFunction.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
#include "MipsTargetMachine.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
@@ -28,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -63,6 +66,7 @@ public:
return "MIPS DAG->DAG Pattern Instruction Selection";
}
+ virtual bool runOnMachineFunction(MachineFunction &MF);
private:
// Include the pieces autogenerated from the target description.
@@ -81,17 +85,24 @@ private:
}
SDNode *getGlobalBaseReg();
+
+ std::pair<SDNode*, SDNode*> SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl,
+ EVT Ty, bool HasLo, bool HasHi);
+
SDNode *Select(SDNode *N);
// Complex Pattern.
- bool SelectAddr(SDValue N, SDValue &Base, SDValue &Offset);
+ bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Offset);
- // getI32Imm - Return a target constant with the specified
- // value, of type i32.
- inline SDValue getI32Imm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
+ // getImm - Return a target constant with the specified value.
+ inline SDValue getImm(const SDNode *Node, unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, Node->getValueType(0));
}
+ void ProcessFunctionAfterISel(MachineFunction &MF);
+ bool ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI, const MachineInstr&);
+ void InitGlobalBaseReg(MachineFunction &MF);
+
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
@@ -99,20 +110,163 @@ private:
}
+// Insert instructions to initialize the global base register in the
+// first MBB of the function. When the ABI is O32 and the relocation model is
+// PIC, the necessary instructions are emitted later to prevent optimization
+// passes from moving them.
+void MipsDAGToDAGISel::InitGlobalBaseReg(MachineFunction &MF) {
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ if (!MipsFI->globalBaseRegSet())
+ return;
+
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator I = MBB.begin();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
+ unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg();
+ bool FixGlobalBaseReg = MipsFI->globalBaseRegFixed();
+
+ if (Subtarget.isABI_O32() && FixGlobalBaseReg)
+ // $gp is the global base register.
+ V0 = V1 = GlobalBaseReg;
+ else {
+ const TargetRegisterClass *RC;
+ RC = Subtarget.isABI_N64() ?
+ Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
+
+ V0 = RegInfo.createVirtualRegister(RC);
+ V1 = RegInfo.createVirtualRegister(RC);
+ }
+
+ if (Subtarget.isABI_N64()) {
+ MF.getRegInfo().addLiveIn(Mips::T9_64);
+ MBB.addLiveIn(Mips::T9_64);
+
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // daddu $v1, $v0, $t9
+ // daddiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi64), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDu), V1).addReg(V0).addReg(Mips::T9_64);
+ BuildMI(MBB, I, DL, TII.get(Mips::DADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ } else if (MF.getTarget().getRelocationModel() == Reloc::Static) {
+ // Set global register to __gnu_local_gp.
+ //
+ // lui $v0, %hi(__gnu_local_gp)
+ // addiu $globalbasereg, $v0, %lo(__gnu_local_gp)
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V0)
+ .addExternalSymbol("__gnu_local_gp", MipsII::MO_ABS_LO);
+ } else {
+ MF.getRegInfo().addLiveIn(Mips::T9);
+ MBB.addLiveIn(Mips::T9);
+
+ if (Subtarget.isABI_N32()) {
+ // lui $v0, %hi(%neg(%gp_rel(fname)))
+ // addu $v1, $v0, $t9
+ // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname)))
+ const GlobalValue *FName = MF.getFunction();
+ BuildMI(MBB, I, DL, TII.get(Mips::LUi), V0)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDu), V1).addReg(V0).addReg(Mips::T9);
+ BuildMI(MBB, I, DL, TII.get(Mips::ADDiu), GlobalBaseReg).addReg(V1)
+ .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO);
+ } else if (!MipsFI->globalBaseRegFixed()) {
+ assert(Subtarget.isABI_O32());
+
+ BuildMI(MBB, I, DL, TII.get(Mips::SETGP2), GlobalBaseReg)
+ .addReg(Mips::T9);
+ }
+ }
+}
+
+bool MipsDAGToDAGISel::ReplaceUsesWithZeroReg(MachineRegisterInfo *MRI,
+ const MachineInstr& MI) {
+ unsigned DstReg = 0, ZeroReg = 0;
+
+ // Check if MI is "addiu $dst, $zero, 0" or "daddiu $dst, $zero, 0".
+ if ((MI.getOpcode() == Mips::ADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO;
+ } else if ((MI.getOpcode() == Mips::DADDiu) &&
+ (MI.getOperand(1).getReg() == Mips::ZERO_64) &&
+ (MI.getOperand(2).getImm() == 0)) {
+ DstReg = MI.getOperand(0).getReg();
+ ZeroReg = Mips::ZERO_64;
+ }
+
+ if (!DstReg)
+ return false;
+
+ // Replace uses with ZeroReg.
+ for (MachineRegisterInfo::use_iterator U = MRI->use_begin(DstReg),
+ E = MRI->use_end(); U != E; ++U) {
+ MachineOperand &MO = U.getOperand();
+ MachineInstr *MI = MO.getParent();
+
+ // Do not replace if it is a phi's operand or is tied to def operand.
+ if (MI->isPHI() || MI->isRegTiedToDefOperand(U.getOperandNo()))
+ continue;
+
+ MO.setReg(ZeroReg);
+ }
+
+ return true;
+}
+
+void MipsDAGToDAGISel::ProcessFunctionAfterISel(MachineFunction &MF) {
+ InitGlobalBaseReg(MF);
+
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); MFI != MFE;
+ ++MFI)
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
+ ReplaceUsesWithZeroReg(MRI, *I);
+}
+
+bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ bool Ret = SelectionDAGISel::runOnMachineFunction(MF);
+
+ ProcessFunctionAfterISel(MF);
+
+ return Ret;
+}
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
- unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ unsigned GlobalBaseReg = MF->getInfo<MipsFunctionInfo>()->getGlobalBaseReg();
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
-SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
+SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) {
EVT ValTy = Addr.getValueType();
- unsigned GPReg = ValTy == MVT::i32 ? Mips::GP : Mips::GP_64;
+
+ // If Parent is an unaligned f32 load or store, select a (base + index)
+ // floating point load/store instruction (luxc1 or suxc1).
+ const LSBaseSDNode* LS = 0;
+
+ if (Parent && (LS = dyn_cast<LSBaseSDNode>(Parent))) {
+ EVT VT = LS->getMemoryVT();
+
+ if (VT.getSizeInBits() / 8 > LS->getAlignment()) {
+ assert(TLI.allowsUnalignedMemoryAccesses(VT) &&
+ "Unaligned loads/stores not supported for this type.");
+ if (VT == MVT::f32)
+ return false;
+ }
+ }
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
@@ -122,21 +276,16 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
}
// on PIC code Load GA
- if (TM.getRelocationModel() == Reloc::PIC_) {
- if (Addr.getOpcode() == MipsISD::WrapperPIC) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr.getOperand(0);
- return true;
- }
- } else {
+ if (Addr.getOpcode() == MipsISD::Wrapper) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (TM.getRelocationModel() != Reloc::PIC_) {
if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
Addr.getOpcode() == ISD::TargetGlobalAddress))
return false;
- else if (Addr.getOpcode() == ISD::TargetGlobalTLSAddress) {
- Base = CurDAG->getRegister(GPReg, ValTy);
- Offset = Addr;
- return true;
- }
}
// Addresses of the form FI+const or FI|const
@@ -166,17 +315,20 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// Generate:
// lui $2, %hi($CPI1_0)
// lwc1 $f0, %lo($CPI1_0)($2)
- if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi ||
- Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
- Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ if (Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
SDValue LoVal = Addr.getOperand(1);
- if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
+ if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
Base = Addr.getOperand(0);
Offset = LoVal.getOperand(0);
return true;
}
}
+
+ // If an indexed floating point load/store can be emitted, return false.
+ if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
+ Subtarget.hasMips32r2Or64())
+ return false;
}
Base = Addr;
@@ -184,6 +336,28 @@ SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
return true;
}
+/// Select multiply instructions.
+std::pair<SDNode*, SDNode*>
+MipsDAGToDAGISel::SelectMULT(SDNode *N, unsigned Opc, DebugLoc dl, EVT Ty,
+ bool HasLo, bool HasHi) {
+ SDNode *Lo = 0, *Hi = 0;
+ SDNode *Mul = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N->getOperand(0),
+ N->getOperand(1));
+ SDValue InFlag = SDValue(Mul, 0);
+
+ if (HasLo) {
+ Lo = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64, dl,
+ Ty, MVT::Glue, InFlag);
+ InFlag = SDValue(Lo, 1);
+ }
+ if (HasHi)
+ Hi = CurDAG->getMachineNode(Ty == MVT::i32 ? Mips::MFHI : Mips::MFHI64, dl,
+ Ty, InFlag);
+
+ return std::make_pair(Lo, Hi);
+}
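
SelectMULT mirrors the MIPS multiply pipeline: MULT/DMULT deposit the
full product in the HI/LO pair, and MFLO/MFHI read the halves back,
glued together so nothing clobbers HI/LO in between. The arithmetic
being selected, as a host-side sketch for the 32-bit case:

    #include <cstdint>

    // mult a, b writes a 64-bit product into HI:LO; mflo/mfhi read it.
    void mult32(int32_t a, int32_t b, uint32_t &lo, uint32_t &hi) {
      int64_t p = int64_t(a) * int64_t(b);  // SMUL_LOHI semantics
      lo = uint32_t(p);                     // MFLO: low half
      hi = uint32_t(uint64_t(p) >> 32);     // MFHI: high half (MULHS)
    }
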
+
/// Select instructions not customized! Used for
/// expanded, promoted and normal instructions
SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
@@ -203,123 +377,167 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
// Instruction Selection not handled by the auto-generated
// tablegen selection should be handled here.
///
+ EVT NodeTy = Node->getValueType(0);
+ unsigned MultOpc;
+
switch(Opcode) {
- default: break;
-
- case ISD::SUBE:
- case ISD::ADDE: {
- SDValue InFlag = Node->getOperand(2), CmpLHS;
- unsigned Opc = InFlag.getOpcode(); (void)Opc;
- assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
- (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
- "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
-
- unsigned MOp;
- if (Opcode == ISD::ADDE) {
- CmpLHS = InFlag.getValue(0);
- MOp = Mips::ADDu;
- } else {
- CmpLHS = InFlag.getOperand(0);
- MOp = Mips::SUBu;
- }
+ default: break;
+
+ case ISD::SUBE:
+ case ISD::ADDE: {
+ SDValue InFlag = Node->getOperand(2), CmpLHS;
+ unsigned Opc = InFlag.getOpcode(); (void)Opc;
+ assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) ||
+ (Opc == ISD::SUBC || Opc == ISD::SUBE)) &&
+ "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn");
+
+ unsigned MOp;
+ if (Opcode == ISD::ADDE) {
+ CmpLHS = InFlag.getValue(0);
+ MOp = Mips::ADDu;
+ } else {
+ CmpLHS = InFlag.getOperand(0);
+ MOp = Mips::SUBu;
+ }
- SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
+ SDValue Ops[] = { CmpLHS, InFlag.getOperand(1) };
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
- EVT VT = LHS.getValueType();
- SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
- SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
- SDValue(Carry,0), RHS);
+ EVT VT = LHS.getValueType();
+ SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2);
+ SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT,
+ SDValue(Carry,0), RHS);
- return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
- LHS, SDValue(AddCarry,0));
- }
+ return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue,
+ LHS, SDValue(AddCarry,0));
+ }
- /// Mul with two results
- case ISD::SMUL_LOHI:
- case ISD::UMUL_LOHI: {
- assert(Node->getValueType(0) != MVT::i64 &&
- "64-bit multiplication with two results not handled.");
- SDValue Op1 = Node->getOperand(0);
- SDValue Op2 = Node->getOperand(1);
+ /// Mul with two results
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::UMUL_LOHI ? Mips::DMULTu : Mips::DMULT);
- unsigned Op;
- Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT);
+ std::pair<SDNode*, SDNode*> LoHi = SelectMULT(Node, MultOpc, dl, NodeTy,
+ true, true);
- SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2);
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0), SDValue(LoHi.first, 0));
- SDValue InFlag = SDValue(Mul, 0);
- SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32,
- MVT::Glue, InFlag);
- InFlag = SDValue(Lo,1);
- SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+ if (!SDValue(Node, 1).use_empty())
+ ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
- if (!SDValue(Node, 0).use_empty())
- ReplaceUses(SDValue(Node, 0), SDValue(Lo,0));
+ return NULL;
+ }
- if (!SDValue(Node, 1).use_empty())
- ReplaceUses(SDValue(Node, 1), SDValue(Hi,0));
+ /// Special Muls
+ case ISD::MUL: {
+ // Mips32 has a 32-bit three operand mul instruction.
+ if (Subtarget.hasMips32() && NodeTy == MVT::i32)
+ break;
+ return SelectMULT(Node, NodeTy == MVT::i32 ? Mips::MULT : Mips::DMULT,
+ dl, NodeTy, true, false).first;
+ }
+ case ISD::MULHS:
+ case ISD::MULHU: {
+ if (NodeTy == MVT::i32)
+ MultOpc = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT);
+ else
+ MultOpc = (Opcode == ISD::MULHU ? Mips::DMULTu : Mips::DMULT);
+
+ return SelectMULT(Node, MultOpc, dl, NodeTy, false, true).second;
+ }
- return NULL;
- }
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
- /// Special Muls
- case ISD::MUL:
- // Mips32 has a 32-bit three operand mul instruction.
- if (Subtarget.hasMips32() && Node->getValueType(0) == MVT::i32)
- break;
- case ISD::MULHS:
- case ISD::MULHU: {
- assert((Opcode == ISD::MUL || Node->getValueType(0) != MVT::i64) &&
- "64-bit MULH* not handled.");
- EVT Ty = Node->getValueType(0);
- SDValue MulOp1 = Node->getOperand(0);
- SDValue MulOp2 = Node->getOperand(1);
-
- unsigned MulOp = (Opcode == ISD::MULHU ?
- Mips::MULTu :
- (Ty == MVT::i32 ? Mips::MULT : Mips::DMULT));
- SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl,
- MVT::Glue, MulOp1, MulOp2);
-
- SDValue InFlag = SDValue(MulNode, 0);
-
- if (Opcode == ISD::MUL) {
- unsigned Opc = (Ty == MVT::i32 ? Mips::MFLO : Mips::MFLO64);
- return CurDAG->getMachineNode(Opc, dl, Ty, InFlag);
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
+ if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ if (Subtarget.hasMips64()) {
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO_64, MVT::i64);
+ return CurDAG->getMachineNode(Mips::DMTC1, dl, MVT::f64, Zero);
}
- else
- return CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag);
+
+ SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ Mips::ZERO, MVT::i32);
+ return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
+ Zero);
}
+ break;
+ }
- // Get target GOT address.
- case ISD::GLOBAL_OFFSET_TABLE:
- return getGlobalBaseReg();
+ case ISD::Constant: {
+ const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ unsigned Size = CN->getValueSizeInBits(0);
- case ISD::ConstantFP: {
- ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node);
- if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) {
- SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- Mips::ZERO, MVT::i32);
- return CurDAG->getMachineNode(Mips::BuildPairF64, dl, MVT::f64, Zero,
- Zero);
- }
+ if (Size == 32)
break;
+
+ MipsAnalyzeImmediate AnalyzeImm;
+ int64_t Imm = CN->getSExtValue();
+
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Imm, Size, false);
+
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+ DebugLoc DL = CN->getDebugLoc();
+ SDNode *RegOpnd;
+ SDValue ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+
+ // The first instruction can be a LUi which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == Mips::LUi64)
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64, ImmOpnd);
+ else
+ RegOpnd =
+ CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ CurDAG->getRegister(Mips::ZERO_64, MVT::i64),
+ ImmOpnd);
+
+ // The remaining instructions in the sequence are handled here.
+ for (++Inst; Inst != Seq.end(); ++Inst) {
+ ImmOpnd = CurDAG->getTargetConstant(SignExtend64<16>(Inst->ImmOpnd),
+ MVT::i64);
+ RegOpnd = CurDAG->getMachineNode(Inst->Opc, DL, MVT::i64,
+ SDValue(RegOpnd, 0), ImmOpnd);
}
- case MipsISD::ThreadPointer: {
- unsigned SrcReg = Mips::HWR29;
- unsigned DestReg = Mips::V1;
- SDNode *Rdhwr = CurDAG->getMachineNode(Mips::RDHWR, Node->getDebugLoc(),
- Node->getValueType(0), CurDAG->getRegister(SrcReg, MVT::i32));
- SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
- SDValue(Rdhwr, 0));
- SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, MVT::i32);
- ReplaceUses(SDValue(Node, 0), ResNode);
- return ResNode.getNode();
+ return RegOpnd;
+ }
+
+ case MipsISD::ThreadPointer: {
+ EVT PtrVT = TLI.getPointerTy();
+ unsigned RdhwrOpc, SrcReg, DestReg;
+
+ if (PtrVT == MVT::i32) {
+ RdhwrOpc = Mips::RDHWR;
+ SrcReg = Mips::HWR29;
+ DestReg = Mips::V1;
+ } else {
+ RdhwrOpc = Mips::RDHWR64;
+ SrcReg = Mips::HWR29_64;
+ DestReg = Mips::V1_64;
}
+
+ SDNode *Rdhwr =
+ CurDAG->getMachineNode(RdhwrOpc, Node->getDebugLoc(),
+ Node->getValueType(0),
+ CurDAG->getRegister(SrcReg, PtrVT));
+ SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, DestReg,
+ SDValue(Rdhwr, 0));
+ SDValue ResNode = CurDAG->getCopyFromReg(Chain, dl, DestReg, PtrVT);
+ ReplaceUses(SDValue(Node, 0), ResNode);
+ return ResNode.getNode();
+ }
}
// Select the default instruction
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 1932e745c593..6a23bc3d1d7c 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -18,12 +18,13 @@
#include "MipsTargetMachine.h"
#include "MipsTargetObjectFile.h"
#include "MipsSubtarget.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
-#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -35,27 +36,29 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-// If I is a shifted mask, set the size (Size) and the first bit of the
+// If I is a shifted mask, set the size (Size) and the first bit of the
// mask (Pos), and return true.
-// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
+// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
static bool IsShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
- if (!isUInt<32>(I) || !isShiftedMask_32(I))
+ if (!isShiftedMask_64(I))
return false;
- Size = CountPopulation_32(I);
- Pos = CountTrailingZeros_32(I);
+ Size = CountPopulation_64(I);
+ Pos = CountTrailingZeros_64(I);
return true;
}
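
A shifted mask is a single contiguous run of ones, so Pos is the count
of trailing zeros and Size the population count; for 0x003ff800 that
gives (Pos, Size) = (11, 11). A freestanding sketch of the 64-bit check
using GCC/Clang builtins:

    #include <cstdint>

    bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
      if (I == 0)
        return false;
      Pos = __builtin_ctzll(I);        // index of the lowest set bit
      Size = __builtin_popcountll(I);  // number of set bits
      // Shifted down to bit 0, a contiguous run of ones has the form
      // 2^Size - 1, i.e. x & (x + 1) == 0.
      uint64_t x = I >> Pos;
      return (x & (x + 1)) == 0;
    }
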
+static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
+ MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
+ return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
+}
+
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MipsISD::JmpLink: return "MipsISD::JmpLink";
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
case MipsISD::GPRel: return "MipsISD::GPRel";
- case MipsISD::TlsGd: return "MipsISD::TlsGd";
- case MipsISD::TprelHi: return "MipsISD::TprelHi";
- case MipsISD::TprelLo: return "MipsISD::TprelLo";
case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer";
case MipsISD::Ret: return "MipsISD::Ret";
case MipsISD::FPBrcond: return "MipsISD::FPBrcond";
@@ -71,7 +74,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::DivRemU: return "MipsISD::DivRemU";
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
- case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::Wrapper: return "MipsISD::Wrapper";
case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
case MipsISD::Sync: return "MipsISD::Sync";
case MipsISD::Ext: return "MipsISD::Ext";
@@ -84,7 +87,8 @@ MipsTargetLowering::
MipsTargetLowering(MipsTargetMachine &TM)
: TargetLowering(TM, new MipsTargetObjectFile()),
Subtarget(&TM.getSubtarget<MipsSubtarget>()),
- HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) {
+ HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()),
+ IsO32(Subtarget->isABI_O32()) {
// Mips does not have i1 type, so use i32 for
// setcc operations results (slt, sgt, ...).
@@ -93,17 +97,20 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Set up the register classes
addRegisterClass(MVT::i32, Mips::CPURegsRegisterClass);
- addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
if (HasMips64)
addRegisterClass(MVT::i64, Mips::CPU64RegsRegisterClass);
- // When dealing with single precision only, use libcalls
- if (!Subtarget->isSingleFloat()) {
- if (HasMips64)
- addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
- else
- addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ if (!TM.Options.UseSoftFloat) {
+ addRegisterClass(MVT::f32, Mips::FGR32RegisterClass);
+
+ // When dealing with single precision only, use libcalls
+ if (!Subtarget->isSingleFloat()) {
+ if (HasMips64)
+ addRegisterClass(MVT::f64, Mips::FGR64RegisterClass);
+ else
+ addRegisterClass(MVT::f64, Mips::AFGR64RegisterClass);
+ }
}
 // Load extended operations for i1 types must be promoted
@@ -123,7 +130,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
// Mips Custom Operations
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
@@ -131,9 +137,30 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+ if (!TM.Options.NoNaNsFPMath) {
+ setOperationAction(ISD::FABS, MVT::f32, Custom);
+ setOperationAction(ISD::FABS, MVT::f64, Custom);
+ }
+
+ if (HasMips64) {
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i64, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
+ }
setOperationAction(ISD::SDIV, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -149,10 +176,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -165,8 +200,6 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FSIN, MVT::f32, Expand);
setOperationAction(ISD::FSIN, MVT::f64, Expand);
setOperationAction(ISD::FCOS, MVT::f32, Expand);
@@ -180,9 +213,18 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FEXP, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+
+ if (!TM.Options.NoNaNsFPMath) {
+ setOperationAction(ISD::FNEG, MVT::f32, Expand);
+ setOperationAction(ISD::FNEG, MVT::f64, Expand);
+ }
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
+ setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
+ setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
@@ -192,11 +234,10 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
-
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
setInsertFencesForAtomic(true);
@@ -208,32 +249,46 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
- if (!Subtarget->hasBitCount())
+ if (!Subtarget->hasBitCount()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+ }
- if (!Subtarget->hasSwap())
+ if (!Subtarget->hasSwap()) {
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+ }
setTargetDAGCombine(ISD::ADDE);
setTargetDAGCombine(ISD::SUBE);
setTargetDAGCombine(ISD::SDIVREM);
setTargetDAGCombine(ISD::UDIVREM);
- setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
- setMinFunctionAlignment(2);
+ setMinFunctionAlignment(HasMips64 ? 3 : 2);
- setStackPointerRegisterToSaveRestore(Mips::SP);
+ setStackPointerRegisterToSaveRestore(IsN64 ? Mips::SP_64 : Mips::SP);
computeRegisterProperties();
- setExceptionPointerRegister(Mips::A0);
- setExceptionSelectorRegister(Mips::A1);
+ setExceptionPointerRegister(IsN64 ? Mips::A0_64 : Mips::A0);
+ setExceptionSelectorRegister(IsN64 ? Mips::A1_64 : Mips::A1);
}
bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
- return SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16;
+
+ switch (SVT) {
+ case MVT::i64:
+ case MVT::i32:
+ case MVT::i16:
+ return true;
+ case MVT::f32:
+ return Subtarget->hasMips32r2Or64();
+ default:
+ return false;
+ }
}
EVT MipsTargetLowering::getSetCCResultType(EVT VT) const {
@@ -290,8 +345,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) {
// create MipsMAdd(u) node
MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MAddu : MipsISD::MAdd;
- SDValue MAdd = CurDAG->getNode(MultOpc, dl,
- MVT::Glue,
+ SDValue MAdd = CurDAG->getNode(MultOpc, dl, MVT::Glue,
MultNode->getOperand(0),// Factor 0
MultNode->getOperand(1),// Factor 1
ADDCNode->getOperand(1),// Lo0
@@ -364,8 +418,7 @@ static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) {
// create MipsSub(u) node
MultOpc = MultOpc == ISD::UMUL_LOHI ? MipsISD::MSubu : MipsISD::MSub;
- SDValue MSub = CurDAG->getNode(MultOpc, dl,
- MVT::Glue,
+ SDValue MSub = CurDAG->getNode(MultOpc, dl, MVT::Glue,
MultNode->getOperand(0),// Factor 0
MultNode->getOperand(1),// Factor 1
SUBCNode->getOperand(0),// Lo0
@@ -394,7 +447,8 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && SelectMadd(N, &DAG))
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ SelectMadd(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -406,7 +460,8 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG,
if (DCI.isBeforeLegalize())
return SDValue();
- if (Subtarget->hasMips32() && SelectMsub(N, &DAG))
+ if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 &&
+ SelectMsub(N, &DAG))
return SDValue(N, 0);
return SDValue();
@@ -419,8 +474,8 @@ static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
EVT Ty = N->getValueType(0);
- unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
- unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
+ unsigned LO = (Ty == MVT::i32) ? Mips::LO : Mips::LO64;
+ unsigned HI = (Ty == MVT::i32) ? Mips::HI : Mips::HI64;
unsigned opc = N->getOpcode() == ISD::SDIVREM ? MipsISD::DivRem :
MipsISD::DivRemU;
DebugLoc dl = N->getDebugLoc();
@@ -481,11 +536,10 @@ static bool InvertFPCondCode(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return false;
- if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
- return true;
+ assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
+ "Illegal Condition Code");
- assert(false && "Illegal Condition Code");
- return false;
+ return true;
}
// Creates and returns an FPCmp node from a setcc node.
@@ -522,21 +576,37 @@ static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True,
True.getValueType(), True, False, Cond);
}
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const MipsSubtarget* Subtarget) {
+static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG& DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const MipsSubtarget* Subtarget) {
if (DCI.isBeforeLegalizeOps())
return SDValue();
- SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0));
+ SDValue SetCC = N->getOperand(0);
- if (Cond.getOpcode() != MipsISD::FPCmp)
+ if ((SetCC.getOpcode() != ISD::SETCC) ||
+ !SetCC.getOperand(0).getValueType().isInteger())
return SDValue();
- SDValue True = DAG.getConstant(1, MVT::i32);
- SDValue False = DAG.getConstant(0, MVT::i32);
+ SDValue False = N->getOperand(2);
+ EVT FalseTy = False.getValueType();
+
+ if (!FalseTy.isInteger())
+ return SDValue();
+
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(False);
+
+ if (!CN || CN->getZExtValue())
+ return SDValue();
- return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc());
+ const DebugLoc DL = N->getDebugLoc();
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ SDValue True = N->getOperand(1);
+
+ SetCC = DAG.getSetCC(DL, SetCC.getValueType(), SetCC.getOperand(0),
+ SetCC.getOperand(1), ISD::getSetCCInverse(CC, true));
+
+ return DAG.getNode(ISD::SELECT, DL, FalseTy, SetCC, False, True);
}
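
The combine rests on a simple identity: when the false arm is the
constant 0, select(c, T, 0) equals select(!c, 0, T), and the inverted
form puts the zero where a MIPS conditional move can consume it
directly. Spelled out as a trivial sketch:

    // Both functions return T exactly when c holds and 0 otherwise.
    int selectOriginal(bool c, int T) { return c ? T : 0; }
    int selectInverted(bool c, int T) { return !c ? 0 : T; }
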
static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
@@ -549,20 +619,20 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
return SDValue();
SDValue ShiftRight = N->getOperand(0), Mask = N->getOperand(1);
-
+ unsigned ShiftRightOpc = ShiftRight.getOpcode();
+
// Op's first operand must be a shift right.
- if (ShiftRight.getOpcode() != ISD::SRA && ShiftRight.getOpcode() != ISD::SRL)
+ if (ShiftRightOpc != ISD::SRA && ShiftRightOpc != ISD::SRL)
return SDValue();
// The second operand of the shift must be an immediate.
- uint64_t Pos;
ConstantSDNode *CN;
if (!(CN = dyn_cast<ConstantSDNode>(ShiftRight.getOperand(1))))
return SDValue();
-
- Pos = CN->getZExtValue();
+ uint64_t Pos = CN->getZExtValue();
uint64_t SMPos, SMSize;
+
// Op's second operand must be a shifted mask.
if (!(CN = dyn_cast<ConstantSDNode>(Mask)) ||
!IsShiftedMask(CN->getZExtValue(), SMPos, SMSize))
@@ -570,21 +640,21 @@ static SDValue PerformANDCombine(SDNode *N, SelectionDAG& DAG,
// Return if the shifted mask does not start at bit 0 or the sum of its size
// and Pos exceeds the word's size.
- if (SMPos != 0 || Pos + SMSize > 32)
+ EVT ValTy = N->getValueType(0);
+ if (SMPos != 0 || Pos + SMSize > ValTy.getSizeInBits())
return SDValue();
- return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), MVT::i32,
- ShiftRight.getOperand(0),
- DAG.getConstant(Pos, MVT::i32),
+ return DAG.getNode(MipsISD::Ext, N->getDebugLoc(), ValTy,
+ ShiftRight.getOperand(0), DAG.getConstant(Pos, MVT::i32),
DAG.getConstant(SMSize, MVT::i32));
}
-
+
static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
TargetLowering::DAGCombinerInfo &DCI,
const MipsSubtarget* Subtarget) {
// Pattern match INS.
// $dst = or (and $src1 , mask0), (and (shl $src, pos), mask1),
- // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
+ // where mask1 = (2**size - 1) << pos, mask0 = ~mask1
// => ins $dst, $src, size, pos, $src1
if (DCI.isBeforeLegalizeOps() || !Subtarget->hasMips32r2())
return SDValue();
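
The ext and ins combines recognize C-style bitfield reads and writes.
Using the mask definition from the comment above (mask1 =
(2**size - 1) << pos), plain C++ models of the two MIPS32r2
instructions, valid for 0 < size < 32 and pos + size <= 32:

    #include <cstdint>

    // ext rt, rs, pos, size: extract size bits of rs starting at pos.
    uint32_t ext(uint32_t rs, unsigned pos, unsigned size) {
      return (rs >> pos) & ((uint32_t(1) << size) - 1);
    }

    // ins rt, rs, pos, size: replace size bits of rt at pos with the
    // low size bits of rs; mask1 matches the pattern comment.
    uint32_t ins(uint32_t rt, uint32_t rs, unsigned pos, unsigned size) {
      uint32_t mask1 = ((uint32_t(1) << size) - 1) << pos;
      return (rt & ~mask1) | ((rs << pos) & mask1);
    }
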
@@ -604,7 +674,7 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
// See if Op's second operand matches (and (shl $src, pos), mask1).
if (And1.getOpcode() != ISD::AND)
return SDValue();
-
+
if (!(CN = dyn_cast<ConstantSDNode>(And1.getOperand(1))) ||
!IsShiftedMask(CN->getZExtValue(), SMPos1, SMSize1))
return SDValue();
@@ -623,17 +693,16 @@ static SDValue PerformORCombine(SDNode *N, SelectionDAG& DAG,
unsigned Shamt = CN->getZExtValue();
// Return if the shift amount and the first bit position of mask are not the
- // same.
- if (Shamt != SMPos0)
+ // same.
+ EVT ValTy = N->getValueType(0);
+ if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits()))
return SDValue();
-
- return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), MVT::i32,
- Shl.getOperand(0),
+
+ return DAG.getNode(MipsISD::Ins, N->getDebugLoc(), ValTy, Shl.getOperand(0),
DAG.getConstant(SMPos0, MVT::i32),
- DAG.getConstant(SMSize0, MVT::i32),
- And0.getOperand(0));
+ DAG.getConstant(SMSize0, MVT::i32), And0.getOperand(0));
}
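
The INS pattern is the complementary read-modify-write idiom. A small self-checking sketch of the masks named in the comment above:

```cpp
// Self-checking sketch of the INS pattern and its masks:
// mask1 = (2**Size - 1) << Pos, mask0 = ~mask1.
#include <cassert>
#include <cstdint>

// ins dst, src, Pos, Size: insert the low Size bits of src at bit Pos.
static uint32_t ins(uint32_t Dst, uint32_t Src, unsigned Pos, unsigned Size) {
  uint32_t Mask1 = ((Size == 32 ? 0u : (1u << Size)) - 1u) << Pos;
  return (Dst & ~Mask1) | ((Src << Pos) & Mask1);
}

int main() {
  // Insert the low 8 bits of 0xab at bit 16 of 0x12345678.
  assert(ins(0x12345678u, 0xabu, 16, 8) == 0x12ab5678u);
  return 0;
}
```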
-
+
SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
SelectionDAG &DAG = DCI.DAG;
@@ -648,8 +717,8 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
case ISD::SDIVREM:
case ISD::UDIVREM:
return PerformDivRemCombine(N, DAG, DCI, Subtarget);
- case ISD::SETCC:
- return PerformSETCCCombine(N, DAG, DCI, Subtarget);
+ case ISD::SELECT:
+ return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case ISD::AND:
return PerformANDCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
@@ -672,8 +741,10 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
+ case ISD::FABS: return LowerFABS(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG);
case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
@@ -689,7 +760,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
// MachineFunction as a live-in value. It also creates a corresponding
// virtual register for it.
static unsigned
-AddLiveIn(MachineFunction &MF, unsigned PReg, TargetRegisterClass *RC)
+AddLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC)
{
assert(RC->contains(PReg) && "Not the correct regclass!");
unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -702,12 +773,13 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) {
if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT)
return Mips::BRANCH_T;
- if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT)
- return Mips::BRANCH_F;
+ assert((CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) &&
+ "Invalid CondCode.");
- return Mips::BRANCH_INVALID;
+ return Mips::BRANCH_F;
}
+/*
static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
DebugLoc dl,
const MipsSubtarget* Subtarget,
@@ -783,89 +855,115 @@ static MachineBasicBlock* ExpandCondMov(MachineInstr *MI, MachineBasicBlock *BB,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
+*/
MachineBasicBlock *
MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
-
switch (MI->getOpcode()) {
- default:
- assert(false && "Unexpected instr type to insert");
- return NULL;
- case Mips::MOVT:
- case Mips::MOVT_S:
- case Mips::MOVT_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1F);
- case Mips::MOVF:
- case Mips::MOVF_S:
- case Mips::MOVF_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, true, Mips::BC1T);
- case Mips::MOVZ_I:
- case Mips::MOVZ_S:
- case Mips::MOVZ_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BNE);
- case Mips::MOVN_I:
- case Mips::MOVN_S:
- case Mips::MOVN_D:
- return ExpandCondMov(MI, BB, dl, Subtarget, TII, false, Mips::BEQ);
-
+ default: llvm_unreachable("Unexpected instr type to insert");
case Mips::ATOMIC_LOAD_ADD_I8:
+ case Mips::ATOMIC_LOAD_ADD_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I16:
+ case Mips::ATOMIC_LOAD_ADD_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu);
case Mips::ATOMIC_LOAD_ADD_I32:
+ case Mips::ATOMIC_LOAD_ADD_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::ADDu);
+ case Mips::ATOMIC_LOAD_ADD_I64:
+ case Mips::ATOMIC_LOAD_ADD_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::DADDu);
case Mips::ATOMIC_LOAD_AND_I8:
+ case Mips::ATOMIC_LOAD_AND_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I16:
+ case Mips::ATOMIC_LOAD_AND_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND);
case Mips::ATOMIC_LOAD_AND_I32:
+ case Mips::ATOMIC_LOAD_AND_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::AND);
+ case Mips::ATOMIC_LOAD_AND_I64:
+ case Mips::ATOMIC_LOAD_AND_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::AND64);
case Mips::ATOMIC_LOAD_OR_I8:
+ case Mips::ATOMIC_LOAD_OR_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I16:
+ case Mips::ATOMIC_LOAD_OR_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR);
case Mips::ATOMIC_LOAD_OR_I32:
+ case Mips::ATOMIC_LOAD_OR_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::OR);
+ case Mips::ATOMIC_LOAD_OR_I64:
+ case Mips::ATOMIC_LOAD_OR_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::OR64);
case Mips::ATOMIC_LOAD_XOR_I8:
+ case Mips::ATOMIC_LOAD_XOR_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I16:
+ case Mips::ATOMIC_LOAD_XOR_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR);
case Mips::ATOMIC_LOAD_XOR_I32:
+ case Mips::ATOMIC_LOAD_XOR_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::XOR);
+ case Mips::ATOMIC_LOAD_XOR_I64:
+ case Mips::ATOMIC_LOAD_XOR_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::XOR64);
case Mips::ATOMIC_LOAD_NAND_I8:
+ case Mips::ATOMIC_LOAD_NAND_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, 0, true);
case Mips::ATOMIC_LOAD_NAND_I16:
+ case Mips::ATOMIC_LOAD_NAND_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, 0, true);
case Mips::ATOMIC_LOAD_NAND_I32:
+ case Mips::ATOMIC_LOAD_NAND_I32_P8:
return EmitAtomicBinary(MI, BB, 4, 0, true);
+ case Mips::ATOMIC_LOAD_NAND_I64:
+ case Mips::ATOMIC_LOAD_NAND_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, 0, true);
case Mips::ATOMIC_LOAD_SUB_I8:
+ case Mips::ATOMIC_LOAD_SUB_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I16:
+ case Mips::ATOMIC_LOAD_SUB_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu);
case Mips::ATOMIC_LOAD_SUB_I32:
+ case Mips::ATOMIC_LOAD_SUB_I32_P8:
return EmitAtomicBinary(MI, BB, 4, Mips::SUBu);
+ case Mips::ATOMIC_LOAD_SUB_I64:
+ case Mips::ATOMIC_LOAD_SUB_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu);
case Mips::ATOMIC_SWAP_I8:
+ case Mips::ATOMIC_SWAP_I8_P8:
return EmitAtomicBinaryPartword(MI, BB, 1, 0);
case Mips::ATOMIC_SWAP_I16:
+ case Mips::ATOMIC_SWAP_I16_P8:
return EmitAtomicBinaryPartword(MI, BB, 2, 0);
case Mips::ATOMIC_SWAP_I32:
+ case Mips::ATOMIC_SWAP_I32_P8:
return EmitAtomicBinary(MI, BB, 4, 0);
+ case Mips::ATOMIC_SWAP_I64:
+ case Mips::ATOMIC_SWAP_I64_P8:
+ return EmitAtomicBinary(MI, BB, 8, 0);
case Mips::ATOMIC_CMP_SWAP_I8:
+ case Mips::ATOMIC_CMP_SWAP_I8_P8:
return EmitAtomicCmpSwapPartword(MI, BB, 1);
case Mips::ATOMIC_CMP_SWAP_I16:
+ case Mips::ATOMIC_CMP_SWAP_I16_P8:
return EmitAtomicCmpSwapPartword(MI, BB, 2);
case Mips::ATOMIC_CMP_SWAP_I32:
+ case Mips::ATOMIC_CMP_SWAP_I32_P8:
return EmitAtomicCmpSwap(MI, BB, 4);
+ case Mips::ATOMIC_CMP_SWAP_I64:
+ case Mips::ATOMIC_CMP_SWAP_I64_P8:
+ return EmitAtomicCmpSwap(MI, BB, 8);
}
}
@@ -875,13 +973,31 @@ MachineBasicBlock *
MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode,
bool Nand) const {
- assert(Size == 4 && "Unsupported size for EmitAtomicBinary.");
+ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL, SC, AND, NOR, ZERO, BEQ;
+
+ if (Size == 4) {
+ LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ AND = Mips::AND;
+ NOR = Mips::NOR;
+ ZERO = Mips::ZERO;
+ BEQ = Mips::BEQ;
+ }
+ else {
+ LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
+ SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ AND = Mips::AND64;
+ NOR = Mips::NOR64;
+ ZERO = Mips::ZERO_64;
+ BEQ = Mips::BEQ64;
+ }
unsigned OldVal = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -919,23 +1035,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// sc success, storeval, 0(ptr)
// beq success, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0);
if (Nand) {
// and andres, oldval, incr
// nor storeval, $0, andres
- BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr);
- BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal)
- .addReg(Mips::ZERO).addReg(AndRes);
+ BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr);
+ BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes);
} else if (BinOpcode) {
// <binop> storeval, oldval, incr
BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr);
} else {
StoreVal = Incr;
}
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
- .addReg(StoreVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
+ BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB);
MI->eraseFromParent(); // The instruction is gone now.
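
A hedged C++ model (not the emitted MachineInstrs) of the ll/sc retry loop this expansion now also produces for the 64-bit cases, with `std::atomic`'s weak compare-exchange standing in for the lld/scd pair, which may likewise fail spuriously:

```cpp
// Model of the expanded loop for ATOMIC_LOAD_ADD_I64. Returns the old
// value, as the pseudo's Dest operand does.
#include <atomic>
#include <cstdint>

int64_t atomicLoadAdd64(std::atomic<int64_t> &Mem, int64_t Incr) {
  int64_t OldVal = Mem.load(std::memory_order_relaxed);    // lld oldval
  while (!Mem.compare_exchange_weak(OldVal, OldVal + Incr, // daddu; scd
                                    std::memory_order_relaxed))
    ; // beq success, $0, loopMBB (OldVal was refreshed by the failure)
  return OldVal;
}
```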
@@ -955,6 +1068,8 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -992,8 +1107,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(loopMBB);
@@ -1025,7 +1139,6 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
BuildMI(BB, dl, TII->get(Mips::NOR), Mask2).addReg(Mips::ZERO).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::SLLV), Incr2).addReg(ShiftAmt).addReg(Incr);
-
// atomic.load.binop
// loopMBB:
// ll oldval,0(alignedaddr)
@@ -1046,7 +1159,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// beq success,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
if (Nand) {
// and andres, oldval, incr2
// nor binopres, $0, andres
@@ -1064,12 +1177,12 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// and newval, incr2, mask
BuildMI(BB, dl, TII->get(Mips::AND), NewVal).addReg(Incr2).addReg(Mask);
}
-
+
BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal0).addReg(NewVal);
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ BuildMI(BB, dl, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB);
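
The partword expansion above operates on the aligned 32-bit word containing the byte or halfword. A standalone sketch of the address and mask setup it emits, shown for the little-endian case:

```cpp
// Sketch of the partword setup (little-endian): align the pointer down
// to its containing word, compute the field's bit offset, and build
// Mask / Mask2 = ~Mask to isolate and preserve bytes respectively.
#include <cassert>
#include <cstdint>

struct PartwordSetup {
  uintptr_t AlignedAddr; // ptr & ~3
  unsigned ShiftAmt;     // bit position of the field within the word
  uint32_t Mask;         // covers the Size-byte field
};

static PartwordSetup setup(uintptr_t Ptr, unsigned Size /* 1 or 2 */) {
  PartwordSetup S;
  S.AlignedAddr = Ptr & ~uintptr_t(3);
  S.ShiftAmt = unsigned(Ptr & 3) * 8; // big-endian would flip this
  uint32_t MaskUpper = (Size == 1) ? 0xffu : 0xffffu;
  S.Mask = MaskUpper << S.ShiftAmt;
  return S;
}

int main() {
  PartwordSetup S = setup(0x1003, 1); // byte at the top of its word
  assert(S.AlignedAddr == 0x1000 && S.ShiftAmt == 24 &&
         S.Mask == 0xff000000u);
  return 0;
}
```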
@@ -1100,13 +1213,29 @@ MachineBasicBlock *
MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Size) const {
- assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap.");
+ assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap.");
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
+ const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8));
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL, SC, ZERO, BNE, BEQ;
+
+ if (Size == 4) {
+ LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ SC = IsN64 ? Mips::SC_P8 : Mips::SC;
+ ZERO = Mips::ZERO;
+ BNE = Mips::BNE;
+ BEQ = Mips::BEQ;
+ }
+ else {
+ LL = IsN64 ? Mips::LLD_P8 : Mips::LLD;
+ SC = IsN64 ? Mips::SCD_P8 : Mips::SCD;
+ ZERO = Mips::ZERO_64;
+ BNE = Mips::BNE64;
+ BEQ = Mips::BEQ64;
+ }
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1128,8 +1257,7 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
@@ -1145,18 +1273,18 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BNE))
+ BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0);
+ BuildMI(BB, dl, TII->get(BNE))
.addReg(Dest).addReg(OldVal).addMBB(exitMBB);
// loop2MBB:
// sc success, newval, 0(ptr)
// beq success, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ BuildMI(BB, dl, TII->get(SC), Success)
.addReg(NewVal).addReg(Ptr).addImm(0);
- BuildMI(BB, dl, TII->get(Mips::BEQ))
- .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(BEQ))
+ .addReg(Success).addReg(ZERO).addMBB(loop1MBB);
MI->eraseFromParent(); // The instruction is gone now.
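
A hedged model of the loop1MBB/loop2MBB structure above, again with `std::atomic` standing in for ll/sc:

```cpp
// Model of the two-block cmpxchg expansion: ll the current value, bail
// out to exitMBB if it differs from the expected value, otherwise sc the
// new value and retry from loop1MBB on sc failure.
#include <atomic>
#include <cstdint>

int32_t atomicCmpSwap32(std::atomic<int32_t> &Mem, int32_t OldVal,
                        int32_t NewVal) {
  for (;;) {                                             // loop1MBB:
    int32_t Dest = Mem.load(std::memory_order_relaxed);  // ll dest, 0(ptr)
    if (Dest != OldVal)                                  // bne -> exitMBB
      return Dest;
    int32_t Expected = OldVal;                           // loop2MBB:
    if (Mem.compare_exchange_weak(Expected, NewVal,      // sc success, ...
                                  std::memory_order_relaxed))
      return OldVal;
    // sc failed (success == 0): beq back to loop1MBB.
  }
}
```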
@@ -1175,6 +1303,8 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
const TargetRegisterClass *RC = getRegClassFor(MVT::i32);
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL;
+ unsigned SC = IsN64 ? Mips::SC_P8 : Mips::SC;
unsigned Dest = MI->getOperand(0).getReg();
unsigned Ptr = MI->getOperand(1).getReg();
@@ -1215,8 +1345,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// Transfer the remainder of BB and its successor edges to exitMBB.
exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
+ llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(loop1MBB);
@@ -1265,7 +1394,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// and maskedoldval0,oldval,mask
// bne maskedoldval0,shiftedcmpval,sinkMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0);
+ BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0)
.addReg(OldVal).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::BNE))
@@ -1281,7 +1410,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
.addReg(OldVal).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), StoreVal)
.addReg(MaskedOldVal1).addReg(ShiftedNewVal);
- BuildMI(BB, dl, TII->get(Mips::SC), Success)
+ BuildMI(BB, dl, TII->get(SC), Success)
.addReg(StoreVal).addReg(AlignedAddr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB);
@@ -1313,6 +1442,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
{
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ unsigned SP = IsN64 ? Mips::SP_64 : Mips::SP;
assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
@@ -1324,20 +1454,19 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
DebugLoc dl = Op.getDebugLoc();
// Get a reference from Mips stack pointer
- SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32);
+ SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy());
// Subtract the dynamic size from the actual stack size to
// obtain the new stack size.
- SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size);
// The Sub result contains the new stack start address, so it
// must be placed in the stack pointer register.
- Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
- SDValue());
+ Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue());
// This node always has two return values: a new stack pointer
// value and a chain
- SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other);
SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
@@ -1381,11 +1510,23 @@ LowerSELECT(SDValue Op, SelectionDAG &DAG) const
Op.getDebugLoc());
}
+SDValue MipsTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = CreateFPCmp(DAG, Op);
+
+ assert(Cond.getOpcode() == MipsISD::FPCmp &&
+ "Floating point operand expected.");
+
+ SDValue True = DAG.getConstant(1, MVT::i32);
+ SDValue False = DAG.getConstant(0, MVT::i32);
+
+ return CreateCMovFP(DAG, Cond, True, False, Op.getDebugLoc());
+}
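
LowerSETCC materializes an i32 boolean from the FP condition-code bit via a conditional move of 1 over 0. A tiny model, assuming one concrete FP condition (ordered less-than) for illustration:

```cpp
// c.olt.d sets the FP condition-code bit; movt/movf then materializes
// the i32 boolean by picking 1 or 0.
#include <cassert>

static int lowerSetCCModel(double A, double B) {
  bool CCBit = (A < B); // stand-in for one concrete FP condition (OLT)
  int True = 1, False = 0;
  return CCBit ? True : False; // CMovFP picks True when the bit is set
}

int main() {
  assert(lowerSetCCModel(1.0, 2.0) == 1 && lowerSetCCModel(2.0, 1.0) == 0);
  return 0;
}
```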
+
SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDVTList VTs = DAG.getVTList(MVT::i32);
@@ -1413,21 +1554,20 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
EVT ValTy = Op.getValueType();
bool HasGotOfst = (GV->hasInternalLinkage() ||
(GV->hasLocalLinkage() && !isa<Function>(GV)));
- unsigned GotFlag = IsN64 ?
+ unsigned GotFlag = HasMips64 ?
(HasGotOfst ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT_DISP) :
- MipsII::MO_GOT;
+ (HasGotOfst ? MipsII::MO_GOT : MipsII::MO_GOT16);
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0, GotFlag);
- GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA);
- SDValue ResNode = DAG.getLoad(ValTy, dl,
- DAG.getEntryNode(), GA, MachinePointerInfo(),
- false, false, 0);
+ GA = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), GA);
+ SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA,
+ MachinePointerInfo(), false, false, false, 0);
  // For function and global targets that are not internally linked, a
  // single load from the GOT/GP is all that is needed for PIC to work.
if (!HasGotOfst)
return ResNode;
SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, ValTy, 0,
- IsN64 ? MipsII::MO_GOT_OFST :
- MipsII::MO_ABS_LO);
+ HasMips64 ? MipsII::MO_GOT_OFST :
+ MipsII::MO_ABS_LO);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, GALo);
return DAG.getNode(ISD::ADD, dl, ValTy, ResNode, Lo);
}
@@ -1438,35 +1578,34 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op,
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
// %hi/%lo relocation
- SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_HI);
- SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_LO);
+ SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_HI);
+ SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, MipsII::MO_ABS_LO);
SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo);
return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
}
- SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_GOT);
- BAGOTOffset = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, BAGOTOffset);
- SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true,
- MipsII::MO_ABS_LO);
- SDValue Load = DAG.getLoad(MVT::i32, dl,
- DAG.getEntryNode(), BAGOTOffset,
- MachinePointerInfo(), false, false, 0);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset);
- return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ EVT ValTy = Op.getValueType();
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue BAGOTOffset = DAG.getBlockAddress(BA, ValTy, true, GOTFlag);
+ BAGOTOffset = DAG.getNode(MipsISD::Wrapper, dl, ValTy,
+ GetGlobalReg(DAG, ValTy), BAGOTOffset);
+ SDValue BALOOffset = DAG.getBlockAddress(BA, ValTy, true, OFSTFlag);
+ SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), BAGOTOffset,
+ MachinePointerInfo(), false, false, false, 0);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, BALOOffset);
+ return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
}
SDValue MipsTargetLowering::
LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
{
- // If the relocation model is PIC, use the General Dynamic TLS Model,
- // otherwise use the Initial Exec or Local Exec TLS Model.
- // TODO: implement Local Dynamic TLS model
+ // If the relocation model is PIC, use the General Dynamic TLS Model or
+ // Local Dynamic TLS model, otherwise use the Initial Exec or
+ // Local Exec TLS Model.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
DebugLoc dl = GA->getDebugLoc();
@@ -1475,45 +1614,63 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
- SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
- SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
- SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
+ bool LocalDynamic = GV->hasInternalLinkage();
+ unsigned Flag = LocalDynamic ? MipsII::MO_TLSLDM : MipsII::MO_TLSGD;
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, Flag);
+ SDValue Argument = DAG.getNode(MipsISD::Wrapper, dl, PtrVT,
+ GetGlobalReg(DAG, PtrVT), TGA);
+ unsigned PtrSize = PtrVT.getSizeInBits();
+ IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize);
+
+ SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT);
ArgListTy Args;
ArgListEntry Entry;
Entry.Node = Argument;
- Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
+ Entry.Ty = PtrTy;
Args.push_back(Entry);
- std::pair<SDValue, SDValue> CallResult =
- LowerCallTo(DAG.getEntryNode(),
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
- dl);
- return CallResult.first;
+ std::pair<SDValue, SDValue> CallResult =
+ LowerCallTo(DAG.getEntryNode(), PtrTy,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false, /*doesNotRet=*/false,
+ /*isReturnValueUsed=*/true,
+ TlsGetAddr, Args, DAG, dl);
+
+ SDValue Ret = CallResult.first;
+
+ if (!LocalDynamic)
+ return Ret;
+
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_HI);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ MipsII::MO_DTPREL_LO);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Ret);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, Add, Lo);
}
SDValue Offset;
if (GV->isDeclaration()) {
// Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
+ TGA = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ TGA);
+ Offset = DAG.getLoad(PtrVT, dl,
DAG.getEntryNode(), TGA, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else {
// Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
+ SDValue Hi = DAG.getNode(MipsISD::Hi, dl, PtrVT, TGAHi);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
}
SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
@@ -1523,34 +1680,30 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
SDValue MipsTargetLowering::
LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
- SDValue ResNode;
- SDValue HiPart;
+ SDValue HiPart, JTI, JTILo;
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HI;
-
EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
-
- if (!IsPIC) {
- SDValue Ops[] = { JTI };
- HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
+ if (!IsPIC && !IsN64) {
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_HI);
+ HiPart = DAG.getNode(MipsISD::Hi, dl, PtrVT, JTI);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MipsII::MO_ABS_LO);
} else { // Emit load from the global pointer
- JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI);
- HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI,
- MachinePointerInfo(),
- false, false, 0);
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OfstFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, GOTFlag);
+ JTI = DAG.getNode(MipsISD::Wrapper, dl, PtrVT, GetGlobalReg(DAG, PtrVT),
+ JTI);
+ HiPart = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), JTI,
+ MachinePointerInfo(), false, false, false, 0);
+ JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OfstFlag);
}
- SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
-
- return ResNode;
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, PtrVT, JTILo);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, HiPart, Lo);
}
SDValue MipsTargetLowering::
@@ -1572,7 +1725,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
// SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
// ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode);
- if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !IsN64) {
SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
N->getOffset(), MipsII::MO_ABS_HI);
SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
@@ -1581,16 +1734,19 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
- SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_GOT);
- CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP);
- SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(),
- CP, MachinePointerInfo::getConstantPool(),
+ EVT ValTy = Op.getValueType();
+ unsigned GOTFlag = HasMips64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT;
+ unsigned OFSTFlag = HasMips64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO;
+ SDValue CP = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
+ N->getOffset(), GOTFlag);
+ CP = DAG.getNode(MipsISD::Wrapper, dl, ValTy, GetGlobalReg(DAG, ValTy), CP);
+ SDValue Load = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(), false,
false, false, 0);
- SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- N->getOffset(), MipsII::MO_ABS_LO);
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo);
- ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo);
+ SDValue CPLo = DAG.getTargetConstantPool(C, ValTy, N->getAlignment(),
+ N->getOffset(), OFSTFlag);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, ValTy, CPLo);
+ ResNode = DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo);
}
return ResNode;
@@ -1608,62 +1764,165 @@ SDValue MipsTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
// memory location argument.
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
- MachinePointerInfo(SV),
- false, false, 0);
+ MachinePointerInfo(SV), false, false, 0);
}
-static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG) {
- // FIXME: Use ext/ins instructions if target architecture is Mips32r2.
- DebugLoc dl = Op.getDebugLoc();
- SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op.getOperand(1));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
- return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Result);
+static SDValue LowerFCOPYSIGN32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ EVT TyX = Op.getOperand(0).getValueType();
+ EVT TyY = Op.getOperand(1).getValueType();
+ SDValue Const1 = DAG.getConstant(1, MVT::i32);
+ SDValue Const31 = DAG.getConstant(31, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Res;
+
+ // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (TyX == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ Const1);
+ SDValue Y = (TyY == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(1)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(1),
+ Const1);
+
+ if (HasR2) {
+ // ext E, Y, 31, 1 ; extract bit31 of Y
+ // ins X, E, 31, 1 ; insert extracted bit at bit31 of X
+ SDValue E = DAG.getNode(MipsISD::Ext, DL, MVT::i32, Y, Const31, Const1);
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32, E, Const31, Const1, X);
+ } else {
+ // sll SllX, X, 1
+ // srl SrlX, SllX, 1
+ // srl SrlY, Y, 31
+ // sll SllY, SrlX, 31
+ // or Or, SrlX, SllY
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ SDValue SrlX = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, MVT::i32, Y, Const31);
+ SDValue SllY = DAG.getNode(ISD::SHL, DL, MVT::i32, SrlY, Const31);
+ Res = DAG.getNode(ISD::OR, DL, MVT::i32, SrlX, SllY);
+ }
+
+ if (TyX == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Res);
+
+ SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
}
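
The non-R2 path above is the classic sign-transplant bit pattern. A standalone check of that pattern for f32, with memcpy standing in for ISD::BITCAST:

```cpp
// Check of the non-R2 f32 copysign sequence emitted above.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static float copysignModel(float XF, float YF) {
  uint32_t X, Y;
  std::memcpy(&X, &XF, 4);
  std::memcpy(&Y, &YF, 4);
  uint32_t SrlX = (X << 1) >> 1;   // sll 1; srl 1: clear X's sign bit
  uint32_t SllY = (Y >> 31) << 31; // srl 31; sll 31: keep only Y's sign
  uint32_t Res = SrlX | SllY;      // or
  float Out;
  std::memcpy(&Out, &Res, 4);
  return Out;
}

int main() {
  assert(copysignModel(3.5f, -1.0f) == std::copysign(3.5f, -1.0f));
  assert(copysignModel(-2.0f, 1.0f) == std::copysign(-2.0f, 1.0f));
  return 0;
}
```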
-static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool isLittle) {
- // FIXME:
- // Use ext/ins instructions if target architecture is Mips32r2.
- // Eliminate redundant mfc1 and mtc1 instructions.
- unsigned LoIdx = 0, HiIdx = 1;
+static SDValue LowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ unsigned WidthX = Op.getOperand(0).getValueSizeInBits();
+ unsigned WidthY = Op.getOperand(1).getValueSizeInBits();
+ EVT TyX = MVT::getIntegerVT(WidthX), TyY = MVT::getIntegerVT(WidthY);
+ SDValue Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Bitcast to integer nodes.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, TyX, Op.getOperand(0));
+ SDValue Y = DAG.getNode(ISD::BITCAST, DL, TyY, Op.getOperand(1));
+
+ if (HasR2) {
+ // ext E, Y, width(Y) - 1, 1 ; extract bit width(Y)-1 of Y
+ // ins X, E, width(X) - 1, 1 ; insert extracted bit at bit width(X)-1 of X
+ SDValue E = DAG.getNode(MipsISD::Ext, DL, TyY, Y,
+ DAG.getConstant(WidthY - 1, MVT::i32), Const1);
+
+ if (WidthX > WidthY)
+ E = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, E);
+ else if (WidthY > WidthX)
+ E = DAG.getNode(ISD::TRUNCATE, DL, TyX, E);
+
+ SDValue I = DAG.getNode(MipsISD::Ins, DL, TyX, E,
+ DAG.getConstant(WidthX - 1, MVT::i32), Const1, X);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), I);
+ }
- if (!isLittle)
- std::swap(LoIdx, HiIdx);
+ // (d)sll SllX, X, 1
+ // (d)srl SrlX, SllX, 1
+ // (d)srl SrlY, Y, width(Y)-1
+ // (d)sll SllY, SrlX, width(Y)-1
+ // or Or, SrlX, SllY
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, TyX, X, Const1);
+ SDValue SrlX = DAG.getNode(ISD::SRL, DL, TyX, SllX, Const1);
+ SDValue SrlY = DAG.getNode(ISD::SRL, DL, TyY, Y,
+ DAG.getConstant(WidthY - 1, MVT::i32));
+
+ if (WidthX > WidthY)
+ SrlY = DAG.getNode(ISD::ZERO_EXTEND, DL, TyX, SrlY);
+ else if (WidthY > WidthX)
+ SrlY = DAG.getNode(ISD::TRUNCATE, DL, TyX, SrlY);
+
+ SDValue SllY = DAG.getNode(ISD::SHL, DL, TyX, SrlY,
+ DAG.getConstant(WidthX - 1, MVT::i32));
+ SDValue Or = DAG.getNode(ISD::OR, DL, TyX, SrlX, SllY);
+ return DAG.getNode(ISD::BITCAST, DL, Op.getOperand(0).getValueType(), Or);
+}
- DebugLoc dl = Op.getDebugLoc();
- SDValue Word0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getConstant(LoIdx, MVT::i32));
- SDValue Hi0 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
- Op.getOperand(0), DAG.getConstant(HiIdx, MVT::i32));
- SDValue Hi1 = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
- Op.getOperand(1), DAG.getConstant(HiIdx, MVT::i32));
- SDValue And0 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi0,
- DAG.getConstant(0x7fffffff, MVT::i32));
- SDValue And1 = DAG.getNode(ISD::AND, dl, MVT::i32, Hi1,
- DAG.getConstant(0x80000000, MVT::i32));
- SDValue Word1 = DAG.getNode(ISD::OR, dl, MVT::i32, And0, And1);
-
- if (!isLittle)
- std::swap(Word0, Word1);
-
- return DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, Word0, Word1);
-}
-
-SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
- const {
- EVT Ty = Op.getValueType();
+SDValue
+MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasMips64())
+ return LowerFCOPYSIGN64(Op, DAG, Subtarget->hasMips32r2());
- assert(Ty == MVT::f32 || Ty == MVT::f64);
+ return LowerFCOPYSIGN32(Op, DAG, Subtarget->hasMips32r2());
+}
- if (Ty == MVT::f32)
- return LowerFCOPYSIGN32(Op, DAG);
- else
- return LowerFCOPYSIGN64(Op, DAG, Subtarget->isLittle());
+static SDValue LowerFABS32(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
+ // to i32.
+ SDValue X = (Op.getValueType() == MVT::f32) ?
+ DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
+ DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
+ Const1);
+
+ // Clear MSB.
+ if (HasR2)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
+ DAG.getRegister(Mips::ZERO, MVT::i32),
+ DAG.getConstant(31, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
+ }
+
+ if (Op.getValueType() == MVT::f32)
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
+
+ SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
+ Op.getOperand(0), DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
+}
+
+static SDValue LowerFABS64(SDValue Op, SelectionDAG &DAG, bool HasR2) {
+ SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
+ DebugLoc DL = Op.getDebugLoc();
+
+ // Bitcast to integer node.
+ SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
+
+ // Clear MSB.
+ if (HasR2)
+ Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
+ DAG.getRegister(Mips::ZERO_64, MVT::i64),
+ DAG.getConstant(63, MVT::i32), Const1, X);
+ else {
+ SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
+ }
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
+}
+
+SDValue
+MipsTargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
+ return LowerFABS64(Op, DAG, Subtarget->hasMips32r2());
+
+ return LowerFABS32(Op, DAG, Subtarget->hasMips32r2());
}
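
Both FABS paths reduce to clearing the sign bit of the IEEE-754 encoding, whether by `ins` from $zero on R2 or by the shift pair otherwise. A standalone check of the shift-based variant for f64:

```cpp
// Clearing bit 63 of the IEEE-754 encoding yields |x|, even for -0.0.
#include <cassert>
#include <cstdint>
#include <cstring>

static double fabsModel(double XD) {
  uint64_t X;
  std::memcpy(&X, &XD, 8);
  X = (X << 1) >> 1; // dsll 1; dsrl 1: clear the sign bit
  double Out;
  std::memcpy(&Out, &X, 8);
  return Out;
}

int main() {
  assert(fabsModel(-4.25) == 4.25);
  assert(fabsModel(-0.0) == 0.0); // -0.0 maps to +0.0
  return 0;
}
```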
SDValue MipsTargetLowering::
@@ -1676,13 +1935,14 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT);
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ IsN64 ? Mips::FP_64 : Mips::FP, VT);
return FrameAddr;
}
// TODO: set SType according to the desired memory barrier behavior.
-SDValue MipsTargetLowering::LowerMEMBARRIER(SDValue Op,
- SelectionDAG& DAG) const {
+SDValue
+MipsTargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const {
unsigned SType = 0;
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(MipsISD::Sync, dl, MVT::Other, Op.getOperand(0),
@@ -1703,8 +1963,6 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op,
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-#include "MipsGenCallingConv.inc"
-
//===----------------------------------------------------------------------===//
// TODO: Implement generic logic using tblgen that can support this.
// Mips O32 ABI rules:
@@ -1726,13 +1984,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
static const unsigned IntRegsSize=4, FloatRegsSize=2;
- static const unsigned IntRegs[] = {
+ static const uint16_t IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
- static const unsigned F32Regs[] = {
+ static const uint16_t F32Regs[] = {
Mips::F12, Mips::F14
};
- static const unsigned F64Regs[] = {
+ static const uint16_t F64Regs[] = {
Mips::D6, Mips::D7
};
@@ -1811,13 +2069,77 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT,
return false; // CC must always match
}
+static const uint16_t Mips64IntRegs[8] =
+ {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
+ Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64};
+static const uint16_t Mips64DPRegs[8] =
+ {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
+ Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64};
+
+static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ unsigned Align = std::max(ArgFlags.getByValAlign(), (unsigned)8);
+ unsigned Size = (ArgFlags.getByValSize() + 7) / 8 * 8;
+ unsigned FirstIdx = State.getFirstUnallocated(Mips64IntRegs, 8);
+
+ assert(Align <= 16 && "Cannot handle alignments larger than 16.");
+
+ // If byval is 16-byte aligned, the first arg register must be even.
+ if ((Align == 16) && (FirstIdx % 2)) {
+ State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]);
+ ++FirstIdx;
+ }
+
+ // Mark the registers allocated.
+ for (unsigned I = FirstIdx; Size && (I < 8); Size -= 8, ++I)
+ State.AllocateReg(Mips64IntRegs[I], Mips64DPRegs[I]);
+
+ // Allocate space on caller's stack.
+ unsigned Offset = State.AllocateStack(Size, Align);
+
+ if (FirstIdx < 8)
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx],
+ LocVT, LocInfo));
+ else
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+
+ return true;
+}
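
The arithmetic in CC_Mips64Byval is easy to get wrong, so here is a self-checking sketch of its two key rules: byval sizes round up to whole 8-byte slots, and a 16-byte-aligned byval must start in an even register of the $a0_64..$t3_64 sequence (burning one register if needed):

```cpp
#include <cassert>

static unsigned firstByValReg(unsigned FirstUnallocated, unsigned Align) {
  assert(Align <= 16 && "Cannot handle alignments larger than 16.");
  if (Align == 16 && (FirstUnallocated % 2))
    ++FirstUnallocated; // the skipped register is still marked allocated
  return FirstUnallocated;
}

static unsigned roundedByValSize(unsigned Size) {
  return (Size + 7) / 8 * 8; // whole 8-byte slots
}

int main() {
  assert(firstByValReg(1, 16) == 2); // odd start + 16-byte alignment
  assert(firstByValReg(1, 8) == 1);  // 8-byte alignment needs no padding
  assert(roundedByValSize(13) == 16);
  return 0;
}
```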
+
+#include "MipsGenCallingConv.inc"
+
+static void
+AnalyzeMips64CallOperands(CCState &CCInfo,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ unsigned NumOps = Outs.size();
+ for (unsigned i = 0; i != NumOps; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool R;
+
+ if (Outs[i].IsFixed)
+ R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ else
+ R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+
+ if (R) {
+#ifndef NDEBUG
+ dbgs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString();
+#endif
+ llvm_unreachable(0);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Call Calling Convention Implementation
//===----------------------------------------------------------------------===//
static const unsigned O32IntRegsSize = 4;
-static const unsigned O32IntRegs[] = {
+static const uint16_t O32IntRegs[] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
@@ -1848,9 +2170,8 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr,
- MachinePointerInfo(),
- false, false, std::min(ByValAlign,
- (unsigned )4));
+ MachinePointerInfo(), false, false, false,
+ std::min(ByValAlign, (unsigned )4));
MemOpChains.push_back(LoadVal.getValue(1));
unsigned DstReg = O32IntRegs[LocMemOffset / 4];
RegsToPass.push_back(std::make_pair(DstReg, LoadVal));
@@ -1886,7 +2207,7 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
// Read second subword if necessary.
if (RemainingSize != 0) {
assert(RemainingSize == 1 && "There must be one byte remaining.");
- LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
+ LoadPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, Arg,
DAG.getConstant(Offset, MVT::i32));
unsigned Alignment = std::min(ByValAlign, (unsigned )2);
SDValue Subword = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
@@ -1919,13 +2240,101 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
MachinePointerInfo(0), MachinePointerInfo(0));
}
+// Copy Mips64 byVal arg to registers and stack.
+static void
+PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl,
+ SmallVector<std::pair<unsigned, SDValue>, 16>& RegsToPass,
+ SmallVector<SDValue, 8>& MemOpChains, int& LastFI,
+ MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ EVT PtrTy, bool isLittle) {
+ unsigned ByValSize = Flags.getByValSize();
+ unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8);
+ bool IsRegLoc = VA.isRegLoc();
+ unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
+ unsigned LocMemOffset = 0;
+ unsigned MemCpySize = ByValSize;
+
+ if (!IsRegLoc)
+ LocMemOffset = VA.getLocMemOffset();
+ else {
+ const uint16_t *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8,
+ VA.getLocReg());
+ const uint16_t *RegEnd = Mips64IntRegs + 8;
+
+ // Copy double words to registers.
+ for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) {
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue LoadVal = DAG.getLoad(MVT::i64, dl, Chain, LoadPtr,
+ MachinePointerInfo(), false, false, false,
+ Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+ RegsToPass.push_back(std::make_pair(*Reg, LoadVal));
+ }
+
+ // Return if the struct has been fully copied.
+ if (!(MemCpySize = ByValSize - Offset))
+ return;
+
+ // If there is an argument register available, copy the remainder of the
+ // byval argument with sub-doubleword loads and shifts.
+ if (Reg != RegEnd) {
+ assert((ByValSize < Offset + 8) &&
+ "Size of the remainder should be smaller than 8-byte.");
+ SDValue Val;
+ for (unsigned LoadSize = 4; Offset < ByValSize; LoadSize /= 2) {
+ unsigned RemSize = ByValSize - Offset;
+
+ if (RemSize < LoadSize)
+ continue;
+
+ SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ SDValue LoadVal =
+ DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr,
+ MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8),
+ false, false, Alignment);
+ MemOpChains.push_back(LoadVal.getValue(1));
+
+ // Offset in number of bits from double word boundary.
+ unsigned OffsetDW = (Offset % 8) * 8;
+ unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8);
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal,
+ DAG.getConstant(Shamt, MVT::i32));
+
+ Val = Val.getNode() ? DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) :
+ Shift;
+ Offset += LoadSize;
+ Alignment = std::min(Alignment, LoadSize);
+ }
+
+ RegsToPass.push_back(std::make_pair(*Reg, Val));
+ return;
+ }
+ }
+
+ assert(MemCpySize && "MemCpySize must not be zero.");
+
+  // Create a fixed object on the stack at offset LocMemOffset and copy
+ // remainder of byval arg to it with memcpy.
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg,
+ DAG.getConstant(Offset, PtrTy));
+ LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true);
+ SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy);
+ ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src,
+ DAG.getConstant(MemCpySize, PtrTy), Alignment,
+ /*isVolatile=*/false, /*AlwaysInline=*/false,
+ MachinePointerInfo(0), MachinePointerInfo(0));
+}
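
A hedged host-side model of the sub-doubleword tail copy above (little-endian assumed, matching the Shamt computation): the remaining 1-7 bytes are packed into one i64 register from progressively smaller zero-extended loads, each shifted to its byte offset and OR'd in:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

static uint64_t packTailLE(const uint8_t *Src, unsigned RemSize) {
  uint64_t Val = 0;
  unsigned Offset = 0;
  for (unsigned LoadSize = 4; Offset < RemSize; LoadSize /= 2) {
    if (RemSize - Offset < LoadSize)
      continue; // try the next smaller load size
    uint64_t Load = 0;
    std::memcpy(&Load, Src + Offset, LoadSize); // zero-extending load
    Val |= Load << (Offset * 8);                // shift into position
    Offset += LoadSize;
  }
  return Val;
}

int main() {
  const uint8_t Bytes[7] = {1, 2, 3, 4, 5, 6, 7};
  assert(packTailLE(Bytes, 7) == 0x0007060504030201ull);
  return 0;
}
```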
+
/// LowerCall - function arguments are copied from virtual regs to
/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
/// TODO: isTailCall.
SDValue
MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1943,10 +2352,12 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
- if (Subtarget->isABI_O32())
+ if (IsO32)
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
+ else if (HasMips64)
+ AnalyzeMips64CallOperands(CCInfo, Outs);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
@@ -1963,7 +2374,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// If this is the first call, create a stack frame object that points to
// a location to which .cprestore saves $gp.
- if (IsPIC && !MipsFI->getGPFI())
+ if (IsO32 && IsPIC && MipsFI->globalBaseRegFixed() && !MipsFI->getGPFI())
MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
// Get the frame index of the stack frame object that points to the location
@@ -1973,7 +2384,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
// allocated.
- if (Subtarget->isABI_O32())
+ if (IsO32)
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize();
@@ -1988,7 +2399,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
NextStackOffset = (NextStackOffset + StackAlignment - 1) /
StackAlignment * StackAlignment;
- if (IsPIC)
+ if (MipsFI->needGPSaveRestore())
MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
MFI->setObjectOffset(DynAllocFI, NextStackOffset);
@@ -2004,22 +2415,40 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
SDValue Arg = OutVals[i];
CCValAssign &VA = ArgLocs[i];
+ MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT();
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // ByVal Arg.
+ if (Flags.isByVal()) {
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ if (IsO32)
+ WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ MFI, DAG, Arg, VA, Flags, getPointerTy(),
+ Subtarget->isLittle());
+ else
+ PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI,
+ MFI, DAG, Arg, VA, Flags, getPointerTy(),
+ Subtarget->isLittle());
+ continue;
+ }
// Promote the value if needed.
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
case CCValAssign::Full:
- if (Subtarget->isABI_O32() && VA.isRegLoc()) {
- if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32)
- Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
- if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) {
+ if (VA.isRegLoc()) {
+ if ((ValVT == MVT::f32 && LocVT == MVT::i32) ||
+ (ValVT == MVT::f64 && LocVT == MVT::i64))
+ Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg);
+ else if (ValVT == MVT::f64 && LocVT == MVT::i32) {
SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
Arg, DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32,
Arg, DAG.getConstant(1, MVT::i32));
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
- unsigned LocRegLo = VA.getLocReg();
+ unsigned LocRegLo = VA.getLocReg();
unsigned LocRegHigh = getNextIntArgReg(LocRegLo);
RegsToPass.push_back(std::make_pair(LocRegLo, Lo));
RegsToPass.push_back(std::make_pair(LocRegHigh, Hi));
@@ -2028,13 +2457,13 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
}
break;
case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg);
break;
case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg);
break;
case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg);
break;
}
@@ -2048,28 +2477,15 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// Register can't get to this point...
assert(VA.isMemLoc());
- // ByVal Arg.
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- if (Flags.isByVal()) {
- assert(Subtarget->isABI_O32() &&
- "No support for ByVal args by ABIs other than O32 yet.");
- assert(Flags.getByValSize() &&
- "ByVal args of size 0 should have been ignored by front-end.");
- WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI,
- DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle());
- continue;
- }
-
// Create the frame index object for this incoming parameter
- LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), true);
SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
    // Emit ISD::STORE which stores the
    // parameter value to a stack location.
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
+ MachinePointerInfo(), false, false, 0));
}
// Extend range of indices of frame objects for outgoing arguments that were
@@ -2093,52 +2509,68 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
- unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
- bool LoadSymAddr = false;
+ unsigned char OpFlag;
+ bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25
+ bool GlobalOrExternal = false;
SDValue CalleeLo;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- if (IsPIC && G->getGlobal()->hasInternalLinkage()) {
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- getPointerTy(), 0,MipsII:: MO_GOT);
+ if (IsPICCall && G->getGlobal()->hasInternalLinkage()) {
+ OpFlag = IsO32 ? MipsII::MO_GOT : MipsII::MO_GOT_PAGE;
+ unsigned char LoFlag = IsO32 ? MipsII::MO_ABS_LO : MipsII::MO_GOT_OFST;
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0,
+ OpFlag);
CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(),
- 0, MipsII::MO_ABS_LO);
+ 0, LoFlag);
} else {
+ OpFlag = IsPICCall ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
}
- LoadSymAddr = true;
+ GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
- getPointerTy(), OpFlag);
- LoadSymAddr = true;
+ if (IsN64 || (!IsO32 && IsPIC))
+ OpFlag = MipsII::MO_GOT_DISP;
+ else if (!IsPIC) // !N64 && static
+ OpFlag = MipsII::MO_NO_FLAG;
+ else // O32 & PIC
+ OpFlag = MipsII::MO_GOT_CALL;
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
+ OpFlag);
+ GlobalOrExternal = true;
}
SDValue InFlag;
// Create nodes that load address of callee and copy it to T9
- if (IsPIC) {
- if (LoadSymAddr) {
+ if (IsPICCall) {
+ if (GlobalOrExternal) {
// Load callee address
- Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
- SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(),
- false, false, 0);
+ Callee = DAG.getNode(MipsISD::Wrapper, dl, getPointerTy(),
+ GetGlobalReg(DAG, getPointerTy()), Callee);
+ SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ Callee, MachinePointerInfo::getGOT(),
+ false, false, false, 0);
// Use GOT+LO if callee has internal linkage.
if (CalleeLo.getNode()) {
- SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo);
- Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
+ SDValue Lo = DAG.getNode(MipsISD::Lo, dl, getPointerTy(), CalleeLo);
+ Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo);
} else
Callee = LoadValue;
}
+ }
+ // T9 should contain the address of the callee function if
+ // -relocation-model=pic or this is an indirect call.
+ if (IsPICCall || !GlobalOrExternal) {
// copy to T9
- Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0));
+ unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9;
+ Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0));
InFlag = Chain.getValue(1);
- Callee = DAG.getRegister(Mips::T9, MVT::i32);
+ Callee = DAG.getRegister(T9Reg, getPointerTy());
}
// Build a sequence of copy-to-reg nodes chained together with token
@@ -2166,6 +2598,12 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2216,7 +2654,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
std::vector<SDValue>& OutChains,
SelectionDAG &DAG, unsigned NumWords, SDValue FIN,
- const CCValAssign &VA, const ISD::ArgFlagsTy& Flags) {
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ const Argument *FuncArg) {
unsigned LocMem = VA.getLocMemOffset();
unsigned FirstWord = LocMem / 4;
@@ -2231,20 +2670,58 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl,
SDValue StorePtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIN,
DAG.getConstant(i * 4, MVT::i32));
SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(Reg, MVT::i32),
- StorePtr, MachinePointerInfo(), false,
- false, 0);
+ StorePtr, MachinePointerInfo(FuncArg, i * 4),
+ false, false, 0);
OutChains.push_back(Store);
}
}
+// Create a frame object on the stack and copy the registers used for byval passing to it.
+static unsigned
+CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl,
+ std::vector<SDValue>& OutChains, SelectionDAG &DAG,
+ const CCValAssign &VA, const ISD::ArgFlagsTy& Flags,
+ MachineFrameInfo *MFI, bool IsRegLoc,
+ SmallVectorImpl<SDValue> &InVals, MipsFunctionInfo *MipsFI,
+ EVT PtrTy, const Argument *FuncArg) {
+ const uint16_t *Reg = Mips64IntRegs + 8;
+ int FOOffset; // Frame object offset from virtual frame pointer.
+
+ if (IsRegLoc) {
+ Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg());
+ FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8;
+ }
+ else
+ FOOffset = VA.getLocMemOffset();
+
+ // Create frame object.
+ unsigned NumRegs = (Flags.getByValSize() + 7) / 8;
+ unsigned LastFI = MFI->CreateFixedObject(NumRegs * 8, FOOffset, true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, PtrTy);
+ InVals.push_back(FIN);
+
+ // Copy arg registers.
+ for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs);
+ ++Reg, ++I) {
+ unsigned VReg = AddLiveIn(MF, *Reg, Mips::CPU64RegsRegisterClass);
+ SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN,
+ DAG.getConstant(I * 8, PtrTy));
+ SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64),
+ StorePtr, MachinePointerInfo(FuncArg, I * 8),
+ false, false, 0);
+ OutChains.push_back(Store);
+ }
+
+ return LastFI;
+}
+
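A minimal standalone sketch of the frame-object offset math CopyMips64ByValRegs
uses above, assuming the N32/N64 convention of eight 8-byte argument registers:
a byval argument whose first word arrives in register k of $a0..$a7 is homed at
offset k*8 - 64 from the virtual frame pointer, i.e. inside the 64-byte
register save area, while a fully in-memory byval keeps its CCValAssign offset:

#include <cassert>

static int byValFrameOffset(bool IsRegLoc, int RegIndex, int LocMemOffset) {
  const int NumArgRegs = 8, RegSize = 8;
  if (IsRegLoc)
    return RegIndex * RegSize - NumArgRegs * RegSize; // k*8 - 64
  return LocMemOffset; // already past the register save area
}

int main() {
  assert(byValFrameOffset(true, 0, 0) == -64);   // starts in $a0
  assert(byValFrameOffset(true, 7, 0) == -8);    // starts in $a7
  assert(byValFrameOffset(false, 0, 16) == 16);  // passed entirely in memory
  return 0;
}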
/// LowerFormalArguments - transform physical registers into virtual registers
/// and generate load operations for arguments placed on the stack.
SDValue
MipsTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
@@ -2260,23 +2737,46 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
+ getTargetMachine(), ArgLocs, *DAG.getContext());
- if (Subtarget->isABI_O32())
+ if (IsO32)
CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32);
else
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips);
+ Function::const_arg_iterator FuncArg =
+ DAG.getMachineFunction().getFunction()->arg_begin();
int LastFI = 0; // MipsFI->LastInArgFI is 0 at the entry of this function.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++FuncArg) {
CCValAssign &VA = ArgLocs[i];
+ EVT ValVT = VA.getValVT();
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ bool IsRegLoc = VA.isRegLoc();
+
+ if (Flags.isByVal()) {
+ assert(Flags.getByValSize() &&
+ "ByVal args of size 0 should have been ignored by front-end.");
+ if (IsO32) {
+ unsigned NumWords = (Flags.getByValSize() + 3) / 4;
+ LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
+ true);
+ SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
+ InVals.push_back(FIN);
+ ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags,
+ &*FuncArg);
+ } else // N32/64
+ LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags,
+ MFI, IsRegLoc, InVals, MipsFI,
+ getPointerTy(), &*FuncArg);
+ continue;
+ }
// Arguments stored on registers
- if (VA.isRegLoc()) {
+ if (IsRegLoc) {
EVT RegVT = VA.getLocVT();
unsigned ArgReg = VA.getLocReg();
- TargetRegisterClass *RC = 0;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = Mips::CPURegsRegisterClass;
@@ -2305,23 +2805,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
Opcode = ISD::AssertZext;
if (Opcode)
ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ DAG.getValueType(ValVT));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
}
- // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64
- if (Subtarget->isABI_O32()) {
- if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32)
- ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue);
- if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) {
- unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
- getNextIntArgReg(ArgReg), RC);
- SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
- if (!Subtarget->isLittle())
- std::swap(ArgValue, ArgValue2);
- ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
- ArgValue, ArgValue2);
- }
+ // Handle floating point arguments passed in integer registers.
+ if ((RegVT == MVT::i32 && ValVT == MVT::f32) ||
+ (RegVT == MVT::i64 && ValVT == MVT::f64))
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue);
+ else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) {
+ unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(),
+ getNextIntArgReg(ArgReg), RC);
+ SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT);
+ if (!Subtarget->isLittle())
+ std::swap(ArgValue, ArgValue2);
+ ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64,
+ ArgValue, ArgValue2);
}
InVals.push_back(ArgValue);
@@ -2330,32 +2829,15 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// sanity check
assert(VA.isMemLoc());
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
-
- if (Flags.isByVal()) {
- assert(Subtarget->isABI_O32() &&
- "No support for ByVal args by ABIs other than O32 yet.");
- assert(Flags.getByValSize() &&
- "ByVal args of size 0 should have been ignored by front-end.");
- unsigned NumWords = (Flags.getByValSize() + 3) / 4;
- LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(),
- true);
- SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
- InVals.push_back(FIN);
- ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags);
-
- continue;
- }
-
// The stack pointer offset is relative to the caller stack frame.
- LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(LastFI),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -2372,28 +2854,43 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
- if (isVarArg && Subtarget->isABI_O32()) {
+ if (isVarArg) {
+ unsigned NumOfRegs = IsO32 ? 4 : 8;
+ const uint16_t *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs;
+ unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs);
+ int FirstRegSlotOffset = IsO32 ? 0 : -64; // offset of $a0's slot.
+ const TargetRegisterClass *RC
+ = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass;
+ unsigned RegSize = RC->getSize();
+ int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize;
+
+ // Offset of the first variable argument from stack pointer.
+ int FirstVaArgOffset;
+
+ if (IsO32 || (Idx == NumOfRegs)) {
+ FirstVaArgOffset =
+ (CCInfo.getNextStackOffset() + RegSize - 1) / RegSize * RegSize;
+ } else
+ FirstVaArgOffset = RegSlotOffset;
+
// Record the frame index of the first variable argument
// which is the value needed by VASTART.
- unsigned NextStackOffset = CCInfo.getNextStackOffset();
- assert(NextStackOffset % 4 == 0 &&
- "NextStackOffset must be aligned to 4-byte boundaries.");
- LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ LastFI = MFI->CreateFixedObject(RegSize, FirstVaArgOffset, true);
MipsFI->setVarArgsFrameIndex(LastFI);
- // If NextStackOffset is smaller than o32's 16-byte reserved argument area,
- // copy the integer registers that have not been used for argument passing
- // to the caller's stack frame.
- for (; NextStackOffset < 16; NextStackOffset += 4) {
- TargetRegisterClass *RC = Mips::CPURegsRegisterClass;
- unsigned Idx = NextStackOffset / 4;
- unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC);
- SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
- LastFI = MFI->CreateFixedObject(4, NextStackOffset, true);
+ // Copy the integer registers that have not been used for argument passing
+ // to the argument register save area. For O32, the save area is allocated
+ // in the caller's stack frame, while for N32/64, it is allocated in the
+ // callee's stack frame.
+ for (int StackOffset = RegSlotOffset;
+ Idx < NumOfRegs; ++Idx, StackOffset += RegSize) {
+ unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegs[Idx], RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ MVT::getIntegerVT(RegSize * 8));
+ LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true);
SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy());
OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
+ MachinePointerInfo(), false, false, 0));
}
}
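A minimal standalone sketch of the FirstVaArgOffset computation above, assuming
the conventions the hunk encodes: O32 saves four 4-byte registers in the
caller's frame starting at offset 0, N32/64 saves eight 8-byte registers in the
callee's frame starting at offset -64, and Idx is the first argument register
left unused by the fixed arguments:

static int firstVaArgOffset(bool IsO32, unsigned Idx, int NextStackOffset) {
  const unsigned NumOfRegs = IsO32 ? 4 : 8;
  const int RegSize = IsO32 ? 4 : 8;
  const int FirstRegSlotOffset = IsO32 ? 0 : -64;
  if (IsO32 || Idx == NumOfRegs) // all registers used: next aligned stack slot
    return (NextStackOffset + RegSize - 1) / RegSize * RegSize;
  return FirstRegSlotOffset + Idx * RegSize; // slot of first unused register
}

int main() {
  return firstVaArgOffset(false, 2, 0) == -48 ? 0 : 1; // $a2's slot under N64
}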
@@ -2447,8 +2944,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
// guarantee that all emitted copies are stuck together,
// so that nothing can be scheduled in between them
@@ -2505,7 +3001,6 @@ getConstraintType(const std::string &Constraint) const
case 'y':
case 'f':
return C_RegisterClass;
- break;
}
}
return TargetLowering::getConstraintType(Constraint);
@@ -2553,14 +3048,19 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
case 'y': // Same as 'r'. Exists for compatibility.
case 'r':
- return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ if (VT == MVT::i32)
+ return std::make_pair(0U, Mips::CPURegsRegisterClass);
+ assert(VT == MVT::i64 && "Unexpected type.");
+ return std::make_pair(0U, Mips::CPU64RegsRegisterClass);
case 'f':
if (VT == MVT::f32)
return std::make_pair(0U, Mips::FGR32RegisterClass);
- if (VT == MVT::f64)
- if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
+ if ((VT == MVT::f64) && (!Subtarget->isSingleFloat())) {
+ if (Subtarget->isFP64bit())
+ return std::make_pair(0U, Mips::FGR64RegisterClass);
+ else
return std::make_pair(0U, Mips::AFGR64RegisterClass);
- break;
+ }
}
}
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
@@ -2579,3 +3079,10 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
return false;
return Imm.isZero();
}
+
+unsigned MipsTargetLowering::getJumpTableEncoding() const {
+ if (IsN64)
+ return MachineJumpTableInfo::EK_GPRel64BlockAddress;
+
+ return TargetLowering::getJumpTableEncoding();
+}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 4be3fed59fc0..c36f40f639f3 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -15,10 +15,10 @@
#ifndef MipsISELLOWERING_H
#define MipsISELLOWERING_H
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
#include "Mips.h"
#include "MipsSubtarget.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace MipsISD {
@@ -40,13 +40,6 @@ namespace llvm {
// Handle gp_rel (small data/bss sections) relocation.
GPRel,
- // General Dynamic TLS
- TlsGd,
-
- // Local Exec TLS
- TprelHi,
- TprelLo,
-
// Thread Pointer
ThreadPointer,
@@ -79,7 +72,7 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC,
+ Wrapper,
DynAlloc,
@@ -98,6 +91,8 @@ namespace llvm {
public:
explicit MipsTargetLowering(MipsTargetMachine &TM);
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -114,8 +109,8 @@ namespace llvm {
private:
// Subtarget Info
const MipsSubtarget *Subtarget;
-
- bool HasMips64, IsN64;
+
+ bool HasMips64, IsN64, IsO32;
// Lower Operand helpers
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
@@ -133,8 +128,10 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG& DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const;
@@ -149,7 +146,7 @@ namespace llvm {
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -186,6 +183,8 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ virtual unsigned getJumpTableEncoding() const;
+
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
MachineBasicBlock *EmitAtomicBinaryPartword(MachineInstr *MI,
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 1fb779d6bec1..b6559452fecf 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
+//===-- MipsInstrFPU.td - Mips FPU Instruction Information -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -59,6 +59,15 @@ def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">;
def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+// FP immediate patterns.
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//
@@ -74,19 +83,35 @@ def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
//===----------------------------------------------------------------------===//
// FP load.
-class FPLoad<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
- Operand MemOpnd>:
+class FPLoad<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
FMem<op, (outs RC:$ft), (ins MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (FOp addr:$addr))],
+ !strconcat(opstr, "\t$ft, $addr"), [(set RC:$ft, (load_a addr:$addr))],
IILoad>;
// FP store.
-class FPStore<bits<6> op, string opstr, PatFrag FOp, RegisterClass RC,
- Operand MemOpnd>:
+class FPStore<bits<6> op, string opstr, RegisterClass RC, Operand MemOpnd>:
FMem<op, (outs), (ins RC:$ft, MemOpnd:$addr),
- !strconcat(opstr, "\t$ft, $addr"), [(store RC:$ft, addr:$addr)],
+ !strconcat(opstr, "\t$ft, $addr"), [(store_a RC:$ft, addr:$addr)],
IIStore>;
+// FP indexed load.
+class FPIdxLoad<bits<6> funct, string opstr, RegisterClass DRC,
+ RegisterClass PRC, PatFrag FOp>:
+ FFMemIdx<funct, (outs DRC:$fd), (ins PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fd, $index($base)"),
+ [(set DRC:$fd, (FOp (add PRC:$base, PRC:$index)))]> {
+ let fs = 0;
+}
+
+// FP indexed store.
+class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
+ RegisterClass PRC, PatFrag FOp>:
+ FFMemIdx<funct, (outs), (ins DRC:$fs, PRC:$base, PRC:$index),
+ !strconcat(opstr, "\t$fs, $index($base)"),
+ [(FOp DRC:$fs, (add PRC:$base, PRC:$index))]> {
+ let fd = 0;
+}
+
// Instructions that convert an FP value to 32-bit fixed point.
multiclass FFR1_W_M<bits<6> funct, string opstr> {
def _S : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
@@ -122,6 +147,19 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
}
}
+// FP madd/msub/nmadd/nmsub instruction classes.
+class FMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
+ SDNode OpNode, RegisterClass RC> :
+ FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr))]>;
+
+class FNMADDSUB<bits<3> funct, bits<3> fmt, string opstr, string fmtstr,
+ SDNode OpNode, RegisterClass RC> :
+ FFMADDSUB<funct, fmt, (outs RC:$fd), (ins RC:$fr, RC:$fs, RC:$ft),
+ !strconcat(opstr, ".", fmtstr, "\t$fd, $fr, $fs, $ft"),
+ [(set RC:$fd, (fsub fpimm0, (OpNode (fmul RC:$fs, RC:$ft), RC:$fr)))]>;
+
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
@@ -152,8 +190,10 @@ let Predicates = [IsFP64bit] in {
def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
}
-defm FABS : FFR1P_M<0x5, "abs", fabs>;
-defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+let Predicates = [NoNaNsFPMath] in {
+ defm FABS : FFR1P_M<0x5, "abs", fabs>;
+ defm FNEG : FFR1P_M<0x7, "neg", fneg>;
+}
defm FSQRT : FFR1P_M<0x4, "sqrt", fsqrt>;
// The odd-numbered registers are only referenced when doing loads,
@@ -183,6 +223,14 @@ def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt),
"mtc1\t$rt, $fs",
[(set FGR32:$fs, (bitconvert CPURegs:$rt))]>;
+def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs),
+ "dmfc1\t$rt, $fs",
+ [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>;
+
+def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
+ "dmtc1\t$rt, $fs",
+ [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>;
+
def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
Requires<[NotFP64bit]>;
@@ -191,23 +239,53 @@ def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
/// Floating Point Memory Instructions
let Predicates = [IsN64] in {
- def LWC1_P8 : FPLoad<0x31, "lwc1", load, FGR32, mem64>;
- def SWC1_P8 : FPStore<0x39, "swc1", store, FGR32, mem64>;
- def LDC164_P8 : FPLoad<0x35, "ldc1", load, FGR64, mem64>;
- def SDC164_P8 : FPStore<0x3d, "sdc1", store, FGR64, mem64>;
+ def LWC1_P8 : FPLoad<0x31, "lwc1", FGR32, mem64>;
+ def SWC1_P8 : FPStore<0x39, "swc1", FGR32, mem64>;
+ def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64>;
+ def SDC164_P8 : FPStore<0x3d, "sdc1", FGR64, mem64>;
}
let Predicates = [NotN64] in {
- def LWC1 : FPLoad<0x31, "lwc1", load, FGR32, mem>;
- def SWC1 : FPStore<0x39, "swc1", store, FGR32, mem>;
- let Predicates = [HasMips64] in {
- def LDC164 : FPLoad<0x35, "ldc1", load, FGR64, mem>;
- def SDC164 : FPStore<0x3d, "sdc1", store, FGR64, mem>;
- }
- let Predicates = [NotMips64] in {
- def LDC1 : FPLoad<0x35, "ldc1", load, AFGR64, mem>;
- def SDC1 : FPStore<0x3d, "sdc1", store, AFGR64, mem>;
- }
+ def LWC1 : FPLoad<0x31, "lwc1", FGR32, mem>;
+ def SWC1 : FPStore<0x39, "swc1", FGR32, mem>;
+}
+
+let Predicates = [NotN64, HasMips64] in {
+ def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
+ def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
+}
+
+let Predicates = [NotN64, NotMips64] in {
+ def LDC1 : FPLoad<0x35, "ldc1", AFGR64, mem>;
+ def SDC1 : FPStore<0x3d, "sdc1", AFGR64, mem>;
+}
+
+// Indexed loads and stores.
+let Predicates = [HasMips32r2Or64] in {
+ def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>;
+ def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>;
+ def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>;
+ def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>;
+}
+
+let Predicates = [HasMips32r2, NotMips64] in {
+ def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>;
+ def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>;
+}
+
+let Predicates = [HasMips64, NotN64] in {
+ def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>;
+ def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>;
+}
+
+// n64
+let Predicates = [IsN64] in {
+ def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>;
+ def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>;
+ def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>;
+ def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store_a>;
+ def SUXC1_P8 : FPIdxStore<0xd, "suxc1", FGR32, CPU64Regs, store_u>;
+ def SDXC164_P8 : FPIdxStore<0x9, "sdxc1", FGR64, CPU64Regs, store_a>;
}
/// Floating-point Arithmetic
@@ -216,6 +294,36 @@ defm FDIV : FFR2P_M<0x03, "div", fdiv>;
defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
defm FSUB : FFR2P_M<0x01, "sub", fsub>;
+let Predicates = [HasMips32r2] in {
+ def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
+ def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
+}
+
+let Predicates = [HasMips32r2, NoNaNsFPMath] in {
+ def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
+ def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
+}
+
+let Predicates = [HasMips32r2, NotFP64bit] in {
+ def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
+ def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
+}
+
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath] in {
+ def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
+ def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
+}
+
+let Predicates = [HasMips32r2, IsFP64bit] in {
+ def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
+ def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
+}
+
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath] in {
+ def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
+ def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
+}
+
//===----------------------------------------------------------------------===//
// Floating Point Branch Codes
//===----------------------------------------------------------------------===//
@@ -259,71 +367,16 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>;
def MIPS_FCOND_LE : PatLeaf<(i32 14)>;
def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
+class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
+ FCC<fmt, (outs), (ins RC:$fs, RC:$ft, condcode:$cc),
+ !strconcat("c.$cc.", typestr, "\t$fs, $ft"),
+ [(MipsFPCmp RC:$fs, RC:$ft, imm:$cc)]>;
+
/// Floating Point Compare
let Defs=[FCR31] in {
- def FCMP_S32 : FCC<0x10, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
- "c.$cc.s\t$fs, $ft",
- [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>;
-
- def FCMP_D32 : FCC<0x11, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
- "c.$cc.d\t$fs, $ft",
- [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>,
- Requires<[NotFP64bit]>;
-}
-
-
-// Conditional moves:
-// These instructions are expanded in
-// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
-// conditional move instructions.
-// flag:int, data:float
-let usesCustomInserter = 1, Constraints = "$F = $dst" in
-class CondMovIntFP<RegisterClass RC, bits<5> fmt, bits<6> func,
- string instr_asm> :
- FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F),
- !strconcat(instr_asm, "\t$dst, $T, $cond"), []>;
-
-def MOVZ_S : CondMovIntFP<FGR32, 16, 18, "movz.s">;
-def MOVN_S : CondMovIntFP<FGR32, 16, 19, "movn.s">;
-
-let Predicates = [NotFP64bit] in {
- def MOVZ_D : CondMovIntFP<AFGR64, 17, 18, "movz.d">;
- def MOVN_D : CondMovIntFP<AFGR64, 17, 19, "movn.d">;
-}
-
-defm : MovzPats<FGR32, MOVZ_S>;
-defm : MovnPats<FGR32, MOVN_S>;
-
-let Predicates = [NotFP64bit] in {
- defm : MovzPats<AFGR64, MOVZ_D>;
- defm : MovnPats<AFGR64, MOVN_D>;
-}
-
-let cc = 0, usesCustomInserter = 1, Uses = [FCR31],
- Constraints = "$F = $dst" in {
-// flag:float, data:int
-class CondMovFPInt<SDNode cmov, bits<1> tf, string instr_asm> :
- FCMOV<tf, (outs CPURegs:$dst), (ins CPURegs:$T, CPURegs:$F),
- !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
- [(set CPURegs:$dst, (cmov CPURegs:$T, CPURegs:$F))]>;
-
-// flag:float, data:float
-let cc = 0 in
-class CondMovFPFP<RegisterClass RC, SDNode cmov, bits<5> fmt, bits<1> tf,
- string instr_asm> :
- FFCMOV<fmt, tf, (outs RC:$dst), (ins RC:$T, RC:$F),
- !strconcat(instr_asm, "\t$dst, $T, $$fcc0"),
- [(set RC:$dst, (cmov RC:$T, RC:$F))]>;
-}
-
-def MOVT : CondMovFPInt<MipsCMovFP_T, 1, "movt">;
-def MOVF : CondMovFPInt<MipsCMovFP_F, 0, "movf">;
-def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
-def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
-
-let Predicates = [NotFP64bit] in {
- def MOVT_D : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
- def MOVF_D : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
+ def FCMP_S32 : FCMP<0x10, FGR32, "s">;
+ def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>;
+ def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]>;
}
//===----------------------------------------------------------------------===//
@@ -352,25 +405,46 @@ def ExtractElementF64 :
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
-def fpimm0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-def fpimm0neg : PatLeaf<(fpimm), [{
- return N->isExactlyValue(-0.0);
-}]>;
-
def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
-def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
-
def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
-def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
let Predicates = [NotFP64bit] in {
+ def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>;
+ def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>;
def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>;
def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
}
+let Predicates = [IsFP64bit] in {
+ def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>;
+ def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
+
+ def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>;
+ def : Pat<(f32 (sint_to_fp CPU64Regs:$src)),
+ (CVT_S_L (DMTC1 CPU64Regs:$src))>;
+ def : Pat<(f64 (sint_to_fp CPU64Regs:$src)),
+ (CVT_D64_L (DMTC1 CPU64Regs:$src))>;
+
+ def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>;
+ def : Pat<(i64 (fp_to_sint FGR32:$src)), (DMFC1 (TRUNC_L_S FGR32:$src))>;
+ def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>;
+
+ def : Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>;
+ def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>;
+}
+
+// Patterns for unaligned floating point loads and stores.
+let Predicates = [HasMips32r2Or64, NotN64] in {
+ def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>;
+ def : Pat<(store_u FGR32:$src, CPURegs:$addr),
+ (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>;
+}
+
+let Predicates = [IsN64] in {
+ def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>;
+ def : Pat<(store_u FGR32:$src, CPU64Regs:$addr),
+ (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>;
+}
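One reason for the NoNaNsFPMath guards above: legacy MIPS abs/neg and the
nmadd/nmsub forms negate arithmetically, so selecting them for the IR pattern
(fsub fpimm0, x) is only sound under relaxed FP semantics, since under strict
IEEE rules 0.0 - x and -x differ for x = +0.0 (and NaN payloads are not
preserved). A minimal standalone illustration of the signed-zero discrepancy;
this is a sketch of the assumption, not target code:

#include <cstdio>

int main() {
  float x = 0.0f;          // think of x as the fused result a*b + c
  float bySub = 0.0f - x;  // what the (fsub fpimm0, ...) pattern computes: +0.0
  float byNeg = -x;        // what a hardware negate computes: -0.0
  std::printf("%g %g\n", bySub, byNeg); // prints "0 -0"
  return 0;
}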
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index e1725fa867f0..455530389eba 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===//
+//===-- MipsInstrFormats.td - Mips Instruction Formats -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -115,7 +115,7 @@ class FI<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
let Inst{15-0} = imm16;
}
-class CBranchBase<bits<6> op, dag outs, dag ins, string asmstr,
+class BranchBase<bits<6> op, dag outs, dag ins, string asmstr,
list<dag> pattern, InstrItinClass itin>:
MipsInst<outs, ins, asmstr, pattern, itin, FrmI>
{
@@ -290,3 +290,40 @@ class FFR2P<bits<6> funct, bits<5> fmt, string opstr,
FFR<0x11, funct, fmt, (outs RC:$fd), (ins RC:$fs, RC:$ft),
!strconcat(opstr, ".", fmtstr, "\t$fd, $fs, $ft"),
[(set RC:$fd, (OpNode RC:$fs, RC:$ft))]>;
+
+// Floating point madd/msub/nmadd/nmsub.
+class FFMADDSUB<bits<3> funct, bits<3> fmt, dag outs, dag ins, string asmstr,
+ list<dag> pattern>
+ : MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther> {
+ bits<5> fd;
+ bits<5> fr;
+ bits<5> fs;
+ bits<5> ft;
+
+ let Opcode = 0x13;
+ let Inst{25-21} = fr;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-3} = funct;
+ let Inst{2-0} = fmt;
+}
+
+// FP indexed load/store instructions.
+class FFMemIdx<bits<6> funct, dag outs, dag ins, string asmstr,
+ list<dag> pattern> :
+ MipsInst<outs, ins, asmstr, pattern, NoItinerary, FrmOther>
+{
+ bits<5> base;
+ bits<5> index;
+ bits<5> fs;
+ bits<5> fd;
+
+ let Opcode = 0x13;
+
+ let Inst{25-21} = base;
+ let Inst{20-16} = index;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 559943a8dbae..a3a18bff6553 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.cpp - Mips Instruction Information ---------*- C++ -*-===//
+//===-- MipsInstrInfo.cpp - Mips Instruction Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -29,10 +29,10 @@ using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
: MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), IsN64(TM.getSubtarget<MipsSubtarget>().isABI_N64()),
- RI(*TM.getSubtargetImpl(), *this) {}
+ RI(*TM.getSubtargetImpl(), *this),
+ UncondBrOpc(TM.getRelocationModel() == Reloc::PIC_ ? Mips::B : Mips::J) {}
-
-const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
return RI;
}
@@ -131,6 +131,8 @@ copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::FMOV_S;
else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg))
Opc = Mips::FMOV_D32;
+ else if (Mips::FGR64RegClass.contains(DestReg, SrcReg))
+ Opc = Mips::FMOV_D64;
else if (Mips::CCRRegClass.contains(DestReg, SrcReg))
Opc = Mips::MOVCCRToCCR;
else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg.
@@ -140,18 +142,22 @@ copyPhysReg(MachineBasicBlock &MBB,
Opc = Mips::MFHI64, SrcReg = 0;
else if (SrcReg == Mips::LO64)
Opc = Mips::MFLO64, SrcReg = 0;
+ else if (Mips::FGR64RegClass.contains(SrcReg))
+ Opc = Mips::DMFC1;
}
else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg.
if (DestReg == Mips::HI64)
Opc = Mips::MTHI64, DestReg = 0;
else if (DestReg == Mips::LO64)
Opc = Mips::MTLO64, DestReg = 0;
+ else if (Mips::FGR64RegClass.contains(DestReg))
+ Opc = Mips::DMTC1;
}
assert(Opc && "Cannot copy registers");
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc));
-
+
if (DestReg)
MIB.addReg(DestReg, RegState::Define);
@@ -162,6 +168,16 @@ copyPhysReg(MachineBasicBlock &MBB,
MIB.addReg(SrcReg, getKillRegState(KillSrc));
}
+static MachineMemOperand* GetMemOperand(MachineBasicBlock &MBB, int FI,
+ unsigned Flag) {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag,
+ MFI.getObjectSize(FI), Align);
+}
+
void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
@@ -169,6 +185,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore);
+
unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
@@ -184,7 +202,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert(Opc && "Register class not handled!");
BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0);
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
}
void MipsInstrInfo::
@@ -195,6 +213,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad);
unsigned Opc = 0;
if (RC == Mips::CPURegsRegisterClass)
@@ -209,7 +228,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Opc = IsN64 ? Mips::LDC164_P8 : Mips::LDC164;
assert(Opc && "Register class not handled!");
- BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0);
+ BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(0)
+ .addMemOperand(MMO);
}
MachineInstr*
@@ -230,7 +250,8 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ?
+ Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
+ Opc == Mips::J) ?
Opc : 0;
}
@@ -239,21 +260,21 @@ static unsigned GetAnalyzableBrOpc(unsigned Opc) {
unsigned Mips::GetOppositeBranchOpc(unsigned Opc)
{
switch (Opc) {
- default: llvm_unreachable("Illegal opcode!");
- case Mips::BEQ : return Mips::BNE;
- case Mips::BNE : return Mips::BEQ;
- case Mips::BGTZ : return Mips::BLEZ;
- case Mips::BGEZ : return Mips::BLTZ;
- case Mips::BLTZ : return Mips::BGEZ;
- case Mips::BLEZ : return Mips::BGTZ;
- case Mips::BEQ64 : return Mips::BNE64;
- case Mips::BNE64 : return Mips::BEQ64;
- case Mips::BGTZ64 : return Mips::BLEZ64;
- case Mips::BGEZ64 : return Mips::BLTZ64;
- case Mips::BLTZ64 : return Mips::BGEZ64;
- case Mips::BLEZ64 : return Mips::BGTZ64;
- case Mips::BC1T : return Mips::BC1F;
- case Mips::BC1F : return Mips::BC1T;
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BEQ: return Mips::BNE;
+ case Mips::BNE: return Mips::BEQ;
+ case Mips::BGTZ: return Mips::BLEZ;
+ case Mips::BGEZ: return Mips::BLTZ;
+ case Mips::BLTZ: return Mips::BGEZ;
+ case Mips::BLEZ: return Mips::BGTZ;
+ case Mips::BEQ64: return Mips::BNE64;
+ case Mips::BNE64: return Mips::BEQ64;
+ case Mips::BGTZ64: return Mips::BLEZ64;
+ case Mips::BGEZ64: return Mips::BLTZ64;
+ case Mips::BLTZ64: return Mips::BGEZ64;
+ case Mips::BLEZ64: return Mips::BGTZ64;
+ case Mips::BC1T: return Mips::BC1F;
+ case Mips::BC1F: return Mips::BC1T;
}
}
@@ -262,7 +283,7 @@ static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc,
SmallVectorImpl<MachineOperand>& Cond) {
assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch");
int NumOp = Inst->getNumExplicitOperands();
-
+
// for both int and fp branches, the last explicit operand is the
// MBB.
BB = Inst->getOperand(NumOp-1).getMBB();
@@ -314,7 +335,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
if (!SecondLastOpc) {
// Unconditional branch
- if (LastOpc == Mips::J) {
+ if (LastOpc == UncondBrOpc) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
@@ -331,7 +352,7 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// If second to last instruction is an unconditional branch,
// analyze it and remove the last instruction.
- if (SecondLastOpc == Mips::J) {
+ if (SecondLastOpc == UncondBrOpc) {
// Return if the last instruction cannot be removed.
if (!AllowModify)
return true;
@@ -343,15 +364,15 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Conditional branch followed by an unconditional branch.
// The last one must be unconditional.
- if (LastOpc != Mips::J)
+ if (LastOpc != UncondBrOpc)
return true;
AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond);
FBB = LastInst->getOperand(0).getMBB();
return false;
-}
-
+}
+
void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
MachineBasicBlock *TBB, DebugLoc DL,
const SmallVectorImpl<MachineOperand>& Cond)
@@ -385,14 +406,14 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// Two-way Conditional branch.
if (FBB) {
BuildCondBr(MBB, TBB, DL, Cond);
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(FBB);
return 2;
}
// One way branch.
// Unconditional branch.
if (Cond.empty())
- BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(UncondBrOpc)).addMBB(TBB);
else // Conditional branch.
BuildCondBr(MBB, TBB, DL, Cond);
return 1;
@@ -433,27 +454,3 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
return false;
}
-/// getGlobalBaseReg - Return a virtual register initialized with the
-/// the global base register value. Output instructions required to
-/// initialize the register in the function entry block, if necessary.
-///
-unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
- MipsFunctionInfo *MipsFI = MF->getInfo<MipsFunctionInfo>();
- unsigned GlobalBaseReg = MipsFI->getGlobalBaseReg();
- if (GlobalBaseReg != 0)
- return GlobalBaseReg;
-
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
- BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
- GlobalBaseReg).addReg(Mips::GP);
- RegInfo.addLiveIn(Mips::GP);
-
- MipsFI->setGlobalBaseReg(GlobalBaseReg);
- return GlobalBaseReg;
-}
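The UncondBrOpc member initialized above fixes the unconditional branch opcode
once at MipsInstrInfo construction: PIC code must use the PC-relative B, while
static code can use the absolute-target J (matching the RelocPIC/RelocStatic
predicates on UncondBranch and JumpFJ later in this patch). A minimal
standalone sketch of the selection, with toy enums standing in for the LLVM
types:

#include <cassert>

enum class RelocModel { Static, PIC };
enum class BrOpc { J, B }; // absolute jump vs. PC-relative branch

// Mirrors the constructor initialization above: chosen once, then used by
// AnalyzeBranch and InsertBranch for every unconditional branch.
static BrOpc uncondBrOpc(RelocModel RM) {
  return RM == RelocModel::PIC ? BrOpc::B : BrOpc::J;
}

int main() {
  assert(uncondBrOpc(RelocModel::PIC) == BrOpc::B);
  assert(uncondBrOpc(RelocModel::Static) == BrOpc::J);
  return 0;
}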
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 271d2487ecfa..4be727dd8994 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.h - Mips Instruction Information -----------*- C++ -*-===//
+//===-- MipsInstrInfo.h - Mips Instruction Information ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +15,9 @@
#define MIPSINSTRUCTIONINFO_H
#include "Mips.h"
+#include "MipsRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "MipsRegisterInfo.h"
#define GET_INSTRINFO_HEADER
#include "MipsGenInstrInfo.inc"
@@ -30,90 +30,11 @@ namespace Mips {
unsigned GetOppositeBranchOpc(unsigned Opc);
}
-/// MipsII - This namespace holds all of the target specific flags that
-/// instruction info tracks.
-///
-namespace MipsII {
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // Mips Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- /// MO_GOT - Represents the offset into the global offset table at which
- /// the address the relocation entry symbol resides during execution.
- MO_GOT,
-
- /// MO_GOT_CALL - Represents the offset into the global offset table at
- /// which the address of a call site relocation entry symbol resides
- /// during execution. This is different from the above since this flag
- /// can only be present in call instructions.
- MO_GOT_CALL,
-
- /// MO_GPREL - Represents the offset from the current gp value to be used
- /// for the relocatable object file being produced.
- MO_GPREL,
-
- /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol
- /// address.
- MO_ABS_HI,
- MO_ABS_LO,
-
- /// MO_TLSGD - Represents the offset into the global offset table at which
- // the module ID and TSL block offset reside during execution (General
- // Dynamic TLS).
- MO_TLSGD,
-
- /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial
- // Exec TLS).
- MO_GOTTPREL,
-
- /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from
- // the thread pointer (Local Exec TLS).
- MO_TPREL_HI,
- MO_TPREL_LO,
-
- // N32/64 Flags.
- MO_GPOFF_HI,
- MO_GPOFF_LO,
- MO_GOT_DISP,
- MO_GOT_PAGE,
- MO_GOT_OFST
- };
-
- enum {
- //===------------------------------------------------------------------===//
- // Instruction encodings. These are the standard/most common forms for
- // Mips instructions.
- //
-
- // Pseudo - This represents an instruction that is a pseudo instruction
- // or one that has not been implemented yet. It is illegal to code generate
- // it, but tolerated for intermediate implementation stages.
- Pseudo = 0,
-
- /// FrmR - This form is for instructions of the format R.
- FrmR = 1,
- /// FrmI - This form is for instructions of the format I.
- FrmI = 2,
- /// FrmJ - This form is for instructions of the format J.
- FrmJ = 3,
- /// FrmFR - This form is for instructions of the format FR.
- FrmFR = 4,
- /// FrmFI - This form is for instructions of the format FI.
- FrmFI = 5,
- /// FrmOther - This form is for instructions that have no specific format.
- FrmOther = 6,
-
- FormMask = 15
- };
-}
-
class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
bool IsN64;
const MipsRegisterInfo RI;
+ unsigned UncondBrOpc;
public:
explicit MipsInstrInfo(MipsTargetMachine &TM);
@@ -182,12 +103,6 @@ public:
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
-
- /// getGlobalBaseReg - Return a virtual register initialized with the
- /// the global base register value. Output instructions required to
- /// initialize the register in the function entry block, if necessary.
- ///
- unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 3fbd41ef6a3b..be74f8e5230c 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -39,8 +39,8 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2,
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
-def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, iPTR>]>;
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>,
+ SDTCisSameAs<0, 1>]>;
def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
@@ -103,11 +103,11 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
// target constant nodes that would otherwise remain unchanged with ADDiu
// nodes. Without these wrapper node patterns, the following conditional move
// instruction is emitted when function cmov2 in test/CodeGen/Mips/cmov.ll is
-// compiled:
+// compiled:
// movn %got(d)($gp), %got(c)($gp), $4
// This instruction is illegal since movn can take only register operands.
-def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+def MipsWrapper : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
// Pointer to dynamically allocated stack area.
def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
@@ -128,18 +128,31 @@ def HasCondMov : Predicate<"Subtarget.hasCondMov()">;
def HasMips32 : Predicate<"Subtarget.hasMips32()">;
def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">;
def HasMips64 : Predicate<"Subtarget.hasMips64()">;
+def HasMips32r2Or64 : Predicate<"Subtarget.hasMips32r2Or64()">;
def NotMips64 : Predicate<"!Subtarget.hasMips64()">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">;
def IsN64 : Predicate<"Subtarget.isABI_N64()">;
def NotN64 : Predicate<"!Subtarget.isABI_N64()">;
+def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
//===----------------------------------------------------------------------===//
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
// Instruction operand types
-def brtarget : Operand<OtherVT>;
-def calltarget : Operand<i32>;
+def jmptarget : Operand<OtherVT> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
+def brtarget : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
+}
+def calltarget : Operand<iPTR> {
+ let EncoderMethod = "getJumpTargetOpValue";
+}
+def calltarget64: Operand<i64>;
def simm16 : Operand<i32>;
def simm16_64 : Operand<i64>;
def shamt : Operand<i32>;
@@ -167,6 +180,12 @@ def mem_ea : Operand<i32> {
let EncoderMethod = "getMemEncoding";
}
+def mem_ea_64 : Operand<i64> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPU64Regs, simm16_64);
+ let EncoderMethod = "getMemEncoding";
+}
+
// size operand of ext instruction
def size_ext : Operand<i32> {
let EncoderMethod = "getSizeExtEncoding";
@@ -179,12 +198,12 @@ def size_ins : Operand<i32> {
// Transformation Function - get the lower 16 bits.
def LO16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+ return getImm(N, N->getZExtValue() & 0xFFFF);
}]>;
// Transformation Function - get the higher 16 bits.
def HI16 : SDNodeXForm<imm, [{
- return getI32Imm((unsigned)N->getZExtValue() >> 16);
+ return getImm(N, (N->getZExtValue() >> 16) & 0xFFFF);
}]>;
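The LO16/HI16 transforms above feed the usual lui/ori materialization of a
32-bit immediate; the new HI16 body masks with 0xFFFF so the upper half also
stays a valid 16-bit field when the node carries a 64-bit value. A minimal
standalone sketch of the split and rebuild, an illustration rather than the
in-tree helpers:

#include <cassert>
#include <cstdint>

static uint16_t lo16(uint64_t Imm) { return Imm & 0xFFFF; }
static uint16_t hi16(uint64_t Imm) { return (Imm >> 16) & 0xFFFF; }

int main() {
  uint32_t Imm = 0xDEADBEEF;
  // lui $r, hi16 ; ori $r, $r, lo16 rebuilds any 32-bit value, since ori
  // zero-extends its immediate.
  uint32_t Rebuilt = (uint32_t(hi16(Imm)) << 16) | lo16(Imm);
  assert(Rebuilt == Imm);
  return 0;
}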
// Node immediate fits as 16-bit sign extended on target immediate.
@@ -202,36 +221,42 @@ def immZExt16 : PatLeaf<(imm), [{
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
-// shamt field must fit in 5 bits.
-def immZExt5 : PatLeaf<(imm), [{
- return N->getZExtValue() == ((N->getZExtValue()) & 0x1f) ;
+// Immediate can be loaded with LUi (32-bit int with lower 16-bit cleared).
+def immLow16Zero : PatLeaf<(imm), [{
+ int64_t Val = N->getSExtValue();
+ return isInt<32>(Val) && !(Val & 0xffff);
}]>;
+// shamt field must fit in 5 bits.
+def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
+
// Mips Address Mode! SDNode frameindex could possibly be a match
// since load and store instructions from the stack use it.
-def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], []>;
+def addr : ComplexPattern<iPTR, 2, "SelectAddr", [frameindex], [SDNPWantParent]>;
//===----------------------------------------------------------------------===//
// Pattern fragment for load/store
//===----------------------------------------------------------------------===//
-class UnalignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+class UnalignedLoad<PatFrag Node> :
+ PatFrag<(ops node:$ptr), (Node node:$ptr), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
return LD->getMemoryVT().getSizeInBits()/8 > LD->getAlignment();
}]>;
-class AlignedLoad<PatFrag Node> : PatFrag<(ops node:$ptr), (Node node:$ptr), [{
+class AlignedLoad<PatFrag Node> :
+ PatFrag<(ops node:$ptr), (Node node:$ptr), [{
LoadSDNode *LD = cast<LoadSDNode>(N);
return LD->getMemoryVT().getSizeInBits()/8 <= LD->getAlignment();
}]>;
-class UnalignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
- (Node node:$val, node:$ptr), [{
+class UnalignedStore<PatFrag Node> :
+ PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{
StoreSDNode *SD = cast<StoreSDNode>(N);
return SD->getMemoryVT().getSizeInBits()/8 > SD->getAlignment();
}]>;
-class AlignedStore<PatFrag Node> : PatFrag<(ops node:$val, node:$ptr),
- (Node node:$val, node:$ptr), [{
+class AlignedStore<PatFrag Node> :
+ PatFrag<(ops node:$val, node:$ptr), (Node node:$val, node:$ptr), [{
StoreSDNode *SD = cast<StoreSDNode>(N);
return SD->getMemoryVT().getSizeInBits()/8 <= SD->getAlignment();
}]>;
@@ -313,27 +338,34 @@ class LogicNOR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
}
// Shifts
-class LogicR_shift_rotate_imm<bits<6> func, bits<5> _rs, string instr_asm,
- SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rt, shamt:$shamt),
+class shift_rotate_imm<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode, PatFrag PF, Operand ImmOpnd,
+ RegisterClass RC>:
+ FR<0x00, func, (outs RC:$rd), (ins RC:$rt, ImmOpnd:$shamt),
!strconcat(instr_asm, "\t$rd, $rt, $shamt"),
- [(set CPURegs:$rd, (OpNode CPURegs:$rt, (i32 immZExt5:$shamt)))], IIAlu> {
- let rs = _rs;
+ [(set RC:$rd, (OpNode RC:$rt, PF:$shamt))], IIAlu> {
+ let rs = isRotate;
}
-class LogicR_shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
- SDNode OpNode>:
- FR<0x00, func, (outs CPURegs:$rd), (ins CPURegs:$rs, CPURegs:$rt),
+// 32-bit shift instructions.
+class shift_rotate_imm32<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode>:
+ shift_rotate_imm<func, isRotate, instr_asm, OpNode, immZExt5, shamt, CPURegs>;
+
+class shift_rotate_reg<bits<6> func, bits<5> isRotate, string instr_asm,
+ SDNode OpNode, RegisterClass RC>:
+ FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt),
!strconcat(instr_asm, "\t$rd, $rt, $rs"),
- [(set CPURegs:$rd, (OpNode CPURegs:$rt, CPURegs:$rs))], IIAlu> {
+ [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> {
let shamt = isRotate;
}
// Load Upper Immediate
-class LoadUpper<bits<6> op, string instr_asm>:
- FI<op, (outs CPURegs:$rt), (ins uimm16:$imm16),
+class LoadUpper<bits<6> op, string instr_asm, RegisterClass RC, Operand Imm>:
+ FI<op, (outs RC:$rt), (ins Imm:$imm16),
!strconcat(instr_asm, "\t$rt, $imm16"), [], IIAlu> {
let rs = 0;
+ let neverHasSideEffects = 1;
}
class FMem<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
@@ -361,6 +393,14 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
let isPseudo = Pseudo;
}
+// Unaligned Memory Load/Store
+let canFoldAsLoad = 1 in
+class LoadUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs RC:$rt), (ins MemOpnd:$addr), "", [], IILoad> {}
+
+class StoreUnAlign<bits<6> op, RegisterClass RC, Operand MemOpnd>:
+ FMem<op, (outs), (ins RC:$rt, MemOpnd:$addr), "", [], IIStore> {}
+
// 32-bit load.
multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
@@ -368,7 +408,7 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[NotN64]>;
def _P8 : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
Requires<[IsN64]>;
-}
+}
// 64-bit load.
multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
@@ -377,8 +417,15 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[NotN64]>;
def _P8 : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
Requires<[IsN64]>;
-}
+}
+// 32-bit load.
+multiclass LoadUnAlign32<bits<6> op> {
+ def #NAME# : LoadUnAlign<op, CPURegs, mem>,
+ Requires<[NotN64]>;
+ def _P8 : LoadUnAlign<op, CPURegs, mem64>,
+ Requires<[IsN64]>;
+}
// 32-bit store.
multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
bit Pseudo = 0> {
@@ -397,11 +444,19 @@ multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
Requires<[IsN64]>;
}
+// 32-bit store.
+multiclass StoreUnAlign32<bits<6> op> {
+ def #NAME# : StoreUnAlign<op, CPURegs, mem>,
+ Requires<[NotN64]>;
+ def _P8 : StoreUnAlign<op, CPURegs, mem64>,
+ Requires<[IsN64]>;
+}
+
// Conditional Branch
class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, RC:$rt, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $rt, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, RC:$rt)), bb:$imm16)], IIBranch> {
let isBranch = 1;
let isTerminator = 1;
let hasDelaySlot = 1;
@@ -409,9 +464,9 @@ class CBranch<bits<6> op, string instr_asm, PatFrag cond_op, RegisterClass RC>:
class CBranchZero<bits<6> op, bits<5> _rt, string instr_asm, PatFrag cond_op,
RegisterClass RC>:
- CBranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
- !strconcat(instr_asm, "\t$rs, $imm16"),
- [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
+ BranchBase<op, (outs), (ins RC:$rs, brtarget:$imm16),
+ !strconcat(instr_asm, "\t$rs, $imm16"),
+ [(brcond (i32 (cond_op RC:$rs, 0)), bb:$imm16)], IIBranch> {
let rt = _rt;
let isBranch = 1;
let isTerminator = 1;
@@ -435,146 +490,228 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
[(set CPURegs:$rt, (cond_op RC:$rs, imm_type:$imm16))],
IIAlu>;
-// Unconditional branch
-let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
+// Jump
class JumpFJ<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins brtarget:$target),
- !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch>;
+ FJ<op, (outs), (ins jmptarget:$target),
+ !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> {
+ let isBranch=1;
+ let isTerminator=1;
+ let isBarrier=1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocStatic];
+}
+
+// Unconditional branch
+class UncondBranch<bits<6> op, string instr_asm>:
+ BranchBase<op, (outs), (ins brtarget:$imm16),
+ !strconcat(instr_asm, "\t$imm16"), [(br bb:$imm16)], IIBranch> {
+ let rs = 0;
+ let rt = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let hasDelaySlot = 1;
+ let Predicates = [RelocPIC];
+}
-let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in
-class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$rs),
- !strconcat(instr_asm, "\t$rs"), [(brind CPURegs:$rs)], IIBranch> {
+let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1,
+ isIndirectBranch = 1 in
+class JumpFR<bits<6> op, bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<op, func, (outs), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rs"), [(brind RC:$rs)], IIBranch> {
let rt = 0;
let rd = 0;
let shamt = 0;
}
// Jump and Link (Call)
-let isCall=1, hasDelaySlot=1,
- // All calls clobber the non-callee saved registers...
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
- K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
+let isCall=1, hasDelaySlot=1 in {
class JumpLink<bits<6> op, string instr_asm>:
FJ<op, (outs), (ins calltarget:$target, variable_ops),
!strconcat(instr_asm, "\t$target"), [(MipsJmpLink imm:$target)],
IIBranch>;
- class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm>:
- FR<op, func, (outs), (ins CPURegs:$rs, variable_ops),
- !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink CPURegs:$rs)], IIBranch> {
+ class JumpLinkReg<bits<6> op, bits<6> func, string instr_asm,
+ RegisterClass RC>:
+ FR<op, func, (outs), (ins RC:$rs, variable_ops),
+ !strconcat(instr_asm, "\t$rs"), [(MipsJmpLink RC:$rs)], IIBranch> {
let rt = 0;
let rd = 31;
let shamt = 0;
}
- class BranchLink<string instr_asm>:
- FI<0x1, (outs), (ins CPURegs:$rs, brtarget:$imm16, variable_ops),
- !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch>;
+ class BranchLink<string instr_asm, bits<5> _rt, RegisterClass RC>:
+ FI<0x1, (outs), (ins RC:$rs, brtarget:$imm16, variable_ops),
+ !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch> {
+ let rt = _rt;
+ }
}
// Mul, Div
-class Mul<bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
+class Mult<bits<6> func, string instr_asm, InstrItinClass itin,
+ RegisterClass RC, list<Register> DefRegs>:
+ FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
!strconcat(instr_asm, "\t$rs, $rt"), [], itin> {
let rd = 0;
let shamt = 0;
let isCommutable = 1;
- let Defs = [HI, LO];
+ let Defs = DefRegs;
+ let neverHasSideEffects = 1;
}
-class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
- FR<0x00, func, (outs), (ins CPURegs:$rs, CPURegs:$rt),
- !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
- [(op CPURegs:$rs, CPURegs:$rt)], itin> {
+class Mult32<bits<6> func, string instr_asm, InstrItinClass itin>:
+ Mult<func, instr_asm, itin, CPURegs, [HI, LO]>;
+
+class Div<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin,
+ RegisterClass RC, list<Register> DefRegs>:
+ FR<0x00, func, (outs), (ins RC:$rs, RC:$rt),
+ !strconcat(instr_asm, "\t$$zero, $rs, $rt"),
+ [(op RC:$rs, RC:$rt)], itin> {
let rd = 0;
let shamt = 0;
- let Defs = [HI, LO];
+ let Defs = DefRegs;
}
+class Div32<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
+ Div<op, func, instr_asm, itin, CPURegs, [HI, LO]>;
+
// Move from Hi/Lo
-class MoveFromLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs CPURegs:$rd), (ins),
+class MoveFromLOHI<bits<6> func, string instr_asm, RegisterClass RC,
+ list<Register> UseRegs>:
+ FR<0x00, func, (outs RC:$rd), (ins),
!strconcat(instr_asm, "\t$rd"), [], IIHiLo> {
let rs = 0;
let rt = 0;
let shamt = 0;
+ let Uses = UseRegs;
+ let neverHasSideEffects = 1;
}
-class MoveToLOHI<bits<6> func, string instr_asm>:
- FR<0x00, func, (outs), (ins CPURegs:$rs),
+class MoveToLOHI<bits<6> func, string instr_asm, RegisterClass RC,
+ list<Register> DefRegs>:
+ FR<0x00, func, (outs), (ins RC:$rs),
!strconcat(instr_asm, "\t$rs"), [], IIHiLo> {
let rt = 0;
let rd = 0;
let shamt = 0;
+ let Defs = DefRegs;
+ let neverHasSideEffects = 1;
}
-class EffectiveAddress<string instr_asm> :
- FMem<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr),
- instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>;
+class EffectiveAddress<string instr_asm, RegisterClass RC, Operand Mem> :
+ FMem<0x09, (outs RC:$rt), (ins Mem:$addr),
+ instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>;
// Count Leading Ones/Zeros in Word
-class CountLeading<bits<6> func, string instr_asm, list<dag> pattern>:
- FR<0x1c, func, (outs CPURegs:$rd), (ins CPURegs:$rs),
- !strconcat(instr_asm, "\t$rd, $rs"), pattern, IIAlu>,
+class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"),
+ [(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
+ Requires<[HasBitCount]> {
+ let shamt = 0;
+ let rt = rd;
+}
+
+class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
+ FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
+ !strconcat(instr_asm, "\t$rd, $rs"),
+ [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
Requires<[HasBitCount]> {
let shamt = 0;
let rt = rd;
}
// Sign Extend in Register.
-class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt>:
- FR<0x1f, 0x20, (outs CPURegs:$rd), (ins CPURegs:$rt),
+class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
+ RegisterClass RC>:
+ FR<0x1f, 0x20, (outs RC:$rd), (ins RC:$rt),
!strconcat(instr_asm, "\t$rd, $rt"),
- [(set CPURegs:$rd, (sext_inreg CPURegs:$rt, vt))], NoItinerary> {
+ [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
let rs = 0;
let shamt = sa;
let Predicates = [HasSEInReg];
}
-// Byte Swap
-class ByteSwap<bits<6> func, bits<5> sa, string instr_asm>:
- FR<0x1f, func, (outs CPURegs:$rd), (ins CPURegs:$rt),
- !strconcat(instr_asm, "\t$rd, $rt"),
- [(set CPURegs:$rd, (bswap CPURegs:$rt))], NoItinerary> {
+// Subword Swap
+class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
+ FR<0x1f, func, (outs RC:$rd), (ins RC:$rt),
+ !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
let rs = 0;
let shamt = sa;
let Predicates = [HasSwap];
+ let neverHasSideEffects = 1;
}
// Read Hardware
-class ReadHardware: FR<0x1f, 0x3b, (outs CPURegs:$rt), (ins HWRegs:$rd),
- "rdhwr\t$rt, $rd", [], IIAlu> {
+class ReadHardware<RegisterClass CPURegClass, RegisterClass HWRegClass>
+ : FR<0x1f, 0x3b, (outs CPURegClass:$rt), (ins HWRegClass:$rd),
+ "rdhwr\t$rt, $rd", [], IIAlu> {
let rs = 0;
let shamt = 0;
}
// Ext and Ins
-class ExtIns<bits<6> _funct, string instr_asm, dag outs, dag ins,
- list<dag> pattern, InstrItinClass itin>:
- FR<0x1f, _funct, outs, ins, !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
- pattern, itin>, Requires<[HasMips32r2]> {
+class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt), (ins RC:$rs, uimm16:$pos, size_ext:$sz),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsExt RC:$rs, imm:$pos, imm:$sz))], NoItinerary> {
+ bits<5> pos;
+ bits<5> sz;
+ let rd = sz;
+ let shamt = pos;
+ let Predicates = [HasMips32r2];
+}
+
+class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
+ FR<0x1f, _funct, (outs RC:$rt),
+ (ins RC:$rs, uimm16:$pos, size_ins:$sz, RC:$src),
+ !strconcat(instr_asm, " $rt, $rs, $pos, $sz"),
+ [(set RC:$rt, (MipsIns RC:$rs, imm:$pos, imm:$sz, RC:$src))],
+ NoItinerary> {
bits<5> pos;
bits<5> sz;
let rd = sz;
let shamt = pos;
+ let Predicates = [HasMips32r2];
+ let Constraints = "$src = $rt";
}
// Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*).
-class Atomic2Ops<PatFrag Op, string Opstr> :
- MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr),
+class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
+ RegisterClass PRC> :
+ MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr),
!strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"),
- [(set CPURegs:$dst,
- (Op CPURegs:$ptr, CPURegs:$incr))]>;
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>;
+
+multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
+ def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>, Requires<[IsN64]>;
+}
// Atomic Compare & Swap.
-class AtomicCmpSwap<PatFrag Op, string Width> :
- MipsPseudo<(outs CPURegs:$dst),
- (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap),
- !strconcat("atomic_cmp_swap_", Width,
- "\t$dst, $ptr, $cmp, $swap"),
- [(set CPURegs:$dst,
- (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>;
+class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
+ RegisterClass PRC> :
+ MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap),
+ !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"),
+ [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>;
+
+multiclass AtomicCmpSwap32<PatFrag Op, string Width> {
+ def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>, Requires<[NotN64]>;
+ def _P8 : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>, Requires<[IsN64]>;
+}
+
+class LLBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
+ FMem<Opc, (outs RC:$rt), (ins Mem:$addr),
+ !strconcat(opstring, "\t$rt, $addr"), [], IILoad> {
+ let mayLoad = 1;
+}
+
+class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> :
+ FMem<Opc, (outs RC:$dst), (ins RC:$rt, Mem:$addr),
+ !strconcat(opstring, "\t$rt, $addr"), [], IIStore> {
+ let mayStore = 1;
+ let Constraints = "$rt = $dst";
+}
//===----------------------------------------------------------------------===//
// Pseudo instructions
@@ -590,52 +727,64 @@ def ADJCALLSTACKUP : MipsPseudo<(outs), (ins uimm16:$amt1, uimm16:$amt2),
[(callseq_end timm:$amt1, timm:$amt2)]>;
}
-// Some assembly macros need to avoid pseudoinstructions and assembler
-// automatic reordering; we should reorder ourselves.
-def MACRO : MipsPseudo<(outs), (ins), ".set\tmacro", []>;
-def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>;
-def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>;
-def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>;
-
-// These macros are inserted to prevent GAS from complaining
-// when using the AT register.
-def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>;
-def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
-
// When handling PIC code the assembler needs .cpload and .cprestore
// directives. If the real instructions corresponding to these directives
// are used, we get the same behavior, but also a bunch of warnings
// from the assembler.
-def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
+let neverHasSideEffects = 1 in
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc, CPURegs:$gp),
+ ".cprestore\t$loc", []>;
+
+// For O32 ABI & PIC & non-fixed global base register, the following instruction
+// sequence is emitted to set the global base register:
+//
+// 0. lui $2, %hi(_gp_disp)
+// 1. addiu $2, $2, %lo(_gp_disp)
+// 2. addu $globalbasereg, $2, $t9
+//
+// SETGP01 is emitted during Prologue/Epilogue insertion and then converted to
+// instructions 0 and 1 in the sequence above during MC lowering.
+// SETGP2 is emitted just before register allocation and converted to
+// instruction 2 just prior to post-RA scheduling.
+//
+// These pseudo instructions are needed to ensure no instructions are inserted
+// before or between instructions 0 and 1, which is a limitation imposed by
+// the GNU linker.
+
+let isTerminator = 1, isBarrier = 1 in
+def SETGP01 : MipsPseudo<(outs CPURegs:$dst), (ins), "", []>;
+
+let neverHasSideEffects = 1 in
+def SETGP2 : MipsPseudo<(outs CPURegs:$globalreg), (ins CPURegs:$picreg), "",
+ []>;
let usesCustomInserter = 1 in {
- def ATOMIC_LOAD_ADD_I8 : Atomic2Ops<atomic_load_add_8, "load_add_8">;
- def ATOMIC_LOAD_ADD_I16 : Atomic2Ops<atomic_load_add_16, "load_add_16">;
- def ATOMIC_LOAD_ADD_I32 : Atomic2Ops<atomic_load_add_32, "load_add_32">;
- def ATOMIC_LOAD_SUB_I8 : Atomic2Ops<atomic_load_sub_8, "load_sub_8">;
- def ATOMIC_LOAD_SUB_I16 : Atomic2Ops<atomic_load_sub_16, "load_sub_16">;
- def ATOMIC_LOAD_SUB_I32 : Atomic2Ops<atomic_load_sub_32, "load_sub_32">;
- def ATOMIC_LOAD_AND_I8 : Atomic2Ops<atomic_load_and_8, "load_and_8">;
- def ATOMIC_LOAD_AND_I16 : Atomic2Ops<atomic_load_and_16, "load_and_16">;
- def ATOMIC_LOAD_AND_I32 : Atomic2Ops<atomic_load_and_32, "load_and_32">;
- def ATOMIC_LOAD_OR_I8 : Atomic2Ops<atomic_load_or_8, "load_or_8">;
- def ATOMIC_LOAD_OR_I16 : Atomic2Ops<atomic_load_or_16, "load_or_16">;
- def ATOMIC_LOAD_OR_I32 : Atomic2Ops<atomic_load_or_32, "load_or_32">;
- def ATOMIC_LOAD_XOR_I8 : Atomic2Ops<atomic_load_xor_8, "load_xor_8">;
- def ATOMIC_LOAD_XOR_I16 : Atomic2Ops<atomic_load_xor_16, "load_xor_16">;
- def ATOMIC_LOAD_XOR_I32 : Atomic2Ops<atomic_load_xor_32, "load_xor_32">;
- def ATOMIC_LOAD_NAND_I8 : Atomic2Ops<atomic_load_nand_8, "load_nand_8">;
- def ATOMIC_LOAD_NAND_I16 : Atomic2Ops<atomic_load_nand_16, "load_nand_16">;
- def ATOMIC_LOAD_NAND_I32 : Atomic2Ops<atomic_load_nand_32, "load_nand_32">;
-
- def ATOMIC_SWAP_I8 : Atomic2Ops<atomic_swap_8, "swap_8">;
- def ATOMIC_SWAP_I16 : Atomic2Ops<atomic_swap_16, "swap_16">;
- def ATOMIC_SWAP_I32 : Atomic2Ops<atomic_swap_32, "swap_32">;
-
- def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<atomic_cmp_swap_8, "8">;
- def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<atomic_cmp_swap_16, "16">;
- def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<atomic_cmp_swap_32, "32">;
+ defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32<atomic_load_add_8, "load_add_8">;
+ defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32<atomic_load_add_16, "load_add_16">;
+ defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32<atomic_load_add_32, "load_add_32">;
+ defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32<atomic_load_sub_8, "load_sub_8">;
+ defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32<atomic_load_sub_16, "load_sub_16">;
+ defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32<atomic_load_sub_32, "load_sub_32">;
+ defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32<atomic_load_and_8, "load_and_8">;
+ defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32<atomic_load_and_16, "load_and_16">;
+ defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32<atomic_load_and_32, "load_and_32">;
+ defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32<atomic_load_or_8, "load_or_8">;
+ defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32<atomic_load_or_16, "load_or_16">;
+ defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32<atomic_load_or_32, "load_or_32">;
+ defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32<atomic_load_xor_8, "load_xor_8">;
+ defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32<atomic_load_xor_16, "load_xor_16">;
+ defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32<atomic_load_xor_32, "load_xor_32">;
+ defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32<atomic_load_nand_8, "load_nand_8">;
+ defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32<atomic_load_nand_16, "load_nand_16">;
+ defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32<atomic_load_nand_32, "load_nand_32">;
+
+ defm ATOMIC_SWAP_I8 : Atomic2Ops32<atomic_swap_8, "swap_8">;
+ defm ATOMIC_SWAP_I16 : Atomic2Ops32<atomic_swap_16, "swap_16">;
+ defm ATOMIC_SWAP_I32 : Atomic2Ops32<atomic_swap_32, "swap_32">;
+
+ defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32<atomic_cmp_swap_8, "8">;
+ defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32<atomic_cmp_swap_16, "16">;
+ defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32<atomic_cmp_swap_32, "32">;
}
//===----------------------------------------------------------------------===//
@@ -654,7 +803,7 @@ def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>;
def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>;
def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>;
def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>;
-def LUi : LoadUpper<0x0f, "lui">;
+def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>;
/// Arithmetic Instructions (3-Operand, R-Type)
def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>;
@@ -669,17 +818,17 @@ def XOR : ArithLogicR<0x00, 0x26, "xor", xor, IIAlu, CPURegs, 1>;
def NOR : LogicNOR<0x00, 0x27, "nor", CPURegs>;
/// Shift Instructions
-def SLL : LogicR_shift_rotate_imm<0x00, 0x00, "sll", shl>;
-def SRL : LogicR_shift_rotate_imm<0x02, 0x00, "srl", srl>;
-def SRA : LogicR_shift_rotate_imm<0x03, 0x00, "sra", sra>;
-def SLLV : LogicR_shift_rotate_reg<0x04, 0x00, "sllv", shl>;
-def SRLV : LogicR_shift_rotate_reg<0x06, 0x00, "srlv", srl>;
-def SRAV : LogicR_shift_rotate_reg<0x07, 0x00, "srav", sra>;
+def SLL : shift_rotate_imm32<0x00, 0x00, "sll", shl>;
+def SRL : shift_rotate_imm32<0x02, 0x00, "srl", srl>;
+def SRA : shift_rotate_imm32<0x03, 0x00, "sra", sra>;
+def SLLV : shift_rotate_reg<0x04, 0x00, "sllv", shl, CPURegs>;
+def SRLV : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
+def SRAV : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
// Rotate Instructions
let Predicates = [HasMips32r2] in {
- def ROTR : LogicR_shift_rotate_imm<0x02, 0x01, "rotr", rotr>;
- def ROTRV : LogicR_shift_rotate_reg<0x06, 0x01, "rotrv", rotr>;
+ def ROTR : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
+ def ROTRV : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
}
/// Load and Store Instructions
@@ -700,6 +849,12 @@ defm ULW : LoadM32<0x23, "ulw", load_u, 1>;
defm USH : StoreM32<0x29, "ush", truncstorei16_u, 1>;
defm USW : StoreM32<0x2b, "usw", store_u, 1>;
+/// Primitives for unaligned load and store
+defm LWL : LoadUnAlign32<0x22>;
+defm LWR : LoadUnAlign32<0x26>;
+defm SWL : StoreUnAlign32<0x2A>;
+defm SWR : StoreUnAlign32<0x2E>;
+
let hasSideEffects = 1 in
def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
[(MipsSync imm:$stype)], NoItinerary, FrmOther>
@@ -712,19 +867,15 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype",
}
/// Load-linked, Store-conditional
-let mayLoad = 1 in
- def LL : FMem<0x30, (outs CPURegs:$rt), (ins mem:$addr),
- "ll\t$rt, $addr", [], IILoad>;
-let mayStore = 1, Constraints = "$rt = $dst" in
- def SC : FMem<0x38, (outs CPURegs:$dst), (ins CPURegs:$rt, mem:$addr),
- "sc\t$rt, $addr", [], IIStore>;
+def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>;
+def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]>;
+def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>;
+def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>;
/// Jump and Branch Instructions
def J : JumpFJ<0x02, "j">;
-let isIndirectBranch = 1 in
- def JR : JumpFR<0x00, 0x08, "jr">;
-def JAL : JumpLink<0x03, "jal">;
-def JALR : JumpLinkReg<0x00, 0x09, "jalr">;
+def JR : JumpFR<0x00, 0x08, "jr", CPURegs>;
+def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
def BNE : CBranch<0x05, "bne", setne, CPURegs>;
def BGEZ : CBranchZero<0x01, 1, "bgez", setge, CPURegs>;
@@ -732,10 +883,10 @@ def BGTZ : CBranchZero<0x07, 0, "bgtz", setgt, CPURegs>;
def BLEZ : CBranchZero<0x06, 0, "blez", setle, CPURegs>;
def BLTZ : CBranchZero<0x01, 0, "bltz", setlt, CPURegs>;
-let rt=0x11 in
- def BGEZAL : BranchLink<"bgezal">;
-let rt=0x10 in
- def BLTZAL : BranchLink<"bltzal">;
+def JAL : JumpLink<0x03, "jal">;
+def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
+def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
+def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
let isReturn=1, isTerminator=1, hasDelaySlot=1,
isBarrier=1, hasCtrlDep=1, rd=0, rt=0, shamt=0 in
@@ -743,50 +894,26 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1,
"jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>;
/// Multiply and Divide Instructions.
-def MULT : Mul<0x18, "mult", IIImul>;
-def MULTu : Mul<0x19, "multu", IIImul>;
-def SDIV : Div<MipsDivRem, 0x1a, "div", IIIdiv>;
-def UDIV : Div<MipsDivRemU, 0x1b, "divu", IIIdiv>;
+def MULT : Mult32<0x18, "mult", IIImul>;
+def MULTu : Mult32<0x19, "multu", IIImul>;
+def SDIV : Div32<MipsDivRem, 0x1a, "div", IIIdiv>;
+def UDIV : Div32<MipsDivRemU, 0x1b, "divu", IIIdiv>;
-let Defs = [HI] in
- def MTHI : MoveToLOHI<0x11, "mthi">;
-let Defs = [LO] in
- def MTLO : MoveToLOHI<0x13, "mtlo">;
-
-let Uses = [HI] in
- def MFHI : MoveFromLOHI<0x10, "mfhi">;
-let Uses = [LO] in
- def MFLO : MoveFromLOHI<0x12, "mflo">;
+def MTHI : MoveToLOHI<0x11, "mthi", CPURegs, [HI]>;
+def MTLO : MoveToLOHI<0x13, "mtlo", CPURegs, [LO]>;
+def MFHI : MoveFromLOHI<0x10, "mfhi", CPURegs, [HI]>;
+def MFLO : MoveFromLOHI<0x12, "mflo", CPURegs, [LO]>;
/// Sign Ext In Register Instructions.
-def SEB : SignExtInReg<0x10, "seb", i8>;
-def SEH : SignExtInReg<0x18, "seh", i16>;
+def SEB : SignExtInReg<0x10, "seb", i8, CPURegs>;
+def SEH : SignExtInReg<0x18, "seh", i16, CPURegs>;
/// Count Leading
-def CLZ : CountLeading<0x20, "clz",
- [(set CPURegs:$rd, (ctlz CPURegs:$rs))]>;
-def CLO : CountLeading<0x21, "clo",
- [(set CPURegs:$rd, (ctlz (not CPURegs:$rs)))]>;
-
-/// Byte Swap
-def WSBW : ByteSwap<0x20, 0x2, "wsbw">;
-
-// Conditional moves:
-// These instructions are expanded in
-// MipsISelLowering::EmitInstrWithCustomInserter if target does not have
-// conditional move instructions.
-// flag:int, data:int
-class CondMovIntInt<bits<6> funct, string instr_asm> :
- FR<0, funct, (outs CPURegs:$rd),
- (ins CPURegs:$rs, CPURegs:$rt, CPURegs:$F),
- !strconcat(instr_asm, "\t$rd, $rs, $rt"), [], NoItinerary> {
- let shamt = 0;
- let usesCustomInserter = 1;
- let Constraints = "$F = $rd";
-}
+def CLZ : CountLeading0<0x20, "clz", CPURegs>;
+def CLO : CountLeading1<0x21, "clo", CPURegs>;
-def MOVZ_I : CondMovIntInt<0x0a, "movz">;
-def MOVN_I : CondMovIntInt<0x0b, "movn">;
+/// Word Swap Bytes Within Halfwords
+def WSBH : SubwordSwap<0x20, 0x2, "wsbh", CPURegs>;
/// No operation
let addr=0 in
@@ -796,13 +923,13 @@ let addr=0 in
// instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>;
// DynAlloc node points to dynamically allocated stack space.
// $sp is added to the list of implicitly used registers to prevent dead code
// elimination from removing instructions that modify $sp.
let Uses = [SP] in
-def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">;
+def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -815,21 +942,10 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
def MUL : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
Requires<[HasMips32]>;
-def RDHWR : ReadHardware;
+def RDHWR : ReadHardware<CPURegs, HWRegs>;
-def EXT : ExtIns<0, "ext", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ext:$sz),
- [(set CPURegs:$rt,
- (MipsExt CPURegs:$rs, immZExt5:$pos, immZExt5:$sz))],
- NoItinerary>;
-
-let Constraints = "$src = $rt" in
-def INS : ExtIns<4, "ins", (outs CPURegs:$rt),
- (ins CPURegs:$rs, uimm16:$pos, size_ins:$sz, CPURegs:$src),
- [(set CPURegs:$rt,
- (MipsIns CPURegs:$rs, immZExt5:$pos, immZExt5:$sz,
- CPURegs:$src))],
- NoItinerary>;
+def EXT : ExtBase<0, "ext", CPURegs>;
+def INS : InsBase<4, "ins", CPURegs>;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
@@ -840,6 +956,8 @@ def : Pat<(i32 immSExt16:$in),
(ADDiu ZERO, imm:$in)>;
def : Pat<(i32 immZExt16:$in),
(ORi ZERO, imm:$in)>;
+def : Pat<(i32 immLow16Zero:$in),
+ (LUi (HI16 imm:$in))>;
// Arbitrary immediates
def : Pat<(i32 imm:$imm),
@@ -864,22 +982,26 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
+def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
+def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
+def : Pat<(MipsHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
+
def : Pat<(MipsLo tglobaladdr:$in), (ADDiu ZERO, tglobaladdr:$in)>;
def : Pat<(MipsLo tblockaddress:$in), (ADDiu ZERO, tblockaddress:$in)>;
+def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
+def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
+def : Pat<(MipsLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
+
def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)),
(ADDiu CPURegs:$hi, tglobaladdr:$lo)>;
def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)),
(ADDiu CPURegs:$hi, tblockaddress:$lo)>;
-
-def : Pat<(MipsHi tjumptable:$in), (LUi tjumptable:$in)>;
-def : Pat<(MipsLo tjumptable:$in), (ADDiu ZERO, tjumptable:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)),
(ADDiu CPURegs:$hi, tjumptable:$lo)>;
-
-def : Pat<(MipsHi tconstpool:$in), (LUi tconstpool:$in)>;
-def : Pat<(MipsLo tconstpool:$in), (ADDiu ZERO, tconstpool:$in)>;
def : Pat<(add CPURegs:$hi, (MipsLo tconstpool:$lo)),
(ADDiu CPURegs:$hi, tconstpool:$lo)>;
+def : Pat<(add CPURegs:$hi, (MipsLo tglobaltlsaddr:$lo)),
+ (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
// gp_rel relocs
def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
@@ -887,39 +1009,45 @@ def : Pat<(add CPURegs:$gp, (MipsGPRel tglobaladdr:$in)),
def : Pat<(add CPURegs:$gp, (MipsGPRel tconstpool:$in)),
(ADDiu CPURegs:$gp, tconstpool:$in)>;
-// tlsgd
-def : Pat<(add CPURegs:$gp, (MipsTlsGd tglobaltlsaddr:$in)),
- (ADDiu CPURegs:$gp, tglobaltlsaddr:$in)>;
-
-// tprel hi/lo
-def : Pat<(MipsTprelHi tglobaltlsaddr:$in), (LUi tglobaltlsaddr:$in)>;
-def : Pat<(MipsTprelLo tglobaltlsaddr:$in), (ADDiu ZERO, tglobaltlsaddr:$in)>;
-def : Pat<(add CPURegs:$hi, (MipsTprelLo tglobaltlsaddr:$lo)),
- (ADDiu CPURegs:$hi, tglobaltlsaddr:$lo)>;
-
// wrapper_pic
-class WrapperPICPat<SDNode node>:
- Pat<(MipsWrapperPIC node:$in),
- (ADDiu GP, node:$in)>;
+class WrapperPat<SDNode node, Instruction ADDiuOp, RegisterClass RC>:
+ Pat<(MipsWrapper RC:$gp, node:$in),
+ (ADDiuOp RC:$gp, node:$in)>;
-def : WrapperPICPat<tglobaladdr>;
-def : WrapperPICPat<tconstpool>;
-def : WrapperPICPat<texternalsym>;
-def : WrapperPICPat<tblockaddress>;
-def : WrapperPICPat<tjumptable>;
+def : WrapperPat<tglobaladdr, ADDiu, CPURegs>;
+def : WrapperPat<tconstpool, ADDiu, CPURegs>;
+def : WrapperPat<texternalsym, ADDiu, CPURegs>;
+def : WrapperPat<tblockaddress, ADDiu, CPURegs>;
+def : WrapperPat<tjumptable, ADDiu, CPURegs>;
+def : WrapperPat<tglobaltlsaddr, ADDiu, CPURegs>;
// Mips does not have "not", so we expand our way
def : Pat<(not CPURegs:$in),
(NOR CPURegs:$in, ZERO)>;
-// extended load and stores
-def : Pat<(extloadi1 addr:$src), (LBu addr:$src)>;
-def : Pat<(extloadi8 addr:$src), (LBu addr:$src)>;
-def : Pat<(extloadi16_a addr:$src), (LHu addr:$src)>;
-def : Pat<(extloadi16_u addr:$src), (ULHu addr:$src)>;
+// extended loads
+let Predicates = [NotN64] in {
+ def : Pat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
+ def : Pat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+ def : Pat<(i32 (extloadi16_a addr:$src)), (LHu addr:$src)>;
+ def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu addr:$src)>;
+}
+let Predicates = [IsN64] in {
+ def : Pat<(i32 (extloadi1 addr:$src)), (LBu_P8 addr:$src)>;
+ def : Pat<(i32 (extloadi8 addr:$src)), (LBu_P8 addr:$src)>;
+ def : Pat<(i32 (extloadi16_a addr:$src)), (LHu_P8 addr:$src)>;
+ def : Pat<(i32 (extloadi16_u addr:$src)), (ULHu_P8 addr:$src)>;
+}
// peepholes
-def : Pat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+let Predicates = [NotN64] in {
+ def : Pat<(store_a (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
+ def : Pat<(store_u (i32 0), addr:$dst), (USW ZERO, addr:$dst)>;
+}
+let Predicates = [IsN64] in {
+ def : Pat<(store_a (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
+ def : Pat<(store_u (i32 0), addr:$dst), (USW_P8 ZERO, addr:$dst)>;
+}
// brcond patterns
multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
@@ -950,38 +1078,6 @@ def : Pat<(brcond RC:$cond, bb:$dst),
defm : BrcondPats<CPURegs, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
-// select patterns
-multiclass MovzPats<RegisterClass RC, Instruction MOVZInst> {
- def : Pat<(select (i32 (setge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (i32 (setuge CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (i32 (setge CPURegs:$lhs, immSExt16:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>;
- def : Pat<(select (i32 (setuge CPURegs:$lh, immSExt16:$rh)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>;
- def : Pat<(select (i32 (setle CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (i32 (setule CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>;
- def : Pat<(select (i32 (seteq CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select (i32 (seteq CPURegs:$lhs, 0)), RC:$T, RC:$F),
- (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>;
-}
-
-multiclass MovnPats<RegisterClass RC, Instruction MOVNInst> {
- def : Pat<(select (i32 (setne CPURegs:$lhs, CPURegs:$rhs)), RC:$T, RC:$F),
- (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>;
- def : Pat<(select CPURegs:$cond, RC:$T, RC:$F),
- (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>;
- def : Pat<(select (i32 (setne CPURegs:$lhs, 0)), RC:$T, RC:$F),
- (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>;
-}
-
-defm : MovzPats<CPURegs, MOVZ_I>;
-defm : MovnPats<CPURegs, MOVN_I>;
-
// setcc patterns
multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
Instruction SLTuOp, Register ZEROReg> {
@@ -1029,10 +1125,14 @@ defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
// select MipsDynAlloc
def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+// bswap pattern
+def : Pat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
include "MipsInstrFPU.td"
include "Mips64InstrInfo.td"
+include "MipsCondMov.td"
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index e3f6a753c406..76ca3e176727 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -1,4 +1,4 @@
-//===- MipsJITInfo.cpp - Implement the JIT interfaces for the Mips target -===//
+//===-- MipsJITInfo.cpp - Implement the Mips JIT Interface ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -200,7 +200,7 @@ void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
intptr_t ResultPtr = (intptr_t) MR->getResultPointer();
switch ((Mips::RelocationType) MR->getRelocationType()) {
- case Mips::reloc_mips_branch:
+ case Mips::reloc_mips_pc16:
ResultPtr = (((ResultPtr - (intptr_t) RelocPos) - 4) >> 2) & 0xffff;
*((unsigned*) RelocPos) |= (unsigned) ResultPtr;
break;
@@ -218,13 +218,16 @@ void MipsJITInfo::relocate(void *Function, MachineRelocation *MR,
*((unsigned*) RelocPos) |= (unsigned) ResultPtr;
break;
- case Mips::reloc_mips_lo:
- ResultPtr = ResultPtr & 0xffff;
+ case Mips::reloc_mips_lo: {
+    // The addend is needed for unaligned load/store instructions, where the
+    // offset for the second load/store in the expanded instruction sequence
+    // must be modified by +1 or +3. Otherwise, Addend is 0.
+ int Addend = *((unsigned*) RelocPos) & 0xffff;
+ ResultPtr = (ResultPtr + Addend) & 0xffff;
+ *((unsigned*) RelocPos) &= 0xffff0000;
*((unsigned*) RelocPos) |= (unsigned) ResultPtr;
break;
-
- default:
- llvm_unreachable("ERROR: Unknown Mips relocation.");
+ }
}
}
}
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
index 41f32a35f1b0..f4c4ae86d38d 100644
--- a/lib/Target/Mips/MipsJITInfo.h
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -1,4 +1,4 @@
-//===- MipsJITInfo.h - Mips implementation of the JIT interface -*- C++ -*-===//
+//===- MipsJITInfo.h - Mips Implementation of the JIT Interface -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,8 +19,6 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
namespace llvm {
class MipsTargetMachine;
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 608a7d21a4f9..1597b9334450 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -15,99 +15,179 @@
#include "MipsMCInstLower.h"
#include "MipsAsmPrinter.h"
#include "MipsInstrInfo.h"
-#include "MipsMCSymbolRefExpr.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
+
using namespace llvm;
-MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
- MipsAsmPrinter &asmprinter)
- : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+MipsMCInstLower::MipsMCInstLower(MipsAsmPrinter &asmprinter)
+ : AsmPrinter(asmprinter) {}
+
+void MipsMCInstLower::Initialize(Mangler *M, MCContext* C) {
+ Mang = M;
+ Ctx = C;
+}
MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy,
unsigned Offset) const {
- MipsMCSymbolRefExpr::VariantKind Kind;
+ MCSymbolRefExpr::VariantKind Kind;
const MCSymbol *Symbol;
switch(MO.getTargetFlags()) {
- default: assert(0 && "Invalid target flag!");
- case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
- case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
- case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
- case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
- case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
- case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
- case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
- case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
- case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
- case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
- case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
- case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
- case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break;
- case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
- case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break;
+ default: llvm_unreachable("Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break;
+ case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT16: Kind = MCSymbolRefExpr::VK_Mips_GOT16; break;
+ case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_TLSLDM: Kind = MCSymbolRefExpr::VK_Mips_TLSLDM; break;
+ case MipsII::MO_DTPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_HI; break;
+ case MipsII::MO_DTPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_DTPREL_LO; break;
+ case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ case MipsII::MO_GPOFF_HI: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_HI; break;
+ case MipsII::MO_GPOFF_LO: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_LO; break;
+ case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break;
+ case MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break;
+ case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break;
}
switch (MOTy) {
- case MachineOperand::MO_MachineBasicBlock:
- Symbol = MO.getMBB()->getSymbol();
- break;
-
- case MachineOperand::MO_GlobalAddress:
- Symbol = Mang->getSymbol(MO.getGlobal());
- break;
-
- case MachineOperand::MO_BlockAddress:
- Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
- break;
-
- case MachineOperand::MO_ExternalSymbol:
- Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
- break;
-
- case MachineOperand::MO_JumpTableIndex:
- Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
- break;
-
- case MachineOperand::MO_ConstantPoolIndex:
- Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
- if (MO.getOffset())
- Offset += MO.getOffset();
- break;
-
- default:
- llvm_unreachable("<unknown operand type>");
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset += MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, *Ctx);
+
+ if (!Offset)
+ return MCOperand::CreateExpr(MCSym);
+
+ // Offset is nonzero here and is assumed to never be negative.
+ assert(Offset > 0);
+
+ const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, *Ctx);
+ const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, *Ctx);
+ return MCOperand::CreateExpr(AddExpr);
+}
+
+static void CreateMCInst(MCInst& Inst, unsigned Opc, const MCOperand& Opnd0,
+ const MCOperand& Opnd1,
+ const MCOperand& Opnd2 = MCOperand()) {
+ Inst.setOpcode(Opc);
+ Inst.addOperand(Opnd0);
+ Inst.addOperand(Opnd1);
+ if (Opnd2.isValid())
+ Inst.addOperand(Opnd2);
+}
+
+// Lower ".cpload $reg" to
+// "lui $gp, %hi(_gp_disp)"
+// "addiu $gp, $gp, %lo(_gp_disp)"
+// "addu $gp, $gp, $t9"
+void MipsMCInstLower::LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts) {
+ MCOperand GPReg = MCOperand::CreateReg(Mips::GP);
+ MCOperand T9Reg = MCOperand::CreateReg(Mips::T9);
+ StringRef SymName("_gp_disp");
+ const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
+ const MCSymbolRefExpr *MCSym;
+
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
+ MCOperand SymHi = MCOperand::CreateExpr(MCSym);
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
+ MCOperand SymLo = MCOperand::CreateExpr(MCSym);
+
+ MCInsts.resize(3);
+
+ CreateMCInst(MCInsts[0], Mips::LUi, GPReg, SymHi);
+ CreateMCInst(MCInsts[1], Mips::ADDiu, GPReg, GPReg, SymLo);
+ CreateMCInst(MCInsts[2], Mips::ADDu, GPReg, GPReg, T9Reg);
+}
+
+// Lower ".cprestore offset" to "sw $gp, offset($sp)".
+void MipsMCInstLower::LowerCPRESTORE(int64_t Offset,
+ SmallVector<MCInst, 4>& MCInsts) {
+ assert(isInt<32>(Offset) && (Offset >= 0) &&
+ "Imm operand of .cprestore must be a non-negative 32-bit value.");
+
+ MCOperand SPReg = MCOperand::CreateReg(Mips::SP), BaseReg = SPReg;
+ MCOperand GPReg = MCOperand::CreateReg(Mips::GP);
+
+ if (!isInt<16>(Offset)) {
+ unsigned Hi = ((Offset + 0x8000) >> 16) & 0xffff;
+ Offset &= 0xffff;
+ MCOperand ATReg = MCOperand::CreateReg(Mips::AT);
+ BaseReg = ATReg;
+
+ // lui at,hi
+ // addu at,at,sp
+ MCInsts.resize(2);
+ CreateMCInst(MCInsts[0], Mips::LUi, ATReg, MCOperand::CreateImm(Hi));
+ CreateMCInst(MCInsts[1], Mips::ADDu, ATReg, ATReg, SPReg);
}
-
- return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
- Ctx));
+
+ MCInst Sw;
+ CreateMCInst(Sw, Mips::SW, GPReg, BaseReg, MCOperand::CreateImm(Offset));
+ MCInsts.push_back(Sw);
}
-MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
+MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO,
+ unsigned offset) const {
MachineOperandType MOTy = MO.getType();
-
+
switch (MOTy) {
- default:
- assert(0 && "unknown operand type");
- break;
+ default: llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) break;
return MCOperand::CreateReg(MO.getReg());
case MachineOperand::MO_Immediate:
- return MCOperand::CreateImm(MO.getImm());
+ return MCOperand::CreateImm(MO.getImm() + offset);
case MachineOperand::MO_MachineBasicBlock:
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
case MachineOperand::MO_JumpTableIndex:
case MachineOperand::MO_ConstantPoolIndex:
case MachineOperand::MO_BlockAddress:
- return LowerSymbolOperand(MO, MOTy, 0);
+ return LowerSymbolOperand(MO, MOTy, offset);
+ case MachineOperand::MO_RegisterMask:
+ break;
}
return MCOperand();
@@ -115,7 +195,7 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const {
void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp = LowerOperand(MO);
@@ -124,3 +204,140 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.addOperand(MCOp);
}
}
+
+void MipsMCInstLower::LowerUnalignedLoadStore(const MachineInstr *MI,
+                                              SmallVector<MCInst, 4>& MCInsts) {
+ unsigned Opc = MI->getOpcode();
+ MCInst Instr1, Instr2, Instr3, Move;
+
+ bool TwoInstructions = false;
+
+ assert(MI->getNumOperands() == 3);
+ assert(MI->getOperand(0).isReg());
+ assert(MI->getOperand(1).isReg());
+
+ MCOperand Target = LowerOperand(MI->getOperand(0));
+ MCOperand Base = LowerOperand(MI->getOperand(1));
+ MCOperand ATReg = MCOperand::CreateReg(Mips::AT);
+ MCOperand ZeroReg = MCOperand::CreateReg(Mips::ZERO);
+
+ MachineOperand UnLoweredName = MI->getOperand(2);
+ MCOperand Name = LowerOperand(UnLoweredName);
+
+ Move.setOpcode(Mips::ADDu);
+ Move.addOperand(Target);
+ Move.addOperand(ATReg);
+ Move.addOperand(ZeroReg);
+
+ switch (Opc) {
+ case Mips::ULW: {
+ // FIXME: only works for little endian right now
+ MCOperand AdjName = LowerOperand(UnLoweredName, 3);
+ if (Base.getReg() == (Target.getReg())) {
+ Instr1.setOpcode(Mips::LWL);
+ Instr1.addOperand(ATReg);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::LWR);
+ Instr2.addOperand(ATReg);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ Instr3 = Move;
+ } else {
+ TwoInstructions = true;
+ Instr1.setOpcode(Mips::LWL);
+ Instr1.addOperand(Target);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::LWR);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ }
+ break;
+ }
+ case Mips::ULHu: {
+ // FIXME: only works for little endian right now
+ MCOperand AdjName = LowerOperand(UnLoweredName, 1);
+ Instr1.setOpcode(Mips::LBu);
+ Instr1.addOperand(ATReg);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::LBu);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ Instr3.setOpcode(Mips::INS);
+ Instr3.addOperand(Target);
+ Instr3.addOperand(ATReg);
+ Instr3.addOperand(MCOperand::CreateImm(0x8));
+ Instr3.addOperand(MCOperand::CreateImm(0x18));
+ break;
+ }
+
+ case Mips::USW: {
+ // FIXME: only works for little endian right now
+ assert(Base.getReg() != Target.getReg());
+ TwoInstructions = true;
+ MCOperand AdjName = LowerOperand(UnLoweredName, 3);
+ Instr1.setOpcode(Mips::SWL);
+ Instr1.addOperand(Target);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(AdjName);
+ Instr2.setOpcode(Mips::SWR);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(Base);
+ Instr2.addOperand(Name);
+ break;
+ }
+ case Mips::USH: {
+ MCOperand AdjName = LowerOperand(UnLoweredName, 1);
+ Instr1.setOpcode(Mips::SB);
+ Instr1.addOperand(Target);
+ Instr1.addOperand(Base);
+ Instr1.addOperand(Name);
+ Instr2.setOpcode(Mips::SRL);
+ Instr2.addOperand(ATReg);
+ Instr2.addOperand(Target);
+ Instr2.addOperand(MCOperand::CreateImm(8));
+ Instr3.setOpcode(Mips::SB);
+ Instr3.addOperand(ATReg);
+ Instr3.addOperand(Base);
+ Instr3.addOperand(AdjName);
+ break;
+ }
+ default:
+ // FIXME: need to add others
+ llvm_unreachable("unaligned instruction not processed");
+ }
+
+ MCInsts.push_back(Instr1);
+ MCInsts.push_back(Instr2);
+ if (!TwoInstructions) MCInsts.push_back(Instr3);
+}
+
+// Convert
+// "setgp01 $reg"
+// to
+// "lui $reg, %hi(_gp_disp)"
+// "addiu $reg, $reg, %lo(_gp_disp)"
+void MipsMCInstLower::LowerSETGP01(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts) {
+ const MachineOperand &MO = MI->getOperand(0);
+ assert(MO.isReg());
+ MCOperand RegOpnd = MCOperand::CreateReg(MO.getReg());
+ StringRef SymName("_gp_disp");
+ const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName);
+ const MCSymbolRefExpr *MCSym;
+
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_HI, *Ctx);
+ MCOperand SymHi = MCOperand::CreateExpr(MCSym);
+ MCSym = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_Mips_ABS_LO, *Ctx);
+ MCOperand SymLo = MCOperand::CreateExpr(MCSym);
+
+ MCInsts.resize(2);
+
+ CreateMCInst(MCInsts[0], Mips::LUi, RegOpnd, SymHi);
+ CreateMCInst(MCInsts[1], Mips::ADDiu, RegOpnd, RegOpnd, SymLo);
+}
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 223f23aed286..c1d007d2f539 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -1,4 +1,4 @@
-//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------==//
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,35 +9,39 @@
#ifndef MIPSMCINSTLOWER_H
#define MIPSMCINSTLOWER_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
- class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
- class MCSymbol;
class MachineInstr;
class MachineFunction;
class Mangler;
class MipsAsmPrinter;
-
+
/// MipsMCInstLower - This class is used to lower a MachineInstr into an
// MCInst.
class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
typedef MachineOperand::MachineOperandType MachineOperandType;
- MCContext &Ctx;
+ MCContext *Ctx;
Mangler *Mang;
MipsAsmPrinter &AsmPrinter;
public:
- MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
- MipsAsmPrinter &asmprinter);
+ MipsMCInstLower(MipsAsmPrinter &asmprinter);
+ void Initialize(Mangler *mang, MCContext* C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+ void LowerCPLOAD(SmallVector<MCInst, 4>& MCInsts);
+ void LowerCPRESTORE(int64_t Offset, SmallVector<MCInst, 4>& MCInsts);
+ void LowerUnalignedLoadStore(const MachineInstr *MI,
+ SmallVector<MCInst, 4>& MCInsts);
+ void LowerSETGP01(const MachineInstr *MI, SmallVector<MCInst, 4>& MCInsts);
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
- MCOperand LowerOperand(const MachineOperand& MO) const;
+ MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
};
}
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
deleted file mode 100644
index a0a242c8c443..000000000000
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mipsmcsymbolrefexpr"
-#include "MipsMCSymbolRefExpr.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-using namespace llvm;
-
-const MipsMCSymbolRefExpr*
-MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
- int Offset, MCContext &Ctx) {
- return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
-}
-
-void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
- switch (Kind) {
- default: assert(0 && "Invalid kind!");
- case VK_Mips_None: break;
- case VK_Mips_GPREL: OS << "%gp_rel("; break;
- case VK_Mips_GOT_CALL: OS << "%call16("; break;
- case VK_Mips_GOT: OS << "%got("; break;
- case VK_Mips_ABS_HI: OS << "%hi("; break;
- case VK_Mips_ABS_LO: OS << "%lo("; break;
- case VK_Mips_TLSGD: OS << "%tlsgd("; break;
- case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
- case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
- case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
- case VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break;
- case VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break;
- case VK_Mips_GOT_DISP: OS << "%got_disp("; break;
- case VK_Mips_GOT_PAGE: OS << "%got_page("; break;
- case VK_Mips_GOT_OFST: OS << "%got_ofst("; break;
- }
-
- OS << *Symbol;
-
- if (Offset) {
- if (Offset > 0)
- OS << '+';
- OS << Offset;
- }
-
- if (Kind == VK_Mips_GPOFF_HI || Kind == VK_Mips_GPOFF_LO)
- OS << ")))";
- else if (Kind != VK_Mips_None)
- OS << ')';
-}
-
-bool
-MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return false;
-}
-
-void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
- Asm->getOrCreateSymbolData(*Symbol);
-}
-
-const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
- return Symbol->isDefined() ? &Symbol->getSection() : NULL;
-}
-
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h
deleted file mode 100644
index 55e85a79c1c8..000000000000
--- a/lib/Target/Mips/MipsMCSymbolRefExpr.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MIPSMCSYMBOLREFEXPR_H
-#define MIPSMCSYMBOLREFEXPR_H
-#include "llvm/MC/MCExpr.h"
-
-namespace llvm {
-
-class MipsMCSymbolRefExpr : public MCTargetExpr {
-public:
- enum VariantKind {
- VK_Mips_None,
- VK_Mips_GPREL,
- VK_Mips_GOT_CALL,
- VK_Mips_GOT,
- VK_Mips_ABS_HI,
- VK_Mips_ABS_LO,
- VK_Mips_TLSGD,
- VK_Mips_GOTTPREL,
- VK_Mips_TPREL_HI,
- VK_Mips_TPREL_LO,
- VK_Mips_GPOFF_HI,
- VK_Mips_GPOFF_LO,
- VK_Mips_GOT_DISP,
- VK_Mips_GOT_PAGE,
- VK_Mips_GOT_OFST
- };
-
-private:
- const VariantKind Kind;
- const MCSymbol *Symbol;
- int Offset;
-
- explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
- int _Offset)
- : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
-
-public:
- static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
- const MCSymbol *Symbol, int Offset,
- MCContext &Ctx);
-
- void PrintImpl(raw_ostream &OS) const;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const;
-
- static bool classof(const MCExpr *E) {
- return E->getKind() == MCExpr::Target;
- }
-
- static bool classof(const MipsMCSymbolRefExpr *) { return true; }
-
- int getOffset() const { return Offset; }
- void setOffset(int O) { Offset = O; }
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
new file mode 100644
index 000000000000..b00c62b09f4c
--- /dev/null
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -0,0 +1,50 @@
+//===-- MipsMachineFunctionInfo.cpp - Private data used for Mips ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMachineFunction.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
+ cl::desc("Always use $gp as the global base register."));
+
+bool MipsFunctionInfo::globalBaseRegFixed() const {
+ return FixGlobalBaseReg;
+}
+
+bool MipsFunctionInfo::globalBaseRegSet() const {
+ return GlobalBaseReg;
+}
+
+unsigned MipsFunctionInfo::getGlobalBaseReg() {
+ // Return if it has already been initialized.
+ if (GlobalBaseReg)
+ return GlobalBaseReg;
+
+ const MipsSubtarget &ST = MF.getTarget().getSubtarget<MipsSubtarget>();
+
+ if (FixGlobalBaseReg) // $gp is the global base register.
+ return GlobalBaseReg = ST.isABI_N64() ? Mips::GP_64 : Mips::GP;
+
+ const TargetRegisterClass *RC;
+ RC = ST.isABI_N64() ?
+ Mips::CPU64RegsRegisterClass : Mips::CPURegsRegisterClass;
+
+ return GlobalBaseReg = MF.getRegInfo().createVirtualRegister(RC);
+}
+
+void MipsFunctionInfo::anchor() { }
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index bc30b6b2425b..0fde55cb62e8 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -14,19 +14,17 @@
#ifndef MIPS_MACHINE_FUNCTION_INFO_H
#define MIPS_MACHINE_FUNCTION_INFO_H
-#include <utility>
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include <utility>
namespace llvm {
/// MipsFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private Mips target-specific information for each MachineFunction.
class MipsFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
-private:
MachineFunction& MF;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
@@ -45,18 +43,20 @@ private:
// InArgFIRange: Range of indices of all frame objects created during call to
// LowerFormalArguments.
// OutArgFIRange: Range of indices of all frame objects created during call to
- // LowerCall except for the frame object for restoring $gp.
+ // LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
- int GPFI; // Index of the frame object for restoring $gp
- mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
+ int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
+ bool EmitNOAT;
+
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
- MaxCallFrameSize(0)
+ MaxCallFrameSize(0), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -64,7 +64,7 @@ public:
}
void setLastInArgFI(int FI) { InArgFIRange.second = FI; }
- bool isOutArgFI(int FI) const {
+ bool isOutArgFI(int FI) const {
return FI <= OutArgFIRange.first && FI >= OutArgFIRange.second;
}
void extendOutArgFIRange(int FirstFI, int LastFI) {
@@ -92,14 +92,18 @@ public:
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
- unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
- void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+ bool globalBaseRegFixed() const;
+ bool globalBaseRegSet() const;
+ unsigned getGlobalBaseReg();
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+
+ bool getEmitNOAT() const { return EmitNOAT; }
+ void setEmitNOAT() { EmitNOAT = true; }
};
} // end of namespace llvm
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index f8c0fdac8cf0..f30de449f6d5 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.cpp - MIPS Register Information -== -----*- C++ -*-===//
+//===-- MipsRegisterInfo.cpp - MIPS Register Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,9 +13,10 @@
#define DEBUG_TYPE "mips-reg-info"
+#include "MipsRegisterInfo.h"
#include "Mips.h"
+#include "MipsAnalyzeImmediate.h"
#include "MipsSubtarget.h"
-#include "MipsRegisterInfo.h"
#include "MipsMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Type.h"
@@ -45,97 +46,6 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
const TargetInstrInfo &tii)
: MipsGenRegisterInfo(Mips::RA), Subtarget(ST), TII(tii) {}
-/// getRegisterNumbering - Given the enum value for some register, e.g.
-/// Mips::RA, return the number that it corresponds to (e.g. 31).
-unsigned MipsRegisterInfo::
-getRegisterNumbering(unsigned RegEnum)
-{
- switch (RegEnum) {
- case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
- case Mips::D0:
- return 0;
- case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
- return 1;
- case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
- case Mips::D1:
- return 2;
- case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
- return 3;
- case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
- case Mips::D2:
- return 4;
- case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
- return 5;
- case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
- case Mips::D3:
- return 6;
- case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
- return 7;
- case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
- case Mips::D4:
- return 8;
- case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
- return 9;
- case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
- case Mips::D5:
- return 10;
- case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
- return 11;
- case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
- case Mips::D6:
- return 12;
- case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
- return 13;
- case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
- case Mips::D7:
- return 14;
- case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
- return 15;
- case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
- case Mips::D8:
- return 16;
- case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
- return 17;
- case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
- case Mips::D9:
- return 18;
- case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
- return 19;
- case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
- case Mips::D10:
- return 20;
- case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
- return 21;
- case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
- case Mips::D11:
- return 22;
- case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
- return 23;
- case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
- case Mips::D12:
- return 24;
- case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
- return 25;
- case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
- case Mips::D13:
- return 26;
- case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
- return 27;
- case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
- case Mips::D14:
- return 28;
- case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
- return 29;
- case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
- case Mips::D15:
- return 30;
- case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
- return 31;
- default: llvm_unreachable("Unknown register number!");
- }
- return 0; // Not reached
-}
-
unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
//===----------------------------------------------------------------------===//
@@ -143,71 +53,55 @@ unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; }
//===----------------------------------------------------------------------===//
/// Mips Callee Saved Registers
-const unsigned* MipsRegisterInfo::
+const uint16_t* MipsRegisterInfo::
getCalleeSavedRegs(const MachineFunction *MF) const
{
- // Mips callee-save register range is $16-$23, $f20-$f30
- static const unsigned SingleFloatOnlyCalleeSavedRegs[] = {
- Mips::F31, Mips::F30, Mips::F29, Mips::F28, Mips::F27, Mips::F26,
- Mips::F25, Mips::F24, Mips::F23, Mips::F22, Mips::F21, Mips::F20,
- Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
- Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
- };
-
- static const unsigned Mips32CalleeSavedRegs[] = {
- Mips::D15, Mips::D14, Mips::D13, Mips::D12, Mips::D11, Mips::D10,
- Mips::RA, Mips::FP, Mips::S7, Mips::S6, Mips::S5, Mips::S4,
- Mips::S3, Mips::S2, Mips::S1, Mips::S0, 0
- };
-
- static const unsigned N32CalleeSavedRegs[] = {
- Mips::D31_64, Mips::D29_64, Mips::D27_64, Mips::D25_64, Mips::D23_64,
- Mips::D21_64,
- Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
- Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
- Mips::S0_64, 0
- };
+ if (Subtarget.isSingleFloat())
+ return CSR_SingleFloatOnly_SaveList;
+ else if (!Subtarget.hasMips64())
+ return CSR_O32_SaveList;
+ else if (Subtarget.isABI_N32())
+ return CSR_N32_SaveList;
- static const unsigned N64CalleeSavedRegs[] = {
- Mips::D31_64, Mips::D30_64, Mips::D29_64, Mips::D28_64, Mips::D27_64,
- Mips::D26_64, Mips::D25_64, Mips::D24_64,
- Mips::RA_64, Mips::FP_64, Mips::GP_64, Mips::S7_64, Mips::S6_64,
- Mips::S5_64, Mips::S4_64, Mips::S3_64, Mips::S2_64, Mips::S1_64,
- Mips::S0_64, 0
- };
+ assert(Subtarget.isABI_N64());
+ return CSR_N64_SaveList;
+}
+const uint32_t*
+MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const
+{
if (Subtarget.isSingleFloat())
- return SingleFloatOnlyCalleeSavedRegs;
+ return CSR_SingleFloatOnly_RegMask;
else if (!Subtarget.hasMips64())
- return Mips32CalleeSavedRegs;
+ return CSR_O32_RegMask;
else if (Subtarget.isABI_N32())
- return N32CalleeSavedRegs;
-
+ return CSR_N32_RegMask;
+
assert(Subtarget.isABI_N64());
- return N64CalleeSavedRegs;
+ return CSR_N64_RegMask;
}
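The mask returned by getCallPreservedMask is consumed as a plain bit-per-register table: a set bit marks a register that survives the call. A minimal standalone sketch of that convention, with made-up register numbers standing in for the tablegen-assigned Mips enums:

#include <cstdint>
#include <cstdio>

// Placeholder enum values; the real numbers come from MipsGenRegisterInfo.inc.
enum { RegT0 = 8, RegS0 = 16, NumRegs = 64 };

// One bit per register; a set bit means the register is preserved across
// the call (the convention the CSR_*_RegMask tables follow).
static bool isPreserved(const uint32_t *Mask, unsigned Reg) {
  return Mask[Reg / 32] & (1u << (Reg % 32));
}

int main() {
  uint32_t Mask[NumRegs / 32] = {};
  Mask[RegS0 / 32] |= 1u << (RegS0 % 32); // $s0 is callee-saved
  std::printf("S0 preserved: %d\n", isPreserved(Mask, RegS0));
  std::printf("T0 preserved: %d\n", isPreserved(Mask, RegT0));
}

Packing the table into raw uint32_t words lets a caller test preservation with one load and one AND, rather than walking the old sentinel-terminated lists.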
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- static const unsigned ReservedCPURegs[] = {
- Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
- Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0
+ static const uint16_t ReservedCPURegs[] = {
+ Mips::ZERO, Mips::AT, Mips::K0, Mips::K1,
+ Mips::SP, Mips::FP, Mips::RA
};
- static const unsigned ReservedCPU64Regs[] = {
- Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
- Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0
+ static const uint16_t ReservedCPU64Regs[] = {
+ Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64,
+ Mips::SP_64, Mips::FP_64, Mips::RA_64
};
BitVector Reserved(getNumRegs());
typedef TargetRegisterClass::iterator RegIter;
- for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg)
- Reserved.set(*Reg);
+ for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I)
+ Reserved.set(ReservedCPURegs[I]);
if (Subtarget.hasMips64()) {
- for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg)
- Reserved.set(*Reg);
+ for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I)
+ Reserved.set(ReservedCPU64Regs[I]);
// Reserve all registers in AFGR64.
for (RegIter Reg = Mips::AFGR64RegisterClass->begin();
@@ -224,10 +118,25 @@ getReservedRegs(const MachineFunction &MF) const {
Reg != Mips::FGR64RegisterClass->end(); ++Reg)
Reserved.set(*Reg);
}
-
+
+ // If GP is dedicated as a global base register, reserve it.
+ if (MF.getInfo<MipsFunctionInfo>()->globalBaseRegFixed()) {
+ Reserved.set(Mips::GP);
+ Reserved.set(Mips::GP_64);
+ }
+
+ // Reserve hardware registers.
+ Reserved.set(Mips::HWR29);
+ Reserved.set(Mips::HWR29_64);
+
return Reserved;
}
+bool
+MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
+ return true;
+}
+
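The reserved-register loops above drop the 0 sentinel in favor of a static element count via array_lengthof, which is just the usual sizeof-style template. A compilable stand-in for the idiom, with placeholder values for the Mips::* enum constants:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Same shape as llvm::array_lengthof: deduce N from the array reference.
template <typename T, size_t N>
constexpr size_t arrayLengthof(T (&)[N]) { return N; }

int main() {
  static const uint16_t ReservedCPURegs[] = { 0, 1, 26, 27, 29, 30, 31 };
  for (size_t I = 0; I < arrayLengthof(ReservedCPURegs); ++I)
    std::printf("reserving reg enum %u\n", (unsigned)ReservedCPURegs[I]);
}

Sizing the array statically also removes the need to keep a trailing terminator in sync with the data.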
// This function eliminates the ADJCALLSTACKDOWN and
// ADJCALLSTACKUP pseudo instructions.
void MipsRegisterInfo::
@@ -259,8 +168,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
errs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(i).getIndex();
- int stackSize = MF.getFrameInfo()->getStackSize();
- int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ uint64_t stackSize = MF.getFrameInfo()->getStackSize();
+ int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
<< "spOffset : " << spOffset << "\n"
@@ -279,52 +188,71 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// 1. Outgoing arguments.
// 2. Pointer to dynamically allocated stack space.
// 3. Locations for callee-saved registers.
- // Everything else is referenced relative to whatever register
+ // Everything else is referenced relative to whatever register
// getFrameRegister() returns.
unsigned FrameReg;
if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
(FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
- FrameReg = Mips::SP;
+ FrameReg = Subtarget.isABI_N64() ? Mips::SP_64 : Mips::SP;
else
- FrameReg = getFrameRegister(MF);
-
+ FrameReg = getFrameRegister(MF);
+
// Calculate final offset.
// - There is no need to change the offset if the frame object is one of the
// following: an outgoing argument, pointer to a dynamically allocated
// stack space or a $gp restore location,
// - If the frame object is any of the following, its offset must be adjusted
// by adding the size of the stack:
- // incoming argument, callee-saved register location or local variable.
- int Offset;
+ // incoming argument, callee-saved register location or local variable.
+ int64_t Offset;
if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
MipsFI->isDynAllocFI(FrameIndex))
Offset = spOffset;
else
- Offset = spOffset + stackSize;
+ Offset = spOffset + (int64_t)stackSize;
Offset += MI.getOperand(i+1).getImm();
DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
// If MI is not a debug value, make sure Offset fits in the 16-bit immediate
- // field.
- if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+ // field.
+ if (!MI.isDebugValue() && !isInt<16>(Offset)) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
- ((Offset & 0x8000) != 0);
-
- // FIXME: change this when mips goes MC".
- BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
- BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
- BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
- .addReg(Mips::AT);
- FrameReg = Mips::AT;
- Offset = (short)(Offset & 0xffff);
-
- BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+ MipsAnalyzeImmediate AnalyzeImm;
+ unsigned Size = Subtarget.isABI_N64() ? 64 : 32;
+ unsigned LUi = Subtarget.isABI_N64() ? Mips::LUi64 : Mips::LUi;
+ unsigned ADDu = Subtarget.isABI_N64() ? Mips::DADDu : Mips::ADDu;
+ unsigned ZEROReg = Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO;
+ unsigned ATReg = Subtarget.isABI_N64() ? Mips::AT_64 : Mips::AT;
+ const MipsAnalyzeImmediate::InstSeq &Seq =
+ AnalyzeImm.Analyze(Offset, Size, true /* LastInstrIsADDiu */);
+ MipsAnalyzeImmediate::InstSeq::const_iterator Inst = Seq.begin();
+
+ MipsFI->setEmitNOAT();
+
+ // The first instruction can be a LUi, which is different from other
+ // instructions (ADDiu, ORI and SLL) in that it does not have a register
+ // operand.
+ if (Inst->Opc == LUi)
+ BuildMI(MBB, II, DL, TII.get(LUi), ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+ else
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ZEROReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ // Build the remaining instructions in Seq except for the last one.
+ for (++Inst; Inst != Seq.end() - 1; ++Inst)
+ BuildMI(MBB, II, DL, TII.get(Inst->Opc), ATReg).addReg(ATReg)
+ .addImm(SignExtend64<16>(Inst->ImmOpnd));
+
+ BuildMI(MBB, II, DL, TII.get(ADDu), ATReg).addReg(FrameReg).addReg(ATReg);
+
+ FrameReg = ATReg;
+ Offset = SignExtend64<16>(Inst->ImmOpnd);
}
MI.getOperand(i).ChangeToRegister(FrameReg, false);
@@ -334,18 +262,18 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned MipsRegisterInfo::
getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ bool IsN64 = Subtarget.isABI_N64();
- return TFI->hasFP(MF) ? Mips::FP : Mips::SP;
+ return TFI->hasFP(MF) ? (IsN64 ? Mips::FP_64 : Mips::FP) :
+ (IsN64 ? Mips::SP_64 : Mips::SP);
}
unsigned MipsRegisterInfo::
getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned MipsRegisterInfo::
getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
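Both the removed ImmHi computation and the new MipsAnalyzeImmediate path in eliminateFrameIndex rest on the same identity: an offset splits into a high part for LUi and a sign-extended low part for the final ADDiu, with the high part bumped by one whenever bit 15 is set. A self-contained check of that arithmetic (32-bit only; the real analyzer also emits ORI/SLL chains for 64-bit values):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Split Offset so that Hi * 65536 + Lo == Offset, where Lo is the
// sign-extended low half an ADDiu would carry. Assumes arithmetic
// right shift for negative values, as mainstream compilers provide.
static void splitOffset(int32_t Offset, int32_t &Hi, int16_t &Lo) {
  Lo = static_cast<int16_t>(Offset & 0xffff);
  Hi = (Offset >> 16) + ((Offset & 0x8000) ? 1 : 0);
}

int main() {
  const int32_t Tests[] = { 0x7fff, 0x8000, -0x9000, 0x12348000 };
  for (int32_t Off : Tests) {
    int32_t Hi; int16_t Lo;
    splitOffset(Off, Hi, Lo);
    assert(Hi * 65536 + Lo == Off); // LUi Hi; ADDiu Lo rebuilds Offset
    std::printf("%d -> hi=%d lo=%d\n", Off, Hi, Lo);
  }
}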
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 67e57dd71bdd..0716d29b2f38 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.h - Mips Register Information Impl ------*- C++ -*-===//
+//===-- MipsRegisterInfo.h - Mips Register Information Impl -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -42,10 +42,13 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
void adjustMipsStackFrame(MachineFunction &MF) const;
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 925ad9e70ab6..ce399a031201 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
+//===-- MipsRegisterInfo.td - Mips Register defs -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,6 +50,7 @@ class AFPR<bits<5> num, string n, list<Register> subregs>
: MipsRegWithSubRegs<n, subregs> {
let Num = num;
let SubRegIndices = [sub_fpeven, sub_fpodd];
+ let CoveredBySubRegs = 1;
}
class AFPR64<bits<5> num, string n, list<Register> subregs>
@@ -68,8 +69,6 @@ class HWR<bits<5> num, string n> : MipsReg<n> {
//===----------------------------------------------------------------------===//
let Namespace = "Mips" in {
- // FIXME: Fix DwarfRegNum.
-
// General Purpose Registers
def ZERO : MipsGPRReg< 0, "ZERO">, DwarfRegNum<[0]>;
def AT : MipsGPRReg< 1, "AT">, DwarfRegNum<[1]>;
@@ -105,38 +104,38 @@ let Namespace = "Mips" in {
def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>;
// General Purpose 64-bit Registers
- def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>;
- def AT_64 : Mips64GPRReg< 1, "AT", [AT]>;
- def V0_64 : Mips64GPRReg< 2, "2", [V0]>;
- def V1_64 : Mips64GPRReg< 3, "3", [V1]>;
- def A0_64 : Mips64GPRReg< 4, "4", [A0]>;
- def A1_64 : Mips64GPRReg< 5, "5", [A1]>;
- def A2_64 : Mips64GPRReg< 6, "6", [A2]>;
- def A3_64 : Mips64GPRReg< 7, "7", [A3]>;
- def T0_64 : Mips64GPRReg< 8, "8", [T0]>;
- def T1_64 : Mips64GPRReg< 9, "9", [T1]>;
- def T2_64 : Mips64GPRReg< 10, "10", [T2]>;
- def T3_64 : Mips64GPRReg< 11, "11", [T3]>;
- def T4_64 : Mips64GPRReg< 12, "12", [T4]>;
- def T5_64 : Mips64GPRReg< 13, "13", [T5]>;
- def T6_64 : Mips64GPRReg< 14, "14", [T6]>;
- def T7_64 : Mips64GPRReg< 15, "15", [T7]>;
- def S0_64 : Mips64GPRReg< 16, "16", [S0]>;
- def S1_64 : Mips64GPRReg< 17, "17", [S1]>;
- def S2_64 : Mips64GPRReg< 18, "18", [S2]>;
- def S3_64 : Mips64GPRReg< 19, "19", [S3]>;
- def S4_64 : Mips64GPRReg< 20, "20", [S4]>;
- def S5_64 : Mips64GPRReg< 21, "21", [S5]>;
- def S6_64 : Mips64GPRReg< 22, "22", [S6]>;
- def S7_64 : Mips64GPRReg< 23, "23", [S7]>;
- def T8_64 : Mips64GPRReg< 24, "24", [T8]>;
- def T9_64 : Mips64GPRReg< 25, "25", [T9]>;
- def K0_64 : Mips64GPRReg< 26, "26", [K0]>;
- def K1_64 : Mips64GPRReg< 27, "27", [K1]>;
- def GP_64 : Mips64GPRReg< 28, "GP", [GP]>;
- def SP_64 : Mips64GPRReg< 29, "SP", [SP]>;
- def FP_64 : Mips64GPRReg< 30, "FP", [FP]>;
- def RA_64 : Mips64GPRReg< 31, "RA", [RA]>;
+ def ZERO_64 : Mips64GPRReg< 0, "ZERO", [ZERO]>, DwarfRegNum<[0]>;
+ def AT_64 : Mips64GPRReg< 1, "AT", [AT]>, DwarfRegNum<[1]>;
+ def V0_64 : Mips64GPRReg< 2, "2", [V0]>, DwarfRegNum<[2]>;
+ def V1_64 : Mips64GPRReg< 3, "3", [V1]>, DwarfRegNum<[3]>;
+ def A0_64 : Mips64GPRReg< 4, "4", [A0]>, DwarfRegNum<[4]>;
+ def A1_64 : Mips64GPRReg< 5, "5", [A1]>, DwarfRegNum<[5]>;
+ def A2_64 : Mips64GPRReg< 6, "6", [A2]>, DwarfRegNum<[6]>;
+ def A3_64 : Mips64GPRReg< 7, "7", [A3]>, DwarfRegNum<[7]>;
+ def T0_64 : Mips64GPRReg< 8, "8", [T0]>, DwarfRegNum<[8]>;
+ def T1_64 : Mips64GPRReg< 9, "9", [T1]>, DwarfRegNum<[9]>;
+ def T2_64 : Mips64GPRReg< 10, "10", [T2]>, DwarfRegNum<[10]>;
+ def T3_64 : Mips64GPRReg< 11, "11", [T3]>, DwarfRegNum<[11]>;
+ def T4_64 : Mips64GPRReg< 12, "12", [T4]>, DwarfRegNum<[12]>;
+ def T5_64 : Mips64GPRReg< 13, "13", [T5]>, DwarfRegNum<[13]>;
+ def T6_64 : Mips64GPRReg< 14, "14", [T6]>, DwarfRegNum<[14]>;
+ def T7_64 : Mips64GPRReg< 15, "15", [T7]>, DwarfRegNum<[15]>;
+ def S0_64 : Mips64GPRReg< 16, "16", [S0]>, DwarfRegNum<[16]>;
+ def S1_64 : Mips64GPRReg< 17, "17", [S1]>, DwarfRegNum<[17]>;
+ def S2_64 : Mips64GPRReg< 18, "18", [S2]>, DwarfRegNum<[18]>;
+ def S3_64 : Mips64GPRReg< 19, "19", [S3]>, DwarfRegNum<[19]>;
+ def S4_64 : Mips64GPRReg< 20, "20", [S4]>, DwarfRegNum<[20]>;
+ def S5_64 : Mips64GPRReg< 21, "21", [S5]>, DwarfRegNum<[21]>;
+ def S6_64 : Mips64GPRReg< 22, "22", [S6]>, DwarfRegNum<[22]>;
+ def S7_64 : Mips64GPRReg< 23, "23", [S7]>, DwarfRegNum<[23]>;
+ def T8_64 : Mips64GPRReg< 24, "24", [T8]>, DwarfRegNum<[24]>;
+ def T9_64 : Mips64GPRReg< 25, "25", [T9]>, DwarfRegNum<[25]>;
+ def K0_64 : Mips64GPRReg< 26, "26", [K0]>, DwarfRegNum<[26]>;
+ def K1_64 : Mips64GPRReg< 27, "27", [K1]>, DwarfRegNum<[27]>;
+ def GP_64 : Mips64GPRReg< 28, "GP", [GP]>, DwarfRegNum<[28]>;
+ def SP_64 : Mips64GPRReg< 29, "SP", [SP]>, DwarfRegNum<[29]>;
+ def FP_64 : Mips64GPRReg< 30, "FP", [FP]>, DwarfRegNum<[30]>;
+ def RA_64 : Mips64GPRReg< 31, "RA", [RA]>, DwarfRegNum<[31]>;
/// Mips Single point precision FPU Registers
def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>;
@@ -192,38 +191,38 @@ let Namespace = "Mips" in {
def D15 : AFPR<30, "F30", [F30, F31]>;
/// Mips Double point precision FPU Registers in MFP64 mode.
- def D0_64 : AFPR64<0, "F0", [F0]>;
- def D1_64 : AFPR64<1, "F1", [F1]>;
- def D2_64 : AFPR64<2, "F2", [F2]>;
- def D3_64 : AFPR64<3, "F3", [F3]>;
- def D4_64 : AFPR64<4, "F4", [F4]>;
- def D5_64 : AFPR64<5, "F5", [F5]>;
- def D6_64 : AFPR64<6, "F6", [F6]>;
- def D7_64 : AFPR64<7, "F7", [F7]>;
- def D8_64 : AFPR64<8, "F8", [F8]>;
- def D9_64 : AFPR64<9, "F9", [F9]>;
- def D10_64 : AFPR64<10, "F10", [F10]>;
- def D11_64 : AFPR64<11, "F11", [F11]>;
- def D12_64 : AFPR64<12, "F12", [F12]>;
- def D13_64 : AFPR64<13, "F13", [F13]>;
- def D14_64 : AFPR64<14, "F14", [F14]>;
- def D15_64 : AFPR64<15, "F15", [F15]>;
- def D16_64 : AFPR64<16, "F16", [F16]>;
- def D17_64 : AFPR64<17, "F17", [F17]>;
- def D18_64 : AFPR64<18, "F18", [F18]>;
- def D19_64 : AFPR64<19, "F19", [F19]>;
- def D20_64 : AFPR64<20, "F20", [F20]>;
- def D21_64 : AFPR64<21, "F21", [F21]>;
- def D22_64 : AFPR64<22, "F22", [F22]>;
- def D23_64 : AFPR64<23, "F23", [F23]>;
- def D24_64 : AFPR64<24, "F24", [F24]>;
- def D25_64 : AFPR64<25, "F25", [F25]>;
- def D26_64 : AFPR64<26, "F26", [F26]>;
- def D27_64 : AFPR64<27, "F27", [F27]>;
- def D28_64 : AFPR64<28, "F28", [F28]>;
- def D29_64 : AFPR64<29, "F29", [F29]>;
- def D30_64 : AFPR64<30, "F30", [F30]>;
- def D31_64 : AFPR64<31, "F31", [F31]>;
+ def D0_64 : AFPR64<0, "F0", [F0]>, DwarfRegNum<[32]>;
+ def D1_64 : AFPR64<1, "F1", [F1]>, DwarfRegNum<[33]>;
+ def D2_64 : AFPR64<2, "F2", [F2]>, DwarfRegNum<[34]>;
+ def D3_64 : AFPR64<3, "F3", [F3]>, DwarfRegNum<[35]>;
+ def D4_64 : AFPR64<4, "F4", [F4]>, DwarfRegNum<[36]>;
+ def D5_64 : AFPR64<5, "F5", [F5]>, DwarfRegNum<[37]>;
+ def D6_64 : AFPR64<6, "F6", [F6]>, DwarfRegNum<[38]>;
+ def D7_64 : AFPR64<7, "F7", [F7]>, DwarfRegNum<[39]>;
+ def D8_64 : AFPR64<8, "F8", [F8]>, DwarfRegNum<[40]>;
+ def D9_64 : AFPR64<9, "F9", [F9]>, DwarfRegNum<[41]>;
+ def D10_64 : AFPR64<10, "F10", [F10]>, DwarfRegNum<[42]>;
+ def D11_64 : AFPR64<11, "F11", [F11]>, DwarfRegNum<[43]>;
+ def D12_64 : AFPR64<12, "F12", [F12]>, DwarfRegNum<[44]>;
+ def D13_64 : AFPR64<13, "F13", [F13]>, DwarfRegNum<[45]>;
+ def D14_64 : AFPR64<14, "F14", [F14]>, DwarfRegNum<[46]>;
+ def D15_64 : AFPR64<15, "F15", [F15]>, DwarfRegNum<[47]>;
+ def D16_64 : AFPR64<16, "F16", [F16]>, DwarfRegNum<[48]>;
+ def D17_64 : AFPR64<17, "F17", [F17]>, DwarfRegNum<[49]>;
+ def D18_64 : AFPR64<18, "F18", [F18]>, DwarfRegNum<[50]>;
+ def D19_64 : AFPR64<19, "F19", [F19]>, DwarfRegNum<[51]>;
+ def D20_64 : AFPR64<20, "F20", [F20]>, DwarfRegNum<[52]>;
+ def D21_64 : AFPR64<21, "F21", [F21]>, DwarfRegNum<[53]>;
+ def D22_64 : AFPR64<22, "F22", [F22]>, DwarfRegNum<[54]>;
+ def D23_64 : AFPR64<23, "F23", [F23]>, DwarfRegNum<[55]>;
+ def D24_64 : AFPR64<24, "F24", [F24]>, DwarfRegNum<[56]>;
+ def D25_64 : AFPR64<25, "F25", [F25]>, DwarfRegNum<[57]>;
+ def D26_64 : AFPR64<26, "F26", [F26]>, DwarfRegNum<[58]>;
+ def D27_64 : AFPR64<27, "F27", [F27]>, DwarfRegNum<[59]>;
+ def D28_64 : AFPR64<28, "F28", [F28]>, DwarfRegNum<[60]>;
+ def D29_64 : AFPR64<29, "F29", [F29]>, DwarfRegNum<[61]>;
+ def D30_64 : AFPR64<30, "F30", [F30]>, DwarfRegNum<[62]>;
+ def D31_64 : AFPR64<31, "F31", [F31]>, DwarfRegNum<[63]>;
// Hi/Lo registers
def HI : Register<"hi">, DwarfRegNum<[64]>;
@@ -239,6 +238,7 @@ let Namespace = "Mips" in {
// Hardware register $29
def HWR29 : Register<"29">;
+ def HWR29_64 : Register<"29">;
}
//===----------------------------------------------------------------------===//
@@ -301,3 +301,5 @@ def HILO64 : RegisterClass<"Mips", [i64], 64, (add HI64, LO64)> {
// Hardware registers
def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
+def HWRegs64 : RegisterClass<"Mips", [i64], 32, (add HWR29_64)>;
+
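The DwarfRegNum additions in this file follow a single fixed scheme: the 64-bit GPR views share DWARF numbers 0-31 with their 32-bit counterparts, while F0-F31 and the D*_64 views built on them occupy 32-63. A trivial, purely illustrative sketch of the mapping:

#include <cstdio>

// DWARF numbering as encoded in the tablegen above: GPR index maps
// straight through, FPU registers are offset by 32.
static int dwarfRegNum(bool IsFPR, unsigned Index) {
  return (IsFPR ? 32 : 0) + static_cast<int>(Index);
}

int main() {
  std::printf("SP     -> %d\n", dwarfRegNum(false, 29)); // 29
  std::printf("F0     -> %d\n", dwarfRegNum(true, 0));   // 32
  std::printf("D31_64 -> %d\n", dwarfRegNum(true, 31));  // 63
}

Sharing numbers between the 32- and 64-bit views means a debugger sees one architectural register regardless of which view codegen used.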
diff --git a/lib/Target/Mips/MipsRelocations.h b/lib/Target/Mips/MipsRelocations.h
index 66d1bfd993f5..0787ed399d5f 100644
--- a/lib/Target/Mips/MipsRelocations.h
+++ b/lib/Target/Mips/MipsRelocations.h
@@ -1,16 +1,16 @@
-//===- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
+//===-- MipsRelocations.h - Mips Code Relocations ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
//
// This file defines the Mips target-specific relocation types
// (for relocation-model=static).
//
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
#ifndef MIPSRELOCATIONS_H_
#define MIPSRELOCATIONS_H_
@@ -20,10 +20,10 @@
namespace llvm {
namespace Mips{
enum RelocationType {
- // reloc_mips_branch - pc relative relocation for branches. The lower 18
+ // reloc_mips_pc16 - pc relative relocation for branches. The lower 18
// bits of the difference between the branch target and the branch
// instruction, shifted right by 2.
- reloc_mips_branch = 1,
+ reloc_mips_pc16 = 1,
// reloc_mips_hi - upper 16 bits of the address (modified by +1 if the
// lower 16 bits of the address is negative).
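As the renamed comment states, reloc_mips_pc16 stores the low 18 bits of the displacement shifted right by 2, i.e. a signed 16-bit word offset. An encoder/decoder pair for that arithmetic, following the comment's definition of the reference point (illustrative only, not the backend's fixup code):

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint16_t encodePC16(uint32_t Branch, uint32_t Target) {
  return static_cast<uint16_t>(((Target - Branch) & 0x3ffff) >> 2);
}

static uint32_t applyPC16(uint32_t Branch, uint16_t Field) {
  int32_t Off = static_cast<int16_t>(Field) * 4; // sign-extend, rescale
  return Branch + Off;
}

int main() {
  uint32_t Branch = 0x1000;
  for (uint32_t Target : { 0x1040u, 0x0f00u }) { // forward and backward
    uint16_t F = encodePC16(Branch, Target);
    assert(applyPC16(Branch, F) == Target);
    std::printf("0x%x -> field 0x%x\n", Target, F);
  }
}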
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 00be8ee94431..1add02ff83e9 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,4 +1,4 @@
-//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
+//===-- MipsSchedule.td - Mips Scheduling Definitions ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 016d449a1067..00347df9ac84 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- MipsSubtarget.cpp - Mips Subtarget Information -----------*- C++ -*-===//
+//===-- MipsSubtarget.cpp - Mips Subtarget Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,7 @@
#include "MipsSubtarget.h"
#include "Mips.h"
+#include "MipsRegisterInfo.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
@@ -21,17 +22,19 @@
using namespace llvm;
+void MipsSubtarget::anchor() { }
+
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little) :
MipsGenSubtargetInfo(TT, CPU, FS),
- MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
+ MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
HasMinMax(false), HasSwap(false), HasBitCount(false)
{
std::string CPUName = CPU;
if (CPUName.empty())
- CPUName = "mips32r1";
+ CPUName = "mips32";
// Parse features string.
ParseSubtargetFeatures(CPUName, FS);
@@ -41,7 +44,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Set MipsABI if it hasn't been set yet.
if (MipsABI == UnknownABI)
- MipsABI = hasMips64() ? N64 : O32;
+ MipsABI = hasMips64() ? N64 : O32;
// Check if Architecture and ABI are compatible.
assert(((!hasMips64() && (isABI_O32() || isABI_EABI())) ||
@@ -52,3 +55,14 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
if (TT.find("linux") == std::string::npos)
IsLinux = false;
}
+
+bool
+MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(hasMips64() ?
+ &Mips::CPU64RegsRegClass : &Mips::CPURegsRegClass);
+ return OptLevel >= CodeGenOpt::Aggressive;
+}
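The new enablePostRAScheduler override gates post-RA scheduling on the optimization level and names the register class whose anti-dependences are worth breaking. A stand-in model of that gating logic (names here are placeholders, not the TargetSubtargetInfo API):

#include <cstdio>

enum OptLevel { None, Less, Default, Aggressive };

// Post-RA scheduling with critical-path anti-dependence breaking only
// kicks in at -O3, and the critical-path class tracks the GPR width.
static bool enablePostRASched(OptLevel OL, bool HasMips64,
                              const char *&CriticalPathRC) {
  CriticalPathRC = HasMips64 ? "CPU64Regs" : "CPURegs";
  return OL >= Aggressive;
}

int main() {
  const char *RC;
  bool On = enablePostRASched(Aggressive, /*HasMips64=*/true, RC);
  std::printf("post-RA sched: %d, critical-path RC: %s\n", On, RC);
}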
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index d9dddad23a48..7faf77baa650 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- MipsSubtarget.h - Define Subtarget for the Mips -----*- C++ -*--====//
+//===-- MipsSubtarget.h - Define Subtarget for the Mips ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
class StringRef;
class MipsSubtarget : public MipsGenSubtargetInfo {
+ virtual void anchor();
public:
// NOTE: O64 will not be supported.
@@ -88,6 +89,9 @@ protected:
InstrItineraryData InstrItins;
public:
+ virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
/// Only O32 and EABI supported right now.
bool isABI_EABI() const { return MipsABI == EABI; }
@@ -111,6 +115,8 @@ public:
bool hasMips64() const { return MipsArchVersion >= Mips64; }
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+ bool hasMips32r2Or64() const { return hasMips32r2() || hasMips64(); }
+
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
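The anchor() declaration added above is LLVM's usual cure for weak vtables: one out-of-line virtual method pins the class's vtable (and type info) to a single object file instead of duplicating it in every translation unit that includes the header. The pattern in isolation:

// Header: declare one never-inline virtual method.
struct SubtargetLike {
  virtual void anchor();
  virtual ~SubtargetLike() = default;
};

// Exactly one .cpp: define it, and the vtable is emitted here only.
void SubtargetLike::anchor() {}

int main() { SubtargetLike S; (void)S; }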
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 6480da3e6dfc..ad022311ed7d 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Mips.h"
#include "MipsTargetMachine.h"
+#include "Mips.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -34,86 +35,119 @@ extern "C" void LLVMInitializeMipsTarget() {
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- bool isLittle):
- LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS, isLittle),
- DataLayout(isLittle ?
- (Subtarget.isABI_N64() ?
- "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
- (Subtarget.isABI_N64() ?
- "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
- "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
- InstrInfo(*this),
- FrameLowering(Subtarget),
- TLInfo(*this), TSInfo(*this), JITInfo() {
+ CodeGenOpt::Level OL,
+ bool isLittle)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, isLittle),
+ DataLayout(isLittle ?
+ (Subtarget.isABI_N64() ?
+ "e-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
+ (Subtarget.isABI_N64() ?
+ "E-p:64:64:64-i8:8:32-i16:16:32-i64:64:64-f128:128:128-n32" :
+ "E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this), TSInfo(*this), JITInfo() {
}
+void MipsebTargetMachine::anchor() { }
+
MipsebTargetMachine::
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void MipselTargetMachine::anchor() { }
MipselTargetMachine::
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+void Mips64ebTargetMachine::anchor() { }
Mips64ebTargetMachine::
Mips64ebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, false) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
+
+void Mips64elTargetMachine::anchor() { }
Mips64elTargetMachine::
Mips64elTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM) :
- MipsTargetMachine(T, TT, CPU, FS, RM, CM, true) {}
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : MipsTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
+
+namespace {
+/// Mips Code Generator Pass Configuration Options.
+class MipsPassConfig : public TargetPassConfig {
+public:
+ MipsPassConfig(MipsTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ MipsTargetMachine &getMipsTargetMachine() const {
+ return getTM<MipsTargetMachine>();
+ }
+
+ const MipsSubtarget &getMipsSubtarget() const {
+ return *getMipsTargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPreSched2();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *MipsTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new MipsPassConfig(this, PM);
+}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
-bool MipsTargetMachine::
-addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+bool MipsPassConfig::addInstSelector()
{
- PM.add(createMipsISelDag(*this));
+ PM.add(createMipsISelDag(getMipsTargetMachine()));
return false;
}
// Implemented by targets that want to run passes immediately before
// machine code is emitted. Return true if -print-machineinstrs should
// print out the code after the passes.
-bool MipsTargetMachine::
-addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
+bool MipsPassConfig::addPreEmitPass()
{
- PM.add(createMipsDelaySlotFillerPass(*this));
+ PM.add(createMipsDelaySlotFillerPass(getMipsTargetMachine()));
return true;
}
-bool MipsTargetMachine::
-addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+bool MipsPassConfig::addPreRegAlloc() {
// Do not restore $gp if target is Mips64.
// In N32/64, $gp is a callee-saved register.
- if (!Subtarget.hasMips64())
- PM.add(createMipsEmitGPRestorePass(*this));
+ if (!getMipsSubtarget().hasMips64())
+ PM.add(createMipsEmitGPRestorePass(getMipsTargetMachine()));
return true;
}
-bool MipsTargetMachine::
-addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
- PM.add(createMipsExpandPseudoPass(*this));
+bool MipsPassConfig::addPreSched2() {
+ PM.add(createMipsExpandPseudoPass(getMipsTargetMachine()));
return true;
}
bool MipsTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- JITCodeEmitter &JCE) {
+ JITCodeEmitter &JCE) {
// Machine code emitter pass for Mips.
PM.add(createMipsJITCodeEmitterPass(*this, JCE));
return false;
}
-
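This file is the Mips half of the TargetPassConfig migration: the per-stage hooks move off the TargetMachine onto a pass-config object that the machine hands back, so the pipeline builder queries one place. A minimal model of the shape (not the LLVM classes themselves):

#include <cstdio>
#include <memory>

struct PassConfig {
  virtual ~PassConfig() = default;
  virtual bool addInstSelector() { return false; }
  virtual bool addPreEmitPass() { return false; }
};

struct MipsLikePassConfig : PassConfig {
  bool addInstSelector() override { std::puts("add ISel DAG pass"); return false; }
  bool addPreEmitPass() override { std::puts("add delay-slot filler"); return true; }
};

struct TargetMachineLike {
  std::unique_ptr<PassConfig> createPassConfig() {
    return std::unique_ptr<PassConfig>(new MipsLikePassConfig());
  }
};

int main() {
  TargetMachineLike TM;
  auto PC = TM.createPassConfig();
  PC->addInstSelector();
  PC->addPreEmitPass();
}

One practical payoff is visible in addPreRegAlloc above: subtarget queries go through getMipsSubtarget() on the config object rather than through member state on the machine.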
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 118ed107c514..80c00e80f12c 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- MipsTargetMachine.h - Define TargetMachine for Mips -00--*- C++ -*-===//
+//===-- MipsTargetMachine.h - Define TargetMachine for Mips -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,15 +14,15 @@
#ifndef MIPSTARGETMACHINE_H
#define MIPSTARGETMACHINE_H
-#include "MipsSubtarget.h"
+#include "MipsFrameLowering.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
-#include "MipsFrameLowering.h"
+#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
+#include "MipsSubtarget.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
-#include "MipsJITInfo.h"
namespace llvm {
class formatted_raw_ostream;
@@ -38,8 +38,9 @@ namespace llvm {
public:
MipsTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
@@ -67,15 +68,8 @@ namespace llvm {
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
virtual bool addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};
@@ -83,37 +77,47 @@ namespace llvm {
/// MipsebTargetMachine - Mips32 big endian target machine.
///
class MipsebTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
MipsebTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// MipselTargetMachine - Mips32 little endian target machine.
///
class MipselTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
MipselTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// Mips64ebTargetMachine - Mips64 big endian target machine.
///
class Mips64ebTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
Mips64ebTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// Mips64elTargetMachine - Mips64 little endian target machine.
///
class Mips64elTargetMachine : public MipsTargetMachine {
+ virtual void anchor();
public:
Mips64elTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // End llvm namespace
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 05c46f5c97a5..04dc60aa6b45 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- MipsTargetObjectFile.cpp - Mips object files ----------------------===//
+//===-- MipsTargetObjectFile.cpp - Mips Object Files ----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/TargetInfo/CMakeLists.txt b/lib/Target/Mips/TargetInfo/CMakeLists.txt
index 5692604504a8..4172d00a33f0 100644
--- a/lib/Target/Mips/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Mips/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMMipsInfo
MipsTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMMipsInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMMipsInfo MipsCommonTableGen)
diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..2d425686227f
--- /dev/null
+++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Mips/TargetInfo/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = MipsInfo
+parent = Mips
+required_libraries = MC Support Target
+add_to_library_groups = Mips
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 6e87b171d896..a3be342f77fb 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -1,10 +1,10 @@
set(LLVM_TARGET_DEFINITIONS PTX.td)
-llvm_tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(PTXGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(PTXGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(PTXGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(PTXCommonTableGen)
add_llvm_target(PTXCodeGen
@@ -17,6 +17,7 @@ add_llvm_target(PTXCodeGen
PTXMCAsmStreamer.cpp
PTXMCInstLower.cpp
PTXMFInfoExtract.cpp
+ PTXMachineFunctionInfo.cpp
PTXParamManager.cpp
PTXRegAlloc.cpp
PTXRegisterInfo.cpp
@@ -25,19 +26,6 @@ add_llvm_target(PTXCodeGen
PTXTargetMachine.cpp
)
-add_llvm_library_dependencies(LLVMPTXCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPTXDesc
- LLVMPTXInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/InstPrinter/CMakeLists.txt b/lib/Target/PTX/InstPrinter/CMakeLists.txt
index 029d06031dd2..b25289347ba5 100644
--- a/lib/Target/PTX/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PTX/InstPrinter/CMakeLists.txt
@@ -6,8 +6,3 @@ add_llvm_library(LLVMPTXAsmPrinter
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
-add_llvm_library_dependencies(LLVMPTXAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..af5d20029ffc
--- /dev/null
+++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PTXAsmPrinter
+parent = PTX
+required_libraries = MC Support
+add_to_library_groups = PTX
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
index aabb404dad68..1830213267b8 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp
@@ -18,27 +18,69 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "PTXGenAsmWriter.inc"
PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI) {
+ MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
-StringRef PTXInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- OS << getRegisterName(RegNo);
+  // Decode the register number into space, type and offset
+ unsigned RegSpace = RegNo & 0x7;
+ unsigned RegType = (RegNo >> 3) & 0x7;
+ unsigned RegOffset = RegNo >> 6;
+
+ // Print the register
+ OS << "%";
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
}
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
@@ -96,9 +138,23 @@ void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
O << "), ";
}
- O << *(MI->getOperand(Index++).getExpr()) << ", (";
-
+ const MCExpr* Expr = MI->getOperand(Index++).getExpr();
unsigned NumArgs = MI->getOperand(Index++).getImm();
+
+  // If the function call is to printf or puts, change it to vprintf.
+ if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
+ const MCSymbol &Sym = SymRefExpr->getSymbol();
+ if (Sym.getName() == "printf" || Sym.getName() == "puts") {
+ O << "vprintf";
+ } else {
+ O << Sym.getName();
+ }
+ } else {
+ O << *Expr;
+ }
+
+ O << ", (";
+
if (NumArgs > 0) {
printOperand(MI, Index++, O);
for (unsigned i = 1; i < NumArgs; ++i) {
@@ -125,6 +181,8 @@ void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
} else {
O << "0000000000000000";
}
+ } else if (Op.isReg()) {
+ printRegName(O, Op.getReg());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
@@ -156,7 +214,6 @@ void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
llvm_unreachable("Unknown rounding mode!");
case PTXRoundingMode::RndDefault:
llvm_unreachable("FP rounding-mode pass did not handle instruction!");
- break;
case PTXRoundingMode::RndNone:
// Do not print anything.
break;
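printRegName above decodes a packed register number: space in bits 0-2, type in bits 3-5, and the per-class offset in the remaining high bits. A round-trip check of that layout; the encoder is reconstructed from the decode side shown here, not taken from the PTX backend:

#include <cassert>
#include <cstdio>

static unsigned encodeReg(unsigned Space, unsigned Type, unsigned Offset) {
  return (Offset << 6) | ((Type & 0x7) << 3) | (Space & 0x7);
}

int main() {
  // Space 0 (Reg), type 2 (B32), offset 7 should print as %r7.
  unsigned R = encodeReg(0, 2, 7);
  assert((R & 0x7) == 0);        // register space
  assert(((R >> 3) & 0x7) == 2); // register type
  assert((R >> 6) == 7);         // offset
  std::printf("encoded: %u\n", R);
}

Packing space, type and offset into one number lets the printer recover a name without any per-function tables.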
diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
index 86dfd482885b..ea4d50477d70 100644
--- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
+++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax ----------===//
+//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,14 +23,12 @@ class MCOperand;
class PTXInstPrinter : public MCInstPrinter {
public:
- PTXInstPrinter(const MCAsmInfo &MAI, const MCSubtargetInfo &STI);
+ PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- static const char *getInstructionName(unsigned Opcode);
-
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt
new file mode 100644
index 000000000000..15a1eb532837
--- /dev/null
+++ b/lib/Target/PTX/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = PTX
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = PTXCodeGen
+parent = PTX
+required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
+add_to_library_groups = PTX
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
index 811ef4bd1fcf..d1fd74c369b9 100644
--- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,4 @@ add_llvm_library(LLVMPTXDesc
PTXMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXDesc
- LLVMMC
- LLVMPTXInfo
- LLVMPTXAsmPrinter
- LLVMSupport
- )
-
add_dependencies(LLVMPTXDesc PTXCommonTableGen)
diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..19b80c5ce9e3
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PTXDesc
+parent = PTX
+required_libraries = MC PTXAsmPrinter PTXInfo Support
+add_to_library_groups = PTX
diff --git a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
index c6094be4d15b..a3e0f320fcb5 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXBaseInfo.h
@@ -18,6 +18,8 @@
#define PTXBASEINFO_H
#include "PTXMCTargetDesc.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace PTXStateSpace {
@@ -57,6 +59,75 @@ namespace llvm {
RndPosInfInt = 10 // .rpi
};
} // namespace PTXII
+
+ namespace PTXRegisterType {
+ // Register type encoded in MCOperands
+ enum {
+ Pred = 0,
+ B16,
+ B32,
+ B64,
+ F32,
+ F64
+ };
+ } // namespace PTXRegisterType
+
+ namespace PTXRegisterSpace {
+ // Register space encoded in MCOperands
+ enum {
+ Reg = 0,
+ Local,
+ Param,
+ Argument,
+ Return
+ };
+ }
+
+ inline static void decodeRegisterName(raw_ostream &OS,
+ unsigned EncodedReg) {
+ OS << "%";
+
+ unsigned RegSpace = EncodedReg & 0x7;
+ unsigned RegType = (EncodedReg >> 3) & 0x7;
+ unsigned RegOffset = EncodedReg >> 6;
+
+ switch (RegSpace) {
+ default:
+ llvm_unreachable("Unknown register space!");
+ case PTXRegisterSpace::Reg:
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type!");
+ case PTXRegisterType::Pred:
+ OS << "p";
+ break;
+ case PTXRegisterType::B16:
+ OS << "rh";
+ break;
+ case PTXRegisterType::B32:
+ OS << "r";
+ break;
+ case PTXRegisterType::B64:
+ OS << "rd";
+ break;
+ case PTXRegisterType::F32:
+ OS << "f";
+ break;
+ case PTXRegisterType::F64:
+ OS << "fd";
+ break;
+ }
+ break;
+ case PTXRegisterSpace::Return:
+ OS << "ret";
+ break;
+ case PTXRegisterSpace::Argument:
+ OS << "arg";
+ break;
+ }
+
+ OS << RegOffset;
+ }
} // namespace llvm
#endif
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
index efefead5341d..cdfbc8046246 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+void PTXMCAsmInfo::anchor() { }
+
PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
if (TheTriple.getArch() == Triple::ptx64)
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
index 03f5d66b3d60..32ca0696950e 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- PTXMCAsmInfo.h - PTX asm properties -----------------*- C++ -*--====//
+//===-- PTXMCAsmInfo.h - PTX asm properties ---------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,7 +20,9 @@ namespace llvm {
class Target;
class StringRef;
- struct PTXMCAsmInfo : public MCAsmInfo {
+ class PTXMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
};
} // namespace llvm
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
index a5af3b880135..08fb970fc290 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -52,18 +52,21 @@ static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
assert(SyntaxVariant == 0 && "We only have one syntax variant");
- return new PTXInstPrinter(MAI, STI);
+ return new PTXInstPrinter(MAI, MII, MRI, STI);
}
extern "C" void LLVMInitializePTXTargetMC() {
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
index 1003b0b5ece9..542638ace135 100644
--- a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -15,9 +15,7 @@
#define PTXMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target ThePTX32Target;
extern Target ThePTX64Target;
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index 7d46cce4aeca..ffb92cb89e39 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -1,4 +1,3 @@
-//===-- PTX.h - Top-level interface for PTX representation ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 693bb9c48344..994a68ed207a 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -1,4 +1,4 @@
-//===- PTX.td - Describe the PTX Target Machine ---------------*- tblgen -*-==//
+//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 733744bbd08b..0b6ac7bcb588 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -14,8 +14,8 @@
#define DEBUG_TYPE "ptx-asm-printer"
-#include "PTX.h"
#include "PTXAsmPrinter.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXParamManager.h"
#include "PTXRegisterInfo.h"
@@ -25,7 +25,6 @@
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -52,23 +51,23 @@ using namespace llvm;
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
-static const char *getRegisterTypeName(unsigned RegNo,
- const MachineRegisterInfo& MRI) {
- const TargetRegisterClass *TRC = MRI.getRegClass(RegNo);
-
-#define TEST_REGCLS(cls, clsstr) \
- if (PTX::cls ## RegisterClass == TRC) return # clsstr;
-
- TEST_REGCLS(RegPred, pred);
- TEST_REGCLS(RegI16, b16);
- TEST_REGCLS(RegI32, b32);
- TEST_REGCLS(RegI64, b64);
- TEST_REGCLS(RegF32, b32);
- TEST_REGCLS(RegF64, b64);
-#undef TEST_REGCLS
-
- llvm_unreachable("Not in any register class!");
- return NULL;
+static const char *getRegisterTypeName(unsigned RegType) {
+ switch (RegType) {
+ default:
+ llvm_unreachable("Unknown register type");
+ case PTXRegisterType::Pred:
+ return ".pred";
+ case PTXRegisterType::B16:
+ return ".b16";
+ case PTXRegisterType::B32:
+ return ".b32";
+ case PTXRegisterType::B64:
+ return ".b64";
+ case PTXRegisterType::F32:
+ return ".f32";
+ case PTXRegisterType::F64:
+ return ".f64";
+ }
}
static const char *getStateSpaceName(unsigned addressSpace) {
@@ -80,7 +79,6 @@ static const char *getStateSpaceName(unsigned addressSpace) {
case PTXStateSpace::Parameter: return "param";
case PTXStateSpace::Shared: return "shared";
}
- return NULL;
}
static const char *getTypeName(Type* type) {
@@ -139,15 +137,15 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
// Emit the PTX .version and .target attributes
- OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
- OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
+ OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString());
+ OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() +
(ST.supportsDouble() ? ""
- : ", map_f64_to_f32")));
+ : ", map_f64_to_f32"));
// .address_size directive is optional, but it must immediately follow
// the .target directive if present within a module
if (ST.supportsPTX23()) {
- std::string addrSize = ST.is64Bit() ? "64" : "32";
- OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ const char *addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize);
}
OutStreamer.AddBlankLine();
@@ -166,6 +164,11 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
OutStreamer.AddBlankLine();
+ // declare external functions
+ for (Module::const_iterator i = M.begin(), e = M.end();
+ i != e; ++i)
+ EmitFunctionDeclaration(i);
+
// declare global variables
for (Module::const_global_iterator i = M.global_begin(), e = M.global_end();
i != e; ++i)
@@ -179,68 +182,47 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
const PTXParamManager &PM = MFI->getParamManager();
// Print register definitions
- std::string regDefs;
+ SmallString<128> regDefs;
+ raw_svector_ostream os(regDefs);
unsigned numRegs;
// pred
- numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .pred %p<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .pred %p<" << numRegs << ">;\n";
// i16
- numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .b16 %rh<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
// i32
- numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .b32 %r<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b32 %r<" << numRegs << ">;\n";
// i64
- numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .b64 %rd<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
// f32
- numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .f32 %f<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .f32 %f<" << numRegs << ">;\n";
// f64
- numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass);
- if(numRegs > 0) {
- regDefs += "\t.reg .f64 %fd<";
- regDefs += utostr(numRegs);
- regDefs += ">;\n";
- }
+ numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
+ if(numRegs > 0)
+ os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
// Local params
for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
- i != e; ++i) {
- regDefs += "\t.param .b";
- regDefs += utostr(PM.getParamSize(*i));
- regDefs += " ";
- regDefs += PM.getParamName(*i);
- regDefs += ";\n";
- }
+ i != e; ++i)
+ os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i)
+ << ";\n";
- OutStreamer.EmitRawText(Twine(regDefs));
+ OutStreamer.EmitRawText(os.str());
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
@@ -249,16 +231,13 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
if (FrameInfo->getObjectSize(i) > 0) {
- std::string def = "\t.local .align ";
- def += utostr(FrameInfo->getObjectAlignment(i));
- def += " .b8";
- def += " __local";
- def += utostr(i);
- def += "[";
- def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits
- def += "]";
- def += ";";
- OutStreamer.EmitRawText(Twine(def));
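+      // e.g., a 16-byte object at index 0 aligned to 4 bytes becomes:
+      //   .local .align 4 .b8 __local0[16];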
+ OutStreamer.EmitRawText("\t.local .align " +
+ Twine(FrameInfo->getObjectAlignment(i)) +
+ " .b8 __local" +
+ Twine(i) +
+ "[" +
+ Twine(FrameInfo->getObjectSize(i)) +
+ "];");
}
}
@@ -295,36 +274,27 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
- std::string decl;
+ SmallString<128> decl;
+ raw_svector_ostream os(decl);
// check if it is defined in some other translation unit
if (gv->isDeclaration())
- decl += ".extern ";
+ os << ".extern ";
// state space: e.g., .global
- decl += ".";
- decl += getStateSpaceName(gv->getType()->getAddressSpace());
- decl += " ";
+ os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' ';
// alignment (optional)
unsigned alignment = gv->getAlignment();
- if (alignment != 0) {
- decl += ".align ";
- decl += utostr(gv->getAlignment());
- decl += " ";
- }
+ if (alignment != 0)
+ os << ".align " << gv->getAlignment() << ' ';
if (PointerType::classof(gv->getType())) {
PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
Type* elementTy = pointerTy->getElementType();
- decl += ".b8 ";
- decl += gvsym->getName();
- decl += "[";
-
- if (elementTy->isArrayTy())
- {
+ if (elementTy->isArrayTy()) {
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
@@ -333,7 +303,6 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
unsigned numElements = arrayTy->getNumElements();
while (elementTy->isArrayTy()) {
-
arrayTy = dyn_cast<ArrayType>(elementTy);
elementTy = arrayTy->getElementType();
@@ -342,110 +311,91 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
// FIXME: isPrimitiveType() == false for i16?
assert(elementTy->isSingleValueType() &&
- "Non-primitive types are not handled");
+ "Non-primitive types are not handled");
- // Compute the size of the array, in bytes.
- uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
- * numElements;
+ // Find the size of the element in bits
+ unsigned elementSize = elementTy->getPrimitiveSizeInBits();
- decl += utostr(arraySize);
+ os << ".b" << elementSize << ' ' << gvsym->getName()
+ << '[' << numElements << ']';
+ } else {
+ os << ".b8" << gvsym->getName() << "[]";
}
- decl += "]";
-
// handle string constants (assume ConstantArray means string)
-
- if (gv->hasInitializer())
- {
+ if (gv->hasInitializer()) {
const Constant *C = gv->getInitializer();
- if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
- {
- decl += " = {";
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
+ os << " = {";
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- {
- if (i > 0) decl += ",";
+ for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
+ if (i > 0)
+ os << ',';
- decl += "0x" +
- utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+ os << "0x";
+ os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
}
- decl += "}";
+ os << '}';
}
}
- }
- else {
+ } else {
// Note: this is currently the fall-through case and most likely generates
// incorrect code.
- decl += getTypeName(gv->getType());
- decl += " ";
-
- decl += gvsym->getName();
+ os << getTypeName(gv->getType()) << ' ' << gvsym->getName();
- if (ArrayType::classof(gv->getType()) ||
- PointerType::classof(gv->getType()))
- decl += "[]";
+ if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType()))
+ os << "[]";
}
- decl += ";";
-
- OutStreamer.EmitRawText(Twine(decl));
+ os << ';';
+ OutStreamer.EmitRawText(os.str());
OutStreamer.AddBlankLine();
}
void PTXAsmPrinter::EmitFunctionEntryLabel() {
// The function label could have already been emitted if two symbols end up
// conflicting due to asm renaming. Detect this and emit an error.
- if (!CurrentFnSym->isUndefined()) {
+ if (!CurrentFnSym->isUndefined())
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
"' label emitted multiple times to assembly file");
- return;
- }
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
const PTXParamManager &PM = MFI->getParamManager();
const bool isKernel = MFI->isKernel();
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
- const MachineRegisterInfo& MRI = MF->getRegInfo();
- std::string decl = isKernel ? ".entry" : ".func";
+ SmallString<128> decl;
+ raw_svector_ostream os(decl);
+ os << (isKernel ? ".entry" : ".func");
if (!isKernel) {
- decl += " (";
+ os << " (";
if (ST.useParamSpaceForDeviceArgs()) {
for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
b = i; i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
+ if (i != b)
+ os << ", ";
- decl += ".param .b";
- decl += utostr(PM.getParamSize(*i));
- decl += " ";
- decl += PM.getParamName(*i);
+ os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
- decl += ".reg .";
- decl += getRegisterTypeName(*i, MRI);
- decl += " ";
- decl += MFI->getRegisterName(*i);
+ if (i != b)
+ os << ", ";
+
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
+ << MFI->getRegisterName(*i);
}
}
- decl += ")";
+ os << ')';
}
// Print function name
- decl += " ";
- decl += CurrentFnSym->getName().str();
-
- decl += " (";
+ os << ' ' << CurrentFnSym->getName() << " (";
const Function *F = MF->getFunction();
@@ -453,63 +403,80 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() {
if (isKernel || ST.useParamSpaceForDeviceArgs()) {
/*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
b = i; i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
+ if (i != b)
+ os << ", ";
- decl += ".param .b";
- decl += utostr(PM.getParamSize(*i));
- decl += " ";
- decl += PM.getParamName(*i);
+ os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
}*/
int Counter = 1;
for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
b = i; i != e; ++i) {
if (i != b)
- decl += ", ";
+ os << ", ";
const Type *ArgType = (*i).getType();
- decl += ".param .b";
+ os << ".param .b";
if (ArgType->isPointerTy()) {
if (ST.is64Bit())
- decl += "64";
+ os << "64";
else
- decl += "32";
+ os << "32";
} else {
- decl += utostr(ArgType->getPrimitiveSizeInBits());
+ os << ArgType->getPrimitiveSizeInBits();
}
if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
- decl += " .ptr";
+ os << " .ptr";
switch (PtrType->getAddressSpace()) {
default:
llvm_unreachable("Unknown address space in argument");
case PTXStateSpace::Global:
- decl += " .global";
+ os << " .global";
break;
case PTXStateSpace::Shared:
- decl += " .shared";
+ os << " .shared";
break;
}
}
- decl += " __param_";
- decl += utostr(Counter++);
+ os << " __param_" << Counter++;
}
} else {
for (PTXMachineFunctionInfo::reg_iterator
i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
i != e; ++i) {
- if (i != b) {
- decl += ", ";
- }
+ if (i != b)
+ os << ", ";
- decl += ".reg .";
- decl += getRegisterTypeName(*i, MRI);
- decl += " ";
- decl += MFI->getRegisterName(*i);
+ os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
+ << MFI->getRegisterName(*i);
}
}
- decl += ")";
+ os << ')';
+ OutStreamer.EmitRawText(os.str());
+}
+
+void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func)
+{
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
+
+ std::string decl = "";
+
+ // hard-coded emission of extern vprintf function
+
+ if (func->getName() == "printf" || func->getName() == "puts") {
+ decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b";
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ decl += " __param_2, .param .b";
+ if (ST.is64Bit())
+ decl += "64";
+ else
+ decl += "32";
+ decl += " __param_3)\n";
+ }
+
OutStreamer.EmitRawText(Twine(decl));
}
@@ -535,7 +502,7 @@ unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
Entry.setValue(SrcId);
// Print out a .file directive to specify files for .loc directives.
- OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+ OutStreamer.EmitDwarfFileDirective(SrcId, "", Entry.getKey());
return SrcId;
}
@@ -550,20 +517,18 @@ MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
MCOperand MCOp;
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
- const MCExpr *Expr;
- const char *RegSymbolName;
+ unsigned EncodedReg;
switch (MO.getType()) {
default:
llvm_unreachable("Unknown operand type");
case MachineOperand::MO_Register:
- // We create register operands as symbols, since the PTXInstPrinter class
- // has no way to map virtual registers back to a name without some ugly
- // hacks.
- // FIXME: Figure out a better way to handle virtual register naming.
- RegSymbolName = MFI->getRegisterName(MO.getReg());
- Expr = MCSymbolRefExpr::Create(RegSymbolName, MCSymbolRefExpr::VK_None,
- OutContext);
- MCOp = MCOperand::CreateExpr(Expr);
+ if (MO.getReg() > 0) {
+ // Encode the register
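+      // (type, space and offset packed into one value, so the instruction
+      // printer can reconstruct the register name without access to MFI)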
+ EncodedReg = MFI->getEncodedRegister(MO.getReg());
+ } else {
+ EncodedReg = 0;
+ }
+ MCOp = MCOperand::CreateReg(EncodedReg);
break;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
@@ -594,4 +559,3 @@ extern "C" void LLVMInitializePTXAsmPrinter() {
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}
-
diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h
index 538c0802a27e..74c8d58a3e9b 100644
--- a/lib/Target/PTX/PTXAsmPrinter.h
+++ b/lib/Target/PTX/PTXAsmPrinter.h
@@ -1,4 +1,4 @@
-//===-- PTXAsmPrinter.h - Print machine code to a PTX file ----------------===//
+//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -47,7 +47,7 @@ public:
private:
void EmitVariableDeclaration(const GlobalVariable *gv);
- void EmitFunctionDeclaration();
+ void EmitFunctionDeclaration(const Function* func);
StringMap<unsigned> SourceIdMap;
}; // class PTXAsmPrinter
diff --git a/lib/Target/PTX/PTXFPRoundingModePass.cpp b/lib/Target/PTX/PTXFPRoundingModePass.cpp
index 0b653e04b3bb..a21d1728d81c 100644
--- a/lib/Target/PTX/PTXFPRoundingModePass.cpp
+++ b/lib/Target/PTX/PTXFPRoundingModePass.cpp
@@ -23,9 +23,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXFPRoundingModePass should be executed just before emission.
-namespace llvm {
+namespace {
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
/// all FP instructions. Essentially, this pass just looks for all FP
/// instructions that have a rounding mode set to RndDefault, and sets an
@@ -58,7 +60,7 @@ namespace llvm {
void initializeMap();
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
diff --git a/lib/Target/PTX/PTXFrameLowering.cpp b/lib/Target/PTX/PTXFrameLowering.cpp
index b621b9d634d2..e6e268e480f8 100644
--- a/lib/Target/PTX/PTXFrameLowering.cpp
+++ b/lib/Target/PTX/PTXFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- PTXFrameLowering.cpp - PTX Frame Information -------*- C++ -*-=====//
+//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h
index 9320676150df..831e81803df6 100644
--- a/lib/Target/PTX/PTXFrameLowering.h
+++ b/lib/Target/PTX/PTXFrameLowering.h
@@ -1,4 +1,4 @@
-//===--- PTXFrameLowering.h - Define frame lowering for PTX --*- C++ -*----===//
+//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index 3307d91a6188..ef4455b96bc3 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXISelLowering.h"
+#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
@@ -20,6 +20,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -46,6 +47,11 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
+ // Let LLVM use loads/stores for all mem* operations
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
@@ -91,7 +97,8 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
// customise setcc to use bitwise logic if possible
- setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ //setOperationAction(ISD::SETCC, MVT::i1, Custom);
+ setOperationAction(ISD::SETCC, MVT::i1, Legal);
// customize translation of memory addresses
@@ -150,18 +157,27 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ //ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
// Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
- if (Op1.getOpcode() == ISD::Constant &&
- (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
- cast<ConstantSDNode>(Op1)->isNullValue()) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ //if (Op1.getOpcode() == ISD::Constant &&
+ // (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ // cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ // (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ //
+ // return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ //}
- return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
- }
+ //ConstantSDNode* COp1 = cast<ConstantSDNode>(Op1);
+ //if(COp1 && COp1->getZExtValue() == 1) {
+ // if(CC == ISD::SETNE) {
+ // return DAG.getNode(PTX::XORripreds, dl, MVT::i1, Op0);
+ // }
+ //}
+
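+  // setcc on i1 is marked Legal above and is matched by patterns in
+  // PTXInstrInfo.td, so this custom lowering should never be reached.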
+ llvm_unreachable("setcc was not matched by a pattern!");
return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
}
@@ -205,7 +221,6 @@ SDValue PTXTargetLowering::
switch (CallConv) {
default:
llvm_unreachable("Unsupported calling convention");
- break;
case CallingConv::PTX_Kernel:
MFI->setKernel(true);
break;
@@ -235,8 +250,25 @@ SDValue PTXTargetLowering::
}
else {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT RegVT = Ins[i].VT;
- TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ EVT RegVT = Ins[i].VT;
+ const TargetRegisterClass* TRC = getRegClassFor(RegVT);
+ unsigned RegType;
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1)
+ RegType = PTXRegisterType::Pred;
+ else if (RegVT == MVT::i16)
+ RegType = PTXRegisterType::B16;
+ else if (RegVT == MVT::i32)
+ RegType = PTXRegisterType::B32;
+ else if (RegVT == MVT::i64)
+ RegType = PTXRegisterType::B64;
+ else if (RegVT == MVT::f32)
+ RegType = PTXRegisterType::F32;
+ else if (RegVT == MVT::f64)
+ RegType = PTXRegisterType::F64;
+ else
+ llvm_unreachable("Unknown parameter type");
// Use a unique index in the instruction to prevent instruction folding.
// Yes, this is a hack.
@@ -247,7 +279,7 @@ SDValue PTXTargetLowering::
InVals.push_back(ArgValue);
- MFI->addArgReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Argument);
}
}
@@ -297,26 +329,33 @@ SDValue PTXTargetLowering::
} else {
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
EVT RegVT = Outs[i].VT;
- TargetRegisterClass* TRC = 0;
+ const TargetRegisterClass* TRC;
+ unsigned RegType;
// Determine which register class we need
if (RegVT == MVT::i1) {
TRC = PTX::RegPredRegisterClass;
+ RegType = PTXRegisterType::Pred;
}
else if (RegVT == MVT::i16) {
TRC = PTX::RegI16RegisterClass;
+ RegType = PTXRegisterType::B16;
}
else if (RegVT == MVT::i32) {
TRC = PTX::RegI32RegisterClass;
+ RegType = PTXRegisterType::B32;
}
else if (RegVT == MVT::i64) {
TRC = PTX::RegI64RegisterClass;
+ RegType = PTXRegisterType::B64;
}
else if (RegVT == MVT::f32) {
TRC = PTX::RegF32RegisterClass;
+ RegType = PTXRegisterType::F32;
}
else if (RegVT == MVT::f64) {
TRC = PTX::RegF64RegisterClass;
+ RegType = PTXRegisterType::F64;
}
else {
llvm_unreachable("Unknown parameter type");
@@ -329,7 +368,7 @@ SDValue PTXTargetLowering::
Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
- MFI->addRetReg(Reg);
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
}
}
@@ -344,7 +383,7 @@ SDValue PTXTargetLowering::
SDValue
PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -352,38 +391,99 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction& MF = DAG.getMachineFunction();
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- PTXParamManager &PM = MFI->getParamManager();
+ PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
+ PTXParamManager &PM = PTXMFI->getParamManager();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
"Calls are not handled for the target device");
+ // Identify the callee function
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
+ const Function *function = cast<Function>(GV);
+
+ // allow non-device calls only for printf
+  bool isPrintf = function->getName() == "printf" ||
+                  function->getName() == "puts";
+
+ assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
+ "PTX function calls must be to PTX device functions");
+
+ unsigned outSize = isPrintf ? 2 : Outs.size();
+
std::vector<SDValue> Ops;
// The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
- Ops.resize(Outs.size() + Ins.size() + 4);
+ Ops.resize(outSize + Ins.size() + 4);
Ops[0] = Chain;
// Identify the callee function
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
- assert(cast<Function>(GV)->getCallingConv() == CallingConv::PTX_Device &&
- "PTX function calls must be to PTX device functions");
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
Ops[Ins.size()+2] = Callee;
- // Generate STORE_PARAM nodes for each function argument. In PTX, function
- // arguments are explicitly stored into .param variables and passed as
- // arguments. There is no register/stack-based calling convention in PTX.
- Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32);
- for (unsigned i = 0; i != OutVals.size(); ++i) {
- unsigned Size = OutVals[i].getValueType().getSizeInBits();
- unsigned Param = PM.addLocalParam(Size);
- const std::string &ParamName = PM.getParamName(Param);
- SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
- MVT::Other);
+ // #Outs
+ Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
+
+ if (isPrintf) {
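+    // printf/puts lower to vprintf(format, args): the first parameter carries
+    // the format string address, the second the address of a local buffer
+    // that the remaining arguments are stored into below.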
+ // first argument is the address of the global string variable in memory
+ unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
+    SDValue ParamValue0 =
+      DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(), MVT::Other);
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
- ParamValue, OutVals[i]);
- Ops[i+Ins.size()+4] = ParamValue;
+ ParamValue0, OutVals[0]);
+ Ops[Ins.size()+4] = ParamValue0;
+
+ // alignment is the maximum size of all the arguments
+ unsigned alignment = 0;
+ for (unsigned i = 1; i < OutVals.size(); ++i) {
+ alignment = std::max(alignment,
+ OutVals[i].getValueType().getSizeInBits());
+ }
+
+    // size is the alignment multiplied by the number of variadic arguments
+ unsigned size = alignment * (OutVals.size() - 1);
+
+ // second argument is the address of the stack object (unless no arguments)
+ unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
+    SDValue ParamValue1 =
+      DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), MVT::Other);
+ Ops[Ins.size()+5] = ParamValue1;
+
+ if (size > 0)
+ {
+ // create a local stack object to store the arguments
+ unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
+ SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
+
+ // store each of the arguments to the stack in turn
+ for (unsigned int i = 1; i != OutVals.size(); i++) {
+        SDValue FrameAddr =
+          DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex,
+                      DAG.getTargetConstant((i - 1) * 8, getPointerTy()));
+ Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
+ MachinePointerInfo(),
+ false, false, 0);
+ }
+
+      // copy the address of the local frame index to get the address in
+      // non-local space
+      SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl,
+                                        getPointerTy(), FrameIndex);
+
+      // store this address in the second argument
+      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+                          ParamValue1, genericAddr);
+ }
+ }
+ else
+ {
+ // Generate STORE_PARAM nodes for each function argument. In PTX, function
+ // arguments are explicitly stored into .param variables and passed as
+ // arguments. There is no register/stack-based calling convention in PTX.
+ for (unsigned i = 0; i != OutVals.size(); ++i) {
+ unsigned Size = OutVals[i].getValueType().getSizeInBits();
+ unsigned Param = PM.addLocalParam(Size);
+ const std::string &ParamName = PM.getParamName(Param);
+ SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
+ MVT::Other);
+ Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
+ ParamValue, OutVals[i]);
+ Ops[i+Ins.size()+4] = ParamValue;
+ }
}
std::vector<SDValue> InParams;
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index 4d2566540af2..33220f4dc346 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -1,4 +1,4 @@
-//==-- PTXISelLowering.h - PTX DAG Lowering Interface ------------*- C++ -*-==//
+//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,8 +18,6 @@
#include "llvm/Target/TargetLowering.h"
namespace llvm {
-class PTXSubtarget;
-class PTXTargetMachine;
namespace PTXISD {
enum NodeType {
@@ -64,9 +62,8 @@ class PTXTargetLowering : public TargetLowering {
SelectionDAG &DAG) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
index 397fdc319a84..267e8341293a 100644
--- a/lib/Target/PTX/PTXInstrFormats.td
+++ b/lib/Target/PTX/PTXInstrFormats.td
@@ -1,4 +1,4 @@
-//===- PTXInstrFormats.td - PTX Instruction Formats ----------*- tblgen -*-===//
+//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index 1b947a5400f4..443cd54906c2 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- PTXInstrInfo.cpp - PTX Instruction Information ---------------------===//
+//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
#define DEBUG_TYPE "ptx-instrinfo"
-#include "PTX.h"
#include "PTXInstrInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -116,7 +116,7 @@ bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
}
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- return !isPredicated(MI) && get(MI->getOpcode()).isTerminator();
+ return !isPredicated(MI) && MI->isTerminator();
}
bool PTXInstrInfo::
@@ -184,15 +184,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
if (MBB.empty())
return true;
- MachineBasicBlock::const_iterator iter = MBB.end();
+ MachineBasicBlock::iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
- const MCInstrDesc &desc1 = instLast1.getDesc();
// for special case that MBB has only 1 instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter and instLast2 are invalid
// we put a dummy value in instLast2 since it is used
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
- const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -207,7 +205,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
}
// this block ends with only an unconditional branch
- if (desc1.isUnconditionalBranch() &&
+ if (instLast1.isUnconditionalBranch() &&
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
@@ -217,7 +215,7 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch and
// it falls through to a successor block
- if (desc1.isConditionalBranch() &&
+ if (instLast1.isConditionalBranch() &&
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
TBB = GetBranchTarget(instLast1);
@@ -233,8 +231,8 @@ AnalyzeBranch(MachineBasicBlock &MBB,
// this block ends with a conditional branch
// followed by an unconditional branch
- if (desc2.isConditionalBranch() &&
- desc1.isUnconditionalBranch()) {
+ if (instLast2.isConditionalBranch() &&
+ instLast1.isUnconditionalBranch()) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
TBB = GetBranchTarget(instLast2);
FBB = GetBranchTarget(instLast1);
@@ -302,7 +300,7 @@ void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "storeRegToStackSlot should not be called for PTX");
+ llvm_unreachable("storeRegToStackSlot should not be called for PTX");
}
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
@@ -310,7 +308,7 @@ void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- assert(false && "loadRegFromStackSlot should not be called for PTX");
+ llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
}
// static helper routines
@@ -341,8 +339,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
}
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- const MCInstrDesc &desc = inst.getDesc();
- return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
+ return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
}
bool PTXInstrInfo::
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index 871f1ac8d376..fba89c09394c 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -1,4 +1,4 @@
-//===- PTXInstrInfo.h - PTX Instruction Information -------------*- C++ -*-===//
+//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index a3fcea9038cf..bead4286dfd8 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -1,4 +1,4 @@
-//===- PTXInstrInfo.td - PTX Instruction defs -----------------*- tblgen-*-===//
+//===-- PTXInstrInfo.td - PTX Instruction defs --------------*- tablegen-*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -680,6 +680,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOVaddr64
: InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
[(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
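+  // cvta.local converts a local (stack-frame) address into its generic-space
+  // equivalent; these patterns materialize frame indices as generic pointers
+  // (e.g. for the vprintf argument buffer).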
+ def MOVframe32
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>;
+ def MOVframe64
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>;
}
// PTX cvt instructions
@@ -802,6 +808,8 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isBranch = 1, isTerminator = 1 in {
// FIXME: The pattern part is blank because I cannot (or do not yet know
// how to) use the first operand of PredicateOperand (a RegPred register) here
+ // When this is revisited, make sure to also look at LowerSETCC and try to
+ // fold it into negated predicates, if possible.
def BRAdp
: InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
[/*(brcond pred:$_p, bb:$d)*/]>;
@@ -819,17 +827,17 @@ let hasSideEffects = 1 in {
///===- Parameter Passing Pseudo-Instructions -----------------------------===//
def READPARAMPRED : InstPTX<(outs RegPred:$a), (ins i32imm:$b),
- "mov.pred\t$a, %param$b", []>;
+ "mov.pred\t$a, %arg$b", []>;
def READPARAMI16 : InstPTX<(outs RegI16:$a), (ins i32imm:$b),
- "mov.b16\t$a, %param$b", []>;
+ "mov.b16\t$a, %arg$b", []>;
def READPARAMI32 : InstPTX<(outs RegI32:$a), (ins i32imm:$b),
- "mov.b32\t$a, %param$b", []>;
+ "mov.b32\t$a, %arg$b", []>;
def READPARAMI64 : InstPTX<(outs RegI64:$a), (ins i32imm:$b),
- "mov.b64\t$a, %param$b", []>;
+ "mov.b64\t$a, %arg$b", []>;
def READPARAMF32 : InstPTX<(outs RegF32:$a), (ins i32imm:$b),
- "mov.f32\t$a, %param$b", []>;
+ "mov.f32\t$a, %arg$b", []>;
def READPARAMF64 : InstPTX<(outs RegF64:$a), (ins i32imm:$b),
- "mov.f64\t$a, %param$b", []>;
+ "mov.f64\t$a, %arg$b", []>;
def WRITEPARAMPRED : InstPTX<(outs), (ins RegPred:$a), "//w", []>;
def WRITEPARAMI16 : InstPTX<(outs), (ins RegI16:$a), "//w", []>;
@@ -885,19 +893,26 @@ def : Pat<(f64 (fdiv RegF64:$a, fpimm:$b)),
// FMUL+FADD
def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), RegF32:$c)),
- (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>;
+ (FMADrrr32 RndDefault, RegF32:$a, RegF32:$b, RegF32:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
- (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f32 (fadd (fmul RegF32:$a, fpimm:$b), fpimm:$c)),
- (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>;
+ (FMADrrr32 RndDefault, RegF32:$a, fpimm:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f32 (fadd (fmul RegF32:$a, RegF32:$b), fpimm:$c)),
- (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>;
+ (FMADrri32 RndDefault, RegF32:$a, RegF32:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), RegF64:$c)),
- (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>;
+ (FMADrrr64 RndDefault, RegF64:$a, RegF64:$b, RegF64:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f64 (fadd (fmul RegF64:$a, RegF64:$b), fpimm:$c)),
- (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>;
+ (FMADrri64 RndDefault, RegF64:$a, RegF64:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
def : Pat<(f64 (fadd (fmul RegF64:$a, fpimm:$b), fpimm:$c)),
- (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>;
+ (FMADrri64 RndDefault, RegF64:$a, fpimm:$b, fpimm:$c)>,
+ Requires<[SupportsFMA]>;
// FNEG
def : Pat<(f32 (fneg RegF32:$a)), (FNEGrr32 RndDefault, RegF32:$a)>;
@@ -1004,6 +1019,9 @@ def : Pat<(f64 (sint_to_fp RegI64:$a)), (CVTf64s64 RndDefault, RegI64:$a)>;
def : Pat<(f64 (fextend RegF32:$a)), (CVTf64f32 RegF32:$a)>;
def : Pat<(f64 (bitconvert RegI64:$a)), (MOVf64i64 RegI64:$a)>;
+// setcc - predicate inversion for branch conditions
+def : Pat<(i1 (setcc RegPred:$a, imm:$b, SETNE)),
+ (XORripreds RegPred:$a, imm:$b)>;
///===- Intrinsic Instructions --------------------------------------------===//
include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXInstrLoadStore.td b/lib/Target/PTX/PTXInstrLoadStore.td
index 9b4f56cf25c6..7a62684b91b9 100644
--- a/lib/Target/PTX/PTXInstrLoadStore.td
+++ b/lib/Target/PTX/PTXInstrLoadStore.td
@@ -1,4 +1,4 @@
-//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tblgen-*-===//
+//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 9de1cb62719a..3416f1cca96d 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -1,4 +1,4 @@
-//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===//
+//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index 468ce9301de4..3ed67a6a9b47 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -1,4 +1,4 @@
-//===- lib/Target/PTX/PTXMCAsmStreamer.cpp - PTX Text Assembly Output -----===//
+//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,6 +22,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -161,11 +162,12 @@ public:
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
- virtual void EmitValueToOffset(const MCExpr *Offset,
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
- virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
+ StringRef Filename);
virtual void EmitInstruction(const MCInst &Inst);
@@ -174,7 +176,7 @@ public:
/// indicated by the hasRawTextSupport() predicate.
virtual void EmitRawText(StringRef String);
- virtual void Finish();
+ virtual void FinishImpl();
/// @}
@@ -476,8 +478,8 @@ void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {}
-void PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {}
+bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {return false;}
void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
@@ -489,11 +491,20 @@ void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
// FIXME: should we inherit from MCAsmStreamer?
bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
- StringRef Filename){
+ StringRef Directory,
+ StringRef Filename) {
+ if (!Directory.empty()) {
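+  // Match MCAsmStreamer's behaviour: an absolute filename overrides the
+  // directory, otherwise the directory is prepended before emission.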
+ if (sys::path::is_absolute(Filename))
+ return EmitDwarfFileDirective(FileNo, "", Filename);
+ SmallString<128> FullPathName = Directory;
+ sys::path::append(FullPathName, Filename);
+ return EmitDwarfFileDirective(FileNo, "", FullPathName);
+ }
+
OS << "\t.file\t" << FileNo << ' ';
PrintQuotedString(Filename, OS);
EmitEOL();
- return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename);
+ return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
}
void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
@@ -529,12 +540,13 @@ void PTXMCAsmStreamer::EmitRawText(StringRef String) {
EmitEOL();
}
-void PTXMCAsmStreamer::Finish() {}
+void PTXMCAsmStreamer::FinishImpl() {}
namespace llvm {
MCStreamer *createPTXAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory,
MCInstPrinter *IP,
MCCodeEmitter *CE, MCAsmBackend *MAB,
bool ShowInst) {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index b33a273dc93d..172a0e031356 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -22,9 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
// NOTE: PTXMFInfoExtract must run after register allocation!
-namespace llvm {
+namespace {
/// PTXMFInfoExtract - PTX specific code to extract of PTX machine
/// function information for PTXAsmPrinter
///
@@ -42,7 +44,7 @@ namespace llvm {
return "PTX Machine Function Info Extractor";
}
}; // class PTXMFInfoExtract
-} // namespace llvm
+} // end anonymous namespace
using namespace llvm;
@@ -56,7 +58,22 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0; i < MRI.getNumVirtRegs(); ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
- MFI->addVirtualRegister(TRC, Reg);
+ unsigned RegType;
+ if (TRC == PTX::RegPredRegisterClass)
+ RegType = PTXRegisterType::Pred;
+ else if (TRC == PTX::RegI16RegisterClass)
+ RegType = PTXRegisterType::B16;
+ else if (TRC == PTX::RegI32RegisterClass)
+ RegType = PTXRegisterType::B32;
+ else if (TRC == PTX::RegI64RegisterClass)
+ RegType = PTXRegisterType::B64;
+ else if (TRC == PTX::RegF32RegisterClass)
+ RegType = PTXRegisterType::F32;
+ else if (TRC == PTX::RegF64RegisterClass)
+ RegType = PTXRegisterType::F64;
+ else
+ llvm_unreachable("Unkown register class.");
+ MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
}
return false;
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.cpp b/lib/Target/PTX/PTXMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..60acfc75a2f7
--- /dev/null
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- PTXMachineFunctionInfo.cpp - PTX machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void PTXMachineFunctionInfo::anchor() { }
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 3b985f7dd6b0..bb7574cbcd71 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//===- PTXMachineFuctionInfo.h - PTX machine function info -------*- C++ -*-==//
+//===-- PTXMachineFunctionInfo.h - PTX machine function info -----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -30,20 +30,27 @@ namespace llvm {
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
bool IsKernel;
DenseSet<unsigned> RegArgs;
DenseSet<unsigned> RegRets;
- typedef std::vector<unsigned> RegisterList;
- typedef DenseMap<const TargetRegisterClass*, RegisterList> RegisterMap;
- typedef DenseMap<unsigned, std::string> RegisterNameMap;
typedef DenseMap<int, std::string> FrameMap;
- RegisterMap UsedRegs;
- RegisterNameMap RegNames;
FrameMap FrameSymbols;
+ struct RegisterInfo {
+ unsigned Reg;
+ unsigned Type;
+ unsigned Space;
+ unsigned Offset;
+ unsigned Encoded;
+ };
+
+ typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
+
+ RegisterInfoMap RegInfo;
+
PTXParamManager ParamManager;
public:
@@ -51,13 +58,7 @@ public:
PTXMachineFunctionInfo(MachineFunction &MF)
: IsKernel(false) {
- UsedRegs[PTX::RegPredRegisterClass] = RegisterList();
- UsedRegs[PTX::RegI16RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegI64RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF32RegisterClass] = RegisterList();
- UsedRegs[PTX::RegF64RegisterClass] = RegisterList();
- }
+ }
/// getParamManager - Returns the PTXParamManager instance for this function.
PTXParamManager& getParamManager() { return ParamManager; }
@@ -78,69 +79,106 @@ public:
reg_iterator retreg_begin() const { return RegRets.begin(); }
reg_iterator retreg_end() const { return RegRets.end(); }
+ /// addRegister - Adds a virtual register to the set of all used registers
+ void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
+ if (!RegInfo.count(Reg)) {
+ RegisterInfo Info;
+ Info.Reg = Reg;
+ Info.Type = RegType;
+ Info.Space = RegSpace;
+
+ // Determine register offset
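+      // (count earlier registers in the same space; within the plain
+      // register space, only those of the same type share a name prefix)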
+ Info.Offset = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
+ e = RegInfo.end(); i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Space == RegSpace)
+ if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
+ Info.Offset++;
+ }
+
+ // Encode the register data into a single register number
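+      // Layout: bits 0-2 = space, bits 3-5 = type, remaining bits = offset.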
+ Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
+
+ RegInfo[Reg] = Info;
+
+ if (RegSpace == PTXRegisterSpace::Argument)
+ RegArgs.insert(Reg);
+ else if (RegSpace == PTXRegisterSpace::Return)
+ RegRets.insert(Reg);
+ }
+ }
+
+ /// countRegisters - Returns the number of registers of the given type and
+ /// space.
+ unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
+ unsigned Count = 0;
+ for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
+ i != e; ++i) {
+ const RegisterInfo& RI = i->second;
+ if (RI.Type == RegType && RI.Space == RegSpace)
+ Count++;
+ }
+ return Count;
+ }
+
+ /// getEncodedRegister - Returns the encoded value of the register.
+ unsigned getEncodedRegister(unsigned Reg) const {
+ return RegInfo.lookup(Reg).Encoded;
+ }
+
/// addRetReg - Adds a register to the set of return-value registers.
void addRetReg(unsigned Reg) {
if (!RegRets.count(Reg)) {
RegRets.insert(Reg);
- std::string name;
- name = "%ret";
- name += utostr(RegRets.size() - 1);
- RegNames[Reg] = name;
}
}
/// addArgReg - Adds a register to the set of function argument registers.
void addArgReg(unsigned Reg) {
RegArgs.insert(Reg);
- std::string name;
- name = "%param";
- name += utostr(RegArgs.size() - 1);
- RegNames[Reg] = name;
- }
-
- /// addVirtualRegister - Adds a virtual register to the set of all used
- /// registers in the function.
- void addVirtualRegister(const TargetRegisterClass *TRC, unsigned Reg) {
- std::string name;
-
- // Do not count registers that are argument/return registers.
- if (!RegRets.count(Reg) && !RegArgs.count(Reg)) {
- UsedRegs[TRC].push_back(Reg);
- if (TRC == PTX::RegPredRegisterClass)
- name = "%p";
- else if (TRC == PTX::RegI16RegisterClass)
- name = "%rh";
- else if (TRC == PTX::RegI32RegisterClass)
- name = "%r";
- else if (TRC == PTX::RegI64RegisterClass)
- name = "%rd";
- else if (TRC == PTX::RegF32RegisterClass)
- name = "%f";
- else if (TRC == PTX::RegF64RegisterClass)
- name = "%fd";
- else
- llvm_unreachable("Invalid register class");
-
- name += utostr(UsedRegs[TRC].size() - 1);
- RegNames[Reg] = name;
- }
}
/// getRegisterName - Returns the name of the specified virtual register. This
/// name is used during PTX emission.
- const char *getRegisterName(unsigned Reg) const {
- if (RegNames.count(Reg))
- return RegNames.find(Reg)->second.c_str();
+ std::string getRegisterName(unsigned Reg) const {
+ if (RegInfo.count(Reg)) {
+ const RegisterInfo& RI = RegInfo.lookup(Reg);
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, RI.Encoded);
+ NameStr.flush();
+ return Name;
+ }
else if (Reg == PTX::NoRegister)
return "%noreg";
else
llvm_unreachable("Register not in register name map");
}
- /// getNumRegistersForClass - Returns the number of virtual registers that are
- /// used for the specified register class.
- unsigned getNumRegistersForClass(const TargetRegisterClass *TRC) const {
- return UsedRegs.lookup(TRC).size();
+ /// getEncodedRegisterName - Returns the name of the encoded register.
+ std::string getEncodedRegisterName(unsigned EncodedReg) const {
+ std::string Name;
+ raw_string_ostream NameStr(Name);
+ decodeRegisterName(NameStr, EncodedReg);
+ NameStr.flush();
+ return Name;
+ }
+
+ /// getRegisterType - Returns the type of the specified virtual register.
+ unsigned getRegisterType(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Type;
+ else
+ llvm_unreachable("Unknown register");
+ }
+
+ /// getOffsetForRegister - Returns the offset of the virtual register
+ unsigned getOffsetForRegister(unsigned Reg) const {
+ if (RegInfo.count(Reg))
+ return RegInfo.lookup(Reg).Offset;
+ else
+ return 0;
}
/// getFrameSymbol - Returns the symbol name for the given FrameIndex.
@@ -148,13 +186,13 @@ public:
if (FrameSymbols.count(FrameIndex)) {
return FrameSymbols.lookup(FrameIndex).c_str();
} else {
- std::string Name = "__local";
- Name += utostr(FrameIndex);
+ std::string Name = "__local";
+ Name += utostr(FrameIndex);
// The whole point of caching this name is to ensure the pointer we pass
// to any getExternalSymbol() calls will remain valid for the lifetime of
// the back-end instance. This is to work around an issue in SelectionDAG
// where symbol names are expected to be life-long strings.
- FrameSymbols[FrameIndex] = Name;
+ FrameSymbols[FrameIndex] = Name;
return FrameSymbols[FrameIndex].c_str();
}
}
diff --git a/lib/Target/PTX/PTXParamManager.cpp b/lib/Target/PTX/PTXParamManager.cpp
index 7753787ebc51..cc1cc711c82e 100644
--- a/lib/Target/PTX/PTXParamManager.cpp
+++ b/lib/Target/PTX/PTXParamManager.cpp
@@ -1,4 +1,4 @@
-//===- PTXParamManager.cpp - Manager for .param variables -------*- C++ -*-===//
+//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXParamManager.h"
+#include "PTX.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
diff --git a/lib/Target/PTX/PTXParamManager.h b/lib/Target/PTX/PTXParamManager.h
index 9fd2de52f7f2..92e7728b4f8b 100644
--- a/lib/Target/PTX/PTXParamManager.h
+++ b/lib/Target/PTX/PTXParamManager.h
@@ -1,4 +1,4 @@
-//===- PTXParamManager.h - Manager for .param variables ----------*- C++ -*-==//
+//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include <string>
namespace llvm {
diff --git a/lib/Target/PTX/PTXRegAlloc.cpp b/lib/Target/PTX/PTXRegAlloc.cpp
index 2d2d5c30c8ca..7fd53752bf66 100644
--- a/lib/Target/PTX/PTXRegAlloc.cpp
+++ b/lib/Target/PTX/PTXRegAlloc.cpp
@@ -24,10 +24,7 @@ namespace {
class PTXRegAlloc : public MachineFunctionPass {
public:
static char ID;
- PTXRegAlloc() : MachineFunctionPass(ID) {
- initializePHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
- }
+ PTXRegAlloc() : MachineFunctionPass(ID) {}
virtual const char* getPassName() const {
return "PTX Register Allocator";
@@ -35,8 +32,6 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index c8062664a93b..b6ffd38232e3 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- PTXRegisterInfo.cpp - PTX Register Information ---------------------===//
+//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXRegisterInfo.h"
+#include "PTX.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -31,44 +31,8 @@ PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
: PTXGenRegisterInfo(0), TII(tii) {
}
-void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj,
- RegScavenger *RS) const {
- unsigned Index;
- MachineInstr &MI = *II;
- //MachineBasicBlock &MBB = *MI.getParent();
- //DebugLoc dl = MI.getDebugLoc();
- //MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
-
- //unsigned Reg = MRI.createVirtualRegister(PTX::RegF32RegisterClass);
-
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
+ int /*SPAdj*/,
+ RegScavenger * /*RS*/) const {
llvm_unreachable("FrameIndex should have been previously eliminated!");
-
- Index = 0;
- while (!MI.getOperand(Index).isFI()) {
- ++Index;
- assert(Index < MI.getNumOperands() &&
- "Instr does not have a FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(Index).getIndex();
-
- DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
- DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
- DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
-
- //MachineInstr* MI2 = BuildMI(MBB, II, dl, TII.get(PTX::LOAD_LOCAL_F32))
- //.addReg(Reg, RegState::Define).addImm(FrameIndex);
- //if (MI2->findFirstPredOperandIdx() == -1) {
- // MI2->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
- // MI2->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL));
- //}
- //MI2->dump();
-
- //MachineOperand ESOp = MachineOperand::CreateES("__local__");
-
- // This frame index is post stack slot re-use assignments
- //MI.getOperand(Index).ChangeToRegister(Reg, false);
- MI.getOperand(Index).ChangeToImmediate(FrameIndex);
- //MI.getOperand(Index) = ESOp;
}
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index 55fafe47bf35..5614ce793b90 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- PTXRegisterInfo.h - PTX Register Information Impl --------*- C++ -*-===//
+//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,9 +32,9 @@ public:
PTXRegisterInfo(PTXTargetMachine &TM,
const TargetInstrInfo &tii);
- virtual const unsigned
+ virtual const uint16_t
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
- static const unsigned CalleeSavedRegs[] = { 0 };
+ static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs; // save nothing
}
@@ -49,7 +49,6 @@ public:
virtual unsigned getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PTX does not have a frame register");
- return 0;
}
}; // struct PTXRegisterInfo
} // namespace llvm
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index 6ed6d3fe385f..e8b262e48bde 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -1,5 +1,4 @@
-
-//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
index 50ef14a13d94..a116fabaa53b 100644
--- a/lib/Target/PTX/PTXSelectionDAGInfo.cpp
+++ b/lib/Target/PTX/PTXSelectionDAGInfo.cpp
@@ -70,7 +70,7 @@ PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
DAG.getNode(ISD::ADD, dl, PointerType, Src,
DAG.getConstant(SrcOff, PointerType)),
SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
- false, 0);
+ false, false, 0);
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
@@ -108,7 +108,8 @@ PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
Loads[i] = DAG.getLoad(VT, dl, Chain,
DAG.getNode(ISD::ADD, dl, PointerType, Src,
DAG.getConstant(SrcOff, PointerType)),
- SrcPtrInfo.getWithOffset(SrcOff), false, false, 0);
+ SrcPtrInfo.getWithOffset(SrcOff), false, false,
+ false, 0);
TFOps[i] = Loads[i].getValue(1);
++i;
SrcOff += VTSize;
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index 1eb57d2f1702..454f64e6bba3 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- PTXSubtarget.cpp - PTX Subtarget Information ---------------*- C++ -*-=//
+//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -22,6 +22,8 @@
using namespace llvm;
+void PTXSubtarget::anchor() { }
+
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: PTXGenSubtargetInfo(TT, CPU, FS),
@@ -57,10 +59,10 @@ std::string PTXSubtarget::getTargetString() const {
std::string PTXSubtarget::getPTXVersionString() const {
switch(PTXVersion) {
- default: llvm_unreachable("Unknown PTX version");
case PTX_VERSION_2_0: return "2.0";
case PTX_VERSION_2_1: return "2.1";
case PTX_VERSION_2_2: return "2.2";
case PTX_VERSION_2_3: return "2.3";
}
+ llvm_unreachable("Invalid PTX version");
}
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index b946d7c11cef..ce93fef02a11 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -1,4 +1,4 @@
-//====-- PTXSubtarget.h - Define Subtarget for the PTX ---------*- C++ -*--===//
+//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,6 +23,7 @@ namespace llvm {
class StringRef;
class PTXSubtarget : public PTXGenSubtargetInfo {
+ virtual void anchor();
public:
/**
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 449a3d9fc8d4..c55a658dc375 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -11,13 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#include "PTX.h"
#include "PTXTargetMachine.h"
+#include "PTX.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -26,6 +25,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
@@ -37,8 +37,6 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -46,7 +44,7 @@ using namespace llvm;
namespace llvm {
MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
- bool useCFI,
+ bool useCFI, bool useDwarfDirectory,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
MCAsmBackend *MAB,
@@ -67,29 +65,16 @@ namespace {
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
-
- // Copied from LLVMTargetMachine.cpp
- void printNoVerify(PassManagerBase &PM, const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
- }
-
- void printAndVerify(PassManagerBase &PM,
- const char *Banner) {
- if (PrintMachineCode)
- PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
-
- //if (VerifyMachineCode)
- // PM.add(createMachineVerifierPass(Banner));
- }
}
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
@@ -98,276 +83,83 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
TLInfo(*this) {
}
+void PTX32TargetMachine::anchor() { }
+
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
+void PTX64TargetMachine::anchor() { }
+
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
-bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createPTXISelDag(*this, OptLevel));
- return false;
+namespace llvm {
+/// PTX Code Generator Pass Configuration Options.
+class PTXPassConfig : public TargetPassConfig {
+public:
+ PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PTXTargetMachine &getPTXTargetMachine() const {
+ return getTM<PTXTargetMachine>();
+ }
+
+ bool addInstSelector();
+ FunctionPass *createTargetRegisterAllocator(bool);
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+ bool addPostRegAlloc();
+ void addMachineLateOptimization();
+ bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
+ PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
+ PassConfig->disablePass(PrologEpilogCodeInserterID);
+ return PassConfig;
}
-bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // PTXMFInfoExtract must after register allocation!
- //PM.add(createPTXMFInfoExtract(*this, OptLevel));
+bool PTXPassConfig::addInstSelector() {
+ PM.add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
return false;
}
-bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
- // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
-
- // Add common CodeGen passes.
- MCContext *Context = 0;
- if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
- return true;
- assert(Context != 0 && "Failed to get MCContext");
-
- if (hasMCSaveTempLabels())
- Context->setAllowTemporaryLabels(false);
-
- const MCAsmInfo &MAI = *getMCAsmInfo();
- const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
- OwningPtr<MCStreamer> AsmStreamer;
-
- switch (FileType) {
- default: return true;
- case CGFT_AssemblyFile: {
- MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
-
- // Create a code emitter if asked to show the encoding.
- MCCodeEmitter *MCE = 0;
- MCAsmBackend *MAB = 0;
-
- MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
- true, /* verbose asm */
- hasMCUseLoc(),
- hasMCUseCFI(),
- InstPrinter,
- MCE, MAB,
- false /* show MC encoding */);
- AsmStreamer.reset(S);
- break;
- }
- case CGFT_ObjectFile: {
- llvm_unreachable("Object file emission is not supported with PTX");
- }
- case CGFT_Null:
- // The Null output is intended for use for performance analysis and testing,
- // not real users.
- AsmStreamer.reset(createNullStreamer(*Context));
- break;
- }
-
- // MC Logging
- //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
-
- // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
- FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
- if (Printer == 0)
- return true;
-
- // If successful, createAsmPrinter took ownership of AsmStreamer.
- AsmStreamer.take();
+FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
+ return createPTXRegisterAllocator();
+}
- PM.add(Printer);
+// Modify the optimized compilation path to bypass optimized register allocation.
+void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ addFastRegAlloc(RegAllocPass);
+}
- PM.add(createGCInfoDeleter());
+bool PTXPassConfig::addPostRegAlloc() {
+  // PTXMFInfoExtract must run after register allocation!
+ //PM.add(createPTXMFInfoExtract(getPTXTargetMachine()));
return false;
}
-bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify,
- MCContext *&OutContext) {
- // Add standard LLVM codegen passes.
- // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
- // modifications for the PTX target.
-
- // Standard LLVM-Level Passes.
-
- // Basic AliasAnalysis support.
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- PM.add(createTypeBasedAliasAnalysisPass());
- PM.add(createBasicAliasAnalysisPass());
-
- // Before running any passes, run the verifier to determine if the input
- // coming from the front-end and/or optimizer is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Run loop strength reduction before anything else.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createLoopStrengthReducePass(getTargetLowering()));
- //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
- }
-
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
- PM.add(createLowerInvokePass(getTargetLowering()));
- // The lower invoke pass may create unreachable code. Remove it.
- PM.add(createUnreachableBlockEliminationPass());
-
- if (OptLevel != CodeGenOpt::None)
- PM.add(createCodeGenPreparePass(getTargetLowering()));
+/// Add passes that optimize machine instructions after register allocation.
+void PTXPassConfig::addMachineLateOptimization() {
+ if (addPass(BranchFolderPassID) != &NoPassID)
+ printAndVerify("After BranchFolding");
- PM.add(createStackProtectorPass(getTargetLowering()));
-
- addPreISel(PM, OptLevel);
-
- //PM.add(createPrintFunctionPass("\n\n"
- // "*** Final LLVM Code input to ISel ***\n",
- // &dbgs()));
-
- // All passes which modify the LLVM IR are now complete; run the verifier
- // to ensure that the IR is valid.
- if (!DisableVerify)
- PM.add(createVerifierPass());
-
- // Standard Lower-Level Passes.
-
- // Install a MachineModuleInfo class, which is an immutable pass that holds
- // all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getRegisterInfo(),
- &getTargetLowering()->getObjFileLowering());
- PM.add(MMI);
- OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
- // Set up a MachineFunction for the rest of CodeGen to work on.
- PM.add(new MachineFunctionAnalysis(*this, OptLevel));
-
- // Ask the target for an isel.
- if (addInstSelector(PM, OptLevel))
- return true;
-
- // Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- PM.add(createExpandISelPseudosPass());
-
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
- }
-
- // Optimize PHIs before DCE: removing dead PHI cycles may make more
- // instructions dead.
- if (OptLevel != CodeGenOpt::None)
- PM.add(createOptimizePHIsPass());
-
- // If the target requests it, assign local variables to stack slots relative
- // to one another and simplify frame index references where possible.
- PM.add(createLocalStackSlotAllocationPass());
-
- if (OptLevel != CodeGenOpt::None) {
- // With optimization, dead code should already be eliminated. However
- // there is one known exception: lowered code for arguments that are only
- // used by tail calls, where the tail calls reuse the incoming stack
- // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass");
-
- PM.add(createMachineLICMPass());
- PM.add(createMachineCSEPass());
- PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
-
- PM.add(createPeepholeOptimizerPass());
- printAndVerify(PM, "After codegen peephole optimization pass");
- }
-
- // Run pre-ra passes.
- if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes");
-
- // Perform register allocation.
- PM.add(createPTXRegisterAllocator());
- printAndVerify(PM, "After Register Allocation");
-
- // Perform stack slot coloring and post-ra machine LICM.
- if (OptLevel != CodeGenOpt::None) {
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- PM.add(createStackSlotColoringPass(false));
-
- // FIXME: Post-RA LICM has asserts that fire on virtual registers.
- // Run post-ra machine LICM to hoist reloads / remats.
- //if (!DisablePostRAMachineLICM)
- // PM.add(createMachineLICMPass(false));
-
- printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
- }
-
- // Run post-ra passes.
- if (addPostRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PostRegAlloc passes");
-
- PM.add(createExpandPostRAPseudosPass());
- printAndVerify(PM, "After ExpandPostRAPseudos");
-
- // Insert prolog/epilog code. Eliminate abstract frame index references...
- PM.add(createPrologEpilogCodeInserter());
- printAndVerify(PM, "After PrologEpilogCodeInserter");
-
- // Run pre-sched2 passes.
- if (addPreSched2(PM, OptLevel))
- printAndVerify(PM, "After PreSched2 passes");
-
- // Second pass scheduler.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createPostRAScheduler(OptLevel));
- printAndVerify(PM, "After PostRAScheduler");
- }
-
- // Branch folding must be run after regalloc and prolog/epilog insertion.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
- printNoVerify(PM, "After BranchFolding");
- }
-
- // Tail duplication.
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createTailDuplicatePass(false));
- printNoVerify(PM, "After TailDuplicate");
- }
-
- PM.add(createGCMachineCodeAnalysisPass());
-
- //if (PrintGCInfo)
- // PM.add(createGCInfoPrinter(dbgs()));
-
- if (OptLevel != CodeGenOpt::None) {
- PM.add(createCodePlacementOptPass());
- printNoVerify(PM, "After CodePlacementOpt");
- }
-
- if (addPreEmitPass(PM, OptLevel))
- printNoVerify(PM, "After PreEmit passes");
-
- PM.add(createPTXMFInfoExtract(*this, OptLevel));
- PM.add(createPTXFPRoundingModePass(*this, OptLevel));
+ if (addPass(TailDuplicateID) != &NoPassID)
+ printAndVerify("After TailDuplicate");
+}
- return false;
+bool PTXPassConfig::addPreEmitPass() {
+ PM.add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
+ PM.add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
+ return true;
}
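Note: this hunk is the PTX half of the TargetPassConfig migration. Pipeline hooks move off the TargetMachine onto a pass-config object owned by the generic codegen driver, so the hand-copied addCommonCodeGenPasses()/addPassesToEmitFile() above can be deleted wholesale. A minimal sketch of the shape a target now provides (the Foo* names are hypothetical):

    class FooPassConfig : public TargetPassConfig {
    public:
      FooPassConfig(FooTargetMachine *TM, PassManagerBase &PM)
        : TargetPassConfig(TM, PM) {}

      // Called by the common driver at the instruction-selection stage.
      virtual bool addInstSelector() {
        PM.add(createFooISelDag(getTM<FooTargetMachine>(), getOptLevel()));
        return false; // false = success, continue building the pipeline
      }
    };

    TargetPassConfig *FooTargetMachine::createPassConfig(PassManagerBase &PM) {
      return new FooPassConfig(this, PM);
    }

The shared pipeline then lives in one place, and PTX only overrides the stages it must (register allocation, pre-emit).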
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 5b7c82b1f4f4..278d1555b00b 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -35,8 +35,9 @@ class PTXTargetMachine : public LLVMTargetMachine {
public:
PTXTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -58,22 +59,9 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual bool addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel);
-
- // We override this method to supply our own set of codegen passes.
- virtual bool addPassesToEmitFile(PassManagerBase &,
- formatted_raw_ostream &,
- CodeGenFileType,
- CodeGenOpt::Level,
- bool = true);
-
// Emission of machine code through JITCodeEmitter is not supported.
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
JITCodeEmitter &,
- CodeGenOpt::Level,
bool = true) {
return true;
}
@@ -82,32 +70,33 @@ class PTXTargetMachine : public LLVMTargetMachine {
virtual bool addPassesToEmitMC(PassManagerBase &,
MCContext *&,
raw_ostream &,
- CodeGenOpt::Level,
bool = true) {
return true;
}
- private:
-
- bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
- bool DisableVerify, MCContext *&OutCtx);
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // class PTXTargetMachine
class PTX32TargetMachine : public PTXTargetMachine {
+ virtual void anchor();
public:
PTX32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
+ virtual void anchor();
public:
PTX64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
}; // class PTX64TargetMachine
} // namespace llvm
diff --git a/lib/Target/PTX/TargetInfo/CMakeLists.txt b/lib/Target/PTX/TargetInfo/CMakeLists.txt
index 2366e45294f8..d9a5da3a9082 100644
--- a/lib/Target/PTX/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PTX/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPTXInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPTXInfo PTXCommonTableGen)
diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..2cc30c422f14
--- /dev/null
+++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PTXInfo
+parent = PTX
+required_libraries = MC Support Target
+add_to_library_groups = PTX
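Note: the required_libraries line replaces the add_llvm_library_dependencies() stanza deleted from CMakeLists.txt above; dependency edges are now declared once here, and the per-build-system fragments are regenerated by the llvm-build tool. An illustrative invocation (the flag name is from memory of this era's tool and should be checked against llvm-build --help):

    # Regenerate the CMake dependency fragment from the tree's LLVMBuild.txt files.
    utils/llvm-build/llvm-build --write-cmake-fragment=cmake/modules/LLVMBuild.cmake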
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index 73b4aba9f015..bcd8bd291623 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,13 +1,13 @@
set(LLVM_TARGET_DEFINITIONS PPC.td)
-llvm_tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(PPCGenCodeEmitter.inc -gen-emitter)
-llvm_tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-llvm_tablegen(PPCGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(PPCGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(PPCGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(PPCGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(PPCGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter)
+tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(PowerPCCommonTableGen)
add_llvm_target(PowerPCCodeGen
@@ -21,26 +21,13 @@ add_llvm_target(PowerPCCodeGen
PPCFrameLowering.cpp
PPCJITInfo.cpp
PPCMCInstLower.cpp
+ PPCMachineFunctionInfo.cpp
PPCRegisterInfo.cpp
PPCSubtarget.cpp
PPCTargetMachine.cpp
PPCSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCCodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCDesc
- LLVMPowerPCInfo
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
index 1d857e2f48d4..a605cc4b5f27 100644
--- a/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/InstPrinter/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMPowerPCAsmPrinter
PPCInstPrinter.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCAsmPrinter
- LLVMMC
- LLVMSupport
- )
-
add_dependencies(LLVMPowerPCAsmPrinter PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..7c691deafccf
--- /dev/null
+++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCAsmPrinter
+parent = PowerPC
+required_libraries = MC Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index b6a08354a210..61d23ce06aa1 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -17,16 +17,12 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define GET_INSTRUCTION_NAME
#include "PPCGenAsmWriter.inc"
-StringRef PPCInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
-}
-
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
OS << getRegisterName(RegNo);
}
@@ -94,7 +90,6 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
unsigned Code = MI->getOperand(OpNo).getImm();
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- default: assert(0 && "Invalid predicate");
case PPC::PRED_ALWAYS: return; // Don't print anything for always.
case PPC::PRED_LT: O << "lt"; return;
case PPC::PRED_LE: O << "le"; return;
@@ -175,7 +170,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
unsigned CCReg = MI->getOperand(OpNo).getReg();
unsigned RegNo;
switch (CCReg) {
- default: assert(0 && "Unknown CR register");
+ default: llvm_unreachable("Unknown CR register");
case PPC::CR0: RegNo = 0; break;
case PPC::CR1: RegNo = 1; break;
case PPC::CR2: RegNo = 2; break;
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 4ed4b765c1c7..73fd5342a165 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax ----------===//
+//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,8 +24,9 @@ class PPCInstPrinter : public MCInstPrinter {
// 0 -> AIX, 1 -> Darwin.
unsigned SyntaxVariant;
public:
- PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
- : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
+ PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, unsigned syntaxVariant)
+ : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {}
bool isDarwinSyntax() const {
return SyntaxVariant == 1;
@@ -33,9 +34,6 @@ public:
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
-
- static const char *getInstructionName(unsigned Opcode);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
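Note: getOpcodeName()/getInstructionName() can disappear because MCInstPrinter now carries an MCInstrInfo reference. A caller that wants an opcode's name asks the shared tables directly, roughly:

    // MII is the MCInstrInfo now passed to every MCInstPrinter.
    const char *Name = MII.getName(MI->getOpcode());

so each printer no longer needs its own TableGen'd name table.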
diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt
new file mode 100644
index 000000000000..95fac5471ec7
--- /dev/null
+++ b/lib/Target/PowerPC/LLVMBuild.txt
@@ -0,0 +1,33 @@
+;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = InstPrinter MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = PowerPC
+parent = Target
+has_asmprinter = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = PowerPCCodeGen
+parent = PowerPC
+required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
index c4041db8cf0b..b674883db7de 100644
--- a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -4,13 +4,7 @@ add_llvm_library(LLVMPowerPCDesc
PPCMCAsmInfo.cpp
PPCMCCodeEmitter.cpp
PPCPredicates.cpp
- )
-
-add_llvm_library_dependencies(LLVMPowerPCDesc
- LLVMMC
- LLVMPowerPCAsmPrinter
- LLVMPowerPCInfo
- LLVMSupport
+ PPCELFObjectWriter.cpp
)
add_dependencies(LLVMPowerPCDesc PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..d3a567d1581d
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCDesc
+parent = PowerPC
+required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support
+add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9f2fd6d01b8e..48de583afdf1 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
@@ -57,13 +58,6 @@ public:
MCValue Target, uint64_t &FixedValue) {}
};
-class PPCELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- PPCELFObjectWriter(bool Is64Bit, Triple::OSType OSType, uint16_t EMachine,
- bool HasRelocationAddend, bool isLittleEndian)
- : MCELFObjectTargetWriter(Is64Bit, OSType, EMachine, HasRelocationAddend) {}
-};
-
class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
public:
@@ -80,33 +74,42 @@ public:
{ "fixup_ppc_ha16", 16, 16, 0 },
{ "fixup_ppc_lo14", 16, 14, 0 }
};
-
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
-
+
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
-
- bool MayNeedRelaxation(const MCInst &Inst) const {
+
+ bool mayNeedRelaxation(const MCInst &Inst) const {
// FIXME.
return false;
}
-
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
// FIXME.
- assert(0 && "RelaxInstruction() unimplemented");
+ llvm_unreachable("relaxInstruction() unimplemented");
}
-
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
// FIXME: Zero fill for now. That's not right, but at least will get the
// section size right.
for (uint64_t i = 0; i != Count; ++i)
OW->Write8(0);
return true;
- }
-
+ }
+
unsigned getPointerSize() const {
StringRef Name = TheTarget.getName();
if (Name == "ppc64") return 8;
@@ -122,12 +125,12 @@ namespace {
class DarwinPPCAsmBackend : public PPCAsmBackend {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T) { }
-
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
- assert(0 && "UNIMP");
+ llvm_unreachable("UNIMP");
}
-
+
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
return createMachObjectWriter(new PPCMachObjectWriter(
@@ -137,19 +140,19 @@ namespace {
object::mach::CSPPC_ALL),
OS, /*IsLittleEndian=*/false);
}
-
+
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
return false;
}
};
class ELFPPCAsmBackend : public PPCAsmBackend {
- Triple::OSType OSType;
+ uint8_t OSABI;
public:
- ELFPPCAsmBackend(const Target &T, Triple::OSType OSType) :
- PPCAsmBackend(T), OSType(OSType) { }
-
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ ELFPPCAsmBackend(const Target &T, uint8_t OSABI) :
+ PPCAsmBackend(T), OSABI(OSABI) { }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -162,17 +165,12 @@ namespace {
for (unsigned i = 0; i != 4; ++i)
Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
}
-
+
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
- return createELFObjectWriter(new PPCELFObjectWriter(
- /*Is64Bit=*/is64,
- OSType,
- is64 ? ELF::EM_PPC64 : ELF::EM_PPC,
- /*addend*/ true, /*isLittleEndian*/ false),
- OS, /*IsLittleEndian=*/false);
+ return createPPCELFObjectWriter(OS, is64, OSABI);
}
-
+
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
return false;
}
@@ -187,5 +185,6 @@ MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
- return new ELFPPCAsmBackend(T, Triple(TT).getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFPPCAsmBackend(T, OSABI);
}
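Note: the backend now resolves the triple to a raw ELF OSABI byte (e_ident[EI_OSABI]) up front, so only that byte flows into the object writer. A sketch of the new plumbing, mirroring the code above (the exact OS-to-byte mapping is illustrative):

    // Triple -> OSABI byte -> writer; e.g. a FreeBSD triple maps to
    // ELF::ELFOSABI_FREEBSD, while unrecognized systems fall back to
    // ELFOSABI_NONE (0).
    uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
    MCObjectWriter *W = createPPCELFObjectWriter(OS, /*Is64Bit=*/true, OSABI);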
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
index 369bbdce11f5..9c975c089ea6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- PPCBaseInfo.h - Top level definitions for PPC -------- --*- C++ -*-===//
+//===-- PPCBaseInfo.h - Top level definitions for PPC -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
new file mode 100644
index 000000000000..a19798157bf3
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -0,0 +1,103 @@
+//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+ virtual ~PPCELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
+ };
+}
+
+PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*HasRelocationAddend*/ true) {}
+
+PPCELFObjectWriter::~PPCELFObjectWriter() {
+}
+
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+  // Determine the type of the relocation.
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case FK_PCRel_4:
+ Type = ELF::R_PPC_REL32;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case PPC::fixup_ppc_br24:
+ Type = ELF::R_PPC_ADDR24;
+ break;
+ case PPC::fixup_ppc_brcond14:
+      Type = ELF::R_PPC_ADDR14_BRTAKEN; // XXX: or BRNTAKEN?
+ break;
+ case PPC::fixup_ppc_ha16:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ case PPC::fixup_ppc_lo16:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case PPC::fixup_ppc_lo14:
+ Type = ELF::R_PPC_ADDR14;
+ break;
+ case FK_Data_4:
+ Type = ELF::R_PPC_ADDR32;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ }
+ }
+ return Type;
+}
+
+void PPCELFObjectWriter::
+adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
+ switch ((unsigned)Fixup.getKind()) {
+ case PPC::fixup_ppc_ha16:
+ case PPC::fixup_ppc_lo16:
+ RelocOffset += 2;
+ break;
+ default:
+ break;
+ }
+}
+
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/false);
+}
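Note on adjustFixupOffset(): on big-endian PowerPC the 16-bit immediate of an instruction like lis/addi occupies the low half (bytes 2-3) of the 4-byte instruction word, so ha16/lo16 relocations must point two bytes past the instruction's own offset; that is the +2 above. The classic pair these fixups serve (illustrative assembly):

    lis  3, sym@ha      # R_PPC_ADDR16_HA: adjusted high half, which absorbs
                        # the sign carry from the low half below
    addi 3, 3, sym@l    # R_PPC_ADDR16_LO: low 16 bits of sym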
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index e9424d8415f6..245b4578bf28 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- PPCMCAsmInfo.cpp - PPC asm properties -------------------*- C++ -*-===//
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#include "PPCMCAsmInfo.h"
using namespace llvm;
+void PPCMCAsmInfoDarwin::anchor() { }
+
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
if (is64Bit)
PointerSize = 8;
@@ -30,6 +32,8 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
SupportsDebugInformation= true; // Debug information.
}
+void PPCLinuxMCAsmInfo::anchor() { }
+
PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
if (is64Bit)
PointerSize = 8;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 96ae6fbba0e4..7b4ed9f14eb6 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- PPCMCAsmInfo.h - PPC asm properties -----------------*- C++ -*--====//
+//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,11 +18,15 @@
namespace llvm {
- struct PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit PPCMCAsmInfoDarwin(bool is64Bit);
};
- struct PPCLinuxMCAsmInfo : public MCAsmInfo {
+ class PPCLinuxMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit PPCLinuxMCAsmInfo(bool is64Bit);
};
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 262f97c36a93..5a6827ffd8d3 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -57,7 +57,7 @@ public:
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
- unsigned getBinaryCodeForInstr(const MCInst &MI,
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index d5c8a9e72c67..6568e82e2bf0 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,6 +20,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -76,7 +77,8 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
if (RM == Reloc::Default) {
@@ -86,7 +88,7 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
else
RM = Reloc::Static;
}
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -106,8 +108,10 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
- return new PPCInstPrinter(MAI, SyntaxVariant);
+ return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant);
}
extern "C" void LLVMInitializePowerPCTargetMC() {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index e5bf2a9dd92f..b7fa0646288d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -14,14 +14,18 @@
#ifndef PPCMCTARGETDESC_H
#define PPCMCTARGETDESC_H
+#include "llvm/Support/DataTypes.h"
+
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class Target;
class StringRef;
+class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
@@ -31,7 +35,11 @@ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT);
-
+
+/// createPPCELFObjectWriter - Construct a PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI);
} // End llvm namespace
// Defines symbolic names for PowerPC registers. This defines a mapping from
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 5dc1863a0b2d..24a7178d1ff9 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -25,14 +25,11 @@
namespace llvm {
class PPCTargetMachine;
class FunctionPass;
- class formatted_raw_ostream;
class JITCodeEmitter;
- class Target;
class MachineInstr;
class AsmPrinter;
class MCInst;
- class TargetMachine;
-
+
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td
index 2d5d302728f7..c554d39434c8 100644
--- a/lib/Target/PowerPC/PPC.td
+++ b/lib/Target/PowerPC/PPC.td
@@ -1,10 +1,10 @@
-//===- PPC.td - Describe the PowerPC Target Machine --------*- tablegen -*-===//
-//
+//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This is the top level entry point for the PowerPC target.
@@ -23,6 +23,7 @@ include "llvm/Target/Target.td"
// CPU Directives //
//===----------------------------------------------------------------------===//
+def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
@@ -33,6 +34,7 @@ def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
"Enable 64-bit instructions">;
@@ -46,6 +48,8 @@ def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
"Enable the stfiwx instruction">;
+def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
+ "Enable Book E instructions">;
//===----------------------------------------------------------------------===//
// Register File Description
@@ -60,6 +64,8 @@ include "PPCInstrInfo.td"
//
def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : Processor<"440", PPC440Itineraries, [Directive440, FeatureBookE]>;
+def : Processor<"450", PPC440Itineraries, [Directive440, FeatureBookE]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603]>;
@@ -82,6 +88,10 @@ def : Processor<"g5", G5Itineraries,
[Directive970, FeatureAltivec,
FeatureGPUL, FeatureFSqrt, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : Processor<"a2", PPCA2Itineraries, [DirectiveA2, FeatureBookE,
+ FeatureFSqrt, FeatureSTFIWX,
+ Feature64Bit
+ /*, Feature64BitRegs */]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : Processor<"ppc64", G5Itineraries,
[Directive64, FeatureAltivec,
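Note: the new processor defs are selectable from the command line, e.g. (illustrative invocations):

    llc -mtriple=powerpc-unknown-linux-gnu   -mcpu=440 in.ll -o out.s
    llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2  in.ll -o out.s

-mcpu picks the Processor def, which in turn pulls in the itinerary (PPC440Itineraries, PPCA2Itineraries) and feature bits such as FeatureBookE.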
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 952845943179..fb7aa71d98d3 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly --------=//
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,6 +20,7 @@
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "PPCSubtarget.h"
+#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
@@ -39,6 +40,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -49,10 +51,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ELF.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/SmallString.h"
-#include "InstPrinter/PPCInstPrinter.h"
using namespace llvm;
namespace {
@@ -366,14 +367,21 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::MFCRpseud:
+ case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7
// Into: %R3 = MFCR ;; cr7
OutStreamer.AddComment(PPCInstPrinter::
getRegisterName(MI->getOperand(1).getReg()));
- TmpInst.setOpcode(PPC::MFCR);
+ TmpInst.setOpcode(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR);
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
OutStreamer.EmitInstruction(TmpInst);
return;
+ case PPC::SYNC:
+    // In Book E, sync is called msync; handle this special case here.
+ if (Subtarget.isBookE()) {
+ OutStreamer.EmitRawText(StringRef("\tmsync"));
+ return;
+ }
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
@@ -385,14 +393,26 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
- // FIXME 64-bit SVR4: Use MCSection here!
- OutStreamer.EmitRawText(StringRef("\t.section\t\".opd\",\"aw\""));
- OutStreamer.EmitRawText(StringRef("\t.align 3"));
+ const MCSection *Current = OutStreamer.getCurrentSection();
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
OutStreamer.EmitLabel(CurrentFnSym);
- OutStreamer.EmitRawText("\t.quad .L." + Twine(CurrentFnSym->getName()) +
- ",.TOC.@tocbase");
- OutStreamer.EmitRawText(StringRef("\t.previous"));
- OutStreamer.EmitRawText(".L." + Twine(CurrentFnSym->getName()) + ":");
+ OutStreamer.EmitValueToAlignment(8);
+ MCSymbol *Symbol1 =
+ OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC.@tocbase"));
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2, OutContext),
+ Subtarget.isPPC64() ? 8 : 4/*size*/, 0/*addrspace*/);
+ OutStreamer.SwitchSection(Current);
+
+ MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
+ ".L." + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(RealFnSym);
+ CurrentFnSymForSize = RealFnSym;
}
@@ -402,8 +422,10 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
if (isPPC64 && !TOC.empty()) {
- // FIXME 64-bit SVR4: Use MCSection here?
- OutStreamer.EmitRawText(StringRef("\t.section\t\".toc\",\"aw\""));
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
// FIXME: This is nondeterministic!
for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
@@ -421,12 +443,14 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
static const char *const CPUDirectives[] = {
"",
"ppc",
+ "ppc440",
"ppc601",
"ppc602",
"ppc603",
"ppc7400",
"ppc750",
"ppc970",
+ "ppcA2",
"ppc64"
};
@@ -435,7 +459,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
Directive = PPC::DIR_970;
if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
Directive = PPC::DIR_7400;
- if (Subtarget.isPPC64() && Directive < PPC::DIR_970)
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
Directive = PPC::DIR_64;
assert(Directive <= PPC::DIR_64 && "Directive out of range.");
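Note: the .opd rewrite emits the same 64-bit SVR4 (ELFv1) procedure descriptor as the old raw-text path, but through real MCSection/MCSymbol APIs so non-assembly emission can work too. The output is roughly (illustrative):

            .section ".opd","aw"
    func:                           # the function symbol points at the descriptor
            .quad .L.func, .TOC.@tocbase
            .text
    .L.func:                        # actual code entry; CurrentFnSymForSize makes
            ...                     # .size measure from here, not the descriptor

Tracking RealFnSym matters because the descriptor lives in .opd while the code lives in .text; sizes and offsets must be taken from the .L. entry label.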
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 475edf309c0c..5f775e16f1ca 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -1,4 +1,4 @@
-//===-- PPCBranchSelector.cpp - Emit long conditional branches-----*- C++ -*-=//
+//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 441db94581ae..9883c2e42995 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,10 +1,10 @@
//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the PowerPC 32- and 64-bit
@@ -130,3 +130,34 @@ def CC_PPC_SVR4_ByVal : CallingConv<[
CCCustom<"CC_PPC_SVR4_Custom_Dummy">
]>;
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4,
+ V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
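Note: these CalleeSavedRegs defs feed TableGen's -gen-register-info output, which synthesizes both a save list and a call-preserved register mask per def (names follow the CSR_<def> convention). The register-info hooks then just pick one; a sketch of the consuming side (exact signatures may differ slightly in-tree):

    const uint16_t *
    PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
      // TableGen emitted CSR_Darwin32_SaveList, CSR_SVR432_SaveList, ... from
      // the defs above; select by ABI (and by 32/64-bit in the real code).
      return Subtarget.isDarwinABI() ? CSR_Darwin32_SaveList
                                     : CSR_SVR432_SaveList;
    }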
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 4a1f1822afdf..252a2d159ec3 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC32 -------*- C++ -*-=//
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,7 +50,7 @@ namespace {
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
/// machine instructions.
- unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
MachineRelocation GetRelocation(const MachineOperand &MO,
@@ -138,7 +138,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MFOCRF) &&
+ assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
return 0x80 >> getPPCRegisterNumbering(MO.getReg());
}
@@ -248,7 +249,8 @@ unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
if (MO.isReg()) {
// MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
return getPPCRegisterNumbering(MO.getReg());
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index 0b85fea65758..b77a80bbf30d 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -1,4 +1,4 @@
-//=====- PPCFrameLowering.cpp - PPC Frame Information -----------*- C++ -*-===//
+//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -38,7 +38,7 @@ using namespace llvm;
/// VRRegNo - Map from a numbered VR register to its enum value.
///
-static const unsigned short VRRegNo[] = {
+static const uint16_t VRRegNo[] = {
PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
@@ -64,7 +64,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
// epilog blocks.
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
- if (!I->empty() && I->back().getDesc().isReturn()) {
+ if (!I->empty() && I->back().isReturn()) {
bool FoundIt = false;
for (MBBI = I->end(); MBBI != I->begin(); ) {
--MBBI;
@@ -244,8 +244,10 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttr(Attribute::Naked))
return false;
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects() ||
- (GuaranteedTailCallOpt && MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -365,8 +367,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
- .addReg(PPC::R1)
- .addReg(PPC::R1)
+ .addReg(PPC::R1, RegState::Kill)
+ .addReg(PPC::R1, RegState::Define)
.addReg(PPC::R0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
@@ -380,8 +382,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
- .addReg(PPC::R1)
- .addReg(PPC::R1)
+ .addReg(PPC::R1, RegState::Kill)
+ .addReg(PPC::R1, RegState::Define)
.addReg(PPC::R0);
}
} else { // PPC64.
@@ -398,8 +400,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0)
.addImm(NegFrameSize);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
- .addReg(PPC::X1)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Kill)
+ .addReg(PPC::X1, RegState::Define)
.addReg(PPC::X0);
} else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
@@ -413,8 +415,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX))
- .addReg(PPC::X1)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Kill)
+ .addReg(PPC::X1, RegState::Define)
.addReg(PPC::X0);
}
}
@@ -655,7 +657,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
- if (GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
@@ -758,7 +760,8 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
@@ -769,7 +772,7 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// FIXME: doesn't detect whether or not we need to spill vXX, which requires
// r0 for now.
- if (RegInfo->requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable.
+ if (RegInfo->requiresRegisterScavenging(MF))
if (needsFP(MF) || spillsCR(MF)) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
@@ -863,7 +866,8 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// Take into account stack space reserved for tail calls.
int TCSPDelta = 0;
- if (GuaranteedTailCallOpt && (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
LowerBound = TCSPDelta;
}
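Note: the RegState changes in the prologue hunks above teach liveness tracking and the machine verifier what stwux/stdux really do: the update-form store both consumes the old stack pointer and produces the updated one, so R1/X1 appears once as a killed use and once as a def instead of as two plain uses. Annotated form of the call (operand roles per the instruction description):

    BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX))
        .addReg(PPC::R1, RegState::Kill)    // old SP: last use of this value
        .addReg(PPC::R1, RegState::Define)  // new SP: written by the update form
        .addReg(PPC::R0);                   // scratch register holding -FrameSize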
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 20faa71d4148..d708541c6686 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -1,4 +1,4 @@
-//==-- PPCFrameLowering.h - Define frame lowering for PowerPC ----*- C++ -*-==//
+//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index cddc9d858adf..6ed1fb9e6a3c 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -22,6 +22,30 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// PowerPC Scoreboard Hazard Recognizer
+void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ // This is a PPC pseudo-instruction.
+ return;
+
+ ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+ScheduleHazardRecognizer::HazardType
+PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+void PPCScoreboardHazardRecognizer::AdvanceCycle() {
+ ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCScoreboardHazardRecognizer::Reset() {
+ ScoreboardHazardRecognizer::Reset();
+}
+
+//===----------------------------------------------------------------------===//
// PowerPC 970 Hazard Recognizer
//
// This models the dispatch group formation of the PPC970 processor. Dispatch
@@ -67,12 +91,6 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- if ((int)Opcode >= 0) {
- isFirst = isSingle = isCracked = isLoad = isStore = false;
- return PPCII::PPC970_Pseudo;
- }
- Opcode = ~Opcode;
-
const MCInstrDesc &MCID = TII.get(Opcode);
isLoad = MCID.mayLoad();
@@ -89,29 +107,23 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
bool PPCHazardRecognizer970::
-isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const {
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
for (unsigned i = 0, e = NumStores; i != e; ++i) {
// Handle exact and commuted addresses.
- if (Ptr1 == StorePtr1[i] && Ptr2 == StorePtr2[i])
- return true;
- if (Ptr2 == StorePtr1[i] && Ptr1 == StorePtr2[i])
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
return true;
// Okay, we don't have an exact match, if this is an indexed offset, see if
// we have overlap (which happens during fp->int conversion for example).
- if (StorePtr2[i] == Ptr2) {
- if (ConstantSDNode *StoreOffset = dyn_cast<ConstantSDNode>(StorePtr1[i]))
- if (ConstantSDNode *LoadOffset = dyn_cast<ConstantSDNode>(Ptr1)) {
- // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
- // to see if the load and store actually overlap.
- int StoreOffs = StoreOffset->getZExtValue();
- int LoadOffs = LoadOffset->getZExtValue();
- if (StoreOffs < LoadOffs) {
- if (int(StoreOffs+StoreSize[i]) > LoadOffs) return true;
- } else {
- if (int(LoadOffs+LoadSize) > StoreOffs) return true;
- }
- }
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
}
}
return false;
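
The rewritten test above collapses the old SDValue pointer matching into a pure interval check on (offset, size) pairs once the base IR Value matches. A minimal standalone sketch of that check (hypothetical helper, plain C++ rather than LLVM code):

    #include <cstdint>

    // Two accesses off the same base pointer overlap iff neither one ends
    // at or before the other's start.
    static bool accessesOverlap(int64_t StoreOff, uint64_t StoreSize,
                                int64_t LoadOff, uint64_t LoadSize) {
      if (StoreOff < LoadOff)
        return int64_t(StoreOff + StoreSize) > LoadOff;  // store runs into load
      return int64_t(LoadOff + LoadSize) > StoreOff;     // load runs into store
    }
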
@@ -125,13 +137,17 @@ ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
getHazardType(SUnit *SU, int Stalls) {
assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
- unsigned Opcode = Node->getMachineOpcode();
// We can only issue a PPC970_First/PPC970_Single instruction (such as
// crand/mtspr/etc) if this is the first cycle of the dispatch group.
@@ -168,55 +184,10 @@ getHazardType(SUnit *SU, int Stalls) {
// If this is a load following a store, make sure it's not to the same or
// overlapping address.
- if (isLoad && NumStores) {
- unsigned LoadSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown load!");
- case PPC::LBZ: case PPC::LBZU:
- case PPC::LBZX:
- case PPC::LBZ8: case PPC::LBZU8:
- case PPC::LBZX8:
- case PPC::LVEBX:
- LoadSize = 1;
- break;
- case PPC::LHA: case PPC::LHAU:
- case PPC::LHAX:
- case PPC::LHZ: case PPC::LHZU:
- case PPC::LHZX:
- case PPC::LVEHX:
- case PPC::LHBRX:
- case PPC::LHA8: case PPC::LHAU8:
- case PPC::LHAX8:
- case PPC::LHZ8: case PPC::LHZU8:
- case PPC::LHZX8:
- LoadSize = 2;
- break;
- case PPC::LFS: case PPC::LFSU:
- case PPC::LFSX:
- case PPC::LWZ: case PPC::LWZU:
- case PPC::LWZX:
- case PPC::LWA:
- case PPC::LWAX:
- case PPC::LVEWX:
- case PPC::LWBRX:
- case PPC::LWZ8:
- case PPC::LWZX8:
- LoadSize = 4;
- break;
- case PPC::LFD: case PPC::LFDU:
- case PPC::LFDX:
- case PPC::LD: case PPC::LDU:
- case PPC::LDX:
- LoadSize = 8;
- break;
- case PPC::LVX:
- case PPC::LVXL:
- LoadSize = 16;
- break;
- }
-
- if (isLoadOfStoredAddress(LoadSize,
- Node->getOperand(0), Node->getOperand(1)))
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
return NoopHazard;
}
@@ -224,66 +195,27 @@ getHazardType(SUnit *SU, int Stalls) {
}
void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
- const SDNode *Node = SU->getNode()->getGluedMachineNode();
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType =
- GetInstrType(Node->getOpcode(), isFirst, isSingle, isCracked,
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
isLoad, isStore);
if (InstrType == PPCII::PPC970_Pseudo) return;
- unsigned Opcode = Node->getMachineOpcode();
// Update structural hazard information.
if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
// Track the address stored to.
- if (isStore) {
- unsigned ThisStoreSize;
- switch (Opcode) {
- default: llvm_unreachable("Unknown store instruction!");
- case PPC::STB: case PPC::STB8:
- case PPC::STBU: case PPC::STBU8:
- case PPC::STBX: case PPC::STBX8:
- case PPC::STVEBX:
- ThisStoreSize = 1;
- break;
- case PPC::STH: case PPC::STH8:
- case PPC::STHU: case PPC::STHU8:
- case PPC::STHX: case PPC::STHX8:
- case PPC::STVEHX:
- case PPC::STHBRX:
- ThisStoreSize = 2;
- break;
- case PPC::STFS:
- case PPC::STFSU:
- case PPC::STFSX:
- case PPC::STWX: case PPC::STWX8:
- case PPC::STWUX:
- case PPC::STW: case PPC::STW8:
- case PPC::STWU:
- case PPC::STVEWX:
- case PPC::STFIWX:
- case PPC::STWBRX:
- ThisStoreSize = 4;
- break;
- case PPC::STD_32:
- case PPC::STDX_32:
- case PPC::STD:
- case PPC::STDU:
- case PPC::STFD:
- case PPC::STFDX:
- case PPC::STDX:
- case PPC::STDUX:
- ThisStoreSize = 8;
- break;
- case PPC::STVX:
- case PPC::STVXL:
- ThisStoreSize = 16;
- break;
- }
-
- StoreSize[NumStores] = ThisStoreSize;
- StorePtr1[NumStores] = Node->getOperand(1);
- StorePtr2[NumStores] = Node->getOperand(2);
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
++NumStores;
}
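
EmitInstruction now reads the (value, offset, size) triple straight from the instruction's first MachineMemOperand instead of the old per-opcode size table, and it caps the history at four stores. A self-contained sketch of the same bounded bookkeeping, with a hypothetical MemRef struct standing in for the memoperand:

    #include <cstdint>

    // Hypothetical stand-in for the MachineMemOperand fields used above.
    struct MemRef { const void *Base; int64_t Off; uint64_t Size; };

    class StoreHistory {
      static const unsigned Cap = 4;  // mirrors the recognizer's [4] arrays
      MemRef Stores[Cap];
      unsigned NumStores;
    public:
      StoreHistory() : NumStores(0) {}
      void recordStore(const MemRef &M) {
        if (NumStores < Cap)          // drop stores beyond capacity, as above
          Stores[NumStores++] = M;
      }
      void clear() { NumStores = 0; } // e.g. when a dispatch group ends
    };
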
@@ -306,3 +238,8 @@ void PPCHazardRecognizer970::AdvanceCycle() {
if (NumIssued == 5)
EndDispatchGroup();
}
+
+void PPCHazardRecognizer970::Reset() {
+ EndDispatchGroup();
+}
+
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 2f81f0f7c7f1..55b45d01b20e 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -14,12 +14,28 @@
#ifndef PPCHAZRECS_H
#define PPCHAZRECS_H
+#include "PPCInstrInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "PPCInstrInfo.h"
namespace llvm {
+/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for generic PPC processors.
+class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+public:
+ PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
+
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void Reset();
+};
+
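
A recognizer like this is normally handed to the scheduler through the target's instruction info. A hedged sketch of that wiring via the CreateTargetHazardRecognizer hook from this LLVM era; the DIR_440 dispatch shown is an assumption for illustration, not part of this diff:

    // Assumes PPCInstrInfo.h and PPCHazardRecognizers.h are included.
    ScheduleHazardRecognizer *
    PPCInstrInfo::CreateTargetHazardRecognizer(const TargetMachine *TM,
                                               const ScheduleDAG *DAG) const {
      unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
      if (Directive == PPC::DIR_440)  // in-order core: use the scoreboard
        return new PPCScoreboardHazardRecognizer(TM->getInstrItineraryData(), DAG);
      return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
    }
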
/// PPCHazardRecognizer970 - This class defines a finite state automata that
/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
/// promotes good dispatch group formation and implements noop insertion to
@@ -42,8 +58,9 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
//
// This is null if we haven't seen a store yet.  We track the base Value,
// byte offset and size of each store, as taken from its MachineMemOperand.
- SDValue StorePtr1[4], StorePtr2[4];
- unsigned StoreSize[4];
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
unsigned NumStores;
public:
@@ -51,6 +68,7 @@ public:
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
+ virtual void Reset();
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
@@ -63,8 +81,8 @@ private:
bool &isFirst, bool &isSingle,bool &isCracked,
bool &isLoad, bool &isStore);
- bool isLoadOfStoredAddress(unsigned LoadSize,
- SDValue Ptr1, SDValue Ptr2) const;
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 6f204cc58636..5a04888dd45b 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -18,7 +18,6 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -211,13 +210,13 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
// Find all return blocks, outputting a restore in each epilog.
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
- if (!BB->empty() && BB->back().getDesc().isReturn()) {
+ if (!BB->empty() && BB->back().isReturn()) {
IP = BB->end(); --IP;
// Skip over all terminator instructions, which are part of the return
// sequence.
MachineBasicBlock::iterator I2 = IP;
- while (I2 != BB->begin() && (--I2)->getDesc().isTerminator())
+ while (I2 != BB->begin() && (--I2)->isTerminator())
IP = I2;
// Emit: MTVRSAVE InVRSave
@@ -378,8 +377,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
APInt LKZ, LKO, RKZ, RKO;
- CurDAG->ComputeMaskedBits(Op0, APInt::getAllOnesValue(32), LKZ, LKO);
- CurDAG->ComputeMaskedBits(Op1, APInt::getAllOnesValue(32), RKZ, RKO);
+ CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
+ CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
unsigned TargetMask = LKZ.getZExtValue();
unsigned InsertMask = RKZ.getZExtValue();
@@ -603,7 +602,6 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
case ISD::SETULT: return 0;
case ISD::SETUGT: return 1;
}
- return 0;
}
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
@@ -1067,7 +1065,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Target = N->getOperand(1);
unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
- Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Target,
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
Chain), 0);
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d6b8a9ee93c7..3b24951d1dc9 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,26 +16,24 @@
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
static bool CC_PPC_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
@@ -104,6 +102,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// from FP_ROUND: that rounds to nearest, this rounds to zero.
setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+
// PowerPC has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
@@ -147,9 +152,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
// PowerPC does not have ROTR
setOperationAction(ISD::ROTR, MVT::i32 , Expand);
@@ -217,11 +226,23 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
- // VAARG is custom lowered with the 32-bit SVR4 ABI.
- if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()) {
+ if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ }
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
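
The promotions are forced by the 64-bit SVR4 va_list layout: every variadic argument occupies its own double word, so a narrow integer is fetched by loading the full slot and truncating. A small portable C++ illustration of the slot-per-argument model (the layout being described, not the lowering itself):

    #include <cassert>
    #include <cstdarg>

    // On PPC64 SVR4 each vararg gets one 8-byte slot; reading a small integer
    // means loading the whole slot and truncating.
    long sum_slots(int n, ...) {
      va_list ap;
      va_start(ap, n);
      long total = 0;
      for (int i = 0; i < n; ++i)
        total += va_arg(ap, long);  // one full double word per argument
      va_end(ap);
      return total;
    }

    int main() {
      assert(sum_slots(3, 1L, 2L, 3L) == 6);
      return 0;
    }
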
@@ -333,7 +354,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
}
// We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
@@ -366,6 +389,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
}
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport())
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
@@ -408,6 +434,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
+ setSchedulingPreference(Sched::Hybrid);
+
computeRegisterProperties();
}
@@ -418,7 +446,16 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
// Darwin passes everything on 4 byte boundary.
if (TM.getSubtarget<PPCSubtarget>().isDarwin())
return 4;
- // FIXME SVR4 TBD
+
+ // 16-byte and wider vectors are passed on a 16-byte boundary.
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ if (VTy->getBitWidth() >= 128)
+ return 16;
+
+ // The rest gets an 8-byte boundary on PPC64 and a 4-byte boundary on PPC32.
+ if (PPCSubTarget.isPPC64())
+ return 8;
+
return 4;
}
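
The replacement for the old "FIXME SVR4 TBD" encodes three rules: Darwin stays at 4 bytes, vectors of 128 bits or more get 16-byte slots, and everything else gets pointer-size alignment. A hedged restatement as a free function, with plain flags standing in for the real subtarget queries:

    // Pass VectorBits = 0 for non-vector types.
    unsigned byValAlign(bool IsDarwin, bool IsPPC64, unsigned VectorBits) {
      if (IsDarwin)          return 4;   // Darwin: everything on a 4-byte boundary
      if (VectorBits >= 128) return 16;  // 16-byte and wider vectors
      return IsPPC64 ? 8 : 4;            // otherwise pointer-sized alignment
    }
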
@@ -447,6 +484,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
+ case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
@@ -822,14 +860,10 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
DAG.ComputeMaskedBits(N.getOperand(1),
- APInt::getAllOnesValue(N.getOperand(1)
- .getValueSizeInBits()),
RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
@@ -884,10 +918,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
- LHSKnownZero, LHSKnownOne);
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -1000,10 +1031,7 @@ bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- APInt::getAllOnesValue(N.getOperand(0)
- .getValueSizeInBits()),
- LHSKnownZero, LHSKnownOne);
+ DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
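
Both SelectAddressRegImm hunks rely on the same identity: when every bit that can be set in the immediate is provably zero in the other operand, the OR cannot produce a carry, so it computes the same result as an ADD and the address folds as base+imm. A quick standalone check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t lhs       = 0xFFFF0000u;  // runtime value
      uint64_t knownZero = ~lhs;         // bits provably zero in lhs
      uint64_t imm       = 0x00001234u;  // fits entirely inside those zero bits
      assert((knownZero | ~imm) == ~uint64_t(0));  // the fold's legality test
      assert((lhs | imm) == (lhs + imm));          // so OR behaves like ADD
      return 0;
    }
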
@@ -1223,7 +1251,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
// extra load to get the address of the global.
if (MOHiFlag & PPCII::MO_NLP_FLAG)
Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return Ptr;
}
@@ -1319,11 +1347,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
// areas
SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
InChain = OverflowArea.getValue(1);
SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
InChain = RegSaveArea.getValue(1);
// select overflow_area if index > 8
@@ -1372,7 +1402,8 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
MachinePointerInfo(),
MVT::i32, false, false, 0);
- return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
@@ -1411,8 +1442,9 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
Args, DAG, dl);
@@ -1530,7 +1562,7 @@ static bool CC_PPC_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -1557,7 +1589,7 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1581,8 +1613,8 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const unsigned *GetFPR() {
- static const unsigned FPR[] = {
+static const uint16_t *GetFPR() {
+ static const uint16_t FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -1663,7 +1695,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = 4;
// Assign locations to all of the incoming arguments.
@@ -1681,7 +1714,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// Arguments stored in registers.
if (VA.isRegLoc()) {
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
EVT ValVT = VA.getValVT();
switch (ValVT.getSimpleVT().SimpleTy) {
@@ -1721,7 +1754,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
MachinePointerInfo(),
- false, false, 0));
+ false, false, false, 0));
}
}
@@ -1762,13 +1795,13 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const unsigned GPArgRegs[] = {
+ static const uint16_t GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const unsigned FPArgRegs[] = {
+ static const uint16_t FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -1853,25 +1886,26 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = PtrVT == MVT::i64;
// Potential tail calls could cause overwriting of argument stack slots.
- bool isImmutable = !(GuaranteedTailCallOpt && (CallConv==CallingConv::Fast));
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
unsigned PtrByteSize = isPPC64 ? 8 : 4;
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -1882,7 +1916,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -1896,12 +1930,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
- unsigned ObjSize = ObjectVT.getSizeInBits()/8;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
if (Flags.isByVal()) {
// ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of regs.
- ObjSize = Flags.getByValSize();
+ unsigned ObjSize = Flags.getByValSize();
unsigned ArgSize =
((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
VecArgOffset += ArgSize;
@@ -2138,7 +2171,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
InVals.push_back(ArgVal);
@@ -2259,9 +2292,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
// Tail call needs the stack to be aligned.
- if (CC==CallingConv::Fast && GuaranteedTailCallOpt) {
- unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
- getStackAlignment();
+ if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){
+ unsigned TargetAlign = DAG.getMachineFunction().getTarget().
+ getFrameLowering()->getStackAlignment();
unsigned AlignMask = TargetAlign-1;
NumBytes = (NumBytes + AlignMask) & ~AlignMask;
}
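
The rounding here is the standard power-of-two align-up idiom: for a power-of-two TargetAlign, (n + (a-1)) & ~(a-1) rounds n up to the next multiple of a. A one-line sanity check:

    #include <cassert>

    int main() {
      unsigned TargetAlign = 16, AlignMask = TargetAlign - 1;
      unsigned NumBytes = 52;
      NumBytes = (NumBytes + AlignMask) & ~AlignMask;
      assert(NumBytes == 64);  // 52 rounded up to the next multiple of 16
      return 0;
    }
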
@@ -2295,7 +2328,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
- if (!GuaranteedTailCallOpt)
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
// Variable argument functions are not supported.
@@ -2443,7 +2476,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
LROpOut = getReturnAddrFrameIndex(DAG);
LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
Chain = SDValue(LROpOut.getNode(), 1);
// When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
@@ -2451,7 +2484,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
if (isDarwinABI) {
FPOpOut = getFramePointerFrameIndex(DAG);
FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
Chain = SDValue(FPOpOut.getNode(), 1);
}
}
@@ -2748,7 +2781,14 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// the stack. Account for this here so these bytes can be pushed back on in
// PPCRegisterInfo::eliminateCallFramePseudoInstr.
int BytesCalleePops =
- (CallConv==CallingConv::Fast && GuaranteedTailCallOpt) ? NumBytes : 0;
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
if (InFlag.getNode())
Ops.push_back(InFlag);
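
This register-mask operand is what lets the PPCInstr64Bit.td hunks further down drop their huge per-call Defs clobber lists: a single immutable bit vector, one bit per register number, describes what the calling convention preserves. A sketch of the mask convention (matching MachineOperand::clobbersPhysReg in this era):

    #include <cstdint>

    // Set bit => the register is preserved across the call.
    inline bool regIsPreservedAcrossCall(const uint32_t *Mask, unsigned Reg) {
      return (Mask[Reg / 32] >> (Reg % 32)) & 1;
    }
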
@@ -2776,9 +2816,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
// Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub
@@ -2787,8 +2824,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
+
+ bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
- SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
if (CallOpc == PPCISD::BCTRL_SVR4) {
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
@@ -2799,14 +2837,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
- Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
- InFlag = Chain.getValue(1);
- } else {
+ needsTOCRestore = true;
+ } else if (CallOpc == PPCISD::CALL_SVR4) {
// Otherwise insert NOP.
- InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag);
+ CallOpc = PPCISD::CALL_NOP_SVR4;
}
}
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ if (needsTOCRestore) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag);
@@ -2820,7 +2866,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
SDValue
PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2864,7 +2910,8 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
// and restoring the caller's stack pointer in this function's epilog. This is
// done because, by tail calling, the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
// Count how many bytes are to be pushed on the stack, including the linkage
@@ -3071,7 +3118,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// and restoring the caller's stack pointer in this function's epilog. This is
// done because, by tail calling, the called function might overwrite the value
// in this function's (MF) stack pointer stack slot 0(SP).
- if (GuaranteedTailCallOpt && CallConv==CallingConv::Fast)
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
unsigned nAltivecParamsAtEnd = 0;
@@ -3120,17 +3168,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const unsigned GPR_32[] = { // 32-bit registers.
+ static const uint16_t GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const unsigned GPR_64[] = { // 64-bit registers.
+ static const uint16_t GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const unsigned *FPR = GetFPR();
+ static const uint16_t *FPR = GetFPR();
- static const unsigned VR[] = {
+ static const uint16_t VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -3138,7 +3186,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -3212,7 +3260,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
ArgOffset += PtrByteSize;
@@ -3250,7 +3298,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Float varargs are always shadowed in available integer registers
if (GPR_idx != NumGPRs) {
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false,
+ false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3259,7 +3308,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3308,7 +3357,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
if (VR_idx != NumVRs) {
SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
@@ -3319,7 +3368,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
DAG.getConstant(i, PtrVT));
SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
}
@@ -3483,7 +3532,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
// Load the old link SP.
SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Restore the stack pointer.
Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
@@ -3674,7 +3723,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
DAG.getConstant(4, FIPtr.getValueType()));
return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -3718,7 +3767,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
Ops, 4, MVT::i64, MMO);
// Load the value as a double.
SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// FCFID it and return it.
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
@@ -3770,7 +3819,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue Four = DAG.getConstant(4, PtrVT);
SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
@@ -4236,8 +4285,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
// Check to see if this is a shuffle of 4-byte values. If so, we can use our
// perfect shuffle table to emit an optimal matching sequence.
- SmallVector<int, 16> PermMask;
- SVOp->getMask(PermMask);
+ ArrayRef<int> PermMask = SVOp->getMask();
unsigned PFIndexes[4];
bool isFourElementShuffle = true;
@@ -4441,7 +4489,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
false, false, 0);
// Load it out.
return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -4549,7 +4597,6 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
}
- return SDValue();
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
@@ -4559,8 +4606,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
- assert(false && "Do not know how to custom type legalize this operation!");
- return;
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::VAARG: {
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|| TM.getSubtarget<PPCSubtarget>().isPPC64())
@@ -5461,12 +5507,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
//===----------------------------------------------------------------------===//
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case PPCISD::LBRX: {
@@ -5700,7 +5745,7 @@ bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
return (V > -(1 << 16) && V < (1 << 16)-1);
}
-bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
+bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
return false;
}
@@ -5729,13 +5774,13 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address off the stack.
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
@@ -5749,7 +5794,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
- bool is31 = (DisableFramePointerElim(MF) || MFI->hasVarSizedObjects()) &&
+ bool is31 = (getTargetMachine().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects()) &&
MFI->getStackSize() &&
!MF.getFunction()->hasFnAttr(Attribute::Naked);
unsigned FrameReg = isPPC64 ? (is31 ? PPC::X31 : PPC::X1) :
@@ -5758,7 +5804,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
PtrVT);
while (Depth--)
FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
- FrameAddr, MachinePointerInfo(), false, false, 0);
+ FrameAddr, MachinePointerInfo(), false, false,
+ false, 0);
return FrameAddr;
}
@@ -5774,7 +5821,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
-/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -5782,7 +5829,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
if (this->PPCSubTarget.isPPC64()) {
@@ -5791,3 +5838,12 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
return MVT::i32;
}
}
+
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+ unsigned Directive = PPCSubTarget.getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2)
+ return Sched::ILP;
+
+ return TargetLowering::getSchedulingPreference(N);
+}
+
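
The new hook requests ILP-oriented scheduling on the deeply in-order PPC 440 and A2 cores, while the Sched::Hybrid default set in the constructor covers everything else. A trivial sketch of that dispatch, with enum values invented here for illustration:

    enum Directive { DIR_440, DIR_A2, DIR_970, DIR_OTHER };
    enum SchedPref { ILP, Hybrid };

    SchedPref schedPrefFor(Directive D) {
      // Deep in-order pipelines benefit from an ILP-maximizing order.
      return (D == DIR_440 || D == DIR_A2) ? ILP : Hybrid;
    }
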
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 430e45e80493..18eb07200307 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -15,10 +15,10 @@
#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/CodeGen/SelectionDAG.h"
#include "PPC.h"
#include "PPCSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
namespace llvm {
namespace PPCISD {
@@ -95,7 +95,9 @@ namespace llvm {
EXTSW_32,
/// CALL - A direct function call.
- CALL_Darwin, CALL_SVR4,
+ /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
+ /// SVR4 calls.
+ CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
/// NOP - Special NOP which follows 64-bit SVR4 calls.
NOP,
@@ -279,6 +281,7 @@ namespace llvm {
bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG) const;
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
@@ -293,7 +296,6 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -353,7 +355,7 @@ namespace llvm {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
- /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -361,7 +363,7 @@ namespace llvm {
/// target-independent logic.
virtual EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe, bool MemcpyStrSrc,
+ bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
private:
@@ -437,8 +439,8 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -472,21 +474,21 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
SDValue
- LowerCall_Darwin(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
+ LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
- LowerCall_SVR4(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index e88ad378ccd9..7f67a4159dfe 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1,10 +1,10 @@
-//===- PPCInstr64Bit.td - The PowerPC 64-bit Support -------*- tablegen -*-===//
-//
+//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the PowerPC 64-bit instructions. These patterns are used
@@ -64,13 +64,7 @@ let Defs = [LR8] in
PPC970_Unit_BRU;
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_Darwin : IForm<18, 0, 1,
@@ -90,23 +84,29 @@ let isCall = 1, PPC970_Unit = 7,
// ELF 64 ABI Calls = Darwin ABI Calls
// Used to define BL8_ELF and BLA8_ELF
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the PPC64 non-callee saved registers.
- Defs = [X0,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR8,CTR8,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1,
(outs), (ins calltarget:$func, variable_ops),
- "bl $func", BrB, []>; // See Pat patterns below.
+ "bl $func", BrB, []>; // See Pat patterns below.
+
+ let isCodeGenOnly = 1 in
+ def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func, variable_ops),
+ "bl $func\n\tnop", BrB, []>;
+
def BLA8_ELF : IForm<18, 1, 1,
(outs), (ins aaddr:$func, variable_ops),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
+
+ let isCodeGenOnly = 1 in
+ def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins aaddr:$func, variable_ops),
+ "bla $func\n\tnop", BrB,
+ [(PPCcall_nop_SVR4 (i64 imm:$func))]>;
}
- let Uses = [CTR8, RM] in {
+ let Uses = [X11, CTR8, RM] in {
def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
(outs), (ins variable_ops),
"bctrl", BrB,
@@ -123,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
(BL8_ELF tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
+ (BL8_NOP_ELF tglobaladdr:$dst)>;
+
def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>;
+def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
+ (BL8_NOP_ELF texternalsym:$dst)>;
+
def : Pat<(PPCnop),
(NOP)>;
@@ -223,6 +229,18 @@ def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
(TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+// 64-bit CR instructions
+def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins G8RC:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8pseud: XFXForm_3<31, 19, (outs G8RC:$rT), (ins crbitm:$FXM),
+ "", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs G8RC:$rT), (ins),
+ "mfcr $rT", SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
@@ -469,6 +487,12 @@ def RLDICR : MDForm_1<30, 1,
(outs G8RC:$rA), (ins G8RC:$rS, u6imm:$SH, u6imm:$ME),
"rldicr $rA, $rS, $SH, $ME", IntRotateD,
[]>, isPPC64;
+
+def RLWINM8 : MForm_2<21,
+ (outs G8RC:$rA), (ins G8RC:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IntGeneral,
+ []>;
+
} // End FXU Operations.
@@ -500,7 +524,7 @@ def LWAX : XForm_1<31, 341, (outs G8RC:$rD), (ins memrr:$src),
let mayLoad = 1 in
def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp,
ptr_rc:$rA),
- "lhau $rD, $disp($rA)", LdStGeneral,
+ "lhau $rD, $disp($rA)", LdStLoad,
[]>, RegConstraint<"$rA = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
@@ -510,38 +534,38 @@ def LHAU8 : DForm_1a<43, (outs G8RC:$rD, ptr_rc:$ea_result), (ins symbolLo:$disp
// Zero extending loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ8 : DForm_1<34, (outs G8RC:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStGeneral,
+ "lbz $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi8 iaddr:$src))]>;
def LHZ8 : DForm_1<40, (outs G8RC:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStGeneral,
+ "lhz $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ8 : DForm_1<32, (outs G8RC:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStGeneral,
+ "lwz $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
def LBZX8 : XForm_1<31, 87, (outs G8RC:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStGeneral,
+ "lbzx $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi8 xaddr:$src))]>;
def LHZX8 : XForm_1<31, 279, (outs G8RC:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStGeneral,
+ "lhzx $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX8 : XForm_1<31, 23, (outs G8RC:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStGeneral,
+ "lwzx $rD, $src", LdStLoad,
[(set G8RC:$rD, (zextloadi32 xaddr:$src))]>;
// Update forms.
let mayLoad = 1 in {
def LBZU8 : DForm_1<35, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStGeneral,
+ "lbzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU8 : DForm_1<41, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStGeneral,
+ "lhzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU8 : DForm_1<33, (outs G8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStGeneral,
+ "lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -557,7 +581,8 @@ def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
"",
[(set G8RC:$rD,
(PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
-
+
+let hasSideEffects = 1 in {
let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
def LDinto_toc: DSForm_1<58, 0, (outs), (ins G8RC:$reg),
"ld 2, 8($reg)", LdStLD,
@@ -567,6 +592,7 @@ let RST = 2, DS_RA = 0 in // FIXME: Should be a pseudo.
def LDtoc_restore : DSForm_1<58, 0, (outs), (ins),
"ld 2, 40(1)", LdStLD,
[(PPCtoc_restore)]>, isPPC64;
+}
def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src),
"ldx $rD, $src", LdStLD,
[(set G8RC:$rD, (load xaddr:$src))]>, isPPC64;
@@ -587,24 +613,24 @@ def : Pat<(PPCload xaddr:$src),
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
- "stb $rS, $src", LdStGeneral,
+ "stb $rS, $src", LdStStore,
[(truncstorei8 G8RC:$rS, iaddr:$src)]>;
def STH8 : DForm_1<44, (outs), (ins G8RC:$rS, memri:$src),
- "sth $rS, $src", LdStGeneral,
+ "sth $rS, $src", LdStStore,
[(truncstorei16 G8RC:$rS, iaddr:$src)]>;
def STW8 : DForm_1<36, (outs), (ins G8RC:$rS, memri:$src),
- "stw $rS, $src", LdStGeneral,
+ "stw $rS, $src", LdStStore,
[(truncstorei32 G8RC:$rS, iaddr:$src)]>;
def STBX8 : XForm_8<31, 215, (outs), (ins G8RC:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStGeneral,
+ "stbx $rS, $dst", LdStStore,
[(truncstorei8 G8RC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX8 : XForm_8<31, 407, (outs), (ins G8RC:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStGeneral,
+ "sthx $rS, $dst", LdStStore,
[(truncstorei16 G8RC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX8 : XForm_8<31, 151, (outs), (ins G8RC:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStGeneral,
+ "stwx $rS, $dst", LdStStore,
[(truncstorei32 G8RC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
// Normal 8-byte stores.
@@ -621,14 +647,14 @@ let PPC970_Unit = 2 in {
def STBU8 : DForm_1a<38, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stbu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti8 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU8 : DForm_1a<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "sthu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 256370fa5f52..6c0f3d3f06e5 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1,10 +1,10 @@
-//===- PPCInstrAltivec.td - The PowerPC Altivec Extension --*- tablegen -*-===//
-//
+//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the Altivec extension to the PowerPC instruction set.
@@ -188,85 +188,85 @@ class VX2_Int<bits<11> xo, string opc, Intrinsic IntID>
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStGeneral /*FIXME*/, []>;
+ "dss $STRM", LdStLoad /*FIXME*/, []>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStGeneral /*FIXME*/, []>;
+ "dssall", LdStLoad /*FIXME*/, []>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, GPRC:$rA, GPRC:$rB),
- "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dstt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dstst $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, G8RC:$rA, GPRC:$rB),
- "dststt $rA, $rB, $STRM", LdStGeneral /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
def MFVSCR : VXForm_4<1540, (outs VRRC:$vD), (ins),
- "mfvscr $vD", LdStGeneral,
+ "mfvscr $vD", LdStStore,
[(set VRRC:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins VRRC:$vB),
- "mtvscr $vB", LdStGeneral,
+ "mtvscr $vB", LdStLoad,
[(int_ppc_altivec_mtvscr VRRC:$vB)]>;
let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
def LVEBX: XForm_1<31, 7, (outs VRRC:$vD), (ins memrr:$src),
- "lvebx $vD, $src", LdStGeneral,
+ "lvebx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
def LVEHX: XForm_1<31, 39, (outs VRRC:$vD), (ins memrr:$src),
- "lvehx $vD, $src", LdStGeneral,
+ "lvehx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
def LVEWX: XForm_1<31, 71, (outs VRRC:$vD), (ins memrr:$src),
- "lvewx $vD, $src", LdStGeneral,
+ "lvewx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
def LVX : XForm_1<31, 103, (outs VRRC:$vD), (ins memrr:$src),
- "lvx $vD, $src", LdStGeneral,
+ "lvx $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
def LVXL : XForm_1<31, 359, (outs VRRC:$vD), (ins memrr:$src),
- "lvxl $vD, $src", LdStGeneral,
+ "lvxl $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
}
def LVSL : XForm_1<31, 6, (outs VRRC:$vD), (ins memrr:$src),
- "lvsl $vD, $src", LdStGeneral,
+ "lvsl $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
PPC970_Unit_LSU;
def LVSR : XForm_1<31, 38, (outs VRRC:$vD), (ins memrr:$src),
- "lvsr $vD, $src", LdStGeneral,
+ "lvsr $vD, $src", LdStLoad,
[(set VRRC:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2 in { // Stores.
def STVEBX: XForm_8<31, 135, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvebx $rS, $dst", LdStGeneral,
+ "stvebx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvebx VRRC:$rS, xoaddr:$dst)]>;
def STVEHX: XForm_8<31, 167, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvehx $rS, $dst", LdStGeneral,
+ "stvehx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvehx VRRC:$rS, xoaddr:$dst)]>;
def STVEWX: XForm_8<31, 199, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvewx $rS, $dst", LdStGeneral,
+ "stvewx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvewx VRRC:$rS, xoaddr:$dst)]>;
def STVX : XForm_8<31, 231, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvx $rS, $dst", LdStGeneral,
+ "stvx $rS, $dst", LdStStore,
[(int_ppc_altivec_stvx VRRC:$rS, xoaddr:$dst)]>;
def STVXL : XForm_8<31, 487, (outs), (ins VRRC:$rS, memrr:$dst),
- "stvxl $rS, $dst", LdStGeneral,
+ "stvxl $rS, $dst", LdStStore,
[(int_ppc_altivec_stvxl VRRC:$rS, xoaddr:$dst)]>;
}
diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td
index 84a15b1ca942..d8e4b2bdf34a 100644
--- a/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/lib/Target/PowerPC/PPCInstrFormats.td
@@ -1,10 +1,10 @@
//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+// Two joined instructions; used to emit two adjacent instructions as one.
+// The itinerary from the first instruction is used for scheduling and
+// classification.
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : Instruction {
+ field bits<64> Inst;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode1;
+ let Inst{32-37} = opcode2;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+}
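
I2 packs both encodings into a single 64-bit Inst, the first instruction in bits 0-31 and the second in bits 32-63 (PPC numbers bit 0 as the most significant). A minimal C++ sketch, not part of this patch, of splitting such an encoding into its two 4-byte words; emitJoinedPair is a hypothetical helper:

#include <cstdint>
// Split a 64-bit I2 encoding into its two component instructions, assuming
// the opcode1 half (Inst{0-5}) occupies the high-order word.
void emitJoinedPair(uint64_t Inst, uint32_t &First, uint32_t &Second) {
  First  = static_cast<uint32_t>(Inst >> 32); // bits 0-31, e.g. the BL8 part
  Second = static_cast<uint32_t>(Inst);       // bits 32-63, e.g. the NOP part
}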
// 1.7.1 I-Form
class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
@@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Addr = 0;
}
+class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = A;
+ let Inst{43-47} = Addr{20-16}; // Base Reg
+ let Inst{48-63} = Addr{15-0}; // Displacement
+}
+
+// This is used to emit BL8+NOP.
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : IForm_and_DForm_1<opcode1, aa, lk, opcode2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2bc109c8785a..b45ada9db32a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- PPCInstrInfo.cpp - PowerPC32 Instruction Information -----*- C++ -*-===//
+//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -34,8 +34,8 @@
#include "PPCGenInstrInfo.inc"
namespace llvm {
-extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
-extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
+extern cl::opt<bool> DisablePPC32RS;
+extern cl::opt<bool> DisablePPC64RS;
}
using namespace llvm;
@@ -49,13 +49,32 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
const TargetMachine *TM,
const ScheduleDAG *DAG) const {
- // Should use subtarget info to pick the right hazard recognizer. For
- // now, always return a PPC970 recognizer.
- const TargetInstrInfo *TII = TM->getInstrInfo();
- assert(TII && "No InstrInfo?");
- return new PPCHazardRecognizer970(*TII);
+ unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new PPCScoreboardHazardRecognizer(II, DAG);
+ }
+
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
}
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2) {
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ assert(TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*TII);
+ }
+
+ return new PPCScoreboardHazardRecognizer(II, DAG);
+}
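
Both hazard-recognizer hooks key off the subtarget directive: the in-order PPC 440 and A2 cores get an itinerary-driven scoreboard, while the other subtargets keep the PPC970 dispatch-group recognizer post-RA (and the target-independent default pre-RA). A trivial C++ restatement of the shared predicate; usesScoreboard is a hypothetical helper, not part of this patch:

static bool usesScoreboard(unsigned Directive) {
  // The 440 and A2 are modeled with itineraries; everything else uses the
  // PPC970 grouping recognizer.
  return Directive == PPC::DIR_440 || Directive == PPC::DIR_A2;
}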
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -327,6 +346,7 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
+// This function returns true if a CR spill is necessary and false otherwise.
bool
PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -358,7 +378,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
} else {
// FIXME: this spills LR immediately to memory in one step. To do this,
- // we use R11, which we know cannot be used in the prolog/epilog. This is
+ // we use X11, which we know cannot be used in the prolog/epilog. This is
// a hack.
NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFLR8), PPC::X11));
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
@@ -377,9 +397,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
getKillRegState(isKill)),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- if ((EnablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
- (EnablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
- // FIXME (64-bit): Enable
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
.addReg(SrcReg,
getKillRegState(isKill)),
@@ -392,11 +411,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// We hack this on Darwin by reserving R2. It's probably broken on Linux
// at the moment.
+ bool is64Bit = TM.getSubtargetImpl()->isPPC64();
// We need to store the CR in the low 4-bits of the saved value. First,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ (is64Bit ? PPC::X2 : PPC::R2) :
+ (is64Bit ? PPC::X0 : PPC::R0);
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::MFCR8pseud :
+ PPC::MFCRpseud), ScratchReg)
.addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
@@ -404,12 +426,14 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
if (SrcReg != PPC::CR0) {
unsigned ShiftBits = getPPCRegisterNumbering(SrcReg)*4;
// rlwinm scratch, scratch, ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ NewMIs.push_back(BuildMI(MF, DL, get(is64Bit ? PPC::RLWINM8 :
+ PPC::RLWINM), ScratchReg)
.addReg(ScratchReg).addImm(ShiftBits)
.addImm(0).addImm(31));
}
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(is64Bit ?
+ PPC::STW8 : PPC::STW))
.addReg(ScratchReg,
getKillRegState(isKill)),
FrameIdx));
@@ -486,15 +510,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOStore,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
NewMIs.back()->addMemOperand(MF, MMO);
}
-void
+bool
PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
@@ -514,8 +537,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
FrameIdx));
} else {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD),
- PPC::R11), FrameIdx));
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::R11));
+ PPC::X11), FrameIdx));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTLR8)).addReg(PPC::X11));
}
} else if (PPC::F8RCRegisterClass->hasSubClassEq(RC)) {
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
@@ -524,28 +547,37 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
FrameIdx));
} else if (PPC::CRRCRegisterClass->hasSubClassEq(RC)) {
- // FIXME: We need a scatch reg here. The trouble with using R0 is that
- // it's possible for the stack frame to be so big the save location is
- // out of range of immediate offsets, necessitating another register.
- // We hack this on Darwin by reserving R2. It's probably broken on Linux
- // at the moment.
- unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
- PPC::R2 : PPC::R0;
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
- ScratchReg), FrameIdx));
-
- // If the reloaded register isn't CR0, shift the bits right so that they are
- // in the right CR's slot.
- if (DestReg != PPC::CR0) {
- unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
- // rlwinm r11, r11, 32-ShiftBits, 0, 31.
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
- .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
- .addImm(31));
+ if ((!DisablePPC32RS && !TM.getSubtargetImpl()->isPPC64()) ||
+ (!DisablePPC64RS && TM.getSubtargetImpl()->isPPC64())) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg)
+ , FrameIdx));
+ return true;
+ } else {
+      // FIXME: We need a scratch reg here. The trouble with using R0 is that
+ // it's possible for the stack frame to be so big the save location is
+ // out of range of immediate offsets, necessitating another register.
+ // We hack this on Darwin by reserving R2. It's probably broken on Linux
+ // at the moment.
+ unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
+ PPC::R2 : PPC::R0;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ ScratchReg), FrameIdx));
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::RLWINM), ScratchReg)
+ .addReg(ScratchReg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31));
+ }
+
+ NewMIs.push_back(BuildMI(MF, DL, get(TM.getSubtargetImpl()->isPPC64() ?
+ PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(ScratchReg));
}
-
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MTCRF), DestReg)
- .addReg(ScratchReg));
} else if (PPC::CRBITRCRegisterClass->hasSubClassEq(RC)) {
unsigned Reg = 0;
@@ -590,6 +622,8 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
} else {
llvm_unreachable("Unknown regclass!");
}
+
+ return false;
}
void
@@ -602,14 +636,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
- LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs)) {
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setSpillsCR();
+ }
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
const MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(
- MachinePointerInfo(PseudoSourceValue::getFixedStack(FrameIdx)),
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
MachineMemOperand::MOLoad,
MFI.getObjectSize(FrameIdx),
MFI.getObjectAlignment(FrameIdx));
@@ -649,6 +685,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
return 0;
+ case PPC::BL8_NOP_ELF:
+ case PPC::BLA8_NOP_ELF:
+ return 8;
default:
return 4; // PowerPC instructions are all 4 bytes
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 90bacc96c87e..7d49aa129e36 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -1,4 +1,4 @@
-//===- PPCInstrInfo.h - PowerPC Instruction Information ---------*- C++ -*-===//
+//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef POWERPC32_INSTRUCTIONINFO_H
-#define POWERPC32_INSTRUCTIONINFO_H
+#ifndef POWERPC_INSTRUCTIONINFO_H
+#define POWERPC_INSTRUCTIONINFO_H
#include "PPC.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PPCGenInstrInfo.inc"
@@ -72,7 +72,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
- void LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs) const;
@@ -88,6 +88,9 @@ public:
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetMachine *TM,
const ScheduleDAG *DAG) const;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index f248b5ba8c48..748486c1ca26 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1,10 +1,10 @@
-//===- PPCInstrInfo.td - The PowerPC Instruction Set -------*- tablegen -*-===//
-//
+//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the subset of the 32-bit PowerPC instruction set, as used
@@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
+def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
@@ -349,10 +352,10 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def FPContractions : Predicate<"!NoExcessFPPrecision">;
+def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">;
def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
-
+def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
@@ -399,7 +402,14 @@ let usesCustomInserter = 1, // Expanded after instruction selection.
// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
// scavenge a register for it.
-def SPILL_CR : Pseudo<(outs), (ins GPRC:$cond, memri:$F),
+let mayStore = 1 in
+def SPILL_CR : Pseudo<(outs), (ins CRRC:$cond, memri:$F),
+ "", []>;
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_CR : Pseudo<(outs CRRC:$cond), (ins memri:$F),
"", []>;
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
@@ -431,13 +441,7 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
}
// Darwin ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R2,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_Darwin : IForm<18, 0, 1,
@@ -456,13 +460,7 @@ let isCall = 1, PPC970_Unit = 7,
}
// SVR4 ABI Calls.
-let isCall = 1, PPC970_Unit = 7,
- // All calls clobber the non-callee saved registers...
- Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,
- F0,F1,F2,F3,F4,F5,F6,F7,F8,F9,F10,F11,F12,F13,
- V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,
- LR,CTR,
- CR0,CR1,CR5,CR6,CR7,CARRY] in {
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
def BL_SVR4 : IForm<18, 0, 1,
@@ -547,6 +545,9 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst),
"dcbzl $dst", LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
+ (DCBT xoaddr:$dst)>;
+
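The new pattern maps the generic prefetch node with rw = 0 (read) and cache type 1 (data) onto dcbt. Roughly, a source-level prefetch like the sketch below lowers to the llvm.prefetch intrinsic this pattern matches (warm is a hypothetical name):

void warm(const char *p) {
  // Read prefetch into the data cache; with this pattern it should select
  // to dcbt on PowerPC.
  __builtin_prefetch(p, /*rw=*/0, /*locality=*/3);
}
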
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@@ -642,7 +643,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
isDOT;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
-def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
@@ -651,17 +652,17 @@ def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
// Unindexed (r+i) Loads.
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZ : DForm_1<34, (outs GPRC:$rD), (ins memri:$src),
- "lbz $rD, $src", LdStGeneral,
+ "lbz $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi8 iaddr:$src))]>;
def LHA : DForm_1<42, (outs GPRC:$rD), (ins memri:$src),
"lha $rD, $src", LdStLHA,
[(set GPRC:$rD, (sextloadi16 iaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZ : DForm_1<40, (outs GPRC:$rD), (ins memri:$src),
- "lhz $rD, $src", LdStGeneral,
+ "lhz $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi16 iaddr:$src))]>;
def LWZ : DForm_1<32, (outs GPRC:$rD), (ins memri:$src),
- "lwz $rD, $src", LdStGeneral,
+ "lwz $rD, $src", LdStLoad,
[(set GPRC:$rD, (load iaddr:$src))]>;
def LFS : DForm_1<48, (outs F4RC:$rD), (ins memri:$src),
@@ -675,22 +676,22 @@ def LFD : DForm_1<50, (outs F8RC:$rD), (ins memri:$src),
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1 in {
def LBZU : DForm_1<35, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", LdStGeneral,
+ "lbzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHAU : DForm_1<43, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", LdStGeneral,
+ "lhau $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LHZU : DForm_1<41, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", LdStGeneral,
+ "lhzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
def LWZU : DForm_1<33, (outs GPRC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", LdStGeneral,
+ "lwzu $rD, $addr", LdStLoad,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
@@ -710,25 +711,25 @@ def LFDU : DForm_1<51, (outs F8RC:$rD, ptr_rc:$ea_result), (ins memri:$addr),
//
let canFoldAsLoad = 1, PPC970_Unit = 2 in {
def LBZX : XForm_1<31, 87, (outs GPRC:$rD), (ins memrr:$src),
- "lbzx $rD, $src", LdStGeneral,
+ "lbzx $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi8 xaddr:$src))]>;
def LHAX : XForm_1<31, 343, (outs GPRC:$rD), (ins memrr:$src),
"lhax $rD, $src", LdStLHA,
[(set GPRC:$rD, (sextloadi16 xaddr:$src))]>,
PPC970_DGroup_Cracked;
def LHZX : XForm_1<31, 279, (outs GPRC:$rD), (ins memrr:$src),
- "lhzx $rD, $src", LdStGeneral,
+ "lhzx $rD, $src", LdStLoad,
[(set GPRC:$rD, (zextloadi16 xaddr:$src))]>;
def LWZX : XForm_1<31, 23, (outs GPRC:$rD), (ins memrr:$src),
- "lwzx $rD, $src", LdStGeneral,
+ "lwzx $rD, $src", LdStLoad,
[(set GPRC:$rD, (load xaddr:$src))]>;
def LHBRX : XForm_1<31, 790, (outs GPRC:$rD), (ins memrr:$src),
- "lhbrx $rD, $src", LdStGeneral,
+ "lhbrx $rD, $src", LdStLoad,
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i16))]>;
def LWBRX : XForm_1<31, 534, (outs GPRC:$rD), (ins memrr:$src),
- "lwbrx $rD, $src", LdStGeneral,
+ "lwbrx $rD, $src", LdStLoad,
[(set GPRC:$rD, (PPClbrx xoaddr:$src, i32))]>;
def LFSX : XForm_25<31, 535, (outs F4RC:$frD), (ins memrr:$src),
@@ -746,13 +747,13 @@ def LFDX : XForm_25<31, 599, (outs F8RC:$frD), (ins memrr:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2 in {
def STB : DForm_1<38, (outs), (ins GPRC:$rS, memri:$src),
- "stb $rS, $src", LdStGeneral,
+ "stb $rS, $src", LdStStore,
[(truncstorei8 GPRC:$rS, iaddr:$src)]>;
def STH : DForm_1<44, (outs), (ins GPRC:$rS, memri:$src),
- "sth $rS, $src", LdStGeneral,
+ "sth $rS, $src", LdStStore,
[(truncstorei16 GPRC:$rS, iaddr:$src)]>;
def STW : DForm_1<36, (outs), (ins GPRC:$rS, memri:$src),
- "stw $rS, $src", LdStGeneral,
+ "stw $rS, $src", LdStStore,
[(store GPRC:$rS, iaddr:$src)]>;
def STFS : DForm_1<52, (outs), (ins F4RC:$rS, memri:$dst),
"stfs $rS, $dst", LdStUX,
@@ -766,33 +767,33 @@ def STFD : DForm_1<54, (outs), (ins F8RC:$rS, memri:$dst),
let PPC970_Unit = 2 in {
def STBU : DForm_1a<39, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stbu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stbu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti8 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STHU : DForm_1a<45, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "sthu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "sthu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res,
(pre_truncsti16 GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STWU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stwu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res, (pre_store GPRC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFSU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F4RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfsu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stfsu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res, (pre_store F4RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stfdu $rS, $ptroff($ptrreg)", LdStGeneral,
+ "stfdu $rS, $ptroff($ptrreg)", LdStStore,
[(set ptr_rc:$ea_res, (pre_store F8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
@@ -803,29 +804,29 @@ def STFDU : DForm_1a<37, (outs ptr_rc:$ea_res), (ins F8RC:$rS,
//
let PPC970_Unit = 2 in {
def STBX : XForm_8<31, 215, (outs), (ins GPRC:$rS, memrr:$dst),
- "stbx $rS, $dst", LdStGeneral,
+ "stbx $rS, $dst", LdStStore,
[(truncstorei8 GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STHX : XForm_8<31, 407, (outs), (ins GPRC:$rS, memrr:$dst),
- "sthx $rS, $dst", LdStGeneral,
+ "sthx $rS, $dst", LdStStore,
[(truncstorei16 GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
def STWX : XForm_8<31, 151, (outs), (ins GPRC:$rS, memrr:$dst),
- "stwx $rS, $dst", LdStGeneral,
+ "stwx $rS, $dst", LdStStore,
[(store GPRC:$rS, xaddr:$dst)]>,
PPC970_DGroup_Cracked;
let mayStore = 1 in {
def STWUX : XForm_8<31, 183, (outs), (ins GPRC:$rS, GPRC:$rA, GPRC:$rB),
- "stwux $rS, $rA, $rB", LdStGeneral,
+ "stwux $rS, $rA, $rB", LdStStore,
[]>;
}
def STHBRX: XForm_8<31, 918, (outs), (ins GPRC:$rS, memrr:$dst),
- "sthbrx $rS, $dst", LdStGeneral,
+ "sthbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i16)]>,
PPC970_DGroup_Cracked;
def STWBRX: XForm_8<31, 662, (outs), (ins GPRC:$rS, memrr:$dst),
- "stwbrx $rS, $dst", LdStGeneral,
+ "stwbrx $rS, $dst", LdStStore,
[(PPCstbrx GPRC:$rS, xoaddr:$dst, i32)]>,
PPC970_DGroup_Cracked;
@@ -1091,7 +1092,7 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
"mfspr $rT, 256", IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
-def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
+def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 4590f0045641..a6528c0d7030 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -291,9 +291,10 @@ void PPC64CompilationCallback() {
}
#endif
-extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
- unsigned *OrigCallAddrPlus4,
- bool is64Bit) {
+extern "C" {
+static void* LLVM_ATTRIBUTE_USED PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
// Adjust the pointer to the address of the call instruction in the stub
// emitted by emitFunctionStub, rather than the instruction after it.
unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
@@ -337,6 +338,7 @@ extern "C" void *PPCCompilationCallbackC(unsigned *StubCallAddrPlus4,
// stack after we restore all regs.
return Target;
}
+}
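
The callback is now static inside an extern "C" block and tagged LLVM_ATTRIBUTE_USED: it is reached only from the hand-written assembly stubs, so it needs C naming and protection from being optimized away, but no external linkage. A generic C++ sketch of the same pattern (callbackFromAsm is hypothetical):

extern "C" {
// Referenced only from inline assembly; attribute((used)) keeps the
// compiler from discarding this internal-linkage definition.
static void *__attribute__((used)) callbackFromAsm() {
  return 0;
}
}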
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 47ead59b587d..2f8243a597e6 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -1,4 +1,4 @@
-//===- PPCJITInfo.h - PowerPC impl. of the JIT interface --------*- C++ -*-===//
+//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 33af4269a3ad..276edcb69d19 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -140,7 +140,7 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
switch (MO.getType()) {
default:
MI->dump();
- assert(0 && "unknown operand type");
+ llvm_unreachable("unknown operand type");
case MachineOperand::MO_Register:
assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
@@ -166,6 +166,8 @@ void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
isDarwin);
break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
}
OutMI.addOperand(MCOp);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..6a0aec842be7
--- /dev/null
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -0,0 +1,15 @@
+//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void PPCFunctionInfo::anchor() { }
+
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index e2649c8b380f..24caffa3f0f2 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -21,7 +21,8 @@ namespace llvm {
/// PPCFunctionInfo - This class is derived from MachineFunction private
/// PowerPC target-specific information for each MachineFunction.
class PPCFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
+
/// FramePointerSaveIndex - Frame index of where the old frame pointer is
/// stored. Also used as an anchor for instructions that need to be altered
/// when using frame pointers (dyna_add, dyna_sub.)
diff --git a/lib/Target/PowerPC/PPCPerfectShuffle.h b/lib/Target/PowerPC/PPCPerfectShuffle.h
index 3164e33faae9..17b836d1ed97 100644
--- a/lib/Target/PowerPC/PPCPerfectShuffle.h
+++ b/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -1,4 +1,4 @@
-//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table ---------------===//
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2e90b7a4086b..ef1357137def 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- PPCRegisterInfo.cpp - PowerPC Register Information -------*- C++ -*-===//
+//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,10 +13,10 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "reginfo"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
-#include "PPCRegisterInfo.h"
#include "PPCFrameLowering.h"
#include "PPCSubtarget.h"
#include "llvm/CallingConv.h"
@@ -46,15 +46,14 @@
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-// FIXME (64-bit): Eventually enable by default.
namespace llvm {
-cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
+cl::opt<bool> DisablePPC32RS("disable-ppc32-regscavenger",
cl::init(false),
- cl::desc("Enable PPC32 register scavenger"),
+ cl::desc("Disable PPC32 register scavenger"),
cl::Hidden);
-cl::opt<bool> EnablePPC64RS("enable-ppc64-regscavenger",
+cl::opt<bool> DisablePPC64RS("disable-ppc64-regscavenger",
cl::init(false),
- cl::desc("Enable PPC64 register scavenger"),
+ cl::desc("Disable PPC64 register scavenger"),
cl::Hidden);
}
@@ -63,8 +62,8 @@ using namespace llvm;
// FIXME (64-bit): Should be inlined.
bool
PPCRegisterInfo::requiresRegisterScavenging(const MachineFunction &) const {
- return ((EnablePPC32RS && !Subtarget.isPPC64()) ||
- (EnablePPC64RS && Subtarget.isPPC64()));
+ return ((!DisablePPC32RS && !Subtarget.isPPC64()) ||
+ (!DisablePPC64RS && Subtarget.isPPC64()));
}
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
@@ -99,122 +98,22 @@ PPCRegisterInfo::getPointerRegClass(unsigned Kind) const {
return &PPC::GPRCRegClass;
}
-const unsigned*
+const uint16_t*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- // 32-bit Darwin calling convention.
- static const unsigned Darwin32_CalleeSavedRegs[] = {
- PPC::R13, PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
- PPC::LR, 0
- };
-
- // 32-bit SVR4 calling convention.
- static const unsigned SVR4_CalleeSavedRegs[] = {
- PPC::R14, PPC::R15,
- PPC::R16, PPC::R17, PPC::R18, PPC::R19,
- PPC::R20, PPC::R21, PPC::R22, PPC::R23,
- PPC::R24, PPC::R25, PPC::R26, PPC::R27,
- PPC::R28, PPC::R29, PPC::R30, PPC::R31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
-
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
- 0
- };
- // 64-bit Darwin calling convention.
- static const unsigned Darwin64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
-
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
-
- PPC::LR8, 0
- };
-
- // 64-bit SVR4 calling convention.
- static const unsigned SVR4_64_CalleeSavedRegs[] = {
- PPC::X14, PPC::X15,
- PPC::X16, PPC::X17, PPC::X18, PPC::X19,
- PPC::X20, PPC::X21, PPC::X22, PPC::X23,
- PPC::X24, PPC::X25, PPC::X26, PPC::X27,
- PPC::X28, PPC::X29, PPC::X30, PPC::X31,
-
- PPC::F14, PPC::F15, PPC::F16, PPC::F17,
- PPC::F18, PPC::F19, PPC::F20, PPC::F21,
- PPC::F22, PPC::F23, PPC::F24, PPC::F25,
- PPC::F26, PPC::F27, PPC::F28, PPC::F29,
- PPC::F30, PPC::F31,
-
- PPC::CR2, PPC::CR3, PPC::CR4,
-
- PPC::VRSAVE,
-
- PPC::V20, PPC::V21, PPC::V22, PPC::V23,
- PPC::V24, PPC::V25, PPC::V26, PPC::V27,
- PPC::V28, PPC::V29, PPC::V30, PPC::V31,
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
+ CSR_Darwin32_SaveList;
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+}
- 0
- };
-
+const unsigned*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs :
- Darwin32_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
+ CSR_Darwin32_RegMask;
- return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
+ return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
}
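
The hand-maintained callee-saved arrays give way to TableGen-generated CSR_* save lists and call-preserved register masks. A small sketch of how such a mask is read, assuming LLVM's convention that a set bit marks a register preserved across the call; isPreservedAcrossCall is a hypothetical helper:

// One bit per physical register; a set bit means the call preserves it.
static bool isPreservedAcrossCall(const unsigned *Mask, unsigned PhysReg) {
  return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1u;
}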
BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -247,9 +146,6 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::R13);
Reserved.set(PPC::R31);
- if (!requiresRegisterScavenging(MF))
- Reserved.set(PPC::R0); // FIXME (64-bit): Remove
-
Reserved.set(PPC::X0);
Reserved.set(PPC::X1);
Reserved.set(PPC::X13);
@@ -259,7 +155,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
}
- // Reserve R2 on Darwin to hack around the problem of save/restore of CR
+ // Reserve X2 on Darwin to hack around the problem of save/restore of CR
// when the stack frame is too big to address directly; we need two regs.
// This is a hack.
if (Subtarget.isDarwinABI()) {
@@ -273,6 +169,29 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
+unsigned
+PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const unsigned DefaultSafety = 1;
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 32 - FP - DefaultSafety;
+ }
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ return 32 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
+ }
+}
+
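A worked instance of getRegPressureLimit above: a function that needs a frame pointer gets 32 - 1 (FP) - 1 (DefaultSafety) = 30 allocatable GPRs. A trivial C++ restatement (gprPressureLimit is hypothetical):

unsigned gprPressureLimit(bool HasFP) {
  const unsigned DefaultSafety = 1;             // as in the code above
  return 32 - (HasFP ? 1 : 0) - DefaultSafety;  // 30 with a frame pointer
}
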
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
@@ -280,7 +199,8 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
void PPCRegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- if (GuaranteedTailCallOpt && I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
// Add (actually subtract) back the amount the callee popped on return.
if (int CalleeAmt = I->getOperand(1).getImm()) {
bool is64Bit = Subtarget.isPPC64();
@@ -295,8 +215,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
DebugLoc dl = MI->getDebugLoc();
if (isInt<16>(CalleeAmt)) {
- BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
- addImm(CalleeAmt);
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
} else {
MachineBasicBlock::iterator MBBI = I;
BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
@@ -304,9 +225,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
.addReg(TmpReg, RegState::Kill)
.addImm(CalleeAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
- .addReg(StackReg)
- .addReg(StackReg)
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
.addReg(TmpReg);
}
}
@@ -403,12 +323,12 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Use "true" part.
BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
.addReg(Reg, RegState::Kill)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Define)
.addReg(MI.getOperand(1).getReg());
else
BuildMI(MBB, II, dl, TII.get(PPC::STDUX))
.addReg(PPC::X0, RegState::Kill)
- .addReg(PPC::X1)
+ .addReg(PPC::X1, RegState::Define)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
@@ -424,7 +344,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
} else {
BuildMI(MBB, II, dl, TII.get(PPC::STWUX))
.addReg(Reg, RegState::Kill)
- .addReg(PPC::R1)
+ .addReg(PPC::R1, RegState::Define)
.addReg(MI.getOperand(1).getReg());
if (!MI.getOperand(1).isKill())
@@ -455,28 +375,32 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned FrameIndex, int SPAdj,
RegScavenger *RS) const {
// Get the instruction.
- MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>, <FI>
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
- unsigned SrcReg = MI.getOperand(0).getReg();
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
- BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
.addReg(Reg, RegState::Kill)
.addImm(getPPCRegisterNumbering(SrcReg) * 4)
.addImm(0)
@@ -490,6 +414,48 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex, int SPAdj,
+ RegScavenger *RS) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // FIXME: Once LLVM supports creating virtual registers here, or the register
+ // scavenger can return multiple registers, stop using reserved registers
+ // here.
+ (void) SPAdj;
+ (void) RS;
+
+ bool LP64 = Subtarget.isPPC64();
+ unsigned Reg = Subtarget.isDarwinABI() ? (LP64 ? PPC::X2 : PPC::R2) :
+ (LP64 ? PPC::X0 : PPC::R0);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned ShiftBits = getPPCRegisterNumbering(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ .addReg(Reg);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
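lowerCRRestore mirrors lowerCRSpilling: reload the spilled word, rotate the four CR bits from CR0's slot back into the destination field, then mtcrf. A worked C++ sketch of the rotate amount for CR3 (register number 3); crRestoreRotate is hypothetical:

unsigned crRestoreRotate(unsigned CRRegNum) {
  unsigned ShiftBits = CRRegNum * 4;  // the spill rotated left by 12 for CR3
  return 32 - ShiftBits;              // the restore rotates left by 20
}
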
void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS) const {
@@ -535,16 +501,23 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- // Special case for pseudo-op SPILL_CR.
- if (requiresRegisterScavenging(MF)) // FIXME (64-bit): Enable by default.
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR.
+ if (requiresRegisterScavenging(MF)) {
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex, SPAdj, RS);
+ return;
}
+ }
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+
+ bool is64Bit = Subtarget.isPPC64();
MI.getOperand(FIOperandNo).ChangeToRegister(TFI->hasFP(MF) ?
- PPC::R31 : PPC::R1,
+ (is64Bit ? PPC::X31 : PPC::R31) :
+ (is64Bit ? PPC::X1 : PPC::R1),
false);
// Figure out if the offset in the instruction is shifted right two bits. This
@@ -581,7 +554,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// clear can be encoded. This is extremely uncommon, because normally you
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
- if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -590,19 +564,19 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
- // FIXME: figure out what SPAdj is doing here.
- // FIXME (64-bit): Use "findScratchRegister".
unsigned SReg;
- if (requiresRegisterScavenging(MF))
- SReg = findScratchRegister(II, RS, &PPC::GPRCRegClass, SPAdj);
- else
- SReg = PPC::R0;
+ if (requiresRegisterScavenging(MF)) {
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ SReg = findScratchRegister(II, RS, is64Bit ? G8RC : GPRC, SPAdj);
+ } else
+ SReg = is64Bit ? PPC::X0 : PPC::R0;
// Insert a set of rA with the full offset value before the ld, st, or add
- BuildMI(MBB, II, dl, TII.get(PPC::LIS), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SReg)
.addImm(Offset >> 16);
- BuildMI(MBB, II, dl, TII.get(PPC::ORI), SReg)
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
.addReg(SReg, RegState::Kill)
.addImm(Offset);
@@ -624,7 +598,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
- MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
}
unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 1cc7213417d1..b1e6a7218ee7 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- PPCRegisterInfo.h - PowerPC Register Information Impl -----*- C++ -*-==//
+//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -37,8 +37,12 @@ public:
/// This is used for addressing modes.
virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const unsigned *getCallPreservedMask(CallingConv::ID CC) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -54,6 +58,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II, unsigned FrameIndex,
+ int SPAdj, RegScavenger *RS) const;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 1acdf4eb853b..0e55313b135f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -1,10 +1,10 @@
-//===- PPCRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===//
-//
+//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
//
diff --git a/lib/Target/PowerPC/PPCRelocations.h b/lib/Target/PowerPC/PPCRelocations.h
index a33e7e03370c..0b392f99b6d7 100644
--- a/lib/Target/PowerPC/PPCRelocations.h
+++ b/lib/Target/PowerPC/PPCRelocations.h
@@ -1,4 +1,4 @@
-//===- PPCRelocations.h - PPC32 Code Relocations ----------------*- C++ -*-===//
+//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef PPC32RELOCATIONS_H
-#define PPC32RELOCATIONS_H
+#ifndef PPCRELOCATIONS_H
+#define PPCRELOCATIONS_H
#include "llvm/CodeGen/MachineRelocation.h"
diff --git a/lib/Target/PowerPC/PPCSchedule.td b/lib/Target/PowerPC/PPCSchedule.td
index 9664f1457171..8c0a8589052a 100644
--- a/lib/Target/PowerPC/PPCSchedule.td
+++ b/lib/Target/PowerPC/PPCSchedule.td
@@ -1,10 +1,10 @@
-//===- PPCSchedule.td - PowerPC Scheduling Definitions -----*- tablegen -*-===//
-//
+//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -50,7 +50,8 @@ def BrMCRX : InstrItinClass;
def LdStDCBA : InstrItinClass;
def LdStDCBF : InstrItinClass;
def LdStDCBI : InstrItinClass;
-def LdStGeneral : InstrItinClass;
+def LdStLoad : InstrItinClass;
+def LdStStore : InstrItinClass;
def LdStDSS : InstrItinClass;
def LdStICBI : InstrItinClass;
def LdStUX : InstrItinClass;
@@ -103,9 +104,11 @@ def VecVSR : InstrItinClass;
// Processor instruction itineraries.
include "PPCScheduleG3.td"
+include "PPCSchedule440.td"
include "PPCScheduleG4.td"
include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
+include "PPCScheduleA2.td"
//===----------------------------------------------------------------------===//
// Instruction to itinerary class map - When add new opcodes to the supported
@@ -149,8 +152,8 @@ include "PPCScheduleG5.td"
// dcbf LdStDCBF
// dcbi LdStDCBI
// dcbst LdStDCBF
-// dcbt LdStGeneral
-// dcbtst LdStGeneral
+// dcbt LdStLoad
+// dcbtst LdStLoad
// dcbz LdStDCBF
// divd IntDivD
// divdu IntDivD
@@ -159,9 +162,9 @@ include "PPCScheduleG5.td"
// dss LdStDSS
// dst LdStDSS
// dstst LdStDSS
-// eciwx LdStGeneral
-// ecowx LdStGeneral
-// eieio LdStGeneral
+// eciwx LdStLoad
+// ecowx LdStLoad
+// eieio LdStLoad
// eqv IntGeneral
// extsb IntGeneral
// extsh IntGeneral
@@ -201,10 +204,10 @@ include "PPCScheduleG5.td"
// fsubs FPGeneral
// icbi LdStICBI
// isync SprISYNC
-// lbz LdStGeneral
-// lbzu LdStGeneral
+// lbz LdStLoad
+// lbzu LdStLoad
// lbzux LdStUX
-// lbzx LdStGeneral
+// lbzx LdStLoad
// ld LdStLD
// ldarx LdStLDARX
// ldu LdStLD
@@ -222,11 +225,11 @@ include "PPCScheduleG5.td"
// lhau LdStLHA
// lhaux LdStLHA
// lhax LdStLHA
-// lhbrx LdStGeneral
-// lhz LdStGeneral
-// lhzu LdStGeneral
+// lhbrx LdStLoad
+// lhz LdStLoad
+// lhzu LdStLoad
// lhzux LdStUX
-// lhzx LdStGeneral
+// lhzx LdStLoad
// lmw LdStLMW
// lswi LdStLMW
// lswx LdStLMW
@@ -241,11 +244,11 @@ include "PPCScheduleG5.td"
// lwarx LdStLWARX
// lwaux LdStLHA
// lwax LdStLHA
-// lwbrx LdStGeneral
-// lwz LdStGeneral
-// lwzu LdStGeneral
+// lwbrx LdStLoad
+// lwz LdStLoad
+// lwzu LdStLoad
// lwzux LdStUX
-// lwzx LdStGeneral
+// lwzx LdStLoad
// mcrf BrMCR
// mcrfs FPGeneral
// mcrxr BrMCRX
@@ -306,10 +309,10 @@ include "PPCScheduleG5.td"
// srawi IntShift
// srd IntRotateD
// srw IntGeneral
-// stb LdStGeneral
-// stbu LdStGeneral
-// stbux LdStGeneral
-// stbx LdStGeneral
+// stb LdStStore
+// stbu LdStStore
+// stbux LdStStore
+// stbx LdStStore
// std LdStSTD
// stdcx. LdStSTDCX
// stdu LdStSTD
@@ -324,11 +327,11 @@ include "PPCScheduleG5.td"
// stfsu LdStUX
// stfsux LdStUX
// stfsx LdStUX
-// sth LdStGeneral
-// sthbrx LdStGeneral
-// sthu LdStGeneral
-// sthux LdStGeneral
-// sthx LdStGeneral
+// sth LdStStore
+// sthbrx LdStStore
+// sthu LdStStore
+// sthux LdStStore
+// sthx LdStStore
// stmw LdStLMW
// stswi LdStLMW
// stswx LdStLMW
@@ -337,12 +340,12 @@ include "PPCScheduleG5.td"
// stvewx LdStSTVEBX
// stvx LdStSTVEBX
// stvxl LdStSTVEBX
-// stw LdStGeneral
-// stwbrx LdStGeneral
+// stw LdStStore
+// stwbrx LdStStore
// stwcx. LdStSTWCX
-// stwu LdStGeneral
-// stwux LdStGeneral
-// stwx LdStGeneral
+// stwu LdStStore
+// stwux LdStStore
+// stwx LdStStore
// subf IntGeneral
// subfc IntGeneral
// subfe IntGeneral
diff --git a/lib/Target/PowerPC/PPCSchedule440.td b/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 000000000000..419faea30220
--- /dev/null
+++ b/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,616 @@
+//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User's Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def IFTH1 : FuncUnit; // Fetch unit 1
+def IFTH2 : FuncUnit; // Fetch unit 2
+def PDCD1 : FuncUnit; // Decode unit 1
+def PDCD2 : FuncUnit; // Decode unit 2
+def DISS1 : FuncUnit; // Issue unit 1
+def DISS2 : FuncUnit; // Issue unit 2
+def LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def IWB : FuncUnit; // Write-back unit for the I pipeline
+def JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def JWB : FuncUnit; // Write-back unit for the J pipeline
+def AGEN : FuncUnit; // Address generation for the L pipeline
+def CRD : FuncUnit; // D-cache access for the L pipeline
+def LWB : FuncUnit; // Write-back unit for the L pipeline
+def FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS1.
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPC440Itineraries : ProcessorItineraries<
+ [IFTH1, IFTH2, PDCD1, PDCD2, DISS1, DISS2, FRACC,
+ IRACC, IEXE1, IEXE2, IWB, LRACC, JEXE1, JEXE2, JWB, AGEN, CRD, LWB,
+ FEXE1, FEXE2, FEXE3, FEXE4, FEXE5, FEXE6, FWB, LWARX_Hold],
+ [GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<33, [IWB]>],
+ [40, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC, LRACC]>,
+ InstrStage<1, [IEXE1, JEXE1]>,
+ InstrStage<1, [IEXE2, JEXE2]>,
+ InstrStage<1, [IWB, JWB]>],
+ [6, 4, 4],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4, 4],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStore , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStUX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [9, 5, 5],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<2, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1]>,
+ InstrStage<1, [IRACC], 0>,
+ InstrStage<4, [LWARX_Hold], 0>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<1, [AGEN]>,
+ InstrStage<1, [CRD]>,
+ InstrStage<1, [LWB]>],
+ [8, 5],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [LRACC]>,
+ InstrStage<3, [AGEN], 1>,
+ InstrStage<2, [CRD], 1>,
+ InstrStage<1, [LWB]>]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC], 0>,
+ InstrStage<1, [LRACC], 0>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [FEXE1], 0>,
+ InstrStage<1, [AGEN], 0>,
+ InstrStage<1, [JEXE1], 0>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [FEXE2], 0>,
+ InstrStage<1, [CRD], 0>,
+ InstrStage<1, [JEXE2], 0>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<6, [FEXE3], 0>,
+ InstrStage<6, [LWB], 0>,
+ InstrStage<6, [JWB], 0>,
+ InstrStage<6, [IWB]>]>,
+ InstrItinData<SprMFSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [6, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [9, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [7, 4],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<3, [IWB]>],
+ [10, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [IRACC]>,
+ InstrStage<1, [IEXE1]>,
+ InstrStage<1, [IEXE2]>,
+ InstrStage<1, [IWB]>],
+ [8, 4],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<25, [FWB]>],
+ [35, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<13, [FWB]>],
+ [23, 4, 4],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4, 4, 4],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<1, [IFTH1, IFTH2]>,
+ InstrStage<1, [PDCD1, PDCD2]>,
+ InstrStage<1, [DISS1, DISS2]>,
+ InstrStage<1, [FRACC]>,
+ InstrStage<1, [FEXE1]>,
+ InstrStage<1, [FEXE2]>,
+ InstrStage<1, [FEXE3]>,
+ InstrStage<1, [FEXE4]>,
+ InstrStage<1, [FEXE5]>,
+ InstrStage<1, [FEXE6]>,
+ InstrStage<1, [FWB]>],
+ [10, 4],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
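Reading one entry of the itinerary above: each InstrStage pairs a cycle count with the candidate functional unit(s) for that step, the first bracketed list gives per-operand cycles (the defined result first, then the sources), and the last list chooses a bypass per operand. An annotated restatement of the LdStLoad entry, using only names defined in this file:

  InstrItinData<LdStLoad,
    [InstrStage<1, [IFTH1, IFTH2]>,  // fetch, in either fetch unit
     InstrStage<1, [PDCD1, PDCD2]>,  // decode
     InstrStage<1, [DISS1, DISS2]>,  // issue
     InstrStage<1, [LRACC]>,         // dispatch to the L-pipe
     InstrStage<1, [AGEN]>,          // address generation
     InstrStage<1, [CRD]>,           // D-cache access
     InstrStage<2, [LWB]>],          // write-back, held two cycles
    [9, 5],                          // result ready at cycle 9, source read at 5
    [GPR_Bypass, GPR_Bypass]>        // both operands can use the GPR bypass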
diff --git a/lib/Target/PowerPC/PPCScheduleA2.td b/lib/Target/PowerPC/PPCScheduleA2.td
new file mode 100644
index 000000000000..857ba40ff622
--- /dev/null
+++ b/lib/Target/PowerPC/PPCScheduleA2.td
@@ -0,0 +1,630 @@
+//===-- PPCScheduleA2.td - PPC A2 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// A2 Processor User's Manual.
+// IBM, as updated in 2010.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC A2 chip sets
+//
+def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1
+def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2
+def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3
+def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4
+def IU4_0 : FuncUnit; // Instruction buffer slot 1
+def IU4_1 : FuncUnit; // Instruction buffer slot 2
+def IU4_2 : FuncUnit; // Instruction buffer slot 3
+def IU4_3 : FuncUnit; // Instruction buffer slot 4
+def IU4_4 : FuncUnit; // Instruction buffer slot 5
+def IU4_5 : FuncUnit; // Instruction buffer slot 6
+def IU4_6 : FuncUnit; // Instruction buffer slot 7
+def IU4_7 : FuncUnit; // Instruction buffer slot 8
+def IU5 : FuncUnit; // Dependency resolution
+def IU6 : FuncUnit; // Instruction issue
+def RF0 : FuncUnit;
+def XRF1 : FuncUnit;
+def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline
+def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline
+def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline
+def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline
+def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline
+def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline
+def FRF1 : FuncUnit;
+def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline
+def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline
+def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline
+def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline
+def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline
+def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline
+
+def CR_Bypass : Bypass; // The bypass for condition regs.
+//def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+//def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+//
+// This file defines the itinerary class data for the PPC A2 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPCA2Itineraries : ProcessorItineraries<
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3,
+ IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7,
+ IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
+ FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
+ [CR_Bypass, GPR_Bypass, FPR_Bypass], [
+ InstrItinData<IntGeneral , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntCompare , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntDivW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>],
+ [53, 7, 7],
+ [NoBypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMFFS , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMTFSB0 , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulHWU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntMulLI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntRotate , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntShift , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<IntTrapW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<BrB , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<BrCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, CR_Bypass, CR_Bypass]>,
+ InstrItinData<BrMCRX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7, 7],
+ [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBA , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBF , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStDCBI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 11],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLoad , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStStore , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStICBI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStUX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<LdStLFD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLFDU , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7, 7],
+ [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStLHA , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLMW , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [14, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStLWARX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [13, 7],
+ [GPR_Bypass, GPR_Bypass]>,
+ InstrItinData<LdStSTDCX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSTWCX , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [26, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<LdStSync , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>,
+ InstrItinData<SprISYNC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
+ InstrItinData<SprMFSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [GPR_Bypass, NoBypass]>,
+ InstrItinData<SprMTMSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprTLBSYNC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
+ InstrItinData<SprMFCR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [10, 7],
+ [GPR_Bypass, CR_Bypass]>,
+ InstrItinData<SprMFMSR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [GPR_Bypass, NoBypass]>,
+ InstrItinData<SprMFSPR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMFTB , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSPR , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
+ [15, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprMTSRIN , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprRFI , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<SprSC , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
+ InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
+ InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
+ InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
+ [29, 7],
+ [NoBypass, GPR_Bypass]>,
+ InstrItinData<FPGeneral , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPCompare , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [13, 7, 7],
+ [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivD , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>,
+ InstrStage<71, [FEX1], 0>,
+ InstrStage<71, [FEX2], 0>,
+ InstrStage<71, [FEX3], 0>,
+ InstrStage<71, [FEX4], 0>,
+ InstrStage<71, [FEX5], 0>,
+ InstrStage<71, [FEX6]>],
+ [86, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPDivS , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>,
+ InstrStage<58, [FEX1], 0>,
+ InstrStage<58, [FEX2], 0>,
+ InstrStage<58, [FEX3], 0>,
+ InstrStage<58, [FEX4], 0>,
+ InstrStage<58, [FEX5], 0>,
+ InstrStage<58, [FEX6]>],
+ [73, 7, 7],
+ [NoBypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPSqrt , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>,
+ InstrStage<68, [FEX1], 0>,
+ InstrStage<68, [FEX2], 0>,
+ InstrStage<68, [FEX3], 0>,
+ InstrStage<68, [FEX4], 0>,
+ InstrStage<68, [FEX5], 0>,
+ InstrStage<68, [FEX6]>],
+                              [86, 7], // FIXME: these are the double-precision
+                                       // timings; single precision should be
+                                       // [82, 7], with an FEX? cycle count of
+                                       // 64 rather than 68.
+                              [NoBypass, FPR_Bypass]>,
+ InstrItinData<FPFused , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7, 7, 7],
+ [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
+ InstrItinData<FPRes , [InstrStage<4,
+ [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
+ InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
+ IU4_4, IU4_5, IU4_6, IU4_7]>,
+ InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
+ InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
+ InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
+ InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
+ InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
+ [15, 7],
+ [FPR_Bypass, FPR_Bypass]>
+]>;
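One detail worth calling out in the entries above: InstrStage takes an optional third argument, the number of cycles after this stage begins before the next stage may begin, and a 0 there makes consecutive reservations start on the same cycle. The A2 divide and square-root entries use this to model a non-pipelined unit, holding every FP stage busy for the full duration at once. A sketch with the unit names from this file:

  // Double-precision divide: every FP stage busy for 71 cycles,
  // all starting together because of the trailing 0 (time increment).
  InstrStage<71, [FRF1], 0>,
  InstrStage<71, [FEX1], 0>,
  // ... FEX2 through FEX5 likewise ...
  InstrStage<71, [FEX6]>            // last stage uses the default increment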
diff --git a/lib/Target/PowerPC/PPCScheduleG3.td b/lib/Target/PowerPC/PPCScheduleG3.td
index ad4da1fe224f..bc926f7bb2b6 100644
--- a/lib/Target/PowerPC/PPCScheduleG3.td
+++ b/lib/Target/PowerPC/PPCScheduleG3.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG3.td - PPC G3 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G3 (750) processor.
@@ -32,7 +32,8 @@ def G3Itineraries : ProcessorItineraries<
InstrItinData<LdStDCBA , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStLFD , [InstrStage<2, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4.td b/lib/Target/PowerPC/PPCScheduleG4.td
index 03c3b29cc101..f7ec1e01333e 100644
--- a/lib/Target/PowerPC/PPCScheduleG4.td
+++ b/lib/Target/PowerPC/PPCScheduleG4.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG4.td - PPC G4 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G4 (7400) processor.
@@ -31,7 +31,8 @@ def G4Itineraries : ProcessorItineraries<
InstrItinData<BrMCRX , [InstrStage<1, [SRU]>]>,
InstrItinData<LdStDCBF , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<2, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<2, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<2, [SLU]>]>,
InstrItinData<LdStUX , [InstrStage<2, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG4Plus.td b/lib/Target/PowerPC/PPCScheduleG4Plus.td
index 00cac3c7cab2..37ebfc59880b 100644
--- a/lib/Target/PowerPC/PPCScheduleG4Plus.td
+++ b/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. -----*- tablegen -*-===//
-//
+//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G4+ (7450) processor.
@@ -34,7 +34,8 @@ def G4PlusItineraries : ProcessorItineraries<
InstrItinData<BrMCRX , [InstrStage<2, [IU2]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDCBI , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<3, [IU2]>]>,
InstrItinData<LdStUX , [InstrStage<3, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCScheduleG5.td b/lib/Target/PowerPC/PPCScheduleG5.td
index 1671f22b30ad..d1e40cef9639 100644
--- a/lib/Target/PowerPC/PPCScheduleG5.td
+++ b/lib/Target/PowerPC/PPCScheduleG5.td
@@ -1,10 +1,10 @@
-//===- PPCScheduleG5.td - PPC G5 Scheduling Definitions ----*- tablegen -*-===//
-//
+//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the G5 (970) processor.
@@ -35,7 +35,8 @@ def G5Itineraries : ProcessorItineraries<
InstrItinData<BrMCR , [InstrStage<2, [BPU]>]>,
InstrItinData<BrMCRX , [InstrStage<3, [BPU]>]>,
InstrItinData<LdStDCBF , [InstrStage<3, [SLU]>]>,
- InstrItinData<LdStGeneral , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStLoad , [InstrStage<3, [SLU]>]>,
+ InstrItinData<LdStStore , [InstrStage<3, [SLU]>]>,
InstrItinData<LdStDSS , [InstrStage<10, [SLU]>]>,
InstrItinData<LdStICBI , [InstrStage<40, [SLU]>]>,
InstrItinData<LdStUX , [InstrStage<4, [SLU]>]>,
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index cf194de42e8f..f405b4711a52 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- PowerPCSubtarget.cpp - PPC Subtarget Information -------------------===//
+//===-- PPCSubtarget.cpp - PPC Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PPCSubtarget.h"
+#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -74,6 +75,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
, HasAltivec(false)
, HasFSQRT(false)
, HasSTFIWX(false)
+ , IsBookE(false)
, HasLazyResolverStubs(false)
, IsJITCodeModel(false)
, TargetTriple(TT) {
@@ -139,3 +141,23 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage() || isDecl;
}
+
+bool PPCSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ if (DarwinDirective == PPC::DIR_440 || DarwinDirective == PPC::DIR_A2)
+ Mode = TargetSubtargetInfo::ANTIDEP_ALL;
+ else
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+
+ CriticalPathRCs.clear();
+
+ if (isPPC64())
+ CriticalPathRCs.push_back(&PPC::G8RCRegClass);
+ else
+ CriticalPathRCs.push_back(&PPC::GPRCRegClass);
+
+ return OptLevel >= CodeGenOpt::Default;
+}
+
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index e028de6b09de..a275029d3e5d 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- PPCSubtarget.h - Define Subtarget for the PPC -------*- C++ -*--====//
+//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -33,12 +33,14 @@ namespace PPC {
enum {
DIR_NONE,
DIR_32,
+ DIR_440,
DIR_601,
DIR_602,
DIR_603,
DIR_7400,
DIR_750,
DIR_970,
+ DIR_A2,
DIR_64
};
}
@@ -66,6 +68,7 @@ protected:
bool HasAltivec;
bool HasFSQRT;
bool HasSTFIWX;
+ bool IsBookE;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
@@ -136,15 +139,22 @@ public:
bool hasSTFIWX() const { return HasSTFIWX; }
bool hasAltivec() const { return HasAltivec; }
bool isGigaProcessor() const { return IsGigaProcessor; }
+ bool isBookE() const { return IsBookE; }
const Triple &getTargetTriple() const { return TargetTriple; }
/// isDarwin - True if this is any darwin platform.
bool isDarwin() const { return TargetTriple.isMacOSX(); }
+ /// isBGP - True if this is a BG/P platform.
+ bool isBGP() const { return TargetTriple.getVendor() == Triple::BGP; }
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
+  /// enablePostRAScheduler - True at the default optimization level and above.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index f5744b830489..d11397669912 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "PPC.h"
#include "PPCTargetMachine.h"
+#include "PPC.h"
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
@@ -22,37 +23,46 @@ using namespace llvm;
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
- RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
}
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
+
+ // The binutils for the BG/P are too old for CFI.
+ if (Subtarget.isBGP())
+ setMCUseCFI(false);
}
-/// Override this for PowerPC. Tail merging happily breaks up instruction issue
-/// groups, which typically degrades performance.
-bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
+void PPC32TargetMachine::anchor() { }
-PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
+void PPC64TargetMachine::anchor() { }
-PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : PPCTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
@@ -60,33 +70,56 @@ PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool PPCTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+namespace {
+/// PPC Code Generator Pass Configuration Options.
+class PPCPassConfig : public TargetPassConfig {
+public:
+ PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PPCTargetMachine &getPPCTargetMachine() const {
+ return getTM<PPCTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
+ TargetPassConfig *PassConfig = new PPCPassConfig(this, PM);
+
+ // Override this for PowerPC. Tail merging happily breaks up instruction issue
+ // groups, which typically degrades performance.
+ PassConfig->setEnableTailMerge(false);
+
+ return PassConfig;
+}
+
+bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createPPCISelDag(*this));
+ PM.add(createPPCISelDag(getPPCTargetMachine()));
return false;
}
-bool PPCTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool PPCPassConfig::addPreEmitPass() {
// Must run branch selection immediately preceding the asm printer.
PM.add(createPPCBranchSelectionPass());
return false;
}
bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
// FIXME: This should be moved to TargetJITInfo!!
if (Subtarget.isPPC64())
// Temporary workaround for the inability of PPC64 JIT to handle jump
// tables.
- DisableJumpTables = true;
-
+ Options.DisableJumpTables = true;
+
// Inform the subtarget that we are in JIT mode. FIXME: does this break macho
// writing?
Subtarget.SetJITMode();
-
+
// Machine code emitter pass for PowerPC.
PM.add(createPPCJITCodeEmitterPass(*this, JCE));
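
The two hunks above capture the 3.1-era migration from per-pass TargetMachine
hooks to a target-owned TargetPassConfig. A minimal sketch of the pattern,
using hypothetical My* names (MyTargetMachine, MyPassConfig, and
createMyISelDag are illustrative placeholders, not symbols in the tree):

namespace {
class MyPassConfig : public TargetPassConfig {
public:
  MyPassConfig(MyTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  // The hooks drop their (PassManagerBase &, CodeGenOpt::Level) parameters;
  // both are reachable through the config object itself.
  virtual bool addInstSelector() {
    PM.add(createMyISelDag(getTM<MyTargetMachine>()));
    return false; // false == success in this interface
  }
};
} // namespace

TargetPassConfig *MyTargetMachine::createPassConfig(PassManagerBase &PM) {
  TargetPassConfig *PC = new MyPassConfig(this, PM);
  // Per-target pipeline defaults (e.g. tail merging) now live here rather
  // than in getEnableTailMergeDefault()-style virtual overrides.
  PC->setEnableTailMerge(false);
  return PC;
}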
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index d06f0843bd6d..7da2b0cb10c1 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC -----*- C++ -*-=//
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,8 +24,6 @@
#include "llvm/Target/TargetData.h"
namespace llvm {
-class PassManager;
-class GlobalValue;
/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
///
@@ -41,15 +39,16 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM, bool is64Bit);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
- virtual const PPCTargetLowering *getTargetLowering() const {
+ virtual const PPCTargetLowering *getTargetLowering() const {
return &TLInfo;
}
virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
@@ -58,37 +57,39 @@ public:
virtual const PPCRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
-
+
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData *getInstrItineraryData() const {
+ virtual const InstrItineraryData *getInstrItineraryData() const {
return &InstrItins;
}
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
- virtual bool getEnableTailMergeDefault() const;
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
///
class PPC32TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
public:
PPC32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
///
class PPC64TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
public:
PPC64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
index f63111f465c3..fdb8a62b9d24 100644
--- a/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
+++ b/lib/Target/PowerPC/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMPowerPCInfo
PowerPCTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMPowerPCInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMPowerPCInfo PowerPCCommonTableGen)
diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..f77d85b15ab9
--- /dev/null
+++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCInfo
+parent = PowerPC
+required_libraries = MC Support Target
+add_to_library_groups = PowerPC
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 1f69ffb09c0a..093255e6af2d 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -2,22 +2,6 @@ Target Independent Opportunities:
//===---------------------------------------------------------------------===//
-With the recent changes to make the implicit def/use set explicit in
-machineinstrs, we should change the target descriptions for 'call' instructions
-so that the .td files don't list all the call-clobbered registers as implicit
-defs. Instead, these should be added by the code generator (e.g. on the dag).
-
-This has a number of uses:
-
-1. PPC32/64 and X86 32/64 can avoid having multiple copies of call instructions
- for their different impdef sets.
-2. Targets with multiple calling convs (e.g. x86) which have different clobber
- sets don't need copies of call instructions.
-3. 'Interprocedural register allocation' can be done to reduce the clobber sets
- of calls.
-
-//===---------------------------------------------------------------------===//
-
We should recognize various "overflow detection" idioms and translate them into
llvm.uadd.with.overflow and similar intrinsics. Here is a multiply idiom:
@@ -961,6 +945,25 @@ optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
//===---------------------------------------------------------------------===//
+int g(int x) { return (x - 10) < 0; }
+Should combine to "x <= 9" (the sub has nsw). Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int g(int x) { return (x + 10) < 0; }
+Should combine to "x < -10" (the add has nsw). Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
+int f(int i, int j) { return i < j + 1; }
+int g(int i, int j) { return j > i - 1; }
+Should combine to "i <= j" (the add/sub has nsw). Currently not
+optimized with "clang -emit-llvm-bc | opt -std-compile-opts".
+
+//===---------------------------------------------------------------------===//
+
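
A sketch of what the first new entry is asking for, assuming IR along the
lines clang emits for it (names are illustrative):

; int g(int x) { return (x - 10) < 0; }
define i32 @g(i32 %x) {
entry:
  %sub = sub nsw i32 %x, 10
  %cmp = icmp slt i32 %sub, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}
; Because the sub carries nsw it cannot wrap, so instcombine could rewrite
; %cmp as "icmp slt i32 %x, 10" (i.e. x <= 9) and let the sub go dead.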
This was noticed in the entryblock for grokdeclarator in 403.gcc:
%tmp = icmp eq i32 %decl_context, 4
@@ -2358,3 +2361,8 @@ unsigned foo(unsigned x, unsigned y) { return x > y && x != 0; }
should fold to x > y.
//===---------------------------------------------------------------------===//
+
+int f(double x) { return __builtin_fabs(x) < 0.0; }
+should fold to false.
+
+//===---------------------------------------------------------------------===//
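
Likewise for the fabs entry just added, a hedged sketch of the IR shape:

; int f(double x) { return __builtin_fabs(x) < 0.0; }
declare double @llvm.fabs.f64(double)
define i32 @f(double %x) {
entry:
  %fab = call double @llvm.fabs.f64(double %x)
  %cmp = fcmp olt double %fab, 0.0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}
; fabs never yields a value less than 0.0, and "olt" is false on NaN as well,
; so %cmp folds to false and @f can return 0 unconditionally.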
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 5b87849b9d17..ae4af0f44250 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -1,11 +1,11 @@
set(LLVM_TARGET_DEFINITIONS Sparc.td)
-llvm_tablegen(SparcGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(SparcGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(SparcGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(SparcGenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(SparcGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM SparcGenCallingConv.inc -gen-callingconv)
add_public_tablegen_target(SparcCommonTableGen)
add_llvm_target(SparcCodeGen
@@ -16,23 +16,12 @@ add_llvm_target(SparcCodeGen
SparcISelDAGToDAG.cpp
SparcISelLowering.cpp
SparcFrameLowering.cpp
+ SparcMachineFunctionInfo.cpp
SparcRegisterInfo.cpp
SparcSubtarget.cpp
SparcTargetMachine.cpp
SparcSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSparcDesc
- LLVMSparcInfo
- LLVMSupport
- LLVMTarget
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index dab35e5e4e6f..883aa3a497c4 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -100,7 +100,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
- if (I->getDesc().hasDelaySlot()) {
+ if (I->hasDelaySlot()) {
MachineBasicBlock::iterator D = MBB.end();
MachineBasicBlock::iterator J = I;
@@ -149,7 +149,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
}
//Call's delay filler can def some of call's uses.
- if (slot->getDesc().isCall())
+ if (slot->isCall())
insertCallUses(slot, RegUses);
else
insertDefsUses(slot, RegDefs, RegUses);
@@ -170,7 +170,7 @@ Filler::findDelayInstr(MachineBasicBlock &MBB,
if (I->hasUnmodeledSideEffects()
|| I->isInlineAsm()
|| I->isLabel()
- || I->getDesc().hasDelaySlot()
+ || I->hasDelaySlot()
|| isDelayFiller(MBB, I))
break;
@@ -194,13 +194,13 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
if (candidate->isImplicitDef() || candidate->isKill())
return true;
- if (candidate->getDesc().mayLoad()) {
+ if (candidate->mayLoad()) {
sawLoad = true;
if (sawStore)
return true;
}
- if (candidate->getDesc().mayStore()) {
+ if (candidate->mayStore()) {
if (sawStore)
return true;
sawStore = true;
@@ -282,7 +282,7 @@ bool Filler::IsRegInSet(SmallSet<unsigned, 32>& RegSet, unsigned Reg)
if (RegSet.count(Reg))
return true;
// check Aliased Registers
- for (const unsigned *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
+ for (const uint16_t *Alias = TM.getRegisterInfo()->getAliasSet(Reg);
*Alias; ++ Alias)
if (RegSet.count(*Alias))
return true;
@@ -298,13 +298,13 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
return false;
if (candidate->getOpcode() == SP::UNIMP)
return true;
- const MCInstrDesc &prevdesc = (--candidate)->getDesc();
- return prevdesc.hasDelaySlot();
+ --candidate;
+ return candidate->hasDelaySlot();
}
bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize)
{
- if (!I->getDesc().isCall())
+ if (!I->isCall())
return false;
unsigned structSizeOpNum = 0;
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 1423b1e64d66..9a729bd87044 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -59,19 +59,19 @@ FunctionPass *llvm::createSparcFPMoverPass(TargetMachine &tm) {
/// registers that correspond to it.
static void getDoubleRegPair(unsigned DoubleReg, unsigned &EvenReg,
unsigned &OddReg) {
- static const unsigned EvenHalvesOfPairs[] = {
+ static const uint16_t EvenHalvesOfPairs[] = {
SP::F0, SP::F2, SP::F4, SP::F6, SP::F8, SP::F10, SP::F12, SP::F14,
SP::F16, SP::F18, SP::F20, SP::F22, SP::F24, SP::F26, SP::F28, SP::F30
};
- static const unsigned OddHalvesOfPairs[] = {
+ static const uint16_t OddHalvesOfPairs[] = {
SP::F1, SP::F3, SP::F5, SP::F7, SP::F9, SP::F11, SP::F13, SP::F15,
SP::F17, SP::F19, SP::F21, SP::F23, SP::F25, SP::F27, SP::F29, SP::F31
};
- static const unsigned DoubleRegsInOrder[] = {
+ static const uint16_t DoubleRegsInOrder[] = {
SP::D0, SP::D1, SP::D2, SP::D3, SP::D4, SP::D5, SP::D6, SP::D7, SP::D8,
SP::D9, SP::D10, SP::D11, SP::D12, SP::D13, SP::D14, SP::D15
};
- for (unsigned i = 0; i < sizeof(DoubleRegsInOrder)/sizeof(unsigned); ++i)
+ for (unsigned i = 0; i < array_lengthof(DoubleRegsInOrder); ++i)
if (DoubleRegsInOrder[i] == DoubleReg) {
EvenReg = EvenHalvesOfPairs[i];
OddReg = OddHalvesOfPairs[i];
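
This hunk also shows why the tree is moving to array_lengthof: the old sizeof
division hard-coded sizeof(unsigned), which silently becomes wrong the moment
the arrays change to uint16_t. A self-contained sketch of the idiom (the
template is a simplified form of llvm::array_lengthof from ADT/STLExtras.h):

#include <cstddef>
#include <cstdint>

template <class T, std::size_t N>
inline std::size_t array_lengthof(T (&)[N]) { return N; }

static const uint16_t DoubleRegs[16] = {};
// sizeof(DoubleRegs)/sizeof(unsigned) == 8   -- stale once elements shrank
// array_lengthof(DoubleRegs)          == 16  -- tracks the array type itself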
diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt
new file mode 100644
index 000000000000..fe20d2f4bd15
--- /dev/null
+++ b/lib/Target/Sparc/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/Sparc/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = Sparc
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = SparcCodeGen
+parent = Sparc
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target
+add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
index d3bdf0b503ae..9d4db4d25ef7 100644
--- a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -3,10 +3,4 @@ add_llvm_library(LLVMSparcDesc
SparcMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcDesc
- LLVMMC
- LLVMSparcInfo
- LLVMSupport
- )
-
add_dependencies(LLVMSparcDesc SparcCommonTableGen)
diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..97f8f162c27f
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SparcDesc
+parent = Sparc
+required_libraries = MC SparcInfo Support
+add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index 6a7e0902354e..f5e10fc3a465 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+void SparcELFMCAsmInfo::anchor() { }
+
SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
IsLittleEndian = false;
Triple TheTriple(TT);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 0cb6827d2771..f0e1354c212b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- SparcMCAsmInfo.h - Sparc asm properties -------------*- C++ -*--====//
+//===-- SparcMCAsmInfo.h - Sparc asm properties ----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,15 @@
#ifndef SPARCTARGETASMINFO_H
#define SPARCTARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
- struct SparcELFMCAsmInfo : public MCAsmInfo {
+ class SparcELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index cb2a7dfe6160..7fdb0c39285a 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===//
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -50,9 +51,10 @@ static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
}
static MCCodeGenInfo *createSparcMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index 2fd9e3f4cbd3..cba775adb1a8 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -15,9 +15,7 @@
#define SPARCMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheSparcTarget;
extern Target TheSparcV9Target;
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index 7b2c6141dbf8..ce6ae17b6ca2 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -18,7 +18,6 @@
#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
namespace llvm {
class FunctionPass;
@@ -74,7 +73,6 @@ namespace llvm {
inline static const char *SPARCCondCodeToString(SPCC::CondCodes CC) {
switch (CC) {
- default: llvm_unreachable("Unknown condition code");
case SPCC::ICC_NE: return "ne";
case SPCC::ICC_E: return "e";
case SPCC::ICC_G: return "g";
@@ -103,7 +101,8 @@ namespace llvm {
case SPCC::FCC_LE: return "le";
case SPCC::FCC_ULE: return "ule";
case SPCC::FCC_O: return "o";
- }
+ }
+ llvm_unreachable("Invalid cond code");
}
} // end namespace llvm
#endif
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 764336665d0b..611f8e8129f4 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,10 +1,10 @@
-//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
-//
+//===-- Sparc.td - Describe the Sparc Target Machine -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
//
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 345e1bca54c6..c14b3d4a0065 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -23,7 +23,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -63,6 +62,8 @@ namespace {
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
const;
+
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
};
} // end of anonymous namespace
@@ -82,7 +83,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
}
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+ O << "%" << StringRef(getRegisterName(MO.getReg())).lower();
break;
case MachineOperand::MO_Immediate:
@@ -141,13 +142,13 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum,
std::string operand = "";
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
- default: assert(0 && "Operand is not a register ");
+ default: llvm_unreachable("Operand is not a register");
case MachineOperand::MO_Register:
assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
"Operand is not a physical register ");
assert(MO.getReg() != SP::O7 &&
"%o7 is assigned as destination for getpcx!");
- operand = "%" + LowercaseString(getRegisterName(MO.getReg()));
+ operand = "%" + StringRef(getRegisterName(MO.getReg())).lower();
break;
}
@@ -237,12 +238,19 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
// Check if the last terminator is an unconditional branch.
MachineBasicBlock::const_iterator I = Pred->end();
- while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ while (I != Pred->begin() && !(--I)->isTerminator())
; // Noop
- return I == Pred->end() || !I->getDesc().isBarrier();
+ return I == Pred->end() || !I->isBarrier();
}
-
+MachineLocation SparcAsmPrinter::
+getDebugValueLocation(const MachineInstr *MI) const {
+ assert(MI->getNumOperands() == 4 && "Invalid number of operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td
index 856f87ad1d37..d4712208126f 100644
--- a/lib/Target/Sparc/SparcCallingConv.td
+++ b/lib/Target/Sparc/SparcCallingConv.td
@@ -1,10 +1,10 @@
-//===- SparcCallingConv.td - Calling Conventions Sparc -----*- tablegen -*-===//
-//
+//===-- SparcCallingConv.td - Calling Conventions Sparc ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the Sparc architectures.
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 320c8ca26d7e..1c5c89e97158 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -1,4 +1,4 @@
-//====- SparcFrameLowering.cpp - Sparc Frame Information -------*- C++ -*-====//
+//===-- SparcFrameLowering.cpp - Sparc Frame Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 9a2ddc83f5aa..210705e2d47a 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -1,4 +1,4 @@
-//===- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*--===//
+//===-- SparcFrameLowering.h - Define frame lowering for Sparc --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 8c6103dd8a39..93710c4e0b0f 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -176,7 +176,6 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
MulLHS, MulRHS);
// The high part is in the Y register.
return CurDAG->SelectNodeTo(N, SP::RDY, MVT::i32, SDValue(Mul, 1));
- return NULL;
}
}
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index d70b16375e95..c3e6f1606794 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -51,7 +50,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
- static const unsigned RegList[] = {
+ static const uint16_t RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
//Try to get first reg
@@ -175,7 +174,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
InVals.push_back(Arg);
continue;
}
@@ -197,7 +196,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else {
unsigned loReg = MF.addLiveIn(NextVA.getLocReg(),
&SP::IntRegsRegClass);
@@ -237,7 +236,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
- false,false, 0);
+ false,false, false, 0);
InVals.push_back(Load);
continue;
}
@@ -248,7 +247,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy());
SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
Offset+4,
true);
@@ -256,7 +255,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
SDValue WholeValue =
DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal);
@@ -273,7 +272,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
} else {
ISD::LoadExtType LoadOp = ISD::SEXTLOAD;
// Sparc is big endian, so add an offset based on the ObjectVT.
@@ -302,11 +301,11 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
- const unsigned *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
@@ -348,7 +347,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue
SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -467,13 +466,13 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
false, false, 0);
// Sparc is big-endian, so the high part comes first.
SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// Increment the pointer to the other half.
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getIntPtrConstant(4));
// Load the low part.
SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi));
@@ -763,7 +762,9 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
@@ -831,22 +832,19 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
/// be zero. Op is expected to be a target specific node. Used by DAG
/// combiner.
void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
APInt KnownZero2, KnownOne2;
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case SPISD::SELECT_ICC:
case SPISD::SELECT_FCC:
- DAG.ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne,
- Depth+1);
- DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2,
- Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
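
The Mask parameter is gone from ComputeMaskedBits across this API revision;
callers now size the two APInts to the value's width up front and the analysis
fills in every bit. A sketch of the new calling convention (Op is a stand-in
SDValue):

unsigned BitWidth = Op.getValueSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); // Depth defaults to 0
// Bit i of the result is provably zero iff KnownZero[i], provably one iff
// KnownOne[i]; the two masks are always disjoint.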
@@ -897,7 +895,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, MachinePointerInfo(), false, false, 0);
+ AbsAddr, MachinePointerInfo(), false, false, false, 0);
}
SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
@@ -918,7 +916,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op,
SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32,
GlobalBase, RelAddr);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- AbsAddr, MachinePointerInfo(), false, false, 0);
+ AbsAddr, MachinePointerInfo(), false, false, false, 0);
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
@@ -1026,7 +1024,7 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
DebugLoc dl = Node->getDebugLoc();
SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr,
- MachinePointerInfo(SV), false, false, 0);
+ MachinePointerInfo(SV), false, false, false, 0);
// Increment the pointer, VAList, to the next vaarg
SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList,
DAG.getConstant(VT.getSizeInBits()/8,
@@ -1038,11 +1036,11 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
// f64 load.
if (VT != MVT::f64)
return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Otherwise, load it as i64, then do a bitconvert.
SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Bit-Convert the value to f64.
SDValue Ops[2] = {
@@ -1103,7 +1101,7 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
FrameAddr = DAG.getLoad(MVT::i32, dl,
Chain,
Ptr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
}
return FrameAddr;
@@ -1135,7 +1133,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
RetAddr = DAG.getLoad(MVT::i32, dl,
Chain,
Ptr,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
}
return RetAddr;
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 8a1886a856e0..cf430485cfec 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -15,8 +15,8 @@
#ifndef SPARC_ISELLOWERING_H
#define SPARC_ISELLOWERING_H
-#include "llvm/Target/TargetLowering.h"
#include "Sparc.h"
+#include "llvm/Target/TargetLowering.h"
namespace llvm {
namespace SPISD {
@@ -50,7 +50,6 @@ namespace llvm {
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -77,9 +76,8 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/Sparc/SparcInstrFormats.td b/lib/Target/Sparc/SparcInstrFormats.td
index 6535259e16ff..dce331228b8f 100644
--- a/lib/Target/Sparc/SparcInstrFormats.td
+++ b/lib/Target/Sparc/SparcInstrFormats.td
@@ -1,10 +1,10 @@
-//===- SparcInstrFormats.td - Sparc Instruction Formats ----*- tablegen -*-===//
-//
+//===-- SparcInstrFormats.td - Sparc Instruction Formats ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 7a6bf50fa7d4..faff468a587d 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- SparcInstrInfo.cpp - Sparc Instruction Information -------*- C++ -*-===//
+//===-- SparcInstrInfo.cpp - Sparc Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -79,7 +79,6 @@ static bool IsIntegerCC(unsigned CC)
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
{
switch(CC) {
- default: llvm_unreachable("Unknown condition code");
case SPCC::ICC_NE: return SPCC::ICC_E;
case SPCC::ICC_E: return SPCC::ICC_NE;
case SPCC::ICC_G: return SPCC::ICC_LE;
@@ -110,6 +109,18 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::FCC_NE: return SPCC::FCC_E;
case SPCC::FCC_E: return SPCC::FCC_NE;
}
+ llvm_unreachable("Invalid cond code");
+}
+
+MachineInstr *
+SparcInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const {
+ MachineInstrBuilder MIB = BuildMI(MF, dl, get(SP::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
}
@@ -133,7 +144,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
break;
//Terminator is not a branch
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
//Handle Unconditional branches
@@ -195,7 +206,7 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
+
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index eda64efb7a03..204f69855c23 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -1,4 +1,4 @@
-//===- SparcInstrInfo.h - Sparc Instruction Information ---------*- C++ -*-===//
+//===-- SparcInstrInfo.h - Sparc Instruction Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,8 +14,8 @@
#ifndef SPARCINSTRUCTIONINFO_H
#define SPARCINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "SparcRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "SparcGenInstrInfo.inc"
@@ -62,6 +62,13 @@ public:
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
+ /// emitFrameIndexDebugValue - Emit a target-dependent form of
+ /// DBG_VALUE encoding the address of a frame index.
+ virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc dl) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index cf5c48fd18d9..15541ef2f837 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -1,10 +1,10 @@
-//===- SparcInstrInfo.td - Target Description for Sparc Target ------------===//
-//
+//===-- SparcInstrInfo.td - Target Description for Sparc Target -----------===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the Sparc instructions in TableGen format.
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.cpp b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..e7442826e78b
--- /dev/null
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- SparcMachineFunctionInfo.cpp - Sparc Machine Function Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void SparcMachineFunctionInfo::anchor() { }
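
The anchor() definitions being sprinkled through this patch (here and in the
Sparc/PPC subtarget, asm-info, and target-machine classes) all serve the same
purpose: an out-of-line virtual function pins a class's vtable to a single
object file instead of emitting weak copies in every translation unit that
uses the class. The idiom, reduced to a sketch (SomeInfo is a placeholder
name):

// In the header:
class SomeInfo {
  virtual void anchor(); // never called; exists only to anchor the vtable
public:
  virtual ~SomeInfo() {}
};

// In exactly one .cpp file:
void SomeInfo::anchor() { }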
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index 0b74308eb0ee..90c27a4459a1 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -18,6 +18,7 @@
namespace llvm {
class SparcMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
private:
unsigned GlobalBaseReg;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 8c1625148c8c..63574681b085 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- SparcRegisterInfo.cpp - SPARC Register Information -------*- C++ -*-===//
+//===-- SparcRegisterInfo.cpp - SPARC Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcRegisterInfo.h"
+#include "Sparc.h"
#include "SparcSubtarget.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -33,9 +33,9 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
: SparcGenRegisterInfo(SP::I7), Subtarget(st), TII(tii) {
}
-const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
- static const unsigned CalleeSavedRegs[] = { 0 };
+ static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
@@ -118,10 +118,8 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned SparcRegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned SparcRegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index f845667b4d9c..9515ad33dcc2 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SparcRegisterInfo.h - Sparc Register Information Impl ----*- C++ -*-===//
+//===-- SparcRegisterInfo.h - Sparc Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -32,7 +32,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
SparcRegisterInfo(SparcSubtarget &st, const TargetInstrInfo &tii);
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index cf928293c169..81bff6c51c9d 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -1,10 +1,10 @@
-//===- SparcRegisterInfo.td - Sparc Register defs ----------*- tablegen -*-===//
-//
+//===-- SparcRegisterInfo.td - Sparc Register defs ---------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -39,6 +39,7 @@ class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
let Num = num;
let SubRegs = subregs;
let SubRegIndices = [sub_even, sub_odd];
+ let CoveredBySubRegs = 1;
}
// Control Registers
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 6c501cff6a3a..e5b2aeb1bb85 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- SparcSubtarget.cpp - SPARC Subtarget Information -------------------===//
+//===-- SparcSubtarget.cpp - SPARC Subtarget Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,8 @@
using namespace llvm;
+void SparcSubtarget::anchor() { }
+
SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit) :
SparcGenSubtargetInfo(TT, CPU, FS),
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index 00a04c3bea57..a81931b34aa2 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- SparcSubtarget.h - Define Subtarget for the SPARC ----*- C++ -*-====//
+//===-- SparcSubtarget.h - Define Subtarget for the SPARC -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@ namespace llvm {
class StringRef;
class SparcSubtarget : public SparcGenSubtargetInfo {
+ virtual void anchor();
bool IsV9;
bool V8DeprecatedInsts;
bool IsVIS;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 3d7b4a47d1a8..6f313562c101 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -10,9 +10,10 @@
//
//===----------------------------------------------------------------------===//
-#include "Sparc.h"
#include "SparcTargetMachine.h"
+#include "Sparc.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -24,43 +25,73 @@ extern "C" void LLVMInitializeSparcTarget() {
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
-SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
+SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
FrameLowering(Subtarget) {
}
-bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createSparcISelDag(*this));
+namespace {
+/// Sparc Code Generator Pass Configuration Options.
+class SparcPassConfig : public TargetPassConfig {
+public:
+ SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SparcTargetMachine &getSparcTargetMachine() const {
+ return getTM<SparcTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SparcTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SparcPassConfig(this, PM);
+}
+
+bool SparcPassConfig::addInstSelector() {
+ PM.add(createSparcISelDag(getSparcTargetMachine()));
return false;
}
/// addPreEmitPass - This pass may be implemented by targets that want to run
/// passes immediately before machine code is emitted. This should return
/// true if -print-machineinstrs should print out the code after the passes.
-bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel){
- PM.add(createSparcFPMoverPass(*this));
- PM.add(createSparcDelaySlotFillerPass(*this));
+bool SparcPassConfig::addPreEmitPass(){
+ PM.add(createSparcFPMoverPass(getSparcTargetMachine()));
+ PM.add(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
return true;
}
+void SparcV8TargetMachine::anchor() { }
+
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, false) {
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
-SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
+void SparcV9TargetMachine::anchor() { }
+
+SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : SparcTargetMachine(T, TT, CPU, FS, RM, CM, true) {
+ StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 3c907dd44de1..b203dfa48921 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,8 +34,9 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM, bool is64bit);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -54,26 +55,31 @@ public:
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
///
class SparcV8TargetMachine : public SparcTargetMachine {
+ virtual void anchor();
public:
SparcV8TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
/// SparcV9TargetMachine - Sparc 64-bit target machine
///
class SparcV9TargetMachine : public SparcTargetMachine {
+ virtual void anchor();
public:
SparcV9TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/TargetInfo/CMakeLists.txt b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
index a0760231386a..b0d031e0c2be 100644
--- a/lib/Target/Sparc/TargetInfo/CMakeLists.txt
+++ b/lib/Target/Sparc/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMSparcInfo
SparcTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMSparcInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMSparcInfo SparcCommonTableGen)
diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..b5c320f92553
--- /dev/null
+++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/Sparc/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = SparcInfo
+parent = Sparc
+required_libraries = MC Support Target
+add_to_library_groups = Sparc
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
deleted file mode 100644
index 7c09c0ea7b5a..000000000000
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS SystemZ.td)
-
-llvm_tablegen(SystemZGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(SystemZGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(SystemZGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget)
-add_public_tablegen_target(SystemZCommonTableGen)
-
-add_llvm_target(SystemZCodeGen
- SystemZAsmPrinter.cpp
- SystemZISelDAGToDAG.cpp
- SystemZISelLowering.cpp
- SystemZInstrInfo.cpp
- SystemZFrameLowering.cpp
- SystemZRegisterInfo.cpp
- SystemZSubtarget.cpp
- SystemZTargetMachine.cpp
- SystemZSelectionDAGInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMSystemZCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMSystemZDesc
- LLVMSystemZInfo
- LLVMTarget
- )
-
-add_subdirectory(TargetInfo)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index 822df097a37d..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-add_llvm_library(LLVMSystemZDesc
- SystemZMCTargetDesc.cpp
- SystemZMCAsmInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMSystemZDesc
- LLVMMC
- LLVMSystemZInfo
- )
-
-add_dependencies(LLVMSystemZDesc SystemZCommonTableGen)
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile
deleted file mode 100644
index 08f1a9d51fb5..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZDesc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
deleted file mode 100644
index 8540546b62d3..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declarations of the SystemZMCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZMCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Support/ELF.h"
-using namespace llvm;
-
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
- IsLittleEndian = false;
- PointerSize = 8;
- PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
- PCSymbol = ".";
-}
-
-const MCSection *SystemZMCAsmInfo::
-getNonexecutableStackSection(MCContext &Ctx) const{
- return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
- 0, SectionKind::getMetadata());
-}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
deleted file mode 100644
index a6a27e2f4b6d..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the SystemZMCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SystemZTARGETASMINFO_H
-#define SystemZTARGETASMINFO_H
-
-#include "llvm/MC/MCAsmInfo.h"
-
-namespace llvm {
- class Target;
- class StringRef;
-
- struct SystemZMCAsmInfo : public MCAsmInfo {
- explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
- virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
- };
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
deleted file mode 100644
index 23fb1e068e70..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides SystemZ specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZMCTargetDesc.h"
-#include "SystemZMCAsmInfo.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "SystemZGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "SystemZGenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "SystemZGenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createSystemZMCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitSystemZMCInstrInfo(X);
- return X;
-}
-
-static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitSystemZMCRegisterInfo(X, 0);
- return X;
-}
-
-static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- if (RM == Reloc::Default)
- RM = Reloc::Static;
- X->InitMCCodeGenInfo(RM, CM);
- return X;
-}
-
-extern "C" void LLVMInitializeSystemZTargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget,
- createSystemZMCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
- createSystemZMCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
- createSystemZMCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
- createSystemZMCSubtargetInfo);
-}
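
The LLVMInitializeSystemZTargetMC() hook deleted above is what made the MC
pieces reachable through the TargetRegistry. A minimal client-side sketch,
assuming the 3.x-era registry API (lookupTarget taking a triple string plus
an error string, and createMCAsmInfo taking only the triple; the "s390x"
triple spelling is an assumption, not taken from this diff):

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/Support/TargetRegistry.h"
    #include "llvm/Support/TargetSelect.h"
    #include <string>
    using namespace llvm;

    int main() {
      InitializeAllTargetInfos();
      InitializeAllTargetMCs(); // runs LLVMInitializeSystemZTargetMC()

      std::string Err;
      const Target *T = TargetRegistry::lookupTarget("s390x-ibm-linux", Err);
      if (!T) return 1; // target not compiled in

      // Goes through the factory registered by RegisterMCAsmInfo above.
      const MCAsmInfo *MAI = T->createMCAsmInfo("s390x-ibm-linux");
      bool OK = MAI != 0;
      delete MAI;
      return OK ? 0 : 1;
    }
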
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
deleted file mode 100644
index e2ad5afd6e57..000000000000
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides SystemZ specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZMCTARGETDESC_H
-#define SYSTEMZMCTARGETDESC_H
-
-namespace llvm {
-class MCSubtargetInfo;
-class Target;
-class StringRef;
-
-extern Target TheSystemZTarget;
-
-} // End llvm namespace
-
-// Defines symbolic names for SystemZ registers.
-// This defines a mapping from register name to register number.
-#define GET_REGINFO_ENUM
-#include "SystemZGenRegisterInfo.inc"
-
-// Defines symbolic names for the SystemZ instructions.
-#define GET_INSTRINFO_ENUM
-#include "SystemZGenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "SystemZGenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
deleted file mode 100644
index 6356491debeb..000000000000
--- a/lib/Target/SystemZ/Makefile
+++ /dev/null
@@ -1,22 +0,0 @@
-##===- lib/Target/SystemZ/Makefile ---------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMSystemZCodeGen
-TARGET = SystemZ
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \
- SystemZGenAsmWriter.inc SystemZGenDAGISel.inc \
- SystemZGenSubtargetInfo.inc SystemZGenCallingConv.inc
-
-DIRS = TargetInfo MCTargetDesc
-
-include $(LEVEL)/Makefile.common
-
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
deleted file mode 100644
index 88960b9cc601..000000000000
--- a/lib/Target/SystemZ/SystemZ.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in
-// the LLVM SystemZ backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SYSTEMZ_H
-#define LLVM_TARGET_SYSTEMZ_H
-
-#include "MCTargetDesc/SystemZMCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- class SystemZTargetMachine;
- class FunctionPass;
- class formatted_raw_ostream;
-
- namespace SystemZCC {
- // SystemZ-specific condition codes. These correspond to the SYSTEMZ_*_COND
- // values in SystemZInstrInfo.td and must be kept in sync with them.
- enum CondCodes {
- O = 0,
- H = 1,
- NLE = 2,
- L = 3,
- NHE = 4,
- LH = 5,
- NE = 6,
- E = 7,
- NLH = 8,
- HE = 9,
- NL = 10,
- LE = 11,
- NH = 12,
- NO = 13,
- INVALID = -1
- };
- }
-
- FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-
-} // end namespace llvm
-#endif
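
createSystemZISelDag, declared just above, was the target machine's entry
point for plugging the DAG-to-DAG selector into the codegen pipeline. One
plausible shape of that wiring, reconstructed for illustration only (the hook
lived in the likewise-deleted SystemZTargetMachine, and the override's exact
signature varied across 3.x releases):

    // Hypothetical reconstruction, not part of this hunk:
    bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM) {
      // Install the instruction selector declared in SystemZ.h.
      PM.add(createSystemZISelDag(*this, getOptLevel()));
      return false; // false = no error
    }
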
diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td
deleted file mode 100644
index 4c08c087225e..000000000000
--- a/lib/Target/SystemZ/SystemZ.td
+++ /dev/null
@@ -1,61 +0,0 @@
-//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This is the top level entry point for the SystemZ target.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// Subtarget Features.
-//===----------------------------------------------------------------------===//
-def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true",
- "Support Z10 instructions">;
-
-//===----------------------------------------------------------------------===//
-// SystemZ supported processors.
-//===----------------------------------------------------------------------===//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
-
-def : Proc<"z9", []>;
-def : Proc<"z10", [FeatureZ10]>;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "SystemZRegisterInfo.td"
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Description
-//===----------------------------------------------------------------------===//
-
-include "SystemZCallingConv.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "SystemZInstrInfo.td"
-include "SystemZInstrFP.td"
-
-def SystemZInstrInfo : InstrInfo {}
-
-//===----------------------------------------------------------------------===//
-// Target Declaration
-//===----------------------------------------------------------------------===//
-
-def SystemZ : Target {
- let InstructionSet = SystemZInstrInfo;
-}
-
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp
deleted file mode 100644
index 43dcdfc3936b..000000000000
--- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ /dev/null
@@ -1,221 +0,0 @@
-//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the SystemZ assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "SystemZ.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class SystemZAsmPrinter : public AsmPrinter {
- public:
- SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {}
-
- virtual const char *getPassName() const {
- return "SystemZ Assembly Printer";
- }
-
- void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char* Modifier = 0);
- void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
- void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char* Modifier = 0);
- void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char* Modifier = 0);
- void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (int16_t)MI->getOperand(OpNum).getImm();
- }
- void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (uint16_t)MI->getOperand(OpNum).getImm();
- }
- void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (int32_t)MI->getOperand(OpNum).getImm();
- }
- void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) {
- O << (uint32_t)MI->getOperand(OpNum).getImm();
- }
-
- void printInstruction(const MachineInstr *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
-
- void EmitInstruction(const MachineInstr *MI);
- };
-} // end of anonymous namespace
-
-#include "SystemZGenAsmWriter.inc"
-
-void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
-}
-
-void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- return;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
- O << *Mang->getSymbol(GV);
-
- // Assemble calls via PLT for externally visible symbols if PIC.
- if (TM.getRelocationModel() == Reloc::PIC_ &&
- !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
- !GV->hasLocalLinkage())
- O << "@PLT";
-
- printOffset(MO.getOffset(), O);
- return;
- }
- case MachineOperand::MO_ExternalSymbol: {
- std::string Name(MAI->getGlobalPrefix());
- Name += MO.getSymbolName();
- O << Name;
-
- if (TM.getRelocationModel() == Reloc::PIC_)
- O << "@PLT";
-
- return;
- }
- default:
- assert(0 && "Not implemented yet!");
- }
-}
-
-
-void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O, const char *Modifier) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- case MachineOperand::MO_Register: {
- assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
- "Virtual registers should be already mapped!");
- unsigned Reg = MO.getReg();
- if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
- if (strncmp(Modifier + 7, "even", 4) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
- else if (strncmp(Modifier + 7, "odd", 3) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
- else
- assert(0 && "Invalid subreg modifier");
- }
-
- O << '%' << getRegisterName(Reg);
- return;
- }
- case MachineOperand::MO_Immediate:
- O << MO.getImm();
- return;
- case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
- return;
- case MachineOperand::MO_JumpTableIndex:
- O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
- << MO.getIndex();
-
- return;
- case MachineOperand::MO_ConstantPoolIndex:
- O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
- << MO.getIndex();
-
- printOffset(MO.getOffset(), O);
- break;
- case MachineOperand::MO_GlobalAddress:
- O << *Mang->getSymbol(MO.getGlobal());
- break;
- case MachineOperand::MO_ExternalSymbol: {
- O << *GetExternalSymbolSymbol(MO.getSymbolName());
- break;
- }
- default:
- assert(0 && "Not implemented yet!");
- }
-
- switch (MO.getTargetFlags()) {
- default: assert(0 && "Unknown target flag on GV operand");
- case SystemZII::MO_NO_FLAG:
- break;
- case SystemZII::MO_GOTENT: O << "@GOTENT"; break;
- case SystemZII::MO_PLT: O << "@PLT"; break;
- }
-
- printOffset(MO.getOffset(), O);
-}
-
-void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- const char *Modifier) {
- const MachineOperand &Base = MI->getOperand(OpNum);
-
- // Print displacement operand.
- printOperand(MI, OpNum+1, O);
-
- // Print base operand (if any)
- if (Base.getReg()) {
- O << '(';
- printOperand(MI, OpNum, O);
- O << ')';
- }
-}
-
-void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O,
- const char *Modifier) {
- const MachineOperand &Base = MI->getOperand(OpNum);
- const MachineOperand &Index = MI->getOperand(OpNum+2);
-
- // Print displacement operand.
- printOperand(MI, OpNum+1, O);
-
- // Print base operand (if any)
- if (Base.getReg()) {
- O << '(';
- printOperand(MI, OpNum, O);
- if (Index.getReg()) {
- O << ',';
- printOperand(MI, OpNum+2, O);
- }
- O << ')';
- } else
- assert(!Index.getReg() && "Should allocate base register first!");
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeSystemZAsmPrinter() {
- RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
-}
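
printRRIAddrOperand above hand-assembles the displacement(reg[,reg]) memory
operand syntax. A standalone sketch of the same string-building order
(simplified; the register names are just examples):

    #include <cstdio>
    #include <string>

    // Mirrors printRRIAddrOperand: displacement first, then an optional
    // parenthesized first register, then an optional second register.
    static std::string rriAddr(long Disp, const char *Base, const char *Index) {
      std::string S = std::to_string(Disp);
      if (Base) {
        S += '(';
        S += Base;
        if (Index) { S += ','; S += Index; }
        S += ')';
      }
      return S;
    }

    int main() {
      std::printf("%s\n", rriAddr(8, "%r2", "%r3").c_str());     // 8(%r2,%r3)
      std::printf("%s\n", rriAddr(16, "%r15", nullptr).c_str()); // 16(%r15)
      return 0;
    }
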
diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td
deleted file mode 100644
index c799a9e501aa..000000000000
--- a/lib/Target/SystemZ/SystemZCallingConv.td
+++ /dev/null
@@ -1,46 +0,0 @@
-//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for SystemZ architecture.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SystemZ Return Value Calling Convention
-//===----------------------------------------------------------------------===//
-def RetCC_SystemZ : CallingConv<[
- // Promote i8/i16/i32 return values to i64.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // i64 is returned in R2D; additional return values use R3D, R4D, R5D.
- CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>,
-
- // f32/f64 are returned in F0S/F0L; additional values use F2, F4, F6.
- CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
- CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>
-]>;
-
-//===----------------------------------------------------------------------===//
-// SystemZ Argument Calling Conventions
-//===----------------------------------------------------------------------===//
-def CC_SystemZ : CallingConv<[
- // Promote i8/i16/i32 arguments to i64.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // The first 5 integer arguments of non-varargs functions are passed in
- // integer registers.
- CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>,
-
- // The first 4 floating point arguments of non-varargs functions are passed
- // in FP registers.
- CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
- CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>,
-
- // Remaining values are passed in stack slots that are 8 bytes in
- // size and 8-byte aligned.
- CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>>
-]>;
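
Reading CC_SystemZ and RetCC_SystemZ above as a worked example (integer case
only, which the tables pin down unambiguously):

    // For a call such as:  long f(int a, long b, long c, long d, long e, long g);
    //   - 'a' is first promoted i32 -> i64      (CCPromoteToType<i64>);
    //   - a..e occupy R2D, R3D, R4D, R5D, R6D   (CCAssignToReg);
    //   - 'g' overflows the register list and lands in an 8-byte,
    //     8-byte-aligned stack slot             (CCAssignToStack<8, 8>);
    //   - the i64 result comes back in R2D      (RetCC_SystemZ).
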
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
deleted file mode 100644
index 2ad84a2d052e..000000000000
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ /dev/null
@@ -1,386 +0,0 @@
-//===-- SystemZFrameLowering.cpp - SystemZ Frame Information -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of TargetFrameLowering.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZFrameLowering.h"
-#include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) {
- // Fill the spill offsets map
- static const unsigned SpillOffsTab[][2] = {
- { SystemZ::R2D, 0x10 },
- { SystemZ::R3D, 0x18 },
- { SystemZ::R4D, 0x20 },
- { SystemZ::R5D, 0x28 },
- { SystemZ::R6D, 0x30 },
- { SystemZ::R7D, 0x38 },
- { SystemZ::R8D, 0x40 },
- { SystemZ::R9D, 0x48 },
- { SystemZ::R10D, 0x50 },
- { SystemZ::R11D, 0x58 },
- { SystemZ::R12D, 0x60 },
- { SystemZ::R13D, 0x68 },
- { SystemZ::R14D, 0x70 },
- { SystemZ::R15D, 0x78 }
- };
-
- RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-
- for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i)
- RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1];
-}
-
-/// hasFP - Return true if the specified function should have a dedicated
-/// frame pointer register. This is true if the function has variable-sized
-/// allocas or if frame pointer elimination is disabled.
-bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects();
-}
-
-/// emitSPUpdate - Emit a series of instructions to increment / decrement the
-/// stack pointer by a constant value.
-static
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- int64_t NumBytes, const TargetInstrInfo &TII) {
- unsigned Opc; uint64_t Chunk;
- bool isSub = NumBytes < 0;
- uint64_t Offset = isSub ? -NumBytes : NumBytes;
-
- if (Offset >= (1LL << 15) - 1) {
- Opc = SystemZ::ADD64ri32;
- Chunk = (1LL << 31) - 1;
- } else {
- Opc = SystemZ::ADD64ri16;
- Chunk = (1LL << 15) - 1;
- }
-
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- while (Offset) {
- uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D)
- .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal);
- // The PSW implicit def is dead.
- MI->getOperand(3).setIsDead();
- Offset -= ThisVal;
- }
-}
-
-void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const SystemZInstrInfo &TII =
- *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
- SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
-
- // Get the number of bytes to allocate from the FrameInfo.
- // Note that the area for callee-saved registers is already allocated, thus
- // we need to 'undo' the stack movement.
- uint64_t StackSize = MFI->getStackSize();
- StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
- uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
-
- // Skip the callee-saved push instructions.
- while (MBBI != MBB.end() &&
- (MBBI->getOpcode() == SystemZ::MOV64mr ||
- MBBI->getOpcode() == SystemZ::MOV64mrm))
- ++MBBI;
-
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
-
- // adjust stack pointer: R15 -= numbytes
- if (StackSize || MFI->hasCalls()) {
- assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
- "Invalid stack frame calculation!");
- emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII);
- }
-
- if (hasFP(MF)) {
- // Update R11 with the new base value...
- BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D)
- .addReg(SystemZ::R15D);
-
- // Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
- I != E; ++I)
- I->addLiveIn(SystemZ::R11D);
-
- }
-}
-
-void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- const SystemZInstrInfo &TII =
- *static_cast<const SystemZInstrInfo*>(MF.getTarget().getInstrInfo());
- SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- unsigned RetOpcode = MBBI->getOpcode();
-
- switch (RetOpcode) {
- case SystemZ::RET: break; // These are ok
- default:
- assert(0 && "Can only insert epilog into returning blocks");
- }
-
- // Get the number of bytes to allocate from the FrameInfo
- // Note that the area for callee-saved registers is already allocated, thus
- // we need to 'undo' the stack movement.
- uint64_t StackSize =
- MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize();
- uint64_t NumBytes = StackSize - getOffsetOfLocalArea();
-
- // Skip the final terminator instruction.
- while (MBBI != MBB.begin()) {
- MachineBasicBlock::iterator PI = prior(MBBI);
- --MBBI;
- if (!PI->getDesc().isTerminator())
- break;
- }
-
- // When the callee-saved register restores were emitted, the stack frame was
- // not yet finalized (and thus the stack size was unknown). Adjust the offset
- // now that the full stack size is known.
- if (StackSize || MFI->hasCalls()) {
- assert((MBBI->getOpcode() == SystemZ::MOV64rmm ||
- MBBI->getOpcode() == SystemZ::MOV64rm) &&
- "Expected to see callee-save register restore code");
- assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) &&
- "Invalid stack frame calculation!");
-
- unsigned i = 0;
- MachineInstr &MI = *MBBI;
- while (!MI.getOperand(i).isImm()) {
- ++i;
- assert(i < MI.getNumOperands() && "Unexpected restore code!");
- }
-
- uint64_t Offset = NumBytes + MI.getOperand(i).getImm();
- // If Offset does not fit into the 20-bit signed displacement field, we need
- // to emit some additional code...
- if (Offset > 524287) {
- // Fold the displacement into load instruction as much as possible.
- NumBytes = Offset - 524287;
- Offset = 524287;
- emitSPUpdate(MBB, MBBI, NumBytes, TII);
- }
-
- MI.getOperand(i).ChangeToImmediate(Offset);
- }
-}
-
-int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const SystemZMachineFunctionInfo *SystemZMFI =
- MF.getInfo<SystemZMachineFunctionInfo>();
- int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment();
- uint64_t StackSize = MFI->getStackSize();
-
- // Fixed objects are really located in the "previous" frame.
- if (FI < 0)
- StackSize -= SystemZMFI->getCalleeSavedFrameSize();
-
- Offset += StackSize - getOffsetOfLocalArea();
-
- // Skip the register save area if we generated the stack frame.
- if (StackSize || MFI->hasCalls())
- Offset -= getOffsetOfLocalArea();
-
- return Offset;
-}
-
-bool
-SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
- unsigned CalleeFrameSize = 0;
-
- // Scan the callee-saved registers and find the bounds of the spill area.
- unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (!SystemZ::FP64RegClass.contains(Reg)) {
- unsigned Offset = RegSpillOffsets[Reg];
- CalleeFrameSize += 8;
- if (StartOffset > Offset) {
- LowReg = Reg; StartOffset = Offset;
- }
- if (EndOffset < Offset) {
- HighReg = Reg; EndOffset = RegSpillOffsets[Reg];
- }
- }
- }
-
- // Save information for epilogue inserter.
- MFI->setCalleeSavedFrameSize(CalleeFrameSize);
- MFI->setLowReg(LowReg); MFI->setHighReg(HighReg);
-
- // Save GPRs
- if (StartOffset) {
- // Build a store instruction. Use the STORE MULTIPLE instruction if there
- // are several registers to store, otherwise just STORE.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
- SystemZ::MOV64mr : SystemZ::MOV64mrm)));
-
- // Add store operands.
- MIB.addReg(SystemZ::R15D).addImm(StartOffset);
- if (LowReg == HighReg)
- MIB.addReg(0);
- MIB.addReg(LowReg, RegState::Kill);
- if (LowReg != HighReg)
- MIB.addReg(HighReg, RegState::Kill);
-
- // Do a second scan, marking the remaining regs as killed by the instruction.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- if (Reg != LowReg && Reg != HighReg)
- MIB.addReg(Reg, RegState::ImplicitKill);
- }
- }
-
- // Save FPRs
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (SystemZ::FP64RegClass.contains(Reg)) {
- MBB.addLiveIn(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
- &SystemZ::FP64RegClass, TRI);
- }
- }
-
- return true;
-}
-
-bool
-SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
-
- // Restore FP registers
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (SystemZ::FP64RegClass.contains(Reg))
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- &SystemZ::FP64RegClass, TRI);
- }
-
- // Restore GP registers
- unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg();
- unsigned StartOffset = RegSpillOffsets[LowReg];
-
- if (StartOffset) {
- // Build a load instruction. Use the LOAD MULTIPLE instruction if there are
- // several registers to load, otherwise just LOAD.
- MachineInstrBuilder MIB =
- BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ?
- SystemZ::MOV64rm : SystemZ::MOV64rmm)));
- // Add load operands.
- MIB.addReg(LowReg, RegState::Define);
- if (LowReg != HighReg)
- MIB.addReg(HighReg, RegState::Define);
-
- MIB.addReg(hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
- MIB.addImm(StartOffset);
- if (LowReg == HighReg)
- MIB.addReg(0);
-
- // Do a second scan, marking the remaining regs as defined by the instruction.
- for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned Reg = CSI[i].getReg();
- if (Reg != LowReg && Reg != HighReg)
- MIB.addReg(Reg, RegState::ImplicitDefine);
- }
- }
-
- return true;
-}
-
-void
-SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- // Determine whether R14/R15 will ever be clobbered inside the function, and
- // if so, mark them as callee-saved.
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Check whether the high FPRs are ever used; if so, we need to save R15 as
- // well.
- static const unsigned HighFPRs[] = {
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
- SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
- SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
- };
-
- bool HighFPRsUsed = false;
- for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i)
- HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]);
-
- if (FFI->hasCalls())
- /* FIXME: function is varargs */
- /* FIXME: function grabs RA */
- /* FIXME: function calls eh_return */
- MRI.setPhysRegUsed(SystemZ::R14D);
-
- if (HighFPRsUsed ||
- FFI->hasCalls() ||
- FFI->getObjectIndexEnd() != 0 || // Contains automatic variables
- FFI->hasVarSizedObjects() // Function calls dynamic allocas
- /* FIXME: function is varargs */)
- MRI.setPhysRegUsed(SystemZ::R15D);
-}
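
emitSPUpdate above splits an arbitrary stack adjustment into immediates the
two add-immediate forms accept. A self-contained sketch of just that chunking
arithmetic, using the same thresholds (the printf stands in for building the
real ADD64ri16/ADD64ri32 instructions):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t NumBytes = -((1LL << 32) + 100); // too large for one immediate
      bool isSub = NumBytes < 0;
      uint64_t Offset = isSub ? -NumBytes : NumBytes;

      // Same selection as the deleted code: small adjustments use the 16-bit
      // immediate form, anything larger the 32-bit immediate form.
      uint64_t Chunk = (Offset >= (1LL << 15) - 1) ? (1LL << 31) - 1
                                                   : (1LL << 15) - 1;
      while (Offset) {
        uint64_t ThisVal = Offset > Chunk ? Chunk : Offset;
        std::printf("R15D += %lld\n",
                    isSub ? -(long long)ThisVal : (long long)ThisVal);
        Offset -= ThisVal;
      }
      return 0;
    }
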
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h
deleted file mode 100644
index 1284b6802b3a..000000000000
--- a/lib/Target/SystemZ/SystemZFrameLowering.h
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- SystemZFrameLowering.h - Frame lowering for SystemZ -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SystemZ implementation of TargetFrameLowering.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZ_FRAMEINFO_H
-#define SYSTEMZ_FRAMEINFO_H
-
-#include "SystemZ.h"
-#include "SystemZSubtarget.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/ADT/IndexedMap.h"
-
-namespace llvm {
- class SystemZSubtarget;
-
-class SystemZFrameLowering : public TargetFrameLowering {
- IndexedMap<unsigned> RegSpillOffsets;
-protected:
- const SystemZSubtarget &STI;
-
-public:
- explicit SystemZFrameLowering(const SystemZSubtarget &sti);
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-
- bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
- bool hasFP(const MachineFunction &MF) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
deleted file mode 100644
index 2186ff1fed54..000000000000
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ /dev/null
@@ -1,779 +0,0 @@
-//===-- SystemZISelDAGToDAG.cpp - A DAG-to-DAG inst selector for SystemZ -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the SystemZ target.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValues
- /// instead of register numbers for the leaves of the matched tree.
- struct SystemZRRIAddressMode {
- enum {
- RegBase,
- FrameIndexBase
- } BaseType;
-
- struct { // This is really a union, discriminated by BaseType!
- SDValue Reg;
- int FrameIndex;
- } Base;
-
- SDValue IndexReg;
- int64_t Disp;
- bool isRI;
-
- SystemZRRIAddressMode(bool RI = false)
- : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) {
- }
-
- void dump() {
- errs() << "SystemZRRIAddressMode " << this << '\n';
- if (BaseType == RegBase) {
- errs() << "Base.Reg ";
- if (Base.Reg.getNode() != 0)
- Base.Reg.getNode()->dump();
- else
- errs() << "nul";
- errs() << '\n';
- } else {
- errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
- }
- if (!isRI) {
- errs() << "IndexReg ";
- if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
- else errs() << "nul";
- }
- errs() << " Disp " << Disp << '\n';
- }
- };
-}
-
-/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine
-/// instructions for SelectionDAG operations.
-///
-namespace {
- class SystemZDAGToDAGISel : public SelectionDAGISel {
- const SystemZTargetLowering &Lowering;
- const SystemZSubtarget &Subtarget;
-
- void getAddressOperandsRI(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp);
- void getAddressOperands(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp,
- SDValue &Index);
-
- public:
- SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(TM, OptLevel),
- Lowering(*TM.getTargetLowering()),
- Subtarget(*TM.getSubtargetImpl()) { }
-
- virtual const char *getPassName() const {
- return "SystemZ DAG->DAG Pattern Instruction Selection";
- }
-
- /// getI8Imm - Return a target constant with the specified value, of type
- /// i8.
- inline SDValue getI8Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i8);
- }
-
- /// getI16Imm - Return a target constant with the specified value, of type
- /// i16.
- inline SDValue getI16Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i16);
- }
-
- /// getI32Imm - Return a target constant with the specified value, of type
- /// i32.
- inline SDValue getI32Imm(uint64_t Imm) {
- return CurDAG->getTargetConstant(Imm, MVT::i32);
- }
-
- // Include the pieces autogenerated from the target description.
- #include "SystemZGenDAGISel.inc"
-
- private:
- bool SelectAddrRI12Only(SDValue& Addr,
- SDValue &Base, SDValue &Disp);
- bool SelectAddrRI12(SDValue& Addr,
- SDValue &Base, SDValue &Disp,
- bool is12BitOnly = false);
- bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp);
- bool SelectAddrRRI12(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectAddrRRI20(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
- bool SelectLAAddr(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index);
-
- SDNode *Select(SDNode *Node);
-
- bool TryFoldLoad(SDNode *P, SDValue N,
- SDValue &Base, SDValue &Disp, SDValue &Index);
-
- bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
- bool is12Bit, unsigned Depth = 0);
- bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
- };
-} // end anonymous namespace
-
-/// createSystemZISelDag - This pass converts a legalized DAG into a
-/// SystemZ-specific DAG, ready for instruction scheduling.
-///
-FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new SystemZDAGToDAGISel(TM, OptLevel);
-}
-
-/// isImmSExt20 - This method tests whether the given 32-bit or 64-bit
-/// immediate value can be accurately represented as a sign extension from a
-/// 20-bit value. If so, it returns true and stores the value in the Imm
-/// out-parameter.
-static bool isImmSExt20(int64_t Val, int64_t &Imm) {
- if (Val >= -524288 && Val <= 524287) {
- Imm = Val;
- return true;
- }
- return false;
-}
-
-/// isImmZExt12 - This method tests whether the given 32-bit or 64-bit
-/// immediate value can be accurately represented as a zero extension from a
-/// 12-bit value. If so, it returns true and stores the value in the Imm
-/// out-parameter.
-static bool isImmZExt12(int64_t Val, int64_t &Imm) {
- if (Val >= 0 && Val <= 0xFFF) {
- Imm = Val;
- return true;
- }
- return false;
-}
-
-/// MatchAddress - Add the specified node to the specified addressing mode,
-/// returning true if it cannot be done. This just pattern matches for the
-/// addressing mode.
-bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM,
- bool is12Bit, unsigned Depth) {
- DebugLoc dl = N.getDebugLoc();
- DEBUG(errs() << "MatchAddress: "; AM.dump());
- // Limit recursion.
- if (Depth > 5)
- return MatchAddressBase(N, AM);
-
- // FIXME: We can do better here. If we have something like
- // (shift (add A, imm), N), we can try to reassociate and fold the shift of
- // imm into the addressing mode.
- switch (N.getOpcode()) {
- default: break;
- case ISD::Constant: {
- int64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- int64_t Imm = 0;
- bool Match = (is12Bit ?
- isImmZExt12(AM.Disp + Val, Imm) :
- isImmSExt20(AM.Disp + Val, Imm));
- if (Match) {
- AM.Disp = Imm;
- return false;
- }
- break;
- }
-
- case ISD::FrameIndex:
- if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
- AM.Base.Reg.getNode() == 0) {
- AM.BaseType = SystemZRRIAddressMode::FrameIndexBase;
- AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
- return false;
- }
- break;
-
- case ISD::SUB: {
- // Given A-B, if A can be completely folded into the address while
- // leaving the index field unused, use -B as the index.
- // This is a win if A has multiple parts that can be folded into
- // the address. Also, this saves a mov if the base register has
- // other uses, since it avoids a two-address sub instruction; however,
- // it costs an additional mov if the index register has other uses.
-
- // Test if the LHS of the sub can be folded.
- SystemZRRIAddressMode Backup = AM;
- if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) {
- AM = Backup;
- break;
- }
- // Test if the index field is free for use.
- if (AM.IndexReg.getNode() || AM.isRI) {
- AM = Backup;
- break;
- }
-
- // If the base is a register with multiple uses, this transformation may
- // save a mov. Otherwise it's probably better not to do it.
- if (AM.BaseType == SystemZRRIAddressMode::RegBase &&
- (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) {
- AM = Backup;
- break;
- }
-
- // Ok, the transformation is legal and appears profitable. Go for it.
- SDValue RHS = N.getNode()->getOperand(1);
- SDValue Zero = CurDAG->getConstant(0, N.getValueType());
- SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
- AM.IndexReg = Neg;
-
- // Insert the new nodes into the topological ordering.
- if (Zero.getNode()->getNodeId() == -1 ||
- Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Zero.getNode());
- Zero.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Neg.getNode()->getNodeId() == -1 ||
- Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Neg.getNode());
- Neg.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- return false;
- }
-
- case ISD::ADD: {
- SystemZRRIAddressMode Backup = AM;
- if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1))
- return false;
- AM = Backup;
- if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) &&
- !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1))
- return false;
- AM = Backup;
-
- // If we couldn't fold both operands into the address at the same time,
- // see if we can just put each operand into a register and fold at least
- // the add.
- if (!AM.isRI &&
- AM.BaseType == SystemZRRIAddressMode::RegBase &&
- !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) {
- AM.Base.Reg = N.getNode()->getOperand(0);
- AM.IndexReg = N.getNode()->getOperand(1);
- return false;
- }
- break;
- }
-
- case ISD::OR:
- // Handle "X | C" as "X + C" iff X is known to have C bits clear.
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- SystemZRRIAddressMode Backup = AM;
- int64_t Offset = CN->getSExtValue();
- int64_t Imm = 0;
- bool MatchOffset = (is12Bit ?
- isImmZExt12(AM.Disp + Offset, Imm) :
- isImmSExt20(AM.Disp + Offset, Imm));
- // The resultant displacement must fit in 12 or 20 bits.
- if (MatchOffset &&
- // LHS should be an addr mode.
- !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) &&
- // Check to see if the LHS & C is zero.
- CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
- AM.Disp = Imm;
- return false;
- }
- AM = Backup;
- }
- break;
- }
-
- return MatchAddressBase(N, AM);
-}
-
-/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
-/// specified addressing mode without any further recursion.
-bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N,
- SystemZRRIAddressMode &AM) {
- // Is the base register already occupied?
- if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) {
- // If so, check to see if the index register is set.
- if (AM.IndexReg.getNode() == 0 && !AM.isRI) {
- AM.IndexReg = N;
- return false;
- }
-
- // Otherwise, we cannot select it.
- return true;
- }
-
- // Default: use the node as the base register.
- AM.BaseType = SystemZRRIAddressMode::RegBase;
- AM.Base.Reg = N;
- return false;
-}
-
-void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp) {
- if (AM.BaseType == SystemZRRIAddressMode::RegBase)
- Base = AM.Base.Reg;
- else
- Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy());
- Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64);
-}
-
-void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM,
- SDValue &Base, SDValue &Disp,
- SDValue &Index) {
- getAddressOperandsRI(AM, Base, Disp);
- Index = AM.IndexReg;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// an unsigned 12-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr,
- SDValue &Base, SDValue &Disp) {
- return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true);
-}
-
-bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr,
- SDValue &Base, SDValue &Disp,
- bool is12BitOnly) {
- SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true);
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an address computation is used
- // as an address by all of its uses. But watch out for CopyToReg uses: they
- // mean the address computation is live-out. It will be computed by an LA,
- // so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM12);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
- return false;
-
- // Check whether we can match this using 20-bit displacements.
- if (!Done && !is12BitOnly &&
- !MatchAddress(Addr, AM20, /* is12Bit */ false))
- if (AM12.Disp == 0 && AM20.Disp != 0)
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
-
- EVT VT = Addr.getValueType();
- if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM12.Base.Reg.getNode())
- AM12.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
-
- getAddressOperandsRI(AM12, Base, Disp);
-
- return true;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// a signed 20-bit displacement [r+imm].
-bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr,
- SDValue &Base, SDValue &Disp) {
- SystemZRRIAddressMode AM(/*isRI*/true);
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an address computation is used
- // as an address by all of its uses. But watch out for CopyToReg uses: they
- // mean the address computation is live-out. It will be computed by an LA,
- // so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM.dump());
-
- EVT VT = Addr.getValueType();
- if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!");
-
- getAddressOperandsRI(AM, Base, Disp);
-
- return true;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// index register plus an unsigned 12-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- SystemZRRIAddressMode AM20, AM12;
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an address computation is used
- // as an address by all of its uses. But watch out for CopyToReg uses: they
- // mean the address computation is live-out. It will be computed by an LA,
- // so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM12);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true))
- return false;
-
- // Check whether we can match this using 20-bit displacements.
- if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false))
- if (AM12.Disp == 0 && AM20.Disp != 0)
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM12.dump());
-
- EVT VT = Addr.getValueType();
- if (AM12.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM12.Base.Reg.getNode())
- AM12.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- if (!AM12.IndexReg.getNode())
- AM12.IndexReg = CurDAG->getRegister(0, VT);
-
- getAddressOperands(AM12, Base, Disp, Index);
-
- return true;
-}
-
-/// Returns true if the address can be represented by a base register plus
-/// index register plus a signed 20-bit displacement [base + idx + imm].
-bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- SystemZRRIAddressMode AM;
- bool Done = false;
-
- if (!Addr.hasOneUse()) {
- unsigned Opcode = Addr.getOpcode();
- if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) {
- // If we are able to fold N into the addressing mode, then we'll allow it
- // even if N has multiple uses. In general, an address computation is used
- // as an address by all of its uses. But watch out for CopyToReg uses: they
- // mean the address computation is live-out. It will be computed by an LA,
- // so we want to avoid computing the address twice.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- if (UI->getOpcode() == ISD::CopyToReg) {
- MatchAddressBase(Addr, AM);
- Done = true;
- break;
- }
- }
- }
- }
- if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false))
- return false;
-
- DEBUG(errs() << "MatchAddress (final): "; AM.dump());
-
- EVT VT = Addr.getValueType();
- if (AM.BaseType == SystemZRRIAddressMode::RegBase) {
- if (!AM.Base.Reg.getNode())
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- }
-
- if (!AM.IndexReg.getNode())
- AM.IndexReg = CurDAG->getRegister(0, VT);
-
- getAddressOperands(AM, Base, Disp, Index);
-
- return true;
-}
-
-/// SelectLAAddr - Calls MatchAddress and determines whether the maximal
-/// addressing mode it matches can be cost-effectively emitted as LA/LAY.
-bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- SystemZRRIAddressMode AM;
-
- if (MatchAddress(Addr, AM, false))
- return false;
-
- EVT VT = Addr.getValueType();
- unsigned Complexity = 0;
- if (AM.BaseType == SystemZRRIAddressMode::RegBase)
- if (AM.Base.Reg.getNode())
- Complexity = 1;
- else
- AM.Base.Reg = CurDAG->getRegister(0, VT);
- else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase)
- Complexity = 4;
-
- if (AM.IndexReg.getNode())
- Complexity += 1;
- else
- AM.IndexReg = CurDAG->getRegister(0, VT);
-
- if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode()))
- Complexity += 1;
-
- if (Complexity > 2) {
- getAddressOperands(AM, Base, Disp, Index);
- return true;
- }
-
- return false;
-}
-
-bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
- SDValue &Base, SDValue &Disp, SDValue &Index) {
- if (ISD::isNON_EXTLoad(N.getNode()) &&
- IsLegalToFold(N, P, P, OptLevel))
- return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index);
- return false;
-}
-
-SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
- EVT NVT = Node->getValueType(0);
- DebugLoc dl = Node->getDebugLoc();
- unsigned Opcode = Node->getOpcode();
-
- // Dump information about the Node being selected
- DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
-
- // If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- return NULL; // Already selected.
- }
-
- switch (Opcode) {
- default: break;
- case ISD::SDIVREM: {
- unsigned Opc, MOpc;
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
-
- EVT ResVT;
- bool is32Bit = false;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i32:
- Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m;
- ResVT = MVT::v2i64;
- is32Bit = true;
- break;
- case MVT::i64:
- Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m;
- ResVT = MVT::v2i64;
- break;
- }
-
- SDValue Tmp0, Tmp1, Tmp2;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
-
- // Prepare the dividend
- SDNode *Dividend;
- if (is32Bit)
- Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0);
- else
- Dividend = N0.getNode();
-
- // Insert prepared dividend into suitable 'subreg'
- SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, ResVT);
- Dividend =
- CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
- SDValue(Tmp, 0), SDValue(Dividend, 0),
- CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
-
- SDNode *Result;
- SDValue DivVal = SDValue(Dividend, 0);
- if (foldedLoad) {
- SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
- Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
- Ops, array_lengthof(Ops));
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(Result, 1));
- } else {
- Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1);
- }
-
- // Copy the division (odd subreg) result, if it is needed.
- if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_odd32 : SystemZ::subreg_odd);
- SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
-
- ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- // Copy the remainder (even subreg) result, if it is needed.
- if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_32bit : SystemZ::subreg_even);
- SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
-
- ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- return NULL;
- }
- case ISD::UDIVREM: {
- unsigned Opc, MOpc, ClrOpc;
- SDValue N0 = Node->getOperand(0);
- SDValue N1 = Node->getOperand(1);
- EVT ResVT;
-
- bool is32Bit = false;
- switch (NVT.getSimpleVT().SimpleTy) {
- default: assert(0 && "Unsupported VT!");
- case MVT::i32:
- Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m;
- ClrOpc = SystemZ::MOV64Pr0_even;
- ResVT = MVT::v2i32;
- is32Bit = true;
- break;
- case MVT::i64:
- Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m;
- ClrOpc = SystemZ::MOV128r0_even;
- ResVT = MVT::v2i64;
- break;
- }
-
- SDValue Tmp0, Tmp1, Tmp2;
- bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2);
-
- // Prepare the dividend
- SDNode *Dividend = N0.getNode();
-
- // Insert prepared dividend into suitable 'subreg'
- SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, ResVT);
- {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_odd32 : SystemZ::subreg_odd);
- Dividend =
- CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
- SDValue(Tmp, 0), SDValue(Dividend, 0),
- CurDAG->getTargetConstant(SubRegIdx, MVT::i32));
- }
-
- // Zero out even subreg
- Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0));
-
- SDValue DivVal = SDValue(Dividend, 0);
- SDNode *Result;
- if (foldedLoad) {
- SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) };
- Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other,
- Ops, array_lengthof(Ops));
- // Update the chain.
- ReplaceUses(N1.getValue(1), SDValue(Result, 1));
- } else {
- Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1);
- }
-
- // Copy the division (odd subreg) result, if it is needed.
- if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_odd32 : SystemZ::subreg_odd);
- SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
- ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- // Copy the remainder (even subreg) result, if it is needed.
- if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_32bit : SystemZ::subreg_even);
- SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, NVT,
- SDValue(Result, 0),
- CurDAG->getTargetConstant(SubRegIdx,
- MVT::i32));
- ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
- }
-
- return NULL;
- }
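-
- // Note on both DIVREM selections above: the divide instructions write an
- // even/odd register pair, with the remainder landing in the even
- // subregister and the quotient in the odd one. That is why the dividend is
- // inserted into the odd half of the pair and the two results are extracted
- // from separate subregisters afterwards.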
- }
-
- // Select the default instruction
- SDNode *ResNode = SelectCode(Node);
-
- DEBUG(errs() << "=> ";
- if (ResNode == NULL || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- errs() << "\n";
- );
- return ResNode;
-}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
deleted file mode 100644
index 48ca99ff9ea2..000000000000
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ /dev/null
@@ -1,868 +0,0 @@
-//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SystemZTargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "systemz-lower"
-
-#include "SystemZISelLowering.h"
-#include "SystemZ.h"
-#include "SystemZTargetMachine.h"
-#include "SystemZSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
-using namespace llvm;
-
-SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
- TargetLowering(tm, new TargetLoweringObjectFileELF()),
- Subtarget(*tm.getSubtargetImpl()), TM(tm) {
-
- RegInfo = TM.getRegisterInfo();
-
- // Set up the register classes.
- addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass);
- addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass);
- addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass);
- addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass);
-
- if (!UseSoftFloat) {
- addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass);
- addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass);
- }
-
- // Compute derived properties from the register classes
- computeRegisterProperties();
-
- // Provide all sorts of operation actions
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
-
- setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
-
- setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
-
- setStackPointerRegisterToSaveRestore(SystemZ::R15D);
-
- // TODO: It may be better to default to latency-oriented scheduling; however,
- // LLVM's current latency-oriented scheduler can't handle physreg definitions
- // such as SystemZ's PSW, so use the register-pressure scheduler, which can.
- setSchedulingPreference(Sched::RegPressure);
-
- setBooleanContents(ZeroOrOneBooleanContent);
- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
-
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::i64, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f64, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
- setOperationAction(ISD::SDIV, MVT::i64, Expand);
- setOperationAction(ISD::UDIV, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
- setOperationAction(ISD::CTTZ, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ, MVT::i32, Promote);
- setOperationAction(ISD::CTLZ, MVT::i64, Legal);
-
- // FIXME: Can we lower these 2 efficiently?
- setOperationAction(ISD::SETCC, MVT::i32, Expand);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
- setOperationAction(ISD::SETCC, MVT::f32, Expand);
- setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
- // FIXME: Can we support these natively?
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
-
- // Lower some FP stuff
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
- setOperationAction(ISD::FMA, MVT::f32, Expand);
- setOperationAction(ISD::FMA, MVT::f64, Expand);
-
- // We have only 64-bit bitconverts
- setOperationAction(ISD::BITCAST, MVT::f32, Expand);
- setOperationAction(ISD::BITCAST, MVT::i32, Expand);
-
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
-
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
-
- setMinFunctionAlignment(1);
-}
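-
-// A note on the divide/remainder actions above: marking SDIV, UDIV, SREM and
-// UREM as Expand while leaving ISD::SDIVREM/ISD::UDIVREM selectable lets the
-// legalizer fold each div/rem pair into a single DIVREM node, which the
-// DAG-to-DAG selector then matches to the paired-register SDIVREM*/UDIVREM*
-// machine instructions.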
-
-SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- default:
- llvm_unreachable("Should not custom lower this!");
- return SDValue();
- }
-}
-
-bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64))
- return false;
-
- // +0.0f lzer
- // +0.0 lzdr
- // -0.0f lzer + lcebr
- // -0.0 lzdr + lcdbr
- return Imm.isZero() || Imm.isNegZero();
-}
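-
-// Hypothetical queries against the hook above (illustrative only): just
-// +/-0.0 can be selected natively, e.g.
-// TLI.isFPImmLegal(APFloat(0.0), MVT::f64); // true: a single lzdr
-// TLI.isFPImmLegal(APFloat(1.0), MVT::f64); // false: constant-pool load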
-
-//===----------------------------------------------------------------------===//
-// SystemZ Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-TargetLowering::ConstraintType
-SystemZTargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- case 'r':
- return C_RegisterClass;
- default:
- break;
- }
- }
- return TargetLowering::getConstraintType(Constraint);
-}
-
-std::pair<unsigned, const TargetRegisterClass*>
-SystemZTargetLowering::
-getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- if (Constraint.size() == 1) {
- // GCC Constraint Letters
- switch (Constraint[0]) {
- default: break;
- case 'r': // GENERAL_REGS
- if (VT == MVT::i32)
- return std::make_pair(0U, SystemZ::GR32RegisterClass);
- else if (VT == MVT::i128)
- return std::make_pair(0U, SystemZ::GR128RegisterClass);
-
- return std::make_pair(0U, SystemZ::GR64RegisterClass);
- }
- }
-
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
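-
-// For illustration (hypothetical source): given
-// int Res; asm("lr %0, %1" : "=r"(Res) : "r"(In));
-// the 'r' constraint above resolves to GR32 for the i32 operands, GR128 for
-// i128 operands, and GR64 for everything else.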
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "SystemZGenCallingConv.inc"
-
-SDValue
-SystemZTargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- switch (CallConv) {
- default:
- llvm_unreachable("Unsupported calling convention");
- case CallingConv::C:
- case CallingConv::Fast:
- return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
- }
-}
-
-SDValue
-SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- // SystemZ target does not yet support tail call optimization.
- isTailCall = false;
-
- switch (CallConv) {
- default:
- llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- case CallingConv::C:
- return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, OutVals, Ins, dl, DAG, InVals);
- }
-}
-
-/// LowerCCCArguments - Transform physical registers into virtual registers and
-/// generate load operations for arguments placed on the stack.
-// FIXME: struct return stuff
-// FIXME: varargs
-SDValue
-SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals)
- const {
-
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
-
- if (isVarArg)
- report_fatal_error("Varargs not supported yet");
-
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- SDValue ArgValue;
- CCValAssign &VA = ArgLocs[i];
- EVT LocVT = VA.getLocVT();
- if (VA.isRegLoc()) {
- // Arguments passed in registers
- TargetRegisterClass *RC;
- switch (LocVT.getSimpleVT().SimpleTy) {
- default:
-#ifndef NDEBUG
- errs() << "LowerFormalArguments Unhandled argument type: "
- << LocVT.getSimpleVT().SimpleTy
- << "\n";
-#endif
- llvm_unreachable(0);
- case MVT::i64:
- RC = SystemZ::GR64RegisterClass;
- break;
- case MVT::f32:
- RC = SystemZ::FP32RegisterClass;
- break;
- case MVT::f64:
- RC = SystemZ::FP64RegisterClass;
- break;
- }
-
- unsigned VReg = RegInfo.createVirtualRegister(RC);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
- } else {
- // Sanity check
- assert(VA.isMemLoc());
-
- // Create the nodes corresponding to a load from this parameter slot.
- // Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true);
-
- // Create the SelectionDAG nodes corresponding to a load
- // from this parameter
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
- }
-
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
-
- InVals.push_back(ArgValue);
- }
-
- return Chain;
-}
-
-/// LowerCCCCallTo - Function arguments are copied from virtual regs to
-/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
-/// TODO: sret.
-SDValue
-SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg>
- &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
-
- // Offset to the first argument stack slot: the zSeries ELF ABI reserves a
- // 160-byte register save area at the bottom of the frame, so outgoing stack
- // arguments start at offset 160.
- const unsigned FirstArgOffset = 160;
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes,
- getPointerTy(), true));
-
- SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
- SmallVector<SDValue, 12> MemOpChains;
- SDValue StackPtr;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- SDValue Arg = OutVals[i];
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default: assert(0 && "Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- }
-
- // Arguments that can be passed in a register must be kept in the RegsToPass
- // vector.
- if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
-
- if (StackPtr.getNode() == 0)
- StackPtr =
- DAG.getCopyFromReg(Chain, dl,
- (TFI->hasFP(MF) ?
- SystemZ::R11D : SystemZ::R15D),
- getPointerTy());
-
- unsigned Offset = FirstArgOffset + VA.getLocMemOffset();
- SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
- StackPtr,
- DAG.getIntPtrConstant(Offset));
-
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
- MachinePointerInfo(),
- false, false, 0));
- }
- }
-
- // Transform all store nodes into one single node because all store nodes are
- // independent of each other.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag is
- // necessary since all emitted instructions must be stuck together.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- // Likewise ExternalSymbol -> TargetExternalSymbol.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
- else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
-
- // Returns a chain & a flag for retval copy to use.
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are
- // known live into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- // Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain,
- DAG.getConstant(NumBytes, getPointerTy(), true),
- DAG.getConstant(0, getPointerTy(), true),
- InFlag);
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
- DAG, InVals);
-}
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-///
-SDValue
-SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg>
- &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
-
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
-
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- VA.getLocVT(), InFlag).getValue(1);
- SDValue RetValue = Chain.getValue(0);
- InFlag = Chain.getValue(2);
-
- // If this is an 8/16/32-bit value, it is really passed promoted to 64
- // bits. Insert an assert[sz]ext to capture this, then truncate to the
- // right size.
- if (VA.getLocInfo() == CCValAssign::SExt)
- RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue,
- DAG.getValueType(VA.getValVT()));
-
- if (VA.getLocInfo() != CCValAssign::Full)
- RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue);
-
- InVals.push_back(RetValue);
- }
-
- return Chain;
-}
-
-
-SDValue
-SystemZTargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
-
- // CCValAssign - represents the assignment of a return value to a location.
- SmallVector<CCValAssign, 16> RVLocs;
-
- // CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
-
- // Analyze return values.
- CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
-
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function.
- if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
-
- SDValue Flag;
-
- // Copy the result values into the output registers.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign &VA = RVLocs[i];
- SDValue ResValue = OutVals[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
-
- // If this is an 8/16/32-bit value, it really should be passed promoted
- // to 64 bits.
- if (VA.getLocInfo() == CCValAssign::SExt)
- ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue);
- else if (VA.getLocInfo() == CCValAssign::ZExt)
- ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue);
- else if (VA.getLocInfo() == CCValAssign::AExt)
- ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue);
-
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag);
-
- // Guarantee that all emitted copies are stuck together by threading the
- // flag value through them, so nothing can be scheduled in between.
- Flag = Chain.getValue(1);
- }
-
- if (Flag.getNode())
- return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
-
- // Return Void
- return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain);
-}
-
-SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, SDValue &SystemZCC,
- SelectionDAG &DAG) const {
- // FIXME: Emit a test if RHS is zero
-
- bool isUnsigned = false;
- SystemZCC::CondCodes TCC;
- switch (CC) {
- default:
- llvm_unreachable("Invalid integer condition!");
- case ISD::SETEQ:
- case ISD::SETOEQ:
- TCC = SystemZCC::E;
- break;
- case ISD::SETUEQ:
- TCC = SystemZCC::NLH;
- break;
- case ISD::SETNE:
- case ISD::SETONE:
- TCC = SystemZCC::NE;
- break;
- case ISD::SETUNE:
- TCC = SystemZCC::LH;
- break;
- case ISD::SETO:
- TCC = SystemZCC::O;
- break;
- case ISD::SETUO:
- TCC = SystemZCC::NO;
- break;
- case ISD::SETULE:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NH;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETLE:
- case ISD::SETOLE:
- TCC = SystemZCC::LE;
- break;
- case ISD::SETUGE:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NL;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETGE:
- case ISD::SETOGE:
- TCC = SystemZCC::HE;
- break;
- case ISD::SETUGT:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NLE;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETGT:
- case ISD::SETOGT:
- TCC = SystemZCC::H;
- break;
- case ISD::SETULT:
- if (LHS.getValueType().isFloatingPoint()) {
- TCC = SystemZCC::NHE;
- break;
- }
- isUnsigned = true; // FALLTHROUGH
- case ISD::SETLT:
- case ISD::SETOLT:
- TCC = SystemZCC::L;
- break;
- }
-
- SystemZCC = DAG.getConstant(TCC, MVT::i32);
-
- DebugLoc dl = LHS.getDebugLoc();
- return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP),
- dl, MVT::i64, LHS, RHS);
-}
-
-
-SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue Dest = Op.getOperand(4);
- DebugLoc dl = Op.getDebugLoc();
-
- SDValue SystemZCC;
- SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
- return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(),
- Chain, Dest, SystemZCC, Flag);
-}
-
-SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TrueV = Op.getOperand(2);
- SDValue FalseV = Op.getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- DebugLoc dl = Op.getDebugLoc();
-
- SDValue SystemZCC;
- SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG);
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- SmallVector<SDValue, 4> Ops;
- Ops.push_back(TrueV);
- Ops.push_back(FalseV);
- Ops.push_back(SystemZCC);
- Ops.push_back(Flag);
-
- return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size());
-}
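-
-// The SystemZISD::SELECT node built here is matched by the Select32/Select64/
-// SelectF32/SelectF64 pseudo instructions, which EmitInstrWithCustomInserter
-// below expands into a compare-and-branch diamond.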
-
-SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
-
- bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- bool ExtraLoadRequired =
- Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false);
-
- SDValue Result;
- if (!IsPic && !ExtraLoadRequired) {
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
- Offset = 0;
- } else {
- unsigned char OpFlags = 0;
- if (ExtraLoadRequired)
- OpFlags = SystemZII::MO_GOTENT;
-
- Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
- }
-
- Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
- getPointerTy(), Result);
-
- if (ExtraLoadRequired)
- Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
-
- // If there was a non-zero offset that we didn't fold, create an explicit
- // addition for it.
- if (Offset != 0)
- Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
- DAG.getConstant(Offset, getPointerTy()));
-
- return Result;
-}
-
-// FIXME: PIC here
-SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
-
- return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
-}
-
-
-// FIXME: PIC here
-// FIXME: This is just a dirty hack. We need to lower the constant pool properly.
-SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-
- SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
- CP->getAlignment(),
- CP->getOffset());
-
- return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result);
-}
-
-const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG";
- case SystemZISD::CALL: return "SystemZISD::CALL";
- case SystemZISD::BRCOND: return "SystemZISD::BRCOND";
- case SystemZISD::CMP: return "SystemZISD::CMP";
- case SystemZISD::UCMP: return "SystemZISD::UCMP";
- case SystemZISD::SELECT: return "SystemZISD::SELECT";
- case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper";
- default: return NULL;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-MachineBasicBlock*
-SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- const SystemZInstrInfo &TII = *TM.getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
- assert((MI->getOpcode() == SystemZ::Select32 ||
- MI->getOpcode() == SystemZ::SelectF32 ||
- MI->getOpcode() == SystemZ::Select64 ||
- MI->getOpcode() == SystemZ::SelectF64) &&
- "Unexpected instr type to insert");
-
- // To "insert" a SELECT instruction, we actually have to insert the diamond
- // control-flow pattern. The incoming instruction knows the destination vreg
- // to set, the condition code register to branch on, the true/false values to
- // select between, and a branch opcode to use.
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator I = BB;
- ++I;
-
- // thisMBB:
- // ...
- // TrueVal = ...
- // cmpTY ccX, r1, r2
- // jCC copy1MBB
- // fallthrough --> copy0MBB
- MachineBasicBlock *thisMBB = BB;
- MachineFunction *F = BB->getParent();
- MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
- SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
- F->insert(I, copy0MBB);
- F->insert(I, copy1MBB);
- // Update machine-CFG edges by transferring all successors of the current
- // block to the new block which will contain the Phi node for the select.
- copy1MBB->splice(copy1MBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(copy1MBB);
-
- BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
-
- // copy0MBB:
- // %FalseValue = ...
- // # fallthrough to copy1MBB
- BB = copy0MBB;
-
- // Update machine-CFG edges
- BB->addSuccessor(copy1MBB);
-
- // copy1MBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
- // ...
- BB = copy1MBB;
- BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
- MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
-
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
-}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
deleted file mode 100644
index bab3dc23eead..000000000000
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ /dev/null
@@ -1,145 +0,0 @@
-//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that SystemZ uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H
-#define LLVM_TARGET_SystemZ_ISELLOWERING_H
-
-#include "SystemZ.h"
-#include "SystemZRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
- namespace SystemZISD {
- enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- /// Return with a flag operand. Operand 0 is the chain operand.
- RET_FLAG,
-
- /// CALL - These operations represent an abstract call instruction, which
- /// carries the chain, the callee, the argument registers and, optionally,
- /// a glue operand.
- CALL,
-
- /// PCRelativeWrapper - PC relative address
- PCRelativeWrapper,
-
- /// CMP, UCMP - Compare instruction
- CMP,
- UCMP,
-
- /// BRCOND - Conditional branch. Operand 0 is the chain operand, operand 1
- /// is the block to branch to if the condition is true, operand 2 is the
- /// condition code and operand 3 is the flag operand produced by a CMP
- /// instruction.
- BRCOND,
-
- /// SELECT - Operands 0 and 1 are selection variables, operand 2 is
- /// condition code and operand 3 is the flag operand.
- SELECT
- };
- }
-
- class SystemZSubtarget;
- class SystemZTargetMachine;
-
- class SystemZTargetLowering : public TargetLowering {
- public:
- explicit SystemZTargetLowering(SystemZTargetMachine &TM);
-
- virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; }
-
- /// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-
- /// getTargetNodeName - This method returns the name of a target specific
- /// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
-
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- TargetLowering::ConstraintType
- getConstraintType(const std::string &Constraint) const;
-
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
-
- SDValue EmitCmp(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, SDValue &SystemZCC,
- SelectionDAG &DAG) const;
-
-
- MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- /// isFPImmLegal - Returns true if the target can instruction select the
- /// specified FP immediate natively. If false, the legalizer will
- /// materialize the FP immediate as a load from a constant pool.
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-
- private:
- SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- SDValue LowerCCCArguments(SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
-
- const SystemZSubtarget &Subtarget;
- const SystemZTargetMachine &TM;
- const SystemZRegisterInfo *RegInfo;
- };
-} // namespace llvm
-
-#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
deleted file mode 100644
index ab45ec5984e3..000000000000
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file exposes functions that may be used with BuildMI from the
-// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way.
-//
-// The helper functions below may be used with the BuildMI function to add
-// entire memory references in a single, typed, function call.
-//
-// For reference, the order of operands for memory references is:
-// (Operand), Base, Displacement, Index.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZINSTRBUILDER_H
-#define SYSTEMZINSTRBUILDER_H
-
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-
-namespace llvm {
-
-/// SystemZAddressMode - This struct holds a generalized full SystemZ address
-/// mode. The base register can be a frame index, which will eventually be
-/// replaced with R15 or R11, with Disp adjusted accordingly.
-struct SystemZAddressMode {
- enum {
- RegBase,
- FrameIndexBase
- } BaseType;
-
- union {
- unsigned Reg;
- int FrameIndex;
- } Base;
-
- unsigned IndexReg;
- int32_t Disp;
- const GlobalValue *GV;
-
- SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) {
- Base.Reg = 0;
- }
-};
-
-/// addDirectMem - This function is used to add a direct memory reference to the
-/// current instruction -- that is, a dereference of an address in a register,
-/// with no index or displacement.
-///
-static inline const MachineInstrBuilder &
-addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
- // Because memory references are always represented with 3
- // values, this adds: Reg, [0, NoReg] to the instruction.
- return MIB.addReg(Reg).addImm(0).addReg(0);
-}
-
-static inline const MachineInstrBuilder &
-addOffset(const MachineInstrBuilder &MIB, int Offset) {
- return MIB.addImm(Offset).addReg(0);
-}
-
-/// addRegOffset - This function is used to add a memory reference of the form
-/// [Reg + Offset], i.e., one with no index register, but with a
-/// displacement. An example is: 10(%r15).
-///
-static inline const MachineInstrBuilder &
-addRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill, int Offset) {
- return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
-}
-
-/// addRegReg - This function is used to add a memory reference of the form:
-/// [Reg + Reg].
-static inline const MachineInstrBuilder &
-addRegReg(const MachineInstrBuilder &MIB,
- unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) {
- return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0)
- .addReg(Reg2, getKillRegState(isKill2));
-}
-
-static inline const MachineInstrBuilder &
-addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) {
- if (AM.BaseType == SystemZAddressMode::RegBase)
- MIB.addReg(AM.Base.Reg);
- else if (AM.BaseType == SystemZAddressMode::FrameIndexBase)
- MIB.addFrameIndex(AM.Base.FrameIndex);
- else
- assert(0);
-
- return MIB.addImm(AM.Disp).addReg(AM.IndexReg);
-}
-
-/// addFrameReference - This function is used to add a reference to the base of
-/// an abstract object on the stack frame of the current function. This
-/// reference initially uses the FrameIndex as its base register, until the
-/// index is resolved. This allows a constant offset to be specified as well.
-///
-static inline const MachineInstrBuilder &
-addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
- MachineInstr *MI = MIB;
- MachineFunction &MF = *MI->getParent()->getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- const MCInstrDesc &MCID = MI->getDesc();
- unsigned Flags = 0;
- if (MCID.mayLoad())
- Flags |= MachineMemOperand::MOLoad;
- if (MCID.mayStore())
- Flags |= MachineMemOperand::MOStore;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(
- PseudoSourceValue::getFixedStack(FI), Offset),
- Flags, MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
- return addOffset(MIB.addFrameIndex(FI), Offset)
- .addMemOperand(MMO);
-}
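-
-/// addFrameStoreExample - A hypothetical usage sketch (illustrative only; it
-/// mirrors SystemZInstrInfo::storeRegToStackSlot and assumes the generated
-/// opcode enum is in scope): spill a 64-bit GPR into a frame slot using the
-/// helpers above.
-static inline void
-addFrameStoreExample(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const TargetInstrInfo &TII, unsigned SrcReg, int FrameIdx) {
- DebugLoc DL; // no source location is associated with the spill
- addFrameReference(BuildMI(MBB, MI, DL, TII.get(SystemZ::MOV64mr)), FrameIdx)
- .addReg(SrcReg, getKillRegState(true));
-}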
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
deleted file mode 100644
index a65828061d3b..000000000000
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ /dev/null
@@ -1,340 +0,0 @@
-//===- SystemZInstrFP.td - SystemZ FP Instruction defs -------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the SystemZ (binary) floating point instructions in
-// TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-// FIXME: multiclassify!
-
-//===----------------------------------------------------------------------===//
-// FP Pattern fragments
-
-def fpimm0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-def fpimmneg0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(-0.0);
-}]>;
-
-let Uses = [PSW], usesCustomInserter = 1 in {
- def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc),
- "# SelectF32 PSEUDO",
- [(set FP32:$dst,
- (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>;
- def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc),
- "# SelectF64 PSEUDO",
- [(set FP64:$dst,
- (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>;
-}
-
-//===----------------------------------------------------------------------===//
-// Move Instructions
-
-// Floating point constant loads.
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins),
- "lzer\t{$dst}",
- [(set FP32:$dst, fpimm0)]>;
-def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins),
- "lzdr\t{$dst}",
- [(set FP64:$dst, fpimm0)]>;
-}
-
-let neverHasSideEffects = 1 in {
-def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "ler\t{$dst, $src}",
- []>;
-def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "ldr\t{$dst, $src}",
- []>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
- "le\t{$dst, $src}",
- [(set FP32:$dst, (load rriaddr12:$src))]>;
-def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src),
- "ley\t{$dst, $src}",
- [(set FP32:$dst, (load rriaddr:$src))]>;
-def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
- "ld\t{$dst, $src}",
- [(set FP64:$dst, (load rriaddr12:$src))]>;
-def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src),
- "ldy\t{$dst, $src}",
- [(set FP64:$dst, (load rriaddr:$src))]>;
-}
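-
-// Note: the plain forms above (le/ld) take a 12-bit unsigned displacement
-// (rriaddr12), while the long-displacement 'y' forms (ley/ldy) take a 20-bit
-// signed displacement (rriaddr).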
-
-def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src),
- "ste\t{$src, $dst}",
- [(store FP32:$src, rriaddr12:$dst)]>;
-def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src),
- "stey\t{$src, $dst}",
- [(store FP32:$src, rriaddr:$dst)]>;
-def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src),
- "std\t{$src, $dst}",
- [(store FP64:$src, rriaddr12:$dst)]>;
-def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src),
- "stdy\t{$src, $dst}",
- [(store FP64:$src, rriaddr:$dst)]>;
-
-def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "cpsdr\t{$dst, $src2, $src1}",
- [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>;
-def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "cpsdr\t{$dst, $src2, $src1}",
- [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>;
-
-//===----------------------------------------------------------------------===//
-// Arithmetic Instructions
-
-
-let Defs = [PSW] in {
-def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "lcebr\t{$dst, $src}",
- [(set FP32:$dst, (fneg FP32:$src)),
- (implicit PSW)]>;
-def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "lcdbr\t{$dst, $src}",
- [(set FP64:$dst, (fneg FP64:$src)),
- (implicit PSW)]>;
-
-def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "lpebr\t{$dst, $src}",
- [(set FP32:$dst, (fabs FP32:$src)),
- (implicit PSW)]>;
-def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "lpdbr\t{$dst, $src}",
- [(set FP64:$dst, (fabs FP64:$src)),
- (implicit PSW)]>;
-
-def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "lnebr\t{$dst, $src}",
- [(set FP32:$dst, (fneg(fabs FP32:$src))),
- (implicit PSW)]>;
-def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "lndbr\t{$dst, $src}",
- [(set FP64:$dst, (fneg(fabs FP64:$src))),
- (implicit PSW)]>;
-}
-
-let Constraints = "$src1 = $dst" in {
-let Defs = [PSW] in {
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "aebr\t{$dst, $src2}",
- [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)),
- (implicit PSW)]>;
-def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "adbr\t{$dst, $src2}",
- [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)),
- (implicit PSW)]>;
-}
-
-def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "aeb\t{$dst, $src2}",
- [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "adb\t{$dst, $src2}",
- [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-
-def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "sebr\t{$dst, $src2}",
- [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)),
- (implicit PSW)]>;
-def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "sdbr\t{$dst, $src2}",
- [(set FP64:$dst, (fsub FP64:$src1, FP64:$src2)),
- (implicit PSW)]>;
-
-def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "seb\t{$dst, $src2}",
- [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "sdb\t{$dst, $src2}",
- [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-} // Defs = [PSW]
-
-let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
-def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "meebr\t{$dst, $src2}",
- [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>;
-def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "mdbr\t{$dst, $src2}",
- [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>;
-}
-
-def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "meeb\t{$dst, $src2}",
- [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>;
-def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "mdb\t{$dst, $src2}",
- [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>;
-
-def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
- "maebr\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3),
- FP32:$src1))]>;
-def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
- "maeb\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2),
- FP32:$src3),
- FP32:$src1))]>;
-
-def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
- "madbr\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3),
- FP64:$src1))]>;
-def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
- "madb\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2),
- FP64:$src3),
- FP64:$src1))]>;
-
-def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3),
- "msebr\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3),
- FP32:$src1))]>;
-def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3),
- "mseb\t{$dst, $src3, $src2}",
- [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2),
- FP32:$src3),
- FP32:$src1))]>;
-
-def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3),
- "msdbr\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3),
- FP64:$src1))]>;
-def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3),
- "msdb\t{$dst, $src3, $src2}",
- [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2),
- FP64:$src3),
- FP64:$src1))]>;
-
-def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
- "debr\t{$dst, $src2}",
- [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>;
-def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2),
- "ddbr\t{$dst, $src2}",
- [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>;
-
-def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2),
- "deb\t{$dst, $src2}",
- [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>;
-def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
- "ddb\t{$dst, $src2}",
- [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
-
-} // Constraints = "$src1 = $dst"
-
-def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
- "sqebr\t{$dst, $src}",
- [(set FP32:$dst, (fsqrt FP32:$src))]>;
-def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
- "sqdbr\t{$dst, $src}",
- [(set FP64:$dst, (fsqrt FP64:$src))]>;
-
-def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
- "sqeb\t{$dst, $src}",
- [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>;
-def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
- "sqdb\t{$dst, $src}",
- [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>;
-
-def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins FP64:$src),
- "ledbr\t{$dst, $src}",
- [(set FP32:$dst, (fround FP64:$src))]>;
-
-def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src),
- "ldebr\t{$dst, $src}",
- [(set FP64:$dst, (fextend FP32:$src))]>;
-def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src),
- "ldeb\t{$dst, $src}",
- [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>;
-
-let Defs = [PSW] in {
-def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src),
- "cefbr\t{$dst, $src}",
- [(set FP32:$dst, (sint_to_fp GR32:$src)),
- (implicit PSW)]>;
-def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src),
- "cegbr\t{$dst, $src}",
- [(set FP32:$dst, (sint_to_fp GR64:$src)),
- (implicit PSW)]>;
-
-def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src),
- "cdfbr\t{$dst, $src}",
- [(set FP64:$dst, (sint_to_fp GR32:$src)),
- (implicit PSW)]>;
-def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
- "cdgbr\t{$dst, $src}",
- [(set FP64:$dst, (sint_to_fp GR64:$src)),
- (implicit PSW)]>;
-
-def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src),
- "cfebr\t{$dst, 5, $src}",
- [(set GR32:$dst, (fp_to_sint FP32:$src)),
- (implicit PSW)]>;
-def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src),
- "cfdbr\t{$dst, 5, $src}",
- [(set GR32:$dst, (fp_to_sint FP64:$src)),
- (implicit PSW)]>;
-
-def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src),
- "cgebr\t{$dst, 5, $src}",
- [(set GR64:$dst, (fp_to_sint FP32:$src)),
- (implicit PSW)]>;
-def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
- "cgdbr\t{$dst, 5, $src}",
- [(set GR64:$dst, (fp_to_sint FP64:$src)),
- (implicit PSW)]>;
-} // Defs = [PSW]
-
-def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src),
- "lgdr\t{$dst, $src}",
- [(set GR64:$dst, (bitconvert FP64:$src))]>;
-def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
- "ldgr\t{$dst, $src}",
- [(set FP64:$dst, (bitconvert GR64:$src))]>;
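-
-// lgdr/ldgr are the only direct FPR<->GPR transfers, which is why the
-// lowering code marks the 32-bit BITCAST variants as Expand.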
-
-//===----------------------------------------------------------------------===//
-// Compare instructions (set PSW but do not produce any other result)
-
-// Floating-point comparisons
-let Defs = [PSW] in {
-def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
- "cebr\t$src1, $src2",
- [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
-def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
- "cdbr\t$src1, $src2",
- [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
-
-def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
- "ceb\t$src1, $src2",
- [(set PSW, (SystemZcmp FP32:$src1,
- (load rriaddr12:$src2)))]>;
-def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
- "cdb\t$src1, $src2",
- [(set PSW, (SystemZcmp FP64:$src1,
- (load rriaddr12:$src2)))]>;
-} // Defs = [PSW]
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// Floating point constant -0.0
-def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>;
-def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>;
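-
-// In other words, -0.0 is materialized by loading +0.0 (lzer/lzdr) and then
-// negating it (lcebr/lcdbr), since only +0.0 has a dedicated load-zero
-// instruction.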
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
deleted file mode 100644
index b4a8993c1971..000000000000
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ /dev/null
@@ -1,133 +0,0 @@
-//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<5> val> {
- bits<5> Value = val;
-}
-
-def Pseudo : Format<0>;
-def EForm : Format<1>;
-def IForm : Format<2>;
-def RIForm : Format<3>;
-def RIEForm : Format<4>;
-def RILForm : Format<5>;
-def RISForm : Format<6>;
-def RRForm : Format<7>;
-def RREForm : Format<8>;
-def RRFForm : Format<9>;
-def RRRForm : Format<10>;
-def RRSForm : Format<11>;
-def RSForm : Format<12>;
-def RSIForm : Format<13>;
-def RSILForm : Format<14>;
-def RSYForm : Format<15>;
-def RXForm : Format<16>;
-def RXEForm : Format<17>;
-def RXFForm : Format<18>;
-def RXYForm : Format<19>;
-def SForm : Format<20>;
-def SIForm : Format<21>;
-def SILForm : Format<22>;
-def SIYForm : Format<23>;
-def SSForm : Format<24>;
-def SSEForm : Format<25>;
-def SSFForm : Format<26>;
-
-class InstSystemZ<bits<16> op, Format f, dag outs, dag ins> : Instruction {
- let Namespace = "SystemZ";
-
- bits<16> Opcode = op;
-
- Format Form = f;
- bits<5> FormBits = Form.Value;
-
- dag OutOperandList = outs;
- dag InOperandList = ins;
-}
-
-class I8<bits<8> op, Format f, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSystemZ<0, f, outs, ins> {
- let Opcode{0-7} = op;
- let Opcode{8-15} = 0;
-
- let Pattern = pattern;
- let AsmString = asmstr;
-}
-
-class I12<bits<12> op, Format f, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSystemZ<0, f, outs, ins> {
- let Opcode{0-11} = op;
- let Opcode{12-15} = 0;
-
- let Pattern = pattern;
- let AsmString = asmstr;
-}
-
-class I16<bits<16> op, Format f, dag outs, dag ins, string asmstr,
- list<dag> pattern>
- : InstSystemZ<op, f, outs, ins> {
- let Pattern = pattern;
- let AsmString = asmstr;
-}
-
-class RRI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, RRForm, outs, ins, asmstr, pattern>;
-
-class RII<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I12<op, RIForm, outs, ins, asmstr, pattern>;
-
-class RILI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I12<op, RILForm, outs, ins, asmstr, pattern>;
-
-class RREI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, RREForm, outs, ins, asmstr, pattern>;
-
-class RXI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, RXForm, outs, ins, asmstr, pattern> {
- let AddedComplexity = 1;
-}
-
-class RXYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, RXYForm, outs, ins, asmstr, pattern>;
-
-class RSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, RSForm, outs, ins, asmstr, pattern> {
- let AddedComplexity = 1;
-}
-
-class RSYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, RSYForm, outs, ins, asmstr, pattern>;
-
-class SII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I8<op, SIForm, outs, ins, asmstr, pattern> {
- let AddedComplexity = 1;
-}
-
-class SIYI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, SIYForm, outs, ins, asmstr, pattern>;
-
-class SILI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
- : I16<op, SILForm, outs, ins, asmstr, pattern>;
-
-
-//===----------------------------------------------------------------------===//
-// Pseudo instructions
-//===----------------------------------------------------------------------===//
-
-class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstSystemZ<0, Pseudo, outs, ins> {
-
- let Pattern = pattern;
- let AsmString = asmstr;
-}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
deleted file mode 100644
index 5f3dd80f2d90..000000000000
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ /dev/null
@@ -1,439 +0,0 @@
-//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZInstrBuilder.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZMachineFunctionInfo.h"
-#include "SystemZTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_CTOR
-#include "SystemZGenInstrInfo.inc"
-
-using namespace llvm;
-
-SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
- : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
- RI(tm, *this), TM(tm) {
-}
-
-/// isGVStub - Return true if the GV requires an extra load to get the
-/// real address.
-static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
- return TM.getSubtarget<SystemZSubtarget>().GVRequiresExtraLoad(GV, TM, false);
-}
-
-void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- unsigned Opc = 0;
- if (RC == &SystemZ::GR32RegClass ||
- RC == &SystemZ::ADDR32RegClass)
- Opc = SystemZ::MOV32mr;
- else if (RC == &SystemZ::GR64RegClass ||
- RC == &SystemZ::ADDR64RegClass) {
- Opc = SystemZ::MOV64mr;
- } else if (RC == &SystemZ::FP32RegClass) {
- Opc = SystemZ::FMOV32mr;
- } else if (RC == &SystemZ::FP64RegClass) {
- Opc = SystemZ::FMOV64mr;
- } else if (RC == &SystemZ::GR64PRegClass) {
- Opc = SystemZ::MOV64Pmr;
- } else if (RC == &SystemZ::GR128RegClass) {
- Opc = SystemZ::MOV128mr;
- } else
- llvm_unreachable("Unsupported regclass to store");
-
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
- .addReg(SrcReg, getKillRegState(isKill));
-}
-
-void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const{
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
- unsigned Opc = 0;
- if (RC == &SystemZ::GR32RegClass ||
- RC == &SystemZ::ADDR32RegClass)
- Opc = SystemZ::MOV32rm;
- else if (RC == &SystemZ::GR64RegClass ||
- RC == &SystemZ::ADDR64RegClass) {
- Opc = SystemZ::MOV64rm;
- } else if (RC == &SystemZ::FP32RegClass) {
- Opc = SystemZ::FMOV32rm;
- } else if (RC == &SystemZ::FP64RegClass) {
- Opc = SystemZ::FMOV64rm;
- } else if (RC == &SystemZ::GR64PRegClass) {
- Opc = SystemZ::MOV64Prm;
- } else if (RC == &SystemZ::GR128RegClass) {
- Opc = SystemZ::MOV128rm;
- } else
- llvm_unreachable("Unsupported regclass to load");
-
- addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
-}
-
-void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc;
- if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV64rr;
- else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV32rr;
- else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV64rrP;
- else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::MOV128rr;
- else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::FMOV32rr;
- else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
- Opc = SystemZ::FMOV64rr;
- else
- llvm_unreachable("Impossible reg-to-reg copy");
-
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
-}
-
-unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SystemZ::MOV32rm:
- case SystemZ::MOV32rmy:
- case SystemZ::MOV64rm:
- case SystemZ::MOVSX32rm8:
- case SystemZ::MOVSX32rm16y:
- case SystemZ::MOVSX64rm8:
- case SystemZ::MOVSX64rm16:
- case SystemZ::MOVSX64rm32:
- case SystemZ::MOVZX32rm8:
- case SystemZ::MOVZX32rm16:
- case SystemZ::MOVZX64rm8:
- case SystemZ::MOVZX64rm16:
- case SystemZ::MOVZX64rm32:
- case SystemZ::FMOV32rm:
- case SystemZ::FMOV32rmy:
- case SystemZ::FMOV64rm:
- case SystemZ::FMOV64rmy:
- case SystemZ::MOV64Prm:
- case SystemZ::MOV64Prmy:
- case SystemZ::MOV128rm:
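- // Only a plain frame access counts: a frame-index base with zero
- // displacement and no index register.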
- if (MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() && MI->getOperand(3).isReg() &&
- MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default: break;
- case SystemZ::MOV32mr:
- case SystemZ::MOV32mry:
- case SystemZ::MOV64mr:
- case SystemZ::MOV32m8r:
- case SystemZ::MOV32m8ry:
- case SystemZ::MOV32m16r:
- case SystemZ::MOV32m16ry:
- case SystemZ::MOV64m8r:
- case SystemZ::MOV64m8ry:
- case SystemZ::MOV64m16r:
- case SystemZ::MOV64m16ry:
- case SystemZ::MOV64m32r:
- case SystemZ::MOV64m32ry:
- case SystemZ::FMOV32mr:
- case SystemZ::FMOV32mry:
- case SystemZ::FMOV64mr:
- case SystemZ::FMOV64mry:
- case SystemZ::MOV64Pmr:
- case SystemZ::MOV64Pmry:
- case SystemZ::MOV128mr:
- if (MI->getOperand(0).isFI() &&
- MI->getOperand(1).isImm() && MI->getOperand(2).isReg() &&
- MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
- return MI->getOperand(3).getReg();
- }
- break;
- }
- return 0;
-}
-
-bool SystemZInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 1 && "Invalid SystemZ branch condition!");
-
- SystemZCC::CondCodes CC = static_cast<SystemZCC::CondCodes>(Cond[0].getImm());
- Cond[0].setImm(getOppositeCondition(CC));
- return false;
-}
-
-bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // Start from the bottom of the block and work up, examining the
- // terminator instructions.
- MachineBasicBlock::iterator I = MBB.end();
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- // Working from the bottom, when we see a non-terminator
- // instruction, we're done.
- if (!isUnpredicatedTerminator(I))
- break;
-
- // A terminator that isn't a branch can't easily be handled
- // by this analysis.
- if (!I->getDesc().isBranch())
- return true;
-
- // Handle unconditional branches.
- if (I->getOpcode() == SystemZ::JMP) {
- if (!AllowModify) {
- TBB = I->getOperand(0).getMBB();
- continue;
- }
-
- // If the block has any instructions after a JMP, delete them.
- while (llvm::next(I) != MBB.end())
- llvm::next(I)->eraseFromParent();
- Cond.clear();
- FBB = 0;
-
- // Delete the JMP if it's equivalent to a fall-through.
- if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
- I->eraseFromParent();
- I = MBB.end();
- continue;
- }
-
- // TBB is used to indicate the unconditional destination.
- TBB = I->getOperand(0).getMBB();
- continue;
- }
-
- // Handle conditional branches.
- SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode());
- if (BranchCode == SystemZCC::INVALID)
- return true; // Can't handle indirect branch.
-
- // Working from the bottom, handle the first conditional branch.
- if (Cond.empty()) {
- FBB = TBB;
- TBB = I->getOperand(0).getMBB();
- Cond.push_back(MachineOperand::CreateImm(BranchCode));
- continue;
- }
-
- // Handle subsequent conditional branches.
- assert(Cond.size() == 1);
- assert(TBB);
-
- // Only handle the case where all conditional branches branch to
- // the same destination.
- if (TBB != I->getOperand(0).getMBB())
- return true;
-
- SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm();
- // If the conditions are the same, we can leave them alone.
- if (OldBranchCode == BranchCode)
- continue;
-
- return true;
- }
-
- return false;
-}
-
-unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- unsigned Count = 0;
-
- while (I != MBB.begin()) {
- --I;
- if (I->isDebugValue())
- continue;
- if (I->getOpcode() != SystemZ::JMP &&
- getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
- break;
- // Remove the branch.
- I->eraseFromParent();
- I = MBB.end();
- ++Count;
- }
-
- return Count;
-}
-
-unsigned
-SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- assert((Cond.size() == 1 || Cond.size() == 0) &&
- "SystemZ branch conditions have one component!");
-
- if (Cond.empty()) {
- // Unconditional branch?
- assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB);
- return 1;
- }
-
- // Conditional branch.
- unsigned Count = 0;
- SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm();
- BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB);
- ++Count;
-
- if (FBB) {
- // Two-way conditional branch; insert the second branch.
- BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB);
- ++Count;
- }
- return Count;
-}
-
-const MCInstrDesc&
-SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
- switch (CC) {
- default:
- llvm_unreachable("Unknown condition code!");
- case SystemZCC::O: return get(SystemZ::JO);
- case SystemZCC::H: return get(SystemZ::JH);
- case SystemZCC::NLE: return get(SystemZ::JNLE);
- case SystemZCC::L: return get(SystemZ::JL);
- case SystemZCC::NHE: return get(SystemZ::JNHE);
- case SystemZCC::LH: return get(SystemZ::JLH);
- case SystemZCC::NE: return get(SystemZ::JNE);
- case SystemZCC::E: return get(SystemZ::JE);
- case SystemZCC::NLH: return get(SystemZ::JNLH);
- case SystemZCC::HE: return get(SystemZ::JHE);
- case SystemZCC::NL: return get(SystemZ::JNL);
- case SystemZCC::LE: return get(SystemZ::JLE);
- case SystemZCC::NH: return get(SystemZ::JNH);
- case SystemZCC::NO: return get(SystemZ::JNO);
- }
-}
-
-SystemZCC::CondCodes
-SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const {
- switch (Opc) {
- default: return SystemZCC::INVALID;
- case SystemZ::JO: return SystemZCC::O;
- case SystemZ::JH: return SystemZCC::H;
- case SystemZ::JNLE: return SystemZCC::NLE;
- case SystemZ::JL: return SystemZCC::L;
- case SystemZ::JNHE: return SystemZCC::NHE;
- case SystemZ::JLH: return SystemZCC::LH;
- case SystemZ::JNE: return SystemZCC::NE;
- case SystemZ::JE: return SystemZCC::E;
- case SystemZ::JNLH: return SystemZCC::NLH;
- case SystemZ::JHE: return SystemZCC::HE;
- case SystemZ::JNL: return SystemZCC::NL;
- case SystemZ::JLE: return SystemZCC::LE;
- case SystemZ::JNH: return SystemZCC::NH;
- case SystemZ::JNO: return SystemZCC::NO;
- }
-}
-
-SystemZCC::CondCodes
-SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
- switch (CC) {
- default:
- llvm_unreachable("Invalid condition!");
- case SystemZCC::O: return SystemZCC::NO;
- case SystemZCC::H: return SystemZCC::NH;
- case SystemZCC::NLE: return SystemZCC::LE;
- case SystemZCC::L: return SystemZCC::NL;
- case SystemZCC::NHE: return SystemZCC::HE;
- case SystemZCC::LH: return SystemZCC::NLH;
- case SystemZCC::NE: return SystemZCC::E;
- case SystemZCC::E: return SystemZCC::NE;
- case SystemZCC::NLH: return SystemZCC::LH;
- case SystemZCC::HE: return SystemZCC::NHE;
- case SystemZCC::NL: return SystemZCC::L;
- case SystemZCC::LE: return SystemZCC::NLE;
- case SystemZCC::NH: return SystemZCC::H;
- case SystemZCC::NO: return SystemZCC::O;
- }
-}
-
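-// getLongDispOpc - Map a short-displacement opcode to its 20-bit
-// long-displacement ("y"-suffixed) equivalent.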
-const MCInstrDesc&
-SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
- switch (Opc) {
- default:
- llvm_unreachable("Don't have long disp version of this instruction");
- case SystemZ::MOV32mr: return get(SystemZ::MOV32mry);
- case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy);
- case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y);
- case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry);
- case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry);
- case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry);
- case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry);
- case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry);
- case SystemZ::MOV8mi: return get(SystemZ::MOV8miy);
- case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy);
- case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy);
- case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy);
- case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry);
- case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry);
- case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy);
- case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy);
- case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry);
- case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy);
- }
-}
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
deleted file mode 100644
index 6a31e9496365..000000000000
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ /dev/null
@@ -1,113 +0,0 @@
-//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H
-#define LLVM_TARGET_SYSTEMZINSTRINFO_H
-
-#include "SystemZ.h"
-#include "SystemZRegisterInfo.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "SystemZGenInstrInfo.inc"
-
-namespace llvm {
-
-class SystemZTargetMachine;
-
-/// SystemZII - This namespace holds all of the target-specific flags that
-/// instruction info tracks.
-///
-namespace SystemZII {
- enum {
- //===------------------------------------------------------------------===//
- // SystemZ Specific MachineOperand flags.
-
- MO_NO_FLAG = 0,
-
- /// MO_GOTENT - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
- ///
- /// SYMBOL_LABEL @GOTENT
- MO_GOTENT = 1,
-
- /// MO_PLT - On a symbol operand this indicates that the immediate is the
- /// offset to the PLT entry of the symbol name from the current code location.
- ///
- /// SYMBOL_LABEL @PLT
- MO_PLT = 2
- };
-}
-
-class SystemZInstrInfo : public SystemZGenInstrInfo {
- const SystemZRegisterInfo RI;
- SystemZTargetMachine &TM;
-public:
- explicit SystemZInstrInfo(SystemZTargetMachine &TM);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- ///
- virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
- unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
- SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
- const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
- const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
-
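- /// getMemoryInstr - Pick the opcode for a given displacement: the short
- /// forms only encode a 12-bit unsigned displacement (0..4095), so anything
- /// outside that range must use the long-displacement variant.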
- const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
- if (Offset < 0 || Offset >= 4096)
- return getLongDispOpc(Opc);
- else
- return get(Opc);
- }
-};
-
-}
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
deleted file mode 100644
index 580d65b27f8a..000000000000
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ /dev/null
@@ -1,1147 +0,0 @@
-//===- SystemZInstrInfo.td - SystemZ Instruction defs ---------*- tblgen-*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the SystemZ instructions in TableGen format.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// SystemZ Instruction Predicate Definitions.
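-// IsZ10 gates instructions that are only available on z10 and newer cores.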
-def IsZ10 : Predicate<"Subtarget.isZ10()">;
-
-include "SystemZInstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-// Type Constraints.
-//===----------------------------------------------------------------------===//
-class SDTCisI8<int OpNum> : SDTCisVT<OpNum, i8>;
-class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
-class SDTCisI32<int OpNum> : SDTCisVT<OpNum, i32>;
-class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
-
-//===----------------------------------------------------------------------===//
-// Type Profiles.
-//===----------------------------------------------------------------------===//
-def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
-def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
-def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
- SDTCisSameAs<1, 2>]>;
-def SDT_BrCond : SDTypeProfile<0, 3,
- [SDTCisVT<0, OtherVT>,
- SDTCisI8<1>, SDTCisVT<2, i64>]>;
-def SDT_SelectCC : SDTypeProfile<1, 4,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisI8<3>, SDTCisVT<4, i64>]>;
-def SDT_Address : SDTypeProfile<1, 1,
- [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
-
-//===----------------------------------------------------------------------===//
-// SystemZ Specific Node Definitions.
-//===----------------------------------------------------------------------===//
-def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall,
- [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;
-def SystemZcallseq_start :
- SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def SystemZcallseq_end :
- SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>;
-def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>;
-def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond,
- [SDNPHasChain]>;
-def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>;
-def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>;
-
-
-include "SystemZOperands.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction list.
-
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
- "#ADJCALLSTACKDOWN",
- [(SystemZcallseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
- "#ADJCALLSTACKUP",
- [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>;
-
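-// Select pseudos read the condition from PSW; a custom inserter expands them
-// into a diamond of conditional branches and copies.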
-let Uses = [PSW], usesCustomInserter = 1 in {
- def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc),
- "# Select32 PSEUDO",
- [(set GR32:$dst,
- (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>;
- def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc),
- "# Select64 PSEUDO",
- [(set GR64:$dst,
- (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>;
-}
-
-
-//===----------------------------------------------------------------------===//
-// Control Flow Instructions...
-//
-
-// FIXME: Provide proper encoding!
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
- def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>;
-}
-
-let isBranch = 1, isTerminator = 1 in {
- let isBarrier = 1 in {
- def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>;
-
- let isIndirectBranch = 1 in
- def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>;
- }
-
- let Uses = [PSW] in {
- def JO : Pseudo<(outs), (ins brtarget:$dst),
- "jo\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>;
- def JH : Pseudo<(outs), (ins brtarget:$dst),
- "jh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>;
- def JNLE: Pseudo<(outs), (ins brtarget:$dst),
- "jnle\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>;
- def JL : Pseudo<(outs), (ins brtarget:$dst),
- "jl\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>;
- def JNHE: Pseudo<(outs), (ins brtarget:$dst),
- "jnhe\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>;
- def JLH : Pseudo<(outs), (ins brtarget:$dst),
- "jlh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>;
- def JNE : Pseudo<(outs), (ins brtarget:$dst),
- "jne\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>;
- def JE : Pseudo<(outs), (ins brtarget:$dst),
- "je\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>;
- def JNLH: Pseudo<(outs), (ins brtarget:$dst),
- "jnlh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>;
- def JHE : Pseudo<(outs), (ins brtarget:$dst),
- "jhe\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>;
- def JNL : Pseudo<(outs), (ins brtarget:$dst),
- "jnl\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>;
- def JLE : Pseudo<(outs), (ins brtarget:$dst),
- "jle\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>;
- def JNH : Pseudo<(outs), (ins brtarget:$dst),
- "jnh\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>;
- def JNO : Pseudo<(outs), (ins brtarget:$dst),
- "jno\t$dst",
- [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>;
- } // Uses = [PSW]
-} // isBranch = 1
-
-//===----------------------------------------------------------------------===//
-// Call Instructions...
-//
-
-let isCall = 1 in
- // All calls clobber the non-callee-saved registers. Uses for argument
- // registers are added manually.
- let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D,
- F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in {
- def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops),
- "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>;
- def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops),
- "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>;
- }
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions.
-//
-
-let isReMaterializable = 1 in
-// FIXME: Provide imm12 variant
-// FIXME: Address should be halfword aligned...
-def LA64r : RXI<0x47,
- (outs GR64:$dst), (ins laaddr:$src),
- "lay\t{$dst, $src}",
- [(set GR64:$dst, laaddr:$src)]>;
-def LA64rm : RXYI<0x71E3,
- (outs GR64:$dst), (ins i64imm:$src),
- "larl\t{$dst, $src}",
- [(set GR64:$dst,
- (SystemZpcrelwrapper tglobaladdr:$src))]>;
-
-let neverHasSideEffects = 1 in
-def NOP : Pseudo<(outs), (ins), "# no-op", []>;
-
-//===----------------------------------------------------------------------===//
-// Move Instructions
-
-let neverHasSideEffects = 1 in {
-def MOV32rr : RRI<0x18,
- (outs GR32:$dst), (ins GR32:$src),
- "lr\t{$dst, $src}",
- []>;
-def MOV64rr : RREI<0xB904,
- (outs GR64:$dst), (ins GR64:$src),
- "lgr\t{$dst, $src}",
- []>;
-def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src),
- "# MOV128 PSEUDO!\n"
- "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
- "\tlgr\t${dst:subreg_even}, ${src:subreg_even}",
- []>;
-def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
- "# MOV64P PSEUDO!\n"
- "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n"
- "\tlr\t${dst:subreg_even}, ${src:subreg_even}",
- []>;
-}
-
-def MOVSX64rr32 : RREI<0xB914,
- (outs GR64:$dst), (ins GR32:$src),
- "lgfr\t{$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))]>;
-def MOVZX64rr32 : RREI<0xB916,
- (outs GR64:$dst), (ins GR32:$src),
- "llgfr\t{$dst, $src}",
- [(set GR64:$dst, (zext GR32:$src))]>;
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def MOV32ri16 : RII<0x8A7,
- (outs GR32:$dst), (ins s16imm:$src),
- "lhi\t{$dst, $src}",
- [(set GR32:$dst, immSExt16:$src)]>;
-def MOV64ri16 : RII<0x9A7,
- (outs GR64:$dst), (ins s16imm64:$src),
- "lghi\t{$dst, $src}",
- [(set GR64:$dst, immSExt16:$src)]>;
-
-def MOV64rill16 : RII<0xFA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llill\t{$dst, $src}",
- [(set GR64:$dst, i64ll16:$src)]>;
-def MOV64rilh16 : RII<0xEA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llilh\t{$dst, $src}",
- [(set GR64:$dst, i64lh16:$src)]>;
-def MOV64rihl16 : RII<0xDA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llihl\t{$dst, $src}",
- [(set GR64:$dst, i64hl16:$src)]>;
-def MOV64rihh16 : RII<0xCA5,
- (outs GR64:$dst), (ins u16imm:$src),
- "llihh\t{$dst, $src}",
- [(set GR64:$dst, i64hh16:$src)]>;
-
-def MOV64ri32 : RILI<0x1C0,
- (outs GR64:$dst), (ins s32imm64:$src),
- "lgfi\t{$dst, $src}",
- [(set GR64:$dst, immSExt32:$src)]>;
-def MOV64rilo32 : RILI<0xFC0,
- (outs GR64:$dst), (ins u32imm:$src),
- "llilf\t{$dst, $src}",
- [(set GR64:$dst, i64lo32:$src)]>;
-def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src),
- "llihf\t{$dst, $src}",
- [(set GR64:$dst, i64hi32:$src)]>;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def MOV32rm : RXI<0x58,
- (outs GR32:$dst), (ins rriaddr12:$src),
- "l\t{$dst, $src}",
- [(set GR32:$dst, (load rriaddr12:$src))]>;
-def MOV32rmy : RXYI<0x58E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "ly\t{$dst, $src}",
- [(set GR32:$dst, (load rriaddr:$src))]>;
-def MOV64rm : RXYI<0x04E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lg\t{$dst, $src}",
- [(set GR64:$dst, (load rriaddr:$src))]>;
-def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src),
- "# MOV64P PSEUDO!\n"
- "\tl\t${dst:subreg_odd}, $src\n"
- "\tl\t${dst:subreg_even}, 4+$src",
- [(set GR64P:$dst, (load rriaddr12:$src))]>;
-def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src),
- "# MOV64P PSEUDO!\n"
- "\tly\t${dst:subreg_odd}, $src\n"
- "\tly\t${dst:subreg_even}, 4+$src",
- [(set GR64P:$dst, (load rriaddr:$src))]>;
-def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src),
- "# MOV128 PSEUDO!\n"
- "\tlg\t${dst:subreg_odd}, $src\n"
- "\tlg\t${dst:subreg_even}, 8+$src",
- [(set GR128:$dst, (load rriaddr:$src))]>;
-}
-
-def MOV32mr : RXI<0x50,
- (outs), (ins rriaddr12:$dst, GR32:$src),
- "st\t{$src, $dst}",
- [(store GR32:$src, rriaddr12:$dst)]>;
-def MOV32mry : RXYI<0x50E3,
- (outs), (ins rriaddr:$dst, GR32:$src),
- "sty\t{$src, $dst}",
- [(store GR32:$src, rriaddr:$dst)]>;
-def MOV64mr : RXYI<0x24E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "stg\t{$src, $dst}",
- [(store GR64:$src, rriaddr:$dst)]>;
-def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src),
- "# MOV64P PSEUDO!\n"
- "\tst\t${src:subreg_odd}, $dst\n"
- "\tst\t${src:subreg_even}, 4+$dst",
- [(store GR64P:$src, rriaddr12:$dst)]>;
-def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src),
- "# MOV64P PSEUDO!\n"
- "\tsty\t${src:subreg_odd}, $dst\n"
- "\tsty\t${src:subreg_even}, 4+$dst",
- [(store GR64P:$src, rriaddr:$dst)]>;
-def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src),
- "# MOV128 PSEUDO!\n"
- "\tstg\t${src:subreg_odd}, $dst\n"
- "\tstg\t${src:subreg_even}, 8+$dst",
- [(store GR128:$src, rriaddr:$dst)]>;
-
-def MOV8mi : SII<0x92,
- (outs), (ins riaddr12:$dst, i32i8imm:$src),
- "mvi\t{$dst, $src}",
- [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>;
-def MOV8miy : SIYI<0x52EB,
- (outs), (ins riaddr:$dst, i32i8imm:$src),
- "mviy\t{$dst, $src}",
- [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>;
-
-let AddedComplexity = 2 in {
-def MOV16mi : SILI<0xE544,
- (outs), (ins riaddr12:$dst, s16imm:$src),
- "mvhhi\t{$dst, $src}",
- [(truncstorei16 (i32 i32immSExt16:$src), riaddr12:$dst)]>,
- Requires<[IsZ10]>;
-def MOV32mi16 : SILI<0xE54C,
- (outs), (ins riaddr12:$dst, s32imm:$src),
- "mvhi\t{$dst, $src}",
- [(store (i32 immSExt16:$src), riaddr12:$dst)]>,
- Requires<[IsZ10]>;
-def MOV64mi16 : SILI<0xE548,
- (outs), (ins riaddr12:$dst, s32imm64:$src),
- "mvghi\t{$dst, $src}",
- [(store (i64 immSExt16:$src), riaddr12:$dst)]>,
- Requires<[IsZ10]>;
-}
-
-// sexts
-def MOVSX32rr8 : RREI<0xB926,
- (outs GR32:$dst), (ins GR32:$src),
- "lbr\t{$dst, $src}",
- [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>;
-def MOVSX64rr8 : RREI<0xB906,
- (outs GR64:$dst), (ins GR64:$src),
- "lgbr\t{$dst, $src}",
- [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>;
-def MOVSX32rr16 : RREI<0xB927,
- (outs GR32:$dst), (ins GR32:$src),
- "lhr\t{$dst, $src}",
- [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>;
-def MOVSX64rr16 : RREI<0xB907,
- (outs GR64:$dst), (ins GR64:$src),
- "lghr\t{$dst, $src}",
- [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>;
-
-// Sign- and zero-extending loads
-def MOVSX32rm8 : RXYI<0x76E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "lb\t{$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>;
-def MOVSX32rm16 : RXI<0x48,
- (outs GR32:$dst), (ins rriaddr12:$src),
- "lh\t{$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>;
-def MOVSX32rm16y : RXYI<0x78E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "lhy\t{$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>;
-def MOVSX64rm8 : RXYI<0x77E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lgb\t{$dst, $src}",
- [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>;
-def MOVSX64rm16 : RXYI<0x15E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lgh\t{$dst, $src}",
- [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>;
-def MOVSX64rm32 : RXYI<0x14E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "lgf\t{$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>;
-
-def MOVZX32rm8 : RXYI<0x94E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "llc\t{$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 rriaddr:$src))]>;
-def MOVZX32rm16 : RXYI<0x95E3,
- (outs GR32:$dst), (ins rriaddr:$src),
- "llh\t{$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>;
-def MOVZX64rm8 : RXYI<0x90E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "llgc\t{$dst, $src}",
- [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>;
-def MOVZX64rm16 : RXYI<0x91E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "llgh\t{$dst, $src}",
- [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>;
-def MOVZX64rm32 : RXYI<0x16E3,
- (outs GR64:$dst), (ins rriaddr:$src),
- "llgf\t{$dst, $src}",
- [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>;
-
-// truncstores
-def MOV32m8r : RXI<0x42,
- (outs), (ins rriaddr12:$dst, GR32:$src),
- "stc\t{$src, $dst}",
- [(truncstorei8 GR32:$src, rriaddr12:$dst)]>;
-
-def MOV32m8ry : RXYI<0x72E3,
- (outs), (ins rriaddr:$dst, GR32:$src),
- "stcy\t{$src, $dst}",
- [(truncstorei8 GR32:$src, rriaddr:$dst)]>;
-
-def MOV32m16r : RXI<0x40,
- (outs), (ins rriaddr12:$dst, GR32:$src),
- "sth\t{$src, $dst}",
- [(truncstorei16 GR32:$src, rriaddr12:$dst)]>;
-
-def MOV32m16ry : RXYI<0x70E3,
- (outs), (ins rriaddr:$dst, GR32:$src),
- "sthy\t{$src, $dst}",
- [(truncstorei16 GR32:$src, rriaddr:$dst)]>;
-
-def MOV64m8r : RXI<0x42,
- (outs), (ins rriaddr12:$dst, GR64:$src),
- "stc\t{$src, $dst}",
- [(truncstorei8 GR64:$src, rriaddr12:$dst)]>;
-
-def MOV64m8ry : RXYI<0x72E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "stcy\t{$src, $dst}",
- [(truncstorei8 GR64:$src, rriaddr:$dst)]>;
-
-def MOV64m16r : RXI<0x40,
- (outs), (ins rriaddr12:$dst, GR64:$src),
- "sth\t{$src, $dst}",
- [(truncstorei16 GR64:$src, rriaddr12:$dst)]>;
-
-def MOV64m16ry : RXYI<0x70E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "sthy\t{$src, $dst}",
- [(truncstorei16 GR64:$src, rriaddr:$dst)]>;
-
-def MOV64m32r : RXI<0x50,
- (outs), (ins rriaddr12:$dst, GR64:$src),
- "st\t{$src, $dst}",
- [(truncstorei32 GR64:$src, rriaddr12:$dst)]>;
-
-def MOV64m32ry : RXYI<0x50E3,
- (outs), (ins rriaddr:$dst, GR64:$src),
- "sty\t{$src, $dst}",
- [(truncstorei32 GR64:$src, rriaddr:$dst)]>;
-
-// Multiple-register moves (store/load multiple)
-// FIXME: should we use multiple arg nodes?
-def MOV32mrm : RSYI<0x90EB,
- (outs), (ins riaddr:$dst, GR32:$from, GR32:$to),
- "stmy\t{$from, $to, $dst}",
- []>;
-def MOV64mrm : RSYI<0x24EB,
- (outs), (ins riaddr:$dst, GR64:$from, GR64:$to),
- "stmg\t{$from, $to, $dst}",
- []>;
-def MOV32rmm : RSYI<0x90EB,
- (outs GR32:$from, GR32:$to), (ins riaddr:$dst),
- "lmy\t{$from, $to, $dst}",
- []>;
-def MOV64rmm : RSYI<0x04EB,
- (outs GR64:$from, GR64:$to), (ins riaddr:$dst),
- "lmg\t{$from, $to, $dst}",
- []>;
-
-let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1,
- Constraints = "$src = $dst" in {
-def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
- "lhi\t${dst:subreg_even}, 0",
- []>;
-def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src),
- "lghi\t${dst:subreg_even}, 0",
- []>;
-}
-
-// Byte swaps
-def BSWAP32rr : RREI<0xB91F,
- (outs GR32:$dst), (ins GR32:$src),
- "lrvr\t{$dst, $src}",
- [(set GR32:$dst, (bswap GR32:$src))]>;
-def BSWAP64rr : RREI<0xB90F,
- (outs GR64:$dst), (ins GR64:$src),
- "lrvgr\t{$dst, $src}",
- [(set GR64:$dst, (bswap GR64:$src))]>;
-
-// FIXME: this is an invalid pattern for big-endian
-//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src),
-// "lrvh\t{$dst, $src}",
-// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>;
-def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src),
- "lrv\t{$dst, $src}",
- [(set GR32:$dst, (bswap (load rriaddr:$src)))]>;
-def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src),
- "lrvg\t{$dst, $src}",
- [(set GR64:$dst, (bswap (load rriaddr:$src)))]>;
-
-//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src),
-// "strvh\t{$src, $dst}",
-// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>;
-def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src),
- "strv\t{$src, $dst}",
- [(store (bswap GR32:$src), rriaddr:$dst)]>;
-def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src),
- "strvg\t{$src, $dst}",
- [(store (bswap GR64:$src), rriaddr:$dst)]>;
-
-//===----------------------------------------------------------------------===//
-// Arithmetic Instructions
-
-let Defs = [PSW] in {
-def NEG32rr : RRI<0x13,
- (outs GR32:$dst), (ins GR32:$src),
- "lcr\t{$dst, $src}",
- [(set GR32:$dst, (ineg GR32:$src)),
- (implicit PSW)]>;
-def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src),
- "lcgr\t{$dst, $src}",
- [(set GR64:$dst, (ineg GR64:$src)),
- (implicit PSW)]>;
-def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
- "lcgfr\t{$dst, $src}",
- [(set GR64:$dst, (ineg (sext GR32:$src))),
- (implicit PSW)]>;
-}
-
-let Constraints = "$src1 = $dst" in {
-
-let Defs = [PSW] in {
-
-let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
-def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "ar\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
- (implicit PSW)]>;
-def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "agr\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
- (implicit PSW)]>;
-}
-
-def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "a\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "ay\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "ag\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-
-def ADD32ri16 : RII<0xA7A,
- (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
- "ahi\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)),
- (implicit PSW)]>;
-def ADD32ri : RILI<0xC29,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "afi\t{$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
- (implicit PSW)]>;
-def ADD64ri16 : RILI<0xA7B,
- (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
- "aghi\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)),
- (implicit PSW)]>;
-def ADD64ri32 : RILI<0xC28,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "agfi\t{$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)),
- (implicit PSW)]>;
-
-let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y
-def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "alr\t{$dst, $src2}",
- [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>;
-def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "algr\t{$dst, $src2}",
- [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>;
-}
-
-def ADC32ri : RILI<0xC2B,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "alfi\t{$dst, $src2}",
- [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>;
-def ADC64ri32 : RILI<0xC2A,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "algfi\t{$dst, $src2}",
- [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>;
-
-let Uses = [PSW] in {
-def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "alcr\t{$dst, $src2}",
- [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)),
- (implicit PSW)]>;
-def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "alcgr\t{$dst, $src2}",
- [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)),
- (implicit PSW)]>;
-}
-
-let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
-def AND32rr : RRI<0x14,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "nr\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>;
-def AND64rr : RREI<0xB980,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "ngr\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>;
-}
-
-def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "n\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "ny\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def AND64rm : RXYI<0xE380, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "ng\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-def AND32rill16 : RII<0xA57,
- (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
- "nill\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>;
-def AND64rill16 : RII<0xA57,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nill\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>;
-
-def AND32rilh16 : RII<0xA56,
- (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2),
- "nilh\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>;
-def AND64rilh16 : RII<0xA56,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nilh\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>;
-
-def AND64rihl16 : RII<0xA55,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nihl\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>;
-def AND64rihh16 : RII<0xA54,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "nihh\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>;
-
-def AND32ri : RILI<0xC0B,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "nilf\t{$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>;
-def AND64rilo32 : RILI<0xC0B,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "nilf\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>;
-def AND64rihi32 : RILI<0xC0A,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "nihf\t{$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>;
-
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR32rr : RRI<0x16,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "or\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>;
-def OR64rr : RREI<0xB981,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "ogr\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>;
-}
-
-def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "o\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "oy\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "og\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
- // FIXME: Provide proper encoding!
-def OR32ri16 : RII<0xA5B,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "oill\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>;
-def OR32ri16h : RII<0xA5A,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "oilh\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>;
-def OR32ri : RILI<0xC0D,
- (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2),
- "oilf\t{$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>;
-
-def OR64rill16 : RII<0xA5B,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oill\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>;
-def OR64rilh16 : RII<0xA5A,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oilh\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>;
-def OR64rihl16 : RII<0xA59,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oihl\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>;
-def OR64rihh16 : RII<0xA58,
- (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2),
- "oihh\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>;
-
-def OR64rilo32 : RILI<0xC0D,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "oilf\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>;
-def OR64rihi32 : RILI<0xC0C,
- (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2),
- "oihf\t{$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>;
-
-def SUB32rr : RRI<0x1B,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "sr\t{$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>;
-def SUB64rr : RREI<0xB909,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "sgr\t{$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>;
-
-def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "s\t{$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "sy\t{$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "sg\t{$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-def SBC32rr : RRI<0x1F,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "slr\t{$dst, $src2}",
- [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>;
-def SBC64rr : RREI<0xB90B,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "slgr\t{$dst, $src2}",
- [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>;
-
-def SBC32ri : RILI<0xC25,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "sllfi\t{$dst, $src2}",
- [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>;
-def SBC64ri32 : RILI<0xC24,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "slgfi\t{$dst, $src2}",
- [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>;
-
-let Uses = [PSW] in {
-def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "slbr\t{$dst, $src2}",
- [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)),
- (implicit PSW)]>;
-def SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "slbgr\t{$dst, $src2}",
- [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)),
- (implicit PSW)]>;
-}
-
-let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
-def XOR32rr : RRI<0x17,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "xr\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>;
-def XOR64rr : RREI<0xB982,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "xgr\t{$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>;
-}
-
-def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "x\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))),
- (implicit PSW)]>;
-def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "xy\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "xg\t{$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))),
- (implicit PSW)]>;
-
-def XOR32ri : RILI<0xC07,
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "xilf\t{$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>;
-
-} // Defs = [PSW]
-
-let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y
-def MUL32rr : RREI<0xB252,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "msr\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>;
-def MUL64rr : RREI<0xB90C,
- (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "msgr\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>;
-}
-
-def MUL64rrP : RRI<0x1C,
- (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
- "mr\t{$dst, $src2}",
- []>;
-def UMUL64rrP : RREI<0xB996,
- (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
- "mlr\t{$dst, $src2}",
- []>;
-def UMUL128rrP : RREI<0xB986,
- (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
- "mlgr\t{$dst, $src2}",
- []>;
-
-def MUL32ri16 : RII<0xA7C,
- (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2),
- "mhi\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>;
-def MUL64ri16 : RII<0xA7D,
- (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2),
- "mghi\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>;
-
-let AddedComplexity = 2 in {
-def MUL32ri : RILI<0xC21,
- (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2),
- "msfi\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>,
- Requires<[IsZ10]>;
-def MUL64ri32 : RILI<0xC20,
- (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2),
- "msgfi\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>,
- Requires<[IsZ10]>;
-}
-
-def MUL32rm : RXI<0x71,
- (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2),
- "ms\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>;
-def MUL32rmy : RXYI<0xE351,
- (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2),
- "msy\t{$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>;
-def MUL64rm : RXYI<0xE30C,
- (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2),
- "msg\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>;
-
-def MULSX64rr32 : RREI<0xB91C,
- (outs GR64:$dst), (ins GR64:$src1, GR32:$src2),
- "msgfr\t{$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>;
-
-def SDIVREM32r : RREI<0xB91D,
- (outs GR128:$dst), (ins GR128:$src1, GR32:$src2),
- "dsgfr\t{$dst, $src2}",
- []>;
-def SDIVREM64r : RREI<0xB90D,
- (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
- "dsgr\t{$dst, $src2}",
- []>;
-
-def UDIVREM32r : RREI<0xB997,
- (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2),
- "dlr\t{$dst, $src2}",
- []>;
-def UDIVREM64r : RREI<0xB987,
- (outs GR128:$dst), (ins GR128:$src1, GR64:$src2),
- "dlgr\t{$dst, $src2}",
- []>;
-let mayLoad = 1 in {
-def SDIVREM32m : RXYI<0xE31D,
- (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
- "dsgf\t{$dst, $src2}",
- []>;
-def SDIVREM64m : RXYI<0xE30D,
- (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
- "dsg\t{$dst, $src2}",
- []>;
-
-def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2),
- "dl\t{$dst, $src2}",
- []>;
-def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2),
- "dlg\t{$dst, $src2}",
- []>;
-} // mayLoad
-} // Constraints = "$src1 = $dst"
-
-//===----------------------------------------------------------------------===//
-// Shifts. The shift amount is encoded like an address (base register plus
-// displacement), which is why these patterns take riaddr operands.
-
-let Constraints = "$src = $dst" in
-def SRL32rri : RSI<0x88,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "srl\t{$src, $amt}",
- [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>;
-def SRL64rri : RSYI<0xEB0C,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "srlg\t{$dst, $src, $amt}",
- [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
-
-let Constraints = "$src = $dst" in
-def SHL32rri : RSI<0x89,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "sll\t{$src, $amt}",
- [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>;
-def SHL64rri : RSYI<0xEB0D,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "sllg\t{$dst, $src, $amt}",
- [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
-
-let Defs = [PSW] in {
-let Constraints = "$src = $dst" in
-def SRA32rri : RSI<0x8A,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "sra\t{$src, $amt}",
- [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)),
- (implicit PSW)]>;
-
-def SRA64rri : RSYI<0xEB0A,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "srag\t{$dst, $src, $amt}",
- [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)),
- (implicit PSW)]>;
-} // Defs = [PSW]
-
-def ROTL32rri : RSYI<0xEB1D,
- (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
- "rll\t{$dst, $src, $amt}",
- [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>;
-def ROTL64rri : RSYI<0xEB1C,
- (outs GR64:$dst), (ins GR64:$src, riaddr:$amt),
- "rllg\t{$dst, $src, $amt}",
- [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>;
-
-//===----------------------------------------------------------------------===//
-// Compare and test instructions (set PSW but produce no other result)
-
-// Integer comparisons
-let Defs = [PSW] in {
-def CMP32rr : RRI<0x19,
- (outs), (ins GR32:$src1, GR32:$src2),
- "cr\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
-def CMP64rr : RREI<0xB920,
- (outs), (ins GR64:$src1, GR64:$src2),
- "cgr\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
-
-def CMP32ri : RILI<0xC2D,
- (outs), (ins GR32:$src1, s32imm:$src2),
- "cfi\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
-def CMP64ri32 : RILI<0xC2C,
- (outs), (ins GR64:$src1, s32imm64:$src2),
- "cgfi\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
-
-def CMP32rm : RXI<0x59,
- (outs), (ins GR32:$src1, rriaddr12:$src2),
- "c\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
-def CMP32rmy : RXYI<0xE359,
- (outs), (ins GR32:$src1, rriaddr:$src2),
- "cy\t$src1, $src2",
- [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
-def CMP64rm : RXYI<0xE320,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "cg\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
-
-def UCMP32rr : RRI<0x15,
- (outs), (ins GR32:$src1, GR32:$src2),
- "clr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
-def UCMP64rr : RREI<0xB921,
- (outs), (ins GR64:$src1, GR64:$src2),
- "clgr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
-
-def UCMP32ri : RILI<0xC2F,
- (outs), (ins GR32:$src1, i32imm:$src2),
- "clfi\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
-def UCMP64ri32 : RILI<0xC2E,
- (outs), (ins GR64:$src1, i64i32imm:$src2),
- "clgfi\t$src1, $src2",
- [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
-
-def UCMP32rm : RXI<0x55,
- (outs), (ins GR32:$src1, rriaddr12:$src2),
- "cl\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1,
- (load rriaddr12:$src2)))]>;
-def UCMP32rmy : RXYI<0xE355,
- (outs), (ins GR32:$src1, rriaddr:$src2),
- "cly\t$src1, $src2",
- [(set PSW, (SystemZucmp GR32:$src1,
- (load rriaddr:$src2)))]>;
-def UCMP64rm : RXYI<0xE321,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "clg\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (load rriaddr:$src2)))]>;
-
-def CMPSX64rr32 : RREI<0xB930,
- (outs), (ins GR64:$src1, GR32:$src2),
- "cgfr\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1,
- (sext GR32:$src2)))]>;
-def UCMPZX64rr32 : RREI<0xB931,
- (outs), (ins GR64:$src1, GR32:$src2),
- "clgfr\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (zext GR32:$src2)))]>;
-
-def CMPSX64rm32 : RXYI<0xE330,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "cgf\t$src1, $src2",
- [(set PSW, (SystemZcmp GR64:$src1,
- (sextloadi64i32 rriaddr:$src2)))]>;
-def UCMPZX64rm32 : RXYI<0xE331,
- (outs), (ins GR64:$src1, rriaddr:$src2),
- "clgf\t$src1, $src2",
- [(set PSW, (SystemZucmp GR64:$src1,
- (zextloadi64i32 rriaddr:$src2)))]>;
-
-// FIXME: Add other crazy ucmp forms
-
-} // Defs = [PSW]
-
-//===----------------------------------------------------------------------===//
-// Other crazy stuff
-let Defs = [PSW] in {
-def FLOGR64 : RREI<0xB983,
- (outs GR128:$dst), (ins GR64:$src),
- "flogr\t{$dst, $src}",
- []>;
-} // Defs = [PSW]
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns.
-//===----------------------------------------------------------------------===//
-
-// ConstPools, JumpTables
-def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>;
-def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>;
-
-// anyext: place the 32-bit value in the low subregister of an undefined
-// 64-bit register; the upper bits are left unspecified.
-def : Pat<(i64 (anyext GR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>;
-
-// calls
-def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>;
-def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>;
-
-//===----------------------------------------------------------------------===//
-// Peepholes.
-//===----------------------------------------------------------------------===//
-
-// FIXME: use add/sub tricks with 32768/-32768
-
-// Arbitrary immediate support.
-def : Pat<(i32 imm:$src),
- (EXTRACT_SUBREG (MOV64ri32 (GetI64FromI32 (i32 imm:$src))),
- subreg_32bit)>;
-
-// Implement in terms of LLIHF/OILF.
-def : Pat<(i64 imm:$imm),
- (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>;
-
-// trunc patterns
-def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, subreg_32bit)>;
-
-// sext_inreg patterns
-def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>;
-
-// extload patterns
-def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>;
-def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>;
-def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>;
-def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>;
-def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>;
-
-// muls
-def : Pat<(mulhs GR32:$src1, GR32:$src2),
- (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- GR32:$src1, subreg_odd32),
- GR32:$src2),
- subreg_32bit)>;
-
-def : Pat<(mulhu GR32:$src1, GR32:$src2),
- (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- GR32:$src1, subreg_odd32),
- GR32:$src2),
- subreg_32bit)>;
-def : Pat<(mulhu GR64:$src1, GR64:$src2),
- (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- GR64:$src1, subreg_odd),
- GR64:$src2),
- subreg_even)>;
-
-def : Pat<(ctlz GR64:$src),
- (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>;
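
The peephole above materializes an arbitrary 64-bit immediate by loading the
high 32 bits (MOV64rihi32, i.e. LLIHF) and ORing in the low 32 bits
(OR64rilo32, i.e. OILF), via the HI32/LO32 transforms. A minimal C++ sketch of
that split-and-recombine, for illustration only and not part of the patch:

#include <cassert>
#include <cstdint>

// Split a 64-bit immediate the way the (OR64rilo32 (MOV64rihi32 (HI32 imm)),
// (LO32 imm)) pattern does: high half first, low half ORed in afterwards.
static uint32_t hi32(uint64_t Imm) { return static_cast<uint32_t>(Imm >> 32); }
static uint32_t lo32(uint64_t Imm) { return static_cast<uint32_t>(Imm); }

int main() {
  uint64_t Imm = 0x123456789ABCDEF0ULL;
  // LLIHF puts the high half into bits 32-63; OILF ORs in the low half.
  uint64_t Rebuilt = (static_cast<uint64_t>(hi32(Imm)) << 32) | lo32(Imm);
  assert(Rebuilt == Imm);
  return 0;
}
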
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
deleted file mode 100644
index fd6e330344b6..000000000000
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//==- SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares SystemZ-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZMACHINEFUNCTIONINFO_H
-#define SYSTEMZMACHINEFUNCTIONINFO_H
-
-#include "llvm/CodeGen/MachineFunction.h"
-
-namespace llvm {
-
-/// SystemZMachineFunctionInfo - This class is derived from MachineFunctionInfo
-/// and contains private SystemZ target-specific information for each
-/// MachineFunction.
-class SystemZMachineFunctionInfo : public MachineFunctionInfo {
- /// CalleeSavedFrameSize - Size of the callee-saved register portion of the
- /// stack frame in bytes.
- unsigned CalleeSavedFrameSize;
-
- /// LowReg - Low register of range of callee-saved registers to store.
- unsigned LowReg;
-
- /// HighReg - High register of range of callee-saved registers to store.
- unsigned HighReg;
-public:
- SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
-
- explicit SystemZMachineFunctionInfo(MachineFunction &MF)
- : CalleeSavedFrameSize(0) {}
-
- unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
- void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
-
- unsigned getLowReg() const { return LowReg; }
- void setLowReg(unsigned Reg) { LowReg = Reg; }
-
- unsigned getHighReg() const { return HighReg; }
- void setHighReg(unsigned Reg) { HighReg = Reg; }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td
deleted file mode 100644
index 8b835cc26e29..000000000000
--- a/lib/Target/SystemZ/SystemZOperands.td
+++ /dev/null
@@ -1,325 +0,0 @@
-//=====- SystemZOperands.td - SystemZ Operands defs ---------*- tblgen-*-=====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the various SystemZ instruction operands.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Instruction Pattern Stuff.
-//===----------------------------------------------------------------------===//
-
-// SystemZ-specific condition codes. These correspond to CondCode in
-// SystemZ.h and must be kept in sync with it.
-def SYSTEMZ_COND_O : PatLeaf<(i8 0)>;
-def SYSTEMZ_COND_H : PatLeaf<(i8 1)>;
-def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>;
-def SYSTEMZ_COND_L : PatLeaf<(i8 3)>;
-def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>;
-def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>;
-def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>;
-def SYSTEMZ_COND_E : PatLeaf<(i8 7)>;
-def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>;
-def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>;
-def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>;
-def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>;
-def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>;
-def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>;
-
-def LO8 : SDNodeXForm<imm, [{
- // Transformation function: return low 8 bits.
- return getI8Imm(N->getZExtValue() & 0x00000000000000FFULL);
-}]>;
-
-def LL16 : SDNodeXForm<imm, [{
- // Transformation function: return low 16 bits.
- return getI16Imm(N->getZExtValue() & 0x000000000000FFFFULL);
-}]>;
-
-def LH16 : SDNodeXForm<imm, [{
- // Transformation function: return bits 16-31.
- return getI16Imm((N->getZExtValue() & 0x00000000FFFF0000ULL) >> 16);
-}]>;
-
-def HL16 : SDNodeXForm<imm, [{
- // Transformation function: return bits 32-47.
- return getI16Imm((N->getZExtValue() & 0x0000FFFF00000000ULL) >> 32);
-}]>;
-
-def HH16 : SDNodeXForm<imm, [{
- // Transformation function: return bits 48-63.
- return getI16Imm((N->getZExtValue() & 0xFFFF000000000000ULL) >> 48);
-}]>;
-
-def LO32 : SDNodeXForm<imm, [{
- // Transformation function: return low 32 bits.
- return getI32Imm(N->getZExtValue() & 0x00000000FFFFFFFFULL);
-}]>;
-
-def HI32 : SDNodeXForm<imm, [{
- // Transformation function: return bits 32-63.
- return getI32Imm(N->getZExtValue() >> 32);
-}]>;
-
-def GetI64FromI32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64);
-}]>;
-
-def i32ll16 : PatLeaf<(i32 imm), [{
- // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16
- // bits set.
- return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
-}], LL16>;
-
-def i32lh16 : PatLeaf<(i32 imm), [{
- // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set.
- return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
-}], LH16>;
-
-def i32ll16c : PatLeaf<(i32 imm), [{
- // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set.
- return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue());
-}], LL16>;
-
-def i32lh16c : PatLeaf<(i32 imm), [{
- // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16
- // bits set.
- return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue());
-}], LH16>;
-
-def i64ll16 : PatLeaf<(i64 imm), [{
- // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16
- // bits set.
- return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue());
-}], LL16>;
-
-def i64lh16 : PatLeaf<(i64 imm), [{
- // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set.
- return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue());
-}], LH16>;
-
-def i64hl16 : PatLeaf<(i64 imm), [{
- // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set.
- return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue());
-}], HL16>;
-
-def i64hh16 : PatLeaf<(i64 imm), [{
- // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set.
- return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue());
-}], HH16>;
-
-def i64ll16c : PatLeaf<(i64 imm), [{
- // i64ll16c predicate - true if the 64-bit immediate has all bits set
- // except possibly the rightmost 16.
- return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue());
-}], LL16>;
-
-def i64lh16c : PatLeaf<(i64 imm), [{
- // i64lh16c predicate - true if the 64-bit immediate has all bits set
- // except possibly bits 16-31.
- return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue());
-}], LH16>;
-
-def i64hl16c : PatLeaf<(i64 imm), [{
- // i64hl16c predicate - true if the 64-bit immediate has all bits set
- // except possibly bits 32-47.
- return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue());
-}], HL16>;
-
-def i64hh16c : PatLeaf<(i64 imm), [{
- // i64hh16c predicate - true if the 64-bit immediate has all bits set
- // except possibly bits 48-63.
- return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue());
-}], HH16>;
-
-def immSExt16 : PatLeaf<(imm), [{
- // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended
- // field.
- if (N->getValueType(0) == MVT::i64) {
- uint64_t val = N->getZExtValue();
- return ((int64_t)val == (int16_t)val);
- } else if (N->getValueType(0) == MVT::i32) {
- uint32_t val = N->getZExtValue();
- return ((int32_t)val == (int16_t)val);
- }
-
- return false;
-}], LL16>;
-
-def immSExt32 : PatLeaf<(i64 imm), [{
- // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended
- // field.
- uint64_t val = N->getZExtValue();
- return ((int64_t)val == (int32_t)val);
-}], LO32>;
-
-def i64lo32 : PatLeaf<(i64 imm), [{
- // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32
- // bits set.
- return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue());
-}], LO32>;
-
-def i64hi32 : PatLeaf<(i64 imm), [{
- // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set.
- return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue());
-}], HI32>;
-
-def i64lo32c : PatLeaf<(i64 imm), [{
- // i64lo32c predicate - true if the 64-bit immediate has all bits set
- // except possibly the rightmost 32.
- return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue());
-}], LO32>;
-
-def i64hi32c : PatLeaf<(i64 imm), [{
- // i64hi32c predicate - true if the 64-bit immediate has all bits set
- // except possibly bits 32-63.
- return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue());
-}], HI32>;
-
-def i32immSExt8 : PatLeaf<(i32 imm), [{
- // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit
- // sign extended field.
- return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
-}], LO8>;
-
-def i32immSExt16 : PatLeaf<(i32 imm), [{
- // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit
- // sign extended field.
- return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue();
-}], LL16>;
-
-def i64immSExt32 : PatLeaf<(i64 imm), [{
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
-}], LO32>;
-
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // zero extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}], LO32>;
-
-// extloads
-def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>;
-def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>;
-def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>;
-def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>;
-def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>;
-
-def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
-def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>;
-def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>;
-def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>;
-def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>;
-
-def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>;
-def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>;
-def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>;
-def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>;
-def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>;
-
-// A couple of more descriptive operand definitions.
-// 32-bits but only 8 bits are significant.
-def i32i8imm : Operand<i32>;
-// 32-bits but only 16 bits are significant.
-def i32i16imm : Operand<i32>;
-// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64>;
-// Branch targets have OtherVT type.
-def brtarget : Operand<OtherVT>;
-
-// Unsigned i12
-def u12imm : Operand<i32> {
- let PrintMethod = "printU12ImmOperand";
-}
-def u12imm64 : Operand<i64> {
- let PrintMethod = "printU12ImmOperand";
-}
-
-// Signed i16
-def s16imm : Operand<i32> {
- let PrintMethod = "printS16ImmOperand";
-}
-def s16imm64 : Operand<i64> {
- let PrintMethod = "printS16ImmOperand";
-}
-// Unsigned i16
-def u16imm : Operand<i32> {
- let PrintMethod = "printU16ImmOperand";
-}
-def u16imm64 : Operand<i64> {
- let PrintMethod = "printU16ImmOperand";
-}
-
-// Signed i20
-def s20imm : Operand<i32> {
- let PrintMethod = "printS20ImmOperand";
-}
-def s20imm64 : Operand<i64> {
- let PrintMethod = "printS20ImmOperand";
-}
-// Signed i32
-def s32imm : Operand<i32> {
- let PrintMethod = "printS32ImmOperand";
-}
-def s32imm64 : Operand<i64> {
- let PrintMethod = "printS32ImmOperand";
-}
-// Unsigned i32
-def u32imm : Operand<i32> {
- let PrintMethod = "printU32ImmOperand";
-}
-def u32imm64 : Operand<i64> {
- let PrintMethod = "printU32ImmOperand";
-}
-
-def imm_pcrel : Operand<i64> {
- let PrintMethod = "printPCRelImmOperand";
-}
-
-//===----------------------------------------------------------------------===//
-// SystemZ Operand Definitions.
-//===----------------------------------------------------------------------===//
-
-// Address operands
-
-// riaddr := reg + imm
-def riaddr32 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrRI12Only", []> {
- let PrintMethod = "printRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp);
-}
-
-def riaddr12 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrRI12", []> {
- let PrintMethod = "printRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp);
-}
-
-def riaddr : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrRI", []> {
- let PrintMethod = "printRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp);
-}
-
-//===----------------------------------------------------------------------===//
-
-// rriaddr := reg + reg + imm
-def rriaddr12 : Operand<i64>,
- ComplexPattern<i64, 3, "SelectAddrRRI12", [], []> {
- let PrintMethod = "printRRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index);
-}
-def rriaddr : Operand<i64>,
- ComplexPattern<i64, 3, "SelectAddrRRI20", [], []> {
- let PrintMethod = "printRRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
-}
-def laaddr : Operand<i64>,
- ComplexPattern<i64, 3, "SelectLAAddr", [add, sub, or, frameindex], []> {
- let PrintMethod = "printRRIAddrOperand";
- let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index);
-}
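
The PatLeaf predicates above share one shape: mask the value against a single
16-bit (or 32-bit) field and check that nothing outside it is set, or, for the
"c" variants used with AND-style immediates, that everything outside it is
already set. A standalone C++ rendering of that check, illustration only:

#include <cassert>
#include <cstdint>

// Mirrors i64ll16/i64lh16/i64hl16/i64hh16: only the 16-bit field starting at
// bit Shift may be nonzero.
static bool onlyField16(uint64_t V, unsigned Shift) {
  uint64_t Mask = 0xFFFFULL << Shift;
  return (V & Mask) == V;
}

// Mirrors the complemented i64ll16c-style predicates: every bit outside the
// 16-bit field at Shift must already be set.
static bool allSetOutsideField16(uint64_t V, unsigned Shift) {
  uint64_t Mask = 0xFFFFULL << Shift;
  return (V | ~Mask) == V;
}

int main() {
  assert(onlyField16(0x0000000000001234ULL, 0));          // i64ll16
  assert(onlyField16(0x0000123400000000ULL, 32));         // i64hl16
  assert(!onlyField16(0x0000000100001234ULL, 0));         // spills past bit 15
  assert(allSetOutsideField16(0xFFFFFFFFFFFF1234ULL, 0)); // i64ll16c
  return 0;
}
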
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
deleted file mode 100644
index b1050d46e550..000000000000
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//===-- SystemZRegisterInfo.cpp - SystemZ Register Information ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "SystemZInstrInfo.h"
-#include "SystemZMachineFunctionInfo.h"
-#include "SystemZRegisterInfo.h"
-#include "SystemZSubtarget.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "SystemZGenRegisterInfo.inc"
-
-using namespace llvm;
-
-SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
- const SystemZInstrInfo &tii)
- : SystemZGenRegisterInfo(0), TM(tm), TII(tii) {
-}
-
-const unsigned*
-SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const unsigned CalleeSavedRegs[] = {
- SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
- SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
- SystemZ::R14D, SystemZ::R15D,
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L,
- 0
- };
-
- return CalleeSavedRegs;
-}
-
-BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- if (TFI->hasFP(MF)) {
- // R11D is the frame pointer. Reserve all aliases.
- Reserved.set(SystemZ::R11D);
- Reserved.set(SystemZ::R11W);
- Reserved.set(SystemZ::R10P);
- Reserved.set(SystemZ::R10Q);
- }
-
- Reserved.set(SystemZ::R14D);
- Reserved.set(SystemZ::R15D);
- Reserved.set(SystemZ::R14W);
- Reserved.set(SystemZ::R15W);
- Reserved.set(SystemZ::R14P);
- Reserved.set(SystemZ::R14Q);
- return Reserved;
-}
-
-const TargetRegisterClass*
-SystemZRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- unsigned Idx) const {
- switch(Idx) {
- // Exact sub-classes don't exist for the other sub-register indexes.
- default: return 0;
- case SystemZ::subreg_32bit:
- if (B == SystemZ::ADDR32RegisterClass)
- return A->getSize() == 8 ? SystemZ::ADDR64RegisterClass : 0;
- return A;
- }
-}
-
-void SystemZRegisterInfo::
-eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- MBB.erase(I);
-}
-
-void
-SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS) const {
- assert(SPAdj == 0 && "Unexpected");
-
- unsigned i = 0;
- MachineInstr &MI = *II;
- MachineFunction &MF = *MI.getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- int FrameIndex = MI.getOperand(i).getIndex();
-
- unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D);
-
- // This must be part of an rri or ri operand memory reference. Replace the
- // FrameIndex operand with the base register BasePtr and fold the frame
- // offset into the displacement field.
- MI.getOperand(i).ChangeToRegister(BasePtr, false);
-
- // Offset is either a 12-bit unsigned or a 20-bit signed integer.
- // FIXME: handle "too long" displacements.
- int Offset =
- TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
-
- // Check whether the displacement is too long to fit into the 12-bit
- // zero-extended field.
- MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
-
- MI.getOperand(i+1).ChangeToImmediate(Offset);
-}
-
-unsigned
-SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- assert(0 && "What is the frame register");
- return 0;
-}
-
-unsigned SystemZRegisterInfo::getEHExceptionRegister() const {
- assert(0 && "What is the exception register");
- return 0;
-}
-
-unsigned SystemZRegisterInfo::getEHHandlerRegister() const {
- assert(0 && "What is the exception handler register");
- return 0;
-}
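
eliminateFrameIndex above folds the frame-index offset into the instruction's
displacement and lets TII.getMemoryInstr pick an opcode whose displacement
field can hold the result. A self-contained sketch of the two displacement
ranges involved (the opcode switching itself is target code not shown here):

#include <cassert>
#include <cstdint>

// SystemZ memory forms carry either a 12-bit unsigned displacement (RX/RS)
// or a 20-bit signed one (RXY/RSY).
static bool fitsU12(int64_t Disp) { return Disp >= 0 && Disp < (1 << 12); }
static bool fitsS20(int64_t Disp) {
  return Disp >= -(1 << 19) && Disp < (1 << 19);
}

int main() {
  assert(fitsU12(4095) && !fitsU12(4096) && !fitsU12(-8));
  assert(fitsS20(-524288) && fitsS20(524287) && !fitsS20(524288));
  // An offset failing both tests is the "too long" case the FIXME in
  // eliminateFrameIndex leaves unhandled.
  return 0;
}
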
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
deleted file mode 100644
index 03935b2bec10..000000000000
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the SystemZ implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SystemZREGISTERINFO_H
-#define SystemZREGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#define GET_REGINFO_HEADER
-#include "SystemZGenRegisterInfo.inc"
-
-namespace llvm {
-
-class SystemZSubtarget;
-class SystemZInstrInfo;
-class Type;
-
-struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
- SystemZTargetMachine &TM;
- const SystemZInstrInfo &TII;
-
- SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii);
-
- /// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const;
-
- const TargetRegisterClass*
- getMatchingSuperRegClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B, unsigned Idx) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- void eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, RegScavenger *RS = NULL) const;
-
- // Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
-
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
deleted file mode 100644
index a24cbcf4ccd8..000000000000
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ /dev/null
@@ -1,205 +0,0 @@
-//===- SystemZRegisterInfo.td - The SystemZ Register File ------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-class SystemZReg<string n> : Register<n> {
- let Namespace = "SystemZ";
-}
-
-class SystemZRegWithSubregs<string n, list<Register> subregs>
- : RegisterWithSubRegs<n, subregs> {
- let Namespace = "SystemZ";
-}
-
-// We identify all our registers with a 4-bit ID, for consistency's sake.
-
-// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers
-class GPR32<bits<4> num, string n> : SystemZReg<n> {
- field bits<4> Num = num;
-}
-
-// GPR64 - One of the 16 64-bit general-purpose registers
-class GPR64<bits<4> num, string n, list<Register> subregs,
- list<Register> aliases = []>
- : SystemZRegWithSubregs<n, subregs> {
- field bits<4> Num = num;
- let Aliases = aliases;
-}
-
-// GPR128 - 8 even-odd register pairs
-class GPR128<bits<4> num, string n, list<Register> subregs,
- list<Register> aliases = []>
- : SystemZRegWithSubregs<n, subregs> {
- field bits<4> Num = num;
- let Aliases = aliases;
-}
-
-// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers
-class FPRS<bits<4> num, string n> : SystemZReg<n> {
- field bits<4> Num = num;
-}
-
-// FPRL - One of the 16 64-bit floating-point registers
-class FPRL<bits<4> num, string n, list<Register> subregs>
- : SystemZRegWithSubregs<n, subregs> {
- field bits<4> Num = num;
-}
-
-let Namespace = "SystemZ" in {
-def subreg_32bit : SubRegIndex;
-def subreg_odd32 : SubRegIndex;
-def subreg_even : SubRegIndex;
-def subreg_odd : SubRegIndex;
-}
-
-// General-purpose registers
-def R0W : GPR32< 0, "r0">;
-def R1W : GPR32< 1, "r1">;
-def R2W : GPR32< 2, "r2">;
-def R3W : GPR32< 3, "r3">;
-def R4W : GPR32< 4, "r4">;
-def R5W : GPR32< 5, "r5">;
-def R6W : GPR32< 6, "r6">;
-def R7W : GPR32< 7, "r7">;
-def R8W : GPR32< 8, "r8">;
-def R9W : GPR32< 9, "r9">;
-def R10W : GPR32<10, "r10">;
-def R11W : GPR32<11, "r11">;
-def R12W : GPR32<12, "r12">;
-def R13W : GPR32<13, "r13">;
-def R14W : GPR32<14, "r14">;
-def R15W : GPR32<15, "r15">;
-
-let SubRegIndices = [subreg_32bit] in {
-def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
-def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
-def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
-def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>;
-def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>;
-def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>;
-def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>;
-def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>;
-def R8D : GPR64< 8, "r8", [R8W]>, DwarfRegNum<[8]>;
-def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>;
-def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>;
-def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>;
-def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
-def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
-def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
-def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
-}
-
-// Register pairs
-let SubRegIndices = [subreg_32bit, subreg_odd32] in {
-def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>;
-def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>;
-def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>;
-def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>;
-def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>;
-def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>;
-def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>;
-def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>;
-}
-
-let SubRegIndices = [subreg_even, subreg_odd],
- CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
-def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>;
-def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>;
-def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>;
-def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>;
-def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>;
-def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>;
-def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>;
-def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>;
-}
-
-// Floating-point registers
-def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
-def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>;
-def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>;
-def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>;
-def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>;
-def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>;
-def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>;
-def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>;
-def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>;
-def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>;
-def F10S : FPRS<10, "f10">, DwarfRegNum<[26]>;
-def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>;
-def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>;
-def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
-def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
-def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
-
-let SubRegIndices = [subreg_32bit] in {
-def F0L : FPRL< 0, "f0", [F0S]>;
-def F1L : FPRL< 1, "f1", [F1S]>;
-def F2L : FPRL< 2, "f2", [F2S]>;
-def F3L : FPRL< 3, "f3", [F3S]>;
-def F4L : FPRL< 4, "f4", [F4S]>;
-def F5L : FPRL< 5, "f5", [F5S]>;
-def F6L : FPRL< 6, "f6", [F6S]>;
-def F7L : FPRL< 7, "f7", [F7S]>;
-def F8L : FPRL< 8, "f8", [F8S]>;
-def F9L : FPRL< 9, "f9", [F9S]>;
-def F10L : FPRL<10, "f10", [F10S]>;
-def F11L : FPRL<11, "f11", [F11S]>;
-def F12L : FPRL<12, "f12", [F12S]>;
-def F13L : FPRL<13, "f13", [F13S]>;
-def F14L : FPRL<14, "f14", [F14S]>;
-def F15L : FPRL<15, "f15", [F15S]>;
-}
-
-// Status register
-def PSW : SystemZReg<"psw">;
-
-/// Register classes.
-/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
-/// together with R14 and R15 in one prolog instruction.
-def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5),
- (sequence "R%uW", 15, 6))>;
-
-/// Registers used to generate address. Everything except R0.
-def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
-
-def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5),
- (sequence "R%uD", 15, 6))> {
- let SubRegClasses = [(GR32 subreg_32bit)];
-}
-
-def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
- let SubRegClasses = [(ADDR32 subreg_32bit)];
-}
-
-// Even-odd register pairs
-def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
- R12P, R10P, R8P, R6P,
- R14P)> {
- let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
-}
-
-def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
- R12Q, R10Q, R8Q, R6Q,
- R14Q)> {
- let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
- (GR64 subreg_even, subreg_odd)];
-}
-
-def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
-
-def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
- let SubRegClasses = [(FP32 subreg_32bit)];
-}
-
-// Status flags registers.
-def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
- let CopyCost = -1; // Don't allow copying of status registers.
-}
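
The GPR64P/GPR128 classes above pair each even-numbered register with its odd
successor, and pair-producing instructions extract results through
subreg_even/subreg_odd. A tiny C++ sketch of the pairing constraint,
illustrative only:

#include <cassert>

// An even/odd pair RnP or RnQ covers registers n and n+1; n must be even
// and at most 14, giving the eight pairs defined above.
static bool isValidPairBase(unsigned N) { return (N & 1) == 0 && N <= 14; }
static unsigned oddHalf(unsigned EvenN) { return EvenN + 1; }

int main() {
  assert(isValidPairBase(0) && isValidPairBase(14) && !isValidPairBase(7));
  assert(oddHalf(10) == 11); // R10Q = {R10D (even), R11D (odd)}
  return 0;
}
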
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
deleted file mode 100644
index 3eabcd24c598..000000000000
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SystemZSelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "systemz-selectiondag-info"
-#include "SystemZTargetMachine.h"
-using namespace llvm;
-
-SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
- : TargetSelectionDAGInfo(TM) {
-}
-
-SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
-}
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
deleted file mode 100644
index 1450401d0403..000000000000
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the SystemZ subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef SYSTEMZSELECTIONDAGINFO_H
-#define SYSTEMZSELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class SystemZTargetMachine;
-
-class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
-public:
- explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
- ~SystemZSelectionDAGInfo();
-};
-
-}
-
-#endif
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
deleted file mode 100644
index 0845510761c2..000000000000
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZSubtarget.h"
-#include "SystemZ.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_TARGET_DESC
-#define GET_SUBTARGETINFO_CTOR
-#include "SystemZGenSubtargetInfo.inc"
-
-using namespace llvm;
-
-SystemZSubtarget::SystemZSubtarget(const std::string &TT,
- const std::string &CPU,
- const std::string &FS):
- SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
- std::string CPUName = CPU;
- if (CPUName.empty())
- CPUName = "z9";
-
- // Parse features string.
- ParseSubtargetFeatures(CPUName, FS);
-}
-
-/// True if accessing the GV requires an extra load.
-bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV,
- const TargetMachine& TM,
- bool isDirectCall) const {
- if (TM.getRelocationModel() == Reloc::PIC_) {
- // An extra load is needed for all externally visible globals.
- if (isDirectCall)
- return false;
-
- if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
- return false;
-
- return true;
- }
-
- return false;
-}
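
GVRequiresExtraLoad's rule is compact: under PIC, a global needs a GOT-style
extra load exactly when it is neither a direct call target nor resolvable
within the module. A standalone restatement (names here are illustrative, not
the LLVM API):

#include <cassert>

// Illustrative restatement of the PIC rule in GVRequiresExtraLoad.
static bool needsExtraLoad(bool PIC, bool IsDirectCall, bool LocalLinkage,
                           bool HiddenVisibility) {
  if (!PIC) return false;            // static model: direct access is fine
  if (IsDirectCall) return false;    // direct calls need no extra load
  if (LocalLinkage || HiddenVisibility)
    return false;                    // resolved inside this DSO
  return true;                       // externally visible data: extra load
}

int main() {
  assert(needsExtraLoad(true, false, false, false));
  assert(!needsExtraLoad(true, true, false, false));
  assert(!needsExtraLoad(true, false, true, false));
  assert(!needsExtraLoad(false, false, false, false));
  return 0;
}
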
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
deleted file mode 100644
index 55cfd80002bc..000000000000
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
-#define LLVM_TARGET_SystemZ_SUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "SystemZGenSubtargetInfo.inc"
-
-namespace llvm {
-class GlobalValue;
-class StringRef;
-class TargetMachine;
-
-class SystemZSubtarget : public SystemZGenSubtargetInfo {
- bool HasZ10Insts;
-public:
- /// This constructor initializes the data members to match those
- /// of the specified triple.
- ///
- SystemZSubtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- bool isZ10() const { return HasZ10Insts; }
-
- bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM,
- bool isDirectCall) const;
-};
-} // End llvm namespace
-
-#endif // LLVM_TARGET_SystemZ_SUBTARGET_H
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
deleted file mode 100644
index e390f060c9a9..000000000000
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZTargetMachine.h"
-#include "SystemZ.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-extern "C" void LLVMInitializeSystemZTarget() {
- // Register the target.
- RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
-}
-
-/// SystemZTargetMachine ctor - Create an LP64 architecture model
-///
-SystemZTargetMachine::SystemZTargetMachine(const Target &T,
- StringRef TT, StringRef CPU,
- StringRef FS, Reloc::Model RM,
- CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS),
- DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
- "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
- InstrInfo(*this), TLInfo(*this), TSInfo(*this),
- FrameLowering(Subtarget) {
-}
-
-bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // Install an instruction selector.
- PM.add(createSystemZISelDag(*this, OptLevel));
- return false;
-}
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
deleted file mode 100644
index 43dce4bd148e..000000000000
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the SystemZ specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
-#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H
-
-#include "SystemZInstrInfo.h"
-#include "SystemZISelLowering.h"
-#include "SystemZFrameLowering.h"
-#include "SystemZSelectionDAGInfo.h"
-#include "SystemZRegisterInfo.h"
-#include "SystemZSubtarget.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-/// SystemZTargetMachine
-///
-class SystemZTargetMachine : public LLVMTargetMachine {
- SystemZSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- SystemZInstrInfo InstrInfo;
- SystemZTargetLowering TLInfo;
- SystemZSelectionDAGInfo TSInfo;
- SystemZFrameLowering FrameLowering;
-public:
- SystemZTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
-
- virtual const TargetFrameLowering *getFrameLowering() const {
- return &FrameLowering;
- }
- virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetData *getTargetData() const { return &DataLayout;}
- virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; }
-
- virtual const SystemZRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
-
- virtual const SystemZTargetLowering *getTargetLowering() const {
- return &TLInfo;
- }
-
- virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
- return &TSInfo;
- }
-
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
-}; // SystemZTargetMachine.
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H
diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 31807081bd6e..000000000000
--- a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMSystemZInfo
- SystemZTargetInfo.cpp
- )
-
-add_llvm_library_dependencies(LLVMSystemZInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
-add_dependencies(LLVMSystemZInfo SystemZCommonTableGen)
diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile
deleted file mode 100644
index 0be80eb4e6ad..000000000000
--- a/lib/Target/SystemZ/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMSystemZInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
deleted file mode 100644
index da99282ecb04..000000000000
--- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "SystemZ.h"
-#include "llvm/Module.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheSystemZTarget;
-
-extern "C" void LLVMInitializeSystemZTargetInfo() {
- RegisterTarget<Triple::systemz> X(TheSystemZTarget, "systemz", "SystemZ");
-}
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index bd6a6b67beb9..acb74765c193 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -125,15 +125,15 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =
//===----------------------------------------------------------------------===//
/// getInt - Get an integer ignoring errors.
-static unsigned getInt(StringRef R) {
- unsigned Result = 0;
+static int getInt(StringRef R) {
+ int Result = 0;
R.getAsInteger(10, Result);
return Result;
}
-void TargetData::init(StringRef Desc) {
+void TargetData::init() {
initializeTargetDataPass(*PassRegistry::getPassRegistry());
-
+
LayoutMap = 0;
LittleEndian = false;
PointerMemSize = 8;
@@ -147,11 +147,19 @@ void TargetData::init(StringRef Desc) {
setAlignment(INTEGER_ALIGN, 2, 2, 16); // i16
setAlignment(INTEGER_ALIGN, 4, 4, 32); // i32
setAlignment(INTEGER_ALIGN, 4, 8, 64); // i64
+ setAlignment(FLOAT_ALIGN, 2, 2, 16); // half
setAlignment(FLOAT_ALIGN, 4, 4, 32); // float
setAlignment(FLOAT_ALIGN, 8, 8, 64); // double
+ setAlignment(FLOAT_ALIGN, 16, 16, 128); // ppcf128, quad, ...
setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ...
setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
+}
+
+std::string TargetData::parseSpecifier(StringRef Desc, TargetData *td) {
+
+ if (td)
+ td->init();
while (!Desc.empty()) {
std::pair<StringRef, StringRef> Split = Desc.split('-');
@@ -169,28 +177,54 @@ void TargetData::init(StringRef Desc) {
switch (Specifier[0]) {
case 'E':
- LittleEndian = false;
+ if (td)
+ td->LittleEndian = false;
break;
case 'e':
- LittleEndian = true;
+ if (td)
+ td->LittleEndian = true;
break;
- case 'p':
+ case 'p': {
+ // Pointer size.
Split = Token.split(':');
- PointerMemSize = getInt(Split.first) / 8;
+ int PointerMemSizeBits = getInt(Split.first);
+ if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0)
+ return "invalid pointer size, must be a positive 8-bit multiple";
+ if (td)
+ td->PointerMemSize = PointerMemSizeBits / 8;
+
+ // Pointer ABI alignment.
Split = Split.second.split(':');
- PointerABIAlign = getInt(Split.first) / 8;
+ int PointerABIAlignBits = getInt(Split.first);
+ if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) {
+ return "invalid pointer ABI alignment, "
+ "must be a positive 8-bit multiple";
+ }
+ if (td)
+ td->PointerABIAlign = PointerABIAlignBits / 8;
+
+ // Pointer preferred alignment.
Split = Split.second.split(':');
- PointerPrefAlign = getInt(Split.first) / 8;
- if (PointerPrefAlign == 0)
- PointerPrefAlign = PointerABIAlign;
+ int PointerPrefAlignBits = getInt(Split.first);
+ if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) {
+ return "invalid pointer preferred alignment, "
+ "must be a positive 8-bit multiple";
+ }
+ if (td) {
+ td->PointerPrefAlign = PointerPrefAlignBits / 8;
+ if (td->PointerPrefAlign == 0)
+ td->PointerPrefAlign = td->PointerABIAlign;
+ }
break;
+ }
case 'i':
case 'v':
case 'f':
case 'a':
case 's': {
AlignTypeEnum AlignType;
- switch (Specifier[0]) {
+ char field = Specifier[0];
+ switch (field) {
default:
case 'i': AlignType = INTEGER_ALIGN; break;
case 'v': AlignType = VECTOR_ALIGN; break;
@@ -198,37 +232,66 @@ void TargetData::init(StringRef Desc) {
case 'a': AlignType = AGGREGATE_ALIGN; break;
case 's': AlignType = STACK_ALIGN; break;
}
- unsigned Size = getInt(Specifier.substr(1));
+ int Size = getInt(Specifier.substr(1));
+ if (Size < 0) {
+ return std::string("invalid ") + field + "-size field, "
+ "must be positive";
+ }
+
Split = Token.split(':');
- unsigned ABIAlign = getInt(Split.first) / 8;
+ int ABIAlignBits = getInt(Split.first);
+ if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) {
+ return std::string("invalid ") + field +"-abi-alignment field, "
+ "must be a positive 8-bit multiple";
+ }
+ unsigned ABIAlign = ABIAlignBits / 8;
Split = Split.second.split(':');
- unsigned PrefAlign = getInt(Split.first) / 8;
+
+ int PrefAlignBits = getInt(Split.first);
+ if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) {
+ return std::string("invalid ") + field +"-preferred-alignment field, "
+ "must be a positive 8-bit multiple";
+ }
+ unsigned PrefAlign = PrefAlignBits / 8;
if (PrefAlign == 0)
PrefAlign = ABIAlign;
- setAlignment(AlignType, ABIAlign, PrefAlign, Size);
+
+ if (td)
+ td->setAlignment(AlignType, ABIAlign, PrefAlign, Size);
break;
}
case 'n': // Native integer types.
Specifier = Specifier.substr(1);
do {
- if (unsigned Width = getInt(Specifier))
- LegalIntWidths.push_back(Width);
+ int Width = getInt(Specifier);
+ if (Width <= 0) {
+ return std::string("invalid native integer size \'") + Specifier.str() +
+ "\', must be a positive integer.";
+ }
+ if (td && Width != 0)
+ td->LegalIntWidths.push_back(Width);
Split = Token.split(':');
Specifier = Split.first;
Token = Split.second;
} while (!Specifier.empty() || !Token.empty());
break;
- case 'S': // Stack natural alignment.
- StackNaturalAlign = getInt(Specifier.substr(1));
- StackNaturalAlign /= 8;
- // FIXME: Should we really be truncating these alingments and
- // sizes silently?
+ case 'S': { // Stack natural alignment.
+ int StackNaturalAlignBits = getInt(Specifier.substr(1));
+ if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) {
+ return "invalid natural stack alignment (S-field), "
+ "must be a positive 8-bit multiple";
+ }
+ if (td)
+ td->StackNaturalAlign = StackNaturalAlignBits / 8;
break;
+ }
default:
break;
}
}
+
+ return "";
}
/// Default ctor.
@@ -242,7 +305,9 @@ TargetData::TargetData() : ImmutablePass(ID) {
TargetData::TargetData(const Module *M)
: ImmutablePass(ID) {
- init(M->getDataLayout());
+ std::string errMsg = parseSpecifier(M->getDataLayout(), this);
+ assert(errMsg == "" && "Module M has malformed target data layout string.");
+ (void)errMsg;
}
void
@@ -308,7 +373,7 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
// If the alignment is not a power of 2, round up to the next power of 2.
// This happens for non-power-of-2 length vectors.
if (Align & (Align-1))
- Align = llvm::NextPowerOf2(Align);
+ Align = NextPowerOf2(Align);
return Align;
}
}
@@ -414,6 +479,8 @@ uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
return cast<IntegerType>(Ty)->getBitWidth();
case Type::VoidTyID:
return 8;
+ case Type::HalfTyID:
+ return 16;
case Type::FloatTyID:
return 32;
case Type::DoubleTyID:
@@ -430,9 +497,7 @@ uint64_t TargetData::getTypeSizeInBits(Type *Ty) const {
return cast<VectorType>(Ty)->getBitWidth();
default:
llvm_unreachable("TargetData::getTypeSizeInBits(): Unsupported type");
- break;
}
- return 0;
}
/*!
@@ -471,6 +536,7 @@ unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
case Type::VoidTyID:
AlignType = INTEGER_ALIGN;
break;
+ case Type::HalfTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
// PPC_FP128TyID and FP128TyID have different data contents, but the
@@ -486,7 +552,6 @@ unsigned TargetData::getAlignment(Type *Ty, bool abi_or_pref) const {
break;
default:
llvm_unreachable("Bad type for getAlignment!!!");
- break;
}
return getAlignmentInfo((AlignTypeEnum)AlignType, getTypeSizeInBits(Ty),
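
The parseSpecifier refactor above replaces silent truncation with explicit
validation: each field must be non-negative and a multiple of 8 bits before it
is converted to bytes. The shape of that check, restated standalone (the real
code additionally threads the optional TargetData through):

#include <cassert>
#include <string>

// Restates the patch's validation idiom: a field is only converted from bits
// to bytes after it passes the positive-8-bit-multiple check.
static std::string checkByteMultiple(int Bits, const char *What) {
  if (Bits < 0 || Bits % 8 != 0)
    return std::string("invalid ") + What +
           ", must be a positive 8-bit multiple";
  return ""; // empty string signals success, as in parseSpecifier
}

int main() {
  assert(checkByteMultiple(64, "pointer size").empty());
  assert(!checkByteMultiple(63, "pointer size").empty());
  assert(!checkByteMultiple(-8, "pointer size").empty());
  return 0;
}
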
diff --git a/lib/Target/TargetFrameLowering.cpp b/lib/Target/TargetFrameLowering.cpp
deleted file mode 100644
index 122f8696e2ce..000000000000
--- a/lib/Target/TargetFrameLowering.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-//===----- TargetFrameLowering.cpp - Implement target frame interface -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implements the layout of a stack frame on the target machine.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-
-#include <cstdlib>
-using namespace llvm;
-
-TargetFrameLowering::~TargetFrameLowering() {
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index. This is the default implementation
-/// which is overridden for some targets.
-int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
-}
-
-int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg) const {
- const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
-
- // By default, assume all frame indices are referenced via whatever
- // getFrameRegister() says. The target can override this if it's doing
- // something different.
- FrameReg = RI->getFrameRegister(MF);
- return getFrameIndexOffset(MF, FI);
-}
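
The default getFrameIndexOffset removed above is plain arithmetic over
MachineFrameInfo fields. A numeric sketch of the same formula (values are
invented for illustration):

#include <cassert>

// Default frame-index offset: object offset + stack size - local-area offset
// + any offset adjustment, exactly as in the deleted implementation.
static int frameIndexOffset(int ObjectOffset, int StackSize,
                            int OffsetOfLocalArea, int OffsetAdjustment) {
  return ObjectOffset + StackSize - OffsetOfLocalArea + OffsetAdjustment;
}

int main() {
  // An object 24 bytes below the incoming SP in a 160-byte frame, with the
  // local area at 0 and no adjustment, sits at SP+136 after allocation.
  assert(frameIndexOffset(-24, 160, 0, 0) == 136);
  return 0;
}
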
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d52ecb32cf75..440f9ad00de9 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -13,7 +13,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
@@ -73,23 +72,6 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
}
-int
-TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
- if (!ItinData || ItinData->isEmpty())
- return -1;
-
- if (!DefNode->isMachineOpcode())
- return -1;
-
- unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass();
- if (!UseNode->isMachineOpcode())
- return ItinData->getOperandCycle(DefClass, DefIdx);
- unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass();
- return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
-}
-
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
unsigned *PredCost) const {
@@ -99,17 +81,6 @@ int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
}
-int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *N) const {
- if (!ItinData || ItinData->isEmpty())
- return 1;
-
- if (!N->isMachineOpcode())
- return 1;
-
- return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
-}
-
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI,
unsigned DefIdx) const {
@@ -129,19 +100,6 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
}
-bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
-
- // Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
- return true;
- if (!MCID.isPredicable())
- return true;
- return !isPredicated(MI);
-}
-
-
/// Measure the specified inline asm to determine an approximation of its
/// length.
/// Comments (which run till the next SeparatorString or newline) do not
diff --git a/lib/Target/TargetJITInfo.cpp b/lib/Target/TargetJITInfo.cpp
new file mode 100644
index 000000000000..aafedf8749b1
--- /dev/null
+++ b/lib/Target/TargetJITInfo.cpp
@@ -0,0 +1,14 @@
+//===- Target/TargetJITInfo.cpp - Target Information for JIT ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetJITInfo.h"
+
+using namespace llvm;
+
+void TargetJITInfo::anchor() { }
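
The new file exists only to give TargetJITInfo a single out-of-line virtual
method, so its vtable is emitted in exactly one object file instead of as a
weak copy in every translation unit. The idiom in miniature (the class name
here is invented):

// Header: declare one virtual method without defining it inline.
struct Widget {
  virtual ~Widget() {}
  virtual void anchor(); // "key function": pins the vtable to one TU
};

// Exactly one .cpp provides the definition; the vtable and RTTI for Widget
// are emitted alongside it rather than duplicated in every user.
void Widget::anchor() {}

int main() { Widget W; (void)W; return 0; }
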
diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp
index 709dfd283f98..269958fd7f17 100644
--- a/lib/Target/TargetLibraryInfo.cpp
+++ b/lib/Target/TargetLibraryInfo.cpp
@@ -20,6 +20,106 @@ INITIALIZE_PASS(TargetLibraryInfo, "targetlibinfo",
"Target Library Information", false, true)
char TargetLibraryInfo::ID = 0;
+void TargetLibraryInfo::anchor() { }
+
+const char* TargetLibraryInfo::StandardNames[LibFunc::NumLibFuncs] =
+ {
+ "acos",
+ "acosl",
+ "acosf",
+ "asin",
+ "asinl",
+ "asinf",
+ "atan",
+ "atanl",
+ "atanf",
+ "atan2",
+ "atan2l",
+ "atan2f",
+ "ceil",
+ "ceill",
+ "ceilf",
+ "copysign",
+ "copysignf",
+ "copysignl",
+ "cos",
+ "cosl",
+ "cosf",
+ "cosh",
+ "coshl",
+ "coshf",
+ "exp",
+ "expl",
+ "expf",
+ "exp2",
+ "exp2l",
+ "exp2f",
+ "expm1",
+ "expm1l",
+ "expl1f",
+ "fabs",
+ "fabsl",
+ "fabsf",
+ "floor",
+ "floorl",
+ "floorf",
+ "fiprintf",
+ "fmod",
+ "fmodl",
+ "fmodf",
+ "fputs",
+ "fwrite",
+ "iprintf",
+ "log",
+ "logl",
+ "logf",
+ "log2",
+ "log2l",
+ "log2f",
+ "log10",
+ "log10l",
+ "log10f",
+ "log1p",
+ "log1pl",
+ "log1pf",
+ "memcpy",
+ "memmove",
+ "memset",
+ "memset_pattern16",
+ "nearbyint",
+ "nearbyintf",
+ "nearbyintl",
+ "pow",
+ "powf",
+ "powl",
+ "rint",
+ "rintf",
+ "rintl",
+ "sin",
+ "sinl",
+ "sinf",
+ "sinh",
+ "sinhl",
+ "sinhf",
+ "siprintf",
+ "sqrt",
+ "sqrtl",
+ "sqrtf",
+ "tan",
+ "tanl",
+ "tanf",
+ "tanh",
+ "tanhl",
+ "tanhf",
+ "trunc",
+ "truncf",
+ "truncl",
+ "__cxa_atexit",
+ "__cxa_guard_abort",
+ "__cxa_guard_acquire",
+ "__cxa_guard_release"
+ };
+
/// initialize - Initialize the set of available library functions based on the
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
@@ -38,6 +138,17 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) {
TLI.setUnavailable(LibFunc::memset_pattern16);
}
+ if (T.isMacOSX() && T.getArch() == Triple::x86 &&
+ !T.isMacOSXVersionLT(10, 7)) {
+ // x86-32 OSX has a scheme where fwrite and fputs (and some other functions
+ // we don't care about) have two versions; on recent OSX, the one we want
+ // has a $UNIX2003 suffix. The two implementations are identical except
+ // for the return value in some edge cases. However, we don't want to
+ // generate code that depends on the old symbols.
+ TLI.setAvailableWithName(LibFunc::fwrite, "fwrite$UNIX2003");
+ TLI.setAvailableWithName(LibFunc::fputs, "fputs$UNIX2003");
+ }
+
// iprintf and friends are only available on XCore and TCE.
if (T.getArch() != Triple::xcore && T.getArch() != Triple::tce) {
TLI.setUnavailable(LibFunc::iprintf);
@@ -64,6 +175,7 @@ TargetLibraryInfo::TargetLibraryInfo(const Triple &T) : ImmutablePass(ID) {
TargetLibraryInfo::TargetLibraryInfo(const TargetLibraryInfo &TLI)
: ImmutablePass(ID) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
+ CustomNames = TLI.CustomNames;
}
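
The $UNIX2003 handling above relies on the new CustomNames map copied in the
copy constructor: the function stays available, but call sites are emitted
under the remapped symbol. A simplified model of that lookup (not the real
TargetLibraryInfo class):

#include <cassert>
#include <map>
#include <string>

// Simplified model of the available/custom-name scheme.
struct LibInfo {
  std::map<int, std::string> CustomNames; // FuncID -> remapped symbol

  void setAvailableWithName(int F, const std::string &Name) {
    CustomNames[F] = Name;
  }
  std::string getName(int F, const std::string &StandardName) const {
    std::map<int, std::string>::const_iterator It = CustomNames.find(F);
    return It != CustomNames.end() ? It->second : StandardName;
  }
};

int main() {
  const int fwrite_id = 0;
  LibInfo TLI;
  TLI.setAvailableWithName(fwrite_id, "fwrite$UNIX2003");
  assert(TLI.getName(fwrite_id, "fwrite") == "fwrite$UNIX2003");
  return 0;
}
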
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 56b7b69de0bd..2570e0d0972b 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -28,7 +28,6 @@
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -48,7 +47,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
-static bool isSuitableForBSS(const GlobalVariable *GV) {
+static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
const Constant *C = GV->getInitializer();
// Must have zero initializer.
@@ -73,31 +72,27 @@ static bool isSuitableForBSS(const GlobalVariable *GV) {
/// IsNullTerminatedString - Return true if the specified constant (which is
/// known to have a type that is an array of 1/2/4 byte elements) ends with a
-/// nul value and contains no other nuls in it.
+/// nul value and contains no other nuls in it. Note that this is more general
+/// than ConstantDataSequential::isString because we allow 2 & 4 byte strings.
static bool IsNullTerminatedString(const Constant *C) {
- ArrayType *ATy = cast<ArrayType>(C->getType());
-
- // First check: is we have constant array of i8 terminated with zero
- if (const ConstantArray *CVA = dyn_cast<ConstantArray>(C)) {
- if (ATy->getNumElements() == 0) return false;
-
- ConstantInt *Null =
- dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
- if (Null == 0 || !Null->isZero())
+ // First check: see if we have a constant array terminated with zero
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ unsigned NumElts = CDS->getNumElements();
+ assert(NumElts != 0 && "Can't have an empty CDS");
+
+ if (CDS->getElementAsInteger(NumElts-1) != 0)
return false; // Not null terminated.
-
+
// Verify that the null doesn't occur anywhere else in the string.
- for (unsigned i = 0, e = ATy->getNumElements()-1; i != e; ++i)
- // Reject constantexpr elements etc.
- if (!isa<ConstantInt>(CVA->getOperand(i)) ||
- CVA->getOperand(i) == Null)
+ for (unsigned i = 0; i != NumElts-1; ++i)
+ if (CDS->getElementAsInteger(i) == 0)
return false;
return true;
}
// Another possibility: [1 x i8] zeroinitializer
if (isa<ConstantAggregateZero>(C))
- return ATy->getNumElements() == 1;
+ return cast<ArrayType>(C->getType())->getNumElements() == 1;
return false;
}
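
For reference, a sketch of inputs and how the rewritten predicate
classifies them (ConstantDataArray::getString is assumed available, as in
the ConstantDataSequential work this hunk builds on):

    LLVMContext Ctx;
    // c"hi\00": trailing nul, no interior nuls -> true.
    Constant *A = ConstantDataArray::getString(Ctx, "hi", /*AddNull=*/true);
    // c"h\00i\00": interior nul -> false.
    Constant *B = ConstantDataArray::getString(Ctx, StringRef("h\0i", 3),
                                               /*AddNull=*/true);
    // [1 x i8] zeroinitializer -> true (the lone element is the nul).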
@@ -133,7 +128,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Handle thread-local data first.
if (GVar->isThreadLocal()) {
- if (isSuitableForBSS(GVar))
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS))
return SectionKind::getThreadBSS();
return SectionKind::getThreadData();
}
@@ -143,7 +138,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getCommon();
// Variable can be easily put to BSS section.
- if (isSuitableForBSS(GVar)) {
+ if (isSuitableForBSS(GVar, TM.Options.NoZerosInBSS)) {
if (GVar->hasLocalLinkage())
return SectionKind::getBSSLocal();
else if (GVar->hasExternalLinkage())
@@ -160,7 +155,6 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// relocation, then we may have to drop this into a writable data section
// even though it is marked const.
switch (C->getRelocationInfo()) {
- default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
// If the global is required to have a unique address, it can't be put
// into a mergable section: just drop it into the general read-only
@@ -234,7 +228,6 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getDataNoRel();
switch (C->getRelocationInfo()) {
- default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
return SectionKind::getDataNoRel();
case Constant::LocalRelocation:
@@ -242,6 +235,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
case Constant::GlobalRelocations:
return SectionKind::getDataRel();
}
+ llvm_unreachable("Invalid relocation");
}
/// SectionForGlobal - This method computes the appropriate section to emit
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index fe8a7cebd0a0..b9b2526876fd 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,11 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -24,153 +23,10 @@ using namespace llvm;
//
namespace llvm {
- bool LessPreciseFPMADOption;
- bool PrintMachineCode;
- bool NoFramePointerElim;
- bool NoFramePointerElimNonLeaf;
- bool NoExcessFPPrecision;
- bool UnsafeFPMath;
- bool NoInfsFPMath;
- bool NoNaNsFPMath;
- bool HonorSignDependentRoundingFPMathOption;
- bool UseSoftFloat;
- FloatABI::ABIType FloatABIType;
- bool NoImplicitFloat;
- bool NoZerosInBSS;
- bool JITExceptionHandling;
- bool JITEmitDebugInfo;
- bool JITEmitDebugInfoToDisk;
- bool GuaranteedTailCallOpt;
- unsigned StackAlignmentOverride;
- bool RealignStack;
- bool DisableJumpTables;
- bool StrongPHIElim;
bool HasDivModLibcall;
bool AsmVerbosityDefault(false);
- bool EnableSegmentedStacks;
}
-static cl::opt<bool, true>
-PrintCode("print-machineinstrs",
- cl::desc("Print generated machine code"),
- cl::location(PrintMachineCode), cl::init(false));
-static cl::opt<bool, true>
-DisableFPElim("disable-fp-elim",
- cl::desc("Disable frame pointer elimination optimization"),
- cl::location(NoFramePointerElim),
- cl::init(false));
-static cl::opt<bool, true>
-DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
- cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
- cl::location(NoFramePointerElimNonLeaf),
- cl::init(false));
-static cl::opt<bool, true>
-DisableExcessPrecision("disable-excess-fp-precision",
- cl::desc("Disable optimizations that may increase FP precision"),
- cl::location(NoExcessFPPrecision),
- cl::init(false));
-static cl::opt<bool, true>
-EnableFPMAD("enable-fp-mad",
- cl::desc("Enable less precise MAD instructions to be generated"),
- cl::location(LessPreciseFPMADOption),
- cl::init(false));
-static cl::opt<bool, true>
-EnableUnsafeFPMath("enable-unsafe-fp-math",
- cl::desc("Enable optimizations that may decrease FP precision"),
- cl::location(UnsafeFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoInfsFPMath("enable-no-infs-fp-math",
- cl::desc("Enable FP math optimizations that assume no +-Infs"),
- cl::location(NoInfsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableNoNaNsFPMath("enable-no-nans-fp-math",
- cl::desc("Enable FP math optimizations that assume no NaNs"),
- cl::location(NoNaNsFPMath),
- cl::init(false));
-static cl::opt<bool, true>
-EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
- cl::Hidden,
- cl::desc("Force codegen to assume rounding mode can change dynamically"),
- cl::location(HonorSignDependentRoundingFPMathOption),
- cl::init(false));
-static cl::opt<bool, true>
-GenerateSoftFloatCalls("soft-float",
- cl::desc("Generate software floating point library calls"),
- cl::location(UseSoftFloat),
- cl::init(false));
-static cl::opt<llvm::FloatABI::ABIType, true>
-FloatABIForCalls("float-abi",
- cl::desc("Choose float ABI type"),
- cl::location(FloatABIType),
- cl::init(FloatABI::Default),
- cl::values(
- clEnumValN(FloatABI::Default, "default",
- "Target default float ABI type"),
- clEnumValN(FloatABI::Soft, "soft",
- "Soft float ABI (implied by -soft-float)"),
- clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
-static cl::opt<bool, true>
-DontPlaceZerosInBSS("nozero-initialized-in-bss",
- cl::desc("Don't place zero-initialized symbols into bss section"),
- cl::location(NoZerosInBSS),
- cl::init(false));
-static cl::opt<bool, true>
-EnableJITExceptionHandling("jit-enable-eh",
- cl::desc("Emit exception handling information"),
- cl::location(JITExceptionHandling),
- cl::init(false));
-// In debug builds, make this default to true.
-#ifdef NDEBUG
-#define EMIT_DEBUG false
-#else
-#define EMIT_DEBUG true
-#endif
-static cl::opt<bool, true>
-EmitJitDebugInfo("jit-emit-debug",
- cl::desc("Emit debug information to debugger"),
- cl::location(JITEmitDebugInfo),
- cl::init(EMIT_DEBUG));
-#undef EMIT_DEBUG
-static cl::opt<bool, true>
-EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
- cl::Hidden,
- cl::desc("Emit debug info objfiles to disk"),
- cl::location(JITEmitDebugInfoToDisk),
- cl::init(false));
-
-static cl::opt<bool, true>
-EnableGuaranteedTailCallOpt("tailcallopt",
- cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
- cl::location(GuaranteedTailCallOpt),
- cl::init(false));
-static cl::opt<unsigned, true>
-OverrideStackAlignment("stack-alignment",
- cl::desc("Override default stack alignment"),
- cl::location(StackAlignmentOverride),
- cl::init(0));
-static cl::opt<bool, true>
-EnableRealignStack("realign-stack",
- cl::desc("Realign stack if needed"),
- cl::location(RealignStack),
- cl::init(true));
-static cl::opt<bool, true>
-DisableSwitchTables(cl::Hidden, "disable-jump-tables",
- cl::desc("Do not generate jump tables."),
- cl::location(DisableJumpTables),
- cl::init(false));
-static cl::opt<bool, true>
-EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
- cl::desc("Use strong PHI elimination."),
- cl::location(StrongPHIElim),
- cl::init(false));
-static cl::opt<std::string>
-TrapFuncName("trap-func", cl::Hidden,
- cl::desc("Emit a call to trap function rather than a trap instruction"),
- cl::init(""));
static cl::opt<bool>
DataSections("fdata-sections",
cl::desc("Emit data into separate sections"),
@@ -179,29 +35,23 @@ static cl::opt<bool>
FunctionSections("ffunction-sections",
cl::desc("Emit functions into separate sections"),
cl::init(false));
-static cl::opt<bool, true>
-SegmentedStacks("segmented-stacks",
- cl::desc("Use segmented stacks if possible."),
- cl::location(EnableSegmentedStacks),
- cl::init(false));
-
+
//---------------------------------------------------------------------------
// TargetMachine Class
//
TargetMachine::TargetMachine(const Target &T,
- StringRef TT, StringRef CPU, StringRef FS)
+ StringRef TT, StringRef CPU, StringRef FS,
+ const TargetOptions &Options)
: TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
CodeGenInfo(0), AsmInfo(0),
MCRelaxAll(false),
MCNoExecStack(false),
MCSaveTempLabels(false),
MCUseLoc(true),
- MCUseCFI(true) {
- // Typically it will be subtargets that will adjust FloatABIType from Default
- // to Soft or Hard.
- if (UseSoftFloat)
- FloatABIType = FloatABI::Soft;
+ MCUseCFI(true),
+ MCUseDwarfDirectory(false),
+ Options(Options) {
}
TargetMachine::~TargetMachine() {
@@ -225,6 +75,35 @@ CodeModel::Model TargetMachine::getCodeModel() const {
return CodeGenInfo->getCodeModel();
}
+TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
+ // FIXME: what should we do for protected and internal visibility?
+ // For variables, is internal different from hidden?
+ bool isHidden = GV->hasHiddenVisibility();
+
+ if (getRelocationModel() == Reloc::PIC_ &&
+ !Options.PositionIndependentExecutable) {
+ if (isLocal || isHidden)
+ return TLSModel::LocalDynamic;
+ else
+ return TLSModel::GeneralDynamic;
+ } else {
+ if (!isDeclaration || isHidden)
+ return TLSModel::LocalExec;
+ else
+ return TLSModel::InitialExec;
+ }
+}
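
The decision above collapses to a small table; a sketch, with TM and GV
assumed in scope:

    // PIC, non-PIE:  local or hidden        -> TLSModel::LocalDynamic
    //                otherwise              -> TLSModel::GeneralDynamic
    // static or PIE: defined here or hidden -> TLSModel::LocalExec
    //                external declaration   -> TLSModel::InitialExec
    TLSModel::Model M = TM.getTLSModel(GV); // e.g. GeneralDynamic for an
                                            // external symbol under -fPIC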
+
+/// getOptLevel - Returns the optimization level: None, Less,
+/// Default, or Aggressive.
+CodeGenOpt::Level TargetMachine::getOptLevel() const {
+ if (!CodeGenInfo)
+ return CodeGenOpt::Default;
+ return CodeGenInfo->getOptLevel();
+}
+
bool TargetMachine::getAsmVerbosityDefault() {
return AsmVerbosityDefault;
}
@@ -249,36 +128,3 @@ void TargetMachine::setDataSections(bool V) {
DataSections = V;
}
-namespace llvm {
- /// DisableFramePointerElim - This returns true if frame pointer elimination
- /// optimization should be disabled for the given machine function.
- bool DisableFramePointerElim(const MachineFunction &MF) {
- // Check to see if we should eliminate non-leaf frame pointers and then
- // check to see if we should eliminate all frame pointers.
- if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- return MFI->hasCalls();
- }
-
- return NoFramePointerElim;
- }
-
- /// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
- /// is specified on the command line. When this flag is off(default), the
- /// code generator is not allowed to generate mad (multiply add) if the
- /// result is "less precise" than doing those operations individually.
- bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
-
- /// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
- /// that the rounding mode of the FPU can change from its default.
- bool HonorSignDependentRoundingFPMath() {
- return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption;
- }
-
- /// getTrapFunctionName - If this returns a non-empty string, this means isel
- /// should lower Intrinsic::trap to a call to the specified function name
- /// instead of an ISD::TRAP node.
- StringRef getTrapFunctionName() {
- return TrapFuncName;
- }
-}
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
new file mode 100644
index 000000000000..d6bba8b0dd05
--- /dev/null
+++ b/lib/Target/TargetMachineC.cpp
@@ -0,0 +1,197 @@
+//===-- TargetMachineC.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LLVM-C part of TargetMachine.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Core.h"
+#include "llvm-c/Target.h"
+#include "llvm-c/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include <cassert>
+#include <cstdlib>
+#include <cstring>
+
+using namespace llvm;
+
+
+
+LLVMTargetRef LLVMGetFirstTarget() {
+ const Target* target = &*TargetRegistry::begin();
+ return wrap(target);
+}
+LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
+ return wrap(unwrap(T)->getNext());
+}
+
+const char * LLVMGetTargetName(LLVMTargetRef T) {
+ return unwrap(T)->getName();
+}
+
+const char * LLVMGetTargetDescription(LLVMTargetRef T) {
+ return unwrap(T)->getShortDescription();
+}
+
+LLVMBool LLVMTargetHasJIT(LLVMTargetRef T) {
+ return unwrap(T)->hasJIT();
+}
+
+LLVMBool LLVMTargetHasTargetMachine(LLVMTargetRef T) {
+ return unwrap(T)->hasTargetMachine();
+}
+
+LLVMBool LLVMTargetHasAsmBackend(LLVMTargetRef T) {
+ return unwrap(T)->hasMCAsmBackend();
+}
+
+LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, char* Triple,
+ char* CPU, char* Features, LLVMCodeGenOptLevel Level, LLVMRelocMode Reloc,
+ LLVMCodeModel CodeModel) {
+ Reloc::Model RM;
+ switch (Reloc){
+ case LLVMRelocStatic:
+ RM = Reloc::Static;
+ break;
+ case LLVMRelocPIC:
+ RM = Reloc::PIC_;
+ break;
+ case LLVMRelocDynamicNoPic:
+ RM = Reloc::DynamicNoPIC;
+ break;
+ default:
+ RM = Reloc::Default;
+ break;
+ }
+
+ CodeModel::Model CM;
+ switch (CodeModel) {
+ case LLVMCodeModelJITDefault:
+ CM = CodeModel::JITDefault;
+ break;
+ case LLVMCodeModelSmall:
+ CM = CodeModel::Small;
+ break;
+ case LLVMCodeModelKernel:
+ CM = CodeModel::Kernel;
+ break;
+ case LLVMCodeModelMedium:
+ CM = CodeModel::Medium;
+ break;
+ case LLVMCodeModelLarge:
+ CM = CodeModel::Large;
+ break;
+ default:
+ CM = CodeModel::Default;
+ break;
+ }
+ CodeGenOpt::Level OL;
+
+ switch (Level) {
+ case LLVMCodeGenLevelNone:
+ OL = CodeGenOpt::None;
+ break;
+ case LLVMCodeGenLevelLess:
+ OL = CodeGenOpt::Less;
+ break;
+ case LLVMCodeGenLevelAggressive:
+ OL = CodeGenOpt::Aggressive;
+ break;
+ default:
+ OL = CodeGenOpt::Default;
+ break;
+ }
+
+ TargetOptions opt;
+ return wrap(unwrap(T)->createTargetMachine(Triple, CPU, Features, opt, RM,
+ CM, OL));
+}
+
+
+void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) {
+ delete unwrap(T);
+}
+
+LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) {
+ const Target* target = &(unwrap(T)->getTarget());
+ return wrap(target);
+}
+
+char* LLVMGetTargetMachineTriple(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetTriple();
+ return strdup(StringRep.c_str());
+}
+
+char* LLVMGetTargetMachineCPU(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetCPU();
+ return strdup(StringRep.c_str());
+}
+
+char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) {
+ std::string StringRep = unwrap(T)->getTargetFeatureString();
+ return strdup(StringRep.c_str());
+}
+
+LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) {
+ return wrap(unwrap(T)->getTargetData());
+}
+
+LLVMBool LLVMTargetMachineEmitToFile(LLVMTargetMachineRef T, LLVMModuleRef M,
+ char* Filename, LLVMCodeGenFileType codegen, char** ErrorMessage) {
+ TargetMachine* TM = unwrap(T);
+ Module* Mod = unwrap(M);
+
+ PassManager pass;
+
+ std::string error;
+
+ const TargetData* td = TM->getTargetData();
+
+ if (!td) {
+ error = "No TargetData in TargetMachine";
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+ pass.add(new TargetData(*td));
+
+ TargetMachine::CodeGenFileType ft;
+ switch (codegen) {
+ case LLVMAssemblyFile:
+ ft = TargetMachine::CGFT_AssemblyFile;
+ break;
+ default:
+ ft = TargetMachine::CGFT_ObjectFile;
+ break;
+ }
+ raw_fd_ostream dest(Filename, error, raw_fd_ostream::F_Binary);
+ formatted_raw_ostream destf(dest);
+ if (!error.empty()) {
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ if (TM->addPassesToEmitFile(pass, destf, ft)) {
+ error = "No TargetData in TargetMachine";
+ *ErrorMessage = strdup(error.c_str());
+ return true;
+ }
+
+ pass.run(*Mod);
+
+ destf.flush();
+ dest.flush();
+ return false;
+}
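
A minimal sketch of driving the new C API end to end (Mod is an existing
LLVMModuleRef; target registration and error handling are elided):

    #include "llvm-c/TargetMachine.h"
    #include <stdio.h>
    #include <stdlib.h>

    void emitObject(LLVMModuleRef Mod) {
      LLVMTargetRef T = LLVMGetFirstTarget(); // or walk LLVMGetNextTarget
      LLVMTargetMachineRef TM = LLVMCreateTargetMachine(
          T, (char*)"x86_64-unknown-linux-gnu", (char*)"", (char*)"",
          LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
      char *Err = 0;
      if (LLVMTargetMachineEmitToFile(TM, Mod, (char*)"out.o",
                                      LLVMObjectFile, &Err)) {
        fprintf(stderr, "%s\n", Err);
        free(Err); // strdup'd above
      }
      LLVMDisposeTargetMachine(TM);
    }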
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 67239b830eb5..1716423eeeac 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -13,8 +13,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,7 +71,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
- ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
for (unsigned i = 0; i != Order.size(); ++i)
R.set(Order[i]);
}
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 94aca7abb2bd..47489bb06c4e 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -5,12 +5,4 @@ add_llvm_library(LLVMX86AsmParser
X86AsmParser.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmParser
- LLVMMC
- LLVMMCParser
- LLVMSupport
- LLVMX86Desc
- LLVMX86Info
- )
-
add_dependencies(LLVMX86AsmParser X86CommonTableGen)
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
new file mode 100644
index 000000000000..9f94d5d38864
--- /dev/null
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86AsmParser
+parent = X86
+required_libraries = MC MCParser Support X86Desc X86Info
+add_to_library_groups = X86
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index 1eaccff58a9d..2794e60df238 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -14,7 +14,6 @@
#include "llvm/MC/MCTargetAsmLexer.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
using namespace llvm;
@@ -144,11 +143,7 @@ AsmToken X86AsmLexer::LexTokenIntel() {
SetError(Lexer->getErrLoc(), Lexer->getErr());
return lexedToken;
case AsmToken::Identifier: {
- std::string upperCase = lexedToken.getString().str();
- std::string lowerCase = LowercaseString(upperCase);
- StringRef lowerRef(lowerCase);
-
- unsigned regID = MatchRegisterName(lowerRef);
+ unsigned regID = MatchRegisterName(lexedToken.getString().lower());
if (regID)
return AsmToken(AsmToken::Register,
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index cb4f15ffed3e..08c732c3886e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -17,10 +17,8 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
@@ -32,33 +30,47 @@ using namespace llvm;
namespace {
struct X86Operand;
-class X86ATTAsmParser : public MCTargetAsmParser {
+class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
-
private:
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg,
+ ArrayRef<SMRange> Ranges = ArrayRef<SMRange>()) {
+ return Parser.Error(L, Msg, Ranges);
+ }
+
+ X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
+ Error(Loc, Msg);
+ return 0;
+ }
X86Operand *ParseOperand();
+ X86Operand *ParseATTOperand();
+ X86Operand *ParseIntelOperand();
+ X86Operand *ParseIntelMemOperand();
+ X86Operand *ParseIntelBracExpression(unsigned SegReg, unsigned Size);
X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
+ bool processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
+
bool MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
/// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
- /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
+ /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
bool isSrcOp(X86Operand &Op);
- /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
- /// or %es:(%edi) in 32bit mode.
+ /// isDstOp - Returns true if operand is either (%rdi) or %es:(%rdi)
+ /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
bool is64BitMode() const {
@@ -79,7 +91,7 @@ private:
/// }
public:
- X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
@@ -91,6 +103,10 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
+
+ bool isParsingIntelSyntax() {
+ return getParser().getAssemblerDialect();
+ }
};
} // end anonymous namespace
@@ -101,6 +117,31 @@ static unsigned MatchRegisterName(StringRef Name);
/// }
+static bool isImmSExti16i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmSExti32i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmZExtu32u8Value(uint64_t Value) {
+ return (Value <= 0x00000000000000FFULL);
+}
+
+static bool isImmSExti64i8Value(uint64_t Value) {
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
+
+static bool isImmSExti64i32Value(uint64_t Value) {
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+}
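
Spot checks for the helpers (a sketch; the values are the raw 64-bit
patterns the matcher sees):

    #include <cassert>
    void checkImmPredicates() {
      assert(isImmSExti16i8Value(0x7F));   // fits a sign-extended imm8
      assert(isImmSExti16i8Value(0xFF80)); // i16 -128, seen zero-extended
      assert(!isImmSExti16i8Value(0x80));  // needs the full imm16 form
      assert(isImmSExti64i32Value(0xFFFFFFFF80000000ULL)); // INT32_MIN
    }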
namespace {
/// X86Operand - Instances of this class represent a parsed X86 machine
@@ -135,6 +176,7 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned BaseReg;
unsigned IndexReg;
unsigned Scale;
+ unsigned Size;
} Mem;
};
@@ -145,6 +187,8 @@ struct X86Operand : public MCParsedAsmOperand {
SMLoc getStartLoc() const { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+
+ SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
virtual void print(raw_ostream &OS) const {}
@@ -205,10 +249,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000000000007FULL)||
- (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti16i8Value(CE->getValue());
}
bool isImmSExti32i8() const {
if (!isImm())
@@ -222,10 +263,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000000000007FULL)||
- (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti32i8Value(CE->getValue());
}
bool isImmZExtu32u8() const {
if (!isImm())
@@ -239,8 +277,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (Value <= 0x00000000000000FFULL);
+ return isImmZExtu32u8Value(CE->getValue());
}
bool isImmSExti64i8() const {
if (!isImm())
@@ -254,9 +291,7 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000000000007FULL)||
- (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti64i8Value(CE->getValue());
}
bool isImmSExti64i32() const {
if (!isImm())
@@ -270,12 +305,31 @@ struct X86Operand : public MCParsedAsmOperand {
// Otherwise, check the value is in a range that makes sense for this
// extension.
- uint64_t Value = CE->getValue();
- return (( Value <= 0x000000007FFFFFFFULL)||
- (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ return isImmSExti64i32Value(CE->getValue());
}
bool isMem() const { return Kind == Memory; }
+ bool isMem8() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 8);
+ }
+ bool isMem16() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 16);
+ }
+ bool isMem32() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 32);
+ }
+ bool isMem64() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 64);
+ }
+ bool isMem80() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 80);
+ }
+ bool isMem128() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 128);
+ }
+ bool isMem256() const {
+ return Kind == Memory && (!Mem.Size || Mem.Size == 256);
+ }
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
@@ -302,6 +356,28 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
+ void addMem8Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem16Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem32Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem64Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem80Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem128Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+ void addMem256Operands(MCInst &Inst, unsigned N) const {
+ addMemOperands(Inst, N);
+ }
+
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
@@ -313,11 +389,16 @@ struct X86Operand : public MCParsedAsmOperand {
void addAbsMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 1) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemDisp()))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
- X86Operand *Res = new X86Operand(Token, Loc, Loc);
+ SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size() - 1);
+ X86Operand *Res = new X86Operand(Token, Loc, EndLoc);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
return Res;
@@ -337,20 +418,22 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
- SMLoc EndLoc) {
+ SMLoc EndLoc, unsigned Size = 0) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
Res->Mem.BaseReg = 0;
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
+ Res->Mem.Size = Size;
return Res;
}
/// Create a generalized memory operand.
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
- unsigned Scale, SMLoc StartLoc, SMLoc EndLoc) {
+ unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
+ unsigned Size = 0) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -364,13 +447,14 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.BaseReg = BaseReg;
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
+ Res->Mem.Size = Size;
return Res;
}
};
} // end anonymous namespace.
-bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+bool X86AsmParser::isSrcOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
return (Op.isMem() &&
@@ -380,32 +464,38 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
}
-bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+bool X86AsmParser::isDstOp(X86Operand &Op) {
unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
- return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ return Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::ES) &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
}
-bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
- SMLoc &StartLoc, SMLoc &EndLoc) {
+bool X86AsmParser::ParseRegister(unsigned &RegNo,
+ SMLoc &StartLoc, SMLoc &EndLoc) {
RegNo = 0;
- const AsmToken &TokPercent = Parser.getTok();
- assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
- StartLoc = TokPercent.getLoc();
- Parser.Lex(); // Eat percent token.
+ if (!isParsingIntelSyntax()) {
+ const AsmToken &TokPercent = Parser.getTok();
+ assert(TokPercent.is(AsmToken::Percent) && "Invalid token kind!");
+ StartLoc = TokPercent.getLoc();
+ Parser.Lex(); // Eat percent token.
+ }
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier))
- return Error(Tok.getLoc(), "invalid register name");
+ if (Tok.isNot(AsmToken::Identifier)) {
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, Tok.getEndLoc()));
+ }
RegNo = MatchRegisterName(Tok.getString());
// If the match failed, try the register name as lowercase.
if (RegNo == 0)
- RegNo = MatchRegisterName(LowercaseString(Tok.getString()));
+ RegNo = MatchRegisterName(Tok.getString().lower());
if (!is64BitMode()) {
// FIXME: This should be done using Requires<In32BitMode> and
@@ -417,8 +507,9 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
X86II::isX86_64ExtendedReg(RegNo))
- return Error(Tok.getLoc(), "register %"
- + Tok.getString() + " is only available in 64-bit mode");
+ return Error(StartLoc, "register %"
+ + Tok.getString() + " is only available in 64-bit mode",
+ SMRange(StartLoc, Tok.getEndLoc()));
}
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -478,15 +569,182 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
}
}
- if (RegNo == 0)
- return Error(Tok.getLoc(), "invalid register name");
+ if (RegNo == 0) {
+ if (isParsingIntelSyntax()) return true;
+ return Error(StartLoc, "invalid register name",
+ SMRange(StartLoc, Tok.getEndLoc()));
+ }
- EndLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
Parser.Lex(); // Eat identifier token.
return false;
}
-X86Operand *X86ATTAsmParser::ParseOperand() {
+X86Operand *X86AsmParser::ParseOperand() {
+ if (isParsingIntelSyntax())
+ return ParseIntelOperand();
+ return ParseATTOperand();
+}
+
+/// getIntelMemOperandSize - Return intel memory operand size.
+static unsigned getIntelMemOperandSize(StringRef OpStr) {
+ unsigned Size = 0;
+ if (OpStr == "BYTE") Size = 8;
+ if (OpStr == "WORD") Size = 16;
+ if (OpStr == "DWORD") Size = 32;
+ if (OpStr == "QWORD") Size = 64;
+ if (OpStr == "XWORD") Size = 80;
+ if (OpStr == "XMMWORD") Size = 128;
+ if (OpStr == "YMMWORD") Size = 256;
+ return Size;
+}
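
So, for example (a sketch of the resulting parses):

    // "mov eax, DWORD PTR [ebx]" -> Size == 32; only isMem32() matches.
    // "mov al, BYTE PTR [si]"    -> Size == 8.
    // "mov eax, [ebx]"           -> Size == 0, which matches any width
    //                               (see the isMemN() predicates above).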
+
+X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg,
+ unsigned Size) {
+ unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc Start = Parser.getTok().getLoc(), End;
+
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ // Parse [ BaseReg + Scale*IndexReg + Disp ] or [ symbol ]
+
+ // Eat '['
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ Parser.Lex();
+
+ if (getLexer().is(AsmToken::Identifier)) {
+ // Parse BaseReg
+ if (ParseRegister(BaseReg, Start, End)) {
+ // Handle '[' 'symbol' ']'
+ if (getParser().ParseExpression(Disp, End)) return 0;
+ if (getLexer().isNot(AsmToken::RBrac))
+ return ErrorOperand(Start, "Expected ']' token!");
+ Parser.Lex();
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ }
+ } else if (getLexer().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+ Parser.Lex();
+ SMLoc Loc = Parser.getTok().getLoc();
+ if (getLexer().is(AsmToken::RBrac)) {
+ // Handle '[' number ']'
+ Parser.Lex();
+ const MCExpr *Disp = MCConstantExpr::Create(Val, getContext());
+ if (SegReg)
+ return X86Operand::CreateMem(SegReg, Disp, 0, 0, Scale,
+ Start, End, Size);
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+ } else if (getLexer().is(AsmToken::Star)) {
+ // Handle '[' Scale*IndexReg ']'
+ Parser.Lex();
+ SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ if (ParseRegister(IndexReg, IdxRegLoc, End))
+ return ErrorOperand(IdxRegLoc, "Expected register");
+ Scale = Val;
+ } else
+ return ErrorOperand(Loc, "Unepxeted token");
+ }
+
+ if (getLexer().is(AsmToken::Plus) || getLexer().is(AsmToken::Minus)) {
+ bool isPlus = getLexer().is(AsmToken::Plus);
+ Parser.Lex();
+ SMLoc PlusLoc = Parser.getTok().getLoc();
+ if (getLexer().is(AsmToken::Integer)) {
+ int64_t Val = Parser.getTok().getIntVal();
+ Parser.Lex();
+ if (getLexer().is(AsmToken::Star)) {
+ Parser.Lex();
+ SMLoc IdxRegLoc = Parser.getTok().getLoc();
+ if (ParseRegister(IndexReg, IdxRegLoc, End))
+ return ErrorOperand(IdxRegLoc, "Expected register");
+ Scale = Val;
+ } else if (getLexer().is(AsmToken::RBrac)) {
+ const MCExpr *ValExpr = MCConstantExpr::Create(Val, getContext());
+ Disp = isPlus ? ValExpr : MCConstantExpr::Create(0-Val, getContext());
+ } else
+ return ErrorOperand(PlusLoc, "unexpected token after +");
+ } else if (getLexer().is(AsmToken::Identifier)) {
+ // This could be an index register or a displacement expression.
+ End = Parser.getTok().getLoc();
+ if (!IndexReg)
+ ParseRegister(IndexReg, Start, End);
+ else if (getParser().ParseExpression(Disp, End)) return 0;
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::RBrac))
+ if (getParser().ParseExpression(Disp, End)) return 0;
+
+ End = Parser.getTok().getLoc();
+ if (getLexer().isNot(AsmToken::RBrac))
+ return ErrorOperand(End, "expected ']' token!");
+ Parser.Lex();
+ End = Parser.getTok().getLoc();
+
+ // handle [-42]
+ if (!BaseReg && !IndexReg)
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
+ Start, End, Size);
+}
+
+/// ParseIntelMemOperand - Parse an Intel-style memory operand.
+X86Operand *X86AsmParser::ParseIntelMemOperand() {
+ const AsmToken &Tok = Parser.getTok();
+ SMLoc Start = Parser.getTok().getLoc(), End;
+ unsigned SegReg = 0;
+
+ unsigned Size = getIntelMemOperandSize(Tok.getString());
+ if (Size) {
+ Parser.Lex();
+ assert (Tok.getString() == "PTR" && "Unexpected token!");
+ Parser.Lex();
+ }
+
+ if (getLexer().is(AsmToken::LBrac))
+ return ParseIntelBracExpression(SegReg, Size);
+
+ if (!ParseRegister(SegReg, Start, End)) {
+ // Handle SegReg : [ ... ]
+ if (getLexer().isNot(AsmToken::Colon))
+ return ErrorOperand(Start, "Expected ':' token!");
+ Parser.Lex(); // Eat :
+ if (getLexer().isNot(AsmToken::LBrac))
+ return ErrorOperand(Start, "Expected '[' token!");
+ return ParseIntelBracExpression(SegReg, Size);
+ }
+
+ const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
+ if (getParser().ParseExpression(Disp, End)) return 0;
+ return X86Operand::CreateMem(Disp, Start, End, Size);
+}
+
+X86Operand *X86AsmParser::ParseIntelOperand() {
+ SMLoc Start = Parser.getTok().getLoc(), End;
+
+ // immediate.
+ if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Real) ||
+ getLexer().is(AsmToken::Minus)) {
+ const MCExpr *Val;
+ if (!getParser().ParseExpression(Val, End)) {
+ End = Parser.getTok().getLoc();
+ return X86Operand::CreateImm(Val, Start, End);
+ }
+ }
+
+ // register
+ unsigned RegNo = 0;
+ if (!ParseRegister(RegNo, Start, End)) {
+ End = Parser.getTok().getLoc();
+ return X86Operand::CreateReg(RegNo, Start, End);
+ }
+
+ // mem operand
+ return ParseIntelMemOperand();
+}
+
+X86Operand *X86AsmParser::ParseATTOperand() {
switch (getLexer().getKind()) {
default:
// Parse a memory operand with no segment register.
@@ -497,7 +755,8 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
- Error(Start, "%eiz and %riz can only be used as index registers");
+ Error(Start, "%eiz and %riz can only be used as index registers",
+ SMRange(Start, End));
return 0;
}
@@ -524,7 +783,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
-X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
+X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
@@ -579,18 +838,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
+ SMLoc IndexLoc;
if (getLexer().is(AsmToken::Percent)) {
- SMLoc L;
- if (ParseRegister(BaseReg, L, L)) return 0;
+ SMLoc StartLoc, EndLoc;
+ if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
- Error(L, "eiz and riz can only be used as index registers");
+ Error(StartLoc, "eiz and riz can only be used as index registers",
+ SMRange(StartLoc, EndLoc));
return 0;
}
}
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
+ IndexLoc = Parser.getTok().getLoc();
// Following the comma we should have either an index register, or a scale
// value. We don't support the latter form, but we want to parse it
@@ -616,8 +878,10 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc Loc = Parser.getTok().getLoc();
int64_t ScaleVal;
- if (getParser().ParseAbsoluteExpression(ScaleVal))
+ if (getParser().ParseAbsoluteExpression(ScaleVal)){
+ Error(Loc, "expected scale expression");
return 0;
+ }
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
@@ -650,11 +914,28 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
+ // If we have both a base register and an index register make sure they are
+ // both 64-bit or 32-bit registers.
+ if (BaseReg != 0 && IndexReg != 0) {
+ if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) &&
+ IndexReg != X86::RIZ) {
+ Error(IndexLoc, "index register is 32-bit, but base register is 64-bit");
+ return 0;
+ }
+ if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
+ !X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) &&
+ IndexReg != X86::EIZ){
+ Error(IndexLoc, "index register is 64-bit, but base register is 32-bit");
+ return 0;
+ }
+ }
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
-bool X86ATTAsmParser::
+bool X86AsmParser::
ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
StringRef PatchedName = Name;
@@ -669,20 +950,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
- bool IsVCMP = PatchedName.startswith("vcmp");
+ bool IsVCMP = PatchedName[0] == 'v';
unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
- .Case("eq", 0)
- .Case("lt", 1)
- .Case("le", 2)
- .Case("unord", 3)
- .Case("neq", 4)
- .Case("nlt", 5)
- .Case("nle", 6)
- .Case("ord", 7)
- .Case("eq_uq", 8)
- .Case("nge", 9)
+ .Case("eq", 0x00)
+ .Case("lt", 0x01)
+ .Case("le", 0x02)
+ .Case("unord", 0x03)
+ .Case("neq", 0x04)
+ .Case("nlt", 0x05)
+ .Case("nle", 0x06)
+ .Case("ord", 0x07)
+ /* AVX only from here */
+ .Case("eq_uq", 0x08)
+ .Case("nge", 0x09)
.Case("ngt", 0x0A)
.Case("false", 0x0B)
.Case("neq_oq", 0x0C)
@@ -706,7 +988,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
.Case("gt_oq", 0x1E)
.Case("true_us", 0x1F)
.Default(~0U);
- if (SSEComparisonCode != ~0U) {
+ if (SSEComparisonCode != ~0U && (IsVCMP || SSEComparisonCode < 8)) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
@@ -724,10 +1006,9 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
- if (ExtraImmOp)
+ if (ExtraImmOp && !isParsingIntelSyntax())
Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
-
// Determine whether this is an instruction prefix.
bool isPrefix =
Name == "lock" || Name == "rep" ||
@@ -781,6 +1062,9 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
else if (isPrefix && getLexer().is(AsmToken::Slash))
Parser.Lex(); // Consume the prefix separator Slash
+ if (ExtraImmOp && isParsingIntelSyntax())
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
// This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
@@ -916,11 +1200,21 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Name.startswith("rcl") || Name.startswith("rcr") ||
Name.startswith("rol") || Name.startswith("ror")) &&
Operands.size() == 3) {
- X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
- if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
- cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
- delete Operands[1];
- Operands.erase(Operands.begin() + 1);
+ if (isParsingIntelSyntax()) {
+ // Intel syntax
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[2];
+ Operands.pop_back();
+ }
+ } else {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ }
}
}
@@ -939,7 +1233,246 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
return false;
}
-bool X86ATTAsmParser::
+bool X86AsmParser::
+processInstruction(MCInst &Inst,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Ops) {
+ switch (Inst.getOpcode()) {
+ default: return false;
+ case X86::AND16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::AND16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::AND32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::AND32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::AND64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::AND64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::XOR16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::XOR16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::XOR32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::XOR32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::XOR64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::XOR64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::OR16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::OR16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::OR32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::OR32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::OR64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::OR64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::CMP16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::CMP16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::CMP32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::CMP32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::CMP64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::CMP64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::ADD16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::ADD32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::ADD64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::ADD64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::SUB16i16: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::SUB16ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::AX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::SUB32i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::SUB32ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::EAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ case X86::SUB64i32: {
+ if (!Inst.getOperand(0).isImm() ||
+ !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
+ return false;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(X86::SUB64ri8);
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(MCOperand::CreateReg(X86::RAX));
+ TmpInst.addOperand(Inst.getOperand(0));
+ Inst = TmpInst;
+ return true;
+ }
+ }
+}
+
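MatchAndEmitInstruction (below) loops on this until no more rewrites
fire. The net effect is an encoding shrink; for example (a sketch):

    // "and $0x7f, %eax" first matches AND32i32 (25 7F 00 00 00, imm32);
    // processInstruction rewrites it to AND32ri8 with explicit EAX
    // operands, which encodes as 83 E0 7F and saves two bytes.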
+bool X86AsmParser::
MatchAndEmitInstruction(SMLoc IDLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out) {
@@ -957,6 +1490,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
Op->getToken() == "fstenv" || Op->getToken() == "fclex") {
MCInst Inst;
Inst.setOpcode(X86::WAIT);
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
const char *Repl =
@@ -980,9 +1514,17 @@ MatchAndEmitInstruction(SMLoc IDLoc,
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo)) {
+ switch (MatchInstructionImpl(Operands, Inst, OrigErrorInfo,
+ isParsingIntelSyntax())) {
default: break;
case Match_Success:
+ // Some instructions need post-processing to, for example, tweak which
+ // encoding is selected. Loop on it while changes happen so the
+ // individual transformations can chain off each other.
+ while (processInstruction(Inst, Operands))
+ ;
+
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
case Match_MissingFeature:
@@ -1040,6 +1582,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
(Match1 == Match_Success) + (Match2 == Match_Success) +
(Match3 == Match_Success) + (Match4 == Match_Success);
if (NumSuccessfulMatches == 1) {
+ Inst.setLoc(IDLoc);
Out.EmitInstruction(Inst);
return false;
}
@@ -1078,21 +1621,24 @@ MatchAndEmitInstruction(SMLoc IDLoc,
if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
(Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
if (!WasOriginallyInvalidOperand) {
- Error(IDLoc, "invalid instruction mnemonic '" + Base + "'");
- return true;
+ return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
+ Op->getLocRange());
}
// Recover location info for the operand if we know which was the problem.
- SMLoc ErrorLoc = IDLoc;
if (OrigErrorInfo != ~0U) {
if (OrigErrorInfo >= Operands.size())
return Error(IDLoc, "too few operands for instruction");
- ErrorLoc = ((X86Operand*)Operands[OrigErrorInfo])->getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
+ if (Operand->getStartLoc().isValid()) {
+ SMRange OperandRange = Operand->getLocRange();
+ return Error(Operand->getStartLoc(), "invalid operand for instruction",
+ OperandRange);
+ }
}
- return Error(ErrorLoc, "invalid operand for instruction");
+ return Error(IDLoc, "invalid operand for instruction");
}
// If one instruction matched with a missing feature, report this as a
@@ -1112,24 +1658,34 @@ MatchAndEmitInstruction(SMLoc IDLoc,
}
// If all of these were an outright failure, report it in a useless way.
- // FIXME: We should give nicer diagnostics about the exact failure.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix");
return true;
}
-bool X86ATTAsmParser::ParseDirective(AsmToken DirectiveID) {
+bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return ParseDirectiveWord(2, DirectiveID.getLoc());
else if (IDVal.startswith(".code"))
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
+ else if (IDVal.startswith(".intel_syntax")) {
+ getParser().setAssemblerDialect(1);
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if(Parser.getTok().getString() == "noprefix") {
+ // FIXME : Handle noprefix
+ Parser.Lex();
+ } else
+ return true;
+ }
+ return false;
+ }
return true;
}
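
Directive forms accepted after this change (a sketch):

    // .intel_syntax            -> dialect 1; operands parse Intel-style
    // .intel_syntax noprefix   -> accepted; prefix-less registers are
    //                             still a FIXME per the comment above
    // anything else after the directive is an error (returns true)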
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
@@ -1154,7 +1710,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ParseDirectiveCode
/// ::= .code32 | .code64
-bool X86ATTAsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
+bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
if (IDVal == ".code32") {
Parser.Lex();
if (is64BitMode()) {
@@ -1179,8 +1735,8 @@ extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterMCAsmParser<X86ATTAsmParser> X(TheX86_32Target);
- RegisterMCAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
+ RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
+ RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
LLVMInitializeX86AsmLexer();
}
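
The post-processing loop added to the Match_Success path above is a fixed-point iteration: each call to processInstruction() may rewrite the MCInst so that another rewrite becomes applicable. A self-contained C++ sketch of that pattern follows; the two rewrite functions and the opcode values are hypothetical illustrations, not part of this patch.

#include <cstdio>

// Toy stand-in for MCInst: one "opcode" field is enough to show the shape.
struct Inst { int Opcode; };

static bool shrinkEncoding(Inst &I) {
  // Hypothetical rewrite: opcode 3 has a smaller equivalent, opcode 2.
  if (I.Opcode == 3) { I.Opcode = 2; return true; }
  return false;
}

static bool canonicalize(Inst &I) {
  // Hypothetical rewrite that only fires after shrinkEncoding has run.
  if (I.Opcode == 2) { I.Opcode = 1; return true; }
  return false;
}

// Returns true if any transformation changed the instruction, so the driver
// loop keeps going until nothing fires -- the same shape as
// "while (processInstruction(Inst, Operands)) ;" in the parser.
static bool processInstruction(Inst &I) {
  return shrinkEncoding(I) || canonicalize(I);
}

int main() {
  Inst I = { 3 };
  while (processInstruction(I))
    ;
  std::printf("final opcode: %d\n", I.Opcode); // prints "final opcode: 1"
  return 0;
}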
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 351e7675a7e8..f612e2365ec3 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -1,16 +1,16 @@
set(LLVM_TARGET_DEFINITIONS X86.td)
-llvm_tablegen(X86GenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
-llvm_tablegen(X86GenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(X86GenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
-llvm_tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
-llvm_tablegen(X86GenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(X86GenFastISel.inc -gen-fast-isel)
-llvm_tablegen(X86GenCallingConv.inc -gen-callingconv)
-llvm_tablegen(X86GenSubtargetInfo.inc -gen-subtarget)
-llvm_tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
+tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM X86GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher)
+tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM X86GenFastISel.inc -gen-fast-isel)
+tablegen(LLVM X86GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info)
add_public_tablegen_target(X86CommonTableGen)
set(sources
@@ -26,6 +26,7 @@ set(sources
X86InstrInfo.cpp
X86JITInfo.cpp
X86MCInstLower.cpp
+ X86MachineFunctionInfo.cpp
X86RegisterInfo.cpp
X86SelectionDAGInfo.cpp
X86Subtarget.cpp
@@ -51,19 +52,6 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-add_llvm_library_dependencies(LLVMX86CodeGen
- LLVMAnalysis
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMX86AsmPrinter
- LLVMX86Desc
- )
-
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 4f570d56e60f..0cd6db96dabe 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -5,12 +5,6 @@ add_llvm_library(LLVMX86Disassembler
X86DisassemblerDecoder.c
)
-add_llvm_library_dependencies(LLVMX86Disassembler
- LLVMMC
- LLVMSupport
- LLVMX86Info
- )
-
# workaround for hanging compilation on MSVC9 and 10
if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..0609f3c28de3
--- /dev/null
+++ b/lib/Target/X86/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/Disassembler/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Disassembler
+parent = X86
+required_libraries = MC Support X86Desc X86Info
+add_to_library_groups = X86
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 3aacb20e73df..8278bde7c218 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===//
+//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -18,9 +18,11 @@
#include "X86DisassemblerDecoder.h"
#include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryObject.h"
@@ -42,6 +44,11 @@ void x86DisassemblerDebug(const char *file,
dbgs() << file << ":" << line << ": " << s;
}
+const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii) {
+ const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
+ return MII->getName(Opcode);
+}
+
#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
namespace llvm {
@@ -65,17 +72,19 @@ extern Target TheX86_32Target, TheX86_64Target;
}
static bool translateInstruction(MCInst &target,
- InternalInstruction &source);
+ InternalInstruction &source,
+ const MCDisassembler *Dis);
-X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) :
- MCDisassembler(STI),
- fMode(mode) {
-}
+X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
+ DisassemblerMode mode,
+ const MCInstrInfo *MII)
+ : MCDisassembler(STI), MII(MII), fMode(mode) {}
X86GenericDisassembler::~X86GenericDisassembler() {
+ delete MII;
}
-EDInstInfo *X86GenericDisassembler::getEDInfo() const {
+const EDInstInfo *X86GenericDisassembler::getEDInfo() const {
return instInfoX86;
}
@@ -116,6 +125,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
uint64_t address,
raw_ostream &vStream,
raw_ostream &cStream) const {
+ CommentStream = &cStream;
+
InternalInstruction internalInstr;
dlog_t loggerFn = logger;
@@ -127,6 +138,7 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
(void*)&region,
loggerFn,
(void*)&vStream,
+ (void*)MII,
address,
fMode);
@@ -136,7 +148,8 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
}
else {
size = internalInstr.length;
- return (!translateInstruction(instr, internalInstr)) ? Success : Fail;
+ return (!translateInstruction(instr, internalInstr, this)) ?
+ Success : Fail;
}
}
@@ -161,6 +174,140 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
mcInst.addOperand(MCOperand::CreateReg(llvmRegnum));
}
+/// tryAddingSymbolicOperand - tries to add a symbolic operand in place of the
+/// immediate Value in the MCInst.
+///
+/// @param Value - The immediate Value; any PC adjustment has already been
+/// made by the caller.
+/// @param isBranch - True if the instruction is a branch instruction
+/// @param Address - The starting address of the instruction
+/// @param Offset - The byte offset to this immediate in the instruction
+/// @param Width - The byte width of this immediate in the instruction
+///
+/// If the getOpInfo() function was set when setupForSymbolicDisassembly() was
+/// called, then that function is called to get any symbolic information for
+/// the immediate in the instruction, using the Address, Offset and Width. If
+/// it returns non-zero, the symbolic information it returns is used to create
+/// an MCExpr, which is added as an operand to the MCInst. If getOpInfo()
+/// returns zero and isBranch is true, then a symbol lookup for the immediate
+/// Value is done; if a symbol is found, an MCExpr is created with it, else
+/// an MCExpr with the immediate Value is created. This function returns true
+/// if it adds an operand to the MCInst and false otherwise.
+static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Width, MCInst &MI,
+ const MCDisassembler *Dis) {
+ LLVMOpInfoCallback getOpInfo = Dis->getLLVMOpInfoCallback();
+ struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ SymbolicOp.Value = Value;
+ void *DisInfo = Dis->getDisInfoBlock();
+
+ if (!getOpInfo ||
+ !getOpInfo(DisInfo, Address, Offset, Width, 1, &SymbolicOp)) {
+ // Clear SymbolicOp.Value from above and also all other fields.
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (!SymbolLookUp)
+ return false;
+ uint64_t ReferenceType;
+ if (isBranch)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ else
+ ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
+ const char *ReferenceName;
+ const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
+ &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ }
+ // For branches, always create an MCExpr so it gets printed as a hex address.
+ else if (isBranch) {
+ SymbolicOp.Value = Value;
+ }
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ (*Dis->CommentStream) << "symbol stub for: " << ReferenceName;
+ if (!Name && !isBranch)
+ return false;
+ }
+
+ MCContext *Ctx = Dis->getMCContext();
+ const MCExpr *Add = NULL;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Add = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Sub = NULL;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
+ } else {
+ Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, *Ctx);
+ }
+ }
+
+ const MCExpr *Off = NULL;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off != 0)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off != 0)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, *Ctx);
+ }
+
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+
+ return true;
+}
+
+/// tryAddingPcLoadReferenceComment - tries to add a comment as to what is
+/// being referenced by a load instruction whose base register is RIP.
+/// These are often addresses in a literal pool. The Address of the
+/// instruction and its immediate Value are used to determine the address
+/// being referenced in the literal pool entry. The SymbolLookUp callback
+/// will return a pointer to a literal 'C' string if the referenced address
+/// is an address into a section with 'C' string literals.
+static void tryAddingPcLoadReferenceComment(uint64_t Address, uint64_t Value,
+ const void *Decoder) {
+ const MCDisassembler *Dis = static_cast<const MCDisassembler*>(Decoder);
+ LLVMSymbolLookupCallback SymbolLookUp = Dis->getLLVMSymbolLookupCallback();
+ if (SymbolLookUp) {
+ void *DisInfo = Dis->getDisInfoBlock();
+ uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
+ const char *ReferenceName;
+ (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ (*Dis->CommentStream) << "literal pool for: " << ReferenceName;
+ }
+}
+
/// translateImmediate - Appends an immediate operand to an MCInst.
///
/// @param mcInst - The MCInst to append to.
@@ -169,10 +316,11 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
/// @param insn - The internal instruction.
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
const OperandSpecifier &operand,
- InternalInstruction &insn) {
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
// Sign-extend the immediate if necessary.
- OperandType type = operand.type;
+ OperandType type = (OperandType)operand.type;
if (type == TYPE_RELv) {
switch (insn.displacementSize) {
@@ -225,6 +373,8 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
}
+ bool isBranch = false;
+ uint64_t pcrel = 0;
switch (type) {
case TYPE_XMM128:
mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4)));
@@ -232,8 +382,11 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case TYPE_XMM256:
mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4)));
return;
- case TYPE_MOFFS8:
case TYPE_REL8:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ // fall through to sign-extend the immediate if needed.
+ case TYPE_MOFFS8:
if(immediate & 0x80)
immediate |= ~(0xffull);
break;
@@ -241,9 +394,12 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
if(immediate & 0x8000)
immediate |= ~(0xffffull);
break;
- case TYPE_MOFFS32:
case TYPE_REL32:
case TYPE_REL64:
+ isBranch = true;
+ pcrel = insn.startLocation + insn.immediateOffset + insn.immediateSize;
+ // fall through to sign-extend the immediate if needed.
+ case TYPE_MOFFS32:
if(immediate & 0x80000000)
immediate |= ~(0xffffffffull);
break;
@@ -253,7 +409,10 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
break;
}
- mcInst.addOperand(MCOperand::CreateImm(immediate));
+ if (!tryAddingSymbolicOperand(immediate + pcrel, isBranch, insn.startLocation,
+ insn.immediateOffset, insn.immediateSize,
+ mcInst, Dis))
+ mcInst.addOperand(MCOperand::CreateImm(immediate));
}
/// translateRMRegister - Translates a register stored in the R/M field of the
@@ -300,7 +459,8 @@ static bool translateRMRegister(MCInst &mcInst,
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
/// @return - 0 on success; nonzero otherwise
-static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
+ const MCDisassembler *Dis) {
// Addresses in an MCInst are represented as five operands:
// 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
@@ -318,6 +478,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
MCOperand indexReg;
MCOperand displacement;
MCOperand segmentReg;
+ uint64_t pcrel = 0;
if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
if (insn.sibBase != SIB_BASE_NONE) {
@@ -359,8 +520,14 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
return true;
}
- if (insn.mode == MODE_64BIT)
+ if (insn.mode == MODE_64BIT) {
+ pcrel = insn.startLocation +
+ insn.displacementOffset + insn.displacementSize;
+ tryAddingPcLoadReferenceComment(insn.startLocation +
+ insn.displacementOffset,
+ insn.displacement + pcrel, Dis);
baseReg = MCOperand::CreateReg(X86::RIP); // Section 2.2.1.6
+ }
else
baseReg = MCOperand::CreateReg(0);
@@ -426,7 +593,10 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
- mcInst.addOperand(displacement);
+ if (!tryAddingSymbolicOperand(insn.displacement + pcrel, false,
+ insn.startLocation, insn.displacementOffset,
+ insn.displacementSize, mcInst, Dis))
+ mcInst.addOperand(displacement);
mcInst.addOperand(segmentReg);
return false;
}
@@ -440,7 +610,7 @@ static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
/// from.
/// @return - 0 on success; nonzero otherwise
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
- InternalInstruction &insn) {
+ InternalInstruction &insn, const MCDisassembler *Dis) {
switch (operand.type) {
default:
debug("Unexpected type for a R/M operand");
@@ -480,7 +650,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M1632:
case TYPE_M1664:
case TYPE_LEA:
- return translateRMMemory(mcInst, insn);
+ return translateRMMemory(mcInst, insn, Dis);
}
}
@@ -510,7 +680,8 @@ static bool translateFPRegister(MCInst &mcInst,
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
- InternalInstruction &insn) {
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
switch (operand.encoding) {
default:
debug("Unhandled operand encoding during translation");
@@ -519,7 +690,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateRegister(mcInst, insn.reg);
return false;
case ENCODING_RM:
- return translateRM(mcInst, operand, insn);
+ return translateRM(mcInst, operand, insn, Dis);
case ENCODING_CB:
case ENCODING_CW:
case ENCODING_CD:
@@ -537,7 +708,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
translateImmediate(mcInst,
insn.immediates[insn.numImmediatesTranslated++],
operand,
- insn);
+ insn,
+ Dis);
return false;
case ENCODING_RB:
case ENCODING_RW:
@@ -556,7 +728,7 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_DUP:
return translateOperand(mcInst,
insn.spec->operands[operand.type - TYPE_DUP0],
- insn);
+ insn, Dis);
}
}
@@ -567,7 +739,8 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
/// @param insn - The internal instruction.
/// @return - false on success; true otherwise.
static bool translateInstruction(MCInst &mcInst,
- InternalInstruction &insn) {
+ InternalInstruction &insn,
+ const MCDisassembler *Dis) {
if (!insn.spec) {
debug("Instruction has no specification");
return true;
@@ -581,7 +754,7 @@ static bool translateInstruction(MCInst &mcInst,
for (index = 0; index < X86_MAX_OPERANDS; ++index) {
if (insn.spec->operands[index].encoding != ENCODING_NONE) {
- if (translateOperand(mcInst, insn.spec->operands[index], insn)) {
+ if (translateOperand(mcInst, insn.spec->operands[index], insn, Dis)) {
return true;
}
}
@@ -590,12 +763,16 @@ static bool translateInstruction(MCInst &mcInst,
return false;
}
-static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new X86Disassembler::X86_32Disassembler(STI);
+static MCDisassembler *createX86_32Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86GenericDisassembler(STI, MODE_32BIT,
+ T.createMCInstrInfo());
}
-static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new X86Disassembler::X86_64Disassembler(STI);
+static MCDisassembler *createX86_64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI) {
+ return new X86Disassembler::X86GenericDisassembler(STI, MODE_64BIT,
+ T.createMCInstrInfo());
}
extern "C" void LLVMInitializeX86Disassembler() {
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 6ac9a0ff1019..c11f51c6a9ac 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -1,4 +1,4 @@
-//===- X86Disassembler.h - Disassembler for x86 and x86_64 ------*- C++ -*-===//
+//===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -78,7 +78,7 @@
const char* name;
#define INSTRUCTION_IDS \
- const InstrUID *instructionIDs;
+ unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -87,11 +87,10 @@
#include "llvm/MC/MCDisassembler.h"
-struct InternalInstruction;
-
namespace llvm {
class MCInst;
+class MCInstrInfo;
class MCSubtargetInfo;
class MemoryObject;
class raw_ostream;
@@ -104,13 +103,16 @@ namespace X86Disassembler {
/// All each platform class should have to do is subclass the constructor, and
/// provide a different disassemblerMode value.
class X86GenericDisassembler : public MCDisassembler {
-protected:
+ const MCInstrInfo *MII;
+public:
/// Constructor - Initializes the disassembler.
///
/// @param mode - The X86 architecture mode to decode for.
- X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode);
-public:
+ X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode,
+ const MCInstrInfo *MII);
+private:
~X86GenericDisassembler();
+public:
/// getInstruction - See MCDisassembler.
DecodeStatus getInstruction(MCInst &instr,
@@ -121,37 +123,13 @@ public:
raw_ostream &cStream) const;
/// getEDInfo - See MCDisassembler.
- EDInstInfo *getEDInfo() const;
+ const EDInstInfo *getEDInfo() const;
private:
DisassemblerMode fMode;
};
-/// X86_16Disassembler - 16-bit X86 disassembler.
-class X86_16Disassembler : public X86GenericDisassembler {
-public:
- X86_16Disassembler(const MCSubtargetInfo &STI) :
- X86GenericDisassembler(STI, MODE_16BIT) {
- }
-};
-
-/// X86_16Disassembler - 32-bit X86 disassembler.
-class X86_32Disassembler : public X86GenericDisassembler {
-public:
- X86_32Disassembler(const MCSubtargetInfo &STI) :
- X86GenericDisassembler(STI, MODE_32BIT) {
- }
-};
-
-/// X86_16Disassembler - 64-bit X86 disassembler.
-class X86_64Disassembler : public X86GenericDisassembler {
-public:
- X86_64Disassembler(const MCSubtargetInfo &STI) :
- X86GenericDisassembler(STI, MODE_64BIT) {
- }
-};
-
} // namespace X86Disassembler
-
+
} // namespace llvm
-
+
#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index f9b0fe5d51b9..602087756b23 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoder.c - Disassembler decoder -------------*- C -*-==*
+/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
@@ -82,11 +82,9 @@ static int modRMRequired(OpcodeType type,
decision = &THREEBYTEA7_SYM;
break;
}
-
+
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
modrm_type != MODRM_ONEENTRY;
-
- return 0;
}
/*
@@ -103,12 +101,9 @@ static InstrUID decode(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
- const struct ModRMDecision* dec;
+ const struct ModRMDecision* dec = 0;
switch (type) {
- default:
- debug("Unknown opcode type");
- return 0;
case ONEBYTE:
dec = &ONEBYTE_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
@@ -134,14 +129,17 @@ static InstrUID decode(OpcodeType type,
debug("Corrupt table! Unknown modrm_type");
return 0;
case MODRM_ONEENTRY:
- return dec->instructionIDs[0];
+ return modRMTable[dec->instructionIDs];
case MODRM_SPLITRM:
if (modFromModRM(modRM) == 0x3)
- return dec->instructionIDs[1];
- else
- return dec->instructionIDs[0];
+ return modRMTable[dec->instructionIDs+1];
+ return modRMTable[dec->instructionIDs];
+ case MODRM_SPLITREG:
+ if (modFromModRM(modRM) == 0x3)
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)+8];
+ return modRMTable[dec->instructionIDs+((modRM & 0x38) >> 3)];
case MODRM_FULL:
- return dec->instructionIDs[modRM];
+ return modRMTable[dec->instructionIDs+modRM];
}
}
@@ -314,6 +312,15 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (consumeByte(insn, &byte))
return -1;
+
+ /*
+ * If the first byte is a LOCK prefix, break and let it be disassembled
+ * as a lock "instruction" by creating an <MCInst #xxxx LOCK_PREFIX>.
+ * FIXME: there is currently no way to get the disassembler to print the
+ * lock prefix if it is not the first byte.
+ */
+ if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0)
+ break;
switch (byte) {
case 0xf0: /* LOCK */
@@ -712,7 +719,7 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) {
* @return - 0 if the ModR/M could be read when needed or was not needed;
* nonzero otherwise.
*/
-static int getID(struct InternalInstruction* insn) {
+static int getID(struct InternalInstruction* insn, void *miiArg) {
uint8_t attrMask;
uint16_t instructionID;
@@ -765,6 +772,8 @@ static int getID(struct InternalInstruction* insn) {
else {
if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0x67, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_ADSIZE;
else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
attrMask |= ATTR_XS;
else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
@@ -773,17 +782,20 @@ static int getID(struct InternalInstruction* insn) {
if (insn->rexPrefix & 0x08)
attrMask |= ATTR_REXW;
-
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
-
+
/* The following clauses compensate for limitations of the tables. */
-
- if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) {
+
+ if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) &&
+ !(attrMask & ATTR_OPSIZE)) {
/*
* Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit
* has precedence since there are no L-bit with W-bit entries in the tables.
* So if the L-bit isn't significant we should use the W-bit instead.
+ * We only need to do this if the instruction doesn't specify OpSize since
+ * there is a VEX_L_W_OPSIZE table.
*/
const struct InstructionSpecifier *spec;
@@ -823,7 +835,7 @@ static int getID(struct InternalInstruction* insn) {
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- const struct InstructionSpecifier *specWithOpsize;
+ const char *specName, *specWithOpSizeName;
spec = specifierForUID(instructionID);
@@ -840,11 +852,13 @@ static int getID(struct InternalInstruction* insn) {
return 0;
}
- specWithOpsize = specifierForUID(instructionIDWithOpsize);
-
- if (is16BitEquvalent(spec->name, specWithOpsize->name)) {
+ specName = x86DisassemblerGetInstrName(instructionID, miiArg);
+ specWithOpSizeName =
+ x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
+
+ if (is16BitEquvalent(specName, specWithOpSizeName)) {
insn->instructionID = instructionIDWithOpsize;
- insn->spec = specWithOpsize;
+ insn->spec = specifierForUID(instructionIDWithOpsize);
} else {
insn->instructionID = instructionID;
insn->spec = spec;
@@ -1011,6 +1025,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
return 0;
insn->consumedDisplacement = TRUE;
+ insn->displacementOffset = insn->readerCursor - insn->startLocation;
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
@@ -1407,6 +1422,7 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
size = insn->immediateSize;
else
insn->immediateSize = size;
+ insn->immediateOffset = insn->readerCursor - insn->startLocation;
switch (size) {
case 1:
@@ -1469,6 +1485,7 @@ static int readVVVV(struct InternalInstruction* insn) {
static int readOperands(struct InternalInstruction* insn) {
int index;
int hasVVVV, needVVVV;
+ int sawRegImm = 0;
dbgprintf(insn, "readOperands()");
@@ -1497,11 +1514,25 @@ static int readOperands(struct InternalInstruction* insn) {
dbgprintf(insn, "We currently don't handle code-offset encodings");
return -1;
case ENCODING_IB:
+ if (sawRegImm) {
+ /* Saw a register immediate so don't read again and instead split the
+ previous immediate. FIXME: This is a hack. */
+ insn->immediates[insn->numImmediatesConsumed] =
+ insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
+ ++insn->numImmediatesConsumed;
+ break;
+ }
if (readImmediate(insn, 1))
return -1;
if (insn->spec->operands[index].type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
+ if (insn->spec->operands[index].type == TYPE_IMM5 &&
+ insn->immediates[insn->numImmediatesConsumed - 1] > 31)
+ return -1;
+ if (insn->spec->operands[index].type == TYPE_XMM128 ||
+ insn->spec->operands[index].type == TYPE_XMM256)
+ sawRegImm = 1;
break;
case ENCODING_IW:
if (readImmediate(insn, 2))
@@ -1593,6 +1624,7 @@ int decodeInstruction(struct InternalInstruction* insn,
void* readerArg,
dlog_t logger,
void* loggerArg,
+ void* miiArg,
uint64_t startLoc,
DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
@@ -1608,7 +1640,7 @@ int decodeInstruction(struct InternalInstruction* insn,
if (readPrefixes(insn) ||
readOpcode(insn) ||
- getID(insn) ||
+ getID(insn, miiArg) ||
insn->instructionID == 0 ||
readOperands(insn))
return -1;
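
The decode() rewrite above flattens the per-decision InstrUID arrays into one shared modRMTable that instructionIDs now indexes into. A self-contained C++ sketch of the new lookup arithmetic, with a made-up table:

#include <cstdio>
#include <stdint.h>

typedef uint16_t InstrUID;

// One shared flat table; each ModRMDecision stores a start index into it
// instead of owning a pointer to its own array. Values are made up.
static const InstrUID modRMTable[] = {
  7,                               // [0]     a MODRM_ONEENTRY decision
  10, 11, 12, 13, 14, 15, 16, 17,  // [1..8]  SPLITREG bank for mod != 3
  20, 21, 22, 23, 24, 25, 26, 27,  // [9..16] SPLITREG bank for mod == 3
};

static uint8_t modFromModRM(uint8_t modRM) { return modRM >> 6; }
static uint8_t regFromModRM(uint8_t modRM) { return (modRM & 0x38) >> 3; }

// MODRM_SPLITREG lookup, same arithmetic as the new decode() case: the reg
// field picks one of 8 entries, and mod == 0b11 selects the second bank.
static InstrUID splitRegLookup(unsigned start, uint8_t modRM) {
  if (modFromModRM(modRM) == 0x3)
    return modRMTable[start + regFromModRM(modRM) + 8];
  return modRMTable[start + regFromModRM(modRM)];
}

int main() {
  std::printf("%u\n", (unsigned)splitRegLookup(1, 0x08)); // mod=00 reg=1 -> 11
  std::printf("%u\n", (unsigned)splitRegLookup(1, 0xc8)); // mod=11 reg=1 -> 21
  return 0;
}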
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index a9c90f8f9bda..fae309b45d02 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoderInternal.h - Disassembler decoder -----*- C -*-==*
+/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
@@ -20,11 +20,10 @@
extern "C" {
#endif
-#define INSTRUCTION_SPECIFIER_FIELDS \
- const char* name;
+#define INSTRUCTION_SPECIFIER_FIELDS
#define INSTRUCTION_IDS \
- const InstrUID *instructionIDs;
+ unsigned instructionIDs;
#include "X86DisassemblerDecoderCommon.h"
@@ -460,6 +459,11 @@ struct InternalInstruction {
uint8_t addressSize;
uint8_t displacementSize;
uint8_t immediateSize;
+
+ /* Offsets from the start of the instruction to the pieces of data, which are
+ needed to find relocation entries for adding symbolic operands. */
+ uint8_t displacementOffset;
+ uint8_t immediateOffset;
/* opcode state */
@@ -554,6 +558,7 @@ int decodeInstruction(struct InternalInstruction* insn,
void* readerArg,
dlog_t logger,
void* loggerArg,
+ void* miiArg,
uint64_t startLoc,
DisassemblerMode mode);
@@ -568,6 +573,8 @@ void x86DisassemblerDebug(const char *file,
unsigned line,
const char *s);
+const char *x86DisassemblerGetInstrName(unsigned Opcode, void *mii);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 8b7933545a56..13e113609bf3 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -1,4 +1,4 @@
-/*===- X86DisassemblerDecoderCommon.h - Disassembler decoder -------*- C -*-==*
+/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===*
*
* The LLVM Compiler Infrastructure
*
@@ -54,8 +54,9 @@
ENUM_ENTRY(ATTR_XD, 0x04) \
ENUM_ENTRY(ATTR_REXW, 0x08) \
ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
- ENUM_ENTRY(ATTR_VEX, 0x20) \
- ENUM_ENTRY(ATTR_VEXL, 0x40)
+ ENUM_ENTRY(ATTR_ADSIZE, 0x20) \
+ ENUM_ENTRY(ATTR_VEX, 0x40) \
+ ENUM_ENTRY(ATTR_VEXL, 0x80)
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
@@ -77,6 +78,8 @@ enum attributeBits {
"64-bit mode but no more") \
ENUM_ENTRY(IC_OPSIZE, 3, "requires an OPSIZE prefix, so " \
"operands change width") \
+ ENUM_ENTRY(IC_ADSIZE, 3, "requires an ADSIZE prefix, so " \
+ "operands change width") \
ENUM_ENTRY(IC_XD, 2, "may say something about the opcode " \
"but not the operands") \
ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \
@@ -88,6 +91,7 @@ enum attributeBits {
ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\
"change width; overrides IC_OPSIZE") \
ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \
+ ENUM_ENTRY(IC_64BIT_ADSIZE, 3, "Just as meaningful as IC_ADSIZE") \
ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \
"secondary") \
ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \
@@ -111,7 +115,8 @@ enum attributeBits {
ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\
- ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
@@ -155,6 +160,8 @@ typedef uint16_t InstrUID;
* MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
* corresponds to one instruction; otherwise, it corresponds to
* a different instruction.
+ * MODRM_SPLITREG - The ModR/M byte divided by 8 is used to select the
+ * instruction. This corresponds to instructions that use the reg
+ * field as an opcode extension.
* MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
* to a different instruction.
*/
@@ -162,6 +169,7 @@ typedef uint16_t InstrUID;
#define MODRMTYPES \
ENUM_ENTRY(MODRM_ONEENTRY) \
ENUM_ENTRY(MODRM_SPLITRM) \
+ ENUM_ENTRY(MODRM_SPLITREG) \
ENUM_ENTRY(MODRM_FULL)
#define ENUM_ENTRY(n) n,
@@ -265,6 +273,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_IMM32, "4-byte") \
ENUM_ENTRY(TYPE_IMM64, "8-byte") \
ENUM_ENTRY(TYPE_IMM3, "1-byte immediate operand between 0 and 7") \
+ ENUM_ENTRY(TYPE_IMM5, "1-byte immediate operand between 0 and 31") \
ENUM_ENTRY(TYPE_RM8, "1-byte register or memory operand") \
ENUM_ENTRY(TYPE_RM16, "2-byte") \
ENUM_ENTRY(TYPE_RM32, "4-byte") \
@@ -335,8 +344,8 @@ typedef enum {
* operand.
*/
struct OperandSpecifier {
- OperandEncoding encoding;
- OperandType type;
+ uint8_t encoding;
+ uint8_t type;
};
/*
@@ -363,7 +372,7 @@ typedef enum {
* its operands.
*/
struct InstructionSpecifier {
- ModifierType modifierType;
+ uint8_t modifierType;
uint8_t modifierBase;
struct OperandSpecifier operands[X86_MAX_OPERANDS];
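
With ATTR_ADSIZE inserted at 0x20 and the VEX bits shifted up, every attribute remains a distinct power of two, so an attribute mask is still a plain bitwise OR. A tiny C++ illustration under the new numbering; ATTR_64BIT and ATTR_XS are not shown in the hunk above, so their values here are assumed from context.

#include <cstdio>

enum attributeBits {
  ATTR_NONE   = 0x00,
  ATTR_64BIT  = 0x01, // value assumed from context, not shown in the hunk
  ATTR_XS     = 0x02, // value assumed from context, not shown in the hunk
  ATTR_XD     = 0x04,
  ATTR_REXW   = 0x08,
  ATTR_OPSIZE = 0x10,
  ATTR_ADSIZE = 0x20, // new in this patch
  ATTR_VEX    = 0x40, // shifted up from 0x20
  ATTR_VEXL   = 0x80  // shifted up from 0x40
};

int main() {
  // e.g. a 64-bit instruction with REX.W and a 0x67 address-size prefix
  unsigned attrMask = ATTR_64BIT | ATTR_REXW | ATTR_ADSIZE;
  std::printf("attrMask = 0x%02x\n", attrMask); // prints 0x29
  return 0;
}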
diff --git a/lib/Target/X86/InstPrinter/CMakeLists.txt b/lib/Target/X86/InstPrinter/CMakeLists.txt
index 2a2b5dbb43db..28e2460d8233 100644
--- a/lib/Target/X86/InstPrinter/CMakeLists.txt
+++ b/lib/Target/X86/InstPrinter/CMakeLists.txt
@@ -6,10 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
X86InstComments.cpp
)
-add_llvm_library_dependencies(LLVMX86AsmPrinter
- LLVMMC
- LLVMSupport
- LLVMX86Utils
- )
-
add_dependencies(LLVMX86AsmPrinter X86CommonTableGen)
diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt
new file mode 100644
index 000000000000..6868ddefa51f
--- /dev/null
+++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86AsmPrinter
+parent = X86
+required_libraries = MC Support X86Utils
+add_to_library_groups = X86
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 029d491260f6..5118e4cad4e2 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -19,6 +19,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
@@ -26,14 +28,9 @@
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
-#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
-X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {
-}
-
void X86ATTInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
OS << '%' << getRegisterName(RegNo);
@@ -45,29 +42,50 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
if (!printAliasInstr(MI, OS))
printInstruction(MI, OS);
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream) {
- printAnnotation(OS, Annot);
+ if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
- }
-}
-
-StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
}
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
- default: assert(0 && "Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
}
}
@@ -79,11 +97,21 @@ void X86ATTInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm())
- // Print this as a signed 32-bit value.
- O << (int)Op.getImm();
+ O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
- O << *Op.getExpr();
+ // If a symbolic branch target was added as a constant expression, then
+ // print that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ }
+ else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
}
}
@@ -97,7 +125,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << '$' << (int64_t)Op.getImm();
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
- *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
+ *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
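
The print_pcrel_imm change above makes constant branch targets print as hex rather than decimal; once EvaluateAsAbsolute has produced an address, the core is just write_hex on a raw_ostream. A reduced sketch, assuming LLVM headers and libraries are available:

#include "llvm/Support/raw_ostream.h"
#include <stdint.h>

// Reduced sketch of the branch-target printing added to print_pcrel_imm:
// absolute targets come out as "0x..." instead of a plain integer.
static void printBranchTarget(llvm::raw_ostream &OS, int64_t Address) {
  OS << "0x";
  OS.write_hex(Address);
}

int main() {
  printBranchTarget(llvm::outs(), 0x100ed4); // prints 0x100ed4
  llvm::outs() << "\n";
  return 0;
}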
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 0293869b0a9b..2e00bff1738e 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -------===//
+//==- X86ATTInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -22,11 +22,12 @@ class MCOperand;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(const MCAsmInfo &MAI);
-
+ X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
@@ -35,7 +36,6 @@ public:
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8d85b95fe81d..f532019acdff 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -29,11 +29,17 @@ using namespace llvm;
void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
- SmallVector<unsigned, 8> ShuffleMask;
+ SmallVector<int, 8> ShuffleMask;
const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+ case X86::VINSERTPSrr:
+ DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
Src2Name = getRegName(MI->getOperand(2).getReg());
DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
@@ -44,34 +50,61 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodeMOVLHPSMask(2, ShuffleMask);
break;
+ case X86::VMOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
case X86::MOVHLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(0).getReg());
DecodeMOVHLPSMask(2, ShuffleMask);
break;
+ case X86::VMOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
case X86::PSHUFDri:
+ case X86::VPSHUFDri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFDmi:
+ case X86::VPSHUFDmi:
DestName = getRegName(MI->getOperand(0).getReg());
- DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ DecodePSHUFMask(MVT::v4i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::VPSHUFDYri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPSHUFDYmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(MVT::v8i32, MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
+
case X86::PSHUFHWri:
+ case X86::VPSHUFHWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFHWmi:
+ case X86::VPSHUFHWmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
break;
case X86::PSHUFLWri:
+ case X86::VPSHUFLWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
// FALL THROUGH.
case X86::PSHUFLWmi:
+ case X86::VPSHUFLWmi:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
ShuffleMask);
@@ -82,28 +115,92 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKHBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(16, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKHBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKHWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(8, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKHWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKHDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKHDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKHQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKHQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKHMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKHQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKHQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKHMask(MVT::v4i64, ShuffleMask);
break;
case X86::PUNPCKLBWrr:
@@ -111,126 +208,284 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLBWMask(16, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i8, ShuffleMask);
+ break;
+ case X86::VPUNPCKLBWYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLBWYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v32i8, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLWDMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i16, ShuffleMask);
+ break;
+ case X86::VPUNPCKLWDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLWDYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v16i16, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLDQMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i32, ShuffleMask);
+ break;
+ case X86::VPUNPCKLDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v8i32, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLQDQMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v2i64, ShuffleMask);
+ break;
+ case X86::VPUNPCKLQDQYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPUNPCKLQDQYrm:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodeUNPCKLMask(MVT::v4i64, ShuffleMask);
break;
case X86::SHUFPDrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPDrmi:
- DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VSHUFPDrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDrmi:
+ DecodeSHUFPMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPDYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPDYrmi:
+ DecodeSHUFPMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::SHUFPSrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::SHUFPSrmi:
- DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VSHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSrmi:
+ DecodeSHUFPMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VSHUFPSYrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VSHUFPSYrmi:
+ DecodeSHUFPMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPDMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDrm:
- DecodeUNPCKLPDMask(2, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPDYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPDYrm:
- DecodeUNPCKLPDMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPSMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSrm:
- DecodeUNPCKLPSMask(4, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VUNPCKLPSYrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::VUNPCKLPSYrm:
- DecodeUNPCKLPSMask(8, ShuffleMask);
+ DecodeUNPCKLMask(MVT::v8f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPDrm:
- DecodeUNPCKHPMask(2, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDrm:
+ DecodeUNPCKHMask(MVT::v2f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPDYrm:
+ DecodeUNPCKHMask(MVT::v4f64, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::UNPCKHPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKHPSrm:
- DecodeUNPCKHPMask(4, ShuffleMask);
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSrm:
+ DecodeUNPCKHMask(MVT::v4f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::VUNPCKHPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKHPSYrm:
+ DecodeUNPCKHMask(MVT::v8f32, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
case X86::VPERMILPSri:
- DecodeVPERMILPSMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSmi:
+ DecodePSHUFMask(MVT::v4f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPSYri:
- DecodeVPERMILPSMask(8, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPSYmi:
+ DecodePSHUFMask(MVT::v8f32, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDri:
- DecodeVPERMILPDMask(2, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDmi:
+ DecodePSHUFMask(MVT::v2f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERMILPDYri:
- DecodeVPERMILPDMask(4, MI->getOperand(2).getImm(),
- ShuffleMask);
- Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::VPERMILPDYmi:
+ DecodePSHUFMask(MVT::v4f64, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
case X86::VPERM2F128rr:
- DecodeVPERM2F128Mask(MI->getOperand(3).getImm(), ShuffleMask);
- Src1Name = getRegName(MI->getOperand(1).getReg());
+ case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VPERM2F128rm:
+ case X86::VPERM2I128rm:
+ // For instruction comment purposes, assume the 256-bit vector is v4i64.
+ DecodeVPERM2X128Mask(MVT::v4i64,
+ MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
break;
}
@@ -245,7 +500,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
if (Src1Name == Src2Name) {
for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
- ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] >= (int)e) // From second mask.
ShuffleMask[i] -= e;
}
}
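
When both sources are the same register, the hunk above folds indices that point into the second source back into the first, so the shuffle prints as a single-source operation. A minimal restatement of that canonicalization, assuming Mask is a SmallVectorImpl<int> using -1 as the undef sentinel:

    // Fold "from src2" indices into src1's range when src1 == src2.
    for (unsigned i = 0, e = Mask.size(); i != e; ++i)
      if (Mask[i] >= 0 &&        // skip undef sentinels
          Mask[i] >= (int)e)     // index selects from the second source
        Mask[i] -= e;            // remap into the first source's range
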
@@ -263,13 +518,13 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// Otherwise, it must come from src1 or src2. Print the span of elements
// that comes from this src.
- bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ bool isSrc1 = ShuffleMask[i] < (int)ShuffleMask.size();
const char *SrcName = isSrc1 ? Src1Name : Src2Name;
OS << (SrcName ? SrcName : "mem") << '[';
bool IsFirst = true;
while (i != e &&
(int)ShuffleMask[i] >= 0 &&
- (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ (ShuffleMask[i] < (int)ShuffleMask.size()) == isSrc1) {
if (!IsFirst)
OS << ',';
else
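
The unpack cases in the hunks above now go through a single DecodeUNPCKLMask/DecodeUNPCKHMask entry point keyed on an MVT rather than a raw element count. A sketch of what a low-unpack decoder produces under that interface; the function name and the per-128-bit-lane loop are illustrative, not the exact library implementation:

    // UNPCKL interleaves the low halves of both sources within each 128-bit
    // lane: v4f32 -> 0,4,1,5; v8f32 -> 0,8,1,9,4,12,5,13; v2f64 -> 0,2.
    static void SketchDecodeUNPCKLMask(unsigned NumElts, unsigned EltBits,
                                       SmallVectorImpl<int> &Mask) {
      unsigned EltsPerLane = 128 / EltBits;   // elements in one 128-bit lane
      for (unsigned Lane = 0; Lane != NumElts; Lane += EltsPerLane)
        for (unsigned i = 0; i != EltsPerLane / 2; ++i) {
          Mask.push_back(Lane + i);           // element from src1
          Mask.push_back(Lane + i + NumElts); // matching element from src2
        }
    }

Indices at NumElts and above refer to the second source, which is exactly what the canonicalization and printing loops above rely on.
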
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.h b/lib/Target/X86/InstPrinter/X86InstComments.h
index 6b86db4f9e5c..13fdf9af8c98 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.h
+++ b/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -1,4 +1,4 @@
-//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//=- X86InstComments.h - Generate verbose-asm comments for instrs -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index f9ab5aeee122..4ea662cbe0c1 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -17,15 +17,13 @@
#include "X86InstComments.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include <cctype>
using namespace llvm;
-// Include the auto-generated portion of the assembly writer.
-#define GET_INSTRUCTION_NAME
#include "X86GenAsmWriter1.inc"
void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -35,29 +33,52 @@ void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot) {
printInstruction(MI, OS);
-
+
+ // Next always print the annotation.
+ printAnnotation(OS, Annot);
+
// If verbose assembly is enabled, we can print some informative comments.
- if (CommentStream) {
- printAnnotation(OS, Annot);
+ if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
- }
-}
-StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
- return getInstructionName(Opcode);
}
void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
- default: assert(0 && "Invalid ssecc argument!");
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
}
}
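
The switch above grows from the eight SSE compare predicates to the full 32 AVX ones. A hedged sketch of the same mapping as a lookup helper (only two entries shown; the helper name is hypothetical):

    // Map an SSE/AVX compare immediate (0x0..0x1f) to its predicate suffix.
    static const char *getCmpPredicateName(unsigned Imm) {
      switch (Imm) {
      case 0x1: return "lt";   // classic SSE predicate
      case 0xd: return "ge";   // AVX-only predicate (values 0x8-0x1f are AVX)
      default:  return 0;      // remaining 30 predicates elided in this sketch
      }
    }

With it, an instruction like vcmpps $0xd, %xmm1, %xmm2, %xmm3 prints its predicate as "ge".
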
@@ -70,7 +91,18 @@ void X86IntelInstPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo,
O << Op.getImm();
else {
assert(Op.isExpr() && "unknown pcrel immediate operand");
- O << *Op.getExpr();
+ // If a symbolic branch target was added as a constant expression, then print
+ // that address in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
+ } else {
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
+ }
}
}
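
The print_pcrel_imm change above hex-prints branch targets that were folded into constant expressions. A standalone sketch of that pattern, assuming an MCExpr *E taken from a pc-relative operand:

    // Print a constant branch target in hex; print anything else symbolically.
    static void printPCRelTarget(const MCExpr *E, raw_ostream &O) {
      int64_t Address;
      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(E);
      if (CE && CE->EvaluateAsAbsolute(Address)) {
        O << "0x";
        O.write_hex(Address);  // e.g. a resolved target prints as 0x401000
      } else {
        O << *E;               // unresolved symbols keep their expression form
      }
    }
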
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6d5ec6226a9e..4f5938daf4cd 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -----===//
+//= X86IntelInstPrinter.h - Convert X86 MCInst to assembly syntax -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -23,17 +23,16 @@ class MCOperand;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(const MCAsmInfo &MAI)
- : MCInstPrinter(MAI) {}
+ X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
- virtual StringRef getOpcodeName(unsigned Opcode) const;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt
new file mode 100644
index 000000000000..87305e0e5f5c
--- /dev/null
+++ b/lib/Target/X86/LLVMBuild.txt
@@ -0,0 +1,35 @@
+;===- ./lib/Target/X86/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils
+
+[component_0]
+type = TargetGroup
+name = X86
+parent = Target
+has_asmparser = 1
+has_asmprinter = 1
+has_disassembler = 1
+has_jit = 1
+
+[component_1]
+type = Library
+name = X86CodeGen
+parent = X86
+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils
+add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 87219120e2a8..1c240e52a37d 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -2,16 +2,10 @@ add_llvm_library(LLVMX86Desc
X86AsmBackend.cpp
X86MCTargetDesc.cpp
X86MCAsmInfo.cpp
- X86MCCodeEmitter.cpp
+ X86MCCodeEmitter.cpp
X86MachObjectWriter.cpp
- )
-
-add_llvm_library_dependencies(LLVMX86Desc
- LLVMMC
- LLVMSupport
- LLVMX86AsmPrinter
- LLVMX86AsmPrinter
- LLVMX86Info
+ X86ELFObjectWriter.cpp
+ X86WinCOFFObjectWriter.cpp
)
add_dependencies(LLVMX86Desc X86CommonTableGen)
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..9e1d29ca0a65
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Desc
+parent = X86
+required_libraries = MC Support X86AsmPrinter X86Info
+add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 69ad7d7b6b32..32e40febd26a 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -7,10 +7,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCAsmBackend.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
@@ -37,18 +36,22 @@ MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
- default: assert(0 && "invalid fixup kind!");
+ default: llvm_unreachable("invalid fixup kind!");
case FK_PCRel_1:
+ case FK_SecRel_1:
case FK_Data_1: return 0;
case FK_PCRel_2:
+ case FK_SecRel_2:
case FK_Data_2: return 1;
case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
case X86::reloc_signed_4byte:
case X86::reloc_global_offset_table:
+ case FK_SecRel_4:
case FK_Data_4: return 2;
case FK_PCRel_8:
+ case FK_SecRel_8:
case FK_Data_8: return 3;
}
}
@@ -57,9 +60,9 @@ namespace {
class X86ELFObjectWriter : public MCELFObjectTargetWriter {
public:
- X86ELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine,
- bool HasRelocationAddend)
- : MCELFObjectTargetWriter(is64Bit, OSType, EMachine, HasRelocationAddend) {}
+ X86ELFObjectWriter(bool is64Bit, uint8_t OSABI, uint16_t EMachine,
+ bool HasRelocationAddend)
+ : MCELFObjectTargetWriter(is64Bit, OSABI, EMachine, HasRelocationAddend) {}
};
class X86AsmBackend : public MCAsmBackend {
@@ -87,7 +90,7 @@ public:
return Infos[Kind - FirstTargetFixupKind];
}
- void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value) const {
unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
@@ -105,11 +108,16 @@ public:
Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
- bool MayNeedRelaxation(const MCInst &Inst) const;
+ bool mayNeedRelaxation(const MCInst &Inst) const;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const;
- void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
};
} // end anonymous namespace
@@ -214,7 +222,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
return getRelaxedOpcodeBranch(Op);
}
-bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
// Branches can always be relaxed.
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
@@ -244,9 +252,17 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
return hasExp && !hasRIP;
}
+bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCInstFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
+void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
@@ -262,10 +278,10 @@ void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
Res.setOpcode(RelaxedOp);
}
-/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// writeNopData - Write optimal nops to the output file for the \arg Count
/// bytes. This returns the number of bytes written. It may return 0 if
/// the \arg Count is more than the maximum optimal nops.
-bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
static const uint8_t Nops[10][10] = {
// nop
{0x90},
@@ -310,9 +326,9 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
- Triple::OSType OSType;
- ELFX86AsmBackend(const Target &T, Triple::OSType _OSType)
- : X86AsmBackend(T), OSType(_OSType) {
+ uint8_t OSABI;
+ ELFX86AsmBackend(const Target &T, uint8_t _OSABI)
+ : X86AsmBackend(T), OSABI(_OSABI) {
HasReliableSymbolDifference = true;
}
@@ -324,31 +340,21 @@ public:
class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_32AsmBackend(const Target &T, Triple::OSType OSType)
- : ELFX86AsmBackend(T, OSType) {}
+ ELFX86_32AsmBackend(const Target &T, uint8_t OSABI)
+ : ELFX86AsmBackend(T, OSABI) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ true);
- }
-
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI);
}
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
- ELFX86_64AsmBackend(const Target &T, Triple::OSType OSType)
- : ELFX86AsmBackend(T, OSType) {}
+ ELFX86_64AsmBackend(const Target &T, uint8_t OSABI)
+ : ELFX86AsmBackend(T, OSABI) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(createELFObjectTargetWriter(),
- OS, /*IsLittleEndian*/ true);
- }
-
- MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
- return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI);
}
};
@@ -362,7 +368,7 @@ public:
}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createWinCOFFObjectWriter(OS, Is64Bit);
+ return createX86WinCOFFObjectWriter(OS, Is64Bit);
}
};
@@ -442,7 +448,8 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
return new WindowsX86AsmBackend(T, false);
- return new ELFX86_32AsmBackend(T, TheTriple.getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFX86_32AsmBackend(T, OSABI);
}
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
@@ -454,5 +461,6 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) {
if (TheTriple.isOSWindows())
return new WindowsX86AsmBackend(T, true);
- return new ELFX86_64AsmBackend(T, TheTriple.getOS());
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
+ return new ELFX86_64AsmBackend(T, OSABI);
}
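
The new fixupNeedsRelaxation hook above boils down to a signed 8-bit range check, since the only relaxation X86 performs is 1-byte pcrel to 4-byte pcrel. The check, restated with boundary values:

    // Relax exactly when Value cannot round-trip through a signed i8.
    static bool needsRelax(uint64_t Value) {
      return int64_t(Value) != int64_t(int8_t(Value));
    }
    // needsRelax(127)            == false  (0x7F fits in i8)
    // needsRelax(128)            == true   (0x80 would read back as -128)
    // needsRelax(uint64_t(-128)) == false  (0xFF...80 sign-extends cleanly)
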
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index e6ba705d4d87..a0bb6dc6d60f 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -19,7 +19,7 @@
#include "X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
-#include <cassert>
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -164,7 +164,13 @@ namespace X86II {
/// is some TLS offset from the picbase.
///
/// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
- MO_TLVP_PIC_BASE
+ MO_TLVP_PIC_BASE,
+
+ /// MO_SECREL - On a symbol operand this indicates that the immediate is
+ /// the offset from the beginning of the section.
+ ///
+ /// This is the TLS offset for the COFF/Windows TLS mechanism.
+ MO_SECREL
};
enum {
@@ -223,19 +229,13 @@ namespace X86II {
// destinations are the same register.
MRMInitReg = 32,
- //// MRM_C1 - A mod/rm byte of exactly 0xC1.
- MRM_C1 = 33,
- MRM_C2 = 34,
- MRM_C3 = 35,
- MRM_C4 = 36,
- MRM_C8 = 37,
- MRM_C9 = 38,
- MRM_E8 = 39,
- MRM_F0 = 40,
- MRM_F8 = 41,
- MRM_F9 = 42,
- MRM_D0 = 45,
- MRM_D1 = 46,
+ /// MRM_XX - A mod/rm byte of exactly 0xXX.
+ MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35, MRM_C4 = 36,
+ MRM_C8 = 37, MRM_C9 = 38, MRM_E8 = 39, MRM_F0 = 40,
+ MRM_F8 = 41, MRM_F9 = 42, MRM_D0 = 45, MRM_D1 = 46,
+ MRM_D4 = 47, MRM_D8 = 48, MRM_D9 = 49, MRM_DA = 50,
+ MRM_DB = 51, MRM_DC = 52, MRM_DD = 53, MRM_DE = 54,
+ MRM_DF = 55,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
@@ -295,8 +295,20 @@ namespace X86II {
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
- // TF - Prefix before and after 0x0F
- TF = 17 << Op0Shift,
+ // T8XD - Prefix before and after 0x0F. Combination of T8 and XD.
+ T8XD = 17 << Op0Shift,
+
+ // T8XS - Prefix before and after 0x0F. Combination of T8 and XS.
+ T8XS = 18 << Op0Shift,
+
+ // TAXD - Prefix before and after 0x0F. Combination of TA and XD.
+ TAXD = 19 << Op0Shift,
+
+ // XOP8 - Prefix to include use of imm byte.
+ XOP8 = 20 << Op0Shift,
+
+ // XOP9 - Prefix to exclude use of imm byte.
+ XOP9 = 21 << Op0Shift,
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -387,20 +399,24 @@ namespace X86II {
/// and the additional register is encoded in VEX_VVVV prefix.
VEX_4V = 1U << 2,
+ /// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
+ /// operand 3 with VEX.vvvv.
+ VEX_4VOp3 = 1U << 3,
+
/// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
/// must be encoded in the i8 immediate field. This usually happens in
/// instructions with 4 operands.
- VEX_I8IMM = 1U << 3,
+ VEX_I8IMM = 1U << 4,
/// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
/// instruction uses 256-bit wide registers. This is usually auto detected
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
- VEX_L = 1U << 4,
+ VEX_L = 1U << 5,
// VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
// prefix. Usually used for scalar instructions. Needed by disassembler.
- VEX_LIG = 1U << 5,
+ VEX_LIG = 1U << 6,
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
@@ -408,7 +424,15 @@ namespace X86II {
/// storing a classifier in the imm8 field. To simplify our implementation,
/// we handle this by storing the classifier in the opcode field and using
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
- Has3DNow0F0FOpcode = 1U << 6
+ Has3DNow0F0FOpcode = 1U << 7,
+
+ /// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in
+ /// ModRM or I8IMM. This is used for FMA4 and XOP instructions.
+ MemOp4 = 1U << 8,
+
+ /// XOP - Opcode prefix used by XOP instructions.
+ XOP = 1U << 9
+
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -426,7 +450,7 @@ namespace X86II {
/// of the specified instruction.
static inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
+ default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8:
case X86II::Imm8PCRel: return 1;
case X86II::Imm16:
@@ -441,7 +465,7 @@ namespace X86II {
/// TSFlags indicates that it is pc relative.
static inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
+ default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8PCRel:
case X86II::Imm16PCRel:
case X86II::Imm32PCRel:
@@ -462,10 +486,10 @@ namespace X86II {
/// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
/// counted as one operand.
///
- static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ static inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
- default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this form");
+ default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
case X86II::Pseudo:
case X86II::RawFrm:
case X86II::AddRegFrm:
@@ -478,9 +502,12 @@ namespace X86II {
return 0;
case X86II::MRMSrcMem: {
bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
+ if (HasMemOp4)
+ ++FirstMemOp;// Skip the register source (which is encoded in I8IMM).
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
@@ -495,20 +522,24 @@ namespace X86II {
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- return 0;
- case X86II::MRM_C1:
- case X86II::MRM_C2:
- case X86II::MRM_C3:
- case X86II::MRM_C4:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- case X86II::MRM_F8:
- case X86II::MRM_F9:
- case X86II::MRM_D0:
- case X86II::MRM_D1:
+ case X86II::MRM6m: case X86II::MRM7m: {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ unsigned FirstMemOp = 0;
+ if (HasVEX_4V)
+ ++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
+ return FirstMemOp;
+ }
+ case X86II::MRM_C1: case X86II::MRM_C2:
+ case X86II::MRM_C3: case X86II::MRM_C4:
+ case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_E8: case X86II::MRM_F0:
+ case X86II::MRM_F8: case X86II::MRM_F9:
+ case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF:
return -1;
}
}
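
getMemoryOperandNo now takes the implicit VEX and imm8 register sources into account, so the memory operand index shifts with the encoding flags. An illustrative operand layout for an FMA4-style MRMSrcMem instruction with both VEX_4V and MemOp4 set (operand numbers are for this example only):

    // dst                   operand 0  (ModR/M.reg)
    // src1                  operand 1  (VEX.vvvv)   -> skipped if HasVEX_4V
    // src2                  operand 2  (imm8[7:4])  -> skipped if HasMemOp4
    // MemAddr (5 operands)  operands 3..7
    unsigned FirstMemOp = 1;
    if (HasVEX_4V) ++FirstMemOp;  // register source lives in VEX.vvvv
    if (HasMemOp4) ++FirstMemOp;  // register source lives in imm8[7:4]
    // FirstMemOp == 3 here, matching what the switch above returns.
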
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
new file mode 100644
index 000000000000..5a42a801825d
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -0,0 +1,224 @@
+//===-- X86ELFObjectWriter.cpp - X86 ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class X86ELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ X86ELFObjectWriter(bool is64Bit, uint8_t OSABI);
+
+ virtual ~X86ELFObjectWriter();
+ protected:
+ virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel, bool IsRelocWithSymbol,
+ int64_t Addend) const;
+ };
+}
+
+X86ELFObjectWriter::X86ELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_X86_64 : ELF::EM_386,
+ /*HasRelocationAddend*/ Is64Bit) {}
+
+X86ELFObjectWriter::~X86ELFObjectWriter() {}
+
+unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel,
+ bool IsRelocWithSymbol,
+ int64_t Addend) const {
+ // Determine the type of the relocation.
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+ unsigned Type;
+ if (is64Bit()) {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case FK_Data_8: Type = ELF::R_X86_64_PC64; break;
+ case FK_Data_4: Type = ELF::R_X86_64_PC32; break;
+ case FK_Data_2: Type = ELF::R_X86_64_PC16; break;
+
+ case FK_PCRel_8:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC64;
+ break;
+ case X86::reloc_signed_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_riprel_4byte:
+ case FK_PCRel_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_X86_64_PLT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_X86_64_GOTTPOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_X86_64_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Type = ELF::R_X86_64_TLSLD;
+ break;
+ }
+ break;
+ case FK_PCRel_2:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC16;
+ break;
+ case FK_PCRel_1:
+ assert(Modifier == MCSymbolRefExpr::VK_None);
+ Type = ELF::R_X86_64_PC8;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_signed_4byte:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_X86_64_32S;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_X86_64_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_X86_64_GOTPCREL;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_X86_64_TPOFF32;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_X86_64_DTPOFF32;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_PC32;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_386_PLT32;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_386_GOTPC;
+ break;
+
+ // FIXME: Should we avoid selecting reloc_signed_4byte in 32-bit mode
+ // instead?
+ case X86::reloc_signed_4byte:
+ case FK_PCRel_4:
+ case FK_Data_4:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_386_32;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_386_GOT32;
+ break;
+ case MCSymbolRefExpr::VK_GOTOFF:
+ Type = ELF::R_386_GOTOFF;
+ break;
+ case MCSymbolRefExpr::VK_TLSGD:
+ Type = ELF::R_386_TLS_GD;
+ break;
+ case MCSymbolRefExpr::VK_TPOFF:
+ Type = ELF::R_386_TLS_LE_32;
+ break;
+ case MCSymbolRefExpr::VK_INDNTPOFF:
+ Type = ELF::R_386_TLS_IE;
+ break;
+ case MCSymbolRefExpr::VK_NTPOFF:
+ Type = ELF::R_386_TLS_LE;
+ break;
+ case MCSymbolRefExpr::VK_GOTNTPOFF:
+ Type = ELF::R_386_TLS_GOTIE;
+ break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_386_TLS_LDM;
+ break;
+ case MCSymbolRefExpr::VK_DTPOFF:
+ Type = ELF::R_386_TLS_LDO_32;
+ break;
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ Type = ELF::R_386_TLS_IE_32;
+ break;
+ }
+ break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case FK_PCRel_1:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ return Type;
+}
+
+MCObjectWriter *llvm::createX86ELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW =
+ new X86ELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
+}
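
To make the new GetRelocType concrete, two representative walks through it (instructions and symbols are invented for illustration):

    // 64-bit: call foo@PLT
    //   fixup FK_PCRel_4, modifier VK_PLT, IsPCRel
    //   -> Type = ELF::R_X86_64_PLT32
    // 32-bit: movl $sym, %eax
    //   fixup FK_Data_4, modifier VK_None, !IsPCRel
    //   -> Type = ELF::R_386_32
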
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index 17d242ab761e..f2e34cbe0d65 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -1,4 +1,4 @@
-//===-- X86/X86FixupKinds.h - X86 Specific Fixup Entries --------*- C++ -*-===//
+//===-- X86FixupKinds.h - X86 Specific Fixup Entries ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 27031005bd09..afa545cbb314 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -48,6 +48,8 @@ static const char *const x86_asm_table[] = {
"{cc}", "cc",
0,0};
+void X86MCAsmInfoDarwin::anchor() { }
+
X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
bool is64Bit = T.getArch() == Triple::x86_64;
if (is64Bit)
@@ -80,6 +82,8 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
: X86MCAsmInfoDarwin(Triple) {
}
+void X86ELFMCAsmInfo::anchor() { }
+
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
if (T.getArch() == Triple::x86_64)
PointerSize = 8;
@@ -125,7 +129,23 @@ getNonexecutableStackSection(MCContext &Ctx) const {
0, SectionKind::getMetadata());
}
-X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+void X86MCAsmInfoMicrosoft::anchor() { }
+
+X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64) {
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+ }
+
+ AsmTransCBE = x86_asm_table;
+ AssemblerDialect = AsmWriterFlavor;
+
+ TextAlignFillValue = 0x90;
+}
+
+void X86MCAsmInfoGNUCOFF::anchor() { }
+
+X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
if (Triple.getArch() == Triple::x86_64) {
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
@@ -135,4 +155,7 @@ X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
AssemblerDialect = AsmWriterFlavor;
TextAlignFillValue = 0x90;
+
+ // Exception handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8eb30ec..b6b70fd3e855 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- X86MCAsmInfo.h - X86 asm properties -----------------*- C++ -*--====//
+//===-- X86MCAsmInfo.h - X86 asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,7 +21,9 @@
namespace llvm {
class Triple;
- struct X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ class X86MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ virtual void anchor();
+ public:
explicit X86MCAsmInfoDarwin(const Triple &Triple);
};
@@ -33,13 +35,23 @@ namespace llvm {
MCStreamer &Streamer) const;
};
- struct X86ELFMCAsmInfo : public MCAsmInfo {
+ class X86ELFMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
explicit X86ELFMCAsmInfo(const Triple &Triple);
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
- struct X86MCAsmInfoCOFF : public MCAsmInfoCOFF {
- explicit X86MCAsmInfoCOFF(const Triple &Triple);
+ class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoMicrosoft(const Triple &Triple);
+ };
+
+ class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF {
+ virtual void anchor();
+ public:
+ explicit X86MCAsmInfoGNUCOFF(const Triple &Triple);
};
} // namespace llvm
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 2eee1128119e..80990e5822bd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- X86/X86MCCodeEmitter.cpp - Convert X86 code to machine code -------===//
+//===-- X86MCCodeEmitter.cpp - Convert X86 code to machine code -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -46,6 +46,11 @@ public:
return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
}
+ bool is32BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) == 0;
+ }
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86_MC::getX86RegNum(MO.getReg());
}
@@ -63,9 +68,8 @@ public:
unsigned OpNum) {
unsigned SrcReg = MI.getOperand(OpNum).getReg();
unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
- if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
- (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
- SrcRegNum += 8;
+ if (X86II::isX86_64ExtendedReg(SrcReg))
+ SrcRegNum |= 8;
// The registers represented through VEX_VVVV should
// be encoded in 1's complement form.
@@ -86,7 +90,7 @@ public:
}
}
- void EmitImmediate(const MCOperand &Disp,
+ void EmitImmediate(const MCOperand &Disp, SMLoc Loc,
unsigned ImmSize, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
@@ -155,9 +159,8 @@ static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
return MCFixup::getKindForSize(Size, isPCRel);
}
-/// Is32BitMemOperand - Return true if the specified instruction with a memory
-/// operand should emit the 0x67 prefix byte in 64-bit mode due to a 32-bit
-/// memory operand. Op specifies the operand # of the memoperand.
+/// Is32BitMemOperand - Return true if the specified instruction has
+/// a 32-bit memory operand. Op specifies the operand # of the memoperand.
static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
@@ -170,28 +173,71 @@ static bool Is32BitMemOperand(const MCInst &MI, unsigned Op) {
return false;
}
-/// StartsWithGlobalOffsetTable - Return true for the simple cases where this
-/// expression starts with _GLOBAL_OFFSET_TABLE_. This is a needed to support
-/// PIC on ELF i386 as that symbol is magic. We check only simple case that
+/// Is64BitMemOperand - Return true if the specified instruction has
+/// a 64-bit memory operand. Op specifies the operand # of the memoperand.
+#ifndef NDEBUG
+static bool Is64BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+#endif
+
+/// Is16BitMemOperand - Return true if the specified instruction has
+/// a 16-bit memory operand. Op specifies the operand # of the memoperand.
+static bool Is16BitMemOperand(const MCInst &MI, unsigned Op) {
+ const MCOperand &BaseReg = MI.getOperand(Op+X86::AddrBaseReg);
+ const MCOperand &IndexReg = MI.getOperand(Op+X86::AddrIndexReg);
+
+ if ((BaseReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg.getReg())) ||
+ (IndexReg.getReg() != 0 &&
+ X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg.getReg())))
+ return true;
+ return false;
+}
+
+/// StartsWithGlobalOffsetTable - Check if this expression starts with
+/// _GLOBAL_OFFSET_TABLE_ and if it is of the form
+/// _GLOBAL_OFFSET_TABLE_-symbol. This is needed to support PIC on ELF
+/// i386 as _GLOBAL_OFFSET_TABLE_ is magical. We check only the simple cases that
/// are known to be used: _GLOBAL_OFFSET_TABLE_ by itself or at the start
/// of a binary expression.
-static bool StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+enum GlobalOffsetTableExprKind {
+ GOT_None,
+ GOT_Normal,
+ GOT_SymDiff
+};
+static GlobalOffsetTableExprKind
+StartsWithGlobalOffsetTable(const MCExpr *Expr) {
+ const MCExpr *RHS = 0;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
Expr = BE->getLHS();
+ RHS = BE->getRHS();
}
if (Expr->getKind() != MCExpr::SymbolRef)
- return false;
+ return GOT_None;
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
const MCSymbol &S = Ref->getSymbol();
- return S.getName() == "_GLOBAL_OFFSET_TABLE_";
+ if (S.getName() != "_GLOBAL_OFFSET_TABLE_")
+ return GOT_None;
+ if (RHS && RHS->getKind() == MCExpr::SymbolRef)
+ return GOT_SymDiff;
+ return GOT_Normal;
}
void X86MCCodeEmitter::
-EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
- unsigned &CurByte, raw_ostream &OS,
+EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
+ MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
const MCExpr *Expr = NULL;
if (DispOp.isImm()) {
@@ -210,12 +256,21 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
if ((FixupKind == FK_Data_4 ||
- FixupKind == MCFixupKind(X86::reloc_signed_4byte)) &&
- StartsWithGlobalOffsetTable(Expr)) {
- assert(ImmOffset == 0);
-
- FixupKind = MCFixupKind(X86::reloc_global_offset_table);
- ImmOffset = CurByte;
+ FixupKind == FK_Data_8 ||
+ FixupKind == MCFixupKind(X86::reloc_signed_4byte))) {
+ GlobalOffsetTableExprKind Kind = StartsWithGlobalOffsetTable(Expr);
+ if (Kind != GOT_None) {
+ assert(ImmOffset == 0);
+
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Kind == GOT_Normal)
+ ImmOffset = CurByte;
+ } else if (Expr->getKind() == MCExpr::SymbolRef) {
+ const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr*>(Expr);
+ if (Ref->getKind() == MCSymbolRefExpr::VK_SECREL) {
+ FixupKind = MCFixupKind(FK_SecRel_4);
+ }
+ }
}
// If the fixup is pc-relative, we need to bias the value to be relative to
@@ -234,7 +289,7 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
+ Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind, Loc));
EmitConstant(0, Size, CurByte, OS);
}
@@ -270,7 +325,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// expression to emit.
int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
- EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind),
CurByte, OS, Fixups, -ImmSize);
return;
}
@@ -294,7 +349,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
if (BaseReg == 0) { // [disp32] in X86-32 mode
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
- EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
@@ -310,13 +365,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm() && isDisp8(Disp.getImm())) {
EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
- EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
Fixups);
return;
}
@@ -375,10 +430,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Do we need to output a displacement?
if (ForceDisp8)
- EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, CurByte, OS, Fixups);
else if (ForceDisp32 || Disp.getImm() != 0)
- EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS,
- Fixups);
+ EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
+ CurByte, OS, Fixups);
}
/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
@@ -387,9 +442,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
const MCInstrDesc &Desc,
raw_ostream &OS) const {
- bool HasVEX_4V = false;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
- HasVEX_4V = true;
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
// VEX_R: opcode extension equivalent to REX.R in
// 1's complement (inverted) form
@@ -417,6 +471,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// opcode extension, or ignored, depending on the opcode byte)
unsigned char VEX_W = 0;
+ // XOP: Use XOP prefix byte 0x8f instead of VEX.
+ unsigned char XOP = 0;
+
// VEX_5M (VEX m-mmmmm field):
//
// 0b00000: Reserved for future use
@@ -424,7 +481,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// 0b00010: implied 0F 38 leading opcode bytes
// 0b00011: implied 0F 3A leading opcode bytes
// 0b00100-0b11111: Reserved for future use
- //
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
unsigned char VEX_5M = 0x1;
// VEX_4V (VEX vvvv field): a register specifier
@@ -455,27 +513,44 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
+ if ((TSFlags >> X86II::VEXShift) & X86II::XOP)
+ XOP = 1;
+
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case X86II::T8: // 0F 38
VEX_5M = 0x2;
break;
case X86II::TA: // 0F 3A
VEX_5M = 0x3;
break;
- case X86II::TF: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ VEX_PP = 0x2;
+ VEX_5M = 0x2;
+ break;
+ case X86II::T8XD: // F2 0F 38
VEX_PP = 0x3;
VEX_5M = 0x2;
break;
+ case X86II::TAXD: // F2 0F 3A
+ VEX_PP = 0x3;
+ VEX_5M = 0x3;
+ break;
case X86II::XS: // F3 0F
VEX_PP = 0x2;
break;
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::XOP8:
+ VEX_5M = 0x8;
+ break;
+ case X86II::XOP9:
+ VEX_5M = 0x9;
+ break;
case X86II::A6: // Bypass: Not used by VEX
case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
@@ -483,6 +558,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
break; // No prefix!
}
// Set the vector length to 256-bit if YMM0-YMM15 is used
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isReg())
@@ -495,7 +571,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Classify VEX_B, VEX_4V, VEX_R, VEX_X
unsigned CurOp = 0;
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
case X86II::MRMDestMem: {
// MRMDestMem instructions forms:
// MemAddr, src1(ModR/M)
@@ -516,41 +592,50 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_R = 0x0;
break;
}
- case X86II::MRMSrcMem: {
+ case X86II::MRMSrcMem:
// MRMSrcMem instructions forms:
// src1(ModR/M), MemAddr
// src1(ModR/M), src2(VEX_4V), MemAddr
// src1(ModR/M), MemAddr, imm8
// src1(ModR/M), MemAddr, src2(VEX_I8IMM)
//
+ // FMA4:
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
VEX_R = 0x0;
- unsigned MemAddrOffset = 1;
- if (HasVEX_4V) {
+ if (HasVEX_4V)
VEX_4V = getVEXRegisterEncoding(MI, 1);
- MemAddrOffset++;
- }
if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemAddrOffset+X86::AddrBaseReg).getReg()))
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
VEX_B = 0x0;
if (X86II::isX86_64ExtendedReg(
- MI.getOperand(MemAddrOffset+X86::AddrIndexReg).getReg()))
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
VEX_X = 0x0;
+
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
break;
- }
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRM6m: case X86II::MRM7m: {
// MRM[0-9]m instructions forms:
// MemAddr
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrBaseReg).getReg()))
+ // src1(VEX_4V), MemAddr
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, 0);
+
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrBaseReg).getReg()))
VEX_B = 0x0;
- if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg()))
+ if (X86II::isX86_64ExtendedReg(
+ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
VEX_X = 0x0;
break;
+ }
case X86II::MRMSrcReg:
// MRMSrcReg instructions forms:
// dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
@@ -565,6 +650,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0x0;
+ CurOp++;
+ if (HasVEX_4VOp3)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
break;
case X86II::MRMDestReg:
// MRMDestReg instructions forms:
@@ -605,14 +693,14 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
//
unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
- if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ if (VEX_B && VEX_X && !VEX_W && !XOP && (VEX_5M == 1)) { // 2 byte VEX prefix
EmitByte(0xC5, CurByte, OS);
EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
return;
}
// 3 byte VEX prefix
- EmitByte(0xC4, CurByte, OS);
+ EmitByte(XOP ? 0x8F : 0xC4, CurByte, OS);
EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
}
@@ -647,7 +735,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
}
switch (TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMInitReg: llvm_unreachable("FIXME: Remove this!");
case X86II::MRMSrcReg:
if (MI.getOperand(0).isReg() &&
X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
@@ -717,12 +805,12 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
const MCInst &MI,
raw_ostream &OS) const {
switch (TSFlags & X86II::SegOvrMask) {
- default: assert(0 && "Invalid segment!");
+ default: llvm_unreachable("Invalid segment!");
case 0:
// No segment override, check for explicit one on memory operand.
if (MemOperand != -1) { // If the instruction has a memory operand.
switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
- default: assert(0 && "Unknown segment register!");
+ default: llvm_unreachable("Unknown segment register!");
case 0: break;
case X86::CS: EmitByte(0x2E, CurByte, OS); break;
case X86::SS: EmitByte(0x36, CurByte, OS); break;
@@ -763,8 +851,22 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EmitByte(0xF3, CurByte, OS);
// Emit the address size opcode prefix as needed.
- if ((TSFlags & X86II::AdSize) ||
- (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
+ bool need_address_override;
+ if (TSFlags & X86II::AdSize) {
+ need_address_override = true;
+ } else if (MemOperand == -1) {
+ need_address_override = false;
+ } else if (is64BitMode()) {
+ assert(!Is16BitMemOperand(MI, MemOperand));
+ need_address_override = Is32BitMemOperand(MI, MemOperand);
+ } else if (is32BitMode()) {
+ assert(!Is64BitMemOperand(MI, MemOperand));
+ need_address_override = Is16BitMemOperand(MI, MemOperand);
+ } else {
+ need_address_override = false;
+ }
+
+ if (need_address_override)
EmitByte(0x67, CurByte, OS);
// Emit the operand size opcode prefix as needed.
@@ -773,7 +875,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
- default: assert(0 && "Invalid prefix!");
+ default: llvm_unreachable("Invalid prefix!");
case 0: break; // No prefix!
case X86II::REP: break; // already handled.
case X86II::TB: // Two-byte opcode prefix
@@ -783,7 +885,15 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
- case X86II::TF: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ EmitByte(0xF3, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::T8XD: // F2 0F 38
+ EmitByte(0xF2, CurByte, OS);
+ Need0FPrefix = true;
+ break;
+ case X86II::TAXD: // F2 0F 3A
EmitByte(0xF2, CurByte, OS);
Need0FPrefix = true;
break;
@@ -818,10 +928,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// FIXME: Pull this up into previous switch if REX can be moved earlier.
switch (TSFlags & X86II::Op0Mask) {
- case X86II::TF: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
+ case X86II::T8XD: // F2 0F 38
case X86II::T8: // 0F 38
EmitByte(0x38, CurByte, OS);
break;
+ case X86II::TAXD: // F2 0F 3A
case X86II::TA: // 0F 3A
EmitByte(0x3A, CurByte, OS);
break;
@@ -859,18 +971,16 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned CurByte = 0;
// Is this instruction encoded using the AVX VEX prefix?
- bool HasVEXPrefix = false;
+ bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
// It uses the VEX.VVVV field?
- bool HasVEX_4V = false;
-
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
- HasVEXPrefix = true;
- if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
- HasVEX_4V = true;
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ const unsigned MemOp4_I8IMMOperand = 2;
// Determine where the memory operand starts, if present.
- int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
if (MemoryOperand != -1) MemoryOperand += CurOp;
if (!HasVEXPrefix)
@@ -886,27 +996,29 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
- assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
+ llvm_unreachable("FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
- assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
+ llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!");
case X86II::Pseudo:
- assert(0 && "Pseudo instruction shouldn't be emitted");
+ llvm_unreachable("Pseudo instruction shouldn't be emitted");
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
case X86II::RawFrmImm8:
EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++),
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
- EmitImmediate(MI.getOperand(CurOp++), 1, FK_Data_1, CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, CurByte,
+ OS, Fixups);
break;
case X86II::RawFrmImm16:
EmitByte(BaseOpcode, CurByte, OS);
- EmitImmediate(MI.getOperand(CurOp++),
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
CurByte, OS, Fixups);
- EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, CurByte,
+ OS, Fixups);
break;
case X86II::AddRegFrm:
@@ -940,9 +1052,16 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
SrcRegNum++;
+ if (HasMemOp4) // Skip 2nd src (which is encoded in I8IMM)
+ SrcRegNum++;
+
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
- CurOp = SrcRegNum + 1;
+
+ // 2 operands skipped with HasMemOp4, compensate accordingly
+ CurOp = HasMemOp4 ? SrcRegNum : SrcRegNum + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
break;
case X86II::MRMSrcMem: {
@@ -952,12 +1071,16 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
++AddrOperands;
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
}
+ if (HasMemOp4) // Skip second register source (encoded in I8IMM)
+ ++FirstMemOp;
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
+ if (HasVEX_4VOp3)
+ ++CurOp;
break;
}
@@ -976,58 +1099,52 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
TSFlags, CurByte, OS, Fixups);
CurOp += X86::AddrNumOperands;
break;
- case X86II::MRM_C1:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC1, CurByte, OS);
- break;
- case X86II::MRM_C2:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC2, CurByte, OS);
- break;
- case X86II::MRM_C3:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC3, CurByte, OS);
- break;
- case X86II::MRM_C4:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC4, CurByte, OS);
- break;
- case X86II::MRM_C8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC8, CurByte, OS);
- break;
- case X86II::MRM_C9:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xC9, CurByte, OS);
- break;
- case X86II::MRM_E8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xE8, CurByte, OS);
- break;
- case X86II::MRM_F0:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xF0, CurByte, OS);
- break;
- case X86II::MRM_F8:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xF8, CurByte, OS);
- break;
+ case X86II::MRM_C1: case X86II::MRM_C2:
+ case X86II::MRM_C3: case X86II::MRM_C4:
+ case X86II::MRM_C8: case X86II::MRM_C9:
+ case X86II::MRM_D0: case X86II::MRM_D1:
+ case X86II::MRM_D4: case X86II::MRM_D8:
+ case X86II::MRM_D9: case X86II::MRM_DA:
+ case X86II::MRM_DB: case X86II::MRM_DC:
+ case X86II::MRM_DD: case X86II::MRM_DE:
+ case X86II::MRM_DF: case X86II::MRM_E8:
+ case X86II::MRM_F0: case X86II::MRM_F8:
case X86II::MRM_F9:
EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xF9, CurByte, OS);
- break;
- case X86II::MRM_D0:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xD0, CurByte, OS);
- break;
- case X86II::MRM_D1:
- EmitByte(BaseOpcode, CurByte, OS);
- EmitByte(0xD1, CurByte, OS);
+
+ unsigned char MRM;
+ switch (TSFlags & X86II::FormMask) {
+ default: llvm_unreachable("Invalid Form");
+ case X86II::MRM_C1: MRM = 0xC1; break;
+ case X86II::MRM_C2: MRM = 0xC2; break;
+ case X86II::MRM_C3: MRM = 0xC3; break;
+ case X86II::MRM_C4: MRM = 0xC4; break;
+ case X86II::MRM_C8: MRM = 0xC8; break;
+ case X86II::MRM_C9: MRM = 0xC9; break;
+ case X86II::MRM_D0: MRM = 0xD0; break;
+ case X86II::MRM_D1: MRM = 0xD1; break;
+ case X86II::MRM_D4: MRM = 0xD4; break;
+ case X86II::MRM_D8: MRM = 0xD8; break;
+ case X86II::MRM_D9: MRM = 0xD9; break;
+ case X86II::MRM_DA: MRM = 0xDA; break;
+ case X86II::MRM_DB: MRM = 0xDB; break;
+ case X86II::MRM_DC: MRM = 0xDC; break;
+ case X86II::MRM_DD: MRM = 0xDD; break;
+ case X86II::MRM_DE: MRM = 0xDE; break;
+ case X86II::MRM_DF: MRM = 0xDF; break;
+ case X86II::MRM_E8: MRM = 0xE8; break;
+ case X86II::MRM_F0: MRM = 0xF0; break;
+ case X86II::MRM_F8: MRM = 0xF8; break;
+ case X86II::MRM_F9: MRM = 0xF9; break;
+ }
+ EmitByte(MRM, CurByte, OS);
break;
}
@@ -1035,14 +1152,26 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// according to the right size for the instruction.
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
- // in bits[7:4] of a immediate byte, and bits[3:0] are ignored.
+ // in bits[7:4] of an immediate byte.
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
- const MCOperand &MO = MI.getOperand(CurOp++);
+ const MCOperand &MO = MI.getOperand(HasMemOp4 ? MemOp4_I8IMMOperand
+ : CurOp);
+ CurOp++;
bool IsExtReg = X86II::isX86_64ExtendedReg(MO.getReg());
unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
RegNum |= GetX86RegNum(MO) << 4;
- EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
- Fixups);
+ // If there is an additional 5th operand, it must be an immediate, which
+ // is encoded in bits[3:0].
+ if (CurOp != NumOps) {
+ const MCOperand &MIMM = MI.getOperand(CurOp++);
+ if (MIMM.isImm()) {
+ unsigned Val = MIMM.getImm();
+ assert(Val < 16 && "Immediate operand value out of range");
+ RegNum |= Val;
+ }
+ }
+ EmitImmediate(MCOperand::CreateImm(RegNum), MI.getLoc(), 1, FK_Data_1,
+ CurByte, OS, Fixups);
} else {
unsigned FixupKind;
// FIXME: Is there a better way to know that we need a signed relocation?
@@ -1053,7 +1182,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
FixupKind = X86::reloc_signed_4byte;
else
FixupKind = getImmFixupKind(TSFlags);
- EmitImmediate(MI.getOperand(CurOp++),
+ EmitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), MCFixupKind(FixupKind),
CurByte, OS, Fixups);
}
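
A minimal sketch of the imm8 byte the VEX_I8IMM path above assembles (packVexImm8
is a hypothetical helper, not part of this patch): the extra source register lands
in bits [7:4], bit 7 marks an x86-64 extended register, and the optional trailing
immediate occupies bits [3:0].

  // Hypothetical helper; RegNum is the 3-bit value from GetX86RegNum and
  // Imm4 is the optional 5th-operand immediate (must be < 16).
  static unsigned char packVexImm8(unsigned RegNum, bool IsExtReg,
                                   unsigned Imm4) {
    unsigned char Byte = (IsExtReg ? (1 << 7) : 0) | (RegNum << 4);
    return Byte | (Imm4 & 0xF);
  }
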
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index f98d5e331fef..348236316c89 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Host.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_REGINFO_MC_DESC
@@ -35,6 +36,10 @@
#define GET_SUBTARGETINFO_MC_DESC
#include "X86GenSubtargetInfo.inc"
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
using namespace llvm;
@@ -45,10 +50,6 @@ std::string X86_MC::ParseX86Triple(StringRef TT) {
FS = "+64bit-mode";
else
FS = "-64bit-mode";
- if (TheTriple.getOS() == Triple::NativeClient)
- FS += ",+nacl-mode";
- else
- FS += ",-nacl-mode";
return FS;
}
@@ -76,6 +77,8 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
*rECX = registers[2];
*rEDX = registers[3];
return false;
+ #else
+ return true;
#endif
#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
#if defined(__GNUC__)
@@ -102,9 +105,81 @@ bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
mov dword ptr [esi],edx
}
return false;
+ #else
+ return true;
#endif
+#else
+ return true;
#endif
+}
+
+/// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the
+/// 4 values in the specified arguments. If we can't run cpuid on the host,
+/// return true.
+bool X86_MC::GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know cpuid clobbers ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value),
+ "c" (subleaf));
+ return false;
+ #elif defined(_MSC_VER)
+ // __cpuidex was added in MSVC++ 9.0 SP1
+ #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
+ int registers[4];
+ __cpuidex(registers, value, subleaf);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #else
+ return true;
+ #endif
+ #else
+ return true;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value),
+ "c" (subleaf));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ mov ecx,subleaf
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #else
+ return true;
+ #endif
+#else
return true;
+#endif
}
void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
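
A usage sketch for the new subleaf query (hostHasAVX2 is a hypothetical wrapper,
not in this patch; leaf and bit per the Intel SDM, where leaf 7 subleaf 0 reports
AVX2 in EBX bit 5):

  // The helper returns true when cpuid cannot be executed on the host,
  // so a failed query reads as "no AVX2".
  static bool hostHasAVX2() {
    unsigned EAX, EBX, ECX, EDX;
    if (X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX))
      return false;
    return (EBX >> 5) & 1;
  }
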
@@ -261,7 +336,8 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
std::string CPUName = CPU;
if (CPUName.empty()) {
-#if defined (__x86_64__) || defined(__i386__)
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
CPUName = sys::getHostCPUName();
#else
CPUName = "generic";
@@ -303,8 +379,10 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
MAI = new X86_64MCAsmInfoDarwin(TheTriple);
else
MAI = new X86MCAsmInfoDarwin(TheTriple);
- } else if (TheTriple.isOSWindows()) {
- MAI = new X86MCAsmInfoCOFF(TheTriple);
+ } else if (TheTriple.getOS() == Triple::Win32) {
+ MAI = new X86MCAsmInfoMicrosoft(TheTriple);
+ } else if (TheTriple.getOS() == Triple::MinGW32 ||
+            TheTriple.getOS() == Triple::Cygwin) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
MAI = new X86ELFMCAsmInfo(TheTriple);
}
@@ -327,7 +405,8 @@ static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
Triple T(TT);
@@ -371,7 +450,7 @@ static MCCodeGenInfo *createX86MCCodeGenInfo(StringRef TT, Reloc::Model RM,
// 64-bit JIT places everything in the same buffer except external funcs.
CM = is64Bit ? CodeModel::Large : CodeModel::Small;
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
@@ -395,11 +474,13 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
+ return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
+ return new X86IntelInstPrinter(MAI, MII, MRI);
return 0;
}
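
The COFF asm-info selection above now splits in two; a condensed model of the
new predicate (usesGNUCOFFAsmInfo is a hypothetical name for illustration):

  // Win32 proper gets X86MCAsmInfoMicrosoft; MinGW and Cygwin keep
  // GNU-style COFF directives via X86MCAsmInfoGNUCOFF.
  static bool usesGNUCOFFAsmInfo(const Triple &TheTriple) {
    return TheTriple.getOS() == Triple::MinGW32 ||
           TheTriple.getOS() == Triple::Cygwin;
  }
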
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index c144c513de15..9896cbe53632 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -54,6 +54,11 @@ namespace X86_MC {
/// the specified arguments. If we can't run cpuid on the host, return true.
bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+ /// GetCpuIDAndInfoEx - Execute the specified cpuid with subleaf and return
+ /// the 4 values in the specified arguments. If we can't run cpuid on the
+ /// host, return true.
+ bool GetCpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
@@ -83,6 +88,12 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+/// createX86ELFObjectWriter - Construct an X86 ELF object writer.
+MCObjectWriter *createX86ELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint8_t OSABI);
+/// createX86WinCOFFObjectWriter - Construct an X86 Win COFF object writer.
+MCObjectWriter *createX86WinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
} // End llvm namespace
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
new file mode 100644
index 000000000000..bc272efcc9ce
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -0,0 +1,65 @@
+//===-- X86WinCOFFObjectWriter.cpp - X86 Win COFF Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace llvm {
+ class MCObjectWriter;
+}
+
+namespace {
+ class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+ const bool Is64Bit;
+
+ public:
+ X86WinCOFFObjectWriter(bool Is64Bit_);
+ ~X86WinCOFFObjectWriter();
+
+ virtual unsigned getRelocType(unsigned FixupKind) const;
+ };
+}
+
+X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
+ : MCWinCOFFObjectTargetWriter(Is64Bit_ ? COFF::IMAGE_FILE_MACHINE_AMD64 :
+ COFF::IMAGE_FILE_MACHINE_I386),
+ Is64Bit(Is64Bit_) {}
+
+X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
+
+unsigned X86WinCOFFObjectWriter::getRelocType(unsigned FixupKind) const {
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
+ case FK_Data_8:
+ if (Is64Bit)
+ return COFF::IMAGE_REL_AMD64_ADDR64;
+ llvm_unreachable("unsupported relocation type");
+ case FK_SecRel_4:
+ return Is64Bit ? COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+}
+
+MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
+ bool Is64Bit) {
+ MCWinCOFFObjectTargetWriter *MOTW = new X86WinCOFFObjectWriter(Is64Bit);
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
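
How the new writer gets reached is not part of this hunk; a hedged sketch of the
expected hookup in the X86 asm backend (class and member names are assumptions):

  // A COFF-targeting asm backend would hand object emission to the
  // new target writer.
  MCObjectWriter *
  WindowsX86AsmBackend::createObjectWriter(raw_ostream &OS) const {
    return createX86WinCOFFObjectWriter(OS, Is64Bit);
  }
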
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 7d901afae474..624e56fa0f64 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -923,15 +923,21 @@ The insertps's of $0 are pointless complex copies.
//===---------------------------------------------------------------------===//
-If SSE4.1 is available we should inline rounding functions instead of emitting
-a libcall.
+[UNSAFE FP]
-floor: roundsd $0x01, %xmm, %xmm
-ceil: roundsd $0x02, %xmm, %xmm
+void foo(double, double, double);
+void norm(double x, double y, double z) {
+ double scale = __builtin_sqrt(x*x + y*y + z*z);
+ foo(x/scale, y/scale, z/scale);
+}
-and likewise for the single precision versions.
+We currently generate an sqrtsd and 3 divsd instructions. This is bad; fp div is
+slow and not pipelined. In -ffast-math mode we could compute "1.0/scale" first
+and emit 3 mulsd in place of the divs. This can be done as a target-independent
+transform.
-Currently, SelectionDAGBuilder doesn't turn calls to these functions into the
-corresponding nodes and some targets (including X86) aren't ready for them.
+If we're dealing with floats instead of doubles we could even replace the sqrtss
+and inversion with an rsqrtss instruction, which computes 1/sqrt faster at the
+cost of reduced accuracy.
//===---------------------------------------------------------------------===//
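Spelled out, the rewrite this note asks for looks like the following (a sketch of
the -ffast-math form only; the reassociation is exactly what makes it unsafe in
general):

  void foo(double, double, double);
  void norm_fast(double x, double y, double z) {
    double scale = __builtin_sqrt(x*x + y*y + z*z);
    double inv = 1.0 / scale;    // one divide...
    foo(x*inv, y*inv, z*inv);    // ...and three multiplies
  }
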
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index b40795506071..6a8a4fdf2520 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -56,7 +56,7 @@ cmovs, we should expand to a conditional branch like GCC produces.
Some isel ideas:
-1. Dynamic programming based approach when compile time if not an
+1. Dynamic programming based approach when compile time is not an
issue.
2. Code duplication (addressing mode) during isel.
3. Other ideas from "Register-Sensitive Selection, Duplication, and
@@ -2061,34 +2061,20 @@ The trick is to match "fetch_and_add(X, -C) == C".
//===---------------------------------------------------------------------===//
-unsigned log2(unsigned x) {
- return x > 1 ? 32-__builtin_clz(x-1) : 0;
+unsigned t(unsigned a, unsigned b) {
+ return a <= b ? 5 : -5;
}
-generates (x86_64):
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- movl $63, %ecx
- bsrl %edi, %eax
- cmovel %ecx, %eax
- xorl $-32, %eax
- addl $33, %eax
-LBB0_2:
- ret
-
-The cmov and the early test are redundant:
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- bsrl %edi, %eax
- xorl $-32, %eax
- addl $33, %eax
-LBB0_2:
- ret
+We generate:
+ movl $5, %ecx
+ cmpl %esi, %edi
+ movl $-5, %eax
+ cmovbel %ecx, %eax
+
+GCC:
+ cmpl %edi, %esi
+ sbbl %eax, %eax
+ andl $-10, %eax
+ addl $5, %eax
//===---------------------------------------------------------------------===//
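A branchless C model of GCC's sequence (sbb materializes an "a > b" mask; the
and/add pair turns the select into arithmetic):

  unsigned t_branchless(unsigned a, unsigned b) {
    unsigned mask = 0u - (a > b);       // all-ones when a > b, else zero
    return (mask & (unsigned)-10) + 5;  // -5 when a > b, else 5
  }
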
diff --git a/lib/Target/X86/TargetInfo/CMakeLists.txt b/lib/Target/X86/TargetInfo/CMakeLists.txt
index 4da00fa44f4d..b1d0b9f9f9bd 100644
--- a/lib/Target/X86/TargetInfo/CMakeLists.txt
+++ b/lib/Target/X86/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMX86Info
X86TargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMX86Info
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMX86Info X86CommonTableGen)
diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..3c64a2255302
--- /dev/null
+++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Info
+parent = X86
+required_libraries = MC Support Target
+add_to_library_groups = X86
diff --git a/lib/Target/X86/Utils/CMakeLists.txt b/lib/Target/X86/Utils/CMakeLists.txt
index caffd8b37816..2e72c344d99c 100644
--- a/lib/Target/X86/Utils/CMakeLists.txt
+++ b/lib/Target/X86/Utils/CMakeLists.txt
@@ -4,9 +4,4 @@ add_llvm_library(LLVMX86Utils
X86ShuffleDecode.cpp
)
-add_llvm_library_dependencies(LLVMX86Utils
- LLVMCore
- LLVMSupport
- )
-
add_dependencies(LLVMX86Utils X86CommonTableGen)
diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..de0a30fa19c8
--- /dev/null
+++ b/lib/Target/X86/Utils/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/X86/Utils/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = X86Utils
+parent = X86
+required_libraries = Core Support
+add_to_library_groups = X86
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index aeb3309d09aa..32c722acc437 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -20,7 +20,7 @@
namespace llvm {
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
// Default to copying the dest value.
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
@@ -44,8 +44,7 @@ void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
}
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = NElts/2; i != NElts; ++i)
ShuffleMask.push_back(NElts+i);
@@ -54,8 +53,7 @@ void DecodeMOVHLPSMask(unsigned NElts,
}
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != NElts/2; ++i)
ShuffleMask.push_back(i);
@@ -63,16 +61,26 @@ void DecodeMOVLHPSMask(unsigned NElts,
ShuffleMask.push_back(NElts+i);
}
-void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
+/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
+/// VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ int NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + l);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
ShuffleMask.push_back(0);
ShuffleMask.push_back(1);
ShuffleMask.push_back(2);
@@ -83,8 +91,7 @@ void DecodePSHUFHWMask(unsigned Imm,
}
}
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
for (unsigned i = 0; i != 4; ++i) {
ShuffleMask.push_back((Imm & 3));
Imm >>= 2;
@@ -95,76 +102,35 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(VT, ShuffleMask);
-}
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2);
- ShuffleMask.push_back(i+NElts+NElts/2);
- }
-}
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // Part that reads from dest.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts);
- Imm /= NElts;
- }
- // Part that reads from src.
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(Imm % NElts + NElts);
- Imm /= NElts;
- }
-}
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
-void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i+NElts/2); // Reads from dest
- ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ int NewImm = Imm;
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + l);
+ NewImm /= NumLaneElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NumLaneElts/2; ++i) {
+ ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + l);
+ NewImm /= NumLaneElts;
+ }
+ if (NumLaneElts == 4) NewImm = Imm; // reload imm
}
}
-void DecodeUNPCKLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
-}
-
-void DecodeUNPCKLPDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
-}
-
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -173,55 +139,36 @@ void DecodeUNPCKLPMask(EVT VT,
if (NumLanes == 0 ) NumLanes = 1; // Handle MMX
unsigned NumLaneElts = NumElts / NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts / 2;
- for (unsigned s = 0; s < NumLanes; ++s) {
- for (unsigned i = Start; i != End; ++i) {
- ShuffleMask.push_back(i); // Reads from dest/src1
- ShuffleMask.push_back(i+NumLaneElts); // Reads from src/src2
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
}
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*32)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = 0; i != LaneSize; ++i) {
- unsigned Idx = (Imm >> (i*2)) & 0x3 ;
- ShuffleMask.push_back(Idx+(l*LaneSize));
- }
- }
-}
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
+ unsigned NumElts = VT.getVectorNumElements();
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- unsigned NumLanes = (NumElts*64)/128;
- unsigned LaneSize = NumElts/NumLanes;
-
- for (unsigned l = 0; l < NumLanes; ++l) {
- for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- unsigned Idx = (Imm >> i) & 0x1;
- ShuffleMask.push_back(Idx+(l*LaneSize));
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits() / 128;
+ if (NumLanes == 0) NumLanes = 1; // Handle MMX
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumElts); // Reads from src/src2
}
}
}
-void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
+void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
unsigned HalfSize = VT.getVectorNumElements()/2;
unsigned FstHalfBegin = (Imm & 0x3) * HalfSize;
unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize;
@@ -232,12 +179,4 @@ void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
ShuffleMask.push_back(i);
}
-void DecodeVPERM2F128Mask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask) {
- // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only
- // has information about the instruction and not the types. So for
- // instruction comments purpose, assume the 256-bit vector is v4i64.
- return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask);
-}
-
} // llvm namespace
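
A worked example of the lane-aware decode: pshufd with immediate 0x1B on a v4i32
has one 128-bit lane (NumLaneElts == 4), the two-bit fields are consumed low to
high, and the result is the full element reversal.

  // 0x1B is 00 01 10 11 in two-bit fields, read low to high: 3, 2, 1, 0.
  SmallVector<int, 8> Mask;
  DecodePSHUFMask(MVT::v4i32, 0x1B, Mask);  // Mask is now {3, 2, 1, 0}
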
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 58193e6a4688..5b8c6ef62e29 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -24,83 +24,41 @@
namespace llvm {
enum {
- SM_SentinelZero = ~0U
+ SM_SentinelZero = -1
};
-void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
// <3,1> or <6,7,2,3>
-void DecodeMOVHLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
// <0,2> or <0,1,4,5>
-void DecodeMOVLHPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFHWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFHWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePSHUFLWMask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLBWMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
+/// the type of the vector allowing it to handle different datatypes and vector
+/// widths.
+void DecodeSHUFPMask(EVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLWDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
+/// and punpckh*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKHMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
+/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// and punpckl*. VT indicates the type of the vector allowing it to handle
+/// different datatypes and vector widths.
+void DecodeUNPCKLMask(EVT VT, SmallVectorImpl<int> &ShuffleMask);
-void DecodePUNPCKLQDQMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodePUNPCKHMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKHPMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKLPSMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeUNPCKLPDMask(unsigned NElts,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. VT indicates the type of the vector allowing it to handle different
-/// datatypes and vector widths.
-void DecodeUNPCKLPMask(EVT VT,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-
-// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes and the mask of the first lane must
-// be the same of the second.
-void DecodeVPERMILPSMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit
-// elements. For 256-bit vectors, it's considered as two 128 lanes, the
-// referenced elements can't cross lanes but the mask of the first lane can
-// be the different of the second (not like VPERMILPS).
-void DecodeVPERMILPDMask(unsigned NElts, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-
-void DecodeVPERM2F128Mask(unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodeVPERM2F128Mask(EVT VT, unsigned Imm,
- SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeVPERM2X128Mask(EVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 81e94227fca6..ecc7b59c6fa0 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -24,8 +24,6 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
-class MachineCodeEmitter;
-class Target;
class X86TargetMachine;
/// createX86ISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 104b91fd3534..b6591d441969 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -1,4 +1,4 @@
-//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
+//===-- X86.td - Target definition file for the Intel X86 --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -23,9 +23,6 @@ include "llvm/Target/Target.td"
def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
"64-bit mode (x86_64)">;
-def ModeNaCl : SubtargetFeature<"nacl-mode", "InNaClMode", "true",
- "Native Client mode">;
-
//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
@@ -58,7 +55,7 @@ def FeatureSSE41 : SubtargetFeature<"sse41", "X86SSELevel", "SSE41",
[FeatureSSSE3]>;
def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
- [FeatureSSE41, FeaturePOPCNT]>;
+ [FeatureSSE41]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
"Enable 3DNow! instructions",
[FeatureMMX]>;
@@ -81,16 +78,24 @@ def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
"Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions",
- [FeaturePOPCNT]>;
+ [FeatureSSE3]>;
-def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
- "Enable AVX instructions">;
+def FeatureAVX : SubtargetFeature<"avx", "X86SSELevel", "AVX",
+ "Enable AVX instructions",
+ [FeatureSSE42]>;
+def FeatureAVX2 : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
+ "Enable AVX2 instructions",
+ [FeatureAVX]>;
def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
"Enable carry-less multiplication instructions">;
def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
- "Enable three-operand fused multiple-add">;
+ "Enable three-operand fused multiple-add",
+ [FeatureAVX]>;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
- "Enable four-operand fused multiple-add">;
+ "Enable four-operand fused multiple-add",
+ [FeatureAVX]>;
+def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true",
+ "Enable XOP instructions">;
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">;
@@ -102,17 +107,31 @@ def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true",
"Support RDRAND instruction">;
def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true",
"Support 16-bit floating point conversion instructions">;
+def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true",
+ "Support FS/GS Base instructions">;
def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true",
"Support LZCNT instruction">;
def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true",
"Support BMI instructions">;
+def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
+ "Support BMI2 instructions">;
+def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
+ "Use LEA for adjusting the stack pointer">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
+include "X86Schedule.td"
+
+def ProcIntelAtom : SubtargetFeature<"atom", "X86ProcFamily", "IntelAtom",
+ "Intel Atom processors">;
+
class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+ : Processor<Name, GenericItineraries, Features>;
+
+class AtomProc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, AtomItineraries, Features>;
def : Proc<"generic", []>;
def : Proc<"i386", []>;
@@ -137,34 +156,38 @@ def : Proc<"core2", [FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
-def : Proc<"atom", [FeatureSSE3, FeatureCMPXCHG16B, FeatureMOVBE,
- FeatureSlowBTMem]>;
+def : AtomProc<"atom", [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
+ FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem, FeatureAES]>;
+ FeatureSlowBTMem, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES]>;
def : Proc<"nehalem", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem]>;
+ FeatureSlowBTMem, FeatureFastUAMem,
+ FeaturePOPCNT]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : Proc<"westmere", [FeatureSSE42, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureFastUAMem, FeatureAES,
- FeatureCLMUL]>;
+ FeatureSlowBTMem, FeatureFastUAMem,
+ FeaturePOPCNT, FeatureAES, FeatureCLMUL]>;
// Sandy Bridge
// SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
// rather than a superset.
// FIXME: Disabling AVX for now since it's not ready.
-def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B,
+def : Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeatureCLMUL]>;
// Ivy Bridge
-def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B,
+def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
FeatureAES, FeatureCLMUL,
- FeatureRDRAND, FeatureF16C]>;
+ FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
// Haswell
-def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
- FeatureCLMUL, FeatureRDRAND, FeatureF16C,
- FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
- FeatureBMI]>;
+// FIXME: Disabling AVX/AVX2/FMA3 for now since it's not ready.
+def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeaturePOPCNT,
+ FeatureAES, FeatureCLMUL, FeatureRDRAND,
+ FeatureF16C, FeatureFSGSBase,
+ FeatureMOVBE, FeatureLZCNT, FeatureBMI,
+ FeatureBMI2]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [Feature3DNow]>;
@@ -189,15 +212,21 @@ def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
- Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSlowBTMem]>;
-def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B,
- FeatureSSE4A, Feature3DNowA]>;
-def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A,
- Feature3DNowA]>;
+ Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
+ FeaturePOPCNT, FeatureSlowBTMem]>;
+// Bobcat
+def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureLZCNT, FeaturePOPCNT]>;
+// FIXME: Disabling AVX/FMA4 for now since it's not ready.
+// Bulldozer
+def : Proc<"bdver1", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureXOP, FeatureLZCNT, FeaturePOPCNT]>;
+// Enhanced Bulldozer
+def : Proc<"bdver2", [FeatureSSE42, FeatureSSE4A, FeatureCMPXCHG16B,
+ FeatureAES, FeatureCLMUL,
+ FeatureXOP, FeatureF16C, FeatureLZCNT,
+ FeaturePOPCNT, FeatureBMI]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [Feature3DNow]>;
@@ -229,9 +258,11 @@ include "X86CallingConv.td"
// Assembly Parser
//===----------------------------------------------------------------------===//
-// Currently the X86 assembly parser only supports ATT syntax.
def ATTAsmParser : AsmParser {
- string AsmParserClassName = "ATTAsmParser";
+ string AsmParserClassName = "AsmParser";
+}
+
+def ATTAsmParserVariant : AsmParserVariant {
int Variant = 0;
// Discard comments in assembly strings.
@@ -241,6 +272,16 @@ def ATTAsmParser : AsmParser {
string RegisterPrefix = "%";
}
+def IntelAsmParserVariant : AsmParserVariant {
+ int Variant = 1;
+
+ // Discard comments in assembly strings.
+ string CommentDelimiter = ";";
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "";
+}
+
//===----------------------------------------------------------------------===//
// Assembly Printers
//===----------------------------------------------------------------------===//
@@ -261,8 +302,7 @@ def IntelAsmWriter : AsmWriter {
def X86 : Target {
// Information about the instructions...
let InstructionSet = X86InstrInfo;
-
let AssemblyParsers = [ATTAsmParser];
-
+ let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant];
let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter];
}
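
Feature edges like avx2 -> avx -> sse42 above are resolved transitively when a
feature is enabled. An illustrative model of that closure, not the LLVM
implementation:

  #include <map>
  #include <set>
  #include <string>
  #include <vector>

  typedef std::map<std::string, std::vector<std::string> > FeatureGraph;

  // Enabling a feature also enables everything it implies, recursively.
  static void enableFeature(const FeatureGraph &Implies, const std::string &F,
                            std::set<std::string> &Enabled) {
    if (!Enabled.insert(F).second)
      return;                            // already enabled
    FeatureGraph::const_iterator It = Implies.find(F);
    if (It != Implies.end())
      for (size_t i = 0, e = It->second.size(); i != e; ++i)
        enableFeature(Implies, It->second[i], Enabled);
  }
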
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 4c3ff02826b0..7db7ccbedcfe 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,13 +13,12 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
-#include "InstPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -199,6 +198,7 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP_PIC_BASE:
O << "@TLVP" << '-' << *MF->getPICBaseSymbol();
break;
+ case X86II::MO_SECREL: O << "@SECREL"; break;
}
}
@@ -264,16 +264,40 @@ void X86AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op,
raw_ostream &O) {
unsigned char value = MI->getOperand(Op).getImm();
- assert(value <= 7 && "Invalid ssecc argument!");
switch (value) {
- case 0: O << "eq"; break;
- case 1: O << "lt"; break;
- case 2: O << "le"; break;
- case 3: O << "unord"; break;
- case 4: O << "neq"; break;
- case 5: O << "nlt"; break;
- case 6: O << "nle"; break;
- case 7: O << "ord"; break;
+ default: llvm_unreachable("Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ case 8: O << "eq_uq"; break;
+ case 9: O << "nge"; break;
+ case 0xa: O << "ngt"; break;
+ case 0xb: O << "false"; break;
+ case 0xc: O << "neq_oq"; break;
+ case 0xd: O << "ge"; break;
+ case 0xe: O << "gt"; break;
+ case 0xf: O << "true"; break;
+ case 0x10: O << "eq_os"; break;
+ case 0x11: O << "lt_oq"; break;
+ case 0x12: O << "le_oq"; break;
+ case 0x13: O << "unord_s"; break;
+ case 0x14: O << "neq_us"; break;
+ case 0x15: O << "nlt_uq"; break;
+ case 0x16: O << "nle_uq"; break;
+ case 0x17: O << "ord_s"; break;
+ case 0x18: O << "eq_us"; break;
+ case 0x19: O << "nge_uq"; break;
+ case 0x1a: O << "ngt_uq"; break;
+ case 0x1b: O << "false_os"; break;
+ case 0x1c: O << "neq_os"; break;
+ case 0x1d: O << "ge_oq"; break;
+ case 0x1e: O << "gt_oq"; break;
+ case 0x1f: O << "true_us"; break;
}
}
@@ -575,7 +599,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing() &&
- MMI->callsExternalVAFunctionWithFloatingPointArguments()) {
+ MMI->usesVAFloatArgument()) {
StringRef SymbolName = Subtarget->is64Bit() ? "_fltused" : "__fltused";
MCSymbol *S = MMI->getContext().GetOrCreateSymbol(SymbolName);
OutStreamer.EmitSymbolAttribute(S, MCSA_Global);
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 3a50435d38ba..a6ed9ba0060d 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -17,7 +17,6 @@
#include "X86.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
-#include "llvm/ADT/StringSet.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -25,11 +24,7 @@
namespace llvm {
-class MachineJumpTableInfo;
-class MCContext;
-class MCInst;
class MCStreamer;
-class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
index 4326814a7a96..e01ff41ed198 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/X86COFFMachineModuleInfo.cpp -------------------------===//
+//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Impl ------------------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index 98ab2a66a17f..0cec95a57abc 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/X86COFFMachineModuleInfo.h -----------------*- C++ -*-===//
+//===-- X86COFFMachineModuleInfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,9 @@
#ifndef X86COFF_MACHINEMODULEINFO_H
#define X86COFF_MACHINEMODULEINFO_H
+#include "X86MachineFunctionInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/DenseSet.h"
-#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 77b99056ae00..d148989e97f9 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -1,10 +1,10 @@
-//===- X86CallingConv.td - Calling Conventions X86 32/64 ---*- tablegen -*-===//
-//
+//===-- X86CallingConv.td - Calling Conventions X86 32/64 --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the X86-32 and X86-64
@@ -61,7 +61,7 @@ def RetCC_X86_32_C : CallingConv<[
// weirdly; this is really the sse-regparm calling convention) in which
// case they use XMM0, otherwise it is the same as the common X86 calling
// conv.
- CCIfInReg<CCIfSubtarget<"hasXMMInt()",
+ CCIfInReg<CCIfSubtarget<"hasSSE2()",
CCIfType<[f32, f64], CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
CCIfType<[f32,f64], CCAssignToReg<[ST0, ST1]>>,
CCDelegateTo<RetCC_X86Common>
@@ -73,8 +73,8 @@ def RetCC_X86_32_Fast : CallingConv<[
// SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
- CCIfType<[f32], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
- CCIfType<[f64], CCIfSubtarget<"hasXMMInt()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+ CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
// For integers, ECX can be used as an extra return register
CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
@@ -150,18 +150,23 @@ def CC_X86_64_C : CallingConv<[
// The first 8 MMX vector arguments are passed in XMM registers on Darwin.
CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()",
- CCIfSubtarget<"hasXMMInt()",
+ CCIfSubtarget<"hasSSE2()",
CCPromoteToType<v2i64>>>>,
// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasXMM()",
+ CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
- // The first 8 256-bit vector arguments are passed in YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasAVX()",
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+ // The first 8 256-bit vector arguments are passed in YMM registers, unless
+ // this is a vararg function.
+ // FIXME: This isn't precisely correct; the x86-64 ABI document says that
+ // fixed arguments to vararg functions are supposed to be passed in
+ // registers. Actually modeling that would be a lot of work, though.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
+ YMM4, YMM5, YMM6, YMM7]>>>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
@@ -193,6 +198,10 @@ def CC_X86_Win64_C : CallingConv<[
// 128 bit vectors are passed by pointer
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+ // 256 bit vectors are passed by pointer
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,
+
// The first 4 MMX vector arguments are passed in GPRs.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
@@ -233,7 +242,7 @@ def CC_X86_64_GHC : CallingConv<[
// Pass in STG registers: F1, F2, F3, F4, D1, D2
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasXMM()",
+ CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
@@ -251,7 +260,7 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfInReg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasXMMInt()",
+ CCIfSubtarget<"hasSSE2()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
// The first 3 __m64 vector arguments are passed in mmx registers if the
@@ -322,8 +331,8 @@ def CC_X86_32_ThisCall : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
- // The 'nest' parameter, if any, is passed in EAX.
- CCIfNest<CCAssignToReg<[EAX]>>,
+ // Pass sret arguments indirectly through EAX
+ CCIfSRet<CCAssignToReg<[EAX]>>,
// The first integer argument is passed in ECX
CCIfType<[i32], CCAssignToReg<[ECX]>>,
@@ -350,7 +359,7 @@ def CC_X86_32_FastCC : CallingConv<[
// The first 3 float or double arguments, if the call is not a vararg
// call and if SSE2 is available, are passed in SSE registers.
CCIfNotVarArg<CCIfType<[f32,f64],
- CCIfSubtarget<"hasXMMInt()",
+ CCIfSubtarget<"hasSSE2()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>,
// Doubles get 8-byte slots that are 8-byte aligned.
@@ -399,3 +408,18 @@ def CC_X86 : CallingConv<[
CCIfSubtarget<"is64Bit()", CCDelegateTo<CC_X86_64>>,
CCDelegateTo<CC_X86_32>
]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved Registers.
+//===----------------------------------------------------------------------===//
+
+def CSR_Ghc : CalleeSavedRegs<(add)>;
+
+def CSR_32 : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
+def CSR_64 : CalleeSavedRegs<(add RBX, R12, R13, R14, R15, RBP)>;
+
+def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>;
+def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
+
+def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
+ (sequence "XMM%u", 6, 15))>;
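
A SysV x86-64 illustration of the CCIfNotVarArg rule (demo, callee, and variadic
are made-up names; __m256d comes from <immintrin.h>): the fixed argument travels
in %ymm0, while the same value passed through a vararg list now goes to memory.

  #include <immintrin.h>

  __m256d callee(__m256d v);   // fixed 256-bit argument: arrives in %ymm0
  void variadic(int n, ...);   // 256-bit vararg: passed on the stack

  void demo(__m256d v) {
    callee(v);
    variadic(1, v);
  }
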
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f939510fa06b..ee3de9a3f6fe 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- X86/X86CodeEmitter.cpp - Convert X86 code to machine code ---------===//
+//===-- X86CodeEmitter.cpp - Convert X86 code to machine code -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -664,15 +664,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
- case X86II::TF: // F2 0F 38
- MCE.emitByte(0xF2);
- Need0FPrefix = true;
- break;
case X86II::REP: break; // already handled.
+ case X86II::T8XS: // F3 0F 38
case X86II::XS: // F3 0F
MCE.emitByte(0xF3);
Need0FPrefix = true;
break;
+ case X86II::T8XD: // F2 0F 38
+ case X86II::TAXD: // F2 0F 3A
case X86II::XD: // F2 0F
MCE.emitByte(0xF2);
Need0FPrefix = true;
@@ -698,10 +697,12 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
MCE.emitByte(0x0F);
switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TF: // F2 0F 38
+ case X86II::T8XD: // F2 0F 38
+ case X86II::T8XS: // F3 0F 38
case X86II::T8: // 0F 38
MCE.emitByte(0x38);
break;
+ case X86II::TAXD: // F2 0F 38
case X86II::TA: // 0F 3A
MCE.emitByte(0x3A);
break;
@@ -805,8 +806,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
}
assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
- Opcode == X86::WINCALL64pcrel32) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
// Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm();
Imm = Imm - MCE.getCurrentPCValue() - 4;
@@ -1003,7 +1003,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
break;
}
- if (!Desc->isVariadic() && CurOp != NumOps) {
+ if (!MI.isVariadic() && CurOp != NumOps) {
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4a72d154c335..c1a49a764614 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -60,7 +60,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
llvm_unreachable("unknown x86 machine relocation type");
}
}
- return 0;
}
long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
@@ -83,7 +82,6 @@ long int X86ELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy,
llvm_unreachable("unknown x86 relocation type");
}
}
- return 0;
}
unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
@@ -107,7 +105,6 @@ unsigned X86ELFWriterInfo::getRelocationTySize(unsigned RelTy) const {
llvm_unreachable("unknown x86 relocation type");
}
}
- return 0;
}
bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
@@ -132,7 +129,6 @@ bool X86ELFWriterInfo::isPCRelativeRel(unsigned RelTy) const {
llvm_unreachable("unknown x86 relocation type");
}
}
- return 0;
}
unsigned X86ELFWriterInfo::getAbsoluteLabelMachineRelTy() const {
@@ -146,8 +142,6 @@ long int X86ELFWriterInfo::computeRelocation(unsigned SymOffset,
if (RelTy == ELF::R_X86_64_PC32 || RelTy == ELF::R_386_PC32)
return SymOffset - (RelOffset + 4);
- else
- assert(0 && "computeRelocation unknown for this relocation type");
- return 0;
+ llvm_unreachable("computeRelocation unknown for this relocation type");
}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f912b28eb47b..69752c5c5ddc 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -60,8 +60,8 @@ public:
explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
- X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
- X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
}
virtual bool TargetSelectInstruction(const Instruction *I);
@@ -258,6 +258,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
Opc = X86ScalarSSEf64 ?
(Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
break;
+ case MVT::v4f32:
+ Opc = X86::MOVAPSmr;
+ break;
+ case MVT::v2f64:
+ Opc = X86::MOVAPDmr;
+ break;
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ Opc = X86::MOVDQAmr;
+ break;
}
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
@@ -671,7 +683,14 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
// Atomic stores need special handling.
- if (cast<StoreInst>(I)->isAtomic())
+ const StoreInst *S = cast<StoreInst>(I);
+
+ if (S->isAtomic())
+ return false;
+
+ unsigned SABIAlignment =
+ TD.getABITypeAlignment(S->getValueOperand()->getType());
+ if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
return false;
MVT VT;
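
A C++-level sketch of the stores the new guard conservatively rejects (packed
layout is one way to get an access whose alignment is below the type's ABI
alignment):

  struct __attribute__((packed)) P {
    char c;
    double d;        // field offset 1: stores get align 1, ABI align is 8
  };

  void storeUnderAligned(P *Ptr, double V) {
    Ptr->d = V;      // align-1 store; fast-isel now punts to SelectionDAG
  }
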
@@ -709,7 +728,7 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
// Let SDISel handle vararg functions.
@@ -818,8 +837,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
bool HasAVX = Subtarget->hasAVX();
- bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
- bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+ bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+ bool X86ScalarSSEf64 = Subtarget->hasSSE2();
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1510,7 +1529,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// fastcc with -tailcallopt is intended to provide a guaranteed
// tail call optimization. Fastisel doesn't know how to do that.
- if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
return false;
PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
@@ -1524,7 +1543,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Fast-isel doesn't know about callee-pop yet.
if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
- GuaranteedTailCallOpt))
+ TM.Options.GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
@@ -1557,10 +1576,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
SmallVector<unsigned, 8> Args;
SmallVector<MVT, 8> ArgVTs;
SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
- Args.reserve(CS.arg_size());
- ArgVals.reserve(CS.arg_size());
- ArgVTs.reserve(CS.arg_size());
- ArgFlags.reserve(CS.arg_size());
+ unsigned arg_size = CS.arg_size();
+ Args.reserve(arg_size);
+ ArgVals.reserve(arg_size);
+ ArgVTs.reserve(arg_size);
+ ArgFlags.reserve(arg_size);
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
// If we're lowering a mem intrinsic instead of a regular call, skip the
@@ -1740,9 +1760,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// If this is a really simple value, emit this with the Value* version
// of X86FastEmitStore. If it isn't simple, we don't want to do this,
// as it can cause us to reevaluate the argument.
- X86FastEmitStore(ArgVT, ArgVal, AM);
+ if (!X86FastEmitStore(ArgVT, ArgVal, AM))
+ return false;
} else {
- X86FastEmitStore(ArgVT, Arg, AM);
+ if (!X86FastEmitStore(ArgVT, Arg, AM))
+ return false;
}
}
}
@@ -1757,7 +1779,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
@@ -1771,9 +1793,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc;
- if (Subtarget->isTargetWin64())
- CallOpc = X86::WINCALL64r;
- else if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit())
CallOpc = X86::CALL64r;
else
CallOpc = X86::CALL32r;
@@ -1784,9 +1804,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// Direct call.
assert(GV && "Not a direct call");
unsigned CallOpc;
- if (Subtarget->isTargetWin64())
- CallOpc = X86::WINCALL64pcrel32;
- else if (Subtarget->is64Bit())
+ if (Subtarget->is64Bit())
CallOpc = X86::CALL64pcrel32;
else
CallOpc = X86::CALLpcrel32;
@@ -1831,10 +1849,15 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
+ // Add a register mask with the call-preserved registers.
+ // Proper defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
unsigned NumBytesCallee = 0;
- if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() &&
+ CS.paramHasAttr(1, Attribute::StructRet))
NumBytesCallee = 4;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(NumBytesCallee);
@@ -2081,7 +2104,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
if (!X86SelectAddress(C, AM))
return 0;
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
- TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg), AM);
@@ -2100,7 +2123,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
default: return false;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SS : X86::FsFLD0SS;
+ Opc = X86::FsFLD0SS;
RC = X86::FR32RegisterClass;
} else {
Opc = X86::LD_Fp032;
@@ -2109,7 +2132,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VFsFLD0SD : X86::FsFLD0SD;
+ Opc = X86::FsFLD0SD;
RC = X86::FR64RegisterClass;
} else {
Opc = X86::LD_Fp064;
@@ -2156,7 +2179,7 @@ bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
namespace llvm {
- llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
return new X86FastISel(funcInfo);
}
}
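One change in the X86FastISel hunks above deserves a gloss: the new vector cases emit aligned moves (MOVAPS/MOVAPD/MOVDQA), so X86SelectStore now rejects stores whose stated alignment is below the type's ABI alignment and leaves them to SelectionDAG. A minimal model of that guard; fastIselCanHandleStore, storeAlign, and abiAlign are illustrative names, not the real interface:

    #include <cstdint>
    #include <cstdio>

    // storeAlign == 0 means "use the ABI alignment", mirroring IR semantics.
    bool fastIselCanHandleStore(uint64_t storeAlign, uint64_t abiAlign,
                                bool isAtomic) {
      if (isAtomic)
        return false;                  // atomic stores need special handling
      if (storeAlign != 0 && storeAlign < abiAlign)
        return false;                  // an aligned vector move would fault
      return true;
    }

    int main() {
      // A <4 x float> store marked align 4 (ABI alignment 16) must not be
      // selected as MOVAPSmr; a fully aligned one may be.
      printf("%d %d\n", fastIselCanHandleStore(4, 16, false),
                        fastIselCanHandleStore(16, 16, false));  // 0 1
    }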
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index e3461c82c7a6..ed1707da13d8 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,8 +26,8 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "llvm/InlineAsm.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -37,7 +37,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -219,7 +218,7 @@ namespace {
/// getSTReg - Return the X86::ST(i) register which contains the specified
/// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
- return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
+ return StackTop - 1 - getSlot(RegNo) + X86::ST0;
}
// pushReg - Push the specified FP<n> register onto the stack.
@@ -570,8 +569,8 @@ void FPS::finishBlockStack() {
namespace {
struct TableEntry {
- unsigned from;
- unsigned to;
+ uint16_t from;
+ uint16_t to;
bool operator<(const TableEntry &TE) const { return from < TE.from; }
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
@@ -1644,6 +1643,30 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
return;
}
+ case X86::WIN_FTOL_32:
+ case X86::WIN_FTOL_64: {
+ // Push the operand into ST0.
+ MachineOperand &Op = MI->getOperand(0);
+ assert(Op.isUse() && Op.isReg() &&
+ Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6);
+ unsigned FPReg = getFPReg(Op);
+ if (Op.isKill())
+ moveToTop(FPReg, I);
+ else
+ duplicateToTop(FPReg, FPReg, I);
+
+ // Emit the call. This will pop the operand.
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::CALLpcrel32))
+ .addExternalSymbol("_ftol2")
+ .addReg(X86::ST0, RegState::ImplicitKill)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EDX, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ --StackTop;
+
+ break;
+ }
+
case X86::RET:
case X86::RETI:
// If RET has an FP register use operand, pass the first one in ST(0) and
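The new WIN_FTOL_32/WIN_FTOL_64 handling above pops the operand into ST(0) and calls _ftol2, the MSVC runtime's float-to-integer helper, which returns the truncated value in EDX:EAX, matching the implicit defs on the BuildMI. A behavioral sketch of what the helper computes (a model only, not the real x87 calling convention):

    #include <cstdint>
    #include <cstdio>

    // Models _ftol2: consume one x87 value, yield a truncated 64-bit
    // integer split across the EDX:EAX register pair.
    void ftol2_model(double st0, uint32_t &eax, uint32_t &edx) {
      int64_t v = (int64_t)st0;              // truncation toward zero
      eax = (uint32_t)(v & 0xffffffffu);     // low half  -> EAX
      edx = (uint32_t)((uint64_t)v >> 32);   // high half -> EDX
    }

    int main() {
      uint32_t lo, hi;
      ftol2_model(-2.9, lo, hi);
      printf("%08x:%08x\n", hi, lo);         // ffffffff:fffffffe == -2
    }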
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index d54f4ae2a2c8..000e3757cf76 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
+//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -47,7 +47,7 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineModuleInfo &MMI = MF.getMMI();
const TargetRegisterInfo *RI = TM.getRegisterInfo();
- return (DisableFramePointerElim(MF) ||
+ return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
RI->needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() ||
@@ -79,6 +79,10 @@ static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
}
}
+static unsigned getLEArOpcode(unsigned is64Bit) {
+ return is64Bit ? X86::LEA64r : X86::LEA32r;
+}
+
/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worry about clobbering it.
@@ -91,11 +95,11 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
if (!F || MF->getMMI().callsEHReturn())
return 0;
- static const unsigned CallerSavedRegs32Bit[] = {
+ static const uint16_t CallerSavedRegs32Bit[] = {
X86::EAX, X86::EDX, X86::ECX, 0
};
- static const unsigned CallerSavedRegs64Bit[] = {
+ static const uint16_t CallerSavedRegs64Bit[] = {
X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
X86::R8, X86::R9, X86::R10, X86::R11, 0
};
@@ -113,7 +117,7 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
case X86::TCRETURNmi64:
case X86::EH_RETURN:
case X86::EH_RETURN64: {
- SmallSet<unsigned, 8> Uses;
+ SmallSet<uint16_t, 8> Uses;
for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MBBI->getOperand(i);
if (!MO.isReg() || MO.isDef())
@@ -121,11 +125,11 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
+ for (const uint16_t *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
Uses.insert(*AsI);
}
- const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
+ const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
for (; *CS; ++CS)
if (!Uses.count(*CS))
return *CS;
@@ -141,13 +145,18 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, int64_t NumBytes,
- bool Is64Bit, const TargetInstrInfo &TII,
- const TargetRegisterInfo &TRI) {
+ bool Is64Bit, bool UseLEA,
+ const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub ?
- getSUBriOpcode(Is64Bit, Offset) :
- getADDriOpcode(Is64Bit, Offset);
+ unsigned Opc;
+ if (UseLEA)
+ Opc = getLEArOpcode(Is64Bit);
+ else
+ Opc = isSub
+ ? getSUBriOpcode(Is64Bit, Offset)
+ : getADDriOpcode(Is64Bit, Offset);
+
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -171,13 +180,21 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
- MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
- .addReg(StackPtr)
- .addImm(ThisVal);
+ MachineInstr *MI = NULL;
+
+ if (UseLEA) {
+ MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
+ StackPtr, false, isSub ? -ThisVal : ThisVal);
+ } else {
+ MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addImm(ThisVal);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ }
+
if (isSub)
MI->setFlag(MachineInstr::FrameSetup);
- MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+
Offset -= ThisVal;
}
}
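The UseLEA plumbing above lets emitSPUpdate adjust the stack pointer with lea off(%esp), %esp instead of add/sub when the subtarget asks for it via useLeaForSP(), presumably on in-order cores such as Atom. The win is that LEA does not write EFLAGS, which is why the setIsDead() call now applies only on the add/sub path. A sketch of the opcode choice:

    #include <cstdio>

    const char *spUpdateOpcode(bool useLEA, bool isSub) {
      if (useLEA)
        return "lea";                  // offset carries the sign; EFLAGS untouched
      return isSub ? "sub" : "add";    // EFLAGS def emitted, then marked dead
    }

    int main() {
      printf("%s %s\n", spUpdateOpcode(true, true),    // lea
                        spUpdateOpcode(false, true));  // sub
    }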
@@ -191,7 +208,8 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
MachineBasicBlock::iterator PI = prior(MBBI);
unsigned Opc = PI->getOpcode();
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr) {
if (NumBytes)
*NumBytes += PI->getOperand(2).getImm();
@@ -210,7 +228,7 @@ static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
unsigned StackPtr, uint64_t *NumBytes = NULL) {
- // FIXME: THIS ISN'T RUN!!!
+ // FIXME: THIS ISN'T RUN!!!
return;
if (MBBI == MBB.end()) return;
@@ -237,8 +255,8 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD and a negative for
+/// instruction. If it is an ADD/SUB/LEA instruction it is deleted and the
+/// stack adjustment is returned as a positive value for ADD/LEA and a negative for
/// SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -254,7 +272,8 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
int Offset = 0;
if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
+ Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
+ Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
PI->getOperand(0).getReg() == StackPtr){
Offset += PI->getOperand(2).getImm();
MBB.erase(PI);
@@ -351,20 +370,22 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
/// register. The number corresponds to the enum lists in
/// compact_unwind_encoding.h.
static int getCompactUnwindRegNum(const unsigned *CURegs, unsigned Reg) {
- int Idx = 1;
- for (; *CURegs; ++CURegs, ++Idx)
+ for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
if (*CURegs == Reg)
return Idx;
return -1;
}
+// Number of registers that can be saved in a compact unwind encoding.
+#define CU_NUM_SAVED_REGS 6
+
/// encodeCompactUnwindRegistersWithoutFrame - Create the permutation encoding
/// used with frameless stacks. It is passed the number of registers to be saved
/// and an array of the registers saved.
-static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
- unsigned RegCount,
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ unsigned RegCount, bool Is64Bit) {
// The saved registers are numbered from 1 to 6. In order to encode the order
// in which they were saved, we re-number them according to their place in the
// register order. The re-numbering is relative to the last re-numbered
@@ -385,14 +406,21 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
};
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
- uint32_t RenumRegs[6];
- for (unsigned i = 6 - RegCount; i < 6; ++i) {
+ for (unsigned i = 0; i != CU_NUM_SAVED_REGS; ++i) {
int CUReg = getCompactUnwindRegNum(CURegs, SavedRegs[i]);
if (CUReg == -1) return ~0U;
SavedRegs[i] = CUReg;
+ }
+ // Reverse the list.
+ std::swap(SavedRegs[0], SavedRegs[5]);
+ std::swap(SavedRegs[1], SavedRegs[4]);
+ std::swap(SavedRegs[2], SavedRegs[3]);
+
+ uint32_t RenumRegs[CU_NUM_SAVED_REGS];
+ for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
unsigned Countless = 0;
- for (unsigned j = 6 - RegCount; j < i; ++j)
+ for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
if (SavedRegs[j] < SavedRegs[i])
++Countless;
@@ -435,8 +463,9 @@ static uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned SavedRegs[6],
/// encodeCompactUnwindRegistersWithFrame - Return the registers encoded for a
/// compact encoding with a frame pointer.
-static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
- bool Is64Bit) {
+static uint32_t
+encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[CU_NUM_SAVED_REGS],
+ bool Is64Bit) {
static const unsigned CU32BitRegs[] = {
X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
};
@@ -446,18 +475,21 @@ static uint32_t encodeCompactUnwindRegistersWithFrame(unsigned SavedRegs[6],
const unsigned *CURegs = (Is64Bit ? CU64BitRegs : CU32BitRegs);
// Encode the registers in the order they were saved, 3-bits per register. The
- // registers are numbered from 1 to 6.
+ // registers are numbered from 1 to CU_NUM_SAVED_REGS.
uint32_t RegEnc = 0;
- for (int I = 5; I >= 0; --I) {
+ for (int I = CU_NUM_SAVED_REGS - 1, Idx = 0; I != -1; --I) {
unsigned Reg = SavedRegs[I];
- if (Reg == 0) break;
+ if (Reg == 0) continue;
+
int CURegNum = getCompactUnwindRegNum(CURegs, Reg);
- if (CURegNum == -1)
- return ~0U;
- RegEnc |= (CURegNum & 0x7) << (5 - I);
+ if (CURegNum == -1) return ~0U;
+
+ // Encode the 3-bit register number in order, skipping over 3-bits for each
+ // register.
+ RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
}
- assert((RegEnc & 0x7FFF) == RegEnc && "Invalid compact register encoding!");
+ assert((RegEnc & 0x3FFFF) == RegEnc && "Invalid compact register encoding!");
return RegEnc;
}
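The rewritten with-frame encoder above packs each saved register into 3 bits, counting from the first pushed, and now skips empty slots (continue) rather than stopping at them, which is why the assert widens from 0x7FFF to 0x3FFFF (six 3-bit fields). A standalone model of the loop; it assumes the slots already hold 1-6 compact-unwind numbers rather than physical registers:

    #include <cstdint>
    #include <cstdio>

    uint32_t encodeWithFrame(const unsigned SavedRegs[6]) {
      uint32_t RegEnc = 0;
      for (int I = 5, Idx = 0; I != -1; --I) {
        unsigned CURegNum = SavedRegs[I];  // already a 1..6 CU register number
        if (CURegNum == 0) continue;       // empty slot: skip, don't terminate
        RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
      }
      return RegEnc;
    }

    int main() {
      unsigned Regs[6] = {0, 0, 0, 1, 0, 3}; // sparse saves still encode
      printf("%#x\n", encodeWithFrame(Regs)); // 0xb
    }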
@@ -466,14 +498,11 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned FramePtr = RegInfo->getFrameRegister(MF);
unsigned StackPtr = RegInfo->getStackRegister();
- X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
-
bool Is64Bit = STI.is64Bit();
bool HasFP = hasFP(MF);
- unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
- int SavedRegIdx = 6;
+ unsigned SavedRegs[CU_NUM_SAVED_REGS] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
unsigned OffsetSize = (Is64Bit ? 8 : 4);
@@ -481,14 +510,13 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
unsigned PushInstrSize = 1;
unsigned MoveInstr = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
unsigned MoveInstrSize = (Is64Bit ? 3 : 2);
- unsigned SubtractInstr = getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta);
unsigned SubtractInstrIdx = (Is64Bit ? 3 : 2);
unsigned StackDivide = (Is64Bit ? 8 : 4);
unsigned InstrOffset = 0;
- unsigned CFAOffset = 0;
unsigned StackAdjust = 0;
+ unsigned StackSize = 0;
MachineBasicBlock &MBB = MF.front(); // Prologue is in entry BB.
bool ExpectEnd = false;
@@ -504,10 +532,10 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (Opc == PushInstr) {
// If there are too many saved registers, we cannot use compact encoding.
- if (--SavedRegIdx < 0) return 0;
+ if (SavedRegIdx >= CU_NUM_SAVED_REGS) return 0;
- SavedRegs[SavedRegIdx] = MI.getOperand(0).getReg();
- CFAOffset += OffsetSize;
+ SavedRegs[SavedRegIdx++] = MI.getOperand(0).getReg();
+ StackAdjust += OffsetSize;
InstrOffset += PushInstrSize;
} else if (Opc == MoveInstr) {
unsigned SrcReg = MI.getOperand(1).getReg();
@@ -516,12 +544,14 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
if (DstReg != FramePtr || SrcReg != StackPtr)
return 0;
- CFAOffset = 0;
+ StackAdjust = 0;
memset(SavedRegs, 0, sizeof(SavedRegs));
+ SavedRegIdx = 0;
InstrOffset += MoveInstrSize;
- } else if (Opc == SubtractInstr) {
- if (StackAdjust)
- // We all ready have a stack pointer adjustment.
+ } else if (Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
+ Opc == X86::SUB32ri || Opc == X86::SUB32ri8) {
+ if (StackSize)
+ // We already have a stack size.
return 0;
if (!MI.getOperand(0).isReg() ||
@@ -532,7 +562,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// %RSP<def> = SUB64ri8 %RSP, 48
return 0;
- StackAdjust = MI.getOperand(2).getImm() / StackDivide;
+ StackSize = MI.getOperand(2).getImm() / StackDivide;
SubtractInstrIdx += InstrOffset;
ExpectEnd = true;
}
@@ -540,28 +570,30 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// Encode that we are using EBP/RBP as the frame pointer.
uint32_t CompactUnwindEncoding = 0;
- CFAOffset /= StackDivide;
+ StackAdjust /= StackDivide;
if (HasFP) {
- if ((CFAOffset & 0xFF) != CFAOffset)
+ if ((StackAdjust & 0xFF) != StackAdjust)
// Offset was too big for compact encoding.
return 0;
// Get the encoding of the saved registers when we have a frame pointer.
uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame(SavedRegs, Is64Bit);
- if (RegEnc == ~0U)
- return 0;
+ if (RegEnc == ~0U) return 0;
CompactUnwindEncoding |= 0x01000000;
- CompactUnwindEncoding |= (CFAOffset & 0xFF) << 16;
+ CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
CompactUnwindEncoding |= RegEnc & 0x7FFF;
} else {
- unsigned FullOffset = CFAOffset + StackAdjust;
- if ((FullOffset & 0xFF) == FullOffset) {
- // Frameless stack.
+ ++StackAdjust;
+ uint32_t TotalStackSize = StackAdjust + StackSize;
+ if ((TotalStackSize & 0xFF) == TotalStackSize) {
+ // Frameless stack with a small stack size.
CompactUnwindEncoding |= 0x02000000;
- CompactUnwindEncoding |= (FullOffset & 0xFF) << 16;
+
+ // Encode the stack size.
+ CompactUnwindEncoding |= (TotalStackSize & 0xFF) << 16;
} else {
- if ((CFAOffset & 0x7) != CFAOffset)
+ if ((StackAdjust & 0x7) != StackAdjust)
// The extra stack adjustments are too big for us to handle.
return 0;
@@ -572,16 +604,21 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
// instruction.
CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;
- // Encode any extra stack stack changes (done via push instructions).
- CompactUnwindEncoding |= (CFAOffset & 0x7) << 13;
+ // Encode any extra stack adjustments (done via push instructions).
+ CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
}
+ // Encode the number of registers saved.
+ CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;
+
// Get the encoding of the saved registers when we don't have a frame
// pointer.
- uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegs,
- 6 - SavedRegIdx,
- Is64Bit);
+ uint32_t RegEnc =
+ encodeCompactUnwindRegistersWithoutFrame(SavedRegs, SavedRegIdx,
+ Is64Bit);
if (RegEnc == ~0U) return 0;
+
+ // Encode the register encoding.
CompactUnwindEncoding |= RegEnc & 0x3FF;
}
@@ -608,6 +645,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
bool Is64Bit = STI.is64Bit();
bool IsWin64 = STI.isTargetWin64();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -637,10 +675,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// stack pointer (we fit in the Red Zone).
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64 && // Win64 has no Red Zone
- !EnableSegmentedStacks) { // Regular stack
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !IsWin64 && // Win64 has no Red Zone
+ !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -861,7 +899,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: %rax preserves the offset and should be available.
if (isSPUpdateNeeded)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (isEAXAlive) {
// Restore EAX
@@ -873,7 +911,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
+ UseLEA, TII, *RegInfo);
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
@@ -917,6 +955,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc DL = MBBI->getDebugLoc();
bool Is64Bit = STI.is64Bit();
+ bool UseLEA = STI.useLeaForSP();
unsigned StackAlign = getStackAlignment();
unsigned SlotSize = RegInfo->getSlotSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
@@ -977,7 +1016,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned Opc = PI->getOpcode();
if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
- !PI->getDesc().isTerminator())
+ !PI->isTerminator())
break;
--MBBI;
@@ -997,7 +1036,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// We cannot use LEA here, because stack pointer was realigned. We need to
// deallocate local frame back.
if (CSSize) {
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
+ *RegInfo);
MBBI = prior(LastCSPop);
}
@@ -1018,7 +1058,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (NumBytes) {
// Adjust stack pointer back: ESP += numbytes.
- emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII, *RegInfo);
}
// We're returning from function via eh_return.
@@ -1053,7 +1093,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, UseLEA, TII, *RegInfo);
}
// Jump to label or value in register.
@@ -1097,7 +1137,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
- emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
+ emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, UseLEA, TII, *RegInfo);
}
}
@@ -1280,31 +1320,35 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
+
+/// GetScratchRegister - Get a register for performing work in the segmented
+/// stack prologue. Depending on the platform and the properties of the function,
+/// either one or two registers will be needed. Set primary to true for
+/// the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
- if (Is64Bit) {
- return X86::R11;
- } else {
- CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
- bool IsNested = HasNestArgument(&MF);
-
- if (CallingConvention == CallingConv::X86_FastCall) {
- if (IsNested) {
- report_fatal_error("Segmented stacks does not support fastcall with "
- "nested function.");
- return -1;
- } else {
- return X86::EAX;
- }
- } else {
- if (IsNested)
- return X86::EDX;
- else
- return X86::ECX;
- }
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+ if (Is64Bit)
+ return Primary ? X86::R11 : X86::R12;
+
+ CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
+ bool IsNested = HasNestArgument(&MF);
+
+ if (CallingConvention == CallingConv::X86_FastCall ||
+ CallingConvention == CallingConv::Fast) {
+ if (IsNested)
+ report_fatal_error("Segmented stacks does not support fastcall with "
+ "nested function.");
+ return Primary ? X86::EAX : X86::ECX;
}
+ if (IsNested)
+ return Primary ? X86::EDX : X86::EAX;
+ return Primary ? X86::ECX : X86::EAX;
}
+// The stack limit in the TCB is set to this many bytes above the actual stack
+// limit.
+static const uint64_t kSplitStackAvailable = 256;
+
void
X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MachineBasicBlock &prologueMBB = MF.front();
@@ -1316,14 +1360,15 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux())
- report_fatal_error("Segmented stacks supported only on linux.");
+ if (!ST->isTargetLinux() && !ST->isTargetDarwin() &&
+ !ST->isTargetWin32() && !ST->isTargetFreeBSD())
+ report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -1336,26 +1381,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// The MOV R10, RAX needs to be in a different block, since the RET we emit in
// allocMBB needs to be last (terminating) instruction.
- MachineBasicBlock *restoreR10MBB = NULL;
- if (IsNested)
- restoreR10MBB = MF.CreateMachineBasicBlock();
for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
e = prologueMBB.livein_end(); i != e; i++) {
allocMBB->addLiveIn(*i);
checkMBB->addLiveIn(*i);
-
- if (IsNested)
- restoreR10MBB->addLiveIn(*i);
}
- if (IsNested) {
+ if (IsNested)
allocMBB->addLiveIn(X86::R10);
- restoreR10MBB->addLiveIn(X86::RAX);
- }
- if (IsNested)
- MF.push_front(restoreR10MBB);
MF.push_front(allocMBB);
MF.push_front(checkMBB);
@@ -1364,28 +1399,99 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// prologue.
StackSize = MFI->getStackSize();
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- TlsReg = X86::FS;
- TlsOffset = 0x70;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ } else if (ST->isTargetFreeBSD()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x18;
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
+
+ if (CompareStackPointer)
+ ScratchReg = X86::RSP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
- TlsReg = X86::GS;
- TlsOffset = 0x30;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x30;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x48 + 90*4;
+ } else if (ST->isTargetWin32()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x14; // pvArbitrary, reserved for application use
+ } else if (ST->isTargetFreeBSD()) {
+ report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
+ } else {
+ report_fatal_error("Segmented stacks not supported on this platform.");
+ }
+
+ if (CompareStackPointer)
+ ScratchReg = X86::ESP;
+ else
+ BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
+ .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
+
+ if (ST->isTargetLinux() || ST->isTargetWin32()) {
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (ST->isTargetDarwin()) {
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
- BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
- .addImm(0).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
// It jumps to normal execution of the function body.
- BuildMI(checkMBB, DL, TII.get(X86::JG_4)).addMBB(&prologueMBB);
+ BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
// On 32 bit we first push the arguments size and then the frame size. On 64
// bit, we pass the stack frame size in r10 and the argument size in r11.
@@ -1403,9 +1509,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.getRegInfo().setPhysRegUsed(X86::R10);
MF.getRegInfo().setPhysRegUsed(X86::R11);
} else {
- // Since we'll call __morestack, stack alignment needs to be preserved.
- BuildMI(allocMBB, DL, TII.get(X86::SUB32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
.addImm(X86FI->getArgumentStackSize());
BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
@@ -1420,23 +1523,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__morestack");
- // __morestack only seems to remove 8 bytes off the stack. Add back the
- // additional 8 bytes we added before pushing the arguments.
- if (!Is64Bit)
- BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP)
- .addImm(8);
- BuildMI(allocMBB, DL, TII.get(X86::RET));
-
if (IsNested)
- BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10)
- .addReg(X86::RAX);
+ BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
+ else
+ BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
- if (IsNested) {
- allocMBB->addSuccessor(restoreR10MBB);
- restoreR10MBB->addSuccessor(&prologueMBB);
- } else {
- allocMBB->addSuccessor(&prologueMBB);
- }
+ allocMBB->addSuccessor(&prologueMBB);
checkMBB->addSuccessor(allocMBB);
checkMBB->addSuccessor(&prologueMBB);
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index 6f490640b4ed..d55a49763a4d 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -1,4 +1,4 @@
-//=-- X86TargetFrameLowering.h - Define frame lowering for X86 ---*- C++ -*-===//
+//===-- X86TargetFrameLowering.h - Define frame lowering for X86 -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 02b0ff26032b..8e2b1d6b5dd2 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -21,7 +21,6 @@
#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -32,11 +31,11 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -540,7 +539,7 @@ void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
const TargetInstrInfo *TII = TM.getInstrInfo();
if (Subtarget->isTargetCygMing()) {
unsigned CallOp =
- Subtarget->is64Bit() ? X86::WINCALL64pcrel32 : X86::CALLpcrel32;
+ Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
BuildMI(BB, DebugLoc(),
TII->get(CallOp)).addExternalSymbol("__main");
}
@@ -621,14 +620,14 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// Handle X86-64 rip-relative addresses. We check this before checking direct
// folding because RIP is preferable to non-RIP accesses.
- if (Subtarget->is64Bit() &&
+ if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
// Under X86-64 non-small code model, GV (and friends) are 64-bits, so
// they cannot be folded into immediate fields.
// FIXME: This can be improved for kernel and other models?
- (M == CodeModel::Small || M == CodeModel::Kernel) &&
- // Base and index reg must be 0 in order to use %rip as base and lowering
- // must allow RIP.
- !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+ (M == CodeModel::Small || M == CodeModel::Kernel)) {
+ // Base and index reg must be 0 in order to use %rip as base.
+ if (AM.hasBaseOrIndexReg())
+ return true;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
@@ -663,11 +662,12 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
}
// Handle the case when globals fit in our immediate field: This is true for
- // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
- // mode, this results in a non-RIP-relative computation.
+ // X86-32 always and X86-64 when in -mcmodel=small mode. In 64-bit
+ // mode, this only applies to a non-RIP-relative computation.
if (!Subtarget->is64Bit() ||
- ((M == CodeModel::Small || M == CodeModel::Kernel) &&
- TM.getRelocationModel() == Reloc::Static)) {
+ M == CodeModel::Small || M == CodeModel::Kernel) {
+ assert(N.getOpcode() != X86ISD::WrapperRIP &&
+ "RIP-relative addressing already handled");
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
AM.GV = G->getGlobal();
AM.Disp += G->getOffset();
@@ -725,6 +725,213 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
return false;
}
+// Insert a node into the DAG at least before the Pos node's position. This
+// will reposition the node as needed, and will assign it a node ID that is <=
+// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
+// IDs! The selection DAG must no longer depend on their uniqueness when this
+// is used.
+static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
+ if (N.getNode()->getNodeId() == -1 ||
+ N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
+ DAG.RepositionNode(Pos.getNode(), N.getNode());
+ N.getNode()->setNodeId(Pos.getNode()->getNodeId());
+ }
+}
+
+// Transform "(X >> (8-C1)) & C2" to "(X >> 8) & 0xff)" if safe. This
+// allows us to convert the shift and and into an h-register extract and
+// a scaled index. Returns false if the simplification is performed.
+static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)) ||
+ !Shift.hasOneUse())
+ return true;
+
+ int ScaleLog = 8 - Shift.getConstantOperandVal(1);
+ if (ScaleLog <= 0 || ScaleLog >= 4 ||
+ Mask != (0xffu << ScaleLog))
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue Eight = DAG.getConstant(8, MVT::i8);
+ SDValue NewMask = DAG.getConstant(0xff, VT);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
+ SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
+ SDValue ShlCount = DAG.getConstant(ScaleLog, MVT::i8);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, Eight);
+ InsertDAGNode(DAG, N, Srl);
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, And);
+ InsertDAGNode(DAG, N, ShlCount);
+ InsertDAGNode(DAG, N, Shl);
+ DAG.ReplaceAllUsesWith(N, Shl);
+ AM.IndexReg = And;
+ AM.Scale = (1 << ScaleLog);
+ return false;
+}
+
+// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
+// allows us to fold the shift into this addressing mode. Returns false if the
+// transform succeeded.
+static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SHL ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ // Not likely to be profitable if either the AND or SHIFT node has more
+ // than one use (unless all uses are for address computation). Besides,
+ // isel mechanism requires their node ids to be reused.
+ if (!N.hasOneUse() || !Shift.hasOneUse())
+ return true;
+
+ // Verify that the shift amount is something we can fold.
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
+ return true;
+
+ EVT VT = N.getValueType();
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
+ SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewMask);
+ InsertDAGNode(DAG, N, NewAnd);
+ InsertDAGNode(DAG, N, NewShift);
+ DAG.ReplaceAllUsesWith(N, NewShift);
+
+ AM.Scale = 1 << ShiftAmt;
+ AM.IndexReg = NewAnd;
+ return false;
+}
+
+// Implement some heroics to detect shifts of masked values where the mask can
+// be replaced by extending the shift and undoing that in the addressing mode
+// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
+// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
+// the addressing mode. This results in code such as:
+//
+// int f(short *y, int *lookup_table) {
+// ...
+// return *y + lookup_table[*y >> 11];
+// }
+//
+// Turning into:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $11, %ecx
+// addl (%rsi,%rcx,4), %eax
+//
+// Instead of:
+// movzwl (%rdi), %eax
+// movl %eax, %ecx
+// shrl $9, %ecx
+// andl $124, %rcx
+// addl (%rsi,%rcx), %eax
+//
+// Note that this function assumes the mask is provided as a mask *after* the
+// value is shifted. The input chain may or may not match that, but computing
+// such a mask is trivial.
+static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
+ uint64_t Mask,
+ SDValue Shift, SDValue X,
+ X86ISelAddressMode &AM) {
+ if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
+ !isa<ConstantSDNode>(Shift.getOperand(1)))
+ return true;
+
+ unsigned ShiftAmt = Shift.getConstantOperandVal(1);
+ unsigned MaskLZ = CountLeadingZeros_64(Mask);
+ unsigned MaskTZ = CountTrailingZeros_64(Mask);
+
+ // The amount of shift we're trying to fit into the addressing mode is taken
+ // from the trailing zeros of the mask.
+ unsigned AMShiftAmt = MaskTZ;
+
+ // There is nothing we can do here unless the mask is removing some bits.
+ // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
+ if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;
+
+ // We also need to ensure that mask is a continuous run of bits.
+ if (CountTrailingOnes_64(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
+
+ // Scale the leading zero count down based on the actual size of the value.
+ // Also scale it down based on the size of the shift.
+ MaskLZ -= (64 - X.getValueSizeInBits()) + ShiftAmt;
+
+ // The final check is to ensure that any masked out high bits of X are
+ // already known to be zero. Otherwise, the mask has a semantic impact
+ // other than masking out a couple of low bits. Unfortunately, because of
+ // the mask, zero extensions will be removed from operands in some cases.
+ // This code works extra hard to look through extensions because we can
+ // replace them with zero extensions cheaply if necessary.
+ bool ReplacingAnyExtend = false;
+ if (X.getOpcode() == ISD::ANY_EXTEND) {
+ unsigned ExtendBits =
+ X.getValueSizeInBits() - X.getOperand(0).getValueSizeInBits();
+ // Assume that we'll replace the any-extend with a zero-extend, and
+ // narrow the search to the extended value.
+ X = X.getOperand(0);
+ MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
+ ReplacingAnyExtend = true;
+ }
+ APInt MaskedHighBits = APInt::getHighBitsSet(X.getValueSizeInBits(),
+ MaskLZ);
+ APInt KnownZero, KnownOne;
+ DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
+ if (MaskedHighBits != KnownZero) return true;
+
+ // We've identified a pattern that can be transformed into a single shift
+ // and an addressing mode. Make it so.
+ EVT VT = N.getValueType();
+ if (ReplacingAnyExtend) {
+ assert(X.getValueType() != VT);
+ // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
+ SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, X.getDebugLoc(), VT, X);
+ InsertDAGNode(DAG, N, NewX);
+ X = NewX;
+ }
+ DebugLoc DL = N.getDebugLoc();
+ SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, MVT::i8);
+ SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
+ SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, MVT::i8);
+ SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);
+
+ // Insert the new nodes into the topological ordering. We must do this in
+ // a valid topological ordering as nothing is going to go back and re-sort
+ // these nodes. We continually insert before 'N' in sequence as this is
+ // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
+ // hierarchy left to express.
+ InsertDAGNode(DAG, N, NewSRLAmt);
+ InsertDAGNode(DAG, N, NewSRL);
+ InsertDAGNode(DAG, N, NewSHLAmt);
+ InsertDAGNode(DAG, N, NewSHL);
+ DAG.ReplaceAllUsesWith(N, NewSHL);
+
+ AM.Scale = 1 << AMShiftAmt;
+ AM.IndexReg = NewSRL;
+ return false;
+}
+
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
DebugLoc dl = N.getDebugLoc();
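The arithmetic inside FoldMaskAndShiftToScale is easiest to see on the comment's own example. A standalone check of the fold on those concrete values, using GCC/Clang bit builtins in place of CountTrailingZeros_64 and friends:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // lookup_table[*y >> 11] with a movzwl'd short: DAGCombine canonicalizes
      // (shl (srl x, 11), 2) into (and (srl x, 9), 124), i.e. "shrl $9; andl $124".
      uint64_t Mask = 124;     // 0x7c, the post-shift mask
      unsigned ShiftAmt = 9;   // the srl amount DAGCombine left behind
      unsigned AMShiftAmt = __builtin_ctzll(Mask);   // 2: becomes the scale
      unsigned LZ = __builtin_clzll(Mask);
      unsigned Ones = __builtin_ctzll(~(Mask >> AMShiftAmt));
      // The patch's contiguity test: trailing ones + trailing zeros + leading
      // zeros must cover all 64 bits, or the mask has a hole and the fold bails.
      printf("contiguous: %d\n", Ones + AMShiftAmt + LZ == 64);      // 1
      // Result: index = x >> (9 + 2) with AM.Scale = 1 << 2, recovering the
      // "shrl $11" plus scaled-index codegen from the comment above.
      printf("srl %u, scale %u\n", ShiftAmt + AMShiftAmt, 1u << AMShiftAmt);
    }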
@@ -814,6 +1021,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
}
+ case ISD::SRL: {
+ // Scale must not be used already.
+ if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+
+ SDValue And = N.getOperand(0);
+ if (And.getOpcode() != ISD::AND) break;
+ SDValue X = And.getOperand(0);
+
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ if (X.getValueSizeInBits() > 64) break;
+
+ // The mask used for the transform is expected to be post-shift, but we
+ // found the shift first so just apply the shift to the mask before passing
+ // it down.
+ if (!isa<ConstantSDNode>(N.getOperand(1)) ||
+ !isa<ConstantSDNode>(And.getOperand(1)))
+ break;
+ uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
+
+ // Try to fold the mask and shift into the scale, and return false if we
+ // succeed.
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+ return false;
+ break;
+ }
+
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
// A mul_lohi where we need the low part can be folded as a plain multiply.
@@ -917,16 +1151,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.Scale = 1;
// Insert the new nodes into the topological ordering.
- if (Zero.getNode()->getNodeId() == -1 ||
- Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Zero.getNode());
- Zero.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Neg.getNode()->getNodeId() == -1 ||
- Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Neg.getNode());
- Neg.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ InsertDAGNode(*CurDAG, N, Zero);
+ InsertDAGNode(*CurDAG, N, Neg);
return false;
}
@@ -981,121 +1207,34 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// Perform some heroic transforms on an and of a constant-count shift
// with a constant to enable use of the scaled offset field.
- SDValue Shift = N.getOperand(0);
- if (Shift.getNumOperands() != 2) break;
-
// Scale must not be used already.
if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+ SDValue Shift = N.getOperand(0);
+ if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
SDValue X = Shift.getOperand(0);
- ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
- ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
- if (!C1 || !C2) break;
-
- // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
- // allows us to convert the shift and and into an h-register extract and
- // a scaled index.
- if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
- unsigned ScaleLog = 8 - C1->getZExtValue();
- if (ScaleLog > 0 && ScaleLog < 4 &&
- C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
- SDValue Eight = CurDAG->getConstant(8, MVT::i8);
- SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
- SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
- X, Eight);
- SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
- Srl, Mask);
- SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
- SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
- And, ShlCount);
-
- // Insert the new nodes into the topological ordering.
- if (Eight.getNode()->getNodeId() == -1 ||
- Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), Eight.getNode());
- Eight.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (Mask.getNode()->getNodeId() == -1 ||
- Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), Mask.getNode());
- Mask.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (Srl.getNode()->getNodeId() == -1 ||
- Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
- CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
- Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
- }
- if (And.getNode()->getNodeId() == -1 ||
- And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), And.getNode());
- And.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (ShlCount.getNode()->getNodeId() == -1 ||
- ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
- ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- if (Shl.getNode()->getNodeId() == -1 ||
- Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), Shl.getNode());
- Shl.getNode()->setNodeId(N.getNode()->getNodeId());
- }
- CurDAG->ReplaceAllUsesWith(N, Shl);
- AM.IndexReg = And;
- AM.Scale = (1 << ScaleLog);
- return false;
- }
- }
- // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
- // allows us to fold the shift into this addressing mode.
- if (Shift.getOpcode() != ISD::SHL) break;
+ // We only handle up to 64-bit values here as those are what matter for
+ // addressing mode optimizations.
+ if (X.getValueSizeInBits() > 64) break;
- // Not likely to be profitable if either the AND or SHIFT node has more
- // than one use (unless all uses are for address computation). Besides,
- // isel mechanism requires their node ids to be reused.
- if (!N.hasOneUse() || !Shift.hasOneUse())
- break;
-
- // Verify that the shift amount is something we can fold.
- unsigned ShiftCst = C1->getZExtValue();
- if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
break;
-
- // Get the new AND mask, this folds to a constant.
- SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
- SDValue(C2, 0), SDValue(C1, 0));
- SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
- NewANDMask);
- SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
- NewAND, SDValue(C1, 0));
+ uint64_t Mask = N.getConstantOperandVal(1);
- // Insert the new nodes into the topological ordering.
- if (C1->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), C1);
- C1->setNodeId(X.getNode()->getNodeId());
- }
- if (NewANDMask.getNode()->getNodeId() == -1 ||
- NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
- CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
- NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
- }
- if (NewAND.getNode()->getNodeId() == -1 ||
- NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
- CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
- NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
- }
- if (NewSHIFT.getNode()->getNodeId() == -1 ||
- NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
- CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
- NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
- }
+ // Try to fold the mask and shift into an extract and scale.
+ if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
- CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
-
- AM.Scale = 1 << ShiftCst;
- AM.IndexReg = NewAND;
- return false;
+ // Try to fold the mask and shift directly into the scale.
+ if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+
+ // Try to swap the mask and shift to place shifts which can be done as
+ // a scale on the outside of the mask.
+ if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
+ return false;
+ break;
}
}
@@ -1515,7 +1654,7 @@ enum AtomicSz {
AtomicSzEnd
};
-static const unsigned int AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
+static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
{
X86::LOCK_OR8mi,
X86::LOCK_OR8mr,
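AtomicOpcTbl is one of several static tables this patch narrows from unsigned to uint16_t (XMMArgRegs and the caller-saved register lists above are others); X86 opcode and register enum values fit in 16 bits, so on typical targets the tables shrink by half. A trivial demonstration of the saving:

    #include <cstdint>
    #include <cstdio>

    static const uint16_t RegsNarrow[] = { 1, 2, 3, 0 }; // stand-in values
    static const unsigned RegsWide[]   = { 1, 2, 3, 0 };

    int main() {
      // 8 vs 16 bytes where unsigned is 32 bits.
      printf("%zu vs %zu bytes\n", sizeof(RegsNarrow), sizeof(RegsWide));
    }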
@@ -1709,6 +1848,96 @@ static bool HasNoSignedComparisonUses(SDNode *N) {
return true;
}
+/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode
+/// is suitable for doing the {load; increment or decrement; store} to modify
+/// transformation.
+static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
+ SDValue StoredVal, SelectionDAG *CurDAG,
+ LoadSDNode* &LoadNode, SDValue &InputChain) {
+
+ // is the value stored the result of a DEC or INC?
+ if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;
+
+ // is the stored value result 0 of the load?
+ if (StoredVal.getResNo() != 0) return false;
+
+ // are there other uses of the loaded value than the inc or dec?
+ if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;
+
+ // is the store non-extending and non-indexed?
+ if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
+ return false;
+
+ SDValue Load = StoredVal->getOperand(0);
+ // Is the stored value a non-extending and non-indexed load?
+ if (!ISD::isNormalLoad(Load.getNode())) return false;
+
+ // Return LoadNode by reference.
+ LoadNode = cast<LoadSDNode>(Load);
+ // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
+ EVT LdVT = LoadNode->getMemoryVT();
+ if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
+ LdVT != MVT::i8)
+ return false;
+
+ // Is store the only read of the loaded value?
+ if (!Load.hasOneUse())
+ return false;
+
+ // Is the address of the store the same as the load?
+ if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
+ LoadNode->getOffset() != StoreNode->getOffset())
+ return false;
+
+ // Check if the chain is produced by the load or is a TokenFactor with
+ // the load output chain as an operand. Return InputChain by reference.
+ SDValue Chain = StoreNode->getChain();
+
+ bool ChainCheck = false;
+ if (Chain == Load.getValue(1)) {
+ ChainCheck = true;
+ InputChain = LoadNode->getChain();
+ } else if (Chain.getOpcode() == ISD::TokenFactor) {
+ SmallVector<SDValue, 4> ChainOps;
+ for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
+ SDValue Op = Chain.getOperand(i);
+ if (Op == Load.getValue(1)) {
+ ChainCheck = true;
+ continue;
+ }
+ ChainOps.push_back(Op);
+ }
+
+ if (ChainCheck)
+ // Make a new TokenFactor with all the other input chains except
+ // for the load.
+ InputChain = CurDAG->getNode(ISD::TokenFactor, Chain.getDebugLoc(),
+ MVT::Other, &ChainOps[0], ChainOps.size());
+ }
+ if (!ChainCheck)
+ return false;
+
+ return true;
+}
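// [Editor's example, not part of the patch; assumes x86-64 codegen] The
// guards above admit exactly this read-modify-write idiom: the load and the
// store share one address, and the INC/DEC result has no user other than
// the store, so the whole chain can collapse into a single memory-operand
// instruction such as `incq (%rdi)`.
void bump(long *p) { *p += 1; }   // {load; inc; store} -> inc through memory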
+
+/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in-memory
+/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC.
+static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
+ if (Opc == X86ISD::DEC) {
+ if (LdVT == MVT::i64) return X86::DEC64m;
+ if (LdVT == MVT::i32) return X86::DEC32m;
+ if (LdVT == MVT::i16) return X86::DEC16m;
+ if (LdVT == MVT::i8) return X86::DEC8m;
+ } else {
+ assert(Opc == X86ISD::INC && "unrecognized opcode");
+ if (LdVT == MVT::i64) return X86::INC64m;
+ if (LdVT == MVT::i32) return X86::INC32m;
+ if (LdVT == MVT::i16) return X86::INC16m;
+ if (LdVT == MVT::i8) return X86::INC8m;
+ }
+ llvm_unreachable("unrecognized size for LdVT");
+}
+
SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
unsigned Opc, MOpc;
@@ -1829,7 +2058,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
- break;
}
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
@@ -2114,7 +2342,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
HasNoSignedComparisonUses(Node))
// Look past the truncate if CMP is the only use of it.
N0 = N0.getOperand(0);
- if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+ if ((N0.getNode()->getOpcode() == ISD::AND ||
+ (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
+ N0.getNode()->hasOneUse() &&
N0.getValueType() != MVT::i8 &&
X86::isZeroNode(N1)) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
@@ -2129,7 +2359,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// On x86-32, only the ABCD registers have 8-bit subregisters.
if (!Subtarget->is64Bit()) {
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
@@ -2158,7 +2388,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Put the value in an ABCD register.
- TargetRegisterClass *TRC = 0;
+ const TargetRegisterClass *TRC;
switch (N0.getValueType().getSimpleVT().SimpleTy) {
case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
@@ -2214,6 +2444,56 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case ISD::STORE: {
+ // Change a chain of {load; incr or dec; store} of the same value into
+ // a simple increment or decrement through memory of that value, if the
+ // uses of the modified value and its address are suitable.
+ // The DEC64m tablegen pattern is currently not able to match the case where
+ // the EFLAGS on the original DEC are used. (This also applies to
+ // {INC,DEC}X{64,32,16,8}.)
+ // We'll need to improve tablegen to allow flags to be transferred from a
+ // node in the pattern to the result node, probably with a new keyword.
+ // For example, we currently have this:
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (implicit EFLAGS)]>;
+ // but may need something like this:
+ // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
+ // [(store (add (loadi64 addr:$dst), -1), addr:$dst),
+ // (transferrable EFLAGS)]>;
+
+ StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
+ SDValue StoredVal = StoreNode->getOperand(1);
+ unsigned Opc = StoredVal->getOpcode();
+
+ LoadSDNode *LoadNode = 0;
+ SDValue InputChain;
+ if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
+ LoadNode, InputChain))
+ break;
+
+ SDValue Base, Scale, Index, Disp, Segment;
+ if (!SelectAddr(LoadNode, LoadNode->getBasePtr(),
+ Base, Scale, Index, Disp, Segment))
+ break;
+
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
+ MemOp[0] = StoreNode->getMemOperand();
+ MemOp[1] = LoadNode->getMemOperand();
+ const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
+ EVT LdVT = LoadNode->getMemoryVT();
+ unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
+ MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
+ Node->getDebugLoc(),
+ MVT::i32, MVT::Other, Ops,
+ array_lengthof(Ops));
+ Result->setMemRefs(MemOp, MemOp + 2);
+
+ ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
+ ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));
+
+ return Result;
+ }
}
SDNode *ResNode = SelectCode(Node);
@@ -2254,6 +2534,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
- llvm::CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel) {
return new X86DAGToDAGISel(TM, OptLevel);
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 7c8ce177ecd3..9b83aade3381 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13,9 +13,9 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-isel"
+#include "X86ISelLowering.h"
#include "X86.h"
#include "X86InstrBuilder.h"
-#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "Utils/X86ShuffleDecode.h"
@@ -35,25 +35,21 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <bitset>
using namespace llvm;
-using namespace dwarf;
STATISTIC(NumTailCalls, "Number of tail calls");
@@ -61,17 +57,6 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
-static SDValue Insert128BitVector(SDValue Result,
- SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
-static SDValue Extract128BitVector(SDValue Vec,
- SDValue Idx,
- SelectionDAG &DAG,
- DebugLoc dl);
-
/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference. Idx is an index in the 128 bits we
@@ -169,8 +154,8 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
- X86ScalarSSEf64 = Subtarget->hasXMMInt();
- X86ScalarSSEf32 = Subtarget->hasXMM();
+ X86ScalarSSEf64 = Subtarget->hasSSE2();
+ X86ScalarSSEf32 = Subtarget->hasSSE1();
X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
RegInfo = TM.getRegisterInfo();
@@ -186,8 +171,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// For 64-bit since we have so many registers use the ILP scheduler, for
// 32-bit code use the register pressure specific scheduling.
+ // For 32-bit Atom, use Hybrid (register pressure + latency) scheduling.
if (Subtarget->is64Bit())
setSchedulingPreference(Sched::ILP);
+ else if (Subtarget->isAtom())
+ setSchedulingPreference(Sched::Hybrid);
else
setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
@@ -199,15 +187,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setLibcallName(RTLIB::SREM_I64, "_allrem");
setLibcallName(RTLIB::UREM_I64, "_aullrem");
setLibcallName(RTLIB::MUL_I64, "_allmul");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
+
+ // The _ftol2 runtime function has an unusual calling conv, which
+ // is modeled by a special pseudo-instruction.
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
}
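// [Editor's illustration, not part of the patch] On a 32-bit MSVC-style
// target, a cast like the one below was previously lowered through the
// _ftol2 libcall; nulling the libcall names above routes it instead to the
// custom FP_TO_UINT lowering and its dedicated pseudo-instruction.
unsigned long long to_u64(double d) { return (unsigned long long)d; }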
if (Subtarget->isTargetDarwin()) {
@@ -256,8 +247,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
- } else if (!UseSoftFloat) {
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
+ } else if (!TM.Options.UseSoftFloat) {
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
@@ -271,7 +262,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
// SSE has no i16 to fp conversion, only i32
if (X86ScalarSSEf32) {
setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
@@ -314,7 +305,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->is64Bit()) {
setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// Since AVX is a superset of SSE3, only check for SSE here.
if (Subtarget->hasSSE1() && !Subtarget->hasSSE3())
// Expand FP_TO_UINT into a select.
@@ -327,6 +318,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
}
+ if (isTargetFTOL()) {
+ // Use the _ftol2 runtime function, which has a pseudo-instruction
+ // to handle its weird calling convention.
+ setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
+ }
+
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!X86ScalarSSEf64) {
setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
@@ -379,10 +376,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FREM , MVT::f80 , Expand);
setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
+ // Promote the i8 variants and force them up to i32, which has a shorter
+ // encoding.
+ setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTTZ , MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTTZ_ZERO_UNDEF , MVT::i8 , MVT::i32);
if (Subtarget->hasBMI()) {
- setOperationAction(ISD::CTTZ , MVT::i8 , Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
- setOperationAction(ISD::CTTZ , MVT::i8 , Custom);
setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
if (Subtarget->is64Bit())
@@ -390,13 +395,27 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (Subtarget->hasLZCNT()) {
+ // When promoting the i8 variants, force them to i32 for a shorter
+ // encoding.
setOperationAction(ISD::CTLZ , MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTLZ , MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Promote);
+ AddPromotedToType (ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Expand);
+ if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
} else {
setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
- if (Subtarget->is64Bit())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
+ if (Subtarget->is64Bit()) {
setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+ }
}
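// [Editor's sketch, not part of the patch; assumes the usual legalizer
// fixups] Widening an i8 cttz to i32 needs a sentinel bit so that a zero
// input still yields 8 rather than 32; a widened clz instead subtracts the
// 24 extra leading zeros introduced by the zero-extension. The helper below
// uses the GCC/Clang builtin to mimic the cttz case.
#include <cassert>
#include <cstdint>
static int cttz_u8_via_u32(uint8_t x) {
  uint32_t w = (uint32_t)x | 0x100u;   // sentinel bit caps the count at 8
  return __builtin_ctz(w);
}
int main() {
  assert(cttz_u8_via_u32(0) == 8 && cttz_u8_via_u32(4) == 2);
  return 0;
}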
if (Subtarget->hasPOPCNT()) {
@@ -459,7 +478,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom);
}
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
@@ -538,14 +557,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
- else if (EnableSegmentedStacks)
+ else if (TM.Options.EnableSegmentedStacks)
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
MVT::i64 : MVT::i32, Expand);
- if (!UseSoftFloat && X86ScalarSSEf64) {
+ if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -577,7 +596,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// cases we handle.
addLegalFPImmediate(APFloat(+0.0)); // xorpd
addLegalFPImmediate(APFloat(+0.0f)); // xorps
- } else if (!UseSoftFloat && X86ScalarSSEf32) {
+ } else if (!TM.Options.UseSoftFloat && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
addRegisterClass(MVT::f32, X86::FR32RegisterClass);
@@ -606,11 +625,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
- } else if (!UseSoftFloat) {
+ } else if (!TM.Options.UseSoftFloat) {
// f32 and f64 in x87.
// Set up the FP register classes.
addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
@@ -621,7 +640,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f64 , Expand);
setOperationAction(ISD::FCOS , MVT::f64 , Expand);
}
@@ -640,7 +659,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FMA, MVT::f32, Expand);
// Long double always uses X87.
- if (!UseSoftFloat) {
+ if (!TM.Options.UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -659,11 +678,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
}
- if (!UnsafeFPMath) {
+ if (!TM.Options.UnsafeFPMath) {
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+ setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f80, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
+ setOperationAction(ISD::FRINT, MVT::f80, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
setOperationAction(ISD::FMA, MVT::f80, Expand);
}
@@ -715,7 +739,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
@@ -749,7 +775,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
// with -msoft-float, disable use of MMX as well.
- if (!UseSoftFloat && Subtarget->hasMMX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasMMX()) {
addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
// No operations on x86mmx supported, everything uses intrinsics.
}
@@ -786,7 +812,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BITCAST, MVT::v2i32, Expand);
setOperationAction(ISD::BITCAST, MVT::v1i64, Expand);
- if (!UseSoftFloat && Subtarget->hasXMM()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasSSE1()) {
addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
setOperationAction(ISD::FADD, MVT::v4f32, Legal);
@@ -803,7 +829,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
}
- if (!UseSoftFloat && Subtarget->hasXMMInt()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasSSE2()) {
addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
// FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
@@ -909,7 +935,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
}
- if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSE41()) {
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
@@ -924,10 +950,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- // Can turn SHL into an integer multiply.
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
- setOperationAction(ISD::SHL, MVT::v16i8, Custom);
-
setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
@@ -948,30 +970,47 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
+ // FIXME: These should be Legal, but that's only for the case where
+ // the index is constant. For now, custom expand to deal with that.
if (Subtarget->is64Bit()) {
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
}
}
- if (Subtarget->hasXMMInt()) {
- setOperationAction(ISD::SRL, MVT::v2i64, Custom);
- setOperationAction(ISD::SRL, MVT::v4i32, Custom);
- setOperationAction(ISD::SRL, MVT::v16i8, Custom);
+ if (Subtarget->hasSSE2()) {
setOperationAction(ISD::SRL, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRL, MVT::v16i8, Custom);
- setOperationAction(ISD::SHL, MVT::v2i64, Custom);
- setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v8i16, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
- setOperationAction(ISD::SRA, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v8i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v16i8, Custom);
+
+ if (Subtarget->hasAVX2()) {
+ setOperationAction(ISD::SRL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Legal);
+ setOperationAction(ISD::SHL, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Legal);
+ } else {
+ setOperationAction(ISD::SRL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::SRA, MVT::v4i32, Custom);
+ }
}
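// [Editor's illustration, not part of the patch; assumes AVX2] AVX2 adds
// per-element variable shifts (VPSLLV/VPSRLV/VPSRAV), which is why the
// v2i64/v4i32 vector-shift operations become Legal above instead of being
// custom-expanded.
#include <immintrin.h>
__m128i shl_per_lane(__m128i v, __m128i amt) {
  return _mm_sllv_epi32(v, amt);   // VPSLLVD: one shift count per lane
}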
- if (Subtarget->hasSSE42() || Subtarget->hasAVX())
+ if (Subtarget->hasSSE42())
setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- if (!UseSoftFloat && Subtarget->hasAVX()) {
+ if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
addRegisterClass(MVT::v16i16, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
@@ -1008,18 +1047,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom);
- setOperationAction(ISD::SRL, MVT::v4i64, Custom);
- setOperationAction(ISD::SRL, MVT::v8i32, Custom);
setOperationAction(ISD::SRL, MVT::v16i16, Custom);
setOperationAction(ISD::SRL, MVT::v32i8, Custom);
- setOperationAction(ISD::SHL, MVT::v4i64, Custom);
- setOperationAction(ISD::SHL, MVT::v8i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i16, Custom);
setOperationAction(ISD::SHL, MVT::v32i8, Custom);
- setOperationAction(ISD::SRA, MVT::v8i32, Custom);
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
+ setOperationAction(ISD::SRA, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
@@ -1030,25 +1065,60 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+
+ if (Subtarget->hasAVX2()) {
+ setOperationAction(ISD::ADD, MVT::v4i64, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i32, Legal);
+ setOperationAction(ISD::ADD, MVT::v16i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i32, Legal);
+ setOperationAction(ISD::SUB, MVT::v16i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Legal);
+ setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+ // Don't lower v32i8 because there is no 128-bit byte mul
+
+ setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+
+ setOperationAction(ISD::SRL, MVT::v4i64, Legal);
+ setOperationAction(ISD::SRL, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SHL, MVT::v4i64, Legal);
+ setOperationAction(ISD::SHL, MVT::v8i32, Legal);
+
+ setOperationAction(ISD::SRA, MVT::v8i32, Legal);
+ } else {
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v16i16, Custom);
+ setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v16i16, Custom);
+ setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::MUL, MVT::v4i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i16, Custom);
+ // Don't lower v32i8 because there is no 128-bit byte mul
- setOperationAction(ISD::ADD, MVT::v4i64, Custom);
- setOperationAction(ISD::ADD, MVT::v8i32, Custom);
- setOperationAction(ISD::ADD, MVT::v16i16, Custom);
- setOperationAction(ISD::ADD, MVT::v32i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SRL, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v4i64, Custom);
- setOperationAction(ISD::SUB, MVT::v8i32, Custom);
- setOperationAction(ISD::SUB, MVT::v16i16, Custom);
- setOperationAction(ISD::SUB, MVT::v32i8, Custom);
+ setOperationAction(ISD::SHL, MVT::v4i64, Custom);
+ setOperationAction(ISD::SHL, MVT::v8i32, Custom);
- setOperationAction(ISD::MUL, MVT::v4i64, Custom);
- setOperationAction(ISD::MUL, MVT::v8i32, Custom);
- setOperationAction(ISD::MUL, MVT::v16i16, Custom);
- // Don't lower v32i8 because there is no 128-bit byte mul
+ setOperationAction(ISD::SRA, MVT::v8i32, Custom);
+ }
// Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@@ -1099,7 +1169,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// of this type with custom code.
for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
+ Custom);
}
// We want to custom lower some of our intrinsics.
@@ -1137,7 +1208,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
- setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SHL);
@@ -1152,9 +1222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasBMI())
+ setTargetDAGCombine(ISD::XOR);
computeRegisterProperties();
@@ -1166,10 +1240,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
maxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
- setPrefLoopAlignment(16);
+ setPrefLoopAlignment(4); // 2^4 bytes.
benefitFromCodePlacementOpt = true;
- setPrefFunctionAlignment(4);
+ setPrefFunctionAlignment(4); // 2^4 bytes.
}
@@ -1219,7 +1293,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
}
unsigned Align = 4;
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
getMaxByValAlign(Ty, Align);
return Align;
}
@@ -1230,7 +1304,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
-/// 'NonScalarIntSafe' is true, that means it's safe to return a
+/// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -1239,31 +1313,34 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const {
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe,
+ bool IsZeroVal,
bool MemcpyStrSrc,
MachineFunction &MF) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = MF.getFunction();
- if (NonScalarIntSafe &&
+ if (IsZeroVal &&
!F->hasFnAttr(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
(Subtarget->isUnalignedMemAccessFast() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16))) &&
Subtarget->getStackAlignment() >= 16) {
- if (Subtarget->hasAVX() &&
- Subtarget->getStackAlignment() >= 32)
- return MVT::v8f32;
- if (Subtarget->hasXMMInt())
+ if (Subtarget->getStackAlignment() >= 32) {
+ if (Subtarget->hasAVX2())
+ return MVT::v8i32;
+ if (Subtarget->hasAVX())
+ return MVT::v8f32;
+ }
+ if (Subtarget->hasSSE2())
return MVT::v4i32;
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
return MVT::v4f32;
} else if (!MemcpyStrSrc && Size >= 8 &&
!Subtarget->is64Bit() &&
Subtarget->getStackAlignment() >= 8 &&
- Subtarget->hasXMMInt()) {
+ Subtarget->hasSSE2()) {
// Do not use f64 to lower memcpy if source is string constant. It's
// better to use i32 to avoid the loads.
return MVT::f64;
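// [Editor's sketch, not part of the patch] A simplified restatement of the
// decision ladder above; it omits the unaligned-access-fast and
// NoImplicitFloat conditions, and the enum and helper names are
// hypothetical stand-ins.
#include <cstdint>
enum class MemOpVT { v8i32, v8f32, v4i32, v4f32, f64, Scalar };
static MemOpVT pickMemOpType(bool AVX2, bool AVX, bool SSE2, bool SSE1,
                             unsigned StackAlign, bool IsZeroVal,
                             bool MemcpyStrSrc, uint64_t Size, bool Is64Bit) {
  if (IsZeroVal && Size >= 16 && StackAlign >= 16) {
    if (StackAlign >= 32) {
      if (AVX2) return MemOpVT::v8i32;
      if (AVX)  return MemOpVT::v8f32;
    }
    if (SSE2) return MemOpVT::v4i32;
    if (SSE1) return MemOpVT::v4f32;
  } else if (!MemcpyStrSrc && Size >= 8 && !Is64Bit && StackAlign >= 8 &&
             SSE2)
    return MemOpVT::f64;   // one f64 store instead of two i32 stores
  return MemOpVT::Scalar;
}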
@@ -1428,14 +1505,14 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// or SSE or MMX vectors.
if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
- (Subtarget->is64Bit() && !Subtarget->hasXMM())) {
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
// Likewise we can't return F64 values with SSE1 only. gcc does so, but
// llvm-gcc has never done it right and no one has noticed, so this
// should be OK for now.
if (ValVT == MVT::f64 &&
- (Subtarget->is64Bit() && !Subtarget->hasXMMInt()))
+ (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
@@ -1461,7 +1538,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
ValToCopy);
// If we don't have SSE2 available, convert to v4f32 so the generated
// register is legal.
- if (!Subtarget->hasXMMInt())
+ if (!Subtarget->hasSSE2())
ValToCopy = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,ValToCopy);
}
}
@@ -1501,15 +1578,21 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MVT::Other, &RetOps[0], RetOps.size());
}
-bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
+bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (N->getNumValues() != 1)
return false;
if (!N->hasNUsesOfValue(1, 0))
return false;
+ SDValue TCChain = Chain;
SDNode *Copy = *N->use_begin();
- if (Copy->getOpcode() != ISD::CopyToReg &&
- Copy->getOpcode() != ISD::FP_EXTEND)
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
+ return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() != ISD::FP_EXTEND)
return false;
bool HasRet = false;
@@ -1520,7 +1603,11 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
HasRet = true;
}
- return HasRet;
+ if (!HasRet)
+ return false;
+
+ Chain = TCChain;
+ return true;
}
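// [Editor's illustration, not part of the patch] Threading the chain out
// through the new &Chain parameter lets a trivial forwarder like the one
// below be emitted as `jmp callee`, even though its result is first copied
// into the return register.
double callee(double);
double caller(double x) { return callee(x); }   // eligible tail call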
EVT
@@ -1561,7 +1648,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasXMM())) {
+ ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -1651,7 +1738,7 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!CI->isTailCall())
+ if (!CI->isTailCall() || getTargetMachine().Options.DisableTailCalls)
return false;
CallSite CS(CI);
@@ -1664,7 +1751,8 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
+static bool FuncIsMadeTailCallSafe(CallingConv::ID CC,
+ bool GuaranteedTailCallOpt) {
return GuaranteedTailCallOpt && IsTailCallConvention(CC);
}
@@ -1678,7 +1766,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv);
+ bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv,
+ getTargetMachine().Options.GuaranteedTailCallOpt);
bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
EVT ValVT;
@@ -1704,7 +1793,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0);
+ false, false, false, 0);
}
}
@@ -1728,6 +1817,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
MachineFrameInfo *MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget->is64Bit();
+ bool IsWindows = Subtarget->isTargetWindows();
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
@@ -1758,7 +1848,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
RC = X86::GR32RegisterClass;
else if (Is64Bit && RegVT == MVT::i64)
@@ -1807,7 +1897,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If value is passed via pointer - do a load.
if (VA.getLocInfo() == CCValAssign::Indirect)
ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
InVals.push_back(ArgValue);
}
@@ -1828,7 +1918,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned StackSize = CCInfo.getNextStackOffset();
// Align stack specially for tail calls.
- if (FuncIsMadeTailCallSafe(CallConv))
+ if (FuncIsMadeTailCallSafe(CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
// If the function takes variable number of arguments, make a frame index for
@@ -1842,17 +1933,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
- static const unsigned GPR64ArgRegsWin64[] = {
+ static const uint16_t GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const unsigned GPR64ArgRegs64Bit[] = {
+ static const uint16_t GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
- static const unsigned XMMArgRegs64Bit[] = {
+ static const uint16_t XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const unsigned *GPR64ArgRegs;
+ const uint16_t *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
@@ -1865,17 +1956,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
GPR64ArgRegs = GPR64ArgRegs64Bit;
- NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit, TotalNumXMMRegs);
+ NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs64Bit,
+ TotalNumXMMRegs);
}
unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
TotalNumIntRegs);
bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
- assert(!(NumXMMRegs && !Subtarget->hasXMM()) &&
+ assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
"SSE register cannot be used when SSE is disabled!");
- assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
+ assert(!(NumXMMRegs && MF.getTarget().Options.UseSoftFloat &&
+ NoImplicitFloatOps) &&
"SSE register cannot be used when SSE is disabled!");
- if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasXMM())
+ if (MF.getTarget().Options.UseSoftFloat || NoImplicitFloatOps ||
+ !Subtarget->hasSSE1())
// Kernel mode asks for SSE to be disabled, so don't push them
// on the stack.
TotalNumXMMRegs = 0;
@@ -1892,8 +1986,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
- // registers, then we must store them to their spots on the stack so they
- // may be loaded by deferencing the result of va_next.
+ // registers, then we must store them to their spots on the stack so
+ // they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(TotalNumIntRegs * 8 + NumXMMRegs * 16);
FuncInfo->setRegSaveFrameIndex(
@@ -1953,12 +2047,14 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ MF.getTarget().Options.GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
// If this is an sret function, the return should pop the hidden pointer.
- if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins))
+ if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ ArgsAreStructReturn(Ins))
FuncInfo->setBytesToPopOnReturn(4);
}
@@ -2006,7 +2102,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
// Load the "old" Return address.
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return SDValue(OutRetAddr.getNode(), 1);
}
@@ -2033,7 +2129,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
SDValue
X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -2042,9 +2138,13 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
MachineFunction &MF = DAG.getMachineFunction();
bool Is64Bit = Subtarget->is64Bit();
bool IsWin64 = Subtarget->isTargetWin64();
+ bool IsWindows = Subtarget->isTargetWindows();
bool IsStructRet = CallIsStructReturn(Outs);
bool IsSibcall = false;
+ if (MF.getTarget().Options.DisableTailCalls)
+ isTailCall = false;
+
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
@@ -2053,7 +2153,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
- if (!GuaranteedTailCallOpt && isTailCall)
+ if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
IsSibcall = true;
if (isTailCall)
@@ -2081,7 +2181,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// This is a sibcall. The memory operands are available in caller's
// own caller's stack.
NumBytes = 0;
- else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
+ else if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ IsTailCallConvention(CallConv))
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
@@ -2231,12 +2332,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
- static const unsigned XMMArgRegs[] = {
+ static const uint16_t XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
- assert((Subtarget->hasXMM() || !NumXMMRegs)
+ assert((Subtarget->hasSSE1() || !NumXMMRegs)
&& "SSE registers cannot be used when SSE is disabled");
Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
@@ -2260,7 +2361,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
int FI = 0;
// Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isRegLoc())
@@ -2368,7 +2469,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (ExtraLoad)
Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(),
- false, false, 0);
+ false, false, false, 0);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -2421,6 +2522,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Is64Bit && isVarArg && !IsWin64)
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
if (InFlag.getNode())
Ops.push_back(InFlag);
@@ -2440,12 +2547,15 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
+ getTargetMachine().Options.GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
- else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
+ else if (!Is64Bit && !IsTailCallConvention(CallConv) && !IsWindows &&
+ IsStructRet)
// If this is a call to a struct-return function, the callee
// pops the hidden struct pointer, so we have to push it back.
// This is common for Darwin/X86, Linux & Mingw32 targets.
+ // For MSVC Win32 targets, the caller pops the hidden struct pointer.
NumBytesForCalleeToPush = 4;
else
NumBytesForCalleeToPush = 0; // Callee pops nothing.
@@ -2598,7 +2708,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CallerCC = CallerF->getCallingConv();
bool CCMatch = CallerCC == CalleeCC;
- if (GuaranteedTailCallOpt) {
+ if (getTargetMachine().Options.GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) && CCMatch)
return true;
return false;
@@ -2641,9 +2751,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
}
- // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
- // Therefore if it's not used by the call it is not safe to optimize this into
- // a sibcall.
+ // If the call result is in ST0 / ST1, it needs to be popped off the x87
+ // stack. Therefore, if it's not used by the call it is not safe to optimize
+ // this into a sibcall.
bool Unused = false;
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
if (!Ins[i].Used) {
@@ -2785,9 +2895,8 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::SHUFPD:
+ case X86ISD::SHUFP:
case X86ISD::PALIGN:
- case X86ISD::SHUFPS:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
case X86ISD::MOVHLPS:
@@ -2798,34 +2907,16 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVDDUP:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
return true;
}
- return false;
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SelectionDAG &DAG) {
+ SDValue V1, SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::MOVSHDUP:
@@ -2833,39 +2924,32 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVDDUP:
return DAG.getNode(Opc, dl, VT, V1);
}
-
- return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
+ case X86ISD::VPERMILP:
return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
}
-
- return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
- SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ SDValue V1, SDValue V2, unsigned TargetMask,
+ SelectionDAG &DAG) {
switch(Opc) {
default: llvm_unreachable("Unknown x86 shuffle node");
case X86ISD::PALIGN:
- case X86ISD::SHUFPD:
- case X86ISD::SHUFPS:
- case X86ISD::VPERM2F128:
+ case X86ISD::SHUFP:
+ case X86ISD::VPERM2X128:
return DAG.getNode(Opc, dl, VT, V1, V2,
DAG.getConstant(TargetMask, MVT::i8));
}
- return SDValue();
}
static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
@@ -2879,25 +2963,10 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVLPD:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKL:
+ case X86ISD::UNPCKH:
return DAG.getNode(Opc, dl, VT, V1, V2);
}
- return SDValue();
}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
@@ -3092,17 +3161,6 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
return (Val < 0) || (Val >= Low && Val < Hi);
}
-/// isUndefOrInRange - Return true if every element in Mask, begining
-/// from position Pos and ending in Pos+Size, falls within the specified
-/// range (L, L+Pos]. or is undef.
-static bool isUndefOrInRange(const SmallVectorImpl<int> &Mask,
- int Pos, int Size, int Low, int Hi) {
- for (int i = Pos, e = Pos+Size; i != e; ++i)
- if (!isUndefOrInRange(Mask[i], Low, Hi))
- return false;
- return true;
-}
-
/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
@@ -3114,7 +3172,7 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
/// isSequentialOrUndefInRange - Return true if every element in Mask, begining
/// from position Pos and ending in Pos+Size, falls within the specified
/// sequential range (L, L+Pos]. or is undef.
-static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
+static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
int Pos, int Size, int Low) {
for (int i = Pos, e = Pos+Size; i != e; ++i, ++Low)
if (!isUndefOrEqual(Mask[i], Low))
@@ -3125,7 +3183,7 @@ static bool isSequentialOrUndefInRange(const SmallVectorImpl<int> &Mask,
/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
-static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
if (VT == MVT::v4f32 || VT == MVT::v4i32 )
return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
if (VT == MVT::v2f64 || VT == MVT::v2i64)
@@ -3133,302 +3191,188 @@ static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
return false;
}
-bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPSHUFDMask(M, N->getValueType(0));
-}
-
/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
// Lower quadword copied in order or undef.
- for (int i = 0; i != 4; ++i)
- if (Mask[i] >= 0 && Mask[i] != i)
- return false;
+ if (!isSequentialOrUndefInRange(Mask, 0, 4, 0))
+ return false;
// Upper quadword shuffled.
- for (int i = 4; i != 8; ++i)
+ for (unsigned i = 4; i != 8; ++i)
if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
return false;
return true;
}
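// [Editor's example, not part of the patch] A self-contained restatement of
// the v8i16 predicate above, with one valid and one invalid sample mask;
// isPSHUFHWMask8 is a hypothetical stand-in.
#include <array>
#include <cassert>
static bool isPSHUFHWMask8(const std::array<int, 8> &M) {
  for (int i = 0; i != 4; ++i)        // lower quad copied in order or undef
    if (M[i] >= 0 && M[i] != i) return false;
  for (int i = 4; i != 8; ++i)        // upper quad shuffled within 4..7
    if (M[i] >= 0 && (M[i] < 4 || M[i] > 7)) return false;
  return true;
}
int main() {
  const std::array<int, 8> Good = {0, 1, 2, 3, 7, 6, 5, 4};  // reverse upper
  const std::array<int, 8> Bad  = {4, 1, 2, 3, 4, 5, 6, 7};  // 0 crosses quads
  assert(isPSHUFHWMask8(Good) && !isPSHUFHWMask8(Bad));
  return 0;
}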
-bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPSHUFHWMask(M, N->getValueType(0));
-}
-
/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT) {
if (VT != MVT::v8i16)
return false;
// Upper quadword copied in order.
- for (int i = 4; i != 8; ++i)
- if (Mask[i] >= 0 && Mask[i] != i)
- return false;
+ if (!isSequentialOrUndefInRange(Mask, 4, 4, 4))
+ return false;
// Lower quadword shuffled.
- for (int i = 0; i != 4; ++i)
+ for (unsigned i = 0; i != 4; ++i)
if (Mask[i] >= 4)
return false;
return true;
}
-bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isPSHUFLWMask(M, N->getValueType(0));
-}
-
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
-static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool hasSSSE3OrAVX) {
- int i, e = VT.getVectorNumElements();
- if (VT.getSizeInBits() != 128 && VT.getSizeInBits() != 64)
+static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
+ (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
return false;
- // Do not handle v2i64 / v2f64 shuffles with palignr.
- if (e < 4 || !hasSSSE3OrAVX)
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ // Do not handle 64-bit element shuffles with palignr.
+ if (NumLaneElts == 2)
return false;
- for (i = 0; i != e; ++i)
- if (Mask[i] >= 0)
- break;
+ for (unsigned l = 0; l != NumElts; l+=NumLaneElts) {
+ unsigned i;
+ for (i = 0; i != NumLaneElts; ++i) {
+ if (Mask[i+l] >= 0)
+ break;
+ }
- // All undef, not a palignr.
- if (i == e)
- return false;
+ // Lane is all undef; go to the next lane
+ if (i == NumLaneElts)
+ continue;
- // Make sure we're shifting in the right direction.
- if (Mask[i] <= i)
- return false;
+ int Start = Mask[i+l];
- int s = Mask[i] - i;
+ // Make sure it's in this lane in one of the sources
+ if (!isUndefOrInRange(Start, l, l+NumLaneElts) &&
+ !isUndefOrInRange(Start, l+NumElts, l+NumElts+NumLaneElts))
+ return false;
- // Check the rest of the elements to see if they are consecutive.
- for (++i; i != e; ++i) {
- int m = Mask[i];
- if (m >= 0 && m != s+i)
+ // If not lane 0, then we must match lane 0
+ if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Start, Mask[i]+l))
return false;
- }
- return true;
-}
-/// isVSHUFPSYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPSY.
-static bool isVSHUFPSYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
+ // Correct second source to be contiguous with first source
+ if (Start >= (int)NumElts)
+ Start -= NumElts - NumLaneElts;
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
- return false;
+ // Make sure we're shifting in the right direction.
+ if (Start <= (int)(i+l))
+ return false;
- if (NumElems != 8)
- return false;
+ Start -= i;
- // VSHUFPSY divides the resulting vector into 4 chunks.
- // The sources are also splitted into 4 chunks, and each destination
- // chunk must come from a different source chunk.
- //
- // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
- // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y9
- //
- // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
- // Y3..Y0, Y3..Y0, X3..X0, X3..X0
- //
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != NumLaneElts; ++i) {
+ int Idx = Mask[i+l];
- // The mask of the second half must be the same as the first but with
- // the appropriate offsets. This works in the same way as VPERMILPS
- // works with masks.
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i) {
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
- }
- for (int i = QuarterSize*3; i < NumElems; ++i) {
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
- int FstHalfIdx = i-HalfSize;
- if (Mask[FstHalfIdx] < 0)
- continue;
- if (!isUndefOrEqual(Mask[i], Mask[FstHalfIdx]+HalfSize))
- return false;
+ // Make sure it's in this lane
+ if (!isUndefOrInRange(Idx, l, l+NumLaneElts) &&
+ !isUndefOrInRange(Idx, l+NumElts, l+NumElts+NumLaneElts))
+ return false;
+
+ // If not lane 0, then we must match lane 0
+ if (l != 0 && Mask[i] >= 0 && !isUndefOrEqual(Idx, Mask[i]+l))
+ return false;
+
+ if (Idx >= (int)NumElts)
+ Idx -= NumElts - NumLaneElts;
+ if (!isUndefOrEqual(Idx, Start+i))
+ return false;
+
+ }
}
return true;
}
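// [Editor's example, not part of the patch; assumes SSSE3] A concrete
// instance of the predicate above: for v16i8 the shuffle mask
// {1,2,...,15,16} passes the per-lane checks, because every element is a
// consecutive byte of the 32-byte concatenation of the two sources, which
// is exactly what PALIGNR computes.
#include <tmmintrin.h>
__m128i rotate_by_one(__m128i v1, __m128i v2) {
  // Bytes 1..15 of v1 followed by byte 0 of v2, i.e. mask {1,...,15,16}.
  return _mm_alignr_epi8(v2, v1, 1);
}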
-/// getShuffleVSHUFPSYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPSY instruction.
-static unsigned getShuffleVSHUFPSYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 8 && VT.getSizeInBits() == 256 &&
- "Only supports v8i32 and v8f32 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
+/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
+/// the two vector operands have swapped position.
+static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
+ unsigned NumElems) {
+ for (unsigned i = 0; i != NumElems; ++i) {
+ int idx = Mask[i];
+ if (idx < 0)
continue;
- // The mask of the first half must be equal to the second one.
- unsigned Shamt = (i%HalfSize)*2;
- unsigned Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << Shamt;
+ else if (idx < (int)NumElems)
+ Mask[i] = idx + NumElems;
+ else
+ Mask[i] = idx - NumElems;
}
-
- return Mask;
}
-/// isVSHUFPDYMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 256-bit
-/// VSHUFPDY. This shuffle doesn't have the same restriction as the PS
-/// version and the mask of the second half isn't binded with the first
-/// one.
-static bool isVSHUFPDYMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElems = VT.getVectorNumElements();
-
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to 128/256-bit
+/// SHUFPS and SHUFPD. If Commuted is true, it instead checks that the
+/// sources are the reverse of what x86 shuffles want.
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
+ bool Commuted = false) {
+ if (!HasAVX && VT.getSizeInBits() == 256)
return false;
- if (NumElems != 4)
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElems = NumElems/NumLanes;
+
+ if (NumLaneElems != 2 && NumLaneElems != 4)
return false;
// VSHUFPSY divides the resulting vector into 4 chunks.
// The sources are also split into 4 chunks, and each destination
// chunk must come from a different source chunk.
//
+ // SRC1 => X7 X6 X5 X4 X3 X2 X1 X0
+ // SRC2 => Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+ //
+ // DST => Y7..Y4, Y7..Y4, X7..X4, X7..X4,
+ // Y3..Y0, Y3..Y0, X3..X0, X3..X0
+ //
+ // VSHUFPDY divides the resulting vector into 4 chunks.
+ // The sources are also split into 4 chunks, and each destination
+ // chunk must come from a different source chunk.
+ //
// SRC1 => X3 X2 X1 X0
// SRC2 => Y3 Y2 Y1 Y0
//
- // DST => Y2..Y3, X2..X3, Y1..Y0, X1..X0
+ // DST => Y3..Y2, X3..X2, Y1..Y0, X1..X0
//
- int QuarterSize = NumElems/4;
- int HalfSize = QuarterSize*2;
- for (int i = 0; i < QuarterSize; ++i)
- if (!isUndefOrInRange(Mask[i], 0, HalfSize))
- return false;
- for (int i = QuarterSize; i < QuarterSize*2; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems+HalfSize))
- return false;
- for (int i = QuarterSize*2; i < QuarterSize*3; ++i)
- if (!isUndefOrInRange(Mask[i], HalfSize, NumElems))
- return false;
- for (int i = QuarterSize*3; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems+HalfSize, NumElems*2))
- return false;
-
- return true;
-}
-
-/// getShuffleVSHUFPDYImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VSHUFPDY instruction.
-static unsigned getShuffleVSHUFPDYImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
-
- assert(NumElems == 4 && VT.getSizeInBits() == 256 &&
- "Only supports v4i64 and v4f64 types");
-
- int HalfSize = NumElems/2;
- unsigned Mask = 0;
- for (int i = 0; i != NumElems ; ++i) {
- if (SVOp->getMaskElt(i) < 0)
- continue;
- int Elt = SVOp->getMaskElt(i) % HalfSize;
- Mask |= Elt << i;
+ unsigned HalfLaneElems = NumLaneElems/2;
+ for (unsigned l = 0; l != NumElems; l += NumLaneElems) {
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ int Idx = Mask[i+l];
+ unsigned RngStart = l + ((Commuted == (i<HalfLaneElems)) ? NumElems : 0);
+ if (!isUndefOrInRange(Idx, RngStart, RngStart+NumLaneElems))
+ return false;
+ // For VSHUFPSY, the mask of the second half must be the same as the
+ // first but with the appropriate offsets. This works in the same way as
+ // VPERMILPS works with masks.
+ if (NumElems != 8 || l == 0 || Mask[i] < 0)
+ continue;
+ if (!isUndefOrEqual(Idx, Mask[i]+l))
+ return false;
+ }
}
- return Mask;
-}
-
-/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to 128-bit
-/// SHUFPS and SHUFPD.
-static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (VT.getSizeInBits() != 128)
- return false;
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
-
- return true;
-}
-
-bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isSHUFPMask(M, N->getValueType(0));
-}
-
-/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
-/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
-/// half elements to come from vector 1 (which would equal the dest.) and
-/// the upper half to come from vector 2.
-static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
-
- if (NumElems != 2 && NumElems != 4)
- return false;
-
- int Half = NumElems / 2;
- for (int i = 0; i < Half; ++i)
- if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
- return false;
- for (int i = Half; i < NumElems; ++i)
- if (!isUndefOrInRange(Mask[i], 0, NumElems))
- return false;
return true;
}
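+// For example (hypothetical v4f32 mask, Commuted = false): <3, 1, 4, 6> is
+// accepted, since the low half <3, 1> draws only from the first source
+// (range [0, 4)) and the high half <4, 6> only from the second (range
+// [4, 8)). With Commuted = true the two expected source ranges are swapped.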
-static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedSHUFPMask(M, N->getValueType(0));
-}
-
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
-bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+static bool isMOVHLPSMask(ArrayRef<int> Mask, EVT VT) {
unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
@@ -3438,17 +3382,16 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
return false;
// Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
- return isUndefOrEqual(N->getMaskElt(0), 6) &&
- isUndefOrEqual(N->getMaskElt(1), 7) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
+ return isUndefOrEqual(Mask[0], 6) &&
+ isUndefOrEqual(Mask[1], 7) &&
+ isUndefOrEqual(Mask[2], 2) &&
+ isUndefOrEqual(Mask[3], 3);
}
/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
-bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
+static bool isMOVHLPS_v_undef_Mask(ArrayRef<int> Mask, EVT VT) {
unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
@@ -3457,26 +3400,29 @@ bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
if (NumElems != 4)
return false;
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
+ return isUndefOrEqual(Mask[0], 2) &&
+ isUndefOrEqual(Mask[1], 3) &&
+ isUndefOrEqual(Mask[2], 2) &&
+ isUndefOrEqual(Mask[3], 3);
}
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
-bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+static bool isMOVLPMask(ArrayRef<int> Mask, EVT VT) {
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ unsigned NumElems = VT.getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(Mask[i], i + NumElems))
return false;
- for (unsigned i = NumElems/2; i < NumElems; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i))
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
return false;
return true;
@@ -3484,19 +3430,19 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
-bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
+static bool isMOVLHPSMask(ArrayRef<int> Mask, EVT VT) {
+ unsigned NumElems = VT.getVectorNumElements();
if ((NumElems != 2 && NumElems != 4)
- || N->getValueType(0).getSizeInBits() > 128)
+ || VT.getSizeInBits() > 128)
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i))
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
return false;
- for (unsigned i = 0; i < NumElems/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
+ for (unsigned i = 0; i != NumElems/2; ++i)
+ if (!isUndefOrEqual(Mask[i + NumElems/2], i + NumElems))
return false;
return true;
@@ -3504,14 +3450,15 @@ bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
-static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool V2IsSplat = false) {
- int NumElts = VT.getVectorNumElements();
+static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
+ bool HasAVX2, bool V2IsSplat = false) {
+ unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3519,11 +3466,9 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts;
- for (unsigned s = 0; s < NumLanes; ++s) {
- for (unsigned i = Start, j = s * NumLaneElts;
- i != End;
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = l*NumLaneElts;
+ i != (l+1)*NumLaneElts;
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3537,30 +3482,22 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
return false;
}
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
return true;
}
-bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
-}
-
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
-static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
- bool V2IsSplat = false) {
- int NumElts = VT.getVectorNumElements();
+static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
+ bool HasAVX2, bool V2IsSplat = false) {
+ unsigned NumElts = VT.getVectorNumElements();
assert((VT.is128BitVector() || VT.is256BitVector()) &&
"Unsupported vector type for unpckh");
- if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8)
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3568,11 +3505,9 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
unsigned NumLanes = VT.getSizeInBits()/128;
unsigned NumLaneElts = NumElts/NumLanes;
- unsigned Start = 0;
- unsigned End = NumLaneElts;
for (unsigned l = 0; l != NumLanes; ++l) {
- for (unsigned i = Start, j = (l*NumLaneElts)+NumLaneElts/2;
- i != End; i += 2, ++j) {
+ for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != (l+1)*NumLaneElts; i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
if (!isUndefOrEqual(BitI, j))
@@ -3585,42 +3520,39 @@ static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
return false;
}
}
- // Process the next 128 bits.
- Start += NumLaneElts;
- End += NumLaneElts;
}
return true;
}
-bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
-}
-
/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
-static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
- if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
+ bool HasAVX2) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
// For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
// FIXME: Need a better way to get rid of this, there's no latency difference
// between UNPCKLPD and MOVDDUP, the latter should always be checked first and
// the former later. We should also remove the "_undef" special mask.
- if (NumElems == 4 && VT.getSizeInBits() == 256)
+ if (NumElts == 4 && VT.getSizeInBits() == 256)
return false;
// Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
// independently on 128-bit lanes.
- unsigned NumLanes = VT.getSizeInBits() / 128;
- unsigned NumLaneElts = NumElems / NumLanes;
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
- for (unsigned s = 0; s < NumLanes; ++s) {
- for (unsigned i = s * NumLaneElts, j = s * NumLaneElts;
- i != NumLaneElts * (s + 1);
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = l*NumLaneElts;
+ i != (l+1)*NumLaneElts;
i += 2, ++j) {
int BitI = Mask[i];
int BitI1 = Mask[i+1];
@@ -3635,81 +3567,77 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
return true;
}
-bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
-}
-
/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
- int NumElems = VT.getVectorNumElements();
- if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for unpckh");
+
+ if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
+ (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
return false;
- for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
+ // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
+ // independently on 128-bit lanes.
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ for (unsigned l = 0; l != NumLanes; ++l) {
+ for (unsigned i = l*NumLaneElts, j = (l*NumLaneElts)+NumLaneElts/2;
+ i != (l+1)*NumLaneElts; i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
}
return true;
}
-bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
-}
-
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
-static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
+static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getVectorElementType().getSizeInBits() < 32)
return false;
+ if (VT.getSizeInBits() == 256)
+ return false;
- int NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
if (!isUndefOrEqual(Mask[0], NumElts))
return false;
- for (int i = 1; i < NumElts; ++i)
+ for (unsigned i = 1; i != NumElts; ++i)
if (!isUndefOrEqual(Mask[i], i))
return false;
return true;
}
-bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return ::isMOVLMask(M, N->getValueType(0));
-}
-
-/// isVPERM2F128Mask - Match 256-bit shuffles where the elements are considered
+/// isVPERM2X128Mask - Match 256-bit shuffles where the elements are considered
/// as permutations between 128-bit chunks or halves. As an example, in the
/// shuffle below:
/// vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
/// The first half comes from the second half of V1 and the second half from
/// the second half of V2.
-static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256)
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
+ if (!HasAVX || VT.getSizeInBits() != 256)
return false;
// The shuffle result is divided into half A and half B. In total the two
// sources have 4 halves, namely: C, D, E, F. The final values of A and
// B must come from C, D, E or F.
- int HalfSize = VT.getVectorNumElements()/2;
+ unsigned HalfSize = VT.getVectorNumElements()/2;
bool MatchA = false, MatchB = false;
// Check if A comes from one of C, D, E, F.
- for (int Half = 0; Half < 4; ++Half) {
+ for (unsigned Half = 0; Half != 4; ++Half) {
if (isSequentialOrUndefInRange(Mask, 0, HalfSize, Half*HalfSize)) {
MatchA = true;
break;
@@ -3717,7 +3645,7 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
}
// Check if B comes from one of C, D, E, F.
- for (int Half = 0; Half < 4; ++Half) {
+ for (unsigned Half = 0; Half != 4; ++Half) {
if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, Half*HalfSize)) {
MatchB = true;
break;
@@ -3727,22 +3655,21 @@ static bool isVPERM2F128Mask(const SmallVectorImpl<int> &Mask, EVT VT,
return MatchA && MatchB;
}
-/// getShuffleVPERM2F128Immediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERM2F128 instructions.
-static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+/// getShuffleVPERM2X128Immediate - Return the appropriate immediate to shuffle
+/// the specified VECTOR_MASK mask with VPERM2F128/VPERM2I128 instructions.
+static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
EVT VT = SVOp->getValueType(0);
- int HalfSize = VT.getVectorNumElements()/2;
+ unsigned HalfSize = VT.getVectorNumElements()/2;
- int FstHalf = 0, SndHalf = 0;
- for (int i = 0; i < HalfSize; ++i) {
+ unsigned FstHalf = 0, SndHalf = 0;
+ for (unsigned i = 0; i < HalfSize; ++i) {
if (SVOp->getMaskElt(i) > 0) {
FstHalf = SVOp->getMaskElt(i)/HalfSize;
break;
}
}
- for (int i = HalfSize; i < HalfSize*2; ++i) {
+ for (unsigned i = HalfSize; i < HalfSize*2; ++i) {
if (SVOp->getMaskElt(i) > 0) {
SndHalf = SVOp->getMaskElt(i)/HalfSize;
break;
@@ -3752,141 +3679,56 @@ static unsigned getShuffleVPERM2F128Immediate(SDNode *N) {
return (FstHalf | (SndHalf << 4));
}
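+// For the v8i32 example mask cited above, <4, 5, 6, 7, 12, 13, 14, 15>:
+// HalfSize = 4, so FstHalf = 4/4 = 1 and SndHalf = 12/4 = 3, giving an
+// immediate of 1 | (3 << 4) = 0x31.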
-/// isVPERMILPDMask - Return true if the specified VECTOR_SHUFFLE operand
+/// isVPERMILPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to VPERMILP*.
/// Note that VPERMIL mask matching differs depending on whether the underlying
/// type is 32 or 64 bits. In VPERMILPS the high half of the mask should point
/// to the same elements as the low half, but within the high half of the source.
/// In VPERMILPD the two lanes can be shuffled independently of each other,
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPDMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
- return false;
-
- // Only match 256-bit with 64-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 4)
+/// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
+ if (!HasAVX)
return false;
- // The mask on the high lane is independent of the low. Both can match
- // any element in inside its own lane, but can't cross.
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int LaneStart = l*LaneSize;
- if (!isUndefOrInRange(Mask[i], LaneStart, LaneStart+LaneSize))
- return false;
- }
-
- return true;
-}
-
-/// isVPERMILPSMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to VPERMILPS*.
-/// Note that VPERMIL mask matching is different depending whether theunderlying
-/// type is 32 or 64. In the VPERMILPS the high half of the mask should point
-/// to the same elements of the low, but to the higher half of the source.
-/// In VPERMILPD the two lanes could be shuffled independently of each other
-/// with the same restriction that lanes can't be crossed.
-static bool isVPERMILPSMask(const SmallVectorImpl<int> &Mask, EVT VT,
- const X86Subtarget *Subtarget) {
unsigned NumElts = VT.getVectorNumElements();
- unsigned NumLanes = VT.getSizeInBits()/128;
-
- if (!Subtarget->hasAVX())
+ // Only match 256-bit with 32/64-bit types
+ if (VT.getSizeInBits() != 256 || (NumElts != 4 && NumElts != 8))
return false;
- // Only match 256-bit with 32-bit types
- if (VT.getSizeInBits() != 256 || NumElts != 8)
- return false;
-
- // The mask on the high lane should be the same as the low. Actually,
- // they can differ if any of the corresponding index in a lane is undef
- // and the other stays in range.
- int LaneSize = NumElts/NumLanes;
- for (int i = 0; i < LaneSize; ++i) {
- int HighElt = i+LaneSize;
- bool HighValid = isUndefOrInRange(Mask[HighElt], LaneSize, NumElts);
- bool LowValid = isUndefOrInRange(Mask[i], 0, LaneSize);
-
- if (!HighValid || !LowValid)
- return false;
- if (Mask[i] < 0 || Mask[HighElt] < 0)
- continue;
- if (Mask[HighElt]-Mask[i] != LaneSize)
- return false;
- }
-
- return true;
-}
-
-/// getShuffleVPERMILPSImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPS* instructions.
-static unsigned getShuffleVPERMILPSImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
- int LaneSize = NumElts/NumLanes;
-
- // Although the mask is equal for both lanes do it twice to get the cases
- // where a mask will match because the same mask element is undef on the
- // first half but valid on the second. This would get pathological cases
- // such as: shuffle <u, 0, 1, 2, 4, 4, 5, 6>, which is completely valid.
- unsigned Mask = 0;
- for (int l = 0; l < NumLanes; ++l) {
- for (int i = 0; i < LaneSize; ++i) {
- int MaskElt = SVOp->getMaskElt(i+(l*LaneSize));
- if (MaskElt < 0)
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned LaneSize = NumElts/NumLanes;
+ for (unsigned l = 0; l != NumElts; l += LaneSize) {
+ for (unsigned i = 0; i != LaneSize; ++i) {
+ if (!isUndefOrInRange(Mask[i+l], l, l+LaneSize))
+ return false;
+ if (NumElts != 8 || l == 0)
continue;
- if (MaskElt >= LaneSize)
- MaskElt -= LaneSize;
- Mask |= MaskElt << (i*2);
- }
- }
-
- return Mask;
-}
-
-/// getShuffleVPERMILPDImmediate - Return the appropriate immediate to shuffle
-/// the specified VECTOR_MASK mask with VPERMILPD* instructions.
-static unsigned getShuffleVPERMILPDImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VT = SVOp->getValueType(0);
-
- int NumElts = VT.getVectorNumElements();
- int NumLanes = VT.getSizeInBits()/128;
-
- unsigned Mask = 0;
- int LaneSize = NumElts/NumLanes;
- for (int l = 0; l < NumLanes; ++l)
- for (int i = l*LaneSize; i < LaneSize*(l+1); ++i) {
- int MaskElt = SVOp->getMaskElt(i);
- if (MaskElt < 0)
+ // VPERMILPS handling
+ if (Mask[i] < 0)
continue;
- Mask |= (MaskElt-l*LaneSize) << i;
+ if (!isUndefOrEqual(Mask[i+l], Mask[i]+l))
+ return false;
}
+ }
- return Mask;
+ return true;
}
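+// For example (hypothetical v8f32 mask): <1, 0, 3, 2, 5, 4, 7, 6> is a valid
+// VPERMILPS mask. No element crosses its 128-bit lane, and the high lane
+// repeats the low-lane pattern offset by LaneSize (Mask[i+4] == Mask[i] + 4).
+// For v4f64 masks the two lanes may be permuted independently.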
-/// isCommutedMOVL - Returns true if the shuffle mask is except the reverse
+/// isCommutedMOVLMask - Returns true if the shuffle mask is exactly the
/// reverse of what x86 movss wants: the lowest element must be the lowest
/// element of vector 2, and the other elements must come from vector 1 in order.
-static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
+static bool isCommutedMOVLMask(ArrayRef<int> Mask, EVT VT,
bool V2IsSplat = false, bool V2IsUndef = false) {
- int NumOps = VT.getVectorNumElements();
+ unsigned NumOps = VT.getVectorNumElements();
+ if (VT.getSizeInBits() == 256)
+ return false;
if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
return false;
if (!isUndefOrEqual(Mask[0], 0))
return false;
- for (int i = 1; i < NumOps; ++i)
+ for (unsigned i = 1; i != NumOps; ++i)
if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
(V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
(V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
@@ -3895,26 +3737,14 @@ static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
return true;
}
-static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
- bool V2IsUndef = false) {
- SmallVector<int, 8> M;
- N->getMask(M);
- return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
-}
-
/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
/// Masks to match: <1, 1, 3, 3> or <1, 1, 3, 3, 5, 5, 7, 7>
-bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
- return false;
-
- // The second vector must be undef
- if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+static bool isMOVSHDUPMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3())
return false;
- EVT VT = N->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
@@ -3922,9 +3752,9 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
return false;
// "i+1" is the value the indexed mask element must have
- for (unsigned i = 0; i < NumElems; i += 2)
- if (!isUndefOrEqual(N->getMaskElt(i), i+1) ||
- !isUndefOrEqual(N->getMaskElt(i+1), i+1))
+ for (unsigned i = 0; i != NumElems; i += 2)
+ if (!isUndefOrEqual(Mask[i], i+1) ||
+ !isUndefOrEqual(Mask[i+1], i+1))
return false;
return true;
@@ -3933,16 +3763,11 @@ bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N,
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
/// Masks to match: <0, 0, 2, 2> or <0, 0, 2, 2, 4, 4, 6, 6>
-bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- if (!Subtarget->hasSSE3() && !Subtarget->hasAVX())
- return false;
-
- // The second vector must be undef
- if (N->getOperand(1).getOpcode() != ISD::UNDEF)
+static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
+ const X86Subtarget *Subtarget) {
+ if (!Subtarget->hasSSE3())
return false;
- EVT VT = N->getValueType(0);
unsigned NumElems = VT.getVectorNumElements();
if ((VT.getSizeInBits() == 128 && NumElems != 4) ||
@@ -3950,9 +3775,9 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
return false;
// "i" is the value the indexed mask element must have
- for (unsigned i = 0; i < NumElems; i += 2)
- if (!isUndefOrEqual(N->getMaskElt(i), i) ||
- !isUndefOrEqual(N->getMaskElt(i+1), i))
+ for (unsigned i = 0; i != NumElems; i += 2)
+ if (!isUndefOrEqual(Mask[i], i) ||
+ !isUndefOrEqual(Mask[i+1], i))
return false;
return true;
@@ -3961,21 +3786,17 @@ bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N,
/// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to the 256-bit
/// version of MOVDDUP.
-static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
- const X86Subtarget *Subtarget) {
- EVT VT = N->getValueType(0);
- int NumElts = VT.getVectorNumElements();
- bool V2IsUndef = N->getOperand(1).getOpcode() == ISD::UNDEF;
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
+ unsigned NumElts = VT.getVectorNumElements();
- if (!Subtarget->hasAVX() || VT.getSizeInBits() != 256 ||
- !V2IsUndef || NumElts != 4)
+ if (!HasAVX || VT.getSizeInBits() != 256 || NumElts != 4)
return false;
- for (int i = 0; i != NumElts/2; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), 0))
+ for (unsigned i = 0; i != NumElts/2; ++i)
+ if (!isUndefOrEqual(Mask[i], 0))
return false;
- for (int i = NumElts/2; i != NumElts; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), NumElts/2))
+ for (unsigned i = NumElts/2; i != NumElts; ++i)
+ if (!isUndefOrEqual(Mask[i], NumElts/2))
return false;
return true;
}
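+// In other words, modulo undef entries the only v4f64 mask accepted here is
+// <0, 0, 2, 2>: each 128-bit half duplicates its own low element.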
@@ -3983,18 +3804,16 @@ static bool isMOVDDUPYMask(ShuffleVectorSDNode *N,
/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to the 128-bit
/// version of MOVDDUP.
-bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
- EVT VT = N->getValueType(0);
-
+static bool isMOVDDUPMask(ArrayRef<int> Mask, EVT VT) {
if (VT.getSizeInBits() != 128)
return false;
- int e = VT.getVectorNumElements() / 2;
- for (int i = 0; i < e; ++i)
- if (!isUndefOrEqual(N->getMaskElt(i), i))
+ unsigned e = VT.getVectorNumElements() / 2;
+ for (unsigned i = 0; i != e; ++i)
+ if (!isUndefOrEqual(Mask[i], i))
return false;
- for (int i = 0; i < e; ++i)
- if (!isUndefOrEqual(N->getMaskElt(e+i), i))
+ for (unsigned i = 0; i != e; ++i)
+ if (!isUndefOrEqual(Mask[e+i], i))
return false;
return true;
}
@@ -4039,31 +3858,43 @@ bool X86::isVINSERTF128Index(SDNode *N) {
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
-unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- int NumOperands = SVOp->getValueType(0).getVectorNumElements();
+/// Handles 128-bit and 256-bit.
+static unsigned getShuffleSHUFImmediate(ShuffleVectorSDNode *N) {
+ EVT VT = N->getValueType(0);
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for PSHUF/SHUFP");
- unsigned Shift = (NumOperands == 4) ? 2 : 1;
+ // Handle 128 and 256-bit vector lengths. AVX defines PSHUF/SHUFP to operate
+ // independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
+
+ assert((NumLaneElts == 2 || NumLaneElts == 4) &&
+ "Only supports 2 or 4 elements per lane");
+
+ unsigned Shift = (NumLaneElts == 4) ? 1 : 0;
unsigned Mask = 0;
- for (int i = 0; i < NumOperands; ++i) {
- int Val = SVOp->getMaskElt(NumOperands-i-1);
- if (Val < 0) Val = 0;
- if (Val >= NumOperands) Val -= NumOperands;
- Mask |= Val;
- if (i != NumOperands - 1)
- Mask <<= Shift;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Elt = N->getMaskElt(i);
+ if (Elt < 0) continue;
+ Elt %= NumLaneElts;
+ unsigned ShAmt = i << Shift;
+ if (ShAmt >= 8) ShAmt -= 8;
+ Mask |= Elt << ShAmt;
}
+
return Mask;
}
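+// Worked example (hypothetical v4f32 mask <1, 0, 6, 7>): NumLaneElts = 4, so
+// Shift = 1 and the immediate is 1 | (0 << 2) | (2 << 4) | (3 << 6) = 0xE1.
+// For 256-bit types the second lane wraps around (ShAmt -= 8) onto the same
+// bit positions, which is why both lanes must encode one per-lane pattern.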
/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
-unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+static unsigned getShufflePSHUFHWImmediate(ShuffleVectorSDNode *N) {
unsigned Mask = 0;
// 8 nodes, but we only care about the last 4.
for (unsigned i = 7; i >= 4; --i) {
- int Val = SVOp->getMaskElt(i);
+ int Val = N->getMaskElt(i);
if (Val >= 0)
Mask |= (Val - 4);
if (i != 4)
@@ -4074,12 +3905,11 @@ unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
-unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+static unsigned getShufflePSHUFLWImmediate(ShuffleVectorSDNode *N) {
unsigned Mask = 0;
// 8 nodes, but we only care about the first 4.
for (int i = 3; i >= 0; --i) {
- int Val = SVOp->getMaskElt(i);
+ int Val = N->getMaskElt(i);
if (Val >= 0)
Mask |= Val;
if (i != 0)
@@ -4090,18 +3920,24 @@ unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
-unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- EVT VVT = N->getValueType(0);
- unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
- int Val = 0;
+static unsigned getShufflePALIGNRImmediate(ShuffleVectorSDNode *SVOp) {
+ EVT VT = SVOp->getValueType(0);
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits() >> 3;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts/NumLanes;
- unsigned i, e;
- for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
+ int Val = 0;
+ unsigned i;
+ for (i = 0; i != NumElts; ++i) {
Val = SVOp->getMaskElt(i);
if (Val >= 0)
break;
}
+ if (Val >= (int)NumElts)
+ Val -= NumElts - NumLaneElts;
+
assert(Val - i > 0 && "PALIGNR imm should be positive");
return (Val - i) * EltSize;
}
@@ -4170,36 +4006,20 @@ static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
SVOp->getOperand(0), &MaskVec[0]);
}
-/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
-/// the two vector operands have swapped position.
-static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
- unsigned NumElems = VT.getVectorNumElements();
- for (unsigned i = 0; i != NumElems; ++i) {
- int idx = Mask[i];
- if (idx < 0)
- continue;
- else if (idx < (int)NumElems)
- Mask[i] = idx + NumElems;
- else
- Mask[i] = idx - NumElems;
- }
-}
-
/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
-static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
- EVT VT = Op->getValueType(0);
+static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, EVT VT) {
if (VT.getSizeInBits() != 128)
return false;
if (VT.getVectorNumElements() != 4)
return false;
for (unsigned i = 0, e = 2; i != e; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
+ if (!isUndefOrEqual(Mask[i], i+2))
return false;
for (unsigned i = 2; i != 4; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
+ if (!isUndefOrEqual(Mask[i], i+4))
return false;
return true;
}
@@ -4218,14 +4038,36 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
return true;
}
+// Test whether the given value is a vector value which will be legalized
+// into a load.
+static bool WillBeConstantPoolLoad(SDNode *N) {
+ if (N->getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+
+ // Check for any non-constant elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ switch (N->getOperand(i).getNode()->getOpcode()) {
+ case ISD::UNDEF:
+ case ISD::ConstantFP:
+ case ISD::Constant:
+ break;
+ default:
+ return false;
+ }
+
+ // Vectors of all-zeros and all-ones are materialized with special
+ // instructions rather than being loaded.
+ return !ISD::isBuildVectorAllZeros(N) &&
+ !ISD::isBuildVectorAllOnes(N);
+}
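+// For example, a BUILD_VECTOR of distinct constants such as
+// <1.0, 2.0, 3.0, 4.0> is typically legalized into a constant-pool load and
+// returns true here, whereas all-zeros and all-ones vectors return false
+// since they are materialized with XOR/PCMPEQ-style idioms instead.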
+
/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order). And since V1 will become the source of the
/// MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
- ShuffleVectorSDNode *Op) {
- EVT VT = Op->getValueType(0);
+ ArrayRef<int> Mask, EVT VT) {
if (VT.getSizeInBits() != 128)
return false;
@@ -4233,7 +4075,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
return false;
// If V2 is a vector load, don't do this transformation. We will try to use
// a load-folding SHUFPS op instead.
- if (ISD::isNON_EXTLoad(V2))
+ if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2))
return false;
unsigned NumElems = VT.getVectorNumElements();
@@ -4241,10 +4083,10 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
if (NumElems != 2 && NumElems != 4)
return false;
for (unsigned i = 0, e = NumElems/2; i != e; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i))
+ if (!isUndefOrEqual(Mask[i], i))
return false;
for (unsigned i = NumElems/2; i != NumElems; ++i)
- if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
+ if (!isUndefOrEqual(Mask[i], i+NumElems))
return false;
return true;
}
@@ -4292,15 +4134,15 @@ static bool isZeroShuffle(ShuffleVectorSDNode *N) {
/// getZeroVector - Returns a vector of specified type with all zero elements.
///
-static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
- DebugLoc dl) {
+static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
// Always build SSE zero vectors as <4 x i32> bitcasted
// to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 128) { // SSE
- if (HasXMMInt) { // SSE2
+ if (Subtarget->hasSSE2()) { // SSE2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
} else { // SSE1
@@ -4308,34 +4150,46 @@ static SDValue getZeroVector(EVT VT, bool HasXMMInt, SelectionDAG &DAG,
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
}
} else if (VT.getSizeInBits() == 256) { // AVX
- // 256-bit logic and arithmetic instructions in AVX are
- // all floating-point, no support for integer ops. Default
- // to emitting fp zeroed vectors then.
- SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ if (Subtarget->hasAVX2()) { // AVX2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else {
+ // 256-bit logic and arithmetic instructions in AVX are all
+ // floating-point, no support for integer ops. Emit fp zeroed vectors.
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
+ }
}
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-/// Always build ones vectors as <4 x i32>. For 256-bit types, use two
-/// <4 x i32> inserted in a <8 x i32> appropriately. Then bitcast to their
-/// original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
+/// no AVX2 support, use two <4 x i32> inserted into an <8 x i32> appropriately.
+/// Then bitcast to their original type, ensuring they get CSE'd.
+static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+ DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
assert((VT.is128BitVector() || VT.is256BitVector())
&& "Expected a 128-bit or 256-bit vector type");
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
- Cst, Cst, Cst, Cst);
-
- if (VT.is256BitVector()) {
- SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
- Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
- Vec = Insert128BitVector(InsV, Vec,
- DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ SDValue Vec;
+ if (VT.getSizeInBits() == 256) {
+ if (HasAVX2) { // AVX2
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else { // AVX
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ SDValue InsV = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, MVT::v8i32),
+ Vec, DAG.getConstant(0, MVT::i32), DAG, dl);
+ Vec = Insert128BitVector(InsV, Vec,
+ DAG.getConstant(4 /* NumElems/2 */, MVT::i32), DAG, dl);
+ }
+ } else {
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
}
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
@@ -4343,24 +4197,12 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
-static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- EVT VT = SVOp->getValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- bool Changed = false;
- SmallVector<int, 8> MaskVec;
- SVOp->getMask(MaskVec);
-
+static void NormalizeMask(SmallVectorImpl<int> &Mask, unsigned NumElems) {
for (unsigned i = 0; i != NumElems; ++i) {
- if (MaskVec[i] > (int)NumElems) {
- MaskVec[i] = NumElems;
- Changed = true;
+ if (Mask[i] > (int)NumElems) {
+ Mask[i] = NumElems;
}
}
- if (Changed)
- return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
- SVOp->getOperand(1), &MaskVec[0]);
- return SDValue(SVOp, 0);
}
/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
@@ -4464,7 +4306,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
if (Size == 256) {
- unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ unsigned Idx = (EltNo >= NumElems/2) ? NumElems/2 : 0;
V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
if (Idx > 0)
EltNo -= NumElems/2;
@@ -4496,11 +4338,12 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
- bool isZero, bool HasXMMInt,
+ bool IsZero,
+ const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
EVT VT = V2.getValueType();
- SDValue V1 = isZero
- ? getZeroVector(VT, HasXMMInt, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
+ SDValue V1 = IsZero
+ ? getZeroVector(VT, Subtarget, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<int, 16> MaskVec;
for (unsigned i = 0; i != NumElems; ++i)
@@ -4509,9 +4352,81 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
+/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the
+/// target specific opcode. Returns true if the Mask could be calculated.
+/// Sets IsUnary to true if the node uses only one source.
+static bool getTargetShuffleMask(SDNode *N, EVT VT,
+ SmallVectorImpl<int> &Mask, bool &IsUnary) {
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue ImmN;
+
+ IsUnary = false;
+ switch(N->getOpcode()) {
+ case X86ISD::SHUFP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::UNPCKH:
+ DecodeUNPCKHMask(VT, Mask);
+ break;
+ case X86ISD::UNPCKL:
+ DecodeUNPCKLMask(VT, Mask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, Mask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, Mask);
+ break;
+ case X86ISD::PSHUFD:
+ case X86ISD::VPERMILP:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = true;
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+ // Index 0 always comes from the first element of the second source;
+ // this is why MOVSS and MOVSD are used in the first place. The other
+ // elements come from the corresponding positions of the first source vector.
+ Mask.push_back(NumElems);
+ for (unsigned i = 1; i != NumElems; ++i) {
+ Mask.push_back(i);
+ }
+ break;
+ }
+ case X86ISD::VPERM2X128:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ break;
+ case X86ISD::MOVDDUP:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::PALIGN:
+ // Not yet implemented
+ return false;
+ default: llvm_unreachable("unknown target shuffle node");
+ }
+
+ return true;
+}
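+// Minimal usage sketch (hypothetical caller, mirroring the call made further
+// below in getShuffleScalarElt):
+//   SmallVector<int, 16> ShuffleMask;
+//   bool IsUnary;
+//   if (getTargetShuffleMask(N, VT, ShuffleMask, IsUnary)) {
+//     // ShuffleMask has one entry per result element: -1 for undef,
+//     // 0..NumElems-1 for the first source, NumElems and up for the second.
+//   }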
+
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
-static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
@@ -4522,129 +4437,34 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
// Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
- Index = SV->getMaskElt(Index);
+ int Elt = SV->getMaskElt(Index);
- if (Index < 0)
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- int NumElems = VT.getVectorNumElements();
- SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ unsigned NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
+ : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
}
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
- int NumElems = VT.getVectorNumElements();
- SmallVector<unsigned, 16> ShuffleMask;
+ unsigned NumElems = VT.getVectorNumElements();
+ SmallVector<int, 16> ShuffleMask;
SDValue ImmN;
+ bool IsUnary;
- switch(Opcode) {
- case X86ISD::SHUFPS:
- case X86ISD::SHUFPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeSHUFPSMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- DecodePUNPCKHMask(NumElems, ShuffleMask);
- break;
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- DecodeUNPCKHPMask(NumElems, ShuffleMask);
- break;
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(VT, ShuffleMask);
- break;
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
- DecodeUNPCKLPMask(VT, ShuffleMask);
- break;
- case X86ISD::MOVHLPS:
- DecodeMOVHLPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::MOVLHPS:
- DecodeMOVLHPSMask(NumElems, ShuffleMask);
- break;
- case X86ISD::PSHUFD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFMask(NumElems,
- cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFHW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::PSHUFLW:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVSS:
- case X86ISD::MOVSD: {
- // The index 0 always comes from the first element of the second source,
- // this is why MOVSS and MOVSD are used in the first place. The other
- // elements come from the other positions of the first source vector.
- unsigned OpNum = (Index == 0) ? 1 : 0;
- return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
- Depth+1);
- }
- case X86ISD::VPERMILPS:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPSY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPSMask(8, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPD:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(2, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERMILPDY:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERMILPDMask(4, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::VPERM2F128:
- ImmN = N->getOperand(N->getNumOperands()-1);
- DecodeVPERM2F128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(),
- ShuffleMask);
- break;
- case X86ISD::MOVDDUP:
- case X86ISD::MOVLHPD:
- case X86ISD::MOVLPD:
- case X86ISD::MOVLPS:
- case X86ISD::MOVSHDUP:
- case X86ISD::MOVSLDUP:
- case X86ISD::PALIGN:
- return SDValue(); // Not yet implemented.
- default:
- assert(0 && "unknown target shuffle node");
+ if (!getTargetShuffleMask(N, VT, ShuffleMask, IsUnary))
return SDValue();
- }
- Index = ShuffleMask[Index];
- if (Index < 0)
+ int Elt = ShuffleMask[Index];
+ if (Elt < 0)
return DAG.getUNDEF(VT.getVectorElementType());
- SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
- return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
+ : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -4660,7 +4480,7 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
return (Index == 0) ? V.getOperand(0)
- : DAG.getUNDEF(VT.getVectorElementType());
+ : DAG.getUNDEF(VT.getVectorElementType());
if (V.getOpcode() == ISD::BUILD_VECTOR)
return V.getOperand(Index);
@@ -4672,38 +4492,37 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// shuffle operation that are consecutively zero. The
/// search can start in two different directions, from left or right.
static
-unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, unsigned NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
- int i = 0;
-
- while (i < NumElems) {
+ unsigned i;
+ for (i = 0; i != NumElems; ++i) {
unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
- SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ SDValue Elt = getShuffleScalarElt(SVOp, Index, DAG, 0);
if (!(Elt.getNode() &&
(Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
break;
- ++i;
}
return i;
}
-/// isShuffleMaskConsecutive - Check if the shuffle mask indicies from MaskI to
-/// MaskE correspond consecutively to elements from one of the vector operands,
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices [MaskI, MaskE)
+/// correspond consecutively to elements from one of the vector operands,
/// starting from its index OpIdx. Also sets OpNum to the matching source operand.
static
-bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
- int OpIdx, int NumElems, unsigned &OpNum) {
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp,
+ unsigned MaskI, unsigned MaskE, unsigned OpIdx,
+ unsigned NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ for (unsigned i = MaskI; i != MaskE; ++i, ++OpIdx) {
int Idx = SVOp->getMaskElt(i);
// Ignore undef indices
if (Idx < 0)
continue;
- if (Idx < NumElems)
+ if (Idx < (int)NumElems)
SeenV1 = true;
else
SeenV2 = true;
@@ -4738,7 +4557,7 @@ static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
0, // Mask Start Index
- NumElems-NumZeros-1, // Mask End Index
+ NumElems-NumZeros, // Mask End Index(exclusive)
NumZeros, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -4771,7 +4590,7 @@ static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
//
if (!isShuffleMaskConsecutive(SVOp,
NumZeros, // Mask Start Index
- NumElems-1, // Mask End Index
+ NumElems, // Mask End Index(exclusive)
0, // Where to start looking in the src vector
NumElems, // Number of elements in vector
OpSrc)) // Which source operand ?
@@ -4804,6 +4623,7 @@ static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 8)
return SDValue();
@@ -4815,7 +4635,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
if (ThisIsNonZero && First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
@@ -4851,6 +4671,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
unsigned NumNonZero, unsigned NumZero,
SelectionDAG &DAG,
+ const X86Subtarget* Subtarget,
const TargetLowering &TLI) {
if (NumNonZero > 4)
return SDValue();
@@ -4863,7 +4684,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
if (isNonZero) {
if (First) {
if (NumZero)
- V = getZeroVector(MVT::v8i16, true, DAG, dl);
+ V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
else
V = DAG.getUNDEF(MVT::v8i16);
First = false;
@@ -4884,7 +4705,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
const TargetLowering &TLI, DebugLoc dl) {
assert(VT.getSizeInBits() == 128 && "Unknown type for VShift");
EVT ShVT = MVT::v2i64;
- unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
+ unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp);
return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getNode(Opc, dl, ShVT, SrcOp,
@@ -4952,21 +4773,16 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
int EltNo = (Offset - StartOffset) >> 2;
int NumElems = VT.getVectorNumElements();
- EVT CanonVT = VT.getSizeInBits() == 128 ? MVT::v4i32 : MVT::v8i32;
EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
LD->getPointerInfo().getWithOffset(StartOffset),
- false, false, 0);
+ false, false, false, 0);
- // Canonicalize it to a v4i32 or v8i32 shuffle.
SmallVector<int, 8> Mask;
for (int i = 0; i < NumElems; ++i)
Mask.push_back(EltNo);
- V1 = DAG.getNode(ISD::BITCAST, dl, CanonVT, V1);
- return DAG.getNode(ISD::BITCAST, dl, NVT,
- DAG.getVectorShuffle(CanonVT, dl, V1,
- DAG.getUNDEF(CanonVT),&Mask[0]));
+ return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &Mask[0]);
}
return SDValue();
@@ -5021,11 +4837,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
- LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->isInvariant(), 0);
return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
LDBase->getPointerInfo(),
LDBase->isVolatile(), LDBase->isNonTemporal(),
- LDBase->getAlignment());
+ LDBase->isInvariant(), LDBase->getAlignment());
} else if (NumElems == 4 && LastLoadedElt == 1 &&
DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) {
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
@@ -5041,6 +4858,137 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
return SDValue();
}
+/// LowerVectorBroadcast - Attempt to use the vbroadcast instruction
+/// to generate a splat value for the following cases:
+/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
+/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
+/// a scalar load, or a constant.
+/// The VBROADCAST node is returned when a pattern is found,
+/// or SDValue() otherwise.
+SDValue
+X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ SDValue Ld;
+ bool ConstSplatVal;
+
+ switch (Op.getOpcode()) {
+ default:
+ // Unknown pattern found.
+ return SDValue();
+
+ case ISD::BUILD_VECTOR: {
+ // The BUILD_VECTOR node must be a splat.
+ if (!isSplatVector(Op.getNode()))
+ return SDValue();
+
+ Ld = Op.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
+
+ // The suspected load node has several users. Make sure that all
+ // of its users are from the BUILD_VECTOR node.
+ // Constants may have multiple users.
+ if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0))
+ return SDValue();
+ break;
+ }
+
+ case ISD::VECTOR_SHUFFLE: {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+
+ // Shuffles must have a splat mask where the first element is
+ // broadcasted.
+ if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
+ return SDValue();
+
+ SDValue Sc = Op.getOperand(0);
+ if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR)
+ return SDValue();
+
+ Ld = Sc.getOperand(0);
+ ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
+ Ld.getOpcode() == ISD::ConstantFP);
+
+ // The scalar_to_vector node and the suspected
+ // load node must have exactly one user.
+ // Constants may have multiple users.
+ if (!ConstSplatVal && (!Sc.hasOneUse() || !Ld.hasOneUse()))
+ return SDValue();
+ break;
+ }
+ }
+
+ bool Is256 = VT.getSizeInBits() == 256;
+ bool Is128 = VT.getSizeInBits() == 128;
+
+ // Handle broadcasting a single constant scalar from the constant pool
+ // into a vector. On Sandybridge it is still better to load a constant vector
+ // from the constant pool and not to broadcast it from a scalar.
+ if (ConstSplatVal && Subtarget->hasAVX2()) {
+ EVT CVT = Ld.getValueType();
+ assert(!CVT.isVector() && "Must not broadcast a vector type");
+ unsigned ScalarSize = CVT.getSizeInBits();
+
+ if ((Is256 && (ScalarSize == 32 || ScalarSize == 64)) ||
+ (Is128 && (ScalarSize == 32))) {
+
+ const Constant *C = 0;
+ if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
+ C = CI->getConstantIntValue();
+ else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
+ C = CF->getConstantFPValue();
+
+ assert(C && "Invalid constant type");
+
+ SDValue CP = DAG.getConstantPool(C, getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment);
+
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+ }
+
+ // The scalar source must be a normal load.
+ if (!ISD::isNormalLoad(Ld.getNode()))
+ return SDValue();
+
+ // Reject loads that have uses of the chain result
+ if (Ld->hasAnyUseOfValue(1))
+ return SDValue();
+
+ unsigned ScalarSize = Ld.getValueType().getSizeInBits();
+
+ // VBroadcast to YMM
+ if (Is256 && (ScalarSize == 32 || ScalarSize == 64))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // VBroadcast to XMM
+ if (Is128 && (ScalarSize == 32))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // The integer check is needed for the 64-bit into 128-bit case, so it doesn't
+ // match double, since there is no vbroadcastsd xmm form.
+ if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
+ // VBroadcast to YMM
+ if (Is256 && (ScalarSize == 8 || ScalarSize == 16))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+
+ // VBroadcast to XMM
+ if (Is128 && (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64))
+ return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
+ }
+
+ // Unsupported broadcast.
+ return SDValue();
+}
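The accepted combinations mirror the hardware forms: AVX provides vbroadcastss (32-bit, xmm and ymm) and vbroadcastsd (64-bit, ymm only), while AVX2 adds the integer vpbroadcastb/w/d/q variants. A hedged summary of the decision logic above as a standalone predicate (illustrative only, not part of the patch):

static bool canUseVBroadcast(bool Is256, bool IsInteger, bool HasAVX2,
                             unsigned ScalarSize) {
  // AVX float forms: 32-bit into xmm or ymm, 64-bit only into ymm.
  if (Is256 && (ScalarSize == 32 || ScalarSize == 64)) return true;
  if (!Is256 && ScalarSize == 32) return true;
  // AVX2 integer-only forms (vpbroadcastb/w/q).
  if (!HasAVX2 || !IsInteger) return false;
  if (Is256) return ScalarSize == 8 || ScalarSize == 16;
  return ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64;
}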
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5053,22 +5001,26 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
// Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
// and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32 ||
- Op.getValueType() == MVT::v8i32)
+ if (VT == MVT::v4i32 || VT == MVT::v8i32)
return Op;
- return getZeroVector(Op.getValueType(), Subtarget->hasXMMInt(), DAG, dl);
+ return getZeroVector(VT, Subtarget, DAG, dl);
}
// Vectors containing all ones can be matched by pcmpeqd on 128-bit width
- // vectors or broken into v4i32 operations on 256-bit vectors.
+ // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
+ // vpcmpeqd on 256-bit vectors.
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
- if (Op.getValueType() == MVT::v4i32)
+ if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
return Op;
- return getOnesVector(Op.getValueType(), DAG, dl);
+ return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
}
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
+
unsigned EVTBits = ExtVT.getSizeInBits();
unsigned NumZero = 0;
@@ -5118,8 +5070,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// convert it to a vector with movd (S2V+shuffle to zero extend).
Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasXMMInt(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
// Now we have our 32-bit value zero extended in the low element of
// a vector. If Idx != 0, swizzle it into place.
@@ -5132,7 +5083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DAG.getUNDEF(Item.getValueType()),
&Mask[0]);
}
- return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Item);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -5141,21 +5092,33 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// the rest of the elements. This will be matched as movd/movq/movss/movsd
// depending on what the source datatype is.
if (Idx == 0) {
- if (NumZero == 0) {
+ if (NumZero == 0)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
- } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
+
+ if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget->is64Bit())) {
+ if (VT.getSizeInBits() == 256) {
+ SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
+ Item, DAG.getIntPtrConstant(0));
+ }
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
- return getShuffleVectorZeroOrUndef(Item, 0, true,Subtarget->hasXMMInt(),
- DAG);
- } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
+ return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ }
+
+ if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
- assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
- EVT MiddleVT = MVT::v4i32;
- Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
- Item = getShuffleVectorZeroOrUndef(Item, 0, true,
- Subtarget->hasXMMInt(), DAG);
+ Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
+ if (VT.getSizeInBits() == 256) {
+ SDValue ZeroVec = getZeroVector(MVT::v8i32, Subtarget, DAG, dl);
+ Item = Insert128BitVector(ZeroVec, Item, DAG.getConstant(0, MVT::i32),
+ DAG, dl);
+ } else {
+ assert(VT.getSizeInBits() == 128 && "Expected an SSE value type!");
+ Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
+ }
return DAG.getNode(ISD::BITCAST, dl, VT, Item);
}
}
@@ -5183,8 +5146,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a shuffle of zero and zero-extended scalar to vector.
- Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
- Subtarget->hasXMMInt(), DAG);
+ Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, Subtarget, DAG);
SmallVector<int, 8> MaskVec;
for (unsigned i = 0; i < NumElems; i++)
MaskVec.push_back(i == Idx ? 0 : 1);
@@ -5214,9 +5176,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// For AVX-length vectors, build the individual 128-bit pieces and use
// shuffles to put them in place.
- if (VT.getSizeInBits() == 256 && !ISD::isBuildVectorAllZeros(Op.getNode())) {
+ if (VT.getSizeInBits() == 256) {
SmallVector<SDValue, 32> V;
- for (unsigned i = 0; i < NumElems; ++i)
+ for (unsigned i = 0; i != NumElems; ++i)
V.push_back(Op.getOperand(i));
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
@@ -5240,8 +5202,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
unsigned Idx = CountTrailingZeros_32(NonZeros);
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
- return getShuffleVectorZeroOrUndef(V2, Idx, true,
- Subtarget->hasXMMInt(), DAG);
+ return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
}
return SDValue();
}
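For the single non-zero case, a brief worked example of the bit trick used above:

// Illustrative: CountTrailingZeros_32(0b0100) == 2, so operand 2 is turned
// into a scalar_to_vector and getShuffleVectorZeroOrUndef places it in lane 2
// of an otherwise-zero vector.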
@@ -5249,24 +5210,23 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// If element VT is < 32 bits, convert it to inserts into a zero vector.
if (EVTBits == 8 && NumElems == 16) {
SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ Subtarget, *this);
if (V.getNode()) return V;
}
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ Subtarget, *this);
if (V.getNode()) return V;
}
// If element VT is == 32 bits, turn it into a number of shuffles.
- SmallVector<SDValue, 8> V;
- V.resize(NumElems);
+ SmallVector<SDValue, 8> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
for (unsigned i = 0; i < 4; ++i) {
bool isZero = !(NonZeros & (1 << i));
if (isZero)
- V[i] = getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
+ V[i] = getZeroVector(VT, Subtarget, DAG, dl);
else
V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
}
@@ -5289,13 +5249,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- SmallVector<int, 8> MaskVec;
- bool Reverse = (NonZeros & 0x3) == 2;
- for (unsigned i = 0; i < 2; ++i)
- MaskVec.push_back(Reverse ? 1-i : i);
- Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
- for (unsigned i = 0; i < 2; ++i)
- MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
+ bool Reverse1 = (NonZeros & 0x3) == 2;
+ bool Reverse2 = ((NonZeros & (0x3 << 2)) >> 2) == 2;
+ int MaskVec[] = {
+ Reverse1 ? 1 : 0,
+ Reverse1 ? 0 : 1,
+ static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
+ static_cast<int>(Reverse2 ? NumElems : NumElems+1)
+ };
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
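A worked example of the rewritten mask construction, under the encoding used above (bit i of NonZeros set means element i is non-zero):

// Illustrative: NonZeros == 0b1010, NumElems == 4.
//   Low pair:  (NonZeros & 0x3) == 2        -> Reverse1 -> indices {1, 0}
//   High pair: ((NonZeros >> 2) & 0x3) == 2 -> Reverse2 -> indices {5, 4}
// MaskVec is then <1, 0, 5, 4>: each unpacked pair is swapped into place.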
@@ -5310,7 +5271,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return LD;
// For SSE 4.1, use insertps to put the high elements into the low element.
- if (getSubtarget()->hasSSE41() || getSubtarget()->hasAVX()) {
+ if (getSubtarget()->hasSSE41()) {
SDValue Result;
if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
@@ -5422,6 +5383,85 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
return LowerAVXCONCAT_VECTORS(Op, DAG);
}
+// Try to lower a shuffle node into a simple blend instruction.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(SDValue Op,
+ const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ DebugLoc dl = SVOp->getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT InVT = V1.getValueType();
+ int MaskSize = VT.getVectorNumElements();
+ int InSize = InVT.getVectorNumElements();
+
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ if (MaskSize != InSize)
+ return SDValue();
+
+ int ISDNo = 0;
+ MVT OpTy;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i16:
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v8i16;
+ break;
+ case MVT::v4i32:
+ case MVT::v4f32:
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v4f32;
+ break;
+ case MVT::v2i64:
+ case MVT::v2f64:
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v2f64;
+ break;
+ case MVT::v8i32:
+ case MVT::v8f32:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPS;
+ OpTy = MVT::v8f32;
+ break;
+ case MVT::v4i64:
+ case MVT::v4f64:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPD;
+ OpTy = MVT::v4f64;
+ break;
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+ ISDNo = X86ISD::BLENDPW;
+ OpTy = MVT::v16i16;
+ break;
+ }
+ assert(ISDNo && "Invalid Op Number");
+
+ unsigned MaskVals = 0;
+
+ for (int i = 0; i < MaskSize; ++i) {
+ int EltIdx = SVOp->getMaskElt(i);
+ if (EltIdx == i || EltIdx == -1)
+ MaskVals |= (1<<i);
+ else if (EltIdx == (i + MaskSize))
+ continue; // Bit is set to zero.
+ else return SDValue();
+ }
+
+ V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
+ V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
+ SDValue Ret = DAG.getNode(ISDNo, dl, OpTy, V1, V2,
+ DAG.getConstant(MaskVals, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+}
+
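The loop above derives the blend immediate directly from the shuffle mask: bit i is set when lane i comes from V1 (or is undef) and stays clear when it comes from lane i of V2; any cross-lane index defeats the blend. A hedged restatement as a hypothetical standalone helper:

static bool computeBlendImm(ArrayRef<int> Mask, unsigned &Imm) {
  Imm = 0;
  for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
    if (Mask[i] == i || Mask[i] == -1)
      Imm |= 1u << i;            // select lane i of V1 (undef counts as V1)
    else if (Mask[i] != i + e)
      return false;              // not a per-lane select between V1 and V2
  }
  return true;
}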
// v8i16 shuffles - Prefer shuffles in the following order:
// 1. [all] pshuflw, pshufhw, optional move
// 2. [ssse3] 1 x pshufb
@@ -5439,11 +5479,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// Determine if more than 1 of the words in each of the low and high quadwords
// of the result come from the same quadword of one of the two inputs. Undef
// mask values count as coming from any quadword, for better codegen.
- SmallVector<unsigned, 4> LoQuad(4);
- SmallVector<unsigned, 4> HiQuad(4);
- BitVector InputQuads(4);
+ unsigned LoQuad[] = { 0, 0, 0, 0 };
+ unsigned HiQuad[] = { 0, 0, 0, 0 };
+ std::bitset<4> InputQuads;
for (unsigned i = 0; i < 8; ++i) {
- SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
+ unsigned *Quad = i < 4 ? LoQuad : HiQuad;
int EltIdx = SVOp->getMaskElt(i);
MaskVals.push_back(EltIdx);
if (EltIdx < 0) {
@@ -5481,10 +5521,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
- BestLoQuad = InputQuads.find_first();
- BestHiQuad = InputQuads.find_next(BestLoQuad);
+ BestLoQuad = InputQuads[0] ? 0 : 1;
+ BestHiQuad = InputQuads[2] ? 2 : 3;
}
if (InputQuads.count() > 2) {
BestLoQuad = -1;
@@ -5497,9 +5537,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// words from all 4 input quadwords.
SDValue NewV;
if (BestLoQuad >= 0 || BestHiQuad >= 0) {
- SmallVector<int, 8> MaskV;
- MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
- MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
+ int MaskV[] = {
+ BestLoQuad < 0 ? 0 : BestLoQuad,
+ BestHiQuad < 0 ? 1 : BestHiQuad
+ };
NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1),
DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2), &MaskV[0]);
@@ -5544,8 +5585,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
unsigned TargetMask = 0;
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
- TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
- X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
+ TargetMask = pshufhw ? getShufflePSHUFHWImmediate(SVOp):
+ getShufflePSHUFLWImmediate(SVOp);
V1 = NewV.getOperand(0);
return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
@@ -5554,7 +5596,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
@@ -5602,61 +5644,51 @@ X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
// If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
// and update MaskVals with new element order.
- BitVector InOrder(8);
+ std::bitset<8> InOrder;
if (BestLoQuad >= 0) {
- SmallVector<int, 8> MaskV;
+ int MaskV[] = { -1, -1, -1, -1, 4, 5, 6, 7 };
for (int i = 0; i != 4; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
- MaskV.push_back(-1);
InOrder.set(i);
} else if ((idx / 4) == BestLoQuad) {
- MaskV.push_back(idx & 3);
+ MaskV[i] = idx & 3;
InOrder.set(i);
- } else {
- MaskV.push_back(-1);
}
}
- for (unsigned i = 4; i != 8; ++i)
- MaskV.push_back(i);
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
- (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
- NewV.getOperand(0),
- X86::getShufflePSHUFLWImmediate(NewV.getNode()),
- DAG);
+ NewV.getOperand(0),
+ getShufflePSHUFLWImmediate(SVOp), DAG);
+ }
}
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
// and update MaskVals with the new element order.
if (BestHiQuad >= 0) {
- SmallVector<int, 8> MaskV;
- for (unsigned i = 0; i != 4; ++i)
- MaskV.push_back(i);
+ int MaskV[] = { 0, 1, 2, 3, -1, -1, -1, -1 };
for (unsigned i = 4; i != 8; ++i) {
int idx = MaskVals[i];
if (idx < 0) {
- MaskV.push_back(-1);
InOrder.set(i);
} else if ((idx / 4) == BestHiQuad) {
- MaskV.push_back((idx & 3) + 4);
+ MaskV[i] = (idx & 3) + 4;
InOrder.set(i);
- } else {
- MaskV.push_back(-1);
}
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE &&
- (Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
- NewV.getOperand(0),
- X86::getShufflePSHUFHWImmediate(NewV.getNode()),
- DAG);
+ NewV.getOperand(0),
+ getShufflePSHUFHWImmediate(SVOp), DAG);
+ }
}
// In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -5698,8 +5730,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
- SmallVector<int, 16> MaskVals;
- SVOp->getMask(MaskVals);
+ ArrayRef<int> MaskVals = SVOp->getMask();
// If we have SSSE3, case 1 is generated when all result bytes come from
// one of the inputs. Otherwise, case 2 is generated. If no SSSE3 is
@@ -5718,7 +5749,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
// If SSSE3, use 1 pshufb instruction per vector with elements in the result.
- if (TLI.getSubtarget()->hasSSSE3() || TLI.getSubtarget()->hasAVX()) {
+ if (TLI.getSubtarget()->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If all result elements are from one input vector, then only translate
@@ -5849,7 +5880,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT NewVT;
switch (VT.getSimpleVT().SimpleTy) {
- default: assert(false && "Unexpected!");
+ default: llvm_unreachable("Unexpected!");
case MVT::v4f32: NewVT = MVT::v2f64; break;
case MVT::v4i32: NewVT = MVT::v2i64; break;
case MVT::v8i16: NewVT = MVT::v4i32; break;
@@ -5915,96 +5946,89 @@ static SDValue getVZextMovL(EVT VT, EVT OpVT,
OpVT, SrcOp)));
}
-/// areShuffleHalvesWithinDisjointLanes - Check whether each half of a vector
-/// shuffle node referes to only one lane in the sources.
-static bool areShuffleHalvesWithinDisjointLanes(ShuffleVectorSDNode *SVOp) {
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
- int HalfSize = NumElems/2;
- SmallVector<int, 16> M;
- SVOp->getMask(M);
- bool MatchA = false, MatchB = false;
-
- for (int l = 0; l < NumElems*2; l += HalfSize) {
- if (isUndefOrInRange(M, 0, HalfSize, l, l+HalfSize)) {
- MatchA = true;
- break;
- }
- }
-
- for (int l = 0; l < NumElems*2; l += HalfSize) {
- if (isUndefOrInRange(M, HalfSize, HalfSize, l, l+HalfSize)) {
- MatchB = true;
- break;
- }
- }
-
- return MatchA && MatchB;
-}
-
/// LowerVECTOR_SHUFFLE_256 - Handle all 256-bit wide vector shuffles
/// which could not be matched by any known target specific shuffle
static SDValue
LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
- if (areShuffleHalvesWithinDisjointLanes(SVOp)) {
- // If each half of a vector shuffle node referes to only one lane in the
- // source vectors, extract each used 128-bit lane and shuffle them using
- // 128-bit shuffles. Then, concatenate the results. Otherwise leave
- // the work to the legalizer.
- DebugLoc dl = SVOp->getDebugLoc();
- EVT VT = SVOp->getValueType(0);
- int NumElems = VT.getVectorNumElements();
- int HalfSize = NumElems/2;
-
- // Extract the reference for each half
- int FstVecExtractIdx = 0, SndVecExtractIdx = 0;
- int FstVecOpNum = 0, SndVecOpNum = 0;
- for (int i = 0; i < HalfSize; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (SVOp->getMaskElt(i) < 0)
- continue;
- FstVecOpNum = Elt/NumElems;
- FstVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
- break;
- }
- for (int i = HalfSize; i < NumElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- if (SVOp->getMaskElt(i) < 0)
+ EVT VT = SVOp->getValueType(0);
+
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned NumLaneElems = NumElems / 2;
+
+ DebugLoc dl = SVOp->getDebugLoc();
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NVT = MVT::getVectorVT(EltVT, NumLaneElems);
+ SDValue Shufs[2];
+
+ SmallVector<int, 16> Mask;
+ for (unsigned l = 0; l < 2; ++l) {
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out by returning SDValue().
+ int InputUsed[2] = { -1, -1 }; // Not yet discovered.
+ unsigned LaneStart = l * NumLaneElems;
+ for (unsigned i = 0; i != NumLaneElems; ++i) {
+ // The mask element. This indexes into the input.
+ int Idx = SVOp->getMaskElt(i+LaneStart);
+ if (Idx < 0) {
+ // The mask element does not index into any input vector.
+ Mask.push_back(-1);
continue;
- SndVecOpNum = Elt/NumElems;
- SndVecExtractIdx = Elt % NumElems < HalfSize ? 0 : HalfSize;
- break;
- }
+ }
- // Extract the subvectors
- SDValue V1 = Extract128BitVector(SVOp->getOperand(FstVecOpNum),
- DAG.getConstant(FstVecExtractIdx, MVT::i32), DAG, dl);
- SDValue V2 = Extract128BitVector(SVOp->getOperand(SndVecOpNum),
- DAG.getConstant(SndVecExtractIdx, MVT::i32), DAG, dl);
+ // The input vector this mask element indexes into.
+ int Input = Idx / NumLaneElems;
- // Generate 128-bit shuffles
- SmallVector<int, 16> MaskV1, MaskV2;
- for (int i = 0; i < HalfSize; ++i) {
- int Elt = SVOp->getMaskElt(i);
- MaskV1.push_back(Elt < 0 ? Elt : Elt % HalfSize);
- }
- for (int i = HalfSize; i < NumElems; ++i) {
- int Elt = SVOp->getMaskElt(i);
- MaskV2.push_back(Elt < 0 ? Elt : Elt % HalfSize);
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NumLaneElems;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input)
+ // This input vector is already an operand.
+ break;
+ if (InputUsed[OpNo] < 0) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up.
+ return SDValue();
+ }
+
+ // Add the mask index for the new shuffle vector.
+ Mask.push_back(Idx + OpNo * NumLaneElems);
}
- EVT NVT = V1.getValueType();
- V1 = DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), &MaskV1[0]);
- V2 = DAG.getVectorShuffle(NVT, dl, V2, DAG.getUNDEF(NVT), &MaskV2[0]);
+ if (InputUsed[0] < 0) {
+ // No input vectors were used! The result is undefined.
+ Shufs[l] = DAG.getUNDEF(NVT);
+ } else {
+ SDValue Op0 = Extract128BitVector(SVOp->getOperand(InputUsed[0] / 2),
+ DAG.getConstant((InputUsed[0] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = (InputUsed[1] < 0) ? DAG.getUNDEF(NVT) :
+ Extract128BitVector(SVOp->getOperand(InputUsed[1] / 2),
+ DAG.getConstant((InputUsed[1] % 2) * NumLaneElems, MVT::i32),
+ DAG, dl);
+ // At least one input vector was used. Create a new shuffle vector.
+ Shufs[l] = DAG.getVectorShuffle(NVT, dl, Op0, Op1, &Mask[0]);
+ }
- // Concatenate the result back
- SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), V1,
- DAG.getConstant(0, MVT::i32), DAG, dl);
- return Insert128BitVector(V, V2, DAG.getConstant(NumElems/2, MVT::i32),
- DAG, dl);
+ Mask.clear();
}
- return SDValue();
+ // Concatenate the result back
+ SDValue V = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Shufs[0],
+ DAG.getConstant(0, MVT::i32), DAG, dl);
+ return Insert128BitVector(V, Shufs[1],DAG.getConstant(NumLaneElems, MVT::i32),
+ DAG, dl);
}
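Concretely, the rewritten routine treats each 128-bit lane of the output on its own: it gathers at most two 128-bit source halves per lane, rebuilds the mask relative to those halves, and concatenates the two lane shuffles. A worked example (illustrative):

// v8f32 shuffle mask <0, 9, 2, 11, 12, 5, 14, 7>, NumLaneElems == 4:
//   Lane 0 reads V1-low (Input 0) and V2-low (Input 2):
//     InputUsed = {0, 2}, per-lane mask <0, 5, 2, 7>.
//   Lane 1 reads V2-high (Input 3) and V1-high (Input 1):
//     InputUsed = {3, 1}, per-lane mask <0, 5, 2, 7>.
// Each lane becomes one v4f32 shuffle; Insert128BitVector concatenates them.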
/// LowerVECTOR_SHUFFLE_128v4 - Handle all 128-bit wide vectors with
@@ -6018,11 +6042,9 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
assert(VT.getSizeInBits() == 128 && "Unsupported vector size");
- SmallVector<std::pair<int, int>, 8> Locs;
- Locs.resize(4);
- SmallVector<int, 8> Mask1(4U, -1);
- SmallVector<int, 8> PermMask;
- SVOp->getMask(PermMask);
+ std::pair<int, int> Locs[4];
+ int Mask1[] = { -1, -1, -1, -1 };
+ SmallVector<int, 8> PermMask(SVOp->getMask().begin(), SVOp->getMask().end());
unsigned NumHi = 0;
unsigned NumLo = 0;
@@ -6052,17 +6074,14 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// vector operands, put the elements into the right order.
V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
- SmallVector<int, 8> Mask2(4U, -1);
+ int Mask2[] = { -1, -1, -1, -1 };
- for (unsigned i = 0; i != 4; ++i) {
- if (Locs[i].first == -1)
- continue;
- else {
+ for (unsigned i = 0; i != 4; ++i)
+ if (Locs[i].first != -1) {
unsigned Idx = (i < 2) ? 0 : 4;
Idx += Locs[i].first * 2 + Locs[i].second;
Mask2[i] = Idx;
}
- }
return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
} else if (NumLo == 3 || NumHi == 3) {
@@ -6075,7 +6094,7 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// from X.
if (NumHi == 3) {
// Normalize it so the 3 elements come from V1.
- CommuteVectorShuffleMask(PermMask, VT);
+ CommuteVectorShuffleMask(PermMask, 4);
std::swap(V1, V2);
}
@@ -6115,18 +6134,16 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
// Break it into (shuffle shuffle_hi, shuffle_lo).
- Locs.clear();
- Locs.resize(4);
- SmallVector<int,8> LoMask(4U, -1);
- SmallVector<int,8> HiMask(4U, -1);
+ int LoMask[] = { -1, -1, -1, -1 };
+ int HiMask[] = { -1, -1, -1, -1 };
- SmallVector<int,8> *MaskPtr = &LoMask;
+ int *MaskPtr = LoMask;
unsigned MaskIdx = 0;
unsigned LoIdx = 0;
unsigned HiIdx = 2;
for (unsigned i = 0; i != 4; ++i) {
if (i == 2) {
- MaskPtr = &HiMask;
+ MaskPtr = HiMask;
MaskIdx = 1;
LoIdx = 0;
HiIdx = 2;
@@ -6136,26 +6153,21 @@ LowerVECTOR_SHUFFLE_128v4(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
Locs[i] = std::make_pair(-1, -1);
} else if (Idx < 4) {
Locs[i] = std::make_pair(MaskIdx, LoIdx);
- (*MaskPtr)[LoIdx] = Idx;
+ MaskPtr[LoIdx] = Idx;
LoIdx++;
} else {
Locs[i] = std::make_pair(MaskIdx, HiIdx);
- (*MaskPtr)[HiIdx] = Idx;
+ MaskPtr[HiIdx] = Idx;
HiIdx++;
}
}
SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
- SmallVector<int, 8> MaskOps;
- for (unsigned i = 0; i != 4; ++i) {
- if (Locs[i].first == -1) {
- MaskOps.push_back(-1);
- } else {
- unsigned Idx = Locs[i].first * 4 + Locs[i].second;
- MaskOps.push_back(Idx);
- }
- }
+ int MaskOps[] = { -1, -1, -1, -1 };
+ for (unsigned i = 0; i != 4; ++i)
+ if (Locs[i].first != -1)
+ MaskOps[i] = Locs[i].first * 4 + Locs[i].second;
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
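To make the bookkeeping concrete, a hedged trace of this final decomposition (ignoring the earlier fast paths, which would usually catch a mask this simple):

// Mask <0, 4, 1, 6>: i = 0,1 fill LoMask, i = 2,3 fill HiMask; indices < 4
// land in slots 0-1 and indices >= 4 in slots 2-3 of the active sub-mask:
//   LoMask = <0, -1, 4, -1>, HiMask = <1, -1, 6, -1>,
//   Locs   = {(0,0), (0,2), (1,0), (1,2)}.
// MaskOps = <0, 2, 4, 6> then recombines LoShuffle/HiShuffle into <0, 4, 1, 6>.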
@@ -6164,6 +6176,10 @@ static bool MayFoldVectorLoad(SDValue V) {
V = V.getOperand(0);
if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR &&
+ V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF)
+ // BUILD_VECTOR (load), undef
+ V = V.getOperand(0);
if (MayFoldLoad(V))
return true;
return false;
@@ -6186,82 +6202,6 @@ static bool RelaxedMayFoldVectorLoad(SDValue V) {
return false;
}
-/// CanFoldShuffleIntoVExtract - Check if the current shuffle is used by
-/// a vector extract, and if both can be later optimized into a single load.
-/// This is done in visitEXTRACT_VECTOR_ELT and the conditions are checked
-/// here because otherwise a target specific shuffle node is going to be
-/// emitted for this shuffle, and the optimization not done.
-/// FIXME: This is probably not the best approach, but fix the problem
-/// until the right path is decided.
-static
-bool CanXFormVExtractWithShuffleIntoLoad(SDValue V, SelectionDAG &DAG,
- const TargetLowering &TLI) {
- EVT VT = V.getValueType();
- ShuffleVectorSDNode *SVOp = dyn_cast<ShuffleVectorSDNode>(V);
-
- // Be sure that the vector shuffle is present in a pattern like this:
- // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), c) -> (f32 load $addr)
- if (!V.hasOneUse())
- return false;
-
- SDNode *N = *V.getNode()->use_begin();
- if (N->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
- return false;
-
- SDValue EltNo = N->getOperand(1);
- if (!isa<ConstantSDNode>(EltNo))
- return false;
-
- // If the bit convert changed the number of elements, it is unsafe
- // to examine the mask.
- bool HasShuffleIntoBitcast = false;
- if (V.getOpcode() == ISD::BITCAST) {
- EVT SrcVT = V.getOperand(0).getValueType();
- if (SrcVT.getVectorNumElements() != VT.getVectorNumElements())
- return false;
- V = V.getOperand(0);
- HasShuffleIntoBitcast = true;
- }
-
- // Select the input vector, guarding against out of range extract vector.
- unsigned NumElems = VT.getVectorNumElements();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > NumElems) ? -1 : SVOp->getMaskElt(Elt);
- V = (Idx < (int)NumElems) ? V.getOperand(0) : V.getOperand(1);
-
- // Skip one more bit_convert if necessary
- if (V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
-
- if (ISD::isNormalLoad(V.getNode())) {
- // Is the original load suitable?
- LoadSDNode *LN0 = cast<LoadSDNode>(V);
-
- // FIXME: avoid the multi-use bug that is preventing lots of
- // of foldings to be detected, this is still wrong of course, but
- // give the temporary desired behavior, and if it happens that
- // the load has real more uses, during isel it will not fold, and
- // will generate poor code.
- if (!LN0 || LN0->isVolatile()) // || !LN0->hasOneUse()
- return false;
-
- if (!HasShuffleIntoBitcast)
- return true;
-
- // If there's a bitcast before the shuffle, check if the load type and
- // alignment is valid.
- unsigned Align = LN0->getAlignment();
- unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(
- VT.getTypeForEVT(*DAG.getContext()));
-
- if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
- return false;
- }
-
- return true;
-}
-
static
SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
@@ -6275,14 +6215,14 @@ SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
static
SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
- bool HasXMMInt) {
+ bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
assert(VT != MVT::v2i64 && "unsupported shuffle type");
- if (HasXMMInt && VT == MVT::v2f64)
+ if (HasSSE2 && VT == MVT::v2f64)
return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
// v4f32 or v4i32: canonicalized to v4f32 (which is legal for SSE1)
@@ -6308,24 +6248,8 @@ SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
}
-static inline unsigned getSHUFPOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32:
- case MVT::v4i32: // Use fp unit for int unpack.
- case MVT::v4f32: return X86ISD::SHUFPS;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64:
- case MVT::v2i64: // Use fp unit for int unpack.
- case MVT::v2f64: return X86ISD::SHUFPD;
- default:
- llvm_unreachable("Unknown type for shufp*");
- }
- return 0;
-}
-
static
-SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
EVT VT = Op.getValueType();
@@ -6346,32 +6270,30 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
// turns into:
// (MOVLPSmr addr:$src1, VR128:$src2)
// So, recognize this potential and also use MOVLPS or MOVLPD
- if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ else if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
CanFoldLoad = true;
- // Both of them can't be memory operations though.
- if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2))
- CanFoldLoad = false;
-
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
if (CanFoldLoad) {
- if (HasXMMInt && NumElems == 2)
+ if (HasSSE2 && NumElems == 2)
return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
if (NumElems == 4)
- return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ // If we don't care about the second element, proceed to use movss.
+ if (SVOp->getMaskElt(1) != -1)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
}
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
// movl and movlp will both match v2i64, but v2i64 is never matched by
// movl earlier because we make it strict to avoid messing with the movlp load
// folding logic (see the code above getMOVLP call). Match it here then,
// this is horrible, but will stay like this until we move all shuffle
// matching to x86 specific nodes. Note that for the 1st condition all
// types are matched with movsd.
- if (HasXMMInt) {
+ if (HasSSE2) {
// FIXME: isMOVLMask should be checked and matched before getMOVLP,
// as to remove this logic from here, as much as possible
- if (NumElems == 2 || !X86::isMOVLMask(SVOp))
+ if (NumElems == 2 || !isMOVLMask(SVOp->getMask(), VT))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
}
@@ -6379,112 +6301,12 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) {
assert(VT != MVT::v4i32 && "unsupported shuffle type");
// Invert the operand order and use SHUFPS to match it.
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V2, V1,
- X86::getShuffleSHUFImmediate(SVOp), DAG);
-}
-
-static inline unsigned getUNPCKLOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKLDQ;
- case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v2f64: return X86ISD::UNPCKLPD;
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32: return X86ISD::VUNPCKLPSY;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64: return X86ISD::VUNPCKLPDY;
- case MVT::v16i8: return X86ISD::PUNPCKLBW;
- case MVT::v8i16: return X86ISD::PUNPCKLWD;
- default:
- llvm_unreachable("Unknown type for unpckl");
- }
- return 0;
-}
-
-static inline unsigned getUNPCKHOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32: return X86ISD::PUNPCKHDQ;
- case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
- case MVT::v4f32: return X86ISD::UNPCKHPS;
- case MVT::v2f64: return X86ISD::UNPCKHPD;
- case MVT::v8i32: // Use fp unit for int unpack.
- case MVT::v8f32: return X86ISD::VUNPCKHPSY;
- case MVT::v4i64: // Use fp unit for int unpack.
- case MVT::v4f64: return X86ISD::VUNPCKHPDY;
- case MVT::v16i8: return X86ISD::PUNPCKHBW;
- case MVT::v8i16: return X86ISD::PUNPCKHWD;
- default:
- llvm_unreachable("Unknown type for unpckh");
- }
- return 0;
-}
-
-static inline unsigned getVPERMILOpcode(EVT VT) {
- switch(VT.getSimpleVT().SimpleTy) {
- case MVT::v4i32:
- case MVT::v4f32: return X86ISD::VPERMILPS;
- case MVT::v2i64:
- case MVT::v2f64: return X86ISD::VPERMILPD;
- case MVT::v8i32:
- case MVT::v8f32: return X86ISD::VPERMILPSY;
- case MVT::v4i64:
- case MVT::v4f64: return X86ISD::VPERMILPDY;
- default:
- llvm_unreachable("Unknown type for vpermil");
- }
- return 0;
-}
-
-/// isVectorBroadcast - Check if the node chain is suitable to be xformed to
-/// a vbroadcast node. The nodes are suitable whenever we can fold a load coming
-/// from a 32 or 64 bit scalar. Update Op to the desired load to be folded.
-static bool isVectorBroadcast(SDValue &Op) {
- EVT VT = Op.getValueType();
- bool Is256 = VT.getSizeInBits() == 256;
-
- assert((VT.getSizeInBits() == 128 || Is256) &&
- "Unsupported type for vbroadcast node");
-
- SDValue V = Op;
- if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
-
- if (Is256 && !(V.hasOneUse() &&
- V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(0).getOpcode() == ISD::UNDEF))
- return false;
-
- if (Is256)
- V = V.getOperand(1);
-
- if (!V.hasOneUse())
- return false;
-
- // Check the source scalar_to_vector type. 256-bit broadcasts are
- // supported for 32/64-bit sizes, while 128-bit ones are only supported
- // for 32-bit scalars.
- if (V.getOpcode() != ISD::SCALAR_TO_VECTOR)
- return false;
-
- unsigned ScalarSize = V.getOperand(0).getValueType().getSizeInBits();
- if (ScalarSize != 32 && ScalarSize != 64)
- return false;
- if (!Is256 && ScalarSize == 64)
- return false;
-
- V = V.getOperand(0);
- if (!MayFoldLoad(V))
- return false;
-
- // Return the load node
- Op = V;
- return true;
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V2, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
}
-static
-SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
- const TargetLowering &TLI,
- const X86Subtarget *Subtarget) {
+SDValue
+X86TargetLowering::NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -6492,22 +6314,17 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
SDValue V2 = Op.getOperand(1);
if (isZeroShuffle(SVOp))
- return getZeroVector(VT, Subtarget->hasXMMInt(), DAG, dl);
+ return getZeroVector(VT, Subtarget, DAG, dl);
// Handle splat operations
if (SVOp->isSplat()) {
unsigned NumElem = VT.getVectorNumElements();
int Size = VT.getSizeInBits();
- // Special case, this is the only place now where it's allowed to return
- // a vector_shuffle operation without using a target specific node, because
- // *hopefully* it will be optimized away by the dag combiner. FIXME: should
- // this be moved to DAGCombine instead?
- if (NumElem <= 4 && CanXFormVExtractWithShuffleIntoLoad(Op, DAG, TLI))
- return Op;
// Use vbroadcast whenever the splat comes from a foldable load
- if (Subtarget->hasAVX() && isVectorBroadcast(V1))
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, V1);
+ SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
+ if (Broadcast.getNode())
+ return Broadcast;
// Handle splats by matching through known shuffle masks
if ((Size == 128 && NumElem <= 4) ||
@@ -6525,21 +6342,26 @@ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG,
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
} else if ((VT == MVT::v4i32 ||
- (VT == MVT::v4f32 && Subtarget->hasXMMInt()))) {
+ (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
// FIXME: Figure out a cleaner way to do this.
// Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
if (NewOp.getNode()) {
- if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
- return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
+ EVT NewVT = NewOp.getValueType();
+ if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
+ NewVT, true, false))
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
DAG, Subtarget, dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, dl);
- if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
- return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
- DAG, Subtarget, dl);
+ if (NewOp.getNode()) {
+ EVT NewVT = NewOp.getValueType();
+ if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
+ DAG, Subtarget, dl);
+ }
}
}
return SDValue();
@@ -6553,18 +6375,22 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
unsigned NumElems = VT.getVectorNumElements();
- bool isMMX = VT.getSizeInBits() == 64;
bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
- bool HasXMMInt = Subtarget->hasXMMInt();
+ bool HasSSE2 = Subtarget->hasSSE2();
+ bool HasAVX = Subtarget->hasAVX();
+ bool HasAVX2 = Subtarget->hasAVX2();
MachineFunction &MF = DAG.getMachineFunction();
bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
- // Shuffle operations on MMX not supported.
- if (isMMX)
- return Op;
+ assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles");
+
+ if (V1IsUndef && V2IsUndef)
+ return DAG.getUNDEF(VT);
+
+ assert(!V1IsUndef && "Op 1 of shuffle should not be undef");
// Vector shuffle lowering takes 3 steps:
//
@@ -6576,50 +6402,54 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// so the shuffle can be broken into other shuffles and the legalizer can
// try the lowering again.
//
- // The general ideia is that no vector_shuffle operation should be left to
+ // The general idea is that no vector_shuffle operation should be left to
// be matched during isel, all of them must be converted to a target specific
// node here.
// Normalize the input vectors. Here splats, zeroed vectors, profitable
// narrowing and commutation of operands should be handled. The actual code
// doesn't include all of those, work in progress...
- SDValue NewOp = NormalizeVectorShuffle(Op, DAG, *this, Subtarget);
+ SDValue NewOp = NormalizeVectorShuffle(Op, DAG);
if (NewOp.getNode())
return NewOp;
+ SmallVector<int, 8> M(SVOp->getMask().begin(), SVOp->getMask().end());
+
// NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
// unpckh_undef). Only use pshufd if speed is more important than size.
- if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
- if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
- if (X86::isMOVDDUPMask(SVOp) &&
- (Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
+ if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
V2IsUndef && RelaxedMayFoldVectorLoad(V1))
return getMOVDDup(Op, dl, V1, DAG);
- if (X86::isMOVHLPS_v_undef_Mask(SVOp))
+ if (isMOVHLPS_v_undef_Mask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
// Use to match splats
- if (HasXMMInt && X86::isUNPCKHMask(SVOp) && V2IsUndef &&
+ if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef &&
(VT == MVT::v2f64 || VT == MVT::v2i64))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
- if (X86::isPSHUFDMask(SVOp)) {
+ if (isPSHUFDMask(M, VT)) {
// The actual implementation will match the mask in the if above and then
// during isel it can match several different instructions, not only pshufd
// as its name says, sad but true, emulate the behavior for now...
- if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
- return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+ if (isMOVDDUPMask(M, VT) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
- unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
- if (HasXMMInt && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V1,
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
TargetMask, DAG);
}
@@ -6627,8 +6457,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool isLeft = false;
unsigned ShAmt = 0;
SDValue ShVal;
- bool isShift = getSubtarget()->hasXMMInt() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ bool isShift = HasSSE2 && isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -6637,13 +6466,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
}
- if (X86::isMOVLMask(SVOp)) {
- if (V1IsUndef)
- return V2;
+ if (isMOVLMask(M, VT)) {
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!X86::isMOVLPMask(SVOp)) {
- if (HasXMMInt && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ if (!isMOVLPMask(M, VT)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
if (VT == MVT::v4i32 || VT == MVT::v4f32)
@@ -6652,27 +6479,27 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
// FIXME: fold these into legal mask.
- if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
- return getMOVLowToHigh(Op, dl, DAG, HasXMMInt);
+ if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
- if (X86::isMOVHLPSMask(SVOp))
+ if (isMOVHLPSMask(M, VT))
return getMOVHighToLow(Op, dl, DAG);
- if (X86::isMOVSHDUPMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVSHDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
- if (X86::isMOVSLDUPMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVSLDUPMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
- if (X86::isMOVLPMask(SVOp))
- return getMOVLP(Op, dl, DAG, HasXMMInt);
+ if (isMOVLPMask(M, VT))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
- if (ShouldXformToMOVHLPS(SVOp) ||
- ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
+ if (ShouldXformToMOVHLPS(M, VT) ||
+ ShouldXformToMOVLP(V1.getNode(), V2.getNode(), M, VT))
return CommuteVectorShuffle(SVOp, DAG);
if (isShift) {
- // No better options. Use a vshl / vsrl.
+ // No better options. Use a vshldq / vsrldq.
EVT EltVT = VT.getVectorElementType();
ShAmt *= EltVT.getSizeInBits();
return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
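The renamed X86ISD::VSHLDQ / VSRLDQ nodes shift the whole 128-bit register (corresponding to the byte-granular pslldq/psrldq forms), so the element count is first scaled to a bit amount before calling getVShift, which operates on a v2i64 bitcast of the source. For example:

// Illustrative: shifting v4i32 left by 2 elements gives
//   ShAmt = 2 * 32 = 64 bits,
// which getVShift applies to the v2i64 bitcast of ShVal.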
@@ -6685,17 +6512,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
V2IsSplat = isSplatVector(V2.getNode());
// Canonicalize the splat or undef, if present, to be on the RHS.
- if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
- Op = CommuteVectorShuffle(SVOp, DAG);
- SVOp = cast<ShuffleVectorSDNode>(Op);
- V1 = SVOp->getOperand(0);
- V2 = SVOp->getOperand(1);
+ if (!V2IsUndef && V1IsSplat && !V2IsSplat) {
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
std::swap(V1IsSplat, V2IsSplat);
- std::swap(V1IsUndef, V2IsUndef);
Commuted = true;
}
- if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
+ if (isCommutedMOVLMask(M, VT, V2IsSplat, V2IsUndef)) {
// Shuffling low element of v1 into undef, just return v1.
if (V2IsUndef)
return V1;
@@ -6705,81 +6529,77 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
// element then try to match unpck{h|l} again. If match, return a
- // new vector_shuffle with the corrected mask.
- SDValue NewMask = NormalizeMask(SVOp, DAG);
- ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
- if (NSVOp != SVOp) {
- if (X86::isUNPCKLMask(NSVOp, true)) {
- return NewMask;
- } else if (X86::isUNPCKHMask(NSVOp, true)) {
- return NewMask;
- }
+ // new vector_shuffle with the corrected mask.p
+ SmallVector<int, 8> NewMask(M.begin(), M.end());
+ NormalizeMask(NewMask, NumElems);
+ if (isUNPCKLMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
+ } else if (isUNPCKHMask(NewMask, VT, HasAVX2, true)) {
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
}
if (Commuted) {
// Commute is back and try unpck* again.
// FIXME: this seems wrong.
- SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
- ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
+ CommuteVectorShuffleMask(M, NumElems);
+ std::swap(V1, V2);
+ std::swap(V1IsSplat, V2IsSplat);
+ Commuted = false;
- if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ if (isUNPCKLMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
- if (X86::isUNPCKHMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
+ if (isUNPCKHMask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
}
// Normalize the node to match x86 shuffle ops if needed
- if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
+ if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
return CommuteVectorShuffle(SVOp, DAG);
// The checks below are all present in isShuffleMaskLegal, but they are
// inlined here right now to enable us to directly emit target specific
// nodes, and remove one by one until they don't return Op anymore.
- SmallVector<int, 16> M;
- SVOp->getMask(M);
- if (isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()))
+ if (isPALIGNRMask(M, VT, Subtarget))
return getTargetShuffleNode(X86ISD::PALIGN, dl, VT, V1, V2,
- X86::getShufflePALIGNRImmediate(SVOp),
+ getShufflePALIGNRImmediate(SVOp),
DAG);
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
- if (VT == MVT::v2i64)
- return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
}
if (isPSHUFHWMask(M, VT))
return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
- X86::getShufflePSHUFHWImmediate(SVOp),
+ getShufflePSHUFHWImmediate(SVOp),
DAG);
if (isPSHUFLWMask(M, VT))
return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
- X86::getShufflePSHUFLWImmediate(SVOp),
+ getShufflePSHUFLWImmediate(SVOp),
DAG);
- if (isSHUFPMask(M, VT))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- X86::getShuffleSHUFImmediate(SVOp), DAG);
+ if (isSHUFPMask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
+ getShuffleSHUFImmediate(SVOp), DAG);
- if (X86::isUNPCKL_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
- if (X86::isUNPCKH_v_undef_Mask(SVOp))
- return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+ if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
+ if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+ return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
//===--------------------------------------------------------------------===//
// Generate target specific nodes for 128 or 256-bit shuffles only
@@ -6787,33 +6607,26 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
//
// Handle VMOVDDUPY permutations
- if (isMOVDDUPYMask(SVOp, Subtarget))
+ if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
- // Handle VPERMILPS* permutations
- if (isVPERMILPSMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPSImmediate(SVOp), DAG);
-
- // Handle VPERMILPD* permutations
- if (isVPERMILPDMask(M, VT, Subtarget))
- return getTargetShuffleNode(getVPERMILOpcode(VT), dl, VT, V1,
- getShuffleVPERMILPDImmediate(SVOp), DAG);
-
- // Handle VPERM2F128 permutations
- if (isVPERM2F128Mask(M, VT, Subtarget))
- return getTargetShuffleNode(X86ISD::VPERM2F128, dl, VT, V1, V2,
- getShuffleVPERM2F128Immediate(SVOp), DAG);
+ // Handle VPERMILPS/D* permutations
+ if (isVPERMILPMask(M, VT, HasAVX)) {
+ if (HasAVX2 && VT == MVT::v8i32)
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+ return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
+ getShuffleSHUFImmediate(SVOp), DAG);
+ }
- // Handle VSHUFPSY permutations
- if (isVSHUFPSYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPSYImmediate(SVOp), DAG);
+ // Handle VPERM2F128/VPERM2I128 permutations
+ if (isVPERM2X128Mask(M, VT, HasAVX))
+ return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
+ V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
- // Handle VSHUFPDY permutations
- if (isVSHUFPDYMask(M, VT, Subtarget))
- return getTargetShuffleNode(getSHUFPOpcode(VT), dl, VT, V1, V2,
- getShuffleVSHUFPDYImmediate(SVOp), DAG);
+ SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(Op, Subtarget, DAG);
+ if (BlendOp.getNode())
+ return BlendOp;
//===--------------------------------------------------------------------===//
// Since no target specific shuffle was selected for this generic one,
@@ -6896,8 +6709,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
Op.getOperand(0)),
Op.getOperand(1));
return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract);
- } else if (VT == MVT::i32) {
- // ExtractPS works with constant index.
+ } else if (VT == MVT::i32 || VT == MVT::i64) {
+ // ExtractPS/pextrq works with constant index.
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
@@ -6933,7 +6746,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(Vec.getValueSizeInBits() <= 128 && "Unexpected vector length");
- if (Subtarget->hasSSE41() || Subtarget->hasAVX()) {
+ if (Subtarget->hasSSE41()) {
SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
if (Res.getNode())
return Res;
@@ -7036,7 +6849,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op,
// Create this as a scalar to vector..
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
- } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
+ } else if ((EltVT == MVT::i32 || EltVT == MVT::i64) &&
+ isa<ConstantSDNode>(N2)) {
// PINSR* works with constant index.
return Op;
}
@@ -7074,7 +6888,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
return Insert128BitVector(N0, V, Ins128Idx, DAG, dl);
}
- if (Subtarget->hasSSE41() || Subtarget->hasAVX())
+ if (Subtarget->hasSSE41())
return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);
if (EltVT == MVT::i8)
@@ -7276,7 +7090,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// load.
if (isGlobalStubReference(OpFlag))
Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
return Result;
}
@@ -7344,7 +7158,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
// load.
if (isGlobalStubReference(OpFlags))
Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -7423,7 +7237,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
DAG.getIntPtrConstant(0),
- MachinePointerInfo(Ptr), false, false, 0);
+ MachinePointerInfo(Ptr),
+ false, false, false, 0);
unsigned char OperandFlags = 0;
// Most TLS accesses are not RIP relative, even on x86-64. One exception is
@@ -7449,7 +7264,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
if (model == TLSModel::InitialExec)
Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
- MachinePointerInfo::getGOT(), false, false, 0);
+ MachinePointerInfo::getGOT(), false, false, false, 0);
// The address of the thread local variable is the add of the thread
// pointer with the offset of the variable.
@@ -7471,8 +7286,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GV = GA->resolveAliasedGlobal(false);
- TLSModel::Model model
- = getTLSModel(GV, getTargetMachine().getRelocationModel());
+ TLSModel::Model model = getTargetMachine().getTLSModel(GV);
switch (model) {
case TLSModel::GeneralDynamic:
@@ -7529,19 +7343,77 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(),
Chain.getValue(1));
- }
+ } else if (Subtarget->isTargetWindows()) {
+ // Just use the implicit TLS architecture.
+ // Need to generate something similar to:
+ // mov rdx, qword [gs:abs 58H]; Load pointer to ThreadLocalStorage
+ // ; from TEB
+ // mov ecx, dword [rel _tls_index]; Load index (from C runtime)
+ // mov rcx, qword [rdx+rcx*8]
+ // mov eax, .tls$:tlsvar
+ // [rax+rcx] contains the address
+ // Windows 64bit: gs:0x58
+ // Windows 32bit: fs:__tls_array
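+ // A rough C-level sketch of the sequence above (illustrative only; the
+ // TEB slot and _tls_index follow the convention described above, and
+ // secrel() stands for the variable's .tls section-relative offset):
+ //   char **tls_array = *(char ***)(TEB + 0x58);  // 64-bit TEB slot
+ //   char  *tls_base  = tls_array[_tls_index];    // this module's block
+ //   result           = tls_base + secrel(tlsvar);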
- assert(false &&
- "TLS not implemented for this target.");
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+ DebugLoc dl = GA->getDebugLoc();
+ SDValue Chain = DAG.getEntryNode();
- llvm_unreachable("Unreachable");
- return SDValue();
+ // Get the Thread Pointer, which is %fs:__tls_array (32-bit) or
+ // %gs:0x58 (64-bit).
+ Value *Ptr = Constant::getNullValue(Subtarget->is64Bit()
+ ? Type::getInt8PtrTy(*DAG.getContext(),
+ 256)
+ : Type::getInt32PtrTy(*DAG.getContext(),
+ 257));
+
+ SDValue ThreadPointer = DAG.getLoad(getPointerTy(), dl, Chain,
+ Subtarget->is64Bit()
+ ? DAG.getIntPtrConstant(0x58)
+ : DAG.getExternalSymbol("_tls_array",
+ getPointerTy()),
+ MachinePointerInfo(Ptr),
+ false, false, false, 0);
+
+ // Load the _tls_index variable
+ SDValue IDX = DAG.getExternalSymbol("_tls_index", getPointerTy());
+ if (Subtarget->is64Bit())
+ IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, getPointerTy(), Chain,
+ IDX, MachinePointerInfo(), MVT::i32,
+ false, false, 0);
+ else
+ IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
+ false, false, false, 0);
+
+ SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
+ getPointerTy());
+ IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
+
+ SDValue res = DAG.getNode(ISD::ADD, dl, getPointerTy(), ThreadPointer, IDX);
+ res = DAG.getLoad(getPointerTy(), dl, Chain, res, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Get the offset of start of .tls section
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
+ GA->getOffset(), X86II::MO_SECREL);
+ SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), TGA);
+
+ // The address of the thread local variable is the add of the thread
+ // pointer with the offset of the variable.
+ return DAG.getNode(ISD::ADD, dl, getPointerTy(), res, Offset);
+ }
+
+ llvm_unreachable("TLS not implemented for this target.");
}
-/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values and
-/// take a 2 x i32 value to shift plus a shift amount.
-SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const {
+/// LowerShiftParts - Lower SRA_PARTS and friends, which return two i32 values
+/// and take a 2 x i32 value to shift plus a shift amount.
+SDValue X86TargetLowering::LowerShiftParts(SDValue Op, SelectionDAG &DAG) const{
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -7673,7 +7545,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
Op.getValueType(), MMO);
Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
- false, false, 0);
+ false, false, false, 0);
}
return Result;
@@ -7682,85 +7554,65 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SelectionDAG &DAG) const {
- // This algorithm is not obvious. Here it is in C code, more or less:
+ // This algorithm is not obvious. Here is what we're trying to output:
/*
- double uint64_to_double( uint32_t hi, uint32_t lo ) {
- static const __m128i exp = { 0x4330000045300000ULL, 0 };
- static const __m128d bias = { 0x1.0p84, 0x1.0p52 };
-
- // Copy ints to xmm registers.
- __m128i xh = _mm_cvtsi32_si128( hi );
- __m128i xl = _mm_cvtsi32_si128( lo );
-
- // Combine into low half of a single xmm register.
- __m128i x = _mm_unpacklo_epi32( xh, xl );
- __m128d d;
- double sd;
-
- // Merge in appropriate exponents to give the integer bits the right
- // magnitude.
- x = _mm_unpacklo_epi32( x, exp );
-
- // Subtract away the biases to deal with the IEEE-754 double precision
- // implicit 1.
- d = _mm_sub_pd( (__m128d) x, bias );
-
- // All conversions up to here are exact. The correctly rounded result is
- // calculated using the current rounding mode using the following
- // horizontal add.
- d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
- _mm_store_sd( &sd, d ); // Because we are returning doubles in XMM, this
- // store doesn't really need to be here (except
- // maybe to zero the other double)
- return sd;
- }
+ movq %rax, %xmm0
+ punpckldq (c0), %xmm0 // c0: (uint4){ 0x43300000U, 0x45300000U, 0U, 0U }
+ subpd (c1), %xmm0 // c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 }
+ #ifdef __SSE3__
+ haddpd %xmm0, %xmm0
+ #else
+ pshufd $0x4e, %xmm0, %xmm1
+ addpd %xmm1, %xmm0
+ #endif
*/
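+ /* Why the constants work (illustrative): for x = hi*2^32 + lo, the
+    punpckldq forms the doubles (0x1.0p52 + lo) and (0x1.0p84 + hi*2^32)
+    exactly; subtracting c1 leaves { (double)lo, (double)hi * 0x1.0p32 },
+    still exact, so the final horizontal add is the only rounding step. */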
DebugLoc dl = Op.getDebugLoc();
LLVMContext *Context = DAG.getContext();
// Build some magic constants.
- std::vector<Constant*> CV0;
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
- Constant *C0 = ConstantVector::get(CV0);
+ const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
+ Constant *C0 = ConstantDataVector::get(*Context, CV0);
SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);
- std::vector<Constant*> CV1;
+ SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
+ ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);
- SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(1)));
- SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
- DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- Op.getOperand(0),
- DAG.getIntPtrConstant(0)));
- SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
+ // Load the 64-bit value into an XMM register.
+ SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ Op.getOperand(0));
SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
- SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
- SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2);
+ false, false, false, 16);
+ SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, XR1),
+ CLod0);
+
SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
+ SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck1);
SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
+ SDValue Result;
+
+ if (Subtarget->hasSSE3()) {
+ // FIXME: The 'haddpd' instruction may be slower than 'movhlps + addsd'.
+ Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
+ } else {
+ SDValue S2F = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Sub);
+ SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
+ S2F, 0x4E, DAG);
+ Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Shuffle),
+ Sub);
+ }
- // Add the halves; easiest way is to swap them into another reg first.
- int ShufMask[2] = { 1, -1 };
- SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
- DAG.getUNDEF(MVT::v2f64), ShufMask);
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
DAG.getIntPtrConstant(0));
}
@@ -7777,8 +7629,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
Op.getOperand(0));
// Zero out the upper parts of the register.
- Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget->hasXMMInt(),
- DAG);
+ Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);
Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Load),
@@ -7830,6 +7681,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
return LowerUINT_TO_FP_i64(Op, DAG);
else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
+ else if (Subtarget->is64Bit() &&
+ SrcVT == MVT::i64 && DstVT == MVT::f32)
+ return SDValue();
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
@@ -7849,7 +7703,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
- StackSlot, MachinePointerInfo(),
+ StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
// was negative. This is the same as the optimization in
@@ -7897,19 +7751,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}
std::pair<SDValue,SDValue> X86TargetLowering::
-FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
+FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) const {
DebugLoc DL = Op.getDebugLoc();
EVT DstTy = Op.getValueType();
- if (!IsSigned) {
+ if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
DstTy = MVT::i64;
}
assert(DstTy.getSimpleVT() <= MVT::i64 &&
DstTy.getSimpleVT() >= MVT::i16 &&
- "Unknown FP_TO_SINT to lower!");
+ "Unknown FP_TO_INT to lower!");
// These are really Legal.
if (DstTy == MVT::i32 &&
@@ -7920,26 +7774,29 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
return std::make_pair(SDValue(), SDValue());
- // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
- // stack slot.
+ // We lower FP->int64 either into FISTP64 followed by a load from a temporary
+ // stack slot, or into the FTOL runtime function.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
-
-
unsigned Opc;
- switch (DstTy.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
- case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
- case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
- case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
- }
+ if (!IsSigned && isIntegerTypeFTOL(DstTy))
+ Opc = X86ISD::WIN_FTOL;
+ else
+ switch (DstTy.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+ case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+ case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+ case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+ }
SDValue Chain = DAG.getEntryNode();
SDValue Value = Op.getOperand(0);
EVT TheVT = Op.getOperand(0).getValueType();
+ // FIXME: This causes a redundant load/store if the SSE-class value is
+ // already in memory, such as if it is on the call stack.
if (isScalarFPTypeInSSEReg(TheVT)) {
assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
Chain = DAG.getStore(Chain, DL, Value, StackSlot,
@@ -7964,12 +7821,26 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) const {
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOStore, MemSize, MemSize);
- // Build the FP_TO_INT*_IN_MEM
- SDValue Ops[] = { Chain, Value, StackSlot };
- SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- Ops, 3, DstTy, MMO);
-
- return std::make_pair(FIST, StackSlot);
+ if (Opc != X86ISD::WIN_FTOL) {
+ // Build the FP_TO_INT*_IN_MEM
+ SDValue Ops[] = { Chain, Value, StackSlot };
+ SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
+ Ops, 3, DstTy, MMO);
+ return std::make_pair(FIST, StackSlot);
+ } else {
+ SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
+ DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, Value);
+ SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX,
+ MVT::i32, ftol.getValue(1));
+ SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX,
+ MVT::i32, eax.getValue(2));
+ SDValue Ops[] = { eax, edx };
+ SDValue pair = IsReplace
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, 2)
+ : DAG.getMergeValues(Ops, 2, DL);
+ return std::make_pair(pair, SDValue());
+ }
}
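+// Illustrative note: WIN_FTOL models the MSVC runtime conversion helper
+// (commonly _ftol2; the exact symbol is an assumption here), which takes
+// the value in ST(0) and returns the truncated 64-bit result in EDX:EAX,
+// hence the paired CopyFromReg of EAX and then EDX above.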
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
@@ -7977,25 +7848,37 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
if (Op.getValueType().isVector())
return SDValue();
- std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
+ /*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (FIST.getNode() == 0) return Op;
- // Load the result.
- return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+ else
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
SelectionDAG &DAG) const {
- std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
+ std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
+ /*IsSigned=*/ false, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
assert(FIST.getNode() && "Unexpected failure");
- // Load the result.
- return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
- FIST, StackSlot, MachinePointerInfo(), false, false, 0);
+ if (StackSlot.getNode())
+ // Load the result.
+ return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
+ FIST, StackSlot, MachinePointerInfo(),
+ false, false, false, 0);
+ else
+ // The node is the result.
+ return FIST;
}
SDValue X86TargetLowering::LowerFABS(SDValue Op,
@@ -8006,23 +7889,18 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op,
EVT EltVT = VT;
if (VT.isVector())
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
+ Constant *C;
if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
- CV.push_back(C);
- CV.push_back(C);
+ C = ConstantVector::getSplat(2,
+ ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
} else {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ C = ConstantVector::getSplat(4,
+ ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
}
- Constant *C = ConstantVector::get(CV);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}
@@ -8031,31 +7909,28 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
EVT EltVT = VT;
- if (VT.isVector())
+ unsigned NumElts = VT == MVT::f64 ? 2 : 4;
+ if (VT.isVector()) {
EltVT = VT.getVectorElementType();
- std::vector<Constant*> CV;
- if (EltVT == MVT::f64) {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
- CV.push_back(C);
- CV.push_back(C);
- } else {
- Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
- CV.push_back(C);
+ NumElts = VT.getVectorNumElements();
}
- Constant *C = ConstantVector::get(CV);
+ Constant *C;
+ if (EltVT == MVT::f64)
+ C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
+ else
+ C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
+ C = ConstantVector::getSplat(NumElts, C);
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
if (VT.isVector()) {
+ MVT XORVT = VT.getSizeInBits() == 128 ? MVT::v2i64 : MVT::v4i64;
return DAG.getNode(ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::XOR, dl, MVT::v2i64,
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getNode(ISD::XOR, dl, XORVT,
+ DAG.getNode(ISD::BITCAST, dl, XORVT,
Op.getOperand(0)),
- DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Mask)));
+ DAG.getNode(ISD::BITCAST, dl, XORVT, Mask)));
} else {
return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
}
@@ -8084,7 +7959,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// type, and that won't be f80 since that is not custom lowered.
// First get the sign bit of second operand.
- std::vector<Constant*> CV;
+ SmallVector<Constant*,4> CV;
if (SrcVT == MVT::f64) {
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
@@ -8098,7 +7973,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);
// Shift sign bit right or left if the two operands have different types.
@@ -8127,7 +8002,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(),
- false, false, 16);
+ false, false, false, 16);
SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);
// Or the value with the sign bit.
@@ -8191,8 +8066,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
// climbing the DAG back to the root, and it doesn't seem to be worth the
// effort.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg &&
+ UI->getOpcode() != ISD::SETCC &&
+ UI->getOpcode() != ISD::STORE)
goto default_case;
if (ConstantSDNode *C =
@@ -8325,8 +8202,8 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
unsigned BitWidth = Op0.getValueSizeInBits();
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
- APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
- DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ APInt Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Zeros, Ones);
if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
return SDValue();
}
@@ -8335,11 +8212,19 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
} else if (Op1.getOpcode() == ISD::Constant) {
ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ uint64_t AndRHSVal = AndRHS->getZExtValue();
SDValue AndLHS = Op0;
- if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
+
+ if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
}
+
+ // Use BT if the immediate can't be encoded in a TEST instruction.
+ if (!isUInt<32>(AndRHSVal) && isPowerOf2_64(AndRHSVal)) {
+ LHS = AndLHS;
+ RHS = DAG.getConstant(Log2_64_Ceil(AndRHSVal), LHS.getValueType());
+ }
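+ // e.g. (x & (1ULL << 40)) != 0: TEST only takes a sign-extended 32-bit
+ // immediate, so the power-of-2 mask becomes 'bt rax, 40', encoding the
+ // bit index rather than the mask.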
}
if (LHS.getNode()) {
@@ -8466,9 +8351,8 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (isFP) {
unsigned SSECC = 8;
EVT EltVT = Op0.getValueType().getVectorElementType();
- assert(EltVT == MVT::f32 || EltVT == MVT::f64);
+ assert(EltVT == MVT::f32 || EltVT == MVT::f64); (void)EltVT;
- unsigned Opc = EltVT == MVT::f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
bool Swap = false;
// SSE Condition code mapping:
@@ -8508,61 +8392,57 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (SSECC == 8) {
if (SetCCOpcode == ISD::SETUEQ) {
SDValue UNORD, EQ;
- UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
- EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
+ UNORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(3, MVT::i8));
+ EQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(0, MVT::i8));
return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
- }
- else if (SetCCOpcode == ISD::SETONE) {
+ } else if (SetCCOpcode == ISD::SETONE) {
SDValue ORD, NEQ;
- ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
- NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
+ ORD = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(7, MVT::i8));
+ NEQ = DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(4, MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
}
llvm_unreachable("Illegal FP comparison");
}
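+ // (SSE CMPP immediate encodings: 0=EQ, 1=LT, 2=LE, 3=UNORD, 4=NEQ,
+ // 5=NLT, 6=NLE, 7=ORD, hence UNORD|EQ above for UEQ and ORD&NEQ for ONE.)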
// Handle all other FP comparisons here.
- return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
+ return DAG.getNode(X86ISD::CMPP, dl, VT, Op0, Op1,
+ DAG.getConstant(SSECC, MVT::i8));
}
// Break 256-bit integer vector compare into smaller ones.
- if (!isFP && VT.getSizeInBits() == 256)
+ if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
return Lower256IntVSETCC(Op, DAG);
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integers, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
+ unsigned Opc = 0;
bool Swap = false, Invert = false, FlipSigns = false;
- switch (VT.getSimpleVT().SimpleTy) {
- default: break;
- case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
- case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
- case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
- case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
- }
-
switch (SetCCOpcode) {
default: break;
case ISD::SETNE: Invert = true;
- case ISD::SETEQ: Opc = EQOpc; break;
+ case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
case ISD::SETLT: Swap = true;
- case ISD::SETGT: Opc = GTOpc; break;
+ case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
case ISD::SETGE: Swap = true;
- case ISD::SETLE: Opc = GTOpc; Invert = true; break;
+ case ISD::SETLE: Opc = X86ISD::PCMPGT; Invert = true; break;
case ISD::SETULT: Swap = true;
- case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
+ case ISD::SETUGT: Opc = X86ISD::PCMPGT; FlipSigns = true; break;
case ISD::SETUGE: Swap = true;
- case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
+ case ISD::SETULE: Opc = X86ISD::PCMPGT; FlipSigns = true; Invert = true; break;
}
if (Swap)
std::swap(Op0, Op1);
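+ // FlipSigns relies on the identity a <u b == (a ^ SignBit) <s (b ^ SignBit);
+ // e.g. for i8, 0xFF <u 0x01 is false, and 0x7F <s 0x81 (-127) is false too.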
// Check that the operation in question is available (most are plain SSE2,
// but PCMPGTQ and PCMPEQQ have different requirements).
- if (Opc == X86ISD::PCMPGTQ && !Subtarget->hasSSE42() && !Subtarget->hasAVX())
+ if (Opc == X86ISD::PCMPGT && VT == MVT::v2i64 && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQQ && !Subtarget->hasSSE41() && !Subtarget->hasAVX())
+ if (Opc == X86ISD::PCMPEQ && VT == MVT::v2i64 && !Subtarget->hasSSE41())
return SDValue();
// Since SSE has no unsigned integer comparisons, we need to flip the sign
@@ -8679,8 +8559,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC ||
- Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ unsigned CondOpcode = Cond.getOpcode();
+ if (CondOpcode == X86ISD::SETCC ||
+ CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -8697,6 +8578,39 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = Cmp;
addTest = false;
}
+ } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
+ CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
+ ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
+ Cond.getOperand(0).getValueType() != MVT::i8)) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ unsigned X86Opcode;
+ unsigned X86Cond;
+ SDVTList VTs;
+ switch (CondOpcode) {
+ case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
+ case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
+ case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
+ case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
+ default: llvm_unreachable("unexpected overflowing operator");
+ }
+ if (CondOpcode == ISD::UMULO)
+ VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
+ MVT::i32);
+ else
+ VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+
+ SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS);
+
+ if (CondOpcode == ISD::UMULO)
+ Cond = X86Op.getValue(2);
+ else
+ Cond = X86Op.getValue(1);
+
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ addTest = false;
}
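+ // e.g. a select on (uadd.with.overflow a, b).overflow becomes ADD a, b
+ // followed by a CMOV keyed on COND_B: for unsigned adds the carry flag is
+ // the overflow bit (the signed forms use COND_O instead).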
if (addTest) {
@@ -8778,11 +8692,27 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Dest = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
SDValue CC;
+ bool Inverted = false;
if (Cond.getOpcode() == ISD::SETCC) {
- SDValue NewCond = LowerSETCC(Cond, DAG);
- if (NewCond.getNode())
- Cond = NewCond;
+ // Check for setcc([su]{add,sub,mul}o == 0).
+ if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
+ isa<ConstantSDNode>(Cond.getOperand(1)) &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->isNullValue() &&
+ Cond.getOperand(0).getResNo() == 1 &&
+ (Cond.getOperand(0).getOpcode() == ISD::SADDO ||
+ Cond.getOperand(0).getOpcode() == ISD::UADDO ||
+ Cond.getOperand(0).getOpcode() == ISD::SSUBO ||
+ Cond.getOperand(0).getOpcode() == ISD::USUBO ||
+ Cond.getOperand(0).getOpcode() == ISD::SMULO ||
+ Cond.getOperand(0).getOpcode() == ISD::UMULO)) {
+ Inverted = true;
+ Cond = Cond.getOperand(0);
+ } else {
+ SDValue NewCond = LowerSETCC(Cond, DAG);
+ if (NewCond.getNode())
+ Cond = NewCond;
+ }
}
#if 0
// FIXME: LowerXALUO doesn't handle these!!
@@ -8803,8 +8733,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC ||
- Cond.getOpcode() == X86ISD::SETCC_CARRY) {
+ unsigned CondOpcode = Cond.getOpcode();
+ if (CondOpcode == X86ISD::SETCC ||
+ CondOpcode == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -8825,6 +8756,43 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
break;
}
}
+ }
+ CondOpcode = Cond.getOpcode();
+ if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
+ CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
+ ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) &&
+ Cond.getOperand(0).getValueType() != MVT::i8)) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ unsigned X86Opcode;
+ unsigned X86Cond;
+ SDVTList VTs;
+ switch (CondOpcode) {
+ case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break;
+ case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break;
+ case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break;
+ case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break;
+ case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break;
+ case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break;
+ default: llvm_unreachable("unexpected overflowing operator");
+ }
+ if (Inverted)
+ X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond);
+ if (CondOpcode == ISD::UMULO)
+ VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(),
+ MVT::i32);
+ else
+ VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
+
+ SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS);
+
+ if (CondOpcode == ISD::UMULO)
+ Cond = X86Op.getValue(2);
+ else
+ Cond = X86Op.getValue(1);
+
+ CC = DAG.getConstant(X86Cond, MVT::i8);
+ addTest = false;
} else {
unsigned CondOpc;
if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
@@ -8888,6 +8856,66 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
CC = DAG.getConstant(CCode, MVT::i8);
Cond = Cond.getOperand(0).getOperand(1);
addTest = false;
+ } else if (Cond.getOpcode() == ISD::SETCC &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETOEQ) {
+ // For FCMP_OEQ, we can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Op.getNode()->hasOneUse()) {
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_OEQ.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+ Dest = FalseBB;
+
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_P, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ }
+ }
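+ // e.g. 'br (fcmp oeq %x, %y), %T, %F' becomes roughly ucomiss + 'jne %F'
+ // + 'jp %F', with the trailing unconditional branch retargeted to %T,
+ // since OEQ requires ZF set and PF clear.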
+ } else if (Cond.getOpcode() == ISD::SETCC &&
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETUNE) {
+ // For FCMP_UNE, we can emit
+ // two branches instead of an explicit AND instruction with a
+ // separate test. However, we only do this if this block doesn't
+ // have a fall-through edge, because this requires an explicit
+ // jmp when the condition is false.
+ if (Op.getNode()->hasOneUse()) {
+ SDNode *User = *Op.getNode()->use_begin();
+ // Look for an unconditional branch following this conditional branch.
+ // We need this because we need to reverse the successors in order
+ // to implement FCMP_UNE.
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
+ assert(NewBR == User);
+ (void)NewBR;
+
+ SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
+ Cond.getOperand(0), Cond.getOperand(1));
+ CC = DAG.getConstant(X86::COND_NE, MVT::i8);
+ Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
+ Chain, Dest, CC, Cmp);
+ CC = DAG.getConstant(X86::COND_NP, MVT::i8);
+ Cond = Cmp;
+ addTest = false;
+ Dest = FalseBB;
+ }
+ }
}
}
@@ -8926,7 +8954,7 @@ SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows() ||
- EnableSegmentedStacks) &&
+ getTargetMachine().Options.EnableSegmentedStacks) &&
"This should be used only on Windows targets or when segmented stacks "
"are being used");
assert(!Subtarget->isTargetEnvMacho() && "Not implemented");
@@ -8940,7 +8968,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- if (EnableSegmentedStacks) {
+ if (getTargetMachine().Options.EnableSegmentedStacks) {
MachineFunction &MF = DAG.getMachineFunction();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -9076,10 +9104,10 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
if (ArgMode == 2) {
// Sanity Check: Make sure using fp_offset makes sense.
- assert(!UseSoftFloat &&
+ assert(!getTargetMachine().Options.UseSoftFloat &&
!(DAG.getMachineFunction()
.getFunction()->hasFnAttr(Attribute::NoImplicitFloat)) &&
- Subtarget->hasXMM());
+ Subtarget->hasSSE1());
}
// Insert VAARG_64 node into the DAG
@@ -9106,7 +9134,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
Chain,
VAARG,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -9125,6 +9153,43 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}
+// getTargetVShiftNode - Handle vector element shifts where the shift amount
+// may or may not be a constant. Takes the immediate version of the shift as
+// input.
+static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue SrcOp, SDValue ShAmt,
+ SelectionDAG &DAG) {
+ assert(ShAmt.getValueType() == MVT::i32 && "ShAmt is not i32");
+
+ if (isa<ConstantSDNode>(ShAmt)) {
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+ }
+ }
+
+ // Change opcode to non-immediate version
+ switch (Opc) {
+ default: llvm_unreachable("Unknown target vector shift node");
+ case X86ISD::VSHLI: Opc = X86ISD::VSHL; break;
+ case X86ISD::VSRLI: Opc = X86ISD::VSRL; break;
+ case X86ISD::VSRAI: Opc = X86ISD::VSRA; break;
+ }
+
+ // Need to build a vector containing the shift amount.
+ // The shift amount is 32 bits, but the SSE instructions read 64 bits, so
+ // fill the upper 32 bits with zero.
+ SDValue ShOps[4];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShOps[2] = DAG.getUNDEF(MVT::i32);
+ ShOps[3] = DAG.getUNDEF(MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
+ return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
+}
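+// For example (hypothetical caller), getTargetVShiftNode(X86ISD::VSHLI, dl,
+// MVT::v8i16, Src, Amt, DAG) emits VSHLI directly when Amt is a constant,
+// and otherwise builds the v4i32 vector {Amt, 0, undef, undef} required by
+// the register form VSHL.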
+
SDValue
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -9159,7 +9224,7 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
unsigned Opc = 0;
ISD::CondCode CC = ISD::SETCC_INVALID;
switch (IntNo) {
- default: break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_comieq_ss:
case Intrinsic::x86_sse2_comieq_sd:
Opc = X86ISD::COMI;
@@ -9231,7 +9296,201 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
+ // XOP comparison intrinsics
+ case Intrinsic::x86_xop_vpcomltb:
+ case Intrinsic::x86_xop_vpcomltw:
+ case Intrinsic::x86_xop_vpcomltd:
+ case Intrinsic::x86_xop_vpcomltq:
+ case Intrinsic::x86_xop_vpcomltub:
+ case Intrinsic::x86_xop_vpcomltuw:
+ case Intrinsic::x86_xop_vpcomltud:
+ case Intrinsic::x86_xop_vpcomltuq:
+ case Intrinsic::x86_xop_vpcomleb:
+ case Intrinsic::x86_xop_vpcomlew:
+ case Intrinsic::x86_xop_vpcomled:
+ case Intrinsic::x86_xop_vpcomleq:
+ case Intrinsic::x86_xop_vpcomleub:
+ case Intrinsic::x86_xop_vpcomleuw:
+ case Intrinsic::x86_xop_vpcomleud:
+ case Intrinsic::x86_xop_vpcomleuq:
+ case Intrinsic::x86_xop_vpcomgtb:
+ case Intrinsic::x86_xop_vpcomgtw:
+ case Intrinsic::x86_xop_vpcomgtd:
+ case Intrinsic::x86_xop_vpcomgtq:
+ case Intrinsic::x86_xop_vpcomgtub:
+ case Intrinsic::x86_xop_vpcomgtuw:
+ case Intrinsic::x86_xop_vpcomgtud:
+ case Intrinsic::x86_xop_vpcomgtuq:
+ case Intrinsic::x86_xop_vpcomgeb:
+ case Intrinsic::x86_xop_vpcomgew:
+ case Intrinsic::x86_xop_vpcomged:
+ case Intrinsic::x86_xop_vpcomgeq:
+ case Intrinsic::x86_xop_vpcomgeub:
+ case Intrinsic::x86_xop_vpcomgeuw:
+ case Intrinsic::x86_xop_vpcomgeud:
+ case Intrinsic::x86_xop_vpcomgeuq:
+ case Intrinsic::x86_xop_vpcomeqb:
+ case Intrinsic::x86_xop_vpcomeqw:
+ case Intrinsic::x86_xop_vpcomeqd:
+ case Intrinsic::x86_xop_vpcomeqq:
+ case Intrinsic::x86_xop_vpcomequb:
+ case Intrinsic::x86_xop_vpcomequw:
+ case Intrinsic::x86_xop_vpcomequd:
+ case Intrinsic::x86_xop_vpcomequq:
+ case Intrinsic::x86_xop_vpcomneb:
+ case Intrinsic::x86_xop_vpcomnew:
+ case Intrinsic::x86_xop_vpcomned:
+ case Intrinsic::x86_xop_vpcomneq:
+ case Intrinsic::x86_xop_vpcomneub:
+ case Intrinsic::x86_xop_vpcomneuw:
+ case Intrinsic::x86_xop_vpcomneud:
+ case Intrinsic::x86_xop_vpcomneuq:
+ case Intrinsic::x86_xop_vpcomfalseb:
+ case Intrinsic::x86_xop_vpcomfalsew:
+ case Intrinsic::x86_xop_vpcomfalsed:
+ case Intrinsic::x86_xop_vpcomfalseq:
+ case Intrinsic::x86_xop_vpcomfalseub:
+ case Intrinsic::x86_xop_vpcomfalseuw:
+ case Intrinsic::x86_xop_vpcomfalseud:
+ case Intrinsic::x86_xop_vpcomfalseuq:
+ case Intrinsic::x86_xop_vpcomtrueb:
+ case Intrinsic::x86_xop_vpcomtruew:
+ case Intrinsic::x86_xop_vpcomtrued:
+ case Intrinsic::x86_xop_vpcomtrueq:
+ case Intrinsic::x86_xop_vpcomtrueub:
+ case Intrinsic::x86_xop_vpcomtrueuw:
+ case Intrinsic::x86_xop_vpcomtrueud:
+ case Intrinsic::x86_xop_vpcomtrueuq: {
+ unsigned CC = 0;
+ unsigned Opc = 0;
+
+ switch (IntNo) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::x86_xop_vpcomltb:
+ case Intrinsic::x86_xop_vpcomltw:
+ case Intrinsic::x86_xop_vpcomltd:
+ case Intrinsic::x86_xop_vpcomltq:
+ CC = 0;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomltub:
+ case Intrinsic::x86_xop_vpcomltuw:
+ case Intrinsic::x86_xop_vpcomltud:
+ case Intrinsic::x86_xop_vpcomltuq:
+ CC = 0;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomleb:
+ case Intrinsic::x86_xop_vpcomlew:
+ case Intrinsic::x86_xop_vpcomled:
+ case Intrinsic::x86_xop_vpcomleq:
+ CC = 1;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomleub:
+ case Intrinsic::x86_xop_vpcomleuw:
+ case Intrinsic::x86_xop_vpcomleud:
+ case Intrinsic::x86_xop_vpcomleuq:
+ CC = 1;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomgtb:
+ case Intrinsic::x86_xop_vpcomgtw:
+ case Intrinsic::x86_xop_vpcomgtd:
+ case Intrinsic::x86_xop_vpcomgtq:
+ CC = 2;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomgtub:
+ case Intrinsic::x86_xop_vpcomgtuw:
+ case Intrinsic::x86_xop_vpcomgtud:
+ case Intrinsic::x86_xop_vpcomgtuq:
+ CC = 2;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomgeb:
+ case Intrinsic::x86_xop_vpcomgew:
+ case Intrinsic::x86_xop_vpcomged:
+ case Intrinsic::x86_xop_vpcomgeq:
+ CC = 3;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomgeub:
+ case Intrinsic::x86_xop_vpcomgeuw:
+ case Intrinsic::x86_xop_vpcomgeud:
+ case Intrinsic::x86_xop_vpcomgeuq:
+ CC = 3;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomeqb:
+ case Intrinsic::x86_xop_vpcomeqw:
+ case Intrinsic::x86_xop_vpcomeqd:
+ case Intrinsic::x86_xop_vpcomeqq:
+ CC = 4;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomequb:
+ case Intrinsic::x86_xop_vpcomequw:
+ case Intrinsic::x86_xop_vpcomequd:
+ case Intrinsic::x86_xop_vpcomequq:
+ CC = 4;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomneb:
+ case Intrinsic::x86_xop_vpcomnew:
+ case Intrinsic::x86_xop_vpcomned:
+ case Intrinsic::x86_xop_vpcomneq:
+ CC = 5;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomneub:
+ case Intrinsic::x86_xop_vpcomneuw:
+ case Intrinsic::x86_xop_vpcomneud:
+ case Intrinsic::x86_xop_vpcomneuq:
+ CC = 5;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomfalseb:
+ case Intrinsic::x86_xop_vpcomfalsew:
+ case Intrinsic::x86_xop_vpcomfalsed:
+ case Intrinsic::x86_xop_vpcomfalseq:
+ CC = 6;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomfalseub:
+ case Intrinsic::x86_xop_vpcomfalseuw:
+ case Intrinsic::x86_xop_vpcomfalseud:
+ case Intrinsic::x86_xop_vpcomfalseuq:
+ CC = 6;
+ Opc = X86ISD::VPCOMU;
+ break;
+ case Intrinsic::x86_xop_vpcomtrueb:
+ case Intrinsic::x86_xop_vpcomtruew:
+ case Intrinsic::x86_xop_vpcomtrued:
+ case Intrinsic::x86_xop_vpcomtrueq:
+ CC = 7;
+ Opc = X86ISD::VPCOM;
+ break;
+ case Intrinsic::x86_xop_vpcomtrueub:
+ case Intrinsic::x86_xop_vpcomtrueuw:
+ case Intrinsic::x86_xop_vpcomtrueud:
+ case Intrinsic::x86_xop_vpcomtrueuq:
+ CC = 7;
+ Opc = X86ISD::VPCOMU;
+ break;
+ }
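+ // (VPCOM condition encodings used above: 0=LT, 1=LE, 2=GT, 3=GE, 4=EQ,
+ // 5=NE, 6=FALSE, 7=TRUE.)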
+
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ return DAG.getNode(Opc, dl, Op.getValueType(), LHS, RHS,
+ DAG.getConstant(CC, MVT::i8));
+ }
+
// Arithmetic intrinsics.
+ case Intrinsic::x86_sse2_pmulu_dq:
+ case Intrinsic::x86_avx2_pmulu_dq:
+ return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse3_hadd_ps:
case Intrinsic::x86_sse3_hadd_pd:
case Intrinsic::x86_avx_hadd_ps_256:
@@ -9244,6 +9503,62 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::x86_avx_hsub_pd_256:
return DAG.getNode(X86ISD::FHSUB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_phadd_w_128:
+ case Intrinsic::x86_ssse3_phadd_d_128:
+ case Intrinsic::x86_avx2_phadd_w:
+ case Intrinsic::x86_avx2_phadd_d:
+ return DAG.getNode(X86ISD::HADD, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_phsub_w_128:
+ case Intrinsic::x86_ssse3_phsub_d_128:
+ case Intrinsic::x86_avx2_phsub_w:
+ case Intrinsic::x86_avx2_phsub_d:
+ return DAG.getNode(X86ISD::HSUB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psllv_d:
+ case Intrinsic::x86_avx2_psllv_q:
+ case Intrinsic::x86_avx2_psllv_d_256:
+ case Intrinsic::x86_avx2_psllv_q_256:
+ return DAG.getNode(ISD::SHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrlv_d:
+ case Intrinsic::x86_avx2_psrlv_q:
+ case Intrinsic::x86_avx2_psrlv_d_256:
+ case Intrinsic::x86_avx2_psrlv_q_256:
+ return DAG.getNode(ISD::SRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_avx2_psrav_d:
+ case Intrinsic::x86_avx2_psrav_d_256:
+ return DAG.getNode(ISD::SRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_pshuf_b_128:
+ case Intrinsic::x86_avx2_pshuf_b:
+ return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_ssse3_psign_b_128:
+ case Intrinsic::x86_ssse3_psign_w_128:
+ case Intrinsic::x86_ssse3_psign_d_128:
+ case Intrinsic::x86_avx2_psign_b:
+ case Intrinsic::x86_avx2_psign_w:
+ case Intrinsic::x86_avx2_psign_d:
+ return DAG.getNode(X86ISD::PSIGN, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse41_insertps:
+ return DAG.getNode(X86ISD::INSERTPS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::x86_avx_vperm2f128_ps_256:
+ case Intrinsic::x86_avx_vperm2f128_pd_256:
+ case Intrinsic::x86_avx_vperm2f128_si_256:
+ case Intrinsic::x86_avx2_vperm2i128:
+ return DAG.getNode(X86ISD::VPERM2X128, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+ case Intrinsic::x86_avx_vpermil_ps:
+ case Intrinsic::x86_avx_vpermil_pd:
+ case Intrinsic::x86_avx_vpermil_ps_256:
+ case Intrinsic::x86_avx_vpermil_pd_256:
+ return DAG.getNode(X86ISD::VPERMILP, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
// ptest and testp intrinsics. The intrinsics these come from are designed
// to return an integer value, not just an instruction, so lower them to the
// ptest or testp pattern and a setcc for the result.
@@ -9310,16 +9625,53 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // Fix vector shift instructions where the last operand is a non-immediate
- // i32 value.
+ // SSE/AVX shift intrinsics
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ return DAG.getNode(X86ISD::VSHL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ return DAG.getNode(X86ISD::VSRL, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d:
+ return DAG.getNode(X86ISD::VSRA, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ return getTargetVShiftNode(X86ISD::VSHLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ return getTargetVShiftNode(X86ISD::VSRLI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2), DAG);
+ // Fix vector shift instructions where the last operand is a non-immediate
+ // i32 value.
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -9333,79 +9685,40 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
return SDValue();
unsigned NewIntNo = 0;
- EVT ShAmtVT = MVT::v4i32;
switch (IntNo) {
- case Intrinsic::x86_sse2_pslli_w:
- NewIntNo = Intrinsic::x86_sse2_psll_w;
- break;
- case Intrinsic::x86_sse2_pslli_d:
- NewIntNo = Intrinsic::x86_sse2_psll_d;
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntNo = Intrinsic::x86_mmx_psll_w;
break;
- case Intrinsic::x86_sse2_pslli_q:
- NewIntNo = Intrinsic::x86_sse2_psll_q;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntNo = Intrinsic::x86_mmx_psll_d;
break;
- case Intrinsic::x86_sse2_psrli_w:
- NewIntNo = Intrinsic::x86_sse2_psrl_w;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntNo = Intrinsic::x86_mmx_psll_q;
break;
- case Intrinsic::x86_sse2_psrli_d:
- NewIntNo = Intrinsic::x86_sse2_psrl_d;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntNo = Intrinsic::x86_mmx_psrl_w;
break;
- case Intrinsic::x86_sse2_psrli_q:
- NewIntNo = Intrinsic::x86_sse2_psrl_q;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntNo = Intrinsic::x86_mmx_psrl_d;
break;
- case Intrinsic::x86_sse2_psrai_w:
- NewIntNo = Intrinsic::x86_sse2_psra_w;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntNo = Intrinsic::x86_mmx_psrl_q;
break;
- case Intrinsic::x86_sse2_psrai_d:
- NewIntNo = Intrinsic::x86_sse2_psra_d;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntNo = Intrinsic::x86_mmx_psra_w;
break;
- default: {
- ShAmtVT = MVT::v2i32;
- switch (IntNo) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntNo = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntNo = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntNo = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntNo = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntNo = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntNo = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntNo = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntNo = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntNo = Intrinsic::x86_mmx_psra_d;
break;
- }
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
}
// The vector shift intrinsics with scalars use 32-bit shift amounts, but
// the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
// to zero.
- SDValue ShOps[4];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, MVT::i32);
- if (ShAmtVT == MVT::v4i32) {
- ShOps[2] = DAG.getUNDEF(MVT::i32);
- ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
- } else {
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, ShAmt,
+ DAG.getConstant(0, MVT::i32));
// FIXME: this must be lowered to get rid of the invalid type.
- }
EVT VT = Op.getValueType();
ShAmt = DAG.getNode(ISD::BITCAST, dl, VT, ShAmt);
@@ -9432,13 +9745,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
DAG.getNode(ISD::ADD, dl, getPointerTy(),
FrameAddr, Offset),
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
}
// Just load the return address.
SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
- RetAddrFI, MachinePointerInfo(), false, false, 0);
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
@@ -9453,7 +9766,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
return FrameAddr;
}
@@ -9685,7 +9998,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
- MachinePointerInfo(), false, false, 0);
+ MachinePointerInfo(), false, false, false, 0);
// Transform as necessary
SDValue CWD1 =
@@ -9745,7 +10058,8 @@ SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
-SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+SDValue X86TargetLowering::LowerCTLZ_ZERO_UNDEF(SDValue Op,
+ SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
@@ -9753,26 +10067,41 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
Op = Op.getOperand(0);
if (VT == MVT::i8) {
+ // Zero extend to i32 since there is no i8 bsr.
OpVT = MVT::i32;
Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
}
- // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ // Issue a bsr (scan bits in reverse).
SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);
+
+ // And xor with NumBits-1.
+ Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
+
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
+ return Op;
+}
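+// Worked example (illustrative): for i32 x = 0x0000F000, BSR returns 15 and
+// 15 ^ 31 == 16 == ctlz(x); the xor equals (NumBits-1) - BSR(x) because
+// NumBits-1 is all ones.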
+
+SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned NumBits = VT.getSizeInBits();
+ DebugLoc dl = Op.getDebugLoc();
+ Op = Op.getOperand(0);
+
+ // Issue a bsf (scan bits forward) which also sets EFLAGS.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);
// If src is zero (i.e. bsf sets ZF), returns NumBits.
SDValue Ops[] = {
Op,
- DAG.getConstant(NumBits, OpVT),
+ DAG.getConstant(NumBits, VT),
DAG.getConstant(X86::COND_E, MVT::i8),
Op.getValue(1)
};
- Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
-
- if (VT == MVT::i8)
- Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
- return Op;
+ return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops));
}
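+// Illustrative: cttz(0x0000F000) gives BSF = 12; for input 0, BSF sets ZF
+// and the CMOV (COND_E) substitutes NumBits, matching ISD::CTTZ semantics.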
// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
@@ -9824,49 +10153,49 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
// Decompose 256-bit ops into smaller 128-bit ops.
- if (VT.getSizeInBits() == 256)
+ if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2())
return Lower256IntArith(Op, DAG);
- assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
+ assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
+ "Only know how to lower V2I64/V4I64 multiply");
+
DebugLoc dl = Op.getDebugLoc();
- // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
- // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
- // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
- // ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
- // ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
+ // Ahi = psrlqi(a, 32);
+ // Bhi = psrlqi(b, 32);
//
- // AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
- // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
+ // AloBlo = pmuludq(a, b);
+ // AloBhi = pmuludq(a, Bhi);
+ // AhiBlo = pmuludq(Ahi, b);
+
+ // AloBhi = psllqi(AloBhi, 32);
+ // AhiBlo = psllqi(AhiBlo, 32);
// return AloBlo + AloBhi + AhiBlo;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
- SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- A, DAG.getConstant(32, MVT::i32));
- SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- B, DAG.getConstant(32, MVT::i32));
- SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- A, B);
- SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- A, Bhi);
- SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
- Ahi, B);
- AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- AloBhi, DAG.getConstant(32, MVT::i32));
- AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- AhiBlo, DAG.getConstant(32, MVT::i32));
+ SDValue ShAmt = DAG.getConstant(32, MVT::i32);
+
+ SDValue Ahi = DAG.getNode(X86ISD::VSRLI, dl, VT, A, ShAmt);
+ SDValue Bhi = DAG.getNode(X86ISD::VSRLI, dl, VT, B, ShAmt);
+
+ // Bit cast to 32-bit vectors for MULUDQ
+ EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : MVT::v8i32;
+ A = DAG.getNode(ISD::BITCAST, dl, MulVT, A);
+ B = DAG.getNode(ISD::BITCAST, dl, MulVT, B);
+ Ahi = DAG.getNode(ISD::BITCAST, dl, MulVT, Ahi);
+ Bhi = DAG.getNode(ISD::BITCAST, dl, MulVT, Bhi);
+
+ SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
+ SDValue AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
+ SDValue AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
+
+ AloBhi = DAG.getNode(X86ISD::VSHLI, dl, VT, AloBhi, ShAmt);
+ AhiBlo = DAG.getNode(X86ISD::VSHLI, dl, VT, AhiBlo, ShAmt);
+
SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
- Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
- return Res;
+ return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
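
The pmuludq decomposition in the comment follows from splitting each 64-bit
lane into 32-bit halves; the Ahi*Bhi term vanishes mod 2^64. One lane as a
scalar sketch (hypothetical helper, not part of the patch):

    #include <cstdint>
    uint64_t mul64_lane_sketch(uint64_t A, uint64_t B) {
      uint64_t Ahi = A >> 32;                                // psrlqi(a, 32)
      uint64_t Bhi = B >> 32;                                // psrlqi(b, 32)
      uint64_t AloBlo = (A & 0xffffffff) * (B & 0xffffffff); // pmuludq(a, b)
      uint64_t AloBhi = (A & 0xffffffff) * Bhi;              // pmuludq(a, Bhi)
      uint64_t AhiBlo = Ahi * (B & 0xffffffff);              // pmuludq(Ahi, b)
      return AloBlo + (AloBhi << 32) + (AhiBlo << 32);       // psllqi + adds
    }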
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
@@ -9877,12 +10206,183 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
SDValue Amt = Op.getOperand(1);
LLVMContext *Context = DAG.getContext();
- if (!Subtarget->hasXMMInt())
+ if (!Subtarget->hasSSE2())
return SDValue();
+ // Optimize shl/srl/sra with constant shift amount.
+ if (isSplatVector(Amt.getNode())) {
+ SDValue SclrAmt = Amt->getOperand(0);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
+ uint64_t ShiftAmt = C->getZExtValue();
+
+ if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
+ (Subtarget->hasAVX2() &&
+ (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
+ if (Op.getOpcode() == ISD::SHL)
+ return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRL)
+ return DAG.getNode(X86ISD::VSRLI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ if (Op.getOpcode() == ISD::SRA && VT != MVT::v2i64 && VT != MVT::v4i64)
+ return DAG.getNode(X86ISD::VSRAI, dl, VT, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ }
+
+ if (VT == MVT::v16i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v8i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 16> V(16,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ }
+
+ if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+ if (Op.getOpcode() == ISD::SHL) {
+ // Make a large shift.
+ SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SHL = DAG.getNode(ISD::BITCAST, dl, VT, SHL);
+ // Zero out the rightmost bits.
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U << ShiftAmt),
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SHL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRL) {
+ // Make a large shift.
+ SDValue SRL = DAG.getNode(X86ISD::VSRLI, dl, MVT::v16i16, R,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+ SRL = DAG.getNode(ISD::BITCAST, dl, VT, SRL);
+ // Zero out the leftmost bits.
+ SmallVector<SDValue, 32> V(32,
+ DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
+ MVT::i8));
+ return DAG.getNode(ISD::AND, dl, VT, SRL,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ }
+ if (Op.getOpcode() == ISD::SRA) {
+ if (ShiftAmt == 7) {
+ // R s>> 7 === R s< 0
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+ return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
+ }
+
+ // R s>> a === ((R u>> a) ^ m) - m
+ SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
+ SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
+ MVT::i8));
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+ Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
+ Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
+ return Res;
+ }
+ }
+ }
+ }
+
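The ((R u>> a) ^ m) - m identity above works because m = 128 >> a marks where
the sign bit lands after the logical shift; xoring and subtracting m
sign-extends that bit upward. One byte lane as a scalar sketch (illustration
only):

    #include <cstdint>
    int8_t sra_lane_sketch(uint8_t R, unsigned a) { // 0 <= a < 8
      uint8_t m = 0x80u >> a;       // shifted-down sign-bit position
      uint8_t u = R >> a;           // the logical shift (R u>> a)
      return (int8_t)((u ^ m) - m); // flip then subtract: sign-extend
    }
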
+ // Lower SHL with variable shift amount.
+ if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
+ Op = DAG.getNode(X86ISD::VSHLI, dl, VT, Op.getOperand(1),
+ DAG.getConstant(23, MVT::i32));
+
+ const uint32_t CV[] = { 0x3f800000U, 0x3f800000U, 0x3f800000U, 0x3f800000U};
+ Constant *C = ConstantDataVector::get(*Context, CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
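
In the v4i32 block above, 0x3f800000 is the IEEE-754 bit pattern of 1.0f;
adding the shift amount into the exponent field (after the shl by 23) yields
the bit pattern of 2^amt, so the vector shift becomes a multiply. A scalar
sketch of the trick (hypothetical helper):

    #include <cstdint>
    #include <cstring>
    uint32_t shl_via_fp_sketch(uint32_t x, uint32_t amt) { // 0 <= amt < 32
      uint32_t bits = (amt << 23) + 0x3f800000u; // exponent(1.0f) + amt
      float p;
      std::memcpy(&p, &bits, sizeof p);          // p == 2.0f ^ amt
      return x * (uint32_t)p;                    // x << amt
    }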
+ if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
+ assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
+
+ // a = a << 5;
+ Op = DAG.getNode(X86ISD::VSHLI, dl, MVT::v8i16, Op.getOperand(1),
+ DAG.getConstant(5, MVT::i32));
+ Op = DAG.getNode(ISD::BITCAST, dl, VT, Op);
+
+ // Turn 'a' into a mask suitable for VSELECT
+ SDValue VSelM = DAG.getConstant(0x80, VT);
+ SDValue OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ SDValue CM1 = DAG.getConstant(0x0f, VT);
+ SDValue CM2 = DAG.getConstant(0x3f, VT);
+
+ // r = VSELECT(r, psllw(r & (char16)15, 4), a);
+ SDValue M = DAG.getNode(ISD::AND, dl, VT, R, CM1);
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(4, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ // r = VSELECT(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, CM2);
+ M = getTargetVShiftNode(X86ISD::VSHLI, dl, MVT::v8i16, M,
+ DAG.getConstant(2, MVT::i32), DAG);
+ M = DAG.getNode(ISD::BITCAST, dl, VT, M);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel, M, R);
+
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+ OpVSel = DAG.getNode(ISD::AND, dl, VT, VSelM, Op);
+ OpVSel = DAG.getNode(X86ISD::PCMPEQ, dl, VT, OpVSel, VSelM);
+
+ // return VSELECT(r, r+r, a);
+ R = DAG.getNode(ISD::VSELECT, dl, VT, OpVSel,
+ DAG.getNode(ISD::ADD, dl, VT, R, R), R);
+ return R;
+ }
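
The v16i8 path is bit-serial: after a << 5, bit 2 of the shift amount sits in
each byte's sign bit, which the PCMPEQ against 0x80 turns into a VSELECT mask;
each a += a exposes the next bit. Per lane (scalar sketch, not part of the
patch):

    #include <cstdint>
    uint8_t shl_v16i8_lane_sketch(uint8_t r, uint8_t amt) {
      uint8_t a = amt << 5;              // amt bit 2 -> sign bit
      if (a & 0x80) r = (r & 0x0f) << 4; // conditional shift by 4
      a += a;                            // amt bit 1 -> sign bit
      if (a & 0x80) r = (r & 0x3f) << 2; // conditional shift by 2
      a += a;                            // amt bit 0 -> sign bit
      if (a & 0x80) r = r + r;           // conditional shift by 1
      return r;
    }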
+
// Decompose 256-bit shifts into smaller 128-bit shifts.
if (VT.getSizeInBits() == 256) {
- int NumElems = VT.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
MVT EltVT = VT.getVectorElementType().getSimpleVT();
EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
@@ -9897,9 +10397,9 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
// Constant shift amount
SmallVector<SDValue, 4> Amt1Csts;
SmallVector<SDValue, 4> Amt2Csts;
- for (int i = 0; i < NumElems/2; ++i)
+ for (unsigned i = 0; i != NumElems/2; ++i)
Amt1Csts.push_back(Amt->getOperand(i));
- for (int i = NumElems/2; i < NumElems; ++i)
+ for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
@@ -9921,120 +10421,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2);
}
- // Optimize shl/srl/sra with constant shift amount.
- if (isSplatVector(Amt.getNode())) {
- SDValue SclrAmt = Amt->getOperand(0);
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) {
- uint64_t ShiftAmt = C->getZExtValue();
-
- if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SHL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRL)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v4i32 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
-
- if (VT == MVT::v8i16 && Op.getOpcode() == ISD::SRA)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
- R, DAG.getConstant(ShiftAmt, MVT::i32));
- }
- }
-
- // Lower SHL with variable shift amount.
- if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
- Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- Op.getOperand(1), DAG.getConstant(23, MVT::i32));
-
- ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
-
- std::vector<Constant*> CV(4, CI);
- Constant *C = ConstantVector::get(CV);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, 16);
-
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
- Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op);
- Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
- return DAG.getNode(ISD::MUL, dl, VT, Op, R);
- }
- if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
- // a = a << 5;
- Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- Op.getOperand(1), DAG.getConstant(5, MVT::i32));
-
- ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
- ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
-
- std::vector<Constant*> CVM1(16, CM1);
- std::vector<Constant*> CVM2(16, CM2);
- Constant *C = ConstantVector::get(CVM1);
- SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, 16);
-
- // r = pblendv(r, psllw(r & (char16)15, 4), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, M);
- M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
- DAG.getConstant(4, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
- C = ConstantVector::get(CVM2);
- CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
- M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(),
- false, false, 16);
-
- // r = pblendv(r, psllw(r & (char16)63, 2), a);
- M = DAG.getNode(ISD::AND, dl, VT, R, M);
- M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
- DAG.getConstant(2, MVT::i32));
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op, R, M);
- // a += a
- Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
-
- // return pblendv(r, r+r, a);
- R = DAG.getNode(ISD::VSELECT, dl, VT, Op,
- R, DAG.getNode(ISD::ADD, dl, VT, R, R));
- return R;
- }
return SDValue();
}
@@ -10113,46 +10499,58 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
-SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const{
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- SDNode* Node = Op.getNode();
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- EVT VT = Node->getValueType(0);
- if (Subtarget->hasXMMInt() && VT.isVector()) {
- unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
- ExtraVT.getScalarType().getSizeInBits();
- SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
-
- unsigned SHLIntrinsicsID = 0;
- unsigned SRAIntrinsicsID = 0;
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- return SDValue();
- case MVT::v4i32: {
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
- SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
- break;
- }
- case MVT::v8i16: {
- SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
- SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
- break;
- }
- }
+ EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ EVT VT = Op.getValueType();
- SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SHLIntrinsicsID, MVT::i32),
- Node->getOperand(0), ShAmt);
+ if (!Subtarget->hasSSE2() || !VT.isVector())
+ return SDValue();
- // In case of 1 bit sext, no need to shr
- if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
- DAG.getConstant(SRAIntrinsicsID, MVT::i32),
- Tmp1, ShAmt);
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v8i32:
+ case MVT::v16i16:
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ if (!Subtarget->hasAVX2()) {
+ // Needs to be split into two 128-bit halves.
+ int NumElems = VT.getVectorNumElements();
+ SDValue Idx0 = DAG.getConstant(0, MVT::i32);
+ SDValue Idx1 = DAG.getConstant(NumElems/2, MVT::i32);
+
+ // Extract the LHS vectors
+ SDValue LHS = Op.getOperand(0);
+ SDValue LHS1 = Extract128BitVector(LHS, Idx0, DAG, dl);
+ SDValue LHS2 = Extract128BitVector(LHS, Idx1, DAG, dl);
+
+ MVT EltVT = VT.getVectorElementType().getSimpleVT();
+ EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ EVT ExtraEltVT = ExtraVT.getVectorElementType();
+ int ExtraNumElems = ExtraVT.getVectorNumElements();
+ ExtraVT = EVT::getVectorVT(*DAG.getContext(), ExtraEltVT,
+ ExtraNumElems/2);
+ SDValue Extra = DAG.getValueType(ExtraVT);
+
+ LHS1 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS1, Extra);
+ LHS2 = DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, Extra);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, LHS1, LHS2);
+ }
+ // fall through
+ case MVT::v4i32:
+ case MVT::v8i16: {
+ SDValue Tmp1 = getTargetVShiftNode(X86ISD::VSHLI, dl, VT,
+ Op.getOperand(0), ShAmt, DAG);
+ return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, Tmp1, ShAmt, DAG);
+ }
}
-
- return SDValue();
}
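
The vector SIGN_EXTEND_INREG reduces to the classic shift pair: shifting the
lane left by BitsDiff and then arithmetic-shifting it right by the same amount
sign-extends the low ExtraVT bits. One 32-bit lane as a scalar sketch
(illustration only):

    #include <cstdint>
    int32_t sext_inreg_sketch(int32_t x, unsigned ExtraBits) {
      unsigned BitsDiff = 32 - ExtraBits; // as computed in the lowering
      return (int32_t)((uint32_t)x << BitsDiff) >> BitsDiff; // VSHLI, VSRAI
    }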
@@ -10161,7 +10559,7 @@ SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
// Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
// There isn't any reason to disable it if the target processor supports it.
- if (!Subtarget->hasXMMInt() && !Subtarget->is64Bit()) {
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
@@ -10215,7 +10613,7 @@ SDValue X86TargetLowering::LowerATOMIC_FENCE(SDValue Op,
// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
// no-sse2). There isn't any reason to disable it if the target processor
// supports it.
- if (Subtarget->hasXMMInt() || Subtarget->is64Bit())
+ if (Subtarget->hasSSE2() || Subtarget->is64Bit())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
@@ -10246,8 +10644,7 @@ SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
unsigned Reg = 0;
unsigned size = 0;
switch(T.getSimpleVT().SimpleTy) {
- default:
- assert(false && "Invalid value type!");
+ default: llvm_unreachable("Invalid value type!");
case MVT::i8: Reg = X86::AL; size = 1; break;
case MVT::i16: Reg = X86::AX; size = 2; break;
case MVT::i32: Reg = X86::EAX; size = 4; break;
@@ -10295,7 +10692,7 @@ SDValue X86TargetLowering::LowerBITCAST(SDValue Op,
SelectionDAG &DAG) const {
EVT SrcVT = Op.getOperand(0).getValueType();
EVT DstVT = Op.getValueType();
- assert(Subtarget->is64Bit() && !Subtarget->hasXMMInt() &&
+ assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
(DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
@@ -10365,7 +10762,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
unsigned Opc;
bool ExtraOp = false;
switch (Op.getOpcode()) {
- default: assert(0 && "Invalid code");
+ default: llvm_unreachable("Invalid code");
case ISD::ADDC: Opc = X86ISD::ADD; break;
case ISD::ADDE: Opc = X86ISD::ADC; ExtraOp = true; break;
case ISD::SUBC: Opc = X86ISD::SUB; break;
@@ -10432,6 +10829,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::SRA:
@@ -10506,8 +10904,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
- assert(false && "Do not know how to custom type legalize this operation!");
- return;
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
@@ -10515,15 +10912,25 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SUBE:
// We don't want to expand or promote these.
return;
- case ISD::FP_TO_SINT: {
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: {
+ bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
+ return;
+
std::pair<SDValue,SDValue> Vals =
- FP_TO_INTHelper(SDValue(N, 0), DAG, true);
+ FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
if (FIST.getNode() != 0) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
- MachinePointerInfo(), false, false, 0));
+ if (StackSlot.getNode() != 0)
+ Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ else
+ Results.push_back(FIST);
}
return;
}
@@ -10657,15 +11064,19 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::ANDNP: return "X86ISD::ANDNP";
- case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
- case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
- case X86ISD::PSIGND: return "X86ISD::PSIGND";
+ case X86ISD::PSIGN: return "X86ISD::PSIGN";
+ case X86ISD::BLENDV: return "X86ISD::BLENDV";
+ case X86ISD::BLENDPW: return "X86ISD::BLENDPW";
+ case X86ISD::BLENDPS: return "X86ISD::BLENDPS";
+ case X86ISD::BLENDPD: return "X86ISD::BLENDPD";
+ case X86ISD::HADD: return "X86ISD::HADD";
+ case X86ISD::HSUB: return "X86ISD::HSUB";
+ case X86ISD::FHADD: return "X86ISD::FHADD";
+ case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
- case X86ISD::FHADD: return "X86ISD::FHADD";
- case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
@@ -10681,18 +11092,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
+ case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
+ case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
case X86ISD::VSHL: return "X86ISD::VSHL";
case X86ISD::VSRL: return "X86ISD::VSRL";
- case X86ISD::CMPPD: return "X86ISD::CMPPD";
- case X86ISD::CMPPS: return "X86ISD::CMPPS";
- case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB";
- case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW";
- case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD";
- case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ";
- case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB";
- case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW";
- case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD";
- case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ";
+ case X86ISD::VSRA: return "X86ISD::VSRA";
+ case X86ISD::VSHLI: return "X86ISD::VSHLI";
+ case X86ISD::VSRLI: return "X86ISD::VSRLI";
+ case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::CMPP: return "X86ISD::CMPP";
+ case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
+ case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
case X86ISD::ADD: return "X86ISD::ADD";
case X86ISD::SUB: return "X86ISD::SUB";
case X86ISD::ADC: return "X86ISD::ADC";
@@ -10705,54 +11115,39 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::ANDN: return "X86ISD::ANDN";
+ case X86ISD::BLSI: return "X86ISD::BLSI";
+ case X86ISD::BLSMSK: return "X86ISD::BLSMSK";
+ case X86ISD::BLSR: return "X86ISD::BLSR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
case X86ISD::TESTP: return "X86ISD::TESTP";
case X86ISD::PALIGN: return "X86ISD::PALIGN";
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
- case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
- case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
- case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
- case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::SHUFP: return "X86ISD::SHUFP";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
- case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
- case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
- case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
case X86ISD::MOVSD: return "X86ISD::MOVSD";
case X86ISD::MOVSS: return "X86ISD::MOVSS";
- case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
- case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
- case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
- case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
- case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
- case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
- case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
- case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
- case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
- case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
- case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
- case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
- case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
+ case X86ISD::UNPCKL: return "X86ISD::UNPCKL";
+ case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
- case X86ISD::VPERMILPS: return "X86ISD::VPERMILPS";
- case X86ISD::VPERMILPSY: return "X86ISD::VPERMILPSY";
- case X86ISD::VPERMILPD: return "X86ISD::VPERMILPD";
- case X86ISD::VPERMILPDY: return "X86ISD::VPERMILPDY";
- case X86ISD::VPERM2F128: return "X86ISD::VPERM2F128";
+ case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
+ case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
+ case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
case X86ISD::MEMBARRIER: return "X86ISD::MEMBARRIER";
case X86ISD::SEG_ALLOCA: return "X86ISD::SEG_ALLOCA";
+ case X86ISD::WIN_FTOL: return "X86ISD::WIN_FTOL";
}
}
@@ -10855,21 +11250,21 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
EVT VT) const {
// Very little shuffling can be done for 64-bit vectors right now.
if (VT.getSizeInBits() == 64)
- return isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX());
+ return false;
// FIXME: pshufb, blends, shifts.
return (VT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, VT) ||
- isSHUFPMask(M, VT) ||
+ isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
isPSHUFDMask(M, VT) ||
isPSHUFHWMask(M, VT) ||
isPSHUFLWMask(M, VT) ||
- isPALIGNRMask(M, VT, Subtarget->hasSSSE3() || Subtarget->hasAVX()) ||
- isUNPCKLMask(M, VT) ||
- isUNPCKHMask(M, VT) ||
- isUNPCKL_v_undef_Mask(M, VT) ||
- isUNPCKH_v_undef_Mask(M, VT));
+ isPALIGNRMask(M, VT, Subtarget) ||
+ isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
+ isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2()));
}
bool
@@ -10882,8 +11277,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
if (NumElts == 4 && VT.getSizeInBits() == 128) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
- isSHUFPMask(Mask, VT) ||
- isCommutedSHUFPMask(Mask, VT));
+ isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
+ isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true));
}
return false;
}
@@ -10902,7 +11297,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
unsigned CXchgOpc,
unsigned notOpc,
unsigned EAXreg,
- TargetRegisterClass *RC,
+ const TargetRegisterClass *RC,
bool invSrc) const {
// For the atomic bitwise operator, we generate
// thisMBB:
@@ -11274,7 +11669,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
- assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ assert(Subtarget->hasSSE42() &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
@@ -11679,6 +12074,42 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
return EndMBB;
}
+// The EFLAGS operand of SelectItr might be missing a kill marker
+// because there were multiple uses of EFLAGS, and ISel didn't know
+// which to mark. Figure out whether SelectItr should have had a
+// kill marker, and set it if it should. Returns the correct kill
+// marker value.
+static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
+ MachineBasicBlock* BB,
+ const TargetRegisterInfo* TRI) {
+ // Scan forward through BB for a use/def of EFLAGS.
+ MachineBasicBlock::iterator miI(llvm::next(SelectItr));
+ for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
+ const MachineInstr& mi = *miI;
+ if (mi.readsRegister(X86::EFLAGS))
+ return false;
+ if (mi.definesRegister(X86::EFLAGS))
+ break; // Should have kill-flag - update below.
+ }
+
+ // If we hit the end of the block, check whether EFLAGS is live into a
+ // successor.
+ if (miI == BB->end()) {
+ for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(),
+ sEnd = BB->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock* succ = *sItr;
+ if (succ->isLiveIn(X86::EFLAGS))
+ return false;
+ }
+ }
+
+ // We found a def, or hit the end of the basic block and EFLAGS wasn't live
+ // out. SelectMI should have a kill flag on EFLAGS.
+ SelectItr->addRegisterKilled(X86::EFLAGS, TRI);
+ return true;
+}
+
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -11708,7 +12139,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// If the EFLAGS register isn't dead in the terminator, then claim that it's
// live into the sink and copy blocks.
- if (!MI->killsRegister(X86::EFLAGS)) {
+ const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo();
+ if (!MI->killsRegister(X86::EFLAGS) &&
+ !checkAndUpdateEFLAGSKill(MI, BB, TRI)) {
copy0MBB->addLiveIn(X86::EFLAGS);
sinkMBB->addLiveIn(X86::EFLAGS);
}
@@ -11753,7 +12186,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- assert(EnableSegmentedStacks);
+ assert(getTargetMachine().Options.EnableSegmentedStacks);
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
@@ -11785,6 +12218,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
+ SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
sizeVReg = MI->getOperand(1).getReg(),
physSPReg = Is64Bit ? X86::RSP : X86::ESP;
@@ -11802,33 +12236,39 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg)
+ BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg)
- .addReg(tmpSPVReg);
+ .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
+ .addReg(SPLimitVReg);
BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
- .addReg(tmpSPVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
- .addReg(tmpSPVReg);
+ .addReg(SPLimitVReg);
BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
+ const uint32_t *RegMask =
+ getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Is64Bit) {
BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI);
+ .addExternalSymbol("__morestack_allocate_stack_space").addReg(X86::RDI)
+ .addRegMask(RegMask)
+ .addReg(X86::RAX, RegState::ImplicitDefine);
} else {
BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol("__morestack_allocate_stack_space");
+ .addExternalSymbol("__morestack_allocate_stack_space")
+ .addRegMask(RegMask)
+ .addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
@@ -11926,6 +12366,11 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
+ // Get a register mask for the lowered call.
+ // FIXME: The 32-bit calls have non-standard calling conventions. Use a
+ // proper register mask.
+ const uint32_t *RegMask =
+ getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C);
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
@@ -11936,6 +12381,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
+ MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
@@ -11946,6 +12392,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
+ MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
@@ -11956,6 +12403,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
+ MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
MI->eraseFromParent(); // The pseudo instruction is gone now.
@@ -11966,30 +12414,14 @@ MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: assert(0 && "Unexpected instr type to insert");
+ default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
- assert(0 && "TAILJMP64 would not be touched here.");
+ llvm_unreachable("TAILJMP64 would not be touched here.");
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
- // Defs of TCRETURNxx64 has Win64's callee-saved registers, as subset.
- // On AMD64, additional defs should be added before register allocation.
- if (!Subtarget->isTargetWin64()) {
- MI->addRegisterDefined(X86::RSI);
- MI->addRegisterDefined(X86::RDI);
- MI->addRegisterDefined(X86::XMM6);
- MI->addRegisterDefined(X86::XMM7);
- MI->addRegisterDefined(X86::XMM8);
- MI->addRegisterDefined(X86::XMM9);
- MI->addRegisterDefined(X86::XMM10);
- MI->addRegisterDefined(X86::XMM11);
- MI->addRegisterDefined(X86::XMM12);
- MI->addRegisterDefined(X86::XMM13);
- MI->addRegisterDefined(X86::XMM14);
- MI->addRegisterDefined(X86::XMM15);
- }
return BB;
case X86::WIN_ALLOCA:
return EmitLoweredWinAlloca(MI, BB);
@@ -12294,11 +12726,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
+ unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
Opc == ISD::INTRINSIC_WO_CHAIN ||
@@ -12307,7 +12739,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
"Should use MaskedValueIsZero if you don't know whether Op"
" is a target node!");
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); // Don't know anything.
+ KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
switch (Opc) {
default: break;
case X86ISD::ADD:
@@ -12326,8 +12758,7 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
break;
// Fallthrough
case X86ISD::SETCC:
- KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - 1);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -12339,18 +12770,20 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
case Intrinsic::x86_sse2_movmsk_pd:
case Intrinsic::x86_avx_movmsk_pd_256:
case Intrinsic::x86_mmx_pmovmskb:
- case Intrinsic::x86_sse2_pmovmskb_128: {
+ case Intrinsic::x86_sse2_pmovmskb_128:
+ case Intrinsic::x86_avx2_pmovmskb: {
// High bits of movmskp{s|d}, pmovmskb are known zero.
switch (IntId) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse_movmsk_ps: NumLoBits = 4; break;
case Intrinsic::x86_avx_movmsk_ps_256: NumLoBits = 8; break;
case Intrinsic::x86_sse2_movmsk_pd: NumLoBits = 2; break;
case Intrinsic::x86_avx_movmsk_pd_256: NumLoBits = 4; break;
case Intrinsic::x86_mmx_pmovmskb: NumLoBits = 8; break;
case Intrinsic::x86_sse2_pmovmskb_128: NumLoBits = 16; break;
+ case Intrinsic::x86_avx2_pmovmskb: NumLoBits = 32; break;
}
- KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - NumLoBits);
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
}
@@ -12418,7 +12851,8 @@ static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
/// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget* Subtarget) {
DebugLoc dl = N->getDebugLoc();
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
SDValue V1 = SVOp->getOperand(0);
@@ -12454,9 +12888,23 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
!isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
return SDValue();
+ // If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
+ SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
+ SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
+ SDValue ResNode =
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2,
+ Ld->getMemoryVT(),
+ Ld->getPointerInfo(),
+ Ld->getAlignment(),
+ false/*isVolatile*/, true/*ReadMem*/,
+ false/*WriteMem*/);
+ return DAG.getNode(ISD::BITCAST, dl, VT, ResNode);
+ }
+
// Emit a zeroed vector and insert the desired subvector on its
// first half.
- SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl);
+ SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
SDValue InsV = Insert128BitVector(Zeros, V1.getOperand(0),
DAG.getConstant(0, MVT::i32), DAG, dl);
return DCI.CombineTo(N, InsV);
@@ -12501,7 +12949,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
// Combine 256-bit vector shuffles. This is only profitable when in AVX mode
if (Subtarget->hasAVX() && VT.getSizeInBits() == 256 &&
N->getOpcode() == ISD::VECTOR_SHUFFLE)
- return PerformShuffleCombine256(N, DAG, DCI);
+ return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
// Only handle 128 wide vector from here on.
if (VT.getSizeInBits() != 128)
@@ -12517,11 +12965,185 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
+
+/// PerformTruncateCombine - Converts a truncate operation into a sequence
+/// of vector shuffle operations. This is possible when we truncate a
+/// 256-bit vector to a 128-bit vector.
+SDValue X86TargetLowering::PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
+ DAGCombinerInfo &DCI) const {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasAVX()) return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Op,
+ DAG.getIntPtrConstant(2));
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+ // PSHUFD
+ int ShufMask1[] = {0, 2, 0, 0};
+
+ OpLo = DAG.getVectorShuffle(VT, dl, OpLo, DAG.getUNDEF(VT),
+ ShufMask1);
+ OpHi = DAG.getVectorShuffle(VT, dl, OpHi, DAG.getUNDEF(VT),
+ ShufMask1);
+
+ // MOVLHPS
+ int ShufMask2[] = {0, 1, 4, 5};
+
+ return DAG.getVectorShuffle(VT, dl, OpLo, OpHi, ShufMask2);
+ }
+ if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
+
+ SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+ DAG.getIntPtrConstant(0));
+
+ SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i32, Op,
+ DAG.getIntPtrConstant(4));
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpHi);
+
+ // PSHUFB
+ int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
+ -1, -1, -1, -1, -1, -1, -1, -1};
+
+ OpLo = DAG.getVectorShuffle(MVT::v16i8, dl, OpLo,
+ DAG.getUNDEF(MVT::v16i8),
+ ShufMask1);
+ OpHi = DAG.getVectorShuffle(MVT::v16i8, dl, OpHi,
+ DAG.getUNDEF(MVT::v16i8),
+ ShufMask1);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, OpHi);
+
+ // MOVLHPS
+ int ShufMask2[] = {0, 1, 4, 5};
+
+ SDValue res = DAG.getVectorShuffle(MVT::v4i32, dl, OpLo, OpHi, ShufMask2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, res);
+ }
+
+ return SDValue();
+}
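
Lane-wise, the v4i64 -> v4i32 path keeps the low 32-bit half of each 64-bit
element: PSHUFD gathers the even lanes within each 128-bit half, and the
MOVLHPS-style shuffle concatenates the halves. The net effect as a scalar
sketch (assumes little-endian lane order):

    #include <array>
    #include <cstdint>
    std::array<uint32_t, 4>
    trunc_v4i64_sketch(const std::array<uint32_t, 8> &In) {
      // Viewing the 256-bit input as eight i32 lanes, keep 0, 2, 4, 6.
      return {{ In[0], In[2], In[4], In[6] }};
    }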
+
+/// XFormVExtractWithShuffleIntoLoad - Check if a vector extract from a target
+/// specific shuffle of a load can be folded into a single element load.
+/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
+/// shuffles have been custom lowered, so we need to handle those here.
+static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue InVec = N->getOperand(0);
+ SDValue EltNo = N->getOperand(1);
+
+ if (!isa<ConstantSDNode>(EltNo))
+ return SDValue();
+
+ EVT VT = InVec.getValueType();
+
+ bool HasShuffleIntoBitcast = false;
+ if (InVec.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+ EVT BCVT = InVec.getOperand(0).getValueType();
+ if (BCVT.getVectorNumElements() != VT.getVectorNumElements())
+ return SDValue();
+ InVec = InVec.getOperand(0);
+ HasShuffleIntoBitcast = true;
+ }
+
+ if (!isTargetShuffle(InVec.getOpcode()))
+ return SDValue();
+
+ // Don't duplicate a load with other uses.
+ if (!InVec.hasOneUse())
+ return SDValue();
+
+ SmallVector<int, 16> ShuffleMask;
+ bool UnaryShuffle;
+ if (!getTargetShuffleMask(InVec.getNode(), VT, ShuffleMask, UnaryShuffle))
+ return SDValue();
+
+ // Select the input vector, guarding against out of range extract vector.
+ unsigned NumElems = VT.getVectorNumElements();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Idx = (Elt >= (int)NumElems) ? -1 : ShuffleMask[Elt];
+ SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
+ : InVec.getOperand(1);
+
+ // If inputs to shuffle are the same for both ops, then allow 2 uses
+ unsigned AllowedUses = InVec.getOperand(0) == InVec.getOperand(1) ? 2 : 1;
+
+ if (LdNode.getOpcode() == ISD::BITCAST) {
+ // Don't duplicate a load with other uses.
+ if (!LdNode.getNode()->hasNUsesOfValue(AllowedUses, 0))
+ return SDValue();
+
+ AllowedUses = 1; // only allow 1 load use if we have a bitcast
+ LdNode = LdNode.getOperand(0);
+ }
+
+ if (!ISD::isNormalLoad(LdNode.getNode()))
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(LdNode);
+
+ if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
+ return SDValue();
+
+ if (HasShuffleIntoBitcast) {
+ // If there's a bitcast before the shuffle, check if the load type and
+ // alignment is valid.
+ unsigned Align = LN0->getAlignment();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VT))
+ return SDValue();
+ }
+
+ // All checks match so transform back to vector_shuffle so that DAG combiner
+ // can finish the job.
+ DebugLoc dl = N->getDebugLoc();
+
+ // Create a shuffle node taking into account the case that it's a unary shuffle
+ SDValue Shuffle = (UnaryShuffle) ? DAG.getUNDEF(VT) : InVec.getOperand(1);
+ Shuffle = DAG.getVectorShuffle(InVec.getValueType(), dl,
+ InVec.getOperand(0), Shuffle,
+ &ShuffleMask[0]);
+ Shuffle = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0), Shuffle,
+ EltNo);
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI);
+ if (NewOp.getNode())
+ return NewOp;
+
SDValue InputVector = N->getOperand(0);
// Only operate on vectors of 4 elements, where the alternative shuffling
@@ -12582,6 +13204,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
unsigned EltSize =
InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
@@ -12590,7 +13213,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Load the scalar.
SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
ScalarAddr, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
// Replace the extract with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
@@ -12603,7 +13226,10 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
+
DebugLoc DL = N->getDebugLoc();
SDValue Cond = N->getOperand(0);
// Get the LHS/RHS of the select.
@@ -12617,7 +13243,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// ignored in unsafe-math mode).
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
- (Subtarget->hasXMMInt() ||
+ (Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -12632,7 +13258,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -12642,7 +13268,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -12660,7 +13286,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -12670,7 +13296,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
std::swap(LHS, RHS);
@@ -12696,7 +13322,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -12706,7 +13332,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETUGT:
// Converting this to a min would handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
(!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
Opcode = X86ISD::FMIN;
@@ -12731,7 +13357,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!UnsafeFPMath &&
+ if (!DAG.getTarget().Options.UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
@@ -12848,6 +13474,57 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ // Canonicalize max and min:
+ // (x > y) ? x : y -> (x >= y) ? x : y
+ // (x < y) ? x : y -> (x <= y) ? x : y
+ // This allows use of COND_S / COND_NS (see TranslateX86CC) which eliminates
+ // the need for an extra compare against zero. e.g.:
+ // (x - y) > 0 ? (x - y) : 0 -> (x - y) >= 0 ? (x - y) : 0
+ // subl %esi, %edi
+ // testl %edi, %edi
+ // movl $0, %eax
+ // cmovgl %edi, %eax
+ // =>
+ // xorl %eax, %eax
+ // subl %esi, %edi
+ // cmovsl %eax, %edi
+ if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
+ DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ switch (CC) {
+ default: break;
+ case ISD::SETLT:
+ case ISD::SETGT: {
+ ISD::CondCode NewCC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGE;
+ Cond = DAG.getSetCC(Cond.getDebugLoc(), Cond.getValueType(),
+ Cond.getOperand(0), Cond.getOperand(1), NewCC);
+ return DAG.getNode(ISD::SELECT, DL, VT, Cond, LHS, RHS);
+ }
+ }
+ }
+
+ // If we know that this node is legal then we know that it is going to be
+ // matched by one of the SSE/AVX BLEND instructions. These instructions only
+ // depend on the highest bit in each word. Try to use SimplifyDemandedBits
+ // to simplify previous instructions.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
+ !DCI.isBeforeLegalize() &&
+ TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
+ assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 1);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+ DCI.isBeforeLegalizeOps());
+ if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) ||
+ TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
return SDValue();
}
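
The SimplifyDemandedBits call above exploits the BLEND lane rule: the hardware
selects on each mask element's sign bit alone, so every other bit of the
condition is dead and constants feeding it can be shrunk. Per byte lane, as
with pblendvb (scalar sketch):

    #include <cstdint>
    uint8_t blendv_lane_sketch(uint8_t mask, uint8_t a, uint8_t b) {
      return (mask & 0x80) ? b : a; // only the mask's high bit matters
    }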
@@ -13042,7 +13719,8 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
// fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
// since the result of setcc_c is all zero's or all ones.
- if (N1C && N0.getOpcode() == ISD::AND &&
+ if (VT.isInteger() && !VT.isVector() &&
+ N1C && N0.getOpcode() == ISD::AND &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
SDValue N00 = N0.getOperand(0);
if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
@@ -13058,26 +13736,46 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+
+ // Hardware support for vector shifts is sparse which makes us scalarize the
+ // vector operations in many cases. Also, on Sandy Bridge ADD is faster than
+ // SHL.
+ // (shl V, 1) -> add V,V
+ if (isSplatVector(N1.getNode())) {
+ assert(N0.getValueType().isVector() && "Invalid vector shift type");
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0));
+ // We shift all of the values by one. In many cases we do not have
+ // hardware support for this operation. This is better expressed as an ADD
+ // of two values.
+ if (N1C && (1 == N1C->getZExtValue())) {
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N0);
+ }
+ }
+
return SDValue();
}
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
EVT VT = N->getValueType(0);
- if (!VT.isVector() && VT.isInteger() &&
- N->getOpcode() == ISD::SHL)
- return PerformSHLCombine(N, DAG);
+ if (N->getOpcode() == ISD::SHL) {
+ SDValue V = PerformSHLCombine(N, DAG);
+ if (V.getNode()) return V;
+ }
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because a constant vector is typically transformed to a constant pool
// so we have no knowledge of the shift amount.
- if (!Subtarget->hasXMMInt())
+ if (!Subtarget->hasSSE2())
return SDValue();
- if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+ if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
+ (!Subtarget->hasAVX2() ||
+ (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
return SDValue();
SDValue ShAmtOp = N->getOperand(1);
@@ -13093,6 +13791,11 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
BaseShAmt = Arg;
break;
}
+ // Handle the case where the build_vector is all undef
+ // FIXME: Should DAG allow this?
+ if (i == NumElts)
+ return SDValue();
+
for (; i != NumElts; ++i) {
SDValue Arg = ShAmtOp.getOperand(i);
if (Arg.getOpcode() == ISD::UNDEF) continue;
@@ -13119,9 +13822,16 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
BaseShAmt = InVec.getOperand(1);
}
}
- if (BaseShAmt.getNode() == 0)
+ if (BaseShAmt.getNode() == 0) {
+ // Don't create instructions with illegal types after legalize
+ // types has run.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(EltVT) &&
+ !DCI.isBeforeLegalize())
+ return SDValue();
+
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
DAG.getIntPtrConstant(0));
+ }
} else
return SDValue();
@@ -13136,47 +13846,38 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
switch (N->getOpcode()) {
default:
llvm_unreachable("Unknown shift opcode!");
- break;
case ISD::SHL:
- if (VT == MVT::v2i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSHLI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
case ISD::SRA:
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRAI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
case ISD::SRL:
- if (VT == MVT::v2i64)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v4i32)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
- ValOp, BaseShAmt);
- if (VT == MVT::v8i16)
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
- ValOp, BaseShAmt);
- break;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return SDValue();
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v4i64:
+ case MVT::v8i32:
+ case MVT::v16i16:
+ return getTargetVShiftNode(X86ISD::VSRLI, DL, VT, ValOp, BaseShAmt, DAG);
+ }
}
- return SDValue();
}
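As a rough scalar model of the new target shift nodes dispatched above (illustrative only; shown for a v4i32 value and an immediate count):

    #include <cstdint>

    // X86ISD::VSHLI shifts every lane by the same immediate; VSRLI uses a
    // logical right shift and VSRAI an arithmetic one.
    void vshli(uint32_t V[4], unsigned Imm) {
      for (int i = 0; i != 4; ++i)
        V[i] <<= Imm;
    }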
@@ -13190,7 +13891,7 @@ static SDValue CMPEQCombine(SDNode *N, SelectionDAG &DAG,
// SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but
// we're requiring SSE2 for both.
- if (Subtarget->hasXMMInt() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
+ if (Subtarget->hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CMP0 = N0->getOperand(1);
@@ -13300,7 +14001,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
- // Create ANDN instructions
+ // Create ANDN, BLSI, and BLSR instructions
+ // BLSI is X & (-X)
+ // BLSR is X & (X-1)
if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -13313,6 +14016,26 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1)))
return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0);
+ // Check LHS for neg
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+ isZero(N0.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+
+ // Check RHS for neg
+ if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+ isZero(N1.getOperand(0)))
+ return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+
+ // Check LHS for X-1
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+
+ // Check RHS for X-1
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+
return SDValue();
}
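Scalar semantics of the two BMI patterns matched above, sketched for reference:

    #include <cstdint>

    uint64_t blsi(uint64_t x) { return x & (0 - x); } // isolate lowest set bit
    uint64_t blsr(uint64_t x) { return x & (x - 1); } // clear lowest set bit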
@@ -13353,98 +14076,87 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return R;
EVT VT = N->getValueType(0);
- if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64 && VT != MVT::v2i64)
- return SDValue();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// look for psign/blend
- if (Subtarget->hasSSSE3() || Subtarget->hasAVX()) {
- if (VT == MVT::v2i64) {
- // Canonicalize pandn to RHS
- if (N0.getOpcode() == X86ISD::ANDNP)
- std::swap(N0, N1);
- // or (and (m, x), (pandn m, y))
- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
- SDValue Mask = N1.getOperand(0);
- SDValue X = N1.getOperand(1);
- SDValue Y;
- if (N0.getOperand(0) == Mask)
- Y = N0.getOperand(1);
- if (N0.getOperand(1) == Mask)
- Y = N0.getOperand(0);
-
- // Check to see if the mask appeared in both the AND and ANDNP and
- if (!Y.getNode())
- return SDValue();
-
- // Validate that X, Y, and Mask are BIT_CONVERTS, and see through them.
- if (Mask.getOpcode() != ISD::BITCAST ||
- X.getOpcode() != ISD::BITCAST ||
- Y.getOpcode() != ISD::BITCAST)
- return SDValue();
-
- // Look through mask bitcast.
- Mask = Mask.getOperand(0);
- EVT MaskVT = Mask.getValueType();
-
- // Validate that the Mask operand is a vector sra node. The sra node
- // will be an intrinsic.
- if (Mask.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
- return SDValue();
-
- // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
- // there is no psrai.b
- switch (cast<ConstantSDNode>(Mask.getOperand(0))->getZExtValue()) {
- case Intrinsic::x86_sse2_psrai_w:
- case Intrinsic::x86_sse2_psrai_d:
- break;
- default: return SDValue();
- }
-
- // Check that the SRA is all signbits.
- SDValue SraC = Mask.getOperand(2);
- unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
- unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
- if ((SraAmt + 1) != EltBits)
- return SDValue();
+ if (VT == MVT::v2i64 || VT == MVT::v4i64) {
+ if (!Subtarget->hasSSSE3() ||
+ (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+ return SDValue();
- DebugLoc DL = N->getDebugLoc();
+ // Canonicalize pandn to RHS
+ if (N0.getOpcode() == X86ISD::ANDNP)
+ std::swap(N0, N1);
+ // or (and (m, y), (pandn m, x))
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
+ SDValue Mask = N1.getOperand(0);
+ SDValue X = N1.getOperand(1);
+ SDValue Y;
+ if (N0.getOperand(0) == Mask)
+ Y = N0.getOperand(1);
+ if (N0.getOperand(1) == Mask)
+ Y = N0.getOperand(0);
+
+ // Check to see if the mask appeared in both the AND and the ANDNP.
+ if (!Y.getNode())
+ return SDValue();
- // Now we know we at least have a plendvb with the mask val. See if
- // we can form a psignb/w/d.
- // psign = x.type == y.type == mask.type && y = sub(0, x);
+ // X, Y, and Mask may be wrapped in bitcasts;
+ // look through them.
+ if (Mask.getOpcode() == ISD::BITCAST)
+ Mask = Mask.getOperand(0);
+ if (X.getOpcode() == ISD::BITCAST)
X = X.getOperand(0);
+ if (Y.getOpcode() == ISD::BITCAST)
Y = Y.getOperand(0);
- if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
- ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
- X.getValueType() == MaskVT && X.getValueType() == Y.getValueType()){
- unsigned Opc = 0;
- switch (EltBits) {
- case 8: Opc = X86ISD::PSIGNB; break;
- case 16: Opc = X86ISD::PSIGNW; break;
- case 32: Opc = X86ISD::PSIGND; break;
- default: break;
- }
- if (Opc) {
- SDValue Sign = DAG.getNode(Opc, DL, MaskVT, X, Mask.getOperand(1));
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Sign);
- }
- }
- // PBLENDVB only available on SSE 4.1
- if (!(Subtarget->hasSSE41() || Subtarget->hasAVX()))
- return SDValue();
-
- X = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, X);
- Y = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Y);
- Mask = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Mask);
- Mask = DAG.getNode(ISD::VSELECT, DL, MVT::v16i8, Mask, X, Y);
- return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Mask);
+
+ EVT MaskVT = Mask.getValueType();
+
+ // Validate that the Mask operand is a vector sra node.
+ // FIXME: what to do for bytes, since there is a psignb/pblendvb, but
+ // there is no psrai.b
+ if (Mask.getOpcode() != X86ISD::VSRAI)
+ return SDValue();
+
+ // Check that the SRA is all signbits.
+ SDValue SraC = Mask.getOperand(1);
+ unsigned SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue();
+ unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ if ((SraAmt + 1) != EltBits)
+ return SDValue();
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Now we know we at least have a pblendvb with the mask val. See if
+ // we can form a psignb/w/d.
+ // psign = x.type == y.type == mask.type && y = sub(0, x);
+ if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
+ ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
+ X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+ assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
+ "Unsupported VT for PSIGN");
+ Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
+ // PBLENDVB only available on SSE 4.1
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8;
+
+ X = DAG.getNode(ISD::BITCAST, DL, BlendVT, X);
+ Y = DAG.getNode(ISD::BITCAST, DL, BlendVT, Y);
+ Mask = DAG.getNode(ISD::BITCAST, DL, BlendVT, Mask);
+ Mask = DAG.getNode(ISD::VSELECT, DL, BlendVT, Mask, Y, X);
+ return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
}
}
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
// fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
std::swap(N0, N1);
@@ -13500,6 +14212,36 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
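The OR pattern handled above is a per-lane select in disguise; a scalar model, assuming each mask lane is all-ones or all-zeros (which the all-sign-bits SRA check guarantees):

    #include <cstdint>

    // (m & y) | (~m & x) yields y where the mask is set and x elsewhere,
    // which is what PSIGN/PBLENDVB implement lane by lane.
    uint32_t lane_select(uint32_t m, uint32_t x, uint32_t y) {
      return (m & y) | (~m & x);
    }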
+// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
+static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ assert(Subtarget->hasBMI() && "Creating BLSMSK requires BMI instructions");
+
+ // Create BLSMSK instructions by finding X ^ (X-1)
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+ isAllOnes(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1);
+
+ if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+ isAllOnes(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0);
+
+ return SDValue();
+}
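And the matching scalar identity for the XOR form, again as a sketch:

    #include <cstdint>

    // x ^ (x - 1) sets every bit up to and including the lowest set bit of x.
    uint64_t blsmsk(uint64_t x) { return x ^ (x - 1); }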
+
/// PerformLOADCombine - Do target-specific dag combines on LOAD nodes.
static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
@@ -13515,7 +14257,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// shuffle. We need SSE4 for the shuffles.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
- if (RegVT.isVector() && Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ if (RegVT.isVector() && RegVT.isInteger() &&
+ Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -13553,7 +14296,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(),
Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
- Ld->isNonTemporal(), Ld->getAlignment());
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
// Insert the word loaded into a vector.
SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
@@ -13561,7 +14305,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
- SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT, ScalarInVector);
+ SDValue SlicedVec = DAG.getNode(ISD::BITCAST, dl, WideVecVT,
+ ScalarInVector);
unsigned SizeRatio = RegSz/MemSz;
// Redistribute the loaded elements into the different locations.
@@ -13593,7 +14338,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue StoredVal = St->getOperand(1);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- // If we are saving a concatination of two XMM registers, perform two stores.
+ // If we are saving a concatenation of two XMM registers, perform two stores.
// This is better on Sandy Bridge because one 256-bit mem op is done via two
// 128-bit ones. If in the future the cost becomes only one memory access, the
// first version would be better.
@@ -13703,8 +14448,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
const Function *F = DAG.getMachineFunction().getFunction();
bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
- && Subtarget->hasXMMInt();
+ bool F64IsLegal = !DAG.getTarget().Options.UseSoftFloat && !NoImplicitFloatOps
+ && Subtarget->hasSSE2();
if ((VT.isVector() ||
(VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
isa<LoadSDNode>(St->getValue()) &&
@@ -13722,7 +14467,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Ld = cast<LoadSDNode>(St->getChain());
else if (St->getValue().hasOneUse() &&
ChainVal->getOpcode() == ISD::TokenFactor) {
- for (unsigned i=0, e = ChainVal->getNumOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
if (ChainVal->getOperand(i).getNode() == LdVal) {
TokenFactorIndex = i;
Ld = cast<LoadSDNode>(St->getValue());
@@ -13749,7 +14494,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(),
Ld->getPointerInfo(), Ld->isVolatile(),
- Ld->isNonTemporal(), Ld->getAlignment());
+ Ld->isNonTemporal(), Ld->isInvariant(),
+ Ld->getAlignment());
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
@@ -13770,10 +14516,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
Ld->getPointerInfo(),
Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
+ Ld->isInvariant(), Ld->getAlignment());
SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
Ld->getPointerInfo().getWithOffset(4),
Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->isInvariant(),
MinAlign(Ld->getAlignment(), 4));
SDValue NewChain = LoLd.getValue(1);
@@ -13817,7 +14564,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
/// set to A, RHS to B, and the routine returns 'true'.
/// Note that the binary operation should have the property that if one of the
/// operands is UNDEF then the result is UNDEF.
-static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
+static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool IsCommutative) {
// Look for the following pattern: if
// A = < float a0, float a1, float a2, float a3 >
// B = < float b0, float b1, float b2, float b3 >
@@ -13833,7 +14580,18 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
return false;
EVT VT = LHS.getValueType();
- unsigned N = VT.getVectorNumElements();
+
+ assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ "Unsupported vector type for horizontal add/sub");
+
+ // Handle 128- and 256-bit vector lengths. AVX defines horizontal add/sub to
+ // operate independently on 128-bit lanes.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumLanes = VT.getSizeInBits()/128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+ assert((NumLaneElts % 2 == 0) &&
+ "Vector type should have an even number of elements in each lane");
+ unsigned HalfLaneElts = NumLaneElts/2;
// View LHS in the form
// LHS = VECTOR_SHUFFLE A, B, LMask
@@ -13842,34 +14600,36 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// NOTE: in what follows a default initialized SDValue represents an UNDEF of
// type VT.
SDValue A, B;
- SmallVector<int, 8> LMask(N);
+ SmallVector<int, 16> LMask(NumElts);
if (LHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (LHS.getOperand(0).getOpcode() != ISD::UNDEF)
A = LHS.getOperand(0);
if (LHS.getOperand(1).getOpcode() != ISD::UNDEF)
B = LHS.getOperand(1);
- cast<ShuffleVectorSDNode>(LHS.getNode())->getMask(LMask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(LHS.getNode())->getMask();
+ std::copy(Mask.begin(), Mask.end(), LMask.begin());
} else {
if (LHS.getOpcode() != ISD::UNDEF)
A = LHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
LMask[i] = i;
}
// Likewise, view RHS in the form
// RHS = VECTOR_SHUFFLE C, D, RMask
SDValue C, D;
- SmallVector<int, 8> RMask(N);
+ SmallVector<int, 16> RMask(NumElts);
if (RHS.getOpcode() == ISD::VECTOR_SHUFFLE) {
if (RHS.getOperand(0).getOpcode() != ISD::UNDEF)
C = RHS.getOperand(0);
if (RHS.getOperand(1).getOpcode() != ISD::UNDEF)
D = RHS.getOperand(1);
- cast<ShuffleVectorSDNode>(RHS.getNode())->getMask(RMask);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(RHS.getNode())->getMask();
+ std::copy(Mask.begin(), Mask.end(), RMask.begin());
} else {
if (RHS.getOpcode() != ISD::UNDEF)
C = RHS;
- for (unsigned i = 0; i != N; ++i)
+ for (unsigned i = 0; i != NumElts; ++i)
RMask[i] = i;
}
@@ -13884,30 +14644,28 @@ static bool isHorizontalBinOp(SDValue &LHS, SDValue &RHS, bool isCommutative) {
// If A and B occur in reverse order in RHS, then "swap" them (which means
// rewriting the mask).
if (A != C)
- for (unsigned i = 0; i != N; ++i) {
- unsigned Idx = RMask[i];
- if (Idx < N)
- RMask[i] += N;
- else if (Idx < 2*N)
- RMask[i] -= N;
- }
+ CommuteVectorShuffleMask(RMask, NumElts);
// At this point LHS and RHS are equivalent to
// LHS = VECTOR_SHUFFLE A, B, LMask
// RHS = VECTOR_SHUFFLE A, B, RMask
// Check that the masks correspond to performing a horizontal operation.
- for (unsigned i = 0; i != N; ++i) {
- unsigned LIdx = LMask[i], RIdx = RMask[i];
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int LIdx = LMask[i], RIdx = RMask[i];
// Ignore any UNDEF components.
- if (LIdx >= 2*N || RIdx >= 2*N || (!A.getNode() && (LIdx < N || RIdx < N))
- || (!B.getNode() && (LIdx >= N || RIdx >= N)))
+ if (LIdx < 0 || RIdx < 0 ||
+ (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
+ (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
continue;
// Check that successive elements are being operated on. If not, this is
// not a horizontal operation.
- if (!(LIdx == 2*i && RIdx == 2*i + 1) &&
- !(isCommutative && LIdx == 2*i + 1 && RIdx == 2*i))
+ unsigned Src = (i/HalfLaneElts) % 2; // each lane is split between srcs
+ unsigned LaneStart = (i/NumLaneElts) * NumLaneElts;
+ int Index = 2*(i%HalfLaneElts) + NumElts*Src + LaneStart;
+ if (!(LIdx == Index && RIdx == Index + 1) &&
+ !(IsCommutative && LIdx == Index + 1 && RIdx == Index))
return false;
}
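A worked instance of the index check above, assuming a v8f32 horizontal add on AVX (NumElts = 8, NumLanes = 2, NumLaneElts = 4, HalfLaneElts = 2):

    // For output element i = 5:
    //   Src       = (5 / 2) % 2 = 0        // still drawing from source A
    //   LaneStart = (5 / 4) * 4 = 4        // element 5 lives in the upper lane
    //   Index     = 2 * (5 % 2) + 8 * 0 + 4 = 6
    // so the masks must pair elements 6 and 7 (LIdx == 6, RIdx == 7), matching
    // VHADDPS, whose upper lane starts with a4+a5, a6+a7.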
@@ -13924,8 +14682,8 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal adds from adds of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, true))
return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -13939,8 +14697,8 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
// Try to synthesize horizontal subs from subs of shuffles.
- if ((Subtarget->hasSSE3() || Subtarget->hasAVX()) &&
- (VT == MVT::v4f32 || VT == MVT::v2f64) &&
+ if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
+ (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
isHorizontalBinOp(LHS, RHS, false))
return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
return SDValue();
@@ -14006,7 +14764,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (!Subtarget->hasAVX())
+ return SDValue();
+
+ // Optimize vectors in AVX mode:
+ // sign-extend v8i16 to v8i32 and
+ // v4i32 to v4i64.
+ //
+ // Divide the input vector into two parts;
+ // for v4i32 the shuffle masks will be { 0, 1, -1, -1 } and { 2, 3, -1, -1 }.
+ // Use the vpmovsx instruction to extend v4i32 -> v2i64 and v8i16 -> v4i32,
+ // then concatenate the results to the original VT.
+
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(0);
+ EVT OpVT = Op.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
+ (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
+
+ unsigned NumElems = OpVT.getVectorNumElements();
+ SmallVector<int,8> ShufMask1(NumElems, -1);
+ for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i;
+
+ SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+ ShufMask1.data());
+
+ SmallVector<int,8> ShufMask2(NumElems, -1);
+ for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2;
+
+ SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT),
+ ShufMask2.data());
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo);
+ OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ }
+ return SDValue();
+}
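To make the mask construction above concrete, here is what the two half-masks come out to for OpVT = v8i16 (NumElems = 8); buildHalfMasks is a hypothetical helper mirroring the loop in the hunk:

    #include "llvm/ADT/SmallVector.h"

    void buildHalfMasks(llvm::SmallVectorImpl<int> &Lo,
                        llvm::SmallVectorImpl<int> &Hi) {
      Lo.assign(8, -1); Hi.assign(8, -1);
      for (unsigned i = 0; i != 4; ++i) {
        Lo[i] = i;     // {0,1,2,3,-1,-1,-1,-1} -> low half of the input
        Hi[i] = i + 4; // {4,5,6,7,-1,-1,-1,-1} -> high half
      }
      // Each half is widened with VSEXT_MOVL to v4i32, then the two halves
      // are concatenated back to v8i32.
    }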
+
+static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
// This eliminates the zext. This transformation is necessary because
@@ -14014,6 +14823,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
DebugLoc dl = N->getDebugLoc();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT OpVT = N0.getValueType();
+
if (N0.getOpcode() == ISD::AND &&
N0.hasOneUse() &&
N0.getOperand(0).hasOneUse()) {
@@ -14028,6 +14839,37 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
N00.getOperand(0), N00.getOperand(1)),
DAG.getConstant(1, VT));
}
+ // Optimize vectors in AVX mode:
+ //
+ // v8i16 -> v8i32
+ // Use vpunpcklwd for 4 lower elements v8i16 -> v4i32.
+ // Use vpunpckhwd for 4 upper elements v8i16 -> v4i32.
+ // Concat upper and lower parts.
+ //
+ // v4i32 -> v4i64
+ // Use vpunpckldq for 4 lower elements v4i32 -> v2i64.
+ // Use vpunpckhdq for 4 upper elements v4i32 -> v2i64.
+ // Concat upper and lower parts.
+ //
+ if (Subtarget->hasAVX()) {
+
+ if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
+ ((VT == MVT::v4i64) && (OpVT == MVT::v4i32))) {
+
+ SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
+ SDValue OpLo = getTargetShuffleNode(X86ISD::UNPCKL, dl, OpVT, N0, ZeroVec, DAG);
+ SDValue OpHi = getTargetShuffleNode(X86ISD::UNPCKH, dl, OpVT, N0, ZeroVec, DAG);
+
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements()/2);
+
+ OpLo = DAG.getNode(ISD::BITCAST, dl, HVT, OpLo);
+ OpHi = DAG.getNode(ISD::BITCAST, dl, HVT, OpHi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
+ }
+ }
+
return SDValue();
}
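The unpack-with-zero trick above has a simple scalar reading; a sketch for 16-bit lanes:

    #include <cstdint>

    // Interleaving a 16-bit lane with a zero lane produces its 32-bit
    // zero-extension: data in the low half, zeros in the high half.
    uint32_t zext_via_interleave(uint16_t x) {
      return (uint32_t)x | ((uint32_t)0 << 16);
    }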
@@ -14136,7 +14978,24 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
-static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+/// PerformADDCombine - Do target-specific dag combines on integer adds.
+static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Try to synthesize horizontal adds from adds of shuffles.
+ if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
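What the integer X86ISD::HADD node computes for a v4i32 pair, modeled in scalar code (this mirrors the SSSE3 PHADDD lane layout):

    #include <cstdint>

    void phaddd(const int32_t A[4], const int32_t B[4], int32_t Res[4]) {
      Res[0] = A[0] + A[1]; // adjacent pairs from the first source...
      Res[1] = A[2] + A[3];
      Res[2] = B[0] + B[1]; // ...then adjacent pairs from the second
      Res[3] = B[2] + B[3];
    }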
+
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
@@ -14158,6 +15017,13 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
}
}
+ // Try to synthesize horizontal subs from subs of shuffles.
+ EVT VT = N->getValueType(0);
+ if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
+ (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+ isHorizontalBinOp(Op0, Op1, true))
+ return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
+
return OptimizeConditionalInDecrement(N, DAG);
}
@@ -14167,19 +15033,20 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
switch (N->getOpcode()) {
default: break;
case ISD::EXTRACT_VECTOR_ELT:
- return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
+ return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, DCI);
case ISD::VSELECT:
- case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case ISD::SELECT: return PerformSELECTCombine(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
- case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
- case ISD::SUB: return PerformSubCombine(N, DAG);
+ case ISD::ADD: return PerformAddCombine(N, DAG, Subtarget);
+ case ISD::SUB: return PerformSubCombine(N, DAG, Subtarget);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DAG, DCI, Subtarget);
case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget);
case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget);
+ case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget);
case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget);
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
@@ -14190,27 +15057,14 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
- case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
+ case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG, DCI);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG);
- case X86ISD::SHUFPS: // Handle all target specific shuffles
- case X86ISD::SHUFPD:
+ case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::PALIGN:
- case X86ISD::PUNPCKHBW:
- case X86ISD::PUNPCKHWD:
- case X86ISD::PUNPCKHDQ:
- case X86ISD::PUNPCKHQDQ:
- case X86ISD::UNPCKHPS:
- case X86ISD::UNPCKHPD:
- case X86ISD::VUNPCKHPSY:
- case X86ISD::VUNPCKHPDY:
- case X86ISD::PUNPCKLBW:
- case X86ISD::PUNPCKLWD:
- case X86ISD::PUNPCKLDQ:
- case X86ISD::PUNPCKLQDQ:
- case X86ISD::UNPCKLPS:
- case X86ISD::UNPCKLPD:
- case X86ISD::VUNPCKLPSY:
- case X86ISD::VUNPCKLPDY:
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:
@@ -14218,11 +15072,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PSHUFLW:
case X86ISD::MOVSS:
case X86ISD::MOVSD:
- case X86ISD::VPERMILPS:
- case X86ISD::VPERMILPSY:
- case X86ISD::VPERMILPD:
- case X86ISD::VPERMILPDY:
- case X86ISD::VPERM2F128:
+ case X86ISD::VPERMILP:
+ case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
}
@@ -14330,11 +15181,38 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
// X86 Inline Assembly Support
//===----------------------------------------------------------------------===//
+namespace {
+ // Helper to match an asm string against whitespace-separated pieces.
+ bool matchAsmImpl(StringRef s, ArrayRef<const StringRef *> args) {
+ s = s.substr(s.find_first_not_of(" \t")); // Skip leading whitespace.
+
+ for (unsigned i = 0, e = args.size(); i != e; ++i) {
+ StringRef piece(*args[i]);
+ if (!s.startswith(piece)) // Check if the piece matches.
+ return false;
+
+ s = s.substr(piece.size());
+ StringRef::size_type pos = s.find_first_not_of(" \t");
+ if (pos == 0) // We matched a prefix.
+ return false;
+
+ s = s.substr(pos);
+ }
+
+ return s.empty();
+ }
+ const VariadicFunction1<bool, StringRef, StringRef, matchAsmImpl> matchAsm={};
+}
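A few illustrative calls, mirroring the uses further down (behavior is derived from matchAsmImpl above, not from any documented API):

    bool A = matchAsm("bswap $0",    "bswap", "$0"); // true
    bool B = matchAsm(" bswap\t$0",  "bswap", "$0"); // true: leading and inner
                                                     //   whitespace are skipped
    bool C = matchAsm("bswapped $0", "bswap", "$0"); // false: each piece must
                                                     //   end at whitespace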
+
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
std::string AsmStr = IA->getAsmString();
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty || Ty->getBitWidth() % 16 != 0)
+ return false;
+
// TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
SmallVector<StringRef, 4> AsmPieces;
SplitString(AsmStr, AsmPieces, ";\n");
@@ -14342,35 +15220,27 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
switch (AsmPieces.size()) {
default: return false;
case 1:
- AsmStr = AsmPieces[0];
- AsmPieces.clear();
- SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
-
// FIXME: this should verify that we are targeting a 486 or better. If not,
- // we will turn this bswap into something that will be lowered to logical ops
- // instead of emitting the bswap asm. For now, we don't support 486 or lower
- // so don't worry about this.
+ // we will turn this bswap into something that will be lowered to logical
+ // ops instead of emitting the bswap asm. For now, we don't support 486 or
+ // lower so don't worry about this.
// bswap $0
- if (AsmPieces.size() == 2 &&
- (AsmPieces[0] == "bswap" ||
- AsmPieces[0] == "bswapq" ||
- AsmPieces[0] == "bswapl") &&
- (AsmPieces[1] == "$0" ||
- AsmPieces[1] == "${0:q}")) {
+ if (matchAsm(AsmPieces[0], "bswap", "$0") ||
+ matchAsm(AsmPieces[0], "bswapl", "$0") ||
+ matchAsm(AsmPieces[0], "bswapq", "$0") ||
+ matchAsm(AsmPieces[0], "bswap", "${0:q}") ||
+ matchAsm(AsmPieces[0], "bswapl", "${0:q}") ||
+ matchAsm(AsmPieces[0], "bswapq", "${0:q}")) {
// No need to check constraints, nothing other than the equivalent of
// "=r,0" would be valid here.
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
return IntrinsicLowering::LowerToByteSwap(CI);
}
+
// rorw $$8, ${0:w} --> llvm.bswap.i16
if (CI->getType()->isIntegerTy(16) &&
- AsmPieces.size() == 3 &&
- (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
- AsmPieces[1] == "$$8," &&
- AsmPieces[2] == "${0:w}" &&
- IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
+ (matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") ||
+ matchAsm(AsmPieces[0], "rolw", "$$8,", "${0:w}"))) {
AsmPieces.clear();
const std::string &ConstraintsStr = IA->getConstraintString();
SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
@@ -14379,46 +15249,26 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces[0] == "~{cc}" &&
AsmPieces[1] == "~{dirflag}" &&
AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}") {
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
- return IntrinsicLowering::LowerToByteSwap(CI);
- }
+ AsmPieces[3] == "~{fpsr}")
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
break;
case 3:
if (CI->getType()->isIntegerTy(32) &&
- IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
- SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
- Words[2] == "${0:w}") {
- Words.clear();
- SplitString(AsmPieces[1], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "rorl" && Words[1] == "$$16" &&
- Words[2] == "$0") {
- Words.clear();
- SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "rorw" && Words[1] == "$$8" &&
- Words[2] == "${0:w}") {
- AsmPieces.clear();
- const std::string &ConstraintsStr = IA->getConstraintString();
- SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
- std::sort(AsmPieces.begin(), AsmPieces.end());
- if (AsmPieces.size() == 4 &&
- AsmPieces[0] == "~{cc}" &&
- AsmPieces[1] == "~{dirflag}" &&
- AsmPieces[2] == "~{flags}" &&
- AsmPieces[3] == "~{fpsr}") {
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
- return IntrinsicLowering::LowerToByteSwap(CI);
- }
- }
- }
- }
+ IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
+ matchAsm(AsmPieces[0], "rorw", "$$8,", "${0:w}") &&
+ matchAsm(AsmPieces[1], "rorl", "$$16,", "$0") &&
+ matchAsm(AsmPieces[2], "rorw", "$$8,", "${0:w}")) {
+ AsmPieces.clear();
+ const std::string &ConstraintsStr = IA->getConstraintString();
+ SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
+ std::sort(AsmPieces.begin(), AsmPieces.end());
+ if (AsmPieces.size() == 4 &&
+ AsmPieces[0] == "~{cc}" &&
+ AsmPieces[1] == "~{dirflag}" &&
+ AsmPieces[2] == "~{flags}" &&
+ AsmPieces[3] == "~{fpsr}")
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
if (CI->getType()->isIntegerTy(64)) {
@@ -14427,23 +15277,10 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
// bswap %eax / bswap %edx / xchgl %eax, %edx -> llvm.bswap.i64
- SmallVector<StringRef, 4> Words;
- SplitString(AsmPieces[0], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
- Words.clear();
- SplitString(AsmPieces[1], Words, " \t");
- if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
- Words.clear();
- SplitString(AsmPieces[2], Words, " \t,");
- if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
- Words[2] == "%edx") {
- IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
- if (!Ty || Ty->getBitWidth() % 16 != 0)
- return false;
- return IntrinsicLowering::LowerToByteSwap(CI);
- }
- }
- }
+ if (matchAsm(AsmPieces[0], "bswap", "%eax") &&
+ matchAsm(AsmPieces[1], "bswap", "%edx") &&
+ matchAsm(AsmPieces[2], "xchgl", "%eax,", "%edx"))
+ return IntrinsicLowering::LowerToByteSwap(CI);
}
}
break;
@@ -14538,7 +15375,8 @@ TargetLowering::ConstraintWeight
break;
case 'x':
case 'Y':
- if ((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasXMM())
+ if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
+ ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX()))
weight = CW_Register;
break;
case 'I':
@@ -14608,9 +15446,9 @@ LowerXConstraint(EVT ConstraintVT) const {
// FP X constraints get lowered to SSE1/2 registers if available, otherwise
// 'f' like normal targets.
if (ConstraintVT.isFloatingPoint()) {
- if (Subtarget->hasXMMInt())
+ if (Subtarget->hasSSE2())
return "Y";
- if (Subtarget->hasXMM())
+ if (Subtarget->hasSSE1())
return "x";
}
@@ -14816,10 +15654,10 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
if (!Subtarget->hasMMX()) break;
return std::make_pair(0U, X86::VR64RegisterClass);
case 'Y': // SSE_REGS if SSE2 allowed
- if (!Subtarget->hasXMMInt()) break;
+ if (!Subtarget->hasSSE2()) break;
// FALL THROUGH.
- case 'x': // SSE_REGS if SSE1 allowed
- if (!Subtarget->hasXMM()) break;
+ case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
+ if (!Subtarget->hasSSE1()) break;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
@@ -14838,6 +15676,15 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
case MVT::v4f32:
case MVT::v2f64:
return std::make_pair(0U, X86::VR128RegisterClass);
+ // AVX types.
+ case MVT::v32i8:
+ case MVT::v16i16:
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v8f32:
+ case MVT::v4f64:
+ return std::make_pair(0U, X86::VR256RegisterClass);
+
}
break;
}
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 342a5e617545..4e0073365a73 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -172,12 +172,23 @@ namespace llvm {
/// ANDNP - Bitwise Logical AND NOT of Packed FP values.
ANDNP,
- /// PSIGNB/W/D - Copy integer sign.
- PSIGNB, PSIGNW, PSIGND,
+ /// PSIGN - Copy integer sign.
+ PSIGN,
- /// BLEND family of opcodes
+ /// BLENDV - Blend where the selector is an XMM register.
BLENDV,
+ /// BLENDxx - Blend where the selector is an immediate.
+ BLENDPW,
+ BLENDPS,
+ BLENDPD,
+
+ /// HADD - Integer horizontal add.
+ HADD,
+
+ /// HSUB - Integer horizontal sub.
+ HSUB,
+
/// FHADD - Floating point horizontal add.
FHADD,
@@ -213,16 +224,26 @@ namespace llvm {
// VZEXT_MOVL - Vector move low and zero extend.
VZEXT_MOVL,
- // VSHL, VSRL - Vector logical left / right shift.
- VSHL, VSRL,
+ // VSEXT_MOVL - Vector move low and sign extend.
+ VSEXT_MOVL,
+
+ // VSHLDQ, VSRLDQ - 128-bit vector logical left / right shift
+ VSHLDQ, VSRLDQ,
+
+ // VSHL, VSRL, VSRA - Vector shift elements
+ VSHL, VSRL, VSRA,
- // CMPPD, CMPPS - Vector double/float comparison.
- // CMPPD, CMPPS - Vector double/float comparison.
- CMPPD, CMPPS,
+ // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
+ VSHLI, VSRLI, VSRAI,
+
+ // CMPP - Vector packed double/float comparison.
+ CMPP,
// PCMP* - Vector integer comparisons.
- PCMPEQB, PCMPEQW, PCMPEQD, PCMPEQQ,
- PCMPGTB, PCMPGTW, PCMPGTD, PCMPGTQ,
+ PCMPEQ, PCMPGT,
+
+ // VPCOM, VPCOMU - XOP Vector integer comparisons.
+ VPCOM, VPCOMU,
// ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL,
@@ -230,6 +251,10 @@ namespace llvm {
ANDN, // ANDN - Bitwise AND NOT with FLAGS results.
+ BLSI, // BLSI - Extract lowest set isolated bit
+ BLSMSK, // BLSMSK - Get mask up to lowest set bit
+ BLSR, // BLSR - Reset lowest set bit
+
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
// MUL_IMM - X86 specific multiply by immediate.
@@ -246,46 +271,26 @@ namespace llvm {
PSHUFD,
PSHUFHW,
PSHUFLW,
- PSHUFHW_LD,
- PSHUFLW_LD,
- SHUFPD,
- SHUFPS,
+ SHUFP,
MOVDDUP,
MOVSHDUP,
MOVSLDUP,
- MOVSHDUP_LD,
- MOVSLDUP_LD,
MOVLHPS,
MOVLHPD,
MOVHLPS,
- MOVHLPD,
MOVLPS,
MOVLPD,
MOVSD,
MOVSS,
- UNPCKLPS,
- UNPCKLPD,
- VUNPCKLPSY,
- VUNPCKLPDY,
- UNPCKHPS,
- UNPCKHPD,
- VUNPCKHPSY,
- VUNPCKHPDY,
- PUNPCKLBW,
- PUNPCKLWD,
- PUNPCKLDQ,
- PUNPCKLQDQ,
- PUNPCKHBW,
- PUNPCKHWD,
- PUNPCKHDQ,
- PUNPCKHQDQ,
- VPERMILPS,
- VPERMILPSY,
- VPERMILPD,
- VPERMILPDY,
- VPERM2F128,
+ UNPCKL,
+ UNPCKH,
+ VPERMILP,
+ VPERM2X128,
VBROADCAST,
+ // PMULUDQ - Vector multiply packed unsigned doubleword integers
+ PMULUDQ,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -299,6 +304,9 @@ namespace llvm {
// falls back to heap allocation if not.
SEG_ALLOCA,
+ // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
+ WIN_FTOL,
+
// Memory barrier
MEMBARRIER,
MFENCE,
@@ -368,75 +376,6 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace X86 {
- /// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFDMask(ShuffleVectorSDNode *N);
-
- /// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFHWMask(ShuffleVectorSDNode *N);
-
- /// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to PSHUFD.
- bool isPSHUFLWMask(ShuffleVectorSDNode *N);
-
- /// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to SHUFP*.
- bool isSHUFPMask(ShuffleVectorSDNode *N);
-
- /// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVHLPS.
- bool isMOVHLPSMask(ShuffleVectorSDNode *N);
-
- /// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
- /// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
- /// <2, 3, 2, 3>
- bool isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N);
-
- /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for MOVLP{S|D}.
- bool isMOVLPMask(ShuffleVectorSDNode *N);
-
- /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for MOVHP{S|D}.
- /// as well as MOVLHPS.
- bool isMOVLHPSMask(ShuffleVectorSDNode *N);
-
- /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to UNPCKL.
- bool isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
-
- /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to UNPCKH.
- bool isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat = false);
-
- /// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
- /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
- /// <0, 0, 1, 1>
- bool isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N);
-
- /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
- /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
- /// <2, 2, 3, 3>
- bool isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N);
-
- /// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVSS,
- /// MOVSD, and MOVD, i.e. setting the lowest element.
- bool isMOVLMask(ShuffleVectorSDNode *N);
-
- /// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
- bool isMOVSHDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
-
- /// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
- bool isMOVSLDUPMask(ShuffleVectorSDNode *N, const X86Subtarget *Subtarget);
-
- /// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
- /// specifies a shuffle of elements that is suitable for input to MOVDDUP.
- bool isMOVDDUPMask(ShuffleVectorSDNode *N);
-
/// isVEXTRACTF128Index - Return true if the specified
/// EXTRACT_SUBVECTOR operand specifies a vector extract that is
/// suitable for input to VEXTRACTF128.
@@ -447,23 +386,6 @@ namespace llvm {
/// suitable for input to VINSERTF128.
bool isVINSERTF128Index(SDNode *N);
- /// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
- /// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
- /// instructions.
- unsigned getShuffleSHUFImmediate(SDNode *N);
-
- /// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with PSHUFHW instruction.
- unsigned getShufflePSHUFHWImmediate(SDNode *N);
-
- /// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with PSHUFLW instruction.
- unsigned getShufflePSHUFLWImmediate(SDNode *N);
-
- /// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
- /// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
- unsigned getShufflePALIGNRImmediate(SDNode *N);
-
/// getExtractVEXTRACTF128Immediate - Return the appropriate
/// immediate to extract the specified EXTRACT_SUBVECTOR index
/// with VEXTRACTF128 instructions.
@@ -529,7 +451,7 @@ namespace llvm {
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If
- /// 'NonScalarIntSafe' is true, that means it's safe to return a
+ /// 'IsZeroVal' is true, that means it's safe to return a
/// non-scalar-integer type, e.g. empty string source, constant, or loaded
/// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
/// constant so it does not need to be loaded.
@@ -537,7 +459,7 @@ namespace llvm {
/// target-independent logic.
virtual EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool NonScalarIntSafe, bool MemcpyStrSrc,
+ bool IsZeroVal, bool MemcpyStrSrc,
MachineFunction &MF) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
@@ -587,7 +509,6 @@ namespace llvm {
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -697,6 +618,18 @@ namespace llvm {
(VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1
}
+ /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
+ /// for fptoui.
+ bool isTargetFTOL() const {
+ return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
+ }
+
+ /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
+ /// used for fptoui to the given type.
+ bool isIntegerTypeFTOL(EVT VT) const {
+ return isTargetFTOL() && VT == MVT::i64;
+ }
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
@@ -779,7 +712,8 @@ namespace llvm {
SelectionDAG &DAG) const;
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
- bool isSigned) const;
+ bool isSigned,
+ bool isReplace) const;
SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
SelectionDAG &DAG) const;
@@ -833,6 +767,7 @@ namespace llvm {
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
@@ -846,9 +781,12 @@ namespace llvm {
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const;
// Utility functions to help LowerVECTOR_SHUFFLE
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
+ SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
@@ -857,8 +795,8 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -872,7 +810,7 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
- virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
@@ -916,7 +854,7 @@ namespace llvm {
unsigned cxchgOpc,
unsigned notOpc,
unsigned EAXreg,
- TargetRegisterClass *RC,
+ const TargetRegisterClass *RC,
bool invSrc = false) const;
MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td
index dd4f6a5a85a4..54b91c3edb8b 100644
--- a/lib/Target/X86/X86Instr3DNow.td
+++ b/lib/Target/X86/X86Instr3DNow.td
@@ -1,4 +1,4 @@
-//====- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
+//===-- X86Instr3DNow.td - The 3DNow! Instruction Set ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 74b647a4f6b1..0eee08339384 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -1,10 +1,10 @@
-//===- X86InstrArithmetic.td - Integer Arithmetic Instrs ---*- tablegen -*-===//
-//
+//===-- X86InstrArithmetic.td - Integer Arithmetic Instrs --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the integer arithmetic instructions in the X86
@@ -18,22 +18,24 @@
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
(outs GR16:$dst), (ins i32mem:$src),
- "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
+ "lea{w}\t{$src|$dst}, {$dst|$src}", [], IIC_LEA_16>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins i32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
+ [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ Requires<[In32BitMode]>;
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
- [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
+ [(set GR32:$dst, lea32addr:$src)], IIC_LEA>,
+ Requires<[In64BitMode]>;
let isReMaterializable = 1 in
def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
- [(set GR64:$dst, lea64addr:$src)]>;
+ [(set GR64:$dst, lea64addr:$src)], IIC_LEA>;
@@ -51,21 +53,23 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>; // AL,AH = AL*GR8
+ (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX], neverHasSideEffects = 1 in
def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
"mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*GR16
+ [], IIC_MUL16_REG>, OpSize; // AX,DX = AX*GR16
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], neverHasSideEffects = 1 in
def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
"mul{l}\t$src", // EAX,EDX = EAX*GR32
- [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/],
+ IIC_MUL32_REG>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
"mul{q}\t$src", // RAX,RDX = RAX*GR64
- [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/],
+ IIC_MUL64>;
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
@@ -74,50 +78,51 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
// This probably ought to be moved to a def : Pat<> if the
// syntax can be accepted.
[(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>; // AL,AH = AL*[mem8]
+ (implicit EFLAGS)], IIC_MUL8>; // AL,AH = AL*[mem8]
let mayLoad = 1, neverHasSideEffects = 1 in {
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
"mul{w}\t$src",
- []>, OpSize; // AX,DX = AX*[mem16]
+ [], IIC_MUL16_MEM>, OpSize; // AX,DX = AX*[mem16]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
"mul{l}\t$src",
- []>; // EAX,EDX = EAX*[mem32]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+ [], IIC_MUL32_MEM>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+ "mul{q}\t$src", [], IIC_MUL64>; // RAX,RDX = RAX*[mem64]
}
let neverHasSideEffects = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>;
- // AL,AH = AL*GR8
+def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", [],
+ IIC_IMUL8>; // AL,AH = AL*GR8
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize; // AX,DX = AX*GR16
+def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", [],
+ IIC_IMUL16_RR>, OpSize; // AX,DX = AX*GR16
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>;
- // EAX,EDX = EAX*GR32
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>;
- // RAX,RDX = RAX*GR64
+def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", [],
+ IIC_IMUL32_RR>; // EAX,EDX = EAX*GR32
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", [],
+ IIC_IMUL64_RR>; // RAX,RDX = RAX*GR64
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>; // AL,AH = AL*[mem8]
+ "imul{b}\t$src", [], IIC_IMUL8>; // AL,AH = AL*[mem8]
let Defs = [AX,DX,EFLAGS], Uses = [AX] in
def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize; // AX,DX = AX*[mem16]
+ "imul{w}\t$src", [], IIC_IMUL16_MEM>, OpSize;
+ // AX,DX = AX*[mem16]
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in
+ "imul{l}\t$src", [], IIC_IMUL32_MEM>; // EAX,EDX = EAX*[mem32]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64]
+ "imul{q}\t$src", [], IIC_IMUL64>; // RAX,RDX = RAX*[mem64]
}
} // neverHasSideEffects
@@ -130,16 +135,19 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
+ (X86smul_flag GR16:$src1, GR16:$src2))], IIC_IMUL16_RR>,
+ TB, OpSize;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
+ (X86smul_flag GR32:$src1, GR32:$src2))], IIC_IMUL32_RR>,
+ TB;
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
+ (X86smul_flag GR64:$src1, GR64:$src2))], IIC_IMUL64_RR>,
+ TB;
}
// Register-Memory Signed Integer Multiply
@@ -147,18 +155,23 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))],
+ IIC_IMUL16_RM>,
TB, OpSize;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
+ (X86smul_flag GR32:$src1, (load addr:$src2)))],
+ IIC_IMUL32_RM>,
+ TB;
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
+ (X86smul_flag GR64:$src1, (load addr:$src2)))],
+ IIC_IMUL64_RM>,
+ TB;
} // Constraints = "$src1 = $dst"
} // Defs = [EFLAGS]
@@ -170,33 +183,39 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
+ (X86smul_flag GR16:$src1, imm:$src2))],
+ IIC_IMUL16_RRI>, OpSize;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))],
+ IIC_IMUL16_RRI>,
OpSize;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, imm:$src2))]>;
+ (X86smul_flag GR32:$src1, imm:$src2))],
+ IIC_IMUL32_RRI>;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))],
+ IIC_IMUL32_RRI>;
def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))],
+ IIC_IMUL64_RRI>;
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))],
+ IIC_IMUL64_RRI>;
// Memory-Integer Signed Integer Multiply
@@ -204,37 +223,43 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ (X86smul_flag (load addr:$src1), imm:$src2))],
+ IIC_IMUL16_RMI>,
OpSize;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR16:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i16immSExt8:$src2))]>, OpSize;
+ i16immSExt8:$src2))], IIC_IMUL16_RMI>,
+ OpSize;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
- (X86smul_flag (load addr:$src1), imm:$src2))]>;
+ (X86smul_flag (load addr:$src1), imm:$src2))],
+ IIC_IMUL32_RMI>;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i32immSExt8:$src2))]>;
+ i32immSExt8:$src2))],
+ IIC_IMUL32_RMI>;
def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i64immSExt32:$src2))]>;
+ i64immSExt32:$src2))],
+ IIC_IMUL64_RMI>;
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag (load addr:$src1),
- i64immSExt8:$src2))]>;
+ i64immSExt8:$src2))],
+ IIC_IMUL64_RMI>;
} // Defs = [EFLAGS]
@@ -243,62 +268,62 @@ def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
// unsigned division/remainder
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>;
+ "div{b}\t$src", [], IIC_DIV8_REG>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>;
+ "div{l}\t$src", [], IIC_DIV32>;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
- "div{q}\t$src", []>;
+ "div{q}\t$src", [], IIC_DIV64>;
let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>;
+ "div{b}\t$src", [], IIC_DIV8_MEM>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize;
+ "div{w}\t$src", [], IIC_DIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>;
+ "div{l}\t$src", [], IIC_DIV32>;
// RDX:RAX/[mem64] = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>;
+ "div{q}\t$src", [], IIC_DIV64>;
}
// Signed division/remainder.
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>;
+ "idiv{b}\t$src", [], IIC_IDIV8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>;
+ "idiv{l}\t$src", [], IIC_IDIV32>;
// RDX:RAX/r64 = RAX,RDX
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
- "idiv{q}\t$src", []>;
-
-let mayLoad = 1, mayLoad = 1 in {
+ "idiv{q}\t$src", [], IIC_IDIV64>;
+
+let mayLoad = 1 in {
let Defs = [AL,EFLAGS,AX], Uses = [AX] in
def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>;
+ "idiv{b}\t$src", [], IIC_IDIV8>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize;
+ "idiv{w}\t$src", [], IIC_IDIV16>, OpSize;
let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>;
+ "idiv{l}\t$src", [], IIC_IDIV32>;
let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>;
+ "idiv{q}\t$src", [], IIC_IDIV64>;
}
//===----------------------------------------------------------------------===//
@@ -312,35 +337,35 @@ let Constraints = "$src1 = $dst" in {
def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
"neg{b}\t$dst",
[(set GR8:$dst, (ineg GR8:$src1)),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_REG>;
def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"neg{w}\t$dst",
[(set GR16:$dst, (ineg GR16:$src1)),
- (implicit EFLAGS)]>, OpSize;
+ (implicit EFLAGS)], IIC_UNARY_REG>, OpSize;
def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"neg{l}\t$dst",
[(set GR32:$dst, (ineg GR32:$src1)),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_REG>;
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src1)),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_REG>;
} // Constraints = "$src1 = $dst"
def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
"neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
"neg{w}\t$dst",
[(store (ineg (loadi16 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize;
+ (implicit EFLAGS)], IIC_UNARY_MEM>, OpSize;
def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
"neg{l}\t$dst",
[(store (ineg (loadi32 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
} // Defs = [EFLAGS]
@@ -351,29 +376,30 @@ let Constraints = "$src1 = $dst" in {
let AddedComplexity = 15 in {
def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
"not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src1))]>;
+ [(set GR8:$dst, (not GR8:$src1))], IIC_UNARY_REG>;
def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+ [(set GR16:$dst, (not GR16:$src1))], IIC_UNARY_REG>, OpSize;
def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src1))]>;
+ [(set GR32:$dst, (not GR32:$src1))], IIC_UNARY_REG>;
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
- [(set GR64:$dst, (not GR64:$src1))]>;
+ [(set GR64:$dst, (not GR64:$src1))], IIC_UNARY_REG>;
}
} // Constraints = "$src1 = $dst"
def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
"not{b}\t$dst",
- [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
+ [(store (not (loadi8 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
"not{w}\t$dst",
- [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
+ [(store (not (loadi16 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>,
+ OpSize;
def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
"not{l}\t$dst",
- [(store (not (loadi32 addr:$dst)), addr:$dst)]>;
+ [(store (not (loadi32 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
- [(store (not (loadi64 addr:$dst)), addr:$dst)]>;
+ [(store (not (loadi64 addr:$dst)), addr:$dst)], IIC_UNARY_MEM>;
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
@@ -382,19 +408,22 @@ let Constraints = "$src1 = $dst" in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))],
+ IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))], IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In32BitMode]>;
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src1))],
+ IIC_UNARY_REG>;
} // isConvertibleToThreeAddress = 1, CodeSize = 1
@@ -403,19 +432,23 @@ let isConvertibleToThreeAddress = 1, CodeSize = 2 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))],
+ IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In64BitMode]>;
def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
+ IIC_UNARY_REG>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress = 1, CodeSize = 2
@@ -424,37 +457,37 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
let CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In32BitMode]>;
def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In32BitMode]>;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
// FIXME: What is this for??
def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In64BitMode]>;
def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In64BitMode]>;
} // CodeSize = 2
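// A hedged sketch of the unfolding referenced above (assumed, not spelled
// out in this patch): TargetInstrInfo::unfoldMemoryOperand can expand, e.g.,
//   inc{l} [mem]  ==>  mov{l} [mem], %reg ; inc{l} %reg ; mov{l} %reg, [mem]
// and the In64BitMode duplicates keep an 0xFF /0 form available for that
// round trip, since the short 0x40-0x4F INC/DEC encodings are REX prefixes
// in 64-bit mode and cannot be used there.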
@@ -462,18 +495,22 @@ let Constraints = "$src1 = $dst" in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))],
+ IIC_UNARY_REG>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))],
+ IIC_UNARY_REG>,
OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))],
+ IIC_UNARY_REG>,
Requires<[In32BitMode]>;
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))]>;
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src1))],
+ IIC_UNARY_REG>;
} // CodeSize = 2
} // Constraints = "$src1 = $dst"
@@ -481,18 +518,18 @@ def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
let CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
OpSize, Requires<[In32BitMode]>;
def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
+ (implicit EFLAGS)], IIC_UNARY_MEM>,
Requires<[In32BitMode]>;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
+ (implicit EFLAGS)], IIC_UNARY_MEM>;
} // CodeSize = 2
} // Defs = [EFLAGS]
@@ -588,11 +625,13 @@ def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
/// or 1 (for i16,i32,i64 operations).
class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
- string mnemonic, string args, list<dag> pattern>
+ string mnemonic, string args, list<dag> pattern,
+ InstrItinClass itin = IIC_BIN_NONMEM>
: I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
f, outs, ins,
- !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern,
+ itin> {
// Infer instruction prefixes from type info.
let hasOpSizePrefix = typeinfo.HasOpSizePrefix;
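// Worked example of the low-bit inference described above (illustrative):
// "add r/m, r" encodes as opcode 0x00 for the i8 form and 0x01 for the
// i16/i32/i64 forms, so the bit splice keeps bits 7..1 of the given opcode
// and takes bit 0 from the type info:
//   Xi8  : {0,0,0,0,0,0,0, HasOddOpcode = 0} = 0x00
//   Xi32 : {0,0,0,0,0,0,0, HasOddOpcode = 1} = 0x01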
@@ -601,10 +640,11 @@ class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
// BinOpRR - Instructions like "add reg, reg, reg".
class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, list<dag> pattern, Format f = MRMDestReg>
+ dag outlist, list<dag> pattern, InstrItinClass itin,
+ Format f = MRMDestReg>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, itin>;
// BinOpRR_R - Instructions like "add reg, reg, reg", where the pattern has
// just a regclass (no eflags) as a result.
@@ -612,7 +652,8 @@ class BinOpRR_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
// just a EFLAGS as a result.
@@ -621,7 +662,7 @@ class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs),
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
- f>;
+ IIC_BIN_NONMEM, f>;
// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
// both a regclass and EFLAGS as a result.
@@ -629,7 +670,8 @@ class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
+ (opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))],
+ IIC_BIN_NONMEM>;
// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
// both a regclass and EFLAGS as a result, and has EFLAGS as input.
@@ -638,14 +680,14 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
- EFLAGS))]>;
+ EFLAGS))], IIC_BIN_NONMEM>;
// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo,
(outs typeinfo.RegClass:$dst),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $dst|$dst, $src2}", []> {
+ mnemonic, "{$src2, $dst|$dst, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
}
@@ -654,7 +696,7 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", []> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", [], IIC_BIN_NONMEM> {
// The disassembler should know about this, but not the asmparser.
let isCodeGenOnly = 1;
}
@@ -664,7 +706,7 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern>;
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM>;
// BinOpRM_R - Instructions like "add reg, reg, [mem]".
class BinOpRM_R<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -700,7 +742,7 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.ImmOperand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -724,7 +766,6 @@ class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst),
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-
// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -738,7 +779,7 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.Imm8Operand:$src2),
- mnemonic, "{$src2, $src1|$src1, $src2}", pattern> {
+ mnemonic, "{$src2, $src1|$src1, $src2}", pattern, IIC_BIN_NONMEM> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -776,7 +817,7 @@ class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern>;
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM>;
// BinOpMR_RMW - Instructions like "add [mem], reg".
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
@@ -804,7 +845,7 @@ class BinOpMI<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern, bits<8> opcode = 0x80>
: ITy<opcode, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.ImmOperand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
let ImmT = typeinfo.ImmEncoding;
}
@@ -815,7 +856,6 @@ class BinOpMI_RMW<string mnemonic, X86TypeInfo typeinfo,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
(implicit EFLAGS)]>;
-
// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
class BinOpMI_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
@@ -837,7 +877,7 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.Imm8Operand:$src),
- mnemonic, "{$src, $dst|$dst, $src}", pattern> {
+ mnemonic, "{$src, $dst|$dst, $src}", pattern, IIC_BIN_MEM> {
let ImmT = Imm8; // Always 8-bit immediate.
}
@@ -1150,7 +1190,7 @@ let Defs = [EFLAGS] in {
// register class is constrained to GR8_NOREX.
let isPseudo = 1 in
def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask),
- "", []>;
+ "", [], IIC_BIN_NONMEM>;
}
//===----------------------------------------------------------------------===//
@@ -1160,14 +1200,39 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
PatFrag ld_frag> {
def rr : I<0xF2, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))]>;
+ [(set RC:$dst, EFLAGS, (X86andn_flag RC:$src1, RC:$src2))],
+ IIC_BIN_NONMEM>;
def rm : I<0xF2, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, EFLAGS,
- (X86andn_flag RC:$src1, (ld_frag addr:$src2)))]>;
+ (X86andn_flag RC:$src1, (ld_frag addr:$src2)))], IIC_BIN_MEM>;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V;
defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W;
}
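// For reference (inferred from the pattern above rather than stated here):
// ANDN computes $dst = ~$src1 & $src2 and defines EFLAGS, e.g. in AT&T order
//   andn %ecx, %ebx, %eax        ; eax = ~ebx & ecx, flags updated
// which is why X86andn_flag produces both a register and an EFLAGS result.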
+
+//===----------------------------------------------------------------------===//
+// MULX Instruction
+//
+multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ let isCommutable = 1 in
+ def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ [], IIC_MUL8>, T8XD, VEX_4V;
+
+ let mayLoad = 1 in
+ def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"),
+ [], IIC_MUL8>, T8XD, VEX_4V;
+}
+}
+
+let Predicates = [HasBMI2] in {
+ let Uses = [EDX] in
+ defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem>;
+ let Uses = [RDX] in
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W;
+}
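// Semantics sketch for the MULX definitions above (hedged from the BMI2
// spec, not from this patch): MULX multiplies EDX/RDX by $src, writes the
// high half to $dst1 and the low half to $dst2, and leaves EFLAGS untouched,
// which is why (unlike MUL) EFLAGS is absent from Defs. For example:
//   mulx %rcx, %rax, %rbx        ; RBX:RAX = RDX * RCX  (AT&T order)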
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 0245e5c09644..fa1d67644db7 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -27,7 +27,6 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
namespace llvm {
diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td
index 3a43b22ddf3d..adeaf5410dcc 100644
--- a/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/lib/Target/X86/X86InstrCMovSetCC.td
@@ -1,10 +1,10 @@
-//===- X86InstrCMovSetCC.td - Conditional Move and SetCC ---*- tablegen -*-===//
-//
+//===-- X86InstrCMovSetCC.td - Conditional Move and SetCC --*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 conditional move and set on condition
@@ -21,17 +21,20 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
: I<opc, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst,
- (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))]>,TB,OpSize;
+ (X86cmov GR16:$src1, GR16:$src2, CondNode, EFLAGS))],
+ IIC_CMOV16_RR>,TB,OpSize;
def #NAME#32rr
: I<opc, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst,
- (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))]>, TB;
+ (X86cmov GR32:$src1, GR32:$src2, CondNode, EFLAGS))],
+ IIC_CMOV32_RR>, TB;
def #NAME#64rr
:RI<opc, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst,
- (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))]>, TB;
+ (X86cmov GR64:$src1, GR64:$src2, CondNode, EFLAGS))],
+ IIC_CMOV32_RR>, TB;
}
let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst" in {
@@ -39,17 +42,18 @@ multiclass CMOV<bits<8> opc, string Mnemonic, PatLeaf CondNode> {
: I<opc, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
!strconcat(Mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
[(set GR16:$dst, (X86cmov GR16:$src1, (loadi16 addr:$src2),
- CondNode, EFLAGS))]>, TB, OpSize;
+ CondNode, EFLAGS))], IIC_CMOV16_RM>,
+ TB, OpSize;
def #NAME#32rm
: I<opc, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
!strconcat(Mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
[(set GR32:$dst, (X86cmov GR32:$src1, (loadi32 addr:$src2),
- CondNode, EFLAGS))]>, TB;
+ CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
def #NAME#64rm
:RI<opc, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
!strconcat(Mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
- CondNode, EFLAGS))]>, TB;
+ CondNode, EFLAGS))], IIC_CMOV32_RM>, TB;
} // Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst"
} // end multiclass
@@ -78,10 +82,12 @@ multiclass SETCC<bits<8> opc, string Mnemonic, PatLeaf OpNode> {
let Uses = [EFLAGS] in {
def r : I<opc, MRM0r, (outs GR8:$dst), (ins),
!strconcat(Mnemonic, "\t$dst"),
- [(set GR8:$dst, (X86setcc OpNode, EFLAGS))]>, TB;
+ [(set GR8:$dst, (X86setcc OpNode, EFLAGS))],
+ IIC_SET_R>, TB;
def m : I<opc, MRM0m, (outs), (ins i8mem:$dst),
!strconcat(Mnemonic, "\t$dst"),
- [(store (X86setcc OpNode, EFLAGS), addr:$dst)]>, TB;
+ [(store (X86setcc OpNode, EFLAGS), addr:$dst)],
+ IIC_SET_M>, TB;
} // Uses = [EFLAGS]
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index da28690672a6..6f9e8492613a 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -112,23 +112,39 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
"# variable sized alloca for segmented stacks",
[(set GR32:$dst,
(X86SegAlloca GR32:$size))]>,
Requires<[In32BitMode]>;
-let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
"# variable sized alloca for segmented stacks",
[(set GR64:$dst,
(X86SegAlloca GR64:$size))]>,
Requires<[In64BitMode]>;
-
}
+// The MSVC runtime contains an _ftol2 routine for converting floating-point
+// to integer values. It has a strange calling convention: the input is
+// popped from the x87 stack, and the return value is given in EDX:EAX. No
+// other registers (aside from flags) are touched.
+// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
+// variant is unnecessary.
+
+let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
+ def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
+ "# win32 fptoui",
+ [(X86WinFTOL RFP32:$src)]>,
+ Requires<[In32BitMode]>;
+ def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
+ "# win32 fptoui",
+ [(X86WinFTOL RFP64:$src)]>,
+ Requires<[In32BitMode]>;
+}
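// A hedged sketch of the expected lowering (assumed from the comment above):
//   fld <src>          ; push the value onto the x87 stack
//   call _ftol2        ; MSVC runtime pops ST(0), result in EDX:EAX
// which is why only EAX, EDX and EFLAGS need to appear in Defs.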
//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
@@ -137,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR32:$addr)]>;
+ [(X86ehret GR32:$addr)], IIC_RET>;
}
@@ -145,8 +161,26 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
"ret\t#eh_return, addr: $addr",
- [(X86ehret GR64:$addr)]>;
+ [(X86ehret GR64:$addr)], IIC_RET>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions used by segmented stacks.
+//
+// This is lowered into a RET instruction by MCInstLower. We need
+// this so that we don't have to have a MachineBasicBlock which ends
+// with a RET and also has successors.
+let isPseudo = 1 in {
+def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
+ "", []>;
+
+// This instruction is lowered to a RET followed by a MOV. The two
+// instructions are not generated on a higher level since then the
+// verifier sees a MachineBasicBlock ending with a non-terminator.
+def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
+ "", []>;
}
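// Lowering sketch (an assumption based on the comments above):
//   MORESTACK_RET              ==>  ret
//   MORESTACK_RET_RESTORE_R10  ==>  ret ; mov %rax, %r10
// Emitting each as one pseudo until MCInstLower keeps the verifier from
// ever seeing a block that ends in a RET yet still has successors.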
//===----------------------------------------------------------------------===//
@@ -159,7 +193,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in {
def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
- [(set GR8:$dst, 0)]>;
+ [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
// We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
// encoding and avoids a partial-register update sometimes, but doing so
@@ -168,11 +202,11 @@ def MOV8r0 : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
// to an MCInst.
def MOV16r0 : I<0x31, MRMInitReg, (outs GR16:$dst), (ins),
"",
- [(set GR16:$dst, 0)]>, OpSize;
+ [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
// FIXME: Set encoding to pseudo.
def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, 0)]>;
+ [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
}
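// Worked example of the encoding trade-off above (illustrative): in 32-bit
// mode "xor %ax, %ax" is 66 31 C0, one byte longer than "xor %eax, %eax"
// (31 C0), and it writes only AX, which can stall a later read of EAX on
// partial-register hardware; both problems go away once MOV16r0 is rewritten
// in terms of MOV32r0.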
// We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -184,7 +218,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
let Defs = [EFLAGS], isCodeGenOnly=1,
AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, 0)]>;
+ [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -192,7 +226,8 @@ def MOV64r0 : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
isCodeGenOnly = 1 in
def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
- "", [(set GR64:$dst, i64immZExt32:$src)]>;
+ "", [(set GR64:$dst, i64immZExt32:$src)],
+ IIC_ALU_NONMEM>;
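// Worked example (illustrative): RAX = 0x12345678 can be materialized as
//   movl $0x12345678, %eax       ; B8 78 56 34 12  (5 bytes, no REX.W)
// because a 32-bit register write implicitly zeroes bits 63:32; the 10-byte
// movabsq form is only required when the constant has high bits set.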
// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
@@ -202,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
// FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
// X86CodeEmitter.
def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
- [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
- [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+ [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>,
OpSize;
def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
- [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
- [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+ [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+ IIC_ALU_NONMEM>;
} // isCodeGenOnly
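// Worked example of the sbb idiom above (illustrative):
//   sbb %eax, %eax     ; EAX = EAX - EAX - CF = 0 - CF
// yields 0 when CF is clear and 0xFFFFFFFF (-1) when CF is set, i.e. an
// all-zeros/all-ones mask materialized from EFLAGS in one instruction.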
@@ -262,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
// String Pseudo Instructions
//
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
- [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
- [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
- [(X86rep_movs i32)]>, REP;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In32BitMode]>;
}
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
- [(X86rep_movs i64)]>, REP;
-
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+ [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+ [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+ Requires<[In64BitMode]>;
+}
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
- [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
- [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
- [(X86rep_stos i32)]>, REP;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
- [(X86rep_stos i64)]>, REP;
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,ECX,EDI] in
+ def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+ let Uses = [AX,ECX,EDI] in
+ def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In32BitMode]>;
+ let Uses = [EAX,ECX,EDI] in
+ def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In32BitMode]>;
+}
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+ let Uses = [AL,RCX,RDI] in
+ def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+ let Uses = [AX,RCX,RDI] in
+ def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+ Requires<[In64BitMode]>;
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+
+ let Uses = [RAX,RCX,RDI] in
+ def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+ [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+ Requires<[In64BitMode]>;
+}
//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
@@ -533,26 +605,17 @@ def ATOMSWAP6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2),
// Memory barriers
// TODO: Get this to fold the constant into the instruction.
-let isCodeGenOnly = 1 in
+let isCodeGenOnly = 1, Defs = [EFLAGS] in
def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
"lock\n\t"
"or{l}\t{$zero, $dst|$dst, $zero}",
- []>, Requires<[In32BitMode]>, LOCK;
+ [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
let hasSideEffects = 1 in
def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
"#MEMBARRIER",
[(X86MemBarrier)]>;
-// TODO: Get this to fold the constant into the instruction.
-let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
-def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
- "lock\n\t"
- "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
- [(X86MemBarrierNoSSE GR64:$zero)]>,
- Requires<[In64BitMode]>, LOCK;
-
-
// RegOpc corresponds to the mr version of the instruction
// ImmOpc corresponds to the mi version of the instruction
// ImmOpc8 corresponds to the mi8 version of the instruction
@@ -566,72 +629,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, OpSize, LOCK;
+ [], IIC_ALU_NONMEM>, OpSize, LOCK;
def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_NONMEM>, LOCK;
def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{b}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{w}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{l}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
!strconcat("lock\n\t", mnemonic, "{q}\t",
"{$src2, $dst|$dst, $src2}"),
- []>, LOCK;
+ [], IIC_ALU_MEM>, LOCK;
}
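// Hypothetical usage sketch (the real defm lines fall outside this hunk):
//   defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, "add">;
// Splicing RegOpc = 0x00 with a trailing 0 or 1 yields 0x00 for the 8-bit
// form and 0x01 for the wider forms, matching the real ADD r/m,r encodings;
// ImmOpc = 0x80 likewise becomes 0x80/0x81, and ImmOpc8 = 0x83 is the
// sign-extended-imm8 form.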
@@ -648,29 +711,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LOCK_INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "inc{b}\t$dst", []>, LOCK;
+ "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "inc{w}\t$dst", []>, OpSize, LOCK;
+ "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "inc{l}\t$dst", []>, LOCK;
+ "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "inc{q}\t$dst", []>, LOCK;
+ "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
"lock\n\t"
- "dec{b}\t$dst", []>, LOCK;
+ "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
"lock\n\t"
- "dec{w}\t$dst", []>, OpSize, LOCK;
+ "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
"lock\n\t"
- "dec{l}\t$dst", []>, LOCK;
+ "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
"lock\n\t"
- "dec{q}\t$dst", []>, LOCK;
+ "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
}
// Atomic compare and swap.
@@ -679,42 +742,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
"lock\n\t"
"cmpxchg8b\t$ptr",
- [(X86cas8 addr:$ptr)]>, TB, LOCK;
+ [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
isCodeGenOnly = 1 in
def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
"lock\n\t"
"cmpxchg16b\t$ptr",
- [(X86cas16 addr:$ptr)]>, TB, LOCK,
+ [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
Requires<[HasCmpxchg16b]>;
let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
"lock\n\t"
"cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
}
let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
"lock\n\t"
"cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+ [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
}
let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
"lock\n\t"
"cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
}
let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
"cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
- [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+ [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
}
// Atomic exchange and add
@@ -722,22 +785,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
def LXADD8 : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
"lock\n\t"
"xadd{b}\t{$val, $ptr|$ptr, $val}",
- [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+ [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
+ IIC_XADD_LOCK_MEM8>,
TB, LOCK;
def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
"lock\n\t"
"xadd{w}\t{$val, $ptr|$ptr, $val}",
- [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+ [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, OpSize, LOCK;
def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
"lock\n\t"
"xadd{l}\t{$val, $ptr|$ptr, $val}",
- [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+ [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
"lock\n\t"
"xadd{q}\t{$val, $ptr|$ptr, $val}",
- [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+ [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
+ IIC_XADD_LOCK_MEM>,
TB, LOCK;
}
@@ -936,14 +1003,9 @@ def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+ (CALL64pcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i64 texternalsym:$dst)),
- (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
- (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
- (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+ (CALL64pcrel32 texternalsym:$dst)>;
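// Note (illustrative): CALL64pcrel32 is the plain E8 rel32 encoding, so
// these patterns rely on the small-code-model guarantee stated above that
// the callee lies within +/-2GB of RIP.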
// tailcall stuff
def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
@@ -1105,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
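// Worked example of the or_is_add predicate above (illustrative): when the
// known-zero masks prove the operands can share no set bits, OR and ADD
// agree bit-for-bit; e.g. with x = (a << 4) and y = 3,
//   (or x, y) == (add x, y)
// so the selector may use ADD/LEA forms for the OR.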
@@ -1440,58 +1500,62 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+// Helper imms that check if a mask doesn't change significant shift bits.
+def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
+def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
+
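// Worked example for the immShift predicates above (illustrative): hardware
// masks 32-bit shift counts to 5 bits and 64-bit counts to 6, so any mask
// with at least that many trailing ones leaves the effective count alone:
//   (y & 0x3f) & 31 == y & 31,  hence (shl x, (and y, 0x3f)) ==> (shl x, y)
// CountTrailingOnes_32(31) == 5 and CountTrailingOnes_32(0x3f) == 6, so both
// 31 and 0x3f satisfy immShift32.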
// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
+def : Pat<(shl GR8:$src1, (and CL, immShift32)),
(SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
+def : Pat<(shl GR16:$src1, (and CL, immShift32)),
(SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
+def : Pat<(shl GR32:$src1, (and CL, immShift32)),
(SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHL32mCL addr:$dst)>;
-def : Pat<(srl GR8:$src1, (and CL, 31)),
+def : Pat<(srl GR8:$src1, (and CL, immShift32)),
(SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
+def : Pat<(srl GR16:$src1, (and CL, immShift32)),
(SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
+def : Pat<(srl GR32:$src1, (and CL, immShift32)),
(SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SHR32mCL addr:$dst)>;
-def : Pat<(sra GR8:$src1, (and CL, 31)),
+def : Pat<(sra GR8:$src1, (and CL, immShift32)),
(SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
+def : Pat<(sra GR16:$src1, (and CL, immShift32)),
(SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
+def : Pat<(sra GR32:$src1, (and CL, immShift32)),
(SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
+def : Pat<(shl GR64:$src1, (and CL, immShift64)),
(SHL64rCL GR64:$src1)>;
def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
-def : Pat<(srl GR64:$src1, (and CL, 63)),
+def : Pat<(srl GR64:$src1, (and CL, immShift64)),
(SHR64rCL GR64:$src1)>;
def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
-def : Pat<(sra GR64:$src1, (and CL, 63)),
+def : Pat<(sra GR64:$src1, (and CL, immShift64)),
(SAR64rCL GR64:$src1)>;
def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
@@ -1735,3 +1799,11 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Bit scan instruction patterns to match explicit zero-undef behavior.
+def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
+def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
+def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
+def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
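// Rationale (inferred, not stated in the patch): cttz_zero_undef leaves the
// result unspecified for a zero input, which matches BSF exactly (BSF's
// destination is undefined when the source is zero), so no zero-guard
// (test/cmov) or TZCNT is needed, unlike for plain cttz.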
diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td
index c228a0aed59c..bf11fdec5add 100644
--- a/lib/Target/X86/X86InstrControl.td
+++ b/lib/Target/X86/X86InstrControl.td
@@ -1,4 +1,4 @@
-//===- X86InstrControl.td - Control Flow Instructions ------*- tablegen -*-===//
+//===-- X86InstrControl.td - Control Flow Instructions -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,39 +20,47 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
- [(X86retflag 0)]>;
+ [(X86retflag 0)], IIC_RET>;
+ def RETW : I <0xC3, RawFrm, (outs), (ins variable_ops),
+ "ret{w}",
+ [], IIC_RET>, OpSize;
def RETI : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
"ret\t$amt",
- [(X86retflag timm:$amt)]>;
+ [(X86retflag timm:$amt)], IIC_RET_IMM>;
def RETIW : Ii16<0xC2, RawFrm, (outs), (ins i16imm:$amt, variable_ops),
- "retw\t$amt",
- []>, OpSize;
+ "ret{w}\t$amt",
+ [], IIC_RET_IMM>, OpSize;
def LRETL : I <0xCB, RawFrm, (outs), (ins),
- "lretl", []>;
+ "{l}ret{l|f}", [], IIC_RET>;
+ def LRETW : I <0xCB, RawFrm, (outs), (ins),
+ "{l}ret{w|f}", [], IIC_RET>, OpSize;
def LRETQ : RI <0xCB, RawFrm, (outs), (ins),
- "lretq", []>;
+ "{l}ret{q|f}", [], IIC_RET>;
def LRETI : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lret\t$amt", []>;
+ "{l}ret{l|f}\t$amt", [], IIC_RET>;
def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt),
- "lretw\t$amt", []>, OpSize;
+ "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize;
}
// Unconditional branches.
let isBarrier = 1, isBranch = 1, isTerminator = 1 in {
def JMP_4 : Ii32PCRel<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp\t$dst", [(br bb:$dst)]>;
+ "jmp\t$dst", [(br bb:$dst)], IIC_JMP_REL>;
def JMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst", []>;
+ "jmp\t$dst", [], IIC_JMP_REL>;
+  // FIXME: Intel syntax for JMP64pcrel32 such that it is not ambiguous
+ // with JMP_1.
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
- "jmp{q}\t$dst", []>;
+ "jmpq\t$dst", [], IIC_JMP_REL>;
}
// Conditional Branches.
let isBranch = 1, isTerminator = 1, Uses = [EFLAGS] in {
multiclass ICBr<bits<8> opc1, bits<8> opc4, string asm, PatFrag Cond> {
- def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, []>;
+ def _1 : Ii8PCRel <opc1, RawFrm, (outs), (ins brtarget8:$dst), asm, [],
+ IIC_Jcc>;
def _4 : Ii32PCRel<opc4, RawFrm, (outs), (ins brtarget:$dst), asm,
- [(X86brcond bb:$dst, Cond, EFLAGS)]>, TB;
+ [(X86brcond bb:$dst, Cond, EFLAGS)], IIC_Jcc>, TB;
}
}
@@ -74,61 +82,61 @@ defm JLE : ICBr<0x7E, 0x8E, "jle\t$dst", X86_COND_LE>;
defm JG : ICBr<0x7F, 0x8F, "jg\t$dst" , X86_COND_G>;
// jcx/jecx/jrcx instructions.
-let isAsmParserOnly = 1, isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1 in {
// These are the 32-bit versions of this instruction for the asm parser. In
// 32-bit mode, the form with the address-size prefix is jcxz and the
// unprefixed form is jecxz.
let Uses = [CX] in
def JCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jcxz\t$dst", []>, AdSize, Requires<[In32BitMode]>;
+ "jcxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In32BitMode]>;
let Uses = [ECX] in
def JECXZ_32 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jecxz\t$dst", []>, Requires<[In32BitMode]>;
+ "jecxz\t$dst", [], IIC_JCXZ>, Requires<[In32BitMode]>;
// J*CXZ instructions: 64-bit versions of this instruction for the asm
// parser. In 64-bit mode, the form with the address-size prefix is jecxz and
// the unprefixed form is jrcxz.
let Uses = [ECX] in
def JECXZ_64 : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jecxz\t$dst", []>, AdSize, Requires<[In64BitMode]>;
+ "jecxz\t$dst", [], IIC_JCXZ>, AdSize, Requires<[In64BitMode]>;
let Uses = [RCX] in
def JRCXZ : Ii8PCRel<0xE3, RawFrm, (outs), (ins brtarget8:$dst),
- "jrcxz\t$dst", []>, Requires<[In64BitMode]>;
+ "jrcxz\t$dst", [], IIC_JCXZ>, Requires<[In64BitMode]>;
}
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
+ [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
+ [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
+ [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
+ [(brind (loadi64 addr:$dst))], IIC_JMP_MEM>, Requires<[In64BitMode]>;
def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ "ljmp{w}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, OpSize;
def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
+ "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
- "ljmp{q}\t{*}$dst", []>;
+ "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
- "ljmp{w}\t{*}$dst", []>, OpSize;
+ "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize;
def FARJMP32m : I<0xFF, MRM5m, (outs), (ins opaque48mem:$dst),
- "ljmp{l}\t{*}$dst", []>;
+ "ljmp{l}\t{*}$dst", [], IIC_JMP_FAR_MEM>;
}
// Loop instructions
-def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", [], IIC_LOOP>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", [], IIC_LOOPE>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", [], IIC_LOOPNE>;
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -138,32 +146,30 @@ let isCall = 1 in
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
+ let Uses = [ESP] in {
def CALLpcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i32imm_pcrel:$dst,variable_ops),
- "call{l}\t$dst", []>, Requires<[In32BitMode]>;
+ "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
- "call{l}\t{*}$dst", [(X86call GR32:$dst)]>,
+ "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>,
Requires<[In32BitMode]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
- "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))]>,
+ "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>,
Requires<[In32BitMode]>;
def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
(ins i16imm:$off, i16imm:$seg),
- "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ "lcall{w}\t{$seg, $off|$off, $seg}", [],
+ IIC_CALL_FAR_PTR>, OpSize;
def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
(ins i32imm:$off, i16imm:$seg),
- "lcall{l}\t{$seg, $off|$off, $seg}", []>;
+ "lcall{l}\t{$seg, $off|$off, $seg}", [],
+ IIC_CALL_FAR_PTR>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
- "lcall{w}\t{*}$dst", []>, OpSize;
+ "lcall{w}\t{*}$dst", [], IIC_CALL_FAR_MEM>, OpSize;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
- "lcall{l}\t{*}$dst", []>;
+ "lcall{l}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
// callw for 16-bit code for the assembler.
let isAsmParserOnly = 1 in
@@ -177,11 +183,7 @@ let isCall = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
- let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP] in {
+ let Uses = [ESP] in {
def TCRETURNdi : PseudoI<(outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops), []>;
def TCRETURNri : PseudoI<(outs),
@@ -194,74 +196,43 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
// mcinst.
def TAILJMPd : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL",
- []>;
+ "jmp\t$dst # TAILCALL",
+ [], IIC_JMP_REL>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "", []>; // FIXME: Remove encoding when JIT is dead.
+ "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL", []>;
+ "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
}
//===----------------------------------------------------------------------===//
// Call Instructions...
//
-let isCall = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [RSP] in {
-
- // NOTE: this pattern doesn't match "X86call imm", because we do not know
- // that the offset between an arbitrary immediate and the call will fit in
- // the 32-bit pcrel field that we have.
- def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
- Requires<[In64BitMode, NotWin64]>;
- def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
- Requires<[In64BitMode, NotWin64]>;
- def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
- "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
- Requires<[In64BitMode, NotWin64]>;
- def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
- "lcall{q}\t{*}$dst", []>;
- }
+// RSP is marked as a use to prevent stack-pointer assignments that appear
+// immediately before calls from potentially appearing dead. Uses for argument
+// registers are added manually.
+let isCall = 1, Uses = [RSP] in {
+ // NOTE: this pattern doesn't match "X86call imm", because we do not know
+ // that the offset between an arbitrary immediate and the call will fit in
+ // the 32-bit pcrel field that we have.
+ def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", [], IIC_CALL_RI>,
+ Requires<[In64BitMode]>;
+ def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call GR64:$dst)],
+ IIC_CALL_RI>,
+ Requires<[In64BitMode]>;
+ def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+ "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))],
+ IIC_CALL_MEM>,
+ Requires<[In64BitMode]>;
- // FIXME: We need to teach codegen about single list of call-clobbered
- // registers.
-let isCall = 1, isCodeGenOnly = 1 in
- // All calls clobber the non-callee saved registers. RSP is marked as
- // a use to prevent stack-pointer assignments that appear immediately
- // before calls from potentially appearing dead. Uses for argument
- // registers are added manually.
- let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
- Uses = [RSP] in {
- def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
- (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
- Requires<[IsWin64]>;
- def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
- "call{q}\t{*}$dst",
- [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
- def WINCALL64m : I<0xFF, MRM2m, (outs),
- (ins i64mem:$dst,variable_ops),
- "call{q}\t{*}$dst",
- [(X86call (loadi64 addr:$dst))]>,
- Requires<[IsWin64]>;
- }
+ def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst),
+ "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>;
+}
let isCall = 1, isCodeGenOnly = 1 in
// __chkstk(MSVC): clobber R10, R11 and EFLAGS.
@@ -270,18 +241,13 @@ let isCall = 1, isCodeGenOnly = 1 in
Uses = [RSP] in {
def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
- "call{q}\t$dst", []>,
+ "call{q}\t$dst", [], IIC_CALL_RI>,
Requires<[IsWin64]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
- // AMD64 cc clobbers RSI, RDI, XMM6-XMM15.
- let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
- FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
- MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
- XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
- Uses = [RSP],
+ let Uses = [RSP],
usesCustomInserter = 1 in {
def TCRETURNdi64 : PseudoI<(outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
@@ -294,11 +260,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, variable_ops),
- "jmp\t$dst # TAILCALL", []>;
+ "jmp\t$dst # TAILCALL", [], IIC_JMP_REL>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
+ "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
- "jmp{q}\t{*}$dst # TAILCALL", []>;
+ "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>;
}
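Two changes run through this whole file: every instruction gains an instruction-itinerary class (IIC_*) as a new trailing template argument for scheduling, and the long per-call Defs clobber lists disappear, with WINCALL64* folded into plain CALL64* (the deleted FIXME about a "single list of call-clobbered registers" suggests the clobber set is now supplied at the call site instead of per opcode). A hedged C++ analogy for the itinerary threading, assuming nothing about LLVM's real classes: defaulting the new parameter keeps untouched definitions compiling while updated ones opt in.

#include <iostream>
#include <string>
#include <utility>

// Illustrative stand-in for the tablegen itinerary enum.
enum InstrItinClass { IIC_DEFAULT, IIC_RET, IIC_JMP_REL, IIC_CALL_RI };

struct Inst {
  std::string Asm;
  InstrItinClass Itin;
  // Mirrors "InstrItinClass itin = IIC_DEFAULT" in the instruction class
  // signatures: definitions written before the parameter existed still work.
  explicit Inst(std::string A, InstrItinClass I = IIC_DEFAULT)
      : Asm(std::move(A)), Itin(I) {}
};

int main() {
  Inst Legacy("ret");           // no itinerary given; the default applies
  Inst Updated("ret", IIC_RET); // scheduling information supplied
  std::cout << Legacy.Asm << " itin=" << Legacy.Itin << "\n"
            << Updated.Asm << " itin=" << Updated.Itin << "\n";
  return 0;
}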
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index e62e6b701f46..0d5490ad9cc1 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -1,10 +1,10 @@
-//===- X86InstrExtension.td - Sign and Zero Extensions -----*- tablegen -*-===//
-//
+//===-- X86InstrExtension.td - Sign and Zero Extensions ----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the sign and zero extension operations.
@@ -37,40 +37,47 @@ let neverHasSideEffects = 1 in {
}
+
// Sign/Zero extenders
def MOVSX16rr8 : I<0xBE, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_R8>,
+ TB, OpSize;
def MOVSX16rm8 : I<0xBE, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movs{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movs{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVSX_R16_M8>,
+ TB, OpSize;
def MOVSX32rr8 : I<0xBE, MRMSrcReg, (outs GR32:$dst), (ins GR8:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR8:$src))]>, TB;
+ [(set GR32:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
def MOVSX32rm8 : I<0xBE, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movs{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i8 addr:$src))]>, TB;
+ [(set GR32:$dst, (sextloadi32i8 addr:$src))], IIC_MOVSX>, TB;
def MOVSX32rr16: I<0xBF, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sext GR16:$src))]>, TB;
+ [(set GR32:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
def MOVSX32rm16: I<0xBF, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movs{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (sextloadi32i16 addr:$src))]>, TB;
+ [(set GR32:$dst, (sextloadi32i16 addr:$src))], IIC_MOVSX>,
+ TB;
def MOVZX16rr8 : I<0xB6, MRMSrcReg, (outs GR16:$dst), (ins GR8:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_R8>,
+ TB, OpSize;
def MOVZX16rm8 : I<0xB6, MRMSrcMem, (outs GR16:$dst), (ins i8mem:$src),
- "movz{bw|x}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
+ "movz{bw|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX_R16_M8>,
+ TB, OpSize;
def MOVZX32rr8 : I<0xB6, MRMSrcReg, (outs GR32:$dst), (ins GR8 :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR8:$src))]>, TB;
+ [(set GR32:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
def MOVZX32rm8 : I<0xB6, MRMSrcMem, (outs GR32:$dst), (ins i8mem :$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i8 addr:$src))]>, TB;
+ [(set GR32:$dst, (zextloadi32i8 addr:$src))], IIC_MOVZX>, TB;
def MOVZX32rr16: I<0xB7, MRMSrcReg, (outs GR32:$dst), (ins GR16:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zext GR16:$src))]>, TB;
+ [(set GR32:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"movz{wl|x}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (zextloadi32i16 addr:$src))]>, TB;
+ [(set GR32:$dst, (zextloadi32i16 addr:$src))], IIC_MOVZX>,
+ TB;
// These are the same as the regular MOVZX32rr8 and MOVZX32rm8
// except that they use GR32_NOREX for the output operand register class
@@ -78,12 +85,12 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- []>, TB;
+ [], IIC_MOVZX>, TB;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src}",
- []>, TB;
+ [], IIC_MOVZX>, TB;
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
// operand, which makes it a rare instruction with an 8-bit register
@@ -91,32 +98,38 @@ def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
// were generalized, this would require a special register class.
def MOVSX64rr8 : RI<0xBE, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR8:$src))]>, TB;
+ [(set GR64:$dst, (sext GR8:$src))], IIC_MOVSX>, TB;
def MOVSX64rm8 : RI<0xBE, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
"movs{bq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i8 addr:$src))]>, TB;
+ [(set GR64:$dst, (sextloadi64i8 addr:$src))], IIC_MOVSX>,
+ TB;
def MOVSX64rr16: RI<0xBF, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR16:$src))]>, TB;
+ [(set GR64:$dst, (sext GR16:$src))], IIC_MOVSX>, TB;
def MOVSX64rm16: RI<0xBF, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
"movs{wq|x}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i16 addr:$src))]>, TB;
+ [(set GR64:$dst, (sextloadi64i16 addr:$src))], IIC_MOVSX>,
+ TB;
def MOVSX64rr32: RI<0x63, MRMSrcReg, (outs GR64:$dst), (ins GR32:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sext GR32:$src))]>;
+ [(set GR64:$dst, (sext GR32:$src))], IIC_MOVSX>;
def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
"movs{lq|xd}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (sextloadi64i32 addr:$src))]>;
+ [(set GR64:$dst, (sextloadi64i32 addr:$src))], IIC_MOVSX>;
// movzbq and movzwq encodings for the disassembler
def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", []>, TB;
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB;
// FIXME: These should be Pat patterns.
let isCodeGenOnly = 1 in {
@@ -124,15 +137,17 @@ let isCodeGenOnly = 1 in {
// Use movzbl instead of movzbq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr8 : I<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8 :$src),
- "", [(set GR64:$dst, (zext GR8:$src))]>, TB;
+ "", [(set GR64:$dst, (zext GR8:$src))], IIC_MOVZX>, TB;
def MOVZX64rm8 : I<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem :$src),
- "", [(set GR64:$dst, (zextloadi64i8 addr:$src))]>, TB;
+ "", [(set GR64:$dst, (zextloadi64i8 addr:$src))], IIC_MOVZX>,
+ TB;
// Use movzwl instead of movzwq when the destination is a register; it's
// equivalent due to implicit zero-extending, and it has a smaller encoding.
def MOVZX64rr16: I<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "", [(set GR64:$dst, (zext GR16:$src))]>, TB;
+ "", [(set GR64:$dst, (zext GR16:$src))], IIC_MOVZX>, TB;
def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "", [(set GR64:$dst, (zextloadi64i16 addr:$src))]>, TB;
+ "", [(set GR64:$dst, (zextloadi64i16 addr:$src))],
+ IIC_MOVZX>, TB;
// There's no movzlq instruction, but movl can be used for this purpose, using
// implicit zero-extension. The preferred way to do 32-bit-to-64-bit zero
@@ -142,10 +157,9 @@ def MOVZX64rm16: I<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
// necessarily all zero. In such cases, we fall back to these explicit zext
// instructions.
def MOVZX64rr32 : I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
- "", [(set GR64:$dst, (zext GR32:$src))]>;
+ "", [(set GR64:$dst, (zext GR32:$src))], IIC_MOVZX>;
def MOVZX64rm32 : I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
- "", [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
-
-
+ "", [(set GR64:$dst, (zextloadi64i32 addr:$src))],
+ IIC_MOVZX>;
}
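The comments in this file lean on one architectural fact: in 64-bit mode, writing a 32-bit register zero-clears bits 63:32, so movzbl/movzwl/movl already produce a zero-extended 64-bit value and the longer REX-prefixed movzbq/movzwq register forms are kept only for the disassembler. A minimal C++ sketch of that semantics (write32 is an illustrative model, not an LLVM API):

#include <cassert>
#include <cstdint>

// Model "write the 32-bit subregister in 64-bit mode": the full 64-bit
// register observes the 32-bit result zero-extended.
static uint64_t write32(uint32_t v) { return static_cast<uint64_t>(v); }

int main() {
  uint8_t b = 0xab;
  uint16_t w = 0xcdef;
  // movzbl %cl, %eax then read %rax: same as zext i8 -> i64.
  assert(write32(static_cast<uint32_t>(b)) == 0xabULL);
  // movzwl %cx, %eax then read %rax: same as zext i16 -> i64.
  assert(write32(static_cast<uint32_t>(w)) == 0xcdefULL);
  return 0;
}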
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index d868773d2d69..d57937b2e1b7 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -1,4 +1,4 @@
-//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//===-- X86InstrFMA.td - FMA Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,7 @@
// FMA3 - Intel 3 operand Fused Multiply-Add instructions
//===----------------------------------------------------------------------===//
-multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+multiclass fma3p_rm<bits<8> opc, string OpcodeStr> {
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -34,27 +34,187 @@ multiclass fma_rm<bits<8> opc, string OpcodeStr> {
[]>;
}
-multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpcodeStr, string PackTy> {
- defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
- defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
- defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma3p_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma3p_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma3p_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
}
-let isAsmParserOnly = 1 in {
- // Fused Multiply-Add
- defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
- defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
- defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
- defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
- defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
- defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
- defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
- defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+// Fused Multiply-Add
+let ExeDomain = SSEPackedSingle in {
+ defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+}
+
+let ExeDomain = SSEPackedDouble in {
+ defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+ defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+}
+
+// Fused Negative Multiply-Add
+let ExeDomain = SSEPackedSingle in {
+ defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
+
+multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
- // Fused Negative Multiply-Add
- defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
- defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
- defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
- defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr> {
+ defm SSr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132ss"), f32mem>;
+ defm SSr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213ss"), f32mem>;
+ defm SSr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231ss"), f32mem>;
+ defm SDr132 : fma3s_rm<opc132, !strconcat(OpcodeStr, "132sd"), f64mem>, VEX_W;
+ defm SDr213 : fma3s_rm<opc213, !strconcat(OpcodeStr, "213sd"), f64mem>, VEX_W;
+ defm SDr231 : fma3s_rm<opc231, !strconcat(OpcodeStr, "231sd"), f64mem>, VEX_W;
}
+
+defm VFMADD : fma3s_forms<0x99, 0xA9, 0xB9, "vfmadd">, VEX_LIG;
+defm VFMSUB : fma3s_forms<0x9B, 0xAB, 0xBB, "vfmsub">, VEX_LIG;
+
+defm VFNMADD : fma3s_forms<0x9D, 0xAD, 0xBD, "vfnmadd">, VEX_LIG;
+defm VFNMSUB : fma3s_forms<0x9F, 0xAF, 0xBF, "vfnmsub">, VEX_LIG;
+
+//===----------------------------------------------------------------------===//
+// FMA4 - AMD 4 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+
+multiclass fma4s<bits<8> opc, string OpcodeStr, Operand memop,
+ ComplexPattern mem_cpat, Intrinsic Int> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, memop:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, mem_cpat:$src3))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, memop:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>;
+// For disassembler
+let isCodeGenOnly = 1 in
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+}
+
+multiclass fma4p<bits<8> opc, string OpcodeStr,
+ Intrinsic Int128, Intrinsic Int256,
+ PatFrag ld_frag128, PatFrag ld_frag256> {
+ def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, MemOp4;
+ def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2,
+ (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
+ def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
+ def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>, VEX_W, MemOp4;
+ def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2,
+ (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4;
+ def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (ld_frag256 addr:$src2), VR256:$src3))]>;
+// For disassembler
+let isCodeGenOnly = 1 in {
+ def rr_REV : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+ def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
+} // isCodeGenOnly = 1
+}
+
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", ssmem, sse_load_f32,
+ int_x86_fma4_vfmadd_ss>;
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfmadd_sd>;
+defm VFMADDPS4 : fma4p<0x68, "vfmaddps", int_x86_fma4_vfmadd_ps,
+ int_x86_fma4_vfmadd_ps_256, memopv4f32, memopv8f32>;
+defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", int_x86_fma4_vfmadd_pd,
+ int_x86_fma4_vfmadd_pd_256, memopv2f64, memopv4f64>;
+defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", ssmem, sse_load_f32,
+ int_x86_fma4_vfmsub_ss>;
+defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfmsub_sd>;
+defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", int_x86_fma4_vfmsub_ps,
+ int_x86_fma4_vfmsub_ps_256, memopv4f32, memopv8f32>;
+defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", int_x86_fma4_vfmsub_pd,
+ int_x86_fma4_vfmsub_pd_256, memopv2f64, memopv4f64>;
+defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", ssmem, sse_load_f32,
+ int_x86_fma4_vfnmadd_ss>;
+defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfnmadd_sd>;
+defm VFNMADDPS4 : fma4p<0x78, "vfnmaddps", int_x86_fma4_vfnmadd_ps,
+ int_x86_fma4_vfnmadd_ps_256, memopv4f32, memopv8f32>;
+defm VFNMADDPD4 : fma4p<0x79, "vfnmaddpd", int_x86_fma4_vfnmadd_pd,
+ int_x86_fma4_vfnmadd_pd_256, memopv2f64, memopv4f64>;
+defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", ssmem, sse_load_f32,
+ int_x86_fma4_vfnmsub_ss>;
+defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", sdmem, sse_load_f64,
+ int_x86_fma4_vfnmsub_sd>;
+defm VFNMSUBPS4 : fma4p<0x7C, "vfnmsubps", int_x86_fma4_vfnmsub_ps,
+ int_x86_fma4_vfnmsub_ps_256, memopv4f32, memopv8f32>;
+defm VFNMSUBPD4 : fma4p<0x7D, "vfnmsubpd", int_x86_fma4_vfnmsub_pd,
+ int_x86_fma4_vfnmsub_pd_256, memopv2f64, memopv4f64>;
+defm VFMADDSUBPS4 : fma4p<0x5C, "vfmaddsubps", int_x86_fma4_vfmaddsub_ps,
+ int_x86_fma4_vfmaddsub_ps_256, memopv4f32, memopv8f32>;
+defm VFMADDSUBPD4 : fma4p<0x5D, "vfmaddsubpd", int_x86_fma4_vfmaddsub_pd,
+ int_x86_fma4_vfmaddsub_pd_256, memopv2f64, memopv4f64>;
+defm VFMSUBADDPS4 : fma4p<0x5E, "vfmsubaddps", int_x86_fma4_vfmsubadd_ps,
+ int_x86_fma4_vfmsubadd_ps_256, memopv4f32, memopv8f32>;
+defm VFMSUBADDPD4 : fma4p<0x5F, "vfmsubaddpd", int_x86_fma4_vfmsubadd_pd,
+ int_x86_fma4_vfmsubadd_pd_256, memopv2f64, memopv4f64>;
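The 132/213/231 suffixes generated by fma3p_forms/fma3s_forms name operand orderings: the digits say which of the three operands (1 being the destination) are multiplied and which one is added, so any of the three can be the overwritten register or the memory operand. The FMA4 forms above sidestep the orderings by taking a separate destination. A small C++ sketch of the arithmetic (function names are illustrative; a real FMA also rounds once, which this sketch does not model):

#include <cassert>

// dst = op1 * op3 + op2, where a is operand 1 (the destination).
static double fma132(double a, double b, double c) { return a * c + b; }
// dst = op2 * op1 + op3
static double fma213(double a, double b, double c) { return b * a + c; }
// dst = op2 * op3 + op1
static double fma231(double a, double b, double c) { return b * c + a; }

int main() {
  const double a = 2.0, b = 3.0, c = 5.0;
  assert(fma132(a, b, c) == 2.0 * 5.0 + 3.0); // 13
  assert(fma213(a, b, c) == 3.0 * 2.0 + 5.0); // 11
  assert(fma231(a, b, c) == 3.0 * 5.0 + 2.0); // 17
  return 0;
}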
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 7cb870fabd62..a13887e932b2 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -1,10 +1,10 @@
-//==- X86InstrFPStack.td - Describe the X86 Instruction Set --*- tablegen -*-=//
-//
+//===- X86InstrFPStack.td - FPU Instruction Set ------------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 x87 FPU instruction set, defining the
@@ -225,22 +225,22 @@ class FPrST0PInst<bits<8> o, string asm>
// of some of the 'reverse' forms of the fsub and fdiv instructions. As such,
// we have to put some 'r's in and take them out of weird places.
def ADD_FST0r : FPST0rInst <0xC0, "fadd\t$op">;
-def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, %ST(0)}">;
+def ADD_FrST0 : FPrST0Inst <0xC0, "fadd\t{%st(0), $op|$op, ST(0)}">;
def ADD_FPrST0 : FPrST0PInst<0xC0, "faddp\t$op">;
def SUBR_FST0r : FPST0rInst <0xE8, "fsubr\t$op">;
-def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUB_FrST0 : FPrST0Inst <0xE8, "fsub{r}\t{%st(0), $op|$op, ST(0)}">;
def SUB_FPrST0 : FPrST0PInst<0xE8, "fsub{r}p\t$op">;
def SUB_FST0r : FPST0rInst <0xE0, "fsub\t$op">;
-def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def SUBR_FrST0 : FPrST0Inst <0xE0, "fsub{|r}\t{%st(0), $op|$op, ST(0)}">;
def SUBR_FPrST0 : FPrST0PInst<0xE0, "fsub{|r}p\t$op">;
def MUL_FST0r : FPST0rInst <0xC8, "fmul\t$op">;
-def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, %ST(0)}">;
+def MUL_FrST0 : FPrST0Inst <0xC8, "fmul\t{%st(0), $op|$op, ST(0)}">;
def MUL_FPrST0 : FPrST0PInst<0xC8, "fmulp\t$op">;
def DIVR_FST0r : FPST0rInst <0xF8, "fdivr\t$op">;
-def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIV_FrST0 : FPrST0Inst <0xF8, "fdiv{r}\t{%st(0), $op|$op, ST(0)}">;
def DIV_FPrST0 : FPrST0PInst<0xF8, "fdiv{r}p\t$op">;
def DIV_FST0r : FPST0rInst <0xF0, "fdiv\t$op">;
-def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, %ST(0)}">;
+def DIVR_FrST0 : FPrST0Inst <0xF0, "fdiv{|r}\t{%st(0), $op|$op, ST(0)}">;
def DIVR_FPrST0 : FPrST0PInst<0xF0, "fdiv{|r}p\t$op">;
def COM_FST0r : FPST0rInst <0xD0, "fcom\t$op">;
@@ -330,21 +330,21 @@ defm CMOVNP : FPCMov<X86_COND_NP>;
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
def CMOVB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovb\t{$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmovb\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVBE_F : FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovbe\t{$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmovbe\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
- "fcmove\t{$op, %st(0)|%ST(0), $op}">, DA;
+ "fcmove\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
-                  "fcmovu\t {$op, %st(0)|%ST(0), $op}">, DA;
+                  "fcmovu\t{$op, %st(0)|ST(0), $op}">, DA;
def CMOVNB_F : FPI<0xC0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovnb\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovnb\t{$op, %st(0)|ST(0), $op}">, DB;
def CMOVNBE_F: FPI<0xD0, AddRegFrm, (outs RST:$op), (ins),
- "fcmovnbe\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovnbe\t{$op, %st(0)|ST(0), $op}">, DB;
def CMOVNE_F : FPI<0xC8, AddRegFrm, (outs RST:$op), (ins),
- "fcmovne\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovne\t{$op, %st(0)|ST(0), $op}">, DB;
def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
- "fcmovnu\t{$op, %st(0)|%ST(0), $op}">, DB;
+ "fcmovnu\t{$op, %st(0)|ST(0), $op}">, DB;
} // Predicates = [HasCMov]
// Floating point loads & stores.
@@ -437,33 +437,26 @@ def IST_FP64m : FPI<0xDF, MRM7m, (outs), (ins i64mem:$dst), "fistp{ll}\t$dst">;
}
// FISTTP requires SSE3 even though it's an FPStack op.
+let Predicates = [HasSSE3] in {
def ISTT_Fp16m32 : FpI_<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP,
- [(X86fp_to_i16mem RFP32:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i16mem RFP32:$src, addr:$op)]>;
def ISTT_Fp32m32 : FpI_<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
- [(X86fp_to_i32mem RFP32:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i32mem RFP32:$src, addr:$op)]>;
def ISTT_Fp64m32 : FpI_<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
- [(X86fp_to_i64mem RFP32:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i64mem RFP32:$src, addr:$op)]>;
def ISTT_Fp16m64 : FpI_<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP,
- [(X86fp_to_i16mem RFP64:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i16mem RFP64:$src, addr:$op)]>;
def ISTT_Fp32m64 : FpI_<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
- [(X86fp_to_i32mem RFP64:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i32mem RFP64:$src, addr:$op)]>;
def ISTT_Fp64m64 : FpI_<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
- [(X86fp_to_i64mem RFP64:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i64mem RFP64:$src, addr:$op)]>;
def ISTT_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP,
- [(X86fp_to_i16mem RFP80:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i16mem RFP80:$src, addr:$op)]>;
def ISTT_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
- [(X86fp_to_i32mem RFP80:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i32mem RFP80:$src, addr:$op)]>;
def ISTT_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
- [(X86fp_to_i64mem RFP80:$src, addr:$op)]>,
- Requires<[HasSSE3]>;
+ [(X86fp_to_i64mem RFP80:$src, addr:$op)]>;
+} // Predicates = [HasSSE3]
let mayStore = 1 in {
def ISTT_FP16m : FPI<0xDF, MRM1m, (outs), (ins i16mem:$dst), "fisttp{s}\t$dst">;
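The braces in strings such as "fadd\t{%st(0), $op|$op, ST(0)}" select between assembler dialects: text before '|' is the AT&T spelling, text after it is the Intel spelling (the hunk above drops the '%' from the Intel side, since Intel syntax does not prefix registers). A simplified C++ model of that expansion, handling only the two-variant "{a|b}" form (real strings also use "{a}", which appears only in the AT&T output):

#include <cassert>
#include <string>

static std::string expand(const std::string &Tmpl, bool Intel) {
  std::string Out;
  for (std::string::size_type i = 0; i < Tmpl.size();) {
    if (Tmpl[i] != '{') {
      Out += Tmpl[i++];
      continue;
    }
    // Pick the AT&T half (before '|') or the Intel half (after it).
    std::string::size_type Bar = Tmpl.find('|', i);
    std::string::size_type End = Tmpl.find('}', i);
    Out += Intel ? Tmpl.substr(Bar + 1, End - Bar - 1)
                 : Tmpl.substr(i + 1, Bar - i - 1);
    i = End + 1;
  }
  return Out;
}

int main() {
  const std::string T = "fadd\t{%st(0), $op|$op, ST(0)}";
  assert(expand(T, /*Intel=*/false) == "fadd\t%st(0), $op");
  assert(expand(T, /*Intel=*/true) == "fadd\t$op, ST(0)");
  return 0;
}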
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 0a1590b3e0a2..b3870906ab0a 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -1,10 +1,10 @@
-//===- X86InstrFormats.td - X86 Instruction Formats --------*- tablegen -*-===//
-//
+//===-- X86InstrFormats.td - X86 Instruction Formats -------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -43,6 +43,15 @@ def RawFrmImm8 : Format<43>;
def RawFrmImm16 : Format<44>;
def MRM_D0 : Format<45>;
def MRM_D1 : Format<46>;
+def MRM_D4 : Format<47>;
+def MRM_D8 : Format<48>;
+def MRM_D9 : Format<49>;
+def MRM_DA : Format<50>;
+def MRM_DB : Format<51>;
+def MRM_DC : Format<52>;
+def MRM_DD : Format<53>;
+def MRM_DE : Format<54>;
+def MRM_DF : Format<55>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -107,17 +116,25 @@ class T8 { bits<5> Prefix = 13; }
class TA { bits<5> Prefix = 14; }
class A6 { bits<5> Prefix = 15; }
class A7 { bits<5> Prefix = 16; }
-class TF { bits<5> Prefix = 17; }
+class T8XD { bits<5> Prefix = 17; }
+class T8XS { bits<5> Prefix = 18; }
+class TAXD { bits<5> Prefix = 19; }
+class XOP8 { bits<5> Prefix = 20; }
+class XOP9 { bits<5> Prefix = 21; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_4VOp3 : VEX { bit hasVEX_4VOp3Prefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-
+class MemOp4 { bit hasMemOp4Prefix = 1; }
+class XOP { bit hasXOP_Prefix = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr, Domain d = GenericDomain>
+ string AsmStr,
+ InstrItinClass itin,
+ Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -133,6 +150,8 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// If this is a pseudo instruction, mark it isCodeGenOnly.
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+ let Itinerary = itin;
+
//
// Attributes specific to X86 instructions...
//
@@ -148,11 +167,15 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_4VOp3Prefix = 0; // Does this inst require the VEX.VVVV field to
+ // encode the third operand?
bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
// to be encoded in an immediate field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
+ bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
+ bit hasXOP_Prefix = 0; // Does this inst require an XOP prefix?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -169,58 +192,63 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{33} = hasVEXPrefix;
let TSFlags{34} = hasVEX_WPrefix;
let TSFlags{35} = hasVEX_4VPrefix;
- let TSFlags{36} = hasVEX_i8ImmReg;
- let TSFlags{37} = hasVEX_L;
- let TSFlags{38} = ignoresVEX_L;
- let TSFlags{39} = has3DNow0F0FOpcode;
+ let TSFlags{36} = hasVEX_4VOp3Prefix;
+ let TSFlags{37} = hasVEX_i8ImmReg;
+ let TSFlags{38} = hasVEX_L;
+ let TSFlags{39} = ignoresVEX_L;
+ let TSFlags{40} = has3DNow0F0FOpcode;
+ let TSFlags{41} = hasMemOp4Prefix;
+ let TSFlags{42} = hasXOP_Prefix;
}
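The renumbering just above is the mechanical cost of a packed flag word: inserting hasVEX_4VOp3Prefix at bit 36 pushes the old bits 36-39 up to 37-40 and appends the two new flags at 41 and 42, and, per the comment earlier in this file, X86InstrInfo.h must move in lockstep. A small C++ sketch of the idea (bit positions copied from the TSFlags lines above; the helper is illustrative, not LLVM's):

#include <cassert>
#include <cstdint>

// Set or clear one flag bit in a packed descriptor word.
static uint64_t packBit(uint64_t Flags, unsigned Bit, bool Val) {
  return Val ? (Flags | (1ULL << Bit)) : (Flags & ~(1ULL << Bit));
}

int main() {
  uint64_t TSFlags = 0;
  TSFlags = packBit(TSFlags, 35, true); // hasVEX_4VPrefix (unchanged)
  TSFlags = packBit(TSFlags, 36, true); // hasVEX_4VOp3Prefix (new slot)
  TSFlags = packBit(TSFlags, 40, true); // has3DNow0F0FOpcode, moved from 39
  TSFlags = packBit(TSFlags, 42, true); // hasXOP_Prefix (new)
  // A decoder still reading has3DNow0F0FOpcode at bit 39 now sees 0 there,
  // which is why both sides must change together.
  assert(((TSFlags >> 39) & 1) == 0);
  assert(((TSFlags >> 40) & 1) == 1);
  return 0;
}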
class PseudoI<dag oops, dag iops, list<dag> pattern>
- : X86Inst<0, Pseudo, NoImm, oops, iops, ""> {
+ : X86Inst<0, Pseudo, NoImm, oops, iops, "", NoItinerary> {
let Pattern = pattern;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, NoImm, outs, ins, asm, d> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d = GenericDomain>
- : X86Inst<o, f, Imm8, outs, ins, asm, d> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, itin, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8PCRel, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm32PCRel, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -231,8 +259,9 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
-class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
- : X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
+class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern,
+ InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<0, Pseudo, NoImm, outs, ins, "", itin> {
let FPForm = fp;
let Pattern = pattern;
}
@@ -244,20 +273,23 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm16, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm32, outs, ins, asm, itin> {
let Pattern = pattern;
let CodeSize = 3;
}
// SI - SSE 1 & 2 scalar instructions
-class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern> {
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
@@ -267,8 +299,8 @@ class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
// SIi8 - SSE 1 & 2 scalar instructions
class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern> {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
@@ -278,8 +310,8 @@ class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// PI - SSE 1 & 2 packed instructions
class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
- Domain d>
- : I<o, F, outs, ins, asm, pattern, d> {
+ InstrItinClass itin, Domain d>
+ : I<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
!if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
@@ -289,8 +321,8 @@ class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
// PIi8 - SSE 1 & 2 packed instructions with immediate
class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern, Domain d>
- : Ii8<o, F, outs, ins, asm, pattern, d> {
+ list<dag> pattern, InstrItinClass itin, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, d> {
let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
!if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
@@ -306,25 +338,27 @@ class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// VSSI - SSE1 instructions with XS prefix in AVX form.
// VPSI - SSE1 instructions with TB prefix in AVX form.
-class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
+class SSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasSSE1]>;
+class PSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, TB,
Requires<[HasSSE1]>;
class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XS,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
@@ -337,28 +371,30 @@ class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
// VSDI - SSE2 instructions with XD prefix in AVX form.
// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
-class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+class SDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class SDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasSSE2]>;
class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
-class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+class PDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE2]>;
class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin>, XD,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, itin, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
// SSE3 Instruction Templates:
@@ -368,15 +404,16 @@ class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// S3DI - SSE3 instructions with XD prefix.
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedSingle>, XS,
Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, XD,
Requires<[HasSSE3]>;
-class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+class S3I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE3]>;
@@ -386,16 +423,16 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
// SS3AI - SSSE3 instructions with TA prefix.
//
// Note: SSSE3 instructions have 64-bit and 128-bit versions. The 64-bit version
-// uses the MMX registers. We put those instructions here because they better
-// fit into the SSSE3 instruction category rather than the MMX category.
+// uses the MMX registers. The 64-bit versions are grouped with the MMX
+// classes. They need to be enabled even if AVX is enabled.
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
@@ -404,31 +441,31 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS41AIi8 - SSE 4.1 instructions with TA prefix and ImmT == Imm8.
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
Requires<[HasSSE42]>;
-// SS42FI - SSE 4.2 instructions with TF prefix.
+// SS42FI - SSE 4.2 instructions with T8XD prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
-
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, T8XD, Requires<[HasSSE42]>;
+
// SS42AI = SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
Requires<[HasSSE42]>;
// AVX Instruction Templates:
@@ -437,76 +474,115 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
// AVX8I - AVX instructions with T8 and OpSize prefix.
// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
Requires<[HasAVX]>;
class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
Requires<[HasAVX]>;
+// AVX2 Instruction Templates:
+// Instructions introduced in AVX2 (no SSE equivalent forms)
+//
+// AVX28I - AVX2 instructions with T8 and OpSize prefix.
+// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX2]>;
+class AVX2AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX2]>;
+
// AES Instruction Templates:
//
// AES8I
// These use the same encoding as the SSE4.2 T8 and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
- Requires<[HasAES]>;
+ list<dag>pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, T8,
+ Requires<[HasSSE2, HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
- Requires<[HasAES]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ Requires<[HasSSE2, HasAES]>;
// CLMUL Instruction Templates
class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
- OpSize, Requires<[HasCLMUL]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, Requires<[HasSSE2, HasCLMUL]>;
class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
// FMA3 Instruction Templates
class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag>pattern>
- : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, T8,
OpSize, VEX_4V, Requires<[HasFMA3]>;
+// FMA4 Instruction Templates
+class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasFMA4]>;
+
+// XOP 2, 3 and 4 Operand Instruction Template
+class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP, XOP9, Requires<[HasXOP]>;
+
+// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP, XOP8, Requires<[HasXOP]>;
+
+// XOP 5 operand instruction (VEX encoding!)
+class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TA,
+ OpSize, VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+
// X86-64 Instruction templates...
//
-class RI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, REX_W;
+class RI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi8 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi32 <bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii32<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii32<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RIi64<bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm64, outs, ins, asm>, REX_W {
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : X86Inst<o, f, Imm64, outs, ins, asm, itin>, REX_W {
let Pattern = pattern;
let CodeSize = 3;
}
class RSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : SSI<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : SSI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : SDI<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : SDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class RPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : PDI<o, F, outs, ins, asm, pattern>, REX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : PDI<o, F, outs, ins, asm, pattern, itin>, REX_W;
class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : VPDI<o, F, outs, ins, asm, pattern>, VEX_W;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : VPDI<o, F, outs, ins, asm, pattern, itin>, VEX_W;
// MMX Instruction templates
//
@@ -519,23 +595,23 @@ class VRPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
// MMXID - MMX instructions with XD prefix.
// MMXIS - MMX instructions with XS prefix.
class MMXI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXI64<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX,In64BitMode]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX,In64BitMode]>;
class MMXRI<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, REX_W, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, REX_W, Requires<[HasMMX]>;
class MMX2I<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : I<o, F, outs, ins, asm, pattern, itin>, TB, OpSize, Requires<[HasMMX]>;
class MMXIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, TB, Requires<[HasMMX]>;
class MMXID<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XD, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XD, Requires<[HasMMX]>;
class MMXIS<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasMMX]>;
+ list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
+ : Ii8<o, F, outs, ins, asm, pattern, itin>, XS, Requires<[HasMMX]>;
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index af919fba8ee4..041a64f336f8 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -1,10 +1,10 @@
-//======- X86InstrFragmentsSIMD.td - x86 ISA -------------*- tablegen -*-=====//
+//===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file provides pattern fragments useful for SIMD instructions.
@@ -41,24 +41,20 @@ def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>;
def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>;
def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
+def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
+def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
def X86cmpss : SDNode<"X86ISD::FSETCCss", SDTX86Cmpss>;
def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86andnp : SDNode<"X86ISD::ANDNP",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86psignb : SDNode<"X86ISD::PSIGNB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86psignw : SDNode<"X86ISD::PSIGNW",
- SDTypeProfile<1, 2, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86psignd : SDNode<"X86ISD::PSIGND",
- SDTypeProfile<1, 2, [SDTCisVT<0, v4i32>, SDTCisSameAs<0,1>,
+def X86psign : SDNode<"X86ISD::PSIGN",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86pextrb : SDNode<"X86ISD::PEXTRB",
SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
@@ -75,20 +71,30 @@ def X86insrtps : SDNode<"X86ISD::INSERTPS",
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vsmovl : SDNode<"X86ISD::VSEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>, SDTCisInt<0>]>>;
+
def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
-def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
-def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
-def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
-def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
-def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
-def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
-def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>;
+def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>;
+def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>;
+def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
+
+def X86vshl : SDNode<"X86ISD::VSHL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+def X86vsrl : SDNode<"X86ISD::VSRL",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+def X86vsra : SDNode<"X86ISD::VSRA",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisVec<2>]>>;
+
+def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
+def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
+def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
@@ -96,6 +102,17 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+def X86vpcom : SDNode<"X86ISD::VPCOM",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>;
+def X86vpcomu : SDNode<"X86ISD::VPCOMU",
+ SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>;
+
+def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<1,2>]>>;
+
// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
// translated into one of the target nodes below during lowering.
// Note: this is a work in progress...
@@ -109,6 +126,8 @@ def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
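+// A blend takes two vector operands of the same type and an i32 immediate
+// selecting lanes.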
+def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<1,2>, SDTCisVT<3, i32>]>;
def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
@@ -116,8 +135,7 @@ def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
-def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
-def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>;
def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
@@ -129,40 +147,23 @@ def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
-def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
-def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
-def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
-def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
-
-def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
-def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
-def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
-def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
-
-def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
-def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
-def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
-def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
-
-def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
-def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
-def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
-def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
+def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
+def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
-def X86VPermilps : SDNode<"X86ISD::VPERMILPS", SDTShuff2OpI>;
-def X86VPermilpsy : SDNode<"X86ISD::VPERMILPSY", SDTShuff2OpI>;
-def X86VPermilpd : SDNode<"X86ISD::VPERMILPD", SDTShuff2OpI>;
-def X86VPermilpdy : SDNode<"X86ISD::VPERMILPDY", SDTShuff2OpI>;
+def X86VPermilp : SDNode<"X86ISD::VPERMILP", SDTShuff2OpI>;
-def X86VPerm2f128 : SDNode<"X86ISD::VPERM2F128", SDTShuff3OpI>;
+def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
+def X86Blendpw : SDNode<"X86ISD::BLENDPW", SDTBlend>;
+def X86Blendps : SDNode<"X86ISD::BLENDPS", SDTBlend>;
+def X86Blendpd : SDNode<"X86ISD::BLENDPD", SDTBlend>;
+
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
//===----------------------------------------------------------------------===//
@@ -195,15 +196,15 @@ def sdmem : Operand<v2f64> {
//===----------------------------------------------------------------------===//
// 128-bit load pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
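+// (a v4i32 load, for example, is matched as
+// (v4i32 (bitconvert (loadv2i64 addr))) via the bc_v4i32 fragment below)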
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
// 256-bit load pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
-def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// Like 'store', but always requires 128-bit vector alignment.
@@ -223,6 +224,11 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
+// Like 'X86vzload', but always requires 128-bit vector alignment.
+def alignedX86vzload : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
+ return cast<MemSDNode>(N)->getAlignment() >= 16;
+}]>;
+
// Like 'load', but always requires 256-bit vector alignment.
def alignedload256 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 32;
@@ -234,22 +240,20 @@ def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
// 128-bit aligned load pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
(v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
- (v4i32 (alignedload node:$ptr))>;
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
// 256-bit aligned load pattern fragments
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload256 node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
(v4f64 (alignedload256 node:$ptr))>;
-def alignedloadv8i32 : PatFrag<(ops node:$ptr),
- (v8i32 (alignedload256 node:$ptr))>;
def alignedloadv4i64 : PatFrag<(ops node:$ptr),
(v4i64 (alignedload256 node:$ptr))>;
@@ -268,19 +272,16 @@ def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
// 128-bit memop pattern fragments
+// NOTE: all 128-bit integer vector loads are promoted to v2i64
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
-def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
-def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
// 256-bit memop pattern fragments
-def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
+// NOTE: all 256-bit integer vector loads are promoted to v4i64
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
-def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
@@ -326,6 +327,8 @@ def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// 256-bit bitconvert pattern fragments
+def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
+def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
@@ -350,30 +353,6 @@ def BYTE_imm : SDNodeXForm<imm, [{
return getI32Imm(N->getZExtValue() >> 3);
}]>;
-// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
-// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
-// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
-// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
-}]>;
-
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
// EXTRACT_get_vextractf128_imm xform function: convert extract_subvector index
// to VEXTRACTF128 imm.
def EXTRACT_get_vextractf128_imm : SDNodeXForm<extract_subvector, [{
@@ -386,72 +365,6 @@ def INSERT_get_vinsertf128_imm : SDNodeXForm<insert_subvector, [{
return getI8Imm(X86::getInsertVINSERTF128Immediate(N));
}]>;
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
-def movddup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def shufp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshufhw_imm>;
-
-def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshuflw_imm>;
-
def vextractf128_extract : PatFrag<(ops node:$bigvec, node:$index),
(extract_subvector node:$bigvec,
node:$index), [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 3a02de0aa01b..307c96b8c43f 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
+//===-- X86InstrInfo.cpp - X86 Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,14 +25,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
#include <limits>
#define GET_INSTRINFO_CTOR
@@ -83,6 +82,12 @@ enum {
TB_FOLDED_STORE = 1 << 19
};
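+// One row of the opcode folding tables below: a register-form opcode, its
+// memory-form counterpart, and a set of TB_* flags.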
+struct X86OpTblEntry {
+ uint16_t RegOp;
+ uint16_t MemOp;
+ uint32_t Flags;
+};
+
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
? X86::ADJCALLSTACKDOWN64
@@ -92,7 +97,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
: X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
- static const unsigned OpTbl2Addr[][3] = {
+ static const X86OpTblEntry OpTbl2Addr[] = {
{ X86::ADC32ri, X86::ADC32mi, 0 },
{ X86::ADC32ri8, X86::ADC32mi8, 0 },
{ X86::ADC32rr, X86::ADC32mr, 0 },
@@ -260,22 +265,21 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
- unsigned RegOp = OpTbl2Addr[i][0];
- unsigned MemOp = OpTbl2Addr[i][1];
- unsigned Flags = OpTbl2Addr[i][2];
+ unsigned RegOp = OpTbl2Addr[i].RegOp;
+ unsigned MemOp = OpTbl2Addr[i].MemOp;
+ unsigned Flags = OpTbl2Addr[i].Flags;
AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable,
RegOp, MemOp,
// Index 0, folded load and store, no alignment requirement.
Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
}
- static const unsigned OpTbl0[][3] = {
+ static const X86OpTblEntry OpTbl0[] = {
{ X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
{ X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
{ X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
{ X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
{ X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
- { X86::WINCALL64r, X86::WINCALL64m, TB_FOLDED_LOAD },
{ X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
{ X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
{ X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
@@ -352,6 +356,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::FsVMOVAPDrr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
{ X86::FsVMOVAPSrr, X86::VMOVSSmr, TB_FOLDED_STORE | TB_NO_REVERSE },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -362,6 +367,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
{ X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
// AVX 256-bit foldable instructions
+ { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -370,14 +376,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
};
for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
- unsigned RegOp = OpTbl0[i][0];
- unsigned MemOp = OpTbl0[i][1];
- unsigned Flags = OpTbl0[i][2];
+ unsigned RegOp = OpTbl0[i].RegOp;
+ unsigned MemOp = OpTbl0[i].MemOp;
+ unsigned Flags = OpTbl0[i].Flags;
AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable,
RegOp, MemOp, TB_INDEX_0 | Flags);
}
- static const unsigned OpTbl1[][3] = {
+ static const X86OpTblEntry OpTbl1[] = {
{ X86::CMP16rr, X86::CMP16rm, 0 },
{ X86::CMP32rr, X86::CMP32rm, 0 },
{ X86::CMP64rr, X86::CMP64rm, 0 },
@@ -456,6 +462,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
{ X86::MOVZX64rr32, X86::MOVZX64rm32, 0 },
{ X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
+ { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
+ { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
+ { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
{ X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
{ X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
{ X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
@@ -508,6 +517,11 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 },
{ X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 },
{ X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 },
+ { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 },
+ { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 },
+ { X86::VPERMILPDri, X86::VPERMILPDmi, TB_ALIGN_16 },
+ { X86::VPERMILPSri, X86::VPERMILPSmi, TB_ALIGN_16 },
{ X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 },
{ X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 },
@@ -524,22 +538,39 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// AVX 256-bit foldable instructions
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
+ { X86::VPERMILPDYri, X86::VPERMILPDYmi, TB_ALIGN_32 },
+ { X86::VPERMILPSYri, X86::VPERMILPSYmi, TB_ALIGN_32 },
+ // AVX2 foldable instructions
+ { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_32 },
+ { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_32 },
+ { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_32 },
+ { X86::VPSHUFDYri, X86::VPSHUFDYmi, TB_ALIGN_32 },
+ { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_32 },
+ { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_32 },
+ { X86::VRCPPSYr, X86::VRCPPSYm, TB_ALIGN_32 },
+ { X86::VRCPPSYr_Int, X86::VRCPPSYm_Int, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr, X86::VRSQRTPSYm, TB_ALIGN_32 },
+ { X86::VRSQRTPSYr_Int, X86::VRSQRTPSYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPDYr, X86::VSQRTPDYm, TB_ALIGN_32 },
+ { X86::VSQRTPDYr_Int, X86::VSQRTPDYm_Int, TB_ALIGN_32 },
+ { X86::VSQRTPSYr, X86::VSQRTPSYm, TB_ALIGN_32 },
+ { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
- unsigned RegOp = OpTbl1[i][0];
- unsigned MemOp = OpTbl1[i][1];
- unsigned Flags = OpTbl1[i][2];
+ unsigned RegOp = OpTbl1[i].RegOp;
+ unsigned MemOp = OpTbl1[i].MemOp;
+ unsigned Flags = OpTbl1[i].Flags;
AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable,
RegOp, MemOp,
// Index 1, folded load
Flags | TB_INDEX_1 | TB_FOLDED_LOAD);
}
- static const unsigned OpTbl2[][3] = {
+ static const X86OpTblEntry OpTbl2[] = {
{ X86::ADC32rr, X86::ADC32rm, 0 },
{ X86::ADC64rr, X86::ADC64rm, 0 },
{ X86::ADD16rr, X86::ADD16rm, 0 },
@@ -563,6 +594,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
{ X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
{ X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
+ { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
+ { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
+ { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
+ { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
{ X86::CMOVA16rr, X86::CMOVA16rm, 0 },
{ X86::CMOVA32rr, X86::CMOVA32rm, 0 },
{ X86::CMOVA64rr, X86::CMOVA64rm, 0 },
@@ -652,6 +687,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
{ X86::MINSSrr, X86::MINSSrm, 0 },
{ X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
{ X86::MULSDrr, X86::MULSDrm, 0 },
@@ -664,30 +700,45 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
{ X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
{ X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
+ { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
{ X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
{ X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
{ X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
{ X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
{ X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
{ X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
+ { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
+ { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
{ X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
+ { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 },
{ X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
{ X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
{ X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
{ X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
+ { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
{ X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
{ X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
+ { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
{ X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
{ X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
{ X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
+ { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
{ X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
+ { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
+ { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
+ { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
+ { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 },
+ { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
{ X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 },
+ { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
{ X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
{ X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
+ { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
{ X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
{ X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
@@ -695,6 +746,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
{ X86::PORrr, X86::PORrm, TB_ALIGN_16 },
{ X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
+ { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
+ { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
+ { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
+ { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
{ X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
{ X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
{ X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
@@ -778,6 +833,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VANDNPSrr, X86::VANDNPSrm, TB_ALIGN_16 },
{ X86::VANDPDrr, X86::VANDPDrm, TB_ALIGN_16 },
{ X86::VANDPSrr, X86::VANDPSrm, TB_ALIGN_16 },
+ { X86::VBLENDPDrri, X86::VBLENDPDrmi, TB_ALIGN_16 },
+ { X86::VBLENDPSrri, X86::VBLENDPSrmi, TB_ALIGN_16 },
+ { X86::VBLENDVPDrr, X86::VBLENDVPDrm, TB_ALIGN_16 },
+ { X86::VBLENDVPSrr, X86::VBLENDVPSrm, TB_ALIGN_16 },
{ X86::VCMPPDrri, X86::VCMPPDrmi, TB_ALIGN_16 },
{ X86::VCMPPSrri, X86::VCMPPSrmi, TB_ALIGN_16 },
{ X86::VCMPSDrr, X86::VCMPSDrm, 0 },
@@ -816,6 +875,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
{ X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 },
{ X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 },
{ X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
@@ -824,28 +884,47 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 },
{ X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 },
{ X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 },
+ { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 },
{ X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 },
{ X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 },
{ X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 },
{ X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 },
{ X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 },
{ X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 },
+ { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 },
+ { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 },
{ X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 },
+ { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 },
{ X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 },
{ X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 },
+ { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 },
+ { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 },
+ { X86::VPBLENDWrri, X86::VPBLENDWrmi, TB_ALIGN_16 },
{ X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 },
{ X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 },
+ { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 },
{ X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 },
{ X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 },
{ X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 },
+ { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 },
{ X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 },
+ { X86::VPHADDDrr, X86::VPHADDDrm, TB_ALIGN_16 },
+ { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 },
+ { X86::VPHADDWrr, X86::VPHADDWrm, TB_ALIGN_16 },
+ { X86::VPHSUBDrr, X86::VPHSUBDrm, TB_ALIGN_16 },
+ { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 },
+ { X86::VPHSUBWrr, X86::VPHSUBWrm, TB_ALIGN_16 },
+ { X86::VPERMILPDrr, X86::VPERMILPDrm, TB_ALIGN_16 },
+ { X86::VPERMILPSrr, X86::VPERMILPSrm, TB_ALIGN_16 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 },
+ { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 },
{ X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 },
{ X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 },
{ X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 },
+ { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, TB_ALIGN_16 },
{ X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 },
{ X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 },
@@ -853,6 +932,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 },
{ X86::VPORrr, X86::VPORrm, TB_ALIGN_16 },
{ X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 },
+ { X86::VPSHUFBrr, X86::VPSHUFBrm, TB_ALIGN_16 },
+ { X86::VPSIGNBrr, X86::VPSIGNBrm, TB_ALIGN_16 },
+ { X86::VPSIGNWrr, X86::VPSIGNWrm, TB_ALIGN_16 },
+ { X86::VPSIGNDrr, X86::VPSIGNDrm, TB_ALIGN_16 },
{ X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 },
{ X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 },
{ X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 },
@@ -886,14 +969,154 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 },
{ X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 },
{ X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 },
- { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }
+ { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 },
+ // AVX 256-bit foldable instructions
+ { X86::VADDPDYrr, X86::VADDPDYrm, TB_ALIGN_32 },
+ { X86::VADDPSYrr, X86::VADDPSYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, TB_ALIGN_32 },
+ { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, TB_ALIGN_32 },
+ { X86::VANDNPDYrr, X86::VANDNPDYrm, TB_ALIGN_32 },
+ { X86::VANDNPSYrr, X86::VANDNPSYrm, TB_ALIGN_32 },
+ { X86::VANDPDYrr, X86::VANDPDYrm, TB_ALIGN_32 },
+ { X86::VANDPSYrr, X86::VANDPSYrm, TB_ALIGN_32 },
+ { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, TB_ALIGN_32 },
+ { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, TB_ALIGN_32 },
+ { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, TB_ALIGN_32 },
+ { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, TB_ALIGN_32 },
+ { X86::VCMPPDYrri, X86::VCMPPDYrmi, TB_ALIGN_32 },
+ { X86::VCMPPSYrri, X86::VCMPPSYrmi, TB_ALIGN_32 },
+ { X86::VDIVPDYrr, X86::VDIVPDYrm, TB_ALIGN_32 },
+ { X86::VDIVPSYrr, X86::VDIVPSYrm, TB_ALIGN_32 },
+ { X86::VHADDPDYrr, X86::VHADDPDYrm, TB_ALIGN_32 },
+ { X86::VHADDPSYrr, X86::VHADDPSYrm, TB_ALIGN_32 },
+ { X86::VHSUBPDYrr, X86::VHSUBPDYrm, TB_ALIGN_32 },
+ { X86::VHSUBPSYrr, X86::VHSUBPSYrm, TB_ALIGN_32 },
+ { X86::VINSERTF128rr, X86::VINSERTF128rm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr, X86::VMAXPDYrm, TB_ALIGN_32 },
+ { X86::VMAXPDYrr_Int, X86::VMAXPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMAXPSYrr, X86::VMAXPSYrm, TB_ALIGN_32 },
+ { X86::VMAXPSYrr_Int, X86::VMAXPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPDYrr, X86::VMINPDYrm, TB_ALIGN_32 },
+ { X86::VMINPDYrr_Int, X86::VMINPDYrm_Int, TB_ALIGN_32 },
+ { X86::VMINPSYrr, X86::VMINPSYrm, TB_ALIGN_32 },
+ { X86::VMINPSYrr_Int, X86::VMINPSYrm_Int, TB_ALIGN_32 },
+ { X86::VMULPDYrr, X86::VMULPDYrm, TB_ALIGN_32 },
+ { X86::VMULPSYrr, X86::VMULPSYrm, TB_ALIGN_32 },
+ { X86::VORPDYrr, X86::VORPDYrm, TB_ALIGN_32 },
+ { X86::VORPSYrr, X86::VORPSYrm, TB_ALIGN_32 },
+ { X86::VPERM2F128rr, X86::VPERM2F128rm, TB_ALIGN_32 },
+ { X86::VPERMILPDYrr, X86::VPERMILPDYrm, TB_ALIGN_32 },
+ { X86::VPERMILPSYrr, X86::VPERMILPSYrm, TB_ALIGN_32 },
+ { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, TB_ALIGN_32 },
+ { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, TB_ALIGN_32 },
+ { X86::VSUBPDYrr, X86::VSUBPDYrm, TB_ALIGN_32 },
+ { X86::VSUBPSYrr, X86::VSUBPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, TB_ALIGN_32 },
+ { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, TB_ALIGN_32 },
+ { X86::VXORPDYrr, X86::VXORPDYrm, TB_ALIGN_32 },
+ { X86::VXORPSYrr, X86::VXORPSYrm, TB_ALIGN_32 },
+ // AVX2 foldable instructions
+ { X86::VINSERTI128rr, X86::VINSERTI128rm, TB_ALIGN_16 },
+ { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_32 },
+ { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_32 },
+ { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_32 },
+ { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_32 },
+ { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_32 },
+ { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_32 },
+ { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_32 },
+ { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_32 },
+ { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_32 },
+ { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_32 },
+ { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_32 },
+ { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_32 },
+ { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_32 },
+ { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_32 },
+ { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_32 },
+ { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_32 },
+ { X86::VPBLENDDrri, X86::VPBLENDDrmi, TB_ALIGN_32 },
+ { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, TB_ALIGN_32 },
+ { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, TB_ALIGN_32 },
+ { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_32 },
+ { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_32 },
+ { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_32 },
+ { X86::VPERM2I128rr, X86::VPERM2I128rm, TB_ALIGN_32 },
+ { X86::VPERMDYrr, X86::VPERMDYrm, TB_ALIGN_32 },
+ { X86::VPERMPDYrr, X86::VPERMPDYrm, TB_ALIGN_32 },
+ { X86::VPERMPSYrr, X86::VPERMPSYrm, TB_ALIGN_32 },
+ { X86::VPERMQYrr, X86::VPERMQYrm, TB_ALIGN_32 },
+ { X86::VPHADDDYrr, X86::VPHADDDYrm, TB_ALIGN_32 },
+ { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_32 },
+ { X86::VPHADDWYrr, X86::VPHADDWYrm, TB_ALIGN_32 },
+ { X86::VPHSUBDYrr, X86::VPHSUBDYrm, TB_ALIGN_32 },
+ { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_32 },
+ { X86::VPHSUBWYrr, X86::VPHSUBWYrm, TB_ALIGN_32 },
+ { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_32 },
+ { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_32 },
+ { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_32 },
+ { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_32 },
+ { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_32 },
+ { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_32 },
+ { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_32 },
+ { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_32 },
+ { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_32 },
+ { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_32 },
+ { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_32 },
+ { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_32 },
+ { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_32 },
+ { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_32 },
+ { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_32 },
+ { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_32 },
+ { X86::VPSHUFBYrr, X86::VPSHUFBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNBYrr, X86::VPSIGNBYrm, TB_ALIGN_32 },
+ { X86::VPSIGNWYrr, X86::VPSIGNWYrm, TB_ALIGN_32 },
+ { X86::VPSIGNDYrr, X86::VPSIGNDYrm, TB_ALIGN_32 },
+ { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 },
+ { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 },
+ { X86::VPSLLWYrr, X86::VPSLLWYrm, TB_ALIGN_16 },
+ { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 },
+ { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_32 },
+ { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 },
+ { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_32 },
+ { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 },
+ { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 },
+ { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 },
+ { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_32 },
+ { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 },
+ { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 },
+ { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 },
+ { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 },
+ { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_32 },
+ { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 },
+ { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_32 },
+ { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_32 },
+ { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_32 },
+ { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_32 },
+ { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_32 },
+ { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 },
+ { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_32 },
+ { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_32 },
+ { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_32 },
// FIXME: add AVX 256-bit foldable instructions
};
for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
- unsigned RegOp = OpTbl2[i][0];
- unsigned MemOp = OpTbl2[i][1];
- unsigned Flags = OpTbl2[i][2];
+ unsigned RegOp = OpTbl2[i].RegOp;
+ unsigned MemOp = OpTbl2[i].MemOp;
+ unsigned Flags = OpTbl2[i].Flags;
AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable,
RegOp, MemOp,
// Index 2, folded load
@@ -946,7 +1169,6 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable(0);
- break;
case X86::MOVSX16rr8:
case X86::MOVZX16rr8:
case X86::MOVSX32rr8:
@@ -989,7 +1211,8 @@ bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
static bool isFrameLoadOpcode(int Opcode) {
switch (Opcode) {
- default: break;
+ default:
+ return false;
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
@@ -1011,9 +1234,7 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
return true;
- break;
}
- return false;
}
static bool isFrameStoreOpcode(int Opcode) {
@@ -1203,6 +1424,8 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
bool SeenDef = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
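+ // A register mask that clobbers EFLAGS counts as a def of EFLAGS.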
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SeenDef = true;
if (!MO.isReg())
continue;
if (MO.getReg() == X86::EFLAGS) {
@@ -1247,6 +1470,10 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
+ // A register mask may clobber EFLAGS, but we should still look for a
+ // live EFLAGS def.
+ if (MO.isRegMask() && MO.clobbersPhysReg(X86::EFLAGS))
+ SawKill = true;
if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
if (MO.isDef()) return MO.isDead();
if (MO.isKill()) SawKill = true;
@@ -1357,7 +1584,6 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
switch (MIOpc) {
default:
llvm_unreachable(0);
- break;
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
@@ -1392,9 +1618,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
// Build and insert into an implicit UNDEF value. This is OK because
// we'll be shifting and then extracting the lower 16 bits.
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ BuildMI(*MFI, &*MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(leaInReg2, RegState::Define, X86::sub_16bit)
.addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
@@ -1469,6 +1695,24 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(B, getKillRegState(isKill)).addImm(M);
break;
}
+ case X86::SHUFPDrri: {
+ assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+
+ unsigned B = MI->getOperand(1).getReg();
+ unsigned C = MI->getOperand(2).getReg();
+ if (B != C) return 0;
+ unsigned A = MI->getOperand(0).getReg();
+ unsigned M = MI->getOperand(3).getImm();
+
+ // Convert to PSHUFD mask.
+ M = ((M & 1) << 1) | ((M & 1) << 3) | ((M & 2) << 4) | ((M & 2) << 6) | 0x44;
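+ // e.g. the SHUFPD immediate 1 (swap the two doubles) maps to the PSHUFD
+ // immediate 0x4E, which selects dwords 2,3,0,1.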
+
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
+ .addReg(A, RegState::Define | getDeadRegState(isDead))
+ .addReg(B, getKillRegState(isKill)).addImm(M);
+ break;
+ }
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
// NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
@@ -1597,7 +1841,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32rr_DB: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc;
- TargetRegisterClass *RC;
+ const TargetRegisterClass *RC;
if (MIOpc == X86::ADD64rr || MIOpc == X86::ADD64rr_DB) {
Opc = X86::LEA64r;
RC = X86::GR64_NOSPRegisterClass;
@@ -1904,13 +2148,12 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const MCInstrDesc &MCID = MI->getDesc();
- if (!MCID.isTerminator()) return false;
+ if (!MI->isTerminator()) return false;
// Conditional branch is a special case.
- if (MCID.isBranch() && !MCID.isBarrier())
+ if (MI->isBranch() && !MI->isBarrier())
return true;
- if (!MCID.isPredicable())
+ if (!MI->isPredicable())
return true;
return !isPredicated(MI);
}
@@ -1936,7 +2179,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// A terminator that isn't a branch can't easily be handled by this
// analysis.
- if (!I->getDesc().isBranch())
+ if (!I->isBranch())
return true;
// Handle unconditional branches.
@@ -2420,7 +2663,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
switch (MI->getOpcode()) {
case X86::V_SET0:
- return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr));
+ case X86::FsFLD0SS:
+ case X86::FsFLD0SD:
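+ // All three pseudos zero the destination register; xorps reg,reg does
+ // that with a single SSE1 instruction.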
+ return Expand2AddrUndef(MI, get(HasAVX ? X86::VXORPSrr : X86::XORPSrr));
case X86::TEST8ri_NOREX:
MI->setDesc(get(X86::TEST8ri));
return true;
@@ -2624,6 +2869,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
///
static bool hasPartialRegUpdate(unsigned Opcode) {
switch (Opcode) {
+ case X86::CVTSI2SSrr:
+ case X86::CVTSI2SS64rr:
+ case X86::CVTSI2SDrr:
+ case X86::CVTSI2SD64rr:
case X86::CVTSD2SSrr:
case X86::Int_CVTSD2SSrr:
case X86::CVTSS2SDrr:
@@ -2631,7 +2880,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSr:
case X86::RCPSSr_Int:
case X86::ROUNDSDr:
+ case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
+ case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSr_Int:
case X86::SQRTSSr:
@@ -2643,7 +2894,9 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::VRCPSSr:
case X86::VROUNDSDr:
+ case X86::VROUNDSDr_Int:
case X86::VROUNDSSr:
+ case X86::VROUNDSSr_Int:
case X86::VRSQRTSSr:
case X86::VSQRTSSr:
return true;
@@ -2652,6 +2905,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
return false;
}
+/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle
+/// instructions we would like before a partial register update.
+unsigned X86InstrInfo::
+getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode()))
+ return 0;
+
+ // If MI is marked as reading Reg, the partial register update is wanted.
+ const MachineOperand &MO = MI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (MO.readsReg() || MI->readsVirtualRegister(Reg))
+ return 0;
+ } else {
+ if (MI->readsRegister(Reg, TRI))
+ return 0;
+ }
+
+ // If any of the preceding 16 instructions are reading Reg, insert a
+ // dependency-breaking instruction. The magic number is based on a few
+ // Nehalem experiments.
+ return 16;
+}
+
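+/// breakPartialRegDependency - Insert a dependency-breaking xor of the
+/// register with itself ahead of MI, so that MI's partial write starts a
+/// fresh dependence chain.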
+void X86InstrInfo::
+breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
+ if (X86::VR128RegClass.contains(Reg)) {
+ // These instructions are all floating point domain, so xorps is the best
+ // choice.
+ bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+ unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
+ .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+ } else if (X86::VR256RegClass.contains(Reg)) {
+ // Use vxorps to clear the full ymm register.
+ // It wants to read and write the xmm sub-register.
+ unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm);
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
+ .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
+ .addReg(Reg, RegState::ImplicitDefine);
+ } else
+ return;
+ MI->addRegisterKilled(Reg, TRI, true);
+}
+
MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
@@ -2714,6 +3015,8 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (LoadMI->getOpcode()) {
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
Alignment = 32;
break;
case X86::V_SET0:
@@ -2722,11 +3025,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 16;
break;
case X86::FsFLD0SD:
- case X86::VFsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
- case X86::VFsFLD0SS:
Alignment = 4;
break;
default:
@@ -2759,10 +3060,10 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
case X86::AVX_SET0PSY:
case X86::AVX_SET0PDY:
case X86::AVX_SETALLONES:
+ case X86::AVX2_SETALLONES:
+ case X86::AVX2_SET0:
case X86::FsFLD0SD:
- case X86::FsFLD0SS:
- case X86::VFsFLD0SD:
- case X86::VFsFLD0SS: {
+ case X86::FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -2788,16 +3089,19 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI->getOpcode();
- if (Opc == X86::FsFLD0SS || Opc == X86::VFsFLD0SS)
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD || Opc == X86::VFsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
+ else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX2_SET0)
+ Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
- bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES);
+ bool IsAllOnes = (Opc == X86::V_SETALLONES || Opc == X86::AVX_SETALLONES ||
+ Opc == X86::AVX2_SETALLONES);
const Constant *C = IsAllOnes ? Constant::getAllOnesValue(Ty) :
Constant::getNullValue(Ty);
unsigned CPI = MCP.getConstantPoolIndex(C, Alignment);
@@ -3329,7 +3633,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// These are the replaceable SSE instructions. Some of these have Int variants
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
-static const unsigned ReplaceableInstrs[][3] = {
+static const uint16_t ReplaceableInstrs[][3] = {
//PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
@@ -3366,31 +3670,66 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+};
+
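+// These instructions only have an integer-domain form when AVX2 is
+// available; without AVX2 only the two floating point domains are legal.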
+static const uint16_t ReplaceableInstrsAVX2[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
+ { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
+ { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
+ { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr },
+ { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
+ { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
+ { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr },
+ { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr },
+ { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm },
+ { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr },
+ { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm },
+ { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
return 0;
}
+static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+ if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+ return ReplaceableInstrsAVX2[i];
+ return 0;
+}
+
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- return std::make_pair(domain,
- domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ uint16_t validDomains = 0;
+ if (domain && lookup(MI->getOpcode(), domain))
+ validDomains = 0xe;
+ else if (domain && lookupAVX2(MI->getOpcode(), domain))
+ validDomains = hasAVX2 ? 0xe : 0x6;
+ return std::make_pair(domain, validDomains);
}
void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
+ const uint16_t *table = lookup(MI->getOpcode(), dom);
+ if (!table) { // try the other table
+ assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) &&
+ "256-bit vector operations only available in AVX2");
+ table = lookupAVX2(MI->getOpcode(), dom);
+ }
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
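For reference, getExecutionDomain returns (current domain, bitmask of reachable domains), one bit per table column: 1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt, with bit 0 unused. 0xe (0b1110) means all three columns are legal; 0x6 (0b0110) drops the PackedInt column when AVX2 is unavailable, since that column is 256-bit integer ops. A self-contained illustration of just the bit layout (the instruction tables themselves are not modeled):

#include <cassert>
#include <cstdint>

enum Domain { PackedSingle = 1, PackedDouble = 2, PackedInt = 3 };

static bool domainReachable(uint16_t validDomains, unsigned domain) {
  return (validDomains >> domain) & 1;
}

int main() {
  const uint16_t withAVX2    = 0xe; // 0b1110: all three columns
  const uint16_t withoutAVX2 = 0x6; // 0b0110: FP columns only
  assert(domainReachable(withAVX2, PackedInt));
  assert(!domainReachable(withoutAVX2, PackedInt));
  assert(domainReachable(withoutAVX2, PackedDouble));
  return 0;
}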
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 97009dbdbe50..b23d7560ec16 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*- ===//
+//===-- X86InstrInfo.h - X86 Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,10 +14,10 @@
#ifndef X86INSTRUCTIONINFO_H
#define X86INSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "X86.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "X86GenInstrInfo.inc"
@@ -345,6 +345,11 @@ public:
void setExecutionDomain(MachineInstr *MI, unsigned Domain) const;
+ unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+ void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
unsigned OpNum,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index d54bf275c04f..6a2531269d8f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,4 +1,4 @@
-//===- X86InstrInfo.td - Main X86 Instruction Definition ---*- tablegen -*-===//
+//===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -99,17 +99,16 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
+def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>;
+
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
-def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
[SDNPHasChain]>;
-def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
- [SDNPHasChain]>;
def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
[SDNPHasChain]>;
def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
@@ -226,6 +225,10 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>;
+def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>;
+def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>;
+def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>;
+
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void,
@@ -237,6 +240,9 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA,
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL,
+ [SDNPHasChain, SDNPOutGlue]>;
+
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
//
@@ -247,10 +253,31 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
-def X86MemAsmOperand : AsmOperandClass {
- let Name = "Mem";
- let SuperClasses = [];
+def X86MemAsmOperand : AsmOperandClass {
+ let Name = "Mem"; let PredicateMethod = "isMem";
+}
+def X86Mem8AsmOperand : AsmOperandClass {
+ let Name = "Mem8"; let PredicateMethod = "isMem8";
+}
+def X86Mem16AsmOperand : AsmOperandClass {
+ let Name = "Mem16"; let PredicateMethod = "isMem16";
+}
+def X86Mem32AsmOperand : AsmOperandClass {
+ let Name = "Mem32"; let PredicateMethod = "isMem32";
+}
+def X86Mem64AsmOperand : AsmOperandClass {
+ let Name = "Mem64"; let PredicateMethod = "isMem64";
+}
+def X86Mem80AsmOperand : AsmOperandClass {
+ let Name = "Mem80"; let PredicateMethod = "isMem80";
+}
+def X86Mem128AsmOperand : AsmOperandClass {
+ let Name = "Mem128"; let PredicateMethod = "isMem128";
}
+def X86Mem256AsmOperand : AsmOperandClass {
+ let Name = "Mem256"; let PredicateMethod = "isMem256";
+}
+
def X86AbsMemAsmOperand : AsmOperandClass {
let Name = "AbsMem";
let SuperClasses = [X86MemAsmOperand];
@@ -267,17 +294,28 @@ def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
def opaque512mem : X86MemOperand<"printopaquemem">;
-def i8mem : X86MemOperand<"printi8mem">;
-def i16mem : X86MemOperand<"printi16mem">;
-def i32mem : X86MemOperand<"printi32mem">;
-def i64mem : X86MemOperand<"printi64mem">;
-def i128mem : X86MemOperand<"printi128mem">;
-def i256mem : X86MemOperand<"printi256mem">;
-def f32mem : X86MemOperand<"printf32mem">;
-def f64mem : X86MemOperand<"printf64mem">;
-def f80mem : X86MemOperand<"printf80mem">;
-def f128mem : X86MemOperand<"printf128mem">;
-def f256mem : X86MemOperand<"printf256mem">;
+def i8mem : X86MemOperand<"printi8mem"> {
+ let ParserMatchClass = X86Mem8AsmOperand; }
+def i16mem : X86MemOperand<"printi16mem"> {
+ let ParserMatchClass = X86Mem16AsmOperand; }
+def i32mem : X86MemOperand<"printi32mem"> {
+ let ParserMatchClass = X86Mem32AsmOperand; }
+def i64mem : X86MemOperand<"printi64mem"> {
+ let ParserMatchClass = X86Mem64AsmOperand; }
+def i128mem : X86MemOperand<"printi128mem"> {
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def i256mem : X86MemOperand<"printi256mem"> {
+ let ParserMatchClass = X86Mem256AsmOperand; }
+def f32mem : X86MemOperand<"printf32mem"> {
+ let ParserMatchClass = X86Mem32AsmOperand; }
+def f64mem : X86MemOperand<"printf64mem"> {
+ let ParserMatchClass = X86Mem64AsmOperand; }
+def f80mem : X86MemOperand<"printf80mem"> {
+ let ParserMatchClass = X86Mem80AsmOperand; }
+def f128mem : X86MemOperand<"printf128mem"> {
+ let ParserMatchClass = X86Mem128AsmOperand; }
+def f256mem : X86MemOperand<"printf256mem"> {
+ let ParserMatchClass = X86Mem256AsmOperand; }
}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
@@ -285,7 +323,7 @@ def f256mem : X86MemOperand<"printf256mem">;
def i8mem_NOREX : Operand<i64> {
let PrintMethod = "printi8mem";
let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem8AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -299,7 +337,7 @@ def ptr_rc_tailcall : PointerLikeRegClass<2>;
def i32mem_TC : Operand<i32> {
let PrintMethod = "printi32mem";
let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem32AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -310,7 +348,7 @@ def i64mem_TC : Operand<i64> {
let PrintMethod = "printi64mem";
let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
ptr_rc_tailcall, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
+ let ParserMatchClass = X86Mem64AsmOperand;
let OperandType = "OPERAND_MEMORY";
}
@@ -336,6 +374,11 @@ def SSECC : Operand<i8> {
let OperandType = "OPERAND_IMMEDIATE";
}
+def AVXCC : Operand<i8> {
+ let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
class ImmSExtAsmOperandClass : AsmOperandClass {
let SuperClasses = [ImmAsmOperand];
let RenderMethod = "addImmOperands";
@@ -466,37 +509,32 @@ def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
-
def HasAVX : Predicate<"Subtarget->hasAVX()">;
-def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">;
+def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">;
def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
+def HasXOP : Predicate<"Subtarget->hasXOP()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
-def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
-def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
+def HasBMI2 : Predicate<"Subtarget->hasBMI2()">;
+def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
+def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
AssemblerPredicate<"!Mode64Bit">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<"Mode64Bit">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
-def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
-def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"ModeNaCl">;
-def IsNaCl32 : Predicate<"Subtarget->isTargetNaCl32()">,
- AssemblerPredicate<"ModeNaCl,!Mode64Bit">;
-def IsNaCl64 : Predicate<"Subtarget->isTargetNaCl64()">,
- AssemblerPredicate<"ModeNaCl,Mode64Bit">;
-def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">,
- AssemblerPredicate<"!ModeNaCl">;
+def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
+def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
@@ -1375,7 +1413,7 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
}
//===----------------------------------------------------------------------===//
-// TZCNT Instruction
+// BMI Instructions
//
let Predicates = [HasBMI], Defs = [EFLAGS] in {
def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
@@ -1405,6 +1443,83 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in {
(implicit EFLAGS)]>, XS;
}
+multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
+ RegisterClass RC, X86MemOperand x86memop, SDNode OpNode,
+ PatFrag ld_frag> {
+ def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V;
+ def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>,
+ T8, VEX_4V;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem,
+ X86blsr_flag, loadi32>;
+ defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem,
+ X86blsr_flag, loadi64>, VEX_W;
+ defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem,
+ X86blsmsk_flag, loadi32>;
+ defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem,
+ X86blsmsk_flag, loadi64>, VEX_W;
+ defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem,
+ X86blsi_flag, loadi32>;
+ defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem,
+ X86blsi_flag, loadi64>, VEX_W;
+}
+
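The BLS* definitions above follow the standard BMI identities; a reference sketch of the value semantics (EFLAGS effects omitted):

#include <cassert>
#include <cstdint>

static uint32_t blsr(uint32_t x)   { return x & (x - 1); }  // reset lowest set bit
static uint32_t blsmsk(uint32_t x) { return x ^ (x - 1); }  // mask through lowest set bit
static uint32_t blsi(uint32_t x)   { return x & (0u - x); } // isolate lowest set bit

int main() {
  assert(blsr(0xB0)   == 0xA0); // 1011'0000 -> 1010'0000
  assert(blsmsk(0xB0) == 0x1F); // bits up to and including bit 4
  assert(blsi(0xB0)   == 0x10); // just bit 4
  return 0;
}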
+multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8, VEX_4VOp3;
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8, VEX_4VOp3;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem,
+ int_x86_bmi_bextr_32, loadi32>;
+ defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem,
+ int_x86_bmi_bextr_64, loadi64>, VEX_W;
+}
+
+let Predicates = [HasBMI2], Defs = [EFLAGS] in {
+ defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
+ int_x86_bmi_bzhi_32, loadi32>;
+ defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
+ int_x86_bmi_bzhi_64, loadi64>, VEX_W;
+}
+
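BEXTR and BZHI semantics, as a sketch matching the intrinsics above (flag effects omitted; the real BEXTR packs start in bits 7:0 and length in bits 15:8 of one control operand, unpacked here for clarity):

#include <cassert>
#include <cstdint>

// bextr: extract `len` bits of src starting at bit `start`.
static uint32_t bextr(uint32_t src, unsigned start, unsigned len) {
  if (start >= 32) return 0;
  uint32_t shifted = src >> start;
  return len >= 32 ? shifted : shifted & ((1u << len) - 1);
}

// bzhi: zero all bits at positions >= index.
static uint32_t bzhi(uint32_t src, unsigned index) {
  return index >= 32 ? src : src & ((1u << index) - 1);
}

int main() {
  assert(bextr(0xDEADBEEFu, 8, 8) == 0xBEu);
  assert(bzhi(0xDEADBEEFu, 16) == 0xBEEFu);
  return 0;
}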
+multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int,
+ PatFrag ld_frag> {
+ def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>,
+ VEX_4V;
+ def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))]>, VEX_4V;
+}
+
+let Predicates = [HasBMI2] in {
+ defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+ int_x86_bmi_pdep_32, loadi32>, T8XD;
+ defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+ int_x86_bmi_pdep_64, loadi64>, T8XD, VEX_W;
+ defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+ int_x86_bmi_pext_32, loadi32>, T8XS;
+ defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+ int_x86_bmi_pext_64, loadi64>, T8XS, VEX_W;
+}
+
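PDEP and PEXT are the BMI2 scatter/gather-bits pair; a reference sketch of the semantics (the hardware does this in a single operation):

#include <cassert>
#include <cstdint>

// pdep: deposit the low bits of src into the positions selected by mask.
static uint32_t pdep(uint32_t src, uint32_t mask) {
  uint32_t result = 0;
  for (uint32_t bit = 1; mask; bit <<= 1) {
    uint32_t lowest = mask & (0u - mask); // next selected position
    if (src & bit)
      result |= lowest;
    mask &= mask - 1;                     // clear that position
  }
  return result;
}

// pext: extract the bits selected by mask and pack them at the bottom.
static uint32_t pext(uint32_t src, uint32_t mask) {
  uint32_t result = 0;
  for (uint32_t bit = 1; mask; bit <<= 1) {
    uint32_t lowest = mask & (0u - mask);
    if (src & lowest)
      result |= bit;
    mask &= mask - 1;
  }
  return result;
}

int main() {
  assert(pdep(0x5u, 0x1Cu) == 0x14u); // 0b101 into positions 2..4
  assert(pext(0x14u, 0x1Cu) == 0x5u); // and back
  return 0;
}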
//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
@@ -1424,12 +1539,16 @@ include "X86InstrFragmentsSIMD.td"
// FMA - Fused Multiply-Add support (requires FMA)
include "X86InstrFMA.td"
+// XOP
+include "X86InstrXOP.td"
+
// SSE, MMX and 3DNow! vector support.
include "X86InstrSSE.td"
include "X86InstrMMX.td"
include "X86Instr3DNow.td"
include "X86InstrVMX.td"
+include "X86InstrSVM.td"
// System instructions.
include "X86InstrSystem.td"
@@ -1445,10 +1564,11 @@ def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>;
def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>;
def : MnemonicAlias<"cbw", "cbtw">;
+def : MnemonicAlias<"cwde", "cwtl">;
def : MnemonicAlias<"cwd", "cwtd">;
def : MnemonicAlias<"cdq", "cltd">;
-def : MnemonicAlias<"cwde", "cwtl">;
def : MnemonicAlias<"cdqe", "cltq">;
+def : MnemonicAlias<"cqo", "cqto">;
// lret maps to lretl, it is not ambiguous with lretq.
def : MnemonicAlias<"lret", "lretl">;
@@ -1497,6 +1617,7 @@ def : MnemonicAlias<"verrw", "verr">;
// System instruction aliases.
def : MnemonicAlias<"iret", "iretl">;
def : MnemonicAlias<"sysret", "sysretl">;
+def : MnemonicAlias<"sysexit", "sysexitl">;
def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>;
def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>;
@@ -1516,6 +1637,8 @@ def : MnemonicAlias<"fcmovna", "fcmovbe">;
def : MnemonicAlias<"fcmovae", "fcmovnb">;
def : MnemonicAlias<"fcomip", "fcompi">;
def : MnemonicAlias<"fildq", "fildll">;
+def : MnemonicAlias<"fistpq", "fistpll">;
+def : MnemonicAlias<"fisttpq", "fisttpll">;
def : MnemonicAlias<"fldcww", "fldcw">;
def : MnemonicAlias<"fnstcww", "fnstcw">;
def : MnemonicAlias<"fnstsww", "fnstsw">;
@@ -1737,20 +1860,20 @@ def : InstAlias<"outl $port", (OUT32ir i8imm:$port)>;
// errors, since its encoding is the most compact.
def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
-// shld/shrd op,op -> shld op, op, 1
-def : InstAlias<"shldw $r1, $r2", (SHLD16rri8 GR16:$r1, GR16:$r2, 1)>;
-def : InstAlias<"shldl $r1, $r2", (SHLD32rri8 GR32:$r1, GR32:$r2, 1)>;
-def : InstAlias<"shldq $r1, $r2", (SHLD64rri8 GR64:$r1, GR64:$r2, 1)>;
-def : InstAlias<"shrdw $r1, $r2", (SHRD16rri8 GR16:$r1, GR16:$r2, 1)>;
-def : InstAlias<"shrdl $r1, $r2", (SHRD32rri8 GR32:$r1, GR32:$r2, 1)>;
-def : InstAlias<"shrdq $r1, $r2", (SHRD64rri8 GR64:$r1, GR64:$r2, 1)>;
-
-def : InstAlias<"shldw $mem, $reg", (SHLD16mri8 i16mem:$mem, GR16:$reg, 1)>;
-def : InstAlias<"shldl $mem, $reg", (SHLD32mri8 i32mem:$mem, GR32:$reg, 1)>;
-def : InstAlias<"shldq $mem, $reg", (SHLD64mri8 i64mem:$mem, GR64:$reg, 1)>;
-def : InstAlias<"shrdw $mem, $reg", (SHRD16mri8 i16mem:$mem, GR16:$reg, 1)>;
-def : InstAlias<"shrdl $mem, $reg", (SHRD32mri8 i32mem:$mem, GR32:$reg, 1)>;
-def : InstAlias<"shrdq $mem, $reg", (SHRD64mri8 i64mem:$mem, GR64:$reg, 1)>;
+// shld/shrd op,op -> shld op, op, CL
+def : InstAlias<"shldw $r2, $r1", (SHLD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shldl $r2, $r1", (SHLD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shldq $r2, $r1", (SHLD64rrCL GR64:$r1, GR64:$r2)>;
+def : InstAlias<"shrdw $r2, $r1", (SHRD16rrCL GR16:$r1, GR16:$r2)>;
+def : InstAlias<"shrdl $r2, $r1", (SHRD32rrCL GR32:$r1, GR32:$r2)>;
+def : InstAlias<"shrdq $r2, $r1", (SHRD64rrCL GR64:$r1, GR64:$r2)>;
+
+def : InstAlias<"shldw $reg, $mem", (SHLD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shldl $reg, $mem", (SHLD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shldq $reg, $mem", (SHLD64mrCL i64mem:$mem, GR64:$reg)>;
+def : InstAlias<"shrdw $reg, $mem", (SHRD16mrCL i16mem:$mem, GR16:$reg)>;
+def : InstAlias<"shrdl $reg, $mem", (SHRD32mrCL i32mem:$mem, GR32:$reg)>;
+def : InstAlias<"shrdq $reg, $mem", (SHRD64mrCL i64mem:$mem, GR64:$reg)>;
/* FIXME: This is disabled because the asm matcher is currently incapable of
* matching a fixed immediate like $1.
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index b2d9fca97b02..63f96b6f5d3b 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -1,4 +1,4 @@
-//====- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
+//===-- X86InstrMMX.td - Describe the MMX Instruction Set --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -105,19 +105,23 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ [(set DstRC:$dst, (Int SrcRC:$src))],
+ IIC_DEFAULT, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))],
+ IIC_DEFAULT, d>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
PatFrag ld_frag, string asm, Domain d> {
def irr : PI<opc, MRMSrcReg, (outs DstRC:$dst),(ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+ IIC_DEFAULT, d>;
def irm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2), asm,
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+ IIC_DEFAULT, d>;
}
//===----------------------------------------------------------------------===//
@@ -175,25 +179,25 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)]>;
-def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "movdq2q\t{$src, $dst|$dst, $src}",
+def MMX_MOVDQ2Qrr : SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (bitconvert
(i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))))))]>;
-def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "movq2dq\t{$src, $dst|$dst, $src}",
+def MMX_MOVQ2DQrr : SSDIi8<0xD6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector
(i64 (bitconvert (x86mmx VR64:$src))))))]>;
let neverHasSideEffects = 1 in
-def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
- "movq2dq\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst),
+ (ins VR64:$src), "movq2dq\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
- "movdq2q\t{$src, $dst|$dst, $src}", []>;
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
+ (ins FR64:$src), "movdq2q\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index d3ced23450fe..408ab16778d1 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1,4 +1,4 @@
-//====- X86InstrSSE.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,6 +13,126 @@
//
//===----------------------------------------------------------------------===//
+class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
+ InstrItinClass rr = arg_rr;
+ InstrItinClass rm = arg_rm;
+}
+
+class SizeItins<OpndItins arg_s, OpndItins arg_d> {
+ OpndItins s = arg_s;
+ OpndItins d = arg_d;
+}
+
+
+class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
+ InstrItinClass arg_ri> {
+ InstrItinClass rr = arg_rr;
+ InstrItinClass rm = arg_rm;
+ InstrItinClass ri = arg_ri;
+}
+
+
+// scalar
+def SSE_ALU_F32S : OpndItins<
+ IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
+>;
+
+def SSE_ALU_F64S : OpndItins<
+ IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
+>;
+
+def SSE_ALU_ITINS_S : SizeItins<
+ SSE_ALU_F32S, SSE_ALU_F64S
+>;
+
+def SSE_MUL_F32S : OpndItins<
+  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
+>;
+
+def SSE_MUL_F64S : OpndItins<
+ IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
+>;
+
+def SSE_MUL_ITINS_S : SizeItins<
+ SSE_MUL_F32S, SSE_MUL_F64S
+>;
+
+def SSE_DIV_F32S : OpndItins<
+  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
+>;
+
+def SSE_DIV_F64S : OpndItins<
+ IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
+>;
+
+def SSE_DIV_ITINS_S : SizeItins<
+ SSE_DIV_F32S, SSE_DIV_F64S
+>;
+
+// parallel
+def SSE_ALU_F32P : OpndItins<
+ IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
+>;
+
+def SSE_ALU_F64P : OpndItins<
+ IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
+>;
+
+def SSE_ALU_ITINS_P : SizeItins<
+ SSE_ALU_F32P, SSE_ALU_F64P
+>;
+
+def SSE_MUL_F32P : OpndItins<
+  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
+>;
+
+def SSE_MUL_F64P : OpndItins<
+ IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
+>;
+
+def SSE_MUL_ITINS_P : SizeItins<
+ SSE_MUL_F32P, SSE_MUL_F64P
+>;
+
+def SSE_DIV_F32P : OpndItins<
+  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
+>;
+
+def SSE_DIV_F64P : OpndItins<
+ IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
+>;
+
+def SSE_DIV_ITINS_P : SizeItins<
+ SSE_DIV_F32P, SSE_DIV_F64P
+>;
+
+def SSE_BIT_ITINS_P : OpndItins<
+ IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
+>;
+
+def SSE_INTALU_ITINS_P : OpndItins<
+ IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
+>;
+
+def SSE_INTALUQ_ITINS_P : OpndItins<
+ IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
+>;
+
+def SSE_INTMUL_ITINS_P : OpndItins<
+ IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
+>;
+
+def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
+ IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
+>;
+
+def SSE_MOVA_ITINS : OpndItins<
+ IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
+>;
+
+def SSE_MOVU_ITINS : OpndItins<
+ IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
+>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
@@ -21,25 +141,27 @@
/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, X86MemOperand x86memop,
+ OpndItins itins,
bit Is2Addr = 1> {
let isCommutable = 1 in {
def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>;
}
def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>;
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
Operand memopr, ComplexPattern mem_cpat,
+ OpndItins itins,
bit Is2Addr = 1> {
def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
@@ -47,72 +169,74 @@ multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))]>;
+ RC:$src1, RC:$src2))], itins.rr>;
def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))]>;
+ RC:$src1, mem_cpat:$src2))], itins.rm>;
}
/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
RegisterClass RC, ValueType vt,
X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>;
let mayLoad = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
+ itins.rm, d>;
}
/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
string OpcodeStr, X86MemOperand x86memop,
list<dag> pat_rr, list<dag> pat_rm,
- bit Is2Addr = 1> {
- let isCommutable = 1 in
+ bit Is2Addr = 1,
+                                       bit rr_neverHasSideEffects = 0> {
+  let isCommutable = 1, neverHasSideEffects = rr_neverHasSideEffects in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rr, d>;
+ pat_rr, IIC_DEFAULT, d>;
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- pat_rm, d>;
+ pat_rm, IIC_DEFAULT, d>;
}
/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
string asm, string SSEVer, string FPSizeStr,
X86MemOperand x86memop, PatFrag mem_frag,
- Domain d, bit Is2Addr = 1> {
+ Domain d, OpndItins itins, bit Is2Addr = 1> {
def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], d>;
+ RC:$src1, RC:$src2))], IIC_DEFAULT, d>;
def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (!cast<Intrinsic>(
!strconcat("int_x86_", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, (mem_frag addr:$src2)))], d>;
+ RC:$src1, (mem_frag addr:$src2)))], IIC_DEFAULT, d>;
}
//===----------------------------------------------------------------------===//
@@ -170,7 +294,7 @@ def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
-let Predicates = [HasXMMInt] in {
+let Predicates = [HasSSE2] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
@@ -239,21 +363,13 @@ let Predicates = [HasAVX] in {
}
// Alias instructions that map fld0 to pxor for sse.
-// FIXME: Set encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in {
- def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
- def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
- def VFsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
- def VFsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasAVX]>, TB, OpSize, VEX_4V;
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1 in {
+ def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
+ def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}
//===----------------------------------------------------------------------===//
@@ -286,16 +402,35 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, Predicates = [HasAVX] in {
+ isCodeGenOnly = 1 in {
+let Predicates = [HasAVX] in {
def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
}
+let Predicates = [HasAVX2], neverHasSideEffects = 1 in
+def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
+ []>, VEX_4V;
+}
+let Predicates = [HasAVX2], AddedComplexity = 5 in {
+ def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
+ def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
+}
// AVX has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
+def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
+
+def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
+def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
+ (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
+
def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
@@ -310,13 +445,16 @@ def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
// JIT implementation, it does not expand the instructions below like
// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt, Predicates = [HasAVX] in
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in {
+ let Predicates = [HasAVX] in
def AVX_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>, VEX_4V;
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+ let Predicates = [HasAVX2] in
+ def AVX2_SETALLONES : PDI<0x76, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8i32 immAllOnesV))]>, VEX_4V;
+}
//===----------------------------------------------------------------------===//
@@ -329,22 +467,25 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// in terms of a copy, and just mentioned, we don't use movss/movsd for copies.
//===----------------------------------------------------------------------===//
-class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+class sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt, string asm> :
SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
- [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+ [(set VR128:$dst, (vt (OpNode VR128:$src1,
+ (scalar_to_vector RC:$src2))))],
+ IIC_SSE_MOV_S_RR>;
// Loading from memory automatically zeroing upper bits.
class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_pat, string OpcodeStr> :
SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (mem_pat addr:$src))]>;
+ [(set RC:$dst, (mem_pat addr:$src))],
+ IIC_SSE_MOV_S_RM>;
// AVX
-def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+def VMOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
"movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V,
VEX_LIG;
-def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+def VMOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
"movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V,
VEX_LIG;
@@ -352,11 +493,13 @@ def VMOVSDrr : sse12_move_rr<FR64, v2f64,
let isCodeGenOnly = 1 in {
def VMOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
XS, VEX_4V, VEX_LIG;
def VMOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_MOV_S_RR>,
XD, VEX_4V, VEX_LIG;
}
@@ -370,26 +513,30 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX, VEX_LIG;
+ [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ XS, VEX, VEX_LIG;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX, VEX_LIG;
+ [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
+ XD, VEX, VEX_LIG;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
- def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ def MOVSSrr : sse12_move_rr<FR32, X86Movss, v4f32,
"movss\t{$src2, $dst|$dst, $src2}">, XS;
- def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ def MOVSDrr : sse12_move_rr<FR64, X86Movsd, v2f64,
"movsd\t{$src2, $dst|$dst, $src2}">, XD;
// For the disassembler
let isCodeGenOnly = 1 in {
def MOVSSrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", []>, XS;
+ "movss\t{$src2, $dst|$dst, $src2}", [],
+ IIC_SSE_MOV_S_RR>, XS;
def MOVSDrr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", []>, XD;
+ "movsd\t{$src2, $dst|$dst, $src2}", [],
+ IIC_SSE_MOV_S_RR>, XD;
}
}
@@ -402,153 +549,14 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>;
+ [(store FR32:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>;
+ [(store FR64:$src, addr:$dst)], IIC_SSE_MOV_S_MR>;
// Patterns
-let Predicates = [HasSSE1] in {
- let AddedComplexity = 15 in {
- // Extract the low 32-bit value from one vector and insert it into another.
- def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
- def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSS to the lower bits.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
- def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
- }
-
- let AddedComplexity = 20 in {
- // MOVSSrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
- def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
- def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
- def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
- }
-
- // Extract and store.
- def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
- // Shuffle with MOVSS
- def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (MOVSSrr VR128:$src1, FR32:$src2)>;
- def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
- def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
- (MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
-}
-
-let Predicates = [HasSSE2] in {
- let AddedComplexity = 15 in {
- // Extract the low 64-bit value from one vector and insert it into another.
- def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
- def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-
- // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
- def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
- def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
-
- // Move scalar to XMM zero-extended, zeroing a VR128 then do a
- // MOVSD to the lower bits.
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
- }
-
- let AddedComplexity = 20 in {
- // MOVSDrm zeros the high parts of the register; represent this
- // with SUBREG_TO_REG.
- def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
- }
-
- // Extract and store.
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
- // Shuffle with MOVSD
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (MOVSDrr VR128:$src1, FR64:$src2)>;
- def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
- def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
- def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
- def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
-
- // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
- def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
-}
-
let Predicates = [HasAVX] in {
let AddedComplexity = 15 in {
- // Extract the low 32-bit value from one vector and insert it into another.
- def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
- (VMOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
- def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (VMOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
-
- // Extract the low 64-bit value from one vector and insert it into another.
- def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
- def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (VMOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
-
- // vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
- def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
- def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>;
-
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVS{S,D} to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
@@ -561,6 +569,16 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+
+ // Move low f32 and clear high bits.
+ def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v8f32 VR256:$src), sub_ss)), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v8i32 VR256:$src), sub_ss)), sub_xmm)>;
}
let AddedComplexity = 20 in {
@@ -588,6 +606,9 @@ let Predicates = [HasAVX] in {
// Represent the same patterns above but in the form they appear for
// 256-bit types
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (i32 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_ss)>;
@@ -605,6 +626,20 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0),
(v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))), (i32 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOVSDrm addr:$src), sub_sd)>;
+
+ // Move low f64 and clear high bits.
+ def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v4f64 VR256:$src), sub_sd)), sub_xmm)>;
+
+ def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v4i64 VR256:$src), sub_sd)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -617,8 +652,6 @@ let Predicates = [HasAVX] in {
(EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Shuffle with VMOVSS
- def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
- (VMOVSSrr VR128:$src1, FR32:$src2)>;
def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
(VMOVSSrr (v4i32 VR128:$src1),
(EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
@@ -626,9 +659,17 @@ let Predicates = [HasAVX] in {
(VMOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+ // 256-bit variants
+ def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
+ def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
+ (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
+
// Shuffle with VMOVSD
- def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
- (VMOVSDrr VR128:$src1, FR64:$src2)>;
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
(VMOVSDrr (v2i64 VR128:$src1),
(EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
@@ -642,10 +683,27 @@ let Predicates = [HasAVX] in {
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
sub_sd))>;
+ // 256-bit variants
+ def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)), sub_xmm)>;
+ def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
+ (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)), sub_xmm)>;
+
// FIXME: Instead of an X86Movlps there should be an X86Movsd here, the problem
// is during lowering, where it's not possible to recognize the fold because
// it has two uses through a bitcast. One use disappears at isel time and the
// fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),
+ sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),
+ sub_sd))>;
def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),
sub_sd))>;
@@ -654,6 +712,101 @@ let Predicates = [HasAVX] in {
sub_sd))>;
}
+let Predicates = [HasSSE1] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSS to the lower bits.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSSrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+ // Shuffle with MOVSS
+ def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+ def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 15 in {
+ // Move scalar to XMM zero-extended, zeroing a VR128 then do a
+ // MOVSD to the lower bits.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ }
+
+ let AddedComplexity = 20 in {
+ // MOVSDrm zeros the high parts of the register; represent this
+ // with SUBREG_TO_REG.
+ def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+ }
+
+ // Extract and store.
+ def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+ // Shuffle with MOVSD
+ def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+ def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+ def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+
+  // FIXME: Instead of an X86Movlps there should be an X86Movsd here, the problem
+  // is during lowering, where it's not possible to recognize the fold because
+ // it has two uses through a bitcast. One use disappears at isel time and the
+ // fold opportunity reappears.
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>;
+ def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2i64 VR128:$src2),sub_sd))>;
+ def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>;
+ def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),sub_sd))>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//
@@ -661,126 +814,176 @@ let Predicates = [HasAVX] in {
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d,
+ OpndItins itins,
bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (ld_frag addr:$src))], d>;
+ [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>;
}
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>, TB, VEX;
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>, TB, VEX;
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize, VEX;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
- "movaps", SSEPackedSingle>, TB, VEX;
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB, VEX;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
- "movapd", SSEPackedDouble>, TB, OpSize, VEX;
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize, VEX;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
- "movups", SSEPackedSingle>, TB, VEX;
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
- "movupd", SSEPackedDouble, 0>, TB, OpSize, VEX;
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize, VEX;
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
- "movaps", SSEPackedSingle>, TB;
+ "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
+ TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
- "movapd", SSEPackedDouble>, TB, OpSize;
+ "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
+ TB, OpSize;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
- "movups", SSEPackedSingle>, TB;
+ "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
+ TB;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, 0>, TB, OpSize;
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ TB, OpSize;
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
- [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+ [(store (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+ [(store (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore256 (v8f32 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore256 (v4f64 VR256:$src), addr:$dst)]>, VEX;
+ [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
- [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+ [(store (v8f32 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
- [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+ [(store (v4f64 VR256:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>, VEX;
// For disassembler
let isCodeGenOnly = 1 in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movups\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>, VEX;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86vzmovl
+ (insert_subvector undef, (v4i32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzmovl
+ (insert_subvector undef, (v2i64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v8f32 (X86vzmovl
+ (insert_subvector undef, (v4f32 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
+def : Pat<(v4f64 (X86vzmovl
+ (insert_subvector undef, (v2f64 VR128:$src), (i32 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}
-def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
-
-def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
(VMOVUPDYmr addr:$dst, VR256:$src)>;
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
- [(store (v4f32 VR128:$src), addr:$dst)]>;
+ [(store (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>;
+ [(store (v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVU_P_MR>;
// For disassembler
let isCodeGenOnly = 1 in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>;
+ "movups\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>;
def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>;
+ "movupd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>;
}
let Predicates = [HasAVX] in {
@@ -797,44 +1000,9 @@ let Predicates = [HasSSE2] in
def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
(MOVUPDmr addr:$dst, VR128:$src)>;
-// Use movaps / movups for SSE integer load / store (one byte shorter).
-// The instructions selected below are then converted to MOVDQA/MOVDQU
-// during the SSE domain pass.
-let Predicates = [HasSSE1] in {
- def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
- def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
- def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
- def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-}
-
// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
// 128-bit load/store
- def : Pat<(alignedloadv4i32 addr:$src),
- (VMOVAPSrm addr:$src)>;
- def : Pat<(loadv4i32 addr:$src),
- (VMOVUPSrm addr:$src)>;
def : Pat<(alignedloadv2i64 addr:$src),
(VMOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
@@ -862,10 +1030,6 @@ let Predicates = [HasAVX] in {
(VMOVAPSYrm addr:$src)>;
def : Pat<(loadv4i64 addr:$src),
(VMOVUPSYrm addr:$src)>;
- def : Pat<(alignedloadv8i32 addr:$src),
- (VMOVAPSYrm addr:$src)>;
- def : Pat<(loadv8i32 addr:$src),
- (VMOVUPSYrm addr:$src)>;
def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
@@ -884,36 +1048,71 @@ let Predicates = [HasAVX] in {
(VMOVUPSYmr addr:$dst, VR256:$src)>;
}
+// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
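+// As a concrete illustration of the one-byte saving (opcode bytes only):
+//   movaps xmm, m128 = 0F 28 /r   vs.  movdqa xmm, m128 = 66 0F 6F /r
+//   movups xmm, m128 = 0F 10 /r   vs.  movdqu xmm, m128 = F3 0F 6F /r
+// The SSE execution-domain pass can later rewrite these back to the
+// integer-domain forms when that avoids a domain-crossing penalty.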
+
// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let neverHasSideEffects = 1 in {
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
def FsVMOVAPSrr : VPSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
def FsVMOVAPDrr : VPDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>, VEX;
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
}
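// For example, the FsMOVAPSrr alias above still emits a full 128-bit
// "movaps %xmm_src, %xmm_dst"; only the low 32 (or 64) bits carry the scalar
// value, and the upper bits are simply ignored by later users.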
// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
let canFoldAsLoad = 1, isReMaterializable = 1 in {
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
let isCodeGenOnly = 1 in {
def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>, VEX;
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX;
def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>, VEX;
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>, VEX;
}
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
+ IIC_SSE_MOVA_P_RM>;
}
//===----------------------------------------------------------------------===//
@@ -921,94 +1120,68 @@ let isCodeGenOnly = 1 in {
//===----------------------------------------------------------------------===//
multiclass sse12_mov_hilo_packed<bits<8> opc, RegisterClass RC,
- PatFrag mov_frag, string base_opc,
- string asm_opr> {
+ SDNode psnode, SDNode pdnode, string base_opc,
+ string asm_opr, InstrItinClass itin> {
def PSrm : PI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
!strconcat(base_opc, "s", asm_opr),
[(set RC:$dst,
- (mov_frag RC:$src1,
+ (psnode RC:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
- SSEPackedSingle>, TB;
+ itin, SSEPackedSingle>, TB;
def PDrm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, f64mem:$src2),
!strconcat(base_opc, "d", asm_opr),
- [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+ [(set RC:$dst, (v2f64 (pdnode RC:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))],
- SSEPackedDouble>, TB, OpSize;
+ itin, SSEPackedDouble>, TB, OpSize;
}
let AddedComplexity = 20 in {
- defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ IIC_SSE_MOV_LH>, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
- "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, X86Movlps, X86Movlpd, "movlp",
+ "\t{$src2, $dst|$dst, $src2}",
+ IIC_SSE_MOV_LH>;
}
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOV_LH>;
let Predicates = [HasAVX] in {
- let AddedComplexity = 20 in {
- // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
- def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
- // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
- def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
- }
-
- // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (VMOVLPSmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
- VR128:$src2)), addr:$src1),
- (VMOVLPSmr addr:$src1, VR128:$src2)>;
-
- // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
- def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (VMOVLPDmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (VMOVLPDmr addr:$src1, VR128:$src2)>;
-
// Shuffle with VMOVLPS
def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
(VMOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
// Shuffle with VMOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Movlpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
@@ -1026,19 +1199,9 @@ let Predicates = [HasAVX] in {
}
let Predicates = [HasSSE1] in {
- let AddedComplexity = 20 in {
- // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
- def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
- }
-
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
- def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)),
- VR128:$src2)), addr:$src1),
+ def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
+ (iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
// Shuffle with MOVLPS
@@ -1047,7 +1210,7 @@ let Predicates = [HasSSE1] in {
def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(MOVLPSrm VR128:$src1, addr:$src2)>;
// Store patterns
@@ -1061,28 +1224,11 @@ let Predicates = [HasSSE1] in {
}
let Predicates = [HasSSE2] in {
- let AddedComplexity = 20 in {
- // vector_shuffle v1, (load v2) <2, 1> using MOVLPS
- def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
- }
-
- // (store (vector_shuffle (load addr), v2, <2, 1>), addr) using MOVLPS
- def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
-
// Shuffle with MOVLPD
def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Movlpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
// Store patterns
def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
@@ -1098,12 +1244,14 @@ let Predicates = [HasSSE2] in {
//===----------------------------------------------------------------------===//
let AddedComplexity = 20 in {
- defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ IIC_SSE_MOV_LH>, VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
- defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
- "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, X86Movlhps, X86Movlhpd, "movhp",
+ "\t{$src2, $dst|$dst, $src2}",
+ IIC_SSE_MOV_LH>;
}
// v2f64 extract element 1 is always custom lowered to unpack high to low
@@ -1111,94 +1259,62 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (undef)), (iPTR 0))), addr:$dst)]>,
- VEX;
+ (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (bc_v2f64 (v4f32 VR128:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>,
- VEX;
+ (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (unpckh (bc_v2f64 (v4f32 VR128:$src)),
- (undef)), (iPTR 0))), addr:$dst)]>;
+ (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (bc_v2f64 (v4f32 VR128:$src))),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>;
+ (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
let Predicates = [HasAVX] in {
// VMOVHPS patterns
- def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (VMOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
(VMOVHPSrm VR128:$src1, addr:$src2)>;
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
- // is during lowering, where it's not possible to recognize the load fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold because it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
-
- // FIXME: This should be matched by a X86Movhpd instead. Same as above
- def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (VMOVHPDrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
- (VMOVHPSmr addr:$dst, VR128:$src)>;
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))), addr:$dst),
- (VMOVHPDmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE1] in {
// MOVHPS patterns
- def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
def : Pat<(X86Movlhps VR128:$src1,
(bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
(MOVHPSrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))), addr:$dst),
- (MOVHPSmr addr:$dst, VR128:$src)>;
}
let Predicates = [HasSSE2] in {
- // FIXME: Instead of X86Unpcklpd, there should be a X86Movlhpd here, the problem
- // is during lowering, where it's not possible to recognize the load fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ // FIXME: Instead of X86Unpckl, there should be an X86Movlhpd here; the
+ // problem is during lowering, where it's not possible to recognize the load
+ // fold because it has two uses through a bitcast. One use disappears at isel
+ // time and the fold opportunity reappears.
+ def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
-
- // FIXME: This should be matched by a X86Movhpd instead. Same as above
- def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))),
- (MOVHPDrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (f64 (vector_extract
- (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
- (MOVHPDmr addr:$dst, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -1210,13 +1326,15 @@ let AddedComplexity = 20 in {
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>,
VEX_4V;
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>,
VEX_4V;
}
let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
@@ -1224,86 +1342,36 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))],
+ IIC_SSE_MOV_LH>;
}
let Predicates = [HasAVX] in {
// MOVLHPS patterns
- let AddedComplexity = 20 in {
- def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (VMOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
- def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (VMOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
-
- // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
- def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
- (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
- }
- def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (VMOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(VMOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
// MOVHLPS patterns
- let AddedComplexity = 20 in {
- // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
- def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
- (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
-
- // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
- def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
- (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
- def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
- (VMOVHLPSrr VR128:$src1, VR128:$src1)>;
- }
-
- def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (VMOVHLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
(VMOVHLPSrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [HasSSE1] in {
// MOVLHPS patterns
- let AddedComplexity = 20 in {
- def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
- def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
-
- // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
- def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
- }
- def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
- (MOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
// MOVHLPS patterns
- let AddedComplexity = 20 in {
- // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
- def : Pat<(v4i32 (movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
-
- // vector_shuffle v1, undef <2, ?, ?, ?> using MOVHLPS
- def : Pat<(v4f32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
- def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
- (MOVHLPSrr VR128:$src1, VR128:$src1)>;
- }
-
- def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
- (MOVHLPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
(MOVHLPSrr VR128:$src1, VR128:$src2)>;
}
@@ -1312,70 +1380,97 @@ let Predicates = [HasSSE1] in {
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
+def SSE_CVT_PD : OpndItins<
+ IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM
+>;
+
+def SSE_CVT_PS : OpndItins<
+ IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM
+>;
+
+def SSE_CVT_Scalar : OpndItins<
+ IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM
+>;
+
+def SSE_CVT_SS2SI_32 : OpndItins<
+ IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM
+>;
+
+def SSE_CVT_SS2SI_64 : OpndItins<
+ IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM
+>;
+
+def SSE_CVT_SD2SI : OpndItins<
+ IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM
+>;
+
multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ [(set DstRC:$dst, (OpNode SrcRC:$src))],
+ itins.rr>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
-}
-
-multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, []>;
- let mayLoad = 1 in
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, []>;
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
+ itins.rm>;
}
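// A minimal sketch of how an OpndItins pair threads through this multiclass,
// using one of the instantiations below:
//   defm VCVTTSS2SI : sse12_cvt_s<..., "cvttss2si\t...", SSE_CVT_SS2SI_32>;
//   ==> the rr form gets IIC_SSE_CVT_SS2SI32_RR and the rm form gets
//       IIC_SSE_CVT_SS2SI32_RM, via itins.rr and itins.rm above.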
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm, Domain d> {
+ string asm, Domain d, OpndItins itins> {
def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ [(set DstRC:$dst, (OpNode SrcRC:$src))],
+ itins.rr, d>;
def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))],
+ itins.rm, d>;
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
X86MemOperand x86memop, string asm> {
+let neverHasSideEffects = 1 in {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
let mayLoad = 1 in
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src),
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
+} // neverHasSideEffects = 1
}
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_LIG;
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>,
+ XS, VEX, VEX_LIG;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W, VEX_LIG;
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX,
- VEX_LIG;
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>,
+ XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX, VEX_W, VEX_LIG;
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W, VEX_LIG;
// The assembler can recognize rr 64-bit instructions by seeing an rxx
// register, but the same isn't true when only memory operands are used;
// provide other assembly "l" and "q" forms to address this explicitly
// where appropriate.
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
- VEX_4V, VEX_LIG;
-defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
- VEX_4V, VEX_W, VEX_LIG;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
- VEX_4V, VEX_LIG;
-defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
- VEX_4V, VEX_LIG;
-defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
- VEX_4V, VEX_W, VEX_LIG;
-
-let Predicates = [HasAVX] in {
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">,
+ XS, VEX_4V, VEX_LIG;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">,
+ XS, VEX_4V, VEX_W, VEX_LIG;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">,
+ XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">,
+ XD, VEX_4V, VEX_LIG;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
+ XD, VEX_4V, VEX_W, VEX_LIG;
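+// Usage illustration (AT&T syntax): the register form is self-disambiguating,
+// while the memory forms need an explicit size suffix:
+//   vcvtsi2ss  %rax, %xmm1, %xmm0   ; width inferred from %rax
+//   vcvtsi2ssl (%rdi), %xmm1, %xmm0 ; 32-bit memory operand
+//   vcvtsi2ssq (%rdi), %xmm1, %xmm0 ; 64-bit memory operand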
+
+let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -1396,169 +1491,185 @@ let Predicates = [HasAVX] in {
}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+ "cvttss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
- "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+ "cvttsd2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>, XD;
defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SD2SI>, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+ "cvtsi2ss\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XS;
defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+ "cvtsi2sd\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XD;
defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_Scalar>, XD, REX_W;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ string asm, OpndItins itins> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], itins.rm>;
}
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm, bit Is2Addr = 1> {
+ PatFrag ld_frag, string asm, OpndItins itins,
+ bit Is2Addr = 1> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))],
+ itins.rr>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
(ins DstRC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))],
+ itins.rm>;
}
-defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD, VEX;
-defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
- XD, VEX, VEX_W;
-
-// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
-// Get rid of this hack or rename the intrinsics, there are several
-// intructions that only match with the intrinsic form, why create duplicates
-// to let them be recognized by the assembler?
-defm VCVTSD2SI : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_LIG;
-defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W,
- VEX_LIG;
+defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si", SSE_CVT_SD2SI>, XD, VEX, VEX_LIG;
+defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si",
+ SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG;
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si{l}">, XD;
+ f128mem, load, "cvtsd2si{l}", SSE_CVT_SD2SI>, XD;
defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
- f128mem, load, "cvtsd2si{q}">, XD, REX_W;
+ f128mem, load, "cvtsd2si{q}", SSE_CVT_SD2SI>, XD, REX_W;
defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss",
+ SSE_CVT_Scalar, 0>, XS, VEX_4V;
defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss",
+ SSE_CVT_Scalar, 0>, XS, VEX_4V,
VEX_W;
defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd",
+ SSE_CVT_Scalar, 0>, XD, VEX_4V;
defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd",
+ SSE_CVT_Scalar, 0>, XD,
VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss">, XS;
+ "cvtsi2ss", SSE_CVT_Scalar>, XS;
defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse_cvtsi642ss, i64mem, loadi64,
- "cvtsi2ss{q}">, XS, REX_W;
+ "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2sd">, XD;
+ "cvtsi2sd", SSE_CVT_Scalar>, XD;
defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
int_x86_sse2_cvtsi642sd, i64mem, loadi64,
- "cvtsi2sd">, XD, REX_W;
+ "cvtsi2sd", SSE_CVT_Scalar>, XD, REX_W;
}
/// SSE 1 Only
// Aliases for intrinsics
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si">, XS, VEX;
+ f32mem, load, "cvttss2si",
+ SSE_CVT_SS2SI_32>, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si">, XS, VEX, VEX_W;
+ "cvttss2si", SSE_CVT_SS2SI_64>,
+ XS, VEX, VEX_W;
defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si">, XD, VEX;
+ f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
+ XD, VEX;
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si">, XD, VEX, VEX_W;
+ "cvttsd2si", SSE_CVT_SD2SI>,
+ XD, VEX, VEX_W;
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si">, XS;
+ f32mem, load, "cvttss2si",
+ SSE_CVT_SS2SI_32>, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse_cvttss2si64, f32mem, load,
- "cvttss2si{q}">, XS, REX_W;
+ "cvttss2si{q}", SSE_CVT_SS2SI_64>,
+ XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
- f128mem, load, "cvttsd2si">, XD;
+ f128mem, load, "cvttsd2si", SSE_CVT_SD2SI>,
+ XD;
defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
- "cvttsd2si{q}">, XD, REX_W;
+ "cvttsd2si{q}", SSE_CVT_SD2SI>,
+ XD, REX_W;
let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS,
- VEX, VEX_LIG;
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
- "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
- VEX_W, VEX_LIG;
+ "cvtss2si\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG;
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+ SSEPackedSingle, SSE_CVT_PS>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_32>, XS;
defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
- "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}",
+ SSE_CVT_SS2SI_64>, XS, REX_W;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB; /* PD SSE3 form is avaiable */
+ SSEPackedSingle, SSE_CVT_PS>,
+ TB; /* PD SSE3 form is available */
}
-let Predicates = [HasSSE1] in {
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (CVTSS2SIrm addr:$src)>;
+ (VCVTSS2SIrm addr:$src)>;
def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (CVTSS2SI64rm addr:$src)>;
+ (VCVTSS2SI64rm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE1] in {
def : Pat<(int_x86_sse_cvtss2si VR128:$src),
- (VCVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (CVTSS2SIrr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si (load addr:$src)),
- (VCVTSS2SIrm addr:$src)>;
+ (CVTSS2SIrm addr:$src)>;
def : Pat<(int_x86_sse_cvtss2si64 VR128:$src),
- (VCVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+ (CVTSS2SI64rr (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
def : Pat<(int_x86_sse_cvtss2si64 (load addr:$src)),
- (VCVTSS2SI64rm addr:$src)>;
+ (CVTSS2SI64rm addr:$src)>;
}
/// SSE 2 Only
@@ -1566,43 +1677,51 @@ let Predicates = [HasAVX] in {
// Convert scalar double to scalar single
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_LIG;
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
+ IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
+ [], IIC_SSE_CVT_Scalar_RM>,
+ XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG;
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround FR64:$src))]>;
+ [(set FR32:$dst, (fround FR64:$src))],
+ IIC_SSE_CVT_Scalar_RR>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))],
+ IIC_SSE_CVT_Scalar_RM>,
+ XD,
Requires<[HasSSE2, OptForSize]>;
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
+ SSE_CVT_Scalar, 0>,
XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss",
+ SSE_CVT_Scalar>, XS;
// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
+ [], IIC_SSE_CVT_Scalar_RR>,
+ XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
+ [], IIC_SSE_CVT_Scalar_RM>,
+ XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>;
let Predicates = [HasAVX] in {
def : Pat<(f64 (fextend FR32:$src)),
@@ -1619,11 +1738,13 @@ def : Pat<(extloadf32 addr:$src),
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fextend FR32:$src))]>, XS,
+ [(set FR64:$dst, (fextend FR32:$src))],
+ IIC_SSE_CVT_Scalar_RR>, XS,
Requires<[HasSSE2]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
+ [(set FR64:$dst, (extloadf32 addr:$src))],
+ IIC_SSE_CVT_Scalar_RM>, XS,
Requires<[HasSSE2, OptForSize]>;
// extload f32 -> f64. This matches load+fextend because we have a hack in
@@ -1640,26 +1761,30 @@ def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS, VEX_4V,
+ VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V,
Requires<[HasAVX]>;
def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS, VEX_4V,
+ (load addr:$src2)))],
+ IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V,
Requires<[HasAVX]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS,
+ VR128:$src2))],
+ IIC_SSE_CVT_Scalar_RR>, XS,
Requires<[HasSSE2]>;
def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
"cvtss2sd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS,
+ (load addr:$src2)))],
+ IIC_SSE_CVT_Scalar_RM>, XS,
Requires<[HasSSE2]>;
}
@@ -1667,216 +1792,275 @@ def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
+ IIC_SSE_CVT_PS_RR>,
TB, VEX, Requires<[HasAVX]>;
def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PS_RM>,
TB, VEX, Requires<[HasAVX]>;
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))],
+ IIC_SSE_CVT_PS_RR>,
TB, Requires<[HasSSE2]>;
def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PS_RM>,
TB, Requires<[HasSSE2]>;
// FIXME: why is the non-intrinsic version described as SSE3?
// SSE2 instructions with XS prefix
def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XS, VEX, Requires<[HasAVX]>;
def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PD_RM>,
XS, VEX, Requires<[HasAVX]>;
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XS, Requires<[HasSSE2]>;
def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
- (bitconvert (memopv2i64 addr:$src))))]>,
+ (bitconvert (memopv2i64 addr:$src))))],
+ IIC_SSE_CVT_PD_RM>,
XS, Requires<[HasSSE2]>;
// Convert packed single/double fp to doubleword
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RM>, VEX;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RM>, VEX;
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RR>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PS_RM>;
def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>,
VEX;
def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))]>, VEX;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
// SSE2 packed instructions with XD prefix
def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XD, VEX, Requires<[HasAVX]>;
def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
XD, VEX, Requires<[HasAVX]>;
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
XD, Requires<[HasSSE2]>;
def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
XD, Requires<[HasSSE2]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-let mayLoad = 1 in
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-let mayLoad = 1 in
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst,
+ (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ IIC_SSE_CVT_PS_RR>, VEX;
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
+ (memopv8f32 addr:$src)))],
+ IIC_SSE_CVT_PS_RM>, VEX;
+
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>;
+ (int_x86_sse2_cvttps2dq VR128:$src))],
+ IIC_SSE_CVT_PS_RR>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-
-def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))]>,
- XS, VEX, Requires<[HasAVX]>;
-def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (memop addr:$src)))]>,
- XS, VEX, Requires<[HasAVX]>;
-
-let Predicates = [HasSSE2] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (Int_CVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>;
-}
+ (int_x86_sse2_cvttps2dq (memop addr:$src)))],
+ IIC_SSE_CVT_PS_RM>;
let Predicates = [HasAVX] in {
def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
(Int_VCVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (Int_VCVTDQ2PSrm addr:$src)>;
+
def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(VCVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (VCVTTPS2DQrm addr:$src)>;
+
def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
(VCVTDQ2PSYrr VR256:$src)>;
+ def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (memopv4i64 addr:$src)))),
+ (VCVTDQ2PSYrm addr:$src)>;
+
def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
(VCVTTPS2DQYrr VR256:$src)>;
+ def : Pat<(v8i32 (fp_to_sint (memopv8f32 addr:$src))),
+ (VCVTTPS2DQYrm addr:$src)>;
+}
+
+let Predicates = [HasSSE2] in {
+ def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
+ (Int_CVTDQ2PSrr VR128:$src)>;
+ def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
+ (Int_CVTDQ2PSrm addr:$src)>;
+
+ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
+ (CVTTPS2DQrr VR128:$src)>;
+ def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
+ (CVTTPS2DQrm addr:$src)>;
}
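// With the *rm patterns above, a load feeding the conversion is folded
// directly into the instruction; e.g. (v4f32 (sint_to_fp (load))) selects
// "cvtdq2ps (%rax), %xmm0" rather than a separate vector load followed by a
// register-register cvtdq2ps (operands illustrative).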
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttpd2dq VR128:$src))]>, VEX;
+ (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>, VEX;
let isCodeGenOnly = 1 in
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>, VEX;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX;
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
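// Example of the ambiguity the x/y suffixes resolve (operands arbitrary):
//   vcvttpd2dqx (%rax), %xmm0   // 128-bit memory source, two doubles
//   vcvttpd2dqy (%rax), %xmm0   // 256-bit memory source, four doubles
// A plain "vcvttpd2dq" with a memory operand cannot encode the source width.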
// Convert packed single to packed double
let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, TB, VEX;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, TB, VEX;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, TB, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+ "cvtps2pd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, TB;
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
TB, VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))]>,
+ (load addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
TB, VEX, Requires<[HasAVX]>;
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))],
+ IIC_SSE_CVT_PD_RR>,
TB, Requires<[HasSSE2]>;
def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
- (load addr:$src)))]>,
+ (load addr:$src)))],
+ IIC_SSE_CVT_PD_RM>,
TB, Requires<[HasSSE2]>;
// Convert packed double to packed single
@@ -1884,49 +2068,61 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
// XMM only
def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX;
// YMM only
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
- "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>, VEX, VEX_L;
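// The same x/y disambiguation applies here: "vcvtpd2psx" reads a 128-bit
// memory source and "vcvtpd2psy" a 256-bit one; both write an XMM result.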
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
(ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ IIC_SSE_CVT_PD_RR>;
def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
- (memop addr:$src)))]>;
+ (memop addr:$src)))],
+ IIC_SSE_CVT_PD_RM>;
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsic matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
(VCVTDQ2PSYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (memopv4i64 addr:$src))),
(VCVTDQ2PSYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
@@ -1949,11 +2145,6 @@ def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
(VCVTTPD2DQYrm addr:$src)>;
-def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
- (VCVTTPS2DQYrr VR256:$src)>;
-def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
- (VCVTTPS2DQYrm addr:$src)>;
-
// Match fround and fextend for 128/256-bit conversions
def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
(VCVTPD2PSYrr VR256:$src)>;
@@ -1971,70 +2162,85 @@ def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
- SDNode OpNode, ValueType VT, PatFrag ld_frag,
- string asm, string asm_alt> {
+ Operand CC, SDNode OpNode, ValueType VT,
+ PatFrag ld_frag, string asm, string asm_alt,
+ OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>;
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))],
+ itins.rr>;
def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src2, SSECC:$cc), asm,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
- (ld_frag addr:$src2), imm:$cc))]>;
+ (ld_frag addr:$src2), imm:$cc))],
+ itins.rm>;
// Accept explicit immediate argument form instead of comparison code.
let neverHasSideEffects = 1 in {
def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, []>;
+ (ins RC:$src1, RC:$src2, i8imm:$cc), asm_alt, [],
+ IIC_SSE_ALU_F32S_RR>;
let mayLoad = 1 in
def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, []>;
+ (ins RC:$src1, x86memop:$src2, i8imm:$cc), asm_alt, [],
+ IIC_SSE_ALU_F32S_RM>;
}
}
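// The two spellings assemble to the same encoding; e.g. with SSE condition
// code le = 2 (operands arbitrary):
//   cmpless %xmm1, %xmm0        // cmp${cc}ss form
//   cmpss   $2, %xmm1, %xmm0    // explicit-immediate _alt form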
-defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSE_ALU_F32S>,
XS, VEX_4V, VEX_LIG;
-defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">,
+ "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
+ SSE_ALU_F32S>, // same latency as 32-bit compare
XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
- defm CMPSS : sse12_cmp_scalar<FR32, f32mem, X86cmpss, f32, loadf32,
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmpss, f32, loadf32,
"cmp${cc}ss\t{$src2, $dst|$dst, $src2}",
- "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S>,
XS;
- defm CMPSD : sse12_cmp_scalar<FR64, f64mem, X86cmpsd, f64, loadf64,
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmpsd, f64, loadf64,
"cmp${cc}sd\t{$src2, $dst|$dst, $src2}",
- "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}">,
+ "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
+ SSE_ALU_F32S>, // same latency as 32-bit compare
XD;
}
-multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
- Intrinsic Int, string asm> {
+multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
+ Intrinsic Int, string asm, OpndItins itins> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ (ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- VR128:$src, imm:$cc))]>;
+ VR128:$src, imm:$cc))],
+ itins.rr>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- (load addr:$src), imm:$cc))]>;
+ (load addr:$src), imm:$cc))],
+ itins.rm>;
}
// Aliases to match intrinsics which expect XMM operand(s).
-defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ SSE_ALU_F32S>,
XS, VEX_4V;
-defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ SSE_ALU_F32S>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
- defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+ defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ SSE_ALU_F32S>, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ SSE_ALU_F32S>, // same latency as f32
+ XD;
}
@@ -2044,11 +2250,13 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
PatFrag ld_frag, string OpcodeStr, Domain d> {
def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
- [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))],
+ IIC_SSE_COMIS_RR, d>;
def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (OpNode (vt RC:$src1),
- (ld_frag addr:$src2)))], d>;
+ (ld_frag addr:$src2)))],
+ IIC_SSE_COMIS_RM, d>;
}
let Defs = [EFLAGS] in {
@@ -2098,89 +2306,91 @@ let Defs = [EFLAGS] in {
"comisd", SSEPackedDouble>, TB, OpSize;
} // Defs = [EFLAGS]
-// sse12_cmp_packed - sse 1 & 2 compared packed instructions
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
- Intrinsic Int, string asm, string asm_alt,
- Domain d> {
- let isAsmParserOnly = 1 in {
- def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))], d>;
- def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src2, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))], d>;
- }
+ Operand CC, Intrinsic Int, string asm,
+ string asm_alt, Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2, imm:$cc))],
+ IIC_SSE_CMPP_RR, d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2), imm:$cc))],
+ IIC_SSE_CMPP_RM, d>;
// Accept explicit immediate argument form instead of comparison code.
- def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
- asm_alt, [], d>;
- def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src2, i8imm:$cc),
- asm_alt, [], d>;
+ let neverHasSideEffects = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RR, d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc),
+ asm_alt, [], IIC_SSE_CMPP_RM, d>;
+ }
}
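// As with the scalar compares, both spellings encode identically; e.g. with
// condition code ord = 7:
//   cmpordps %xmm1, %xmm0
//   cmpps    $7, %xmm1, %xmm0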
-defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
-defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256,
"cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedSingle>, TB, VEX_4V;
-defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256,
"cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SSEPackedDouble>, TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src2, $dst|$dst, $src2}",
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedSingle>, TB;
- defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd,
"cmp${cc}pd\t{$src2, $dst|$dst, $src2}",
"cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}",
SSEPackedDouble>, TB, OpSize;
}
-let Predicates = [HasSSE1] in {
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-}
-
-let Predicates = [HasSSE2] in {
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-}
-
let Predicates = [HasAVX] in {
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>;
-def : Pat<(v8i32 (X86cmpps (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)),
(VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>;
-def : Pat<(v4i64 (X86cmppd (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
+def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (memop addr:$src2), imm:$cc)),
(VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>;
}
+let Predicates = [HasSSE1] in {
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+}
+
+let Predicates = [HasSSE2] in {
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Shuffle Instructions
//===----------------------------------------------------------------------===//
@@ -2190,14 +2400,14 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
Domain d, bit IsConvertibleToThreeAddress = 0> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
- [(set RC:$dst, (vt (shufp:$src3
- RC:$src1, (mem_frag addr:$src2))))], d>;
+ (ins RC:$src1, x86memop:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, i8imm:$src3), asm,
- [(set RC:$dst,
- (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
+ [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
+ (i8 imm:$src3))))], IIC_SSE_SHUFP, d>;
}
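// The shuffle immediate picks two elements from each source; for v4f32 the
// result is:
//   < src1[imm[1:0]], src1[imm[3:2]], src2[imm[5:4]], src2[imm[7:6]] >
// e.g. shufps $0x4E with src1 == src2 swaps the two 64-bit halves.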
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2220,133 +2430,52 @@ let Constraints = "$src1 = $dst" in {
TB;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, TB, OpSize;
-}
-
-let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
- (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
- // fall back to this for SSE1)
- def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (SHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPSrri case.
- def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-}
-
-let Predicates = [HasSSE2] in {
- // Special binary v4i32 shuffle cases with SHUFPS.
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (SHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (SHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPDrri cases.
- def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (SHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special binary v2i64 shuffle cases using SHUFPDrri.
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (SHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Generic SHUFPD patterns
- def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
- (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+ memopv2f64, SSEPackedDouble, 1 /* cvt to pshufd */>,
+ TB, OpSize;
}
let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Shufps VR128:$src1,
- (memopv4f32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ def : Pat<(v4i32 (X86Shufp VR128:$src1,
(bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
- // vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
- // fall back to this for SSE1)
- def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
- (VSHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPSrri case.
- def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
- (VSHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special binary v4i32 shuffle cases with SHUFPS.
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1, (v4i32 VR128:$src2))),
- (VSHUFPSrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v4i32 (shufp:$src3 VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2)))),
- (VSHUFPSrmi VR128:$src1, addr:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special unary SHUFPDrri cases.
- def : Pat<(v2i64 (pshufd:$src3 VR128:$src1, (undef))),
- (VSHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- def : Pat<(v2f64 (pshufd:$src3 VR128:$src1, (undef))),
- (VSHUFPDrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
- // Special binary v2i64 shuffle cases using SHUFPDrri.
- def : Pat<(v2i64 (shufp:$src3 VR128:$src1, VR128:$src2)),
- (VSHUFPDrri VR128:$src1, VR128:$src2,
- (SHUFFLE_get_shuf_imm VR128:$src3))>;
-
- def : Pat<(v2f64 (X86Shufps VR128:$src1,
- (memopv2f64 addr:$src2), (i8 imm:$imm))),
+
+ def : Pat<(v2i64 (X86Shufp VR128:$src1,
+ (memopv2i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
- def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
// 256-bit patterns
- def : Pat<(v8i32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v8i32 (X86Shufps VR256:$src1,
+ def : Pat<(v8i32 (X86Shufp VR256:$src1,
(bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
- def : Pat<(v8f32 (X86Shufps VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v8f32 (X86Shufps VR256:$src1,
- (memopv8f32 addr:$src2), (i8 imm:$imm))),
- (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>;
-
- def : Pat<(v4i64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v4i64 (X86Shufpd VR256:$src1,
+ def : Pat<(v4i64 (X86Shufp VR256:$src1,
(memopv4i64 addr:$src2), (i8 imm:$imm))),
(VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+}
- def : Pat<(v4f64 (X86Shufpd VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>;
- def : Pat<(v4f64 (X86Shufpd VR256:$src1,
- (memopv4f64 addr:$src2), (i8 imm:$imm))),
- (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>;
+let Predicates = [HasSSE1] in {
+ def : Pat<(v4i32 (X86Shufp VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+}
+
+let Predicates = [HasSSE2] in {
+ // Generic SHUFPD patterns
+ def : Pat<(v2i64 (X86Shufp VR128:$src1,
+ (memopv2i64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+ def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
}
//===----------------------------------------------------------------------===//
@@ -2354,159 +2483,80 @@ let Predicates = [HasAVX] in {
//===----------------------------------------------------------------------===//
/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
-multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
PatFrag mem_frag, RegisterClass RC,
X86MemOperand x86memop, string asm,
Domain d> {
def rr : PI<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
- (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ (vt (OpNode RC:$src1, RC:$src2)))],
+ IIC_SSE_UNPCK, d>;
def rm : PI<opc, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
asm, [(set RC:$dst,
(vt (OpNode RC:$src1,
- (mem_frag addr:$src2))))], d>;
-}
-
-let AddedComplexity = 10 in {
- defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
- defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
-
- defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
- VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
- VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
- defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
- VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, TB, VEX_4V;
- defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
- VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, TB, OpSize, VEX_4V;
-
- let Constraints = "$src1 = $dst" in {
- defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
- defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, TB, OpSize;
- defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
- SSEPackedSingle>, TB;
- defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, TB, OpSize;
- } // Constraints = "$src1 = $dst"
-} // AddedComplexity
+ (mem_frag addr:$src2))))],
+ IIC_SSE_UNPCK, d>;
+}
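+// The unpack ops interleave the corresponding halves of the two sources;
+// for v4f32 x = <x0,x1,x2,x3> and y = <y0,y1,y2,y3>:
+//   unpcklps = <x0, y0, x1, y1>    unpckhps = <x2, y2, x3, y3>
+// and for v2f64: unpcklpd = <x0, y0>, unpckhpd = <x1, y1>.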
+
+defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+
+defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, TB, VEX_4V;
+defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, TB, OpSize, VEX_4V;
-let Predicates = [HasSSE1] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (UNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
-}
+let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+} // Constraints = "$src1 = $dst"
-let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (UNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
-
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+let Predicates = [HasAVX], AddedComplexity = 1 in {
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold cause it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
-
- let AddedComplexity = 10 in
- def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
- (UNPCKLPDrr VR128:$src, VR128:$src)>;
+ (VUNPCKLPDrr VR128:$src, VR128:$src)>;
}
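// This is sound because unpcklpd of a register with itself duplicates the
// low element: for x = <a, b>, (unpcklpd x, x) = <a, a>, which is exactly
// the movddup result.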
-let Predicates = [HasAVX] in {
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
- (VUNPCKLPSrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
- (VUNPCKHPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
- (VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
- (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
- (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
- (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
- (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
-
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKLPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
- (VUNPCKLPDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
- (VUNPCKHPDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
- (VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
-
- def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
- (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
- (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
- def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
- (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
-
- // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the
+let Predicates = [HasSSE2] in {
+ // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load
// fold because it has two uses through a bitcast. One use disappears at isel
// time and the fold opportunity reappears.
def : Pat<(v2f64 (X86Movddup VR128:$src)),
- (VUNPCKLPDrr VR128:$src, VR128:$src)>;
- let AddedComplexity = 10 in
- def : Pat<(splat_lo (v2f64 VR128:$src), (undef)),
- (VUNPCKLPDrr VR128:$src, VR128:$src)>;
+ (UNPCKLPDrr VR128:$src, VR128:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -2518,29 +2568,12 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
Domain d> {
def rr32 : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set GR32:$dst, (Int RC:$src))], d>;
+ [(set GR32:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>;
def rr64 : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins RC:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>, REX_W;
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [],
+ IIC_SSE_MOVMSK, d>, REX_W;
}
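// movmskps/movmskpd gather the sign bit of each packed element into the
// low bits of the GPR result: bit i of the destination is the sign bit of
// element i, and the remaining bits are zero.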
-defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
- SSEPackedSingle>, TB;
-defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
- SSEPackedDouble>, TB, OpSize;
-
-def : Pat<(i32 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
-def : Pat<(i64 (X86fgetsign FR32:$src)),
- (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
- sub_ss))>, Requires<[HasSSE1]>;
-def : Pat<(i32 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
-def : Pat<(i64 (X86fgetsign FR64:$src)),
- (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
- sub_sd))>, Requires<[HasSSE2]>;
-
let Predicates = [HasAVX] in {
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
"movmskps", SSEPackedSingle>, TB, VEX;
@@ -2568,17 +2601,105 @@ let Predicates = [HasAVX] in {
// Assembler Only
def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedSingle>, TB, VEX;
def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedDouble>, TB,
OpSize, VEX;
def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, TB, VEX;
+ "movmskps\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedSingle>, TB, VEX;
def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, TB,
+ "movmskpd\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK,
+ SSEPackedDouble>, TB,
OpSize, VEX;
}
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+def : Pat<(i32 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i64 (X86fgetsign FR32:$src)),
+ (MOVMSKPSrr64 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src,
+ sub_ss))>, Requires<[HasSSE1]>;
+def : Pat<(i32 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr32 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+def : Pat<(i64 (X86fgetsign FR64:$src)),
+ (MOVMSKPDrr64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src,
+ sub_sd))>, Requires<[HasSSE2]>;
+
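+// The INSERT_SUBREG places the scalar into element 0 of an otherwise
+// undefined vector so that MOVMSK can read it; bit 0 of the movmsk result
+// then holds the sign of the FR32/FR64 input.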
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+/// PDI_binop_rm - Simple SSE2 binary operator.
+multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0,
+ bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))],
+ itins.rm>;
+}
+} // ExeDomain = SSEPackedInt
+
+// These are placed here, ahead of the FP logical instructions below, to
+// satisfy pattern ordering requirements with the FP versions.
+
+let Predicates = [HasAVX] in {
+defm VPAND : PDI_binop_rm<0xDB, "vpand", and, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm<0xEB, "vpor" , or, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm<0xEF, "vpxor", xor, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPANDN : PDI_binop_rm<0xDF, "vpandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PAND : PDI_binop_rm<0xDB, "pand", and, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1>;
+defm POR : PDI_binop_rm<0xEB, "por" , or, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1>;
+defm PXOR : PDI_binop_rm<0xEF, "pxor", xor, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 1>;
+defm PANDN : PDI_binop_rm<0xDF, "pandn", X86andnp, v2i64, VR128, memopv2i64,
+ i128mem, SSE_BIT_ITINS_P, 0>;
+} // Constraints = "$src1 = $dst"
+
+let Predicates = [HasAVX2] in {
+defm VPANDY : PDI_binop_rm<0xDB, "vpand", and, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPORY : PDI_binop_rm<0xEB, "vpor", or, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPXORY : PDI_binop_rm<0xEF, "vpxor", xor, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 1, 0>, VEX_4V;
+defm VPANDNY : PDI_binop_rm<0xDF, "vpandn", X86andnp, v4i64, VR256, memopv4i64,
+ i256mem, SSE_BIT_ITINS_P, 0, 0>, VEX_4V;
+}
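+// Only the v2i64/v4i64 forms are defined: the other integer vector types
+// are bitcast to these when and/or/xor are lowered, so a single set of
+// patterns covers them all.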
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//
@@ -2586,31 +2707,39 @@ let Predicates = [HasAVX] in {
/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
///
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, TB, VEX_4V;
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, itins, 0>,
+ TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, TB, OpSize, VEX_4V;
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, itins, 0>,
+ TB, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
- f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+ f32, f128mem, memopfsf32, SSEPackedSingle, itins>,
+ TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
- f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ f64, f128mem, memopfsf64, SSEPackedDouble, itins>,
+ TB, OpSize;
}
}
// Alias bitwise logical operations using SSE logical ops on packed FP values.
let mayLoad = 0 in {
- defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
- defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
- defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand,
+ SSE_BIT_ITINS_P>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for,
+ SSE_BIT_ITINS_P>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor,
+ SSE_BIT_ITINS_P>;
}
let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
- defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef,
+ SSE_BIT_ITINS_P>;
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
@@ -2623,7 +2752,7 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, [],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))], 0>, TB, VEX_4V;
+ (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
@@ -2697,118 +2826,145 @@ let isCommutable = 0 in
/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor
/// those classes below
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
- OpNode, FR32, f32mem, Is2Addr>, XS;
+ OpNode, FR32, f32mem,
+ itins.s, Is2Addr>, XS;
defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
- OpNode, FR64, f64mem, Is2Addr>, XD;
+ OpNode, FR64, f64mem,
+ itins.d, Is2Addr>, XD;
}
multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SizeItins itins,
bit Is2Addr = 1> {
let mayLoad = 0 in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, itins.s, Is2Addr>,
+ TB;
defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, itins.d, Is2Addr>,
+ TB, OpSize;
}
}
multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode,
+ SizeItins itins> {
let mayLoad = 0 in {
defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
- v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, itins.s, 0>,
+ TB;
defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
- v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, itins.d, 0>,
+ TB, OpSize;
}
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ itins.s, Is2Addr>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ itins.d, Is2Addr>, XD;
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins,
bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
- SSEPackedSingle, Is2Addr>, TB;
+ SSEPackedSingle, itins.s, Is2Addr>,
+ TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
- SSEPackedDouble, Is2Addr>, TB, OpSize;
+ SSEPackedDouble, itins.d, Is2Addr>,
+ TB, OpSize;
}
-multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr,
+ SizeItins itins> {
defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
- SSEPackedSingle, 0>, TB;
+ SSEPackedSingle, itins.s, 0>, TB;
defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
!strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
- SSEPackedDouble, 0>, TB, OpSize;
+ SSEPackedDouble, itins.d, 0>, TB, OpSize;
}
// Binary Arithmetic instructions
-defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>, VEX_4V, VEX_LIG;
-defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
-defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>, VEX_4V, VEX_LIG;
-defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+defm VADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+ VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+defm VMUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+ VEX_4V;
let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, VEX_4V, VEX_LIG;
- defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>, VEX_4V, VEX_LIG;
- defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>, VEX_4V, VEX_LIG;
- defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>, VEX_4V, VEX_LIG;
- defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min">,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VSUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VDIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+ VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max", SSE_ALU_ITINS_P>,
+ VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S, 0>,
+ VEX_4V, VEX_LIG;
+ defm VMIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P, 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min", SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+ VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
- basic_sse12_fp_binop_p<0x58, "add", fadd>,
- basic_sse12_fp_binop_s_int<0x58, "add">;
- defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul>,
- basic_sse12_fp_binop_s_int<0x59, "mul">;
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
- defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
- basic_sse12_fp_binop_s_int<0x5C, "sub">;
- defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
- basic_sse12_fp_binop_s_int<0x5E, "div">;
- defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_s_int<0x5F, "max">,
- basic_sse12_fp_binop_p_int<0x5F, "max">;
- defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
- basic_sse12_fp_binop_s_int<0x5D, "min">,
- basic_sse12_fp_binop_p_int<0x5D, "min">;
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", SSE_ALU_ITINS_P>;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", SSE_ALU_ITINS_P>;
}
}
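// Note that the scalar (_S) and packed (_P) forms now carry separate
// itineraries, and divides take SSE_DIV_ITINS_* rather than SSE_ALU_ITINS_*,
// presumably so an in-order scheduling model can give them a longer latency.
// A sketch of the bundles these instantiations assume (defined earlier in
// this patch):
//
//   def SSE_ALU_ITINS_S : OpndItins<IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM>;
//   def SSE_DIV_ITINS_S : OpndItins<IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM>;
//
// (The 128-bit packed VDIV above passes SSE_ALU_ITINS_P where
// SSE_DIV_ITINS_P looks intended, given the 256-bit form right after it.)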
@@ -2820,9 +2976,25 @@ let Constraints = "$src1 = $dst" in {
///
/// There is also a special variant form for a full-vector intrinsic form.
+def SSE_SQRTP : OpndItins<
+ IIC_SSE_SQRTP_RR, IIC_SSE_SQRTP_RM
+>;
+
+def SSE_SQRTS : OpndItins<
+ IIC_SSE_SQRTS_RR, IIC_SSE_SQRTS_RM
+>;
+
+def SSE_RCPP : OpndItins<
+ IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM
+>;
+
+def SSE_RCPS : OpndItins<
+ IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM
+>;
+
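// Each OpndItins value pairs a register-form itinerary with a load-folding
// one, so a single multiclass parameter covers both encodings. A minimal
// sketch of the helper class these defs assume (introduced earlier in this
// patch):
//
//   class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
//     InstrItinClass rr = arg_rr;
//     InstrItinClass rm = arg_rm;
//   }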
/// sse1_fp_unop_s - SSE1 unops in scalar form.
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int> {
+ SDNode OpNode, Intrinsic F32Int, OpndItins itins> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
[(set FR32:$dst, (OpNode FR32:$src))]>;
@@ -2832,14 +3004,14 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
// partial register update condition.
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ [(set FR32:$dst, (OpNode (load addr:$src)))], itins.rm>, XS,
Requires<[HasSSE1, OptForSize]>;
def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]>;
+ [(set VR128:$dst, (F32Int VR128:$src))], itins.rr>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))], itins.rm>;
}
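// Register forms pick itins.rr and memory forms itins.rm (plain SSr above is
// left on the default itinerary by this hunk). As a rough illustration,
// instantiating with the SQRT operands used later in this patch:
//
//   defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
//                              SSE_SQRTS>;
//
// yields SQRTSSm with IIC_SSE_SQRTS_RM and SQRTSSr_Int with IIC_SSE_SQRTS_RR.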
/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
@@ -2852,80 +3024,91 @@ multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins ssmem:$src1, VR128:$src2),
+ (ins VR128:$src1, ssmem:$src2),
!strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
-multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>;
def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>;
}
/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
-multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))],
+ itins.rr>;
def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))],
+ itins.rm>;
}
/// sse1_fp_unop_p_int - SSE1 intrinsic unops in packed form.
multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int> {
+ Intrinsic V4F32Int, OpndItins itins> {
def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ [(set VR128:$dst, (V4F32Int VR128:$src))],
+ itins.rr>;
def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))],
+ itins.rm>;
}
/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsic unops in packed form.
multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V4F32Int> {
+ Intrinsic V4F32Int, OpndItins itins> {
def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ [(set VR256:$dst, (V4F32Int VR256:$src))],
+ itins.rr>;
def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))],
+ itins.rm>;
}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int> {
+ SDNode OpNode, Intrinsic F64Int, OpndItins itins> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))]>;
+ [(set FR64:$dst, (OpNode FR64:$src))], itins.rr>;
// See the comments in sse1_fp_unop_s for why this is OptForSize.
def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ [(set FR64:$dst, (OpNode (load addr:$src)))], itins.rm>, XD,
Requires<[HasSSE2, OptForSize]>;
def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]>;
+ [(set VR128:$dst, (F64Int VR128:$src))], itins.rr>;
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))], itins.rm>;
}
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1 in {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ let mayLoad = 1 in
def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1,f64mem:$src2),
!strconcat(OpcodeStr,
"sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ }
def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, sdmem:$src2),
!strconcat(OpcodeStr,
@@ -2934,45 +3117,52 @@ multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr> {
/// sse2_fp_unop_p - SSE2 unops in vector forms.
multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
- SDNode OpNode> {
+ SDNode OpNode, OpndItins itins> {
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+ [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>;
}
/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
-multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins> {
def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))],
+ itins.rr>;
def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))],
+ itins.rm>;
}
/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int> {
+ Intrinsic V2F64Int, OpndItins itins> {
def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))]>;
+ [(set VR128:$dst, (V2F64Int VR128:$src))],
+ itins.rr>;
def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
+ [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))],
+ itins.rm>;
}
/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
- Intrinsic V2F64Int> {
+ Intrinsic V2F64Int, OpndItins itins> {
def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ [(set VR256:$dst, (V2F64Int VR256:$src))],
+ itins.rr>;
def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))],
+ itins.rm>;
}
let Predicates = [HasAVX] in {
@@ -2980,31 +3170,40 @@ let Predicates = [HasAVX] in {
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt">,
sse2_fp_unop_s_avx<0x51, "vsqrt">, VEX_4V, VEX_LIG;
- defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
- sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
- sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
- sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
- sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
- sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
- sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
- sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps,
+ SSE_SQRTP>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd,
+ SSE_SQRTP>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256,
+ SSE_SQRTP>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256,
+ SSE_SQRTP>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt">, VEX_4V, VEX_LIG;
- defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
- sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt, SSE_SQRTP>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256,
+ SSE_SQRTP>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps,
+ SSE_SQRTP>, VEX;
defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp">, VEX_4V, VEX_LIG;
- defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
- sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp, SSE_RCPP>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256,
+ SSE_RCPP>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps,
+ SSE_RCPP>, VEX;
}
+let AddedComplexity = 1 in {
def : Pat<(f32 (fsqrt FR32:$src)),
(VSQRTSSr (f32 (IMPLICIT_DEF)), FR32:$src)>, Requires<[HasAVX]>;
def : Pat<(f32 (fsqrt (load addr:$src))),
@@ -3027,8 +3226,9 @@ def : Pat<(f32 (X86frcp FR32:$src)),
def : Pat<(f32 (X86frcp (load addr:$src))),
(VRCPSSm (f32 (IMPLICIT_DEF)), addr:$src)>,
Requires<[HasAVX, OptForSize]>;
+}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX], AddedComplexity = 1 in {
def : Pat<(int_x86_sse_sqrt_ss VR128:$src),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)),
(VSQRTSSr (f32 (IMPLICIT_DEF)),
@@ -3063,21 +3263,26 @@ let Predicates = [HasAVX] in {
}
// Square root.
-defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
- sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
- sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
- sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
- sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps, SSE_SQRTS>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd,
+ SSE_SQRTS>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTS>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd, SSE_SQRTS>;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision; see the refinement sketch below.
-defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
- sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
- sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
-defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
- sse1_fp_unop_p<0x53, "rcp", X86frcp>,
- sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss,
+ SSE_SQRTS>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_SQRTS>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps,
+ SSE_SQRTS>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss,
+ SSE_RCPS>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPS>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, SSE_RCPS>;
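// The refinement mentioned above is typically one Newton-Raphson step on the
// roughly 12-bit estimate rcpss/rsqrtss return. Assuming x0 = rsqrt(a) and
// r0 = rcp(a), the standard iterations are:
//
//   x1 = x0 * (1.5 - 0.5 * a * x0 * x0)   // refines 1/sqrt(a)
//   r1 = r0 * (2.0 - a * r0)              // refines 1/a
//
// each step roughly doubling the number of correct bits.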
// There is no f64 version of the reciprocal approximation instructions.
@@ -3090,24 +3295,22 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
let ExeDomain = SSEPackedInt in
def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
+ [(alignednontemporalstore (v2i64 VR128:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
(VMOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasAVX]>;
@@ -3116,23 +3319,21 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
let ExeDomain = SSEPackedInt in
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
+ [(alignednontemporalstore (v4i64 VR256:$src),
+ addr:$dst)],
+ IIC_SSE_MOVNT>, VEX;
}
def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -3145,19 +3346,18 @@ def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
let AddedComplexity = 400 in { // Prefer non-temporal versions
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+ [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)],
+ IIC_SSE_MOVNT>;
def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
(MOVNTDQmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
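// Non-temporal stores are write-combined straight to memory, bypassing the
// caches; the AddedComplexity = 400 wrapper makes isel prefer these patterns
// over ordinary stores whenever the IR store is marked nontemporal and the
// address is suitably aligned.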
@@ -3165,11 +3365,13 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)],
+ IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"movnti{q}\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)],
+ IIC_SSE_MOVNT>,
TB, Requires<[HasSSE2]>;
}
@@ -3178,31 +3380,40 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
+let Predicates = [HasSSE1] in {
+def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))],
+ IIC_SSE_PREFETCH>, TB;
+}
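// How these patterns line up with the IR intrinsic
// llvm.prefetch(ptr, rw, locality, cachetype): the third pattern operand is
// the locality hint (3 selects prefetcht0 down to 0 for prefetchnta) and the
// trailing (i32 1) restricts matching to data-cache prefetches. For example,
// assuming clang, __builtin_prefetch(p, 0, 3) becomes
// llvm.prefetch(p, 0, 3, 1) and selects PREFETCHT0 here.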
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)],
+ IIC_SSE_PREFETCH>, TB, Requires<[HasSSE2]>;
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", [], IIC_SSE_PAUSE>, REP;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
- "sfence", [(int_x86_sse_sfence)]>, TB, Requires<[HasSSE1]>;
+ "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
+ TB, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+ "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
+ TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+ "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
+ TB, Requires<[HasSSE2]>;
def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
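// The Requires<> split mirrors the ISA history: sfence (store ordering)
// arrived with SSE1, while lfence (load ordering) and mfence (the full
// barrier) arrived with SSE2. A seq_cst fence in IR, e.g.
//
//   fence seq_cst
//
// is what X86MFence is typically produced from, selecting MFENCE above.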
@@ -3213,14 +3424,18 @@ def : Pat<(X86MFence), (MFENCE)>;
//===----------------------------------------------------------------------===//
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>, VEX;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>, VEX;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
+ IIC_SSE_LDMXCSR>;
def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
+ IIC_SSE_STMXCSR>;
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
@@ -3230,108 +3445,134 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
let neverHasSideEffects = 1 in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+ VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
+ VEX;
}
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+ VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
+ VEX;
// For Disassembler
let isCodeGenOnly = 1 in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>,
+ VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>,
+ VEX;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>,
+ VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqu\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVU_P_RR>,
+ VEX;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+ VEX;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
+ VEX;
let Predicates = [HasAVX] in {
def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>,
+ XS, VEX;
}
}
let mayStore = 1 in {
def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+ VEX;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
+ VEX;
let Predicates = [HasAVX] in {
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+ XS, VEX;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>,
+ XS, VEX;
}
}
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>;
+ "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- []>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
// For Disassembler
let isCodeGenOnly = 1 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>;
+ "movdqa\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVA_P_RR>;
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- []>, XS, Requires<[HasSSE2]>;
+ [], IIC_SSE_MOVU_P_RR>, XS, Requires<[HasSSE2]>;
}
let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
+ IIC_SSE_MOVA_P_RM>;
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/],
+ IIC_SSE_MOVU_P_RM>,
XS, Requires<[HasSSE2]>;
}
let mayStore = 1 in {
def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/],
+ IIC_SSE_MOVA_P_MR>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/],
+ IIC_SSE_MOVU_P_MR>,
XS, Requires<[HasSSE2]>;
}
// Intrinsic forms of MOVDQU load and store
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVU_P_MR>,
XS, VEX, Requires<[HasAVX]>;
def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVU_P_MR>,
XS, Requires<[HasSSE2]>;
} // ExeDomain = SSEPackedInt
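// Itinerary suffix convention used throughout this block: _RR is a
// register-to-register move, _RM a load (register from memory), and _MR a
// store (memory from register), with MOVA vs. MOVU distinguishing the
// aligned and unaligned variants.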
let Predicates = [HasAVX] in {
- def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
(VMOVDQUYmr addr:$dst, VR256:$src)>;
}
@@ -3340,178 +3581,326 @@ let Predicates = [HasAVX] in {
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
+def SSE_PMADD : OpndItins<
+ IIC_SSE_PMADD, IIC_SSE_PMADD
+>;
+
let ExeDomain = SSEPackedInt in { // SSE integer instructions
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- bit IsCommutable = 0, bit Is2Addr = 1> {
+ RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0,
+ bit Is2Addr = 1> {
let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
-}
-
-multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr, Intrinsic IntId,
- Intrinsic IntId2, bit Is2Addr = 1> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
+ itins.rm>;
+}
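// Parameterizing over RC, memop_frag, and x86memop lets one multiclass serve
// both the 128-bit forms and the AVX2 256-bit forms; compare the two kinds
// of instantiation used below:
//
//   defm PSUBSB   : PDI_binop_rm_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
//                                    VR128, memopv2i64, i128mem, ...>;
//   defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb", int_x86_avx2_psubs_b,
//                                    VR256, memopv4i64, i256mem, ...>;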
+
+multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, SDNode OpNode,
+ SDNode OpNode2, RegisterClass RC,
+ ValueType DstVT, ValueType SrcVT, PatFrag bc_frag,
+ ShiftOpndItins itins,
+ bit Is2Addr = 1> {
+  // The shift count in src2 is always a 128-bit operand, even for 256-bit shifts.
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, VR128:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
-}
-
-/// PDI_binop_rm - Simple SSE2 binary operator.
-multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
+ itins.rr>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, i128mem:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1,
+ (bc_frag (memopv2i64 addr:$src2)))))], itins.rm>;
+ def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
- (bitconvert (memopv2i64 addr:$src2)))))]>;
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i32 imm:$src2))))], itins.ri>;
}
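// This multiclass covers the three shift encodings: rr/rm shift by a count
// held in (or loaded into) an XMM register and use OpNode, while ri shifts
// by an immediate and uses OpNode2 (X86vshl vs. X86vshli below). The extra
// immediate form is why shifts take a ShiftOpndItins, presumably of the
// shape:
//
//   class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
//                        InstrItinClass arg_ri> {
//     InstrItinClass rr = arg_rr;
//     InstrItinClass rm = arg_rm;
//     InstrItinClass ri = arg_ri;
//   }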
-/// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64.
-///
-/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew
-/// to collapse (bitconvert VT to VT) into its operand.
-///
-multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit IsCommutable = 0, bit Is2Addr = 1> {
+/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
+multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))]>;
}
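// The separate DstVT/SrcVT parameters exist for widening operations such as
// pmuludq, whose two 64-bit result lanes are full products of the
// zero-extended even 32-bit source lanes, i.e. roughly:
//
//   dst[i] = zext(src1[2*i]) * zext(src2[2*i])    for i = 0, 1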
-
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
let Predicates = [HasAVX] in {
-defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
-defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
-defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
-defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
-defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
-defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
-defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
-defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
-defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1, 0 /*3addr*/>,
+ VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm<0xD4, "vpaddq", add, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm<0xFB, "vpsubq", sub, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd,
+ VR128, memopv2i64, i128mem,
+ SSE_PMADD, 1, 0>, VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDQY : PDI_binop_rm<0xD4, "vpaddq", add, v4i64, VR256, memopv4i64,
+ i256mem, SSE_INTALUQ_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64,
+ i256mem, SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64,
+ i256mem, SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBQY : PDI_binop_rm<0xFB, "vpsubq", sub, v4i64, VR256, memopv4i64,
+ i256mem, SSE_INTALUQ_ITINS_P, 0, 0>, VEX_4V;
+defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
// Intrinsic forms
-defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
- VEX_4V;
-defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
- VEX_4V;
-defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
- VEX_4V;
-defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
- VEX_4V;
-defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
- VEX_4V;
-defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
- VEX_4V;
-defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
- VEX_4V;
-defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
- VEX_4V;
-defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
- VEX_4V;
-defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
- VEX_4V;
-defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
- VEX_4V;
-defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
- VEX_4V;
-defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
- VEX_4V;
-defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
- VEX_4V;
-defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
- VEX_4V;
-defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
- VEX_4V;
-defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
- VEX_4V;
-defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
- VEX_4V;
-defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
- VEX_4V;
+defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMULHWY : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
+defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd,
+ VR256, memopv4i64, i256mem,
+ SSE_PMADD, 1, 0>, VEX_4V;
+defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
-defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
-defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
-defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P, 1>;
+defm PADDQ : PDI_binop_rm<0xD4, "paddq", add, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTMUL_ITINS_P, 1>;
+defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64,
+ i128mem, SSE_INTALU_ITINS_P>;
+defm PSUBQ : PDI_binop_rm<0xFB, "psubq", sub, v2i64, VR128, memopv2i64,
+ i128mem, SSE_INTALUQ_ITINS_P>;
+defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
// Intrinsic forms
-defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
-defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
-defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
-defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
-defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
-defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
+ VR128, memopv2i64, i128mem,
+ SSE_PMADD, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
} // Constraints = "$src1 = $dst"
@@ -3520,145 +3909,176 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
-defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
- int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
- VEX_4V;
-defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
- int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
- VEX_4V;
-defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
- int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
- VEX_4V;
-
-defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
- int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
- VEX_4V;
-defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
- int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
- VEX_4V;
-defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
- int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
- VEX_4V;
-
-defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
- int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
- VEX_4V;
-defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
- int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
- VEX_4V;
-
-defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
-defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
-defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
+defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
let ExeDomain = SSEPackedInt in {
- let neverHasSideEffects = 1 in {
- // 128-bit logical shifts.
- def VPSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V;
- def VPSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V;
- // PSRADQri doesn't exist in SSE[1-3].
- }
- def VPANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v2i64 (X86andnp VR128:$src1, VR128:$src2)))]>,VEX_4V;
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+}
+} // Predicates = [HasAVX]
+
+let Predicates = [HasAVX2] in {
+defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
+ VR256, v4i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
+ VR256, v4i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+
+defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
+ VR256, v16i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
+defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
+ VR256, v8i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
- def VPANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (X86andnp VR128:$src1,
- (memopv2i64 addr:$src2)))]>, VEX_4V;
-}
-}
+let ExeDomain = SSEPackedInt in {
+ // 256-bit logical shifts.
+ def VPSLLDQYri : PDIi8<0x73, MRM7r,
+ (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR256:$dst,
+ (int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2))]>,
+ VEX_4V;
+ def VPSRLDQYri : PDIi8<0x73, MRM3r,
+ (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR256:$dst,
+ (int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2))]>,
+ VEX_4V;
+ // PSRADQYri doesn't exist in SSE[1-3].
+}
+} // Predicates = [HasAVX2]
let Constraints = "$src1 = $dst" in {
-defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
- int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
-defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
- int_x86_sse2_psll_d, int_x86_sse2_pslli_d>;
-defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq",
- int_x86_sse2_psll_q, int_x86_sse2_pslli_q>;
-
-defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw",
- int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>;
-defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld",
- int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>;
-defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq",
- int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>;
-
-defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
- int_x86_sse2_psra_w, int_x86_sse2_psrai_w>;
-defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
- int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
-
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P>;
+
+defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
+ VR128, v2i64, v2i64, bc_v2i64,
+ SSE_INTSHIFT_ITINS_P>;
+
+defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
+ VR128, v8i16, v8i16, bc_v8i16,
+ SSE_INTSHIFT_ITINS_P>;
+defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
+ VR128, v4i32, v4i32, bc_v4i32,
+ SSE_INTSHIFT_ITINS_P>;
let ExeDomain = SSEPackedInt in {
- let neverHasSideEffects = 1 in {
- // 128-bit logical shifts.
- def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "pslldq\t{$src2, $dst|$dst, $src2}", []>;
- def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "psrldq\t{$src2, $dst|$dst, $src2}", []>;
- // PSRADQri doesn't exist in SSE[1-3].
- }
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
-
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}", []>;
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2))]>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2))]>;
+ // PSRADQri doesn't exist in SSE[1-3].
}
} // Constraints = "$src1 = $dst"
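
As an aside on the semantics these defs encode: the W/D/Q shifts operate
per element, with X86vshli/X86vsrli/X86vsrai covering the immediate forms.
A minimal C sketch using the standard SSE2 intrinsics, which lower to
exactly these instructions:

  #include <immintrin.h>

  __m128i shift_demo(__m128i v) {
    __m128i a = _mm_slli_epi32(v, 4);  // pslld $4: logical left, v4i32
    __m128i b = _mm_srli_epi16(v, 2);  // psrlw $2: logical right, v8i16
    __m128i c = _mm_srai_epi32(v, 1);  // psrad $1: arithmetic right, v4i32
    return _mm_xor_si128(_mm_xor_si128(a, b), c);
  }

Note there is no PSRAQ defm above: SSE2 has no 64-bit arithmetic right
shift.
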
let Predicates = [HasAVX] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
- def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
- def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
+  // Shift up / down and insert zeros.
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
- def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
+ (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
+ (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2),
+ (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
+ def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2),
+ (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2))>;
}
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
- def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSLLDQri VR128:$src1, imm:$src2))>;
- def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
- (v2i64 (PSRLDQri VR128:$src1, imm:$src2))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
- (v2f64 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>;
  // Shift up / down and insert zeros.
- def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
- def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
- (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshldq VR128:$src, (i8 imm:$amt))),
+ (PSLLDQri VR128:$src, (BYTE_imm imm:$amt))>;
+ def : Pat<(v2i64 (X86vshrdq VR128:$src, (i8 imm:$amt))),
+ (PSRLDQri VR128:$src, (BYTE_imm imm:$amt))>;
}
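
PSLLDQ/PSRLDQ, by contrast, shift the whole 128-bit value by a byte count;
BYTE_imm in the patterns above appears to rescale the intrinsic's bit count
into bytes, and the AVX2 Y forms shift each 128-bit lane independently. As
a sketch:

  #include <immintrin.h>

  __m128i byte_shift_demo(__m128i v) {
    __m128i up   = _mm_slli_si128(v, 4);  // pslldq $4: shifts in zero bytes
    __m128i down = _mm_srli_si128(v, 4);  // psrldq $4
    return _mm_or_si128(up, down);
  }

  #ifdef __AVX2__
  // The 256-bit form shifts the two 128-bit lanes separately.
  __m256i byte_shift_demo_y(__m256i v) { return _mm256_slli_si256(v, 4); }
  #endif
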
//===---------------------------------------------------------------------===//
@@ -3666,100 +4086,106 @@ let Predicates = [HasSSE2] in {
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
- defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
- 0>, VEX_4V;
- defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
- 0>, VEX_4V;
- defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
- 0>, VEX_4V;
- defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
- 0>, VEX_4V;
- defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
- 0>, VEX_4V;
- defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
- 0>, VEX_4V;
-
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
- (VPCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
- (VPCMPEQBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
- (VPCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
- (VPCMPEQWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
- (VPCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
- (VPCMPEQDrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
- (VPCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
- (VPCMPGTBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
- (VPCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
- (VPCMPGTWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
- (VPCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
- (VPCMPGTDrm VR128:$src1, addr:$src2)>;
+ defm VPCMPEQB : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPCMPEQBY : PDI_binop_rm<0x74, "vpcmpeqb", X86pcmpeq, v32i8,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQWY : PDI_binop_rm<0x75, "vpcmpeqw", X86pcmpeq, v16i16,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPEQDY : PDI_binop_rm<0x76, "vpcmpeqd", X86pcmpeq, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 1, 0>, VEX_4V;
+ defm VPCMPGTBY : PDI_binop_rm<0x64, "vpcmpgtb", X86pcmpgt, v32i8,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTWY : PDI_binop_rm<0x65, "vpcmpgtw", X86pcmpgt, v16i16,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+ defm VPCMPGTDY : PDI_binop_rm<0x66, "vpcmpgtd", X86pcmpgt, v8i32,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
- defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
- defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
- defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
- defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
- defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+ defm PCMPEQB : PDI_binop_rm<0x74, "pcmpeqb", X86pcmpeq, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+ defm PCMPEQW : PDI_binop_rm<0x75, "pcmpeqw", X86pcmpeq, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+ defm PCMPEQD : PDI_binop_rm<0x76, "pcmpeqd", X86pcmpeq, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 1>;
+ defm PCMPGTB : PDI_binop_rm<0x64, "pcmpgtb", X86pcmpgt, v16i8,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+ defm PCMPGTW : PDI_binop_rm<0x65, "pcmpgtw", X86pcmpgt, v8i16,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+ defm PCMPGTD : PDI_binop_rm<0x66, "pcmpgtd", X86pcmpgt, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
} // Constraints = "$src1 = $dst"
-let Predicates = [HasSSE2] in {
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
- (PCMPEQBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, (memop addr:$src2))),
- (PCMPEQBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, VR128:$src2)),
- (PCMPEQWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpeqw VR128:$src1, (memop addr:$src2))),
- (PCMPEQWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, VR128:$src2)),
- (PCMPEQDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpeqd VR128:$src1, (memop addr:$src2))),
- (PCMPEQDrm VR128:$src1, addr:$src2)>;
-
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, VR128:$src2)),
- (PCMPGTBrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v16i8 (X86pcmpgtb VR128:$src1, (memop addr:$src2))),
- (PCMPGTBrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, VR128:$src2)),
- (PCMPGTWrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v8i16 (X86pcmpgtw VR128:$src1, (memop addr:$src2))),
- (PCMPGTWrm VR128:$src1, addr:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
- (PCMPGTDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
- (PCMPGTDrm VR128:$src1, addr:$src2)>;
-}
-
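
As context for X86pcmpeq/X86pcmpgt: each compare writes an all-ones or
all-zeros mask per element, and pcmpgt is a signed compare. Roughly, in C:

  #include <immintrin.h>

  __m128i compare_demo(__m128i a, __m128i b) {
    __m128i eq = _mm_cmpeq_epi8(a, b);   // pcmpeqb: 0xFF where bytes equal
    __m128i gt = _mm_cmpgt_epi16(a, b);  // pcmpgtw: signed greater-than mask
    return _mm_and_si128(eq, gt);
  }
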
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
let Predicates = [HasAVX] in {
defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
- 0, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
- 0, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
- 0, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
+defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb,
+ VR256, memopv4i64, i256mem,
+ SSE_INTALU_ITINS_P, 0, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
-defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
-defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
-defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
+defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128,
+ VR128, memopv2i64, i128mem,
+ SSE_INTALU_ITINS_P>;
} // Constraints = "$src1 = $dst"
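
The pack instructions narrow two sources into one vector with saturation,
signed for PACKSSWB/PACKSSDW and unsigned for PACKUSWB. Illustrated with
the matching intrinsics:

  #include <immintrin.h>

  __m128i pack_demo(__m128i lo, __m128i hi) {
    __m128i s = _mm_packs_epi16(lo, hi);   // packsswb: clamp to [-128, 127]
    __m128i u = _mm_packus_epi16(lo, hi);  // packuswb: clamp to [0, 255]
    return _mm_xor_si128(s, u);
  }
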
//===---------------------------------------------------------------------===//
@@ -3767,103 +4193,75 @@ defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
-multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
- PatFrag bc_frag> {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, SDNode OpNode> {
def ri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt (OpNode (bitconvert (memopv2i64 addr:$src1)),
+ (i8 imm:$src2))))],
+ IIC_SSE_PSHUF>;
+}
+
+multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, SDNode OpNode> {
+def Yri : Ii8<0x70, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
- (undef))))]>;
-def mi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, (i8 imm:$src2))))]>;
+def Ymi : Ii8<0x70, MRMSrcMem,
+ (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (vt (pshuf_frag:$src2
- (bc_frag (memopv2i64 addr:$src1)),
- (undef))))]>;
+ [(set VR256:$dst,
+ (vt (OpNode (bitconvert (memopv4i64 addr:$src1)),
+ (i8 imm:$src2))))]>;
}
} // ExeDomain = SSEPackedInt
let Predicates = [HasAVX] in {
- let AddedComplexity = 5 in
- defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize,
- VEX;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
- VEX;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
- VEX;
-
- let AddedComplexity = 5 in
- def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (VPSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
- // Unary v4f32 shuffle with VPSHUF* in order to fold a load.
- def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (VPSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
-
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
- (VPSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (VPSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
- (VPSHUFHWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (VPSHUFHWmi addr:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
- (VPSHUFLWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (VPSHUFLWmi addr:$src, imm:$imm)>;
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, X86PShufd>, TB, OpSize, VEX;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, X86PShufhw>, XS, VEX;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, X86PShuflw>, XD, VEX;
+
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, X86PShufd>, TB, OpSize, VEX;
+ defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, X86PShufhw>, XS, VEX;
+ defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, X86PShuflw>, XD, VEX;
}
let Predicates = [HasSSE2] in {
- let AddedComplexity = 5 in
- defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
-
- // SSE2 with ImmT == Imm8 and XS prefix.
- defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
-
- // SSE2 with ImmT == Imm8 and XD prefix.
- defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
-
- let AddedComplexity = 5 in
- def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
- (PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
- // Unary v4f32 shuffle with PSHUF* in order to fold a load.
- def : Pat<(pshufd:$src2 (bc_v4i32 (memopv4f32 addr:$src1)), (undef)),
- (PSHUFDmi addr:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>;
-
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
- (i8 imm:$imm))),
- (PSHUFDmi addr:$src1, imm:$imm)>;
- def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
- (PSHUFDri VR128:$src1, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
- (PSHUFHWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (PSHUFHWmi addr:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
- (PSHUFLWri VR128:$src, imm:$imm)>;
- def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)),
- (i8 imm:$imm))),
- (PSHUFLWmi addr:$src, imm:$imm)>;
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, X86PShufd>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, X86PShufhw>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, X86PShuflw>, XD;
+
+ def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+ def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
}
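
The i8 immediate consumed by X86PShufd packs four 2-bit source indices,
which is what lets one multiclass cover pshufd, pshufhw and pshuflw with
only the value type and SDNode varying. A sketch of the encoding via the
standard _MM_SHUFFLE macro:

  #include <immintrin.h>

  // Broadcast element 0 of a v4i32.
  __m128i splat0(__m128i v) {
    return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 0, 0, 0));  // pshufd $0
  }

  // Reverse the four elements: imm = 0b00011011 = 0x1B.
  __m128i reverse4(__m128i v) {
    return _mm_shuffle_epi32(v, _MM_SHUFFLE(0, 1, 2, 3));  // pshufd $0x1b
  }
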
//===---------------------------------------------------------------------===//
@@ -3878,7 +4276,8 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!if(Is2Addr,
!strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))]>;
+ [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
+ IIC_SSE_UNPCK>;
def rm : PDI<opc, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
!if(Is2Addr,
@@ -3886,96 +4285,104 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
!strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (OpNode VR128:$src1,
(bc_frag (memopv2i64
- addr:$src2))))]>;
+ addr:$src2))))],
+ IIC_SSE_UNPCK>;
+}
+
+multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
+ SDNode OpNode, PatFrag bc_frag> {
+ def Yrr : PDI<opc, MRMSrcReg,
+ (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;
+ def Yrm : PDI<opc, MRMSrcMem,
+ (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (OpNode VR256:$src1,
+ (bc_frag (memopv4i64 addr:$src2))))]>;
}
let Predicates = [HasAVX] in {
- defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Punpcklwd,
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Punpckldq,
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
bc_v4i32, 0>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64, 0>, VEX_4V;
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
-
- defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Punpckhbw,
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
bc_v16i8, 0>, VEX_4V;
- defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Punpckhwd,
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
bc_v8i16, 0>, VEX_4V;
- defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Punpckhdq,
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
bc_v4i32, 0>, VEX_4V;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- VR128:$src2)))]>, VEX_4V;
- def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
+ bc_v2i64, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
+ bc_v32i8>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
+ bc_v16i16>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
+ bc_v8i32>, VEX_4V;
+ defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
+ bc_v4i64>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
+ bc_v32i8>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
+ bc_v16i16>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
+ bc_v8i32>, VEX_4V;
+ defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
+ bc_v4i64>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
- defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
- defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Punpckldq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpcklqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
-
- defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Punpckhbw, bc_v16i8>;
- defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Punpckhwd, bc_v8i16>;
- defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Punpckhdq, bc_v4i32>;
-
- /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
- /// knew to collapse (bitconvert VT to VT) into its operand.
- def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhqdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (X86Punpckhqdq VR128:$src1,
- (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
+ bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
+ bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
+ bc_v4i32>;
+ defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
+ bc_v2i64>;
+
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
+ bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
+ bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
+ bc_v4i32>;
+ defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
+ bc_v2i64>;
}
} // ExeDomain = SSEPackedInt
-// Splat v2f64 / v2i64
-let AddedComplexity = 10 in {
- def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (PUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
- def : Pat<(splat_lo (v2i64 VR128:$src), (undef)),
- (VPUNPCKLQDQrr VR128:$src, VR128:$src)>, Requires<[HasAVX]>;
+// Patterns for using AVX1 instructions with integer vectors.
+// They live here so that the AVX2 patterns above take priority.
+let Predicates = [HasAVX] in {
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
+ (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
+ (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
+ (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
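
X86Unpckl/X86Unpckh interleave the low or high halves of their two
operands; unpacking a register with itself is the idiom behind the
splat_lo patterns removed just above. Roughly:

  #include <immintrin.h>

  __m128i interleave_lo(__m128i a, __m128i b) {
    return _mm_unpacklo_epi8(a, b);   // punpcklbw: a0,b0,a1,b1,...
  }

  // Splat the low 64-bit element, as the removed splat_lo Pats selected.
  __m128i splat_lo64(__m128i v) {
    return _mm_unpacklo_epi64(v, v);  // punpcklqdq v, v
  }
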
//===---------------------------------------------------------------------===//
@@ -3991,7 +4398,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))], IIC_SSE_PINSRW>;
def rmi : Ii8<0xC4, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1,
i16mem:$src2, i32i8imm:$src3),
@@ -4000,7 +4407,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
"vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))]>;
+ imm:$src3))], IIC_SSE_PINSRW>;
}
// Extract
@@ -4014,7 +4421,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
- imm:$src2))]>;
+ imm:$src2))], IIC_SSE_PEXTRW>;
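
pinsrw and pextrw move one 16-bit lane between a GPR and an XMM register,
selected by the immediate, which is all the X86pinsrw/X86pextrw patterns
describe. A small sketch:

  #include <immintrin.h>

  int lane_demo(__m128i v, int x) {
    __m128i t = _mm_insert_epi16(v, x, 3);  // pinsrw $3: replace lane 3
    return _mm_extract_epi16(t, 3);         // pextrw $3: zero-extends to GR32
  }
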
// Insert
let Predicates = [HasAVX] in {
@@ -4038,12 +4445,23 @@ let ExeDomain = SSEPackedInt in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ IIC_SSE_MOVMSK>, VEX;
def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVMSK>, VEX;
+
+let Predicates = [HasAVX2] in {
+def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;
+def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))],
+ IIC_SSE_MOVMSK>;
} // ExeDomain = SSEPackedInt
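
PMOVMSKB gathers the top bit of every byte into a GPR (16 bits per XMM
source, 32 for the AVX2 YMM form), hence the GR32 destination. A typical
use, sketched in C (__builtin_ctz is the GCC/Clang builtin):

  #include <immintrin.h>

  // Index of the first byte equal to c, or -1 if none.
  int find_byte(__m128i data, char c) {
    __m128i eq = _mm_cmpeq_epi8(data, _mm_set1_epi8(c));
    int mask = _mm_movemask_epi8(eq);  // pmovmskb
    return mask ? __builtin_ctz(mask) : -1;
  }
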
@@ -4057,21 +4475,25 @@ let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
+ IIC_SSE_MASKMOV>, VEX;
let Uses = [RDI] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
+ IIC_SSE_MASKMOV>, VEX;
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
+ IIC_SSE_MASKMOV>;
let Uses = [RDI] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
+ IIC_SSE_MASKMOV>;
} // ExeDomain = SSEPackedInt
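
MASKMOVDQU takes its destination address implicitly in (E|R)DI, which is
what the Uses = [EDI]/[RDI] clauses model; it stores only the bytes whose
mask byte has bit 7 set:

  #include <immintrin.h>

  void masked_store(__m128i v, __m128i mask, char *p) {
    _mm_maskmoveu_si128(v, mask, p);  // maskmovdqu; p is passed in (E|R)DI
  }
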
@@ -4085,54 +4507,65 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
+ VEX;
def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>,
VEX;
def VMOV64toPQIrr : VRPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>, VEX;
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOV64toSDrr : VRPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>, VEX;
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>, VEX;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector GR32:$src)))]>;
+ (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>;
def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))],
+ IIC_SSE_MOVDQ>;
def MOV64toPQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector GR64:$src)))]>;
+ (v2i64 (scalar_to_vector GR64:$src)))],
+ IIC_SSE_MOVDQ>;
def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert GR64:$src))]>;
+ [(set FR64:$dst, (bitconvert GR64:$src))],
+ IIC_SSE_MOVDQ>;
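
These defs cover plain GPR<->XMM transfers; in hardware, movd/movq also
zero the upper lanes, a fact the X86vzmovl patterns further down exploit.
In intrinsic form:

  #include <immintrin.h>

  __m128i from_gpr(int x)   { return _mm_cvtsi32_si128(x); }  // movd r32, xmm
  int     to_gpr(__m128i v) { return _mm_cvtsi128_si32(v); }  // movd xmm, r32
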
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>,
VEX;
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert GR32:$src))]>;
+ [(set FR32:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>;
def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
@@ -4140,20 +4573,22 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))]>, VEX;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX;
def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+ VEX;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
- (iPTR 0)))]>;
+ (iPTR 0)))], IIC_SSE_MOVD_ToGP>;
def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
@@ -4161,13 +4596,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
def VMOVPQIto64rr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>,
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>,
TB, OpSize, VEX, VEX_W, Requires<[HasAVX, In64BitMode]>;
def MOVPQIto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (vector_extract (v2i64 VR128:$src),
- (iPTR 0)))]>;
+ (iPTR 0)))],
+ IIC_SSE_MOVD_ToGP>;
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
@@ -4179,36 +4616,45 @@ def VMOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
VEX;
def VMOVSDto64rr : VRPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOVSDto64mr : VRPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
def MOV64toSDrm : S3SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>;
+ [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
+ IIC_SSE_MOVDQ>;
def MOVSDto64rr : RPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bitconvert FR64:$src))]>;
+ [(set GR64:$dst, (bitconvert FR64:$src))],
+ IIC_SSE_MOVD_ToGP>;
def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
+ [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>, VEX;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bitconvert FR32:$src))]>;
+ [(set GR32:$dst, (bitconvert FR32:$src))],
+ IIC_SSE_MOVD_ToGP>;
def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
@@ -4217,23 +4663,26 @@ let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))]>,
- VEX;
+ (v4i32 (scalar_to_vector GR32:$src)))))],
+ IIC_SSE_MOVDQ>, VEX;
def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ (v2i64 (scalar_to_vector GR64:$src)))))],
+ IIC_SSE_MOVDQ>,
VEX, VEX_W;
}
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector GR32:$src)))))]>;
+ (v4i32 (scalar_to_vector GR32:$src)))))],
+ IIC_SSE_MOVDQ>;
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector GR64:$src)))))]>;
+ (v2i64 (scalar_to_vector GR64:$src)))))],
+ IIC_SSE_MOVDQ>;
}
let AddedComplexity = 20 in {
@@ -4241,29 +4690,19 @@ def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))]>,
- VEX;
+ (loadi32 addr:$src))))))],
+ IIC_SSE_MOVDQ>, VEX;
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86vzmovl (v4i32 (scalar_to_vector
- (loadi32 addr:$src))))))]>;
-}
-
-let Predicates = [HasSSE2], AddedComplexity = 20 in {
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (MOVZDI2PDIrm addr:$src)>;
+ (loadi32 addr:$src))))))],
+ IIC_SSE_MOVDQ>;
}
let Predicates = [HasAVX] in {
// AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
- def : Pat<(v4i32 (X86vzmovl (loadv4i32 addr:$src))),
- (VMOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
@@ -4278,6 +4717,13 @@ let Predicates = [HasAVX] in {
(SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}
+let Predicates = [HasSSE2], AddedComplexity = 20 in {
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVZDI2PDIrm addr:$src)>;
+}
+
// These are the correct encodings of the instructions so that we know how to
// read correct assembly, even though we continue to emit the wrong ones for
// compatibility with Darwin's buggy assembler.
@@ -4309,7 +4755,8 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))],
+ IIC_SSE_MOVDQ>, XS,
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
//===---------------------------------------------------------------------===//
@@ -4318,11 +4765,13 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>, VEX;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>, VEX;
def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+ (iPTR 0))), addr:$dst)],
+ IIC_SSE_MOVDQ>;
//===---------------------------------------------------------------------===//
// Store / copy lower 64-bits of a XMM register.
@@ -4332,14 +4781,16 @@ def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)],
+ IIC_SSE_MOVDQ>;
let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>,
+ (loadi64 addr:$src))))))],
+ IIC_SSE_MOVDQ>,
XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in
@@ -4347,9 +4798,19 @@ def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>,
+ (loadi64 addr:$src))))))],
+ IIC_SSE_MOVDQ>,
XS, Requires<[HasSSE2]>;
+let Predicates = [HasAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
+ (VMOVZQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVZQI2PQIrm addr:$src)>;
+}
+
let Predicates = [HasSSE2], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
@@ -4358,13 +4819,11 @@ let Predicates = [HasSSE2], AddedComplexity = 20 in {
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
}
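
Read from the IR side, these X86vzmovl/X86vzload patterns state what the
hardware guarantees: a movq load fills the low 64 bits and zeroes the
rest, so no separate zero-insert is needed. The store form is symmetric:

  #include <immintrin.h>

  // movq (mem), %xmm: low 64 bits loaded, high 64 bits zeroed.
  __m128i load_low64(const void *p) {
    return _mm_loadl_epi64((const __m128i *)p);
  }

  // movq %xmm, (mem): store the low 64 bits.
  void store_low64(void *p, __m128i v) {
    _mm_storel_epi64((__m128i *)p, v);
  }
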
-let Predicates = [HasAVX], AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
- (VMOVZQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)),
- (VMOVZQI2PQIrm addr:$src)>;
+let Predicates = [HasAVX] in {
+def : Pat<(v4i64 (alignedX86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>;
+def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>;
}
//===---------------------------------------------------------------------===//
@@ -4374,51 +4833,58 @@ let Predicates = [HasAVX], AddedComplexity = 20 in {
let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
+ IIC_SSE_MOVQ_RR>,
XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))],
+ IIC_SSE_MOVQ_RR>,
XS, Requires<[HasSSE2]>;
let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))]>,
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>,
XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))]>,
+ (loadv2i64 addr:$src))))],
+ IIC_SSE_MOVDQ>,
XS, Requires<[HasSSE2]>;
}
let AddedComplexity = 20 in {
- let Predicates = [HasSSE2] in {
- def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
- (MOVZPQILo2PQIrm addr:$src)>;
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (MOVZPQILo2PQIrr VR128:$src)>;
- }
let Predicates = [HasAVX] in {
- def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(VMOVZPQILo2PQIrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(VMOVZPQILo2PQIrr VR128:$src)>;
}
+ let Predicates = [HasSSE2] in {
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVZPQILo2PQIrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
+ }
}
// Instructions to match in the assembler
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
// Recognize "movd" with GR64 destination, but encode as a "movq"
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
- "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+ "movd\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_MOVDQ>, VEX, VEX_W;
// Instructions for the disassembler
// xr = XMM register
@@ -4428,7 +4894,7 @@ let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, XS;
+ "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, XS;
//===---------------------------------------------------------------------===//
// SSE3 - Conversion Instructions
@@ -4458,14 +4924,16 @@ def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
}
def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RM>;
def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", [],
+ IIC_SSE_CVT_PD_RR>;
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
- (VCVTPD2DQYrr VR256:$src)>;
+ (VCVTTPD2DQYrr VR256:$src)>;
def : Pat<(v4i32 (fp_to_sint (memopv4f64 addr:$src))),
- (VCVTPD2DQYrm addr:$src)>;
+ (VCVTTPD2DQYrm addr:$src)>;
// Convert Packed DW Integers to Packed Double FP
let Predicates = [HasAVX] in {
@@ -4480,14 +4948,16 @@ def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
}
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+                       IIC_SSE_CVT_PD_RM>;
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+                       IIC_SSE_CVT_PD_RR>;
// AVX 256-bit register conversion intrinsics
def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (bitconvert (memopv2i64 addr:$src))),
(VCVTDQ2PDYrm addr:$src)>;
def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
@@ -4497,7 +4967,7 @@ def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
(VCVTDQ2PDYrr VR128:$src)>;
-def : Pat<(v4f64 (sint_to_fp (memopv4i32 addr:$src))),
+def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
(VCVTDQ2PDYrm addr:$src)>;
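
The swap from VCVTPD2DQYrr/rm to VCVTTPD2DQYrr/rm in the fp_to_sint
patterns matters: fp_to_sint truncates toward zero, which is what
cvttpd2dq does, whereas cvtpd2dq rounds under the current MXCSR mode.
In C:

  #include <immintrin.h>

  __m128i trunc_pd(__m256d v)  { return _mm256_cvttpd_epi32(v); } // (int)x
  __m128i round_pd(__m256d v)  { return _mm256_cvtpd_epi32(v); }  // per MXCSR
  __m256d widen_i32(__m128i v) { return _mm256_cvtepi32_pd(v); }  // vcvtdq2pd
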
//===---------------------------------------------------------------------===//
@@ -4508,10 +4978,12 @@ multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
X86MemOperand x86memop> {
def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (vt (OpNode RC:$src)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src)))],
+ IIC_SSE_MOV_LH>;
def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>;
+ [(set RC:$dst, (OpNode (mem_frag addr:$src)))],
+ IIC_SSE_MOV_LH>;
}
let Predicates = [HasAVX] in {
@@ -4529,17 +5001,6 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128,
memopv4f32, f128mem>;
-let Predicates = [HasSSE3] in {
- def : Pat<(v4i32 (X86Movshdup VR128:$src)),
- (MOVSHDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
- (MOVSHDUPrm addr:$src)>;
- def : Pat<(v4i32 (X86Movsldup VR128:$src)),
- (MOVSLDUPrr VR128:$src)>;
- def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
- (MOVSLDUPrm addr:$src)>;
-}
-
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86Movshdup VR128:$src)),
(VMOVSHDUPrr VR128:$src)>;
@@ -4559,82 +5020,60 @@ let Predicates = [HasAVX] in {
(VMOVSLDUPYrm addr:$src)>;
}
+let Predicates = [HasSSE3] in {
+ def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSHDUPrm addr:$src)>;
+ def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+ def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
+ (MOVSLDUPrm addr:$src)>;
+}
+
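
MOVSHDUP and MOVSLDUP duplicate the odd and even f32 lanes respectively;
the v4i32 patterns above simply reuse the same instructions for integer
vectors:

  #include <immintrin.h>

  __m128 dup_odd(__m128 v)  { return _mm_movehdup_ps(v); }  // {v1,v1,v3,v3}
  __m128 dup_even(__m128 v) { return _mm_moveldup_ps(v); }  // {v0,v0,v2,v2}
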
//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//
multiclass sse3_replicate_dfp<string OpcodeStr> {
+let neverHasSideEffects = 1 in
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+ [], IIC_SSE_MOV_LH>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
- (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
- (undef))))]>;
+ (v2f64 (X86Movddup
+ (scalar_to_vector (loadf64 addr:$src)))))],
+ IIC_SSE_MOV_LH>;
}
// FIXME: Merge with the class above once there are patterns for the ymm version.
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (v4f64 (X86Movddup
+ (scalar_to_vector (loadf64 addr:$src)))))]>;
+}
+
let Predicates = [HasAVX] in {
- def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
- def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- []>;
- }
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
-defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
-defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
-
-let Predicates = [HasSSE3] in {
- def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
- (MOVDDUPrm addr:$src)>;
- let AddedComplexity = 5 in {
- def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
- (MOVDDUPrm addr:$src)>;
- }
- def : Pat<(X86Movddup (memopv2f64 addr:$src)),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (MOVDDUPrm addr:$src)>;
- def : Pat<(X86Movddup (bc_v2f64
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
- (MOVDDUPrm addr:$src)>;
-}
let Predicates = [HasAVX] in {
- def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
- (undef)),
- (VMOVDDUPrm addr:$src)>;
- let AddedComplexity = 5 in {
- def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4f32 (memopv2f64 addr:$src)), (undef)),
- (VMOVDDUPrm addr:$src)>;
- def : Pat<(movddup (memopv2i64 addr:$src), (undef)), (VMOVDDUPrm addr:$src)>;
- def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
- (VMOVDDUPrm addr:$src)>;
- }
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
- def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
def : Pat<(X86Movddup (bc_v2f64
(v2i64 (scalar_to_vector (loadi64 addr:$src))))),
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
@@ -4644,16 +5083,24 @@ let Predicates = [HasAVX] in {
(VMOVDDUPYrm addr:$src)>;
def : Pat<(X86Movddup (memopv4i64 addr:$src)),
(VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4f64 (scalar_to_vector (loadf64 addr:$src)))),
- (VMOVDDUPYrm addr:$src)>;
def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
(VMOVDDUPYrm addr:$src)>;
- def : Pat<(X86Movddup (v4f64 VR256:$src)),
- (VMOVDDUPYrr VR256:$src)>;
def : Pat<(X86Movddup (v4i64 VR256:$src)),
(VMOVDDUPYrr VR256:$src)>;
}
+let Predicates = [HasSSE3] in {
+ def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+ def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//
@@ -4668,45 +5115,51 @@ let Predicates = [HasAVX] in {
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
+ IIC_SSE_LDDQU>;
//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
- X86MemOperand x86memop, bit Is2Addr = 1> {
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>;
def rm : I<0xD0, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))], itins.rm>;
}
-let Predicates = [HasAVX],
- ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, 0>, TB, OpSize, VEX_4V;
- defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, 0>, TB, XD, VEX_4V;
- defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, 0>, TB, OpSize, VEX_4V;
-}
-let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
- ExeDomain = SSEPackedDouble in {
+let Predicates = [HasAVX] in {
+ let ExeDomain = SSEPackedSingle in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, SSE_ALU_F32P, 0>, TB, XD, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, SSE_ALU_F64P, 0>, TB, OpSize, VEX_4V;
+ }
+}
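+
+// The PS and PD variants are split into SSEPackedSingle/SSEPackedDouble
+// blocks so the execution-domain fixup pass can keep each form on its
+// matching floating-point data path.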
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3] in {
+ let ExeDomain = SSEPackedSingle in
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
- f128mem>, TB, XD;
+ f128mem, SSE_ALU_F32P>, TB, XD;
+ let ExeDomain = SSEPackedDouble in
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
- f128mem>, TB, OpSize;
+ f128mem, SSE_ALU_F64P>, TB, OpSize;
}
//===---------------------------------------------------------------------===//
@@ -4720,13 +5173,14 @@ multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ IIC_SSE_HADDSUB_RM>;
}
multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
X86MemOperand x86memop, SDNode OpNode, bit Is2Addr = 1> {
@@ -4734,39 +5188,48 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>;
def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))]>;
+ [(set RC:$dst, (vt (OpNode RC:$src1, (memop addr:$src2))))],
+ IIC_SSE_HADDSUB_RM>;
}
let Predicates = [HasAVX] in {
- defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, 0>, VEX_4V;
- defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, 0>, VEX_4V;
- defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
- defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ X86fhsub, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ X86fhadd, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ X86fhsub, 0>, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
- defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
- defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
- defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
- defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ let ExeDomain = SSEPackedSingle in {
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub>;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub>;
+ }
}
//===---------------------------------------------------------------------===//
@@ -4776,11 +5239,11 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128> {
+ Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
+ [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>,
OpSize;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
@@ -4788,32 +5251,101 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))], IIC_SSE_PABS_RM>,
+ OpSize;
+}
+
+/// SS3I_unop_rm_int_y - Simple 256-bit SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId256 VR256:$src))]>,
+ OpSize;
+
+ def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (IntId256
+ (bitconvert (memopv4i64 addr:$src))))]>, OpSize;
}
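+// The rr256/rm256 defs above provide the 256-bit forms (e.g. VPABSBrr256)
+// used by the AVX2 instantiations further below.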
let Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb",
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw",
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd",
int_x86_ssse3_pabs_d_128>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+let Predicates = [HasAVX2] in {
+ defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb",
+ int_x86_avx2_pabs_b>, VEX;
+ defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw",
+ int_x86_avx2_pabs_w>, VEX;
+ defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd",
+ int_x86_avx2_pabs_d>, VEX;
+}
+
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb",
int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw",
int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd",
int_x86_ssse3_pabs_d_128>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
//===---------------------------------------------------------------------===//
+def SSE_PHADDSUBD : OpndItins<
+ IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM
+>;
+def SSE_PHADDSUBSW : OpndItins<
+ IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM
+>;
+def SSE_PHADDSUBW : OpndItins<
+ IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM
+>;
+def SSE_PSHUFB : OpndItins<
+ IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM
+>;
+def SSE_PSIGN : OpndItins<
+ IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM
+>;
+def SSE_PMULHRSW : OpndItins<
+ IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW
+>;
+
+/// SS3I_binop_rm - Simple SSSE3 binary operator whose type is determined by OpVT.
+multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, OpndItins itins,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ OpSize;
+ def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))], itins.rm>, OpSize;
+}
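+
+// For example, the instantiation below,
+//   defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+//                               memopv2i64, i128mem, SSE_PHADDSUBW>;
+// produces PHADDWrr and PHADDWrm, selected directly from the X86hadd
+// target node rather than the int_x86_ssse3_phadd_w_128 intrinsic.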
+
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag128, Intrinsic IntId128,
+ Intrinsic IntId128, OpndItins itins,
bit Is2Addr = 1> {
let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
@@ -4830,94 +5362,134 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+}
+
+multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ let isCommutable = 1 in
+ def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
+ OpSize;
+ def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (IntId256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
- int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
- int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
- int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
- int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
- int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
- int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
- int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
- int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
- int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
- int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
- int_x86_ssse3_psign_d_128, 0>, VEX_4V;
-}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
- int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
+ defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, v16i8, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
+ memopv2i64, i128mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
+ memopv2i64, i128mem,
+ SSE_PSHUFB, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
+ int_x86_ssse3_phadd_sw_128,
+ SSE_PHADDSUBSW, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
+ int_x86_ssse3_phsub_sw_128,
+ SSE_PHADDSUBSW, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw_128,
+ SSE_PMADD, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
+ int_x86_ssse3_pmul_hr_sw_128,
+ SSE_PMULHRSW, 0>, VEX_4V;
+}
+
+let ImmT = NoImm, Predicates = [HasAVX2] in {
+let isCommutable = 0 in {
+ defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBW, 0>, VEX_4V;
+ defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PHADDSUBD, 0>, VEX_4V;
+ defm VPSIGNBY : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
+ memopv4i64, i256mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGNWY : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
+ memopv4i64, i256mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSIGNDY : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
+ memopv4i64, i256mem,
+ SSE_PSIGN, 0>, VEX_4V;
+ defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
+ memopv4i64, i256mem,
+ SSE_PSHUFB, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
+ int_x86_avx2_phadd_sw>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
+ int_x86_avx2_phsub_sw>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
+ int_x86_avx2_pmadd_ub_sw>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
+ int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
- int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
- int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
- int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
- int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
- int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
- int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
- int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv16i8,
- int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
- int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
- int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
- int_x86_ssse3_psign_d_128>;
-}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
- int_x86_ssse3_pmul_hr_sw_128>;
-}
-
-let Predicates = [HasSSSE3] in {
- def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>;
- def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>;
-
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
- (PSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
- (PSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
- (PSIGNDrr128 VR128:$src1, VR128:$src2)>;
-}
-
-let Predicates = [HasAVX] in {
- def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (VPSHUFBrr128 VR128:$src, VR128:$mask)>;
- def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (VPSHUFBrm128 VR128:$src, addr:$mask)>;
-
- def : Pat<(X86psignb VR128:$src1, VR128:$src2),
- (VPSIGNBrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignw VR128:$src1, VR128:$src2),
- (VPSIGNWrr128 VR128:$src1, VR128:$src2)>;
- def : Pat<(X86psignd VR128:$src1, VR128:$src2),
- (VPSIGNDrr128 VR128:$src1, VR128:$src2)>;
+ defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBW>;
+ defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBD>;
+ defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBW>;
+ defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PHADDSUBD>;
+ defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128,
+ memopv2i64, i128mem, SSE_PSIGN>;
+ defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
+ memopv2i64, i128mem, SSE_PSHUFB>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
+ int_x86_ssse3_phadd_sw_128,
+ SSE_PHADDSUBSW>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
+ int_x86_ssse3_phsub_sw_128,
+ SSE_PHADDSUBSW>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
+ int_x86_ssse3_pmadd_ub_sw_128, SSE_PMADD>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
+ int_x86_ssse3_pmul_hr_sw_128,
+ SSE_PMULHRSW>;
}
//===---------------------------------------------------------------------===//
@@ -4925,36 +5497,57 @@ let Predicates = [HasAVX] in {
//===---------------------------------------------------------------------===//
multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>, OpSize;
+ [], IIC_SSE_PALIGNR>, OpSize;
+ let mayLoad = 1 in
def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
!if(Is2Addr,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [], IIC_SSE_PALIGNR>, OpSize;
+ }
+}
+
+multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
+ let neverHasSideEffects = 1 in {
+ def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+ let mayLoad = 1 in
+ def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, OpSize;
+ }
}
let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
+let Predicates = [HasAVX2] in
+ defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
defm PALIGN : ssse3_palign<"palignr">;
-let Predicates = [HasSSSE3] in {
-def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
-def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
- (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+let Predicates = [HasAVX2] in {
+def : Pat<(v8i32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v8f32 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v16i16 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
+def : Pat<(v32i8 (X86PAlign VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}
let Predicates = [HasAVX] in {
@@ -4968,23 +5561,36 @@ def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
(VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}
+let Predicates = [HasSSSE3] in {
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+}
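+
+// Note the operand swap in the PALIGNR patterns above: X86PAlign takes
+// (src1, src2) while the instruction concatenates its operands in the
+// opposite order, so the patterns emit (PALIGNR128rr $src2, $src1, $imm).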
+
//===---------------------------------------------------------------------===//
// SSSE3 - Thread synchronization
//===---------------------------------------------------------------------===//
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
- [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>;
+ [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
+ Requires<[HasSSE3]>;
def MWAIT : PseudoI<(outs), (ins GR32:$src1, GR32:$src2),
- [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>;
+ [(int_x86_sse3_mwait GR32:$src1, GR32:$src2)]>,
+ Requires<[HasSSE3]>;
}
let Uses = [EAX, ECX, EDX] in
-def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", []>, TB,
- Requires<[HasSSE3]>;
+def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
+ TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
-def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", []>, TB,
- Requires<[HasSSE3]>;
+def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait", [], IIC_SSE_MWAIT>,
+ TB, Requires<[HasSSE3]>;
def : InstAlias<"mwait %eax, %ecx", (MWAITrr)>, Requires<[In32BitMode]>;
def : InstAlias<"mwait %rax, %rcx", (MWAITrr)>, Requires<[In64BitMode]>;
@@ -5010,6 +5616,17 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
+}
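+
+// Note: the Y forms read a 128-bit source (register or memory) and widen it
+// into a 256-bit destination, e.g. vpmovsxbw ymm, xmm sign-extends sixteen
+// bytes to sixteen words.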
+
let Predicates = [HasAVX] in {
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
VEX;
@@ -5025,6 +5642,21 @@ defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
VEX;
}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
+ int_x86_avx2_pmovsxbw>, VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
+ int_x86_avx2_pmovsxwd>, VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
+ int_x86_avx2_pmovsxdq>, VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
+ int_x86_avx2_pmovzxbw>, VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
+ int_x86_avx2_pmovzxwd>, VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
+ int_x86_avx2_pmovzxdq>, VEX;
+}
+
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
@@ -5032,70 +5664,80 @@ defm PMOVZXBW : SS41I_binop_rm_int8<0x30, "pmovzxbw", int_x86_sse41_pmovzxbw>;
defm PMOVZXWD : SS41I_binop_rm_int8<0x33, "pmovzxwd", int_x86_sse41_pmovzxwd>;
defm PMOVZXDQ : SS41I_binop_rm_int8<0x35, "pmovzxdq", int_x86_sse41_pmovzxdq>;
-let Predicates = [HasSSE41] in {
+let Predicates = [HasAVX] in {
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>;
+ (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (PMOVSXBWrm addr:$src)>;
+ (VPMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>;
+ (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (PMOVSXWDrm addr:$src)>;
+ (VPMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>;
+ (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (PMOVSXDQrm addr:$src)>;
+ (VPMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>;
+ (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (PMOVZXBWrm addr:$src)>;
+ (VPMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>;
+ (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (PMOVZXWDrm addr:$src)>;
+ (VPMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>;
+ (VPMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (PMOVZXDQrm addr:$src)>;
+ (VPMOVZXDQrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE41] in {
// Common patterns involving scalar load.
def : Pat<(int_x86_sse41_pmovsxbw (vzmovl_v2i64 addr:$src)),
- (VPMOVSXBWrm addr:$src)>;
+ (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxbw (vzload_v2i64 addr:$src)),
- (VPMOVSXBWrm addr:$src)>;
+ (PMOVSXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzmovl_v2i64 addr:$src)),
- (VPMOVSXWDrm addr:$src)>;
+ (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwd (vzload_v2i64 addr:$src)),
- (VPMOVSXWDrm addr:$src)>;
+ (PMOVSXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzmovl_v2i64 addr:$src)),
- (VPMOVSXDQrm addr:$src)>;
+ (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxdq (vzload_v2i64 addr:$src)),
- (VPMOVSXDQrm addr:$src)>;
+ (PMOVSXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzmovl_v2i64 addr:$src)),
- (VPMOVZXBWrm addr:$src)>;
+ (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbw (vzload_v2i64 addr:$src)),
- (VPMOVZXBWrm addr:$src)>;
+ (PMOVZXBWrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzmovl_v2i64 addr:$src)),
- (VPMOVZXWDrm addr:$src)>;
+ (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwd (vzload_v2i64 addr:$src)),
- (VPMOVZXWDrm addr:$src)>;
+ (PMOVZXWDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzmovl_v2i64 addr:$src)),
- (VPMOVZXDQrm addr:$src)>;
+ (PMOVZXDQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxdq (vzload_v2i64 addr:$src)),
- (VPMOVZXDQrm addr:$src)>;
+ (PMOVZXDQrm addr:$src)>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (VPMOVSXDQrr VR128:$src)>;
+def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (VPMOVSXWDrr VR128:$src)>;
+}
+
+let Predicates = [HasSSE41] in {
+def : Pat<(v2i64 (X86vsmovl (v4i32 VR128:$src))), (PMOVSXDQrr VR128:$src)>;
+def : Pat<(v4i32 (X86vsmovl (v8i16 VR128:$src))), (PMOVSXWDrr VR128:$src)>;
}
@@ -5111,6 +5753,19 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (IntId (bitconvert (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
+ OpSize;
+}
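+
+// These widen by 4x, so the memory form needs only a 64-bit load; the
+// pattern matches (v2i64 (scalar_to_vector (loadi64 addr:$src))), e.g.
+// vpmovsxbd from memory sign-extends eight bytes to eight doublewords.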
+
let Predicates = [HasAVX] in {
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
VEX;
@@ -5122,35 +5777,46 @@ defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
VEX;
}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
+ int_x86_avx2_pmovsxbd>, VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
+ int_x86_avx2_pmovsxwq>, VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
+ int_x86_avx2_pmovzxbd>, VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
+ int_x86_avx2_pmovzxwq>, VEX;
+}
+
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
defm PMOVZXWQ : SS41I_binop_rm_int4<0x34, "pmovzxwq", int_x86_sse41_pmovzxwq>;
-let Predicates = [HasSSE41] in {
+let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (PMOVSXBDrm addr:$src)>;
+ (VPMOVSXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (PMOVSXWQrm addr:$src)>;
+ (VPMOVSXWQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (PMOVZXBDrm addr:$src)>;
+ (VPMOVZXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (PMOVZXWQrm addr:$src)>;
+ (VPMOVZXWQrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbd (vzmovl_v4i32 addr:$src)),
- (VPMOVSXBDrm addr:$src)>;
+ (PMOVSXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovsxwq (vzmovl_v4i32 addr:$src)),
- (VPMOVSXWQrm addr:$src)>;
+ (PMOVSXWQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbd (vzmovl_v4i32 addr:$src)),
- (VPMOVZXBDrm addr:$src)>;
+ (PMOVZXBDrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxwq (vzmovl_v4i32 addr:$src)),
- (VPMOVZXWQrm addr:$src)>;
+ (PMOVZXWQrm addr:$src)>;
}
multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
@@ -5166,39 +5832,59 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId> {
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;
+
+ // Expecting an i16 load any-extended to an i32 value.
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i16mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (IntId (bitconvert
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
+ OpSize;
+}
+
let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
VEX;
}
+let Predicates = [HasAVX2] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
+ int_x86_avx2_pmovsxbq>, VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
+ int_x86_avx2_pmovzxbq>, VEX;
+}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
-let Predicates = [HasSSE41] in {
+let Predicates = [HasAVX] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVSXBQrm addr:$src)>;
+ (VPMOVSXBQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (PMOVZXBQrm addr:$src)>;
+ (VPMOVZXBQrm addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasSSE41] in {
// Common patterns involving scalar load
def : Pat<(int_x86_sse41_pmovsxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVSXBQrm addr:$src)>;
+ (PMOVSXBQrm addr:$src)>;
def : Pat<(int_x86_sse41_pmovzxbq
(bitconvert (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
- (VPMOVZXBQrm addr:$src)>;
+ (PMOVZXBQrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -5213,6 +5899,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), imm:$src2))]>,
OpSize;
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i8mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
@@ -5235,6 +5922,7 @@ defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
(ins i16mem:$dst, VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
@@ -5311,26 +5999,28 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let Predicates = [HasAVX] in {
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
- def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, OpSize, VEX;
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in {
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+ }
+ defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
-defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
- (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasSSE41]>;
+ (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasAVX]>;
def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
imm:$src2))),
addr:$dst),
- (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
- Requires<[HasAVX]>;
+ (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
+ Requires<[HasSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Insert Instructions
@@ -5439,17 +6129,12 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, OpSize;
}
-let Constraints = "$src1 = $dst" in
- defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let Predicates = [HasAVX] in
- defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
-
-def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
- Requires<[HasAVX]>;
-def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
- Requires<[HasSSE41]>;
+let ExeDomain = SSEPackedSingle in {
+ let Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
+ let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
@@ -5459,6 +6144,7 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
PatFrag mem_frag32, PatFrag mem_frag64,
Intrinsic V4F32Int, Intrinsic V2F64Int> {
+let ExeDomain = SSEPackedSingle in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
@@ -5469,15 +6155,16 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
OpSize;
// Vector intrinsic operation, mem
- def PSm : Ii8<opcps, MRMSrcMem,
+ def PSm : SS4AIi8<opcps, MRMSrcMem,
(outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
+ OpSize;
+} // ExeDomain = SSEPackedSingle
+let ExeDomain = SSEPackedDouble in {
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
@@ -5494,46 +6181,26 @@ multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
[(set RC:$dst,
(V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
-}
-
-multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
- RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_AVX : SS4AIi8<opcps, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_AVX : Ii8<opcps, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, TA, OpSize, Requires<[HasSSE41]>;
-
- // Vector intrinsic operation, reg
- def PDr_AVX : SS4AIi8<opcpd, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
-
- // Vector intrinsic operation, mem
- def PDm_AVX : SS4AIi8<opcpd, MRMSrcMem,
- (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>, OpSize;
+} // ExeDomain = SSEPackedDouble
}
multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
string OpcodeStr,
Intrinsic F32Int,
Intrinsic F64Int, bit Is2Addr = 1> {
- // Intrinsic operation, reg.
+let ExeDomain = GenericDomain in {
+ // Operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -5555,8 +6222,18 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
OpSize;
- // Intrinsic operation, reg.
+ // Operation, reg.
def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -5577,37 +6254,7 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
[(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
OpSize;
-}
-
-multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
- // Intrinsic operation, reg.
- def SSr_AVX : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SSm_AVX : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, reg.
- def SDr_AVX : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
-
- // Intrinsic operation, mem.
- def SDm_AVX : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, OpSize;
+} // ExeDomain = GenericDomain
}
// FP round - roundss, roundps, roundsd, roundpd
@@ -5625,12 +6272,26 @@ let Predicates = [HasAVX] in {
int_x86_sse41_round_ss,
int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
- // Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
- VEX;
- defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
- VEX;
- defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
+ def : Pat<(ffloor FR32:$src),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+ def : Pat<(f64 (ffloor FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+ def : Pat<(f32 (fnearbyint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ def : Pat<(f64 (fnearbyint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ def : Pat<(f32 (fceil FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+ def : Pat<(f64 (fceil FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+ def : Pat<(f32 (frint FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ def : Pat<(f64 (frint FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ def : Pat<(f32 (ftrunc FR32:$src)),
+ (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+ def : Pat<(f64 (ftrunc FR64:$src)),
+ (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
}
defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
@@ -5640,6 +6301,27 @@ let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+def : Pat<(ffloor FR32:$src),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>;
+def : Pat<(f64 (ffloor FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>;
+def : Pat<(f32 (fnearbyint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+def : Pat<(f64 (fnearbyint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+def : Pat<(f32 (fceil FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>;
+def : Pat<(f64 (fceil FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>;
+def : Pat<(f32 (frint FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+def : Pat<(f64 (frint FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+def : Pat<(f32 (ftrunc FR32:$src)),
+ (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>;
+def : Pat<(f64 (ftrunc FR64:$src)),
+ (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>;
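+
+// Rounding-control immediates used in the patterns above (ROUNDSS/ROUNDSD):
+//   0x1 = round down (floor), 0x2 = round up (ceil), 0x3 = truncate,
+//   0x4 = use MXCSR.RC (rint), 0xC = use MXCSR.RC with the precision
+//   exception masked (nearbyint).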
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
@@ -5649,11 +6331,11 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
OpSize, VEX;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
OpSize, VEX;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
@@ -5668,12 +6350,12 @@ def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ "ptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ "ptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
OpSize;
}
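+
+// ptest is an integer-domain instruction, so its patterns use v2i64
+// operands and memopv2i64 loads rather than v4f32 forms.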
@@ -5690,11 +6372,15 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
let Defs = [EFLAGS], Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedSingle in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
}
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
@@ -5743,7 +6429,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src))))]>, OpSize;
}
let Predicates = [HasAVX] in
@@ -5769,15 +6455,29 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
(IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (bitconvert (memopv2i64 addr:$src2))))]>, OpSize;
+}
+
+/// SS41I_binop_rm_int_y - Simple SSE 4.1 binary operator, 256-bit AVX2 form
+multiclass SS41I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId256> {
+ let isCommutable = 1 in
+ def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, OpSize;
+ def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (IntId256 VR256:$src1,
+ (bitconvert (memopv4i64 addr:$src2))))]>, OpSize;
}
let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
- defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
- 0>, VEX_4V;
defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
0>, VEX_4V;
defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
@@ -5796,17 +6496,35 @@ let Predicates = [HasAVX] in {
0>, VEX_4V;
defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
0>, VEX_4V;
+}
- def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (VPCMPEQQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (VPCMPEQQrm VR128:$src1, addr:$src2)>;
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
+ int_x86_avx2_packusdw>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb",
+ int_x86_avx2_pmins_b>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd",
+ int_x86_avx2_pmins_d>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud",
+ int_x86_avx2_pminu_d>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw",
+ int_x86_avx2_pminu_w>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb",
+ int_x86_avx2_pmaxs_b>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd",
+ int_x86_avx2_pmaxs_d>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud",
+ int_x86_avx2_pmaxu_d>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw",
+ int_x86_avx2_pmaxu_w>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
+ int_x86_avx2_pmul_dq>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
- defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
@@ -5818,36 +6536,46 @@ let Constraints = "$src1 = $dst" in {
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
}
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
-
/// SS48I_binop_rm - Simple SSE41 binary operator.
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Is2Addr = 1> {
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+ def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
- OpSize;
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>,
- OpSize;
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1,
+ (bitconvert (memop_frag addr:$src2)))))]>, OpSize;
}
-let Predicates = [HasAVX] in
- defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
-let Constraints = "$src1 = $dst" in
- defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
+let Predicates = [HasAVX] in {
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+ defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
+}
+let Predicates = [HasAVX2] in {
+ defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+ defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
+ memopv2i64, i128mem>;
+ defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
+ memopv2i64, i128mem>;
+}
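+
+// PCMPEQQ and VPCMPEQQ are now generated from SS48I_binop_rm via the
+// X86pcmpeq node, which subsumes the intrinsic-based defs and the
+// hand-written X86pcmpeq patterns removed above.
+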
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
@@ -5878,77 +6606,106 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
- defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
- int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
- defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
- int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv8f32, i256mem, 0>, VEX_4V;
+ }
+ let ExeDomain = SSEPackedDouble in {
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv4f64, i256mem, 0>, VEX_4V;
+ }
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2i64, i128mem, 0>, VEX_4V;
}
+ let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv4f32, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ VR128, memopv2f64, i128mem, 0>, VEX_4V;
+ let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
- VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ VR256, memopv8f32, i256mem, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+ let isCommutable = 0 in {
+ defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw,
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
+ defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
+ VR256, memopv4i64, i256mem, 0>, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
+ let ExeDomain = SSEPackedSingle in
defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2i64, i128mem>;
}
+ let ExeDomain = SSEPackedSingle in
defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv4f32, i128mem>;
+ let ExeDomain = SSEPackedDouble in
defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
- VR128, memopv16i8, i128mem>;
+ VR128, memopv2f64, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
-let Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
- SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
- SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
-}
+ IIC_DEFAULT, SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
}
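+// A side note on encoding (per the VEX_I8IMM bit above): the fourth register
+// operand of these blends is carried in the upper nibble of the trailing
+// imm8 -- e.g. in VBLENDVPDrr below, $src3 travels in imm8[7:4] rather than
+// in a ModRM field.
+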
+let Predicates = [HasAVX] in {
+let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvpd>;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
- memopv16i8, int_x86_sse41_blendvps>;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
- memopv16i8, int_x86_sse41_pblendvb>;
+ memopv2f64, int_x86_sse41_blendvpd>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_pd_256>;
+ memopv4f64, int_x86_avx_blendv_pd_256>;
+} // ExeDomain = SSEPackedDouble
+let ExeDomain = SSEPackedSingle in {
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv4f32, int_x86_sse41_blendvps>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
- memopv32i8, int_x86_avx_blendv_ps_256>;
+ memopv8f32, int_x86_avx_blendv_ps_256>;
+} // ExeDomain = SSEPackedSingle
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv2i64, int_x86_sse41_pblendvb>;
+}
+
+let Predicates = [HasAVX2] in {
+defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem,
+ memopv4i64, int_x86_avx2_pblendvb>;
+}
let Predicates = [HasAVX] in {
def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1),
@@ -5978,11 +6735,28 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1),
(v4f64 VR256:$src2))),
(VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+
+ def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+ def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+ (imm:$mask))),
+ (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+}
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
+ (v32i8 VR256:$src2))),
+ (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
+ def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+ (imm:$mask))),
+ (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
}
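+
+// Note the operand swap in the blendv patterns above: ISD::vselect takes its
+// first value operand where a mask bit is set, while the hardware blend takes
+// the element from its last data source in that case, so
+// (vselect M, A, B) is matched roughly as (VBLENDVPSrr B, A, M).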
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic IntId> {
def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr,
@@ -5996,13 +6770,18 @@ let Uses = [XMM0], Constraints = "$src1 = $dst" in {
"\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ (bitconvert (mem_frag addr:$src2)), XMM0))]>, OpSize;
}
}
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
+let ExeDomain = SSEPackedDouble in
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64,
+ int_x86_sse41_blendvpd>;
+let ExeDomain = SSEPackedSingle in
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32,
+ int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64,
+ int_x86_sse41_pblendvb>;
let Predicates = [HasSSE41] in {
def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1),
@@ -6020,6 +6799,17 @@ let Predicates = [HasSSE41] in {
def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1),
(v2f64 VR128:$src2))),
(BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
+
+  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+                               (imm:$mask))),
+            (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+                               (imm:$mask))),
+            (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+                               (imm:$mask))),
+            (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+
}
let Predicates = [HasAVX] in
@@ -6027,6 +6817,11 @@ def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize, VEX;
+let Predicates = [HasAVX2] in
+def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>,
+ OpSize, VEX;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -6036,43 +6831,37 @@ def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//
-/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
-multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
- def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
+/// SS42I_binop_rm - Simple SSE 4.2 binary operator
+multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>,
OpSize;
- def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
+ def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ [(set RC:$dst,
+ (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>, OpSize;
}
-let Predicates = [HasAVX] in {
- defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
- 0>, VEX_4V;
+let Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
+ memopv2i64, i128mem, 0>, VEX_4V;
- def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
- (VPCMPGTQrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
- (VPCMPGTQrm VR128:$src1, addr:$src2)>;
-}
+let Predicates = [HasAVX2] in
+ defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
+ memopv4i64, i256mem, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
-
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
- (PCMPGTQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
- (PCMPGTQrm VR128:$src1, addr:$src2)>;
+ defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
+ memopv2i64, i128mem>;
//===----------------------------------------------------------------------===//
// SSE4.2 - String/text Processing Instructions
@@ -6091,23 +6880,26 @@ multiclass pseudo_pcmpistrm<string asm> {
}
let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ let AddedComplexity = 1 in
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
- defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS] in {
+let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in {
def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
@@ -6126,24 +6918,27 @@ multiclass pseudo_pcmpestrm<string asm> {
}
let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ let AddedComplexity = 1 in
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
- defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
let Predicates = [HasAVX],
- Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ let mayLoad = 1 in
def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in {
def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ let mayLoad = 1 in
def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
@@ -6318,8 +7113,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+ (IntId128 VR128:$src1, (memopv2i64 addr:$src2)))]>, OpSize;
}
// Perform One Round of an AES Encryption/Decryption Flow
@@ -6345,44 +7139,6 @@ let Constraints = "$src1 = $dst" in {
int_x86_aesni_aesdeclast>;
}
-let Predicates = [HasAES] in {
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
- (AESENCrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
- (AESENCrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
- (AESENCLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
- (AESENCLASTrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
- (AESDECrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
- (AESDECrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
- (AESDECLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
- (AESDECLASTrm VR128:$src1, addr:$src2)>;
-}
-
-let Predicates = [HasAVX, HasAES], AddedComplexity = 20 in {
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
- (VAESENCrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
- (VAESENCrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
- (VAESENCLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
- (VAESENCLASTrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
- (VAESDECrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
- (VAESDECrm VR128:$src1, addr:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
- (VAESDECLASTrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
- (VAESDECLASTrm VR128:$src1, addr:$src2)>;
-}
-
// Perform the AES InvMixColumn Transformation
let Predicates = [HasAVX, HasAES] in {
def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6394,8 +7150,7 @@ let Predicates = [HasAVX, HasAES] in {
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst,
- (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
OpSize, VEX;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
@@ -6407,8 +7162,7 @@ def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
- [(set VR128:$dst,
- (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>,
OpSize;
// AES Round Key Generation Assist
@@ -6423,8 +7177,7 @@ let Predicates = [HasAVX, HasAES] in {
(ins i128mem:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
- imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
OpSize, VEX;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
@@ -6437,8 +7190,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
- imm:$src2))]>,
+ (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>,
OpSize;
//===----------------------------------------------------------------------===//
@@ -6446,28 +7198,32 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
//===----------------------------------------------------------------------===//
// Carry-less Multiplication instructions
-let Constraints = "$src1 = $dst" in {
-def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let neverHasSideEffects = 1 in {
+// AVX carry-less multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+let mayLoad = 1 in
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-}
-// AVX carry-less Multiplication instructions
-def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
-def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+let mayLoad = 1 in
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[]>;
+} // Constraints = "$src1 = $dst"
+} // neverHasSideEffects = 1
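+
+// For reference, the pclmulqdq imm8 picks the quadwords to multiply: bit 0
+// selects the qword of the first source and bit 4 that of the second, so
+// 0x00 multiplies the two low qwords and 0x11 the two high ones -- exactly
+// the cases the pclmul_alias multiclass below exists to spell out.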
multiclass pclmul_alias<string asm, int immop> {
@@ -6506,51 +7262,60 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, VEX;
-def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
- int_x86_avx_vbroadcastss>;
-def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
- int_x86_avx_vbroadcastss_256>;
-def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
- int_x86_avx_vbroadcast_sd_256>;
+// AVX2 adds register forms
+class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ Intrinsic Int> :
+ AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int VR128:$src))]>, VEX;
+
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcast_ss>;
+ def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcast_ss_256>;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
int_x86_avx_vbroadcastf128_pd_256>;
+let ExeDomain = SSEPackedSingle in {
+ def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
+ int_x86_avx2_vbroadcast_ss_ps>;
+ def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
+ int_x86_avx2_vbroadcast_ss_ps_256>;
+}
+let ExeDomain = SSEPackedDouble in
+def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
+ int_x86_avx2_vbroadcast_sd_pd_256>;
+
+let Predicates = [HasAVX2] in
+def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
+ int_x86_avx2_vbroadcasti128>;
+
+let Predicates = [HasAVX] in
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
(VBROADCASTF128 addr:$src)>;
-def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSSY addr:$src)>;
-def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
- (VBROADCASTSD addr:$src)>;
-def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSSY addr:$src)>;
-def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
- (VBROADCASTSD addr:$src)>;
-
-def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
- (VBROADCASTSS addr:$src)>;
-def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
- (VBROADCASTSS addr:$src)>;
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
//
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR128:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+let mayLoad = 1 in
def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f128mem:$src2, i8imm:$src3),
"vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>, VEX_4V;
+}
-def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
- (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
-
+let Predicates = [HasAVX] in {
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
@@ -6559,11 +7324,11 @@ def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
-def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
(i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
@@ -6576,18 +7341,54 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
+ (i32 imm)),
+ (VINSERTF128rm VR256:$src1, addr:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
//===----------------------------------------------------------------------===//
// VEXTRACTF128 - Extract packed floating-point values
//
+let neverHasSideEffects = 1, ExeDomain = SSEPackedSingle in {
def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
(ins VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+let mayStore = 1 in
def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
(ins f128mem:$dst, VR256:$src1, i8imm:$src2),
"vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, VEX;
+}
+// Extract and store.
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(alignedstore (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2), addr:$dst),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+
+ def : Pat<(int_x86_sse_storeu_ps addr:$dst, (int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_pd addr:$dst, (int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2)),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+ def : Pat<(int_x86_sse2_storeu_dq addr:$dst, (bc_v16i8 (int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2))),
+ (VEXTRACTF128mr addr:$dst, VR256:$src1, imm:$src2)>;
+}
+
+// AVX1 patterns
+let Predicates = [HasAVX] in {
def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
(VEXTRACTF128rr VR256:$src1, imm:$src2)>;
def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
@@ -6604,14 +7405,14 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v4f64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
-def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v2i64 (VEXTRACTF128rr
(v4i64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTF128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v8i16 (VEXTRACTF128rr
(v16i16 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
@@ -6619,14 +7420,14 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v16i8 (VEXTRACTF128rr
(v32i8 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
//===----------------------------------------------------------------------===//
// VMASKMOV - Conditional SIMD Packed Loads and Stores
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
- Intrinsic IntSt, Intrinsic IntSt256,
- PatFrag pf128, PatFrag pf256> {
+ Intrinsic IntSt, Intrinsic IntSt256> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -6647,26 +7448,26 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
}
+let ExeDomain = SSEPackedSingle in
defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
int_x86_avx_maskload_ps,
int_x86_avx_maskload_ps_256,
int_x86_avx_maskstore_ps,
- int_x86_avx_maskstore_ps_256,
- memopv4f32, memopv8f32>;
+ int_x86_avx_maskstore_ps_256>;
+let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskload_pd,
int_x86_avx_maskload_pd_256,
int_x86_avx_maskstore_pd,
- int_x86_avx_maskstore_pd_256,
- memopv2f64, memopv4f64>;
+ int_x86_avx_maskstore_pd_256>;
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop_f,
- X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
- Intrinsic IntVar, Intrinsic IntImm> {
+ X86MemOperand x86memop_i, PatFrag i_frag,
+ Intrinsic IntVar, ValueType vt> {
def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -6674,86 +7475,98 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop_i:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+ [(set RC:$dst, (IntVar RC:$src1,
+ (bitconvert (i_frag addr:$src2))))]>, VEX_4V;
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ [(set RC:$dst, (vt (X86VPermilp RC:$src1, (i8 imm:$src2))))]>, VEX;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
-}
-
-defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
- memopv4f32, memopv4i32,
- int_x86_avx_vpermilvar_ps,
- int_x86_avx_vpermil_ps>;
-defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
- memopv8f32, memopv8i32,
- int_x86_avx_vpermilvar_ps_256,
- int_x86_avx_vpermil_ps_256>;
-defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
- memopv2f64, memopv2i64,
- int_x86_avx_vpermilvar_pd,
- int_x86_avx_vpermil_pd>;
-defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
- memopv4f64, memopv4i64,
- int_x86_avx_vpermilvar_pd_256,
- int_x86_avx_vpermil_pd_256>;
-
-def : Pat<(v8f32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
- (VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4f64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
- (VPERMILPDYri VR256:$src1, imm:$imm)>;
-def : Pat<(v8i32 (X86VPermilpsy VR256:$src1, (i8 imm:$imm))),
+ [(set RC:$dst,
+ (vt (X86VPermilp (memop addr:$src1), (i8 imm:$src2))))]>, VEX;
+}
+
+let ExeDomain = SSEPackedSingle in {
+ defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
+ defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>;
+}
+let ExeDomain = SSEPackedDouble in {
+ defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
+ defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>;
+}
+
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPSYri VR256:$src1, imm:$imm)>;
-def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPermilp VR256:$src1, (i8 imm:$imm))),
(VPERMILPDYri VR256:$src1, imm:$imm)>;
+def : Pat<(v8i32 (X86VPermilp (bc_v8i32 (memopv4i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPERMILPSYmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i64 (X86VPermilp (memopv4i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDYmi addr:$src1, imm:$imm)>;
+
+def : Pat<(v2i64 (X86VPermilp VR128:$src1, (i8 imm:$imm))),
+ (VPERMILPDri VR128:$src1, imm:$imm)>;
+def : Pat<(v2i64 (X86VPermilp (memopv2i64 addr:$src1), (i8 imm:$imm))),
+ (VPERMILPDmi addr:$src1, imm:$imm)>;
+}
//===----------------------------------------------------------------------===//
// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks
//
+let ExeDomain = SSEPackedSingle in {
def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ [(set VR256:$dst, (v8f32 (X86VPerm2x128 VR256:$src1, VR256:$src2,
+ (i8 imm:$src3))))]>, VEX_4V;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, i8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- []>, VEX_4V;
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv8f32 addr:$src2),
+ (i8 imm:$src3)))]>, VEX_4V;
+}
-def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
-
-def : Pat<(int_x86_avx_vperm2f128_ps_256
- VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_pd_256
- VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-def : Pat<(int_x86_avx_vperm2f128_si_256
- VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
- (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
-
-def : Pat<(v8f32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
- (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v8i32 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4i64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v4f64 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v32i8 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
-def : Pat<(v16i16 (X86VPerm2f128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
+ (memopv8f32 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
+ (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
+ (memopv4i64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
+ (memopv4f64 addr:$src2), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
+ (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
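+
+// Worked example for the vperm2f128 imm8: bits [1:0] choose the 128-bit half
+// written to the low lane of the result (0/1 = halves of $src1, 2/3 = halves
+// of $src2), bits [5:4] do the same for the high lane, and bits 3 and 7 zero
+// a lane instead. Thus imm = 0x20 concatenates the two low halves and
+// imm = 0x31 the two high halves.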
+
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
@@ -6770,30 +7583,362 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
-//
+//===----------------------------------------------------------------------===//
+multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
let Predicates = [HasAVX, HasF16C] in {
- def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
- def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst),
- (ins VR256:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
- def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
- (ins VR256:$src1, i32i8imm:$src2),
- "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- TA, OpSize, VEX;
+ def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}",
+ [(set RC:$dst, (Int VR128:$src))]>,
+ T8, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX;
+}
+}
+
+multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
+let Predicates = [HasAVX, HasF16C] in {
+ def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
+ TA, OpSize, VEX;
+ let neverHasSideEffects = 1, mayLoad = 1 in
+ def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst),
+ (ins RC:$src1, i32i8imm:$src2),
+ "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ TA, OpSize, VEX;
+}
+}
+
+defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
+defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
+defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
+defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
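+
+// Usage note: the vcvtps2ph imm8 is the rounding control -- bits [1:0] give
+// the rounding mode and bit 2, when set, defers to MXCSR.RC instead; imm = 0
+// therefore requests round-to-nearest-even for the float-to-half conversion.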
+
+//===----------------------------------------------------------------------===//
+// AVX2 Instructions
+//===----------------------------------------------------------------------===//
+
+/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate
+multiclass AVX2_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop> {
+ let isCommutable = 1 in
+ def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
+ VEX_4V;
+ def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
+ VEX_4V;
}
+
+let isCommutable = 0 in {
+defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
+ VR128, memopv2i64, i128mem>;
+defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
+ VR256, memopv4i64, i256mem>;
+}
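+
+// Sketch of the vpblendd imm8: bit i of the immediate selects dword i of the
+// result, taking it from the second source when set and the first otherwise;
+// e.g. imm = 0x03 on the 128-bit form takes dwords 0-1 from $src2 and
+// dwords 2-3 from $src1.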
+
+//===----------------------------------------------------------------------===//
+// VPBROADCAST - Broadcast an integer element from memory or from an XMM
+// register to all elements of the destination operand
+//
+multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ Intrinsic Int128, Intrinsic Int256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
+}
+
+defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
+ int_x86_avx2_pbroadcastb_128,
+ int_x86_avx2_pbroadcastb_256>;
+defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
+ int_x86_avx2_pbroadcastw_128,
+ int_x86_avx2_pbroadcastw_256>;
+defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
+ int_x86_avx2_pbroadcastd_128,
+ int_x86_avx2_pbroadcastd_256>;
+defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
+ int_x86_avx2_pbroadcastq_128,
+ int_x86_avx2_pbroadcastq_256>;
+
+let Predicates = [HasAVX2] in {
+ def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBrm addr:$src)>;
+ def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
+ (VPBROADCASTBYrm addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWrm addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
+ (VPBROADCASTWYrm addr:$src)>;
+ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDrm addr:$src)>;
+ def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VPBROADCASTDYrm addr:$src)>;
+ def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQrm addr:$src)>;
+ def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VPBROADCASTQYrm addr:$src)>;
+}
+
+// AVX1 broadcast patterns
+let Predicates = [HasAVX] in {
+def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSYrm addr:$src)>;
+def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VBROADCASTSDrm addr:$src)>;
+
+def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
+ (VBROADCASTSSrm addr:$src)>;
+}
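+
+// The AVX1 patterns above deliberately reuse the FP broadcasts for integer
+// splats: AVX1 has no vpbroadcast, and a dword or qword splat loaded from
+// memory is bit-identical no matter which domain performs it, so
+// vbroadcastss/vbroadcastsd stand in until the AVX2 forms apply.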
+
+//===----------------------------------------------------------------------===//
+// VPERM - Permute instructions
+//
+
+multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic Int> {
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, VR256:$src2))]>, VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1,
+ (bitconvert (mem_frag addr:$src2))))]>,
+ VEX_4V;
+}
+
+defm VPERMD : avx2_perm<0x36, "vpermd", memopv4i64, int_x86_avx2_permd>;
+let ExeDomain = SSEPackedSingle in
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>;
+
+multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
+ Intrinsic Int> {
+ def Yrr : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int VR256:$src1, imm:$src2))]>, VEX;
+ def Yrm : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (Int (mem_frag addr:$src1), imm:$src2))]>,
+ VEX;
+}
+
+defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>,
+ VEX_W;
+let ExeDomain = SSEPackedDouble in
+defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>,
+ VEX_W;
+
+//===----------------------------------------------------------------------===//
+// VPERM2I128 - Permute Integer Values in 128-bit chunks
+//
+let AddedComplexity = 1 in {
+def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
+ (i8 imm:$src3))))]>, VEX_4V;
+def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
+                     (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
+ "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
+ (i8 imm:$src3)))]>, VEX_4V;
+}
+
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
+ (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
+
+def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
+ (bc_v16i16 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)),
+ (i8 imm:$imm))),
+ (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
+}
+
+
+// VINSERTI128 - Insert packed integer values
+//
+let neverHasSideEffects = 1 in {
+def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+                 []>, VEX_4V;
+def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i128mem:$src2, i8imm:$src3),
+ "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+}
+
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
+def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
+ (i32 imm)),
+ (VINSERTI128rr VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsertf128_imm VR256:$ins))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VEXTRACTI128 - Extract packed integer values
+//
+def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>,
+ VEX;
+let neverHasSideEffects = 1, mayStore = 1 in
+def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+
+let Predicates = [HasAVX2], AddedComplexity = 1 in {
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v2i64 (VEXTRACTI128rr
+ (v4i64 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v4i32 (VEXTRACTI128rr
+ (v8i32 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v8i16 (VEXTRACTI128rr
+ (v16i16 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
+ (v16i8 (VEXTRACTI128rr
+ (v32i8 VR256:$src1),
+ (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
+}
+
+//===----------------------------------------------------------------------===//
+// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
+//
+multiclass avx2_pmovmask<string OpcodeStr,
+ Intrinsic IntLd128, Intrinsic IntLd256,
+ Intrinsic IntSt128, Intrinsic IntSt256> {
+ def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
+ def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
+ def mr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
+ int_x86_avx2_maskload_d,
+ int_x86_avx2_maskload_d_256,
+ int_x86_avx2_maskstore_d,
+ int_x86_avx2_maskstore_d_256>;
+defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
+ int_x86_avx2_maskload_q,
+ int_x86_avx2_maskload_q_256,
+ int_x86_avx2_maskstore_q,
+ int_x86_avx2_maskstore_q_256>, VEX_W;
+
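+// Semantics sketch for the masked moves above: the sign bit of each mask
+// element gates the corresponding data element -- masked loads zero the
+// unselected elements, masked stores leave their memory untouched, and
+// masked-off elements cannot fault; e.g. a v4i32 mask of (-1, 0, 0, -1)
+// moves only dwords 0 and 3.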
+
+//===----------------------------------------------------------------------===//
+// Variable Bit Shifts
+//
+multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType vt128, ValueType vt256> {
+ def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
+ VEX_4V;
+ def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (vt128 (OpNode VR128:$src1,
+ (vt128 (bitconvert (memopv2i64 addr:$src2))))))]>,
+ VEX_4V;
+ def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
+ VEX_4V;
+ def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, i256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst,
+ (vt256 (OpNode VR256:$src1,
+ (vt256 (bitconvert (memopv4i64 addr:$src2))))))]>,
+ VEX_4V;
+}
+
+defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
+defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
+defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
+defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
+defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
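+
+// Each element here is shifted by the count in the corresponding element of
+// the second source, and oversized counts are not masked: the logical shifts
+// produce 0 for counts >= the element width while vpsravd fills with the
+// sign bit. E.g. vpsllvd of (1, 2, 3, 4) by counts (0, 1, 31, 32) yields
+// (1, 4, 0x80000000, 0).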
diff --git a/lib/Target/X86/X86InstrSVM.td b/lib/Target/X86/X86InstrSVM.td
new file mode 100644
index 000000000000..757dcd0b5dcb
--- /dev/null
+++ b/lib/Target/X86/X86InstrSVM.td
@@ -0,0 +1,62 @@
+//===-- X86InstrSVM.td - SVM Instruction Set Extension -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the instructions that make up the AMD SVM instruction
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SVM instructions
+
+// 0F 01 D9
+def VMMCALL : I<0x01, MRM_D9, (outs), (ins), "vmmcall", []>, TB;
+
+// 0F 01 DC
+def STGI : I<0x01, MRM_DC, (outs), (ins), "stgi", []>, TB;
+
+// 0F 01 DD
+def CLGI : I<0x01, MRM_DD, (outs), (ins), "clgi", []>, TB;
+
+// 0F 01 DE
+let Uses = [EAX] in
+def SKINIT : I<0x01, MRM_DE, (outs), (ins), "skinit\t{%eax|EAX}", []>, TB;
+
+// 0F 01 D8
+let Uses = [EAX] in
+def VMRUN32 : I<0x01, MRM_D8, (outs), (ins),
+ "vmrun\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMRUN64 : I<0x01, MRM_D8, (outs), (ins),
+ "vmrun\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DA
+let Uses = [EAX] in
+def VMLOAD32 : I<0x01, MRM_DA, (outs), (ins),
+ "vmload\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMLOAD64 : I<0x01, MRM_DA, (outs), (ins),
+ "vmload\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DB
+let Uses = [EAX] in
+def VMSAVE32 : I<0x01, MRM_DB, (outs), (ins),
+ "vmsave\t{%eax|EAX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX] in
+def VMSAVE64 : I<0x01, MRM_DB, (outs), (ins),
+ "vmsave\t{%rax|RAX}", []>, TB, Requires<[In64BitMode]>;
+
+// 0F 01 DF
+let Uses = [EAX, ECX] in
+def INVLPGA32 : I<0x01, MRM_DF, (outs), (ins),
+ "invlpga\t{%ecx, %eax|EAX, ECX}", []>, TB, Requires<[In32BitMode]>;
+let Uses = [RAX, ECX] in
+def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
+ "invlpga\t{%ecx, %rax|RAX, ECX}", []>, TB, Requires<[In64BitMode]>;
+
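+// Operand note: these instructions take their address implicitly in rAX --
+// the physical address of the VMCB for vmrun/vmload/vmsave, the SLB address
+// for skinit, and the virtual address for invlpga (with the ASID in ECX).
+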
diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td
index 8278568184ff..bdeb63ffbd69 100644
--- a/lib/Target/X86/X86InstrShiftRotate.td
+++ b/lib/Target/X86/X86InstrShiftRotate.td
@@ -1,10 +1,10 @@
-//===- X86InstrShiftRotate.td - Shift and Rotate Instrs ----*- tablegen -*-===//
-//
+//===-- X86InstrShiftRotate.td - Shift and Rotate Instrs ---*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the shift and rotate instructions.
@@ -19,44 +19,46 @@ let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+ [(set GR8:$dst, (shl GR8:$src1, CL))], IIC_SR>;
def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (shl GR16:$src1, CL))], IIC_SR>, OpSize;
def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src1, CL))]>;
+ [(set GR32:$dst, (shl GR32:$src1, CL))], IIC_SR>;
def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (shl GR64:$src1, CL))]>;
+ "shl{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>;
} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+ [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>,
+ OpSize;
def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shl{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>;
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"shl{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// NOTE: We don't include patterns for shifts of a register by one, because
// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
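+// (That rewrite lives elsewhere -- roughly, as in X86InstrCompiler.td,
+//    def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+//  which is why the SHL*r1 defs below carry empty patterns.)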
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
- "shl{b}\t$dst", []>;
+ "shl{b}\t$dst", [], IIC_SR>;
def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
- "shl{w}\t$dst", []>, OpSize;
+ "shl{w}\t$dst", [], IIC_SR>, OpSize;
def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
- "shl{l}\t$dst", []>;
+ "shl{l}\t$dst", [], IIC_SR>;
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
- "shl{q}\t$dst", []>;
+ "shl{q}\t$dst", [], IIC_SR>;
} // isConvertibleToThreeAddress = 1
} // Constraints = "$src1 = $dst"
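The NOTE above relies on fold patterns living elsewhere (X86InstrCompiler.td in this layout); a sketch of the shift-by-one Pats it alludes to, assuming the usual ADDrr defs:

def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;

This is why SHL8r1 and friends carry empty pattern lists: codegen prefers the add form, and the shift-by-one encodings stay available to the assembler.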
@@ -66,223 +68,266 @@ def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (shl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (shl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>,
+ OpSize;
def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (shl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t{%cl, $dst|$dst, %CL}",
- [(store (shl (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "shl{q}\t{%cl, $dst|$dst, CL}",
+ [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src),
"shl{w}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src),
"shl{l}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src),
"shl{q}\t{$src, $dst|$dst, $src}",
- [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Shift by 1
def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t$dst",
- [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
"shl{w}\t$dst",
- [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t$dst",
- [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
- [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+ [(set GR8:$dst, (srl GR8:$src1, CL))], IIC_SR>;
def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (srl GR16:$src1, CL))], IIC_SR>, OpSize;
def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src1, CL))]>;
+ [(set GR32:$dst, (srl GR32:$src1, CL))], IIC_SR>;
def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (srl GR64:$src1, CL))]>;
+ "shr{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>;
}
def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"shr{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shr{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))]>, OpSize;
+ [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>, OpSize;
def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"shr{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shr{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>;
// Shift right by 1
def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
"shr{b}\t$dst",
- [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (srl GR8:$src1, (i8 1)))], IIC_SR>;
def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t$dst",
- [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (srl GR16:$src1, (i8 1)))], IIC_SR>, OpSize;
def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t$dst",
- [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (srl GR32:$src1, (i8 1)))], IIC_SR>;
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
- [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (srl GR64:$src1, (i8 1)))], IIC_SR>;
} // Constraints = "$src1 = $dst"
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (srl (loadi8 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
+ [(store (srl (loadi16 addr:$dst), CL), addr:$dst)], IIC_SR>,
OpSize;
def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (srl (loadi32 addr:$dst), CL), addr:$dst)], IIC_SR>;
def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t{%cl, $dst|$dst, %CL}",
- [(store (srl (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "shr{q}\t{%cl, $dst|$dst, CL}",
+ [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>;
}
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src),
"shr{w}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src),
"shr{l}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src),
"shr{q}\t{$src, $dst|$dst, $src}",
- [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Shift by 1
def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t$dst",
- [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
"shr{w}\t$dst",
- [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,OpSize;
+ [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>, OpSize;
def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t$dst",
- [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
- [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+ [(set GR8:$dst, (sra GR8:$src1, CL))],
+ IIC_SR>;
def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (sra GR16:$src1, CL))],
+ IIC_SR>, OpSize;
def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src1, CL))]>;
+ [(set GR32:$dst, (sra GR32:$src1, CL))],
+ IIC_SR>;
def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (sra GR64:$src1, CL))]>;
+ "sar{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (sra GR64:$src1, CL))],
+ IIC_SR>;
}
def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"sar{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"sar{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))]>,
+ [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
OpSize;
def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"sar{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"sar{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// Shift by 1
def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t$dst",
- [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (sra GR8:$src1, (i8 1)))],
+ IIC_SR>;
def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t$dst",
- [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (sra GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t$dst",
- [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (sra GR32:$src1, (i8 1)))],
+ IIC_SR>;
def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
- [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (sra GR64:$src1, (i8 1)))],
+ IIC_SR>;
} // Constraints = "$src1 = $dst"
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (sra (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (sra (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (sra (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t{%cl, $dst|$dst, %CL}",
- [(store (sra (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "sar{q}\t{%cl, $dst|$dst, CL}",
+ [(store (sra (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
}
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src),
"sar{w}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src),
"sar{l}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src),
"sar{q}\t{$src, $dst|$dst, $src}",
- [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Shift by 1
def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t$dst",
- [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
"sar{w}\t$dst",
- [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t$dst",
- [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
"sar{q}\t$dst",
- [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
//===----------------------------------------------------------------------===//
// Rotate instructions
@@ -290,125 +335,125 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
let Constraints = "$src1 = $dst" in {
def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t$dst", []>;
+ "rcl{b}\t$dst", [], IIC_SR>;
def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t$dst", []>, OpSize;
+ "rcl{w}\t$dst", [], IIC_SR>, OpSize;
def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
let Uses = [CL] in
def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t$dst", []>;
+ "rcl{l}\t$dst", [], IIC_SR>;
def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
- "rcl{q}\t$dst", []>;
+ "rcl{q}\t$dst", [], IIC_SR>;
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t$dst", []>;
+ "rcr{b}\t$dst", [], IIC_SR>;
def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t$dst", []>, OpSize;
+ "rcr{w}\t$dst", [], IIC_SR>, OpSize;
def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
let Uses = [CL] in
def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t$dst", []>;
+ "rcr{l}\t$dst", [], IIC_SR>;
def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
- "rcr{q}\t$dst", []>;
+ "rcr{q}\t$dst", [], IIC_SR>;
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
} // Constraints = "$src1 = $dst"
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t$dst", []>;
+ "rcl{b}\t$dst", [], IIC_SR>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t$dst", []>, OpSize;
+ "rcl{w}\t$dst", [], IIC_SR>, OpSize;
def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t$dst", []>;
+ "rcl{l}\t$dst", [], IIC_SR>;
def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t$dst", []>;
+ "rcl{q}\t$dst", [], IIC_SR>;
def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcl{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t$dst", []>;
+ "rcr{b}\t$dst", [], IIC_SR>;
def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt),
- "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t$dst", []>, OpSize;
+ "rcr{w}\t$dst", [], IIC_SR>, OpSize;
def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt),
- "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
+ "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize;
def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t$dst", []>;
+ "rcr{l}\t$dst", [], IIC_SR>;
def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt),
- "rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t$dst", []>;
+ "rcr{q}\t$dst", [], IIC_SR>;
def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt),
- "rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
+ "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>;
let Uses = [CL] in {
def RCL8mCL : I<0xD2, MRM2m, (outs), (ins i8mem:$dst),
- "rcl{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL16mCL : I<0xD3, MRM2m, (outs), (ins i16mem:$dst),
- "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcl{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCL32mCL : I<0xD3, MRM2m, (outs), (ins i32mem:$dst),
- "rcl{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
- "rcl{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcl{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR8mCL : I<0xD2, MRM3m, (outs), (ins i8mem:$dst),
- "rcr{b}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{b}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
- "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
+ "rcr{w}\t{%cl, $dst|$dst, CL}", [], IIC_SR>, OpSize;
def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
- "rcr{l}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{l}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
- "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
+ "rcr{q}\t{%cl, $dst|$dst, CL}", [], IIC_SR>;
}
let Constraints = "$src1 = $dst" in {
@@ -416,179 +461,217 @@ let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, CL))], IIC_SR>;
def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (rotl GR16:$src1, CL))], IIC_SR>, OpSize;
def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, CL))], IIC_SR>;
def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
+ "rol{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>;
}
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"rol{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"rol{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))]>,
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
OpSize;
def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"rol{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// Rotate by 1
def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))],
+ IIC_SR>;
def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))],
+ IIC_SR>;
def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))],
+ IIC_SR>;
} // Constraints = "$src1 = $dst"
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotl (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (rotl (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotl (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "rol{q}\t{%cl, $dst|$dst, %cl}",
+ [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
}
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+ [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1),
"rol{w}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)]>,
+ [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1),
"rol{l}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+ [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1),
"rol{q}\t{$src1, $dst|$dst, $src1}",
- [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
+ [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)],
+ IIC_SR>;
// Rotate by 1
def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
"rol{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
- [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, CL))], IIC_SR>;
def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+ [(set GR16:$dst, (rotr GR16:$src1, CL))], IIC_SR>, OpSize;
def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, CL))], IIC_SR>;
def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
+ "ror{q}\t{%cl, $dst|$dst, CL}",
+ [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>;
}
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))]>,
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))],
+ IIC_SR>,
OpSize;
def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
+ IIC_SR>;
// Rotate by 1
def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))],
+ IIC_SR>;
def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize;
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))],
+ IIC_SR>, OpSize;
def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))],
+ IIC_SR>;
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
+ IIC_SR>;
} // Constraints = "$src1 = $dst"
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
+ [(store (rotr (loadi16 addr:$dst), CL), addr:$dst)],
+ IIC_SR>, OpSize;
def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
+ [(store (rotr (loadi32 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t{%cl, $dst|$dst, %CL}",
- [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)]>;
+ "ror{q}\t{%cl, $dst|$dst, CL}",
+ [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)],
+ IIC_SR>;
}
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src),
"ror{w}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)]>,
+ [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src),
"ror{l}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src),
"ror{q}\t{$src, $dst|$dst, $src}",
- [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+ [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)],
+ IIC_SR>;
// Rotate by 1
def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
"ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
+ [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>,
OpSize;
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
"ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
+ [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)],
+ IIC_SR>;
//===----------------------------------------------------------------------===//
@@ -601,30 +684,36 @@ let Uses = [CL] in {
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))],
+ IIC_SHD16_REG_CL>,
TB, OpSize;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
+ [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))],
+ IIC_SHD16_REG_CL>,
TB, OpSize;
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
+ [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))],
+ IIC_SHD32_REG_CL>, TB;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
- [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
+ [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))],
+ IIC_SHD32_REG_CL>, TB;
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))]>,
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, CL))],
+ IIC_SHD64_REG_CL>,
TB;
def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
- [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))]>,
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
+ [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, CL))],
+ IIC_SHD64_REG_CL>,
TB;
}
@@ -634,42 +723,42 @@ def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize;
def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
(outs GR16:$dst),
(ins GR16:$src1, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD16_REG_IM>,
TB, OpSize;
def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB;
def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
(outs GR32:$dst),
(ins GR32:$src1, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD32_REG_IM>,
TB;
def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(outs GR64:$dst),
(ins GR64:$src1, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2,
- (i8 imm:$src3)))]>,
+ (i8 imm:$src3)))], IIC_SHD64_REG_IM>,
TB;
}
} // Constraints = "$src1 = $dst"
@@ -678,69 +767,110 @@ let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
+ addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
"shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
- addr:$dst)]>, TB, OpSize;
+ addr:$dst)], IIC_SHD16_MEM_CL>, TB, OpSize;
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD32_MEM_CL>, TB;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD32_MEM_CL>, TB;
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD64_MEM_CL>, TB;
def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2, CL),
- addr:$dst)]>, TB;
+ addr:$dst)], IIC_SHD64_MEM_CL>, TB;
}
def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
"shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD16_MEM_IM>,
TB, OpSize;
def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3),
"shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD16_MEM_IM>,
TB, OpSize;
def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
"shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD32_MEM_IM>,
TB;
def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
(outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3),
"shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD32_MEM_IM>,
TB;
def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
"shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shld (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD64_MEM_IM>,
TB;
def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3),
"shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(store (X86shrd (loadi64 addr:$dst), GR64:$src2,
- (i8 imm:$src3)), addr:$dst)]>,
+ (i8 imm:$src3)), addr:$dst)],
+ IIC_SHD64_MEM_IM>,
TB;
} // Defs = [EFLAGS]
+multiclass bmi_rotate<string asm, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TAXD, VEX;
+ let mayLoad = 1 in
+ def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, i8imm:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TAXD, VEX;
+}
+}
+
+multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
+let neverHasSideEffects = 1 in {
+ def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX_4VOp3;
+ let mayLoad = 1 in
+ def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
+ VEX_4VOp3;
+}
+}
+
+let Predicates = [HasBMI2] in {
+ defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
+ defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, VEX_W;
+ defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W;
+ defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
+ defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W;
+}
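Every hunk in this file makes the same mechanical change: each def gains a trailing IIC_SR argument naming its instruction itinerary class. That only works if the X86 format classes accept an itinerary parameter; a sketch of the assumed supporting definitions in X86Schedule.td and X86InstrFormats.td (details may vary):

// An itinerary class is just a marker; per-CPU itineraries attach
// latencies and functional units to it.
def IIC_SR : InstrItinClass;

// The format classes grow a defaulted trailing parameter, so defs
// that do not care about scheduling stay unchanged.
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
        list<dag> pattern, InstrItinClass itin = IIC_DEFAULT>
  : X86Inst<o, f, NoImm, outs, ins, asm, itin> {
  let Pattern = pattern;
  let CodeSize = 3;
}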
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 05a5b36b95ed..bddba6cb0c4d 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -1,10 +1,10 @@
-//===- X86InstrSystem.td - System Instructions -------------*- tablegen -*-===//
-//
+//===-- X86InstrSystem.td - System Instructions ------------*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 instructions that are generally used in
@@ -45,18 +45,17 @@ def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
-def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
-def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
+def SYSRET : I<0x07, RawFrm, (outs), (ins), "sysret{l}", []>, TB;
+def SYSRET64 :RI<0x07, RawFrm, (outs), (ins), "sysret{q}", []>, TB,
Requires<[In64BitMode]>;
def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB;
-def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
- Requires<[In32BitMode]>;
-def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB,
+def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", []>, TB;
+def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", []>, TB,
Requires<[In64BitMode]>;
-def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize;
+def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", []>;
def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", []>,
Requires<[In64BitMode]>;
@@ -215,18 +214,18 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB, OpSize;
+ "str{w}\t$dst", []>, TB, OpSize;
def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
- "str{l}\t{$dst}", []>, TB;
+ "str{l}\t$dst", []>, TB;
def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
- "str{q}\t{$dst}", []>, TB;
+ "str{q}\t$dst", []>, TB;
def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
+ "str{w}\t$dst", []>, TB;
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
- "ltr{w}\t{$src}", []>, TB;
+ "ltr{w}\t$src", []>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
- "ltr{w}\t{$src}", []>, TB;
+ "ltr{w}\t$src", []>, TB;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins),
"push{w}\t{%cs|CS}", []>, Requires<[In32BitMode]>, OpSize;
@@ -447,21 +446,38 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
//===----------------------------------------------------------------------===//
// FS/GS Base Instructions
-let Predicates = [In64BitMode] in {
+let Predicates = [HasFSGSBase, In64BitMode] in {
def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins),
- "rdfsbase{l}\t$dst", []>, TB, XS;
+ "rdfsbase{l}\t$dst",
+ [(set GR32:$dst, (int_x86_rdfsbase_32))]>, TB, XS;
def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins),
- "rdfsbase{q}\t$dst", []>, TB, XS;
+ "rdfsbase{q}\t$dst",
+ [(set GR64:$dst, (int_x86_rdfsbase_64))]>, TB, XS;
def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins),
- "rdgsbase{l}\t$dst", []>, TB, XS;
+ "rdgsbase{l}\t$dst",
+ [(set GR32:$dst, (int_x86_rdgsbase_32))]>, TB, XS;
def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins),
- "rdgsbase{q}\t$dst", []>, TB, XS;
- def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst),
- "wrfsbase{l}\t$dst", []>, TB, XS;
- def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst),
- "wrfsbase{q}\t$dst", []>, TB, XS;
- def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst),
- "wrgsbase{l}\t$dst", []>, TB, XS;
- def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst),
- "wrgsbase{q}\t$dst", []>, TB, XS;
+ "rdgsbase{q}\t$dst",
+ [(set GR64:$dst, (int_x86_rdgsbase_64))]>, TB, XS;
+ def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src),
+ "wrfsbase{l}\t$src",
+ [(int_x86_wrfsbase_32 GR32:$src)]>, TB, XS;
+ def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src),
+ "wrfsbase{q}\t$src",
+ [(int_x86_wrfsbase_64 GR64:$src)]>, TB, XS;
+ def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src),
+ "wrgsbase{l}\t$src",
+ [(int_x86_wrgsbase_32 GR32:$src)]>, TB, XS;
+ def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$src),
+ "wrgsbase{q}\t$src",
+ [(int_x86_wrgsbase_64 GR64:$src)]>, TB, XS;
}
+
+//===----------------------------------------------------------------------===//
+// INVPCID Instruction
+def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
+ "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
+def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
+ "invpcid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
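The RDFSBASE/WRFSBASE hunk above now selects these instructions directly from intrinsics instead of leaving the patterns empty. A sketch of the matching declarations assumed in include/llvm/IntrinsicsX86.td (one read/write pair shown; the names mirror the GCC builtins):

def int_x86_rdfsbase_32 : GCCBuiltin<"__builtin_ia32_rdfsbase32">,
                          Intrinsic<[llvm_i32_ty], [], []>;
def int_x86_wrfsbase_32 : GCCBuiltin<"__builtin_ia32_wrfsbase32">,
                          Intrinsic<[], [llvm_i32_ty], []>;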
diff --git a/lib/Target/X86/X86InstrVMX.td b/lib/Target/X86/X86InstrVMX.td
index 09a7a7d0c4d0..6a8f0c848673 100644
--- a/lib/Target/X86/X86InstrVMX.td
+++ b/lib/Target/X86/X86InstrVMX.td
@@ -1,10 +1,10 @@
-//===- X86InstrVMX.td - VMX Instruction Set Extension ------*- tablegen -*-===//
-//
+//===-- X86InstrVMX.td - VMX Instruction Set Extension -----*- tablegen -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file describes the instructions that make up the Intel VMX instruction
@@ -17,18 +17,24 @@
// 66 0F 38 80
def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invept {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In32BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
- "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8;
+ "invvpid {$src2, $src1|$src1, $src2}", []>, OpSize, T8,
+ Requires<[In64BitMode]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
def VMCLEARm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
"vmclear\t$vmcs", []>, OpSize, TB;
+// 0F 01 D4
+def VMFUNC : I<0x01, MRM_D4, (outs), (ins), "vmfunc", []>, TB;
// 0F 01 C2
def VMLAUNCH : I<0x01, MRM_C2, (outs), (ins), "vmlaunch", []>, TB;
// 0F 01 C3
@@ -38,23 +44,23 @@ def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
def VMPTRSTm : I<0xC7, MRM7m, (outs i64mem:$vmcs), (ins),
"vmptrst\t$vmcs", []>, TB;
def VMREAD64rm : I<0x78, MRMDestMem, (outs i64mem:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMREAD32rm : I<0x78, MRMDestMem, (outs i32mem:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In64BitMode]>;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, TB, Requires<[In32BitMode]>;
// 0F 01 C4
def VMXOFF : I<0x01, MRM_C4, (outs), (ins), "vmxoff", []>, TB;
def VMXON : I<0xC7, MRM6m, (outs), (ins i64mem:$vmxon),
- "vmxon\t{$vmxon}", []>, XS;
+ "vmxon\t$vmxon", []>, XS;
diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td
new file mode 100644
index 000000000000..65bbcb55ae12
--- /dev/null
+++ b/lib/Target/X86/X86InstrXOP.td
@@ -0,0 +1,307 @@
+//===-- X86InstrXOP.td - XOP Instruction Set ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes XOP (eXtended OPerations).
+//
+//===----------------------------------------------------------------------===//
+
+multiclass xop2op<bits<8> opc, string OpcodeStr, Intrinsic Int, PatFrag memop> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int VR128:$src))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX;
+}
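Each defm in the block below stamps out a register and a memory form from this multiclass. Roughly what the first one expands to (sketch):

def VPHSUBWDrr : IXOP<0xE2, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "vphsubwd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_xop_vphsubwd VR128:$src))]>,
                      VEX;
def VPHSUBWDrm : IXOP<0xE2, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                      "vphsubwd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_xop_vphsubwd
                                         (bitconvert (memopv2i64 addr:$src))))]>,
                      VEX;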
+
+let isAsmParserOnly = 1 in {
+ defm VPHSUBWD : xop2op<0xE2, "vphsubwd", int_x86_xop_vphsubwd, memopv2i64>;
+ defm VPHSUBDQ : xop2op<0xE3, "vphsubdq", int_x86_xop_vphsubdq, memopv2i64>;
+ defm VPHSUBBW : xop2op<0xE1, "vphsubbw", int_x86_xop_vphsubbw, memopv2i64>;
+ defm VPHADDWQ : xop2op<0xC7, "vphaddwq", int_x86_xop_vphaddwq, memopv2i64>;
+ defm VPHADDWD : xop2op<0xC6, "vphaddwd", int_x86_xop_vphaddwd, memopv2i64>;
+ defm VPHADDUWQ : xop2op<0xD7, "vphadduwq", int_x86_xop_vphadduwq, memopv2i64>;
+ defm VPHADDUWD : xop2op<0xD6, "vphadduwd", int_x86_xop_vphadduwd, memopv2i64>;
+ defm VPHADDUDQ : xop2op<0xDB, "vphaddudq", int_x86_xop_vphaddudq, memopv2i64>;
+ defm VPHADDUBW : xop2op<0xD1, "vphaddubw", int_x86_xop_vphaddubw, memopv2i64>;
+ defm VPHADDUBQ : xop2op<0xD3, "vphaddubq", int_x86_xop_vphaddubq, memopv2i64>;
+ defm VPHADDUBD : xop2op<0xD2, "vphaddubd", int_x86_xop_vphaddubd, memopv2i64>;
+ defm VPHADDDQ : xop2op<0xCB, "vphadddq", int_x86_xop_vphadddq, memopv2i64>;
+ defm VPHADDBW : xop2op<0xC1, "vphaddbw", int_x86_xop_vphaddbw, memopv2i64>;
+ defm VPHADDBQ : xop2op<0xC3, "vphaddbq", int_x86_xop_vphaddbq, memopv2i64>;
+ defm VPHADDBD : xop2op<0xC2, "vphaddbd", int_x86_xop_vphaddbd, memopv2i64>;
+ defm VFRCZPS : xop2op<0x80, "vfrczps", int_x86_xop_vfrcz_ps, memopv4f32>;
+ defm VFRCZPD : xop2op<0x81, "vfrczpd", int_x86_xop_vfrcz_pd, memopv2f64>;
+}
+
+// Scalar load 2 addr operand instructions
+let Constraints = "$src1 = $dst" in {
+multiclass xop2opsld<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ Operand memop, ComplexPattern mem_cpat> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1,
+ VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1,
+ memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1,
+ (bitconvert mem_cpat:$src2)))]>, VEX;
+}
+
+} // Constraints = "$src1 = $dst"
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZSS : xop2opsld<0x82, "vfrczss", int_x86_xop_vfrcz_ss,
+ ssmem, sse_load_f32>;
+ defm VFRCZSD : xop2opsld<0x83, "vfrczsd", int_x86_xop_vfrcz_sd,
+ sdmem, sse_load_f64>;
+}
+
+
+multiclass xop2op256<bits<8> opc, string OpcodeStr, Intrinsic Int,
+ PatFrag memop> {
+ def rrY : IXOP<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int VR256:$src))]>, VEX, VEX_L;
+ def rmY : IXOP<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (Int (bitconvert (memop addr:$src))))]>, VEX, VEX_L;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VFRCZPS : xop2op256<0x80, "vfrczps", int_x86_xop_vfrcz_ps_256,
+ memopv8f32>;
+ defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256,
+ memopv4f64>;
+}
+
+multiclass xop3op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2))]>, VEX_4VOp3;
+ def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>,
+ VEX_4V, VEX_W;
+ def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (Int (bitconvert (memopv2i64 addr:$src1)), VR128:$src2))]>,
+ VEX_4VOp3;
+}
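The interesting part of xop3op is the pair of memory forms: the encoding's operand order is selected by VEX.W, so the memory operand may appear as either source. A sketch of the two memory defs this produces for VPSHLD:

def VPSHLDrm : IXOP<0x96, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, f128mem:$src2),
                    "vpshld\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (int_x86_xop_vpshld VR128:$src1,
                                       (bitconvert (memopv2i64 addr:$src2))))]>,
                    VEX_4V, VEX_W;
def VPSHLDmr : IXOP<0x96, MRMSrcMem, (outs VR128:$dst),
                    (ins f128mem:$src1, VR128:$src2),
                    "vpshld\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst, (int_x86_xop_vpshld
                                       (bitconvert (memopv2i64 addr:$src1)),
                                       VR128:$src2))]>,
                    VEX_4VOp3;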
+
+let isAsmParserOnly = 1 in {
+ defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>;
+ defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>;
+ defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>;
+ defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>;
+ defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>;
+ defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>;
+ defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>;
+ defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>;
+ defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>;
+ defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>;
+ defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>;
+ defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>;
+}
+
+multiclass xop3opimm<bits<8> opc, string OpcodeStr> {
+ let neverHasSideEffects = 1 in {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ let mayLoad = 1 in
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, VEX;
+ }
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPROTW : xop3opimm<0xC1, "vprotw">;
+ defm VPROTQ : xop3opimm<0xC3, "vprotq">;
+ defm VPROTD : xop3opimm<0xC2, "vprotd">;
+ defm VPROTB : xop3opimm<0xC0, "vprotb">;
+}
+
+// Instruction where second source can be memory, but third must be register
+multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>, VEX_4V, VEX_I8IMM;
+}
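VEX_I8IMM here means the third register source is encoded in the top four bits of a trailing immediate byte (imm8[7:4]) rather than in ModRM, which is how XOP fits four operands into one instruction. A sketch of one expansion:

def VPMACSWWrr : IXOPi8<0x95, MRMSrcReg, (outs VR128:$dst),
                        (ins VR128:$src1, VR128:$src2, VR128:$src3),
                        "vpmacsww\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
                        [(set VR128:$dst, (int_x86_xop_vpmacsww VR128:$src1,
                                           VR128:$src2, VR128:$src3))]>,
                        VEX_4V, VEX_I8IMM;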
+
+let isAsmParserOnly = 1 in {
+ defm VPMADCSWD : xop4opm2<0xB6, "vpmadcswd", int_x86_xop_vpmadcswd>;
+ defm VPMADCSSWD : xop4opm2<0xA6, "vpmadcsswd", int_x86_xop_vpmadcsswd>;
+ defm VPMACSWW : xop4opm2<0x95, "vpmacsww", int_x86_xop_vpmacsww>;
+ defm VPMACSWD : xop4opm2<0x96, "vpmacswd", int_x86_xop_vpmacswd>;
+ defm VPMACSSWW : xop4opm2<0x85, "vpmacssww", int_x86_xop_vpmacssww>;
+ defm VPMACSSWD : xop4opm2<0x86, "vpmacsswd", int_x86_xop_vpmacsswd>;
+ defm VPMACSSDQL : xop4opm2<0x87, "vpmacssdql", int_x86_xop_vpmacssdql>;
+ defm VPMACSSDQH : xop4opm2<0x8F, "vpmacssdqh", int_x86_xop_vpmacssdqh>;
+ defm VPMACSSDD : xop4opm2<0x8E, "vpmacssdd", int_x86_xop_vpmacssdd>;
+ defm VPMACSDQL : xop4opm2<0x97, "vpmacsdql", int_x86_xop_vpmacsdql>;
+ defm VPMACSDQH : xop4opm2<0x9F, "vpmacsdqh", int_x86_xop_vpmacsdqh>;
+ defm VPMACSDD : xop4opm2<0x9E, "vpmacsdd", int_x86_xop_vpmacsdd>;
+}
+
+// Instruction where second source can be memory, third must be imm8
+multiclass xop4opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType VT> {
+ def ri : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (VT (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>, VEX_4V;
+ def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (VT (OpNode VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ imm:$src3)))]>, VEX_4V;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCOMB : xop4opimm<0xCC, "vpcomb", X86vpcom, v16i8>;
+ defm VPCOMW : xop4opimm<0xCD, "vpcomw", X86vpcom, v8i16>;
+ defm VPCOMD : xop4opimm<0xCE, "vpcomd", X86vpcom, v4i32>;
+ defm VPCOMQ : xop4opimm<0xCF, "vpcomq", X86vpcom, v2i64>;
+ defm VPCOMUB : xop4opimm<0xEC, "vpcomub", X86vpcomu, v16i8>;
+ defm VPCOMUW : xop4opimm<0xED, "vpcomuw", X86vpcomu, v8i16>;
+ defm VPCOMUD : xop4opimm<0xEE, "vpcomud", X86vpcomu, v4i32>;
+ defm VPCOMUQ : xop4opimm<0xEF, "vpcomuq", X86vpcomu, v2i64>;
+}
+
+// Instruction where either second or third source can be memory
+multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst, (Int VR128:$src1, VR128:$src2, VR128:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+ def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, VR128:$src2,
+ (bitconvert (memopv2i64 addr:$src3))))]>,
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+ def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (Int VR128:$src1, (bitconvert (memopv2i64 addr:$src2)),
+ VR128:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
+ defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
+}
+
+multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
+ def rrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst, (Int VR256:$src1, VR256:$src2, VR256:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+ def rmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int VR256:$src1, VR256:$src2,
+ (bitconvert (memopv4i64 addr:$src3))))]>,
+ VEX_4V, VEX_I8IMM, VEX_W, MemOp4;
+ def mrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR256:$dst,
+ (Int VR256:$src1, (bitconvert (memopv4i64 addr:$src2)),
+ VR256:$src3))]>,
+ VEX_4V, VEX_I8IMM;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>;
+}
+
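+// Instruction with three vector sources plus an 8-bit immediate selector,
+// where either the second or third source can be memory; both 128-bit and
+// 256-bit forms are defined.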
+multiclass xop5op<bits<8> opc, string OpcodeStr, Intrinsic Int128,
+ Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> {
+ def rr : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, VR128:$src3, imm:$src4))]>;
+ def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, f128mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, VR128:$src2, (ld_128 addr:$src3), imm:$src4))]>,
+ VEX_W, MemOp4;
+ def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, VR128:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR128:$dst,
+ (Int128 VR128:$src1, (ld_128 addr:$src2), VR128:$src3, imm:$src4))]>;
+ def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, VR256:$src3, imm:$src4))]>;
+ def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, f256mem:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, VR256:$src2, (ld_256 addr:$src3), imm:$src4))]>,
+ VEX_W, MemOp4;
+ def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, VR256:$src3, i8imm:$src4),
+ !strconcat(OpcodeStr,
+ "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
+ [(set VR256:$dst,
+ (Int256 VR256:$src1, (ld_256 addr:$src2), VR256:$src3, imm:$src4))]>;
+}
+
+defm VPERMIL2PD : xop5op<0x49, "vpermil2pd", int_x86_xop_vpermil2pd,
+ int_x86_xop_vpermil2pd_256, memopv2f64, memopv4f64>;
+defm VPERMIL2PS : xop5op<0x48, "vpermil2ps", int_x86_xop_vpermil2ps,
+ int_x86_xop_vpermil2ps_256, memopv4f32, memopv8f32>;
+
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 3f88fa69d0ee..0168d12231f7 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -300,7 +300,10 @@ extern "C" {
SIZE(X86CompilationCallback_SSE)
);
# else
- void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
+ // The following function is called only from this translation unit,
+ // unless we are under 64-bit Windows with MSC, where there is
+ // no support for inline assembly.
+ static void X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr);
_declspec(naked) void X86CompilationCallback(void) {
__asm {
@@ -424,7 +427,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
TargetJITInfo::LazyResolverFn
X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ TsanIgnoreWritesBegin();
JITCompilerFunction = F;
+ TsanIgnoreWritesEnd();
#if defined (X86_32_JIT) && !defined (_MSC_VER)
if (Subtarget->hasSSE1())
@@ -569,6 +574,5 @@ char* X86JITInfo::allocateThreadLocalMemory(size_t size) {
return TLSOffset;
#else
llvm_unreachable("Cannot allocate thread local storage on this arch!");
- return 0;
#endif
}
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index 238420c236b1..c76d3ccf5d94 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -1,4 +1,4 @@
-//===- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
+//===-- X86JITInfo.h - X86 implementation of the JIT interface --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 50bc14d357f8..b578e8d9285d 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -12,10 +12,11 @@
//
//===----------------------------------------------------------------------===//
-#include "InstPrinter/X86ATTInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
+#include "llvm/Type.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -26,7 +27,6 @@
#include "llvm/Target/Mangler.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/Type.h"
using namespace llvm;
X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
@@ -154,6 +154,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Ctx),
Ctx);
break;
+ case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
@@ -230,7 +231,8 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
/// a short fixed-register form.
static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
unsigned ImmOp = Inst.getNumOperands() - 1;
- assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ assert(Inst.getOperand(0).isReg() &&
+ (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
Inst.getNumOperands() == 2) && "Unexpected instruction!");
@@ -335,6 +337,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = LowerSymbolOperand(MO,
AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
+ case MachineOperand::MO_RegisterMask:
+ // Ignore call clobbers.
+ continue;
}
OutMI.addOperand(MCOp);
@@ -368,14 +373,12 @@ ReSimplify:
case X86::SETB_C64r: LowerUnaryToTwoAddr(OutMI, X86::SBB64rr); break;
case X86::MOV8r0: LowerUnaryToTwoAddr(OutMI, X86::XOR8rr); break;
case X86::MOV32r0: LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::VFsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
- case X86::VFsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
case X86::AVX_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDrr); break;
+ case X86::AVX2_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::VPCMPEQDYrr);break;
+ case X86::AVX2_SET0: LowerUnaryToTwoAddr(OutMI, X86::VPXORYrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -386,14 +389,12 @@ ReSimplify:
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
- // register inputs modeled as normal uses instead of implicit uses. As such,
- // truncate off all but the first operand (the callee). FIXME: Change isel.
+ // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have register
+ // inputs modeled as normal uses instead of implicit uses. As such, truncate
+ // off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32:
- case X86::WINCALL64r:
- case X86::WINCALL64pcrel32: {
+ case X86::CALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -415,7 +416,7 @@ ReSimplify:
case X86::TAILJMPd64: {
unsigned Opcode;
switch (OutMI.getOpcode()) {
- default: assert(0 && "Invalid opcode");
+ default: llvm_unreachable("Invalid opcode");
case X86::TAILJMPr: Opcode = X86::JMP32r; break;
case X86::TAILJMPd:
case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
@@ -527,6 +528,22 @@ ReSimplify:
case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
+
+ case X86::MORESTACK_RET:
+ OutMI.setOpcode(X86::RET);
+ break;
+
+ case X86::MORESTACK_RET_RESTORE_R10: {
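+ // The streamer emits the RET here and the caller then emits OutMI, so
+ // the final sequence is "ret" followed by "mov %rax, %r10" to restore
+ // R10.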
+ MCInst retInst;
+
+ OutMI.setOpcode(X86::MOV64rr);
+ OutMI.addOperand(MCOperand::CreateReg(X86::R10));
+ OutMI.addOperand(MCOperand::CreateReg(X86::RAX));
+
+ retInst.setOpcode(X86::RET);
+ AsmPrinter.OutStreamer.EmitInstruction(retInst);
+ break;
+ }
}
}
diff --git a/lib/Target/X86/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 021007239128..40df3db7d480 100644
--- a/lib/Target/X86/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -1,4 +1,4 @@
-//===-- X86MCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//===-- X86MCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp
new file mode 100644
index 000000000000..568dc222d9d1
--- /dev/null
+++ b/lib/Target/X86/X86MachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MachineFunctionInfo.h"
+
+using namespace llvm;
+
+void X86MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index b0bb313ec639..c7471091ec47 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -1,10 +1,10 @@
-//====- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===//
-//
+//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file declares X86-specific per-machine-function information.
@@ -21,6 +21,8 @@ namespace llvm {
/// X86MachineFunctionInfo - This class is derived from MachineFunction and
/// contains private X86 target-specific information for each MachineFunction.
class X86MachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
/// ForceFramePointer - True if the function is required to use a frame
/// pointer for reasons other than it containing dynamic allocation or
/// that FP elimination is turned off. For example, Cygwin main function
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c1ac9f343116..b56025fbb09c 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
+//===-- X86RegisterInfo.cpp - X86 Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#include "X86.h"
#include "X86RegisterInfo.h"
+#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -127,121 +127,13 @@ const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B,
unsigned SubIdx) const {
- switch (SubIdx) {
- default: return 0;
- case X86::sub_8bit:
- if (B == &X86::GR8RegClass) {
- if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
- return A;
- } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
- A == &X86::GR32_NOREXRegClass ||
- A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_ABCDRegClass;
- else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
- A == &X86::GR16_NOREXRegClass)
- return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::GR8_NOREXRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREXRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
- A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_NOREXRegClass;
- else if (A == &X86::GR32_ABCDRegClass)
- return &X86::GR32_ABCDRegClass;
- else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
- return &X86::GR16_NOREXRegClass;
- else if (A == &X86::GR16_ABCDRegClass)
- return &X86::GR16_ABCDRegClass;
- }
- break;
- case X86::sub_8bit_hi:
- if (B->hasSubClassEq(&X86::GR8_ABCD_HRegClass))
- switch (A->getSize()) {
- case 2: return getCommonSubClass(A, &X86::GR16_ABCDRegClass);
- case 4: return getCommonSubClass(A, &X86::GR32_ABCDRegClass);
- case 8: return getCommonSubClass(A, &X86::GR64_ABCDRegClass);
- default: return 0;
- }
- break;
- case X86::sub_16bit:
- if (B == &X86::GR16RegClass) {
- if (A->getSize() == 4 || A->getSize() == 8)
- return A;
- } else if (B == &X86::GR16_ABCDRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
- A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_ABCDRegClass;
- } else if (B == &X86::GR16_NOREXRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREXRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
- A == &X86::GR32_NOSPRegClass)
- return &X86::GR32_NOREXRegClass;
- else if (A == &X86::GR32_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- }
- break;
- case X86::sub_32bit:
- if (B == &X86::GR32RegClass) {
- if (A->getSize() == 8)
- return A;
- } else if (B == &X86::GR32_NOSPRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOSPRegClass)
- return &X86::GR64_NOSPRegClass;
- if (A->getSize() == 8)
- return getCommonSubClass(A, &X86::GR64_NOSPRegClass);
- } else if (B == &X86::GR32_ABCDRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
- A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass ||
- A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::GR32_NOREXRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass)
- return &X86::GR64_NOREXRegClass;
- else if (A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREX_NOSPRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::GR32_NOREX_NOSPRegClass) {
- if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
- A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
- return &X86::GR64_NOREX_NOSPRegClass;
- else if (A == &X86::GR64_ABCDRegClass)
- return &X86::GR64_ABCDRegClass;
- }
- break;
- case X86::sub_ss:
- if (B == &X86::FR32RegClass)
- return A;
- break;
- case X86::sub_sd:
- if (B == &X86::FR64RegClass)
- return A;
- break;
- case X86::sub_xmm:
- if (B == &X86::VR128RegClass)
- return A;
- break;
+ // The sub_8bit sub-register index is more constrained in 32-bit mode.
+ if (!Is64Bit && SubIdx == X86::sub_8bit) {
+ A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
+ if (!A)
+ return 0;
}
- return 0;
+ return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
const TargetRegisterClass*
@@ -334,7 +226,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-const unsigned *
+const uint16_t *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
@@ -345,45 +237,29 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
}
- static const unsigned GhcCalleeSavedRegs[] = {
- 0
- };
-
- static const unsigned CalleeSavedRegs32Bit[] = {
- X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
- };
-
- static const unsigned CalleeSavedRegs32EHRet[] = {
- X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
- };
-
- static const unsigned CalleeSavedRegs64Bit[] = {
- X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
-
- static const unsigned CalleeSavedRegs64EHRet[] = {
- X86::RAX, X86::RDX, X86::RBX, X86::R12,
- X86::R13, X86::R14, X86::R15, X86::RBP, 0
- };
-
- static const unsigned CalleeSavedRegsWin64[] = {
- X86::RBX, X86::RBP, X86::RDI, X86::RSI,
- X86::R12, X86::R13, X86::R14, X86::R15,
- X86::XMM6, X86::XMM7, X86::XMM8, X86::XMM9,
- X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
- X86::XMM14, X86::XMM15, 0
- };
-
- if (ghcCall) {
- return GhcCalleeSavedRegs;
- } else if (Is64Bit) {
+ if (ghcCall)
+ return CSR_Ghc_SaveList;
+ if (Is64Bit) {
if (IsWin64)
- return CalleeSavedRegsWin64;
- else
- return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
- } else {
- return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
+ return CSR_Win64_SaveList;
+ if (callsEHReturn)
+ return CSR_64EHRet_SaveList;
+ return CSR_64_SaveList;
}
+ if (callsEHReturn)
+ return CSR_32EHRet_SaveList;
+ return CSR_32_SaveList;
+}
+
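+// getCallPreservedMask - Return a register mask describing the registers
+// preserved across a call with the given calling convention; the masks are
+// generated from the CalleeSavedRegs definitions in tablegen.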
+const uint32_t*
+X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::GHC)
+ return CSR_Ghc_RegMask;
+ if (!Is64Bit)
+ return CSR_32_RegMask;
+ if (IsWin64)
+ return CSR_Win64_RegMask;
+ return CSR_64_RegMask;
}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -428,16 +304,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (unsigned n = 0; n != 8; ++n) {
// R8, R9, ...
- const unsigned GPR64[] = {
+ static const uint16_t GPR64[] = {
X86::R8, X86::R9, X86::R10, X86::R11,
X86::R12, X86::R13, X86::R14, X86::R15
};
- for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
+ for (const uint16_t *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
Reserved.set(Reg);
// XMM8, XMM9, ...
assert(X86::XMM15 == X86::XMM8+7);
- for (const unsigned *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
+ for (const uint16_t *AI = getOverlaps(X86::XMM8 + n); unsigned Reg = *AI;
++AI)
Reserved.set(Reg);
}
@@ -452,7 +328,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return (RealignStack &&
+ return (MF.getTarget().Options.RealignStack &&
!MFI->hasVarSizedObjects());
}
@@ -583,7 +459,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// sure we restore the stack pointer immediately after the call, there may
// be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
MachineBasicBlock::iterator B = MBB.begin();
- while (I != B && !llvm::prior(I)->getDesc().isCall())
+ while (I != B && !llvm::prior(I)->isCall())
--I;
MBB.insert(I, New);
}
@@ -650,12 +526,10 @@ unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned X86RegisterInfo::getEHExceptionRegister() const {
llvm_unreachable("What is the exception register");
- return 0;
}
unsigned X86RegisterInfo::getEHHandlerRegister() const {
llvm_unreachable("What is the exception handler register");
- return 0;
}
namespace llvm {
@@ -665,7 +539,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
case MVT::i8:
if (High) {
switch (Reg) {
- default: return 0;
+ default: return getX86SubSuperRegister(Reg, MVT::i64, High);
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -785,6 +659,22 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15D;
}
case MVT::i64:
+ // In 64-bit mode, if we've requested a "high" register for the Q or r
+ // constraints, return one of these registers; otherwise just return
+ // the register name unchanged.
+ if (High) {
+ switch (Reg) {
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ // Fallthrough.
+ }
+ }
switch (Reg) {
default: return Reg;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
@@ -821,8 +711,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
return X86::R15;
}
}
-
- return Reg;
}
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 7d39c6853597..bee03936f1f7 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- X86RegisterInfo.h - X86 Register Information Impl --------*- C++ -*-===//
+//===-- X86RegisterInfo.h - X86 Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -95,7 +95,8 @@ public:
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee-save registers on this target.
- const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const;
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 9a7db36e0871..5263a4934cbd 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -70,7 +70,7 @@ let Namespace = "X86" in {
def BH : Register<"bh">;
// 16-bit registers
- let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
+ let SubRegIndices = [sub_8bit, sub_8bit_hi], CoveredBySubRegs = 1 in {
def AX : RegisterWithSubRegs<"ax", [AL,AH]>;
def DX : RegisterWithSubRegs<"dx", [DL,DH]>;
def CX : RegisterWithSubRegs<"cx", [CL,CH]>;
diff --git a/lib/Target/X86/X86Relocations.h b/lib/Target/X86/X86Relocations.h
index 990962dc4173..857becff66d7 100644
--- a/lib/Target/X86/X86Relocations.h
+++ b/lib/Target/X86/X86Relocations.h
@@ -1,4 +1,4 @@
-//===- X86Relocations.h - X86 Code Relocations ------------------*- C++ -*-===//
+//===-- X86Relocations.h - X86 Code Relocations -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
new file mode 100644
index 000000000000..17f4efd8bcb0
--- /dev/null
+++ b/lib/Target/X86/X86Schedule.td
@@ -0,0 +1,273 @@
+//===-- X86Schedule.td - X86 Scheduling Definitions --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for X86
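+// These classes are referenced from instruction definitions and mapped to
+// cycle and port information by the per-processor itineraries below.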
+def IIC_DEFAULT : InstrItinClass;
+def IIC_ALU_MEM : InstrItinClass;
+def IIC_ALU_NONMEM : InstrItinClass;
+def IIC_LEA : InstrItinClass;
+def IIC_LEA_16 : InstrItinClass;
+def IIC_MUL8 : InstrItinClass;
+def IIC_MUL16_MEM : InstrItinClass;
+def IIC_MUL16_REG : InstrItinClass;
+def IIC_MUL32_MEM : InstrItinClass;
+def IIC_MUL32_REG : InstrItinClass;
+def IIC_MUL64 : InstrItinClass;
+// imul by al, ax, eax, rax
+def IIC_IMUL8 : InstrItinClass;
+def IIC_IMUL16_MEM : InstrItinClass;
+def IIC_IMUL16_REG : InstrItinClass;
+def IIC_IMUL32_MEM : InstrItinClass;
+def IIC_IMUL32_REG : InstrItinClass;
+def IIC_IMUL64 : InstrItinClass;
+// imul reg by reg|mem
+def IIC_IMUL16_RM : InstrItinClass;
+def IIC_IMUL16_RR : InstrItinClass;
+def IIC_IMUL32_RM : InstrItinClass;
+def IIC_IMUL32_RR : InstrItinClass;
+def IIC_IMUL64_RM : InstrItinClass;
+def IIC_IMUL64_RR : InstrItinClass;
+// imul reg = reg/mem * imm
+def IIC_IMUL16_RMI : InstrItinClass;
+def IIC_IMUL16_RRI : InstrItinClass;
+def IIC_IMUL32_RMI : InstrItinClass;
+def IIC_IMUL32_RRI : InstrItinClass;
+def IIC_IMUL64_RMI : InstrItinClass;
+def IIC_IMUL64_RRI : InstrItinClass;
+// div
+def IIC_DIV8_MEM : InstrItinClass;
+def IIC_DIV8_REG : InstrItinClass;
+def IIC_DIV16 : InstrItinClass;
+def IIC_DIV32 : InstrItinClass;
+def IIC_DIV64 : InstrItinClass;
+// idiv
+def IIC_IDIV8 : InstrItinClass;
+def IIC_IDIV16 : InstrItinClass;
+def IIC_IDIV32 : InstrItinClass;
+def IIC_IDIV64 : InstrItinClass;
+// neg/not/inc/dec
+def IIC_UNARY_REG : InstrItinClass;
+def IIC_UNARY_MEM : InstrItinClass;
+// add/sub/and/or/xor/adc/sbc/cmp/test
+def IIC_BIN_MEM : InstrItinClass;
+def IIC_BIN_NONMEM : InstrItinClass;
+// shift/rotate
+def IIC_SR : InstrItinClass;
+// shift double
+def IIC_SHD16_REG_IM : InstrItinClass;
+def IIC_SHD16_REG_CL : InstrItinClass;
+def IIC_SHD16_MEM_IM : InstrItinClass;
+def IIC_SHD16_MEM_CL : InstrItinClass;
+def IIC_SHD32_REG_IM : InstrItinClass;
+def IIC_SHD32_REG_CL : InstrItinClass;
+def IIC_SHD32_MEM_IM : InstrItinClass;
+def IIC_SHD32_MEM_CL : InstrItinClass;
+def IIC_SHD64_REG_IM : InstrItinClass;
+def IIC_SHD64_REG_CL : InstrItinClass;
+def IIC_SHD64_MEM_IM : InstrItinClass;
+def IIC_SHD64_MEM_CL : InstrItinClass;
+// cmov
+def IIC_CMOV16_RM : InstrItinClass;
+def IIC_CMOV16_RR : InstrItinClass;
+def IIC_CMOV32_RM : InstrItinClass;
+def IIC_CMOV32_RR : InstrItinClass;
+def IIC_CMOV64_RM : InstrItinClass;
+def IIC_CMOV64_RR : InstrItinClass;
+// set
+def IIC_SET_R : InstrItinClass;
+def IIC_SET_M : InstrItinClass;
+// jmp/jcc/jcxz
+def IIC_Jcc : InstrItinClass;
+def IIC_JCXZ : InstrItinClass;
+def IIC_JMP_REL : InstrItinClass;
+def IIC_JMP_REG : InstrItinClass;
+def IIC_JMP_MEM : InstrItinClass;
+def IIC_JMP_FAR_MEM : InstrItinClass;
+def IIC_JMP_FAR_PTR : InstrItinClass;
+// loop
+def IIC_LOOP : InstrItinClass;
+def IIC_LOOPE : InstrItinClass;
+def IIC_LOOPNE : InstrItinClass;
+// call
+def IIC_CALL_RI : InstrItinClass;
+def IIC_CALL_MEM : InstrItinClass;
+def IIC_CALL_FAR_MEM : InstrItinClass;
+def IIC_CALL_FAR_PTR : InstrItinClass;
+// ret
+def IIC_RET : InstrItinClass;
+def IIC_RET_IMM : InstrItinClass;
+// sign extension movs
+def IIC_MOVSX : InstrItinClass;
+def IIC_MOVSX_R16_R8 : InstrItinClass;
+def IIC_MOVSX_R16_M8 : InstrItinClass;
+def IIC_MOVSX_R16_R16 : InstrItinClass;
+def IIC_MOVSX_R32_R32 : InstrItinClass;
+// zero extension movs
+def IIC_MOVZX : InstrItinClass;
+def IIC_MOVZX_R16_R8 : InstrItinClass;
+def IIC_MOVZX_R16_M8 : InstrItinClass;
+
+def IIC_REP_MOVS : InstrItinClass;
+def IIC_REP_STOS : InstrItinClass;
+
+// SSE scalar/parallel binary operations
+def IIC_SSE_ALU_F32S_RR : InstrItinClass;
+def IIC_SSE_ALU_F32S_RM : InstrItinClass;
+def IIC_SSE_ALU_F64S_RR : InstrItinClass;
+def IIC_SSE_ALU_F64S_RM : InstrItinClass;
+def IIC_SSE_MUL_F32S_RR : InstrItinClass;
+def IIC_SSE_MUL_F32S_RM : InstrItinClass;
+def IIC_SSE_MUL_F64S_RR : InstrItinClass;
+def IIC_SSE_MUL_F64S_RM : InstrItinClass;
+def IIC_SSE_DIV_F32S_RR : InstrItinClass;
+def IIC_SSE_DIV_F32S_RM : InstrItinClass;
+def IIC_SSE_DIV_F64S_RR : InstrItinClass;
+def IIC_SSE_DIV_F64S_RM : InstrItinClass;
+def IIC_SSE_ALU_F32P_RR : InstrItinClass;
+def IIC_SSE_ALU_F32P_RM : InstrItinClass;
+def IIC_SSE_ALU_F64P_RR : InstrItinClass;
+def IIC_SSE_ALU_F64P_RM : InstrItinClass;
+def IIC_SSE_MUL_F32P_RR : InstrItinClass;
+def IIC_SSE_MUL_F32P_RM : InstrItinClass;
+def IIC_SSE_MUL_F64P_RR : InstrItinClass;
+def IIC_SSE_MUL_F64P_RM : InstrItinClass;
+def IIC_SSE_DIV_F32P_RR : InstrItinClass;
+def IIC_SSE_DIV_F32P_RM : InstrItinClass;
+def IIC_SSE_DIV_F64P_RR : InstrItinClass;
+def IIC_SSE_DIV_F64P_RM : InstrItinClass;
+
+def IIC_SSE_COMIS_RR : InstrItinClass;
+def IIC_SSE_COMIS_RM : InstrItinClass;
+
+def IIC_SSE_HADDSUB_RR : InstrItinClass;
+def IIC_SSE_HADDSUB_RM : InstrItinClass;
+
+def IIC_SSE_BIT_P_RR : InstrItinClass;
+def IIC_SSE_BIT_P_RM : InstrItinClass;
+
+def IIC_SSE_INTALU_P_RR : InstrItinClass;
+def IIC_SSE_INTALU_P_RM : InstrItinClass;
+def IIC_SSE_INTALUQ_P_RR : InstrItinClass;
+def IIC_SSE_INTALUQ_P_RM : InstrItinClass;
+
+def IIC_SSE_INTMUL_P_RR : InstrItinClass;
+def IIC_SSE_INTMUL_P_RM : InstrItinClass;
+
+def IIC_SSE_INTSH_P_RR : InstrItinClass;
+def IIC_SSE_INTSH_P_RM : InstrItinClass;
+def IIC_SSE_INTSH_P_RI : InstrItinClass;
+
+def IIC_SSE_CMPP_RR : InstrItinClass;
+def IIC_SSE_CMPP_RM : InstrItinClass;
+
+def IIC_SSE_SHUFP : InstrItinClass;
+def IIC_SSE_PSHUF : InstrItinClass;
+
+def IIC_SSE_UNPCK : InstrItinClass;
+
+def IIC_SSE_MOVMSK : InstrItinClass;
+def IIC_SSE_MASKMOV : InstrItinClass;
+
+def IIC_SSE_PEXTRW : InstrItinClass;
+def IIC_SSE_PINSRW : InstrItinClass;
+
+def IIC_SSE_PABS_RR : InstrItinClass;
+def IIC_SSE_PABS_RM : InstrItinClass;
+
+def IIC_SSE_SQRTP_RR : InstrItinClass;
+def IIC_SSE_SQRTP_RM : InstrItinClass;
+def IIC_SSE_SQRTS_RR : InstrItinClass;
+def IIC_SSE_SQRTS_RM : InstrItinClass;
+
+def IIC_SSE_RCPP_RR : InstrItinClass;
+def IIC_SSE_RCPP_RM : InstrItinClass;
+def IIC_SSE_RCPS_RR : InstrItinClass;
+def IIC_SSE_RCPS_RM : InstrItinClass;
+
+def IIC_SSE_MOV_S_RR : InstrItinClass;
+def IIC_SSE_MOV_S_RM : InstrItinClass;
+def IIC_SSE_MOV_S_MR : InstrItinClass;
+
+def IIC_SSE_MOVA_P_RR : InstrItinClass;
+def IIC_SSE_MOVA_P_RM : InstrItinClass;
+def IIC_SSE_MOVA_P_MR : InstrItinClass;
+
+def IIC_SSE_MOVU_P_RR : InstrItinClass;
+def IIC_SSE_MOVU_P_RM : InstrItinClass;
+def IIC_SSE_MOVU_P_MR : InstrItinClass;
+
+def IIC_SSE_MOVDQ : InstrItinClass;
+def IIC_SSE_MOVD_ToGP : InstrItinClass;
+def IIC_SSE_MOVQ_RR : InstrItinClass;
+
+def IIC_SSE_MOV_LH : InstrItinClass;
+
+def IIC_SSE_LDDQU : InstrItinClass;
+
+def IIC_SSE_MOVNT : InstrItinClass;
+
+def IIC_SSE_PHADDSUBD_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBD_RM : InstrItinClass;
+def IIC_SSE_PHADDSUBSW_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBSW_RM : InstrItinClass;
+def IIC_SSE_PHADDSUBW_RR : InstrItinClass;
+def IIC_SSE_PHADDSUBW_RM : InstrItinClass;
+def IIC_SSE_PSHUFB_RR : InstrItinClass;
+def IIC_SSE_PSHUFB_RM : InstrItinClass;
+def IIC_SSE_PSIGN_RR : InstrItinClass;
+def IIC_SSE_PSIGN_RM : InstrItinClass;
+
+def IIC_SSE_PMADD : InstrItinClass;
+def IIC_SSE_PMULHRSW : InstrItinClass;
+def IIC_SSE_PALIGNR : InstrItinClass;
+def IIC_SSE_MWAIT : InstrItinClass;
+def IIC_SSE_MONITOR : InstrItinClass;
+
+def IIC_SSE_PREFETCH : InstrItinClass;
+def IIC_SSE_PAUSE : InstrItinClass;
+def IIC_SSE_LFENCE : InstrItinClass;
+def IIC_SSE_MFENCE : InstrItinClass;
+def IIC_SSE_SFENCE : InstrItinClass;
+def IIC_SSE_LDMXCSR : InstrItinClass;
+def IIC_SSE_STMXCSR : InstrItinClass;
+
+def IIC_SSE_CVT_PD_RR : InstrItinClass;
+def IIC_SSE_CVT_PD_RM : InstrItinClass;
+def IIC_SSE_CVT_PS_RR : InstrItinClass;
+def IIC_SSE_CVT_PS_RM : InstrItinClass;
+def IIC_SSE_CVT_PI2PS_RR : InstrItinClass;
+def IIC_SSE_CVT_PI2PS_RM : InstrItinClass;
+def IIC_SSE_CVT_Scalar_RR : InstrItinClass;
+def IIC_SSE_CVT_Scalar_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI32_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI32_RR : InstrItinClass;
+def IIC_SSE_CVT_SS2SI64_RM : InstrItinClass;
+def IIC_SSE_CVT_SS2SI64_RR : InstrItinClass;
+def IIC_SSE_CVT_SD2SI_RM : InstrItinClass;
+def IIC_SSE_CVT_SD2SI_RR : InstrItinClass;
+
+def IIC_CMPX_LOCK : InstrItinClass;
+def IIC_CMPX_LOCK_8 : InstrItinClass;
+def IIC_CMPX_LOCK_8B : InstrItinClass;
+def IIC_CMPX_LOCK_16B : InstrItinClass;
+
+def IIC_XADD_LOCK_MEM : InstrItinClass;
+def IIC_XADD_LOCK_MEM8 : InstrItinClass;
+
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[], [], []>;
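+// Processors with no scheduling information use this empty itinerary and
+// get no per-instruction timing data.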
+
+include "X86ScheduleAtom.td"
+
+
+
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
new file mode 100644
index 000000000000..77d4e56d7c03
--- /dev/null
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -0,0 +1,305 @@
+//===- X86ScheduleAtom.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Intel Atom (Bonnell)
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+//
+// Scheduling information derived from the "Intel 64 and IA-32 Architectures
+// Optimization Reference Manual", Chapter 13, Section 4.
+// Functional Units
+// Port 0
+def Port0 : FuncUnit; // ALU: ALU0, shift/rotate, load/store
+ // SIMD/FP: SIMD ALU, Shuffle, SIMD/FP multiply, divide
+def Port1 : FuncUnit; // ALU: ALU1, bit processing, jump, and LEA
+ // SIMD/FP: SIMD ALU, FP Adder
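+// Bonnell is an in-order, two-issue design; each entry below models an
+// instruction as occupying one or both ports for the given number of cycles.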
+
+def AtomItineraries : ProcessorItineraries<
+ [ Port0, Port1 ],
+ [], [
+ // P0 only
+ // InstrItinData<class, [InstrStage<N, [P0]>] >,
+ // P0 or P1
+ // InstrItinData<class, [InstrStage<N, [P0, P1]>] >,
+ // P0 and P1
+ // InstrItinData<class, [InstrStage<N, [P0], 0>, InstrStage<N, [P1]>] >,
+ //
+ // Default is 1 cycle, port0 or port1
+ InstrItinData<IIC_DEFAULT, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_ALU_MEM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_LEA, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_LEA_16, [InstrStage<2, [Port0, Port1]>] >,
+ // mul
+ InstrItinData<IIC_MUL8, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_MUL64, [InstrStage<12, [Port0, Port1]>] >,
+ // imul by al, ax, eax, rax
+ InstrItinData<IIC_IMUL8, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_MEM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_REG, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_MEM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL64, [InstrStage<12, [Port0, Port1]>] >,
+ // imul reg by reg|mem
+ InstrItinData<IIC_IMUL16_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL32_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RM, [InstrStage<12, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL64_RR, [InstrStage<12, [Port0, Port1]>] >,
+ // imul reg = reg/mem * imm
+ InstrItinData<IIC_IMUL16_RRI, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RRI, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RRI, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL16_RMI, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_IMUL32_RMI, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_IMUL64_RMI, [InstrStage<14, [Port0, Port1]>] >,
+ // idiv
+ InstrItinData<IIC_IDIV8, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV16, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV32, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_IDIV64, [InstrStage<130, [Port0, Port1]>] >,
+ // div
+ InstrItinData<IIC_DIV8_REG, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV8_MEM, [InstrStage<68, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV16, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV32, [InstrStage<50, [Port0, Port1]>] >,
+ InstrItinData<IIC_DIV64, [InstrStage<130, [Port0, Port1]>] >,
+ // neg/not/inc/dec
+ InstrItinData<IIC_UNARY_REG, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [Port0]>] >,
+ // add/sub/and/or/xor/adc/sbc/cmp/test
+ InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_BIN_MEM, [InstrStage<1, [Port0]>] >,
+ // shift/rotate
+ InstrItinData<IIC_SR, [InstrStage<1, [Port0]>] >,
+ // shift double
+ InstrItinData<IIC_SHD16_REG_IM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_REG_CL, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_REG_CL, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_REG_IM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_REG_CL, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<9, [Port0, Port1]>] >,
+ // cmov
+ InstrItinData<IIC_CMOV16_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV16_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMOV32_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV32_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMOV64_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_CMOV64_RR, [InstrStage<1, [Port0, Port1]>] >,
+ // set
+ InstrItinData<IIC_SET_M, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SET_R, [InstrStage<1, [Port0, Port1]>] >,
+ // jcc
+ InstrItinData<IIC_Jcc, [InstrStage<1, [Port1]>] >,
+ // jcxz/jecxz/jrcxz
+ InstrItinData<IIC_JCXZ, [InstrStage<4, [Port0, Port1]>] >,
+ // jmp rel
+ InstrItinData<IIC_JMP_REL, [InstrStage<1, [Port1]>] >,
+ // jmp indirect
+ InstrItinData<IIC_JMP_REG, [InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_JMP_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ // jmp far
+ InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<32, [Port0, Port1]>] >,
+ InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<31, [Port0, Port1]>] >,
+ // loop/loope/loopne
+ InstrItinData<IIC_LOOP, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_LOOPE, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_LOOPNE, [InstrStage<17, [Port0, Port1]>] >,
+ // call - all but reg/imm
+ InstrItinData<IIC_CALL_RI, [InstrStage<1, [Port0], 0>,
+ InstrStage<1, [Port1]>] >,
+ InstrItinData<IIC_CALL_MEM, [InstrStage<15, [Port0, Port1]>] >,
+ InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<40, [Port0, Port1]>] >,
+ InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<39, [Port0, Port1]>] >,
+ // ret
+ InstrItinData<IIC_RET, [InstrStage<79, [Port0, Port1]>] >,
+ InstrItinData<IIC_RET_IMM, [InstrStage<1, [Port0], 0>, InstrStage<1, [Port1]>] >,
+ // sign extension movs
+ InstrItinData<IIC_MOVSX,[InstrStage<1, [Port0] >] >,
+ InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [Port0, Port1]>] >,
+ // zero extension movs
+ InstrItinData<IIC_MOVZX,[InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_REP_MOVS, [InstrStage<75, [Port0, Port1]>] >,
+ InstrItinData<IIC_REP_STOS, [InstrStage<74, [Port0, Port1]>] >,
+
+ // SSE binary operations
+ // arithmetic fp scalar
+ InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<34, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<34, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<62, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<62, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<10, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ // arithmetic fp parallel
+ InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<5, [Port0], 0>,
+ InstrStage<5, [Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<70, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<70, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<125, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<125, [Port0, Port1]>] >,
+
+ // bitwise parallel
+ InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [Port0]>] >,
+
+ // arithmetic int parallel
+ InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+
+ // multiply int parallel
+ InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [Port0]>] >,
+
+ // shift parallel
+ InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_CMPP_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CMPP_RM, [InstrStage<7, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PSHUF, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_SQRTP_RR, [InstrStage<13, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTP_RM, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTS_RR, [InstrStage<11, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_SQRTS_RM, [InstrStage<12, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [Port0]>] >,
+ InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVMSK, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_SSE_MASKMOV, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<2, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_LDDQU, [InstrStage<3, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<3, [Port0]>] >,
+ InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_PAUSE, [InstrStage<17, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_STMXCSR, [InstrStage<15, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<3, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<4, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [Port0]>] >,
+
+ InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [Port0]>] >,
+ InstrItinData<IIC_SSE_PALIGNR, [InstrStage<1, [Port0]>] >,
+ InstrItinData<IIC_SSE_MWAIT, [InstrStage<46, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_MONITOR, [InstrStage<45, [Port0, Port1]>] >,
+
+ // conversions
+ // to/from PD ...
+ InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<8, [Port0, Port1]>] >,
+ // to/from PS except to/from PD and PS2PI
+ InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<7, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<9, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<10, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<8, [Port0, Port1]>] >,
+ InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<9, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_CMPX_LOCK, [InstrStage<14, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<6, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<18, [Port0, Port1]>] >,
+ InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<22, [Port0, Port1]>] >,
+
+ InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<2, [Port0, Port1]>] >,
+ InstrItinData<IIC_XADD_LOCK_MEM8, [InstrStage<3, [Port0, Port1]>] >
+ ]>;
+
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index 6406bce31187..9a04e352ab62 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -65,7 +65,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
std::pair<SDValue,SDValue> CallResult =
TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
DAG, dl);
return CallResult.second;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 7064dd06fa30..452dd7eba326 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallVector.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -177,16 +176,18 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ unsigned MaxLevel;
union {
unsigned u[3];
char c[12];
} text;
-
- if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+
+ if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
+ MaxLevel < 1)
return;
X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
-
+
if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
@@ -196,7 +197,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
+ //if ((ECX >> 28) & 1) { X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
@@ -244,28 +245,69 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
IsBTMemSlow = true;
ToggleFeature(X86::FeatureSlowBTMem);
}
+
// If it's Nehalem, unaligned memory access is fast.
+ // FIXME: Nehalem is family 6. Also include Westmere and later processors?
if (Family == 15 && Model == 26) {
IsUAMemFast = true;
ToggleFeature(X86::FeatureFastUAMem);
}
- X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 29) & 0x1) {
- HasX86_64 = true;
- ToggleFeature(X86::Feature64Bit);
- }
- if ((ECX >> 5) & 0x1) {
- HasLZCNT = true;
- ToggleFeature(X86::FeatureLZCNT);
+ // Set processor type. Currently only Atom is detected.
+ if (Family == 6 && Model == 28) {
+ X86ProcFamily = IntelAtom;
+ ToggleFeature(X86::FeatureLeaForSP);
}
- if (IsAMD && ((ECX >> 6) & 0x1)) {
- HasSSE4A = true;
- ToggleFeature(X86::FeatureSSE4A);
+
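+ // CPUID leaf 0x80000000 returns the highest supported extended leaf in
+ // EAX; only query extended leaves that are actually implemented.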
+ unsigned MaxExtLevel;
+ X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
+
+ if (MaxExtLevel >= 0x80000001) {
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if ((ECX >> 5) & 0x1) {
+ HasLZCNT = true;
+ ToggleFeature(X86::FeatureLZCNT);
+ }
+ if (IsAMD) {
+ if ((ECX >> 6) & 0x1) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if ((ECX >> 11) & 0x1) {
+ HasXOP = true;
+ ToggleFeature(X86::FeatureXOP);
+ }
+ if ((ECX >> 16) & 0x1) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
+ }
}
- if (IsAMD && ((ECX >> 16) & 0x1)) {
- HasFMA4 = true;
- ToggleFeature(X86::FeatureFMA4);
+ }
+
+ if (IsIntel && MaxLevel >= 7) {
+ if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
+ if (EBX & 0x1) {
+ HasFSGSBase = true;
+ ToggleFeature(X86::FeatureFSGSBase);
+ }
+ if ((EBX >> 3) & 0x1) {
+ HasBMI = true;
+ ToggleFeature(X86::FeatureBMI);
+ }
+ // FIXME: AVX2 codegen support is not ready.
+ //if ((EBX >> 5) & 0x1) {
+ // X86SSELevel = AVX2;
+ // ToggleFeature(X86::FeatureAVX2);
+ //}
+ if ((EBX >> 8) & 0x1) {
+ HasBMI2 = true;
+ ToggleFeature(X86::FeatureBMI2);
+ }
}
}
}
@@ -274,6 +316,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
const std::string &FS,
unsigned StackAlignOverride, bool is64Bit)
: X86GenSubtargetInfo(TT, CPU, FS)
+ , X86ProcFamily(Others)
, PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
@@ -281,31 +324,35 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
, HasX86_64(false)
, HasPOPCNT(false)
, HasSSE4A(false)
- , HasAVX(false)
, HasAES(false)
, HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
+ , HasXOP(false)
, HasMOVBE(false)
, HasRDRAND(false)
, HasF16C(false)
+ , HasFSGSBase(false)
, HasLZCNT(false)
, HasBMI(false)
+ , HasBMI2(false)
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
, HasCmpxchg16b(false)
- , stackAlignment(8)
+ , UseLeaForSP(false)
+ , PostRAScheduler(false)
+ , stackAlignment(4)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
- , In64BitMode(is64Bit)
- , InNaClMode(false) {
+ , In64BitMode(is64Bit) {
// Determine default and user specified characteristics
+ std::string CPUName = CPU;
if (!FS.empty() || !CPU.empty()) {
- std::string CPUName = CPU;
if (CPUName.empty()) {
-#if defined (__x86_64__) || defined(__i386__)
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
CPUName = sys::getHostCPUName();
#else
CPUName = "generic";
@@ -325,6 +372,13 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
// If feature string is not empty, parse features string.
ParseSubtargetFeatures(CPUName, FullFS);
} else {
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
@@ -333,7 +387,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
HasCMov = true; ToggleFeature(X86::FeatureCMOV);
- if (!HasAVX && X86SSELevel < SSE2) {
+ if (X86SSELevel < SSE2) {
X86SSELevel = SSE2;
ToggleFeature(X86::FeatureSSE1);
ToggleFeature(X86::FeatureSSE2);
@@ -341,28 +395,22 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
}
}
+ if (X86ProcFamily == IntelAtom) {
+ PostRAScheduler = true;
+ InstrItins = getInstrItineraryForCPU(CPUName);
+ }
+
// It's important to keep the MCSubtargetInfo feature bits in sync with
// target data structure which is shared with MC code emitter, etc.
if (In64BitMode)
ToggleFeature(X86::Mode64Bit);
- if (isTargetNaCl()) {
- InNaClMode = true;
- ToggleFeature(X86::ModeNaCl);
- }
-
- if (HasAVX)
- X86SSELevel = NoMMXSSE;
-
DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- if(EnableSegmentedStacks && !isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
-
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
if (StackAlignOverride)
@@ -371,3 +419,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
isTargetSolaris() || In64BitMode)
stackAlignment = 16;
}
+
+bool X86Subtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
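The new MaxLevel and MaxExtLevel guards above matter because querying a CPUID leaf beyond the processor's reported maximum returns undefined data on some parts. A minimal sketch of the same guard pattern, assuming an x86 host and GCC/Clang's <cpuid.h> (the helper name is ours, not LLVM's):

    #include <cpuid.h>

    // Returns true only if basic CPUID leaf 7 can be queried safely:
    // leaf 0 reports the maximum supported basic leaf in EAX.
    static bool canQueryLeaf7() {
      unsigned EAX, EBX, ECX, EDX;
      if (!__get_cpuid(0, &EAX, &EBX, &ECX, &EDX))
        return false;    // CPUID itself unavailable on this host
      return EAX >= 7;   // mirrors the MaxLevel >= 7 check in the patch
    }
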
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 3258d3d0ada3..7fd832bf0678 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -1,4 +1,4 @@
-//=====---- X86Subtarget.h - Define Subtarget for the X86 -----*- C++ -*--====//
+//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,9 +14,9 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "llvm/CallingConv.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/CallingConv.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -42,13 +42,20 @@ enum Style {
class X86Subtarget : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
- NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
+ NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2
};
enum X863DNowEnum {
NoThreeDNow, ThreeDNow, ThreeDNowA
};
+ enum X86ProcFamilyEnum {
+ Others, IntelAtom
+ };
+
+ /// X86ProcFamily - X86 processor family: Intel Atom, and others
+ X86ProcFamilyEnum X86ProcFamily;
+
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
@@ -75,9 +82,6 @@ protected:
/// HasSSE4A - True if the processor supports SSE4A instructions.
bool HasSSE4A;
- /// HasAVX - Target has AVX instructions
- bool HasAVX;
-
/// HasAES - Target has AES instructions
bool HasAES;
@@ -90,6 +94,9 @@ protected:
/// HasFMA4 - Target has 4-operand fused multiply-add
bool HasFMA4;
+ /// HasXOP - Target has XOP instructions
+ bool HasXOP;
+
/// HasMOVBE - True if the processor has the MOVBE instruction.
bool HasMOVBE;
@@ -99,12 +106,18 @@ protected:
/// HasF16C - Processor has 16-bit floating point conversion instructions.
bool HasF16C;
+ /// HasFSGSBase - Processor has FS/GS base instructions.
+ bool HasFSGSBase;
+
/// HasLZCNT - Processor has LZCNT instruction.
bool HasLZCNT;
/// HasBMI - Processor has BMI1 instructions.
bool HasBMI;
+ /// HasBMI2 - Processor has BMI2 instructions.
+ bool HasBMI2;
+
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -119,6 +132,13 @@ protected:
/// this is true for most x86-64 chips, but not the first AMD chips.
bool HasCmpxchg16b;
+ /// UseLeaForSP - True if the LEA instruction should be used for adjusting
+ /// the stack pointer. This is an optimization for Intel Atom processors.
+ bool UseLeaForSP;
+
+ /// PostRAScheduler - True if using post-register-allocation scheduler.
+ bool PostRAScheduler;
+
/// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -129,14 +149,14 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+
+ /// Instruction itineraries for scheduling
+ InstrItineraryData InstrItins;
private:
/// In64BitMode - True if compiling for 64-bit, false for 32-bit.
bool In64BitMode;
- /// InNaClMode - True if compiling for Native Client target.
- bool InNaClMode;
-
public:
/// This constructor initializes the data members to match that
@@ -176,26 +196,31 @@ public:
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
bool hasSSE41() const { return X86SSELevel >= SSE41; }
bool hasSSE42() const { return X86SSELevel >= SSE42; }
+ bool hasAVX() const { return X86SSELevel >= AVX; }
+ bool hasAVX2() const { return X86SSELevel >= AVX2; }
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasPOPCNT() const { return HasPOPCNT; }
- bool hasAVX() const { return HasAVX; }
- bool hasXMM() const { return hasSSE1() || hasAVX(); }
- bool hasXMMInt() const { return hasSSE2() || hasAVX(); }
bool hasAES() const { return HasAES; }
bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
+ bool hasXOP() const { return HasXOP; }
bool hasMOVBE() const { return HasMOVBE; }
bool hasRDRAND() const { return HasRDRAND; }
bool hasF16C() const { return HasF16C; }
+ bool hasFSGSBase() const { return HasFSGSBase; }
bool hasLZCNT() const { return HasLZCNT; }
bool hasBMI() const { return HasBMI; }
+ bool hasBMI2() const { return HasBMI2; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
+ bool useLeaForSP() const { return UseLeaForSP; }
+
+ bool isAtom() const { return X86ProcFamily == IntelAtom; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -209,38 +234,28 @@ public:
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
- bool isTargetELF() const {
- return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
- }
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
bool isTargetNaCl() const {
return TargetTriple.getOS() == Triple::NativeClient;
}
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
-
bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
bool isTargetMingw() const { return TargetTriple.getOS() == Triple::MinGW32; }
bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
- bool isTargetCygMing() const {
- return isTargetMingw() || isTargetCygwin();
- }
-
- /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
- bool isTargetCOFF() const {
- return isTargetMingw() || isTargetCygwin() || isTargetWindows();
- }
+ bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
+ bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
+ bool isTargetEnvMacho() const { return TargetTriple.isEnvironmentMachO(); }
bool isTargetWin64() const {
// FIXME: x86_64-cygwin has not been released yet.
- return In64BitMode && (isTargetCygMing() || isTargetWindows());
- }
-
- bool isTargetEnvMacho() const {
- return isTargetDarwin() || (TargetTriple.getEnvironment() == Triple::MachO);
+ return In64BitMode && TargetTriple.isOSWindows();
}
bool isTargetWin32() const {
+ // FIXME: Cygwin is included for isTargetWin64 -- should it be included
+ // here too?
return !In64BitMode && (isTargetMingw() || isTargetWindows());
}
@@ -286,6 +301,15 @@ public:
/// indicating the number of scheduling cycles of backscheduling that
/// should be attempted.
unsigned getSpecialAddressLatency() const;
+
+ /// enablePostRAScheduler - Enable the post-register-allocation scheduler;
+ /// currently used for the Intel Atom optimization.
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+
+ /// getInstrItineraryData - Return the instruction itineraries based on the
+ /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
};
} // End llvm namespace
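Folding AVX and AVX2 into the ordered X86SSEEnum lets hasAVX() and hasAVX2() become plain >= comparisons, the same trick the header already uses for the SSE levels. A small illustrative sketch of the idiom (the enum and struct names here are hypothetical, not LLVM's):

    enum SimdLevel { NoSimd, MMX, SSE1, SSE2, SSE3, AVX, AVX2 };

    struct Features {
      SimdLevel Level;
      // Each level implies all lower ones, so one comparison answers
      // "do we have at least X?".
      bool hasSSE2() const { return Level >= SSE2; }
      bool hasAVX()  const { return Level >= AVX; }
    };
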
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 15c6c4e7a7d2..f4b7a6277ade 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -28,11 +28,14 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
}
+void X86_32TargetMachine::anchor() { }
X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, false),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-"
"n8:16:32-S128" :
@@ -48,11 +51,14 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
JITInfo(*this) {
}
+void X86_64TargetMachine::anchor() { }
X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : X86TargetMachine(T, TT, CPU, FS, RM, CM, true),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
"n8:16:32:64-S128"),
InstrInfo(*this),
@@ -65,12 +71,15 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
///
X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
- Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, Options.StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
- ELFWriterInfo(is64Bit, true) {
+ ELFWriterInfo(is64Bit, true),
+ InstrItins(Subtarget.getInstrItineraryData()){
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
@@ -92,8 +101,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
}
// default to hard float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Hard;
+ if (Options.FloatABIType == FloatABI::Default)
+ this->Options.FloatABIType = FloatABI::Hard;
}
//===----------------------------------------------------------------------===//
@@ -102,46 +111,67 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
static cl::opt<bool>
UseVZeroUpper("x86-use-vzeroupper",
cl::desc("Minimize AVX to SSE transition penalty"),
- cl::init(false));
+ cl::init(true));
//===----------------------------------------------------------------------===//
// Pass Pipeline Configuration
//===----------------------------------------------------------------------===//
-bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+namespace {
+/// X86 Code Generator Pass Configuration Options.
+class X86PassConfig : public TargetPassConfig {
+public:
+ X86PassConfig(X86TargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ X86TargetMachine &getX86TargetMachine() const {
+ return getTM<X86TargetMachine>();
+ }
+
+ const X86Subtarget &getX86Subtarget() const {
+ return *getX86TargetMachine().getSubtargetImpl();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreRegAlloc();
+ virtual bool addPostRegAlloc();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new X86PassConfig(this, PM);
+}
+
+bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
- PM.add(createX86ISelDag(*this, OptLevel));
+ PM.add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
- if (!Subtarget.is64Bit())
+ if (!getX86Subtarget().is64Bit())
PM.add(createGlobalBaseRegPass());
return false;
}
-bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86PassConfig::addPreRegAlloc() {
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
-bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86PassConfig::addPostRegAlloc() {
PM.add(createX86FloatingPointStackifierPass());
return true; // -print-machineinstr should print after this.
}
-bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+bool X86PassConfig::addPreEmitPass() {
bool ShouldPrint = false;
- if (OptLevel != CodeGenOpt::None &&
- (Subtarget.hasSSE2() || Subtarget.hasAVX())) {
+ if (getOptLevel() != CodeGenOpt::None && getX86Subtarget().hasSSE2()) {
PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
- if (Subtarget.hasAVX() && UseVZeroUpper) {
+ if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
PM.add(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
@@ -150,7 +180,6 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
}
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
PM.add(createX86JITCodeEmitterPass(*this, JCE));
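The pass-pipeline rewrite above replaces the per-hook (PassManagerBase&, CodeGenOpt::Level) signatures with a TargetPassConfig object that owns that state, so each override only names the passes it adds. A simplified sketch of the shape of this hook-object pattern (not LLVM's actual class layout):

    // The base class carries the shared state once; hooks take no arguments.
    struct PassConfigBase {
      virtual ~PassConfigBase() {}
      virtual bool addInstSelector() { return false; }
      virtual bool addPreEmitPass() { return false; }
    };

    struct MyTargetPassConfig : PassConfigBase {
      bool addInstSelector() override {
        // Target-specific selector passes would be registered here; the
        // opt level and pass manager live on the config object instead
        // of every hook's signature.
        return false;
      }
    };
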
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index d1569aa9d751..8e935af67fe3 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -27,19 +27,20 @@
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
-
-class formatted_raw_ostream;
+
class StringRef;
class X86TargetMachine : public LLVMTargetMachine {
- X86Subtarget Subtarget;
- X86FrameLowering FrameLowering;
- X86ELFWriterInfo ELFWriterInfo;
+ X86Subtarget Subtarget;
+ X86FrameLowering FrameLowering;
+ X86ELFWriterInfo ELFWriterInfo;
+ InstrItineraryData InstrItins;
public:
- X86TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
+ X86TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL,
bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const {
@@ -55,7 +56,7 @@ public:
virtual const X86TargetLowering *getTargetLowering() const {
llvm_unreachable("getTargetLowering not implemented");
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
llvm_unreachable("getSelectionDAGInfo not implemented");
}
virtual const X86RegisterInfo *getRegisterInfo() const {
@@ -64,19 +65,21 @@ public:
virtual const X86ELFWriterInfo *getELFWriterInfo() const {
return Subtarget.isTargetELF() ? &ELFWriterInfo : 0;
}
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
// Set up the pass pipeline.
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
- virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+
+ virtual bool addCodeEmitter(PassManagerBase &PM,
JITCodeEmitter &JCE);
};
/// X86_32TargetMachine - X86 32-bit target machine.
///
class X86_32TargetMachine : public X86TargetMachine {
+ virtual void anchor();
const TargetData DataLayout; // Calculates type size & alignment
X86InstrInfo InstrInfo;
X86SelectionDAGInfo TSInfo;
@@ -84,13 +87,14 @@ class X86_32TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
virtual const X86InstrInfo *getInstrInfo() const {
@@ -104,6 +108,7 @@ public:
/// X86_64TargetMachine - X86 64-bit target machine.
///
class X86_64TargetMachine : public X86TargetMachine {
+ virtual void anchor();
const TargetData DataLayout; // Calculates type size & alignment
X86InstrInfo InstrInfo;
X86SelectionDAGInfo TSInfo;
@@ -111,13 +116,14 @@ class X86_64TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
- virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
return &TSInfo;
}
virtual const X86InstrInfo *getInstrInfo() const {
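The added anchor() declarations follow the key-function idiom: giving a class one out-of-line virtual member pins its vtable (and type info) to a single object file instead of emitting weak copies in every translation unit. A minimal sketch of the idiom:

    // Header: declare one never-inlined virtual method.
    class Widget {
      virtual void anchor();   // exists only to anchor the vtable
    public:
      virtual int size() const { return 0; }
    };

    // Exactly one .cpp file provides the (empty) definition:
    void Widget::anchor() {}
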
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 991f322a6346..718f35ea84ac 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/Target/X86/X86TargetObjectFile.cpp - X86 Object Info ---------===//
+//===-- X86TargetObjectFile.cpp - X86 Object Info -------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;
using namespace dwarf;
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index d7adf27ecaef..a02a36809ef2 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/X86/X86TargetObjectFile.h - X86 Object Info -*- C++ -*-===//
+//===-- X86TargetObjectFile.h - X86 Object Info -----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,7 +15,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
namespace llvm {
- class X86TargetMachine;
/// X8664_MachoTargetObjectFile - This TLOF implementation is used for Darwin
/// x86-64.
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 39584942468d..2fd78a7231c6 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -14,14 +14,16 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-codegen"
+#define DEBUG_TYPE "x86-vzeroupper"
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
@@ -41,6 +43,60 @@ namespace {
private:
const TargetInstrInfo *TII; // Machine instruction info.
MachineBasicBlock *MBB; // Current basic block
+
+ // Any YMM register live-in to this function?
+ bool FnHasLiveInYmm;
+
+ // BBState - Contains the state of each MBB: unknown, clean, dirty
+ SmallVector<uint8_t, 8> BBState;
+
+ // BBSolved - Keep track of all MBBs which have already been analyzed
+ // and require no further processing.
+ BitVector BBSolved;
+
+ // Machine Basic Blocks are classified according to this pass:
+ //
+ // ST_UNKNOWN - The MBB state is unknown, meaning that from the entry state
+ // until the MBB exit there is no instruction using YMM to change
+ // the state to dirty, or one of the incoming predecessors is unknown
+ // and there is no dirty predecessor among them.
+ //
+ // ST_CLEAN - No YMM usage at the end of the MBB. An MBB could have
+ // instructions using YMM and be marked ST_CLEAN, as long as the state
+ // is cleaned by a vzeroupper before any call.
+ //
+ // ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a
+ // vzeroupper instruction.
+ //
+ // ST_INIT - Placeholder for an empty state set
+ //
+ enum {
+ ST_UNKNOWN = 0,
+ ST_CLEAN = 1,
+ ST_DIRTY = 2,
+ ST_INIT = 3
+ };
+
+ // computeState - Given two states, compute the resulting state, in
+ // the following way:
+ //
+ // 1) One dirty state yields another dirty state
+ // 2) All states must be clean for the result to be clean
+ // 3) If none above and one unknown, the result state is also unknown
+ //
+ unsigned computeState(unsigned PrevState, unsigned CurState) {
+ if (PrevState == ST_INIT)
+ return CurState;
+
+ if (PrevState == ST_DIRTY || CurState == ST_DIRTY)
+ return ST_DIRTY;
+
+ if (PrevState == ST_CLEAN && CurState == ST_CLEAN)
+ return ST_CLEAN;
+
+ return ST_UNKNOWN;
+ }
+
};
char VZeroUpperInserter::ID = 0;
}
@@ -49,37 +105,82 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() {
return new VZeroUpperInserter();
}
+static bool isYmmReg(unsigned Reg) {
+ if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
+ return true;
+
+ return false;
+}
+
+static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
+ E = MRI.livein_end(); I != E; ++I)
+ if (isYmmReg(I->first))
+ return true;
+
+ return false;
+}
+
+static bool hasYmmReg(MachineInstr *MI) {
+ for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ if (MO.isDebug())
+ continue;
+ if (isYmmReg(MO.getReg()))
+ return true;
+ }
+ return false;
+}
+
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
- bool Changed = false;
-
- // Process any unreachable blocks in arbitrary order now.
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- Changed |= processBasicBlock(MF, *BB);
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool EverMadeChange = false;
- return Changed;
-}
+ // Fast check: if the function doesn't use any ymm registers, we don't need
+ // to insert any VZEROUPPER instructions. This is constant-time, so it is
+ // cheap in the common case of no ymm use.
+ bool YMMUsed = false;
+ const TargetRegisterClass *RC = X86::VR256RegisterClass;
+ for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
+ i != e; i++) {
+ if (MRI.isPhysRegUsed(*i)) {
+ YMMUsed = true;
+ break;
+ }
+ }
+ if (!YMMUsed)
+ return EverMadeChange;
-static bool isCallToModuleFn(const MachineInstr *MI) {
- assert(MI->getDesc().isCall() && "Isn't a call instruction");
+ // Pre-compute the existence of any live-in YMM registers to this function
+ FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
- for (int i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ assert(BBState.empty());
+ BBState.resize(MF.getNumBlockIDs(), 0);
+ BBSolved.resize(MF.getNumBlockIDs(), 0);
- if (!MO.isGlobal())
- continue;
+ // Each BB's state depends on all of its predecessors, so loop until
+ // everything converges. (Once we converge, we can implicitly mark
+ // everything that is still ST_UNKNOWN as ST_CLEAN.)
+ while (1) {
+ bool MadeChange = false;
- const GlobalValue *GV = MO.getGlobal();
- GlobalValue::LinkageTypes LT = GV->getLinkage();
- if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) ||
- (GV->isExternalLinkage(LT) && !GV->isDeclaration()))
- return true;
+ // Process all basic blocks.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+ MadeChange |= processBasicBlock(MF, *I);
- return false;
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
}
- return false;
+
+ BBState.clear();
+ BBSolved.clear();
+ return EverMadeChange;
}
/// processBasicBlock - Loop over all of the instructions in the basic block,
@@ -87,19 +188,98 @@ static bool isCallToModuleFn(const MachineInstr *MI) {
bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF,
MachineBasicBlock &BB) {
bool Changed = false;
+ unsigned BBNum = BB.getNumber();
MBB = &BB;
+ // Don't process already solved BBs
+ if (BBSolved[BBNum])
+ return false; // No changes
+
+ // Check the state of all predecessors
+ unsigned EntryState = ST_INIT;
+ for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]);
+ if (EntryState == ST_DIRTY)
+ break;
+ }
+
+
+ // The entry MBB for the function may set the initial state to dirty if
+ // the function receives any YMM incoming arguments.
+ if (MBB == MF.begin()) {
+ EntryState = ST_CLEAN;
+ if (FnHasLiveInYmm)
+ EntryState = ST_DIRTY;
+ }
+
+ // The current state is initialized according to the predecessors
+ unsigned CurState = EntryState;
+ bool BBHasCall = false;
+
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
DebugLoc dl = I->getDebugLoc();
+ bool isControlFlow = MI->isCall() || MI->isReturn();
+
+ // Shortcut: don't need to check regular instructions in dirty state.
+ if (!isControlFlow && CurState == ST_DIRTY)
+ continue;
+
+ if (hasYmmReg(MI)) {
+ // We found a ymm-using instruction; this could be an AVX instruction,
+ // or it could be control flow.
+ CurState = ST_DIRTY;
+ continue;
+ }
- // Insert a vzeroupper instruction before each control transfer
- // to functions outside this module
- if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) {
- BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
- ++NumVZU;
+ // Check for control-flow out of the current function (which might
+ // indirectly execute SSE instructions).
+ if (!isControlFlow)
+ continue;
+
+ BBHasCall = true;
+
+ // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
+ // registers. This instruction has zero latency. In addition, the processor
+ // changes back to Clean state, after which execution of Intel SSE
+ // instructions or Intel AVX instructions has no transition penalty. Add
+ // the VZEROUPPER instruction before any function call/return that might
+ // execute SSE code.
+ // FIXME: In some cases, we may want to move the VZEROUPPER into a
+ // predecessor block.
+ if (CurState == ST_DIRTY) {
+ // Only insert the VZEROUPPER in case the entry state isn't unknown.
+ // When unknown, only compute the information within the block to have
+ // it available in the exit if possible, but don't change the block.
+ if (EntryState != ST_UNKNOWN) {
+ BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER));
+ ++NumVZU;
+ }
+
+ // After the inserted VZEROUPPER the state becomes clean again, but
+ // other YMM uses may appear before subsequent calls or even before
+ // the end of the BB.
+ CurState = ST_CLEAN;
}
}
+ DEBUG(dbgs() << "MBB #" << BBNum
+ << ", current state: " << CurState << '\n');
+
+ // A BB can only be considered solved when we have both done all the
+ // necessary transformations and have computed the exit state. This happens
+ // in two cases:
+ // 1) We know the entry state: this immediately implies the exit state and
+ // all the necessary transformations.
+ // 2) There are no calls, and a non-call instruction marks this block:
+ // no transformations are necessary, and we know the exit state.
+ if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN))
+ BBSolved[BBNum] = true;
+
+ if (CurState != BBState[BBNum])
+ Changed = true;
+
+ BBState[BBNum] = CurState;
return Changed;
}
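computeState above is the meet operator of a forward dataflow problem (DIRTY absorbs everything, all-CLEAN stays CLEAN, anything else remains UNKNOWN), and runOnMachineFunction iterates processBasicBlock to a fixpoint. A self-contained sketch of that meet-and-iterate scheme, with the pass's real per-block transfer function reduced to an "ends dirty" flag (the types and names here are assumed, not the pass's own):

    #include <vector>

    enum State : unsigned char { UNKNOWN, CLEAN, DIRTY, INIT };

    // Meet of two predecessor exit states.
    static State meet(State A, State B) {
      if (A == INIT) return B;                     // first predecessor seen
      if (A == DIRTY || B == DIRTY) return DIRTY;  // dirty absorbs
      if (A == CLEAN && B == CLEAN) return CLEAN;  // all clean -> clean
      return UNKNOWN;
    }

    // Iterate to a fixpoint: recompute each block's state from its
    // predecessors until no exit state changes.
    static void solve(std::vector<State> &Exit,
                      const std::vector<std::vector<int>> &Preds,
                      const std::vector<bool> &EndsDirty) {
      for (bool Changed = true; Changed;) {
        Changed = false;
        for (size_t B = 0; B != Exit.size(); ++B) {
          State In = INIT;
          for (int P : Preds[B])
            In = meet(In, Exit[P]);
          if (In == INIT) In = CLEAN;              // entry block: no preds
          State Out = EndsDirty[B] ? DIRTY : In;   // simplified transfer
          if (Out != Exit[B]) { Exit[B] = Out; Changed = true; }
        }
      }
    }
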
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 3dc51e1991ed..0d59572a0d57 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -1,11 +1,11 @@
set(LLVM_TARGET_DEFINITIONS XCore.td)
-llvm_tablegen(XCoreGenRegisterInfo.inc -gen-register-info)
-llvm_tablegen(XCoreGenInstrInfo.inc -gen-instr-info)
-llvm_tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
-llvm_tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
-llvm_tablegen(XCoreGenCallingConv.inc -gen-callingconv)
-llvm_tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv)
+tablegen(LLVM XCoreGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(XCoreCommonTableGen)
add_llvm_target(XCoreCodeGen
@@ -14,6 +14,7 @@ add_llvm_target(XCoreCodeGen
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
+ XCoreMachineFunctionInfo.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
@@ -21,17 +22,5 @@ add_llvm_target(XCoreCodeGen
XCoreSelectionDAGInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreCodeGen
- LLVMAsmPrinter
- LLVMCodeGen
- LLVMCore
- LLVMMC
- LLVMSelectionDAG
- LLVMSupport
- LLVMTarget
- LLVMXCoreDesc
- LLVMXCoreInfo
- )
-
add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt
new file mode 100644
index 000000000000..53b4a9e3f5f7
--- /dev/null
+++ b/lib/Target/XCore/LLVMBuild.txt
@@ -0,0 +1,32 @@
+;===- ./lib/Target/XCore/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = MCTargetDesc TargetInfo
+
+[component_0]
+type = TargetGroup
+name = XCore
+parent = Target
+has_asmprinter = 1
+
+[component_1]
+type = Library
+name = XCoreCodeGen
+parent = XCore
+required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
index 269822db7113..3a3f5b4cc63e 100644
--- a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -3,11 +3,6 @@ add_llvm_library(LLVMXCoreDesc
XCoreMCAsmInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreDesc
- LLVMMC
- LLVMXCoreInfo
- )
-
add_dependencies(LLVMXCoreDesc XCoreCommonTableGen)
# Hack: we need to include 'main' target directory to grab private headers
diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
new file mode 100644
index 000000000000..a80c939b4372
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreDesc
+parent = XCore
+required_libraries = MC XCoreInfo
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 42ab1b31d57a..1cfdbda003b5 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -8,8 +8,11 @@
//===----------------------------------------------------------------------===//
#include "XCoreMCAsmInfo.h"
+#include "llvm/ADT/StringRef.h"
using namespace llvm;
+void XCoreMCAsmInfo::anchor() { }
+
XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
SupportsDebugInformation = true;
Data16bitsDirective = "\t.short\t";
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 840392263881..076777541e33 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -1,4 +1,4 @@
-//=====-- XCoreMCAsmInfo.h - XCore asm properties -------------*- C++ -*--====//
+//===-- XCoreMCAsmInfo.h - XCore asm properties ----------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,13 +14,14 @@
#ifndef XCORETARGETASMINFO_H
#define XCORETARGETASMINFO_H
-#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
+ class StringRef;
class Target;
class XCoreMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
public:
explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
};
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 276e841e6acc..bbfdd4356f2a 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -61,9 +62,10 @@ static MCAsmInfo *createXCoreMCAsmInfo(const Target &T, StringRef TT) {
}
static MCCodeGenInfo *createXCoreMCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM) {
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM);
+ X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 3cfc3764a62c..a255adb2e0f2 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -15,9 +15,7 @@
#define XCOREMCTARGETDESC_H
namespace llvm {
-class MCSubtargetInfo;
class Target;
-class StringRef;
extern Target TheXCoreTarget;
diff --git a/lib/Target/XCore/TargetInfo/CMakeLists.txt b/lib/Target/XCore/TargetInfo/CMakeLists.txt
index 7f84f6904305..2c34b8730c85 100644
--- a/lib/Target/XCore/TargetInfo/CMakeLists.txt
+++ b/lib/Target/XCore/TargetInfo/CMakeLists.txt
@@ -4,10 +4,4 @@ add_llvm_library(LLVMXCoreInfo
XCoreTargetInfo.cpp
)
-add_llvm_library_dependencies(LLVMXCoreInfo
- LLVMMC
- LLVMSupport
- LLVMTarget
- )
-
add_dependencies(LLVMXCoreInfo XCoreCommonTableGen)
diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..770ba87e4a39
--- /dev/null
+++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/XCore/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = XCoreInfo
+parent = XCore
+required_libraries = MC Support Target
+add_to_library_groups = XCore
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index b8fb0cac319b..08f091e5b870 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -24,7 +24,8 @@ namespace llvm {
class XCoreTargetMachine;
class formatted_raw_ostream;
- FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
+ FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
} // end namespace llvm;
diff --git a/lib/Target/XCore/XCore.td b/lib/Target/XCore/XCore.td
index 38401895e634..04a1dd5e95be 100644
--- a/lib/Target/XCore/XCore.td
+++ b/lib/Target/XCore/XCore.td
@@ -1,4 +1,4 @@
-//===- XCore.td - Describe the XCore Target Machine --------*- tablegen -*-===//
+//===-- XCore.td - Describe the XCore Target Machine -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
+// This is the top level entry point for the XCore target.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 7f8b169819a7..50fda58cf574 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreFrameLowering.cpp - Frame info for XCore Target -----*- C++ -*-==//
+//===-- XCoreFrameLowering.cpp - Frame info for XCore Target --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCore.h"
#include "XCoreFrameLowering.h"
+#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "llvm/Function.h"
@@ -84,7 +84,8 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
}
bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
- return DisableFramePointerElim(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MF.getFrameInfo()->hasVarSizedObjects();
}
void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
@@ -92,8 +93,6 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = &MF.getMMI();
- const XCoreRegisterInfo *RegInfo =
- static_cast<const XCoreRegisterInfo*>(MF.getTarget().getRegisterInfo());
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo());
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -118,7 +117,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME could emit multiple instructions.
report_fatal_error("emitPrologue Frame size too big: " + Twine(FrameSize));
}
- bool emitFrameMoves = RegInfo->needsFrameMoves(MF);
+ bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(MF);
// Do we need to allocate space on the stack?
if (FrameSize) {
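The hasFP change above reflects the patch-wide move of global codegen flags into a per-TargetMachine TargetOptions struct, reached here through MF.getTarget(). An illustrative sketch of that flag-migration shape (all names hypothetical):

    // Before: a file-scope flag. After: a field on the owning machine.
    struct CodeGenOptions {
      bool DisableFPElim;   // formerly a global cl::opt
    };

    struct Machine {
      CodeGenOptions Options;
    };

    // A hasFP-style query now reads the flag through the owning machine.
    static bool hasFramePointer(const Machine &M, bool HasVarSizedObjects) {
      return M.Options.DisableFPElim || HasVarSizedObjects;
    }
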
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index c591e93780aa..4c51aa5e79cc 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- XCoreFrameLowering.h - Frame info for XCore Target -------*- C++ -*-==//
+//===-- XCoreFrameLowering.h - Frame info for XCore Target ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 4dac1cee9827..7564fbad7d45 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -41,8 +41,8 @@ namespace {
const XCoreSubtarget &Subtarget;
public:
- XCoreDAGToDAGISel(XCoreTargetMachine &TM)
- : SelectionDAGISel(TM),
+ XCoreDAGToDAGISel(XCoreTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel),
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
@@ -83,8 +83,9 @@ namespace {
/// createXCoreISelDag - This pass converts a legalized DAG into a
/// XCore-specific DAG, ready for instruction scheduling.
///
-FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM) {
- return new XCoreDAGToDAGISel(TM);
+FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new XCoreDAGToDAGISel(TM, OptLevel);
}
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
@@ -120,7 +121,7 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base,
ConstantSDNode *CN = 0;
if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper)
&& (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && (CN->getSExtValue() % 4 == 0)) {
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
// Constant word offset from an object in the data region
Base = Addr.getOperand(0).getOperand(0);
Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
@@ -141,7 +142,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base,
ConstantSDNode *CN = 0;
if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper)
&& (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
- && (CN->getSExtValue() % 4 == 0)) {
+ && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
// Constant word offset from an object in the constant pool
Base = Addr.getOperand(0).getOperand(0);
Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32);
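The tightened SelectADDRdpii/SelectADDRcpii predicates now reject negative displacements even when they are 4-byte aligned, presumably because the dp-/cp-relative addressing forms encode an unsigned scaled word offset. The check in isolation, as a hedged sketch:

    #include <stdint.h>

    // Accept only non-negative, word-aligned byte offsets.
    static bool isValidWordOffset(int64_t Off) {
      return Off % 4 == 0 && Off >= 0;
    }
    // isValidWordOffset(8)  -> true
    // isValidWordOffset(-4) -> false: aligned but negative
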
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 2afe0e35afb1..fdf2b783241c 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ------===//
+//===-- XCoreISelLowering.cpp - XCore DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
@@ -36,7 +36,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
const char *XCoreTargetLowering::
@@ -109,6 +108,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
setOperationAction(ISD::ROTR , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
@@ -186,7 +187,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
- return SDValue();
}
}
@@ -198,7 +198,6 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N,
switch (N->getOpcode()) {
default:
llvm_unreachable("Don't know how to custom expand this!");
- return;
case ISD::ADD:
case ISD::SUB:
Results.push_back(ExpandADDSUB(N, DAG));
@@ -274,9 +273,8 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal());
}
- if (! GVar) {
+ if (!GVar) {
llvm_unreachable("Thread local object not a GlobalVariable?");
- return SDValue();
}
Type *Ty = cast<PointerType>(GV->getType())->getElementType();
if (!Ty->isSized() || isZeroLengthArray(Ty)) {
@@ -386,6 +384,15 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
Offset = off;
return true;
}
+ // Check for an aligned global variable.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(*Root)) {
+ const GlobalValue *GV = GA->getGlobal();
+ if (GA->getOffset() == 0 && GV->getAlignment() >= 4) {
+ AlignedBase = Base;
+ Offset = off;
+ return true;
+ }
+ }
return false;
}
@@ -418,7 +425,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
//
return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr,
MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
// Lower to
// ldw low, base[offset >> 2]
@@ -435,9 +442,11 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset);
SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain,
- LowAddr, MachinePointerInfo(), false, false, 0);
+ LowAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDValue High = DAG.getLoad(getPointerTy(), DL, Chain,
- HighAddr, MachinePointerInfo(), false, false, 0);
+ HighAddr, MachinePointerInfo(),
+ false, false, false, 0);
SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift);
SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift);
SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, HighShifted);
@@ -478,8 +487,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, IntPtrTy, false, false,
- false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ false, false, 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
Args, DAG, DL);
@@ -540,8 +549,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false,
- false, false, 0, CallingConv::C, false,
- /*isReturnValueUsed=*/true,
+ false, false, 0, CallingConv::C, /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
Args, DAG, dl);
@@ -745,14 +754,14 @@ SDValue XCoreTargetLowering::
LowerVAARG(SDValue Op, SelectionDAG &DAG) const
{
llvm_unreachable("unimplemented");
- // FIX Arguments passed by reference need a extra dereference.
+ // FIXME: Arguments passed by reference need an extra dereference.
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
EVT VT = Node->getValueType(0);
SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0),
Node->getOperand(1), MachinePointerInfo(V),
- false, false, 0);
+ false, false, false, 0);
// Increment the pointer, VAList, to the next vararg
SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList,
DAG.getConstant(VT.getSizeInBits(),
@@ -762,7 +771,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const
MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
- false, false, 0);
+ false, false, false, 0);
}
SDValue XCoreTargetLowering::
@@ -866,7 +875,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
SDValue
XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -1137,13 +1146,13 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
+ false, false, false, 0));
}
}
if (isVarArg) {
/* Argument registers */
- static const unsigned ArgRegs[] = {
+ static const uint16_t ArgRegs[] = {
XCore::R0, XCore::R1, XCore::R2, XCore::R3
};
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -1354,8 +1363,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
- if (KnownZero == Mask) {
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
SDValue Ops [] = { Carry, Result };
@@ -1377,8 +1386,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
- if (KnownZero == Mask) {
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, VT), N2);
@@ -1393,8 +1402,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, Mask, KnownZero, KnownOne);
- if (KnownZero == Mask) {
+ DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ if ((KnownZero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
SDValue Ops [] = { Borrow, Result };
@@ -1512,21 +1521,19 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
}
void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
- KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
case XCoreISD::LADD:
case XCoreISD::LSUB:
if (Op.getResNo() == 0) {
// Top bits of carry / borrow are clear.
- KnownZero = APInt::getHighBitsSet(Mask.getBitWidth(),
- Mask.getBitWidth() - 1);
- KnownZero &= Mask;
+ KnownZero = APInt::getHighBitsSet(KnownZero.getBitWidth(),
+ KnownZero.getBitWidth() - 1);
}
break;
}
@@ -1590,8 +1597,6 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
// reg + reg<<2
return AM.Scale == 4 && AM.BaseOffs == 0;
}
-
- return false;
}
//===----------------------------------------------------------------------===//
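With the Mask parameter gone from ComputeMaskedBits, each caller above applies its own mask afterwards; (KnownZero & Mask) == Mask asks whether every bit selected by Mask is provably zero (here, that the high bits of a carry or borrow are clear). The same test with plain integers instead of APInt:

    #include <stdint.h>

    // True when every bit selected by Mask is known to be zero.
    static bool allMaskedBitsKnownZero(uint32_t KnownZero, uint32_t Mask) {
      return (KnownZero & Mask) == Mask;
    }
    // A carry flag holds 0 or 1, so its top 31 bits are known zero:
    // allMaskedBitsKnownZero(0xFFFFFFFEu, 0xFFFFFFFEu) -> true
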
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d6c5b329a0a0..0b63ecd0f78e 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -15,9 +15,9 @@
#ifndef XCOREISELLOWERING_H
#define XCOREISELLOWERING_H
+#include "XCore.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLowering.h"
-#include "XCore.h"
namespace llvm {
@@ -160,7 +160,6 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- const APInt &Mask,
APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
@@ -175,9 +174,8 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
- LowerCall(SDValue Chain, SDValue Callee,
- CallingConv::ID CallConv, bool isVarArg,
- bool &isTailCall,
+ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/XCore/XCoreInstrFormats.td b/lib/Target/XCore/XCoreInstrFormats.td
index 8002c993270c..1963a70fb30d 100644
--- a/lib/Target/XCore/XCoreInstrFormats.td
+++ b/lib/Target/XCore/XCoreInstrFormats.td
@@ -1,4 +1,4 @@
-//===- XCoreInstrFormats.td - XCore Instruction Formats ----*- tablegen -*-===//
+//===-- XCoreInstrFormats.td - XCore Instruction Formats ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index a0946a197a1a..0a3008d7ab33 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.cpp - XCore Instruction Information -------*- C++ -*-===//
+//===-- XCoreInstrInfo.cpp - XCore Instruction Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMachineFunctionInfo.h"
#include "XCoreInstrInfo.h"
+#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
#include "llvm/MC/MCContext.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index d354802ee03f..42eeed8370f4 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.h - XCore Instruction Information ---------*- C++ -*-===//
+//===-- XCoreInstrInfo.h - XCore Instruction Information --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,8 +14,8 @@
#ifndef XCOREINSTRUCTIONINFO_H
#define XCOREINSTRUCTIONINFO_H
-#include "llvm/Target/TargetInstrInfo.h"
#include "XCoreRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "XCoreGenInstrInfo.inc"
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 4d2e93bc7a04..b25a08d25c1a 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -1,4 +1,4 @@
-//===- XCoreInstrInfo.td - Target Description for XCore ----*- tablegen -*-===//
+//===-- XCoreInstrInfo.td - Target Description for XCore ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..7ca06729120e
--- /dev/null
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -0,0 +1,14 @@
+//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void XCoreFunctionInfo::anchor() { }
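// [Editor's note -- explanatory sketch, not part of the patch] The empty
// out-of-line anchor() above is the usual LLVM idiom for pinning a class's
// vtable to a single translation unit: once a class has one non-inline
// virtual function, the compiler emits the vtable (and RTTI) only in the
// file that defines it, rather than weakly in every user. The pattern, with
// hypothetical names:

// Widget.h
class Widget {
  virtual void anchor();  // declared, never defined inline
  int Data;
};
// Widget.cpp -- the vtable for Widget is emitted here and only here.
void Widget::anchor() {}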
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index a575a0f69541..f869fcf26de3 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//====- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===//
+//===-- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -26,7 +26,7 @@ class Function;
/// XCoreFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private XCore target-specific information for each MachineFunction.
class XCoreFunctionInfo : public MachineFunctionInfo {
-private:
+ virtual void anchor();
bool UsesLR;
int LRSpillSlot;
int FPSpillSlot;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1b78b373fffa..f3b4b4c4f88a 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.cpp - XCore Register Information -------*- C++ -*-===//
+//===-- XCoreRegisterInfo.cpp - XCore Register Information ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,6 +14,8 @@
#include "XCoreRegisterInfo.h"
#include "XCoreMachineFunctionInfo.h"
#include "XCore.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,8 +26,6 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
@@ -54,28 +54,14 @@ static inline bool isImmU16(unsigned val) {
return val < (1 << 16);
}
-static const unsigned XCore_ArgRegs[] = {
- XCore::R0, XCore::R1, XCore::R2, XCore::R3
-};
-
-const unsigned * XCoreRegisterInfo::getArgRegs(const MachineFunction *MF)
-{
- return XCore_ArgRegs;
-}
-
-unsigned XCoreRegisterInfo::getNumArgRegs(const MachineFunction *MF)
-{
- return array_lengthof(XCore_ArgRegs);
-}
-
bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
return MF.getMMI().hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
}
-const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+const uint16_t* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
- static const unsigned CalleeSavedRegs[] = {
+ static const uint16_t CalleeSavedRegs[] = {
XCore::R4, XCore::R5, XCore::R6, XCore::R7,
XCore::R8, XCore::R9, XCore::R10, XCore::LR,
0
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5c28f39d8788..7391cfdf0734 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.h - XCore Register Information Impl ----*- C++ -*-===//
+//===-- XCoreRegisterInfo.h - XCore Register Information Impl ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -44,7 +44,7 @@ public:
/// Code Generation virtual methods...
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -62,15 +62,6 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- //! Return the array of argument passing registers
- /*!
- \note The size of this array is returned by getArgRegsSize().
- */
- static const unsigned *getArgRegs(const MachineFunction *MF = 0);
-
- //! Return the size of the argument passing register array
- static unsigned getNumArgRegs(const MachineFunction *MF = 0);
-
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
};
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index c3542304a4ec..9edfda1f5007 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- XCoreRegisterInfo.td - XCore Register defs ----------*- tablegen -*-===//
+//===-- XCoreRegisterInfo.td - XCore Register defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index b4e992710419..8cfb77089f31 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -1,4 +1,4 @@
-//===- XCoreSubtarget.cpp - XCore Subtarget Information -----------*- C++ -*-=//
+//===-- XCoreSubtarget.cpp - XCore Subtarget Information ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -21,6 +21,8 @@
using namespace llvm;
+void XCoreSubtarget::anchor() { }
+
XCoreSubtarget::XCoreSubtarget(const std::string &TT,
const std::string &CPU, const std::string &FS)
: XCoreGenSubtargetInfo(TT, CPU, FS)
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 7b29fa236710..8d0f254e087a 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -1,4 +1,4 @@
-//=====-- XCoreSubtarget.h - Define Subtarget for the XCore -----*- C++ -*--==//
+//===-- XCoreSubtarget.h - Define Subtarget for the XCore -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -25,6 +25,7 @@ namespace llvm {
class StringRef;
class XCoreSubtarget : public XCoreGenSubtargetInfo {
+ virtual void anchor();
public:
/// This constructor initializes the data members to match that
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index fdc5d35036bb..f65297e54a79 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "XCore.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -21,8 +22,10 @@ using namespace llvm;
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM)
- : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
@@ -32,9 +35,27 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT,
TSInfo(*this) {
}
-bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- PM.add(createXCoreISelDag(*this));
+namespace {
+/// XCore Code Generator Pass Configuration Options.
+class XCorePassConfig : public TargetPassConfig {
+public:
+ XCorePassConfig(XCoreTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ XCoreTargetMachine &getXCoreTargetMachine() const {
+ return getTM<XCoreTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+};
+} // namespace
+
+TargetPassConfig *XCoreTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new XCorePassConfig(this, PM);
+}
+
+bool XCorePassConfig::addInstSelector() {
+ PM.add(createXCoreISelDag(getXCoreTargetMachine(), getOptLevel()));
return false;
}
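// [Editor's note -- explanatory sketch, not part of the patch] After this
// change the generic driver no longer calls addInstSelector(PM, OptLevel)
// on the TargetMachine; it asks the target for a TargetPassConfig and
// invokes hooks on that object instead, roughly:

TargetPassConfig *PC = TM.createPassConfig(PM);  // an XCorePassConfig here
PC->addInstSelector();  // no PM/OptLevel arguments; both live on the config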
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 83d09d6df49d..254668142aaf 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -14,13 +14,13 @@
#ifndef XCORETARGETMACHINE_H
#define XCORETARGETMACHINE_H
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetData.h"
#include "XCoreFrameLowering.h"
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
#include "XCoreSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
namespace llvm {
@@ -33,8 +33,9 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- Reloc::Model RM, CodeModel::Model CM);
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameLowering *getFrameLowering() const {
@@ -55,7 +56,7 @@ public:
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
- virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 7424c78be305..27875e783b33 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/XCoreTargetObjectFile.h - XCore Object Info -*- C++ -*-===//
+//===-- XCoreTargetObjectFile.h - XCore Object Info -------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt
index 10e0cc6b5691..de1353e6c12d 100644
--- a/lib/Transforms/CMakeLists.txt
+++ b/lib/Transforms/CMakeLists.txt
@@ -3,4 +3,5 @@ add_subdirectory(Instrumentation)
add_subdirectory(InstCombine)
add_subdirectory(Scalar)
add_subdirectory(IPO)
+add_subdirectory(Vectorize)
add_subdirectory(Hello)
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4d8dbc2189a6..58b3551cd7a2 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -20,13 +20,3 @@ add_llvm_library(LLVMipo
StripDeadPrototypes.cpp
StripSymbols.cpp
)
-
-add_llvm_library_dependencies(LLVMipo
- LLVMAnalysis
- LLVMCore
- LLVMScalarOpts
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- LLVMipa
- )
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index c3ecb7afff7a..d8fae8a4b2b9 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -140,18 +140,24 @@ bool ConstantMerge::runOnModule(Module &M) {
UsedGlobals.count(GV))
continue;
+ // This transformation is legal for weak ODR globals in the sense it
+ // doesn't change semantics, but we really don't want to perform it
+ // anyway; it's likely to pessimize code generation, and some tools
+ // (like the Darwin linker in cases involving CFString) don't expect it.
+ if (GV->isWeakForLinker())
+ continue;
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
PointerIntPair<Constant*, 1, bool> Pair(Init, hasKnownAlignment(GV));
GlobalVariable *&Slot = CMap[Pair];
- // If this is the first constant we find or if the old on is local,
- // replace with the current one. It the current is externally visible
+ // If this is the first constant we find or if the old one is local,
+ // replace with the current one. If the current is externally visible
// it cannot be replaced, but can be the canonical constant we merge with.
- if (Slot == 0 || IsBetterCannonical(*GV, *Slot)) {
+ if (Slot == 0 || IsBetterCannonical(*GV, *Slot))
Slot = GV;
- }
}
// Second: identify all globals that can be merged together, filling in
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4bb6f7a90e93..95aef272211e 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -74,7 +74,7 @@ namespace {
std::string getDescription() const {
return std::string((IsArg ? "Argument #" : "Return value #"))
- + utostr(Idx) + " of function " + F->getNameStr();
+ + utostr(Idx) + " of function " + F->getName().str();
}
};
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 0edf3427507b..f3f622843340 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
@@ -225,31 +226,247 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
return MadeChange;
}
+namespace {
+ // For a given pointer Argument, this retains a list of Arguments of functions
+ // in the same SCC that the pointer data flows into. We use this to build an
+ // SCC of the arguments.
+ struct ArgumentGraphNode {
+ Argument *Definition;
+ SmallVector<ArgumentGraphNode*, 4> Uses;
+ };
+
+ class ArgumentGraph {
+ // We store pointers to ArgumentGraphNode objects, so it's important
+ // that they not move around upon insert.
+ typedef std::map<Argument*, ArgumentGraphNode> ArgumentMapTy;
+
+ ArgumentMapTy ArgumentMap;
+
+ // There is no root node for the argument graph, in fact:
+ // void f(int *x, int *y) { if (...) f(x, y); }
+ // is an example where the graph is disconnected. The SCCIterator requires a
+ // single entry point, so we maintain a fake ("synthetic") root node that
+ // uses every node. Because the graph is directed and nothing points into
+ // the root, it will not participate in any SCCs (except for its own).
+ ArgumentGraphNode SyntheticRoot;
+
+ public:
+ ArgumentGraph() { SyntheticRoot.Definition = 0; }
+
+ typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
+
+ iterator begin() { return SyntheticRoot.Uses.begin(); }
+ iterator end() { return SyntheticRoot.Uses.end(); }
+ ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; }
+
+ ArgumentGraphNode *operator[](Argument *A) {
+ ArgumentGraphNode &Node = ArgumentMap[A];
+ Node.Definition = A;
+ SyntheticRoot.Uses.push_back(&Node);
+ return &Node;
+ }
+ };
+
+ // This tracker checks whether callees are in the SCC, and if so it does not
+ // consider that a capture, instead adding it to the "Uses" list and
+ // continuing with the analysis.
+ struct ArgumentUsesTracker : public CaptureTracker {
+ ArgumentUsesTracker(const SmallPtrSet<Function*, 8> &SCCNodes)
+ : Captured(false), SCCNodes(SCCNodes) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) { return true; }
+
+ bool captured(Use *U) {
+ CallSite CS(U->getUser());
+ if (!CS.getInstruction()) { Captured = true; return true; }
+
+ Function *F = CS.getCalledFunction();
+ if (!F || !SCCNodes.count(F)) { Captured = true; return true; }
+
+ Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end();
+ PI != PE; ++PI, ++AI) {
+ if (AI == AE) {
+ assert(F->isVarArg() && "More params than args in non-varargs call");
+ Captured = true;
+ return true;
+ }
+ if (PI == U) {
+ Uses.push_back(AI);
+ break;
+ }
+ }
+ assert(!Uses.empty() && "Capturing call-site captured nothing?");
+ return false;
+ }
+
+ bool Captured; // True only if certainly captured (used outside our SCC).
+ SmallVector<Argument*, 4> Uses; // Uses within our SCC.
+
+ const SmallPtrSet<Function*, 8> &SCCNodes;
+ };
+}
+
+namespace llvm {
+ template<> struct GraphTraits<ArgumentGraphNode*> {
+ typedef ArgumentGraphNode NodeType;
+ typedef SmallVectorImpl<ArgumentGraphNode*>::iterator ChildIteratorType;
+
+ static inline NodeType *getEntryNode(NodeType *A) { return A; }
+ static inline ChildIteratorType child_begin(NodeType *N) {
+ return N->Uses.begin();
+ }
+ static inline ChildIteratorType child_end(NodeType *N) {
+ return N->Uses.end();
+ }
+ };
+ template<> struct GraphTraits<ArgumentGraph*>
+ : public GraphTraits<ArgumentGraphNode*> {
+ static NodeType *getEntryNode(ArgumentGraph *AG) {
+ return AG->getEntryNode();
+ }
+ static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
+ return AG->begin();
+ }
+ static ChildIteratorType nodes_end(ArgumentGraph *AG) {
+ return AG->end();
+ }
+ };
+}
+
/// AddNoCaptureAttrs - Deduce nocapture attributes for the SCC.
bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
bool Changed = false;
+ SmallPtrSet<Function*, 8> SCCNodes;
+
+ // Fill SCCNodes with the elements of the SCC. Used for quickly
+ // looking up whether a given CallGraphNode is in this SCC.
+ for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+ Function *F = (*I)->getFunction();
+ if (F && !F->isDeclaration() && !F->mayBeOverridden())
+ SCCNodes.insert(F);
+ }
+
+ ArgumentGraph AG;
+
// Check each function in turn, determining which pointer arguments are not
// captured.
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
if (F == 0)
- // External node - skip it;
+ // External node - only a problem for arguments that we pass to it.
continue;
// Definitions with weak linkage may be overridden at linktime with
- // something that writes memory, so treat them like declarations.
+ // something that captures pointers, so treat them like declarations.
if (F->isDeclaration() || F->mayBeOverridden())
continue;
+ // Functions that are readonly (or readnone) and nounwind and don't return
+ // a value can't capture arguments. Don't analyze them.
+ if (F->onlyReadsMemory() && F->doesNotThrow() &&
+ F->getReturnType()->isVoidTy()) {
+ for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
+ A != E; ++A) {
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
+ }
+ continue;
+ }
+
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
- if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
- !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
- A->addAttr(Attribute::NoCapture);
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
+ ArgumentUsesTracker Tracker(SCCNodes);
+ PointerMayBeCaptured(A, &Tracker);
+ if (!Tracker.Captured) {
+ if (Tracker.Uses.empty()) {
+ // If it's trivially not captured, mark it nocapture now.
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ } else {
+ // If it's not trivially captured and not trivially not captured,
+ // then it must be calling into another function in our SCC. Save
+ // its particulars for Argument-SCC analysis later.
+ ArgumentGraphNode *Node = AG[A];
+ for (SmallVectorImpl<Argument*>::iterator UI = Tracker.Uses.begin(),
+ UE = Tracker.Uses.end(); UI != UE; ++UI)
+ Node->Uses.push_back(AG[*UI]);
+ }
+ }
+ // Otherwise, it's captured. Don't bother doing SCC analysis on it.
+ }
+ }
+
+ // The graph we've collected is partial because we stopped scanning for
+ // argument uses once we solved the argument trivially. These partial nodes
+ // show up as ArgumentGraphNode objects with an empty Uses list, and for
+ // these nodes the final decision about whether they capture has already been
+ // made. If the definition doesn't have a 'nocapture' attribute by now, it
+ // captures.
+
+ for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG), E = scc_end(&AG);
+ I != E; ++I) {
+ std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
+ if (ArgumentSCC.size() == 1) {
+ if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
+
+ // e.g. "void f(int* x) { if (...) f(x); }"
+ if (ArgumentSCC[0]->Uses.size() == 1 &&
+ ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
+ ArgumentSCC[0]->Definition->addAttr(Attribute::NoCapture);
++NumNoCapture;
Changed = true;
}
+ continue;
+ }
+
+ bool SCCCaptured = false;
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *Node = *I;
+ if (Node->Uses.empty()) {
+ if (!Node->Definition->hasNoCaptureAttr())
+ SCCCaptured = true;
+ }
+ }
+ if (SCCCaptured) continue;
+
+ SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
+ // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
+ // quickly looking up whether a given Argument is in this ArgumentSCC.
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E; ++I) {
+ ArgumentSCCNodes.insert((*I)->Definition);
+ }
+
+ for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
+ E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ ArgumentGraphNode *N = *I;
+ for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
+ UE = N->Uses.end(); UI != UE; ++UI) {
+ Argument *A = (*UI)->Definition;
+ if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A))
+ continue;
+ SCCCaptured = true;
+ break;
+ }
+ }
+ if (SCCCaptured) continue;
+
+ for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
+ Argument *A = ArgumentSCC[i]->Definition;
+ A->addAttr(Attribute::NoCapture);
+ ++NumNoCapture;
+ Changed = true;
+ }
}
return Changed;
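// [Editor's note -- explanatory sketch, not part of the patch] The argument
// SCC machinery added above is what lets nocapture be deduced through
// mutual recursion, where plain per-argument capture tracking must give up.
// A tiny illustrative case:

#include <cstdlib>
static void g(int *y);                                // forward declaration
static void f(int *x) { if (std::rand() & 1) g(x); }  // x flows only into g
static void g(int *y) { f(y); }                       // y flows only into f

// f's x and g's y form one argument SCC whose uses never leave the SCC, so
// both parameters can be marked nocapture.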
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 3552d03919ba..1522aa408b6b 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -61,6 +62,7 @@ namespace {
struct GlobalStatus;
struct GlobalOpt : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
}
static char ID; // Pass identification, replacement for typeid
GlobalOpt() : ModulePass(ID) {
@@ -80,11 +82,17 @@ namespace {
const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS);
bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn);
+
+ TargetData *TD;
+ TargetLibraryInfo *TLI;
};
}
char GlobalOpt::ID = 0;
-INITIALIZE_PASS(GlobalOpt, "globalopt",
+INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(GlobalOpt, "globalopt",
"Global Variable Optimizer", false, false)
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -143,18 +151,31 @@ struct GlobalStatus {
/// HasPHIUser - Set to true if this global has a user that is a PHI node.
bool HasPHIUser;
+ /// AtomicOrdering - Set to the strongest atomic ordering requirement.
+ AtomicOrdering Ordering;
+
GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
StoredOnceValue(0), AccessingFunction(0),
- HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
- HasPHIUser(false) {}
+ HasMultipleAccessingFunctions(false),
+ HasNonInstructionUser(false), HasPHIUser(false),
+ Ordering(NotAtomic) {}
};
}
-// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
-// by constants itself. Note that constants cannot be cyclic, so this test is
-// pretty easy to implement recursively.
-//
+/// StrongerOrdering - Return the stronger of the two orderings. If the two
+/// orderings are acquire and release, then return AcquireRelease.
+///
+static AtomicOrdering StrongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
+ if (X == Acquire && Y == Release) return AcquireRelease;
+ if (Y == Acquire && X == Release) return AcquireRelease;
+ return (AtomicOrdering)std::max(X, Y);
+}
+
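// [Editor's note -- worked examples, not part of the patch] StrongerOrdering
// relies on the AtomicOrdering enumerators being numbered by increasing
// strength; the one pair a plain numeric max would get wrong, Acquire vs.
// Release, is special-cased to AcquireRelease:

assert(StrongerOrdering(NotAtomic, Monotonic) == Monotonic);
assert(StrongerOrdering(Acquire, Release) == AcquireRelease);
assert(StrongerOrdering(Release, SequentiallyConsistent) ==
       SequentiallyConsistent);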
+/// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used
+/// by other constants. Note that constants cannot be cyclic, so this test is
+/// pretty easy to implement recursively.
+///
static bool SafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C)) return false;
@@ -195,14 +216,16 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
}
if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
GS.isLoaded = true;
- // Don't hack on volatile/atomic loads.
- if (!LI->isSimple()) return true;
+ // Don't hack on volatile loads.
+ if (LI->isVolatile()) return true;
+ GS.Ordering = StrongerOrdering(GS.Ordering, LI->getOrdering());
} else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Don't allow a store OF the address, only stores TO the address.
if (SI->getOperand(0) == V) return true;
- // Don't hack on volatile/atomic stores.
- if (!SI->isSimple()) return true;
+ // Don't hack on volatile stores.
+ if (SI->isVolatile()) return true;
+ GS.Ordering = StrongerOrdering(GS.Ordering, SI->getOrdering());
// If this is a direct store to the global (i.e., the global is a scalar
// value, not an aggregate), keep more specific information about
@@ -271,43 +294,12 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
return false;
}
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
- ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
- if (!CI) return 0;
- unsigned IdxV = CI->getZExtValue();
-
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg)) {
- if (IdxV < CS->getNumOperands()) return CS->getOperand(IdxV);
- } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg)) {
- if (IdxV < CA->getNumOperands()) return CA->getOperand(IdxV);
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Agg)) {
- if (IdxV < CP->getNumOperands()) return CP->getOperand(IdxV);
- } else if (isa<ConstantAggregateZero>(Agg)) {
- if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
- if (IdxV < STy->getNumElements())
- return Constant::getNullValue(STy->getElementType(IdxV));
- } else if (SequentialType *STy =
- dyn_cast<SequentialType>(Agg->getType())) {
- return Constant::getNullValue(STy->getElementType());
- }
- } else if (isa<UndefValue>(Agg)) {
- if (StructType *STy = dyn_cast<StructType>(Agg->getType())) {
- if (IdxV < STy->getNumElements())
- return UndefValue::get(STy->getElementType(IdxV));
- } else if (SequentialType *STy =
- dyn_cast<SequentialType>(Agg->getType())) {
- return UndefValue::get(STy->getElementType());
- }
- }
- return 0;
-}
-
-
/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
+ TargetData *TD, TargetLibraryInfo *TLI) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -328,11 +320,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit, TD, TLI);
} else if (CE->getOpcode() == Instruction::BitCast &&
CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0);
+ Changed |= CleanupConstantGlobalUsers(CE, 0, TD, TLI);
}
if (CE->use_empty()) {
@@ -346,11 +338,17 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, TD, TLI));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
+
+ // If the initializer is an all-null value and we have an inbounds GEP,
+ // we already know what the result of any load from that GEP is.
+ // TODO: Handle splats.
+ if (Init && isa<ConstantAggregateZero>(Init) && GEP->isInBounds())
+ SubInit = Constant::getNullValue(GEP->getType()->getElementType());
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit, TD, TLI);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -368,7 +366,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
- CleanupConstantGlobalUsers(V, Init);
+ CleanupConstantGlobalUsers(V, Init, TD, TLI);
return true;
}
}
@@ -514,8 +512,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
NewGlobals.reserve(STy->getNumElements());
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
+ Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
@@ -547,8 +544,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
uint64_t EltSize = TD.getTypeAllocSize(STy->getElementType());
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
- Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
+ Constant *In = Init->getAggregateElement(i);
assert(In && "Couldn't get element of initializer?");
GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
@@ -770,7 +766,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
/// value stored into it. If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null value, so we can optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
+ TargetData *TD,
+ TargetLibraryInfo *TLI) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -813,7 +811,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
// nor is the global.
if (AllNonStoreUsesGone) {
DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
- CleanupConstantGlobalUsers(GV, 0);
+ CleanupConstantGlobalUsers(GV, 0, TD, TLI);
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
@@ -825,10 +823,11 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V) {
+static void ConstantPropUsersOf(Value *V,
+ TargetData *TD, TargetLibraryInfo *TLI) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I)) {
+ if (Constant *NewC = ConstantFoldInstruction(I, TD, TLI)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -848,7 +847,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
Type *AllocTy,
ConstantInt *NElements,
- TargetData* TD) {
+ TargetData *TD,
+ TargetLibraryInfo *TLI) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
Type *GlobalType;
@@ -906,7 +906,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
while (!GV->use_empty()) {
if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
SI->eraseFromParent();
continue;
}
@@ -921,7 +922,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
// Replace the cmp X, 0 with a use of the bool value.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ // Sink the load to where the compare was, if atomic rules allow us to.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", false, 0,
+ LI->getOrdering(), LI->getSynchScope(),
+ LI->isUnordered() ? (Instruction*)ICI : LI);
InitBoolUsed = true;
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
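// [Editor's note, not part of the patch] The insertion-point ternary above
// sinks the new bool load down to the icmp only when the original load is
// unordered (not volatile, and at most Unordered atomic); a stronger atomic
// load keeps its original position so its ordering guarantees survive the
// rewrite.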
@@ -962,9 +966,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV);
+ ConstantPropUsersOf(NewGV, TD, TLI);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue);
+ ConstantPropUsersOf(RepValue, TD, TLI);
return NewGV;
}
@@ -1203,7 +1207,6 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
PHIsToRewrite.push_back(std::make_pair(PN, FieldNo));
} else {
llvm_unreachable("Unknown usable value");
- Result = 0;
}
return FieldVals[FieldNo] = Result;
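// [Editor's note, not part of the patch] llvm_unreachable expands to a
// noreturn construct, so the deleted "Result = 0;" after it could never
// execute; dropping it removes dead code without changing behavior.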
@@ -1293,9 +1296,9 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value* NElems, TargetData *TD) {
+ Value *NElems, TargetData *TD) {
DEBUG(dbgs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
- Type* MAT = getMallocAllocatedType(CI);
+ Type *MAT = getMallocAllocatedType(CI);
StructType *STy = cast<StructType>(MAT);
// There is guaranteed to be at least one use of the malloc (storing
@@ -1482,8 +1485,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
Type *AllocTy,
+ AtomicOrdering Ordering,
Module::global_iterator &GVI,
- TargetData *TD) {
+ TargetData *TD,
+ TargetLibraryInfo *TLI) {
if (!TD)
return false;
@@ -1502,7 +1507,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// We can't optimize this if the malloc itself is used in a complex way,
// for example, being stored into multiple globals. This allows the
- // malloc to be stored into the specified global, loaded setcc'd, and
+ // malloc to be stored into the specified global, loaded, icmp'd, and
// GEP'd. These are all things we could transform to using the global
// for.
SmallPtrSet<const PHINode*, 8> PHIs;
@@ -1523,7 +1528,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD, TLI);
return true;
}
@@ -1531,6 +1536,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// into multiple malloc'd arrays, one for each field. This is basically
// SRoA for malloc'd memory.
+ if (Ordering != NotAtomic)
+ return false;
+
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
@@ -1563,7 +1571,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true), TD);
return true;
}
@@ -1573,8 +1581,9 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
+ AtomicOrdering Ordering,
Module::global_iterator &GVI,
- TargetData *TD) {
+ TargetData *TD, TargetLibraryInfo *TLI) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1589,12 +1598,13 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, TD, TLI))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
- Type* MallocType = getMallocAllocatedType(CI);
- if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
- GVI, TD))
+ Type *MallocType = getMallocAllocatedType(CI);
+ if (MallocType &&
+ TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI,
+ TD, TLI))
return true;
}
}
@@ -1670,7 +1680,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
assert(LI->getOperand(0) == GV && "Not a copy!");
// Insert a new load, to preserve the saved value.
- StoreVal = new LoadInst(NewGV, LI->getName()+".b", LI);
+ StoreVal = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
} else {
assert((isa<CastInst>(StoredVal) || isa<SelectInst>(StoredVal)) &&
"This is not a form that we understand!");
@@ -1678,11 +1689,13 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
assert(isa<LoadInst>(StoreVal) && "Not a load of NewGV!");
}
}
- new StoreInst(StoreVal, NewGV, SI);
+ new StoreInst(StoreVal, NewGV, false, 0,
+ SI->getOrdering(), SI->getSynchScope(), SI);
} else {
// Change the load into a load of bool then a select.
LoadInst *LI = cast<LoadInst>(UI);
- LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", LI);
+ LoadInst *NLI = new LoadInst(NewGV, LI->getName()+".b", false, 0,
+ LI->getOrdering(), LI->getSynchScope(), LI);
Value *NSI;
if (IsOneZero)
NSI = new ZExtInst(NLI, LI->getType(), "", LI);
@@ -1699,8 +1712,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
}
-/// ProcessInternalGlobal - Analyze the specified global variable and optimize
-/// it if possible. If we make a change, return true.
+/// ProcessGlobal - Analyze the specified global variable and optimize it if
+/// possible. If we make a change, return true.
bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
Module::global_iterator &GVI) {
if (!GV->hasLocalLinkage())
@@ -1737,7 +1750,7 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV,
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI,
- const SmallPtrSet<const PHINode*, 16> &PHIUsers,
+ const SmallPtrSet<const PHINode*, 16> &PHIUsers,
const GlobalStatus &GS) {
// If this is a first class global and has only one accessing function
// and this function is main (which we know is not recursive we can make
@@ -1755,11 +1768,11 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GS.AccessingFunction->hasExternalLinkage() &&
GV->getType()->getAddressSpace() == 0) {
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
- Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction
->getEntryBlock().begin());
- Type* ElemTy = GV->getType()->getElementType();
+ Type *ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
@@ -1776,7 +1789,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
+ TD, TLI);
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1791,7 +1805,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1820,7 +1834,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer(), TD, TLI);
if (GV->use_empty()) {
DEBUG(dbgs() << " *** Substituting initializer allowed us to "
@@ -1836,8 +1850,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
- if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>()))
+ if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI,
+ TD, TLI))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
@@ -1890,7 +1904,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) {
if (!F->hasName() && !F->isDeclaration())
F->setLinkage(GlobalValue::InternalLinkage);
F->removeDeadConstantUsers();
- if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) {
+ if (F->isDefTriviallyDead()) {
F->eraseFromParent();
Changed = true;
++NumFnDeleted;
@@ -1930,8 +1944,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
// Simplify the initializer.
if (GV->hasInitializer())
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
- TargetData *TD = getAnalysisIfAvailable<TargetData>();
- Constant *New = ConstantFoldConstantExpression(CE, TD);
+ Constant *New = ConstantFoldConstantExpression(CE, TD, TLI);
if (New && New != CE)
GV->setInitializer(New);
}
@@ -2052,16 +2065,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
-static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues, Value *V) {
- if (Constant *CV = dyn_cast<Constant>(V)) return CV;
- Constant *R = ComputedValues[V];
- assert(R && "Reference to an uncomputed value!");
- return R;
-}
-
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants);
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD);
/// isSimpleEnoughValueToCommit - Return true if the specified constant can be
@@ -2073,7 +2080,8 @@ isSimpleEnoughValueToCommit(Constant *C,
/// in SimpleConstants to avoid having to rescan the same constants all the
/// time.
static bool isSimpleEnoughValueToCommitHelper(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
// Simple integer, undef, constant aggregate zero, global addresses, etc are
// all supported.
if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
@@ -2085,7 +2093,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
isa<ConstantVector>(C)) {
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
Constant *Op = cast<Constant>(C->getOperand(i));
- if (!isSimpleEnoughValueToCommit(Op, SimpleConstants))
+ if (!isSimpleEnoughValueToCommit(Op, SimpleConstants, TD))
return false;
}
return true;
@@ -2097,34 +2105,42 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
ConstantExpr *CE = cast<ConstantExpr>(C);
switch (CE->getOpcode()) {
case Instruction::BitCast:
+ // Bitcast is fine if the casted value is fine.
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
+
case Instruction::IntToPtr:
case Instruction::PtrToInt:
- // These casts are always fine if the casted value is.
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ // int <=> ptr is fine if the int type is the same size as the
+ // pointer type.
+ if (!TD || TD->getTypeSizeInBits(CE->getType()) !=
+ TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
+ return false;
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
// GEP is fine if it is simple + constant offset.
case Instruction::GetElementPtr:
for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
if (!isa<ConstantInt>(CE->getOperand(i)))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
case Instruction::Add:
// We allow simple+cst.
if (!isa<ConstantInt>(CE->getOperand(1)))
return false;
- return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants);
+ return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
}
return false;
}
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
- SmallPtrSet<Constant*, 8> &SimpleConstants) {
+ SmallPtrSet<Constant*, 8> &SimpleConstants,
+ const TargetData *TD) {
// If we already checked this constant, we win.
if (!SimpleConstants.insert(C)) return true;
// Check the constant.
- return isSimpleEnoughValueToCommitHelper(C, SimpleConstants);
+ return isSimpleEnoughValueToCommitHelper(C, SimpleConstants, TD);
}
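// [Editor's note, not part of the patch] The practical effect of the new
// TD check in the IntToPtr/PtrToInt case: a size-preserving round-trip can
// still be committed as initializer data, while a truncating or widening
// cast cannot, because the backend has no way to emit the value change as
// static data. Without TargetData we conservatively reject both.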
@@ -2191,23 +2207,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
return Val;
}
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 32> Elts;
if (StructType *STy = dyn_cast<StructType>(Init->getType())) {
-
// Break up the constant into its elements.
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
- for (User::op_iterator i = CS->op_begin(), e = CS->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (isa<ConstantAggregateZero>(Init)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Elts.push_back(Constant::getNullValue(STy->getElementType(i)));
- } else if (isa<UndefValue>(Init)) {
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Elts.push_back(UndefValue::get(STy->getElementType(i)));
- } else {
- llvm_unreachable("This code is out of sync with "
- " ConstantFoldLoadThroughGEPConstantExpr");
- }
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
// Replace the element that we are supposed to.
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
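// [Editor's note, not part of the patch] Constant::getAggregateElement
// subsumes the removed per-kind cases: it returns operand i of a
// ConstantStruct/Array/Vector, the element type's null value for a
// ConstantAggregateZero, undef for an UndefValue, and null when the index
// is out of range -- hence the single push_back loop above suffices.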
@@ -2226,22 +2230,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
NumElts = ATy->getNumElements();
else
- NumElts = cast<VectorType>(InitTy)->getNumElements();
+ NumElts = InitTy->getVectorNumElements();
// Break up the array into elements.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
- for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (isa<ConstantAggregateZero>(Init)) {
- Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
- } else {
- assert(isa<UndefValue>(Init) && "This code is out of sync with "
- " ConstantFoldLoadThroughGEPConstantExpr");
- Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
- }
+ for (uint64_t i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(Init->getAggregateElement(i));
assert(CI->getZExtValue() < NumElts);
Elts[CI->getZExtValue()] =
@@ -2266,15 +2259,109 @@ static void CommitValueTo(Constant *Val, Constant *Addr) {
GV->setInitializer(EvaluateStoreInto(GV->getInitializer(), Val, CE, 2));
}
+namespace {
+
+/// Evaluator - This class evaluates LLVM IR, producing the Constant
+/// representing each SSA instruction. Changes to global variables are stored
+/// in a mapping that can be iterated over after the evaluation is complete.
+/// Once an evaluation call fails, the evaluation object should not be reused.
+class Evaluator {
+public:
+ Evaluator(const TargetData *TD, const TargetLibraryInfo *TLI)
+ : TD(TD), TLI(TLI) {
+ ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ }
+
+ ~Evaluator() {
+ DeleteContainerPointers(ValueStack);
+ while (!AllocaTmps.empty()) {
+ GlobalVariable *Tmp = AllocaTmps.back();
+ AllocaTmps.pop_back();
+
+ // If there are still users of the alloca, the program is doing something
+ // silly, e.g. storing the address of the alloca somewhere and using it
+ // later. Since this is undefined, we'll just make it be null.
+ if (!Tmp->use_empty())
+ Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
+ delete Tmp;
+ }
+ }
+
+ /// EvaluateFunction - Evaluate a call to function F, returning true if
+ /// successful, false if we can't evaluate it. ActualArgs contains the actual
+ /// argument values passed to the function.
+ bool EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs);
+
+ /// EvaluateBlock - Evaluate all instructions in block BB, returning true if
+ /// successful, false if we can't evaluate it. NextBB returns the next BB that
+ /// control flows into, or null upon return.
+ bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB);
+
+ Constant *getVal(Value *V) {
+ if (Constant *CV = dyn_cast<Constant>(V)) return CV;
+ Constant *R = ValueStack.back()->lookup(V);
+ assert(R && "Reference to an uncomputed value!");
+ return R;
+ }
+
+ void setVal(Value *V, Constant *C) {
+ ValueStack.back()->operator[](V) = C;
+ }
+
+ const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
+ return MutatedMemory;
+ }
+
+ const SmallPtrSet<GlobalVariable*, 8> &getInvariants() const {
+ return Invariants;
+ }
+
+private:
+ Constant *ComputeLoadResult(Constant *P);
+
+ /// ValueStack - As we compute SSA register values, we store their contents
+ /// here. The back of the vector holds the values for the current function,
+ /// and earlier entries hold the values for the calling frames.
+ SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+
+ /// CallStack - This is used to detect recursion. In pathological situations
+ /// we could hit exponential behavior, but at least there is nothing
+ /// unbounded.
+ SmallVector<Function*, 4> CallStack;
+
+ /// MutatedMemory - For each store we execute, we update this map. Loads
+ /// check this to get the most up-to-date value. If evaluation is successful,
+ /// this state is committed to the process.
+ DenseMap<Constant*, Constant*> MutatedMemory;
+
+ /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
+ /// to represent its body. This vector is needed so we can delete the
+ /// temporary globals when we are done.
+ SmallVector<GlobalVariable*, 32> AllocaTmps;
+
+ /// Invariants - These global variables have been marked invariant by the
+ /// static constructor.
+ SmallPtrSet<GlobalVariable*, 8> Invariants;
+
+ /// SimpleConstants - These are constants we have checked and know to be
+ /// simple enough to live in a static initializer of a global.
+ SmallPtrSet<Constant*, 8> SimpleConstants;
+
+ const TargetData *TD;
+ const TargetLibraryInfo *TLI;
+};
+
+} // anonymous namespace
+
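// [Editor's note -- explanatory sketch, not part of the patch] Roughly how
// the new Evaluator is driven by its GlobalOpt caller; the locals here are
// illustrative, and CommitValueTo is the helper defined earlier:

Evaluator Eval(TD, TLI);
Constant *RetVal;
SmallVector<Constant*, 4> Args;  // a static constructor takes no arguments
if (Eval.EvaluateFunction(F, RetVal, Args)) {
  // Evaluation succeeded: commit the mutated global state for real.
  for (DenseMap<Constant*, Constant*>::const_iterator
         I = Eval.getMutatedMemory().begin(),
         E = Eval.getMutatedMemory().end(); I != E; ++I)
    CommitValueTo(I->second, I->first);
}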
/// ComputeLoadResult - Return the value that would be computed by a load from
/// P after the stores reflected by MutatedMemory have been performed. If we
/// can't decide, return null.
-static Constant *ComputeLoadResult(Constant *P,
- const DenseMap<Constant*, Constant*> &Memory) {
+Constant *Evaluator::ComputeLoadResult(Constant *P) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
- DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
- if (I != Memory.end()) return I->second;
+ DenseMap<Constant*, Constant*>::const_iterator I = MutatedMemory.find(P);
+ if (I != MutatedMemory.end()) return I->second;
// Access it.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
@@ -2295,56 +2382,29 @@ static Constant *ComputeLoadResult(Constant *P,
return 0; // don't know how to evaluate.
}
-/// EvaluateFunction - Evaluate a call to function F, returning true if
-/// successful, false if we can't evaluate it. ActualArgs contains the formal
-/// arguments for the function.
-static bool EvaluateFunction(Function *F, Constant *&RetVal,
- const SmallVectorImpl<Constant*> &ActualArgs,
- std::vector<Function*> &CallStack,
- DenseMap<Constant*, Constant*> &MutatedMemory,
- std::vector<GlobalVariable*> &AllocaTmps,
- SmallPtrSet<Constant*, 8> &SimpleConstants,
- const TargetData *TD) {
- // Check to see if this function is already executing (recursion). If so,
- // bail out. TODO: we might want to accept limited recursion.
- if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
- return false;
-
- CallStack.push_back(F);
-
- /// Values - As we compute SSA register values, we store their contents here.
- DenseMap<Value*, Constant*> Values;
-
- // Initialize arguments to the incoming values specified.
- unsigned ArgNo = 0;
- for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
- ++AI, ++ArgNo)
- Values[AI] = ActualArgs[ArgNo];
-
- /// ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
- /// we can only evaluate any one basic block at most once. This set keeps
- /// track of what we have executed so we can detect recursive cases etc.
- SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
-
- // CurInst - The current instruction we're evaluating.
- BasicBlock::iterator CurInst = F->begin()->begin();
-
+/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
+/// successful, false if we can't evaluate it. NextBB returns the next BB that
+/// control flows into, or null upon return.
+bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
+ BasicBlock *&NextBB) {
// This is the main evaluation loop.
while (1) {
Constant *InstResult = 0;
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (!SI->isSimple()) return false; // no volatile/atomic accesses.
- Constant *Ptr = getVal(Values, SI->getOperand(1));
+ Constant *Ptr = getVal(SI->getOperand(1));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
- Constant *Val = getVal(Values, SI->getOperand(0));
+ Constant *Val = getVal(SI->getOperand(0));
// If this might be too difficult for the backend to handle (e.g. the addr
// of one global variable divided by another) then we can't commit it.
- if (!isSimpleEnoughValueToCommit(Val, SimpleConstants))
+ if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
return false;
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
@@ -2354,7 +2414,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
// stored value.
Ptr = CE->getOperand(0);
- Type *NewTy=cast<PointerType>(Ptr->getType())->getElementType();
+ Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
// In order to push the bitcast onto the stored value, a bitcast
// from NewTy to Val's type must be legal. If it's not, we can try
@@ -2366,16 +2426,18 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (StructType *STy = dyn_cast<StructType>(NewTy)) {
NewTy = STy->getTypeAtIndex(0U);
- IntegerType *IdxTy =IntegerType::get(NewTy->getContext(), 32);
+ IntegerType *IdxTy = IntegerType::get(NewTy->getContext(), 32);
Constant *IdxZero = ConstantInt::get(IdxTy, 0, false);
Constant * const IdxList[] = {IdxZero, IdxZero};
Ptr = ConstantExpr::getGetElementPtr(Ptr, IdxList);
-
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
} else {
- return 0;
+ return false;
}
}
@@ -2387,33 +2449,35 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
MutatedMemory[Ptr] = Val;
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
InstResult = ConstantExpr::get(BO->getOpcode(),
- getVal(Values, BO->getOperand(0)),
- getVal(Values, BO->getOperand(1)));
+ getVal(BO->getOperand(0)),
+ getVal(BO->getOperand(1)));
} else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {
InstResult = ConstantExpr::getCompare(CI->getPredicate(),
- getVal(Values, CI->getOperand(0)),
- getVal(Values, CI->getOperand(1)));
+ getVal(CI->getOperand(0)),
+ getVal(CI->getOperand(1)));
} else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {
InstResult = ConstantExpr::getCast(CI->getOpcode(),
- getVal(Values, CI->getOperand(0)),
+ getVal(CI->getOperand(0)),
CI->getType());
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
- getVal(Values, SI->getOperand(1)),
- getVal(Values, SI->getOperand(2)));
+ InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),
+ getVal(SI->getOperand(1)),
+ getVal(SI->getOperand(2)));
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
- Constant *P = getVal(Values, GEP->getOperand(0));
+ Constant *P = getVal(GEP->getOperand(0));
SmallVector<Constant*, 8> GEPOps;
for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end();
i != e; ++i)
- GEPOps.push_back(getVal(Values, *i));
+ GEPOps.push_back(getVal(*i));
InstResult =
ConstantExpr::getGetElementPtr(P, GEPOps,
cast<GEPOperator>(GEP)->isInBounds());
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (!LI->isSimple()) return false; // no volatile/atomic accesses.
- InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
- MutatedMemory);
+ Constant *Ptr = getVal(LI->getOperand(0));
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
+ Ptr = ConstantFoldConstantExpression(CE, TD, TLI);
+ InstResult = ComputeLoadResult(Ptr);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
@@ -2423,25 +2487,53 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
UndefValue::get(Ty),
AI->getName()));
InstResult = AllocaTmps.back();
- } else if (CallInst *CI = dyn_cast<CallInst>(CurInst)) {
+ } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
+ CallSite CS(CurInst);
// Debug info can safely be ignored here.
- if (isa<DbgInfoIntrinsic>(CI)) {
+ if (isa<DbgInfoIntrinsic>(CS.getInstruction())) {
++CurInst;
continue;
}
// Cannot handle inline asm.
- if (isa<InlineAsm>(CI->getCalledValue())) return false;
-
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(CI)) {
- if (MSI->isVolatile()) return false;
- Constant *Ptr = getVal(Values, MSI->getDest());
- Constant *Val = getVal(Values, MSI->getValue());
- Constant *DestVal = ComputeLoadResult(getVal(Values, Ptr),
- MutatedMemory);
- if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
- // This memset is a no-op.
+ if (isa<InlineAsm>(CS.getCalledValue())) return false;
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {
+ if (MSI->isVolatile()) return false;
+ Constant *Ptr = getVal(MSI->getDest());
+ Constant *Val = getVal(MSI->getValue());
+ Constant *DestVal = ComputeLoadResult(getVal(Ptr));
+ if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {
+ // This memset is a no-op.
+ ++CurInst;
+ continue;
+ }
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ ++CurInst;
+ continue;
+ }
+
+ if (II->getIntrinsicID() == Intrinsic::invariant_start) {
+ // We don't insert an entry into Values, as it doesn't have a
+ // meaningful return value.
+ if (!II->use_empty())
+ return false;
+ ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0));
+ Value *PtrArg = getVal(II->getArgOperand(1));
+ Value *Ptr = PtrArg->stripPointerCasts();
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ Type *ElemTy = cast<PointerType>(GV->getType())->getElementType();
+ if (!Size->isAllOnesValue() &&
+ Size->getValue().getLimitedValue() >=
+ TD->getTypeStoreSize(ElemTy))
+ Invariants.insert(GV);
+ }
+ // Continue even if we do nothing.
++CurInst;
continue;
}
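
The invariant_start handling above only promotes a global to constant when the intrinsic's size argument is known and covers the global's whole store size. A minimal standalone sketch of that policy, in plain C++ with hypothetical names (no LLVM types):

#include <cstdint>
#include <iostream>

// An all-ones size models the intrinsic's "unknown extent" encoding (-1):
// be conservative and refuse to promote the global to constant.
static bool coversWholeGlobal(std::uint64_t invariantSize, bool sizeIsAllOnes,
                              std::uint64_t globalStoreSize) {
  if (sizeIsAllOnes)
    return false;
  // Otherwise the invariant region must span at least the whole object.
  return invariantSize >= globalStoreSize;
}

int main() {
  std::cout << coversWholeGlobal(8, false, 8) << '\n';  // 1: whole global
  std::cout << coversWholeGlobal(4, false, 8) << '\n';  // 0: partial region
  std::cout << coversWholeGlobal(0, true, 8) << '\n';   // 0: unknown extent
}
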
@@ -2449,19 +2541,17 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
}
// Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(Values,
- CI->getCalledValue()));
- if (!Callee) return false; // Cannot resolve.
+ Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue()));
+ if (!Callee || Callee->mayBeOverridden())
+ return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
- CallSite CS(CI);
- for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i)
- Formals.push_back(getVal(Values, *i));
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i)
+ Formals.push_back(getVal(*i));
if (Callee->isDeclaration()) {
// If this is a function we can constant fold, do it.
- if (Constant *C = ConstantFoldCall(Callee, Formals)) {
+ if (Constant *C = ConstantFoldCall(Callee, Formals, TLI)) {
InstResult = C;
} else {
return false;
@@ -2472,62 +2562,43 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
Constant *RetVal;
// Execute the call, if successful, use the return value.
- if (!EvaluateFunction(Callee, RetVal, Formals, CallStack,
- MutatedMemory, AllocaTmps, SimpleConstants, TD))
+ ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ if (!EvaluateFunction(Callee, RetVal, Formals))
return false;
+ delete ValueStack.pop_back_val();
InstResult = RetVal;
}
} else if (isa<TerminatorInst>(CurInst)) {
- BasicBlock *NewBB = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {
if (BI->isUnconditional()) {
- NewBB = BI->getSuccessor(0);
+ NextBB = BI->getSuccessor(0);
} else {
ConstantInt *Cond =
- dyn_cast<ConstantInt>(getVal(Values, BI->getCondition()));
+ dyn_cast<ConstantInt>(getVal(BI->getCondition()));
if (!Cond) return false; // Cannot determine.
- NewBB = BI->getSuccessor(!Cond->getZExtValue());
+ NextBB = BI->getSuccessor(!Cond->getZExtValue());
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurInst)) {
ConstantInt *Val =
- dyn_cast<ConstantInt>(getVal(Values, SI->getCondition()));
+ dyn_cast<ConstantInt>(getVal(SI->getCondition()));
if (!Val) return false; // Cannot determine.
- NewBB = SI->getSuccessor(SI->findCaseValue(Val));
+ NextBB = SI->findCaseValue(Val).getCaseSuccessor();
} else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(CurInst)) {
- Value *Val = getVal(Values, IBI->getAddress())->stripPointerCasts();
+ Value *Val = getVal(IBI->getAddress())->stripPointerCasts();
if (BlockAddress *BA = dyn_cast<BlockAddress>(Val))
- NewBB = BA->getBasicBlock();
+ NextBB = BA->getBasicBlock();
else
return false; // Cannot determine.
- } else if (ReturnInst *RI = dyn_cast<ReturnInst>(CurInst)) {
- if (RI->getNumOperands())
- RetVal = getVal(Values, RI->getOperand(0));
-
- CallStack.pop_back(); // return from fn.
- return true; // We succeeded at evaluating this ctor!
+ } else if (isa<ReturnInst>(CurInst)) {
+ NextBB = 0;
} else {
// invoke, unwind, resume, unreachable.
return false; // Cannot handle this terminator.
}
- // Okay, we succeeded in evaluating this control flow. See if we have
- // executed the new block before. If so, we have a looping function,
- // which we cannot evaluate in reasonable time.
- if (!ExecutedBlocks.insert(NewBB))
- return false; // looped!
-
- // Okay, we have never been in this block before. Check to see if there
- // are any PHI nodes. If so, evaluate them with information about where
- // we came from.
- BasicBlock *OldBB = CurInst->getParent();
- CurInst = NewBB->begin();
- PHINode *PN;
- for (; (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
- Values[PN] = getVal(Values, PN->getIncomingValueForBlock(OldBB));
-
- // Do NOT increment CurInst. We know that the terminator had no value.
- continue;
+ // We succeeded at evaluating this block!
+ return true;
} else {
// Did not know how to evaluate this!
return false;
@@ -2535,9 +2606,15 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (!CurInst->use_empty()) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, TD);
+ InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
- Values[CurInst] = InstResult;
+ setVal(CurInst, InstResult);
+ }
+
+ // If we just processed an invoke, we finished evaluating the block.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {
+ NextBB = II->getNormalDest();
+ return true;
}
// Advance program counter.
@@ -2545,64 +2622,96 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
}
}
-/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
-/// we can. Return true if we can, false otherwise.
-static bool EvaluateStaticConstructor(Function *F, const TargetData *TD) {
- /// MutatedMemory - For each store we execute, we update this map. Loads
- /// check this to get the most up-to-date value. If evaluation is successful,
- /// this state is committed to the process.
- DenseMap<Constant*, Constant*> MutatedMemory;
+/// EvaluateFunction - Evaluate a call to function F, returning true if
+/// successful, false if we can't evaluate it. ActualArgs contains the actual
+/// arguments to bind to the function's formal parameters.
+bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
+ const SmallVectorImpl<Constant*> &ActualArgs) {
+ // Check to see if this function is already executing (recursion). If so,
+ // bail out. TODO: we might want to accept limited recursion.
+ if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ return false;
- /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
- /// to represent its body. This vector is needed so we can delete the
- /// temporary globals when we are done.
- std::vector<GlobalVariable*> AllocaTmps;
+ CallStack.push_back(F);
- /// CallStack - This is used to detect recursion. In pathological situations
- /// we could hit exponential behavior, but at least there is nothing
- /// unbounded.
- std::vector<Function*> CallStack;
+ // Initialize arguments to the incoming values specified.
+ unsigned ArgNo = 0;
+ for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E;
+ ++AI, ++ArgNo)
+ setVal(AI, ActualArgs[ArgNo]);
- /// SimpleConstants - These are constants we have checked and know to be
- /// simple enough to live in a static initializer of a global.
- SmallPtrSet<Constant*, 8> SimpleConstants;
-
+ // ExecutedBlocks - We only handle non-looping, non-recursive code. As such,
+ // we can only evaluate any one basic block at most once. This set keeps
+ // track of what we have executed so we can detect recursive cases etc.
+ SmallPtrSet<BasicBlock*, 32> ExecutedBlocks;
+
+ // CurBB - The current basic block we're evaluating.
+ BasicBlock *CurBB = F->begin();
+
+ BasicBlock::iterator CurInst = CurBB->begin();
+
+ while (1) {
+ BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ if (!EvaluateBlock(CurInst, NextBB))
+ return false;
+
+ if (NextBB == 0) {
+ // Successfully running until there's no next block means that we found
+ // the return. Fill in the return value and pop the call stack.
+ ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
+ if (RI->getNumOperands())
+ RetVal = getVal(RI->getOperand(0));
+ CallStack.pop_back();
+ return true;
+ }
+
+ // Okay, we succeeded in evaluating this control flow. See if we have
+ // executed the new block before. If so, we have a looping function,
+ // which we cannot evaluate in reasonable time.
+ if (!ExecutedBlocks.insert(NextBB))
+ return false; // looped!
+
+ // Okay, we have never been in this block before. Check to see if there
+ // are any PHI nodes. If so, evaluate them with information about where
+ // we came from.
+ PHINode *PN = 0;
+ for (CurInst = NextBB->begin();
+ (PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
+ setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
+
+ // Advance to the next block.
+ CurBB = NextBB;
+ }
+}
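
EvaluateFunction above drives EvaluateBlock one block at a time: a null NextBB signals the return, and revisiting any block means a loop the evaluator cannot bound. A standalone sketch of that driver, with blocks modeled as integers and a hypothetical callback standing in for EvaluateBlock:

#include <functional>
#include <iostream>
#include <set>

// Given a block id, the callback yields the next block id (negative means
// "return"), or fails by returning false.
using EvalBlock = std::function<bool(int bb, int &next)>;

static bool evaluateFunction(int entry, const EvalBlock &evalBlock) {
  std::set<int> executed{entry};
  int cur = entry;
  while (true) {
    int next = -1;
    if (!evalBlock(cur, next))
      return false;              // hit an instruction we cannot model
    if (next < 0)
      return true;               // reached the return: success
    if (!executed.insert(next).second)
      return false;              // revisited a block: the function loops
    cur = next;                  // (PHI nodes would be resolved here)
  }
}

int main() {
  // Straight-line CFG 0 -> 1 -> return evaluates fine...
  EvalBlock line = [](int bb, int &next) { next = bb == 0 ? 1 : -1; return true; };
  std::cout << evaluateFunction(0, line) << '\n';   // 1
  // ...but a self-loop 0 -> 0 is rejected.
  EvalBlock loop = [](int, int &next) { next = 0; return true; };
  std::cout << evaluateFunction(0, loop) << '\n';   // 0
}
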
+
+/// EvaluateStaticConstructor - Evaluate static constructors in the function, if
+/// we can. Return true if we can, false otherwise.
+static bool EvaluateStaticConstructor(Function *F, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Call the function.
+ Evaluator Eval(TD, TLI);
Constant *RetValDummy;
- bool EvalSuccess = EvaluateFunction(F, RetValDummy,
- SmallVector<Constant*, 0>(), CallStack,
- MutatedMemory, AllocaTmps,
- SimpleConstants, TD);
+ bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
+ SmallVector<Constant*, 0>());
if (EvalSuccess) {
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
- << F->getName() << "' to " << MutatedMemory.size()
+ << F->getName() << "' to " << Eval.getMutatedMemory().size()
<< " stores.\n");
- for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
- E = MutatedMemory.end(); I != E; ++I)
+ for (DenseMap<Constant*, Constant*>::const_iterator I =
+ Eval.getMutatedMemory().begin(), E = Eval.getMutatedMemory().end();
+ I != E; ++I)
CommitValueTo(I->second, I->first);
- }
-
- // At this point, we are done interpreting. If we created any 'alloca'
- // temporaries, release them now.
- while (!AllocaTmps.empty()) {
- GlobalVariable *Tmp = AllocaTmps.back();
- AllocaTmps.pop_back();
-
- // If there are still users of the alloca, the program is doing something
- // silly, e.g. storing the address of the alloca somewhere and using it
- // later. Since this is undefined, we'll just make it be null.
- if (!Tmp->use_empty())
- Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
- delete Tmp;
+ for (SmallPtrSet<GlobalVariable*, 8>::const_iterator I =
+ Eval.getInvariants().begin(), E = Eval.getInvariants().end();
+ I != E; ++I)
+ (*I)->setConstant(true);
}
return EvalSuccess;
}
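
For context, a hypothetical C++ translation unit of the kind EvaluateStaticConstructor can fold: the dynamic initializer below is straight-line and stores only computable values into internal globals, so the evaluator can simulate it, commit the final values as initializers, and drop the entry from llvm.global_ctors (whether a given build folds it also depends on the surrounding pipeline).

static int a;
static int b;

namespace {
struct Init {
  Init() {
    a = 6 * 7;   // simple store of a constant: 42
    b = a + 1;   // load of the just-stored value resolves via MutatedMemory
  }
} dynamicInit;
}

int main() { return a + b; }  // 85, with or without the folding
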
-
-
/// OptimizeGlobalCtorsList - Simplify and evaluate global ctors if possible.
/// Return true if anything changed.
bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
@@ -2610,7 +2719,6 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
bool MadeChange = false;
if (Ctors.empty()) return false;
- const TargetData *TD = getAnalysisIfAvailable<TargetData>();
// Loop over global ctors, optimizing them when we can.
for (unsigned i = 0; i != Ctors.size(); ++i) {
Function *F = Ctors[i];
@@ -2628,7 +2736,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (F->empty()) continue;
// If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, TD)) {
+ if (EvaluateStaticConstructor(F, TD, TLI)) {
Ctors.erase(Ctors.begin()+i);
MadeChange = true;
--i;
@@ -2700,12 +2808,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
return Changed;
}
-static Function *FindCXAAtExit(Module &M) {
- Function *Fn = M.getFunction("__cxa_atexit");
+static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
+ if (!TLI->has(LibFunc::cxa_atexit))
+ return 0;
+
+ Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
if (!Fn)
return 0;
-
+
FunctionType *FTy = Fn->getFunctionType();
// Checking that the function has the right return type, the right number of
@@ -2724,7 +2835,8 @@ static Function *FindCXAAtExit(Module &M) {
/// destructor and can therefore be eliminated.
/// Note that we assume that other optimization passes have already simplified
/// the code so we only look for a function with a single basic block, where
-/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor.
+/// the only allowed instructions are 'ret', 'call' to an empty C++ dtor, and
+/// other side-effect-free instructions.
static bool cxxDtorIsEmpty(const Function &Fn,
SmallPtrSet<const Function *, 8> &CalledFunctions) {
// FIXME: We could eliminate C++ destructors if they're readonly/readnone and
@@ -2757,9 +2869,9 @@ static bool cxxDtorIsEmpty(const Function &Fn,
if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions))
return false;
} else if (isa<ReturnInst>(*I))
- return true;
- else
- return false;
+ return true; // We're done.
+ else if (I->mayHaveSideEffects())
+ return false; // Destructor with side effects, bail.
}
return false;
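
A hypothetical example of the destructors this check now accepts: after the relaxation, ~Logger below still counts as empty because its only extra instructions are side-effect free, so the __cxa_atexit registration made for the global can be dropped.

struct Logger {
  int level;
  ~Logger() {
    int copy = level;  // a plain load: side-effect free, doesn't block this
    (void)copy;
  }
};

Logger logger = {1};   // registers ~Logger via __cxa_atexit (Itanium ABI)

int main() { return logger.level; }
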
@@ -2815,10 +2927,13 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
// Try to find the llvm.globalctors list.
GlobalVariable *GlobalCtors = FindGlobalCtors(M);
- Function *CXAAtExitFn = FindCXAAtExit(M);
+ Function *CXAAtExitFn = FindCXAAtExit(M, TLI);
bool LocalChange = true;
while (LocalChange) {
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index c0426da2c687..664ddf6f7a2b 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -32,34 +32,21 @@ namespace {
// AlwaysInliner only inlines functions that are marked as "always inline".
class AlwaysInliner : public Inliner {
- // Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
- InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/true) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
- static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, NeverInline);
- }
- float getInlineFudgeFactor(CallSite CS) {
- return CA.getInlineFudgeFactor(CS);
- }
- void resetCachedCostInfo(Function *Caller) {
- CA.resetCachedCostInfo(Caller);
- }
- void growCachedCostInfo(Function* Caller, Function* Callee) {
- CA.growCachedCostInfo(Caller, Callee);
+ AlwaysInliner(bool InsertLifetime) : Inliner(ID, -2000000000,
+ InsertLifetime) {
+ initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
+ static char ID; // Pass identification, replacement for typeid
+ virtual InlineCost getInlineCost(CallSite CS);
virtual bool doFinalization(CallGraph &CG) {
- return removeDeadFunctions(CG, &NeverInline);
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
}
virtual bool doInitialization(CallGraph &CG);
- void releaseMemory() {
- CA.clear();
- }
};
}
@@ -72,17 +59,74 @@ INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
-// doInitialization - Initializes the vector of functions that have not
-// been annotated with the "always inline" attribute.
-bool AlwaysInliner::doInitialization(CallGraph &CG) {
- CA.setTargetData(getAnalysisIfAvailable<TargetData>());
+Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
+ return new AlwaysInliner(InsertLifetime);
+}
+
+/// \brief Minimal filter to detect invalid constructs for inlining.
+static bool isInlineViable(Function &F) {
+ bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice);
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Disallow inlining of functions which contain an indirect branch.
+ if (isa<IndirectBrInst>(BI->getTerminator()))
+ return false;
- Module &M = CG.getModule();
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
+ ++II) {
+ CallSite CS(II);
+ if (!CS)
+ continue;
- for (Module::iterator I = M.begin(), E = M.end();
- I != E; ++I)
- if (!I->isDeclaration() && !I->hasFnAttr(Attribute::AlwaysInline))
- NeverInline.insert(I);
+ // Disallow recursive calls.
+ if (&F == CS.getCalledFunction())
+ return false;
+ // Disallow calls which expose returns-twice to a function not previously
+ // attributed as such.
+ if (!ReturnsTwice && CS.isCall() &&
+ cast<CallInst>(CS.getInstruction())->canReturnTwice())
+ return false;
+ }
+ }
+
+ return true;
+}
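
Hypothetical C++ illustrations of the constructs isInlineViable rejects (the always_inline attribute is left off so the file compiles cleanly; computed goto, the usual source of IndirectBrInst, is a GNU extension and omitted):

#include <csetjmp>

static std::jmp_buf env;

// Would be rejected if marked always_inline: direct recursion, so forced
// inlining could never bottom out.
static int fact(int n) { return n <= 1 ? 1 : n * fact(n - 1); }

// Would be rejected: setjmp "returns twice", and inlining this would expose
// that behavior to a caller not attributed returns_twice.
static int checkpoint() { return setjmp(env); }

int main() { return fact(5) == 120 ? 0 : checkpoint(); }
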
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+ // We assume indirect calls aren't calling an always-inline function.
+ if (!Callee) return InlineCost::getNever();
+
+ // We can't inline calls to external functions.
+ // FIXME: We shouldn't even get here.
+ if (Callee->isDeclaration()) return InlineCost::getNever();
+
+ // Return never for anything not marked as always inline.
+ if (!Callee->hasFnAttr(Attribute::AlwaysInline))
+ return InlineCost::getNever();
+
+ // Do some minimal analysis to preclude non-viable functions.
+ if (!isInlineViable(*Callee))
+ return InlineCost::getNever();
+
+ // Otherwise, force inlining.
+ return InlineCost::getAlways();
+}
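
A minimal sketch of constructing the pass with the new knob, via the createAlwaysInlinerPass(bool) overload added above (the surrounding pass-manager boilerplate is assumed from the LLVM of this era):

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;

// Adds the always-inliner; InsertLifetime controls whether lifetime
// intrinsics are emitted for allocas of inlined callees.
void addAlwaysInliner(PassManagerBase &PM, bool InsertLifetime) {
  PM.add(createAlwaysInlinerPass(InsertLifetime));
}
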
+
+// doInitialization - Initializes the vector of functions that have not
+// been annotated with the "always inline" attribute.
+bool AlwaysInliner::doInitialization(CallGraph &CG) {
return false;
}
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 84dd4fdd9887..50038d81161b 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -23,40 +23,26 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
namespace {
class SimpleInliner : public Inliner {
- // Functions that are never inlined
- SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
SimpleInliner() : Inliner(ID) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
- SimpleInliner(int Threshold) : Inliner(ID, Threshold) {
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold,
+ /*InsertLifetime*/true) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
- return CA.getInlineCost(CS, NeverInline);
- }
- float getInlineFudgeFactor(CallSite CS) {
- return CA.getInlineFudgeFactor(CS);
- }
- void resetCachedCostInfo(Function *Caller) {
- CA.resetCachedCostInfo(Caller);
- }
- void growCachedCostInfo(Function* Caller, Function* Callee) {
- CA.growCachedCostInfo(Caller, Callee);
+ return CA.getInlineCost(CS, getInlineThreshold(CS));
}
virtual bool doInitialization(CallGraph &CG);
- void releaseMemory() {
- CA.clear();
- }
};
}
@@ -77,44 +63,6 @@ Pass *llvm::createFunctionInliningPass(int Threshold) {
// annotated with the noinline attribute.
bool SimpleInliner::doInitialization(CallGraph &CG) {
CA.setTargetData(getAnalysisIfAvailable<TargetData>());
-
- Module &M = CG.getModule();
-
- for (Module::iterator I = M.begin(), E = M.end();
- I != E; ++I)
- if (!I->isDeclaration() && I->hasFnAttr(Attribute::NoInline))
- NeverInline.insert(I);
-
- // Get llvm.noinline
- GlobalVariable *GV = M.getNamedGlobal("llvm.noinline");
-
- if (GV == 0)
- return false;
-
- // Don't crash on invalid code
- if (!GV->hasDefinitiveInitializer())
- return false;
-
- const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
-
- if (InitList == 0)
- return false;
-
- // Iterate over each element and add to the NeverInline set
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
-
- // Get Source
- const Constant *Elt = InitList->getOperand(i);
-
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Elt))
- if (CE->getOpcode() == Instruction::BitCast)
- Elt = CE->getOperand(0);
-
- // Insert into set of functions to never inline
- if (const Function *F = dyn_cast<Function>(Elt))
- NeverInline.insert(F);
- }
-
return false;
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index f00935b08887..dc9cbfb05e29 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -36,6 +36,11 @@ STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
+// This weirdly named statistic tracks the number of times that, when attempting
+// to inline a function A into B, we analyze the callers of B in order to see
+// if inlining into those callers would be more profitable and would be blocked
+// by this inline step.
+STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
+
static cl::opt<int>
InlineLimit("inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -48,11 +53,12 @@ HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
const int OptSizeThreshold = 75;
Inliner::Inliner(char &ID)
- : CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
+ : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {}
-Inliner::Inliner(char &ID, int Threshold)
+Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ?
- InlineLimit : Threshold) {}
+ InlineLimit : Threshold),
+ InsertLifetime(InsertLifetime) {}
/// getAnalysisUsage - For this class, we declare that we require and preserve
/// the call graph. If the derived class implements this method, it should
@@ -75,13 +81,13 @@ InlinedArrayAllocasTy;
/// any new allocas to the set if not possible.
static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory) {
+ int InlineHistory, bool InsertLifetime) {
Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
// Try to inline the function. Get the list of static allocas that were
// inlined.
- if (!InlineFunction(CS, IFI))
+ if (!InlineFunction(CS, IFI, InsertLifetime))
return false;
// If the inlined function had a higher stack protection level than the
@@ -230,29 +236,37 @@ bool Inliner::shouldInline(CallSite CS) {
return false;
}
- int Cost = IC.getValue();
Function *Caller = CS.getCaller();
- int CurrentThreshold = getInlineThreshold(CS);
- float FudgeFactor = getInlineFudgeFactor(CS);
- int AdjThreshold = (int)(CurrentThreshold * FudgeFactor);
- if (Cost >= AdjThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ if (!IC) {
+ DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
return false;
}
- // Try to detect the case where the current inlining candidate caller
- // (call it B) is a static function and is an inlining candidate elsewhere,
- // and the current candidate callee (call it C) is large enough that
- // inlining it into B would make B too big to inline later. In these
- // circumstances it may be best not to inline C into B, but to inline B
- // into its callers.
- if (Caller->hasLocalLinkage()) {
+ // Try to detect the case where the current inlining candidate caller (call
+ // it B) is a static or linkonce-ODR function and is an inlining candidate
+ // elsewhere, and the current candidate callee (call it C) is large enough
+ // that inlining it into B would make B too big to inline later. In these
+ // circumstances it may be best not to inline C into B, but to inline B into
+ // its callers.
+ //
+ // This only applies to static and linkonce-ODR functions because those are
+ // expected to be available for inlining in the translation units where they
+ // are used. Thus we will always have the opportunity to make local inlining
+ // decisions. Importantly the linkonce-ODR linkage covers inline functions
+ // and templates in C++.
+ //
+ // FIXME: All of this logic should be sunk into getInlineCost. It relies on
+ // the internal implementation of the inline cost metrics rather than
+ // treating them as truly abstract units etc.
+ if (Caller->hasLocalLinkage() ||
+ Caller->getLinkage() == GlobalValue::LinkOnceODRLinkage) {
int TotalSecondaryCost = 0;
- bool outerCallsFound = false;
+ // The candidate cost to be imposed upon the current function.
+ int CandidateCost = IC.getCost() - (InlineConstants::CallPenalty + 1);
// This bool tracks what happens if we do NOT inline C into B.
- bool callerWillBeRemoved = true;
+ bool callerWillBeRemoved = Caller->hasLocalLinkage();
// This bool tracks what happens if we DO inline C into B.
bool inliningPreventsSomeOuterInline = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
@@ -268,26 +282,20 @@ bool Inliner::shouldInline(CallSite CS) {
}
InlineCost IC2 = getInlineCost(CS2);
- if (IC2.isNever())
+ ++NumCallerCallersAnalyzed;
+ if (!IC2) {
callerWillBeRemoved = false;
- if (IC2.isAlways() || IC2.isNever())
+ continue;
+ }
+ if (IC2.isAlways())
continue;
- outerCallsFound = true;
- int Cost2 = IC2.getValue();
- int CurrentThreshold2 = getInlineThreshold(CS2);
- float FudgeFactor2 = getInlineFudgeFactor(CS2);
-
- if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
- callerWillBeRemoved = false;
-
- // See if we have this case. We subtract off the penalty
- // for the call instruction, which we would be deleting.
- if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
- Cost2 + Cost - (InlineConstants::CallPenalty + 1) >=
- (int)(CurrentThreshold2 * FudgeFactor2)) {
+ // See if inlining of the original callsite would erase the cost delta of
+ // this callsite. We subtract off the penalty for the call instruction,
+ // which we would be deleting.
+ if (IC2.getCostDelta() <= CandidateCost) {
inliningPreventsSomeOuterInline = true;
- TotalSecondaryCost += Cost2;
+ TotalSecondaryCost += IC2.getCost();
}
}
// If all outer calls to Caller would get inlined, the cost for the last
@@ -297,17 +305,16 @@ bool Inliner::shouldInline(CallSite CS) {
if (callerWillBeRemoved && Caller->use_begin() != Caller->use_end())
TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
- if (outerCallsFound && inliningPreventsSomeOuterInline &&
- TotalSecondaryCost < Cost) {
- DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
- " Cost = " << Cost <<
+ if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost()) {
+ DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
+ " Cost = " << IC.getCost() <<
", outer Cost = " << TotalSecondaryCost << '\n');
return false;
}
}
- DEBUG(dbgs() << " Inlining: cost=" << Cost
- << ", thres=" << AdjThreshold
+ DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n');
return true;
}
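
A standalone sketch of the decision rule above, with hypothetical numbers (the LastCallToStaticBonus adjustment is omitted for brevity): inlining C into B is refused when the outer inlines it would block are collectively cheaper than inlining C itself.

#include <iostream>
#include <vector>

struct OuterSite { int costDelta; int cost; };  // per caller-of-B callsite

static bool shouldBlockInline(int candidateCostC, int callPenalty,
                              const std::vector<OuterSite> &outerSites) {
  // Cost the candidate would impose on B, minus the call we would delete.
  int candidateCost = candidateCostC - (callPenalty + 1);
  int totalSecondaryCost = 0;
  bool preventsOuterInline = false;
  for (const OuterSite &S : outerSites)
    if (S.costDelta <= candidateCost) {   // growing B erases this margin
      preventsOuterInline = true;
      totalSecondaryCost += S.cost;
    }
  return preventsOuterInline && totalSecondaryCost < candidateCostC;
}

int main() {
  // One outer call whose headroom (costDelta 10) would be consumed by a
  // candidate of cost 100 (penalty 25): blocked, since 40 < 100.
  std::cout << shouldBlockInline(100, 25, {{10, 40}}) << '\n';  // 1
}
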
@@ -326,7 +333,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
return false;
}
-
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraph>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -415,8 +421,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CG[Caller]->removeCallEdgeFor(CS);
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
- // Update the cached cost info with the missing call
- growCachedCostInfo(Caller, NULL);
} else {
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
@@ -439,7 +443,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID))
+ InlineHistoryID, InsertLifetime))
continue;
++NumInlined;
@@ -457,9 +461,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
}
}
-
- // Update the cached cost info with the inlined call.
- growCachedCostInfo(Caller, Callee);
}
// If we inlined or deleted the last possible call site to the function,
@@ -479,8 +480,6 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// Remove any call graph edges from the callee to its callees.
CalleeNode->removeAllCalledFunctions();
- resetCachedCostInfo(Callee);
-
// Removing the node for callee from the call graph and delete it.
delete CG.removeFunctionFromModule(CalleeNode);
++NumDeleted;
@@ -514,29 +513,28 @@ bool Inliner::doFinalization(CallGraph &CG) {
/// removeDeadFunctions - Remove dead functions that are not included in
/// DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG,
- SmallPtrSet<const Function *, 16> *DNR) {
- SmallPtrSet<CallGraphNode*, 16> FunctionsToRemove;
+bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode*, 16> FunctionsToRemove;
// Scan for all of the functions, looking for ones that should now be removed
// from the program. Insert the dead ones in the FunctionsToRemove set.
for (CallGraph::iterator I = CG.begin(), E = CG.end(); I != E; ++I) {
CallGraphNode *CGN = I->second;
- if (CGN->getFunction() == 0)
- continue;
-
Function *F = CGN->getFunction();
-
+ if (!F || F->isDeclaration())
+ continue;
+
+ // If we only care about always-inline functions, skip everything else.
+ // This is a bit of a hack to share code between here and the InlineAlways
+ // pass.
+ if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline))
+ continue;
+
// If the only remaining users of the function are dead constants, remove
// them.
F->removeDeadConstantUsers();
- if (DNR && DNR->count(F))
- continue;
- if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
- !F->hasAvailableExternallyLinkage())
- continue;
- if (!F->use_empty())
+ if (!F->isDefTriviallyDead())
continue;
// Remove any call graph edges from the function to its callees.
@@ -548,24 +546,27 @@ bool Inliner::removeDeadFunctions(CallGraph &CG,
CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
// Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.insert(CGN);
+ FunctionsToRemove.push_back(CGN);
}
+ if (FunctionsToRemove.empty())
+ return false;
// Now that we know which functions to delete, do so. We didn't want to do
// this inline, because that would invalidate our CallGraph::iterator
// objects. :(
//
- // Note that it doesn't matter that we are iterating over a non-stable set
+ // Note that it doesn't matter that we are iterating over a non-stable order
// here to do this, it doesn't matter which order the functions are deleted
// in.
- bool Changed = false;
- for (SmallPtrSet<CallGraphNode*, 16>::iterator I = FunctionsToRemove.begin(),
- E = FunctionsToRemove.end(); I != E; ++I) {
- resetCachedCostInfo((*I)->getFunction());
+ array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
+ FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
+ FunctionsToRemove.end()),
+ FunctionsToRemove.end());
+ for (SmallVectorImpl<CallGraphNode *>::iterator I = FunctionsToRemove.begin(),
+ E = FunctionsToRemove.end();
+ I != E; ++I) {
delete CG.removeFunctionFromModule(*I);
++NumDeleted;
- Changed = true;
}
-
- return Changed;
+ return true;
}
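
The switch above from SmallPtrSet to SmallVector trades set semantics for a deterministic array_pod_sort + std::unique pass before deletion; a standalone sketch of that idiom with plain standard containers:

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> nodes = {3, 1, 3, 2, 1};
  std::sort(nodes.begin(), nodes.end());
  nodes.erase(std::unique(nodes.begin(), nodes.end()), nodes.end());
  for (int n : nodes)
    std::cout << n << ' ';      // 1 2 3: duplicates gone, order deterministic
  std::cout << '\n';
}
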
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 7cb1d18f933d..cd29e7a7a7da 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -122,6 +122,9 @@ bool InternalizePass::runOnModule(Module &M) {
bool Changed = false;
+ // Never internalize functions which code-gen might insert.
+ ExternalNames.insert("__stack_chk_fail");
+
// Mark all functions not in the api as internal.
// FIXME: maybe use private linkage?
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -148,9 +151,11 @@ bool InternalizePass::runOnModule(Module &M) {
// won't find them. (see MachineModuleInfo.)
ExternalNames.insert("llvm.global_ctors");
ExternalNames.insert("llvm.global_dtors");
- ExternalNames.insert("llvm.noinline");
ExternalNames.insert("llvm.global.annotations");
+ // Never internalize symbols code-gen inserts.
+ ExternalNames.insert("__stack_chk_guard");
+
// Mark all global variables with initializers that are not in the api as
// internal as well.
// FIXME: maybe use private linkage?
diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt
new file mode 100644
index 000000000000..b18c9150f440
--- /dev/null
+++ b/lib/Transforms/IPO/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Transforms/IPO/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = IPO
+parent = Transforms
+library_name = ipo
+required_libraries = Analysis Core IPA InstCombine Scalar Vectorize Support Target TransformUtils
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 8fdfd72237f5..a1b0a4580bf5 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -22,14 +22,19 @@
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Vectorize.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
+static cl::opt<bool>
+RunVectorization("vectorize", cl::desc("Run vectorization passes"));
+
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
@@ -38,6 +43,7 @@ PassManagerBuilder::PassManagerBuilder() {
DisableSimplifyLibCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
+ Vectorize = RunVectorization;
}
PassManagerBuilder::~PassManagerBuilder() {
@@ -101,6 +107,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+ addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -110,6 +117,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addInitialAliasAnalysisPasses(MPM);
if (!DisableUnitAtATime) {
+ addExtensionsToPM(EP_ModuleOptimizerEarly, MPM);
+
MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
MPM.add(createIPSCCPPass()); // IP SCCP
@@ -170,6 +179,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+ if (Vectorize) {
+ MPM.add(createBBVectorizePass());
+ MPM.add(createInstructionCombiningPass());
+ if (OptLevel > 1)
+ MPM.add(createGVNPass()); // Remove redundancies
+ }
+
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
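
A minimal sketch of enabling the new vectorization step above programmatically instead of via the -vectorize flag (the Vectorize member comes from this patch; the rest is assumed boilerplate for this LLVM era):

#include "llvm/PassManager.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
using namespace llvm;

void buildPipeline(PassManagerBase &MPM) {
  PassManagerBuilder Builder;
  Builder.OptLevel = 2;
  Builder.Vectorize = true;   // schedules BBVectorize plus cleanup, as above
  Builder.populateModulePassManager(MPM);
}
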
@@ -186,11 +202,13 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (OptLevel > 1)
MPM.add(createConstantMergePass()); // Merge dup global constants
}
+ addExtensionsToPM(EP_OptimizerLast, MPM);
}
void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
bool Internalize,
- bool RunInliner) {
+ bool RunInliner,
+ bool DisableGVNLoadPRE) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
@@ -246,9 +264,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
PM.add(createFunctionAttrsPass()); // Add nocapture.
PM.add(createGlobalsModRefPass()); // IP alias analysis.
- PM.add(createLICMPass()); // Hoist loop invariants.
- PM.add(createGVNPass()); // Remove redundancies.
- PM.add(createMemCpyOptPass()); // Remove dead memcpys.
+ PM.add(createLICMPass()); // Hoist loop invariants.
+ PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
+ PM.add(createMemCpyOptPass()); // Remove dead memcpys.
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
@@ -340,4 +358,3 @@ void LLVMPassManagerBuilderPopulateLTOPassManager(LLVMPassManagerBuilderRef PMB,
PassManagerBase *LPM = unwrap(PM);
Builder->populateLTOPassManager(*LPM, Internalize, RunInliner);
}
-
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index cbb80f075087..c8cc8fd1930b 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -101,8 +101,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
// Check to see if this function performs an unwind or calls an
// unwinding function.
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- if (CheckUnwind && (isa<UnwindInst>(BB->getTerminator()) ||
- isa<ResumeInst>(BB->getTerminator()))) {
+ if (CheckUnwind && isa<ResumeInst>(BB->getTerminator())) {
// Uses unwind / resume!
SCCMightUnwind = true;
} else if (CheckReturn && isa<ReturnInst>(BB->getTerminator())) {
diff --git a/lib/Transforms/InstCombine/CMakeLists.txt b/lib/Transforms/InstCombine/CMakeLists.txt
index a46d5adc0ab4..d070ccc0d63f 100644
--- a/lib/Transforms/InstCombine/CMakeLists.txt
+++ b/lib/Transforms/InstCombine/CMakeLists.txt
@@ -13,11 +13,3 @@ add_llvm_library(LLVMInstCombine
InstCombineSimplifyDemanded.cpp
InstCombineVectorOps.cpp
)
-
-add_llvm_library_dependencies(LLVMInstCombine
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 38082787ce4d..199df519ce07 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -22,6 +22,7 @@
namespace llvm {
class CallSite;
class TargetData;
+ class TargetLibraryInfo;
class DbgDeclareInst;
class MemIntrinsic;
class MemSetInst;
@@ -71,6 +72,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
+ TargetLibraryInfo *TLI;
bool MadeIRChange;
public:
/// Worklist - All of the instructions that need to be simplified.
@@ -92,9 +94,11 @@ public:
bool DoOneIteration(Function &F, unsigned ItNum);
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
+
TargetData *getTargetData() const { return TD; }
+ TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
// Return Value:
@@ -287,9 +291,9 @@ public:
return 0; // Don't do anything with FI
}
- void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+ void ComputeMaskedBits(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth = 0) const {
- return llvm::ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+ return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask,
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d10046c10baf..05e702fa43b5 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -136,6 +136,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext");
return BinaryOperator::CreateAShr(NewShl, ShAmt);
}
+
+ // If this is a xor that was canonicalized from a sub, turn it back into
+ // a sub and fuse this add with it.
+ if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) {
+ IntegerType *IT = cast<IntegerType>(I.getType());
+ APInt LHSKnownOne(IT->getBitWidth(), 0);
+ APInt LHSKnownZero(IT->getBitWidth(), 0);
+ ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne);
+ if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
+ return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
+ XorLHS);
+ }
}
}
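
A standalone check of the identity behind the new fold: when C + 1 is a power of two and every possibly-set bit of X lies inside C (the ComputeMaskedBits test above), X ^ C equals C - X, so (X ^ C) + CI rewrites to (C + CI) - X. Exhaustive over 8-bit X with C = 0xFF:

#include <cstdint>
#include <iostream>

int main() {
  const std::uint32_t C = 0xFF, CI = 17;    // C + 1 is a power of two
  bool ok = true;
  for (std::uint32_t X = 0; X <= 0xFF; ++X) // X's set bits all lie within C
    ok &= ((X ^ C) + CI) == ((C + CI) - X);
  std::cout << (ok ? "identity holds" : "mismatch") << '\n';
}
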
@@ -189,14 +201,13 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// A+B --> A|B iff A and B have no bits set in common.
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- APInt Mask = APInt::getAllOnesValue(IT->getBitWidth());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
if (LHSKnownZero != 0) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
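
A standalone exhaustive check of the rewrite this code performs: when ComputeMaskedBits proves A and B share no set bits, addition cannot carry, so A + B equals A | B.

#include <cstdint>
#include <iostream>

int main() {
  bool ok = true;
  for (std::uint32_t A = 0; A <= 0xFF; ++A)
    for (std::uint32_t B = 0; B <= 0xFF; ++B)
      if ((A & B) == 0)                 // no bits in common
        ok &= (A + B) == (A | B);
  std::cout << (ok ? "identity holds" : "mismatch") << '\n';
}
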
@@ -466,57 +477,57 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
- GetElementPtrInst *GEP = 0;
- ConstantExpr *CstGEP = 0;
-
- // TODO: Could also optimize &A[i] - &A[j] -> "i-j", and "&A.foo[i] - &A.foo".
+ GEPOperator *GEP1 = 0, *GEP2 = 0;
+
// For now we require one side to be the base pointer "A" or a constant
- // expression derived from it.
- if (GetElementPtrInst *LHSGEP = dyn_cast<GetElementPtrInst>(LHS)) {
+ // GEP derived from it.
+ if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
// (gep X, ...) - X
if (LHSGEP->getOperand(0) == RHS) {
- GEP = LHSGEP;
+ GEP1 = LHSGEP;
Swapped = false;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(RHS)) {
- // (gep X, ...) - (ce_gep X, ...)
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- LHSGEP->getOperand(0) == CE->getOperand(0)) {
- CstGEP = CE;
- GEP = LHSGEP;
+ } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (LHSGEP->getOperand(0)->stripPointerCasts() ==
+ RHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = RHSGEP;
+ GEP1 = LHSGEP;
Swapped = false;
}
}
}
- if (GetElementPtrInst *RHSGEP = dyn_cast<GetElementPtrInst>(RHS)) {
+ if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) {
// X - (gep X, ...)
if (RHSGEP->getOperand(0) == LHS) {
- GEP = RHSGEP;
+ GEP1 = RHSGEP;
Swapped = true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(LHS)) {
- // (ce_gep X, ...) - (gep X, ...)
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- RHSGEP->getOperand(0) == CE->getOperand(0)) {
- CstGEP = CE;
- GEP = RHSGEP;
+ } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) {
+ // (gep X, ...) - (gep X, ...)
+ if (RHSGEP->getOperand(0)->stripPointerCasts() ==
+ LHSGEP->getOperand(0)->stripPointerCasts()) {
+ GEP2 = LHSGEP;
+ GEP1 = RHSGEP;
Swapped = true;
}
}
}
- if (GEP == 0)
+ // Avoid duplicating the arithmetic if GEP2 has non-constant indices and
+ // multiple users.
+ if (GEP1 == 0 ||
+ (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
return 0;
// Emit the offset of the GEP and an intptr_t.
- Value *Result = EmitGEPOffset(GEP);
+ Value *Result = EmitGEPOffset(GEP1);
// If we had a constant expression GEP on the other side offsetting the
// pointer, subtract it from the offset we have.
- if (CstGEP) {
- Value *CstOffset = EmitGEPOffset(CstGEP);
- Result = Builder->CreateSub(Result, CstOffset);
+ if (GEP2) {
+ Value *Offset = EmitGEPOffset(GEP2);
+ Result = Builder->CreateSub(Result, Offset);
}
-
// If we have p - gep(p, ...) then we have to negate the result.
if (Swapped)
@@ -587,6 +598,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
ConstantInt *C2;
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2))))
return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
}
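
A hypothetical C++ input for the OptimizePointerDifference change above: with both sides now matched as GEPOperator (instruction or constant expression alike), a difference of two GEPs off the same stripped base folds to arithmetic on the offsets.

#include <cstddef>
#include <iostream>

static int arr[16];

std::ptrdiff_t diff(std::size_t i, std::size_t j) {
  return &arr[i] - &arr[j];   // folds to i - j; no pointers survive
}

int main() { std::cout << diff(7, 2) << '\n'; }  // 5
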
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5e0bfe8e26d2..0dbe11d2f01f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Intrinsics.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
@@ -62,50 +63,6 @@ static inline Value *dyn_castNotVal(Value *V) {
return 0;
}
-
-/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
-/// are carefully arranged to allow folding of expressions such as:
-///
-/// (A < B) | (A > B) --> (A != B)
-///
-/// Note that this is only valid if the first and second predicates have the
-/// same sign. Is illegal to do: (A u< B) | (A s> B)
-///
-/// Three bits are used to represent the condition, as follows:
-/// 0 A > B
-/// 1 A == B
-/// 2 A < B
-///
-/// <=> Value Definition
-/// 000 0 Always false
-/// 001 1 A > B
-/// 010 2 A == B
-/// 011 3 A >= B
-/// 100 4 A < B
-/// 101 5 A != B
-/// 110 6 A <= B
-/// 111 7 Always true
-///
-static unsigned getICmpCode(const ICmpInst *ICI) {
- switch (ICI->getPredicate()) {
- // False -> 0
- case ICmpInst::ICMP_UGT: return 1; // 001
- case ICmpInst::ICMP_SGT: return 1; // 001
- case ICmpInst::ICMP_EQ: return 2; // 010
- case ICmpInst::ICMP_UGE: return 3; // 011
- case ICmpInst::ICMP_SGE: return 3; // 011
- case ICmpInst::ICMP_ULT: return 4; // 100
- case ICmpInst::ICMP_SLT: return 4; // 100
- case ICmpInst::ICMP_NE: return 5; // 101
- case ICmpInst::ICMP_ULE: return 6; // 110
- case ICmpInst::ICMP_SLE: return 6; // 110
- // True -> 7
- default:
- llvm_unreachable("Invalid ICmp predicate!");
- return 0;
- }
-}
-
/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes an fcmp
/// predicate into a three bit mask. It also returns whether it is an ordered
/// predicate by reference.
@@ -130,31 +87,19 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
default:
// Not expecting FCMP_FALSE and FCMP_TRUE;
llvm_unreachable("Unexpected FCmp predicate!");
- return 0;
}
}
-/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// getNewICmpValue - This is the complement of getICmpCode, which turns an
/// opcode and two operands into either a constant true or false, or a brand
/// new ICmp instruction. The sign is passed in to determine which kind
/// of predicate to use in the new icmp instruction.
-static Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
- InstCombiner::BuilderTy *Builder) {
- CmpInst::Predicate Pred;
- switch (Code) {
- default: assert(0 && "Illegal ICmp code!");
- case 0: // False.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
- case 1: Pred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
- case 2: Pred = ICmpInst::ICMP_EQ; break;
- case 3: Pred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
- case 4: Pred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
- case 5: Pred = ICmpInst::ICMP_NE; break;
- case 6: Pred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
- case 7: // True.
- return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
- }
- return Builder->CreateICmp(Pred, LHS, RHS);
+static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *Builder) {
+ ICmpInst::Predicate NewPred;
+ if (Value *NewConstant = getICmpValue(Sign, Code, LHS, RHS, NewPred))
+ return NewConstant;
+ return Builder->CreateICmp(NewPred, LHS, RHS);
}
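
For reference, a standalone sketch of the three-bit predicate encoding that getICmpCode (now hosted in CmpInstAnalysis) produces and getNewICmpValue consumes: bit 0 is "greater", bit 1 "equal", bit 2 "less", so ANDing or ORing two codes folds conjunctions or disjunctions of same-sign comparisons.

#include <iostream>

enum : unsigned { GT = 1, EQ = 2, LT = 4 };

static const char *decode(unsigned code) {
  static const char *names[8] = {"false", ">", "==", ">=",
                                 "<",     "!=", "<=", "true"};
  return names[code & 7];
}

int main() {
  std::cout << decode(LT | GT) << '\n';                // != : (A<B) | (A>B)
  std::cout << decode((LT | EQ) & (GT | EQ)) << '\n';  // == : (A<=B) & (A>=B)
}
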
/// getFCmpValue - This is the complement of getFCmpCode, which turns an
@@ -165,7 +110,7 @@ static Value *getFCmpValue(bool isordered, unsigned code,
InstCombiner::BuilderTy *Builder) {
CmpInst::Predicate Pred;
switch (code) {
- default: assert(0 && "Illegal FCmp code!");
+ default: llvm_unreachable("Illegal FCmp code!");
case 0: Pred = isordered ? FCmpInst::FCMP_ORD : FCmpInst::FCMP_UNO; break;
case 1: Pred = isordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; break;
case 2: Pred = isordered ? FCmpInst::FCMP_OEQ : FCmpInst::FCMP_UEQ; break;
@@ -180,14 +125,6 @@ static Value *getFCmpValue(bool isordered, unsigned code,
return Builder->CreateFCmp(Pred, LHS, RHS);
}
-/// PredicatesFoldable - Return true if both predicates match sign or if at
-/// least one of them is an equality comparison (which is signless).
-static bool PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
- return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
- (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
- (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
-}
-
// OptAndOp - This handles expressions of the form ((val OP C1) & C2). Where
// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'. Op is
// guaranteed to be a binary operator.
@@ -558,6 +495,38 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
return result;
}
+/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
+/// if possible. The returned predicate is either == or !=. Returns false if
+/// decomposition fails.
+static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z) {
+ // X < 0 is equivalent to (X & SignBit) != 0.
+ if (I->getPredicate() == ICmpInst::ICMP_SLT)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (C->isZero()) {
+ X = I->getOperand(0);
+ Y = ConstantInt::get(I->getContext(),
+ APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ Z = C;
+ return true;
+ }
+
+ // X > -1 is equivalent to (X & SignBit) == 0.
+ if (I->getPredicate() == ICmpInst::ICMP_SGT)
+ if (ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1)))
+ if (C->isAllOnesValue()) {
+ X = I->getOperand(0);
+ Y = ConstantInt::get(I->getContext(),
+ APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
+ }
+
+ return false;
+}
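
A standalone exhaustive check of the two decompositions implemented above, over all signed 8-bit values:

#include <cstdint>
#include <iostream>

int main() {
  bool ok = true;
  for (int v = -128; v <= 127; ++v) {
    std::int8_t X = static_cast<std::int8_t>(v);
    std::uint8_t bits = static_cast<std::uint8_t>(X);
    ok &= (X < 0) == ((bits & 0x80u) != 0);   // X < 0  <=> sign bit set
    ok &= (X > -1) == ((bits & 0x80u) == 0);  // X > -1 <=> sign bit clear
  }
  std::cout << (ok ? "identities hold" : "mismatch") << '\n';
}
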
+
/// foldLogOpOfMaskedICmpsHelper:
/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// return the set of pattern classes (from MaskedICmpType)
@@ -565,10 +534,9 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value*& B, Value*& C,
Value*& D, Value*& E,
- ICmpInst *LHS, ICmpInst *RHS) {
- ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
- if (LHSCC != ICmpInst::ICMP_EQ && LHSCC != ICmpInst::ICMP_NE) return 0;
- if (RHSCC != ICmpInst::ICMP_EQ && RHSCC != ICmpInst::ICMP_NE) return 0;
+ ICmpInst *LHS, ICmpInst *RHS,
+ ICmpInst::Predicate &LHSCC,
+ ICmpInst::Predicate &RHSCC) {
if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType()) return 0;
// vectors are not (yet?) supported
if (LHS->getOperand(0)->getType()->isVectorTy()) return 0;
@@ -582,40 +550,60 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value *L1 = LHS->getOperand(0);
Value *L2 = LHS->getOperand(1);
Value *L11,*L12,*L21,*L22;
- if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
- if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ // Check whether the icmp can be decomposed into a bit test.
+ if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
+ L21 = L22 = L1 = 0;
+ } else {
+ // Look for ANDs in the LHS icmp.
+ if (match(L1, m_And(m_Value(L11), m_Value(L12)))) {
+ if (!match(L2, m_And(m_Value(L21), m_Value(L22))))
+ L21 = L22 = 0;
+ } else {
+ if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
+ return 0;
+ std::swap(L1, L2);
L21 = L22 = 0;
- }
- else {
- if (!match(L2, m_And(m_Value(L11), m_Value(L12))))
- return 0;
- std::swap(L1, L2);
- L21 = L22 = 0;
+ }
}
+ // Bail if LHS was an icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(LHSCC))
+ return 0;
+
Value *R1 = RHS->getOperand(0);
Value *R2 = RHS->getOperand(1);
Value *R11,*R12;
bool ok = false;
- if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
- if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
- A = R11; D = R12; E = R2; ok = true;
+ if (decomposeBitTestICmp(RHS, RHSCC, R11, R12, R2)) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
+ A = R12; D = R11;
+ } else {
+ return 0;
}
- else
- if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ E = R2; R1 = 0; ok = true;
+ } else if (match(R1, m_And(m_Value(R11), m_Value(R12)))) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R2; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
A = R12; D = R11; E = R2; ok = true;
}
}
+
+ // Bail if RHS was an icmp that can't be decomposed into an equality.
+ if (!ICmpInst::isEquality(RHSCC))
+ return 0;
+
+ // Look for ANDs on the right side of the RHS icmp.
if (!ok && match(R2, m_And(m_Value(R11), m_Value(R12)))) {
- if (R11 != 0 && (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22)) {
- A = R11; D = R12; E = R1; ok = true;
- }
- else
- if (R12 != 0 && (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22)) {
+ if (R11 == L11 || R11 == L12 || R11 == L21 || R11 == L22) {
+ A = R11; D = R12; E = R1; ok = true;
+ } else if (R12 == L11 || R12 == L12 || R12 == L21 || R12 == L22) {
A = R12; D = R11; E = R1; ok = true;
- }
- else
+ } else {
return 0;
+ }
}
if (!ok)
return 0;
@@ -644,8 +632,12 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
ICmpInst::Predicate NEWCC,
llvm::InstCombiner::BuilderTy* Builder) {
Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
- unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS);
+ ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
+ unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
+ LHSCC, RHSCC);
if (mask == 0) return 0;
+ assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
+ "foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
if (NEWCC == ICmpInst::ICMP_NE)
mask >>= 1; // treat "Not"-states as normal states
@@ -693,11 +685,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS,
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
if (CCst == 0) return 0;
- if (LHS->getPredicate() != NEWCC)
+ if (LHSCC != NEWCC)
CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
if (ECst == 0) return 0;
- if (RHS->getPredicate() != NEWCC)
+ if (RHSCC != NEWCC)
ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
ConstantInt* MCst = dyn_cast<ConstantInt>(
ConstantExpr::getAnd(ConstantExpr::getAnd(BCst, DCst),
@@ -728,7 +720,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) & getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
@@ -756,24 +748,12 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0)
- if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
- Value *NewAnd = Builder->CreateAnd(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
- }
-
- // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1)
- if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
- }
}
// (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2
// where CMAX is the all ones value for the truncated type,
// iff the lower bits of C2 and CA are zero.
- if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) &&
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
LHS->hasOneUse() && RHS->hasOneUse()) {
Value *V;
ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
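(A brute-force sanity check of the trunc/and fold above — a sketch, not part of the patch — using a 16-bit x truncated to 8 bits so the whole domain can be enumerated. CA, C1, and C2 are arbitrary constants chosen to satisfy the "lower bits of C2 and CA are zero" side condition.)

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t CA = 0x0F00, C2 = 0x0300; // low 8 bits of CA and C2 are zero
  const uint8_t  C1 = 0x5A;
  const uint16_t CMAX = 0x00FF;            // all-ones value of the trunc type
  for (uint32_t v = 0; v <= 0xFFFF; ++v) {
    uint16_t x = (uint16_t)v;
    bool lhs = ((uint8_t)x == C1) && ((x & CA) == C2);
    bool rhs = (x & (CA | CMAX)) == (uint16_t)(C1 | C2);
    assert(lhs == rhs);
  }
  return 0;
}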
@@ -805,7 +785,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
}
}
-
+
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
if (Val != Val2) return 0;
@@ -1382,13 +1362,8 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
// part of the value (e.g. byte 3) then it must be shifted right. If from the
// low part, it must be shifted left.
unsigned DestByteNo = InputByteNo + OverallLeftShift;
- if (InputByteNo < ByteValues.size()/2) {
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
- return true;
- } else {
- if (ByteValues.size()-1-DestByteNo != InputByteNo)
- return true;
- }
+ if (ByteValues.size()-1-DestByteNo != InputByteNo)
+ return true;
// If the destination byte value is already defined, the values are or'd
// together, which isn't a bswap (unless it's an or of the same bits).
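(The simplified index check above encodes the defining property of bswap — input byte i must land in destination byte N-1-i — which the old code tested identically in both branches, making the branch redundant. A small standalone illustration for a 32-bit bswap; this is a sketch, not patch code.)

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t x) {
  return (x << 24) | ((x & 0xFF00u) << 8) |
         ((x >> 8) & 0xFF00u) | (x >> 24);
}

int main() {
  const unsigned N = 4; // ByteValues.size() for an i32
  uint32_t x = 0x11223344u;
  uint32_t y = bswap32(x);
  for (unsigned i = 0; i != N; ++i) {
    uint8_t InByte  = (x >> (8 * i)) & 0xFF;
    uint8_t OutByte = (y >> (8 * (N - 1 - i))) & 0xFF;
    assert(InByte == OutByte); // input byte i ends up at byte N-1-i
  }
  return 0;
}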
@@ -1469,7 +1444,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Op0 = LHS->getOperand(0), *Op1 = LHS->getOperand(1);
unsigned Code = getICmpCode(LHS) | getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
- return getICmpValue(isSigned, Code, Op0, Op1, Builder);
+ return getNewICmpValue(isSigned, Code, Op0, Op1, Builder);
}
}
@@ -1490,18 +1465,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
-
- // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0)
- if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
- }
-
- // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1)
- if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) {
- Value *NewAnd = Builder->CreateAnd(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewAnd, LHSCst);
- }
}
// (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1)
@@ -1586,7 +1549,6 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
case ICmpInst::ICMP_SLT: // (X != 13 | X s< 15) -> true
return ConstantInt::getTrue(LHS->getContext());
}
- break;
case ICmpInst::ICMP_ULT:
switch (RHSCC) {
default: llvm_unreachable("Unknown integer condition code!");
@@ -1962,8 +1924,11 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// Canonicalize xor to the RHS.
- if (match(Op0, m_Xor(m_Value(), m_Value())))
+ bool SwappedForXor = false;
+ if (match(Op0, m_Xor(m_Value(), m_Value()))) {
std::swap(Op0, Op1);
+ SwappedForXor = true;
+ }
// A | ( A ^ B) -> A | B
// A | (~A ^ B) -> A | ~B
@@ -1994,6 +1959,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Not, Op0);
}
+ if (SwappedForXor)
+ std::swap(Op0, Op1);
+
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (ICmpInst *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (Value *Res = FoldOrOfICmps(LHS, RHS))
@@ -2281,7 +2249,8 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
unsigned Code = getICmpCode(LHS) ^ getICmpCode(RHS);
bool isSigned = LHS->isSigned() || RHS->isSigned();
return ReplaceInstUsesWith(I,
- getICmpValue(isSigned, Code, Op0, Op1, Builder));
+ getNewICmpValue(isSigned, Code, Op0, Op1,
+ Builder));
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c7b3ff8504ac..77e47271008c 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -37,26 +37,26 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
unsigned CopyAlign = MI->getAlignment();
if (CopyAlign < MinAlign) {
- MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
+ MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
MinAlign, false));
return MI;
}
-
+
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
if (MemOpLength == 0) return 0;
-
+
// Source and destination pointer types are always "i8*" for intrinsic. See
// if the size is something we can handle with a single primitive load/store.
// A single load+store correctly handles overlapping memory in the memmove
// case.
unsigned Size = MemOpLength->getZExtValue();
if (Size == 0) return MI; // Delete this mem transfer.
-
+
if (Size > 8 || (Size&(Size-1)))
return 0; // If not 1/2/4/8 bytes, exit.
-
+
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
@@ -66,7 +66,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
-
+
// Memcpy forces the use of i8* for the source and destination. That means
// that if you're using memcpy to move one double around, you'll get a cast
// from double* to i8*. We'd much rather use a double load+store rather than
@@ -94,20 +94,20 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
} else
break;
}
-
+
if (SrcETy->isSingleValueType()) {
NewSrcPtrTy = PointerType::get(SrcETy, SrcAddrSp);
NewDstPtrTy = PointerType::get(SrcETy, DstAddrSp);
}
}
}
-
-
+
+
// If the memcpy/memmove provides better alignment info than we can
// infer, use it.
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
-
+
Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
LoadInst *L = Builder->CreateLoad(Src, MI->isVolatile());
@@ -127,7 +127,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
Alignment, false));
return MI;
}
-
+
// Extract the length and alignment and fill if they are constant.
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
@@ -135,14 +135,14 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return 0;
uint64_t Len = LenC->getZExtValue();
Alignment = MI->getAlignment();
-
+
// If the length is zero, this is a no-op
if (Len == 0) return MI; // memset(d,c,0,a) -> noop
-
+
// memset(s,c,n) -> store s, c (for n=1,2,4,8)
if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
-
+
Value *Dest = MI->getDest();
unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
@@ -150,13 +150,13 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
// Alignment 0 is identity for alignment 1 for memset, but not store.
if (Alignment == 0) Alignment = 1;
-
+
// Extract the fill value and store.
uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
StoreInst *S = Builder->CreateStore(ConstantInt::get(ITy, Fill), Dest,
MI->isVolatile());
S->setAlignment(Alignment);
-
+
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setLength(Constant::getNullValue(LenC->getType()));
return MI;
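(The multiply by 0x0101010101010101ULL above is the usual byte-splat trick: it replicates the 8-bit fill value into every byte lane of the wider store value, which is then truncated to the store width. A quick standalone check, plain C++ only:)

#include <cassert>
#include <cstdint>

int main() {
  uint64_t FillByte = 0xAB;
  uint64_t Fill = FillByte * 0x0101010101010101ULL;
  assert(Fill == 0xABABABABABABABABULL);  // n=8 -> i64 store value
  assert((uint16_t)Fill == 0xABAB);       // n=2 -> i16 store value
  return 0;
}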
@@ -165,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return 0;
}
-/// visitCallInst - CallInst simplification. This mostly only handles folding
+/// visitCallInst - CallInst simplification. This mostly only handles folding
/// of intrinsic instructions. For normal calls, it allows visitCallSite to do
/// the heavy lifting.
///
@@ -182,7 +182,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
CI.setDoesNotThrow();
return &CI;
}
-
+
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallSite(&CI);
@@ -203,7 +203,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// alignment is sufficient.
}
}
-
+
// No other transformations apply to volatile transfers.
if (MI->isVolatile())
return 0;
@@ -242,13 +242,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Changed) return II;
}
-
+
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::objectsize: {
// We need target data for just about everything so depend on it.
if (!TD) break;
-
+
Type *ReturnTy = CI.getType();
uint64_t DontKnow = II->getArgOperand(1) == Builder->getTrue() ? 0 : -1ULL;
@@ -265,6 +265,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(GEP->idx_begin(), GEP->idx_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return 0;
Offset = TD->getIndexedOffset(GEP->getPointerOperandType(), Ops);
Op1 = GEP->getPointerOperand()->stripPointerCasts();
@@ -322,7 +324,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap)
return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
-
+
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
@@ -334,7 +336,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return new TruncInst(V, TI->getType());
}
}
-
+
break;
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
@@ -359,14 +361,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
- KnownZero, KnownOne);
+ ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
if ((Mask & KnownZero) == Mask)
return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
APInt(BitWidth, TrailingZeros)));
-
+
}
break;
case Intrinsic::ctlz: {
@@ -378,31 +379,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
- KnownZero, KnownOne);
+ ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
if ((Mask & KnownZero) == Mask)
return ReplaceInstUsesWith(CI, ConstantInt::get(IT,
APInt(BitWidth, LeadingZeros)));
-
+
}
break;
case Intrinsic::uadd_with_overflow: {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
- APInt Mask = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
if (LHSKnownNegative || LHSKnownPositive) {
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
if (LHSKnownNegative && RHSKnownNegative) {
@@ -448,7 +447,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X + undef -> undef
if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
@@ -469,7 +468,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (isa<UndefValue>(II->getArgOperand(0)) ||
isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X - 0 -> {X, false}
if (RHS->isZero()) {
@@ -477,7 +476,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct =
+ Constant *Struct =
ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
@@ -486,14 +485,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::umul_with_overflow: {
Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+ ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+ ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
// Get the largest possible values for each operand.
APInt LHSMax = ~LHSKnownZero;
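(Here ~KnownZero is the largest value consistent with the known bits: every bit not proven zero is assumed to be one. A tiny sketch of that reasoning, not part of the patch:)

#include <cassert>
#include <cstdint>

int main() {
  uint8_t KnownZero = 0xF0;   // suppose the high nibble is proven zero
  uint8_t Max = ~KnownZero;   // 0x0F: every unconstrained bit set
  for (unsigned v = 0; v <= 0xFF; ++v)
    if ((v & KnownZero) == 0) // v is consistent with the known bits
      assert((uint8_t)v <= Max);
  return 0;
}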
@@ -526,19 +524,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X * undef -> undef
if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
-
+
if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X*0 -> {0, false}
if (RHSI->isZero())
return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
-
+
// X * 1 -> {X, false}
if (RHSI->equalsInt(1)) {
Constant *V[] = {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct =
+ Constant *Struct =
ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
@@ -557,7 +555,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, TD) >= 16) {
- Type *OpPtrTy =
+ Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
return new StoreInst(II->getArgOperand(0), Ptr);
@@ -568,7 +566,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) {
- Type *OpPtrTy =
+ Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(1)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
return new StoreInst(II->getArgOperand(1), Ptr);
@@ -621,19 +619,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
- assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
-
+ if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
+ assert(Mask->getType()->getVectorNumElements() == 16 &&
+ "Bad type for intrinsic!");
+
// Check that all of the elements are integer constants or undefs.
bool AllEltsOk = true;
for (unsigned i = 0; i != 16; ++i) {
- if (!isa<ConstantInt>(Mask->getOperand(i)) &&
- !isa<UndefValue>(Mask->getOperand(i))) {
+ Constant *Elt = Mask->getAggregateElement(i);
+ if (Elt == 0 ||
+ !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
AllEltsOk = false;
break;
}
}
-
+
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
@@ -641,23 +641,24 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
-
+
// Only extract each element once.
Value *ExtractedElts[32];
memset(ExtractedElts, 0, sizeof(ExtractedElts));
-
+
for (unsigned i = 0; i != 16; ++i) {
- if (isa<UndefValue>(Mask->getOperand(i)))
+ if (isa<UndefValue>(Mask->getAggregateElement(i)))
continue;
- unsigned Idx=cast<ConstantInt>(Mask->getOperand(i))->getZExtValue();
+ unsigned Idx =
+ cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
-
+
if (ExtractedElts[Idx] == 0) {
- ExtractedElts[Idx] =
+ ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
Builder->getInt32(Idx&15));
}
-
+
// Insert this value into the result vector.
Result = Builder->CreateInsertElement(Result, ExtractedElts[Idx],
Builder->getInt32(i));
@@ -703,7 +704,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return EraseInstFromFunction(CI);
}
}
-
+
// Scan down this block to see if there is another stack restore in the
// same block without an intervening call/alloca.
BasicBlock::iterator BI = II;
@@ -728,12 +729,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
}
}
-
+
// If the stack restore is in a return, resume, or unwind block and if there
// are no allocas or calls between the restore and the return, nuke the
// restore.
- if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI) ||
- isa<UnwindInst>(TI)))
+ if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
return EraseInstFromFunction(CI);
break;
}
@@ -748,7 +748,7 @@ Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
return visitCallSite(&II);
}
-/// isSafeToEliminateVarargsCast - If this cast does not affect the value
+/// isSafeToEliminateVarargsCast - If this cast does not affect the value
/// passed through the varargs area, we can eliminate the use of the cast.
static bool isSafeToEliminateVarargsCast(const CallSite CS,
const CastInst * const CI,
@@ -760,10 +760,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// The size of ByVal arguments is derived from the type, so we
// can't change to a type with a different size. If the size were
// passed explicitly we could avoid this check.
- if (!CS.paramHasAttr(ix, Attribute::ByVal))
+ if (!CS.isByValArgument(ix))
return true;
- Type* SrcTy =
+ Type* SrcTy =
cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
if (!SrcTy->isSized() || !DstTy->isSized())
@@ -807,7 +807,7 @@ public:
} // end anonymous namespace
// Try to fold some different type of calls here.
-// Currently we're only working with the checking functions, memcpy_chk,
+// Currently we're only working with the checking functions, memcpy_chk,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const TargetData *TD) {
@@ -916,7 +916,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
!CalleeF->isDeclaration()) {
Instruction *OldCall = CS.getInstruction();
new StoreInst(ConstantInt::getTrue(Callee->getContext()),
- UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
+ UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
OldCall);
// If OldCall does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust themselves.
@@ -924,7 +924,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
ReplaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
if (isa<CallInst>(OldCall))
return EraseInstFromFunction(*OldCall);
-
+
// We cannot remove an invoke, because it would change the CFG, just
// change the callee to a null pointer.
cast<InvokeInst>(OldCall)->setCalledFunction(
@@ -960,7 +960,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
PointerType *PTy = cast<PointerType>(Callee->getType());
FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
if (FTy->isVarArg()) {
- int ix = FTy->getNumParams() + (isa<InvokeInst>(Callee) ? 3 : 1);
+ int ix = FTy->getNumParams();
// See if we can optimize any arguments passed through the varargs area of
// the call.
for (CallSite::arg_iterator I = CS.arg_begin()+FTy->getNumParams(),
@@ -1061,17 +1061,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (!CastInst::isCastable(ActTy, ParamTy))
return false; // Cannot transform this parameter value.
- unsigned Attrs = CallerPAL.getParamAttributes(i + 1);
+ Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
if (Attrs & Attribute::typeIncompatible(ParamTy))
return false; // Attribute not compatible with transformed value.
-
+
// If the parameter is passed as a byval argument, then we have to have a
// sized type and the sized type has to have the same size as the old type.
if (ParamTy != ActTy && (Attrs & Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || TD == 0)
return false;
-
+
Type *CurElTy = cast<PointerType>(ActTy)->getElementType();
if (TD->getTypeAllocSize(CurElTy) !=
TD->getTypeAllocSize(ParamPTy->getElementType()))
@@ -1099,8 +1099,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
return false;
+
+ // If both the callee and the cast type are varargs, we still have to make
+ // sure the number of fixed parameters are the same or we have the same
+ // ABI issues as if we introduce a varargs call.
+ if (FT->isVarArg() &&
+ cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+ FT->getNumParams() !=
+ cast<FunctionType>(APTy->getElementType())->getNumParams())
+ return false;
}
-
+
if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
!CallerPAL.isEmpty())
// In this case we have more arguments than the new function type, but we
@@ -1114,7 +1123,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
return false;
}
-
+
// Okay, we decided that this is a safe thing to do: go ahead and start
// inserting cast instructions as necessary.
std::vector<Value*> Args;
@@ -1352,11 +1361,11 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// Replace the trampoline call with a direct call. Let the generic
// code sort out any function type mismatches.
- FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
FTy->isVarArg());
Constant *NewCallee =
NestF->getType() == PointerType::getUnqual(NewFTy) ?
- NestF : ConstantExpr::getBitCast(NestF,
+ NestF : ConstantExpr::getBitCast(NestF,
PointerType::getUnqual(NewFTy));
const AttrListPtr &NewPAL = AttrListPtr::get(NewAttrs.begin(),
NewAttrs.end());
@@ -1385,9 +1394,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// parameter, there is no need to adjust the argument list. Let the generic
// code sort out any function type mismatches.
Constant *NewCallee =
- NestF->getType() == PTy ? NestF :
+ NestF->getType() == PTy ? NestF :
ConstantExpr::getBitCast(NestF, PTy);
CS.setCalledFunction(NewCallee);
return CS.getInstruction();
}
-
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index f10e48abf108..39279f437205 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,6 +14,7 @@
#include "InstCombine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -147,8 +148,6 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
return ReplaceInstUsesWith(CI, New);
}
-
-
/// EvaluateInDifferentType - Given an expression that
/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually
/// insert the code to evaluate the expression.
@@ -158,7 +157,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ C = ConstantFoldConstantExpression(CE, TD, TLI);
return C;
}
@@ -216,7 +215,6 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
- break;
}
Res->takeName(I);
@@ -528,9 +526,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
return ReplaceInstUsesWith(CI, In);
}
-
-
-
+
// zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
// zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
// zext (X == 1) to i32 --> X iff X has only the low bit set.
@@ -545,8 +541,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(ICI->getOperand(0), TypeMask, KnownZero, KnownOne);
+ ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -594,9 +589,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+ ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -915,8 +909,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- APInt TypeMask(APInt::getAllOnesValue(BitWidth));
- ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne);
+ ComputeMaskedBits(Op0, KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) {
@@ -1163,6 +1156,9 @@ static Value *LookThroughFPExtensions(Value *V) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
+ // See if the value can be truncated to half and then reextended.
+ if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf))
+ return V;
// See if the value can be truncated to float and then reextended.
if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle))
return V;
@@ -1213,10 +1209,9 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
// Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
- // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
- if (Call && Call->getCalledFunction() &&
- Call->getCalledFunction()->getName() == "sqrt" &&
+ if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) &&
+ Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) &&
Call->getNumArgOperands() == 1 &&
Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
@@ -1423,16 +1418,15 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// Now that the element types match, get the shuffle mask and RHS of the
// shuffle to use, which depends on whether we're increasing or decreasing the
// size of the input.
- SmallVector<Constant*, 16> ShuffleMask;
+ SmallVector<uint32_t, 16> ShuffleMask;
Value *V2;
- IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
if (SrcTy->getNumElements() > DestTy->getNumElements()) {
// If we're shrinking the number of elements, just shuffle in the low
// elements from the input and use undef as the second shuffle input.
V2 = UndefValue::get(SrcTy);
for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
- ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+ ShuffleMask.push_back(i);
} else {
// If we're increasing the number of elements, shuffle in all of the
@@ -1441,14 +1435,16 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
V2 = Constant::getNullValue(SrcTy);
unsigned SrcElts = SrcTy->getNumElements();
for (unsigned i = 0, e = SrcElts; i != e; ++i)
- ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+ ShuffleMask.push_back(i);
// The excess elements reference the first element of the zero input.
- ShuffleMask.append(DestTy->getNumElements()-SrcElts,
- ConstantInt::get(Int32Ty, SrcElts));
+ for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
+ ShuffleMask.push_back(SrcElts);
}
- return new ShuffleVectorInst(InVal, V2, ConstantVector::get(ShuffleMask));
+ return new ShuffleVectorInst(InVal, V2,
+ ConstantDataVector::get(V2->getContext(),
+ ShuffleMask));
}
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index bb1cbfade34d..ab2987ff24cd 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -203,8 +203,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// We need TD information to know the pointer size unless this is inbounds.
if (!GEP->isInBounds() && TD == 0) return 0;
- ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
- if (Init == 0 || Init->getNumOperands() > 1024) return 0;
+ Constant *Init = GV->getInitializer();
+ if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ return 0;
+
+ uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
+ if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
// There are many forms of this optimization we can handle, for now, just do
// the simple index into a single-dimensional array.
@@ -221,7 +225,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// structs.
SmallVector<unsigned, 4> LaterIndices;
- Type *EltTy = cast<ArrayType>(Init->getType())->getElementType();
+ Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
if (Idx == 0) return 0; // Variable index.
@@ -272,8 +276,9 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
- for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
- Constant *Elt = Init->getOperand(i);
+ for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
+ Constant *Elt = Init->getAggregateElement(i);
+ if (Elt == 0) return 0;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
@@ -284,7 +289,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, TD);
+ CompareRHS, TD, TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -440,10 +445,10 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If a 32-bit or 64-bit magic bitvector captures the entire comparison state
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
- if (Init->getNumOperands() <= 32 ||
- (TD && Init->getNumOperands() <= 64 && TD->isLegalInteger(64))) {
+ if (ArrayElementCount <= 32 ||
+ (TD && ArrayElementCount <= 64 && TD->isLegalInteger(64))) {
Type *Ty;
- if (Init->getNumOperands() <= 32)
+ if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
else
Ty = Type::getInt64Ty(Init->getContext());
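(The "magic bitvector" packs, one bit per array element, whether the comparison holds for that element, so the load-and-compare becomes a shift and mask of a single constant. A toy version of the idea with made-up data — a sketch, not the patch's code:)

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical global array, compared element-wise as arr[i] > 10.
  const int arr[8] = {3, 12, 7, 42, 10, 11, 0, 99};
  // Precompute one bit per element: bit i is set iff arr[i] > 10.
  uint32_t magic = 0;
  for (unsigned i = 0; i != 8; ++i)
    if (arr[i] > 10)
      magic |= 1u << i;
  // Now "load arr[i]; cmp > 10" folds to ((magic >> i) & 1) != 0.
  for (unsigned i = 0; i != 8; ++i)
    assert((((magic >> i) & 1) != 0) == (arr[i] > 10));
  return 0;
}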
@@ -566,6 +571,14 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
Instruction &I) {
+ // Don't transform signed compares of GEPs into index compares. Even if the
+ // GEP is inbounds, the final add of the base pointer can have signed overflow
+ // and would change the result of the icmp.
+ // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
+ // the maximum signed value for the pointer type.
+ if (ICmpInst::isSigned(Cond))
+ return 0;
+
// Look through bitcasts.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
RHS = BCI->getOperand(0);
@@ -602,6 +615,20 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return new ICmpInst(ICmpInst::getSignedPredicate(Cond),
GEPLHS->getOperand(0), GEPRHS->getOperand(0));
+ // If we're comparing GEPs with two base pointers that only differ in type
+ // and both GEPs have only constant indices or just one use, then fold
+ // the compare with the adjusted indices.
+ if (TD && GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
+ (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
+ (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
+ PtrBase->stripPointerCasts() ==
+ GEPRHS->getOperand(0)->stripPointerCasts()) {
+ Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
+ EmitGEPOffset(GEPLHS),
+ EmitGEPOffset(GEPRHS));
+ return ReplaceInstUsesWith(I, Cmp);
+ }
+
// Otherwise, the base pointers are different and the indices are
// different, bail out.
return 0;
@@ -1001,9 +1028,8 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// of the high bits truncated out of x are known.
unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt Mask(APInt::getHighBitsSet(SrcBits, SrcBits-DstBits));
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- ComputeMaskedBits(LHSI->getOperand(0), Mask, KnownZero, KnownOne);
+ ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne);
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
@@ -1657,6 +1683,14 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
return 0;
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A) < NeededSignBits ||
+ IC.ComputeNumSignBits(B) < NeededSignBits)
+ return 0;
+
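(To see why 33 sign bits is the threshold in the comment's example — a worked sketch, not part of the patch: a 64-bit value is the sign extension of a 32-bit one exactly when its top 64 - 32 + 1 = 33 bits all agree. This assumes arithmetic right shift on signed values, as on all common targets.)

#include <cassert>
#include <cstdint>

// Count how many leading bits of v equal its sign bit (including it).
static unsigned numSignBits(int64_t v) {
  unsigned n = 1;
  for (int bit = 62; bit >= 0 && ((v >> bit) & 1) == ((v >> 63) & 1); --bit)
    ++n;
  return n;
}

int main() {
  assert(numSignBits(INT32_MAX) >= 33);             // fits in i32
  assert(numSignBits(INT32_MIN) >= 33);             // fits in i32
  assert(numSignBits((int64_t)INT32_MAX + 1) < 33); // does not fit
  assert(numSignBits((int64_t)INT32_MIN - 1) < 33); // does not fit
  return 0;
}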
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
// and truncates that discard the high bits of the add. Verify that this is
@@ -1787,6 +1821,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
+ // Comparing -val or val with non-zero is the same as just comparing val,
+ // i.e., abs(val) != 0 -> val != 0.
+ if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero()))
+ {
+ Value *Cond, *SelectTrue, *SelectFalse;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
+ m_Value(SelectFalse)))) {
+ if (Value *V = dyn_castNegVal(SelectTrue)) {
+ if (V == SelectFalse)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ else if (Value *V = dyn_castNegVal(SelectFalse)) {
+ if (V == SelectTrue)
+ return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
+ }
+ }
+ }
+
Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
@@ -2683,6 +2735,17 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
}
+ } else {
+ // See if the RHS value is < UnsignedMin.
+ APFloat SMin(RHS.getSemantics(), APFloat::fcZero, false);
+ SMin.convertFromAPInt(APInt::getMinValue(IntWidth), true,
+ APFloat::rmNearestTiesToEven);
+ if (SMin.compare(RHS) == APFloat::cmpGreaterThan) { // umin > 12312.0
+ if (Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_UGT ||
+ Pred == ICmpInst::ICMP_UGE)
+ return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext()));
+ return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext()));
+ }
}
// Okay, now we know that the FP constant fits in the range [SMIN, SMAX] or
@@ -2822,7 +2885,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
const fltSemantics *Sem;
// FIXME: This shouldn't be here.
- if (LHSExt->getSrcTy()->isFloatTy())
+ if (LHSExt->getSrcTy()->isHalfTy())
+ Sem = &APFloat::IEEEhalf;
+ else if (LHSExt->getSrcTy()->isFloatTy())
Sem = &APFloat::IEEEsingle;
else if (LHSExt->getSrcTy()->isDoubleTy())
Sem = &APFloat::IEEEdouble;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 7446a51a4db1..b2f2e248e417 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -22,6 +22,72 @@ using namespace llvm;
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
+// Try to kill dead allocas by walking through their uses until we see some use
+// that could escape. This is a conservative analysis which tries to handle
+// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things
+// left after inlining and SROA finish chewing on an alloca.
+static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) {
+ SmallVector<Instruction *, 4> Worklist, DeadStores;
+ Worklist.push_back(&AI);
+ do {
+ Instruction *PI = Worklist.pop_back_val();
+ for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end();
+ UI != UE; ++UI) {
+ Instruction *I = cast<Instruction>(*UI);
+ switch (I->getOpcode()) {
+ default:
+ // Give up the moment we see something we can't handle.
+ return 0;
+
+ case Instruction::GetElementPtr:
+ case Instruction::BitCast:
+ Worklist.push_back(I);
+ continue;
+
+ case Instruction::Call:
+ // We can handle a limited subset of calls to no-op intrinsics.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ continue;
+ default:
+ return 0;
+ }
+ }
+ // Reject everything else.
+ return 0;
+
+ case Instruction::Store: {
+ // Stores into the alloca are only live if the alloca is live.
+ StoreInst *SI = cast<StoreInst>(I);
+ // We can eliminate atomic stores, but not volatile.
+ if (SI->isVolatile())
+ return 0;
+ // The store is only trivially safe if the pointer is the destination
+ // as opposed to the value. We're conservative here and don't check for
+ // the case where we store the address of a dead alloca into a dead
+ // alloca.
+ if (SI->getPointerOperand() != PI)
+ return 0;
+ DeadStores.push_back(I);
+ continue;
+ }
+ }
+ }
+ } while (!Worklist.empty());
+
+ // The alloca is dead. Kill off all the stores to it, and then replace it
+ // with undef.
+ while (!DeadStores.empty())
+ IC.EraseInstFromFunction(*DeadStores.pop_back_val());
+ return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType()));
+}
+
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
@@ -81,7 +147,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
}
- return 0;
+ // Try to aggressively remove allocas which are only used for GEPs, lifetime
+ // markers, and stores. This happens when SROA iteratively promotes stores
+ // out of the alloca, and we need to clean up after it.
+ return removeDeadAlloca(*this, AI);
}
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7f48125a97ab..5168e2a113ca 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -256,22 +256,18 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- // Simplify mul instructions with a constant RHS...
+ // Simplify mul instructions with a constant RHS.
if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1C)) {
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
if (Op1F->isExactlyValue(1.0))
return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
- } else if (Op1C->getType()->isVectorTy()) {
- if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
- // As above, vector X*splat(1.0) -> X in all defined cases.
- if (Constant *Splat = Op1V->getSplatValue()) {
- if (ConstantFP *F = dyn_cast<ConstantFP>(Splat))
- if (F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0);
- }
- }
+ } else if (ConstantDataVector *Op1V = dyn_cast<ConstantDataVector>(Op1C)) {
+ // As above, vector X*splat(1.0) -> X in all defined cases.
+ if (ConstantFP *F = dyn_cast_or_null<ConstantFP>(Op1V->getSplatValue()))
+ if (F->isExactlyValue(1.0))
+ return ReplaceInstUsesWith(I, Op0);
}
// Try to fold constant mul into select arguments.
@@ -441,19 +437,23 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
// Handle the integer div common cases
if (Instruction *Common = commonIDivTransforms(I))
return Common;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
+
+ {
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
- if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2
+ const APInt *C;
+ if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
- BinaryOperator::CreateLShr(Op0,
- ConstantInt::get(Op0->getType(), C->getValue().logBase2()));
+ BinaryOperator::CreateLShr(Op0,
+ ConstantInt::get(Op0->getType(),
+ C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
}
+ }
+ if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
Value *IC = Builder->CreateICmpULT(Op0, C);
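(The power-of-two udiv fold a few lines up is the classic strength reduction: unsigned division by 2^C equals a logical shift right by C. A minimal check — sketch only:)

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 100000; x += 7)
    assert(x / 8 == x >> 3); // X udiv 2^3 --> X lshr 3
  return 0;
}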
@@ -684,28 +684,36 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
// If it's a constant vector, flip any negative values positive.
- if (ConstantVector *RHSV = dyn_cast<ConstantVector>(Op1)) {
- unsigned VWidth = RHSV->getNumOperands();
+ if (isa<ConstantVector>(Op1) || isa<ConstantDataVector>(Op1)) {
+ Constant *C = cast<Constant>(Op1);
+ unsigned VWidth = C->getType()->getVectorNumElements();
bool hasNegative = false;
- for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
+ bool hasMissing = false;
+ for (unsigned i = 0; i != VWidth; ++i) {
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) {
+ hasMissing = true;
+ break;
+ }
+
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elt))
if (RHS->isNegative())
hasNegative = true;
+ }
- if (hasNegative) {
- std::vector<Constant *> Elts(VWidth);
+ if (hasNegative && !hasMissing) {
+ SmallVector<Constant *, 16> Elts(VWidth);
for (unsigned i = 0; i != VWidth; ++i) {
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
+ Elts[i] = C->getAggregateElement(i); // Handle undef, etc.
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(Elts[i])) {
if (RHS->isNegative())
Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
- else
- Elts[i] = RHS;
}
}
Constant *NewRHSV = ConstantVector::get(Elts);
- if (NewRHSV != RHSV) {
+ if (NewRHSV != C) { // Don't loop on -MININT
Worklist.AddValue(I.getOperand(1));
I.setOperand(1, NewRHSV);
return &I;
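(Flipping negative divisor elements is safe because srem, like C's % on int, takes its sign from the dividend, so X srem -C equals X srem C. A small check — a sketch relying on C++'s truncated division matching LLVM srem:)

#include <cassert>

int main() {
  for (int x = -50; x <= 50; ++x)
    for (int c = 1; c <= 9; ++c)
      assert(x % c == x % -c); // the remainder's sign follows the dividend
  return 0;
}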
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 91e60a4fb244..e727b2c592db 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -184,7 +184,6 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
}
llvm_unreachable("Shouldn't get here");
- return 0;
}
static bool isSelect01(Constant *C1, Constant *C2) {
@@ -282,7 +281,8 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
/// replaced with RepOp.
static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
// Trivial replacement.
if (V == Op)
return RepOp;
@@ -294,17 +294,19 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
if (B->getOperand(0) == Op)
- return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD);
+ return SimplifyBinOp(B->getOpcode(), RepOp, B->getOperand(1), TD, TLI);
if (B->getOperand(1) == Op)
- return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD);
+ return SimplifyBinOp(B->getOpcode(), B->getOperand(0), RepOp, TD, TLI);
}
// Same for CmpInsts.
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
if (C->getOperand(0) == Op)
- return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD);
+ return SimplifyCmpInst(C->getPredicate(), RepOp, C->getOperand(1), TD,
+ TLI);
if (C->getOperand(1) == Op)
- return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD);
+ return SimplifyCmpInst(C->getPredicate(), C->getOperand(0), RepOp, TD,
+ TLI);
}
// TODO: We could hand off more cases to instsimplify here.
@@ -330,7 +332,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- ConstOps, TD);
+ ConstOps, TD, TLI);
}
}
@@ -479,18 +481,18 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
// arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, FalseVal);
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, FalseVal);
} else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD) == FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD) == FalseVal)
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, TD, TLI) == FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, TD, TLI) == FalseVal)
return ReplaceInstUsesWith(SI, TrueVal);
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD) == TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD) == TrueVal)
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, TD, TLI) == TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, TD, TLI) == TrueVal)
return ReplaceInstUsesWith(SI, TrueVal);
}
@@ -679,6 +681,13 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return BinaryOperator::CreateOr(CondVal, FalseVal);
else if (CondVal == FalseVal)
return BinaryOperator::CreateAnd(CondVal, TrueVal);
+
+ // select a, ~a, b -> (~a)&b
+ // select a, b, ~a -> (~a)|b
+ if (match(TrueVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateAnd(TrueVal, FalseVal);
+ else if (match(FalseVal, m_Not(m_Specific(CondVal))))
+ return BinaryOperator::CreateOr(TrueVal, FalseVal);
}
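(These identities are all over i1 operands, so they can be verified by enumerating booleans. A sketch covering the two new rewrites:)

#include <cassert>

int main() {
  for (int a = 0; a <= 1; ++a)
    for (int b = 0; b <= 1; ++b) {
      bool A = a != 0, B = b != 0, NotA = !A;
      assert((A ? NotA : B) == (NotA && B)); // select a, ~a, b -> (~a)&b
      assert((A ? B : NotA) == (NotA || B)); // select a, b, ~a -> (~a)|b
    }
  return 0;
}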
// Selecting between two integer constants?
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 6d85adde9b85..b31049e59f18 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -190,7 +190,8 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData(),
+ IC.getTargetLibraryInfo());
return V;
}
@@ -198,7 +199,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
IC.Worklist.Add(I);
switch (I->getOpcode()) {
- default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ default: llvm_unreachable("Inconsistency with CanEvaluateShifted");
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -535,12 +536,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
Value *X = ShiftOp->getOperand(0);
- uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
-
IntegerType *Ty = cast<IntegerType>(I.getType());
// Check for (X << c1) << c2 and (X >> c1) >> c2
if (I.getOpcode() == ShiftOp->getOpcode()) {
+ uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
// If this is oversized composite shift, then unsigned shifts get 0, ashr
// saturates.
if (AmtSum >= TypeBits) {
@@ -576,7 +576,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
ShiftOp->getOpcode() != Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
- Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->isExact()) {
+ // (X >>?,exact C1) << C2 --> X << (C2-C1)
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
@@ -586,15 +595,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr &&
ShiftOp->getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
- Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
+ X, ShiftDiffCst);
+ NewLShr->setIsExact(I.isExact());
+ return NewLShr;
+ }
+ Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in.
+
+ // We can't handle (X << C1) >>s C2; it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
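+ // E.g. for i8: (X <<nsw 2) >>s 3 == X >>s 1, because 'nsw' means the
+ // bits shifted out by the shl were copies of the sign bit.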
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
+ X, ShiftDiffCst);
+ NewAShr->setIsExact(I.isExact());
+ return NewAShr;
+ }
+ }
} else {
assert(ShiftAmt2 < ShiftAmt1);
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
@@ -602,9 +630,16 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
if (I.getOpcode() == Instruction::Shl &&
ShiftOp->getOpcode() != Instruction::Shl) {
- Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
- ConstantInt::get(Ty, ShiftDiff));
-
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->isExact()) {
+ // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
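+ // E.g. for i8: (X >>u,exact 3) << 1 == X >>u,exact 2; the narrower
+ // shift still drops only zero bits.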
+ BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
+ X, ShiftDiffCst);
+ NewShr->setIsExact(true);
+ return NewShr;
+ }
+ Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(),
+ X, ShiftDiffCst);
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
@@ -613,14 +648,34 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
if (I.getOpcode() == Instruction::LShr &&
ShiftOp->getOpcode() == Instruction::Shl) {
- Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
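+ // E.g. for i8: (X <<nuw 3) >>u 1 == X <<nuw 2; the wider shl lost no
+ // nonzero bits, so the narrower one cannot either.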
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(true);
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
return BinaryOperator::CreateAnd(Shift,
ConstantInt::get(I.getContext(),Mask));
}
- // We can't handle (X << C1) >>a C2, it shifts arbitrary bits in.
+ // We can't handle (X << C1) >>s C2; it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
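+ // E.g. for i8: (X <<nsw 3) >>s 1 == X <<nsw 2, since the sign bit
+ // is unchanged in both forms.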
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
+ X, ShiftDiffCst);
+ NewShl->setHasNoSignedWrap(true);
+ return NewShl;
+ }
+ }
}
}
return 0;
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 5cd9a4b7954c..125c74a89a11 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -142,7 +142,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
return 0; // Only analyze instructions.
}
@@ -156,10 +156,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownZero,
- LHSKnownZero, LHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
@@ -180,10 +178,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- ComputeMaskedBits(I->getOperand(1), DemandedMask,
- RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), DemandedMask & ~RHSKnownOne,
- LHSKnownZero, LHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
@@ -206,7 +202,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
return 0;
}
@@ -219,7 +215,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- ComputeMaskedBits(I, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
@@ -567,9 +563,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
LHSKnownZero, LHSKnownOne, Depth+1))
return I;
}
+
// Otherwise just hand the sub off to ComputeMaskedBits to fill in
// the known zeros and ones.
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+
+ // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
+ // zero.
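+ // E.g. (7 - X) == (7 ^ X) when the bits of X above bit 2 are known
+ // zero: subtracting from an all-ones mask cannot borrow, so it just
+ // complements those bits.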
+ if (ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(0))) {
+ APInt I0 = C0->getValue();
+ if ((I0 + 1).isPowerOf2() && (I0 | KnownZero).isAllOnesValue()) {
+ Instruction *Xor = BinaryOperator::CreateXor(I->getOperand(1), C0);
+ return InsertNewInstWith(Xor, *I);
+ }
+ }
break;
case Instruction::Shl:
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -671,8 +678,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] ||
(HighBits & ~DemandedMask) == HighBits) {
// Perform the logical shift right.
- Instruction *NewVal = BinaryOperator::CreateLShr(
- I->getOperand(0), SA, I->getName());
+ BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0),
+ SA, I->getName());
+ NewVal->setIsExact(cast<BinaryOperator>(I)->isExact());
return InsertNewInstWith(NewVal, *I);
} else if ((KnownOne & SignBit) != 0) { // New bits are known one.
KnownOne |= HighBits;
@@ -717,10 +725,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The sign bit is the LHS's sign bit, except when the result of the
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
- APInt Mask2 = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne,
- Depth+1);
+ ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero |= LHSKnownZero;
@@ -783,7 +789,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return 0;
}
}
- ComputeMaskedBits(V, DemandedMask, KnownZero, KnownOne, Depth);
+ ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
break;
}
@@ -822,46 +828,39 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
UndefElts = 0;
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+
+ // Handle ConstantAggregateZero, ConstantVector, ConstantDataSequential.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Check if all elements are demanded (the identity case). If so, return 0 since we are not simplifying
+ // anything.
+ if (DemandedElts.isAllOnesValue())
+ return 0;
+
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
-
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i)
+
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0; i != VWidth; ++i) {
if (!DemandedElts[i]) { // If not demanded, set to undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
- } else if (isa<UndefValue>(CV->getOperand(i))) { // Already undef.
+ continue;
+ }
+
+ Constant *Elt = C->getAggregateElement(i);
+ if (Elt == 0) return 0;
+
+ if (isa<UndefValue>(Elt)) { // Already undef.
Elts.push_back(Undef);
UndefElts.setBit(i);
} else { // Otherwise, defined.
- Elts.push_back(CV->getOperand(i));
+ Elts.push_back(Elt);
}
-
- // If we changed the constant, return it.
- Constant *NewCP = ConstantVector::get(Elts);
- return NewCP != CV ? NewCP : 0;
- }
-
- if (isa<ConstantAggregateZero>(V)) {
- // Simplify the CAZ to a ConstantVector where the non-demanded elements are
- // set to undef.
-
- // Check if this is identity. If so, return 0 since we are not simplifying
- // anything.
- if (DemandedElts.isAllOnesValue())
- return 0;
-
- Type *EltTy = cast<VectorType>(V->getType())->getElementType();
- Constant *Zero = Constant::getNullValue(EltTy);
- Constant *Undef = UndefValue::get(EltTy);
- std::vector<Constant*> Elts;
- for (unsigned i = 0; i != VWidth; ++i) {
- Constant *Elt = DemandedElts[i] ? Zero : Undef;
- Elts.push_back(Elt);
}
- UndefElts = DemandedElts ^ EltMask;
- return ConstantVector::get(Elts);
+
+ // If we changed the constant, return it.
+ Constant *NewCV = ConstantVector::get(Elts);
+ return NewCV != C ? NewCV : 0;
}
// Limit search depth.
@@ -977,7 +976,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (NewUndefElts) {
// Add additional discovered undefs.
- std::vector<Constant*> Elts;
+ SmallVector<Constant*, 16> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
if (UndefElts[i])
Elts.push_back(UndefValue::get(Type::getInt32Ty(I->getContext())));
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 154267c03465..cf60f0f426dc 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -16,16 +16,16 @@
using namespace llvm;
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
-/// is to leave as a vector operation.
+/// is to leave as a vector operation. isConstant indicates whether we're
+/// extracting one known element. If false, we're extracting a variable index.
static bool CheapToScalarize(Value *V, bool isConstant) {
- if (isa<ConstantAggregateZero>(V))
- return true;
- if (ConstantVector *C = dyn_cast<ConstantVector>(V)) {
+ if (Constant *C = dyn_cast<Constant>(V)) {
if (isConstant) return true;
- // If all elts are the same, we can extract.
- Constant *Op0 = C->getOperand(0);
- for (unsigned i = 1; i < C->getNumOperands(); ++i)
- if (C->getOperand(i) != Op0)
+
+ // If all elts are the same, we can extract any one of them and use it.
+ Constant *Op0 = C->getAggregateElement(0U);
+ for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
+ if (C->getAggregateElement(i) != Op0)
return false;
return true;
}
@@ -53,41 +53,18 @@ static bool CheapToScalarize(Value *V, bool isConstant) {
return false;
}
-/// getShuffleMask - Read and decode a shufflevector mask.
-/// Turn undef elements into negative values.
-static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) {
- unsigned NElts = SVI->getType()->getNumElements();
- if (isa<ConstantAggregateZero>(SVI->getOperand(2)))
- return std::vector<int>(NElts, 0);
- if (isa<UndefValue>(SVI->getOperand(2)))
- return std::vector<int>(NElts, -1);
-
- std::vector<int> Result;
- const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2));
- for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i)
- if (isa<UndefValue>(*i))
- Result.push_back(-1); // undef
- else
- Result.push_back(cast<ConstantInt>(*i)->getZExtValue());
- return Result;
-}
-
/// FindScalarElement - Given a vector and an element number, see if the scalar
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
assert(V->getType()->isVectorTy() && "Not looking at a vector?");
- VectorType *PTy = cast<VectorType>(V->getType());
- unsigned Width = PTy->getNumElements();
+ VectorType *VTy = cast<VectorType>(V->getType());
+ unsigned Width = VTy->getNumElements();
if (EltNo >= Width) // Out of range access.
- return UndefValue::get(PTy->getElementType());
+ return UndefValue::get(VTy->getElementType());
- if (isa<UndefValue>(V))
- return UndefValue::get(PTy->getElementType());
- if (isa<ConstantAggregateZero>(V))
- return Constant::getNullValue(PTy->getElementType());
- if (ConstantVector *CP = dyn_cast<ConstantVector>(V))
- return CP->getOperand(EltNo);
+ if (Constant *C = dyn_cast<Constant>(V))
+ return C->getAggregateElement(EltNo);
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
@@ -106,11 +83,10 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
- unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
- int InEl = getShuffleMask(SVI)[EltNo];
+ unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
+ int InEl = SVI->getMaskValue(EltNo);
if (InEl < 0)
- return UndefValue::get(PTy->getElementType());
+ return UndefValue::get(VTy->getElementType());
if (InEl < (int)LHSWidth)
return FindScalarElement(SVI->getOperand(0), InEl);
return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
@@ -121,27 +97,11 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
- // If vector val is undef, replace extract with scalar undef.
- if (isa<UndefValue>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
-
- // If vector val is constant 0, replace extract with scalar 0.
- if (isa<ConstantAggregateZero>(EI.getOperand(0)))
- return ReplaceInstUsesWith(EI, Constant::getNullValue(EI.getType()));
-
- if (ConstantVector *C = dyn_cast<ConstantVector>(EI.getOperand(0))) {
- // If vector val is constant with all elements the same, replace EI with
- // that element. When the elements are not identical, we cannot replace yet
- // (we do that below, but only when the index is constant).
- Constant *op0 = C->getOperand(0);
- for (unsigned i = 1; i != C->getNumOperands(); ++i)
- if (C->getOperand(i) != op0) {
- op0 = 0;
- break;
- }
- if (op0)
- return ReplaceInstUsesWith(EI, op0);
- }
+ // If vector val is constant with all elements the same, replace EI with
+ // that element. We handle a known element # below.
+ if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
+ if (CheapToScalarize(C, false))
+ return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
// If extracting a specified index from the vector, see if we can recursively
// find a previously computed scalar that was inserted into the vector.
@@ -175,8 +135,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// the same number of elements, see if we can find the source element from
// it. In this case, we will end up needing to bitcast the scalars.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
- if (VectorType *VT =
- dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
+ if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
if (VT->getNumElements() == VectorWidth)
if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
return new BitCastInst(Elt, EI.getType());
@@ -212,10 +171,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
// If this is extracting an element from a shufflevector, figure out where
// it came from and extract from the appropriate input element instead.
if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
- int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()];
+ int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
Value *Src;
unsigned LHSWidth =
- cast<VectorType>(SVI->getOperand(0)->getType())->getNumElements();
+ SVI->getOperand(0)->getType()->getVectorNumElements();
if (SrcIdx < 0)
return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
@@ -248,7 +207,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
/// elements from either LHS or RHS, return the shuffle mask and true.
/// Otherwise, return false.
static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
- std::vector<Constant*> &Mask) {
+ SmallVectorImpl<Constant*> &Mask) {
assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
"Invalid CollectSingleShuffleElements");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
@@ -325,7 +284,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// CollectShuffleElements - We are building a shuffle of V, using RHS as the
/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
/// that computes V and the LHS value of the shuffle.
-static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
+static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
Value *&RHS) {
assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
@@ -335,10 +294,14 @@ static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
return V;
- } else if (isa<ConstantAggregateZero>(V)) {
+ }
+
+ if (isa<ConstantAggregateZero>(V)) {
Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
return V;
- } else if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
+ }
+
+ if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
// If this is an insert of an extract from some other vector, include it.
Value *VecOp = IEI->getOperand(0);
Value *ScalarOp = IEI->getOperand(1);
@@ -421,7 +384,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// If this insertelement isn't used by some other insertelement, turn it
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
- std::vector<Constant*> Mask;
+ SmallVector<Constant*, 16> Mask;
Value *RHS = 0;
Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
if (RHS == 0) RHS = UndefValue::get(LHS->getType());
@@ -447,7 +410,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
- std::vector<int> Mask = getShuffleMask(&SVI);
+ SmallVector<int, 16> Mask = SVI.getShuffleMask();
bool MadeChange = false;
@@ -457,9 +420,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
- if (VWidth != cast<VectorType>(LHS->getType())->getNumElements())
- return 0;
-
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
@@ -470,29 +430,34 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
MadeChange = true;
}
+ unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
+
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
if (LHS == RHS || isa<UndefValue>(LHS)) {
if (isa<UndefValue>(LHS) && LHS == RHS) {
// shuffle(undef,undef,mask) -> undef.
- return ReplaceInstUsesWith(SVI, LHS);
+ Value* result = (VWidth == LHSWidth)
+ ? LHS : UndefValue::get(SVI.getType());
+ return ReplaceInstUsesWith(SVI, result);
}
// Remap any references to RHS to use LHS.
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0)
+ SmallVector<Constant*, 16> Elts;
+ for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
+ if (Mask[i] < 0) {
Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
- else {
- if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
- (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
- Mask[i] = -1; // Turn into undef.
- Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
- } else {
- Mask[i] = Mask[i] % e; // Force to LHS.
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
- Mask[i]));
- }
+ continue;
+ }
+
+ if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
+ (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
+ Mask[i] = -1; // Turn into undef.
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
+ } else {
+ Mask[i] = Mask[i] % e; // Force to LHS.
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
+ Mask[i]));
}
}
SVI.setOperand(0, SVI.getOperand(1));
@@ -503,72 +468,204 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
MadeChange = true;
}
- // Analyze the shuffle, are the LHS or RHS and identity shuffles?
- bool isLHSID = true, isRHSID = true;
+ if (VWidth == LHSWidth) {
+ // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
+ bool isLHSID = true, isRHSID = true;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0) continue; // Ignore undef values.
- // Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == (int)i);
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
- // Is this an identity shuffle of the RHS value?
- isRHSID &= (Mask[i]-e == i);
- }
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
- // Eliminate identity shuffles.
- if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
- if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+ // Eliminate identity shuffles.
+ if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
+ if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
+ }
// If the LHS is a shufflevector itself, see if we can combine it with this
- // one without producing an unusual shuffle. Here we are really conservative:
+ // one without producing an unusual shuffle.
+ // Cases that might be simplified:
+ // 1.
+ // x1=shuffle(v1,v2,mask1)
+ // x=shuffle(x1,undef,mask)
+ // ==>
+ // x=shuffle(v1,undef,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
+ // 2.
+ // x1=shuffle(v1,undef,mask1)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == mask1.size()
+ // ==>
+ // x=shuffle(v1,x2,newMask)
+ // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
+ // 3.
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v2.size() == mask2.size()
+ // ==>
+ // x=shuffle(x1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
+ // 4.
+ // x1=shuffle(v1,undef,mask1)
+ // x2=shuffle(v2,undef,mask2)
+ // x=shuffle(x1,x2,mask)
+ // where v1.size() == v2.size()
+ // ==>
+ // x=shuffle(v1,v2,newMask)
+ // newMask[i] = (mask[i] < x1.size())
+ // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
+ //
+ // Here we are really conservative:
// we are absolutely afraid of producing a shuffle mask not in the input
// program, because the code gen may not be smart enough to turn a merged
// shuffle into two specific shuffles: it may produce worse code. As such,
// we only merge two shuffles if the result is either a splat or one of the
- // two input shuffle masks. In this case, merging the shuffles just removes
+ // input shuffle masks. In this case, merging the shuffles just removes
// one instruction, which we know is safe. This is good for things like
- // turning: (splat(splat)) -> splat.
- if (ShuffleVectorInst *LHSSVI = dyn_cast<ShuffleVectorInst>(LHS)) {
+ // turning: (splat(splat)) -> splat, or
+ // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
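+ // E.g. the splat-of-splat case (illustrative):
+ //   x1 = shuffle(v, undef, <0,0,0,0>); x = shuffle(x1, undef, <0,0,0,0>)
+ //   ==> x = shuffle(v, undef, <0,0,0,0>)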
+ ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
+ if (LHSShuffle)
+ if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
+ LHSShuffle = NULL;
+ if (RHSShuffle)
+ if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
+ RHSShuffle = NULL;
+ if (!LHSShuffle && !RHSShuffle)
+ return MadeChange ? &SVI : 0;
+
+ Value* LHSOp0 = NULL;
+ Value* LHSOp1 = NULL;
+ Value* RHSOp0 = NULL;
+ unsigned LHSOp0Width = 0;
+ unsigned RHSOp0Width = 0;
+ if (LHSShuffle) {
+ LHSOp0 = LHSShuffle->getOperand(0);
+ LHSOp1 = LHSShuffle->getOperand(1);
+ LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
+ }
+ if (RHSShuffle) {
+ RHSOp0 = RHSShuffle->getOperand(0);
+ RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
+ }
+ Value* newLHS = LHS;
+ Value* newRHS = RHS;
+ if (LHSShuffle) {
+ // case 1
if (isa<UndefValue>(RHS)) {
- std::vector<int> LHSMask = getShuffleMask(LHSSVI);
-
- if (LHSMask.size() == Mask.size()) {
- std::vector<int> NewMask;
- bool isSplat = true;
- int SplatElt = -1; // undef
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- int MaskElt;
- if (Mask[i] < 0 || Mask[i] >= (int)e)
- MaskElt = -1; // undef
- else
- MaskElt = LHSMask[Mask[i]];
- // Check if this could still be a splat.
- if (MaskElt >= 0) {
- if (SplatElt >=0 && SplatElt != MaskElt)
- isSplat = false;
- SplatElt = MaskElt;
- }
- NewMask.push_back(MaskElt);
- }
+ newLHS = LHSOp0;
+ newRHS = LHSOp1;
+ }
+ // case 2 or 4
+ else if (LHSOp0Width == LHSWidth) {
+ newLHS = LHSOp0;
+ }
+ }
+ // case 3 or 4
+ if (RHSShuffle && RHSOp0Width == LHSWidth) {
+ newRHS = RHSOp0;
+ }
+ // case 4
+ if (LHSOp0 == RHSOp0) {
+ newLHS = LHSOp0;
+ newRHS = NULL;
+ }
- // If the result mask is equal to the src shuffle or this
- // shuffle mask, do the replacement.
- if (isSplat || NewMask == LHSMask || NewMask == Mask) {
- std::vector<Constant*> Elts;
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] < 0) {
- Elts.push_back(UndefValue::get(Int32Ty));
- } else {
- Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i]));
- }
- }
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
+ if (newLHS == LHS && newRHS == RHS)
+ return MadeChange ? &SVI : 0;
+
+ SmallVector<int, 16> LHSMask;
+ SmallVector<int, 16> RHSMask;
+ if (newLHS != LHS)
+ LHSMask = LHSShuffle->getShuffleMask();
+ if (RHSShuffle && newRHS != RHS)
+ RHSMask = RHSShuffle->getShuffleMask();
+
+ unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
+ SmallVector<int, 16> newMask;
+ bool isSplat = true;
+ int SplatElt = -1;
+ // Create a new mask for the new ShuffleVectorInst so that the new
+ // ShuffleVectorInst is equivalent to the original one.
+ for (unsigned i = 0; i < VWidth; ++i) {
+ int eltMask;
+ if (Mask[i] == -1) {
+ // This element is an undef value.
+ eltMask = -1;
+ } else if (Mask[i] < (int)LHSWidth) {
+ // This element is from the left-hand-side vector operand.
+ //
+ // If LHS is going to be replaced (case 1, 2, or 4), calculate the
+ // new mask value for the element.
+ if (newLHS != LHS) {
+ eltMask = LHSMask[Mask[i]];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
+ eltMask = -1;
+ }
+ else
+ eltMask = Mask[i];
+ } else {
+ // This element is from the right-hand-side vector operand.
+ //
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value. (case 1)
+ if (isa<UndefValue>(RHS))
+ eltMask = -1;
+ // If RHS is going to be replaced (case 3 or 4), calculate the
+ // new mask value for the element.
+ else if (newRHS != RHS) {
+ eltMask = RHSMask[Mask[i]-LHSWidth];
+ // If the value selected is an undef value, explicitly specify it
+ // with a -1 mask value.
+ if (eltMask >= (int)RHSOp0Width) {
+ assert(isa<UndefValue>(RHSShuffle->getOperand(1))
+ && "should have been check above");
+ eltMask = -1;
}
}
+ else
+ eltMask = Mask[i]-LHSWidth;
+
+ // If LHS's width is changed, shift the mask value accordingly.
+ // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
+ // references to RHSOp0 to LHSOp0, so we don't need to shift the mask.
+ if (eltMask >= 0 && newRHS != NULL)
+ eltMask += newLHSWidth;
+ }
+
+ // Check if this could still be a splat.
+ if (eltMask >= 0) {
+ if (SplatElt >= 0 && SplatElt != eltMask)
+ isSplat = false;
+ SplatElt = eltMask;
+ }
+
+ newMask.push_back(eltMask);
+ }
+
+ // If the result mask is equal to one of the original shuffle masks,
+ // or is a splat, do the replacement.
+ if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+ SmallVector<Constant*, 16> Elts;
+ Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+ for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
+ if (newMask[i] < 0) {
+ Elts.push_back(UndefValue::get(Int32Ty));
+ } else {
+ Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
+ }
}
+ if (newRHS == NULL)
+ newRHS = UndefValue::get(newLHS->getType());
+ return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
}
return MadeChange ? &SVI : 0;
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 32009c39ec25..99a02fc0df3f 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -55,9 +55,9 @@ public:
Worklist.reserve(NumEntries+16);
WorklistMap.resize(NumEntries);
DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n");
- for (; NumEntries; --NumEntries) {
+ for (unsigned Idx = 0; NumEntries; --NumEntries) {
Instruction *I = List[NumEntries-1];
- WorklistMap.insert(std::make_pair(I, Worklist.size()));
+ WorklistMap.insert(std::make_pair(I, Idx++));
Worklist.push_back(I);
}
}
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index c15b8058f292..066b2ec89c3e 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
@@ -74,11 +75,15 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
}
char InstCombiner::ID = 0;
-INITIALIZE_PASS(InstCombiner, "instcombine",
+INITIALIZE_PASS_BEGIN(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstCombiner, "instcombine",
"Combine redundant instructions", false, false)
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
@@ -490,7 +495,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantExpr::getNeg(C);
- if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
if (C->getType()->getElementType()->isIntegerTy())
return ConstantExpr::getNeg(C);
@@ -509,7 +514,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V) const {
if (ConstantFP *C = dyn_cast<ConstantFP>(V))
return ConstantExpr::getFNeg(C);
- if (ConstantVector *C = dyn_cast<ConstantVector>(V))
+ if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
if (C->getType()->getElementType()->isFloatingPointTy())
return ConstantExpr::getFNeg(C);
@@ -826,7 +831,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
MadeChange = true;
}
- if ((*I)->getType() != IntPtrTy) {
+ Type *IndexTy = (*I)->getType();
+ if (IndexTy != IntPtrTy && !IndexTy->isVectorTy()) {
// If we are using a wider index than needed for this platform, shrink
// it to what we need. If narrower, sign-extend it to what we need.
// This explicit cast can make subsequent optimizations more obvious.
@@ -909,7 +915,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy =cast<PointerType>(StrippedPtr->getType());
+ PointerType *StrippedPtrTy = dyn_cast<PointerType>(StrippedPtr->getType());
+
+ // We do not handle pointer-vector geps here.
+ if (!StrippedPtrTy)
+ return 0;
+
if (StrippedPtr != PtrOp &&
StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) {
@@ -1235,15 +1246,15 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
if (I->getOpcode() == Instruction::Add)
if (ConstantInt *AddRHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
// change 'switch (X+4) case 1:' into 'switch (X) case -3'
- unsigned NumCases = SI.getNumCases();
// Skip the first item since that's the default case.
- for (unsigned i = 1; i < NumCases; ++i) {
- ConstantInt* CaseVal = SI.getCaseValue(i);
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
+ ConstantInt* CaseVal = i.getCaseValue();
Constant* NewCaseVal = ConstantExpr::getSub(cast<Constant>(CaseVal),
AddRHS);
assert(isa<ConstantInt>(NewCaseVal) &&
"Result of expression should be constant");
- SI.setSuccessorValue(i, cast<ConstantInt>(NewCaseVal));
+ i.setValue(cast<ConstantInt>(NewCaseVal));
}
SI.setCondition(I->getOperand(0));
Worklist.Add(I);
@@ -1260,24 +1271,16 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ReplaceInstUsesWith(EV, Agg);
if (Constant *C = dyn_cast<Constant>(Agg)) {
- if (isa<UndefValue>(C))
- return ReplaceInstUsesWith(EV, UndefValue::get(EV.getType()));
-
- if (isa<ConstantAggregateZero>(C))
- return ReplaceInstUsesWith(EV, Constant::getNullValue(EV.getType()));
-
- if (isa<ConstantArray>(C) || isa<ConstantStruct>(C)) {
- // Extract the element indexed by the first index out of the constant
- Value *V = C->getOperand(*EV.idx_begin());
- if (EV.getNumIndices() > 1)
- // Extract the remaining indices out of the constant indexed by the
- // first index
- return ExtractValueInst::Create(V, EV.getIndices().slice(1));
- else
- return ReplaceInstUsesWith(EV, V);
+ if (Constant *C2 = C->getAggregateElement(*EV.idx_begin())) {
+ if (EV.getNumIndices() == 1)
+ return ReplaceInstUsesWith(EV, C2);
+ // Extract the remaining indices out of the constant indexed by the
+ // first index
+ return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
}
return 0; // Can't handle other constants
- }
+ }
+
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
// We're extracting from an insertvalue instruction, compare the indices
const unsigned *exti, *exte, *insi, *inse;
@@ -1414,7 +1417,8 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
enum Personality_Type {
Unknown_Personality,
GNU_Ada_Personality,
- GNU_CXX_Personality
+ GNU_CXX_Personality,
+ GNU_ObjC_Personality
};
/// RecognizePersonality - See if the given exception handling personality
@@ -1426,7 +1430,8 @@ static Personality_Type RecognizePersonality(Value *Pers) {
return Unknown_Personality;
return StringSwitch<Personality_Type>(F->getName())
.Case("__gnat_eh_personality", GNU_Ada_Personality)
- .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Case("__gxx_personality_v0", GNU_CXX_Personality)
+ .Case("__objc_personality_v0", GNU_ObjC_Personality)
.Default(Unknown_Personality);
}
@@ -1440,6 +1445,7 @@ static bool isCatchAll(Personality_Type Personality, Constant *TypeInfo) {
// match foreign exceptions (or didn't, before gcc-4.7).
return false;
case GNU_CXX_Personality:
+ case GNU_ObjC_Personality:
return TypeInfo->isNullValue();
}
llvm_unreachable("Unknown personality!");
@@ -1795,7 +1801,8 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
static bool AddReachableCodeToWorklist(BasicBlock *BB,
SmallPtrSet<BasicBlock*, 64> &Visited,
InstCombiner &IC,
- const TargetData *TD) {
+ const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
bool MadeIRChange = false;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(BB);
@@ -1822,7 +1829,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -1840,7 +1847,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, TD);
+ FoldRes = ConstantFoldConstantExpression(CE, TD, TLI);
if (!FoldRes)
FoldRes = CE;
@@ -1867,15 +1874,16 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
// See if this is an explicit destination.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if (SI->getCaseValue(i) == Cond) {
- BasicBlock *ReachableBB = SI->getSuccessor(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseValue() == Cond) {
+ BasicBlock *ReachableBB = i.getCaseSuccessor();
Worklist.push_back(ReachableBB);
continue;
}
// Otherwise it is the default destination.
- Worklist.push_back(SI->getSuccessor(0));
+ Worklist.push_back(SI->getDefaultDest());
continue;
}
}
@@ -1899,14 +1907,15 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
MadeIRChange = false;
DEBUG(errs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
- << F.getNameStr() << "\n");
+ << F.getName() << "\n");
{
// Do a depth-first traversal of the function, populate the worklist with
// the reachable instructions. Ignore blocks that are not reachable. Keep
// track of which blocks we visit.
SmallPtrSet<BasicBlock*, 64> Visited;
- MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD);
+ MadeIRChange |= AddReachableCodeToWorklist(F.begin(), Visited, *this, TD,
+ TLI);
// Do a quick scan over the function. If we find any blocks that are
// unreachable, remove any instructions inside of them. This prevents
@@ -1951,7 +1960,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -2059,7 +2068,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
bool InstCombiner::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
-
+ TLI = &getAnalysis<TargetLibraryInfo>();
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt
new file mode 100644
index 000000000000..62c616160cfa
--- /dev/null
+++ b/lib/Transforms/InstCombine/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/InstCombine/LLVMBuild.txt ---------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = InstCombine
+parent = Transforms
+required_libraries = Analysis Core Support Target TransformUtils
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
new file mode 100644
index 000000000000..b43b9e5facee
--- /dev/null
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -0,0 +1,937 @@
+//===-- AddressSanitizer.cpp - memory error detector ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of AddressSanitizer, an address sanity checker.
+// Details of the algorithm:
+// http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asan"
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+#include <string>
+#include <algorithm>
+
+using namespace llvm;
+
+static const uint64_t kDefaultShadowScale = 3;
+static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+
+static const size_t kMaxStackMallocSize = 1 << 16; // 64K
+static const uintptr_t kCurrentStackFrameMagic = 0x41B58AB3;
+static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E;
+
+static const char *kAsanModuleCtorName = "asan.module_ctor";
+static const char *kAsanModuleDtorName = "asan.module_dtor";
+static const int kAsanCtorAndCtorPriority = 1;
+static const char *kAsanReportErrorTemplate = "__asan_report_";
+static const char *kAsanRegisterGlobalsName = "__asan_register_globals";
+static const char *kAsanUnregisterGlobalsName = "__asan_unregister_globals";
+static const char *kAsanInitName = "__asan_init";
+static const char *kAsanHandleNoReturnName = "__asan_handle_no_return";
+static const char *kAsanMappingOffsetName = "__asan_mapping_offset";
+static const char *kAsanMappingScaleName = "__asan_mapping_scale";
+static const char *kAsanStackMallocName = "__asan_stack_malloc";
+static const char *kAsanStackFreeName = "__asan_stack_free";
+
+static const int kAsanStackLeftRedzoneMagic = 0xf1;
+static const int kAsanStackMidRedzoneMagic = 0xf2;
+static const int kAsanStackRightRedzoneMagic = 0xf3;
+static const int kAsanStackPartialRedzoneMagic = 0xf4;
+
+// Command-line flags.
+
+// This flag may need to be replaced with -f[no-]asan-reads.
+static cl::opt<bool> ClInstrumentReads("asan-instrument-reads",
+ cl::desc("instrument read instructions"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClInstrumentWrites("asan-instrument-writes",
+ cl::desc("instrument write instructions"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-stack.
+static cl::opt<bool> ClStack("asan-stack",
+ cl::desc("Handle stack memory"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -f[no]asan-use-after-return.
+static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
+ cl::desc("Check return-after-free"), cl::Hidden, cl::init(false));
+// This flag may need to be replaced with -f[no]asan-globals.
+static cl::opt<bool> ClGlobals("asan-globals",
+ cl::desc("Handle global objects"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClMemIntrin("asan-memintrin",
+ cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
+// This flag may need to be replaced with -fasan-blacklist.
+static cl::opt<std::string> ClBlackListFile("asan-blacklist",
+ cl::desc("File containing the list of functions to ignore "
+ "during instrumentation"), cl::Hidden);
+
+// These flags allow changing the shadow mapping.
+// The shadow mapping looks like
+// Shadow = (Mem >> scale) + (1 << offset_log)
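+// E.g. with the 32-bit defaults above (scale == 3, offset == 1 << 29),
+// Shadow(0x40000000) == (0x40000000 >> 3) + 0x20000000 == 0x28000000.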
+static cl::opt<int> ClMappingScale("asan-mapping-scale",
+ cl::desc("scale of asan shadow mapping"), cl::Hidden, cl::init(0));
+static cl::opt<int> ClMappingOffsetLog("asan-mapping-offset-log",
+ cl::desc("offset of asan shadow mapping"), cl::Hidden, cl::init(-1));
+
+// Optimization flags. Not user visible, used mostly for testing
+// and benchmarking the tool.
+static cl::opt<bool> ClOpt("asan-opt",
+ cl::desc("Optimize instrumentation"), cl::Hidden, cl::init(true));
+static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
+ cl::desc("Instrument the same temp just once"), cl::Hidden,
+ cl::init(true));
+static cl::opt<bool> ClOptGlobals("asan-opt-globals",
+ cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
+
+// Debug flags.
+static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
+ cl::init(0));
+static cl::opt<int> ClDebugStack("asan-debug-stack", cl::desc("debug stack"),
+ cl::Hidden, cl::init(0));
+static cl::opt<std::string> ClDebugFunc("asan-debug-func",
+ cl::Hidden, cl::desc("Debug func"));
+static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
+ cl::Hidden, cl::init(-1));
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
+ cl::Hidden, cl::init(-1));
+
+namespace {
+
+/// AddressSanitizer: instrument the code in module to find memory bugs.
+struct AddressSanitizer : public ModulePass {
+ AddressSanitizer();
+ virtual const char *getPassName() const;
+ void instrumentMop(Instruction *I);
+ void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
+ Value *Addr, uint32_t TypeSize, bool IsWrite);
+ Instruction *generateCrashCode(IRBuilder<> &IRB, Value *Addr,
+ bool IsWrite, uint32_t TypeSize);
+ bool instrumentMemIntrinsic(MemIntrinsic *MI);
+ void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
+ Value *Size,
+ Instruction *InsertBefore, bool IsWrite);
+ Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
+ bool handleFunction(Module &M, Function &F);
+ bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ bool poisonStackInFunction(Module &M, Function &F);
+ virtual bool runOnModule(Module &M);
+ bool insertGlobalRedzones(Module &M);
+ BranchInst *splitBlockAndInsertIfThen(Instruction *SplitBefore, Value *Cmp);
+ static char ID; // Pass identification, replacement for typeid
+
+ private:
+
+ uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
+ Type *Ty = AI->getAllocatedType();
+ uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
+ return SizeInBytes;
+ }
+ uint64_t getAlignedSize(uint64_t SizeInBytes) {
+ return ((SizeInBytes + RedzoneSize - 1)
+ / RedzoneSize) * RedzoneSize;
+ }
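+ // E.g. with RedzoneSize == 32, getAlignedSize(33) above rounds 33 up to 64.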
+ uint64_t getAlignedAllocaSize(AllocaInst *AI) {
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ return getAlignedSize(SizeInBytes);
+ }
+
+ void PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec, IRBuilder<> IRB,
+ Value *ShadowBase, bool DoPoison);
+ bool LooksLikeCodeInBug11395(Instruction *I);
+
+ Module *CurrentModule;
+ LLVMContext *C;
+ TargetData *TD;
+ uint64_t MappingOffset;
+ int MappingScale;
+ size_t RedzoneSize;
+ int LongSize;
+ Type *IntptrTy;
+ Type *IntptrPtrTy;
+ Function *AsanCtorFunction;
+ Function *AsanInitFunction;
+ Instruction *CtorInsertBefore;
+ OwningPtr<FunctionBlackList> BL;
+};
+} // namespace
+
+char AddressSanitizer::ID = 0;
+INITIALIZE_PASS(AddressSanitizer, "asan",
+ "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
+ false, false)
+AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
+ModulePass *llvm::createAddressSanitizerPass() {
+ return new AddressSanitizer();
+}
+
+const char *AddressSanitizer::getPassName() const {
+ return "AddressSanitizer";
+}
+
+// Create a constant for Str so that we can pass it to the run-time lib.
+static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
+ Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+ return new GlobalVariable(M, StrConst->getType(), true,
+ GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+// Split the basic block and insert if-then code.
+// Before:
+// Head
+// SplitBefore
+// Tail
+// After:
+// Head
+// if (Cmp)
+// NewBasicBlock
+// SplitBefore
+// Tail
+//
+// Returns the NewBasicBlock's terminator.
+BranchInst *AddressSanitizer::splitBlockAndInsertIfThen(
+ Instruction *SplitBefore, Value *Cmp) {
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ BasicBlock *NewBasicBlock =
+ BasicBlock::Create(*C, "", Head->getParent());
+ BranchInst *HeadNewTerm = BranchInst::Create(/*ifTrue*/NewBasicBlock,
+ /*ifFalse*/Tail,
+ Cmp);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
+ BranchInst *CheckTerm = BranchInst::Create(Tail, NewBasicBlock);
+ return CheckTerm;
+}
+
+Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
+ // Shadow >> scale
+ Shadow = IRB.CreateLShr(Shadow, MappingScale);
+ if (MappingOffset == 0)
+ return Shadow;
+ // (Shadow >> scale) | offset
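+ // OR is used instead of ADD here on the assumption that the shifted
+ // address and MappingOffset occupy disjoint bits, where the two agree.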
+ return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy,
+ MappingOffset));
+}
+
+void AddressSanitizer::instrumentMemIntrinsicParam(Instruction *OrigIns,
+ Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
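+ // Only the two endpoints are instrumented: e.g. for memset(p, c, n)
+ // this checks the shadow for p and for p + n - 1, a deliberately cheap
+ // approximation of checking the whole range.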
+ // Check the first byte.
+ {
+ IRBuilder<> IRB(InsertBefore);
+ instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
+ }
+ // Check the last byte.
+ {
+ IRBuilder<> IRB(InsertBefore);
+ Value *SizeMinusOne = IRB.CreateSub(
+ Size, ConstantInt::get(Size->getType(), 1));
+ SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ Value *AddrPlusSizeMinusOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
+ instrumentAddress(OrigIns, IRB, AddrPlusSizeMinusOne, 8, IsWrite);
+ }
+}
+
+// Instrument memset/memmove/memcpy
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ Value *Dst = MI->getDest();
+ MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
+ Value *Src = MemTran ? MemTran->getSource() : NULL;
+ Value *Length = MI->getLength();
+
+ Constant *ConstLength = dyn_cast<Constant>(Length);
+ Instruction *InsertBefore = MI;
+ if (ConstLength) {
+ if (ConstLength->isNullValue()) return false;
+ } else {
+ // The size is not a constant so it could be zero -- check at run-time.
+ IRBuilder<> IRB(InsertBefore);
+
+ Value *Cmp = IRB.CreateICmpNE(Length,
+ Constant::getNullValue(Length->getType()));
+ InsertBefore = splitBlockAndInsertIfThen(InsertBefore, Cmp);
+ }
+
+ instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
+ if (Src)
+ instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
+ return true;
+}
+
+static Value *getLDSTOperand(Instruction *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ return LI->getPointerOperand();
+ }
+ return cast<StoreInst>(*I).getPointerOperand();
+}
+
+void AddressSanitizer::instrumentMop(Instruction *I) {
+ int IsWrite = isa<StoreInst>(*I);
+ Value *Addr = getLDSTOperand(I);
+ if (ClOpt && ClOptGlobals && isa<GlobalVariable>(Addr)) {
+ // We are accessing a global scalar variable. Nothing to catch here.
+ return;
+ }
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+
+ if (TypeSize != 8 && TypeSize != 16 &&
+ TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+ // Ignore all unusual sizes.
+ return;
+ }
+
+ IRBuilder<> IRB(I);
+ instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
+}
+
+Instruction *AddressSanitizer::generateCrashCode(
+ IRBuilder<> &IRB, Value *Addr, bool IsWrite, uint32_t TypeSize) {
+ // IsWrite and TypeSize are encoded in the function name.
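+ // E.g. a 4-byte store is reported via a call to __asan_report_store4.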
+ std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+ (IsWrite ? "store" : "load") + itostr(TypeSize / 8);
+ Value *ReportWarningFunc = CurrentModule->getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IntptrTy, NULL);
+ CallInst *Call = IRB.CreateCall(ReportWarningFunc, Addr);
+ Call->setDoesNotReturn();
+ return Call;
+}
+
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
+ IRBuilder<> &IRB, Value *Addr,
+ uint32_t TypeSize, bool IsWrite) {
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+
+ Type *ShadowTy = IntegerType::get(
+ *C, std::max(8U, TypeSize >> MappingScale));
+ Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
+ Value *ShadowPtr = memToShadow(AddrLong, IRB);
+ Value *CmpVal = Constant::getNullValue(ShadowTy);
+ Value *ShadowValue = IRB.CreateLoad(
+ IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+
+ Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
+
+ Instruction *CheckTerm = splitBlockAndInsertIfThen(
+ cast<Instruction>(Cmp)->getNextNode(), Cmp);
+ IRBuilder<> IRB2(CheckTerm);
+
+ size_t Granularity = 1 << MappingScale;
+ if (TypeSize < 8 * Granularity) {
+ // Addr & (Granularity - 1)
+ Value *Lower3Bits = IRB2.CreateAnd(
+ AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
+ // (Addr & (Granularity - 1)) + size - 1
+ Value *LastAccessedByte = IRB2.CreateAdd(
+ Lower3Bits, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+ // (uint8_t) ((Addr & (Granularity-1)) + size - 1)
+ LastAccessedByte = IRB2.CreateIntCast(
+ LastAccessedByte, IRB.getInt8Ty(), false);
+ // ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
+ Value *Cmp2 = IRB2.CreateICmpSGE(LastAccessedByte, ShadowValue);
+
+ CheckTerm = splitBlockAndInsertIfThen(CheckTerm, Cmp2);
+ }
+
+ IRBuilder<> IRB1(CheckTerm);
+ Instruction *Crash = generateCrashCode(IRB1, AddrLong, IsWrite, TypeSize);
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
+ ReplaceInstWithInst(CheckTerm, new UnreachableInst(*C));
+}
+
+// This function replaces all global variables with new variables that have
+// trailing redzones. It also creates a function that poisons
+// redzones and inserts this function into llvm.global_ctors.
+bool AddressSanitizer::insertGlobalRedzones(Module &M) {
+ SmallVector<GlobalVariable *, 16> GlobalsToChange;
+
+ for (Module::GlobalListType::iterator G = M.getGlobalList().begin(),
+ E = M.getGlobalList().end(); G != E; ++G) {
+ Type *Ty = cast<PointerType>(G->getType())->getElementType();
+ DEBUG(dbgs() << "GLOBAL: " << *G);
+
+ if (!Ty->isSized()) continue;
+ if (!G->hasInitializer()) continue;
+ // Touch only those globals that will not be defined in other modules.
+ // Don't handle ODR linkages (linkonce_odr, weak_odr) since other modules may be built w/o asan.
+ if (G->getLinkage() != GlobalVariable::ExternalLinkage &&
+ G->getLinkage() != GlobalVariable::PrivateLinkage &&
+ G->getLinkage() != GlobalVariable::InternalLinkage)
+ continue;
+ // Two problems with thread-locals:
+ // - The address of the main thread's copy can't be computed at link-time.
+ // - Need to poison all copies, not just the main thread's one.
+ if (G->isThreadLocal())
+ continue;
+ // For now, just ignore this global if the alignment is large.
+ if (G->getAlignment() > RedzoneSize) continue;
+
+ // Ignore all the globals with the names starting with "\01L_OBJC_".
+ // Many of those are put into the .cstring section. The linker compresses
+ // that section by removing the spare \0s after the string terminator, so
+ // our redzones get broken.
+ if ((G->getName().find("\01L_OBJC_") == 0) ||
+ (G->getName().find("\01l_OBJC_") == 0)) {
+ DEBUG(dbgs() << "Ignoring \\01L_OBJC_* global: " << *G);
+ continue;
+ }
+
+ if (G->hasSection()) {
+ StringRef Section(G->getSection());
+ // Ignore the globals from the __OBJC section. The ObjC runtime assumes
+ // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
+ // them.
+ if ((Section.find("__OBJC,") == 0) ||
+ (Section.find("__DATA, __objc_") == 0)) {
+ DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G);
+ continue;
+ }
+ // See http://code.google.com/p/address-sanitizer/issues/detail?id=32
+ // Constant CFString instances are compiled in the following way:
+ // -- the string buffer is emitted into
+ // __TEXT,__cstring,cstring_literals
+ // -- the constant NSConstantString structure referencing that buffer
+ // is placed into __DATA,__cfstring
+ // Therefore there's no point in placing redzones into __DATA,__cfstring.
+ // Moreover, it causes the linker to crash on OS X 10.7
+ if (Section.find("__DATA,__cfstring") == 0) {
+ DEBUG(dbgs() << "Ignoring CFString: " << *G);
+ continue;
+ }
+ }
+
+ GlobalsToChange.push_back(G);
+ }
+
+ size_t n = GlobalsToChange.size();
+ if (n == 0) return false;
+
+ // A global is described by a structure
+ // size_t beg;
+ // size_t size;
+ // size_t size_with_redzone;
+ // const char *name;
+ // We initialize an array of such structures and pass it to a run-time call.
+ StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy,
+ IntptrTy, IntptrTy, NULL);
+ SmallVector<Constant *, 16> Initializers(n);
+
+ IRBuilder<> IRB(CtorInsertBefore);
+
+ for (size_t i = 0; i < n; i++) {
+ GlobalVariable *G = GlobalsToChange[i];
+ PointerType *PtrTy = cast<PointerType>(G->getType());
+ Type *Ty = PtrTy->getElementType();
+ uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
+ uint64_t RightRedzoneSize = RedzoneSize +
+ (RedzoneSize - (SizeInBytes % RedzoneSize));
+ Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
+
+ StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL);
+ Constant *NewInitializer = ConstantStruct::get(
+ NewTy, G->getInitializer(),
+ Constant::getNullValue(RightRedZoneTy), NULL);
+
+ SmallString<2048> DescriptionOfGlobal = G->getName();
+ DescriptionOfGlobal += " (";
+ DescriptionOfGlobal += M.getModuleIdentifier();
+ DescriptionOfGlobal += ")";
+ GlobalVariable *Name = createPrivateGlobalForString(M, DescriptionOfGlobal);
+
+ // Create a new global variable with enough space for a redzone.
+ GlobalVariable *NewGlobal = new GlobalVariable(
+ M, NewTy, G->isConstant(), G->getLinkage(),
+ NewInitializer, "", G, G->isThreadLocal());
+ NewGlobal->copyAttributesFrom(G);
+ NewGlobal->setAlignment(RedzoneSize);
+
+ Value *Indices2[2];
+ Indices2[0] = IRB.getInt32(0);
+ Indices2[1] = IRB.getInt32(0);
+
+ G->replaceAllUsesWith(
+ ConstantExpr::getGetElementPtr(NewGlobal, Indices2, true));
+ NewGlobal->takeName(G);
+ G->eraseFromParent();
+
+ Initializers[i] = ConstantStruct::get(
+ GlobalStructTy,
+ ConstantExpr::getPointerCast(NewGlobal, IntptrTy),
+ ConstantInt::get(IntptrTy, SizeInBytes),
+ ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize),
+ ConstantExpr::getPointerCast(Name, IntptrTy),
+ NULL);
+ DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
+ }
+
+ ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
+ GlobalVariable *AllGlobals = new GlobalVariable(
+ M, ArrayOfGlobalStructTy, false, GlobalVariable::PrivateLinkage,
+ ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
+
+ Function *AsanRegisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanRegisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanRegisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB.CreateCall2(AsanRegisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+
+ // We also need to unregister globals at the end, e.g. when a shared
+ // library is unloaded.
+ Function *AsanDtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);
+ BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
+ IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
+ Function *AsanUnregisterGlobals = cast<Function>(M.getOrInsertFunction(
+ kAsanUnregisterGlobalsName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+
+ IRB_Dtor.CreateCall2(AsanUnregisterGlobals,
+ IRB.CreatePointerCast(AllGlobals, IntptrTy),
+ ConstantInt::get(IntptrTy, n));
+ appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority);
+
+ DEBUG(dbgs() << M);
+ return true;
+}
+
+// virtual
+bool AddressSanitizer::runOnModule(Module &M) {
+ // Initialize the private fields. No one has accessed them before.
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD)
+ return false;
+ BL.reset(new FunctionBlackList(ClBlackListFile));
+
+ CurrentModule = &M;
+ C = &(M.getContext());
+ LongSize = TD->getPointerSizeInBits();
+ IntptrTy = Type::getIntNTy(*C, LongSize);
+ IntptrPtrTy = PointerType::get(IntptrTy, 0);
+
+ AsanCtorFunction = Function::Create(
+ FunctionType::get(Type::getVoidTy(*C), false),
+ GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+ BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
+ CtorInsertBefore = ReturnInst::Create(*C, AsanCtorBB);
+
+ // call __asan_init in the module ctor.
+ IRBuilder<> IRB(CtorInsertBefore);
+ AsanInitFunction = cast<Function>(
+ M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+ AsanInitFunction->setLinkage(Function::ExternalLinkage);
+ IRB.CreateCall(AsanInitFunction);
+
+ MappingOffset = LongSize == 32
+ ? kDefaultShadowOffset32 : kDefaultShadowOffset64;
+ if (ClMappingOffsetLog >= 0) {
+ if (ClMappingOffsetLog == 0) {
+ // special case
+ MappingOffset = 0;
+ } else {
+ MappingOffset = 1ULL << ClMappingOffsetLog;
+ }
+ }
+ MappingScale = kDefaultShadowScale;
+ if (ClMappingScale) {
+ MappingScale = ClMappingScale;
+ }
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+ RedzoneSize = std::max(32, (int)(1 << MappingScale));
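+ // With these parameters memToShadow (defined earlier) computes
+ //   Shadow = (Mem >> MappingScale) + MappingOffset,
+ // so at the default scale of 3 each shadow byte covers 8 application bytes.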
+
+ bool Res = false;
+
+ if (ClGlobals)
+ Res |= insertGlobalRedzones(M);
+
+ if (ClMappingOffsetLog >= 0) {
+ // Tell the run-time the current values of mapping offset and scale.
+ GlobalValue *asan_mapping_offset =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, MappingOffset),
+ kAsanMappingOffsetName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_offset, true);
+ }
+ if (ClMappingScale) {
+ GlobalValue *asan_mapping_scale =
+ new GlobalVariable(M, IntptrTy, true, GlobalValue::LinkOnceODRLinkage,
+ ConstantInt::get(IntptrTy, MappingScale),
+ kAsanMappingScaleName);
+ // Read the global, otherwise it may be optimized away.
+ IRB.CreateLoad(asan_mapping_scale, true);
+ }
+
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (F->isDeclaration()) continue;
+ Res |= handleFunction(M, *F);
+ }
+
+ appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
+
+ return Res;
+}
+
+bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
+ // For each NSObject descendant having a +load method, this method is invoked
+ // by the ObjC runtime before any of the static constructors is called.
+ // Therefore we need to instrument such methods with a call to __asan_init
+ // at the beginning in order to initialize our runtime before any access to
+ // the shadow memory.
+ // We cannot just ignore these methods, because they may call other
+ // instrumented functions.
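+ // E.g. the IR function generated for "+[MyClass load]" carries " load]"
+ // in its name and is matched below.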
+ if (F.getName().find(" load]") != std::string::npos) {
+ IRBuilder<> IRB(F.begin()->begin());
+ IRB.CreateCall(AsanInitFunction);
+ return true;
+ }
+ return false;
+}
+
+bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+ if (BL->isIn(F)) return false;
+ if (&F == AsanCtorFunction) return false;
+
+ // If needed, insert __asan_init before checking for AddressSafety attr.
+ maybeInsertAsanInitAtFunctionEntry(F);
+
+ if (!F.hasFnAttr(Attribute::AddressSafety)) return false;
+
+ if (!ClDebugFunc.empty() && ClDebugFunc != F.getName())
+ return false;
+ // We want to instrument every address only once per basic block
+ // (unless there are calls between uses).
+ SmallSet<Value*, 16> TempsToInstrument;
+ SmallVector<Instruction*, 16> ToInstrument;
+ SmallVector<Instruction*, 8> NoReturnCalls;
+
+ // Fill the set of memory operations to instrument.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ TempsToInstrument.clear();
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ if (LooksLikeCodeInBug11395(BI)) return false;
+ if ((isa<LoadInst>(BI) && ClInstrumentReads) ||
+ (isa<StoreInst>(BI) && ClInstrumentWrites)) {
+ Value *Addr = getLDSTOperand(BI);
+ if (ClOpt && ClOptSameTemp) {
+ if (!TempsToInstrument.insert(Addr))
+ continue; // We've seen this temp in the current BB.
+ }
+ } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
+ // ok, take it.
+ } else {
+ if (CallInst *CI = dyn_cast<CallInst>(BI)) {
+ // A call inside BB.
+ TempsToInstrument.clear();
+ if (CI->doesNotReturn()) {
+ NoReturnCalls.push_back(CI);
+ }
+ }
+ continue;
+ }
+ ToInstrument.push_back(BI);
+ }
+ }
+
+ // Instrument.
+ int NumInstrumented = 0;
+ for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
+ Instruction *Inst = ToInstrument[i];
+ if (ClDebugMin < 0 || ClDebugMax < 0 ||
+ (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
+ if (isa<StoreInst>(Inst) || isa<LoadInst>(Inst))
+ instrumentMop(Inst);
+ else
+ instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
+ }
+ NumInstrumented++;
+ }
+
+ DEBUG(dbgs() << F);
+
+ bool ChangedStack = poisonStackInFunction(M, F);
+
+ // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
+ // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
+ for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
+ Instruction *CI = NoReturnCalls[i];
+ IRBuilder<> IRB(CI);
+ IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName,
+ IRB.getVoidTy(), NULL));
+ }
+
+ return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+}
+
+static uint64_t ValueForPoison(uint64_t PoisonByte, size_t ShadowRedzoneSize) {
+ if (ShadowRedzoneSize == 1) return PoisonByte;
+ if (ShadowRedzoneSize == 2) return (PoisonByte << 8) + PoisonByte;
+ if (ShadowRedzoneSize == 4)
+ return (PoisonByte << 24) + (PoisonByte << 16) +
+ (PoisonByte << 8) + (PoisonByte);
+ llvm_unreachable("ShadowRedzoneSize is either 1, 2 or 4");
+}
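+// E.g. ValueForPoison(0xf1, 4) replicates the poison byte into 0xf1f1f1f1,
+// poisoning the whole 4-byte shadow redzone word at once.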
+
+static void PoisonShadowPartialRightRedzone(uint8_t *Shadow,
+ size_t Size,
+ size_t RedzoneSize,
+ size_t ShadowGranularity,
+ uint8_t Magic) {
+ for (size_t i = 0; i < RedzoneSize;
+ i+= ShadowGranularity, Shadow++) {
+ if (i + ShadowGranularity <= Size) {
+ *Shadow = 0; // fully addressable
+ } else if (i >= Size) {
+ *Shadow = Magic; // unaddressable
+ } else {
+ *Shadow = Size - i; // first Size-i bytes are addressable
+ }
+ }
+}
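+// E.g. Size == 5, RedzoneSize == 16, ShadowGranularity == 8 writes two
+// shadow bytes: 5 (only the first five bytes of the granule are addressable)
+// followed by Magic (fully unaddressable).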
+
+void AddressSanitizer::PoisonStack(const ArrayRef<AllocaInst*> &AllocaVec,
+ IRBuilder<> IRB,
+ Value *ShadowBase, bool DoPoison) {
+ size_t ShadowRZSize = RedzoneSize >> MappingScale;
+ assert(ShadowRZSize >= 1 && ShadowRZSize <= 4);
+ Type *RZTy = Type::getIntNTy(*C, ShadowRZSize * 8);
+ Type *RZPtrTy = PointerType::get(RZTy, 0);
+
+ Value *PoisonLeft = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackLeftRedzoneMagic : 0LL, ShadowRZSize));
+ Value *PoisonMid = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackMidRedzoneMagic : 0LL, ShadowRZSize));
+ Value *PoisonRight = ConstantInt::get(RZTy,
+ ValueForPoison(DoPoison ? kAsanStackRightRedzoneMagic : 0LL, ShadowRZSize));
+
+ // Poison the first redzone.
+ IRB.CreateStore(PoisonLeft, IRB.CreateIntToPtr(ShadowBase, RZPtrTy));
+
+ // Poison all other redzones.
+ uint64_t Pos = RedzoneSize;
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ assert(AlignedSize - SizeInBytes < RedzoneSize);
+ Value *Ptr = NULL;
+
+ Pos += AlignedSize;
+
+ assert(ShadowBase->getType() == IntptrTy);
+ if (SizeInBytes < AlignedSize) {
+ // Poison the partial redzone on the right.
+ Ptr = IRB.CreateAdd(
+ ShadowBase, ConstantInt::get(IntptrTy,
+ (Pos >> MappingScale) - ShadowRZSize));
+ size_t AddressableBytes = RedzoneSize - (AlignedSize - SizeInBytes);
+ uint32_t Poison = 0;
+ if (DoPoison) {
+ PoisonShadowPartialRightRedzone((uint8_t*)&Poison, AddressableBytes,
+ RedzoneSize,
+ 1ULL << MappingScale,
+ kAsanStackPartialRedzoneMagic);
+ }
+ Value *PartialPoison = ConstantInt::get(RZTy, Poison);
+ IRB.CreateStore(PartialPoison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+ }
+
+ // Poison the full redzone on the right.
+ Ptr = IRB.CreateAdd(ShadowBase,
+ ConstantInt::get(IntptrTy, Pos >> MappingScale));
+ Value *Poison = i == AllocaVec.size() - 1 ? PoisonRight : PoisonMid;
+ IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, RZPtrTy));
+
+ Pos += RedzoneSize;
+ }
+}
+
+// Workaround for bug 11395: we don't want to instrument the stack in
+// functions with large assembly blobs (32-bit only), otherwise reg alloc
+// may crash.
+// FIXME: remove once the bug 11395 is fixed.
+bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
+ if (LongSize != 32) return false;
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (!CI || !CI->isInlineAsm()) return false;
+ if (CI->getNumArgOperands() <= 5) return false;
+ // We have inline assembly with quite a few arguments.
+ return true;
+}
+
+// Find all static Alloca instructions and put
+// poisoned red zones around all of them.
+// Then unpoison everything back before the function returns.
+//
+// Stack poisoning does not play well with exception handling.
+// When an exception is thrown, we essentially bypass the code
+// that unpoisons the stack. This is why the run-time library has
+// to intercept __cxa_throw (as well as longjmp, etc) and unpoison the entire
+// stack in the interceptor. This, however, does not work inside the
+// function that catches the exception, most likely because the
+// compiler hoists the load of the shadow value somewhere too high.
+// This causes asan to report a non-existing bug on 453.povray.
+// It sounds like an LLVM bug.
+bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+ if (!ClStack) return false;
+ SmallVector<AllocaInst*, 16> AllocaVec;
+ SmallVector<Instruction*, 8> RetVec;
+ uint64_t TotalSize = 0;
+
+ // Filter out Alloca instructions we want (and can) handle.
+ // Collect Ret instructions.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ BasicBlock &BB = *FI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
+ BI != BE; ++BI) {
+ if (isa<ReturnInst>(BI)) {
+ RetVec.push_back(BI);
+ continue;
+ }
+
+ AllocaInst *AI = dyn_cast<AllocaInst>(BI);
+ if (!AI) continue;
+ if (AI->isArrayAllocation()) continue;
+ if (!AI->isStaticAlloca()) continue;
+ if (!AI->getAllocatedType()->isSized()) continue;
+ if (AI->getAlignment() > RedzoneSize) continue;
+ AllocaVec.push_back(AI);
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ TotalSize += AlignedSize;
+ }
+ }
+
+ if (AllocaVec.empty()) return false;
+
+ uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize;
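+ // E.g. two allocas whose aligned sizes are 32 bytes each, with
+ // RedzoneSize == 32: LocalStackSize = 64 + 3 * 32 == 160, laid out as
+ // LeftRZ | a0 | MidRZ | a1 | RightRZ.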
+
+ bool DoStackMalloc = ClUseAfterReturn
+ && LocalStackSize <= kMaxStackMallocSize;
+
+ Instruction *InsBefore = AllocaVec[0];
+ IRBuilder<> IRB(InsBefore);
+
+ Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
+ AllocaInst *MyAlloca =
+ new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
+ MyAlloca->setAlignment(RedzoneSize);
+ assert(MyAlloca->isStaticAlloca());
+ Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
+ Value *LocalStackBase = OrigStackBase;
+
+ if (DoStackMalloc) {
+ Value *AsanStackMallocFunc = M.getOrInsertFunction(
+ kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
+ LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
+ ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
+ }
+
+ // This string will be parsed by the run-time (DescribeStackAddress).
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << F.getName() << " " << AllocaVec.size() << " ";
+
+ uint64_t Pos = RedzoneSize;
+ // Replace Alloca instructions with base+offset.
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++) {
+ AllocaInst *AI = AllocaVec[i];
+ uint64_t SizeInBytes = getAllocaSizeInBytes(AI);
+ StringRef Name = AI->getName();
+ StackDescription << Pos << " " << SizeInBytes << " "
+ << Name.size() << " " << Name << " ";
+ uint64_t AlignedSize = getAlignedAllocaSize(AI);
+ assert((AlignedSize % RedzoneSize) == 0);
+ AI->replaceAllUsesWith(
+ IRB.CreateIntToPtr(
+ IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
+ AI->getType()));
+ Pos += AlignedSize + RedzoneSize;
+ }
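+ // E.g. a function "foo" with a single 4-byte alloca "a" produces the
+ // description "foo 1 32 4 1 a " (first slot at offset RedzoneSize == 32).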
+ assert(Pos == LocalStackSize);
+
+ // Write the Magic value and the frame description constant to the redzone.
+ Value *BasePlus0 = IRB.CreateIntToPtr(LocalStackBase, IntptrPtrTy);
+ IRB.CreateStore(ConstantInt::get(IntptrTy, kCurrentStackFrameMagic),
+ BasePlus0);
+ Value *BasePlus1 = IRB.CreateAdd(LocalStackBase,
+ ConstantInt::get(IntptrTy, LongSize/8));
+ BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
+ Value *Description = IRB.CreatePointerCast(
+ createPrivateGlobalForString(M, StackDescription.str()),
+ IntptrTy);
+ IRB.CreateStore(Description, BasePlus1);
+
+ // Poison the stack redzones at the entry.
+ Value *ShadowBase = memToShadow(LocalStackBase, IRB);
+ PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
+
+ Value *AsanStackFreeFunc = NULL;
+ if (DoStackMalloc) {
+ AsanStackFreeFunc = M.getOrInsertFunction(
+ kAsanStackFreeName, IRB.getVoidTy(),
+ IntptrTy, IntptrTy, IntptrTy, NULL);
+ }
+
+ // Unpoison the stack before all ret instructions.
+ for (size_t i = 0, n = RetVec.size(); i < n; i++) {
+ Instruction *Ret = RetVec[i];
+ IRBuilder<> IRBRet(Ret);
+
+ // Mark the current frame as retired.
+ IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic),
+ BasePlus0);
+ // Unpoison the stack.
+ PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false);
+
+ if (DoStackMalloc) {
+ IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
+ ConstantInt::get(IntptrTy, LocalStackSize),
+ OrigStackBase);
+ }
+ }
+
+ if (ClDebugStack) {
+ DEBUG(dbgs() << F);
+ }
+
+ return true;
+}
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 7b3a927a4e68..e4c8cf105cee 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -1,15 +1,11 @@
add_llvm_library(LLVMInstrumentation
+ AddressSanitizer.cpp
EdgeProfiling.cpp
+ FunctionBlackList.cpp
GCOVProfiling.cpp
Instrumentation.cpp
OptimalEdgeProfiling.cpp
PathProfiling.cpp
ProfilingUtils.cpp
- )
-
-add_llvm_library_dependencies(LLVMInstrumentation
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTransformUtils
+ ThreadSanitizer.cpp
)
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.cpp b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
new file mode 100644
index 000000000000..188ea4d9b3cb
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.cpp
@@ -0,0 +1,79 @@
+//===-- FunctionBlackList.cpp - blacklist of functions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Function.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+FunctionBlackList::FunctionBlackList(const std::string &Path) {
+ Functions = NULL;
+ const char *kFunPrefix = "fun:";
+ if (!Path.size()) return;
+ std::string Fun;
+
+ OwningPtr<MemoryBuffer> File;
+ if (error_code EC = MemoryBuffer::getFile(Path.c_str(), File)) {
+ report_fatal_error("Can't open blacklist file " + Path + ": " +
+ EC.message());
+ }
+ MemoryBuffer *Buff = File.take();
+ const char *Data = Buff->getBufferStart();
+ size_t DataLen = Buff->getBufferSize();
+ SmallVector<StringRef, 16> Lines;
+ SplitString(StringRef(Data, DataLen), Lines, "\n\r");
+ for (size_t i = 0, numLines = Lines.size(); i < numLines; i++) {
+ if (Lines[i].startswith(kFunPrefix)) {
+ std::string ThisFunc = Lines[i].substr(strlen(kFunPrefix));
+ std::string ThisFuncRE;
+ // Add ThisFunc, replacing each '*' with ".*".
+ for (size_t j = 0, n = ThisFunc.size(); j < n; j++) {
+ if (ThisFunc[j] == '*')
+ ThisFuncRE += '.';
+ ThisFuncRE += ThisFunc[j];
+ }
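+ // E.g. "fun:*_ZN4base*" turns the wildcard "*_ZN4base*" into the
+ // regexp ".*_ZN4base.*".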
+ // Check that the regexp is valid.
+ Regex CheckRE(ThisFuncRE);
+ std::string Error;
+ if (!CheckRE.isValid(Error))
+ report_fatal_error("malformed blacklist regex: " + ThisFunc +
+ ": " + Error);
+ // Append to the final regexp.
+ if (Fun.size())
+ Fun += "|";
+ Fun += ThisFuncRE;
+ }
+ }
+ if (Fun.size()) {
+ Functions = new Regex(Fun);
+ }
+}
+
+bool FunctionBlackList::isIn(const Function &F) {
+ if (Functions) {
+ bool Res = Functions->match(F.getName());
+ return Res;
+ }
+ return false;
+}
+
+} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/FunctionBlackList.h b/lib/Transforms/Instrumentation/FunctionBlackList.h
new file mode 100644
index 000000000000..c1239b9b7e0d
--- /dev/null
+++ b/lib/Transforms/Instrumentation/FunctionBlackList.h
@@ -0,0 +1,37 @@
+//===-- FunctionBlackList.h - blacklist of functions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// This is a utility class for instrumentation passes (like AddressSanitizer
+// or ThreadSanitizer) to avoid instrumenting some functions based on
+// a user-supplied blacklist.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include <string>
+
+namespace llvm {
+class Function;
+class Regex;
+
+// Blacklisted functions are not instrumented.
+// The blacklist file contains one or more lines like this:
+// ---
+// fun:FunctionWildCard
+// ---
+// This is similar to the "ignore" feature of ThreadSanitizer.
+// http://code.google.com/p/data-race-test/wiki/ThreadSanitizerIgnores
+class FunctionBlackList {
+ public:
+ FunctionBlackList(const std::string &Path);
+ bool isIn(const Function &F);
+ private:
+ Regex *Functions;
+};
+
+} // namespace llvm
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index ccf7e1109cd9..96e5d5b31140 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -43,12 +43,14 @@ namespace {
public:
static char ID;
GCOVProfiler()
- : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false) {
+ : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
+ UseExtraChecksum(false) {
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
- GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false)
+ GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
+ bool useExtraChecksum = false)
: ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
- Use402Format(use402Format) {
+ Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
}
@@ -94,6 +96,7 @@ namespace {
bool EmitNotes;
bool EmitData;
bool Use402Format;
+ bool UseExtraChecksum;
Module *M;
LLVMContext *Ctx;
@@ -105,8 +108,9 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
"Insert instrumentation for GCOV profiling", false, false)
ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
- bool Use402Format) {
- return new GCOVProfiler(EmitNotes, EmitData, Use402Format);
+ bool Use402Format,
+ bool UseExtraChecksum) {
+ return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
}
namespace {
@@ -167,7 +171,7 @@ namespace {
}
uint32_t length() {
- // Here 2 = 1 for string lenght + 1 for '0' id#.
+ // Here 2 = 1 for string length + 1 for '0' id#.
return lengthOfGCOVString(Filename) + 2 + Lines.size();
}
@@ -244,10 +248,12 @@ namespace {
// object users can construct, the blocks and lines will be rooted here.
class GCOVFunction : public GCOVRecord {
public:
- GCOVFunction(DISubprogram SP, raw_ostream *os, bool Use402Format) {
+ GCOVFunction(DISubprogram SP, raw_ostream *os,
+ bool Use402Format, bool UseExtraChecksum) {
this->os = os;
Function *F = SP.getFunction();
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
uint32_t i = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
Blocks[BB] = new GCOVBlock(i++, os);
@@ -257,14 +263,14 @@ namespace {
writeBytes(FunctionTag, 4);
uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) +
1 + lengthOfGCOVString(SP.getFilename()) + 1;
- if (!Use402Format)
- ++BlockLen; // For second checksum.
+ if (UseExtraChecksum)
+ ++BlockLen;
write(BlockLen);
uint32_t Ident = reinterpret_cast<intptr_t>((MDNode*)SP);
write(Ident);
- write(0); // checksum #1
- if (!Use402Format)
- write(0); // checksum #2
+ write(0); // lineno checksum
+ if (UseExtraChecksum)
+ write(0); // cfg checksum
writeGCOVString(SP.getName());
writeGCOVString(SP.getFilename());
write(SP.getLineNumber());
@@ -290,6 +296,7 @@ namespace {
for (int i = 0, e = Blocks.size() + 1; i != e; ++i) {
write(0); // No flags on our blocks.
}
+ DEBUG(dbgs() << Blocks.size() << " blocks.\n");
// Emit edges between blocks.
for (DenseMap<BasicBlock *, GCOVBlock *>::iterator I = Blocks.begin(),
@@ -301,6 +308,8 @@ namespace {
write(Block.OutEdges.size() * 2 + 1);
write(Block.Number);
for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) {
+ DEBUG(dbgs() << Block.Number << " -> " << Block.OutEdges[i]->Number
+ << "\n");
write(Block.OutEdges[i]->Number);
write(0); // no flags
}
@@ -350,68 +359,60 @@ bool GCOVProfiler::runOnModule(Module &M) {
}
void GCOVProfiler::emitGCNO() {
- DenseMap<const MDNode *, raw_fd_ostream *> GcnoFiles;
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
- if (CU_Nodes) {
- for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
- // Each compile unit gets its own .gcno file. This means that whether we run
- // this pass over the original .o's as they're produced, or run it after
- // LTO, we'll generate the same .gcno files.
-
- DICompileUnit CU(CU_Nodes->getOperand(i));
- raw_fd_ostream *&out = GcnoFiles[CU];
- std::string ErrorInfo;
- out = new raw_fd_ostream(mangleName(CU, "gcno").c_str(), ErrorInfo,
- raw_fd_ostream::F_Binary);
- if (!Use402Format)
- out->write("oncg*404MVLL", 12);
- else
- out->write("oncg*204MVLL", 12);
-
- DIArray SPs = CU.getSubprograms();
- for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
- DISubprogram SP(SPs.getElement(i));
- if (!SP.Verify()) continue;
- raw_fd_ostream *&os = GcnoFiles[CU];
-
- Function *F = SP.getFunction();
- if (!F) continue;
- GCOVFunction Func(SP, os, Use402Format);
-
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func.getBlock(BB);
- TerminatorInst *TI = BB->getTerminator();
- if (int successors = TI->getNumSuccessors()) {
- for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
- }
- } else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func.getReturnBlock());
- }
-
- uint32_t Line = 0;
- for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
- const DebugLoc &Loc = I->getDebugLoc();
- if (Loc.isUnknown()) continue;
- if (Line == Loc.getLine()) continue;
- Line = Loc.getLine();
- if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
-
- GCOVLines &Lines = Block.getFile(SP.getFilename());
- Lines.addLine(Loc.getLine());
+ if (!CU_Nodes) return;
+
+ for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
+ // Each compile unit gets its own .gcno file. This means that whether we run
+ // this pass over the original .o's as they're produced, or run it after
+ // LTO, we'll generate the same .gcno files.
+
+ DICompileUnit CU(CU_Nodes->getOperand(i));
+ std::string ErrorInfo;
+ raw_fd_ostream out(mangleName(CU, "gcno").c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary);
+ if (!Use402Format)
+ out.write("oncg*404MVLL", 12);
+ else
+ out.write("oncg*204MVLL", 12);
+
+ DIArray SPs = CU.getSubprograms();
+ for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) {
+ DISubprogram SP(SPs.getElement(i));
+ if (!SP.Verify()) continue;
+
+ Function *F = SP.getFunction();
+ if (!F) continue;
+ GCOVFunction Func(SP, &out, Use402Format, UseExtraChecksum);
+
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ GCOVBlock &Block = Func.getBlock(BB);
+ TerminatorInst *TI = BB->getTerminator();
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
+ } else if (isa<ReturnInst>(TI)) {
+ Block.addEdge(Func.getReturnBlock());
+ }
+
+ uint32_t Line = 0;
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Line == Loc.getLine()) continue;
+ Line = Loc.getLine();
+ if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue;
+
+ GCOVLines &Lines = Block.getFile(SP.getFilename());
+ Lines.addLine(Loc.getLine());
}
- Func.writeOut();
}
+ Func.writeOut();
}
- }
-
- for (DenseMap<const MDNode *, raw_fd_ostream *>::iterator
- I = GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) {
- raw_fd_ostream *&out = I->second;
- out->write("\0\0\0\0\0\0\0\0", 8); // EOF
- out->close();
- delete out;
+ out.write("\0\0\0\0\0\0\0\0", 8); // EOF
+ out.close();
}
}
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 71adc1ec6de0..c7266e2f8de6 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -24,6 +24,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
initializeOptimalEdgeProfilerPass(Registry);
initializePathProfilerPass(Registry);
initializeGCOVProfilerPass(Registry);
+ initializeAddressSanitizerPass(Registry);
+ initializeThreadSanitizerPass(Registry);
}
/// LLVMInitializeInstrumentation - C binding for
diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt
new file mode 100644
index 000000000000..d36ad540ee80
--- /dev/null
+++ b/lib/Transforms/Instrumentation/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/Instrumentation/LLVMBuild.txt -----------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Instrumentation
+parent = Transforms
+required_libraries = Analysis Core Support TransformUtils
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 62c21b8e9c59..1fe12545d294 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -69,7 +69,7 @@ inline static void printEdgeCounter(ProfileInfo::Edge e,
BasicBlock* b,
unsigned i) {
DEBUG(dbgs() << "--Edge Counter for " << (e) << " in " \
- << ((b)?(b)->getNameStr():"0") << " (# " << (i) << ")\n");
+ << ((b)?(b)->getName():"0") << " (# " << (i) << ")\n");
}
bool OptimalEdgeProfiler::runOnModule(Module &M) {
@@ -127,7 +127,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
unsigned i = 0;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration()) continue;
- DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n");
+ DEBUG(dbgs() << "Working on " << F->getName() << "\n");
// Calculate a Maximum Spanning Tree with the edge weights determined by
// ProfileEstimator. ProfileEstimator also assign weights to the virtual
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 23915d39f214..b2147968dfac 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -665,7 +665,7 @@ void BLInstrumentationDag::unlinkPhony() {
// Generate a .dot graph to represent the DAG and pathNumbers
void BLInstrumentationDag::generateDotGraph() {
std::string errorInfo;
- std::string functionName = getFunction().getNameStr();
+ std::string functionName = getFunction().getName().str();
std::string filename = "pathdag." + functionName + ".dot";
DEBUG (dbgs() << "Writing '" << filename << "'...\n");
@@ -750,7 +750,8 @@ Value* BLInstrumentationNode::getStartingPathNumber(){
// Sets the Value of the pathNumber. Used by the instrumentation code.
void BLInstrumentationNode::setStartingPathNumber(Value* pathNumber) {
DEBUG(dbgs() << " SPN-" << getName() << " <-- " << (pathNumber ?
- pathNumber->getNameStr() : "unused") << "\n");
+ pathNumber->getName() :
+ "unused") << "\n");
_startingPathNumber = pathNumber;
}
@@ -760,7 +761,7 @@ Value* BLInstrumentationNode::getEndingPathNumber(){
void BLInstrumentationNode::setEndingPathNumber(Value* pathNumber) {
DEBUG(dbgs() << " EPN-" << getName() << " <-- "
- << (pathNumber ? pathNumber->getNameStr() : "unused") << "\n");
+ << (pathNumber ? pathNumber->getName() : "unused") << "\n");
_endingPathNumber = pathNumber;
}
@@ -1239,9 +1240,9 @@ void PathProfiler::insertInstrumentation(
insertPoint++;
DEBUG(dbgs() << "\nInstrumenting method call block '"
- << node->getBlock()->getNameStr() << "'\n");
+ << node->getBlock()->getName() << "'\n");
DEBUG(dbgs() << " Path number initialized: "
- << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
+ << ((node->getStartingPathNumber()) ? "yes" : "no") << "\n");
Value* newpn;
if( node->getStartingPathNumber() ) {
@@ -1370,7 +1371,7 @@ bool PathProfiler::runOnModule(Module &M) {
if (F->isDeclaration())
continue;
- DEBUG(dbgs() << "Function: " << F->getNameStr() << "\n");
+ DEBUG(dbgs() << "Function: " << F->getName() << "\n");
functionNumber++;
// set function number
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
new file mode 100644
index 000000000000..8bb337eb2b05
--- /dev/null
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -0,0 +1,311 @@
+//===-- ThreadSanitizer.cpp - race detector -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer, a race detector.
+//
+// The tool is under development. For details about previous versions, see
+// http://code.google.com/p/data-race-test
+//
+// The instrumentation phase is quite simple:
+// - Insert calls to run-time library before every memory access.
+// - Optimizations may apply to avoid instrumenting some of the accesses.
+// - Insert calls at function entry/exit.
+// The rest is handled by the run-time library.
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tsan"
+
+#include "FunctionBlackList.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+static cl::opt<std::string> ClBlackListFile("tsan-blacklist",
+ cl::desc("Blacklist file"), cl::Hidden);
+
+static cl::opt<bool> ClPrintStats("tsan-print-stats",
+ cl::desc("Print ThreadSanitizer instrumentation stats"), cl::Hidden);
+
+namespace {
+
+// Stats counters for ThreadSanitizer instrumentation.
+struct ThreadSanitizerStats {
+ size_t NumInstrumentedReads;
+ size_t NumInstrumentedWrites;
+ size_t NumOmittedReadsBeforeWrite;
+ size_t NumAccessesWithBadSize;
+ size_t NumInstrumentedVtableWrites;
+ size_t NumOmittedReadsFromConstantGlobals;
+ size_t NumOmittedReadsFromVtable;
+};
+
+/// ThreadSanitizer: instrument the code in module to find races.
+struct ThreadSanitizer : public FunctionPass {
+ ThreadSanitizer();
+ bool runOnFunction(Function &F);
+ bool doInitialization(Module &M);
+ bool doFinalization(Module &M);
+ bool instrumentLoadOrStore(Instruction *I);
+ static char ID; // Pass identification, replacement for typeid.
+
+ private:
+ void choseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
+ SmallVectorImpl<Instruction*> &All);
+ bool addrPointsToConstantData(Value *Addr);
+
+ TargetData *TD;
+ OwningPtr<FunctionBlackList> BL;
+ // Callbacks to the run-time library are computed in doInitialization.
+ Value *TsanFuncEntry;
+ Value *TsanFuncExit;
+ // Access sizes are powers of two: 1, 2, 4, 8, 16.
+ static const size_t kNumberOfAccessSizes = 5;
+ Value *TsanRead[kNumberOfAccessSizes];
+ Value *TsanWrite[kNumberOfAccessSizes];
+ Value *TsanVptrUpdate;
+
+ // Stats are modified w/o synchronization.
+ ThreadSanitizerStats stats;
+};
+} // namespace
+
+char ThreadSanitizer::ID = 0;
+INITIALIZE_PASS(ThreadSanitizer, "tsan",
+ "ThreadSanitizer: detects data races.",
+ false, false)
+
+ThreadSanitizer::ThreadSanitizer()
+ : FunctionPass(ID),
+ TD(NULL) {
+}
+
+FunctionPass *llvm::createThreadSanitizerPass() {
+ return new ThreadSanitizer();
+}
+
+bool ThreadSanitizer::doInitialization(Module &M) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (!TD)
+ return false;
+ BL.reset(new FunctionBlackList(ClBlackListFile));
+ memset(&stats, 0, sizeof(stats));
+
+ // Always insert a call to __tsan_init into the module's CTORs.
+ IRBuilder<> IRB(M.getContext());
+ Value *TsanInit = M.getOrInsertFunction("__tsan_init",
+ IRB.getVoidTy(), NULL);
+ appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+
+ // Initialize the callbacks.
+ TsanFuncEntry = M.getOrInsertFunction("__tsan_func_entry", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), NULL);
+ TsanFuncExit = M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(),
+ NULL);
+ for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
+ SmallString<32> ReadName("__tsan_read");
+ ReadName += itostr(1 << i);
+ TsanRead[i] = M.getOrInsertFunction(ReadName, IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), NULL);
+ SmallString<32> WriteName("__tsan_write");
+ WriteName += itostr(1 << i);
+ TsanWrite[i] = M.getOrInsertFunction(WriteName, IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), NULL);
+ }
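+ // The loop above declares __tsan_read1 .. __tsan_read16 and
+ // __tsan_write1 .. __tsan_write16 (access sizes 1, 2, 4, 8, 16).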
+ TsanVptrUpdate = M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ NULL);
+ return true;
+}
+
+bool ThreadSanitizer::doFinalization(Module &M) {
+ if (ClPrintStats) {
+ errs() << "ThreadSanitizerStats " << M.getModuleIdentifier()
+ << ": wr " << stats.NumInstrumentedWrites
+ << "; rd " << stats.NumInstrumentedReads
+ << "; vt " << stats.NumInstrumentedVtableWrites
+ << "; bs " << stats.NumAccessesWithBadSize
+ << "; rbw " << stats.NumOmittedReadsBeforeWrite
+ << "; rcg " << stats.NumOmittedReadsFromConstantGlobals
+ << "; rvt " << stats.NumOmittedReadsFromVtable
+ << "\n";
+ }
+ return true;
+}
+
+static bool isVtableAccess(Instruction *I) {
+ if (MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) {
+ if (Tag->getNumOperands() < 1) return false;
+ if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
+ if (Tag1->getString() == "vtable pointer") return true;
+ }
+ }
+ return false;
+}
+
+bool ThreadSanitizer::addrPointsToConstantData(Value *Addr) {
+ // If this is a GEP, just analyze its pointer operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
+ Addr = GEP->getPointerOperand();
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+ if (GV->isConstant()) {
+ // Reads from constant globals cannot race with any writes.
+ stats.NumOmittedReadsFromConstantGlobals++;
+ return true;
+ }
+ } else if(LoadInst *L = dyn_cast<LoadInst>(Addr)) {
+ if (isVtableAccess(L)) {
+ // Reads from a vtable pointer cannot race with any writes.
+ stats.NumOmittedReadsFromVtable++;
+ return true;
+ }
+ }
+ return false;
+}
+
+// Instrumenting some of the accesses may be proven redundant.
+// Currently handled:
+// - read-before-write (within same BB, no calls between)
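+//    E.g. in "t = x; x = t + 1;" within one BB the load of x is skipped:
+//    a racing write to x would also race with the store that follows.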
+//
+// We do not handle some of the patterns that should not survive
+// the classic compiler optimizations.
+// E.g. two reads from the same temp should be eliminated by CSE,
+// two writes should be eliminated by DSE, etc.
+//
+// 'Local' is a vector of insns within the same BB (no calls between).
+// 'All' is a vector of insns that will be instrumented.
+void ThreadSanitizer::choseInstructionsToInstrument(
+ SmallVectorImpl<Instruction*> &Local,
+ SmallVectorImpl<Instruction*> &All) {
+ SmallSet<Value*, 8> WriteTargets;
+ // Iterate from the end.
+ for (SmallVectorImpl<Instruction*>::reverse_iterator It = Local.rbegin(),
+ E = Local.rend(); It != E; ++It) {
+ Instruction *I = *It;
+ if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
+ WriteTargets.insert(Store->getPointerOperand());
+ } else {
+ LoadInst *Load = cast<LoadInst>(I);
+ Value *Addr = Load->getPointerOperand();
+ if (WriteTargets.count(Addr)) {
+ // We will write to this temp, so no reason to analyze the read.
+ stats.NumOmittedReadsBeforeWrite++;
+ continue;
+ }
+ if (addrPointsToConstantData(Addr)) {
+ // Addr points to some constant data -- it cannot race with any writes.
+ continue;
+ }
+ }
+ All.push_back(I);
+ }
+ Local.clear();
+}
+
+bool ThreadSanitizer::runOnFunction(Function &F) {
+ if (!TD) return false;
+ if (BL->isIn(F)) return false;
+ SmallVector<Instruction*, 8> RetVec;
+ SmallVector<Instruction*, 8> AllLoadsAndStores;
+ SmallVector<Instruction*, 8> LocalLoadsAndStores;
+ bool Res = false;
+ bool HasCalls = false;
+
+ // Traverse all instructions, collect loads/stores/returns, check for calls.
+ for (Function::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI) {
+ BasicBlock &BB = *FI;
+ for (BasicBlock::iterator BI = BB.begin(), BE = BB.end();
+ BI != BE; ++BI) {
+ if (isa<LoadInst>(BI) || isa<StoreInst>(BI))
+ LocalLoadsAndStores.push_back(BI);
+ else if (isa<ReturnInst>(BI))
+ RetVec.push_back(BI);
+ else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) {
+ HasCalls = true;
+ choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ }
+ }
+ choseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores);
+ }
+
+ // We have collected all loads and stores.
+ // FIXME: many of these accesses do not need to be checked for races
+ // (e.g. variables that do not escape, etc).
+
+ // Instrument memory accesses.
+ for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) {
+ Res |= instrumentLoadOrStore(AllLoadsAndStores[i]);
+ }
+
+ // Instrument function entry/exit points if there were instrumented accesses.
+ if (Res || HasCalls) {
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ Value *ReturnAddress = IRB.CreateCall(
+ Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
+ IRB.getInt32(0));
+ IRB.CreateCall(TsanFuncEntry, ReturnAddress);
+ for (size_t i = 0, n = RetVec.size(); i < n; ++i) {
+ IRBuilder<> IRBRet(RetVec[i]);
+ IRBRet.CreateCall(TsanFuncExit);
+ }
+ Res = true;
+ }
+ return Res;
+}
+
+bool ThreadSanitizer::instrumentLoadOrStore(Instruction *I) {
+ IRBuilder<> IRB(I);
+ bool IsWrite = isa<StoreInst>(*I);
+ Value *Addr = IsWrite
+ ? cast<StoreInst>(I)->getPointerOperand()
+ : cast<LoadInst>(I)->getPointerOperand();
+ Type *OrigPtrTy = Addr->getType();
+ Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
+ assert(OrigTy->isSized());
+ uint32_t TypeSize = TD->getTypeStoreSizeInBits(OrigTy);
+ if (TypeSize != 8 && TypeSize != 16 &&
+ TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+ stats.NumAccessesWithBadSize++;
+ // Ignore all unusual sizes.
+ return false;
+ }
+ if (IsWrite && isVtableAccess(I)) {
+ Value *StoredValue = cast<StoreInst>(I)->getValueOperand();
+ IRB.CreateCall2(TsanVptrUpdate,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(StoredValue, IRB.getInt8PtrTy()));
+ stats.NumInstrumentedVtableWrites++;
+ return true;
+ }
+ size_t Idx = CountTrailingZeros_32(TypeSize / 8);
+ assert(Idx < kNumberOfAccessSizes);
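+ // E.g. a 32-bit access: TypeSize / 8 == 4, Idx == 2, dispatching to
+ // __tsan_read4 or __tsan_write4.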
+ Value *OnAccessFunc = IsWrite ? TsanWrite[Idx] : TsanRead[Idx];
+ IRB.CreateCall(OnAccessFunc, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
+ if (IsWrite) stats.NumInstrumentedWrites++;
+ else stats.NumInstrumentedReads++;
+ return true;
+}
diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt
new file mode 100644
index 000000000000..f7bca064c7e1
--- /dev/null
+++ b/lib/Transforms/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Transforms/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize
+
+[component_0]
+type = Group
+name = Transforms
+parent = Libraries
diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile
index e527be25decb..8b1df92fa28b 100644
--- a/lib/Transforms/Makefile
+++ b/lib/Transforms/Makefile
@@ -8,7 +8,7 @@
##===----------------------------------------------------------------------===##
LEVEL = ../..
-PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Hello
+PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello
include $(LEVEL)/Makefile.config
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 79bcae58250f..d660c722c7ca 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -7,6 +7,7 @@ add_llvm_library(LLVMScalarOpts
DCE.cpp
DeadStoreElimination.cpp
EarlyCSE.cpp
+ GlobalMerge.cpp
GVN.cpp
IndVarSimplify.cpp
JumpThreading.cpp
@@ -31,12 +32,3 @@ add_llvm_library(LLVMScalarOpts
Sink.cpp
TailRecursionElimination.cpp
)
-
-add_llvm_library_dependencies(LLVMScalarOpts
- LLVMAnalysis
- LLVMCore
- LLVMInstCombine
- LLVMSupport
- LLVMTarget
- LLVMTransformUtils
- )
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index f8f18b217355..9a5423f4e2eb 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -64,11 +65,17 @@ static cl::opt<bool> DisableBranchOpts(
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
cl::desc("Disable branch optimizations in CodeGenPrepare"));
+// FIXME: Remove this abomination once all of the tests pass without it!
+static cl::opt<bool> DisableDeleteDeadBlocks(
+ "disable-cgp-delete-dead-blocks", cl::Hidden, cl::init(false),
+ cl::desc("Disable deleting dead blocks in CodeGenPrepare"));
+
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+ const TargetLibraryInfo *TLInfo;
DominatorTree *DT;
ProfileInfo *PFI;
@@ -97,6 +104,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
private:
@@ -116,7 +124,10 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
"Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
@@ -127,6 +138,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
ModifiedDT = false;
+ TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
@@ -153,8 +165,22 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (!DisableBranchOpts) {
MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ SmallPtrSet<BasicBlock*, 8> WorkList;
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
MadeChange |= ConstantFoldTerminator(BB, true);
+ if (!MadeChange) continue;
+
+ for (SmallVectorImpl<BasicBlock*>::iterator
+ II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+ if (pred_begin(*II) == pred_end(*II))
+ WorkList.insert(*II);
+ }
+
+ if (!DisableDeleteDeadBlocks)
+ for (SmallPtrSet<BasicBlock*, 8>::iterator
+ I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
+ DeleteDeadBlock(*I);
if (MadeChange)
ModifiedDT = true;
@@ -541,8 +567,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0,
- ModifiedDT ? 0 : DT);
+ replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
+ TLInfo, ModifiedDT ? 0 : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -553,6 +579,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
return true;
}
+ if (II && TLI) {
+ SmallVector<Value*, 2> PtrOps;
+ Type *AccessTy;
+ if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty())
+ if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
+ return true;
+ }
+
// From here on out we're working with named functions.
if (CI->getCalledFunction() == 0) return false;
@@ -612,7 +647,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// It's not safe to eliminate the sign / zero extension of the return value.
// See llvm::isInTailCallPosition().
const Function *F = BB->getParent();
- unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
return false;
@@ -667,7 +702,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes();
+ Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
continue;
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 664c3f6a222f..5430f6253884 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -24,6 +24,8 @@
#include "llvm/Constant.h"
#include "llvm/Instruction.h"
#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/ADT/Statistic.h"
#include <set>
@@ -42,19 +44,22 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
char ConstantPropagation::ID = 0;
-INITIALIZE_PASS(ConstantPropagation, "constprop",
+INITIALIZE_PASS_BEGIN(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(ConstantPropagation, "constprop",
"Simple constant propagation", false, false)
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
}
-
bool ConstantPropagation::runOnFunction(Function &F) {
// Initialize the worklist to all of the instructions ready to process...
std::set<Instruction*> WorkList;
@@ -62,13 +67,15 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.insert(&*i);
}
bool Changed = false;
+ TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
Instruction *I = *WorkList.begin();
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD, TLI)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index e275268fc4ea..9b0aadb0b5b0 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -28,6 +28,7 @@ STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
STATISTIC(NumCmps, "Number of comparisons propagated");
+STATISTIC(NumDeadCases, "Number of switch cases removed");
namespace {
class CorrelatedValuePropagation : public FunctionPass {
@@ -37,6 +38,7 @@ namespace {
bool processPHI(PHINode *P);
bool processMemAccess(Instruction *I);
bool processCmp(CmpInst *C);
+ bool processSwitch(SwitchInst *SI);
public:
static char ID;
@@ -110,7 +112,8 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
Changed = true;
}
- ++NumPhis;
+ if (Changed)
+ ++NumPhis;
return Changed;
}
@@ -173,6 +176,86 @@ bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
return true;
}
+/// processSwitch - Simplify a switch instruction by removing cases which can
+/// never fire. If the uselessness of a case could be determined locally then
+/// constant propagation would already have figured it out. Instead, walk the
+/// predecessors and statically evaluate cases based on information available
+/// on that edge. Cases that cannot fire no matter which edge is taken can
+/// safely be removed. If a case fires on every incoming edge then the entire
+/// switch can be removed and replaced with a branch to the case destination.
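+///
+/// For example (an illustrative sketch, not taken from a real test case): if
+/// LazyValueInfo proves on every incoming edge that the condition is nonzero,
+/// a "case 0" arm can never fire and is removed:
+///
+///   switch (x) { case 0: f(); break; case 1: g(); break; }  // x != 0 known
+///     ==>
+///   switch (x) { case 1: g(); break; }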
+bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
+ Value *Cond = SI->getCondition();
+ BasicBlock *BB = SI->getParent();
+
+ // If the condition was defined in the same block as the switch then
+ // LazyValueInfo currently won't say anything useful about it, though in
+ // theory it could.
+ if (isa<Instruction>(Cond) && cast<Instruction>(Cond)->getParent() == BB)
+ return false;
+
+ // If the switch is unreachable then trying to improve it is a waste of time.
+ pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
+ if (PB == PE) return false;
+
+ // Analyse each switch case in turn. This is done in reverse order so that
+ // removing a case doesn't cause trouble for the iteration.
+ bool Changed = false;
+ for (SwitchInst::CaseIt CI = SI->case_end(), CE = SI->case_begin();
+ CI-- != CE; ) {
+ ConstantInt *Case = CI.getCaseValue();
+
+ // Check to see if the switch condition is equal to/not equal to the case
+ // value on every incoming edge, equal/not equal being the same each time.
+ LazyValueInfo::Tristate State = LazyValueInfo::Unknown;
+ for (pred_iterator PI = PB; PI != PE; ++PI) {
+ // Is the switch condition equal to the case value?
+ LazyValueInfo::Tristate Value = LVI->getPredicateOnEdge(CmpInst::ICMP_EQ,
+ Cond, Case, *PI, BB);
+ // Give up on this case if nothing is known.
+ if (Value == LazyValueInfo::Unknown) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+
+ // If this was the first edge to be visited, record that all other edges
+ // need to give the same result.
+ if (PI == PB) {
+ State = Value;
+ continue;
+ }
+
+ // If this case is known to fire for some edges and known not to fire for
+ // others then there is nothing we can do - give up.
+ if (Value != State) {
+ State = LazyValueInfo::Unknown;
+ break;
+ }
+ }
+
+ if (State == LazyValueInfo::False) {
+ // This case never fires - remove it.
+ CI.getCaseSuccessor()->removePredecessor(BB);
+ SI->removeCase(CI); // Does not invalidate the iterator.
+ ++NumDeadCases;
+ Changed = true;
+ } else if (State == LazyValueInfo::True) {
+ // This case always fires. Arrange for the switch to be turned into an
+ // unconditional branch by replacing the switch condition with the case
+ // value.
+ SI->setCondition(Case);
+ NumDeadCases += SI->getNumCases();
+ Changed = true;
+ break;
+ }
+ }
+
+ if (Changed)
+ // If the switch has been simplified to the point where it can be replaced
+ // by a branch then do so now.
+ ConstantFoldTerminator(BB);
+
+ return Changed;
+}
+
bool CorrelatedValuePropagation::runOnFunction(Function &F) {
LVI = &getAnalysis<LazyValueInfo>();
@@ -200,6 +283,13 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {
}
}
+ Instruction *Term = FI->getTerminator();
+ switch (Term->getOpcode()) {
+ case Instruction::Switch:
+ BBChanged |= processSwitch(cast<SwitchInst>(Term));
+ break;
+ }
+
FnChanged |= BBChanged;
}
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index a593d0f44633..c8c53606015a 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -24,6 +24,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -33,6 +34,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
using namespace llvm;
STATISTIC(NumFastStores, "Number of stores deleted");
@@ -42,25 +44,26 @@ namespace {
struct DSE : public FunctionPass {
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
+ DominatorTree *DT;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(0), MD(0) {
+ DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
virtual bool runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
- DominatorTree &DT = getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTree>();
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer
// cycles that will confuse alias analysis.
- if (DT.isReachableFromEntry(I))
+ if (DT->isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
- AA = 0; MD = 0;
+ AA = 0; MD = 0; DT = 0;
return Changed;
}
@@ -221,7 +224,7 @@ static bool isRemovable(Instruction *I) {
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
- default: assert(0 && "doesn't pass 'hasMemoryWrite' predicate");
+ default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
case Intrinsic::lifetime_end:
// Never remove dead lifetime_end's, e.g. because it is followed by a
// free.
@@ -238,6 +241,24 @@ static bool isRemovable(Instruction *I) {
}
}
+
+/// isShortenable - Returns true if this instruction can be safely shortened in
+/// length.
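+/// Currently that means memset and memcpy, whose length operand can simply
+/// be reduced; plain stores are never shortened for now.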
+static bool isShortenable(Instruction *I) {
+ // Don't shorten stores for now
+ if (isa<StoreInst>(I))
+ return false;
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ // Do shorten memory intrinsics.
+ return true;
+ }
+}
+
/// getStoredPointerOperand - Return the pointer that is being written to.
static Value *getStoredPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
@@ -247,46 +268,61 @@ static Value *getStoredPointerOperand(Instruction *I) {
IntrinsicInst *II = cast<IntrinsicInst>(I);
switch (II->getIntrinsicID()) {
- default: assert(false && "Unexpected intrinsic!");
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return II->getArgOperand(0);
}
}
-static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) {
+static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
const TargetData *TD = AA.getTargetData();
+
+ if (const CallInst *CI = extractMallocCall(V)) {
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
+ return C->getZExtValue();
+ }
+
if (TD == 0)
return AliasAnalysis::UnknownSize;
- if (AllocaInst *A = dyn_cast<AllocaInst>(V)) {
+ if (const AllocaInst *A = dyn_cast<AllocaInst>(V)) {
// Get size information for the alloca
- if (ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(A->getArraySize()))
return C->getZExtValue() * TD->getTypeAllocSize(A->getAllocatedType());
- return AliasAnalysis::UnknownSize;
}
- assert(isa<Argument>(V) && "Expected AllocaInst or Argument!");
- PointerType *PT = cast<PointerType>(V->getType());
- return TD->getTypeAllocSize(PT->getElementType());
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (A->hasByValAttr())
+ if (PointerType *PT = dyn_cast<PointerType>(A->getType()))
+ return TD->getTypeAllocSize(PT->getElementType());
+ }
+
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (!GV->mayBeOverridden())
+ return TD->getTypeAllocSize(GV->getType()->getElementType());
+ }
+
+ return AliasAnalysis::UnknownSize;
}
-/// isObjectPointerWithTrustworthySize - Return true if the specified Value* is
-/// pointing to an object with a pointer size we can trust.
-static bool isObjectPointerWithTrustworthySize(const Value *V) {
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return !AI->isArrayAllocation();
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
- return !GV->mayBeOverridden();
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
- return false;
+namespace {
+ enum OverwriteResult
+ {
+ OverwriteComplete,
+ OverwriteEnd,
+ OverwriteUnknown
+ };
}
-/// isCompleteOverwrite - Return true if a store to the 'Later' location
+/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location
/// completely overwrites a store to the 'Earlier' location.
-static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
- const AliasAnalysis::Location &Earlier,
- AliasAnalysis &AA) {
+/// 'OverwriteEnd' if the end of the 'Earlier' location is completely
+/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined.
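+/// For example (illustrative, sizes in bytes): an earlier 4-byte store to %p
+/// followed by a later 8-byte store to the same %p yields OverwriteComplete,
+/// so the earlier store is dead.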
+static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
+ const AliasAnalysis::Location &Earlier,
+ AliasAnalysis &AA,
+ int64_t &EarlierOff,
+ int64_t &LaterOff) {
const Value *P1 = Earlier.Ptr->stripPointerCasts();
const Value *P2 = Later.Ptr->stripPointerCasts();
@@ -300,23 +336,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
// If we have no TargetData information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (AA.getTargetData() == 0)
- return Later.Ptr->getType() == Earlier.Ptr->getType();
- return false;
+ if (AA.getTargetData() == 0 &&
+ Later.Ptr->getType() == Earlier.Ptr->getType())
+ return OverwriteComplete;
+
+ return OverwriteUnknown;
}
// Make sure that the Later size is >= the Earlier size.
- if (Later.Size < Earlier.Size)
- return false;
- return true;
+ if (Later.Size >= Earlier.Size)
+ return OverwriteComplete;
}
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
Earlier.Size == AliasAnalysis::UnknownSize ||
- Later.Size <= Earlier.Size || AA.getTargetData() == 0)
- return false;
+ AA.getTargetData() == 0)
+ return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
// an alloca, or a byval argument). If so, then it clearly overwrites any
@@ -329,26 +366,25 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
// If we can't resolve the same pointers to the same object, then we can't
// analyze them at all.
if (UO1 != UO2)
- return false;
+ return OverwriteUnknown;
// If the "Later" store is to a recognizable object, get its size.
- if (isObjectPointerWithTrustworthySize(UO2)) {
- uint64_t ObjectSize =
- TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType());
- if (ObjectSize == Later.Size)
- return true;
- }
+ uint64_t ObjectSize = getPointerSize(UO2, AA);
+ if (ObjectSize != AliasAnalysis::UnknownSize)
+ if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size)
+ return OverwriteComplete;
// Okay, we have stores to two completely different pointers. Try to
// decompose the pointer into a "base + constant_offset" form. If the base
// pointers are equal, then we can reason about the two stores.
- int64_t EarlierOff = 0, LaterOff = 0;
+ EarlierOff = 0;
+ LaterOff = 0;
const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD);
const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD);
// If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2)
- return false;
+ return OverwriteUnknown;
// The later store completely overlaps the earlier store if:
//
@@ -366,11 +402,25 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later,
//
// We have to be careful here as *Off is signed while *.Size is unsigned.
if (EarlierOff >= LaterOff &&
+ Later.Size > Earlier.Size &&
uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size)
- return true;
+ return OverwriteComplete;
+
+ // The other interesting case is if the later store overwrites the end of
+ // the earlier store
+ //
+ // |--earlier--|
+ // |-- later --|
+ //
+ // In this case we may want to trim the size of earlier to avoid generating
+ // writes to addresses which will definitely be overwritten later.
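+ //
+ // For example (illustrative): memset(p, 0, 32) followed by a 16-byte write
+ // to p+16 lets the memset be trimmed to its first 16 bytes.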
+ if (LaterOff > EarlierOff &&
+ LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+ return OverwriteEnd;
// Otherwise, they don't completely overlap.
- return false;
+ return OverwriteUnknown;
}
/// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a
@@ -494,22 +544,52 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
- if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) &&
+ if (isRemovable(DepWrite) &&
!isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) {
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
- << *DepWrite << "\n KILLER: " << *Inst << '\n');
-
- // Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD);
- ++NumFastStores;
- MadeChange = true;
-
- // DeleteDeadInstruction can delete the current instruction in loop
- // cases, reset BBI.
- BBI = Inst;
- if (BBI != BB.begin())
- --BBI;
- break;
+ int64_t InstWriteOffset, DepWriteOffset;
+ OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA,
+ DepWriteOffset, InstWriteOffset);
+ if (OR == OverwriteComplete) {
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: "
+ << *DepWrite << "\n KILLER: " << *Inst << '\n');
+
+ // Delete the store and now-dead instructions that feed it.
+ DeleteDeadInstruction(DepWrite, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // DeleteDeadInstruction can delete the current instruction in loop
+ // cases, reset BBI.
+ BBI = Inst;
+ if (BBI != BB.begin())
+ --BBI;
+ break;
+ } else if (OR == OverwriteEnd && isShortenable(DepWrite)) {
+ // TODO: base this on the target vector size so that if the earlier
+ // store was too small to get vector writes anyway then it's likely a
+ // good idea to shorten it. Shortening a power-of-2 write, however, is
+ // probably always a bad idea: any store/memset/memcpy is likely using
+ // vector instructions, so trimming it to a non-vector size is likely
+ // to be slower.
+ MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite);
+ unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ if (llvm::isPowerOf2_64(InstWriteOffset) ||
+ ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: "
+ << *DepWrite << "\n KILLER (offset "
+ << InstWriteOffset << ", "
+ << DepLoc.Size << ")"
+ << *Inst << '\n');
+
+ Value* DepWriteLength = DepIntrinsic->getLength();
+ Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(),
+ InstWriteOffset -
+ DepWriteOffset);
+ DepIntrinsic->setLength(TrimmedLength);
+ MadeChange = true;
+ }
+ }
}
// If this is a may-aliased store that is clobbering the store value, we
@@ -538,37 +618,67 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
return MadeChange;
}
+/// Find all blocks that will unconditionally lead to the block BB and append
+/// them to Blocks.
+static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
+ BasicBlock *BB, DominatorTree *DT) {
+ for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+ BasicBlock *Pred = *I;
+ if (Pred == BB) continue;
+ TerminatorInst *PredTI = Pred->getTerminator();
+ if (PredTI->getNumSuccessors() != 1)
+ continue;
+
+ if (DT->isReachableFromEntry(Pred))
+ Blocks.push_back(Pred);
+ }
+}
+
/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
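/// For example (a sketch): in "s->x = 0; free(s);" the store to s->x is dead
/// because the entire object is freed immediately afterwards.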
bool DSE::HandleFree(CallInst *F) {
bool MadeChange = false;
- MemDepResult Dep = MD->getDependency(F);
+ AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0));
+ SmallVector<BasicBlock *, 16> Blocks;
+ Blocks.push_back(F->getParent());
- while (Dep.isDef() || Dep.isClobber()) {
- Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
- return MadeChange;
+ while (!Blocks.empty()) {
+ BasicBlock *BB = Blocks.pop_back_val();
+ Instruction *InstPt = BB->getTerminator();
+ if (BB == F->getParent()) InstPt = F;
- Value *DepPointer =
- GetUnderlyingObject(getStoredPointerOperand(Dependency));
+ MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
+ while (Dep.isDef() || Dep.isClobber()) {
+ Instruction *Dependency = Dep.getInst();
+ if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ break;
- // Check for aliasing.
- if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
- return MadeChange;
+ Value *DepPointer =
+ GetUnderlyingObject(getStoredPointerOperand(Dependency));
- // DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD);
- ++NumFastStores;
- MadeChange = true;
+ // Check for aliasing.
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
+ break;
- // Inst's old Dependency is now deleted. Compute the next dependency,
- // which may also be dead, as in
- // s[0] = 0;
- // s[1] = 0; // This has just been deleted.
- // free(s);
- Dep = MD->getDependency(F);
- };
+ Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
+
+ // DCE instructions only used to calculate that store
+ DeleteDeadInstruction(Dependency, *MD);
+ ++NumFastStores;
+ MadeChange = true;
+
+ // Inst's old Dependency is now deleted. Compute the next dependency,
+ // which may also be dead, as in
+ // s[0] = 0;
+ // s[1] = 0; // This has just been deleted.
+ // free(s);
+ Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB);
+ }
+
+ if (Dep.isNonLocal())
+ FindUnconditionalPreds(Blocks, BB, DT);
+ }
return MadeChange;
}
@@ -588,10 +698,17 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Find all of the alloca'd pointers in the entry block.
BasicBlock *Entry = BB.getParent()->begin();
- for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I)
+ for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
DeadStackObjects.insert(AI);
+ // Okay, so these are dead heap objects, but if the pointer never escapes
+ // then it's leaked by this function anyway.
+ if (CallInst *CI = extractMallocCall(I))
+ if (!PointerMayBeCaptured(CI, true, true))
+ DeadStackObjects.insert(CI);
+ }
+
// Treat byval arguments the same, stores to them are dead at the end of the
// function.
for (Function::arg_iterator AI = BB.getParent()->arg_begin(),
@@ -637,6 +754,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
continue;
}
+ if (CallInst *CI = extractMallocCall(BBI)) {
+ DeadStackObjects.erase(CI);
+ continue;
+ }
+
if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't be loading any of our
// pointers.
@@ -732,4 +854,3 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
I != E; ++I)
DeadStackObjects.erase(*I);
}
-
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index c0223d2bf199..f3c92d64c2a3 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -19,11 +19,13 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include <deque>
using namespace llvm;
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
@@ -215,6 +217,7 @@ namespace {
class EarlyCSE : public FunctionPass {
public:
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
DominatorTree *DT;
typedef RecyclingAllocator<BumpPtrAllocator,
ScopedHashTableVal<SimpleValue, Value*> > AllocatorTy;
@@ -257,12 +260,77 @@ public:
bool runOnFunction(Function &F);
private:
-
+
+ // NodeScope - almost a POD, but needs to call the constructors for the
+ // scoped hash tables so that a new scope gets pushed on. These are RAII,
+ // so the scopes get popped when the NodeScope is destroyed.
+ class NodeScope {
+ public:
+ NodeScope(ScopedHTType *availableValues,
+ LoadHTType *availableLoads,
+ CallHTType *availableCalls) :
+ Scope(*availableValues),
+ LoadScope(*availableLoads),
+ CallScope(*availableCalls) {}
+
+ private:
+ NodeScope(const NodeScope&); // DO NOT IMPLEMENT
+
+ ScopedHTType::ScopeTy Scope;
+ LoadHTType::ScopeTy LoadScope;
+ CallHTType::ScopeTy CallScope;
+ };
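+ // In other words, constructing a NodeScope pushes one scope on each table,
+ // and destroying it pops them, discarding any values inserted while the
+ // corresponding domtree node was being processed.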
+
+ // StackNode - contains all the needed information to create a stack for
+ // doing a depth first traversal of the tree. This includes scopes for
+ // values, loads, and calls as well as the generation. There is a child
+ // iterator so that the children do not need to be stored separately.
+ class StackNode {
+ public:
+ StackNode(ScopedHTType *availableValues,
+ LoadHTType *availableLoads,
+ CallHTType *availableCalls,
+ unsigned cg, DomTreeNode *n,
+ DomTreeNode::iterator child, DomTreeNode::iterator end) :
+ CurrentGeneration(cg), ChildGeneration(cg), Node(n),
+ ChildIter(child), EndIter(end),
+ Scopes(availableValues, availableLoads, availableCalls),
+ Processed(false) {}
+
+ // Accessors.
+ unsigned currentGeneration() { return CurrentGeneration; }
+ unsigned childGeneration() { return ChildGeneration; }
+ void childGeneration(unsigned generation) { ChildGeneration = generation; }
+ DomTreeNode *node() { return Node; }
+ DomTreeNode::iterator childIter() { return ChildIter; }
+ DomTreeNode *nextChild() {
+ DomTreeNode *child = *ChildIter;
+ ++ChildIter;
+ return child;
+ }
+ DomTreeNode::iterator end() { return EndIter; }
+ bool isProcessed() { return Processed; }
+ void process() { Processed = true; }
+
+ private:
+ StackNode(const StackNode&); // DO NOT IMPLEMENT
+
+ // Members.
+ unsigned CurrentGeneration;
+ unsigned ChildGeneration;
+ DomTreeNode *Node;
+ DomTreeNode::iterator ChildIter;
+ DomTreeNode::iterator EndIter;
+ NodeScope Scopes;
+ bool Processed;
+ };
+
bool processNode(DomTreeNode *Node);
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
AU.setPreservesCFG();
}
};
@@ -277,22 +345,10 @@ FunctionPass *llvm::createEarlyCSEPass() {
INITIALIZE_PASS_BEGIN(EarlyCSE, "early-cse", "Early CSE", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(EarlyCSE, "early-cse", "Early CSE", false, false)
bool EarlyCSE::processNode(DomTreeNode *Node) {
- // Define a scope in the scoped hash table. When we are done processing this
- // domtree node and recurse back up to our parent domtree node, this will pop
- // off all the values we install.
- ScopedHTType::ScopeTy Scope(*AvailableValues);
-
- // Define a scope for the load values so that anything we add will get
- // popped when we recurse back up to our parent domtree node.
- LoadHTType::ScopeTy LoadScope(*AvailableLoads);
-
- // Define a scope for the call values so that anything we add will get
- // popped when we recurse back up to our parent domtree node.
- CallHTType::ScopeTy CallScope(*AvailableCalls);
-
BasicBlock *BB = Node->getBlock();
// If this block has a single predecessor, then the predecessor is the parent
@@ -328,7 +384,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If the instruction can be simplified (e.g. X+0 = X) then replace it with
// its simpler value.
- if (Value *V = SimplifyInstruction(Inst, TD, DT)) {
+ if (Value *V = SimplifyInstruction(Inst, TD, TLI, DT)) {
DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V << '\n');
Inst->replaceAllUsesWith(V);
Inst->eraseFromParent();
@@ -442,19 +498,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
}
}
}
-
- unsigned LiveOutGeneration = CurrentGeneration;
- for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I) {
- Changed |= processNode(*I);
- // Pop any generation changes off the stack from the recursive walk.
- CurrentGeneration = LiveOutGeneration;
- }
+
return Changed;
}
bool EarlyCSE::runOnFunction(Function &F) {
+ std::deque<StackNode *> nodesToProcess;
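+ // (The deque is used purely LIFO here, via push_front/pop_front, i.e. as
+ // an explicit stack for the iterative domtree walk.)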
+
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTree>();
// Tables that the pass uses when walking the domtree.
@@ -466,5 +519,52 @@ bool EarlyCSE::runOnFunction(Function &F) {
AvailableCalls = &CallTable;
CurrentGeneration = 0;
- return processNode(DT->getRootNode());
+ bool Changed = false;
+
+ // Process the root node.
+ nodesToProcess.push_front(
+ new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
+ CurrentGeneration, DT->getRootNode(),
+ DT->getRootNode()->begin(),
+ DT->getRootNode()->end()));
+
+ // Save the current generation.
+ unsigned LiveOutGeneration = CurrentGeneration;
+
+ // Process the stack.
+ while (!nodesToProcess.empty()) {
+ // Grab the first item off the stack. Set the current generation, remove
+ // the node from the stack, and process it.
+ StackNode *NodeToProcess = nodesToProcess.front();
+
+ // Initialize class members.
+ CurrentGeneration = NodeToProcess->currentGeneration();
+
+ // Check if the node needs to be processed.
+ if (!NodeToProcess->isProcessed()) {
+ // Process the node.
+ Changed |= processNode(NodeToProcess->node());
+ NodeToProcess->childGeneration(CurrentGeneration);
+ NodeToProcess->process();
+ } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+ // Push the next child onto the stack.
+ DomTreeNode *child = NodeToProcess->nextChild();
+ nodesToProcess.push_front(
+ new StackNode(AvailableValues,
+ AvailableLoads,
+ AvailableCalls,
+ NodeToProcess->childGeneration(), child,
+ child->begin(), child->end()));
+ } else {
+ // It has been processed, and there are no more children to process,
+ // so delete it and pop it off the stack.
+ delete NodeToProcess;
+ nodesToProcess.pop_front();
+ }
+ } // while (!nodes...)
+
+ // Reset the current generation.
+ CurrentGeneration = LiveOutGeneration;
+
+ return Changed;
}
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index cbfdbcddaeca..fb733ada5a19 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,10 +31,12 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Allocator.h"
@@ -83,6 +85,12 @@ namespace {
return false;
return true;
}
+
+ friend hash_code hash_value(const Expression &Value) {
+ return hash_combine(Value.opcode, Value.type,
+ hash_combine_range(Value.varargs.begin(),
+ Value.varargs.end()));
+ }
};
class ValueTable {
@@ -95,12 +103,17 @@ namespace {
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
+ Expression create_cmp_expression(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS);
Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
uint32_t lookup_or_add(Value *V);
uint32_t lookup(Value *V) const;
+ uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS);
void add(Value *V, uint32_t num);
void clear();
void erase(Value *v);
@@ -124,16 +137,8 @@ template <> struct DenseMapInfo<Expression> {
}
static unsigned getHashValue(const Expression e) {
- unsigned hash = e.opcode;
-
- hash = ((unsigned)((uintptr_t)e.type >> 4) ^
- (unsigned)((uintptr_t)e.type >> 9));
-
- for (SmallVector<uint32_t, 4>::const_iterator I = e.varargs.begin(),
- E = e.varargs.end(); I != E; ++I)
- hash = *I + hash * 37;
-
- return hash;
+ using llvm::hash_value;
+ return static_cast<unsigned>(hash_value(e));
}
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
@@ -153,9 +158,24 @@ Expression ValueTable::create_expression(Instruction *I) {
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
+ if (I->isCommutative()) {
+ // Ensure that commutative instructions that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers. Since all commutative instructions have two operands it is more
+ // efficient to sort by hand rather than using, say, std::sort.
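+ // For example, "add %a, %b" and "add %b, %a" map to the same expression.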
+ assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
+ if (e.varargs[0] > e.varargs[1])
+ std::swap(e.varargs[0], e.varargs[1]);
+ }
if (CmpInst *C = dyn_cast<CmpInst>(I)) {
- e.opcode = (C->getOpcode() << 8) | C->getPredicate();
+ // Sort the operand value numbers so x<y and y>x get the same value number.
+ CmpInst::Predicate Predicate = C->getPredicate();
+ if (e.varargs[0] > e.varargs[1]) {
+ std::swap(e.varargs[0], e.varargs[1]);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ e.opcode = (C->getOpcode() << 8) | Predicate;
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -165,6 +185,25 @@ Expression ValueTable::create_expression(Instruction *I) {
return e;
}
+Expression ValueTable::create_cmp_expression(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
+ "Not a comparison!");
+ Expression e;
+ e.type = CmpInst::makeCmpResultType(LHS->getType());
+ e.varargs.push_back(lookup_or_add(LHS));
+ e.varargs.push_back(lookup_or_add(RHS));
+
+ // Sort the operand value numbers so x<y and y>x get the same value number.
+ if (e.varargs[0] > e.varargs[1]) {
+ std::swap(e.varargs[0], e.varargs[1]);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ e.opcode = (Opcode << 8) | Predicate;
+ return e;
+}
+
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
assert(EI != 0 && "Not an ExtractValueInst?");
Expression e;
@@ -414,6 +453,19 @@ uint32_t ValueTable::lookup(Value *V) const {
return VI->second;
}
+/// lookup_or_add_cmp - Returns the value number of the given comparison,
+/// assigning it a new number if it did not have one before. Useful when
+/// we deduced the result of a comparison, but don't immediately have an
+/// instruction realizing that comparison to hand.
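+/// For example, propagateEquality below uses this to number the comparison
+/// "A < B" after deducing that "A >= B" is true, without creating any such
+/// instruction.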
+uint32_t ValueTable::lookup_or_add_cmp(unsigned Opcode,
+ CmpInst::Predicate Predicate,
+ Value *LHS, Value *RHS) {
+ Expression exp = create_cmp_expression(Opcode, Predicate, LHS, RHS);
+ uint32_t& e = expressionNumbering[exp];
+ if (!e) e = nextValueNumber++;
+ return e;
+}
+
/// clear - Remove all entries from the ValueTable.
void ValueTable::clear() {
valueNumbering.clear();
@@ -446,7 +498,8 @@ namespace {
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
const TargetData *TD;
-
+ const TargetLibraryInfo *TLI;
+
ValueTable VN;
/// LeaderTable - A mapping from value numbers to lists of Value*'s that
@@ -530,6 +583,7 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
+ AU.addRequired<TargetLibraryInfo>();
if (!NoLoads)
AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
@@ -568,6 +622,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false)
@@ -776,7 +831,7 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
Value *WritePtr,
uint64_t WriteSizeInBits,
const TargetData &TD) {
- // If the loaded or stored value is an first class array or struct, don't try
+ // If the loaded or stored value is a first class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
@@ -973,7 +1028,7 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
-/// GetStoreValueForLoad - This function is called when we have a
+/// GetLoadValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering load. This means
/// that the load *may* provide bits used by the load but we can't be sure
/// because the pointers don't mustalias. Check this case to see if there is
@@ -1274,14 +1329,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had to process more than one hundred blocks to find the
// dependencies, this load isn't worth worrying about. Optimizing
// it will be too expensive.
- if (Deps.size() > 100)
+ unsigned NumDeps = Deps.size();
+ if (NumDeps > 100)
return false;
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1
- && !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber())
- {
+ if (NumDeps == 1 &&
+ !Deps[0].getResult().isDef() && !Deps[0].getResult().isClobber()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
@@ -1294,10 +1349,10 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// where we have a value available in repl, also keep track of whether we see
// dependencies that produce an unknown value for the load (such as a call
// that could potentially clobber the load).
- SmallVector<AvailableValueInBlock, 16> ValuesPerBlock;
- SmallVector<BasicBlock*, 16> UnavailableBlocks;
+ SmallVector<AvailableValueInBlock, 64> ValuesPerBlock;
+ SmallVector<BasicBlock*, 64> UnavailableBlocks;
- for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+ for (unsigned i = 0, e = NumDeps; i != e; ++i) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
@@ -1896,12 +1951,19 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
unsigned Count = 0;
for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
UI != UE; ) {
- Instruction *User = cast<Instruction>(*UI);
- unsigned OpNum = UI.getOperandNo();
- ++UI;
+ Use &U = (UI++).getUse();
+
+ // If From occurs as a phi node operand then the use implicitly lives in the
+ // corresponding incoming block. Otherwise it is the block containing the
+ // user that must be dominated by Root.
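+ //
+ // E.g. for "%p = phi [%v, %bb1], [%w, %bb2]", the use of %v is treated as
+ // occurring in %bb1, not in the block containing the phi.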
+ BasicBlock *UsingBlock;
+ if (PHINode *PN = dyn_cast<PHINode>(U.getUser()))
+ UsingBlock = PN->getIncomingBlock(U);
+ else
+ UsingBlock = cast<Instruction>(U.getUser())->getParent();
- if (DT->dominates(Root, User->getParent())) {
- User->setOperand(OpNum, To);
+ if (DT->dominates(Root, UsingBlock)) {
+ U.set(To);
++Count;
}
}
@@ -1912,69 +1974,119 @@ unsigned GVN::replaceAllDominatedUsesWith(Value *From, Value *To,
/// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
/// 'RHS' everywhere in the scope. Returns whether a change was made.
bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
- if (LHS == RHS) return false;
- assert(LHS->getType() == RHS->getType() && "Equal but types differ!");
+ SmallVector<std::pair<Value*, Value*>, 4> Worklist;
+ Worklist.push_back(std::make_pair(LHS, RHS));
+ bool Changed = false;
- // Don't try to propagate equalities between constants.
- if (isa<Constant>(LHS) && isa<Constant>(RHS))
- return false;
+ while (!Worklist.empty()) {
+ std::pair<Value*, Value*> Item = Worklist.pop_back_val();
+ LHS = Item.first; RHS = Item.second;
+
+ if (LHS == RHS) continue;
+ assert(LHS->getType() == RHS->getType() && "Equality but unequal types!");
+
+ // Don't try to propagate equalities between constants.
+ if (isa<Constant>(LHS) && isa<Constant>(RHS)) continue;
+
+ // Prefer a constant on the right-hand side, or an Argument if no constants.
+ if (isa<Constant>(LHS) || (isa<Argument>(LHS) && !isa<Constant>(RHS)))
+ std::swap(LHS, RHS);
+ assert((isa<Argument>(LHS) || isa<Instruction>(LHS)) && "Unexpected value!");
+
+ // If there is no obvious reason to prefer the left-hand side over the right-
+ // hand side, ensure the longest lived term is on the right-hand side, so the
+ // shortest lived term will be replaced by the longest lived. This tends to
+ // expose more simplifications.
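+ //
+ // E.g. if a newer value %new (larger value number) equals an older value
+ // %old (smaller value number), uses of %new are replaced by %old rather
+ // than the other way around.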
+ uint32_t LVN = VN.lookup_or_add(LHS);
+ if ((isa<Argument>(LHS) && isa<Argument>(RHS)) ||
+ (isa<Instruction>(LHS) && isa<Instruction>(RHS))) {
+ // Move the 'oldest' value to the right-hand side, using the value number as
+ // a proxy for age.
+ uint32_t RVN = VN.lookup_or_add(RHS);
+ if (LVN < RVN) {
+ std::swap(LHS, RHS);
+ LVN = RVN;
+ }
+ }
+ assert((!isa<Instruction>(RHS) ||
+ DT->properlyDominates(cast<Instruction>(RHS)->getParent(), Root)) &&
+ "Instruction doesn't dominate scope!");
+
+ // If value numbering later deduces that an instruction in the scope is equal
+ // to 'LHS' then ensure it will be turned into 'RHS'.
+ addToLeaderTable(LVN, RHS, Root);
+
+ // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope. As
+ // LHS always has at least one use that is not dominated by Root, this will
+ // never do anything if LHS has only one use.
+ if (!LHS->hasOneUse()) {
+ unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
- // Make sure that any constants are on the right-hand side. In general the
- // best results are obtained by placing the longest lived value on the RHS.
- if (isa<Constant>(LHS))
- std::swap(LHS, RHS);
+ // Now try to deduce additional equalities from this one. For example, if the
+ // known equality was "(A != B)" == "false" then it follows that A and B are
+ // equal in the scope. Only boolean equalities with an explicit true or false
+ // RHS are currently supported.
+ if (!RHS->getType()->isIntegerTy(1))
+ // Not a boolean equality - bail out.
+ continue;
+ ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
+ if (!CI)
+ // RHS neither 'true' nor 'false' - bail out.
+ continue;
+ // Whether RHS equals 'true'. Otherwise it equals 'false'.
+ bool isKnownTrue = CI->isAllOnesValue();
+ bool isKnownFalse = !isKnownTrue;
+
+ // If "A && B" is known true then both A and B are known true. If "A || B"
+ // is known false then both A and B are known false.
+ Value *A, *B;
+ if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
+ (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
+ Worklist.push_back(std::make_pair(A, RHS));
+ Worklist.push_back(std::make_pair(B, RHS));
+ continue;
+ }
- // If neither term is constant then bail out. This is not for correctness,
- // it's just that the non-constant case is much less useful: it occurs just
- // as often as the constant case but handling it hardly ever results in an
- // improvement.
- if (!isa<Constant>(RHS))
- return false;
+ // If we are propagating an equality like "(A == B)" == "true" then also
+ // propagate the equality A == B. When propagating a comparison such as
+ // "(A >= B)" == "true", replace all instances of "A < B" with "false".
+ if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
+ Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
- // If value numbering later deduces that an instruction in the scope is equal
- // to 'LHS' then ensure it will be turned into 'RHS'.
- addToLeaderTable(VN.lookup_or_add(LHS), RHS, Root);
-
- // Replace all occurrences of 'LHS' with 'RHS' everywhere in the scope.
- unsigned NumReplacements = replaceAllDominatedUsesWith(LHS, RHS, Root);
- bool Changed = NumReplacements > 0;
- NumGVNEqProp += NumReplacements;
-
- // Now try to deduce additional equalities from this one. For example, if the
- // known equality was "(A != B)" == "false" then it follows that A and B are
- // equal in the scope. Only boolean equalities with an explicit true or false
- // RHS are currently supported.
- if (!RHS->getType()->isIntegerTy(1))
- // Not a boolean equality - bail out.
- return Changed;
- ConstantInt *CI = dyn_cast<ConstantInt>(RHS);
- if (!CI)
- // RHS neither 'true' nor 'false' - bail out.
- return Changed;
- // Whether RHS equals 'true'. Otherwise it equals 'false'.
- bool isKnownTrue = CI->isAllOnesValue();
- bool isKnownFalse = !isKnownTrue;
-
- // If "A && B" is known true then both A and B are known true. If "A || B"
- // is known false then both A and B are known false.
- Value *A, *B;
- if ((isKnownTrue && match(LHS, m_And(m_Value(A), m_Value(B)))) ||
- (isKnownFalse && match(LHS, m_Or(m_Value(A), m_Value(B))))) {
- Changed |= propagateEquality(A, RHS, Root);
- Changed |= propagateEquality(B, RHS, Root);
- return Changed;
- }
+ // If "A == B" is known true, or "A != B" is known false, then replace
+ // A with B everywhere in the scope.
+ if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
+ (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE))
+ Worklist.push_back(std::make_pair(Op0, Op1));
+
+ // If "A >= B" is known true, replace "A < B" with false everywhere.
+ CmpInst::Predicate NotPred = Cmp->getInversePredicate();
+ Constant *NotVal = ConstantInt::get(Cmp->getType(), isKnownFalse);
+ // Since we don't have the instruction "A < B" immediately to hand, work out
+ // the value number that it would have and use that to find an appropriate
+ // instruction (if any).
+ uint32_t NextNum = VN.getNextUnusedValueNumber();
+ uint32_t Num = VN.lookup_or_add_cmp(Cmp->getOpcode(), NotPred, Op0, Op1);
+ // If the number we were assigned was brand new then there is no point in
+ // looking for an instruction realizing it: there cannot be one!
+ if (Num < NextNum) {
+ Value *NotCmp = findLeader(Root, Num);
+ if (NotCmp && isa<Instruction>(NotCmp)) {
+ unsigned NumReplacements =
+ replaceAllDominatedUsesWith(NotCmp, NotVal, Root);
+ Changed |= NumReplacements > 0;
+ NumGVNEqProp += NumReplacements;
+ }
+ }
+ // Ensure that any instruction in scope that gets the "A < B" value number
+ // is replaced with false.
+ addToLeaderTable(Num, NotVal, Root);
- // If we are propagating an equality like "(A == B)" == "true" then also
- // propagate the equality A == B.
- if (ICmpInst *Cmp = dyn_cast<ICmpInst>(LHS)) {
- // Only equality comparisons are supported.
- if ((isKnownTrue && Cmp->getPredicate() == CmpInst::ICMP_EQ) ||
- (isKnownFalse && Cmp->getPredicate() == CmpInst::ICMP_NE)) {
- Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
- Changed |= propagateEquality(Op0, Op1, Root);
+ continue;
}
- return Changed;
}
return Changed;
@@ -1985,35 +2097,15 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, BasicBlock *Root) {
/// particular 'Dst' must not be reachable via another edge from 'Src'.
static bool isOnlyReachableViaThisEdge(BasicBlock *Src, BasicBlock *Dst,
DominatorTree *DT) {
- // First off, there must not be more than one edge from Src to Dst, there
- // should be exactly one. So keep track of the number of times Src occurs
- // as a predecessor of Dst and fail if it's more than once. Secondly, any
- // other predecessors of Dst should be dominated by Dst (see logic below).
- bool SawEdgeFromSrc = false;
- for (pred_iterator PI = pred_begin(Dst), PE = pred_end(Dst); PI != PE; ++PI) {
- BasicBlock *Pred = *PI;
- if (Pred == Src) {
- // An edge from Src to Dst.
- if (SawEdgeFromSrc)
- // There are multiple edges from Src to Dst - fail.
- return false;
- SawEdgeFromSrc = true;
- continue;
- }
- // If the predecessor is not dominated by Dst, then it must be possible to
- // reach it either without passing through Src (and thus not via the edge)
- // or by passing through Src but taking a different edge out of Src. Either
- // way it is possible to reach Dst without passing via the edge, so fail.
- if (!DT->dominates(Dst, *PI))
- return false;
- }
- assert(SawEdgeFromSrc && "No edge between these basic blocks!");
-
- // Every path from the entry block to Dst must at some point pass to Dst from
- // a predecessor that is not dominated by Dst. This predecessor can only be
- // Src, since all others are dominated by Dst. As there is only one edge from
- // Src to Dst, the path passes by this edge.
- return true;
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ BasicBlock *Pred = Dst->getSinglePredecessor();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != 0;
}
/// processInstruction - When calculating availability, handle an instruction
@@ -2027,7 +2119,7 @@ bool GVN::processInstruction(Instruction *I) {
// to value numbering it. Value numbering often exposes redundancies, for
// example if it determines that %y is equal to %x then the instruction
// "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
I->replaceAllUsesWith(V);
if (MD && V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
@@ -2076,16 +2168,17 @@ bool GVN::processInstruction(Instruction *I) {
Value *SwitchCond = SI->getCondition();
BasicBlock *Parent = SI->getParent();
bool Changed = false;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
- BasicBlock *Dst = SI->getSuccessor(i);
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock *Dst = i.getCaseSuccessor();
if (isOnlyReachableViaThisEdge(Parent, Dst, DT))
- Changed |= propagateEquality(SwitchCond, SI->getCaseValue(i), Dst);
+ Changed |= propagateEquality(SwitchCond, i.getCaseValue(), Dst);
}
return Changed;
}
// Instructions with void type don't return a value, so there's
- // no point in trying to find redudancies in them.
+ // no point in trying to find redundancies in them.
if (I->getType()->isVoidTy()) return false;
uint32_t NextNum = VN.getNextUnusedValueNumber();
@@ -2101,7 +2194,7 @@ bool GVN::processInstruction(Instruction *I) {
// If the number we were assigned was a brand new VN, then we don't
// need to do a lookup to see if the number already exists
// somewhere in the domtree: it can't!
- if (Num == NextNum) {
+ if (Num >= NextNum) {
addToLeaderTable(Num, I, I->getParent());
return false;
}
@@ -2129,6 +2222,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
VN.setDomTree(DT);
@@ -2241,7 +2335,14 @@ bool GVN::performPRE(Function &F) {
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
-
+
+ // Don't do PRE on compares. The PHI would prevent CodeGenPrepare from
+ // sinking the compare again, and it would force the code generator to
+ // move the i1 from processor flags or predicate registers into a general
+ // purpose register.
+ if (isa<CmpInst>(CurInst))
+ continue;
+
// We don't currently value number ANY inline asm calls.
if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
if (CallI->isInlineAsm())
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
new file mode 100644
index 000000000000..c2bd6e69ee10
--- /dev/null
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -0,0 +1,226 @@
+//===-- GlobalMerge.cpp - Internal globals merging -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals that were merged into a single bigger one can be addressed using
+// offsets from the same base pointer (no need for a separate base pointer for
+// each of the globals). Such a transformation can significantly reduce the
+// register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of all 3 arrays must be kept in registers, so
+// this code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+// ===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "global-merge"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumMerged , "Number of globals merged");
+namespace {
+ class GlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer to a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit GlobalMerge(const TargetLowering *tli = 0)
+ : FunctionPass(ID), TLI(tli) {
+ initializeGlobalMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td) : TD(td) { }
+
+ bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char GlobalMerge::ID = 0;
+INITIALIZE_PASS(GlobalMerge, "global-merge",
+ "Global Merge", false, false)
+
+
+bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
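+ // Sorting by increasing allocation size (see GlobalCmp) packs the small
+ // globals first, so more of them fit below MaxOffset in each merged struct.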
+
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; j != e; ++j) {
+ Type *Ty = Globals[j]->getType()->getElementType();
+ MergedSize += TD->getTypeAllocSize(Ty);
+ if (MergedSize > MaxOffset) {
+ break;
+ }
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ }
+
+ StructType *MergedTy = StructType::get(M.getContext(), Tys);
+ Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "_MergedGlobals");
+ for (size_t k = i; k < j; ++k) {
+ Constant *Idx[2] = {
+ ConstantInt::get(Int32Ty, 0),
+ ConstantInt::get(Int32Ty, k-i)
+ };
+ Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx);
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ NumMerged++;
+ }
+ i = j;
+ }
+
+ return true;
+}
+
+
+bool GlobalMerge::doInitialization(Module &M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+ // Grab all non-const globals.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ unsigned Alignment = TD->getPreferredAlignment(I);
+ Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
+ if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine())
+ .isBSSLocal())
+ BSSGlobals.push_back(I);
+ else if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+ if (BSSGlobals.size() > 1)
+ Changed |= doMerge(BSSGlobals, M, false);
+
+ // FIXME: This currently breaks the EH processing due to the way the
+ // typeinfo detection works. We might want to detect the TIs and ignore
+ // them in the future.
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool GlobalMerge::runOnFunction(Function &F) {
+ return false;
+}
+
+Pass *llvm::createGlobalMergePass(const TargetLowering *tli) {
+ return new GlobalMerge(tli);
+}
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 75fa011a14b7..a9ba6579db5a 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -33,7 +33,6 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -50,30 +49,21 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumRemoved , "Number of aux indvars removed");
STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted , "Number of canonical indvars added");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-namespace llvm {
- cl::opt<bool> EnableIVRewrite(
- "enable-iv-rewrite", cl::Hidden,
- cl::desc("Enable canonical induction variable rewriting"));
-
- // Trip count verification can be enabled by default under NDEBUG if we
- // implement a strong expression equivalence checker in SCEV. Until then, we
- // use the verify-indvars flag, which may assert in some cases.
- cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars"));
-}
+// Trip count verification can be enabled by default under NDEBUG if we
+// implement a strong expression equivalence checker in SCEV. Until then, we
+// use the verify-indvars flag, which may assert in some cases.
+static cl::opt<bool> VerifyIndvars(
+ "verify-indvars", cl::Hidden,
+ cl::desc("Verify the ScalarEvolution result after running indvars"));
namespace {
class IndVarSimplify : public LoopPass {
- IVUsers *IU;
LoopInfo *LI;
ScalarEvolution *SE;
DominatorTree *DT;
@@ -84,7 +74,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), TD(0),
Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -97,13 +87,9 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- if (EnableIVRewrite)
- AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- if (EnableIVRewrite)
- AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
@@ -121,8 +107,6 @@ namespace {
void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar, SCEVExpander &Rewriter);
@@ -138,7 +122,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(IVUsers)
INITIALIZE_PASS_END(IndVarSimplify, "indvars",
"Induction Variable Simplification", false, false)
@@ -180,6 +163,11 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
// base of a recurrence. This handles the case in which SCEV expansion
// converts a pointer type recurrence into a nonrecurrent pointer base
// indexed by an integer recurrence.
+
+ // If the GEP base pointer is a vector of pointers, abort.
+ if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
+ return false;
+
const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
if (FromBase == ToBase)
@@ -445,11 +433,6 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
PN->replaceAllUsesWith(Conv);
RecursivelyDeleteTriviallyDeadInstructions(PN);
}
-
- // Add a new IVUsers entry for the newly-created integer PHI.
- if (IU)
- IU->AddUsersIfInteresting(NewPHI);
-
Changed = true;
}
@@ -595,124 +578,6 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
}
//===----------------------------------------------------------------------===//
-// Rewrite IV users based on a canonical IV.
-// Only for use with -enable-iv-rewrite.
-//===----------------------------------------------------------------------===//
-
-/// FIXME: It is an extremely bad idea to indvar substitute anything more
-/// complex than affine induction variables. Doing so will put expensive
-/// polynomial evaluations inside of the loop, and the str reduction pass
-/// currently can only reduce affine polynomials. For now just disable
-/// indvar subst on anything more complex than an affine addrec, unless
-/// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
- // Loop-invariant values are safe.
- if (SE->isLoopInvariant(S, L)) return true;
-
- // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
- // to transform them into efficient code.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative
- = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = getInsertPointForUses(User, Op, DT);
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing. The change doesn't
- // affect its value, but it does potentially affect which use lists the
- // value will be on after the replacement, which affects ScalarEvolution's
- // ability to walk use lists and drop dangling pointers when a value is
- // deleted.
- SE->forgetValue(User);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
- NewValI->setDebugLoc(User->getDebugLoc());
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
-
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
-}
-
-//===----------------------------------------------------------------------===//
// IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//
@@ -843,7 +708,7 @@ protected:
const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU);
- Instruction *WidenIVUse(NarrowIVDefUse DU);
+ Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
@@ -917,7 +782,6 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
}
return WideBO;
}
- llvm_unreachable(0);
}
/// No-wrap operations can transfer sign extension of their result to their
@@ -946,9 +810,13 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
else
return 0;
+  // When creating this AddExpr, don't apply the current operation's NSW or NUW
+ // flags. This instruction may be guarded by control flow that the no-wrap
+ // behavior depends on. Non-control-equivalent instructions can be mapped to
+ // the same SCEV expression, and it would be incorrect to transfer NSW/NUW
+ // semantics to those operations.
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(
- SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr,
- IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW));
+ SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
return 0;
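
The rationale in the comment above can be made concrete. SCEV uniques structurally identical expressions, so an add guarded by an overflow check and an unguarded add elsewhere can resolve to one shared node; stamping the guarded instruction's NSW/NUW onto that node would assert no-wrap for the unguarded occurrence too. A toy uniquing table, with illustrative names only (this is not SCEV's real interface):

    #include <cassert>
    #include <map>
    #include <utility>

    struct Expr { int LHS, RHS; bool NSW = false; }; // shared, uniqued node

    int main() {
      std::map<std::pair<int, int>, Expr> Unique;
      // Block A: 'add nsw %x, %y', reachable only after an overflow guard.
      Expr &FromGuarded = Unique[{1, 2}];
      // Block B: plain 'add %x, %y' with no guard, structurally identical.
      Expr &FromUnguarded = Unique[{1, 2}];
      assert(&FromGuarded == &FromUnguarded && "uniqued to one node");
      // Setting NSW from block A on the shared node would silently claim
      // no-signed-wrap for block B's add as well -- which is why the widener
      // above builds the AddExpr without transferring the instruction's flags.
    }
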
@@ -983,7 +851,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
+Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (isa<PHINode>(DU.NarrowUse) &&
@@ -1051,7 +919,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU) {
// NarrowUse.
Instruction *WideUse = 0;
if (WideAddRec == WideIncExpr
- && SCEVExpander::hoistStep(WideInc, DU.NarrowUse, DT))
+ && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
@@ -1156,7 +1024,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *WideUse = WidenIVUse(DU);
+ Instruction *WideUse = WidenIVUse(DU, Rewriter);
// Follow all def-use edges from the previous narrow use.
if (WideUse)
@@ -1231,7 +1099,11 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L,
/// BackedgeTakenInfo. If these expressions have not been reduced, then
/// expanding them may incur additional cost (albeit in the loop preheader).
static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
+ SmallPtrSet<const SCEV*, 8> &Processed,
ScalarEvolution *SE) {
+ if (!Processed.insert(S))
+ return false;
+
// If the backedge-taken count is a UDiv, it's very likely a UDiv that
// ScalarEvolution's HowFarToZero or HowManyLessThans produced to compute a
// precise expression, rather than a UDiv from the user's code. If we can't
@@ -1250,16 +1122,13 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
}
}
- if (EnableIVRewrite)
- return false;
-
// Recurse past add expressions, which commonly occur in the
// BackedgeTakenCount. They may already exist in program code, and if not,
// they are not too expensive rematerialize.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I) {
- if (isHighCostExpansion(*I, BI, SE))
+ if (isHighCostExpansion(*I, BI, Processed, SE))
return true;
}
return false;
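
The new Processed set exists because SCEV expressions form a DAG rather than a tree: a subexpression shared along many paths would otherwise be re-examined once per path, which can be exponential for deeply shared expressions. A minimal sketch of the guard on a hypothetical node type (std::set stands in for SmallPtrSet, whose insert returned a bare bool in this era):

    #include <set>
    #include <vector>

    struct Node { std::vector<const Node*> Ops; bool Expensive = false; };

    // Mirrors the pattern above: treat anything already seen as "not high
    // cost", so each node in the DAG is examined at most once.
    static bool isHighCost(const Node *N, std::set<const Node*> &Processed) {
      if (!Processed.insert(N).second)
        return false;
      if (N->Expensive)
        return true;
      for (const Node *Op : N->Ops)
        if (isHighCost(Op, Processed))
          return true;
      return false;
    }

    int main() {
      // Diamond-shaped DAG: without the set, Shared is visited twice; chain
      // such diamonds and the walk becomes exponential in the depth.
      Node Shared, A{{&Shared}}, B{{&Shared}}, Root{{&A, &B}};
      std::set<const Node*> Processed;
      return isHighCost(&Root, Processed) ? 1 : 0;
    }
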
@@ -1270,14 +1139,24 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI,
if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
return true;
- // If we haven't recognized an expensive SCEV patter, assume its an expression
- // produced by program code.
+ // If we haven't recognized an expensive SCEV pattern, assume it's an
+ // expression produced by program code.
return false;
}
/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
/// count expression can be safely and cheaply expanded into an instruction
/// sequence that can be used by LinearFunctionTestReplace.
+///
+/// TODO: This fails for pointer-type loop counters with strides greater than
+/// one byte, consequently preventing LFTR from running. For the purpose of
+/// LFTR we could skip this check in the case that the LFTR loop counter
+/// (chosen by FindLoopCounter) is also of pointer type. Instead, we could
+/// directly convert the loop test to an inequality test by checking the
+/// target data's alignment of element types (given that the initial pointer
+/// value originates from or is used by an ABI-constrained operation, as
+/// opposed to inttoptr/ptrtoint). However, we don't yet have a strong
+/// motivation for converting loop tests into inequality tests.
static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
@@ -1292,42 +1171,13 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
if (!BI)
return false;
- if (isHighCostExpansion(BackedgeTakenCount, BI, SE))
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(BackedgeTakenCount, BI, Processed, SE))
return false;
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
-///
-/// TODO: Unnecessary when ForceLFTR is removed.
-static Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
-
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
-
- Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
-
- return Trunc->getSrcTy();
- }
- return Ty;
-}
-
/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop
/// invariant value to the phi.
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
@@ -1429,6 +1279,10 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
/// FindLoopCounter - Find an affine IV in canonical form.
///
+/// BECount may be an i8* pointer type. The pointer difference is already a
+/// valid count without scaling the address stride, so it remains a pointer
+/// expression as far as SCEV is concerned.
+///
/// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount
///
/// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride.
@@ -1437,11 +1291,6 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
static PHINode *
FindLoopCounter(Loop *L, const SCEV *BECount,
ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) {
- // I'm not sure how BECount could be a pointer type, but we definitely don't
- // want to LFTR that.
- if (BECount->getType()->isPointerTy())
- return 0;
-
uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
Value *Cond =
@@ -1458,6 +1307,10 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
if (!SE->isSCEVable(Phi->getType()))
continue;
+ // Avoid comparing an integer IV against a pointer Limit.
+ if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
+ continue;
+
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
if (!AR || AR->getLoop() != L || !AR->isAffine())
continue;
@@ -1503,6 +1356,82 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
return BestPhi;
}
+/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that
+/// holds the RHS of the new loop test.
+static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
+ SCEVExpander &Rewriter, ScalarEvolution *SE) {
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+ assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
+ const SCEV *IVInit = AR->getStart();
+
+ // IVInit may be a pointer while IVCount is an integer when FindLoopCounter
+ // finds a valid pointer IV. Sign extend BECount in order to materialize a
+ // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
+ // the existing GEPs whenever possible.
+ if (IndVar->getType()->isPointerTy()
+ && !IVCount->getType()->isPointerTy()) {
+
+ Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
+ const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy);
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(IVOffset, L) &&
+ "Computed iteration count is not loop invariant!");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI);
+
+ Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
+ assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter");
+ // We could handle pointer IVs other than i8*, but we need to compensate for
+ // gep index scaling. See canExpandBackedgeTakenCount comments.
+ assert(SE->getSizeOfExpr(
+ cast<PointerType>(GEPBase->getType())->getElementType())->isOne()
+ && "unit stride pointer IV must be i8*");
+
+ IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
+ return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit");
+ }
+ else {
+    // In any other case, convert both IVInit and IVCount to integers before
+    // comparing. This may result in SCEV expansion of pointers, but in
+    // practice SCEV will fold the pointer arithmetic away as such:
+    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
+    //
+    // Valid Cases: (1) both are integers, which is the most common; (2) both
+    // may be pointers for simple memset-style loops; (3) IVInit is an integer
+    // and IVCount is a pointer, which may occur when enable-iv-rewrite
+    // generates a canonical IV on top of case #2.
+
+ const SCEV *IVLimit = 0;
+ // For unit stride, IVCount = Start + BECount with 2's complement overflow.
+    // For non-zero Start, compute IVLimit here.
+ if (AR->getStart()->isZero())
+ IVLimit = IVCount;
+ else {
+ assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
+ const SCEV *IVInit = AR->getStart();
+
+ // For integer IVs, truncate the IV before computing IVInit + BECount.
+ if (SE->getTypeSizeInBits(IVInit->getType())
+ > SE->getTypeSizeInBits(IVCount->getType()))
+ IVInit = SE->getTruncateExpr(IVInit, IVCount->getType());
+
+ IVLimit = SE->getAddExpr(IVInit, IVCount);
+ }
+ // Expand the code for the iteration count.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ IRBuilder<> Builder(BI);
+ assert(SE->isLoopInvariant(IVLimit, L) &&
+ "Computed iteration count is not loop invariant!");
+ // Ensure that we generate the same type as IndVar, or a smaller integer
+ // type. In the presence of null pointer values, we have an integer type
+ // SCEV expression (IVInit) for a pointer type IV value (IndVar).
+ Type *LimitTy = IVCount->getType()->isPointerTy() ?
+ IndVar->getType() : IVCount->getType();
+ return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
+ }
+}
+
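
The two branches of genLoopLimit reduce to simple arithmetic. For an i8* counter a unit-stride GEP is plain byte arithmetic, so the limit is base plus count; for an integer counter the limit is IVInit + IVCount with ordinary wraparound. A small self-contained illustration (variable names chosen to match the code above; everything else is a toy):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Integer case: the limit is IVInit + IVCount with 2's-complement
      // wraparound, exactly what the integer branch above expands.
      uint32_t IVInit = 10, IVCount = 5;
      uint32_t IVLimit = IVInit + IVCount; // loop test becomes: iv != 15

      // Pointer case: a unit-stride i8* GEP is byte arithmetic, so the limit
      // ('lftr.limit' above) is simply base + count, no index scaling needed.
      char Buf[16];
      char *Base = Buf;
      char *Limit = Base + IVCount;

      std::printf("%u %td\n", IVLimit, Limit - Base); // prints: 15 5
    }
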
/// LinearFunctionTestReplace - This method rewrites the exit condition of the
/// loop to be a canonical != comparison against the incremented loop induction
/// variable. This pass is able to rewrite the exit tests of any loop where the
@@ -1514,37 +1443,35 @@ LinearFunctionTestReplace(Loop *L,
PHINode *IndVar,
SCEVExpander &Rewriter) {
assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
// LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
- Type *CntTy = !EnableIVRewrite ?
- BackedgeTakenCount->getType() : IndVar->getType();
+ Type *CntTy = BackedgeTakenCount->getType();
- const SCEV *IVLimit = BackedgeTakenCount;
+ const SCEV *IVCount = BackedgeTakenCount;
- // If the exiting block is not the same as the backedge block, we must compare
- // against the preincremented value, otherwise we prefer to compare against
- // the post-incremented value.
+ // If the exiting block is the same as the backedge block, we prefer to
+ // compare against the post-incremented value, otherwise we must compare
+ // against the preincremented value.
Value *CmpIndVar;
if (L->getExitingBlock() == L->getLoopLatch()) {
// Add one to the "backedge-taken" count to get the trip count.
// If this addition may overflow, we have to be more pessimistic and
// cast the induction variable before doing the add.
const SCEV *N =
- SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1));
- if (CntTy == IVLimit->getType())
- IVLimit = N;
+ SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1));
+ if (CntTy == IVCount->getType())
+ IVCount = N;
else {
- const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0);
+ const SCEV *Zero = SE->getConstant(IVCount->getType(), 0);
if ((isa<SCEVConstant>(N) && !N->isZero()) ||
SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
// No overflow. Cast the sum.
- IVLimit = SE->getTruncateOrZeroExtend(N, CntTy);
+ IVCount = SE->getTruncateOrZeroExtend(N, CntTy);
} else {
// Potential overflow. Cast before doing the add.
- IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
- IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1));
+ IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
+ IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1));
}
}
// The BackedgeTaken expression contains the number of times that the
@@ -1552,62 +1479,17 @@ LinearFunctionTestReplace(Loop *L,
// number of times the loop executes, so use the incremented indvar.
CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
} else {
- // We have to use the preincremented value...
- IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy);
+ // We must use the preincremented value...
+ IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy);
CmpIndVar = IndVar;
}
- // For unit stride, IVLimit = Start + BECount with 2's complement overflow.
- // So for, non-zero start compute the IVLimit here.
- bool isPtrIV = false;
- Type *CmpTy = CntTy;
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter");
- if (!AR->getStart()->isZero()) {
- assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
- const SCEV *IVInit = AR->getStart();
-
- // For pointer types, sign extend BECount in order to materialize a GEP.
- // Note that for without EnableIVRewrite, we never run SCEVExpander on a
- // pointer type, because we must preserve the existing GEPs. Instead we
- // directly generate a GEP later.
- if (IVInit->getType()->isPointerTy()) {
- isPtrIV = true;
- CmpTy = SE->getEffectiveSCEVType(IVInit->getType());
- IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy);
- }
- // For integer types, truncate the IV before computing IVInit + BECount.
- else {
- if (SE->getTypeSizeInBits(IVInit->getType())
- > SE->getTypeSizeInBits(CmpTy))
- IVInit = SE->getTruncateExpr(IVInit, CmpTy);
-
- IVLimit = SE->getAddExpr(IVInit, IVLimit);
- }
- }
- // Expand the code for the iteration count.
- IRBuilder<> Builder(BI);
-
- assert(SE->isLoopInvariant(IVLimit, L) &&
- "Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI);
-
- // Create a gep for IVInit + IVLimit from on an existing pointer base.
- assert(isPtrIV == IndVar->getType()->isPointerTy() &&
- "IndVar type must match IVInit type");
- if (isPtrIV) {
- Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader());
- assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter");
- assert(SE->getSizeOfExpr(
- cast<PointerType>(IVStart->getType())->getElementType())->isOne()
- && "unit stride pointer IV must be i8*");
-
- Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());
- ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit");
- Builder.SetInsertPoint(BI);
- }
+ Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE);
+ assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy()
+ && "genLoopLimit missed a cast");
// Insert a new icmp_ne or icmp_eq instruction before the branch.
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
ICmpInst::Predicate P;
if (L->contains(BI->getSuccessor(0)))
P = ICmpInst::ICMP_NE;
@@ -1619,11 +1501,13 @@ LinearFunctionTestReplace(Loop *L,
<< " op:\t"
<< (P == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
<< " RHS:\t" << *ExitCnt << "\n"
- << " Expr:\t" << *IVLimit << "\n");
+ << " IVCount:\t" << *IVCount << "\n");
+ IRBuilder<> Builder(BI);
if (SE->getTypeSizeInBits(CmpIndVar->getType())
- > SE->getTypeSizeInBits(CmpTy)) {
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv");
+ > SE->getTypeSizeInBits(ExitCnt->getType())) {
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
}
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
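
The trip-count handling above is careful about one edge case: adding one to a backedge-taken count that occupies its full narrow width wraps to zero, so when non-overflow cannot be proven the count is widened before the increment. A short demonstration, assuming nothing beyond <cstdint>:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // A backedge taken 255 times, counted in an 8-bit type.
      uint8_t BECount = 255;

      // Add one first, then widen: the 8-bit add wraps to 0. Wrong trip count.
      uint32_t Wrong = (uint32_t)(uint8_t)(BECount + 1);

      // Widen first, then add one -- the order the code above chooses when it
      // cannot prove the increment is overflow-free. Correct trip count: 256.
      uint32_t Right = (uint32_t)BECount + 1;

      std::printf("wrong=%u right=%u\n", Wrong, Right);
    }
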
@@ -1680,11 +1564,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
if (isa<LandingPadInst>(I))
continue;
- // Don't sink static AllocaInsts out of the entry block, which would
- // turn them into dynamic allocas!
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (AI->isStaticAlloca())
- continue;
+    // Don't sink allocas: we never want to sink static allocas out of the
+    // entry block, and correctly sinking dynamic allocas requires
+    // checks for stacksave/stackrestore intrinsics.
+ // FIXME: Refactor this check somehow?
+ if (isa<AllocaInst>(I))
+ continue;
// Determine if there is a use in or before the loop (direct or
// otherwise).
@@ -1746,8 +1631,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- if (EnableIVRewrite)
- IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
@@ -1774,10 +1657,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// attempt to avoid evaluating SCEVs for sign/zero extend operations until
// other expressions involving loop IVs have been evaluated. This helps SCEV
// set no-wrap flags before normalizing sign/zero extension.
- if (!EnableIVRewrite) {
- Rewriter.disableCanonicalMode();
- SimplifyAndExtend(L, Rewriter, LPM);
- }
+ Rewriter.disableCanonicalMode();
+ SimplifyAndExtend(L, Rewriter, LPM);
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -1788,106 +1669,28 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
RewriteLoopExitValues(L, Rewriter);
- // Eliminate redundant IV users.
- if (EnableIVRewrite)
- Changed |= simplifyIVUsers(IU, SE, &LPM, DeadInsts);
-
// Eliminate redundant IV cycles.
- if (!EnableIVRewrite)
- NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
-
- // Compute the type of the largest recurrence expression, and decide whether
- // a canonical induction variable should be inserted.
- Type *LargestType = 0;
- bool NeedCannIV = false;
- bool ExpandBECount = canExpandBackedgeTakenCount(L, SE);
- if (EnableIVRewrite && ExpandBECount) {
- // If we have a known trip count and a single exit block, we'll be
- // rewriting the loop exit test condition below, which requires a
- // canonical induction variable.
- NeedCannIV = true;
- Type *Ty = BackedgeTakenCount->getType();
- if (!EnableIVRewrite) {
- // In this mode, SimplifyIVUsers may have already widened the IV used by
- // the backedge test and inserted a Trunc on the compare's operand. Get
- // the wider type to avoid creating a redundant narrow IV only used by the
- // loop test.
- LargestType = getBackedgeIVType(L);
- }
- if (!LargestType ||
- SE->getTypeSizeInBits(Ty) >
- SE->getTypeSizeInBits(LargestType))
- LargestType = SE->getEffectiveSCEVType(Ty);
- }
- if (EnableIVRewrite) {
- for (IVUsers::const_iterator I = IU->begin(), E = IU->end(); I != E; ++I) {
- NeedCannIV = true;
- Type *Ty =
- SE->getEffectiveSCEVType(I->getOperandValToReplace()->getType());
- if (!LargestType ||
- SE->getTypeSizeInBits(Ty) >
- SE->getTypeSizeInBits(LargestType))
- LargestType = Ty;
- }
- }
-
- // Now that we know the largest of the induction variable expressions
- // in this loop, insert a canonical induction variable of the largest size.
- PHINode *IndVar = 0;
- if (NeedCannIV) {
- // Check to see if the loop already has any canonical-looking induction
- // variables. If any are present and wider than the planned canonical
- // induction variable, temporarily remove them, so that the Rewriter
- // doesn't attempt to reuse them.
- SmallVector<PHINode *, 2> OldCannIVs;
- while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
- if (SE->getTypeSizeInBits(OldCannIV->getType()) >
- SE->getTypeSizeInBits(LargestType))
- OldCannIV->removeFromParent();
- else
- break;
- OldCannIVs.push_back(OldCannIV);
- }
+ NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);
- IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
-
- ++NumInserted;
- Changed = true;
- DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
-
- // Now that the official induction variable is established, reinsert
- // any old canonical-looking variables after it so that the IR remains
- // consistent. They will be deleted as part of the dead-PHI deletion at
- // the end of the pass.
- while (!OldCannIVs.empty()) {
- PHINode *OldCannIV = OldCannIVs.pop_back_val();
- OldCannIV->insertBefore(L->getHeader()->getFirstInsertionPt());
- }
- }
- else if (!EnableIVRewrite && ExpandBECount && needsLFTR(L, DT)) {
- IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
- }
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
- Value *NewICmp = 0;
- if (ExpandBECount && IndVar) {
- // Check preconditions for proper SCEVExpander operation. SCEV does not
- // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
- // pass that uses the SCEVExpander must do it. This does not work well for
- // loop passes because SCEVExpander makes assumptions about all loops, while
- // LoopPassManager only forces the current loop to be simplified.
- //
- // FIXME: SCEV expansion has no way to bail out, so the caller must
- // explicitly check any assumptions made by SCEV. Brittle.
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
- if (!AR || AR->getLoop()->getLoopPreheader())
- NewICmp =
- LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
+ if (canExpandBackedgeTakenCount(L, SE) && needsLFTR(L, DT)) {
+ PHINode *IndVar = FindLoopCounter(L, BackedgeTakenCount, SE, DT, TD);
+ if (IndVar) {
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must do it. This does not work well for
+ // loop passes because SCEVExpander makes assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
+ Rewriter);
+ }
}
- // Rewrite IV-derived expressions.
- if (EnableIVRewrite)
- RewriteIVExpressions(L, Rewriter);
-
// Clear the rewriter cache, because values that are in the rewriter's cache
// can be deleted in the loop below, causing the AssertingVH in the cache to
// trigger.
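
The cache-clearing comment above reflects a general discipline around AssertingVH: a handle that aborts if its value is deleted while still referenced. A toy model of the required ordering, with a plain pointer cache standing in for the rewriter's value handles (none of this is LLVM's API):

    #include <cassert>
    #include <map>
    #include <memory>

    struct Value { int Id; }; // illustrative stand-in for a cached IR value

    int main() {
      // The rewriter's cache holds handles to values; AssertingVH would abort
      // if a cached value were deleted while the entry still existed.
      std::map<int, Value*> RewriterCache;
      std::unique_ptr<Value> V(new Value{42});
      RewriterCache[0] = V.get();

      // Correct ordering, as in the pass above: clear the cache first, then
      // delete instructions -- no handle can observe the deletion.
      RewriterCache.clear();
      V.reset();
      assert(RewriterCache.empty());
    }
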
@@ -1906,13 +1709,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// loop may be sunk below the loop to reduce register pressure.
SinkUnusedInvariants(L);
- // For completeness, inform IVUsers of the IV use in the newly-created
- // loop exit test instruction.
- if (IU && NewICmp) {
- ICmpInst *NewICmpInst = dyn_cast<ICmpInst>(NewICmp);
- if (NewICmpInst)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmpInst->getOperand(0)));
- }
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
// Check a post-condition.
@@ -1922,8 +1718,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
#ifndef NDEBUG
- if (!EnableIVRewrite && VerifyIndvars &&
- !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+ if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
SE->forgetLoop(L);
const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index f410af32759c..429b61b6e501 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
@@ -75,6 +76,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ TargetLibraryInfo *TLI;
LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
@@ -107,6 +109,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<LazyValueInfo>();
AU.addPreserved<LazyValueInfo>();
+ AU.addRequired<TargetLibraryInfo>();
}
void FindLoopHeaders(Function &F);
@@ -133,6 +136,7 @@ char JumpThreading::ID = 0;
INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
"Jump Threading", false, false)
INITIALIZE_PASS_DEPENDENCY(LazyValueInfo)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(JumpThreading, "jump-threading",
"Jump Threading", false, false)
@@ -144,6 +148,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
FindLoopHeaders(F);
@@ -674,7 +679,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Run constant folding to see if we can reduce the condition to a simple
// constant.
if (Instruction *I = dyn_cast<Instruction>(Condition)) {
- Value *SimpleVal = ConstantFoldInstruction(I, TD);
+ Value *SimpleVal = ConstantFoldInstruction(I, TD, TLI);
if (SimpleVal) {
I->replaceAllUsesWith(SimpleVal);
I->eraseFromParent();
@@ -852,6 +857,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (BBIt != LoadBB->begin())
return false;
+ // If all of the loads and stores that feed the value have the same TBAA tag,
+ // then we can propagate it onto any newly inserted loads.
+ MDNode *TBAATag = LI->getMetadata(LLVMContext::MD_tbaa);
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
@@ -870,11 +878,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6);
+ MDNode *ThisTBAATag = 0;
+ Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
+ 0, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
+
+ // If tbaa tags disagree or are not present, forget about them.
+  // If TBAA tags disagree or are not present, forget about them.
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -921,8 +934,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
- SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-pre-split", this);
+ SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -935,6 +947,9 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
LI->getAlignment(),
UnavailablePred->getTerminator());
NewVal->setDebugLoc(LI->getDebugLoc());
+ if (TBAATag)
+ NewVal->setMetadata(LLVMContext::MD_tbaa, TBAATag);
+
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
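
The TBAA handling threaded through this function amounts to an all-or-nothing merge: the new load keeps the tag only when the original load and every predecessor's available value agree on the identical metadata node. A sketch of that fold, with MDNodeStub as a hypothetical stand-in for llvm::MDNode (whose identity is pointer equality):

    #include <cstdio>
    #include <vector>

    struct MDNodeStub {}; // hypothetical stand-in for llvm::MDNode

    // Mirrors the logic above: start from the original load's tag and drop
    // it the moment any predecessor's available value disagrees (or has
    // no tag at all, represented by nullptr).
    static const MDNodeStub *mergeTBAA(const MDNodeStub *Tag,
                                       const std::vector<const MDNodeStub*> &Preds) {
      for (const MDNodeStub *P : Preds)
        if (Tag != P)
          Tag = nullptr;
      return Tag;
    }

    int main() {
      MDNodeStub IntTag, FloatTag;
      std::vector<const MDNodeStub*> Agree = {&IntTag, &IntTag};
      std::vector<const MDNodeStub*> Mixed = {&IntTag, &FloatTag};
      std::printf("%d %d\n", mergeTBAA(&IntTag, Agree) != nullptr,   // 1
                             mergeTBAA(&IntTag, Mixed) != nullptr);  // 0
    }
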
@@ -1082,9 +1097,9 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
DestBB = 0;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
- else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator()))
- DestBB = SI->getSuccessor(SI->findCaseValue(cast<ConstantInt>(Val)));
- else {
+ else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ DestBB = SI->findCaseValue(cast<ConstantInt>(Val)).getCaseSuccessor();
+ } else {
assert(isa<IndirectBrInst>(BB->getTerminator())
&& "Unexpected terminator");
DestBB = cast<BlockAddress>(Val)->getBasicBlock();
@@ -1334,8 +1349,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// And finally, do it!
@@ -1479,8 +1493,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
- PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
- ".thr_comm", this);
+ PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index b79bb1300fec..8795cd853fae 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -43,8 +43,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +87,7 @@ namespace {
AU.addPreserved<AliasAnalysis>();
AU.addPreserved("scalar-evolution");
AU.addPreservedID(LoopSimplifyID);
+ AU.addRequired<TargetLibraryInfo>();
}
bool doFinalization() {
@@ -96,6 +100,9 @@ namespace {
LoopInfo *LI; // Current LoopInfo
DominatorTree *DT; // Dominator Tree for the current Loop.
+ TargetData *TD; // TargetData for constant folding.
+ TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
+
// State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
@@ -177,6 +184,7 @@ INITIALIZE_PASS_BEGIN(LICM, "licm", "Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
@@ -194,6 +202,9 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<TargetData>();
+ TLI = &getAnalysis<TargetLibraryInfo>();
+
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
@@ -333,7 +344,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
// Try constant folding this instruction. If all the operands are
// constants, it is technically hoistable, but it would be better to just
// fold it.
- if (Constant *C = ConstantFoldInstruction(&I)) {
+ if (Constant *C = ConstantFoldInstruction(&I, TD, TLI)) {
DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
CurAST->copyValue(&I, C);
CurAST->deleteValue(&I);
@@ -369,6 +380,8 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
+ if (LI->getMetadata("invariant.load"))
+ return true;
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
@@ -579,7 +592,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (Inst.isSafeToSpeculativelyExecute())
+ if (isSafeToSpeculativelyExecute(&Inst))
return true;
return isGuaranteedToExecute(Inst);
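
Taken together, the LICM changes above refine a three-way decision made while walking the loop body in dominator order: fold the instruction to a constant when its operands allow it, hoist it when speculation is safe or execution is guaranteed, and otherwise leave it in place. A toy decision table, with fields that are stand-ins for the real predicates (ConstantFoldInstruction, isSafeToSpeculativelyExecute, isGuaranteedToExecute):

    #include <cstdio>

    // Toy model of the hoisting decision above, on a fake instruction.
    struct Inst {
      bool AllOperandsConstant;
      bool MayTrap;             // e.g. a division whose RHS might be zero
      bool GuaranteedToExecute; // dominates all loop exits
    };

    static const char *licmAction(const Inst &I) {
      if (I.AllOperandsConstant)
        return "fold";  // cheaper than hoisting: just substitute the constant
      if (!I.MayTrap || I.GuaranteedToExecute)
        return "hoist"; // safe to run unconditionally in the preheader
      return "keep";    // speculating it could introduce a trap
    }

    int main() {
      std::printf("%s %s %s\n",
                  licmAction({true,  false, false}),   // fold
                  licmAction({false, true,  true}),    // hoist
                  licmAction({false, true,  false}));  // keep
    }
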
diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt
new file mode 100644
index 000000000000..cee911976804
--- /dev/null
+++ b/lib/Transforms/Scalar/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Transforms/Scalar/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Scalar
+parent = Transforms
+library_name = ScalarOpts
+required_libraries = Analysis Core InstCombine Support Target TransformUtils
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index af25c5c1a661..f0f05e6f500a 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
@@ -43,6 +44,7 @@ namespace {
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
AU.addPreserved("scalar-evolution");
+ AU.addRequired<TargetLibraryInfo>();
}
};
}
@@ -50,6 +52,7 @@ namespace {
char LoopInstSimplify::ID = 0;
INITIALIZE_PASS_BEGIN(LoopInstSimplify, "loop-instsimplify",
"Simplify instructions in loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@@ -64,6 +67,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
LoopInfo *LI = &getAnalysis<LoopInfo>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -104,7 +108,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Don't bother simplifying unused instructions.
if (!I->use_empty()) {
- Value *V = SimplifyInstruction(I, TD, DT);
+ Value *V = SimplifyInstruction(I, TD, TLI, DT);
if (V && LI->replacementPreservesLCSSAForm(I, V)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 9fd0958fd4c3..59aace9e36dd 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -52,13 +53,14 @@ namespace {
}
bool runOnLoop(Loop *L, LPPassManager &LPM);
+ void simplifyLoopLatch(Loop *L);
bool rotateLoop(Loop *L);
-
+
private:
LoopInfo *LI;
};
}
-
+
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -73,6 +75,11 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
+ // Simplify the loop latch before attempting to rotate the header
+ // upward. Rotation may not be needed if the loop tail can be folded into the
+ // loop exit.
+ simplifyLoopLatch(L);
+
// One loop can be rotated multiple times.
bool MadeChange = false;
while (rotateLoop(L))
@@ -92,18 +99,18 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
BasicBlock::iterator I, E = OrigHeader->end();
for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
-
+
// Now fix up users of the instructions in OrigHeader, inserting PHI nodes
// as necessary.
SSAUpdater SSA;
for (I = OrigHeader->begin(); I != E; ++I) {
Value *OrigHeaderVal = I;
-
+
// If there are no uses of the value (e.g. because it returns void), there
// is nothing to rewrite.
if (OrigHeaderVal->use_empty())
continue;
-
+
Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
// The value now exits in two versions: the initial value in the preheader
@@ -111,27 +118,27 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
-
+
// Visit each use of the OrigHeader instruction.
for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
UE = OrigHeaderVal->use_end(); UI != UE; ) {
// Grab the use before incrementing the iterator.
Use &U = UI.getUse();
-
+
// Increment the iterator before removing the use from the list.
++UI;
-
+
// SSAUpdater can't handle a non-PHI use in the same block as an
// earlier def. We can easily handle those cases manually.
Instruction *UserInst = cast<Instruction>(U.getUser());
if (!isa<PHINode>(UserInst)) {
BasicBlock *UserBB = UserInst->getParent();
-
+
// The original users in the OrigHeader are already using the
// original definitions.
if (UserBB == OrigHeader)
continue;
-
+
// Users in the OrigPreHeader need to use the value to which the
// original definitions are mapped.
if (UserBB == OrigPreheader) {
@@ -139,32 +146,128 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
continue;
}
}
-
+
// Anything else can be handled by SSAUpdater.
SSA.RewriteUse(U);
}
}
-}
+}
+
+/// Determine whether the instructions in this range may be safely and cheaply
+/// speculated. This is not an important enough situation to develop complex
+/// heuristics. We handle a single arithmetic instruction along with any type
+/// conversions.
+static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ bool seenIncrement = false;
+ for (BasicBlock::iterator I = Begin; I != End; ++I) {
+
+ if (!isSafeToSpeculativelyExecute(I))
+ return false;
+
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return false;
+ // fall-thru to increment case
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (seenIncrement)
+ return false;
+ seenIncrement = true;
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // ignore type conversions
+ break;
+ }
+ }
+ return true;
+}
+
+/// Fold the loop tail into the loop exit by speculating the loop tail
+/// instructions. Typically, this is a single post-increment. In the case of a
+/// simple 2-block loop, hoisting the increment can be much better than
+/// duplicating the entire loop header. In the case of loops with early exits,
+/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
+/// canonical form so downstream passes can handle it.
+///
+/// I don't believe this invalidates SCEV.
+void LoopRotate::simplifyLoopLatch(Loop *L) {
+ BasicBlock *Latch = L->getLoopLatch();
+ if (!Latch || Latch->hasAddressTaken())
+ return;
+
+ BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
+ if (!Jmp || !Jmp->isUnconditional())
+ return;
+
+ BasicBlock *LastExit = Latch->getSinglePredecessor();
+ if (!LastExit || !L->isLoopExiting(LastExit))
+ return;
+
+ BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
+ if (!BI)
+ return;
+
+ if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
+ return;
+
+ DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
+ << LastExit->getName() << "\n");
+
+ // Hoist the instructions from Latch into LastExit.
+ LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
+
+ unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
+ BasicBlock *Header = Jmp->getSuccessor(0);
+ assert(Header == L->getHeader() && "expected a backward branch");
+
+ // Remove Latch from the CFG so that LastExit becomes the new Latch.
+ BI->setSuccessor(FallThruPath, Header);
+ Latch->replaceSuccessorsPhiUsesWith(LastExit);
+ Jmp->eraseFromParent();
+
+ // Nuke the Latch block.
+ assert(Latch->empty() && "unable to evacuate Latch");
+ LI->removeBlock(Latch);
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->eraseNode(Latch);
+ Latch->eraseFromParent();
+}
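
shouldSpeculateInstrs above encodes a deliberately simple cost model: any number of type conversions, constant-index GEPs folded into the increment case, and at most one cheap arithmetic op. A standalone model of that scan over an illustrative opcode subset (the enum and helper are assumptions, not LLVM types):

    #include <cstdio>
    #include <vector>

    enum Opcode { Add, Sub, Shl, Trunc, ZExt, SExt, Call };

    static bool shouldSpeculate(const std::vector<Opcode> &Range) {
      bool SeenIncrement = false;
      for (Opcode Op : Range) {
        switch (Op) {
        case Add: case Sub: case Shl:
          if (SeenIncrement)
            return false;     // a second arithmetic op is no longer "cheap"
          SeenIncrement = true;
          break;
        case Trunc: case ZExt: case SExt:
          break;              // ignore type conversions
        default:
          return false;       // anything else is not worth speculating
        }
      }
      return true;
    }

    int main() {
      std::printf("%d %d\n", shouldSpeculate({SExt, Add, Trunc}),  // 1
                             shouldSpeculate({Add, Add}));         // 0
    }
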
/// Rotate loop LP. Return true if the loop is rotated.
bool LoopRotate::rotateLoop(Loop *L) {
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
-
+
BasicBlock *OrigHeader = L->getHeader();
-
+
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
if (BI == 0 || BI->isUnconditional())
return false;
-
+
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
if (!L->isLoopExiting(OrigHeader))
return false;
- // Updating PHInodes in loops with multiple exits adds complexity.
+ // Updating PHInodes in loops with multiple exits adds complexity.
// Keep it simple, and restrict loop rotation to loops with one exit only.
// In future, lift this restriction and support for multiple exits if
// required.
@@ -184,7 +287,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// Now, this loop is suitable for rotation.
BasicBlock *OrigPreheader = L->getLoopPreheader();
BasicBlock *OrigLatch = L->getLoopLatch();
-
+
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
if (OrigPreheader == 0 || OrigLatch == 0)
@@ -203,9 +306,9 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (L->contains(Exit))
std::swap(Exit, NewHeader);
assert(NewHeader && "Unable to determine new loop header");
- assert(L->contains(NewHeader) && !L->contains(Exit) &&
+ assert(L->contains(NewHeader) && !L->contains(Exit) &&
"Unable to determine loop header and exit blocks");
-
+
// This code assumes that the new header has exactly one predecessor.
// Remove any single-entry PHI nodes in it.
assert(NewHeader->getSinglePredecessor() &&
@@ -227,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
while (I != E) {
Instruction *Inst = I++;
-
+
// If the instruction's operands are invariant and it doesn't read or write
// memory, then it is safe to hoist. Doing this doesn't change the order of
// execution in the preheader, but does prevent the instruction from
@@ -236,18 +339,19 @@ bool LoopRotate::rotateLoop(Loop *L) {
// memory (without proving that the loop doesn't write).
if (L->hasLoopInvariantOperands(Inst) &&
!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
- !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) {
+ !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
+ !isa<AllocaInst>(Inst)) {
Inst->moveBefore(LoopEntryBranch);
continue;
}
-
+
// Otherwise, create a duplicate of the instruction.
Instruction *C = Inst->clone();
-
+
// Eagerly remap the operands of the instruction.
RemapInstruction(C, ValueMap,
RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
-
+
// With the operands remapped, see if the instruction constant folds or is
// otherwise simplifyable. This commonly occurs because the entry from PHI
// nodes allows icmps and other instructions to fold.
@@ -287,7 +391,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
L->moveToHeader(NewHeader);
assert(L->getHeader() == NewHeader && "Latch block is our new header");
-
+
// At this point, we've finished our major CFG changes. As part of cloning
// the loop into the preheader we've simplified instructions and the
// duplicated conditional branch may now be branching on a constant. If it is
@@ -308,16 +412,16 @@ bool LoopRotate::rotateLoop(Loop *L) {
// the dominator of Exit.
DT->changeImmediateDominator(Exit, OrigPreheader);
DT->changeImmediateDominator(NewHeader, OrigPreheader);
-
+
// Update OrigHeader to be dominated by the new header block.
DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
-
+
// Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
// thus is not a preheader anymore. Split the edge to form a real preheader.
BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
NewPH->setName(NewHeader->getName() + ".lr.ph");
-
+
// Preserve canonical loop form, which means that 'Exit' should have only one
// predecessor.
BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
@@ -329,7 +433,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
NewBI->setDebugLoc(PHBI->getDebugLoc());
PHBI->eraseFromParent();
-
+
// With our CFG finalized, update DomTree if it is available.
if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
// Update OrigHeader to be dominated by the new header block.
@@ -337,7 +441,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
DT->changeImmediateDominator(OrigHeader, OrigLatch);
}
}
-
+
assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
@@ -346,7 +450,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// connected by an unconditional branch. This is just a cleanup so the
// emitted code isn't too gross in this common case.
MergeBlockIntoPredecessor(OrigHeader, this);
-
+
++NumRotated;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3e122c2a866e..d57ec22f44ab 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -77,19 +77,22 @@
#include <algorithm>
using namespace llvm;
-namespace llvm {
-cl::opt<bool> EnableNested(
- "enable-lsr-nested", cl::Hidden, cl::desc("Enable LSR on nested loops"));
-
-cl::opt<bool> EnableRetry(
- "enable-lsr-retry", cl::Hidden, cl::desc("Enable LSR retry"));
-
// Temporary flag to cleanup congruent phis after LSR phi expansion.
// It's currently disabled until we can determine whether it's truly useful or
// not. The flag should be removed after the v3.0 release.
-cl::opt<bool> EnablePhiElim(
- "enable-lsr-phielim", cl::Hidden, cl::desc("Enable LSR phi elimination"));
-}
+// This is now needed for IV chains.
+static cl::opt<bool> EnablePhiElim(
+ "enable-lsr-phielim", cl::Hidden, cl::init(true),
+ cl::desc("Enable LSR phi elimination"));
+
+#ifndef NDEBUG
+// Stress test IV chain generation.
+static cl::opt<bool> StressIVChain(
+ "stress-ivchain", cl::Hidden, cl::init(false),
+ cl::desc("Stress test LSR IV chains"));
+#else
+static bool StressIVChain = false;
+#endif
namespace {
@@ -636,6 +639,91 @@ static Type *getAccessType(const Instruction *Inst) {
return AccessTy;
}
+/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
+static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (SE.isSCEVable(PN->getType()) &&
+ (SE.getEffectiveSCEVType(PN->getType()) ==
+ SE.getEffectiveSCEVType(AR->getType())) &&
+ SE.getSCEV(PN) == AR)
+ return true;
+ }
+ return false;
+}
+
+/// Check if expanding this expression is likely to incur significant cost. This
+/// is tricky because SCEV doesn't track which expressions are actually computed
+/// by the current IR.
+///
+/// We currently allow expansion of IV increments that involve adds,
+/// multiplication by constants, and AddRecs from existing phis.
+///
+/// TODO: Allow UDivExpr if we can find an existing IV increment that is an
+/// obvious multiple of the UDivExpr.
+static bool isHighCostExpansion(const SCEV *S,
+ SmallPtrSet<const SCEV*, 8> &Processed,
+ ScalarEvolution &SE) {
+ // Zero/One operand expressions
+ switch (S->getSCEVType()) {
+ case scUnknown:
+ case scConstant:
+ return false;
+ case scTruncate:
+ return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
+ Processed, SE);
+ case scZeroExtend:
+ return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ case scSignExtend:
+ return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
+ Processed, SE);
+ }
+
+ if (!Processed.insert(S))
+ return false;
+
+ if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ if (isHighCostExpansion(*I, Processed, SE))
+ return true;
+ }
+ return false;
+ }
+
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+ if (Mul->getNumOperands() == 2) {
+ // Multiplication by a constant is ok
+ if (isa<SCEVConstant>(Mul->getOperand(0)))
+ return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
+
+ // If we have the value of one operand, check if an existing
+ // multiplication already generates this expression.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
+ Value *UVal = U->getValue();
+ for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end();
+ UI != UE; ++UI) {
+ // If U is a constant, it may be used by a ConstantExpr.
+ Instruction *User = dyn_cast<Instruction>(*UI);
+ if (User && User->getOpcode() == Instruction::Mul
+ && SE.isSCEVable(User->getType())) {
+ return SE.getSCEV(User) == Mul;
+ }
+ }
+ }
+ }
+ }
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ if (isExistingPhi(AR, SE))
+ return false;
+ }
+
+ // For now, consider any other type of expression (div/mul/min/max) high cost.
+ return true;
+}
+
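A minimal usage sketch of the predicate above, mirroring the chain-increment check introduced later in this patch; the Processed set memoizes visited nodes so shared SCEV subexpressions are walked only once:
// Sketch: reject a candidate increment that is expensive to materialize.
SmallPtrSet<const SCEV*, 8> Processed;
if (isHighCostExpansion(IncExpr, Processed, SE))
  return 0; // treat this candidate as unprofitable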
 /// DeleteTriviallyDeadInstructions - If any of the instructions in the
 /// specified set are trivially dead, delete them and see if this makes any of
/// their operands subsequently dead.
@@ -705,7 +793,8 @@ public:
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
void print(raw_ostream &OS) const;
void dump() const;
@@ -718,7 +807,8 @@ private:
void RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT);
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs);
};
}
@@ -729,41 +819,20 @@ void Cost::RateRegister(const SCEV *Reg,
const Loop *L,
ScalarEvolution &SE, DominatorTree &DT) {
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
- if (AR->getLoop() == L)
- AddRecCost += 1; /// TODO: This should be a function of the stride.
-
// If this is an addrec for another loop, don't second-guess its addrec phi
// nodes. LSR isn't currently smart enough to reason about more than one
- // loop at a time. LSR has either already run on inner loops, will not run
- // on other loops, and cannot be expected to change sibling loops. If the
- // AddRec exists, consider it's register free and leave it alone. Otherwise,
- // do not consider this formula at all.
- // FIXME: why do we need to generate such fomulae?
- else if (!EnableNested || L->contains(AR->getLoop()) ||
- (!AR->getLoop()->contains(L) &&
- DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))) {
- for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I) {
- if (SE.isSCEVable(PN->getType()) &&
- (SE.getEffectiveSCEVType(PN->getType()) ==
- SE.getEffectiveSCEVType(AR->getType())) &&
- SE.getSCEV(PN) == AR)
- return;
- }
- if (!EnableNested) {
- Loose();
+ // loop at a time. LSR has already run on inner loops, will not run on outer
+ // loops, and cannot be expected to change sibling loops.
+ if (AR->getLoop() != L) {
+ // If the AddRec exists, consider its register free and leave it alone.
+ if (isExistingPhi(AR, SE))
return;
- }
- // If this isn't one of the addrecs that the loop already has, it
- // would require a costly new phi and add. TODO: This isn't
- // precisely modeled right now.
- ++NumBaseAdds;
- if (!Regs.count(AR->getStart())) {
- RateRegister(AR->getStart(), Regs, L, SE, DT);
- if (isLoser())
- return;
- }
+
+ // Otherwise, do not consider this formula at all.
+ Loose();
+ return;
}
+ AddRecCost += 1; /// TODO: This should be a function of the stride.
// Add the step value register, if it needs one.
// TODO: The non-affine case isn't precisely modeled here.
@@ -791,13 +860,22 @@ void Cost::RateRegister(const SCEV *Reg,
}
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
-/// before, rate it.
+/// before, rate it. Optional LoserRegs provides a way to declare any formula
+/// that refers to one of those regs an instant loser.
void Cost::RatePrimaryRegister(const SCEV *Reg,
SmallPtrSet<const SCEV *, 16> &Regs,
const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
- if (Regs.insert(Reg))
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ if (LoserRegs && LoserRegs->count(Reg)) {
+ Loose();
+ return;
+ }
+ if (Regs.insert(Reg)) {
RateRegister(Reg, Regs, L, SE, DT);
+ if (isLoser() && LoserRegs)
+ LoserRegs->insert(Reg);
+ }
}
void Cost::RateFormula(const Formula &F,
@@ -805,14 +883,15 @@ void Cost::RateFormula(const Formula &F,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
const SmallVectorImpl<int64_t> &Offsets,
- ScalarEvolution &SE, DominatorTree &DT) {
+ ScalarEvolution &SE, DominatorTree &DT,
+ SmallPtrSet<const SCEV *, 16> *LoserRegs) {
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
Loose();
return;
}
- RatePrimaryRegister(ScaledReg, Regs, L, SE, DT);
+ RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -823,7 +902,7 @@ void Cost::RateFormula(const Formula &F,
Loose();
return;
}
- RatePrimaryRegister(BaseReg, Regs, L, SE, DT);
+ RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
if (isLoser())
return;
}
@@ -1105,7 +1184,6 @@ bool LSRUse::InsertFormula(const Formula &F) {
Formulae.push_back(F);
// Record registers now being used by this use.
- if (F.ScaledReg) Regs.insert(F.ScaledReg);
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
return true;
@@ -1116,7 +1194,6 @@ void LSRUse::DeleteFormula(Formula &F) {
if (&F != &Formulae.back())
std::swap(F, Formulae.back());
Formulae.pop_back();
- assert(!Formulae.empty() && "LSRUse has no formulae left!");
}
/// RecomputeRegs - Recompute the Regs field, and update RegUses.
@@ -1205,10 +1282,19 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (TLI) return TLI->isLegalICmpImmediate(-(uint64_t)AM.BaseOffs);
- return false;
+ if (!TLI)
+ return false;
+ // We have one of:
+ // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
+ // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
+ // Offs is the ICmp immediate.
+ int64_t Offs = AM.BaseOffs;
+ if (AM.Scale == 0)
+ Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
+ return TLI->isLegalICmpImmediate(Offs);
}
+ // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
return true;
case LSRUse::Basic:
@@ -1220,7 +1306,7 @@ static bool isLegalUse(const TargetLowering::AddrMode &AM,
return AM.Scale == 0 || AM.Scale == -1;
}
- return false;
+ llvm_unreachable("Invalid LSRUse Kind!");
}
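A worked example of the new ICmpZero offset handling above, with illustrative values:
// ICmpZero (BaseReg + -16), Scale == 0:
//   Offs = -(uint64_t)(-16) = 16, so we ask TLI->isLegalICmpImmediate(16).
// ICmpZero (-1*ScaleReg + 42), Scale == -1:
//   Offs stays 42, so we ask TLI->isLegalICmpImmediate(42).
// The unsigned negation is deliberate: -(uint64_t)INT64_MIN wraps back to
// INT64_MIN instead of triggering signed-overflow undefined behavior.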
static bool isLegalUse(TargetLowering::AddrMode AM,
@@ -1327,6 +1413,36 @@ struct UseMapDenseMapInfo {
}
};
+/// IVInc - An individual increment in a Chain of IV increments.
+/// Relate an IV user to an expression that computes the IV it uses from the IV
+/// used by the previous link in the Chain.
+///
+/// For the head of a chain, IncExpr holds the absolute SCEV expression for the
+/// original IVOperand. The head of the chain's IVOperand is only valid during
+/// chain collection, before LSR replaces IV users. During chain generation,
+/// IncExpr can be used to find the new IVOperand that computes the same
+/// expression.
+struct IVInc {
+ Instruction *UserInst;
+ Value* IVOperand;
+ const SCEV *IncExpr;
+
+ IVInc(Instruction *U, Value *O, const SCEV *E):
+ UserInst(U), IVOperand(O), IncExpr(E) {}
+};
+
+// IVChain - The list of IV increments in program order.
+// We typically add the head of a chain without finding subsequent links.
+typedef SmallVector<IVInc,1> IVChain;
+
+/// ChainUsers - Helper for CollectChains to track multiple IV increment uses.
+/// Distinguish between FarUsers that definitely cross IV increments and
+/// NearUsers that may be used between IV increments.
+struct ChainUsers {
+ SmallPtrSet<Instruction*, 4> FarUsers;
+ SmallPtrSet<Instruction*, 4> NearUsers;
+};
+
/// LSRInstance - This class holds state for the main loop strength reduction
/// logic.
class LSRInstance {
@@ -1359,11 +1475,29 @@ class LSRInstance {
/// RegUses - Track which uses use which register candidates.
RegUseTracker RegUses;
+ // Limit the number of chains to avoid quadratic behavior. We don't expect to
+ // have more than a few IV increment chains in a loop. Missing a Chain falls
+ // back to normal LSR behavior for those uses.
+ static const unsigned MaxChains = 8;
+
+ /// IVChainVec - IV users can form a chain of IV increments.
+ SmallVector<IVChain, MaxChains> IVChainVec;
+
+ /// IVIncSet - IV users that belong to profitable IVChains.
+ SmallPtrSet<Use*, MaxChains> IVIncSet;
+
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
void OptimizeLoopTermCond();
+ void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec);
+ void FinalizeChain(IVChain &Chain);
+ void CollectChains();
+ void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts);
+
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
@@ -1389,7 +1523,6 @@ class LSRInstance {
LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
-public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
@@ -1428,9 +1561,11 @@ public:
BasicBlock::iterator
HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs) const;
- BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
- const LSRFixup &LF,
- const LSRUse &LU) const;
+ BasicBlock::iterator
+ AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+ const LSRFixup &LF,
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const;
Value *Expand(const LSRFixup &LF,
const Formula &F,
@@ -1450,6 +1585,7 @@ public:
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
+public:
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -2045,7 +2181,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
do {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- Strides.insert(AR->getStepRecurrence(SE));
+ if (AR->getLoop() == L)
+ Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
Worklist.append(Add->op_begin(), Add->op_end());
@@ -2091,11 +2228,544 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
DEBUG(print_factors_and_types(dbgs()));
}
+/// findIVOperand - Helper for CollectChains that finds an IV operand (computed
+/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped
+/// Instructions to IVStrideUses, we could partially skip this.
+static User::op_iterator
+findIVOperand(User::op_iterator OI, User::op_iterator OE,
+ Loop *L, ScalarEvolution &SE) {
+ for(; OI != OE; ++OI) {
+ if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
+ if (!SE.isSCEVable(Oper->getType()))
+ continue;
+
+ if (const SCEVAddRecExpr *AR =
+ dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
+ if (AR->getLoop() == L)
+ break;
+ }
+ }
+ }
+ return OI;
+}
+
+/// getWideOperand - IVChain logic must consistently look through TruncInst
+/// operands, so wrap it in a convenient helper.
+static Value *getWideOperand(Value *Oper) {
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
+ return Trunc->getOperand(0);
+ return Oper;
+}
+
+/// isCompatibleIVType - Return true if we allow an IV chain to include both
+/// types.
+static bool isCompatibleIVType(Value *LVal, Value *RVal) {
+ Type *LType = LVal->getType();
+ Type *RType = RVal->getType();
+ return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy());
+}
+
+/// getExprBase - Return an approximation of this SCEV expression's "base", or
+/// NULL for any constant. Returning the expression itself is
+/// conservative. Returning a deeper subexpression is more precise and valid as
+/// long as it isn't less complex than another subexpression. For expressions
+/// involving multiple unscaled values, we need to return the pointer-type
+/// SCEVUnknown. This avoids forming chains across objects, such as:
+/// PrevOper==a[i], IVOper==b[i], IVInc==b-a.
+///
+/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
+/// SCEVUnknown, we simply return the rightmost SCEV operand.
+static const SCEV *getExprBase(const SCEV *S) {
+ switch (S->getSCEVType()) {
+ default: // including scUnknown.
+ return S;
+ case scConstant:
+ return 0;
+ case scTruncate:
+ return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
+ case scZeroExtend:
+ return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
+ case scSignExtend:
+ return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
+ case scAddExpr: {
+ // Skip over scaled operands (scMulExpr) to follow add operands as long as
+ // there's nothing more complex.
+ // FIXME: not sure if we want to recognize negation.
+ const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()),
+ E(Add->op_begin()); I != E; ++I) {
+ const SCEV *SubExpr = *I;
+ if (SubExpr->getSCEVType() == scAddExpr)
+ return getExprBase(SubExpr);
+
+ if (SubExpr->getSCEVType() != scMulExpr)
+ return SubExpr;
+ }
+ return S; // all operands are scaled, be conservative.
+ }
+ case scAddRecExpr:
+ return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
+ }
+}
+
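Some worked examples of getExprBase on hypothetical SCEV expressions (add operands are shown in SCEV's canonical order, constants leftmost and unknowns rightmost, as the comment above relies on):
// getExprBase( (4 + %p) )               == %p   (rightmost add operand)
// getExprBase( (4 + (8 * %i) + %p) )    == %p   (the scaled 8*%i is skipped)
// getExprBase( {(16 + %p),+,4}<%loop> ) == %p   (recurses into the start)
// getExprBase( 42 )                     == NULL (constants have no base)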
+/// If it is profitable to expand the chain increment into a loop-invariant
+/// value (which may require its own register), return that increment
+/// expression; otherwise return NULL. A profitable chain increment is an
+/// offset relative to the same base. We allow such offsets to be used as the
+/// chain increment as long as they are not obviously expensive to expand
+/// using real instructions.
+static const SCEV *
+getProfitableChainIncrement(Value *NextIV, Value *PrevIV,
+ const IVChain &Chain, Loop *L,
+ ScalarEvolution &SE, const TargetLowering *TLI) {
+ // Prune the solution space aggressively by checking that both IV operands
+ // are expressions that operate on the same unscaled SCEVUnknown. This
+ // "base" will be canceled by the subsequent getMinusSCEV call. Checking first
+ // avoids creating extra SCEV expressions.
+ const SCEV *OperExpr = SE.getSCEV(NextIV);
+ const SCEV *PrevExpr = SE.getSCEV(PrevIV);
+ if (getExprBase(OperExpr) != getExprBase(PrevExpr) && !StressIVChain)
+ return 0;
+
+ const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
+ if (!SE.isLoopInvariant(IncExpr, L))
+ return 0;
+
+ // We are not able to expand an increment unless it is loop invariant,
+ // however, the following checks are purely for profitability.
+ if (StressIVChain)
+ return IncExpr;
+
+ // Do not replace a constant offset from IV head with a nonconstant IV
+ // increment.
+ if (!isa<SCEVConstant>(IncExpr)) {
+ const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Chain[0].IVOperand));
+ if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
+ return 0;
+ }
+
+ SmallPtrSet<const SCEV*, 8> Processed;
+ if (isHighCostExpansion(IncExpr, Processed, SE))
+ return 0;
+
+ return IncExpr;
+}
+
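To make the pruning concrete, a hypothetical example with 8-byte elements:
// PrevIV = &a[i]   with SCEV {%a,+,8}<%loop>
// NextIV = &a[i+4] with SCEV {(32 + %a),+,8}<%loop>
// Both share the base %a, so getMinusSCEV yields the loop-invariant constant
// 32, which is returned as the chain increment. If NextIV were &b[i] instead,
// the bases %a and %b would differ and we would return 0 without ever
// forming the subtraction.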
+/// Return true if the number of registers needed for the chain is estimated to
+/// be less than the number required for the individual IV users. First prohibit
+/// any IV users that keep the IV live across increments (the Users set should
+/// be empty). Next count the number and type of increments in the chain.
+///
+/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
+/// effectively use postinc addressing modes. Only consider it profitable if the
+/// increments can be computed in fewer registers when chained.
+///
+/// TODO: Consider an IVInc free if it is already used in other chains.
+static bool
+isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
+ ScalarEvolution &SE, const TargetLowering *TLI) {
+ if (StressIVChain)
+ return true;
+
+ if (Chain.size() <= 2)
+ return false;
+
+ if (!Users.empty()) {
+ DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " users:\n";
+ for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(),
+ E = Users.end(); I != E; ++I) {
+ dbgs() << " " << **I << "\n";
+ });
+ return false;
+ }
+ assert(!Chain.empty() && "empty IV chains are not allowed");
+
+ // The chain itself may require a register, so initialize cost to 1.
+ int cost = 1;
+
+ // A complete chain likely eliminates the need for keeping the original IV in
+ // a register. LSR does not currently know how to form a complete chain unless
+ // the header phi already exists.
+ if (isa<PHINode>(Chain.back().UserInst)
+ && SE.getSCEV(Chain.back().UserInst) == Chain[0].IncExpr) {
+ --cost;
+ }
+ const SCEV *LastIncExpr = 0;
+ unsigned NumConstIncrements = 0;
+ unsigned NumVarIncrements = 0;
+ unsigned NumReusedIncrements = 0;
+ for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ I != E; ++I) {
+
+ if (I->IncExpr->isZero())
+ continue;
+
+ // Incrementing by zero or some constant is neutral. We assume constants can
+ // be folded into an addressing mode or an add's immediate operand.
+ if (isa<SCEVConstant>(I->IncExpr)) {
+ ++NumConstIncrements;
+ continue;
+ }
+
+ if (I->IncExpr == LastIncExpr)
+ ++NumReusedIncrements;
+ else
+ ++NumVarIncrements;
+
+ LastIncExpr = I->IncExpr;
+ }
+ // An IV chain with a single increment is handled by LSR's postinc
+ // uses. However, a chain with multiple increments requires keeping the IV's
+ // value live longer than it needs to be if chained.
+ if (NumConstIncrements > 1)
+ --cost;
+
+ // Materializing increment expressions in the preheader that didn't exist in
+ // the original code may cost a register. For example, sign-extended array
+ // indices can produce ridiculous increments like this:
+ // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+ cost += NumVarIncrements;
+
+ // Reusing variable increments likely saves a register to hold the multiple of
+ // the stride.
+ cost -= NumReusedIncrements;
+
+ DEBUG(dbgs() << "Chain: " << *Chain[0].UserInst << " Cost: " << cost << "\n");
+
+ return cost < 0;
+}
+
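A worked cost computation for a hypothetical four-link chain, assuming the FarUsers set is empty (otherwise we bail out before costing):
// cost  = 1   the chain itself may need a register
// cost -= 1   the chain ends in the loop header phi (complete chain)
// cost -= 1   two nonzero constant increments, so NumConstIncrements > 1
// no variable increments, so nothing is added for NumVarIncrements
// final cost == -1 < 0, so isProfitableChain returns true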
+/// ChainInstruction - Add this IV user to an existing chain or make it the head
+/// of a new chain.
+void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
+ SmallVectorImpl<ChainUsers> &ChainUsersVec) {
+ // When IVs are used as types of varying widths, they are generally converted
+ // to a wider type with some uses remaining narrow under a (free) trunc.
+ Value *NextIV = getWideOperand(IVOper);
+
+ // Visit all existing chains. Check if this user's IVOper can be computed as a
+ // profitable loop invariant increment from the last link in the Chain.
+ unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ const SCEV *LastIncExpr = 0;
+ for (; ChainIdx < NChains; ++ChainIdx) {
+ Value *PrevIV = getWideOperand(IVChainVec[ChainIdx].back().IVOperand);
+ if (!isCompatibleIVType(PrevIV, NextIV))
+ continue;
+
+ // A phi node terminates a chain.
+ if (isa<PHINode>(UserInst)
+ && isa<PHINode>(IVChainVec[ChainIdx].back().UserInst))
+ continue;
+
+ if (const SCEV *IncExpr =
+ getProfitableChainIncrement(NextIV, PrevIV, IVChainVec[ChainIdx],
+ L, SE, TLI)) {
+ LastIncExpr = IncExpr;
+ break;
+ }
+ }
+ // If we haven't found a chain, create a new one, unless we hit the max. Don't
+ // bother for phi nodes, because they must be last in the chain.
+ if (ChainIdx == NChains) {
+ if (isa<PHINode>(UserInst))
+ return;
+ if (NChains >= MaxChains && !StressIVChain) {
+ DEBUG(dbgs() << "IV Chain Limit\n");
+ return;
+ }
+ LastIncExpr = SE.getSCEV(NextIV);
+ // IVUsers may have skipped over sign/zero extensions. We don't currently
+ // attempt to form chains involving extensions unless they can be hoisted
+ // into this loop's AddRec.
+ if (!isa<SCEVAddRecExpr>(LastIncExpr))
+ return;
+ ++NChains;
+ IVChainVec.resize(NChains);
+ ChainUsersVec.resize(NChains);
+ DEBUG(dbgs() << "IV Head: (" << *UserInst << ") IV=" << *LastIncExpr
+ << "\n");
+ }
+ else
+ DEBUG(dbgs() << "IV Inc: (" << *UserInst << ") IV+" << *LastIncExpr
+ << "\n");
+
+ // Add this IV user to the end of the chain.
+ IVChainVec[ChainIdx].push_back(IVInc(UserInst, IVOper, LastIncExpr));
+
+ SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
+ // This chain's NearUsers become FarUsers.
+ if (!LastIncExpr->isZero()) {
+ ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
+ NearUsers.end());
+ NearUsers.clear();
+ }
+
+ // All other uses of IVOperand become near uses of the chain.
+ // We currently ignore intermediate values within SCEV expressions, assuming
+ // they will eventually be used by the current chain, or can be computed
+ // from one of the chain increments. To be more precise, we could
+ // transitively follow each user and only add leaf IV users to the set.
+ for (Value::use_iterator UseIter = IVOper->use_begin(),
+ UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) {
+ Instruction *OtherUse = dyn_cast<Instruction>(*UseIter);
+ if (!OtherUse || OtherUse == UserInst)
+ continue;
+ if (SE.isSCEVable(OtherUse->getType())
+ && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
+ && IU.isIVUserOrOperand(OtherUse)) {
+ continue;
+ }
+ NearUsers.insert(OtherUse);
+ }
+
+ // Since this user is part of the chain, it's no longer considered a use
+ // of the chain.
+ ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
+}
+
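An illustrative trace of how ChainInstruction grows a chain over a hypothetical loop body visited in program order (CollectChains drives these calls; the header phi is offered last, via the backedge walk):
// %p  = phi [%entry, %p2]   ; offered last, terminates the chain
// %v0 = load %p             ; no chain matches -> head of a new chain
// %p1 = gep %p, 8
// %v1 = load %p1            ; increment 8 from the chain tail -> appended
// %p2 = gep %p1, 8
// %v2 = load %p2            ; increment 8 again -> appended (reused inc)
// Other users of %p..%p2 that are not IV users land in NearUsers and are
// promoted to FarUsers once a nonzero increment is appended.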
+/// CollectChains - Populate the vector of Chains.
+///
+/// This decreases ILP at the architecture level. Targets with ample registers,
+/// multiple memory ports, and no register renaming probably don't want
+/// this. However, such targets should probably disable LSR altogether.
+///
+/// The job of LSR is to make a reasonable choice of induction variables across
+/// the loop. Subsequent passes can easily "unchain" computation exposing more
+/// ILP *within the loop* if the target wants it.
+///
+/// Finding the best IV chain is potentially a scheduling problem. Since LSR
+/// will not reorder memory operations, it will recognize this as a chain, but
+/// will generate redundant IV increments. Ideally this would be corrected later
+/// by a smart scheduler:
+/// = A[i]
+/// = A[i+x]
+/// A[i] =
+/// A[i+x] =
+///
+/// TODO: Walk the entire domtree within this loop, not just the path to the
+/// loop latch. This will discover chains on side paths, but requires
+/// maintaining multiple copies of the Chains state.
+void LSRInstance::CollectChains() {
+ SmallVector<ChainUsers, 8> ChainUsersVec;
+
+ SmallVector<BasicBlock *,8> LatchPath;
+ BasicBlock *LoopHeader = L->getHeader();
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
+ Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
+ LatchPath.push_back(Rung->getBlock());
+ }
+ LatchPath.push_back(LoopHeader);
+
+ // Walk the instruction stream from the loop header to the loop latch.
+ for (SmallVectorImpl<BasicBlock *>::reverse_iterator
+ BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
+ BBIter != BBEnd; ++BBIter) {
+ for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
+ I != E; ++I) {
+ // Skip instructions that weren't seen by IVUsers analysis.
+ if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
+ continue;
+
+ // Ignore users that are part of a SCEV expression. This way we only
+ // consider leaf IV Users. This effectively rediscovers a portion of
+ // IVUsers analysis but in program order this time.
+ if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
+ continue;
+
+ // Remove this instruction from any NearUsers set it may be in.
+ for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
+ ChainIdx < NChains; ++ChainIdx) {
+ ChainUsersVec[ChainIdx].NearUsers.erase(I);
+ }
+ // Search for operands that can be chained.
+ SmallPtrSet<Instruction*, 4> UniqueOperands;
+ User::op_iterator IVOpEnd = I->op_end();
+ User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
+ while (IVOpIter != IVOpEnd) {
+ Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
+ if (UniqueOperands.insert(IVOpInst))
+ ChainInstruction(I, IVOpInst, ChainUsersVec);
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ } // Continue walking down the instructions.
+ } // Continue walking down the domtree.
+ // Visit phi backedges to determine if the chain can generate the IV postinc.
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ if (!SE.isSCEVable(PN->getType()))
+ continue;
+
+ Instruction *IncV =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (IncV)
+ ChainInstruction(PN, IncV, ChainUsersVec);
+ }
+ // Remove any unprofitable chains.
+ unsigned ChainIdx = 0;
+ for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
+ UsersIdx < NChains; ++UsersIdx) {
+ if (!isProfitableChain(IVChainVec[UsersIdx],
+ ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
+ continue;
+ // Preserve the chain at UsersIdx.
+ if (ChainIdx != UsersIdx)
+ IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
+ FinalizeChain(IVChainVec[ChainIdx]);
+ ++ChainIdx;
+ }
+ IVChainVec.resize(ChainIdx);
+}
+
+void LSRInstance::FinalizeChain(IVChain &Chain) {
+ assert(!Chain.empty() && "empty IV chains are not allowed");
+ DEBUG(dbgs() << "Final Chain: " << *Chain[0].UserInst << "\n");
+
+ for (IVChain::const_iterator I = llvm::next(Chain.begin()), E = Chain.end();
+ I != E; ++I) {
+ DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
+ User::op_iterator UseI =
+ std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
+ assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
+ IVIncSet.insert(UseI);
+ }
+}
+
+/// Return true if the IVInc can be folded into an addressing mode.
+static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
+ Value *Operand, const TargetLowering *TLI) {
+ const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
+ if (!IncConst || !isAddressUse(UserInst, Operand))
+ return false;
+
+ if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
+ return false;
+
+ int64_t IncOffset = IncConst->getValue()->getSExtValue();
+ if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
+ LSRUse::Address, getAccessType(UserInst), TLI))
+ return false;
+
+ return true;
+}
+
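A hypothetical, target-dependent example of when this succeeds:
// Chain link: %v = load i32* %p, with IncExpr == 8
//   IncConst = 8 fits in 64 signed bits, isAddressUse(load, %p) is true,
//   and isAlwaysFoldable(8, ...) succeeds on a target whose addressing
//   modes accept [reg + imm], so the +8 folds into the load's address
//   computation and no separate add is needed for this link.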
+/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
+/// materialize the IV user's operand from the previous IV user's operand.
+void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts) {
+ // Find the new IVOperand for the head of the chain. It may have been replaced
+ // by LSR.
+ const IVInc &Head = Chain[0];
+ User::op_iterator IVOpEnd = Head.UserInst->op_end();
+ User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
+ IVOpEnd, L, SE);
+ Value *IVSrc = 0;
+ while (IVOpIter != IVOpEnd) {
+ IVSrc = getWideOperand(*IVOpIter);
+
+ // If this operand computes the expression that the chain needs, we may use
+ // it. (Check this after setting IVSrc which is used below.)
+ //
+ // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
+ // narrow for the chain, so we can no longer use it. We do allow using a
+ // wider phi, assuming the LSR checked for free truncation. In that case we
+ // should already have a truncate on this operand such that
+ // getSCEV(IVSrc) == IncExpr.
+ if (SE.getSCEV(*IVOpIter) == Head.IncExpr
+ || SE.getSCEV(IVSrc) == Head.IncExpr) {
+ break;
+ }
+ IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
+ }
+ if (IVOpIter == IVOpEnd) {
+ // Gracefully give up on this chain.
+ DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
+ return;
+ }
+
+ DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
+ Type *IVTy = IVSrc->getType();
+ Type *IntTy = SE.getEffectiveSCEVType(IVTy);
+ const SCEV *LeftOverExpr = 0;
+ for (IVChain::const_iterator IncI = llvm::next(Chain.begin()),
+ IncE = Chain.end(); IncI != IncE; ++IncI) {
+
+ Instruction *InsertPt = IncI->UserInst;
+ if (isa<PHINode>(InsertPt))
+ InsertPt = L->getLoopLatch()->getTerminator();
+
+ // IVOper will replace the current IV User's operand. IVSrc is the IV
+ // value currently held in a register.
+ Value *IVOper = IVSrc;
+ if (!IncI->IncExpr->isZero()) {
+ // IncExpr was the result of subtraction of two narrow values, so must
+ // be signed.
+ const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
+ LeftOverExpr = LeftOverExpr ?
+ SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
+ }
+ if (LeftOverExpr && !LeftOverExpr->isZero()) {
+ // Expand the IV increment.
+ Rewriter.clearPostInc();
+ Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
+ const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
+ SE.getUnknown(IncV));
+ IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
+
+ // If an IV increment can't be folded, use it as the next IV value.
+ if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
+ TLI)) {
+ assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
+ IVSrc = IVOper;
+ LeftOverExpr = 0;
+ }
+ }
+ Type *OperTy = IncI->IVOperand->getType();
+ if (IVTy != OperTy) {
+ assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
+ "cannot extend a chained IV");
+ IRBuilder<> Builder(InsertPt);
+ IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
+ }
+ IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
+ DeadInsts.push_back(IncI->IVOperand);
+ }
+ // If LSR created a new, wider phi, we may also replace its postinc. We only
+ // do this if we also found a wide value for the head of the chain.
+ if (isa<PHINode>(Chain.back().UserInst)) {
+ for (BasicBlock::iterator I = L->getHeader()->begin();
+ PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
+ if (!isCompatibleIVType(Phi, IVSrc))
+ continue;
+ Instruction *PostIncV = dyn_cast<Instruction>(
+ Phi->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
+ continue;
+ Value *IVOper = IVSrc;
+ Type *PostIncTy = PostIncV->getType();
+ if (IVTy != PostIncTy) {
+ assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
+ IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
+ Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
+ IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
+ }
+ Phi->replaceUsesOfWith(PostIncV, IVOper);
+ DeadInsts.push_back(PostIncV);
+ }
+ }
+}
+
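A before/after sketch of the rewrite this performs, under the assumption of a pointer-typed IV and links whose IncExpr is the constant 8:
// before:  %p1 = <recompute %base + i*stride + 8 from scratch>
// after:   %p1 = <previous chain value %p0 advanced by 8>
// Each link's operand is rebuilt as (previous IV value + accumulated
// LeftOverExpr). When canFoldIVIncExpr reports that the offset folds into
// the user's addressing mode, IVSrc is not advanced and constant offsets
// keep accumulating from the same register.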
void LSRInstance::CollectFixupsAndInitialFormulae() {
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
+ Instruction *UserInst = UI->getUser();
+ // Skip IV users that are part of profitable IV Chains.
+ User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
+ UI->getOperandValToReplace());
+ assert(UseI != UserInst->op_end() && "cannot find IV operand");
+ if (IVIncSet.count(UseI))
+ continue;
+
// Record the uses.
LSRFixup &LF = getNewFixup();
- LF.UserInst = UI->getUser();
+ LF.UserInst = UserInst;
LF.OperandValToReplace = UI->getOperandValToReplace();
LF.PostIncLoops = UI->getPostIncLoops();
@@ -2914,6 +3584,7 @@ LSRInstance::GenerateAllReuseFormulae() {
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
DenseSet<const SCEV *> VisitedRegs;
SmallPtrSet<const SCEV *, 16> Regs;
+ SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
bool ChangedFormulae = false;
#endif
@@ -2933,46 +3604,66 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
- SmallVector<const SCEV *, 2> Key;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
- JE = F.BaseRegs.end(); J != JE; ++J) {
- const SCEV *Reg = *J;
- if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
- Key.push_back(Reg);
+ // Some formulas are instant losers. For example, they may depend on
+ // nonexistent AddRecs from other loops. These need to be filtered
+ // immediately, otherwise heuristics could choose them over others leading
+ // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
+ // avoids the need to recompute this information across formulae using the
+ // same bad AddRec. Passing LoserRegs is also essential unless we remove
+ // the corresponding bad register from the Regs set.
+ Cost CostF;
+ Regs.clear();
+ CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT,
+ &LoserRegs);
+ if (CostF.isLoser()) {
+ // During initial formula generation, undesirable formulae are generated
+ // by uses within other loops that have some non-trivial address mode or
+ // use the postinc form of the IV. LSR needs to provide these formulae
+ // as the basis of rediscovering the desired formula that uses an AddRec
+ // corresponding to the existing phi. Once all formulae have been
+ // generated, these initial losers may be pruned.
+ DEBUG(dbgs() << " Filtering loser "; F.print(dbgs());
+ dbgs() << "\n");
}
- if (F.ScaledReg &&
- RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
- Key.push_back(F.ScaledReg);
- // Unstable sort by host order ok, because this is only used for
- // uniquifying.
- std::sort(Key.begin(), Key.end());
-
- std::pair<BestFormulaeTy::const_iterator, bool> P =
- BestFormulae.insert(std::make_pair(Key, FIdx));
- if (!P.second) {
+ else {
+ SmallVector<const SCEV *, 2> Key;
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
+ JE = F.BaseRegs.end(); J != JE; ++J) {
+ const SCEV *Reg = *J;
+ if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
+ Key.push_back(Reg);
+ }
+ if (F.ScaledReg &&
+ RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
+ Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for
+ // uniquifying.
+ std::sort(Key.begin(), Key.end());
+
+ std::pair<BestFormulaeTy::const_iterator, bool> P =
+ BestFormulae.insert(std::make_pair(Key, FIdx));
+ if (P.second)
+ continue;
+
Formula &Best = LU.Formulae[P.first->second];
- Cost CostF;
- CostF.RateFormula(F, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
- Regs.clear();
Cost CostBest;
- CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
Regs.clear();
+ CostBest.RateFormula(Best, Regs, VisitedRegs, L, LU.Offsets, SE, DT);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
" in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
+ }
#ifndef NDEBUG
- ChangedFormulae = true;
+ ChangedFormulae = true;
#endif
- LU.DeleteFormula(F);
- --FIdx;
- --NumForms;
- Any = true;
- continue;
- }
+ LU.DeleteFormula(F);
+ --FIdx;
+ --NumForms;
+ Any = true;
}
// Now that we've filtered out some formulae, recompute the Regs set.
@@ -3284,24 +3975,29 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
if (LU.Regs.count(*I))
ReqRegs.insert(*I);
- bool AnySatisfiedReqRegs = false;
SmallPtrSet<const SCEV *, 16> NewRegs;
Cost NewCost;
-retry:
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
// Ignore formulae which do not use any of the required registers.
+ bool SatisfiedReqReg = true;
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
if ((!F.ScaledReg || F.ScaledReg != Reg) &&
std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
- F.BaseRegs.end())
- goto skip;
+ F.BaseRegs.end()) {
+ SatisfiedReqReg = false;
+ break;
+ }
+ }
+ if (!SatisfiedReqReg) {
+ // If none of the formulae satisfied the required registers, then we could
+ // clear ReqRegs and try again. Currently, we simply give up in this case.
+ continue;
}
- AnySatisfiedReqRegs = true;
// Evaluate the cost of the current formula. If it's already worse than
// the current best, prune the search at that point.
@@ -3317,7 +4013,7 @@ retry:
VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
} else {
DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
- dbgs() << ". Regs:";
+ dbgs() << ".\n Regs:";
for (SmallPtrSet<const SCEV *, 16>::const_iterator
I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I)
dbgs() << ' ' << **I;
@@ -3328,18 +4024,6 @@ retry:
}
Workspace.pop_back();
}
- skip:;
- }
-
- if (!EnableRetry && !AnySatisfiedReqRegs)
- return;
-
- // If none of the formulae had all of the required registers, relax the
- // constraint so that we don't exclude all formulae.
- if (!AnySatisfiedReqRegs) {
- assert(!ReqRegs.empty() && "Solver failed even without required registers");
- ReqRegs.clear();
- goto retry;
}
}
@@ -3435,9 +4119,10 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
/// AdjustInsertPositionForExpand - Determine an input position which will be
/// dominated by the operands and which will dominate the result.
BasicBlock::iterator
-LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
+LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
const LSRFixup &LF,
- const LSRUse &LU) const {
+ const LSRUse &LU,
+ SCEVExpander &Rewriter) const {
// Collect some instructions which must be dominated by the
// expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
@@ -3472,9 +4157,13 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
}
}
+ assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP)
+ && !isa<DbgInfoIntrinsic>(LowestIP) &&
+ "Insertion point must be a normal instruction");
+
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
- IP = HoistInsertPosition(IP, Inputs);
+ BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
// Don't insert instructions before PHI nodes.
while (isa<PHINode>(IP)) ++IP;
@@ -3485,6 +4174,11 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
// Ignore debug intrinsics.
while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+ // Set IP below instructions recently inserted by SCEVExpander. This keeps the
+ // IP consistent across expansions and allows the previously inserted
+ // instructions to be reused by subsequent expansion.
+ while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP;
+
return IP;
}
@@ -3499,7 +4193,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Determine an input position which will be dominated by the operands and
// which will dominate the result.
- IP = AdjustInsertPositionForExpand(IP, LF, LU);
+ IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@@ -3775,10 +4469,20 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
SmallVector<WeakVH, 16> DeadInsts;
SCEVExpander Rewriter(SE, "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
Rewriter.disableCanonicalMode();
Rewriter.enableLSRMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
+ // Mark phi nodes that terminate chains so the expander tries to reuse them.
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ if (PHINode *PN = dyn_cast<PHINode>(ChainI->back().UserInst))
+ Rewriter.setChainedPhi(PN);
+ }
+
// Expand the new value definitions and update the users.
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
@@ -3789,6 +4493,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed = true;
}
+ for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(),
+ ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) {
+ GenerateIVChain(*ChainI, Rewriter, DeadInsts);
+ Changed = true;
+ }
// Clean up after ourselves. This must be done before deleting any
// instructions.
Rewriter.clear();
@@ -3804,11 +4513,29 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
- if (!L->isLoopSimplifyForm()) return;
+ if (!L->isLoopSimplifyForm())
+ return;
// If there's no interesting work to be done, bail early.
if (IU.empty()) return;
+#ifndef NDEBUG
+ // All dominating loops must have preheaders, or SCEVExpander may not be able
+ // to materialize an AddRecExpr whose Start is an outer AddRecExpr.
+ //
+ // IVUsers analysis should only create users that are dominated by simple loop
+ // headers. Since this loop should dominate all of its users, its user list
+ // should be empty if this loop itself is not within a simple loop nest.
+ for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader());
+ Rung; Rung = Rung->getIDom()) {
+ BasicBlock *BB = Rung->getBlock();
+ const Loop *DomLoop = LI.getLoopFor(BB);
+ if (DomLoop && DomLoop->getHeader() == BB) {
+ assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest");
+ }
+ }
+#endif // NDEBUG
+
DEBUG(dbgs() << "\nLSR on loop ";
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
@@ -3821,24 +4548,18 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
if (IU.empty()) return;
// Skip nested loops until we can model them better with formulae.
- if (!EnableNested && !L->empty()) {
-
- if (EnablePhiElim) {
- // Remove any extra phis created by processing inner loops.
- SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
- Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
- Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
- }
+ if (!L->empty()) {
DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
return;
}
// Start collecting data and preparing for the solver.
+ CollectChains();
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
+ assert(!Uses.empty() && "IVUsers reported at least one use");
DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
print_uses(dbgs()));
@@ -3875,14 +4596,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// Now that we've decided what we want, make it so.
ImplementSolution(Solution, P);
-
- if (EnablePhiElim) {
- // Remove any extra phis created by processing inner loops.
- SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE, "lsr");
- Changed |= Rewriter.replaceCongruentIVs(L, &DT, DeadInsts);
- Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
- }
}
void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
@@ -4008,9 +4721,21 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
// Run the main LSR transformation.
Changed |= LSRInstance(TLI, L, this).getChanged();
- // At this point, it is worth checking to see if any recurrence PHIs are also
- // dead, so that we can remove them as well.
+ // Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
-
+ if (EnablePhiElim) {
+ SmallVector<WeakVH, 16> DeadInsts;
+ SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr");
+#ifndef NDEBUG
+ Rewriter.setDebugType(DEBUG_TYPE);
+#endif
+ unsigned numFolded = Rewriter.
+ replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
+ if (numFolded) {
+ Changed = true;
+ DeleteTriviallyDeadInstructions(DeadInsts);
+ DeleteDeadPHIs(L->getHeader());
+ }
+ }
return Changed;
}
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 91395b2af6aa..09a186f7f940 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -40,10 +40,9 @@ UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
-// Temporary flag to be removed in 3.0
static cl::opt<bool>
-NoSCEVUnroll("disable-unroll-scev", cl::init(false), cl::Hidden,
- cl::desc("Use ScalarEvolution to analyze loop trip counts for unrolling"));
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+ cl::desc("Unroll loops with run-time trip counts"));
namespace {
class LoopUnroll : public LoopPass {
@@ -68,6 +67,10 @@ namespace {
// explicit -unroll-threshold).
static const unsigned OptSizeUnrollThreshold = 50;
+ // Default unroll count for loops with run-time trip count if
+ // -unroll-count is not set
+ static const unsigned UnrollRuntimeCount = 8;
+
unsigned CurrentCount;
unsigned CurrentThreshold;
bool CurrentAllowPartial;
@@ -101,6 +104,7 @@ INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
@@ -147,23 +151,21 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
- if (!NoSCEVUnroll) {
- // Find "latch trip count". UnrollLoop assumes that control cannot exit
- // via the loop latch on any iteration prior to TripCount. The loop may exit
- // early via an earlier branch.
- BasicBlock *LatchBlock = L->getLoopLatch();
- if (LatchBlock) {
- TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
- TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
- }
- }
- else {
- TripCount = L->getSmallConstantTripCount();
- if (TripCount == 0)
- TripMultiple = L->getSmallConstantTripMultiple();
+ // Find "latch trip count". UnrollLoop assumes that control cannot exit
+ // via the loop latch on any iteration prior to TripCount. The loop may exit
+ // early via an earlier branch.
+ BasicBlock *LatchBlock = L->getLoopLatch();
+ if (LatchBlock) {
+ TripCount = SE->getSmallConstantTripCount(L, LatchBlock);
+ TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock);
}
- // Automatically select an unroll count.
+ // Use a default unroll-count if the user doesn't specify a value
+ // and the trip count is a run-time value. The default is different
+ // for run-time or compile-time trip count loops.
unsigned Count = CurrentCount;
+ if (UnrollRuntime && CurrentCount == 0 && TripCount == 0)
+ Count = UnrollRuntimeCount;
+
if (Count == 0) {
// Conservative heuristic: if we know the trip count, see if we can
// completely unroll (subject to the threshold, checked below); otherwise
@@ -188,15 +190,23 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (TripCount != 1 && Size > Threshold) {
DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
<< " because size: " << Size << ">" << Threshold << "\n");
- if (!CurrentAllowPartial) {
+ if (!CurrentAllowPartial && !(UnrollRuntime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = Threshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
+ if (TripCount) {
+ // Reduce unroll count to be modulo of TripCount for partial unrolling
+ Count = Threshold / LoopSize;
+ while (Count != 0 && TripCount%Count != 0)
+ Count--;
+ }
+ else if (UnrollRuntime) {
+ // Reduce unroll count to be a lower power-of-two value
+ while (Count != 0 && Size > Threshold) {
+ Count >>= 1;
+ Size = LoopSize*Count;
+ }
}
if (Count < 2) {
DEBUG(dbgs() << " could not unroll partially\n");
@@ -207,7 +217,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Unroll the loop.
- if (!UnrollLoop(L, Count, TripCount, TripMultiple, LI, &LPM))
+ if (!UnrollLoop(L, Count, TripCount, UnrollRuntime, TripMultiple, LI, &LPM))
return false;
return true;
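A worked example of the power-of-two reduction above, with hypothetical sizes:
// Threshold = 150, LoopSize = 40, runtime trip count, Count starts at 8:
//   Size = 40 * 8 = 320 > 150  -> Count = 4, Size = 160
//   Size = 160        > 150    -> Count = 2, Size = 80
//   80 <= 150 and Count >= 2, so the loop is runtime-unrolled by 2.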
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 458949c8444d..ee232687ffde 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
@@ -48,6 +48,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <map>
#include <set>
using namespace llvm;
@@ -56,14 +57,70 @@ STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
STATISTIC(NumTrivial , "Number of unswitches that are trivial");
STATISTIC(NumSimplify, "Number of simplifications of unswitched code");
+STATISTIC(TotalInsts, "Total number of instructions analyzed");
-// The specific value of 50 here was chosen based only on intuition and a
+// The specific value of 100 here was chosen based only on intuition and a
// few specific examples.
static cl::opt<unsigned>
Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
- cl::init(50), cl::Hidden);
-
+ cl::init(100), cl::Hidden);
+
namespace {
+
+ class LUAnalysisCache {
+
+ typedef DenseMap<const SwitchInst*, SmallPtrSet<const Value *, 8> >
+ UnswitchedValsMap;
+
+ typedef UnswitchedValsMap::iterator UnswitchedValsIt;
+
+ struct LoopProperties {
+ unsigned CanBeUnswitchedCount;
+ unsigned SizeEstimation;
+ UnswitchedValsMap UnswitchedVals;
+ };
+
+ // Here we use std::map instead of DenseMap, since we need to keep valid
+ // LoopProperties pointer for current loop for better performance.
+ typedef std::map<const Loop*, LoopProperties> LoopPropsMap;
+ typedef LoopPropsMap::iterator LoopPropsMapIt;
+
+ LoopPropsMap LoopsProperties;
+ UnswitchedValsMap* CurLoopInstructions;
+ LoopProperties* CurrentLoopProperties;
+
+ // Max size of code we can produce in the remaining unswitch iterations.
+ unsigned MaxSize;
+
+ public:
+
+ LUAnalysisCache() :
+ CurLoopInstructions(NULL), CurrentLoopProperties(NULL),
+ MaxSize(Threshold)
+ {}
+
+ // Analyze the loop. Check its size and determine whether it is possible
+ // to unswitch it. Returns true if we can unswitch this loop.
+ bool countLoop(const Loop* L);
+
+ // Clean all data related to given loop.
+ void forgetLoop(const Loop* L);
+
+ // Mark case value as unswitched.
+ // Since a SwitchInst can be partially unswitched, keep track of all
+ // unswitched values to avoid extra unswitching in cloned loops.
+ void setUnswitched(const SwitchInst* SI, const Value* V);
+
+ // Check whether this case value was unswitched before.
+ bool isUnswitched(const SwitchInst* SI, const Value* V);
+
+ // Clone all loop-unswitch related loop properties.
+ // Redistribute unswitching quotas.
+ // Note that the new loop's data is stored inside the VMap.
+ void cloneData(const Loop* NewLoop, const Loop* OldLoop,
+ const ValueToValueMapTy& VMap);
+ };
+
class LoopUnswitch : public LoopPass {
LoopInfo *LI; // Loop information
LPPassManager *LPM;
@@ -71,8 +128,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
-
+
+ LUAnalysisCache BranchesInfo;
+
bool OptimizeForSize;
bool redoLoop;
@@ -80,9 +138,9 @@ namespace {
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
-
+
// LoopBlocks contains all of the basic blocks of the loop, including the
- // preheader of the loop, the body of the loop, and the exit blocks of the
+ // preheader of the loop, the body of the loop, and the exit blocks of the
// loop, in that order.
std::vector<BasicBlock*> LoopBlocks;
// NewBlocks contained cloned copy of basic blocks from LoopBlocks.
@@ -90,8 +148,8 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopUnswitch(bool Os = false) :
- LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ explicit LoopUnswitch(bool Os = false) :
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
currentLoop(NULL), DT(NULL), loopHeader(NULL),
loopPreheader(NULL) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
@@ -117,7 +175,7 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ BranchesInfo.forgetLoop(currentLoop);
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -147,7 +205,7 @@ namespace {
Constant *Val, bool isEqual);
void EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
- BasicBlock *TrueDest,
+ BasicBlock *TrueDest,
BasicBlock *FalseDest,
Instruction *InsertPt);
@@ -160,6 +218,112 @@ namespace {
};
}
+
+// Analyze the loop. Check its size and determine whether it is possible
+// to unswitch it. Returns true if we can unswitch this loop.
+bool LUAnalysisCache::countLoop(const Loop* L) {
+
+ std::pair<LoopPropsMapIt, bool> InsertRes =
+ LoopsProperties.insert(std::make_pair(L, LoopProperties()));
+
+ LoopProperties& Props = InsertRes.first->second;
+
+ if (InsertRes.second) {
+ // New loop.
+
+ // Limit the number of instructions to avoid causing significant code
+ // expansion, and the number of basic blocks, to avoid loops with
+ // large numbers of branches which cause loop unswitching to go crazy.
+ // This is a very ad-hoc heuristic.
+
+ // FIXME: This is overly conservative because it does not take into
+ // consideration code simplification opportunities and code that can
+ // be shared by the resultant unswitched loops.
+ CodeMetrics Metrics;
+ for (Loop::block_iterator I = L->block_begin(),
+ E = L->block_end();
+ I != E; ++I)
+ Metrics.analyzeBasicBlock(*I);
+
+ Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
+ Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
+ MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
+ }
+
+ if (!Props.CanBeUnswitchedCount) {
+ DEBUG(dbgs() << "NOT unswitching loop %"
+ << L->getHeader()->getName() << ", cost too high: "
+ << L->getBlocks().size() << "\n");
+
+ return false;
+ }
+
+ // Be careful: these links are valid only until a new loop is added.
+ CurrentLoopProperties = &Props;
+ CurLoopInstructions = &Props.UnswitchedVals;
+
+ return true;
+}
+
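A worked example of the quota bookkeeping above, with hypothetical metrics and the default Threshold of 100:
// NumInsts = 30, NumBlocks = 4:
//   SizeEstimation       = min(30, 4 * 5) = 20
//   CanBeUnswitchedCount = 100 / 20       = 5
//   MaxSize             -= 20 * 5           (leaving 0 for later loops)
// A loop inserted afterwards computes CanBeUnswitchedCount == 0, and
// countLoop refuses to unswitch it until some loop is forgotten.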
+// Clean all data related to given loop.
+void LUAnalysisCache::forgetLoop(const Loop* L) {
+
+ LoopPropsMapIt LIt = LoopsProperties.find(L);
+
+ if (LIt != LoopsProperties.end()) {
+ LoopProperties& Props = LIt->second;
+ MaxSize += Props.CanBeUnswitchedCount * Props.SizeEstimation;
+ LoopsProperties.erase(LIt);
+ }
+
+ CurrentLoopProperties = NULL;
+ CurLoopInstructions = NULL;
+}
+
+// Mark case value as unswitched.
+// Since a SwitchInst can be partially unswitched, keep track of all
+// unswitched values to avoid extra unswitching in cloned loops.
+void LUAnalysisCache::setUnswitched(const SwitchInst* SI, const Value* V) {
+ (*CurLoopInstructions)[SI].insert(V);
+}
+
+// Check whether this case value was unswitched before.
+bool LUAnalysisCache::isUnswitched(const SwitchInst* SI, const Value* V) {
+ return (*CurLoopInstructions)[SI].count(V);
+}
+
+// Clone all loop-unswitch related loop properties.
+// Redistribute unswitching quotas.
+// Note that the new loop's data is stored inside the VMap.
+void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop,
+ const ValueToValueMapTy& VMap) {
+
+ LoopProperties& NewLoopProps = LoopsProperties[NewLoop];
+ LoopProperties& OldLoopProps = *CurrentLoopProperties;
+ UnswitchedValsMap& Insts = OldLoopProps.UnswitchedVals;
+
+ // Redistribute the "can-be-unswitched" quota.
+
+ --OldLoopProps.CanBeUnswitchedCount;
+ unsigned Quota = OldLoopProps.CanBeUnswitchedCount;
+ NewLoopProps.CanBeUnswitchedCount = Quota / 2;
+ OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2;
+
+ NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation;
+
+ // Clone the unswitched-values info:
+ // for the new loop's switches we clone info about values that were
+ // already unswitched and have redundant successors.
+ for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) {
+ const SwitchInst* OldInst = I->first;
+ Value* NewI = VMap.lookup(OldInst);
+ const SwitchInst* NewInst = cast_or_null<SwitchInst>(NewI);
+ assert(NewInst && "All instructions that are in SrcBB must be in VMap.");
+
+ NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst];
+ }
+}
+
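The quota split in cloneData is easy to misread, so here is the arithmetic in isolation (the starting quota is an invented number): one unit pays for the unswitch just performed, and the remainder is divided between the two loop versions.

#include <cassert>

int main() {
  unsigned OldCount = 5; // invented CanBeUnswitchedCount before unswitching

  // One unit pays for the unswitch that was just performed...
  --OldCount;
  unsigned Quota = OldCount; // 4

  // ...and the remainder is split between the two loop versions, with the
  // old loop keeping the extra unit when the quota is odd.
  unsigned NewCount = Quota / 2; // 2
  OldCount = Quota - Quota / 2;  // 2

  assert(NewCount + OldCount == Quota); // conserved, never duplicated
  return 0;
}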
char LoopUnswitch::ID = 0;
INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
@@ -169,14 +333,18 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",
false, false)
-Pass *llvm::createLoopUnswitchPass(bool Os) {
- return new LoopUnswitch(Os);
+Pass *llvm::createLoopUnswitchPass(bool Os) {
+ return new LoopUnswitch(Os);
}
/// FindLIVLoopCondition - Cond is a condition that occurs in L. If it is
/// invariant in the loop, or has an invariant piece, return the invariant.
/// Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
+
+ // We started analyzing a new instruction; increment the scanned-instructions counter.
+ ++TotalInsts;
+
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
return 0;
@@ -201,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
if (Value *RHS = FindLIVLoopCondition(BO->getOperand(1), L, Changed))
return RHS;
}
-
+
return 0;
}
@@ -226,16 +394,36 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
return Changed;
}
-/// processCurrentLoop - Do actual work and unswitch loop if possible
+/// processCurrentLoop - Do actual work and unswitch loop if possible
/// and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
- LLVMContext &Context = currentLoop->getHeader()->getContext();
+
+ initLoopData();
+
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
+ // Loops with indirectbr cannot be cloned.
+ if (!currentLoop->isSafeToClone())
+ return false;
+
+ // Without dedicated exits, splitting the exit edge may fail.
+ if (!currentLoop->hasDedicatedExits())
+ return false;
+
+ LLVMContext &Context = loopHeader->getContext();
+
+ // We may have reached the unswitching quota for this loop. If so,
+ // stop unswitching.
+ if (!BranchesInfo.countLoop(currentLoop))
+ return false;
// Loop over all of the basic blocks in the loop. If we find an interior
// block that is branching on a loop-invariant condition, we can unswitch this
// loop.
- for (Loop::block_iterator I = currentLoop->block_begin(),
+ for (Loop::block_iterator I = currentLoop->block_begin(),
E = currentLoop->block_end(); I != E; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -244,24 +432,37 @@ bool LoopUnswitch::processCurrentLoop() {
if (BI->isConditional()) {
// See if this, or some part of it, is loop invariant. If so, we can
// unswitch on it if we desire.
- Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumBranches;
return true;
}
- }
+ }
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases) {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process the same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point some cases are already unswitched and some are not
+ // yet. Find the first case value that has not been unswitched.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ Constant* UnswitchValCandidate = i.getCaseValue();
+ if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -270,14 +471,14 @@ bool LoopUnswitch::processCurrentLoop() {
}
}
}
-
+
// Scan the instructions to check for unswitchable values.
- for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
+ for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end();
BBI != E; ++BBI)
if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) {
- Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
+ Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && UnswitchIfProfitable(LoopCond,
+ if (LoopCond && UnswitchIfProfitable(LoopCond,
ConstantInt::getTrue(Context))) {
++NumSelects;
return true;
@@ -297,7 +498,8 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
BasicBlock *&ExitBB,
std::set<BasicBlock*> &Visited) {
if (!Visited.insert(BB).second) {
- // Already visited. Without more analysis, this could indicate an infinte loop.
+ // Already visited. Without more analysis, this could indicate an infinite
+ // loop.
return false;
} else if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
@@ -306,7 +508,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
ExitBB = BB;
return true;
}
-
+
// Otherwise, this is an unvisited intra-loop node. Check all successors.
for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
// Check to see if the successor is a trivial loop exit.
@@ -319,12 +521,12 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (I->mayHaveSideEffects())
return false;
-
+
return true;
}
/// isTrivialLoopExitBlock - Return true if the specified block unconditionally
-/// leads to an exit from the specified loop, and has no side-effects in the
+/// leads to an exit from the specified loop, and has no side-effects in the
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
@@ -352,49 +554,61 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
BasicBlock *Header = currentLoop->getHeader();
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
-
+
BasicBlock *LoopExitBB = 0;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
-
- // Check to see if a successor of the branch is guaranteed to
- // exit through a unique exit block without having any
+
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(0)))) {
if (Val) *Val = ConstantInt::getTrue(Context);
- } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
BI->getSuccessor(1)))) {
if (Val) *Val = ConstantInt::getFalse(Context);
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(HeaderTerm)) {
// If this isn't a switch on Cond, we can't handle it.
if (SI->getCondition() != Cond) return false;
-
+
// Check to see if a successor of the switch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
- SI->getSuccessor(i)))) {
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
+ i.getCaseSuccessor()))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = i.getCaseValue();
+
+ // Check that it was not unswitched before, since values that were
+ // already unswitched still look trivial.
+ if (BranchesInfo.isUnswitched(SI, CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
// contains phi nodes, this isn't trivial.
if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin()))
return false; // Can't handle this.
-
+
if (LoopExit) *LoopExit = LoopExitBB;
-
+
// We already know that nothing uses any scalar values defined inside of this
// loop. As such, we just have to check to see if this loop will execute any
// side-effecting instructions (e.g. stores, calls, volatile loads) in the
@@ -411,12 +625,6 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
- initLoopData();
-
- // If LoopSimplify was unable to form a preheader, don't do any unswitching.
- if (!loopPreheader)
- return false;
-
Function *F = loopHeader->getParent();
Constant *CondVal = 0;
@@ -434,28 +642,6 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
return false;
- // FIXME: This is overly conservative because it does not take into
- // consideration code simplification opportunities and code that can
- // be shared by the resultant unswitched loops.
- CodeMetrics Metrics;
- for (Loop::block_iterator I = currentLoop->block_begin(),
- E = currentLoop->block_end();
- I != E; ++I)
- Metrics.analyzeBasicBlock(*I);
-
- // Limit the number of instructions to avoid causing significant code
- // expansion, and the number of basic blocks, to avoid loops with
- // large numbers of branches which cause loop unswitching to go crazy.
- // This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
- Metrics.containsIndirectBr || Metrics.isRecursive) {
- DEBUG(dbgs() << "NOT unswitching loop %"
- << currentLoop->getHeader()->getName() << ", cost too high: "
- << currentLoop->getBlocks().size() << "\n");
- return false;
- }
-
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
return true;
}
@@ -508,17 +694,17 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable
/// condition in it (a cond branch from its header block to its latch block,
-/// where the path through the loop that doesn't execute its body has no
+/// where the path through the loop that doesn't execute its body has no
/// side-effects), unswitch it. This doesn't involve any code duplication, just
/// moving the conditional branch outside of the loop and updating loop info.
-void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
- Constant *Val,
+void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
+ Constant *Val,
BasicBlock *ExitBlock) {
DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"
<< loopHeader->getName() << " [" << L->getBlocks().size()
<< " blocks] in Function " << L->getHeader()->getParent()->getName()
<< " on cond: " << *Val << " == " << *Cond << "\n");
-
+
// First step, split the preheader, so that we know that there is a safe place
// to insert the conditional branch. We will change loopPreheader to have a
// conditional branch on Cond.
@@ -527,24 +713,24 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we should
// short-circuit to.
-
+
// Split this block now, so that the loop maintains its exit block, and so
// that the jump from the preheader can execute the contents of the exit block
// without actually branching to it (the exit block should be dominated by the
// loop header, not the preheader).
assert(!L->contains(ExitBlock) && "Exit block is in the loop?");
BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), this);
-
- // Okay, now we have a position to branch from and a position to branch to,
+
+ // Okay, now we have a position to branch from and a position to branch to,
// insert the new conditional branch.
- EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
+ EmitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH,
loopPreheader->getTerminator());
LPM->deleteSimpleAnalysisValue(loopPreheader->getTerminator(), L);
loopPreheader->getTerminator()->eraseFromParent();
// We need to reprocess this loop, it could be unswitched again.
redoLoop = true;
-
+
// Now that we know that the loop is never entered when this condition is a
// particular value, rewrite the loop with this info. We know that this will
// at least eliminate the old branch.
@@ -554,7 +740,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond,
/// SplitExitEdges - Split all of the edges from inside the loop to their exit
/// blocks. Update the appropriate Phi nodes as we do so.
-void LoopUnswitch::SplitExitEdges(Loop *L,
+void LoopUnswitch::SplitExitEdges(Loop *L,
const SmallVector<BasicBlock *, 8> &ExitBlocks){
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
@@ -565,8 +751,7 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
// Although SplitBlockPredecessors doesn't preserve loop-simplify in
// general, if we call it on all predecessors of all exits then it does.
if (!ExitBlock->isLandingPad()) {
- SplitBlockPredecessors(ExitBlock, Preds.data(), Preds.size(),
- ".us-lcssa", this);
+ SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", this);
} else {
SmallVector<BasicBlock*, 2> NewBBs;
SplitLandingPadPredecessors(ExitBlock, Preds, ".us-lcssa", ".us-lcssa",
@@ -575,10 +760,10 @@ void LoopUnswitch::SplitExitEdges(Loop *L,
}
}
-/// UnswitchNontrivialCondition - We determined that the loop is profitable
-/// to unswitch when LIC equal Val. Split it into loop versions and test the
+/// UnswitchNontrivialCondition - We determined that the loop is profitable
+/// to unswitch when LIC equal Val. Split it into loop versions and test the
/// condition outside of either loop. Return the loops created as Out1/Out2.
-void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
+void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
Loop *L) {
Function *F = loopHeader->getParent();
DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"
@@ -621,6 +806,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -633,6 +819,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// Now we create the new Loop object for the versioned loop.
Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
+
+ // Recalculate the unswitching quota and inherit the simplified-switch info
+ // for the new blocks; clone other loop-unswitch related loop properties.
+ BranchesInfo.cloneData(NewLoop, L, VMap);
+
Loop *ParentLoop = L->getParentLoop();
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -645,7 +836,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
-
+
assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&
"Exit block should have been split to have one successor!");
BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0);
@@ -680,7 +871,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
-
+
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&
@@ -699,7 +890,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the condition that we're unswitching on), we don't rewrite the second
// iteration.
WeakVH LICHandle(LIC);
-
+
// Now we rewrite the original code to know that the condition is true and the
// new code to know that the condition is false.
RewriteLoopBodyWithConditionConstant(L, LIC, Val, false);
@@ -714,7 +905,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
/// RemoveFromWorklist - Remove all instances of I from the worklist vector
/// specified.
-static void RemoveFromWorklist(Instruction *I,
+static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
Worklist.end(), I);
@@ -727,7 +918,7 @@ static void RemoveFromWorklist(Instruction *I,
/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
/// program, replacing all uses with V and update the worklist.
-static void ReplaceUsesOfWith(Instruction *I, Value *V,
+static void ReplaceUsesOfWith(Instruction *I, Value *V,
std::vector<Instruction*> &Worklist,
Loop *L, LPPassManager *LPM) {
DEBUG(dbgs() << "Replace with '" << *V << "': " << *I);
@@ -760,10 +951,10 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
if (BasicBlock *Pred = BB->getSinglePredecessor()) {
// If it has one pred, fold phi nodes in BB.
while (isa<PHINode>(BB->begin()))
- ReplaceUsesOfWith(BB->begin(),
- cast<PHINode>(BB->begin())->getIncomingValue(0),
+ ReplaceUsesOfWith(BB->begin(),
+ cast<PHINode>(BB->begin())->getIncomingValue(0),
Worklist, L, LPM);
-
+
// If this is the header of a loop and the only pred is the latch, we now
// have an unreachable loop.
if (Loop *L = LI->getLoopFor(BB))
@@ -774,15 +965,15 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
LPM->deleteSimpleAnalysisValue(Pred->getTerminator(), L);
Pred->getTerminator()->eraseFromParent();
new UnreachableInst(BB->getContext(), Pred);
-
+
// The loop is now broken, remove it from LI.
RemoveLoopFromHierarchy(L);
-
+
// Reprocess the header, which now IS dead.
RemoveBlockIfDead(BB, Worklist, L);
return;
}
-
+
// If pred ends in a uncond branch, add uncond branch to worklist so that
// the two blocks will get merged.
if (BranchInst *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
@@ -793,11 +984,11 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
}
DEBUG(dbgs() << "Nuking dead block: " << *BB);
-
+
// Remove the instructions in the basic block from the worklist.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
RemoveFromWorklist(I, Worklist);
-
+
// Anything that uses the instructions in this basic block should have their
// uses replaced with undefs.
// If I is not void type then replaceAllUsesWith undef.
@@ -805,7 +996,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
if (!I->getType()->isVoidTy())
I->replaceAllUsesWith(UndefValue::get(I->getType()));
}
-
+
// If this is the edge to the header block for a loop, remove the loop and
// promote all subloops.
if (Loop *BBLoop = LI->getLoopFor(BB)) {
@@ -821,8 +1012,8 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
// Remove the block from the loop info, which removes it from any loops it
// was in.
LI->removeBlock(BB);
-
-
+
+
// Remove phi node entries in successors for this block.
TerminatorInst *TI = BB->getTerminator();
SmallVector<BasicBlock*, 4> Succs;
@@ -830,13 +1021,13 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
Succs.push_back(TI->getSuccessor(i));
TI->getSuccessor(i)->removePredecessor(BB);
}
-
+
// Unique the successors, remove anything with multiple uses.
array_pod_sort(Succs.begin(), Succs.end());
Succs.erase(std::unique(Succs.begin(), Succs.end()), Succs.end());
-
+
// Remove the basic block, including all of the instructions contained in it.
- LPM->deleteSimpleAnalysisValue(BB, L);
+ LPM->deleteSimpleAnalysisValue(BB, L);
BB->eraseFromParent();
// Remove successor blocks here that are not dead, so that we know we only
// have dead blocks in this list. Nondead blocks have a way of becoming dead,
@@ -854,7 +1045,7 @@ void LoopUnswitch::RemoveBlockIfDead(BasicBlock *BB,
--i;
}
}
-
+
for (unsigned i = 0, e = Succs.size(); i != e; ++i)
RemoveBlockIfDead(Succs[i], Worklist, L);
}
@@ -877,14 +1068,14 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Constant *Val,
bool IsEqual) {
assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?");
-
+
// FIXME: Support correlated properties, like:
// for (...)
// if (li1 < li2)
// ...
// if (li1 > li2)
// ...
-
+
// FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches,
// selects, switches.
std::vector<Instruction*> Worklist;
@@ -899,21 +1090,25 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (IsEqual)
Replacement = Val;
else
- Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
+ Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()),
!cast<ConstantInt>(Val)->getZExtValue());
-
+
for (Value::use_iterator UI = LIC->use_begin(), E = LIC->use_end();
UI != E; ++UI) {
Instruction *U = dyn_cast<Instruction>(*UI);
if (!U || !L->contains(U))
continue;
- U->replaceUsesOfWith(LIC, Replacement);
Worklist.push_back(U);
}
+
+ for (std::vector<Instruction*>::iterator UI = Worklist.begin();
+ UI != Worklist.end(); ++UI)
+ (*UI)->replaceUsesOfWith(LIC, Replacement);
+
SimplifyCode(Worklist, L);
return;
}
-
+
// Otherwise, we don't know the precise value of LIC, but we do know that it
// is certainly NOT "Val". As such, simplify any uses in the loop that we
// can. This case occurs when we unswitch switch statements.
@@ -925,23 +1120,27 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
Worklist.push_back(U);
- // TODO: We could do other simplifications, for example, turning
+ // TODO: We could do other simplifications, for example, turning
// 'icmp eq LIC, Val' -> false.
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(U);
if (SI == 0 || !isa<ConstantInt>(Val)) continue;
-
- unsigned DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
- if (DeadCase == 0) continue; // Default case is live for multiple values.
-
- // Found a dead case value. Don't remove PHI nodes in the
+
+ SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
+ // Default case is live for multiple values.
+ if (DeadCase == SI->case_default()) continue;
+
+ // Found a dead case value. Don't remove PHI nodes in the
// successor if they become single-entry, those PHI nodes may
// be in the Users list.
BasicBlock *Switch = SI->getParent();
- BasicBlock *SISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *SISucc = DeadCase.getCaseSuccessor();
BasicBlock *Latch = L->getLoopLatch();
+
+ BranchesInfo.setUnswitched(SI, Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
@@ -957,7 +1156,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// Compute the successors instead of relying on the return value
// of SplitEdge, since it may have split the switch successor
// after PHI nodes.
- BasicBlock *NewSISucc = SI->getSuccessor(DeadCase);
+ BasicBlock *NewSISucc = DeadCase.getCaseSuccessor();
BasicBlock *OldSISucc = *succ_begin(NewSISucc);
// Create an "unreachable" destination.
BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable",
@@ -981,7 +1180,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
if (DT)
DT->addNewBlock(Abort, NewSISucc);
}
-
+
SimplifyCode(Worklist, L);
}
@@ -1002,7 +1201,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// Simple DCE.
if (isInstructionTriviallyDead(I)) {
DEBUG(dbgs() << "Remove dead instruction '" << *I);
-
+
// Add uses to the worklist, which may be dead now.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i)))
@@ -1017,7 +1216,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
// See if instruction simplification can hack this up. This is common for
// things like "select false, X, Y" after unswitching made the condition be
// 'false'.
- if (Value *V = SimplifyInstruction(I, 0, DT))
+ if (Value *V = SimplifyInstruction(I, 0, 0, DT))
if (LI->replacementPreservesLCSSAForm(I, V)) {
ReplaceUsesOfWith(I, V, Worklist, L, LPM);
continue;
@@ -1034,24 +1233,24 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
if (!SinglePred) continue; // Nothing to do.
assert(SinglePred == Pred && "CFG broken");
- DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
+ DEBUG(dbgs() << "Merging blocks: " << Pred->getName() << " <- "
<< Succ->getName() << "\n");
-
+
// Resolve any single entry PHI nodes in Succ.
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
-
+
// If Succ has any successors with PHI nodes, update them to have
// entries coming from Pred instead of Succ.
Succ->replaceAllUsesWith(Pred);
-
+
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
Succ->end());
LPM->deleteSimpleAnalysisValue(BI, L);
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
-
+
// Remove Succ from the loop tree.
LI->removeBlock(Succ);
LPM->deleteSimpleAnalysisValue(Succ, L);
@@ -1059,7 +1258,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
++NumSimplify;
continue;
}
-
+
if (ConstantInt *CB = dyn_cast<ConstantInt>(BI->getCondition())){
// Conditional branch. Turn it into an unconditional branch, then
// remove dead blocks.
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index eeb8931446dc..a87cce3f9d3e 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -147,8 +147,8 @@ struct MemsetRange {
} // end anon namespace
bool MemsetRange::isProfitableToUseMemset(const TargetData &TD) const {
- // If we found more than 8 stores to merge or 64 bytes, use memset.
- if (TheStores.size() >= 8 || End-Start >= 64) return true;
+ // If we found more than 4 stores to merge or 16 bytes, use memset.
+ if (TheStores.size() >= 4 || End-Start >= 16) return true;
// If there is nothing to merge, don't do anything.
if (TheStores.size() < 2) return false;
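A standalone restatement of the tightened heuristic (the store-count and byte thresholds are the ones introduced above; the density comparison that follows in the real function is reduced to a stub):

#include <cassert>
#include <cstdint>

// New thresholds from this change: 4 stores or 16 bytes (down from 8 / 64).
bool isProfitableToUseMemset(unsigned NumStores, int64_t Start, int64_t End) {
  if (NumStores >= 4 || End - Start >= 16)
    return true;
  if (NumStores < 2)
    return false; // nothing to merge
  return false;   // stub: real code weighs range size against store density
}

int main() {
  assert(isProfitableToUseMemset(4, 0, 4));  // store count trips it
  assert(isProfitableToUseMemset(2, 0, 16)); // byte range trips it
  assert(!isProfitableToUseMemset(1, 0, 8)); // a lone small store: never
  return 0;
}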
@@ -806,21 +806,25 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
MemDepResult DepInfo = MD->getDependency(M);
- if (!DepInfo.isClobber())
- return false;
-
- if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()))
- return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
-
- if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
- if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), C)) {
- MD->removeInstruction(M);
- M->eraseFromParent();
- return true;
+ if (DepInfo.isClobber()) {
+ if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
+ if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
+ CopySize->getZExtValue(), C)) {
+ MD->removeInstruction(M);
+ M->eraseFromParent();
+ return true;
+ }
}
}
-
+
+ AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M);
+ MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
+ M, M->getParent());
+ if (SrcDepInfo.isClobber()) {
+ if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
+ return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue());
+ }
+
return false;
}
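The restructuring above changes which dependency each transform keys off: call-slot optimization still uses the memcpy's own clobbering dependency, while memcpy-memcpy forwarding now queries the source location separately. A stub sketch of just that decision order, with both analyses replaced by booleans:

#include <cstdio>

// Stand-ins for the two dependency-driven transforms in processMemCpy.
bool tryCallSlotOpt(bool DestDepIsCall) { return DestDepIsCall; }
bool tryMemCpyMemCpy(bool SrcDepIsMemCpy) { return SrcDepIsMemCpy; }

bool processMemCpy(bool DestDepIsCall, bool SrcDepIsMemCpy) {
  // 1) The memcpy's own clobber feeds the call-slot transform.
  if (tryCallSlotOpt(DestDepIsCall))
    return true;
  // 2) A separate query on the source location feeds memcpy-memcpy
  //    forwarding; before this change, a non-memcpy clobber of the dest
  //    returned early and hid this opportunity.
  return tryMemCpyMemCpy(SrcDepIsMemCpy);
}

int main() {
  // memcpy-memcpy forwarding now fires even when the dest dependency
  // is not a call.
  std::printf("%d\n", processMemCpy(false, true)); // prints 1
  return 0;
}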
@@ -945,7 +949,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
RepeatInstruction = processMemMove(M);
else if (CallSite CS = (Value*)I) {
for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
- if (CS.paramHasAttr(i+1, Attribute::ByVal))
+ if (CS.isByValArgument(i))
MadeChange |= processByValArgument(CS, i);
}
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index da74e9c3ec79..40b0b2061689 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -88,13 +88,14 @@ namespace {
}
#endif
- ValueT &operator[](KeyT Arg) {
+ ValueT &operator[](const KeyT &Arg) {
std::pair<typename MapTy::iterator, bool> Pair =
Map.insert(std::make_pair(Arg, size_t(0)));
if (Pair.second) {
- Pair.first->second = Vector.size();
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
Vector.push_back(std::make_pair(Arg, ValueT()));
- return Vector.back().second;
+ return Vector[Num].second;
}
return Vector[Pair.first->second].second;
}
@@ -104,14 +105,15 @@ namespace {
std::pair<typename MapTy::iterator, bool> Pair =
Map.insert(std::make_pair(InsertPair.first, size_t(0)));
if (Pair.second) {
- Pair.first->second = Vector.size();
+ size_t Num = Vector.size();
+ Pair.first->second = Num;
Vector.push_back(InsertPair);
- return std::make_pair(llvm::prior(Vector.end()), true);
+ return std::make_pair(Vector.begin() + Num, true);
}
return std::make_pair(Vector.begin() + Pair.first->second, false);
}
- const_iterator find(KeyT Key) const {
+ const_iterator find(const KeyT &Key) const {
typename MapTy::const_iterator It = Map.find(Key);
if (It == Map.end()) return Vector.end();
return Vector.begin() + It->second;
@@ -121,7 +123,7 @@ namespace {
/// from the vector, it just zeros out the key in the vector. This leaves
/// iterators intact, but clients must be prepared for zeroed-out keys when
/// iterating.
- void blot(KeyT Key) {
+ void blot(const KeyT &Key) {
typename MapTy::iterator It = Map.find(Key);
if (It == Map.end()) return;
Vector[It->second].first = KeyT();
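The blot() comment above describes an unusual erase; here is a minimal standalone blot-map (std::map plus std::vector in place of LLVM's containers) showing that erasure zeroes the key but leaves iterators into the vector valid:

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Minimal "blot" map: erasing zeroes the key in the vector instead of
// removing the element, so iterators into the vector stay valid.
struct BlotMap {
  std::map<std::string, size_t> Map;
  std::vector<std::pair<std::string, int>> Vector;

  void insert(const std::string &K, int V) {
    auto P = Map.insert({K, Vector.size()});
    if (P.second)
      Vector.push_back({K, V});
  }
  void blot(const std::string &K) {
    auto It = Map.find(K);
    if (It == Map.end())
      return;
    Vector[It->second].first.clear(); // zero out the key, keep the slot
    Map.erase(It);
  }
};

int main() {
  BlotMap M;
  M.insert("a", 1);
  M.insert("b", 2);
  auto It = M.Vector.begin();   // iterator taken before the erase
  M.blot("a");
  assert(It->first.empty());    // still valid; key is zeroed, not removed
  assert(M.Vector.size() == 2); // clients must skip zeroed-out keys
  return 0;
}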
@@ -179,9 +181,13 @@ static bool IsPotentialUse(const Value *Op) {
Arg->hasNestAttr() ||
Arg->hasStructRetAttr())
return false;
- // Only consider values with pointer types, and not function pointers.
+ // Only consider values with pointer types.
+ // It seems intuitive to exclude function pointer types as well, since
+ // functions are never reference-counted; however, clang occasionally
+ // bitcasts reference-counted pointers to function-pointer type
+ // temporarily.
PointerType *Ty = dyn_cast<PointerType>(Op->getType());
- if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ if (!Ty)
return false;
// Conservatively assume anything else is a potential use.
return true;
@@ -371,7 +377,7 @@ static InstructionClass GetBasicInstructionClass(const Value *V) {
}
// Otherwise, be conservative.
- return IC_User;
+ return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User;
}
/// IsRetain - Test if the given class is objc_retain or
@@ -597,6 +603,46 @@ static bool ModuleHasARC(const Module &M) {
M.getNamedValue("objc_unretainedPointer");
}
+/// DoesObjCBlockEscape - Test whether the given pointer, which is an
+/// Objective-C block pointer, "escapes". This differs from regular
+/// escape analysis in that a use as an argument to a call is not considered
+/// an escape.
+static bool DoesObjCBlockEscape(const Value *BlockPtr) {
+ // Walk the def-use chains.
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(BlockPtr);
+ do {
+ const Value *V = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const User *UUser = *UI;
+ // Special - Use by a call (callee or argument) is not considered
+ // to be an escape.
+ if (isa<CallInst>(UUser) || isa<InvokeInst>(UUser))
+ continue;
+ // Use by an instruction which copies the value is an escape if the
+ // result is an escape.
+ if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) ||
+ isa<PHINode>(UUser) || isa<SelectInst>(UUser)) {
+ Worklist.push_back(UUser);
+ continue;
+ }
+ // Use by a load is not an escape.
+ if (isa<LoadInst>(UUser))
+ continue;
+ // Use by a store is not an escape if the use is the address.
+ if (const StoreInst *SI = dyn_cast<StoreInst>(UUser))
+ if (V != SI->getValueOperand())
+ continue;
+ // Otherwise, conservatively assume an escape.
+ return true;
+ }
+ } while (!Worklist.empty());
+
+ // No escapes found.
+ return false;
+}
+
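A schematic, LLVM-free version of the escape walk above: users are modeled as an adjacency list, and an enum collapses the instruction-kind checks. Calls, loads, and stores to the pointer's address don't escape; copying instructions escape only if their result does; everything else conservatively escapes.

#include <cstdio>
#include <utility>
#include <vector>

enum class UseKind { Call, Copy, Load, StoreAddr, StoreValue, Other };

struct Node {
  std::vector<std::pair<UseKind, const Node *>> Uses; // (how used, user)
};

// Mirrors the walk above: calls, loads, and stores *to* the pointer are
// not escapes; copying instructions (bitcast/GEP/phi/select) escape only
// if their result escapes; anything else is conservatively an escape.
bool Escapes(const Node *BlockPtr) {
  std::vector<const Node *> Worklist;
  Worklist.push_back(BlockPtr);
  do {
    const Node *V = Worklist.back();
    Worklist.pop_back();
    for (const auto &U : V->Uses)
      switch (U.first) {
      case UseKind::Call:
      case UseKind::Load:
      case UseKind::StoreAddr:
        break;                        // not an escape
      case UseKind::Copy:
        Worklist.push_back(U.second); // escapes iff the copy escapes
        break;
      default:
        return true;                  // StoreValue/Other: assume escape
      }
  } while (!Worklist.empty());
  return false;
}

int main() {
  Node Cast, Block;
  Cast.Uses = {{UseKind::Call, nullptr}}; // passed to a call: fine
  Block.Uses = {{UseKind::Copy, &Cast}};  // bitcast of the block
  std::printf("%s\n", Escapes(&Block) ? "escapes" : "does not escape");
  return 0;
}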
//===----------------------------------------------------------------------===//
// ARC AliasAnalysis.
//===----------------------------------------------------------------------===//
@@ -850,6 +896,139 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
}
//===----------------------------------------------------------------------===//
+// ARC autorelease pool elimination.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+
+namespace {
+ /// ObjCARCAPElim - Autorelease pool elimination.
+ class ObjCARCAPElim : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnModule(Module &M);
+
+ bool MayAutorelease(CallSite CS, unsigned Depth = 0);
+ bool OptimizeBB(BasicBlock *BB);
+
+ public:
+ static char ID;
+ ObjCARCAPElim() : ModulePass(ID) {
+ initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCAPElim::ID = 0;
+INITIALIZE_PASS(ObjCARCAPElim,
+ "objc-arc-apelim",
+ "ObjC ARC autorelease pool elimination",
+ false, false)
+
+Pass *llvm::createObjCARCAPElimPass() {
+ return new ObjCARCAPElim();
+}
+
+void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+/// MayAutorelease - Interprocedurally determine if calls made by the
+/// given call site can possibly produce autoreleases.
+bool ObjCARCAPElim::MayAutorelease(CallSite CS, unsigned Depth) {
+ if (Function *Callee = CS.getCalledFunction()) {
+ if (Callee->isDeclaration() || Callee->mayBeOverridden())
+ return true;
+ for (Function::iterator I = Callee->begin(), E = Callee->end();
+ I != E; ++I) {
+ BasicBlock *BB = I;
+ for (BasicBlock::iterator J = BB->begin(), F = BB->end(); J != F; ++J)
+ if (CallSite JCS = CallSite(J))
+ // This recursion depth limit is arbitrary. It's just large
+ // enough to cover known interesting test cases.
+ if (Depth < 3 &&
+ !JCS.onlyReadsMemory() &&
+ MayAutorelease(JCS, Depth + 1))
+ return true;
+ }
+ return false;
+ }
+
+ return true;
+}
+
+bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
+ bool Changed = false;
+
+ Instruction *Push = 0;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
+ Instruction *Inst = I++;
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPush:
+ Push = Inst;
+ break;
+ case IC_AutoreleasepoolPop:
+ // If this pop matches a push and nothing in between can autorelease,
+ // zap the pair.
+ if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) {
+ Changed = true;
+ Inst->eraseFromParent();
+ Push->eraseFromParent();
+ }
+ Push = 0;
+ break;
+ case IC_CallOrUser:
+ if (MayAutorelease(CallSite(Inst)))
+ Push = 0;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
+
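OptimizeBB above keeps at most one pending push; here is a reduced model over an instruction-kind sequence (the real code additionally checks that the pop's argument is the matching push, which is elided here):

#include <cstdio>
#include <vector>

enum Kind { Push, Pop, MayAutoreleaseCall, Other };

// Counts removable push/pop pairs: a pop pairs with the pending push only
// if no call in between can autorelease.
unsigned ZappablePairs(const std::vector<Kind> &BB) {
  unsigned Zapped = 0;
  bool HavePush = false;
  for (Kind K : BB)
    switch (K) {
    case Push:
      HavePush = true;
      break;
    case Pop:
      if (HavePush) ++Zapped; // matched pair: both calls could be erased
      HavePush = false;       // a pop always resets the pending push
      break;
    case MayAutoreleaseCall:
      HavePush = false;       // something may autorelease: pair is spoiled
      break;
    case Other:
      break;
    }
  return Zapped;
}

int main() {
  std::printf("%u\n", ZappablePairs({Push, Other, Pop}));              // 1
  std::printf("%u\n", ZappablePairs({Push, MayAutoreleaseCall, Pop})); // 0
  return 0;
}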
+bool ObjCARCAPElim::runOnModule(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!ModuleHasARC(M))
+ return false;
+
+ // Find the llvm.global_ctors variable, as the first step in
+ // identifying the global constructors.
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return false;
+
+ assert(GV->hasDefinitiveInitializer() &&
+ "llvm.global_ctors is uncooperative!");
+
+ bool Changed = false;
+
+ // Dig the constructor functions out of GV's initializer.
+ ConstantArray *Init = cast<ConstantArray>(GV->getInitializer());
+ for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
+ OI != OE; ++OI) {
+ Value *Op = *OI;
+ // llvm.global_ctors is an array of pairs in which the second member
+ // of each pair is a constructor function.
+ Function *F = cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
+ // Only look at function definitions.
+ if (F->isDeclaration())
+ continue;
+ // Only look at functions with one basic block.
+ if (llvm::next(F->begin()) != F->end())
+ continue;
+ // Ok, a single-block constructor function definition. Try to optimize it.
+ Changed |= OptimizeBB(F->begin());
+ }
+
+ return Changed;
+}
+
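A plain-C++ analogue of the constructor filtering above, with made-up data; in the real pass the pairs come from llvm.global_ctors and the per-function optimization is OptimizeBB.

#include <cstdio>
#include <utility>
#include <vector>

struct Func {
  bool IsDeclaration;
  unsigned NumBlocks;
};

int main() {
  // Stand-in for llvm.global_ctors: (priority, function) pairs.
  std::vector<std::pair<int, Func>> Ctors = {
      {65535, {true, 0}},  // declaration only: skipped
      {65535, {false, 3}}, // multi-block body: skipped
      {65535, {false, 1}}, // single-block definition: optimized
  };

  unsigned Visited = 0;
  for (const auto &C : Ctors) {
    const Func &F = C.second;
    if (F.IsDeclaration)
      continue; // only look at function definitions
    if (F.NumBlocks != 1)
      continue; // only single-basic-block constructors
    ++Visited;  // the real pass calls OptimizeBB(F.begin())
  }
  std::printf("optimized %u ctor(s)\n", Visited); // prints 1
  return 0;
}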
+//===----------------------------------------------------------------------===//
// ARC optimization.
//===----------------------------------------------------------------------===//
@@ -896,8 +1075,9 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
#include "llvm/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CFG.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseSet.h"
STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1158,6 +1338,12 @@ namespace {
/// with the "tail" keyword.
bool IsTailCallRelease;
+ /// Partial - True if we've seen an opportunity for partial RR elimination,
+ /// such as pushing calls into a CFG triangle or into one side of a
+ /// CFG diamond.
+ /// TODO: Consider moving this to PtrState.
+ bool Partial;
+
/// ReleaseMetadata - If the Calls are objc_release calls and they all have
/// a clang.imprecise_release tag, this is the metadata tag.
MDNode *ReleaseMetadata;
@@ -1171,7 +1357,8 @@ namespace {
SmallPtrSet<Instruction *, 2> ReverseInsertPts;
RRInfo() :
- KnownSafe(false), IsRetainBlock(false), IsTailCallRelease(false),
+ KnownSafe(false), IsRetainBlock(false),
+ IsTailCallRelease(false), Partial(false),
ReleaseMetadata(0) {}
void clear();
@@ -1182,6 +1369,7 @@ void RRInfo::clear() {
KnownSafe = false;
IsRetainBlock = false;
IsTailCallRelease = false;
+ Partial = false;
ReleaseMetadata = 0;
Calls.clear();
ReverseInsertPts.clear();
@@ -1239,16 +1427,6 @@ namespace {
Seq = NewSeq;
}
- void SetSeqToRelease(MDNode *M) {
- if (Seq == S_None || Seq == S_Use) {
- Seq = M ? S_MovableRelease : S_Release;
- RRI.ReleaseMetadata = M;
- } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
- Seq = S_Release;
- RRI.ReleaseMetadata = 0;
- }
- }
-
Sequence GetSeq() const {
return Seq;
}
@@ -1272,8 +1450,16 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
Seq = S_None;
+ // If we're not in a sequence (anymore), drop all associated state.
if (Seq == S_None) {
RRI.clear();
+ } else if (RRI.Partial || Other.RRI.Partial) {
+ // If we're doing a merge on a path that's previously seen a partial
+ // merge, conservatively drop the sequence, to avoid doing partial
+ // RR elimination. If the branch predicates for the two merge differ,
+ // mixing them is unsafe.
+ Seq = S_None;
+ RRI.clear();
} else {
// Conservatively merge the ReleaseMetadata information.
if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
@@ -1282,8 +1468,15 @@ PtrState::Merge(const PtrState &Other, bool TopDown) {
RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe;
RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
- RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
- Other.RRI.ReverseInsertPts.end());
+
+ // Merge the insert point sets. If there are any differences,
+ // that makes this a partial merge.
+ RRI.Partial = RRI.ReverseInsertPts.size() !=
+ Other.RRI.ReverseInsertPts.size();
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ I = Other.RRI.ReverseInsertPts.begin(),
+ E = Other.RRI.ReverseInsertPts.end(); I != E; ++I)
+ RRI.Partial |= RRI.ReverseInsertPts.insert(*I);
}
}
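The merge above flags a partial merge whenever the two insert-point sets differ; a set-based sketch of that detection, using the fact that std::set::insert reports a newly added element much as the SmallPtrSet::insert above reports one through its bool result:

#include <cassert>
#include <set>

// True (a partial merge) iff the two insert-point sets were not equal,
// detected exactly as above: a size mismatch, or any insert that adds a
// new element.
bool MergeInsertPts(std::set<int> &Mine, const std::set<int> &Other) {
  bool Partial = Mine.size() != Other.size();
  for (int P : Other)
    Partial |= Mine.insert(P).second;
  return Partial;
}

int main() {
  std::set<int> A = {1, 2}, B = {1, 2};
  assert(!MergeInsertPts(A, B)); // identical sets: a full merge

  std::set<int> C = {1}, D = {1, 3};
  assert(MergeInsertPts(C, D)); // differing sets: partial, drop the seq
  return 0;
}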
@@ -1460,6 +1653,14 @@ namespace {
/// metadata.
unsigned ImpreciseReleaseMDKind;
+ /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape
+ /// metadata.
+ unsigned CopyOnEscapeMDKind;
+
+ /// NoObjCARCExceptionsMDKind - The Metadata Kind for
+ /// clang.arc.no_objc_arc_exceptions metadata.
+ unsigned NoObjCARCExceptionsMDKind;
+
Constant *getRetainRVCallee(Module *M);
Constant *getAutoreleaseRVCallee(Module *M);
Constant *getReleaseCallee(Module *M);
@@ -1467,6 +1668,8 @@ namespace {
Constant *getRetainBlockCallee(Module *M);
Constant *getAutoreleaseCallee(Module *M);
+ bool IsRetainBlockOptimizable(const Instruction *Inst);
+
void OptimizeRetainCall(Function &F, Instruction *Retain);
bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
@@ -1475,9 +1678,16 @@ namespace {
void CheckForCFGHazards(const BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
BBState &MyStates) const;
+ bool VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates);
bool VisitBottomUp(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains);
+ bool VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates);
bool VisitTopDown(BasicBlock *BB,
DenseMap<const BasicBlock *, BBState> &BBStates,
DenseMap<Value *, RRInfo> &Releases);
@@ -1534,6 +1744,22 @@ void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
}
+bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) {
+ // Without the magic metadata tag, we have to assume this might be an
+ // objc_retainBlock call inserted to convert a block pointer to an id,
+ // in which case it really is needed.
+ if (!Inst->getMetadata(CopyOnEscapeMDKind))
+ return false;
+
+ // If the pointer "escapes" (not including being used in a call),
+ // the copy may be needed.
+ if (DoesObjCBlockEscape(Inst))
+ return false;
+
+ // Otherwise, it's not needed.
+ return true;
+}
+
Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
if (!RetainRVCallee) {
LLVMContext &C = M->getContext();
@@ -1737,6 +1963,7 @@ namespace {
/// use here.
enum DependenceKind {
NeedsPositiveRetainCount,
+ AutoreleasePoolBoundary,
CanChangeRetainCount,
RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
@@ -1766,6 +1993,19 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
}
}
+ case AutoreleasePoolBoundary: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ // These mark the end and begin of an autorelease pool scope.
+ return true;
+ default:
+ // Nothing else does this.
+ return false;
+ }
+ }
+
case CanChangeRetainCount: {
InstructionClass Class = GetInstructionClass(Inst);
switch (Class) {
@@ -1783,6 +2023,7 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
case RetainAutoreleaseDep:
switch (GetBasicInstructionClass(Inst)) {
case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
// Don't merge an objc_autorelease with an objc_retain inside a different
// autoreleasepool scope.
return true;
@@ -1794,7 +2035,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
// Nothing else matters for objc_retainAutorelease formation.
return false;
}
- break;
case RetainAutoreleaseRVDep: {
InstructionClass Class = GetBasicInstructionClass(Inst);
@@ -1808,7 +2048,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
// retainAutoreleaseReturnValue formation.
return CanInterruptRV(Class);
}
- break;
}
case RetainRVDep:
@@ -1816,7 +2055,6 @@ Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
}
llvm_unreachable("Invalid dependence flavor");
- return true;
}
/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
@@ -1920,17 +2158,26 @@ ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
/// return true.
bool
ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
- // Check for the argument being from an immediately preceding call.
+ // Check for the argument being from an immediately preceding call or invoke.
Value *Arg = GetObjCArg(RetainRV);
CallSite CS(Arg);
- if (Instruction *Call = CS.getInstruction())
+ if (Instruction *Call = CS.getInstruction()) {
if (Call->getParent() == RetainRV->getParent()) {
BasicBlock::iterator I = Call;
++I;
while (isNoopInstruction(I)) ++I;
if (&*I == RetainRV)
return false;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
+ BasicBlock *RetainRVParent = RetainRV->getParent();
+ if (II->getNormalDest() == RetainRVParent) {
+ BasicBlock::iterator I = RetainRVParent->begin();
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
}
+ }
// Check for being preceded by an objc_autoreleaseReturnValue on the same
// pointer. In this case, we can delete the pair.
@@ -2144,9 +2391,34 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
// Check that there is nothing that cares about the reference
// count between the call and the phi.
- FindDependencies(NeedsPositiveRetainCount, Arg,
- Inst->getParent(), Inst,
- DependingInstructions, Visited, PA);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainBlock:
+ // These can always be moved up.
+ break;
+ case IC_Release:
+ // These can't be moved across things that care about the retain count.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_Autorelease:
+ // These can't be moved across autorelease pool scope boundaries.
+ FindDependencies(AutoreleasePoolBoundary, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ break;
+ case IC_RetainRV:
+ case IC_AutoreleaseRV:
+ // Don't move these; the RV optimization depends on the autoreleaseRV
+ // being tail called, and the retainRV being immediately after a call
+ // (which might still happen if we get lucky with codegen layout, but
+ // it's not worth taking the chance).
+ continue;
+ default:
+ llvm_unreachable("Invalid dependence flavor");
+ }
+
if (DependingInstructions.size() == 1 &&
*DependingInstructions.begin() == PN) {
Changed = true;
@@ -2186,7 +2458,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
BBState &MyStates) const {
// If any top-down local-use or possible-dec has a succ which is earlier in
// the sequence, forget it.
- for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(),
E = MyStates.top_down_ptr_end(); I != E; ++I)
switch (I->second.GetSeq()) {
default: break;
@@ -2195,14 +2467,32 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
- switch (SuccS.GetSeq()) {
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has visited this successor, take what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI);
+ if (BBI != BBStates.end()) {
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ }
+ switch (SuccSSeq) {
case S_None:
case S_CanRelease: {
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
S.ClearSequenceProgress();
+ break;
+ }
continue;
}
case S_Use:
@@ -2211,7 +2501,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Stop:
case S_Release:
case S_MovableRelease:
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
break;
case S_Retain:
@@ -2223,19 +2513,38 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
}
case S_CanRelease: {
const Value *Arg = I->first;
const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
bool SomeSuccHasSame = false;
bool AllSuccsHaveSame = true;
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
- PtrState &SuccS = BBStates[*SI].getPtrBottomUpState(Arg);
- switch (SuccS.GetSeq()) {
+ PtrState &S = I->second;
+ succ_const_iterator SI(TI), SE(TI, false);
+
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
+ for (; SI != SE; ++SI) {
+ Sequence SuccSSeq = S_None;
+ bool SuccSRRIKnownSafe = false;
+ // If VisitBottomUp has visited this successor, take what we know about it.
+ DenseMap<const BasicBlock *, BBState>::iterator BBI = BBStates.find(*SI);
+ if (BBI != BBStates.end()) {
+ const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg);
+ SuccSSeq = SuccS.GetSeq();
+ SuccSRRIKnownSafe = SuccS.RRI.KnownSafe;
+ }
+ switch (SuccSSeq) {
case S_None: {
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) {
S.ClearSequenceProgress();
+ break;
+ }
continue;
}
case S_CanRelease:
@@ -2245,7 +2554,7 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
case S_Release:
case S_MovableRelease:
case S_Use:
- if (!S.RRI.KnownSafe && !SuccS.RRI.KnownSafe)
+ if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe)
AllSuccsHaveSame = false;
break;
case S_Retain:
@@ -2257,8 +2566,167 @@ ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
// guards against loops in the middle of a sequence.
if (SomeSuccHasSame && !AllSuccsHaveSame)
S.ClearSequenceProgress();
+ break;
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
+ BasicBlock *BB,
+ MapVector<Value *, RRInfo> &Retains,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // Check for two releases in a row on the same pointer. If we see one,
+ // make a note, and we'll circle back to revisit it after we've
+ // hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ S.RRI.clear();
+
+ MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.SetSeq(ReleaseMetadata ? S_MovableRelease : S_Release);
+ S.RRI.ReleaseMetadata = ReleaseMetadata;
+ S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ S.IncrementNestCount();
+ break;
+ }
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+ S.SetAtLeastOneRefCount();
+ S.DecrementNestCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
}
+ return NestingDetected;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ S.DecrementRefCount();
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
}
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ assert(S.RRI.ReverseInsertPts.empty());
+ // If this is an invoke instruction, we're scanning it as part of
+ // one of its successor blocks, since we can't insert code after it
+ // in its own block, and we don't want to split critical edges.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ assert(S.RRI.ReverseInsertPts.empty());
+ // As above; handle invoke specially.
+ if (isa<InvokeInst>(Inst))
+ S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt());
+ else
+ S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst)));
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ return NestingDetected;
}
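A compressed model of the bottom-up sequence transitions implemented above, keeping only the Release -> Use -> CanRelease spine and dropping ref counts, nest counts, and insert points:

#include <cstdio>

enum Seq { S_None, S_Release, S_Use, S_CanRelease };

// One bottom-up step for a single pointer: a release (re)starts the
// sequence, a use of the pointer advances Release -> Use, and any
// instruction that may alter the ref count advances Use -> CanRelease.
Seq Step(Seq S, bool IsRelease, bool CanUsePtr, bool CanAlterRC) {
  if (IsRelease)
    return S_Release;
  if (CanAlterRC && S == S_Use)
    return S_CanRelease;
  if (CanUsePtr && S == S_Release)
    return S_Use;
  return S;
}

int main() {
  Seq S = S_None;
  S = Step(S, true, false, false); // objc_release seen: S_Release
  S = Step(S, false, true, false); // pointer is used:   S_Use
  S = Step(S, false, false, true); // rc may change:     S_CanRelease
  std::printf("%d\n", S);          // prints 3 (S_CanRelease)
  return 0;
}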
bool
@@ -2274,7 +2742,13 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
succ_const_iterator SI(TI), SE(TI, false);
if (SI == SE)
MyStates.SetAsExit();
- else
+ else {
+ // If the terminator is an invoke marked with the
+ // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be
+ // ignored, for ARC purposes.
+ if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind))
+ --SE;
+
do {
const BasicBlock *Succ = *SI++;
if (Succ == BB)
@@ -2295,145 +2769,169 @@ ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
}
break;
} while (SI != SE);
+ }
// Visit all the instructions, bottom-up.
for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
Instruction *Inst = llvm::prior(I);
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
- switch (Class) {
- case IC_Release: {
- Arg = GetObjCArg(Inst);
+ // Invoke instructions are visited as part of their successors (below).
+ if (isa<InvokeInst>(Inst))
+ continue;
+
+ NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates);
+ }
+
+ // If there's a predecessor with an invoke, visit the invoke as
+ // if it were part of this block, since we can't insert code after
+ // an invoke in its own block, and we don't want to split critical
+ // edges.
+ for (pred_iterator PI(BB), PE(BB, false); PI != PE; ++PI) {
+ BasicBlock *Pred = *PI;
+ TerminatorInst *PredTI = cast<TerminatorInst>(&Pred->back());
+ if (isa<InvokeInst>(PredTI))
+ NestingDetected |= VisitInstructionBottomUp(PredTI, BB, Retains, MyStates);
+ }
+
+ return NestingDetected;
+}
+
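
Both traversals special-case invokes marked with the
clang.arc.no_objc_arc_exceptions metadata, whose unwind edges may be
ignored for ARC purposes. A small sketch of the test (the helper name is
hypothetical; NoObjCARCExceptionsMDKind is the kind ID registered in
doInitialization further down):

#include "llvm/Instructions.h"
using namespace llvm;

static bool CanIgnoreUnwindEdge(const TerminatorInst *TI,
                                unsigned NoObjCARCExceptionsMDKind) {
  // Only an invoke has an unwind edge, and only a frontend-marked invoke
  // may have its unwind edge ignored by the ARC optimizer.
  if (const InvokeInst *II = dyn_cast<InvokeInst>(TI))
    return II->getMetadata(NoObjCARCExceptionsMDKind) != 0;
  return false;
}
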
+bool
+ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
+ DenseMap<Value *, RRInfo> &Releases,
+ BBState &MyStates) {
+ bool NestingDetected = false;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ // An objc_retainBlock call with just a use may need to be kept,
+ // because it may be copying a block from the stack to the heap.
+ if (!IsRetainBlockOptimizable(Inst))
+ break;
+ // FALLTHROUGH
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
- // If we see two releases in a row on the same pointer. If so, make
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // If we see two retains in a row on the same pointer, make
// a note, and we'll circle back to revisit it after we've
- // hopefully eliminated the second release, which may allow us to
- // eliminate the first release too.
+ // hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
// Theoretically we could implement removal of nested retain+release
// pairs by making PtrState hold a stack of states, but this is
// simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ if (S.GetSeq() == S_Retain)
NestingDetected = true;
- S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+ S.SetSeq(S_Retain);
S.RRI.clear();
- S.RRI.KnownSafe = S.IsKnownNested() || S.IsKnownIncremented();
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ // Don't check S.IsKnownIncremented() here because it's not
+ // sufficient.
+ S.RRI.KnownSafe = S.IsKnownNested();
S.RRI.Calls.insert(Inst);
+ }
- S.IncrementRefCount();
- S.IncrementNestCount();
+ S.SetAtLeastOneRefCount();
+ S.IncrementRefCount();
+ S.IncrementNestCount();
+ return NestingDetected;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+ S.DecrementNestCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
}
- case IC_RetainBlock:
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ return NestingDetected;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ return NestingDetected;
+ default:
+ break;
+ }
- PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
S.DecrementRefCount();
- S.SetAtLeastOneRefCount();
- S.DecrementNestCount();
-
- // An objc_retainBlock call with just a use still needs to be kept,
- // because it may be copying a block from the stack to the heap.
- if (Class == IC_RetainBlock && S.GetSeq() == S_Use)
+ switch (Seq) {
+ case S_Retain:
S.SetSeq(S_CanRelease);
+ assert(S.RRI.ReverseInsertPts.empty());
+ S.RRI.ReverseInsertPts.insert(Inst);
- switch (S.GetSeq()) {
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
+ // One call can't cause a transition both from S_Retain to S_CanRelease
+ // and from S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
case S_Use:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
case S_CanRelease:
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- Retains[Inst] = S.RRI;
- }
- S.ClearSequenceProgress();
- break;
case S_None:
break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- continue;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearBottomUpPointers();
- continue;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- continue;
- default:
- break;
- }
-
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
- ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
- switch (Seq) {
- case S_Use:
- S.SetSeq(S_CanRelease);
- continue;
- case S_CanRelease:
- case S_Release:
- case S_MovableRelease:
- case S_Stop:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
+ case S_Stop:
case S_Release:
case S_MovableRelease:
- if (CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- S.SetSeq(S_Use);
- } else if (Seq == S_Release &&
- (Class == IC_User || Class == IC_CallOrUser)) {
- // Non-movable releases depend on any possible objc pointer use.
- S.SetSeq(S_Stop);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- }
- break;
- case S_Stop:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_CanRelease:
- case S_Use:
- case S_None:
- break;
- case S_Retain:
- llvm_unreachable("bottom-up pointer in retain state!");
+ llvm_unreachable("top-down pointer in release state!");
}
}
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Retain:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
}
return NestingDetected;
@@ -2453,22 +2951,31 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
MyStates.SetAsEntry();
else
do {
- const BasicBlock *Pred = *PI++;
+ unsigned OperandNo = PI.getOperandNo();
+ const Use &Us = PI.getUse();
+ ++PI;
+
+ // Skip invoke unwind edges on invoke instructions marked with
+ // clang.arc.no_objc_arc_exceptions.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Us.getUser()))
+ if (OperandNo == II->getNumArgOperands() + 2 &&
+ II->getMetadata(NoObjCARCExceptionsMDKind))
+ continue;
+
+ const BasicBlock *Pred = cast<TerminatorInst>(Us.getUser())->getParent();
if (Pred == BB)
continue;
DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
- assert(I != BBStates.end());
// If we haven't seen this node yet, then we've found a CFG cycle.
// Be optimistic here; it's CheckForCFGHazards' job to detect trouble.
- if (!I->second.isVisitedTopDown())
+ if (I == BBStates.end() || !I->second.isVisitedTopDown())
continue;
MyStates.InitFromPred(I->second);
while (PI != PE) {
Pred = *PI++;
if (Pred != BB) {
I = BBStates.find(Pred);
- assert(I != BBStates.end());
- if (I->second.isVisitedTopDown())
+ if (I != BBStates.end() && I->second.isVisitedTopDown())
MyStates.MergePred(I->second);
}
}
@@ -2478,147 +2985,89 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB,
// Visit all the instructions, top-down.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
Instruction *Inst = I;
- InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
+ NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates);
+ }
- switch (Class) {
- case IC_RetainBlock:
- case IC_Retain:
- case IC_RetainRV: {
- Arg = GetObjCArg(Inst);
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
- PtrState &S = MyStates.getPtrTopDownState(Arg);
+static void
+ComputePostOrders(Function &F,
+ SmallVectorImpl<BasicBlock *> &PostOrder,
+ SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder) {
+ /// Backedges - Backedges detected in the DFS. These edges will be
+ /// ignored in the reverse-CFG DFS, so that loops with multiple exits will be
+ /// traversed in the desired order.
+ DenseSet<std::pair<BasicBlock *, BasicBlock *> > Backedges;
+
+ /// Visited - The visited set, for doing DFS walks.
+ SmallPtrSet<BasicBlock *, 16> Visited;
- // Don't do retain+release tracking for IC_RetainRV, because it's
- // better to let it remain as the first instruction after a call.
- if (Class != IC_RetainRV) {
- // If we see two retains in a row on the same pointer. If so, make
- // a note, and we'll cicle back to revisit it after we've
- // hopefully eliminated the second retain, which may allow us to
- // eliminate the first retain too.
- // Theoretically we could implement removal of nested retain+release
- // pairs by making PtrState hold a stack of states, but this is
- // simple and avoids adding overhead for the non-nested case.
- if (S.GetSeq() == S_Retain)
- NestingDetected = true;
-
- S.SetSeq(S_Retain);
- S.RRI.clear();
- S.RRI.IsRetainBlock = Class == IC_RetainBlock;
- // Don't check S.IsKnownIncremented() here because it's not
- // sufficient.
- S.RRI.KnownSafe = S.IsKnownNested();
- S.RRI.Calls.insert(Inst);
+ // Do DFS, computing the PostOrder.
+ SmallPtrSet<BasicBlock *, 16> OnStack;
+ SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack;
+ BasicBlock *EntryBB = &F.getEntryBlock();
+ SuccStack.push_back(std::make_pair(EntryBB, succ_begin(EntryBB)));
+ Visited.insert(EntryBB);
+ OnStack.insert(EntryBB);
+ do {
+ dfs_next_succ:
+ TerminatorInst *TI = cast<TerminatorInst>(&SuccStack.back().first->back());
+ succ_iterator End = succ_iterator(TI, true);
+ while (SuccStack.back().second != End) {
+ BasicBlock *BB = *SuccStack.back().second++;
+ if (Visited.insert(BB)) {
+ SuccStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ OnStack.insert(BB);
+ goto dfs_next_succ;
}
-
- S.SetAtLeastOneRefCount();
- S.IncrementRefCount();
- S.IncrementNestCount();
- continue;
+ if (OnStack.count(BB))
+ Backedges.insert(std::make_pair(SuccStack.back().first, BB));
}
- case IC_Release: {
- Arg = GetObjCArg(Inst);
+ OnStack.erase(SuccStack.back().first);
+ PostOrder.push_back(SuccStack.pop_back_val().first);
+ } while (!SuccStack.empty());
- PtrState &S = MyStates.getPtrTopDownState(Arg);
- S.DecrementRefCount();
- S.DecrementNestCount();
-
- switch (S.GetSeq()) {
- case S_Retain:
- case S_CanRelease:
- S.RRI.ReverseInsertPts.clear();
- // FALL THROUGH
- case S_Use:
- S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
- S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
- Releases[Inst] = S.RRI;
- S.ClearSequenceProgress();
- break;
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- break;
- }
- case IC_AutoreleasepoolPop:
- // Conservatively, clear MyStates for all known pointers.
- MyStates.clearTopDownPointers();
- continue;
- case IC_AutoreleasepoolPush:
- case IC_None:
- // These are irrelevant.
- continue;
- default:
- break;
- }
+ Visited.clear();
- // Consider any other possible effects of this instruction on each
- // pointer being tracked.
- for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
- ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
- const Value *Ptr = MI->first;
- if (Ptr == Arg)
- continue; // Handled above.
- PtrState &S = MI->second;
- Sequence Seq = S.GetSeq();
-
- // Check for possible releases.
- if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
- S.DecrementRefCount();
- switch (Seq) {
- case S_Retain:
- S.SetSeq(S_CanRelease);
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
+ // Compute the exits, which are the starting points for reverse-CFG DFS.
+ // This includes blocks where all the successors are backedges that
+ // we're skipping.
+ SmallVector<BasicBlock *, 4> Exits;
+ for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+ BasicBlock *BB = I;
+ TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_iterator SI(TI), SE(TI, true); SI != SE; ++SI)
+ if (!Backedges.count(std::make_pair(BB, *SI)))
+ goto HasNonBackedgeSucc;
+ Exits.push_back(BB);
+ HasNonBackedgeSucc:;
+ }
- // One call can't cause a transition from S_Retain to S_CanRelease
- // and S_CanRelease to S_Use. If we've made the first transition,
- // we're done.
+ // Do reverse-CFG DFS, computing the reverse-CFG PostOrder.
+ SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> PredStack;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = Exits.begin(), E = Exits.end();
+ I != E; ++I) {
+ BasicBlock *ExitBB = *I;
+ PredStack.push_back(std::make_pair(ExitBB, pred_begin(ExitBB)));
+ Visited.insert(ExitBB);
+ while (!PredStack.empty()) {
+ reverse_dfs_next_succ:
+ pred_iterator End = pred_end(PredStack.back().first);
+ while (PredStack.back().second != End) {
+ BasicBlock *BB = *PredStack.back().second++;
+ // Skip backedges detected in the forward-CFG DFS.
+ if (Backedges.count(std::make_pair(BB, PredStack.back().first)))
continue;
- case S_Use:
- case S_CanRelease:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
- }
- }
-
- // Check for possible direct uses.
- switch (Seq) {
- case S_CanRelease:
- if (CanUse(Inst, Ptr, PA, Class))
- S.SetSeq(S_Use);
- break;
- case S_Retain:
- // An objc_retainBlock call may be responsible for copying the block
- // data from the stack to the heap. Model this by moving it straight
- // from S_Retain to S_Use.
- if (S.RRI.IsRetainBlock &&
- CanUse(Inst, Ptr, PA, Class)) {
- assert(S.RRI.ReverseInsertPts.empty());
- S.RRI.ReverseInsertPts.insert(Inst);
- S.SetSeq(S_Use);
+ if (Visited.insert(BB)) {
+ PredStack.push_back(std::make_pair(BB, pred_begin(BB)));
+ goto reverse_dfs_next_succ;
}
- break;
- case S_Use:
- case S_None:
- break;
- case S_Stop:
- case S_Release:
- case S_MovableRelease:
- llvm_unreachable("top-down pointer in release state!");
}
+ ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first);
}
}
-
- CheckForCFGHazards(BB, BBStates, MyStates);
- return NestingDetected;
}
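
ComputePostOrders above performs two iterative DFS walks: a forward walk
recording a post-order plus the back edges it encounters, then a
reverse-CFG walk from the exits that skips those edges. A self-contained
sketch of the forward half over a plain adjacency list (simplified types;
an illustration, not the pass's code):

#include <cstddef>
#include <set>
#include <stack>
#include <utility>
#include <vector>

typedef std::vector<std::vector<std::size_t> > Graph;
typedef std::set<std::pair<std::size_t, std::size_t> > EdgeSet;

static void postOrderDFS(const Graph &G, std::size_t Entry,
                         std::vector<std::size_t> &PostOrder,
                         EdgeSet &Backedges) {
  std::vector<int> State(G.size(), 0); // 0 = new, 1 = on stack, 2 = done
  std::stack<std::pair<std::size_t, std::size_t> > Stack;
  Stack.push(std::make_pair(Entry, (std::size_t)0));
  State[Entry] = 1;
  while (!Stack.empty()) {
    std::size_t BB = Stack.top().first;
    if (Stack.top().second < G[BB].size()) {
      std::size_t Succ = G[BB][Stack.top().second++];
      if (State[Succ] == 0) {
        State[Succ] = 1;                            // descend into a new block
        Stack.push(std::make_pair(Succ, (std::size_t)0));
      } else if (State[Succ] == 1) {
        Backedges.insert(std::make_pair(BB, Succ)); // cycle edge, skip later
      }
    } else {
      State[BB] = 2;                                // all successors finished
      PostOrder.push_back(BB);
      Stack.pop();
    }
  }
}
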
// Visit - Visit the function both top-down and bottom-up.
@@ -2627,43 +3076,29 @@ ObjCARCOpt::Visit(Function &F,
DenseMap<const BasicBlock *, BBState> &BBStates,
MapVector<Value *, RRInfo> &Retains,
DenseMap<Value *, RRInfo> &Releases) {
- // Use reverse-postorder on the reverse CFG for bottom-up, because we
- // magically know that loops will be well behaved, i.e. they won't repeatedly
- // call retain on a single pointer without doing a release. We can't use
- // ReversePostOrderTraversal here because we want to walk up from each
- // function exit point.
- SmallPtrSet<BasicBlock *, 16> Visited;
- SmallVector<std::pair<BasicBlock *, pred_iterator>, 16> Stack;
- SmallVector<BasicBlock *, 16> Order;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- BasicBlock *BB = I;
- if (BB->getTerminator()->getNumSuccessors() == 0)
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- while (!Stack.empty()) {
- pred_iterator End = pred_end(Stack.back().first);
- while (Stack.back().second != End) {
- BasicBlock *BB = *Stack.back().second++;
- if (Visited.insert(BB))
- Stack.push_back(std::make_pair(BB, pred_begin(BB)));
- }
- Order.push_back(Stack.pop_back_val().first);
- }
+
+ // Use reverse-postorder traversals, because we magically know that loops
+ // will be well behaved, i.e. they won't repeatedly call retain on a single
+ // pointer without doing a release. We can't use the ReversePostOrderTraversal
+ // class here because we want the reverse-CFG postorder to consider each
+ // function exit point, and we want to ignore selected cycle edges.
+ SmallVector<BasicBlock *, 16> PostOrder;
+ SmallVector<BasicBlock *, 16> ReverseCFGPostOrder;
+ ComputePostOrders(F, PostOrder, ReverseCFGPostOrder);
+
+ // Use reverse-postorder on the reverse CFG for bottom-up.
bool BottomUpNestingDetected = false;
for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
- Order.rbegin(), E = Order.rend(); I != E; ++I) {
- BasicBlock *BB = *I;
- BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
- }
+ ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend();
+ I != E; ++I)
+ BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains);
- // Use regular reverse-postorder for top-down.
+ // Use reverse-postorder for top-down.
bool TopDownNestingDetected = false;
- typedef ReversePostOrderTraversal<Function *> RPOTType;
- RPOTType RPOT(&F);
- for (RPOTType::rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- TopDownNestingDetected |= VisitTopDown(BB, BBStates, Releases);
- }
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I =
+ PostOrder.rbegin(), E = PostOrder.rend();
+ I != E; ++I)
+ TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases);
return TopDownNestingDetected && BottomUpNestingDetected;
}
@@ -2691,40 +3126,26 @@ void ObjCARCOpt::MoveCalls(Value *Arg,
getRetainBlockCallee(M) : getRetainCallee(M),
MyArg, "", InsertPt);
Call->setDoesNotThrow();
- if (!RetainsToMove.IsRetainBlock)
+ if (RetainsToMove.IsRetainBlock)
+ Call->setMetadata(CopyOnEscapeMDKind,
+ MDNode::get(M->getContext(), ArrayRef<Value *>()));
+ else
Call->setTailCall();
}
for (SmallPtrSet<Instruction *, 2>::const_iterator
PI = RetainsToMove.ReverseInsertPts.begin(),
PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
- Instruction *LastUse = *PI;
- Instruction *InsertPts[] = { 0, 0, 0 };
- if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
- // We can't insert code immediately after an invoke instruction, so
- // insert code at the beginning of both successor blocks instead.
- // The invoke's return value isn't available in the unwind block,
- // but our releases will never depend on it, because they must be
- // paired with retains from before the invoke.
- InsertPts[0] = II->getNormalDest()->getFirstInsertionPt();
- InsertPts[1] = II->getUnwindDest()->getFirstInsertionPt();
- } else {
- // Insert code immediately after the last use.
- InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
- }
-
- for (Instruction **I = InsertPts; *I; ++I) {
- Instruction *InsertPt = *I;
- Value *MyArg = ArgTy == ParamTy ? Arg :
- new BitCastInst(Arg, ParamTy, "", InsertPt);
- CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
- "", InsertPt);
- // Attach a clang.imprecise_release metadata tag, if appropriate.
- if (MDNode *M = ReleasesToMove.ReleaseMetadata)
- Call->setMetadata(ImpreciseReleaseMDKind, M);
- Call->setDoesNotThrow();
- if (ReleasesToMove.IsTailCallRelease)
- Call->setTailCall();
- }
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg,
+ "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
}
// Delete the original retain and release calls.
@@ -2765,17 +3186,11 @@ ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
Instruction *Retain = cast<Instruction>(V);
Value *Arg = GetObjCArg(Retain);
- // If the object being released is in static storage, we know it's
+ // If the object being released is in static or stack storage, we know it's
// not being managed by ObjC reference counting, so we can delete pairs
// regardless of what possible decrements or uses lie between them.
- bool KnownSafe = isa<Constant>(Arg);
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
- // Same for stack storage, unless this is an objc_retainBlock call,
- // which is responsible for copying the block data from the stack to
- // the heap.
- if (!I->second.IsRetainBlock && isa<AllocaInst>(Arg))
- KnownSafe = true;
-
// A constant pointer can't be pointing to an object on the heap. It may
// be reference-counted, but it won't be deleted.
if (const LoadInst *LI = dyn_cast<LoadInst>(Arg))
@@ -3091,7 +3506,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
UE = Alloca->use_end(); UI != UE; ) {
CallInst *UserInst = cast<CallInst>(*UI++);
if (!UserInst->use_empty())
- UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->replaceAllUsesWith(UserInst->getArgOperand(0));
UserInst->eraseFromParent();
}
Alloca->eraseFromParent();
@@ -3243,6 +3658,10 @@ bool ObjCARCOpt::doInitialization(Module &M) {
// Identify the imprecise release metadata kind.
ImpreciseReleaseMDKind =
M.getContext().getMDKindID("clang.imprecise_release");
+ CopyOnEscapeMDKind =
+ M.getContext().getMDKindID("clang.arc.copy_on_escape");
+ NoObjCARCExceptionsMDKind =
+ M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
// Intuitively, objc_retain and others are nocapture; however, in practice
// they are not, because they return their argument value. And objc_release
@@ -3344,6 +3763,11 @@ namespace {
/// RetainRV calls to make the optimization work on targets which need it.
const MDString *RetainRVMarker;
+ /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If
+ /// at the end of walking the function we have found no alloca
+ /// instructions, these calls can be marked "tail".
+ DenseSet<CallInst *> StoreStrongCalls;
+
Constant *getStoreStrongCallee(Module *M);
Constant *getRetainAutoreleaseCallee(Module *M);
Constant *getRetainAutoreleaseRVCallee(Module *M);
@@ -3547,6 +3971,11 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
StoreStrong->setDoesNotThrow();
StoreStrong->setDebugLoc(Store->getDebugLoc());
+ // We can't set the tail flag yet, because we haven't yet determined
+ // whether there are any escaping allocas. Remember this call, so that
+ // we can set the tail flag once we know it's safe.
+ StoreStrongCalls.insert(StoreStrong);
+
if (&*Iter == Store) ++Iter;
Store->eraseFromParent();
Release->eraseFromParent();
@@ -3593,6 +4022,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
PA.setAA(&getAnalysis<AliasAnalysis>());
+ // Track whether it's ok to mark objc_storeStrong calls with the "tail"
+ // keyword. Be conservative if the function has variadic arguments.
+ // It seems that functions which "return twice" are also unsafe for the
+ // "tail" argument, because they call setjmp, which could need to
+ // return to an earlier stack state.
+ bool TailOkForStoreStrongs = !F.isVarArg() &&
+                              !F.callsFunctionThatReturnsTwice();
+
// For ObjC library calls which return their argument, replace uses of the
// argument with uses of the call return value, if it dominates the use. This
// reduces register pressure.
@@ -3649,6 +4085,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
case IC_Release:
ContractRelease(Inst, I);
continue;
+ case IC_User:
+ // Be conservative if the function has any alloca instructions.
+ // Technically we only care about escaping alloca instructions,
+ // but this is sufficient to handle some interesting cases.
+ if (isa<AllocaInst>(Inst))
+ TailOkForStoreStrongs = false;
+ continue;
default:
continue;
}
@@ -3666,36 +4109,37 @@ bool ObjCARCContract::runOnFunction(Function &F) {
Use &U = UI.getUse();
unsigned OperandNo = UI.getOperandNo();
++UI; // Increment UI now, because we may unlink its element.
- if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
- if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
- Changed = true;
- Instruction *Replacement = Inst;
- Type *UseTy = U.get()->getType();
- if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
- // For PHI nodes, insert the bitcast in the predecessor block.
- unsigned ValNo =
- PHINode::getIncomingValueNumForOperand(OperandNo);
- BasicBlock *BB =
- PHI->getIncomingBlock(ValNo);
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "",
- &BB->back());
- for (unsigned i = 0, e = PHI->getNumIncomingValues();
- i != e; ++i)
- if (PHI->getIncomingBlock(i) == BB) {
- // Keep the UI iterator valid.
- if (&PHI->getOperandUse(
- PHINode::getOperandNumForIncomingValue(i)) ==
- &UI.getUse())
- ++UI;
- PHI->setIncomingValue(i, Replacement);
- }
- } else {
- if (Replacement->getType() != UseTy)
- Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
- U.set(Replacement);
- }
+ if (DT->isReachableFromEntry(U) &&
+ DT->dominates(Inst, U)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo =
+ PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB =
+ PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ for (unsigned i = 0, e = PHI->getNumIncomingValues();
+ i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ cast<Instruction>(U.getUser()));
+ U.set(Replacement);
}
+ }
}
// If Arg is a no-op casted pointer, strip one level of casts and
@@ -3713,5 +4157,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
}
}
+ // If this function has no escaping allocas or suspicious vararg usage,
+ // objc_storeStrong calls can be marked with the "tail" keyword.
+ if (TailOkForStoreStrongs)
+ for (DenseSet<CallInst *>::iterator I = StoreStrongCalls.begin(),
+ E = StoreStrongCalls.end(); I != E; ++I)
+ (*I)->setTailCall();
+ StoreStrongCalls.clear();
+
return Changed;
}
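
The StoreStrongCalls bookkeeping above is a deferred-flag pattern:
candidate calls are collected during the walk, and the "tail" marking is
applied only once the whole function is known to be safe. A standalone
sketch with simplified stand-in types (an illustration, not LLVM's
classes):

#include <cstddef>
#include <vector>

struct Call { bool Tail; Call() : Tail(false) {} };

struct TailMarker {
  std::vector<Call *> Candidates;
  bool SafeForTail;
  TailMarker() : SafeForTail(true) {}

  void sawAlloca() { SafeForTail = false; }   // stack memory disqualifies
  void sawStoreStrong(Call *C) { Candidates.push_back(C); }

  // Run once the function walk is complete.
  void finish() {
    if (SafeForTail)
      for (std::size_t i = 0, e = Candidates.size(); i != e; ++i)
        Candidates[i]->Tail = true;
    Candidates.clear();
  }
};
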
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 8f98a5b6503e..cb408a137eab 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -74,7 +74,7 @@ static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
namespace {
class Reassociate : public FunctionPass {
DenseMap<BasicBlock*, unsigned> RankMap;
- DenseMap<AssertingVH<>, unsigned> ValueRankMap;
+ DenseMap<AssertingVH<Value>, unsigned> ValueRankMap;
SmallVector<WeakVH, 8> RedoInsts;
SmallVector<WeakVH, 8> DeadInsts;
bool MadeChange;
@@ -210,7 +210,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
@@ -492,7 +492,7 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
static Instruction *BreakUpSubtract(Instruction *Sub,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction. This allows sub
// instructions to be commuted with other add instructions.
//
@@ -517,8 +517,8 @@ static Instruction *BreakUpSubtract(Instruction *Sub,
/// ConvertShiftToMul - If this is a shift of a reassociable multiply or is used
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
-static Instruction *ConvertShiftToMul(Instruction *Shl,
- DenseMap<AssertingVH<>, unsigned> &ValueRankMap) {
+static Instruction *ConvertShiftToMul(Instruction *Shl,
+ DenseMap<AssertingVH<Value>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 196a847fc0ea..16b64a500b34 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -25,9 +25,9 @@
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -39,9 +39,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
#include <algorithm>
-#include <map>
using namespace llvm;
STATISTIC(NumInstRemoved, "Number of instructions removed");
@@ -59,7 +57,7 @@ class LatticeVal {
enum LatticeValueTy {
/// undefined - This LLVM Value has no known value yet.
undefined,
-
+
/// constant - This LLVM Value has a specific constant value.
constant,
@@ -68,7 +66,7 @@ class LatticeVal {
/// with another (different) constant, it goes to overdefined, instead of
/// asserting.
forcedconstant,
-
+
/// overdefined - This instruction is not known to be constant, and we know
/// it has a value.
overdefined
@@ -77,30 +75,30 @@ class LatticeVal {
/// Val: This stores the current lattice value along with the Constant* for
/// the constant if this is a 'constant' or 'forcedconstant' value.
PointerIntPair<Constant *, 2, LatticeValueTy> Val;
-
+
LatticeValueTy getLatticeValue() const {
return Val.getInt();
}
-
+
public:
LatticeVal() : Val(0, undefined) {}
-
+
bool isUndefined() const { return getLatticeValue() == undefined; }
bool isConstant() const {
return getLatticeValue() == constant || getLatticeValue() == forcedconstant;
}
bool isOverdefined() const { return getLatticeValue() == overdefined; }
-
+
Constant *getConstant() const {
assert(isConstant() && "Cannot get the constant of a non-constant!");
return Val.getPointer();
}
-
+
/// markOverdefined - Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
return false;
-
+
Val.setInt(overdefined);
return true;
}
@@ -111,17 +109,17 @@ public:
assert(getConstant() == V && "Marking constant with different value");
return false;
}
-
+
if (isUndefined()) {
Val.setInt(constant);
assert(V && "Marking constant with NULL");
Val.setPointer(V);
} else {
- assert(getLatticeValue() == forcedconstant &&
+ assert(getLatticeValue() == forcedconstant &&
"Cannot move from overdefined to constant!");
// Stay at forcedconstant if the constant is the same.
if (V == getConstant()) return false;
-
+
// Otherwise, we go to overdefined. Assumptions made based on the
// forced value are possibly wrong. Assuming this is another constant
// could expose a contradiction.
@@ -137,7 +135,7 @@ public:
return dyn_cast<ConstantInt>(getConstant());
return 0;
}
-
+
void markForcedConstant(Constant *V) {
assert(isUndefined() && "Can't force a defined value!");
Val.setInt(forcedconstant);
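
LatticeVal packs a four-state lattice (undefined, constant, forcedconstant,
overdefined) together with the Constant* into one PointerIntPair. A
standalone model of its transition rules, with an int standing in for
Constant* (simplified; an illustration, not SCCP's class). Both mark
functions return true when the state changed, which is what feeds the
solver's worklists:

struct Lattice {
  enum State { Undefined, Constant, ForcedConstant, Overdefined };
  State St;
  int Value;
  Lattice() : St(Undefined), Value(0) {}

  bool markOverdefined() {
    if (St == Overdefined) return false;
    St = Overdefined;
    return true;
  }

  bool markConstant(int V) {
    if (St == Overdefined) return false;   // already at the lattice bottom
    if (St == Undefined) { St = Constant; Value = V; return true; }
    if (Value == V) return false;          // same constant: no change
    // A differing constant: the real class asserts unless the old state
    // was forcedconstant; this model simply sinks to overdefined.
    return markOverdefined();
  }
};
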
@@ -156,6 +154,7 @@ namespace {
///
class SCCPSolver : public InstVisitor<SCCPSolver> {
const TargetData *TD;
+ const TargetLibraryInfo *TLI;
SmallPtrSet<BasicBlock*, 8> BBExecutable; // The BBs that are executable.
DenseMap<Value*, LatticeVal> ValueState; // The state each value is in.
@@ -163,7 +162,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// StructType, for example for formal arguments, calls, insertelement, etc.
///
DenseMap<std::pair<Value*, unsigned>, LatticeVal> StructValueState;
-
+
/// GlobalValue - If we are tracking any values for the contents of a global
/// variable, we keep a mapping from the constant accessor to the element of
/// the global, to the currently known value. If the value becomes
@@ -178,7 +177,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// TrackedMultipleRetVals - Same as TrackedRetVals, but used for functions
/// that return multiple values.
DenseMap<std::pair<Function*, unsigned>, LatticeVal> TrackedMultipleRetVals;
-
+
/// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
/// represented here for efficient lookup.
SmallPtrSet<Function*, 16> MRVFunctionsTracked;
@@ -187,7 +186,7 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
/// arguments we make optimistic assumptions about and try to prove as
/// constants.
SmallPtrSet<Function*, 16> TrackingIncomingArguments;
-
+
/// The reason for two worklists is that overdefined is the lowest state
/// on the lattice, and moving things to overdefined as fast as possible
/// makes SCCP converge much faster.
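
The two-worklist discipline described above can be sketched in isolation:
the overdefined list is always drained first, so values reach the lattice
bottom as early as possible and never bounce between states. Simplified
stand-in types; an illustration, not the solver itself:

#include <vector>

struct TwoListSolver {
  std::vector<int> OverdefinedWork;
  std::vector<int> InstWork;

  void visitUsers(int /*V*/) {} // stand-in for re-visiting dependent values

  void solve() {
    while (!OverdefinedWork.empty() || !InstWork.empty()) {
      // Drain overdefined work completely before ordinary work.
      while (!OverdefinedWork.empty()) {
        int V = OverdefinedWork.back();
        OverdefinedWork.pop_back();
        visitUsers(V);
      }
      if (!InstWork.empty()) {
        int V = InstWork.back();
        InstWork.pop_back();
        visitUsers(V);
      }
    }
  }
};
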
@@ -201,16 +200,13 @@ class SCCPSolver : public InstVisitor<SCCPSolver> {
SmallVector<BasicBlock*, 64> BBWorkList; // The BasicBlock work list
- /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not
- /// overdefined, despite the fact that the PHI node is overdefined.
- std::multimap<PHINode*, Instruction*> UsersOfOverdefinedPHIs;
-
/// KnownFeasibleEdges - Entries in this set are edges which have already had
/// PHI nodes retriggered.
typedef std::pair<BasicBlock*, BasicBlock*> Edge;
DenseSet<Edge> KnownFeasibleEdges;
public:
- SCCPSolver(const TargetData *td) : TD(td) {}
+ SCCPSolver(const TargetData *td, const TargetLibraryInfo *tli)
+ : TD(td), TLI(tli) {}
/// MarkBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -253,7 +249,7 @@ public:
void AddArgumentTrackedFunction(Function *F) {
TrackingIncomingArguments.insert(F);
}
-
+
/// Solve - Solve for constants and executable blocks.
///
void Solve();
@@ -274,9 +270,9 @@ public:
assert(I != ValueState.end() && "V is not in valuemap!");
return I->second;
}
-
+
/*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
- DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
+ DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
StructValueState.find(std::make_pair(V, i));
assert(I != StructValueState.end() && "V is not in valuemap!");
return I->second;
@@ -308,7 +304,7 @@ public:
else
markOverdefined(V);
}
-
+
private:
// markConstant - Make a value be marked as "constant". If the value
// is not already a constant, add it to the instruction work list so that
@@ -322,7 +318,7 @@ private:
else
InstWorkList.push_back(V);
}
-
+
void markConstant(Value *V, Constant *C) {
assert(!V->getType()->isStructTy() && "Should use other method");
markConstant(ValueState[V], V, C);
@@ -338,14 +334,14 @@ private:
else
InstWorkList.push_back(V);
}
-
-
+
+
// markOverdefined - Make a value be marked as "overdefined". If the
// value is not already overdefined, add it to the overdefined instruction
// work list so that the users of the instruction are updated later.
void markOverdefined(LatticeVal &IV, Value *V) {
if (!IV.markOverdefined()) return;
-
+
DEBUG(dbgs() << "markOverdefined: ";
if (Function *F = dyn_cast<Function>(V))
dbgs() << "Function '" << F->getName() << "'\n";
@@ -365,7 +361,7 @@ private:
else if (IV.getConstant() != MergeWithV.getConstant())
markOverdefined(IV, V);
}
-
+
void mergeInValue(Value *V, LatticeVal MergeWithV) {
assert(!V->getType()->isStructTy() && "Should use other method");
mergeInValue(ValueState[V], V, MergeWithV);
@@ -390,7 +386,7 @@ private:
if (!isa<UndefValue>(V))
LV.markConstant(C); // Constants are constant
}
-
+
// All others are undefined by default.
return LV;
}
@@ -412,21 +408,20 @@ private:
return LV; // Common case, already in the map.
if (Constant *C = dyn_cast<Constant>(V)) {
- if (isa<UndefValue>(C))
- ; // Undef values remain undefined.
- else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
- LV.markConstant(CS->getOperand(i)); // Constants are constant.
- else if (isa<ConstantAggregateZero>(C)) {
- Type *FieldTy = cast<StructType>(V->getType())->getElementType(i);
- LV.markConstant(Constant::getNullValue(FieldTy));
- } else
+ Constant *Elt = C->getAggregateElement(i);
+
+ if (Elt == 0)
LV.markOverdefined(); // Unknown sort of constant.
+ else if (isa<UndefValue>(Elt))
+ ; // Undef values remain undefined.
+ else
+ LV.markConstant(Elt); // Constants are constant.
}
-
+
// All others are undefined by default.
return LV;
}
-
+
/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
/// work list if it is not already executable.
@@ -466,33 +461,6 @@ private:
if (BBExecutable.count(I->getParent())) // Inst is executable?
visit(*I);
}
-
- /// RemoveFromOverdefinedPHIs - If I has any entries in the
- /// UsersOfOverdefinedPHIs map for PN, remove them now.
- void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) {
- if (UsersOfOverdefinedPHIs.empty()) return;
- typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
- std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
- for (ItTy It = Range.first, E = Range.second; It != E;) {
- if (It->second == I)
- UsersOfOverdefinedPHIs.erase(It++);
- else
- ++It;
- }
- }
-
- /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS
- /// map for I and PN, but if one is there already, do not create another.
- /// (Duplicate entries do not break anything directly, but can lead to
- /// exponential growth of the table in rare cases.)
- void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) {
- typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
- std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(PN);
- for (ItTy J = Range.first, E = Range.second; J != E; ++J)
- if (J->second == I)
- return;
- UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I));
- }
private:
friend class InstVisitor<SCCPSolver>;
@@ -559,7 +527,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs[0] = true;
return;
}
-
+
LatticeVal BCValue = getValueState(BI->getCondition());
ConstantInt *CI = BCValue.getConstantInt();
if (CI == 0) {
@@ -569,44 +537,44 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
Succs[0] = Succs[1] = true;
return;
}
-
+
// Constant condition variables mean the branch can only go a single way.
Succs[CI->isZero()] = true;
return;
}
-
+
if (isa<InvokeInst>(TI)) {
// Invoke instructions' successors are always executable.
Succs[0] = Succs[1] = true;
return;
}
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
- if (TI.getNumSuccessors() < 2) {
+ if (!SI->getNumCases()) {
Succs[0] = true;
return;
}
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
-
+
if (CI == 0) { // Overdefined or undefined condition?
// All destinations are executable!
if (!SCValue.isUndefined())
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
- Succs[SI->findCaseValue(CI)] = true;
+
+ Succs[SI->findCaseValue(CI).getSuccessorIndex()] = true;
return;
}
-
+
// TODO: This could be improved if the operand is a [cast of a] BlockAddress.
if (isa<IndirectBrInst>(&TI)) {
// Just mark all destinations executable!
Succs.assign(TI.getNumSuccessors(), true);
return;
}
-
+
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << TI << '\n';
#endif
@@ -628,7 +596,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional())
return true;
-
+
LatticeVal BCValue = getValueState(BI->getCondition());
// Overdefined condition variables mean the branch could go either way,
@@ -636,40 +604,33 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
ConstantInt *CI = BCValue.getConstantInt();
if (CI == 0)
return !BCValue.isUndefined();
-
+
// Constant condition variables mean the branch can only go a single way.
return BI->getSuccessor(CI->isZero()) == To;
}
-
+
// Invoke instructions' successors are always executable.
if (isa<InvokeInst>(TI))
return true;
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (SI->getNumSuccessors() < 2)
+ if (SI->getNumCases() < 1)
return true;
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
-
+
if (CI == 0)
return !SCValue.isUndefined();
- // Make sure to skip the "default value" which isn't a value
- for (unsigned i = 1, E = SI->getNumSuccessors(); i != E; ++i)
- if (SI->getSuccessorValue(i) == CI) // Found the taken branch.
- return SI->getSuccessor(i) == To;
-
- // If the constant value is not equal to any of the branches, we must
- // execute default branch.
- return SI->getDefaultDest() == To;
+ return SI->findCaseValue(CI).getCaseSuccessor() == To;
}
-
+
// Just mark all destinations executable!
// TODO: This could be improved if the operand is a [cast of a] BlockAddress.
if (isa<IndirectBrInst>(TI))
return true;
-
+
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
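
Both switch-handling paths now go through the case-iterator API: a sketch
of its use, assuming this tree's SwitchInst interface, where findCaseValue
yields the matched case (or the default case when CI matches no explicit
value) and getCaseSuccessor names the block the switch would take:

#include "llvm/Instructions.h"
using namespace llvm;

static BasicBlock *SwitchTarget(SwitchInst *SI, ConstantInt *CI) {
  return SI->findCaseValue(CI).getCaseSuccessor();
}
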
@@ -699,30 +660,15 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// TODO: We could do a lot better than this if code actually uses this.
if (PN.getType()->isStructTy())
return markAnythingOverdefined(&PN);
-
- if (getValueState(&PN).isOverdefined()) {
- // There may be instructions using this PHI node that are not overdefined
- // themselves. If so, make sure that they know that the PHI node operand
- // changed.
- typedef std::multimap<PHINode*, Instruction*>::iterator ItTy;
- std::pair<ItTy, ItTy> Range = UsersOfOverdefinedPHIs.equal_range(&PN);
-
- if (Range.first == Range.second)
- return;
-
- SmallVector<Instruction*, 16> Users;
- for (ItTy I = Range.first, E = Range.second; I != E; ++I)
- Users.push_back(I->second);
- while (!Users.empty())
- visit(Users.pop_back_val());
+
+ if (getValueState(&PN).isOverdefined())
return; // Quick exit
- }
// Super-extra-high-degree PHI nodes are unlikely to ever be marked constant,
// and slow us down a lot. Just mark them overdefined.
if (PN.getNumIncomingValues() > 64)
return markOverdefined(&PN);
-
+
// Look at all of the executable operands of the PHI node. If any of them
// are overdefined, the PHI becomes overdefined as well. If they are all
// constant, and they agree with each other, the PHI becomes the identical
@@ -736,7 +682,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent()))
continue;
-
+
if (IV.isOverdefined()) // PHI node becomes overdefined!
return markOverdefined(&PN);
@@ -744,11 +690,11 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
OperandVal = IV.getConstant();
continue;
}
-
+
// There is already a reachable operand. If we conflict with it,
// then the PHI node becomes overdefined. If we agree with it, we
// can continue on.
-
+
// Check to see if there are two different constants merging, if so, the PHI
// node is overdefined.
if (IV.getConstant() != OperandVal)
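
The loop above is the PHI merge rule: any overdefined feasible operand
makes the PHI overdefined, agreeing constants keep it constant, and two
different constants sink it. The same rule over a plain value type, as a
self-contained sketch (simplified types, not the solver's):

#include <cstddef>
#include <vector>

enum MeetState { MeetUndefined, MeetConstant, MeetOverdefined };
struct MeetVal { MeetState St; int C; };

static MeetVal meetPHI(const std::vector<MeetVal> &Feasible) {
  MeetVal R = { MeetUndefined, 0 };
  for (std::size_t i = 0, e = Feasible.size(); i != e; ++i) {
    const MeetVal &In = Feasible[i];
    if (In.St == MeetUndefined)
      continue;                      // unresolved operand: ignore for now
    if (In.St == MeetOverdefined) {
      R.St = MeetOverdefined;        // an overdefined operand wins outright
      return R;
    }
    if (R.St == MeetUndefined) {
      R = In;                        // first reachable constant
    } else if (In.C != R.C) {
      R.St = MeetOverdefined;        // two different constants merge
      return R;
    }
  }
  return R;
}
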
@@ -772,7 +718,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
Function *F = I.getParent()->getParent();
Value *ResultOp = I.getOperand(0);
-
+
// If we are tracking the return value of this function, merge it in.
if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
DenseMap<Function*, LatticeVal>::iterator TFRVI =
@@ -782,7 +728,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
return;
}
}
-
+
// Handle functions that return multiple values.
if (!TrackedMultipleRetVals.empty()) {
if (StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
@@ -790,7 +736,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
getStructValueState(ResultOp, i));
-
+
}
}
@@ -811,7 +757,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
else if (OpSt.isConstant()) // Propagate constant value
- markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
+ markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
OpSt.getConstant(), I.getType()));
}
@@ -821,7 +767,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
// structs in structs.
if (EVI.getType()->isStructTy())
return markAnythingOverdefined(&EVI);
-
+
// If this is extracting from more than one level of struct, we don't know.
if (EVI.getNumIndices() != 1)
return markOverdefined(&EVI);
@@ -841,15 +787,15 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
StructType *STy = dyn_cast<StructType>(IVI.getType());
if (STy == 0)
return markOverdefined(&IVI);
-
+
// If this has more than one index, we can't handle it, drive all results to
// undef.
if (IVI.getNumIndices() != 1)
return markAnythingOverdefined(&IVI);
-
+
Value *Aggr = IVI.getAggregateOperand();
unsigned Idx = *IVI.idx_begin();
-
+
// Compute the result based on what we're inserting.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
// This passes through all values that aren't the inserted element.
@@ -858,7 +804,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
mergeInValue(getStructValueState(&IVI, i), &IVI, EltVal);
continue;
}
-
+
Value *Val = IVI.getInsertedValueOperand();
if (Val->getType()->isStructTy())
// We don't track structs in structs.
@@ -875,25 +821,25 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
// TODO: We could do a lot better than this if code actually uses this.
if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
-
+
LatticeVal CondValue = getValueState(I.getCondition());
if (CondValue.isUndefined())
return;
-
+
if (ConstantInt *CondCB = CondValue.getConstantInt()) {
Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
mergeInValue(&I, getValueState(OpVal));
return;
}
-
+
// Otherwise, the condition is overdefined or a constant we can't evaluate.
// See if we can produce something better than overdefined based on the T/F
// value.
LatticeVal TVal = getValueState(I.getTrueValue());
LatticeVal FVal = getValueState(I.getFalseValue());
-
+
// select ?, C, C -> C.
- if (TVal.isConstant() && FVal.isConstant() &&
+ if (TVal.isConstant() && FVal.isConstant() &&
TVal.getConstant() == FVal.getConstant())
return markConstant(&I, FVal.getConstant());
@@ -908,7 +854,7 @@ void SCCPSolver::visitSelectInst(SelectInst &I) {
void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal V1State = getValueState(I.getOperand(0));
LatticeVal V2State = getValueState(I.getOperand(1));
-
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
@@ -916,14 +862,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
return markConstant(IV, &I,
ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
V2State.getConstant()));
-
+
// If something is undef, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
return;
-
+
// Otherwise, one of our operands is overdefined. Try to produce something
// better than overdefined with some tricks.
-
+
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
@@ -945,7 +891,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
Constant::getAllOnesValue(I.getType()));
return;
}
-
+
if (I.getOpcode() == Instruction::And) {
// X and 0 = 0
if (NonOverdefVal->getConstant()->isNullValue())
@@ -959,64 +905,6 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
}
- // If both operands are PHI nodes, it is possible that this instruction has
- // a constant value, despite the fact that the PHI node doesn't. Check for
- // this condition now.
- if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
- if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
- if (PN1->getParent() == PN2->getParent()) {
- // Since the two PHI nodes are in the same basic block, they must have
- // entries for the same predecessors. Walk the predecessor list, and
- // if all of the incoming values are constants, and the result of
- // evaluating this expression with all incoming value pairs is the
- // same, then this expression is a constant even though the PHI node
- // is not a constant!
- LatticeVal Result;
- for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
- LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
- BasicBlock *InBlock = PN1->getIncomingBlock(i);
- LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
-
- if (In1.isOverdefined() || In2.isOverdefined()) {
- Result.markOverdefined();
- break; // Cannot fold this operation over the PHI nodes!
- }
-
- if (In1.isConstant() && In2.isConstant()) {
- Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(),
- In2.getConstant());
- if (Result.isUndefined())
- Result.markConstant(V);
- else if (Result.isConstant() && Result.getConstant() != V) {
- Result.markOverdefined();
- break;
- }
- }
- }
-
- // If we found a constant value here, then we know the instruction is
- // constant despite the fact that the PHI nodes are overdefined.
- if (Result.isConstant()) {
- markConstant(IV, &I, Result.getConstant());
- // Remember that this instruction is virtually using the PHI node
- // operands.
- InsertInOverdefinedPHIs(&I, PN1);
- InsertInOverdefinedPHIs(&I, PN2);
- return;
- }
-
- if (Result.isUndefined())
- return;
-
- // Okay, this really is overdefined now. Since we might have
- // speculatively thought that this was not overdefined before, and
- // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
- // make sure to clean out any entries that we put there, for
- // efficiency.
- RemoveFromOverdefinedPHIs(&I, PN1);
- RemoveFromOverdefinedPHIs(&I, PN2);
- }
-
markOverdefined(&I);
}
@@ -1029,75 +917,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
if (IV.isOverdefined()) return;
if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
+ return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
+ V1State.getConstant(),
V2State.getConstant()));
-
+
// If operands are still undefined, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
return;
-
- // If something is overdefined, use some tricks to avoid ending up and over
- // defined if we can.
-
- // If both operands are PHI nodes, it is possible that this instruction has
- // a constant value, despite the fact that the PHI node doesn't. Check for
- // this condition now.
- if (PHINode *PN1 = dyn_cast<PHINode>(I.getOperand(0)))
- if (PHINode *PN2 = dyn_cast<PHINode>(I.getOperand(1)))
- if (PN1->getParent() == PN2->getParent()) {
- // Since the two PHI nodes are in the same basic block, they must have
- // entries for the same predecessors. Walk the predecessor list, and
- // if all of the incoming values are constants, and the result of
- // evaluating this expression with all incoming value pairs is the
- // same, then this expression is a constant even though the PHI node
- // is not a constant!
- LatticeVal Result;
- for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) {
- LatticeVal In1 = getValueState(PN1->getIncomingValue(i));
- BasicBlock *InBlock = PN1->getIncomingBlock(i);
- LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock));
-
- if (In1.isOverdefined() || In2.isOverdefined()) {
- Result.markOverdefined();
- break; // Cannot fold this operation over the PHI nodes!
- }
-
- if (In1.isConstant() && In2.isConstant()) {
- Constant *V = ConstantExpr::getCompare(I.getPredicate(),
- In1.getConstant(),
- In2.getConstant());
- if (Result.isUndefined())
- Result.markConstant(V);
- else if (Result.isConstant() && Result.getConstant() != V) {
- Result.markOverdefined();
- break;
- }
- }
- }
-
- // If we found a constant value here, then we know the instruction is
- // constant despite the fact that the PHI nodes are overdefined.
- if (Result.isConstant()) {
- markConstant(&I, Result.getConstant());
- // Remember that this instruction is virtually using the PHI node
- // operands.
- InsertInOverdefinedPHIs(&I, PN1);
- InsertInOverdefinedPHIs(&I, PN2);
- return;
- }
-
- if (Result.isUndefined())
- return;
-
- // Okay, this really is overdefined now. Since we might have
- // speculatively thought that this was not overdefined before, and
- // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs,
- // make sure to clean out any entries that we put there, for
- // efficiency.
- RemoveFromOverdefinedPHIs(&I, PN1);
- RemoveFromOverdefinedPHIs(&I, PN2);
- }
markOverdefined(&I);
}
@@ -1135,7 +961,7 @@ void SCCPSolver::visitInsertElementInst(InsertElementInst &I) {
EltState.getConstant(),
IdxState.getConstant()));
else if (ValState.isUndefined() && EltState.isConstant() &&
- IdxState.isConstant())
+ IdxState.isConstant())
markConstant(&I,ConstantExpr::getInsertElement(UndefValue::get(I.getType()),
EltState.getConstant(),
IdxState.getConstant()));
@@ -1153,17 +979,17 @@ void SCCPSolver::visitShuffleVectorInst(ShuffleVectorInst &I) {
if (MaskState.isUndefined() ||
(V1State.isUndefined() && V2State.isUndefined()))
return; // Undefined output if mask or both inputs undefined.
-
+
if (V1State.isOverdefined() || V2State.isOverdefined() ||
MaskState.isOverdefined()) {
markOverdefined(&I);
} else {
// A mix of constant/undef inputs.
- Constant *V1 = V1State.isConstant() ?
+ Constant *V1 = V1State.isConstant() ?
V1State.getConstant() : UndefValue::get(I.getType());
- Constant *V2 = V2State.isConstant() ?
+ Constant *V2 = V2State.isConstant() ?
V2State.getConstant() : UndefValue::get(I.getType());
- Constant *Mask = MaskState.isConstant() ?
+ Constant *Mask = MaskState.isConstant() ?
MaskState.getConstant() : UndefValue::get(I.getOperand(2)->getType());
markConstant(&I, ConstantExpr::getShuffleVector(V1, V2, Mask));
}
@@ -1183,7 +1009,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
LatticeVal State = getValueState(I.getOperand(i));
if (State.isUndefined())
return; // Operands are not resolved yet.
-
+
if (State.isOverdefined())
return markOverdefined(&I);
@@ -1200,10 +1026,10 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) {
// If this store is of a struct, ignore it.
if (SI.getOperand(0)->getType()->isStructTy())
return;
-
+
if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
return;
-
+
GlobalVariable *GV = cast<GlobalVariable>(SI.getOperand(1));
DenseMap<GlobalVariable*, LatticeVal>::iterator I = TrackedGlobals.find(GV);
if (I == TrackedGlobals.end() || I->second.isOverdefined()) return;
@@ -1221,22 +1047,22 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
// If this load is of a struct, just mark the result overdefined.
if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
-
+
LatticeVal PtrVal = getValueState(I.getOperand(0));
if (PtrVal.isUndefined()) return; // The pointer is not resolved yet!
-
+
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
if (!PtrVal.isConstant() || I.isVolatile())
return markOverdefined(IV, &I);
-
+
Constant *Ptr = PtrVal.getConstant();
// load null -> null
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
return markConstant(IV, &I, Constant::getNullValue(I.getType()));
-
+
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
if (!TrackedGlobals.empty()) {
@@ -1262,7 +1088,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
void SCCPSolver::visitCallSite(CallSite CS) {
Function *F = CS.getCalledFunction();
Instruction *I = CS.getInstruction();
-
+
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
@@ -1270,17 +1096,17 @@ void SCCPSolver::visitCallSite(CallSite CS) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
-
+
// Otherwise, if we have a single return value case, and if the function is
// a declaration, maybe we can constant fold it.
if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
canConstantFoldCallTo(F)) {
-
+
SmallVector<Constant*, 8> Operands;
for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
AI != E; ++AI) {
LatticeVal State = getValueState(*AI);
-
+
if (State.isUndefined())
return; // Operands are not resolved yet.
if (State.isOverdefined())
@@ -1288,10 +1114,10 @@ CallOverdefined:
assert(State.isConstant() && "Unknown state!");
Operands.push_back(State.getConstant());
}
-
+
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI))
return markConstant(I, C);
}
@@ -1304,7 +1130,7 @@ CallOverdefined:
// the formal arguments of the function.
if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){
MarkBlockExecutable(F->begin());
-
+
// Propagate information from this call site into the callee.
CallSite::arg_iterator CAI = CS.arg_begin();
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
@@ -1315,7 +1141,7 @@ CallOverdefined:
markOverdefined(AI);
continue;
}
-
+
if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
@@ -1326,22 +1152,22 @@ CallOverdefined:
}
}
}
-
+
// If this is a single/zero retval case, see if we're tracking the function.
if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
if (!MRVFunctionsTracked.count(F))
goto CallOverdefined; // Not tracking this callee.
-
+
// If we are tracking this callee, propagate the result of the function
// into this call site.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- mergeInValue(getStructValueState(I, i), I,
+ mergeInValue(getStructValueState(I, i), I,
TrackedMultipleRetVals[std::make_pair(F, i)]);
} else {
DenseMap<Function*, LatticeVal>::iterator TFRVI = TrackedRetVals.find(F);
if (TFRVI == TrackedRetVals.end())
goto CallOverdefined; // Not tracking this callee.
-
+
// If so, propagate the return value of the callee into this call result.
mergeInValue(I, TFRVI->second);
}
@@ -1370,7 +1196,7 @@ void SCCPSolver::Solve() {
if (Instruction *I = dyn_cast<Instruction>(*UI))
OperandChangedState(I);
}
-
+
// Process the instruction work list.
while (!InstWorkList.empty()) {
Value *I = InstWorkList.pop_back_val();
@@ -1427,11 +1253,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (!BBExecutable.count(BB))
continue;
-
+
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// Look for instructions which produce undef values.
if (I->getType()->isVoidTy()) continue;
-
+
if (StructType *STy = dyn_cast<StructType>(I->getType())) {
// Only a few things that can be structs matter for undef.
@@ -1442,7 +1268,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
continue;
// extractvalue and insertvalue don't need to be marked; they are
- // tracked as precisely as their operands.
+ // tracked as precisely as their operands.
if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
continue;
@@ -1549,12 +1375,12 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X / undef -> undef. No change.
// X % undef -> undef. No change.
if (Op1LV.isUndefined()) break;
-
+
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
markForcedConstant(I, Constant::getNullValue(ITy));
return true;
-
+
case Instruction::AShr:
// X >>a undef -> undef.
if (Op1LV.isUndefined()) break;
@@ -1587,7 +1413,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
} else {
// Leave Op1LV as Operand(1)'s LatticeValue.
}
-
+
if (Op1LV.isConstant())
markForcedConstant(I, Op1LV.getConstant());
else
@@ -1627,7 +1453,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
}
}
-
+
// Check to see if we have a branch or switch on an undefined value. If so
// we force the branch to go one way or the other to make the successor
// values live. It doesn't really matter which way we force it.
@@ -1636,7 +1462,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (!BI->isConditional()) continue;
if (!getValueState(BI->getCondition()).isUndefined())
continue;
-
+
// If the input to SCCP is actually branch on undef, fix the undef to
// false.
if (isa<UndefValue>(BI->getCondition())) {
@@ -1644,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
markEdgeExecutable(BB, TI->getSuccessor(1));
return true;
}
-
+
// Otherwise, it is a branch on a symbolic value which is currently
// considered to be undef. Handle this by forcing the input value to the
// branch to false.
@@ -1652,22 +1478,22 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
ConstantInt::getFalse(TI->getContext()));
return true;
}
-
+
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (SI->getNumSuccessors() < 2) // no cases
+ if (!SI->getNumCases())
continue;
if (!getValueState(SI->getCondition()).isUndefined())
continue;
-
+
// If the input to SCCP is actually switch on undef, fix the undef to
// the first constant.
if (isa<UndefValue>(SI->getCondition())) {
- SI->setCondition(SI->getCaseValue(1));
- markEdgeExecutable(BB, TI->getSuccessor(1));
+ SI->setCondition(SI->case_begin().getCaseValue());
+ markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor());
return true;
}
-
- markForcedConstant(SI->getCondition(), SI->getCaseValue(1));
+
+ markForcedConstant(SI->getCondition(), SI->case_begin().getCaseValue());
return true;
}
}
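
The hunk above replaces the old index-based SwitchInst accessors (where slot 0 was the default and getCaseValue(1) was the first real case) with the case-iterator API, and tightens the guard to getNumCases(), which counts only non-default cases. A minimal sketch of the idiom, assuming the 2012-era headers; the helper name is ours:

  #include "llvm/Instructions.h"
  using namespace llvm;

  // Pick the value of the first non-default case, as the new code does.
  // case_begin()/case_end() visit explicit cases only, so the emptiness
  // test is getNumCases(), not getNumSuccessors().
  static ConstantInt *firstCaseValue(SwitchInst *SI) {
    if (!SI->getNumCases())
      return 0;                              // only the default exists
    return SI->case_begin().getCaseValue();  // first explicit case
  }
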
@@ -1683,6 +1509,9 @@ namespace {
/// Sparse Conditional Constant Propagator.
///
struct SCCP : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID; // Pass identification, replacement for typeid
SCCP() : FunctionPass(ID) {
initializeSCCPPass(*PassRegistry::getPassRegistry());
@@ -1735,7 +1564,9 @@ static void DeleteInstructionInBlock(BasicBlock *BB) {
//
bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// Mark the first block of the function as being executable.
Solver.MarkBlockExecutable(F.begin());
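
Both SCCP hunks above follow the standard declare-then-fetch pairing for pass dependencies: getAnalysisUsage() registers the requirement, and runOnFunction() retrieves the computed result. A sketch with a hypothetical pass name, assuming the 2012-era pass infrastructure:

  #include "llvm/Pass.h"
  #include "llvm/Function.h"
  #include "llvm/Target/TargetLibraryInfo.h"
  using namespace llvm;

  namespace {
  struct DemoPass : public FunctionPass {
    static char ID;
    DemoPass() : FunctionPass(ID) {}
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<TargetLibraryInfo>();  // ask the manager to schedule TLI
    }
    virtual bool runOnFunction(Function &F) {
      const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
      (void)TLI;     // hand TLI to a solver, as SCCP does with SCCPSolver
      return false;  // this sketch changes nothing
    }
  };
  }
  char DemoPass::ID = 0;
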
@@ -1764,7 +1595,7 @@ bool SCCP::runOnFunction(Function &F) {
MadeChanges = true;
continue;
}
-
+
// Iterate over all of the instructions in a function, replacing them with
// constants if we have found them to be of constant values.
//
@@ -1772,25 +1603,25 @@ bool SCCP::runOnFunction(Function &F) {
Instruction *Inst = BI++;
if (Inst->getType()->isVoidTy() || isa<TerminatorInst>(Inst))
continue;
-
+
// TODO: Reconstruct structs from their elements.
if (Inst->getType()->isStructTy())
continue;
-
+
LatticeVal IV = Solver.getLatticeValueFor(Inst);
if (IV.isOverdefined())
continue;
-
+
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
// Replaces all of the uses of a variable with uses of the constant.
Inst->replaceAllUsesWith(Const);
-
+
// Delete the instruction.
Inst->eraseFromParent();
-
+
// Hey, we just changed something!
MadeChanges = true;
++NumInstRemoved;
@@ -1807,6 +1638,9 @@ namespace {
/// Constant Propagation.
///
struct IPSCCP : public ModulePass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetLibraryInfo>();
+ }
static char ID;
IPSCCP() : ModulePass(ID) {
initializeIPSCCPPass(*PassRegistry::getPassRegistry());
@@ -1816,7 +1650,11 @@ namespace {
} // end anonymous namespace
char IPSCCP::ID = 0;
-INITIALIZE_PASS(IPSCCP, "ipsccp",
+INITIALIZE_PASS_BEGIN(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(IPSCCP, "ipsccp",
"Interprocedural Sparse Conditional Constant Propagation",
false, false)
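
INITIALIZE_PASS is split here because a pass with dependencies must name them between the BEGIN/END pair, one INITIALIZE_PASS_DEPENDENCY per required analysis, so the initializers run in the right order. The generic shape, using a hypothetical pass:

  INITIALIZE_PASS_BEGIN(DemoPass, "demo-pass", "Demo pass description",
                        false, false)
  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
  INITIALIZE_PASS_END(DemoPass, "demo-pass", "Demo pass description",
                      false, false)
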
@@ -1855,7 +1693,9 @@ static bool AddressIsTaken(const GlobalValue *GV) {
}
bool IPSCCP::runOnModule(Module &M) {
- SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
+ SCCPSolver Solver(TD, TLI);
// AddressTakenFunctions - This set keeps track of the address-taken functions
// that are in the input. As IPSCCP runs through and simplifies code,
@@ -1863,19 +1703,19 @@ bool IPSCCP::runOnModule(Module &M) {
// address-taken-ness. Because of this, we keep track of their addresses from
// the first pass so we can use them for the later simplification pass.
SmallPtrSet<Function*, 32> AddressTakenFunctions;
-
+
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
if (F->isDeclaration())
continue;
-
+
// If this is a strong or ODR definition of this function, then we can
// propagate information about its result into callsites of it.
if (!F->mayBeOverridden())
Solver.AddTrackedFunction(F);
-
+
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
@@ -1890,7 +1730,7 @@ bool IPSCCP::runOnModule(Module &M) {
// Assume the function is called.
Solver.MarkBlockExecutable(F->begin());
-
+
// Assume nothing about the incoming arguments.
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI)
@@ -1928,17 +1768,17 @@ bool IPSCCP::runOnModule(Module &M) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
if (AI->use_empty() || AI->getType()->isStructTy()) continue;
-
+
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
LatticeVal IV = Solver.getLatticeValueFor(AI);
if (IV.isOverdefined()) continue;
-
+
Constant *CST = IV.isConstant() ?
IV.getConstant() : UndefValue::get(AI->getType());
DEBUG(dbgs() << "*** Arg " << *AI << " = " << *CST <<"\n");
-
+
// Replaces all of the uses of a variable with uses of the
// constant.
AI->replaceAllUsesWith(CST);
@@ -1967,19 +1807,19 @@ bool IPSCCP::runOnModule(Module &M) {
new UnreachableInst(M.getContext(), BB);
continue;
}
-
+
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
-
+
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
-
+
LatticeVal IV = Solver.getLatticeValueFor(Inst);
if (IV.isOverdefined())
continue;
-
+
Constant *Const = IV.isConstant()
? IV.getConstant() : UndefValue::get(Inst->getType());
DEBUG(dbgs() << " Constant: " << *Const << " = " << *Inst);
@@ -1987,7 +1827,7 @@ bool IPSCCP::runOnModule(Module &M) {
// Replaces all of the uses of a variable with uses of the
// constant.
Inst->replaceAllUsesWith(Const);
-
+
// Delete the instruction.
if (!isa<CallInst>(Inst) && !isa<TerminatorInst>(Inst))
Inst->eraseFromParent();
@@ -2029,15 +1869,15 @@ bool IPSCCP::runOnModule(Module &M) {
llvm_unreachable("Didn't fold away reference to block!");
}
#endif
-
+
// Make this an uncond branch to the first successor.
TerminatorInst *TI = I->getParent()->getTerminator();
BranchInst::Create(TI->getSuccessor(0), TI);
-
+
// Remove entries in successor phi nodes to remove edges.
for (unsigned i = 1, e = TI->getNumSuccessors(); i != e; ++i)
TI->getSuccessor(i)->removePredecessor(TI->getParent());
-
+
// Remove the old terminator.
TI->eraseFromParent();
}
@@ -2060,7 +1900,7 @@ bool IPSCCP::runOnModule(Module &M) {
// last use of a function, the order of processing functions would affect
// whether other functions are optimizable.
SmallVector<ReturnInst*, 8> ReturnsToZap;
-
+
// TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
@@ -2068,11 +1908,11 @@ bool IPSCCP::runOnModule(Module &M) {
Function *F = I->first;
if (I->second.isOverdefined() || F->getReturnType()->isVoidTy())
continue;
-
+
// We can only do this if we know that nothing else can call the function.
if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
-
+
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
@@ -2084,9 +1924,9 @@ bool IPSCCP::runOnModule(Module &M) {
Function *F = ReturnsToZap[i]->getParent()->getParent();
ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
-
- // If we inferred constant or undef values for globals variables, we can delete
- // the global and any stores that remain to it.
+
+ // If we inferred constant or undef values for global variables, we can
+ // delete the global and any stores that remain to it.
const DenseMap<GlobalVariable*, LatticeVal> &TG = Solver.getTrackedGlobals();
for (DenseMap<GlobalVariable*, LatticeVal>::const_iterator I = TG.begin(),
E = TG.end(); I != E; ++I) {
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index f6918deafebd..7d65bcc064e1 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -51,6 +51,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCAPElimPass(Registry);
initializeObjCARCExpandPass(Registry);
initializeObjCARCContractPass(Registry);
initializeObjCARCOptPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index c6d9123d6611..026fea117b20 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -13,7 +13,7 @@
// each member (if possible). Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
-// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
+// This combines a simple SRoA algorithm with the Mem2Reg algorithm because they
// often interact, especially for C++ programs. As such, iterating between
// SRoA, then Mem2Reg until we run out of things to promote works well.
//
@@ -453,6 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
+ if (!GEP->getPointerOperandType()->isPointerTy())
+ return false;
uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
Indices);
// See if all uses can be converted.
@@ -572,8 +574,9 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
// transform it into a store of the expanded constant value.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
assert(MSI->getRawDest() == Ptr && "Consistency error!");
- unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
- if (NumBytes != 0) {
+ int64_t SNumBytes = cast<ConstantInt>(MSI->getLength())->getSExtValue();
+ if (SNumBytes > 0 && (SNumBytes >> 32) == 0) {
+ unsigned NumBytes = static_cast<unsigned>(SNumBytes);
unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
// Compute the value replicated the right number of times.
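
The guard above re-reads the memset length as a signed value: a negative length, or one that does not fit in 32 bits, could previously be truncated by the unsigned getZExtValue() path. A standalone model of the check (the function name is ours):

  #include <cstdint>

  // Accept a memset length only when it is positive and fits in 32 bits.
  static bool isFoldableMemSetLength(int64_t SNumBytes) {
    return SNumBytes > 0 && (SNumBytes >> 32) == 0;
  }
  // isFoldableMemSetLength(16)               -> true
  // isFoldableMemSetLength(-1)               -> false (huge as unsigned)
  // isFoldableMemSetLength(INT64_C(1) << 32) -> false
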
@@ -806,8 +809,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
return Builder.CreateBitCast(SV, AllocaType);
// Must be an element insertion.
- assert(SV->getType() == VTy->getElementType());
- uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
+ Type *EltTy = VTy->getElementType();
+ if (SV->getType() != EltTy)
+ SV = Builder.CreateBitCast(SV, EltTy);
+ uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy);
unsigned Elt = Offset/EltSize;
return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
}
@@ -934,13 +939,14 @@ public:
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
- if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI)) {
for (Value::use_iterator UI = DebugNode->use_begin(),
E = DebugNode->use_end(); UI != E; ++UI)
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
DDIs.push_back(DDI);
else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
DVIs.push_back(DVI);
+ }
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
@@ -975,30 +981,25 @@ public:
for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
+ Value *Arg = NULL;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- Instruction *DbgVal = NULL;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
- Argument *ExtendedArg = NULL;
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ Arg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
- ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg)
- DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
- DIVariable(DVI->getVariable()),
- SI);
- else
- DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
- DIVariable(DVI->getVariable()),
- SI);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ Arg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (!Arg)
+ Arg = SI->getOperand(0);
} else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
- Instruction *DbgVal =
- DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
- DIVariable(DVI->getVariable()), LI);
- DbgVal->setDebugLoc(DVI->getDebugLoc());
+ Arg = LI->getOperand(0);
+ } else {
+ continue;
}
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
+ Inst);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
}
}
};
@@ -1517,6 +1518,9 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length == 0)
return MarkUnsafe(Info, User);
+ if (Length->isNegative())
+ return MarkUnsafe(Info, User);
+
isSafeMemAccess(Offset, Length->getZExtValue(), 0,
UI.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
@@ -1873,8 +1877,14 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
return;
// The bitcast references the original alloca. Replace its uses with
- // references to the first new element alloca.
- Instruction *Val = NewElts[0];
+ // references to the alloca containing offset zero (which is normally at
+ // index zero, but might not be in cases involving structs with elements
+ // of size zero).
+ Type *T = AI->getAllocatedType();
+ uint64_t EltOffset = 0;
+ Type *IdxTy;
+ uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
+ Instruction *Val = NewElts[Idx];
if (Val->getType() != BC->getDestTy()) {
Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
Val->takeName(BC);
@@ -2146,8 +2156,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the requested value was a vector constant, create it.
if (EltTy->isVectorTy()) {
unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
- SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
- StoreVal = ConstantVector::get(Elts);
+ StoreVal = ConstantVector::getSplat(NumElts, StoreVal);
}
}
new StoreInst(StoreVal, EltPtr, MI);
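
ConstantVector::getSplat is shorthand for the removed two-step build; since constants are uniqued, both spellings yield the same object. Fragment reusing the hunk's own variables:

  SmallVector<Constant*, 16> Elts(NumElts, StoreVal);   // old: fill by hand
  Constant *SplatA = ConstantVector::get(Elts);
  Constant *SplatB = ConstantVector::getSplat(NumElts, StoreVal);
  // SplatA == SplatB: vector constants are uniqued per context.
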
@@ -2158,6 +2167,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
}
unsigned EltSize = TD->getTypeAllocSize(EltTy);
+ if (!EltSize)
+ continue;
IRBuilder<> Builder(MI);
@@ -2524,13 +2535,12 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// ignore it if we know that the value isn't captured.
unsigned ArgNo = CS.getArgumentNo(UI);
if (CS.onlyReadsMemory() &&
- (CS.getInstruction()->use_empty() ||
- CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
+ (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
continue;
// If this is being passed as a byval argument, the caller is making a
// copy, so it is only a read of the alloca.
- if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
+ if (CS.isByValArgument(ArgNo))
continue;
}
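
doesNotCapture() and isByValArgument() take the 0-based argument index and hide the off-by-one shift into the attribute list that the old paramHasAttr(ArgNo+1, ...) calls exposed. A sketch of the test this hunk performs, under a name of our choosing:

  #include "llvm/Support/CallSite.h"
  using namespace llvm;

  // True when the call can only read the memory passed through ArgNo.
  static bool argIsSafeReadOnlyUse(CallSite CS, unsigned ArgNo) {
    if (CS.isByValArgument(ArgNo))
      return true;  // callee receives its own copy: a read of the alloca
    return CS.onlyReadsMemory() &&
           (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo));
  }
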
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index fbb9465743ce..9c49ec1c84d2 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -256,19 +256,18 @@ struct StrChrOpt : public LibCallOptimization {
ConstantInt::get(TD->getIntPtrType(*Context), Len),
B, TD);
}
-
+
// Otherwise, the character is a constant, see if the first argument is
// a string literal. If so, we can constant fold.
- std::string Str;
- if (!GetConstantStringInfo(SrcStr, Str))
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str))
return 0;
- // strchr can find the nul character.
- Str += '\0';
-
- // Compute the offset.
- size_t I = Str.find(CharC->getSExtValue());
- if (I == std::string::npos) // Didn't find the char. strchr returns null.
+ // Compute the offset, making sure to handle the case where we're searching for
+ // zero (a weird way to spell strlen).
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.find(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. strchr returns null.
return Constant::getNullValue(CI->getType());
// strchr(s+n,c) -> gep(s+n+i,c)
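
getConstantStringInfo() hands back the bytes without the trailing NUL, so the old trick of appending '\0' before searching no longer applies; searching for zero now maps explicitly to the string length. A standalone model (helper name ours):

  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  // Offset strchr would return within a NUL-free constant string.
  static size_t strchrOffset(StringRef Str, int C) {
    return C == 0 ? Str.size()         // strchr(s, 0) finds the terminator
                  : Str.find((char)C); // StringRef::npos means not found
  }
  // On "hello": strchrOffset(.., 'l') == 2; strchrOffset(.., 0) == 5.
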
@@ -296,20 +295,18 @@ struct StrRChrOpt : public LibCallOptimization {
if (!CharC)
return 0;
- std::string Str;
- if (!GetConstantStringInfo(SrcStr, Str)) {
+ StringRef Str;
+ if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (TD && CharC->isZero())
return EmitStrChr(SrcStr, '\0', B, TD);
return 0;
}
- // strrchr can find the nul character.
- Str += '\0';
-
// Compute the offset.
- size_t I = Str.rfind(CharC->getSExtValue());
- if (I == std::string::npos) // Didn't find the char. Return null.
+ size_t I = CharC->getSExtValue() == 0 ?
+ Str.size() : Str.rfind(CharC->getSExtValue());
+ if (I == StringRef::npos) // Didn't find the char. Return null.
return Constant::getNullValue(CI->getType());
// strrchr(s+n,c) -> gep(s+n+i,c)
@@ -334,14 +331,13 @@ struct StrCmpOpt : public LibCallOptimization {
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
- std::string Str1, Str2;
- bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
- bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
// strcmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2)
- return ConstantInt::get(CI->getType(),
- StringRef(Str1).compare(Str2));
+ return ConstantInt::get(CI->getType(), Str1.compare(Str2));
if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
@@ -397,14 +393,14 @@ struct StrNCmpOpt : public LibCallOptimization {
if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
- std::string Str1, Str2;
- bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
- bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
// strncmp(x, y) -> cnst (if both x and y are constant strings)
if (HasStr1 && HasStr2) {
- StringRef SubStr1 = StringRef(Str1).substr(0, Length);
- StringRef SubStr2 = StringRef(Str2).substr(0, Length);
+ StringRef SubStr1 = Str1.substr(0, Length);
+ StringRef SubStr2 = Str2.substr(0, Length);
return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
}
@@ -549,9 +545,9 @@ struct StrPBrkOpt : public LibCallOptimization {
FT->getReturnType() != FT->getParamType(0))
return 0;
- std::string S1, S2;
- bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
// strpbrk(s, "") -> NULL
// strpbrk("", s) -> NULL
@@ -609,9 +605,9 @@ struct StrSpnOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy())
return 0;
- std::string S1, S2;
- bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
// strspn(s, "") -> 0
// strspn("", s) -> 0
@@ -619,8 +615,11 @@ struct StrSpnOpt : public LibCallOptimization {
return Constant::getNullValue(CI->getType());
// Constant folding.
- if (HasS1 && HasS2)
- return ConstantInt::get(CI->getType(), strspn(S1.c_str(), S2.c_str()));
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_not_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
return 0;
}
@@ -638,17 +637,20 @@ struct StrCSpnOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy())
return 0;
- std::string S1, S2;
- bool HasS1 = GetConstantStringInfo(CI->getArgOperand(0), S1);
- bool HasS2 = GetConstantStringInfo(CI->getArgOperand(1), S2);
+ StringRef S1, S2;
+ bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
+ bool HasS2 = getConstantStringInfo(CI->getArgOperand(1), S2);
// strcspn("", s) -> 0
if (HasS1 && S1.empty())
return Constant::getNullValue(CI->getType());
// Constant folding.
- if (HasS1 && HasS2)
- return ConstantInt::get(CI->getType(), strcspn(S1.c_str(), S2.c_str()));
+ if (HasS1 && HasS2) {
+ size_t Pos = S1.find_first_of(S2);
+ if (Pos == StringRef::npos) Pos = S1.size();
+ return ConstantInt::get(CI->getType(), Pos);
+ }
// strcspn(s, "") -> strlen(s)
if (TD && HasS2 && S2.empty())
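
Both folds above trade the host C library's strspn/strcspn for StringRef scans, which keeps the result independent of the host and avoids c_str() truncation at embedded NULs; npos maps back to the full length. Portable models (names ours):

  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  static size_t foldStrSpn(StringRef S1, StringRef S2) {
    size_t Pos = S1.find_first_not_of(S2);            // strspn(S1, S2)
    return Pos == StringRef::npos ? S1.size() : Pos;
  }
  static size_t foldStrCSpn(StringRef S1, StringRef S2) {
    size_t Pos = S1.find_first_of(S2);                // strcspn(S1, S2)
    return Pos == StringRef::npos ? S1.size() : Pos;
  }
  // foldStrSpn("aabbc", "ab") == 4;  foldStrCSpn("hello", "lo") == 2.
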
@@ -692,9 +694,9 @@ struct StrStrOpt : public LibCallOptimization {
}
// See if either input string is a constant string.
- std::string SearchStr, ToFindStr;
- bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
- bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
+ StringRef SearchStr, ToFindStr;
+ bool HasStr1 = getConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = getConstantStringInfo(CI->getArgOperand(1), ToFindStr);
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
@@ -704,7 +706,7 @@ struct StrStrOpt : public LibCallOptimization {
if (HasStr1 && HasStr2) {
std::string::size_type Offset = SearchStr.find(ToFindStr);
- if (Offset == std::string::npos) // strstr("foo", "bar") -> null
+ if (Offset == StringRef::npos) // strstr("foo", "bar") -> null
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
@@ -756,11 +758,11 @@ struct MemCmpOpt : public LibCallOptimization {
}
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
- std::string LHSStr, RHSStr;
- if (GetConstantStringInfo(LHS, LHSStr) &&
- GetConstantStringInfo(RHS, RHSStr)) {
+ StringRef LHSStr, RHSStr;
+ if (getConstantStringInfo(LHS, LHSStr) &&
+ getConstantStringInfo(RHS, RHSStr)) {
// Make sure we're not reading out-of-bounds memory.
- if (Len > LHSStr.length() || Len > RHSStr.length())
+ if (Len > LHSStr.size() || Len > RHSStr.size())
return 0;
uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
return ConstantInt::get(CI->getType(), Ret);
@@ -841,6 +843,28 @@ struct MemSetOpt : public LibCallOptimization {
//===----------------------------------------------------------------------===//
//===---------------------------------------===//
+// 'cos*' Optimizations
+
+struct CosOpt : public LibCallOptimization {
+ virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ FunctionType *FT = Callee->getFunctionType();
+ // Just make sure this has 1 argument of FP type, which matches the
+ // result type.
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return 0;
+
+ // cos(-x) -> cos(x)
+ Value *Op1 = CI->getArgOperand(0);
+ if (BinaryOperator::isFNeg(Op1)) {
+ BinaryOperator *BinExpr = cast<BinaryOperator>(Op1);
+ return B.CreateCall(Callee, BinExpr->getOperand(1), "cos");
+ }
+ return 0;
+ }
+};
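
The fold is value-exact because cos is an even function; nothing is lost even at the signed-zero edge case, where C guarantees cos(±0) == 1. A quick standalone check:

  #include <cmath>
  #include <cassert>

  int main() {
    assert(std::cos(-0.0) == std::cos(0.0));  // both exactly 1.0
    assert(std::cos(-2.5) == std::cos(2.5));  // holds on any even libm
    return 0;
  }
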
+
+//===---------------------------------------===//
// 'pow*' Optimizations
struct PowOpt : public LibCallOptimization {
@@ -870,7 +894,7 @@ struct PowOpt : public LibCallOptimization {
if (Op2C->isExactlyValue(0.5)) {
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
- // and negative infinite correctly.
+ // and negative infinity correctly.
// TODO: In fast-math mode, this could be just sqrt(x).
// TODO: In finite-only mode, this could be just fabs(sqrt(x)).
Value *Inf = ConstantFP::getInfinity(CI->getType());
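
The select against -infinity and the fabs are not decoration: under IEEE rules, pow(x, 0.5) and sqrt(x) disagree at exactly two inputs. A scalar model:

  #include <cmath>

  static double powHalf(double X) {
    // pow(-inf, 0.5) = +inf, but sqrt(-inf) = NaN  -> hence the select.
    // pow(-0.0, 0.5) = +0.0, but sqrt(-0.0) = -0.0 -> hence the fabs.
    return X == -INFINITY ? INFINITY : std::fabs(std::sqrt(X));
  }
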
@@ -963,8 +987,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
// floor((double)floatval) -> (double)floorf(floatval)
Value *V = Cast->getOperand(0);
- V = EmitUnaryFloatFnCall(V, Callee->getName().data(), B,
- Callee->getAttributes());
+ V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes());
return B.CreateFPExt(V, B.getDoubleTy());
}
};
@@ -1000,7 +1023,7 @@ struct FFSOpt : public LibCallOptimization {
Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall(F, Op, "cttz");
+ Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
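
llvm.cttz now carries an i1 is_zero_undef operand; B.getFalse() requests the fully defined variant, where a zero input yields the bit width. A standalone model of the two modes:

  #include <cstdint>

  static unsigned cttz32(uint32_t X, bool IsZeroUndef) {
    if (X == 0)
      return IsZeroUndef ? 0u   // any value: caller promised X != 0
                         : 32;  // defined mode, as getFalse() selects
    unsigned N = 0;
    while (!(X & 1)) { X >>= 1; ++N; }
    return N;
  }
  // ffs-style use, as above: cttz32(X, false) + 1; e.g. cttz32(8, false) == 3.
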
@@ -1095,8 +1118,8 @@ struct PrintFOpt : public LibCallOptimization {
Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
// Check for a fixed format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
return 0;
// Empty format string -> noop.
@@ -1122,11 +1145,9 @@ struct PrintFOpt : public LibCallOptimization {
FormatStr.find('%') == std::string::npos) { // no format characters.
// Create a string literal with no \n on it. We expect the constant merge
// pass to be run after this pass, to merge duplicate strings.
- FormatStr.erase(FormatStr.end()-1);
- Constant *C = ConstantArray::get(*Context, FormatStr, true);
- C = new GlobalVariable(*Callee->getParent(), C->getType(), true,
- GlobalVariable::InternalLinkage, C, "str");
- EmitPutS(C, B, TD);
+ FormatStr = FormatStr.drop_back();
+ Value *GV = B.CreateGlobalString(FormatStr, "str");
+ EmitPutS(GV, B, TD);
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), FormatStr.size()+1);
}
@@ -1184,8 +1205,8 @@ struct SPrintFOpt : public LibCallOptimization {
Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
// Check for a fixed format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// If we just have a format string (nothing else crazy) transform it.
@@ -1296,7 +1317,8 @@ struct FWriteOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), 0);
// If this is writing one byte, turn it into fputc.
- if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
+ // This optimization is only valid if the return value is unused.
+ if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
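
The new use_empty() condition exists because the two calls answer with different things, so substituting a constant 1 for fwrite's result is only honest when nobody reads it:

  #include <cstdio>

  static size_t writeOne(const char *S, FILE *F) {
    return fwrite(S, 1, 1, F);  // items written: 0 or 1
  }
  static int putOne(const char *S, FILE *F) {
    return fputc(S[0], F);      // the character written, or EOF on error
  }
  // The fold rewrites writeOne into putOne plus a constant 1 result; had
  // the write failed, the real fwrite would have returned 0.
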
@@ -1326,7 +1348,7 @@ struct FPutsOpt : public LibCallOptimization {
if (!Len) return 0;
EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getArgOperand(1), B, TD);
+ CI->getArgOperand(1), B, TD, TLI);
return CI; // Known to have no uses (see above).
}
};
@@ -1338,8 +1360,8 @@ struct FPrintFOpt : public LibCallOptimization {
Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
IRBuilder<> &B) {
// All the optimizations depend on the format string.
- std::string FormatStr;
- if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
+ StringRef FormatStr;
+ if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
@@ -1354,7 +1376,7 @@ struct FPrintFOpt : public LibCallOptimization {
EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
- CI->getArgOperand(0), B, TD);
+ CI->getArgOperand(0), B, TD, TLI);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1376,7 +1398,7 @@ struct FPrintFOpt : public LibCallOptimization {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
return CI;
}
return 0;
@@ -1422,8 +1444,8 @@ struct PutsOpt : public LibCallOptimization {
return 0;
// Check for a constant string.
- std::string Str;
- if (!GetConstantStringInfo(CI->getArgOperand(0), Str))
+ StringRef Str;
+ if (!getConstantStringInfo(CI->getArgOperand(0), Str))
return 0;
if (Str.empty() && CI->use_empty()) {
@@ -1457,7 +1479,7 @@ namespace {
StrToOpt StrTo; StrSpnOpt StrSpn; StrCSpnOpt StrCSpn; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
- PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
+ CosOpt Cos; PowOpt Pow; Exp2Opt Exp2; UnaryDoubleFPOpt UnaryDoubleFP;
// Integer Optimizations
FFSOpt FFS; AbsOpt Abs; IsDigitOpt IsDigit; IsAsciiOpt IsAscii;
ToAsciiOpt ToAscii;
@@ -1472,6 +1494,7 @@ namespace {
SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
+ void AddOpt(LibFunc::Func F, LibCallOptimization* Opt);
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1502,6 +1525,11 @@ FunctionPass *llvm::createSimplifyLibCallsPass() {
return new SimplifyLibCalls();
}
+void SimplifyLibCalls::AddOpt(LibFunc::Func F, LibCallOptimization* Opt) {
+ if (TLI->has(F))
+ Optimizations[TLI->getName(F)] = Opt;
+}
+
/// Optimizations - Populate the Optimizations map with all the optimizations
/// we know.
void SimplifyLibCalls::InitOptimizations() {
@@ -1527,14 +1555,17 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["strcspn"] = &StrCSpn;
Optimizations["strstr"] = &StrStr;
Optimizations["memcmp"] = &MemCmp;
- if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy;
+ AddOpt(LibFunc::memcpy, &MemCpy);
Optimizations["memmove"] = &MemMove;
- if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet;
+ AddOpt(LibFunc::memset, &MemSet);
// _chk variants of String and Memory LibCall Optimizations.
Optimizations["__strcpy_chk"] = &StrCpyChk;
// Math Library Optimizations
+ Optimizations["cosf"] = &Cos;
+ Optimizations["cos"] = &Cos;
+ Optimizations["cosl"] = &Cos;
Optimizations["powf"] = &Pow;
Optimizations["pow"] = &Pow;
Optimizations["powl"] = &Pow;
@@ -1582,8 +1613,8 @@ void SimplifyLibCalls::InitOptimizations() {
// Formatting and IO Optimizations
Optimizations["sprintf"] = &SPrintF;
Optimizations["printf"] = &PrintF;
- Optimizations["fwrite"] = &FWrite;
- Optimizations["fputs"] = &FPuts;
+ AddOpt(LibFunc::fwrite, &FWrite);
+ AddOpt(LibFunc::fputs, &FPuts);
Optimizations["fprintf"] = &FPrintF;
Optimizations["puts"] = &Puts;
}
@@ -2348,9 +2379,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * cbrt(sqrt(x)) -> pow(x,1/6)
// * cbrt(sqrt(x)) -> pow(x,1/9)
//
-// cos, cosf, cosl:
-// * cos(-x) -> cos(x)
-//
// exp, expf, expl:
// * exp(log(x)) -> x
//
@@ -2387,6 +2415,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * stpcpy(str, "literal") ->
// llvm.memcpy(str,"literal",strlen("literal")+1,1)
//
+// strchr:
+// * strchr(p, 0) -> strlen(p)
+//
// tan, tanf, tanl:
// * tan(atan(x)) -> x
//
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index c83f56c4d2d7..ef65c0a3a907 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CFG.h"
@@ -240,7 +241,7 @@ bool Sinking::SinkInstruction(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
- if (!Inst->isSafeToSpeculativelyExecute()) {
+ if (!isSafeToSpeculativelyExecute(Inst)) {
DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
return false;
}
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8e5a1eb2c843..d83145289ce2 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -473,14 +473,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
// Check to see if this value is already used in the memory instruction's
// block. If so, it's already live into the block at the very least, so we
// can reasonably fold it.
- BasicBlock *MemBB = MemoryInst->getParent();
- for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
- UI != E; ++UI)
- // We know that uses of arguments and instructions have to be instructions.
- if (cast<Instruction>(*UI)->getParent() == MemBB)
- return true;
-
- return false;
+ return Val->isUsedInBasicBlock(MemoryInst->getParent());
}
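
Value::isUsedInBasicBlock() packages exactly the loop deleted here. For reference, the removed logic (valid because users of arguments and instructions are themselves instructions):

  #include "llvm/Value.h"
  #include "llvm/Instruction.h"
  #include "llvm/BasicBlock.h"
  using namespace llvm;

  static bool usedInBlock(Value *Val, BasicBlock *MemBB) {
    for (Value::use_iterator UI = Val->use_begin(), E = Val->use_end();
         UI != E; ++UI)
      if (cast<Instruction>(*UI)->getParent() == MemBB)
        return true;  // same answer as Val->isUsedInBasicBlock(MemBB)
    return false;
  }
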
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a7f9efd562e1..3859a1aec4a7 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -249,7 +249,6 @@ unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
if (Term->getSuccessor(i) == Succ)
return i;
}
- return 0;
}
/// SplitEdge - Split the edge connecting specified block. Pass P must
@@ -453,9 +452,8 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
/// of the edges being split is an exit of a loop with other exits).
///
BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
- BasicBlock *const *Preds,
- unsigned NumPreds, const char *Suffix,
- Pass *P) {
+ ArrayRef<BasicBlock*> Preds,
+ const char *Suffix, Pass *P) {
// Create new basic block, insert right before the original block.
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), BB->getName()+Suffix,
BB->getParent(), BB);
@@ -464,7 +462,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
BranchInst *BI = BranchInst::Create(BB, NewBB);
// Move the edges from Preds to point to NewBB instead of BB.
- for (unsigned i = 0; i != NumPreds; ++i) {
+ for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
// This is slightly more strict than necessary; the minimum requirement
// is that there be no more than one indirectbr branching to BB. And
// all BlockAddress uses would need to be updated.
@@ -477,7 +475,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// node becomes an incoming value for BB's phi node. However, if the Preds
// list is empty, we need to insert dummy entries into the PHI nodes in BB to
// account for the newly created predecessor.
- if (NumPreds == 0) {
+ if (Preds.size() == 0) {
// Insert dummy values as the incoming value.
for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)
cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB);
@@ -486,12 +484,10 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
// Update DominatorTree, LoopInfo, and LCCSA analysis information.
bool HasLoopExit = false;
- UpdateAnalysisInformation(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds),
- P, HasLoopExit);
+ UpdateAnalysisInformation(BB, NewBB, Preds, P, HasLoopExit);
// Update the PHI nodes in BB with the values coming from NewBB.
- UpdatePHINodes(BB, NewBB, ArrayRef<BasicBlock*>(Preds, NumPreds), BI,
- P, HasLoopExit);
+ UpdatePHINodes(BB, NewBB, Preds, BI, P, HasLoopExit);
return NewBB;
}
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
deleted file mode 100644
index 23a30cc58507..000000000000
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-//===- BasicInliner.cpp - Basic function level inliner --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple function based inliner that does not use
-// call graph information.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "basicinliner"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
-#include "llvm/Transforms/Utils/BasicInliner.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include <vector>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-BasicInlineThreshold("basic-inline-threshold", cl::Hidden, cl::init(200),
- cl::desc("Control the amount of basic inlining to perform (default = 200)"));
-
-namespace llvm {
-
- /// BasicInlinerImpl - BasicInliner implemantation class. This hides
- /// container info, used by basic inliner, from public interface.
- struct BasicInlinerImpl {
-
- BasicInlinerImpl(const BasicInlinerImpl&); // DO NOT IMPLEMENT
- void operator=(const BasicInlinerImpl&); // DO NO IMPLEMENT
- public:
- BasicInlinerImpl(TargetData *T) : TD(T) {}
-
- /// addFunction - Add function into the list of functions to process.
- /// All functions must be inserted using this interface before invoking
- /// inlineFunctions().
- void addFunction(Function *F) {
- Functions.push_back(F);
- }
-
- /// neverInlineFunction - Sometimes a function is never to be inlined
- /// because of one or other reason.
- void neverInlineFunction(Function *F) {
- NeverInline.insert(F);
- }
-
- /// inlineFuctions - Walk all call sites in all functions supplied by
- /// client. Inline as many call sites as possible. Delete completely
- /// inlined functions.
- void inlineFunctions();
-
- private:
- TargetData *TD;
- std::vector<Function *> Functions;
- SmallPtrSet<const Function *, 16> NeverInline;
- SmallPtrSet<Function *, 8> DeadFunctions;
- InlineCostAnalyzer CA;
- };
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInlinerImpl::inlineFunctions() {
-
- // Scan through and identify all call sites ahead of time so that we only
- // inline call sites in the original functions, not call sites that result
- // from inlining other functions.
- std::vector<CallSite> CallSites;
-
- for (std::vector<Function *>::iterator FI = Functions.begin(),
- FE = Functions.end(); FI != FE; ++FI) {
- Function *F = *FI;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- CallSite CS(cast<Value>(I));
- if (CS && CS.getCalledFunction()
- && !CS.getCalledFunction()->isDeclaration())
- CallSites.push_back(CS);
- }
- }
-
- DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
-
- // Inline call sites.
- bool Changed = false;
- do {
- Changed = false;
- for (unsigned index = 0; index != CallSites.size() && !CallSites.empty();
- ++index) {
- CallSite CS = CallSites[index];
- if (Function *Callee = CS.getCalledFunction()) {
-
- // Eliminate calls that are never inlinable.
- if (Callee->isDeclaration() ||
- CS.getInstruction()->getParent()->getParent() == Callee) {
- CallSites.erase(CallSites.begin() + index);
- --index;
- continue;
- }
- InlineCost IC = CA.getInlineCost(CS, NeverInline);
- if (IC.isAlways()) {
- DEBUG(dbgs() << " Inlining: cost=always"
- <<", call: " << *CS.getInstruction());
- } else if (IC.isNever()) {
- DEBUG(dbgs() << " NOT Inlining: cost=never"
- <<", call: " << *CS.getInstruction());
- continue;
- } else {
- int Cost = IC.getValue();
-
- if (Cost >= (int) BasicInlineThreshold) {
- DEBUG(dbgs() << " NOT Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction());
- continue;
- } else {
- DEBUG(dbgs() << " Inlining: cost = " << Cost
- << ", call: " << *CS.getInstruction());
- }
- }
-
- // Inline
- InlineFunctionInfo IFI(0, TD);
- if (InlineFunction(CS, IFI)) {
- if (Callee->use_empty() && (Callee->hasLocalLinkage() ||
- Callee->hasAvailableExternallyLinkage()))
- DeadFunctions.insert(Callee);
- Changed = true;
- CallSites.erase(CallSites.begin() + index);
- --index;
- }
- }
- }
- } while (Changed);
-
- // Remove completely inlined functions from module.
- for(SmallPtrSet<Function *, 8>::iterator I = DeadFunctions.begin(),
- E = DeadFunctions.end(); I != E; ++I) {
- Function *D = *I;
- Module *M = D->getParent();
- M->getFunctionList().remove(D);
- }
-}
-
-BasicInliner::BasicInliner(TargetData *TD) {
- Impl = new BasicInlinerImpl(TD);
-}
-
-BasicInliner::~BasicInliner() {
- delete Impl;
-}
-
-/// addFunction - Add function into the list of functions to process.
-/// All functions must be inserted using this interface before invoking
-/// inlineFunctions().
-void BasicInliner::addFunction(Function *F) {
- Impl->addFunction(F);
-}
-
-/// neverInlineFunction - Sometimes a function is never to be inlined because
-/// of one or other reason.
-void BasicInliner::neverInlineFunction(Function *F) {
- Impl->neverInlineFunction(F);
-}
-
-/// inlineFuctions - Walk all call sites in all functions supplied by
-/// client. Inline as many call sites as possible. Delete completely
-/// inlined functions.
-void BasicInliner::inlineFunctions() {
- Impl->inlineFunctions();
-}
-
-}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index c05291088157..f752d7981c5f 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -372,8 +372,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// form, which we're in the process of restoring!
if (!Preds.empty() && HasPredOutsideOfLoop) {
BasicBlock *NewExitBB =
- SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
- "split", P);
+ SplitBlockPredecessors(Exit, Preds, "split", P);
if (P->mustPreserveAnalysisID(LCSSAID))
CreatePHIsForSplitLoopExit(Preds, NewExitBB, Exit);
}
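
This call site shows the payoff of the ArrayRef migration in SplitBlockPredecessors: containers convert implicitly, so the (data, size) pair disappears. A sketch, assuming the 2012-era headers:

  #include "llvm/Pass.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  using namespace llvm;

  static BasicBlock *splitPreds(BasicBlock *Exit,
                                SmallVectorImpl<BasicBlock*> &Preds,
                                Pass *P) {
    // old: SplitBlockPredecessors(Exit, Preds.data(), Preds.size(),
    //                             "split", P);
    return SplitBlockPredecessors(Exit, Preds, "split", P);
  }
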
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 4b5f45b31f9b..a80830328d53 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -15,11 +15,15 @@
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/LLVMContext.h"
#include "llvm/Intrinsics.h"
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
@@ -206,19 +210,16 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
/// 'floor'). This function is known to take a single of type matching 'Op' and
/// returns one value with the same type. If 'Op' is a long double, 'l' is
/// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
-Value *llvm::EmitUnaryFloatFnCall(Value *Op, const char *Name,
- IRBuilder<> &B, const AttrListPtr &Attrs) {
- char NameBuffer[20];
+Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
+ const AttrListPtr &Attrs) {
+ SmallString<20> NameBuffer;
if (!Op->getType()->isDoubleTy()) {
// If we need to add a suffix, copy into NameBuffer.
- unsigned NameLen = strlen(Name);
- assert(NameLen < sizeof(NameBuffer)-2);
- memcpy(NameBuffer, Name, NameLen);
+ NameBuffer += Name;
if (Op->getType()->isFloatTy())
- NameBuffer[NameLen] = 'f'; // floorf
+ NameBuffer += 'f'; // floorf
else
- NameBuffer[NameLen] = 'l'; // floorl
- NameBuffer[NameLen+1] = 0;
+ NameBuffer += 'l'; // floorl
Name = NameBuffer;
}
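
SmallString grows on demand, so the fixed char[20] buffer and its manual length bookkeeping go away. A standalone model of the suffixing (helper name ours):

  #include "llvm/ADT/SmallString.h"
  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  // "floor" -> "floorf" for float operands, "floorl" for long double.
  static SmallString<20> suffixedFloatFnName(StringRef Name, bool IsFloat) {
    SmallString<20> Buf;
    Buf += Name;
    Buf += IsFloat ? 'f' : 'l';
    return Buf;
  }
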
@@ -299,20 +300,21 @@ void llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
/// EmitFPutS - Emit a call to the puts function. Str is required to be a
/// pointer and File is a pointer to FILE.
void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
- const TargetData *TD) {
+ const TargetData *TD, const TargetLibraryInfo *TLI) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ StringRef FPutsName = TLI->getName(LibFunc::fputs);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
+ F = M->getOrInsertFunction(FPutsName, AttrListPtr::get(AWI, 3),
B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fputs", B.getInt32Ty(),
+ F = M->getOrInsertFunction(FPutsName, B.getInt32Ty(),
B.getInt8PtrTy(),
File->getType(), NULL);
CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
@@ -324,23 +326,25 @@ void llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
/// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
void llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
- IRBuilder<> &B, const TargetData *TD) {
+ IRBuilder<> &B, const TargetData *TD,
+ const TargetLibraryInfo *TLI) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeWithIndex AWI[3];
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
LLVMContext &Context = B.GetInsertBlock()->getContext();
+ StringRef FWriteName = TLI->getName(LibFunc::fwrite);
Constant *F;
if (File->getType()->isPointerTy())
- F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
+ F = M->getOrInsertFunction(FWriteName, AttrListPtr::get(AWI, 3),
TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
File->getType(), NULL);
else
- F = M->getOrInsertFunction("fwrite", TD->getIntPtrType(Context),
+ F = M->getOrInsertFunction(FWriteName, TD->getIntPtrType(Context),
B.getInt8PtrTy(),
TD->getIntPtrType(Context),
TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 7adc5f1ac2eb..7f5cb5e096ae 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -1,11 +1,11 @@
add_llvm_library(LLVMTransformUtils
AddrModeMatcher.cpp
BasicBlockUtils.cpp
- BasicInliner.cpp
BreakCriticalEdges.cpp
BuildLibCalls.cpp
CloneFunction.cpp
CloneModule.cpp
+ CmpInstAnalysis.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
InlineFunction.cpp
@@ -14,10 +14,12 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LoopUnrollRuntime.cpp
LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
+ ModuleUtils.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
SimplifyCFG.cpp
@@ -27,11 +29,3 @@ add_llvm_library(LLVMTransformUtils
Utils.cpp
ValueMapper.cpp
)
-
-add_llvm_library_dependencies(LLVMTransformUtils
- LLVMAnalysis
- LLVMCore
- LLVMSupport
- LLVMTarget
- LLVMipa
- )
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index cf21f1ed9703..20052a412277 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -23,8 +23,11 @@
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
#include <map>
@@ -60,7 +63,6 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
- CodeInfo->ContainsUnwinds |= isa<UnwindInst>(BB->getTerminator());
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->getEntryBlock();
@@ -75,7 +77,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
+ const char *NameSuffix, ClonedCodeInfo *CodeInfo,
+ ValueMapTypeRemapper *TypeMapper) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -113,8 +116,23 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Create a new basic block and copy instructions into it!
BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
- VMap[&BB] = CBB; // Add basic block mapping.
+ // Add basic block mapping.
+ VMap[&BB] = CBB;
+
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ if (BB.hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(&BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB);
+ }
+
+ // Note return instructions for the caller.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
@@ -126,7 +144,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
RemapInstruction(II, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -181,7 +200,6 @@ namespace {
const Function *OldFunc;
ValueToValueMapTy &VMap;
bool ModuleLevelChanges;
- SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const TargetData *TD;
@@ -189,24 +207,18 @@ namespace {
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
bool moduleLevelChanges,
- SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
: NewFunc(newFunc), OldFunc(oldFunc),
VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
- Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
/// anything that it can reach.
void CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone);
-
- public:
- /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
- /// mapping its operands through VMap if they are available.
- Constant *ConstantFoldMappedInstruction(const Instruction *I);
};
}
@@ -214,7 +226,7 @@ namespace {
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- TrackingVH<Value> &BBEntry = VMap[BB];
+ WeakVH &BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -224,25 +236,55 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
BBEntry = NewBB = BasicBlock::Create(BB->getContext());
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
+ // It is only legal to clone a function if a block address within that
+ // function is never referenced outside of the function. Given that, we
+ // want to map block addresses from the old function to block addresses in
+ // the clone. (This is different from the generic ValueMapper
+ // implementation, which generates an invalid blockaddress when
+ // cloning a function.)
+ //
+ // Note that we don't need to fix the mapping for unreachable blocks;
+ // the default mapping there is safe.
+ if (BB->hasAddressTaken()) {
+ Constant *OldBBAddr = BlockAddress::get(const_cast<Function*>(OldFunc),
+ const_cast<BasicBlock*>(BB));
+ VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB);
+ }
+
bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false;
// Loop over all instructions, and copy them over, DCE'ing as we go. This
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
II != IE; ++II) {
- // If this instruction constant folds, don't bother cloning the instruction,
- // instead, just add the constant to the value map.
- if (Constant *C = ConstantFoldMappedInstruction(II)) {
- VMap[II] = C;
- continue;
+ Instruction *NewInst = II->clone();
+
+ // Eagerly remap operands to the newly cloned instruction, except for PHI
+ // nodes for which we defer processing until we update the CFG.
+ if (!isa<PHINode>(NewInst)) {
+ RemapInstruction(NewInst, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+ // If we can simplify this instruction to some other value, simply add
+ // a mapping to that value rather than inserting a new instruction into
+ // the basic block.
+ if (Value *V = SimplifyInstruction(NewInst, TD)) {
+ // On the off-chance that this simplifies to an instruction in the old
+ // function, map it back into the new function.
+ if (Value *MappedV = VMap.lookup(V))
+ V = MappedV;
+
+ VMap[II] = V;
+ delete NewInst;
+ continue;
+ }
}
- Instruction *NewInst = II->clone();
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
- NewBB->getInstList().push_back(NewInst);
VMap[II] = NewInst; // Add instruction map to value.
-
+ NewBB->getInstList().push_back(NewInst);
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
if (isa<ConstantInt>(AI->getArraySize()))
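
The clone/remap/simplify pattern above, condensed into an illustrative helper (not part of this patch; assumes the 3.x API). PHI nodes are deliberately excluded, as in the cloner itself, since their operands cannot be remapped before the CFG exists:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Clone a non-PHI instruction, remap its operands through VMap, and try to
// fold it. Returns an existing value when the clone simplifies away,
// otherwise the not-yet-inserted clone.
static Value *cloneAndFold(const Instruction *I, ValueToValueMapTy &VMap,
                           const TargetData *TD) {
  Instruction *NewInst = I->clone();
  RemapInstruction(NewInst, VMap, RF_NoModuleLevelChanges);
  if (Value *V = SimplifyInstruction(NewInst, TD)) {
    // The simplified value may live in the old function; map it over.
    if (Value *MappedV = VMap.lookup(V))
      V = MappedV;
    delete NewInst; // Never inserted into a block, so plain delete is fine.
    return V;
  }
  return NewInst; // Caller inserts it and records VMap[I] = NewInst.
}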
@@ -281,7 +323,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
Cond = dyn_cast_or_null<ConstantInt>(V);
}
if (Cond) { // Constant fold to uncond branch!
- BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
+ SwitchInst::ConstCaseIt Case = SI->findCaseValue(Cond);
+ BasicBlock *Dest = const_cast<BasicBlock*>(Case.getCaseSuccessor());
VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
@@ -303,38 +346,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (CodeInfo) {
CodeInfo->ContainsCalls |= hasCalls;
- CodeInfo->ContainsUnwinds |= isa<UnwindInst>(OldTI);
CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas;
CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&
BB != &BB->getParent()->front();
}
-
- if (ReturnInst *RI = dyn_cast<ReturnInst>(NewBB->getTerminator()))
- Returns.push_back(RI);
-}
-
-/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through VMap if they are available.
-Constant *PruningFunctionCloner::
-ConstantFoldMappedInstruction(const Instruction *I) {
- SmallVector<Constant*, 8> Ops;
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
- Ops.push_back(Op);
- else
- return 0; // All operands not constant!
-
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
- TD);
-
- if (const LoadInst *LI = dyn_cast<LoadInst>(I))
- if (!LI->isVolatile())
- return ConstantFoldLoadFromConstPtr(Ops[0], TD);
-
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -361,7 +376,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- Returns, NameSuffix, CodeInfo, TD);
+ NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -386,29 +401,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
-
- // Loop over all of the instructions in the block, fixing up operand
- // references as we go. This uses VMap to do all the hard work.
- //
- BasicBlock::iterator I = NewBB->begin();
-
- DebugLoc TheCallDL;
- if (TheCall)
- TheCallDL = TheCall->getDebugLoc();
-
+
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- // Skip over all PHI nodes, remembering them for later.
- BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
- PHIToResolve.push_back(cast<PHINode>(OldI));
- }
-
- // Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I)
- RemapInstruction(I, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+ if (const PHINode *PN = dyn_cast<PHINode>(I))
+ PHIToResolve.push_back(PN);
+ else
+ break;
+
+ // Finally, remap the terminator instructions, as those can't be remapped
+ // until all BBs are mapped.
+ RemapInstruction(NewBB->getTerminator(), VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -490,31 +495,55 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
++OldI;
}
}
- // NOTE: We cannot eliminate single entry phi nodes here, because of
- // VMap. Single entry phi nodes can have multiple VMap entries
- // pointing at them. Thus, deleting one would require scanning the VMap
- // to update any entries in it that would require that. This would be
- // really slow.
}
-
+
+ // Make a second pass over the PHINodes now that all of them have been
+ // remapped into the new function, simplifying the PHINode and performing any
+ // recursive simplifications exposed. This will transparently update the
+ // WeakVH in the VMap. Notably, we rely on that so that if we coalesce
+ // two PHINodes, the iteration over the old PHIs remains valid, and the
+ // mapping will just map us to the new node (which may not even be a PHI
+ // node).
+ for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+ if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+ recursivelySimplifyInstruction(PN, TD);
+
// Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator Begin = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
+ Function::iterator I = Begin;
while (I != NewFunc->end()) {
+ // Check if this block has become dead during inlining or other
+ // simplifications. Note that the first block will appear dead, as it has
+ // not yet been wired up properly.
+ if (I != Begin && (pred_begin(I) == pred_end(I) ||
+ I->getSinglePredecessor() == I)) {
+ BasicBlock *DeadBB = I++;
+ DeleteDeadBlock(DeadBB);
+ continue;
+ }
+
+ // We need to simplify conditional branches and switches with a constant
+ // operand. We try to prune these out when cloning, but if the
+ // simplification required looking through PHI nodes, those are only
+ // available after forming the full basic block. That may leave some here,
+ // and we still want to prune the dead code as early as possible.
+ ConstantFoldTerminator(I);
+
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
- // Note that we can't eliminate uncond branches if the destination has
- // single-entry PHI nodes. Eliminating the single-entry phi nodes would
- // require scanning the VMap to update any entries that point to the phi
- // node.
BasicBlock *Dest = BI->getSuccessor(0);
- if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+ if (!Dest->getSinglePredecessor()) {
++I; continue;
}
-
+
+ // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+ // above should have zapped all of them.
+ assert(!isa<PHINode>(Dest->begin()));
+
// We know all single-entry PHI nodes in the inlined function have been
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
@@ -530,4 +559,13 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Do not increment I, iteratively merge all things this block branches to.
}
+
+ // Make a final pass over the basic blocks from the old function to gather
+ // any return instructions which survived folding. We have to do this here
+ // because we can iteratively remove and merge returns above.
+ for (Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]),
+ E = NewFunc->end();
+ I != E; ++I)
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator()))
+ Returns.push_back(RI);
}
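
For orientation, a hedged usage sketch of the cloning entry point this file exposes (3.x-era signature; the wrapper is illustrative). With ModuleLevelChanges=false, references to globals are shared rather than cloned:

#include "llvm/ADT/Twine.h"
#include "llvm/Module.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;

// Make a private copy of F in the same module and give it a new name.
static Function *cloneIntoModule(Function *F) {
  ValueToValueMapTy VMap;
  Function *NewF = CloneFunction(F, VMap, /*ModuleLevelChanges=*/false);
  NewF->setName(F->getName() + ".copy");
  F->getParent()->getFunctionList().push_back(NewF);
  return NewF;
}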
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
new file mode 100644
index 000000000000..9b099150a7af
--- /dev/null
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -0,0 +1,96 @@
+//===- CmpInstAnalysis.cpp - Utils to help fold compares ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file holds routines to help analyse compare instructions
+// and fold them into constants or other compare instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+
+using namespace llvm;
+
+/// getICmpCode - Encode an icmp predicate into a three-bit mask. These bits
+/// are carefully arranged to allow folding of expressions such as:
+///
+/// (A < B) | (A > B) --> (A != B)
+///
+/// Note that this is only valid if the first and second predicates have the
+/// same sign. It is illegal to do: (A u< B) | (A s> B)
+///
+/// Three bits are used to represent the condition, as follows:
+/// 0 A > B
+/// 1 A == B
+/// 2 A < B
+///
+/// <=> Value Definition
+/// 000 0 Always false
+/// 001 1 A > B
+/// 010 2 A == B
+/// 011 3 A >= B
+/// 100 4 A < B
+/// 101 5 A != B
+/// 110 6 A <= B
+/// 111 7 Always true
+///
+unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
+ ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
+ : ICI->getPredicate();
+ switch (Pred) {
+ // False -> 0
+ case ICmpInst::ICMP_UGT: return 1; // 001
+ case ICmpInst::ICMP_SGT: return 1; // 001
+ case ICmpInst::ICMP_EQ: return 2; // 010
+ case ICmpInst::ICMP_UGE: return 3; // 011
+ case ICmpInst::ICMP_SGE: return 3; // 011
+ case ICmpInst::ICMP_ULT: return 4; // 100
+ case ICmpInst::ICMP_SLT: return 4; // 100
+ case ICmpInst::ICMP_NE: return 5; // 101
+ case ICmpInst::ICMP_ULE: return 6; // 110
+ case ICmpInst::ICMP_SLE: return 6; // 110
+ // True -> 7
+ default:
+ llvm_unreachable("Invalid ICmp predicate!");
+ }
+}
+
+/// getICmpValue - This is the complement of getICmpCode, which turns an
+/// opcode and two operands into either a constant true or false, or the
+/// predicate for a new ICmp instruction. The sign is passed in to determine
+/// which kind of predicate to use in the new icmp instruction.
+/// A non-NULL return value will be a true or false constant.
+/// A NULL return means a new ICmp is needed; its predicate is returned in
+/// NewICmpPred.
+Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
+ CmpInst::Predicate &NewICmpPred) {
+ switch (Code) {
+ default: llvm_unreachable("Illegal ICmp code!");
+ case 0: // False.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0);
+ case 1: NewICmpPred = Sign ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
+ case 2: NewICmpPred = ICmpInst::ICMP_EQ; break;
+ case 3: NewICmpPred = Sign ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
+ case 4: NewICmpPred = Sign ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
+ case 5: NewICmpPred = ICmpInst::ICMP_NE; break;
+ case 6: NewICmpPred = Sign ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
+ case 7: // True.
+ return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
+ }
+ return NULL;
+}
+
+/// PredicatesFoldable - Return true if both predicates match sign or if at
+/// least one of them is an equality comparison (which is signless).
+bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
+ return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
+ (CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
+ (CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
+}
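
With this encoding, predicate algebra reduces to integer OR/AND on the codes. A small self-contained check of the table (plain C++, no LLVM dependency):

#include <cassert>

// Three-bit codes from the table above: bit 0 = 'A > B',
// bit 1 = 'A == B', bit 2 = 'A < B'.
enum PredCode {
  AlwaysFalse = 0, Gt = 1, Eq = 2, Ge = 3,
  Lt = 4, Ne = 5, Le = 6, AlwaysTrue = 7
};

int main() {
  assert((Lt | Gt) == Ne);          // (A < B) | (A > B)  -->  A != B
  assert((Lt | Eq) == Le);          // (A < B) | (A == B) -->  A <= B
  assert((Ge & Le) == Eq);          // (A >= B) & (A <= B) --> A == B
  assert((Lt | Ge) == AlwaysTrue);  // complementary predicates cover all
  return 0;
}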
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 5f47ebb78202..e8c0b80c2126 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -615,9 +615,10 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
default:
// Otherwise, make the default destination of the switch instruction be one
// of the other successors.
- TheSwitch->setOperand(0, call);
- TheSwitch->setSuccessor(0, TheSwitch->getSuccessor(NumExitBlocks));
- TheSwitch->removeCase(NumExitBlocks); // Remove redundant case
+ TheSwitch->setCondition(call);
+ TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks));
+ // Remove redundant case
+ TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1));
break;
}
}
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 8cc26492c292..99b58301634a 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -6,21 +6,12 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file provide the function DemoteRegToStack(). This function takes a
-// virtual register computed by an Instruction and replaces it with a slot in
-// the stack frame, allocated via alloca. It returns the pointer to the
-// AllocaInst inserted. After this function is called on an instruction, we are
-// guaranteed that the only user of the instruction is a store that is
-// immediately after it.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Type.h"
-#include <map>
+#include "llvm/ADT/DenseMap.h"
using namespace llvm;
/// DemoteRegToStack - This function takes a virtual register computed by an
@@ -28,8 +19,7 @@ using namespace llvm;
/// alloca. This allows the CFG to be changed around without fear of
/// invalidating the SSA information for the value. It returns the pointer to
/// the alloca inserted to create a stack slot for I.
-///
-AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
+AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Instruction *AllocaPoint) {
if (I.use_empty()) {
I.eraseFromParent();
@@ -47,21 +37,20 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
F->getEntryBlock().begin());
}
- // Change all of the users of the instruction to read from the stack slot
- // instead.
+ // Change all of the users of the instruction to read from the stack slot.
while (!I.use_empty()) {
Instruction *U = cast<Instruction>(I.use_back());
if (PHINode *PN = dyn_cast<PHINode>(U)) {
// If this is a PHI node, we can't insert a load of the value before the
- // use. Instead, insert the load in the predecessor block corresponding
+ // use. Instead insert the load in the predecessor block corresponding
// to the incoming value.
//
// Note that if there are multiple edges from a basic block to this PHI
- // node that we cannot multiple loads. The problem is that the resultant
- // PHI node will have multiple values (from each load) coming in from the
- // same block, which is illegal SSA form. For this reason, we keep track
- // and reuse loads we insert.
- std::map<BasicBlock*, Value*> Loads;
+ // node that we cannot have multiple loads. The problem is that the
+ // resulting PHI node will have multiple values (from each load) coming in
+ // from the same block, which is illegal SSA form. For this reason, we
+ // keep track of and reuse loads we insert.
+ DenseMap<BasicBlock*, Value*> Loads;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I) {
Value *&V = Loads[PN->getIncomingBlock(i)];
@@ -81,9 +70,9 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
}
- // Insert stores of the computed value into the stack slot. We have to be
- // careful is I is an invoke instruction though, because we can't insert the
- // store AFTER the terminator instruction.
+ // Insert stores of the computed value into the stack slot. We have to be
+ // careful if I is an invoke instruction, because we can't insert the store
+ // AFTER the terminator instruction.
BasicBlock::iterator InsertPt;
if (!isa<TerminatorInst>(I)) {
InsertPt = &I;
@@ -97,18 +86,17 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
InsertPt = II.getNormalDest()->begin();
}
- for (; isa<PHINode>(InsertPt); ++InsertPt)
- /* empty */; // Don't insert before any PHI nodes.
- new StoreInst(&I, Slot, InsertPt);
+ for (; isa<PHINode>(InsertPt) || isa<LandingPadInst>(InsertPt); ++InsertPt)
+ /* empty */; // Don't insert before PHI nodes or landingpad instrs.
+ new StoreInst(&I, Slot, InsertPt);
return Slot;
}
-
-/// DemotePHIToStack - This function takes a virtual register computed by a phi
-/// node and replaces it with a slot in the stack frame, allocated via alloca.
-/// The phi node is deleted and it returns the pointer to the alloca inserted.
-AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
+/// DemotePHIToStack - This function takes a virtual register computed by a PHI
+/// node and replaces it with a slot in the stack frame allocated via alloca.
+/// The PHI node is deleted. It returns the pointer to the alloca inserted.
+AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
if (P->use_empty()) {
P->eraseFromParent();
return 0;
@@ -125,7 +113,7 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
F->getEntryBlock().begin());
}
- // Iterate over each operand, insert store in each predecessor.
+ // Iterate over each operand inserting a store in each predecessor.
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
assert(II->getParent() != P->getIncomingBlock(i) &&
@@ -135,12 +123,11 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
P->getIncomingBlock(i)->getTerminator());
}
- // Insert load in place of the phi and replace all uses.
+ // Insert a load in place of the PHI and replace all uses.
Value *V = new LoadInst(Slot, P->getName()+".reload", P);
P->replaceAllUsesWith(V);
- // Delete phi.
+ // Delete PHI.
P->eraseFromParent();
-
return Slot;
}
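
A hedged usage sketch, roughly what the -reg2mem pass does with these helpers (the wrapper is illustrative; assumes the 3.x API). Demoting first and running mem2reg afterwards lets a transform rewrite the CFG without maintaining SSA by hand:

#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Transforms/Utils/Local.h"
#include <vector>
using namespace llvm;

// Demote every non-terminator value with uses, plus all PHI nodes, to
// stack slots. Collect first, since demotion mutates the blocks.
static void demoteFunction(Function &F) {
  std::vector<Instruction*> Work;
  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
    for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
      if (!I->isTerminator() && !I->use_empty())
        Work.push_back(I);
  for (unsigned i = 0, e = Work.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast<PHINode>(Work[i]))
      DemotePHIToStack(PN);
    else if (!Work[i]->use_empty()) // May have lost its uses to a demoted PHI.
      DemoteRegToStack(*Work[i]);
}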
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 5464dbc4a830..d2b167acb0e7 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -10,13 +10,6 @@
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
-// The code in this file for handling inlines through invoke
-// instructions preserves semantics only under some assumptions about
-// the behavior of unwinders which correspond to gcc-style libUnwind
-// exception personality functions. Eventually the IR will be
-// improved to make this unnecessary, but until then, this code is
-// marked [LIBUNWIND].
-//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/Cloning.h"
@@ -38,271 +31,52 @@
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
-bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI) {
- return InlineFunction(CallSite(CI), IFI);
-}
-bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI) {
- return InlineFunction(CallSite(II), IFI);
-}
-
-// FIXME: New EH - Remove the functions marked [LIBUNWIND] when new EH is
-// turned on.
-
-/// [LIBUNWIND] Look for an llvm.eh.exception call in the given block.
-static EHExceptionInst *findExceptionInBlock(BasicBlock *bb) {
- for (BasicBlock::iterator i = bb->begin(), e = bb->end(); i != e; i++) {
- EHExceptionInst *exn = dyn_cast<EHExceptionInst>(i);
- if (exn) return exn;
- }
-
- return 0;
-}
-
-/// [LIBUNWIND] Look for the 'best' llvm.eh.selector instruction for
-/// the given llvm.eh.exception call.
-static EHSelectorInst *findSelectorForException(EHExceptionInst *exn) {
- BasicBlock *exnBlock = exn->getParent();
-
- EHSelectorInst *outOfBlockSelector = 0;
- for (Instruction::use_iterator
- ui = exn->use_begin(), ue = exn->use_end(); ui != ue; ++ui) {
- EHSelectorInst *sel = dyn_cast<EHSelectorInst>(*ui);
- if (!sel) continue;
-
- // Immediately accept an eh.selector in the same block as the
- // excepton call.
- if (sel->getParent() == exnBlock) return sel;
-
- // Otherwise, use the first selector we see.
- if (!outOfBlockSelector) outOfBlockSelector = sel;
- }
-
- return outOfBlockSelector;
+bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(CI), IFI, InsertLifetime);
}
-
-/// [LIBUNWIND] Find the (possibly absent) call to @llvm.eh.selector
-/// in the given landing pad. In principle, llvm.eh.exception is
-/// required to be in the landing pad; in practice, SplitCriticalEdge
-/// can break that invariant, and then inlining can break it further.
-/// There's a real need for a reliable solution here, but until that
-/// happens, we have some fragile workarounds here.
-static EHSelectorInst *findSelectorForLandingPad(BasicBlock *lpad) {
- // Look for an exception call in the actual landing pad.
- EHExceptionInst *exn = findExceptionInBlock(lpad);
- if (exn) return findSelectorForException(exn);
-
- // Okay, if that failed, look for one in an obvious successor. If
- // we find one, we'll fix the IR by moving things back to the
- // landing pad.
-
- bool dominates = true; // does the lpad dominate the exn call
- BasicBlock *nonDominated = 0; // if not, the first non-dominated block
- BasicBlock *lastDominated = 0; // and the block which branched to it
-
- BasicBlock *exnBlock = lpad;
-
- // We need to protect against lpads that lead into infinite loops.
- SmallPtrSet<BasicBlock*,4> visited;
- visited.insert(exnBlock);
-
- do {
- // We're not going to apply this hack to anything more complicated
- // than a series of unconditional branches, so if the block
- // doesn't terminate in an unconditional branch, just fail. More
- // complicated cases can arise when, say, sinking a call into a
- // split unwind edge and then inlining it; but that can do almost
- // *anything* to the CFG, including leaving the selector
- // completely unreachable. The only way to fix that properly is
- // to (1) prohibit transforms which move the exception or selector
- // values away from the landing pad, e.g. by producing them with
- // instructions that are pinned to an edge like a phi, or
- // producing them with not-really-instructions, and (2) making
- // transforms which split edges deal with that.
- BranchInst *branch = dyn_cast<BranchInst>(&exnBlock->back());
- if (!branch || branch->isConditional()) return 0;
-
- BasicBlock *successor = branch->getSuccessor(0);
-
- // Fail if we found an infinite loop.
- if (!visited.insert(successor)) return 0;
-
- // If the successor isn't dominated by exnBlock:
- if (!successor->getSinglePredecessor()) {
- // We don't want to have to deal with threading the exception
- // through multiple levels of phi, so give up if we've already
- // followed a non-dominating edge.
- if (!dominates) return 0;
-
- // Otherwise, remember this as a non-dominating edge.
- dominates = false;
- nonDominated = successor;
- lastDominated = exnBlock;
- }
-
- exnBlock = successor;
-
- // Can we stop here?
- exn = findExceptionInBlock(exnBlock);
- } while (!exn);
-
- // Look for a selector call for the exception we found.
- EHSelectorInst *selector = findSelectorForException(exn);
- if (!selector) return 0;
-
- // The easy case is when the landing pad still dominates the
- // exception call, in which case we can just move both calls back to
- // the landing pad.
- if (dominates) {
- selector->moveBefore(lpad->getFirstNonPHI());
- exn->moveBefore(selector);
- return selector;
- }
-
- // Otherwise, we have to split at the first non-dominating block.
- // The CFG looks basically like this:
- // lpad:
- // phis_0
- // insnsAndBranches_1
- // br label %nonDominated
- // nonDominated:
- // phis_2
- // insns_3
- // %exn = call i8* @llvm.eh.exception()
- // insnsAndBranches_4
- // %selector = call @llvm.eh.selector(i8* %exn, ...
- // We need to turn this into:
- // lpad:
- // phis_0
- // %exn0 = call i8* @llvm.eh.exception()
- // %selector0 = call @llvm.eh.selector(i8* %exn0, ...
- // insnsAndBranches_1
- // br label %split // from lastDominated
- // nonDominated:
- // phis_2 (without edge from lastDominated)
- // %exn1 = call i8* @llvm.eh.exception()
- // %selector1 = call i8* @llvm.eh.selector(i8* %exn1, ...
- // br label %split
- // split:
- // phis_2 (edge from lastDominated, edge from split)
- // %exn = phi ...
- // %selector = phi ...
- // insns_3
- // insnsAndBranches_4
-
- assert(nonDominated);
- assert(lastDominated);
-
- // First, make clones of the intrinsics to go in lpad.
- EHExceptionInst *lpadExn = cast<EHExceptionInst>(exn->clone());
- EHSelectorInst *lpadSelector = cast<EHSelectorInst>(selector->clone());
- lpadSelector->setArgOperand(0, lpadExn);
- lpadSelector->insertBefore(lpad->getFirstNonPHI());
- lpadExn->insertBefore(lpadSelector);
-
- // Split the non-dominated block.
- BasicBlock *split =
- nonDominated->splitBasicBlock(nonDominated->getFirstNonPHI(),
- nonDominated->getName() + ".lpad-fix");
-
- // Redirect the last dominated branch there.
- cast<BranchInst>(lastDominated->back()).setSuccessor(0, split);
-
- // Move the existing intrinsics to the end of the old block.
- selector->moveBefore(&nonDominated->back());
- exn->moveBefore(selector);
-
- Instruction *splitIP = &split->front();
-
- // For all the phis in nonDominated, make a new phi in split to join
- // that phi with the edge from lastDominated.
- for (BasicBlock::iterator
- i = nonDominated->begin(), e = nonDominated->end(); i != e; ++i) {
- PHINode *phi = dyn_cast<PHINode>(i);
- if (!phi) break;
-
- PHINode *splitPhi = PHINode::Create(phi->getType(), 2, phi->getName(),
- splitIP);
- phi->replaceAllUsesWith(splitPhi);
- splitPhi->addIncoming(phi, nonDominated);
- splitPhi->addIncoming(phi->removeIncomingValue(lastDominated),
- lastDominated);
- }
-
- // Make new phis for the exception and selector.
- PHINode *exnPhi = PHINode::Create(exn->getType(), 2, "", splitIP);
- exn->replaceAllUsesWith(exnPhi);
- selector->setArgOperand(0, exn); // except for this use
- exnPhi->addIncoming(exn, nonDominated);
- exnPhi->addIncoming(lpadExn, lastDominated);
-
- PHINode *selectorPhi = PHINode::Create(selector->getType(), 2, "", splitIP);
- selector->replaceAllUsesWith(selectorPhi);
- selectorPhi->addIncoming(selector, nonDominated);
- selectorPhi->addIncoming(lpadSelector, lastDominated);
-
- return lpadSelector;
+bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
+ return InlineFunction(CallSite(II), IFI, InsertLifetime);
}
namespace {
/// A class for recording information about inlining through an invoke.
class InvokeInliningInfo {
- BasicBlock *OuterUnwindDest;
- EHSelectorInst *OuterSelector;
- BasicBlock *InnerUnwindDest;
- PHINode *InnerExceptionPHI;
- PHINode *InnerSelectorPHI;
- SmallVector<Value*, 8> UnwindDestPHIValues;
-
- // FIXME: New EH - These will replace the analogous ones above.
BasicBlock *OuterResumeDest; //< Destination of the invoke's unwind.
BasicBlock *InnerResumeDest; //< Destination for the callee's resume.
LandingPadInst *CallerLPad; //< LandingPadInst associated with the invoke.
PHINode *InnerEHValuesPHI; //< PHI for EH values from landingpad insts.
+ SmallVector<Value*, 8> UnwindDestPHIValues;
public:
InvokeInliningInfo(InvokeInst *II)
- : OuterUnwindDest(II->getUnwindDest()), OuterSelector(0),
- InnerUnwindDest(0), InnerExceptionPHI(0), InnerSelectorPHI(0),
- OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
CallerLPad(0), InnerEHValuesPHI(0) {
// If there are PHI nodes in the unwind destination block, we need to keep
// track of which values came into them from the invoke before removing
// the edge from this block.
llvm::BasicBlock *InvokeBB = II->getParent();
- BasicBlock::iterator I = OuterUnwindDest->begin();
+ BasicBlock::iterator I = OuterResumeDest->begin();
for (; isa<PHINode>(I); ++I) {
// Save the value to use for this edge.
PHINode *PHI = cast<PHINode>(I);
UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
}
- // FIXME: With the new EH, this if/dyn_cast should be a 'cast'.
- if (LandingPadInst *LPI = dyn_cast<LandingPadInst>(I)) {
- CallerLPad = LPI;
- }
+ CallerLPad = cast<LandingPadInst>(I);
}
- /// The outer unwind destination is the target of unwind edges
- /// introduced for calls within the inlined function.
- BasicBlock *getOuterUnwindDest() const {
- return OuterUnwindDest;
+ /// getOuterResumeDest - The outer unwind destination is the target of
+ /// unwind edges introduced for calls within the inlined function.
+ BasicBlock *getOuterResumeDest() const {
+ return OuterResumeDest;
}
- EHSelectorInst *getOuterSelector() {
- if (!OuterSelector)
- OuterSelector = findSelectorForLandingPad(OuterUnwindDest);
- return OuterSelector;
- }
-
- BasicBlock *getInnerUnwindDest();
-
- // FIXME: New EH - Rename when new EH is turned on.
- BasicBlock *getInnerUnwindDestNewEH();
+ BasicBlock *getInnerResumeDest();
LandingPadInst *getLandingPadInst() const { return CallerLPad; }
- bool forwardEHResume(CallInst *call, BasicBlock *src);
-
/// forwardResume - Forward the 'resume' instruction to the caller's landing
/// pad block. When the landing pad block has only one predecessor, this is
/// a simple branch. When there is more than one predecessor, we need to
@@ -314,7 +88,7 @@ namespace {
/// destination block for the given basic block, using the values for the
/// original invoke's source block.
void addIncomingPHIValuesFor(BasicBlock *BB) const {
- addIncomingPHIValuesForInto(BB, OuterUnwindDest);
+ addIncomingPHIValuesForInto(BB, OuterResumeDest);
}
void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
@@ -327,113 +101,8 @@ namespace {
};
}
-/// [LIBUNWIND] Get or create a target for the branch out of rewritten calls to
-/// llvm.eh.resume.
-BasicBlock *InvokeInliningInfo::getInnerUnwindDest() {
- if (InnerUnwindDest) return InnerUnwindDest;
-
- // Find and hoist the llvm.eh.exception and llvm.eh.selector calls
- // in the outer landing pad to immediately following the phis.
- EHSelectorInst *selector = getOuterSelector();
- if (!selector) return 0;
-
- // The call to llvm.eh.exception *must* be in the landing pad.
- Instruction *exn = cast<Instruction>(selector->getArgOperand(0));
- assert(exn->getParent() == OuterUnwindDest);
-
- // TODO: recognize when we've already done this, so that we don't
- // get a linear number of these when inlining calls into lots of
- // invokes with the same landing pad.
-
- // Do the hoisting.
- Instruction *splitPoint = exn->getParent()->getFirstNonPHI();
- assert(splitPoint != selector && "selector-on-exception dominance broken!");
- if (splitPoint == exn) {
- selector->removeFromParent();
- selector->insertAfter(exn);
- splitPoint = selector->getNextNode();
- } else {
- exn->moveBefore(splitPoint);
- selector->moveBefore(splitPoint);
- }
-
- // Split the landing pad.
- InnerUnwindDest = OuterUnwindDest->splitBasicBlock(splitPoint,
- OuterUnwindDest->getName() + ".body");
-
- // The number of incoming edges we expect to the inner landing pad.
- const unsigned phiCapacity = 2;
-
- // Create corresponding new phis for all the phis in the outer landing pad.
- BasicBlock::iterator insertPoint = InnerUnwindDest->begin();
- BasicBlock::iterator I = OuterUnwindDest->begin();
- for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
- PHINode *outerPhi = cast<PHINode>(I);
- PHINode *innerPhi = PHINode::Create(outerPhi->getType(), phiCapacity,
- outerPhi->getName() + ".lpad-body",
- insertPoint);
- outerPhi->replaceAllUsesWith(innerPhi);
- innerPhi->addIncoming(outerPhi, OuterUnwindDest);
- }
-
- // Create a phi for the exception value...
- InnerExceptionPHI = PHINode::Create(exn->getType(), phiCapacity,
- "exn.lpad-body", insertPoint);
- exn->replaceAllUsesWith(InnerExceptionPHI);
- selector->setArgOperand(0, exn); // restore this use
- InnerExceptionPHI->addIncoming(exn, OuterUnwindDest);
-
- // ...and the selector.
- InnerSelectorPHI = PHINode::Create(selector->getType(), phiCapacity,
- "selector.lpad-body", insertPoint);
- selector->replaceAllUsesWith(InnerSelectorPHI);
- InnerSelectorPHI->addIncoming(selector, OuterUnwindDest);
-
- // All done.
- return InnerUnwindDest;
-}
-
-/// [LIBUNWIND] Try to forward the given call, which logically occurs
-/// at the end of the given block, as a branch to the inner unwind
-/// block. Returns true if the call was forwarded.
-bool InvokeInliningInfo::forwardEHResume(CallInst *call, BasicBlock *src) {
- // First, check whether this is a call to the intrinsic.
- Function *fn = dyn_cast<Function>(call->getCalledValue());
- if (!fn || fn->getName() != "llvm.eh.resume")
- return false;
-
- // At this point, we need to return true on all paths, because
- // otherwise we'll construct an invoke of the intrinsic, which is
- // not well-formed.
-
- // Try to find or make an inner unwind dest, which will fail if we
- // can't find a selector call for the outer unwind dest.
- BasicBlock *dest = getInnerUnwindDest();
- bool hasSelector = (dest != 0);
-
- // If we failed, just use the outer unwind dest, dropping the
- // exception and selector on the floor.
- if (!hasSelector)
- dest = OuterUnwindDest;
-
- // Make a branch.
- BranchInst::Create(dest, src);
-
- // Update the phis in the destination. They were inserted in an
- // order which makes this work.
- addIncomingPHIValuesForInto(src, dest);
-
- if (hasSelector) {
- InnerExceptionPHI->addIncoming(call->getArgOperand(0), src);
- InnerSelectorPHI->addIncoming(call->getArgOperand(1), src);
- }
-
- return true;
-}
-
-/// Get or create a target for the branch from ResumeInsts.
-BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() {
- // FIXME: New EH - rename this function when new EH is turned on.
+/// getInnerResumeDest - Get or create a target for the branch from ResumeInsts.
+BasicBlock *InvokeInliningInfo::getInnerResumeDest() {
if (InnerResumeDest) return InnerResumeDest;
// Split the landing pad.
@@ -472,7 +141,7 @@ BasicBlock *InvokeInliningInfo::getInnerUnwindDestNewEH() {
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
- BasicBlock *Dest = getInnerUnwindDestNewEH();
+ BasicBlock *Dest = getInnerResumeDest();
BasicBlock *Src = RI->getParent();
BranchInst::Create(Dest, Src);
@@ -485,14 +154,6 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI) {
RI->eraseFromParent();
}
-/// [LIBUNWIND] Check whether this selector is "only cleanups":
-/// call i32 @llvm.eh.selector(blah, blah, i32 0)
-static bool isCleanupOnlySelector(EHSelectorInst *selector) {
- if (selector->getNumArgOperands() != 3) return false;
- ConstantInt *val = dyn_cast<ConstantInt>(selector->getArgOperand(2));
- return (val && val->isZero());
-}
-
/// HandleCallsInBlockInlinedThroughInvoke - When we inline a basic block into
/// an invoke, we have to turn all of the calls that can throw into
/// invokes. This function analyzes BB to see if there are any calls, and if so,
@@ -507,77 +168,34 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
Instruction *I = BBI++;
- if (LPI) // FIXME: New EH - This won't be NULL in the new EH.
- if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
- unsigned NumClauses = LPI->getNumClauses();
- L->reserveClauses(NumClauses);
- for (unsigned i = 0; i != NumClauses; ++i)
- L->addClause(LPI->getClause(i));
- }
+ if (LandingPadInst *L = dyn_cast<LandingPadInst>(I)) {
+ unsigned NumClauses = LPI->getNumClauses();
+ L->reserveClauses(NumClauses);
+ for (unsigned i = 0; i != NumClauses; ++i)
+ L->addClause(LPI->getClause(i));
+ }
// We only need to check for function calls: inlined invoke
// instructions require no special handling.
CallInst *CI = dyn_cast<CallInst>(I);
- if (CI == 0) continue;
-
- // LIBUNWIND: merge selector instructions.
- if (EHSelectorInst *Inner = dyn_cast<EHSelectorInst>(CI)) {
- EHSelectorInst *Outer = Invoke.getOuterSelector();
- if (!Outer) continue;
-
- bool innerIsOnlyCleanup = isCleanupOnlySelector(Inner);
- bool outerIsOnlyCleanup = isCleanupOnlySelector(Outer);
-
- // If both selectors contain only cleanups, we don't need to do
- // anything. TODO: this is really just a very specific instance
- // of a much more general optimization.
- if (innerIsOnlyCleanup && outerIsOnlyCleanup) continue;
-
- // Otherwise, we just append the outer selector to the inner selector.
- SmallVector<Value*, 16> NewSelector;
- for (unsigned i = 0, e = Inner->getNumArgOperands(); i != e; ++i)
- NewSelector.push_back(Inner->getArgOperand(i));
- for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
- NewSelector.push_back(Outer->getArgOperand(i));
-
- CallInst *NewInner =
- IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
- // No need to copy attributes, calling convention, etc.
- NewInner->takeName(Inner);
- Inner->replaceAllUsesWith(NewInner);
- Inner->eraseFromParent();
- continue;
- }
-
+
// If this call cannot unwind, don't convert it to an invoke.
- if (CI->doesNotThrow())
+ if (!CI || CI->doesNotThrow())
continue;
-
- // Convert this function call into an invoke instruction.
- // First, split the basic block.
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc");
// Delete the unconditional branch inserted by splitBasicBlock
BB->getInstList().pop_back();
- // LIBUNWIND: If this is a call to @llvm.eh.resume, just branch
- // directly to the new landing pad.
- if (Invoke.forwardEHResume(CI, BB)) {
- // TODO: 'Split' is now unreachable; clean it up.
-
- // We want to leave the original call intact so that the call
- // graph and other structures won't get misled. We also have to
- // avoid processing the next block, or we'll iterate here forever.
- return true;
- }
-
- // Otherwise, create the new invoke instruction.
+ // Create the new invoke instruction.
ImmutableCallSite CS(CI);
SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
- InvokeInst *II =
- InvokeInst::Create(CI->getCalledValue(), Split,
- Invoke.getOuterUnwindDest(),
- InvokeArgs, CI->getName(), BB);
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split,
+ Invoke.getOuterResumeDest(),
+ InvokeArgs, CI->getName(), BB);
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -585,21 +203,20 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// updates the CallGraph if present, because it uses a WeakVH.
CI->replaceAllUsesWith(II);
- Split->getInstList().pop_front(); // Delete the original call
+ // Delete the original call
+ Split->getInstList().pop_front();
- // Update any PHI nodes in the exceptional block to indicate that
- // there is now a new entry in them.
+ // Update any PHI nodes in the exceptional block to indicate that there is
+ // now a new entry in them.
Invoke.addIncomingPHIValuesFor(BB);
return false;
}
return false;
}
-
/// HandleInlinedInvoke - If we inlined an invoke site, we need to convert calls
-/// in the body of the inlined function into invokes and turn unwind
-/// instructions into branches to the invoke unwind dest.
+/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
@@ -614,7 +231,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
// start of the inlined code to its end, checking for stuff we need to
// rewrite. If the code doesn't have calls or unwinds, we know there is
// nothing to rewrite.
- if (!InlinedCodeInfo.ContainsCalls && !InlinedCodeInfo.ContainsUnwinds) {
+ if (!InlinedCodeInfo.ContainsCalls) {
// Now that everything is happy, we have one final detail. The PHI nodes in
// the exception destination block still have entries due to the original
// invoke instruction. Eliminate these entries (which might even delete the
@@ -628,30 +245,13 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){
if (InlinedCodeInfo.ContainsCalls)
if (HandleCallsInBlockInlinedThroughInvoke(BB, Invoke)) {
- // Honor a request to skip the next block. We don't need to
- // consider UnwindInsts in this case either.
+ // Honor a request to skip the next block.
++BB;
continue;
}
- if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- // An UnwindInst requires special handling when it gets inlined into an
- // invoke site. Once this happens, we know that the unwind would cause
- // a control transfer to the invoke exception destination, so we can
- // transform it into a direct branch to the exception destination.
- BranchInst::Create(InvokeDest, UI);
-
- // Delete the unwind instruction!
- UI->eraseFromParent();
-
- // Update any PHI nodes in the exceptional block to indicate that
- // there is now a new entry in them.
- Invoke.addIncomingPHIValuesFor(BB);
- }
-
- if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
Invoke.forwardResume(RI);
- }
}
// Now that everything is happy, we have one final detail. The PHI nodes in
@@ -836,8 +436,8 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
return false;
}
-/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
-/// update InlinedAtEntry of a DebugLoc.
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to
+/// recursively update InlinedAtEntry of a DebugLoc.
static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
const DebugLoc &InlinedAtDL,
LLVMContext &Ctx) {
@@ -847,16 +447,15 @@ static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
NewInlinedAtDL.getAsMDNode(Ctx));
}
-
+
return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
InlinedAtDL.getAsMDNode(Ctx));
}
-
/// fixupLineNumbers - Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
- Instruction *TheCall) {
+ Instruction *TheCall) {
DebugLoc TheCallDL = TheCall->getDebugLoc();
if (TheCallDL.isUnknown())
return;
@@ -878,18 +477,18 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
-// InlineFunction - This function inlines the called function into the basic
-// block of the caller. This returns false if it is not possible to inline this
-// call. The program is still in a well defined state if this occurs though.
-//
-// Note that this only does one level of inlining. For example, if the
-// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
-// exists in the instruction stream. Similarly this will inline a recursive
-// function by one level.
-//
-bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
+/// InlineFunction - This function inlines the called function into the basic
+/// block of the caller. This returns false if it is not possible to inline
+/// this call. The program is still in a well defined state if this occurs
+/// though.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
+ bool InsertLifetime) {
Instruction *TheCall = CS.getInstruction();
- LLVMContext &Context = TheCall->getContext();
assert(TheCall->getParent() && TheCall->getParent()->getParent() &&
"Instruction not in function!");
@@ -924,43 +523,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
return false;
}
- // Find the personality function used by the landing pads of the caller. If it
- // exists, then check to see that it matches the personality function used in
- // the callee.
- for (Function::const_iterator
- I = Caller->begin(), E = Caller->end(); I != E; ++I)
+ // Get the personality function from the callee if it contains a landing pad.
+ Value *CalleePersonality = 0;
+ for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
+ I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'isa' here should become go away once the new EH system is
- // in place.
- if (!isa<LandingPadInst>(BB->getFirstNonPHI()))
- continue;
- const LandingPadInst *LP = cast<LandingPadInst>(BB->getFirstNonPHI());
- const Value *CallerPersFn = LP->getPersonalityFn();
-
- // If the personality functions match, then we can perform the
- // inlining. Otherwise, we can't inline.
- // TODO: This isn't 100% true. Some personality functions are proper
- // supersets of others and can be used in place of the other.
- for (Function::const_iterator
- I = CalledFunc->begin(), E = CalledFunc->end(); I != E; ++I)
- if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
- const BasicBlock *BB = II->getUnwindDest();
- // FIXME: This 'if/dyn_cast' here should become a normal 'cast' once
- // the new EH system is in place.
- if (const LandingPadInst *LP =
- dyn_cast<LandingPadInst>(BB->getFirstNonPHI()))
- if (CallerPersFn != LP->getPersonalityFn())
- return false;
- break;
- }
-
+ const LandingPadInst *LP = BB->getLandingPadInst();
+ CalleePersonality = LP->getPersonalityFn();
break;
}
+ // Find the personality function used by the landing pads of the caller. If it
+ // exists, then check to see that it matches the personality function used in
+ // the callee.
+ if (CalleePersonality) {
+ for (Function::const_iterator I = Caller->begin(), E = Caller->end();
+ I != E; ++I)
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
+ const BasicBlock *BB = II->getUnwindDest();
+ const LandingPadInst *LP = BB->getLandingPadInst();
+
+ // If the personality functions match, then we can perform the
+ // inlining. Otherwise, we can't inline.
+ // TODO: This isn't 100% true. Some personality functions are proper
+ // supersets of others and can be used in place of the other.
+ if (LP->getPersonalityFn() != CalleePersonality)
+ return false;
+
+ break;
+ }
+ }
+
// Get an iterator to the last basic block in the function, which will have
// the new function inlined after it.
- //
Function::iterator LastBlock = &Caller->back();
// Make sure to capture all of the return instructions from the cloned
@@ -987,7 +583,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// by them explicit. However, we don't do this if the callee is readonly
// or readnone, because the copy would be unneeded: the callee doesn't
// modify the struct.
- if (CalledFunc->paramHasAttr(ArgNo+1, Attribute::ByVal)) {
+ if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo+1));
@@ -1023,7 +619,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// block for the callee, move them to the entry block of the caller. First
// calculate which instruction they should be inserted before. We insert the
// instructions at the end of the current alloca list.
- //
{
BasicBlock::iterator InsertPoint = Caller->begin()->begin();
for (BasicBlock::iterator I = FirstNewBlock->begin(),
@@ -1063,7 +658,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
- if (!IFI.StaticAllocas.empty()) {
+ if (InsertLifetime && !IFI.StaticAllocas.empty()) {
IRBuilder<> builder(FirstNewBlock->begin());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
@@ -1098,20 +693,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
}
-
- // Count the number of StackRestore calls we insert.
- unsigned NumStackRestores = Returns.size();
-
- // If we are inlining an invoke instruction, insert restores before each
- // unwind. These unwinds will be rewritten into branches later.
- if (InlinedFunctionInfo.ContainsUnwinds && isa<InvokeInst>(TheCall)) {
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB)
- if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
- ++NumStackRestores;
- }
- }
}
// If we are inlining tail call instruction through a call site that isn't
@@ -1131,21 +712,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
}
}
- // If we are inlining through a 'nounwind' call site then any inlined 'unwind'
- // instructions are unreachable.
- if (InlinedFunctionInfo.ContainsUnwinds && MarkNoUnwind)
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB) {
- TerminatorInst *Term = BB->getTerminator();
- if (isa<UnwindInst>(Term)) {
- new UnreachableInst(Context, Term);
- BB->getInstList().erase(Term);
- }
- }
-
// If we are inlining for an invoke instruction, we must make sure to rewrite
- // any inlined 'unwind' instructions into branches to the invoke exception
- // destination, and call instructions into invoke instructions.
+ // any call instructions into invoke instructions.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
@@ -1308,11 +876,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// If we inserted a phi node, check to see if it has a single value (e.g. all
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
- if (PHI)
+ if (PHI) {
if (Value *V = SimplifyInstruction(PHI, IFI.TD)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
+ }
return true;
}
diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt
new file mode 100644
index 000000000000..88b2ffee64da
--- /dev/null
+++ b/lib/Transforms/Utils/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Transforms/Utils/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = TransformUtils
+parent = Transforms
+required_libraries = Analysis Core IPA Support Target
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 7034feb227ad..d1c4d5968231 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -28,6 +28,7 @@
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -105,33 +106,32 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
// If we are switching on a constant, we can convert the switch into a
// single branch instruction!
ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition());
- BasicBlock *TheOnlyDest = SI->getSuccessor(0); // The default dest
+ BasicBlock *TheOnlyDest = SI->getDefaultDest();
BasicBlock *DefaultDest = TheOnlyDest;
- assert(TheOnlyDest == SI->getDefaultDest() &&
- "Default destination is not successor #0?");
// Figure out which case it goes to.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
// Found case matching a constant operand?
- if (SI->getSuccessorValue(i) == CI) {
- TheOnlyDest = SI->getSuccessor(i);
+ if (i.getCaseValue() == CI) {
+ TheOnlyDest = i.getCaseSuccessor();
break;
}
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
- if (SI->getSuccessor(i) == DefaultDest) {
+ if (i.getCaseSuccessor() == DefaultDest) {
// Remove this entry.
DefaultDest->removePredecessor(SI->getParent());
SI->removeCase(i);
- --i; --e; // Don't skip an entry...
+ --i; --e;
continue;
}
// Otherwise, check to see if the switch only branches to one destination.
  // We do this by resetting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (SI->getSuccessor(i) != TheOnlyDest) TheOnlyDest = 0;
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
}
if (CI && !TheOnlyDest) {
@@ -165,14 +165,16 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions) {
return true;
}
- if (SI->getNumSuccessors() == 2) {
+ if (SI->getNumCases() == 1) {
// Otherwise, we can fold this switch into a conditional branch
// instruction if it has only one non-default destination.
+ SwitchInst::CaseIt FirstCase = SI->case_begin();
Value *Cond = Builder.CreateICmpEQ(SI->getCondition(),
- SI->getSuccessorValue(1), "cond");
+ FirstCase.getCaseValue(), "cond");
// Insert the new branch.
- Builder.CreateCondBr(Cond, SI->getSuccessor(1), SI->getSuccessor(0));
+ Builder.CreateCondBr(Cond, FirstCase.getCaseSuccessor(),
+ SI->getDefaultDest());
// Delete the old switch.
SI->eraseFromParent();
@@ -257,6 +259,13 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
II->getIntrinsicID() == Intrinsic::lifetime_end)
return isa<UndefValue>(II->getArgOperand(1));
}
+
+ if (extractMallocCall(I)) return true;
+
+ if (CallInst *CI = isFreeCall(I))
+ if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
+ return C->isNullValue() || isa<UndefValue>(C);
+
return false;
}
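
The two new checks extend triviality to heap calls: a malloc whose result is unused, and a free of a constant null or undef pointer, are now considered dead. A hedged IR-level illustration (assuming the usual libc prototypes):

    %p = call i8* @malloc(i64 16)  ; result unused -> trivially dead
    call void @free(i8* null)      ; freeing null is a no-op -> trivially dead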
@@ -346,22 +355,27 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
/// instructions in other blocks as well as in this block.
bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
bool MadeChange = false;
- for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
+
+#ifndef NDEBUG
+ // In debug builds, ensure that the terminator of the block is never replaced
+ // or deleted by these simplifications. The idea of simplification is that it
+ // cannot introduce new instructions, and there is no way to replace the
+ // terminator of a block without introducing a new instruction.
+ AssertingVH<Instruction> TerminatorVH(--BB->end());
+#endif
+
+ for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) {
+ assert(!BI->isTerminator());
Instruction *Inst = BI++;
-
- if (Value *V = SimplifyInstruction(Inst, TD)) {
- WeakVH BIHandle(BI);
- ReplaceAndSimplifyAllUses(Inst, V, TD);
+
+ WeakVH BIHandle(BI);
+ if (recursivelySimplifyInstruction(Inst, TD)) {
MadeChange = true;
if (BIHandle != BI)
BI = BB->begin();
continue;
}
- if (Inst->isTerminator())
- break;
-
- WeakVH BIHandle(BI);
MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst);
if (BIHandle != BI)
BI = BB->begin();
@@ -399,17 +413,11 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
WeakVH PhiIt = &BB->front();
while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+ Value *OldPhiIt = PhiIt;
- Value *PNV = SimplifyInstruction(PN, TD);
- if (PNV == 0) continue;
+ if (!recursivelySimplifyInstruction(PN, TD))
+ continue;
- // If we're able to simplify the phi to a single value, substitute the new
- // value into all of its uses.
- assert(PNV != PN && "SimplifyInstruction broken!");
-
- Value *OldPhiIt = PhiIt;
- ReplaceAndSimplifyAllUses(PN, PNV, TD);
-
// If recursive simplification ended up deleting the next PHI node we would
// iterate to, then our iterator is invalid, restart scanning from the top
// of the block.
@@ -486,22 +494,8 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
if (Succ->getSinglePredecessor()) return true;
// Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
- if (BBPreds.count(P))
- CommonPreds.insert(P);
- }
+ SmallPtrSet<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
// Look at all the phi nodes in Succ, to see if they present a conflict when
// merging these blocks
for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
@@ -512,28 +506,28 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// merge the phi nodes and then the blocks can still be merged
PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) &&
+ BBPN->getIncomingValueForBlock(IBB) != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with "
<< BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
+ << IBB->getName() << "\n");
return false;
}
}
} else {
Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
+ for (unsigned PI = 0, PE = PN->getNumIncomingValues(); PI != PE; ++PI) {
// See if the incoming value for the common predecessor is equal to the
// one for BB, in which case this phi node will not prevent the merging
// of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
+ BasicBlock *IBB = PN->getIncomingBlock(PI);
+ if (BBPreds.count(IBB) && Val != PN->getIncomingValue(PI)) {
DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in "
<< Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
+ << "predecessor " << IBB->getName() << "\n");
return false;
}
}
@@ -740,6 +734,10 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
if (GV->isDeclaration()) return Align;
+ // If the memory we set aside for the global may not be the memory used by
+  // the final program, then it is impossible for us to reliably enforce the
+ // preferred alignment.
+ if (GV->isWeakForLinker()) return Align;
if (GV->getAlignment() >= PrefAlign)
return GV->getAlignment();
@@ -764,9 +762,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
assert(V->getType()->isPointerTy() &&
"getOrEnforceKnownAlignment expects a pointer!");
unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
- APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+ ComputeMaskedBits(V, KnownZero, KnownOne, TD);
unsigned TrailZ = KnownZero.countTrailingOnes();
  // Avoid trouble with ridiculously large TrailZ values, such as
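
Several hunks in this file migrate from successor-index access on SwitchInst to the new case iterator. A minimal sketch of the idiom, using only accessors that appear in this patch:

    // Find the destination a known constant condition selects; explicit
    // cases no longer include the default destination.
    static BasicBlock *findCaseDest(SwitchInst *SI, ConstantInt *CI) {
      for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
           i != e; ++i)
        if (i.getCaseValue() == CI)
          return i.getCaseSuccessor();
      return SI->getDefaultDest();
    }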
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cbd54a8dcbf8..0bc185d8b722 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -99,7 +99,8 @@ namespace {
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
- Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
+ Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader);
BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
@@ -240,7 +241,7 @@ ReprocessLoop:
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
if (L->getNumBackEdges() < 8) {
- if (SeparateNestedLoop(L, LPM)) {
+ if (SeparateNestedLoop(L, LPM, Preheader)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
Changed = true;
@@ -265,7 +266,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -379,19 +380,27 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
}
// Split out the loop pre-header.
- BasicBlock *NewBB =
- SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
- ".preheader", this);
+ BasicBlock *PreheaderBB;
+ if (!Header->isLandingPad()) {
+ PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader",
+ this);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Header, OutsideBlocks, ".preheader",
+ ".split-lp", this, NewBBs);
+ PreheaderBB = NewBBs[0];
+ }
- NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
- DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
- << "\n");
+ PreheaderBB->getTerminator()->setDebugLoc(
+ Header->getFirstNonPHI()->getDebugLoc());
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+ << PreheaderBB->getName() << "\n");
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
- PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
+ PlaceSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
- return NewBB;
+ return PreheaderBB;
}
/// RewriteLoopExitBlock - Ensure that the loop preheader dominates all exit
@@ -420,9 +429,7 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
this, NewBBs);
NewExitBB = NewBBs[0];
} else {
- NewExitBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
- LoopBlocks.size(), ".loopexit",
- this);
+ NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", this);
}
DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
@@ -456,7 +463,7 @@ static PHINode *FindPHIToPartitionLoops(Loop *L, DominatorTree *DT,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -529,7 +536,16 @@ void LoopSimplify::PlaceSplitBlockCarefully(BasicBlock *NewBB,
/// If we are able to separate out a loop, return the new outer loop that was
/// created.
///
-Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
+Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM,
+ BasicBlock *Preheader) {
+ // Don't try to separate loops without a preheader.
+ if (!Preheader)
+ return 0;
+
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!L->getHeader()->isLandingPad() &&
+ "Can't insert backedge to landing pad");
+
PHINode *PN = FindPHIToPartitionLoops(L, DT, AA, LI);
if (PN == 0) return 0; // No known way to partition.
@@ -537,16 +553,15 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
// handles the case when a PHI node has multiple instances of itself as
// arguments.
SmallVector<BasicBlock*, 8> OuterLoopPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
if (PN->getIncomingValue(i) != PN ||
!L->contains(PN->getIncomingBlock(i))) {
// We can't split indirectbr edges.
if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
return 0;
-
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
-
+ }
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
// If ScalarEvolution is around and knows anything about values in
@@ -556,9 +571,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
SE->forgetLoop(L);
BasicBlock *Header = L->getHeader();
- BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
- OuterLoopPreds.size(),
- ".outer", this);
+ BasicBlock *NewBB =
+ SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", this);
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
@@ -640,6 +654,9 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
if (!Preheader)
return 0;
+ // The header is not a landing pad; preheader insertion should ensure this.
+ assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
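
The preheader and exit-block rewrites above share one pattern: when the block being split is a landing pad, the landing-pad-aware splitter must be used so EH semantics survive. A condensed sketch of that pattern, assuming the ArrayRef-taking signatures this patch uses ('P' is the running pass):

    SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
    BasicBlock *NewBB;
    if (!BB->isLandingPad()) {
      NewBB = SplitBlockPredecessors(BB, Preds, ".split", P);
    } else {
      // Landing pads are split into two blocks; the first carries the
      // rewired predecessors.
      SmallVector<BasicBlock*, 2> NewBBs;
      SplitLandingPadPredecessors(BB, Preds, ".split1", ".split2", P, NewBBs);
      NewBB = NewBBs[0];
    }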
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 62e4fa295378..e15497a77ae3 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -135,7 +135,8 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are
/// available it must also preserve those analyses.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
- unsigned TripMultiple, LoopInfo *LI, LPPassManager *LPM) {
+ bool AllowRuntime, unsigned TripMultiple,
+ LoopInfo *LI, LPPassManager *LPM) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -148,6 +149,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
+ // Loops with indirectbr cannot be cloned.
+ if (!L->isSafeToClone()) {
+ DEBUG(dbgs() << " Can't unroll; Loop body cannot be cloned.\n");
+ return false;
+ }
+
BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
@@ -165,12 +172,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return false;
}
- // Notify ScalarEvolution that the loop will be substantially changed,
- // if not outright eliminated.
- ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE)
- SE->forgetLoop(L);
-
if (TripCount != 0)
DEBUG(dbgs() << " Trip Count = " << TripCount << "\n");
if (TripMultiple != 1)
@@ -181,6 +182,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
if (TripCount != 0 && Count > TripCount)
Count = TripCount;
+ // Don't enter the unroll code if there is nothing to do. This way we don't
+ // need to support "partial unrolling by 1".
+ if (TripCount == 0 && Count < 2)
+ return false;
+
assert(Count > 0);
assert(TripMultiple > 0);
assert(TripCount == 0 || TripCount % TripMultiple == 0);
@@ -188,6 +194,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// Are we eliminating the loop control altogether?
bool CompletelyUnroll = Count == TripCount;
+ // We assume a run-time trip count if the compiler cannot
+ // figure out the loop trip count and the unroll-runtime
+ // flag is specified.
+ bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+
+ if (RuntimeTripCount && !UnrollRuntimeLoopProlog(L, Count, LI, LPM))
+ return false;
+
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE)
+ SE->forgetLoop(L);
+
// If we know the trip count, we know the multiple...
unsigned BreakoutTrip = 0;
if (TripCount != 0) {
@@ -209,6 +229,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ } else if (RuntimeTripCount) {
+ DEBUG(dbgs() << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -332,6 +354,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;
+ if (RuntimeTripCount && j != 0) {
+ NeedConditional = false;
+ }
+
// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll && j == 0) {
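
For callers, the visible change is the new AllowRuntime flag between TripCount and TripMultiple. A hypothetical call site (factor and flag values invented for illustration):

    // TripCount == 0 means the count is unknown at compile time; with
    // AllowRuntime set, unrolling succeeds only if the run-time prolog
    // (UnrollRuntimeLoopProlog, added below) can be emitted.
    bool Unrolled = UnrollLoop(L, /*Count=*/4, /*TripCount=*/0,
                               /*AllowRuntime=*/true, /*TripMultiple=*/1,
                               LI, LPM);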
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
new file mode 100644
index 000000000000..3aa6befe1f15
--- /dev/null
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -0,0 +1,372 @@
+//===-- LoopUnrollRuntime.cpp - Runtime Loop unrolling utilities ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for loops with run-time
+// trip counts. See LoopUnroll.cpp for unrolling loops with compile-time
+// trip counts.
+//
+// The functions in this file are used to generate extra code when the
+// run-time trip count modulo the unroll factor is not 0. When this is the
+// case, we need to generate code to execute these 'left over' iterations.
+//
+// The current strategy generates an if-then-else sequence prior to the
+// unrolled loop to execute the 'left over' iterations. Other strategies
+// include generating a loop before or after the unrolled loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loop-unroll"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <algorithm>
+
+using namespace llvm;
+
+STATISTIC(NumRuntimeUnrolled,
+ "Number of loops unrolled with run-time trip counts");
+
+/// Connect the unrolling prolog code to the original loop.
+/// The unrolling prolog code contains code to execute the
+/// 'extra' iterations if the run-time trip count modulo the
+/// unroll count is non-zero.
+///
+/// This function performs the following:
+/// - Create PHI nodes at prolog end block to combine values
+/// that exit the prolog code and jump around the prolog.
+/// - Add a PHI operand to a PHI node at the loop exit block
+/// for values that exit the prolog and go around the loop.
+/// - Branch around the original loop if the trip count is less
+/// than the unroll factor.
+///
+static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
+ BasicBlock *LastPrologBB, BasicBlock *PrologEnd,
+ BasicBlock *OrigPH, BasicBlock *NewPH,
+ ValueToValueMapTy &LVMap, Pass *P) {
+ BasicBlock *Latch = L->getLoopLatch();
+ assert(Latch != 0 && "Loop must have a latch");
+
+ // Create a PHI node for each outgoing value from the original loop
+ // (which means it is an outgoing value from the prolog code too).
+ // The new PHI node is inserted in the prolog end basic block.
+  // The new PHI node is added as an operand of a PHI node in either
+ // the loop header or the loop exit block.
+ for (succ_iterator SBI = succ_begin(Latch), SBE = succ_end(Latch);
+ SBI != SBE; ++SBI) {
+ for (BasicBlock::iterator BBI = (*SBI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+
+ // Add a new PHI node to the prolog end block and add the
+ // appropriate incoming values.
+ PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName()+".unr",
+ PrologEnd->getTerminator());
+ // Adding a value to the new PHI node from the original loop preheader.
+ // This is the value that skips all the prolog code.
+ if (L->contains(PN)) {
+ NewPN->addIncoming(PN->getIncomingValueForBlock(NewPH), OrigPH);
+ } else {
+ NewPN->addIncoming(Constant::getNullValue(PN->getType()), OrigPH);
+ }
+
+ Value *V = PN->getIncomingValueForBlock(Latch);
+ if (Instruction *I = dyn_cast<Instruction>(V)) {
+ if (L->contains(I)) {
+ V = LVMap[I];
+ }
+ }
+ // Adding a value to the new PHI node from the last prolog block
+ // that was created.
+ NewPN->addIncoming(V, LastPrologBB);
+
+ // Update the existing PHI node operand with the value from the
+  // new PHI node. How this is done depends on whether the existing
+ // PHI node is in the original loop block, or the exit block.
+ if (L->contains(PN)) {
+ PN->setIncomingValue(PN->getBasicBlockIndex(NewPH), NewPN);
+ } else {
+ PN->addIncoming(NewPN, PrologEnd);
+ }
+ }
+ }
+
+  // Create a branch around the original loop, which is taken if the
+ // trip count is less than the unroll factor.
+ Instruction *InsertPt = PrologEnd->getTerminator();
+ Instruction *BrLoopExit =
+ new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
+ ConstantInt::get(TripCount->getType(), Count));
+ BasicBlock *Exit = L->getUniqueExitBlock();
+ assert(Exit != 0 && "Loop must have a single exit block only");
+ // Split the exit to maintain loop canonicalization guarantees
+ SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
+ if (!Exit->isLandingPad()) {
+ SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", P);
+ } else {
+ SmallVector<BasicBlock*, 2> NewBBs;
+ SplitLandingPadPredecessors(Exit, Preds, ".unr1-lcssa", ".unr2-lcssa",
+ P, NewBBs);
+ }
+ // Add the branch to the exit block (around the unrolled loop)
+ BranchInst::Create(Exit, NewPH, BrLoopExit, InsertPt);
+ InsertPt->eraseFromParent();
+}
+
+/// Create a clone of the blocks in a loop and connect them together.
+/// This function doesn't create a clone of the loop structure.
+///
+/// There are two value maps that are defined and used. VMap is
+/// for the values in the current loop instance. LVMap contains
+/// the values from the last loop instance. We need the LVMap values
+/// to update the initial values for the current loop instance.
+///
+static void CloneLoopBlocks(Loop *L,
+ bool FirstCopy,
+ BasicBlock *InsertTop,
+ BasicBlock *InsertBot,
+ std::vector<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks,
+ ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap,
+ LoopInfo *LI) {
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".unr", F);
+ NewBlocks.push_back(NewBB);
+
+ if (Loop *ParentLoop = L->getParentLoop())
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+
+ VMap[*BB] = NewBB;
+ if (Header == *BB) {
+ // For the first block, add a CFG connection to this newly
+ // created block
+ InsertTop->getTerminator()->setSuccessor(0, NewBB);
+
+ // Change the incoming values to the ones defined in the
+ // previously cloned loop.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[I]);
+ if (FirstCopy) {
+ // We replace the first phi node with the value from the preheader
+ VMap[I] = NewPHI->getIncomingValueForBlock(Preheader);
+ NewBB->getInstList().erase(NewPHI);
+ } else {
+ // Update VMap with values from the previous block
+ unsigned idx = NewPHI->getBasicBlockIndex(Latch);
+ Value *InVal = NewPHI->getIncomingValue(idx);
+ if (Instruction *I = dyn_cast<Instruction>(InVal))
+ if (L->contains(I))
+ InVal = LVMap[InVal];
+ NewPHI->setIncomingValue(idx, InVal);
+ NewPHI->setIncomingBlock(idx, InsertTop);
+ }
+ }
+ }
+
+ if (Latch == *BB) {
+ VMap.erase((*BB)->getTerminator());
+ NewBB->getTerminator()->eraseFromParent();
+ BranchInst::Create(InsertBot, NewBB);
+ }
+ }
+  // LVMap, the last value map, is updated with the values for the current
+  // loop, which are used the next time this function is called.
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
+ VI != VE; ++VI) {
+ LVMap[VI->first] = VI->second;
+ }
+}
+
+/// Insert the prolog code needed when unrolling a loop with a
+/// run-time trip count.
+///
+/// This method assumes that the loop unroll factor is the total number
+/// of loop bodies in the loop after unrolling. (Some folks refer
+/// to the unroll factor as the number of *extra* copies added).
+/// We also assume that the loop unroll factor is a power of two. So, after
+/// unrolling the loop, the number of loop bodies executed is 2,
+/// 4, 8, etc. Note - LLVM converts the if-then-else sequence to a switch
+/// instruction in SimplifyCFG.cpp. Then, the backend decides how code for
+/// the switch instruction is generated.
+///
+/// extraiters = tripcount % loopfactor
+/// if (extraiters == 0) jump Loop:
+/// if (extraiters == loopfactor) jump L1
+/// if (extraiters == loopfactor-1) jump L2
+/// ...
+/// L1: LoopBody;
+/// L2: LoopBody;
+/// ...
+/// if tripcount < loopfactor jump End
+/// Loop:
+/// ...
+/// End:
+///
+bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
+ LPPassManager *LPM) {
+  // For now, only unroll loops that contain a single exiting block.
+ if (!L->getExitingBlock())
+ return false;
+
+ // Make sure the loop is in canonical form, and there is a single
+ // exit block only.
+ if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ return false;
+
+ // Use Scalar Evolution to compute the trip count. This allows more
+  // loops to be unrolled than relying on induction variable simplification.
+ ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
+ if (SE == 0)
+ return false;
+
+  // Only unroll loops with a computable trip count, and the trip count must
+  // be an integer value (allowing a pointer type is a TODO item).
+ const SCEV *BECount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
+ return false;
+
+ // Add 1 since the backedge count doesn't include the first loop iteration
+ const SCEV *TripCountSC =
+ SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
+ if (isa<SCEVCouldNotCompute>(TripCountSC))
+ return false;
+
+ // We only handle cases when the unroll factor is a power of 2.
+ // Count is the loop unroll factor, the number of extra copies added + 1.
+ if ((Count & (Count-1)) != 0)
+ return false;
+
+ // If this loop is nested, then the loop unroller changes the code in
+  // the parent loop, so the Scalar Evolution pass needs to be run again.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ BasicBlock *PH = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+  // It helps to split the original preheader twice: once for the end of the
+  // prolog code and once for a new loop preheader.
+ BasicBlock *PEnd = SplitEdge(PH, Header, LPM->getAsPass());
+ BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), LPM->getAsPass());
+ BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
+ // Compute the number of extra iterations required, which is:
+  //  extra iterations = run-time trip count % loop unroll factor
+ SCEVExpander Expander(*SE, "loop-unroll");
+ Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
+ PreHeaderBR);
+ Type *CountTy = TripCount->getType();
+ BinaryOperator *ModVal =
+ BinaryOperator::CreateURem(TripCount,
+ ConstantInt::get(CountTy, Count),
+ "xtraiter");
+ ModVal->insertBefore(PreHeaderBR);
+
+  // If there are no extra iterations, jump straight to the unrolled loop.
+ Value *BranchVal = new ICmpInst(PreHeaderBR,
+ ICmpInst::ICMP_NE, ModVal,
+ ConstantInt::get(CountTy, 0), "lcmp");
+ // Branch to either the extra iterations or the unrolled loop
+ // We will fix up the true branch label when adding loop body copies
+ BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
+ assert(PreHeaderBR->isUnconditional() &&
+ PreHeaderBR->getSuccessor(0) == PEnd &&
+ "CFG edges in Preheader are not correct");
+ PreHeaderBR->eraseFromParent();
+
+ ValueToValueMapTy LVMap;
+ Function *F = Header->getParent();
+ // These variables are used to update the CFG links in each iteration
+ BasicBlock *CompareBB = 0;
+ BasicBlock *LastLoopBB = PH;
+ // Get an ordered list of blocks in the loop to help with the ordering of the
+ // cloned blocks in the prolog code
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ //
+ // For each extra loop iteration, create a copy of the loop's basic blocks
+ // and generate a condition that branches to the copy depending on the
+ // number of 'left over' iterations.
+ //
+ for (unsigned leftOverIters = Count-1; leftOverIters > 0; --leftOverIters) {
+ std::vector<BasicBlock*> NewBlocks;
+ ValueToValueMapTy VMap;
+
+    // Clone all the basic blocks in the loop; the loop structure itself is
+    // not cloned. This call also adds the appropriate CFG connections.
+ CloneLoopBlocks(L, (leftOverIters == Count-1), LastLoopBB, PEnd, NewBlocks,
+ LoopBlocks, VMap, LVMap, LI);
+ LastLoopBB = cast<BasicBlock>(VMap[Latch]);
+
+    // Insert the cloned blocks into the function just before the original loop.
+ F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(),
+ NewBlocks[0], F->end());
+
+ // Generate the code for the comparison which determines if the loop
+ // prolog code needs to be executed.
+ if (leftOverIters == Count-1) {
+      // There is no compare block for the fall-through case of the last
+      // left-over iteration.
+ CompareBB = NewBlocks[0];
+ } else {
+ // Create a new block for the comparison
+ BasicBlock *NewBB = BasicBlock::Create(CompareBB->getContext(), "unr.cmp",
+ F, CompareBB);
+ if (Loop *ParentLoop = L->getParentLoop()) {
+ // Add the new block to the parent loop, if needed
+ ParentLoop->addBasicBlockToLoop(NewBB, LI->getBase());
+ }
+
+      // Emit the comparison with the extra-iteration value, and the branch.
+ Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
+ ConstantInt::get(CountTy, leftOverIters),
+ "un.tmp");
+ // Branch to either the extra iterations or the unrolled loop
+ BranchInst::Create(NewBlocks[0], CompareBB,
+ BranchVal, NewBB);
+ CompareBB = NewBB;
+ PH->getTerminator()->setSuccessor(0, NewBB);
+ VMap[NewPH] = CompareBB;
+ }
+
+    // Rewrite the cloned instructions' operands to use the values
+    // created for this copy of the loop body.
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (BasicBlock::iterator I = NewBlocks[i]->begin(),
+ E = NewBlocks[i]->end(); I != E; ++I) {
+ RemapInstruction(I, VMap,
+ RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
+ }
+ }
+ }
+
+ // Connect the prolog code to the original loop and update the
+ // PHI functions.
+ ConnectProlog(L, TripCount, Count, LastLoopBB, PEnd, PH, NewPH, LVMap,
+ LPM->getAsPass());
+ NumRuntimeUnrolled++;
+ return true;
+}
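
To make the emitted control flow concrete, here is a plain C++ model of what runtime unrolling by Count == 4 executes; it is an illustration of the scheme in the file header, not code from the patch ('body' stands for one original loop iteration):

    void body(unsigned i); // one iteration of the original loop

    void unrolledBy4(unsigned tripcount) {
      unsigned i = 0;
      unsigned xtraiter = tripcount % 4; // the URem placed in the preheader
      switch (xtraiter) {                // SimplifyCFG turns the compare
      case 3: body(i++);                 // chain into a switch like this;
      case 2: body(i++);                 // deliberate fall-through runs
      case 1: body(i++);                 // exactly 'xtraiter' iterations
      }
      if (tripcount < 4) return;         // branch around the unrolled loop
      for (; i < tripcount; i += 4) {    // main loop: four bodies per trip
        body(i); body(i + 1); body(i + 2); body(i + 3);
      }
    }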
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 61ab3f65330a..c70ced18e45d 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -1,3 +1,16 @@
+//===- LowerExpectIntrinsic.cpp - Lower expect intrinsic ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the 'expect' intrinsic to LLVM metadata.
+//
+//===----------------------------------------------------------------------===//
+
#define DEBUG_TYPE "lower-expect-intrinsic"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -60,14 +73,17 @@ bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
LLVMContext &Context = CI->getContext();
Type *Int32Ty = Type::getInt32Ty(Context);
- unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ SwitchInst::CaseIt Case = SI->findCaseValue(ExpectedValue);
std::vector<Value *> Vec;
unsigned n = SI->getNumCases();
- Vec.resize(n + 1); // +1 for MDString
+ Vec.resize(n + 1 + 1); // +1 for MDString and +1 for default case
Vec[0] = MDString::get(Context, "branch_weights");
+ Vec[1] = ConstantInt::get(Int32Ty, Case == SI->case_default() ?
+ LikelyBranchWeight : UnlikelyBranchWeight);
for (unsigned i = 0; i < n; ++i) {
- Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+ Vec[i + 1 + 1] = ConstantInt::get(Int32Ty, i == Case.getCaseIndex() ?
+ LikelyBranchWeight : UnlikelyBranchWeight);
}
MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
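
After this change the weight vector reserves slot 1 for the default destination. A hedged sketch of the resulting layout (DefaultWeight and CaseWeight are placeholders, not names from the patch):

    Vec[0] = MDString::get(Context, "branch_weights");
    Vec[1] = ConstantInt::get(Int32Ty, DefaultWeight);    // default dest
    for (unsigned i = 0; i < SI->getNumCases(); ++i)
      Vec[i + 2] = ConstantInt::get(Int32Ty, CaseWeight); // case #i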
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index c96c8fce7b19..930555424ded 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -54,7 +54,6 @@
using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
-STATISTIC(NumUnwinds, "Number of unwinds replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
@@ -193,20 +192,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
BB->getInstList().erase(II);
++NumInvokes; Changed = true;
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- // Insert a call to abort()
- CallInst::Create(AbortFn, "", UI)->setTailCall();
-
- // Insert a return instruction. This really should be a "barrier", as it
- // is unreachable.
- ReturnInst::Create(F.getContext(),
- F.getReturnType()->isVoidTy() ?
- 0 : Constant::getNullValue(F.getReturnType()), UI);
-
- // Remove the unwind instruction now.
- BB->getInstList().erase(UI);
-
- ++NumUnwinds; Changed = true;
}
return Changed;
}
@@ -404,7 +389,6 @@ splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
SmallVector<ReturnInst*,16> Returns;
- SmallVector<UnwindInst*,16> Unwinds;
SmallVector<InvokeInst*,16> Invokes;
UnreachableInst* UnreachablePlaceholder = 0;
@@ -415,14 +399,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Returns.push_back(RI);
} else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
Invokes.push_back(II);
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- Unwinds.push_back(UI);
}
- if (Unwinds.empty() && Invokes.empty()) return false;
+ if (Invokes.empty()) return false;
NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
// TODO: This is not an optimal way to do this. In particular, this always
// inserts setjmp calls into the entries of functions with invoke instructions
@@ -572,13 +553,6 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
CallInst::Create(AbortFn, "",
TermBlock->getTerminator())->setTailCall();
-
- // Replace all unwinds with a branch to the unwind handler.
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(UnwindHandler, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
// Replace the inserted unreachable with a branch to the unwind handler.
if (UnreachablePlaceholder) {
BranchInst::Create(UnwindHandler, UnreachablePlaceholder);
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 686178ca01cc..a16130d3d74a 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -237,10 +237,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
unsigned numCmps = 0;
// Start with "simple" cases
- for (unsigned i = 1; i < SI->getNumSuccessors(); ++i)
- Cases.push_back(CaseRange(SI->getSuccessorValue(i),
- SI->getSuccessorValue(i),
- SI->getSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(CaseRange(i.getCaseValue(), i.getCaseValue(),
+ i.getCaseSuccessor()));
+
std::sort(Cases.begin(), Cases.end(), CaseCmp());
// Merge case into clusters
@@ -281,7 +281,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
BasicBlock* Default = SI->getDefaultDest();
// If there is only the default destination, don't bother with the code below.
- if (SI->getNumCases() == 1) {
+ if (!SI->getNumCases()) {
BranchInst::Create(SI->getDefaultDest(), CurBlock);
CurBlock->getInstList().erase(SI);
return;
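
The flipped condition reflects a semantic change in SwitchInst: getNumCases() now counts only explicit cases, so a default-only switch reports 0 where the old successor-based count reported 1. Side by side:

    // Before: the default occupied successor/case slot 0.
    if (SI->getNumCases() == 1) { ... }   // only the default remains
    // After: explicit cases only.
    if (!SI->getNumCases()) { ... }       // only the default remains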
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
new file mode 100644
index 000000000000..8491c5582d97
--- /dev/null
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -0,0 +1,64 @@
+//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on Modules.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+static void appendToGlobalArray(const char *Array,
+ Module &M, Function *F, int Priority) {
+ IRBuilder<> IRB(M.getContext());
+ FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
+ StructType *Ty = StructType::get(
+ IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
+
+ Constant *RuntimeCtorInit = ConstantStruct::get(
+ Ty, IRB.getInt32(Priority), F, NULL);
+
+ // Get the current set of static global constructors and add the new ctor
+ // to the list.
+ SmallVector<Constant *, 16> CurrentCtors;
+ if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+ if (Constant *Init = GVCtor->getInitializer()) {
+ unsigned n = Init->getNumOperands();
+ CurrentCtors.reserve(n + 1);
+ for (unsigned i = 0; i != n; ++i)
+ CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
+ }
+ GVCtor->eraseFromParent();
+ }
+
+ CurrentCtors.push_back(RuntimeCtorInit);
+
+ // Create a new initializer.
+ ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
+ CurrentCtors.size());
+ Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
+
+ // Create the new global variable and replace all uses of
+ // the old global variable with the new one.
+ (void)new GlobalVariable(M, NewInit->getType(), false,
+ GlobalValue::AppendingLinkage, NewInit, Array);
+}
+
+void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_ctors", M, F, Priority);
+}
+
+void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority) {
+ appendToGlobalArray("llvm.global_dtors", M, F, Priority);
+}
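
A hypothetical use of the new helper, registering an instrumentation initializer as a static constructor (the function name and priority here are invented for illustration):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Function.h"
    #include "llvm/Module.h"
    #include "llvm/Transforms/Utils/ModuleUtils.h"
    using namespace llvm;

    static void registerModuleInit(Module &M) {
      FunctionType *FnTy =
          FunctionType::get(Type::getVoidTy(M.getContext()), false);
      Function *Init = Function::Create(FnTy, GlobalValue::InternalLinkage,
                                        "my.module_init", &M);
      appendToGlobalCtors(M, Init, 65535); // runs before main()
    }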
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index db3e94251350..2357d81916a4 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -41,6 +41,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -66,7 +67,8 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
}
static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
- return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
+ using llvm::hash_value;
+ return static_cast<unsigned>(hash_value(Val));
}
static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
return LHS == RHS;
@@ -423,7 +425,8 @@ void PromoteMem2Reg::run() {
// Finally, after the scan, check to see if the store is all that is left.
if (Info.UsingBlocks.empty()) {
- // Record debuginfo for the store and remove the declaration's debuginfo.
+ // Record debuginfo for the store and remove the declaration's
+ // debuginfo.
if (DbgDeclareInst *DDI = Info.DbgDeclare) {
if (!DIB)
DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent());
@@ -590,7 +593,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
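
The extra 0 threaded through SimplifyInstruction here and in LoopSimplify.cpp points at a new parameter inserted before the DominatorTree, presumably a TargetLibraryInfo (the header change is not part of this patch). Callers that don't track one now pass:

    if (Value *V = SimplifyInstruction(PN, /*TD=*/0, /*TLI=*/0, &DT)) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
    }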
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index fa8061c2b44c..e60a41b786a7 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -518,3 +518,10 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
User->eraseFromParent();
}
}
+
+bool
+LoadAndStorePromoter::isInstInList(Instruction *I,
+ const SmallVectorImpl<Instruction*> &Insts)
+ const {
+ return std::find(Insts.begin(), Insts.end(), I) != Insts.end();
+}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index b8c3ab4c6077..66dd2c954e29 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,16 +14,20 @@
#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Operator.h"
#include "llvm/Type.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -63,9 +67,8 @@ class SimplifyCFGOpt {
bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
IRBuilder<> &Builder);
- bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
- bool SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder);
+ bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
bool SimplifyIndirectBr(IndirectBrInst *IBI);
@@ -205,6 +208,42 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
return BI->getCondition();
}
+/// ComputeSpeculationCost - Compute an abstract "cost" of speculating the
+/// given instruction, which is assumed to be safe to speculate. 1 means
+/// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive.
+static unsigned ComputeSpeculationCost(const User *I) {
+ assert(isSafeToSpeculativelyExecute(I) &&
+ "Instruction is not safe to speculatively execute!");
+ switch (Operator::getOpcode(I)) {
+ default:
+    // When in doubt, be conservative.
+ return UINT_MAX;
+ case Instruction::GetElementPtr:
+ // GEPs are cheap if all indices are constant.
+ if (!cast<GEPOperator>(I)->hasAllConstantIndices())
+ return UINT_MAX;
+ return 1;
+ case Instruction::Load:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return 1; // These are all cheap.
+
+ case Instruction::Call:
+ case Instruction::Select:
+ return 2;
+ }
+}
+
/// DominatesMergePoint - If we have a merge point of an "if condition" as
/// accepted above, return true if the specified value dominates the block. We
/// don't handle the true generality of domination here, just a special case
@@ -257,46 +296,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// Okay, it looks like the instruction IS in the "condition". Check to
// see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
- if (!I->isSafeToSpeculativelyExecute())
+ if (!isSafeToSpeculativelyExecute(I))
return false;
- unsigned Cost = 0;
-
- switch (I->getOpcode()) {
- default: return false; // Cannot hoist this out safely.
- case Instruction::Load:
- // We have to check to make sure there are no instructions before the
- // load in its basic block, as we are going to hoist the load out to its
- // predecessor.
- if (PBB->getFirstNonPHIOrDbg() != I)
- return false;
- Cost = 1;
- break;
- case Instruction::GetElementPtr:
- // GEPs are cheap if all indices are constant.
- if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
- return false;
- Cost = 1;
- break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::ICmp:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- Cost = 1;
- break; // These are all cheap and non-trapping instructions.
-
- case Instruction::Select:
- Cost = 2;
- break;
- }
+ unsigned Cost = ComputeSpeculationCost(I);
if (Cost > CostRemaining)
return false;
@@ -373,9 +376,7 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Span = Span.inverse();
// If there are a ton of values, we don't want to make a ginormous switch.
- if (Span.getSetSize().ugt(8) || Span.isEmptySet() ||
- // We don't handle wrapped sets yet.
- Span.isWrappedSet())
+ if (Span.getSetSize().ugt(8) || Span.isEmptySet())
return 0;
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
@@ -430,9 +431,9 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
return 0;
}
-
+
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
- Instruction* Cond = 0;
+ Instruction *Cond = 0;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
@@ -479,8 +480,9 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
BasicBlock*> > &Cases) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cases.reserve(SI->getNumCases());
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- Cases.push_back(std::make_pair(SI->getCaseValue(i), SI->getSuccessor(i)));
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); i != e; ++i)
+ Cases.push_back(std::make_pair(i.getCaseValue(),
+ i.getCaseSuccessor()));
return SI->getDefaultDest();
}
@@ -603,11 +605,13 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
<< "Through successor TI: " << *TI);
- for (unsigned i = SI->getNumCases()-1; i != 0; --i)
- if (DeadCases.count(SI->getCaseValue(i))) {
- SI->getSuccessor(i)->removePredecessor(TI->getParent());
+ for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
+ --i;
+ if (DeadCases.count(i.getCaseValue())) {
+ i.getCaseSuccessor()->removePredecessor(TI->getParent());
SI->removeCase(i);
}
+ }
DEBUG(dbgs() << "Leaving: " << *TI << "\n");
return true;
@@ -951,6 +955,20 @@ HoistTerminator:
/// and a BB2 and the only successor of BB1 is BB2, hoist simple code
/// (for now, restricted to a single instruction that's side effect free) from
/// the BB1 into the branch block to speculatively execute it.
+///
+/// Turn
+/// BB:
+/// %t1 = icmp
+/// br i1 %t1, label %BB1, label %BB2
+/// BB1:
+/// %t3 = add %t2, c
+/// br label BB2
+/// BB2:
+/// =>
+/// BB:
+/// %t1 = icmp
+/// %t4 = add %t2, c
+/// %t3 = select i1 %t1, %t4, %t2
static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
  // Only speculatively execute a single instruction (not counting the
// terminator) for now.
@@ -967,8 +985,29 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
return false;
HInst = I;
}
- if (!HInst)
- return false;
+
+ BasicBlock *BIParent = BI->getParent();
+
+ // Check the instruction to be hoisted, if there is one.
+ if (HInst) {
+ // Don't hoist the instruction if it's unsafe or expensive.
+ if (!isSafeToSpeculativelyExecute(HInst))
+ return false;
+ if (ComputeSpeculationCost(HInst) > PHINodeFoldingThreshold)
+ return false;
+
+ // Do not hoist the instruction if any of its operands are defined but not
+ // used in this BB. The transformation will prevent the operand from
+ // being sunk into the use block.
+ for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
+ i != e; ++i) {
+ Instruction *OpI = dyn_cast<Instruction>(*i);
+ if (OpI && OpI->getParent() == BIParent &&
+ !OpI->mayHaveSideEffects() &&
+ !OpI->isUsedInBasicBlock(BIParent))
+ return false;
+ }
+ }
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
@@ -983,130 +1022,78 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
Invert = true;
}
- // Turn
- // BB:
- // %t1 = icmp
- // br i1 %t1, label %BB1, label %BB2
- // BB1:
- // %t3 = add %t2, c
- // br label BB2
- // BB2:
- // =>
- // BB:
- // %t1 = icmp
- // %t4 = add %t2, c
- // %t3 = select i1 %t1, %t2, %t3
- switch (HInst->getOpcode()) {
- default: return false; // Not safe / profitable to hoist.
- case Instruction::Add:
- case Instruction::Sub:
- // Not worth doing for vector ops.
- if (HInst->getType()->isVectorTy())
- return false;
- break;
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- // Don't mess with vector operations.
- if (HInst->getType()->isVectorTy())
- return false;
- break; // These are all cheap and non-trapping instructions.
- }
-
- // If the instruction is obviously dead, don't try to predicate it.
- if (HInst->use_empty()) {
- HInst->eraseFromParent();
- return true;
+ // Collect interesting PHIs, and scan for hazards.
+ SmallSetVector<std::pair<Value *, Value *>, 4> PHIs;
+ BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
+ for (BasicBlock::iterator I = BB2->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BIParentV = PN->getIncomingValueForBlock(BIParent);
+
+ // Skip PHIs which are trivial.
+ if (BB1V == BIParentV)
+ continue;
+
+    // Check for safety.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BB1V)) {
+ // An unfolded ConstantExpr could end up getting expanded into
+ // Instructions. Don't speculate this and another instruction at
+ // the same time.
+ if (HInst)
+ return false;
+ if (!isSafeToSpeculativelyExecute(CE))
+ return false;
+ if (ComputeSpeculationCost(CE) > PHINodeFoldingThreshold)
+ return false;
+ }
+
+ // Ok, we may insert a select for this PHI.
+ PHIs.insert(std::make_pair(BB1V, BIParentV));
}
- // Can we speculatively execute the instruction? And what is the value
- // if the condition is false? Consider the phi uses, if the incoming value
- // from the "if" block are all the same V, then V is the value of the
- // select if the condition is false.
- BasicBlock *BIParent = BI->getParent();
- SmallVector<PHINode*, 4> PHIUses;
- Value *FalseV = NULL;
+ // If there are no PHIs to process, bail early. This helps ensure idempotence
+ // as well.
+ if (PHIs.empty())
+ return false;
- BasicBlock *BB2 = BB1->getTerminator()->getSuccessor(0);
- for (Value::use_iterator UI = HInst->use_begin(), E = HInst->use_end();
- UI != E; ++UI) {
- // Ignore any user that is not a PHI node in BB2. These can only occur in
- // unreachable blocks, because they would not be dominated by the instr.
- PHINode *PN = dyn_cast<PHINode>(*UI);
- if (!PN || PN->getParent() != BB2)
- return false;
- PHIUses.push_back(PN);
-
- Value *PHIV = PN->getIncomingValueForBlock(BIParent);
- if (!FalseV)
- FalseV = PHIV;
- else if (FalseV != PHIV)
- return false; // Inconsistent value when condition is false.
- }
-
- assert(FalseV && "Must have at least one user, and it must be a PHI");
-
- // Do not hoist the instruction if any of its operands are defined but not
- // used in this BB. The transformation will prevent the operand from
- // being sunk into the use block.
- for (User::op_iterator i = HInst->op_begin(), e = HInst->op_end();
- i != e; ++i) {
- Instruction *OpI = dyn_cast<Instruction>(*i);
- if (OpI && OpI->getParent() == BIParent &&
- !OpI->isUsedInBasicBlock(BIParent))
- return false;
- }
+ // If we get here, we can hoist the instruction and if-convert.
+ DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *BB1 << "\n";);
- // If we get here, we can hoist the instruction. Try to place it
- // before the icmp instruction preceding the conditional branch.
- BasicBlock::iterator InsertPos = BI;
- if (InsertPos != BIParent->begin())
- --InsertPos;
- // Skip debug info between condition and branch.
- while (InsertPos != BIParent->begin() && isa<DbgInfoIntrinsic>(InsertPos))
- --InsertPos;
- if (InsertPos == BrCond && !isa<PHINode>(BrCond)) {
- SmallPtrSet<Instruction *, 4> BB1Insns;
- for(BasicBlock::iterator BB1I = BB1->begin(), BB1E = BB1->end();
- BB1I != BB1E; ++BB1I)
- BB1Insns.insert(BB1I);
- for(Value::use_iterator UI = BrCond->use_begin(), UE = BrCond->use_end();
- UI != UE; ++UI) {
- Instruction *Use = cast<Instruction>(*UI);
- if (!BB1Insns.count(Use)) continue;
-
- // If BrCond uses the instruction that place it just before
- // branch instruction.
- InsertPos = BI;
- break;
- }
- } else
- InsertPos = BI;
- BIParent->getInstList().splice(InsertPos, BB1->getInstList(), HInst);
+ // Hoist the instruction.
+ if (HInst)
+ BIParent->getInstList().splice(BI, BB1->getInstList(), HInst);
- // Create a select whose true value is the speculatively executed value and
- // false value is the previously determined FalseV.
+ // Insert selects and rewrite the PHI operands.
IRBuilder<true, NoFolder> Builder(BI);
- SelectInst *SI;
- if (Invert)
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, FalseV, HInst,
- FalseV->getName() + "." + HInst->getName()));
- else
- SI = cast<SelectInst>
- (Builder.CreateSelect(BrCond, HInst, FalseV,
- HInst->getName() + "." + FalseV->getName()));
-
- // Make the PHI node use the select for all incoming values for "then" and
- // "if" blocks.
- for (unsigned i = 0, e = PHIUses.size(); i != e; ++i) {
- PHINode *PN = PHIUses[i];
- for (unsigned j = 0, ee = PN->getNumIncomingValues(); j != ee; ++j)
- if (PN->getIncomingBlock(j) == BB1 || PN->getIncomingBlock(j) == BIParent)
- PN->setIncomingValue(j, SI);
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
+ Value *TrueV = PHIs[i].first;
+ Value *FalseV = PHIs[i].second;
+
+ // Create a select whose true value is the speculatively executed value and
+ // false value is the previously determined FalseV.
+ SelectInst *SI;
+ if (Invert)
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, FalseV, TrueV,
+ FalseV->getName() + "." + TrueV->getName()));
+ else
+ SI = cast<SelectInst>
+ (Builder.CreateSelect(BrCond, TrueV, FalseV,
+ TrueV->getName() + "." + FalseV->getName()));
+
+ // Make the PHI node use the select for all incoming values for "then" and
+ // "if" blocks.
+ for (BasicBlock::iterator I = BB2->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+ unsigned BB1I = PN->getBasicBlockIndex(BB1);
+ unsigned BIParentI = PN->getBasicBlockIndex(BIParent);
+ Value *BB1V = PN->getIncomingValue(BB1I);
+ Value *BIParentV = PN->getIncomingValue(BIParentI);
+ if (TrueV == BB1V && FalseV == BIParentV) {
+ PN->setIncomingValue(BB1I, SI);
+ PN->setIncomingValue(BIParentI, SI);
+ }
+ }
}
++NumSpeculations;
@@ -1461,6 +1448,49 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,
return true;
}
+/// ExtractBranchMetadata - Given a conditional BranchInstruction, retrieve the
+/// probabilities of the branch taking each edge. Fills in the two APInt
+/// parameters and returns true, or returns false if no or invalid metadata was
+/// found.
+static bool ExtractBranchMetadata(BranchInst *BI,
+ APInt &ProbTrue, APInt &ProbFalse) {
+ assert(BI->isConditional() &&
+ "Looking for probabilities on unconditional branch?");
+ MDNode *ProfileData = BI->getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3) return false;
+ ConstantInt *CITrue = dyn_cast<ConstantInt>(ProfileData->getOperand(1));
+ ConstantInt *CIFalse = dyn_cast<ConstantInt>(ProfileData->getOperand(2));
+ if (!CITrue || !CIFalse) return false;
+ ProbTrue = CITrue->getValue();
+ ProbFalse = CIFalse->getValue();
+ assert(ProbTrue.getBitWidth() == 32 && ProbFalse.getBitWidth() == 32 &&
+ "Branch probability metadata must be 32-bit integers");
+ return true;
+}
+
+/// MultiplyAndLosePrecision - Multiplies A and B, then returns the result. In
+/// the event of overflow, logically shifts all four inputs right until the
+/// multiply fits; BitsLost reports how many bits each input was shifted.
+static APInt MultiplyAndLosePrecision(APInt &A, APInt &B, APInt &C, APInt &D,
+ unsigned &BitsLost) {
+ BitsLost = 0;
+ bool Overflow = false;
+ APInt Result = A.umul_ov(B, Overflow);
+ if (Overflow) {
+ APInt MaxB = APInt::getMaxValue(A.getBitWidth()).udiv(A);
+ do {
+ B = B.lshr(1);
+ ++BitsLost;
+ } while (B.ugt(MaxB));
+ A = A.lshr(BitsLost);
+ C = C.lshr(BitsLost);
+ D = D.lshr(BitsLost);
+ Result = A * B;
+ }
+ return Result;
+}
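// Worked example (illustrative, 32-bit APInts): A = 0x80000000, B = 4
// overflows, so MaxB = 0xFFFFFFFF udiv A = 1. B is shifted right twice
// (4 -> 2 -> 1, BitsLost = 2), then A, C and D are shifted right by 2 as
// well, and the returned product 0x20000000 is the true product 2^33 scaled
// down by 2^(2*BitsLost) = 16. This is why the callers below rescale
// previously computed terms by lshr(BitsLost*2).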
+
+
/// FoldBranchToCommonDest - If this basic block is simple enough, and if a
/// predecessor branches to us and one of our successors, fold the block into
/// the predecessor and use logical operations to pick the right destination.
@@ -1479,7 +1509,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(FrontIt)) ++FrontIt;
-
+
// Allow a single instruction to be hoisted in addition to the compare
// that feeds the branch. We later ensure that any values that _it_ uses
// were also live in the predecessor, so that we don't unnecessarily create
@@ -1487,7 +1517,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
Instruction *BonusInst = 0;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
- FrontIt->isSafeToSpeculativelyExecute()) {
+ isSafeToSpeculativelyExecute(FrontIt)) {
BonusInst = &*FrontIt;
++FrontIt;
@@ -1557,7 +1587,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
SmallPtrSet<Value*, 4> UsedValues;
for (Instruction::op_iterator OI = BonusInst->op_begin(),
OE = BonusInst->op_end(); OI != OE; ++OI) {
- Value* V = *OI;
+ Value *V = *OI;
if (!isa<Constant>(V))
UsedValues.insert(V);
}
@@ -1602,10 +1632,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
}
PBI->setCondition(NewCond);
- BasicBlock *OldTrue = PBI->getSuccessor(0);
- BasicBlock *OldFalse = PBI->getSuccessor(1);
- PBI->setSuccessor(0, OldFalse);
- PBI->setSuccessor(1, OldTrue);
+ PBI->swapSuccessors();
}
// If we have a bonus inst, clone it into the predecessor block.
@@ -1638,6 +1665,94 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
PBI->setSuccessor(1, FalseDest);
}
+ // TODO: If BB is reachable from all paths through PredBlock, then we
+ // could replace PBI's branch probabilities with BI's.
+
+ // Merge probability data into PredBlock's branch.
+ APInt A, B, C, D;
+ if (ExtractBranchMetadata(PBI, C, D) && ExtractBranchMetadata(BI, A, B)) {
+ // Given IR which does:
+ // bbA:
+ // br i1 %x, label %bbB, label %bbC
+ // bbB:
+ // br i1 %y, label %bbD, label %bbC
+      // Let's call the probability of taking the edge from %bbA to %bbB 'a',
+      // from %bbA to %bbC 'b', from %bbB to %bbD 'c', and from %bbB to
+      // %bbC 'd'.
+ //
+ // We transform the IR into:
+ // bbA:
+ // br i1 %z, label %bbD, label %bbC
+ // where the probability of going to %bbD is (a*c) and going to bbC is
+ // (b+a*d).
+ //
+      // Probabilities aren't stored as ratios directly; in terms of the raw
+      // branch weights this becomes:
+      // a*c -> A*C, b+(a*d) -> A*D+B*C+B*D.
+
+ // In the event of overflow, we want to drop the LSB of the input
+ // probabilities.
+ unsigned BitsLost;
+
+ // Ignore overflow result on ProbTrue.
+ APInt ProbTrue = MultiplyAndLosePrecision(A, C, B, D, BitsLost);
+
+ APInt Tmp1 = MultiplyAndLosePrecision(B, D, A, C, BitsLost);
+ if (BitsLost) {
+ ProbTrue = ProbTrue.lshr(BitsLost*2);
+ }
+
+ APInt Tmp2 = MultiplyAndLosePrecision(A, D, C, B, BitsLost);
+ if (BitsLost) {
+ ProbTrue = ProbTrue.lshr(BitsLost*2);
+ Tmp1 = Tmp1.lshr(BitsLost*2);
+ }
+
+ APInt Tmp3 = MultiplyAndLosePrecision(B, C, A, D, BitsLost);
+ if (BitsLost) {
+ ProbTrue = ProbTrue.lshr(BitsLost*2);
+ Tmp1 = Tmp1.lshr(BitsLost*2);
+ Tmp2 = Tmp2.lshr(BitsLost*2);
+ }
+
+ bool Overflow1 = false, Overflow2 = false;
+ APInt Tmp4 = Tmp2.uadd_ov(Tmp3, Overflow1);
+ APInt ProbFalse = Tmp4.uadd_ov(Tmp1, Overflow2);
+
+ if (Overflow1 || Overflow2) {
+ ProbTrue = ProbTrue.lshr(1);
+ Tmp1 = Tmp1.lshr(1);
+ Tmp2 = Tmp2.lshr(1);
+ Tmp3 = Tmp3.lshr(1);
+ Tmp4 = Tmp2 + Tmp3;
+ ProbFalse = Tmp4 + Tmp1;
+ }
+
+ // The sum of branch weights must fit in 32-bits.
+ if (ProbTrue.isNegative() && ProbFalse.isNegative()) {
+ ProbTrue = ProbTrue.lshr(1);
+ ProbFalse = ProbFalse.lshr(1);
+ }
+
+ if (ProbTrue != ProbFalse) {
+ // Normalize the result.
+ APInt GCD = APIntOps::GreatestCommonDivisor(ProbTrue, ProbFalse);
+ ProbTrue = ProbTrue.udiv(GCD);
+ ProbFalse = ProbFalse.udiv(GCD);
+
+ LLVMContext &Context = BI->getContext();
+ Value *Ops[3];
+ Ops[0] = BI->getMetadata(LLVMContext::MD_prof)->getOperand(0);
+ Ops[1] = ConstantInt::get(Context, ProbTrue);
+ Ops[2] = ConstantInt::get(Context, ProbFalse);
+ PBI->setMetadata(LLVMContext::MD_prof, MDNode::get(Context, Ops));
+ } else {
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ }
+ } else {
+ PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ }
+
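// A minimal standalone sketch of the weight merging above, using plain
// 64-bit integers instead of APInt and skipping the overflow handling; the
// weight values here are made up for illustration.
#include <cstdint>
#include <cstdio>
#include <numeric> // std::gcd, C++17

int main() {
  uint64_t C = 1, D = 2; // PBI's weights (outer branch in bbA)
  uint64_t A = 3, B = 5; // BI's weights (inner branch in bbB)
  uint64_t ProbTrue  = A * C;                 // weight for a*c
  uint64_t ProbFalse = A * D + B * C + B * D; // weight for b + a*d
  uint64_t G = std::gcd(ProbTrue, ProbFalse); // normalize as the pass does
  std::printf("merged weights %llu:%llu\n",
              (unsigned long long)(ProbTrue / G),
              (unsigned long long)(ProbFalse / G)); // prints 1:7
}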
// Copy any debug value intrinsics into the end of PredBlock.
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
if (isa<DbgInfoIntrinsic>(*I))
@@ -1894,8 +2009,8 @@ static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) {
// Find the relevant condition and destinations.
Value *Condition = Select->getCondition();
- BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal));
- BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal));
+ BasicBlock *TrueBB = SI->findCaseValue(TrueVal).getCaseSuccessor();
+ BasicBlock *FalseBB = SI->findCaseValue(FalseVal).getCaseSuccessor();
// Perform the actual simplification.
return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB);
@@ -1979,7 +2094,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
// Ok, the block is reachable from the default dest. If the constant we're
// comparing exists in one of the other edges, then we can constant fold ICI
// and zap it.
- if (SI->findCaseValue(Cst) != 0) {
+ if (SI->findCaseValue(Cst) != SI->case_default()) {
Value *V;
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
V = ConstantInt::getFalse(BB->getContext());
@@ -2235,52 +2350,6 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
return false;
}
-bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
- // Check to see if the first instruction in this block is just an unwind.
- // If so, replace any invoke instructions which use this as an exception
- // destination with call instructions.
- BasicBlock *BB = UI->getParent();
- if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false;
-
- bool Changed = false;
- SmallVector<BasicBlock*, 8> Preds(pred_begin(BB), pred_end(BB));
- while (!Preds.empty()) {
- BasicBlock *Pred = Preds.back();
- InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator());
- if (II && II->getUnwindDest() == BB) {
- // Insert a new branch instruction before the invoke, because this
- // is now a fall through.
- Builder.SetInsertPoint(II);
- BranchInst *BI = Builder.CreateBr(II->getNormalDest());
- Pred->getInstList().remove(II); // Take out of symbol table
-
- // Insert the call now.
- SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
- Builder.SetInsertPoint(BI);
- CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args, II->getName());
- CI->setCallingConv(II->getCallingConv());
- CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the Call now does instead.
- II->replaceAllUsesWith(CI);
- delete II;
- Changed = true;
- }
-
- Preds.pop_back();
- }
-
- // If this block is now dead (and isn't the entry block), remove it.
- if (pred_begin(BB) == pred_end(BB) &&
- BB != &BB->getParent()->getEntryBlock()) {
- // We know there are no successors, so just nuke the block.
- BB->eraseFromParent();
- return true;
- }
-
- return Changed;
-}
-
bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
BasicBlock *BB = UI->getParent();
@@ -2352,8 +2421,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == BB) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == BB) {
BB->removePredecessor(SI->getParent());
SI->removeCase(i);
--i; --e;
@@ -2361,14 +2431,15 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
}
// If the default value is unreachable, figure out the most popular
// destination and make it the default.
- if (SI->getSuccessor(0) == BB) {
+ if (SI->getDefaultDest() == BB) {
std::map<BasicBlock*, std::pair<unsigned, unsigned> > Popularity;
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) {
- std::pair<unsigned, unsigned>& entry =
- Popularity[SI->getSuccessor(i)];
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i) {
+ std::pair<unsigned, unsigned> &entry =
+ Popularity[i.getCaseSuccessor()];
if (entry.first == 0) {
entry.first = 1;
- entry.second = i;
+ entry.second = i.getCaseIndex();
} else {
entry.first++;
}
@@ -2390,7 +2461,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
if (MaxBlock) {
// Make this the new default, allowing us to delete any explicit
// edges to it.
- SI->setSuccessor(0, MaxBlock);
+ SI->setDefaultDest(MaxBlock);
Changed = true;
// If MaxBlock has phinodes in it, remove MaxPop-1 entries from
@@ -2399,8 +2470,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
for (unsigned i = 0; i != MaxPop-1; ++i)
MaxBlock->removePredecessor(SI->getParent());
- for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i)
- if (SI->getSuccessor(i) == MaxBlock) {
+ for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end();
+ i != e; ++i)
+ if (i.getCaseSuccessor() == MaxBlock) {
SI->removeCase(i);
--i; --e;
}
@@ -2442,17 +2514,19 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
/// TurnSwitchRangeIntoICmp - Turns a switch whose cases form a contiguous
/// integer range with a single destination into a sub, an icmp and a branch.
static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
- assert(SI->getNumCases() > 2 && "Degenerate switch?");
+ assert(SI->getNumCases() > 1 && "Degenerate switch?");
// Make sure all cases point to the same destination and gather the values.
SmallVector<ConstantInt *, 16> Cases;
- Cases.push_back(SI->getCaseValue(1));
- for (unsigned I = 2, E = SI->getNumCases(); I != E; ++I) {
- if (SI->getSuccessor(I-1) != SI->getSuccessor(I))
+ SwitchInst::CaseIt I = SI->case_begin();
+ Cases.push_back(I.getCaseValue());
+ SwitchInst::CaseIt PrevI = I++;
+ for (SwitchInst::CaseIt E = SI->case_end(); I != E; PrevI = I++) {
+ if (PrevI.getCaseSuccessor() != I.getCaseSuccessor())
return false;
- Cases.push_back(SI->getCaseValue(I));
+ Cases.push_back(I.getCaseValue());
}
- assert(Cases.size() == SI->getNumCases()-1 && "Not all cases gathered");
+ assert(Cases.size() == SI->getNumCases() && "Not all cases gathered");
// Sort the case values, then check if they form a range we can transform.
array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
@@ -2462,18 +2536,19 @@ static bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder) {
}
Constant *Offset = ConstantExpr::getNeg(Cases.back());
- Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases()-1);
+ Constant *NumCases = ConstantInt::get(Offset->getType(), SI->getNumCases());
Value *Sub = SI->getCondition();
if (!Offset->isNullValue())
Sub = Builder.CreateAdd(Sub, Offset, Sub->getName()+".off");
Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
- Builder.CreateCondBr(Cmp, SI->getSuccessor(1), SI->getDefaultDest());
+ Builder.CreateCondBr(
+ Cmp, SI->case_begin().getCaseSuccessor(), SI->getDefaultDest());
// Prune obsolete incoming values off the successor's PHI nodes.
- for (BasicBlock::iterator BBI = SI->getSuccessor(1)->begin();
+ for (BasicBlock::iterator BBI = SI->case_begin().getCaseSuccessor()->begin();
isa<PHINode>(BBI); ++BBI) {
- for (unsigned I = 0, E = SI->getNumCases()-2; I != E; ++I)
+ for (unsigned I = 0, E = SI->getNumCases()-1; I != E; ++I)
cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
}
SI->eraseFromParent();
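// For illustration (hypothetical IR), a switch whose cases form a contiguous
// range with a single destination, e.g.
//   switch i32 %x, label %def [ i32 5, label %dst
//                               i32 6, label %dst
//                               i32 7, label %dst ]
// becomes a subtract, an unsigned compare and a branch:
//   %x.off = add i32 %x, -5
//   %switch = icmp ult i32 %x.off, 3
//   br i1 %switch, label %dst, label %def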
@@ -2487,24 +2562,26 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
unsigned Bits = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- ComputeMaskedBits(Cond, APInt::getAllOnesValue(Bits), KnownZero, KnownOne);
+ ComputeMaskedBits(Cond, KnownZero, KnownOne);
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
- for (unsigned I = 1, E = SI->getNumCases(); I != E; ++I) {
- if ((SI->getCaseValue(I)->getValue() & KnownZero) != 0 ||
- (SI->getCaseValue(I)->getValue() & KnownOne) != KnownOne) {
- DeadCases.push_back(SI->getCaseValue(I));
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ if ((I.getCaseValue()->getValue() & KnownZero) != 0 ||
+ (I.getCaseValue()->getValue() & KnownOne) != KnownOne) {
+ DeadCases.push_back(I.getCaseValue());
DEBUG(dbgs() << "SimplifyCFG: switch case '"
- << SI->getCaseValue(I)->getValue() << "' is dead.\n");
+                 << I.getCaseValue()->getValue() << "' is dead.\n");
}
}
// Remove dead cases from the switch.
for (unsigned I = 0, E = DeadCases.size(); I != E; ++I) {
- unsigned Case = SI->findCaseValue(DeadCases[I]);
+ SwitchInst::CaseIt Case = SI->findCaseValue(DeadCases[I]);
+ assert(Case != SI->case_default() &&
+ "Case was not found. Probably mistake in DeadCases forming.");
// Prune unused values from PHI nodes.
- SI->getSuccessor(Case)->removePredecessor(SI->getParent());
+ Case.getCaseSuccessor()->removePredecessor(SI->getParent());
SI->removeCase(Case);
}
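// Illustrative example (made-up IR): given
//   %x = and i32 %y, 6    ; bit 0 of %x is known to be zero
//   switch i32 %x, label %def [ i32 1, label %a
//                               i32 4, label %b ]
// ComputeMaskedBits proves the odd case value 1 can never match, so that
// case is removed; case 4 survives.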
@@ -2553,9 +2630,9 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
ForwardingNodesMap ForwardingNodes;
- for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
- ConstantInt *CaseValue = SI->getCaseValue(I);
- BasicBlock *CaseDest = SI->getSuccessor(I);
+ for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end(); I != E; ++I) {
+ ConstantInt *CaseValue = I.getCaseValue();
+ BasicBlock *CaseDest = I.getCaseSuccessor();
int PhiIndex;
PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
@@ -2676,8 +2753,8 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
for (++I; isa<DbgInfoIntrinsic>(I); ++I)
;
- if (I->isTerminator()
- && TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
+ if (I->isTerminator() &&
+ TryToSimplifyUncondBranchWithICmpInIt(ICI, TD, Builder))
return true;
}
@@ -2720,6 +2797,12 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SimplifyBranchOnICmpChain(BI, TD, Builder))
return true;
+ // If this basic block is ONLY a compare and a branch, and if a predecessor
+ // branches to us and one of our successors, fold the comparison into the
+ // predecessor and use logical operations to pick the right destination.
+ if (FoldBranchToCommonDest(BI))
+ return SimplifyCFG(BB) | true;
+
// We have a conditional branch to two blocks that are only reachable
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
@@ -2754,12 +2837,6 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (FoldCondBranchOnPHI(BI, TD))
return SimplifyCFG(BB) | true;
- // If this basic block is ONLY a setcc and a branch, and if a predecessor
- // branches to us and one of our successors, fold the setcc into the
- // predecessor and use logical operations to pick the right destination.
- if (FoldBranchToCommonDest(BI))
- return SimplifyCFG(BB) | true;
-
// Scan predecessor blocks for conditional branches.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
@@ -2809,7 +2886,7 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
}
/// If BB has an incoming value that will always trigger undefined behavior
-/// (eg. null pointer derefence), remove the branch leading here.
+/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
for (BasicBlock::iterator i = BB->begin();
PHINode *PHI = dyn_cast<PHINode>(i); ++i)
@@ -2883,17 +2960,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
} else {
if (SimplifyCondBranch(BI, Builder)) return true;
}
- } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
- if (SimplifyResume(RI, Builder)) return true;
} else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
if (SimplifyReturn(RI, Builder)) return true;
+ } else if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator())) {
+ if (SimplifyResume(RI, Builder)) return true;
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
if (SimplifySwitch(SI, Builder)) return true;
} else if (UnreachableInst *UI =
dyn_cast<UnreachableInst>(BB->getTerminator())) {
if (SimplifyUnreachable(UI)) return true;
- } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- if (SimplifyUnwind(UI, Builder)) return true;
} else if (IndirectBrInst *IBI =
dyn_cast<IndirectBrInst>(BB->getTerminator())) {
if (SimplifyIndirectBr(IBI)) return true;
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 76289c055b90..4030befaffa1 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -46,7 +46,6 @@ namespace {
LoopInfo *LI;
DominatorTree *DT;
ScalarEvolution *SE;
- IVUsers *IU; // NULL for DisableIVRewrite
const TargetData *TD; // May be NULL
SmallVectorImpl<WeakVH> &DeadInsts;
@@ -59,7 +58,6 @@ namespace {
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
- IU(IVU),
TD(LPM->getAnalysisIfAvailable<TargetData>()),
DeadInsts(Dead),
Changed(false) {
@@ -107,8 +105,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
// Attempt to fold a binary operator with constant operand.
// e.g. ((I + 1) >> 2) => I >> 2
- if (IVOperand->getNumOperands() != 2 ||
- !isa<ConstantInt>(IVOperand->getOperand(1)))
+ if (!isa<BinaryOperator>(IVOperand)
+ || !isa<ConstantInt>(IVOperand->getOperand(1)))
return 0;
IVSrc = IVOperand->getOperand(0);
@@ -229,11 +227,6 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem,
Rem->replaceAllUsesWith(Sel);
}
- // Inform IVUsers about the new users.
- if (IU) {
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I);
- }
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
@@ -375,6 +368,8 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
namespace llvm {
+void IVVisitor::anchor() { }
+
/// simplifyUsersOfIV - Simplify instructions that use this induction variable
/// by using ScalarEvolution to analyze the IV's recurrence.
bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM,
@@ -397,36 +392,4 @@ bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM,
return Changed;
}
-/// simplifyIVUsers - Perform simplification on instructions recorded by the
-/// IVUsers pass.
-///
-/// This is the old approach to IV simplification to be replaced by
-/// SimplifyLoopIVs.
-bool simplifyIVUsers(IVUsers *IU, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead) {
- SimplifyIndvar SIV(IU->getLoop(), SE, LPM, Dead);
-
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- SIV.eliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- SIV.eliminateIVRemainder(Rem, IVOperand, IsSigned);
- continue;
- }
- }
- }
- return SIV.hasChanged();
-}
-
} // namespace llvm
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index ac005f95b33a..81eb9e0f8ae1 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -39,12 +40,14 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfo>();
}
/// runOnFunction - Remove instructions that simplify.
bool runOnFunction(Function &F) {
const DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
const TargetData *TD = getAnalysisIfAvailable<TargetData>();
+ const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -60,7 +63,7 @@ namespace {
continue;
// Don't waste time simplifying unused instructions.
if (!I->use_empty())
- if (Value *V = SimplifyInstruction(I, TD, DT)) {
+ if (Value *V = SimplifyInstruction(I, TD, TLI, DT)) {
// Mark all uses for resimplification next time round the loop.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
@@ -84,8 +87,11 @@ namespace {
}
char InstSimplifier::ID = 0;
-INITIALIZE_PASS(InstSimplifier, "instsimplify", "Remove redundant instructions",
- false, false)
+INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
+INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
+ "Remove redundant instructions", false, false)
char &llvm::InstructionSimplifierID = InstSimplifier::ID;
// Public interface to the simplify instructions pass.
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 46d4adaaa154..b1cad06dffe9 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -50,33 +50,13 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// return.
//
std::vector<BasicBlock*> ReturningBlocks;
- std::vector<BasicBlock*> UnwindingBlocks;
std::vector<BasicBlock*> UnreachableBlocks;
for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
if (isa<ReturnInst>(I->getTerminator()))
ReturningBlocks.push_back(I);
- else if (isa<UnwindInst>(I->getTerminator()))
- UnwindingBlocks.push_back(I);
else if (isa<UnreachableInst>(I->getTerminator()))
UnreachableBlocks.push_back(I);
- // Handle unwinding blocks first.
- if (UnwindingBlocks.empty()) {
- UnwindBlock = 0;
- } else if (UnwindingBlocks.size() == 1) {
- UnwindBlock = UnwindingBlocks.front();
- } else {
- UnwindBlock = BasicBlock::Create(F.getContext(), "UnifiedUnwindBlock", &F);
- new UnwindInst(F.getContext(), UnwindBlock);
-
- for (std::vector<BasicBlock*>::iterator I = UnwindingBlocks.begin(),
- E = UnwindingBlocks.end(); I != E; ++I) {
- BasicBlock *BB = *I;
- BB->getInstList().pop_back(); // Remove the unwind insn
- BranchInst::Create(UnwindBlock, BB);
- }
- }
-
// Then unreachable blocks.
if (UnreachableBlocks.empty()) {
UnreachableBlock = 0;
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
new file mode 100644
index 000000000000..286b54f2f067
--- /dev/null
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -0,0 +1,1907 @@
+//===- BBVectorize.cpp - A Basic-Block Vectorizer -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a basic-block vectorization pass. The algorithm was
+// inspired by that used by the Vienna MAP Vectorizer by Franchetti, Kral,
+// et al. It works by looking for chains of pairable operations and then
+// pairing them.
+//
+//===----------------------------------------------------------------------===//
+
+#define BBV_NAME "bb-vectorize"
+#define DEBUG_TYPE BBV_NAME
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Transforms/Vectorize.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+static cl::opt<unsigned>
+ReqChainDepth("bb-vectorize-req-chain-depth", cl::init(6), cl::Hidden,
+ cl::desc("The required chain depth for vectorization"));
+
+static cl::opt<unsigned>
+SearchLimit("bb-vectorize-search-limit", cl::init(400), cl::Hidden,
+ cl::desc("The maximum search distance for instruction pairs"));
+
+static cl::opt<bool>
+SplatBreaksChain("bb-vectorize-splat-breaks-chain", cl::init(false), cl::Hidden,
+ cl::desc("Replicating one element to a pair breaks the chain"));
+
+static cl::opt<unsigned>
+VectorBits("bb-vectorize-vector-bits", cl::init(128), cl::Hidden,
+ cl::desc("The size of the native vector registers"));
+
+static cl::opt<unsigned>
+MaxIter("bb-vectorize-max-iter", cl::init(0), cl::Hidden,
+ cl::desc("The maximum number of pairing iterations"));
+
+static cl::opt<unsigned>
+MaxInsts("bb-vectorize-max-instr-per-group", cl::init(500), cl::Hidden,
+ cl::desc("The maximum number of pairable instructions per group"));
+
+static cl::opt<unsigned>
+MaxCandPairsForCycleCheck("bb-vectorize-max-cycle-check-pairs", cl::init(200),
+ cl::Hidden, cl::desc("The maximum number of candidate pairs with which to use"
+ " a full cycle check"));
+
+static cl::opt<bool>
+NoInts("bb-vectorize-no-ints", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize integer values"));
+
+static cl::opt<bool>
+NoFloats("bb-vectorize-no-floats", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point values"));
+
+static cl::opt<bool>
+NoCasts("bb-vectorize-no-casts", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize casting (conversion) operations"));
+
+static cl::opt<bool>
+NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize floating-point math intrinsics"));
+
+static cl::opt<bool>
+NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
+
+static cl::opt<bool>
+NoMemOps("bb-vectorize-no-mem-ops", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize loads and stores"));
+
+static cl::opt<bool>
+AlignedOnly("bb-vectorize-aligned-only", cl::init(false), cl::Hidden,
+ cl::desc("Only generate aligned loads and stores"));
+
+static cl::opt<bool>
+NoMemOpBoost("bb-vectorize-no-mem-op-boost",
+ cl::init(false), cl::Hidden,
+ cl::desc("Don't boost the chain-depth contribution of loads and stores"));
+
+static cl::opt<bool>
+FastDep("bb-vectorize-fast-dep", cl::init(false), cl::Hidden,
+ cl::desc("Use a fast instruction dependency analysis"));
+
+#ifndef NDEBUG
+static cl::opt<bool>
+DebugInstructionExamination("bb-vectorize-debug-instruction-examination",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " instruction-examination process"));
+static cl::opt<bool>
+DebugCandidateSelection("bb-vectorize-debug-candidate-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " candidate-selection process"));
+static cl::opt<bool>
+DebugPairSelection("bb-vectorize-debug-pair-selection",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " pair-selection process"));
+static cl::opt<bool>
+DebugCycleCheck("bb-vectorize-debug-cycle-check",
+ cl::init(false), cl::Hidden,
+ cl::desc("When debugging is enabled, output information on the"
+ " cycle-checking process"));
+#endif
+
+STATISTIC(NumFusedOps, "Number of operations fused by bb-vectorize");
+
+namespace {
+ struct BBVectorize : public BasicBlockPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ const VectorizeConfig Config;
+
+ BBVectorize(const VectorizeConfig &C = VectorizeConfig())
+ : BasicBlockPass(ID), Config(C) {
+ initializeBBVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ BBVectorize(Pass *P, const VectorizeConfig &C)
+ : BasicBlockPass(ID), Config(C) {
+ AA = &P->getAnalysis<AliasAnalysis>();
+ SE = &P->getAnalysis<ScalarEvolution>();
+ TD = P->getAnalysisIfAvailable<TargetData>();
+ }
+
+ typedef std::pair<Value *, Value *> ValuePair;
+ typedef std::pair<ValuePair, size_t> ValuePairWithDepth;
+ typedef std::pair<ValuePair, ValuePair> VPPair; // A ValuePair pair
+ typedef std::pair<std::multimap<Value *, Value *>::iterator,
+ std::multimap<Value *, Value *>::iterator> VPIteratorPair;
+ typedef std::pair<std::multimap<ValuePair, ValuePair>::iterator,
+ std::multimap<ValuePair, ValuePair>::iterator>
+ VPPIteratorPair;
+
+ AliasAnalysis *AA;
+ ScalarEvolution *SE;
+ TargetData *TD;
+
+ // FIXME: const correct?
+
+ bool vectorizePairs(BasicBlock &BB);
+
+ bool getCandidatePairs(BasicBlock &BB,
+ BasicBlock::iterator &Start,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts);
+
+ void computeConnectedPairs(std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs);
+
+ void buildDepMap(BasicBlock &BB,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers);
+
+ void choosePairs(std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ void fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *>& ChosenPairs);
+
+ bool isInstVectorizable(Instruction *I, bool &IsSimpleLoadStore);
+
+ bool areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore);
+
+ bool trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers = true,
+ std::multimap<Value *, Value *> *LoadMoveSet = 0);
+
+ void computePairsConnectedTo(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ ValuePair P);
+
+ bool pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> *PairableInstUserMap = 0);
+
+ bool pairWillFormCycle(ValuePair P,
+ std::multimap<ValuePair, ValuePair> &PairableInstUsers,
+ DenseSet<ValuePair> &CurrentPairs);
+
+ void pruneTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree,
+ DenseSet<ValuePair> &PrunedTree, ValuePair J,
+ bool UseCycleCheck);
+
+ void buildInitialTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree, ValuePair J);
+
+ void findBestTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
+ size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ bool UseCycleCheck);
+
+ Value *getReplacementPointerInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool &FlipMemInputs);
+
+ void fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
+ unsigned IdxOffset, std::vector<Constant*> &Mask);
+
+ Value *getReplacementShuffleMask(LLVMContext& Context, Instruction *I,
+ Instruction *J);
+
+ Value *getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool FlipMemInputs);
+
+ void getReplacementInputsForPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, SmallVector<Value *, 3> &ReplacedOperands,
+ bool &FlipMemInputs);
+
+ void replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt, Instruction *&K1,
+ Instruction *&K2, bool &FlipMemInputs);
+
+ void collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I);
+
+ void collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet);
+
+ bool canMoveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I, Instruction *J);
+
+ void moveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J);
+
+ bool vectorizeBB(BasicBlock &BB) {
+ bool changed = false;
+ // Iterate a sufficient number of times to merge types of size 1 bit,
+      // then 2 bits, then 4, etc., up to half of the width of the target
+      // vector register.
+ for (unsigned v = 2, n = 1;
+ v <= Config.VectorBits && (!Config.MaxIter || n <= Config.MaxIter);
+ v *= 2, ++n) {
+ DEBUG(dbgs() << "BBV: fusing loop #" << n <<
+ " for " << BB.getName() << " in " <<
+ BB.getParent()->getName() << "...\n");
+ if (vectorizePairs(BB))
+ changed = true;
+ else
+ break;
+ }
+
+ DEBUG(dbgs() << "BBV: done!\n");
+ return changed;
+ }
+
+ virtual bool runOnBasicBlock(BasicBlock &BB) {
+ AA = &getAnalysis<AliasAnalysis>();
+ SE = &getAnalysis<ScalarEvolution>();
+ TD = getAnalysisIfAvailable<TargetData>();
+
+ return vectorizeBB(BB);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ BasicBlockPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<ScalarEvolution>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addPreserved<ScalarEvolution>();
+ AU.setPreservesCFG();
+ }
+
+ // This returns the vector type that holds a pair of the provided type.
+ // If the provided type is already a vector, then its length is doubled.
+ static inline VectorType *getVecTypeForPair(Type *ElemTy) {
+ if (VectorType *VTy = dyn_cast<VectorType>(ElemTy)) {
+ unsigned numElem = VTy->getNumElements();
+ return VectorType::get(ElemTy->getScalarType(), numElem*2);
+ }
+
+ return VectorType::get(ElemTy, 2);
+ }
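    // Usage sketch: getVecTypeForPair(float) yields <2 x float>, and
    // getVecTypeForPair(<2 x float>) yields <4 x float>, since pairing two
    // two-element vectors doubles the element count.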
+
+ // Returns the weight associated with the provided value. A chain of
+ // candidate pairs has a length given by the sum of the weights of its
+ // members (one weight per pair; the weight of each member of the pair
+ // is assumed to be the same). This length is then compared to the
+ // chain-length threshold to determine if a given chain is significant
+ // enough to be vectorized. The length is also used in comparing
+ // candidate chains where longer chains are considered to be better.
+ // Note: when this function returns 0, the resulting instructions are
+ // not actually fused.
+ inline size_t getDepthFactor(Value *V) {
+ // InsertElement and ExtractElement have a depth factor of zero. This is
+ // for two reasons: First, they cannot be usefully fused. Second, because
+ // the pass generates a lot of these, they can confuse the simple metric
+ // used to compare the trees in the next iteration. Thus, giving them a
+ // weight of zero allows the pass to essentially ignore them in
+ // subsequent iterations when looking for vectorization opportunities
+ // while still tracking dependency chains that flow through those
+ // instructions.
+ if (isa<InsertElementInst>(V) || isa<ExtractElementInst>(V))
+ return 0;
+
+ // Give a load or store half of the required depth so that load/store
+ // pairs will vectorize.
+ if (!Config.NoMemOpBoost && (isa<LoadInst>(V) || isa<StoreInst>(V)))
+ return Config.ReqChainDepth/2;
+
+ return 1;
+ }
+
+ // This determines the relative offset of two loads or stores, returning
+ // true if the offset could be determined to be some constant value.
+ // For example, if OffsetInElmts == 1, then J accesses the memory directly
+ // after I; if OffsetInElmts == -1 then I accesses the memory
+ // directly after J. This function assumes that both instructions
+ // have the same type.
+ bool getPairPtrInfo(Instruction *I, Instruction *J,
+ Value *&IPtr, Value *&JPtr, unsigned &IAlignment, unsigned &JAlignment,
+ int64_t &OffsetInElmts) {
+ OffsetInElmts = 0;
+ if (isa<LoadInst>(I)) {
+ IPtr = cast<LoadInst>(I)->getPointerOperand();
+ JPtr = cast<LoadInst>(J)->getPointerOperand();
+ IAlignment = cast<LoadInst>(I)->getAlignment();
+ JAlignment = cast<LoadInst>(J)->getAlignment();
+ } else {
+ IPtr = cast<StoreInst>(I)->getPointerOperand();
+ JPtr = cast<StoreInst>(J)->getPointerOperand();
+ IAlignment = cast<StoreInst>(I)->getAlignment();
+ JAlignment = cast<StoreInst>(J)->getAlignment();
+ }
+
+ const SCEV *IPtrSCEV = SE->getSCEV(IPtr);
+ const SCEV *JPtrSCEV = SE->getSCEV(JPtr);
+
+ // If this is a trivial offset, then we'll get something like
+ // 1*sizeof(type). With target data, which we need anyway, this will get
+ // constant folded into a number.
+ const SCEV *OffsetSCEV = SE->getMinusSCEV(JPtrSCEV, IPtrSCEV);
+ if (const SCEVConstant *ConstOffSCEV =
+ dyn_cast<SCEVConstant>(OffsetSCEV)) {
+ ConstantInt *IntOff = ConstOffSCEV->getValue();
+ int64_t Offset = IntOff->getSExtValue();
+
+ Type *VTy = cast<PointerType>(IPtr->getType())->getElementType();
+ int64_t VTyTSS = (int64_t) TD->getTypeStoreSize(VTy);
+
+ assert(VTy == cast<PointerType>(JPtr->getType())->getElementType());
+
+ OffsetInElmts = Offset/VTyTSS;
+ return (abs64(Offset) % VTyTSS) == 0;
+ }
+
+ return false;
+ }
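    // Worked example (hypothetical IR): for two adjacent loads
    //   %p0 = getelementptr float* %base, i64 %i
    //   %p1 = getelementptr float* %base, i64 %j   ; where %j = %i + 1
    //   %a = load float* %p0
    //   %b = load float* %p1
    // SCEV folds the pointer difference to 4 bytes and, with a float store
    // size of 4, this returns true with OffsetInElmts == 1.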
+
+ // Returns true if the provided CallInst represents an intrinsic that can
+ // be vectorized.
+ bool isVectorizableIntrinsic(CallInst* I) {
+ Function *F = I->getCalledFunction();
+ if (!F) return false;
+
+ unsigned IID = F->getIntrinsicID();
+ if (!IID) return false;
+
+ switch(IID) {
+ default:
+ return false;
+ case Intrinsic::sqrt:
+ case Intrinsic::powi:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ return Config.VectorizeMath;
+ case Intrinsic::fma:
+ return Config.VectorizeFMA;
+ }
+ }
+
+ // Returns true if J is the second element in some pair referenced by
+ // some multimap pair iterator pair.
+ template <typename V>
+ bool isSecondInIteratorPair(V J, std::pair<
+ typename std::multimap<V, V>::iterator,
+ typename std::multimap<V, V>::iterator> PairRange) {
+ for (typename std::multimap<V, V>::iterator K = PairRange.first;
+ K != PairRange.second; ++K)
+ if (K->second == J) return true;
+
+ return false;
+ }
+ };
+
+ // This function implements one vectorization iteration on the provided
+ // basic block. It returns true if the block is changed.
+ bool BBVectorize::vectorizePairs(BasicBlock &BB) {
+ bool ShouldContinue;
+ BasicBlock::iterator Start = BB.getFirstInsertionPt();
+
+ std::vector<Value *> AllPairableInsts;
+ DenseMap<Value *, Value *> AllChosenPairs;
+
+ do {
+ std::vector<Value *> PairableInsts;
+ std::multimap<Value *, Value *> CandidatePairs;
+ ShouldContinue = getCandidatePairs(BB, Start, CandidatePairs,
+ PairableInsts);
+ if (PairableInsts.empty()) continue;
+
+ // Now we have a map of all of the pairable instructions and we need to
+ // select the best possible pairing. A good pairing is one such that the
+ // users of the pair are also paired. This defines a (directed) forest
+ // over the pairs such that two pairs are connected iff the second pair
+ // uses the first.
+
+ // Note that it only matters that both members of the second pair use some
+ // element of the first pair (to allow for splatting).
+
+ std::multimap<ValuePair, ValuePair> ConnectedPairs;
+ computeConnectedPairs(CandidatePairs, PairableInsts, ConnectedPairs);
+ if (ConnectedPairs.empty()) continue;
+
+ // Build the pairable-instruction dependency map
+ DenseSet<ValuePair> PairableInstUsers;
+ buildDepMap(BB, CandidatePairs, PairableInsts, PairableInstUsers);
+
+ // There is now a graph of the connected pairs. For each variable, pick
+ // the pairing with the largest tree meeting the depth requirement on at
+ // least one branch. Then select all pairings that are part of that tree
+ // and remove them from the list of available pairings and pairable
+ // variables.
+
+ DenseMap<Value *, Value *> ChosenPairs;
+ choosePairs(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs);
+
+ if (ChosenPairs.empty()) continue;
+ AllPairableInsts.insert(AllPairableInsts.end(), PairableInsts.begin(),
+ PairableInsts.end());
+ AllChosenPairs.insert(ChosenPairs.begin(), ChosenPairs.end());
+ } while (ShouldContinue);
+
+ if (AllChosenPairs.empty()) return false;
+ NumFusedOps += AllChosenPairs.size();
+
+ // A set of pairs has now been selected. It is now necessary to replace the
+ // paired instructions with vector instructions. For this procedure each
+ // operand must be replaced with a vector operand. This vector is formed
+ // by using build_vector on the old operands. The replaced values are then
+ // replaced with a vector_extract on the result. Subsequent optimization
+ // passes should coalesce the build/extract combinations.
+
+ fuseChosenPairs(BB, AllPairableInsts, AllChosenPairs);
+ return true;
+ }
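  // Sketch of the replacement pattern described above (hypothetical IR):
  // the chosen pair
  //   %r1 = fadd float %a1, %b1
  //   %r2 = fadd float %a2, %b2
  // is fused into
  //   %va   = insertelement <2 x float> undef, float %a1, i32 0
  //   %va.2 = insertelement <2 x float> %va, float %a2, i32 1
  //   %vb   = insertelement <2 x float> undef, float %b1, i32 0
  //   %vb.2 = insertelement <2 x float> %vb, float %b2, i32 1
  //   %vr   = fadd <2 x float> %va.2, %vb.2
  //   %r1.new = extractelement <2 x float> %vr, i32 0
  //   %r2.new = extractelement <2 x float> %vr, i32 1
  // with later passes expected to coalesce the insert/extract chains.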
+
+ // This function returns true if the provided instruction is capable of being
+ // fused into a vector instruction. This determination is based only on the
+ // type and other attributes of the instruction.
+ bool BBVectorize::isInstVectorizable(Instruction *I,
+ bool &IsSimpleLoadStore) {
+ IsSimpleLoadStore = false;
+
+ if (CallInst *C = dyn_cast<CallInst>(I)) {
+ if (!isVectorizableIntrinsic(C))
+ return false;
+ } else if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+      // Vectorize simple loads if possible:
+ IsSimpleLoadStore = L->isSimple();
+ if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
+ return false;
+ } else if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+      // Vectorize simple stores if possible:
+ IsSimpleLoadStore = S->isSimple();
+ if (!IsSimpleLoadStore || !Config.VectorizeMemOps)
+ return false;
+ } else if (CastInst *C = dyn_cast<CastInst>(I)) {
+ // We can vectorize casts, but not casts of pointer types, etc.
+ if (!Config.VectorizeCasts)
+ return false;
+
+ Type *SrcTy = C->getSrcTy();
+ if (!SrcTy->isSingleValueType() || SrcTy->isPointerTy())
+ return false;
+
+ Type *DestTy = C->getDestTy();
+ if (!DestTy->isSingleValueType() || DestTy->isPointerTy())
+ return false;
+ } else if (!(I->isBinaryOp() || isa<ShuffleVectorInst>(I) ||
+ isa<ExtractElementInst>(I) || isa<InsertElementInst>(I))) {
+ return false;
+ }
+
+ // We can't vectorize memory operations without target data
+ if (TD == 0 && IsSimpleLoadStore)
+ return false;
+
+ Type *T1, *T2;
+ if (isa<StoreInst>(I)) {
+ // For stores, it is the value type, not the pointer type that matters
+ // because the value is what will come from a vector register.
+
+ Value *IVal = cast<StoreInst>(I)->getValueOperand();
+ T1 = IVal->getType();
+ } else {
+ T1 = I->getType();
+ }
+
+ if (I->isCast())
+ T2 = cast<CastInst>(I)->getSrcTy();
+ else
+ T2 = T1;
+
+ // Not every type can be vectorized...
+ if (!(VectorType::isValidElementType(T1) || T1->isVectorTy()) ||
+ !(VectorType::isValidElementType(T2) || T2->isVectorTy()))
+ return false;
+
+ if (!Config.VectorizeInts
+ && (T1->isIntOrIntVectorTy() || T2->isIntOrIntVectorTy()))
+ return false;
+
+ if (!Config.VectorizeFloats
+ && (T1->isFPOrFPVectorTy() || T2->isFPOrFPVectorTy()))
+ return false;
+
+ if (T1->getPrimitiveSizeInBits() > Config.VectorBits/2 ||
+ T2->getPrimitiveSizeInBits() > Config.VectorBits/2)
+ return false;
+
+ return true;
+ }
+
+ // This function returns true if the two provided instructions are compatible
+ // (meaning that they can be fused into a vector instruction). This assumes
+ // that I has already been determined to be vectorizable and that J is not
+ // in the use tree of I.
+ bool BBVectorize::areInstsCompatible(Instruction *I, Instruction *J,
+ bool IsSimpleLoadStore) {
+ DEBUG(if (DebugInstructionExamination) dbgs() << "BBV: looking at " << *I <<
+ " <-> " << *J << "\n");
+
+ // Loads and stores can be merged if they have different alignments,
+ // but are otherwise the same.
+ LoadInst *LI, *LJ;
+ StoreInst *SI, *SJ;
+ if ((LI = dyn_cast<LoadInst>(I)) && (LJ = dyn_cast<LoadInst>(J))) {
+ if (I->getType() != J->getType())
+ return false;
+
+ if (LI->getPointerOperand()->getType() !=
+ LJ->getPointerOperand()->getType() ||
+ LI->isVolatile() != LJ->isVolatile() ||
+ LI->getOrdering() != LJ->getOrdering() ||
+ LI->getSynchScope() != LJ->getSynchScope())
+ return false;
+ } else if ((SI = dyn_cast<StoreInst>(I)) && (SJ = dyn_cast<StoreInst>(J))) {
+ if (SI->getValueOperand()->getType() !=
+ SJ->getValueOperand()->getType() ||
+ SI->getPointerOperand()->getType() !=
+ SJ->getPointerOperand()->getType() ||
+ SI->isVolatile() != SJ->isVolatile() ||
+ SI->getOrdering() != SJ->getOrdering() ||
+ SI->getSynchScope() != SJ->getSynchScope())
+ return false;
+ } else if (!J->isSameOperationAs(I)) {
+ return false;
+ }
+ // FIXME: handle addsub-type operations!
+
+ if (IsSimpleLoadStore) {
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment;
+ int64_t OffsetInElmts = 0;
+ if (getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ OffsetInElmts) && abs64(OffsetInElmts) == 1) {
+ if (Config.AlignedOnly) {
+ Type *aType = isa<StoreInst>(I) ?
+ cast<StoreInst>(I)->getValueOperand()->getType() : I->getType();
+ // An aligned load or store is possible only if the instruction
+ // with the lower offset has an alignment suitable for the
+ // vector type.
+
+ unsigned BottomAlignment = IAlignment;
+ if (OffsetInElmts < 0) BottomAlignment = JAlignment;
+
+ Type *VType = getVecTypeForPair(aType);
+ unsigned VecAlignment = TD->getPrefTypeAlignment(VType);
+ if (BottomAlignment < VecAlignment)
+ return false;
+ }
+ } else {
+ return false;
+ }
+ } else if (isa<ShuffleVectorInst>(I)) {
+ // Only merge two shuffles if they're both constant
+ return isa<Constant>(I->getOperand(2)) &&
+ isa<Constant>(J->getOperand(2));
+ // FIXME: We may want to vectorize non-constant shuffles also.
+ }
+
+    // The powi intrinsic is special because only the first argument is
+    // vectorized; the second argument must be the same in both instructions.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ Function *FI;
+ if (CI && (FI = CI->getCalledFunction()) &&
+ FI->getIntrinsicID() == Intrinsic::powi) {
+
+ Value *A1I = CI->getArgOperand(1),
+ *A1J = cast<CallInst>(J)->getArgOperand(1);
+ const SCEV *A1ISCEV = SE->getSCEV(A1I),
+ *A1JSCEV = SE->getSCEV(A1J);
+ return (A1ISCEV == A1JSCEV);
+ }
+
+ return true;
+ }
+
+ // Figure out whether or not J uses I and update the users and write-set
+ // structures associated with I. Specifically, Users represents the set of
+ // instructions that depend on I. WriteSet represents the set
+ // of memory locations that are dependent on I. If UpdateUsers is true,
+ // and J uses I, then Users is updated to contain J and WriteSet is updated
+ // to contain any memory locations to which J writes. The function returns
+ // true if J uses I. By default, alias analysis is used to determine
+ // whether J reads from memory that overlaps with a location in WriteSet.
+ // If LoadMoveSet is not null, then it is a previously-computed multimap
+ // where the key is the memory-based user instruction and the value is
+ // the instruction to be compared with I. So, if LoadMoveSet is provided,
+ // then the alias analysis is not used. This is necessary because this
+ // function is called during the process of moving instructions during
+ // vectorization and the results of the alias analysis are not stable during
+ // that process.
+ bool BBVectorize::trackUsesOfI(DenseSet<Value *> &Users,
+ AliasSetTracker &WriteSet, Instruction *I,
+ Instruction *J, bool UpdateUsers,
+ std::multimap<Value *, Value *> *LoadMoveSet) {
+ bool UsesI = false;
+
+ // This instruction may already be marked as a user due, for example, to
+ // being a member of a selected pair.
+ if (Users.count(J))
+ UsesI = true;
+
+ if (!UsesI)
+ for (User::op_iterator JU = J->op_begin(), JE = J->op_end();
+ JU != JE; ++JU) {
+ Value *V = *JU;
+ if (I == V || Users.count(V)) {
+ UsesI = true;
+ break;
+ }
+ }
+ if (!UsesI && J->mayReadFromMemory()) {
+ if (LoadMoveSet) {
+ VPIteratorPair JPairRange = LoadMoveSet->equal_range(J);
+ UsesI = isSecondInIteratorPair<Value*>(I, JPairRange);
+ } else {
+ for (AliasSetTracker::iterator W = WriteSet.begin(),
+ WE = WriteSet.end(); W != WE; ++W) {
+ if (W->aliasesUnknownInst(J, *AA)) {
+ UsesI = true;
+ break;
+ }
+ }
+ }
+ }
+
+ if (UsesI && UpdateUsers) {
+ if (J->mayWriteToMemory()) WriteSet.add(J);
+ Users.insert(J);
+ }
+
+ return UsesI;
+ }
+
+ // This function iterates over all instruction pairs in the provided
+ // basic block and collects all candidate pairs for vectorization.
+ bool BBVectorize::getCandidatePairs(BasicBlock &BB,
+ BasicBlock::iterator &Start,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts) {
+ BasicBlock::iterator E = BB.end();
+ if (Start == E) return false;
+
+ bool ShouldContinue = false, IAfterStart = false;
+ for (BasicBlock::iterator I = Start++; I != E; ++I) {
+ if (I == Start) IAfterStart = true;
+
+ bool IsSimpleLoadStore;
+ if (!isInstVectorizable(I, IsSimpleLoadStore)) continue;
+
+ // Look for an instruction with which to pair instruction *I...
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ bool JAfterStart = IAfterStart;
+ BasicBlock::iterator J = llvm::next(I);
+ for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) {
+ if (J == Start) JAfterStart = true;
+
+ // Determine if J uses I, if so, exit the loop.
+ bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep);
+ if (Config.FastDep) {
+ // Note: For this heuristic to be effective, independent operations
+          // must tend to be intermixed. This is likely to be true for some
+          // kinds of grouped loop unrolling (but not the generic LLVM pass),
+ // but otherwise may require some kind of reordering pass.
+
+ // When using fast dependency analysis,
+ // stop searching after first use:
+ if (UsesI) break;
+ } else {
+ if (UsesI) continue;
+ }
+
+ // J does not use I, and comes before the first use of I, so it can be
+ // merged with I if the instructions are compatible.
+ if (!areInstsCompatible(I, J, IsSimpleLoadStore)) continue;
+
+ // J is a candidate for merging with I.
+ if (!PairableInsts.size() ||
+ PairableInsts[PairableInsts.size()-1] != I) {
+ PairableInsts.push_back(I);
+ }
+
+ CandidatePairs.insert(ValuePair(I, J));
+
+ // The next call to this function must start after the last instruction
+ // selected during this invocation.
+ if (JAfterStart) {
+ Start = llvm::next(J);
+ IAfterStart = JAfterStart = false;
+ }
+
+ DEBUG(if (DebugCandidateSelection) dbgs() << "BBV: candidate pair "
+ << *I << " <-> " << *J << "\n");
+
+ // If we have already found too many pairs, break here and this function
+ // will be called again starting after the last instruction selected
+ // during this invocation.
+ if (PairableInsts.size() >= Config.MaxInsts) {
+ ShouldContinue = true;
+ break;
+ }
+ }
+
+ if (ShouldContinue)
+ break;
+ }
+
+ DEBUG(dbgs() << "BBV: found " << PairableInsts.size()
+ << " instructions with candidate pairs\n");
+
+ return ShouldContinue;
+ }
+
+ // Finds candidate pairs connected to the pair P = <PI, PJ>. This means that
+ // it looks for pairs such that both members have an input which is an
+ // output of PI or PJ.
+ void BBVectorize::computePairsConnectedTo(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ ValuePair P) {
+ // For each possible pairing for this variable, look at the uses of
+ // the first value...
+ for (Value::use_iterator I = P.first->use_begin(),
+ E = P.first->use_end(); I != E; ++I) {
+ VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
+
+ // For each use of the first variable, look for uses of the second
+ // variable...
+ for (Value::use_iterator J = P.second->use_begin(),
+ E2 = P.second->use_end(); J != E2; ++J) {
+ VPIteratorPair JPairRange = CandidatePairs.equal_range(*J);
+
+ // Look for <I, J>:
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+
+ // Look for <J, I>:
+ if (isSecondInIteratorPair<Value*>(*I, JPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*J, *I)));
+ }
+
+ if (Config.SplatBreaksChain) continue;
+ // Look for cases where just the first value in the pair is used by
+ // both members of another pair (splatting).
+ for (Value::use_iterator J = P.first->use_begin(); J != E; ++J) {
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ }
+ }
+
+ if (Config.SplatBreaksChain) return;
+ // Look for cases where just the second value in the pair is used by
+ // both members of another pair (splatting).
+ for (Value::use_iterator I = P.second->use_begin(),
+ E = P.second->use_end(); I != E; ++I) {
+ VPIteratorPair IPairRange = CandidatePairs.equal_range(*I);
+
+ for (Value::use_iterator J = P.second->use_begin(); J != E; ++J) {
+ if (isSecondInIteratorPair<Value*>(*J, IPairRange))
+ ConnectedPairs.insert(VPPair(P, ValuePair(*I, *J)));
+ }
+ }
+ }
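  // Illustrative example: given candidate pairs P = <X1, X2> and Q = <Y1, Y2>,
  // P is connected to Q when Y1 uses X1 and Y2 uses X2 (direct), when Y1 uses
  // X2 and Y2 uses X1 (swapped), or, unless SplatBreaksChain is set, when Y1
  // and Y2 both use the same member of P (splatting).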
+
+ // This function figures out which pairs are connected. Two pairs are
+ // connected if some output of the first pair forms an input to both members
+ // of the second pair.
+ void BBVectorize::computeConnectedPairs(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs) {
+
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PE = PairableInsts.end(); PI != PE; ++PI) {
+ VPIteratorPair choiceRange = CandidatePairs.equal_range(*PI);
+
+ for (std::multimap<Value *, Value *>::iterator P = choiceRange.first;
+ P != choiceRange.second; ++P)
+ computePairsConnectedTo(CandidatePairs, PairableInsts,
+ ConnectedPairs, *P);
+ }
+
+ DEBUG(dbgs() << "BBV: found " << ConnectedPairs.size()
+ << " pair connections.\n");
+ }
+
+ // This function builds a set of use tuples such that <A, B> is in the set
+ // if B is in the use tree of A. If B is in the use tree of A, then B
+ // depends on the output of A.
+ void BBVectorize::buildDepMap(
+ BasicBlock &BB,
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ DenseSet<ValuePair> &PairableInstUsers) {
+ DenseSet<Value *> IsInPair;
+ for (std::multimap<Value *, Value *>::iterator C = CandidatePairs.begin(),
+ E = CandidatePairs.end(); C != E; ++C) {
+ IsInPair.insert(C->first);
+ IsInPair.insert(C->second);
+ }
+
+ // Iterate through the basic block, recording all Users of each
+ // pairable instruction.
+
+ BasicBlock::iterator E = BB.end();
+ for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) {
+ if (IsInPair.find(I) == IsInPair.end()) continue;
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (BasicBlock::iterator J = llvm::next(I); J != E; ++J)
+ (void) trackUsesOfI(Users, WriteSet, I, J);
+
+ for (DenseSet<Value *>::iterator U = Users.begin(), E = Users.end();
+ U != E; ++U)
+ PairableInstUsers.insert(ValuePair(I, *U));
+ }
+ }
+
+ // Returns true if an input to pair P is an output of pair Q and also an
+ // input of pair Q is an output of pair P. If this is the case, then these
+ // two pairs cannot be simultaneously fused.
+ bool BBVectorize::pairsConflict(ValuePair P, ValuePair Q,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> *PairableInstUserMap) {
+    // Two pairs are in conflict if they are mutual Users of each other.
+ bool QUsesP = PairableInstUsers.count(ValuePair(P.first, Q.first)) ||
+ PairableInstUsers.count(ValuePair(P.first, Q.second)) ||
+ PairableInstUsers.count(ValuePair(P.second, Q.first)) ||
+ PairableInstUsers.count(ValuePair(P.second, Q.second));
+ bool PUsesQ = PairableInstUsers.count(ValuePair(Q.first, P.first)) ||
+ PairableInstUsers.count(ValuePair(Q.first, P.second)) ||
+ PairableInstUsers.count(ValuePair(Q.second, P.first)) ||
+ PairableInstUsers.count(ValuePair(Q.second, P.second));
+ if (PairableInstUserMap) {
+ // FIXME: The expensive part of the cycle check is not so much the cycle
+ // check itself but this edge insertion procedure. This needs some
+ // profiling and probably a different data structure (same is true of
+ // most uses of std::multimap).
+ if (PUsesQ) {
+ VPPIteratorPair QPairRange = PairableInstUserMap->equal_range(Q);
+ if (!isSecondInIteratorPair(P, QPairRange))
+ PairableInstUserMap->insert(VPPair(Q, P));
+ }
+ if (QUsesP) {
+ VPPIteratorPair PPairRange = PairableInstUserMap->equal_range(P);
+ if (!isSecondInIteratorPair(Q, PPairRange))
+ PairableInstUserMap->insert(VPPair(P, Q));
+ }
+ }
+
+ return (QUsesP && PUsesQ);
+ }
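+
+  // A sketch of the conflict condition on hypothetical IR:
+  //   %p1 = fadd double %x, %y
+  //   %q1 = fmul double %p1, %z   ; a member of Q uses an output of P...
+  //   %q2 = fmul double %w, %v
+  //   %p2 = fadd double %q2, %u   ; ...and a member of P uses an output of Q.
+  // With P = <%p1, %p2> and Q = <%q1, %q2>, both QUsesP and PUsesQ hold, so
+  // fusing both pairs would create an unschedulable cycle.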
+
+ // This function walks the use graph of current pairs to see if, starting
+ // from P, the walk returns to P.
+ bool BBVectorize::pairWillFormCycle(ValuePair P,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseSet<ValuePair> &CurrentPairs) {
+ DEBUG(if (DebugCycleCheck)
+ dbgs() << "BBV: starting cycle check for : " << *P.first << " <-> "
+ << *P.second << "\n");
+    // A lookup table of visited pairs is kept because the PairableInstUserMap
+    // contains non-direct associations.
+ DenseSet<ValuePair> Visited;
+ SmallVector<ValuePair, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(P);
+ do {
+ ValuePair QTop = Q.pop_back_val();
+ Visited.insert(QTop);
+
+ DEBUG(if (DebugCycleCheck)
+ dbgs() << "BBV: cycle check visiting: " << *QTop.first << " <-> "
+ << *QTop.second << "\n");
+ VPPIteratorPair QPairRange = PairableInstUserMap.equal_range(QTop);
+ for (std::multimap<ValuePair, ValuePair>::iterator C = QPairRange.first;
+ C != QPairRange.second; ++C) {
+ if (C->second == P) {
+ DEBUG(dbgs()
+ << "BBV: rejected to prevent non-trivial cycle formation: "
+ << *C->first.first << " <-> " << *C->first.second << "\n");
+ return true;
+ }
+
+ if (CurrentPairs.count(C->second) && !Visited.count(C->second))
+ Q.push_back(C->second);
+ }
+ } while (!Q.empty());
+
+ return false;
+ }
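+
+  // A minimal illustration with hypothetical pairs A, B, and C: if the user
+  // map records A -> B, B -> C, and C -> A, and B and C are in CurrentPairs,
+  // then the walk starting at A visits B and C, finds the edge back to A,
+  // and returns true; selecting A would close the cycle A -> B -> C -> A.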
+
+ // This function builds the initial tree of connected pairs with the
+ // pair J at the root.
+ void BBVectorize::buildInitialTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree, ValuePair J) {
+ // Each of these pairs is viewed as the root node of a Tree. The Tree
+ // is then walked (depth-first). As this happens, we keep track of
+ // the pairs that compose the Tree and the maximum depth of the Tree.
+ SmallVector<ValuePairWithDepth, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
+ do {
+ ValuePairWithDepth QTop = Q.back();
+
+ // Push each child onto the queue:
+ bool MoreChildren = false;
+ size_t MaxChildDepth = QTop.second;
+ VPPIteratorPair qtRange = ConnectedPairs.equal_range(QTop.first);
+ for (std::multimap<ValuePair, ValuePair>::iterator k = qtRange.first;
+ k != qtRange.second; ++k) {
+ // Make sure that this child pair is still a candidate:
+ bool IsStillCand = false;
+ VPIteratorPair checkRange =
+ CandidatePairs.equal_range(k->second.first);
+ for (std::multimap<Value *, Value *>::iterator m = checkRange.first;
+ m != checkRange.second; ++m) {
+ if (m->second == k->second.second) {
+ IsStillCand = true;
+ break;
+ }
+ }
+
+ if (IsStillCand) {
+ DenseMap<ValuePair, size_t>::iterator C = Tree.find(k->second);
+ if (C == Tree.end()) {
+ size_t d = getDepthFactor(k->second.first);
+ Q.push_back(ValuePairWithDepth(k->second, QTop.second+d));
+ MoreChildren = true;
+ } else {
+ MaxChildDepth = std::max(MaxChildDepth, C->second);
+ }
+ }
+ }
+
+ if (!MoreChildren) {
+ // Record the current pair as part of the Tree:
+ Tree.insert(ValuePairWithDepth(QTop.first, MaxChildDepth));
+ Q.pop_back();
+ }
+ } while (!Q.empty());
+ }
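+
+  // To illustrate the depth computation with a hypothetical chain: if pair R
+  // is connected to pair S and S to pair T, each with a depth factor of 1,
+  // the post-order walk records T at cumulative depth 3 and propagates that
+  // maximum back toward the root, so that Tree.lookup(R) == 3; this is the
+  // value later compared against Config.ReqChainDepth.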
+
+ // Given some initial tree, prune it by removing conflicting pairs (pairs
+ // that cannot be simultaneously chosen for vectorization).
+ void BBVectorize::pruneTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseMap<ValuePair, size_t> &Tree,
+ DenseSet<ValuePair> &PrunedTree, ValuePair J,
+ bool UseCycleCheck) {
+ SmallVector<ValuePairWithDepth, 32> Q;
+ // General depth-first post-order traversal:
+ Q.push_back(ValuePairWithDepth(J, getDepthFactor(J.first)));
+ do {
+ ValuePairWithDepth QTop = Q.pop_back_val();
+ PrunedTree.insert(QTop.first);
+
+ // Visit each child, pruning as necessary...
+ DenseMap<ValuePair, size_t> BestChildren;
+ VPPIteratorPair QTopRange = ConnectedPairs.equal_range(QTop.first);
+ for (std::multimap<ValuePair, ValuePair>::iterator K = QTopRange.first;
+ K != QTopRange.second; ++K) {
+ DenseMap<ValuePair, size_t>::iterator C = Tree.find(K->second);
+ if (C == Tree.end()) continue;
+
+ // This child is in the Tree, now we need to make sure it is the
+ // best of any conflicting children. There could be multiple
+ // conflicting children, so first, determine if we're keeping
+ // this child, then delete conflicting children as necessary.
+
+        // It is also necessary to guard against pairing-induced
+        // dependencies. Consider instructions a .. x .. y .. b
+        // such that (a,b) are to be fused and (x,y) are to be fused,
+        // but a is an input to x and y is an input to b. This
+        // means that y cannot be moved after b, but x must be moved
+        // after b for (a,b) to be fused. In other words, after
+        // fusing (a,b) we have y .. a/b .. x, where y is an input
+        // to a/b and x is a user of a/b: x and y can no longer
+        // be legally fused. To prevent this condition, we must
+        // make sure that a child pair added to the Tree does not
+        // both provide an input to and use an output of an
+        // already-selected pair.
+
+ // Pairing-induced dependencies can also form from more complicated
+ // cycles. The pair vs. pair conflicts are easy to check, and so
+ // that is done explicitly for "fast rejection", and because for
+ // child vs. child conflicts, we may prefer to keep the current
+ // pair in preference to the already-selected child.
+ DenseSet<ValuePair> CurrentPairs;
+
+ bool CanAdd = true;
+ for (DenseMap<ValuePair, size_t>::iterator C2
+ = BestChildren.begin(), E2 = BestChildren.end();
+ C2 != E2; ++C2) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ if (C2->second >= C->second) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(C2->first);
+ }
+ }
+ if (!CanAdd) continue;
+
+ // Even worse, this child could conflict with another node already
+ // selected for the Tree. If that is the case, ignore this child.
+ for (DenseSet<ValuePair>::iterator T = PrunedTree.begin(),
+ E2 = PrunedTree.end(); T != E2; ++T) {
+ if (T->first == C->first.first ||
+ T->first == C->first.second ||
+ T->second == C->first.first ||
+ T->second == C->first.second ||
+ pairsConflict(*T, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(*T);
+ }
+ if (!CanAdd) continue;
+
+ // And check the queue too...
+ for (SmallVector<ValuePairWithDepth, 32>::iterator C2 = Q.begin(),
+ E2 = Q.end(); C2 != E2; ++C2) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(C2->first);
+ }
+ if (!CanAdd) continue;
+
+ // Last but not least, check for a conflict with any of the
+ // already-chosen pairs.
+ for (DenseMap<Value *, Value *>::iterator C2 =
+ ChosenPairs.begin(), E2 = ChosenPairs.end();
+ C2 != E2; ++C2) {
+ if (pairsConflict(*C2, C->first, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ CanAdd = false;
+ break;
+ }
+
+ CurrentPairs.insert(*C2);
+ }
+ if (!CanAdd) continue;
+
+        // To check for non-trivial cycles formed by the addition of the
+        // current pair, we've formed a list of all relevant pairs; now we use
+        // a graph walk to check for a cycle. We start from the current pair
+        // and walk the use tree to see if we again reach the current pair.
+        // If we do, then the current pair is rejected.
+
+ // FIXME: It may be more efficient to use a topological-ordering
+ // algorithm to improve the cycle check. This should be investigated.
+ if (UseCycleCheck &&
+ pairWillFormCycle(C->first, PairableInstUserMap, CurrentPairs))
+ continue;
+
+ // This child can be added, but we may have chosen it in preference
+ // to an already-selected child. Check for this here, and if a
+ // conflict is found, then remove the previously-selected child
+ // before adding this one in its place.
+ for (DenseMap<ValuePair, size_t>::iterator C2
+ = BestChildren.begin(); C2 != BestChildren.end();) {
+ if (C2->first.first == C->first.first ||
+ C2->first.first == C->first.second ||
+ C2->first.second == C->first.first ||
+ C2->first.second == C->first.second ||
+ pairsConflict(C2->first, C->first, PairableInstUsers))
+ BestChildren.erase(C2++);
+ else
+ ++C2;
+ }
+
+ BestChildren.insert(ValuePairWithDepth(C->first, C->second));
+ }
+
+ for (DenseMap<ValuePair, size_t>::iterator C
+ = BestChildren.begin(), E2 = BestChildren.end();
+ C != E2; ++C) {
+ size_t DepthF = getDepthFactor(C->first.first);
+ Q.push_back(ValuePairWithDepth(C->first, QTop.second+DepthF));
+ }
+ } while (!Q.empty());
+ }
+
+  // This function finds the best tree of mutually-compatible connected
+ // pairs, given the choice of root pairs as an iterator range.
+ void BBVectorize::findBestTreeFor(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ std::multimap<ValuePair, ValuePair> &PairableInstUserMap,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ DenseSet<ValuePair> &BestTree, size_t &BestMaxDepth,
+ size_t &BestEffSize, VPIteratorPair ChoiceRange,
+ bool UseCycleCheck) {
+ for (std::multimap<Value *, Value *>::iterator J = ChoiceRange.first;
+ J != ChoiceRange.second; ++J) {
+
+ // Before going any further, make sure that this pair does not
+ // conflict with any already-selected pairs (see comment below
+ // near the Tree pruning for more details).
+ DenseSet<ValuePair> ChosenPairSet;
+ bool DoesConflict = false;
+ for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
+ E = ChosenPairs.end(); C != E; ++C) {
+ if (pairsConflict(*C, *J, PairableInstUsers,
+ UseCycleCheck ? &PairableInstUserMap : 0)) {
+ DoesConflict = true;
+ break;
+ }
+
+ ChosenPairSet.insert(*C);
+ }
+ if (DoesConflict) continue;
+
+ if (UseCycleCheck &&
+ pairWillFormCycle(*J, PairableInstUserMap, ChosenPairSet))
+ continue;
+
+ DenseMap<ValuePair, size_t> Tree;
+ buildInitialTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, ChosenPairs, Tree, *J);
+
+ // Because we'll keep the child with the largest depth, the largest
+ // depth is still the same in the unpruned Tree.
+ size_t MaxDepth = Tree.lookup(*J);
+
+ DEBUG(if (DebugPairSelection) dbgs() << "BBV: found Tree for pair {"
+ << *J->first << " <-> " << *J->second << "} of depth " <<
+ MaxDepth << " and size " << Tree.size() << "\n");
+
+      // At this point the Tree has been constructed, but may contain
+      // contradictory children (meaning that different children of
+      // some tree node may be attempting to fuse the same instruction).
+      // So now we walk the tree again and, in the case of a conflict,
+      // keep only the child with the largest depth. To break a tie,
+      // favor the first child.
+
+ DenseSet<ValuePair> PrunedTree;
+ pruneTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap, ChosenPairs, Tree,
+ PrunedTree, *J, UseCycleCheck);
+
+ size_t EffSize = 0;
+ for (DenseSet<ValuePair>::iterator S = PrunedTree.begin(),
+ E = PrunedTree.end(); S != E; ++S)
+ EffSize += getDepthFactor(S->first);
+
+ DEBUG(if (DebugPairSelection)
+ dbgs() << "BBV: found pruned Tree for pair {"
+ << *J->first << " <-> " << *J->second << "} of depth " <<
+ MaxDepth << " and size " << PrunedTree.size() <<
+ " (effective size: " << EffSize << ")\n");
+ if (MaxDepth >= Config.ReqChainDepth && EffSize > BestEffSize) {
+ BestMaxDepth = MaxDepth;
+ BestEffSize = EffSize;
+ BestTree = PrunedTree;
+ }
+ }
+ }
+
+ // Given the list of candidate pairs, this function selects those
+ // that will be fused into vector instructions.
+ void BBVectorize::choosePairs(
+ std::multimap<Value *, Value *> &CandidatePairs,
+ std::vector<Value *> &PairableInsts,
+ std::multimap<ValuePair, ValuePair> &ConnectedPairs,
+ DenseSet<ValuePair> &PairableInstUsers,
+ DenseMap<Value *, Value *>& ChosenPairs) {
+ bool UseCycleCheck =
+ CandidatePairs.size() <= Config.MaxCandPairsForCycleCheck;
+ std::multimap<ValuePair, ValuePair> PairableInstUserMap;
+ for (std::vector<Value *>::iterator I = PairableInsts.begin(),
+ E = PairableInsts.end(); I != E; ++I) {
+ // The number of possible pairings for this variable:
+ size_t NumChoices = CandidatePairs.count(*I);
+ if (!NumChoices) continue;
+
+ VPIteratorPair ChoiceRange = CandidatePairs.equal_range(*I);
+
+ // The best pair to choose and its tree:
+ size_t BestMaxDepth = 0, BestEffSize = 0;
+ DenseSet<ValuePair> BestTree;
+ findBestTreeFor(CandidatePairs, PairableInsts, ConnectedPairs,
+ PairableInstUsers, PairableInstUserMap, ChosenPairs,
+ BestTree, BestMaxDepth, BestEffSize, ChoiceRange,
+ UseCycleCheck);
+
+ // A tree has been chosen (or not) at this point. If no tree was
+ // chosen, then this instruction, I, cannot be paired (and is no longer
+ // considered).
+
+ DEBUG(if (BestTree.size() > 0)
+ dbgs() << "BBV: selected pairs in the best tree for: "
+ << *cast<Instruction>(*I) << "\n");
+
+ for (DenseSet<ValuePair>::iterator S = BestTree.begin(),
+ SE2 = BestTree.end(); S != SE2; ++S) {
+ // Insert the members of this tree into the list of chosen pairs.
+ ChosenPairs.insert(ValuePair(S->first, S->second));
+ DEBUG(dbgs() << "BBV: selected pair: " << *S->first << " <-> " <<
+ *S->second << "\n");
+
+ // Remove all candidate pairs that have values in the chosen tree.
+ for (std::multimap<Value *, Value *>::iterator K =
+ CandidatePairs.begin(); K != CandidatePairs.end();) {
+ if (K->first == S->first || K->second == S->first ||
+ K->second == S->second || K->first == S->second) {
+ // Don't remove the actual pair chosen so that it can be used
+ // in subsequent tree selections.
+ if (!(K->first == S->first && K->second == S->second))
+ CandidatePairs.erase(K++);
+ else
+ ++K;
+ } else {
+ ++K;
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "BBV: selected " << ChosenPairs.size() << " pairs.\n");
+ }
+
+ std::string getReplacementName(Instruction *I, bool IsInput, unsigned o,
+ unsigned n = 0) {
+ if (!I->hasName())
+ return "";
+
+ return (I->getName() + (IsInput ? ".v.i" : ".v.r") + utostr(o) +
+ (n > 0 ? "." + utostr(n) : "")).str();
+ }
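+
+  // For example, for an instruction named %w, getReplacementName(I, true, 0)
+  // yields "w.v.i0" (input 0) and getReplacementName(I, false, 1) yields
+  // "w.v.r1" (result 1); unnamed instructions produce an empty name.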
+
+ // Returns the value that is to be used as the pointer input to the vector
+ // instruction that fuses I with J.
+ Value *BBVectorize::getReplacementPointerInput(LLVMContext& Context,
+ Instruction *I, Instruction *J, unsigned o,
+ bool &FlipMemInputs) {
+ Value *IPtr, *JPtr;
+ unsigned IAlignment, JAlignment;
+ int64_t OffsetInElmts;
+ (void) getPairPtrInfo(I, J, IPtr, JPtr, IAlignment, JAlignment,
+ OffsetInElmts);
+
+ // The pointer value is taken to be the one with the lowest offset.
+ Value *VPtr;
+ if (OffsetInElmts > 0) {
+ VPtr = IPtr;
+ } else {
+ FlipMemInputs = true;
+ VPtr = JPtr;
+ }
+
+ Type *ArgType = cast<PointerType>(IPtr->getType())->getElementType();
+ Type *VArgType = getVecTypeForPair(ArgType);
+ Type *VArgPtrType = PointerType::get(VArgType,
+ cast<PointerType>(IPtr->getType())->getAddressSpace());
+ return new BitCastInst(VPtr, VArgPtrType, getReplacementName(I, true, o),
+ /* insert before */ FlipMemInputs ? J : I);
+ }
+
+ void BBVectorize::fillNewShuffleMask(LLVMContext& Context, Instruction *J,
+ unsigned NumElem, unsigned MaskOffset, unsigned NumInElem,
+ unsigned IdxOffset, std::vector<Constant*> &Mask) {
+ for (unsigned v = 0; v < NumElem/2; ++v) {
+ int m = cast<ShuffleVectorInst>(J)->getMaskValue(v);
+ if (m < 0) {
+ Mask[v+MaskOffset] = UndefValue::get(Type::getInt32Ty(Context));
+ } else {
+ unsigned mm = m + (int) IdxOffset;
+ if (m >= (int) NumInElem)
+ mm += (int) NumInElem;
+
+ Mask[v+MaskOffset] =
+ ConstantInt::get(Type::getInt32Ty(Context), mm);
+ }
+ }
+ }
+
+ // Returns the value that is to be used as the vector-shuffle mask to the
+ // vector instruction that fuses I with J.
+ Value *BBVectorize::getReplacementShuffleMask(LLVMContext& Context,
+ Instruction *I, Instruction *J) {
+ // This is the shuffle mask. We need to append the second
+ // mask to the first, and the numbers need to be adjusted.
+
+ Type *ArgType = I->getType();
+ Type *VArgType = getVecTypeForPair(ArgType);
+
+ // Get the total number of elements in the fused vector type.
+ // By definition, this must equal the number of elements in
+ // the final mask.
+ unsigned NumElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(NumElem);
+
+ Type *OpType = I->getOperand(0)->getType();
+ unsigned NumInElem = cast<VectorType>(OpType)->getNumElements();
+
+ // For the mask from the first pair...
+ fillNewShuffleMask(Context, I, NumElem, 0, NumInElem, 0, Mask);
+
+ // For the mask from the second pair...
+ fillNewShuffleMask(Context, J, NumElem, NumElem/2, NumInElem, NumInElem,
+ Mask);
+
+ return ConstantVector::get(Mask);
+ }
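+
+  // A worked example with hypothetical masks: fuse two shuffles that each
+  // combine two <2 x float> inputs, so NumInElem == 2 and NumElem == 4. If
+  // I's mask is <0, 3> and J's mask is <1, 2>, the first fillNewShuffleMask
+  // call maps 0 -> 0 and 3 -> 5 (3 >= 2, so NumInElem is added), and the
+  // second maps 1 -> 3 (1 + 2) and 2 -> 6 (2 + 2 + 2), giving the fused mask
+  // <0, 5, 3, 6> over the concatenated operand vectors.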
+
+ // Returns the value to be used as the specified operand of the vector
+ // instruction that fuses I with J.
+ Value *BBVectorize::getReplacementInput(LLVMContext& Context, Instruction *I,
+ Instruction *J, unsigned o, bool FlipMemInputs) {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
+
+ // Compute the fused vector type for this operand
+ Type *ArgType = I->getOperand(o)->getType();
+ VectorType *VArgType = getVecTypeForPair(ArgType);
+
+ Instruction *L = I, *H = J;
+ if (FlipMemInputs) {
+ L = J;
+ H = I;
+ }
+
+ if (ArgType->isVectorTy()) {
+ unsigned numElem = cast<VectorType>(VArgType)->getNumElements();
+ std::vector<Constant*> Mask(numElem);
+ for (unsigned v = 0; v < numElem; ++v)
+ Mask[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+
+ Instruction *BV = new ShuffleVectorInst(L->getOperand(o),
+ H->getOperand(o),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ BV->insertBefore(J);
+ return BV;
+ }
+
+    // If these two inputs are outputs of another vector instruction,
+    // then we should use that output directly. It might be necessary to
+    // permute it first. [When pairings are fused recursively, you can
+    // end up with cases where a large vector is decomposed into scalars
+    // using extractelement instructions, then built into size-2
+    // vectors using insertelement and then into larger vectors using
+    // shuffles. InstCombine does not simplify all of these cases well,
+    // and so we make sure that shuffles are generated here when possible.]
+ ExtractElementInst *LEE
+ = dyn_cast<ExtractElementInst>(L->getOperand(o));
+ ExtractElementInst *HEE
+ = dyn_cast<ExtractElementInst>(H->getOperand(o));
+
+ if (LEE && HEE &&
+ LEE->getOperand(0)->getType() == HEE->getOperand(0)->getType()) {
+ VectorType *EEType = cast<VectorType>(LEE->getOperand(0)->getType());
+ unsigned LowIndx = cast<ConstantInt>(LEE->getOperand(1))->getZExtValue();
+ unsigned HighIndx = cast<ConstantInt>(HEE->getOperand(1))->getZExtValue();
+ if (LEE->getOperand(0) == HEE->getOperand(0)) {
+ if (LowIndx == 0 && HighIndx == 1)
+ return LEE->getOperand(0);
+
+ std::vector<Constant*> Mask(2);
+ Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx);
+ Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx);
+
+ Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0),
+ UndefValue::get(EEType),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ BV->insertBefore(J);
+ return BV;
+ }
+
+ std::vector<Constant*> Mask(2);
+ HighIndx += EEType->getNumElements();
+ Mask[0] = ConstantInt::get(Type::getInt32Ty(Context), LowIndx);
+ Mask[1] = ConstantInt::get(Type::getInt32Ty(Context), HighIndx);
+
+ Instruction *BV = new ShuffleVectorInst(LEE->getOperand(0),
+ HEE->getOperand(0),
+ ConstantVector::get(Mask),
+ getReplacementName(I, true, o));
+ BV->insertBefore(J);
+ return BV;
+ }
+
+ Instruction *BV1 = InsertElementInst::Create(
+ UndefValue::get(VArgType),
+ L->getOperand(o), CV0,
+ getReplacementName(I, true, o, 1));
+ BV1->insertBefore(I);
+ Instruction *BV2 = InsertElementInst::Create(BV1, H->getOperand(o),
+ CV1,
+ getReplacementName(I, true, o, 2));
+ BV2->insertBefore(J);
+ return BV2;
+ }
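+
+  // In the common scalar case (illustrative IR), fusing
+  //   %a = fadd double %x1, %y1
+  //   %b = fadd double %x2, %y2
+  // builds, for operand 0:
+  //   %a.v.i0.1 = insertelement <2 x double> undef, double %x1, i32 0
+  //   %a.v.i0.2 = insertelement <2 x double> %a.v.i0.1, double %x2, i32 1
+  // while paired extractelements from a single source vector are folded into
+  // that vector itself or a single shufflevector, as handled above.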
+
+ // This function creates an array of values that will be used as the inputs
+ // to the vector instruction that fuses I with J.
+ void BBVectorize::getReplacementInputsForPair(LLVMContext& Context,
+ Instruction *I, Instruction *J,
+ SmallVector<Value *, 3> &ReplacedOperands,
+ bool &FlipMemInputs) {
+ FlipMemInputs = false;
+ unsigned NumOperands = I->getNumOperands();
+
+ for (unsigned p = 0, o = NumOperands-1; p < NumOperands; ++p, --o) {
+ // Iterate backward so that we look at the store pointer
+ // first and know whether or not we need to flip the inputs.
+
+ if (isa<LoadInst>(I) || (o == 1 && isa<StoreInst>(I))) {
+ // This is the pointer for a load/store instruction.
+ ReplacedOperands[o] = getReplacementPointerInput(Context, I, J, o,
+ FlipMemInputs);
+ continue;
+ } else if (isa<CallInst>(I)) {
+ Function *F = cast<CallInst>(I)->getCalledFunction();
+ unsigned IID = F->getIntrinsicID();
+ if (o == NumOperands-1) {
+ BasicBlock &BB = *I->getParent();
+
+ Module *M = BB.getParent()->getParent();
+ Type *ArgType = I->getType();
+ Type *VArgType = getVecTypeForPair(ArgType);
+
+ // FIXME: is it safe to do this here?
+ ReplacedOperands[o] = Intrinsic::getDeclaration(M,
+ (Intrinsic::ID) IID, VArgType);
+ continue;
+ } else if (IID == Intrinsic::powi && o == 1) {
+ // The second argument of powi is a single integer and we've already
+ // checked that both arguments are equal. As a result, we just keep
+ // I's second argument.
+ ReplacedOperands[o] = I->getOperand(o);
+ continue;
+ }
+ } else if (isa<ShuffleVectorInst>(I) && o == NumOperands-1) {
+ ReplacedOperands[o] = getReplacementShuffleMask(Context, I, J);
+ continue;
+ }
+
+ ReplacedOperands[o] =
+ getReplacementInput(Context, I, J, o, FlipMemInputs);
+ }
+ }
+
+ // This function creates two values that represent the outputs of the
+ // original I and J instructions. These are generally vector shuffles
+ // or extracts. In many cases, these will end up being unused and, thus,
+ // eliminated by later passes.
+ void BBVectorize::replaceOutputsOfPair(LLVMContext& Context, Instruction *I,
+ Instruction *J, Instruction *K,
+ Instruction *&InsertionPt,
+ Instruction *&K1, Instruction *&K2,
+ bool &FlipMemInputs) {
+ Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0);
+ Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), 1);
+
+ if (isa<StoreInst>(I)) {
+ AA->replaceWithNewValue(I, K);
+ AA->replaceWithNewValue(J, K);
+ } else {
+ Type *IType = I->getType();
+ Type *VType = getVecTypeForPair(IType);
+
+ if (IType->isVectorTy()) {
+ unsigned numElem = cast<VectorType>(IType)->getNumElements();
+ std::vector<Constant*> Mask1(numElem), Mask2(numElem);
+ for (unsigned v = 0; v < numElem; ++v) {
+ Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v);
+ Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElem+v);
+ }
+
+ K1 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(
+ FlipMemInputs ? Mask2 : Mask1),
+ getReplacementName(K, false, 1));
+ K2 = new ShuffleVectorInst(K, UndefValue::get(VType),
+ ConstantVector::get(
+ FlipMemInputs ? Mask1 : Mask2),
+ getReplacementName(K, false, 2));
+ } else {
+ K1 = ExtractElementInst::Create(K, FlipMemInputs ? CV1 : CV0,
+ getReplacementName(K, false, 1));
+ K2 = ExtractElementInst::Create(K, FlipMemInputs ? CV0 : CV1,
+ getReplacementName(K, false, 2));
+ }
+
+ K1->insertAfter(K);
+ K2->insertAfter(K1);
+ InsertionPt = K2;
+ }
+ }
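+
+  // Continuing the scalar example: if the fused instruction K is
+  //   %a = fadd <2 x double> ...   (K takes I's name in the caller)
+  // then the scalar outputs are recovered as
+  //   %a.v.r1 = extractelement <2 x double> %a, i32 0
+  //   %a.v.r2 = extractelement <2 x double> %a, i32 1
+  // and the caller rewrites all uses of the original I and J to K1 and K2.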
+
+  // Returns true if all uses of instruction I (including pairing-induced
+  // uses) can be moved after J.
+ bool BBVectorize::canMoveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I, Instruction *J) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (; cast<Instruction>(L) != J; ++L)
+ (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet);
+
+ assert(cast<Instruction>(L) == J &&
+ "Tracking has not proceeded far enough to check for dependencies");
+ // If J is now in the use set of I, then trackUsesOfI will return true
+ // and we have a dependency cycle (and the fusing operation must abort).
+ return !trackUsesOfI(Users, WriteSet, I, J, true, &LoadMoveSet);
+ }
+
+  // Move all uses of instruction I (including pairing-induced uses) after J.
+ void BBVectorize::moveUsesOfIAfterJ(BasicBlock &BB,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *&InsertionPt,
+ Instruction *I, Instruction *J) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+ for (; cast<Instruction>(L) != J;) {
+ if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSet)) {
+ // Move this instruction
+ Instruction *InstToMove = L; ++L;
+
+ DEBUG(dbgs() << "BBV: moving: " << *InstToMove <<
+ " to after " << *InsertionPt << "\n");
+ InstToMove->removeFromParent();
+ InstToMove->insertAfter(InsertionPt);
+ InsertionPt = InstToMove;
+ } else {
+ ++L;
+ }
+ }
+ }
+
+  // Collect all load instructions that are in the move set of a given first
+ // pair member. These loads depend on the first instruction, I, and so need
+ // to be moved after J (the second instruction) when the pair is fused.
+ void BBVectorize::collectPairLoadMoveSet(BasicBlock &BB,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet,
+ Instruction *I) {
+ // Skip to the first instruction past I.
+ BasicBlock::iterator L = llvm::next(BasicBlock::iterator(I));
+
+ DenseSet<Value *> Users;
+ AliasSetTracker WriteSet(*AA);
+
+ // Note: We cannot end the loop when we reach J because J could be moved
+ // farther down the use chain by another instruction pairing. Also, J
+ // could be before I if this is an inverted input.
+ for (BasicBlock::iterator E = BB.end(); cast<Instruction>(L) != E; ++L) {
+ if (trackUsesOfI(Users, WriteSet, I, L)) {
+ if (L->mayReadFromMemory())
+ LoadMoveSet.insert(ValuePair(L, I));
+ }
+ }
+ }
+
+ // In cases where both load/stores and the computation of their pointers
+ // are chosen for vectorization, we can end up in a situation where the
+ // aliasing analysis starts returning different query results as the
+ // process of fusing instruction pairs continues. Because the algorithm
+ // relies on finding the same use trees here as were found earlier, we'll
+ // need to precompute the necessary aliasing information here and then
+ // manually update it during the fusion process.
+ void BBVectorize::collectLoadMoveSet(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs,
+ std::multimap<Value *, Value *> &LoadMoveSet) {
+ for (std::vector<Value *>::iterator PI = PairableInsts.begin(),
+ PIE = PairableInsts.end(); PI != PIE; ++PI) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(*PI);
+ if (P == ChosenPairs.end()) continue;
+
+ Instruction *I = cast<Instruction>(P->first);
+ collectPairLoadMoveSet(BB, ChosenPairs, LoadMoveSet, I);
+ }
+ }
+
+ // This function fuses the chosen instruction pairs into vector instructions,
+  // taking care to preserve any needed scalar outputs and then reorders the
+ // remaining instructions as needed (users of the first member of the pair
+ // need to be moved to after the location of the second member of the pair
+ // because the vector instruction is inserted in the location of the pair's
+ // second member).
+ void BBVectorize::fuseChosenPairs(BasicBlock &BB,
+ std::vector<Value *> &PairableInsts,
+ DenseMap<Value *, Value *> &ChosenPairs) {
+ LLVMContext& Context = BB.getContext();
+
+ // During the vectorization process, the order of the pairs to be fused
+ // could be flipped. So we'll add each pair, flipped, into the ChosenPairs
+ // list. After a pair is fused, the flipped pair is removed from the list.
+ std::vector<ValuePair> FlippedPairs;
+ FlippedPairs.reserve(ChosenPairs.size());
+ for (DenseMap<Value *, Value *>::iterator P = ChosenPairs.begin(),
+ E = ChosenPairs.end(); P != E; ++P)
+ FlippedPairs.push_back(ValuePair(P->second, P->first));
+ for (std::vector<ValuePair>::iterator P = FlippedPairs.begin(),
+ E = FlippedPairs.end(); P != E; ++P)
+ ChosenPairs.insert(*P);
+
+ std::multimap<Value *, Value *> LoadMoveSet;
+ collectLoadMoveSet(BB, PairableInsts, ChosenPairs, LoadMoveSet);
+
+ DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n");
+
+ for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) {
+ DenseMap<Value *, Value *>::iterator P = ChosenPairs.find(PI);
+ if (P == ChosenPairs.end()) {
+ ++PI;
+ continue;
+ }
+
+ if (getDepthFactor(P->first) == 0) {
+ // These instructions are not really fused, but are tracked as though
+ // they are. Any case in which it would be interesting to fuse them
+ // will be taken care of by InstCombine.
+ --NumFusedOps;
+ ++PI;
+ continue;
+ }
+
+ Instruction *I = cast<Instruction>(P->first),
+ *J = cast<Instruction>(P->second);
+
+ DEBUG(dbgs() << "BBV: fusing: " << *I <<
+ " <-> " << *J << "\n");
+
+ // Remove the pair and flipped pair from the list.
+ DenseMap<Value *, Value *>::iterator FP = ChosenPairs.find(P->second);
+ assert(FP != ChosenPairs.end() && "Flipped pair not found in list");
+ ChosenPairs.erase(FP);
+ ChosenPairs.erase(P);
+
+ if (!canMoveUsesOfIAfterJ(BB, LoadMoveSet, I, J)) {
+ DEBUG(dbgs() << "BBV: fusion of: " << *I <<
+ " <-> " << *J <<
+ " aborted because of non-trivial dependency cycle\n");
+ --NumFusedOps;
+ ++PI;
+ continue;
+ }
+
+ bool FlipMemInputs;
+ unsigned NumOperands = I->getNumOperands();
+ SmallVector<Value *, 3> ReplacedOperands(NumOperands);
+ getReplacementInputsForPair(Context, I, J, ReplacedOperands,
+ FlipMemInputs);
+
+ // Make a copy of the original operation, change its type to the vector
+ // type and replace its operands with the vector operands.
+ Instruction *K = I->clone();
+ if (I->hasName()) K->takeName(I);
+
+ if (!isa<StoreInst>(K))
+ K->mutateType(getVecTypeForPair(I->getType()));
+
+ for (unsigned o = 0; o < NumOperands; ++o)
+ K->setOperand(o, ReplacedOperands[o]);
+
+ // If we've flipped the memory inputs, make sure that we take the correct
+ // alignment.
+ if (FlipMemInputs) {
+ if (isa<StoreInst>(K))
+ cast<StoreInst>(K)->setAlignment(cast<StoreInst>(J)->getAlignment());
+ else
+ cast<LoadInst>(K)->setAlignment(cast<LoadInst>(J)->getAlignment());
+ }
+
+ K->insertAfter(J);
+
+ // Instruction insertion point:
+ Instruction *InsertionPt = K;
+ Instruction *K1 = 0, *K2 = 0;
+ replaceOutputsOfPair(Context, I, J, K, InsertionPt, K1, K2,
+ FlipMemInputs);
+
+ // The use tree of the first original instruction must be moved to after
+ // the location of the second instruction. The entire use tree of the
+ // first instruction is disjoint from the input tree of the second
+ // (by definition), and so commutes with it.
+
+ moveUsesOfIAfterJ(BB, LoadMoveSet, InsertionPt, I, J);
+
+ if (!isa<StoreInst>(I)) {
+ I->replaceAllUsesWith(K1);
+ J->replaceAllUsesWith(K2);
+ AA->replaceWithNewValue(I, K1);
+ AA->replaceWithNewValue(J, K2);
+ }
+
+ // Instructions that may read from memory may be in the load move set.
+ // Once an instruction is fused, we no longer need its move set, and so
+ // the values of the map never need to be updated. However, when a load
+ // is fused, we need to merge the entries from both instructions in the
+ // pair in case those instructions were in the move set of some other
+ // yet-to-be-fused pair. The loads in question are the keys of the map.
+ if (I->mayReadFromMemory()) {
+ std::vector<ValuePair> NewSetMembers;
+ VPIteratorPair IPairRange = LoadMoveSet.equal_range(I);
+ VPIteratorPair JPairRange = LoadMoveSet.equal_range(J);
+ for (std::multimap<Value *, Value *>::iterator N = IPairRange.first;
+ N != IPairRange.second; ++N)
+ NewSetMembers.push_back(ValuePair(K, N->second));
+ for (std::multimap<Value *, Value *>::iterator N = JPairRange.first;
+ N != JPairRange.second; ++N)
+ NewSetMembers.push_back(ValuePair(K, N->second));
+ for (std::vector<ValuePair>::iterator A = NewSetMembers.begin(),
+ AE = NewSetMembers.end(); A != AE; ++A)
+ LoadMoveSet.insert(*A);
+ }
+
+ // Before removing I, set the iterator to the next instruction.
+ PI = llvm::next(BasicBlock::iterator(I));
+ if (cast<Instruction>(PI) == J)
+ ++PI;
+
+ SE->forgetValue(I);
+ SE->forgetValue(J);
+ I->eraseFromParent();
+ J->eraseFromParent();
+ }
+
+ DEBUG(dbgs() << "BBV: final: \n" << BB << "\n");
+ }
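+
+  // An end-to-end sketch on a hypothetical block: the chosen scalar pair
+  //   %a = fadd double %x1, %y1
+  //   %b = fadd double %x2, %y2
+  // becomes, after input fusion, cloning, and output extraction,
+  //   %a.v.i0.1 = insertelement <2 x double> undef, double %x1, i32 0
+  //   %a.v.i0.2 = insertelement <2 x double> %a.v.i0.1, double %x2, i32 1
+  //   %a.v.i1.1 = insertelement <2 x double> undef, double %y1, i32 0
+  //   %a.v.i1.2 = insertelement <2 x double> %a.v.i1.1, double %y2, i32 1
+  //   %a = fadd <2 x double> %a.v.i0.2, %a.v.i1.2
+  //   %a.v.r1 = extractelement <2 x double> %a, i32 0
+  //   %a.v.r2 = extractelement <2 x double> %a, i32 1
+  // with later passes typically cleaning up unused inserts and extracts.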
+}
+
+char BBVectorize::ID = 0;
+static const char bb_vectorize_name[] = "Basic-Block Vectorization";
+INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false)
+
+BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) {
+ return new BBVectorize(C);
+}
+
+bool
+llvm::vectorizeBasicBlock(Pass *P, BasicBlock &BB, const VectorizeConfig &C) {
+ BBVectorize BBVectorizer(P, C);
+ return BBVectorizer.vectorizeBB(BB);
+}
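+
+// A minimal usage sketch (illustrative only; assumes the legacy PassManager
+// interface of this release):
+//   PassManager PM;
+//   VectorizeConfig C;
+//   C.ReqChainDepth = 6;                  // hypothetical tuning choice
+//   PM.add(createBBVectorizePass(C));
+//   PM.run(M);                            // M is an llvm::Module
+// or, on a single block from within another pass:
+//   bool Changed = vectorizeBasicBlock(this, BB, VectorizeConfig());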
+
+//===----------------------------------------------------------------------===//
+VectorizeConfig::VectorizeConfig() {
+ VectorBits = ::VectorBits;
+ VectorizeInts = !::NoInts;
+ VectorizeFloats = !::NoFloats;
+ VectorizeCasts = !::NoCasts;
+ VectorizeMath = !::NoMath;
+ VectorizeFMA = !::NoFMA;
+ VectorizeMemOps = !::NoMemOps;
+ AlignedOnly = ::AlignedOnly;
+  ReqChainDepth = ::ReqChainDepth;
+ SearchLimit = ::SearchLimit;
+ MaxCandPairsForCycleCheck = ::MaxCandPairsForCycleCheck;
+ SplatBreaksChain = ::SplatBreaksChain;
+ MaxInsts = ::MaxInsts;
+ MaxIter = ::MaxIter;
+ NoMemOpBoost = ::NoMemOpBoost;
+ FastDep = ::FastDep;
+}
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
new file mode 100644
index 000000000000..4b6693015ce9
--- /dev/null
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMVectorize
+ BBVectorize.cpp
+ Vectorize.cpp
+ )
diff --git a/lib/Transforms/Vectorize/LLVMBuild.txt b/lib/Transforms/Vectorize/LLVMBuild.txt
new file mode 100644
index 000000000000..7167d273ae50
--- /dev/null
+++ b/lib/Transforms/Vectorize/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./lib/Transforms/Vectorize/LLVMBuild.txt -----------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Vectorize
+parent = Transforms
+library_name = Vectorize
+required_libraries = Analysis Core InstCombine Support Target TransformUtils
+
diff --git a/lib/Transforms/Vectorize/Makefile b/lib/Transforms/Vectorize/Makefile
new file mode 100644
index 000000000000..86c36585f23f
--- /dev/null
+++ b/lib/Transforms/Vectorize/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Transforms/Vectorize/Makefile -----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMVectorize
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
new file mode 100644
index 000000000000..1ef60029bcf4
--- /dev/null
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -0,0 +1,39 @@
+//===-- Vectorize.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMVectorize.a, which
+// implements several vectorization transformations over the LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Vectorize.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Vectorize.h"
+
+using namespace llvm;
+
+/// initializeVectorization - Initialize all passes linked into the
+/// Vectorization library.
+void llvm::initializeVectorization(PassRegistry &Registry) {
+ initializeBBVectorizePass(Registry);
+}
+
+void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
+ initializeVectorization(*unwrap(R));
+}
+
+void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createBBVectorizePass());
+}
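+
+// For example, from the C API (illustrative):
+//   LLVMPassManagerRef PM = LLVMCreatePassManager();
+//   LLVMAddBBVectorizePass(PM);
+//   LLVMRunPassManager(PM, Mod);          // Mod is an LLVMModuleRef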
+
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 18308f27cfe4..7b39efb7c7a0 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -89,7 +89,6 @@ enum PrefixType {
static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
assert(!Name.empty() && "Cannot get empty name!");
switch (Prefix) {
- default: llvm_unreachable("Bad prefix!");
case NoPrefix: break;
case GlobalPrefix: OS << '@'; break;
case LabelPrefix: break;
@@ -189,6 +188,7 @@ void TypePrinting::incorporateTypes(const Module &M) {
void TypePrinting::print(Type *Ty, raw_ostream &OS) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: OS << "void"; break;
+ case Type::HalfTyID: OS << "half"; break;
case Type::FloatTyID: OS << "float"; break;
case Type::DoubleTyID: OS << "double"; break;
case Type::X86_FP80TyID: OS << "x86_fp80"; break;
@@ -231,7 +231,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
if (I != NumberedTypes.end())
OS << '%' << I->second;
else // Not enumerated, print the hex address.
- OS << "%\"type 0x" << STy << '\"';
+ OS << "%\"type " << STy << '\"';
return;
}
case Type::PointerTyID: {
@@ -708,31 +708,37 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble ||
- &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) {
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
// make sure that we only output it in exponential format if we can parse
// the value back and get the same value.
//
bool ignored;
+ bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
- double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
- CFP->getValueAPF().convertToFloat();
- SmallString<128> StrVal;
- raw_svector_ostream(StrVal) << Val;
-
- // Check to make sure that the stringized number is not some string like
- // "Inf" or NaN, that atof will accept, but the lexer will not. Check
- // that the string matches the "[-+]?[0-9]" regex.
- //
- if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
- // Reparse stringized version!
- if (atof(StrVal.c_str()) == Val) {
- Out << StrVal.str();
- return;
+ bool isInf = CFP->getValueAPF().isInfinity();
+ bool isNaN = CFP->getValueAPF().isNaN();
+ if (!isHalf && !isInf && !isNaN) {
+ double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
+ CFP->getValueAPF().convertToFloat();
+ SmallString<128> StrVal;
+ raw_svector_ostream(StrVal) << Val;
+
+      // Check to make sure that the stringized number is not some string like
+      // "Inf" or "NaN", which reparsing will accept but the lexer will not.
+      // Check that the string matches the "[-+]?[0-9]" regex.
+ //
+ if ((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) {
+ // Reparse stringized version!
+ if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
+ Out << StrVal.str();
+ return;
+ }
}
}
// Otherwise we could not reparse it to exactly the same value, so we must
@@ -743,7 +749,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
"assuming that double is 64 bits!");
char Buffer[40];
APFloat apf = CFP->getValueAPF();
- // Floats are represented in ASCII IR as double, convert.
+ // Halves and floats are represented in ASCII IR as double, convert.
if (!isDouble)
apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
&ignored);
@@ -823,35 +829,53 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
+ }
+ Out << ']';
+ return;
+ }
+
+ if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
// As a special case, print the array as a string if it is an array of
// i8 with ConstantInt values.
- //
- Type *ETy = CA->getType()->getElementType();
if (CA->isString()) {
Out << "c\"";
PrintEscapedString(CA->getAsString(), Out);
Out << '"';
- } else { // Cannot output in string format...
- Out << '[';
- if (CA->getNumOperands()) {
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine,
- Context);
- for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
- Out << ", ";
- TypePrinter.print(ETy, Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
- Context);
- }
- }
- Out << ']';
+ return;
}
+
+ Type *ETy = CA->getType()->getElementType();
+ Out << '[';
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
+ &TypePrinter, Machine,
+ Context);
+ for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
+ Out << ", ";
+ TypePrinter.print(ETy, Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
+ Machine, Context);
+ }
+ Out << ']';
return;
}
+
if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
if (CS->getType()->isPacked())
Out << '<';
@@ -882,21 +906,19 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
- if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- Type *ETy = CP->getType()->getElementType();
- assert(CP->getNumOperands() > 0 &&
- "Number of operands for a PackedConst must be > 0");
+ if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
+ Type *ETy = CV->getType()->getVectorElementType();
Out << '<';
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
- Context);
- for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
+ WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
+ Machine, Context);
+ for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
- Context);
+ WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
+ Machine, Context);
}
Out << '>';
return;
@@ -1162,7 +1184,6 @@ void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
return;
switch (SynchScope) {
- default: Out << " <bad scope " << int(SynchScope) << ">"; break;
case SingleThread: Out << " singlethread"; break;
case CrossThread: break;
}
@@ -1710,13 +1731,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ", ";
writeOperand(SI.getDefaultDest(), true);
Out << " [";
- // Skip the first item since that's the default case.
- unsigned NumCases = SI.getNumCases();
- for (unsigned i = 1; i < NumCases; ++i) {
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
+ i != e; ++i) {
Out << "\n ";
- writeOperand(SI.getCaseValue(i), true);
+ writeOperand(i.getCaseValue(), true);
Out << ", ";
- writeOperand(SI.getSuccessor(i), true);
+ writeOperand(i.getCaseSuccessor(), true);
}
Out << "\n ]";
} else if (isa<IndirectBrInst>(I)) {
@@ -1988,7 +2008,7 @@ static void WriteMDNodeComment(const MDNode *Node,
if (!CI) return;
APInt Val = CI->getValue();
APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
- if (Val.ult(LLVMDebugVersion))
+ if (Val.ult(LLVMDebugVersion11))
return;
Out.PadToColumn(50);
@@ -2110,3 +2130,6 @@ void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), 0); }
+
+// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
+void NamedMDNode::dump() const { print(dbgs(), 0); }
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 485be75d1b12..c05132be5a44 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -76,6 +76,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "naked ";
if (Attrs & Attribute::NonLazyBind)
Result += "nonlazybind ";
+ if (Attrs & Attribute::AddressSafety)
+ Result += "address_safety ";
if (Attrs & Attribute::StackAlignment) {
Result += "alignstack(";
Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
@@ -152,8 +154,10 @@ public:
}
static void Profile(FoldingSetNodeID &ID, const AttributeWithIndex *Attr,
unsigned NumAttrs) {
- for (unsigned i = 0; i != NumAttrs; ++i)
- ID.AddInteger(uint64_t(Attr[i].Attrs) << 32 | unsigned(Attr[i].Index));
+ for (unsigned i = 0; i != NumAttrs; ++i) {
+ ID.AddInteger(Attr[i].Attrs.Raw());
+ ID.AddInteger(Attr[i].Index);
+ }
}
};
}
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index b849d3ef8dc2..ea3d4ba22436 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -38,105 +38,31 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
return false;
Name = Name.substr(5); // Strip off "llvm."
- FunctionType *FTy = F->getFunctionType();
- Module *M = F->getParent();
-
switch (Name[0]) {
default: break;
- case 'a':
- if (Name.startswith("atomic.cmp.swap") ||
- Name.startswith("atomic.swap") ||
- Name.startswith("atomic.load.add") ||
- Name.startswith("atomic.load.sub") ||
- Name.startswith("atomic.load.and") ||
- Name.startswith("atomic.load.nand") ||
- Name.startswith("atomic.load.or") ||
- Name.startswith("atomic.load.xor") ||
- Name.startswith("atomic.load.max") ||
- Name.startswith("atomic.load.min") ||
- Name.startswith("atomic.load.umax") ||
- Name.startswith("atomic.load.umin"))
- return true;
- case 'i':
- // This upgrades the old llvm.init.trampoline to the new
- // llvm.init.trampoline and llvm.adjust.trampoline pair.
- if (Name == "init.trampoline") {
- // The new llvm.init.trampoline returns nothing.
- if (FTy->getReturnType()->isVoidTy())
- break;
-
- assert(FTy->getNumParams() == 3 && "old init.trampoline takes 3 args!");
-
- // Change the name of the old intrinsic so that we can play with its type.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(
- NameTmp,
- Type::getVoidTy(M->getContext()),
- FTy->getParamType(0), FTy->getParamType(1),
- FTy->getParamType(2), (Type *)0));
+ case 'c': {
+ if (Name.startswith("ctlz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
+ F->arg_begin()->getType());
return true;
}
- case 'm':
- if (Name == "memory.barrier")
- return true;
- case 'p':
- // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
- // which is a instruction / data cache identifier. The old version only
- // implicitly accepted the data version.
- if (Name == "prefetch") {
- // Don't do anything if it has the correct number of arguments already
- if (FTy->getNumParams() == 4)
- break;
-
- assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
- // and typed function below.
- std::string NameTmp = F->getName();
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
- FTy->getReturnType(),
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(2),
- (Type*)0));
+ if (Name.startswith("cttz.") && F->arg_size() == 1) {
+ F->setName(Name + ".old");
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
+ F->arg_begin()->getType());
return true;
}
-
break;
+ }
case 'x': {
- const char *NewFnName = NULL;
- // This fixes the poorly named crc32 intrinsics.
- if (Name == "x86.sse42.crc32.8")
- NewFnName = "llvm.x86.sse42.crc32.32.8";
- else if (Name == "x86.sse42.crc32.16")
- NewFnName = "llvm.x86.sse42.crc32.32.16";
- else if (Name == "x86.sse42.crc32.32")
- NewFnName = "llvm.x86.sse42.crc32.32.32";
- else if (Name == "x86.sse42.crc64.8")
- NewFnName = "llvm.x86.sse42.crc32.64.8";
- else if (Name == "x86.sse42.crc64.64")
- NewFnName = "llvm.x86.sse42.crc32.64.64";
-
- if (NewFnName) {
- F->setName(NewFnName);
- NewFn = F;
+ if (Name.startswith("x86.sse2.pcmpeq.") ||
+ Name.startswith("x86.sse2.pcmpgt.") ||
+ Name.startswith("x86.avx2.pcmpeq.") ||
+ Name.startswith("x86.avx2.pcmpgt.")) {
+ NewFn = 0;
return true;
}
-
- // Calls to these instructions are transformed into unaligned loads.
- if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
- Name == "x86.sse2.loadu.pd")
- return true;
-
- // Calls to these instructions are transformed into nontemporal stores.
- if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
- Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
- return true;
-
break;
}
}
@@ -171,188 +97,52 @@ bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
- ImmutableCallSite CS(CI);
+ IRBuilder<> Builder(C);
+ Builder.SetInsertPoint(CI->getParent(), CI);
- assert(F && "CallInst has no function associated with it.");
+ assert(F && "Intrinsic call is not direct?");
if (!NewFn) {
- if (F->getName() == "llvm.x86.sse.loadu.ps" ||
- F->getName() == "llvm.x86.sse2.loadu.dq" ||
- F->getName() == "llvm.x86.sse2.loadu.pd") {
- // Convert to a native, unaligned load.
- Type *VecTy = CI->getType();
- Type *IntTy = IntegerType::get(C, 128);
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Value *BC = Builder.CreateBitCast(CI->getArgOperand(0),
- PointerType::getUnqual(IntTy),
- "cast");
- LoadInst *LI = Builder.CreateLoad(BC, CI->getName());
- LI->setAlignment(1); // Unaligned load.
- BC = Builder.CreateBitCast(LI, VecTy, "new.cast");
-
- // Fix up all the uses with our new load.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(BC);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse.movnt.ps" ||
- F->getName() == "llvm.x86.sse2.movnt.dq" ||
- F->getName() == "llvm.x86.sse2.movnt.pd" ||
- F->getName() == "llvm.x86.sse2.movnt.i") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- Module *M = F->getParent();
- SmallVector<Value *, 1> Elts;
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- MDNode *Node = MDNode::get(C, Elts);
-
- Value *Arg0 = CI->getArgOperand(0);
- Value *Arg1 = CI->getArgOperand(1);
-
- // Convert the type of the pointer to a pointer to the stored type.
- Value *BC = Builder.CreateBitCast(Arg0,
- PointerType::getUnqual(Arg1->getType()),
- "cast");
- StoreInst *SI = Builder.CreateStore(Arg1, BC);
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
-
- // Remove intrinsic.
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic.cmp.swap")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- Value *Val = Builder.CreateAtomicCmpXchg(CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getArgOperand(2),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName().startswith("llvm.atomic")) {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- AtomicRMWInst::BinOp Op;
- if (F->getName().startswith("llvm.atomic.swap"))
- Op = AtomicRMWInst::Xchg;
- else if (F->getName().startswith("llvm.atomic.load.add"))
- Op = AtomicRMWInst::Add;
- else if (F->getName().startswith("llvm.atomic.load.sub"))
- Op = AtomicRMWInst::Sub;
- else if (F->getName().startswith("llvm.atomic.load.and"))
- Op = AtomicRMWInst::And;
- else if (F->getName().startswith("llvm.atomic.load.nand"))
- Op = AtomicRMWInst::Nand;
- else if (F->getName().startswith("llvm.atomic.load.or"))
- Op = AtomicRMWInst::Or;
- else if (F->getName().startswith("llvm.atomic.load.xor"))
- Op = AtomicRMWInst::Xor;
- else if (F->getName().startswith("llvm.atomic.load.max"))
- Op = AtomicRMWInst::Max;
- else if (F->getName().startswith("llvm.atomic.load.min"))
- Op = AtomicRMWInst::Min;
- else if (F->getName().startswith("llvm.atomic.load.umax"))
- Op = AtomicRMWInst::UMax;
- else if (F->getName().startswith("llvm.atomic.load.umin"))
- Op = AtomicRMWInst::UMin;
- else
- llvm_unreachable("Unknown atomic");
-
- Value *Val = Builder.CreateAtomicRMW(Op, CI->getArgOperand(0),
- CI->getArgOperand(1),
- Monotonic);
-
- // Replace intrinsic.
- Val->takeName(CI);
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Val);
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.memory.barrier") {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // Note that this conversion ignores the "device" bit; it was not really
- // well-defined, and got abused because nobody paid enough attention to
- // get it right. In practice, this probably doesn't matter; application
- // code generally doesn't need anything stronger than
- // SequentiallyConsistent (and realistically, SequentiallyConsistent
- // is lowered to a strong enough barrier for almost anything).
-
- if (cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue())
- Builder.CreateFence(SequentiallyConsistent);
- else if (!cast<ConstantInt>(CI->getArgOperand(0))->getZExtValue())
- Builder.CreateFence(Release);
- else if (!cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue())
- Builder.CreateFence(Acquire);
- else
- Builder.CreateFence(AcquireRelease);
-
- // Remove intrinsic.
- CI->eraseFromParent();
+ // Get the Function's name.
+ StringRef Name = F->getName();
+
+ Value *Rep;
+    // Upgrade packed integer vector compare intrinsics to compare instructions.
+ if (Name.startswith("llvm.x86.sse2.pcmpeq.") ||
+ Name.startswith("llvm.x86.avx2.pcmpeq.")) {
+ Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpeq");
+      // Need to sign extend since icmp returns a vector of i1.
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (Name.startswith("llvm.x86.sse2.pcmpgt.") ||
+ Name.startswith("llvm.x86.avx2.pcmpgt.")) {
+ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpgt");
+      // Need to sign extend since icmp returns a vector of i1.
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
- return;
- }
- switch (NewFn->getIntrinsicID()) {
- case Intrinsic::prefetch: {
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
- llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
-
- // Add the extra "data cache" argument
- Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2),
- llvm::ConstantInt::get(I32Ty, 1) };
- CallInst *NewCI = CallInst::Create(NewFn, Operands,
- CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(NewCI);
-
- // Clean up the old call now that it has been completely upgraded.
+ CI->replaceAllUsesWith(Rep);
CI->eraseFromParent();
- break;
+ return;
}
- case Intrinsic::init_trampoline: {
-
- // Transform
- // %tramp = call i8* llvm.init.trampoline (i8* x, i8* y, i8* z)
- // to
- // call void llvm.init.trampoline (i8* %x, i8* %y, i8* %z)
- // %tramp = call i8* llvm.adjust.trampoline (i8* %x)
-
- Function *AdjustTrampolineFn =
- cast<Function>(Intrinsic::getDeclaration(F->getParent(),
- Intrinsic::adjust_trampoline));
-
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI);
- Builder.CreateCall3(NewFn, CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2));
-
- CallInst *AdjustCall = Builder.CreateCall(AdjustTrampolineFn,
- CI->getArgOperand(0),
- CI->getName());
- if (!CI->use_empty())
- CI->replaceAllUsesWith(AdjustCall);
+ switch (NewFn->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unknown function for CallInst upgrade.");
+
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ assert(CI->getNumArgOperands() == 1 &&
+ "Mismatch between function args and call args");
+    std::string Name = CI->getName().str(); // setName frees the old name.
+ CI->setName(Name + ".old");
+ CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0),
+ Builder.getFalse(), Name));
CI->eraseFromParent();
- break;
- }
+ return;
}
}
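The pcmpeq/pcmpgt replacement built in the hunk above follows a standard widen-the-predicate idiom. A self-contained sketch of it, with an illustrative name and 3.0-era headers:

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // An SSE2/AVX2 packed compare becomes an elementwise icmp whose <N x i1>
    // result is sign-extended, so each true lane is all-ones as pcmpeq defines.
    static Value *upgradePackedCmpEq(IRBuilder<> &Builder, CallInst *CI) {
      Value *Cmp = Builder.CreateICmpEQ(CI->getArgOperand(0),
                                        CI->getArgOperand(1), "pcmpeq");
      return Builder.CreateSExt(Cmp, CI->getType());
    }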
@@ -378,291 +168,3 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
}
}
-/// This function strips all debug info intrinsics, except for llvm.dbg.declare.
-/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
-/// strips that use.
-void llvm::CheckDebugInfoIntrinsics(Module *M) {
- if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
- while (!FuncStart->use_empty())
- cast<CallInst>(FuncStart->use_back())->eraseFromParent();
- FuncStart->eraseFromParent();
- }
-
- if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
- while (!StopPoint->use_empty())
- cast<CallInst>(StopPoint->use_back())->eraseFromParent();
- StopPoint->eraseFromParent();
- }
-
- if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
- while (!RegionStart->use_empty())
- cast<CallInst>(RegionStart->use_back())->eraseFromParent();
- RegionStart->eraseFromParent();
- }
-
- if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
- while (!RegionEnd->use_empty())
- cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
- RegionEnd->eraseFromParent();
- }
-
- if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
- if (!Declare->use_empty()) {
- DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
- if (!isa<MDNode>(DDI->getArgOperand(0)) ||
- !isa<MDNode>(DDI->getArgOperand(1))) {
- while (!Declare->use_empty()) {
- CallInst *CI = cast<CallInst>(Declare->use_back());
- CI->eraseFromParent();
- }
- Declare->eraseFromParent();
- }
- }
- }
-}
-
-/// FindExnAndSelIntrinsics - Find the eh_exception and eh_selector intrinsic
-/// calls reachable from the unwind basic block.
-static void FindExnAndSelIntrinsics(BasicBlock *BB, CallInst *&Exn,
- CallInst *&Sel,
- SmallPtrSet<BasicBlock*, 8> &Visited) {
- if (!Visited.insert(BB)) return;
-
- for (BasicBlock::iterator
- I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- switch (CI->getCalledFunction()->getIntrinsicID()) {
- default: break;
- case Intrinsic::eh_exception:
- assert(!Exn && "Found more than one eh.exception call!");
- Exn = CI;
- break;
- case Intrinsic::eh_selector:
- assert(!Sel && "Found more than one eh.selector call!");
- Sel = CI;
- break;
- }
-
- if (Exn && Sel) return;
- }
- }
-
- if (Exn && Sel) return;
-
- for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- FindExnAndSelIntrinsics(*I, Exn, Sel, Visited);
- if (Exn && Sel) return;
- }
-}
-
-/// TransferClausesToLandingPadInst - Transfer the exception handling clauses
-/// from the eh_selector call to the new landingpad instruction.
-static void TransferClausesToLandingPadInst(LandingPadInst *LPI,
- CallInst *EHSel) {
- LLVMContext &Context = LPI->getContext();
- unsigned N = EHSel->getNumArgOperands();
-
- for (unsigned i = N - 1; i > 1; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(EHSel->getArgOperand(i))){
- unsigned FilterLength = CI->getZExtValue();
- unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert(FirstCatch <= N && "Invalid filter length");
-
- if (FirstCatch < N)
- for (unsigned j = FirstCatch; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-
- if (!FilterLength) {
- // Cleanup.
- LPI->setCleanup(true);
- } else {
- // Filter.
- SmallVector<Constant *, 4> TyInfo;
- TyInfo.reserve(FilterLength - 1);
- for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(cast<Constant>(EHSel->getArgOperand(j)));
- ArrayType *AType =
- ArrayType::get(!TyInfo.empty() ? TyInfo[0]->getType() :
- PointerType::getUnqual(Type::getInt8Ty(Context)),
- TyInfo.size());
- LPI->addClause(ConstantArray::get(AType, TyInfo));
- }
-
- N = i;
- }
- }
-
- if (N > 2)
- for (unsigned j = 2; j < N; ++j) {
- Value *Val = EHSel->getArgOperand(j);
- if (!Val->hasName() || Val->getName() != "llvm.eh.catch.all.value") {
- LPI->addClause(EHSel->getArgOperand(j));
- } else {
- GlobalVariable *GV = cast<GlobalVariable>(Val);
- LPI->addClause(GV->getInitializer());
- }
- }
-}
-
-/// This function upgrades the old pre-3.0 exception handling system to the new
-/// one. N.B. This will be removed in 3.1.
-void llvm::UpgradeExceptionHandling(Module *M) {
- Function *EHException = M->getFunction("llvm.eh.exception");
- Function *EHSelector = M->getFunction("llvm.eh.selector");
- if (!EHException || !EHSelector)
- return;
-
- LLVMContext &Context = M->getContext();
- Type *ExnTy = PointerType::getUnqual(Type::getInt8Ty(Context));
- Type *SelTy = Type::getInt32Ty(Context);
- Type *LPadSlotTy = StructType::get(ExnTy, SelTy, NULL);
-
- // This map links the invoke instruction with the eh.exception and eh.selector
- // calls associated with it.
- DenseMap<InvokeInst*, std::pair<Value*, Value*> > InvokeToIntrinsicsMap;
- for (Module::iterator
- I = M->begin(), E = M->end(); I != E; ++I) {
- Function &F = *I;
-
- for (Function::iterator
- II = F.begin(), IE = F.end(); II != IE; ++II) {
- BasicBlock *BB = &*II;
- InvokeInst *Inst = dyn_cast<InvokeInst>(BB->getTerminator());
- if (!Inst) continue;
- BasicBlock *UnwindDest = Inst->getUnwindDest();
- if (UnwindDest->isLandingPad()) continue; // Already converted.
-
- SmallPtrSet<BasicBlock*, 8> Visited;
- CallInst *Exn = 0;
- CallInst *Sel = 0;
- FindExnAndSelIntrinsics(UnwindDest, Exn, Sel, Visited);
- assert(Exn && Sel && "Cannot find eh.exception and eh.selector calls!");
- InvokeToIntrinsicsMap[Inst] = std::make_pair(Exn, Sel);
- }
- }
-
- // This map stores the slots where the exception object and selector value are
- // stored within a function.
- DenseMap<Function*, std::pair<Value*, Value*> > FnToLPadSlotMap;
- SmallPtrSet<Instruction*, 32> DeadInsts;
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- InvokeInst *Invoke = I->first;
- BasicBlock *UnwindDest = Invoke->getUnwindDest();
- Function *F = UnwindDest->getParent();
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
-
- // Store the exception object and selector value in the entry block.
- Value *ExnSlot = 0;
- Value *SelSlot = 0;
- if (!FnToLPadSlotMap[F].first) {
- BasicBlock *Entry = &F->front();
- ExnSlot = new AllocaInst(ExnTy, "exn", Entry->getTerminator());
- SelSlot = new AllocaInst(SelTy, "sel", Entry->getTerminator());
- FnToLPadSlotMap[F] = std::make_pair(ExnSlot, SelSlot);
- } else {
- ExnSlot = FnToLPadSlotMap[F].first;
- SelSlot = FnToLPadSlotMap[F].second;
- }
-
- if (!UnwindDest->getSinglePredecessor()) {
- // The unwind destination doesn't have a single predecessor. Create an
- // unwind destination which has only one predecessor.
- BasicBlock *NewBB = BasicBlock::Create(Context, "new.lpad",
- UnwindDest->getParent());
- BranchInst::Create(UnwindDest, NewBB);
- Invoke->setUnwindDest(NewBB);
-
- // Fix up any PHIs in the original unwind destination block.
- for (BasicBlock::iterator
- II = UnwindDest->begin(); isa<PHINode>(II); ++II) {
- PHINode *PN = cast<PHINode>(II);
- int Idx = PN->getBasicBlockIndex(Invoke->getParent());
- if (Idx == -1) continue;
- PN->setIncomingBlock(Idx, NewBB);
- }
-
- UnwindDest = NewBB;
- }
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(UnwindDest, UnwindDest->getFirstInsertionPt());
-
- Value *PersFn = Sel->getArgOperand(1);
- LandingPadInst *LPI = Builder.CreateLandingPad(LPadSlotTy, PersFn, 0);
- Value *LPExn = Builder.CreateExtractValue(LPI, 0);
- Value *LPSel = Builder.CreateExtractValue(LPI, 1);
- Builder.CreateStore(LPExn, ExnSlot);
- Builder.CreateStore(LPSel, SelSlot);
-
- TransferClausesToLandingPadInst(LPI, Sel);
-
- DeadInsts.insert(Exn);
- DeadInsts.insert(Sel);
- }
-
- // Replace the old intrinsic calls with the values from the landingpad
- // instruction(s). These values were stored in allocas for us to use here.
- for (DenseMap<InvokeInst*, std::pair<Value*, Value*> >::iterator
- I = InvokeToIntrinsicsMap.begin(), E = InvokeToIntrinsicsMap.end();
- I != E; ++I) {
- std::pair<Value*, Value*> EHIntrinsics = I->second;
- CallInst *Exn = cast<CallInst>(EHIntrinsics.first);
- CallInst *Sel = cast<CallInst>(EHIntrinsics.second);
- BasicBlock *Parent = Exn->getParent();
-
- std::pair<Value*,Value*> ExnSelSlots = FnToLPadSlotMap[Parent->getParent()];
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(Parent, Exn);
- LoadInst *LPExn = Builder.CreateLoad(ExnSelSlots.first, "exn.load");
- LoadInst *LPSel = Builder.CreateLoad(ExnSelSlots.second, "sel.load");
-
- Exn->replaceAllUsesWith(LPExn);
- Sel->replaceAllUsesWith(LPSel);
- }
-
- // Remove the dead instructions.
- for (SmallPtrSet<Instruction*, 32>::iterator
- I = DeadInsts.begin(), E = DeadInsts.end(); I != E; ++I) {
- Instruction *Inst = *I;
- Inst->eraseFromParent();
- }
-
- // Replace calls to "llvm.eh.resume" with the 'resume' instruction. Load the
- // exception and selector values from the stored place.
- Function *EHResume = M->getFunction("llvm.eh.resume");
- if (!EHResume) return;
-
- while (!EHResume->use_empty()) {
- CallInst *Resume = cast<CallInst>(EHResume->use_back());
- BasicBlock *BB = Resume->getParent();
-
- IRBuilder<> Builder(Context);
- Builder.SetInsertPoint(BB, Resume);
-
- Value *LPadVal =
- Builder.CreateInsertValue(UndefValue::get(LPadSlotTy),
- Resume->getArgOperand(0), 0, "lpad.val");
- LPadVal = Builder.CreateInsertValue(LPadVal, Resume->getArgOperand(1),
- 1, "lpad.val");
- Builder.CreateResume(LPadVal);
-
- // Remove all instructions after the 'resume.'
- BasicBlock::iterator I = Resume;
- while (I != BB->end()) {
- Instruction *Inst = &*I++;
- Inst->eraseFromParent();
- }
- }
-}
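The block deleted above rewrote pre-3.0 invoke unwinding into landingpad form. The core construction it performed, reduced to a sketch; PersFn stands in for the personality function taken from the old eh.selector call, and the header paths are the 3.0-era ones:

    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Build the landingpad that replaces paired eh.exception/eh.selector
    // calls: it yields an {exception pointer, selector} pair directly.
    static void buildLandingPad(IRBuilder<> &Builder, LLVMContext &C,
                                Value *PersFn) {
      Type *ExnTy = Type::getInt8PtrTy(C);
      Type *SelTy = Type::getInt32Ty(C);
      Type *LPadTy = StructType::get(ExnTy, SelTy, NULL);
      LandingPadInst *LPI = Builder.CreateLandingPad(LPadTy, PersFn, 0, "lpad");
      Value *Exn = Builder.CreateExtractValue(LPI, 0, "exn");
      Value *Sel = Builder.CreateExtractValue(LPI, 1, "sel");
      // Exn and Sel take over all uses of the old intrinsic results.
      (void)Exn; (void)Sel;
    }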
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index d0aa275c8fe2..d353b0adcff7 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -366,3 +366,6 @@ bool BasicBlock::isLandingPad() const {
LandingPadInst *BasicBlock::getLandingPadInst() {
return dyn_cast<LandingPadInst>(getFirstNonPHI());
}
+const LandingPadInst *BasicBlock::getLandingPadInst() const {
+ return dyn_cast<LandingPadInst>(getFirstNonPHI());
+}
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 0404297500c0..e1efcdadc711 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -8,7 +8,6 @@ add_llvm_library(LLVMCore
ConstantFold.cpp
Constants.cpp
Core.cpp
- DebugInfoProbe.cpp
DebugLoc.cpp
Dominators.cpp
Function.cpp
@@ -37,5 +36,3 @@ add_llvm_library(LLVMCore
ValueTypes.cpp
Verifier.cpp
)
-
-add_llvm_library_dependencies(LLVMCore LLVMSupport)
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 30bae7162cea..b743287adf39 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -38,11 +38,10 @@ using namespace llvm;
// ConstantFold*Instruction Implementations
//===----------------------------------------------------------------------===//
-/// BitCastConstantVector - Convert the specified ConstantVector node to the
+/// BitCastConstantVector - Convert the specified vector Constant node to the
/// specified vector type. At this point, we know that the elements of the
/// input vector constant are all simple integer or FP values.
-static Constant *BitCastConstantVector(ConstantVector *CV,
- VectorType *DstTy) {
+static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
if (CV->isAllOnesValue()) return Constant::getAllOnesValue(DstTy);
if (CV->isNullValue()) return Constant::getNullValue(DstTy);
@@ -51,22 +50,21 @@ static Constant *BitCastConstantVector(ConstantVector *CV,
// doing so requires endianness information. This should be handled by
// Analysis/ConstantFolding.cpp
unsigned NumElts = DstTy->getNumElements();
- if (NumElts != CV->getNumOperands())
+ if (NumElts != CV->getType()->getVectorNumElements())
return 0;
+
+ Type *DstEltTy = DstTy->getElementType();
// Check to verify that all elements of the input are simple.
+ SmallVector<Constant*, 16> Result;
for (unsigned i = 0; i != NumElts; ++i) {
- if (!isa<ConstantInt>(CV->getOperand(i)) &&
- !isa<ConstantFP>(CV->getOperand(i)))
- return 0;
+ Constant *C = CV->getAggregateElement(i);
+ if (C == 0) return 0;
+ C = ConstantExpr::getBitCast(C, DstEltTy);
+ if (isa<ConstantExpr>(C)) return 0;
+ Result.push_back(C);
}
- // Bitcast each element now.
- std::vector<Constant*> Result;
- Type *DstEltTy = DstTy->getElementType();
- for (unsigned i = 0; i != NumElts; ++i)
- Result.push_back(ConstantExpr::getBitCast(CV->getOperand(i),
- DstEltTy));
return ConstantVector::get(Result);
}
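With the helper now accepting any vector Constant, bitcasts of the new ConstantDataVector form fold just like classic ConstantVectors. A usage sketch with assumed values:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Folding a bitcast of a <4 x float> splat: each 1.0 element becomes the
    // i32 bit pattern 0x3F800000 via the elementwise path above.
    static Constant *foldSplatBitcast(LLVMContext &C) {
      Constant *Splat = ConstantVector::getSplat(
          4, ConstantFP::get(Type::getFloatTy(C), 1.0));
      Type *DstTy = VectorType::get(Type::getInt32Ty(C), 4);
      return ConstantExpr::getBitCast(Splat, DstTy);
    }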
@@ -104,7 +102,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
// the first element. If so, return the appropriate GEP instruction.
if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
- if (PTy->getAddressSpace() == DPTy->getAddressSpace()) {
+ if (PTy->getAddressSpace() == DPTy->getAddressSpace()
+ && DPTy->getElementType()->isSized()) {
SmallVector<Value*, 8> IdxList;
Value *Zero =
Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
@@ -141,8 +140,8 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(DestTy);
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- return BitCastConstantVector(CV, DestPTy);
+    // Handle ConstantVector and ConstantDataVector.
+ return BitCastConstantVector(V, DestPTy);
}
// Canonicalize scalar-to-vector bitcasts into vector-to-vector bitcasts
@@ -548,18 +547,17 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// If the cast operand is a constant vector, perform the cast by
// operating on each element. In the cast of bitcasts, the element
// count may be mismatched; don't attempt to handle that here.
- if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (DestTy->isVectorTy() &&
- cast<VectorType>(DestTy)->getNumElements() ==
- CV->getType()->getNumElements()) {
- std::vector<Constant*> res;
- VectorType *DestVecTy = cast<VectorType>(DestTy);
- Type *DstEltTy = DestVecTy->getElementType();
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- res.push_back(ConstantExpr::getCast(opc,
- CV->getOperand(i), DstEltTy));
- return ConstantVector::get(res);
- }
+ if ((isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) &&
+ DestTy->isVectorTy() &&
+ DestTy->getVectorNumElements() == V->getType()->getVectorNumElements()) {
+ SmallVector<Constant*, 16> res;
+ VectorType *DestVecTy = cast<VectorType>(DestTy);
+ Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = V->getType()->getVectorNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc,
+ V->getAggregateElement(i), DstEltTy));
+ return ConstantVector::get(res);
+ }
// We actually have to do a cast now. Perform the cast according to the
// opcode specified.
@@ -571,7 +569,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
bool ignored;
APFloat Val = FPC->getValueAPF();
- Val.convert(DestTy->isFloatTy() ? APFloat::IEEEsingle :
+ Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf :
+ DestTy->isFloatTy() ? APFloat::IEEEsingle :
DestTy->isDoubleTy() ? APFloat::IEEEdouble :
DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
DestTy->isFP128Ty() ? APFloat::IEEEquad :
@@ -690,45 +689,27 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
Constant *V1, Constant *V2) {
- if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond))
- return CB->getZExtValue() ? V1 : V2;
-
- // Check for zero aggregate and ConstantVector of zeros
+ // Check for i1 and vector true/false conditions.
if (Cond->isNullValue()) return V2;
-
- if (ConstantVector* CondV = dyn_cast<ConstantVector>(Cond)) {
-
- if (CondV->isAllOnesValue()) return V1;
-
- VectorType *VTy = cast<VectorType>(V1->getType());
- ConstantVector *CP1 = dyn_cast<ConstantVector>(V1);
- ConstantVector *CP2 = dyn_cast<ConstantVector>(V2);
-
- if ((CP1 || isa<ConstantAggregateZero>(V1)) &&
- (CP2 || isa<ConstantAggregateZero>(V2))) {
-
- // Find the element type of the returned vector
- Type *EltTy = VTy->getElementType();
- unsigned NumElem = VTy->getNumElements();
- std::vector<Constant*> Res(NumElem);
-
- bool Valid = true;
- for (unsigned i = 0; i < NumElem; ++i) {
- ConstantInt* c = dyn_cast<ConstantInt>(CondV->getOperand(i));
- if (!c) {
- Valid = false;
- break;
- }
- Constant *C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- Constant *C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res[i] = c->getZExtValue() ? C1 : C2;
- }
- // If we were able to build the vector, return it
- if (Valid) return ConstantVector::get(Res);
+ if (Cond->isAllOnesValue()) return V1;
+
+ // If the condition is a vector constant, fold the result elementwise.
+ if (ConstantVector *CondV = dyn_cast<ConstantVector>(Cond)) {
+ SmallVector<Constant*, 16> Result;
+ for (unsigned i = 0, e = V1->getType()->getVectorNumElements(); i != e;++i){
+ ConstantInt *Cond = dyn_cast<ConstantInt>(CondV->getOperand(i));
+ if (Cond == 0) break;
+
+      Constant *Res = (Cond->getZExtValue() ? V1 : V2)->getAggregateElement(i);
+ if (Res == 0) break;
+ Result.push_back(Res);
}
+
+ // If we were able to build the vector, return it.
+ if (Result.size() == V1->getType()->getVectorNumElements())
+ return ConstantVector::get(Result);
}
-
if (isa<UndefValue>(Cond)) {
if (isa<UndefValue>(V1)) return V1;
return V2;
@@ -754,22 +735,19 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
Constant *Idx) {
if (isa<UndefValue>(Val)) // ee(undef, x) -> undef
- return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
+ return UndefValue::get(Val->getType()->getVectorElementType());
if (Val->isNullValue()) // ee(zero, x) -> zero
- return Constant::getNullValue(
- cast<VectorType>(Val->getType())->getElementType());
-
- if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
- uint64_t Index = CIdx->getZExtValue();
- if (Index >= CVal->getNumOperands())
- // ee({w,x,y,z}, wrong_value) -> undef
- return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
- return CVal->getOperand(CIdx->getZExtValue());
- } else if (isa<UndefValue>(Idx)) {
- // ee({w,x,y,z}, undef) -> undef
- return UndefValue::get(cast<VectorType>(Val->getType())->getElementType());
- }
+ return Constant::getNullValue(Val->getType()->getVectorElementType());
+ // ee({w,x,y,z}, undef) -> undef
+ if (isa<UndefValue>(Idx))
+ return UndefValue::get(Val->getType()->getVectorElementType());
+
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
+ uint64_t Index = CIdx->getZExtValue();
+ // ee({w,x,y,z}, wrong_value) -> undef
+ if (Index >= Val->getType()->getVectorNumElements())
+ return UndefValue::get(Val->getType()->getVectorElementType());
+ return Val->getAggregateElement(Index);
}
return 0;
}
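A usage sketch of the cases the rewritten extractelement fold handles, with assumed values:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // An in-range constant index hands back the element; an out-of-range or
    // undef index folds to undef instead of asserting.
    static void extractElementFolds(LLVMContext &C) {
      Type *I32 = Type::getInt32Ty(C);
      Constant *V = ConstantVector::getSplat(4, ConstantInt::get(I32, 7));
      Constant *InRange =
          ConstantExpr::getExtractElement(V, ConstantInt::get(I32, 2));
      Constant *OutOfRange =
          ConstantExpr::getExtractElement(V, ConstantInt::get(I32, 9));
      (void)InRange;    // i32 7
      (void)OutOfRange; // undef
    }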
@@ -779,103 +757,55 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
Constant *Idx) {
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
if (!CIdx) return 0;
- APInt idxVal = CIdx->getValue();
- if (isa<UndefValue>(Val)) {
- // Insertion of scalar constant into vector undef
- // Optimize away insertion of undef
- if (isa<UndefValue>(Elt))
- return Val;
- // Otherwise break the aggregate undef into multiple undefs and do
- // the insertion
- unsigned numOps =
- cast<VectorType>(Val->getType())->getNumElements();
- std::vector<Constant*> Ops;
- Ops.reserve(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Constant *Op =
- (idxVal == i) ? Elt : UndefValue::get(Elt->getType());
- Ops.push_back(Op);
- }
- return ConstantVector::get(Ops);
- }
- if (isa<ConstantAggregateZero>(Val)) {
- // Insertion of scalar constant into vector aggregate zero
- // Optimize away insertion of zero
- if (Elt->isNullValue())
- return Val;
- // Otherwise break the aggregate zero into multiple zeros and do
- // the insertion
- unsigned numOps =
- cast<VectorType>(Val->getType())->getNumElements();
- std::vector<Constant*> Ops;
- Ops.reserve(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Constant *Op =
- (idxVal == i) ? Elt : Constant::getNullValue(Elt->getType());
- Ops.push_back(Op);
- }
- return ConstantVector::get(Ops);
- }
- if (ConstantVector *CVal = dyn_cast<ConstantVector>(Val)) {
- // Insertion of scalar constant into vector constant
- std::vector<Constant*> Ops;
- Ops.reserve(CVal->getNumOperands());
- for (unsigned i = 0; i < CVal->getNumOperands(); ++i) {
- Constant *Op =
- (idxVal == i) ? Elt : cast<Constant>(CVal->getOperand(i));
- Ops.push_back(Op);
+ const APInt &IdxVal = CIdx->getValue();
+
+ SmallVector<Constant*, 16> Result;
+ for (unsigned i = 0, e = Val->getType()->getVectorNumElements(); i != e; ++i){
+ if (i == IdxVal) {
+ Result.push_back(Elt);
+ continue;
}
- return ConstantVector::get(Ops);
+
+ if (Constant *C = Val->getAggregateElement(i))
+ Result.push_back(C);
+ else
+ return 0;
}
-
- return 0;
-}
-
-/// GetVectorElement - If C is a ConstantVector, ConstantAggregateZero or Undef
-/// return the specified element value. Otherwise return null.
-static Constant *GetVectorElement(Constant *C, unsigned EltNo) {
- if (ConstantVector *CV = dyn_cast<ConstantVector>(C))
- return CV->getOperand(EltNo);
-
- Type *EltTy = cast<VectorType>(C->getType())->getElementType();
- if (isa<ConstantAggregateZero>(C))
- return Constant::getNullValue(EltTy);
- if (isa<UndefValue>(C))
- return UndefValue::get(EltTy);
- return 0;
+
+ return ConstantVector::get(Result);
}
Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
Constant *V2,
Constant *Mask) {
+ unsigned MaskNumElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
+
// Undefined shuffle mask -> undefined value.
- if (isa<UndefValue>(Mask)) return UndefValue::get(V1->getType());
+ if (isa<UndefValue>(Mask))
+ return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
- unsigned MaskNumElts = cast<VectorType>(Mask->getType())->getNumElements();
- unsigned SrcNumElts = cast<VectorType>(V1->getType())->getNumElements();
- Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ // Don't break the bitcode reader hack.
+ if (isa<ConstantExpr>(Mask)) return 0;
+
+ unsigned SrcNumElts = V1->getType()->getVectorNumElements();
// Loop over the shuffle mask, evaluating each element.
SmallVector<Constant*, 32> Result;
for (unsigned i = 0; i != MaskNumElts; ++i) {
- Constant *InElt = GetVectorElement(Mask, i);
- if (InElt == 0) return 0;
-
- if (isa<UndefValue>(InElt))
- InElt = UndefValue::get(EltTy);
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(InElt)) {
- unsigned Elt = CI->getZExtValue();
- if (Elt >= SrcNumElts*2)
- InElt = UndefValue::get(EltTy);
- else if (Elt >= SrcNumElts)
- InElt = GetVectorElement(V2, Elt - SrcNumElts);
- else
- InElt = GetVectorElement(V1, Elt);
- if (InElt == 0) return 0;
- } else {
- // Unknown value.
- return 0;
+ int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
+ if (Elt == -1) {
+ Result.push_back(UndefValue::get(EltTy));
+ continue;
}
+ Constant *InElt;
+ if (unsigned(Elt) >= SrcNumElts*2)
+ InElt = UndefValue::get(EltTy);
+ else if (unsigned(Elt) >= SrcNumElts)
+ InElt = V2->getAggregateElement(Elt - SrcNumElts);
+ else
+ InElt = V1->getAggregateElement(Elt);
+ if (InElt == 0) return 0;
Result.push_back(InElt);
}
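The mask decoding above leans on ShuffleVectorInst::getMaskValue, which reports an undef lane as -1. A compact restatement of the selection rule, as an illustrative helper:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // For lane i: -1 means an undef element; [0, N) selects from V1;
    // [N, 2N) selects from V2; anything >= 2N also folds to undef.
    static Constant *selectShuffleLane(Constant *V1, Constant *V2,
                                       Constant *Mask, unsigned i) {
      unsigned N = V1->getType()->getVectorNumElements();
      int Elt = ShuffleVectorInst::getMaskValue(Mask, i);
      if (Elt < 0 || unsigned(Elt) >= 2*N)
        return UndefValue::get(V1->getType()->getVectorElementType());
      return unsigned(Elt) < N ? V1->getAggregateElement(Elt)
                               : V2->getAggregateElement(Elt - N);
    }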
@@ -888,26 +818,10 @@ Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
if (Idxs.empty())
return Agg;
- if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
- return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs));
-
- if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0
- return
- Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs));
-
- // Otherwise recurse.
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
- return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]),
- Idxs.slice(1));
-
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
- return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]),
- Idxs.slice(1));
- ConstantVector *CV = cast<ConstantVector>(Agg);
- return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]),
- Idxs.slice(1));
+ if (Constant *C = Agg->getAggregateElement(Idxs[0]))
+ return ConstantFoldExtractValueInstruction(C, Idxs.slice(1));
+
+ return 0;
}
Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
@@ -917,84 +831,30 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
if (Idxs.empty())
return Val;
- if (isa<UndefValue>(Agg)) {
- // Insertion of constant into aggregate undef
- // Optimize away insertion of undef.
- if (isa<UndefValue>(Val))
- return Agg;
-
- // Otherwise break the aggregate undef into multiple undefs and do
- // the insertion.
- CompositeType *AggTy = cast<CompositeType>(Agg->getType());
- unsigned numOps;
- if (ArrayType *AR = dyn_cast<ArrayType>(AggTy))
- numOps = AR->getNumElements();
- else
- numOps = cast<StructType>(AggTy)->getNumElements();
-
- std::vector<Constant*> Ops(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Type *MemberTy = AggTy->getTypeAtIndex(i);
- Constant *Op =
- (Idxs[0] == i) ?
- ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
- Val, Idxs.slice(1)) :
- UndefValue::get(MemberTy);
- Ops[i] = Op;
- }
-
- if (StructType* ST = dyn_cast<StructType>(AggTy))
- return ConstantStruct::get(ST, Ops);
- return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
- }
+ unsigned NumElts;
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ NumElts = ST->getNumElements();
+ else if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ NumElts = AT->getNumElements();
+ else
+    NumElts = Agg->getType()->getVectorNumElements();
- if (isa<ConstantAggregateZero>(Agg)) {
- // Insertion of constant into aggregate zero
- // Optimize away insertion of zero.
- if (Val->isNullValue())
- return Agg;
-
- // Otherwise break the aggregate zero into multiple zeros and do
- // the insertion.
- CompositeType *AggTy = cast<CompositeType>(Agg->getType());
- unsigned numOps;
- if (ArrayType *AR = dyn_cast<ArrayType>(AggTy))
- numOps = AR->getNumElements();
- else
- numOps = cast<StructType>(AggTy)->getNumElements();
+ SmallVector<Constant*, 32> Result;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Agg->getAggregateElement(i);
+ if (C == 0) return 0;
- std::vector<Constant*> Ops(numOps);
- for (unsigned i = 0; i < numOps; ++i) {
- Type *MemberTy = AggTy->getTypeAtIndex(i);
- Constant *Op =
- (Idxs[0] == i) ?
- ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
- Val, Idxs.slice(1)) :
- Constant::getNullValue(MemberTy);
- Ops[i] = Op;
- }
+ if (Idxs[0] == i)
+ C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
- if (StructType *ST = dyn_cast<StructType>(AggTy))
- return ConstantStruct::get(ST, Ops);
- return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
+ Result.push_back(C);
}
- if (isa<ConstantStruct>(Agg) || isa<ConstantArray>(Agg)) {
- // Insertion of constant into aggregate constant.
- std::vector<Constant*> Ops(Agg->getNumOperands());
- for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
- Constant *Op = cast<Constant>(Agg->getOperand(i));
- if (Idxs[0] == i)
- Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1));
- Ops[i] = Op;
- }
-
- if (StructType* ST = dyn_cast<StructType>(Agg->getType()))
- return ConstantStruct::get(ST, Ops);
- return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
- }
-
- return 0;
+ if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
+ return ConstantStruct::get(ST, Result);
+ if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
+ return ConstantArray::get(AT, Result);
+ return ConstantVector::get(Result);
}
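A usage sketch of the rewritten insertvalue fold; the undef and zero special cases now fall out of the generic elementwise rebuild (illustrative helper, assumed values):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Inserting into an undef aggregate: the loop above rebuilds the struct
    // elementwise, so this folds to { i32 undef, i32 42 }.
    static Constant *insertIntoUndefPair(LLVMContext &C) {
      Type *I32 = Type::getInt32Ty(C);
      StructType *Pair = StructType::get(I32, I32, NULL);
      unsigned Idxs[] = { 1 };
      return ConstantExpr::getInsertValue(UndefValue::get(Pair),
                                          ConstantInt::get(I32, 42), Idxs);
    }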
@@ -1172,7 +1032,6 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
// At this point we know neither constant is an UndefValue.
if (ConstantInt *CI1 = dyn_cast<ConstantInt>(C1)) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
- using namespace APIntOps;
const APInt &C1V = CI1->getValue();
const APInt &C2V = CI2->getValue();
switch (Opcode) {
@@ -1269,145 +1128,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
} else if (VectorType *VTy = dyn_cast<VectorType>(C1->getType())) {
- ConstantVector *CP1 = dyn_cast<ConstantVector>(C1);
- ConstantVector *CP2 = dyn_cast<ConstantVector>(C2);
- if ((CP1 != NULL || isa<ConstantAggregateZero>(C1)) &&
- (CP2 != NULL || isa<ConstantAggregateZero>(C2))) {
- std::vector<Constant*> Res;
- Type* EltTy = VTy->getElementType();
- Constant *C1 = 0;
- Constant *C2 = 0;
- switch (Opcode) {
- default:
- break;
- case Instruction::Add:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getAdd(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FAdd:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFAdd(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Sub:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getSub(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FSub:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFSub(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Mul:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getMul(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FMul:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFMul(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::UDiv:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getUDiv(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::SDiv:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getSDiv(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FDiv:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFDiv(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::URem:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getURem(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::SRem:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getSRem(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::FRem:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getFRem(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::And:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getAnd(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Or:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getOr(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Xor:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getXor(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::LShr:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getLShr(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::AShr:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getAShr(C1, C2));
- }
- return ConstantVector::get(Res);
- case Instruction::Shl:
- for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
- C1 = CP1 ? CP1->getOperand(i) : Constant::getNullValue(EltTy);
- C2 = CP2 ? CP2->getOperand(i) : Constant::getNullValue(EltTy);
- Res.push_back(ConstantExpr::getShl(C1, C2));
- }
- return ConstantVector::get(Res);
- }
+ // Perform elementwise folding.
+ SmallVector<Constant*, 16> Result;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *LHS = C1->getAggregateElement(i);
+ Constant *RHS = C2->getAggregateElement(i);
+ if (LHS == 0 || RHS == 0) break;
+
+ Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
}
+
+ if (Result.size() == VTy->getNumElements())
+ return ConstantVector::get(Result);
}
if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(C1)) {
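The collapsed loop above delegates per-element folding to ConstantExpr::get, so one code path now serves every binary opcode. A usage sketch with assumed values:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instruction.h"
    using namespace llvm;

    // Adding two <4 x i32> splats folds elementwise to a splat of 5.
    static Constant *addSplats(LLVMContext &C) {
      Type *I32 = Type::getInt32Ty(C);
      Constant *A = ConstantVector::getSplat(4, ConstantInt::get(I32, 2));
      Constant *B = ConstantVector::getSplat(4, ConstantInt::get(I32, 3));
      return ConstantExpr::get(Instruction::Add, A, B);
    }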
@@ -1906,7 +1638,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
APInt V1 = cast<ConstantInt>(C1)->getValue();
APInt V2 = cast<ConstantInt>(C2)->getValue();
switch (pred) {
- default: llvm_unreachable("Invalid ICmp Predicate"); return 0;
+ default: llvm_unreachable("Invalid ICmp Predicate");
case ICmpInst::ICMP_EQ: return ConstantInt::get(ResultTy, V1 == V2);
case ICmpInst::ICMP_NE: return ConstantInt::get(ResultTy, V1 != V2);
case ICmpInst::ICMP_SLT: return ConstantInt::get(ResultTy, V1.slt(V2));
@@ -1923,7 +1655,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
APFloat C2V = cast<ConstantFP>(C2)->getValueAPF();
APFloat::cmpResult R = C1V.compare(C2V);
switch (pred) {
- default: llvm_unreachable("Invalid FCmp Predicate"); return 0;
+ default: llvm_unreachable("Invalid FCmp Predicate");
case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy);
case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy);
case FCmpInst::FCMP_UNO:
@@ -1962,20 +1694,20 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
R==APFloat::cmpEqual);
}
} else if (C1->getType()->isVectorTy()) {
- SmallVector<Constant*, 16> C1Elts, C2Elts;
- C1->getVectorElements(C1Elts);
- C2->getVectorElements(C2Elts);
- if (C1Elts.empty() || C2Elts.empty())
- return 0;
-
// If we can constant fold the comparison of each element, constant fold
// the whole vector comparison.
SmallVector<Constant*, 4> ResElts;
// Compare the elements, producing an i1 result or constant expr.
- for (unsigned i = 0, e = C1Elts.size(); i != e; ++i)
- ResElts.push_back(ConstantExpr::getCompare(pred, C1Elts[i], C2Elts[i]));
-
- return ConstantVector::get(ResElts);
+ for (unsigned i = 0, e = C1->getType()->getVectorNumElements(); i != e;++i){
+ Constant *C1E = C1->getAggregateElement(i);
+ Constant *C2E = C2->getAggregateElement(i);
+ if (C1E == 0 || C2E == 0) break;
+
+ ResElts.push_back(ConstantExpr::getCompare(pred, C1E, C2E));
+ }
+
+ if (ResElts.size() == C1->getType()->getVectorNumElements())
+ return ConstantVector::get(ResElts);
}
if (C1->getType()->isFloatingPointTy()) {
@@ -2209,7 +1941,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
+ if ((LastTy && isa<SequentialType>(LastTy)) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(Idxs.size() + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index a84a046bb998..6dbc1449f245 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -40,6 +40,8 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
+void Constant::anchor() { }
+
bool Constant::isNegativeZeroValue() const {
// Floating point values have an explicit -0.0 value.
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
@@ -71,17 +73,27 @@ bool Constant::isAllOnesValue() const {
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue();
- // Check for constant vectors
+ // Check for constant vectors which are splats of -1 values.
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- return CV->isAllOnesValue();
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
+
+  // Check for constant data vectors which are splats of -1 values.
+ if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this))
+ if (Constant *Splat = CV->getSplatValue())
+ return Splat->isAllOnesValue();
return false;
}
+
// Constructor to create a '0' constant of arbitrary type...
Constant *Constant::getNullValue(Type *Ty) {
switch (Ty->getTypeID()) {
case Type::IntegerTyID:
return ConstantInt::get(Ty, 0);
+ case Type::HalfTyID:
+ return ConstantFP::get(Ty->getContext(),
+ APFloat::getZero(APFloat::IEEEhalf));
case Type::FloatTyID:
return ConstantFP::get(Ty->getContext(),
APFloat::getZero(APFloat::IEEEsingle));
@@ -105,8 +117,7 @@ Constant *Constant::getNullValue(Type *Ty) {
return ConstantAggregateZero::get(Ty);
default:
// Function, Label, or Opaque type?
- assert(0 && "Cannot create a null constant of that type!");
- return 0;
+ llvm_unreachable("Cannot create a null constant of that type!");
}
}
@@ -122,7 +133,7 @@ Constant *Constant::getIntegerValue(Type *Ty, const APInt &V) {
// Broadcast a scalar to a vector, if necessary.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- C = ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+ C = ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
@@ -138,13 +149,44 @@ Constant *Constant::getAllOnesValue(Type *Ty) {
return ConstantFP::get(Ty->getContext(), FL);
}
- SmallVector<Constant*, 16> Elts;
VectorType *VTy = cast<VectorType>(Ty);
- Elts.resize(VTy->getNumElements(), getAllOnesValue(VTy->getElementType()));
- assert(Elts[0] && "Invalid AllOnes value!");
- return cast<ConstantVector>(ConstantVector::get(Elts));
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ getAllOnesValue(VTy->getElementType()));
}
+/// getAggregateElement - For aggregates (struct/array/vector) return the
+/// constant that corresponds to the specified element if possible, or null if
+/// not. This can return null if the element index is a ConstantExpr, or if
+/// 'this' is a constant expr.
+Constant *Constant::getAggregateElement(unsigned Elt) const {
+ if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(this))
+ return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0;
+
+ if (const ConstantArray *CA = dyn_cast<ConstantArray>(this))
+ return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0;
+
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
+ return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0;
+
+  if (const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(this))
+ return CAZ->getElementValue(Elt);
+
+ if (const UndefValue *UV = dyn_cast<UndefValue>(this))
+ return UV->getElementValue(Elt);
+
+ if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
+ return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0;
+ return 0;
+}
+
+Constant *Constant::getAggregateElement(Constant *Elt) const {
+ assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
+ return getAggregateElement(CI->getZExtValue());
+ return 0;
+}
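getAggregateElement is the linchpin of these changes: one accessor spanning ConstantStruct, ConstantArray, ConstantVector, ConstantAggregateZero, UndefValue and the new ConstantDataSequential forms. A usage sketch with an illustrative helper:

    #include "llvm/Constants.h"
    using namespace llvm;

    // Walk any aggregate constant uniformly; a null return means the element
    // is not representable (e.g. the aggregate is a ConstantExpr).
    static bool allElementsFoldable(Constant *Agg, unsigned NumElts) {
      for (unsigned i = 0; i != NumElts; ++i)
        if (Agg->getAggregateElement(i) == 0)
          return false;
      return true;
    }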
+
+
void Constant::destroyConstantImpl() {
// When a Constant is destroyed, there may be lingering
// references to the constant by other constants in the constant pool. These
@@ -163,8 +205,7 @@ void Constant::destroyConstantImpl() {
}
#endif
assert(isa<Constant>(V) && "References remain to Constant being destroyed");
- Constant *CV = cast<Constant>(V);
- CV->destroyConstant();
+ cast<Constant>(V)->destroyConstant();
// The constant should remove itself from our use list...
assert((use_empty() || use_back() != V) && "Constant not removed!");
@@ -270,36 +311,6 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
return Result;
}
-
-/// getVectorElements - This method, which is only valid on constant of vector
-/// type, returns the elements of the vector in the specified smallvector.
-/// This handles breaking down a vector undef into undef elements, etc. For
-/// constant exprs and other cases we can't handle, we return an empty vector.
-void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
- assert(getType()->isVectorTy() && "Not a vector constant!");
-
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
- for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
- Elts.push_back(CV->getOperand(i));
- return;
- }
-
- VectorType *VT = cast<VectorType>(getType());
- if (isa<ConstantAggregateZero>(this)) {
- Elts.assign(VT->getNumElements(),
- Constant::getNullValue(VT->getElementType()));
- return;
- }
-
- if (isa<UndefValue>(this)) {
- Elts.assign(VT->getNumElements(), UndefValue::get(VT->getElementType()));
- return;
- }
-
- // Unknown type, must be constant expr etc.
-}
-
-
/// removeDeadUsersOfConstant - If the specified constantexpr is dead, remove
/// it. This involves recursively eliminating any dead users of the
/// constantexpr.
@@ -358,6 +369,8 @@ void Constant::removeDeadConstantUsers() const {
// ConstantInt
//===----------------------------------------------------------------------===//
+void ConstantInt::anchor() { }
+
ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
: Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
@@ -385,9 +398,8 @@ Constant *ConstantInt::getTrue(Type *Ty) {
}
assert(VTy->getElementType()->isIntegerTy(1) &&
"True must be vector of i1 or i1.");
- SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
- ConstantInt::getTrue(Ty->getContext()));
- return ConstantVector::get(Splat);
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getTrue(Ty->getContext()));
}
Constant *ConstantInt::getFalse(Type *Ty) {
@@ -398,9 +410,8 @@ Constant *ConstantInt::getFalse(Type *Ty) {
}
assert(VTy->getElementType()->isIntegerTy(1) &&
"False must be vector of i1 or i1.");
- SmallVector<Constant*, 16> Splat(VTy->getNumElements(),
- ConstantInt::getFalse(Ty->getContext()));
- return ConstantVector::get(Splat);
+ return ConstantVector::getSplat(VTy->getNumElements(),
+ ConstantInt::getFalse(Ty->getContext()));
}
@@ -424,18 +435,17 @@ Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(SmallVector<Constant*,
- 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-ConstantInt* ConstantInt::get(IntegerType* Ty, uint64_t V,
+ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V,
bool isSigned) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
}
-ConstantInt* ConstantInt::getSigned(IntegerType* Ty, int64_t V) {
+ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
return get(Ty, V, true);
}
@@ -443,20 +453,19 @@ Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
return get(Ty, V, true);
}
-Constant *ConstantInt::get(Type* Ty, const APInt& V) {
+Constant *ConstantInt::get(Type *Ty, const APInt& V) {
ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
"ConstantInt type doesn't match the type implied by its value!");
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-ConstantInt* ConstantInt::get(IntegerType* Ty, StringRef Str,
+ConstantInt *ConstantInt::get(IntegerType* Ty, StringRef Str,
uint8_t radix) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
}
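ConstantVector::getSplat, used throughout these hunks, replaces the older idiom of materializing a SmallVector holding N copies of the element. Sketch:

    #include "llvm/Constants.h"
    using namespace llvm;

    // Build an all-true <N x i1>; equivalent to the removed pattern of
    // ConstantVector::get on a SmallVector filled with N true constants.
    static Constant *allTrueVector(LLVMContext &C, unsigned N) {
      return ConstantVector::getSplat(N, ConstantInt::getTrue(C));
    }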
@@ -466,6 +475,8 @@ ConstantInt* ConstantInt::get(IntegerType* Ty, StringRef Str,
//===----------------------------------------------------------------------===//
static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
+ if (Ty->isHalfTy())
+ return &APFloat::IEEEhalf;
if (Ty->isFloatTy())
return &APFloat::IEEEsingle;
if (Ty->isDoubleTy())
@@ -479,10 +490,12 @@ static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
return &APFloat::PPCDoubleDouble;
}
+void ConstantFP::anchor() { }
+
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-Constant *ConstantFP::get(Type* Ty, double V) {
+Constant *ConstantFP::get(Type *Ty, double V) {
LLVMContext &Context = Ty->getContext();
APFloat FV(V);
@@ -493,14 +506,13 @@ Constant *ConstantFP::get(Type* Ty, double V) {
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-Constant *ConstantFP::get(Type* Ty, StringRef Str) {
+Constant *ConstantFP::get(Type *Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -508,31 +520,28 @@ Constant *ConstantFP::get(Type* Ty, StringRef Str) {
// For vectors, broadcast the value.
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::get(
- SmallVector<Constant *, 16>(VTy->getNumElements(), C));
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
return C;
}
-ConstantFP* ConstantFP::getNegativeZero(Type* Ty) {
+ConstantFP *ConstantFP::getNegativeZero(Type *Ty) {
LLVMContext &Context = Ty->getContext();
- APFloat apf = cast <ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
+ APFloat apf = cast<ConstantFP>(Constant::getNullValue(Ty))->getValueAPF();
apf.changeSign();
return get(Context, apf);
}
-Constant *ConstantFP::getZeroValueForNegation(Type* Ty) {
- if (VectorType *PTy = dyn_cast<VectorType>(Ty))
- if (PTy->getElementType()->isFloatingPointTy()) {
- SmallVector<Constant*, 16> zeros(PTy->getNumElements(),
- getNegativeZero(PTy->getElementType()));
- return ConstantVector::get(zeros);
- }
-
- if (Ty->isFloatingPointTy())
- return getNegativeZero(Ty);
+Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
+ Type *ScalarTy = Ty->getScalarType();
+ if (ScalarTy->isFloatingPointTy()) {
+ Constant *C = getNegativeZero(ScalarTy);
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return ConstantVector::getSplat(VTy->getNumElements(), C);
+ return C;
+ }
return Constant::getNullValue(Ty);
}
@@ -548,7 +557,9 @@ ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
if (!Slot) {
Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEsingle)
+ if (&V.getSemantics() == &APFloat::IEEEhalf)
+ Ty = Type::getHalfTy(Context);
+ else if (&V.getSemantics() == &APFloat::IEEEsingle)
Ty = Type::getFloatTy(Context);
else if (&V.getSemantics() == &APFloat::IEEEdouble)
Ty = Type::getDoubleTy(Context);
@@ -584,9 +595,83 @@ bool ConstantFP::isExactlyValue(const APFloat &V) const {
}
//===----------------------------------------------------------------------===//
+// ConstantAggregateZero Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this CAZ has array or vector type, return a zero
+/// with the right element type.
+Constant *ConstantAggregateZero::getSequentialElement() const {
+ return Constant::getNullValue(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this CAZ has struct type, return a zero with the
+/// right element type for the specified element.
+Constant *ConstantAggregateZero::getStructElement(unsigned Elt) const {
+ return Constant::getNullValue(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return a zero of the right type for the specified GEP
+/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
+Constant *ConstantAggregateZero::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return a zero of the right type for the specified GEP
+/// index.
+Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
+
+
+//===----------------------------------------------------------------------===//
+// UndefValue Implementation
+//===----------------------------------------------------------------------===//
+
+/// getSequentialElement - If this undef has array or vector type, return an
+/// undef with the right element type.
+UndefValue *UndefValue::getSequentialElement() const {
+ return UndefValue::get(getType()->getSequentialElementType());
+}
+
+/// getStructElement - If this undef has struct type, return an undef with the
+/// right element type for the specified element.
+UndefValue *UndefValue::getStructElement(unsigned Elt) const {
+ return UndefValue::get(getType()->getStructElementType(Elt));
+}
+
+/// getElementValue - Return an undef of the right type for the specified GEP
+/// index if we can, otherwise return null (e.g. if C is a ConstantExpr).
+UndefValue *UndefValue::getElementValue(Constant *C) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(cast<ConstantInt>(C)->getZExtValue());
+}
+
+/// getElementValue - Return an undef of the right type for the specified GEP
+/// index.
+UndefValue *UndefValue::getElementValue(unsigned Idx) const {
+ if (isa<SequentialType>(getType()))
+ return getSequentialElement();
+ return getStructElement(Idx);
+}
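These accessors let zeroinitializer and undef aggregates stay in their compact forms while still answering per-element queries; nothing proportional to the aggregate size is allocated. Usage sketch:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Query one element of a large zeroinitializer/undef array: each call
    // materializes a single scalar constant, not a 1024-entry element list.
    static void lazyAggregateElements(LLVMContext &C) {
      Type *Arr = ArrayType::get(Type::getInt8Ty(C), 1024);
      Constant *Z = ConstantAggregateZero::get(Arr)->getAggregateElement(5u);
      Constant *U = UndefValue::get(Arr)->getAggregateElement(5u);
      (void)Z; // i8 0
      (void)U; // i8 undef
    }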
+
+
+
+//===----------------------------------------------------------------------===//
// ConstantXXX Classes
//===----------------------------------------------------------------------===//
+template <typename ItTy, typename EltTy>
+static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) {
+ for (; Start != End; ++Start)
+ if (*Start != Elt)
+ return false;
+ return true;
+}
ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
: Constant(T, ConstantArrayVal,
@@ -601,45 +686,97 @@ ConstantArray::ConstantArray(ArrayType *T, ArrayRef<Constant *> V)
}
Constant *ConstantArray::get(ArrayType *Ty, ArrayRef<Constant*> V) {
+ // Empty arrays are canonicalized to ConstantAggregateZero.
+ if (V.empty())
+ return ConstantAggregateZero::get(Ty);
+
for (unsigned i = 0, e = V.size(); i != e; ++i) {
assert(V[i]->getType() == Ty->getElementType() &&
"Wrong type in array element initializer");
}
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- // If this is an all-zero array, return a ConstantAggregateZero object
- if (!V.empty()) {
- Constant *C = V[0];
- if (!C->isNullValue())
- return pImpl->ArrayConstants.getOrCreate(Ty, V);
-
- for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C)
- return pImpl->ArrayConstants.getOrCreate(Ty, V);
- }
- return ConstantAggregateZero::get(Ty);
-}
+ // If this is an all-zero array, return a ConstantAggregateZero object. If
+ // all undef, return an UndefValue; if "all simple", return a
+ // ConstantDataArray.
+ Constant *C = V[0];
+ if (isa<UndefValue>(C) && rangeOnlyContains(V.begin(), V.end(), C))
+ return UndefValue::get(Ty);
-/// ConstantArray::get(const string&) - Return an array that is initialized to
-/// contain the specified string. If length is zero then a null terminator is
-/// added to the specified string so that it may be used in a natural way.
-/// Otherwise, the length parameter specifies how much of the string to use
-/// and it won't be null terminated.
-///
-Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
- bool AddNull) {
- std::vector<Constant*> ElementVals;
- ElementVals.reserve(Str.size() + size_t(AddNull));
- for (unsigned i = 0; i < Str.size(); ++i)
- ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
-
- // Add a null terminator to the string...
- if (AddNull) {
- ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), 0));
+ if (C->isNullValue() && rangeOnlyContains(V.begin(), V.end(), C))
+ return ConstantAggregateZero::get(Ty);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataArray. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the array, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataArray::get(C->getContext(), Elts);
+ }
+ }
}
- ArrayType *ATy = ArrayType::get(Type::getInt8Ty(Context), ElementVals.size());
- return get(ATy, ElementVals);
+ // Otherwise, we really do want to create a ConstantArray.
+ return pImpl->ArrayConstants.getOrCreate(Ty, V);
}
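
To make the new dispatch concrete, a short hedged sketch of the two interesting outcomes, again assuming a Ctx (illustrative only):

  Type *I8 = Type::getInt8Ty(Ctx);
  Constant *Elts[] = { ConstantInt::get(I8, 'h'), ConstantInt::get(I8, 'i') };
  Constant *C = ConstantArray::get(ArrayType::get(I8, 2), Elts);
  // isa<ConstantDataArray>(C): the i8 branch packed both values densely.

  Constant *U = UndefValue::get(I8);
  Constant *UElts[] = { U, U };
  Constant *C2 = ConstantArray::get(ArrayType::get(I8, 2), UElts);
  // isa<UndefValue>(C2): the all-undef case collapses to a single node.
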
/// getTypeForElements - Return an anonymous struct type to use for a constant
@@ -647,9 +784,10 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
ArrayRef<Constant*> V,
bool Packed) {
- SmallVector<Type*, 16> EltTypes;
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- EltTypes.push_back(V[i]->getType());
+ unsigned VecSize = V.size();
+ SmallVector<Type*, 16> EltTypes(VecSize);
+ for (unsigned i = 0; i != VecSize; ++i)
+ EltTypes[i] = V[i]->getType();
return StructType::get(Context, EltTypes, Packed);
}
@@ -677,14 +815,31 @@ ConstantStruct::ConstantStruct(StructType *T, ArrayRef<Constant *> V)
// ConstantStruct accessors.
Constant *ConstantStruct::get(StructType *ST, ArrayRef<Constant*> V) {
- // Create a ConstantAggregateZero value if all elements are zeros.
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- if (!V[i]->isNullValue())
- return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
-
assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
"Incorrect # elements specified to ConstantStruct::get");
- return ConstantAggregateZero::get(ST);
+
+ // Create a ConstantAggregateZero value if all elements are zeros.
+ bool isZero = true;
+ bool isUndef = false;
+
+ if (!V.empty()) {
+ isUndef = isa<UndefValue>(V[0]);
+ isZero = V[0]->isNullValue();
+ if (isUndef || isZero) {
+ for (unsigned i = 0, e = V.size(); i != e; ++i) {
+ if (!V[i]->isNullValue())
+ isZero = false;
+ if (!isa<UndefValue>(V[i]))
+ isUndef = false;
+ }
+ }
+ }
+ if (isZero)
+ return ConstantAggregateZero::get(ST);
+ if (isUndef)
+ return UndefValue::get(ST);
+
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
}
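
A brief sketch of the new all-undef struct path, under the same Ctx assumption:

  StructType *ST = StructType::get(Type::getInt32Ty(Ctx),
                                   Type::getInt8Ty(Ctx), NULL);
  Constant *Fields[] = { UndefValue::get(ST->getElementType(0)),
                         UndefValue::get(ST->getElementType(1)) };
  Constant *S = ConstantStruct::get(ST, Fields);
  // isa<UndefValue>(S): no ConstantStruct node is allocated at all.
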
Constant *ConstantStruct::get(StructType *T, ...) {
@@ -731,10 +886,93 @@ Constant *ConstantVector::get(ArrayRef<Constant*> V) {
return ConstantAggregateZero::get(T);
if (isUndef)
return UndefValue::get(T);
+
+ // Check to see if all of the elements are ConstantFP or ConstantInt and if
+ // the element type is compatible with ConstantDataVector. If so, use it.
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) {
+ // We speculatively build the elements here even if it turns out that there
+ // is a constantexpr or something else weird in the vector, since it is so
+ // uncommon for that to happen.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CI->getType()->isIntegerTy(64)) {
+ SmallVector<uint64_t, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V[i]))
+ Elts.push_back(CI->getZExtValue());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToFloat());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ } else if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V[i]))
+ Elts.push_back(CFP->getValueAPF().convertToDouble());
+ else
+ break;
+ if (Elts.size() == V.size())
+ return ConstantDataVector::get(C->getContext(), Elts);
+ }
+ }
+ }
+
+ // Otherwise, the element type isn't compatible with ConstantDataVector, or
+ // the operand list contains a ConstantExpr or something else strange.
return pImpl->VectorConstants.getOrCreate(T, V);
}
+Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) {
+ // If this splat is compatible with ConstantDataVector, use it instead of
+ // ConstantVector.
+ if ((isa<ConstantFP>(V) || isa<ConstantInt>(V)) &&
+ ConstantDataSequential::isElementTypeCompatible(V->getType()))
+ return ConstantDataVector::getSplat(NumElts, V);
+
+ SmallVector<Constant*, 32> Elts(NumElts, V);
+ return get(Elts);
+}
+
+
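Usage sketch (Ctx assumed, not from the patch): splatting a simple integer now avoids the operand-bearing representation entirely.

  Constant *One = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
  Constant *V = ConstantVector::getSplat(4, One);
  // i32 is CDS-compatible, so V is a <4 x i32> ConstantDataVector and
  // holds no Use edges to One.
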
// Utility function for determining if a ConstantExpr is a CastOp or not. This
// can't be inline because we don't want to #include Instruction.h into
// Constant.h
@@ -793,66 +1031,16 @@ unsigned ConstantExpr::getPredicate() const {
/// one, but with the specified operand set to the specified value.
Constant *
ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
- assert(OpNo < getNumOperands() && "Operand num is out of range!");
assert(Op->getType() == getOperand(OpNo)->getType() &&
"Replacing operand with value of different type!");
if (getOperand(OpNo) == Op)
return const_cast<ConstantExpr*>(this);
+
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ NewOps.push_back(i == OpNo ? Op : getOperand(i));
- Constant *Op0, *Op1, *Op2;
- switch (getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- return ConstantExpr::getCast(getOpcode(), Op, getType());
- case Instruction::Select:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- Op2 = (OpNo == 2) ? Op : getOperand(2);
- return ConstantExpr::getSelect(Op0, Op1, Op2);
- case Instruction::InsertElement:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- Op2 = (OpNo == 2) ? Op : getOperand(2);
- return ConstantExpr::getInsertElement(Op0, Op1, Op2);
- case Instruction::ExtractElement:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- return ConstantExpr::getExtractElement(Op0, Op1);
- case Instruction::ShuffleVector:
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- Op2 = (OpNo == 2) ? Op : getOperand(2);
- return ConstantExpr::getShuffleVector(Op0, Op1, Op2);
- case Instruction::GetElementPtr: {
- SmallVector<Constant*, 8> Ops;
- Ops.resize(getNumOperands()-1);
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i)
- Ops[i-1] = getOperand(i);
- if (OpNo == 0)
- return
- ConstantExpr::getGetElementPtr(Op, Ops,
- cast<GEPOperator>(this)->isInBounds());
- Ops[OpNo-1] = Op;
- return
- ConstantExpr::getGetElementPtr(getOperand(0), Ops,
- cast<GEPOperator>(this)->isInBounds());
- }
- default:
- assert(getNumOperands() == 2 && "Must be binary operator?");
- Op0 = (OpNo == 0) ? Op : getOperand(0);
- Op1 = (OpNo == 1) ? Op : getOperand(1);
- return ConstantExpr::get(getOpcode(), Op0, Op1, SubclassOptionalData);
- }
+ return getWithOperands(NewOps);
}
/// getWithOperands - This returns the current constant expression with the
@@ -888,12 +1076,15 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
+ case Instruction::InsertValue:
+ return ConstantExpr::getInsertValue(Ops[0], Ops[1], getIndices());
+ case Instruction::ExtractValue:
+ return ConstantExpr::getExtractValue(Ops[0], getIndices());
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- return
- ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
- cast<GEPOperator>(this)->isInBounds());
+ return ConstantExpr::getGetElementPtr(Ops[0], Ops.slice(1),
+ cast<GEPOperator>(this)->isInBounds());
case Instruction::ICmp:
case Instruction::FCmp:
return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]);
@@ -908,8 +1099,8 @@ getWithOperands(ArrayRef<Constant*> Ops, Type *Ty) const {
// isValueValidForType implementations
bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
- if (Ty == Type::getInt1Ty(Ty->getContext()))
+ unsigned NumBits = Ty->getIntegerBitWidth(); // assert okay
+ if (Ty->isIntegerTy(1))
return Val == 0 || Val == 1;
if (NumBits >= 64)
return true; // always true, has to fit in largest type
@@ -918,8 +1109,8 @@ bool ConstantInt::isValueValidForType(Type *Ty, uint64_t Val) {
}
bool ConstantInt::isValueValidForType(Type *Ty, int64_t Val) {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); // assert okay
- if (Ty == Type::getInt1Ty(Ty->getContext()))
+ unsigned NumBits = Ty->getIntegerBitWidth();
+ if (Ty->isIntegerTy(1))
return Val == 0 || Val == 1 || Val == -1;
if (NumBits >= 64)
return true; // always true, has to fit in largest type
@@ -937,6 +1128,12 @@ bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
return false; // These can't be represented as floating point!
// FIXME rounding mode needs to be more flexible
+ case Type::HalfTyID: {
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf)
+ return true;
+ Val2.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &losesInfo);
+ return !losesInfo;
+ }
case Type::FloatTyID: {
if (&Val2.getSemantics() == &APFloat::IEEEsingle)
return true;
@@ -944,42 +1141,50 @@ bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
return !losesInfo;
}
case Type::DoubleTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEsingle ||
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble)
return true;
Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
return !losesInfo;
}
case Type::X86_FP80TyID:
- return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble ||
&Val2.getSemantics() == &APFloat::x87DoubleExtended;
case Type::FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble ||
&Val2.getSemantics() == &APFloat::IEEEquad;
case Type::PPC_FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEsingle ||
+ return &Val2.getSemantics() == &APFloat::IEEEhalf ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle ||
&Val2.getSemantics() == &APFloat::IEEEdouble ||
&Val2.getSemantics() == &APFloat::PPCDoubleDouble;
}
}
+
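A sketch of what the new HalfTyID case accepts, assuming a Ctx; the values are chosen so the round-trip behavior is unambiguous:

  APFloat Exact(1.5);   // 1.5 is exactly representable in IEEEhalf
  APFloat Inexact(1.1); // double 1.1 rounds when converted to half
  bool A = ConstantFP::isValueValidForType(Type::getHalfTy(Ctx), Exact);   // true
  bool B = ConstantFP::isValueValidForType(Type::getHalfTy(Ctx), Inexact); // false
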
//===----------------------------------------------------------------------===//
// Factory Function Implementation
-ConstantAggregateZero* ConstantAggregateZero::get(Type* Ty) {
+ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
- LLVMContextImpl *pImpl = Ty->getContext().pImpl;
- return pImpl->AggZeroConstants.getOrCreate(Ty, 0);
+ ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantAggregateZero(Ty);
+
+ return Entry;
}
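
The per-type DenseMap gives pointer-identity uniquing directly; a hedged sketch (Ctx assumed):

  VectorType *V4 = VectorType::get(Type::getInt32Ty(Ctx), 4);
  ConstantAggregateZero *A = ConstantAggregateZero::get(V4);
  ConstantAggregateZero *B = ConstantAggregateZero::get(V4);
  // A == B: exactly one CAZ node per type, keyed by Type* in CAZConstants.
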
-/// destroyConstant - Remove the constant from the constant table...
+/// destroyConstant - Remove the constant from the constant table.
///
void ConstantAggregateZero::destroyConstant() {
- getType()->getContext().pImpl->AggZeroConstants.remove(this);
+ getContext().pImpl->CAZConstants.erase(getType());
destroyConstantImpl();
}
@@ -990,69 +1195,6 @@ void ConstantArray::destroyConstant() {
destroyConstantImpl();
}
-/// isString - This method returns true if the array is an array of i8, and
-/// if the elements of the array are all ConstantInt's.
-bool ConstantArray::isString() const {
- // Check the element type for i8...
- if (!getType()->getElementType()->isIntegerTy(8))
- return false;
- // Check the elements to make sure they are all integers, not constant
- // expressions.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!isa<ConstantInt>(getOperand(i)))
- return false;
- return true;
-}
-
-/// isCString - This method returns true if the array is a string (see
-/// isString) and it ends in a null byte \\0 and does not contains any other
-/// null bytes except its terminator.
-bool ConstantArray::isCString() const {
- // Check the element type for i8...
- if (!getType()->getElementType()->isIntegerTy(8))
- return false;
-
- // Last element must be a null.
- if (!getOperand(getNumOperands()-1)->isNullValue())
- return false;
- // Other elements must be non-null integers.
- for (unsigned i = 0, e = getNumOperands()-1; i != e; ++i) {
- if (!isa<ConstantInt>(getOperand(i)))
- return false;
- if (getOperand(i)->isNullValue())
- return false;
- }
- return true;
-}
-
-
-/// convertToString - Helper function for getAsString() and getAsCString().
-static std::string convertToString(const User *U, unsigned len) {
- std::string Result;
- Result.reserve(len);
- for (unsigned i = 0; i != len; ++i)
- Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue());
- return Result;
-}
-
-/// getAsString - If this array is isString(), then this method converts the
-/// array to an std::string and returns it. Otherwise, it asserts out.
-///
-std::string ConstantArray::getAsString() const {
- assert(isString() && "Not a string!");
- return convertToString(this, getNumOperands());
-}
-
-
-/// getAsCString - If this array is isCString(), then this method converts the
-/// array (without the trailing null byte) to an std::string and returns it.
-/// Otherwise, it asserts out.
-///
-std::string ConstantArray::getAsCString() const {
- assert(isCString() && "Not a string!");
- return convertToString(this, getNumOperands() - 1);
-}
-
//---- ConstantStruct::get() implementation...
//
@@ -1071,26 +1213,6 @@ void ConstantVector::destroyConstant() {
destroyConstantImpl();
}
-/// This function will return true iff every element in this vector constant
-/// is set to all ones.
-/// @returns true iff this constant's elements are all set to all ones.
-/// @brief Determine if the value is all ones.
-bool ConstantVector::isAllOnesValue() const {
- // Check out first element.
- const Constant *Elt = getOperand(0);
- const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
- const ConstantFP *CF = dyn_cast<ConstantFP>(Elt);
-
- // Then make sure all remaining elements point to the same value.
- for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
- if (getOperand(I) != Elt)
- return false;
-
- // First value is all-ones.
- return (CI && CI->isAllOnesValue()) ||
- (CF && CF->isAllOnesValue());
-}
-
/// getSplatValue - If this is a splat constant, where all of the
/// elements have the same value, return that value. Otherwise return null.
Constant *ConstantVector::getSplatValue() const {
@@ -1107,13 +1229,18 @@ Constant *ConstantVector::getSplatValue() const {
//
ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
- return Ty->getContext().pImpl->NullPtrConstants.getOrCreate(Ty, 0);
+ ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
+ if (Entry == 0)
+ Entry = new ConstantPointerNull(Ty);
+
+ return Entry;
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
- getType()->getContext().pImpl->NullPtrConstants.remove(this);
+ getContext().pImpl->CPNConstants.erase(getType());
+ // Free the constant and any dangling references to it.
destroyConstantImpl();
}
@@ -1122,13 +1249,18 @@ void ConstantPointerNull::destroyConstant() {
//
UndefValue *UndefValue::get(Type *Ty) {
- return Ty->getContext().pImpl->UndefValueConstants.getOrCreate(Ty, 0);
+ UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
+ if (Entry == 0)
+ Entry = new UndefValue(Ty);
+
+ return Entry;
}
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
- getType()->getContext().pImpl->UndefValueConstants.remove(this);
+ // Free the constant and any dangling references to it.
+ getContext().pImpl->UVConstants.erase(getType());
destroyConstantImpl();
}
@@ -1236,7 +1368,6 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
switch (opc) {
default:
llvm_unreachable("Invalid cast opcode");
- break;
case Instruction::Trunc: return getTrunc(C, Ty);
case Instruction::ZExt: return getZExt(C, Ty);
case Instruction::SExt: return getSExt(C, Ty);
@@ -1250,7 +1381,6 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) {
case Instruction::IntToPtr: return getIntToPtr(C, Ty);
case Instruction::BitCast: return getBitCast(C, Ty);
}
- return 0;
}
Constant *ConstantExpr::getZExtOrBitCast(Constant *C, Type *Ty) {
@@ -1416,14 +1546,26 @@ Constant *ConstantExpr::getFPToSI(Constant *C, Type *Ty) {
}
Constant *ConstantExpr::getPtrToInt(Constant *C, Type *DstTy) {
- assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
- assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
+ assert(C->getType()->getScalarType()->isPointerTy() &&
+ "PtrToInt source must be pointer or pointer vector");
+ assert(DstTy->getScalarType()->isIntegerTy() &&
+ "PtrToInt destination must be integer or integer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements() ==
+ DstTy->getVectorNumElements() &&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
Constant *ConstantExpr::getIntToPtr(Constant *C, Type *DstTy) {
- assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
- assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
+ assert(C->getType()->getScalarType()->isIntegerTy() &&
+ "IntToPtr source must be integer or integer vector");
+ assert(DstTy->getScalarType()->isPointerTy() &&
+ "IntToPtr destination must be a pointer or pointer vector");
+ assert(isa<VectorType>(C->getType()) == isa<VectorType>(DstTy));
+ if (isa<VectorType>(C->getType()))
+ assert(C->getType()->getVectorNumElements() ==
+ DstTy->getVectorNumElements() &&
+ "Invalid cast between a different number of vector elements");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
@@ -1603,7 +1745,7 @@ Constant *ConstantExpr::getGetElementPtr(Constant *C, ArrayRef<Value *> Idxs,
// Get the result type of the getelementptr!
Type *Ty = GetElementPtrInst::getIndexedType(C->getType(), Idxs);
assert(Ty && "GEP indices invalid!");
- unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
+ unsigned AS = C->getType()->getPointerAddressSpace();
Type *ReqTy = Ty->getPointerTo(AS);
assert(C->getType()->isPointerTy() &&
@@ -1683,7 +1825,7 @@ Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
LLVMContextImpl *pImpl = Val->getContext().pImpl;
- Type *ReqTy = cast<VectorType>(Val->getType())->getElementType();
+ Type *ReqTy = Val->getType()->getVectorElementType();
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
@@ -1691,8 +1833,8 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
Constant *Idx) {
assert(Val->getType()->isVectorTy() &&
"Tried to create insertelement operation on non-vector type!");
- assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
- && "Insertelement types must match!");
+ assert(Elt->getType() == Val->getType()->getVectorElementType() &&
+ "Insertelement types must match!");
assert(Idx->getType()->isIntegerTy(32) &&
"Insertelement index must be i32 type!");
@@ -1716,8 +1858,8 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
return FC; // Fold a few common cases.
- unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
- Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
+ unsigned NElts = Mask->getType()->getVectorNumElements();
+ Type *EltTy = V1->getType()->getVectorElementType();
Type *ShufTy = VectorType::get(EltTy, NElts);
// Look up the constant in the table first to ensure uniqueness
@@ -1879,7 +2021,7 @@ const char *ConstantExpr::getOpcodeName() const {
GetElementPtrConstantExpr::
-GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
Type *DestTy)
: ConstantExpr(DestTy, Instruction::GetElementPtr,
OperandTraits<GetElementPtrConstantExpr>::op_end(this)
@@ -1889,6 +2031,341 @@ GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
OperandList[i+1] = IdxList[i];
}
+//===----------------------------------------------------------------------===//
+// ConstantData* implementations
+
+void ConstantDataArray::anchor() {}
+void ConstantDataVector::anchor() {}
+
+/// getElementType - Return the element type of the array/vector.
+Type *ConstantDataSequential::getElementType() const {
+ return getType()->getElementType();
+}
+
+StringRef ConstantDataSequential::getRawDataValues() const {
+ return StringRef(DataElements, getNumElements()*getElementByteSize());
+}
+
+/// isElementTypeCompatible - Return true if a ConstantDataSequential can be
+/// formed with a vector or array of the specified element type.
+/// ConstantDataArray only works with normal float and int types that are
+/// stored densely in memory, not with things like i42 or x86_f80.
+bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) {
+ if (Ty->isFloatTy() || Ty->isDoubleTy()) return true;
+ if (const IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+ switch (IT->getBitWidth()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ return true;
+ default: break;
+ }
+ }
+ return false;
+}
+
+/// getNumElements - Return the number of elements in the array or vector.
+unsigned ConstantDataSequential::getNumElements() const {
+ if (ArrayType *AT = dyn_cast<ArrayType>(getType()))
+ return AT->getNumElements();
+ return getType()->getVectorNumElements();
+}
+
+
+/// getElementByteSize - Return the size in bytes of the elements in the data.
+uint64_t ConstantDataSequential::getElementByteSize() const {
+ return getElementType()->getPrimitiveSizeInBits()/8;
+}
+
+/// getElementPointer - Return the start of the specified element.
+const char *ConstantDataSequential::getElementPointer(unsigned Elt) const {
+ assert(Elt < getNumElements() && "Invalid Elt");
+ return DataElements+Elt*getElementByteSize();
+}
+
+
+/// isAllZeros - return true if the array is empty or all zeros.
+static bool isAllZeros(StringRef Arr) {
+ for (StringRef::iterator I = Arr.begin(), E = Arr.end(); I != E; ++I)
+ if (*I != 0)
+ return false;
+ return true;
+}
+
+/// getImpl - This is the underlying implementation of all of the
+/// ConstantDataSequential::get methods. They all thunk down to here, providing
+/// the correct element type. We take the bytes in as a StringRef because
+/// we *want* an underlying "char*" to avoid TBAA type punning violations.
+Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
+ assert(isElementTypeCompatible(Ty->getSequentialElementType()));
+ // If the elements are all zero or there are no elements, return a CAZ, which
+ // is more dense and canonical.
+ if (isAllZeros(Elements))
+ return ConstantAggregateZero::get(Ty);
+
+ // Do a lookup to see if we have already formed one of these.
+ StringMap<ConstantDataSequential*>::MapEntryTy &Slot =
+ Ty->getContext().pImpl->CDSConstants.GetOrCreateValue(Elements);
+
+ // The bucket can point to a linked list of different CDS's that have the same
+ // body but different types. For example, 0,0,0,1 could be a 4-element array
+ // of i8, or a 1-element array of i32. They'll both end up in the same
+ // StringMap bucket, linked up by their Next pointers. Walk the list.
+ ConstantDataSequential **Entry = &Slot.getValue();
+ for (ConstantDataSequential *Node = *Entry; Node != 0;
+ Entry = &Node->Next, Node = *Entry)
+ if (Node->getType() == Ty)
+ return Node;
+
+ // Okay, we didn't get a hit. Create a node of the right class, link it in,
+ // and return it.
+ if (isa<ArrayType>(Ty))
+ return *Entry = new ConstantDataArray(Ty, Slot.getKeyData());
+
+ assert(isa<VectorType>(Ty));
+ return *Entry = new ConstantDataVector(Ty, Slot.getKeyData());
+}
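
To illustrate the bucket chaining described above, a hedged sketch (Ctx assumed); the byte pattern is the same for both element widths regardless of host endianness:

  uint8_t  Raw8[]  = { 1, 1, 1, 1 };
  uint32_t Raw32[] = { 0x01010101 };
  Constant *AsI8  = ConstantDataArray::get(Ctx, Raw8);  // [4 x i8]
  Constant *AsI32 = ConstantDataArray::get(Ctx, Raw32); // [1 x i32]
  // Identical four-byte keys: both nodes hang off one StringMap bucket,
  // chained through Next, yet AsI8 != AsI32 because the types differ.
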
+
+void ConstantDataSequential::destroyConstant() {
+ // Remove the constant from the StringMap.
+ StringMap<ConstantDataSequential*> &CDSConstants =
+ getType()->getContext().pImpl->CDSConstants;
+
+ StringMap<ConstantDataSequential*>::iterator Slot =
+ CDSConstants.find(getRawDataValues());
+
+ assert(Slot != CDSConstants.end() && "CDS not found in uniquing table");
+
+ ConstantDataSequential **Entry = &Slot->getValue();
+
+ // Remove the entry from the hash table.
+ if ((*Entry)->Next == 0) {
+ // If there is only one value in the bucket (common case) it must be this
+ // entry, and removing the entry should remove the bucket completely.
+ assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
+ getContext().pImpl->CDSConstants.erase(Slot);
+ } else {
+ // Otherwise, there are multiple entries linked off the bucket, unlink the
+ // node we care about but keep the bucket around.
+ for (ConstantDataSequential *Node = *Entry; ;
+ Entry = &Node->Next, Node = *Entry) {
+ assert(Node && "Didn't find entry in its uniquing hash table!");
+ // If we found our entry, unlink it from the list and we're done.
+ if (Node == this) {
+ *Entry = Node->Next;
+ break;
+ }
+ }
+ }
+
+ // If we were part of a list, make sure that we don't delete the list that is
+ // still owned by the uniquing map.
+ Next = 0;
+
+ // Finally, actually delete it.
+ destroyConstantImpl();
+}
+
+/// get() constructors - Return a constant with array type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint8_t> Elts) {
+ Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt16Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt32Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = ArrayType::get(Type::getInt64Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = ArrayType::get(Type::getFloatTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = ArrayType::get(Type::getDoubleTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+
+/// getString - This method constructs a CDS and initializes it with a text
+/// string. The default behavior (AddNull==true) causes a null terminator to
+/// be placed at the end of the array (increasing the length of the string by
+/// one more than the StringRef would normally indicate). Pass AddNull=false
+/// to disable this behavior.
+Constant *ConstantDataArray::getString(LLVMContext &Context,
+ StringRef Str, bool AddNull) {
+ if (!AddNull)
+ return get(Context, ArrayRef<uint8_t>((uint8_t*)Str.data(), Str.size()));
+
+ SmallVector<uint8_t, 64> ElementVals;
+ ElementVals.append(Str.begin(), Str.end());
+ ElementVals.push_back(0);
+ return get(Context, ElementVals);
+}
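
Usage sketch (Ctx assumed):

  Constant *S1 = ConstantDataArray::getString(Ctx, "hi");        // [3 x i8] c"hi\00"
  Constant *S2 = ConstantDataArray::getString(Ctx, "hi", false); // [2 x i8] c"hi"
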
+
+/// get() constructors - Return a constant with vector type with an element
+/// count and element type matching the ArrayRef passed in. Note that this
+/// can return a ConstantAggregateZero object.
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint8_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt8Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*1), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint16_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt16Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*2), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint32_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt32Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<uint64_t> Elts){
+ Type *Ty = VectorType::get(Type::getInt64Ty(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<float> Elts) {
+ Type *Ty = VectorType::get(Type::getFloatTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*4), Ty);
+}
+Constant *ConstantDataVector::get(LLVMContext &Context, ArrayRef<double> Elts) {
+ Type *Ty = VectorType::get(Type::getDoubleTy(Context), Elts.size());
+ return getImpl(StringRef((char*)Elts.data(), Elts.size()*8), Ty);
+}
+
+Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) {
+ assert(isElementTypeCompatible(V->getType()) &&
+ "Element type not compatible with ConstantData");
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ if (CI->getType()->isIntegerTy(8)) {
+ SmallVector<uint8_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(16)) {
+ SmallVector<uint16_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ if (CI->getType()->isIntegerTy(32)) {
+ SmallVector<uint32_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+ assert(CI->getType()->isIntegerTy(64) && "Unsupported ConstantData type");
+ SmallVector<uint64_t, 16> Elts(NumElts, CI->getZExtValue());
+ return get(V->getContext(), Elts);
+ }
+
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+ if (CFP->getType()->isFloatTy()) {
+ SmallVector<float, 16> Elts(NumElts, CFP->getValueAPF().convertToFloat());
+ return get(V->getContext(), Elts);
+ }
+ if (CFP->getType()->isDoubleTy()) {
+ SmallVector<double, 16> Elts(NumElts,
+ CFP->getValueAPF().convertToDouble());
+ return get(V->getContext(), Elts);
+ }
+ }
+ return ConstantVector::getSplat(NumElts, V);
+}
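
The trailing fallback covers compatible element types whose splat value is neither a ConstantInt nor a ConstantFP; a sketch assuming a Ctx and some pointer-typed global GV:

  // ptrtoint of GV is an i64 Constant, but a ConstantExpr rather than a
  // ConstantInt, so the ConstantData fast path above bails out.
  Constant *P2I = ConstantExpr::getPtrToInt(GV, Type::getInt64Ty(Ctx));
  Constant *V = ConstantDataVector::getSplat(2, P2I);
  // Falls through to ConstantVector::getSplat and yields an ordinary
  // uniqued ConstantVector.
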
+
+
+/// getElementAsInteger - If this is a sequential container of integers (of
+/// any size), return the specified element in the low bits of a uint64_t.
+uint64_t ConstantDataSequential::getElementAsInteger(unsigned Elt) const {
+ assert(isa<IntegerType>(getElementType()) &&
+ "Accessor can only be used when element is an integer");
+ const char *EltPtr = getElementPointer(Elt);
+
+ // The data is stored in host byte order, make sure to cast back to the right
+ // type to load with the right endianness.
+ switch (getElementType()->getIntegerBitWidth()) {
+ default: llvm_unreachable("Invalid bitwidth for CDS");
+ case 8: return *(uint8_t*)EltPtr;
+ case 16: return *(uint16_t*)EltPtr;
+ case 32: return *(uint32_t*)EltPtr;
+ case 64: return *(uint64_t*)EltPtr;
+ }
+}
+
+/// getElementAsAPFloat - If this is a sequential container of floating point
+/// type, return the specified element as an APFloat.
+APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
+ const char *EltPtr = getElementPointer(Elt);
+
+ switch (getElementType()->getTypeID()) {
+ default:
+ llvm_unreachable("Accessor can only be used when element is float/double!");
+ case Type::FloatTyID: return APFloat(*(float*)EltPtr);
+ case Type::DoubleTyID: return APFloat(*(double*)EltPtr);
+ }
+}
+
+/// getElementAsFloat - If this is a sequential container of floats, return
+/// the specified element as a float.
+float ConstantDataSequential::getElementAsFloat(unsigned Elt) const {
+ assert(getElementType()->isFloatTy() &&
+ "Accessor can only be used when element is a 'float'");
+ return *(float*)getElementPointer(Elt);
+}
+
+/// getElementAsDouble - If this is a sequential container of doubles, return
+/// the specified element as a double.
+double ConstantDataSequential::getElementAsDouble(unsigned Elt) const {
+ assert(getElementType()->isDoubleTy() &&
+ "Accessor can only be used when element is a 'float'");
+ return *(double*)getElementPointer(Elt);
+}
+
+/// getElementAsConstant - Return a Constant for a specified index's element.
+/// Note that this has to compute a new constant to return, so it isn't as
+/// efficient as getElementAsInteger/Float/Double.
+Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const {
+ if (getElementType()->isFloatTy() || getElementType()->isDoubleTy())
+ return ConstantFP::get(getContext(), getElementAsAPFloat(Elt));
+
+ return ConstantInt::get(getElementType(), getElementAsInteger(Elt));
+}
+
+/// isString - This method returns true if this is an array of i8.
+bool ConstantDataSequential::isString() const {
+ return isa<ArrayType>(getType()) && getElementType()->isIntegerTy(8);
+}
+
+/// isCString - This method returns true if the array "isString", ends with a
+/// nul byte, and does not contain any other nul bytes.
+bool ConstantDataSequential::isCString() const {
+ if (!isString())
+ return false;
+
+ StringRef Str = getAsString();
+
+ // The last value must be nul.
+ if (Str.back() != 0) return false;
+
+ // Other elements must be non-nul.
+ return Str.drop_back().find(0) == StringRef::npos;
+}
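
Sketch (Ctx assumed); the second string uses an explicit-length StringRef so the interior nul survives:

  Constant *A = ConstantDataArray::getString(Ctx, "abc"); // c"abc\00"
  bool IsC1 = cast<ConstantDataArray>(A)->isCString();    // true
  Constant *B = ConstantDataArray::getString(Ctx, StringRef("a\0b", 3), false);
  bool IsC2 = cast<ConstantDataArray>(B)->isCString();
  // false: the last byte is not nul, and the interior nul would also
  // disqualify it.
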
+
+/// getSplatValue - If this is a splat constant, meaning that all of the
+/// elements have the same value, return that value. Otherwise return NULL.
+Constant *ConstantDataVector::getSplatValue() const {
+ const char *Base = getRawDataValues().data();
+
+ // Compare elements 1+ to the 0'th element.
+ unsigned EltSize = getElementByteSize();
+ for (unsigned i = 1, e = getNumElements(); i != e; ++i)
+ if (memcmp(Base, Base+i*EltSize, EltSize))
+ return 0;
+
+ // If they're all the same, return the 0th one as a representative.
+ return getElementAsConstant(0);
+}
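
Sketch (Ctx assumed):

  uint16_t Vals[] = { 7, 7, 7, 7 };
  Constant *V = ConstantDataVector::get(Ctx, Vals);
  Constant *S = cast<ConstantDataVector>(V)->getSplatValue();
  // S is the i16 constant 7; the splat test was a raw memcmp, and no
  // per-element Constant existed until this final call.
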
//===----------------------------------------------------------------------===//
// replaceUsesOfWithOnConstant implementations
@@ -1911,56 +2388,46 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
LLVMContextImpl *pImpl = getType()->getContext().pImpl;
- std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
- Lookup.first.first = cast<ArrayType>(getType());
- Lookup.second = this;
-
- std::vector<Constant*> &Values = Lookup.first.second;
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::ArrayConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<ArrayType>(getType());
Values.reserve(getNumOperands()); // Build replacement array.
// Fill values with the modified operands of the constant array. Also,
// compute whether this turns into an all-zeros array.
- bool isAllZeros = false;
unsigned NumUpdated = 0;
- if (!ToC->isNullValue()) {
- for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- if (Val == From) {
- Val = ToC;
- ++NumUpdated;
- }
- Values.push_back(Val);
- }
- } else {
- isAllZeros = true;
- for (Use *O = OperandList, *E = OperandList+getNumOperands();O != E; ++O) {
- Constant *Val = cast<Constant>(O->get());
- if (Val == From) {
- Val = ToC;
- ++NumUpdated;
- }
- Values.push_back(Val);
- if (isAllZeros) isAllZeros = Val->isNullValue();
+
+ // Keep track of whether all the values in the array are "ToC".
+ bool AllSame = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ if (Val == From) {
+ Val = ToC;
+ ++NumUpdated;
}
+ Values.push_back(Val);
+ AllSame &= Val == ToC;
}
Constant *Replacement = 0;
- if (isAllZeros) {
+ if (AllSame && ToC->isNullValue()) {
Replacement = ConstantAggregateZero::get(getType());
+ } else if (AllSame && isa<UndefValue>(ToC)) {
+ Replacement = UndefValue::get(getType());
} else {
// Check to see if we have this array type already.
- bool Exists;
+ Lookup.second = makeArrayRef(Values);
LLVMContextImpl::ArrayConstantsTy::MapTy::iterator I =
- pImpl->ArrayConstants.InsertOrGetItem(Lookup, Exists);
+ pImpl->ArrayConstants.find(Lookup);
- if (Exists) {
- Replacement = I->second;
+ if (I != pImpl->ArrayConstants.map_end()) {
+ Replacement = I->first;
} else {
// Okay, the new shape doesn't exist in the system yet. Instead of
// creating a new constant array, inserting it, replaceallusesof'ing the
// old with the new, then deleting the old... just update the current one
// in place!
- pImpl->ArrayConstants.MoveConstantToNewSlot(this, I);
+ pImpl->ArrayConstants.remove(this);
// Update to the new value. Optimize for the case when we have a single
// operand that we're changing, but handle bulk updates efficiently.
@@ -1974,6 +2441,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
if (getOperand(i) == From)
setOperand(i, ToC);
}
+ pImpl->ArrayConstants.insert(this);
return;
}
}
@@ -1996,26 +2464,32 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
unsigned OperandToUpdate = U-OperandList;
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
- std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
- Lookup.first.first = cast<StructType>(getType());
- Lookup.second = this;
- std::vector<Constant*> &Values = Lookup.first.second;
+ SmallVector<Constant*, 8> Values;
+ LLVMContextImpl::StructConstantsTy::LookupKey Lookup;
+ Lookup.first = cast<StructType>(getType());
Values.reserve(getNumOperands()); // Build replacement struct.
-
// Fill values with the modified operands of the constant struct. Also,
// compute whether this turns into an all-zeros struct.
bool isAllZeros = false;
- if (!ToC->isNullValue()) {
- for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
- Values.push_back(cast<Constant>(O->get()));
- } else {
+ bool isAllUndef = false;
+ if (ToC->isNullValue()) {
isAllZeros = true;
for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
Constant *Val = cast<Constant>(O->get());
Values.push_back(Val);
if (isAllZeros) isAllZeros = Val->isNullValue();
}
+ } else if (isa<UndefValue>(ToC)) {
+ isAllUndef = true;
+ for (Use *O = OperandList, *E = OperandList+getNumOperands(); O != E; ++O) {
+ Constant *Val = cast<Constant>(O->get());
+ Values.push_back(Val);
+ if (isAllUndef) isAllUndef = isa<UndefValue>(Val);
+ }
+ } else {
+ for (Use *O = OperandList, *E = OperandList + getNumOperands(); O != E; ++O)
+ Values.push_back(cast<Constant>(O->get()));
}
Values[OperandToUpdate] = ToC;
@@ -2024,23 +2498,26 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
Constant *Replacement = 0;
if (isAllZeros) {
Replacement = ConstantAggregateZero::get(getType());
+ } else if (isAllUndef) {
+ Replacement = UndefValue::get(getType());
} else {
// Check to see if we have this struct type already.
- bool Exists;
+ Lookup.second = makeArrayRef(Values);
LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
- pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
+ pImpl->StructConstants.find(Lookup);
- if (Exists) {
- Replacement = I->second;
+ if (I != pImpl->StructConstants.map_end()) {
+ Replacement = I->first;
} else {
// Okay, the new shape doesn't exist in the system yet. Instead of
// creating a new constant struct, inserting it, replaceallusesof'ing the
// old with the new, then deleting the old... just update the current one
// in place!
- pImpl->StructConstants.MoveConstantToNewSlot(this, I);
+ pImpl->StructConstants.remove(this);
// Update to the new value.
setOperand(OperandToUpdate, ToC);
+ pImpl->StructConstants.insert(this);
return;
}
}
@@ -2058,7 +2535,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
- std::vector<Constant*> Values;
+ SmallVector<Constant*, 8> Values;
Values.reserve(getNumOperands()); // Build replacement array...
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
Constant *Val = getOperand(i);
@@ -2081,89 +2558,13 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
assert(isa<Constant>(ToV) && "Cannot make Constant refer to non-constant!");
Constant *To = cast<Constant>(ToV);
- Constant *Replacement = 0;
- if (getOpcode() == Instruction::GetElementPtr) {
- SmallVector<Constant*, 8> Indices;
- Constant *Pointer = getOperand(0);
- Indices.reserve(getNumOperands()-1);
- if (Pointer == From) Pointer = To;
-
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
- Constant *Val = getOperand(i);
- if (Val == From) Val = To;
- Indices.push_back(Val);
- }
- Replacement = ConstantExpr::getGetElementPtr(Pointer, Indices,
- cast<GEPOperator>(this)->isInBounds());
- } else if (getOpcode() == Instruction::ExtractValue) {
- Constant *Agg = getOperand(0);
- if (Agg == From) Agg = To;
-
- ArrayRef<unsigned> Indices = getIndices();
- Replacement = ConstantExpr::getExtractValue(Agg, Indices);
- } else if (getOpcode() == Instruction::InsertValue) {
- Constant *Agg = getOperand(0);
- Constant *Val = getOperand(1);
- if (Agg == From) Agg = To;
- if (Val == From) Val = To;
-
- ArrayRef<unsigned> Indices = getIndices();
- Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices);
- } else if (isCast()) {
- assert(getOperand(0) == From && "Cast only has one use!");
- Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
- } else if (getOpcode() == Instruction::Select) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- Constant *C3 = getOperand(2);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (C3 == From) C3 = To;
- Replacement = ConstantExpr::getSelect(C1, C2, C3);
- } else if (getOpcode() == Instruction::ExtractElement) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- Replacement = ConstantExpr::getExtractElement(C1, C2);
- } else if (getOpcode() == Instruction::InsertElement) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- Constant *C3 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (C3 == From) C3 = To;
- Replacement = ConstantExpr::getInsertElement(C1, C2, C3);
- } else if (getOpcode() == Instruction::ShuffleVector) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- Constant *C3 = getOperand(2);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (C3 == From) C3 = To;
- Replacement = ConstantExpr::getShuffleVector(C1, C2, C3);
- } else if (isCompare()) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- if (getOpcode() == Instruction::ICmp)
- Replacement = ConstantExpr::getICmp(getPredicate(), C1, C2);
- else {
- assert(getOpcode() == Instruction::FCmp);
- Replacement = ConstantExpr::getFCmp(getPredicate(), C1, C2);
- }
- } else if (getNumOperands() == 2) {
- Constant *C1 = getOperand(0);
- Constant *C2 = getOperand(1);
- if (C1 == From) C1 = To;
- if (C2 == From) C2 = To;
- Replacement = ConstantExpr::get(getOpcode(), C1, C2, SubclassOptionalData);
- } else {
- llvm_unreachable("Unknown ConstantExpr type!");
- return;
+ SmallVector<Constant*, 8> NewOps;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Constant *Op = getOperand(i);
+ NewOps.push_back(Op == From ? To : Op);
}
+ Constant *Replacement = getWithOperands(NewOps);
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 1077004d7c76..8903a8f40f95 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -15,6 +15,8 @@
#ifndef LLVM_CONSTANTSCONTEXT_H
#define LLVM_CONSTANTSCONTEXT_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Operator.h"
@@ -30,6 +32,7 @@ struct ConstantTraits;
/// UnaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement unary constant exprs.
class UnaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -46,6 +49,7 @@ public:
/// BinaryConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement binary constant exprs.
class BinaryConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly two operands
@@ -66,6 +70,7 @@ public:
/// SelectConstantExpr - This class is private to Constants.cpp, and is used
/// behind the scenes to implement select constant exprs.
class SelectConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly three operands
@@ -86,6 +91,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// extractelement constant exprs.
class ExtractElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly two operands
@@ -106,6 +112,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// insertelement constant exprs.
class InsertElementConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly three operands
@@ -127,6 +134,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// shufflevector constant exprs.
class ShuffleVectorConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly three operands
@@ -151,6 +159,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// extractvalue constant exprs.
class ExtractValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -176,6 +185,7 @@ public:
/// Constants.cpp, and is used behind the scenes to implement
/// insertvalue constant exprs.
class InsertValueConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
public:
// allocate space for exactly one operand
@@ -202,11 +212,12 @@ public:
/// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is
/// used behind the scenes to implement getelementptr constant exprs.
class GetElementPtrConstantExpr : public ConstantExpr {
- GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ virtual void anchor();
+ GetElementPtrConstantExpr(Constant *C, ArrayRef<Constant*> IdxList,
Type *DestTy);
public:
static GetElementPtrConstantExpr *Create(Constant *C,
- const std::vector<Constant*>&IdxList,
+ ArrayRef<Constant*> IdxList,
Type *DestTy,
unsigned Flags) {
GetElementPtrConstantExpr *Result =
@@ -221,8 +232,10 @@ public:
// CompareConstantExpr - This class is private to Constants.cpp, and is used
// behind the scenes to implement ICmp and FCmp constant expressions. This is
// needed in order to store the predicate value for these instructions.
-struct CompareConstantExpr : public ConstantExpr {
+class CompareConstantExpr : public ConstantExpr {
+ virtual void anchor();
void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+public:
// allocate space for exactly two operands
void *operator new(size_t s) {
return User::operator new(s, 2);
@@ -397,6 +410,13 @@ struct ConstantCreator {
}
};
+template<class ConstantClass, class TypeClass>
+struct ConstantArrayCreator {
+ static ConstantClass *create(TypeClass *Ty, ArrayRef<Constant*> V) {
+ return new(V.size()) ConstantClass(Ty, V);
+ }
+};
+
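The map below leans on DenseMap::find_as, which probes with an alternate key type so callers never allocate a throwaway constant just to test membership. A self-contained sketch of the same two-way MapInfo pattern, assuming only the DenseMap.h and Hashing.h headers included above (Node and NodeMapInfo are hypothetical names, not part of the patch):

  #include <utility>

  struct Node { int A, B; };

  struct NodeMapInfo {
    typedef std::pair<int, int> LookupKey;
    static inline Node *getEmptyKey() {
      return llvm::DenseMapInfo<Node*>::getEmptyKey();
    }
    static inline Node *getTombstoneKey() {
      return llvm::DenseMapInfo<Node*>::getTombstoneKey();
    }
    // Stored keys and lookup keys must hash identically.
    static unsigned getHashValue(const Node *N) {
      return getHashValue(LookupKey(N->A, N->B));
    }
    static unsigned getHashValue(const LookupKey &K) {
      return (unsigned)llvm::hash_combine(K.first, K.second);
    }
    static bool isEqual(const Node *L, const Node *R) { return L == R; }
    // Cross-type comparison is what makes find_as work.
    static bool isEqual(const LookupKey &K, const Node *N) {
      if (N == getEmptyKey() || N == getTombstoneKey())
        return false;
      return K.first == N->A && K.second == N->B;
    }
  };

  llvm::DenseMap<Node*, char, NodeMapInfo> Map;
  // Probe by content without constructing a Node:
  bool Present = Map.find_as(std::make_pair(1, 2)) != Map.end();

ConstantAggrUniqueMap below is this pattern with LookupKey = (type, operand list), which is exactly why replaceUsesOfWithOnConstant must remove() a constant before mutating its operands and insert() it afterwards.
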
template<class ConstantClass>
struct ConstantKeyData {
typedef void ValType;
@@ -447,7 +467,6 @@ struct ConstantCreator<ConstantExpr, Type, ExprMapKeyType> {
return new CompareConstantExpr(Ty, Instruction::FCmp, V.subclassdata,
V.operands[0], V.operands[1]);
llvm_unreachable("Invalid ConstantExpr!");
- return 0;
}
};
@@ -467,90 +486,6 @@ struct ConstantKeyData<ConstantExpr> {
}
};
-// ConstantAggregateZero does not take extra "value" argument...
-template<class ValType>
-struct ConstantCreator<ConstantAggregateZero, Type, ValType> {
- static ConstantAggregateZero *create(Type *Ty, const ValType &V){
- return new ConstantAggregateZero(Ty);
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantVector> {
- typedef std::vector<Constant*> ValType;
- static ValType getValType(ConstantVector *CP) {
- std::vector<Constant*> Elements;
- Elements.reserve(CP->getNumOperands());
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Elements.push_back(CP->getOperand(i));
- return Elements;
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantAggregateZero> {
- typedef char ValType;
- static ValType getValType(ConstantAggregateZero *C) {
- return 0;
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantArray> {
- typedef std::vector<Constant*> ValType;
- static ValType getValType(ConstantArray *CA) {
- std::vector<Constant*> Elements;
- Elements.reserve(CA->getNumOperands());
- for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
- Elements.push_back(cast<Constant>(CA->getOperand(i)));
- return Elements;
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantStruct> {
- typedef std::vector<Constant*> ValType;
- static ValType getValType(ConstantStruct *CS) {
- std::vector<Constant*> Elements;
- Elements.reserve(CS->getNumOperands());
- for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i)
- Elements.push_back(cast<Constant>(CS->getOperand(i)));
- return Elements;
- }
-};
-
-// ConstantPointerNull does not take extra "value" argument...
-template<class ValType>
-struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
- static ConstantPointerNull *create(PointerType *Ty, const ValType &V){
- return new ConstantPointerNull(Ty);
- }
-};
-
-template<>
-struct ConstantKeyData<ConstantPointerNull> {
- typedef char ValType;
- static ValType getValType(ConstantPointerNull *C) {
- return 0;
- }
-};
-
-// UndefValue does not take extra "value" argument...
-template<class ValType>
-struct ConstantCreator<UndefValue, Type, ValType> {
- static UndefValue *create(Type *Ty, const ValType &V) {
- return new UndefValue(Ty);
- }
-};
-
-template<>
-struct ConstantKeyData<UndefValue> {
- typedef char ValType;
- static ValType getValType(UndefValue *C) {
- return 0;
- }
-};
-
template<>
struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
static InlineAsm *create(PointerType *Ty, const InlineAsmKeyType &Key) {
@@ -704,6 +639,129 @@ public:
}
};
+// Unique map for aggregate constants
+template<class TypeClass, class ConstantClass>
+class ConstantAggrUniqueMap {
+public:
+ typedef ArrayRef<Constant*> Operands;
+ typedef std::pair<TypeClass*, Operands> LookupKey;
+private:
+ struct MapInfo {
+ typedef DenseMapInfo<ConstantClass*> ConstantClassInfo;
+ typedef DenseMapInfo<Constant*> ConstantInfo;
+ typedef DenseMapInfo<TypeClass*> TypeClassInfo;
+ static inline ConstantClass* getEmptyKey() {
+ return ConstantClassInfo::getEmptyKey();
+ }
+ static inline ConstantClass* getTombstoneKey() {
+ return ConstantClassInfo::getTombstoneKey();
+ }
+ static unsigned getHashValue(const ConstantClass *CP) {
+ SmallVector<Constant*, 8> CPOperands;
+ CPOperands.reserve(CP->getNumOperands());
+ for (unsigned I = 0, E = CP->getNumOperands(); I < E; ++I)
+ CPOperands.push_back(CP->getOperand(I));
+ return getHashValue(LookupKey(CP->getType(), CPOperands));
+ }
+ static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
+ return LHS == RHS;
+ }
+ static unsigned getHashValue(const LookupKey &Val) {
+ return hash_combine(Val.first, hash_combine_range(Val.second.begin(),
+ Val.second.end()));
+ }
+ static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ if (LHS.first != RHS->getType() ||
+ LHS.second.size() != RHS->getNumOperands())
+ return false;
+ for (unsigned I = 0, E = RHS->getNumOperands(); I < E; ++I) {
+ if (LHS.second[I] != RHS->getOperand(I))
+ return false;
+ }
+ return true;
+ }
+ };
+public:
+ typedef DenseMap<ConstantClass *, char, MapInfo> MapTy;
+
+private:
+ /// Map - This is the main map of uniqued constants; it is the primary
+ /// way we avoid creating two constants of the same shape.
+ MapTy Map;
+
+public:
+ typename MapTy::iterator map_begin() { return Map.begin(); }
+ typename MapTy::iterator map_end() { return Map.end(); }
+
+ void freeConstants() {
+ for (typename MapTy::iterator I=Map.begin(), E=Map.end();
+ I != E; ++I) {
+ // Asserts that use_empty().
+ delete I->first;
+ }
+ }
+
+private:
+ typename MapTy::iterator findExistingElement(ConstantClass *CP) {
+ return Map.find(CP);
+ }
+
+ ConstantClass *Create(TypeClass *Ty, Operands V, typename MapTy::iterator I) {
+ ConstantClass* Result =
+ ConstantArrayCreator<ConstantClass,TypeClass>::create(Ty, V);
+
+ assert(Result->getType() == Ty && "Type specified is not correct!");
+ Map[Result] = '\0';
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
+ LookupKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find_as(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = I->first;
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
+ return Result;
+ }
+
+ /// Find the constant by lookup key.
+ typename MapTy::iterator find(LookupKey Lookup) {
+ return Map.find_as(Lookup);
+ }
+
+ /// Insert the constant into its proper slot.
+ void insert(ConstantClass *CP) {
+ Map[CP] = '\0';
+ }
+
+ /// Remove this constant from the map.
+ void remove(ConstantClass *CP) {
+ typename MapTy::iterator I = findExistingElement(CP);
+ assert(I != Map.end() && "Constant not found in constant table!");
+ assert(I->first == CP && "Didn't find correct element?");
+ Map.erase(I);
+ }
+
+ void dump() const {
+ DEBUG(dbgs() << "Constants.cpp: ConstantAggrUniqueMap\n");
+ }
+};
+
}
#endif
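The ConstantAggrUniqueMap above keys the table by the constant itself and probes it with a lightweight (type, operand-list) LookupKey via find_as, so no temporary constant is ever built just to check whether an equivalent one already exists. Below is a minimal self-contained sketch of the same pattern, with invented names (Node, NodeLess, NodeUniquer) and a transparent std::set (C++14 heterogeneous lookup) standing in for the DenseMap:

#include <set>
#include <tuple>
#include <utility>
#include <vector>

struct Node {
  int Tag;                      // stands in for the TypeClass* key
  std::vector<Node *> Ops;      // stands in for the Constant* operands
  Node(int T, std::vector<Node *> O) : Tag(T), Ops(std::move(O)) {}
};

typedef std::pair<int, const std::vector<Node *> &> LookupKey;

struct NodeLess {
  typedef void is_transparent;  // enable heterogeneous find()
  bool operator()(const Node *L, const Node *R) const {
    return std::tie(L->Tag, L->Ops) < std::tie(R->Tag, R->Ops);
  }
  bool operator()(const Node *L, const LookupKey &R) const {
    return std::tie(L->Tag, L->Ops) < std::tie(R.first, R.second);
  }
  bool operator()(const LookupKey &L, const Node *R) const {
    return std::tie(L.first, L.second) < std::tie(R->Tag, R->Ops);
  }
};

class NodeUniquer {
  std::set<Node *, NodeLess> Set;
public:
  // Mirrors getOrCreate(): probe with the cheap key, create only on a miss.
  Node *getOrCreate(int Tag, const std::vector<Node *> &Ops) {
    std::set<Node *, NodeLess>::iterator I = Set.find(LookupKey(Tag, Ops));
    if (I != Set.end())
      return *I;
    Node *N = new Node(Tag, Ops);
    Set.insert(N);
    return N;
  }
  ~NodeUniquer() {              // mirrors freeConstants()
    for (std::set<Node *, NodeLess>::iterator I = Set.begin(),
         E = Set.end(); I != E; ++I)
      delete *I;
  }
};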
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index a505e4b4f5e5..a9cca22d0dd4 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Core.h"
+#include "llvm/Attributes.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -132,10 +133,11 @@ LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M) {
LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
switch (unwrap(Ty)->getTypeID()) {
- default:
- assert(false && "Unhandled TypeID.");
+ default: llvm_unreachable("Unhandled TypeID.");
case Type::VoidTyID:
return LLVMVoidTypeKind;
+ case Type::HalfTyID:
+ return LLVMHalfTypeKind;
case Type::FloatTyID:
return LLVMFloatTypeKind;
case Type::DoubleTyID:
@@ -222,6 +224,9 @@ unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy) {
/*--.. Operations on real types ............................................--*/
+LLVMTypeRef LLVMHalfTypeInContext(LLVMContextRef C) {
+ return (LLVMTypeRef) Type::getHalfTy(*unwrap(C));
+}
LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getFloatTy(*unwrap(C));
}
@@ -241,6 +246,9 @@ LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) {
return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C));
}
+LLVMTypeRef LLVMHalfType(void) {
+ return LLVMHalfTypeInContext(LLVMGetGlobalContext());
+}
LLVMTypeRef LLVMFloatType(void) {
return LLVMFloatTypeInContext(LLVMGetGlobalContext());
}
@@ -558,6 +566,17 @@ void LLVMGetNamedMetadataOperands(LLVMModuleRef M, const char* name, LLVMValueRe
Dest[i] = wrap(N->getOperand(i));
}
+void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
+ LLVMValueRef Val)
+{
+ NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name);
+ if (!N)
+ return;
+ MDNode *Op = Val ? unwrap<MDNode>(Val) : NULL;
+ if (Op)
+ N->addOperand(Op);
+}
+
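One plausible use of the LLVMAddNamedMetadataOperand entry point introduced above: appending an MDNode to a module-level named metadata list from the C API. This is a sketch, not part of the patch; the metadata name and string are invented for the example.

#include "llvm-c/Core.h"

void tagModule(LLVMModuleRef mod) {
  // Build !{!"demo compiler 0.1"} and append it to !demo.ident.
  LLVMValueRef str = LLVMMDString("demo compiler 0.1", 17);
  LLVMValueRef node = LLVMMDNode(&str, 1);
  LLVMAddNamedMetadataOperand(mod, "demo.ident", node);
}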
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
@@ -614,8 +633,8 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
LLVMBool DontNullTerminate) {
/* Inverted the sense of AddNull because ', 0)' is a
better mnemonic for null termination than ', 1)'. */
- return wrap(ConstantArray::get(*unwrap(C), StringRef(Str, Length),
- DontNullTerminate == 0));
+ return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length),
+ DontNullTerminate == 0));
}
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
@@ -660,8 +679,7 @@ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
static LLVMOpcode map_to_llvmopcode(int opcode)
{
switch (opcode) {
- default:
- assert(0 && "Unhandled Opcode.");
+ default: llvm_unreachable("Unhandled Opcode.");
#define HANDLE_INST(num, opc, clas) case num: return LLVM##opc;
#include "llvm/Instruction.def"
#undef HANDLE_INST
@@ -671,12 +689,11 @@ static LLVMOpcode map_to_llvmopcode(int opcode)
static int map_from_llvmopcode(LLVMOpcode code)
{
switch (code) {
- default:
- assert(0 && "Unhandled Opcode.");
#define HANDLE_INST(num, opc, clas) case LLVM##opc: return num;
#include "llvm/Instruction.def"
#undef HANDLE_INST
}
+ llvm_unreachable("Unhandled Opcode.");
}
/*--.. Constant expressions ................................................--*/
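Both opcode mappers above lean on the X-macro trick behind llvm/Instruction.def: one master list is expanded into the case labels, so the switch can never drift out of sync with the opcode list. A self-contained toy of the same technique (the MY_OPCODES list and all names below are made up):

#include <cassert>

#define MY_OPCODES(X) \
  X(1, Add)           \
  X(2, Sub)           \
  X(3, Mul)

enum MyOpcode {
#define MY_HANDLE_OP(num, name) Op##name = num,
  MY_OPCODES(MY_HANDLE_OP)
#undef MY_HANDLE_OP
};

static const char *opcodeName(MyOpcode Op) {
  switch (Op) {
#define MY_HANDLE_OP(num, name) case Op##name: return #name;
  MY_OPCODES(MY_HANDLE_OP)
#undef MY_HANDLE_OP
  }
  // Like map_from_llvmopcode after this patch: the unreachable report sits
  // after a fully covered switch instead of in a default: label, so the
  // compiler can still warn about newly added, unhandled enumerators.
  assert(false && "Unhandled opcode");
  return 0;
}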
@@ -1040,8 +1057,6 @@ LLVMBool LLVMIsDeclaration(LLVMValueRef Global) {
LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
switch (unwrap<GlobalValue>(Global)->getLinkage()) {
- default:
- assert(false && "Unhandled Linkage Type.");
case GlobalValue::ExternalLinkage:
return LLVMExternalLinkage;
case GlobalValue::AvailableExternallyLinkage:
@@ -1076,16 +1091,13 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMCommonLinkage;
}
- // Should never get here.
- return static_cast<LLVMLinkage>(0);
+ llvm_unreachable("Invalid GlobalValue linkage!");
}
void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
GlobalValue *GV = unwrap<GlobalValue>(Global);
switch (Linkage) {
- default:
- assert(false && "Unhandled Linkage Type.");
case LLVMExternalLinkage:
GV->setLinkage(GlobalValue::ExternalLinkage);
break;
@@ -1337,14 +1349,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.addAttr(~0U, PA);
+ const AttrListPtr PALnew = PAL.addAttr(~0U, Attributes(PA));
Func->setAttributes(PALnew);
}
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.removeAttr(~0U, PA);
+ const AttrListPtr PALnew = PAL.removeAttr(~0U, Attributes(PA));
Func->setAttributes(PALnew);
}
@@ -1352,7 +1364,7 @@ LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
Attributes attr = PAL.getFnAttributes();
- return (LLVMAttribute)attr;
+ return (LLVMAttribute)attr.Raw();
}
/*--.. Operations on parameters ............................................--*/
@@ -1414,18 +1426,18 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
}
void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->addAttr(PA);
+ unwrap<Argument>(Arg)->addAttr(Attributes(PA));
}
void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- unwrap<Argument>(Arg)->removeAttr(PA);
+ unwrap<Argument>(Arg)->removeAttr(Attributes(PA));
}
LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
Attributes attr = A->getParent()->getAttributes().getParamAttributes(
A->getArgNo()+1);
- return (LLVMAttribute)attr;
+ return (LLVMAttribute)attr.Raw();
}
@@ -1603,10 +1615,9 @@ unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr) {
Value *V = unwrap(Instr);
if (CallInst *CI = dyn_cast<CallInst>(V))
return CI->getCallingConv();
- else if (InvokeInst *II = dyn_cast<InvokeInst>(V))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(V))
return II->getCallingConv();
llvm_unreachable("LLVMGetInstructionCallConv applies only to call and invoke!");
- return 0;
}
void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
@@ -1622,14 +1633,14 @@ void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
Call.setAttributes(
- Call.getAttributes().addAttr(index, PA));
+ Call.getAttributes().addAttr(index, Attributes(PA)));
}
void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
LLVMAttribute PA) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
Call.setAttributes(
- Call.getAttributes().removeAttr(index, PA));
+ Call.getAttributes().removeAttr(index, Attributes(PA)));
}
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
@@ -2055,6 +2066,20 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str,
return wrap(unwrap(B)->CreateGlobalStringPtr(Str, Name));
}
+LLVMBool LLVMGetVolatile(LLVMValueRef MemAccessInst) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->isVolatile();
+ return cast<StoreInst>(P)->isVolatile();
+}
+
+void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) {
+ Value *P = unwrap<Value>(MemAccessInst);
+ if (LoadInst *LI = dyn_cast<LoadInst>(P))
+ return LI->setVolatile(isVolatile);
+ return cast<StoreInst>(P)->setVolatile(isVolatile);
+}
+
/*--.. Casts ...............................................................--*/
LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val,
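A short caller's-eye sketch of the LLVMGetVolatile/LLVMSetVolatile accessors added above. The surrounding module and builder setup is boilerplate local to the example; only the two new calls come from this patch.

#include "llvm-c/Core.h"

void markLoadVolatile(void) {
  LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
  LLVMTypeRef i32ptr = LLVMPointerType(LLVMInt32Type(), 0);
  LLVMTypeRef fnty = LLVMFunctionType(LLVMVoidType(), &i32ptr, 1, 0);
  LLVMValueRef fn = LLVMAddFunction(mod, "f", fnty);
  LLVMBasicBlockRef entry = LLVMAppendBasicBlock(fn, "entry");
  LLVMBuilderRef b = LLVMCreateBuilder();
  LLVMPositionBuilderAtEnd(b, entry);

  LLVMValueRef ld = LLVMBuildLoad(b, LLVMGetParam(fn, 0), "v");
  if (!LLVMGetVolatile(ld))      /* newly added accessor */
    LLVMSetVolatile(ld, 1);      /* newly added mutator  */

  LLVMBuildRetVoid(b);
  LLVMDisposeBuilder(b);
  LLVMDisposeModule(mod);
}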
diff --git a/lib/VMCore/DebugInfoProbe.cpp b/lib/VMCore/DebugInfoProbe.cpp
deleted file mode 100644
index d1275ff58caa..000000000000
--- a/lib/VMCore/DebugInfoProbe.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements DebugInfoProbe. This probe can be used by a pass
-// manager to analyze how optimizer is treating debugging information.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "debuginfoprobe"
-#include "llvm/DebugInfoProbe.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Metadata.h"
-#include "llvm/PassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringRef.h"
-#include <set>
-#include <string>
-
-using namespace llvm;
-
-static cl::opt<bool>
-EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden,
- cl::desc("Enable debug info probe"));
-
-// CreateInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbeImpl - This class implements a interface to monitor
-// how an optimization pass is preserving debugging information.
-
-namespace llvm {
-
- class DebugInfoProbeImpl {
- public:
- DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {}
- void initialize(StringRef PName, Function &F);
- void finalize(Function &F);
- void report();
- private:
- unsigned NumDbgLineLost, NumDbgValueLost;
- std::string PassName;
- Function *TheFn;
- std::set<MDNode *> DbgVariables;
- std::set<Instruction *> MissingDebugLoc;
- };
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbeImpl
-
-/// initialize - Collect information before running an optimization pass.
-void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) {
- if (!EnableDebugInfoProbe) return;
- PassName = PName;
-
- DbgVariables.clear();
- MissingDebugLoc.clear();
- TheFn = &F;
-
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown())
- MissingDebugLoc.insert(BI);
- if (!isa<DbgInfoIntrinsic>(BI)) continue;
- Value *Addr = NULL;
- MDNode *Node = NULL;
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- Addr = DDI->getAddress();
- Node = DDI->getVariable();
- } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- Addr = DVI->getValue();
- Node = DVI->getVariable();
- }
- if (Addr)
- DbgVariables.insert(Node);
- }
-}
-
-/// report - Report findings. This should be invoked after finalize.
-void DebugInfoProbeImpl::report() {
- if (!EnableDebugInfoProbe) return;
- if (NumDbgLineLost || NumDbgValueLost) {
- raw_ostream *OutStream = CreateInfoOutputFile();
- if (NumDbgLineLost)
- *OutStream << NumDbgLineLost
- << "\t times line number info lost by "
- << PassName << "\n";
- if (NumDbgValueLost)
- *OutStream << NumDbgValueLost
- << "\t times variable info lost by "
- << PassName << "\n";
- delete OutStream;
- }
- NumDbgLineLost = 0;
- NumDbgValueLost = 0;
-}
-
-/// finalize - Collect information after running an optimization pass. This
-/// must be used after initialization.
-void DebugInfoProbeImpl::finalize(Function &F) {
- if (!EnableDebugInfoProbe) return;
- assert (TheFn == &F && "Invalid function to measure!");
-
- std::set<MDNode *>DbgVariables2;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- if (!isa<PHINode>(BI) && BI->getDebugLoc().isUnknown() &&
- MissingDebugLoc.count(BI) == 0) {
- ++NumDbgLineLost;
- DEBUG(dbgs() << "DebugInfoProbe (" << PassName << "): --- ");
- DEBUG(BI->print(dbgs()));
- DEBUG(dbgs() << "\n");
- }
- if (!isa<DbgInfoIntrinsic>(BI)) continue;
- Value *Addr = NULL;
- MDNode *Node = NULL;
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI)) {
- Addr = DDI->getAddress();
- Node = DDI->getVariable();
- } else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) {
- Addr = DVI->getValue();
- Node = DVI->getVariable();
- }
- if (Addr)
- DbgVariables2.insert(Node);
- }
-
- for (std::set<MDNode *>::iterator I = DbgVariables.begin(),
- E = DbgVariables.end(); I != E; ++I) {
- if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) {
- DEBUG(dbgs()
- << "DebugInfoProbe("
- << PassName
- << "): Losing dbg info for variable: ";
- if (MDString *MDS = dyn_cast_or_null<MDString>(
- (*I)->getOperand(2)))
- dbgs() << MDS->getString();
- else
- dbgs() << "...";
- dbgs() << "\n");
- ++NumDbgValueLost;
- }
- }
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbe
-
-DebugInfoProbe::DebugInfoProbe() {
- pImpl = new DebugInfoProbeImpl();
-}
-
-DebugInfoProbe::~DebugInfoProbe() {
- delete pImpl;
-}
-
-/// initialize - Collect information before running an optimization pass.
-void DebugInfoProbe::initialize(StringRef PName, Function &F) {
- pImpl->initialize(PName, F);
-}
-
-/// finalize - Collect information after running an optimization pass. This
-/// must be used after initialization.
-void DebugInfoProbe::finalize(Function &F) {
- pImpl->finalize(F);
-}
-
-/// report - Report findings. This should be invoked after finalize.
-void DebugInfoProbe::report() {
- pImpl->report();
-}
-
-//===----------------------------------------------------------------------===//
-// DebugInfoProbeInfo
-
-/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting
-/// them.
-DebugInfoProbeInfo::~DebugInfoProbeInfo() {
- if (!EnableDebugInfoProbe) return;
- for (StringMap<DebugInfoProbe*>::iterator I = Probes.begin(),
- E = Probes.end(); I != E; ++I) {
- I->second->report();
- delete I->second;
- }
- }
-
-/// initialize - Collect information before running an optimization pass.
-void DebugInfoProbeInfo::initialize(Pass *P, Function &F) {
- if (!EnableDebugInfoProbe) return;
- if (P->getAsPMDataManager())
- return;
-
- StringMapEntry<DebugInfoProbe *> &Entry =
- Probes.GetOrCreateValue(P->getPassName());
- DebugInfoProbe *&Probe = Entry.getValue();
- if (!Probe)
- Probe = new DebugInfoProbe();
- Probe->initialize(P->getPassName(), F);
-}
-
-/// finalize - Collect information after running an optimization pass. This
-/// must be used after initialization.
-void DebugInfoProbeInfo::finalize(Pass *P, Function &F) {
- if (!EnableDebugInfoProbe) return;
- if (P->getAsPMDataManager())
- return;
- StringMapEntry<DebugInfoProbe *> &Entry =
- Probes.GetOrCreateValue(P->getPassName());
- DebugInfoProbe *&Probe = Entry.getValue();
- assert (Probe && "DebugInfoProbe is not initialized!");
- Probe->finalize(F);
-}
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index 328244f80673..9013d28bb67d 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -173,10 +173,7 @@ DebugLoc DenseMapInfo<DebugLoc>::getTombstoneKey() {
}
unsigned DenseMapInfo<DebugLoc>::getHashValue(const DebugLoc &Key) {
- FoldingSetNodeID ID;
- ID.AddInteger(Key.LineCol);
- ID.AddInteger(Key.ScopeIdx);
- return ID.ComputeHash();
+ return static_cast<unsigned>(hash_combine(Key.LineCol, Key.ScopeIdx));
}
bool DenseMapInfo<DebugLoc>::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) {
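The DebugLoc change above swaps a FoldingSetNodeID round-trip for llvm::hash_combine. A minimal illustration of the same pattern, with a hypothetical two-field key type:

#include "llvm/ADT/Hashing.h"

struct LineCol {          // hypothetical key, not from the patch
  unsigned Line, Col;
};

static unsigned getHashValue(const LineCol &Key) {
  // hash_combine mixes both fields into a single hash_code, which is
  // narrowed to unsigned exactly once, at the DenseMap boundary.
  return static_cast<unsigned>(llvm::hash_combine(Key.Line, Key.Col));
}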
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 08b845ef9d6b..219e6315cf4e 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -80,27 +80,187 @@ void DominatorTree::print(raw_ostream &OS, const Module *) const {
DT->print(OS);
}
-// dominates - Return true if A dominates a use in B. This performs the
-// special checks necessary if A and B are in the same basic block.
-bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
- const BasicBlock *BBA = A->getParent(), *BBB = B->getParent();
-
- // If A is an invoke instruction, its value is only available in this normal
- // successor block.
- if (const InvokeInst *II = dyn_cast<InvokeInst>(A))
- BBA = II->getNormalDest();
-
- if (BBA != BBB) return dominates(BBA, BBB);
-
- // It is not possible to determine dominance between two PHI nodes
- // based on their ordering.
- if (isa<PHINode>(A) && isa<PHINode>(B))
+// dominates - Return true if Def dominates a use in User. This performs
+// the special checks necessary if Def and User are in the same basic block.
+// Note that Def doesn't dominate a use in Def itself!
+bool DominatorTree::dominates(const Instruction *Def,
+ const Instruction *User) const {
+ const BasicBlock *UseBB = User->getParent();
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // An instruction doesn't dominate a use in itself.
+ if (Def == User)
return false;
-
- // Loop through the basic block until we find A or B.
- BasicBlock::const_iterator I = BBA->begin();
- for (; &*I != A && &*I != B; ++I)
+
+ // The value defined by an invoke dominates an instruction only if it
+ // dominates every instruction in UseBB.
+ // A use in a PHI node is dominated only if Def dominates every possible
+ // use in UseBB.
+ if (isa<InvokeInst>(Def) || isa<PHINode>(User))
+ return dominates(Def, UseBB);
+
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != User; ++I)
/*empty*/;
-
- return &*I == A;
+
+ return &*I == Def;
+}
+
+// Return true if Def would dominate a use in any instruction in UseBB.
+// Note that dominates(Def, Def->getParent()) is false.
+bool DominatorTree::dominates(const Instruction *Def,
+ const BasicBlock *UseBB) const {
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Any unreachable use is dominated, even if DefBB == UseBB.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ if (DefBB == UseBB)
+ return false;
+
+ const InvokeInst *II = dyn_cast<InvokeInst>(Def);
+ if (!II)
+ return dominates(DefBB, UseBB);
+
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+ if (!dominates(NormalDest, UseBB))
+ return false;
+
+ // Simple case: if the normal destination has a single predecessor, the
+ // fact that it dominates the use block implies that we also do.
+ if (NormalDest->getSinglePredecessor())
+ return true;
+
+ // The normal edge from the invoke is critical. Conceptually, what we would
+ // like to do is split it and check if the new block dominates the use.
+ // With X being the new block, the graph would look like:
+ //
+ // DefBB
+ // /\ . .
+ // / \ . .
+ // / \ . .
+ // / \ | |
+ // A X B C
+ // | \ | /
+ // . \|/
+ // . NormalDest
+ // .
+ //
+ // Given the definition of dominance, NormalDest is dominated by X iff X
+ // dominates all of NormalDest's predecessors (X, B, C in the example). X
+ // trivially dominates itself, so we only have to find if it dominates the
+ // other predecessors. Since the only way out of X is via NormalDest, X can
+ // only properly dominate a node if NormalDest dominates that node too.
+ for (pred_iterator PI = pred_begin(NormalDest),
+ E = pred_end(NormalDest); PI != E; ++PI) {
+ const BasicBlock *BB = *PI;
+ if (BB == DefBB)
+ continue;
+
+ if (!DT->isReachableFromEntry(BB))
+ continue;
+
+ if (!dominates(NormalDest, BB))
+ return false;
+ }
+ return true;
+}
+
+bool DominatorTree::dominates(const Instruction *Def,
+ const Use &U) const {
+ Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
+
+ // Instructions do not dominate non-instructions.
+ if (!UserInst)
+ return false;
+
+ const BasicBlock *DefBB = Def->getParent();
+
+ // Determine the block in which the use happens. PHI nodes use
+ // their operands on edges; simulate this by thinking of the use
+ // happening at the end of the predecessor block.
+ const BasicBlock *UseBB;
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+ UseBB = PN->getIncomingBlock(U);
+ else
+ UseBB = UserInst->getParent();
+
+ // Any unreachable use is dominated, even if Def == User.
+ if (!isReachableFromEntry(UseBB))
+ return true;
+
+ // Unreachable definitions don't dominate anything.
+ if (!isReachableFromEntry(DefBB))
+ return false;
+
+ // Invoke instructions define their return values on the edges
+ // to their normal successors, so we have to handle them specially.
+ // Among other things, this means they don't dominate anything in
+ // their own block, except possibly a phi, so we don't need to
+ // walk the block in any case.
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(Def)) {
+ // A PHI in the normal successor using the invoke's return value is
+ // dominated by the invoke's return value.
+ if (isa<PHINode>(UserInst) &&
+ UserInst->getParent() == II->getNormalDest() &&
+ cast<PHINode>(UserInst)->getIncomingBlock(U) == DefBB)
+ return true;
+
+ // Otherwise use the instruction-dominates-block query, which
+ // handles the crazy case of an invoke with a critical edge
+ // properly.
+ return dominates(Def, UseBB);
+ }
+
+ // If the def and use are in different blocks, do a simple CFG dominator
+ // tree query.
+ if (DefBB != UseBB)
+ return dominates(DefBB, UseBB);
+
+ // Ok, def and use are in the same block. If the def is an invoke, it
+ // doesn't dominate anything in the block. If it's a PHI, it dominates
+ // everything in the block.
+ if (isa<PHINode>(UserInst))
+ return true;
+
+ // Otherwise, just loop through the basic block until we find Def or User.
+ BasicBlock::const_iterator I = DefBB->begin();
+ for (; &*I != Def && &*I != UserInst; ++I)
+ /*empty*/;
+
+ return &*I != UserInst;
+}
+
+bool DominatorTree::isReachableFromEntry(const Use &U) const {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+
+ // ConstantExprs aren't really reachable from the entry block, but they
+ // don't need to be treated like unreachable code either.
+ if (!I) return true;
+
+ // PHI nodes use their operands on their incoming edges.
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ return isReachableFromEntry(PN->getIncomingBlock(U));
+
+ // Everything else uses their operands in their own block.
+ return isReachableFromEntry(I->getParent());
}
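A sketch of how a client pass of this vintage might lean on the refined overloads above, in particular the new Use-based query, which handles PHI edge uses and invoke results without any caller-side special casing. The helper name and the transformation are invented for illustration:

#include "llvm/Analysis/Dominators.h"
#include "llvm/Instruction.h"

using namespace llvm;

// Returns true when To could safely replace every use of From: To must
// dominate each use, including PHI uses on their incoming edges.
static bool canReplaceAllUses(DominatorTree &DT, Instruction *From,
                              Instruction *To) {
  for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
       UI != UE; ++UI)
    if (!DT.dominates(To, UI.getUse()))   // Use-based overload from above
      return false;
  return true;
}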
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 1215e6a57ced..af6344ef6168 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -39,6 +39,8 @@ template class llvm::SymbolTableListTraits<BasicBlock, Function>;
// Argument Implementation
//===----------------------------------------------------------------------===//
+void Argument::anchor() { }
+
Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
: Value(Ty, Value::ArgumentVal) {
Parent = 0;
@@ -359,7 +361,7 @@ std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
FunctionType *Intrinsic::getType(LLVMContext &Context,
ID id, ArrayRef<Type*> Tys) {
Type *ResultTy = NULL;
- std::vector<Type*> ArgTys;
+ SmallVector<Type*, 8> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
@@ -370,13 +372,9 @@ FunctionType *Intrinsic::getType(LLVMContext &Context,
}
bool Intrinsic::isOverloaded(ID id) {
- static const bool OTable[] = {
- false,
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "llvm/Intrinsics.gen"
#undef GET_INTRINSIC_OVERLOAD_TABLE
- };
- return OTable[id];
}
/// This defines the "Intrinsic::getAttributes(ID id)" method.
@@ -402,6 +400,7 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
bool Function::hasAddressTaken(const User* *PutOffender) const {
for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const User *U = *I;
+ // FIXME: Handle blockaddress users, which do not actually take the
+ // function's address.
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
return PutOffender ? (*PutOffender = U, true) : true;
ImmutableCallSite CS(cast<Instruction>(U));
@@ -411,41 +410,30 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
return false;
}
+bool Function::isDefTriviallyDead() const {
+ // Check the linkage
+ if (!hasLinkOnceLinkage() && !hasLocalLinkage() &&
+ !hasAvailableExternallyLinkage())
+ return false;
+
+ // Check if the function is used by anything other than a blockaddress.
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I)
+ if (!isa<BlockAddress>(*I))
+ return false;
+
+ return true;
+}
+
/// callsFunctionThatReturnsTwice - Return true if the function has a call to
/// setjmp or other function that gcc recognizes as "returning twice".
-///
-/// FIXME: Remove after <rdar://problem/8031714> is fixed.
-/// FIXME: Is the above FIXME valid?
bool Function::callsFunctionThatReturnsTwice() const {
- static const char *const ReturnsTwiceFns[] = {
- "_setjmp",
- "setjmp",
- "sigsetjmp",
- "setjmp_syscall",
- "savectx",
- "qsetjmp",
- "vfork",
- "getcontext"
- };
-
- for (const_inst_iterator I = inst_begin(this), E = inst_end(this); I != E;
- ++I) {
+ for (const_inst_iterator
+ I = inst_begin(this), E = inst_end(this); I != E; ++I) {
const CallInst* callInst = dyn_cast<CallInst>(&*I);
if (!callInst)
continue;
if (callInst->canReturnTwice())
return true;
-
- // check for known function names.
- // FIXME: move this to clang.
- Function *F = callInst->getCalledFunction();
- if (!F)
- continue;
- StringRef Name = F->getName();
- for (unsigned J = 0, e = array_lengthof(ReturnsTwiceFns); J != e; ++J) {
- if (Name == ReturnsTwiceFns[J])
- return true;
- }
}
return false;
diff --git a/lib/VMCore/GCOV.cpp b/lib/VMCore/GCOV.cpp
index fc7f96fccaaa..595c45235995 100644
--- a/lib/VMCore/GCOV.cpp
+++ b/lib/VMCore/GCOV.cpp
@@ -107,7 +107,7 @@ bool GCOVFunction::read(GCOVBuffer &Buff, GCOVFormat Format) {
for (unsigned i = 0, e = Count; i != e; ++i) {
Blocks[i]->addCount(Buff.readInt64());
}
- return true;;
+ return true;
}
LineNumber = Buff.readInt();
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index 5114e2d498c5..b45923489af4 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -24,10 +24,10 @@ using namespace llvm;
/// specified. If Name is specified, it is the name of the global variable
/// created.
Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
- Constant *StrConstant = ConstantArray::get(Context, Str, true);
+ Constant *StrConstant = ConstantDataArray::getString(Context, Str);
Module &M = *BB->getParent()->getParent();
GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(),
- true, GlobalValue::InternalLinkage,
+ true, GlobalValue::PrivateLinkage,
StrConstant, "", 0, false);
GV->setName(Name);
GV->setUnnamedAddr(true);
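For callers nothing changes here; below is a small sketch of the API whose implementation moved to ConstantDataArray::getString above. The header path matches this era of the tree, and the setup names are local to the example:

#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"

using namespace llvm;

void emitGreeting(Module &M) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
  Function *F = cast<Function>(M.getOrInsertFunction("greet", FTy));
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);

  // After this patch the string lives in a private unnamed_addr global
  // backed by a ConstantDataArray; the returned value is an i8* to its
  // first character.
  B.CreateGlobalStringPtr("hello");
  B.CreateRetVoid();
}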
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 73191c19658c..5449714280d3 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -102,7 +102,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
case IndirectBr: return "indirectbr";
case Invoke: return "invoke";
case Resume: return "resume";
- case Unwind: return "unwind";
case Unreachable: return "unreachable";
// Standard binary operators...
@@ -166,8 +165,6 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
default: return "<Invalid operator> ";
}
-
- return 0;
}
/// isIdenticalTo - Return true if the specified instruction is exactly
@@ -391,59 +388,6 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-bool Instruction::isSafeToSpeculativelyExecute() const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (Constant *C = dyn_cast<Constant>(getOperand(i)))
- if (C->canTrap())
- return false;
-
- switch (getOpcode()) {
- default:
- return true;
- case UDiv:
- case URem: {
- // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue();
- }
- case SDiv:
- case SRem: {
- // x / y is undefined if y == 0, and might be undefined if y == -1,
- // but calcuations like x / 3 are safe.
- ConstantInt *Op = dyn_cast<ConstantInt>(getOperand(1));
- return Op && !Op->isNullValue() && !Op->isAllOnesValue();
- }
- case Load: {
- const LoadInst *LI = cast<LoadInst>(this);
- if (!LI->isUnordered())
- return false;
- return LI->getPointerOperand()->isDereferenceablePointer();
- }
- case Call:
- return false; // The called function could have undefined behavior or
- // side-effects.
- // FIXME: We should special-case some intrinsics (bswap,
- // overflow-checking arithmetic, etc.)
- case VAArg:
- case Alloca:
- case Invoke:
- case PHI:
- case Store:
- case Ret:
- case Br:
- case IndirectBr:
- case Switch:
- case Unwind:
- case Unreachable:
- case Fence:
- case LandingPad:
- case AtomicRMW:
- case AtomicCmpXchg:
- case Resume:
- return false; // Misc instructions which have effects
- }
-}
-
Instruction *Instruction::clone() const {
Instruction *New = clone_impl();
New->SubclassOptionalData = SubclassOptionalData;
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index b3a720527a8e..8db6ac9a33f4 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -625,40 +625,12 @@ void ReturnInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
BasicBlock *ReturnInst::getSuccessorV(unsigned idx) const {
llvm_unreachable("ReturnInst has no successors!");
- return 0;
}
ReturnInst::~ReturnInst() {
}
//===----------------------------------------------------------------------===//
-// UnwindInst Implementation
-//===----------------------------------------------------------------------===//
-
-UnwindInst::UnwindInst(LLVMContext &Context, Instruction *InsertBefore)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
- 0, 0, InsertBefore) {
-}
-UnwindInst::UnwindInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
- : TerminatorInst(Type::getVoidTy(Context), Instruction::Unwind,
- 0, 0, InsertAtEnd) {
-}
-
-
-unsigned UnwindInst::getNumSuccessorsV() const {
- return getNumSuccessors();
-}
-
-void UnwindInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("UnwindInst has no successors!");
-}
-
-BasicBlock *UnwindInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("UnwindInst has no successors!");
- return 0;
-}
-
-//===----------------------------------------------------------------------===//
// ResumeInst Implementation
//===----------------------------------------------------------------------===//
@@ -690,7 +662,6 @@ void ResumeInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
llvm_unreachable("ResumeInst has no successors!");
- return 0;
}
//===----------------------------------------------------------------------===//
@@ -712,12 +683,11 @@ unsigned UnreachableInst::getNumSuccessorsV() const {
}
void UnreachableInst::setSuccessorV(unsigned idx, BasicBlock *NewSucc) {
- llvm_unreachable("UnwindInst has no successors!");
+ llvm_unreachable("UnreachableInst has no successors!");
}
BasicBlock *UnreachableInst::getSuccessorV(unsigned idx) const {
- llvm_unreachable("UnwindInst has no successors!");
- return 0;
+ llvm_unreachable("UnreachableInst has no successors!");
}
//===----------------------------------------------------------------------===//
@@ -1359,6 +1329,15 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
///
template <typename IndexTy>
static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
+ if (Ptr->isVectorTy()) {
+ assert(IdxList.size() == 1 &&
+ "GEP with vector pointers must have a single index");
+ PointerType *PTy = dyn_cast<PointerType>(
+ cast<VectorType>(Ptr)->getElementType());
+ assert(PTy && "GEP with invalid vector pointer found");
+ return PTy->getElementType();
+ }
+
PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
@@ -1366,7 +1345,7 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
// Handle the special case of the empty set index set, which is always valid.
if (IdxList.empty())
return Agg;
-
+
// If there is at least one index, the top level type must be sized, otherwise
// it cannot be 'stepped over'.
if (!Agg->isSized())
@@ -1396,6 +1375,18 @@ Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<uint64_t> IdxList) {
return getIndexedTypeInternal(Ptr, IdxList);
}
+unsigned GetElementPtrInst::getAddressSpace(Value *Ptr) {
+ Type *Ty = Ptr->getType();
+
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty))
+ Ty = VTy->getElementType();
+
+ if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ return PTy->getAddressSpace();
+
+ llvm_unreachable("Invalid GEP pointer type");
+}
+
/// hasAllZeroIndices - Return true if all of the indices of this GEP are
/// zeros. If so, the result pointer and the first operand have the same
/// value, just potentially different types.
@@ -1558,46 +1549,84 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const Value *Mask) {
+ // V1 and V2 must be vectors of the same type.
if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
+ // Mask must be vector of i32.
VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
return false;
// Check to see if Mask is valid.
+ if (isa<UndefValue>(Mask) || isa<ConstantAggregateZero>(Mask))
+ return true;
+
if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
- VectorType *VTy = cast<VectorType>(V1->getType());
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
- if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
- if (CI->uge(VTy->getNumElements()*2))
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(V1Size*2))
return false;
} else if (!isa<UndefValue>(MV->getOperand(i))) {
return false;
}
}
+ return true;
}
- else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask))
- return false;
- return true;
+ if (const ConstantDataSequential *CDS =
+ dyn_cast<ConstantDataSequential>(Mask)) {
+ unsigned V1Size = cast<VectorType>(V1->getType())->getNumElements();
+ for (unsigned i = 0, e = MaskTy->getNumElements(); i != e; ++i)
+ if (CDS->getElementAsInteger(i) >= V1Size*2)
+ return false;
+ return true;
+ }
+
+ // The bitcode reader can create a placeholder for a forward reference
+ // used as the shuffle mask. When this occurs, the shuffle mask will
+ // fall into this case and fail. To avoid this error, do this bit of
+ // ugliness to allow such a mask to pass.
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(Mask))
+ if (CE->getOpcode() == Instruction::UserOp1)
+ return true;
+
+ return false;
}
/// getMaskValue - Return the index from the shuffle mask for the specified
/// output result. This is either -1 if the element is undef or a number less
/// than 2*numelements.
-int ShuffleVectorInst::getMaskValue(unsigned i) const {
- const Constant *Mask = cast<Constant>(getOperand(2));
- if (isa<UndefValue>(Mask)) return -1;
- if (isa<ConstantAggregateZero>(Mask)) return 0;
- const ConstantVector *MaskCV = cast<ConstantVector>(Mask);
- assert(i < MaskCV->getNumOperands() && "Index out of range");
-
- if (isa<UndefValue>(MaskCV->getOperand(i)))
+int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
+ assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(Mask))
+ return CDS->getElementAsInteger(i);
+ Constant *C = Mask->getAggregateElement(i);
+ if (isa<UndefValue>(C))
return -1;
- return cast<ConstantInt>(MaskCV->getOperand(i))->getZExtValue();
+ return cast<ConstantInt>(C)->getZExtValue();
+}
+
+/// getShuffleMask - Return the full mask for this instruction, where each
+/// element is the source element number and undefs are returned as -1.
+void ShuffleVectorInst::getShuffleMask(Constant *Mask,
+ SmallVectorImpl<int> &Result) {
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+
+ if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(Mask)) {
+ for (unsigned i = 0; i != NumElts; ++i)
+ Result.push_back(CDS->getElementAsInteger(i));
+ return;
+ }
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *C = Mask->getAggregateElement(i);
+ Result.push_back(isa<UndefValue>(C) ? -1 :
+ cast<ConstantInt>(C)->getZExtValue());
+ }
}
+
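A worked example of the decoding the two helpers above implement: for shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>, the mask decodes to {0, 5, -1, 7}; indices 0..3 select lanes of %a, indices 4..7 select lanes of %b, and undef lanes come back as -1. The helper below (countUndefLanes is invented) shows the intended call pattern:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Instructions.h"

using namespace llvm;

static unsigned countUndefLanes(ShuffleVectorInst *SVI) {
  SmallVector<int, 16> Mask;
  ShuffleVectorInst::getShuffleMask(cast<Constant>(SVI->getOperand(2)), Mask);
  unsigned N = 0;
  for (unsigned i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] == -1)   // undef lane
      ++N;
  return N;
}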
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -1848,46 +1877,27 @@ BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::FSub,
- zero, Op,
+ return new BinaryOperator(Instruction::FSub, zero, Op,
Op->getType(), Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return new BinaryOperator(Instruction::FSub,
- zero, Op,
+ return new BinaryOperator(Instruction::FSub, zero, Op,
Op->getType(), Name, InsertAtEnd);
}
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Constant *C;
- if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
- C = Constant::getAllOnesValue(PTy->getElementType());
- C = ConstantVector::get(
- std::vector<Constant*>(PTy->getNumElements(), C));
- } else {
- C = Constant::getAllOnesValue(Op->getType());
- }
-
+ Constant *C = Constant::getAllOnesValue(Op->getType());
return new BinaryOperator(Instruction::Xor, Op, C,
Op->getType(), Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Constant *AllOnes;
- if (VectorType *PTy = dyn_cast<VectorType>(Op->getType())) {
- // Create a vector of all ones values.
- Constant *Elt = Constant::getAllOnesValue(PTy->getElementType());
- AllOnes = ConstantVector::get(
- std::vector<Constant*>(PTy->getNumElements(), Elt));
- } else {
- AllOnes = Constant::getAllOnesValue(Op->getType());
- }
-
+ Constant *AllOnes = Constant::getAllOnesValue(Op->getType());
return new BinaryOperator(Instruction::Xor, Op, AllOnes,
Op->getType(), Name, InsertAtEnd);
}
@@ -1895,10 +1905,8 @@ BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
// isConstantAllOnes - Helper function for several functions below
static inline bool isConstantAllOnes(const Value *V) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return CI->isAllOnesValue();
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
- return CV->isAllOnesValue();
+ if (const Constant *C = dyn_cast<Constant>(V))
+ return C->isAllOnesValue();
return false;
}
@@ -1998,6 +2006,8 @@ bool BinaryOperator::isExact() const {
// CastInst Class
//===----------------------------------------------------------------------===//
+void CastInst::anchor() {}
+
// Just determine if this cast only deals with integral->integral conversion.
bool CastInst::isIntegerCast() const {
switch (getOpcode()) {
@@ -2042,8 +2052,7 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
Type *DestTy,
Type *IntPtrTy) {
switch (Opcode) {
- default:
- assert(0 && "Invalid CastOp");
+ default: llvm_unreachable("Invalid CastOp");
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -2236,13 +2245,10 @@ unsigned CastInst::isEliminableCastPair(
case 99:
// cast combination can't happen (error in input). This is for all cases
// where the MidTy is not the same for the two cast instructions.
- assert(0 && "Invalid Cast Combination");
- return 0;
+ llvm_unreachable("Invalid Cast Combination");
default:
- assert(0 && "Error in CastResults table!!!");
- return 0;
+ llvm_unreachable("Error in CastResults table!!!");
}
- return 0;
}
CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
@@ -2262,10 +2268,8 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore);
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore);
case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore);
- default:
- assert(0 && "Invalid opcode provided");
+ default: llvm_unreachable("Invalid opcode provided");
}
- return 0;
}
CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
@@ -2285,10 +2289,8 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty,
case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd);
case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd);
case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd);
- default:
- assert(0 && "Invalid opcode provided");
+ default: llvm_unreachable("Invalid opcode provided");
}
- return 0;
}
CastInst *CastInst::CreateZExtOrBitCast(Value *S, Type *Ty,
@@ -2557,9 +2559,8 @@ CastInst::getCastOpcode(
assert(DestBits == SrcBits &&
"Casting vector to floating point of different width");
return BitCast; // same size, no-op cast
- } else {
- llvm_unreachable("Casting pointer or non-first class to float");
}
+ llvm_unreachable("Casting pointer or non-first class to float");
} else if (DestTy->isVectorTy()) {
assert(DestBits == SrcBits &&
"Illegal cast to vector (wrong type or size)");
@@ -2569,24 +2570,16 @@ CastInst::getCastOpcode(
return BitCast; // ptr -> ptr
} else if (SrcTy->isIntegerTy()) {
return IntToPtr; // int -> ptr
- } else {
- assert(0 && "Casting pointer to other than pointer or int");
}
+ llvm_unreachable("Casting pointer to other than pointer or int");
} else if (DestTy->isX86_MMXTy()) {
if (SrcTy->isVectorTy()) {
assert(DestBits == SrcBits && "Casting vector of wrong width to X86_MMX");
return BitCast; // 64-bit vector to MMX
- } else {
- assert(0 && "Illegal cast to X86_MMX");
}
- } else {
- assert(0 && "Casting to type that is not first-class");
+ llvm_unreachable("Illegal cast to X86_MMX");
}
-
- // If we fall through to here we probably hit an assertion cast above
- // and assertions are not turned on. Anything we return is an error, so
- // BitCast is as good a choice as any.
- return BitCast;
+ llvm_unreachable("Casting to type that is not first-class");
}
//===----------------------------------------------------------------------===//
@@ -2645,9 +2638,21 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) {
return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy() &&
SrcLength == DstLength;
case Instruction::PtrToInt:
- return SrcTy->isPointerTy() && DstTy->isIntegerTy();
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isPointerTy() &&
+ DstTy->getScalarType()->isIntegerTy();
case Instruction::IntToPtr:
- return SrcTy->isIntegerTy() && DstTy->isPointerTy();
+ if (isa<VectorType>(SrcTy) != isa<VectorType>(DstTy))
+ return false;
+ if (VectorType *VT = dyn_cast<VectorType>(SrcTy))
+ if (VT->getNumElements() != cast<VectorType>(DstTy)->getNumElements())
+ return false;
+ return SrcTy->getScalarType()->isIntegerTy() &&
+ DstTy->getScalarType()->isPointerTy();
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
@@ -2890,7 +2895,7 @@ bool CmpInst::isEquality() const {
CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown cmp predicate!");
+ default: llvm_unreachable("Unknown cmp predicate!");
case ICMP_EQ: return ICMP_NE;
case ICMP_NE: return ICMP_EQ;
case ICMP_UGT: return ICMP_ULE;
@@ -2923,7 +2928,7 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown icmp predicate!");
+ default: llvm_unreachable("Unknown icmp predicate!");
case ICMP_EQ: case ICMP_NE:
case ICMP_SGT: case ICMP_SLT: case ICMP_SGE: case ICMP_SLE:
return pred;
@@ -2936,7 +2941,7 @@ ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown icmp predicate!");
+ default: llvm_unreachable("Unknown icmp predicate!");
case ICMP_EQ: case ICMP_NE:
case ICMP_UGT: case ICMP_ULT: case ICMP_UGE: case ICMP_ULE:
return pred;
@@ -3012,7 +3017,7 @@ ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
switch (pred) {
- default: assert(0 && "Unknown cmp predicate!");
+ default: llvm_unreachable("Unknown cmp predicate!");
case ICMP_EQ: case ICMP_NE:
return pred;
case ICMP_SGT: return ICMP_SLT;
@@ -3147,31 +3152,32 @@ SwitchInst::~SwitchInst() {
/// addCase - Add an entry to the switch instruction...
///
void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) {
+ unsigned NewCaseIdx = getNumCases();
unsigned OpNo = NumOperands;
if (OpNo+2 > ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
assert(OpNo+1 < ReservedSpace && "Growing didn't work!");
NumOperands = OpNo+2;
- OperandList[OpNo] = OnVal;
- OperandList[OpNo+1] = Dest;
+ CaseIt Case(this, NewCaseIdx);
+ Case.setValue(OnVal);
+ Case.setSuccessor(Dest);
}
-/// removeCase - This method removes the specified successor from the switch
-/// instruction. Note that this cannot be used to remove the default
-/// destination (successor #0).
-///
-void SwitchInst::removeCase(unsigned idx) {
- assert(idx != 0 && "Cannot remove the default case!");
- assert(idx*2 < getNumOperands() && "Successor index out of range!!!");
+/// removeCase - This method removes the specified case and its successor
+/// from the switch instruction.
+void SwitchInst::removeCase(CaseIt i) {
+ unsigned idx = i.getCaseIndex();
+
+ assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
unsigned NumOps = getNumOperands();
Use *OL = OperandList;
// Overwrite this case with the end of the list.
- if ((idx + 1) * 2 != NumOps) {
- OL[idx * 2] = OL[NumOps - 2];
- OL[idx * 2 + 1] = OL[NumOps - 1];
+ if (2 + (idx + 1) * 2 != NumOps) {
+ OL[2 + idx * 2] = OL[NumOps - 2];
+ OL[2 + idx * 2 + 1] = OL[NumOps - 1];
}
// Nuke the last value.
@@ -3438,15 +3444,11 @@ ExtractElementInst *ExtractElementInst::clone_impl() const {
}
InsertElementInst *InsertElementInst::clone_impl() const {
- return InsertElementInst::Create(getOperand(0),
- getOperand(1),
- getOperand(2));
+ return InsertElementInst::Create(getOperand(0), getOperand(1), getOperand(2));
}
ShuffleVectorInst *ShuffleVectorInst::clone_impl() const {
- return new ShuffleVectorInst(getOperand(0),
- getOperand(1),
- getOperand(2));
+ return new ShuffleVectorInst(getOperand(0), getOperand(1), getOperand(2));
}
PHINode *PHINode::clone_impl() const {
@@ -3482,11 +3484,6 @@ ResumeInst *ResumeInst::clone_impl() const {
return new(1) ResumeInst(*this);
}
-UnwindInst *UnwindInst::clone_impl() const {
- LLVMContext &Context = getContext();
- return new UnwindInst(Context);
-}
-
UnreachableInst *UnreachableInst::clone_impl() const {
LLVMContext &Context = getContext();
return new UnreachableInst(Context);
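The SwitchInst hunks above are part of the move from raw operand indices to the CaseIt interface. Below is a sketch of the matching iteration style, assuming the case_begin()/case_end() accessors that accompany CaseIt in this API; since removeCase invalidates iterators, the scan restarts after each erasure:

#include "llvm/Instructions.h"

using namespace llvm;

// Drop every case that branches to DeadBB (invented helper).
static void dropCasesTo(SwitchInst *SI, BasicBlock *DeadBB) {
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end();
         I != E; ++I)
      if (I.getCaseSuccessor() == DeadBB) {
        SI->removeCase(I);   // swaps the last case into this slot
        Changed = true;
        break;
      }
  }
}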
diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt
new file mode 100644
index 000000000000..bca8b2c97e95
--- /dev/null
+++ b/lib/VMCore/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/VMCore/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Core
+parent = Libraries
+required_libraries = Support
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index ebd1e0aa1b0f..68c56212bc6c 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -43,6 +43,16 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
// Create the 'prof' metadata kind.
unsigned ProfID = getMDKindID("prof");
assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
+
+ // Create the 'fpaccuracy' metadata kind.
+ unsigned FPAccuracyID = getMDKindID("fpaccuracy");
+ assert(FPAccuracyID == MD_fpaccuracy && "fpaccuracy kind id drifted");
+ (void)FPAccuracyID;
+
+ // Create the 'range' metadata kind.
+ unsigned RangeID = getMDKindID("range");
+ assert(RangeID == MD_range && "range kind id drifted");
+ (void)RangeID;
}
LLVMContext::~LLVMContext() { delete pImpl; }
@@ -78,11 +88,11 @@ void *LLVMContext::getInlineAsmDiagnosticContext() const {
return pImpl->InlineAsmDiagContext;
}
-void LLVMContext::emitError(StringRef ErrorStr) {
+void LLVMContext::emitError(const Twine &ErrorStr) {
emitError(0U, ErrorStr);
}
-void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
+void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
unsigned LocCookie = 0;
if (const MDNode *SrcLoc = I->getMetadata("srcloc")) {
if (SrcLoc->getNumOperands() != 0)
@@ -92,7 +102,7 @@ void LLVMContext::emitError(const Instruction *I, StringRef ErrorStr) {
return emitError(LocCookie, ErrorStr);
}
-void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
+void LLVMContext::emitError(unsigned LocCookie, const Twine &ErrorStr) {
// If there is no error handler installed, just print the error and exit.
if (pImpl->InlineAsmDiagHandler == 0) {
errs() << "error: " << ErrorStr << "\n";
@@ -100,7 +110,7 @@ void LLVMContext::emitError(unsigned LocCookie, StringRef ErrorStr) {
}
// If we do have an error handler, we can report the error and keep going.
- SMDiagnostic Diag("", "error: " + ErrorStr.str());
+ SMDiagnostic Diag("", SourceMgr::DK_Error, ErrorStr.str());
pImpl->InlineAsmDiagHandler(Diag, pImpl->InlineAsmDiagContext, LocCookie);
}
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 504b37267f70..6279bb823dbf 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -21,6 +21,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: TheTrueVal(0), TheFalseVal(0),
VoidTy(C, Type::VoidTyID),
LabelTy(C, Type::LabelTyID),
+ HalfTy(C, Type::HalfTyID),
FloatTy(C, Type::FloatTyID),
DoubleTy(C, Type::DoubleTyID),
MetadataTy(C, Type::MetadataTyID),
@@ -47,6 +48,16 @@ struct DropReferences {
P.second->dropAllReferences();
}
};
+
+// Temporary - drops pair.first instead of second.
+struct DropFirst {
+ // Takes the value_type of a ConstantAggrUniqueMap's internal map, whose
+ // 'first' is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.first->dropAllReferences();
+ }
+};
}
LLVMContextImpl::~LLVMContextImpl() {
@@ -57,25 +68,32 @@ LLVMContextImpl::~LLVMContextImpl() {
std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
DeleteContainerPointers(Modules);
+ // Free the constants. This is important to do here to ensure that they are
+ // freed before the LeakDetector is torn down.
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
DropReferences());
std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
- DropReferences());
+ DropFirst());
std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
- DropReferences());
+ DropFirst());
std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
- DropReferences());
+ DropFirst());
ExprConstants.freeConstants();
ArrayConstants.freeConstants();
StructConstants.freeConstants();
VectorConstants.freeConstants();
- AggZeroConstants.freeConstants();
- NullPtrConstants.freeConstants();
- UndefValueConstants.freeConstants();
+ DeleteContainerSeconds(CAZConstants);
+ DeleteContainerSeconds(CPNConstants);
+ DeleteContainerSeconds(UVConstants);
InlineAsms.freeConstants();
DeleteContainerSeconds(IntConstants);
DeleteContainerSeconds(FPConstants);
+ for (StringMap<ConstantDataSequential*>::iterator I = CDSConstants.begin(),
+ E = CDSConstants.end(); I != E; ++I)
+ delete I->second;
+ CDSConstants.clear();
+
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
SmallVector<MDNode*, 8> MDNodes;
@@ -92,3 +110,24 @@ LLVMContextImpl::~LLVMContextImpl() {
// Destroy MDStrings.
DeleteContainerSeconds(MDStringCache);
}
+
+// ConstantsContext anchors
+void UnaryConstantExpr::anchor() { }
+
+void BinaryConstantExpr::anchor() { }
+
+void SelectConstantExpr::anchor() { }
+
+void ExtractElementConstantExpr::anchor() { }
+
+void InsertElementConstantExpr::anchor() { }
+
+void ShuffleVectorConstantExpr::anchor() { }
+
+void ExtractValueConstantExpr::anchor() { }
+
+void InsertValueConstantExpr::anchor() { }
+
+void GetElementPtrConstantExpr::anchor() { }
+
+void CompareConstantExpr::anchor() { }
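
These empty anchor() definitions are the usual key-function idiom: giving each class one out-of-line virtual member pins its vtable and type info to this translation unit, rather than emitting weak copies in every file that includes the header. The pattern in miniature (hypothetical class name):

    // In the header: declare one virtual method with no inline body.
    struct Node {
      virtual void anchor();  // key function, deliberately not inline
      virtual ~Node() {}
    };

    // In exactly one .cpp file: the definition anchors the vtable there.
    void Node::anchor() {}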
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index a3f68fecbbf6..2252028b1569 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -29,6 +29,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Hashing.h"
#include <vector>
namespace llvm {
@@ -51,12 +52,14 @@ struct DenseMapAPIntKeyInfo {
bool operator!=(const KeyTy& that) const {
return !this->operator==(that);
}
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.type, Key.val);
+ }
};
static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
static unsigned getHashValue(const KeyTy &Key) {
- return DenseMapInfo<void*>::getHashValue(Key.type) ^
- Key.val.getHashValue();
+ return static_cast<unsigned>(hash_value(Key));
}
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
return LHS == RHS;
@@ -74,6 +77,9 @@ struct DenseMapAPFloatKeyInfo {
bool operator!=(const KeyTy& that) const {
return !this->operator==(that);
}
+ friend hash_code hash_value(const KeyTy &Key) {
+ return hash_combine(Key.val);
+ }
};
static inline KeyTy getEmptyKey() {
return KeyTy(APFloat(APFloat::Bogus,1));
@@ -82,13 +88,132 @@ struct DenseMapAPFloatKeyInfo {
return KeyTy(APFloat(APFloat::Bogus,2));
}
static unsigned getHashValue(const KeyTy &Key) {
- return Key.val.getHashValue();
+ return static_cast<unsigned>(hash_value(Key));
}
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
return LHS == RHS;
}
};
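
The hash_value friends added to both key structs route hashing through llvm/ADT/Hashing.h, so the APInt- and APFloat-keyed maps now share one well-mixed hash instead of the old ad-hoc XOR. A hedged sketch of the facility:

    #include "llvm/ADT/Hashing.h"
    using namespace llvm;

    unsigned bucketFor(const int *Vals, unsigned N, bool Flag) {
      // hash_combine mixes heterogeneous fields into one hash_code;
      // hash_combine_range folds a whole sequence. hash_code converts
      // to size_t, which the map traits narrow to unsigned.
      hash_code H = hash_combine(hash_combine_range(Vals, Vals + N), Flag);
      return static_cast<unsigned>(H);
    }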
+struct AnonStructTypeKeyInfo {
+ struct KeyTy {
+ ArrayRef<Type*> ETypes;
+ bool isPacked;
+ KeyTy(const ArrayRef<Type*>& E, bool P) :
+ ETypes(E), isPacked(P) {}
+ KeyTy(const KeyTy& that) :
+ ETypes(that.ETypes), isPacked(that.isPacked) {}
+ KeyTy(const StructType* ST) :
+ ETypes(ArrayRef<Type*>(ST->element_begin(), ST->element_end())),
+ isPacked(ST->isPacked()) {}
+ bool operator==(const KeyTy& that) const {
+ if (isPacked != that.isPacked)
+ return false;
+ if (ETypes != that.ETypes)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline StructType* getEmptyKey() {
+ return DenseMapInfo<StructType*>::getEmptyKey();
+ }
+ static inline StructType* getTombstoneKey() {
+ return DenseMapInfo<StructType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(hash_combine_range(Key.ETypes.begin(),
+ Key.ETypes.end()),
+ Key.isPacked);
+ }
+ static unsigned getHashValue(const StructType *ST) {
+ return getHashValue(KeyTy(ST));
+ }
+ static bool isEqual(const KeyTy& LHS, const StructType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const StructType *LHS, const StructType *RHS) {
+ return LHS == RHS;
+ }
+};
+
+struct FunctionTypeKeyInfo {
+ struct KeyTy {
+ const Type *ReturnType;
+ ArrayRef<Type*> Params;
+ bool isVarArg;
+ KeyTy(const Type* R, const ArrayRef<Type*>& P, bool V) :
+ ReturnType(R), Params(P), isVarArg(V) {}
+ KeyTy(const KeyTy& that) :
+ ReturnType(that.ReturnType),
+ Params(that.Params),
+ isVarArg(that.isVarArg) {}
+ KeyTy(const FunctionType* FT) :
+ ReturnType(FT->getReturnType()),
+ Params(ArrayRef<Type*>(FT->param_begin(), FT->param_end())),
+ isVarArg(FT->isVarArg()) {}
+ bool operator==(const KeyTy& that) const {
+ if (ReturnType != that.ReturnType)
+ return false;
+ if (isVarArg != that.isVarArg)
+ return false;
+ if (Params != that.Params)
+ return false;
+ return true;
+ }
+ bool operator!=(const KeyTy& that) const {
+ return !this->operator==(that);
+ }
+ };
+ static inline FunctionType* getEmptyKey() {
+ return DenseMapInfo<FunctionType*>::getEmptyKey();
+ }
+ static inline FunctionType* getTombstoneKey() {
+ return DenseMapInfo<FunctionType*>::getTombstoneKey();
+ }
+ static unsigned getHashValue(const KeyTy& Key) {
+ return hash_combine(Key.ReturnType,
+ hash_combine_range(Key.Params.begin(),
+ Key.Params.end()),
+ Key.isVarArg);
+ }
+ static unsigned getHashValue(const FunctionType *FT) {
+ return getHashValue(KeyTy(FT));
+ }
+ static bool isEqual(const KeyTy& LHS, const FunctionType *RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ return LHS == KeyTy(RHS);
+ }
+ static bool isEqual(const FunctionType *LHS, const FunctionType *RHS) {
+ return LHS == RHS;
+ }
+};
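
Both key-info structs implement one pattern: the DenseMap stores the uniqued StructType*/FunctionType* itself, while lookups can go through a borrowed KeyTy (an ArrayRef plus flags) via find_as, as FunctionType::get does further down in this patch, so probing never builds a temporary type. The same idiom with hypothetical names (Sig stands in for the uniqued object):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/Hashing.h"
    #include <vector>
    using namespace llvm;

    struct Sig { std::vector<int> Elts; };  // hypothetical uniqued object

    struct SigKeyInfo {
      static inline Sig *getEmptyKey() {
        return DenseMapInfo<Sig*>::getEmptyKey();
      }
      static inline Sig *getTombstoneKey() {
        return DenseMapInfo<Sig*>::getTombstoneKey();
      }
      // Alternate-key hashing and equality: probe with a borrowed ArrayRef.
      static unsigned getHashValue(ArrayRef<int> Key) {
        return static_cast<unsigned>(
            hash_combine_range(Key.begin(), Key.end()));
      }
      static unsigned getHashValue(const Sig *S) {
        return getHashValue(ArrayRef<int>(S->Elts));
      }
      static bool isEqual(ArrayRef<int> Key, const Sig *S) {
        if (S == getEmptyKey() || S == getTombstoneKey())
          return false;
        return ArrayRef<int>(S->Elts) == Key;
      }
      static bool isEqual(const Sig *L, const Sig *R) { return L == R; }
    };
    // DenseMap<Sig*, bool, SigKeyInfo> Table;
    // Table.find_as(ArrayRef<int>(Vals)) probes without constructing a Sig.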
+
+// Provide a FoldingSetTrait::Equals specialization for MDNode that can use a
+// shortcut to avoid comparing all operands.
+template<> struct FoldingSetTrait<MDNode> : DefaultFoldingSetTrait<MDNode> {
+ static bool Equals(const MDNode &X, const FoldingSetNodeID &ID,
+ unsigned IDHash, FoldingSetNodeID &TempID) {
+ assert(!X.isNotUniqued() && "Non-uniqued MDNode in FoldingSet?");
+ // First, check if the cached hashes match. If they don't we can skip the
+ // expensive operand walk.
+ if (X.Hash != IDHash)
+ return false;
+
+ // If they match we have to compare the operands.
+ X.Profile(TempID);
+ return TempID == ID;
+ }
+ static unsigned ComputeHash(const MDNode &X, FoldingSetNodeID &) {
+ return X.Hash; // Return cached hash.
+ }
+};
+
/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
class DebugRecVH : public CallbackVH {
@@ -129,7 +254,7 @@ public:
DenseMapAPFloatKeyInfo> FPMapTy;
FPMapTy FPConstants;
- StringMap<MDString*> MDStringCache;
+ StringMap<Value*> MDStringCache;
FoldingSet<MDNode> MDNodeSet;
// MDNodes may be uniqued or not uniqued. When they're not uniqued, they
@@ -138,23 +263,23 @@ public:
// on Context destruction.
SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
- ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
+ DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
- ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
+ typedef ConstantAggrUniqueMap<ArrayType, ConstantArray> ArrayConstantsTy;
ArrayConstantsTy ArrayConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
- StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
+ typedef ConstantAggrUniqueMap<StructType, ConstantStruct> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
- VectorType, ConstantVector> VectorConstantsTy;
+ typedef ConstantAggrUniqueMap<VectorType, ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
- ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
- NullPtrConstants;
- ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
+ DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
+
+ DenseMap<Type*, UndefValue*> UVConstants;
+
+ StringMap<ConstantDataSequential*> CDSConstants;
+
DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
@@ -169,7 +294,7 @@ public:
LeakDetectorImpl<Value> LLVMObjects;
// Basic type instances.
- Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy;
+ Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
@@ -180,9 +305,10 @@ public:
DenseMap<unsigned, IntegerType*> IntegerTypes;
- // TODO: Optimize FunctionTypes/AnonStructTypes!
- std::map<std::vector<Type*>, FunctionType*> FunctionTypes;
- std::map<std::vector<Type*>, StructType*> AnonStructTypes;
+ typedef DenseMap<FunctionType*, bool, FunctionTypeKeyInfo> FunctionTypeMap;
+ FunctionTypeMap FunctionTypes;
+ typedef DenseMap<StructType*, bool, AnonStructTypeKeyInfo> StructTypeMap;
+ StructTypeMap AnonStructTypes;
StringMap<StructType*> NamedStructTypes;
unsigned NamedStructTypesUniqueID;
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index ace4dc2de271..090b09a4ccd7 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -29,16 +29,19 @@ using namespace llvm;
// MDString implementation.
//
-MDString::MDString(LLVMContext &C, StringRef S)
- : Value(Type::getMetadataTy(C), Value::MDStringVal), Str(S) {}
+void MDString::anchor() { }
+
+MDString::MDString(LLVMContext &C)
+ : Value(Type::getMetadataTy(C), Value::MDStringVal) {}
MDString *MDString::get(LLVMContext &Context, StringRef Str) {
LLVMContextImpl *pImpl = Context.pImpl;
- StringMapEntry<MDString *> &Entry =
+ StringMapEntry<Value*> &Entry =
pImpl->MDStringCache.GetOrCreateValue(Str);
- MDString *&S = Entry.getValue();
- if (!S) S = new MDString(Context, Entry.getKey());
- return S;
+ Value *&S = Entry.getValue();
+ if (!S) S = new MDString(Context);
+ S->setValueName(&Entry);
+ return cast<MDString>(S);
}
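
With the Str member gone, an MDString's text now lives only in the StringMapEntry that doubles as its value name, so each distinct string is stored once and get() is a pure uniquing lookup. A hedged usage sketch (Ctx is an assumed LLVMContext):

    // Repeated gets return the same object; getString() reads straight
    // out of the cache's key storage.
    MDString *A = MDString::get(Ctx, "kernel");
    MDString *B = MDString::get(Ctx, "kernel");
    assert(A == B && A->getString() == "kernel");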
//===----------------------------------------------------------------------===//
@@ -48,14 +51,26 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) {
// Use CallbackVH to hold MDNode operands.
namespace llvm {
class MDNodeOperand : public CallbackVH {
- MDNode *Parent;
+ MDNode *getParent() {
+ MDNodeOperand *Cur = this;
+
+ while (Cur->getValPtrInt() != 1)
+ --Cur;
+
+ assert(Cur->getValPtrInt() == 1 &&
+ "Couldn't find the beginning of the operand list!");
+ return reinterpret_cast<MDNode*>(Cur) - 1;
+ }
+
public:
- MDNodeOperand(Value *V, MDNode *P) : CallbackVH(V), Parent(P) {}
+ MDNodeOperand(Value *V) : CallbackVH(V) {}
~MDNodeOperand() {}
- void set(Value *V) {
- setValPtr(V);
- }
+ void set(Value *V) { this->setValPtr(V); }
+
+ /// setAsFirstOperand - Accessor method to mark the operand as the first in
+ /// the list.
+ void setAsFirstOperand(unsigned V) { this->setValPtrInt(V); }
virtual void deleted();
virtual void allUsesReplacedWith(Value *NV);
@@ -64,15 +79,13 @@ public:
void MDNodeOperand::deleted() {
- Parent->replaceOperand(this, 0);
+ getParent()->replaceOperand(this, 0);
}
void MDNodeOperand::allUsesReplacedWith(Value *NV) {
- Parent->replaceOperand(this, NV);
+ getParent()->replaceOperand(this, NV);
}
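
The removed Parent back-pointer is recoverable because MDNode co-allocates its operand array directly after itself (see getOperandPtr below) and tags the first operand's pointer-int with 1, shrinking every operand by one word. Roughly what getParent() relies on, written as a hypothetical helper (shown as if the pointer-int accessors were public):

    // Layout produced by the placement new in MDNode::getMDNode:
    //   [ MDNode ][ Op0 ][ Op1 ] ... [ OpN-1 ]
    //               ^- ValPtrInt == 1 marks the start of the list
    static MDNode *parentOf(MDNodeOperand *Op) {
      while (Op->getValPtrInt() != 1)
        --Op;                                    // walk back toward Op0
      return reinterpret_cast<MDNode *>(Op) - 1; // MDNode sits just before
    }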
-
-
//===----------------------------------------------------------------------===//
// MDNode implementation.
//
@@ -85,6 +98,11 @@ static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
}
+void MDNode::replaceOperandWith(unsigned i, Value *Val) {
+ MDNodeOperand *Op = getOperandPtr(this, i);
+ replaceOperand(Op, Val);
+}
+
MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
: Value(Type::getMetadataTy(C), Value::MDNodeVal) {
NumOperands = Vals.size();
@@ -95,8 +113,13 @@ MDNode::MDNode(LLVMContext &C, ArrayRef<Value*> Vals, bool isFunctionLocal)
// Initialize the operand list, which is co-allocated on the end of the node.
unsigned i = 0;
for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
- Op != E; ++Op, ++i)
- new (Op) MDNodeOperand(Vals[i], this);
+ Op != E; ++Op, ++i) {
+ new (Op) MDNodeOperand(Vals[i]);
+
+ // Mark the first MDNodeOperand as being the first in the list of operands.
+ if (i == 0)
+ Op->setAsFirstOperand(1);
+ }
}
@@ -161,12 +184,13 @@ static const Function *assertLocalFunction(const MDNode *N) {
const Function *MDNode::getFunction() const {
#ifndef NDEBUG
return assertLocalFunction(this);
-#endif
+#else
if (!isFunctionLocal()) return NULL;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (const Function *F = getFunctionForValue(getOperand(i)))
return F;
return NULL;
+#endif
}
// destroy - Delete this node. Only when there are no uses.
@@ -197,11 +221,11 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
ID.AddPointer(Vals[i]);
void *InsertPoint;
- MDNode *N = NULL;
-
- if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint);
+
+ if (N || !Insert)
return N;
-
+
bool isFunctionLocal = false;
switch (FL) {
case FL_Unknown:
@@ -226,6 +250,9 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef<Value*> Vals,
void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand));
N = new (Ptr) MDNode(Context, Vals, isFunctionLocal);
+ // Cache the operand hash.
+ N->Hash = ID.ComputeHash();
+
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(N, InsertPoint);
@@ -349,6 +376,8 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
return;
}
+ // Cache the operand hash.
+ Hash = ID.ComputeHash();
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(this, InsertPoint);
@@ -425,12 +454,12 @@ StringRef NamedMDNode::getName() const {
// Instruction Metadata method implementations.
//
-void Instruction::setMetadata(const char *Kind, MDNode *Node) {
+void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
if (Node == 0 && !hasMetadata()) return;
setMetadata(getContext().getMDKindID(Kind), Node);
}
-MDNode *Instruction::getMetadataImpl(const char *Kind) const {
+MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
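
Taking StringRef rather than const char* lets any string-ish kind name pass straight through to getMDKindID. Typical attach-and-query usage (a hedged sketch; "my.tag" is an invented kind, Ctx and I are assumed):

    // Attach a node under a named kind; the name is interned to an ID.
    Value *One = ConstantInt::get(Type::getInt32Ty(Ctx), 1);
    I->setMetadata("my.tag", MDNode::get(Ctx, One));

    // Query by the same name; returns null when the kind is absent.
    if (MDNode *MD = I->getMetadata("my.tag"))
      errs() << "tagged: " << *MD << "\n";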
@@ -468,9 +497,11 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
}
// Otherwise, we're removing metadata from an instruction.
- assert(hasMetadataHashEntry() &&
- getContext().pImpl->MetadataStore.count(this) &&
+ assert((hasMetadataHashEntry() ==
+ getContext().pImpl->MetadataStore.count(this)) &&
"HasMetadata bit out of date!");
+ if (!hasMetadataHashEntry())
+ return; // Nothing to remove!
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
// Common case is removing the only entry.
@@ -541,17 +572,15 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
getContext().pImpl->MetadataStore.count(this) &&
"Shouldn't have called this");
const LLVMContextImpl::MDMapTy &Info =
- getContext().pImpl->MetadataStore.find(this)->second;
+ getContext().pImpl->MetadataStore.find(this)->second;
assert(!Info.empty() && "Shouldn't have called this");
-
Result.append(Info.begin(), Info.end());
-
+
// Sort the resulting array so it is stable.
if (Result.size() > 1)
array_pod_sort(Result.begin(), Result.end());
}
-
/// clearMetadataHashEntries - Clear all hashtable-based metadata from
/// this instruction.
void Instruction::clearMetadataHashEntries() {
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index c29029bf6c06..e8bc6dbe9706 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -321,11 +321,67 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
return NMD;
}
+/// eraseNamedMetadata - Remove the given NamedMDNode from this module and
+/// delete it.
void Module::eraseNamedMetadata(NamedMDNode *NMD) {
static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
NamedMDList.erase(NMD);
}
+/// getModuleFlagsMetadata - Returns the module flags in the provided vector.
+void Module::
+getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
+ const NamedMDNode *ModFlags = getModuleFlagsMetadata();
+ if (!ModFlags) return;
+
+ for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
+ MDNode *Flag = ModFlags->getOperand(i);
+ ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
+ MDString *Key = cast<MDString>(Flag->getOperand(1));
+ Value *Val = Flag->getOperand(2);
+ Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
+ Key, Val));
+ }
+}
+
+/// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. This method returns null if there are no
+/// module-level flags.
+NamedMDNode *Module::getModuleFlagsMetadata() const {
+ return getNamedMetadata("llvm.module.flags");
+}
+
+/// getOrInsertModuleFlagsMetadata - Returns the NamedMDNode in the module that
+/// represents module-level flags. If module-level flags aren't found, it
+/// creates the named metadata that contains them.
+NamedMDNode *Module::getOrInsertModuleFlagsMetadata() {
+ return getOrInsertNamedMetadata("llvm.module.flags");
+}
+
+/// addModuleFlag - Add a module-level flag to the module-level flags
+/// metadata. It will create the module-level flags named metadata if it doesn't
+/// already exist.
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ Value *Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ Value *Ops[3] = {
+ ConstantInt::get(Int32Ty, Behavior), MDString::get(Context, Key), Val
+ };
+ getOrInsertModuleFlagsMetadata()->addOperand(MDNode::get(Context, Ops));
+}
+void Module::addModuleFlag(ModFlagBehavior Behavior, StringRef Key,
+ uint32_t Val) {
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ addModuleFlag(Behavior, Key, ConstantInt::get(Int32Ty, Val));
+}
+void Module::addModuleFlag(MDNode *Node) {
+ assert(Node->getNumOperands() == 3 &&
+ "Invalid number of operands for module flag!");
+ assert(isa<ConstantInt>(Node->getOperand(0)) &&
+ isa<MDString>(Node->getOperand(1)) &&
+ "Invalid operand types for module flag!");
+ getOrInsertModuleFlagsMetadata()->addOperand(Node);
+}
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
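
For orientation: flags added through this new API become operands of the llvm.module.flags named metadata, each one a (behavior, key, value) triple. A hedged usage sketch (the flag name is invented; Module::Error is assumed to be a ModFlagBehavior enumerator from Module.h, which is not shown in this excerpt):

    // M is an existing Module.
    M.addModuleFlag(Module::Error, "my-abi-version", 2);

    SmallVector<Module::ModuleFlagEntry, 4> Flags;
    M.getModuleFlagsMetadata(Flags);
    for (unsigned i = 0, e = Flags.size(); i != e; ++i)
      errs() << Flags[i].Key->getString() << "\n";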
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 9afc54063321..994a7ffceea5 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -25,11 +25,9 @@ using namespace llvm;
// Pass Implementation
//
-Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
-
// Force out-of-line virtual method.
-Pass::~Pass() {
- delete Resolver;
+Pass::~Pass() {
+ delete Resolver;
}
// Force out-of-line virtual method.
@@ -48,7 +46,7 @@ bool Pass::mustPreserveAnalysisID(char &AID) const {
return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
}
-// dumpPassStructure - Implement the -debug-passes=Structure option
+// dumpPassStructure - Implement the -debug-pass=Structure option
void Pass::dumpPassStructure(unsigned Offset) {
dbgs().indent(Offset*2) << getPassName() << "\n";
}
@@ -71,7 +69,7 @@ void Pass::preparePassManager(PMStack &) {
PassManagerType Pass::getPotentialPassManagerType() const {
// Default implementation.
- return PMT_Unknown;
+ return PMT_Unknown;
}
void Pass::getAnalysisUsage(AnalysisUsage &) const {
@@ -155,9 +153,8 @@ PassManagerType FunctionPass::getPotentialPassManagerType() const {
Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
-
+
llvm_unreachable("BasicBlockPass printing unsupported.");
- return 0;
}
bool BasicBlockPass::doInitialization(Module &) {
@@ -181,7 +178,7 @@ bool BasicBlockPass::doFinalization(Module &) {
}
PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
- return PMT_BasicBlockPassManager;
+ return PMT_BasicBlockPassManager;
}
const PassInfo *Pass::lookupPassInfo(const void *TI) {
@@ -192,6 +189,13 @@ const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
return PassRegistry::getPassRegistry()->getPassInfo(Arg);
}
+Pass *Pass::createPass(AnalysisID ID) {
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
+ if (!PI)
+ return NULL;
+ return PI->createPass();
+}
+
Pass *PassInfo::createPass() const {
assert((!isAnalysisGroup() || NormalCtor) &&
"No default implementation found for analysis group!");
@@ -246,7 +250,7 @@ namespace {
typedef AnalysisUsage::VectorType VectorType;
VectorType &CFGOnlyList;
GetCFGOnlyPasses(VectorType &L) : CFGOnlyList(L) {}
-
+
void passEnumerate(const PassInfo *P) {
if (P->isCFGOnlyPass())
CFGOnlyList.push_back(P->getTypeInfo());
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index ecedb1db66b0..28fbaa667841 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -14,7 +14,6 @@
#include "llvm/PassManagers.h"
#include "llvm/PassManager.h"
-#include "llvm/DebugInfoProbe.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
@@ -26,7 +25,6 @@
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/ADT/StringMap.h"
#include <algorithm>
#include <map>
using namespace llvm;
@@ -84,32 +82,28 @@ PrintAfterAll("print-after-all",
/// This is a helper to determine whether to print IR before or
/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(const void *PassID,
+static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI,
PassOptionList &PassesToPrint) {
- if (const llvm::PassInfo *PI =
- PassRegistry::getPassRegistry()->getPassInfo(PassID)) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
- if (PassInf)
- if (PassInf->getPassArgument() == PI->getPassArgument()) {
- return true;
- }
- }
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
}
return false;
}
-
/// This is a utility to check whether a pass should have IR dumped
/// before it.
-static bool ShouldPrintBeforePass(const void *PassID) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
+static bool ShouldPrintBeforePass(const PassInfo *PI) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore);
}
/// This is a utility to check whether a pass should have IR dumped
/// after it.
-static bool ShouldPrintAfterPass(const void *PassID) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
+static bool ShouldPrintAfterPass(const PassInfo *PI) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
}
} // End of llvm namespace
@@ -223,6 +217,7 @@ namespace llvm {
class FunctionPassManagerImpl : public Pass,
public PMDataManager,
public PMTopLevelManager {
+ virtual void anchor();
private:
bool wasRun;
public:
@@ -263,27 +258,15 @@ public:
virtual PMDataManager *getAsPMDataManager() { return this; }
virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_FunctionPassManager;
+ }
/// Pass Manager itself does not invalidate any analysis info.
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
}
- void addTopLevelPass(Pass *P) {
- if (ImmutablePass *IP = P->getAsImmutablePass()) {
- // P is a immutable pass and it will be managed by this
- // top level manager. Set up analysis resolver to connect them.
- AnalysisResolver *AR = new AnalysisResolver(*this);
- P->setResolver(AR);
- initializeAnalysisImpl(P);
- addImmutablePass(IP);
- recordAvailableAnalysis(IP);
- } else {
- P->assignPassManager(activeStack, PMT_FunctionPassManager);
- }
-
- }
-
FPPassManager *getContainedManager(unsigned N) {
assert(N < PassManagers.size() && "Pass number out of range!");
FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]);
@@ -291,6 +274,8 @@ public:
}
};
+void FunctionPassManagerImpl::anchor() {}
+
char FunctionPassManagerImpl::ID = 0;
//===----------------------------------------------------------------------===//
@@ -384,6 +369,7 @@ char MPPassManager::ID = 0;
class PassManagerImpl : public Pass,
public PMDataManager,
public PMTopLevelManager {
+ virtual void anchor();
public:
static char ID;
@@ -413,22 +399,11 @@ public:
Info.setPreservesAll();
}
- void addTopLevelPass(Pass *P) {
- if (ImmutablePass *IP = P->getAsImmutablePass()) {
- // P is a immutable pass and it will be managed by this
- // top level manager. Set up analysis resolver to connect them.
- AnalysisResolver *AR = new AnalysisResolver(*this);
- P->setResolver(AR);
- initializeAnalysisImpl(P);
- addImmutablePass(IP);
- recordAvailableAnalysis(IP);
- } else {
- P->assignPassManager(activeStack, PMT_ModulePassManager);
- }
- }
-
virtual PMDataManager *getAsPMDataManager() { return this; }
virtual Pass *getAsPass() { return this; }
+ virtual PassManagerType getTopLevelPassManagerType() {
+ return PMT_ModulePassManager;
+ }
MPPassManager *getContainedManager(unsigned N) {
assert(N < PassManagers.size() && "Pass number out of range!");
@@ -437,26 +412,14 @@ public:
}
};
+void PassManagerImpl::anchor() {}
+
char PassManagerImpl::ID = 0;
} // End of llvm namespace
namespace {
//===----------------------------------------------------------------------===//
-// DebugInfoProbe
-
-static DebugInfoProbeInfo *TheDebugProbe;
-static void createDebugInfoProbe() {
- if (TheDebugProbe) return;
-
- // Constructed the first time this is called. This guarantees that the
- // object will be constructed, if -enable-debug-info-probe is set,
- // before static globals, thus it will be destroyed before them.
- static ManagedStatic<DebugInfoProbeInfo> DIP;
- TheDebugProbe = &*DIP;
-}
-
-//===----------------------------------------------------------------------===//
/// TimingInfo Class - This class is used to calculate information about the
/// amount of time each pass takes to execute. This only happens when
/// -time-passes is enabled on the command line.
@@ -654,7 +617,32 @@ void PMTopLevelManager::schedulePass(Pass *P) {
}
// Now all required passes are available.
- addTopLevelPass(P);
+ if (ImmutablePass *IP = P->getAsImmutablePass()) {
+ // P is an immutable pass and it will be managed by this
+ // top level manager. Set up analysis resolver to connect them.
+ PMDataManager *DM = getAsPMDataManager();
+ AnalysisResolver *AR = new AnalysisResolver(*DM);
+ P->setResolver(AR);
+ DM->initializeAnalysisImpl(P);
+ addImmutablePass(IP);
+ DM->recordAvailableAnalysis(IP);
+ return;
+ }
+
+ if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
+
+ // Add the requested pass to the best available pass manager.
+ P->assignPassManager(activeStack, getTopLevelPassManagerType());
+
+ if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
+ Pass *PP = P->createPrinterPass(
+ dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
+ PP->assignPassManager(activeStack, getTopLevelPassManagerType());
+ }
}
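
With the printer logic hoisted out of the two add() wrappers into schedulePass, -print-before/-print-after dumps are now injected uniformly for every scheduled pass, whichever top-level manager owns it. A hedged driver sketch (M is an assumed Module):

    // Every pass added here funnels through schedulePass, which wraps it
    // with IR-dump printer passes when the command-line flags request it.
    PassManager PM;
    PM.add(createPromoteMemoryToRegisterPass());
    PM.add(createCFGSimplificationPass());
    PM.run(M);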
/// Find the pass that implements Analysis AID. Search immutable
@@ -1224,8 +1212,7 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
}
Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
- assert(0 && "Unable to find on the fly pass");
- return NULL;
+ llvm_unreachable("Unable to find on the fly pass");
}
// Destructor
@@ -1351,31 +1338,13 @@ FunctionPassManager::~FunctionPassManager() {
delete FPM;
}
-/// addImpl - Add a pass to the queue of passes to run, without
-/// checking whether to add a printer pass.
-void FunctionPassManager::addImpl(Pass *P) {
- FPM->add(P);
-}
-
/// add - Add a pass to the queue of passes to run. This passes
/// ownership of the Pass to the PassManager. When the
/// PassManager_X is destroyed, the pass will be destroyed as well, so
/// there is no need to delete the pass. (TODO delete passes.)
/// This implies that all passes MUST be allocated with 'new'.
void FunctionPassManager::add(Pass *P) {
- // If this is a not a function pass, don't add a printer for it.
- const void *PassID = P->getPassID();
- if (P->getPassKind() == PT_Function)
- if (ShouldPrintBeforePass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
- + P->getPassName() + " ***"));
-
- addImpl(P);
-
- if (P->getPassKind() == PT_Function)
- if (ShouldPrintAfterPass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
- + P->getPassName() + " ***"));
+ FPM->add(P);
}
/// run - Execute all of the passes scheduled for execution. Keep
@@ -1455,7 +1424,6 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
bool FunctionPassManagerImpl::run(Function &F) {
bool Changed = false;
TimingInfo::createTheTimeInfo();
- createDebugInfoProbe();
initializeAllAnalysisInfo();
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
@@ -1474,7 +1442,7 @@ bool FunctionPassManagerImpl::run(Function &F) {
char FPPassManager::ID = 0;
/// Print passes managed by this manager
void FPPassManager::dumpPassStructure(unsigned Offset) {
- llvm::dbgs() << std::string(Offset*2, ' ') << "FunctionPass Manager\n";
+ dbgs().indent(Offset*2) << "FunctionPass Manager\n";
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
FP->dumpPassStructure(Offset + 1);
@@ -1503,16 +1471,13 @@ bool FPPassManager::runOnFunction(Function &F) {
dumpRequiredSet(FP);
initializeAnalysisImpl(FP);
- if (TheDebugProbe)
- TheDebugProbe->initialize(FP, F);
+
{
PassManagerPrettyStackEntry X(FP, F);
TimeRegion PassTimer(getPassTimer(FP));
LocalChanged |= FP->runOnFunction(F);
}
- if (TheDebugProbe)
- TheDebugProbe->finalize(FP, F);
Changed |= LocalChanged;
if (LocalChanged)
@@ -1662,7 +1627,6 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
bool PassManagerImpl::run(Module &M) {
bool Changed = false;
TimingInfo::createTheTimeInfo();
- createDebugInfoProbe();
dumpArguments();
dumpPasses();
@@ -1687,27 +1651,12 @@ PassManager::~PassManager() {
delete PM;
}
-/// addImpl - Add a pass to the queue of passes to run, without
-/// checking whether to add a printer pass.
-void PassManager::addImpl(Pass *P) {
- PM->add(P);
-}
-
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
/// will be destroyed as well, so there is no need to delete the pass. This
/// implies that all passes MUST be allocated with 'new'.
void PassManager::add(Pass *P) {
- const void* PassID = P->getPassID();
- if (ShouldPrintBeforePass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
- + P->getPassName() + " ***"));
-
- addImpl(P);
-
- if (ShouldPrintAfterPass(PassID))
- addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
- + P->getPassName() + " ***"));
+ PM->add(P);
}
/// run - Execute all of the passes scheduled for execution. Keep track of
@@ -1817,7 +1766,7 @@ void ModulePass::assignPassManager(PMStack &PMS,
void FunctionPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
- // Find Module Pass Manager
+ // Find Function Pass Manager
while (!PMS.empty()) {
if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
PMS.pop();
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 10184bc6f0e3..c6f35580e158 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -25,6 +25,7 @@ using namespace llvm;
Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
switch (IDNumber) {
case VoidTyID : return getVoidTy(C);
+ case HalfTyID : return getHalfTy(C);
case FloatTyID : return getFloatTy(C);
case DoubleTyID : return getDoubleTy(C);
case X86_FP80TyID : return getX86_FP80Ty(C);
@@ -57,7 +58,7 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
bool Type::isIntOrIntVectorTy() const {
if (isIntegerTy())
return true;
- if (ID != Type::VectorTyID) return false;
+ if (getTypeID() != Type::VectorTyID) return false;
return cast<VectorType>(this)->getElementType()->isIntegerTy();
}
@@ -65,11 +66,12 @@ bool Type::isIntOrIntVectorTy() const {
/// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP types.
///
bool Type::isFPOrFPVectorTy() const {
- if (ID == Type::FloatTyID || ID == Type::DoubleTyID ||
- ID == Type::FP128TyID || ID == Type::X86_FP80TyID ||
- ID == Type::PPC_FP128TyID)
+ if (getTypeID() == Type::HalfTyID || getTypeID() == Type::FloatTyID ||
+ getTypeID() == Type::DoubleTyID ||
+ getTypeID() == Type::FP128TyID || getTypeID() == Type::X86_FP80TyID ||
+ getTypeID() == Type::PPC_FP128TyID)
return true;
- if (ID != Type::VectorTyID) return false;
+ if (getTypeID() != Type::VectorTyID) return false;
return cast<VectorType>(this)->getElementType()->isFloatingPointTy();
}
@@ -131,6 +133,7 @@ bool Type::isEmptyTy() const {
unsigned Type::getPrimitiveSizeInBits() const {
switch (getTypeID()) {
+ case Type::HalfTyID: return 16;
case Type::FloatTyID: return 32;
case Type::DoubleTyID: return 64;
case Type::X86_FP80TyID: return 80;
@@ -157,11 +160,12 @@ int Type::getFPMantissaWidth() const {
if (const VectorType *VTy = dyn_cast<VectorType>(this))
return VTy->getElementType()->getFPMantissaWidth();
assert(isFloatingPointTy() && "Not a floating point type!");
- if (ID == FloatTyID) return 24;
- if (ID == DoubleTyID) return 53;
- if (ID == X86_FP80TyID) return 64;
- if (ID == FP128TyID) return 113;
- assert(ID == PPC_FP128TyID && "unknown fp type");
+ if (getTypeID() == HalfTyID) return 11;
+ if (getTypeID() == FloatTyID) return 24;
+ if (getTypeID() == DoubleTyID) return 53;
+ if (getTypeID() == X86_FP80TyID) return 64;
+ if (getTypeID() == FP128TyID) return 113;
+ assert(getTypeID() == PPC_FP128TyID && "unknown fp type");
return -1;
}
@@ -181,24 +185,69 @@ bool Type::isSizedDerivedType() const {
if (!this->isStructTy())
return false;
- // Opaque structs have no size.
- if (cast<StructType>(this)->isOpaque())
- return false;
-
- // Okay, our struct is sized if all of the elements are.
- for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
- if (!(*I)->isSized())
- return false;
+ return cast<StructType>(this)->isSized();
+}
- return true;
+//===----------------------------------------------------------------------===//
+// Subclass Helper Methods
+//===----------------------------------------------------------------------===//
+
+unsigned Type::getIntegerBitWidth() const {
+ return cast<IntegerType>(this)->getBitWidth();
+}
+
+bool Type::isFunctionVarArg() const {
+ return cast<FunctionType>(this)->isVarArg();
+}
+
+Type *Type::getFunctionParamType(unsigned i) const {
+ return cast<FunctionType>(this)->getParamType(i);
+}
+
+unsigned Type::getFunctionNumParams() const {
+ return cast<FunctionType>(this)->getNumParams();
+}
+
+StringRef Type::getStructName() const {
+ return cast<StructType>(this)->getName();
+}
+
+unsigned Type::getStructNumElements() const {
+ return cast<StructType>(this)->getNumElements();
+}
+
+Type *Type::getStructElementType(unsigned N) const {
+ return cast<StructType>(this)->getElementType(N);
+}
+
+
+
+Type *Type::getSequentialElementType() const {
+ return cast<SequentialType>(this)->getElementType();
+}
+
+uint64_t Type::getArrayNumElements() const {
+ return cast<ArrayType>(this)->getNumElements();
+}
+
+unsigned Type::getVectorNumElements() const {
+ return cast<VectorType>(this)->getNumElements();
}
+unsigned Type::getPointerAddressSpace() const {
+ return cast<PointerType>(this)->getAddressSpace();
+}
+
+
+
+
//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; }
Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
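
The block of forwarding helpers lets code that only sees a Type* query subclass properties without including DerivedTypes.h; each helper is a checked cast plus a field read. A hedged sketch of a call site (Ty is an assumed Type*):

    // Previously each query needed DerivedTypes.h and an explicit cast;
    // the forwarders perform the cast (and its assertion) internally.
    if (Ty->isPointerTy())
      errs() << "addrspace(" << Ty->getPointerAddressSpace() << ")\n";
    if (Ty->isStructTy())
      errs() << Ty->getStructNumElements() << " struct elements\n";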
@@ -217,6 +266,10 @@ IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
return IntegerType::get(C, N);
}
+PointerType *Type::getHalfPtrTy(LLVMContext &C, unsigned AS) {
+ return getHalfTy(C)->getPointerTo(AS);
+}
+
PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
return getFloatTy(C)->getPointerTo(AS);
}
@@ -328,23 +381,20 @@ FunctionType::FunctionType(Type *Result, ArrayRef<Type*> Params,
// FunctionType::get - The factory function for the FunctionType class.
FunctionType *FunctionType::get(Type *ReturnType,
ArrayRef<Type*> Params, bool isVarArg) {
- // TODO: This is brutally slow.
- std::vector<Type*> Key;
- Key.reserve(Params.size()+2);
- Key.push_back(const_cast<Type*>(ReturnType));
- for (unsigned i = 0, e = Params.size(); i != e; ++i)
- Key.push_back(const_cast<Type*>(Params[i]));
- if (isVarArg)
- Key.push_back(0);
-
LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
- FunctionType *&FT = pImpl->FunctionTypes[Key];
-
- if (FT == 0) {
+ FunctionTypeKeyInfo::KeyTy Key(ReturnType, Params, isVarArg);
+ LLVMContextImpl::FunctionTypeMap::iterator I =
+ pImpl->FunctionTypes.find_as(Key);
+ FunctionType *FT;
+
+ if (I == pImpl->FunctionTypes.end()) {
FT = (FunctionType*) pImpl->TypeAllocator.
- Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1),
+ Allocate(sizeof(FunctionType) + sizeof(Type*) * (Params.size() + 1),
AlignOf<FunctionType>::Alignment);
new (FT) FunctionType(ReturnType, Params, isVarArg);
+ pImpl->FunctionTypes[FT] = true;
+ } else {
+ FT = I->first;
}
return FT;
@@ -377,23 +427,22 @@ bool FunctionType::isValidArgumentType(Type *ArgTy) {
StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
bool isPacked) {
- // FIXME: std::vector is horribly inefficient for this probe.
- std::vector<Type*> Key;
- for (unsigned i = 0, e = ETypes.size(); i != e; ++i) {
- assert(isValidElementType(ETypes[i]) &&
- "Invalid type for structure element!");
- Key.push_back(ETypes[i]);
+ LLVMContextImpl *pImpl = Context.pImpl;
+ AnonStructTypeKeyInfo::KeyTy Key(ETypes, isPacked);
+ LLVMContextImpl::StructTypeMap::iterator I =
+ pImpl->AnonStructTypes.find_as(Key);
+ StructType *ST;
+
+ if (I == pImpl->AnonStructTypes.end()) {
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
+ ST->setBody(ETypes, isPacked);
+ Context.pImpl->AnonStructTypes[ST] = true;
+ } else {
+ ST = I->first;
}
- if (isPacked)
- Key.push_back(0);
-
- StructType *&ST = Context.pImpl->AnonStructTypes[Key];
- if (ST) return ST;
-
- // Value not found. Create a new type!
- ST = new (Context.pImpl->TypeAllocator) StructType(Context);
- ST->setSubclassData(SCDB_IsLiteral); // Literal struct.
- ST->setBody(ETypes, isPacked);
+
return ST;
}
@@ -403,13 +452,13 @@ void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
setSubclassData(getSubclassData() | SCDB_HasBody);
if (isPacked)
setSubclassData(getSubclassData() | SCDB_Packed);
-
- Type **Elts = getContext().pImpl->
- TypeAllocator.Allocate<Type*>(Elements.size());
- memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size());
+
+ unsigned NumElements = Elements.size();
+ Type **Elts = getContext().pImpl->TypeAllocator.Allocate<Type*>(NumElements);
+ memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements);
ContainedTys = Elts;
- NumContainedTys = Elements.size();
+ NumContainedTys = NumElements;
}
void StructType::setName(StringRef Name) {
@@ -434,9 +483,10 @@ void StructType::setName(StringRef Name) {
SmallString<64> TempStr(Name);
TempStr.push_back('.');
raw_svector_ostream TmpStream(TempStr);
+ unsigned NameSize = Name.size();
do {
- TempStr.resize(Name.size()+1);
+ TempStr.resize(NameSize + 1);
TmpStream.resync();
TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
@@ -520,6 +570,26 @@ StructType *StructType::create(StringRef Name, Type *type, ...) {
return llvm::StructType::create(Ctx, StructFields, Name);
}
+bool StructType::isSized() const {
+ if ((getSubclassData() & SCDB_IsSized) != 0)
+ return true;
+ if (isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are, but if one of the
+ // elements is opaque, the struct isn't sized *yet*, but may become sized in
+ // the future, so just bail out without caching.
+ for (element_iterator I = element_begin(), E = element_end(); I != E; ++I)
+ if (!(*I)->isSized())
+ return false;
+
+ // Here we cheat a bit and cast away const-ness. The goal is to memoize when
+ // we find a sized type, as types can only move from opaque to sized, not the
+ // other way.
+ const_cast<StructType*>(this)->setSubclassData(
+ getSubclassData() | SCDB_IsSized);
+ return true;
+}
StringRef StructType::getName() const {
assert(!isLiteral() && "Literal structs never have names");
@@ -664,6 +734,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
}
bool VectorType::isValidElementType(Type *ElemTy) {
+ if (PointerType *PTy = dyn_cast<PointerType>(ElemTy))
+ ElemTy = PTy->getElementType();
return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
}
@@ -689,7 +761,12 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
PointerType::PointerType(Type *E, unsigned AddrSpace)
: SequentialType(PointerTyID, E) {
+#ifndef NDEBUG
+ const unsigned oldNCT = NumContainedTys;
+#endif
setSubclassData(AddrSpace);
+ // Check for miscompile. PR11652.
+ assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
}
PointerType *Type::getPointerTo(unsigned addrs) {
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index 359a1517ab79..0128adc3f776 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Value.h"
+#include <new>
namespace llvm {
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index f01fa349adfd..5f35ce4b9a4f 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -17,6 +17,8 @@ namespace llvm {
// User Class
//===----------------------------------------------------------------------===//
+void User::anchor() {}
+
// replaceUsesOfWith - Replaces all references to the "From" definition with
// references to the "To" definition.
//
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 2fa5f08a3e7f..4006b2c55418 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -66,7 +66,7 @@ Value::~Value() {
// a <badref>
//
if (!use_empty()) {
- dbgs() << "While deleting: " << *VTy << " %" << getNameStr() << "\n";
+ dbgs() << "While deleting: " << *VTy << " %" << getName() << "\n";
for (use_iterator I = use_begin(), E = use_end(); I != E; ++I)
dbgs() << "Use still stuck around after Def is destroyed:"
<< **I << "\n";
@@ -76,7 +76,7 @@ Value::~Value() {
// If this value is named, destroy the name. This should not be in a symtab
// at this point.
- if (Name)
+ if (Name && SubclassID != MDStringVal)
Name->Destroy();
// There should be no uses of this object anymore, remove it.
@@ -108,6 +108,19 @@ bool Value::hasNUsesOrMore(unsigned N) const {
/// isUsedInBasicBlock - Return true if this value is used in the specified
/// basic block.
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
+ // Start by scanning over the instructions looking for a use before we start
+ // the expensive use iteration.
+ unsigned MaxBlockSize = 3;
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (std::find(I->op_begin(), I->op_end(), this) != I->op_end())
+ return true;
+ if (MaxBlockSize-- == 0) // If the block is larger, fall back to use_iterator
+ break;
+ }
+
+ if (MaxBlockSize != 0) // We scanned the entire block and found no use.
+ return false;
+
for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const Instruction *User = dyn_cast<Instruction>(*I);
if (User && User->getParent() == BB)
@@ -156,11 +169,10 @@ StringRef Value::getName() const {
return Name->getKey();
}
-std::string Value::getNameStr() const {
- return getName().str();
-}
-
void Value::setName(const Twine &NewName) {
+ assert(SubclassID != MDStringVal &&
+ "Cannot set the name of MDString with this method!");
+
// Fast path for common IRBuilder case of setName("") when there is no name.
if (NewName.isTriviallyEmpty() && !hasName())
return;
@@ -219,6 +231,8 @@ void Value::setName(const Twine &NewName) {
/// takeName - transfer the name from V to this value, setting V's name to
/// empty. It is an error to call V->takeName(V).
void Value::takeName(Value *V) {
+ assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
+
ValueSymbolTable *ST = 0;
// If this value has a name, drop it.
if (hasName()) {
@@ -308,20 +322,40 @@ void Value::replaceAllUsesWith(Value *New) {
BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
}
-Value *Value::stripPointerCasts() {
- if (!getType()->isPointerTy())
- return this;
+namespace {
+// Various metrics for how much to strip off of pointers.
+enum PointerStripKind {
+ PSK_ZeroIndices,
+ PSK_InBoundsConstantIndices,
+ PSK_InBounds
+};
+
+template <PointerStripKind StripKind>
+static Value *stripPointerCastsAndOffsets(Value *V) {
+ if (!V->getType()->isPointerTy())
+ return V;
// Even though we don't look through PHI nodes, we could be called on an
// instruction in an unreachable block, which may be on a cycle.
SmallPtrSet<Value *, 4> Visited;
- Value *V = this;
Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllZeroIndices())
- return V;
+ switch (StripKind) {
+ case PSK_ZeroIndices:
+ if (!GEP->hasAllZeroIndices())
+ return V;
+ break;
+ case PSK_InBoundsConstantIndices:
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ // fallthrough
+ case PSK_InBounds:
+ if (!GEP->isInBounds())
+ return V;
+ break;
+ }
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
@@ -337,10 +371,24 @@ Value *Value::stripPointerCasts() {
return V;
}
+} // namespace
+
+Value *Value::stripPointerCasts() {
+ return stripPointerCastsAndOffsets<PSK_ZeroIndices>(this);
+}
+
+Value *Value::stripInBoundsConstantOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBoundsConstantIndices>(this);
+}
+
+Value *Value::stripInBoundsOffsets() {
+ return stripPointerCastsAndOffsets<PSK_InBounds>(this);
+}
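
The template gives a single traversal loop with a compile-time policy; the switch over StripKind constant-folds away in each instantiation. What the three public entry points strip (a hedged summary; V is an assumed pointer-typed Value*):

    Value *A = V->stripPointerCasts();            // bitcasts + all-zero-index GEPs
    Value *B = V->stripInBoundsConstantOffsets(); // bitcasts + inbounds all-constant GEPs
    Value *C = V->stripInBoundsOffsets();         // bitcasts + any inbounds GEP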
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-bool Value::isDereferenceablePointer() const {
+static bool isDereferenceablePointer(const Value *V,
+ SmallPtrSet<const Value *, 32> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
// It's also not always safe to follow a bitcast, for example:
@@ -349,20 +397,22 @@ bool Value::isDereferenceablePointer() const {
// be handled using TargetData to check sizes and alignments though.
// These are obviously ok.
- if (isa<AllocaInst>(this)) return true;
+ if (isa<AllocaInst>(V)) return true;
// Global variables which can't collapse to null are ok.
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return !GV->hasExternalWeakLinkage();
// byval arguments are ok.
- if (const Argument *A = dyn_cast<Argument>(this))
+ if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValAttr();
-
+
// For GEPs, determine if the indexing lands within the allocated object.
- if (const GEPOperator *GEP = dyn_cast<GEPOperator>(this)) {
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
// Conservatively require that the base pointer be fully dereferenceable.
- if (!GEP->getOperand(0)->isDereferenceablePointer())
+ if (!Visited.insert(GEP->getOperand(0)))
+ return false;
+ if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
return false;
// Check the indices.
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -396,6 +446,13 @@ bool Value::isDereferenceablePointer() const {
return false;
}
+/// isDereferenceablePointer - Test if this value is always a pointer to
+/// allocated and suitably aligned memory for a simple load or store.
+bool Value::isDereferenceablePointer() const {
+ SmallPtrSet<const Value *, 32> Visited;
+ return ::isDereferenceablePointer(this, Visited);
+}
+
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
/// return the value in the PHI node corresponding to PredBB. If not, return
/// ourself. This is useful if you want to know the value something has in a
@@ -425,7 +482,7 @@ void ValueHandleBase::AddToExistingUseList(ValueHandleBase **List) {
setPrevPtr(List);
if (Next) {
Next->setPrevPtr(&Next);
- assert(VP == Next->VP && "Added to wrong list?");
+ assert(VP.getPointer() == Next->VP.getPointer() && "Added to wrong list?");
}
}
@@ -441,14 +498,14 @@ void ValueHandleBase::AddToExistingUseListAfter(ValueHandleBase *List) {
/// AddToUseList - Add this ValueHandle to the use list for VP.
void ValueHandleBase::AddToUseList() {
- assert(VP && "Null pointer doesn't have a use list!");
+ assert(VP.getPointer() && "Null pointer doesn't have a use list!");
- LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
- if (VP->HasValueHandle) {
+ if (VP.getPointer()->HasValueHandle) {
// If this value already has a ValueHandle, then it must be in the
// ValueHandles map already.
- ValueHandleBase *&Entry = pImpl->ValueHandles[VP];
+ ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()];
assert(Entry != 0 && "Value doesn't have any handles?");
AddToExistingUseList(&Entry);
return;
@@ -462,10 +519,10 @@ void ValueHandleBase::AddToUseList() {
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
- ValueHandleBase *&Entry = Handles[VP];
+ ValueHandleBase *&Entry = Handles[VP.getPointer()];
assert(Entry == 0 && "Value really did already have handles?");
AddToExistingUseList(&Entry);
- VP->HasValueHandle = true;
+ VP.getPointer()->HasValueHandle = true;
// If reallocation didn't happen or if this was the first insertion, don't
// walk the table.
@@ -477,14 +534,16 @@ void ValueHandleBase::AddToUseList() {
// Okay, reallocation did happen. Fix the Prev Pointers.
for (DenseMap<Value*, ValueHandleBase*>::iterator I = Handles.begin(),
E = Handles.end(); I != E; ++I) {
- assert(I->second && I->first == I->second->VP && "List invariant broken!");
+ assert(I->second && I->first == I->second->VP.getPointer() &&
+ "List invariant broken!");
I->second->setPrevPtr(&I->second);
}
}
/// RemoveFromUseList - Remove this ValueHandle from its current use list.
void ValueHandleBase::RemoveFromUseList() {
- assert(VP && VP->HasValueHandle && "Pointer doesn't have a use list!");
+ assert(VP.getPointer() && VP.getPointer()->HasValueHandle &&
+ "Pointer doesn't have a use list!");
// Unlink this from its use list.
ValueHandleBase **PrevPtr = getPrevPtr();
@@ -500,11 +559,11 @@ void ValueHandleBase::RemoveFromUseList() {
// If the Next pointer was null, then it is possible that this was the last
// ValueHandle watching VP. If so, delete its entry from the ValueHandles
// map.
- LLVMContextImpl *pImpl = VP->getContext().pImpl;
+ LLVMContextImpl *pImpl = VP.getPointer()->getContext().pImpl;
DenseMap<Value*, ValueHandleBase*> &Handles = pImpl->ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
- Handles.erase(VP);
- VP->HasValueHandle = false;
+ Handles.erase(VP.getPointer());
+ VP.getPointer()->HasValueHandle = false;
}
}
@@ -554,7 +613,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
// All callbacks, weak references, and assertingVHs should be dropped by now.
if (V->HasValueHandle) {
#ifndef NDEBUG // Only in +Asserts mode...
- dbgs() << "While deleting: " << *V->getType() << " %" << V->getNameStr()
+ dbgs() << "While deleting: " << *V->getType() << " %" << V->getName()
<< "\n";
if (pImpl->ValueHandles[V]->getKind() == Assert)
llvm_unreachable("An asserting value handle still pointed to this"
@@ -617,8 +676,8 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
case Tracking:
case Weak:
dbgs() << "After RAUW from " << *Old->getType() << " %"
- << Old->getNameStr() << " to " << *New->getType() << " %"
- << New->getNameStr() << "\n";
+ << Old->getName() << " to " << *New->getType() << " %"
+ << New->getName() << "\n";
llvm_unreachable("A tracking or weak value handle still pointed to the"
" old value!\n");
default:
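
The pervasive VP.getPointer() churn suggests VP changed from a plain Value* to a PointerIntPair<Value*, 2> (the header side of the change is not shown in this excerpt), packing tag bits into the pointer's alignment bits. The container in miniature (hypothetical payload type):

    #include "llvm/ADT/PointerIntPair.h"
    #include <cassert>
    using namespace llvm;

    void demo() {
      // The low bits of a suitably aligned pointer are free, so pointer
      // and tag share one word with no extra storage.
      static int X;
      PointerIntPair<int *, 2> P;
      P.setPointer(&X);
      P.setInt(3);  // any value in [0, 3]
      assert(P.getPointer() == &X && P.getInt() == 3);
    }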
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index e13bd7df7375..9a8e1859e2d4 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -87,8 +87,7 @@ unsigned EVT::getExtendedSizeInBits() const {
return ITy->getBitWidth();
if (VectorType *VTy = dyn_cast<VectorType>(LLVMTy))
return VTy->getBitWidth();
- assert(false && "Unrecognized extended type!");
- return 0; // Suppress warnings.
+ llvm_unreachable("Unrecognized extended type!");
}
/// getEVTString - This function returns value type as a string, e.g. "i32".
@@ -101,13 +100,13 @@ std::string EVT::getEVTString() const {
if (isInteger())
return "i" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
- return "?";
case MVT::i1: return "i1";
case MVT::i8: return "i8";
case MVT::i16: return "i16";
case MVT::i32: return "i32";
case MVT::i64: return "i64";
case MVT::i128: return "i128";
+ case MVT::f16: return "f16";
case MVT::f32: return "f32";
case MVT::f64: return "f64";
case MVT::f80: return "f80";
@@ -134,12 +133,13 @@ std::string EVT::getEVTString() const {
case MVT::v4i64: return "v4i64";
case MVT::v8i64: return "v8i64";
case MVT::v2f32: return "v2f32";
+ case MVT::v2f16: return "v2f16";
case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::Metadata:return "Metadata";
- case MVT::untyped: return "untyped";
+ case MVT::Untyped: return "Untyped";
}
}
@@ -158,6 +158,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::i32: return Type::getInt32Ty(Context);
case MVT::i64: return Type::getInt64Ty(Context);
case MVT::i128: return IntegerType::get(Context, 128);
+ case MVT::f16: return Type::getHalfTy(Context);
case MVT::f32: return Type::getFloatTy(Context);
case MVT::f64: return Type::getDoubleTy(Context);
case MVT::f80: return Type::getX86_FP80Ty(Context);
@@ -180,6 +181,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
+ case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
@@ -197,11 +199,11 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
default:
if (HandleUnknown) return MVT(MVT::Other);
llvm_unreachable("Unknown type!");
- return MVT::isVoid;
case Type::VoidTyID:
return MVT::isVoid;
case Type::IntegerTyID:
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
+ case Type::HalfTyID: return MVT(MVT::f16);
case Type::FloatTyID: return MVT(MVT::f32);
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
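
With f16 and v2f16 wired into EVT, half values now round-trip between MVT and IR types like any other floating-point type. A hedged sketch (Ctx is an assumed LLVMContext):

    Type *HalfTy = EVT(MVT::f16).getTypeForEVT(Ctx); // the IR 'half' type
    EVT VT = EVT::getEVT(HalfTy);                    // and back again
    assert(VT == MVT(MVT::f16) && "round-trip should preserve the type");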
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 9564b7d71f6a..96492e44d56f 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -1,4 +1,4 @@
-//===-- Verifier.cpp - Implement the Module Verifier -------------*- C++ -*-==//
+//===-- Verifier.cpp - Implement the Module Verifier -----------------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -51,6 +51,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
@@ -117,7 +118,6 @@ namespace {
struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
static char ID; // Pass ID, replacement for typeid
bool Broken; // Is this module found to be broken?
- bool RealPass; // Are we not being run by a PassManager?
VerifierFailureAction action;
// What to do if verification fails.
Module *Mod; // Module we are verifying right now
@@ -143,13 +143,13 @@ namespace {
const Value *PersonalityFn;
Verifier()
- : FunctionPass(ID), Broken(false), RealPass(true),
+ : FunctionPass(ID), Broken(false),
action(AbortProcessAction), Mod(0), Context(0), DT(0),
MessagesStr(Messages), PersonalityFn(0) {
initializeVerifierPass(*PassRegistry::getPassRegistry());
}
explicit Verifier(VerifierFailureAction ctn)
- : FunctionPass(ID), Broken(false), RealPass(true), action(ctn), Mod(0),
+ : FunctionPass(ID), Broken(false), action(ctn), Mod(0),
Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) {
initializeVerifierPass(*PassRegistry::getPassRegistry());
}
@@ -158,17 +158,14 @@ namespace {
Mod = &M;
Context = &M.getContext();
- // If this is a real pass, in a pass manager, we must abort before
- // returning back to the pass manager, or else the pass manager may try to
- // run other passes on the broken module.
- if (RealPass)
- return abortIfBroken();
- return false;
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
}
bool runOnFunction(Function &F) {
// Get dominator information if we are being run by PassManager
- if (RealPass) DT = &getAnalysis<DominatorTree>();
+ DT = &getAnalysis<DominatorTree>();
Mod = F.getParent();
if (!Context) Context = &F.getContext();
@@ -177,13 +174,9 @@ namespace {
InstsInThisBlock.clear();
PersonalityFn = 0;
- // If this is a real pass, in a pass manager, we must abort before
- // returning back to the pass manager, or else the pass manager may try to
- // run other passes on the broken module.
- if (RealPass)
- return abortIfBroken();
-
- return false;
+ // We must abort before returning back to the pass manager, or else the
+ // pass manager may try to run other passes on the broken module.
+ return abortIfBroken();
}
bool doFinalization(Module &M) {
@@ -214,8 +207,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequiredID(PreVerifyID);
- if (RealPass)
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTree>();
}
/// abortIfBroken - If the module is broken and we are supposed to abort on
@@ -225,7 +217,6 @@ namespace {
if (!Broken) return false;
MessagesStr << "Broken module found, ";
switch (action) {
- default: llvm_unreachable("Unknown action");
case AbortProcessAction:
MessagesStr << "compilation aborted!\n";
dbgs() << MessagesStr.str();
@@ -239,6 +230,7 @@ namespace {
MessagesStr << "compilation terminated.\n";
return true;
}
+ llvm_unreachable("Invalid action");
}
@@ -279,6 +271,7 @@ namespace {
void visitGetElementPtrInst(GetElementPtrInst &GEP);
void visitLoadInst(LoadInst &LI);
void visitStoreInst(StoreInst &SI);
+ void verifyDominatesUse(Instruction &I, unsigned i);
void visitInstruction(Instruction &I);
void visitTerminatorInst(TerminatorInst &I);
void visitBranchInst(BranchInst &BI);
@@ -547,7 +540,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
for (unsigned i = 0;
i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i];
- Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Assert1(MutI.isEmptyOrSingleton(), "Attributes " +
Attribute::getAsString(MutI) + " are incompatible!", V);
}
@@ -607,7 +600,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
for (unsigned i = 0;
i < array_lengthof(Attribute::MutuallyIncompatible); ++i) {
Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i];
- Assert1(!(MutI & (MutI - 1)), "Attributes " +
+ Assert1(MutI.isEmptyOrSingleton(), "Attributes " +
Attribute::getAsString(MutI) + " are incompatible!", V);
}
}
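A note on the replaced expression: MutI & (MutI - 1) clears the lowest set bit, so it is zero exactly when at most one attribute bit is set; Attributes::isEmptyOrSingleton() simply gives that check a name. A standalone sketch of the trick (emptyOrSingleton is a hypothetical helper, not the LLVM API):

    #include <cstdint>

    // x & (x - 1) drops the lowest set bit, so a zero result means
    // x had zero or one bits set.
    static bool emptyOrSingleton(uint64_t X) { return (X & (X - 1)) == 0; }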
@@ -812,11 +805,11 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
// have the same type as the switched-on value.
Type *SwitchTy = SI.getCondition()->getType();
SmallPtrSet<ConstantInt*, 32> Constants;
- for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
- Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
+ for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
+ Assert1(i.getCaseValue()->getType() == SwitchTy,
"Switch constants must all be same type as switch value!", &SI);
- Assert2(Constants.insert(SI.getCaseValue(i)),
- "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ Assert2(Constants.insert(i.getCaseValue()),
+ "Duplicate integer as switch case", &SI, i.getCaseValue());
}
visitTerminatorInst(SI);
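The index-based case accessors give way to the CaseIt iterator introduced around this time. A minimal usage sketch, assuming the 3.1-era headers (countCases is a hypothetical helper):

    #include "llvm/Instructions.h"

    static unsigned countCases(llvm::SwitchInst &SI) {
      unsigned N = 0;
      for (llvm::SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end();
           i != e; ++i) {
        (void)i.getCaseValue();      // ConstantInt* for this case
        (void)i.getCaseSuccessor();  // BasicBlock* the case branches to
        ++N;
      }
      return N;
    }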
@@ -1035,8 +1028,19 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
- Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->getScalarType()->isPointerTy(),
+ "PtrToInt source must be pointer", &I);
+ Assert1(DestTy->getScalarType()->isIntegerTy(),
+ "PtrToInt result must be integral", &I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "PtrToInt type mismatch", &I);
+
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "PtrToInt Vector width mismatch", &I);
+ }
visitInstruction(I);
}
@@ -1046,9 +1050,18 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
Type *SrcTy = I.getOperand(0)->getType();
Type *DestTy = I.getType();
- Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
- Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
-
+ Assert1(SrcTy->getScalarType()->isIntegerTy(),
+ "IntToPtr source must be an integral", &I);
+ Assert1(DestTy->getScalarType()->isPointerTy(),
+ "IntToPtr result must be a pointer",&I);
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
+ "IntToPtr type mismatch", &I);
+ if (SrcTy->isVectorTy()) {
+ VectorType *VSrc = dyn_cast<VectorType>(SrcTy);
+ VectorType *VDest = dyn_cast<VectorType>(DestTy);
+ Assert1(VSrc->getNumElements() == VDest->getNumElements(),
+ "IntToPtr Vector width mismatch", &I);
+ }
visitInstruction(I);
}
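Both cast checks are relaxed from scalar-only to "scalar or vector, with matching vector widths on each side". A minimal sketch of the shape that now verifies, assuming the 3.1-era instruction constructors (emitPtrVectorRoundTrip is a hypothetical helper):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"

    // <2 x i8*> -> <2 x i64> -> <2 x i8*>, element counts matching.
    static void emitPtrVectorRoundTrip(llvm::Value *PtrVec,  // a <2 x i8*>
                                       llvm::BasicBlock *BB) {
      llvm::LLVMContext &Ctx = BB->getContext();
      llvm::Type *IntVecTy =
          llvm::VectorType::get(llvm::Type::getInt64Ty(Ctx), 2);
      llvm::Value *AsInts =
          new llvm::PtrToIntInst(PtrVec, IntVecTy, "as.ints", BB);
      new llvm::IntToPtrInst(AsInts, PtrVec->getType(), "as.ptrs", BB);
    }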
@@ -1245,7 +1258,7 @@ void Verifier::visitICmpInst(ICmpInst &IC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->getScalarType()->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
// Check that the predicate is valid.
Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
@@ -1295,17 +1308,41 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
}
void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- Assert1(cast<PointerType>(GEP.getOperand(0)->getType())
- ->getElementType()->isSized(),
+ Type *TargetTy = GEP.getPointerOperandType()->getScalarType();
+
+ Assert1(isa<PointerType>(TargetTy),
+ "GEP base pointer is not a vector or a vector of pointers", &GEP);
+ Assert1(cast<PointerType>(TargetTy)->getElementType()->isSized(),
"GEP into unsized type!", &GEP);
-
+
SmallVector<Value*, 16> Idxs(GEP.idx_begin(), GEP.idx_end());
Type *ElTy =
- GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(), Idxs);
+ GetElementPtrInst::getIndexedType(GEP.getPointerOperandType(), Idxs);
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(GEP.getType()->isPointerTy() &&
- cast<PointerType>(GEP.getType())->getElementType() == ElTy,
- "GEP is not of right type for indices!", &GEP, ElTy);
+
+ if (GEP.getPointerOperandType()->isPointerTy()) {
+ // Validate GEPs with scalar indices.
+ Assert2(GEP.getType()->isPointerTy() &&
+ cast<PointerType>(GEP.getType())->getElementType() == ElTy,
+ "GEP is not of right type for indices!", &GEP, ElTy);
+ } else {
+ // Validate GEPs with a vector index.
+ Assert1(Idxs.size() == 1, "Invalid number of indices!", &GEP);
+ Value *Index = Idxs[0];
+ Type *IndexTy = Index->getType();
+ Assert1(IndexTy->isVectorTy(),
+ "Vector GEP must have vector indices!", &GEP);
+ Assert1(GEP.getType()->isVectorTy(),
+ "Vector GEP must return a vector value", &GEP);
+ Type *ElemPtr = cast<VectorType>(GEP.getType())->getElementType();
+ Assert1(ElemPtr->isPointerTy(),
+ "Vector GEP pointer operand is not a pointer!", &GEP);
+ unsigned IndexWidth = cast<VectorType>(IndexTy)->getNumElements();
+ unsigned GepWidth = cast<VectorType>(GEP.getType())->getNumElements();
+ Assert1(IndexWidth == GepWidth, "Invalid GEP index vector width", &GEP);
+ Assert1(ElTy == cast<PointerType>(ElemPtr)->getElementType(),
+ "Vector GEP type does not match pointer type!", &GEP);
+ }
visitInstruction(GEP);
}
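The new else branch admits exactly one vector form: a vector-of-pointers base, a single vector index of the same width, and a vector-of-pointers result whose pointee matches the indexed type. A minimal sketch, assuming the 3.1-era API (emitVectorGEP is a hypothetical helper):

    #include "llvm/Instructions.h"

    // Single-index vector GEP, e.g. a <2 x i8*> base with a <2 x i32> index.
    static llvm::Value *emitVectorGEP(llvm::Value *PtrVec, llvm::Value *IdxVec,
                                      llvm::BasicBlock *BB) {
      // Base and index must be vectors of equal width; the result is a
      // pointer vector of that same width.
      return llvm::GetElementPtrInst::Create(PtrVec, IdxVec, "vgep", BB);
    }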
@@ -1324,6 +1361,25 @@ void Verifier::visitLoadInst(LoadInst &LI) {
Assert1(LI.getSynchScope() == CrossThread,
"Non-atomic load cannot have SynchronizationScope specified", &LI);
}
+
+ if (MDNode *Range = LI.getMetadata(LLVMContext::MD_range)) {
+ unsigned NumOperands = Range->getNumOperands();
+ Assert1(NumOperands % 2 == 0, "Unfinished range!", Range);
+ unsigned NumRanges = NumOperands / 2;
+ Assert1(NumRanges >= 1, "It should have at least one range!", Range);
+ for (unsigned i = 0; i < NumRanges; ++i) {
+ ConstantInt *Low = dyn_cast<ConstantInt>(Range->getOperand(2*i));
+ Assert1(Low, "The lower limit must be an integer!", Low);
+ ConstantInt *High = dyn_cast<ConstantInt>(Range->getOperand(2*i + 1));
+ Assert1(High, "The upper limit must be an integer!", High);
+ Assert1(High->getType() == Low->getType() &&
+ High->getType() == ElTy, "Range types must match load type!",
+ &LI);
+ Assert1(High->getValue() != Low->getValue(), "Range must not be empty!",
+ Range);
+ }
+ }
+
visitInstruction(LI);
}
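For reference, metadata passing these checks is a non-empty, even-length list of (Low, High) integer constants of the loaded type, with Low != High in each pair. A minimal sketch that builds such a node, assuming the 3.1-era MDNode API (addRange0To2 is a hypothetical helper):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"

    // Attach !range !{ iN 0, iN 2 } to an integer load.
    static void addRange0To2(llvm::LoadInst *LI) {
      llvm::LLVMContext &Ctx = LI->getContext();
      llvm::Type *Ty = LI->getType();  // range type must match the load type
      llvm::Value *Pair[] = { llvm::ConstantInt::get(Ty, 0),
                              llvm::ConstantInt::get(Ty, 2) };
      LI->setMetadata(llvm::LLVMContext::MD_range,
                      llvm::MDNode::get(Ctx, Pair));
    }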
@@ -1468,6 +1524,58 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
visitInstruction(LPI);
}
+void Verifier::verifyDominatesUse(Instruction &I, unsigned i) {
+ Instruction *Op = cast<Instruction>(I.getOperand(i));
+ BasicBlock *BB = I.getParent();
+ BasicBlock *OpBlock = Op->getParent();
+ PHINode *PN = dyn_cast<PHINode>(&I);
+
+ // DT can handle non phi instructions for us.
+ if (!PN) {
+ // Definition must dominate use unless use is unreachable!
+ Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB) ||
+ DT->dominates(Op, &I),
+ "Instruction does not dominate all uses!", Op, &I);
+ return;
+ }
+
+ // Check that a definition dominates all of its uses.
+ if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
+ // Invoke results are only usable in the normal destination, not in the
+ // exceptional destination.
+ BasicBlock *NormalDest = II->getNormalDest();
+
+
+ // PHI nodes differ from other nodes because they actually "use" the
+ // value in the predecessor basic blocks they correspond to.
+ BasicBlock *UseBlock = BB;
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ UseBlock = PN->getIncomingBlock(j);
+ Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
+ Op, &I);
+
+ if (UseBlock == OpBlock) {
+ // Special case of a phi node in the normal destination or the unwind
+ // destination.
+ Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
+ "Invoke result not available in the unwind destination!",
+ Op, &I);
+ } else {
+ Assert2(DT->dominates(II, UseBlock) ||
+ !DT->isReachableFromEntry(UseBlock),
+ "Invoke result does not dominate all uses!", Op, &I);
+ }
+ }
+
+ // PHI nodes are more difficult than other nodes because they actually
+ // "use" the value in the predecessor basic blocks they correspond to.
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ BasicBlock *PredBB = PN->getIncomingBlock(j);
+ Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
+ !DT->isReachableFromEntry(PredBB)),
+ "Instruction does not dominate all uses!", Op, &I);
+}
+
/// verifyInstruction - Verify that an instruction is well formed.
///
void Verifier::visitInstruction(Instruction &I) {
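These dominance rules run whenever the verifier itself runs. A minimal sketch of invoking it programmatically, assuming the 3.1-era entry point (moduleIsBroken is a hypothetical helper):

    #include "llvm/Analysis/Verifier.h"
    #include "llvm/Module.h"
    #include <string>

    static bool moduleIsBroken(llvm::Module &M) {
      std::string Err;
      // Returns true if the module is broken; Err carries the diagnostics.
      return llvm::verifyModule(M, llvm::ReturnStatusAction, &Err);
    }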
@@ -1536,84 +1644,30 @@ void Verifier::visitInstruction(Instruction &I) {
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
Assert1(GV->getParent() == Mod, "Referencing global in another module!",
&I);
- } else if (Instruction *Op = dyn_cast<Instruction>(I.getOperand(i))) {
- BasicBlock *OpBlock = Op->getParent();
-
- // Check that a definition dominates all of its uses.
- if (InvokeInst *II = dyn_cast<InvokeInst>(Op)) {
- // Invoke results are only usable in the normal destination, not in the
- // exceptional destination.
- BasicBlock *NormalDest = II->getNormalDest();
-
- Assert2(NormalDest != II->getUnwindDest(),
- "No uses of invoke possible due to dominance structure!",
- Op, &I);
-
- // PHI nodes differ from other nodes because they actually "use" the
- // value in the predecessor basic blocks they correspond to.
- BasicBlock *UseBlock = BB;
- if (PHINode *PN = dyn_cast<PHINode>(&I)) {
- unsigned j = PHINode::getIncomingValueNumForOperand(i);
- UseBlock = PN->getIncomingBlock(j);
- }
- Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
- Op, &I);
-
- if (isa<PHINode>(I) && UseBlock == OpBlock) {
- // Special case of a phi node in the normal destination or the unwind
- // destination.
- Assert2(BB == NormalDest || !DT->isReachableFromEntry(UseBlock),
- "Invoke result not available in the unwind destination!",
- Op, &I);
- } else {
- Assert2(DT->dominates(NormalDest, UseBlock) ||
- !DT->isReachableFromEntry(UseBlock),
- "Invoke result does not dominate all uses!", Op, &I);
-
- // If the normal successor of an invoke instruction has multiple
- // predecessors, then the normal edge from the invoke is critical,
- // so the invoke value can only be live if the destination block
- // dominates all of it's predecessors (other than the invoke).
- if (!NormalDest->getSinglePredecessor() &&
- DT->isReachableFromEntry(UseBlock))
- // If it is used by something non-phi, then the other case is that
- // 'NormalDest' dominates all of its predecessors other than the
- // invoke. In this case, the invoke value can still be used.
- for (pred_iterator PI = pred_begin(NormalDest),
- E = pred_end(NormalDest); PI != E; ++PI)
- if (*PI != II->getParent() && !DT->dominates(NormalDest, *PI) &&
- DT->isReachableFromEntry(*PI)) {
- CheckFailed("Invoke result does not dominate all uses!", Op,&I);
- return;
- }
- }
- } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
- // PHI nodes are more difficult than other nodes because they actually
- // "use" the value in the predecessor basic blocks they correspond to.
- unsigned j = PHINode::getIncomingValueNumForOperand(i);
- BasicBlock *PredBB = PN->getIncomingBlock(j);
- Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
- !DT->isReachableFromEntry(PredBB)),
- "Instruction does not dominate all uses!", Op, &I);
- } else {
- if (OpBlock == BB) {
- // If they are in the same basic block, make sure that the definition
- // comes before the use.
- Assert2(InstsInThisBlock.count(Op) || !DT->isReachableFromEntry(BB),
- "Instruction does not dominate all uses!", Op, &I);
- }
-
- // Definition must dominate use unless use is unreachable!
- Assert2(InstsInThisBlock.count(Op) || DT->dominates(Op, &I) ||
- !DT->isReachableFromEntry(BB),
- "Instruction does not dominate all uses!", Op, &I);
- }
+ } else if (isa<Instruction>(I.getOperand(i))) {
+ verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
Assert1((i + 1 == e && isa<CallInst>(I)) ||
(i + 3 == e && isa<InvokeInst>(I)),
"Cannot take the address of an inline asm!", &I);
}
}
+
+ if (MDNode *MD = I.getMetadata(LLVMContext::MD_fpaccuracy)) {
+ Assert1(I.getType()->isFPOrFPVectorTy(),
+ "fpaccuracy requires a floating point result!", &I);
+ Assert1(MD->getNumOperands() == 1, "fpaccuracy takes one operand!", &I);
+ ConstantFP *Op = dyn_cast_or_null<ConstantFP>(MD->getOperand(0));
+ Assert1(Op, "fpaccuracy ULPs not a floating point number!", &I);
+ APFloat ULPs = Op->getValueAPF();
+ Assert1(ULPs.isNormal() || ULPs.isZero(),
+ "fpaccuracy ULPs not a normal number!", &I);
+ Assert1(!ULPs.isNegative(), "fpaccuracy ULPs is negative!", &I);
+ }
+
+ MDNode *MD = I.getMetadata(LLVMContext::MD_range);
+ Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
+
InstsInThisBlock.insert(&I);
}
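For reference, well-formed fpaccuracy metadata is a single floating-point constant: the permitted error in ULPs, which must be non-negative and either normal or zero. A minimal sketch, assuming the 3.1-era API (addFPAccuracy is a hypothetical helper):

    #include "llvm/Constants.h"
    #include "llvm/Instruction.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Type.h"

    // Tag an FP-valued instruction with !fpaccuracy !{ float 2.5 }.
    static void addFPAccuracy(llvm::Instruction *FPOp) {
      llvm::LLVMContext &Ctx = FPOp->getContext();
      llvm::Value *ULPs =
          llvm::ConstantFP::get(llvm::Type::getFloatTy(Ctx), 2.5);
      FPOp->setMetadata(llvm::LLVMContext::MD_fpaccuracy,
                        llvm::MDNode::get(Ctx, ULPs));
    }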
@@ -1642,6 +1696,12 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
switch (ID) {
default:
break;
+ case Intrinsic::ctlz: // llvm.ctlz
+ case Intrinsic::cttz: // llvm.cttz
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
+ "is_zero_undef argument of bit counting intrinsics must be a "
+ "constant int", &CI);
+ break;
case Intrinsic::dbg_declare: { // llvm.dbg.declare
Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", &CI);
diff --git a/llvm.spec.in b/llvm.spec.in
index 9284d65d0077..0a3f6e807849 100644
--- a/llvm.spec.in
+++ b/llvm.spec.in
@@ -1,7 +1,7 @@
Name: @PACKAGE_NAME@
Version: @PACKAGE_VERSION@
Release: 0
-Summary: The Low Level Virtual Machine (An Optimizing Compiler Infrastructure)
+Summary: LLVM (An Optimizing Compiler Infrastructure)
License: University of Illinois/NCSA Open Source License
Vendor: None (open source)
Group: Development/Compilers
diff --git a/projects/LLVMBuild.txt b/projects/LLVMBuild.txt
new file mode 100644
index 000000000000..3c24d1a3bdca
--- /dev/null
+++ b/projects/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./projects/LLVMBuild.txt ---------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Group
+name = Projects
+parent = $ROOT
diff --git a/projects/sample/Makefile.common.in b/projects/sample/Makefile.common.in
index 33bfcd678809..758423b89599 100644
--- a/projects/sample/Makefile.common.in
+++ b/projects/sample/Makefile.common.in
@@ -1,7 +1,7 @@
# Set the name of the project here
PROJECT_NAME := sample
PROJ_VERSION := 0.9
-
+
# Set this variable to the top of the LLVM source tree.
LLVM_SRC_ROOT = @LLVM_SRC@
@@ -13,10 +13,14 @@ LLVM_OBJ_ROOT = @LLVM_OBJ@
PROJ_SRC_ROOT := $(subst //,/,@abs_top_srcdir@)
# Set the root directory of this project's object files
-PROJ_OBJ_ROOT := $(subst //,/,@abs_top_objdir@)
+PROJ_OBJ_ROOT := $(subst //,/,@abs_top_builddir@)
# Set the root directory of this project's install prefix
PROJ_INSTALL_ROOT := @prefix@
-# Include LLVM's Master Makefile.
-include $(LLVM_SRC_ROOT)/Makefile.common
+# Configuration file to set paths specific to local installation of LLVM
+include $(PROJ_OBJ_ROOT)/Makefile.llvm.config
+
+# Include all of the build rules used for making LLVM
+include $(PROJ_SRC_ROOT)/Makefile.llvm.rules
+
diff --git a/projects/sample/Makefile.llvm.config.in b/projects/sample/Makefile.llvm.config.in
new file mode 100644
index 000000000000..697660c0caab
--- /dev/null
+++ b/projects/sample/Makefile.llvm.config.in
@@ -0,0 +1,313 @@
+#===-- Makefile.config - Local configuration for LLVM ------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# This file is included by the project makefile. It defines paths and other
+# values specific to a particular installation of LLVM.
+#
+#===------------------------------------------------------------------------===#
+
+# Define LLVM specific info and directories based on the autoconf variables
+LLVMVersion := @LLVM_VERSION@
+
+###########################################################################
+# Directory Configuration
+# This section of the Makefile determines what is where. To be
+# specific, there are several locations that need to be defined:
+#
+# o LLVM_SRC_ROOT : The root directory of the LLVM source code.
+# o LLVM_OBJ_ROOT : The root directory containing the built LLVM code.
+#
+# o PROJ_SRC_DIR : The directory containing the code to build.
+# o PROJ_SRC_ROOT : The root directory of the code to build.
+#
+# o PROJ_OBJ_DIR : The directory in which compiled code will be placed.
+# o PROJ_OBJ_ROOT : The root directory in which compiled code is placed.
+#
+###########################################################################
+
+PWD := @BINPWD@
+
+# The macro below is expanded when 'realpath' is not built-in.
+# A built-in 'realpath' is available in GNU Make 3.81 and later.
+realpath = $(shell cd $(1); $(PWD))
+
+PROJ_OBJ_DIR := $(call realpath, .)
+PROJ_OBJ_ROOT := $(call realpath, $(PROJ_OBJ_DIR)/$(LEVEL))
+
+ifndef PROJ_SRC_ROOT
+$(error Projects must define PROJ_SRC_ROOT)
+endif
+ifndef PROJ_OBJ_ROOT
+$(error Projects must define PROJ_OBJ_ROOT)
+endif
+ifndef PROJ_INSTALL_ROOT
+$(error Projects must define PROJ_INSTALL_ROOT)
+endif
+ifndef LLVM_SRC_ROOT
+$(error Projects must define LLVM_SRC_ROOT)
+endif
+ifndef LLVM_OBJ_ROOT
+$(error Projects must define LLVM_OBJ_ROOT)
+endif
+PROJ_SRC_DIR := $(call realpath, $(PROJ_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR)))
+prefix := $(PROJ_INSTALL_ROOT)
+PROJ_prefix := $(prefix)
+ifndef PROJ_VERSION
+PROJ_VERSION := 1.0
+endif
+
+PROJ_bindir := $(PROJ_prefix)/bin
+PROJ_libdir := $(PROJ_prefix)/lib
+PROJ_datadir := $(PROJ_prefix)/share
+PROJ_docsdir := $(PROJ_prefix)/docs/llvm
+PROJ_etcdir := $(PROJ_prefix)/etc/llvm
+PROJ_includedir := $(PROJ_prefix)/include
+PROJ_infodir := $(PROJ_prefix)/info
+PROJ_mandir := $(PROJ_prefix)/share/man
+
+# Determine if we're on a unix type operating system
+LLVM_ON_UNIX:=@LLVM_ON_UNIX@
+LLVM_ON_WIN32:=@LLVM_ON_WIN32@
+
+# Host operating system on which LLVM will be run.
+OS=@OS@
+HOST_OS=@HOST_OS@
+# Target operating system that LLVM will compile for.
+TARGET_OS=@TARGET_OS@
+
+# Target hardware architecture
+ARCH=@ARCH@
+
+# Indicates whether we're cross-compiling LLVM or not
+LLVM_CROSS_COMPILING=@LLVM_CROSS_COMPILING@
+
+# Executable file extension for build platform (mainly for
+# the tablegen call if we're cross-compiling).
+BUILD_EXEEXT=@BUILD_EXEEXT@
+
+# Compilers for the build platform (mainly for the tablegen
+# call if we're cross-compiling).
+BUILD_CC=@BUILD_CC@
+BUILD_CXX=@BUILD_CXX@
+
+# Triple for configuring build tools when cross-compiling
+BUILD_TRIPLE=@build@
+
+# Target triple (cpu-vendor-os) for which we should generate code
+TARGET_TRIPLE=@target@
+
+# Extra options to compile LLVM with
+EXTRA_OPTIONS=@EXTRA_OPTIONS@
+
+# Extra options to link LLVM with
+EXTRA_LD_OPTIONS=@EXTRA_LD_OPTIONS@
+
+# Endian-ness of the target
+ENDIAN=@ENDIAN@
+
+# Path to the C++ compiler to use. This is an optional setting, which defaults
+# to whatever your gmake defaults to.
+CXX = @CXX@
+
+# Path to the CC binary, which is used by testcases for native builds.
+CC := @CC@
+
+# Linker flags.
+LDFLAGS+=@LDFLAGS@
+
+# Path to the library archiver program.
+AR_PATH = @AR@
+AR = @AR@
+
+# Path to the nm program
+NM_PATH = @NM@
+
+# The pathnames of the programs we require to build
+CMP := @CMP@
+CP := @CP@
+DATE := @DATE@
+FIND := @FIND@
+GREP := @GREP@
+INSTALL := @INSTALL@
+MKDIR := $(PROJ_SRC_ROOT)/autoconf/mkinstalldirs
+MV := @MV@
+RANLIB := @RANLIB@
+RM := @RM@
+SED := @SED@
+TAR := @TAR@
+
+# Paths to miscellaneous programs we hope are present but might not be
+BZIP2 := @BZIP2@
+CAT := @CAT@
+DOT := @DOT@
+DOXYGEN := @DOXYGEN@
+GROFF := @GROFF@
+GZIPBIN := @GZIPBIN@
+OCAMLC := @OCAMLC@
+OCAMLOPT := @OCAMLOPT@
+OCAMLDEP := @OCAMLDEP@
+OCAMLDOC := @OCAMLDOC@
+GAS := @GAS@
+POD2HTML := @POD2HTML@
+POD2MAN := @POD2MAN@
+PDFROFF := @PDFROFF@
+RUNTEST := @RUNTEST@
+TCLSH := @TCLSH@
+ZIP := @ZIP@
+
+HAVE_PTHREAD := @HAVE_PTHREAD@
+
+LIBS := @LIBS@
+
+# Targets that we should build
+TARGETS_TO_BUILD=@TARGETS_TO_BUILD@
+
+# Path to directory where object files should be stored during a build.
+# Set OBJ_ROOT to "." if you do not want to use a separate place for
+# object files.
+OBJ_ROOT := .
+
+# What to pass as rpath flag to g++
+RPATH := @RPATH@
+
+# What to pass as -rdynamic flag to g++
+RDYNAMIC := @RDYNAMIC@
+
+# These are options that can either be enabled here, or can be enabled on the
+# make command line (i.e., make ENABLE_PROFILING=1):
+
+# When ENABLE_LIBCPP is enabled, LLVM uses libc++ by default to build.
+#ENABLE_LIBCPP = 0
+ENABLE_LIBCPP = @ENABLE_LIBCPP@
+
+# When ENABLE_OPTIMIZED is enabled, LLVM code is optimized and output is put
+# into the "Release" directories. Otherwise, LLVM code is not optimized and
+# output is put in the "Debug" directories.
+#ENABLE_OPTIMIZED = 1
+@ENABLE_OPTIMIZED@
+
+# When ENABLE_PROFILING is enabled, profile instrumentation is done
+# and output is put into the "<Flavor>+Profile" directories, where
+# <Flavor> is either Debug or Release depending on how other build
+# flags are set. Otherwise, output is put in the <Flavor>
+# directories.
+#ENABLE_PROFILING = 1
+@ENABLE_PROFILING@
+
+# When DISABLE_ASSERTIONS is enabled, builds of all of the LLVM code will
+# exclude assertion checks, otherwise they are included.
+#DISABLE_ASSERTIONS = 1
+@DISABLE_ASSERTIONS@
+
+# When ENABLE_EXPENSIVE_CHECKS is enabled, builds of all of the LLVM
+# code will include expensive checks, otherwise they are excluded.
+#ENABLE_EXPENSIVE_CHECKS = 0
+@ENABLE_EXPENSIVE_CHECKS@
+
+# When DEBUG_RUNTIME is enabled, the runtime libraries will retain debug
+# symbols.
+#DEBUG_RUNTIME = 1
+@DEBUG_RUNTIME@
+
+# When DEBUG_SYMBOLS is enabled, the compiler libraries will retain debug
+# symbols.
+#DEBUG_SYMBOLS = 1
+@DEBUG_SYMBOLS@
+
+# The compiler flags to use for optimized builds.
+OPTIMIZE_OPTION := @OPTIMIZE_OPTION@
+
+# When ENABLE_PROFILING is enabled, the llvm source base is built with profile
+# information to allow gprof to be used to get execution frequencies.
+#ENABLE_PROFILING = 1
+
+# When ENABLE_DOCS is disabled, docs/ will not be built.
+ENABLE_DOCS = @ENABLE_DOCS@
+
+# When ENABLE_DOXYGEN is enabled, the doxygen documentation will be built
+ENABLE_DOXYGEN = @ENABLE_DOXYGEN@
+
+# Do we want to enable threads?
+ENABLE_THREADS := @ENABLE_THREADS@
+
+# Do we want to build with position independent code?
+ENABLE_PIC := @ENABLE_PIC@
+
+# Do we want to build a shared library and link the tools with it?
+ENABLE_SHARED := @ENABLE_SHARED@
+
+# Do we want to link the stdc++ into a shared library? (Cygming)
+ENABLE_EMBED_STDCXX := @ENABLE_EMBED_STDCXX@
+
+# Use -fvisibility-inlines-hidden?
+ENABLE_VISIBILITY_INLINES_HIDDEN := @ENABLE_VISIBILITY_INLINES_HIDDEN@
+
+# Do we want to allow timestamping information into builds?
+ENABLE_TIMESTAMPS := @ENABLE_TIMESTAMPS@
+
+# This option tells the Makefiles to produce verbose output.
+# It essentially prints the commands that make is executing
+#VERBOSE = 1
+
+# Enable JIT for this platform
+TARGET_HAS_JIT = @TARGET_HAS_JIT@
+
+# Environment variable to set to change the runtime shared library search path.
+SHLIBPATH_VAR = @SHLIBPATH_VAR@
+
+# Shared library extension for host platform.
+SHLIBEXT = @SHLIBEXT@
+
+# Executable file extension for host platform.
+EXEEXT = @EXEEXT@
+
+# Things we just assume are "there"
+ECHO := echo
+
+# Get the options for causing archives to link all their content instead of
+# just missing symbols, and the inverse of that. This is used for certain LLVM
+# tools that permit loadable modules. It ensures that the LLVM symbols will be
+# available to those loadable modules.
+LINKALL := @LINKALL@
+NOLINKALL := @NOLINKALL@
+
+# Get the value of HUGE_VAL_SANITY which will be either "yes" or "no" depending
+# on the check.
+HUGE_VAL_SANITY = @HUGE_VAL_SANITY@
+
+# Bindings that we should build
+BINDINGS_TO_BUILD := @BINDINGS_TO_BUILD@
+ALL_BINDINGS := @ALL_BINDINGS@
+OCAML_LIBDIR := @OCAML_LIBDIR@
+
+# When compiling under Mingw/Cygwin, executables such as tblgen
+# expect Windows paths, whereas the build system uses Unix paths.
+# The function SYSPATH transforms Unix paths into Windows paths.
+ifneq (,$(findstring -mno-cygwin, $(CXX)))
+ SYSPATH = $(shell echo $(1) | cygpath -m -f -)
+else
+ SYSPATH = $(1)
+endif
+
+# Location of the plugin header file for gold.
+BINUTILS_INCDIR := @BINUTILS_INCDIR@
+
+# Optional flags supported by the compiler
+# -Wno-missing-field-initializers
+NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
+# -Wno-variadic-macros
+NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
+# -Wcovered-switch-default
+COVERED_SWITCH_DEFAULT = @COVERED_SWITCH_DEFAULT@
+
+# Was polly found in tools/polly?
+LLVM_HAS_POLLY = @LLVM_HAS_POLLY@
+# Flags supported by the linker.
+# bfd ld / gold --version-script=file
+HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@
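Two of the helpers above deserve a usage note. With GNU Make older than 3.81 the realpath macro shells out, so a call like $(call realpath, ..) expands to the pwd of the parent directory; 3.81 and later could use the built-in instead. SYSPATH is the identity everywhere except -mno-cygwin toolchains, where it pipes its argument through cygpath -m, turning a Unix path such as /include/sys/types.h into a Windows-style C:/mingw/include/sys/types.h (illustrative values, matching the ECHOPATH comment in Makefile.llvm.rules below).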
diff --git a/projects/sample/Makefile.llvm.rules b/projects/sample/Makefile.llvm.rules
new file mode 100644
index 000000000000..6e047247c616
--- /dev/null
+++ b/projects/sample/Makefile.llvm.rules
@@ -0,0 +1,2250 @@
+#===-- Makefile.rules - Common make rules for LLVM ---------*- Makefile -*--===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# This file is included by all of the LLVM makefiles. For details on how to use
+# it properly, please see the document MakefileGuide.html in the docs directory.
+#
+#===-----------------------------------------------------------------------====#
+
+################################################################################
+# TARGETS: Define standard targets that can be invoked
+################################################################################
+
+#--------------------------------------------------------------------
+# Define the various target sets
+#--------------------------------------------------------------------
+RecursiveTargets := all clean clean-all install uninstall install-bytecode \
+ unitcheck
+LocalTargets := all-local clean-local clean-all-local check-local \
+ install-local printvars uninstall-local \
+ install-bytecode-local
+TopLevelTargets := check dist dist-check dist-clean dist-gzip dist-bzip2 \
+ dist-zip unittests
+UserTargets := $(RecursiveTargets) $(LocalTargets) $(TopLevelTargets)
+InternalTargets := preconditions distdir dist-hook
+
+################################################################################
+# INITIALIZATION: Basic things the makefile needs
+################################################################################
+
+#--------------------------------------------------------------------
+# Set the VPATH so that we can find source files.
+#--------------------------------------------------------------------
+VPATH=$(PROJ_SRC_DIR)
+
+#--------------------------------------------------------------------
+# Reset the list of suffixes we know how to build.
+#--------------------------------------------------------------------
+.SUFFIXES:
+.SUFFIXES: .c .cpp .cc .h .hpp .o .a .bc .td .ps .dot .ll .m .mm
+.SUFFIXES: $(SHLIBEXT) $(SUFFIXES)
+
+#--------------------------------------------------------------------
+# Mark all of these targets as phony to avoid implicit rule search
+#--------------------------------------------------------------------
+.PHONY: $(UserTargets) $(InternalTargets)
+
+#--------------------------------------------------------------------
+# Make sure all the user-target rules are double colon rules and
+# they are defined first.
+#--------------------------------------------------------------------
+
+$(UserTargets)::
+
+################################################################################
+# PRECONDITIONS: that which must be built/checked first
+################################################################################
+
+SrcMakefiles := $(filter %Makefile %Makefile.tests,\
+ $(wildcard $(PROJ_SRC_DIR)/Makefile*))
+ObjMakefiles := $(subst $(PROJ_SRC_DIR),$(PROJ_OBJ_DIR),$(SrcMakefiles))
+ConfigureScript := $(PROJ_SRC_ROOT)/configure
+ConfigStatusScript := $(PROJ_OBJ_ROOT)/config.status
+MakefileConfigIn := $(strip $(wildcard $(PROJ_SRC_ROOT)/Makefile.config.in))
+MakefileCommonIn := $(strip $(wildcard $(PROJ_SRC_ROOT)/Makefile.common.in))
+MakefileConfig := $(PROJ_OBJ_ROOT)/Makefile.config
+MakefileCommon := $(PROJ_OBJ_ROOT)/Makefile.common
+PreConditions := $(ConfigStatusScript) $(ObjMakefiles)
+ifneq ($(MakefileCommonIn),)
+PreConditions += $(MakefileCommon)
+endif
+
+ifneq ($(MakefileConfigIn),)
+PreConditions += $(MakefileConfig)
+endif
+
+preconditions: $(PreConditions)
+
+#------------------------------------------------------------------------
+# Make sure the BUILT_SOURCES are built first
+#------------------------------------------------------------------------
+$(filter-out clean clean-local,$(UserTargets)):: $(BUILT_SOURCES)
+
+clean-all-local::
+ifneq ($(strip $(BUILT_SOURCES)),)
+ -$(Verb) $(RM) -f $(BUILT_SOURCES)
+endif
+
+ifneq ($(PROJ_OBJ_ROOT),$(PROJ_SRC_ROOT))
+spotless:
+ $(Verb) if test -x config.status ; then \
+ $(EchoCmd) Wiping out $(PROJ_OBJ_ROOT) ; \
+ $(MKDIR) .spotless.save ; \
+ $(MV) config.status .spotless.save ; \
+ $(MV) mklib .spotless.save ; \
+ $(MV) projects .spotless.save ; \
+ $(RM) -rf * ; \
+ $(MV) .spotless.save/config.status . ; \
+ $(MV) .spotless.save/mklib . ; \
+ $(MV) .spotless.save/projects . ; \
+ $(RM) -rf .spotless.save ; \
+ $(EchoCmd) Rebuilding configuration of $(PROJ_OBJ_ROOT) ; \
+ $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
+ $(ConfigStatusScript) ; \
+ else \
+ $(EchoCmd) "make spotless" can only be run from $(PROJ_OBJ_ROOT); \
+ fi
+else
+spotless:
+ $(EchoCmd) "spotless target not supported for objdir == srcdir"
+endif
+
+$(BUILT_SOURCES) : $(ObjMakefiles)
+
+#------------------------------------------------------------------------
+# Make sure we're not using a stale configuration
+#------------------------------------------------------------------------
+reconfigure:
+ $(Echo) Reconfiguring $(PROJ_OBJ_ROOT)
+ $(Verb) cd $(PROJ_OBJ_ROOT) && \
+ $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
+ $(ConfigStatusScript)
+
+.PRECIOUS: $(ConfigStatusScript)
+$(ConfigStatusScript): $(ConfigureScript)
+ $(Echo) Reconfiguring with $<
+ $(Verb) cd $(PROJ_OBJ_ROOT) && \
+ $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
+ $(ConfigStatusScript)
+
+#------------------------------------------------------------------------
+# Make sure the configuration makefile is up to date
+#------------------------------------------------------------------------
+ifneq ($(MakefileConfigIn),)
+$(MakefileConfig): $(MakefileConfigIn) $(ConfigStatusScript)
+ $(Echo) Regenerating $@
+ $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ConfigStatusScript) Makefile.config
+endif
+
+ifneq ($(MakefileCommonIn),)
+$(MakefileCommon): $(MakefileCommonIn) $(ConfigStatusScript)
+ $(Echo) Regenerating $@
+ $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ConfigStatusScript) Makefile.common
+endif
+
+#------------------------------------------------------------------------
+# If the Makefile in the source tree has been updated, copy it over into the
+# build tree. But, only do this if the source and object makefiles differ
+#------------------------------------------------------------------------
+ifndef PROJ_MAKEFILE
+PROJ_MAKEFILE := $(PROJ_SRC_DIR)/Makefile
+endif
+
+ifneq ($(PROJ_OBJ_DIR),$(PROJ_SRC_DIR))
+
+Makefile: $(PROJ_MAKEFILE) $(ExtraMakefiles)
+ $(Echo) "Updating Makefile"
+ $(Verb) $(MKDIR) $(@D)
+ $(Verb) $(CP) -f $< $@
+
+# Copy the Makefile.* files unless we're in the root directory, which avoids
+# copying Makefile.config.in or other files that should be handled
+# explicitly.
+$(PROJ_OBJ_DIR)/Makefile% : $(PROJ_MAKEFILE)%
+ @case '$?' in \
+ *Makefile.rules) ;; \
+ *.in) ;; \
+ *) $(EchoCmd) "Updating $(@F)" ; \
+ $(MKDIR) $(@D) ; \
+ $(CP) -f $< $@ ;; \
+ esac
+
+endif
+
+#------------------------------------------------------------------------
+# Set up the basic dependencies
+#------------------------------------------------------------------------
+$(UserTargets):: $(PreConditions)
+
+all:: all-local
+clean:: clean-local
+clean-all:: clean-local clean-all-local
+install:: install-local
+uninstall:: uninstall-local
+install-local:: all-local
+install-bytecode:: install-bytecode-local
+
+###############################################################################
+# VARIABLES: Set up various variables based on configuration data
+###############################################################################
+
+# Set a variable if this make invocation is for a "cleaning" target
+ifneq ($(strip $(filter clean clean-local dist-clean,$(MAKECMDGOALS))),)
+ IS_CLEANING_TARGET=1
+endif
+
+#--------------------------------------------------------------------
+# Variables derived from configuration we are building
+#--------------------------------------------------------------------
+
+CPP.Defines :=
+ifeq ($(ENABLE_OPTIMIZED),1)
+ BuildMode := Release
+ # Don't use -fomit-frame-pointer on Darwin or FreeBSD.
+ ifneq ($(HOST_OS),FreeBSD)
+ ifneq ($(HOST_OS),Darwin)
+ OmitFramePointer := -fomit-frame-pointer
+ endif
+ endif
+
+ # Darwin requires -fstrict-aliasing to be explicitly enabled.
+ # Avoid -fstrict-aliasing on Darwin for now; there are unresolved issues
+ # with -fstrict-aliasing and ipa-type-escape radr://6756684
+ #ifeq ($(HOST_OS),Darwin)
+ # EXTRA_OPTIONS += -fstrict-aliasing -Wstrict-aliasing
+ #endif
+ CXX.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
+ C.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
+ LD.Flags += $(OPTIMIZE_OPTION)
+ ifdef DEBUG_SYMBOLS
+ BuildMode := $(BuildMode)+Debug
+ CXX.Flags += -g
+ C.Flags += -g
+ LD.Flags += -g
+ KEEP_SYMBOLS := 1
+ endif
+else
+ ifdef NO_DEBUG_SYMBOLS
+ BuildMode := Unoptimized
+ CXX.Flags +=
+ C.Flags +=
+ LD.Flags +=
+ KEEP_SYMBOLS := 1
+ else
+ BuildMode := Debug
+ CXX.Flags += -g
+ C.Flags += -g
+ LD.Flags += -g
+ KEEP_SYMBOLS := 1
+ endif
+endif
+
+ifeq ($(ENABLE_LIBCPP),1)
+ CXX.Flags += -stdlib=libc++
+ LD.Flags += -stdlib=libc++
+endif
+
+ifeq ($(ENABLE_PROFILING),1)
+ BuildMode := $(BuildMode)+Profile
+ CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) -pg -g
+ C.Flags := $(filter-out -fomit-frame-pointer,$(C.Flags)) -pg -g
+ LD.Flags := $(filter-out -fomit-frame-pointer,$(LD.Flags)) -pg -g
+ KEEP_SYMBOLS := 1
+endif
+
+#ifeq ($(ENABLE_VISIBILITY_INLINES_HIDDEN),1)
+# CXX.Flags += -fvisibility-inlines-hidden
+#endif
+
+ifdef ENABLE_EXPENSIVE_CHECKS
+ # GNU libstdc++ uses RTTI if you define _GLIBCXX_DEBUG, which we did above.
+ # See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40160
+ REQUIRES_RTTI := 1
+endif
+
+# If REQUIRES_EH=1 is specified then don't disable exceptions
+ifndef REQUIRES_EH
+ CXX.Flags += -fno-exceptions
+else
+ # If the library requires EH, it also requires RTTI.
+ REQUIRES_RTTI := 1
+endif
+
+ifdef REQUIRES_FRAME_POINTER
+ CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags))
+ C.Flags := $(filter-out -fomit-frame-pointer,$(C.Flags))
+ LD.Flags := $(filter-out -fomit-frame-pointer,$(LD.Flags))
+endif
+
+# If REQUIRES_RTTI=1 is specified then don't disable run-time type id.
+ifneq ($(REQUIRES_RTTI), 1)
+ CXX.Flags += -fno-rtti
+endif
+
+ifeq ($(ENABLE_COVERAGE),1)
+ BuildMode := $(BuildMode)+Coverage
+ CXX.Flags += -ftest-coverage -fprofile-arcs
+ C.Flags += -ftest-coverage -fprofile-arcs
+endif
+
+# If DISABLE_ASSERTIONS=1 is specified (make command line or configured),
+# then disable assertions by defining the appropriate preprocessor symbols.
+ifeq ($(DISABLE_ASSERTIONS),1)
+ CPP.Defines += -DNDEBUG
+else
+ BuildMode := $(BuildMode)+Asserts
+ CPP.Defines += -D_DEBUG
+endif
+
+# If ENABLE_EXPENSIVE_CHECKS=1 is specified (make command line or
+# configured), then enable expensive checks by defining the
+# appropriate preprocessor symbols.
+ifeq ($(ENABLE_EXPENSIVE_CHECKS),1)
+ BuildMode := $(BuildMode)+Checks
+ CPP.Defines += -D_GLIBCXX_DEBUG -DXDEBUG
+endif
+
+# LOADABLE_MODULE implies several other things so we force them to be
+# defined/on.
+ifdef LOADABLE_MODULE
+ SHARED_LIBRARY := 1
+ LINK_LIBS_IN_SHARED := 1
+endif
+
+ifdef SHARED_LIBRARY
+ ENABLE_PIC := 1
+ PIC_FLAG = "(PIC)"
+endif
+
+ifeq ($(ENABLE_PIC),1)
+ ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ # Nothing. Win32 defaults to PIC and warns when given -fPIC
+ else
+ ifeq ($(HOST_OS),Darwin)
+ # Common symbols not allowed in dylib files
+ CXX.Flags += -fno-common
+ C.Flags += -fno-common
+ else
+ # Linux and others; pass -fPIC
+ CXX.Flags += -fPIC
+ C.Flags += -fPIC
+ endif
+ endif
+else
+ ifeq ($(HOST_OS),Darwin)
+ CXX.Flags += -mdynamic-no-pic
+ C.Flags += -mdynamic-no-pic
+ endif
+endif
+
+# Support a makefile variable that keeps timestamps and other
+# non-deterministic info out of the build.
+ifeq ($(ENABLE_TIMESTAMPS),1)
+ DOTDIR_TIMESTAMP_COMMAND := $(DATE)
+else
+ DOTDIR_TIMESTAMP_COMMAND := echo 'Created.'
+endif
+
+ifeq ($(HOST_OS),MingW)
+ # Work around PR4957
+ CPP.Defines += -D__NO_CTYPE_INLINE
+ ifeq ($(LLVM_CROSS_COMPILING),1)
+ # Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=525016
+ ifdef TOOLNAME
+ LD.Flags += -Wl,--allow-multiple-definition
+ endif
+ endif
+endif
+
+CXX.Flags += -Woverloaded-virtual
+CPP.BaseFlags += $(CPP.Defines)
+AR.Flags := cru
+
+# Make Floating point IEEE compliant on Alpha.
+ifeq ($(ARCH),Alpha)
+ CXX.Flags += -mieee
+ CPP.BaseFlags += -mieee
+ifeq ($(ENABLE_PIC),0)
+ CXX.Flags += -fPIC
+ CPP.BaseFlags += -fPIC
+endif
+
+ LD.Flags += -Wl,--no-relax
+endif
+
+# GNU ld/PECOFF accepts but ignores the options below:
+# --version-script
+# --export-dynamic
+# --rpath
+# FIXME: autoconf should be aware of them.
+ifneq (,$(filter $(HOST_OS),Cygwin MingW))
+ HAVE_LINK_VERSION_SCRIPT := 0
+ RPATH :=
+ RDYNAMIC := -Wl,--export-all-symbols
+endif
+
+#--------------------------------------------------------------------
+# Directory locations
+#--------------------------------------------------------------------
+TargetMode :=
+ifeq ($(LLVM_CROSS_COMPILING),1)
+ BuildLLVMToolDir := $(LLVM_OBJ_ROOT)/BuildTools/$(BuildMode)/bin
+endif
+
+ObjRootDir := $(PROJ_OBJ_DIR)/$(BuildMode)
+ObjDir := $(ObjRootDir)
+LibDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/lib
+ToolDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/bin
+ExmplDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/examples
+LLVMLibDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/lib
+LLVMToolDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/bin
+LLVMExmplDir:= $(LLVM_OBJ_ROOT)/$(BuildMode)/examples
+
+#--------------------------------------------------------------------
+# Locations of shared libraries
+#--------------------------------------------------------------------
+
+SharedPrefix := lib
+SharedLibDir := $(LibDir)
+LLVMSharedLibDir := $(LLVMLibDir)
+
+# Win32.DLL prefers to be located on the "PATH" of binaries.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ SharedLibDir := $(ToolDir)
+ LLVMSharedLibDir := $(LLVMToolDir)
+
+ ifeq ($(HOST_OS),Cygwin)
+ SharedPrefix := cyg
+ else
+ SharedPrefix :=
+ endif
+endif
+
+#--------------------------------------------------------------------
+# LLVM Capable Compiler
+#--------------------------------------------------------------------
+
+ifneq ($(findstring llvm-gcc,$(LLVMCC_OPTION)),)
+ LLVMCC := $(LLVMGCC)
+ LLVMCXX := $(LLVMGXX)
+else
+ ifneq ($(findstring clang,$(LLVMCC_OPTION)),)
+ ifneq ($(CLANGPATH),)
+ LLVMCC := $(CLANGPATH)
+ LLVMCXX := $(CLANGXXPATH)
+ else
+ ifeq ($(ENABLE_BUILT_CLANG),1)
+ LLVMCC := $(LLVMToolDir)/clang
+ LLVMCXX := $(LLVMToolDir)/clang++
+ endif
+ endif
+ endif
+endif
+
+#--------------------------------------------------------------------
+# Full Paths To Compiled Tools and Utilities
+#--------------------------------------------------------------------
+EchoCmd = $(ECHO) llvm[$(MAKELEVEL)]:
+Echo = @$(EchoCmd)
+ifndef LLVMAS
+LLVMAS := $(LLVMToolDir)/llvm-as$(EXEEXT)
+endif
+ifndef LLVM_TBLGEN
+ ifeq ($(LLVM_CROSS_COMPILING),1)
+ LLVM_TBLGEN := $(BuildLLVMToolDir)/llvm-tblgen$(BUILD_EXEEXT)
+ else
+ LLVM_TBLGEN := $(LLVMToolDir)/llvm-tblgen$(EXEEXT)
+ endif
+endif
+LLVM_CONFIG := $(LLVMToolDir)/llvm-config
+ifndef LLVMLD
+LLVMLD := $(LLVMToolDir)/llvm-ld$(EXEEXT)
+endif
+ifndef LLVMDIS
+LLVMDIS := $(LLVMToolDir)/llvm-dis$(EXEEXT)
+endif
+ifndef LLI
+LLI := $(LLVMToolDir)/lli$(EXEEXT)
+endif
+ifndef LLC
+LLC := $(LLVMToolDir)/llc$(EXEEXT)
+endif
+ifndef LOPT
+LOPT := $(LLVMToolDir)/opt$(EXEEXT)
+endif
+ifndef LBUGPOINT
+LBUGPOINT := $(LLVMToolDir)/bugpoint$(EXEEXT)
+endif
+
+#--------------------------------------------------------------------
+# Adjust to user's request
+#--------------------------------------------------------------------
+
+ifeq ($(HOST_OS),Darwin)
+ DARWIN_VERSION := `sw_vers -productVersion`
+ # Strip a number like 10.4.7 to 10.4
+ DARWIN_VERSION := $(shell echo $(DARWIN_VERSION)| sed -E 's/(10.[0-9]).*/\1/')
+ # Get "4" out of 10.4 for later pieces in the makefile.
+ DARWIN_MAJVERS := $(shell echo $(DARWIN_VERSION)| sed -E 's/10.([0-9]).*/\1/')
+
+ LoadableModuleOptions := -Wl,-flat_namespace -Wl,-undefined,suppress
+ SharedLinkOptions := -dynamiclib
+ ifneq ($(ARCH),ARM)
+ SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
+else
+ SharedLinkOptions=-shared
+endif
+
+ifeq ($(TARGET_OS),Darwin)
+ ifneq ($(ARCH),ARM)
+ TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+ endif
+endif
+
+ifdef SHARED_LIBRARY
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS),Darwin)
+ LD.Flags += $(RPATH) -Wl,'$$ORIGIN'
+endif
+endif
+endif
+
+ifdef TOOL_VERBOSE
+ C.Flags += -v
+ CXX.Flags += -v
+ LD.Flags += -v
+ VERBOSE := 1
+endif
+
+# Adjust settings for verbose mode
+ifndef VERBOSE
+ Verb := @
+ AR.Flags += >/dev/null 2>/dev/null
+ ConfigureScriptFLAGS += >$(PROJ_OBJ_DIR)/configure.out 2>&1
+else
+ ConfigureScriptFLAGS :=
+endif
+
+# By default, strip symbol information from executable
+ifndef KEEP_SYMBOLS
+ Strip := $(PLATFORMSTRIPOPTS)
+ StripWarnMsg := "(without symbols)"
+ Install.StripFlag += -s
+endif
+
+ifdef TOOL_NO_EXPORTS
+ DynamicFlag :=
+else
+ DynamicFlag := $(RDYNAMIC)
+endif
+
+# Adjust linker flags for building an executable
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS), Darwin)
+ifdef TOOLNAME
+ LD.Flags += $(RPATH) -Wl,'$$ORIGIN/../lib'
+ ifdef EXAMPLE_TOOL
+ LD.Flags += $(RPATH) -Wl,$(ExmplDir) $(DynamicFlag)
+ else
+ LD.Flags += $(RPATH) -Wl,$(ToolDir) $(DynamicFlag)
+ endif
+endif
+else
+ifneq ($(DARWIN_MAJVERS),4)
+ LD.Flags += $(RPATH) -Wl,@executable_path/../lib
+endif
+endif
+endif
+
+
+#----------------------------------------------------------
+# Options To Invoke Tools
+#----------------------------------------------------------
+
+ifdef EXTRA_LD_OPTIONS
+LD.Flags += $(EXTRA_LD_OPTIONS)
+endif
+
+ifndef NO_PEDANTIC
+CompileCommonOpts += -pedantic -Wno-long-long
+endif
+CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
+ $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT)
+# Enable cast-qual for C++; the workaround is to use const_cast.
+CXX.Flags += -Wcast-qual
+
+ifeq ($(HOST_OS),HP-UX)
+ CompileCommonOpts := -D_REENTRANT -D_HPUX_SOURCE
+endif
+
+# If we are building a universal binary on Mac OS/X, pass extra options. This
+# is useful to people who want to link the LLVM libraries into their universal
+# apps.
+#
+# The following can be optionally specified:
+# UNIVERSAL_SDK_PATH variable can be specified as a path to the SDK to use.
+# For Mac OS/X 10.4 Intel machines, the traditional one is:
+# UNIVERSAL_SDK_PATH=/Developer/SDKs/MacOSX10.4u.sdk/
+# UNIVERSAL_ARCH can be optionally specified to be a list of architectures
+# to build for, e.g. UNIVERSAL_ARCH="i386 ppc ppc64". This defaults to
+# i386/ppc only.
+ifdef UNIVERSAL
+ ifndef UNIVERSAL_ARCH
+ UNIVERSAL_ARCH := i386 ppc
+ endif
+ UNIVERSAL_ARCH_OPTIONS := $(UNIVERSAL_ARCH:%=-arch %)
+ CompileCommonOpts += $(UNIVERSAL_ARCH_OPTIONS)
+ ifdef UNIVERSAL_SDK_PATH
+ CompileCommonOpts += -isysroot $(UNIVERSAL_SDK_PATH)
+ endif
+
+ # Universal builds cannot compute dependencies automatically.
+ DISABLE_AUTO_DEPENDENCIES=1
+else
+ ifeq ($(TARGET_OS),Darwin)
+ ifeq ($(ARCH),x86_64)
+ TargetCommonOpts = -m64
+ else
+ ifeq ($(ARCH),x86)
+ TargetCommonOpts = -m32
+ endif
+ endif
+ endif
+endif
+
+ifeq ($(HOST_OS),SunOS)
+CPP.BaseFlags += -include llvm/Support/Solaris.h
+endif
+
+ifeq ($(HOST_OS),AuroraUX)
+CPP.BaseFlags += -include llvm/Support/Solaris.h
+endif # !HOST_OS - AuroraUX.
+
+LD.Flags += -L$(LibDir) -L$(LLVMLibDir)
+CPP.BaseFlags += -D_GNU_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS
+# All -I flags should go here, so that they don't confuse llvm-config.
+CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
+ $(patsubst %,-I%/include,\
+ $(PROJ_OBJ_ROOT) $(PROJ_SRC_ROOT) \
+ $(LLVM_OBJ_ROOT) $(LLVM_SRC_ROOT))) \
+ $(CPP.BaseFlags)
+
+# SHOW_DIAGNOSTICS support.
+ifeq ($(SHOW_DIAGNOSTICS),1)
+ Compile.Wrapper := env CC_LOG_DIAGNOSTICS=1 \
+ CC_LOG_DIAGNOSTICS_FILE="$(LLVM_OBJ_ROOT)/$(BuildMode)/diags"
+else
+ Compile.Wrapper :=
+endif
+
+ifeq ($(BUILD_COMPONENT), 1)
+ Compile.C = $(Compile.Wrapper) \
+ $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -c
+ Compile.CXX = $(Compile.Wrapper) \
+ $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+ $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -c
+ Preprocess.CXX= $(Compile.Wrapper) \
+ $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \
+ $(CompileCommonOpts) $(CXX.Flags) -E
+ Link = $(Compile.Wrapper) \
+ $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+ $(LD.Flags) $(LDFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
+else
+ Compile.C = $(Compile.Wrapper) \
+ $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -c
+ Compile.CXX = $(Compile.Wrapper) \
+ $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -c
+ Preprocess.CXX= $(Compile.Wrapper) \
+ $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \
+ $(CompileCommonOpts) $(CXX.Flags) -E
+ Link = $(Compile.Wrapper) \
+ $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LD.Flags) \
+ $(LDFLAGS) $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
+endif
+
+BCCompile.C = $(LLVMCC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts)
+Preprocess.C = $(CC) $(CPP.Flags) $(C.Flags) $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts) -E
+
+BCCompile.CXX = $(LLVMCXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
+ $(TargetCommonOpts) $(CompileCommonOpts)
+
+ProgInstall = $(INSTALL) $(Install.StripFlag) -m 0755
+ScriptInstall = $(INSTALL) -m 0755
+DataInstall = $(INSTALL) -m 0644
+
+# When compiling under Mingw/Cygwin, the tblgen tool expects Windows
+# paths. In this case, the SYSPATH function (defined in
+# Makefile.config) transforms Unix paths into Windows paths.
+TableGen.Flags= -I $(call SYSPATH, $(PROJ_SRC_DIR)) \
+ -I $(call SYSPATH, $(LLVM_SRC_ROOT)/include) \
+ -I $(call SYSPATH, $(PROJ_SRC_ROOT)/include) \
+ -I $(call SYSPATH, $(PROJ_SRC_ROOT)/lib/Target)
+LLVMTableGen = $(LLVM_TBLGEN) $(TableGen.Flags)
+
+Archive = $(AR) $(AR.Flags)
+LArchive = $(LLVMToolDir)/llvm-ar rcsf
+ifdef RANLIB
+Ranlib = $(RANLIB)
+else
+Ranlib = ranlib
+endif
+
+AliasTool = ln -s
+
+#----------------------------------------------------------
+# Get the list of source files and compute object file
+# names from them.
+#----------------------------------------------------------
+
+ifndef SOURCES
+ Sources := $(notdir $(wildcard $(PROJ_SRC_DIR)/*.cpp \
+ $(PROJ_SRC_DIR)/*.cc $(PROJ_SRC_DIR)/*.c))
+else
+ Sources := $(SOURCES)
+endif
+
+ifdef BUILT_SOURCES
+Sources += $(filter %.cpp %.c %.cc,$(BUILT_SOURCES))
+endif
+
+BaseNameSources := $(sort $(basename $(Sources)))
+
+ObjectsO := $(BaseNameSources:%=$(ObjDir)/%.o)
+ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc)
+
+#----------------------------------------------------------
+# For Mingw MSYS bash and Python/w32:
+#
+# $(ECHOPATH) prints a DOSish path string.
+# ex) $(ECHOPATH) /include/sys/types.h
+# --> C:/mingw/include/sys/types.h
+# built-in "echo" does not transform path to DOSish path.
+#
+# FIXME: This would not be needed if MSYS
+# provided its own python.
+#----------------------------------------------------------
+
+ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE)))
+ ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])"
+else
+ ECHOPATH := $(Verb)$(ECHO)
+endif
+
+###############################################################################
+# DIRECTORIES: Handle recursive descent of directory structure
+###############################################################################
+
+#---------------------------------------------------------
+# Provide rules to make install dirs. This must be early
+# in the file so they get built before dependencies
+#---------------------------------------------------------
+
+$(DESTDIR)$(PROJ_bindir) $(DESTDIR)$(PROJ_libdir) $(DESTDIR)$(PROJ_includedir) $(DESTDIR)$(PROJ_etcdir)::
+ $(Verb) $(MKDIR) $@
+
+# To create other directories, as needed, and timestamp their creation
+%/.dir:
+ $(Verb) $(MKDIR) $* > /dev/null
+ $(Verb) $(DOTDIR_TIMESTAMP_COMMAND) > $@
+
+.PRECIOUS: $(ObjDir)/.dir $(LibDir)/.dir $(ToolDir)/.dir $(ExmplDir)/.dir
+.PRECIOUS: $(LLVMLibDir)/.dir $(LLVMToolDir)/.dir $(LLVMExmplDir)/.dir
+
+#---------------------------------------------------------
+# Handle the DIRS options for sequential construction
+#---------------------------------------------------------
+
+SubDirs :=
+ifdef DIRS
+SubDirs += $(DIRS)
+
+ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
+$(RecursiveTargets)::
+ $(Verb) for dir in $(DIRS); do \
+ if ([ ! -f $$dir/Makefile ] || \
+ command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
+ $(MKDIR) $$dir; \
+ $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
+ fi; \
+ ($(MAKE) -C $$dir $@ ) || exit 1; \
+ done
+else
+$(RecursiveTargets)::
+ $(Verb) for dir in $(DIRS); do \
+ ($(MAKE) -C $$dir $@ ) || exit 1; \
+ done
+endif
+
+endif
+
+#---------------------------------------------------------
+# Handle the EXPERIMENTAL_DIRS options ensuring success
+# after each directory is built.
+#---------------------------------------------------------
+ifdef EXPERIMENTAL_DIRS
+$(RecursiveTargets)::
+ $(Verb) for dir in $(EXPERIMENTAL_DIRS); do \
+ if ([ ! -f $$dir/Makefile ] || \
+ command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
+ $(MKDIR) $$dir; \
+ $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
+ fi; \
+ ($(MAKE) -C $$dir $@ ) || exit 0; \
+ done
+endif
+
+#-----------------------------------------------------------
+# Handle the OPTIONAL_PARALLEL_DIRS options for optional parallel construction
+#-----------------------------------------------------------
+ifdef OPTIONAL_PARALLEL_DIRS
+ PARALLEL_DIRS += $(foreach T,$(OPTIONAL_PARALLEL_DIRS),$(shell test -d $(PROJ_SRC_DIR)/$(T) && echo "$(T)"))
+endif
+
+#-----------------------------------------------------------
+# Handle the PARALLEL_DIRS options for parallel construction
+#-----------------------------------------------------------
+ifdef PARALLEL_DIRS
+
+SubDirs += $(PARALLEL_DIRS)
+
+# Unfortunately, this list must be maintained if new recursive targets are added
+all :: $(addsuffix /.makeall ,$(PARALLEL_DIRS))
+clean :: $(addsuffix /.makeclean ,$(PARALLEL_DIRS))
+clean-all:: $(addsuffix /.makeclean-all,$(PARALLEL_DIRS))
+install :: $(addsuffix /.makeinstall ,$(PARALLEL_DIRS))
+uninstall:: $(addsuffix /.makeuninstall,$(PARALLEL_DIRS))
+install-bytecode :: $(addsuffix /.makeinstall-bytecode,$(PARALLEL_DIRS))
+unitcheck:: $(addsuffix /.makeunitcheck,$(PARALLEL_DIRS))
+
+ParallelTargets := $(foreach T,$(RecursiveTargets),%/.make$(T))
+
+$(ParallelTargets) :
+ $(Verb) if ([ ! -f $(@D)/Makefile ] || \
+ command test $(@D)/Makefile -ot \
+ $(PROJ_SRC_DIR)/$(@D)/Makefile ); then \
+ $(MKDIR) $(@D); \
+ $(CP) $(PROJ_SRC_DIR)/$(@D)/Makefile $(@D)/Makefile; \
+ fi; \
+ $(MAKE) -C $(@D) $(subst $(@D)/.make,,$@)
+endif
+
+#---------------------------------------------------------
+# Handle the OPTIONAL_DIRS options for directories that may
+# or may not exist.
+#---------------------------------------------------------
+ifdef OPTIONAL_DIRS
+
+SubDirs += $(OPTIONAL_DIRS)
+
+ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
+$(RecursiveTargets)::
+ $(Verb) for dir in $(OPTIONAL_DIRS); do \
+ if [ -d $(PROJ_SRC_DIR)/$$dir ]; then\
+ if ([ ! -f $$dir/Makefile ] || \
+ command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
+ $(MKDIR) $$dir; \
+ $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
+ fi; \
+ ($(MAKE) -C $$dir $@ ) || exit 1; \
+ fi; \
+ done
+else
+$(RecursiveTargets)::
+ $(Verb) for dir in $(OPTIONAL_DIRS); do \
+ if [ -d $(PROJ_SRC_DIR)/$$dir ]; then\
+ ($(MAKE) -C $$dir $@ ) || exit 1; \
+ fi; \
+ done
+endif
+endif
+
+#---------------------------------------------------------
+# Handle the CONFIG_FILES options
+#---------------------------------------------------------
+ifdef CONFIG_FILES
+
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+install-local:: $(DESTDIR)$(PROJ_etcdir) $(CONFIG_FILES)
+ $(Echo) Installing Configuration Files To $(DESTDIR)$(PROJ_etcdir)
+ $(Verb)for file in $(CONFIG_FILES); do \
+ if test -f $(PROJ_OBJ_DIR)/$${file} ; then \
+ $(DataInstall) $(PROJ_OBJ_DIR)/$${file} $(DESTDIR)$(PROJ_etcdir) ; \
+ elif test -f $(PROJ_SRC_DIR)/$${file} ; then \
+ $(DataInstall) $(PROJ_SRC_DIR)/$${file} $(DESTDIR)$(PROJ_etcdir) ; \
+ else \
+ $(ECHO) Error: cannot find config file $${file}. ; \
+ fi \
+ done
+
+uninstall-local::
+ $(Echo) Uninstalling Configuration Files From $(DESTDIR)$(PROJ_etcdir)
+ $(Verb)for file in $(CONFIG_FILES); do \
+ $(RM) -f $(DESTDIR)$(PROJ_etcdir)/$${file} ; \
+ done
+endif
+
+endif
+
+###############################################################################
+# Set up variables for building libraries
+###############################################################################
+
+#---------------------------------------------------------
+# Define various command line options pertaining to the
+# libraries needed when linking. There are "Proj" libs
+# (defined by the user's project) and "LLVM" libs (defined
+# by the LLVM project).
+#---------------------------------------------------------
+
+ifdef USEDLIBS
+ProjLibsOptions := $(patsubst %.a.o, -l%, $(addsuffix .o, $(USEDLIBS)))
+ProjLibsOptions := $(patsubst %.o, $(LibDir)/%.o, $(ProjLibsOptions))
+ProjUsedLibs := $(patsubst %.a.o, lib%.a, $(addsuffix .o, $(USEDLIBS)))
+ProjLibsPaths := $(addprefix $(LibDir)/,$(ProjUsedLibs))
+endif
+
+ifdef LLVMLIBS
+LLVMLibsOptions := $(patsubst %.a.o, -l%, $(addsuffix .o, $(LLVMLIBS)))
+LLVMLibsOptions := $(patsubst %.o, $(LLVMLibDir)/%.o, $(LLVMLibsOptions))
+LLVMUsedLibs := $(patsubst %.a.o, lib%.a, $(addsuffix .o, $(LLVMLIBS)))
+LLVMLibsPaths := $(addprefix $(LLVMLibDir)/,$(LLVMUsedLibs))
+endif
+
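+# Worked example of the substitutions above (hypothetical values): with
+#   USEDLIBS := sample.a support
+# the addsuffix/patsubst chain yields
+#   ProjLibsOptions = -lsample $(LibDir)/support.o
+#   ProjLibsPaths   = $(LibDir)/libsample.a $(LibDir)/support.o
+# i.e. '.a' entries are linked as archives via -l, anything else as a
+# relinked object file.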
+# A loadable module on Win32 requires all symbols to be resolved at link
+# time. Linking against the 'all' component makes every symbol in LLVM.dll
+# available.
+ifeq ($(ENABLE_SHARED),1)
+ ifdef LOADABLE_MODULE
+ ifneq (,$(filter $(HOST_OS),Cygwin MingW))
+ LINK_COMPONENTS += all
+ endif
+ endif
+endif
+
+ifndef IS_CLEANING_TARGET
+ifdef LINK_COMPONENTS
+
+# If LLVM_CONFIG doesn't exist, build it. This can happen if you do a make
+# clean in tools, then do a make in tools (instead of at the top level).
+$(LLVM_CONFIG):
+ @echo "*** llvm-config doesn't exist - rebuilding it."
+ @$(MAKE) -C $(PROJ_OBJ_ROOT)/tools/llvm-config
+
+$(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT): $(LLVM_CONFIG)
+
+ifeq ($(ENABLE_SHARED), 1)
+# Use the linker's "auto-import" feature so that dllimport annotations are not needed.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+LLVMLibsOptions += -Wl,--enable-auto-import,--enable-runtime-pseudo-reloc \
+ -L $(SharedLibDir)
+endif
+LLVMLibsOptions += -lLLVM-$(LLVMVersion)
+LLVMLibsPaths += $(SharedLibDir)/$(SharedPrefix)LLVM-$(LLVMVersion)$(SHLIBEXT)
+else
+
+ifndef NO_LLVM_CONFIG
+LLVMConfigLibs := $(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error)
+ifeq ($(LLVMConfigLibs),Error)
+$(error llvm-config --libs failed)
+endif
+LLVMLibsOptions += $(LLVMConfigLibs)
+LLVMConfigLibfiles := $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS) || echo Error)
+ifeq ($(LLVMConfigLibfiles),Error)
+$(error llvm-config --libfiles failed)
+endif
+LLVMLibsPaths += $(LLVM_CONFIG) $(LLVMConfigLibfiles)
+endif
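+# For illustration (hypothetical component list): '$(LLVM_CONFIG) --libs
+# core bitreader' typically prints linker flags such as
+# '-lLLVMBitReader -lLLVMCore -lLLVMSupport', while --libfiles prints the
+# corresponding absolute archive paths used here as prerequisites.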
+
+endif
+endif
+endif
+
+# Set up the library exports file.
+ifdef EXPORTED_SYMBOL_FILE
+
+# First, set up the native export file, which may differ from the source
+# export file.
+
+ifeq ($(HOST_OS),Darwin)
+# Darwin convention prefixes symbols with underscores.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).sed
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+ $(Verb) sed -e 's/^/_/' < $< > $@
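+# e.g. an exports-file entry 'main' becomes '_main' in the generated .sed file.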
+clean-local::
+ -$(Verb) $(RM) -f $(NativeExportsFile)
+else
+ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
+# Gold and BFD ld require a version script rather than a plain list.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).map
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+ $(Verb) echo "{" > $@
+ $(Verb) grep -q "\<" $< && echo " global:" >> $@ || :
+ $(Verb) sed -e 's/$$/;/' -e 's/^/ /' < $< >> $@
+ifneq ($(HOST_OS),OpenBSD)
+ $(Verb) echo " local: *;" >> $@
+endif
+ $(Verb) echo "};" >> $@
+clean-local::
+ -$(Verb) $(RM) -f $(NativeExportsFile)
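+# Sketch of the generated script for a hypothetical exports file listing
+# just 'main' (on a non-OpenBSD host):
+#   {
+#     global:
+#     main;
+#     local: *;
+#   };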
+else
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# GNU ld Win32 accepts .DEF files that contain "DATA" entries.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE:.exports=.def))
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+ $(Echo) Generating $(notdir $@)
+ $(Verb) $(ECHO) "EXPORTS" > $@
+ $(Verb) $(CAT) $< >> $@
+clean-local::
+ -$(Verb) $(RM) -f $(NativeExportsFile)
+else
+# Default behavior: just use the exports file verbatim.
+NativeExportsFile := $(EXPORTED_SYMBOL_FILE)
+endif
+endif
+endif
+
+# Now add the linker command-line options to use the native export file.
+
+# Darwin
+ifeq ($(HOST_OS),Darwin)
+LLVMLibsOptions += -Wl,-exported_symbols_list,$(NativeExportsFile)
+endif
+
+# gold, bfd ld, etc.
+ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
+LLVMLibsOptions += -Wl,--version-script,$(NativeExportsFile)
+endif
+
+# Windows
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# LLVMLibsOptions is discarded when processing tools/llvm-shlib, so pass
+# the export file through SharedLinkOptions instead.
+SharedLinkOptions += $(NativeExportsFile)
+endif
+
+endif
+
+###############################################################################
+# Library Build Rules: Four ways to build a library
+###############################################################################
+
+#---------------------------------------------------------
+# Bytecode Module Targets:
+# If the user set MODULE_NAME then they want to build a
+# bytecode module from the sources. We compile all the
+# sources and link them together into a single bytecode
+# module.
+#---------------------------------------------------------
+
+ifdef MODULE_NAME
+ifeq ($(strip $(LLVMCC)),)
+$(warning Modules require an LLVM-capable compiler but none is available ****)
+else
+
+Module := $(LibDir)/$(MODULE_NAME).bc
+LinkModule := $(LLVMLD) -r
+
+
+ifdef EXPORTED_SYMBOL_FILE
+LinkModule += -internalize-public-api-file=$(EXPORTED_SYMBOL_FILE)
+endif
+
+$(Module): $(BUILT_SOURCES) $(ObjectsBC) $(LibDir)/.dir $(LLVMLD)
+ $(Echo) Building $(BuildMode) Bytecode Module $(notdir $@)
+ $(Verb) $(LinkModule) -o $@ $(ObjectsBC)
+
+all-local:: $(Module)
+
+clean-local::
+ifneq ($(strip $(Module)),)
+ -$(Verb) $(RM) -f $(Module)
+endif
+
+ifdef BYTECODE_DESTINATION
+ModuleDestDir := $(BYTECODE_DESTINATION)
+else
+ModuleDestDir := $(DESTDIR)$(PROJ_libdir)
+endif
+
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+DestModule := $(ModuleDestDir)/$(MODULE_NAME).bc
+
+install-module:: $(DestModule)
+install-local:: $(DestModule)
+
+$(DestModule): $(ModuleDestDir) $(Module)
+ $(Echo) Installing $(BuildMode) Bytecode Module $(DestModule)
+ $(Verb) $(DataInstall) $(Module) $(DestModule)
+
+uninstall-local::
+ $(Echo) Uninstalling $(BuildMode) Bytecode Module $(DestModule)
+ -$(Verb) $(RM) -f $(DestModule)
+endif
+
+endif
+endif
+
+# if we're building a library ...
+ifdef LIBRARYNAME
+
+# Make sure there isn't any extraneous whitespace on the LIBRARYNAME option
+LIBRARYNAME := $(strip $(LIBRARYNAME))
+ifdef LOADABLE_MODULE
+BaseLibName.A := $(LIBRARYNAME).a
+BaseLibName.SO := $(LIBRARYNAME)$(SHLIBEXT)
+else
+BaseLibName.A := lib$(LIBRARYNAME).a
+BaseLibName.SO := $(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT)
+endif
+LibName.A := $(LibDir)/$(BaseLibName.A)
+LibName.SO := $(SharedLibDir)/$(BaseLibName.SO)
+LibName.O := $(LibDir)/$(LIBRARYNAME).o
+LibName.BCA:= $(LibDir)/lib$(LIBRARYNAME).bca
+
+#---------------------------------------------------------
+# Shared Library Targets:
+# If the user asked for a shared library to be built
+# with the SHARED_LIBRARY variable, then we provide
+# targets for building it.
+#---------------------------------------------------------
+ifdef SHARED_LIBRARY
+
+all-local:: $(LibName.SO)
+
+ifdef EXPORTED_SYMBOL_FILE
+$(LibName.SO): $(NativeExportsFile)
+endif
+
+ifdef LINK_LIBS_IN_SHARED
+ifdef LOADABLE_MODULE
+SharedLibKindMessage := "Loadable Module"
+SharedLinkOptions := $(LoadableModuleOptions) $(SharedLinkOptions)
+else
+SharedLibKindMessage := "Shared Library"
+endif
+$(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(SharedLibDir)/.dir
+ $(Echo) Linking $(BuildMode) $(SharedLibKindMessage) \
+ $(notdir $@)
+ $(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO) \
+ $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS)
+else
+$(LibName.SO): $(ObjectsO) $(SharedLibDir)/.dir
+ $(Echo) Linking $(BuildMode) Shared Library $(notdir $@)
+ $(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO)
+endif
+
+clean-local::
+ifneq ($(strip $(LibName.SO)),)
+ -$(Verb) $(RM) -f $(LibName.SO)
+endif
+
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+
+# On Win32, a DLL must be found on the PATH at run time, so install it
+# next to the binaries in bindir rather than in libdir.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+DestSharedLibDir := $(DESTDIR)$(PROJ_bindir)
+else
+DestSharedLibDir := $(DESTDIR)$(PROJ_libdir)
+endif
+DestSharedLib := $(DestSharedLibDir)/$(BaseLibName.SO)
+
+install-local:: $(DestSharedLib)
+
+$(DestSharedLib): $(LibName.SO) $(DestSharedLibDir)
+ $(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib)
+ $(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib)
+
+uninstall-local::
+ $(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib)
+ -$(Verb) $(RM) -f $(DestSharedLibDir)/$(SharedPrefix)$(LIBRARYNAME).*
+endif
+endif
+
+#---------------------------------------------------------
+# Bytecode Library Targets:
+# If the user asked for a bytecode library to be built
+# with the BYTECODE_LIBRARY variable, then we provide
+# targets for building it.
+#---------------------------------------------------------
+ifdef BYTECODE_LIBRARY
+ifeq ($(strip $(LLVMCC)),)
+$(warning Bytecode libraries require an LLVM-capable compiler but none is available ****)
+else
+
+all-local:: $(LibName.BCA)
+
+ifdef EXPORTED_SYMBOL_FILE
+BCLinkLib = $(LLVMLD) -internalize-public-api-file=$(EXPORTED_SYMBOL_FILE)
+
+$(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir $(LLVMLD) \
+ $(LLVMToolDir)/llvm-ar
+ $(Echo) Building $(BuildMode) Bytecode Archive $(notdir $@) \
+ "(internalize)"
+ $(Verb) $(BCLinkLib) -o $(ObjDir)/$(LIBRARYNAME).internalize $(ObjectsBC)
+ $(Verb) $(RM) -f $@
+ $(Verb) $(LArchive) $@ $(ObjDir)/$(LIBRARYNAME).internalize.bc
+else
+$(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir \
+ $(LLVMToolDir)/llvm-ar
+ $(Echo) Building $(BuildMode) Bytecode Archive $(notdir $@)
+ $(Verb) $(RM) -f $@
+ $(Verb) $(LArchive) $@ $(ObjectsBC)
+
+endif
+
+clean-local::
+ifneq ($(strip $(LibName.BCA)),)
+ -$(Verb) $(RM) -f $(LibName.BCA)
+endif
+
+ifdef BYTECODE_DESTINATION
+BytecodeDestDir := $(BYTECODE_DESTINATION)
+else
+BytecodeDestDir := $(DESTDIR)$(PROJ_libdir)
+endif
+
+DestBytecodeLib = $(BytecodeDestDir)/lib$(LIBRARYNAME).bca
+
+install-bytecode-local:: $(DestBytecodeLib)
+
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+install-local:: $(DestBytecodeLib)
+
+$(DestBytecodeLib): $(LibName.BCA) $(BytecodeDestDir)
+ $(Echo) Installing $(BuildMode) Bytecode Archive $(DestBytecodeLib)
+ $(Verb) $(DataInstall) $(LibName.BCA) $(DestBytecodeLib)
+
+uninstall-local::
+ $(Echo) Uninstalling $(BuildMode) Bytecode Archive $(DestBytecodeLib)
+ -$(Verb) $(RM) -f $(DestBytecodeLib)
+endif
+endif
+endif
+
+#---------------------------------------------------------
+# Library Targets:
+# If neither BUILD_ARCHIVE nor LOADABLE_MODULE is specified, default to
+# building an archive.
+#---------------------------------------------------------
+ifndef NO_BUILD_ARCHIVE
+ifndef BUILD_ARCHIVE
+ifndef LOADABLE_MODULE
+BUILD_ARCHIVE = 1
+endif
+endif
+endif
+
+#---------------------------------------------------------
+# Archive Library Targets:
+# If the user wanted a regular archive library built,
+# then we provide targets for building it.
+#---------------------------------------------------------
+ifdef BUILD_ARCHIVE
+
+all-local:: $(LibName.A)
+
+$(LibName.A): $(ObjectsO) $(LibDir)/.dir
+ $(Echo) Building $(BuildMode) Archive Library $(notdir $@)
+ -$(Verb) $(RM) -f $@
+ $(Verb) $(Archive) $@ $(ObjectsO)
+ $(Verb) $(Ranlib) $@
+
+clean-local::
+ifneq ($(strip $(LibName.A)),)
+ -$(Verb) $(RM) -f $(LibName.A)
+endif
+
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+ifdef NO_INSTALL_ARCHIVES
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+DestArchiveLib := $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME).a
+
+install-local:: $(DestArchiveLib)
+
+$(DestArchiveLib): $(LibName.A) $(DESTDIR)$(PROJ_libdir)
+ $(Echo) Installing $(BuildMode) Archive Library $(DestArchiveLib)
+ $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_libdir)
+ $(Verb) $(INSTALL) $(LibName.A) $(DestArchiveLib)
+
+uninstall-local::
+ $(Echo) Uninstalling $(BuildMode) Archive Library $(DestArchiveLib)
+ -$(Verb) $(RM) -f $(DestArchiveLib)
+endif
+endif
+endif
+
+# endif LIBRARYNAME
+endif
+
+###############################################################################
+# Tool Build Rules: Build executable tool based on TOOLNAME option
+###############################################################################
+
+ifdef TOOLNAME
+
+#---------------------------------------------------------
+# Set up variables for building a tool.
+#---------------------------------------------------------
+TOOLEXENAME := $(strip $(TOOLNAME))$(EXEEXT)
+ifdef EXAMPLE_TOOL
+ToolBuildPath := $(ExmplDir)/$(TOOLEXENAME)
+else
+ToolBuildPath := $(ToolDir)/$(TOOLEXENAME)
+endif
+
+# TOOLALIAS is a name to symlink (or copy) the tool to.
+ifdef TOOLALIAS
+ifdef EXAMPLE_TOOL
+ToolAliasBuildPath := $(ExmplDir)/$(strip $(TOOLALIAS))$(EXEEXT)
+else
+ToolAliasBuildPath := $(ToolDir)/$(strip $(TOOLALIAS))$(EXEEXT)
+endif
+endif
+
+#---------------------------------------------------------
+# Prune Exports
+#---------------------------------------------------------
+
+# If the tool opts in with TOOL_NO_EXPORTS, optimize startup time of the app by
+# not exporting all of the weak symbols from the binary. This reduces dyld
+# startup time by 4x on Darwin in some cases.
+ifdef TOOL_NO_EXPORTS
+ifeq ($(HOST_OS),Darwin)
+
+# Tiger tools don't support this.
+ifneq ($(DARWIN_MAJVERS),4)
+LD.Flags += -Wl,-exported_symbol,_main
+endif
+endif
+
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD))
+ifneq ($(ARCH), Mips)
+ LD.Flags += -Wl,--version-script=$(PROJ_SRC_ROOT)/autoconf/ExportMap.map
+endif
+endif
+endif
+
+#---------------------------------------------------------
+# Tool Order File Support
+#---------------------------------------------------------
+
+ifeq ($(HOST_OS),Darwin)
+ifdef TOOL_ORDER_FILE
+
+LD.Flags += -Wl,-order_file,$(TOOL_ORDER_FILE)
+
+endif
+endif
+
+#---------------------------------------------------------
+# Tool Version Info Support
+#---------------------------------------------------------
+
+ifeq ($(HOST_OS),Darwin)
+ifdef TOOL_INFO_PLIST
+
+LD.Flags += -Wl,-sectcreate,__TEXT,__info_plist,$(ObjDir)/$(TOOL_INFO_PLIST)
+
+$(ToolBuildPath): $(ObjDir)/$(TOOL_INFO_PLIST)
+
+$(ObjDir)/$(TOOL_INFO_PLIST): $(PROJ_SRC_DIR)/$(TOOL_INFO_PLIST).in $(ObjDir)/.dir
+ $(Echo) "Creating $(TOOLNAME) '$(TOOL_INFO_PLIST)' file..."
+ $(Verb)sed -e "s#@TOOL_INFO_UTI@#$(TOOL_INFO_UTI)#g" \
+ -e "s#@TOOL_INFO_NAME@#$(TOOL_INFO_NAME)#g" \
+ -e "s#@TOOL_INFO_VERSION@#$(TOOL_INFO_VERSION)#g" \
+ -e "s#@TOOL_INFO_BUILD_VERSION@#$(TOOL_INFO_BUILD_VERSION)#g" \
+ $< > $@
+
+endif
+endif
+
+#---------------------------------------------------------
+# Provide targets for building the tools
+#---------------------------------------------------------
+all-local:: $(ToolBuildPath) $(ToolAliasBuildPath)
+
+clean-local::
+ifneq ($(strip $(ToolBuildPath)),)
+ -$(Verb) $(RM) -f $(ToolBuildPath)
+endif
+ifneq ($(strip $(ToolAliasBuildPath)),)
+ -$(Verb) $(RM) -f $(ToolAliasBuildPath)
+endif
+
+ifdef EXAMPLE_TOOL
+$(ToolBuildPath): $(ExmplDir)/.dir
+else
+$(ToolBuildPath): $(ToolDir)/.dir
+endif
+
+$(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths)
+ $(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg)
+ $(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \
+ $(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS)
+ $(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \
+ $(StripWarnMsg)
+
+ifneq ($(strip $(ToolAliasBuildPath)),)
+$(ToolAliasBuildPath): $(ToolBuildPath)
+ $(Echo) Creating $(BuildMode) Alias $(TOOLALIAS) $(StripWarnMsg)
+ $(Verb) $(RM) -f $(ToolAliasBuildPath)
+ $(Verb) $(AliasTool) $(TOOLEXENAME) $(ToolAliasBuildPath)
+ $(Echo) ======= Finished Creating $(BuildMode) Alias $(TOOLALIAS) \
+ $(StripWarnMsg)
+endif
+
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+DestTool = $(DESTDIR)$(PROJ_bindir)/$(TOOLEXENAME)
+
+install-local:: $(DestTool)
+
+$(DestTool): $(ToolBuildPath) $(DESTDIR)$(PROJ_bindir)
+ $(Echo) Installing $(BuildMode) $(DestTool)
+ $(Verb) $(ProgInstall) $(ToolBuildPath) $(DestTool)
+
+uninstall-local::
+ $(Echo) Uninstalling $(BuildMode) $(DestTool)
+ -$(Verb) $(RM) -f $(DestTool)
+
+# TOOLALIAS install.
+ifdef TOOLALIAS
+DestToolAlias = $(DESTDIR)$(PROJ_bindir)/$(TOOLALIAS)$(EXEEXT)
+
+install-local:: $(DestToolAlias)
+
+$(DestToolAlias): $(DestTool)
+ $(Echo) Installing $(BuildMode) $(DestToolAlias)
+ $(Verb) $(RM) -f $(DestToolAlias)
+ $(Verb) $(AliasTool) $(TOOLEXENAME) $(DestToolAlias)
+
+uninstall-local::
+ $(Echo) Uninstalling $(BuildMode) $(DestToolAlias)
+ -$(Verb) $(RM) -f $(DestToolAlias)
+endif
+
+endif
+endif
+
+###############################################################################
+# Object Build Rules: Build object files based on sources
+###############################################################################
+
+# FIXME: This should be checking for "if not GCC or ICC", not for "if HP-UX"
+ifeq ($(HOST_OS),HP-UX)
+ DISABLE_AUTO_DEPENDENCIES=1
+endif
+
+# Provide rule sets for when dependency generation is enabled
+ifndef DISABLE_AUTO_DEPENDENCIES
+
+#---------------------------------------------------------
+# Create .o files in the ObjDir directory from the .cpp, .cc, .c, .mm and .m files...
+#---------------------------------------------------------
+
+DEPEND_OPTIONS = -MMD -MP -MF "$(ObjDir)/$*.d.tmp" \
+ -MT "$(ObjDir)/$*.o" -MT "$(ObjDir)/$*.d"
+
+# If the build succeeded, move the dependency file over, otherwise
+# remove it.
+DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.d.tmp" "$(ObjDir)/$*.d"; \
+ else $(RM) "$(ObjDir)/$*.d.tmp"; exit 1; fi
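+# Sketch of the assembled shell command for a hypothetical foo.cpp: the
+# compiler emits foo.d.tmp as a side effect, and the file is committed only
+# on success, so a failed compile never leaves a stale dependency file:
+#   if $(Compile.CXX) -MMD ... foo.cpp -o $(ObjDir)/foo.o; \
+#   then $(MV) -f $(ObjDir)/foo.d.tmp $(ObjDir)/foo.d; \
+#   else $(RM) $(ObjDir)/foo.d.tmp; exit 1; fi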
+
+$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
+ $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG)
+ $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+ $(DEPEND_MOVEFILE)
+
+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
+ $(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
+ $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+ $(DEPEND_MOVEFILE)
+
+$(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
+ $(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
+ $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+ $(DEPEND_MOVEFILE)
+
+$(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
+ $(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG)
+ $(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+ $(DEPEND_MOVEFILE)
+
+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE)
+ $(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
+ $(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \
+ $(DEPEND_MOVEFILE)
+
+#---------------------------------------------------------
+# Create .ll files in the ObjDir directory from the .cpp, .cc, .c, .mm and
+# .m files (these are converted to .bc files further below)...
+#---------------------------------------------------------
+
+BC_DEPEND_OPTIONS = -MMD -MP -MF "$(ObjDir)/$*.bc.d.tmp" \
+ -MT "$(ObjDir)/$*.ll" -MT "$(ObjDir)/$*.bc.d"
+
+# If the build succeeded, move the dependency file over, otherwise
+# remove it.
+BC_DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.bc.d.tmp" "$(ObjDir)/$*.bc.d"; \
+ else $(RM) "$(ObjDir)/$*.bc.d.tmp"; exit 1; fi
+
+$(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+ $(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
+ $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
+ $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
+ $(BC_DEPEND_MOVEFILE)
+
+$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+ $(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
+ $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
+ $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
+ $(BC_DEPEND_MOVEFILE)
+
+$(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+ $(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
+ $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \
+ $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
+ $(BC_DEPEND_MOVEFILE)
+
+$(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+ $(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
+ $(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
+ $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
+ $(BC_DEPEND_MOVEFILE)
+
+$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+ $(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
+ $(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \
+ $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \
+ $(BC_DEPEND_MOVEFILE)
+
+# Provide alternate rule sets if dependencies are disabled
+else
+
+$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.CXX) $< -o $@
+
+$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.CXX) $< -o $@
+
+$(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.CXX) $< -o $@
+
+$(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.C) $< -o $@
+
+$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.C) $< -o $@
+
+$(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+ $(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)"
+ $(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
+
+$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+ $(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)"
+ $(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
+
+$(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX)
+ $(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)"
+ $(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
+
+$(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+ $(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)"
+ $(BCCompile.C) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
+
+$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC)
+ $(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)"
+ $(BCCompile.C) $< -o $@ -S $(LLVMCC_EMITIR_FLAG)
+
+endif
+
+
+## Rules for building preprocessed (.i/.ii) outputs.
+$(BuildMode)/%.ii: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.cpp for $(BuildMode) build to .ii file"
+ $(Verb) $(Preprocess.CXX) $< -o $@
+
+$(BuildMode)/%.ii: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.mm for $(BuildMode) build to .ii file"
+ $(Verb) $(Preprocess.CXX) $< -o $@
+
+$(BuildMode)/%.ii: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.cc for $(BuildMode) build to .ii file"
+ $(Verb) $(Preprocess.CXX) $< -o $@
+
+$(BuildMode)/%.i: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.c for $(BuildMode) build to .i file"
+ $(Verb) $(Preprocess.C) $< -o $@
+
+$(BuildMode)/%.i: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.m for $(BuildMode) build to .i file"
+ $(Verb) $(Preprocess.C) $< -o $@
+
+
+$(ObjDir)/%.s: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.cpp to asm for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.CXX) $< -o $@ -S
+
+$(ObjDir)/%.s: %.mm $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.mm to asm for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.CXX) $< -o $@ -S
+
+$(ObjDir)/%.s: %.cc $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.cc to asm for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.CXX) $< -o $@ -S
+
+$(ObjDir)/%.s: %.c $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.c to asm for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.C) $< -o $@ -S
+
+$(ObjDir)/%.s: %.m $(ObjDir)/.dir $(BUILT_SOURCES)
+ $(Echo) "Compiling $*.m to asm for $(BuildMode) build" $(PIC_FLAG)
+ $(Compile.C) $< -o $@ -S
+
+
+# Strip debug info out of bytecode libraries unless DEBUG_RUNTIME is set.
+ifdef DEBUG_RUNTIME
+$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT)
+ $(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)"
+ $(Verb) $(LOPT) $< -std-compile-opts -o $@
+else
+$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT)
+ $(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)"
+ $(Verb) $(LOPT) $< -std-compile-opts -strip-debug -o $@
+endif
+
+
+#---------------------------------------------------------
+# Provide rule to build .bc files from .ll sources,
+# regardless of dependencies
+#---------------------------------------------------------
+$(ObjDir)/%.bc: %.ll $(ObjDir)/.dir $(LLVMAS)
+ $(Echo) "Compiling $*.ll for $(BuildMode) build"
+ $(Verb) $(LLVMAS) $< -f -o $@
+
+###############################################################################
+# TABLEGEN: Provide rules for running tblgen to produce *.inc files
+###############################################################################
+
+ifdef TARGET
+TABLEGEN_INC_FILES_COMMON = 1
+endif
+
+ifdef TABLEGEN_INC_FILES_COMMON
+
+INCFiles := $(filter %.inc,$(BUILT_SOURCES))
+INCTMPFiles := $(INCFiles:%=$(ObjDir)/%.tmp)
+.PRECIOUS: $(INCTMPFiles) $(INCFiles)
+
+# INCFiles rule: All of the tblgen generated files are emitted to
+# $(ObjDir)/%.inc.tmp, instead of emitting them directly to %.inc. This allows
+# us to "touch" the real file only if its contents change. In other words,
+# if tblgen is modified, all of the .inc.tmp files are regenerated, but
+# dependents of the .inc files are rebuilt only when the contents of an
+# .inc file actually change.
+$(INCFiles) : %.inc : $(ObjDir)/%.inc.tmp
+ $(Verb) $(CMP) -s $@ $< || $(CP) $< $@
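+# e.g. (hypothetical target) X86GenRegisterInfo.inc.tmp is regenerated
+# whenever tblgen or a .td file changes, but X86GenRegisterInfo.inc (and
+# therefore everything compiled from it) is touched only when the generated
+# contents actually differ.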
+
+endif # TABLEGEN_INC_FILES_COMMON
+
+ifdef TARGET
+
+TDFiles := $(strip $(wildcard $(PROJ_SRC_DIR)/*.td) \
+ $(LLVM_SRC_ROOT)/include/llvm/Target/Target.td \
+ $(LLVM_SRC_ROOT)/include/llvm/Target/TargetCallingConv.td \
+ $(LLVM_SRC_ROOT)/include/llvm/Target/TargetSchedule.td \
+ $(LLVM_SRC_ROOT)/include/llvm/Target/TargetSelectionDAG.td \
+ $(LLVM_SRC_ROOT)/include/llvm/CodeGen/ValueTypes.td) \
+ $(wildcard $(LLVM_SRC_ROOT)/include/llvm/Intrinsics*.td)
+
+# All .inc.tmp files depend on the .td files.
+$(INCTMPFiles) : $(TDFiles)
+
+$(TARGET:%=$(ObjDir)/%GenRegisterInfo.inc.tmp): \
+$(ObjDir)/%GenRegisterInfo.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) register info implementation with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-register-info -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenInstrInfo.inc.tmp): \
+$(ObjDir)/%GenInstrInfo.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) instruction information with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-instr-info -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenAsmWriter.inc.tmp): \
+$(ObjDir)/%GenAsmWriter.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) assembly writer with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-asm-writer -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenAsmWriter1.inc.tmp): \
+$(ObjDir)/%GenAsmWriter1.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) assembly writer #1 with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-asm-writer -asmwriternum=1 -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenAsmMatcher.inc.tmp): \
+$(ObjDir)/%GenAsmMatcher.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) assembly matcher with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-asm-matcher -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenMCCodeEmitter.inc.tmp): \
+$(ObjDir)/%GenMCCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) MC code emitter with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-emitter -mc-emitter -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenMCPseudoLowering.inc.tmp): \
+$(ObjDir)/%GenMCPseudoLowering.inc.tmp: %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) MC Pseudo instruction expander with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-pseudo-lowering -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenCodeEmitter.inc.tmp): \
+$(ObjDir)/%GenCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) code emitter with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-emitter -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenDAGISel.inc.tmp): \
+$(ObjDir)/%GenDAGISel.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) DAG instruction selector implementation with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-dag-isel -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenDisassemblerTables.inc.tmp): \
+$(ObjDir)/%GenDisassemblerTables.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) disassembly tables with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-disassembler -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenEDInfo.inc.tmp): \
+$(ObjDir)/%GenEDInfo.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) enhanced disassembly information with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-enhanced-disassembly-info -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenFastISel.inc.tmp): \
+$(ObjDir)/%GenFastISel.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) \"fast\" instruction selector implementation with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-fast-isel -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenSubtargetInfo.inc.tmp): \
+$(ObjDir)/%GenSubtargetInfo.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) subtarget information with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-subtarget -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenCallingConv.inc.tmp): \
+$(ObjDir)/%GenCallingConv.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) calling convention information with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-callingconv -o $(call SYSPATH, $@) $<
+
+$(TARGET:%=$(ObjDir)/%GenIntrinsics.inc.tmp): \
+$(ObjDir)/%GenIntrinsics.inc.tmp : %.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) intrinsics information with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-tgt-intrinsic -o $(call SYSPATH, $@) $<
+
+$(ObjDir)/ARMGenDecoderTables.inc.tmp : ARM.td $(ObjDir)/.dir $(LLVM_TBLGEN)
+ $(Echo) "Building $(<F) decoder tables with tblgen"
+ $(Verb) $(LLVMTableGen) -gen-arm-decoder -o $(call SYSPATH, $@) $<
+
+
+clean-local::
+ -$(Verb) $(RM) -f $(INCFiles)
+
+endif # TARGET
+
+###############################################################################
+# OTHER RULES: Other rules needed
+###############################################################################
+
+# To create postscript files from dot files...
+ifneq ($(DOT),false)
+%.ps: %.dot
+ $(DOT) -Tps < $< > $@
+else
+%.ps: %.dot
+ $(Echo) "Cannot build $@: The program dot is not installed"
+endif
+
+# These rules ensure that header files that have been removed still have a
+# rule by which they can be "generated." This allows make to ignore them and
+# reproduce the dependency lists.
+%.h:: ;
+%.hpp:: ;
+
+# Define clean-local to clean the current directory. Note that this uses a
+# very conservative approach ensuring that empty variables do not cause
+# errors or disastrous removal.
+clean-local::
+ifneq ($(strip $(ObjRootDir)),)
+ -$(Verb) $(RM) -rf $(ObjRootDir)
+endif
+ -$(Verb) $(RM) -f core core.[0-9][0-9]* *.o *.d *~ *.flc
+ifneq ($(strip $(SHLIBEXT)),) # Extra paranoia - make real sure SHLIBEXT is set
+ -$(Verb) $(RM) -f *$(SHLIBEXT)
+endif
+
+clean-all-local::
+ -$(Verb) $(RM) -rf Debug Release Profile
+
+
+###############################################################################
+# DEPENDENCIES: Include the dependency files if we should
+###############################################################################
+ifndef DISABLE_AUTO_DEPENDENCIES
+
+# If it's not one of the cleaning targets
+ifndef IS_CLEANING_TARGET
+
+# Get the list of dependency files
+DependSourceFiles := $(basename $(filter %.cpp %.c %.cc %.m %.mm, $(Sources)))
+DependFiles := $(DependSourceFiles:%=$(PROJ_OBJ_DIR)/$(BuildMode)/%.d)
+
+# Include bitcode dependency files if using bitcode libraries
+ifdef BYTECODE_LIBRARY
+DependFiles += $(DependSourceFiles:%=$(PROJ_OBJ_DIR)/$(BuildMode)/%.bc.d)
+endif
+
+-include $(DependFiles) ""
+
+endif
+
+endif
+
+###############################################################################
+# CHECK: Running the test suite
+###############################################################################
+
+check::
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+ $(EchoCmd) Running test suite ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local \
+ TESTSUITE=$(TESTSUITE) ; \
+ else \
+ $(EchoCmd) No Makefile in test directory ; \
+ fi ; \
+ else \
+ $(EchoCmd) No test directory ; \
+ fi
+
+check-lit:: check
+
+check-dg::
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+ $(EchoCmd) Running test suite ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-dg ; \
+ else \
+ $(EchoCmd) No Makefile in test directory ; \
+ fi ; \
+ else \
+ $(EchoCmd) No test directory ; \
+ fi
+
+check-all::
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+ if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+ $(EchoCmd) Running test suite ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-all ; \
+ else \
+ $(EchoCmd) No Makefile in test directory ; \
+ fi ; \
+ else \
+ $(EchoCmd) No test directory ; \
+ fi
+
+###############################################################################
+# UNITTESTS: Running the unittests test suite
+###############################################################################
+
+unittests::
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/unittests" ; then \
+ if test -f "$(PROJ_OBJ_ROOT)/unittests/Makefile" ; then \
+ $(EchoCmd) Running unittests test suite ; \
+ $(MAKE) -C $(PROJ_OBJ_ROOT)/unittests unitcheck; \
+ else \
+ $(EchoCmd) No Makefile in unittests directory ; \
+ fi ; \
+ else \
+ $(EchoCmd) No unittests directory ; \
+ fi
+
+###############################################################################
+# DISTRIBUTION: Handle construction of a distribution tarball
+###############################################################################
+
+#------------------------------------------------------------------------
+# Define distribution related variables
+#------------------------------------------------------------------------
+DistName := $(PROJECT_NAME)-$(PROJ_VERSION)
+DistDir := $(PROJ_OBJ_ROOT)/$(DistName)
+TopDistDir := $(PROJ_OBJ_ROOT)/$(DistName)
+DistTarGZip := $(PROJ_OBJ_ROOT)/$(DistName).tar.gz
+DistZip := $(PROJ_OBJ_ROOT)/$(DistName).zip
+DistTarBZ2 := $(PROJ_OBJ_ROOT)/$(DistName).tar.bz2
+DistAlways := CREDITS.TXT LICENSE.TXT README.txt README AUTHORS COPYING \
+ ChangeLog INSTALL NEWS Makefile Makefile.common Makefile.rules \
+ Makefile.config.in configure autoconf
+DistOther := $(notdir $(wildcard \
+ $(PROJ_SRC_DIR)/*.h \
+ $(PROJ_SRC_DIR)/*.td \
+ $(PROJ_SRC_DIR)/*.def \
+ $(PROJ_SRC_DIR)/*.ll \
+ $(PROJ_SRC_DIR)/*.in))
+DistSubDirs := $(SubDirs)
+DistSources = $(Sources) $(EXTRA_DIST)
+DistFiles = $(DistAlways) $(DistSources) $(DistOther)
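+# For illustration (hypothetical values): PROJECT_NAME := sample and
+# PROJ_VERSION := 1.0 give DistName = sample-1.0, packaged below as
+# sample-1.0.tar.gz, sample-1.0.tar.bz2 and sample-1.0.zip.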
+
+#------------------------------------------------------------------------
+# We MUST build distribution with OBJ_DIR != SRC_DIR
+#------------------------------------------------------------------------
+ifeq ($(PROJ_SRC_DIR),$(PROJ_OBJ_DIR))
+dist dist-check dist-clean dist-gzip dist-bzip2 dist-zip ::
+ $(Echo) ERROR: Target $@ only available with OBJ_DIR != SRC_DIR
+
+else
+
+#------------------------------------------------------------------------
+# Prevent attempt to run dist targets from anywhere but the top level
+#------------------------------------------------------------------------
+ifneq ($(LEVEL),.)
+dist dist-check dist-clean dist-gzip dist-bzip2 dist-zip ::
+ $(Echo) ERROR: You must run $@ from $(PROJ_OBJ_ROOT)
+else
+
+#------------------------------------------------------------------------
+# Provide the top level targets
+#------------------------------------------------------------------------
+
+dist-gzip:: $(DistTarGZip)
+
+$(DistTarGZip) : $(TopDistDir)/.makedistdir
+ $(Echo) Packing gzipped distribution tar file.
+ $(Verb) cd $(PROJ_OBJ_ROOT) ; $(TAR) chf - "$(DistName)" | \
+ $(GZIP) -c > "$(DistTarGZip)"
+
+dist-bzip2:: $(DistTarBZ2)
+
+$(DistTarBZ2) : $(TopDistDir)/.makedistdir
+ $(Echo) Packing bzipped distribution tar file.
+ $(Verb) cd $(PROJ_OBJ_ROOT) ; $(TAR) chf - $(DistName) | \
+ $(BZIP2) -c >$(DistTarBZ2)
+
+dist-zip:: $(DistZip)
+
+$(DistZip) : $(TopDistDir)/.makedistdir
+ $(Echo) Packing zipped distribution file.
+ $(Verb) rm -f $(DistZip)
+ $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ZIP) -rq $(DistZip) $(DistName)
+
+dist :: $(DistTarGZip) $(DistTarBZ2) $(DistZip)
+ $(Echo) ===== DISTRIBUTION PACKAGING SUCCESSFUL =====
+
+DistCheckDir := $(PROJ_OBJ_ROOT)/_distcheckdir
+
+dist-check:: $(DistTarGZip)
+ $(Echo) Checking distribution tar file.
+ $(Verb) if test -d $(DistCheckDir) ; then \
+ $(RM) -rf $(DistCheckDir) ; \
+ fi
+ $(Verb) $(MKDIR) $(DistCheckDir)
+ $(Verb) cd $(DistCheckDir) && \
+ $(MKDIR) $(DistCheckDir)/build && \
+ $(MKDIR) $(DistCheckDir)/install && \
+ gunzip -c $(DistTarGZip) | $(TAR) xf - && \
+ cd build && \
+ ../$(DistName)/configure --prefix="$(DistCheckDir)/install" \
+ --srcdir=../$(DistName) $(DIST_CHECK_CONFIG_OPTIONS) && \
+ $(MAKE) all && \
+ $(MAKE) check && \
+ $(MAKE) unittests && \
+ $(MAKE) install && \
+ $(MAKE) uninstall && \
+ $(MAKE) dist-clean && \
+ $(EchoCmd) ===== $(DistTarGZip) Ready For Distribution =====
+
+dist-clean::
+ $(Echo) Cleaning distribution files
+ -$(Verb) $(RM) -rf $(DistTarGZip) $(DistTarBZ2) $(DistZip) $(DistName) \
+ $(DistCheckDir)
+
+endif
+
+#------------------------------------------------------------------------
+# Provide the recursive distdir target for building the distribution directory
+#------------------------------------------------------------------------
+distdir: $(DistDir)/.makedistdir
+$(DistDir)/.makedistdir: $(DistSources)
+ $(Verb) if test "$(DistDir)" = "$(TopDistDir)" ; then \
+ if test -d "$(DistDir)" ; then \
+ find $(DistDir) -type d ! -perm -200 -exec chmod u+w {} ';' || \
+ exit 1 ; \
+ fi ; \
+ $(EchoCmd) Removing old $(DistDir) ; \
+ $(RM) -rf $(DistDir); \
+ $(EchoCmd) Making 'all' to verify build ; \
+ $(MAKE) ENABLE_OPTIMIZED=1 all ; \
+ fi
+ $(Echo) Building Distribution Directory $(DistDir)
+ $(Verb) $(MKDIR) $(DistDir)
+ $(Verb) srcdirstrip=`echo "$(PROJ_SRC_DIR)" | sed 's|.|.|g'`; \
+ srcrootstrip=`echo "$(PROJ_SRC_ROOT)" | sed 's|.|.|g'`; \
+ for file in $(DistFiles) ; do \
+ case "$$file" in \
+ $(PROJ_SRC_DIR)/*) \
+ file=`echo "$$file" | sed "s#^$$srcdirstrip/##"` \
+ ;; \
+ $(PROJ_SRC_ROOT)/*) \
+ file=`echo "$$file" | \
+ sed "s#^$$srcrootstrip/##"` \
+ ;; \
+ esac; \
+ if test -f "$(PROJ_SRC_DIR)/$$file" || \
+ test -d "$(PROJ_SRC_DIR)/$$file" ; then \
+ from_dir="$(PROJ_SRC_DIR)" ; \
+ elif test -f "$$file" || test -d "$$file" ; then \
+ from_dir=. ; \
+ fi ; \
+ to_dir=`echo "$$file" | sed -e 's#/[^/]*$$##'` ; \
+ if test "$$to_dir" != "$$file" && test "$$to_dir" != "."; then \
+ to_dir="$(DistDir)/$$dir"; \
+ $(MKDIR) "$$to_dir" ; \
+ else \
+ to_dir="$(DistDir)"; \
+ fi; \
+ mid_dir=`echo "$$file" | sed -n -e 's#^\(.*\)/[^/]*$$#\1#p'`; \
+ if test -n "$$mid_dir" ; then \
+ $(MKDIR) "$$to_dir/$$mid_dir" || exit 1; \
+ fi ; \
+ if test -d "$$from_dir/$$file"; then \
+ if test -d "$(PROJ_SRC_DIR)/$$file" && \
+ test "$$from_dir" != "$(PROJ_SRC_DIR)" ; then \
+ cd $(PROJ_SRC_DIR) ; \
+ $(TAR) cf - $$file --exclude .svn --exclude CVS | \
+ ( cd $$to_dir ; $(TAR) xf - ) ; \
+ cd $(PROJ_OBJ_DIR) ; \
+ else \
+ cd $$from_dir ; \
+ $(TAR) cf - $$file --exclude .svn --exclude CVS | \
+ ( cd $$to_dir ; $(TAR) xf - ) ; \
+ cd $(PROJ_OBJ_DIR) ; \
+ fi; \
+ elif test -f "$$from_dir/$$file" ; then \
+ $(CP) -p "$$from_dir/$$file" "$(DistDir)/$$file" || exit 1; \
+ elif test -L "$$from_dir/$$file" ; then \
+ $(CP) -pd "$$from_dir/$$file" $(DistDir)/$$file || exit 1; \
+ elif echo "$(DistAlways)" | grep -v "$$file" >/dev/null ; then \
+ $(EchoCmd) "===== WARNING: Distribution Source " \
+ "$$from_dir/$$file Not Found!" ; \
+ elif test "$(Verb)" != '@' ; then \
+ $(EchoCmd) "Skipping non-existent $$from_dir/$$file" ; \
+ fi; \
+ done
+ $(Verb) for subdir in $(DistSubDirs) ; do \
+ if test "$$subdir" \!= "." ; then \
+ new_distdir="$(DistDir)/$$subdir" ; \
+ test -d "$$new_distdir" || $(MKDIR) "$$new_distdir" || exit 1; \
+ ( cd $$subdir && $(MAKE) ENABLE_OPTIMIZED=1 \
+ DistDir="$$new_distdir" distdir ) || exit 1; \
+ fi; \
+ done
+ $(Verb) if test "$(DistDir)" = "$(TopDistDir)" ; then \
+ $(EchoCmd) Eliminating CVS/.svn directories from distribution ; \
+ $(RM) -rf `find $(TopDistDir) -type d \( -name CVS -o \
+ -name .svn \) -print` ;\
+ $(MAKE) dist-hook ; \
+ $(FIND) $(TopDistDir) -type d ! -perm -777 -exec chmod a+rwx {} \; \
+ -o ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; \
+ -o ! -type d ! -perm -400 -exec chmod a+r {} \; \
+ -o ! -type d ! -perm -444 -exec \
+ $(SHELL) $(INSTALL_SH) -c -m a+r {} {} \; \
+ || chmod -R a+r $(DistDir) ; \
+ fi
+
+# This is invoked by the distdir target; define it as a no-op to avoid
+# errors if it is not defined by the user.
+dist-hook::
+
+endif
+
+###############################################################################
+# TOP LEVEL - targets only to apply at the top level directory
+###############################################################################
+
+ifeq ($(LEVEL),.)
+
+#------------------------------------------------------------------------
+# Install support for the project's include files:
+#------------------------------------------------------------------------
+ifdef NO_INSTALL
+install-local::
+ $(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+ $(Echo) Uninstall circumvented with NO_INSTALL
+else
+install-local::
+ $(Echo) Installing include files
+ $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_includedir)
+ $(Verb) if test -d "$(PROJ_SRC_ROOT)/include" ; then \
+ cd $(PROJ_SRC_ROOT)/include && \
+ for hdr in `find . -type f \
+ '(' -name LICENSE.TXT \
+ -o -name '*.def' \
+ -o -name '*.h' \
+ -o -name '*.inc' \
+ -o -name '*.td' \
+ ')' -print | grep -v CVS | \
+ grep -v .svn` ; do \
+ instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \
+ if test \! -d "$$instdir" ; then \
+ $(EchoCmd) Making install directory $$instdir ; \
+ $(MKDIR) $$instdir ;\
+ fi ; \
+ $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \
+ done ; \
+ fi
+ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
+ $(Verb) if test -d "$(PROJ_OBJ_ROOT)/include" ; then \
+ cd $(PROJ_OBJ_ROOT)/include && \
+ for hdr in `find . -type f \
+ '(' -name LICENSE.TXT \
+ -o -name '*.def' \
+ -o -name '*.h' \
+ -o -name '*.inc' \
+ -o -name '*.td' \
+ ')' -print | grep -v CVS | \
+ grep -v .svn` ; do \
+ instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \
+ if test \! -d "$$instdir" ; then \
+ $(EchoCmd) Making install directory $$instdir ; \
+ $(MKDIR) $$instdir ;\
+ fi ; \
+ $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \
+ done ; \
+ fi
+endif
+
+uninstall-local::
+ $(Echo) Uninstalling include files
+ $(Verb) if [ -d "$(PROJ_SRC_ROOT)/include" ] ; then \
+ cd $(PROJ_SRC_ROOT)/include && \
+ $(RM) -f `find . -path '*/Internal' -prune -o '(' -type f \
+ '!' '(' -name '*~' -o -name '.#*' \
+ -o -name '*.in' ')' -print ')' | \
+ grep -v CVS | sed 's#^#$(DESTDIR)$(PROJ_includedir)/#'` ; \
+ cd $(PROJ_SRC_ROOT)/include && \
+ $(RM) -f `find . -path '*/Internal' -prune -o '(' -type f -name '*.in' \
+ -print ')' | sed 's#\.in$$##;s#^#$(DESTDIR)$(PROJ_includedir)/#'` ; \
+ fi
+endif
+endif
+
+check-line-length:
+ @echo searching for overlength lines in files: $(Sources)
+ @echo
+ @echo
+ egrep -n '.{81}' $(Sources) /dev/null
+
+# Note: the pattern in the egrep command below is a single literal TAB.
+check-for-tabs:
+ @echo searching for tabs in files: $(Sources)
+ @echo
+ @echo
+ egrep -n '	' $(Sources) /dev/null
+
+check-footprint:
+ @ls -l $(LibDir) | awk '\
+ BEGIN { sum = 0; } \
+ { sum += $$5; } \
+ END { printf("Libraries: %6.3f MBytes\n", sum/(1024.0*1024.0)); }'
+ @ls -l $(ToolDir) | awk '\
+ BEGIN { sum = 0; } \
+ { sum += $$5; } \
+ END { printf("Programs: %6.3f MBytes\n", sum/(1024.0*1024.0)); }'
+#------------------------------------------------------------------------
+# Print out the directories used for building
+#------------------------------------------------------------------------
+printvars::
+ $(Echo) "BuildMode : " '$(BuildMode)'
+ $(Echo) "PROJ_SRC_ROOT: " '$(PROJ_SRC_ROOT)'
+ $(Echo) "PROJ_SRC_DIR : " '$(PROJ_SRC_DIR)'
+ $(Echo) "PROJ_OBJ_ROOT: " '$(PROJ_OBJ_ROOT)'
+ $(Echo) "PROJ_OBJ_DIR : " '$(PROJ_OBJ_DIR)'
+ $(Echo) "LLVM_SRC_ROOT: " '$(LLVM_SRC_ROOT)'
+ $(Echo) "LLVM_OBJ_ROOT: " '$(LLVM_OBJ_ROOT)'
+ $(Echo) "PROJ_prefix : " '$(PROJ_prefix)'
+ $(Echo) "PROJ_bindir : " '$(PROJ_bindir)'
+ $(Echo) "PROJ_libdir : " '$(PROJ_libdir)'
+ $(Echo) "PROJ_etcdir : " '$(PROJ_etcdir)'
+ $(Echo) "PROJ_includedir : " '$(PROJ_includedir)'
+ $(Echo) "UserTargets : " '$(UserTargets)'
+ $(Echo) "ObjMakefiles : " '$(ObjMakefiles)'
+ $(Echo) "SrcMakefiles : " '$(SrcMakefiles)'
+ $(Echo) "ObjDir : " '$(ObjDir)'
+ $(Echo) "LibDir : " '$(LibDir)'
+ $(Echo) "ToolDir : " '$(ToolDir)'
+ $(Echo) "ExmplDir : " '$(ExmplDir)'
+ $(Echo) "Sources : " '$(Sources)'
+ $(Echo) "TDFiles : " '$(TDFiles)'
+ $(Echo) "INCFiles : " '$(INCFiles)'
+ $(Echo) "INCTMPFiles : " '$(INCTMPFiles)'
+ $(Echo) "PreConditions: " '$(PreConditions)'
+ $(Echo) "Compile.CXX : " '$(Compile.CXX)'
+ $(Echo) "Compile.C : " '$(Compile.C)'
+ $(Echo) "Archive : " '$(Archive)'
+ $(Echo) "YaccFiles : " '$(YaccFiles)'
+ $(Echo) "LexFiles : " '$(LexFiles)'
+ $(Echo) "Module : " '$(Module)'
+ $(Echo) "FilesToConfig: " '$(FilesToConfigPATH)'
+ $(Echo) "SubDirs : " '$(SubDirs)'
+ $(Echo) "ProjLibsPaths: " '$(ProjLibsPaths)'
+ $(Echo) "ProjLibsOptions: " '$(ProjLibsOptions)'
+
+###
+# Debugging
+
+# General debugging rule: use 'make make-print-XXX' to print the
+# definition, value and origin of XXX.
+make-print-%:
+ $(error PRINT: $(value $*) = "$($*)" (from $(origin $*)))
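+# e.g. 'make make-print-LLVMLibsOptions' aborts with the definition, value
+# and origin of LLVMLibsOptions.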
diff --git a/projects/sample/autoconf/AutoRegen.sh b/projects/sample/autoconf/AutoRegen.sh
index 6e6931c7a9bc..b91b3e446a13 100755
--- a/projects/sample/autoconf/AutoRegen.sh
+++ b/projects/sample/autoconf/AutoRegen.sh
@@ -12,15 +12,13 @@ fi
cwd=`pwd`
if test -d ../../../autoconf/m4 ; then
cd ../../../autoconf/m4
- llvm_m4=`pwd`
llvm_src_root=../..
llvm_obj_root=../..
cd $cwd
elif test -d ../../llvm/autoconf/m4 ; then
cd ../../llvm/autoconf/m4
- llvm_m4=`pwd`
- llvm_src_root=..
- llvm_obj_root=..
+ llvm_src_root=../..
+ llvm_obj_root=../..
cd $cwd
else
while true ; do
@@ -28,7 +26,6 @@ else
read -p "Enter full path to LLVM source:" REPLY
if test -d "$REPLY/autoconf/m4" ; then
llvm_src_root="$REPLY"
- llvm_m4="$REPLY/autoconf/m4"
read -p "Enter full path to LLVM objects (empty for same as source):" REPLY
if test -d "$REPLY" ; then
llvm_obj_root="$REPLY"
@@ -39,13 +36,9 @@ else
fi
done
fi
-# Patch the LLVM_ROOT in configure.ac, if it needs it
-cp configure.ac configure.bak
-sed -e "s#^LLVM_SRC_ROOT=.*#LLVM_SRC_ROOT=\"$llvm_src_root\"#" \
- -e "s#^LLVM_OBJ_ROOT=.*#LLVM_OBJ_ROOT=\"$llvm_obj_root\"#" configure.bak > configure.ac
echo "Regenerating aclocal.m4 with aclocal"
rm -f aclocal.m4
-aclocal -I $llvm_m4 -I "$llvm_m4/.." || die "aclocal failed"
+aclocal -I $cwd/m4 || die "aclocal failed"
echo "Regenerating configure with autoconf"
autoconf --warnings=all -o ../configure configure.ac || die "autoconf failed"
cd ..
diff --git a/projects/sample/autoconf/ExportMap.map b/projects/sample/autoconf/ExportMap.map
new file mode 100644
index 000000000000..17b185fed914
--- /dev/null
+++ b/projects/sample/autoconf/ExportMap.map
@@ -0,0 +1,7 @@
+{
+ global: main;
+ __progname;
+ environ;
+
+ local: *;
+};
diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac
index bb75bbdeb594..c3a49d5b12ea 100644
--- a/projects/sample/autoconf/configure.ac
+++ b/projects/sample/autoconf/configure.ac
@@ -15,59 +15,1457 @@ dnl Tell autoconf that this is an LLVM project being configured
dnl This provides the --with-llvmsrc and --with-llvmobj options
LLVM_CONFIG_PROJECT($LLVM_ABS_SRC_ROOT,$LLVM_ABS_OBJ_ROOT)
-dnl Tell autoconf that the auxiliary files are actually located in
-dnl the LLVM autoconf directory, not here.
-AC_CONFIG_AUX_DIR($LLVM_SRC/autoconf)
+dnl Try and find an llvm-config in the build directory. We are only using this
+dnl to detect the package level LLVM information (currently just the version),
+dnl so we just use whatever one we find regardless of build mode.
+AC_MSG_CHECKING([llvm-config])
+llvm_config_path="`ls -1 $llvm_obj/*/bin/llvm-config 2> /dev/null | head -1`"
+if ! test -f "$llvm_config_path" ; then
+ llvm_config_path="no"
+fi
+AC_MSG_RESULT([$llvm_config_path])
+
+dnl Determine the LLVM version, which may be required by the current Makefile
+dnl rules.
+AC_MSG_CHECKING([LLVM package version])
+if test "$llvm_config_path" != no ; then
+ llvm_package_version=`$llvm_config_path --version`
+else
+ llvm_package_version="unknown";
+fi
+AC_MSG_RESULT([$llvm_package_version])
+AC_SUBST(LLVM_VERSION, [$llvm_package_version])
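+dnl For illustration (hypothetical tree layout): with a Release build the
+dnl glob above typically resolves to $llvm_obj/Release/bin/llvm-config and
+dnl LLVM_VERSION is then substituted with the string it reports, e.g. "3.1".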
dnl Verify that the source directory is valid
AC_CONFIG_SRCDIR(["Makefile.common.in"])
-dnl Configure a common Makefile
-AC_CONFIG_FILES(Makefile.common)
-
-dnl Configure project makefiles
-dnl List every Makefile that exists within your source tree
-AC_CONFIG_MAKEFILE(Makefile)
-AC_CONFIG_MAKEFILE(lib/Makefile)
-AC_CONFIG_MAKEFILE(lib/sample/Makefile)
-AC_CONFIG_MAKEFILE(tools/Makefile)
-AC_CONFIG_MAKEFILE(tools/sample/Makefile)
+dnl Place all of the extra autoconf files into the config subdirectory. Tell
+dnl various tools where the m4 autoconf macros are.
+AC_CONFIG_AUX_DIR([autoconf])
dnl **************************************************************************
-dnl * Determine which system we are building on
+dnl Begin LLVM configure.ac Import
dnl **************************************************************************
+dnl
+dnl Derived from LLVM's configure.ac. This was imported directly here so that we
+dnl could reuse LLVM's build infrastructure without introducing a direct source
+dnl dependency on the LLVM files.
-dnl **************************************************************************
-dnl * Check for programs.
-dnl **************************************************************************
+dnl We need to check for the compiler up here to avoid anything else
+dnl starting with a different one.
+AC_PROG_CC(clang llvm-gcc gcc)
+AC_PROG_CXX(clang++ llvm-g++ g++)
+AC_PROG_CPP
-dnl **************************************************************************
-dnl * Check for libraries.
-dnl **************************************************************************
+dnl Configure all of the projects present in our source tree. While we could
+dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a
+dnl configure script, that usage of the AC_CONFIG_SUBDIRS macro is deprecated.
+dnl Instead we match on the known projects.
-dnl **************************************************************************
-dnl * Checks for header files.
-dnl **************************************************************************
+dnl
+dnl One tricky part of doing this is that some projects depend upon other
+dnl projects. For example, several projects rely upon the LLVM test suite.
+dnl We want to configure those projects first so that their object trees are
+dnl created before running the configure scripts of projects that depend upon
+dnl them.
+dnl
-dnl **************************************************************************
-dnl * Checks for typedefs, structures, and compiler characteristics.
-dnl **************************************************************************
+dnl Allow disabling the build of polly, even if it is checked out into tools/polly.
+AC_ARG_ENABLE(polly,
+ AS_HELP_STRING([--enable-polly],
+ [Use polly if available (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_POLLY,[1]) ;;
+ no) AC_SUBST(ENABLE_POLLY,[0]) ;;
+ default) AC_SUBST(ENABLE_POLLY,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-polly. Use "yes" or "no"]) ;;
+esac
-dnl **************************************************************************
-dnl * Checks for library functions.
-dnl **************************************************************************
-dnl **************************************************************************
-dnl * Enable various compile-time options
-dnl **************************************************************************
+dnl Check if polly is checked out into tools/polly and configure it if
+dnl available.
+if (test -d ${srcdir}/tools/polly) && (test $ENABLE_POLLY -eq 1) ; then
+ AC_SUBST(LLVM_HAS_POLLY,1)
+ AC_CONFIG_SUBDIRS([tools/polly])
+fi
-dnl **************************************************************************
-dnl * Set the location of various third-party software packages
-dnl **************************************************************************
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 2: Architecture, target, and host checks
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl Check the target for which we're compiling and the host that will do the
+dnl compilations. This will tell us which LLVM compiler will be used for
+dnl compiling SSA into object code. This needs to be done early because
+dnl following tests depend on it.
+AC_CANONICAL_TARGET
+
+dnl Determine the platform type and cache its value. This helps us configure
+dnl the System library to the correct build platform.
+AC_CACHE_CHECK([type of operating system we're going to host on],
+ [llvm_cv_os_type],
+[case $host in
+ *-*-aix*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="AIX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-irix*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="IRIX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-cygwin*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Cygwin"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-darwin*)
+ llvm_cv_link_all_option="-Wl,-all_load"
+ llvm_cv_no_link_all_option="-Wl,-noall_load"
+ llvm_cv_os_type="Darwin"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-minix*)
+ llvm_cv_link_all_option="-Wl,-all_load"
+ llvm_cv_no_link_all_option="-Wl,-noall_load"
+ llvm_cv_os_type="Minix"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-freebsd* | *-*-kfreebsd-gnu)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="FreeBSD"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-openbsd*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="OpenBSD"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-netbsd*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="NetBSD"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-dragonfly*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="DragonFly"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-hpux*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="HP-UX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-interix*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Interix"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-linux*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Linux"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-gnu*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="GNU"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-solaris*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+ llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="SunOS"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-auroraux*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+    llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="AuroraUX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-win32*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Win32"
+ llvm_cv_platform_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="MingW"
+ llvm_cv_platform_type="Win32" ;;
+ *-*-haiku*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Haiku"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-eabi*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-elf*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *)
+ llvm_cv_link_all_option=""
+ llvm_cv_no_link_all_option=""
+ llvm_cv_os_type="Unknown"
+ llvm_cv_platform_type="Unknown" ;;
+esac])
+
+AC_CACHE_CHECK([type of operating system we're going to target],
+ [llvm_cv_target_os_type],
+[case $target in
+ *-*-aix*)
+ llvm_cv_target_os_type="AIX" ;;
+ *-*-irix*)
+ llvm_cv_target_os_type="IRIX" ;;
+ *-*-cygwin*)
+ llvm_cv_target_os_type="Cygwin" ;;
+ *-*-darwin*)
+ llvm_cv_target_os_type="Darwin" ;;
+ *-*-minix*)
+ llvm_cv_target_os_type="Minix" ;;
+ *-*-freebsd* | *-*-kfreebsd-gnu)
+ llvm_cv_target_os_type="FreeBSD" ;;
+ *-*-openbsd*)
+ llvm_cv_target_os_type="OpenBSD" ;;
+ *-*-netbsd*)
+ llvm_cv_target_os_type="NetBSD" ;;
+ *-*-dragonfly*)
+ llvm_cv_target_os_type="DragonFly" ;;
+ *-*-hpux*)
+ llvm_cv_target_os_type="HP-UX" ;;
+ *-*-interix*)
+ llvm_cv_target_os_type="Interix" ;;
+ *-*-linux*)
+ llvm_cv_target_os_type="Linux" ;;
+ *-*-gnu*)
+ llvm_cv_target_os_type="GNU" ;;
+ *-*-solaris*)
+ llvm_cv_target_os_type="SunOS" ;;
+ *-*-auroraux*)
+ llvm_cv_target_os_type="AuroraUX" ;;
+ *-*-win32*)
+ llvm_cv_target_os_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_target_os_type="MingW" ;;
+ *-*-haiku*)
+ llvm_cv_target_os_type="Haiku" ;;
+ *-*-rtems*)
+ llvm_cv_target_os_type="RTEMS" ;;
+ *-*-nacl*)
+ llvm_cv_target_os_type="NativeClient" ;;
+ *-unknown-eabi*)
+ llvm_cv_target_os_type="Freestanding" ;;
+ *)
+ llvm_cv_target_os_type="Unknown" ;;
+esac])
+
+dnl Make sure we aren't attempting to configure for an unknown system
+if test "$llvm_cv_os_type" = "Unknown" ; then
+ AC_MSG_ERROR([Operating system is unknown, configure can't continue])
+fi
+
+dnl Set the "OS" Makefile variable based on the platform type so the
+dnl makefile can configure itself to specific build hosts
+AC_SUBST(OS,$llvm_cv_os_type)
+AC_SUBST(HOST_OS,$llvm_cv_os_type)
+AC_SUBST(TARGET_OS,$llvm_cv_target_os_type)
+
+dnl Set the LINKALL and NOLINKALL Makefile variables based on the platform
+AC_SUBST(LINKALL,$llvm_cv_link_all_option)
+AC_SUBST(NOLINKALL,$llvm_cv_no_link_all_option)
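+
+dnl As a rough sketch of the intended use in the makefiles (the archive
+dnl name below is illustrative only):
+dnl   $(CXX) ... $(LINKALL) libLLVMSample.a $(NOLINKALL) ...
+dnl i.e. force every member of the archive into the link, via
+dnl --whole-archive on GNU ld or -all_load on Darwin's linker.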
+
+dnl Set the "LLVM_ON_*" variables based on llvm_cv_platform_type
+dnl This is used by lib/Support to determine the basic kind of implementation
+dnl to use.
+case $llvm_cv_platform_type in
+ Unix)
+ AC_DEFINE([LLVM_ON_UNIX],[1],[Define if this is Unixish platform])
+ AC_SUBST(LLVM_ON_UNIX,[1])
+ AC_SUBST(LLVM_ON_WIN32,[0])
+ ;;
+ Win32)
+ AC_DEFINE([LLVM_ON_WIN32],[1],[Define if this is Win32ish platform])
+ AC_SUBST(LLVM_ON_UNIX,[0])
+ AC_SUBST(LLVM_ON_WIN32,[1])
+ ;;
+esac
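+
+dnl lib/Support can then select an implementation at compile time, roughly:
+dnl   #ifdef LLVM_ON_UNIX
+dnl   #include "Unix/Path.inc"   /* illustrative include name */
+dnl   #endif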
+
+dnl Determine what our target architecture is and configure accordingly.
+dnl This will allow Makefiles to make a distinction between the hardware and
+dnl the OS.
+AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
+[case $target in
+ i?86-*) llvm_cv_target_arch="x86" ;;
+ amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
+ sparc*-*) llvm_cv_target_arch="Sparc" ;;
+ powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
+ arm*-*) llvm_cv_target_arch="ARM" ;;
+ mips-*) llvm_cv_target_arch="Mips" ;;
+ xcore-*) llvm_cv_target_arch="XCore" ;;
+ msp430-*) llvm_cv_target_arch="MSP430" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
+ mblaze-*) llvm_cv_target_arch="MBlaze" ;;
+ ptx-*) llvm_cv_target_arch="PTX" ;;
+ *) llvm_cv_target_arch="Unknown" ;;
+esac])
+
+if test "$llvm_cv_target_arch" = "Unknown" ; then
+  AC_MSG_WARN([Configuring LLVM for an unknown target architecture])
+fi
+
+# Determine the LLVM native architecture for the target
+case "$llvm_cv_target_arch" in
+ x86) LLVM_NATIVE_ARCH="X86" ;;
+ x86_64) LLVM_NATIVE_ARCH="X86" ;;
+ *) LLVM_NATIVE_ARCH="$llvm_cv_target_arch" ;;
+esac
+
+dnl Define a substitution, ARCH, for the target architecture
+AC_SUBST(ARCH,$llvm_cv_target_arch)
+
+dnl Check for the endianness of the target
+AC_C_BIGENDIAN(AC_SUBST([ENDIAN],[big]),AC_SUBST([ENDIAN],[little]))
+
+dnl Check for build platform executable suffix if we're crosscompiling
+if test "$cross_compiling" = yes; then
+ AC_SUBST(LLVM_CROSS_COMPILING, [1])
+ AC_BUILD_EXEEXT
+ ac_build_prefix=${build_alias}-
+ AC_CHECK_PROG(BUILD_CXX, ${ac_build_prefix}g++, ${ac_build_prefix}g++)
+ if test -z "$BUILD_CXX"; then
+ AC_CHECK_PROG(BUILD_CXX, g++, g++)
+ if test -z "$BUILD_CXX"; then
+ AC_CHECK_PROG(BUILD_CXX, c++, c++, , , /usr/ucb/c++)
+ fi
+ fi
+else
+ AC_SUBST(LLVM_CROSS_COMPILING, [0])
+fi
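+
+dnl A cross-configure that exercises this path might look like (the triples
+dnl are examples only):
+dnl   ./configure --build=x86_64-unknown-linux-gnu --host=arm-linux-gnueabi
+dnl which sets LLVM_CROSS_COMPILING=1 and searches for a C++ compiler that
+dnl runs on the build machine (BUILD_CXX).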
+
+dnl Check to see if there's a .svn or .git directory indicating that this
+dnl build is being done from a checkout. This sets up several defaults for
+dnl the command line switches. When we build from a checkout, we get a
+dnl debug build with assertions turned on. Without one, we assume a source
+dnl release and we get an optimized build without assertions.
+dnl See --enable-optimized and --enable-assertions below.
+if test -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
+ cvsbuild="yes"
+ optimize="no"
+ AC_SUBST(CVSBUILD,[[CVSBUILD=1]])
+else
+ cvsbuild="no"
+ optimize="yes"
+fi
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 3: Command line arguments for the configure script.
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl --enable-libcpp : check whether or not to use libc++ on the command line
+AC_ARG_ENABLE(libcpp,
+ AS_HELP_STRING([--enable-libcpp],
+ [Use libc++ if available (default is NO)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_LIBCPP,[1]) ;;
+ no) AC_SUBST(ENABLE_LIBCPP,[0]) ;;
+ default) AC_SUBST(ENABLE_LIBCPP,[0]);;
+ *) AC_MSG_ERROR([Invalid setting for --enable-libcpp. Use "yes" or "no"]) ;;
+esac
+
+dnl --enable-optimized : check whether they want to do an optimized build:
+AC_ARG_ENABLE(optimized, AS_HELP_STRING(
+ --enable-optimized,[Compile with optimizations enabled (default is NO)]),,enableval=$optimize)
+if test ${enableval} = "no" ; then
+ AC_SUBST(ENABLE_OPTIMIZED,[[]])
+else
+ AC_SUBST(ENABLE_OPTIMIZED,[[ENABLE_OPTIMIZED=1]])
+fi
+
+dnl --enable-profiling : check whether they want to do a profile build:
+AC_ARG_ENABLE(profiling, AS_HELP_STRING(
+ --enable-profiling,[Compile with profiling enabled (default is NO)]),,enableval="no")
+if test ${enableval} = "no" ; then
+ AC_SUBST(ENABLE_PROFILING,[[]])
+else
+ AC_SUBST(ENABLE_PROFILING,[[ENABLE_PROFILING=1]])
+fi
+
+dnl --enable-assertions : check whether they want to turn on assertions or not:
+AC_ARG_ENABLE(assertions,AS_HELP_STRING(
+ --enable-assertions,[Compile with assertion checks enabled (default is YES)]),, enableval="yes")
+if test ${enableval} = "yes" ; then
+ AC_SUBST(DISABLE_ASSERTIONS,[[]])
+else
+ AC_SUBST(DISABLE_ASSERTIONS,[[DISABLE_ASSERTIONS=1]])
+fi
+
+dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks:
+AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING(
+ --enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no")
+if test ${enableval} = "yes" ; then
+ AC_SUBST(ENABLE_EXPENSIVE_CHECKS,[[ENABLE_EXPENSIVE_CHECKS=1]])
+ AC_SUBST(EXPENSIVE_CHECKS,[[yes]])
+else
+ AC_SUBST(ENABLE_EXPENSIVE_CHECKS,[[]])
+ AC_SUBST(EXPENSIVE_CHECKS,[[no]])
+fi
+
+dnl --enable-debug-runtime : should runtime libraries have debug symbols?
+AC_ARG_ENABLE(debug-runtime,
+ AS_HELP_STRING(--enable-debug-runtime,[Build runtime libs with debug symbols (default is NO)]),,enableval=no)
+if test ${enableval} = "no" ; then
+ AC_SUBST(DEBUG_RUNTIME,[[]])
+else
+ AC_SUBST(DEBUG_RUNTIME,[[DEBUG_RUNTIME=1]])
+fi
+
+dnl --enable-debug-symbols : should even optimized compiler libraries
+dnl have debug symbols?
+AC_ARG_ENABLE(debug-symbols,
+ AS_HELP_STRING(--enable-debug-symbols,[Build compiler with debug symbols (default is NO if optimization is on and YES if it's off)]),,enableval=no)
+if test ${enableval} = "no" ; then
+ AC_SUBST(DEBUG_SYMBOLS,[[]])
+else
+ AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]])
+fi
+
+dnl --enable-jit: check whether they want to enable the jit
+AC_ARG_ENABLE(jit,
+ AS_HELP_STRING(--enable-jit,
+    [Enable Just-In-Time compilation (default is YES)]),,
+ enableval=default)
+if test ${enableval} = "no"
+then
+ AC_SUBST(JIT,[[]])
+else
+ case "$llvm_cv_target_arch" in
+ x86) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ ARM) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ Mips) AC_SUBST(TARGET_HAS_JIT,1) ;;
+ XCore) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ *) AC_SUBST(TARGET_HAS_JIT,0) ;;
+ esac
+fi
+
+dnl Allow enablement of building and installing docs
+AC_ARG_ENABLE(docs,
+ AS_HELP_STRING([--enable-docs],
+ [Build documents (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_DOCS,[1]) ;;
+ no) AC_SUBST(ENABLE_DOCS,[0]) ;;
+ default) AC_SUBST(ENABLE_DOCS,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-docs. Use "yes" or "no"]) ;;
+esac
+
+dnl Allow enablement of doxygen generated documentation
+AC_ARG_ENABLE(doxygen,
+ AS_HELP_STRING([--enable-doxygen],
+ [Build doxygen documentation (default is NO)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_DOXYGEN,[1]) ;;
+ no) AC_SUBST(ENABLE_DOXYGEN,[0]) ;;
+ default) AC_SUBST(ENABLE_DOXYGEN,[0]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-doxygen. Use "yes" or "no"]) ;;
+esac
+
+dnl Allow disablement of threads
+AC_ARG_ENABLE(threads,
+ AS_HELP_STRING([--enable-threads],
+ [Use threads if available (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_THREADS,[1]) ;;
+ no) AC_SUBST(ENABLE_THREADS,[0]) ;;
+ default) AC_SUBST(ENABLE_THREADS,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-threads. Use "yes" or "no"]) ;;
+esac
+AC_DEFINE_UNQUOTED([ENABLE_THREADS],$ENABLE_THREADS,[Define if threads enabled])
+
+dnl Allow disablement of pthread.h
+AC_ARG_ENABLE(pthreads,
+ AS_HELP_STRING([--enable-pthreads],
+ [Use pthreads if available (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_PTHREADS,[1]) ;;
+ no) AC_SUBST(ENABLE_PTHREADS,[0]) ;;
+ default) AC_SUBST(ENABLE_PTHREADS,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-pthreads. Use "yes" or "no"]) ;;
+esac
+
+dnl Allow building without position independent code
+AC_ARG_ENABLE(pic,
+ AS_HELP_STRING([--enable-pic],
+ [Build LLVM with Position Independent Code (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_PIC,[1]) ;;
+ no) AC_SUBST(ENABLE_PIC,[0]) ;;
+ default) AC_SUBST(ENABLE_PIC,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-pic. Use "yes" or "no"]) ;;
+esac
+AC_DEFINE_UNQUOTED([ENABLE_PIC],$ENABLE_PIC,
+ [Define if position independent code is enabled])
+
+dnl Allow building a shared library and linking tools against it.
+AC_ARG_ENABLE(shared,
+ AS_HELP_STRING([--enable-shared],
+ [Build a shared library and link tools against it (default is NO)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_SHARED,[1]) ;;
+ no) AC_SUBST(ENABLE_SHARED,[0]) ;;
+ default) AC_SUBST(ENABLE_SHARED,[0]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-shared. Use "yes" or "no"]) ;;
+esac
+
+dnl Allow libstdc++ to be embedded in LLVM.dll.
+AC_ARG_ENABLE(embed-stdcxx,
+ AS_HELP_STRING([--enable-embed-stdcxx],
+ [Build a shared library with embedded libstdc++ for Win32 DLL (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_EMBED_STDCXX,[1]) ;;
+ no) AC_SUBST(ENABLE_EMBED_STDCXX,[0]) ;;
+ default) AC_SUBST(ENABLE_EMBED_STDCXX,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-embed-stdcxx. Use "yes" or "no"]) ;;
+esac
+
+dnl Enable embedding timestamp information into build.
+AC_ARG_ENABLE(timestamps,
+ AS_HELP_STRING([--enable-timestamps],
+ [Enable embedding timestamp information in build (default is YES)]),,
+ enableval=default)
+case "$enableval" in
+ yes) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+ no) AC_SUBST(ENABLE_TIMESTAMPS,[0]) ;;
+ default) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-timestamps. Use "yes" or "no"]) ;;
+esac
+AC_DEFINE_UNQUOTED([ENABLE_TIMESTAMPS],$ENABLE_TIMESTAMPS,
+                   [Define if timestamp information (e.g., __DATE__) is allowed])
+
+dnl Allow specific targets to be specified for building (or not)
+TARGETS_TO_BUILD=""
+AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
+ [Build specific host targets: all or target1,target2,... Valid targets are:
+    host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+    xcore, msp430, mblaze, ptx, and cpp (default=all)]),,
+ enableval=all)
+if test "$enableval" = host-only ; then
+ enableval=host
+fi
+case "$enableval" in
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
+ *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
+ case "$a_target" in
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
+ cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+ ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ host) case "$llvm_cv_target_arch" in
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+ CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
+ PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ *) AC_MSG_ERROR([Can not set target to build]) ;;
+ esac ;;
+ *) AC_MSG_ERROR([Unrecognized target $a_target]) ;;
+ esac
+ done
+ ;;
+esac
+AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD)
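+
+dnl Example invocations (resulting values are illustrative):
+dnl   ./configure --enable-targets=x86,arm  # TARGETS_TO_BUILD="ARM X86"
+dnl   ./configure --enable-targets=host     # only the arch detected above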
+
+# Determine whether we are building LLVM support for the native architecture.
+# If so, define LLVM_NATIVE_ARCH to that LLVM target.
+for a_target in $TARGETS_TO_BUILD; do
+ if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH, $LLVM_NATIVE_ARCH,
+ [LLVM architecture name for the native architecture, if available])
+ LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
+ LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
+ LLVM_NATIVE_TARGETMC="LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC"
+ LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+ LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
+ fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+ LLVM_NATIVE_DISASSEMBLER="LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler"
+ fi
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGET, $LLVM_NATIVE_TARGET,
+ [LLVM name for the native Target init function, if available])
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO,
+ [LLVM name for the native TargetInfo init function, if available])
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETMC, $LLVM_NATIVE_TARGETMC,
+ [LLVM name for the native target MC init function, if available])
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPRINTER, $LLVM_NATIVE_ASMPRINTER,
+ [LLVM name for the native AsmPrinter init function, if available])
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPARSER, $LLVM_NATIVE_ASMPARSER,
+ [LLVM name for the native AsmParser init function, if available])
+ fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_DISASSEMBLER, $LLVM_NATIVE_DISASSEMBLER,
+ [LLVM name for the native Disassembler init function, if available])
+ fi
+ fi
+done
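+
+dnl Clients can then initialize the native target without hard-coding its
+dnl name; a sketch of the usual pattern (cf. llvm/Support/TargetSelect.h):
+dnl   #ifdef LLVM_NATIVE_TARGET
+dnl     LLVM_NATIVE_TARGETINFO();
+dnl     LLVM_NATIVE_TARGET();
+dnl     LLVM_NATIVE_TARGETMC();
+dnl   #endif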
+
+# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual
+# target feature def files.
+LLVM_ENUM_TARGETS=""
+LLVM_ENUM_ASM_PRINTERS=""
+LLVM_ENUM_ASM_PARSERS=""
+LLVM_ENUM_DISASSEMBLERS=""
+for target_to_build in $TARGETS_TO_BUILD; do
+ LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
+ if test -f ${srcdir}/lib/Target/${target_to_build}/*AsmPrinter.cpp ; then
+ LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
+ fi
+ if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then
+ LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS";
+ fi
+ if test -f ${srcdir}/lib/Target/${target_to_build}/Disassembler/Makefile ; then
+ LLVM_ENUM_DISASSEMBLERS="LLVM_DISASSEMBLER($target_to_build) $LLVM_ENUM_DISASSEMBLERS";
+ fi
+done
+AC_SUBST(LLVM_ENUM_TARGETS)
+AC_SUBST(LLVM_ENUM_ASM_PRINTERS)
+AC_SUBST(LLVM_ENUM_ASM_PARSERS)
+AC_SUBST(LLVM_ENUM_DISASSEMBLERS)
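+
+dnl These substitutions land in the generated *.def files, which clients
+dnl expand with an X-macro; a sketch of the idiom:
+dnl   #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
+dnl   #include "llvm/Config/Targets.def"
+dnl   #undef LLVM_TARGET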
+
+dnl Override the option to use for optimized builds.
+AC_ARG_WITH(optimize-option,
+ AS_HELP_STRING([--with-optimize-option],
+ [Select the compiler options to use for optimized builds]),,
+ withval=default)
+AC_MSG_CHECKING([optimization flags])
+case "$withval" in
+ default)
+ case "$llvm_cv_os_type" in
+ FreeBSD) optimize_option=-O2 ;;
+ MingW) optimize_option=-O2 ;;
+ *) optimize_option=-O3 ;;
+ esac ;;
+ *) optimize_option="$withval" ;;
+esac
+AC_SUBST(OPTIMIZE_OPTION,$optimize_option)
+AC_MSG_RESULT([$optimize_option])
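+
+dnl For example (the flags are illustrative):
+dnl   ./configure --with-optimize-option="-O2 -g"
+dnl replaces the per-OS default chosen above.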
+
+dnl Specify extra build options
+AC_ARG_WITH(extra-options,
+ AS_HELP_STRING([--with-extra-options],
+ [Specify additional options to compile LLVM with]),,
+ withval=default)
+case "$withval" in
+ default) EXTRA_OPTIONS= ;;
+ *) EXTRA_OPTIONS=$withval ;;
+esac
+AC_SUBST(EXTRA_OPTIONS,$EXTRA_OPTIONS)
+
+dnl Specify extra linker build options
+AC_ARG_WITH(extra-ld-options,
+ AS_HELP_STRING([--with-extra-ld-options],
+ [Specify additional options to link LLVM with]),,
+ withval=default)
+case "$withval" in
+ default) EXTRA_LD_OPTIONS= ;;
+ *) EXTRA_LD_OPTIONS=$withval ;;
+esac
+AC_SUBST(EXTRA_LD_OPTIONS,$EXTRA_LD_OPTIONS)
+
+dnl Allow specific bindings to be specified for building (or not)
+AC_ARG_ENABLE([bindings],AS_HELP_STRING([--enable-bindings],
+ [Build specific language bindings: all,auto,none,{binding-name} (default=auto)]),,
+ enableval=default)
+BINDINGS_TO_BUILD=""
+case "$enableval" in
+ yes | default | auto) BINDINGS_TO_BUILD="auto" ;;
+ all ) BINDINGS_TO_BUILD="ocaml" ;;
+ none | no) BINDINGS_TO_BUILD="" ;;
+ *)for a_binding in `echo $enableval|sed -e 's/,/ /g' ` ; do
+ case "$a_binding" in
+ ocaml) BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD" ;;
+ *) AC_MSG_ERROR([Unrecognized binding $a_binding]) ;;
+ esac
+ done
+ ;;
+esac
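+
+dnl Example invocations (ocaml is the only concrete binding at present):
+dnl   ./configure --enable-bindings=ocaml
+dnl   ./configure --enable-bindings=none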
+
+dnl Allow the ocaml libdir to be overridden. This could go in a configure
+dnl script for bindings/ocaml/configure, except that its auto value depends on
+dnl OCAMLC, which is found here to support tests.
+AC_ARG_WITH([ocaml-libdir],
+ [AS_HELP_STRING([--with-ocaml-libdir],
+ [Specify install location for ocaml bindings (default is stdlib)])],
+ [],
+ [withval=auto])
+case "$withval" in
+ auto) with_ocaml_libdir="$withval" ;;
+ /* | [[A-Za-z]]:[[\\/]]*) with_ocaml_libdir="$withval" ;;
+ *) AC_MSG_ERROR([Invalid path for --with-ocaml-libdir. Provide full path]) ;;
+esac
+
+AC_ARG_WITH(clang-resource-dir,
+ AS_HELP_STRING([--with-clang-resource-dir],
+ [Relative directory from the Clang binary for resource files]),,
+ withval="")
+AC_DEFINE_UNQUOTED(CLANG_RESOURCE_DIR,"$withval",
+ [Relative directory for resource files])
+
+AC_ARG_WITH(c-include-dirs,
+ AS_HELP_STRING([--with-c-include-dirs],
+ [Colon separated list of directories clang will search for headers]),,
+ withval="")
+AC_DEFINE_UNQUOTED(C_INCLUDE_DIRS,"$withval",
+ [Directories clang will search for headers])
+
+# Clang normally uses the system c++ headers and libraries. With this option,
+# clang will use the ones provided by a gcc installation instead. This option should
+# be passed the same value that was used with --prefix when configuring gcc.
+AC_ARG_WITH(gcc-toolchain,
+ AS_HELP_STRING([--with-gcc-toolchain],
+ [Directory where gcc is installed.]),,
+ withval="")
+AC_DEFINE_UNQUOTED(GCC_INSTALL_PREFIX,"$withval",
+ [Directory where gcc is installed.])
+
+dnl Allow linking of LLVM with GPLv3 binutils code.
+AC_ARG_WITH(binutils-include,
+ AS_HELP_STRING([--with-binutils-include],
+ [Specify path to binutils/include/ containing plugin-api.h file for gold plugin.]),,
+ withval=default)
+case "$withval" in
+ default) WITH_BINUTILS_INCDIR=default ;;
+ /* | [[A-Za-z]]:[[\\/]]*) WITH_BINUTILS_INCDIR=$withval ;;
+ *) AC_MSG_ERROR([Invalid path for --with-binutils-include. Provide full path]) ;;
+esac
+if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then
+ AC_SUBST(BINUTILS_INCDIR,$WITH_BINUTILS_INCDIR)
+ if test ! -f "$WITH_BINUTILS_INCDIR/plugin-api.h"; then
+ echo "$WITH_BINUTILS_INCDIR/plugin-api.h"
+ AC_MSG_ERROR([Invalid path to directory containing plugin-api.h.]);
+ fi
+fi
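+
+dnl For example (the path is illustrative), pointing at a binutils tree:
+dnl   ./configure --with-binutils-include=/path/to/binutils/include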
+
+dnl Specify the URL where bug reports should be submitted.
+AC_ARG_WITH(bug-report-url,
+ AS_HELP_STRING([--with-bug-report-url],
+ [Specify the URL where bug reports should be submitted (default=http://llvm.org/bugs/)]),,
+ withval="http://llvm.org/bugs/")
+AC_DEFINE_UNQUOTED(BUG_REPORT_URL,"$withval",
+ [Bug report URL.])
+
+dnl --enable-libffi : check whether the user wants to turn off libffi:
+AC_ARG_ENABLE(libffi,AS_HELP_STRING(
+ --enable-libffi,[Check for the presence of libffi (default is NO)]),
+ [case "$enableval" in
+ yes) llvm_cv_enable_libffi="yes" ;;
+ no) llvm_cv_enable_libffi="no" ;;
+ *) AC_MSG_ERROR([Invalid setting for --enable-libffi. Use "yes" or "no"]) ;;
+ esac],
+ llvm_cv_enable_libffi=no)
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 4: Check for programs we need and that they are the right version
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+AC_PROG_NM
+AC_SUBST(NM)
+
+dnl Check for the tools that the makefiles require
+AC_CHECK_GNU_MAKE
+AC_PROG_LN_S
+AC_PATH_PROG(CMP, [cmp], [cmp])
+AC_PATH_PROG(CP, [cp], [cp])
+AC_PATH_PROG(DATE, [date], [date])
+AC_PATH_PROG(FIND, [find], [find])
+AC_PATH_PROG(GREP, [grep], [grep])
+AC_PATH_PROG(MKDIR,[mkdir],[mkdir])
+AC_PATH_PROG(MV, [mv], [mv])
+AC_PROG_RANLIB
+AC_CHECK_TOOL(AR, ar, false)
+AC_PATH_PROG(RM, [rm], [rm])
+AC_PATH_PROG(SED, [sed], [sed])
+AC_PATH_PROG(TAR, [tar], [gtar])
+AC_PATH_PROG(BINPWD,[pwd], [pwd])
+
+dnl Looking for misc. graph plotting software
+AC_PATH_PROG(GRAPHVIZ, [Graphviz], [echo Graphviz])
+if test "$GRAPHVIZ" != "echo Graphviz" ; then
+ AC_DEFINE([HAVE_GRAPHVIZ],[1],[Define if the Graphviz program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ GRAPHVIZ=`echo $GRAPHVIZ | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_GRAPHVIZ],"$GRAPHVIZ${EXEEXT}",
+ [Define to path to Graphviz program if found or 'echo Graphviz' otherwise])
+fi
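+dnl (The sed above rewrites an MSYS-style path such as /c/Graphviz/bin/dot
+dnl into c:/Graphviz/bin/dot so native Win32 tools can use it; the same
+dnl translation is repeated for each graphing tool found below.)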
+AC_PATH_PROG(DOT, [dot], [echo dot])
+if test "$DOT" != "echo dot" ; then
+ AC_DEFINE([HAVE_DOT],[1],[Define if the dot program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ DOT=`echo $DOT | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_DOT],"$DOT${EXEEXT}",
+ [Define to path to dot program if found or 'echo dot' otherwise])
+fi
+AC_PATH_PROG(FDP, [fdp], [echo fdp])
+if test "$FDP" != "echo fdp" ; then
+  AC_DEFINE([HAVE_FDP],[1],[Define if the fdp program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ FDP=`echo $FDP | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_FDP],"$FDP${EXEEXT}",
+ [Define to path to fdp program if found or 'echo fdp' otherwise])
+fi
+AC_PATH_PROG(NEATO, [neato], [echo neato])
+if test "$NEATO" != "echo neato" ; then
+  AC_DEFINE([HAVE_NEATO],[1],[Define if the neato program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ NEATO=`echo $NEATO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_NEATO],"$NEATO${EXEEXT}",
+ [Define to path to neato program if found or 'echo neato' otherwise])
+fi
+AC_PATH_PROG(TWOPI, [twopi], [echo twopi])
+if test "$TWOPI" != "echo twopi" ; then
+  AC_DEFINE([HAVE_TWOPI],[1],[Define if the twopi program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ TWOPI=`echo $TWOPI | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_TWOPI],"$TWOPI${EXEEXT}",
+ [Define to path to twopi program if found or 'echo twopi' otherwise])
+fi
+AC_PATH_PROG(CIRCO, [circo], [echo circo])
+if test "$CIRCO" != "echo circo" ; then
+  AC_DEFINE([HAVE_CIRCO],[1],[Define if the circo program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ CIRCO=`echo $CIRCO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_CIRCO],"$CIRCO${EXEEXT}",
+ [Define to path to circo program if found or 'echo circo' otherwise])
+fi
+AC_PATH_PROGS(GV, [gv gsview32], [echo gv])
+if test "$GV" != "echo gv" ; then
+ AC_DEFINE([HAVE_GV],[1],[Define if the gv program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ GV=`echo $GV | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_GV],"$GV${EXEEXT}",
+ [Define to path to gv program if found or 'echo gv' otherwise])
+fi
+AC_PATH_PROG(DOTTY, [dotty], [echo dotty])
+if test "$DOTTY" != "echo dotty" ; then
+ AC_DEFINE([HAVE_DOTTY],[1],[Define if the dotty program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ DOTTY=`echo $DOTTY | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_DOTTY],"$DOTTY${EXEEXT}",
+ [Define to path to dotty program if found or 'echo dotty' otherwise])
+fi
+AC_PATH_PROG(XDOT_PY, [xdot.py], [echo xdot.py])
+if test "$XDOT_PY" != "echo xdot.py" ; then
+ AC_DEFINE([HAVE_XDOT_PY],[1],[Define if the xdot.py program is available])
+ dnl If we're targeting for mingw we should emit windows paths, not msys
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ XDOT_PY=`echo $XDOT_PY | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+ fi
+ AC_DEFINE_UNQUOTED([LLVM_PATH_XDOT_PY],"$XDOT_PY${EXEEXT}",
+ [Define to path to xdot.py program if found or 'echo xdot.py' otherwise])
+fi
+
+dnl Find the install program
+AC_PROG_INSTALL
+dnl Prepend src dir to install path dir if it's a relative path
+dnl This is a hack for installs that take place in something other
+dnl than the top level.
+case "$INSTALL" in
+ [[\\/$]]* | ?:[[\\/]]* ) ;;
+ *) INSTALL="\\\$(TOPSRCDIR)/$INSTALL" ;;
+esac
+
+dnl Checks for documentation and testing tools that we can do without. If these
+dnl are not found then they are set to "true" which always succeeds but does
+dnl nothing. This just lets the build output show that we could have done
+dnl something if the tool was available.
+AC_PATH_PROG(BZIP2, [bzip2])
+AC_PATH_PROG(CAT, [cat])
+AC_PATH_PROG(DOXYGEN, [doxygen])
+AC_PATH_PROG(GROFF, [groff])
+AC_PATH_PROG(GZIPBIN, [gzip])
+AC_PATH_PROG(POD2HTML, [pod2html])
+AC_PATH_PROG(POD2MAN, [pod2man])
+AC_PATH_PROG(PDFROFF, [pdfroff])
+AC_PATH_PROG(RUNTEST, [runtest])
+DJ_AC_PATH_TCLSH
+AC_PATH_PROG(ZIP, [zip])
+AC_PATH_PROGS(OCAMLC, [ocamlc])
+AC_PATH_PROGS(OCAMLOPT, [ocamlopt])
+AC_PATH_PROGS(OCAMLDEP, [ocamldep])
+AC_PATH_PROGS(OCAMLDOC, [ocamldoc])
+AC_PATH_PROGS(GAS, [gas as])
+
+dnl Get the version of the linker in use.
+AC_LINK_GET_VERSION
+
+dnl Determine whether the linker supports the -R option.
+AC_LINK_USE_R
+
+dnl Determine whether the linker supports the -export-dynamic option.
+AC_LINK_EXPORT_DYNAMIC
+
+dnl Determine whether the linker supports the --version-script option.
+AC_LINK_VERSION_SCRIPT
+
+dnl Check for libtool and the library that has dlopen function (which must come
+dnl before the AC_PROG_LIBTOOL check in order to enable dlopening libraries with
+dnl libtool).
+AC_LIBTOOL_DLOPEN
+AC_LIB_LTDL
+
+AC_MSG_CHECKING([tool compatibility])
+
+dnl Ensure that compilation tools are GCC or a GNU compatible compiler such as
+dnl ICC; we use GCC specific options in the makefiles so the compiler needs
+dnl to support those options.
+dnl "icc" emits gcc signatures
+dnl "icc -no-gcc" emits no gcc signature BUT is still compatible
+ICC=no
+IXX=no
+case $CC in
+ icc*|icpc*)
+ ICC=yes
+ IXX=yes
+ ;;
+ *)
+ ;;
+esac
+
+if test "$GCC" != "yes" && test "$ICC" != "yes"
+then
+ AC_MSG_ERROR([gcc|icc required but not found])
+fi
+
+dnl Ensure that compilation tools are compatible with GCC extensions
+if test "$GXX" != "yes" && test "$IXX" != "yes"
+then
+ AC_MSG_ERROR([g++|clang++|icc required but not found])
+fi
+
+dnl Verify that GCC is version 3.0 or higher
+if test "$GCC" = "yes"
+then
+ AC_COMPILE_IFELSE([[#if !defined(__GNUC__) || __GNUC__ < 3
+#error Unsupported GCC version
+#endif
+]], [], [AC_MSG_ERROR([gcc 3.x required, but you have a lower version])])
+fi
+
+dnl Check for GNU Make. We use its extensions, so don't build without it
+if test -z "$llvm_cv_gnu_make_command"
+then
+ AC_MSG_ERROR([GNU Make required but not found])
+fi
+
+dnl Tool compatibility is okay if we make it here.
+AC_MSG_RESULT([ok])
+
+dnl Check optional compiler flags.
+AC_MSG_CHECKING([optional compiler flags])
+CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
+CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
+CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
+AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT])
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 5: Check for libraries
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+AC_CHECK_LIB(m,sin)
+if test "$llvm_cv_os_type" = "MingW" ; then
+ AC_CHECK_LIB(imagehlp, main)
+ AC_CHECK_LIB(psapi, main)
+fi
+
+dnl dlopen() is required for plugin support.
+AC_SEARCH_LIBS(dlopen,dl,AC_DEFINE([HAVE_DLOPEN],[1],
+ [Define if dlopen() is available on this platform.]),
+ AC_MSG_WARN([dlopen() not found - disabling plugin support]))
+
+dnl libffi is optional; used to call external functions from the interpreter
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+ AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1],
+ [Define if libffi is available on this platform.]),
+ AC_MSG_ERROR([libffi not found - configure without --enable-libffi to compile without it]))
+fi
+
+dnl mallinfo is optional; the code can compile (minus features) without it
+AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1],
+ [Define if mallinfo() is available on this platform.]))
+
+dnl pthread locking functions are optional - but llvm will not be thread-safe
+dnl without locks.
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+ AC_CHECK_LIB(pthread, pthread_mutex_init)
+ AC_SEARCH_LIBS(pthread_mutex_lock,pthread,
+ AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1],
+ [Have pthread_mutex_lock]))
+ AC_SEARCH_LIBS(pthread_rwlock_init,pthread,
+ AC_DEFINE([HAVE_PTHREAD_RWLOCK_INIT],[1],
+ [Have pthread_rwlock_init]))
+ AC_SEARCH_LIBS(pthread_getspecific,pthread,
+ AC_DEFINE([HAVE_PTHREAD_GETSPECIFIC],[1],
+ [Have pthread_getspecific]))
+fi
+
+dnl Allow extra x86-disassembler library
+AC_ARG_WITH(udis86,
+ AS_HELP_STRING([--with-udis86=<path>],
+ [Use udis86 external x86 disassembler library]),
+ [
+ AC_SUBST(USE_UDIS86, [1])
+ case "$withval" in
+ /usr/lib|yes) ;;
+ *) LDFLAGS="$LDFLAGS -L${withval}" ;;
+ esac
+ AC_CHECK_LIB(udis86, ud_init, [], [
+ echo "Error! You need to have libudis86 around."
+    exit 1
+ ])
+ ],
+ AC_SUBST(USE_UDIS86, [0]))
+AC_DEFINE_UNQUOTED([USE_UDIS86],$USE_UDIS86,
+  [Define if the udis86 library is used])
+
+dnl Allow OProfile support for JIT output.
+AC_ARG_WITH(oprofile,
+ AS_HELP_STRING([--with-oprofile=<prefix>],
+ [Tell OProfile >= 0.9.4 how to symbolize JIT output]),
+ [
+ AC_SUBST(USE_OPROFILE, [1])
+ case "$withval" in
+ /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;;
+ no) llvm_cv_oppath=
+ AC_SUBST(USE_OPROFILE, [0]) ;;
+ *) llvm_cv_oppath="${withval}/lib/oprofile"
+ CPPFLAGS="-I${withval}/include";;
+ esac
+ if test -n "$llvm_cv_oppath" ; then
+ LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+ dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744:
+ dnl libbfd is not included properly in libopagent in some Debian
+ dnl versions. If libbfd isn't found at all, we assume opagent works
+ dnl anyway.
+ AC_SEARCH_LIBS(bfd_init, bfd, [], [])
+ AC_SEARCH_LIBS(op_open_agent, opagent, [], [
+ echo "Error! You need to have libopagent around."
+    exit 1
+ ])
+ AC_CHECK_HEADER([opagent.h], [], [
+ echo "Error! You need to have opagent.h around."
+    exit 1
+ ])
+ fi
+ ],
+ [
+ AC_SUBST(USE_OPROFILE, [0])
+ ])
+AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE,
+ [Define if we have the oprofile JIT-support library])
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 6: Check for header files
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl First, use autoconf provided macros for specific headers that we need
+dnl We don't check for ancient stuff or things that are guaranteed to be there
+dnl by the C++ standard. We always use the <cfoo> versions of <foo.h> C headers.
+dnl Generally we're looking for POSIX headers.
+AC_HEADER_DIRENT
+AC_HEADER_MMAP_ANONYMOUS
+AC_HEADER_STAT
+AC_HEADER_SYS_WAIT
+AC_HEADER_TIME
+
+AC_CHECK_HEADERS([dlfcn.h execinfo.h fcntl.h inttypes.h limits.h link.h])
+AC_CHECK_HEADERS([malloc.h setjmp.h signal.h stdint.h termios.h unistd.h])
+AC_CHECK_HEADERS([utime.h windows.h])
+AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
+AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h])
+AC_CHECK_HEADERS([valgrind/valgrind.h])
+AC_CHECK_HEADERS([fenv.h])
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+ AC_CHECK_HEADERS(pthread.h,
+ AC_SUBST(HAVE_PTHREAD, 1),
+ AC_SUBST(HAVE_PTHREAD, 0))
+else
+ AC_SUBST(HAVE_PTHREAD, 0)
+fi
+
+dnl Try to find ffi.h.
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+ AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
+fi
+
+dnl Try to find Darwin specific crash reporting libraries.
+AC_CHECK_HEADERS([CrashReporterClient.h])
+
+dnl Try to find Darwin specific crash reporting global.
+AC_MSG_CHECKING([__crashreporter_info__])
+AC_LINK_IFELSE(
+ AC_LANG_SOURCE(
+ [[extern const char *__crashreporter_info__;
+ int main() {
+ __crashreporter_info__ = "test";
+ return 0;
+ }
+ ]]),
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_CRASHREPORTER_INFO, 1, Can use __crashreporter_info__),
+ AC_MSG_RESULT(no)
+ AC_DEFINE(HAVE_CRASHREPORTER_INFO, 0,
+ Define if __crashreporter_info__ exists.))
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 7: Check for types and structures
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+AC_HUGE_VAL_CHECK
+AC_TYPE_PID_T
+AC_TYPE_SIZE_T
+AC_DEFINE_UNQUOTED([RETSIGTYPE],[void],[Define as the return type of signal handlers (`int' or `void').])
+AC_STRUCT_TM
+AC_CHECK_TYPES([int64_t],,AC_MSG_ERROR([Type int64_t required but not found]))
+AC_CHECK_TYPES([uint64_t],,
+ AC_CHECK_TYPES([u_int64_t],,
+ AC_MSG_ERROR([Type uint64_t or u_int64_t required but not found])))
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 8: Check for specific functions needed
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+AC_CHECK_FUNCS([backtrace ceilf floorf roundf rintf nearbyintf getcwd ])
+AC_CHECK_FUNCS([powf fmodf strtof round ])
+AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ])
+AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
+AC_CHECK_FUNCS([mktemp posix_spawn realpath sbrk setrlimit strdup ])
+AC_CHECK_FUNCS([strerror strerror_r setenv ])
+AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
+AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev])
+AC_C_PRINTF_A
+AC_FUNC_RAND48
+
+dnl Check for the "Secure API" declarations on Windows environments.
+AC_CHECK_DECLS([strerror_s])
+
+dnl Check symbols in libgcc.a for JIT on Mingw.
+if test "$llvm_cv_os_type" = "MingW" ; then
+ AC_CHECK_LIB(gcc,_alloca,AC_DEFINE([HAVE__ALLOCA],[1],[Have host's _alloca]))
+ AC_CHECK_LIB(gcc,__alloca,AC_DEFINE([HAVE___ALLOCA],[1],[Have host's __alloca]))
+ AC_CHECK_LIB(gcc,__chkstk,AC_DEFINE([HAVE___CHKSTK],[1],[Have host's __chkstk]))
+ AC_CHECK_LIB(gcc,___chkstk,AC_DEFINE([HAVE____CHKSTK],[1],[Have host's ___chkstk]))
+
+ AC_CHECK_LIB(gcc,__ashldi3,AC_DEFINE([HAVE___ASHLDI3],[1],[Have host's __ashldi3]))
+ AC_CHECK_LIB(gcc,__ashrdi3,AC_DEFINE([HAVE___ASHRDI3],[1],[Have host's __ashrdi3]))
+ AC_CHECK_LIB(gcc,__divdi3,AC_DEFINE([HAVE___DIVDI3],[1],[Have host's __divdi3]))
+ AC_CHECK_LIB(gcc,__fixdfdi,AC_DEFINE([HAVE___FIXDFDI],[1],[Have host's __fixdfdi]))
+ AC_CHECK_LIB(gcc,__fixsfdi,AC_DEFINE([HAVE___FIXSFDI],[1],[Have host's __fixsfdi]))
+ AC_CHECK_LIB(gcc,__floatdidf,AC_DEFINE([HAVE___FLOATDIDF],[1],[Have host's __floatdidf]))
+ AC_CHECK_LIB(gcc,__lshrdi3,AC_DEFINE([HAVE___LSHRDI3],[1],[Have host's __lshrdi3]))
+ AC_CHECK_LIB(gcc,__moddi3,AC_DEFINE([HAVE___MODDI3],[1],[Have host's __moddi3]))
+ AC_CHECK_LIB(gcc,__udivdi3,AC_DEFINE([HAVE___UDIVDI3],[1],[Have host's __udivdi3]))
+ AC_CHECK_LIB(gcc,__umoddi3,AC_DEFINE([HAVE___UMODDI3],[1],[Have host's __umoddi3]))
+
+ AC_CHECK_LIB(gcc,__main,AC_DEFINE([HAVE___MAIN],[1],[Have host's __main]))
+ AC_CHECK_LIB(gcc,__cmpdi2,AC_DEFINE([HAVE___CMPDI2],[1],[Have host's __cmpdi2]))
+fi
+
+dnl Check Win32 API EnumerateLoadedModules.
+if test "$llvm_cv_os_type" = "MingW" ; then
+ AC_MSG_CHECKING([whether EnumerateLoadedModules() accepts new decl])
+ AC_COMPILE_IFELSE([[#include <windows.h>
+#include <imagehlp.h>
+extern void foo(PENUMLOADED_MODULES_CALLBACK);
+extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));]],
+[
+ AC_MSG_RESULT([yes])
+ llvm_cv_win32_elmcb_pcstr="PCSTR"
+],
+[
+ AC_MSG_RESULT([no])
+ llvm_cv_win32_elmcb_pcstr="PSTR"
+])
+ AC_DEFINE_UNQUOTED([WIN32_ELMCB_PCSTR],$llvm_cv_win32_elmcb_pcstr,[Type of 1st arg on ELM Callback])
+fi
+
+dnl Check for variations in the Standard C++ library and STL. These macros are
+dnl provided by LLVM in the autoconf/m4 directory.
+AC_FUNC_ISNAN
+AC_FUNC_ISINF
+
+dnl Check for mmap support. We also need to know if /dev/zero is required to
+dnl be opened for allocating RWX memory.
+if test "$llvm_cv_platform_type" = "Unix" ; then
+ AC_FUNC_MMAP
+ AC_FUNC_MMAP_FILE
+ AC_NEED_DEV_ZERO_FOR_MMAP
+
+ if test "$ac_cv_func_mmap_fixed_mapped" = "no"
+ then
+ AC_MSG_WARN([mmap() of a fixed address required but not supported])
+ fi
+ if test "$ac_cv_func_mmap_file" = "no"
+ then
+ AC_MSG_WARN([mmap() of files required but not found])
+ fi
+fi
+
+dnl atomic builtins are required for threading support.
+AC_MSG_CHECKING(for GCC atomic builtins)
+dnl Since we'll be using these atomic builtins in C++ files we should test
+dnl the C++ compiler.
+AC_LANG_PUSH([C++])
+AC_LINK_IFELSE(
+ AC_LANG_SOURCE(
+ [[int main() {
+ volatile unsigned long val = 1;
+ __sync_synchronize();
+ __sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
+ return 0;
+ }
+ ]]),
+ AC_LANG_POP([C++])
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(LLVM_HAS_ATOMICS, 1, Has gcc/MSVC atomic intrinsics),
+ AC_MSG_RESULT(no)
+ AC_DEFINE(LLVM_HAS_ATOMICS, 0, Has gcc/MSVC atomic intrinsics)
+ AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing]))
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 9: Additional checks, variables, etc.
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl Handle 32-bit linux systems running a 64-bit kernel.
+dnl This has to come after section 4 because it invokes the compiler.
+if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
+ AC_IS_LINUX_MIXED
+ if test "$llvm_cv_linux_mixed" = "yes"; then
+ llvm_cv_target_arch="x86"
+ ARCH="x86"
+ fi
+fi
+
+dnl Check whether __dso_handle is present
+AC_CHECK_FUNCS([__dso_handle])
+
+dnl Propagate the shared library extension that the libltdl checks did to
+dnl the Makefiles so we can use it there too
+AC_SUBST(SHLIBEXT,$libltdl_cv_shlibext)
+
+dnl Propagate the run-time library path variable that the libltdl
+dnl checks found to the Makefiles so we can use it there too
+AC_SUBST(SHLIBPATH_VAR,$libltdl_cv_shlibpath_var)
+
+# Translate the various configuration directories and other basic
+# information into substitutions that will end up in Makefile.config.in
+# so that these configured values can be used by the makefiles.
+if test "${prefix}" = "NONE" ; then
+ prefix="/usr/local"
+fi
+eval LLVM_PREFIX="${prefix}";
+eval LLVM_BINDIR="${prefix}/bin";
+eval LLVM_LIBDIR="${prefix}/lib";
+eval LLVM_DATADIR="${prefix}/share/llvm";
+eval LLVM_DOCSDIR="${prefix}/share/doc/llvm";
+eval LLVM_ETCDIR="${prefix}/etc/llvm";
+eval LLVM_INCLUDEDIR="${prefix}/include";
+eval LLVM_INFODIR="${prefix}/info";
+eval LLVM_MANDIR="${prefix}/man";
+LLVM_CONFIGTIME=`date`
+AC_SUBST(LLVM_PREFIX)
+AC_SUBST(LLVM_BINDIR)
+AC_SUBST(LLVM_LIBDIR)
+AC_SUBST(LLVM_DATADIR)
+AC_SUBST(LLVM_DOCSDIR)
+AC_SUBST(LLVM_ETCDIR)
+AC_SUBST(LLVM_INCLUDEDIR)
+AC_SUBST(LLVM_INFODIR)
+AC_SUBST(LLVM_MANDIR)
+AC_SUBST(LLVM_CONFIGTIME)
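+
+# With the default prefix these evaluate to, for example:
+#   LLVM_BINDIR=/usr/local/bin
+#   LLVM_INCLUDEDIR=/usr/local/include
+#   LLVM_DATADIR=/usr/local/share/llvm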
+
+# Place the various directories into the config.h file as #defines so that we
+# can know about the installation paths within LLVM.
+AC_DEFINE_UNQUOTED(LLVM_PREFIX,"$LLVM_PREFIX",
+ [Installation prefix directory])
+AC_DEFINE_UNQUOTED(LLVM_BINDIR, "$LLVM_BINDIR",
+ [Installation directory for binary executables])
+AC_DEFINE_UNQUOTED(LLVM_LIBDIR, "$LLVM_LIBDIR",
+ [Installation directory for libraries])
+AC_DEFINE_UNQUOTED(LLVM_DATADIR, "$LLVM_DATADIR",
+ [Installation directory for data files])
+AC_DEFINE_UNQUOTED(LLVM_DOCSDIR, "$LLVM_DOCSDIR",
+ [Installation directory for documentation])
+AC_DEFINE_UNQUOTED(LLVM_ETCDIR, "$LLVM_ETCDIR",
+ [Installation directory for config files])
+AC_DEFINE_UNQUOTED(LLVM_INCLUDEDIR, "$LLVM_INCLUDEDIR",
+ [Installation directory for include files])
+AC_DEFINE_UNQUOTED(LLVM_INFODIR, "$LLVM_INFODIR",
+ [Installation directory for .info files])
+AC_DEFINE_UNQUOTED(LLVM_MANDIR, "$LLVM_MANDIR",
+ [Installation directory for man pages])
+AC_DEFINE_UNQUOTED(LLVM_CONFIGTIME, "$LLVM_CONFIGTIME",
+ [Time at which LLVM was configured])
+AC_DEFINE_UNQUOTED(LLVM_DEFAULT_TARGET_TRIPLE, "$target",
+ [Target triple LLVM will generate code for by default])
+
+# Determine which bindings to build.
+if test "$BINDINGS_TO_BUILD" = auto ; then
+ BINDINGS_TO_BUILD=""
+ if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then
+ BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD"
+ fi
+fi
+AC_SUBST(BINDINGS_TO_BUILD,$BINDINGS_TO_BUILD)
+
+# This isn't really configurey, but it avoids having to repeat the list in
+# other files.
+AC_SUBST(ALL_BINDINGS,ocaml)
+
+# Do any work necessary to ensure that bindings have what they need.
+binding_prereqs_failed=0
+for a_binding in $BINDINGS_TO_BUILD ; do
+ case "$a_binding" in
+ ocaml)
+ if test "x$OCAMLC" = x ; then
+ AC_MSG_WARN([--enable-bindings=ocaml specified, but ocamlc not found. Try configure OCAMLC=/path/to/ocamlc])
+ binding_prereqs_failed=1
+ fi
+ if test "x$OCAMLDEP" = x ; then
+ AC_MSG_WARN([--enable-bindings=ocaml specified, but ocamldep not found. Try configure OCAMLDEP=/path/to/ocamldep])
+ binding_prereqs_failed=1
+ fi
+ if test "x$OCAMLOPT" = x ; then
+ AC_MSG_WARN([--enable-bindings=ocaml specified, but ocamlopt not found. Try configure OCAMLOPT=/path/to/ocamlopt])
+ dnl ocamlopt is optional!
+ fi
+ if test "x$with_ocaml_libdir" != xauto ; then
+ AC_SUBST(OCAML_LIBDIR,$with_ocaml_libdir)
+ else
+ ocaml_stdlib="`"$OCAMLC" -where`"
+ if test "$LLVM_PREFIX" '<' "$ocaml_stdlib" -a "$ocaml_stdlib" '<' "$LLVM_PREFIX~"
+ then
+ # ocaml stdlib is beneath our prefix; use stdlib
+ AC_SUBST(OCAML_LIBDIR,$ocaml_stdlib)
+ else
+ # ocaml stdlib is outside our prefix; use libdir/ocaml
+ AC_SUBST(OCAML_LIBDIR,$LLVM_LIBDIR/ocaml)
+ fi
+ fi
+ ;;
+ esac
+done
+if test "$binding_prereqs_failed" = 1 ; then
+  AC_MSG_ERROR([Prerequisites for bindings not satisfied. Fix them or use configure --disable-bindings.])
+fi
+
+dnl Determine whether the compiler supports -fvisibility-inlines-hidden.
+AC_CXX_USE_VISIBILITY_INLINES_HIDDEN
+
+dnl Determine linker rpath flag
+if test "$llvm_cv_link_use_r" = "yes" ; then
+ RPATH="-Wl,-R"
+else
+ RPATH="-Wl,-rpath"
+fi
+AC_SUBST(RPATH)
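+
+dnl Makefiles can then embed a run-time library search path; a sketch (the
+dnl exact usage is illustrative):
+dnl   LDFLAGS += $(RPATH) -Wl,$(LLVM_LIBDIR)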
+
+dnl Determine linker rdynamic flag
+if test "$llvm_cv_link_use_export_dynamic" = "yes" ; then
+ RDYNAMIC="-Wl,-export-dynamic"
+else
+ RDYNAMIC=""
+fi
+AC_SUBST(RDYNAMIC)
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 10: Specify the output files and generate it
+dnl===
+dnl===-----------------------------------------------------------------------===
dnl **************************************************************************
-dnl * Create the output files
+dnl End LLVM configure.ac Import
dnl **************************************************************************
+dnl Configure a common Makefile
+AC_CONFIG_FILES(Makefile.common)
+AC_CONFIG_FILES(Makefile.llvm.config)
+
+dnl Configure project makefiles
+dnl List every Makefile that exists within your source tree
+AC_CONFIG_MAKEFILE(Makefile)
+AC_CONFIG_MAKEFILE(lib/Makefile)
+AC_CONFIG_MAKEFILE(lib/sample/Makefile)
+AC_CONFIG_MAKEFILE(tools/Makefile)
+AC_CONFIG_MAKEFILE(tools/sample/Makefile)
+
dnl This must be last
AC_OUTPUT
diff --git a/projects/sample/autoconf/install-sh b/projects/sample/autoconf/install-sh
new file mode 100755
index 000000000000..dd97db7aa1ca
--- /dev/null
+++ b/projects/sample/autoconf/install-sh
@@ -0,0 +1,322 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2004-09-10.20
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch. It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+chmodcmd="$chmodprog 0755"
+chowncmd=
+chgrpcmd=
+stripcmd=
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=
+dst=
+dir_arg=
+dstarg=
+no_target_directory=
+
+usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+ or: $0 [OPTION]... SRCFILES... DIRECTORY
+ or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+ or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+-c (ignored)
+-d create directories instead of installing files.
+-g GROUP $chgrpprog installed files to GROUP.
+-m MODE $chmodprog installed files to MODE.
+-o USER $chownprog installed files to USER.
+-s $stripprog installed files.
+-t DIRECTORY install into DIRECTORY.
+-T report an error if DSTFILE is a directory.
+--help display this help and exit.
+--version display version info and exit.
+
+Environment variables override the default commands:
+ CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
+"
+
+while test -n "$1"; do
+ case $1 in
+ -c) shift
+ continue;;
+
+ -d) dir_arg=true
+ shift
+ continue;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift
+ shift
+ continue;;
+
+ --help) echo "$usage"; exit 0;;
+
+ -m) chmodcmd="$chmodprog $2"
+ shift
+ shift
+ continue;;
+
+ -o) chowncmd="$chownprog $2"
+ shift
+ shift
+ continue;;
+
+ -s) stripcmd=$stripprog
+ shift
+ continue;;
+
+ -t) dstarg=$2
+ shift
+ shift
+ continue;;
+
+ -T) no_target_directory=true
+ shift
+ continue;;
+
+ --version) echo "$0 $scriptversion"; exit 0;;
+
+ *) # When -d is used, all remaining arguments are directories to create.
+ # When -t is used, the destination is already specified.
+ test -n "$dir_arg$dstarg" && break
+ # Otherwise, the last argument is the destination. Remove it from $@.
+ for arg
+ do
+ if test -n "$dstarg"; then
+ # $@ is not empty: it contains at least $arg.
+ set fnord "$@" "$dstarg"
+ shift # fnord
+ fi
+ shift # arg
+ dstarg=$arg
+ done
+ break;;
+ esac
+done
+
+if test -z "$1"; then
+ if test -z "$dir_arg"; then
+ echo "$0: no input file specified." >&2
+ exit 1
+ fi
+ # It's OK to call `install-sh -d' without argument.
+ # This can happen when creating conditional directories.
+ exit 0
+fi
+
+for src
+do
+ # Protect names starting with `-'.
+ case $src in
+ -*) src=./$src ;;
+ esac
+
+ if test -n "$dir_arg"; then
+ dst=$src
+ src=
+
+ if test -d "$dst"; then
+ mkdircmd=:
+ chmodcmd=
+ else
+ mkdircmd=$mkdirprog
+ fi
+ else
+ # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+ # might cause directories to be created, which would be especially bad
+ # if $src (and thus $dsttmp) contains '*'.
+ if test ! -f "$src" && test ! -d "$src"; then
+ echo "$0: $src does not exist." >&2
+ exit 1
+ fi
+
+ if test -z "$dstarg"; then
+ echo "$0: no destination specified." >&2
+ exit 1
+ fi
+
+ dst=$dstarg
+ # Protect names starting with `-'.
+ case $dst in
+ -*) dst=./$dst ;;
+ esac
+
+ # If destination is a directory, append the input filename; won't work
+ # if double slashes aren't ignored.
+ if test -d "$dst"; then
+ if test -n "$no_target_directory"; then
+ echo "$0: $dstarg: Is a directory" >&2
+ exit 1
+ fi
+ dst=$dst/`basename "$src"`
+ fi
+ fi
+
+ # This sed command emulates the dirname command.
+ dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+ # Make sure that the destination directory exists.
+
+ # Skip lots of stat calls in the usual case.
+ if test ! -d "$dstdir"; then
+ defaultIFS='
+ '
+ IFS="${IFS-$defaultIFS}"
+
+ oIFS=$IFS
+ # Some sh's can't handle IFS=/ for some reason.
+ IFS='%'
+ set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
+ IFS=$oIFS
+
+ pathcomp=
+
+ while test $# -ne 0 ; do
+ pathcomp=$pathcomp$1
+ shift
+ if test ! -d "$pathcomp"; then
+ $mkdirprog "$pathcomp"
+      # mkdir can fail with a `File exists' error in case several
+ # install-sh are creating the directory concurrently. This
+ # is OK.
+ test -d "$pathcomp" || exit
+ fi
+ pathcomp=$pathcomp/
+ done
+ fi
+
+ if test -n "$dir_arg"; then
+ $doit $mkdircmd "$dst" \
+ && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
+ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
+ && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
+ && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
+
+ else
+ dstfile=`basename "$dst"`
+
+ # Make a couple of temp file names in the proper directory.
+ dsttmp=$dstdir/_inst.$$_
+ rmtmp=$dstdir/_rm.$$_
+
+ # Trap to clean up those temp files at exit.
+ trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
+ trap '(exit $?); exit' 1 2 13 15
+
+ # Copy the file name to the temp name.
+ $doit $cpprog "$src" "$dsttmp" &&
+
+ # and set any options; do chmod last to preserve setuid bits.
+ #
+ # If any of these fail, we abort the whole thing. If we want to
+ # ignore errors from any of these, just make sure not to ignore
+ # errors from the above "$doit $cpprog $src $dsttmp" command.
+ #
+ { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
+ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
+ && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
+ && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
+
+ # Now rename the file to the real destination.
+ { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \
+ || {
+ # The rename failed, perhaps because mv can't rename something else
+ # to itself, or perhaps because mv is so ancient that it does not
+ # support -f.
+
+ # Now remove or move aside any old file at destination location.
+ # We try this two ways since rm can't unlink itself on some
+ # systems and the destination file might be busy for other
+ # reasons. In this case, the final cleanup might fail but the new
+ # file should still install successfully.
+ {
+ if test -f "$dstdir/$dstfile"; then
+ $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
+ || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
+ || {
+ echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
+ (exit 1); exit
+ }
+ else
+ :
+ fi
+ } &&
+
+ # Now rename the file to the real destination.
+ $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
+ }
+ }
+ fi || { (exit 1); exit; }
+done
+
+# The final little trick to "correctly" pass the exit status to the exit trap.
+{
+ (exit 0); exit
+}
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
diff --git a/projects/sample/autoconf/ltmain.sh b/projects/sample/autoconf/ltmain.sh
new file mode 100644
index 000000000000..2455278a4d75
--- /dev/null
+++ b/projects/sample/autoconf/ltmain.sh
@@ -0,0 +1,6863 @@
+# ltmain.sh - Provide generalized library-building support services.
+# NOTE: Changing this file will not affect anything until you rerun configure.
+#
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
+# Free Software Foundation, Inc.
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
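+# sed script that strips the directory part from a path; used with $SED in
+# place of the basename command.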
+basename="s,^.*/,,g"
+
+# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh
+# is ksh but when the shell is invoked as "sh" and the current value of
+# the _XPG environment variable is not equal to 1 (one), the special
+# positional parameter $0, within a function call, is the name of the
+# function.
+progpath="$0"
+
+# The name of this program:
+progname=`echo "$progpath" | $SED $basename`
+modename="$progname"
+
+# Global variables:
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+
+PROGRAM=ltmain.sh
+PACKAGE=libtool
+VERSION=1.5.22
+TIMESTAMP=" (1.1220.2.365 2005/12/18 22:14:06)"
+
+# See if we are running on zsh, and set the options which allow our
+# commands through without removal of \ escapes.
+if test -n "${ZSH_VERSION+set}" ; then
+ setopt NO_GLOB_SUBST
+fi
+
+# Check that we have a working $echo.
+if test "X$1" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+elif test "X$1" = X--fallback-echo; then
+  # Avoid an inline document here; it may be left over.
+ :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then
+ # Yippee, $echo works!
+ :
+else
+ # Restart under the correct shell, and then maybe $echo will work.
+ exec $SHELL "$progpath" --no-reexec ${1+"$@"}
+fi
+
+if test "X$1" = X--fallback-echo; then
+ # used as fallback echo
+ shift
+ cat <<EOF
+$*
+EOF
+ exit $EXIT_SUCCESS
+fi
+
+default_mode=
+help="Try \`$progname --help' for more information."
+magic="%%%MAGIC variable%%%"
+mkdir="mkdir"
+mv="mv -f"
+rm="rm -f"
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed="${SED}"' -e 1s/^X//'
+sed_quote_subst='s/\([\\`\\"$\\\\]\)/\\\1/g'
+# Test whether this is an EBCDIC- or ASCII-based system ('\101' is "A" in ASCII).
+case `echo X|tr X '\101'` in
+ A) # ASCII based system
+ # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
+ SP2NL='tr \040 \012'
+ NL2SP='tr \015\012 \040\040'
+ ;;
+ *) # EBCDIC based system
+ SP2NL='tr \100 \n'
+ NL2SP='tr \r\n \100\100'
+ ;;
+esac
+
+# NLS nuisances.
+# Only set LANG and LC_ALL to C if already set.
+# These must not be set unconditionally because not all systems understand
+# e.g. LANG=C (notably SCO).
+# We save the old values to restore during execute mode.
+if test "${LC_ALL+set}" = set; then
+ save_LC_ALL="$LC_ALL"; LC_ALL=C; export LC_ALL
+fi
+if test "${LANG+set}" = set; then
+ save_LANG="$LANG"; LANG=C; export LANG
+fi
+
+# Make sure IFS has a sensible default
+lt_nl='
+'
+IFS=" $lt_nl"
+
+if test "$build_libtool_libs" != yes && test "$build_old_libs" != yes; then
+ $echo "$modename: not configured to build any kind of library" 1>&2
+ $echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit $EXIT_FAILURE
+fi
+
+# Global variables.
+mode=$default_mode
+nonopt=
+prev=
+prevopt=
+run=
+show="$echo"
+show_help=
+execute_dlfiles=
+duplicate_deps=no
+preserve_args=
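+# sed expressions mapping libtool object names (.lo) to real object names
+# (.$objext) and back.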
+lo2o="s/\\.lo\$/.${objext}/"
+o2lo="s/\\.${objext}\$/.lo/"
+
+#####################################
+# Shell function definitions:
+# This seems to be the best place for them
+
+# func_mktempdir [string]
+# Make a temporary directory that won't clash with other running
+# libtool processes, and avoids race conditions if possible. If
+# given, STRING is the basename for that directory.
+func_mktempdir ()
+{
+ my_template="${TMPDIR-/tmp}/${1-$progname}"
+
+ if test "$run" = ":"; then
+ # Return a directory name, but don't create it in dry-run mode
+ my_tmpdir="${my_template}-$$"
+ else
+
+ # If mktemp works, use that first and foremost
+ my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null`
+
+ if test ! -d "$my_tmpdir"; then
+      # Failing that, at least try to use $RANDOM to avoid a race
+ my_tmpdir="${my_template}-${RANDOM-0}$$"
+
+ save_mktempdir_umask=`umask`
+ umask 0077
+ $mkdir "$my_tmpdir"
+ umask $save_mktempdir_umask
+ fi
+
+ # If we're not in dry-run mode, bomb out on failure
+ test -d "$my_tmpdir" || {
+ $echo "cannot create temporary directory \`$my_tmpdir'" 1>&2
+ exit $EXIT_FAILURE
+ }
+ fi
+
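+  # Print the result; the leading "X" protects echo from a leading "-"
+  # and is stripped again by $Xsed.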
+ $echo "X$my_tmpdir" | $Xsed
+}
+
+
+# func_win32_libid arg
+# return the library type of file 'arg'
+#
+# Need a lot of goo to handle *both* DLLs and import libs
+# Has to be a shell function in order to 'eat' the argument
+# that is supplied when $file_magic_command is called.
+func_win32_libid ()
+{
+ win32_libid_type="unknown"
+ win32_fileres=`file -L $1 2>/dev/null`
+ case $win32_fileres in
+ *ar\ archive\ import\ library*) # definitely import
+ win32_libid_type="x86 archive import"
+ ;;
+ *ar\ archive*) # could be an import, or static
+ if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | \
+ $EGREP -e 'file format pe-i386(.*architecture: i386)?' >/dev/null ; then
+ win32_nmres=`eval $NM -f posix -A $1 | \
+ $SED -n -e '1,100{/ I /{s,.*,import,;p;q;};}'`
+ case $win32_nmres in
+ import*) win32_libid_type="x86 archive import";;
+ *) win32_libid_type="x86 archive static";;
+ esac
+ fi
+ ;;
+ *DLL*)
+ win32_libid_type="x86 DLL"
+ ;;
+ *executable*) # but shell scripts are "executable" too...
+ case $win32_fileres in
+ *MS\ Windows\ PE\ Intel*)
+ win32_libid_type="x86 DLL"
+ ;;
+ esac
+ ;;
+ esac
+ $echo $win32_libid_type
+}
+
+
+# func_infer_tag arg
+# Infer tagged configuration to use if any are available and
+# if one wasn't chosen via the "--tag" command line option.
+# Only attempt this if the compiler in the base compile
+# command doesn't match the default compiler.
+# arg is usually of the form 'gcc ...'
+func_infer_tag ()
+{
+ if test -n "$available_tags" && test -z "$tagname"; then
+ CC_quoted=
+ for arg in $CC; do
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ CC_quoted="$CC_quoted $arg"
+ done
+ case $@ in
+ # Blanks in the command may have been stripped by the calling shell,
+ # but not from the CC environment variable when configure was run.
+ " $CC "* | "$CC "* | " `$echo $CC` "* | "`$echo $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$echo $CC_quoted` "* | "`$echo $CC_quoted` "*) ;;
+ # Blanks at the start of $base_compile will cause this to fail
+ # if we don't check for them as well.
+ *)
+ for z in $available_tags; do
+ if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then
+ # Evaluate the configuration.
+ eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`"
+ CC_quoted=
+ for arg in $CC; do
+ # Double-quote args containing other shell metacharacters.
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ CC_quoted="$CC_quoted $arg"
+ done
+ case "$@ " in
+ " $CC "* | "$CC "* | " `$echo $CC` "* | "`$echo $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$echo $CC_quoted` "* | "`$echo $CC_quoted` "*)
+ # The compiler in the base compile command matches
+ # the one in the tagged configuration.
+ # Assume this is the tagged configuration we want.
+ tagname=$z
+ break
+ ;;
+ esac
+ fi
+ done
+      # If $tagname still isn't set, then no tagged configuration
+      # was found, so let the user know that the "--tag" command-line
+      # option must be used.
+ if test -z "$tagname"; then
+ $echo "$modename: unable to infer tagged configuration"
+ $echo "$modename: specify a tag with \`--tag'" 1>&2
+ exit $EXIT_FAILURE
+# else
+# $echo "$modename: using $tagname tagged configuration"
+ fi
+ ;;
+ esac
+ fi
+}
+
+
+# func_extract_an_archive dir oldlib
+func_extract_an_archive ()
+{
+ f_ex_an_ar_dir="$1"; shift
+ f_ex_an_ar_oldlib="$1"
+
+ $show "(cd $f_ex_an_ar_dir && $AR x $f_ex_an_ar_oldlib)"
+ $run eval "(cd \$f_ex_an_ar_dir && $AR x \$f_ex_an_ar_oldlib)" || exit $?
+ if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then
+ :
+ else
+ $echo "$modename: ERROR: object name conflicts: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" 1>&2
+ exit $EXIT_FAILURE
+ fi
+}
+
+# func_extract_archives gentop oldlib ...
+func_extract_archives ()
+{
+ my_gentop="$1"; shift
+ my_oldlibs=${1+"$@"}
+ my_oldobjs=""
+ my_xlib=""
+ my_xabs=""
+ my_xdir=""
+ my_status=""
+
+ $show "${rm}r $my_gentop"
+ $run ${rm}r "$my_gentop"
+ $show "$mkdir $my_gentop"
+ $run $mkdir "$my_gentop"
+ my_status=$?
+ if test "$my_status" -ne 0 && test ! -d "$my_gentop"; then
+ exit $my_status
+ fi
+
+ for my_xlib in $my_oldlibs; do
+ # Extract the objects.
+ case $my_xlib in
+ [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;;
+ *) my_xabs=`pwd`"/$my_xlib" ;;
+ esac
+ my_xlib=`$echo "X$my_xlib" | $Xsed -e 's%^.*/%%'`
+ my_xdir="$my_gentop/$my_xlib"
+
+ $show "${rm}r $my_xdir"
+ $run ${rm}r "$my_xdir"
+ $show "$mkdir $my_xdir"
+ $run $mkdir "$my_xdir"
+ exit_status=$?
+ if test "$exit_status" -ne 0 && test ! -d "$my_xdir"; then
+ exit $exit_status
+ fi
+ case $host in
+ *-darwin*)
+ $show "Extracting $my_xabs"
+ # Do not bother doing anything if just a dry run
+ if test -z "$run"; then
+ darwin_orig_dir=`pwd`
+ cd $my_xdir || exit $?
+ darwin_archive=$my_xabs
+ darwin_curdir=`pwd`
+ darwin_base_archive=`$echo "X$darwin_archive" | $Xsed -e 's%^.*/%%'`
+ darwin_arches=`lipo -info "$darwin_archive" 2>/dev/null | $EGREP Architectures 2>/dev/null`
+ if test -n "$darwin_arches"; then
+ darwin_arches=`echo "$darwin_arches" | $SED -e 's/.*are://'`
+ darwin_arch=
+ $show "$darwin_base_archive has multiple architectures $darwin_arches"
+ for darwin_arch in $darwin_arches ; do
+ mkdir -p "unfat-$$/${darwin_base_archive}-${darwin_arch}"
+ lipo -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}"
+ cd "unfat-$$/${darwin_base_archive}-${darwin_arch}"
+ func_extract_an_archive "`pwd`" "${darwin_base_archive}"
+ cd "$darwin_curdir"
+ $rm "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}"
+ done # $darwin_arches
+ ## Okay now we have a bunch of thin objects, gotta fatten them up :)
+ darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print| xargs basename | sort -u | $NL2SP`
+ darwin_file=
+ darwin_files=
+ for darwin_file in $darwin_filelist; do
+ darwin_files=`find unfat-$$ -name $darwin_file -print | $NL2SP`
+ lipo -create -output "$darwin_file" $darwin_files
+ done # $darwin_filelist
+ ${rm}r unfat-$$
+ cd "$darwin_orig_dir"
+ else
+ cd "$darwin_orig_dir"
+ func_extract_an_archive "$my_xdir" "$my_xabs"
+ fi # $darwin_arches
+ fi # $run
+ ;;
+ *)
+ func_extract_an_archive "$my_xdir" "$my_xabs"
+ ;;
+ esac
+ my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | $NL2SP`
+ done
+ func_extract_archives_result="$my_oldobjs"
+}
+# End of Shell function definitions
+#####################################
+
+# Darwin sucks
+eval std_shrext=\"$shrext_cmds\"
+
+disable_libs=no
+
+# Parse our command line options once, thoroughly.
+while test "$#" -gt 0
+do
+ arg="$1"
+ shift
+
+ case $arg in
+ -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;;
+ *) optarg= ;;
+ esac
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ case $prev in
+ execute_dlfiles)
+ execute_dlfiles="$execute_dlfiles $arg"
+ ;;
+ tag)
+ tagname="$arg"
+ preserve_args="${preserve_args}=$arg"
+
+ # Check whether tagname contains only valid characters
+ case $tagname in
+ *[!-_A-Za-z0-9,/]*)
+ $echo "$progname: invalid tag name: $tagname" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ case $tagname in
+ CC)
+	  # Don't test for the "default" C tag, as we know it's there, but
+ # not specially marked.
+ ;;
+ *)
+ if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "$progpath" > /dev/null; then
+ taglist="$taglist $tagname"
+ # Evaluate the configuration.
+ eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$tagname'$/,/^# ### END LIBTOOL TAG CONFIG: '$tagname'$/p' < $progpath`"
+ else
+ $echo "$progname: ignoring unknown tag $tagname" 1>&2
+ fi
+ ;;
+ esac
+ ;;
+ *)
+ eval "$prev=\$arg"
+ ;;
+ esac
+
+ prev=
+ prevopt=
+ continue
+ fi
+
+ # Have we seen a non-optional argument yet?
+ case $arg in
+ --help)
+ show_help=yes
+ ;;
+
+ --version)
+ $echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP"
+ $echo
+ $echo "Copyright (C) 2005 Free Software Foundation, Inc."
+ $echo "This is free software; see the source for copying conditions. There is NO"
+ $echo "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+ exit $?
+ ;;
+
+ --config)
+ ${SED} -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $progpath
+ # Now print the configurations for the tags.
+ for tagname in $taglist; do
+ ${SED} -n -e "/^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$/,/^# ### END LIBTOOL TAG CONFIG: $tagname$/p" < "$progpath"
+ done
+ exit $?
+ ;;
+
+ --debug)
+ $echo "$progname: enabling shell trace mode"
+ set -x
+ preserve_args="$preserve_args $arg"
+ ;;
+
+ --dry-run | -n)
+ run=:
+ ;;
+
+ --features)
+ $echo "host: $host"
+ if test "$build_libtool_libs" = yes; then
+ $echo "enable shared libraries"
+ else
+ $echo "disable shared libraries"
+ fi
+ if test "$build_old_libs" = yes; then
+ $echo "enable static libraries"
+ else
+ $echo "disable static libraries"
+ fi
+ exit $?
+ ;;
+
+ --finish) mode="finish" ;;
+
+ --mode) prevopt="--mode" prev=mode ;;
+ --mode=*) mode="$optarg" ;;
+
+ --preserve-dup-deps) duplicate_deps="yes" ;;
+
+ --quiet | --silent)
+ show=:
+ preserve_args="$preserve_args $arg"
+ ;;
+
+ --tag)
+ prevopt="--tag"
+ prev=tag
+ preserve_args="$preserve_args --tag"
+ ;;
+ --tag=*)
+ set tag "$optarg" ${1+"$@"}
+ shift
+ prev=tag
+ preserve_args="$preserve_args --tag"
+ ;;
+
+ -dlopen)
+ prevopt="-dlopen"
+ prev=execute_dlfiles
+ ;;
+
+ -*)
+ $echo "$modename: unrecognized option \`$arg'" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+
+ *)
+ nonopt="$arg"
+ break
+ ;;
+ esac
+done
+
+if test -n "$prevopt"; then
+ $echo "$modename: option \`$prevopt' requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+fi
+
+case $disable_libs in
+no)
+ ;;
+shared)
+ build_libtool_libs=no
+ build_old_libs=yes
+ ;;
+static)
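+  # Build static libraries only if shared libraries are not being built.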
+ build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac`
+ ;;
+esac
+
+# If this variable is set in any of the actions, the command in it
+# will be execed at the end. This prevents here-documents from being
+# left over by shells.
+exec_cmd=
+
+if test -z "$show_help"; then
+
+ # Infer the operation mode.
+ if test -z "$mode"; then
+ $echo "*** Warning: inferring the mode of operation is deprecated." 1>&2
+ $echo "*** Future versions of Libtool will require --mode=MODE be specified." 1>&2
+ case $nonopt in
+ *cc | cc* | *++ | gcc* | *-gcc* | g++* | xlc*)
+ mode=link
+ for arg
+ do
+ case $arg in
+ -c)
+ mode=compile
+ break
+ ;;
+ esac
+ done
+ ;;
+ *db | *dbx | *strace | *truss)
+ mode=execute
+ ;;
+ *install*|cp|mv)
+ mode=install
+ ;;
+ *rm)
+ mode=uninstall
+ ;;
+ *)
+ # If we have no mode, but dlfiles were specified, then do execute mode.
+ test -n "$execute_dlfiles" && mode=execute
+
+ # Just use the default operation mode.
+ if test -z "$mode"; then
+ if test -n "$nonopt"; then
+ $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2
+ else
+ $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2
+ fi
+ fi
+ ;;
+ esac
+ fi
+
+ # Only execute mode is allowed to have -dlopen flags.
+ if test -n "$execute_dlfiles" && test "$mode" != execute; then
+ $echo "$modename: unrecognized option \`-dlopen'" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Change the help message to a mode-specific one.
+ generic_help="$help"
+ help="Try \`$modename --help --mode=$mode' for more information."
+
+ # These modes are in order of execution frequency so that they run quickly.
+ case $mode in
+ # libtool compile mode
+ compile)
+ modename="$modename: compile"
+ # Get the compilation command and the source file.
+ base_compile=
+ srcfile="$nonopt" # always keep a non-empty value in "srcfile"
+ suppress_opt=yes
+ suppress_output=
+ arg_mode=normal
+ libobj=
+ later=
+
+ for arg
+ do
+ case $arg_mode in
+ arg )
+ # do not "continue". Instead, add this to base_compile
+ lastarg="$arg"
+ arg_mode=normal
+ ;;
+
+ target )
+ libobj="$arg"
+ arg_mode=normal
+ continue
+ ;;
+
+ normal )
+ # Accept any command-line options.
+ case $arg in
+ -o)
+ if test -n "$libobj" ; then
+ $echo "$modename: you cannot specify \`-o' more than once" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ arg_mode=target
+ continue
+ ;;
+
+ -static | -prefer-pic | -prefer-non-pic)
+ later="$later $arg"
+ continue
+ ;;
+
+ -no-suppress)
+ suppress_opt=no
+ continue
+ ;;
+
+ -Xcompiler)
+ arg_mode=arg # the next one goes into the "base_compile" arg list
+ continue # The current "srcfile" will either be retained or
+ ;; # replaced later. I would guess that would be a bug.
+
+ -Wc,*)
+ args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"`
+ lastarg=
+ save_ifs="$IFS"; IFS=','
+ for arg in $args; do
+ IFS="$save_ifs"
+
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, so we specify it separately.
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ lastarg="$lastarg $arg"
+ done
+ IFS="$save_ifs"
+ lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"`
+
+ # Add the arguments to base_compile.
+ base_compile="$base_compile $lastarg"
+ continue
+ ;;
+
+ * )
+ # Accept the current argument as the source file.
+ # The previous "srcfile" becomes the current argument.
+ #
+ lastarg="$srcfile"
+ srcfile="$arg"
+ ;;
+ esac # case $arg
+ ;;
+ esac # case $arg_mode
+
+ # Aesthetically quote the previous argument.
+ lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"`
+
+ case $lastarg in
+ # Double-quote args containing other shell metacharacters.
+ # Many Bourne shells cannot handle close brackets correctly
+ # in scan sets, and some SunOS ksh mistreat backslash-escaping
+ # in scan sets (worked around with variable expansion),
+ # and furthermore cannot handle '|' '&' '(' ')' in scan sets
+ # at all, so we specify them separately.
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ lastarg="\"$lastarg\""
+ ;;
+ esac
+
+ base_compile="$base_compile $lastarg"
+ done # for arg
+
+ case $arg_mode in
+ arg)
+      $echo "$modename: you must specify an argument for -Xcompiler"
+ exit $EXIT_FAILURE
+ ;;
+ target)
+ $echo "$modename: you must specify a target with \`-o'" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ *)
+ # Get the name of the library object.
+ [ -z "$libobj" ] && libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'`
+ ;;
+ esac
+
+ # Recognize several different file suffixes.
+ # If the user specifies -o file.o, it is replaced with file.lo
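+    # By default, strip a single-character suffix (.c, .C, .F, .S, .i, .f, .m, .s, .o).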
+ xform='[cCFSifmso]'
+ case $libobj in
+ *.ada) xform=ada ;;
+ *.adb) xform=adb ;;
+ *.ads) xform=ads ;;
+ *.asm) xform=asm ;;
+ *.c++) xform=c++ ;;
+ *.cc) xform=cc ;;
+ *.ii) xform=ii ;;
+ *.class) xform=class ;;
+ *.cpp) xform=cpp ;;
+ *.cxx) xform=cxx ;;
+ *.f90) xform=f90 ;;
+ *.for) xform=for ;;
+ *.java) xform=java ;;
+ esac
+
+ libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"`
+
+ case $libobj in
+ *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;;
+ *)
+ $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ func_infer_tag $base_compile
+
+ for arg in $later; do
+ case $arg in
+ -static)
+ build_old_libs=yes
+ continue
+ ;;
+
+ -prefer-pic)
+ pic_mode=yes
+ continue
+ ;;
+
+ -prefer-non-pic)
+ pic_mode=no
+ continue
+ ;;
+ esac
+ done
+
+ qlibobj=`$echo "X$libobj" | $Xsed -e "$sed_quote_subst"`
+ case $qlibobj in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ qlibobj="\"$qlibobj\"" ;;
+ esac
+ test "X$libobj" != "X$qlibobj" \
+ && $echo "X$libobj" | grep '[]~#^*{};<>?"'"'"' &()|`$[]' \
+ && $echo "$modename: libobj name \`$libobj' may not contain shell special characters."
+ objname=`$echo "X$obj" | $Xsed -e 's%^.*/%%'`
+ xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$obj"; then
+ xdir=
+ else
+ xdir=$xdir/
+ fi
+ lobj=${xdir}$objdir/$objname
+
+ if test -z "$base_compile"; then
+ $echo "$modename: you must specify a compilation command" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Delete any leftover library objects.
+ if test "$build_old_libs" = yes; then
+ removelist="$obj $lobj $libobj ${libobj}T"
+ else
+ removelist="$lobj $libobj ${libobj}T"
+ fi
+
+ $run $rm $removelist
+ trap "$run $rm $removelist; exit $EXIT_FAILURE" 1 2 15
+
+ # On Cygwin there's no "real" PIC flag so we must build both object types
+ case $host_os in
+ cygwin* | mingw* | pw32* | os2*)
+ pic_mode=default
+ ;;
+ esac
+ if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then
+ # non-PIC code in shared libraries is not supported
+ pic_mode=default
+ fi
+
+    # Calculate the filename of the output object if the compiler does
+    # not support -o with -c.
+ if test "$compiler_c_o" = no; then
+ output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext}
+ lockfile="$output_obj.lock"
+ removelist="$removelist $output_obj $lockfile"
+ trap "$run $rm $removelist; exit $EXIT_FAILURE" 1 2 15
+ else
+ output_obj=
+ need_locks=no
+ lockfile=
+ fi
+
+    # Lock this critical section if it is needed.
+    # We use this script file to make the link, which avoids creating a new file.
+ if test "$need_locks" = yes; then
+ until $run ln "$progpath" "$lockfile" 2>/dev/null; do
+ $show "Waiting for $lockfile to be removed"
+ sleep 2
+ done
+ elif test "$need_locks" = warn; then
+ if test -f "$lockfile"; then
+ $echo "\
+*** ERROR, $lockfile exists and contains:
+`cat $lockfile 2>/dev/null`
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) on this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit $EXIT_FAILURE
+ fi
+ $echo "$srcfile" > "$lockfile"
+ fi
+
+ if test -n "$fix_srcfile_path"; then
+ eval srcfile=\"$fix_srcfile_path\"
+ fi
+ qsrcfile=`$echo "X$srcfile" | $Xsed -e "$sed_quote_subst"`
+ case $qsrcfile in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ qsrcfile="\"$qsrcfile\"" ;;
+ esac
+
+ $run $rm "$libobj" "${libobj}T"
+
+ # Create a libtool object file (analogous to a ".la" file),
+ # but don't create it if we're doing a dry run.
+ test -z "$run" && cat > ${libobj}T <<EOF
+# $libobj - a libtool object file
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# Name of the PIC object.
+EOF
+
+ # Only build a PIC object if we are building libtool libraries.
+ if test "$build_libtool_libs" = yes; then
+ # Without this assignment, base_compile gets emptied.
+ fbsd_hideous_sh_bug=$base_compile
+
+ if test "$pic_mode" != no; then
+ command="$base_compile $qsrcfile $pic_flag"
+ else
+ # Don't build PIC code
+ command="$base_compile $qsrcfile"
+ fi
+
+ if test ! -d "${xdir}$objdir"; then
+ $show "$mkdir ${xdir}$objdir"
+ $run $mkdir ${xdir}$objdir
+ exit_status=$?
+ if test "$exit_status" -ne 0 && test ! -d "${xdir}$objdir"; then
+ exit $exit_status
+ fi
+ fi
+
+ if test -z "$output_obj"; then
+ # Place PIC objects in $objdir
+ command="$command -o $lobj"
+ fi
+
+ $run $rm "$lobj" "$output_obj"
+
+ $show "$command"
+ if $run eval "$command"; then :
+ else
+ test -n "$output_obj" && $run $rm $removelist
+ exit $EXIT_FAILURE
+ fi
+
+ if test "$need_locks" = warn &&
+ test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+ $echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) on this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit $EXIT_FAILURE
+ fi
+
+ # Just move the object if needed, then go on to compile the next one
+ if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then
+ $show "$mv $output_obj $lobj"
+ if $run $mv $output_obj $lobj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+ # Append the name of the PIC object to the libtool object file.
+ test -z "$run" && cat >> ${libobj}T <<EOF
+pic_object='$objdir/$objname'
+
+EOF
+
+ # Allow error messages only from the first compilation.
+ if test "$suppress_opt" = yes; then
+ suppress_output=' >/dev/null 2>&1'
+ fi
+ else
+ # No PIC object so indicate it doesn't exist in the libtool
+ # object file.
+ test -z "$run" && cat >> ${libobj}T <<EOF
+pic_object=none
+
+EOF
+ fi
+
+ # Only build a position-dependent object if we build old libraries.
+ if test "$build_old_libs" = yes; then
+ if test "$pic_mode" != yes; then
+ # Don't build PIC code
+ command="$base_compile $qsrcfile"
+ else
+ command="$base_compile $qsrcfile $pic_flag"
+ fi
+ if test "$compiler_c_o" = yes; then
+ command="$command -o $obj"
+ fi
+
+ # Suppress compiler output if we already did a PIC compilation.
+ command="$command$suppress_output"
+ $run $rm "$obj" "$output_obj"
+ $show "$command"
+ if $run eval "$command"; then :
+ else
+ $run $rm $removelist
+ exit $EXIT_FAILURE
+ fi
+
+ if test "$need_locks" = warn &&
+ test "X`cat $lockfile 2>/dev/null`" != "X$srcfile"; then
+ $echo "\
+*** ERROR, $lockfile contains:
+`cat $lockfile 2>/dev/null`
+
+but it should contain:
+$srcfile
+
+This indicates that another process is trying to use the same
+temporary object file, and libtool could not work around it because
+your compiler does not support \`-c' and \`-o' together. If you
+repeat this compilation, it may succeed, by chance, but you had better
+avoid parallel builds (make -j) on this platform, or get a better
+compiler."
+
+ $run $rm $removelist
+ exit $EXIT_FAILURE
+ fi
+
+ # Just move the object if needed
+ if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then
+ $show "$mv $output_obj $obj"
+ if $run $mv $output_obj $obj; then :
+ else
+ error=$?
+ $run $rm $removelist
+ exit $error
+ fi
+ fi
+
+      # Append the name of the non-PIC object to the libtool object file.
+ # Only append if the libtool object file exists.
+ test -z "$run" && cat >> ${libobj}T <<EOF
+# Name of the non-PIC object.
+non_pic_object='$objname'
+
+EOF
+ else
+      # Append the name of the non-PIC object to the libtool object file.
+ # Only append if the libtool object file exists.
+ test -z "$run" && cat >> ${libobj}T <<EOF
+# Name of the non-PIC object.
+non_pic_object=none
+
+EOF
+ fi
+
+ $run $mv "${libobj}T" "${libobj}"
+
+ # Unlock the critical section if it was locked
+ if test "$need_locks" != no; then
+ $run $rm "$lockfile"
+ fi
+
+ exit $EXIT_SUCCESS
+ ;;
+
+ # libtool link mode
+ link | relink)
+ modename="$modename: link"
+ case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+ # It is impossible to link a dll without this setting, and
+ # we shouldn't force the makefile maintainer to figure out
+ # which system we are compiling for in order to pass an extra
+ # flag for every libtool invocation.
+ # allow_undefined=no
+
+ # FIXME: Unfortunately, there are problems with the above when trying
+ # to make a dll which has undefined symbols, in which case not
+ # even a static library is built. For now, we need to specify
+ # -no-undefined on the libtool link line when we can be certain
+ # that all symbols are satisfied, otherwise we get a static library.
+ allow_undefined=yes
+ ;;
+ *)
+ allow_undefined=yes
+ ;;
+ esac
+ libtool_args="$nonopt"
+ base_compile="$nonopt $@"
+ compile_command="$nonopt"
+ finalize_command="$nonopt"
+
+ compile_rpath=
+ finalize_rpath=
+ compile_shlibpath=
+ finalize_shlibpath=
+ convenience=
+ old_convenience=
+ deplibs=
+ old_deplibs=
+ compiler_flags=
+ linker_flags=
+ dllsearchpath=
+ lib_search_path=`pwd`
+ inst_prefix_dir=
+
+ avoid_version=no
+ dlfiles=
+ dlprefiles=
+ dlself=no
+ export_dynamic=no
+ export_symbols=
+ export_symbols_regex=
+ generated=
+ libobjs=
+ ltlibs=
+ module=no
+ no_install=no
+ objs=
+ non_pic_objects=
+ notinst_path= # paths that contain not-installed libtool libraries
+ precious_files_regex=
+ prefer_static_libs=no
+ preload=no
+ prev=
+ prevarg=
+ release=
+ rpath=
+ xrpath=
+ perm_rpath=
+ temp_rpath=
+ thread_safe=no
+ vinfo=
+ vinfo_number=no
+
+ func_infer_tag $base_compile
+
+ # We need to know -static, to get the right output filenames.
+ for arg
+ do
+ case $arg in
+ -all-static | -static)
+ if test "X$arg" = "X-all-static"; then
+ if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then
+ $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2
+ fi
+ if test -n "$link_static_flag"; then
+ dlopen_self=$dlopen_self_static
+ fi
+ prefer_static_libs=yes
+ else
+ if test -z "$pic_flag" && test -n "$link_static_flag"; then
+ dlopen_self=$dlopen_self_static
+ fi
+ prefer_static_libs=built
+ fi
+ build_libtool_libs=no
+ build_old_libs=yes
+ break
+ ;;
+ esac
+ done
+
+ # See if our shared archives depend on static archives.
+ test -n "$old_archive_from_new_cmds" && build_old_libs=yes
+
+ # Go through the arguments, transforming them on the way.
+ while test "$#" -gt 0; do
+ arg="$1"
+ shift
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test
+ ;;
+ *) qarg=$arg ;;
+ esac
+ libtool_args="$libtool_args $qarg"
+
+ # If the previous option needs an argument, assign it.
+ if test -n "$prev"; then
+ case $prev in
+ output)
+ compile_command="$compile_command @OUTPUT@"
+ finalize_command="$finalize_command @OUTPUT@"
+ ;;
+ esac
+
+ case $prev in
+ dlfiles|dlprefiles)
+ if test "$preload" = no; then
+ # Add the symbol object into the linking commands.
+ compile_command="$compile_command @SYMFILE@"
+ finalize_command="$finalize_command @SYMFILE@"
+ preload=yes
+ fi
+ case $arg in
+ *.la | *.lo) ;; # We handle these cases below.
+ force)
+ if test "$dlself" = no; then
+ dlself=needless
+ export_dynamic=yes
+ fi
+ prev=
+ continue
+ ;;
+ self)
+ if test "$prev" = dlprefiles; then
+ dlself=yes
+ elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then
+ dlself=yes
+ else
+ dlself=needless
+ export_dynamic=yes
+ fi
+ prev=
+ continue
+ ;;
+ *)
+ if test "$prev" = dlfiles; then
+ dlfiles="$dlfiles $arg"
+ else
+ dlprefiles="$dlprefiles $arg"
+ fi
+ prev=
+ continue
+ ;;
+ esac
+ ;;
+ expsyms)
+ export_symbols="$arg"
+ if test ! -f "$arg"; then
+ $echo "$modename: symbol file \`$arg' does not exist"
+ exit $EXIT_FAILURE
+ fi
+ prev=
+ continue
+ ;;
+ expsyms_regex)
+ export_symbols_regex="$arg"
+ prev=
+ continue
+ ;;
+ inst_prefix)
+ inst_prefix_dir="$arg"
+ prev=
+ continue
+ ;;
+ precious_regex)
+ precious_files_regex="$arg"
+ prev=
+ continue
+ ;;
+ release)
+ release="-$arg"
+ prev=
+ continue
+ ;;
+ objectlist)
+ if test -f "$arg"; then
+ save_arg=$arg
+ moreargs=
+ for fil in `cat $save_arg`
+ do
+# moreargs="$moreargs $fil"
+ arg=$fil
+ # A libtool-controlled object.
+
+ # Check to see that this really is a libtool object.
+ if (${SED} -e '2q' $arg | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ pic_object=
+ non_pic_object=
+
+ # Read the .lo file
+ # If there is no directory component, then add one.
+ case $arg in
+ */* | *\\*) . $arg ;;
+ *) . ./$arg ;;
+ esac
+
+ if test -z "$pic_object" || \
+ test -z "$non_pic_object" ||
+ test "$pic_object" = none && \
+ test "$non_pic_object" = none; then
+ $echo "$modename: cannot find name of object for \`$arg'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Extract subdirectory from the argument.
+ xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$arg"; then
+ xdir=
+ else
+ xdir="$xdir/"
+ fi
+
+ if test "$pic_object" != none; then
+ # Prepend the subdirectory the object is found in.
+ pic_object="$xdir$pic_object"
+
+ if test "$prev" = dlfiles; then
+ if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+ dlfiles="$dlfiles $pic_object"
+ prev=
+ continue
+ else
+ # If libtool objects are unsupported, then we need to preload.
+ prev=dlprefiles
+ fi
+ fi
+
+ # CHECK ME: I think I busted this. -Ossama
+ if test "$prev" = dlprefiles; then
+ # Preload the old-style object.
+ dlprefiles="$dlprefiles $pic_object"
+ prev=
+ fi
+
+ # A PIC object.
+ libobjs="$libobjs $pic_object"
+ arg="$pic_object"
+ fi
+
+ # Non-PIC object.
+ if test "$non_pic_object" != none; then
+ # Prepend the subdirectory the object is found in.
+ non_pic_object="$xdir$non_pic_object"
+
+ # A standard non-PIC object
+ non_pic_objects="$non_pic_objects $non_pic_object"
+ if test -z "$pic_object" || test "$pic_object" = none ; then
+ arg="$non_pic_object"
+ fi
+ else
+ # If the PIC object exists, use it instead.
+ # $xdir was prepended to $pic_object above.
+ non_pic_object="$pic_object"
+ non_pic_objects="$non_pic_objects $non_pic_object"
+ fi
+ else
+ # Only an error if not doing a dry-run.
+ if test -z "$run"; then
+ $echo "$modename: \`$arg' is not a valid libtool object" 1>&2
+ exit $EXIT_FAILURE
+ else
+ # Dry-run case.
+
+ # Extract subdirectory from the argument.
+ xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$arg"; then
+ xdir=
+ else
+ xdir="$xdir/"
+ fi
+
+ pic_object=`$echo "X${xdir}${objdir}/${arg}" | $Xsed -e "$lo2o"`
+ non_pic_object=`$echo "X${xdir}${arg}" | $Xsed -e "$lo2o"`
+ libobjs="$libobjs $pic_object"
+ non_pic_objects="$non_pic_objects $non_pic_object"
+ fi
+ fi
+ done
+ else
+ $echo "$modename: link input file \`$save_arg' does not exist"
+ exit $EXIT_FAILURE
+ fi
+ arg=$save_arg
+ prev=
+ continue
+ ;;
+ rpath | xrpath)
+ # We need an absolute path.
+ case $arg in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ $echo "$modename: only absolute run-paths are allowed" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+ if test "$prev" = rpath; then
+ case "$rpath " in
+ *" $arg "*) ;;
+ *) rpath="$rpath $arg" ;;
+ esac
+ else
+ case "$xrpath " in
+ *" $arg "*) ;;
+ *) xrpath="$xrpath $arg" ;;
+ esac
+ fi
+ prev=
+ continue
+ ;;
+ xcompiler)
+ compiler_flags="$compiler_flags $qarg"
+ prev=
+ compile_command="$compile_command $qarg"
+ finalize_command="$finalize_command $qarg"
+ continue
+ ;;
+ xlinker)
+ linker_flags="$linker_flags $qarg"
+ compiler_flags="$compiler_flags $wl$qarg"
+ prev=
+ compile_command="$compile_command $wl$qarg"
+ finalize_command="$finalize_command $wl$qarg"
+ continue
+ ;;
+ xcclinker)
+ linker_flags="$linker_flags $qarg"
+ compiler_flags="$compiler_flags $qarg"
+ prev=
+ compile_command="$compile_command $qarg"
+ finalize_command="$finalize_command $qarg"
+ continue
+ ;;
+ shrext)
+ shrext_cmds="$arg"
+ prev=
+ continue
+ ;;
+ darwin_framework|darwin_framework_skip)
+ test "$prev" = "darwin_framework" && compiler_flags="$compiler_flags $arg"
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ prev=
+ continue
+ ;;
+ *)
+ eval "$prev=\"\$arg\""
+ prev=
+ continue
+ ;;
+ esac
+ fi # test -n "$prev"
+
+ prevarg="$arg"
+
+ case $arg in
+ -all-static)
+ if test -n "$link_static_flag"; then
+ compile_command="$compile_command $link_static_flag"
+ finalize_command="$finalize_command $link_static_flag"
+ fi
+ continue
+ ;;
+
+ -allow-undefined)
+ # FIXME: remove this flag sometime in the future.
+ $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2
+ continue
+ ;;
+
+ -avoid-version)
+ avoid_version=yes
+ continue
+ ;;
+
+ -dlopen)
+ prev=dlfiles
+ continue
+ ;;
+
+ -dlpreopen)
+ prev=dlprefiles
+ continue
+ ;;
+
+ -export-dynamic)
+ export_dynamic=yes
+ continue
+ ;;
+
+ -export-symbols | -export-symbols-regex)
+ if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+	$echo "$modename: more than one -export-symbols argument is not allowed"
+ exit $EXIT_FAILURE
+ fi
+ if test "X$arg" = "X-export-symbols"; then
+ prev=expsyms
+ else
+ prev=expsyms_regex
+ fi
+ continue
+ ;;
+
+ -framework|-arch|-isysroot)
+ case " $CC " in
+ *" ${arg} ${1} "* | *" ${arg} ${1} "*)
+ prev=darwin_framework_skip ;;
+ *) compiler_flags="$compiler_flags $arg"
+ prev=darwin_framework ;;
+ esac
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ continue
+ ;;
+
+ -inst-prefix-dir)
+ prev=inst_prefix
+ continue
+ ;;
+
+ # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:*
+      # so, if we see these flags, be careful not to treat them like -L
+ -L[A-Z][A-Z]*:*)
+ case $with_gcc/$host in
+ no/*-*-irix* | /*-*-irix*)
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ ;;
+ esac
+ continue
+ ;;
+
+ -L*)
+ dir=`$echo "X$arg" | $Xsed -e 's/^-L//'`
+ # We need an absolute path.
+ case $dir in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ absdir=`cd "$dir" && pwd`
+ if test -z "$absdir"; then
+ $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2
+ absdir="$dir"
+ notinst_path="$notinst_path $dir"
+ fi
+ dir="$absdir"
+ ;;
+ esac
+ case "$deplibs " in
+ *" -L$dir "*) ;;
+ *)
+ deplibs="$deplibs -L$dir"
+ lib_search_path="$lib_search_path $dir"
+ ;;
+ esac
+ case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+ testbindir=`$echo "X$dir" | $Xsed -e 's*/lib$*/bin*'`
+ case :$dllsearchpath: in
+ *":$dir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$dir";;
+ esac
+ case :$dllsearchpath: in
+ *":$testbindir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$testbindir";;
+ esac
+ ;;
+ esac
+ continue
+ ;;
+
+ -l*)
+ if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then
+ case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos*)
+ # These systems don't actually have a C or math library (as such)
+ continue
+ ;;
+ *-*-os2*)
+ # These systems don't actually have a C library (as such)
+ test "X$arg" = "X-lc" && continue
+ ;;
+ *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+	  # Do not include libc, because we have both libc and libc_r.
+ test "X$arg" = "X-lc" && continue
+ ;;
+ *-*-rhapsody* | *-*-darwin1.[012])
+ # Rhapsody C and math libraries are in the System framework
+ deplibs="$deplibs -framework System"
+ continue
+ ;;
+ *-*-sco3.2v5* | *-*-sco5v6*)
+ # Causes problems with __ctype
+ test "X$arg" = "X-lc" && continue
+ ;;
+ *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
+ # Compiler inserts libc in the correct place for threads to work
+ test "X$arg" = "X-lc" && continue
+ ;;
+ esac
+ elif test "X$arg" = "X-lc_r"; then
+ case $host in
+ *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+ # Do not include libc_r directly, use -pthread flag.
+ continue
+ ;;
+ esac
+ fi
+ deplibs="$deplibs $arg"
+ continue
+ ;;
+
+ # Tru64 UNIX uses -model [arg] to determine the layout of C++
+ # classes, name mangling, and exception handling.
+ -model)
+ compile_command="$compile_command $arg"
+ compiler_flags="$compiler_flags $arg"
+ finalize_command="$finalize_command $arg"
+ prev=xcompiler
+ continue
+ ;;
+
+ -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe)
+ compiler_flags="$compiler_flags $arg"
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ continue
+ ;;
+
+ -module)
+ module=yes
+ continue
+ ;;
+
+ # -64, -mips[0-9] enable 64-bit mode on the SGI compiler
+ # -r[0-9][0-9]* specifies the processor on the SGI compiler
+ # -xarch=*, -xtarget=* enable 64-bit mode on the Sun compiler
+ # +DA*, +DD* enable 64-bit mode on the HP compiler
+ # -q* pass through compiler args for the IBM compiler
+ # -m* pass through architecture-specific compiler args for GCC
+ # -m*, -t[45]*, -txscale* pass through architecture-specific
+ # compiler args for GCC
+ # -pg pass through profiling flag for GCC
+ # @file GCC response files
+ -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*|-pg| \
+ -t[45]*|-txscale*|@*)
+
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ compiler_flags="$compiler_flags $arg"
+ continue
+ ;;
+
+ -shrext)
+ prev=shrext
+ continue
+ ;;
+
+ -no-fast-install)
+ fast_install=no
+ continue
+ ;;
+
+ -no-install)
+ case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+ # The PATH hackery in wrapper scripts is required on Windows
+ # in order for the loader to find any dlls it needs.
+ $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2
+ $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2
+ fast_install=no
+ ;;
+ *) no_install=yes ;;
+ esac
+ continue
+ ;;
+
+ -no-undefined)
+ allow_undefined=no
+ continue
+ ;;
+
+ -objectlist)
+ prev=objectlist
+ continue
+ ;;
+
+ -o) prev=output ;;
+
+ -precious-files-regex)
+ prev=precious_regex
+ continue
+ ;;
+
+ -release)
+ prev=release
+ continue
+ ;;
+
+ -rpath)
+ prev=rpath
+ continue
+ ;;
+
+ -R)
+ prev=xrpath
+ continue
+ ;;
+
+ -R*)
+ dir=`$echo "X$arg" | $Xsed -e 's/^-R//'`
+ # We need an absolute path.
+ case $dir in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ $echo "$modename: only absolute run-paths are allowed" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+ case "$xrpath " in
+ *" $dir "*) ;;
+ *) xrpath="$xrpath $dir" ;;
+ esac
+ continue
+ ;;
+
+ -static)
+ # The effects of -static are defined in a previous loop.
+ # We used to do the same as -all-static on platforms that
+ # didn't have a PIC flag, but the assumption that the effects
+ # would be equivalent was wrong. It would break on at least
+ # Digital Unix and AIX.
+ continue
+ ;;
+
+ -thread-safe)
+ thread_safe=yes
+ continue
+ ;;
+
+ -version-info)
+ prev=vinfo
+ continue
+ ;;
+ -version-number)
+ prev=vinfo
+ vinfo_number=yes
+ continue
+ ;;
+
+ -Wc,*)
+ args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'`
+ arg=
+ save_ifs="$IFS"; IFS=','
+ for flag in $args; do
+ IFS="$save_ifs"
+ case $flag in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ flag="\"$flag\""
+ ;;
+ esac
+ arg="$arg $wl$flag"
+ compiler_flags="$compiler_flags $flag"
+ done
+ IFS="$save_ifs"
+ arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+ ;;
+
+ -Wl,*)
+ args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'`
+ arg=
+ save_ifs="$IFS"; IFS=','
+ for flag in $args; do
+ IFS="$save_ifs"
+ case $flag in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ flag="\"$flag\""
+ ;;
+ esac
+ arg="$arg $wl$flag"
+ compiler_flags="$compiler_flags $wl$flag"
+ linker_flags="$linker_flags $flag"
+ done
+ IFS="$save_ifs"
+ arg=`$echo "X$arg" | $Xsed -e "s/^ //"`
+ ;;
+
+ -Xcompiler)
+ prev=xcompiler
+ continue
+ ;;
+
+ -Xlinker)
+ prev=xlinker
+ continue
+ ;;
+
+ -XCClinker)
+ prev=xcclinker
+ continue
+ ;;
+
+ # Some other compiler flag.
+ -* | +*)
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ ;;
+
+ *.$objext)
+ # A standard object.
+ objs="$objs $arg"
+ ;;
+
+ *.lo)
+ # A libtool-controlled object.
+
+ # Check to see that this really is a libtool object.
+ if (${SED} -e '2q' $arg | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ pic_object=
+ non_pic_object=
+
+ # Read the .lo file
+ # If there is no directory component, then add one.
+ case $arg in
+ */* | *\\*) . $arg ;;
+ *) . ./$arg ;;
+ esac
+
+ if test -z "$pic_object" || \
+ test -z "$non_pic_object" ||
+ test "$pic_object" = none && \
+ test "$non_pic_object" = none; then
+ $echo "$modename: cannot find name of object for \`$arg'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Extract subdirectory from the argument.
+ xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$arg"; then
+ xdir=
+ else
+ xdir="$xdir/"
+ fi
+
+ if test "$pic_object" != none; then
+ # Prepend the subdirectory the object is found in.
+ pic_object="$xdir$pic_object"
+
+ if test "$prev" = dlfiles; then
+ if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then
+ dlfiles="$dlfiles $pic_object"
+ prev=
+ continue
+ else
+ # If libtool objects are unsupported, then we need to preload.
+ prev=dlprefiles
+ fi
+ fi
+
+ # CHECK ME: I think I busted this. -Ossama
+ if test "$prev" = dlprefiles; then
+ # Preload the old-style object.
+ dlprefiles="$dlprefiles $pic_object"
+ prev=
+ fi
+
+ # A PIC object.
+ libobjs="$libobjs $pic_object"
+ arg="$pic_object"
+ fi
+
+ # Non-PIC object.
+ if test "$non_pic_object" != none; then
+ # Prepend the subdirectory the object is found in.
+ non_pic_object="$xdir$non_pic_object"
+
+ # A standard non-PIC object
+ non_pic_objects="$non_pic_objects $non_pic_object"
+ if test -z "$pic_object" || test "$pic_object" = none ; then
+ arg="$non_pic_object"
+ fi
+ else
+ # If the PIC object exists, use it instead.
+ # $xdir was prepended to $pic_object above.
+ non_pic_object="$pic_object"
+ non_pic_objects="$non_pic_objects $non_pic_object"
+ fi
+ else
+ # Only an error if not doing a dry-run.
+ if test -z "$run"; then
+ $echo "$modename: \`$arg' is not a valid libtool object" 1>&2
+ exit $EXIT_FAILURE
+ else
+ # Dry-run case.
+
+ # Extract subdirectory from the argument.
+ xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$xdir" = "X$arg"; then
+ xdir=
+ else
+ xdir="$xdir/"
+ fi
+
+ pic_object=`$echo "X${xdir}${objdir}/${arg}" | $Xsed -e "$lo2o"`
+ non_pic_object=`$echo "X${xdir}${arg}" | $Xsed -e "$lo2o"`
+ libobjs="$libobjs $pic_object"
+ non_pic_objects="$non_pic_objects $non_pic_object"
+ fi
+ fi
+ ;;
+
+ *.$libext)
+ # An archive.
+ deplibs="$deplibs $arg"
+ old_deplibs="$old_deplibs $arg"
+ continue
+ ;;
+
+ *.la)
+ # A libtool-controlled library.
+
+ if test "$prev" = dlfiles; then
+ # This library was specified with -dlopen.
+ dlfiles="$dlfiles $arg"
+ prev=
+ elif test "$prev" = dlprefiles; then
+ # The library was specified with -dlpreopen.
+ dlprefiles="$dlprefiles $arg"
+ prev=
+ else
+ deplibs="$deplibs $arg"
+ fi
+ continue
+ ;;
+
+ # Some other compiler argument.
+ *)
+ # Unknown arguments in both finalize_command and compile_command need
+ # to be aesthetically quoted because they are evaled later.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ ;;
+ esac # arg
+
+ # Now actually substitute the argument into the commands.
+ if test -n "$arg"; then
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ fi
+ done # argument parsing loop
+
+ if test -n "$prev"; then
+ $echo "$modename: the \`$prevarg' option requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then
+ eval arg=\"$export_dynamic_flag_spec\"
+ compile_command="$compile_command $arg"
+ finalize_command="$finalize_command $arg"
+ fi
+
+ oldlibs=
+ # calculate the name of the file, without its directory
+ outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'`
+ libobjs_save="$libobjs"
+
+ if test -n "$shlibpath_var"; then
+ # get the directories listed in $shlibpath_var
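+      # (the eval plus $Xsed turn the colon-separated list into a
+      # space-separated one)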
+ eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\`
+ else
+ shlib_search_path=
+ fi
+ eval sys_lib_search_path=\"$sys_lib_search_path_spec\"
+ eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\"
+
+ output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$output_objdir" = "X$output"; then
+ output_objdir="$objdir"
+ else
+ output_objdir="$output_objdir/$objdir"
+ fi
+ # Create the object directory.
+ if test ! -d "$output_objdir"; then
+ $show "$mkdir $output_objdir"
+ $run $mkdir $output_objdir
+ exit_status=$?
+ if test "$exit_status" -ne 0 && test ! -d "$output_objdir"; then
+ exit $exit_status
+ fi
+ fi
+
+ # Determine the type of output
+ case $output in
+ "")
+ $echo "$modename: you must specify an output file" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ *.$libext) linkmode=oldlib ;;
+ *.lo | *.$objext) linkmode=obj ;;
+ *.la) linkmode=lib ;;
+ *) linkmode=prog ;; # Anything else should be a program.
+ esac
+
+ case $host in
+ *cygwin* | *mingw* | *pw32*)
+ # don't eliminate duplications in $postdeps and $predeps
+ duplicate_compiler_generated_deps=yes
+ ;;
+ *)
+ duplicate_compiler_generated_deps=$duplicate_deps
+ ;;
+ esac
+ specialdeplibs=
+
+ libs=
+ # Find all interdependent deplibs by searching for libraries
+ # that are linked more than once (e.g. -la -lb -la)
+ for deplib in $deplibs; do
+ if test "X$duplicate_deps" = "Xyes" ; then
+ case "$libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ fi
+ libs="$libs $deplib"
+ done
+
+ if test "$linkmode" = lib; then
+ libs="$predeps $libs $compiler_lib_search_path $postdeps"
+
+      # Compute libraries that are listed more than once in $predeps and
+      # $postdeps and mark them as special (i.e., those whose duplicates
+      # are not to be eliminated).
+ pre_post_deps=
+ if test "X$duplicate_compiler_generated_deps" = "Xyes" ; then
+ for pre_post_dep in $predeps $postdeps; do
+ case "$pre_post_deps " in
+	  *" $pre_post_dep "*) specialdeplibs="$specialdeplibs $pre_post_dep" ;;
+ esac
+ pre_post_deps="$pre_post_deps $pre_post_dep"
+ done
+ fi
+ pre_post_deps=
+ fi
+
+ deplibs=
+ newdependency_libs=
+ newlib_search_path=
+ need_relink=no # whether we're linking any uninstalled libtool libraries
+ notinst_deplibs= # not-installed libtool libraries
+ case $linkmode in
+ lib)
+ passes="conv link"
+ for file in $dlfiles $dlprefiles; do
+ case $file in
+ *.la) ;;
+ *)
+ $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+ done
+ ;;
+ prog)
+ compile_deplibs=
+ finalize_deplibs=
+ alldeplibs=no
+ newdlfiles=
+ newdlprefiles=
+ passes="conv scan dlopen dlpreopen link"
+ ;;
+ *) passes="conv"
+ ;;
+ esac
+ for pass in $passes; do
+ if test "$linkmode,$pass" = "lib,link" ||
+ test "$linkmode,$pass" = "prog,scan"; then
+ libs="$deplibs"
+ deplibs=
+ fi
+ if test "$linkmode" = prog; then
+ case $pass in
+ dlopen) libs="$dlfiles" ;;
+ dlpreopen) libs="$dlprefiles" ;;
+ link) libs="$deplibs %DEPLIBS% $dependency_libs" ;;
+ esac
+ fi
+ if test "$pass" = dlopen; then
+ # Collect dlpreopened libraries
+ save_deplibs="$deplibs"
+ deplibs=
+ fi
+ for deplib in $libs; do
+ lib=
+ found=no
+ case $deplib in
+ -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe)
+ if test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ compiler_flags="$compiler_flags $deplib"
+ fi
+ continue
+ ;;
+ -l*)
+ if test "$linkmode" != lib && test "$linkmode" != prog; then
+ $echo "$modename: warning: \`-l' is ignored for archives/objects" 1>&2
+ continue
+ fi
+ name=`$echo "X$deplib" | $Xsed -e 's/^-l//'`
+ for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do
+ for search_ext in .la $std_shrext .so .a; do
+ # Search the libtool library
+ lib="$searchdir/lib${name}${search_ext}"
+ if test -f "$lib"; then
+ if test "$search_ext" = ".la"; then
+ found=yes
+ else
+ found=no
+ fi
+ break 2
+ fi
+ done
+ done
+ if test "$found" != yes; then
+ # deplib doesn't seem to be a libtool library
+ if test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ deplibs="$deplib $deplibs"
+ test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
+ fi
+ continue
+ else # deplib is a libtool library
+	    # If $allow_libtool_libs_with_static_runtimes is set and $deplib is a stdlib,
+	    # we need to do some special things here, and not later.
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+ case " $predeps $postdeps " in
+ *" $deplib "*)
+ if (${SED} -e '2q' $lib |
+ grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ library_names=
+ old_library=
+ case $lib in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+ for l in $old_library $library_names; do
+ ll="$l"
+ done
+ if test "X$ll" = "X$old_library" ; then # only static version available
+ found=no
+ ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$ladir" = "X$lib" && ladir="."
+ lib=$ladir/$old_library
+ if test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ deplibs="$deplib $deplibs"
+ test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs"
+ fi
+ continue
+ fi
+ fi
+ ;;
+ *) ;;
+ esac
+ fi
+ fi
+ ;; # -l
+ -L*)
+ case $linkmode in
+ lib)
+ deplibs="$deplib $deplibs"
+ test "$pass" = conv && continue
+ newdependency_libs="$deplib $newdependency_libs"
+ newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+ ;;
+ prog)
+ if test "$pass" = conv; then
+ deplibs="$deplib $deplibs"
+ continue
+ fi
+ if test "$pass" = scan; then
+ deplibs="$deplib $deplibs"
+ else
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ fi
+ newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`
+ ;;
+ *)
+ $echo "$modename: warning: \`-L' is ignored for archives/objects" 1>&2
+ ;;
+ esac # linkmode
+ continue
+ ;; # -L
+ -R*)
+ if test "$pass" = link; then
+ dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'`
+ # Make sure the xrpath contains only unique directories.
+ case "$xrpath " in
+ *" $dir "*) ;;
+ *) xrpath="$xrpath $dir" ;;
+ esac
+ fi
+ deplibs="$deplib $deplibs"
+ continue
+ ;;
+ *.la) lib="$deplib" ;;
+ *.$libext)
+ if test "$pass" = conv; then
+ deplibs="$deplib $deplibs"
+ continue
+ fi
+ case $linkmode in
+ lib)
+ valid_a_lib=no
+ case $deplibs_check_method in
+ match_pattern*)
+ set dummy $deplibs_check_method
+ match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+ if eval $echo \"$deplib\" 2>/dev/null \
+ | $SED 10q \
+ | $EGREP "$match_pattern_regex" > /dev/null; then
+ valid_a_lib=yes
+ fi
+ ;;
+ pass_all)
+ valid_a_lib=yes
+ ;;
+ esac
+ if test "$valid_a_lib" != yes; then
+ $echo
+ $echo "*** Warning: Trying to link with static lib archive $deplib."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which you do not appear to have"
+ $echo "*** because the file extensions .$libext of this argument makes me believe"
+ $echo "*** that it is just a static archive that I should not used here."
+ else
+ $echo
+ $echo "*** Warning: Linking the shared library $output against the"
+ $echo "*** static library $deplib is not portable!"
+ deplibs="$deplib $deplibs"
+ fi
+ continue
+ ;;
+ prog)
+ if test "$pass" != link; then
+ deplibs="$deplib $deplibs"
+ else
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ fi
+ continue
+ ;;
+ esac # linkmode
+ ;; # *.$libext
+ *.lo | *.$objext)
+ if test "$pass" = conv; then
+ deplibs="$deplib $deplibs"
+ elif test "$linkmode" = prog; then
+ if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then
+ # If there is no dlopen support or we're linking statically,
+ # we need to preload.
+ newdlprefiles="$newdlprefiles $deplib"
+ compile_deplibs="$deplib $compile_deplibs"
+ finalize_deplibs="$deplib $finalize_deplibs"
+ else
+ newdlfiles="$newdlfiles $deplib"
+ fi
+ fi
+ continue
+ ;;
+ %DEPLIBS%)
+ alldeplibs=yes
+ continue
+ ;;
+ esac # case $deplib
+ if test "$found" = yes || test -f "$lib"; then :
+ else
+ $echo "$modename: cannot find the library \`$lib' or unhandled argument \`$deplib'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Check to see that this really is a libtool archive.
+ if (${SED} -e '2q' $lib | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$ladir" = "X$lib" && ladir="."
+
+ dlname=
+ dlopen=
+ dlpreopen=
+ libdir=
+ library_names=
+ old_library=
+ # If the library was installed with an old release of libtool,
+ # it will not redefine the variables `installed' or `shouldnotlink'.
+ installed=yes
+ shouldnotlink=no
+ avoidtemprpath=
+
+
+ # Read the .la file
+ case $lib in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+
+ if test "$linkmode,$pass" = "lib,link" ||
+ test "$linkmode,$pass" = "prog,scan" ||
+ { test "$linkmode" != prog && test "$linkmode" != lib; }; then
+ test -n "$dlopen" && dlfiles="$dlfiles $dlopen"
+ test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen"
+ fi
+
+ if test "$pass" = conv; then
+ # Only check for convenience libraries
+ deplibs="$lib $deplibs"
+ if test -z "$libdir"; then
+ if test -z "$old_library"; then
+ $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ # It is a libtool convenience library, so add in its objects.
+ convenience="$convenience $ladir/$objdir/$old_library"
+ old_convenience="$old_convenience $ladir/$objdir/$old_library"
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ deplibs="$deplib $deplibs"
+ if test "X$duplicate_deps" = "Xyes" ; then
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ fi
+ tmp_libs="$tmp_libs $deplib"
+ done
+ elif test "$linkmode" != prog && test "$linkmode" != lib; then
+ $echo "$modename: \`$lib' is not a convenience library" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ continue
+ fi # $pass = conv
+
+
+ # Get the name of the library we link against.
+ linklib=
+ for l in $old_library $library_names; do
+ linklib="$l"
+ done
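+ # Illustrative note: $linklib ends up as the last entry of
+ # $library_names (typically the unversioned libNAME.so) or, when only
+ # a static archive was built, as $old_library.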
+ if test -z "$linklib"; then
+ $echo "$modename: cannot find name of link library for \`$lib'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # This library was specified with -dlopen.
+ if test "$pass" = dlopen; then
+ if test -z "$libdir"; then
+ $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ if test -z "$dlname" ||
+ test "$dlopen_support" != yes ||
+ test "$build_libtool_libs" = no; then
+ # If there is no dlname, no dlopen support or we're linking
+ # statically, we need to preload. We also need to preload any
+ # dependent libraries so libltdl's deplib preloader doesn't
+ # bomb out in the load deplibs phase.
+ dlprefiles="$dlprefiles $lib $dependency_libs"
+ else
+ newdlfiles="$newdlfiles $lib"
+ fi
+ continue
+ fi # $pass = dlopen
+
+ # We need an absolute path.
+ case $ladir in
+ [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;;
+ *)
+ abs_ladir=`cd "$ladir" && pwd`
+ if test -z "$abs_ladir"; then
+ $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2
+ $echo "$modename: passing it literally to the linker, although it might fail" 1>&2
+ abs_ladir="$ladir"
+ fi
+ ;;
+ esac
+ laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+
+ # Find the relevant object directory and library name.
+ if test "X$installed" = Xyes; then
+ if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+ $echo "$modename: warning: library \`$lib' was moved." 1>&2
+ dir="$ladir"
+ absdir="$abs_ladir"
+ libdir="$abs_ladir"
+ else
+ dir="$libdir"
+ absdir="$libdir"
+ fi
+ test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes
+ else
+ if test ! -f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then
+ dir="$ladir"
+ absdir="$abs_ladir"
+ # Remove this search path later
+ notinst_path="$notinst_path $abs_ladir"
+ else
+ dir="$ladir/$objdir"
+ absdir="$abs_ladir/$objdir"
+ # Remove this search path later
+ notinst_path="$notinst_path $abs_ladir"
+ fi
+ fi # $installed = yes
+ name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+
+ # This library was specified with -dlpreopen.
+ if test "$pass" = dlpreopen; then
+ if test -z "$libdir"; then
+ $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ # Prefer using a static library (so that no silly _DYNAMIC symbols
+ # are required to link).
+ if test -n "$old_library"; then
+ newdlprefiles="$newdlprefiles $dir/$old_library"
+ # Otherwise, use the dlname, so that lt_dlopen finds it.
+ elif test -n "$dlname"; then
+ newdlprefiles="$newdlprefiles $dir/$dlname"
+ else
+ newdlprefiles="$newdlprefiles $dir/$linklib"
+ fi
+ fi # $pass = dlpreopen
+
+ if test -z "$libdir"; then
+ # Link the convenience library
+ if test "$linkmode" = lib; then
+ deplibs="$dir/$old_library $deplibs"
+ elif test "$linkmode,$pass" = "prog,link"; then
+ compile_deplibs="$dir/$old_library $compile_deplibs"
+ finalize_deplibs="$dir/$old_library $finalize_deplibs"
+ else
+ deplibs="$lib $deplibs" # used for prog,scan pass
+ fi
+ continue
+ fi
+
+
+ if test "$linkmode" = prog && test "$pass" != link; then
+ newlib_search_path="$newlib_search_path $ladir"
+ deplibs="$lib $deplibs"
+
+ linkalldeplibs=no
+ if test "$link_all_deplibs" != no || test -z "$library_names" ||
+ test "$build_libtool_libs" = no; then
+ linkalldeplibs=yes
+ fi
+
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ case $deplib in
+ -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test
+ esac
+ # Need to link against all dependency_libs?
+ if test "$linkalldeplibs" = yes; then
+ deplibs="$deplib $deplibs"
+ else
+ # Need to hardcode shared library paths
+ # or/and link against static libraries
+ newdependency_libs="$deplib $newdependency_libs"
+ fi
+ if test "X$duplicate_deps" = "Xyes" ; then
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ fi
+ tmp_libs="$tmp_libs $deplib"
+ done # for deplib
+ continue
+ fi # $linkmode = prog...
+
+ if test "$linkmode,$pass" = "prog,link"; then
+ if test -n "$library_names" &&
+ { test "$prefer_static_libs" = no || test -z "$old_library"; }; then
+ # We need to hardcode the library path
+ if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then
+ # Make sure the rpath contains only unique directories.
+ case "$temp_rpath " in
+ *" $dir "*) ;;
+ *" $absdir "*) ;;
+ *) temp_rpath="$temp_rpath $absdir" ;;
+ esac
+ fi
+
+ # Hardcode the library path.
+ # Skip directories that are in the system default run-time
+ # search path.
+ case " $sys_lib_dlsearch_path " in
+ *" $absdir "*) ;;
+ *)
+ case "$compile_rpath " in
+ *" $absdir "*) ;;
+ *) compile_rpath="$compile_rpath $absdir"
+ esac
+ ;;
+ esac
+ case " $sys_lib_dlsearch_path " in
+ *" $libdir "*) ;;
+ *)
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir"
+ esac
+ ;;
+ esac
+ fi # $linkmode,$pass = prog,link...
+
+ if test "$alldeplibs" = yes &&
+ { test "$deplibs_check_method" = pass_all ||
+ { test "$build_libtool_libs" = yes &&
+ test -n "$library_names"; }; }; then
+ # We only need to search for static libraries
+ continue
+ fi
+ fi
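+ # Illustrative example (assumed path): a library living in /opt/foo/lib,
+ # which is not in $sys_lib_dlsearch_path, gets that directory added to
+ # $compile_rpath (for the build tree) and its install libdir added to
+ # $finalize_rpath; directories the runtime loader searches by default
+ # are skipped.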
+
+ link_static=no # Whether the deplib will be linked statically
+ use_static_libs=$prefer_static_libs
+ if test "$use_static_libs" = built && test "$installed" = yes ; then
+ use_static_libs=no
+ fi
+ if test -n "$library_names" &&
+ { test "$use_static_libs" = no || test -z "$old_library"; }; then
+ if test "$installed" = no; then
+ notinst_deplibs="$notinst_deplibs $lib"
+ need_relink=yes
+ fi
+ # This is a shared library
+
+ # Warn about portability; we can't link against -module libraries on
+ # some systems (darwin)
+ if test "$shouldnotlink" = yes && test "$pass" = link ; then
+ $echo
+ if test "$linkmode" = prog; then
+ $echo "*** Warning: Linking the executable $output against the loadable module"
+ else
+ $echo "*** Warning: Linking the shared library $output against the loadable module"
+ fi
+ $echo "*** $linklib is not portable!"
+ fi
+ if test "$linkmode" = lib &&
+ test "$hardcode_into_libs" = yes; then
+ # Hardcode the library path.
+ # Skip directories that are in the system default run-time
+ # search path.
+ case " $sys_lib_dlsearch_path " in
+ *" $absdir "*) ;;
+ *)
+ case "$compile_rpath " in
+ *" $absdir "*) ;;
+ *) compile_rpath="$compile_rpath $absdir"
+ esac
+ ;;
+ esac
+ case " $sys_lib_dlsearch_path " in
+ *" $libdir "*) ;;
+ *)
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir"
+ esac
+ ;;
+ esac
+ fi
+
+ if test -n "$old_archive_from_expsyms_cmds"; then
+ # figure out the soname
+ set dummy $library_names
+ realname="$2"
+ shift; shift
+ libname=`eval \\$echo \"$libname_spec\"`
+ # use dlname if we got it. it's perfectly good, no?
+ if test -n "$dlname"; then
+ soname="$dlname"
+ elif test -n "$soname_spec"; then
+ # bleh windows
+ case $host in
+ *cygwin* | mingw*)
+ major=`expr $current - $age`
+ versuffix="-$major"
+ ;;
+ esac
+ eval soname=\"$soname_spec\"
+ else
+ soname="$realname"
+ fi
+
+ # Make a new name for the extract_expsyms_cmds to use
+ soroot="$soname"
+ soname=`$echo $soroot | ${SED} -e 's/^.*\///'`
+ newlib="libimp-`$echo $soname | ${SED} 's/^lib//;s/\.dll$//'`.a"
+
+ # If the library has no export list, then create one now
+ if test -f "$output_objdir/$soname-def"; then :
+ else
+ $show "extracting exported symbol list from \`$soname'"
+ save_ifs="$IFS"; IFS='~'
+ cmds=$extract_expsyms_cmds
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ # Create $newlib
+ if test -f "$output_objdir/$newlib"; then :; else
+ $show "generating import library for \`$soname'"
+ save_ifs="$IFS"; IFS='~'
+ cmds=$old_archive_from_expsyms_cmds
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+ # make sure the library variables are pointing to the new library
+ dir=$output_objdir
+ linklib=$newlib
+ fi # test -n "$old_archive_from_expsyms_cmds"
+
+ if test "$linkmode" = prog || test "$mode" != relink; then
+ add_shlibpath=
+ add_dir=
+ add=
+ lib_linked=yes
+ case $hardcode_action in
+ immediate | unsupported)
+ if test "$hardcode_direct" = no; then
+ add="$dir/$linklib"
+ case $host in
+ *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;;
+ *-*-sysv4*uw2*) add_dir="-L$dir" ;;
+ *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \
+ *-*-unixware7*) add_dir="-L$dir" ;;
+ *-*-darwin* )
+ # if the lib is a module then we cannot link against it;
+ # someone is ignoring the new warnings issued above
+ if /usr/bin/file -L $add 2> /dev/null |
+ $EGREP ": [^:]* bundle" >/dev/null ; then
+ $echo "** Warning, lib $linklib is a module, not a shared library"
+ if test -z "$old_library" ; then
+ $echo
+ $echo "** And there doesn't seem to be a static archive available"
+ $echo "** The link will probably fail, sorry"
+ else
+ add="$dir/$old_library"
+ fi
+ fi
+ esac
+ elif test "$hardcode_minus_L" = no; then
+ case $host in
+ *-*-sunos*) add_shlibpath="$dir" ;;
+ esac
+ add_dir="-L$dir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = no; then
+ add_shlibpath="$dir"
+ add="-l$name"
+ else
+ lib_linked=no
+ fi
+ ;;
+ relink)
+ if test "$hardcode_direct" = yes; then
+ add="$dir/$linklib"
+ elif test "$hardcode_minus_L" = yes; then
+ add_dir="-L$dir"
+ # Try looking first in the location we're being installed to.
+ if test -n "$inst_prefix_dir"; then
+ case $libdir in
+ [\\/]*)
+ add_dir="$add_dir -L$inst_prefix_dir$libdir"
+ ;;
+ esac
+ fi
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = yes; then
+ add_shlibpath="$dir"
+ add="-l$name"
+ else
+ lib_linked=no
+ fi
+ ;;
+ *) lib_linked=no ;;
+ esac
+
+ if test "$lib_linked" != yes; then
+ $echo "$modename: configuration error: unsupported hardcode properties"
+ exit $EXIT_FAILURE
+ fi
+
+ if test -n "$add_shlibpath"; then
+ case :$compile_shlibpath: in
+ *":$add_shlibpath:"*) ;;
+ *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;;
+ esac
+ fi
+ if test "$linkmode" = prog; then
+ test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs"
+ test -n "$add" && compile_deplibs="$add $compile_deplibs"
+ else
+ test -n "$add_dir" && deplibs="$add_dir $deplibs"
+ test -n "$add" && deplibs="$add $deplibs"
+ if test "$hardcode_direct" != yes && \
+ test "$hardcode_minus_L" != yes && \
+ test "$hardcode_shlibpath_var" = yes; then
+ case :$finalize_shlibpath: in
+ *":$libdir:"*) ;;
+ *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+ esac
+ fi
+ fi
+ fi
+
+ if test "$linkmode" = prog || test "$mode" = relink; then
+ add_shlibpath=
+ add_dir=
+ add=
+ # Finalize command for both is simple: just hardcode it.
+ if test "$hardcode_direct" = yes; then
+ add="$libdir/$linklib"
+ elif test "$hardcode_minus_L" = yes; then
+ add_dir="-L$libdir"
+ add="-l$name"
+ elif test "$hardcode_shlibpath_var" = yes; then
+ case :$finalize_shlibpath: in
+ *":$libdir:"*) ;;
+ *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;;
+ esac
+ add="-l$name"
+ elif test "$hardcode_automatic" = yes; then
+ if test -n "$inst_prefix_dir" &&
+ test -f "$inst_prefix_dir$libdir/$linklib" ; then
+ add="$inst_prefix_dir$libdir/$linklib"
+ else
+ add="$libdir/$linklib"
+ fi
+ else
+ # We cannot seem to hardcode it, guess we'll fake it.
+ add_dir="-L$libdir"
+ # Try looking first in the location we're being installed to.
+ if test -n "$inst_prefix_dir"; then
+ case $libdir in
+ [\\/]*)
+ add_dir="$add_dir -L$inst_prefix_dir$libdir"
+ ;;
+ esac
+ fi
+ add="-l$name"
+ fi
+
+ if test "$linkmode" = prog; then
+ test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+ test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+ else
+ test -n "$add_dir" && deplibs="$add_dir $deplibs"
+ test -n "$add" && deplibs="$add $deplibs"
+ fi
+ fi
+ elif test "$linkmode" = prog; then
+ # Here we assume that one of hardcode_direct or hardcode_minus_L
+ # is not unsupported. This is valid on all known static and
+ # shared platforms.
+ if test "$hardcode_direct" != unsupported; then
+ test -n "$old_library" && linklib="$old_library"
+ compile_deplibs="$dir/$linklib $compile_deplibs"
+ finalize_deplibs="$dir/$linklib $finalize_deplibs"
+ else
+ compile_deplibs="-l$name -L$dir $compile_deplibs"
+ finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+ fi
+ elif test "$build_libtool_libs" = yes; then
+ # Not a shared library
+ if test "$deplibs_check_method" != pass_all; then
+ # We're trying to link a shared library against a static one
+ # but the system doesn't support it.
+
+ # Just print a warning and add the library to dependency_libs so
+ # that the program can be linked against the static library.
+ $echo
+ $echo "*** Warning: This system can not link to static lib archive $lib."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which you do not appear to have."
+ if test "$module" = yes; then
+ $echo "*** But as you try to build a module library, libtool will still create "
+ $echo "*** a static module, that should work as long as the dlopening application"
+ $echo "*** is linked with the -dlopen flag to resolve symbols at runtime."
+ if test -z "$global_symbol_pipe"; then
+ $echo
+ $echo "*** However, this would only work if libtool was able to extract symbol"
+ $echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+ $echo "*** not find such a program. So, this module is probably useless."
+ $echo "*** \`nm' from GNU binutils and a full rebuild may help."
+ fi
+ if test "$build_old_libs" = no; then
+ build_libtool_libs=module
+ build_old_libs=yes
+ else
+ build_libtool_libs=no
+ fi
+ fi
+ else
+ deplibs="$dir/$old_library $deplibs"
+ link_static=yes
+ fi
+ fi # link shared/static library?
+
+ if test "$linkmode" = lib; then
+ if test -n "$dependency_libs" &&
+ { test "$hardcode_into_libs" != yes ||
+ test "$build_old_libs" = yes ||
+ test "$link_static" = yes; }; then
+ # Extract -R from dependency_libs
+ temp_deplibs=
+ for libdir in $dependency_libs; do
+ case $libdir in
+ -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'`
+ case " $xrpath " in
+ *" $temp_xrpath "*) ;;
+ *) xrpath="$xrpath $temp_xrpath";;
+ esac;;
+ *) temp_deplibs="$temp_deplibs $libdir";;
+ esac
+ done
+ dependency_libs="$temp_deplibs"
+ fi
+
+ newlib_search_path="$newlib_search_path $absdir"
+ # Link against this library
+ test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+ # ... and its dependency_libs
+ tmp_libs=
+ for deplib in $dependency_libs; do
+ newdependency_libs="$deplib $newdependency_libs"
+ if test "X$duplicate_deps" = "Xyes" ; then
+ case "$tmp_libs " in
+ *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;;
+ esac
+ fi
+ tmp_libs="$tmp_libs $deplib"
+ done
+
+ if test "$link_all_deplibs" != no; then
+ # Add the search paths of all dependency libraries
+ for deplib in $dependency_libs; do
+ case $deplib in
+ -L*) path="$deplib" ;;
+ *.la)
+ dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$deplib" && dir="."
+ # We need an absolute path.
+ case $dir in
+ [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;;
+ *)
+ absdir=`cd "$dir" && pwd`
+ if test -z "$absdir"; then
+ $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2
+ absdir="$dir"
+ fi
+ ;;
+ esac
+ if grep "^installed=no" $deplib > /dev/null; then
+ path="$absdir/$objdir"
+ else
+ eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ if test "$absdir" != "$libdir"; then
+ $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2
+ fi
+ path="$absdir"
+ fi
+ depdepl=
+ case $host in
+ *-*-darwin*)
+ # we do not want to link against static libs,
+ # but need to link against shared
+ eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib`
+ if test -n "$deplibrary_names" ; then
+ for tmp in $deplibrary_names ; do
+ depdepl=$tmp
+ done
+ if test -f "$path/$depdepl" ; then
+ depdepl="$path/$depdepl"
+ fi
+ # do not add paths which are already there
+ case " $newlib_search_path " in
+ *" $path "*) ;;
+ *) newlib_search_path="$newlib_search_path $path";;
+ esac
+ fi
+ path=""
+ ;;
+ *)
+ path="-L$path"
+ ;;
+ esac
+ ;;
+ -l*)
+ case $host in
+ *-*-darwin*)
+ # Again, we only want to link against shared libraries
+ eval tmp_libs=`$echo "X$deplib" | $Xsed -e "s,^\-l,,"`
+ for tmp in $newlib_search_path ; do
+ if test -f "$tmp/lib$tmp_libs.dylib" ; then
+ eval depdepl="$tmp/lib$tmp_libs.dylib"
+ break
+ fi
+ done
+ path=""
+ ;;
+ *) continue ;;
+ esac
+ ;;
+ *) continue ;;
+ esac
+ case " $deplibs " in
+ *" $path "*) ;;
+ *) deplibs="$path $deplibs" ;;
+ esac
+ case " $deplibs " in
+ *" $depdepl "*) ;;
+ *) deplibs="$depdepl $deplibs" ;;
+ esac
+ done
+ fi # link_all_deplibs != no
+ fi # linkmode = lib
+ done # for deplib in $libs
+ dependency_libs="$newdependency_libs"
+ if test "$pass" = dlpreopen; then
+ # Link the dlpreopened libraries before other libraries
+ for deplib in $save_deplibs; do
+ deplibs="$deplib $deplibs"
+ done
+ fi
+ if test "$pass" != dlopen; then
+ if test "$pass" != conv; then
+ # Make sure lib_search_path contains only unique directories.
+ lib_search_path=
+ for dir in $newlib_search_path; do
+ case "$lib_search_path " in
+ *" $dir "*) ;;
+ *) lib_search_path="$lib_search_path $dir" ;;
+ esac
+ done
+ newlib_search_path=
+ fi
+
+ if test "$linkmode,$pass" != "prog,link"; then
+ vars="deplibs"
+ else
+ vars="compile_deplibs finalize_deplibs"
+ fi
+ for var in $vars dependency_libs; do
+ # Add libraries to $var in reverse order
+ eval tmp_libs=\"\$$var\"
+ new_libs=
+ for deplib in $tmp_libs; do
+ # FIXME: Pedantically, this is the right thing to do, so
+ # that some nasty dependency loop isn't accidentally
+ # broken:
+ #new_libs="$deplib $new_libs"
+ # Pragmatically, this seems to cause very few problems in
+ # practice:
+ case $deplib in
+ -L*) new_libs="$deplib $new_libs" ;;
+ -R*) ;;
+ *)
+ # And here is the reason: when a library appears more
+ # than once as an explicit dependence of a library, or
+ # is implicitly linked in more than once by the
+ # compiler, it is considered special, and multiple
+ # occurrences thereof are not removed. Compare this
+ # with having the same library being listed as a
+ # dependency of multiple other libraries: in this case,
+ # we know (pedantically, we assume) the library does not
+ # need to be listed more than once, so we keep only the
+ # last copy. This is not always right, but it is rare
+ # enough that we require users that really mean to play
+ # such unportable linking tricks to link the library
+ # using -Wl,-lname, so that libtool does not consider it
+ # for duplicate removal.
+ case " $specialdeplibs " in
+ *" $deplib "*) new_libs="$deplib $new_libs" ;;
+ *)
+ case " $new_libs " in
+ *" $deplib "*) ;;
+ *) new_libs="$deplib $new_libs" ;;
+ esac
+ ;;
+ esac
+ ;;
+ esac
+ done
+ tmp_libs=
+ for deplib in $new_libs; do
+ case $deplib in
+ -L*)
+ case " $tmp_libs " in
+ *" $deplib "*) ;;
+ *) tmp_libs="$tmp_libs $deplib" ;;
+ esac
+ ;;
+ *) tmp_libs="$tmp_libs $deplib" ;;
+ esac
+ done
+ eval $var=\"$tmp_libs\"
+ done # for var
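+ # Illustrative example: if a library appears twice in $tmp_libs, only
+ # one copy survives the reversal above, unless it is listed in
+ # $specialdeplibs (named more than once as a direct dependency), in
+ # which case every occurrence is kept; -L flags are deduplicated and
+ # -R flags are dropped here (they were already collected into $xrpath).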
+ fi
+ # Last step: remove runtime libs from dependency_libs
+ # (they stay in deplibs)
+ tmp_libs=
+ for i in $dependency_libs ; do
+ case " $predeps $postdeps $compiler_lib_search_path " in
+ *" $i "*)
+ i=""
+ ;;
+ esac
+ if test -n "$i" ; then
+ tmp_libs="$tmp_libs $i"
+ fi
+ done
+ dependency_libs=$tmp_libs
+ done # for pass
+ if test "$linkmode" = prog; then
+ dlfiles="$newdlfiles"
+ dlprefiles="$newdlprefiles"
+ fi
+
+ case $linkmode in
+ oldlib)
+ if test -n "$deplibs"; then
+ $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2
+ fi
+
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$rpath"; then
+ $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$xrpath"; then
+ $echo "$modename: warning: \`-R' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info/-version-number' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for archives" 1>&2
+ fi
+
+ if test -n "$export_symbols" || test -n "$export_symbols_regex"; then
+ $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2
+ fi
+
+ # Now set the variables for building old libraries.
+ build_libtool_libs=no
+ oldlibs="$output"
+ objs="$objs$old_deplibs"
+ ;;
+
+ lib)
+ # Make sure we only generate libraries of the form `libNAME.la'.
+ case $outputname in
+ lib*)
+ name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'`
+ eval shared_ext=\"$shrext_cmds\"
+ eval libname=\"$libname_spec\"
+ ;;
+ *)
+ if test "$module" = no; then
+ $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ if test "$need_lib_prefix" != no; then
+ # Add the "lib" prefix for modules if required
+ name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+ eval shared_ext=\"$shrext_cmds\"
+ eval libname=\"$libname_spec\"
+ else
+ libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'`
+ fi
+ ;;
+ esac
+
+ if test -n "$objs"; then
+ if test "$deplibs_check_method" != pass_all; then
+ $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1
+ exit $EXIT_FAILURE
+ else
+ $echo
+ $echo "*** Warning: Linking the shared library $output against the non-libtool"
+ $echo "*** objects $objs is not portable!"
+ libobjs="$libobjs $objs"
+ fi
+ fi
+
+ if test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2
+ fi
+
+ set dummy $rpath
+ if test "$#" -gt 2; then
+ $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2
+ fi
+ install_libdir="$2"
+
+ oldlibs=
+ if test -z "$rpath"; then
+ if test "$build_libtool_libs" = yes; then
+ # Building a libtool convenience library.
+ # Some compilers have problems with a `.al' extension, so
+ # convenience libraries should have the same extension as an
+ # archive normally would.
+ oldlibs="$output_objdir/$libname.$libext $oldlibs"
+ build_libtool_libs=convenience
+ build_old_libs=yes
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info/-version-number' is ignored for convenience libraries" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2
+ fi
+ else
+
+ # Parse the version information argument.
+ save_ifs="$IFS"; IFS=':'
+ set dummy $vinfo 0 0 0
+ IFS="$save_ifs"
+
+ if test -n "$8"; then
+ $echo "$modename: too many parameters to \`-version-info'" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # convert absolute version numbers to libtool ages
+ # this retains compatibility with .la files and attempts
+ # to make the code below a bit more comprehensible
+
+ case $vinfo_number in
+ yes)
+ number_major="$2"
+ number_minor="$3"
+ number_revision="$4"
+ #
+ # There are really only two kinds -- those that
+ # use the current revision as the major version
+ # and those that subtract age and use age as
+ # a minor version. But, then there is irix
+ # which has an extra 1 added just for fun
+ #
+ case $version_type in
+ darwin|linux|osf|windows)
+ current=`expr $number_major + $number_minor`
+ age="$number_minor"
+ revision="$number_revision"
+ ;;
+ freebsd-aout|freebsd-elf|sunos)
+ current="$number_major"
+ revision="$number_minor"
+ age="0"
+ ;;
+ irix|nonstopux)
+ current=`expr $number_major + $number_minor - 1`
+ age="$number_minor"
+ revision="$number_minor"
+ ;;
+ esac
+ ;;
+ no)
+ current="$2"
+ revision="$3"
+ age="$4"
+ ;;
+ esac
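+ # Illustrative example (assumed arguments): `-version-info 5:3:2' gives
+ # current=5, revision=3, age=2 directly, while `-version-number 1:2:3'
+ # on a darwin/linux/osf/windows host is converted to current=1+2=3,
+ # age=2, revision=3.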
+
+ # Check that each of the version components is a valid number.
+ case $current in
+ 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+ *)
+ $echo "$modename: CURRENT \`$current' must be a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ case $revision in
+ 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+ *)
+ $echo "$modename: REVISION \`$revision' must be a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ case $age in
+ 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;;
+ *)
+ $echo "$modename: AGE \`$age' must be a nonnegative integer" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ if test "$age" -gt "$current"; then
+ $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2
+ $echo "$modename: \`$vinfo' is not valid version information" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Calculate the version variables.
+ major=
+ versuffix=
+ verstring=
+ case $version_type in
+ none) ;;
+
+ darwin)
+ # Like Linux, but with the current version available in
+ # verstring for coding it into the library header
+ major=.`expr $current - $age`
+ versuffix="$major.$age.$revision"
+ # Darwin ld doesn't like 0 for these options...
+ minor_current=`expr $current + 1`
+ verstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision"
+ ;;
+
+ freebsd-aout)
+ major=".$current"
+ versuffix=".$current.$revision";
+ ;;
+
+ freebsd-elf)
+ major=".$current"
+ versuffix=".$current";
+ ;;
+
+ irix | nonstopux)
+ major=`expr $current - $age + 1`
+
+ case $version_type in
+ nonstopux) verstring_prefix=nonstopux ;;
+ *) verstring_prefix=sgi ;;
+ esac
+ verstring="$verstring_prefix$major.$revision"
+
+ # Add in all the interfaces that we are compatible with.
+ loop=$revision
+ while test "$loop" -ne 0; do
+ iface=`expr $revision - $loop`
+ loop=`expr $loop - 1`
+ verstring="$verstring_prefix$major.$iface:$verstring"
+ done
+
+ # Before this point, $major must not contain `.'.
+ major=.$major
+ versuffix="$major.$revision"
+ ;;
+
+ linux)
+ major=.`expr $current - $age`
+ versuffix="$major.$age.$revision"
+ ;;
+
+ osf)
+ major=.`expr $current - $age`
+ versuffix=".$current.$age.$revision"
+ verstring="$current.$age.$revision"
+
+ # Add in all the interfaces that we are compatible with.
+ loop=$age
+ while test "$loop" -ne 0; do
+ iface=`expr $current - $loop`
+ loop=`expr $loop - 1`
+ verstring="$verstring:${iface}.0"
+ done
+
+ # Make executables depend on our current version.
+ verstring="$verstring:${current}.0"
+ ;;
+
+ sunos)
+ major=".$current"
+ versuffix=".$current.$revision"
+ ;;
+
+ windows)
+ # Use '-' rather than '.', since we only want one
+ # extension on DOS 8.3 filesystems.
+ major=`expr $current - $age`
+ versuffix="-$major"
+ ;;
+
+ *)
+ $echo "$modename: unknown library version type \`$version_type'" 1>&2
+ $echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
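+ # Illustrative outcome (continuing the assumed 5:3:2 example): on linux
+ # this yields major=.3 and versuffix=.3.2.3, i.e. libNAME.so.3.2.3;
+ # on freebsd-elf the same input yields versuffix=.5, i.e. libNAME.so.5.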
+
+ # Clear the version info if we defaulted, and they specified a release.
+ if test -z "$vinfo" && test -n "$release"; then
+ major=
+ case $version_type in
+ darwin)
+ # we can't check for "0.0" in archive_cmds due to quoting
+ # problems, so we reset it completely
+ verstring=
+ ;;
+ *)
+ verstring="0.0"
+ ;;
+ esac
+ if test "$need_version" = no; then
+ versuffix=
+ else
+ versuffix=".0.0"
+ fi
+ fi
+
+ # Remove version info from name if versioning should be avoided
+ if test "$avoid_version" = yes && test "$need_version" = no; then
+ major=
+ versuffix=
+ verstring=""
+ fi
+
+ # Check to see if the archive will have undefined symbols.
+ if test "$allow_undefined" = yes; then
+ if test "$allow_undefined_flag" = unsupported; then
+ $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2
+ build_libtool_libs=no
+ build_old_libs=yes
+ fi
+ else
+ # Don't allow undefined symbols.
+ allow_undefined_flag="$no_undefined_flag"
+ fi
+ fi
+
+ if test "$mode" != relink; then
+ # Remove our outputs, but don't remove object files since they
+ # may have been created when compiling PIC objects.
+ removelist=
+ tempremovelist=`$echo "$output_objdir/*"`
+ for p in $tempremovelist; do
+ case $p in
+ *.$objext)
+ ;;
+ $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*)
+ if test "X$precious_files_regex" != "X"; then
+ if echo $p | $EGREP -e "$precious_files_regex" >/dev/null 2>&1
+ then
+ continue
+ fi
+ fi
+ removelist="$removelist $p"
+ ;;
+ *) ;;
+ esac
+ done
+ if test -n "$removelist"; then
+ $show "${rm}r $removelist"
+ $run ${rm}r $removelist
+ fi
+ fi
+
+ # Now set the variables for building old libraries.
+ if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then
+ oldlibs="$oldlibs $output_objdir/$libname.$libext"
+
+ # Transform .lo files to .o files.
+ oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP`
+ fi
+
+ # Eliminate all temporary directories.
+ for path in $notinst_path; do
+ lib_search_path=`$echo "$lib_search_path " | ${SED} -e "s% $path % %g"`
+ deplibs=`$echo "$deplibs " | ${SED} -e "s% -L$path % %g"`
+ dependency_libs=`$echo "$dependency_libs " | ${SED} -e "s% -L$path % %g"`
+ done
+
+ if test -n "$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ temp_xrpath=
+ for libdir in $xrpath; do
+ temp_xrpath="$temp_xrpath -R$libdir"
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then
+ dependency_libs="$temp_xrpath $dependency_libs"
+ fi
+ fi
+
+ # Make sure dlfiles contains only unique files that won't be dlpreopened
+ old_dlfiles="$dlfiles"
+ dlfiles=
+ for lib in $old_dlfiles; do
+ case " $dlprefiles $dlfiles " in
+ *" $lib "*) ;;
+ *) dlfiles="$dlfiles $lib" ;;
+ esac
+ done
+
+ # Make sure dlprefiles contains only unique files
+ old_dlprefiles="$dlprefiles"
+ dlprefiles=
+ for lib in $old_dlprefiles; do
+ case "$dlprefiles " in
+ *" $lib "*) ;;
+ *) dlprefiles="$dlprefiles $lib" ;;
+ esac
+ done
+
+ if test "$build_libtool_libs" = yes; then
+ if test -n "$rpath"; then
+ case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*)
+ # these systems don't actually have a c library (as such)!
+ ;;
+ *-*-rhapsody* | *-*-darwin1.[012])
+ # Rhapsody C library is in the System framework
+ deplibs="$deplibs -framework System"
+ ;;
+ *-*-netbsd*)
+ # Don't link with libc until the a.out ld.so is fixed.
+ ;;
+ *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*)
+ # Do not include libc due to us having libc/libc_r.
+ ;;
+ *-*-sco3.2v5* | *-*-sco5v6*)
+ # Causes problems with __ctype
+ ;;
+ *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*)
+ # Compiler inserts libc in the correct place for threads to work
+ ;;
+ *)
+ # Add libc to deplibs on all other systems if necessary.
+ if test "$build_libtool_need_lc" = "yes"; then
+ deplibs="$deplibs -lc"
+ fi
+ ;;
+ esac
+ fi
+
+ # Transform deplibs into only deplibs that can be linked in shared.
+ name_save=$name
+ libname_save=$libname
+ release_save=$release
+ versuffix_save=$versuffix
+ major_save=$major
+ # I'm not sure if I'm treating the release correctly. I think
+ # release should show up in the -l (ie -lgmp5) so we don't want to
+ # add it in twice. Is that correct?
+ release=""
+ versuffix=""
+ major=""
+ newdeplibs=
+ droppeddeps=no
+ case $deplibs_check_method in
+ pass_all)
+ # Don't check for shared/static. Everything works.
+ # This might be a little naive. We might want to check
+ # whether the library exists or not. But this is on
+ # osf3 & osf4 and I'm not really sure... Just
+ # implementing what was already the behavior.
+ newdeplibs=$deplibs
+ ;;
+ test_compile)
+ # This code stresses the "libraries are programs" paradigm to its
+ # limits. Maybe even breaks it. We compile a program, linking it
+ # against the deplibs as a proxy for the library. Then we can check
+ # whether they linked in statically or dynamically with ldd.
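+ # Illustrative sketch (assumed output): if `ldd conftest' lists
+ # libfoo.so.N among the resolved dependencies, -lfoo is kept in
+ # $newdeplibs; a deplib the linker quietly ignored is dropped with the
+ # warning printed below.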
+ $rm conftest.c
+ cat > conftest.c <<EOF
+ int main() { return 0; }
+EOF
+ $rm conftest
+ $LTCC $LTCFLAGS -o conftest conftest.c $deplibs
+ if test "$?" -eq 0 ; then
+ ldd_output=`ldd conftest`
+ for i in $deplibs; do
+ name=`expr $i : '-l\(.*\)'`
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" && test "$name" -ne "0"; then
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+ case " $predeps $postdeps " in
+ *" $i "*)
+ newdeplibs="$newdeplibs $i"
+ i=""
+ ;;
+ esac
+ fi
+ if test -n "$i" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ deplib_matches=`eval \\$echo \"$library_names_spec\"`
+ set dummy $deplib_matches
+ deplib_match=$2
+ if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+ newdeplibs="$newdeplibs $i"
+ else
+ droppeddeps=yes
+ $echo
+ $echo "*** Warning: dynamic linker does not accept needed library $i."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which I believe you do not have"
+ $echo "*** because a test_compile did reveal that the linker did not use it for"
+ $echo "*** its dynamic dependency list that programs get resolved with at runtime."
+ fi
+ fi
+ else
+ newdeplibs="$newdeplibs $i"
+ fi
+ done
+ else
+ # An error occurred in the first compile. Let's try to salvage
+ # the situation: compile a separate program for each library.
+ for i in $deplibs; do
+ name=`expr $i : '-l\(.*\)'`
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" && test "$name" != "0"; then
+ $rm conftest
+ $LTCC $LTCFLAGS -o conftest conftest.c $i
+ # Did it work?
+ if test "$?" -eq 0 ; then
+ ldd_output=`ldd conftest`
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+ case " $predeps $postdeps " in
+ *" $i "*)
+ newdeplibs="$newdeplibs $i"
+ i=""
+ ;;
+ esac
+ fi
+ if test -n "$i" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ deplib_matches=`eval \\$echo \"$library_names_spec\"`
+ set dummy $deplib_matches
+ deplib_match=$2
+ if test `expr "$ldd_output" : ".*$deplib_match"` -ne 0 ; then
+ newdeplibs="$newdeplibs $i"
+ else
+ droppeddeps=yes
+ $echo
+ $echo "*** Warning: dynamic linker does not accept needed library $i."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which you do not appear to have"
+ $echo "*** because a test_compile did reveal that the linker did not use this one"
+ $echo "*** as a dynamic dependency that programs can get resolved with at runtime."
+ fi
+ fi
+ else
+ droppeddeps=yes
+ $echo
+ $echo "*** Warning! Library $i is needed by this library but I was not able to"
+ $echo "*** make it link in! You will probably need to install it or some"
+ $echo "*** library that it depends on before this library will be fully"
+ $echo "*** functional. Installing it before continuing would be even better."
+ fi
+ else
+ newdeplibs="$newdeplibs $i"
+ fi
+ done
+ fi
+ ;;
+ file_magic*)
+ set dummy $deplibs_check_method
+ file_magic_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+ for a_deplib in $deplibs; do
+ name=`expr $a_deplib : '-l\(.*\)'`
+ # If $name is empty we are operating on a -L argument.
+ if test "$name" != "" && test "$name" != "0"; then
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+ case " $predeps $postdeps " in
+ *" $a_deplib "*)
+ newdeplibs="$newdeplibs $a_deplib"
+ a_deplib=""
+ ;;
+ esac
+ fi
+ if test -n "$a_deplib" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+ potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+ for potent_lib in $potential_libs; do
+ # Follow soft links.
+ if ls -lLd "$potent_lib" 2>/dev/null \
+ | grep " -> " >/dev/null; then
+ continue
+ fi
+ # The statement above tries to avoid entering an
+ # endless loop below, in case of cyclic links.
+ # We might still enter an endless loop, since a link
+ # loop can be closed while we follow links,
+ # but so what?
+ potlib="$potent_lib"
+ while test -h "$potlib" 2>/dev/null; do
+ potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'`
+ case $potliblink in
+ [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";;
+ *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";;
+ esac
+ done
+ if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \
+ | ${SED} 10q \
+ | $EGREP "$file_magic_regex" > /dev/null; then
+ newdeplibs="$newdeplibs $a_deplib"
+ a_deplib=""
+ break 2
+ fi
+ done
+ done
+ fi
+ if test -n "$a_deplib" ; then
+ droppeddeps=yes
+ $echo
+ $echo "*** Warning: linker path does not have real file for library $a_deplib."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which you do not appear to have"
+ $echo "*** because I did check the linker path looking for a file starting"
+ if test -z "$potlib" ; then
+ $echo "*** with $libname but no candidates were found. (...for file magic test)"
+ else
+ $echo "*** with $libname and none of the candidates passed a file format test"
+ $echo "*** using a file magic. Last file checked: $potlib"
+ fi
+ fi
+ else
+ # Add a -L argument.
+ newdeplibs="$newdeplibs $a_deplib"
+ fi
+ done # Gone through all deplibs.
+ ;;
+ match_pattern*)
+ set dummy $deplibs_check_method
+ match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"`
+ for a_deplib in $deplibs; do
+ name=`expr $a_deplib : '-l\(.*\)'`
+ # If $name is empty we are operating on a -L argument.
+ if test -n "$name" && test "$name" != "0"; then
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+ case " $predeps $postdeps " in
+ *" $a_deplib "*)
+ newdeplibs="$newdeplibs $a_deplib"
+ a_deplib=""
+ ;;
+ esac
+ fi
+ if test -n "$a_deplib" ; then
+ libname=`eval \\$echo \"$libname_spec\"`
+ for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do
+ potential_libs=`ls $i/$libname[.-]* 2>/dev/null`
+ for potent_lib in $potential_libs; do
+ potlib="$potent_lib" # see symlink-check above in file_magic test
+ if eval $echo \"$potent_lib\" 2>/dev/null \
+ | ${SED} 10q \
+ | $EGREP "$match_pattern_regex" > /dev/null; then
+ newdeplibs="$newdeplibs $a_deplib"
+ a_deplib=""
+ break 2
+ fi
+ done
+ done
+ fi
+ if test -n "$a_deplib" ; then
+ droppeddeps=yes
+ $echo
+ $echo "*** Warning: linker path does not have real file for library $a_deplib."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which you do not appear to have"
+ $echo "*** because I did check the linker path looking for a file starting"
+ if test -z "$potlib" ; then
+ $echo "*** with $libname but no candidates were found. (...for regex pattern test)"
+ else
+ $echo "*** with $libname and none of the candidates passed a file format test"
+ $echo "*** using a regex pattern. Last file checked: $potlib"
+ fi
+ fi
+ else
+ # Add a -L argument.
+ newdeplibs="$newdeplibs $a_deplib"
+ fi
+ done # Gone through all deplibs.
+ ;;
+ none | unknown | *)
+ newdeplibs=""
+ tmp_deplibs=`$echo "X $deplibs" | $Xsed -e 's/ -lc$//' \
+ -e 's/ -[LR][^ ]*//g'`
+ if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then
+ for i in $predeps $postdeps ; do
+ # can't use Xsed below, because $i might contain '/'
+ tmp_deplibs=`$echo "X $tmp_deplibs" | ${SED} -e "1s,^X,," -e "s,$i,,"`
+ done
+ fi
+ if $echo "X $tmp_deplibs" | $Xsed -e 's/[ ]//g' \
+ | grep . >/dev/null; then
+ $echo
+ if test "X$deplibs_check_method" = "Xnone"; then
+ $echo "*** Warning: inter-library dependencies are not supported in this platform."
+ else
+ $echo "*** Warning: inter-library dependencies are not known to be supported."
+ fi
+ $echo "*** All declared inter-library dependencies are being dropped."
+ droppeddeps=yes
+ fi
+ ;;
+ esac
+ versuffix=$versuffix_save
+ major=$major_save
+ release=$release_save
+ libname=$libname_save
+ name=$name_save
+
+ case $host in
+ *-*-rhapsody* | *-*-darwin1.[012])
+ # On Rhapsody, replace the C library with the System framework
+ newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'`
+ ;;
+ esac
+
+ if test "$droppeddeps" = yes; then
+ if test "$module" = yes; then
+ $echo
+ $echo "*** Warning: libtool could not satisfy all declared inter-library"
+ $echo "*** dependencies of module $libname. Therefore, libtool will create"
+ $echo "*** a static module, that should work as long as the dlopening"
+ $echo "*** application is linked with the -dlopen flag."
+ if test -z "$global_symbol_pipe"; then
+ $echo
+ $echo "*** However, this would only work if libtool was able to extract symbol"
+ $echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+ $echo "*** not find such a program. So, this module is probably useless."
+ $echo "*** \`nm' from GNU binutils and a full rebuild may help."
+ fi
+ if test "$build_old_libs" = no; then
+ oldlibs="$output_objdir/$libname.$libext"
+ build_libtool_libs=module
+ build_old_libs=yes
+ else
+ build_libtool_libs=no
+ fi
+ else
+ $echo "*** The inter-library dependencies that have been dropped here will be"
+ $echo "*** automatically added whenever a program is linked with this library"
+ $echo "*** or is declared to -dlopen it."
+
+ if test "$allow_undefined" = no; then
+ $echo
+ $echo "*** Since this library must not contain undefined symbols,"
+ $echo "*** because either the platform does not support them or"
+ $echo "*** it was explicitly requested with -no-undefined,"
+ $echo "*** libtool will only create a static version of it."
+ if test "$build_old_libs" = no; then
+ oldlibs="$output_objdir/$libname.$libext"
+ build_libtool_libs=module
+ build_old_libs=yes
+ else
+ build_libtool_libs=no
+ fi
+ fi
+ fi
+ fi
+ # Done checking deplibs!
+ deplibs=$newdeplibs
+ fi
+
+
+ # Move library search paths that coincide with the locations of
+ # not-yet-installed libraries to the beginning of the library search list.
+ new_libs=
+ for path in $notinst_path; do
+ case " $new_libs " in
+ *" -L$path/$objdir "*) ;;
+ *)
+ case " $deplibs " in
+ *" -L$path/$objdir "*)
+ new_libs="$new_libs -L$path/$objdir" ;;
+ esac
+ ;;
+ esac
+ done
+ for deplib in $deplibs; do
+ case $deplib in
+ -L*)
+ case " $new_libs " in
+ *" $deplib "*) ;;
+ *) new_libs="$new_libs $deplib" ;;
+ esac
+ ;;
+ *) new_libs="$new_libs $deplib" ;;
+ esac
+ done
+ deplibs="$new_libs"
+
+
+ # All the library-specific variables (install_libdir is set above).
+ library_names=
+ old_library=
+ dlname=
+
+ # Test again; we may have decided not to build it any more.
+ if test "$build_libtool_libs" = yes; then
+ if test "$hardcode_into_libs" = yes; then
+ # Hardcode the library paths
+ hardcode_libdirs=
+ dep_rpath=
+ rpath="$finalize_rpath"
+ test "$mode" != relink && rpath="$compile_rpath$rpath"
+ for libdir in $rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ dep_rpath="$dep_rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$perm_rpath " in
+ *" $libdir "*) ;;
+ *) perm_rpath="$perm_rpath $libdir" ;;
+ esac
+ fi
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ if test -n "$hardcode_libdir_flag_spec_ld"; then
+ eval dep_rpath=\"$hardcode_libdir_flag_spec_ld\"
+ else
+ eval dep_rpath=\"$hardcode_libdir_flag_spec\"
+ fi
+ fi
+ if test -n "$runpath_var" && test -n "$perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var"
+ fi
+ test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs"
+ fi
+
+ shlibpath="$finalize_shlibpath"
+ test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath"
+ if test -n "$shlibpath"; then
+ eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var"
+ fi
+
+ # Get the real and link names of the library.
+ eval shared_ext=\"$shrext_cmds\"
+ eval library_names=\"$library_names_spec\"
+ set dummy $library_names
+ realname="$2"
+ shift; shift
+
+ if test -n "$soname_spec"; then
+ eval soname=\"$soname_spec\"
+ else
+ soname="$realname"
+ fi
+ if test -z "$dlname"; then
+ dlname=$soname
+ fi
+
+ lib="$output_objdir/$realname"
+ linknames=
+ for link
+ do
+ linknames="$linknames $link"
+ done
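+ # Illustrative example (assumed spec): on linux $library_names_spec
+ # typically expands to `libNAME.so.3.2.3 libNAME.so.3 libNAME.so'; the
+ # first entry becomes $realname and the rest become the symlinks
+ # created near the end of this branch.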
+
+ # Use standard objects if they are pic
+ test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+
+ # Prepare the list of exported symbols
+ if test -z "$export_symbols"; then
+ if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then
+ $show "generating symbol list for \`$libname.la'"
+ export_symbols="$output_objdir/$libname.exp"
+ $run $rm $export_symbols
+ cmds=$export_symbols_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ if len=`expr "X$cmd" : ".*"` &&
+ test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ skipped_export=false
+ else
+ # The command line is too long to execute in one step.
+ $show "using reloadable object file for export list..."
+ skipped_export=:
+ # Break out early, otherwise skipped_export may be
+ # set to false by a later but shorter cmd.
+ break
+ fi
+ done
+ IFS="$save_ifs"
+ if test -n "$export_symbols_regex"; then
+ $show "$EGREP -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\""
+ $run eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"'
+ $show "$mv \"${export_symbols}T\" \"$export_symbols\""
+ $run eval '$mv "${export_symbols}T" "$export_symbols"'
+ fi
+ fi
+ fi
+
+ if test -n "$export_symbols" && test -n "$include_expsyms"; then
+ $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"'
+ fi
+
+ tmp_deplibs=
+ for test_deplib in $deplibs; do
+ case " $convenience " in
+ *" $test_deplib "*) ;;
+ *)
+ tmp_deplibs="$tmp_deplibs $test_deplib"
+ ;;
+ esac
+ done
+ deplibs="$tmp_deplibs"
+
+ if test -n "$convenience"; then
+ if test -n "$whole_archive_flag_spec"; then
+ save_libobjs=$libobjs
+ eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+ else
+ gentop="$output_objdir/${outputname}x"
+ generated="$generated $gentop"
+
+ func_extract_archives $gentop $convenience
+ libobjs="$libobjs $func_extract_archives_result"
+ fi
+ fi
+
+ if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then
+ eval flag=\"$thread_safe_flag_spec\"
+ linker_flags="$linker_flags $flag"
+ fi
+
+ # Make a backup of the uninstalled library when relinking
+ if test "$mode" = relink; then
+ $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $?
+ fi
+
+ # Do each of the archive commands.
+ if test "$module" = yes && test -n "$module_cmds" ; then
+ if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
+ eval test_cmds=\"$module_expsym_cmds\"
+ cmds=$module_expsym_cmds
+ else
+ eval test_cmds=\"$module_cmds\"
+ cmds=$module_cmds
+ fi
+ else
+ if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+ eval test_cmds=\"$archive_expsym_cmds\"
+ cmds=$archive_expsym_cmds
+ else
+ eval test_cmds=\"$archive_cmds\"
+ cmds=$archive_cmds
+ fi
+ fi
+
+ if test "X$skipped_export" != "X:" &&
+ len=`expr "X$test_cmds" : ".*" 2>/dev/null` &&
+ test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+ :
+ else
+ # The command line is too long to link in one step, link piecewise.
+ $echo "creating reloadable object files..."
+
+ # Save the value of $output and $libobjs because we want to
+ # use them later. If we have whole_archive_flag_spec, we
+ # want to use save_libobjs as it was before
+ # whole_archive_flag_spec was expanded, because we can't
+ # assume the linker understands whole_archive_flag_spec.
+ # This may have to be revisited, in case too many
+ # convenience libraries get linked in and end up exceeding
+ # the spec.
+ if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then
+ save_libobjs=$libobjs
+ fi
+ save_output=$output
+ output_la=`$echo "X$output" | $Xsed -e "$basename"`
+
+ # Clear the reloadable object creation command queue and
+ # initialize k to one.
+ test_cmds=
+ concat_cmds=
+ objlist=
+ delfiles=
+ last_robj=
+ k=1
+ output=$output_objdir/$output_la-${k}.$objext
+ # Loop over the list of objects to be linked.
+ for obj in $save_libobjs
+ do
+ eval test_cmds=\"$reload_cmds $objlist $last_robj\"
+ if test "X$objlist" = X ||
+ { len=`expr "X$test_cmds" : ".*" 2>/dev/null` &&
+ test "$len" -le "$max_cmd_len"; }; then
+ objlist="$objlist $obj"
+ else
+ # The command $test_cmds is almost too long, so add a
+ # command to the queue.
+ if test "$k" -eq 1 ; then
+ # The first file doesn't have a previous command to add.
+ eval concat_cmds=\"$reload_cmds $objlist $last_robj\"
+ else
+ # All subsequent reloadable object files will link in
+ # the last one created.
+ eval concat_cmds=\"\$concat_cmds~$reload_cmds $objlist $last_robj\"
+ fi
+ last_robj=$output_objdir/$output_la-${k}.$objext
+ k=`expr $k + 1`
+ output=$output_objdir/$output_la-${k}.$objext
+ objlist=$obj
+ len=1
+ fi
+ done
+ # Handle the remaining objects by creating one last
+ # reloadable object file. All subsequent reloadable object
+ # files will link in the last one created.
+ test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+ eval concat_cmds=\"\${concat_cmds}$reload_cmds $objlist $last_robj\"
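+ # Illustrative example: with an assumed small $max_cmd_len the objects
+ # are grouped into libNAME.la-1.o, libNAME.la-2.o, ..., each reloadable
+ # object linking in the previous one, and the final archive command is
+ # then run against the last of them.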
+
+ if ${skipped_export-false}; then
+ $show "generating symbol list for \`$libname.la'"
+ export_symbols="$output_objdir/$libname.exp"
+ $run $rm $export_symbols
+ libobjs=$output
+ # Append the command to create the export file.
+ eval concat_cmds=\"\$concat_cmds~$export_symbols_cmds\"
+ fi
+
+ # Set up a command to remove the reloadable object files
+ # after they are used.
+ i=0
+ while test "$i" -lt "$k"
+ do
+ i=`expr $i + 1`
+ delfiles="$delfiles $output_objdir/$output_la-${i}.$objext"
+ done
+
+ $echo "creating a temporary reloadable object file: $output"
+
+ # Loop through the commands generated above and execute them.
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $concat_cmds; do
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+
+ libobjs=$output
+ # Restore the value of output.
+ output=$save_output
+
+ if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then
+ eval libobjs=\"\$libobjs $whole_archive_flag_spec\"
+ fi
+ # Expand the library linking commands again to reset the
+ # value of $libobjs for piecewise linking.
+
+ # Do each of the archive commands.
+ if test "$module" = yes && test -n "$module_cmds" ; then
+ if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then
+ cmds=$module_expsym_cmds
+ else
+ cmds=$module_cmds
+ fi
+ else
+ if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then
+ cmds=$archive_expsym_cmds
+ else
+ cmds=$archive_cmds
+ fi
+ fi
+
+ # Append the command to remove the reloadable object files
+ # to the just-reset $cmds.
+ eval cmds=\"\$cmds~\$rm $delfiles\"
+ fi
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || {
+ lt_exit=$?
+
+ # Restore the uninstalled library and exit
+ if test "$mode" = relink; then
+ $run eval '(cd $output_objdir && $rm ${realname}T && $mv ${realname}U $realname)'
+ fi
+
+ exit $lt_exit
+ }
+ done
+ IFS="$save_ifs"
+
+ # Restore the uninstalled library and exit
+ if test "$mode" = relink; then
+ $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $?
+
+ if test -n "$convenience"; then
+ if test -z "$whole_archive_flag_spec"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ fi
+ fi
+
+ exit $EXIT_SUCCESS
+ fi
+
+ # Create links to the real library.
+ for linkname in $linknames; do
+ if test "$realname" != "$linkname"; then
+ $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)"
+ $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $?
+ fi
+ done
+
+ # If -module or -export-dynamic was specified, set the dlname.
+ if test "$module" = yes || test "$export_dynamic" = yes; then
+ # On all known operating systems, these are identical.
+ dlname="$soname"
+ fi
+ fi
+ ;;
+
+ obj)
+ if test -n "$deplibs"; then
+ $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2
+ fi
+
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$rpath"; then
+ $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$xrpath"; then
+ $echo "$modename: warning: \`-R' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for objects" 1>&2
+ fi
+
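+ # Decide whether we are building a libtool object (.lo) or a
+ # plain object file.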
+ case $output in
+ *.lo)
+ if test -n "$objs$old_deplibs"; then
+ $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ libobj="$output"
+ obj=`$echo "X$output" | $Xsed -e "$lo2o"`
+ ;;
+ *)
+ libobj=
+ obj="$output"
+ ;;
+ esac
+
+ # Delete the old objects.
+ $run $rm $obj $libobj
+
+ # Objects from convenience libraries. This assumes
+ # single-version convenience libraries. Whenever we create
+ # different ones for PIC/non-PIC, we'll have to duplicate
+ # the extraction.
+ reload_conv_objs=
+ gentop=
+ # reload_cmds runs $LD directly, so let us get rid of
+ # -Wl from whole_archive_flag_spec
+ wl=
+
+ if test -n "$convenience"; then
+ if test -n "$whole_archive_flag_spec"; then
+ eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\"
+ else
+ gentop="$output_objdir/${obj}x"
+ generated="$generated $gentop"
+
+ func_extract_archives $gentop $convenience
+ reload_conv_objs="$reload_objs $func_extract_archives_result"
+ fi
+ fi
+
+ # Create the old-style object.
+ reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test
+
+ output="$obj"
+ cmds=$reload_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+
+ # Exit if we aren't doing a library object file.
+ if test -z "$libobj"; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit $EXIT_SUCCESS
+ fi
+
+ if test "$build_libtool_libs" != yes; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ # Create an invalid libtool object if no PIC, so that we don't
+ # accidentally link it into a program.
+ # $show "echo timestamp > $libobj"
+ # $run eval "echo timestamp > $libobj" || exit $?
+ exit $EXIT_SUCCESS
+ fi
+
+ if test -n "$pic_flag" || test "$pic_mode" != default; then
+ # Only do commands if we really have different PIC objects.
+ reload_objs="$libobjs $reload_conv_objs"
+ output="$libobj"
+ cmds=$reload_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit $EXIT_SUCCESS
+ ;;
+
+ prog)
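+ # Normalize the program name so it carries exactly one .exe
+ # suffix on cygwin.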
+ case $host in
+ *cygwin*) output=`$echo $output | ${SED} -e 's,.exe$,,;s,$,.exe,'` ;;
+ esac
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for programs" 1>&2
+ fi
+
+ if test "$preload" = yes; then
+ if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown &&
+ test "$dlopen_self_static" = unknown; then
+ $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support."
+ fi
+ fi
+
+ case $host in
+ *-*-rhapsody* | *-*-darwin1.[012])
+ # On Rhapsody, replace the C library with the System framework.
+ compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'`
+ finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'`
+ ;;
+ esac
+
+ case $host in
+ *darwin*)
+ # Don't allow lazy linking; it breaks C++ global constructors.
+ if test "$tagname" = CXX ; then
+ compile_command="$compile_command ${wl}-bind_at_load"
+ finalize_command="$finalize_command ${wl}-bind_at_load"
+ fi
+ ;;
+ esac
+
+
+ # Move library search paths that coincide with paths to
+ # not-yet-installed libraries to the beginning of the search list.
+ new_libs=
+ for path in $notinst_path; do
+ case " $new_libs " in
+ *" -L$path/$objdir "*) ;;
+ *)
+ case " $compile_deplibs " in
+ *" -L$path/$objdir "*)
+ new_libs="$new_libs -L$path/$objdir" ;;
+ esac
+ ;;
+ esac
+ done
+ for deplib in $compile_deplibs; do
+ case $deplib in
+ -L*)
+ case " $new_libs " in
+ *" $deplib "*) ;;
+ *) new_libs="$new_libs $deplib" ;;
+ esac
+ ;;
+ *) new_libs="$new_libs $deplib" ;;
+ esac
+ done
+ compile_deplibs="$new_libs"
+
+
+ compile_command="$compile_command $compile_deplibs"
+ finalize_command="$finalize_command $finalize_deplibs"
+
+ if test -n "$rpath$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ for libdir in $rpath $xrpath; do
+ # This is the magic to use -rpath.
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ fi
+
+ # Now hardcode the library paths
+ rpath=
+ hardcode_libdirs=
+ for libdir in $compile_rpath $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ rpath="$rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$perm_rpath " in
+ *" $libdir "*) ;;
+ *) perm_rpath="$perm_rpath $libdir" ;;
+ esac
+ fi
+ case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+ testbindir=`$echo "X$libdir" | $Xsed -e 's*/lib$*/bin*'`
+ case :$dllsearchpath: in
+ *":$libdir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$libdir";;
+ esac
+ case :$dllsearchpath: in
+ *":$testbindir:"*) ;;
+ *) dllsearchpath="$dllsearchpath:$testbindir";;
+ esac
+ ;;
+ esac
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval rpath=\" $hardcode_libdir_flag_spec\"
+ fi
+ compile_rpath="$rpath"
+
+ rpath=
+ hardcode_libdirs=
+ for libdir in $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in
+ *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*)
+ ;;
+ *)
+ hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir"
+ ;;
+ esac
+ fi
+ else
+ eval flag=\"$hardcode_libdir_flag_spec\"
+ rpath="$rpath $flag"
+ fi
+ elif test -n "$runpath_var"; then
+ case "$finalize_perm_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;;
+ esac
+ fi
+ done
+ # Substitute the hardcoded libdirs into the rpath.
+ if test -n "$hardcode_libdir_separator" &&
+ test -n "$hardcode_libdirs"; then
+ libdir="$hardcode_libdirs"
+ eval rpath=\" $hardcode_libdir_flag_spec\"
+ fi
+ finalize_rpath="$rpath"
+
+ if test -n "$libobjs" && test "$build_old_libs" = yes; then
+ # Transform all the library objects into standard objects.
+ compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ fi
+
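+ # If anything is to be dlopened (or the program dlopens itself),
+ # generate a C source file that maps symbol names to addresses,
+ # so dlopen(3)/dlsym(3) can be emulated via lt_preloaded_symbols.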
+ dlsyms=
+ if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then
+ if test -n "$NM" && test -n "$global_symbol_pipe"; then
+ dlsyms="${outputname}S.c"
+ else
+ $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2
+ fi
+ fi
+
+ if test -n "$dlsyms"; then
+ case $dlsyms in
+ "") ;;
+ *.c)
+ # Discover the nlist of each of the dlfiles.
+ nlist="$output_objdir/${outputname}.nm"
+
+ $show "$rm $nlist ${nlist}S ${nlist}T"
+ $run $rm "$nlist" "${nlist}S" "${nlist}T"
+
+ # Parse the name list into a source file.
+ $show "creating $output_objdir/$dlsyms"
+
+ test -z "$run" && $echo > "$output_objdir/$dlsyms" "\
+/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */
+/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */
+
+#ifdef __cplusplus
+extern \"C\" {
+#endif
+
+/* Prevent the only kind of declaration conflicts we can make. */
+#define lt_preloaded_symbols some_other_symbol
+
+/* External symbol declarations for the compiler. */\
+"
+
+ if test "$dlself" = yes; then
+ $show "generating symbol list for \`$output'"
+
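+ # Seed the name list with a sentinel entry for the program itself;
+ # it is filtered back out when the export list is prepared.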
+ test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist"
+
+ # Add our own program objects to the symbol list.
+ progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+ for arg in $progfiles; do
+ $show "extracting global C symbols from \`$arg'"
+ $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+ done
+
+ if test -n "$exclude_expsyms"; then
+ $run eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+ $run eval '$mv "$nlist"T "$nlist"'
+ fi
+
+ if test -n "$export_symbols_regex"; then
+ $run eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+ $run eval '$mv "$nlist"T "$nlist"'
+ fi
+
+ # Prepare the list of exported symbols
+ if test -z "$export_symbols"; then
+ export_symbols="$output_objdir/$outputname.exp"
+ $run $rm $export_symbols
+ $run eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+ case $host in
+ *cygwin* | *mingw* )
+ $run eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+ $run eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
+ ;;
+ esac
+ else
+ $run eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
+ $run eval 'grep -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
+ $run eval 'mv "$nlist"T "$nlist"'
+ case $host in
+ *cygwin* | *mingw* )
+ $run eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+ $run eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
+ ;;
+ esac
+ fi
+ fi
+
+ for arg in $dlprefiles; do
+ $show "extracting global C symbols from \`$arg'"
+ name=`$echo "$arg" | ${SED} -e 's%^.*/%%'`
+ $run eval '$echo ": $name " >> "$nlist"'
+ $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+ done
+
+ if test -z "$run"; then
+ # Make sure we have at least an empty file.
+ test -f "$nlist" || : > "$nlist"
+
+ if test -n "$exclude_expsyms"; then
+ $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+ $mv "$nlist"T "$nlist"
+ fi
+
+ # Try sorting and uniquifying the output.
+ if grep -v "^: " < "$nlist" |
+ if sort -k 3 </dev/null >/dev/null 2>&1; then
+ sort -k 3
+ else
+ sort +2
+ fi |
+ uniq > "$nlist"S; then
+ :
+ else
+ grep -v "^: " < "$nlist" > "$nlist"S
+ fi
+
+ if test -f "$nlist"S; then
+ eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"'
+ else
+ $echo '/* NONE */' >> "$output_objdir/$dlsyms"
+ fi
+
+ $echo >> "$output_objdir/$dlsyms" "\
+
+#undef lt_preloaded_symbols
+
+#if defined (__STDC__) && __STDC__
+# define lt_ptr void *
+#else
+# define lt_ptr char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+"
+
+ case $host in
+ *cygwin* | *mingw* )
+ $echo >> "$output_objdir/$dlsyms" "\
+/* DATA imports from DLLs on WIN32 can't be const, because
+ runtime relocations are performed -- see ld's documentation
+ on pseudo-relocs */
+struct {
+"
+ ;;
+ * )
+ $echo >> "$output_objdir/$dlsyms" "\
+const struct {
+"
+ ;;
+ esac
+
+
+ $echo >> "$output_objdir/$dlsyms" "\
+ const char *name;
+ lt_ptr address;
+}
+lt_preloaded_symbols[] =
+{\
+"
+
+ eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms"
+
+ $echo >> "$output_objdir/$dlsyms" "\
+ {0, (lt_ptr) 0}
+};
+
+/* This works around a problem in the FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+ return lt_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif\
+"
+ fi
+
+ pic_flag_for_symtable=
+ case $host in
+ # compiling the symbol table file with pic_flag works around
+ # a FreeBSD bug that causes programs to crash when -lm is
+ # linked before any other PIC object. But we must not use
+ # pic_flag when linking with -static. The problem exists in
+ # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1.
+ *-*-freebsd2.*|*-*-freebsd3.0*|*-*-freebsdelf3.0*)
+ case "$compile_command " in
+ *" -static "*) ;;
+ *) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND";;
+ esac;;
+ *-*-hpux*)
+ case "$compile_command " in
+ *" -static "*) ;;
+ *) pic_flag_for_symtable=" $pic_flag";;
+ esac
+ esac
+
+ # Now compile the dynamic symbol file.
+ $show "(cd $output_objdir && $LTCC $LTCFLAGS -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")"
+ $run eval '(cd $output_objdir && $LTCC $LTCFLAGS -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $?
+
+ # Clean up the generated files.
+ $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T"
+ $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T"
+
+ # Transform the symbol file into the correct name.
+ case $host in
+ *cygwin* | *mingw* )
+ if test -f "$output_objdir/${outputname}.def" ; then
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}.def $output_objdir/${outputname}S.${objext}%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}.def $output_objdir/${outputname}S.${objext}%"`
+ else
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ fi
+ ;;
+ * )
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"`
+ ;;
+ esac
+ ;;
+ *)
+ $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+ else
+ # We keep going just in case the user didn't refer to
+ # lt_preloaded_symbols. The linker will fail if global_symbol_pipe
+ # really was required.
+
+ # Nullify the symbol file.
+ compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"`
+ finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"`
+ fi
+
+ if test "$need_relink" = no || test "$build_libtool_libs" != yes; then
+ # Replace the output file specification.
+ compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+ link_command="$compile_command$compile_rpath"
+
+ # We have no uninstalled library dependencies, so finalize right now.
+ $show "$link_command"
+ $run eval "$link_command"
+ exit_status=$?
+
+ # Delete the generated files.
+ if test -n "$dlsyms"; then
+ $show "$rm $output_objdir/${outputname}S.${objext}"
+ $run $rm "$output_objdir/${outputname}S.${objext}"
+ fi
+
+ exit $exit_status
+ fi
+
+ if test -n "$shlibpath_var"; then
+ # We should set the shlibpath_var
+ rpath=
+ for dir in $temp_rpath; do
+ case $dir in
+ [\\/]* | [A-Za-z]:[\\/]*)
+ # Absolute path.
+ rpath="$rpath$dir:"
+ ;;
+ *)
+ # Relative path: add a thisdir entry.
+ rpath="$rpath\$thisdir/$dir:"
+ ;;
+ esac
+ done
+ temp_rpath="$rpath"
+ fi
+
+ if test -n "$compile_shlibpath$finalize_shlibpath"; then
+ compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command"
+ fi
+ if test -n "$finalize_shlibpath"; then
+ finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command"
+ fi
+
+ compile_var=
+ finalize_var=
+ if test -n "$runpath_var"; then
+ if test -n "$perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ compile_var="$runpath_var=\"$rpath\$$runpath_var\" "
+ fi
+ if test -n "$finalize_perm_rpath"; then
+ # We should set the runpath_var.
+ rpath=
+ for dir in $finalize_perm_rpath; do
+ rpath="$rpath$dir:"
+ done
+ finalize_var="$runpath_var=\"$rpath\$$runpath_var\" "
+ fi
+ fi
+
+ if test "$no_install" = yes; then
+ # We don't need to create a wrapper script.
+ link_command="$compile_var$compile_command$compile_rpath"
+ # Replace the output file specification.
+ link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'`
+ # Delete the old output file.
+ $run $rm $output
+ # Link the executable and exit
+ $show "$link_command"
+ $run eval "$link_command" || exit $?
+ exit $EXIT_SUCCESS
+ fi
+
+ if test "$hardcode_action" = relink; then
+ # Fast installation is not supported
+ link_command="$compile_var$compile_command$compile_rpath"
+ relink_command="$finalize_var$finalize_command$finalize_rpath"
+
+ $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2
+ $echo "$modename: \`$output' will be relinked during installation" 1>&2
+ else
+ if test "$fast_install" != no; then
+ link_command="$finalize_var$compile_command$finalize_rpath"
+ if test "$fast_install" = yes; then
+ relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'`
+ else
+ # fast_install is set to needless
+ relink_command=
+ fi
+ else
+ link_command="$compile_var$compile_command$compile_rpath"
+ relink_command="$finalize_var$finalize_command$finalize_rpath"
+ fi
+ fi
+
+ # Replace the output file specification.
+ link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'`
+
+ # Delete the old output files.
+ $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname
+
+ $show "$link_command"
+ $run eval "$link_command" || exit $?
+
+ # Now create the wrapper script.
+ $show "creating $output"
+
+ # Quote the relink command for shipping.
+ if test -n "$relink_command"; then
+ # Preserve any variables that may affect compiler behavior
+ for var in $variables_saved_for_relink; do
+ if eval test -z \"\${$var+set}\"; then
+ relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command"
+ elif eval var_value=\$$var; test -z "$var_value"; then
+ relink_command="$var=; export $var; $relink_command"
+ else
+ var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"`
+ relink_command="$var=\"$var_value\"; export $var; $relink_command"
+ fi
+ done
+ relink_command="(cd `pwd`; $relink_command)"
+ relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+ fi
+
+ # Quote $echo for shipping.
+ if test "X$echo" = "X$SHELL $progpath --fallback-echo"; then
+ case $progpath in
+ [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $progpath --fallback-echo";;
+ *) qecho="$SHELL `pwd`/$progpath --fallback-echo";;
+ esac
+ qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"`
+ else
+ qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
+ fi
+
+ # Only actually do things if our run command is non-null.
+ if test -z "$run"; then
+ # win32 will think the script is a binary if it has
+ # a .exe suffix, so we strip it off here.
+ case $output in
+ *.exe) output=`$echo $output|${SED} 's,.exe$,,'` ;;
+ esac
+ # Test for cygwin because mv fails without .exe extensions.
+ case $host in
+ *cygwin*)
+ exeext=.exe
+ outputname=`$echo $outputname|${SED} 's,.exe$,,'` ;;
+ *) exeext= ;;
+ esac
+ case $host in
+ *cygwin* | *mingw* )
+ output_name=`basename $output`
+ output_path=`dirname $output`
+ cwrappersource="$output_path/$objdir/lt-$output_name.c"
+ cwrapper="$output_path/$output_name.exe"
+ $rm $cwrappersource $cwrapper
+ trap "$rm $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15
+
+ cat > $cwrappersource <<EOF
+
+/* $cwrappersource - temporary wrapper executable for $objdir/$outputname
+ Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+
+ The $output program cannot be directly executed until all the libtool
+ libraries that it depends on are installed.
+
+ This wrapper executable should never be moved out of the build directory.
+ If it is, it will not operate correctly.
+
+ Currently, it simply execs the wrapper *script* "/bin/sh $output",
+ but could eventually absorb all of the script's functionality and
+ exec $objdir/$outputname directly.
+*/
+EOF
+ cat >> $cwrappersource<<"EOF"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/stat.h>
+
+#if defined(PATH_MAX)
+# define LT_PATHMAX PATH_MAX
+#elif defined(MAXPATHLEN)
+# define LT_PATHMAX MAXPATHLEN
+#else
+# define LT_PATHMAX 1024
+#endif
+
+#ifndef DIR_SEPARATOR
+# define DIR_SEPARATOR '/'
+# define PATH_SEPARATOR ':'
+#endif
+
+#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \
+ defined (__OS2__)
+# define HAVE_DOS_BASED_FILE_SYSTEM
+# ifndef DIR_SEPARATOR_2
+# define DIR_SEPARATOR_2 '\\'
+# endif
+# ifndef PATH_SEPARATOR_2
+# define PATH_SEPARATOR_2 ';'
+# endif
+#endif
+
+#ifndef DIR_SEPARATOR_2
+# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else /* DIR_SEPARATOR_2 */
+# define IS_DIR_SEPARATOR(ch) \
+ (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif /* DIR_SEPARATOR_2 */
+
+#ifndef PATH_SEPARATOR_2
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR)
+#else /* PATH_SEPARATOR_2 */
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2)
+#endif /* PATH_SEPARATOR_2 */
+
+#define XMALLOC(type, num) ((type *) xmalloc ((num) * sizeof(type)))
+#define XFREE(stale) do { \
+ if (stale) { free ((void *) stale); stale = 0; } \
+} while (0)
+
+/* -DDEBUG is fairly common in CFLAGS. */
+#undef DEBUG
+#if defined DEBUGWRAPPER
+# define DEBUG(format, ...) fprintf(stderr, format, __VA_ARGS__)
+#else
+# define DEBUG(format, ...)
+#endif
+
+const char *program_name = NULL;
+
+void * xmalloc (size_t num);
+char * xstrdup (const char *string);
+const char * base_name (const char *name);
+char * find_executable(const char *wrapper);
+int check_executable(const char *path);
+char * strendzap(char *str, const char *pat);
+void lt_fatal (const char *message, ...);
+
+int
+main (int argc, char *argv[])
+{
+ char **newargz;
+ int i;
+
+ program_name = (char *) xstrdup (base_name (argv[0]));
+ DEBUG("(main) argv[0] : %s\n",argv[0]);
+ DEBUG("(main) program_name : %s\n",program_name);
+ newargz = XMALLOC(char *, argc+2);
+EOF
+
+ cat >> $cwrappersource <<EOF
+ newargz[0] = (char *) xstrdup("$SHELL");
+EOF
+
+ cat >> $cwrappersource <<"EOF"
+ newargz[1] = find_executable(argv[0]);
+ if (newargz[1] == NULL)
+ lt_fatal("Couldn't find %s", argv[0]);
+ DEBUG("(main) found exe at : %s\n",newargz[1]);
+ /* we know the script has the same name, without the .exe */
+ /* so make sure newargz[1] doesn't end in .exe */
+ strendzap(newargz[1],".exe");
+ for (i = 1; i < argc; i++)
+ newargz[i+1] = xstrdup(argv[i]);
+ newargz[argc+1] = NULL;
+
+ for (i=0; i<argc+1; i++)
+ {
+ DEBUG("(main) newargz[%d] : %s\n",i,newargz[i]);
+ ;
+ }
+
+EOF
+
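+ # mingw declares execv() with a const argument vector, so cast
+ # there; other hosts take newargz unchanged.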
+ case $host_os in
+ mingw*)
+ cat >> $cwrappersource <<EOF
+ execv("$SHELL",(char const **)newargz);
+EOF
+ ;;
+ *)
+ cat >> $cwrappersource <<EOF
+ execv("$SHELL",newargz);
+EOF
+ ;;
+ esac
+
+ cat >> $cwrappersource <<"EOF"
+ return 127;
+}
+
+void *
+xmalloc (size_t num)
+{
+ void * p = (void *) malloc (num);
+ if (!p)
+ lt_fatal ("Memory exhausted");
+
+ return p;
+}
+
+char *
+xstrdup (const char *string)
+{
+ return string ? strcpy ((char *) xmalloc (strlen (string) + 1), string) : NULL;
+}
+
+const char *
+base_name (const char *name)
+{
+ const char *base;
+
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+ /* Skip over the disk name in MSDOS pathnames. */
+ if (isalpha ((unsigned char)name[0]) && name[1] == ':')
+ name += 2;
+#endif
+
+ for (base = name; *name; name++)
+ if (IS_DIR_SEPARATOR (*name))
+ base = name + 1;
+ return base;
+}
+
+int
+check_executable(const char * path)
+{
+ struct stat st;
+
+ DEBUG("(check_executable) : %s\n", path ? (*path ? path : "EMPTY!") : "NULL!");
+ if ((!path) || (!*path))
+ return 0;
+
+ if ((stat (path, &st) >= 0) &&
+ (
+ /* MinGW & native WIN32 do not support S_IXOTH or S_IXGRP */
+#if defined (S_IXOTH)
+ ((st.st_mode & S_IXOTH) == S_IXOTH) ||
+#endif
+#if defined (S_IXGRP)
+ ((st.st_mode & S_IXGRP) == S_IXGRP) ||
+#endif
+ ((st.st_mode & S_IXUSR) == S_IXUSR))
+ )
+ return 1;
+ else
+ return 0;
+}
+
+/* Searches for the full path of the wrapper. Returns
+ newly allocated full path name if found, NULL otherwise */
+char *
+find_executable (const char* wrapper)
+{
+ int has_slash = 0;
+ const char* p;
+ const char* p_next;
+ /* buffer for getcwd */
+ char tmp[LT_PATHMAX + 1];
+ int tmp_len;
+ char* concat_name;
+
+ DEBUG("(find_executable) : %s\n", wrapper ? (*wrapper ? wrapper : "EMPTY!") : "NULL!");
+
+ if ((wrapper == NULL) || (*wrapper == '\0'))
+ return NULL;
+
+ /* Absolute path? */
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+ if (isalpha ((unsigned char)wrapper[0]) && wrapper[1] == ':')
+ {
+ concat_name = xstrdup (wrapper);
+ if (check_executable(concat_name))
+ return concat_name;
+ XFREE(concat_name);
+ }
+ else
+ {
+#endif
+ if (IS_DIR_SEPARATOR (wrapper[0]))
+ {
+ concat_name = xstrdup (wrapper);
+ if (check_executable(concat_name))
+ return concat_name;
+ XFREE(concat_name);
+ }
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+ }
+#endif
+
+ for (p = wrapper; *p; p++)
+ if (*p == '/')
+ {
+ has_slash = 1;
+ break;
+ }
+ if (!has_slash)
+ {
+ /* no slashes; search PATH */
+ const char* path = getenv ("PATH");
+ if (path != NULL)
+ {
+ for (p = path; *p; p = p_next)
+ {
+ const char* q;
+ size_t p_len;
+ for (q = p; *q; q++)
+ if (IS_PATH_SEPARATOR(*q))
+ break;
+ p_len = q - p;
+ p_next = (*q == '\0' ? q : q + 1);
+ if (p_len == 0)
+ {
+ /* empty path: current directory */
+ if (getcwd (tmp, LT_PATHMAX) == NULL)
+ lt_fatal ("getcwd failed");
+ tmp_len = strlen(tmp);
+ concat_name = XMALLOC(char, tmp_len + 1 + strlen(wrapper) + 1);
+ memcpy (concat_name, tmp, tmp_len);
+ concat_name[tmp_len] = '/';
+ strcpy (concat_name + tmp_len + 1, wrapper);
+ }
+ else
+ {
+ concat_name = XMALLOC(char, p_len + 1 + strlen(wrapper) + 1);
+ memcpy (concat_name, p, p_len);
+ concat_name[p_len] = '/';
+ strcpy (concat_name + p_len + 1, wrapper);
+ }
+ if (check_executable(concat_name))
+ return concat_name;
+ XFREE(concat_name);
+ }
+ }
+ /* not found in PATH; assume curdir */
+ }
+ /* Relative path, or not found in PATH: prepend cwd */
+ if (getcwd (tmp, LT_PATHMAX) == NULL)
+ lt_fatal ("getcwd failed");
+ tmp_len = strlen(tmp);
+ concat_name = XMALLOC(char, tmp_len + 1 + strlen(wrapper) + 1);
+ memcpy (concat_name, tmp, tmp_len);
+ concat_name[tmp_len] = '/';
+ strcpy (concat_name + tmp_len + 1, wrapper);
+
+ if (check_executable(concat_name))
+ return concat_name;
+ XFREE(concat_name);
+ return NULL;
+}
+
+char *
+strendzap(char *str, const char *pat)
+{
+ size_t len, patlen;
+
+ assert(str != NULL);
+ assert(pat != NULL);
+
+ len = strlen(str);
+ patlen = strlen(pat);
+
+ if (patlen <= len)
+ {
+ str += len - patlen;
+ if (strcmp(str, pat) == 0)
+ *str = '\0';
+ }
+ return str;
+}
+
+static void
+lt_error_core (int exit_status, const char * mode,
+ const char * message, va_list ap)
+{
+ fprintf (stderr, "%s: %s: ", program_name, mode);
+ vfprintf (stderr, message, ap);
+ fprintf (stderr, ".\n");
+
+ if (exit_status >= 0)
+ exit (exit_status);
+}
+
+void
+lt_fatal (const char *message, ...)
+{
+ va_list ap;
+ va_start (ap, message);
+ lt_error_core (EXIT_FAILURE, "FATAL", message, ap);
+ va_end (ap);
+}
+EOF
+ # We should really use a build-platform-specific compiler
+ # here, but the wrappers (the shell script and this C one)
+ # are only useful if you want to execute the "real" binary.
+ # Since the "real" binary is built for $host, this wrapper
+ # might as well be built for $host, too.
+ $run $LTCC $LTCFLAGS -s -o $cwrapper $cwrappersource
+ ;;
+ esac
+ $rm $output
+ trap "$rm $output; exit $EXIT_FAILURE" 1 2 15
+
+ $echo > $output "\
+#! $SHELL
+
+# $output - temporary wrapper script for $objdir/$outputname
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# The $output program cannot be directly executed until all the libtool
+# libraries that it depends on are installed.
+#
+# This wrapper script should never be moved out of the build directory.
+# If it is, it will not operate correctly.
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='${SED} -e 1s/^X//'
+sed_quote_subst='$sed_quote_subst'
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+relink_command=\"$relink_command\"
+
+# This environment variable determines our operation mode.
+if test \"\$libtool_install_magic\" = \"$magic\"; then
+ # install mode needs the following variable:
+ notinst_deplibs='$notinst_deplibs'
+else
+ # When we are sourced in execute mode, \$file and \$echo are already set.
+ if test \"\$libtool_execute_magic\" != \"$magic\"; then
+ echo=\"$qecho\"
+ file=\"\$0\"
+ # Make sure echo works.
+ if test \"X\$1\" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+ elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then
+ # Yippee, \$echo works!
+ :
+ else
+ # Restart under the correct shell, and then maybe \$echo will work.
+ exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"}
+ fi
+ fi\
+"
+ $echo >> $output "\
+
+ # Find the directory that this script lives in.
+ thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\`
+ test \"x\$thisdir\" = \"x\$file\" && thisdir=.
+
+ # Follow symbolic links until we get to the real thisdir.
+ file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\`
+ while test -n \"\$file\"; do
+ destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\`
+
+ # If there was a directory component, then change thisdir.
+ if test \"x\$destdir\" != \"x\$file\"; then
+ case \"\$destdir\" in
+ [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;;
+ *) thisdir=\"\$thisdir/\$destdir\" ;;
+ esac
+ fi
+
+ file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\`
+ file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\`
+ done
+
+ # Try to get the absolute directory name.
+ absdir=\`cd \"\$thisdir\" && pwd\`
+ test -n \"\$absdir\" && thisdir=\"\$absdir\"
+"
+
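+ # With fast_install, the wrapper runs an "lt-" prefixed copy of
+ # the program from $objdir, relinking it on demand when it is
+ # missing or out of date.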
+ if test "$fast_install" = yes; then
+ $echo >> $output "\
+ program=lt-'$outputname'$exeext
+ progdir=\"\$thisdir/$objdir\"
+
+ if test ! -f \"\$progdir/\$program\" || \\
+ { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\
+ test \"X\$file\" != \"X\$progdir/\$program\"; }; then
+
+ file=\"\$\$-\$program\"
+
+ if test ! -d \"\$progdir\"; then
+ $mkdir \"\$progdir\"
+ else
+ $rm \"\$progdir/\$file\"
+ fi"
+
+ $echo >> $output "\
+
+ # relink executable if necessary
+ if test -n \"\$relink_command\"; then
+ if relink_command_output=\`eval \$relink_command 2>&1\`; then :
+ else
+ $echo \"\$relink_command_output\" >&2
+ $rm \"\$progdir/\$file\"
+ exit $EXIT_FAILURE
+ fi
+ fi
+
+ $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null ||
+ { $rm \"\$progdir/\$program\";
+ $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; }
+ $rm \"\$progdir/\$file\"
+ fi"
+ else
+ $echo >> $output "\
+ program='$outputname'
+ progdir=\"\$thisdir/$objdir\"
+"
+ fi
+
+ $echo >> $output "\
+
+ if test -f \"\$progdir/\$program\"; then"
+
+ # Export our shlibpath_var if we have one.
+ if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then
+ $echo >> $output "\
+ # Add our own library path to $shlibpath_var
+ $shlibpath_var=\"$temp_rpath\$$shlibpath_var\"
+
+ # Some systems cannot cope with colon-terminated $shlibpath_var
+ # The second colon is a workaround for a bug in BeOS R4 sed
+ $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\`
+
+ export $shlibpath_var
+"
+ fi
+
+ # fixup the dll searchpath if we need to.
+ if test -n "$dllsearchpath"; then
+ $echo >> $output "\
+ # Add the dll search path components to the executable PATH
+ PATH=$dllsearchpath:\$PATH
+"
+ fi
+
+ $echo >> $output "\
+ if test \"\$libtool_execute_magic\" != \"$magic\"; then
+ # Run the actual program with our arguments.
+"
+ case $host in
+ # Backslashes separate directories on plain Windows.
+ *-*-mingw | *-*-os2*)
+ $echo >> $output "\
+ exec \"\$progdir\\\\\$program\" \${1+\"\$@\"}
+"
+ ;;
+
+ *)
+ $echo >> $output "\
+ exec \"\$progdir/\$program\" \${1+\"\$@\"}
+"
+ ;;
+ esac
+ $echo >> $output "\
+ \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\"
+ exit $EXIT_FAILURE
+ fi
+ else
+ # The program doesn't exist.
+ \$echo \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2
+ \$echo \"This script is just a wrapper for \$program.\" 1>&2
+ $echo \"See the $PACKAGE documentation for more information.\" 1>&2
+ exit $EXIT_FAILURE
+ fi
+fi\
+"
+ chmod +x $output
+ fi
+ exit $EXIT_SUCCESS
+ ;;
+ esac
+
+ # See if we need to build an old-fashioned archive.
+ for oldlib in $oldlibs; do
+
+ if test "$build_libtool_libs" = convenience; then
+ oldobjs="$libobjs_save"
+ addlibs="$convenience"
+ build_libtool_libs=no
+ else
+ if test "$build_libtool_libs" = module; then
+ oldobjs="$libobjs_save"
+ build_libtool_libs=no
+ else
+ oldobjs="$old_deplibs $non_pic_objects"
+ fi
+ addlibs="$old_convenience"
+ fi
+
+ if test -n "$addlibs"; then
+ gentop="$output_objdir/${outputname}x"
+ generated="$generated $gentop"
+
+ func_extract_archives $gentop $addlibs
+ oldobjs="$oldobjs $func_extract_archives_result"
+ fi
+
+ # Do each command in the archive commands.
+ if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then
+ cmds=$old_archive_from_new_cmds
+ else
+ # POSIX demands no paths to be encoded in archives. We have
+ # to avoid creating archives with duplicate basenames if we
+ # might have to extract them afterwards, e.g., when creating a
+ # static archive out of a convenience library, or when linking
+ # the entirety of a libtool archive into another (currently
+ # not supported by libtool).
+ if (for obj in $oldobjs
+ do
+ $echo "X$obj" | $Xsed -e 's%^.*/%%'
+ done | sort | sort -uc >/dev/null 2>&1); then
+ :
+ else
+ $echo "copying selected object files to avoid basename conflicts..."
+
+ if test -z "$gentop"; then
+ gentop="$output_objdir/${outputname}x"
+ generated="$generated $gentop"
+
+ $show "${rm}r $gentop"
+ $run ${rm}r "$gentop"
+ $show "$mkdir $gentop"
+ $run $mkdir "$gentop"
+ exit_status=$?
+ if test "$exit_status" -ne 0 && test ! -d "$gentop"; then
+ exit $exit_status
+ fi
+ fi
+
+ save_oldobjs=$oldobjs
+ oldobjs=
+ counter=1
+ for obj in $save_oldobjs
+ do
+ objbase=`$echo "X$obj" | $Xsed -e 's%^.*/%%'`
+ case " $oldobjs " in
+ " ") oldobjs=$obj ;;
+ *[\ /]"$objbase "*)
+ while :; do
+ # Make sure we don't pick an alternate name that also
+ # overlaps.
+ newobj=lt$counter-$objbase
+ counter=`expr $counter + 1`
+ case " $oldobjs " in
+ *[\ /]"$newobj "*) ;;
+ *) if test ! -f "$gentop/$newobj"; then break; fi ;;
+ esac
+ done
+ $show "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj"
+ $run ln "$obj" "$gentop/$newobj" ||
+ $run cp "$obj" "$gentop/$newobj"
+ oldobjs="$oldobjs $gentop/$newobj"
+ ;;
+ *) oldobjs="$oldobjs $obj" ;;
+ esac
+ done
+ fi
+
+ eval cmds=\"$old_archive_cmds\"
+
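+ # A max_cmd_len of -1 (or less) means the command-line length is
+ # unlimited.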
+ if len=`expr "X$cmds" : ".*"` &&
+ test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then
+ cmds=$old_archive_cmds
+ else
+ # The command line is too long to link in one step; link in parts.
+ $echo "using piecewise archive linking..."
+ save_RANLIB=$RANLIB
+ RANLIB=:
+ objlist=
+ concat_cmds=
+ save_oldobjs=$oldobjs
+
+ # Is there a better way of finding the last object in the list?
+ for obj in $save_oldobjs
+ do
+ last_oldobj=$obj
+ done
+ for obj in $save_oldobjs
+ do
+ oldobjs="$objlist $obj"
+ objlist="$objlist $obj"
+ eval test_cmds=\"$old_archive_cmds\"
+ if len=`expr "X$test_cmds" : ".*" 2>/dev/null` &&
+ test "$len" -le "$max_cmd_len"; then
+ :
+ else
+ # The command has grown too long; flush the accumulated objects now.
+ oldobjs=$objlist
+ if test "$obj" = "$last_oldobj" ; then
+ RANLIB=$save_RANLIB
+ fi
+ test -z "$concat_cmds" || concat_cmds=$concat_cmds~
+ eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\"
+ objlist=
+ fi
+ done
+ RANLIB=$save_RANLIB
+ oldobjs=$objlist
+ if test "X$oldobjs" = "X" ; then
+ eval cmds=\"\$concat_cmds\"
+ else
+ eval cmds=\"\$concat_cmds~\$old_archive_cmds\"
+ fi
+ fi
+ fi
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ eval cmd=\"$cmd\"
+ IFS="$save_ifs"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ done
+
+ if test -n "$generated"; then
+ $show "${rm}r$generated"
+ $run ${rm}r$generated
+ fi
+
+ # Now create the libtool archive.
+ case $output in
+ *.la)
+ old_library=
+ test "$build_old_libs" = yes && old_library="$libname.$libext"
+ $show "creating $output"
+
+ # Preserve any variables that may affect compiler behavior
+ for var in $variables_saved_for_relink; do
+ if eval test -z \"\${$var+set}\"; then
+ relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command"
+ elif eval var_value=\$$var; test -z "$var_value"; then
+ relink_command="$var=; export $var; $relink_command"
+ else
+ var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"`
+ relink_command="$var=\"$var_value\"; export $var; $relink_command"
+ fi
+ done
+ # Quote the link command for shipping.
+ relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)"
+ relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+ if test "$hardcode_automatic" = yes ; then
+ relink_command=
+ fi
+
+
+ # Only create the output if not a dry run.
+ if test -z "$run"; then
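+ # Write the .la file twice: once for the build tree and once,
+ # with an "i" suffix in $objdir, with installed paths substituted.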
+ for installed in no yes; do
+ if test "$installed" = yes; then
+ if test -z "$install_libdir"; then
+ break
+ fi
+ output="$output_objdir/$outputname"i
+ # Replace all uninstalled libtool libraries with the installed ones
+ newdependency_libs=
+ for deplib in $dependency_libs; do
+ case $deplib in
+ *.la)
+ name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ newdependency_libs="$newdependency_libs $libdir/$name"
+ ;;
+ *) newdependency_libs="$newdependency_libs $deplib" ;;
+ esac
+ done
+ dependency_libs="$newdependency_libs"
+ newdlfiles=
+ for lib in $dlfiles; do
+ name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ newdlfiles="$newdlfiles $libdir/$name"
+ done
+ dlfiles="$newdlfiles"
+ newdlprefiles=
+ for lib in $dlprefiles; do
+ name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'`
+ eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib`
+ if test -z "$libdir"; then
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ newdlprefiles="$newdlprefiles $libdir/$name"
+ done
+ dlprefiles="$newdlprefiles"
+ else
+ newdlfiles=
+ for lib in $dlfiles; do
+ case $lib in
+ [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
+ *) abs=`pwd`"/$lib" ;;
+ esac
+ newdlfiles="$newdlfiles $abs"
+ done
+ dlfiles="$newdlfiles"
+ newdlprefiles=
+ for lib in $dlprefiles; do
+ case $lib in
+ [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;;
+ *) abs=`pwd`"/$lib" ;;
+ esac
+ newdlprefiles="$newdlprefiles $abs"
+ done
+ dlprefiles="$newdlprefiles"
+ fi
+ $rm $output
+ # place dlname in correct position for cygwin
+ tdlname=$dlname
+ case $host,$output,$installed,$module,$dlname in
+ *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;;
+ esac
+ $echo > $output "\
+# $outputname - a libtool library file
+# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+#
+# Please DO NOT delete this file!
+# It is necessary for linking the library.
+
+# The name that we can dlopen(3).
+dlname='$tdlname'
+
+# Names of this library.
+library_names='$library_names'
+
+# The name of the static archive.
+old_library='$old_library'
+
+# Libraries that this one depends upon.
+dependency_libs='$dependency_libs'
+
+# Version information for $libname.
+current=$current
+age=$age
+revision=$revision
+
+# Is this an already installed library?
+installed=$installed
+
+# Should we warn about portability when linking against -modules?
+shouldnotlink=$module
+
+# Files to dlopen/dlpreopen
+dlopen='$dlfiles'
+dlpreopen='$dlprefiles'
+
+# Directory that this library needs to be installed in:
+libdir='$install_libdir'"
+ if test "$installed" = no && test "$need_relink" = yes; then
+ $echo >> $output "\
+relink_command=\"$relink_command\""
+ fi
+ done
+ fi
+
+ # Create a symbolic link so that the libtool archive can be found in
+ # LD_LIBRARY_PATH before the program is installed.
+ $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)"
+ $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $?
+ ;;
+ esac
+ exit $EXIT_SUCCESS
+ ;;
+
+ # libtool install mode
+ install)
+ modename="$modename: install"
+
+ # There may be an optional sh(1) argument at the beginning of
+ # install_prog (especially on Windows NT).
+ if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh ||
+ # Allow the use of GNU shtool's install command.
+ $echo "X$nonopt" | grep shtool > /dev/null; then
+ # Aesthetically quote it.
+ arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"`
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$arg "
+ arg="$1"
+ shift
+ else
+ install_prog=
+ arg=$nonopt
+ fi
+
+ # The real first argument should be the name of the installation program.
+ # Aesthetically quote it.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$install_prog$arg"
+
+ # We need to accept at least all the BSD install flags.
+ dest=
+ files=
+ opts=
+ prev=
+ install_type=
+ isdir=no
+ stripme=
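+ # Walk the arguments: options accumulate in $install_prog, and
+ # the last non-option argument left in $dest is the destination.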
+ for arg
+ do
+ if test -n "$dest"; then
+ files="$files $dest"
+ dest=$arg
+ continue
+ fi
+
+ case $arg in
+ -d) isdir=yes ;;
+ -f)
+ case " $install_prog " in
+ *[\\\ /]cp\ *) ;;
+ *) prev=$arg ;;
+ esac
+ ;;
+ -g | -m | -o) prev=$arg ;;
+ -s)
+ stripme=" -s"
+ continue
+ ;;
+ -*)
+ ;;
+ *)
+ # If the previous option needed an argument, then skip it.
+ if test -n "$prev"; then
+ prev=
+ else
+ dest=$arg
+ continue
+ fi
+ ;;
+ esac
+
+ # Aesthetically quote the argument.
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`
+ case $arg in
+ *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"")
+ arg="\"$arg\""
+ ;;
+ esac
+ install_prog="$install_prog $arg"
+ done
+
+ if test -z "$install_prog"; then
+ $echo "$modename: you must specify an install program" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ if test -n "$prev"; then
+ $echo "$modename: the \`$prev' option requires an argument" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ if test -z "$files"; then
+ if test -z "$dest"; then
+ $echo "$modename: no file or destination specified" 1>&2
+ else
+ $echo "$modename: you must specify a destination" 1>&2
+ fi
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Strip any trailing slash from the destination.
+ dest=`$echo "X$dest" | $Xsed -e 's%/$%%'`
+
+ # Check to see that the destination is a directory.
+ test -d "$dest" && isdir=yes
+ if test "$isdir" = yes; then
+ destdir="$dest"
+ destname=
+ else
+ destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$destdir" = "X$dest" && destdir=.
+ destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'`
+
+ # Not a directory, so check to see that there is only one file specified.
+ set dummy $files
+ if test "$#" -gt 2; then
+ $echo "$modename: \`$dest' is not a directory" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ fi
+ case $destdir in
+ [\\/]* | [A-Za-z]:[\\/]*) ;;
+ *)
+ for file in $files; do
+ case $file in
+ *.lo) ;;
+ *)
+ $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+ done
+ ;;
+ esac
+
+ # This variable tells wrapper scripts just to set variables rather
+ # than running their programs.
+ libtool_install_magic="$magic"
+
+ staticlibs=
+ future_libdirs=
+ current_libdirs=
+ for file in $files; do
+
+ # Do each installation.
+ case $file in
+ *.$libext)
+ # Do the static libraries later.
+ staticlibs="$staticlibs $file"
+ ;;
+
+ *.la)
+ # Check to see that this really is a libtool archive.
+ if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ library_names=
+ old_library=
+ relink_command=
+ # If there is no directory component, then add one.
+ case $file in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Add the libdir to current_libdirs if it is the destination.
+ if test "X$destdir" = "X$libdir"; then
+ case "$current_libdirs " in
+ *" $libdir "*) ;;
+ *) current_libdirs="$current_libdirs $libdir" ;;
+ esac
+ else
+ # Note the libdir as a future libdir.
+ case "$future_libdirs " in
+ *" $libdir "*) ;;
+ *) future_libdirs="$future_libdirs $libdir" ;;
+ esac
+ fi
+
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/
+ test "X$dir" = "X$file/" && dir=
+ dir="$dir$objdir"
+
+ if test -n "$relink_command"; then
+ # Determine the prefix the user has applied to our future dir.
+ inst_prefix_dir=`$echo "$destdir" | $SED "s%$libdir\$%%"`
+
+ # Don't allow the user to place us outside of our expected
+ # location, because doing so prevents finding dependent
+ # libraries that are installed to the same prefix.
+ # At present, this check doesn't affect windows .dll's that
+ # are installed into $libdir/../bin (currently, that works fine)
+ # but it's something to keep an eye on.
+ if test "$inst_prefix_dir" = "$destdir"; then
+ $echo "$modename: error: cannot install \`$file' to a directory not ending in $libdir" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ if test -n "$inst_prefix_dir"; then
+ # Stick the inst_prefix_dir data into the link command.
+ relink_command=`$echo "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"`
+ else
+ relink_command=`$echo "$relink_command" | $SED "s%@inst_prefix_dir@%%"`
+ fi
+
+ $echo "$modename: warning: relinking \`$file'" 1>&2
+ $show "$relink_command"
+ if $run eval "$relink_command"; then :
+ else
+ $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ fi
+
+ # Get the names of the shared library.
+ set dummy $library_names
+ if test -n "$2"; then
+ realname="$2"
+ shift
+ shift
+
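+ # If the library had to be relinked, the copy to install was
+ # written to ${realname}T.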
+ srcname="$realname"
+ test -n "$relink_command" && srcname="$realname"T
+
+ # Install the shared library and build the symlinks.
+ $show "$install_prog $dir/$srcname $destdir/$realname"
+ $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $?
+ if test -n "$stripme" && test -n "$striplib"; then
+ $show "$striplib $destdir/$realname"
+ $run eval "$striplib $destdir/$realname" || exit $?
+ fi
+
+ if test "$#" -gt 0; then
+ # Delete the old symlinks, and create new ones.
+ # Try `ln -sf' first, because the `ln' binary might depend on
+ # the symlink we replace! Solaris /bin/ln does not understand -f,
+ # so we also need to try rm && ln -s.
+ for linkname
+ do
+ if test "$linkname" != "$realname"; then
+ $show "(cd $destdir && { $LN_S -f $realname $linkname || { $rm $linkname && $LN_S $realname $linkname; }; })"
+ $run eval "(cd $destdir && { $LN_S -f $realname $linkname || { $rm $linkname && $LN_S $realname $linkname; }; })"
+ fi
+ done
+ fi
+
+ # Do each command in the postinstall commands.
+ lib="$destdir/$realname"
+ cmds=$postinstall_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || {
+ lt_exit=$?
+
+ # Restore the uninstalled library and exit
+ if test "$mode" = relink; then
+ $run eval '(cd $output_objdir && $rm ${realname}T && $mv ${realname}U $realname)'
+ fi
+
+ exit $lt_exit
+ }
+ done
+ IFS="$save_ifs"
+ fi
+
+ # Install the pseudo-library for information purposes.
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ instname="$dir/$name"i
+ $show "$install_prog $instname $destdir/$name"
+ $run eval "$install_prog $instname $destdir/$name" || exit $?
+
+ # Maybe install the static library, too.
+ test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library"
+ ;;
+
+ *.lo)
+ # Install (i.e. copy) a libtool object.
+
+ # Figure out destination file name, if it wasn't already specified.
+ if test -n "$destname"; then
+ destfile="$destdir/$destname"
+ else
+ destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ destfile="$destdir/$destfile"
+ fi
+
+ # Deduce the name of the destination old-style object file.
+ case $destfile in
+ *.lo)
+ staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"`
+ ;;
+ *.$objext)
+ staticdest="$destfile"
+ destfile=
+ ;;
+ *)
+ $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ # Install the libtool object if requested.
+ if test -n "$destfile"; then
+ $show "$install_prog $file $destfile"
+ $run eval "$install_prog $file $destfile" || exit $?
+ fi
+
+ # Install the old object if enabled.
+ if test "$build_old_libs" = yes; then
+ # Deduce the name of the old-style object file.
+ staticobj=`$echo "X$file" | $Xsed -e "$lo2o"`
+
+ $show "$install_prog $staticobj $staticdest"
+ $run eval "$install_prog \$staticobj \$staticdest" || exit $?
+ fi
+ exit $EXIT_SUCCESS
+ ;;
+
+ *)
+ # Figure out destination file name, if it wasn't already specified.
+ if test -n "$destname"; then
+ destfile="$destdir/$destname"
+ else
+ destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ destfile="$destdir/$destfile"
+ fi
+
+ # If the file is missing and there is a .exe on the end, strip it,
+ # because it is most likely a libtool script we actually want to
+ # install.
+ stripped_ext=""
+ case $file in
+ *.exe)
+ if test ! -f "$file"; then
+ file=`$echo $file|${SED} 's,.exe$,,'`
+ stripped_ext=".exe"
+ fi
+ ;;
+ esac
+
+ # Do a test to see if this is really a libtool program.
+ case $host in
+ *cygwin*|*mingw*)
+ wrapper=`$echo $file | ${SED} -e 's,.exe$,,'`
+ ;;
+ *)
+ wrapper=$file
+ ;;
+ esac
+ if (${SED} -e '4q' $wrapper | grep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then
+ notinst_deplibs=
+ relink_command=
+
+ # Note that it is not necessary on cygwin/mingw to append a dot to
+ # foo even if both foo and foo.exe exist: automatic-append-.exe
+ # behavior happens only for exec(3), not for open(2)! Also, sourcing
+ # `foo.' does not work on cygwin managed mounts.
+ #
+ # If there is no directory component, then add one.
+ case $wrapper in
+ */* | *\\*) . ${wrapper} ;;
+ *) . ./${wrapper} ;;
+ esac
+
+ # Check the variables that should have been set.
+ if test -z "$notinst_deplibs"; then
+ $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ finalize=yes
+ for lib in $notinst_deplibs; do
+ # Check to see that each library is installed.
+ libdir=
+ if test -f "$lib"; then
+ # If there is no directory component, then add one.
+ case $lib in
+ */* | *\\*) . $lib ;;
+ *) . ./$lib ;;
+ esac
+ fi
+ libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test
+ if test -n "$libdir" && test ! -f "$libfile"; then
+ $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2
+ finalize=no
+ fi
+ done
+
+ relink_command=
+ # Note that it is not necessary on cygwin/mingw to append a dot to
+ # foo even if both foo and foo.exe exist: automatic-append-.exe
+ # behavior happens only for exec(3), not for open(2)! Also, sourcing
+ # `foo.' does not work on cygwin managed mounts.
+ #
+ # If there is no directory component, then add one.
+ case $wrapper in
+ */* | *\\*) . ${wrapper} ;;
+ *) . ./${wrapper} ;;
+ esac
+
+ outputname=
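+ # Without fast_install, the program must be relinked against the
+ # installed library locations before it can be installed.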
+ if test "$fast_install" = no && test -n "$relink_command"; then
+ if test "$finalize" = yes && test -z "$run"; then
+ tmpdir=`func_mktempdir`
+ file=`$echo "X$file$stripped_ext" | $Xsed -e 's%^.*/%%'`
+ outputname="$tmpdir/$file"
+ # Replace the output file specification.
+ relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'`
+
+ $show "$relink_command"
+ if $run eval "$relink_command"; then :
+ else
+ $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2
+ ${rm}r "$tmpdir"
+ continue
+ fi
+ file="$outputname"
+ else
+ $echo "$modename: warning: cannot relink \`$file'" 1>&2
+ fi
+ else
+ # Install the binary that we compiled earlier.
+ file=`$echo "X$file$stripped_ext" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"`
+ fi
+ fi
+
+ # remove .exe since cygwin /usr/bin/install will append another
+ # one anyway
+ case $install_prog,$host in
+ */usr/bin/install*,*cygwin*)
+ case $file:$destfile in
+ *.exe:*.exe)
+ # this is ok
+ ;;
+ *.exe:*)
+ destfile=$destfile.exe
+ ;;
+ *:*.exe)
+ destfile=`$echo $destfile | ${SED} -e 's,.exe$,,'`
+ ;;
+ esac
+ ;;
+ esac
+ $show "$install_prog$stripme $file $destfile"
+ $run eval "$install_prog\$stripme \$file \$destfile" || exit $?
+ test -n "$outputname" && ${rm}r "$tmpdir"
+ ;;
+ esac
+ done
+
+ for file in $staticlibs; do
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+
+ # Set up the ranlib parameters.
+ oldlib="$destdir/$name"
+
+ $show "$install_prog $file $oldlib"
+ $run eval "$install_prog \$file \$oldlib" || exit $?
+
+ if test -n "$stripme" && test -n "$old_striplib"; then
+ $show "$old_striplib $oldlib"
+ $run eval "$old_striplib $oldlib" || exit $?
+ fi
+
+ # Do each command in the postinstall commands.
+ cmds=$old_postinstall_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ done
+
+ if test -n "$future_libdirs"; then
+ $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2
+ fi
+
+ if test -n "$current_libdirs"; then
+ # Maybe just do a dry run.
+ test -n "$run" && current_libdirs=" -n$current_libdirs"
+ exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs'
+ else
+ exit $EXIT_SUCCESS
+ fi
+ ;;
+
+ # libtool finish mode
+ finish)
+ modename="$modename: finish"
+ libdirs="$nonopt"
+ admincmds=
+
+ if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then
+ for dir
+ do
+ libdirs="$libdirs $dir"
+ done
+
+ for libdir in $libdirs; do
+ if test -n "$finish_cmds"; then
+ # Do each command in the finish commands.
+ cmds=$finish_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || admincmds="$admincmds
+ $cmd"
+ done
+ IFS="$save_ifs"
+ fi
+ if test -n "$finish_eval"; then
+ # Do the single finish_eval.
+ eval cmds=\"$finish_eval\"
+ $run eval "$cmds" || admincmds="$admincmds
+ $cmds"
+ fi
+ done
+ fi
+
+ # Exit here if they wanted silent mode.
+ test "$show" = : && exit $EXIT_SUCCESS
+
+ $echo "X----------------------------------------------------------------------" | $Xsed
+ $echo "Libraries have been installed in:"
+ for libdir in $libdirs; do
+ $echo " $libdir"
+ done
+ $echo
+ $echo "If you ever happen to want to link against installed libraries"
+ $echo "in a given directory, LIBDIR, you must either use libtool, and"
+ $echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
+ $echo "flag during linking and do at least one of the following:"
+ if test -n "$shlibpath_var"; then
+ $echo " - add LIBDIR to the \`$shlibpath_var' environment variable"
+ $echo " during execution"
+ fi
+ if test -n "$runpath_var"; then
+ $echo " - add LIBDIR to the \`$runpath_var' environment variable"
+ $echo " during linking"
+ fi
+ if test -n "$hardcode_libdir_flag_spec"; then
+ libdir=LIBDIR
+ eval flag=\"$hardcode_libdir_flag_spec\"
+
+ $echo " - use the \`$flag' linker flag"
+ fi
+ if test -n "$admincmds"; then
+ $echo " - have your system administrator run these commands:$admincmds"
+ fi
+ if test -f /etc/ld.so.conf; then
+ $echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
+ fi
+ $echo
+ $echo "See any operating system documentation about shared libraries for"
+ $echo "more information, such as the ld(1) and ld.so(8) manual pages."
+ $echo "X----------------------------------------------------------------------" | $Xsed
+ exit $EXIT_SUCCESS
+ ;;
+
+ # libtool execute mode
+ execute)
+ modename="$modename: execute"
+
+ # The first argument is the command name.
+ cmd="$nonopt"
+ if test -z "$cmd"; then
+ $echo "$modename: you must specify a COMMAND" 1>&2
+ $echo "$help"
+ exit $EXIT_FAILURE
+ fi
+
+ # Handle -dlopen flags immediately.
+ for file in $execute_dlfiles; do
+ if test ! -f "$file"; then
+ $echo "$modename: \`$file' is not a file" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ dir=
+ case $file in
+ *.la)
+ # Check to see that this really is a libtool archive.
+ if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+ else
+ $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ # Read the libtool library.
+ dlname=
+ library_names=
+
+ # If there is no directory component, then add one.
+ case $file in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Skip this library if it cannot be dlopened.
+ if test -z "$dlname"; then
+ # Warn if it was a shared library.
+ test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'"
+ continue
+ fi
+
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$file" && dir=.
+
+ if test -f "$dir/$objdir/$dlname"; then
+ dir="$dir/$objdir"
+ else
+ $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2
+ exit $EXIT_FAILURE
+ fi
+ ;;
+
+ *.lo)
+ # Just add the directory containing the .lo file.
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ test "X$dir" = "X$file" && dir=.
+ ;;
+
+ *)
+      $echo "$modename: warning: \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2
+ continue
+ ;;
+ esac
+
+ # Get the absolute pathname.
+ absdir=`cd "$dir" && pwd`
+ test -n "$absdir" && dir="$absdir"
+
+ # Now add the directory to shlibpath_var.
+ if eval "test -z \"\$$shlibpath_var\""; then
+ eval "$shlibpath_var=\"\$dir\""
+ else
+ eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\""
+ fi
+ done
+
+ # This variable tells wrapper scripts just to set shlibpath_var
+ # rather than running their programs.
+ libtool_execute_magic="$magic"
+
+ # Check if any of the arguments is a wrapper script.
+ args=
+ for file
+ do
+ case $file in
+ -*) ;;
+ *)
+ # Do a test to see if this is really a libtool program.
+ if (${SED} -e '4q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ # If there is no directory component, then add one.
+ case $file in
+ */* | *\\*) . $file ;;
+ *) . ./$file ;;
+ esac
+
+ # Transform arg to wrapped name.
+ file="$progdir/$program"
+ fi
+ ;;
+ esac
+ # Quote arguments (to preserve shell metacharacters).
+ file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"`
+ args="$args \"$file\""
+ done
+
+ if test -z "$run"; then
+ if test -n "$shlibpath_var"; then
+ # Export the shlibpath_var.
+ eval "export $shlibpath_var"
+ fi
+
+ # Restore saved environment variables
+ if test "${save_LC_ALL+set}" = set; then
+ LC_ALL="$save_LC_ALL"; export LC_ALL
+ fi
+ if test "${save_LANG+set}" = set; then
+ LANG="$save_LANG"; export LANG
+ fi
+
+ # Now prepare to actually exec the command.
+ exec_cmd="\$cmd$args"
+ else
+ # Display what would be done.
+ if test -n "$shlibpath_var"; then
+ eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\""
+ $echo "export $shlibpath_var"
+ fi
+ $echo "$cmd$args"
+ exit $EXIT_SUCCESS
+ fi
+ ;;
+
+ # libtool clean and uninstall mode
+ clean | uninstall)
+ modename="$modename: $mode"
+ rm="$nonopt"
+ files=
+ rmforce=
+ exit_status=0
+
+ # This variable tells wrapper scripts just to set variables rather
+ # than running their programs.
+ libtool_install_magic="$magic"
+
+ for arg
+ do
+ case $arg in
+ -f) rm="$rm $arg"; rmforce=yes ;;
+ -*) rm="$rm $arg" ;;
+ *) files="$files $arg" ;;
+ esac
+ done
+
+ if test -z "$rm"; then
+ $echo "$modename: you must specify an RM program" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+
+ rmdirs=
+
+ origobjdir="$objdir"
+ for file in $files; do
+ dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+ if test "X$dir" = "X$file"; then
+ dir=.
+ objdir="$origobjdir"
+ else
+ objdir="$dir/$origobjdir"
+ fi
+ name=`$echo "X$file" | $Xsed -e 's%^.*/%%'`
+ test "$mode" = uninstall && objdir="$dir"
+
+ # Remember objdir for removal later, being careful to avoid duplicates
+ if test "$mode" = clean; then
+ case " $rmdirs " in
+ *" $objdir "*) ;;
+ *) rmdirs="$rmdirs $objdir" ;;
+ esac
+ fi
+
+ # Don't error if the file doesn't exist and rm -f was used.
+ if (test -L "$file") >/dev/null 2>&1 \
+ || (test -h "$file") >/dev/null 2>&1 \
+ || test -f "$file"; then
+ :
+ elif test -d "$file"; then
+ exit_status=1
+ continue
+ elif test "$rmforce" = yes; then
+ continue
+ fi
+
+ rmfiles="$file"
+
+ case $name in
+ *.la)
+ # Possibly a libtool archive, so verify it.
+ if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ . $dir/$name
+
+ # Delete the libtool libraries and symlinks.
+ for n in $library_names; do
+ rmfiles="$rmfiles $objdir/$n"
+ done
+ test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library"
+
+ case "$mode" in
+ clean)
+ case " $library_names " in
+ # " " in the beginning catches empty $dlname
+ *" $dlname "*) ;;
+ *) rmfiles="$rmfiles $objdir/$dlname" ;;
+ esac
+ test -n "$libdir" && rmfiles="$rmfiles $objdir/$name $objdir/${name}i"
+ ;;
+ uninstall)
+ if test -n "$library_names"; then
+ # Do each command in the postuninstall commands.
+ cmds=$postuninstall_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd"
+ if test "$?" -ne 0 && test "$rmforce" != yes; then
+ exit_status=1
+ fi
+ done
+ IFS="$save_ifs"
+ fi
+
+ if test -n "$old_library"; then
+ # Do each command in the old_postuninstall commands.
+ cmds=$old_postuninstall_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd"
+ if test "$?" -ne 0 && test "$rmforce" != yes; then
+ exit_status=1
+ fi
+ done
+ IFS="$save_ifs"
+ fi
+ # FIXME: should reinstall the best remaining shared library.
+ ;;
+ esac
+ fi
+ ;;
+
+ *.lo)
+ # Possibly a libtool object, so verify it.
+ if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+
+ # Read the .lo file
+ . $dir/$name
+
+ # Add PIC object to the list of files to remove.
+ if test -n "$pic_object" \
+ && test "$pic_object" != none; then
+ rmfiles="$rmfiles $dir/$pic_object"
+ fi
+
+ # Add non-PIC object to the list of files to remove.
+ if test -n "$non_pic_object" \
+ && test "$non_pic_object" != none; then
+ rmfiles="$rmfiles $dir/$non_pic_object"
+ fi
+ fi
+ ;;
+
+ *)
+ if test "$mode" = clean ; then
+ noexename=$name
+ case $file in
+ *.exe)
+ file=`$echo $file|${SED} 's,.exe$,,'`
+ noexename=`$echo $name|${SED} 's,.exe$,,'`
+ # $file with .exe has already been added to rmfiles,
+ # add $file without .exe
+ rmfiles="$rmfiles $file"
+ ;;
+ esac
+ # Do a test to see if this is a libtool program.
+ if (${SED} -e '4q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then
+ relink_command=
+ . $dir/$noexename
+
+ # note $name still contains .exe if it was in $file originally
+ # as does the version of $file that was added into $rmfiles
+ rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}"
+ if test "$fast_install" = yes && test -n "$relink_command"; then
+ rmfiles="$rmfiles $objdir/lt-$name"
+ fi
+ if test "X$noexename" != "X$name" ; then
+ rmfiles="$rmfiles $objdir/lt-${noexename}.c"
+ fi
+ fi
+ fi
+ ;;
+ esac
+ $show "$rm $rmfiles"
+ $run $rm $rmfiles || exit_status=1
+ done
+ objdir="$origobjdir"
+
+ # Try to remove the ${objdir}s in the directories where we deleted files
+ for dir in $rmdirs; do
+ if test -d "$dir"; then
+ $show "rmdir $dir"
+ $run rmdir $dir >/dev/null 2>&1
+ fi
+ done
+
+ exit $exit_status
+ ;;
+
+ "")
+ $echo "$modename: you must specify a MODE" 1>&2
+ $echo "$generic_help" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+ esac
+
+ if test -z "$exec_cmd"; then
+ $echo "$modename: invalid operation mode \`$mode'" 1>&2
+ $echo "$generic_help" 1>&2
+ exit $EXIT_FAILURE
+ fi
+fi # test -z "$show_help"
+
+if test -n "$exec_cmd"; then
+ eval exec $exec_cmd
+ exit $EXIT_FAILURE
+fi
+
+# We need to display help for each of the modes.
+case $mode in
+"") $echo \
+"Usage: $modename [OPTION]... [MODE-ARG]...
+
+Provide generalized library-building support services.
+
+ --config show all configuration variables
+ --debug enable verbose shell tracing
+-n, --dry-run display commands without modifying any files
+ --features display basic configuration information and exit
+ --finish same as \`--mode=finish'
+ --help display this help message and exit
+ --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS]
+ --quiet same as \`--silent'
+ --silent don't print informational messages
+ --tag=TAG use configuration variables from tag TAG
+ --version print version information
+
+MODE must be one of the following:
+
+ clean remove files from the build directory
+ compile compile a source file into a libtool object
+ execute automatically set library path, then run a program
+ finish complete the installation of libtool libraries
+ install install libraries or executables
+ link create a library or an executable
+ uninstall remove libraries from an installed directory
+
+MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for
+a more detailed description of MODE.
+
+Report bugs to <bug-libtool@gnu.org>."
+ exit $EXIT_SUCCESS
+ ;;
+
+clean)
+ $echo \
+"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE...
+
+Remove files from the build directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, object or program, all the files associated
+with it are deleted. Otherwise, only FILE itself is deleted using RM."
+ ;;
+
+compile)
+ $echo \
+"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE
+
+Compile a source file into a libtool library object.
+
+This mode accepts the following additional options:
+
+ -o OUTPUT-FILE set the output file name to OUTPUT-FILE
+  -prefer-pic       try to build PIC objects only
+  -prefer-non-pic   try to build non-PIC objects only
+ -static always build a \`.o' file suitable for static linking
+
+COMPILE-COMMAND is a command to be used in creating a \`standard' object file
+from the given SOURCEFILE.
+
+The output file name is determined by removing the directory component from
+SOURCEFILE, then substituting the C source code suffix \`.c' with the
+library object suffix, \`.lo'."
+ ;;
+
+execute)
+ $echo \
+"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]...
+
+Automatically set library path, then run a program.
+
+This mode accepts the following additional options:
+
+ -dlopen FILE add the directory containing FILE to the library path
+
+This mode sets the library path environment variable according to \`-dlopen'
+flags.
+
+If any of the ARGS are libtool executable wrappers, then they are translated
+into their corresponding uninstalled binary, and any of their required library
+directories are added to the library path.
+
+Then, COMMAND is executed, with ARGS as arguments."
+ ;;
+
+finish)
+ $echo \
+"Usage: $modename [OPTION]... --mode=finish [LIBDIR]...
+
+Complete the installation of libtool libraries.
+
+Each LIBDIR is a directory that contains libtool libraries.
+
+The commands that this mode executes may require superuser privileges. Use
+the \`--dry-run' option if you just want to see what would be executed."
+ ;;
+
+install)
+ $echo \
+"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND...
+
+Install executables or libraries.
+
+INSTALL-COMMAND is the installation command. The first component should be
+either the \`install' or \`cp' program.
+
+The rest of the components are interpreted as arguments to that command (only
+BSD-compatible install options are recognized)."
+ ;;
+
+link)
+ $echo \
+"Usage: $modename [OPTION]... --mode=link LINK-COMMAND...
+
+Link object files or libraries together to form another library, or to
+create an executable program.
+
+LINK-COMMAND is a command using the C compiler that you would use to create
+a program from several object files.
+
+The following components of LINK-COMMAND are treated specially:
+
+ -all-static do not do any dynamic linking at all
+ -avoid-version do not add a version suffix if possible
+ -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime
+ -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols
+ -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3)
+ -export-symbols SYMFILE
+ try to export only the symbols listed in SYMFILE
+ -export-symbols-regex REGEX
+ try to export only the symbols matching REGEX
+ -LLIBDIR search LIBDIR for required installed libraries
+ -lNAME OUTPUT-FILE requires the installed library libNAME
+  -module           build a library that can be dlopened
+ -no-fast-install disable the fast-install mode
+ -no-install link a not-installable executable
+ -no-undefined declare that a library does not refer to external symbols
+ -o OUTPUT-FILE create OUTPUT-FILE from the specified objects
+  -objectlist FILE  use a list of object files found in FILE to specify objects
+ -precious-files-regex REGEX
+ don't remove output files matching REGEX
+ -release RELEASE specify package release information
+ -rpath LIBDIR the created library will eventually be installed in LIBDIR
+ -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries
+ -static do not do any dynamic linking of libtool libraries
+ -version-info CURRENT[:REVISION[:AGE]]
+ specify library version info [each variable defaults to 0]
+
+All other options (arguments beginning with \`-') are ignored.
+
+Every other argument is treated as a filename. Files ending in \`.la' are
+treated as uninstalled libtool libraries, other files are standard or library
+object files.
+
+If the OUTPUT-FILE ends in \`.la', then a libtool library is created,
+only library objects (\`.lo' files) may be specified, and \`-rpath' is
+required, except when creating a convenience library.
+
+If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created
+using \`ar' and \`ranlib', or on Windows using \`lib'.
+
+If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file
+is created, otherwise an executable program is created."
+ ;;
+
+uninstall)
+ $echo \
+"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE...
+
+Remove libraries from an installation directory.
+
+RM is the name of the program to use to delete files associated with each FILE
+(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed
+to RM.
+
+If FILE is a libtool library, all the files associated with it are deleted.
+Otherwise, only FILE itself is deleted using RM."
+ ;;
+
+*)
+ $echo "$modename: invalid operation mode \`$mode'" 1>&2
+ $echo "$help" 1>&2
+ exit $EXIT_FAILURE
+ ;;
+esac
+
+$echo
+$echo "Try \`$modename --help' for more information about other modes."
+
+exit $?
+
+# The TAGs below are defined such that we never get into a situation
+# in which we disable both kinds of libraries. Given conflicting
+# choices, we go for a static library, which is the most portable,
+# since we can't tell whether shared libraries were disabled because
+# the user asked for that or because the platform doesn't support
+# them. This is particularly important on AIX, because we don't
+# support having both static and shared libraries enabled at the same
+# time on that platform, so we default to a shared-only configuration.
+# If a disable-shared tag is given, we'll fall back to a static-only
+# configuration. But we'll never go from static-only to shared-only.
+
+# ### BEGIN LIBTOOL TAG CONFIG: disable-shared
+disable_libs=shared
+# ### END LIBTOOL TAG CONFIG: disable-shared
+
+# ### BEGIN LIBTOOL TAG CONFIG: disable-static
+disable_libs=static
+# ### END LIBTOOL TAG CONFIG: disable-static
+
+# Local Variables:
+# mode:shell-script
+# sh-indentation:2
+# End:
diff --git a/projects/sample/autoconf/m4/build_exeext.m4 b/projects/sample/autoconf/m4/build_exeext.m4
new file mode 100644
index 000000000000..1bdecc1ba578
--- /dev/null
+++ b/projects/sample/autoconf/m4/build_exeext.m4
@@ -0,0 +1,42 @@
+# Check for the extension used for executables on build platform.
+# This is necessary for cross-compiling where the build platform
+# may differ from the host platform.
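+#
+# Hypothetical usage sketch (names below are illustrative only): invoke the
+# macro from configure.ac after AC_CANONICAL_BUILD, then use the substituted
+# suffix when naming build-platform tools:
+#   AC_BUILD_EXEEXT
+#   BUILD_TBLGEN="tblgen${BUILD_EXEEXT}"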
+AC_DEFUN([AC_BUILD_EXEEXT],
+[
+AC_MSG_CHECKING([for executable suffix on build platform])
+AC_CACHE_VAL(ac_cv_build_exeext,
+[if test "$CYGWIN" = yes || test "$MINGW32" = yes; then
+ ac_cv_build_exeext=.exe
+else
+ ac_build_prefix=${build_alias}-
+
+ AC_CHECK_PROG(BUILD_CC, ${ac_build_prefix}gcc, ${ac_build_prefix}gcc)
+ if test -z "$BUILD_CC"; then
+ AC_CHECK_PROG(BUILD_CC, gcc, gcc)
+ if test -z "$BUILD_CC"; then
+ AC_CHECK_PROG(BUILD_CC, cc, cc, , , /usr/ucb/cc)
+ fi
+ fi
+ test -z "$BUILD_CC" && AC_MSG_ERROR([no acceptable cc found in \$PATH])
+ ac_build_link='${BUILD_CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&AS_MESSAGE_LOG_FD'
+ rm -f conftest*
+ echo 'int main () { return 0; }' > conftest.$ac_ext
+ ac_cv_build_exeext=
+ if AC_TRY_EVAL(ac_build_link); then
+ for file in conftest.*; do
+ case $file in
+ *.c | *.o | *.obj | *.dSYM) ;;
+ *) ac_cv_build_exeext=`echo $file | sed -e s/conftest//` ;;
+ esac
+ done
+ else
+ AC_MSG_ERROR([installation or configuration problem: compiler cannot create executables.])
+ fi
+ rm -f conftest*
+ test x"${ac_cv_build_exeext}" = x && ac_cv_build_exeext=blank
+fi])
+BUILD_EXEEXT=""
+test x"${ac_cv_build_exeext}" != xblank && BUILD_EXEEXT=${ac_cv_build_exeext}
+AC_MSG_RESULT(${ac_cv_build_exeext})
+ac_build_exeext=$BUILD_EXEEXT
+AC_SUBST(BUILD_EXEEXT)])
diff --git a/projects/sample/autoconf/m4/c_printf_a.m4 b/projects/sample/autoconf/m4/c_printf_a.m4
new file mode 100644
index 000000000000..61bac8c9dd11
--- /dev/null
+++ b/projects/sample/autoconf/m4/c_printf_a.m4
@@ -0,0 +1,31 @@
+#
+# Determine if the printf() functions have the %a format character.
+# This is modified from:
+# http://www.gnu.org/software/ac-archive/htmldoc/ac_cxx_have_ext_slist.html
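+#
+# A minimal consumer sketch (hypothetical): C code can guard any use of the
+# %a hex-float format on the symbol this check defines, e.g.
+#   #ifdef HAVE_PRINTF_A
+#     sprintf(Buffer, "%a", Value);
+#   #endif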
+AC_DEFUN([AC_C_PRINTF_A],
+[AC_CACHE_CHECK([if printf has the %a format character],[llvm_cv_c_printf_a],
+[AC_LANG_PUSH([C])
+ AC_RUN_IFELSE([
+ AC_LANG_PROGRAM([[
+#include <stdio.h>
+#include <stdlib.h>
+]],[[
+volatile double A, B;
+char Buffer[100];
+A = 1;
+A /= 10.0;
+sprintf(Buffer, "%a", A);
+B = atof(Buffer);
+if (A != B)
+ return (1);
+if (A != 0x1.999999999999ap-4)
+ return (1);
+return (0);]])],
+ llvm_cv_c_printf_a=yes,
+  llvm_cv_c_printf_a=no,
+  llvm_cv_c_printf_a=no)
+ AC_LANG_POP([C])])
+ if test "$llvm_cv_c_printf_a" = "yes"; then
+ AC_DEFINE([HAVE_PRINTF_A],[1],[Define to have the %a format string])
+ fi
+])
diff --git a/projects/sample/autoconf/m4/check_gnu_make.m4 b/projects/sample/autoconf/m4/check_gnu_make.m4
new file mode 100644
index 000000000000..7355e1c85bb9
--- /dev/null
+++ b/projects/sample/autoconf/m4/check_gnu_make.m4
@@ -0,0 +1,26 @@
+#
+# Check for GNU Make. This is originally from
+# http://www.gnu.org/software/ac-archive/htmldoc/check_gnu_make.html
+#
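+# Hypothetical usage sketch: once AC_CHECK_GNU_MAKE has run, a Makefile.in
+# can hide GNU-make-only syntax behind the substituted @ifGNUmake@ marker:
+#   @ifGNUmake@ ifneq ($(VERBOSE),1)
+#   @ifGNUmake@ endif
+#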
+AC_DEFUN([AC_CHECK_GNU_MAKE],
+[AC_CACHE_CHECK([for GNU make],[llvm_cv_gnu_make_command],
+dnl Search all the common names for GNU make
+[llvm_cv_gnu_make_command=''
+ for a in "$MAKE" make gmake gnumake ; do
+ if test -z "$a" ; then continue ; fi ;
+ if ( sh -c "$a --version" 2> /dev/null | grep GNU 2>&1 > /dev/null )
+ then
+ llvm_cv_gnu_make_command=$a ;
+ break;
+ fi
+ done])
+dnl If there was a GNU version, then set @ifGNUmake@ to the empty string,
+dnl '#' otherwise
+ if test "x$llvm_cv_gnu_make_command" != "x" ; then
+ ifGNUmake='' ;
+ else
+ ifGNUmake='#' ;
+ AC_MSG_RESULT("Not found");
+ fi
+ AC_SUBST(ifGNUmake)
+])
diff --git a/projects/sample/autoconf/m4/config_makefile.m4 b/projects/sample/autoconf/m4/config_makefile.m4
new file mode 100644
index 000000000000..d9bfcb852894
--- /dev/null
+++ b/projects/sample/autoconf/m4/config_makefile.m4
@@ -0,0 +1,9 @@
+#
+# Configure a Makefile without clobbering it if it exists and is not out of
+# date. This macro is unique to LLVM.
+#
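+# Hypothetical usage sketch from a project's configure.ac, one call per
+# Makefile to be copied into the object tree:
+#   AC_CONFIG_MAKEFILE(Makefile)
+#   AC_CONFIG_MAKEFILE(lib/Makefile)
+#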
+AC_DEFUN([AC_CONFIG_MAKEFILE],
+[AC_CONFIG_COMMANDS($1,
+ [${srcdir}/autoconf/mkinstalldirs `dirname $1`
+ ${SHELL} ${srcdir}/autoconf/install-sh -m 0644 -c ${srcdir}/$1 $1])
+])
diff --git a/projects/sample/autoconf/m4/config_project.m4 b/projects/sample/autoconf/m4/config_project.m4
new file mode 100644
index 000000000000..eea7faf165c2
--- /dev/null
+++ b/projects/sample/autoconf/m4/config_project.m4
@@ -0,0 +1,14 @@
+#
+# Provide the arguments and other processing needed for an LLVM project
+#
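+# Hypothetical usage sketch, passing default source and object trees that
+# the user may override with --with-llvmsrc= and --with-llvmobj=:
+#   LLVM_CONFIG_PROJECT([/opt/llvm/src],[/opt/llvm/obj])
+#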
+AC_DEFUN([LLVM_CONFIG_PROJECT],
+ [AC_ARG_WITH([llvmsrc],
+ AS_HELP_STRING([--with-llvmsrc],[Location of LLVM Source Code]),
+ [llvm_src="$withval"],[llvm_src="]$1["])
+ AC_SUBST(LLVM_SRC,$llvm_src)
+ AC_ARG_WITH([llvmobj],
+ AS_HELP_STRING([--with-llvmobj],[Location of LLVM Object Code]),
+ [llvm_obj="$withval"],[llvm_obj="]$2["])
+ AC_SUBST(LLVM_OBJ,$llvm_obj)
+ AC_CONFIG_COMMANDS([setup],,[llvm_src="${LLVM_SRC}"])
+])
diff --git a/projects/sample/autoconf/m4/cxx_flag_check.m4 b/projects/sample/autoconf/m4/cxx_flag_check.m4
new file mode 100644
index 000000000000..62454b7147f9
--- /dev/null
+++ b/projects/sample/autoconf/m4/cxx_flag_check.m4
@@ -0,0 +1,2 @@
+AC_DEFUN([CXX_FLAG_CHECK],
+ [AC_SUBST($1, `$CXX -Werror $2 -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)])
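+
+# A hypothetical invocation sketch (variable name and flag are illustrative):
+# substitute NO_VARIADIC_MACROS with the flag when $CXX accepts it, and with
+# the empty string otherwise:
+#   CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])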
diff --git a/projects/sample/autoconf/m4/find_std_program.m4 b/projects/sample/autoconf/m4/find_std_program.m4
new file mode 100644
index 000000000000..c789df8e641d
--- /dev/null
+++ b/projects/sample/autoconf/m4/find_std_program.m4
@@ -0,0 +1,118 @@
+dnl Check for a standard program that has a bin, include and lib directory
+dnl
+dnl Parameters:
+dnl $1 - prefix directory to check
+dnl $2 - program name to check
+dnl $3 - header file to check
+dnl $4 - library file to check
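+dnl
+dnl Hypothetical invocation sketch (program and file names are illustrative):
+dnl   CHECK_STD_PROGRAM([/usr/local],[zip],[zip.h],[libzip.a])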
+AC_DEFUN([CHECK_STD_PROGRAM],
+[m4_define([allcapsname],translit($2,a-z,A-Z))
+if test -n "$1" -a -d "$1" -a -n "$2" -a -d "$1/bin" -a -x "$1/bin/$2" ; then
+ AC_SUBST([USE_]allcapsname(),["USE_]allcapsname()[ = 1"])
+ AC_SUBST(allcapsname(),[$1/bin/$2])
+ AC_SUBST(allcapsname()[_BIN],[$1/bin])
+ AC_SUBST(allcapsname()[_DIR],[$1])
+ if test -n "$3" -a -d "$1/include" -a -f "$1/include/$3" ; then
+ AC_SUBST(allcapsname()[_INC],[$1/include])
+ fi
+ if test -n "$4" -a -d "$1/lib" -a -f "$1/lib/$4" ; then
+ AC_SUBST(allcapsname()[_LIB],[$1/lib])
+ fi
+fi
+])
+
+dnl Find a program via --with options, in the path, or well known places
+dnl
+dnl Parameters:
+dnl $1 - program's executable name
+dnl $2 - header file name to check (optional)
+dnl $3 - library file name to check (optional)
+dnl $4 - alternate (long) name for the program
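+dnl
+dnl Hypothetical invocation sketch (program and file names are illustrative):
+dnl   FIND_STD_PROGRAM([zip],[zip.h],[libzip.a],[info-zip])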
+AC_DEFUN([FIND_STD_PROGRAM],
+[m4_define([allcapsname],translit($1,a-z,A-Z))
+m4_define([stdprog_long_name],ifelse($4,,translit($1,[ !@#$%^&*()-+={}[]:;"',./?],[-]),translit($4,[ !@#$%^&*()-+={}[]:;"',./?],[-])))
+AC_MSG_CHECKING([for ]stdprog_long_name()[ bin/lib/include locations])
+AC_ARG_WITH($1,
+ AS_HELP_STRING([--with-]stdprog_long_name()[=DIR],
+ [Specify that the ]stdprog_long_name()[ install prefix is DIR]),
+ $1[pfxdir=$withval],$1[pfxdir=nada])
+AC_ARG_WITH($1[-bin],
+ AS_HELP_STRING([--with-]stdprog_long_name()[-bin=DIR],
+ [Specify that the ]stdprog_long_name()[ binary is in DIR]),
+ $1[bindir=$withval],$1[bindir=nada])
+AC_ARG_WITH($1[-lib],
+ AS_HELP_STRING([--with-]stdprog_long_name()[-lib=DIR],
+ [Specify that ]stdprog_long_name()[ libraries are in DIR]),
+ $1[libdir=$withval],$1[libdir=nada])
+AC_ARG_WITH($1[-inc],
+ AS_HELP_STRING([--with-]stdprog_long_name()[-inc=DIR],
+ [Specify that the ]stdprog_long_name()[ includes are in DIR]),
+ $1[incdir=$withval],$1[incdir=nada])
+eval pfxval=\$\{$1pfxdir\}
+eval binval=\$\{$1bindir\}
+eval incval=\$\{$1incdir\}
+eval libval=\$\{$1libdir\}
+if test "${pfxval}" != "nada" ; then
+ CHECK_STD_PROGRAM(${pfxval},$1,$2,$3)
+elif test "${binval}" != "nada" ; then
+ if test "${libval}" != "nada" ; then
+ if test "${incval}" != "nada" ; then
+ if test -d "${binval}" ; then
+ if test -d "${incval}" ; then
+ if test -d "${libval}" ; then
+ AC_SUBST(allcapsname(),${binval}/$1)
+ AC_SUBST(allcapsname()[_BIN],${binval})
+ AC_SUBST(allcapsname()[_INC],${incval})
+ AC_SUBST(allcapsname()[_LIB],${libval})
+ AC_SUBST([USE_]allcapsname(),["USE_]allcapsname()[ = 1"])
+ AC_MSG_RESULT([found via --with options])
+ else
+ AC_MSG_RESULT([failed])
+ AC_MSG_ERROR([The --with-]$1[-libdir value must be a directory])
+ fi
+ else
+ AC_MSG_RESULT([failed])
+ AC_MSG_ERROR([The --with-]$1[-incdir value must be a directory])
+ fi
+ else
+ AC_MSG_RESULT([failed])
+ AC_MSG_ERROR([The --with-]$1[-bindir value must be a directory])
+ fi
+ else
+ AC_MSG_RESULT([failed])
+ AC_MSG_ERROR([The --with-]$1[-incdir option must be specified])
+ fi
+ else
+ AC_MSG_RESULT([failed])
+ AC_MSG_ERROR([The --with-]$1[-libdir option must be specified])
+ fi
+else
+ tmppfxdir=`which $1 2>&1`
+ if test -n "$tmppfxdir" -a -d "${tmppfxdir%*$1}" -a \
+ -d "${tmppfxdir%*$1}/.." ; then
+ tmppfxdir=`cd "${tmppfxdir%*$1}/.." ; pwd`
+ CHECK_STD_PROGRAM($tmppfxdir,$1,$2,$3)
+ AC_MSG_RESULT([found in PATH at ]$tmppfxdir)
+ else
+ checkresult="yes"
+ eval checkval=\$\{"USE_"allcapsname()\}
+ CHECK_STD_PROGRAM([/usr],$1,$2,$3)
+ if test -z "${checkval}" ; then
+ CHECK_STD_PROGRAM([/usr/local],$1,$2,$3)
+ if test -z "${checkval}" ; then
+ CHECK_STD_PROGRAM([/sw],$1,$2,$3)
+ if test -z "${checkval}" ; then
+ CHECK_STD_PROGRAM([/opt],$1,$2,$3)
+ if test -z "${checkval}" ; then
+ CHECK_STD_PROGRAM([/],$1,$2,$3)
+ if test -z "${checkval}" ; then
+ checkresult="no"
+ fi
+ fi
+ fi
+ fi
+ fi
+ AC_MSG_RESULT($checkresult)
+ fi
+fi
+])
diff --git a/projects/sample/autoconf/m4/func_isinf.m4 b/projects/sample/autoconf/m4/func_isinf.m4
new file mode 100644
index 000000000000..5c000f8fad91
--- /dev/null
+++ b/projects/sample/autoconf/m4/func_isinf.m4
@@ -0,0 +1,36 @@
+#
+# This function determines if the isinf function is available on this
+# platform.
+#
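+# A minimal consumer sketch (hypothetical): C++ code can pick an isinf
+# flavor from the symbols this check defines, e.g.
+#   #if defined(HAVE_STD_ISINF_IN_CMATH)
+#     using std::isinf;
+#   #elif defined(HAVE_ISINF_IN_MATH_H)
+#     /* isinf comes straight from <math.h> */
+#   #endif
+#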
+AC_DEFUN([AC_FUNC_ISINF],[
+AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_math_h],
+ [isinf], [<math.h>],
+ [float f; isinf(f);])
+if test "$ac_cv_func_isinf_in_math_h" = "yes" ; then
+ AC_DEFINE([HAVE_ISINF_IN_MATH_H],1,[Set to 1 if the isinf function is found in <math.h>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_cmath],
+ [isinf], [<cmath>],
+ [float f; isinf(f);])
+if test "$ac_cv_func_isinf_in_cmath" = "yes" ; then
+ AC_DEFINE([HAVE_ISINF_IN_CMATH],1,[Set to 1 if the isinf function is found in <cmath>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_std_isinf_in_cmath],
+ [std::isinf], [<cmath>],
+ [float f; std::isinf(f);])
+if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then
+ AC_DEFINE([HAVE_STD_ISINF_IN_CMATH],1,[Set to 1 if the std::isinf function is found in <cmath>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_finite_in_ieeefp_h],
+ [finite], [<ieeefp.h>],
+ [float f; finite(f);])
+if test "$ac_cv_func_finite_in_ieeefp_h" = "yes" ; then
+ AC_DEFINE([HAVE_FINITE_IN_IEEEFP_H],1,[Set to 1 if the finite function is found in <ieeefp.h>])
+fi
+
+])
+
+
diff --git a/projects/sample/autoconf/m4/func_isnan.m4 b/projects/sample/autoconf/m4/func_isnan.m4
new file mode 100644
index 000000000000..eb5ca0daeb5e
--- /dev/null
+++ b/projects/sample/autoconf/m4/func_isnan.m4
@@ -0,0 +1,27 @@
+#
+# This function determines if the isnan function is available on this
+# platform.
+#
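+# A minimal consumer sketch (hypothetical), mirroring the isinf check above:
+#   #if defined(HAVE_STD_ISNAN_IN_CMATH)
+#     using std::isnan;
+#   #elif defined(HAVE_ISNAN_IN_CMATH)
+#     /* isnan comes straight from <cmath> */
+#   #endif
+#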
+AC_DEFUN([AC_FUNC_ISNAN],[
+AC_SINGLE_CXX_CHECK([ac_cv_func_isnan_in_math_h],
+ [isnan], [<math.h>],
+ [float f; isnan(f);])
+
+if test "$ac_cv_func_isnan_in_math_h" = "yes" ; then
+ AC_DEFINE([HAVE_ISNAN_IN_MATH_H],1,[Set to 1 if the isnan function is found in <math.h>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_isnan_in_cmath],
+ [isnan], [<cmath>],
+ [float f; isnan(f);])
+if test "$ac_cv_func_isnan_in_cmath" = "yes" ; then
+ AC_DEFINE([HAVE_ISNAN_IN_CMATH],1,[Set to 1 if the isnan function is found in <cmath>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_std_isnan_in_cmath],
+ [std::isnan], [<cmath>],
+ [float f; std::isnan(f);])
+if test "$ac_cv_func_std_isnan_in_cmath" = "yes" ; then
+ AC_DEFINE([HAVE_STD_ISNAN_IN_CMATH],1,[Set to 1 if the std::isnan function is found in <cmath>])
+fi
+])
diff --git a/projects/sample/autoconf/m4/func_mmap_file.m4 b/projects/sample/autoconf/m4/func_mmap_file.m4
new file mode 100644
index 000000000000..372c87fbe5c6
--- /dev/null
+++ b/projects/sample/autoconf/m4/func_mmap_file.m4
@@ -0,0 +1,26 @@
+#
+# Check for the ability to mmap a file.
+#
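+# Hypothetical consumer sketch: file-reading code can guard its mmap fast
+# path on the symbol this check defines (names are illustrative):
+#   #ifdef HAVE_MMAP_FILE
+#     Buf = mmap(0, Size, PROT_READ, MAP_SHARED, FD, 0);
+#   #endif
+#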
+AC_DEFUN([AC_FUNC_MMAP_FILE],
+[AC_CACHE_CHECK(for mmap of files,
+ac_cv_func_mmap_file,
+[ AC_LANG_PUSH([C])
+ AC_RUN_IFELSE([
+ AC_LANG_PROGRAM([[
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+]],[[
+ int fd;
+ fd = creat ("foo",0777);
+ fd = (int) mmap (0, 1, PROT_READ, MAP_SHARED, fd, 0);
+ unlink ("foo");
+ return (fd != (int) MAP_FAILED);]])],
+ [ac_cv_func_mmap_file=yes],[ac_cv_func_mmap_file=no],[ac_cv_func_mmap_file=no])
+ AC_LANG_POP([C])
+])
+if test "$ac_cv_func_mmap_file" = yes; then
+ AC_DEFINE([HAVE_MMAP_FILE],[],[Define if mmap() can map files into memory])
+ AC_SUBST(MMAP_FILE,[yes])
+fi
+])
diff --git a/projects/sample/autoconf/m4/header_mmap_anonymous.m4 b/projects/sample/autoconf/m4/header_mmap_anonymous.m4
new file mode 100644
index 000000000000..2270d29557b6
--- /dev/null
+++ b/projects/sample/autoconf/m4/header_mmap_anonymous.m4
@@ -0,0 +1,21 @@
+#
+# Check for anonymous mmap macros. This is modified from
+# http://www.gnu.org/software/ac-archive/htmldoc/ac_cxx_have_ext_slist.html
+#
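+# Hypothetical consumer sketch: portable code can spell the anonymous-mapping
+# flag according to this check's result:
+#   #ifdef HAVE_MMAP_ANONYMOUS
+#   # define ANON_FLAG MAP_ANONYMOUS
+#   #else
+#   # define ANON_FLAG MAP_ANON
+#   #endif
+#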
+AC_DEFUN([AC_HEADER_MMAP_ANONYMOUS],
+[AC_CACHE_CHECK(for MAP_ANONYMOUS vs. MAP_ANON,
+ac_cv_header_mmap_anon,
+[ AC_LANG_PUSH([C])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [[#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>]],
+ [[mmap (0, 1, PROT_READ, MAP_ANONYMOUS, -1, 0); return (0);]])],
+ ac_cv_header_mmap_anon=yes,
+ ac_cv_header_mmap_anon=no)
+ AC_LANG_POP([C])
+])
+if test "$ac_cv_header_mmap_anon" = yes; then
+ AC_DEFINE([HAVE_MMAP_ANONYMOUS],[1],[Define if mmap() uses MAP_ANONYMOUS to map anonymous pages, or undefine if it uses MAP_ANON])
+fi
+])
diff --git a/projects/sample/autoconf/m4/huge_val.m4 b/projects/sample/autoconf/m4/huge_val.m4
new file mode 100644
index 000000000000..7ef9dcae6943
--- /dev/null
+++ b/projects/sample/autoconf/m4/huge_val.m4
@@ -0,0 +1,20 @@
+#
+# This function determines if the HUGE_VAL macro is compilable with the
+# -pedantic switch or not. XCode < 2.4.1 doesn't get it right.
+#
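+# Hypothetical usage sketch: run the check, then add -pedantic only when it
+# is known to be safe:
+#   AC_HUGE_VAL_CHECK
+#   test "$HUGE_VAL_SANITY" = yes && CXXFLAGS="$CXXFLAGS -pedantic"
+#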
+AC_DEFUN([AC_HUGE_VAL_CHECK],[
+ AC_CACHE_CHECK([for HUGE_VAL sanity], [ac_cv_huge_val_sanity],[
+ AC_LANG_PUSH([C++])
+ ac_save_CXXFLAGS=$CXXFLAGS
+ CXXFLAGS+=" -pedantic"
+ AC_RUN_IFELSE(
+ AC_LANG_PROGRAM(
+ [#include <math.h>],
+ [double x = HUGE_VAL; return x != x; ]),
+ [ac_cv_huge_val_sanity=yes],[ac_cv_huge_val_sanity=no],
+ [ac_cv_huge_val_sanity=yes])
+ CXXFLAGS=$ac_save_CXXFLAGS
+ AC_LANG_POP([C++])
+ ])
+ AC_SUBST(HUGE_VAL_SANITY,$ac_cv_huge_val_sanity)
+])
diff --git a/projects/sample/autoconf/m4/libtool.m4 b/projects/sample/autoconf/m4/libtool.m4
new file mode 100644
index 000000000000..36ac3d15def6
--- /dev/null
+++ b/projects/sample/autoconf/m4/libtool.m4
@@ -0,0 +1,6389 @@
+# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*-
+## Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005
+## Free Software Foundation, Inc.
+## Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+##
+## This file is free software; the Free Software Foundation gives
+## unlimited permission to copy and/or distribute it, with or without
+## modifications, as long as this notice is preserved.
+
+# serial 48 AC_PROG_LIBTOOL
+
+
+# AC_PROVIDE_IFELSE(MACRO-NAME, IF-PROVIDED, IF-NOT-PROVIDED)
+# -----------------------------------------------------------
+# If this macro is not defined by Autoconf, define it here.
+m4_ifdef([AC_PROVIDE_IFELSE],
+ [],
+ [m4_define([AC_PROVIDE_IFELSE],
+ [m4_ifdef([AC_PROVIDE_$1],
+ [$2], [$3])])])
+
+
+# AC_PROG_LIBTOOL
+# ---------------
+AC_DEFUN([AC_PROG_LIBTOOL],
+[AC_REQUIRE([_AC_PROG_LIBTOOL])dnl
+dnl If AC_PROG_CXX has already been expanded, run AC_LIBTOOL_CXX
+dnl immediately; otherwise, hook it in at the end of AC_PROG_CXX.
+ AC_PROVIDE_IFELSE([AC_PROG_CXX],
+ [AC_LIBTOOL_CXX],
+ [define([AC_PROG_CXX], defn([AC_PROG_CXX])[AC_LIBTOOL_CXX
+ ])])
+dnl And a similar setup for Fortran 77 support
+ AC_PROVIDE_IFELSE([AC_PROG_F77],
+ [AC_LIBTOOL_F77],
+ [define([AC_PROG_F77], defn([AC_PROG_F77])[AC_LIBTOOL_F77
+])])
+
+dnl Quote A][M_PROG_GCJ so that aclocal doesn't bring it in needlessly.
+dnl If either AC_PROG_GCJ or A][M_PROG_GCJ has already been expanded, run
+dnl AC_LIBTOOL_GCJ immediately, otherwise, hook it in at the end of both.
+ AC_PROVIDE_IFELSE([AC_PROG_GCJ],
+ [AC_LIBTOOL_GCJ],
+ [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],
+ [AC_LIBTOOL_GCJ],
+ [AC_PROVIDE_IFELSE([LT_AC_PROG_GCJ],
+ [AC_LIBTOOL_GCJ],
+ [ifdef([AC_PROG_GCJ],
+ [define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[AC_LIBTOOL_GCJ])])
+ ifdef([A][M_PROG_GCJ],
+ [define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[AC_LIBTOOL_GCJ])])
+ ifdef([LT_AC_PROG_GCJ],
+ [define([LT_AC_PROG_GCJ],
+ defn([LT_AC_PROG_GCJ])[AC_LIBTOOL_GCJ])])])])
+])])# AC_PROG_LIBTOOL
+
+
+# _AC_PROG_LIBTOOL
+# ----------------
+AC_DEFUN([_AC_PROG_LIBTOOL],
+[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl
+AC_BEFORE([$0],[AC_LIBTOOL_CXX])dnl
+AC_BEFORE([$0],[AC_LIBTOOL_F77])dnl
+AC_BEFORE([$0],[AC_LIBTOOL_GCJ])dnl
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/mklib'
+AC_SUBST(LIBTOOL)dnl
+
+# Prevent multiple expansion
+define([AC_PROG_LIBTOOL], [])
+])# _AC_PROG_LIBTOOL
+
+
+# AC_LIBTOOL_SETUP
+# ----------------
+AC_DEFUN([AC_LIBTOOL_SETUP],
+[AC_PREREQ(2.60)dnl
+AC_REQUIRE([AC_ENABLE_SHARED])dnl
+AC_REQUIRE([AC_ENABLE_STATIC])dnl
+AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_PROG_LD])dnl
+AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl
+AC_REQUIRE([AC_PROG_NM])dnl
+
+AC_REQUIRE([AC_PROG_LN_S])dnl
+AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl
+# Autoconf 2.13's AC_OBJEXT and AC_EXEEXT macros only work for C compilers!
+AC_REQUIRE([AC_OBJEXT])dnl
+AC_REQUIRE([AC_EXEEXT])dnl
+dnl
+
+AC_LIBTOOL_SYS_MAX_CMD_LEN
+AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
+AC_LIBTOOL_OBJDIR
+
+AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl
+_LT_AC_PROG_ECHO_BACKSLASH
+
+case $host_os in
+aix3*)
+ # AIX sometimes has problems with the GCC collect2 program. For some
+ # reason, if we set the COLLECT_NAMES environment variable, the problems
+ # vanish in a puff of smoke.
+ if test "X${COLLECT_NAMES+set}" != Xset; then
+ COLLECT_NAMES=
+ export COLLECT_NAMES
+ fi
+ ;;
+esac
+
+# Sed substitution that helps us do robust quoting. It backslashifies
+# metacharacters that are still active within double-quoted strings.
+Xsed='sed -e 1s/^X//'
+[sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g']
+
+# Same as above, but do not quote variable references.
+[double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g']
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Sed substitution to avoid accidental globbing in evaled expressions
+no_glob_subst='s/\*/\\\*/g'
+
+# Constants:
+rm="rm -f"
+
+# Global variables:
+default_ofile=mklib
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+ltmain="$ac_aux_dir/ltmain.sh"
+ofile="$default_ofile"
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+
+AC_CHECK_TOOL(AR, ar, false)
+AC_CHECK_TOOL(RANLIB, ranlib, :)
+AC_CHECK_TOOL(STRIP, strip, :)
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Set sane defaults for various variables
+test -z "$AR" && AR=ar
+test -z "$AR_FLAGS" && AR_FLAGS=cru
+test -z "$AS" && AS=as
+test -z "$CC" && CC=cc
+test -z "$LTCC" && LTCC=$CC
+test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+test -z "$LD" && LD=ld
+test -z "$LN_S" && LN_S="ln -s"
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+test -z "$NM" && NM=nm
+test -z "$SED" && SED=sed
+test -z "$OBJDUMP" && OBJDUMP=objdump
+test -z "$RANLIB" && RANLIB=:
+test -z "$STRIP" && STRIP=:
+test -z "$ac_objext" && ac_objext=o
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+if test -n "$RANLIB"; then
+ case $host_os in
+ openbsd*)
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib"
+ ;;
+ *)
+ old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib"
+ ;;
+ esac
+ old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib"
+fi
+
+_LT_CC_BASENAME([$compiler])
+
+# Only perform the check for file, if the check method requires it
+case $deplibs_check_method in
+file_magic*)
+ if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+ AC_PATH_MAGIC
+ fi
+ ;;
+esac
+
+AC_PROVIDE_IFELSE([AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no)
+AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL],
+enable_win32_dll=yes, enable_win32_dll=no)
+
+AC_ARG_ENABLE([libtool-lock],
+ [AS_HELP_STRING([--disable-libtool-lock],[avoid locking (might break parallel builds)])])
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+AC_ARG_WITH([pic],
+ [AS_HELP_STRING([--with-pic],[try to use only PIC/non-PIC objects @<:@default=use both@:>@])],
+ [pic_mode="$withval"],
+ [pic_mode=default])
+test -z "$pic_mode" && pic_mode=default
+
+# Use C for the default configuration in the libtool script
+tagname=
+AC_LIBTOOL_LANG_C_CONFIG
+_LT_AC_TAGCONFIG
+])# AC_LIBTOOL_SETUP
+
+
+# _LT_AC_SYS_COMPILER
+# -------------------
+AC_DEFUN([_LT_AC_SYS_COMPILER],
+[AC_REQUIRE([AC_PROG_CC])dnl
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+])# _LT_AC_SYS_COMPILER
+
+
+# _LT_CC_BASENAME(CC)
+# -------------------
+# Calculate cc_basename. Skip known compiler wrappers and cross-prefix.
+AC_DEFUN([_LT_CC_BASENAME],
+[for cc_temp in $1""; do
+ case $cc_temp in
+ compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;;
+ distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;;
+ \-*) ;;
+ *) break;;
+ esac
+done
+cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"`
+])
+
+
+# _LT_COMPILER_BOILERPLATE
+# ------------------------
+# Check for compiler boilerplate output or warnings with
+# the simple compiler test code.
+AC_DEFUN([_LT_COMPILER_BOILERPLATE],
+[ac_outfile=conftest.$ac_objext
+printf "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$rm conftest*
+])# _LT_COMPILER_BOILERPLATE
+
+
+# _LT_LINKER_BOILERPLATE
+# ----------------------
+# Check for linker boilerplate output or warnings with
+# the simple link test code.
+AC_DEFUN([_LT_LINKER_BOILERPLATE],
+[ac_outfile=conftest.$ac_objext
+printf "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$rm conftest*
+])# _LT_LINKER_BOILERPLATE
+
+
+# _LT_AC_SYS_LIBPATH_AIX
+# ----------------------
+# Links a minimal program and checks the executable
+# for the system default hardcoded library path. In most cases,
+# this is /usr/lib:/lib, but when the MPI compilers are used
+# the locations of the communication and MPI libs are included too.
+# If we don't find anything, use the default library path according
+# to the AIX ld manual.
+AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX],
+[AC_LINK_IFELSE(AC_LANG_PROGRAM,[
+aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/; p; }
+}'`
+# Check for a 64-bit object if we didn't find anything.
+if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/; p; }
+}'`; fi],[])
+if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi
+])# _LT_AC_SYS_LIBPATH_AIX
+
+
+# _LT_AC_SHELL_INIT(ARG)
+# ----------------------
+AC_DEFUN([_LT_AC_SHELL_INIT],
+[ifdef([AC_DIVERSION_NOTICE],
+ [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)],
+ [AC_DIVERT_PUSH(NOTICE)])
+$1
+AC_DIVERT_POP
+])# _LT_AC_SHELL_INIT
+
+
+# _LT_AC_PROG_ECHO_BACKSLASH
+# --------------------------
+# Add some code to the start of the generated configure script which
+# will find an echo command which doesn't interpret backslashes.
+AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH],
+[_LT_AC_SHELL_INIT([
+# Check that we are running under the correct shell.
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+case X$ECHO in
+X*--fallback-echo)
+ # Remove one level of quotation (which was required for Make).
+ ECHO=`echo "$ECHO" | sed 's,\\\\\[$]\\[$]0,'[$]0','`
+ ;;
+esac
+
+echo=${ECHO-echo}
+if test "X[$]1" = X--no-reexec; then
+ # Discard the --no-reexec flag, and continue.
+ shift
+elif test "X[$]1" = X--fallback-echo; then
+ # Avoid inline document here, it may be left over
+ :
+elif test "X`($echo '\t') 2>/dev/null`" = 'X\t' ; then
+ # Yippee, $echo works!
+ :
+else
+ # Restart under the correct shell.
+ exec $SHELL "[$]0" --no-reexec ${1+"[$]@"}
+fi
+
+if test "X[$]1" = X--fallback-echo; then
+ # used as fallback echo
+ shift
+ cat <<EOF
+[$]*
+EOF
+ exit 0
+fi
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+if test -z "$ECHO"; then
+if test "X${echo_test_string+set}" != Xset; then
+# find a string as large as possible, as long as the shell can cope with it
+ for cmd in 'sed 50q "[$]0"' 'sed 20q "[$]0"' 'sed 10q "[$]0"' 'sed 2q "[$]0"' 'echo test'; do
+ # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ...
+ if (echo_test_string=`eval $cmd`) 2>/dev/null &&
+ echo_test_string=`eval $cmd` &&
+ (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null
+ then
+ break
+ fi
+ done
+fi
+
+if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ :
+else
+ # The Solaris, AIX, and Digital Unix default echo programs unquote
+ # backslashes. This makes it impossible to quote backslashes using
+ # echo "$something" | sed 's/\\/\\\\/g'
+ #
+ # So, first we look for a working echo in the user's PATH.
+
+ lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+ for dir in $PATH /usr/ucb; do
+ IFS="$lt_save_ifs"
+ if (test -f $dir/echo || test -f $dir/echo$ac_exeext) &&
+ test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ echo="$dir/echo"
+ break
+ fi
+ done
+ IFS="$lt_save_ifs"
+
+ if test "X$echo" = Xecho; then
+ # We didn't find a better echo, so look for alternatives.
+ if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ # This shell has a builtin print -r that does the trick.
+ echo='print -r'
+ elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) &&
+ test "X$CONFIG_SHELL" != X/bin/ksh; then
+ # If we have ksh, try running configure again with it.
+ ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh}
+ export ORIGINAL_CONFIG_SHELL
+ CONFIG_SHELL=/bin/ksh
+ export CONFIG_SHELL
+ exec $CONFIG_SHELL "[$]0" --no-reexec ${1+"[$]@"}
+ else
+ # Try using printf.
+ echo='printf %s\n'
+ if test "X`($echo '\t') 2>/dev/null`" = 'X\t' &&
+ echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ # Cool, printf works
+ :
+ elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` &&
+ test "X$echo_testing_string" = 'X\t' &&
+ echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL
+ export CONFIG_SHELL
+ SHELL="$CONFIG_SHELL"
+ export SHELL
+ echo="$CONFIG_SHELL [$]0 --fallback-echo"
+ elif echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` &&
+ test "X$echo_testing_string" = 'X\t' &&
+ echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` &&
+ test "X$echo_testing_string" = "X$echo_test_string"; then
+ echo="$CONFIG_SHELL [$]0 --fallback-echo"
+ else
+ # maybe with a smaller string...
+ prev=:
+
+ for cmd in 'echo test' 'sed 2q "[$]0"' 'sed 10q "[$]0"' 'sed 20q "[$]0"' 'sed 50q "[$]0"'; do
+ if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null
+ then
+ break
+ fi
+ prev="$cmd"
+ done
+
+ if test "$prev" != 'sed 50q "[$]0"'; then
+ echo_test_string=`eval $prev`
+ export echo_test_string
+ exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "[$]0" ${1+"[$]@"}
+ else
+ # Oops. We lost completely, so just stick with echo.
+ echo=echo
+ fi
+ fi
+ fi
+ fi
+fi
+fi
+
+# Copy echo and quote the copy suitably for passing to libtool from
+# the Makefile, instead of quoting the original, which is used later.
+ECHO=$echo
+if test "X$ECHO" = "X$CONFIG_SHELL [$]0 --fallback-echo"; then
+ ECHO="$CONFIG_SHELL \\\$\[$]0 --fallback-echo"
+fi
+
+AC_SUBST(ECHO)
+])])# _LT_AC_PROG_ECHO_BACKSLASH
+
+
+# _LT_AC_LOCK
+# -----------
+AC_DEFUN([_LT_AC_LOCK],
+[AC_ARG_ENABLE([libtool-lock],
+ [AS_HELP_STRING([--disable-libtool-lock],[avoid locking (might break parallel builds)])])
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+ia64-*-hpux*)
+ # Find out which ABI we are using.
+ echo 'int i;' > conftest.$ac_ext
+ if AC_TRY_EVAL(ac_compile); then
+ case `/usr/bin/file conftest.$ac_objext` in
+ *ELF-32*)
+ HPUX_IA64_MODE="32"
+ ;;
+ *ELF-64*)
+ HPUX_IA64_MODE="64"
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+*-*-irix6*)
+ # Find out which ABI we are using.
+ echo '[#]line __oline__ "configure"' > conftest.$ac_ext
+ if AC_TRY_EVAL(ac_compile); then
+ if test "$lt_cv_prog_gnu_ld" = yes; then
+ case `/usr/bin/file conftest.$ac_objext` in
+ *32-bit*)
+ LD="${LD-ld} -melf32bsmip"
+ ;;
+ *N32*)
+ LD="${LD-ld} -melf32bmipn32"
+ ;;
+ *64-bit*)
+ LD="${LD-ld} -melf64bmip"
+ ;;
+ esac
+ else
+ case `/usr/bin/file conftest.$ac_objext` in
+ *32-bit*)
+ LD="${LD-ld} -32"
+ ;;
+ *N32*)
+ LD="${LD-ld} -n32"
+ ;;
+ *64-bit*)
+ LD="${LD-ld} -64"
+ ;;
+ esac
+ fi
+ fi
+ rm -rf conftest*
+ ;;
+
+x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*)
+ # Find out which ABI we are using.
+ echo 'int i;' > conftest.$ac_ext
+ if AC_TRY_EVAL(ac_compile); then
+ case `/usr/bin/file conftest.o` in
+ *32-bit*)
+ case $host in
+ x86_64-*linux*)
+ LD="${LD-ld} -m elf_i386"
+ ;;
+ ppc64-*linux*|powerpc64-*linux*)
+ LD="${LD-ld} -m elf32ppclinux"
+ ;;
+ s390x-*linux*)
+ LD="${LD-ld} -m elf_s390"
+ ;;
+ sparc64-*linux*)
+ LD="${LD-ld} -m elf32_sparc"
+ ;;
+ esac
+ ;;
+ *64-bit*)
+ case $host in
+ x86_64-*linux*)
+ LD="${LD-ld} -m elf_x86_64"
+ ;;
+ ppc*-*linux*|powerpc*-*linux*)
+ LD="${LD-ld} -m elf64ppc"
+ ;;
+ s390*-*linux*)
+ LD="${LD-ld} -m elf64_s390"
+ ;;
+ sparc*-*linux*)
+ LD="${LD-ld} -m elf64_sparc"
+ ;;
+ esac
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+
+*-*-sco3.2v5*)
+ # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+ SAVE_CFLAGS="$CFLAGS"
+ CFLAGS="$CFLAGS -belf"
+ AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf,
+ [AC_LANG_PUSH(C)
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no])
+ AC_LANG_POP])
+ if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+ # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+ CFLAGS="$SAVE_CFLAGS"
+ fi
+ ;;
+sparc*-*solaris*)
+ # Find out which ABI we are using.
+ echo 'int i;' > conftest.$ac_ext
+ if AC_TRY_EVAL(ac_compile); then
+ case `/usr/bin/file conftest.o` in
+ *64-bit*)
+ case $lt_cv_prog_gnu_ld in
+ yes*) LD="${LD-ld} -m elf64_sparc" ;;
+ *) LD="${LD-ld} -64" ;;
+ esac
+ ;;
+ esac
+ fi
+ rm -rf conftest*
+ ;;
+
+AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL],
+[*-*-cygwin* | *-*-mingw* | *-*-pw32*)
+ AC_CHECK_TOOL(DLLTOOL, dlltool, false)
+ AC_CHECK_TOOL(AS, as, false)
+ AC_CHECK_TOOL(OBJDUMP, objdump, false)
+ ;;
+ ])
+esac
+
+need_locks="$enable_libtool_lock"
+
+])# _LT_AC_LOCK
+
+
+# AC_LIBTOOL_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE])
+# ----------------------------------------------------------------
+# Check whether the given compiler option works
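+#
+# A hypothetical invocation sketch (variable name and flag are illustrative):
+# cache the probe's answer in the named variable and append the flag on
+# success:
+#   AC_LIBTOOL_COMPILER_OPTION([if $compiler supports -fno-rtti],
+#     [lt_cv_compiler_rtti], [-fno-rtti], [],
+#     [CFLAGS="$CFLAGS -fno-rtti"])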
+AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION],
+[AC_REQUIRE([LT_AC_PROG_SED])
+AC_CACHE_CHECK([$1], [$2],
+ [$2=no
+ ifelse([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4])
+ printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+ lt_compiler_flag="$3"
+ # Insert the option either (1) after the last *FLAGS variable, or
+ # (2) before a word containing "conftest.", or (3) at the end.
+ # Note that $ac_compile itself does not contain backslashes and begins
+ # with a dollar sign (not a hyphen), so the echo should work correctly.
+ # The option is referenced via a variable to avoid confusing sed.
+ lt_compile=`echo "$ac_compile" | $SED \
+ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+ -e 's:$: $lt_compiler_flag:'`
+ (eval echo "\"\$as_me:__oline__: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+ (eval "$lt_compile" 2>conftest.err)
+ ac_status=$?
+ cat conftest.err >&AS_MESSAGE_LOG_FD
+ echo "$as_me:__oline__: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+ if (exit $ac_status) && test -s "$ac_outfile"; then
+ # The compiler can only warn and ignore the option if not recognized
+ # So say no if there are warnings other than the usual output.
+ $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp
+ $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+ if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then
+ $2=yes
+ fi
+ fi
+ $rm conftest*
+])
+
+if test x"[$]$2" = xyes; then
+ ifelse([$5], , :, [$5])
+else
+ ifelse([$6], , :, [$6])
+fi
+])# AC_LIBTOOL_COMPILER_OPTION
+
+
+# AC_LIBTOOL_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS,
+# [ACTION-SUCCESS], [ACTION-FAILURE])
+# ------------------------------------------------------------
+# Check whether the given linker option works
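+#
+# A hypothetical invocation sketch (variable name and flag are illustrative):
+#   AC_LIBTOOL_LINKER_OPTION([whether the linker accepts -z nodelete],
+#     [lt_cv_ld_nodelete], [-Wl,-z -Wl,nodelete],
+#     [link_flags="$link_flags -Wl,-z -Wl,nodelete"])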
+AC_DEFUN([AC_LIBTOOL_LINKER_OPTION],
+[AC_CACHE_CHECK([$1], [$2],
+ [$2=no
+ save_LDFLAGS="$LDFLAGS"
+ LDFLAGS="$LDFLAGS $3"
+ printf "$lt_simple_link_test_code" > conftest.$ac_ext
+ if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+ # The linker can only warn and ignore the option if not recognized
+ # So say no if there are warnings
+ if test -s conftest.err; then
+ # Append any errors to the config.log.
+ cat conftest.err 1>&AS_MESSAGE_LOG_FD
+ $echo "X$_lt_linker_boilerplate" | $Xsed -e '/^$/d' > conftest.exp
+ $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+ if diff conftest.exp conftest.er2 >/dev/null; then
+ $2=yes
+ fi
+ else
+ $2=yes
+ fi
+ fi
+ $rm conftest*
+ LDFLAGS="$save_LDFLAGS"
+])
+
+if test x"[$]$2" = xyes; then
+ ifelse([$4], , :, [$4])
+else
+ ifelse([$5], , :, [$5])
+fi
+])# AC_LIBTOOL_LINKER_OPTION
+
+
+# AC_LIBTOOL_SYS_MAX_CMD_LEN
+# --------------------------
+AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN],
+[# find the maximum length of command line arguments
+AC_MSG_CHECKING([the maximum length of command line arguments])
+AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl
+ i=0
+ teststring="ABCD"
+
+ case $build_os in
+ msdosdjgpp*)
+ # On DJGPP, this test can blow up pretty badly due to problems in libc
+ # (any single argument exceeding 2000 bytes causes a buffer overrun
+ # during glob expansion). Even if it were fixed, the result of this
+ # check would be larger than it should be.
+ lt_cv_sys_max_cmd_len=12288; # 12K is about right
+ ;;
+
+ gnu*)
+ # Under GNU Hurd, this test is not required because there is
+ # no limit to the length of command line arguments.
+ # Libtool will interpret -1 as no limit whatsoever
+ lt_cv_sys_max_cmd_len=-1;
+ ;;
+
+ cygwin* | mingw*)
+ # On Win9x/ME, this test blows up -- it succeeds, but takes
+ # about 5 minutes as the teststring grows exponentially.
+ # Worse, since 9x/ME are not pre-emptively multitasking,
+ # you end up with a "frozen" computer, even though with patience
+ # the test eventually succeeds (with a max line length of 256k).
+ # Instead, let's just punt: use the minimum linelength reported by
+ # all of the supported platforms: 8192 (on NT/2K/XP).
+ lt_cv_sys_max_cmd_len=8192;
+ ;;
+
+ amigaos*)
+ # On AmigaOS with pdksh, this test takes hours, literally.
+ # So we just punt and use a minimum line length of 8192.
+ lt_cv_sys_max_cmd_len=8192;
+ ;;
+
+ netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
+ # This has been around since 386BSD, at least. Likely further.
+ if test -x /sbin/sysctl; then
+ lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+ elif test -x /usr/sbin/sysctl; then
+ lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+ else
+ lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs
+ fi
+ # And add a safety zone
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+ ;;
+
+ interix*)
+ # We know the value 262144 and hardcode it with a safety zone (like BSD)
+ lt_cv_sys_max_cmd_len=196608
+ ;;
+
+ osf*)
+ # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+ # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so let's avoid the loop below.
+ # First set a reasonable default.
+ lt_cv_sys_max_cmd_len=16384
+ #
+ if test -x /sbin/sysconfig; then
+ case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+ *1*) lt_cv_sys_max_cmd_len=-1 ;;
+ esac
+ fi
+ ;;
+ sco3.2v5*)
+ lt_cv_sys_max_cmd_len=102400
+ ;;
+ sysv5* | sco5v6* | sysv4.2uw2*)
+ kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+ if test -n "$kargmax"; then
+ lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'`
+ else
+ lt_cv_sys_max_cmd_len=32768
+ fi
+ ;;
+ *)
+ # If test is not a shell built-in, we'll probably end up computing a
+ # maximum length that is only half of the actual maximum length, but
+ # we can't tell.
+ SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+ while (test "X"`$SHELL [$]0 --fallback-echo "X$teststring" 2>/dev/null` \
+ = "XX$teststring") >/dev/null 2>&1 &&
+ new_result=`expr "X$teststring" : ".*" 2>&1` &&
+ lt_cv_sys_max_cmd_len=$new_result &&
+ test $i != 17 # 1/2 MB should be enough
+ do
+ i=`expr $i + 1`
+ teststring=$teststring$teststring
+ done
+ teststring=
+ # Add a significant safety factor because C++ compilers can tack on massive
+ # amounts of additional arguments before passing them to the linker.
+ # It appears as though 1/2 is a usable value.
+ lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+ ;;
+ esac
+])
+if test -n "$lt_cv_sys_max_cmd_len" ; then
+ AC_MSG_RESULT($lt_cv_sys_max_cmd_len)
+else
+ AC_MSG_RESULT(none)
+fi
+])# AC_LIBTOOL_SYS_MAX_CMD_LEN
+
+
+# _LT_AC_CHECK_DLFCN
+# ------------------
+AC_DEFUN([_LT_AC_CHECK_DLFCN],
+[AC_CHECK_HEADERS(dlfcn.h)dnl
+])# _LT_AC_CHECK_DLFCN
+
+
+# _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE,
+# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING)
+# ---------------------------------------------------------------------
+AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF],
+[AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl
+if test "$cross_compiling" = yes; then :
+ [$4]
+else
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<EOF
+[#line __oline__ "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LT_DLGLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LT_DLGLOBAL DL_GLOBAL
+# else
+# define LT_DLGLOBAL 0
+# endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW on the command line if we
+   find out it does not work on some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LT_DLLAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LT_DLLAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LT_DLLAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LT_DLLAZY_OR_NOW DL_NOW
+# else
+# define LT_DLLAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" void exit (int);
+#endif
+
+void fnord() { int i=42;}
+int main ()
+{
+ void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+ int status = $lt_dlunknown;
+
+ if (self)
+ {
+ if (dlsym (self,"fnord")) status = $lt_dlno_uscore;
+ else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+ /* dlclose (self); */
+ }
+ else
+ puts (dlerror ());
+
+ exit (status);
+}]
+EOF
+ if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then
+ (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null
+ lt_status=$?
+ case x$lt_status in
+ x$lt_dlno_uscore) $1 ;;
+ x$lt_dlneed_uscore) $2 ;;
+ x$lt_dlunknown|x*) $3 ;;
+ esac
+ else :
+ # compilation failed
+ $3
+ fi
+fi
+rm -fr conftest*
+])# _LT_AC_TRY_DLOPEN_SELF
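+
+# Usage sketch (for orientation; the argument order is given in the heading
+# above): the four actions correspond to the probe's outcome, roughly
+#   _LT_AC_TRY_DLOPEN_SELF(action-if-dlopen-works,
+#                          action-if-underscore-prefix-needed,
+#                          action-if-it-fails, action-if-cross-compiling)
+# AC_LIBTOOL_DLOPEN_SELF below invokes it exactly this way to set
+# lt_cv_dlopen_self to yes, yes, no or cross.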
+
+
+# AC_LIBTOOL_DLOPEN_SELF
+# ----------------------
+AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF],
+[AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl
+if test "x$enable_dlopen" != xyes; then
+ enable_dlopen=unknown
+ enable_dlopen_self=unknown
+ enable_dlopen_self_static=unknown
+else
+ lt_cv_dlopen=no
+ lt_cv_dlopen_libs=
+
+ case $host_os in
+ beos*)
+ lt_cv_dlopen="load_add_on"
+ lt_cv_dlopen_libs=
+ lt_cv_dlopen_self=yes
+ ;;
+
+ mingw* | pw32*)
+ lt_cv_dlopen="LoadLibrary"
+ lt_cv_dlopen_libs=
+ ;;
+
+ cygwin*)
+ lt_cv_dlopen="dlopen"
+ lt_cv_dlopen_libs=
+ ;;
+
+ darwin*)
+ # if libdl is installed we need to link against it
+ AC_CHECK_LIB([dl], [dlopen],
+ [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[
+ lt_cv_dlopen="dyld"
+ lt_cv_dlopen_libs=
+ lt_cv_dlopen_self=yes
+ ])
+ ;;
+
+ *)
+ AC_CHECK_FUNC([shl_load],
+ [lt_cv_dlopen="shl_load"],
+ [AC_CHECK_LIB([dld], [shl_load],
+ [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld"],
+ [AC_CHECK_FUNC([dlopen],
+ [lt_cv_dlopen="dlopen"],
+ [AC_CHECK_LIB([dl], [dlopen],
+ [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],
+ [AC_CHECK_LIB([svld], [dlopen],
+ [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"],
+ [AC_CHECK_LIB([dld], [dld_link],
+ [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"])
+ ])
+ ])
+ ])
+ ])
+ ])
+ ;;
+ esac
+
+ if test "x$lt_cv_dlopen" != xno; then
+ enable_dlopen=yes
+ else
+ enable_dlopen=no
+ fi
+
+ case $lt_cv_dlopen in
+ dlopen)
+ save_CPPFLAGS="$CPPFLAGS"
+ test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+ save_LDFLAGS="$LDFLAGS"
+ wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\"
+
+ save_LIBS="$LIBS"
+ LIBS="$lt_cv_dlopen_libs $LIBS"
+
+ AC_CACHE_CHECK([whether a program can dlopen itself],
+ lt_cv_dlopen_self, [dnl
+ _LT_AC_TRY_DLOPEN_SELF(
+ lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes,
+ lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross)
+ ])
+
+ if test "x$lt_cv_dlopen_self" = xyes; then
+ wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
+ AC_CACHE_CHECK([whether a statically linked program can dlopen itself],
+ lt_cv_dlopen_self_static, [dnl
+ _LT_AC_TRY_DLOPEN_SELF(
+ lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes,
+ lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross)
+ ])
+ fi
+
+ CPPFLAGS="$save_CPPFLAGS"
+ LDFLAGS="$save_LDFLAGS"
+ LIBS="$save_LIBS"
+ ;;
+ esac
+
+ case $lt_cv_dlopen_self in
+ yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+ *) enable_dlopen_self=unknown ;;
+ esac
+
+ case $lt_cv_dlopen_self_static in
+ yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+ *) enable_dlopen_self_static=unknown ;;
+ esac
+fi
+])# AC_LIBTOOL_DLOPEN_SELF
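+
+# Typical configure.ac usage (a sketch; not mandated by this file):
+#   AC_LIBTOOL_DLOPEN
+#   AC_PROG_LIBTOOL
+# AC_LIBTOOL_DLOPEN (defined below) must precede the libtool setup;
+# afterwards $enable_dlopen_self is one of yes, no or unknown.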
+
+
+# AC_LIBTOOL_PROG_CC_C_O([TAGNAME])
+# ---------------------------------
+# Check to see if options -c and -o are simultaneously supported by the compiler
+AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O],
+[AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl
+AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext],
+ [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)],
+ [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no
+ $rm -r conftest 2>/dev/null
+ mkdir conftest
+ cd conftest
+ mkdir out
+ printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+ lt_compiler_flag="-o out/conftest2.$ac_objext"
+ # Insert the option either (1) after the last *FLAGS variable, or
+ # (2) before a word containing "conftest.", or (3) at the end.
+ # Note that $ac_compile itself does not contain backslashes and begins
+ # with a dollar sign (not a hyphen), so the echo should work correctly.
+ lt_compile=`echo "$ac_compile" | $SED \
+ -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+ -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \
+ -e 's:$: $lt_compiler_flag:'`
+ (eval echo "\"\$as_me:__oline__: $lt_compile\"" >&AS_MESSAGE_LOG_FD)
+ (eval "$lt_compile" 2>out/conftest.err)
+ ac_status=$?
+ cat out/conftest.err >&AS_MESSAGE_LOG_FD
+ echo "$as_me:__oline__: \$? = $ac_status" >&AS_MESSAGE_LOG_FD
+ if (exit $ac_status) && test -s out/conftest2.$ac_objext
+ then
+ # The compiler can only warn and ignore the option if it is not recognized,
+ # so say no if there are warnings.
+ $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' > out/conftest.exp
+ $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+ if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then
+ _LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+ fi
+ fi
+ chmod u+w . 2>&AS_MESSAGE_LOG_FD
+ $rm conftest*
+ # The SGI C++ compiler creates the directory out/ii_files/ for
+ # template instantiation.
+ test -d out/ii_files && $rm out/ii_files/* && rmdir out/ii_files
+ $rm out/* && rmdir out
+ cd ..
+ rmdir conftest
+ $rm conftest*
+])
+])# AC_LIBTOOL_PROG_CC_C_O
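+
+# Worked example of the $lt_compile rewrite above (illustrative command; the
+# real $ac_compile comes from autoconf): rule (1) fires after the last *FLAGS
+# word, so
+#   gcc -c $CFLAGS $CPPFLAGS conftest.c
+# becomes
+#   gcc -c $CFLAGS $CPPFLAGS -o out/conftest2.o conftest.c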
+
+
+# AC_LIBTOOL_SYS_HARD_LINK_LOCKS([TAGNAME])
+# -----------------------------------------
+# Check to see if we can do hard links to lock some files if needed
+AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS],
+[AC_REQUIRE([_LT_AC_LOCK])dnl
+
+hard_links="nottested"
+if test "$_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then
+ # do not overwrite the value of need_locks provided by the user
+ AC_MSG_CHECKING([if we can lock with hard links])
+ hard_links=yes
+ $rm conftest*
+ ln conftest.a conftest.b 2>/dev/null && hard_links=no
+ touch conftest.a
+ ln conftest.a conftest.b 2>&5 || hard_links=no
+ ln conftest.a conftest.b 2>/dev/null && hard_links=no
+ AC_MSG_RESULT([$hard_links])
+ if test "$hard_links" = no; then
+ AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe])
+ need_locks=warn
+ fi
+else
+ need_locks=no
+fi
+])# AC_LIBTOOL_SYS_HARD_LINK_LOCKS
+
+
+# AC_LIBTOOL_OBJDIR
+# -----------------
+AC_DEFUN([AC_LIBTOOL_OBJDIR],
+[AC_CACHE_CHECK([for objdir], [lt_cv_objdir],
+[rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+ lt_cv_objdir=.libs
+else
+ # MS-DOS does not allow filenames that begin with a dot.
+ lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null])
+objdir=$lt_cv_objdir
+])# AC_LIBTOOL_OBJDIR
+
+
+# AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH([TAGNAME])
+# ----------------------------------------------
+# Check hardcoding attributes.
+AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH],
+[AC_MSG_CHECKING([how to hardcode library paths into programs])
+_LT_AC_TAGVAR(hardcode_action, $1)=
+if test -n "$_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)" || \
+ test -n "$_LT_AC_TAGVAR(runpath_var, $1)" || \
+ test "X$_LT_AC_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then
+
+ # We can hardcode non-existent directories.
+ if test "$_LT_AC_TAGVAR(hardcode_direct, $1)" != no &&
+ # If the only mechanism to avoid hardcoding is shlibpath_var, we
+ # have to relink, otherwise we might link with an installed library
+ # when we should be linking with a yet-to-be-installed one
+ ## test "$_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)" != no &&
+ test "$_LT_AC_TAGVAR(hardcode_minus_L, $1)" != no; then
+ # Linking always hardcodes the temporary library directory.
+ _LT_AC_TAGVAR(hardcode_action, $1)=relink
+ else
+ # We can link without hardcoding, and we can hardcode non-existent dirs.
+ _LT_AC_TAGVAR(hardcode_action, $1)=immediate
+ fi
+else
+ # We cannot hardcode anything, or else we can only hardcode existing
+ # directories.
+ _LT_AC_TAGVAR(hardcode_action, $1)=unsupported
+fi
+AC_MSG_RESULT([$_LT_AC_TAGVAR(hardcode_action, $1)])
+
+if test "$_LT_AC_TAGVAR(hardcode_action, $1)" = relink; then
+ # Fast installation is not supported
+ enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+ test "$enable_shared" = no; then
+ # Fast installation is not necessary
+ enable_fast_install=needless
+fi
+])# AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH
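+
+# Summary of the hardcode_action values assigned above:
+#   immediate   -- we can link without hardcoding, or hardcoded paths may
+#                  point at not-yet-existing directories
+#   relink      -- linking hardcodes the temporary build directory, so
+#                  installation must relink (fast install is disabled below)
+#   unsupported -- we cannot hardcode run paths at all, or can only
+#                  hardcode existing directories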
+
+
+# AC_LIBTOOL_SYS_LIB_STRIP
+# ------------------------
+AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP],
+[striplib=
+old_striplib=
+AC_MSG_CHECKING([whether stripping libraries is possible])
+if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then
+ test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+ test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+ AC_MSG_RESULT([yes])
+else
+# FIXME - insert some real tests, host_os isn't really good enough
+ case $host_os in
+ darwin*)
+ if test -n "$STRIP" ; then
+ striplib="$STRIP -x"
+ AC_MSG_RESULT([yes])
+ else
+ AC_MSG_RESULT([no])
+ fi
+ ;;
+ *)
+ AC_MSG_RESULT([no])
+ ;;
+ esac
+fi
+])# AC_LIBTOOL_SYS_LIB_STRIP
+
+
+# AC_LIBTOOL_SYS_DYNAMIC_LINKER
+# -----------------------------
+# PORTME Fill in your ld.so characteristics
+AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER],
+[AC_MSG_CHECKING([dynamic linker characteristics])
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+if test "$GCC" = yes; then
+ sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
+ if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then
+ # if the path contains ";" then we assume it to be the separator
+ # otherwise default to the standard path separator (i.e. ":") - it is
+ # assumed that no part of a normal pathname contains ";", but that should
+ # be okay in the real world, where ";" in directory paths is itself problematic.
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+ else
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+ fi
+else
+ sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+ shlibpath_var=LIBPATH
+
+ # AIX 3 has no versioning support, so we append a major version to the name.
+ soname_spec='${libname}${release}${shared_ext}$major'
+ ;;
+
+aix4* | aix5*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ hardcode_into_libs=yes
+ if test "$host_cpu" = ia64; then
+ # AIX 5 supports IA64
+ library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+ shlibpath_var=LD_LIBRARY_PATH
+ else
+ # With GCC up to 2.95.x, collect2 would create an import file
+ # for dependent libraries. The import file would start with
+ # the line `#! .'. This would cause the generated library to
+ # depend on `.', always an invalid library. This was fixed in
+ # development snapshots of GCC prior to 3.0.
+ case $host_os in
+ aix4 | aix4.[[01]] | aix4.[[01]].*)
+ if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+ echo ' yes '
+ echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
+ :
+ else
+ can_build_shared=no
+ fi
+ ;;
+ esac
+ # AIX (on Power*) has no versioning support, so currently we cannot hardcode
+ # the correct soname into the executable. We could probably add versioning
+ # support to collect2, so additional links may become useful in the future.
+ if test "$aix_use_runtimelinking" = yes; then
+ # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+ # instead of lib<name>.a to let people know that these are not
+ # typical AIX shared libraries.
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ else
+ # We keep .a as the extension for shared libraries on AIX 4.2
+ # and later when we are not doing run time linking.
+ library_names_spec='${libname}${release}.a $libname.a'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ fi
+ shlibpath_var=LIBPATH
+ fi
+ ;;
+
+amigaos*)
+ library_names_spec='$libname.ixlibrary $libname.a'
+ # Create ${libname}_ixlibrary.a entries in /sys/libs.
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+ ;;
+
+beos*)
+ library_names_spec='${libname}${shared_ext}'
+ dynamic_linker="$host_os ld.so"
+ shlibpath_var=LIBRARY_PATH
+ ;;
+
+bsdi[[45]]*)
+ version_type=linux
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+ sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+ # the default ld.so.conf also contains /usr/contrib/lib and
+ # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+ # libtool to hard-code these into programs
+ ;;
+
+cygwin* | mingw* | pw32*)
+ version_type=windows
+ shrext_cmds=".dll"
+ need_version=no
+ need_lib_prefix=no
+
+ case $GCC,$host_os in
+ yes,cygwin* | yes,mingw* | yes,pw32*)
+ library_names_spec='$libname.dll.a'
+ # DLL is installed to $(libdir)/../bin by postinstall_cmds
+ postinstall_cmds='base_file=`basename \${file}`~
+ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~
+ dldir=$destdir/`dirname \$dlpath`~
+ test -d \$dldir || mkdir -p \$dldir~
+ $install_prog $dir/$dlname \$dldir/$dlname~
+ chmod a+x \$dldir/$dlname'
+ postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+ dlpath=$dir/\$dldll~
+ $rm \$dlpath'
+ shlibpath_overrides_runpath=yes
+
+ case $host_os in
+ cygwin*)
+ # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+ soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+ sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib"
+ ;;
+ mingw*)
+ # MinGW DLLs use traditional 'lib' prefix
+ soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+ sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
+ if echo "$sys_lib_search_path_spec" | [grep ';[c-zC-Z]:/' >/dev/null]; then
+ # It is most probably a Windows-format PATH printed by
+ # mingw gcc, but we are running on Cygwin. GCC prints its search
+ # path with ";" separators and with drive letters. We can handle the
+ # drive letters (Cygwin fileutils understands them), so leave them,
+ # especially as we might pass files found there to a mingw objdump,
+ # which wouldn't understand a cygwinified path.
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+ else
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+ fi
+ ;;
+ pw32*)
+ # pw32 DLLs use 'pw' prefix rather than 'lib'
+ library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}'
+ ;;
+ esac
+ ;;
+
+ *)
+ library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib'
+ ;;
+ esac
+ dynamic_linker='Win32 ld.exe'
+ # FIXME: first we should search . and the directory the executable is in
+ shlibpath_var=PATH
+ ;;
+
+darwin* | rhapsody*)
+ dynamic_linker="$host_os dyld"
+ version_type=darwin
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+ soname_spec='${libname}${release}${major}$shared_ext'
+ shlibpath_overrides_runpath=yes
+ shlibpath_var=DYLD_LIBRARY_PATH
+ shrext_cmds='.dylib'
+ # Apple's gcc formats the output of 'gcc -print-search-dirs' differently.
+ if test "$GCC" = yes; then
+ sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
+ else
+ sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
+ fi
+ sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+ ;;
+
+dgux*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+freebsd1.*)
+ dynamic_linker=no
+ ;;
+
+kfreebsd*-gnu)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ dynamic_linker='GNU ld.so'
+ ;;
+
+freebsd* | dragonfly*)
+ # DragonFly does not have aout. When/if they implement a new
+ # versioning mechanism, adjust this.
+ if test -x /usr/bin/objformat; then
+ objformat=`/usr/bin/objformat`
+ else
+ case $host_os in
+ freebsd[[123]].*) objformat=aout ;;
+ *) objformat=elf ;;
+ esac
+ fi
+ version_type=freebsd-$objformat
+ case $version_type in
+ freebsd-elf*)
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+ need_version=no
+ need_lib_prefix=no
+ ;;
+ freebsd-*)
+ library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+ need_version=yes
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY_PATH
+ case $host_os in
+ freebsd2.*)
+ shlibpath_overrides_runpath=yes
+ ;;
+ freebsd3.[[01]]* | freebsdelf3.[[01]]*)
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+ freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \
+ freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1)
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ ;;
+ freebsd*) # from 4.6 on
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+ esac
+ ;;
+
+gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ hardcode_into_libs=yes
+ ;;
+
+hpux9* | hpux10* | hpux11*)
+ # Give a soname corresponding to the major version so that dld.sl refuses to
+ # link against other versions.
+ version_type=sunos
+ need_lib_prefix=no
+ need_version=no
+ case $host_cpu in
+ ia64*)
+ shrext_cmds='.so'
+ hardcode_into_libs=yes
+ dynamic_linker="$host_os dld.so"
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ if test "X$HPUX_IA64_MODE" = X32; then
+ sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+ else
+ sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+ fi
+ sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+ ;;
+ hppa*64*)
+ shrext_cmds='.sl'
+ hardcode_into_libs=yes
+ dynamic_linker="$host_os dld.sl"
+ shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+ shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+ sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+ ;;
+ *)
+ shrext_cmds='.sl'
+ dynamic_linker="$host_os dld.sl"
+ shlibpath_var=SHLIB_PATH
+ shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ ;;
+ esac
+ # HP-UX runs *really* slowly unless shared libraries are mode 555.
+ postinstall_cmds='chmod 555 $lib'
+ ;;
+
+interix3*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ ;;
+
+irix5* | irix6* | nonstopux*)
+ case $host_os in
+ nonstopux*) version_type=nonstopux ;;
+ *)
+ if test "$lt_cv_prog_gnu_ld" = yes; then
+ version_type=linux
+ else
+ version_type=irix
+ fi ;;
+ esac
+ need_lib_prefix=no
+ need_version=no
+ soname_spec='${libname}${release}${shared_ext}$major'
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+ case $host_os in
+ irix5* | nonstopux*)
+ libsuff= shlibsuff=
+ ;;
+ *)
+ case $LD in # libtool.m4 will add one of these switches to LD
+ *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+ libsuff= shlibsuff= libmagic=32-bit;;
+ *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+ libsuff=32 shlibsuff=N32 libmagic=N32;;
+ *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+ libsuff=64 shlibsuff=64 libmagic=64-bit;;
+ *) libsuff= shlibsuff= libmagic=never-match;;
+ esac
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+ sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+ hardcode_into_libs=yes
+ ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+ dynamic_linker=no
+ ;;
+
+# This must be Linux ELF.
+linux*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ # This implies no fast_install, which is unacceptable.
+ # Some rework will be needed to allow for fast_install
+ # before this can be enabled.
+ hardcode_into_libs=yes
+
+ # Append ld.so.conf contents to the search path
+ if test -f /etc/ld.so.conf; then
+ lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
+ sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+ fi
+
+ # We used to test for /lib/ld.so.1 and disable shared libraries on
+ # powerpc, because MkLinux only supported shared libraries with the
+ # GNU dynamic linker. Since this was broken with cross compilers,
+ # most powerpc-linux boxes now support dynamic linking, and people can
+ # always pass --disable-shared, so the test was removed and we simply
+ # assume the GNU/Linux dynamic linker is in use.
+ dynamic_linker='GNU/Linux ld.so'
+ ;;
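+
+# As an illustration (hypothetical values, not part of the original file):
+# for a library foo with libname=libfoo, release empty, shared_ext=.so,
+# versuffix=.1.2.3 and major=.1, the Linux specs above expand to
+#   library_names_spec -> libfoo.so.1.2.3 libfoo.so.1 libfoo.so
+#   soname_spec        -> libfoo.so.1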
+
+knetbsd*-gnu)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ dynamic_linker='GNU ld.so'
+ ;;
+
+netbsd*)
+ version_type=sunos
+ need_lib_prefix=no
+ need_version=no
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ dynamic_linker='NetBSD (a.out) ld.so'
+ else
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ dynamic_linker='NetBSD ld.elf_so'
+ fi
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+
+newsos6)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ ;;
+
+nto-qnx*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ ;;
+
+openbsd*)
+ version_type=sunos
+ sys_lib_dlsearch_path_spec="/usr/lib"
+ need_lib_prefix=no
+ # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+ case $host_os in
+ openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+ *) need_version=no ;;
+ esac
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ case $host_os in
+ openbsd2.[[89]] | openbsd2.[[89]].*)
+ shlibpath_overrides_runpath=no
+ ;;
+ *)
+ shlibpath_overrides_runpath=yes
+ ;;
+ esac
+ else
+ shlibpath_overrides_runpath=yes
+ fi
+ ;;
+
+os2*)
+ libname_spec='$name'
+ shrext_cmds=".dll"
+ need_lib_prefix=no
+ library_names_spec='$libname${shared_ext} $libname.a'
+ dynamic_linker='OS/2 ld.exe'
+ shlibpath_var=LIBPATH
+ ;;
+
+osf3* | osf4* | osf5*)
+ version_type=osf
+ need_lib_prefix=no
+ need_version=no
+ soname_spec='${libname}${release}${shared_ext}$major'
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+ sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+ ;;
+
+solaris*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ # ldd complains unless libraries are executable
+ postinstall_cmds='chmod +x $lib'
+ ;;
+
+sunos4*)
+ version_type=sunos
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+ finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ if test "$with_gnu_ld" = yes; then
+ need_lib_prefix=no
+ fi
+ need_version=yes
+ ;;
+
+sysv4 | sysv4.3*)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ case $host_vendor in
+ sni)
+ shlibpath_overrides_runpath=no
+ need_lib_prefix=no
+ export_dynamic_flag_spec='${wl}-Blargedynsym'
+ runpath_var=LD_RUN_PATH
+ ;;
+ siemens)
+ need_lib_prefix=no
+ ;;
+ motorola)
+ need_lib_prefix=no
+ need_version=no
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+ ;;
+ esac
+ ;;
+
+sysv4*MP*)
+ if test -d /usr/nec; then
+ version_type=linux
+ library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+ soname_spec='$libname${shared_ext}.$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ fi
+ ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+ version_type=freebsd-elf
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ hardcode_into_libs=yes
+ if test "$with_gnu_ld" = yes; then
+ sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+ shlibpath_overrides_runpath=no
+ else
+ sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+ shlibpath_overrides_runpath=yes
+ case $host_os in
+ sco3.2v5*)
+ sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+ ;;
+ esac
+ fi
+ sys_lib_dlsearch_path_spec='/usr/lib'
+ ;;
+
+uts4*)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+*)
+ dynamic_linker=no
+ ;;
+esac
+AC_MSG_RESULT([$dynamic_linker])
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+ variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+])# AC_LIBTOOL_SYS_DYNAMIC_LINKER
+
+
+# _LT_AC_TAGCONFIG
+# ----------------
+AC_DEFUN([_LT_AC_TAGCONFIG],
+[AC_ARG_WITH([tags],
+ [AS_HELP_STRING([--with-tags@<:@=TAGS@:>@],[include additional configurations @<:@automatic@:>@])],
+ [tagnames="$withval"])
+
+if test -f "$ltmain" && test -n "$tagnames"; then
+ if test ! -f "${ofile}"; then
+ AC_MSG_WARN([output file `$ofile' does not exist])
+ fi
+
+ if test -z "$LTCC"; then
+ eval "`$SHELL ${ofile} --config | grep '^LTCC='`"
+ if test -z "$LTCC"; then
+ AC_MSG_WARN([output file `$ofile' does not look like a libtool script])
+ else
+ AC_MSG_WARN([using `LTCC=$LTCC', extracted from `$ofile'])
+ fi
+ fi
+ if test -z "$LTCFLAGS"; then
+ eval "`$SHELL ${ofile} --config | grep '^LTCFLAGS='`"
+ fi
+
+ # Extract list of available tagged configurations in $ofile.
+ # Note that this assumes the entire list is on one line.
+ available_tags=`grep "^available_tags=" "${ofile}" | $SED -e 's/available_tags=\(.*$\)/\1/' -e 's/\"//g'`
+
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for tagname in $tagnames; do
+ IFS="$lt_save_ifs"
+ # Check whether tagname contains only valid characters
+ case `$echo "X$tagname" | $Xsed -e 's:[[-_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890,/]]::g'` in
+ "") ;;
+ *) AC_MSG_ERROR([invalid tag name: $tagname])
+ ;;
+ esac
+
+ if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "${ofile}" > /dev/null
+ then
+ AC_MSG_ERROR([tag name "$tagname" already exists])
+ fi
+
+ # Update the list of available tags.
+ if test -n "$tagname"; then
+ echo appending configuration tag \"$tagname\" to $ofile
+
+ case $tagname in
+ CXX)
+ if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
+ ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
+ (test "X$CXX" != "Xg++"))) ; then
+ AC_LIBTOOL_LANG_CXX_CONFIG
+ else
+ tagname=""
+ fi
+ ;;
+
+ F77)
+ if test -n "$F77" && test "X$F77" != "Xno"; then
+ AC_LIBTOOL_LANG_F77_CONFIG
+ else
+ tagname=""
+ fi
+ ;;
+
+ GCJ)
+ if test -n "$GCJ" && test "X$GCJ" != "Xno"; then
+ AC_LIBTOOL_LANG_GCJ_CONFIG
+ else
+ tagname=""
+ fi
+ ;;
+
+ RC)
+ AC_LIBTOOL_LANG_RC_CONFIG
+ ;;
+
+ *)
+ AC_MSG_ERROR([Unsupported tag name: $tagname])
+ ;;
+ esac
+
+ # Append the new tag name to the list of available tags.
+ if test -n "$tagname" ; then
+ available_tags="$available_tags $tagname"
+ fi
+ fi
+ done
+ IFS="$lt_save_ifs"
+
+ # Now substitute the updated list of available tags.
+ if eval "sed -e 's/^available_tags=.*\$/available_tags=\"$available_tags\"/' \"$ofile\" > \"${ofile}T\""; then
+ mv "${ofile}T" "$ofile"
+ chmod +x "$ofile"
+ else
+ rm -f "${ofile}T"
+ AC_MSG_ERROR([unable to update list of available tagged configurations.])
+ fi
+fi
+])# _LT_AC_TAGCONFIG
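+
+# Example invocation (a sketch): extra tagged configurations can be requested
+# at configure time with, e.g.
+#   ./configure --with-tags=CXX,F77
+# which appends a section marked "### BEGIN LIBTOOL TAG CONFIG: CXX" (and one
+# for F77) to the generated libtool script via the language-config macros.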
+
+
+# AC_LIBTOOL_DLOPEN
+# -----------------
+# enable checks for dlopen support
+AC_DEFUN([AC_LIBTOOL_DLOPEN],
+ [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])
+])# AC_LIBTOOL_DLOPEN
+
+
+# AC_LIBTOOL_WIN32_DLL
+# --------------------
+# declare package support for building win32 DLLs
+AC_DEFUN([AC_LIBTOOL_WIN32_DLL],
+[AC_BEFORE([$0], [AC_LIBTOOL_SETUP])
+])# AC_LIBTOOL_WIN32_DLL
+
+
+# AC_ENABLE_SHARED([DEFAULT])
+# ---------------------------
+# implement the --enable-shared flag
+# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
+AC_DEFUN([AC_ENABLE_SHARED],
+[define([enable_shared_default], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE([shared],
+ AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],[build shared libraries @<:@default=enable_shared_default@:>@]),
+ [p=${PACKAGE-default}
+ case $enableval in
+ yes) enable_shared=yes ;;
+ no) enable_shared=no ;;
+ *)
+ enable_shared=no
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for pkg in $enableval; do
+ IFS="$lt_save_ifs"
+ if test "X$pkg" = "X$p"; then
+ enable_shared=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
+ [enable_shared=]enable_shared_default)
+])# AC_ENABLE_SHARED
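+
+# Example (a sketch): besides plain --enable-shared/--disable-shared, the
+# flag accepts a package list, e.g.
+#   ./configure --enable-shared=foo,bar
+# which sets enable_shared=yes only when $PACKAGE matches one of the names.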
+
+
+# AC_DISABLE_SHARED
+# -----------------
+# set the default shared flag to --disable-shared
+AC_DEFUN([AC_DISABLE_SHARED],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_SHARED(no)
+])# AC_DISABLE_SHARED
+
+
+# AC_ENABLE_STATIC([DEFAULT])
+# ---------------------------
+# implement the --enable-static flag
+# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
+AC_DEFUN([AC_ENABLE_STATIC],
+[define([enable_static_default], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE([static],
+ AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],[build static libraries @<:@default=enable_static_default@:>@]),
+ [p=${PACKAGE-default}
+ case $enableval in
+ yes) enable_static=yes ;;
+ no) enable_static=no ;;
+ *)
+ enable_static=no
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for pkg in $enableval; do
+ IFS="$lt_save_ifs"
+ if test "X$pkg" = "X$p"; then
+ enable_static=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
+ [enable_static=]enable_static_default)
+])# AC_ENABLE_STATIC
+
+
+# AC_DISABLE_STATIC
+# -----------------
+# set the default static flag to --disable-static
+AC_DEFUN([AC_DISABLE_STATIC],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_STATIC(no)
+])# AC_DISABLE_STATIC
+
+
+# AC_ENABLE_FAST_INSTALL([DEFAULT])
+# ---------------------------------
+# implement the --enable-fast-install flag
+# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'.
+AC_DEFUN([AC_ENABLE_FAST_INSTALL],
+[define([enable_fast_install_default], ifelse($1, no, no, yes))dnl
+AC_ARG_ENABLE([fast-install],
+ AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],[optimize for fast installation @<:@default=enable_fast_install_default@:>@]),
+ [p=${PACKAGE-default}
+ case $enableval in
+ yes) enable_fast_install=yes ;;
+ no) enable_fast_install=no ;;
+ *)
+ enable_fast_install=no
+ # Look at the argument we got. We use all the common list separators.
+ lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+ for pkg in $enableval; do
+ IFS="$lt_save_ifs"
+ if test "X$pkg" = "X$p"; then
+ enable_fast_install=yes
+ fi
+ done
+ IFS="$lt_save_ifs"
+ ;;
+ esac],
+ [enable_fast_install=]enable_fast_install_default)
+])# AC_ENABLE_FAST_INSTALL
+
+
+# AC_DISABLE_FAST_INSTALL
+# -----------------------
+# set the default to --disable-fast-install
+AC_DEFUN([AC_DISABLE_FAST_INSTALL],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_FAST_INSTALL(no)
+])# AC_DISABLE_FAST_INSTALL
+
+
+# AC_LIBTOOL_PICMODE([MODE])
+# --------------------------
+# implement the --with-pic flag
+# MODE is either `yes' or `no'. If omitted, it defaults to `default'.
+AC_DEFUN([AC_LIBTOOL_PICMODE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+pic_mode=ifelse($#,1,$1,default)
+])# AC_LIBTOOL_PICMODE
+
+
+# AC_PROG_EGREP
+# -------------
+# This is predefined starting with Autoconf 2.54, so this conditional
+# definition can be removed once we require Autoconf 2.54 or later.
+m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP],
+[AC_CACHE_CHECK([for egrep], [ac_cv_prog_egrep],
+ [if echo a | (grep -E '(a|b)') >/dev/null 2>&1
+ then ac_cv_prog_egrep='grep -E'
+ else ac_cv_prog_egrep='egrep'
+ fi])
+ EGREP=$ac_cv_prog_egrep
+ AC_SUBST([EGREP])
+])])
+
+
+# AC_PATH_TOOL_PREFIX
+# -------------------
+# find a file program which can recognise a shared library
+AC_DEFUN([AC_PATH_TOOL_PREFIX],
+[AC_REQUIRE([AC_PROG_EGREP])dnl
+AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+[[\\/*] | ?:[\\/]*])
+ lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+ ;;
+*)
+ lt_save_MAGIC_CMD="$MAGIC_CMD"
+ lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word. This closes a longstanding sh security hole.
+ ac_dummy="ifelse([$2], , $PATH, [$2])"
+ for ac_dir in $ac_dummy; do
+ IFS="$lt_save_ifs"
+ test -z "$ac_dir" && ac_dir=.
+ if test -f $ac_dir/$1; then
+ lt_cv_path_MAGIC_CMD="$ac_dir/$1"
+ if test -n "$file_magic_test_file"; then
+ case $deplibs_check_method in
+ "file_magic "*)
+ file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+ MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+ if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+ $EGREP "$file_magic_regex" > /dev/null; then
+ :
+ else
+ cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such. This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem. Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+ fi ;;
+ esac
+ fi
+ break
+ fi
+ done
+ IFS="$lt_save_ifs"
+ MAGIC_CMD="$lt_save_MAGIC_CMD"
+ ;;
+esac])
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+ AC_MSG_RESULT($MAGIC_CMD)
+else
+ AC_MSG_RESULT(no)
+fi
+])# AC_PATH_TOOL_PREFIX
+
+
+# AC_PATH_MAGIC
+# -------------
+# find a file program which can recognise a shared library
+AC_DEFUN([AC_PATH_MAGIC],
+[AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+ if test -n "$ac_tool_prefix"; then
+ AC_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
+ else
+ MAGIC_CMD=:
+ fi
+fi
+])# AC_PATH_MAGIC
+
+
+# AC_PROG_LD
+# ----------
+# find the pathname to the GNU or non-GNU linker
+AC_DEFUN([AC_PROG_LD],
+[AC_ARG_WITH([gnu-ld],
+ [AS_HELP_STRING([--with-gnu-ld],[assume the C compiler uses GNU ld @<:@default=no@:>@])],
+ [test "$withval" = no || with_gnu_ld=yes],
+ [with_gnu_ld=no])
+AC_REQUIRE([LT_AC_PROG_SED])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+ac_prog=ld
+if test "$GCC" = yes; then
+ # Check if gcc -print-prog-name=ld gives a path.
+ AC_MSG_CHECKING([for ld used by $CC])
+ case $host in
+ *-*-mingw*)
+ # gcc leaves a trailing carriage return which upsets mingw
+ ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+ *)
+ ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+ esac
+ case $ac_prog in
+ # Accept absolute paths.
+ [[\\/]]* | ?:[[\\/]]*)
+ re_direlt='/[[^/]][[^/]]*/\.\./'
+ # Canonicalize the pathname of ld
+ ac_prog=`echo $ac_prog| $SED 's%\\\\%/%g'`
+ while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+ ac_prog=`echo $ac_prog| $SED "s%$re_direlt%/%"`
+ done
+ test -z "$LD" && LD="$ac_prog"
+ ;;
+ "")
+ # If it fails, then pretend we aren't using GCC.
+ ac_prog=ld
+ ;;
+ *)
+ # If it is relative, then search for the first ld in PATH.
+ with_gnu_ld=unknown
+ ;;
+ esac
+elif test "$with_gnu_ld" = yes; then
+ AC_MSG_CHECKING([for GNU ld])
+else
+ AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(lt_cv_path_LD,
+[if test -z "$LD"; then
+ lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+ for ac_dir in $PATH; do
+ IFS="$lt_save_ifs"
+ test -z "$ac_dir" && ac_dir=.
+ if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+ lt_cv_path_LD="$ac_dir/$ac_prog"
+ # Check to see if the program is GNU ld. I'd rather use --version,
+ # but apparently some variants of GNU ld only accept -v.
+ # Break only if it was the GNU/non-GNU ld that we prefer.
+ case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+ *GNU* | *'with BFD'*)
+ test "$with_gnu_ld" != no && break
+ ;;
+ *)
+ test "$with_gnu_ld" != yes && break
+ ;;
+ esac
+ fi
+ done
+ IFS="$lt_save_ifs"
+else
+ lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+ AC_MSG_RESULT($LD)
+else
+ AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+AC_PROG_LD_GNU
+])# AC_PROG_LD
+
+
+# AC_PROG_LD_GNU
+# --------------
+AC_DEFUN([AC_PROG_LD_GNU],
+[AC_REQUIRE([AC_PROG_EGREP])dnl
+AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+ lt_cv_prog_gnu_ld=yes
+ ;;
+*)
+ lt_cv_prog_gnu_ld=no
+ ;;
+esac])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])# AC_PROG_LD_GNU
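+
+# Illustration (hypothetical outputs): the case above matches the output of
+# `$LD -v', e.g.
+#   "GNU ld version 2.17"                    -> lt_cv_prog_gnu_ld=yes
+#   any output lacking "GNU" or "with BFD"   -> lt_cv_prog_gnu_ld=no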
+
+
+# AC_PROG_LD_RELOAD_FLAG
+# ----------------------
+# find reload flag for linker
+# -- PORTME Some linkers may need a different reload flag.
+AC_DEFUN([AC_PROG_LD_RELOAD_FLAG],
+[AC_CACHE_CHECK([for $LD option to reload object files],
+ lt_cv_ld_reload_flag,
+ [lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+"" | " "*) ;;
+*) reload_flag=" $reload_flag" ;;
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+ darwin*)
+ if test "$GCC" = yes; then
+ reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r $compiler_flags -o $output$reload_objs'
+ else
+ reload_cmds='$LD$reload_flag -o $output$reload_objs'
+ fi
+ ;;
+esac
+])# AC_PROG_LD_RELOAD_FLAG
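+
+# Illustrative expansion of reload_cmds above (hypothetical file names):
+# with reload_flag=' -r', output=combined.o and reload_objs=' a.o b.o',
+# the default command becomes
+#   ld -r -o combined.o a.o b.o
+# while on Darwin with GCC the compiler driver is used instead, roughly
+# (assuming wl=-Wl,)
+#   gcc -nostdlib -Wl,-r -o combined.o a.o b.o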
+
+
+# AC_DEPLIBS_CHECK_METHOD
+# -----------------------
+# how to check for library dependencies
+# -- PORTME fill in with the dynamic library characteristics
+AC_DEFUN([AC_DEPLIBS_CHECK_METHOD],
+[AC_CACHE_CHECK([how to recognise dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# `none' -- dependencies not supported.
+# `unknown' -- same as `none', but documents that we really don't know.
+# `pass_all' -- all dependencies passed with no checks.
+# `test_compile' -- check by making a test program.
+# `file_magic [[regex]]' -- check by looking for files in the library path
+# that respond to the $file_magic_cmd with the given extended regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix4* | aix5*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+beos*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+bsdi[[45]]*)
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
+ lt_cv_file_magic_cmd='/usr/bin/file -L'
+ lt_cv_file_magic_test_file=/shlib/libc.so
+ ;;
+
+cygwin*)
+ # func_win32_libid is a shell function defined in ltmain.sh
+ lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+ lt_cv_file_magic_cmd='func_win32_libid'
+ ;;
+
+mingw* | pw32*)
+ # Base MSYS/MinGW do not provide the 'file' command needed by the
+ # func_win32_libid shell function, so use a weaker test based on 'objdump'.
+ lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+ lt_cv_file_magic_cmd='$OBJDUMP -f'
+ ;;
+
+darwin* | rhapsody*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+freebsd* | kfreebsd*-gnu | dragonfly*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+ case $host_cpu in
+ i*86 )
+ # Not sure whether the presence of OpenBSD here was a mistake.
+ # Let's accept both of them until this is cleared up.
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library'
+ lt_cv_file_magic_cmd=/usr/bin/file
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+ ;;
+ esac
+ else
+ lt_cv_deplibs_check_method=pass_all
+ fi
+ ;;
+
+gnu*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+hpux10.20* | hpux11*)
+ lt_cv_file_magic_cmd=/usr/bin/file
+ case $host_cpu in
+ ia64*)
+ lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64'
+ lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
+ ;;
+ hppa*64*)
+ [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - PA-RISC [0-9].[0-9]']
+ lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
+ ;;
+ *)
+ lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library'
+ lt_cv_file_magic_test_file=/usr/lib/libc.sl
+ ;;
+ esac
+ ;;
+
+interix3*)
+ # PIC code is broken on Interix 3.x; that's why we match |\.a and not |_pic\.a here
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$'
+ ;;
+
+irix5* | irix6* | nonstopux*)
+ case $LD in
+ *-32|*"-32 ") libmagic=32-bit;;
+ *-n32|*"-n32 ") libmagic=N32;;
+ *-64|*"-64 ") libmagic=64-bit;;
+ *) libmagic=never-match;;
+ esac
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+# This must be Linux ELF.
+linux*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
+ else
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$'
+ fi
+ ;;
+
+newos6*)
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)'
+ lt_cv_file_magic_cmd=/usr/bin/file
+ lt_cv_file_magic_test_file=/usr/lib/libnls.so
+ ;;
+
+nto-qnx*)
+ lt_cv_deplibs_check_method=unknown
+ ;;
+
+openbsd*)
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$'
+ else
+ lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$'
+ fi
+ ;;
+
+osf3* | osf4* | osf5*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+solaris*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+
+sysv4 | sysv4.3*)
+ case $host_vendor in
+ motorola)
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]'
+ lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+ ;;
+ ncr)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ sequent)
+ lt_cv_file_magic_cmd='/bin/file'
+ lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )'
+ ;;
+ sni)
+ lt_cv_file_magic_cmd='/bin/file'
+ lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib"
+ lt_cv_file_magic_test_file=/lib/libc.so
+ ;;
+ siemens)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ pc)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+ esac
+ ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+ lt_cv_deplibs_check_method=pass_all
+ ;;
+esac
+])
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+])# AC_DEPLIBS_CHECK_METHOD
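+
+# Sketch of how a `file_magic' entry is applied (taking the bsdi case above
+# as an example): the generated libtool script effectively runs
+#   /usr/bin/file -L /shlib/libc.so
+# and accepts the file as a shared-library dependency only when the output
+# matches the ELF shared-object regex given in that entry.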
+
+
+# AC_PROG_NM
+# ----------
+# find the pathname to a BSD-compatible name lister
+AC_DEFUN([AC_PROG_NM],
+[AC_CACHE_CHECK([for BSD-compatible nm], lt_cv_path_NM,
+[if test -n "$NM"; then
+ # Let the user override the test.
+ lt_cv_path_NM="$NM"
+else
+ lt_nm_to_check="${ac_tool_prefix}nm"
+ if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+ lt_nm_to_check="$lt_nm_to_check nm"
+ fi
+ for lt_tmp_nm in $lt_nm_to_check; do
+ lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+ for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+ IFS="$lt_save_ifs"
+ test -z "$ac_dir" && ac_dir=.
+ tmp_nm="$ac_dir/$lt_tmp_nm"
+ if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+ # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+ # nm: unknown option "B" ignored
+ # Tru64's nm complains that /dev/null is an invalid object file
+ case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+ */dev/null* | *'Invalid file or object type'*)
+ lt_cv_path_NM="$tmp_nm -B"
+ break
+ ;;
+ *)
+ case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+ */dev/null*)
+ lt_cv_path_NM="$tmp_nm -p"
+ break
+ ;;
+ *)
+ lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+ continue # so that we can try to find one that supports BSD flags
+ ;;
+ esac
+ ;;
+ esac
+ fi
+ done
+ IFS="$lt_save_ifs"
+ done
+ test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm
+fi])
+NM="$lt_cv_path_NM"
+])# AC_PROG_NM
+
+
+# AC_CHECK_LIBM
+# -------------
+# check for math library
+AC_DEFUN([AC_CHECK_LIBM],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case $host in
+*-*-beos* | *-*-cygwin* | *-*-pw32* | *-*-darwin*)
+ # These systems don't have libm, or don't need it
+ ;;
+*-ncr-sysv4.3*)
+ AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+ AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
+ ;;
+*)
+ AC_CHECK_LIB(m, cos, LIBM="-lm")
+ ;;
+esac
+])# AC_CHECK_LIBM
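+
+# Typical usage (a sketch): this macro only sets the LIBM shell variable and
+# does not AC_SUBST it, so a configure.ac would normally do
+#   AC_CHECK_LIBM
+#   AC_SUBST([LIBM])
+# and then link with @LIBM@ (or $(LIBM)) in the Makefiles.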
+
+
+# AC_LIBLTDL_CONVENIENCE([DIRECTORY])
+# -----------------------------------
+# sets LIBLTDL to the link flags for the libltdl convenience library and
+# LTDLINCL to the include flags for the libltdl header and adds
+# --enable-ltdl-convenience to the configure arguments. Note that
+# AC_CONFIG_SUBDIRS is not called here. If DIRECTORY is not provided,
+# it is assumed to be `libltdl'. LIBLTDL will be prefixed with
+# '${top_builddir}/' and LTDLINCL will be prefixed with '${top_srcdir}/'
+# (note the single quotes!). If your package is not flat and you're not
+# using automake, define top_builddir and top_srcdir appropriately in
+# the Makefiles.
+AC_DEFUN([AC_LIBLTDL_CONVENIENCE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+ case $enable_ltdl_convenience in
+ no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
+ "") enable_ltdl_convenience=yes
+ ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
+ esac
+ LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
+ LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+ # For backward compatibility, also set the old non-gettext-consistent name...
+ INCLTDL="$LTDLINCL"
+])# AC_LIBLTDL_CONVENIENCE
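+
+# Usage sketch (assumed layout): a package shipping libltdl in the `libltdl'
+# subdirectory would typically use
+#   AC_LIBLTDL_CONVENIENCE
+#   AC_CONFIG_SUBDIRS([libltdl])
+# and then compile with $(LTDLINCL) and link with $(LIBLTDL).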
+
+
+# AC_LIBLTDL_INSTALLABLE([DIRECTORY])
+# -----------------------------------
+# sets LIBLTDL to the link flags for the libltdl installable library and
+# LTDLINCL to the include flags for the libltdl header and adds
+# --enable-ltdl-install to the configure arguments. Note that
+# AC_CONFIG_SUBDIRS is not called here. If DIRECTORY is not provided,
+# and an installed libltdl is not found, it is assumed to be `libltdl'.
+# LIBLTDL will be prefixed with '${top_builddir}/' and LTDLINCL with
+# '${top_srcdir}/' (note the single quotes!). If your package is not
+# flat and you're not using automake, define top_builddir and top_srcdir
+# appropriately in the Makefiles.
+# In the future, this macro may have to be called after AC_PROG_LIBTOOL.
+AC_DEFUN([AC_LIBLTDL_INSTALLABLE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+ AC_CHECK_LIB(ltdl, lt_dlinit,
+ [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no],
+ [if test x"$enable_ltdl_install" = xno; then
+ AC_MSG_WARN([libltdl not installed, but installation disabled])
+ else
+ enable_ltdl_install=yes
+ fi
+ ])
+ if test x"$enable_ltdl_install" = x"yes"; then
+ ac_configure_args="$ac_configure_args --enable-ltdl-install"
+ LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la
+ LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+ else
+ ac_configure_args="$ac_configure_args --enable-ltdl-install=no"
+ LIBLTDL="-lltdl"
+ LTDLINCL=
+ fi
+ # For backward compatibility, also set the old non-gettext-consistent name...
+ INCLTDL="$LTDLINCL"
+])# AC_LIBLTDL_INSTALLABLE
+
+
+# AC_LIBTOOL_CXX
+# --------------
+# enable support for C++ libraries
+AC_DEFUN([AC_LIBTOOL_CXX],
+[AC_REQUIRE([_LT_AC_LANG_CXX])
+])# AC_LIBTOOL_CXX
+
+
+# _LT_AC_LANG_CXX
+# ---------------
+AC_DEFUN([_LT_AC_LANG_CXX],
+[AC_REQUIRE([AC_PROG_CXX])
+AC_REQUIRE([_LT_AC_PROG_CXXCPP])
+_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}CXX])
+])# _LT_AC_LANG_CXX
+
+# _LT_AC_PROG_CXXCPP
+# ------------------
+AC_DEFUN([_LT_AC_PROG_CXXCPP],
+[
+AC_REQUIRE([AC_PROG_CXX])
+if test -n "$CXX" && ( test "X$CXX" != "Xno" &&
+ ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) ||
+ (test "X$CXX" != "Xg++"))) ; then
+ AC_PROG_CXXCPP
+fi
+])# _LT_AC_PROG_CXXCPP
+
+# AC_LIBTOOL_F77
+# --------------
+# enable support for Fortran 77 libraries
+AC_DEFUN([AC_LIBTOOL_F77],
+[AC_REQUIRE([_LT_AC_LANG_F77])
+])# AC_LIBTOOL_F77
+
+
+# _LT_AC_LANG_F77
+# ---------------
+AC_DEFUN([_LT_AC_LANG_F77],
+[AC_REQUIRE([AC_PROG_F77])
+_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}F77])
+])# _LT_AC_LANG_F77
+
+
+# AC_LIBTOOL_GCJ
+# --------------
+# enable support for GCJ libraries
+AC_DEFUN([AC_LIBTOOL_GCJ],
+[AC_REQUIRE([_LT_AC_LANG_GCJ])
+])# AC_LIBTOOL_GCJ
+
+
+# _LT_AC_LANG_GCJ
+# ---------------
+AC_DEFUN([_LT_AC_LANG_GCJ],
+[AC_PROVIDE_IFELSE([AC_PROG_GCJ],[],
+ [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],[],
+ [AC_PROVIDE_IFELSE([LT_AC_PROG_GCJ],[],
+ [ifdef([AC_PROG_GCJ],[AC_REQUIRE([AC_PROG_GCJ])],
+ [ifdef([A][M_PROG_GCJ],[AC_REQUIRE([A][M_PROG_GCJ])],
+ [AC_REQUIRE([A][C_PROG_GCJ_OR_A][M_PROG_GCJ])])])])])])
+_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}GCJ])
+])# _LT_AC_LANG_GCJ
+
+
+# AC_LIBTOOL_RC
+# -------------
+# enable support for Windows resource files
+AC_DEFUN([AC_LIBTOOL_RC],
+[AC_REQUIRE([LT_AC_PROG_RC])
+_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}RC])
+])# AC_LIBTOOL_RC
+
+
+# AC_LIBTOOL_LANG_C_CONFIG
+# ------------------------
+# Ensure that the configuration vars for the C compiler are
+# suitably defined. Those variables are subsequently used by
+# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
+AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG], [_LT_AC_LANG_C_CONFIG])
+AC_DEFUN([_LT_AC_LANG_C_CONFIG],
+[lt_save_CC="$CC"
+AC_LANG_PUSH(C)
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;\n"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}\n'
+
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+AC_LIBTOOL_PROG_COMPILER_NO_RTTI($1)
+AC_LIBTOOL_PROG_COMPILER_PIC($1)
+AC_LIBTOOL_PROG_CC_C_O($1)
+AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
+AC_LIBTOOL_PROG_LD_SHLIBS($1)
+AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
+AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)
+AC_LIBTOOL_SYS_LIB_STRIP
+AC_LIBTOOL_DLOPEN_SELF
+
+# Report which library types will actually be built
+AC_MSG_CHECKING([if libtool supports shared libraries])
+AC_MSG_RESULT([$can_build_shared])
+
+AC_MSG_CHECKING([whether to build shared libraries])
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case $host_os in
+aix3*)
+ test "$enable_shared" = yes && enable_static=no
+ if test -n "$RANLIB"; then
+ archive_cmds="$archive_cmds~\$RANLIB \$lib"
+ postinstall_cmds='$RANLIB $lib'
+ fi
+ ;;
+
+aix4* | aix5*)
+ if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+ test "$enable_shared" = yes && enable_static=no
+ fi
+ ;;
+esac
+AC_MSG_RESULT([$enable_shared])
+
+AC_MSG_CHECKING([whether to build static libraries])
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+AC_MSG_RESULT([$enable_static])
+
+AC_LIBTOOL_CONFIG($1)
+
+AC_LANG_POP
+CC="$lt_save_CC"
+])# AC_LIBTOOL_LANG_C_CONFIG
+
+
+# AC_LIBTOOL_LANG_CXX_CONFIG
+# --------------------------
+# Ensure that the configuration vars for the C++ compiler are
+# suitably defined. Those variables are subsequently used by
+# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
+AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG], [_LT_AC_LANG_CXX_CONFIG(CXX)])
+AC_DEFUN([_LT_AC_LANG_CXX_CONFIG],
+[AC_LANG_PUSH(C++)
+AC_REQUIRE([AC_PROG_CXX])
+AC_REQUIRE([_LT_AC_PROG_CXXCPP])
+
+_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_AC_TAGVAR(allow_undefined_flag, $1)=
+_LT_AC_TAGVAR(always_export_symbols, $1)=no
+_LT_AC_TAGVAR(archive_expsym_cmds, $1)=
+_LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_AC_TAGVAR(hardcode_direct, $1)=no
+_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
+_LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_AC_TAGVAR(hardcode_minus_L, $1)=no
+_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+_LT_AC_TAGVAR(hardcode_automatic, $1)=no
+_LT_AC_TAGVAR(module_cmds, $1)=
+_LT_AC_TAGVAR(module_expsym_cmds, $1)=
+_LT_AC_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_AC_TAGVAR(no_undefined_flag, $1)=
+_LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Dependencies to place before and after the object being linked:
+_LT_AC_TAGVAR(predep_objects, $1)=
+_LT_AC_TAGVAR(postdep_objects, $1)=
+_LT_AC_TAGVAR(predeps, $1)=
+_LT_AC_TAGVAR(postdeps, $1)=
+_LT_AC_TAGVAR(compiler_lib_search_path, $1)=
+
+# Source file extension for C++ test sources.
+ac_ext=cpp
+
+# Object file extension for compiled C++ test sources.
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;\n"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }\n'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_LD=$LD
+lt_save_GCC=$GCC
+GCC=$GXX
+lt_save_with_gnu_ld=$with_gnu_ld
+lt_save_path_LD=$lt_cv_path_LD
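+# C++ links may use a different linker than C, so temporarily swap in the
+# C++-specific cached linker values; the C values are restored at the end
+# of this macro.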
+if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
+ lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
+else
+ $as_unset lt_cv_prog_gnu_ld
+fi
+if test -n "${lt_cv_path_LDCXX+set}"; then
+ lt_cv_path_LD=$lt_cv_path_LDCXX
+else
+ $as_unset lt_cv_path_LD
+fi
+test -z "${LDCXX+set}" || LD=$LDCXX
+CC=${CXX-"c++"}
+compiler=$CC
+_LT_AC_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+
+# We don't want -fno-exceptions when compiling C++ code, so set the
+# no_builtin_flag separately
+if test "$GXX" = yes; then
+ _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
+else
+ _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+fi
+
+if test "$GXX" = yes; then
+ # Set up default GNU C++ configuration
+
+ AC_PROG_LD
+
+ # Check if GNU C++ uses GNU ld as the underlying linker, since the
+ # archiving commands below assume that GNU ld is being used.
+ if test "$with_gnu_ld" = yes; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+ # If archive_cmds runs LD, not CC, wlarc should be empty
+ # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
+ # investigate it a little bit more. (MM)
+ wlarc='${wl}'
+
+    # ancient GNU ld didn't support --whole-archive et al.
+ if eval "`$CC -print-prog-name=ld` --help 2>&1" | \
+ grep 'no-whole-archive' > /dev/null; then
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+ else
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+ fi
+ else
+ with_gnu_ld=no
+ wlarc=
+
+ # A generic and very simple default shared library creation
+ # command for GNU C++ for the case where it uses the native
+      # linker, instead of GNU ld.  If possible, this setting should be
+      # overridden to take advantage of the native linker features on
+ # the platform it is being used on.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+ fi
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"'
+
+else
+ GXX=no
+ with_gnu_ld=no
+ wlarc=
+fi
+
+# PORTME: fill in a description of your system's C++ link characteristics
+AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
+_LT_AC_TAGVAR(ld_shlibs, $1)=yes
+case $host_os in
+ aix3*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ aix4* | aix5*)
+ if test "$host_cpu" = ia64; then
+ # On IA64, the linker does run time linking by default, so we don't
+ # have to do anything special.
+ aix_use_runtimelinking=no
+ exp_sym_flag='-Bexport'
+ no_entry_flag=""
+ else
+ aix_use_runtimelinking=no
+
+ # Test if we are trying to use run time linking or normal
+ # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+ # need to do runtime linking.
+ case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*)
+ for ld_flag in $LDFLAGS; do
+ case $ld_flag in
+ *-brtl*)
+ aix_use_runtimelinking=yes
+ break
+ ;;
+ esac
+ done
+ ;;
+ esac
+
+ exp_sym_flag='-bexport'
+ no_entry_flag='-bnoentry'
+ fi
+
+ # When large executables or shared objects are built, AIX ld can
+ # have problems creating the table of contents. If linking a library
+ # or program results in "error TOC overflow" add -mminimal-toc to
+ # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
+ # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+ _LT_AC_TAGVAR(archive_cmds, $1)=''
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':'
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+
+ if test "$GXX" = yes; then
+ case $host_os in aix4.[[012]]|aix4.[[012]].*)
+ # We only want to do this on AIX 4.2 and lower, the check
+ # below for broken collect2 doesn't work under 4.3+
+ collect2name=`${CC} -print-prog-name=collect2`
+ if test -f "$collect2name" && \
+ strings "$collect2name" | grep resolve_lib_name >/dev/null
+ then
+ # We have reworked collect2
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ else
+ # We have old collect2
+ _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported
+ # It fails to find uninstalled libraries when the uninstalled
+ # path is not listed in the libpath. Setting hardcode_minus_L
+ # to unsupported forces relinking
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
+ fi
+ ;;
+ esac
+ shared_flag='-shared'
+ if test "$aix_use_runtimelinking" = yes; then
+ shared_flag="$shared_flag "'${wl}-G'
+ fi
+ else
+ # not using gcc
+ if test "$host_cpu" = ia64; then
+ # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+ # chokes on -Wl,-G. The following line is correct:
+ shared_flag='-G'
+ else
+ if test "$aix_use_runtimelinking" = yes; then
+ shared_flag='${wl}-G'
+ else
+ shared_flag='${wl}-bM:SRE'
+ fi
+ fi
+ fi
+
+ # It seems that -bexpall does not export symbols beginning with
+ # underscore (_), so it is better to generate a list of symbols to export.
+ _LT_AC_TAGVAR(always_export_symbols, $1)=yes
+ if test "$aix_use_runtimelinking" = yes; then
+ # Warning - without using the other runtime loading flags (-brtl),
+ # -berok will link without error, but may produce a broken library.
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='-berok'
+ # Determine the default libpath from the value encoded in an empty executable.
+ _LT_AC_SYS_LIBPATH_AIX
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+
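+      # In the command below, the backquoted `if' emits
+      # ${wl}${allow_undefined_flag} only when that flag is non-empty.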
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+ else
+ if test "$host_cpu" = ia64; then
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+ else
+ # Determine the default libpath from the value encoded in an empty executable.
+ _LT_AC_SYS_LIBPATH_AIX
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+ # Warning - without using the other run time loading flags,
+ # -berok will link without error, but may produce a broken library.
+ _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
+ # Exported symbols can be pulled into shared objects from archives
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes
+ # This is similar to how AIX traditionally builds its shared libraries.
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+ fi
+ fi
+ ;;
+
+ beos*)
+ if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+ # support --undefined. This deserves some investigation. FIXME
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+
+ chorus*)
+ case $cc_basename in
+ *)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ ;;
+
+ cygwin* | mingw* | pw32*)
+ # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
+ # as there is no search path for DLLs.
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ _LT_AC_TAGVAR(always_export_symbols, $1)=no
+ _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+
+ if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+ # If the export-symbols file already is a .def file (1st line
+ # is EXPORTS), use it as is; otherwise, prepend...
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+ cp $export_symbols $output_objdir/$soname.def;
+ else
+ echo EXPORTS > $output_objdir/$soname.def;
+ cat $export_symbols >> $output_objdir/$soname.def;
+ fi~
+ $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ darwin* | rhapsody*)
+ case $host_os in
+ rhapsody* | darwin1.[[012]])
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress'
+ ;;
+ *) # Darwin 1.3 on
+      if test -z "${MACOSX_DEPLOYMENT_TARGET}" ; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress'
+ else
+ case ${MACOSX_DEPLOYMENT_TARGET} in
+ 10.[[012]])
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress'
+ ;;
+ 10.*)
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup'
+ ;;
+ esac
+ fi
+ ;;
+ esac
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_automatic, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=''
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+
+ if test "$GXX" = yes ; then
+ lt_int_apple_cc_single_mod=no
+ output_verbose_link_cmd='echo'
+ if $CC -dumpspecs 2>&1 | $EGREP 'single_module' >/dev/null ; then
+ lt_int_apple_cc_single_mod=yes
+ fi
+ if test "X$lt_int_apple_cc_single_mod" = Xyes ; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring'
+ fi
+ _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
+ # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
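+      # The sed below strips comments from $export_symbols and prefixes
+      # each name with `_' (Darwin's C symbol mangling); nmedit -s then
+      # hides every symbol not in the resulting .expsym list.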
+ if test "X$lt_int_apple_cc_single_mod" = Xyes ; then
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ else
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ fi
+ _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ else
+ case $cc_basename in
+ xlc*)
+ output_verbose_link_cmd='echo'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring'
+ _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
+ # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ ;;
+ *)
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ fi
+ ;;
+
+ dgux*)
+ case $cc_basename in
+ ec++*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ ghcx*)
+ # Green Hills C++ Compiler
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ *)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ ;;
+ freebsd[[12]].*)
+ # C++ shared libraries reported to be fairly broken before switch to ELF
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ freebsd-elf*)
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ ;;
+ freebsd* | kfreebsd*-gnu | dragonfly*)
+ # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF
+ # conventions
+ _LT_AC_TAGVAR(ld_shlibs, $1)=yes
+ ;;
+ gnu*)
+ ;;
+ hpux9*)
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
+ # but as the default
+ # location of the library.
+
+ case $cc_basename in
+ CC*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ aCC*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ #
+ # There doesn't appear to be a way to prevent this compiler from
+ # explicitly linking system object files so we need to strip them
+ # from the output so that they don't get included in the library
+ # dependencies.
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "[[-]]L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
+ ;;
+ *)
+ if test "$GXX" = yes; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -shared -nostdlib -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+ else
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ esac
+ ;;
+ hpux10*|hpux11*)
+ if test $with_gnu_ld = no; then
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ case $host_cpu in
+ hppa*64*|ia64*)
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir'
+ ;;
+ *)
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ ;;
+ esac
+ fi
+ case $host_cpu in
+ hppa*64*|ia64*)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+ *)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH,
+ # but as the default
+ # location of the library.
+ ;;
+ esac
+
+ case $cc_basename in
+ CC*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ aCC*)
+ case $host_cpu in
+ hppa*64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ ;;
+ ia64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ ;;
+ esac
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ #
+ # There doesn't appear to be a way to prevent this compiler from
+ # explicitly linking system object files so we need to strip them
+ # from the output so that they don't get included in the library
+ # dependencies.
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
+ ;;
+ *)
+ if test "$GXX" = yes; then
+ if test $with_gnu_ld = no; then
+ case $host_cpu in
+ hppa*64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ ;;
+ ia64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ ;;
+ esac
+ fi
+ else
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ esac
+ ;;
+ interix3*)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+ # Instead, shared libraries are loaded at an image base (0x10000000 by
+    # default) and relocated if they conflict, which is a slow, very memory-
+    # consuming and fragmenting process.  To avoid this, we pick a random,
+ # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+ # time. Moving up from 0x10000000 also allows more sbrk(2) space.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
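+    # In the expr above, ${RANDOM-$$} % 4096 / 2 yields 0..2047; multiplied
+    # by 262144 (256 KiB) and added to 1342177280 (0x50000000), this lands
+    # in the 0x50000000..0x6FFC0000 range described above.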
+ ;;
+ irix5* | irix6*)
+ case $cc_basename in
+ CC*)
+ # SGI C++
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+
+ # Archives containing C++ object files must be created using
+ # "CC -ar", where "CC" is the IRIX C++ compiler. This is
+ # necessary to make sure instantiated templates are included
+ # in the archive.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs'
+ ;;
+ *)
+ if test "$GXX" = yes; then
+ if test "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` -o $lib'
+ fi
+ fi
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+ ;;
+ esac
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ ;;
+ linux*)
+ case $cc_basename in
+ KCC*)
+ # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+ # KCC will only create a shared library if the output file
+ # ends with ".so" (or ".sl" for HP-UX), so rename the library
+ # to its proper name (with version) after linking.
+ _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib'
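+    # (The first $SED call above escapes any regex metacharacters in
+    # $shared_ext so the second substitution can match the extension
+    # literally.)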
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ #
+ # There doesn't appear to be a way to prevent this compiler from
+ # explicitly linking system object files so we need to strip them
+ # from the output so that they don't get included in the library
+ # dependencies.
+ output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | grep "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath,$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+ # Archives containing C++ object files must be created using
+ # "CC -Bstatic", where "CC" is the KAI C++ compiler.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs'
+ ;;
+ icpc*)
+ # Intel C++
+ with_gnu_ld=yes
+ # version 8.0 and above of icpc choke on multiply defined symbols
+ # if we add $predep_objects and $postdep_objects, however 7.1 and
+ # earlier do not add the objects themselves.
+ case `$CC -V 2>&1` in
+ *"Version 7."*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ ;;
+ *) # Version 8.0 or newer
+ tmp_idyn=
+ case $host_cpu in
+ ia64*) tmp_idyn=' -i_dynamic';;
+ esac
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ ;;
+ esac
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive'
+ ;;
+ pgCC*)
+ # Portland Group C++ compiler
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+ ;;
+ cxx*)
+ # Compaq C++
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols'
+
+ runpath_var=LD_RUN_PATH
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ #
+ # There doesn't appear to be a way to prevent this compiler from
+ # explicitly linking system object files so we need to strip them
+ # from the output so that they don't get included in the library
+ # dependencies.
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
+ ;;
+ esac
+ ;;
+ lynxos*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ m88k*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ mvs*)
+ case $cc_basename in
+ cxx*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ *)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ ;;
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags'
+ wlarc=
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ fi
+  # Work around some broken pre-1.5 toolchains
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"'
+ ;;
+ openbsd2*)
+ # C++ shared libraries are fairly broken
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ openbsd*)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+ fi
+ output_verbose_link_cmd='echo'
+ ;;
+ osf3*)
+ case $cc_basename in
+ KCC*)
+ # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+ # KCC will only create a shared library if the output file
+ # ends with ".so" (or ".sl" for HP-UX), so rename the library
+ # to its proper name (with version) after linking.
+ _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Archives containing C++ object files must be created using
+ # "CC -Bstatic", where "CC" is the KAI C++ compiler.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs'
+
+ ;;
+ RCC*)
+ # Rational C++ 2.4.1
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ cxx*)
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && echo ${wl}-set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ #
+ # There doesn't appear to be a way to prevent this compiler from
+ # explicitly linking system object files so we need to strip them
+ # from the output so that they don't get included in the library
+ # dependencies.
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
+ ;;
+ *)
+ if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"'
+
+ else
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ esac
+ ;;
+ osf4* | osf5*)
+ case $cc_basename in
+ KCC*)
+ # Kuck and Associates, Inc. (KAI) C++ Compiler
+
+ # KCC will only create a shared library if the output file
+ # ends with ".so" (or ".sl" for HP-UX), so rename the library
+ # to its proper name (with version) after linking.
+ _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Archives containing C++ object files must be created using
+ # the KAI C++ compiler.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs'
+ ;;
+ RCC*)
+ # Rational C++ 2.4.1
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ cxx*)
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~
+ echo "-hidden">> $lib.exp~
+ $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname -Wl,-input -Wl,$lib.exp `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~
+ $rm $lib.exp'
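+      # The loop above writes one `-exported_symbol NAME' option per line,
+      # plus `-hidden', into $lib.exp, which is then fed to the linker via
+      # -Wl,-input.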
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ #
+ # There doesn't appear to be a way to prevent this compiler from
+ # explicitly linking system object files so we need to strip them
+ # from the output so that they don't get included in the library
+ # dependencies.
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list'
+ ;;
+ *)
+ if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"'
+
+ else
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ esac
+ ;;
+ psos*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ sunos4*)
+ case $cc_basename in
+ CC*)
+ # Sun C++ 4.x
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ lcc*)
+ # Lucid
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ *)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ ;;
+ solaris*)
+ case $cc_basename in
+ CC*)
+ # Sun C++ 4.2, 5.x and Centerline C++
+ _LT_AC_TAGVAR(archive_cmds_need_lc,$1)=yes
+ _LT_AC_TAGVAR(no_undefined_flag, $1)=' -zdefs'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp'
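+      # The commands above generate a Solaris link-editor mapfile of the
+      # form `{ global: sym1; sym2; local: *; };' so that only the listed
+      # symbols are exported.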
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ case $host_os in
+ solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+ *)
+ # The C++ compiler is used as linker so we must use $wl
+ # flag to pass the commands to the underlying system
+        # linker. We must also pass each convenience library through
+ # to the system linker between allextract/defaultextract.
+ # The C++ compiler will combine linker options so we
+        # cannot just pass the convenience library names through
+ # without $wl.
+ # Supported since Solaris 2.6 (maybe 2.5.1?)
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract'
+ ;;
+ esac
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+
+ output_verbose_link_cmd='echo'
+
+ # Archives containing C++ object files must be created using
+ # "CC -xar", where "CC" is the Sun C++ compiler. This is
+ # necessary to make sure instantiated templates are included
+ # in the archive.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
+ ;;
+ gcx*)
+ # Green Hills C++ Compiler
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+
+ # The C++ compiler must be used to create the archive.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs'
+ ;;
+ *)
+ # GNU C++ compiler with Solaris linker
+ if test "$GXX" = yes && test "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs'
+ if $CC --version | grep -v '^2\.7' > /dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $CC -shared -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp'
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ output_verbose_link_cmd="$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\""
+ else
+ # g++ 2.7 appears to require `-G' NOT `-shared' on this
+ # platform.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp'
+
+ # Commands to make compiler produce verbose output that lists
+ # what "hidden" libraries, object files and flags are used when
+ # linking a shared library.
+ output_verbose_link_cmd="$CC -G $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\""
+ fi
+
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir'
+ fi
+ ;;
+ esac
+ ;;
+ sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*)
+ _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ runpath_var='LD_RUN_PATH'
+
+ case $cc_basename in
+ CC*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ esac
+ ;;
+ sysv5* | sco3.2v5* | sco5v6*)
+ # Note: We can NOT use -z defs as we might desire, because we do not
+ # link with -lc, and that would cause any symbols used from libc to
+ # always be unresolved, which means just about no library would
+ # ever link correctly. If we're not using GNU ld we use -z text
+ # though, which does catch some bad symbols but isn't as heavy-handed
+ # as -z defs.
+ # For security reasons, it is highly recommended that you always
+ # use absolute paths for naming shared libraries, and exclude the
+ # DT_RUNPATH tag from executables and libraries. But doing so
+ # requires that you compile everything twice, which is a pain.
+ # So that behaviour is only enabled if SCOABSPATH is set to a
+ # non-empty value in the environment. Most likely only useful for
+ # creating official distributions of packages.
+ # This is a hack until libtool officially supports absolute path
+ # names for shared libraries.
+ _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':'
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
+ runpath_var='LD_RUN_PATH'
+
+ case $cc_basename in
+ CC*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ esac
+ ;;
+ tandem*)
+ case $cc_basename in
+ NCC*)
+ # NonStop-UX NCC 3.20
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ *)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ ;;
+ vxworks*)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ *)
+ # FIXME: insert proper C++ library support
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+esac
+AC_MSG_RESULT([$_LT_AC_TAGVAR(ld_shlibs, $1)])
+test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
+
+_LT_AC_TAGVAR(GCC, $1)="$GXX"
+_LT_AC_TAGVAR(LD, $1)="$LD"
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+AC_LIBTOOL_POSTDEP_PREDEP($1)
+AC_LIBTOOL_PROG_COMPILER_PIC($1)
+AC_LIBTOOL_PROG_CC_C_O($1)
+AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
+AC_LIBTOOL_PROG_LD_SHLIBS($1)
+AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
+AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)
+
+AC_LIBTOOL_CONFIG($1)
+
+AC_LANG_POP
+CC=$lt_save_CC
+LDCXX=$LD
+LD=$lt_save_LD
+GCC=$lt_save_GCC
+with_gnu_ldcxx=$with_gnu_ld
+with_gnu_ld=$lt_save_with_gnu_ld
+lt_cv_path_LDCXX=$lt_cv_path_LD
+lt_cv_path_LD=$lt_save_path_LD
+lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld
+lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld
+])# AC_LIBTOOL_LANG_CXX_CONFIG
+
+# AC_LIBTOOL_POSTDEP_PREDEP([TAGNAME])
+# ------------------------------------
+# Figure out "hidden" library dependencies from verbose
+# compiler output when linking a shared library.
+# Parse the compiler output and extract the necessary
+# objects, libraries and library flags.
+AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP],[
+dnl we can't use the lt_simple_compile_test_code here,
+dnl because it contains code intended for an executable,
+dnl not a library. It's possible we should let each
+dnl tag define a new lt_????_link_test_code variable,
+dnl but it's only used here...
+ifelse([$1],[],[cat > conftest.$ac_ext <<EOF
+int a;
+void foo (void) { a = 0; }
+EOF
+],[$1],[CXX],[cat > conftest.$ac_ext <<EOF
+class Foo
+{
+public:
+ Foo (void) { a = 0; }
+private:
+ int a;
+};
+EOF
+],[$1],[F77],[cat > conftest.$ac_ext <<EOF
+ subroutine foo
+ implicit none
+ integer*4 a
+ a=0
+ return
+ end
+EOF
+],[$1],[GCJ],[cat > conftest.$ac_ext <<EOF
+public class foo {
+ private int a;
+ public void bar (void) {
+ a = 0;
+ }
+};
+EOF
+])
+dnl Parse the compiler output and extract the necessary
+dnl objects, libraries and library flags.
+if AC_TRY_EVAL(ac_compile); then
+ # Parse the compiler output and extract the necessary
+ # objects, libraries and library flags.
+
+ # Sentinel used to keep track of whether or not we are before
+ # the conftest object file.
+ pre_test_object_deps_done=no
+
+  # The `*' patterns in the `case' statements that some architectures use
+  # in $output_verbose_link_cmd can trigger glob expansion during the loop
+  # eval below unless they are neutralized by this substitution.
+ output_verbose_link_cmd=`$echo "X$output_verbose_link_cmd" | $Xsed -e "$no_glob_subst"`
+
+ for p in `eval $output_verbose_link_cmd`; do
+ case $p in
+
+ -L* | -R* | -l*)
+ # Some compilers place space between "-{L,R}" and the path.
+ # Remove the space.
+ if test $p = "-L" \
+ || test $p = "-R"; then
+ prev=$p
+ continue
+ else
+ prev=
+ fi
+
+ if test "$pre_test_object_deps_done" = no; then
+ case $p in
+ -L* | -R*)
+ # Internal compiler library paths should come after those
+	      # provided by the user.  The postdeps already come after the
+ # user supplied libs so there is no need to process them.
+ if test -z "$_LT_AC_TAGVAR(compiler_lib_search_path, $1)"; then
+ _LT_AC_TAGVAR(compiler_lib_search_path, $1)="${prev}${p}"
+ else
+ _LT_AC_TAGVAR(compiler_lib_search_path, $1)="${_LT_AC_TAGVAR(compiler_lib_search_path, $1)} ${prev}${p}"
+ fi
+ ;;
+ # The "-l" case would never come before the object being
+ # linked, so don't bother handling this case.
+ esac
+ else
+ if test -z "$_LT_AC_TAGVAR(postdeps, $1)"; then
+ _LT_AC_TAGVAR(postdeps, $1)="${prev}${p}"
+ else
+ _LT_AC_TAGVAR(postdeps, $1)="${_LT_AC_TAGVAR(postdeps, $1)} ${prev}${p}"
+ fi
+ fi
+ ;;
+
+ *.$objext)
+ # This assumes that the test object file only shows up
+ # once in the compiler output.
+ if test "$p" = "conftest.$objext"; then
+ pre_test_object_deps_done=yes
+ continue
+ fi
+
+ if test "$pre_test_object_deps_done" = no; then
+ if test -z "$_LT_AC_TAGVAR(predep_objects, $1)"; then
+ _LT_AC_TAGVAR(predep_objects, $1)="$p"
+ else
+ _LT_AC_TAGVAR(predep_objects, $1)="$_LT_AC_TAGVAR(predep_objects, $1) $p"
+ fi
+ else
+ if test -z "$_LT_AC_TAGVAR(postdep_objects, $1)"; then
+ _LT_AC_TAGVAR(postdep_objects, $1)="$p"
+ else
+ _LT_AC_TAGVAR(postdep_objects, $1)="$_LT_AC_TAGVAR(postdep_objects, $1) $p"
+ fi
+ fi
+ ;;
+
+ *) ;; # Ignore the rest.
+
+ esac
+ done
+
+ # Clean up.
+ rm -f a.out a.exe
+else
+ echo "libtool.m4: error: problem compiling $1 test program"
+fi
+
+$rm -f conftest.$objext
+
+# PORTME: override above test on systems where it is broken
+ifelse([$1],[CXX],
+[case $host_os in
+interix3*)
+ # Interix 3.5 installs completely hosed .la files for C++, so rather than
+ # hack all around it, let's just trust "g++" to DTRT.
+ _LT_AC_TAGVAR(predep_objects,$1)=
+ _LT_AC_TAGVAR(postdep_objects,$1)=
+ _LT_AC_TAGVAR(postdeps,$1)=
+ ;;
+
+solaris*)
+ case $cc_basename in
+ CC*)
+ # Adding this requires a known-good setup of shared libraries for
+ # Sun compiler versions before 5.6, else PIC objects from an old
+ # archive will be linked into the output, leading to subtle bugs.
+ _LT_AC_TAGVAR(postdeps,$1)='-lCstd -lCrun'
+ ;;
+ esac
+ ;;
+esac
+])
+
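+# If -lc already appears among the detected postdeps, the archive commands
+# do not need to add it again.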
+case " $_LT_AC_TAGVAR(postdeps, $1) " in
+*" -lc "*) _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no ;;
+esac
+])# AC_LIBTOOL_POSTDEP_PREDEP
+
+# AC_LIBTOOL_LANG_F77_CONFIG
+# --------------------------
+# Ensure that the configuration vars for the Fortran 77 compiler are
+# suitably defined. Those variables are subsequently used by
+# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
+AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG], [_LT_AC_LANG_F77_CONFIG(F77)])
+AC_DEFUN([_LT_AC_LANG_F77_CONFIG],
+[AC_REQUIRE([AC_PROG_F77])
+AC_LANG_PUSH(Fortran 77)
+
+_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_AC_TAGVAR(allow_undefined_flag, $1)=
+_LT_AC_TAGVAR(always_export_symbols, $1)=no
+_LT_AC_TAGVAR(archive_expsym_cmds, $1)=
+_LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_AC_TAGVAR(hardcode_direct, $1)=no
+_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
+_LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_AC_TAGVAR(hardcode_minus_L, $1)=no
+_LT_AC_TAGVAR(hardcode_automatic, $1)=no
+_LT_AC_TAGVAR(module_cmds, $1)=
+_LT_AC_TAGVAR(module_expsym_cmds, $1)=
+_LT_AC_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_AC_TAGVAR(no_undefined_flag, $1)=
+_LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Source file extension for f77 test sources.
+ac_ext=f
+
+# Object file extension for compiled f77 test sources.
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code=" subroutine t\n return\n end\n"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code=" program t\n end\n"
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC="$CC"
+CC=${F77-"f77"}
+compiler=$CC
+_LT_AC_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+
+AC_MSG_CHECKING([if libtool supports shared libraries])
+AC_MSG_RESULT([$can_build_shared])
+
+AC_MSG_CHECKING([whether to build shared libraries])
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case $host_os in
+aix3*)
+ test "$enable_shared" = yes && enable_static=no
+ if test -n "$RANLIB"; then
+ archive_cmds="$archive_cmds~\$RANLIB \$lib"
+ postinstall_cmds='$RANLIB $lib'
+ fi
+ ;;
+aix4* | aix5*)
+ if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+ test "$enable_shared" = yes && enable_static=no
+ fi
+ ;;
+esac
+AC_MSG_RESULT([$enable_shared])
+
+AC_MSG_CHECKING([whether to build static libraries])
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+AC_MSG_RESULT([$enable_static])
+
+_LT_AC_TAGVAR(GCC, $1)="$G77"
+_LT_AC_TAGVAR(LD, $1)="$LD"
+
+AC_LIBTOOL_PROG_COMPILER_PIC($1)
+AC_LIBTOOL_PROG_CC_C_O($1)
+AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
+AC_LIBTOOL_PROG_LD_SHLIBS($1)
+AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
+AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)
+
+AC_LIBTOOL_CONFIG($1)
+
+AC_LANG_POP
+CC="$lt_save_CC"
+])# AC_LIBTOOL_LANG_F77_CONFIG
+
+
+# AC_LIBTOOL_LANG_GCJ_CONFIG
+# --------------------------
+# Ensure that the configuration vars for the GCJ (Java) compiler are
+# suitably defined. Those variables are subsequently used by
+# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
+AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG], [_LT_AC_LANG_GCJ_CONFIG(GCJ)])
+AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG],
+[AC_LANG_PUSH(C)
+
+# Source file extension for Java test sources.
+ac_ext=java
+
+# Object file extension for compiled Java test sources.
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="class foo {}\n"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='public class conftest { public static void main(String[[]] argv) {}; }\n'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC="$CC"
+CC=${GCJ-"gcj"}
+compiler=$CC
+_LT_AC_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+
+# GCJ postdates the GCC releases that did not implicitly link libc in,
+# so there is never a need to add -lc explicitly.
+_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+
+_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+AC_LIBTOOL_PROG_COMPILER_NO_RTTI($1)
+AC_LIBTOOL_PROG_COMPILER_PIC($1)
+AC_LIBTOOL_PROG_CC_C_O($1)
+AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
+AC_LIBTOOL_PROG_LD_SHLIBS($1)
+AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
+AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)
+
+AC_LIBTOOL_CONFIG($1)
+
+AC_LANG_POP([])
+CC="$lt_save_CC"
+])# AC_LIBTOOL_LANG_GCJ_CONFIG
+
+
+# AC_LIBTOOL_LANG_RC_CONFIG
+# -------------------------
+# Ensure that the configuration vars for the Windows resource compiler are
+# suitably defined. Those variables are subsequently used by
+# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
+AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG], [_LT_AC_LANG_RC_CONFIG(RC)])
+AC_DEFUN([_LT_AC_LANG_RC_CONFIG],
+[AC_LANG_PUSH(C)
+
+# Source file extension for RC test sources.
+ac_ext=rc
+
+# Object file extension for compiled RC test sources.
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }\n'
+
+# Code to be used in simple link tests
+lt_simple_link_test_code="$lt_simple_compile_test_code"
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC="$CC"
+CC=${RC-"windres"}
+compiler=$CC
+_LT_AC_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes
+
+AC_LIBTOOL_CONFIG($1)
+
+AC_LANG_POP([])
+CC="$lt_save_CC"
+])# AC_LIBTOOL_LANG_RC_CONFIG
+
+
+# AC_LIBTOOL_CONFIG([TAGNAME])
+# ----------------------------
+# If TAGNAME is not passed, then create an initial libtool script
+# with a default configuration from the untagged config vars. Otherwise
+# add code to config.status for appending the configuration named by
+# TAGNAME from the matching tagged config vars.
+AC_DEFUN([AC_LIBTOOL_CONFIG],
+[# The else clause should only fire when bootstrapping the
+# libtool distribution; otherwise you forgot to ship ltmain.sh
+# with your package, and you will get complaints that there are
+# no rules to generate ltmain.sh.
+if test -f "$ltmain"; then
+ # See if we are running on zsh, and set the options which allow our commands through
+ # without removal of \ escapes.
+ if test -n "${ZSH_VERSION+set}" ; then
+ setopt NO_GLOB_SUBST
+ fi
+ # Now quote all the things that may contain metacharacters while being
+ # careful not to overquote the AC_SUBSTed values. We take copies of the
+ # variables and quote the copies for generation of the libtool script.
+ for var in echo old_CC old_CFLAGS AR AR_FLAGS EGREP RANLIB LN_S LTCC LTCFLAGS NM \
+ SED SHELL STRIP \
+ libname_spec library_names_spec soname_spec extract_expsyms_cmds \
+ old_striplib striplib file_magic_cmd finish_cmds finish_eval \
+ deplibs_check_method reload_flag reload_cmds need_locks \
+ lt_cv_sys_global_symbol_pipe lt_cv_sys_global_symbol_to_cdecl \
+ lt_cv_sys_global_symbol_to_c_name_address \
+ sys_lib_search_path_spec sys_lib_dlsearch_path_spec \
+ old_postinstall_cmds old_postuninstall_cmds \
+ _LT_AC_TAGVAR(compiler, $1) \
+ _LT_AC_TAGVAR(CC, $1) \
+ _LT_AC_TAGVAR(LD, $1) \
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1) \
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1) \
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1) \
+ _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) \
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1) \
+ _LT_AC_TAGVAR(thread_safe_flag_spec, $1) \
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1) \
+ _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1) \
+ _LT_AC_TAGVAR(old_archive_cmds, $1) \
+ _LT_AC_TAGVAR(old_archive_from_new_cmds, $1) \
+ _LT_AC_TAGVAR(predep_objects, $1) \
+ _LT_AC_TAGVAR(postdep_objects, $1) \
+ _LT_AC_TAGVAR(predeps, $1) \
+ _LT_AC_TAGVAR(postdeps, $1) \
+ _LT_AC_TAGVAR(compiler_lib_search_path, $1) \
+ _LT_AC_TAGVAR(archive_cmds, $1) \
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1) \
+ _LT_AC_TAGVAR(postinstall_cmds, $1) \
+ _LT_AC_TAGVAR(postuninstall_cmds, $1) \
+ _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1) \
+ _LT_AC_TAGVAR(allow_undefined_flag, $1) \
+ _LT_AC_TAGVAR(no_undefined_flag, $1) \
+ _LT_AC_TAGVAR(export_symbols_cmds, $1) \
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) \
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1) \
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1) \
+ _LT_AC_TAGVAR(hardcode_automatic, $1) \
+ _LT_AC_TAGVAR(module_cmds, $1) \
+ _LT_AC_TAGVAR(module_expsym_cmds, $1) \
+ _LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1) \
+ _LT_AC_TAGVAR(exclude_expsyms, $1) \
+ _LT_AC_TAGVAR(include_expsyms, $1); do
+
+ case $var in
+ _LT_AC_TAGVAR(old_archive_cmds, $1) | \
+ _LT_AC_TAGVAR(old_archive_from_new_cmds, $1) | \
+ _LT_AC_TAGVAR(archive_cmds, $1) | \
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1) | \
+ _LT_AC_TAGVAR(module_cmds, $1) | \
+ _LT_AC_TAGVAR(module_expsym_cmds, $1) | \
+ _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1) | \
+ _LT_AC_TAGVAR(export_symbols_cmds, $1) | \
+ extract_expsyms_cmds | reload_cmds | finish_cmds | \
+ postinstall_cmds | postuninstall_cmds | \
+ old_postinstall_cmds | old_postuninstall_cmds | \
+ sys_lib_search_path_spec | sys_lib_dlsearch_path_spec)
+ # Double-quote double-evaled strings.
+ eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\""
+ ;;
+ *)
+ eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\""
+ ;;
+ esac
+ done
+
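+  # Illustration of the quoting above (a sketch, not exact output):
+  # sed_quote_subst backslash-escapes shell metacharacters, so a value
+  # such as
+  #   old_archive_cmds='$AR cru $oldlib$oldobjs'
+  # is stored in its lt_ copy as
+  #   lt_old_archive_cmds='\$AR cru \$oldlib\$oldobjs'
+  # while the *_cmds variables also pass through double_quote_subst,
+  # since they are eval'ed a second time at libtool run time.
+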
+ case $lt_echo in
+ *'\[$]0 --fallback-echo"')
+ lt_echo=`$echo "X$lt_echo" | $Xsed -e 's/\\\\\\\[$]0 --fallback-echo"[$]/[$]0 --fallback-echo"/'`
+ ;;
+ esac
+
+ifelse([$1], [],
+ [cfgfile="${ofile}T"
+ trap "$rm \"$cfgfile\"; exit 1" 1 2 15
+ $rm -f "$cfgfile"
+ AC_MSG_NOTICE([creating $ofile])],
+ [cfgfile="$ofile"])
+
+ cat <<__EOF__ >> "$cfgfile"
+ifelse([$1], [],
+[#! $SHELL
+
+# `$echo "$cfgfile" | sed 's%^.*/%%'` - Provide generalized library-building support services.
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP)
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
+# Free Software Foundation, Inc.
+#
+# This file is part of GNU Libtool:
+# Originally by Gordon Matzigkeit <gord@gnu.ai.mit.edu>, 1996
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# A sed program that does not truncate output.
+SED=$lt_SED
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="$SED -e 1s/^X//"
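+
+# Illustrative note: a value beginning with -n could be swallowed by echo
+# as an option; writing it with a leading X (so -n foo is echoed as
+# X-n foo) and stripping that X again with Xsed keeps the value intact.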
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+# The names of the tagged configurations supported by this script.
+available_tags=
+
+# ### BEGIN LIBTOOL CONFIG],
+[# ### BEGIN LIBTOOL TAG CONFIG: $tagname])
+
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+
+# Shell to use when invoking shell scripts.
+SHELL=$lt_SHELL
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)
+
+# Whether or not to disallow shared libs when runtime libs are static
+allow_libtool_libs_with_static_runtimes=$_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# The host system.
+host_alias=$host_alias
+host=$host
+host_os=$host_os
+
+# The build system.
+build_alias=$build_alias
+build=$build
+build_os=$build_os
+
+# An echo program that does not interpret backslashes.
+echo=$lt_echo
+
+# The archiver.
+AR=$lt_AR
+AR_FLAGS=$lt_AR_FLAGS
+
+# A C compiler.
+LTCC=$lt_LTCC
+
+# LTCC compiler flags.
+LTCFLAGS=$lt_LTCFLAGS
+
+# A language-specific compiler.
+CC=$lt_[]_LT_AC_TAGVAR(compiler, $1)
+
+# Is the compiler the GNU C compiler?
+with_gcc=$_LT_AC_TAGVAR(GCC, $1)
+
+# An ERE matcher.
+EGREP=$lt_EGREP
+
+# The linker used to build libraries.
+LD=$lt_[]_LT_AC_TAGVAR(LD, $1)
+
+# Whether we need hard or soft links.
+LN_S=$lt_LN_S
+
+# A BSD-compatible nm program.
+NM=$lt_NM
+
+# A symbol stripping program
+STRIP=$lt_STRIP
+
+# Used to examine libraries when file_magic_cmd begins with "file".
+MAGIC_CMD=$MAGIC_CMD
+
+# Used on cygwin: DLL creation program.
+DLLTOOL="$DLLTOOL"
+
+# Used on cygwin: object dumper.
+OBJDUMP="$OBJDUMP"
+
+# Used on cygwin: assembler.
+AS="$AS"
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag
+reload_cmds=$lt_reload_cmds
+
+# How to pass a linker flag through the compiler.
+wl=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_wl, $1)
+
+# Object file suffix (normally "o").
+objext="$ac_objext"
+
+# Old archive suffix (normally "a").
+libext="$libext"
+
+# Shared library suffix (normally ".so").
+shrext_cmds='$shrext_cmds'
+
+# Executable file suffix (normally "").
+exeext="$exeext"
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)
+pic_mode=$pic_mode
+
+# What is the maximum length of a command?
+max_cmd_len=$lt_cv_sys_max_cmd_len
+
+# Does the compiler simultaneously support the -c and -o options?
+compiler_c_o=$lt_[]_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)
+
+# Must we lock files when doing compilation?
+need_locks=$lt_need_locks
+
+# Do we need the lib prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_static, $1)
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_[]_LT_AC_TAGVAR(export_dynamic_flag_spec, $1)
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_[]_LT_AC_TAGVAR(whole_archive_flag_spec, $1)
+
+# Compiler flag to generate thread-safe objects.
+thread_safe_flag_spec=$lt_[]_LT_AC_TAGVAR(thread_safe_flag_spec, $1)
+
+# Library versioning type.
+version_type=$version_type
+
+# Format of library name prefix.
+libname_spec=$lt_libname_spec
+
+# List of archive names. First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME.
+library_names_spec=$lt_library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$lt_soname_spec
+
+# Commands used to build and install an old-style archive.
+RANLIB=$lt_RANLIB
+old_archive_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_cmds, $1)
+old_postinstall_cmds=$lt_old_postinstall_cmds
+old_postuninstall_cmds=$lt_old_postuninstall_cmds
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_from_new_cmds, $1)
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1)
+
+# Commands used to build and install a shared archive.
+archive_cmds=$lt_[]_LT_AC_TAGVAR(archive_cmds, $1)
+archive_expsym_cmds=$lt_[]_LT_AC_TAGVAR(archive_expsym_cmds, $1)
+postinstall_cmds=$lt_postinstall_cmds
+postuninstall_cmds=$lt_postuninstall_cmds
+
+# Commands used to build a loadable module (assumed same as above if empty)
+module_cmds=$lt_[]_LT_AC_TAGVAR(module_cmds, $1)
+module_expsym_cmds=$lt_[]_LT_AC_TAGVAR(module_expsym_cmds, $1)
+
+# Commands to strip libraries.
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+# Dependencies to place before the objects being linked to create a
+# shared library.
+predep_objects=$lt_[]_LT_AC_TAGVAR(predep_objects, $1)
+
+# Dependencies to place after the objects being linked to create a
+# shared library.
+postdep_objects=$lt_[]_LT_AC_TAGVAR(postdep_objects, $1)
+
+# Dependencies to place before the objects being linked to create a
+# shared library.
+predeps=$lt_[]_LT_AC_TAGVAR(predeps, $1)
+
+# Dependencies to place after the objects being linked to create a
+# shared library.
+postdeps=$lt_[]_LT_AC_TAGVAR(postdeps, $1)
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1)
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method == file_magic.
+file_magic_cmd=$lt_file_magic_cmd
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_[]_LT_AC_TAGVAR(allow_undefined_flag, $1)
+
+# Flag that forces no undefined symbols.
+no_undefined_flag=$lt_[]_LT_AC_TAGVAR(no_undefined_flag, $1)
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# Same as above, but a single script fragment to be evaled but not shown.
+finish_eval=$lt_finish_eval
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe
+
+# Transform the output of nm into a proper C declaration
+global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl
+
+# Transform the output of nm into a C name/address pair
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address
+
+# This is the shared library runtime path variable.
+runpath_var=$runpath_var
+
+# This is the shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$_LT_AC_TAGVAR(hardcode_action, $1)
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)
+
+# If ld is used when linking, flag to hardcode \$libdir into
+# a binary during linking. This must work even if \$libdir does
+# not exist.
+hardcode_libdir_flag_spec_ld=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_separator, $1)
+
+# Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct=$_LT_AC_TAGVAR(hardcode_direct, $1)
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L=$_LT_AC_TAGVAR(hardcode_minus_L, $1)
+
+# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
+# the resulting binary.
+hardcode_shlibpath_var=$_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)
+
+# Set to yes if building a shared library automatically hardcodes DIR into the library
+# and all subsequent libraries and executables linked against it.
+hardcode_automatic=$_LT_AC_TAGVAR(hardcode_automatic, $1)
+
+# Variables whose values should be saved in libtool wrapper scripts and
+# restored at relink time.
+variables_saved_for_relink="$variables_saved_for_relink"
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$_LT_AC_TAGVAR(link_all_deplibs, $1)
+
+# Compile-time system search path for libraries
+sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
+
+# Run-time system search path for libraries
+sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
+
+# Fix the shell variable \$srcfile for the compiler.
+fix_srcfile_path="$_LT_AC_TAGVAR(fix_srcfile_path, $1)"
+
+# Set to yes if exported symbols are required.
+always_export_symbols=$_LT_AC_TAGVAR(always_export_symbols, $1)
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_[]_LT_AC_TAGVAR(export_symbols_cmds, $1)
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$lt_extract_expsyms_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_[]_LT_AC_TAGVAR(exclude_expsyms, $1)
+
+# Symbols that must always be exported.
+include_expsyms=$lt_[]_LT_AC_TAGVAR(include_expsyms, $1)
+
+ifelse([$1],[],
+[# ### END LIBTOOL CONFIG],
+[# ### END LIBTOOL TAG CONFIG: $tagname])
+
+__EOF__
+
+ifelse([$1],[], [
+ case $host_os in
+ aix3*)
+ cat <<\EOF >> "$cfgfile"
+
+# AIX sometimes has problems with the GCC collect2 program. For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+ COLLECT_NAMES=
+ export COLLECT_NAMES
+fi
+EOF
+ ;;
+ esac
+
+ # We use sed instead of cat because bash on DJGPP gets confused if
+  # it finds mixed CR/LF and LF-only lines.  Since sed operates in
+ # text mode, it properly converts lines to CR/LF. This bash problem
+ # is reportedly fixed, but why not run on old versions too?
+ sed '$q' "$ltmain" >> "$cfgfile" || (rm -f "$cfgfile"; exit 1)
+
+ mv -f "$cfgfile" "$ofile" || \
+ (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
+ chmod +x "$ofile"
+])
+else
+ # If there is no Makefile yet, we rely on a make rule to execute
+ # `config.status --recheck' to rerun these tests and create the
+ # libtool script then.
+ ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'`
+ if test -f "$ltmain_in"; then
+ test -f Makefile && make "$ltmain"
+ fi
+fi
+])# AC_LIBTOOL_CONFIG
+
+
+# AC_LIBTOOL_PROG_COMPILER_NO_RTTI([TAGNAME])
+# -------------------------------------------
+AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI],
+[AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl
+
+_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+
+if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
+
+ AC_LIBTOOL_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions],
+ lt_cv_prog_compiler_rtti_exceptions,
+ [-fno-rtti -fno-exceptions], [],
+ [_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"])
+fi
+])# AC_LIBTOOL_PROG_COMPILER_NO_RTTI
+
+
+# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
+# ---------------------------------
+AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE],
+[AC_REQUIRE([AC_CANONICAL_HOST])
+AC_REQUIRE([AC_PROG_NM])
+AC_REQUIRE([AC_OBJEXT])
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+AC_MSG_CHECKING([command to parse $NM output from $compiler object])
+AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
+[
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix. What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[[BCDEGRST]]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
+
+# Transform an extracted symbol line into a proper C declaration
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern int \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
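+
+# Illustrative sketch, assuming the symbol pipe below emits lines such as
+# "D nm_test_var nm_test_var": the cdecl transform above turns that line
+# into
+#   extern int nm_test_var;
+# and the name/address transform turns it into the table entry
+#   {"nm_test_var", (lt_ptr) &nm_test_var},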
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+ symcode='[[BCDT]]'
+ ;;
+cygwin* | mingw* | pw32*)
+ symcode='[[ABCDGISTW]]'
+ ;;
+hpux*) # Its linker distinguishes data from code symbols
+ if test "$host_cpu" = ia64; then
+ symcode='[[ABCDEGRST]]'
+ fi
+ lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+ lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+ ;;
+linux*)
+ if test "$host_cpu" = ia64; then
+ symcode='[[ABCDGIRSTW]]'
+ lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+ lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+ fi
+ ;;
+irix* | nonstopux*)
+ symcode='[[BCDEGRST]]'
+ ;;
+osf*)
+ symcode='[[BCDEGQRST]]'
+ ;;
+solaris*)
+ symcode='[[BDRT]]'
+ ;;
+sco3.2v5*)
+ symcode='[[DT]]'
+ ;;
+sysv4.2uw2*)
+ symcode='[[DT]]'
+ ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+ symcode='[[ABDT]]'
+ ;;
+sysv4)
+ symcode='[[DFNSTU]]'
+ ;;
+esac
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+ opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+ ;;
+esac
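+
+# Illustrative note: opt_cr is an optional literal carriage return, so the
+# symbol pipe assembled below also matches nm output whose lines end in
+# CRLF (as native MinGW tools may produce), not just plain LF.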
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+ symcode='[[ABCDGIRSTW]]' ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+ # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+ symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+ # Write the raw and C identifiers.
+ lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+
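+  # Sketch of the transform, assuming GNU-style nm output: a line such as
+  #   0000000000601040 D nm_test_var
+  # is rewritten to "D nm_test_var nm_test_var"; on the pass that tries an
+  # underscore prefix, "D _nm_test_var nm_test_var" is produced instead
+  # (raw linker-level name first, then the plain C name).
+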
+ # Check to see that the pipe works correctly.
+ pipe_works=no
+
+ rm -f conftest*
+ cat > conftest.$ac_ext <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+ if AC_TRY_EVAL(ac_compile); then
+ # Now try to grab the symbols.
+ nlist=conftest.nm
+ if AC_TRY_EVAL(NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) && test -s "$nlist"; then
+ # Try sorting and uniquifying the output.
+ if sort "$nlist" | uniq > "$nlist"T; then
+ mv -f "$nlist"T "$nlist"
+ else
+ rm -f "$nlist"T
+ fi
+
+ # Make sure that we snagged all the symbols we need.
+ if grep ' nm_test_var$' "$nlist" >/dev/null; then
+ if grep ' nm_test_func$' "$nlist" >/dev/null; then
+ cat <<EOF > conftest.$ac_ext
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+ # Now generate the symbol file.
+ eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | grep -v main >> conftest.$ac_ext'
+
+ cat <<EOF >> conftest.$ac_ext
+#if defined (__STDC__) && __STDC__
+# define lt_ptr_t void *
+#else
+# define lt_ptr_t char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+ const char *name;
+ lt_ptr_t address;
+}
+lt_preloaded_symbols[[]] =
+{
+EOF
+ $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" | grep -v main >> conftest.$ac_ext
+ cat <<\EOF >> conftest.$ac_ext
+ {0, (lt_ptr_t) 0}
+};
+
+#ifdef __cplusplus
+}
+#endif
+EOF
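+	  # Sketch, assuming the symbol pipe emitted "D nm_test_var nm_test_var":
+	  # the $SED command above turns each such line into a table entry like
+	  #   {"nm_test_var", (lt_ptr_t) &nm_test_var},
+	  # placed between the table header and the {0, (lt_ptr_t) 0} terminator.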
+ # Now try linking the two files.
+ mv conftest.$ac_objext conftstm.$ac_objext
+ lt_save_LIBS="$LIBS"
+ lt_save_CFLAGS="$CFLAGS"
+ LIBS="conftstm.$ac_objext"
+ CFLAGS="$CFLAGS$_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)"
+ if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then
+ pipe_works=yes
+ fi
+ LIBS="$lt_save_LIBS"
+ CFLAGS="$lt_save_CFLAGS"
+ else
+ echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD
+ fi
+ else
+ echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD
+ fi
+ else
+ echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
+ fi
+ else
+ echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD
+ cat conftest.$ac_ext >&5
+ fi
+ rm -f conftest* conftst*
+
+ # Do not use the global_symbol_pipe unless it works.
+ if test "$pipe_works" = yes; then
+ break
+ else
+ lt_cv_sys_global_symbol_pipe=
+ fi
+done
+])
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+ lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
+ AC_MSG_RESULT(failed)
+else
+ AC_MSG_RESULT(ok)
+fi
+])# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE
+
+
+# AC_LIBTOOL_PROG_COMPILER_PIC([TAGNAME])
+# ---------------------------------------
+AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC],
+[_LT_AC_TAGVAR(lt_prog_compiler_wl, $1)=
+_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
+_LT_AC_TAGVAR(lt_prog_compiler_static, $1)=
+
+AC_MSG_CHECKING([for $compiler option to produce PIC])
+ ifelse([$1],[CXX],[
+ # C++ specific cases for pic, static, wl, etc.
+ if test "$GXX" = yes; then
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'
+
+ case $host_os in
+ aix*)
+ # All AIX code is PIC.
+ if test "$host_cpu" = ia64; then
+ # AIX 5 now supports IA64 processor
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ fi
+ ;;
+ amigaos*)
+ # FIXME: we need at least 68020 code to build shared libraries, but
+ # adding the `-m68020' flag to GCC prevents building anything better,
+ # like `-m68040'.
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
+ ;;
+ beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+ # PIC is the default for these OSes.
+ ;;
+ mingw* | os2* | pw32*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'
+ ;;
+ darwin* | rhapsody*)
+ # PIC is the default on this platform
+ # Common symbols not allowed in MH_DYLIB files
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
+ ;;
+ *djgpp*)
+ # DJGPP does not support shared libraries at all
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
+ ;;
+ interix3*)
+ # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+ # Instead, we relocate shared libraries at runtime.
+ ;;
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
+ fi
+ ;;
+ hpux*)
+ # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+ # not for PA HP-UX.
+ case $host_cpu in
+ hppa*64*|ia64*)
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ ;;
+ esac
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ ;;
+ esac
+ else
+ case $host_os in
+ aix4* | aix5*)
+ # All AIX code is PIC.
+ if test "$host_cpu" = ia64; then
+ # AIX 5 now supports IA64 processor
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ else
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
+ fi
+ ;;
+ chorus*)
+ case $cc_basename in
+ cxch68*)
+ # Green Hills C++ Compiler
+ # _LT_AC_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a"
+ ;;
+ esac
+ ;;
+ darwin*)
+ # PIC is the default on this platform
+ # Common symbols not allowed in MH_DYLIB files
+ case $cc_basename in
+ xlc*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-qnocommon'
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ ;;
+ esac
+ ;;
+ dgux*)
+ case $cc_basename in
+ ec++*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ ;;
+ ghcx*)
+ # Green Hills C++ Compiler
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ freebsd* | kfreebsd*-gnu | dragonfly*)
+ # FreeBSD uses GNU C++
+ ;;
+ hpux9* | hpux10* | hpux11*)
+ case $cc_basename in
+ CC*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+ if test "$host_cpu" != ia64; then
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+ fi
+ ;;
+ aCC*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+ case $host_cpu in
+ hppa*64*|ia64*)
+ # +Z the default
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+ ;;
+ esac
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ interix*)
+ # This is c89, which is MS Visual C++ (no shared libs)
+	# Does anyone want to do a port?
+ ;;
+ irix5* | irix6* | nonstopux*)
+ case $cc_basename in
+ CC*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+ # CC pic flag -KPIC is the default.
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ linux*)
+ case $cc_basename in
+ KCC*)
+ # KAI C++ Compiler
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ ;;
+ icpc* | ecpc*)
+ # Intel C++
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ ;;
+ pgCC*)
+ # Portland Group C++ compiler.
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+ cxx*)
+ # Compaq C++
+ # Make sure the PIC flag is empty. It appears that all Alpha
+ # Linux and Compaq Tru64 Unix objects are PIC.
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ lynxos*)
+ ;;
+ m88k*)
+ ;;
+ mvs*)
+ case $cc_basename in
+ cxx*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ netbsd*)
+ ;;
+ osf3* | osf4* | osf5*)
+ case $cc_basename in
+ KCC*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,'
+ ;;
+ RCC*)
+ # Rational C++ 2.4.1
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+ ;;
+ cxx*)
+ # Digital/Compaq C++
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ # Make sure the PIC flag is empty. It appears that all Alpha
+ # Linux and Compaq Tru64 Unix objects are PIC.
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ psos*)
+ ;;
+ solaris*)
+ case $cc_basename in
+ CC*)
+ # Sun C++ 4.2, 5.x and Centerline C++
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+ ;;
+ gcx*)
+ # Green Hills C++ Compiler
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ sunos4*)
+ case $cc_basename in
+ CC*)
+ # Sun C++ 4.x
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+ lcc*)
+ # Lucid
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ tandem*)
+ case $cc_basename in
+ NCC*)
+ # NonStop-UX NCC 3.20
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+ case $cc_basename in
+ CC*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+ esac
+ ;;
+ vxworks*)
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+ ;;
+ esac
+ fi
+],
+[
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'
+
+ case $host_os in
+ aix*)
+ # All AIX code is PIC.
+ if test "$host_cpu" = ia64; then
+ # AIX 5 now supports IA64 processor
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ fi
+ ;;
+
+ amigaos*)
+ # FIXME: we need at least 68020 code to build shared libraries, but
+ # adding the `-m68020' flag to GCC prevents building anything better,
+ # like `-m68040'.
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4'
+ ;;
+
+ beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+ # PIC is the default for these OSes.
+ ;;
+
+ mingw* | pw32* | os2*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'
+ ;;
+
+ darwin* | rhapsody*)
+ # PIC is the default on this platform
+ # Common symbols not allowed in MH_DYLIB files
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common'
+ ;;
+
+ interix3*)
+ # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+ # Instead, we relocate shared libraries at runtime.
+ ;;
+
+ msdosdjgpp*)
+ # Just because we use GCC doesn't mean we suddenly get shared libraries
+ # on systems that don't support them.
+ _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+ enable_shared=no
+ ;;
+
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic
+ fi
+ ;;
+
+ hpux*)
+ # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+ # not for PA HP-UX.
+ case $host_cpu in
+ hppa*64*|ia64*)
+ # +Z the default
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ ;;
+ esac
+ ;;
+
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC'
+ ;;
+ esac
+ else
+ # PORTME Check for flag to pass linker flags through the system compiler.
+ case $host_os in
+ aix*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ if test "$host_cpu" = ia64; then
+ # AIX 5 now supports IA64 processor
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ else
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp'
+ fi
+ ;;
+ darwin*)
+ # PIC is the default on this platform
+ # Common symbols not allowed in MH_DYLIB files
+ case $cc_basename in
+ xlc*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-qnocommon'
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ ;;
+ esac
+ ;;
+
+ mingw* | pw32* | os2*)
+ # This hack is so that the source file can tell whether it is being
+ # built for inclusion in a dll (and should export symbols for example).
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT'
+ ;;
+
+ hpux9* | hpux10* | hpux11*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+ # not for PA HP-UX.
+ case $host_cpu in
+ hppa*64*|ia64*)
+ # +Z the default
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z'
+ ;;
+ esac
+ # Is there a better lt_prog_compiler_static that works with the bundled CC?
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive'
+ ;;
+
+ irix5* | irix6* | nonstopux*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ # PIC (with -KPIC) is the default.
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+ ;;
+
+ newsos6)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+
+ linux*)
+ case $cc_basename in
+ icc* | ecc*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static'
+ ;;
+ pgcc* | pgf77* | pgf90* | pgf95*)
+ # Portland Group compilers (*not* the Pentium gcc compiler,
+ # which looks to be a dead project)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+ ccc*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ # All Alpha code is PIC.
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+ ;;
+ esac
+ ;;
+
+ osf3* | osf4* | osf5*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ # All OSF/1 code is PIC.
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared'
+ ;;
+
+ solaris*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ case $cc_basename in
+ f77* | f90* | f95*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';;
+ esac
+ ;;
+
+ sunos4*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld '
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-PIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+
+ sysv4 | sysv4.2uw2* | sysv4.3*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+
+ sysv4*MP*)
+	  if test -d /usr/nec; then
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ fi
+ ;;
+
+ sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+
+ unicos*)
+ _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,'
+ _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+ ;;
+
+ uts4*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic'
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic'
+ ;;
+
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no
+ ;;
+ esac
+ fi
+])
+AC_MSG_RESULT([$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)])
+
+#
+# Check to make sure the PIC flag actually works.
+#
+if test -n "$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)"; then
+ AC_LIBTOOL_COMPILER_OPTION([if $compiler PIC flag $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) works],
+ _LT_AC_TAGVAR(lt_prog_compiler_pic_works, $1),
+ [$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])], [],
+ [case $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) in
+ "" | " "*) ;;
+ *) _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)" ;;
+ esac],
+ [_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
+ _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no])
+fi
+case $host_os in
+ # For platforms which do not support PIC, -DPIC is meaningless:
+ *djgpp*)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=
+ ;;
+ *)
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])"
+ ;;
+esac
+
+#
+# Check to make sure the static flag actually works.
+#
+wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_AC_TAGVAR(lt_prog_compiler_static, $1)\"
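+# Illustration (a sketch; values are system-dependent): with wl set to
+# '-Wl,' and a static flag of '${wl}-a ${wl}archive' (the HP-UX value
+# seen above), the eval yields '-Wl,-a -Wl,archive', which is the exact
+# string the link test below then tries.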
+AC_LIBTOOL_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works],
+ _LT_AC_TAGVAR(lt_prog_compiler_static_works, $1),
+ $lt_tmp_static_flag,
+ [],
+ [_LT_AC_TAGVAR(lt_prog_compiler_static, $1)=])
+])
+
+
+# AC_LIBTOOL_PROG_LD_SHLIBS([TAGNAME])
+# ------------------------------------
+# See if the linker supports building shared libraries.
+AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS],
+[AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries])
+ifelse([$1],[CXX],[
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+ case $host_os in
+ aix4* | aix5*)
+      # If we're using GNU nm, then we don't want the "-C" option:
+      # -C suppresses demangling for AIX nm but enables it for GNU nm,
+      # and the export list needs the raw, undemangled symbol names.
+ if $NM -V 2>&1 | grep 'GNU' > /dev/null; then
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols'
+ else
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols'
+ fi
+ ;;
+ pw32*)
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds"
+ ;;
+ cygwin* | mingw*)
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]] /s/.* \([[^ ]]*\)/\1 DATA/;/^.* __nm__/s/^.* __nm__\([[^ ]]*\) [[^ ]]*/\1 DATA/;/^I /d;/^[[AITW]] /s/.* //'\'' | sort | uniq > $export_symbols'
+ ;;
+ *)
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+ ;;
+ esac
+],[
+ runpath_var=
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=
+ _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+ _LT_AC_TAGVAR(archive_cmds, $1)=
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)=
+  _LT_AC_TAGVAR(old_archive_from_new_cmds, $1)=
+ _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1)=
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+ _LT_AC_TAGVAR(thread_safe_flag_spec, $1)=
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=unknown
+ _LT_AC_TAGVAR(hardcode_automatic, $1)=no
+ _LT_AC_TAGVAR(module_cmds, $1)=
+ _LT_AC_TAGVAR(module_expsym_cmds, $1)=
+ _LT_AC_TAGVAR(always_export_symbols, $1)=no
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols'
+ # include_expsyms should be a list of space-separated symbols to be *always*
+ # included in the symbol list
+ _LT_AC_TAGVAR(include_expsyms, $1)=
+ # exclude_expsyms can be an extended regexp of symbols to exclude
+ # it will be wrapped by ` (' and `)$', so one must not match beginning or
+ # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+ # as well as any symbol that contains `d'.
+ _LT_AC_TAGVAR(exclude_expsyms, $1)="_GLOBAL_OFFSET_TABLE_"
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid C symbol name, most a.out
+ # platforms (ab)use it in PIC code, but their linkers get confused if
+ # the symbol is explicitly referenced. Since portable code cannot
+ # rely on this symbol name, it's probably fine to never include it in
+ # preloaded symbol tables.
+ extract_expsyms_cmds=
+ # Just being paranoid about ensuring that cc_basename is set.
+ _LT_CC_BASENAME([$compiler])
+ case $host_os in
+ cygwin* | mingw* | pw32*)
+ # FIXME: the MSVC++ port hasn't been tested in a loooong time
+ # When not using gcc, we currently assume that we are using
+ # Microsoft Visual C++.
+ if test "$GCC" != yes; then
+ with_gnu_ld=no
+ fi
+ ;;
+ interix*)
+ # we just hope/assume this is gcc and not c89 (= MSVC++)
+ with_gnu_ld=yes
+ ;;
+ openbsd*)
+ with_gnu_ld=no
+ ;;
+ esac
+
+ _LT_AC_TAGVAR(ld_shlibs, $1)=yes
+ if test "$with_gnu_ld" = yes; then
+ # If archive_cmds runs LD, not CC, wlarc should be empty
+ wlarc='${wl}'
+
+ # Set some defaults for GNU ld with shared library support. These
+ # are reset later if shared libraries are not supported. Putting them
+ # here allows them to be overridden if necessary.
+ runpath_var=LD_RUN_PATH
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+    # ancient GNU ld didn't support --whole-archive et al.
+ if $LD --help 2>&1 | grep 'no-whole-archive' > /dev/null; then
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+ else
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+ fi
+ supports_anon_versioning=no
+ case `$LD -v 2>/dev/null` in
+ *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11
+ *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+ *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+ *\ 2.11.*) ;; # other 2.11 versions
+ *) supports_anon_versioning=yes ;;
+ esac
+
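+  # Illustrative examples (hypothetical version strings): an ld reporting
+  # "GNU ld version 2.9.5" matches the first pattern above and keeps
+  # supports_anon_versioning=no, while "GNU ld version 2.13.90" falls
+  # through to the default case and enables anonymous version scripts.
+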
+ # See if GNU ld supports shared libraries.
+ case $host_os in
+ aix3* | aix4* | aix5*)
+ # On AIX/PPC, the GNU linker is very broken
+ if test "$host_cpu" != ia64; then
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ cat <<EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.9.1, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support. If you
+*** really care for shared libraries, you may want to modify your PATH
+*** so that a non-GNU linker is found, and then restart.
+
+EOF
+ fi
+ ;;
+
+ amigaos*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+
+ # Samuel A. Falvo II <kc5tja@dolphin.openprojects.net> reports
+ # that the semantics of dynamic libraries on AmigaOS, at least up
+ # to version 4, is to share data among multiple programs linked
+ # with the same dynamic library. Since this doesn't match the
+ # behavior of shared libraries on other platforms, we can't use
+ # them.
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+
+ beos*)
+ if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+ # support --undefined. This deserves some investigation. FIXME
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+
+ cygwin* | mingw* | pw32*)
+ # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless,
+ # as there is no search path for DLLs.
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ _LT_AC_TAGVAR(always_export_symbols, $1)=no
+ _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]] /s/.* \([[^ ]]*\)/\1 DATA/'\'' | $SED -e '\''/^[[AITW]] /s/.* //'\'' | sort | uniq > $export_symbols'
+
+ if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+ # If the export-symbols file already is a .def file (1st line
+ # is EXPORTS), use it as is; otherwise, prepend...
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+ cp $export_symbols $output_objdir/$soname.def;
+ else
+ echo EXPORTS > $output_objdir/$soname.def;
+ cat $export_symbols >> $output_objdir/$soname.def;
+ fi~
+ $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+
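+    # Sketch of the .def handling above, with a hypothetical symbol list:
+    # if export_symbols contains the two lines foo and bar, the commands
+    # write
+    #   EXPORTS
+    #   foo
+    #   bar
+    # to soname.def; a list whose first line is already EXPORTS is copied
+    # unchanged.
+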
+ interix3*)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+ # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow, very
+      # memory-consuming and fragmenting process.  To avoid this, we pick a random,
+ # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+ # time. Moving up from 0x10000000 also allows more sbrk(2) space.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+ ;;
+
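+    # Worked example of the interix image-base arithmetic above: RANDOM
+    # (or the PID as fallback) mod 4096, halved, gives 0..2047; times
+    # 262144 (256 KiB) gives at most 0x1FFC0000; adding 1342177280
+    # (0x50000000) therefore stays within the advertised 0x50000000 to
+    # 0x6FFC0000 range, always 256 KiB-aligned.
+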
+ linux*)
+ if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+ tmp_addflag=
+ case $cc_basename,$host_cpu in
+ pgcc*) # Portland Group C compiler
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+ tmp_addflag=' $pic_flag'
+ ;;
+ pgf77* | pgf90* | pgf95*) # Portland Group f77 and f90 compilers
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+ tmp_addflag=' $pic_flag -Mnomain' ;;
+ ecc*,ia64* | icc*,ia64*) # Intel C compiler on ia64
+ tmp_addflag=' -i_dynamic' ;;
+ efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64
+ tmp_addflag=' -i_dynamic -nofor_main' ;;
+ ifc* | ifort*) # Intel Fortran compiler
+ tmp_addflag=' -nofor_main' ;;
+ esac
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+ if test $supports_anon_versioning = yes; then
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $output_objdir/$libname.ver~
+ cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+ $echo "local: *; };" >> $output_objdir/$libname.ver~
+ $CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+ fi
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+
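+    # Sketch of the version script generated by the anon-versioning
+    # branch above (assuming an export list containing foo and bar),
+    # as written to $output_objdir/$libname.ver:
+    #   { global:
+    #   foo;
+    #   bar;
+    #   local: *; };
+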
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+ wlarc=
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ fi
+ ;;
+
+ solaris*)
+ if $LD -v 2>&1 | grep 'BFD 2\.8' > /dev/null; then
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ cat <<EOF 1>&2
+
+*** Warning: Releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems. Therefore, libtool
+*** is disabling shared libraries support. We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer. Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+EOF
+ elif $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+
+ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+ case `$LD -v 2>&1` in
+ *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*)
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 cannot
+*** reliably create shared libraries on SCO systems. Therefore, libtool
+*** is disabling shared libraries support. We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer. Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+ ;;
+ *)
+ if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-rpath,$libdir`'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname,-retain-symbols-file,$export_symbols -o $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ esac
+ ;;
+
+ sunos4*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ wlarc=
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ *)
+ if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+ else
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ fi
+ ;;
+ esac
+
+ if test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no; then
+ runpath_var=
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+ fi
+ else
+ # PORTME fill in a description of your system's linker (not GNU ld)
+ case $host_os in
+ aix3*)
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ _LT_AC_TAGVAR(always_export_symbols, $1)=yes
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname'
+ # Note: this linker hardcodes the directories in LIBPATH if there
+ # are no directories specified by -L.
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+ # Neither direct hardcoding nor static linking is supported with a
+ # broken collect2.
+ _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported
+ fi
+ ;;
+
+ aix4* | aix5*)
+ if test "$host_cpu" = ia64; then
+ # On IA64, the linker does run time linking by default, so we don't
+ # have to do anything special.
+ aix_use_runtimelinking=no
+ exp_sym_flag='-Bexport'
+ no_entry_flag=""
+ else
+	  # If we're using GNU nm, then we don't want the "-C" option:
+	  # -C suppresses demangling for AIX nm but enables it for GNU nm,
+	  # and the export list needs the raw, undemangled symbol names.
+ if $NM -V 2>&1 | grep 'GNU' > /dev/null; then
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols'
+ else
+ _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols'
+ fi
+ aix_use_runtimelinking=no
+
+ # Test if we are trying to use run time linking or normal
+ # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+ # need to do runtime linking.
+ case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*)
+ for ld_flag in $LDFLAGS; do
+ if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+ aix_use_runtimelinking=yes
+ break
+ fi
+ done
+ ;;
+ esac
+
+ exp_sym_flag='-bexport'
+ no_entry_flag='-bnoentry'
+ fi
+
+ # When large executables or shared objects are built, AIX ld can
+ # have problems creating the table of contents. If linking a library
+ # or program results in "error TOC overflow" add -mminimal-toc to
+ # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not
+ # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+ _LT_AC_TAGVAR(archive_cmds, $1)=''
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':'
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+
+ if test "$GCC" = yes; then
+ case $host_os in aix4.[[012]]|aix4.[[012]].*)
+ # We only want to do this on AIX 4.2 and lower; the check
+ # below for broken collect2 doesn't work under 4.3+.
+ collect2name=`${CC} -print-prog-name=collect2`
+ if test -f "$collect2name" && \
+ strings "$collect2name" | grep resolve_lib_name >/dev/null
+ then
+ # We have reworked collect2
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ else
+ # We have old collect2
+ _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported
+ # It fails to find uninstalled libraries when the uninstalled
+ # path is not listed in the libpath. Setting hardcode_direct
+ # to unsupported forces relinking.
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
+ fi
+ ;;
+ esac
+ shared_flag='-shared'
+ if test "$aix_use_runtimelinking" = yes; then
+ shared_flag="$shared_flag "'${wl}-G'
+ fi
+ else
+ # not using gcc
+ if test "$host_cpu" = ia64; then
+ # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+ # chokes on -Wl,-G. The following line is correct:
+ shared_flag='-G'
+ else
+ if test "$aix_use_runtimelinking" = yes; then
+ shared_flag='${wl}-G'
+ else
+ shared_flag='${wl}-bM:SRE'
+ fi
+ fi
+ fi
+
+ # It seems that -bexpall does not export symbols beginning with
+ # underscore (_), so it is better to generate a list of symbols to export.
+ _LT_AC_TAGVAR(always_export_symbols, $1)=yes
+ if test "$aix_use_runtimelinking" = yes; then
+ # Warning - without using the other runtime loading flags (-brtl),
+ # -berok will link without error, but may produce a broken library.
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='-berok'
+ # Determine the default libpath from the value encoded in an empty executable.
+ _LT_AC_SYS_LIBPATH_AIX
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+ else
+ if test "$host_cpu" = ia64; then
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)="-z nodefs"
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+ else
+ # Determine the default libpath from the value encoded in an empty executable.
+ _LT_AC_SYS_LIBPATH_AIX
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath"
+ # Warning - without using the other run time loading flags,
+ # -berok will link without error, but may produce a broken library.
+ _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok'
+ # Exported symbols can be pulled into shared objects from archives
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='$convenience'
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes
+ # This is similar to how AIX traditionally builds its shared libraries.
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname'
+ fi
+ fi
+ ;;
+
+ amigaos*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ # see comment about different semantics on the GNU ld section
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+
+ bsdi[[45]]*)
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic
+ ;;
+
+ cygwin* | mingw* | pw32*)
+ # When not using gcc, we currently assume that we are using
+ # Microsoft Visual C++.
+ # hardcode_libdir_flag_spec is actually meaningless, as there is
+ # no search path for DLLs.
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=' '
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ # Tell ltmain to make .lib files, not .a files.
+ libext=lib
+ # Tell ltmain to make .dll files, not .so files.
+ shrext_cmds=".dll"
+ # FIXME: Setting linknames here is a bad hack.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | $SED -e '\''s/ -lc$//'\''` -link -dll~linknames='
+ # The linker will automatically build a .lib file if we build a DLL.
+ _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)='true'
+ # FIXME: Should let the user specify the lib program.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='lib /OUT:$oldlib$oldobjs$old_deplibs'
+ _LT_AC_TAGVAR(fix_srcfile_path, $1)='`cygpath -w "$srcfile"`'
+ _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+ ;;
+
+ darwin* | rhapsody*)
+ case $host_os in
+ rhapsody* | darwin1.[[012]])
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress'
+ ;;
+ *) # Darwin 1.3 on
+ if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress'
+ else
+ case ${MACOSX_DEPLOYMENT_TARGET} in
+ 10.[[012]])
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress'
+ ;;
+ 10.*)
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup'
+ ;;
+ esac
+ fi
+ ;;
+ esac
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_automatic, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=''
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+ if test "$GCC" = yes ; then
+ output_verbose_link_cmd='echo'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring'
+ _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
+ # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ else
+ case $cc_basename in
+ xlc*)
+ output_verbose_link_cmd='echo'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring'
+ _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags'
+ # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}'
+ ;;
+ *)
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ fi
+ ;;
+
+ dgux*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ freebsd1.*)
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+
+ # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+ # support. Future versions do this automatically, but an explicit c++rt0.o
+ # does not break anything, and helps significantly (at the cost of a little
+ # extra space).
+ freebsd2.2*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+ freebsd2.*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+ freebsd* | kfreebsd*-gnu | dragonfly*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ hpux9*)
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -shared -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+
+ # hardcode_minus_L: Not really in the search PATH,
+ # but as the default location of the library.
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ ;;
+
+ hpux10*)
+ if test "$GCC" = yes -a "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags'
+ fi
+ if test "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+
+ # hardcode_minus_L: Not really in the search PATH,
+ # but as the default location of the library.
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ fi
+ ;;
+
+ hpux11*)
+ if test "$GCC" = yes -a "$with_gnu_ld" = no; then
+ case $host_cpu in
+ hppa*64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ ia64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ esac
+ else
+ case $host_cpu in
+ hppa*64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ ia64*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+ ;;
+ esac
+ fi
+ if test "$with_gnu_ld" = no; then
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+
+ case $host_cpu in
+ hppa*64*|ia64*)
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+ *)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+
+ # hardcode_minus_L: Not really in the search PATH,
+ # but as the default location of the library.
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ ;;
+ esac
+ fi
+ ;;
+
+ irix5* | irix6* | nonstopux*)
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir'
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+ ;;
+
+ netbsd*)
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ newsos6)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ openbsd*)
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+ else
+ case $host_os in
+ openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ ;;
+ *)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+ ;;
+ esac
+ fi
+ ;;
+
+ os2*)
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+ _LT_AC_TAGVAR(archive_cmds, $1)='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def'
+ _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def'
+ ;;
+
+ osf3*)
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ else
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ ;;
+
+ osf4* | osf5*) # as osf3* with the addition of -msym flag
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir'
+ else
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~
+ $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~$rm $lib.exp'
+
+ # Both the C and C++ compilers support -rpath directly.
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir'
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=:
+ ;;
+
+ solaris*)
+ _LT_AC_TAGVAR(no_undefined_flag, $1)=' -z text'
+ if test "$GCC" = yes; then
+ wlarc='${wl}'
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $CC -shared ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$rm $lib.exp'
+ else
+ wlarc=''
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+ $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ case $host_os in
+ solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+ *)
+ # The compiler driver will combine linker options so we
+ # cannot just pass the convenience library names through
+ # without $wl, if we do not link with $LD.
+ # Luckily, gcc supports the same syntax we need for Sun Studio.
+ # Supported since Solaris 2.6 (maybe 2.5.1?)
+ case $wlarc in
+ '')
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;;
+ *)
+ _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract' ;;
+ esac ;;
+ esac
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+ ;;
+
+ sunos4*)
+ if test "x$host_vendor" = xsequent; then
+ # Use $CC to link under Sequent, because it throws in some extra .o
+ # files that make .init and .fini sections work.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+ fi
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ sysv4)
+ case $host_vendor in
+ sni)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=yes # is this really true???
+ ;;
+ siemens)
+ ## LD is ld; it makes a PLAMLIB.
+ ## CC just makes a GrossModule.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no
+ ;;
+ motorola)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_direct, $1)=no # Motorola manual says yes, but my tests say they lie
+ ;;
+ esac
+ runpath_var='LD_RUN_PATH'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ sysv4.3*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport'
+ ;;
+
+ sysv4*MP*)
+ if test -d /usr/nec; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ runpath_var=LD_RUN_PATH
+ hardcode_runpath_var=yes
+ _LT_AC_TAGVAR(ld_shlibs, $1)=yes
+ fi
+ ;;
+
+ sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7*)
+ _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ runpath_var='LD_RUN_PATH'
+
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+ fi
+ ;;
+
+ sysv5* | sco3.2v5* | sco5v6*)
+ # Note: We can NOT use -z defs as we might desire, because we do not
+ # link with -lc, and that would cause any symbols used from libc to
+ # always be unresolved, which means just about no library would
+ # ever link correctly. If we're not using GNU ld we use -z text
+ # though, which does catch some bad symbols but isn't as heavy-handed
+ # as -z defs.
+ _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text'
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs'
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`'
+ _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':'
+ _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+ _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport'
+ runpath_var='LD_RUN_PATH'
+
+ if test "$GCC" = yes; then
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ else
+ _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags'
+ fi
+ ;;
+
+ uts4*)
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+ _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+ _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+ ;;
+
+ *)
+ _LT_AC_TAGVAR(ld_shlibs, $1)=no
+ ;;
+ esac
+ fi
+])
+AC_MSG_RESULT([$_LT_AC_TAGVAR(ld_shlibs, $1)])
+test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no
+
+#
+# Do we need to explicitly link libc?
+#
+case "x$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)" in
+x|xyes)
+ # Assume -lc should be added
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes
+
+ if test "$enable_shared" = yes && test "$GCC" = yes; then
+ case $_LT_AC_TAGVAR(archive_cmds, $1) in
+ *'~'*)
+ # FIXME: we may have to deal with multi-command sequences.
+ ;;
+ '$CC '*)
+ # Test whether the compiler implicitly links with -lc since on some
+ # systems, -lgcc has to come before -lc. If gcc already passes -lc
+ # to ld, don't add -lc before -lgcc.
+ AC_MSG_CHECKING([whether -lc should be explicitly linked in])
+ $rm conftest*
+ printf "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+ if AC_TRY_EVAL(ac_compile) 2>conftest.err; then
+ soname=conftest
+ lib=conftest
+ libobjs=conftest.$ac_objext
+ deplibs=
+ wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1)
+ pic_flag=$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)
+ compiler_flags=-v
+ linker_flags=-v
+ verstring=
+ output_objdir=.
+ libname=conftest
+ lt_save_allow_undefined_flag=$_LT_AC_TAGVAR(allow_undefined_flag, $1)
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=
+ if AC_TRY_EVAL(_LT_AC_TAGVAR(archive_cmds, $1) 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1)
+ then
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+ else
+ _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes
+ fi
+ _LT_AC_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag
+ else
+ cat conftest.err 1>&5
+ fi
+ $rm conftest*
+ AC_MSG_RESULT([$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)])
+ ;;
+ esac
+ fi
+ ;;
+esac
+])# AC_LIBTOOL_PROG_LD_SHLIBS
+
+
+# _LT_AC_FILE_LTDLL_C
+# -------------------
+# Be careful that the start marker always follows a newline.
+AC_DEFUN([_LT_AC_FILE_LTDLL_C], [
+# /* ltdll.c starts here */
+# #define WIN32_LEAN_AND_MEAN
+# #include <windows.h>
+# #undef WIN32_LEAN_AND_MEAN
+# #include <stdio.h>
+#
+# #ifndef __CYGWIN__
+# # ifdef __CYGWIN32__
+# # define __CYGWIN__ __CYGWIN32__
+# # endif
+# #endif
+#
+# #ifdef __cplusplus
+# extern "C" {
+# #endif
+# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
+# #ifdef __cplusplus
+# }
+# #endif
+#
+# #ifdef __CYGWIN__
+# #include <cygwin/cygwin_dll.h>
+# DECLARE_CYGWIN_DLL( DllMain );
+# #endif
+# HINSTANCE __hDllInstance_base;
+#
+# BOOL APIENTRY
+# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
+# {
+# __hDllInstance_base = hInst;
+# return TRUE;
+# }
+# /* ltdll.c ends here */
+])# _LT_AC_FILE_LTDLL_C
+
+
+# _LT_AC_TAGVAR(VARNAME, [TAGNAME])
+# ---------------------------------
+AC_DEFUN([_LT_AC_TAGVAR], [ifelse([$2], [], [$1], [$1_$2])])
+
+
+# old names
+AC_DEFUN([AM_PROG_LIBTOOL], [AC_PROG_LIBTOOL])
+AC_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)])
+AC_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)])
+AC_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
+AC_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
+AC_DEFUN([AM_PROG_LD], [AC_PROG_LD])
+AC_DEFUN([AM_PROG_NM], [AC_PROG_NM])
+
+# This is just to silence aclocal about the macro not being used
+ifelse([AC_DISABLE_FAST_INSTALL])
+
+AC_DEFUN([LT_AC_PROG_GCJ],
+[AC_CHECK_TOOL(GCJ, gcj, no)
+ test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
+ AC_SUBST(GCJFLAGS)
+])
+
+AC_DEFUN([LT_AC_PROG_RC],
+[AC_CHECK_TOOL(RC, windres, no)
+])
+
+############################################################
+# NOTE: This macro has been submitted for inclusion into #
+# GNU Autoconf as AC_PROG_SED. When it is available in #
+# a released version of Autoconf we should remove this #
+# macro and use it instead. #
+############################################################
+# LT_AC_PROG_SED
+# --------------
+# Check for a fully-functional sed program that truncates
+# as few characters as possible. Prefer GNU sed if found.
+AC_DEFUN([LT_AC_PROG_SED],
+[AC_MSG_CHECKING([for a sed that does not truncate output])
+AC_CACHE_VAL(lt_cv_path_SED,
+[# Loop through the user's path and test for sed and gsed.
+# Then use that list of sed's as ones to test for truncation.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for lt_ac_prog in sed gsed; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
+ lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
+ fi
+ done
+ done
+done
+lt_ac_max=0
+lt_ac_count=0
+# Add /usr/xpg4/bin/sed as it is typically found on Solaris
+# along with /bin/sed that truncates output.
+for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
+ test ! -f $lt_ac_sed && continue
+ cat /dev/null > conftest.in
+ lt_ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >conftest.in
+ # Check for GNU sed and select it if it is found.
+ if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then
+ lt_cv_path_SED=$lt_ac_sed
+ break
+ fi
+ while true; do
+ cat conftest.in conftest.in >conftest.tmp
+ mv conftest.tmp conftest.in
+ cp conftest.in conftest.nl
+ echo >>conftest.nl
+ $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break
+ cmp -s conftest.out conftest.nl || break
+ # 10000 chars as input seems more than enough
+ test $lt_ac_count -gt 10 && break
+ lt_ac_count=`expr $lt_ac_count + 1`
+ if test $lt_ac_count -gt $lt_ac_max; then
+ lt_ac_max=$lt_ac_count
+ lt_cv_path_SED=$lt_ac_sed
+ fi
+ done
+done
+])
+SED=$lt_cv_path_SED
+AC_MSG_RESULT([$SED])
+])
diff --git a/projects/sample/autoconf/m4/link_options.m4 b/projects/sample/autoconf/m4/link_options.m4
new file mode 100644
index 000000000000..4c5f2f435d04
--- /dev/null
+++ b/projects/sample/autoconf/m4/link_options.m4
@@ -0,0 +1,108 @@
+#
+# Get the linker version string.
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_LINK_GET_VERSION],
+ [AC_CACHE_CHECK([for linker version],[llvm_cv_link_version],
+ [
+ version_string="$(ld -v 2>&1 | head -1)"
+
+ # Check for ld64.
+ if (echo "$version_string" | grep -q "ld64"); then
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#")
+ else
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#")
+ fi
+ ])
+ AC_DEFINE_UNQUOTED([HOST_LINK_VERSION],"$llvm_cv_link_version",
+ [Linker version detected at compile time.])
+])
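+
+# Usage sketch (hypothetical configure.ac fragment; the macro takes no
+# arguments):
+#   AC_LINK_GET_VERSION
+# C code can then read the detected version string through config.h:
+#   const char *link_ver = HOST_LINK_VERSION;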
+
+#
+# Determine if the system can handle the -R option being passed to the linker.
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_LINK_USE_R],
+[AC_CACHE_CHECK([for compiler -Wl,-R<path> option],[llvm_cv_link_use_r],
+[ AC_LANG_PUSH([C])
+ oldcflags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wl,-R."
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
+ [llvm_cv_link_use_r=yes],[llvm_cv_link_use_r=no])
+ CFLAGS="$oldcflags"
+ AC_LANG_POP([C])
+])
+if test "$llvm_cv_link_use_r" = yes ; then
+ AC_DEFINE([HAVE_LINK_R],[1],[Define if you can use -Wl,-R. to pass -R. to the linker, in order to add the current directory to the dynamic linker search path.])
+ fi
+])
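+
+# Usage sketch (hypothetical configure.ac fragment):
+#   AC_LINK_USE_R
+# HAVE_LINK_R is then defined in config.h when the host toolchain accepts
+# -Wl,-R<path>, so later logic can safely emit -R runpath options.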
+
+#
+# Determine if the system can handle the -R option being passed to the linker.
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_LINK_EXPORT_DYNAMIC],
+[AC_CACHE_CHECK([for compiler -Wl,-export-dynamic option],
+ [llvm_cv_link_use_export_dynamic],
+[ AC_LANG_PUSH([C])
+ oldcflags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wl,-export-dynamic"
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
+ [llvm_cv_link_use_export_dynamic=yes],[llvm_cv_link_use_export_dynamic=no])
+ CFLAGS="$oldcflags"
+ AC_LANG_POP([C])
+])
+if test "$llvm_cv_link_use_export_dynamic" = yes ; then
+ AC_DEFINE([HAVE_LINK_EXPORT_DYNAMIC],[1],[Define if you can use -Wl,-export-dynamic.])
+ fi
+])
+
+#
+# Determine if the system can handle the --version-script option being
+# passed to the linker.
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_LINK_VERSION_SCRIPT],
+[AC_CACHE_CHECK([for compiler -Wl,--version-script option],
+ [llvm_cv_link_use_version_script],
+[ AC_LANG_PUSH([C])
+ oldcflags="$CFLAGS"
+
+ # The following code is from the autoconf manual,
+ # "11.13: Limitations of Usual Tools".
+ # Create a temporary directory $tmp in $TMPDIR (default /tmp).
+ # Use mktemp if possible; otherwise fall back on mkdir,
+ # with $RANDOM to make collisions less likely.
+ : ${TMPDIR=/tmp}
+ {
+ tmp=`
+ (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null
+ ` &&
+ test -n "$tmp" && test -d "$tmp"
+ } || {
+ tmp=$TMPDIR/foo$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+ } || exit $?
+
+ echo "{" > "$tmp/export.map"
+ echo " global: main;" >> "$tmp/export.map"
+ echo " local: *;" >> "$tmp/export.map"
+ echo "};" >> "$tmp/export.map"
+
+ CFLAGS="$CFLAGS -Wl,--version-script=$tmp/export.map"
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
+ [llvm_cv_link_use_version_script=yes],[llvm_cv_link_use_version_script=no])
+ rm "$tmp/export.map"
+ rmdir "$tmp"
+ CFLAGS="$oldcflags"
+ AC_LANG_POP([C])
+])
+if test "$llvm_cv_link_use_version_script" = yes ; then
+ AC_SUBST(HAVE_LINK_VERSION_SCRIPT,1)
+ fi
+])
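+
+# Usage sketch (hypothetical configure.ac and Makefile):
+#   AC_LINK_VERSION_SCRIPT
+# On success HAVE_LINK_VERSION_SCRIPT is substituted as 1, and a Makefile
+# can pass -Wl,--version-script=<map file> to export only the symbols the
+# map lists as global, hiding everything matched by "local: *;".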
+
diff --git a/projects/sample/autoconf/m4/linux_mixed_64_32.m4 b/projects/sample/autoconf/m4/linux_mixed_64_32.m4
new file mode 100644
index 000000000000..123491f87e5e
--- /dev/null
+++ b/projects/sample/autoconf/m4/linux_mixed_64_32.m4
@@ -0,0 +1,17 @@
+#
+# Some Linux machines run a 64-bit kernel with a 32-bit userspace. 'uname -m'
+# shows these as x86_64. Ask the system 'gcc' what it thinks.
+#
+AC_DEFUN([AC_IS_LINUX_MIXED],
+[AC_CACHE_CHECK(for 32-bit userspace on 64-bit system,llvm_cv_linux_mixed,
+[ AC_LANG_PUSH([C])
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+ [[#ifndef __x86_64__
+ error: Not x86-64 even if uname says so!
+ #endif
+ ]])],
+ [llvm_cv_linux_mixed=no],
+ [llvm_cv_linux_mixed=yes])
+ AC_LANG_POP([C])
+])
+])
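+
+# Usage sketch (hypothetical configure.ac fragment):
+#   AC_IS_LINUX_MIXED
+#   if test "$llvm_cv_linux_mixed" = yes ; then
+#     ARCH=x86   # hypothetical: treat the host as 32-bit x86
+#   fi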
diff --git a/projects/sample/autoconf/m4/ltdl.m4 b/projects/sample/autoconf/m4/ltdl.m4
new file mode 100644
index 000000000000..407a16e2d694
--- /dev/null
+++ b/projects/sample/autoconf/m4/ltdl.m4
@@ -0,0 +1,418 @@
+## ltdl.m4 - Configure ltdl for the target system. -*-Autoconf-*-
+## Copyright (C) 1999-2000 Free Software Foundation, Inc.
+##
+## This file is free software; the Free Software Foundation gives
+## unlimited permission to copy and/or distribute it, with or without
+## modifications, as long as this notice is preserved.
+
+# serial 7 AC_LIB_LTDL
+
+# AC_WITH_LTDL
+# ------------
+# Clients of libltdl can use this macro to allow the installer to
+# choose between a shipped copy of the ltdl sources or a preinstalled
+# version of the library.
+AC_DEFUN([AC_WITH_LTDL],
+[AC_REQUIRE([AC_LIB_LTDL])
+AC_SUBST([LIBLTDL])
+AC_SUBST([INCLTDL])
+
+# Unless the user asks us to check, assume no installed ltdl exists.
+use_installed_libltdl=no
+
+AC_ARG_WITH([included_ltdl],
+ [ --with-included-ltdl use the GNU ltdl sources included here])
+
+if test "x$with_included_ltdl" != xyes; then
+ # We are not being forced to use the included libltdl sources, so
+ # decide whether there is a useful installed version we can use.
+ AC_CHECK_HEADER([ltdl.h],
+ [AC_CHECK_LIB([ltdl], [lt_dlcaller_register],
+ [with_included_ltdl=no],
+ [with_included_ltdl=yes])
+ ])
+fi
+
+if test "x$enable_ltdl_install" != xyes; then
+ # If the user did not specify an installable libltdl, then default
+ # to a convenience lib.
+ AC_LIBLTDL_CONVENIENCE
+fi
+
+if test "x$with_included_ltdl" = xno; then
+ # If the included ltdl is not to be used, then use the
+ # preinstalled libltdl we found.
+ AC_DEFINE([HAVE_LTDL], [1],
+ [Define this if a modern libltdl is already installed])
+ LIBLTDL=-lltdl
+fi
+
+# Report our decision...
+AC_MSG_CHECKING([whether to use included libltdl])
+AC_MSG_RESULT([$with_included_ltdl])
+
+AC_CONFIG_SUBDIRS([libltdl])
+])# AC_WITH_LTDL
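+
+# Usage sketch: a hypothetical client package calls AC_WITH_LTDL from its
+# configure.ac, adds $(INCLTDL) to its preprocessor flags, and links with
+# $(LIBLTDL); the macro itself decides between the bundled and an
+# installed libltdl and registers the libltdl subdirectory for configure.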
+
+
+# AC_LIB_LTDL
+# -----------
+# Perform all the checks necessary for compilation of the ltdl objects
+# -- including compiler checks and header checks.
+AC_DEFUN([AC_LIB_LTDL],
+[AC_PREREQ(2.60)
+AC_REQUIRE([AC_PROG_CC])
+AC_REQUIRE([AC_C_CONST])
+AC_REQUIRE([AC_HEADER_STDC])
+AC_REQUIRE([AC_HEADER_DIRENT])
+AC_REQUIRE([_LT_AC_CHECK_DLFCN])
+AC_REQUIRE([AC_LTDL_ENABLE_INSTALL])
+AC_REQUIRE([AC_LTDL_SHLIBEXT])
+AC_REQUIRE([AC_LTDL_SHLIBPATH])
+AC_REQUIRE([AC_LTDL_SYSSEARCHPATH])
+AC_REQUIRE([AC_LTDL_OBJDIR])
+AC_REQUIRE([AC_LTDL_DLPREOPEN])
+AC_REQUIRE([AC_LTDL_DLLIB])
+AC_REQUIRE([AC_LTDL_SYMBOL_USCORE])
+AC_REQUIRE([AC_LTDL_DLSYM_USCORE])
+AC_REQUIRE([AC_LTDL_SYS_DLOPEN_DEPLIBS])
+AC_REQUIRE([AC_LTDL_FUNC_ARGZ])
+
+AC_CHECK_HEADERS([assert.h ctype.h errno.h malloc.h memory.h stdlib.h \
+ stdio.h unistd.h])
+AC_CHECK_HEADERS([dl.h sys/dl.h dld.h mach-o/dyld.h])
+AC_CHECK_HEADERS([string.h strings.h], [break])
+
+AC_CHECK_FUNCS([strchr index], [break])
+AC_CHECK_FUNCS([strrchr rindex], [break])
+AC_CHECK_FUNCS([memcpy bcopy], [break])
+AC_CHECK_FUNCS([memmove strcmp])
+AC_CHECK_FUNCS([closedir opendir readdir])
+])# AC_LIB_LTDL
+
+
+# AC_LTDL_ENABLE_INSTALL
+# ----------------------
+AC_DEFUN([AC_LTDL_ENABLE_INSTALL],
+[AC_ARG_ENABLE([ltdl-install],
+ [AS_HELP_STRING([--enable-ltdl-install],[install libltdl])])
+
+AM_CONDITIONAL(INSTALL_LTDL, test x"${enable_ltdl_install-no}" != xno)
+AM_CONDITIONAL(CONVENIENCE_LTDL, test x"${enable_ltdl_convenience-no}" != xno)
+])# AC_LTDL_ENABLE_INSTALL
+
+
+# AC_LTDL_SYS_DLOPEN_DEPLIBS
+# --------------------------
+AC_DEFUN([AC_LTDL_SYS_DLOPEN_DEPLIBS],
+[AC_REQUIRE([AC_CANONICAL_HOST])
+AC_CACHE_CHECK([whether deplibs are loaded by dlopen],
+ [libltdl_cv_sys_dlopen_deplibs],
+ [# PORTME does your system automatically load deplibs for dlopen?
+ # or its logical equivalent (e.g. shl_load for HP-UX < 11)
+ # For now, we just catch OSes we know something about -- in the
+ # future, we'll try to test this programmatically.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ case "$host_os" in
+ aix3*|aix4.1.*|aix4.2.*)
+ # Unknown whether this is true for these versions of AIX, but
+ # we want this `case' here to explicitly catch those versions.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ ;;
+ aix[[45]]*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ darwin*)
+ # Assuming the user has installed a libdl from somewhere, this is true.
+ # If you are looking for one, see http://www.opendarwin.org/projects/dlcompat
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu)
+ # GNU and its variants, using gnu ld.so (Glibc)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ hpux10*|hpux11*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ interix*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ irix[[12345]]*|irix6.[[01]]*)
+ # Catch all versions of IRIX before 6.2, and indicate that we don't
+ # know how it worked for any of those versions.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ ;;
+ irix*)
+ # The case above catches anything before 6.2, and it's known that
+ # at 6.2 and later dlopen does load deplibs.
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ netbsd*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ openbsd*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ osf[[1234]]*)
+ # dlopen did load deplibs (at least at 4.x), but until the 5.x series,
+ # it did *not* use an RPATH in a shared library to find objects the
+ # library depends on, so we explicitly say `no'.
+ libltdl_cv_sys_dlopen_deplibs=no
+ ;;
+ osf5.0|osf5.0a|osf5.1)
+ # dlopen *does* load deplibs and with the right loader patch applied
+ # it even uses RPATH in a shared library to search for shared objects
+ # that the library depends on, but there's no easy way to know if that
+ # patch is installed. Since this is the case, all we can really
+ # say is unknown -- it depends on the patch being installed. If
+ # it is, this changes to `yes'. Without it, it would be `no'.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ ;;
+ osf*)
+ # The two cases above should catch all versions of osf <= 5.1. Read
+ # the comments above for what we know about them.
+ # At > 5.1, deplibs are loaded *and* any RPATH in a shared library
+ # is used to find them so we can finally say `yes'.
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ solaris*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ esac
+ ])
+if test "$libltdl_cv_sys_dlopen_deplibs" != yes; then
+ AC_DEFINE([LTDL_DLOPEN_DEPLIBS], [1],
+ [Define if the OS needs help to load dependent libraries for dlopen().])
+fi
+])# AC_LTDL_SYS_DLOPEN_DEPLIBS
+
+
+# AC_LTDL_SHLIBEXT
+# ----------------
+AC_DEFUN([AC_LTDL_SHLIBEXT],
+[AC_REQUIRE([AC_LIBTOOL_SYS_DYNAMIC_LINKER])
+AC_CACHE_CHECK([which extension is used for loadable modules],
+ [libltdl_cv_shlibext],
+[
+module=yes
+eval libltdl_cv_shlibext=$shrext_cmds
+ ])
+if test -n "$libltdl_cv_shlibext"; then
+ AC_DEFINE_UNQUOTED([LTDL_SHLIB_EXT], ["$libltdl_cv_shlibext"],
+ [Define to the extension used for shared libraries, say, ".so".])
+fi
+])# AC_LTDL_SHLIBEXT
+
+
+# AC_LTDL_SHLIBPATH
+# -----------------
+AC_DEFUN([AC_LTDL_SHLIBPATH],
+[AC_REQUIRE([AC_LIBTOOL_SYS_DYNAMIC_LINKER])
+AC_CACHE_CHECK([which variable specifies run-time library path],
+ [libltdl_cv_shlibpath_var], [libltdl_cv_shlibpath_var="$shlibpath_var"])
+if test -n "$libltdl_cv_shlibpath_var"; then
+ AC_DEFINE_UNQUOTED([LTDL_SHLIBPATH_VAR], ["$libltdl_cv_shlibpath_var"],
+ [Define to the name of the environment variable that determines the dynamic library search path.])
+fi
+])# AC_LTDL_SHLIBPATH
+
+
+# AC_LTDL_SYSSEARCHPATH
+# ---------------------
+AC_DEFUN([AC_LTDL_SYSSEARCHPATH],
+[AC_REQUIRE([AC_LIBTOOL_SYS_DYNAMIC_LINKER])
+AC_CACHE_CHECK([for the default library search path],
+ [libltdl_cv_sys_search_path],
+ [libltdl_cv_sys_search_path="$sys_lib_dlsearch_path_spec"])
+if test -n "$libltdl_cv_sys_search_path"; then
+ sys_search_path=
+ for dir in $libltdl_cv_sys_search_path; do
+ if test -z "$sys_search_path"; then
+ sys_search_path="$dir"
+ else
+ sys_search_path="$sys_search_path$PATH_SEPARATOR$dir"
+ fi
+ done
+ AC_DEFINE_UNQUOTED([LTDL_SYSSEARCHPATH], ["$sys_search_path"],
+ [Define to the system default library search path.])
+fi
+])# AC_LTDL_SYSSEARCHPATH
+
+
+# AC_LTDL_OBJDIR
+# --------------
+AC_DEFUN([AC_LTDL_OBJDIR],
+[AC_CACHE_CHECK([for objdir],
+ [libltdl_cv_objdir],
+ [libltdl_cv_objdir="$objdir"
+ if test -n "$objdir"; then
+ :
+ else
+ rm -f .libs 2>/dev/null
+ mkdir .libs 2>/dev/null
+ if test -d .libs; then
+ libltdl_cv_objdir=.libs
+ else
+ # MS-DOS does not allow filenames that begin with a dot.
+ libltdl_cv_objdir=_libs
+ fi
+ rmdir .libs 2>/dev/null
+ fi
+ ])
+AC_DEFINE_UNQUOTED([LTDL_OBJDIR], ["$libltdl_cv_objdir/"],
+ [Define to the sub-directory in which libtool stores uninstalled libraries.])
+])# AC_LTDL_OBJDIR
+
+
+# AC_LTDL_DLPREOPEN
+# -----------------
+AC_DEFUN([AC_LTDL_DLPREOPEN],
+[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])
+AC_CACHE_CHECK([whether libtool supports -dlopen/-dlpreopen],
+ [libltdl_cv_preloaded_symbols],
+ [if test -n "$lt_cv_sys_global_symbol_pipe"; then
+ libltdl_cv_preloaded_symbols=yes
+ else
+ libltdl_cv_preloaded_symbols=no
+ fi
+ ])
+if test x"$libltdl_cv_preloaded_symbols" = xyes; then
+ AC_DEFINE([HAVE_PRELOADED_SYMBOLS], [1],
+ [Define if libtool can extract symbol lists from object files.])
+fi
+])# AC_LTDL_DLPREOPEN
+
+
+# AC_LTDL_DLLIB
+# -------------
+AC_DEFUN([AC_LTDL_DLLIB],
+[LIBADD_DL=
+AC_SUBST(LIBADD_DL)
+AC_LANG_PUSH([C])
+
+AC_CHECK_FUNC([shl_load],
+ [AC_DEFINE([HAVE_SHL_LOAD], [1],
+ [Define if you have the shl_load function.])],
+ [AC_CHECK_LIB([dld], [shl_load],
+ [AC_DEFINE([HAVE_SHL_LOAD], [1],
+ [Define if you have the shl_load function.])
+ LIBADD_DL="$LIBADD_DL -ldld"],
+ [AC_CHECK_LIB([dl], [dlopen],
+ [AC_DEFINE([HAVE_LIBDL], [1],
+ [Define if you have the libdl library or equivalent.])
+ LIBADD_DL="-ldl" libltdl_cv_lib_dl_dlopen="yes"],
+ [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#if HAVE_DLFCN_H
+# include <dlfcn.h>
+#endif
+ ]], [[dlopen(0, 0);]])],[AC_DEFINE([HAVE_LIBDL], [1],
+ [Define if you have the libdl library or equivalent.]) libltdl_cv_func_dlopen="yes"],[AC_CHECK_LIB([svld], [dlopen],
+ [AC_DEFINE([HAVE_LIBDL], [1],
+ [Define if you have the libdl library or equivalent.])
+ LIBADD_DL="-lsvld" libltdl_cv_func_dlopen="yes"],
+ [AC_CHECK_LIB([dld], [dld_link],
+ [AC_DEFINE([HAVE_DLD], [1],
+ [Define if you have the GNU dld library.])
+ LIBADD_DL="$LIBADD_DL -ldld"],
+ [AC_CHECK_FUNC([_dyld_func_lookup],
+ [AC_DEFINE([HAVE_DYLD], [1],
+ [Define if you have the _dyld_func_lookup function.])])
+ ])
+ ])
+ ])
+ ])
+ ])
+])
+
+if test x"$libltdl_cv_func_dlopen" = xyes || test x"$libltdl_cv_lib_dl_dlopen" = xyes
+then
+ lt_save_LIBS="$LIBS"
+ LIBS="$LIBS $LIBADD_DL"
+ AC_CHECK_FUNCS([dlerror])
+ LIBS="$lt_save_LIBS"
+fi
+AC_LANG_POP
+])# AC_LTDL_DLLIB
+
+
+# AC_LTDL_SYMBOL_USCORE
+# ---------------------
+# does the compiler prefix global symbols with an underscore?
+AC_DEFUN([AC_LTDL_SYMBOL_USCORE],
+[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])
+AC_CACHE_CHECK([for _ prefix in compiled symbols],
+ [ac_cv_sys_symbol_underscore],
+ [ac_cv_sys_symbol_underscore=no
+ cat > conftest.$ac_ext <<EOF
+void nm_test_func(){}
+int main(){nm_test_func;return 0;}
+EOF
+ if AC_TRY_EVAL(ac_compile); then
+ # Now try to grab the symbols.
+ ac_nlist=conftest.nm
+ if AC_TRY_EVAL(NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $ac_nlist) && test -s "$ac_nlist"; then
+ # See whether the symbols have a leading underscore.
+ if grep '^. _nm_test_func' "$ac_nlist" >/dev/null; then
+ ac_cv_sys_symbol_underscore=yes
+ else
+ if grep '^. nm_test_func ' "$ac_nlist" >/dev/null; then
+ :
+ else
+ echo "configure: cannot find nm_test_func in $ac_nlist" >&AS_MESSAGE_LOG_FD
+ fi
+ fi
+ else
+ echo "configure: cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
+ fi
+ else
+ echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
+ cat conftest.c >&AS_MESSAGE_LOG_FD
+ fi
+ rm -rf conftest*
+ ])
+])# AC_LTDL_SYMBOL_USCORE
+
+
+# AC_LTDL_DLSYM_USCORE
+# --------------------
+AC_DEFUN([AC_LTDL_DLSYM_USCORE],
+[AC_REQUIRE([AC_LTDL_SYMBOL_USCORE])
+if test x"$ac_cv_sys_symbol_underscore" = xyes; then
+ if test x"$libltdl_cv_func_dlopen" = xyes ||
+ test x"$libltdl_cv_lib_dl_dlopen" = xyes ; then
+ AC_CACHE_CHECK([whether we have to add an underscore for dlsym],
+ [libltdl_cv_need_uscore],
+ [libltdl_cv_need_uscore=unknown
+ save_LIBS="$LIBS"
+ LIBS="$LIBS $LIBADD_DL"
+ _LT_AC_TRY_DLOPEN_SELF(
+ [libltdl_cv_need_uscore=no], [libltdl_cv_need_uscore=yes],
+ [], [libltdl_cv_need_uscore=cross])
+ LIBS="$save_LIBS"
+ ])
+ fi
+fi
+
+if test x"$libltdl_cv_need_uscore" = xyes; then
+ AC_DEFINE([NEED_USCORE], [1],
+ [Define if dlsym() requires a leading underscore in symbol names.])
+fi
+])# AC_LTDL_DLSYM_USCORE
+
+# AC_LTDL_FUNC_ARGZ
+# -----------------
+AC_DEFUN([AC_LTDL_FUNC_ARGZ],
+[AC_CHECK_HEADERS([argz.h])
+
+AC_CHECK_TYPES([error_t],
+ [],
+ [AC_DEFINE([error_t], [int],
+ [Define to a type to use for `error_t' if it is not otherwise available.])],
+ [#if HAVE_ARGZ_H
+# include <argz.h>
+#endif])
+
+AC_CHECK_FUNCS([argz_append argz_create_sep argz_insert argz_next argz_stringify])
+])# AC_LTDL_FUNC_ARGZ
diff --git a/projects/sample/autoconf/m4/need_dev_zero_for_mmap.m4 b/projects/sample/autoconf/m4/need_dev_zero_for_mmap.m4
new file mode 100644
index 000000000000..57b322830172
--- /dev/null
+++ b/projects/sample/autoconf/m4/need_dev_zero_for_mmap.m4
@@ -0,0 +1,17 @@
+#
+# When allocating RWX memory, check whether we need to use /dev/zero
+# as the file descriptor or not.
+#
+AC_DEFUN([AC_NEED_DEV_ZERO_FOR_MMAP],
+[AC_CACHE_CHECK([if /dev/zero is needed for mmap],
+ac_cv_need_dev_zero_for_mmap,
+[if test "$llvm_cv_os_type" = "Interix" ; then
+ ac_cv_need_dev_zero_for_mmap=yes
+ else
+ ac_cv_need_dev_zero_for_mmap=no
+ fi
+])
+if test "$ac_cv_need_dev_zero_for_mmap" = yes; then
+ AC_DEFINE([NEED_DEV_ZERO_FOR_MMAP],[1],
+ [Define if /dev/zero should be used when mapping RWX memory, or undefine if it's not necessary])
+fi])
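+
+# Usage sketch (hypothetical configure.ac fragment):
+#   AC_NEED_DEV_ZERO_FOR_MMAP
+# C code can then open /dev/zero for the mmap file descriptor when
+# NEED_DEV_ZERO_FOR_MMAP is defined, and pass -1 with MAP_ANON otherwise.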
diff --git a/projects/sample/autoconf/m4/path_tclsh.m4 b/projects/sample/autoconf/m4/path_tclsh.m4
new file mode 100644
index 000000000000..85433de71cc5
--- /dev/null
+++ b/projects/sample/autoconf/m4/path_tclsh.m4
@@ -0,0 +1,39 @@
+dnl This macro checks for tclsh, which is required to run dejagnu. On some
+dnl platforms (notably FreeBSD), tclsh is named tclshX.Y - this handles
+dnl that for us so we can get the latest installed tclsh version.
+dnl
+AC_DEFUN([DJ_AC_PATH_TCLSH], [
+no_itcl=true
+AC_MSG_CHECKING(for the tclsh program in tclinclude directory)
+AC_ARG_WITH(tclinclude,
+ AS_HELP_STRING([--with-tclinclude],
+ [directory where tcl headers are]),
+ [with_tclinclude=${withval}],[with_tclinclude=''])
+AC_CACHE_VAL(ac_cv_path_tclsh,[
+dnl first check to see if --with-tclinclude was specified
+if test x"${with_tclinclude}" != x ; then
+ if test -f ${with_tclinclude}/tclsh ; then
+ ac_cv_path_tclsh=`(cd ${with_tclinclude}; pwd)`
+ elif test -f ${with_tclinclude}/src/tclsh ; then
+ ac_cv_path_tclsh=`(cd ${with_tclinclude}/src; pwd)`
+ else
+ AC_MSG_ERROR([${with_tclinclude} directory doesn't contain tclsh])
+ fi
+fi])
+
+dnl see if one is installed
+if test x"${ac_cv_path_tclsh}" = x ; then
+ AC_MSG_RESULT(none)
+ AC_PATH_PROGS([TCLSH],[tclsh8.4 tclsh8.4.8 tclsh8.4.7 tclsh8.4.6 tclsh8.4.5 tclsh8.4.4 tclsh8.4.3 tclsh8.4.2 tclsh8.4.1 tclsh8.4.0 tclsh8.3 tclsh8.3.5 tclsh8.3.4 tclsh8.3.3 tclsh8.3.2 tclsh8.3.1 tclsh8.3.0 tclsh])
+ if test x"${TCLSH}" = x ; then
+ ac_cv_path_tclsh='';
+ else
+ ac_cv_path_tclsh="${TCLSH}";
+ fi
+else
+ AC_MSG_RESULT(${ac_cv_path_tclsh})
+ TCLSH="${ac_cv_path_tclsh}"
+ AC_SUBST(TCLSH)
+fi
+])
+
diff --git a/projects/sample/autoconf/m4/rand48.m4 b/projects/sample/autoconf/m4/rand48.m4
new file mode 100644
index 000000000000..56705d85c9c5
--- /dev/null
+++ b/projects/sample/autoconf/m4/rand48.m4
@@ -0,0 +1,12 @@
+#
+# This function determines if the srand48, drand48, and lrand48 functions are
+# available on this platform.
+#
+AC_DEFUN([AC_FUNC_RAND48],[
+AC_SINGLE_CXX_CHECK([ac_cv_func_rand48],
+ [srand48/lrand48/drand48], [<stdlib.h>],
+ [srand48(0);lrand48();drand48();])
+if test "$ac_cv_func_rand48" = "yes" ; then
+AC_DEFINE([HAVE_RAND48],1,[Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h>])
+fi
+])
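+
+# Usage sketch: invoke AC_FUNC_RAND48 with no arguments from a
+# hypothetical configure.ac; C code can then guard calls to srand48(),
+# lrand48(), and drand48() with #ifdef HAVE_RAND48.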
diff --git a/projects/sample/autoconf/m4/sanity_check.m4 b/projects/sample/autoconf/m4/sanity_check.m4
new file mode 100644
index 000000000000..639fccca2464
--- /dev/null
+++ b/projects/sample/autoconf/m4/sanity_check.m4
@@ -0,0 +1,31 @@
+dnl Check a program for version sanity. The test runs a program, passes it an
+dnl argument to make it print out some identification string, and filters that
+dnl output with a regular expression. If the output is non-empty, the program
+dnl passes the sanity check.
+dnl $1 - Name or full path of the program to run
+dnl $2 - Argument to pass to print out identification string
+dnl $3 - grep RE to match identification string
+dnl $4 - set to 1 to make errors only a warning
+AC_DEFUN([CHECK_PROGRAM_SANITY],
+[
+AC_MSG_CHECKING([sanity for program ]$1)
+sanity="0"
+sanity_path=`which $1 2>/dev/null`
+if test "$?" -eq 0 -a -x "$sanity_path" ; then
+ sanity=`$1 $2 2>&1 | grep "$3"`
+ if test -z "$sanity" ; then
+ AC_MSG_RESULT([no])
+ sanity="0"
+ if test "$4" -eq 1 ; then
+ AC_MSG_WARN([Program ]$1[ failed to pass sanity check.])
+ else
+ AC_MSG_ERROR([Program ]$1[ failed to pass sanity check.])
+ fi
+ else
+ AC_MSG_RESULT([yes])
+ sanity="1"
+ fi
+else
+ AC_MSG_RESULT([not found])
+fi
+])
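+
+dnl Usage sketch (hypothetical invocation, not taken from this tree):
+dnl   CHECK_PROGRAM_SANITY([gcc], [--version], [gcc], [1])
+dnl This runs `gcc --version', greps the output for "gcc", and only warns
+dnl (rather than erroring) on a mismatch because the fourth argument is 1.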
diff --git a/projects/sample/autoconf/m4/single_cxx_check.m4 b/projects/sample/autoconf/m4/single_cxx_check.m4
new file mode 100644
index 000000000000..21efa4bed353
--- /dev/null
+++ b/projects/sample/autoconf/m4/single_cxx_check.m4
@@ -0,0 +1,10 @@
+dnl AC_SINGLE_CXX_CHECK(CACHEVAR, FUNCTION, HEADER, PROGRAM)
+dnl   $1 = CACHEVAR, $2 = FUNCTION, $3 = HEADER, $4 = PROGRAM
+dnl
+AC_DEFUN([AC_SINGLE_CXX_CHECK],
+ [AC_CACHE_CHECK([for $2 in $3], [$1],
+ [AC_LANG_PUSH([C++])
+ AC_COMPILE_IFELSE(AC_LANG_PROGRAM([#include $3],[$4]),[$1=yes],[$1=no])
+ AC_LANG_POP([C++])])
+ ])
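+
+dnl Usage sketch; rand48.m4 above invokes the macro the same way:
+dnl   AC_SINGLE_CXX_CHECK([ac_cv_func_rand48], [srand48/lrand48/drand48],
+dnl                       [<stdlib.h>], [srand48(0);lrand48();drand48();])
+dnl The compile result lands in the $1 cache variable as "yes" or "no".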
+
diff --git a/projects/sample/autoconf/m4/visibility_inlines_hidden.m4 b/projects/sample/autoconf/m4/visibility_inlines_hidden.m4
new file mode 100644
index 000000000000..42ddbe9128b3
--- /dev/null
+++ b/projects/sample/autoconf/m4/visibility_inlines_hidden.m4
@@ -0,0 +1,22 @@
+#
+# Determine if the compiler accepts -fvisibility-inlines-hidden
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_CXX_USE_VISIBILITY_INLINES_HIDDEN],
+[AC_CACHE_CHECK([for compiler -fvisibility-inlines-hidden option],
+ [llvm_cv_cxx_visibility_inlines_hidden],
+[ AC_LANG_PUSH([C++])
+ oldcxxflags="$CXXFLAGS"
+ CXXFLAGS="$CXXFLAGS -fvisibility-inlines-hidden"
+ AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+ [llvm_cv_cxx_visibility_inlines_hidden=yes],[llvm_cv_cxx_visibility_inlines_hidden=no])
+ CXXFLAGS="$oldcxxflags"
+ AC_LANG_POP([C++])
+])
+if test "$llvm_cv_cxx_visibility_inlines_hidden" = yes ; then
+ AC_SUBST([ENABLE_VISIBILITY_INLINES_HIDDEN],[1])
+else
+ AC_SUBST([ENABLE_VISIBILITY_INLINES_HIDDEN],[0])
+fi
+])
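+
+# Usage sketch (hypothetical configure.ac and Makefile): invoke
+# AC_CXX_USE_VISIBILITY_INLINES_HIDDEN, then have the Makefile test the
+# substituted ENABLE_VISIBILITY_INLINES_HIDDEN value (1 or 0) and append
+# -fvisibility-inlines-hidden to CXXFLAGS only when the compiler took it.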
diff --git a/projects/sample/autoconf/mkinstalldirs b/projects/sample/autoconf/mkinstalldirs
new file mode 100755
index 000000000000..1ee2d580177f
--- /dev/null
+++ b/projects/sample/autoconf/mkinstalldirs
@@ -0,0 +1,150 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+
+scriptversion=2004-02-15.20
+
+# Original author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Public domain.
+#
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+errstatus=0
+dirmode=""
+
+usage="\
+Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ...
+
+Create each directory DIR (with mode MODE, if specified), including all
+leading file name components.
+
+Report bugs to <bug-automake@gnu.org>."
+
+# process command line arguments
+while test $# -gt 0 ; do
+ case $1 in
+ -h | --help | --h*) # -h for help
+ echo "$usage"
+ exit 0
+ ;;
+ -m) # -m PERM arg
+ shift
+ test $# -eq 0 && { echo "$usage" 1>&2; exit 1; }
+ dirmode=$1
+ shift
+ ;;
+ --version)
+ echo "$0 $scriptversion"
+ exit 0
+ ;;
+ --) # stop option processing
+ shift
+ break
+ ;;
+ -*) # unknown option
+ echo "$usage" 1>&2
+ exit 1
+ ;;
+ *) # first non-opt arg
+ break
+ ;;
+ esac
+done
+
+for file
+do
+ if test -d "$file"; then
+ shift
+ else
+ break
+ fi
+done
+
+case $# in
+ 0) exit 0 ;;
+esac
+
+# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and
+# mkdir -p a/c at the same time, both will detect that a is missing,
+# one will create a, then the other will try to create a and die with
+# a "File exists" error. This is a problem when calling mkinstalldirs
+# from a parallel make. We use --version in the probe to restrict
+# ourselves to GNU mkdir, which is thread-safe.
+case $dirmode in
+ '')
+ if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
+ # echo "mkdir -p -- $*"
+ exec mkdir -p -- "$@"
+ else
+ # On NextStep and OpenStep, the `mkdir' command does not
+ # recognize any option. It will interpret all options as
+ # directories to create, and then abort because `.' already
+ # exists.
+ test -d ./-p && rmdir ./-p
+ test -d ./--version && rmdir ./--version
+ fi
+ ;;
+ *)
+ if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 &&
+ test ! -d ./--version; then
+ # echo "mkdir -m $dirmode -p -- $*"
+ exec mkdir -m "$dirmode" -p -- "$@"
+ else
+ # Clean up after NextStep and OpenStep mkdir.
+ for d in ./-m ./-p ./--version "./$dirmode";
+ do
+ test -d $d && rmdir $d
+ done
+ fi
+ ;;
+esac
+
+for file
+do
+ set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'`
+ shift
+
+ pathcomp=
+ for d
+ do
+ pathcomp="$pathcomp$d"
+ case $pathcomp in
+ -*) pathcomp=./$pathcomp ;;
+ esac
+
+ if test ! -d "$pathcomp"; then
+ # echo "mkdir $pathcomp"
+
+ mkdir "$pathcomp" || lasterr=$?
+
+ if test ! -d "$pathcomp"; then
+ errstatus=$lasterr
+ else
+ if test ! -z "$dirmode"; then
+ # echo "chmod $dirmode $pathcomp"
+ lasterr=""
+ chmod "$dirmode" "$pathcomp" || lasterr=$?
+
+ if test ! -z "$lasterr"; then
+ errstatus=$lasterr
+ fi
+ fi
+ fi
+ fi
+
+ pathcomp="$pathcomp/"
+ done
+done
+
+exit $errstatus
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
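
(Typical invocations of the mkinstalldirs script above; the directory names and mode are illustrative:)

    ./mkinstalldirs a/b/c                 # creates a, a/b, and a/b/c as needed
    ./mkinstalldirs -m 755 include/llvm   # same, chmod-ing each created directory to 755

On systems whose mkdir passes the thread-safe GNU probe, both calls reduce to a single "mkdir -p" (or "mkdir -m MODE -p"); the per-component creation loop only runs as a fallback elsewhere.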
diff --git a/projects/sample/configure b/projects/sample/configure
index 27b64bf8084a..7c5e2ee37dbf 100755
--- a/projects/sample/configure
+++ b/projects/sample/configure
@@ -564,6 +564,42 @@ PACKAGE_STRING='[SAMPLE] [x.xx]'
PACKAGE_BUGREPORT='bugs@yourdomain'
ac_unique_file=""Makefile.common.in""
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stdio.h>
+#if HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#if HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
ac_subst_vars='SHELL
PATH_SEPARATOR
PACKAGE_NAME
@@ -603,13 +639,166 @@ host_alias
target_alias
LLVM_SRC
LLVM_OBJ
+LLVM_VERSION
+CC
+CFLAGS
+LDFLAGS
+CPPFLAGS
+ac_ct_CC
+EXEEXT
+OBJEXT
+CXX
+CXXFLAGS
+ac_ct_CXX
+CPP
+ENABLE_POLLY
+LLVM_HAS_POLLY
+subdirs
+build
+build_cpu
+build_vendor
+build_os
+host
+host_cpu
+host_vendor
+host_os
+target
+target_cpu
+target_vendor
+target_os
+OS
+HOST_OS
+TARGET_OS
+LINKALL
+NOLINKALL
+LLVM_ON_UNIX
+LLVM_ON_WIN32
+ARCH
+ENDIAN
+GREP
+EGREP
+LLVM_CROSS_COMPILING
+BUILD_CC
+BUILD_EXEEXT
+BUILD_CXX
+CVSBUILD
+ENABLE_LIBCPP
+ENABLE_OPTIMIZED
+ENABLE_PROFILING
+DISABLE_ASSERTIONS
+ENABLE_EXPENSIVE_CHECKS
+EXPENSIVE_CHECKS
+DEBUG_RUNTIME
+DEBUG_SYMBOLS
+JIT
+TARGET_HAS_JIT
+ENABLE_DOCS
+ENABLE_DOXYGEN
+ENABLE_THREADS
+ENABLE_PTHREADS
+ENABLE_PIC
+ENABLE_SHARED
+ENABLE_EMBED_STDCXX
+ENABLE_TIMESTAMPS
+TARGETS_TO_BUILD
+LLVM_ENUM_TARGETS
+LLVM_ENUM_ASM_PRINTERS
+LLVM_ENUM_ASM_PARSERS
+LLVM_ENUM_DISASSEMBLERS
+OPTIMIZE_OPTION
+EXTRA_OPTIONS
+EXTRA_LD_OPTIONS
+BINUTILS_INCDIR
+NM
+ifGNUmake
+LN_S
+CMP
+CP
+DATE
+FIND
+MKDIR
+MV
+RANLIB
+AR
+RM
+SED
+TAR
+BINPWD
+GRAPHVIZ
+DOT
+FDP
+NEATO
+TWOPI
+CIRCO
+GV
+DOTTY
+XDOT_PY
+INSTALL_PROGRAM
+INSTALL_SCRIPT
+INSTALL_DATA
+BZIP2
+CAT
+DOXYGEN
+GROFF
+GZIPBIN
+POD2HTML
+POD2MAN
+PDFROFF
+RUNTEST
+TCLSH
+ZIP
+OCAMLC
+OCAMLOPT
+OCAMLDEP
+OCAMLDOC
+GAS
+HAVE_LINK_VERSION_SCRIPT
+INSTALL_LTDL_TRUE
+INSTALL_LTDL_FALSE
+CONVENIENCE_LTDL_TRUE
+CONVENIENCE_LTDL_FALSE
+LIBADD_DL
+NO_VARIADIC_MACROS
+NO_MISSING_FIELD_INITIALIZERS
+COVERED_SWITCH_DEFAULT
+USE_UDIS86
+USE_OPROFILE
+HAVE_PTHREAD
+HUGE_VAL_SANITY
+MMAP_FILE
+SHLIBEXT
+SHLIBPATH_VAR
+LLVM_PREFIX
+LLVM_BINDIR
+LLVM_LIBDIR
+LLVM_DATADIR
+LLVM_DOCSDIR
+LLVM_ETCDIR
+LLVM_INCLUDEDIR
+LLVM_INFODIR
+LLVM_MANDIR
+LLVM_CONFIGTIME
+BINDINGS_TO_BUILD
+ALL_BINDINGS
+OCAML_LIBDIR
+ENABLE_VISIBILITY_INLINES_HIDDEN
+RPATH
+RDYNAMIC
LIBOBJS
LTLIBOBJS'
ac_subst_files=''
ac_precious_vars='build_alias
host_alias
-target_alias'
-
+target_alias
+CC
+CFLAGS
+LDFLAGS
+CPPFLAGS
+CXX
+CXXFLAGS
+CCC
+CPP'
+ac_subdirs_all='tools/polly'
# Initialize some variables set by options.
ac_init_help=
@@ -1167,6 +1356,11 @@ Fine tuning of the installation directories:
_ACEOF
cat <<\_ACEOF
+
+System types:
+ --build=BUILD configure for building on BUILD [guessed]
+ --host=HOST cross-compile to build programs to run on HOST [BUILD]
+ --target=TARGET configure for building compilers for TARGET [HOST]
_ACEOF
fi
@@ -1176,11 +1370,83 @@ if test -n "$ac_init_help"; then
esac
cat <<\_ACEOF
+Optional Features:
+ --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
+ --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
+ --enable-polly Use polly if available (default is YES)
+ --enable-libcpp Use libc++ if available (default is NO)
+ --enable-optimized Compile with optimizations enabled (default is NO)
+ --enable-profiling Compile with profiling enabled (default is NO)
+ --enable-assertions Compile with assertion checks enabled (default is
+ YES)
+ --enable-expensive-checks
+ Compile with expensive debug checks enabled (default
+ is NO)
+ --enable-debug-runtime Build runtime libs with debug symbols (default is
+ NO)
+ --enable-debug-symbols Build compiler with debug symbols (default is NO if
+ optimization is on and YES if it's off)
+ --enable-jit Enable Just In Time Compiling (default is YES)
+ --enable-docs Build documents (default is YES)
+ --enable-doxygen Build doxygen documentation (default is NO)
+ --enable-threads Use threads if available (default is YES)
+ --enable-pthreads Use pthreads if available (default is YES)
+ --enable-pic Build LLVM with Position Independent Code (default
+ is YES)
+ --enable-shared Build a shared library and link tools against it
+ (default is NO)
+ --enable-embed-stdcxx Build a shared library with embedded libstdc++ for
+ Win32 DLL (default is YES)
+ --enable-timestamps Enable embedding timestamp information in build
+ (default is YES)
+ --enable-targets Build specific host targets: all or
+ target1,target2,... Valid targets are: host, x86,
+ x86_64, sparc, powerpc, arm, mips, spu, hexagon,
+ xcore, msp430, ptx, cbe, and cpp (default=all)
+ --enable-bindings Build specific language bindings:
+ all,auto,none,{binding-name} (default=auto)
+ --enable-libffi Check for the presence of libffi (default is NO)
+ --enable-ltdl-install install libltdl
+
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
--without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no)
--with-llvmsrc Location of LLVM Source Code
--with-llvmobj Location of LLVM Object Code
+ --with-optimize-option Select the compiler options to use for optimized
+ builds
+ --with-extra-options Specify additional options to compile LLVM with
+ --with-extra-ld-options Specify additional options to link LLVM with
+ --with-ocaml-libdir Specify install location for ocaml bindings (default
+ is stdlib)
+ --with-clang-resource-dir
+ Relative directory from the Clang binary for
+ resource files
+ --with-c-include-dirs Colon separated list of directories clang will
+ search for headers
+ --with-gcc-toolchain Directory where gcc is installed.
+ --with-binutils-include Specify path to binutils/include/ containing
+ plugin-api.h file for gold plugin.
+ --with-bug-report-url Specify the URL where bug reports should be
+ submitted (default=http://llvm.org/bugs/)
+ --with-tclinclude directory where tcl headers are
+ --with-udis86=<path> Use udis86 external x86 disassembler library
+ --with-oprofile=<prefix>
+ Tell OProfile >= 0.9.4 how to symbolize JIT output
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
+ you have headers in a nonstandard directory <include dir>
+ CXX C++ compiler command
+ CXXFLAGS C++ compiler flags
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
Report bugs to <bugs@yourdomain>.
_ACEOF
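
(By way of illustration, the environment variables and options documented above combine like this when driving the sample project's configure; all paths are placeholders:)

    CC=clang CXX=clang++ CXXFLAGS='-O2 -g' \
      ./configure --with-llvmsrc=/path/to/llvm --with-llvmobj=/path/to/llvm-build \
                  --enable-optimized --enable-targets=x86,arm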
@@ -1641,8 +1907,31 @@ fi
+{ echo "$as_me:$LINENO: checking llvm-config" >&5
+echo $ECHO_N "checking llvm-config... $ECHO_C" >&6; }
+llvm_config_path="`ls -1 $llvm_obj/*/bin/llvm-config 2> /dev/null | head -1`"
+if ! test -f "$llvm_config_path" ; then
+ llvm_config_path="no"
+fi
+{ echo "$as_me:$LINENO: result: $llvm_config_path" >&5
+echo "${ECHO_T}$llvm_config_path" >&6; }
+
+{ echo "$as_me:$LINENO: checking LLVM package version" >&5
+echo $ECHO_N "checking LLVM package version... $ECHO_C" >&6; }
+if test "$llvm_config_path" != no ; then
+ llvm_package_version=`$llvm_config_path --version`
+else
+ llvm_package_version="unknown";
+fi
+{ echo "$as_me:$LINENO: result: $llvm_package_version" >&5
+echo "${ECHO_T}$llvm_package_version" >&6; }
+LLVM_VERSION=$llvm_package_version
+
+
+
+
ac_aux_dir=
-for ac_dir in $LLVM_SRC/autoconf "$srcdir"/$LLVM_SRC/autoconf; do
+for ac_dir in autoconf "$srcdir"/autoconf; do
if test -f "$ac_dir/install-sh"; then
ac_aux_dir=$ac_dir
ac_install_sh="$ac_aux_dir/install-sh -c"
@@ -1658,8 +1947,8 @@ for ac_dir in $LLVM_SRC/autoconf "$srcdir"/$LLVM_SRC/autoconf; do
fi
done
if test -z "$ac_aux_dir"; then
- { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in $LLVM_SRC/autoconf \"$srcdir\"/$LLVM_SRC/autoconf" >&5
-echo "$as_me: error: cannot find install-sh or install.sh in $LLVM_SRC/autoconf \"$srcdir\"/$LLVM_SRC/autoconf" >&2;}
+ { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in autoconf \"$srcdir\"/autoconf" >&5
+echo "$as_me: error: cannot find install-sh or install.sh in autoconf \"$srcdir\"/autoconf" >&2;}
{ (exit 1); exit 1; }; }
fi
@@ -1674,32 +1963,18950 @@ ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var.
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+ for ac_prog in clang llvm-gcc gcc
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CC"; then
+ ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
-ac_config_files="$ac_config_files Makefile.common"
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+ { echo "$as_me:$LINENO: result: $CC" >&5
+echo "${ECHO_T}$CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
-ac_config_commands="$ac_config_commands Makefile"
+ test -n "$CC" && break
+ done
+fi
+if test -z "$CC"; then
+ ac_ct_CC=$CC
+ for ac_prog in clang llvm-gcc gcc
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CC"; then
+ ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CC="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+ { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
+echo "${ECHO_T}$ac_ct_CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
-ac_config_commands="$ac_config_commands lib/Makefile"
+ test -n "$ac_ct_CC" && break
+done
-ac_config_commands="$ac_config_commands lib/sample/Makefile"
+ if test "x$ac_ct_CC" = x; then
+ CC=""
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&5
+echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&2;}
+ac_tool_warned=yes ;;
+esac
+ CC=$ac_ct_CC
+ fi
+fi
-ac_config_commands="$ac_config_commands tools/Makefile"
+test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&5
+echo "$as_me: error: no acceptable C compiler found in \$PATH
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+# Provide some information about the compiler.
+echo "$as_me:$LINENO: checking for C compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2`
+{ (ac_try="$ac_compiler --version >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
-ac_config_commands="$ac_config_commands tools/sample/Makefile"
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers and form an intuition
+# of exeext.
+{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5
+echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; }
+ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+#
+# List of possible output files, starting from the most likely.
+# The algorithm is not robust to junk in `.', hence go to wildcards (a.*)
+# only as a last resort. b.out is created by i960 compilers.
+ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out'
+#
+# The IRIX 6 linker writes into existing files which may not be
+# executable, retaining their permissions. Remove them first so a
+# subsequent execution test works.
+ac_rmfiles=
+for ac_file in $ac_files
+do
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;;
+ * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+ esac
+done
+rm -f $ac_rmfiles
+
+if { (ac_try="$ac_link_default"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link_default") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile. We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files
+do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj )
+ ;;
+ [ab].out )
+ # We found the default executable, but exeext='' is most
+ # certainly right.
+ break;;
+ *.* )
+ if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no;
+ then :; else
+ ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ fi
+ # We set ac_cv_exeext here because the later test for it is not
+ # safe: cross compilers may not add the suffix if given an `-o'
+ # argument, so we may need to know it at that point already.
+ # Even if this section looks crufty: it has the advantage of
+ # actually working.
+ break;;
+ * )
+ break;;
+ esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: C compiler cannot create executables
+See \`config.log' for more details." >&5
+echo "$as_me: error: C compiler cannot create executables
+See \`config.log' for more details." >&2;}
+ { (exit 77); exit 77; }; }
+fi
+
+ac_exeext=$ac_cv_exeext
+{ echo "$as_me:$LINENO: result: $ac_file" >&5
+echo "${ECHO_T}$ac_file" >&6; }
+
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5
+echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; }
+# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
+# If not cross compiling, check that we can run a simple program.
+if test "$cross_compiling" != yes; then
+ if { ac_try='./$ac_file'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ cross_compiling=no
+ else
+ if test "$cross_compiling" = maybe; then
+ cross_compiling=yes
+ else
+ { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ fi
+fi
+{ echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+rm -f a.out a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+# Check that the compiler produces executables we can run. If not, either
+# the compiler is broken, or we cross compile.
+{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
+echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; }
+{ echo "$as_me:$LINENO: result: $cross_compiling" >&5
+echo "${ECHO_T}$cross_compiling" >&6; }
+
+{ echo "$as_me:$LINENO: checking for suffix of executables" >&5
+echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6; }
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+ test -f "$ac_file" || continue
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;;
+ *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+ break;;
+ * ) break;;
+ esac
+done
+else
+ { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest$ac_cv_exeext
+{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
+echo "${ECHO_T}$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+{ echo "$as_me:$LINENO: checking for suffix of object files" >&5
+echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; }
+if test "${ac_cv_objext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ for ac_file in conftest.o conftest.obj conftest.*; do
+ test -f "$ac_file" || continue;
+ case $ac_file in
+ *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;;
+ *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+ break;;
+ esac
+done
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&5
+echo "$as_me: error: cannot compute suffix of object files: cannot compile
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
+echo "${ECHO_T}$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; }
+if test "${ac_cv_c_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; }
+GCC=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CFLAGS=${CFLAGS+set}
+ac_save_CFLAGS=$CFLAGS
+{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
+echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; }
+if test "${ac_cv_prog_cc_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_save_c_werror_flag=$ac_c_werror_flag
+ ac_c_werror_flag=yes
+ ac_cv_prog_cc_g=no
+ CFLAGS="-g"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_c_werror_flag=$ac_save_c_werror_flag
+ CFLAGS="-g"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; }
+if test "$ac_test_CFLAGS" = set; then
+ CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+ if test "$GCC" = yes; then
+ CFLAGS="-g -O2"
+ else
+ CFLAGS="-g"
+ fi
+else
+ if test "$GCC" = yes; then
+ CFLAGS="-O2"
+ else
+ CFLAGS=
+ fi
+fi
+{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5
+echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; }
+if test "${ac_cv_prog_cc_c89+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
+struct buf { int x; };
+FILE * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+ char **p;
+ int i;
+{
+ return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+ char *s;
+ va_list v;
+ va_start (v,p);
+ s = g (p, va_arg (v,int));
+ va_end (v);
+ return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has
+ function prototypes and stuff, but not '\xHH' hex character constants.
+ These don't provoke an error unfortunately, instead are silently treated
+ as 'x'. The following induces an error, until -std is added to get
+ proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an
+ array size at least. It's necessary to write '\x00'==0 to get something
+ that's true only with -std. */
+int osf4_cc_array ['\x00' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+ inside strings and character constants. */
+#define FOO(x) 'x'
+int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
+int argc;
+char **argv;
+int
+main ()
+{
+return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
+ ;
+ return 0;
+}
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \
+ -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
+do
+ CC="$ac_save_CC $ac_arg"
+ rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cc_c89=$ac_arg
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext
+ test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+
+fi
+# AC_CACHE_VAL
+case "x$ac_cv_prog_cc_c89" in
+ x)
+ { echo "$as_me:$LINENO: result: none needed" >&5
+echo "${ECHO_T}none needed" >&6; } ;;
+ xno)
+ { echo "$as_me:$LINENO: result: unsupported" >&5
+echo "${ECHO_T}unsupported" >&6; } ;;
+ *)
+ CC="$CC $ac_cv_prog_cc_c89"
+ { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5
+echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;;
+esac
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+if test -z "$CXX"; then
+ if test -n "$CCC"; then
+ CXX=$CCC
+ else
+ if test -n "$ac_tool_prefix"; then
+ for ac_prog in clang++ llvm-g++ g++
+ do
+ # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CXX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$CXX"; then
+ ac_cv_prog_CXX="$CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+CXX=$ac_cv_prog_CXX
+if test -n "$CXX"; then
+ { echo "$as_me:$LINENO: result: $CXX" >&5
+echo "${ECHO_T}$CXX" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$CXX" && break
+ done
+fi
+if test -z "$CXX"; then
+ ac_ct_CXX=$CXX
+ for ac_prog in clang++ llvm-g++ g++
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_CXX"; then
+ ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_CXX="$ac_prog"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CXX=$ac_cv_prog_ac_ct_CXX
+if test -n "$ac_ct_CXX"; then
+ { echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5
+echo "${ECHO_T}$ac_ct_CXX" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$ac_ct_CXX" && break
+done
+
+ if test "x$ac_ct_CXX" = x; then
+ CXX="g++"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&5
+echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&2;}
+ac_tool_warned=yes ;;
+esac
+ CXX=$ac_ct_CXX
+ fi
+fi
+
+ fi
+fi
+# Provide some information about the compiler.
+echo "$as_me:$LINENO: checking for C++ compiler version" >&5
+ac_compiler=`set X $ac_compile; echo $2`
+{ (ac_try="$ac_compiler --version >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler --version >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -v >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler -v >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+{ (ac_try="$ac_compiler -V >&5"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compiler -V >&5") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }
+
+{ echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5
+echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6; }
+if test "${ac_cv_cxx_compiler_gnu+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+#ifndef __GNUC__
+ choke me
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_compiler_gnu=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_compiler_gnu=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ac_cv_cxx_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5
+echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6; }
+GXX=`test $ac_compiler_gnu = yes && echo yes`
+ac_test_CXXFLAGS=${CXXFLAGS+set}
+ac_save_CXXFLAGS=$CXXFLAGS
+{ echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5
+echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6; }
+if test "${ac_cv_prog_cxx_g+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_save_cxx_werror_flag=$ac_cxx_werror_flag
+ ac_cxx_werror_flag=yes
+ ac_cv_prog_cxx_g=no
+ CXXFLAGS="-g"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cxx_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ CXXFLAGS=""
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+ CXXFLAGS="-g"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_prog_cxx_g=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_cxx_werror_flag=$ac_save_cxx_werror_flag
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5
+echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6; }
+if test "$ac_test_CXXFLAGS" = set; then
+ CXXFLAGS=$ac_save_CXXFLAGS
+elif test $ac_cv_prog_cxx_g = yes; then
+ if test "$GXX" = yes; then
+ CXXFLAGS="-g -O2"
+ else
+ CXXFLAGS="-g"
+ fi
+else
+ if test "$GXX" = yes; then
+ CXXFLAGS="-O2"
+ else
+ CXXFLAGS=
+ fi
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
+echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+ CPP=
+fi
+if test -z "$CPP"; then
+ if test "${ac_cv_prog_CPP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Double quotes because CPP needs to be expanded
+ for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
+ do
+ ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ break
+fi
+
+ done
+ ac_cv_prog_CPP=$CPP
+
+fi
+ CPP=$ac_cv_prog_CPP
+else
+ ac_cv_prog_CPP=$CPP
+fi
+{ echo "$as_me:$LINENO: result: $CPP" >&5
+echo "${ECHO_T}$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+ # Use a header file that comes with gcc, so configuring glibc
+ # with a fresh cross-compiler works.
+ # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ # <limits.h> exists even on freestanding compilers.
+ # On the NeXT, cc -E runs the code through the compiler's parser,
+ # not just through cpp. "Syntax error" is here to catch this case.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+ Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+ # OK, works on sane cases. Now check whether nonexistent headers
+ # can be detected and how.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ # Broken: success on invalid input.
+continue
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+ :
+else
+ { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+
+# Check whether --enable-polly was given.
+if test "${enable_polly+set}" = set; then
+ enableval=$enable_polly;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_POLLY=1
+ ;;
+ no) ENABLE_POLLY=0
+ ;;
+ default) ENABLE_POLLY=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-polly. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-polly. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+
+if (test -d ${srcdir}/tools/polly) && (test $ENABLE_POLLY -eq 1) ; then
+ LLVM_HAS_POLLY=1
+
+ subdirs="$subdirs tools/polly"
+
+fi
+
+
+# Make sure we can run config.sub.
+$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
+ { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5
+echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;}
+ { (exit 1); exit 1; }; }
+
+{ echo "$as_me:$LINENO: checking build system type" >&5
+echo $ECHO_N "checking build system type... $ECHO_C" >&6; }
+if test "${ac_cv_build+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_build_alias=$build_alias
+test "x$ac_build_alias" = x &&
+ ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
+test "x$ac_build_alias" = x &&
+ { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5
+echo "$as_me: error: cannot guess build type; you must specify one" >&2;}
+ { (exit 1); exit 1; }; }
+ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
+ { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5
+echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_build" >&5
+echo "${ECHO_T}$ac_cv_build" >&6; }
+case $ac_cv_build in
+*-*-*) ;;
+*) { { echo "$as_me:$LINENO: error: invalid value of canonical build" >&5
+echo "$as_me: error: invalid value of canonical build" >&2;}
+ { (exit 1); exit 1; }; };;
+esac
+build=$ac_cv_build
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_build
+shift
+build_cpu=$1
+build_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+build_os=$*
+IFS=$ac_save_IFS
+case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
+
+
+{ echo "$as_me:$LINENO: checking host system type" >&5
+echo $ECHO_N "checking host system type... $ECHO_C" >&6; }
+if test "${ac_cv_host+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "x$host_alias" = x; then
+ ac_cv_host=$ac_cv_build
+else
+ ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
+ { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5
+echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_host" >&5
+echo "${ECHO_T}$ac_cv_host" >&6; }
+case $ac_cv_host in
+*-*-*) ;;
+*) { { echo "$as_me:$LINENO: error: invalid value of canonical host" >&5
+echo "$as_me: error: invalid value of canonical host" >&2;}
+ { (exit 1); exit 1; }; };;
+esac
+host=$ac_cv_host
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
+
+{ echo "$as_me:$LINENO: checking target system type" >&5
+echo $ECHO_N "checking target system type... $ECHO_C" >&6; }
+if test "${ac_cv_target+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "x$target_alias" = x; then
+ ac_cv_target=$ac_cv_host
+else
+ ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` ||
+ { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&5
+echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_target" >&5
+echo "${ECHO_T}$ac_cv_target" >&6; }
+case $ac_cv_target in
+*-*-*) ;;
+*) { { echo "$as_me:$LINENO: error: invalid value of canonical target" >&5
+echo "$as_me: error: invalid value of canonical target" >&2;}
+ { (exit 1); exit 1; }; };;
+esac
+target=$ac_cv_target
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_target
+shift
+target_cpu=$1
+target_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+target_os=$*
+IFS=$ac_save_IFS
+case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac
+
+
+# The aliases save the names the user supplied, while $host etc.
+# will get canonicalized.
+test -n "$target_alias" &&
+ test "$program_prefix$program_suffix$program_transform_name" = \
+ NONENONEs,x,x, &&
+ program_prefix=${target_alias}-
+
+{ echo "$as_me:$LINENO: checking type of operating system we're going to host on" >&5
+echo $ECHO_N "checking type of operating system we're going to host on... $ECHO_C" >&6; }
+if test "${llvm_cv_os_type+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $host in
+ *-*-aix*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="AIX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-irix*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="IRIX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-cygwin*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Cygwin"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-darwin*)
+ llvm_cv_link_all_option="-Wl,-all_load"
+ llvm_cv_no_link_all_option="-Wl,-noall_load"
+ llvm_cv_os_type="Darwin"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-minix*)
+ llvm_cv_link_all_option="-Wl,-all_load"
+ llvm_cv_no_link_all_option="-Wl,-noall_load"
+ llvm_cv_os_type="Minix"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-freebsd* | *-*-kfreebsd-gnu)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="FreeBSD"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-openbsd*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="OpenBSD"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-netbsd*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="NetBSD"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-dragonfly*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="DragonFly"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-hpux*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="HP-UX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-interix*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Interix"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-linux*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Linux"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-gnu*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="GNU"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-solaris*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+ llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="SunOS"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-auroraux*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+    llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="AuroraUX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-win32*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Win32"
+ llvm_cv_platform_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="MingW"
+ llvm_cv_platform_type="Win32" ;;
+ *-*-haiku*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Haiku"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-eabi*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-elf*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *)
+ llvm_cv_link_all_option=""
+ llvm_cv_no_link_all_option=""
+ llvm_cv_os_type="Unknown"
+ llvm_cv_platform_type="Unknown" ;;
+esac
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_os_type" >&5
+echo "${ECHO_T}$llvm_cv_os_type" >&6; }
+
+{ echo "$as_me:$LINENO: checking type of operating system we're going to target" >&5
+echo $ECHO_N "checking type of operating system we're going to target... $ECHO_C" >&6; }
+if test "${llvm_cv_target_os_type+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $target in
+ *-*-aix*)
+ llvm_cv_target_os_type="AIX" ;;
+ *-*-irix*)
+ llvm_cv_target_os_type="IRIX" ;;
+ *-*-cygwin*)
+ llvm_cv_target_os_type="Cygwin" ;;
+ *-*-darwin*)
+ llvm_cv_target_os_type="Darwin" ;;
+ *-*-minix*)
+ llvm_cv_target_os_type="Minix" ;;
+ *-*-freebsd* | *-*-kfreebsd-gnu)
+ llvm_cv_target_os_type="FreeBSD" ;;
+ *-*-openbsd*)
+ llvm_cv_target_os_type="OpenBSD" ;;
+ *-*-netbsd*)
+ llvm_cv_target_os_type="NetBSD" ;;
+ *-*-dragonfly*)
+ llvm_cv_target_os_type="DragonFly" ;;
+ *-*-hpux*)
+ llvm_cv_target_os_type="HP-UX" ;;
+ *-*-interix*)
+ llvm_cv_target_os_type="Interix" ;;
+ *-*-linux*)
+ llvm_cv_target_os_type="Linux" ;;
+ *-*-gnu*)
+ llvm_cv_target_os_type="GNU" ;;
+ *-*-solaris*)
+ llvm_cv_target_os_type="SunOS" ;;
+ *-*-auroraux*)
+ llvm_cv_target_os_type="AuroraUX" ;;
+ *-*-win32*)
+ llvm_cv_target_os_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_target_os_type="MingW" ;;
+ *-*-haiku*)
+ llvm_cv_target_os_type="Haiku" ;;
+ *-*-rtems*)
+ llvm_cv_target_os_type="RTEMS" ;;
+ *-*-nacl*)
+ llvm_cv_target_os_type="NativeClient" ;;
+ *-unknown-eabi*)
+ llvm_cv_target_os_type="Freestanding" ;;
+ *)
+ llvm_cv_target_os_type="Unknown" ;;
+esac
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_target_os_type" >&5
+echo "${ECHO_T}$llvm_cv_target_os_type" >&6; }
+
+if test "$llvm_cv_os_type" = "Unknown" ; then
+ { { echo "$as_me:$LINENO: error: Operating system is unknown, configure can't continue" >&5
+echo "$as_me: error: Operating system is unknown, configure can't continue" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+OS=$llvm_cv_os_type
+
+HOST_OS=$llvm_cv_os_type
+
+TARGET_OS=$llvm_cv_target_os_type
+
+
+LINKALL=$llvm_cv_link_all_option
+
+NOLINKALL=$llvm_cv_no_link_all_option
+
+
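+# Record the platform class: exactly one of LLVM_ON_UNIX / LLVM_ON_WIN32 is
+# defined to 1, both in the configuration header and in the substituted
+# variables below.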
+case $llvm_cv_platform_type in
+ Unix)
+
+cat >>confdefs.h <<\_ACEOF
+#define LLVM_ON_UNIX 1
+_ACEOF
+
+ LLVM_ON_UNIX=1
+
+ LLVM_ON_WIN32=0
+
+ ;;
+ Win32)
+
+cat >>confdefs.h <<\_ACEOF
+#define LLVM_ON_WIN32 1
+_ACEOF
+
+ LLVM_ON_UNIX=0
+
+ LLVM_ON_WIN32=1
+
+ ;;
+esac
+
+{ echo "$as_me:$LINENO: checking target architecture" >&5
+echo $ECHO_N "checking target architecture... $ECHO_C" >&6; }
+if test "${llvm_cv_target_arch+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $target in
+ i?86-*) llvm_cv_target_arch="x86" ;;
+ amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;;
+ sparc*-*) llvm_cv_target_arch="Sparc" ;;
+ powerpc*-*) llvm_cv_target_arch="PowerPC" ;;
+ arm*-*) llvm_cv_target_arch="ARM" ;;
+ mips-*) llvm_cv_target_arch="Mips" ;;
+ xcore-*) llvm_cv_target_arch="XCore" ;;
+ msp430-*) llvm_cv_target_arch="MSP430" ;;
+ hexagon-*) llvm_cv_target_arch="Hexagon" ;;
+ mblaze-*) llvm_cv_target_arch="MBlaze" ;;
+ ptx-*) llvm_cv_target_arch="PTX" ;;
+ *) llvm_cv_target_arch="Unknown" ;;
+esac
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_target_arch" >&5
+echo "${ECHO_T}$llvm_cv_target_arch" >&6; }
+
+if test "$llvm_cv_target_arch" = "Unknown" ; then
+  { echo "$as_me:$LINENO: WARNING: Configuring LLVM for an unknown target architecture" >&5
+echo "$as_me: WARNING: Configuring LLVM for an unknown target architecture" >&2;}
+fi
+
+# Determine the LLVM native architecture for the target
+case "$llvm_cv_target_arch" in
+ x86) LLVM_NATIVE_ARCH="X86" ;;
+ x86_64) LLVM_NATIVE_ARCH="X86" ;;
+ *) LLVM_NATIVE_ARCH="$llvm_cv_target_arch" ;;
+esac
+
+ARCH=$llvm_cv_target_arch
+
+
+
+
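+# Find a grep that handles long lines and -e.  A GNU grep is taken as soon as
+# one is seen; otherwise the fastest working candidate on $PATH (including
+# /usr/xpg4/bin) is kept.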
+{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5
+echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # Extract the first word of "grep ggrep" to use in msg output
+if test -z "$GREP"; then
+set dummy grep ggrep; ac_prog_name=$2
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_GREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in grep ggrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_GREP" && $as_executable_p "$ac_path_GREP"; } || continue
+ # Check for GNU ac_path_GREP and select it if it is found.
+ # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'GREP' >> "conftest.nl"
+ "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_GREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_GREP="$ac_path_GREP"
+ ac_path_GREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_GREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+GREP="$ac_cv_path_GREP"
+if test -z "$GREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_GREP=$GREP
+fi
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5
+echo "${ECHO_T}$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ echo "$as_me:$LINENO: checking for egrep" >&5
+echo $ECHO_N "checking for egrep... $ECHO_C" >&6; }
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+ then ac_cv_path_EGREP="$GREP -E"
+ else
+ # Extract the first word of "egrep" to use in msg output
+if test -z "$EGREP"; then
+set dummy egrep; ac_prog_name=$2
+if test "${ac_cv_path_EGREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_path_EGREP_found=false
+# Loop through the user's path and test for each of PROGNAME-LIST
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_prog in egrep; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
+ { test -f "$ac_path_EGREP" && $as_executable_p "$ac_path_EGREP"; } || continue
+ # Check for GNU ac_path_EGREP and select it if it is found.
+ # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+ ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+ ac_count=0
+ echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+ while :
+ do
+ cat "conftest.in" "conftest.in" >"conftest.tmp"
+ mv "conftest.tmp" "conftest.in"
+ cp "conftest.in" "conftest.nl"
+ echo 'EGREP' >> "conftest.nl"
+ "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+ diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+ ac_count=`expr $ac_count + 1`
+ if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+ # Best one so far, save it but keep looking for a better one
+ ac_cv_path_EGREP="$ac_path_EGREP"
+ ac_path_EGREP_max=$ac_count
+ fi
+ # 10*(2^10) chars as input seems more than enough
+ test $ac_count -gt 10 && break
+ done
+ rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+ $ac_path_EGREP_found && break 3
+ done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+EGREP="$ac_cv_path_EGREP"
+if test -z "$EGREP"; then
+ { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+else
+ ac_cv_path_EGREP=$EGREP
+fi
+
+
+ fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
+echo "${ECHO_T}$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_stdc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_stdc=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then
+ :
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then
+ :
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
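+# Compile-test each of the standard headers below and define HAVE_<HEADER>_H
+# for every one that works.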
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
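+# Byte-order detection, in decreasing order of preference: the BYTE_ORDER
+# macros from <sys/param.h>, a run-time union test, or -- when cross-compiling
+# -- magic strings grepped out of a compiled object file.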
+{ echo "$as_me:$LINENO: checking whether byte ordering is bigendian" >&5
+echo $ECHO_N "checking whether byte ordering is bigendian... $ECHO_C" >&6; }
+if test "${ac_cv_c_bigendian+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ # See if sys/param.h defines the BYTE_ORDER macro.
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <sys/param.h>
+
+int
+main ()
+{
+#if !BYTE_ORDER || !BIG_ENDIAN || !LITTLE_ENDIAN
+ bogus endian macros
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ # It does; now see whether it defined to BIG_ENDIAN or not.
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <sys/param.h>
+
+int
+main ()
+{
+#if BYTE_ORDER != BIG_ENDIAN
+ not big endian
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_c_bigendian=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_c_bigendian=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ # It does not; compile a test program.
+if test "$cross_compiling" = yes; then
+ # try to guess the endianness by grepping values into an object file
+ ac_cv_c_bigendian=unknown
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+short int ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+short int ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+void _ascii () { char *s = (char *) ascii_mm; s = (char *) ascii_ii; }
+short int ebcdic_ii[] = { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+short int ebcdic_mm[] = { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+void _ebcdic () { char *s = (char *) ebcdic_mm; s = (char *) ebcdic_ii; }
+int
+main ()
+{
+ _ascii (); _ebcdic ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ if grep BIGenDianSyS conftest.$ac_objext >/dev/null ; then
+ ac_cv_c_bigendian=yes
+fi
+if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
+ if test "$ac_cv_c_bigendian" = unknown; then
+ ac_cv_c_bigendian=no
+ else
+ # finding both strings is unlikely to happen, but who knows?
+ ac_cv_c_bigendian=unknown
+ fi
+fi
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+
+ /* Are we little or big endian? From Harbison&Steele. */
+ union
+ {
+ long int l;
+ char c[sizeof (long int)];
+ } u;
+ u.l = 1;
+ return u.c[sizeof (long int) - 1] == 1;
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_c_bigendian=no
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_c_bigendian=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_c_bigendian" >&5
+echo "${ECHO_T}$ac_cv_c_bigendian" >&6; }
+case $ac_cv_c_bigendian in
+ yes)
+ ENDIAN=big
+ ;;
+ no)
+ ENDIAN=little
+ ;;
+ *)
+ { { echo "$as_me:$LINENO: error: unknown endianness
+presetting ac_cv_c_bigendian=no (or yes) will help" >&5
+echo "$as_me: error: unknown endianness
+presetting ac_cv_c_bigendian=no (or yes) will help" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+
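+# When cross-compiling, locate a second toolchain (BUILD_CC, and later
+# BUILD_CXX) that targets the build machine itself, so helper programs can be
+# compiled and run during the build, and work out its executable suffix.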
+if test "$cross_compiling" = yes; then
+ LLVM_CROSS_COMPILING=1
+
+
+{ echo "$as_me:$LINENO: checking for executable suffix on build platform" >&5
+echo $ECHO_N "checking for executable suffix on build platform... $ECHO_C" >&6; }
+if test "${ac_cv_build_exeext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$CYGWIN" = yes || test "$MINGW32" = yes; then
+ ac_cv_build_exeext=.exe
+else
+ ac_build_prefix=${build_alias}-
+
+ # Extract the first word of "${ac_build_prefix}gcc", so it can be a program name with args.
+set dummy ${ac_build_prefix}gcc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_BUILD_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BUILD_CC"; then
+ ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_BUILD_CC="${ac_build_prefix}gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+BUILD_CC=$ac_cv_prog_BUILD_CC
+if test -n "$BUILD_CC"; then
+ { echo "$as_me:$LINENO: result: $BUILD_CC" >&5
+echo "${ECHO_T}$BUILD_CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ if test -z "$BUILD_CC"; then
+ # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_BUILD_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BUILD_CC"; then
+ ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_BUILD_CC="gcc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+BUILD_CC=$ac_cv_prog_BUILD_CC
+if test -n "$BUILD_CC"; then
+ { echo "$as_me:$LINENO: result: $BUILD_CC" >&5
+echo "${ECHO_T}$BUILD_CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ if test -z "$BUILD_CC"; then
+ # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_BUILD_CC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BUILD_CC"; then
+ ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_BUILD_CC="cc"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_BUILD_CC
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set BUILD_CC to just the basename; use the full file name.
+ shift
+ ac_cv_prog_BUILD_CC="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+BUILD_CC=$ac_cv_prog_BUILD_CC
+if test -n "$BUILD_CC"; then
+ { echo "$as_me:$LINENO: result: $BUILD_CC" >&5
+echo "${ECHO_T}$BUILD_CC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ fi
+ fi
+ test -z "$BUILD_CC" && { { echo "$as_me:$LINENO: error: no acceptable cc found in \$PATH" >&5
+echo "$as_me: error: no acceptable cc found in \$PATH" >&2;}
+ { (exit 1); exit 1; }; }
+ ac_build_link='${BUILD_CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5'
+ rm -f conftest*
+ echo 'int main () { return 0; }' > conftest.$ac_ext
+ ac_cv_build_exeext=
+ if { (eval echo "$as_me:$LINENO: \"$ac_build_link\"") >&5
+ (eval $ac_build_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ for file in conftest.*; do
+ case $file in
+ *.c | *.o | *.obj | *.dSYM) ;;
+ *) ac_cv_build_exeext=`echo $file | sed -e s/conftest//` ;;
+ esac
+ done
+ else
+ { { echo "$as_me:$LINENO: error: installation or configuration problem: compiler cannot create executables." >&5
+echo "$as_me: error: installation or configuration problem: compiler cannot create executables." >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+ rm -f conftest*
+ test x"${ac_cv_build_exeext}" = x && ac_cv_build_exeext=blank
+fi
+fi
+
+BUILD_EXEEXT=""
+test x"${ac_cv_build_exeext}" != xblank && BUILD_EXEEXT=${ac_cv_build_exeext}
+{ echo "$as_me:$LINENO: result: ${ac_cv_build_exeext}" >&5
+echo "${ECHO_T}${ac_cv_build_exeext}" >&6; }
+ac_build_exeext=$BUILD_EXEEXT
+
+ ac_build_prefix=${build_alias}-
+ # Extract the first word of "${ac_build_prefix}g++", so it can be a program name with args.
+set dummy ${ac_build_prefix}g++; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_BUILD_CXX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BUILD_CXX"; then
+ ac_cv_prog_BUILD_CXX="$BUILD_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_BUILD_CXX="${ac_build_prefix}g++"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+BUILD_CXX=$ac_cv_prog_BUILD_CXX
+if test -n "$BUILD_CXX"; then
+ { echo "$as_me:$LINENO: result: $BUILD_CXX" >&5
+echo "${ECHO_T}$BUILD_CXX" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ if test -z "$BUILD_CXX"; then
+ # Extract the first word of "g++", so it can be a program name with args.
+set dummy g++; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_BUILD_CXX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BUILD_CXX"; then
+ ac_cv_prog_BUILD_CXX="$BUILD_CXX" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_BUILD_CXX="g++"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+BUILD_CXX=$ac_cv_prog_BUILD_CXX
+if test -n "$BUILD_CXX"; then
+ { echo "$as_me:$LINENO: result: $BUILD_CXX" >&5
+echo "${ECHO_T}$BUILD_CXX" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ if test -z "$BUILD_CXX"; then
+ # Extract the first word of "c++", so it can be a program name with args.
+set dummy c++; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_BUILD_CXX+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$BUILD_CXX"; then
+ ac_cv_prog_BUILD_CXX="$BUILD_CXX" # Let the user override the test.
+else
+ ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/c++"; then
+ ac_prog_rejected=yes
+ continue
+ fi
+ ac_cv_prog_BUILD_CXX="c++"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+ # We found a bogon in the path, so make sure we never use it.
+ set dummy $ac_cv_prog_BUILD_CXX
+ shift
+ if test $# != 0; then
+ # We chose a different compiler from the bogus one.
+ # However, it has the same basename, so the bogon will be chosen
+ # first if we set BUILD_CXX to just the basename; use the full file name.
+ shift
+ ac_cv_prog_BUILD_CXX="$as_dir/$ac_word${1+' '}$@"
+ fi
+fi
+fi
+fi
+BUILD_CXX=$ac_cv_prog_BUILD_CXX
+if test -n "$BUILD_CXX"; then
+ { echo "$as_me:$LINENO: result: $BUILD_CXX" >&5
+echo "${ECHO_T}$BUILD_CXX" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ fi
+ fi
+else
+ LLVM_CROSS_COMPILING=0
+
+fi
+
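+# A .svn or .git directory marks a developer checkout; such trees default to
+# an unoptimized build.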
+if test -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
+ cvsbuild="yes"
+ optimize="no"
+ CVSBUILD=CVSBUILD=1
+
+else
+ cvsbuild="no"
+ optimize="yes"
+fi
+
+
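+# Feature switches.  Several variables below are deliberately assigned the
+# full "NAME=1" Makefile line (e.g. ENABLE_OPTIMIZED=ENABLE_OPTIMIZED=1): the
+# value is substituted verbatim into Makefile.config, so an empty value makes
+# the setting disappear there entirely.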
+# Check whether --enable-libcpp was given.
+if test "${enable_libcpp+set}" = set; then
+ enableval=$enable_libcpp;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_LIBCPP=1
+ ;;
+ no) ENABLE_LIBCPP=0
+ ;;
+ default) ENABLE_LIBCPP=0
+;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-libcpp. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-optimized was given.
+if test "${enable_optimized+set}" = set; then
+ enableval=$enable_optimized;
+else
+ enableval=$optimize
+fi
+
+if test ${enableval} = "no" ; then
+ ENABLE_OPTIMIZED=
+
+else
+ ENABLE_OPTIMIZED=ENABLE_OPTIMIZED=1
+
+fi
+
+# Check whether --enable-profiling was given.
+if test "${enable_profiling+set}" = set; then
+ enableval=$enable_profiling;
+else
+ enableval="no"
+fi
+
+if test ${enableval} = "no" ; then
+ ENABLE_PROFILING=
+
+else
+ ENABLE_PROFILING=ENABLE_PROFILING=1
+
+fi
+
+# Check whether --enable-assertions was given.
+if test "${enable_assertions+set}" = set; then
+ enableval=$enable_assertions;
+else
+ enableval="yes"
+fi
+
+if test ${enableval} = "yes" ; then
+ DISABLE_ASSERTIONS=
+
+else
+ DISABLE_ASSERTIONS=DISABLE_ASSERTIONS=1
+
+fi
+
+# Check whether --enable-expensive-checks was given.
+if test "${enable_expensive_checks+set}" = set; then
+ enableval=$enable_expensive_checks;
+else
+ enableval="no"
+fi
+
+if test ${enableval} = "yes" ; then
+ ENABLE_EXPENSIVE_CHECKS=ENABLE_EXPENSIVE_CHECKS=1
+
+ EXPENSIVE_CHECKS=yes
+
+else
+ ENABLE_EXPENSIVE_CHECKS=
+
+ EXPENSIVE_CHECKS=no
+
+fi
+
+# Check whether --enable-debug-runtime was given.
+if test "${enable_debug_runtime+set}" = set; then
+ enableval=$enable_debug_runtime;
+else
+ enableval=no
+fi
+
+if test ${enableval} = "no" ; then
+ DEBUG_RUNTIME=
+
+else
+ DEBUG_RUNTIME=DEBUG_RUNTIME=1
+
+fi
+
+# Check whether --enable-debug-symbols was given.
+if test "${enable_debug_symbols+set}" = set; then
+ enableval=$enable_debug_symbols;
+else
+ enableval=no
+fi
+
+if test ${enableval} = "no" ; then
+ DEBUG_SYMBOLS=
+
+else
+ DEBUG_SYMBOLS=DEBUG_SYMBOLS=1
+
+fi
+
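+# JIT support is keyed to the target architecture: even with --enable-jit,
+# TARGET_HAS_JIT stays 0 on architectures without a JIT.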
+# Check whether --enable-jit was given.
+if test "${enable_jit+set}" = set; then
+ enableval=$enable_jit;
+else
+ enableval=default
+fi
+
+if test ${enableval} = "no"
+then
+ JIT=
+
+else
+ case "$llvm_cv_target_arch" in
+ x86) TARGET_HAS_JIT=1
+ ;;
+ Sparc) TARGET_HAS_JIT=0
+ ;;
+ PowerPC) TARGET_HAS_JIT=1
+ ;;
+ x86_64) TARGET_HAS_JIT=1
+ ;;
+ ARM) TARGET_HAS_JIT=1
+ ;;
+ Mips) TARGET_HAS_JIT=1
+ ;;
+ XCore) TARGET_HAS_JIT=0
+ ;;
+ MSP430) TARGET_HAS_JIT=0
+ ;;
+ Hexagon) TARGET_HAS_JIT=0
+ ;;
+ MBlaze) TARGET_HAS_JIT=0
+ ;;
+ PTX) TARGET_HAS_JIT=0
+ ;;
+ *) TARGET_HAS_JIT=0
+ ;;
+ esac
+fi
+
+# Check whether --enable-docs was given.
+if test "${enable_docs+set}" = set; then
+ enableval=$enable_docs;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_DOCS=1
+ ;;
+ no) ENABLE_DOCS=0
+ ;;
+ default) ENABLE_DOCS=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-docs. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-docs. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-doxygen was given.
+if test "${enable_doxygen+set}" = set; then
+ enableval=$enable_doxygen;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_DOXYGEN=1
+ ;;
+ no) ENABLE_DOXYGEN=0
+ ;;
+ default) ENABLE_DOXYGEN=0
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-doxygen. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-doxygen. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-threads was given.
+if test "${enable_threads+set}" = set; then
+ enableval=$enable_threads;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_THREADS=1
+ ;;
+ no) ENABLE_THREADS=0
+ ;;
+ default) ENABLE_THREADS=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+cat >>confdefs.h <<_ACEOF
+#define ENABLE_THREADS $ENABLE_THREADS
+_ACEOF
+
+
+# Check whether --enable-pthreads was given.
+if test "${enable_pthreads+set}" = set; then
+ enableval=$enable_pthreads;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_PTHREADS=1
+ ;;
+ no) ENABLE_PTHREADS=0
+ ;;
+ default) ENABLE_PTHREADS=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-pthreads. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-pthreads. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-pic was given.
+if test "${enable_pic+set}" = set; then
+ enableval=$enable_pic;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_PIC=1
+ ;;
+ no) ENABLE_PIC=0
+ ;;
+ default) ENABLE_PIC=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-pic. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-pic. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+cat >>confdefs.h <<_ACEOF
+#define ENABLE_PIC $ENABLE_PIC
+_ACEOF
+
+
+# Check whether --enable-shared was given.
+if test "${enable_shared+set}" = set; then
+ enableval=$enable_shared;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_SHARED=1
+ ;;
+ no) ENABLE_SHARED=0
+ ;;
+ default) ENABLE_SHARED=0
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-shared. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-shared. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-embed-stdcxx was given.
+if test "${enable_embed_stdcxx+set}" = set; then
+ enableval=$enable_embed_stdcxx;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_EMBED_STDCXX=1
+ ;;
+ no) ENABLE_EMBED_STDCXX=0
+ ;;
+ default) ENABLE_EMBED_STDCXX=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-embed-stdcxx. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-embed-stdcxx. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+# Check whether --enable-timestamps was given.
+if test "${enable_timestamps+set}" = set; then
+ enableval=$enable_timestamps;
+else
+ enableval=default
+fi
+
+case "$enableval" in
+ yes) ENABLE_TIMESTAMPS=1
+ ;;
+ no) ENABLE_TIMESTAMPS=0
+ ;;
+ default) ENABLE_TIMESTAMPS=1
+ ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+cat >>confdefs.h <<_ACEOF
+#define ENABLE_TIMESTAMPS $ENABLE_TIMESTAMPS
+_ACEOF
+
+
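+# Expand --enable-targets into the list of backends to build: "all" selects
+# every in-tree target, "host" (and "host-only") maps the detected
+# architecture to its backend, and anything else is a comma-separated list.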
+TARGETS_TO_BUILD=""
+# Check whether --enable-targets was given.
+if test "${enable_targets+set}" = set; then
+ enableval=$enable_targets;
+else
+ enableval=all
+fi
+
+if test "$enableval" = host-only ; then
+ enableval=host
+fi
+case "$enableval" in
+ all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX" ;;
+ *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
+ case "$a_target" in
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
+ cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+ mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+ ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+ host) case "$llvm_cv_target_arch" in
+ x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+ Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+ PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+ ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+ Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+ MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+ CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+ XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+ MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+ Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
+ PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+                *) { { echo "$as_me:$LINENO: error: Cannot set target to build" >&5
+echo "$as_me: error: Cannot set target to build" >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac ;;
+ *) { { echo "$as_me:$LINENO: error: Unrecognized target $a_target" >&5
+echo "$as_me: error: Unrecognized target $a_target" >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac
+ done
+ ;;
+esac
+TARGETS_TO_BUILD=$TARGETS_TO_BUILD
+
+
+# Determine whether we are building LLVM support for the native architecture.
+# If so, define LLVM_NATIVE_ARCH to that LLVM target.
+for a_target in $TARGETS_TO_BUILD; do
+ if test "$a_target" = "$LLVM_NATIVE_ARCH"; then
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_ARCH $LLVM_NATIVE_ARCH
+_ACEOF
+
+ LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
+ LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
+ LLVM_NATIVE_TARGETMC="LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC"
+ LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+ LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
+ fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+ LLVM_NATIVE_DISASSEMBLER="LLVMInitialize${LLVM_NATIVE_ARCH}Disassembler"
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_TARGET $LLVM_NATIVE_TARGET
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_TARGETINFO $LLVM_NATIVE_TARGETINFO
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_TARGETMC $LLVM_NATIVE_TARGETMC
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_ASMPRINTER $LLVM_NATIVE_ASMPRINTER
+_ACEOF
+
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_ASMPARSER $LLVM_NATIVE_ASMPARSER
+_ACEOF
+
+ fi
+ if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/Disassembler/Makefile ; then
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_DISASSEMBLER $LLVM_NATIVE_DISASSEMBLER
+_ACEOF
+
+ fi
+ fi
+done
+
+# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual
+# target feature def files.
+LLVM_ENUM_TARGETS=""
+LLVM_ENUM_ASM_PRINTERS=""
+LLVM_ENUM_ASM_PARSERS=""
+LLVM_ENUM_DISASSEMBLERS=""
+for target_to_build in $TARGETS_TO_BUILD; do
+ LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS"
+ if test -f ${srcdir}/lib/Target/${target_to_build}/*AsmPrinter.cpp ; then
+ LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS";
+ fi
+ if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then
+ LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS";
+ fi
+ if test -f ${srcdir}/lib/Target/${target_to_build}/Disassembler/Makefile ; then
+ LLVM_ENUM_DISASSEMBLERS="LLVM_DISASSEMBLER($target_to_build) $LLVM_ENUM_DISASSEMBLERS";
+ fi
+done
+
+
+
+
+
+
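+# Pick the default optimization flag: -O2 on FreeBSD and MingW, -O3 everywhere
+# else, unless --with-optimize-option overrides it.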
+# Check whether --with-optimize-option was given.
+if test "${with_optimize_option+set}" = set; then
+ withval=$with_optimize_option;
+else
+ withval=default
+fi
+
+{ echo "$as_me:$LINENO: checking optimization flags" >&5
+echo $ECHO_N "checking optimization flags... $ECHO_C" >&6; }
+case "$withval" in
+ default)
+ case "$llvm_cv_os_type" in
+ FreeBSD) optimize_option=-O2 ;;
+ MingW) optimize_option=-O2 ;;
+ *) optimize_option=-O3 ;;
+ esac ;;
+ *) optimize_option="$withval" ;;
+esac
+OPTIMIZE_OPTION=$optimize_option
+
+{ echo "$as_me:$LINENO: result: $optimize_option" >&5
+echo "${ECHO_T}$optimize_option" >&6; }
+
+
+# Check whether --with-extra-options was given.
+if test "${with_extra_options+set}" = set; then
+ withval=$with_extra_options;
+else
+ withval=default
+fi
+
+case "$withval" in
+ default) EXTRA_OPTIONS= ;;
+ *) EXTRA_OPTIONS=$withval ;;
+esac
+EXTRA_OPTIONS=$EXTRA_OPTIONS
+
+
+
+# Check whether --with-extra-ld-options was given.
+if test "${with_extra_ld_options+set}" = set; then
+ withval=$with_extra_ld_options;
+else
+ withval=default
+fi
+
+case "$withval" in
+ default) EXTRA_LD_OPTIONS= ;;
+ *) EXTRA_LD_OPTIONS=$withval ;;
+esac
+EXTRA_LD_OPTIONS=$EXTRA_LD_OPTIONS
+
+
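+# Language bindings.  Only the OCaml bindings can be requested by name;
+# "auto" (the default) defers the decision to later feature tests.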
+# Check whether --enable-bindings was given.
+if test "${enable_bindings+set}" = set; then
+ enableval=$enable_bindings;
+else
+ enableval=default
+fi
+
+BINDINGS_TO_BUILD=""
+case "$enableval" in
+ yes | default | auto) BINDINGS_TO_BUILD="auto" ;;
+ all ) BINDINGS_TO_BUILD="ocaml" ;;
+ none | no) BINDINGS_TO_BUILD="" ;;
+ *)for a_binding in `echo $enableval|sed -e 's/,/ /g' ` ; do
+ case "$a_binding" in
+ ocaml) BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD" ;;
+ *) { { echo "$as_me:$LINENO: error: Unrecognized binding $a_binding" >&5
+echo "$as_me: error: Unrecognized binding $a_binding" >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac
+ done
+ ;;
+esac
+
+
+# Check whether --with-ocaml-libdir was given.
+if test "${with_ocaml_libdir+set}" = set; then
+ withval=$with_ocaml_libdir;
+else
+ withval=auto
+fi
+
+case "$withval" in
+ auto) with_ocaml_libdir="$withval" ;;
+ /* | [A-Za-z]:[\\/]*) with_ocaml_libdir="$withval" ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid path for --with-ocaml-libdir. Provide full path" >&5
+echo "$as_me: error: Invalid path for --with-ocaml-libdir. Provide full path" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+
+
+# Check whether --with-clang-resource-dir was given.
+if test "${with_clang_resource_dir+set}" = set; then
+ withval=$with_clang_resource_dir;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define CLANG_RESOURCE_DIR "$withval"
+_ACEOF
+
+
+
+# Check whether --with-c-include-dirs was given.
+if test "${with_c_include_dirs+set}" = set; then
+ withval=$with_c_include_dirs;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define C_INCLUDE_DIRS "$withval"
+_ACEOF
+
+
+# Clang normally uses the system C++ headers and libraries. With this option,
+# Clang will use the ones provided by a gcc installation instead. Pass the
+# same value that was used with --prefix when configuring that gcc.
+
+# Check whether --with-gcc-toolchain was given.
+if test "${with_gcc_toolchain+set}" = set; then
+ withval=$with_gcc_toolchain;
+else
+ withval=""
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define GCC_INSTALL_PREFIX "$withval"
+_ACEOF
+
+
+
+# Check whether --with-binutils-include was given.
+if test "${with_binutils_include+set}" = set; then
+ withval=$with_binutils_include;
+else
+ withval=default
+fi
+
+case "$withval" in
+ default) WITH_BINUTILS_INCDIR=default ;;
+ /* | [A-Za-z]:[\\/]*) WITH_BINUTILS_INCDIR=$withval ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid path for --with-binutils-include. Provide full path" >&5
+echo "$as_me: error: Invalid path for --with-binutils-include. Provide full path" >&2;}
+ { (exit 1); exit 1; }; } ;;
+esac
+if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then
+ BINUTILS_INCDIR=$WITH_BINUTILS_INCDIR
+
+ if test ! -f "$WITH_BINUTILS_INCDIR/plugin-api.h"; then
+ echo "$WITH_BINUTILS_INCDIR/plugin-api.h"
+ { { echo "$as_me:$LINENO: error: Invalid path to directory containing plugin-api.h." >&5
+echo "$as_me: error: Invalid path to directory containing plugin-api.h." >&2;}
+ { (exit 1); exit 1; }; };
+ fi
+fi
+
+
+# Check whether --with-bug-report-url was given.
+if test "${with_bug_report_url+set}" = set; then
+ withval=$with_bug_report_url;
+else
+ withval="http://llvm.org/bugs/"
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define BUG_REPORT_URL "$withval"
+_ACEOF
+
+
+# Check whether --enable-libffi was given.
+if test "${enable_libffi+set}" = set; then
+ enableval=$enable_libffi; case "$enableval" in
+ yes) llvm_cv_enable_libffi="yes" ;;
+ no) llvm_cv_enable_libffi="no" ;;
+ *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&5
+echo "$as_me: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&2;}
+ { (exit 1); exit 1; }; } ;;
+ esac
+else
+ llvm_cv_enable_libffi=no
+fi
+
+
+
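+# The remaining checks locate ordinary build utilities (nm, make, ln, cmp,
+# cp, date, find, ...), caching full paths where they can be found and
+# falling back to the bare command names otherwise.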
+{ echo "$as_me:$LINENO: checking for BSD-compatible nm" >&5
+echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" >&6; }
+if test "${lt_cv_path_NM+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$NM"; then
+ # Let the user override the test.
+ lt_cv_path_NM="$NM"
+else
+ lt_nm_to_check="${ac_tool_prefix}nm"
+ if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+ lt_nm_to_check="$lt_nm_to_check nm"
+ fi
+ for lt_tmp_nm in $lt_nm_to_check; do
+ lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+ for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+ IFS="$lt_save_ifs"
+ test -z "$ac_dir" && ac_dir=.
+ tmp_nm="$ac_dir/$lt_tmp_nm"
+ if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+ # Check to see if the nm accepts a BSD-compat flag.
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+ # nm: unknown option "B" ignored
+ # Tru64's nm complains that /dev/null is an invalid object file
+ case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+ */dev/null* | *'Invalid file or object type'*)
+ lt_cv_path_NM="$tmp_nm -B"
+ break
+ ;;
+ *)
+ case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+ */dev/null*)
+ lt_cv_path_NM="$tmp_nm -p"
+ break
+ ;;
+ *)
+ lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+ continue # so that we can try to find one that supports BSD flags
+ ;;
+ esac
+ ;;
+ esac
+ fi
+ done
+ IFS="$lt_save_ifs"
+ done
+ test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm
+fi
+fi
+{ echo "$as_me:$LINENO: result: $lt_cv_path_NM" >&5
+echo "${ECHO_T}$lt_cv_path_NM" >&6; }
+NM="$lt_cv_path_NM"
+
+
+
+{ echo "$as_me:$LINENO: checking for GNU make" >&5
+echo $ECHO_N "checking for GNU make... $ECHO_C" >&6; }
+if test "${llvm_cv_gnu_make_command+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ llvm_cv_gnu_make_command=''
+ for a in "$MAKE" make gmake gnumake ; do
+ if test -z "$a" ; then continue ; fi ;
+ if ( sh -c "$a --version" 2> /dev/null | grep GNU 2>&1 > /dev/null )
+ then
+ llvm_cv_gnu_make_command=$a ;
+ break;
+ fi
+ done
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_gnu_make_command" >&5
+echo "${ECHO_T}$llvm_cv_gnu_make_command" >&6; }
+ if test "x$llvm_cv_gnu_make_command" != "x" ; then
+ ifGNUmake='' ;
+ else
+ ifGNUmake='#' ;
+ { echo "$as_me:$LINENO: result: \"Not found\"" >&5
+echo "${ECHO_T}\"Not found\"" >&6; };
+ fi
+
+
+{ echo "$as_me:$LINENO: checking whether ln -s works" >&5
+echo $ECHO_N "checking whether ln -s works... $ECHO_C" >&6; }
+LN_S=$as_ln_s
+if test "$LN_S" = "ln -s"; then
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no, using $LN_S" >&5
+echo "${ECHO_T}no, using $LN_S" >&6; }
+fi
+
+# Extract the first word of "cmp", so it can be a program name with args.
+set dummy cmp; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_CMP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $CMP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_CMP="$CMP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_CMP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_CMP" && ac_cv_path_CMP="cmp"
+ ;;
+esac
+fi
+CMP=$ac_cv_path_CMP
+if test -n "$CMP"; then
+ { echo "$as_me:$LINENO: result: $CMP" >&5
+echo "${ECHO_T}$CMP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
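+# The expansion above (and its repeats below for cp, date, find, grep, mkdir
+# and mv) is autoconf's AC_PATH_PROG pattern: walk $PATH using
+# $PATH_SEPARATOR, take the first executable hit, else fall back to the bare
+# name. A rough hand-rolled equivalent (sketch, not the generated logic):
+#   CMP=$(command -v cmp || echo cmp)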
+
+# Extract the first word of "cp", so it can be a program name with args.
+set dummy cp; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_CP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $CP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_CP="$CP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_CP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_CP" && ac_cv_path_CP="cp"
+ ;;
+esac
+fi
+CP=$ac_cv_path_CP
+if test -n "$CP"; then
+ { echo "$as_me:$LINENO: result: $CP" >&5
+echo "${ECHO_T}$CP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "date", so it can be a program name with args.
+set dummy date; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_DATE+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $DATE in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_DATE="$DATE" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_DATE="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_DATE" && ac_cv_path_DATE="date"
+ ;;
+esac
+fi
+DATE=$ac_cv_path_DATE
+if test -n "$DATE"; then
+ { echo "$as_me:$LINENO: result: $DATE" >&5
+echo "${ECHO_T}$DATE" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "find", so it can be a program name with args.
+set dummy find; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_FIND+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $FIND in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_FIND="$FIND" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_FIND="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_FIND" && ac_cv_path_FIND="find"
+ ;;
+esac
+fi
+FIND=$ac_cv_path_FIND
+if test -n "$FIND"; then
+ { echo "$as_me:$LINENO: result: $FIND" >&5
+echo "${ECHO_T}$FIND" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "grep", so it can be a program name with args.
+set dummy grep; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_GREP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $GREP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GREP="$GREP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_GREP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_GREP" && ac_cv_path_GREP="grep"
+ ;;
+esac
+fi
+GREP=$ac_cv_path_GREP
+if test -n "$GREP"; then
+ { echo "$as_me:$LINENO: result: $GREP" >&5
+echo "${ECHO_T}$GREP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "mkdir", so it can be a program name with args.
+set dummy mkdir; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_MKDIR+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $MKDIR in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MKDIR="$MKDIR" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_MKDIR="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_MKDIR" && ac_cv_path_MKDIR="mkdir"
+ ;;
+esac
+fi
+MKDIR=$ac_cv_path_MKDIR
+if test -n "$MKDIR"; then
+ { echo "$as_me:$LINENO: result: $MKDIR" >&5
+echo "${ECHO_T}$MKDIR" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "mv", so it can be a program name with args.
+set dummy mv; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_MV+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $MV in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_MV="$MV" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_MV="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_MV" && ac_cv_path_MV="mv"
+ ;;
+esac
+fi
+MV=$ac_cv_path_MV
+if test -n "$MV"; then
+ { echo "$as_me:$LINENO: result: $MV" >&5
+echo "${ECHO_T}$MV" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$RANLIB"; then
+ ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+ { echo "$as_me:$LINENO: result: $RANLIB" >&5
+echo "${ECHO_T}$RANLIB" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then
+ ac_ct_RANLIB=$RANLIB
+ # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_RANLIB"; then
+ ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_RANLIB="ranlib"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+ { echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5
+echo "${ECHO_T}$ac_ct_RANLIB" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+ if test "x$ac_ct_RANLIB" = x; then
+ RANLIB=":"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&5
+echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&2;}
+ac_tool_warned=yes ;;
+esac
+ RANLIB=$ac_ct_RANLIB
+ fi
+else
+ RANLIB="$ac_cv_prog_RANLIB"
+fi
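+# When cross-compiling with, say, a hypothetical --host=x86_64-w64-mingw32,
+# $ac_tool_prefix is "x86_64-w64-mingw32-", so the logic above probes
+# x86_64-w64-mingw32-ranlib first and only then a bare ranlib, emitting the
+# cross-tool warning. The block below repeats the same dance for ar.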
+
+if test -n "$ac_tool_prefix"; then
+ # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args.
+set dummy ${ac_tool_prefix}ar; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_AR+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$AR"; then
+ ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_AR="${ac_tool_prefix}ar"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+ { echo "$as_me:$LINENO: result: $AR" >&5
+echo "${ECHO_T}$AR" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AR"; then
+ ac_ct_AR=$AR
+ # Extract the first word of "ar", so it can be a program name with args.
+set dummy ar; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_ac_ct_AR+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$ac_ct_AR"; then
+ ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_prog_ac_ct_AR="ar"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+ { echo "$as_me:$LINENO: result: $ac_ct_AR" >&5
+echo "${ECHO_T}$ac_ct_AR" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+ if test "x$ac_ct_AR" = x; then
+ AR="false"
+ else
+ case $cross_compiling:$ac_tool_warned in
+yes:)
+{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&5
+echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools
+whose name does not start with the host triplet. If you think this
+configuration is useful to you, please write to autoconf@gnu.org." >&2;}
+ac_tool_warned=yes ;;
+esac
+ AR=$ac_ct_AR
+ fi
+else
+ AR="$ac_cv_prog_AR"
+fi
+
+# Extract the first word of "rm", so it can be a program name with args.
+set dummy rm; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_RM+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $RM in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_RM="$RM" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_RM="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_RM" && ac_cv_path_RM="rm"
+ ;;
+esac
+fi
+RM=$ac_cv_path_RM
+if test -n "$RM"; then
+ { echo "$as_me:$LINENO: result: $RM" >&5
+echo "${ECHO_T}$RM" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "sed", so it can be a program name with args.
+set dummy sed; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_SED+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $SED in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_SED="$SED" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_SED="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_SED" && ac_cv_path_SED="sed"
+ ;;
+esac
+fi
+SED=$ac_cv_path_SED
+if test -n "$SED"; then
+ { echo "$as_me:$LINENO: result: $SED" >&5
+echo "${ECHO_T}$SED" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "tar", so it can be a program name with args.
+set dummy tar; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_TAR+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $TAR in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_TAR="$TAR" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_TAR="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_TAR" && ac_cv_path_TAR="gtar"
+ ;;
+esac
+fi
+TAR=$ac_cv_path_TAR
+if test -n "$TAR"; then
+ { echo "$as_me:$LINENO: result: $TAR" >&5
+echo "${ECHO_T}$TAR" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "pwd", so it can be a program name with args.
+set dummy pwd; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_BINPWD+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $BINPWD in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_BINPWD="$BINPWD" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_BINPWD="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_BINPWD" && ac_cv_path_BINPWD="pwd"
+ ;;
+esac
+fi
+BINPWD=$ac_cv_path_BINPWD
+if test -n "$BINPWD"; then
+ { echo "$as_me:$LINENO: result: $BINPWD" >&5
+echo "${ECHO_T}$BINPWD" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+
+# Extract the first word of "Graphviz", so it can be a program name with args.
+set dummy Graphviz; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_GRAPHVIZ+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $GRAPHVIZ in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GRAPHVIZ="$GRAPHVIZ" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_GRAPHVIZ="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_GRAPHVIZ" && ac_cv_path_GRAPHVIZ="echo Graphviz"
+ ;;
+esac
+fi
+GRAPHVIZ=$ac_cv_path_GRAPHVIZ
+if test -n "$GRAPHVIZ"; then
+ { echo "$as_me:$LINENO: result: $GRAPHVIZ" >&5
+echo "${ECHO_T}$GRAPHVIZ" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$GRAPHVIZ" != "echo Graphviz" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GRAPHVIZ 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ GRAPHVIZ=`echo $GRAPHVIZ | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_GRAPHVIZ "$GRAPHVIZ${EXEEXT}"
+_ACEOF
+
+fi
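+# On MSYS/MinGW the sed above turns a POSIX-style drive path into a native
+# one, e.g. (illustrative): /c/Graphviz/bin/Graphviz -> c:/Graphviz/bin/Graphviz,
+# keeping the LLVM_PATH_* defines usable outside the MSYS shell. The same
+# rewrite recurs for each graph tool probed below.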
+# Extract the first word of "dot", so it can be a program name with args.
+set dummy dot; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_DOT+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $DOT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_DOT="$DOT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_DOT="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_DOT" && ac_cv_path_DOT="echo dot"
+ ;;
+esac
+fi
+DOT=$ac_cv_path_DOT
+if test -n "$DOT"; then
+ { echo "$as_me:$LINENO: result: $DOT" >&5
+echo "${ECHO_T}$DOT" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$DOT" != "echo dot" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_DOT 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ DOT=`echo $DOT | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_DOT "$DOT${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "fdp", so it can be a program name with args.
+set dummy fdp; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_FDP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $FDP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_FDP="$FDP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_FDP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_FDP" && ac_cv_path_FDP="echo fdp"
+ ;;
+esac
+fi
+FDP=$ac_cv_path_FDP
+if test -n "$FDP"; then
+ { echo "$as_me:$LINENO: result: $FDP" >&5
+echo "${ECHO_T}$FDP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$FDP" != "echo fdp" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_FDP 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ FDP=`echo $FDP | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_FDP "$FDP${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "neato", so it can be a program name with args.
+set dummy neato; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_NEATO+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $NEATO in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_NEATO="$NEATO" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_NEATO="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_NEATO" && ac_cv_path_NEATO="echo neato"
+ ;;
+esac
+fi
+NEATO=$ac_cv_path_NEATO
+if test -n "$NEATO"; then
+ { echo "$as_me:$LINENO: result: $NEATO" >&5
+echo "${ECHO_T}$NEATO" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$NEATO" != "echo neato" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_NEATO 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ NEATO=`echo $NEATO | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_NEATO "$NEATO${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "twopi", so it can be a program name with args.
+set dummy twopi; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_TWOPI+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $TWOPI in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_TWOPI="$TWOPI" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_TWOPI="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_TWOPI" && ac_cv_path_TWOPI="echo twopi"
+ ;;
+esac
+fi
+TWOPI=$ac_cv_path_TWOPI
+if test -n "$TWOPI"; then
+ { echo "$as_me:$LINENO: result: $TWOPI" >&5
+echo "${ECHO_T}$TWOPI" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$TWOPI" != "echo twopi" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_TWOPI 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ TWOPI=`echo $TWOPI | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_TWOPI "$TWOPI${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "circo", so it can be a program name with args.
+set dummy circo; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_CIRCO+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $CIRCO in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_CIRCO="$CIRCO" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_CIRCO="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_CIRCO" && ac_cv_path_CIRCO="echo circo"
+ ;;
+esac
+fi
+CIRCO=$ac_cv_path_CIRCO
+if test -n "$CIRCO"; then
+ { echo "$as_me:$LINENO: result: $CIRCO" >&5
+echo "${ECHO_T}$CIRCO" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$CIRCO" != "echo circo" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_CIRCO 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ CIRCO=`echo $CIRCO | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_CIRCO "$CIRCO${EXEEXT}"
+_ACEOF
+
+fi
+for ac_prog in gv gsview32
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_GV+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $GV in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GV="$GV" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_GV="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+GV=$ac_cv_path_GV
+if test -n "$GV"; then
+ { echo "$as_me:$LINENO: result: $GV" >&5
+echo "${ECHO_T}$GV" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$GV" && break
+done
+test -n "$GV" || GV="echo gv"
+
+if test "$GV" != "echo gv" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_GV 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ GV=`echo $GV | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_GV "$GV${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "dotty", so it can be a program name with args.
+set dummy dotty; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_DOTTY+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $DOTTY in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_DOTTY="$DOTTY" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_DOTTY="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_DOTTY" && ac_cv_path_DOTTY="echo dotty"
+ ;;
+esac
+fi
+DOTTY=$ac_cv_path_DOTTY
+if test -n "$DOTTY"; then
+ { echo "$as_me:$LINENO: result: $DOTTY" >&5
+echo "${ECHO_T}$DOTTY" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$DOTTY" != "echo dotty" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_DOTTY 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ DOTTY=`echo $DOTTY | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_DOTTY "$DOTTY${EXEEXT}"
+_ACEOF
+
+fi
+# Extract the first word of "xdot.py", so it can be a program name with args.
+set dummy xdot.py; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_XDOT_PY+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $XDOT_PY in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_XDOT_PY="$XDOT_PY" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_XDOT_PY="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ test -z "$ac_cv_path_XDOT_PY" && ac_cv_path_XDOT_PY="echo xdot.py"
+ ;;
+esac
+fi
+XDOT_PY=$ac_cv_path_XDOT_PY
+if test -n "$XDOT_PY"; then
+ { echo "$as_me:$LINENO: result: $XDOT_PY" >&5
+echo "${ECHO_T}$XDOT_PY" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+if test "$XDOT_PY" != "echo xdot.py" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_XDOT_PY 1
+_ACEOF
+
+ if test "$llvm_cv_os_type" = "MingW" ; then
+ XDOT_PY=`echo $XDOT_PY | sed 's/^\/\([A-Za-z]\)\//\1:\//' `
+ fi
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PATH_XDOT_PY "$XDOT_PY${EXEEXT}"
+_ACEOF
+
+fi
+
+# Find a good install program. We prefer a C program (faster),
+# so one script is as good as another. But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+{ echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5
+echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6; }
+if test -z "$INSTALL"; then
+if test "${ac_cv_path_install+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ # Account for people who put trailing slashes in PATH elements.
+case $as_dir/ in
+  ./ | .// | /[cC]/* | \
+ /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+ ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \
+ /usr/ucb/* ) ;;
+ *)
+ # OSF1 and SCO ODT 3.0 have their own names for install.
+ # Don't use installbsd from OSF since it installs stuff as root
+ # by default.
+ for ac_prog in ginstall scoinst install; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; }; then
+ if test $ac_prog = install &&
+ grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # AIX install. It has an incompatible calling convention.
+ :
+ elif test $ac_prog = install &&
+ grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+ # program-specific install script used by HP pwplus--don't use.
+ :
+ else
+ ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c"
+ break 3
+ fi
+ fi
+ done
+ done
+ ;;
+esac
+done
+IFS=$as_save_IFS
+
+
+fi
+ if test "${ac_cv_path_install+set}" = set; then
+ INSTALL=$ac_cv_path_install
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for INSTALL within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ INSTALL=$ac_install_sh
+ fi
+fi
+{ echo "$as_me:$LINENO: result: $INSTALL" >&5
+echo "${ECHO_T}$INSTALL" >&6; }
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+case "$INSTALL" in
+ [\\/$]* | ?:[\\/]* ) ;;
+ *) INSTALL="\\\$(TOPSRCDIR)/$INSTALL" ;;
+esac
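+# A relative detection result (such as the ./install-sh -c fallback) would
+# break once make recurses into subdirectories, so the case above prefixes
+# it; illustratively, "./install-sh -c" reaches generated makefiles as
+# "$(TOPSRCDIR)/./install-sh -c".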
+# Extract the first word of "bzip2", so it can be a program name with args.
+set dummy bzip2; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_BZIP2+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $BZIP2 in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_BZIP2="$BZIP2" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_BZIP2="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+BZIP2=$ac_cv_path_BZIP2
+if test -n "$BZIP2"; then
+ { echo "$as_me:$LINENO: result: $BZIP2" >&5
+echo "${ECHO_T}$BZIP2" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "cat", so it can be a program name with args.
+set dummy cat; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_CAT+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $CAT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_CAT="$CAT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_CAT="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+ ;;
+esac
+fi
+CAT=$ac_cv_path_CAT
+if test -n "$CAT"; then
+ { echo "$as_me:$LINENO: result: $CAT" >&5
+echo "${ECHO_T}$CAT" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+# Extract the first word of "doxygen", so it can be a program name with args.
+set dummy doxygen; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_DOXYGEN+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $DOXYGEN in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_DOXYGEN="$DOXYGEN" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_DOXYGEN="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+ ;;
+esac
+fi
+DOXYGEN=$ac_cv_path_DOXYGEN
+if test -n "$DOXYGEN"; then
+ { echo "$as_me:$LINENO: result: $DOXYGEN" >&5
+echo "${ECHO_T}$DOXYGEN" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+# Extract the first word of "groff", so it can be a program name with args.
+set dummy groff; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_GROFF+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $GROFF in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GROFF="$GROFF" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_GROFF="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+GROFF=$ac_cv_path_GROFF
+if test -n "$GROFF"; then
+ { echo "$as_me:$LINENO: result: $GROFF" >&5
+echo "${ECHO_T}$GROFF" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "gzip", so it can be a program name with args.
+set dummy gzip; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_GZIPBIN+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $GZIPBIN in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GZIPBIN="$GZIPBIN" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_GZIPBIN="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+GZIPBIN=$ac_cv_path_GZIPBIN
+if test -n "$GZIPBIN"; then
+ { echo "$as_me:$LINENO: result: $GZIPBIN" >&5
+echo "${ECHO_T}$GZIPBIN" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "pod2html", so it can be a program name with args.
+set dummy pod2html; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_POD2HTML+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $POD2HTML in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_POD2HTML="$POD2HTML" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_POD2HTML="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+POD2HTML=$ac_cv_path_POD2HTML
+if test -n "$POD2HTML"; then
+ { echo "$as_me:$LINENO: result: $POD2HTML" >&5
+echo "${ECHO_T}$POD2HTML" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "pod2man", so it can be a program name with args.
+set dummy pod2man; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_POD2MAN+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $POD2MAN in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_POD2MAN="$POD2MAN" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_POD2MAN="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+POD2MAN=$ac_cv_path_POD2MAN
+if test -n "$POD2MAN"; then
+ { echo "$as_me:$LINENO: result: $POD2MAN" >&5
+echo "${ECHO_T}$POD2MAN" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "pdfroff", so it can be a program name with args.
+set dummy pdfroff; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_PDFROFF+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $PDFROFF in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_PDFROFF="$PDFROFF" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_PDFROFF="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+PDFROFF=$ac_cv_path_PDFROFF
+if test -n "$PDFROFF"; then
+ { echo "$as_me:$LINENO: result: $PDFROFF" >&5
+echo "${ECHO_T}$PDFROFF" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+# Extract the first word of "runtest", so it can be a program name with args.
+set dummy runtest; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_RUNTEST+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $RUNTEST in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_RUNTEST="$RUNTEST" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_RUNTEST="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+RUNTEST=$ac_cv_path_RUNTEST
+if test -n "$RUNTEST"; then
+ { echo "$as_me:$LINENO: result: $RUNTEST" >&5
+echo "${ECHO_T}$RUNTEST" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+
+no_itcl=true
+{ echo "$as_me:$LINENO: checking for the tclsh program in tclinclude directory" >&5
+echo $ECHO_N "checking for the tclsh program in tclinclude directory... $ECHO_C" >&6; }
+
+# Check whether --with-tclinclude was given.
+if test "${with_tclinclude+set}" = set; then
+ withval=$with_tclinclude; with_tclinclude=${withval}
+else
+ with_tclinclude=''
+fi
+
+if test "${ac_cv_path_tclsh+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+if test x"${with_tclinclude}" != x ; then
+ if test -f ${with_tclinclude}/tclsh ; then
+ ac_cv_path_tclsh=`(cd ${with_tclinclude}; pwd)`
+ elif test -f ${with_tclinclude}/src/tclsh ; then
+ ac_cv_path_tclsh=`(cd ${with_tclinclude}/src; pwd)`
+ else
+ { { echo "$as_me:$LINENO: error: ${with_tclinclude} directory doesn't contain tclsh" >&5
+echo "$as_me: error: ${with_tclinclude} directory doesn't contain tclsh" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+fi
+fi
+
+
+if test x"${ac_cv_path_tclsh}" = x ; then
+ { echo "$as_me:$LINENO: result: none" >&5
+echo "${ECHO_T}none" >&6; }
+ for ac_prog in tclsh8.4 tclsh8.4.8 tclsh8.4.7 tclsh8.4.6 tclsh8.4.5 tclsh8.4.4 tclsh8.4.3 tclsh8.4.2 tclsh8.4.1 tclsh8.4.0 tclsh8.3 tclsh8.3.5 tclsh8.3.4 tclsh8.3.3 tclsh8.3.2 tclsh8.3.1 tclsh8.3.0 tclsh
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_TCLSH+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $TCLSH in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_TCLSH="$TCLSH" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_TCLSH="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+TCLSH=$ac_cv_path_TCLSH
+if test -n "$TCLSH"; then
+ { echo "$as_me:$LINENO: result: $TCLSH" >&5
+echo "${ECHO_T}$TCLSH" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$TCLSH" && break
+done
+
+ if test x"${TCLSH}" = x ; then
+ ac_cv_path_tclsh='';
+ else
+ ac_cv_path_tclsh="${TCLSH}";
+ fi
+else
+ { echo "$as_me:$LINENO: result: ${ac_cv_path_tclsh}" >&5
+echo "${ECHO_T}${ac_cv_path_tclsh}" >&6; }
+ TCLSH="${ac_cv_path_tclsh}"
+
+fi
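+# In short: --with-tclinclude=<dir> (e.g. a hypothetical
+# --with-tclinclude=/opt/tcl8.4) must contain tclsh or src/tclsh, or
+# configure aborts; without the option, versioned names (tclsh8.4*,
+# tclsh8.3*) are probed before plain tclsh.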
+
+# Extract the first word of "zip", so it can be a program name with args.
+set dummy zip; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_ZIP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $ZIP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_ZIP="$ZIP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_ZIP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+ZIP=$ac_cv_path_ZIP
+if test -n "$ZIP"; then
+ { echo "$as_me:$LINENO: result: $ZIP" >&5
+echo "${ECHO_T}$ZIP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+for ac_prog in ocamlc
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_OCAMLC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $OCAMLC in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_OCAMLC="$OCAMLC" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_OCAMLC="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+OCAMLC=$ac_cv_path_OCAMLC
+if test -n "$OCAMLC"; then
+ { echo "$as_me:$LINENO: result: $OCAMLC" >&5
+echo "${ECHO_T}$OCAMLC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$OCAMLC" && break
+done
+
+for ac_prog in ocamlopt
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_OCAMLOPT+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $OCAMLOPT in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_OCAMLOPT="$OCAMLOPT" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_OCAMLOPT="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+OCAMLOPT=$ac_cv_path_OCAMLOPT
+if test -n "$OCAMLOPT"; then
+ { echo "$as_me:$LINENO: result: $OCAMLOPT" >&5
+echo "${ECHO_T}$OCAMLOPT" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$OCAMLOPT" && break
+done
+
+for ac_prog in ocamldep
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_OCAMLDEP+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $OCAMLDEP in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_OCAMLDEP="$OCAMLDEP" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_OCAMLDEP="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+OCAMLDEP=$ac_cv_path_OCAMLDEP
+if test -n "$OCAMLDEP"; then
+ { echo "$as_me:$LINENO: result: $OCAMLDEP" >&5
+echo "${ECHO_T}$OCAMLDEP" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$OCAMLDEP" && break
+done
+
+for ac_prog in ocamldoc
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_OCAMLDOC+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $OCAMLDOC in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_OCAMLDOC="$OCAMLDOC" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_OCAMLDOC="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+OCAMLDOC=$ac_cv_path_OCAMLDOC
+if test -n "$OCAMLDOC"; then
+ { echo "$as_me:$LINENO: result: $OCAMLDOC" >&5
+echo "${ECHO_T}$OCAMLDOC" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$OCAMLDOC" && break
+done
+
+for ac_prog in gas as
+do
+ # Extract the first word of "$ac_prog", so it can be a program name with args.
+set dummy $ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_path_GAS+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ case $GAS in
+ [\\/]* | ?:[\\/]*)
+ ac_cv_path_GAS="$GAS" # Let the user override the test with a path.
+ ;;
+ *)
+ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+ IFS=$as_save_IFS
+ test -z "$as_dir" && as_dir=.
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then
+ ac_cv_path_GAS="$as_dir/$ac_word$ac_exec_ext"
+ echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
+ break 2
+ fi
+done
+done
+IFS=$as_save_IFS
+
+ ;;
+esac
+fi
+GAS=$ac_cv_path_GAS
+if test -n "$GAS"; then
+ { echo "$as_me:$LINENO: result: $GAS" >&5
+echo "${ECHO_T}$GAS" >&6; }
+else
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+fi
+
+
+ test -n "$GAS" && break
+done
+
+
+{ echo "$as_me:$LINENO: checking for linker version" >&5
+echo $ECHO_N "checking for linker version... $ECHO_C" >&6; }
+if test "${llvm_cv_link_version+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+ version_string="$(ld -v 2>&1 | head -1)"
+
+ # Check for ld64.
+ if (echo "$version_string" | grep -q "ld64"); then
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#")
+ else
+ llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#")
+ fi
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_link_version" >&5
+echo "${ECHO_T}$llvm_cv_link_version" >&6; }
+
+cat >>confdefs.h <<_ACEOF
+#define HOST_LINK_VERSION "$llvm_cv_link_version"
+_ACEOF
+
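+# Illustrative sample inputs for the two sed branches above (sample strings
+# only, not captured output):
+#   "GNU ld (GNU Binutils) 2.21"         -> llvm_cv_link_version=2.21
+#   "@(#)PROGRAM:ld  PROJECT:ld64-123.2" -> llvm_cv_link_version=123.2
+# The ld64 branch strips everything through "ld64-"; the generic branch
+# keeps the first run of digits and dots.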
+
+
+{ echo "$as_me:$LINENO: checking for compiler -Wl,-R<path> option" >&5
+echo $ECHO_N "checking for compiler -Wl,-R<path> option... $ECHO_C" >&6; }
+if test "${llvm_cv_link_use_r+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ oldcflags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wl,-R."
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_link_use_r=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_link_use_r=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ CFLAGS="$oldcflags"
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_r" >&5
+echo "${ECHO_T}$llvm_cv_link_use_r" >&6; }
+if test "$llvm_cv_link_use_r" = yes ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LINK_R 1
+_ACEOF
+
+ fi
+
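+# -Wl,-R<dir> asks the linker (on most ELF toolchains -R with a directory is
+# a synonym for -rpath) to record <dir> as a run-time library search path.
+# Illustrative use, with a hypothetical libfoo:
+#   cc -o prog prog.c -L/opt/foo/lib -lfoo -Wl,-R/opt/foo/lib
+# so prog finds libfoo.so at run time without LD_LIBRARY_PATH.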
+
+{ echo "$as_me:$LINENO: checking for compiler -Wl,-export-dynamic option" >&5
+echo $ECHO_N "checking for compiler -Wl,-export-dynamic option... $ECHO_C" >&6; }
+if test "${llvm_cv_link_use_export_dynamic+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ oldcflags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wl,-export-dynamic"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_link_use_export_dynamic=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_link_use_export_dynamic=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ CFLAGS="$oldcflags"
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_export_dynamic" >&5
+echo "${ECHO_T}$llvm_cv_link_use_export_dynamic" >&6; }
+if test "$llvm_cv_link_use_export_dynamic" = yes ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LINK_EXPORT_DYNAMIC 1
+_ACEOF
+
+ fi
+
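+# For illustration: -Wl,-export-dynamic (GNU ld's --export-dynamic) puts
+# all of an executable's global symbols into its dynamic symbol table, so
+# that modules loaded later via dlopen() can resolve symbols defined in
+# the main program, e.g.:
+#   cc -o prog prog.c -Wl,-export-dynamic -ldl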
+
+{ echo "$as_me:$LINENO: checking for compiler -Wl,--version-script option" >&5
+echo $ECHO_N "checking for compiler -Wl,--version-script option... $ECHO_C" >&6; }
+if test "${llvm_cv_link_use_version_script+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ oldcflags="$CFLAGS"
+
+ # The following code is from the autoconf manual,
+ # "11.13: Limitations of Usual Tools".
+ # Create a temporary directory $tmp in $TMPDIR (default /tmp).
+ # Use mktemp if possible; otherwise fall back on mkdir,
+ # with $RANDOM to make collisions less likely.
+ : ${TMPDIR=/tmp}
+ {
+ tmp=`
+ (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null
+ ` &&
+ test -n "$tmp" && test -d "$tmp"
+ } || {
+ tmp=$TMPDIR/foo$$-$RANDOM
+ (umask 077 && mkdir "$tmp")
+ } || exit $?
+
+ echo "{" > "$tmp/export.map"
+ echo " global: main;" >> "$tmp/export.map"
+ echo " local: *;" >> "$tmp/export.map"
+ echo "};" >> "$tmp/export.map"
+
+ CFLAGS="$CFLAGS -Wl,--version-script=$tmp/export.map"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_link_use_version_script=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_link_use_version_script=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ rm "$tmp/export.map"
+ rmdir "$tmp"
+ CFLAGS="$oldcflags"
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_version_script" >&5
+echo "${ECHO_T}$llvm_cv_link_use_version_script" >&6; }
+if test "$llvm_cv_link_use_version_script" = yes ; then
+ HAVE_LINK_VERSION_SCRIPT=1
+
+ fi
+
+
+
+
+{ echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5
+echo $ECHO_N "checking for an ANSI C-conforming const... $ECHO_C" >&6; }
+if test "${ac_cv_c_const+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+/* FIXME: Include the comments suggested by Paul. */
+#ifndef __cplusplus
+ /* Ultrix mips cc rejects this. */
+ typedef int charset[2];
+ const charset x;
+ /* SunOS 4.1.1 cc rejects this. */
+ char const *const *ccp;
+ char **p;
+ /* NEC SVR4.0.2 mips cc rejects this. */
+ struct point {int x, y;};
+ static struct point const zero = {0,0};
+ /* AIX XL C 1.02.0.0 rejects this.
+ It does not let you subtract one const X* pointer from another in
+ an arm of an if-expression whose if-part is not a constant
+ expression */
+ const char *g = "string";
+ ccp = &g + (g ? g-g : 0);
+ /* HPUX 7.0 cc rejects these. */
+ ++ccp;
+ p = (char**) ccp;
+ ccp = (char const *const *) p;
+ { /* SCO 3.2v4 cc rejects this. */
+ char *t;
+ char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+ *t++ = 0;
+ if (s) return 0;
+ }
+ { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */
+ int x[] = {25, 17};
+ const int *foo = &x[0];
+ ++foo;
+ }
+ { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+ typedef const int *iptr;
+ iptr p = 0;
+ ++p;
+ }
+ { /* AIX XL C 1.02.0.0 rejects this saying
+ "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+ struct s { int j; const int *ap[3]; };
+ struct s *b; b->j = 5;
+ }
+ { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+ const int foo = 10;
+ if (!foo) return 0;
+ }
+ return !x[0] && !zero.x;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_c_const=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_c_const=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5
+echo "${ECHO_T}$ac_cv_c_const" >&6; }
+if test $ac_cv_c_const = no; then
+
+cat >>confdefs.h <<\_ACEOF
+#define const
+_ACEOF
+
+fi
+
+
+
+
+
+
+ac_header_dirent=no
+for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do
+ as_ac_Header=`echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_hdr that defines DIR" >&5
+echo $ECHO_N "checking for $ac_hdr that defines DIR... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <$ac_hdr>
+
+int
+main ()
+{
+if ((DIR *) 0)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_hdr" | $as_tr_cpp` 1
+_ACEOF
+
+ac_header_dirent=$ac_hdr; break
+fi
+
+done
+# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix.
+if test $ac_header_dirent = dirent.h; then
+ { echo "$as_me:$LINENO: checking for library containing opendir" >&5
+echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; }
+if test "${ac_cv_search_opendir+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char opendir ();
+int
+main ()
+{
+return opendir ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' dir; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_opendir=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_opendir+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_opendir+set}" = set; then
+ :
+else
+ ac_cv_search_opendir=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5
+echo "${ECHO_T}$ac_cv_search_opendir" >&6; }
+ac_res=$ac_cv_search_opendir
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+else
+ { echo "$as_me:$LINENO: checking for library containing opendir" >&5
+echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; }
+if test "${ac_cv_search_opendir+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char opendir ();
+int
+main ()
+{
+return opendir ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' x; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_opendir=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_opendir+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_opendir+set}" = set; then
+ :
+else
+ ac_cv_search_opendir=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5
+echo "${ECHO_T}$ac_cv_search_opendir" >&6; }
+ac_res=$ac_cv_search_opendir
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+fi
+
+
+for ac_header in dlfcn.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+# Check whether --enable-ltdl-install was given.
+if test "${enable_ltdl_install+set}" = set; then
+ enableval=$enable_ltdl_install;
+fi
+
+
+
+
+if test x"${enable_ltdl_install-no}" != xno; then
+ INSTALL_LTDL_TRUE=
+ INSTALL_LTDL_FALSE='#'
+else
+ INSTALL_LTDL_TRUE='#'
+ INSTALL_LTDL_FALSE=
+fi
+
+
+
+if test x"${enable_ltdl_convenience-no}" != xno; then
+ CONVENIENCE_LTDL_TRUE=
+ CONVENIENCE_LTDL_FALSE='#'
+else
+ CONVENIENCE_LTDL_TRUE='#'
+ CONVENIENCE_LTDL_FALSE=
+fi
+
+
+{ echo "$as_me:$LINENO: checking dynamic linker characteristics" >&5
+echo $ECHO_N "checking dynamic linker characteristics... $ECHO_C" >&6; }
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+if test "$GCC" = yes; then
+ sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
+ if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then
+    # If the path contains ";" then we assume it to be the separator;
+    # otherwise default to the standard path separator (i.e. ":"). It is
+    # assumed that no part of a normal pathname contains ";", but that
+    # should be okay in the real world, where ";" in dirpaths is itself
+    # problematic.
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+ else
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+ fi
+else
+ sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi
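+# For illustration: with GCC the branch above parses output of the form
+#   libraries: =/usr/lib/gcc/x86_64-linux-gnu/4.6:/usr/lib:/lib
+# (a hypothetical path list), stripping the "libraries:" tag and the "="
+# prefix and turning the separators into spaces.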
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+ shlibpath_var=LIBPATH
+
+ # AIX 3 has no versioning support, so we append a major version to the name.
+ soname_spec='${libname}${release}${shared_ext}$major'
+ ;;
+
+aix4* | aix5*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ hardcode_into_libs=yes
+ if test "$host_cpu" = ia64; then
+ # AIX 5 supports IA64
+ library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+ shlibpath_var=LD_LIBRARY_PATH
+ else
+      # With GCC up to 2.95.x, collect2 would create an import file
+      # for dependent libraries.  The import file would start with
+ # the line `#! .'. This would cause the generated library to
+ # depend on `.', always an invalid library. This was fixed in
+ # development snapshots of GCC prior to 3.0.
+ case $host_os in
+ aix4 | aix4.[01] | aix4.[01].*)
+ if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+ echo ' yes '
+ echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
+ :
+ else
+ can_build_shared=no
+ fi
+ ;;
+ esac
+      # AIX (on Power*) has no versioning support, so currently we cannot
+      # hardcode the correct soname into the executable. We could probably
+      # add versioning support to collect2, so additional links may be
+      # useful in the future.
+ if test "$aix_use_runtimelinking" = yes; then
+ # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+ # instead of lib<name>.a to let people know that these are not
+ # typical AIX shared libraries.
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ else
+ # We preserve .a as extension for shared libraries through AIX4.2
+ # and later when we are not doing run time linking.
+ library_names_spec='${libname}${release}.a $libname.a'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ fi
+ shlibpath_var=LIBPATH
+ fi
+ ;;
+
+amigaos*)
+ library_names_spec='$libname.ixlibrary $libname.a'
+ # Create ${libname}_ixlibrary.a entries in /sys/libs.
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done'
+ ;;
+
+beos*)
+ library_names_spec='${libname}${shared_ext}'
+ dynamic_linker="$host_os ld.so"
+ shlibpath_var=LIBRARY_PATH
+ ;;
+
+bsdi[45]*)
+ version_type=linux
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib"
+ sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+ # the default ld.so.conf also contains /usr/contrib/lib and
+ # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+ # libtool to hard-code these into programs
+ ;;
+
+cygwin* | mingw* | pw32*)
+ version_type=windows
+ shrext_cmds=".dll"
+ need_version=no
+ need_lib_prefix=no
+
+ case $GCC,$host_os in
+ yes,cygwin* | yes,mingw* | yes,pw32*)
+ library_names_spec='$libname.dll.a'
+ # DLL is installed to $(libdir)/../bin by postinstall_cmds
+ postinstall_cmds='base_file=`basename \${file}`~
+ dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~
+ dldir=$destdir/`dirname \$dlpath`~
+ test -d \$dldir || mkdir -p \$dldir~
+ $install_prog $dir/$dlname \$dldir/$dlname~
+ chmod a+x \$dldir/$dlname'
+ postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+ dlpath=$dir/\$dldll~
+ $rm \$dlpath'
+ shlibpath_overrides_runpath=yes
+
+ case $host_os in
+ cygwin*)
+ # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+ soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+ sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib"
+ ;;
+ mingw*)
+ # MinGW DLLs use traditional 'lib' prefix
+ soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+ sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"`
+ if echo "$sys_lib_search_path_spec" | grep ';[c-zC-Z]:/' >/dev/null; then
+ # It is most probably a Windows format PATH printed by
+ # mingw gcc, but we are running on Cygwin. Gcc prints its search
+ # path with ; separators, and with drive letters. We can handle the
+ # drive letters (cygwin fileutils understands them), so leave them,
+ # especially as we might pass files found there to a mingw objdump,
+ # which wouldn't understand a cygwinified path. Ahh.
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+ else
+ sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+ fi
+ ;;
+ pw32*)
+ # pw32 DLLs use 'pw' prefix rather than 'lib'
+ library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+ ;;
+ esac
+ ;;
+
+ *)
+ library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+ ;;
+ esac
+ dynamic_linker='Win32 ld.exe'
+ # FIXME: first we should search . and the directory the executable is in
+ shlibpath_var=PATH
+ ;;
+
+darwin* | rhapsody*)
+ dynamic_linker="$host_os dyld"
+ version_type=darwin
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+ soname_spec='${libname}${release}${major}$shared_ext'
+ shlibpath_overrides_runpath=yes
+ shlibpath_var=DYLD_LIBRARY_PATH
+ shrext_cmds='.dylib'
+  # Apple's gcc formats the output of 'gcc -print-search-dirs' differently.
+ if test "$GCC" = yes; then
+ sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
+ else
+ sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
+ fi
+ sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+ ;;
+
+dgux*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+freebsd1.*)
+ dynamic_linker=no
+ ;;
+
+kfreebsd*-gnu)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ dynamic_linker='GNU ld.so'
+ ;;
+
+freebsd* | dragonfly*)
+ # DragonFly does not have aout. When/if they implement a new
+ # versioning mechanism, adjust this.
+ if test -x /usr/bin/objformat; then
+ objformat=`/usr/bin/objformat`
+ else
+ case $host_os in
+ freebsd[123].*) objformat=aout ;;
+ *) objformat=elf ;;
+ esac
+ fi
+ version_type=freebsd-$objformat
+ case $version_type in
+ freebsd-elf*)
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+ need_version=no
+ need_lib_prefix=no
+ ;;
+ freebsd-*)
+ library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+ need_version=yes
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY_PATH
+ case $host_os in
+ freebsd2.*)
+ shlibpath_overrides_runpath=yes
+ ;;
+ freebsd3.[01]* | freebsdelf3.[01]*)
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+ freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+ freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ ;;
+ freebsd*) # from 4.6 on
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+ esac
+ ;;
+
+gnu*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ hardcode_into_libs=yes
+ ;;
+
+hpux9* | hpux10* | hpux11*)
+ # Give a soname corresponding to the major version so that dld.sl refuses to
+ # link against other versions.
+ version_type=sunos
+ need_lib_prefix=no
+ need_version=no
+ case $host_cpu in
+ ia64*)
+ shrext_cmds='.so'
+ hardcode_into_libs=yes
+ dynamic_linker="$host_os dld.so"
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ if test "X$HPUX_IA64_MODE" = X32; then
+ sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib"
+ else
+ sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+ fi
+ sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+ ;;
+ hppa*64*)
+ shrext_cmds='.sl'
+ hardcode_into_libs=yes
+ dynamic_linker="$host_os dld.sl"
+ shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+ shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+ sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+ ;;
+ *)
+ shrext_cmds='.sl'
+ dynamic_linker="$host_os dld.sl"
+ shlibpath_var=SHLIB_PATH
+ shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ ;;
+ esac
+ # HP-UX runs *really* slowly unless shared libraries are mode 555.
+ postinstall_cmds='chmod 555 $lib'
+ ;;
+
+interix3*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ ;;
+
+irix5* | irix6* | nonstopux*)
+ case $host_os in
+ nonstopux*) version_type=nonstopux ;;
+ *)
+ if test "$lt_cv_prog_gnu_ld" = yes; then
+ version_type=linux
+ else
+ version_type=irix
+ fi ;;
+ esac
+ need_lib_prefix=no
+ need_version=no
+ soname_spec='${libname}${release}${shared_ext}$major'
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}'
+ case $host_os in
+ irix5* | nonstopux*)
+ libsuff= shlibsuff=
+ ;;
+ *)
+ case $LD in # libtool.m4 will add one of these switches to LD
+ *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+ libsuff= shlibsuff= libmagic=32-bit;;
+ *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+ libsuff=32 shlibsuff=N32 libmagic=N32;;
+ *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+ libsuff=64 shlibsuff=64 libmagic=64-bit;;
+ *) libsuff= shlibsuff= libmagic=never-match;;
+ esac
+ ;;
+ esac
+ shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}"
+ sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+ hardcode_into_libs=yes
+ ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+ dynamic_linker=no
+ ;;
+
+# This must be Linux ELF.
+linux*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ # This implies no fast_install, which is unacceptable.
+ # Some rework will be needed to allow for fast_install
+ # before this can be enabled.
+ hardcode_into_libs=yes
+
+ # Append ld.so.conf contents to the search path
+ if test -f /etc/ld.so.conf; then
+ lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '`
+ sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+ fi
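+  # For illustration: an entry such as "include /etc/ld.so.conf.d/*.conf"
+  # (a typical, though hypothetical, line) is expanded in place by the awk
+  # script above; comments and "=..." suffixes are then stripped before the
+  # directories are appended to the dlsearch path.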
+
+ # We used to test for /lib/ld.so.1 and disable shared libraries on
+ # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker. Since that test was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days, and
+  # people can always --disable-shared, the test was removed; we now
+  # assume the GNU/Linux dynamic linker is in use.
+ dynamic_linker='GNU/Linux ld.so'
+ ;;
+
+knetbsd*-gnu)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=no
+ hardcode_into_libs=yes
+ dynamic_linker='GNU ld.so'
+ ;;
+
+netbsd*)
+ version_type=sunos
+ need_lib_prefix=no
+ need_version=no
+ if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ dynamic_linker='NetBSD (a.out) ld.so'
+ else
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ dynamic_linker='NetBSD ld.elf_so'
+ fi
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ ;;
+
+newsos6)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ ;;
+
+nto-qnx*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ ;;
+
+openbsd*)
+ version_type=sunos
+ sys_lib_dlsearch_path_spec="/usr/lib"
+ need_lib_prefix=no
+ # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+ case $host_os in
+ openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+ *) need_version=no ;;
+ esac
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+ finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+ case $host_os in
+ openbsd2.[89] | openbsd2.[89].*)
+ shlibpath_overrides_runpath=no
+ ;;
+ *)
+ shlibpath_overrides_runpath=yes
+ ;;
+ esac
+ else
+ shlibpath_overrides_runpath=yes
+ fi
+ ;;
+
+os2*)
+ libname_spec='$name'
+ shrext_cmds=".dll"
+ need_lib_prefix=no
+ library_names_spec='$libname${shared_ext} $libname.a'
+ dynamic_linker='OS/2 ld.exe'
+ shlibpath_var=LIBPATH
+ ;;
+
+osf3* | osf4* | osf5*)
+ version_type=osf
+ need_lib_prefix=no
+ need_version=no
+ soname_spec='${libname}${release}${shared_ext}$major'
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ shlibpath_var=LD_LIBRARY_PATH
+ sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib"
+ sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+ ;;
+
+solaris*)
+ version_type=linux
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ hardcode_into_libs=yes
+ # ldd complains unless libraries are executable
+ postinstall_cmds='chmod +x $lib'
+ ;;
+
+sunos4*)
+ version_type=sunos
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix'
+ finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+ shlibpath_var=LD_LIBRARY_PATH
+ shlibpath_overrides_runpath=yes
+ if test "$with_gnu_ld" = yes; then
+ need_lib_prefix=no
+ fi
+ need_version=yes
+ ;;
+
+sysv4 | sysv4.3*)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ case $host_vendor in
+ sni)
+ shlibpath_overrides_runpath=no
+ need_lib_prefix=no
+ export_dynamic_flag_spec='${wl}-Blargedynsym'
+ runpath_var=LD_RUN_PATH
+ ;;
+ siemens)
+ need_lib_prefix=no
+ ;;
+ motorola)
+ need_lib_prefix=no
+ need_version=no
+ shlibpath_overrides_runpath=no
+ sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+ ;;
+ esac
+ ;;
+
+sysv4*MP*)
+ if test -d /usr/nec ;then
+ version_type=linux
+ library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}'
+ soname_spec='$libname${shared_ext}.$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ fi
+ ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+ version_type=freebsd-elf
+ need_lib_prefix=no
+ need_version=no
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ hardcode_into_libs=yes
+ if test "$with_gnu_ld" = yes; then
+ sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib'
+ shlibpath_overrides_runpath=no
+ else
+ sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+ shlibpath_overrides_runpath=yes
+ case $host_os in
+ sco3.2v5*)
+ sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+ ;;
+ esac
+ fi
+ sys_lib_dlsearch_path_spec='/usr/lib'
+ ;;
+
+uts4*)
+ version_type=linux
+ library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+ soname_spec='${libname}${release}${shared_ext}$major'
+ shlibpath_var=LD_LIBRARY_PATH
+ ;;
+
+*)
+ dynamic_linker=no
+ ;;
+esac
+{ echo "$as_me:$LINENO: result: $dynamic_linker" >&5
+echo "${ECHO_T}$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+ variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH"
+fi
+
+
+{ echo "$as_me:$LINENO: checking which extension is used for loadable modules" >&5
+echo $ECHO_N "checking which extension is used for loadable modules... $ECHO_C" >&6; }
+if test "${libltdl_cv_shlibext+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+module=yes
+eval libltdl_cv_shlibext=$shrext_cmds
+
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_shlibext" >&5
+echo "${ECHO_T}$libltdl_cv_shlibext" >&6; }
+if test -n "$libltdl_cv_shlibext"; then
+
+cat >>confdefs.h <<_ACEOF
+#define LTDL_SHLIB_EXT "$libltdl_cv_shlibext"
+_ACEOF
+
+fi
+
+
+{ echo "$as_me:$LINENO: checking which variable specifies run-time library path" >&5
+echo $ECHO_N "checking which variable specifies run-time library path... $ECHO_C" >&6; }
+if test "${libltdl_cv_shlibpath_var+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ libltdl_cv_shlibpath_var="$shlibpath_var"
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_shlibpath_var" >&5
+echo "${ECHO_T}$libltdl_cv_shlibpath_var" >&6; }
+if test -n "$libltdl_cv_shlibpath_var"; then
+
+cat >>confdefs.h <<_ACEOF
+#define LTDL_SHLIBPATH_VAR "$libltdl_cv_shlibpath_var"
+_ACEOF
+
+fi
+
+
+{ echo "$as_me:$LINENO: checking for the default library search path" >&5
+echo $ECHO_N "checking for the default library search path... $ECHO_C" >&6; }
+if test "${libltdl_cv_sys_search_path+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ libltdl_cv_sys_search_path="$sys_lib_dlsearch_path_spec"
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_sys_search_path" >&5
+echo "${ECHO_T}$libltdl_cv_sys_search_path" >&6; }
+if test -n "$libltdl_cv_sys_search_path"; then
+ sys_search_path=
+ for dir in $libltdl_cv_sys_search_path; do
+ if test -z "$sys_search_path"; then
+ sys_search_path="$dir"
+ else
+ sys_search_path="$sys_search_path$PATH_SEPARATOR$dir"
+ fi
+ done
+
+cat >>confdefs.h <<_ACEOF
+#define LTDL_SYSSEARCHPATH "$sys_search_path"
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for objdir" >&5
+echo $ECHO_N "checking for objdir... $ECHO_C" >&6; }
+if test "${libltdl_cv_objdir+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ libltdl_cv_objdir="$objdir"
+ if test -n "$objdir"; then
+ :
+ else
+ rm -f .libs 2>/dev/null
+ mkdir .libs 2>/dev/null
+ if test -d .libs; then
+ libltdl_cv_objdir=.libs
+ else
+ # MS-DOS does not allow filenames that begin with a dot.
+ libltdl_cv_objdir=_libs
+ fi
+ rmdir .libs 2>/dev/null
+ fi
+
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_objdir" >&5
+echo "${ECHO_T}$libltdl_cv_objdir" >&6; }
+
+cat >>confdefs.h <<_ACEOF
+#define LTDL_OBJDIR "$libltdl_cv_objdir/"
+_ACEOF
+
+
+
+
+
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+{ echo "$as_me:$LINENO: checking command to parse $NM output from $compiler object" >&5
+echo $ECHO_N "checking command to parse $NM output from $compiler object... $ECHO_C" >&6; }
+if test "${lt_cv_sys_global_symbol_pipe+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix. What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[BCDEGRST]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Transform an extracted symbol line into a proper C declaration
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern int \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+ symcode='[BCDT]'
+ ;;
+cygwin* | mingw* | pw32*)
+ symcode='[ABCDGISTW]'
+ ;;
+hpux*) # Its linker distinguishes data from code symbols
+ if test "$host_cpu" = ia64; then
+ symcode='[ABCDEGRST]'
+ fi
+ lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+ lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+ ;;
+linux*)
+ if test "$host_cpu" = ia64; then
+ symcode='[ABCDGIRSTW]'
+ lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+ lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+ fi
+ ;;
+irix* | nonstopux*)
+ symcode='[BCDEGRST]'
+ ;;
+osf*)
+ symcode='[BCDEGQRST]'
+ ;;
+solaris*)
+ symcode='[BDRT]'
+ ;;
+sco3.2v5*)
+ symcode='[DT]'
+ ;;
+sysv4.2uw2*)
+ symcode='[DT]'
+ ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+ symcode='[ABDT]'
+ ;;
+sysv4)
+ symcode='[DFNSTU]'
+ ;;
+esac
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+ opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+ ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+ symcode='[ABCDGIRSTW]' ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+ # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+ symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+ # Write the raw and C identifiers.
+ lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
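+  # For illustration: given an nm line such as
+  #   0000000000000000 T nm_test_func
+  # the pipe above emits "T nm_test_func nm_test_func" (symbol code, raw
+  # name, C name); with ac_symprfx="_" the leading underscore is kept in
+  # the raw name but dropped from the C name.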
+
+ # Check to see that the pipe works correctly.
+ pipe_works=no
+
+ rm -f conftest*
+ cat > conftest.$ac_ext <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+ if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Now try to grab the symbols.
+ nlist=conftest.nm
+ if { (eval echo "$as_me:$LINENO: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist\"") >&5
+ (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && test -s "$nlist"; then
+ # Try sorting and uniquifying the output.
+ if sort "$nlist" | uniq > "$nlist"T; then
+ mv -f "$nlist"T "$nlist"
+ else
+ rm -f "$nlist"T
+ fi
+
+ # Make sure that we snagged all the symbols we need.
+ if grep ' nm_test_var$' "$nlist" >/dev/null; then
+ if grep ' nm_test_func$' "$nlist" >/dev/null; then
+ cat <<EOF > conftest.$ac_ext
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+ # Now generate the symbol file.
+ eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | grep -v main >> conftest.$ac_ext'
+
+ cat <<EOF >> conftest.$ac_ext
+#if defined (__STDC__) && __STDC__
+# define lt_ptr_t void *
+#else
+# define lt_ptr_t char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols. */
+const struct {
+ const char *name;
+ lt_ptr_t address;
+}
+lt_preloaded_symbols[] =
+{
+EOF
+ $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" | grep -v main >> conftest.$ac_ext
+ cat <<\EOF >> conftest.$ac_ext
+ {0, (lt_ptr_t) 0}
+};
+
+#ifdef __cplusplus
+}
+#endif
+EOF
+ # Now try linking the two files.
+ mv conftest.$ac_objext conftstm.$ac_objext
+ lt_save_LIBS="$LIBS"
+ lt_save_CFLAGS="$CFLAGS"
+ LIBS="conftstm.$ac_objext"
+ CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag"
+ if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && test -s conftest${ac_exeext}; then
+ pipe_works=yes
+ fi
+ LIBS="$lt_save_LIBS"
+ CFLAGS="$lt_save_CFLAGS"
+ else
+ echo "cannot find nm_test_func in $nlist" >&5
+ fi
+ else
+ echo "cannot find nm_test_var in $nlist" >&5
+ fi
+ else
+ echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
+ fi
+ else
+ echo "$progname: failed program was:" >&5
+ cat conftest.$ac_ext >&5
+ fi
+ rm -f conftest* conftst*
+
+ # Do not use the global_symbol_pipe unless it works.
+ if test "$pipe_works" = yes; then
+ break
+ else
+ lt_cv_sys_global_symbol_pipe=
+ fi
+done
+
+fi
+
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+ lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then
+ { echo "$as_me:$LINENO: result: failed" >&5
+echo "${ECHO_T}failed" >&6; }
+else
+ { echo "$as_me:$LINENO: result: ok" >&5
+echo "${ECHO_T}ok" >&6; }
+fi
+
+
+{ echo "$as_me:$LINENO: checking whether libtool supports -dlopen/-dlpreopen" >&5
+echo $ECHO_N "checking whether libtool supports -dlopen/-dlpreopen... $ECHO_C" >&6; }
+if test "${libltdl_cv_preloaded_symbols+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test -n "$lt_cv_sys_global_symbol_pipe"; then
+ libltdl_cv_preloaded_symbols=yes
+ else
+ libltdl_cv_preloaded_symbols=no
+ fi
+
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_preloaded_symbols" >&5
+echo "${ECHO_T}$libltdl_cv_preloaded_symbols" >&6; }
+if test x"$libltdl_cv_preloaded_symbols" = xyes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PRELOADED_SYMBOLS 1
+_ACEOF
+
+fi
+
+LIBADD_DL=
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
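+# For illustration: the checks below probe the host's dynamic-loading API
+# in order -- shl_load() (HP-UX's native loader interface), shl_load in
+# -ldld, then dlopen in -ldl -- appending whichever library is needed to
+# LIBADD_DL.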
+
+{ echo "$as_me:$LINENO: checking for shl_load" >&5
+echo $ECHO_N "checking for shl_load... $ECHO_C" >&6; }
+if test "${ac_cv_func_shl_load+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define shl_load to an innocuous variant, in case <limits.h> declares shl_load.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define shl_load innocuous_shl_load
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char shl_load (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef shl_load
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_shl_load || defined __stub___shl_load
+choke me
+#endif
+
+int
+main ()
+{
+return shl_load ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_shl_load=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_shl_load=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_shl_load" >&5
+echo "${ECHO_T}$ac_cv_func_shl_load" >&6; }
+if test $ac_cv_func_shl_load = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SHL_LOAD 1
+_ACEOF
+
+else
+ { echo "$as_me:$LINENO: checking for shl_load in -ldld" >&5
+echo $ECHO_N "checking for shl_load in -ldld... $ECHO_C" >&6; }
+if test "${ac_cv_lib_dld_shl_load+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char shl_load ();
+int
+main ()
+{
+return shl_load ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_dld_shl_load=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_dld_shl_load=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_dld_shl_load" >&5
+echo "${ECHO_T}$ac_cv_lib_dld_shl_load" >&6; }
+if test $ac_cv_lib_dld_shl_load = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SHL_LOAD 1
+_ACEOF
+
+ LIBADD_DL="$LIBADD_DL -ldld"
+else
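+  # shl_load is unavailable; fall back to the dlopen family, trying
+  # -ldl first, then libc itself, then -lsvld, then dld_link in -ldld.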
+ { echo "$as_me:$LINENO: checking for dlopen in -ldl" >&5
+echo $ECHO_N "checking for dlopen in -ldl... $ECHO_C" >&6; }
+if test "${ac_cv_lib_dl_dlopen+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_dl_dlopen=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_dl_dlopen=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_dl_dlopen" >&5
+echo "${ECHO_T}$ac_cv_lib_dl_dlopen" >&6; }
+if test $ac_cv_lib_dl_dlopen = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LIBDL 1
+_ACEOF
+
+ LIBADD_DL="-ldl" libltdl_cv_lib_dl_dlopen="yes"
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#if HAVE_DLFCN_H
+# include <dlfcn.h>
+#endif
+
+int
+main ()
+{
+dlopen(0, 0);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LIBDL 1
+_ACEOF
+ libltdl_cv_func_dlopen="yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { echo "$as_me:$LINENO: checking for dlopen in -lsvld" >&5
+echo $ECHO_N "checking for dlopen in -lsvld... $ECHO_C" >&6; }
+if test "${ac_cv_lib_svld_dlopen+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lsvld $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_svld_dlopen=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_svld_dlopen=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_svld_dlopen" >&5
+echo "${ECHO_T}$ac_cv_lib_svld_dlopen" >&6; }
+if test $ac_cv_lib_svld_dlopen = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_LIBDL 1
+_ACEOF
+
+ LIBADD_DL="-lsvld" libltdl_cv_func_dlopen="yes"
+else
+ { echo "$as_me:$LINENO: checking for dld_link in -ldld" >&5
+echo $ECHO_N "checking for dld_link in -ldld... $ECHO_C" >&6; }
+if test "${ac_cv_lib_dld_dld_link+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dld_link ();
+int
+main ()
+{
+return dld_link ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_dld_dld_link=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_dld_dld_link=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_dld_dld_link" >&5
+echo "${ECHO_T}$ac_cv_lib_dld_dld_link" >&6; }
+if test $ac_cv_lib_dld_dld_link = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_DLD 1
+_ACEOF
+
+ LIBADD_DL="$LIBADD_DL -ldld"
+else
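+  # As a last resort, look for Darwin's _dyld_func_lookup.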
+ { echo "$as_me:$LINENO: checking for _dyld_func_lookup" >&5
+echo $ECHO_N "checking for _dyld_func_lookup... $ECHO_C" >&6; }
+if test "${ac_cv_func__dyld_func_lookup+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define _dyld_func_lookup to an innocuous variant, in case <limits.h> declares _dyld_func_lookup.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define _dyld_func_lookup innocuous__dyld_func_lookup
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char _dyld_func_lookup (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef _dyld_func_lookup
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char _dyld_func_lookup ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub__dyld_func_lookup || defined __stub____dyld_func_lookup
+choke me
+#endif
+
+int
+main ()
+{
+return _dyld_func_lookup ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func__dyld_func_lookup=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func__dyld_func_lookup=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func__dyld_func_lookup" >&5
+echo "${ECHO_T}$ac_cv_func__dyld_func_lookup" >&6; }
+if test $ac_cv_func__dyld_func_lookup = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_DYLD 1
+_ACEOF
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+
+fi
+
+
+fi
+
+
+fi
+
+
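+# With the probe cascade above finished, check for dlerror whenever a
+# working dlopen was found, linking against the libraries collected in
+# LIBADD_DL.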
+if test x"$libltdl_cv_func_dlopen" = xyes || test x"$libltdl_cv_lib_dl_dlopen" = xyes
+then
+ lt_save_LIBS="$LIBS"
+ LIBS="$LIBS $LIBADD_DL"
+
+for ac_func in dlerror
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+ LIBS="$lt_save_LIBS"
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
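+# Find out whether the compiler prefixes global symbols with an
+# underscore, by compiling a small test function and scanning the
+# symbol table with $NM.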
+{ echo "$as_me:$LINENO: checking for _ prefix in compiled symbols" >&5
+echo $ECHO_N "checking for _ prefix in compiled symbols... $ECHO_C" >&6; }
+if test "${ac_cv_sys_symbol_underscore+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_sys_symbol_underscore=no
+ cat > conftest.$ac_ext <<EOF
+void nm_test_func(){}
+int main(){nm_test_func;return 0;}
+EOF
+ if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+ (eval $ac_compile) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; then
+ # Now try to grab the symbols.
+ ac_nlist=conftest.nm
+ if { (eval echo "$as_me:$LINENO: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $ac_nlist\"") >&5
+ (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $ac_nlist) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && test -s "$ac_nlist"; then
+ # See whether the symbols have a leading underscore.
+ if grep '^. _nm_test_func' "$ac_nlist" >/dev/null; then
+ ac_cv_sys_symbol_underscore=yes
+ else
+ if grep '^. nm_test_func ' "$ac_nlist" >/dev/null; then
+ :
+ else
+ echo "configure: cannot find nm_test_func in $ac_nlist" >&5
+ fi
+ fi
+ else
+ echo "configure: cannot run $lt_cv_sys_global_symbol_pipe" >&5
+ fi
+ else
+ echo "configure: failed program was:" >&5
+ cat conftest.c >&5
+ fi
+ rm -rf conftest*
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_sys_symbol_underscore" >&5
+echo "${ECHO_T}$ac_cv_sys_symbol_underscore" >&6; }
+
+
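+# If symbols do carry a leading underscore, run a test program that
+# dlopens itself and looks up "fnord" both with and without the prefix,
+# to decide whether dlsym callers must add the underscore (NEED_USCORE).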
+if test x"$ac_cv_sys_symbol_underscore" = xyes; then
+ if test x"$libltdl_cv_func_dlopen" = xyes ||
+ test x"$libltdl_cv_lib_dl_dlopen" = xyes ; then
+ { echo "$as_me:$LINENO: checking whether we have to add an underscore for dlsym" >&5
+echo $ECHO_N "checking whether we have to add an underscore for dlsym... $ECHO_C" >&6; }
+if test "${libltdl_cv_need_uscore+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ libltdl_cv_need_uscore=unknown
+ save_LIBS="$LIBS"
+ LIBS="$LIBS $LIBADD_DL"
+ if test "$cross_compiling" = yes; then :
+ libltdl_cv_need_uscore=cross
+else
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<EOF
+#line 10303 "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LT_DLGLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+# define LT_DLGLOBAL DL_GLOBAL
+# else
+# define LT_DLGLOBAL 0
+# endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW on the command line if we
+   find out it does not work on some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+# ifdef RTLD_LAZY
+# define LT_DLLAZY_OR_NOW RTLD_LAZY
+# else
+# ifdef DL_LAZY
+# define LT_DLLAZY_OR_NOW DL_LAZY
+# else
+# ifdef RTLD_NOW
+# define LT_DLLAZY_OR_NOW RTLD_NOW
+# else
+# ifdef DL_NOW
+# define LT_DLLAZY_OR_NOW DL_NOW
+# else
+# define LT_DLLAZY_OR_NOW 0
+# endif
+# endif
+# endif
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" void exit (int);
+#endif
+
+void fnord() { int i=42;}
+int main ()
+{
+ void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+ int status = $lt_dlunknown;
+
+ if (self)
+ {
+ if (dlsym (self,"fnord")) status = $lt_dlno_uscore;
+ else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+ /* dlclose (self); */
+ }
+ else
+ puts (dlerror ());
+
+ exit (status);
+}
+EOF
+ if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+ (eval $ac_link) 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && test -s conftest${ac_exeext} 2>/dev/null; then
+ (./conftest; exit; ) >&5 2>/dev/null
+ lt_status=$?
+ case x$lt_status in
+ x$lt_dlno_uscore) libltdl_cv_need_uscore=no ;;
+ x$lt_dlneed_uscore) libltdl_cv_need_uscore=yes ;;
+ x$lt_dlunknown|x*) ;;
+ esac
+ else :
+ # compilation failed
+
+ fi
+fi
+rm -fr conftest*
+
+ LIBS="$save_LIBS"
+
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_need_uscore" >&5
+echo "${ECHO_T}$libltdl_cv_need_uscore" >&6; }
+ fi
+fi
+
+if test x"$libltdl_cv_need_uscore" = xyes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define NEED_USCORE 1
+_ACEOF
+
+fi
+
+
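+# Does dlopen (or its equivalent) automatically load the libraries a
+# shared object depends on?  There is no portable programmatic test
+# yet, so the answer is keyed off $host_os below.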
+{ echo "$as_me:$LINENO: checking whether deplibs are loaded by dlopen" >&5
+echo $ECHO_N "checking whether deplibs are loaded by dlopen... $ECHO_C" >&6; }
+if test "${libltdl_cv_sys_dlopen_deplibs+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  # PORTME: does your system automatically load deplibs for dlopen,
+  # or its logical equivalent (e.g. shl_load for HP-UX < 11)?
+  # For now, we just catch OSes we know something about -- in the
+  # future, we'll try to test this programmatically.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ case "$host_os" in
+ aix3*|aix4.1.*|aix4.2.*)
+ # Unknown whether this is true for these versions of AIX, but
+ # we want this `case' here to explicitly catch those versions.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ ;;
+ aix[45]*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ darwin*)
+    # Assuming the user has installed a libdl from somewhere, this is true.
+    # If you are looking for one, see http://www.opendarwin.org/projects/dlcompat
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu)
+ # GNU and its variants, using gnu ld.so (Glibc)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ hpux10*|hpux11*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ interix*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ irix[12345]*|irix6.[01]*)
+ # Catch all versions of IRIX before 6.2, and indicate that we don't
+ # know how it worked for any of those versions.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ ;;
+ irix*)
+ # The case above catches anything before 6.2, and it's known that
+ # at 6.2 and later dlopen does load deplibs.
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ netbsd*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ openbsd*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ osf[1234]*)
+ # dlopen did load deplibs (at least at 4.x), but until the 5.x series,
+ # it did *not* use an RPATH in a shared library to find objects the
+ # library depends on, so we explicitly say `no'.
+ libltdl_cv_sys_dlopen_deplibs=no
+ ;;
+ osf5.0|osf5.0a|osf5.1)
+ # dlopen *does* load deplibs and with the right loader patch applied
+ # it even uses RPATH in a shared library to search for shared objects
+ # that the library depends on, but there's no easy way to know if that
+ # patch is installed. Since this is the case, all we can really
+ # say is unknown -- it depends on the patch being installed. If
+ # it is, this changes to `yes'. Without it, it would be `no'.
+ libltdl_cv_sys_dlopen_deplibs=unknown
+ ;;
+ osf*)
+    # The two cases above should catch all versions of osf <= 5.1; read
+    # the comments above for what we know about them.
+ # At > 5.1, deplibs are loaded *and* any RPATH in a shared library
+ # is used to find them so we can finally say `yes'.
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ solaris*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+ libltdl_cv_sys_dlopen_deplibs=yes
+ ;;
+ esac
+
+fi
+{ echo "$as_me:$LINENO: result: $libltdl_cv_sys_dlopen_deplibs" >&5
+echo "${ECHO_T}$libltdl_cv_sys_dlopen_deplibs" >&6; }
+if test "$libltdl_cv_sys_dlopen_deplibs" != yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define LTDL_DLOPEN_DEPLIBS 1
+_ACEOF
+
+fi
+
+
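+# libltdl can use the glibc argz string-table interface; probe for the
+# header here and for the individual argz functions further below.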
+for ac_header in argz.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
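+# The argz interface uses error_t; if <argz.h> does not declare it,
+# define error_t as plain int instead.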
+{ echo "$as_me:$LINENO: checking for error_t" >&5
+echo $ECHO_N "checking for error_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_error_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#if HAVE_ARGZ_H
+# include <argz.h>
+#endif
+
+typedef error_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_error_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_error_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_error_t" >&5
+echo "${ECHO_T}$ac_cv_type_error_t" >&6; }
+if test $ac_cv_type_error_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_ERROR_T 1
+_ACEOF
+
+
+else
+
+cat >>confdefs.h <<\_ACEOF
+#define error_t int
+_ACEOF
+
+fi
+
+
+
+
+
+
+
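+# Now probe for the argz functions themselves, defining HAVE_* macros
+# for each one found.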
+for ac_func in argz_append argz_create_sep argz_insert argz_next argz_stringify
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
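+# Standard C headers used by libltdl.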
+for ac_header in assert.h ctype.h errno.h malloc.h memory.h stdlib.h \
+ stdio.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
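+# Platform-specific dynamic-loader headers: dl.h and sys/dl.h (HP-UX
+# shl_load), dld.h (GNU DLD) and mach-o/dyld.h (Darwin dyld).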
+for ac_header in dl.h sys/dl.h dld.h mach-o/dyld.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
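+# Prefer <string.h> over the older <strings.h>; the "break" below stops
+# the loop at the first header found.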
+for ac_header in string.h strings.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+
+done
+
+
+
+
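+# Use strchr if available, otherwise the older BSD name index; again
+# the "break" keeps only the first hit.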
+for ac_func in strchr index
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+
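+# Likewise prefer strrchr over the older rindex.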
+for ac_func in strrchr rindex
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+
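+# Same first-match pattern: prefer memcpy, falling back to the BSD bcopy.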
+for ac_func in memcpy bcopy
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+ break
+fi
+done
+
+
+
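+# memmove and strcmp are checked in the same loop, but note there is no
+# `break` here: each function that links gets its own HAVE_<NAME> macro.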
+for ac_func in memmove strcmp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
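+# Directory-stream primitives are probed individually; HAVE_CLOSEDIR,
+# HAVE_OPENDIR and HAVE_READDIR are defined independently of one another.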
+for ac_func in closedir opendir readdir
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
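+# Tool-compatibility gate: recognize the Intel compilers (icc/icpc) by name,
+# require a GNU-compatible or Intel C and C++ compiler, reject GCC older than
+# 3.x with a __GNUC__ compile test, and insist that GNU make was found.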
+{ echo "$as_me:$LINENO: checking tool compatibility" >&5
+echo $ECHO_N "checking tool compatibility... $ECHO_C" >&6; }
+
+ICC=no
+IXX=no
+case $CC in
+ icc*|icpc*)
+ ICC=yes
+ IXX=yes
+ ;;
+ *)
+ ;;
+esac
+
+if test "$GCC" != "yes" && test "$ICC" != "yes"
+then
+ { { echo "$as_me:$LINENO: error: gcc|icc required but not found" >&5
+echo "$as_me: error: gcc|icc required but not found" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+if test "$GXX" != "yes" && test "$IXX" != "yes"
+then
+ { { echo "$as_me:$LINENO: error: g++|clang++|icc required but not found" >&5
+echo "$as_me: error: g++|clang++|icc required but not found" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+if test "$GCC" = "yes"
+then
+ cat >conftest.$ac_ext <<_ACEOF
+#if !defined(__GNUC__) || __GNUC__ < 3
+#error Unsupported GCC version
+#endif
+
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ :
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { { echo "$as_me:$LINENO: error: gcc 3.x required, but you have a lower version" >&5
+echo "$as_me: error: gcc 3.x required, but you have a lower version" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+
+if test -z "$llvm_cv_gnu_make_command"
+then
+ { { echo "$as_me:$LINENO: error: GNU Make required but not found" >&5
+echo "$as_me: error: GNU Make required but not found" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+{ echo "$as_me:$LINENO: result: ok" >&5
+echo "${ECHO_T}ok" >&6; }
+
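+# Optional-flag probing: compile an empty translation unit with -Werror plus
+# the candidate warning flag; the variable captures the flag only if the
+# compiler accepts it, i.e. (with a placeholder flag -Wfoo):
+#   FLAG=`$CXX -Werror -Wfoo -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wfoo`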
+{ echo "$as_me:$LINENO: checking optional compiler flags" >&5
+echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; }
+NO_VARIADIC_MACROS=`$CXX -Werror -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros`
+
+NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers`
+
+COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default`
+
+{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&5
+echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT" >&6; }
+
+
+
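+# Library probe for -lm: link a dummy `char sin ();` declaration; on success
+# define HAVE_LIBM and prepend -lm to LIBS.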
+{ echo "$as_me:$LINENO: checking for sin in -lm" >&5
+echo $ECHO_N "checking for sin in -lm... $ECHO_C" >&6; }
+if test "${ac_cv_lib_m_sin+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lm $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char sin ();
+int
+main ()
+{
+return sin ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_m_sin=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_m_sin=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_m_sin" >&5
+echo "${ECHO_T}$ac_cv_lib_m_sin" >&6; }
+if test $ac_cv_lib_m_sin = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBM 1
+_ACEOF
+
+ LIBS="-lm $LIBS"
+
+fi
+
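+# MinGW hosts additionally need the Win32 imagehlp and psapi libraries;
+# HAVE_LIBIMAGEHLP and HAVE_LIBPSAPI are defined when they link.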
+if test "$llvm_cv_os_type" = "MingW" ; then
+
+{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5
+echo $ECHO_N "checking for main in -limagehlp... $ECHO_C" >&6; }
+if test "${ac_cv_lib_imagehlp_main+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-limagehlp $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+return main ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_imagehlp_main=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_imagehlp_main=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_imagehlp_main" >&5
+echo "${ECHO_T}$ac_cv_lib_imagehlp_main" >&6; }
+if test $ac_cv_lib_imagehlp_main = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBIMAGEHLP 1
+_ACEOF
+
+ LIBS="-limagehlp $LIBS"
+
+fi
+
+
+{ echo "$as_me:$LINENO: checking for main in -lpsapi" >&5
+echo $ECHO_N "checking for main in -lpsapi... $ECHO_C" >&6; }
+if test "${ac_cv_lib_psapi_main+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpsapi $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+return main ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_psapi_main=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_psapi_main=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_psapi_main" >&5
+echo "${ECHO_T}$ac_cv_lib_psapi_main" >&6; }
+if test $ac_cv_lib_psapi_main = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBPSAPI 1
+_ACEOF
+
+ LIBS="-lpsapi $LIBS"
+
+fi
+
+fi
+
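+# Search for dlopen: first with no extra library, then with -ldl. On success
+# define HAVE_DLOPEN; otherwise warn and build without plugin support.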
+{ echo "$as_me:$LINENO: checking for library containing dlopen" >&5
+echo $ECHO_N "checking for library containing dlopen... $ECHO_C" >&6; }
+if test "${ac_cv_search_dlopen+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dlopen ();
+int
+main ()
+{
+return dlopen ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' dl; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_dlopen=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_dlopen+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_dlopen+set}" = set; then
+ :
+else
+ ac_cv_search_dlopen=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_dlopen" >&5
+echo "${ECHO_T}$ac_cv_search_dlopen" >&6; }
+ac_res=$ac_cv_search_dlopen
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_DLOPEN 1
+_ACEOF
+
+else
+ { echo "$as_me:$LINENO: WARNING: dlopen() not found - disabling plugin support" >&5
+echo "$as_me: WARNING: dlopen() not found - disabling plugin support" >&2;}
+fi
+
+
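+# Only when --enable-libffi was requested: locate ffi_call (in libc or -lffi)
+# and define HAVE_FFI_CALL; failing to find libffi is fatal by design.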
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+ { echo "$as_me:$LINENO: checking for library containing ffi_call" >&5
+echo $ECHO_N "checking for library containing ffi_call... $ECHO_C" >&6; }
+if test "${ac_cv_search_ffi_call+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ffi_call ();
+int
+main ()
+{
+return ffi_call ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' ffi; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_ffi_call=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_ffi_call+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_ffi_call+set}" = set; then
+ :
+else
+ ac_cv_search_ffi_call=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_ffi_call" >&5
+echo "${ECHO_T}$ac_cv_search_ffi_call" >&6; }
+ac_res=$ac_cv_search_ffi_call
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_FFI_CALL 1
+_ACEOF
+
+else
+ { { echo "$as_me:$LINENO: error: libffi not found - configure without --enable-libffi to compile without it" >&5
+echo "$as_me: error: libffi not found - configure without --enable-libffi to compile without it" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+fi
+
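+# Search for mallinfo (in libc or -lmalloc) and define HAVE_MALLINFO when it
+# is available.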
+{ echo "$as_me:$LINENO: checking for library containing mallinfo" >&5
+echo $ECHO_N "checking for library containing mallinfo... $ECHO_C" >&6; }
+if test "${ac_cv_search_mallinfo+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char mallinfo ();
+int
+main ()
+{
+return mallinfo ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' malloc; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_mallinfo=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_mallinfo+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_mallinfo+set}" = set; then
+ :
+else
+ ac_cv_search_mallinfo=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_mallinfo" >&5
+echo "${ECHO_T}$ac_cv_search_mallinfo" >&6; }
+ac_res=$ac_cv_search_mallinfo
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_MALLINFO 1
+_ACEOF
+
+fi
+
+
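+# With both threads and pthreads enabled, check -lpthread for
+# pthread_mutex_init, then search for pthread_mutex_lock, pthread_rwlock_init
+# and pthread_getspecific, defining a HAVE_* macro for each that is found.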
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+
+{ echo "$as_me:$LINENO: checking for pthread_mutex_init in -lpthread" >&5
+echo $ECHO_N "checking for pthread_mutex_init in -lpthread... $ECHO_C" >&6; }
+if test "${ac_cv_lib_pthread_pthread_mutex_init+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpthread $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_mutex_init ();
+int
+main ()
+{
+return pthread_mutex_init ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_pthread_pthread_mutex_init=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_pthread_pthread_mutex_init=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_pthread_pthread_mutex_init" >&5
+echo "${ECHO_T}$ac_cv_lib_pthread_pthread_mutex_init" >&6; }
+if test $ac_cv_lib_pthread_pthread_mutex_init = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBPTHREAD 1
+_ACEOF
+
+ LIBS="-lpthread $LIBS"
+
+fi
+
+ { echo "$as_me:$LINENO: checking for library containing pthread_mutex_lock" >&5
+echo $ECHO_N "checking for library containing pthread_mutex_lock... $ECHO_C" >&6; }
+if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_mutex_lock ();
+int
+main ()
+{
+return pthread_mutex_lock ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' pthread; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_pthread_mutex_lock=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then
+ :
+else
+ ac_cv_search_pthread_mutex_lock=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_mutex_lock" >&5
+echo "${ECHO_T}$ac_cv_search_pthread_mutex_lock" >&6; }
+ac_res=$ac_cv_search_pthread_mutex_lock
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_MUTEX_LOCK 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for library containing pthread_rwlock_init" >&5
+echo $ECHO_N "checking for library containing pthread_rwlock_init... $ECHO_C" >&6; }
+if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_rwlock_init ();
+int
+main ()
+{
+return pthread_rwlock_init ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' pthread; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_pthread_rwlock_init=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then
+ :
+else
+ ac_cv_search_pthread_rwlock_init=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_rwlock_init" >&5
+echo "${ECHO_T}$ac_cv_search_pthread_rwlock_init" >&6; }
+ac_res=$ac_cv_search_pthread_rwlock_init
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_RWLOCK_INIT 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for library containing pthread_getspecific" >&5
+echo $ECHO_N "checking for library containing pthread_getspecific... $ECHO_C" >&6; }
+if test "${ac_cv_search_pthread_getspecific+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_getspecific ();
+int
+main ()
+{
+return pthread_getspecific ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' pthread; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_pthread_getspecific=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_pthread_getspecific+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_pthread_getspecific+set}" = set; then
+ :
+else
+ ac_cv_search_pthread_getspecific=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_getspecific" >&5
+echo "${ECHO_T}$ac_cv_search_pthread_getspecific" >&6; }
+ac_res=$ac_cv_search_pthread_getspecific
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_GETSPECIFIC 1
+_ACEOF
+
+fi
+
+fi
+
+
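+# Optional udis86 disassembler support: --with-udis86=<dir> adds -L<dir> to
+# LDFLAGS before ud_init is required in -ludis86; USE_UDIS86 records the
+# outcome either way.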
+# Check whether --with-udis86 was given.
+if test "${with_udis86+set}" = set; then
+ withval=$with_udis86;
+ USE_UDIS86=1
+
+ case "$withval" in
+ /usr/lib|yes) ;;
+ *) LDFLAGS="$LDFLAGS -L${withval}" ;;
+ esac
+
+{ echo "$as_me:$LINENO: checking for ud_init in -ludis86" >&5
+echo $ECHO_N "checking for ud_init in -ludis86... $ECHO_C" >&6; }
+if test "${ac_cv_lib_udis86_ud_init+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ludis86 $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ud_init ();
+int
+main ()
+{
+return ud_init ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_udis86_ud_init=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_udis86_ud_init=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_udis86_ud_init" >&5
+echo "${ECHO_T}$ac_cv_lib_udis86_ud_init" >&6; }
+if test $ac_cv_lib_udis86_ud_init = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBUDIS86 1
+_ACEOF
+
+ LIBS="-ludis86 $LIBS"
+
+else
+
+ echo "Error! You need to have libudis86 around."
+ exit -1
+
+fi
+
+
+else
+ USE_UDIS86=0
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define USE_UDIS86 $USE_UDIS86
+_ACEOF
+
+
+
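+# Optional OProfile JIT support: derive the oprofile library path from the
+# --with-oprofile value, link against -lbfd and -lopagent, and verify that
+# opagent.h is usable; USE_OPROFILE records the outcome.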
+# Check whether --with-oprofile was given.
+if test "${with_oprofile+set}" = set; then
+ withval=$with_oprofile;
+ USE_OPROFILE=1
+
+ case "$withval" in
+ /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;;
+ no) llvm_cv_oppath=
+ USE_OPROFILE=0
+ ;;
+ *) llvm_cv_oppath="${withval}/lib/oprofile"
+ CPPFLAGS="-I${withval}/include";;
+ esac
+ if test -n "$llvm_cv_oppath" ; then
+ LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
+ { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5
+echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; }
+if test "${ac_cv_search_bfd_init+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char bfd_init ();
+int
+main ()
+{
+return bfd_init ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' bfd; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_bfd_init=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_bfd_init+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_bfd_init+set}" = set; then
+ :
+else
+ ac_cv_search_bfd_init=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_bfd_init" >&5
+echo "${ECHO_T}$ac_cv_search_bfd_init" >&6; }
+ac_res=$ac_cv_search_bfd_init
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+ { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5
+echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; }
+if test "${ac_cv_search_op_open_agent+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char op_open_agent ();
+int
+main ()
+{
+return op_open_agent ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' opagent; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_op_open_agent=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_op_open_agent+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_op_open_agent+set}" = set; then
+ :
+else
+ ac_cv_search_op_open_agent=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_op_open_agent" >&5
+echo "${ECHO_T}$ac_cv_search_op_open_agent" >&6; }
+ac_res=$ac_cv_search_op_open_agent
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ echo "Error! You need to have libopagent around."
+ exit -1
+
+fi
+
+ if test "${ac_cv_header_opagent_h+set}" = set; then
+ { echo "$as_me:$LINENO: checking for opagent.h" >&5
+echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_opagent_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5
+echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking opagent.h usability" >&5
+echo $ECHO_N "checking opagent.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <opagent.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking opagent.h presence" >&5
+echo $ECHO_N "checking opagent.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <opagent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: opagent.h: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: opagent.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: opagent.h: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: opagent.h: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: opagent.h: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for opagent.h" >&5
+echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_opagent_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_cv_header_opagent_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5
+echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; }
+
+fi
+if test $ac_cv_header_opagent_h = yes; then
+ :
+else
+
+ echo "Error! You need to have opagent.h around."
+ exit -1
+
+fi
+
+
+ fi
+
+else
+
+ USE_OPROFILE=0
+
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define USE_OPROFILE $USE_OPROFILE
+_ACEOF
+
+
+
+
+
+
+
+
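+# Find which of dirent.h, sys/ndir.h, sys/dir.h or ndir.h defines DIR, then
+# search the libraries that provide opendir (SCO Xenix splits it across -ldir
+# and -lx).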
+ac_header_dirent=no
+for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do
+ as_ac_Header=`echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_hdr that defines DIR" >&5
+echo $ECHO_N "checking for $ac_hdr that defines DIR... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <$ac_hdr>
+
+int
+main ()
+{
+if ((DIR *) 0)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_Header=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_hdr" | $as_tr_cpp` 1
+_ACEOF
+
+ac_header_dirent=$ac_hdr; break
+fi
+
+done
+# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix.
+if test $ac_header_dirent = dirent.h; then
+ { echo "$as_me:$LINENO: checking for library containing opendir" >&5
+echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; }
+if test "${ac_cv_search_opendir+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char opendir ();
+int
+main ()
+{
+return opendir ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' dir; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_opendir=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_opendir+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_opendir+set}" = set; then
+ :
+else
+ ac_cv_search_opendir=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5
+echo "${ECHO_T}$ac_cv_search_opendir" >&6; }
+ac_res=$ac_cv_search_opendir
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+else
+ { echo "$as_me:$LINENO: checking for library containing opendir" >&5
+echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; }
+if test "${ac_cv_search_opendir+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char opendir ();
+int
+main ()
+{
+return opendir ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' x; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_search_opendir=$ac_res
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if test "${ac_cv_search_opendir+set}" = set; then
+ break
+fi
+done
+if test "${ac_cv_search_opendir+set}" = set; then
+ :
+else
+ ac_cv_search_opendir=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5
+echo "${ECHO_T}$ac_cv_search_opendir" >&6; }
+ac_res=$ac_cv_search_opendir
+if test "$ac_res" != no; then
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+fi
+
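+# Check whether mmap() accepts the Linux-style MAP_ANONYMOUS flag (as opposed
+# to BSD's MAP_ANON); a successful compile defines HAVE_MMAP_ANONYMOUS below.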
+{ echo "$as_me:$LINENO: checking for MAP_ANONYMOUS vs. MAP_ANON" >&5
+echo $ECHO_N "checking for MAP_ANONYMOUS vs. MAP_ANON... $ECHO_C" >&6; }
+if test "${ac_cv_header_mmap_anon+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+int
+main ()
+{
+mmap (0, 1, PROT_READ, MAP_ANONYMOUS, -1, 0); return (0);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_mmap_anon=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_mmap_anon=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_mmap_anon" >&5
+echo "${ECHO_T}$ac_cv_header_mmap_anon" >&6; }
+if test "$ac_cv_header_mmap_anon" = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_MMAP_ANONYMOUS 1
+_ACEOF
+
+fi
+
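+# Expansion of AC_HEADER_STAT: the preprocessor probe below emits "You lose"
+# whenever an S_IS* file-mode macro contradicts the S_IF* constants, in which
+# case STAT_MACROS_BROKEN is defined.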
+{ echo "$as_me:$LINENO: checking whether stat file-mode macros are broken" >&5
+echo $ECHO_N "checking whether stat file-mode macros are broken... $ECHO_C" >&6; }
+if test "${ac_cv_header_stat_broken+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if defined S_ISBLK && defined S_IFDIR
+# if S_ISBLK (S_IFDIR)
+You lose.
+# endif
+#endif
+
+#if defined S_ISBLK && defined S_IFCHR
+# if S_ISBLK (S_IFCHR)
+You lose.
+# endif
+#endif
+
+#if defined S_ISLNK && defined S_IFREG
+# if S_ISLNK (S_IFREG)
+You lose.
+# endif
+#endif
+
+#if defined S_ISSOCK && defined S_IFREG
+# if S_ISSOCK (S_IFREG)
+You lose.
+# endif
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "You lose" >/dev/null 2>&1; then
+ ac_cv_header_stat_broken=yes
+else
+ ac_cv_header_stat_broken=no
+fi
+rm -f conftest*
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stat_broken" >&5
+echo "${ECHO_T}$ac_cv_header_stat_broken" >&6; }
+if test $ac_cv_header_stat_broken = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STAT_MACROS_BROKEN 1
+_ACEOF
+
+fi
+
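+# Expansion of AC_HEADER_SYS_WAIT: check for a POSIX.1-compatible sys/wait.h
+# and define HAVE_SYS_WAIT_H when the test program compiles.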
+{ echo "$as_me:$LINENO: checking for sys/wait.h that is POSIX.1 compatible" >&5
+echo $ECHO_N "checking for sys/wait.h that is POSIX.1 compatible... $ECHO_C" >&6; }
+if test "${ac_cv_header_sys_wait_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned int) (stat_val) >> 8)
+#endif
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+int
+main ()
+{
+ int s;
+ wait (&s);
+ s = WIFEXITED (s) ? WEXITSTATUS (s) : 1;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_sys_wait_h=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_sys_wait_h=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_wait_h" >&5
+echo "${ECHO_T}$ac_cv_header_sys_wait_h" >&6; }
+if test $ac_cv_header_sys_wait_h = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SYS_WAIT_H 1
+_ACEOF
+
+fi
+
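+# Expansion of AC_HEADER_TIME: check whether time.h and sys/time.h can be
+# included together and define TIME_WITH_SYS_TIME if so.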
+{ echo "$as_me:$LINENO: checking whether time.h and sys/time.h may both be included" >&5
+echo $ECHO_N "checking whether time.h and sys/time.h may both be included... $ECHO_C" >&6; }
+if test "${ac_cv_header_time+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+
+int
+main ()
+{
+if ((struct tm *) 0)
+return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_header_time=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_header_time=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_time" >&5
+echo "${ECHO_T}$ac_cv_header_time" >&6; }
+if test $ac_cv_header_time = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define TIME_WITH_SYS_TIME 1
+_ACEOF
+
+fi
+
+
+
+
+
+
+
+
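+# Generic AC_CHECK_HEADERS expansion, repeated for each header list below:
+# consult the cache first, then run a compile test ("usability") and a
+# preprocessor test ("presence"), warn when the two disagree, and define
+# HAVE_<HEADER> for every header found usable.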
+for ac_header in dlfcn.h execinfo.h fcntl.h inttypes.h limits.h link.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
+
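+# Same AC_CHECK_HEADERS machinery for malloc.h, setjmp.h, signal.h,
+# stdint.h, termios.h, and unistd.h.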
+for ac_header in malloc.h setjmp.h signal.h stdint.h termios.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
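+# Same check for utime.h and windows.h.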
+for ac_header in utime.h windows.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
+
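+# Same check for sys/mman.h, sys/param.h, sys/resource.h, sys/time.h, and
+# sys/uio.h.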
+for ac_header in sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+
+
+
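+# Same check for sys/types.h, sys/ioctl.h, malloc/malloc.h, and mach/mach.h.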
+for ac_header in sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
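+# Same check for valgrind/valgrind.h.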
+for ac_header in valgrind/valgrind.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
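+# Same check for fenv.h (C99 floating-point environment).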
+for ac_header in fenv.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
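+# pthread.h is checked only when both ENABLE_THREADS and ENABLE_PTHREADS are
+# set to 1; the usual header-check machinery runs inside the conditional.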
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then
+
+for ac_header in pthread.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+ HAVE_PTHREAD=1
+
+else
+ HAVE_PTHREAD=0
+
+fi
+
+done
+
+else
+ HAVE_PTHREAD=0
+
+fi
+
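+# When libffi support was requested, look for its header in both common
+# install layouts: <ffi.h> and <ffi/ffi.h>.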
+if test "$llvm_cv_enable_libffi" = "yes" ; then
+
+
+for ac_header in ffi.h ffi/ffi.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+fi
+
+
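+# CrashReporterClient.h is normally present only on Darwin; the check
+# defines HAVE_CRASHREPORTERCLIENT_H when the header is usable.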
+for ac_header in CrashReporterClient.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
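+# Older Darwin releases expose a __crashreporter_info__ global instead of
+# the client header; try to link against it and record the result.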
+{ echo "$as_me:$LINENO: checking __crashreporter_info__" >&5
+echo $ECHO_N "checking __crashreporter_info__... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+extern const char *__crashreporter_info__;
+ int main() {
+ __crashreporter_info__ = "test";
+ return 0;
+ }
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_CRASHREPORTER_INFO 1
+_ACEOF
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_CRASHREPORTER_INFO 0
+_ACEOF
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+
+
+
+
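+# Compile and run a C++ snippet under -pedantic to verify that HUGE_VAL is
+# usable without diagnostics; assume sanity when cross-compiling.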
+ { echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5
+echo $ECHO_N "checking for HUGE_VAL sanity... $ECHO_C" >&6; }
+if test "${ac_cv_huge_val_sanity+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ ac_save_CXXFLAGS=$CXXFLAGS
+ # POSIX sh has no "+=" string append; extend CXXFLAGS portably.
+ CXXFLAGS="$CXXFLAGS -pedantic"
+ if test "$cross_compiling" = yes; then
+ ac_cv_huge_val_sanity=yes
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <math.h>
+int
+main ()
+{
+double x = HUGE_VAL; return x != x;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_huge_val_sanity=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_huge_val_sanity=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+ CXXFLAGS=$ac_save_CXXFLAGS
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_huge_val_sanity" >&5
+echo "${ECHO_T}$ac_cv_huge_val_sanity" >&6; }
+ HUGE_VAL_SANITY=$ac_cv_huge_val_sanity
+
+
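+# Standard type fallbacks: substitute int when pid_t is missing.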
+{ echo "$as_me:$LINENO: checking for pid_t" >&5
+echo $ECHO_N "checking for pid_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_pid_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef pid_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_pid_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_pid_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_pid_t" >&5
+echo "${ECHO_T}$ac_cv_type_pid_t" >&6; }
+if test $ac_cv_type_pid_t = yes; then
+ :
+else
+
+cat >>confdefs.h <<_ACEOF
+#define pid_t int
+_ACEOF
+
+fi
+
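+# Likewise substitute unsigned int when size_t is missing.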
+{ echo "$as_me:$LINENO: checking for size_t" >&5
+echo $ECHO_N "checking for size_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_size_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef size_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_size_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_size_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5
+echo "${ECHO_T}$ac_cv_type_size_t" >&6; }
+if test $ac_cv_type_size_t = yes; then
+ :
+else
+
+cat >>confdefs.h <<_ACEOF
+#define size_t unsigned int
+_ACEOF
+
+fi
+
+
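+# Signal handlers return void on every supported host, so RETSIGTYPE is
+# defined directly rather than probed.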
+cat >>confdefs.h <<_ACEOF
+#define RETSIGTYPE void
+_ACEOF
+
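+# Classic struct tm location check: TM_IN_SYS_TIME is defined when the
+# definition lives in <sys/time.h> rather than <time.h>.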
+{ echo "$as_me:$LINENO: checking whether struct tm is in sys/time.h or time.h" >&5
+echo $ECHO_N "checking whether struct tm is in sys/time.h or time.h... $ECHO_C" >&6; }
+if test "${ac_cv_struct_tm+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <sys/types.h>
+#include <time.h>
+
+int
+main ()
+{
+struct tm *tp; tp->tm_sec;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_struct_tm=time.h
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_struct_tm=sys/time.h
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_struct_tm" >&5
+echo "${ECHO_T}$ac_cv_struct_tm" >&6; }
+if test $ac_cv_struct_tm = sys/time.h; then
+
+cat >>confdefs.h <<\_ACEOF
+#define TM_IN_SYS_TIME 1
+_ACEOF
+
+fi
+
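+# int64_t is a hard requirement; configure aborts if it cannot be found.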
+{ echo "$as_me:$LINENO: checking for int64_t" >&5
+echo $ECHO_N "checking for int64_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_int64_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef int64_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_int64_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_int64_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_int64_t" >&5
+echo "${ECHO_T}$ac_cv_type_int64_t" >&6; }
+if test $ac_cv_type_int64_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_INT64_T 1
+_ACEOF
+
+
+else
+ { { echo "$as_me:$LINENO: error: Type int64_t required but not found" >&5
+echo "$as_me: error: Type int64_t required but not found" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
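+# Accept either uint64_t or the BSD spelling u_int64_t, and abort when
+# neither is available.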
+{ echo "$as_me:$LINENO: checking for uint64_t" >&5
+echo $ECHO_N "checking for uint64_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_uint64_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef uint64_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_uint64_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_uint64_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_uint64_t" >&5
+echo "${ECHO_T}$ac_cv_type_uint64_t" >&6; }
+if test $ac_cv_type_uint64_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_UINT64_T 1
+_ACEOF
+
+
+else
+ { echo "$as_me:$LINENO: checking for u_int64_t" >&5
+echo $ECHO_N "checking for u_int64_t... $ECHO_C" >&6; }
+if test "${ac_cv_type_u_int64_t+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+typedef u_int64_t ac__type_new_;
+int
+main ()
+{
+if ((ac__type_new_ *) 0)
+ return 0;
+if (sizeof (ac__type_new_))
+ return 0;
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_type_u_int64_t=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_type_u_int64_t=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_type_u_int64_t" >&5
+echo "${ECHO_T}$ac_cv_type_u_int64_t" >&6; }
+if test $ac_cv_type_u_int64_t = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_U_INT64_T 1
+_ACEOF
+
+
+else
+ { { echo "$as_me:$LINENO: error: Type uint64_t or u_int64_t required but not found" >&5
+echo "$as_me: error: Type uint64_t or u_int64_t required but not found" >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
+fi
+
+
+
+
+
+
+
+
+
+
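+# Probe optional libc/libm entry points; each hit defines HAVE_<NAME>.
+# backtrace backs stack traces, and the single-precision math routines
+# are used when folding libm calls.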
+for ac_func in backtrace ceilf floorf roundf rintf nearbyintf getcwd
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
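+# A few more optional math and string-conversion functions.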
+for ac_func in powf fmodf strtof round
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
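+# Resource-usage, limit, and timing calls used by the process and timer
+# support code.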
+for ac_func in getpagesize getrusage getrlimit setrlimit gettimeofday
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
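+# Terminal detection and temporary file/directory creation.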
+for ac_func in isatty mkdtemp mkstemp
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
+
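+# Process-spawning and path utilities, plus sbrk and strdup.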
+for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
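+# Error-string and environment helpers.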
+for ac_func in strerror strerror_r setenv
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
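+# 64-bit strtol variants, sysconf, and Darwin's malloc_zone_statistics.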
+for ac_func in strtoll strtoq sysconf malloc_zone_statistics
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+
+
+
+
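+# Non-local jump primitives and writev; the sig* variants are preferred
+# for crash recovery where available.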
+for ac_func in setjmp longjmp sigsetjmp siglongjmp writev
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
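+# Runtime probe for C99 "%a": format 0.1 as a hex float, parse it back, and
+# require an exact round trip against 0x1.999999999999ap-4.  The test cannot
+# run when cross-compiling, so that case conservatively answers "no".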
+{ echo "$as_me:$LINENO: checking if printf has the %a format character" >&5
+echo $ECHO_N "checking if printf has the %a format character... $ECHO_C" >&6; }
+if test "${llvm_cv_c_printf_a+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ if test "$cross_compiling" = yes; then
+ llvm_cv_c_printf_a=no
+else
+ cat >conftest.$ac_ext <<_ACEOF
+
+ /* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+
+volatile double A, B;
+char Buffer[100];
+A = 1;
+A /= 10.0;
+sprintf(Buffer, "%a", A);
+B = atof(Buffer);
+if (A != B)
+ return (1);
+if (A != 0x1.999999999999ap-4)
+ return (1);
+return (0);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_c_printf_a=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+llvm_cv_c_printf_a=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_c_printf_a" >&5
+echo "${ECHO_T}$llvm_cv_c_printf_a" >&6; }
+ if test "$llvm_cv_c_printf_a" = "yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PRINTF_A 1
+_ACEOF
+
+ fi
+
+
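+# Compile-only C++ probe: the srand48/lrand48/drand48 family is an XSI
+# extension, so <stdlib.h> is not guaranteed to declare it everywhere.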
+{ echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
+echo $ECHO_N "checking for srand48/lrand48/drand48 in <stdlib.h>... $ECHO_C" >&6; }
+if test "${ac_cv_func_rand48+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <stdlib.h>
+int
+main ()
+{
+srand48(0);lrand48();drand48();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_rand48=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_rand48=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5
+echo "${ECHO_T}$ac_cv_func_rand48" >&6; }
+
+if test "$ac_cv_func_rand48" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_RAND48 1
+_ACEOF
+
+fi
+
+
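+# Declaration probe for strerror_s.  HAVE_DECL_STRERROR_S is always defined,
+# to 1 or to 0, so clients can test it with #if rather than #ifdef.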
+{ echo "$as_me:$LINENO: checking whether strerror_s is declared" >&5
+echo $ECHO_N "checking whether strerror_s is declared... $ECHO_C" >&6; }
+if test "${ac_cv_have_decl_strerror_s+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+int
+main ()
+{
+#ifndef strerror_s
+ char *p = (char *) strerror_s;
+ return !p;
+#endif
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_have_decl_strerror_s=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_have_decl_strerror_s=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_strerror_s" >&5
+echo "${ECHO_T}$ac_cv_have_decl_strerror_s" >&6; }
+if test $ac_cv_have_decl_strerror_s = yes; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_STRERROR_S 1
+_ACEOF
+
+
+else
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_DECL_STRERROR_S 0
+_ACEOF
+
+
+fi
+
+
+
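+# On MinGW, probe -lgcc for the runtime support routines (_alloca, __chkstk,
+# and the 64-bit shift/divide/modulo helpers) that compiled code may call.
+# Every block below follows the same AC_CHECK_LIB-style pattern: link a stub
+# that references the symbol against -lgcc and define HAVE_<SYMBOL> on success.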
+if test "$llvm_cv_os_type" = "MingW" ; then
+ { echo "$as_me:$LINENO: checking for _alloca in -lgcc" >&5
+echo $ECHO_N "checking for _alloca in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc__alloca+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char _alloca ();
+int
+main ()
+{
+return _alloca ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc__alloca=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc__alloca=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc__alloca" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc__alloca" >&6; }
+if test $ac_cv_lib_gcc__alloca = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE__ALLOCA 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __alloca in -lgcc" >&5
+echo $ECHO_N "checking for __alloca in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___alloca+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __alloca ();
+int
+main ()
+{
+return __alloca ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___alloca=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___alloca=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___alloca" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___alloca" >&6; }
+if test $ac_cv_lib_gcc___alloca = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___ALLOCA 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __chkstk in -lgcc" >&5
+echo $ECHO_N "checking for __chkstk in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___chkstk+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __chkstk ();
+int
+main ()
+{
+return __chkstk ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___chkstk=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___chkstk=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___chkstk" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___chkstk" >&6; }
+if test $ac_cv_lib_gcc___chkstk = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___CHKSTK 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for ___chkstk in -lgcc" >&5
+echo $ECHO_N "checking for ___chkstk in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc____chkstk+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char ___chkstk ();
+int
+main ()
+{
+return ___chkstk ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc____chkstk=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc____chkstk=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc____chkstk" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc____chkstk" >&6; }
+if test $ac_cv_lib_gcc____chkstk = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE____CHKSTK 1
+_ACEOF
+
+fi
+
+
+ { echo "$as_me:$LINENO: checking for __ashldi3 in -lgcc" >&5
+echo $ECHO_N "checking for __ashldi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___ashldi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __ashldi3 ();
+int
+main ()
+{
+return __ashldi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___ashldi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___ashldi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___ashldi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___ashldi3" >&6; }
+if test $ac_cv_lib_gcc___ashldi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___ASHLDI3 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __ashrdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __ashrdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___ashrdi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __ashrdi3 ();
+int
+main ()
+{
+return __ashrdi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___ashrdi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___ashrdi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___ashrdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___ashrdi3" >&6; }
+if test $ac_cv_lib_gcc___ashrdi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___ASHRDI3 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __divdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __divdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___divdi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __divdi3 ();
+int
+main ()
+{
+return __divdi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___divdi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___divdi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___divdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___divdi3" >&6; }
+if test $ac_cv_lib_gcc___divdi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___DIVDI3 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __fixdfdi in -lgcc" >&5
+echo $ECHO_N "checking for __fixdfdi in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___fixdfdi+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __fixdfdi ();
+int
+main ()
+{
+return __fixdfdi ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___fixdfdi=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___fixdfdi=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___fixdfdi" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___fixdfdi" >&6; }
+if test $ac_cv_lib_gcc___fixdfdi = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___FIXDFDI 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __fixsfdi in -lgcc" >&5
+echo $ECHO_N "checking for __fixsfdi in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___fixsfdi+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __fixsfdi ();
+int
+main ()
+{
+return __fixsfdi ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___fixsfdi=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___fixsfdi=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___fixsfdi" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___fixsfdi" >&6; }
+if test $ac_cv_lib_gcc___fixsfdi = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___FIXSFDI 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __floatdidf in -lgcc" >&5
+echo $ECHO_N "checking for __floatdidf in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___floatdidf+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __floatdidf ();
+int
+main ()
+{
+return __floatdidf ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___floatdidf=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___floatdidf=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___floatdidf" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___floatdidf" >&6; }
+if test $ac_cv_lib_gcc___floatdidf = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___FLOATDIDF 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __lshrdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __lshrdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___lshrdi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __lshrdi3 ();
+int
+main ()
+{
+return __lshrdi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___lshrdi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___lshrdi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___lshrdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___lshrdi3" >&6; }
+if test $ac_cv_lib_gcc___lshrdi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___LSHRDI3 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __moddi3 in -lgcc" >&5
+echo $ECHO_N "checking for __moddi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___moddi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __moddi3 ();
+int
+main ()
+{
+return __moddi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___moddi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___moddi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___moddi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___moddi3" >&6; }
+if test $ac_cv_lib_gcc___moddi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___MODDI3 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __udivdi3 in -lgcc" >&5
+echo $ECHO_N "checking for __udivdi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___udivdi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __udivdi3 ();
+int
+main ()
+{
+return __udivdi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___udivdi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___udivdi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___udivdi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___udivdi3" >&6; }
+if test $ac_cv_lib_gcc___udivdi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___UDIVDI3 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __umoddi3 in -lgcc" >&5
+echo $ECHO_N "checking for __umoddi3 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___umoddi3+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __umoddi3 ();
+int
+main ()
+{
+return __umoddi3 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___umoddi3=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___umoddi3=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___umoddi3" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___umoddi3" >&6; }
+if test $ac_cv_lib_gcc___umoddi3 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___UMODDI3 1
+_ACEOF
+
+fi
+
+
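+# Two further -lgcc entry points: __main (gcc's hook for running global
+# constructors before main) and __cmpdi2 (signed 64-bit comparison).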
+ { echo "$as_me:$LINENO: checking for __main in -lgcc" >&5
+echo $ECHO_N "checking for __main in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___main+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __main ();
+int
+main ()
+{
+return __main ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___main=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___main=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___main" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___main" >&6; }
+if test $ac_cv_lib_gcc___main = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___MAIN 1
+_ACEOF
+
+fi
+
+ { echo "$as_me:$LINENO: checking for __cmpdi2 in -lgcc" >&5
+echo $ECHO_N "checking for __cmpdi2 in -lgcc... $ECHO_C" >&6; }
+if test "${ac_cv_lib_gcc___cmpdi2+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-lgcc $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char __cmpdi2 ();
+int
+main ()
+{
+return __cmpdi2 ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_lib_gcc___cmpdi2=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_lib_gcc___cmpdi2=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___cmpdi2" >&5
+echo "${ECHO_T}$ac_cv_lib_gcc___cmpdi2" >&6; }
+if test $ac_cv_lib_gcc___cmpdi2 = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE___CMPDI2 1
+_ACEOF
+
+fi
+
+fi
+
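+# Windows headers disagree on whether the EnumerateLoadedModules() callback
+# takes PCSTR or PSTR; compiling both prototypes for the same function shows
+# which spelling <imagehlp.h> uses, recorded as WIN32_ELMCB_PCSTR.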
+if test "$llvm_cv_os_type" = "MingW" ; then
+ { echo "$as_me:$LINENO: checking whether EnumerateLoadedModules() accepts new decl" >&5
+echo $ECHO_N "checking whether EnumerateLoadedModules() accepts new decl... $ECHO_C" >&6; }
+ cat >conftest.$ac_ext <<_ACEOF
+#include <windows.h>
+#include <imagehlp.h>
+extern void foo(PENUMLOADED_MODULES_CALLBACK);
+extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+ llvm_cv_win32_elmcb_pcstr="PCSTR"
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+ llvm_cv_win32_elmcb_pcstr="PSTR"
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+cat >>confdefs.h <<_ACEOF
+#define WIN32_ELMCB_PCSTR $llvm_cv_win32_elmcb_pcstr
+_ACEOF
+
+fi
+
+
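+# isnan/isinf land in different places depending on the C library and C++
+# standard library: in <math.h>, in the global namespace of <cmath>, or only
+# as std::isnan/std::isinf.  Probe each spelling with the C++ compiler and
+# record an independent HAVE_* define for every variant that compiles.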
+{ echo "$as_me:$LINENO: checking for isnan in <math.h>" >&5
+echo $ECHO_N "checking for isnan in <math.h>... $ECHO_C" >&6; }
+if test "${ac_cv_func_isnan_in_math_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <math.h>
+int
+main ()
+{
+float f; isnan(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_isnan_in_math_h=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_isnan_in_math_h=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_math_h" >&5
+echo "${ECHO_T}$ac_cv_func_isnan_in_math_h" >&6; }
+
+
+if test "$ac_cv_func_isnan_in_math_h" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ISNAN_IN_MATH_H 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for isnan in <cmath>" >&5
+echo $ECHO_N "checking for isnan in <cmath>... $ECHO_C" >&6; }
+if test "${ac_cv_func_isnan_in_cmath+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <cmath>
+int
+main ()
+{
+float f; isnan(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_isnan_in_cmath=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_isnan_in_cmath=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_cmath" >&5
+echo "${ECHO_T}$ac_cv_func_isnan_in_cmath" >&6; }
+
+if test "$ac_cv_func_isnan_in_cmath" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ISNAN_IN_CMATH 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for std::isnan in <cmath>" >&5
+echo $ECHO_N "checking for std::isnan in <cmath>... $ECHO_C" >&6; }
+if test "${ac_cv_func_std_isnan_in_cmath+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <cmath>
+int
+main ()
+{
+float f; std::isnan(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_std_isnan_in_cmath=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_std_isnan_in_cmath=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_std_isnan_in_cmath" >&5
+echo "${ECHO_T}$ac_cv_func_std_isnan_in_cmath" >&6; }
+
+if test "$ac_cv_func_std_isnan_in_cmath" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_STD_ISNAN_IN_CMATH 1
+_ACEOF
+
+fi
+
+
+{ echo "$as_me:$LINENO: checking for isinf in <math.h>" >&5
+echo $ECHO_N "checking for isinf in <math.h>... $ECHO_C" >&6; }
+if test "${ac_cv_func_isinf_in_math_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <math.h>
+int
+main ()
+{
+float f; isinf(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_isinf_in_math_h=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_isinf_in_math_h=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_math_h" >&5
+echo "${ECHO_T}$ac_cv_func_isinf_in_math_h" >&6; }
+
+if test "$ac_cv_func_isinf_in_math_h" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ISINF_IN_MATH_H 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for isinf in <cmath>" >&5
+echo $ECHO_N "checking for isinf in <cmath>... $ECHO_C" >&6; }
+if test "${ac_cv_func_isinf_in_cmath+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <cmath>
+int
+main ()
+{
+float f; isinf(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_isinf_in_cmath=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_isinf_in_cmath=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_cmath" >&5
+echo "${ECHO_T}$ac_cv_func_isinf_in_cmath" >&6; }
+
+if test "$ac_cv_func_isinf_in_cmath" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_ISINF_IN_CMATH 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for std::isinf in <cmath>" >&5
+echo $ECHO_N "checking for std::isinf in <cmath>... $ECHO_C" >&6; }
+if test "${ac_cv_func_std_isinf_in_cmath+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <cmath>
+int
+main ()
+{
+float f; std::isinf(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_std_isinf_in_cmath=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_std_isinf_in_cmath=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_std_isinf_in_cmath" >&5
+echo "${ECHO_T}$ac_cv_func_std_isinf_in_cmath" >&6; }
+
+if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_STD_ISINF_IN_CMATH 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking for finite in <ieeefp.h>" >&5
+echo $ECHO_N "checking for finite in <ieeefp.h>... $ECHO_C" >&6; }
+if test "${ac_cv_func_finite_in_ieeefp_h+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <ieeefp.h>
+int
+main ()
+{
+float f; finite(f);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_finite_in_ieeefp_h=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_cv_func_finite_in_ieeefp_h=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_finite_in_ieeefp_h" >&5
+echo "${ECHO_T}$ac_cv_func_finite_in_ieeefp_h" >&6; }
+
+if test "$ac_cv_func_finite_in_ieeefp_h" = "yes" ; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_FINITE_IN_IEEEFP_H 1
+_ACEOF
+
+fi
+
+
+
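Taken together, the probes above map out where the floating-point classification functions live on this platform. A consumer typically selects among the resulting macros at compile time; the wrapper below is a sketch of that pattern (the function name is hypothetical, but the macro names match the #defines emitted above):

    /* Sketch of a consumer for these results (hypothetical wrapper). */
    #if defined(HAVE_STD_ISINF_IN_CMATH)
    # include <cmath>
    static int portable_isinf(double d) { return std::isinf(d); }
    #elif defined(HAVE_ISINF_IN_MATH_H)
    # include <math.h>
    static int portable_isinf(double d) { return isinf(d); }
    #elif defined(HAVE_FINITE_IN_IEEEFP_H)
    # include <ieeefp.h>
    /* finite() is false for both infinities and NaN; d == d excludes NaN. */
    static int portable_isinf(double d) { return !finite(d) && d == d; }
    #endif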
+if test "$llvm_cv_platform_type" = "Unix" ; then
+
+
+for ac_header in stdlib.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_header_compiler=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking $ac_header presence" >&5
+echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#include <$ac_header>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } >/dev/null; then
+ if test -s conftest.err; then
+ ac_cpp_err=$ac_c_preproc_warn_flag
+ ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+ else
+ ac_cpp_err=
+ fi
+else
+ ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+ ac_header_preproc=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+ yes:no: )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;}
+ ac_header_preproc=yes
+ ;;
+ no:yes:* )
+ { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5
+echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
+ { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
+ ( cat <<\_ASBOX
+## ------------------------------ ##
+## Report this to bugs@yourdomain ##
+## ------------------------------ ##
+_ASBOX
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ eval "$as_ac_Header=\$ac_header_preproc"
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+
+fi
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
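The stdlib.h/unistd.h loop above is autoconf's two-track header check: a "usability" probe that compiles the header behind $ac_includes_default, and a "presence" probe that only preprocesses it, with the warning ladder in the case statement firing when the two disagree. A sketch of the usability input (the presence probe feeds the bare #include to the preprocessor instead):

    /* Usability probe for a header check, reconstructed: the header under
       test is compiled after the default includes. */
    #include <stdio.h>   /* stand-in for $ac_includes_default */
    #include <unistd.h>  /* header under test */
    int main() { return 0; }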
+
+for ac_func in getpagesize
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
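The getpagesize check above uses autoconf's standard link-probe trick: it declares the function with a deliberately wrong char () prototype so the test never depends on the real header, then asks only whether the linker can resolve the symbol (the __stub guards catch glibc's always-ENOSYS placeholders). Condensed:

    /* Condensed link probe: only symbol resolution is being tested, so the
       bogus prototype is harmless. */
    #ifdef __cplusplus
    extern "C"
    #endif
    char getpagesize();
    int main() {
      return (int) getpagesize();
    }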
+{ echo "$as_me:$LINENO: checking for working mmap" >&5
+echo $ECHO_N "checking for working mmap... $ECHO_C" >&6; }
+if test "${ac_cv_func_mmap_fixed_mapped+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$cross_compiling" = yes; then
+ ac_cv_func_mmap_fixed_mapped=no
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+$ac_includes_default
+/* malloc might have been renamed as rpl_malloc. */
+#undef malloc
+
+/* Thanks to Mike Haertel and Jim Avera for this test.
+ Here is a matrix of mmap possibilities:
+ mmap private not fixed
+ mmap private fixed at somewhere currently unmapped
+ mmap private fixed at somewhere already mapped
+ mmap shared not fixed
+ mmap shared fixed at somewhere currently unmapped
+ mmap shared fixed at somewhere already mapped
+ For private mappings, we should verify that changes cannot be read()
+ back from the file, nor mmap'd back from the file at a different
+ address. (There have been systems where private was not correctly
+ implemented like the infamous i386 svr4.0, and systems where the
+ VM page cache was not coherent with the file system buffer cache
+ like early versions of FreeBSD and possibly contemporary NetBSD.)
+ For shared mappings, we should conversely verify that changes get
+ propagated back to all the places they're supposed to be.
+
+ Grep wants private fixed already mapped.
+ The main things grep needs to know about mmap are:
+ * does it exist and is it safe to write into the mmap'd area
+ * how to use it (BSD variants) */
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#if !STDC_HEADERS && !HAVE_STDLIB_H
+char *malloc ();
+#endif
+
+/* This mess was copied from the GNU getpagesize.h. */
+#if !HAVE_GETPAGESIZE
+/* Assume that all systems that can run configure have sys/param.h. */
+# if !HAVE_SYS_PARAM_H
+# define HAVE_SYS_PARAM_H 1
+# endif
+
+# ifdef _SC_PAGESIZE
+# define getpagesize() sysconf(_SC_PAGESIZE)
+# else /* no _SC_PAGESIZE */
+# if HAVE_SYS_PARAM_H
+# include <sys/param.h>
+# ifdef EXEC_PAGESIZE
+# define getpagesize() EXEC_PAGESIZE
+# else /* no EXEC_PAGESIZE */
+# ifdef NBPG
+# define getpagesize() NBPG * CLSIZE
+# ifndef CLSIZE
+# define CLSIZE 1
+# endif /* no CLSIZE */
+# else /* no NBPG */
+# ifdef NBPC
+# define getpagesize() NBPC
+# else /* no NBPC */
+# ifdef PAGESIZE
+# define getpagesize() PAGESIZE
+# endif /* PAGESIZE */
+# endif /* no NBPC */
+# endif /* no NBPG */
+# endif /* no EXEC_PAGESIZE */
+# else /* no HAVE_SYS_PARAM_H */
+# define getpagesize() 8192 /* punt totally */
+# endif /* no HAVE_SYS_PARAM_H */
+# endif /* no _SC_PAGESIZE */
+
+#endif /* no HAVE_GETPAGESIZE */
+
+int
+main ()
+{
+ char *data, *data2, *data3;
+ int i, pagesize;
+ int fd;
+
+ pagesize = getpagesize ();
+
+ /* First, make a file with some known garbage in it. */
+ data = (char *) malloc (pagesize);
+ if (!data)
+ return 1;
+ for (i = 0; i < pagesize; ++i)
+ *(data + i) = rand ();
+ umask (0);
+ fd = creat ("conftest.mmap", 0600);
+ if (fd < 0)
+ return 1;
+ if (write (fd, data, pagesize) != pagesize)
+ return 1;
+ close (fd);
+
+ /* Next, try to mmap the file at a fixed address which already has
+ something else allocated at it. If we can, also make sure that
+ we see the same garbage. */
+ fd = open ("conftest.mmap", O_RDWR);
+ if (fd < 0)
+ return 1;
+ data2 = (char *) malloc (2 * pagesize);
+ if (!data2)
+ return 1;
+ data2 += (pagesize - ((long int) data2 & (pagesize - 1))) & (pagesize - 1);
+ if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_FIXED, fd, 0L))
+ return 1;
+ for (i = 0; i < pagesize; ++i)
+ if (*(data + i) != *(data2 + i))
+ return 1;
+
+ /* Finally, make sure that changes to the mapped area do not
+ percolate back to the file as seen by read(). (This is a bug on
+ some variants of i386 svr4.0.) */
+ for (i = 0; i < pagesize; ++i)
+ *(data2 + i) = *(data2 + i) + 1;
+ data3 = (char *) malloc (pagesize);
+ if (!data3)
+ return 1;
+ if (read (fd, data3, pagesize) != pagesize)
+ return 1;
+ for (i = 0; i < pagesize; ++i)
+ if (*(data + i) != *(data3 + i))
+ return 1;
+ close (fd);
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_mmap_fixed_mapped=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_func_mmap_fixed_mapped=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_mmap_fixed_mapped" >&5
+echo "${ECHO_T}$ac_cv_func_mmap_fixed_mapped" >&6; }
+if test $ac_cv_func_mmap_fixed_mapped = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_MMAP 1
+_ACEOF
+
+fi
+rm -f conftest.mmap
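The long runtime test above checks more than mmap's existence: it verifies that MAP_FIXED|MAP_PRIVATE can overlay an already-allocated, page-aligned address, that the mapped bytes match the file, and that private writes never leak back out through read() (the i386 svr4.0 bug the comment cites). A condensed sketch of the fixed-address portion, assuming a power-of-two page size as the probe itself does:

    /* Condensed fixed-address mmap probe (sketch, not the verbatim test). */
    #include <fcntl.h>
    #include <stdlib.h>
    #include <unistd.h>
    #include <sys/mman.h>
    int main() {
      long pagesize = sysconf(_SC_PAGESIZE);
      char *buf = (char *) malloc(2 * pagesize);
      if (!buf) return 1;
      /* Page-align a pointer inside the allocation, as the probe does. */
      char *fixed = buf + ((pagesize - ((long) buf & (pagesize - 1)))
                           & (pagesize - 1));
      int fd = open("conftest.mmap", O_RDWR | O_CREAT, 0600);
      if (fd < 0 || write(fd, "x", 1) != 1) return 1;
      /* MAP_FIXED must land exactly at the requested address. */
      return mmap(fixed, pagesize, PROT_READ | PROT_WRITE,
                  MAP_PRIVATE | MAP_FIXED, fd, 0) == fixed ? 0 : 1;
    }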
+
+ { echo "$as_me:$LINENO: checking for mmap of files" >&5
+echo $ECHO_N "checking for mmap of files... $ECHO_C" >&6; }
+if test "${ac_cv_func_mmap_file+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ if test "$cross_compiling" = yes; then
+ ac_cv_func_mmap_file=no
+else
+ cat >conftest.$ac_ext <<_ACEOF
+
+ /* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+int
+main ()
+{
+
+ int fd;
+ fd = creat ("foo",0777);
+ fd = (int) mmap (0, 1, PROT_READ, MAP_SHARED, fd, 0);
+ unlink ("foo");
+ return (fd != (int) MAP_FAILED);
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_cv_func_mmap_file=yes
+else
+ echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_func_mmap_file=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_func_mmap_file" >&5
+echo "${ECHO_T}$ac_cv_func_mmap_file" >&6; }
+if test "$ac_cv_func_mmap_file" = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_MMAP_FILE
+_ACEOF
+
+ MMAP_FILE=yes
+
+fi
+
+ { echo "$as_me:$LINENO: checking if /dev/zero is needed for mmap" >&5
+echo $ECHO_N "checking if /dev/zero is needed for mmap... $ECHO_C" >&6; }
+if test "${ac_cv_need_dev_zero_for_mmap+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ if test "$llvm_cv_os_type" = "Interix" ; then
+ ac_cv_need_dev_zero_for_mmap=yes
+ else
+ ac_cv_need_dev_zero_for_mmap=no
+ fi
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_need_dev_zero_for_mmap" >&5
+echo "${ECHO_T}$ac_cv_need_dev_zero_for_mmap" >&6; }
+if test "$ac_cv_need_dev_zero_for_mmap" = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define NEED_DEV_ZERO_FOR_MMAP 1
+_ACEOF
+
+fi
+
+ if test "$ac_cv_func_mmap_fixed_mapped" = "no"
+ then
+ { echo "$as_me:$LINENO: WARNING: mmap() of a fixed address required but not supported" >&5
+echo "$as_me: WARNING: mmap() of a fixed address required but not supported" >&2;}
+ fi
+ if test "$ac_cv_func_mmap_file" = "no"
+ then
+ { echo "$as_me:$LINENO: WARNING: mmap() of files required but not found" >&5
+echo "$as_me: WARNING: mmap() of files required but not found" >&2;}
+ fi
+fi
+
+{ echo "$as_me:$LINENO: checking for GCC atomic builtins" >&5
+echo $ECHO_N "checking for GCC atomic builtins... $ECHO_C" >&6; }
+ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+int main() {
+ volatile unsigned long val = 1;
+ __sync_synchronize();
+ __sync_val_compare_and_swap(&val, 1, 0);
+ __sync_add_and_fetch(&val, 1);
+ __sync_sub_and_fetch(&val, 1);
+ return 0;
+ }
+
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ { echo "$as_me:$LINENO: result: yes" >&5
+echo "${ECHO_T}yes" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define LLVM_HAS_ATOMICS 1
+_ACEOF
+
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { echo "$as_me:$LINENO: result: no" >&5
+echo "${ECHO_T}no" >&6; }
+
+cat >>confdefs.h <<\_ACEOF
+#define LLVM_HAS_ATOMICS 0
+_ACEOF
+
+ { echo "$as_me:$LINENO: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&5
+echo "$as_me: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&2;}
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+
+
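The atomic-builtins probe above links the four __sync intrinsics LLVM cares about and sets LLVM_HAS_ATOMICS to 1 or 0 accordingly; note it defines the macro either way, so consumers can test it with #if rather than #ifdef. A sketch of how such a flag is typically consumed (hypothetical function; assumes the config header carrying the define is included):

    #if LLVM_HAS_ATOMICS
    static unsigned long atomic_increment(volatile unsigned long *p) {
      return __sync_add_and_fetch(p, 1);   /* full-barrier atomic add */
    }
    #else
    static unsigned long atomic_increment(volatile unsigned long *p) {
      return ++*p;   /* non-atomic fallback: this build is thread-unsafe */
    }
    #endif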
+if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
+ { echo "$as_me:$LINENO: checking for 32-bit userspace on 64-bit system" >&5
+echo $ECHO_N "checking for 32-bit userspace on 64-bit system... $ECHO_C" >&6; }
+if test "${llvm_cv_linux_mixed+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+#ifndef __x86_64__
+ error: Not x86-64 even if uname says so!
+ #endif
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_linux_mixed=no
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_linux_mixed=yes
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_linux_mixed" >&5
+echo "${ECHO_T}$llvm_cv_linux_mixed" >&6; }
+
+ if test "$llvm_cv_linux_mixed" = "yes"; then
+ llvm_cv_target_arch="x86"
+ ARCH="x86"
+ fi
+fi
+
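The mixed-userspace check above compiles a fragment whose deliberately malformed error: line survives preprocessing only when __x86_64__ is absent, forcing a compile failure exactly when the default compiler targets 32-bit despite a 64-bit uname; on failure, ARCH is downgraded to x86. The conventional spelling of the same probe:

    /* Equivalent probe using #error: compiles iff the compiler's default
       target really is x86-64. */
    #ifndef __x86_64__
    #error "32-bit default target on a 64-bit kernel"
    #endif
    int main() { return 0; }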
+
+for ac_func in __dso_handle
+do
+as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_func" >&5
+echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; }
+if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+ For example, HP-UX 11i <limits.h> declares gettimeofday. */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+ which can conflict with char $ac_func (); below.
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+ <limits.h> exists even on freestanding compilers. */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+ to always fail with ENOSYS. Some functions are actually named
+ something starting with __ and the normal name is an alias. */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_link") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest$ac_exeext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ eval "$as_ac_var=yes"
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+ { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+ cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+
+SHLIBEXT=$libltdl_cv_shlibext
+
+
+SHLIBPATH_VAR=$libltdl_cv_shlibpath_var
+
+
+# Translate the various configuration directories and other basic
+# information into substitutions that will end up in Makefile.config.in
+# so that these configured values can be used by the makefiles
+if test "${prefix}" = "NONE" ; then
+ prefix="/usr/local"
+fi
+eval LLVM_PREFIX="${prefix}";
+eval LLVM_BINDIR="${prefix}/bin";
+eval LLVM_LIBDIR="${prefix}/lib";
+eval LLVM_DATADIR="${prefix}/share/llvm";
+eval LLVM_DOCSDIR="${prefix}/share/doc/llvm";
+eval LLVM_ETCDIR="${prefix}/etc/llvm";
+eval LLVM_INCLUDEDIR="${prefix}/include";
+eval LLVM_INFODIR="${prefix}/info";
+eval LLVM_MANDIR="${prefix}/man";
+LLVM_CONFIGTIME=`date`
+
+
+
+
+
+
+
+
+
+
+
+# Place the various directories into the config.h file as #defines so that we
+# can know about the installation paths within LLVM.
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_PREFIX "$LLVM_PREFIX"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_BINDIR "$LLVM_BINDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_LIBDIR "$LLVM_LIBDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_DATADIR "$LLVM_DATADIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_DOCSDIR "$LLVM_DOCSDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_ETCDIR "$LLVM_ETCDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_INCLUDEDIR "$LLVM_INCLUDEDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_INFODIR "$LLVM_INFODIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_MANDIR "$LLVM_MANDIR"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_CONFIGTIME "$LLVM_CONFIGTIME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define LLVM_DEFAULT_TARGET_TRIPLE "$target"
+_ACEOF
+
+
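Every install directory computed above is recorded twice: once as a make-level substitution (see the subs tables later in this patch) and once as a string macro in confdefs.h, so compiled code can locate its installation without consulting the environment. Consumption is then as simple as the sketch below (hypothetical function; macro names match the defines just emitted, and the generated config header is assumed to be included):

    #include <cstdio>
    static void printInstallDirs() {
      std::printf("libraries: %s\n", LLVM_LIBDIR);
      std::printf("headers:   %s\n", LLVM_INCLUDEDIR);
      std::printf("docs:      %s\n", LLVM_DOCSDIR);
    }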
+# Determine which bindings to build.
+if test "$BINDINGS_TO_BUILD" = auto ; then
+ BINDINGS_TO_BUILD=""
+ if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then
+ BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD"
+ fi
+fi
+BINDINGS_TO_BUILD=$BINDINGS_TO_BUILD
+
+
+# This isn't really configurey, but it avoids having to repeat the list in
+# other files.
+ALL_BINDINGS=ocaml
+
+
+# Do any work necessary to ensure that bindings have what they need.
+binding_prereqs_failed=0
+for a_binding in $BINDINGS_TO_BUILD ; do
+ case "$a_binding" in
+ ocaml)
+ if test "x$OCAMLC" = x ; then
+ { echo "$as_me:$LINENO: WARNING: --enable-bindings=ocaml specified, but ocamlc not found. Try configure OCAMLC=/path/to/ocamlc" >&5
+echo "$as_me: WARNING: --enable-bindings=ocaml specified, but ocamlc not found. Try configure OCAMLC=/path/to/ocamlc" >&2;}
+ binding_prereqs_failed=1
+ fi
+ if test "x$OCAMLDEP" = x ; then
+ { echo "$as_me:$LINENO: WARNING: --enable-bindings=ocaml specified, but ocamldep not found. Try configure OCAMLDEP=/path/to/ocamldep" >&5
+echo "$as_me: WARNING: --enable-bindings=ocaml specified, but ocamldep not found. Try configure OCAMLDEP=/path/to/ocamldep" >&2;}
+ binding_prereqs_failed=1
+ fi
+ if test "x$OCAMLOPT" = x ; then
+ { echo "$as_me:$LINENO: WARNING: --enable-bindings=ocaml specified, but ocamlopt not found. Try configure OCAMLOPT=/path/to/ocamlopt" >&5
+echo "$as_me: WARNING: --enable-bindings=ocaml specified, but ocamlopt not found. Try configure OCAMLOPT=/path/to/ocamlopt" >&2;}
+ fi
+ if test "x$with_ocaml_libdir" != xauto ; then
+ OCAML_LIBDIR=$with_ocaml_libdir
+
+ else
+ ocaml_stdlib="`"$OCAMLC" -where`"
+ if test "$LLVM_PREFIX" '<' "$ocaml_stdlib" -a "$ocaml_stdlib" '<' "$LLVM_PREFIX~"
+ then
+ # ocaml stdlib is beneath our prefix; use stdlib
+ OCAML_LIBDIR=$ocaml_stdlib
+
+ else
+ # ocaml stdlib is outside our prefix; use libdir/ocaml
+ OCAML_LIBDIR=$LLVM_LIBDIR/ocaml
+
+ fi
+ fi
+ ;;
+ esac
+done
+if test "$binding_prereqs_failed" = 1 ; then
+ { { echo "$as_me:$LINENO: error: Prequisites for bindings not satisfied. Fix them or use configure --disable-bindings." >&5
+echo "$as_me: error: Prequisites for bindings not satisfied. Fix them or use configure --disable-bindings." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+
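One subtlety in the bindings block above: the OCAML_LIBDIR choice decides whether the OCaml stdlib sits under the install prefix using a pure string comparison, "$LLVM_PREFIX" '<' "$ocaml_stdlib" -a "$ocaml_stdlib" '<' "$LLVM_PREFIX~". This works because '~' (0x7E) collates after every character ordinarily found in paths, so the two inequalities bracket exactly the strings that strictly extend $LLVM_PREFIX. The same predicate spelled directly, for comparison:

    #include <string.h>
    /* True iff path strictly extends prefix: the direct form of the
       lexicographic bracket used in the configure fragment above. */
    static int under_prefix(const char *prefix, const char *path) {
      size_t n = strlen(prefix);
      return strncmp(path, prefix, n) == 0 && path[n] != '\0';
    }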
+{ echo "$as_me:$LINENO: checking for compiler -fvisibility-inlines-hidden option" >&5
+echo $ECHO_N "checking for compiler -fvisibility-inlines-hidden option... $ECHO_C" >&6; }
+if test "${llvm_cv_cxx_visibility_inlines_hidden+set}" = set; then
+ echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ oldcxxflags="$CXXFLAGS"
+ CXXFLAGS="$CXXFLAGS -fvisibility-inlines-hidden"
+ cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_compile") 2>conftest.er1
+ ac_status=$?
+ grep -v '^ *+' conftest.er1 >conftest.err
+ rm -f conftest.er1
+ cat conftest.err >&5
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); } &&
+ { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; } &&
+ { ac_try='test -s conftest.$ac_objext'
+ { (case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5
+ (exit $ac_status); }; }; then
+ llvm_cv_cxx_visibility_inlines_hidden=yes
+else
+ echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ llvm_cv_cxx_visibility_inlines_hidden=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CXXFLAGS="$oldcxxflags"
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_cxx_visibility_inlines_hidden" >&5
+echo "${ECHO_T}$llvm_cv_cxx_visibility_inlines_hidden" >&6; }
+if test "$llvm_cv_cxx_visibility_inlines_hidden" = yes ; then
+ ENABLE_VISIBILITY_INLINES_HIDDEN=1
+
+else
+ ENABLE_VISIBILITY_INLINES_HIDDEN=0
+
+fi
+
+
+if test "$llvm_cv_link_use_r" = "yes" ; then
+ RPATH="-Wl,-R"
+else
+ RPATH="-Wl,-rpath"
+fi
+
+
+if test "$llvm_cv_link_use_export_dynamic" = "yes" ; then
+ RDYNAMIC="-Wl,-export-dynamic"
+else
+ RDYNAMIC=""
+fi
+
+
+
+
+ac_config_files="$ac_config_files Makefile.common"
+
+ac_config_files="$ac_config_files Makefile.llvm.config"
+
+
+ac_config_commands="$ac_config_commands Makefile"
+
+
+ac_config_commands="$ac_config_commands lib/Makefile"
+
+
+ac_config_commands="$ac_config_commands lib/sample/Makefile"
+
+
+ac_config_commands="$ac_config_commands tools/Makefile"
+
+
+ac_config_commands="$ac_config_commands tools/sample/Makefile"
@@ -1829,6 +21036,20 @@ LIBOBJS=$ac_libobjs
LTLIBOBJS=$ac_ltlibobjs
+if test -z "${INSTALL_LTDL_TRUE}" && test -z "${INSTALL_LTDL_FALSE}"; then
+ { { echo "$as_me:$LINENO: error: conditional \"INSTALL_LTDL\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+echo "$as_me: error: conditional \"INSTALL_LTDL\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
+fi
+if test -z "${CONVENIENCE_LTDL_TRUE}" && test -z "${CONVENIENCE_LTDL_FALSE}"; then
+ { { echo "$as_me:$LINENO: error: conditional \"CONVENIENCE_LTDL\" was never defined.
+Usually this means the macro was only invoked conditionally." >&5
+echo "$as_me: error: conditional \"CONVENIENCE_LTDL\" was never defined.
+Usually this means the macro was only invoked conditionally." >&2;}
+ { (exit 1); exit 1; }; }
+fi
: ${CONFIG_STATUS=./config.status}
ac_clean_files_save=$ac_clean_files
@@ -2173,6 +21394,7 @@ gives unlimited permission to copy, distribute and modify it."
ac_pwd='$ac_pwd'
srcdir='$srcdir'
+INSTALL='$INSTALL'
_ACEOF
cat >>$CONFIG_STATUS <<\_ACEOF
@@ -2268,6 +21490,7 @@ do
case $ac_config_target in
"setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;;
"Makefile.common") CONFIG_FILES="$CONFIG_FILES Makefile.common" ;;
+ "Makefile.llvm.config") CONFIG_FILES="$CONFIG_FILES Makefile.llvm.config" ;;
"Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;;
"lib/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS lib/Makefile" ;;
"lib/sample/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS lib/sample/Makefile" ;;
@@ -2373,11 +21596,197 @@ host_alias!$host_alias$ac_delim
target_alias!$target_alias$ac_delim
LLVM_SRC!$LLVM_SRC$ac_delim
LLVM_OBJ!$LLVM_OBJ$ac_delim
+LLVM_VERSION!$LLVM_VERSION$ac_delim
+CC!$CC$ac_delim
+CFLAGS!$CFLAGS$ac_delim
+LDFLAGS!$LDFLAGS$ac_delim
+CPPFLAGS!$CPPFLAGS$ac_delim
+ac_ct_CC!$ac_ct_CC$ac_delim
+EXEEXT!$EXEEXT$ac_delim
+OBJEXT!$OBJEXT$ac_delim
+CXX!$CXX$ac_delim
+CXXFLAGS!$CXXFLAGS$ac_delim
+ac_ct_CXX!$ac_ct_CXX$ac_delim
+CPP!$CPP$ac_delim
+ENABLE_POLLY!$ENABLE_POLLY$ac_delim
+LLVM_HAS_POLLY!$LLVM_HAS_POLLY$ac_delim
+subdirs!$subdirs$ac_delim
+build!$build$ac_delim
+build_cpu!$build_cpu$ac_delim
+build_vendor!$build_vendor$ac_delim
+build_os!$build_os$ac_delim
+host!$host$ac_delim
+host_cpu!$host_cpu$ac_delim
+host_vendor!$host_vendor$ac_delim
+host_os!$host_os$ac_delim
+target!$target$ac_delim
+target_cpu!$target_cpu$ac_delim
+target_vendor!$target_vendor$ac_delim
+target_os!$target_os$ac_delim
+OS!$OS$ac_delim
+HOST_OS!$HOST_OS$ac_delim
+TARGET_OS!$TARGET_OS$ac_delim
+LINKALL!$LINKALL$ac_delim
+NOLINKALL!$NOLINKALL$ac_delim
+LLVM_ON_UNIX!$LLVM_ON_UNIX$ac_delim
+LLVM_ON_WIN32!$LLVM_ON_WIN32$ac_delim
+ARCH!$ARCH$ac_delim
+ENDIAN!$ENDIAN$ac_delim
+GREP!$GREP$ac_delim
+EGREP!$EGREP$ac_delim
+LLVM_CROSS_COMPILING!$LLVM_CROSS_COMPILING$ac_delim
+BUILD_CC!$BUILD_CC$ac_delim
+BUILD_EXEEXT!$BUILD_EXEEXT$ac_delim
+BUILD_CXX!$BUILD_CXX$ac_delim
+CVSBUILD!$CVSBUILD$ac_delim
+ENABLE_LIBCPP!$ENABLE_LIBCPP$ac_delim
+ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim
+ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim
+DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim
+ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim
+EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim
+DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim
+DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim
+JIT!$JIT$ac_delim
+TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim
+ENABLE_DOCS!$ENABLE_DOCS$ac_delim
+ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim
+ENABLE_THREADS!$ENABLE_THREADS$ac_delim
+ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim
+ENABLE_PIC!$ENABLE_PIC$ac_delim
+_ACEOF
+
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
+ break
+ elif $ac_last_try; then
+ { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
+echo "$as_me: error: could not make $CONFIG_STATUS" >&2;}
+ { (exit 1); exit 1; }; }
+ else
+ ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+ fi
+done
+
+ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed`
+if test -n "$ac_eof"; then
+ ac_eof=`echo "$ac_eof" | sort -nru | sed 1q`
+ ac_eof=`expr $ac_eof + 1`
+fi
+
+cat >>$CONFIG_STATUS <<_ACEOF
+cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+_ACEOF
+sed '
+s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g
+s/^/s,@/; s/!/@,|#_!!_#|/
+:n
+t n
+s/'"$ac_delim"'$/,g/; t
+s/$/\\/; p
+N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n
+' >>$CONFIG_STATUS <conf$$subs.sed
+rm -f conf$$subs.sed
+cat >>$CONFIG_STATUS <<_ACEOF
+CEOF$ac_eof
+_ACEOF
+
+
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+ cat >conf$$subs.sed <<_ACEOF
+ENABLE_SHARED!$ENABLE_SHARED$ac_delim
+ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim
+ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim
+TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim
+LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim
+LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim
+LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim
+LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim
+OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
+EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
+EXTRA_LD_OPTIONS!$EXTRA_LD_OPTIONS$ac_delim
+BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
+NM!$NM$ac_delim
+ifGNUmake!$ifGNUmake$ac_delim
+LN_S!$LN_S$ac_delim
+CMP!$CMP$ac_delim
+CP!$CP$ac_delim
+DATE!$DATE$ac_delim
+FIND!$FIND$ac_delim
+MKDIR!$MKDIR$ac_delim
+MV!$MV$ac_delim
+RANLIB!$RANLIB$ac_delim
+AR!$AR$ac_delim
+RM!$RM$ac_delim
+SED!$SED$ac_delim
+TAR!$TAR$ac_delim
+BINPWD!$BINPWD$ac_delim
+GRAPHVIZ!$GRAPHVIZ$ac_delim
+DOT!$DOT$ac_delim
+FDP!$FDP$ac_delim
+NEATO!$NEATO$ac_delim
+TWOPI!$TWOPI$ac_delim
+CIRCO!$CIRCO$ac_delim
+GV!$GV$ac_delim
+DOTTY!$DOTTY$ac_delim
+XDOT_PY!$XDOT_PY$ac_delim
+INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim
+INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim
+INSTALL_DATA!$INSTALL_DATA$ac_delim
+BZIP2!$BZIP2$ac_delim
+CAT!$CAT$ac_delim
+DOXYGEN!$DOXYGEN$ac_delim
+GROFF!$GROFF$ac_delim
+GZIPBIN!$GZIPBIN$ac_delim
+POD2HTML!$POD2HTML$ac_delim
+POD2MAN!$POD2MAN$ac_delim
+PDFROFF!$PDFROFF$ac_delim
+RUNTEST!$RUNTEST$ac_delim
+TCLSH!$TCLSH$ac_delim
+ZIP!$ZIP$ac_delim
+OCAMLC!$OCAMLC$ac_delim
+OCAMLOPT!$OCAMLOPT$ac_delim
+OCAMLDEP!$OCAMLDEP$ac_delim
+OCAMLDOC!$OCAMLDOC$ac_delim
+GAS!$GAS$ac_delim
+HAVE_LINK_VERSION_SCRIPT!$HAVE_LINK_VERSION_SCRIPT$ac_delim
+INSTALL_LTDL_TRUE!$INSTALL_LTDL_TRUE$ac_delim
+INSTALL_LTDL_FALSE!$INSTALL_LTDL_FALSE$ac_delim
+CONVENIENCE_LTDL_TRUE!$CONVENIENCE_LTDL_TRUE$ac_delim
+CONVENIENCE_LTDL_FALSE!$CONVENIENCE_LTDL_FALSE$ac_delim
+LIBADD_DL!$LIBADD_DL$ac_delim
+NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim
+NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim
+COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim
+USE_UDIS86!$USE_UDIS86$ac_delim
+USE_OPROFILE!$USE_OPROFILE$ac_delim
+HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim
+HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim
+MMAP_FILE!$MMAP_FILE$ac_delim
+SHLIBEXT!$SHLIBEXT$ac_delim
+SHLIBPATH_VAR!$SHLIBPATH_VAR$ac_delim
+LLVM_PREFIX!$LLVM_PREFIX$ac_delim
+LLVM_BINDIR!$LLVM_BINDIR$ac_delim
+LLVM_LIBDIR!$LLVM_LIBDIR$ac_delim
+LLVM_DATADIR!$LLVM_DATADIR$ac_delim
+LLVM_DOCSDIR!$LLVM_DOCSDIR$ac_delim
+LLVM_ETCDIR!$LLVM_ETCDIR$ac_delim
+LLVM_INCLUDEDIR!$LLVM_INCLUDEDIR$ac_delim
+LLVM_INFODIR!$LLVM_INFODIR$ac_delim
+LLVM_MANDIR!$LLVM_MANDIR$ac_delim
+LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
+BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
+ALL_BINDINGS!$ALL_BINDINGS$ac_delim
+OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
+ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
+RPATH!$RPATH$ac_delim
+RDYNAMIC!$RDYNAMIC$ac_delim
LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 41; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 89; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -2395,7 +21804,7 @@ if test -n "$ac_eof"; then
fi
cat >>$CONFIG_STATUS <<_ACEOF
-cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof
+cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof
/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end
_ACEOF
sed '
@@ -2600,6 +22009,10 @@ ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
# CONFIG_FILE
#
+ case $INSTALL in
+ [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+ *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+ esac
_ACEOF
cat >>$CONFIG_STATUS <<\_ACEOF
@@ -2652,8 +22065,9 @@ s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
s&@builddir@&$ac_builddir&;t t
s&@abs_builddir@&$ac_abs_builddir&;t t
s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
$ac_datarootdir_hack
-" $ac_file_inputs | sed -f "$tmp/subs-1.sed" >$tmp/out
+" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out
test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
{ ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
@@ -2678,16 +22092,16 @@ echo "$as_me: executing $ac_file commands" >&6;}
case $ac_file$ac_mode in
- "Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;;
- "lib/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;;
- "lib/sample/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/sample/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/sample/Makefile lib/sample/Makefile ;;
- "tools/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;;
- "tools/sample/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/sample/Makefile`
- ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/sample/Makefile tools/sample/Makefile ;;
+ "Makefile":C) ${srcdir}/autoconf/mkinstalldirs `dirname Makefile`
+ ${SHELL} ${srcdir}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;;
+ "lib/Makefile":C) ${srcdir}/autoconf/mkinstalldirs `dirname lib/Makefile`
+ ${SHELL} ${srcdir}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;;
+ "lib/sample/Makefile":C) ${srcdir}/autoconf/mkinstalldirs `dirname lib/sample/Makefile`
+ ${SHELL} ${srcdir}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/sample/Makefile lib/sample/Makefile ;;
+ "tools/Makefile":C) ${srcdir}/autoconf/mkinstalldirs `dirname tools/Makefile`
+ ${SHELL} ${srcdir}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;;
+ "tools/sample/Makefile":C) ${srcdir}/autoconf/mkinstalldirs `dirname tools/sample/Makefile`
+ ${SHELL} ${srcdir}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/sample/Makefile tools/sample/Makefile ;;
esac
done # for ac_tag
@@ -2720,3 +22134,178 @@ if test "$no_create" != yes; then
$ac_cs_success || { (exit 1); exit 1; }
fi
+#
+# CONFIG_SUBDIRS section.
+#
+if test "$no_recursion" != yes; then
+
+ # Remove --cache-file and --srcdir arguments so they do not pile up.
+ ac_sub_configure_args=
+ ac_prev=
+ eval "set x $ac_configure_args"
+ shift
+ for ac_arg
+ do
+ if test -n "$ac_prev"; then
+ ac_prev=
+ continue
+ fi
+ case $ac_arg in
+ -cache-file | --cache-file | --cache-fil | --cache-fi \
+ | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+ ac_prev=cache_file ;;
+ -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+ | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* \
+ | --c=*)
+ ;;
+ --config-cache | -C)
+ ;;
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+ ac_prev=srcdir ;;
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+ ;;
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ ac_prev=prefix ;;
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ ;;
+ *)
+ case $ac_arg in
+ *\'*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ ac_sub_configure_args="$ac_sub_configure_args '$ac_arg'" ;;
+ esac
+ done
+
+ # Always prepend --prefix to ensure using the same prefix
+ # in subdir configurations.
+ ac_arg="--prefix=$prefix"
+ case $ac_arg in
+ *\'*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ esac
+ ac_sub_configure_args="$ac_arg $ac_sub_configure_args"
+
+ ac_popdir=`pwd`
+ for ac_dir in : $subdirs; do test "x$ac_dir" = x: && continue
+
+ # Do not complain, so a configure script can configure whichever
+ # parts of a large source tree are present.
+ test -d "$srcdir/$ac_dir" || continue
+
+ ac_msg="=== configuring in $ac_dir (`pwd`/$ac_dir)"
+ echo "$as_me:$LINENO: $ac_msg" >&5
+ echo "$ac_msg" >&6
+ { as_dir="$ac_dir"
+ case $as_dir in #(
+ -*) as_dir=./$as_dir;;
+ esac
+ test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || {
+ as_dirs=
+ while :; do
+ case $as_dir in #(
+ *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #(
+ *) as_qdir=$as_dir;;
+ esac
+ as_dirs="'$as_qdir' $as_dirs"
+ as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$as_dir" : 'X\(//\)[^/]' \| \
+ X"$as_dir" : 'X\(//\)$' \| \
+ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+echo X"$as_dir" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)[^/].*/{
+ s//\1/
+ q
+ }
+ /^X\(\/\/\)$/{
+ s//\1/
+ q
+ }
+ /^X\(\/\).*/{
+ s//\1/
+ q
+ }
+ s/.*/./; q'`
+ test -d "$as_dir" && break
+ done
+ test -z "$as_dirs" || eval "mkdir $as_dirs"
+ } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5
+echo "$as_me: error: cannot create directory $as_dir" >&2;}
+ { (exit 1); exit 1; }; }; }
+ ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+ ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+ # A ".." for each directory in $ac_dir_suffix.
+ ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'`
+ case $ac_top_builddir_sub in
+ "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+ *) ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+ esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+ .) # We are building in place.
+ ac_srcdir=.
+ ac_top_srcdir=$ac_top_builddir_sub
+ ac_abs_top_srcdir=$ac_pwd ;;
+ [\\/]* | ?:[\\/]* ) # Absolute name.
+ ac_srcdir=$srcdir$ac_dir_suffix;
+ ac_top_srcdir=$srcdir
+ ac_abs_top_srcdir=$srcdir ;;
+ *) # Relative name.
+ ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+ ac_top_srcdir=$ac_top_build_prefix$srcdir
+ ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+ cd "$ac_dir"
+
+ # Check for guested configure; otherwise get Cygnus style configure.
+ if test -f "$ac_srcdir/configure.gnu"; then
+ ac_sub_configure=$ac_srcdir/configure.gnu
+ elif test -f "$ac_srcdir/configure"; then
+ ac_sub_configure=$ac_srcdir/configure
+ elif test -f "$ac_srcdir/configure.in"; then
+ # This should be Cygnus configure.
+ ac_sub_configure=$ac_aux_dir/configure
+ else
+ { echo "$as_me:$LINENO: WARNING: no configuration information is in $ac_dir" >&5
+echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2;}
+ ac_sub_configure=
+ fi
+
+ # The recursion is here.
+ if test -n "$ac_sub_configure"; then
+ # Make the cache file name correct relative to the subdirectory.
+ case $cache_file in
+ [\\/]* | ?:[\\/]* ) ac_sub_cache_file=$cache_file ;;
+ *) # Relative name.
+ ac_sub_cache_file=$ac_top_build_prefix$cache_file ;;
+ esac
+
+ { echo "$as_me:$LINENO: running $SHELL $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_srcdir" >&5
+echo "$as_me: running $SHELL $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_srcdir" >&6;}
+ # The eval makes quoting arguments work.
+ eval "\$SHELL \"\$ac_sub_configure\" $ac_sub_configure_args \
+ --cache-file=\"\$ac_sub_cache_file\" --srcdir=\"\$ac_srcdir\"" ||
+ { { echo "$as_me:$LINENO: error: $ac_sub_configure failed for $ac_dir" >&5
+echo "$as_me: error: $ac_sub_configure failed for $ac_dir" >&2;}
+ { (exit 1); exit 1; }; }
+ fi
+
+ cd "$ac_popdir"
+ done
+fi
+
diff --git a/runtime/LLVMBuild.txt b/runtime/LLVMBuild.txt
new file mode 100644
index 000000000000..05334fda5076
--- /dev/null
+++ b/runtime/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./runtime/LLVMBuild.txt ----------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Group
+name = Runtime
+parent = $ROOT
diff --git a/runtime/libprofile/CommonProfiling.c b/runtime/libprofile/CommonProfiling.c
index fbc1ef448518..d55f51c5b44a 100644
--- a/runtime/libprofile/CommonProfiling.c
+++ b/runtime/libprofile/CommonProfiling.c
@@ -46,7 +46,7 @@ int save_arguments(int argc, const char **argv) {
* what to do with it.
*/
const char *Arg = argv[1];
- memmove(&argv[1], &argv[2], (argc-1)*sizeof(char*));
+ memmove((char**)&argv[1], &argv[2], (argc-1)*sizeof(char*));
--argc;
if (!strcmp(Arg, "-llvmprof-output")) {
@@ -54,7 +54,7 @@ int save_arguments(int argc, const char **argv) {
puts("-llvmprof-output requires a filename argument!");
else {
OutputFilename = strdup(argv[1]);
- memmove(&argv[1], &argv[2], (argc-1)*sizeof(char*));
+ memmove((char**)&argv[1], &argv[2], (argc-1)*sizeof(char*));
--argc;
}
} else {
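The hunk above shifts the argument vector down by one slot to consume a runtime flag; the only change is the (char**) cast on the destination, presumably added to quiet const-qualification warnings from stricter compilers, since argv is declared const char **. A minimal standalone sketch of the same idiom (not the project's code; drop_arg is a hypothetical helper):

#include <string.h>

/* Remove argv[i] by sliding the tail of the vector down one slot.
   memmove is used because source and destination overlap. The element
   count also moves the trailing NULL sentinel, matching the patch's
   (argc-1) count for i == 1. The cast mirrors the patch. */
static int drop_arg(int argc, const char **argv, int i) {
    memmove((char **)&argv[i], &argv[i + 1], (argc - i) * sizeof(char *));
    return argc - 1;
}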
diff --git a/runtime/libprofile/GCDAProfiling.c b/runtime/libprofile/GCDAProfiling.c
index 4ffb12b15ebf..3a48bb282652 100644
--- a/runtime/libprofile/GCDAProfiling.c
+++ b/runtime/libprofile/GCDAProfiling.c
@@ -113,6 +113,18 @@ void llvm_gcda_start_file(const char *orig_filename) {
recursive_mkdir(filename);
output_file = fopen(filename, "wb");
+ if (!output_file) {
+ const char *cptr = strrchr(orig_filename, '/');
+ output_file = fopen(cptr ? cptr + 1 : orig_filename, "wb");
+
+ if (!output_file) {
+ fprintf(stderr, "LLVM profiling runtime: while opening '%s': ",
+ cptr ? cptr + 1 : orig_filename);
+ perror("");
+ exit(1);
+ }
+ }
+
/* gcda file, version 404*, stamp LLVM. */
#ifdef __APPLE__
fwrite("adcg*204MVLL", 12, 1, output_file);
diff --git a/runtime/libprofile/PathProfiling.c b/runtime/libprofile/PathProfiling.c
index 283678521381..71ee944fc569 100644
--- a/runtime/libprofile/PathProfiling.c
+++ b/runtime/libprofile/PathProfiling.c
@@ -26,11 +26,6 @@
#include <stdlib.h>
#include <stdio.h>
-/* Must use __inline in Microsoft C */
-#if defined(_MSC_VER)
-#define inline __inline
-#endif
-
/* note that this is used for functions with large path counts,
but it is unlikely those paths will ALL be executed */
#define ARBITRARY_HASH_BIN_COUNT 100
@@ -112,7 +107,7 @@ void writeArrayTable(uint32_t fNumber, ftEntry_t* ft, uint32_t* funcCount) {
}
}
-static inline uint32_t hash (uint32_t key) {
+static uint32_t hash (uint32_t key) {
/* this may benefit from a proper hash function */
return key%ARBITRARY_HASH_BIN_COUNT;
}
@@ -155,7 +150,7 @@ void writeHashTable(uint32_t functionNumber, pathHashTable_t* hashTable) {
}
/* Return a pointer to this path's specific path counter */
-static inline uint32_t* getPathCounter(uint32_t functionNumber,
+static uint32_t* getPathCounter(uint32_t functionNumber,
uint32_t pathNumber) {
pathHashTable_t* hashTable;
pathHashEntry_t* hashEntry;
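This hunk drops both the MSVC shim and the inline qualifiers: MSVC's C mode is C89 and spells the keyword __inline, so the old code needed a #define to compile there. Plain static functions sidestep the issue entirely, and an optimizing compiler will usually inline such small helpers anyway. For reference, the deleted shim pattern looked like this:

/* Portability shim removed by the patch: map C99 `inline` onto MSVC's
   `__inline` when building with Microsoft C. Unnecessary once the
   helpers are declared plain `static`. */
#if defined(_MSC_VER)
#define inline __inline
#endif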
diff --git a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
index 035299e0ac82..5b81c17d43a3 100644
--- a/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
+++ b/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
@@ -3,12 +3,12 @@
; RUN: grep {ret i32 0}
; END.
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
define i32 @test(i32* %P, i16* %Q) {
%A = load i16* %Q ; <i16> [#uses=1]
%x = load i32* %P ; <i32> [#uses=1]
- %B = call i16 @llvm.cttz.i16( i16 %A ) ; <i16> [#uses=1]
+ %B = call i16 @llvm.cttz.i16( i16 %A, i1 true ) ; <i16> [#uses=1]
%y = load i32* %P ; <i32> [#uses=1]
store i16 %B, i16* %Q
%z = sub i32 %x, %y ; <i32> [#uses=1]
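The declaration change reflects the new second operand on llvm.cttz (and llvm.ctlz): an i1 flag stating whether a zero input yields an undefined result, which lets codegen lower directly to instructions like x86's BSF when the flag is true. A hedged C model of the semantics (cttz32 is illustrative, not an LLVM API):

/* Count trailing zeros with the i1 flag modeled as a parameter: when
   zero_is_undef is nonzero, the x == 0 case is undefined (here it just
   returns 0); otherwise zero must yield the bit width. */
static unsigned cttz32(unsigned x, int zero_is_undef) {
    unsigned n = 0;
    if (x == 0)
        return zero_is_undef ? 0u /* really undefined */ : 32u;
    while ((x & 1u) == 0) {
        x >>= 1;
        ++n;
    }
    return n;
}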
diff --git a/test/Analysis/BasicAA/aligned-overread.ll b/test/Analysis/BasicAA/aligned-overread.ll
new file mode 100644
index 000000000000..b05f8eb69483
--- /dev/null
+++ b/test/Analysis/BasicAA/aligned-overread.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.S0 = type <{ i8, [4 x i8] }>
+
+@a = global { i8, i8, i8, i8, i8 } { i8 undef, i8 0, i8 0, i8 0, i8 0 }, align 8
+
+define i32 @main() nounwind uwtable ssp {
+entry:
+ %tmp = load i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+ %tmp1 = or i8 %tmp, -128
+ store i8 %tmp1, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+ %tmp2 = load i64* bitcast ({ i8, i8, i8, i8, i8 }* @a to i64*), align 8
+ store i8 11, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+ %tmp3 = trunc i64 %tmp2 to i32
+ ret i32 %tmp3
+
+; Make sure we don't delete either store here
+; CHECK: @main
+; CHECK: store i8 %tmp1
+; CHECK: store i8 11
+}
+
diff --git a/test/Analysis/BasicAA/constant-over-index.ll b/test/Analysis/BasicAA/constant-over-index.ll
index 8a8ac4f72103..48ef2595f2c3 100644
--- a/test/Analysis/BasicAA/constant-over-index.ll
+++ b/test/Analysis/BasicAA/constant-over-index.ll
@@ -16,8 +16,8 @@ loop:
%p.0.i.0 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %i, i64 0
- volatile store double 0.0, double* %p3
- volatile store double 0.1, double* %p.0.i.0
+ store volatile double 0.0, double* %p3
+ store volatile double 0.1, double* %p.0.i.0
%i.next = add i64 %i, 1
%cmp = icmp slt i64 %i.next, 3
diff --git a/test/Analysis/BasicAA/dg.exp b/test/Analysis/BasicAA/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/BasicAA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BasicAA/lit.local.cfg b/test/Analysis/BasicAA/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/BasicAA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BasicAA/phi-and-select.ll b/test/Analysis/BasicAA/phi-and-select.ll
index 9bc47ae44a97..0ed4a2c5a576 100644
--- a/test/Analysis/BasicAA/phi-and-select.ll
+++ b/test/Analysis/BasicAA/phi-and-select.ll
@@ -17,8 +17,8 @@ false:
exit:
%a = phi double* [ %x, %true ], [ %y, %false ]
%b = phi double* [ %x, %false ], [ %y, %true ]
- volatile store double 0.0, double* %a
- volatile store double 1.0, double* %b
+ store volatile double 0.0, double* %a
+ store volatile double 1.0, double* %b
ret void
}
@@ -27,8 +27,8 @@ define void @bar(i1 %m, double* noalias %x, double* noalias %y) {
entry:
%a = select i1 %m, double* %x, double* %y
%b = select i1 %m, double* %y, double* %x
- volatile store double 0.000000e+00, double* %a
- volatile store double 1.000000e+00, double* %b
+ store volatile double 0.000000e+00, double* %a
+ store volatile double 1.000000e+00, double* %b
ret void
}
@@ -56,8 +56,8 @@ nfalse:
nexit:
%b = phi double* [ %v, %ntrue ], [ %w, %nfalse ]
- volatile store double 0.0, double* %a
- volatile store double 1.0, double* %b
+ store volatile double 0.0, double* %a
+ store volatile double 1.0, double* %b
ret void
}
@@ -67,7 +67,7 @@ define void @fin(i1 %m, double* noalias %x, double* noalias %y,
entry:
%a = select i1 %m, double* %x, double* %y
%b = select i1 %n, double* %v, double* %w
- volatile store double 0.000000e+00, double* %a
- volatile store double 1.000000e+00, double* %b
+ store volatile double 0.000000e+00, double* %a
+ store volatile double 1.000000e+00, double* %b
ret void
}
diff --git a/test/Analysis/BlockFrequencyInfo/dg.exp b/test/Analysis/BlockFrequencyInfo/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/BlockFrequencyInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/BlockFrequencyInfo/lit.local.cfg b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/BlockFrequencyInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/basic.ll b/test/Analysis/BranchProbabilityInfo/basic.ll
new file mode 100644
index 000000000000..74d06a18f7b9
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -0,0 +1,90 @@
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+define i32 @test1(i32 %i, i32* %a) {
+; CHECK: Printing analysis {{.*}} for function 'test1'
+entry:
+ br label %body
+; CHECK: edge entry -> body probability is 16 / 16 = 100%
+
+body:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body
+; CHECK: edge body -> exit probability is 4 / 128
+; CHECK: edge body -> body probability is 124 / 128
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @test2(i32 %i, i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test2'
+entry:
+ %cond = icmp ult i32 %i, 42
+ br i1 %cond, label %then, label %else, !prof !0
+; CHECK: edge entry -> then probability is 64 / 68
+; CHECK: edge entry -> else probability is 4 / 68
+
+then:
+ br label %exit
+; CHECK: edge then -> exit probability is 16 / 16 = 100%
+
+else:
+ br label %exit
+; CHECK: edge else -> exit probability is 16 / 16 = 100%
+
+exit:
+ %result = phi i32 [ %a, %then ], [ %b, %else ]
+ ret i32 %result
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+
+define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; CHECK: Printing analysis {{.*}} for function 'test3'
+entry:
+ switch i32 %i, label %case_a [ i32 1, label %case_b
+ i32 2, label %case_c
+ i32 3, label %case_d
+ i32 4, label %case_e ], !prof !1
+; CHECK: edge entry -> case_a probability is 4 / 80
+; CHECK: edge entry -> case_b probability is 4 / 80
+; CHECK: edge entry -> case_c probability is 64 / 80
+; CHECK: edge entry -> case_d probability is 4 / 80
+; CHECK: edge entry -> case_e probability is 4 / 80
+
+case_a:
+ br label %exit
+; CHECK: edge case_a -> exit probability is 16 / 16 = 100%
+
+case_b:
+ br label %exit
+; CHECK: edge case_b -> exit probability is 16 / 16 = 100%
+
+case_c:
+ br label %exit
+; CHECK: edge case_c -> exit probability is 16 / 16 = 100%
+
+case_d:
+ br label %exit
+; CHECK: edge case_d -> exit probability is 16 / 16 = 100%
+
+case_e:
+ br label %exit
+; CHECK: edge case_e -> exit probability is 16 / 16 = 100%
+
+exit:
+ %result = phi i32 [ %a, %case_a ],
+ [ %b, %case_b ],
+ [ %c, %case_c ],
+ [ %d, %case_d ],
+ [ %e, %case_e ]
+ ret i32 %result
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4}
diff --git a/test/Analysis/BranchProbabilityInfo/lit.local.cfg b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/BranchProbabilityInfo/loop.ll b/test/Analysis/BranchProbabilityInfo/loop.ll
new file mode 100644
index 000000000000..b648cbb16a60
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/loop.ll
@@ -0,0 +1,365 @@
+; Test the static branch probability heuristics for loops.
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+declare void @g1()
+declare void @g2()
+declare void @g3()
+declare void @g4()
+
+define void @test1(i32 %a, i32 %b) {
+entry:
+ br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc3, %do.end ]
+ call void @g1()
+ br label %do.body1
+; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+
+do.body1:
+ %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.body1 ]
+ call void @g2()
+ %inc = add nsw i32 %j.0, 1
+ %cmp = icmp slt i32 %inc, %b
+ br i1 %cmp, label %do.body1, label %do.end
+; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
+; CHECK: edge do.body1 -> do.end probability is 4 / 128
+
+do.end:
+ call void @g3()
+ %inc3 = add nsw i32 %i.0, 1
+ %cmp4 = icmp slt i32 %inc3, %a
+ br i1 %cmp4, label %do.body, label %do.end5
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end5 probability is 4 / 128
+
+do.end5:
+ call void @g4()
+ ret void
+}
+
+define void @test2(i32 %a, i32 %b) {
+entry:
+ %cmp9 = icmp sgt i32 %a, 0
+ br i1 %cmp9, label %for.body.lr.ph, label %for.end6
+; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
+; CHECK: edge entry -> for.end6 probability is 12 / 32
+
+for.body.lr.ph:
+ %cmp27 = icmp sgt i32 %b, 0
+ br label %for.body
+; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+
+for.body:
+ %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc5, %for.end ]
+ call void @g1()
+ br i1 %cmp27, label %for.body3, label %for.end
+; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+
+for.body3:
+ %j.08 = phi i32 [ %inc, %for.body3 ], [ 0, %for.body ]
+ call void @g2()
+ %inc = add nsw i32 %j.08, 1
+ %exitcond = icmp eq i32 %inc, %b
+ br i1 %exitcond, label %for.end, label %for.body3
+; CHECK: edge for.body3 -> for.end probability is 4 / 128
+; CHECK: edge for.body3 -> for.body3 probability is 124 / 128
+
+for.end:
+ call void @g3()
+ %inc5 = add nsw i32 %i.010, 1
+ %exitcond11 = icmp eq i32 %inc5, %a
+ br i1 %exitcond11, label %for.end6, label %for.body
+; CHECK: edge for.end -> for.end6 probability is 4 / 128
+; CHECK: edge for.end -> for.body probability is 124 / 128
+
+for.end6:
+ call void @g4()
+ ret void
+}
+
+define void @test3(i32 %a, i32 %b, i32* %c) {
+entry:
+ br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc4, %if.end ]
+ call void @g1()
+ %0 = load i32* %c, align 4
+ %cmp = icmp slt i32 %0, 42
+ br i1 %cmp, label %do.body1, label %if.end
+; CHECK: edge do.body -> do.body1 probability is 62 / 124 = 50%
+; CHECK: edge do.body -> if.end probability is 62 / 124 = 50%
+
+do.body1:
+ %j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
+ call void @g2()
+ %inc = add nsw i32 %j.0, 1
+ %cmp2 = icmp slt i32 %inc, %b
+ br i1 %cmp2, label %do.body1, label %if.end
+; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
+; CHECK: edge do.body1 -> if.end probability is 4 / 128
+
+if.end:
+ call void @g3()
+ %inc4 = add nsw i32 %i.0, 1
+ %cmp5 = icmp slt i32 %inc4, %a
+ br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge if.end -> do.body probability is 124 / 128
+; CHECK: edge if.end -> do.end6 probability is 4 / 128
+
+do.end6:
+ call void @g4()
+ ret void
+}
+
+define void @test4(i32 %a, i32 %b, i32* %c) {
+entry:
+ br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
+ call void @g1()
+ %0 = load i32* %c, align 4
+ %cmp = icmp slt i32 %0, 42
+ br i1 %cmp, label %return, label %do.body1
+; CHECK: edge do.body -> return probability is 4 / 128
+; CHECK: edge do.body -> do.body1 probability is 124 / 128
+
+do.body1:
+ %j.0 = phi i32 [ %inc, %do.body1 ], [ 0, %do.body ]
+ call void @g2()
+ %inc = add nsw i32 %j.0, 1
+ %cmp2 = icmp slt i32 %inc, %b
+ br i1 %cmp2, label %do.body1, label %do.end
+; CHECK: edge do.body1 -> do.body1 probability is 124 / 128
+; CHECK: edge do.body1 -> do.end probability is 4 / 128
+
+do.end:
+ call void @g3()
+ %inc4 = add nsw i32 %i.0, 1
+ %cmp5 = icmp slt i32 %inc4, %a
+ br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end6 probability is 4 / 128
+
+do.end6:
+ call void @g4()
+ br label %return
+; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+
+return:
+ ret void
+}
+
+define void @test5(i32 %a, i32 %b, i32* %c) {
+entry:
+ br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
+ call void @g1()
+ br label %do.body1
+; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+
+do.body1:
+ %j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ]
+ %0 = load i32* %c, align 4
+ %cmp = icmp slt i32 %0, 42
+ br i1 %cmp, label %return, label %if.end
+; CHECK: edge do.body1 -> return probability is 4 / 128
+; CHECK: edge do.body1 -> if.end probability is 124 / 128
+
+if.end:
+ call void @g2()
+ %inc = add nsw i32 %j.0, 1
+ %cmp2 = icmp slt i32 %inc, %b
+ br i1 %cmp2, label %do.body1, label %do.end
+; CHECK: edge if.end -> do.body1 probability is 124 / 128
+; CHECK: edge if.end -> do.end probability is 4 / 128
+
+do.end:
+ call void @g3()
+ %inc4 = add nsw i32 %i.0, 1
+ %cmp5 = icmp slt i32 %inc4, %a
+ br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end6 probability is 4 / 128
+
+do.end6:
+ call void @g4()
+ br label %return
+; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+
+return:
+ ret void
+}
+
+define void @test6(i32 %a, i32 %b, i32* %c) {
+entry:
+ br label %do.body
+; CHECK: edge entry -> do.body probability is 16 / 16 = 100%
+
+do.body:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
+ call void @g1()
+ br label %do.body1
+; CHECK: edge do.body -> do.body1 probability is 124 / 124 = 100%
+
+do.body1:
+ %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ]
+ call void @g2()
+ %0 = load i32* %c, align 4
+ %cmp = icmp slt i32 %0, 42
+ br i1 %cmp, label %return, label %do.cond
+; CHECK: edge do.body1 -> return probability is 4 / 128
+; CHECK: edge do.body1 -> do.cond probability is 124 / 128
+
+do.cond:
+ %inc = add nsw i32 %j.0, 1
+ %cmp2 = icmp slt i32 %inc, %b
+ br i1 %cmp2, label %do.body1, label %do.end
+; CHECK: edge do.cond -> do.body1 probability is 124 / 128
+; CHECK: edge do.cond -> do.end probability is 4 / 128
+
+do.end:
+ call void @g3()
+ %inc4 = add nsw i32 %i.0, 1
+ %cmp5 = icmp slt i32 %inc4, %a
+ br i1 %cmp5, label %do.body, label %do.end6
+; CHECK: edge do.end -> do.body probability is 124 / 128
+; CHECK: edge do.end -> do.end6 probability is 4 / 128
+
+do.end6:
+ call void @g4()
+ br label %return
+; CHECK: edge do.end6 -> return probability is 16 / 16 = 100%
+
+return:
+ ret void
+}
+
+define void @test7(i32 %a, i32 %b, i32* %c) {
+entry:
+ %cmp10 = icmp sgt i32 %a, 0
+ br i1 %cmp10, label %for.body.lr.ph, label %for.end7
+; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
+; CHECK: edge entry -> for.end7 probability is 12 / 32
+
+for.body.lr.ph:
+ %cmp38 = icmp sgt i32 %b, 0
+ br label %for.body
+; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+
+for.body:
+ %i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.inc5 ]
+ %0 = load i32* %c, align 4
+ %cmp1 = icmp eq i32 %0, %i.011
+ br i1 %cmp1, label %for.inc5, label %if.end
+; CHECK: edge for.body -> for.inc5 probability is 62 / 124 = 50%
+; CHECK: edge for.body -> if.end probability is 62 / 124 = 50%
+
+if.end:
+ call void @g1()
+ br i1 %cmp38, label %for.body4, label %for.end
+; CHECK: edge if.end -> for.body4 probability is 62 / 124 = 50%
+; CHECK: edge if.end -> for.end probability is 62 / 124 = 50%
+
+for.body4:
+ %j.09 = phi i32 [ %inc, %for.body4 ], [ 0, %if.end ]
+ call void @g2()
+ %inc = add nsw i32 %j.09, 1
+ %exitcond = icmp eq i32 %inc, %b
+ br i1 %exitcond, label %for.end, label %for.body4
+; CHECK: edge for.body4 -> for.end probability is 4 / 128
+; CHECK: edge for.body4 -> for.body4 probability is 124 / 128
+
+for.end:
+ call void @g3()
+ br label %for.inc5
+; CHECK: edge for.end -> for.inc5 probability is 124 / 124 = 100%
+
+for.inc5:
+ %inc6 = add nsw i32 %i.011, 1
+ %exitcond12 = icmp eq i32 %inc6, %a
+ br i1 %exitcond12, label %for.end7, label %for.body
+; CHECK: edge for.inc5 -> for.end7 probability is 4 / 128
+; CHECK: edge for.inc5 -> for.body probability is 124 / 128
+
+for.end7:
+ call void @g4()
+ ret void
+}
+
+define void @test8(i32 %a, i32 %b, i32* %c) {
+entry:
+ %cmp18 = icmp sgt i32 %a, 0
+ br i1 %cmp18, label %for.body.lr.ph, label %for.end15
+; CHECK: edge entry -> for.body.lr.ph probability is 20 / 32
+; CHECK: edge entry -> for.end15 probability is 12 / 32
+
+for.body.lr.ph:
+ %cmp216 = icmp sgt i32 %b, 0
+ %arrayidx5 = getelementptr inbounds i32* %c, i64 1
+ %arrayidx9 = getelementptr inbounds i32* %c, i64 2
+ br label %for.body
+; CHECK: edge for.body.lr.ph -> for.body probability is 16 / 16 = 100%
+
+for.body:
+ %i.019 = phi i32 [ 0, %for.body.lr.ph ], [ %inc14, %for.end ]
+ call void @g1()
+ br i1 %cmp216, label %for.body3, label %for.end
+; CHECK: edge for.body -> for.body3 probability is 62 / 124 = 50%
+; CHECK: edge for.body -> for.end probability is 62 / 124 = 50%
+
+for.body3:
+ %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+ %0 = load i32* %c, align 4
+ %cmp4 = icmp eq i32 %0, %j.017
+ br i1 %cmp4, label %for.inc, label %if.end
+; CHECK: edge for.body3 -> for.inc probability is 62 / 124 = 50%
+; CHECK: edge for.body3 -> if.end probability is 62 / 124 = 50%
+
+if.end:
+ %1 = load i32* %arrayidx5, align 4
+ %cmp6 = icmp eq i32 %1, %j.017
+ br i1 %cmp6, label %for.inc, label %if.end8
+; CHECK: edge if.end -> for.inc probability is 62 / 124 = 50%
+; CHECK: edge if.end -> if.end8 probability is 62 / 124 = 50%
+
+if.end8:
+ %2 = load i32* %arrayidx9, align 4
+ %cmp10 = icmp eq i32 %2, %j.017
+ br i1 %cmp10, label %for.inc, label %if.end12
+; CHECK: edge if.end8 -> for.inc probability is 62 / 124 = 50%
+; CHECK: edge if.end8 -> if.end12 probability is 62 / 124 = 50%
+
+if.end12:
+ call void @g2()
+ br label %for.inc
+; CHECK: edge if.end12 -> for.inc probability is 124 / 124 = 100%
+
+for.inc:
+ %inc = add nsw i32 %j.017, 1
+ %exitcond = icmp eq i32 %inc, %b
+ br i1 %exitcond, label %for.end, label %for.body3
+; CHECK: edge for.inc -> for.end probability is 4 / 128
+; CHECK: edge for.inc -> for.body3 probability is 124 / 128
+
+for.end:
+ call void @g3()
+ %inc14 = add nsw i32 %i.019, 1
+ %exitcond20 = icmp eq i32 %inc14, %a
+ br i1 %exitcond20, label %for.end15, label %for.body
+; CHECK: edge for.end -> for.end15 probability is 4 / 128
+; CHECK: edge for.end -> for.body probability is 124 / 128
+
+for.end15:
+ call void @g4()
+ ret void
+}
diff --git a/test/Analysis/BranchProbabilityInfo/noreturn.ll b/test/Analysis/BranchProbabilityInfo/noreturn.ll
new file mode 100644
index 000000000000..8b9ae11f7d35
--- /dev/null
+++ b/test/Analysis/BranchProbabilityInfo/noreturn.ll
@@ -0,0 +1,79 @@
+; Test the static branch probability heuristics for no-return functions.
+; RUN: opt < %s -analyze -branch-prob | FileCheck %s
+
+declare void @abort() noreturn
+
+define i32 @test1(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test1'
+entry:
+ %cond = icmp eq i32 %a, 42
+ br i1 %cond, label %exit, label %abort
+; CHECK: edge entry -> exit probability is 1048575 / 1048576
+; CHECK: edge entry -> abort probability is 1 / 1048576
+
+abort:
+ call void @abort() noreturn
+ unreachable
+
+exit:
+ ret i32 %b
+}
+
+define i32 @test2(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test2'
+entry:
+ switch i32 %a, label %exit [i32 1, label %case_a
+ i32 2, label %case_b
+ i32 3, label %case_c
+ i32 4, label %case_d]
+; CHECK: edge entry -> exit probability is 1048575 / 1048579
+; CHECK: edge entry -> case_a probability is 1 / 1048579
+; CHECK: edge entry -> case_b probability is 1 / 1048579
+; CHECK: edge entry -> case_c probability is 1 / 1048579
+; CHECK: edge entry -> case_d probability is 1 / 1048579
+
+case_a:
+ br label %case_b
+
+case_b:
+ br label %case_c
+
+case_c:
+ br label %case_d
+
+case_d:
+ call void @abort() noreturn
+ unreachable
+
+exit:
+ ret i32 %b
+}
+
+define i32 @test3(i32 %a, i32 %b) {
+; CHECK: Printing analysis {{.*}} for function 'test3'
+; Make sure we unify across multiple conditional branches.
+entry:
+ %cond1 = icmp eq i32 %a, 42
+ br i1 %cond1, label %exit, label %dom
+; CHECK: edge entry -> exit probability is 1048575 / 1048576
+; CHECK: edge entry -> dom probability is 1 / 1048576
+
+dom:
+ %cond2 = icmp ult i32 %a, 42
+ br i1 %cond2, label %idom1, label %idom2
+; CHECK: edge dom -> idom1 probability is 1 / 2
+; CHECK: edge dom -> idom2 probability is 1 / 2
+
+idom1:
+ br label %abort
+
+idom2:
+ br label %abort
+
+abort:
+ call void @abort() noreturn
+ unreachable
+
+exit:
+ ret i32 %b
+}
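The expected figures are consistent with a simple model: an edge into a block that unavoidably reaches a noreturn call gets a minimal weight of 1 against a 2^20 scale, and the remaining successors share the rest. A quick check of the ratios in the CHECK lines (the weight-1 model is inferred from the printed numbers, not a statement of the pass's internals):

#include <stdio.h>

int main(void) {
    unsigned scale = 1u << 20;                        /* 1048576 */
    printf("exit:  %u / %u\n", scale - 1, scale);     /* test1 */
    /* test2: four no-return cases of weight 1 plus the normal exit. */
    unsigned sum = (scale - 1) + 4;                   /* 1048579 */
    printf("exit:  %u / %u\n", scale - 1, sum);
    printf("case:  1 / %u\n", sum);
    return 0;
}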
diff --git a/test/Analysis/CallGraph/dg.exp b/test/Analysis/CallGraph/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/CallGraph/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/CallGraph/lit.local.cfg b/test/Analysis/CallGraph/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/CallGraph/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Dominators/dg.exp b/test/Analysis/Dominators/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/Dominators/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/Dominators/invoke.ll b/test/Analysis/Dominators/invoke.ll
new file mode 100644
index 000000000000..f935750c987e
--- /dev/null
+++ b/test/Analysis/Dominators/invoke.ll
@@ -0,0 +1,19 @@
+; RUN: opt -verify -disable-output %s
+; This tests that we handle unreachable blocks correctly
+
+define void @f() {
+ %v1 = invoke i32* @g()
+ to label %bb1 unwind label %bb2
+ invoke void @__dynamic_cast()
+ to label %bb1 unwind label %bb2
+bb1:
+ %Hidden = getelementptr inbounds i32* %v1, i64 1
+ ret void
+bb2:
+ %lpad.loopexit80 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ ret void
+}
+declare i32 @__gxx_personality_v0(...)
+declare void @__dynamic_cast()
+declare i32* @g()
diff --git a/test/Analysis/Dominators/lit.local.cfg b/test/Analysis/Dominators/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/Dominators/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/GlobalsModRef/dg.exp b/test/Analysis/GlobalsModRef/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/GlobalsModRef/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/GlobalsModRef/lit.local.cfg b/test/Analysis/GlobalsModRef/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/GlobalsModRef/pr12351.ll b/test/Analysis/GlobalsModRef/pr12351.ll
new file mode 100644
index 000000000000..1c5ac43f8d27
--- /dev/null
+++ b/test/Analysis/GlobalsModRef/pr12351.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -basicaa -globalsmodref-aa -gvn -S | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+define void @foo(i8* %x, i8* %y) {
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x, i8* %y, i32 1, i32 1, i1 false);
+ ret void
+}
+
+define void @bar(i8* %y, i8* %z) {
+ %x = alloca i8
+ call void @foo(i8* %x, i8* %y)
+ %t = load i8* %x
+ store i8 %t, i8* %y
+; CHECK: store i8 %t, i8* %y
+ ret void
+}
+
+
+define i32 @foo2() {
+ %foo = alloca i32
+ call void @bar2(i32* %foo)
+ %t0 = load i32* %foo, align 4
+; CHECK: %t0 = load i32* %foo, align 4
+ ret i32 %t0
+}
+
+define void @bar2(i32* %foo) {
+ store i32 0, i32* %foo, align 4
+ tail call void @llvm.dbg.value(metadata !{}, i64 0, metadata !{})
+ ret void
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
diff --git a/test/Analysis/LoopDependenceAnalysis/dg.exp b/test/Analysis/LoopDependenceAnalysis/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/LoopDependenceAnalysis/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/LoopDependenceAnalysis/lit.local.cfg b/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/LoopDependenceAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/LoopInfo/dg.exp b/test/Analysis/LoopInfo/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/LoopInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/LoopInfo/lit.local.cfg b/test/Analysis/LoopInfo/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/LoopInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/PostDominators/dg.exp b/test/Analysis/PostDominators/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/PostDominators/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/PostDominators/lit.local.cfg b/test/Analysis/PostDominators/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/PostDominators/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/Profiling/dg.exp b/test/Analysis/Profiling/dg.exp
deleted file mode 100644
index 1eb4755c4102..000000000000
--- a/test/Analysis/Profiling/dg.exp
+++ /dev/null
@@ -1,4 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-
diff --git a/test/Analysis/Profiling/lit.local.cfg b/test/Analysis/Profiling/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/Profiling/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/RegionInfo/dg.exp b/test/Analysis/RegionInfo/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/RegionInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/RegionInfo/lit.local.cfg b/test/Analysis/RegionInfo/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/RegionInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
index 4f14a0d9a5ea..ce0329d9ce8c 100644
--- a/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
+++ b/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %loop: backedge-taken count is (100 + (-100 smax %n))}
+; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s
; PR2002
+; CHECK: Loop %loop: backedge-taken count is (100 + (-100 smax %n))
define void @foo(i8 %n) {
entry:
br label %loop
diff --git a/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
new file mode 100644
index 000000000000..138c015f12ca
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S
+; PR11882: ComputeLoadConstantCompareExitLimit crash.
+;
+; for.body is deleted leaving a loop-invariant load.
+; CHECK-NOT: for.body
+target datalayout = "e-p:64:64:64-n32:64"
+
+@func_21_l_773 = external global i32, align 4
+@g_814 = external global i32, align 4
+@g_244 = internal global [1 x [0 x i32]] zeroinitializer, align 4
+
+define void @func_21() nounwind uwtable ssp {
+entry:
+ br label %lbl_818
+
+lbl_818: ; preds = %for.end, %entry
+ call void (...)* @func_27()
+ store i32 0, i32* @g_814, align 4, !tbaa !0
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %lbl_818
+ %0 = load i32* @g_814, align 4, !tbaa !0
+ %cmp = icmp sle i32 %0, 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %idxprom = sext i32 %0 to i64
+ %arrayidx = getelementptr inbounds [0 x i32]* getelementptr inbounds ([1 x [0 x i32]]* @g_244, i32 0, i64 0), i32 0, i64 %idxprom
+ %1 = load i32* %arrayidx, align 1, !tbaa !0
+ store i32 %1, i32* @func_21_l_773, align 4, !tbaa !0
+ store i32 1, i32* @g_814, align 4, !tbaa !0
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %2 = load i32* @func_21_l_773, align 4, !tbaa !0
+ %tobool = icmp ne i32 %2, 0
+ br i1 %tobool, label %lbl_818, label %if.end
+
+if.end: ; preds = %for.end
+ ret void
+}
+
+declare void @func_27(...)
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index e90a55559d7f..d9b83a929aa7 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -1,14 +1,12 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-
-; Indvars should be able to insert a canonical induction variable
-; for the bb6 loop without using a maximum calculation (icmp, select)
-; because it should be able to prove that the comparison is guarded
-; by an appropriate conditional branch. Unfortunately, indvars is
-; not yet able to find the comparison for the other two loops in
-; this testcase.
-; CHECK: entry:
-; CHECK-NOT: select
-; CHECK: bb6:
+; RUN: opt < %s -analyze -scalar-evolution -S | FileCheck %s
+
+; Indvars should be able to find the trip count for the bb6 loop
+; without using a maximum calculation (icmp, select) because it should
+; be able to prove that the comparison is guarded by an appropriate
+; conditional branch. Unfortunately, indvars is not yet able to find
+; the comparison for the other two loops in this testcase.
+;
+; CHECK: Loop %bb6: backedge-taken count is (-1 + %w)
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
diff --git a/test/Analysis/ScalarEvolution/dg.exp b/test/Analysis/ScalarEvolution/dg.exp
deleted file mode 100644
index b65a2503ac6f..000000000000
--- a/test/Analysis/ScalarEvolution/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Analysis/ScalarEvolution/lit.local.cfg b/test/Analysis/ScalarEvolution/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Analysis/ScalarEvolution/load.ll b/test/Analysis/ScalarEvolution/load.ll
new file mode 100644
index 000000000000..2c753f5befcb
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/load.ll
@@ -0,0 +1,65 @@
+; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+@arr1 = internal unnamed_addr constant [50 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50], align 4
+@arr2 = internal unnamed_addr constant [50 x i32] [i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0], align 4
+
+; PR11034
+define i32 @test1() nounwind readnone {
+; CHECK: test1
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %sum.04 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
+; CHECK: --> %sum.04{{ *}}Exits: 2450
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [50 x i32]* @arr1, i32 0, i32 %i.03
+ %0 = load i32* %arrayidx, align 4
+; CHECK: --> %0{{ *}}Exits: 50
+ %arrayidx1 = getelementptr inbounds [50 x i32]* @arr2, i32 0, i32 %i.03
+ %1 = load i32* %arrayidx1, align 4
+; CHECK: --> %1{{ *}}Exits: 0
+ %add = add i32 %0, %sum.04
+ %add2 = add i32 %add, %1
+ %inc = add nsw i32 %i.03, 1
+ %cmp = icmp eq i32 %inc, 50
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add2
+}
+
+
+%struct.ListNode = type { %struct.ListNode*, i32 }
+
+@node5 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node4 to %struct.ListNode*), i32 4, [4 x i8] undef }, align 8
+@node4 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node3 to %struct.ListNode*), i32 3, [4 x i8] undef }, align 8
+@node3 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node2 to %struct.ListNode*), i32 2, [4 x i8] undef }, align 8
+@node2 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node1 to %struct.ListNode*), i32 1, [4 x i8] undef }, align 8
+@node1 = internal constant { %struct.ListNode*, i32, [4 x i8] } { %struct.ListNode* null, i32 0, [4 x i8] undef }, align 8
+
+define i32 @test2() nounwind uwtable readonly {
+; CHECK: test2
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+; CHECK: --> %sum.02{{ *}}Exits: 10
+ %n.01 = phi %struct.ListNode* [ bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node5 to %struct.ListNode*), %entry ], [ %1, %for.body ]
+; CHECK: --> %n.01{{ *}}Exits: @node1
+ %i = getelementptr inbounds %struct.ListNode* %n.01, i64 0, i32 1
+ %0 = load i32* %i, align 4
+ %add = add nsw i32 %0, %sum.02
+ %next = getelementptr inbounds %struct.ListNode* %n.01, i64 0, i32 0
+ %1 = load %struct.ListNode** %next, align 8
+; CHECK: --> %1{{ *}}Exits: 0
+ %cmp = icmp eq %struct.ListNode* %1, null
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
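The Exits values in test1 are plain arithmetic over the two constant arrays: arr1[j] + arr2[j] is 50 for every index, and since %sum.04 is the loop phi, its value on the exiting iteration (j == 49) still excludes the last pair, giving 49 * 50 = 2450; the final loads themselves see arr1[49] == 50 and arr2[49] == 0. A standalone check:

#include <stdio.h>

int main(void) {
    int sum = 0, j;
    for (j = 0; j < 49; ++j)          /* pairs 0..48; the phi excludes the last */
        sum += (j + 1) + (49 - j);    /* arr1[j] + arr2[j] == 50 */
    printf("%d\n", sum);              /* 2450 */
    return 0;
}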
diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll
index 8969a5ad4ceb..a919319bdcd5 100644
--- a/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -23,7 +23,7 @@ bb: ; preds = %bb.nph, %bb1
%1 = sext i32 %i.01 to i64 ; <i64> [#uses=1]
; CHECK: %2 = getelementptr inbounds double* %d, i64 %1
-; CHECK: --> {%d,+,16}<nsw><%bb>
+; CHECK: --> {%d,+,16}<nuw><%bb>
%2 = getelementptr inbounds double* %d, i64 %1 ; <double*> [#uses=1]
%3 = load double* %2, align 8 ; <double> [#uses=1]
@@ -37,7 +37,7 @@ bb: ; preds = %bb.nph, %bb1
%8 = sext i32 %7 to i64 ; <i64> [#uses=1]
; CHECK: %9 = getelementptr inbounds double* %q, i64 %8
-; CHECK: {(8 + %q),+,16}<nsw><%bb>
+; CHECK: {(8 + %q),+,16}<nuw><%bb>
%9 = getelementptr inbounds double* %q, i64 %8 ; <double*> [#uses=1]
; Artificially repeat the above three instructions, this time using
@@ -49,7 +49,7 @@ bb: ; preds = %bb.nph, %bb1
%t8 = sext i32 %t7 to i64 ; <i64> [#uses=1]
; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8
-; CHECK: {(8 + %q),+,16}<nsw><%bb>
+; CHECK: {(8 + %q),+,16}<nuw><%bb>
%t9 = getelementptr inbounds double* %q, i64 %t8 ; <double*> [#uses=1]
%10 = load double* %9, align 8 ; <double> [#uses=1]
diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll
index da35a6cf7ae2..288b6facd94d 100644
--- a/test/Analysis/ScalarEvolution/nsw.ll
+++ b/test/Analysis/ScalarEvolution/nsw.ll
@@ -92,10 +92,10 @@ for.body.i.i: ; preds = %entry, %for.body.i.
; CHECK: {1,+,1}<nuw><nsw><%for.body.i.i>
%ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp
; CHECK: %ptrincdec.i.i =
-; CHECK: {(4 + %begin),+,4}<nsw><%for.body.i.i>
+; CHECK: {(4 + %begin),+,4}<nuw><%for.body.i.i>
%__first.addr.08.i.i = getelementptr inbounds i32* %begin, i64 %indvar.i.i
; CHECK: %__first.addr.08.i.i
-; CHECK: {%begin,+,4}<nsw><%for.body.i.i>
+; CHECK: {%begin,+,4}<nuw><%for.body.i.i>
store i32 0, i32* %__first.addr.08.i.i, align 4
%cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end
br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i
@@ -103,4 +103,22 @@ for.body.i.i: ; preds = %entry, %for.body.i.
; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4)
_ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry
ret void
-}
\ No newline at end of file
+}
+
+; A single AddExpr exists for (%a + %b), which is not always <nsw>.
+; CHECK: @addnsw
+; CHECK-NOT: --> (%a + %b)<nsw>
+define i32 @addnsw(i32 %a, i32 %b) nounwind ssp {
+entry:
+ %tmp = add i32 %a, %b
+ %cmp = icmp sgt i32 %tmp, 0
+ br i1 %cmp, label %greater, label %exit
+
+greater:
+ %tmp2 = add nsw i32 %a, %b
+ br label %exit
+
+exit:
+ %result = phi i32 [ %a, %entry ], [ %tmp2, %greater ]
+ ret i32 %result
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count11.ll b/test/Analysis/ScalarEvolution/trip-count11.ll
new file mode 100644
index 000000000000..71915037ec8b
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count11.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo.a = internal constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 16
+
+define i32 @foo() nounwind uwtable noinline {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+; CHECK: --> %sum.0 Exits: 28
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %cmp = icmp ult i32 %i.0, 8
+ br i1 %cmp, label %for.inc, label %for.end
+
+for.inc: ; preds = %for.cond
+ %idxprom = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds [8 x i32]* @foo.a, i64 0, i64 %idxprom
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %sum.0, %0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret i32 %sum.0
+}
diff --git a/test/Analysis/ScalarEvolution/trip-count12.ll b/test/Analysis/ScalarEvolution/trip-count12.ll
new file mode 100644
index 000000000000..8f960e1c1c77
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count12.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s
+
+; CHECK: Determining loop execution counts for: @test
+; CHECK: Loop %for.body: backedge-taken count is ((-2 + %len) /u 2)
+; CHECK: Loop %for.body: max backedge-taken count is 1073741823
+
+define zeroext i16 @test(i16* nocapture %p, i32 %len) nounwind readonly {
+entry:
+ %cmp2 = icmp sgt i32 %len, 1
+ br i1 %cmp2, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %for.body.preheader ]
+ %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %for.body.preheader ]
+ %res.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
+ %incdec.ptr = getelementptr inbounds i16* %p.addr.05, i32 1
+ %0 = load i16* %p.addr.05, align 2
+ %conv = zext i16 %0 to i32
+ %add = add i32 %conv, %res.03
+ %sub = add nsw i32 %len.addr.04, -2
+ %cmp = icmp sgt i32 %sub, 1
+ br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ %extract.t = trunc i32 %add to i16
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %res.0.lcssa.off0 = phi i16 [ %extract.t, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+ ret i16 %res.0.lcssa.off0
+}
+
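The expected backedge-taken count, ((-2 + %len) /u 2), can be sanity-checked by running the loop shape in C: the body repeats while len, decremented by 2 each trip, stays above 1. A hedged standalone check (backedges is an illustrative helper, not part of the test):

#include <stdio.h>

/* Count how many times the backedge would be taken for a given len,
   mirroring the IR: sub = len - 2; loop again while sub > 1. */
static unsigned backedges(int len) {
    unsigned n = 0;
    if (len <= 1)
        return 0;                      /* entry guard: loop not entered */
    while ((len -= 2) > 1)
        ++n;
    return n;
}

int main(void) {
    int len;
    for (len = 2; len <= 9; ++len)
        printf("len=%d loop=%u formula=%u\n",
               len, backedges(len), ((unsigned)len - 2) / 2);
    return 0;
}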
diff --git a/test/Analysis/TypeBasedAliasAnalysis/dg.exp b/test/Analysis/TypeBasedAliasAnalysis/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Analysis/TypeBasedAliasAnalysis/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index 8fb5ffffbaea..1ac59278e7ea 100644
--- a/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -24,7 +24,7 @@ define void @test0_no(i32* %p) nounwind {
; Add the readonly attribute, since there's just a call to a function which
; TBAA says doesn't modify any memory.
-; CHECK: define void @test1_yes(i32* %p) nounwind readonly {
+; CHECK: define void @test1_yes(i32* nocapture %p) nounwind readonly {
define void @test1_yes(i32* %p) nounwind {
call void @callee(i32* %p), !tbaa !1
ret void
diff --git a/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Analysis/TypeBasedAliasAnalysis/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Archive/dg.exp b/test/Archive/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Archive/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Archive/lit.local.cfg b/test/Archive/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Archive/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
deleted file mode 100644
index daffa3d3af95..000000000000
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; Tests to make sure intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-
-
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
-declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone
-define void @test_loadu(i8* %a, double* %b) {
- %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
- %v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
- %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
-
-; CHECK: load i128* {{.*}}, align 1
-; CHECK: load i128* {{.*}}, align 1
-; CHECK: load i128* {{.*}}, align 1
- ret void
-}
-
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind readnone
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x double>) nounwind readnone
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind readnone
-declare void @llvm.x86.sse2.movnt.i(i8*, i32) nounwind readnone
-
-define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse.movnt.ps(i8* %B, <4 x float> %A)
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse2.movnt.dq(i8* %B, <2 x double> %C)
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse2.movnt.pd(i8* %B, <2 x double> %C)
-; CHECK: store{{.*}}nontemporal
- call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
- ret void
-}
-
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
-
-define void @p(i8* %ptr) {
-; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
- tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
- ret void
-}
-
-declare i32 @nest_f(i8* nest, i32)
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-define void @test_trampolines() {
-; CHECK: call void @llvm.init.trampoline(i8* null, i8* bitcast (i32 (i8*, i32)* @nest_f to i8*), i8* null)
-; CHECK: call i8* @llvm.adjust.trampoline(i8* null)
-
- call i8* @llvm.init.trampoline(i8* null,
- i8* bitcast (i32 (i8*, i32)* @nest_f to i8*),
- i8* null)
- ret void
-}
diff --git a/test/Assembler/aggregate-constant-values.ll b/test/Assembler/aggregate-constant-values.ll
index a37d03ebb387..d0aab81a4d68 100644
--- a/test/Assembler/aggregate-constant-values.ll
+++ b/test/Assembler/aggregate-constant-values.ll
@@ -1,25 +1,48 @@
-; RUN: llvm-as < %s | llvm-dis | grep 7 | count 3
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; CHECK: @foo
+; CHECK: store { i32, i32 } { i32 7, i32 9 }, { i32, i32 }* %x
+; CHECK: ret
define void @foo({i32, i32}* %x) nounwind {
store {i32, i32}{i32 7, i32 9}, {i32, i32}* %x
ret void
}
+
+; CHECK: @foo_empty
+; CHECK: store {} zeroinitializer, {}* %x
+; CHECK: ret
define void @foo_empty({}* %x) nounwind {
store {}{}, {}* %x
ret void
}
+
+; CHECK: @bar
+; CHECK: store [2 x i32] [i32 7, i32 9], [2 x i32]* %x
+; CHECK: ret
define void @bar([2 x i32]* %x) nounwind {
store [2 x i32][i32 7, i32 9], [2 x i32]* %x
ret void
}
+
+; CHECK: @bar_empty
+; CHECK: store [0 x i32] undef, [0 x i32]* %x
+; CHECK: ret
define void @bar_empty([0 x i32]* %x) nounwind {
store [0 x i32][], [0 x i32]* %x
ret void
}
+
+; CHECK: @qux
+; CHECK: store <{ i32, i32 }> <{ i32 7, i32 9 }>, <{ i32, i32 }>* %x
+; CHECK: ret
define void @qux(<{i32, i32}>* %x) nounwind {
store <{i32, i32}><{i32 7, i32 9}>, <{i32, i32}>* %x
ret void
}
+
+; CHECK: @qux_empty
+; CHECK: store <{}> zeroinitializer, <{}>* %x
+; CHECK: ret
define void @qux_empty(<{}>* %x) nounwind {
store <{}><{}>, <{}>* %x
ret void
diff --git a/test/Assembler/auto_upgrade_intrinsics.ll b/test/Assembler/auto_upgrade_intrinsics.ll
new file mode 100644
index 000000000000..7ad5cc30fa71
--- /dev/null
+++ b/test/Assembler/auto_upgrade_intrinsics.ll
@@ -0,0 +1,44 @@
+; Test to make sure intrinsics are automatically upgraded.
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+declare i8 @llvm.ctlz.i8(i8)
+declare i16 @llvm.ctlz.i16(i16)
+declare i32 @llvm.ctlz.i32(i32)
+declare i42 @llvm.ctlz.i42(i42) ; Not a power-of-2
+
+define void @test.ctlz(i8 %a, i16 %b, i32 %c, i42 %d) {
+; CHECK: @test.ctlz
+
+entry:
+ ; CHECK: call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+ call i8 @llvm.ctlz.i8(i8 %a)
+ ; CHECK: call i16 @llvm.ctlz.i16(i16 %b, i1 false)
+ call i16 @llvm.ctlz.i16(i16 %b)
+ ; CHECK: call i32 @llvm.ctlz.i32(i32 %c, i1 false)
+ call i32 @llvm.ctlz.i32(i32 %c)
+ ; CHECK: call i42 @llvm.ctlz.i42(i42 %d, i1 false)
+ call i42 @llvm.ctlz.i42(i42 %d)
+
+ ret void
+}
+
+declare i8 @llvm.cttz.i8(i8)
+declare i16 @llvm.cttz.i16(i16)
+declare i32 @llvm.cttz.i32(i32)
+declare i42 @llvm.cttz.i42(i42) ; Not a power-of-2
+
+define void @test.cttz(i8 %a, i16 %b, i32 %c, i42 %d) {
+; CHECK: @test.cttz
+
+entry:
+ ; CHECK: call i8 @llvm.cttz.i8(i8 %a, i1 false)
+ call i8 @llvm.cttz.i8(i8 %a)
+ ; CHECK: call i16 @llvm.cttz.i16(i16 %b, i1 false)
+ call i16 @llvm.cttz.i16(i16 %b)
+ ; CHECK: call i32 @llvm.cttz.i32(i32 %c, i1 false)
+ call i32 @llvm.cttz.i32(i32 %c)
+ ; CHECK: call i42 @llvm.cttz.i42(i42 %d, i1 false)
+ call i42 @llvm.cttz.i42(i42 %d)
+
+ ret void
+}
diff --git a/test/Assembler/bcwrap.ll b/test/Assembler/bcwrap.ll
index 859dc4bc80f9..4bec48cf63ba 100644
--- a/test/Assembler/bcwrap.ll
+++ b/test/Assembler/bcwrap.ll
@@ -1,9 +1,11 @@
; RUN: llvm-as < %s > %t
-; RUN: llvm-nm %t | grep foo
-; test for isBitcodeFile, llvm-nm must read from a file for this test
+; RUN: llvm-nm %t | FileCheck %s
+; Test for isBitcodeFile; llvm-nm must read from a file for this test.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9.2.2"
+; CHECK: foo
+
define i32 @foo() {
ret i32 0
}
diff --git a/test/Assembler/dg.exp b/test/Assembler/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Assembler/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Assembler/extractvalue-invalid-idx.ll b/test/Assembler/extractvalue-invalid-idx.ll
index f9644eadbd59..9a215f719418 100644
--- a/test/Assembler/extractvalue-invalid-idx.ll
+++ b/test/Assembler/extractvalue-invalid-idx.ll
@@ -1,6 +1,8 @@
-; RUN: not llvm-as < %s |& grep {invalid indices for extractvalue}
+; RUN: not llvm-as < %s |& FileCheck %s
; PR4170
+; CHECK: invalid indices for extractvalue
+
define void @test() {
entry:
extractvalue [0 x i32] undef, 0
diff --git a/test/Assembler/getelementptr_struct.ll b/test/Assembler/getelementptr_struct.ll
index c8779a64077c..bfebf29bd5ea 100644
--- a/test/Assembler/getelementptr_struct.ll
+++ b/test/Assembler/getelementptr_struct.ll
@@ -1,6 +1,8 @@
-; RUN: not llvm-as < %s >/dev/null |& grep {invalid getelementptr indices}
+; RUN: not llvm-as < %s >/dev/null |& FileCheck %s
; Test the case of an incorrect index type into a struct
+; CHECK: invalid getelementptr indices
+
%RT = type { i8 , [10 x [20 x i32]], i8 }
%ST = type { i32, double, %RT }
diff --git a/test/Assembler/huge-array.ll b/test/Assembler/huge-array.ll
index e080947b9945..a1abf879710f 100644
--- a/test/Assembler/huge-array.ll
+++ b/test/Assembler/huge-array.ll
@@ -1,5 +1,7 @@
-; RUN: llvm-as < %s | llvm-dis | grep 18446744073709551615 | count 2
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; CHECK: define [18446744073709551615 x i8]* @foo() {
+; CHECK: ret [18446744073709551615 x i8]* null
define [18446744073709551615 x i8]* @foo() {
ret [18446744073709551615 x i8]* null
}
diff --git a/test/Assembler/insertextractvalue.ll b/test/Assembler/insertextractvalue.ll
index 2f5521fba872..6c00b138ceeb 100644
--- a/test/Assembler/insertextractvalue.ll
+++ b/test/Assembler/insertextractvalue.ll
@@ -1,7 +1,11 @@
-; RUN: llvm-as < %s | llvm-dis > %t
-; RUN: grep insertvalue %t | count 1
-; RUN: grep extractvalue %t | count 1
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; CHECK: @foo
+; CHECK-NEXT: load
+; CHECK-NEXT: extractvalue
+; CHECK-NEXT: insertvalue
+; CHECK-NEXT: store
+; CHECK-NEXT: ret
define float @foo({{i32},{float, double}}* %p) nounwind {
%t = load {{i32},{float, double}}* %p
%s = extractvalue {{i32},{float, double}} %t, 1, 0
@@ -9,21 +13,34 @@ define float @foo({{i32},{float, double}}* %p) nounwind {
store {{i32},{float, double}} %r, {{i32},{float, double}}* %p
ret float %s
}
+
+; CHECK: @bar
+; CHECK-NEXT: store { { i32 }, { float, double } } { { i32 } { i32 4 }, { float, double } { float 4.000000e+00, double 2.000000e+01 } }, { { i32 }, { float, double } }* %p
+; CHECK-NEXT: ret float 7.000000e+00
define float @bar({{i32},{float, double}}* %p) nounwind {
store {{i32},{float, double}} insertvalue ({{i32},{float, double}}{{i32}{i32 4},{float, double}{float 4.0, double 5.0}}, double 20.0, 1, 1), {{i32},{float, double}}* %p
ret float extractvalue ({{i32},{float, double}}{{i32}{i32 3},{float, double}{float 7.0, double 9.0}}, 1, 0)
}
+
+; CHECK: @car
+; CHECK-NEXT: store { { i32 }, { float, double } } { { i32 } undef, { float, double } { float undef, double 2.000000e+01 } }, { { i32 }, { float, double } }* %p
+; CHECK-NEXT: ret float undef
define float @car({{i32},{float, double}}* %p) nounwind {
store {{i32},{float, double}} insertvalue ({{i32},{float, double}} undef, double 20.0, 1, 1), {{i32},{float, double}}* %p
ret float extractvalue ({{i32},{float, double}} undef, 1, 0)
}
+
+; CHECK: @dar
+; CHECK-NEXT: store { { i32 }, { float, double } } { { i32 } zeroinitializer, { float, double } { float 0.000000e+00, double 2.000000e+01 } }, { { i32 }, { float, double } }* %p
+; CHECK-NEXT: ret float 0.000000e+00
define float @dar({{i32},{float, double}}* %p) nounwind {
store {{i32},{float, double}} insertvalue ({{i32},{float, double}} zeroinitializer, double 20.0, 1, 1), {{i32},{float, double}}* %p
ret float extractvalue ({{i32},{float, double}} zeroinitializer, 1, 0)
}
-
; PR4963
+; CHECK: @test57
+; CHECK-NEXT: ret <{ i32, i32 }> <{ i32 0, i32 4 }>
define <{ i32, i32 }> @test57() {
ret <{ i32, i32 }> insertvalue (<{ i32, i32 }> zeroinitializer, i32 4, 1)
}
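
Editor's note: the CHECK lines added above expect the constant folder to evaluate insertvalue and extractvalue on aggregate constants. Both instructions walk the aggregate by a chain of indices; here is a small Python model of that indexing, with nested lists standing in for LLVM structs (a sketch of the semantics, not LLVM's implementation):

import copy

def extractvalue(agg, *idx):
    # Follow the index chain down to one leaf.
    for i in idx:
        agg = agg[i]
    return agg

def insertvalue(agg, val, *idx):
    # Return a copy of the aggregate with one leaf replaced.
    out = copy.deepcopy(agg)
    node = out
    for i in idx[:-1]:
        node = node[i]
    node[idx[-1]] = val
    return out

# @bar above: inserting 20.0 at indices 1,1 and extracting at 1,0
# reproduce the folded constants the CHECK-NEXT lines expect.
assert insertvalue([[4], [4.0, 5.0]], 20.0, 1, 1) == [[4], [4.0, 20.0]]
assert extractvalue([[3], [7.0, 9.0]], 1, 0) == 7.0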
diff --git a/test/Assembler/insertvalue-invalid-idx.ll b/test/Assembler/insertvalue-invalid-idx.ll
index 86e7258cc593..355d4e8c10fd 100644
--- a/test/Assembler/insertvalue-invalid-idx.ll
+++ b/test/Assembler/insertvalue-invalid-idx.ll
@@ -1,7 +1,9 @@
-; RUN: not llvm-as < %s |& grep {invalid indices for insertvalue}
+; RUN: not llvm-as < %s |& FileCheck %s
+
+; CHECK: invalid indices for insertvalue
define void @test() {
entry:
- insertvalue [0 x i32] undef, i32 0, 0
- ret void
+ insertvalue [0 x i32] undef, i32 0, 0
+ ret void
}
diff --git a/test/Assembler/invalid_cast.ll b/test/Assembler/invalid_cast.ll
index c5b082b6b8d6..f68283572436 100644
--- a/test/Assembler/invalid_cast.ll
+++ b/test/Assembler/invalid_cast.ll
@@ -1,4 +1,6 @@
-; RUN: not llvm-as < %s |& grep {invalid cast opcode}
+; RUN: not llvm-as < %s |& FileCheck %s
+
+; CHECK: invalid cast opcode for cast from '<4 x i64>' to '<3 x i8>'
define <3 x i8> @foo(<4 x i64> %x) {
%y = trunc <4 x i64> %x to <3 x i8>
diff --git a/test/Assembler/invalid_cast2.ll b/test/Assembler/invalid_cast2.ll
index f2e7c414e710..a01b935629f5 100644
--- a/test/Assembler/invalid_cast2.ll
+++ b/test/Assembler/invalid_cast2.ll
@@ -1,4 +1,6 @@
-; RUN: not llvm-as < %s |& grep {invalid cast opcode}
+; RUN: not llvm-as < %s |& FileCheck %s
+
+; CHECK: invalid cast opcode for cast from '<4 x i64>' to 'i8'
define i8 @foo(<4 x i64> %x) {
%y = trunc <4 x i64> %x to i8
diff --git a/test/Assembler/lit.local.cfg b/test/Assembler/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Assembler/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
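
Editor's note: this one-line lit.local.cfg replaces the deleted dg.exp driver; each test directory now declares its test suffixes to lit instead of to DejaGNU. The file is ordinary Python that lit executes with a config object in scope. A rough sketch of that loading step (simplified; the real loader lives in utils/lit):

class SuiteConfig(object):
    def __init__(self):
        self.suffixes = []
        self.unsupported = False

def load_local_cfg(path):
    # lit exec's the local config with `config` bound in its namespace.
    config = SuiteConfig()
    with open(path) as f:
        exec(f.read(), {'config': config})
    return config

cfg = load_local_cfg('test/Assembler/lit.local.cfg')
print(cfg.suffixes)   # ['.ll', '.c', '.cpp']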
diff --git a/test/Assembler/metadata.ll b/test/Assembler/metadata.ll
index 50f27b4eb45f..56888fd70347 100644
--- a/test/Assembler/metadata.ll
+++ b/test/Assembler/metadata.ll
@@ -1,12 +1,12 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {ret void, !bar !1, !foo !0}
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+
+; CHECK: @test
+; CHECK: ret void, !bar !1, !foo !0
define void @test() {
add i32 2, 1, !bar !0
add i32 1, 2, !foo !1
-
call void @llvm.dbg.func.start(metadata !"foo")
-
extractvalue {{i32, i32}, i32} undef, 0, 1, !foo !0
-
ret void, !foo !0, !bar !1
}
@@ -15,8 +15,5 @@ define void @test() {
declare void @llvm.dbg.func.start(metadata) nounwind readnone
-
!foo = !{ !0 }
!bar = !{ !1 }
-
-; !foo = !{ !0, !"foo" }
diff --git a/test/Assembler/vbool-cmp.ll b/test/Assembler/vbool-cmp.ll
index ac8fb29362f7..e652d2ff3b36 100644
--- a/test/Assembler/vbool-cmp.ll
+++ b/test/Assembler/vbool-cmp.ll
@@ -1,15 +1,18 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {icmp slt}
-; rudimentary test of fcmp/icmp on vectors returning vector of bool
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; Rudimentary test of fcmp/icmp on vectors returning vector of bool
+; CHECK: @ffoo
+; CHECK: fcmp olt <4 x float> %a, %b
define <4 x i1> @ffoo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
- %cmp = fcmp olt <4 x float> %a, %b ; <4 x i1> [#uses=1]
- ret <4 x i1> %cmp
+ %cmp = fcmp olt <4 x float> %a, %b ; <4 x i1> [#uses=1]
+ ret <4 x i1> %cmp
}
+; CHECK: @ifoo
+; CHECK: icmp slt <4 x i32> %a, %b
define <4 x i1> @ifoo(<4 x i32> %a, <4 x i32> %b) nounwind {
entry:
- %cmp = icmp slt <4 x i32> %a, %b ; <4 x i1> [#uses=1]
- ret <4 x i1> %cmp
+ %cmp = icmp slt <4 x i32> %a, %b ; <4 x i1> [#uses=1]
+ ret <4 x i1> %cmp
}
-
diff --git a/test/Assembler/vector-cmp.ll b/test/Assembler/vector-cmp.ll
index 688369bb62b4..6e3894ca1007 100644
--- a/test/Assembler/vector-cmp.ll
+++ b/test/Assembler/vector-cmp.ll
@@ -1,16 +1,16 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep {global.*icmp slt}
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
; PR2317
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9.2.2"
+; CHECK: @1 = global <4 x i1> <i1 icmp slt (i32 ptrtoint (i32* @B to i32), i32 1), i1 true, i1 false, i1 true>
+
define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind {
entry:
- %cmp = fcmp olt <4 x float> %a, %b ; <4 x i32> [#uses=1]
- ret <4 x i1> %cmp
+ %cmp = fcmp olt <4 x float> %a, %b ; <4 x i32> [#uses=1]
+ ret <4 x i1> %cmp
}
global <4 x i1> icmp slt ( <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> )
-
@B = external global i32
-
global <4 x i1> icmp slt ( <4 x i32> <i32 ptrtoint (i32 * @B to i32), i32 1, i32 1, i32 1>, <4 x i32> <i32 1, i32 2, i32 1, i32 2> )
diff --git a/test/Assembler/vector-select.ll b/test/Assembler/vector-select.ll
index 87af602aaf59..ae8358abf9a1 100644
--- a/test/Assembler/vector-select.ll
+++ b/test/Assembler/vector-select.ll
@@ -1,11 +1,11 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep select
-; rudimentary test of select on vectors returning vector of bool
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; Rudimentary test of select on vectors returning vector of bool
-define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b,
- <4 x i1> %cond) nounwind {
+; CHECK: @foo
+; CHECK: select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b
+define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b, <4 x i1> %cond) nounwind {
entry:
- %cmp = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b
- ; <4 x i32> [#uses=1]
+ %cmp = select <4 x i1> %cond, <4 x i32> %a, <4 x i32> %b
ret <4 x i32> %cmp
}
diff --git a/test/Assembler/vector-shift.ll b/test/Assembler/vector-shift.ll
index 1850e66e8edc..6a6531b4d2ff 100644
--- a/test/Assembler/vector-shift.ll
+++ b/test/Assembler/vector-shift.ll
@@ -1,32 +1,45 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep shl | count 1
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep ashr | count 1
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep lshr | count 1
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s
+; CHECK: @foo
+; CHECK: shl
define <4 x i32> @foo(<4 x i32> %a, <4 x i32> %b) nounwind {
entry:
- %cmp = shl <4 x i32> %a, %b ; <4 x i32> [#uses=1]
- ret <4 x i32> %cmp
+ %cmp = shl <4 x i32> %a, %b ; <4 x i32> [#uses=1]
+ ret <4 x i32> %cmp
}
+; CHECK: @bar
+; CHECK: lshr
define <4 x i32> @bar(<4 x i32> %a, <4 x i32> %b) nounwind {
entry:
- %cmp = lshr <4 x i32> %a, %b ; <4 x i32> [#uses=1]
- ret <4 x i32> %cmp
+ %cmp = lshr <4 x i32> %a, %b ; <4 x i32> [#uses=1]
+ ret <4 x i32> %cmp
}
+; CHECK: @baz
+; CHECK: ashr
define <4 x i32> @baz(<4 x i32> %a, <4 x i32> %b) nounwind {
entry:
- %cmp = ashr <4 x i32> %a, %b ; <4 x i32> [#uses=1]
- ret <4 x i32> %cmp
+ %cmp = ashr <4 x i32> %a, %b ; <4 x i32> [#uses=1]
+ ret <4 x i32> %cmp
}
; Constant expressions: these should be folded.
+
+; CHECK: @foo_ce
+; CHECK: ret <2 x i64> <i64 40, i64 192>
define <2 x i64> @foo_ce() nounwind {
ret <2 x i64> shl (<2 x i64> <i64 5, i64 6>, <2 x i64> <i64 3, i64 5>)
}
+
+; CHECK: @bar_ce
+; CHECK: ret <2 x i64> <i64 42, i64 11>
define <2 x i64> @bar_ce() nounwind {
ret <2 x i64> lshr (<2 x i64> <i64 340, i64 380>, <2 x i64> <i64 3, i64 5>)
}
+
+; CHECK: @baz_ce
+; CHECK: ret <2 x i64> <i64 71, i64 12>
define <2 x i64> @baz_ce() nounwind {
ret <2 x i64> ashr (<2 x i64> <i64 573, i64 411>, <2 x i64> <i64 3, i64 5>)
}
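
Editor's note: the three *_ce functions rely on the constant folder evaluating the vector shifts at compile time, so the CHECK lines can name concrete lane values. The expected results are plain elementwise shifts; for these nonnegative operands, lshr and ashr both agree with Python's >>, so the arithmetic checks out as:

shl  = [a << b for a, b in zip([5, 6],     [3, 5])]   # [40, 192]
lshr = [a >> b for a, b in zip([340, 380], [3, 5])]   # [42, 11]
ashr = [a >> b for a, b in zip([573, 411], [3, 5])]   # [71, 12]
assert (shl, lshr, ashr) == ([40, 192], [42, 11], [71, 12])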
diff --git a/test/Bindings/Ocaml/dg.exp b/test/Bindings/Ocaml/dg.exp
deleted file mode 100644
index fb4bd078e37f..000000000000
--- a/test/Bindings/Ocaml/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if [ llvm_supports_binding ocaml ] then {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,ml}]]
-}
diff --git a/test/Bindings/Ocaml/lit.local.cfg b/test/Bindings/Ocaml/lit.local.cfg
new file mode 100644
index 000000000000..640c58d2f3d6
--- /dev/null
+++ b/test/Bindings/Ocaml/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.ml']
+
+bindings = set([s.strip() for s in config.root.llvm_bindings.split(',')])
+if 'ocaml' not in bindings:
+ config.unsupported = True
+
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll b/test/Bitcode/AutoUpgradeGlobals.ll
deleted file mode 100644
index a5af2b8b07d1..000000000000
--- a/test/Bitcode/AutoUpgradeGlobals.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; This isn't really an assembly file. It just runs test on bitcode to ensure
-; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @\\.llvm\\.eh}
diff --git a/test/Bitcode/AutoUpgradeGlobals.ll.bc b/test/Bitcode/AutoUpgradeGlobals.ll.bc
deleted file mode 100644
index 1abe9688e291..000000000000
--- a/test/Bitcode/AutoUpgradeGlobals.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/dg.exp b/test/Bitcode/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Bitcode/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Bitcode/lit.local.cfg b/test/Bitcode/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Bitcode/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Bitcode/null-type.ll b/test/Bitcode/null-type.ll
index 5d3dfab5753a..b972753da1dd 100644
--- a/test/Bitcode/null-type.ll
+++ b/test/Bitcode/null-type.ll
@@ -1,2 +1,4 @@
-; RUN: not llvm-dis < %s.bc > /dev/null |& grep "Invalid MODULE_CODE_FUNCTION record"
+; RUN: not llvm-dis < %s.bc > /dev/null |& FileCheck %s
; PR8494
+
+; CHECK: Invalid MODULE_CODE_FUNCTION record
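
Editor's note: several of these negative tests share the `not <tool> ... |& FileCheck %s` shape: the tool must exit nonzero, and its stderr must carry the diagnostic named by the CHECK line. A hedged Python equivalent of that harness (it assumes llvm-dis is on PATH; the helper name is illustrative):

import subprocess

def expect_failure_with(cmd, message, stdin_bytes=b""):
    # `not` inverts the exit status; `|&` pipes stderr into FileCheck.
    proc = subprocess.run(cmd, input=stdin_bytes, capture_output=True)
    return proc.returncode != 0 and message in proc.stderr.decode()

# e.g. for the test above:
# ok = expect_failure_with(["llvm-dis"],
#                          "Invalid MODULE_CODE_FUNCTION record",
#                          open("null-type.ll.bc", "rb").read())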
diff --git a/test/Bitcode/shuffle.ll b/test/Bitcode/shuffle.ll
new file mode 100644
index 000000000000..c3c01c6692c4
--- /dev/null
+++ b/test/Bitcode/shuffle.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llvm-dis
+
+; <rdar://problem/8622574>
+; Tests that the bitcode reader can handle the case where it will initially
+; create a shuffle with a placeholder mask.
+
+
+define <4 x float> @test(<2 x double> %d2) {
+entry:
+ %call20.i = tail call <4 x float> @cmp(<2 x double> %d2,
+ <2 x double> bitcast (
+ <4 x float> shufflevector (
+ <3 x float> shufflevector (
+ <4 x float> shufflevector (
+ <3 x float> bitcast (
+ i96 trunc (
+ i128 bitcast (<2 x double> bitcast (
+ <4 x i32> <i32 0, i32 0, i32 0, i32 undef> to <2 x double>)
+ to i128) to i96)
+ to <3 x float>),
+ <3 x float> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 undef>),
+ <4 x float> undef,
+ <3 x i32> <i32 0, i32 1, i32 2>),
+ <3 x float> undef,
+ <4 x i32> <i32 0, i32 1, i32 2, i32 undef>)
+ to <2 x double>))
+ ret <4 x float> %call20.i
+}
+
+declare <4 x float> @cmp(<2 x double>, <2 x double>)
diff --git a/test/Bitcode/sse42_crc32.ll b/test/Bitcode/sse42_crc32.ll
deleted file mode 100644
index 1c371c3a235f..000000000000
--- a/test/Bitcode/sse42_crc32.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; Check to make sure old CRC32 intrinsics are auto-upgraded
-; correctly.
-;
-; Rdar: 9472944
-;
-; RUN: llvm-dis < %s.bc | FileCheck %s
-
-; crc32.8 should upgrade to crc32.32.8
-; CHECK: i32 @llvm.x86.sse42.crc32.32.8(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.8(
-
-; crc32.16 should upgrade to crc32.32.16
-; CHECK: i32 @llvm.x86.sse42.crc32.32.16(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.16(
-
-; crc32.32 should upgrade to crc32.32.32
-; CHECK: i32 @llvm.x86.sse42.crc32.32.32(
-; CHECK-NOT: i32 @llvm.x86.sse42.crc32.32(
-
-; crc64.8 should upgrade to crc32.64.8
-; CHECK: i64 @llvm.x86.sse42.crc32.64.8(
-; CHECK-NOT: i64 @llvm.x86.sse42.crc64.8(
-
-; crc64.64 should upgrade to crc32.64.64
-; CHECK: i64 @llvm.x86.sse42.crc32.64.64(
-; CHECK-NOT: i64 @llvm.x86.sse42.crc64.64(
-
-
diff --git a/test/Bitcode/sse42_crc32.ll.bc b/test/Bitcode/sse42_crc32.ll.bc
deleted file mode 100644
index d895fad2ac4b..000000000000
--- a/test/Bitcode/sse42_crc32.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/ssse3_palignr.ll b/test/Bitcode/ssse3_palignr.ll
index f62ca118c1b1..90b4394a8b46 100644
--- a/test/Bitcode/ssse3_palignr.ll
+++ b/test/Bitcode/ssse3_palignr.ll
@@ -1,2 +1,82 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: opt < %s -S | FileCheck %s
; CHECK-NOT: {@llvm\\.palign}
+
+define <4 x i32> @align1(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <4 x i32> %b to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %1 = bitcast <4 x i32> %a to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 15) ; <<2 x i64>> [#uses=1]
+ %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %3
+}
+
+define double @align8(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <2 x i32> %b to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %1 = bitcast <2 x i32> %a to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 7) ; <<1 x i64>> [#uses=1]
+ %3 = extractelement <1 x i64> %2, i32 0 ; <i64> [#uses=1]
+ %retval12 = bitcast i64 %3 to double ; <double> [#uses=1]
+ ret double %retval12
+}
+
+declare <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64>, <1 x i64>, i8) nounwind readnone
+
+define double @align7(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <2 x i32> %b to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %1 = bitcast <2 x i32> %a to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 16) ; <<1 x i64>> [#uses=1]
+ %3 = extractelement <1 x i64> %2, i32 0 ; <i64> [#uses=1]
+ %retval12 = bitcast i64 %3 to double ; <double> [#uses=1]
+ ret double %retval12
+}
+
+define double @align6(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <2 x i32> %b to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %1 = bitcast <2 x i32> %a to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 9) ; <<1 x i64>> [#uses=1]
+ %3 = extractelement <1 x i64> %2, i32 0 ; <i64> [#uses=1]
+ %retval12 = bitcast i64 %3 to double ; <double> [#uses=1]
+ ret double %retval12
+}
+
+define double @align5(<2 x i32> %a, <2 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <2 x i32> %b to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %1 = bitcast <2 x i32> %a to <1 x i64> ; <<1 x i64>> [#uses=1]
+ %2 = tail call <1 x i64> @llvm.x86.ssse3.palign.r(<1 x i64> %1, <1 x i64> %0, i8 8) ; <<1 x i64>> [#uses=1]
+ %3 = extractelement <1 x i64> %2, i32 0 ; <i64> [#uses=1]
+ %retval12 = bitcast i64 %3 to double ; <double> [#uses=1]
+ ret double %retval12
+}
+
+define <4 x i32> @align4(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <4 x i32> %b to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %1 = bitcast <4 x i32> %a to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) ; <<2 x i64>> [#uses=1]
+ %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %3
+}
+
+declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwind readnone
+
+define <4 x i32> @align3(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <4 x i32> %b to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %1 = bitcast <4 x i32> %a to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 17) ; <<2 x i64>> [#uses=1]
+ %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %3
+}
+
+define <4 x i32> @align2(<4 x i32> %a, <4 x i32> %b) nounwind readnone ssp {
+entry:
+ %0 = bitcast <4 x i32> %b to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %1 = bitcast <4 x i32> %a to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 16) ; <<2 x i64>> [#uses=1]
+ %3 = bitcast <2 x i64> %2 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %3
+}
diff --git a/test/Bitcode/ssse3_palignr.ll.bc b/test/Bitcode/ssse3_palignr.ll.bc
deleted file mode 100644
index 3fc9cdf15a35..000000000000
--- a/test/Bitcode/ssse3_palignr.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/BugPoint/dg.exp b/test/BugPoint/dg.exp
deleted file mode 100644
index de42dad163fd..000000000000
--- a/test/BugPoint/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/BugPoint/lit.local.cfg b/test/BugPoint/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/BugPoint/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 7bb1bddc2d92..8cebb7c74774 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -24,98 +24,58 @@ else() # Default for all other unix like systems.
set(SHLIBPATH_VAR "LD_LIBRARY_PATH")
endif()
-include(FindPythonInterp)
-if(PYTHONINTERP_FOUND)
- set(LIT_ARGS "${LLVM_LIT_ARGS}")
- separate_arguments(LIT_ARGS)
+set(LIT_ARGS "${LLVM_LIT_ARGS}")
+separate_arguments(LIT_ARGS)
- get_directory_property(DEFINITIONS COMPILE_DEFINITIONS)
- foreach(DEF ${DEFINITIONS})
- set(DEFS "${DEFS} -D${DEF}")
- endforeach()
- get_directory_property(INC_DIRS INCLUDE_DIRECTORIES)
- foreach(INC_DIR ${INC_DIRS})
- set(IDIRS "${IDIRS} -I${INC_DIR}")
- endforeach()
+configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
+ ${CMAKE_CURRENT_BINARY_DIR}/site.exp)
- if( MSVC )
- # The compiler's path may contain white space. Wrap it:
- string(REPLACE "<CMAKE_CXX_COMPILER>" "\\\"${CMAKE_CXX_COMPILER}\\\"" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
- # Eliminate continuation lines from NMake flow. PR9680
- string(REPLACE "@<<\n" " " TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- string(REPLACE "\n<<" " " TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- else()
- string(REPLACE "<CMAKE_CXX_COMPILER>" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT})
- endif()
+MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
- string(REPLACE "<DEFINES>" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- string(REPLACE "<FLAGS>" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- if (MSVC) # PR9680
- # Eliminate MSVC equivalent of -o
- string(REPLACE "/Fo<OBJECT>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- # Eliminate "how to rename program database" argument
- string(REPLACE "/Fd<TARGET_PDB>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- else()
- string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- endif(MSVC)
- string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD})
- set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}")
- if(NOT MSVC)
- set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++")
- # MSVC already has /TP to indicate a C++ source file
- endif()
- configure_file(
- ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in
- ${CMAKE_CURRENT_BINARY_DIR}/site.exp)
+# Configuration-time: See Unit/lit.site.cfg.in
+set(LLVM_BUILD_MODE "%(build_mode)s")
- MAKE_DIRECTORY(${CMAKE_CURRENT_BINARY_DIR}/Unit)
-
- # Configuration-time: See Unit/lit.site.cfg.in
- set(LLVM_BUILD_MODE "%(build_mode)s")
-
- set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
- set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
- set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
- set(LLVMGCCDIR "")
- set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
- set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
- set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
-
- if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE)
- set(ENABLE_ASSERTIONS "1")
- else()
- set(ENABLE_ASSERTIONS "0")
- endif()
-
- configure_file(
- ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
- ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
- @ONLY)
- configure_file(
- ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
- ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
- @ONLY)
-
- add_custom_target(check
- COMMAND ${PYTHON_EXECUTABLE}
- ${LLVM_SOURCE_DIR}/utils/lit/lit.py
- --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
- --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
- --param build_config=${CMAKE_CFG_INTDIR}
- --param build_mode=${RUNTIME_BUILD_MODE}
- ${LIT_ARGS}
- ${CMAKE_CURRENT_BINARY_DIR}
- COMMENT "Running LLVM regression tests")
- set_target_properties(check PROPERTIES FOLDER "Tests")
-
- add_custom_target(check.deps)
- add_dependencies(check check.deps)
- add_dependencies(check.deps
- UnitTests
- BugpointPasses LLVMHello
- llc lli llvm-ar llvm-as llvm-dis llvm-extract
- llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump macho-dump opt
- FileCheck count not)
- set_target_properties(check.deps PROPERTIES FOLDER "Tests")
+set(LLVM_SOURCE_DIR ${LLVM_MAIN_SRC_DIR})
+set(LLVM_BINARY_DIR ${LLVM_BINARY_DIR})
+set(LLVM_TOOLS_DIR "${LLVM_TOOLS_BINARY_DIR}/%(build_config)s")
+set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE})
+set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
+set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
+if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE)
+ set(ENABLE_ASSERTIONS "1")
+else()
+ set(ENABLE_ASSERTIONS "0")
endif()
+
+configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+ ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+ @ONLY)
+configure_file(
+ ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.in
+ ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+ @ONLY)
+
+add_custom_target(check
+ COMMAND ${PYTHON_EXECUTABLE}
+ ${LLVM_SOURCE_DIR}/utils/lit/lit.py
+ --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+ --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
+ --param build_config=${CMAKE_CFG_INTDIR}
+ --param build_mode=${RUNTIME_BUILD_MODE}
+ ${LIT_ARGS}
+ ${CMAKE_CURRENT_BINARY_DIR}
+ COMMENT "Running LLVM regression tests")
+
+add_custom_target(check.deps)
+add_dependencies(check check.deps)
+add_dependencies(check.deps
+ UnitTests
+ BugpointPasses LLVMHello
+ llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-dwarfdump
+ llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump llvm-readobj
+ macho-dump opt
+ FileCheck count not json-bench)
+set_target_properties(check.deps PROPERTIES FOLDER "Tests")
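
Editor's note: after this simplification the check target is a thin wrapper: it runs lit.py over the binary test directory with the two generated site configs passed as parameters. Driving it the same way by hand, sketched in Python (the build paths are placeholders for the configure_file outputs above; -sv is just a common choice of LIT_ARGS):

import subprocess, sys

subprocess.check_call([
    sys.executable, "utils/lit/lit.py",
    "--param", "llvm_site_config=build/test/lit.site.cfg",
    "--param", "llvm_unit_site_config=build/test/Unit/lit.site.cfg",
    "-sv",
    "build/test",
])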
diff --git a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index 3694aaad5549..0bfe33175196 100644
--- a/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6,+vfp2 | FileCheck %s
@quant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
@dequant_coef = external global [6 x [4 x [4 x i32]]] ; <[6 x [4 x [4 x i32]]]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 78c622237563..94c562bf0129 100644
--- a/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@ bb74.i: ; preds = %bb88.i, %bb74.i, %entry
bb88.i: ; preds = %bb74.i
br i1 false, label %mandel.exit, label %bb74.i
mandel.exit: ; preds = %bb88.i
- %tmp2 = volatile load double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
+ %tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8 ; <double> [#uses=1]
%tmp23 = fptosi double %tmp2 to i32 ; <i32> [#uses=1]
%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 ) ; <i32> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 8bde7489d9b6..a016809857e7 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -37,10 +37,11 @@ return: ; preds = %invcont
ret void
lpad: ; preds = %entry
- %eh_ptr = call i8* @llvm.eh.exception()
+ %exn = landingpad {i8*, i32} personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %eh_ptr = extractvalue {i8*, i32} %exn, 0
store i8* %eh_ptr, i8** %eh_exception
- %eh_ptr1 = load i8** %eh_exception
- %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
+ %eh_select2 = extractvalue {i8*, i32} %exn, 1
store i32 %eh_select2, i32* %eh_selector
br label %ppad
@@ -94,10 +95,6 @@ declare void @_ZdlPv(i8*) nounwind
declare void @_Z3barv()
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare i32 @llvm.eh.typeid.for(i8*) nounwind
declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 0a157c96b31f..426bd17590b7 100644
--- a/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -9,7 +9,7 @@ define void @test(double* %x, double* %y) nounwind {
br i1 %4, label %bb1, label %bb2
bb1:
-;CHECK: vstrhi.64
+;CHECK: vstrhi
store double %1, double* %y
br label %bb2
diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 8bfd02697b79..eb9c2d0f7f8f 100644
--- a/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -6,7 +6,7 @@ entry:
%arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1]
%out_poly16_t = alloca i16 ; <i16*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
-; CHECK: vldr.64
+; CHECK: vldr
%0 = load <4 x i16>* %arg0_poly16x4_t, align 8 ; <<4 x i16>> [#uses=1]
%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
store i16 %1, i16* %out_poly16_t, align 2
diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 7aae3acd76e6..a8afc20bc130 100644
--- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+; RUN: llc -mcpu=cortex-a8 -mattr=-neonfp < %s | FileCheck %s
; PR5423
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index df9dbca313f2..0ae7f84f3ef3 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -11,7 +11,7 @@ entry:
; THUMB: t:
; THUMB-NOT: str r0, [r1], r0
-; THUMB: str r2, [r1]
+; THUMB: str r1, [r0]
%0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; <i64*> [#uses=1]
store i32 0, i32* inttoptr (i32 8 to i32*), align 8
br i1 undef, label %bb.nph96, label %bb3
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index e47c03839375..e0f50c97ba52 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=linearscan
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -optimize-regalloc -regalloc=basic
; This test would crash the rewriter when trying to handle a spill after one of
; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index cd1c9c8c0421..a400b7b288ce 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -10,28 +10,28 @@ entry:
%4 = ashr i32 %3, 30
%.sum = add i32 %4, 4
%5 = getelementptr inbounds float* %table, i32 %.sum
-;CHECK: vldr.32 s
+;CHECK: vldr s
%6 = load float* %5, align 4
%tmp11 = insertelement <4 x float> undef, float %6, i32 0
%7 = shl i32 %packedValue, 18
%8 = ashr i32 %7, 30
%.sum12 = add i32 %8, 4
%9 = getelementptr inbounds float* %table, i32 %.sum12
-;CHECK: vldr.32 s
+;CHECK: vldr s
%10 = load float* %9, align 4
%tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
%11 = shl i32 %packedValue, 20
%12 = ashr i32 %11, 30
%.sum13 = add i32 %12, 4
%13 = getelementptr inbounds float* %table, i32 %.sum13
-;CHECK: vldr.32 s
+;CHECK: vldr s
%14 = load float* %13, align 4
%tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
%15 = shl i32 %packedValue, 22
%16 = ashr i32 %15, 30
%.sum14 = add i32 %16, 4
%17 = getelementptr inbounds float* %table, i32 %.sum14
-;CHECK: vldr.32 s
+;CHECK: vldr s
%18 = load float* %17, align 4
%tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
%19 = fmul <4 x float> %tmp5, %2
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
index b9d5600d2ad8..1aee5088eee4 100644
--- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-darwin10"
; CHECK: vld1.64 {d16, d17}, [r{{.}}]
; CHECK-NOT: vld1.64 {d16, d17}
-; CHECK: vmov.f64 d19, d16
+; CHECK: vmov.f64
define i32 @test(i8* %arg) nounwind {
entry:
diff --git a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index c03c81545946..2842437e7e42 100644
--- a/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -21,12 +21,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
declare i8* @__cxa_allocate_exception(i32)
-declare i8* @llvm.eh.exception() nounwind readonly
-
declare i32 @__gxx_personality_sj0(...)
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare i32 @llvm.eh.typeid.for(i8*) nounwind
declare void @_Unwind_SjLj_Resume(i8*)
@@ -75,8 +71,11 @@ try.cont: ; preds = %lpad
ret i32 %conv
lpad: ; preds = %entry
- %exn = tail call i8* @llvm.eh.exception() nounwind ; <i8*> [#uses=4]
- %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* bitcast (%0* @_ZTI1A to i8*), i8* null) nounwind ; <i32> [#uses=1]
+ %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* bitcast (%0* @_ZTI1A to i8*)
+ catch i8* null
+ %exn = extractvalue { i8*, i32 } %exn.ptr, 0
+ %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
%2 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (%0* @_ZTI1A to i8*)) nounwind ; <i32> [#uses=1]
%3 = icmp eq i32 %eh.selector, %2 ; <i1> [#uses=1]
br i1 %3, label %try.cont, label %eh.resume
diff --git a/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index f57b7e676949..4b47085afd5e 100644
--- a/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -34,10 +34,12 @@ return: ; preds = %entry
ret void
lpad: ; preds = %bb
- %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
- store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+ store i8* %exn, i8** %eh_exception
%eh_ptr13 = load i8** %eh_exception ; <i8*> [#uses=1]
- %eh_select14 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr13, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 1)
+ %eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1
store i32 %eh_select14, i32* %eh_selector
br label %ppad
@@ -54,10 +56,6 @@ declare arm_apcscc void @func2()
declare arm_apcscc void @_ZSt9terminatev() noreturn nounwind
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare arm_apcscc void @_Unwind_SjLj_Resume(i8*)
declare arm_apcscc void @func3()
diff --git a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index 04220949027f..ec7488089556 100644
--- a/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -verify-machineinstrs -spiller=standard
+; RUN: llc < %s -verify-machineinstrs -spiller=trivial
; RUN: llc < %s -verify-machineinstrs -spiller=inline
; PR8612
;
diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
index e3c18cefd51d..da4d15771f48 100644
--- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
+++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB2
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB2
; rdar://8690640
define i32* @t(i32* %x) nounwind {
diff --git a/test/CodeGen/ARM/2010-12-07-PEIBug.ll b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
index c65952be3c64..770ad4466aff 100644
--- a/test/CodeGen/ARM/2010-12-07-PEIBug.ll
+++ b/test/CodeGen/ARM/2010-12-07-PEIBug.ll
@@ -1,39 +1,15 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
; rdar://8728956
define hidden void @foo() nounwind ssp {
entry:
; CHECK: foo:
-; CHECK: push {r7, lr}
-; CHECK-NEXT: mov r7, sp
+; CHECK: mov r7, sp
; CHECK-NEXT: vpush {d8}
; CHECK-NEXT: vpush {d10, d11}
- %tmp40 = load <4 x i8>* undef
- %tmp41 = extractelement <4 x i8> %tmp40, i32 2
- %conv42 = zext i8 %tmp41 to i32
- %conv43 = sitofp i32 %conv42 to float
- %div44 = fdiv float %conv43, 2.560000e+02
- %vecinit45 = insertelement <4 x float> undef, float %div44, i32 2
- %vecinit46 = insertelement <4 x float> %vecinit45, float 1.000000e+00, i32 3
- store <4 x float> %vecinit46, <4 x float>* undef
- br i1 undef, label %if.then105, label %if.else109
-
-if.then105: ; preds = %entry
- br label %if.end114
-
-if.else109: ; preds = %entry
- br label %if.end114
-
-if.end114: ; preds = %if.else109, %if.then105
- %call185 = call float @bar()
- %vecinit186 = insertelement <4 x float> undef, float %call185, i32 1
- %call189 = call float @bar()
- %vecinit190 = insertelement <4 x float> %vecinit186, float %call189, i32 2
- %vecinit191 = insertelement <4 x float> %vecinit190, float 1.000000e+00, i32 3
- store <4 x float> %vecinit191, <4 x float>* undef
+ tail call void asm sideeffect "","~{d8},~{d10},~{d11}"() nounwind
; CHECK: vpop {d10, d11}
; CHECK-NEXT: vpop {d8}
-; CHECK-NEXT: pop {r7, pc}
ret void
}
diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 94842124fb08..ca88eedcea60 100644
--- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -3,11 +3,11 @@
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
-@x1 = internal global i8 1
-@x2 = internal global i8 1
-@x3 = internal global i8 1
-@x4 = internal global i8 1
-@x5 = global i8 1
+@x1 = internal global i8 1, align 1
+@x2 = internal global i8 1, align 1
+@x3 = internal global i8 1, align 1
+@x4 = internal global i8 1, align 1
+@x5 = global i8 1, align 1
; Check debug info output for merged global.
; DW_AT_location
@@ -17,8 +17,7 @@ target triple = "thumbv7-apple-darwin10"
; DW_OP_constu
; offset
-;CHECK: .ascii "x2" @ DW_AT_name
-;CHECK-NEXT: .byte 0
+;CHECK: .long Lset6
;CHECK-NEXT: @ DW_AT_type
;CHECK-NEXT: @ DW_AT_decl_file
;CHECK-NEXT: @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index ccda281e901e..2faa04af8bac 100644
--- a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -disable-cgp-delete-dead-blocks -mcpu=cortex-a8 | FileCheck %s
; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4.
; rdar://9133587
diff --git a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
index 0b5f96251d4f..d3394b58ed93 100644
--- a/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
+++ b/test/CodeGen/ARM/2011-05-04-MultipleLandingPadSuccs.ll
@@ -35,14 +35,14 @@ for.cond.backedge:
br label %for.cond
lpad:
- %exn = tail call i8* @llvm.eh.exception() nounwind
- %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+ %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
invoke void @foo()
to label %eh.resume unwind label %terminate.lpad
lpad26:
- %exn27 = tail call i8* @llvm.eh.exception() nounwind
- %eh.selector28 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn27, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+ %exn27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
invoke void @foo()
to label %eh.resume unwind label %terminate.lpad
@@ -57,31 +57,26 @@ call8.i.i.i.noexc:
ret void
lpad44:
- %exn45 = tail call i8* @llvm.eh.exception() nounwind
- %eh.selector46 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn45, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+ %exn45 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
invoke void @foo()
to label %eh.resume unwind label %terminate.lpad
eh.resume:
- %exn.slot.0 = phi i8* [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
- tail call void @_Unwind_SjLj_Resume_or_Rethrow(i8* %exn.slot.0) noreturn
- unreachable
+ %exn.slot.0 = phi { i8*, i32 } [ %exn27, %lpad26 ], [ %exn, %lpad ], [ %exn45, %lpad44 ]
+ resume { i8*, i32 } %exn.slot.0
terminate.lpad:
- %exn51 = tail call i8* @llvm.eh.exception() nounwind
- %eh.selector52 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn51, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i8* null) nounwind
+ %exn51 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
tail call void @_ZSt9terminatev() noreturn nounwind
unreachable
}
declare void @foo()
-declare i8* @llvm.eh.exception() nounwind readonly
-
declare i32 @__gxx_personality_sj0(...)
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare void @_Unwind_SjLj_Resume_or_Rethrow(i8*)
declare void @_ZSt9terminatev()
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 7baacfe79a65..3e78c4623859 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+target triple = "thumbv7-apple-ios"
%struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
%struct.B = type <{ i32, i16, i16 }>
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
index 1b5b8a99c2e2..091d037356de 100644
--- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -1,12 +1,10 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
-%struct.config = type { i16, i16, i16, i16 }
-
@prev = external global [0 x i16]
@max_lazy_match = internal unnamed_addr global i32 0, align 4
@read_buf = external global i32 (i8*, i32)*
@window = external global [0 x i8]
@lookahead = internal unnamed_addr global i32 0, align 4
-@eofile.b = internal unnamed_addr global i1 false
+@eofile.b = internal unnamed_addr global i32 0
@ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index f681c34bdaa2..f2b0c5d7d090 100644
--- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -8,8 +8,7 @@
; DW_OP_constu
; offset
-;CHECK: .ascii "x2" @ DW_AT_name
-;CHECK-NEXT: .byte 0
+;CHECK: .long Lset33
;CHECK-NEXT: @ DW_AT_type
;CHECK-NEXT: @ DW_AT_decl_file
;CHECK-NEXT: @ DW_AT_decl_line
diff --git a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
index 17264ee44ae5..216057a31385 100644
--- a/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
+++ b/test/CodeGen/ARM/2011-08-25-ldmia_ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a9 | FileCheck %s
; Test that ldmia_ret preserves implicit operands for return values.
;
; This CFG is reduced from a benchmark miscompile. With current
diff --git a/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
new file mode 100644
index 000000000000..09db740b7f76
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -mattr=+neon,+neonfp -relocation-model=pic
+
+target triple = "armv6-none-linux-gnueabi"
+
+define void @sample_test(i8* %.T0348, i16* nocapture %sourceA, i16* nocapture %destValues) {
+L.entry:
+ %0 = call i32 (...)* @get_index(i8* %.T0348, i32 0)
+ %1 = bitcast i16* %destValues to i8*
+ %2 = mul i32 %0, 6
+ %3 = getelementptr i8* %1, i32 %2
+ %4 = bitcast i8* %3 to <3 x i16>*
+ %5 = load <3 x i16>* %4, align 1
+ %6 = bitcast i16* %sourceA to i8*
+ %7 = getelementptr i8* %6, i32 %2
+ %8 = bitcast i8* %7 to <3 x i16>*
+ %9 = load <3 x i16>* %8, align 1
+ %10 = or <3 x i16> %9, %5
+ store <3 x i16> %10, <3 x i16>* %4, align 1
+ ret void
+}
+
+declare i32 @get_index(...)
diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
new file mode 100644
index 000000000000..ff049c89860d
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll
@@ -0,0 +1,21 @@
+; Make sure short memsets on ARM lower to stores, even when optimizing for size.
+; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+; CHECK-GENERIC: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-GENERIC-NEXT: strb
+; CHECK-UNALIGNED: strb
+; CHECK-UNALIGNED-NEXT: str
+define void @foo(i8* nocapture %c) nounwind optsize {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* %c, i8 -1, i64 5, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
new file mode 100644
index 000000000000..42b14914814a
--- /dev/null
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
+
+; Should trigger a NEON store.
+; CHECK: vstr
+define void @f_0_12(i8* nocapture %c) nounwind optsize {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+ ret void
+}
+
+; Trigger multiple NEON stores.
+; CHECK: vstmia
+; CHECK-NEXT: vstmia
+define void @f_0_40(i8* nocapture %c) nounwind optsize {
+entry:
+ call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
new file mode 100644
index 000000000000..113cbfe39620
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@i8_res = global <2 x i8> <i8 0, i8 0>
+@i8_src1 = global <2 x i8> <i8 1, i8 2>
+@i8_src2 = global <2 x i8> <i8 2, i8 1>
+
+define void @test_neon_vector_add_2xi8() nounwind {
+; CHECK: test_neon_vector_add_2xi8:
+ %1 = load <2 x i8>* @i8_src1
+ %2 = load <2 x i8>* @i8_src2
+ %3 = add <2 x i8> %1, %2
+ store <2 x i8> %3, <2 x i8>* @i8_res
+ ret void
+}
+
+define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
+; CHECK: test_neon_ld_st_volatile_with_ashr_2xi8:
+ %1 = load volatile <2 x i8>* @i8_src1
+ %2 = load volatile <2 x i8>* @i8_src2
+ %3 = ashr <2 x i8> %1, %2
+ store volatile <2 x i8> %3, <2 x i8>* @i8_res
+ ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
new file mode 100644
index 000000000000..2ab6a4fcc9b4
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; PR11319
+
+@src1_v2i16 = global <2 x i16> <i16 0, i16 1>
+@res_v2i16 = global <2 x i16> <i16 0, i16 0>
+
+declare <2 x i16> @foo_v2i16(<2 x i16>) nounwind
+
+define void @test_neon_call_return_v2i16() {
+; CHECK: test_neon_call_return_v2i16:
+ %1 = load <2 x i16>* @src1_v2i16
+ %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
+ store <2 x i16> %2, <2 x i16>* @res_v2i16
+ ret void
+}
diff --git a/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
new file mode 100644
index 000000000000..719571b3d1fd
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+define <2 x i32> @test1(<2 x double>* %A) {
+; CHECK: test1
+; CHECK: vcvt.s32.f64
+; CHECK: vcvt.s32.f64
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = fptosi <2 x double> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x i32> @test2(<2 x double>* %A) {
+; CHECK: test2
+; CHECK: vcvt.u32.f64
+; CHECK: vcvt.u32.f64
+ %tmp1 = load <2 x double>* %A
+ %tmp2 = fptoui <2 x double> %tmp1 to <2 x i32>
+ ret <2 x i32> %tmp2
+}
+
+define <2 x double> @test3(<2 x i32>* %A) {
+; CHECK: test3
+; CHECK: vcvt.f64.s32
+; CHECK: vcvt.f64.s32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = sitofp <2 x i32> %tmp1 to <2 x double>
+ ret <2 x double> %tmp2
+}
+
+define <2 x double> @test4(<2 x i32>* %A) {
+; CHECK: test4
+; CHECK: vcvt.f64.u32
+; CHECK: vcvt.f64.u32
+ %tmp1 = load <2 x i32>* %A
+ %tmp2 = uitofp <2 x i32> %tmp1 to <2 x double>
+ ret <2 x double> %tmp2
+}
diff --git a/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
new file mode 100644
index 000000000000..52aa0bfaa545
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-regalloc
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This test calls shrinkToUses with an early-clobber redefined live range during
+; spilling.
+;
+; Shrink: %vreg47,1.158257e-02 = [384r,400e:0)[400e,420r:1) 0@384r 1@400e
+;
+; The early-clobber instruction is an str:
+;
+; %vreg12<earlyclobber,def> = t2STR_PRE %vreg6, %vreg12, 32, pred:14, pred:%noreg
+;
+; This tests that shrinkToUses handles the EC redef correctly.
+
+%struct.Transform_Struct.0.11.12.17.43.46.56.58.60 = type { [4 x [4 x double]] }
+
+define void @Compute_Axis_Rotation_Transform(%struct.Transform_Struct.0.11.12.17.43.46.56.58.60* nocapture %transform, double* nocapture %V1, double %angle) nounwind {
+entry:
+ store double 1.000000e+00, double* null, align 4
+ %arrayidx5.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 1
+ store double 0.000000e+00, double* %arrayidx5.1.i, align 4
+ %arrayidx5.2.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 0, i32 2
+ store double 0.000000e+00, double* %arrayidx5.2.i, align 4
+ %arrayidx5.114.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 0
+ store double 0.000000e+00, double* %arrayidx5.114.i, align 4
+ %arrayidx5.1.1.i = getelementptr inbounds %struct.Transform_Struct.0.11.12.17.43.46.56.58.60* %transform, i32 0, i32 0, i32 1, i32 1
+ store double 1.000000e+00, double* %arrayidx5.1.1.i, align 4
+ store double 0.000000e+00, double* null, align 4
+ store double 1.000000e+00, double* null, align 4
+ store double 0.000000e+00, double* null, align 4
+ %call = tail call double @cos(double %angle) nounwind readnone
+ %call1 = tail call double @sin(double %angle) nounwind readnone
+ %0 = load double* %V1, align 4
+ %arrayidx2 = getelementptr inbounds double* %V1, i32 1
+ %1 = load double* %arrayidx2, align 4
+ %mul = fmul double %0, %1
+ %sub = fsub double 1.000000e+00, %call
+ %mul3 = fmul double %mul, %sub
+ %2 = load double* undef, align 4
+ %mul5 = fmul double %2, %call1
+ %add = fadd double %mul3, %mul5
+ store double %add, double* %arrayidx5.1.i, align 4
+ %3 = load double* %V1, align 4
+ %mul11 = fmul double %3, undef
+ %mul13 = fmul double %mul11, %sub
+ %4 = load double* %arrayidx2, align 4
+ %mul15 = fmul double %4, %call1
+ %sub16 = fsub double %mul13, %mul15
+ store double %sub16, double* %arrayidx5.2.i, align 4
+ %5 = load double* %V1, align 4
+ %6 = load double* %arrayidx2, align 4
+ %mul22 = fmul double %5, %6
+ %mul24 = fmul double %mul22, %sub
+ %sub27 = fsub double %mul24, undef
+ store double %sub27, double* %arrayidx5.114.i, align 4
+ ret void
+}
+
+declare double @cos(double) nounwind readnone
+
+declare double @sin(double) nounwind readnone
diff --git a/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
new file mode 100644
index 000000000000..5409f8c60887
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0.0 | FileCheck %s
+; rdar://10464621
+
+; DAG combine increases loads from packed types. The ARM load / store optimizer
+; then combines them into an ldm, which causes a runtime exception.
+
+%struct.InformationBlock = type <{ i32, %struct.FlagBits, %struct.FlagBits }>
+%struct.FlagBits = type <{ [4 x i32] }>
+
+@infoBlock = external global %struct.InformationBlock
+
+define hidden void @foo() {
+; CHECK: foo:
+; CHECK: ldr.w
+; CHECK: ldr.w
+; CHECK-NOT: ldm
+entry:
+ %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+ %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+ %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+ %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+ %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+ %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+ %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+ %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+ %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
+ %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
+ %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
+ %insert27 = insertvalue [4 x i32] %insert25, i32 %tmp19, 3
+ %insert = insertvalue [4 x i32] undef, i32 %tmp, 0
+ %insert7 = insertvalue [4 x i32] %insert, i32 %tmp3, 1
+ %insert9 = insertvalue [4 x i32] %insert7, i32 %tmp4, 2
+ %insert11 = insertvalue [4 x i32] %insert9, i32 %tmp5, 3
+ tail call void @bar([4 x i32] %insert27, [4 x i32] %insert11)
+ ret void
+}
+
+declare void @bar([4 x i32], [4 x i32])
diff --git a/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
new file mode 100644
index 000000000000..6fbae199aaed
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -0,0 +1,302 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s
+
+@A = global <4 x float> <float 0., float 1., float 2., float 3.>
+
+define void @test_sqrt(<4 x float>* %X) nounwind {
+
+; CHECK: test_sqrt:
+
+; CHECK: movw r1, :lower16:{{.*}}
+; CHECK: movt r1, :upper16:{{.*}}
+; CHECK: vldmia r1
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_cos(<4 x float>* %X) nounwind {
+
+; CHECK: test_cos:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}cosf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}expf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.exp.v4f32(<4 x float>) nounwind readonly
+
+define void @test_exp2(<4 x float>* %X) nounwind {
+
+; CHECK: test_exp2:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}exp2f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.exp2.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log10(<4 x float>* %X) nounwind {
+
+; CHECK: test_log10:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log10f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.log10.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log(<4 x float>* %X) nounwind {
+
+; CHECK: test_log:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}logf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.log.v4f32(<4 x float>) nounwind readonly
+
+define void @test_log2(<4 x float>* %X) nounwind {
+
+; CHECK: test_log2:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}log2f
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.log2.v4f32(<4 x float>) nounwind readonly
+
+
+define void @test_pow(<4 x float>* %X) nounwind {
+
+; CHECK: test_pow:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}powf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
+
+ store <4 x float> %1, <4 x float>* %X, align 16
+
+ ret void
+}
+
+declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) nounwind readonly
+
+define void @test_powi(<4 x float>* %X) nounwind {
+
+; CHECK: test_powi:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia [[reg0]], {{.*}}
+; CHECK: vmul.f32 {{.*}}
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
+
+ store <4 x float> %1, <4 x float>* %X, align 16
+
+ ret void
+}
+
+declare <4 x float> @llvm.powi.v4f32(<4 x float>, i32) nounwind readonly
+
+define void @test_sin(<4 x float>* %X) nounwind {
+
+; CHECK: test_sin:
+
+; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
+; CHECK: movt [[reg0]], :upper16:{{.*}}
+; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: {{[v]?mov}} r0, {{[r|s][0-9]+}}
+; CHECK: bl {{.*}}sinf
+
+; CHECK: vstmia {{.*}}
+
+L.entry:
+ %0 = load <4 x float>* @A, align 16
+ %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
+ store <4 x float> %1, <4 x float>* %X, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.sin.v4f32(<4 x float>) nounwind readonly
+
diff --git a/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
new file mode 100644
index 000000000000..0c90f4cf949a
--- /dev/null
+++ b/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s | FileCheck %s
+; <rdar://problem/10497732>
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+@x1 = internal global i32 1
+@x2 = internal global i64 12
+
+define i64 @f() {
+ %ax = load i32* @x1
+ %a = zext i32 %ax to i64
+ %b = load i64* @x2
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+; We can global-merge the i64 in theory, but the current code doesn't handle
+; the alignment correctly; for the moment, just check that we don't do it.
+; See also
+
+; CHECK-NOT: MergedGlobals
+; CHECK: _x2
+; CHECK-NOT: MergedGlobals
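+;
+; (With the datalayout above, i64 has 32-bit ABI but 64-bit preferred
+; alignment, so a merge keyed off the 32-bit @x1 would presumably leave
+; @x2 under-aligned.)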
diff --git a/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
new file mode 100644
index 000000000000..5ce600d1a939
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -o /dev/null -stats |& FileCheck %s -check-prefix=STATS
+; Radar 10266272
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios4.0.0"
+; STATS-NOT: machine-sink
+
+define i32 @foo(i32 %h) nounwind readonly ssp {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %cmp = icmp slt i32 0, %h
+ br i1 %cmp, label %for.body, label %if.end299
+
+for.body: ; preds = %for.cond
+ %v.5 = select i1 undef, i32 undef, i32 0
+ %0 = load i8* undef, align 1, !tbaa !0
+ %conv88 = zext i8 %0 to i32
+ %sub89 = sub nsw i32 0, %conv88
+ %v.8 = select i1 undef, i32 undef, i32 %sub89
+ %1 = load i8* null, align 1, !tbaa !0
+ %conv108 = zext i8 %1 to i32
+ %2 = load i8* undef, align 1, !tbaa !0
+ %conv110 = zext i8 %2 to i32
+ %sub111 = sub nsw i32 %conv108, %conv110
+ %cmp112 = icmp slt i32 %sub111, 0
+ %sub115 = sub nsw i32 0, %sub111
+ %v.10 = select i1 %cmp112, i32 %sub115, i32 %sub111
+ %add62 = add i32 0, %v.5
+ %add73 = add i32 %add62, 0
+ %add84 = add i32 %add73, 0
+ %add95 = add i32 %add84, %v.8
+ %add106 = add i32 %add95, 0
+ %add117 = add i32 %add106, %v.10
+ %add128 = add i32 %add117, 0
+ %add139 = add i32 %add128, 0
+ %add150 = add i32 %add139, 0
+ %add161 = add i32 %add150, 0
+ %add172 = add i32 %add161, 0
+ br i1 undef, label %for.cond, label %if.end299
+
+if.end299: ; preds = %for.body, %for.cond
+ %s.10 = phi i32 [ %add172, %for.body ], [ 0, %for.cond ]
+ ret i32 %s.10
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
new file mode 100644
index 000000000000..ddb76326947c
--- /dev/null
+++ b/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Radar 10567930: Make sure that all the caller-saved registers are saved and
+; restored in a function with setjmp/longjmp EH. In particular, r6 was not
+; being saved here.
+; CHECK: push {r4, r5, r6, r7, lr}
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+define i32 @asdf(i32 %a, i32 %b, i8** %c, i8* %d) {
+bb:
+ %tmp = alloca i32, align 4
+ %tmp1 = alloca i32, align 4
+ %tmp2 = alloca i8*, align 4
+ %tmp3 = alloca i1
+ %myException = alloca %0*, align 4
+ %tmp4 = alloca i8*
+ %tmp5 = alloca i32
+ %exception = alloca %0*, align 4
+ store i32 %a, i32* %tmp, align 4
+ store i32 %b, i32* %tmp1, align 4
+ store i8* %d, i8** %tmp2, align 4
+ store i1 false, i1* %tmp3
+ %tmp7 = load i8** %c
+ %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
+ to label %bb11 unwind label %bb15
+
+bb11: ; preds = %bb
+ store %0* %tmp10, %0** %myException, align 4
+ %tmp12 = load %0** %myException, align 4
+ %tmp13 = bitcast %0* %tmp12 to i8*
+ invoke void @objc_exception_throw(i8* %tmp13) noreturn
+ to label %bb14 unwind label %bb15
+
+bb14: ; preds = %bb11
+ unreachable
+
+bb15: ; preds = %bb11, %bb
+ %tmp16 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ catch i8* null
+ %tmp17 = extractvalue { i8*, i32 } %tmp16, 0
+ store i8* %tmp17, i8** %tmp4
+ %tmp18 = extractvalue { i8*, i32 } %tmp16, 1
+ store i32 %tmp18, i32* %tmp5
+ store i1 true, i1* %tmp3
+ br label %bb56
+
+bb56:
+ unreachable
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+declare i32 @__objc_personality_v0(...)
+declare void @objc_exception_throw(i8*)
diff --git a/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
new file mode 100644
index 000000000000..926daafbb7f1
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -0,0 +1,105 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg) nounwind uwtable align 2 {
+bb:
+ br i1 undef, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ unreachable
+
+bb2: ; preds = %bb
+ br label %bb3
+
+bb3: ; preds = %bb4, %bb2
+ %tmp = icmp slt i32 undef, undef
+ br i1 %tmp, label %bb4, label %bb67
+
+bb4: ; preds = %bb3
+ %tmp5 = load <4 x i32>* undef, align 16, !tbaa !0
+ %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+ %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+ %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
+ %tmp9 = fsub <4 x float> %tmp8, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+ %tmp10 = fmul <4 x float> undef, %tmp9
+ %tmp11 = fadd <4 x float> undef, %tmp10
+ %tmp12 = bitcast <4 x float> zeroinitializer to i128
+ %tmp13 = lshr i128 %tmp12, 64
+ %tmp14 = trunc i128 %tmp13 to i64
+ %tmp15 = insertvalue [2 x i64] undef, i64 %tmp14, 1
+ %tmp16 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp11) nounwind
+ %tmp17 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp16, <4 x float> %tmp11) nounwind
+ %tmp18 = fmul <4 x float> %tmp17, %tmp16
+ %tmp19 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp18, <4 x float> %tmp11) nounwind
+ %tmp20 = fmul <4 x float> %tmp19, %tmp18
+ %tmp21 = fmul <4 x float> %tmp20, zeroinitializer
+ %tmp22 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp21, <4 x float> undef) nounwind
+ call arm_aapcs_vfpcc void @bar(i8* null, i8* undef, <4 x i32>* undef, [2 x i64] zeroinitializer) nounwind
+ %tmp23 = bitcast <4 x float> %tmp22 to i128
+ %tmp24 = trunc i128 %tmp23 to i64
+ %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
+ %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
+ %tmp27 = load float* undef, align 4, !tbaa !2
+ %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
+ %tmp29 = load <4 x i32>* undef, align 16, !tbaa !0
+ %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
+ %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
+ %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
+ %tmp33 = fsub <4 x float> %tmp32, bitcast (i128 or (i128 shl (i128 zext (i64 trunc (i128 lshr (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128), i128 64) to i64) to i128), i128 64), i128 zext (i64 trunc (i128 bitcast (<4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> to i128) to i64) to i128)) to <4 x float>)
+ %tmp34 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> undef, <4 x float> %tmp28) nounwind
+ %tmp35 = fmul <4 x float> %tmp34, undef
+ %tmp36 = fmul <4 x float> %tmp35, undef
+ %tmp37 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+ %tmp38 = load float* undef, align 4, !tbaa !2
+ %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
+ %tmp40 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+ %tmp41 = load float* undef, align 4, !tbaa !2
+ %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
+ %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp44 = fmul <4 x float> %tmp33, %tmp43
+ %tmp45 = fadd <4 x float> %tmp42, %tmp44
+ %tmp46 = fsub <4 x float> %tmp45, undef
+ %tmp47 = fmul <4 x float> %tmp46, %tmp36
+ %tmp48 = fadd <4 x float> undef, %tmp47
+ %tmp49 = call arm_aapcs_vfpcc i8* undef(i8* undef) nounwind
+ %tmp50 = load float* undef, align 4, !tbaa !2
+ %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
+ %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
+ %tmp54 = load float* %tmp52, align 4, !tbaa !2
+ %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
+ %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
+ %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
+ %tmp58 = fmul <4 x float> undef, %tmp57
+ %tmp59 = fsub <4 x float> %tmp51, %tmp48
+ %tmp60 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp58
+ %tmp61 = fmul <4 x float> %tmp59, %tmp60
+ %tmp62 = fadd <4 x float> %tmp48, %tmp61
+ call arm_aapcs_vfpcc void @baz(i8* undef, i8* undef, [2 x i64] %tmp26, <4 x i32>* undef)
+ %tmp63 = bitcast <4 x float> %tmp62 to i128
+ %tmp64 = lshr i128 %tmp63, 64
+ %tmp65 = trunc i128 %tmp64 to i64
+ %tmp66 = insertvalue [2 x i64] zeroinitializer, i64 %tmp65, 1
+ call arm_aapcs_vfpcc void @quux(i8* undef, i8* undef, [2 x i64] undef, i8* undef, [2 x i64] %tmp66, i8* undef, i8* undef, [2 x i64] %tmp26, [2 x i64] %tmp15, <4 x i32>* undef)
+ br label %bb3
+
+bb67: ; preds = %bb3
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, i8*, <4 x i32>*, [2 x i64])
+
+declare arm_aapcs_vfpcc void @baz(i8*, i8* nocapture, [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare arm_aapcs_vfpcc void @quux(i8*, i8*, [2 x i64], i8* nocapture, [2 x i64], i8* nocapture, i8* nocapture, [2 x i64], [2 x i64], <4 x i32>* nocapture) nounwind uwtable inlinehint align 2
+
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float>, <4 x float>) nounwind readnone
+
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!2 = metadata !{metadata !"float", metadata !0}
diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
new file mode 100644
index 000000000000..872eca34ad11
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -0,0 +1,67 @@
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -verify-coalescing
+; PR11841
+; PR11829
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+; This test case exercises REG_SEQUENCE and chains of REG_SEQUENCE.
+define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
+bb:
+ %tmp = load <2 x float>* undef, align 8, !tbaa !0
+ %tmp2 = extractelement <2 x float> %tmp, i32 0
+ %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
+ %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
+ %tmp5 = insertelement <4 x float> %tmp4, float 0.000000e+00, i32 2
+ %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3
+ %tmp7 = extractelement <2 x float> %tmp, i32 1
+ %tmp8 = insertelement <4 x float> %tmp3, float %tmp7, i32 1
+ %tmp9 = insertelement <4 x float> %tmp8, float 0.000000e+00, i32 2
+ %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 3
+ %tmp11 = bitcast <4 x float> %tmp6 to <2 x i64>
+ %tmp12 = shufflevector <2 x i64> %tmp11, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp13 = bitcast <1 x i64> %tmp12 to <2 x float>
+ %tmp14 = shufflevector <2 x float> %tmp13, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp15 = bitcast <4 x float> %tmp14 to <2 x i64>
+ %tmp16 = shufflevector <2 x i64> %tmp15, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp17 = bitcast <1 x i64> %tmp16 to <2 x float>
+ %tmp18 = extractelement <2 x float> %tmp17, i32 0
+ tail call arm_aapcs_vfpcc void @bar(i8* undef, float %tmp18, float undef, float 0.000000e+00) nounwind
+ %tmp19 = bitcast <4 x float> %tmp10 to <2 x i64>
+ %tmp20 = shufflevector <2 x i64> %tmp19, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp21 = bitcast <1 x i64> %tmp20 to <2 x float>
+ %tmp22 = shufflevector <2 x float> %tmp21, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp23 = bitcast <4 x float> %tmp22 to <2 x i64>
+ %tmp24 = shufflevector <2 x i64> %tmp23, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp25 = bitcast <1 x i64> %tmp24 to <2 x float>
+ %tmp26 = extractelement <2 x float> %tmp25, i32 0
+ tail call arm_aapcs_vfpcc void @bar(i8* undef, float undef, float %tmp26, float 0.000000e+00) nounwind
+ ret void
+}
+
+define arm_aapcs_vfpcc void @foo2() nounwind uwtable {
+entry:
+ br i1 undef, label %for.end, label %cond.end295
+
+cond.end295: ; preds = %entry
+ %shuffle.i39.i.i1035 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i38.i.i1036 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i37.i.i1037 = shufflevector <1 x i64> %shuffle.i39.i.i1035, <1 x i64> %shuffle.i38.i.i1036, <2 x i32> <i32 0, i32 1>
+ %0 = bitcast <2 x i64> %shuffle.i37.i.i1037 to <4 x float>
+ %1 = bitcast <4 x float> undef to <2 x i64>
+ %shuffle.i36.i.i = shufflevector <2 x i64> %1, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+ %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> <i32 0, i32 1>
+ %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float>
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind
+ unreachable
+
+for.end: ; preds = %entry
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8*, float, float, float)
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
new file mode 100644
index 000000000000..ec5b2e9de7ca
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CoalescerBug.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -verify-coalescing
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-eabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind uwtable align 2 {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %2 = phi <4 x float> [ undef, %0 ], [ %11, %1 ]
+ %3 = bitcast <4 x float> %2 to <2 x i64>
+ %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> zeroinitializer
+ %5 = xor <2 x i32> zeroinitializer, <i32 -1, i32 -1>
+ %6 = bitcast <2 x i32> zeroinitializer to <2 x float>
+ %7 = shufflevector <2 x float> zeroinitializer, <2 x float> %6, <2 x i32> <i32 0, i32 2>
+ %8 = shufflevector <2 x i64> %3, <2 x i64> undef, <1 x i32> <i32 1>
+ %9 = bitcast <2 x float> %7 to <1 x i64>
+ %10 = shufflevector <1 x i64> %9, <1 x i64> %8, <2 x i32> <i32 0, i32 1>
+ %11 = bitcast <2 x i64> %10 to <4 x float>
+ br label %1
+}
diff --git a/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
new file mode 100644
index 000000000000..5f24e427c229
--- /dev/null
+++ b/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mcpu=cortex-a9 -join-liveintervals=0 -verify-machineinstrs
+; PR11765
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+; This test case exercises the MachineCopyPropagation pass by disabling the
+; RegisterCoalescer.
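+; (-join-liveintervals=0 in the RUN line is what disables the coalescer,
+; leaving the COPY instructions in place for MachineCopyPropagation to
+; process.)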
+
+define arm_aapcs_vfpcc void @foo(i8* %arg) nounwind uwtable align 2 {
+bb:
+ br i1 undef, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ unreachable
+
+bb2: ; preds = %bb
+ br i1 undef, label %bb92, label %bb3
+
+bb3: ; preds = %bb2
+ %tmp = or <4 x i32> undef, undef
+ %tmp4 = bitcast <4 x i32> %tmp to <4 x float>
+ %tmp5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp4
+ %tmp6 = bitcast <4 x i32> zeroinitializer to <4 x float>
+ %tmp7 = fmul <4 x float> %tmp6, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+ %tmp8 = bitcast <4 x float> %tmp7 to <2 x i64>
+ %tmp9 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+ %tmp11 = shufflevector <2 x i64> %tmp8, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp12 = bitcast <1 x i64> %tmp11 to <2 x float>
+ %tmp13 = shufflevector <2 x float> %tmp10, <2 x float> %tmp12, <2 x i32> <i32 0, i32 2>
+ %tmp14 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 1, i32 2>
+ %tmp15 = bitcast <2 x float> %tmp14 to <1 x i64>
+ %tmp16 = bitcast <4 x i32> zeroinitializer to <2 x i64>
+ %tmp17 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp18 = bitcast <1 x i64> %tmp17 to <2 x i32>
+ %tmp19 = and <2 x i32> %tmp18, <i32 -1, i32 0>
+ %tmp20 = bitcast <2 x float> %tmp13 to <2 x i32>
+ %tmp21 = and <2 x i32> %tmp20, <i32 0, i32 -1>
+ %tmp22 = or <2 x i32> %tmp19, %tmp21
+ %tmp23 = bitcast <2 x i32> %tmp22 to <1 x i64>
+ %tmp24 = shufflevector <1 x i64> %tmp23, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %tmp25 = bitcast <2 x i64> %tmp24 to <4 x float>
+ %tmp26 = shufflevector <2 x i64> %tmp16, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp27 = bitcast <1 x i64> %tmp26 to <2 x i32>
+ %tmp28 = and <2 x i32> %tmp27, <i32 -1, i32 0>
+ %tmp29 = and <2 x i32> undef, <i32 0, i32 -1>
+ %tmp30 = or <2 x i32> %tmp28, %tmp29
+ %tmp31 = bitcast <2 x i32> %tmp30 to <1 x i64>
+ %tmp32 = insertelement <4 x float> %tmp25, float 0.000000e+00, i32 3
+ %tmp33 = fmul <4 x float> undef, <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
+ %tmp34 = fadd <4 x float> %tmp33, %tmp32
+ %tmp35 = fmul <4 x float> %tmp33, zeroinitializer
+ %tmp36 = fadd <4 x float> %tmp35, zeroinitializer
+ %tmp37 = fadd <4 x float> %tmp35, zeroinitializer
+ %tmp38 = bitcast <4 x float> %tmp34 to <2 x i64>
+ %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
+ %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp42 = load <4 x float>* null, align 16, !tbaa !0
+ %tmp43 = fmul <4 x float> %tmp42, %tmp41
+ %tmp44 = load <4 x float>* undef, align 16, !tbaa !0
+ %tmp45 = fadd <4 x float> undef, %tmp43
+ %tmp46 = fadd <4 x float> undef, %tmp45
+ %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
+ %tmp48 = shufflevector <2 x i64> %tmp47, <2 x i64> undef, <1 x i32> zeroinitializer
+ %tmp49 = bitcast <1 x i64> %tmp48 to <2 x float>
+ %tmp50 = shufflevector <2 x float> %tmp49, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp51 = fmul <4 x float> %tmp42, %tmp50
+ %tmp52 = fmul <4 x float> %tmp44, undef
+ %tmp53 = fadd <4 x float> %tmp52, %tmp51
+ %tmp54 = fadd <4 x float> undef, %tmp53
+ %tmp55 = bitcast <4 x float> %tmp37 to <2 x i64>
+ %tmp56 = shufflevector <2 x i64> %tmp55, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp57 = bitcast <1 x i64> %tmp56 to <2 x float>
+ %tmp58 = shufflevector <2 x float> %tmp57, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp59 = fmul <4 x float> undef, %tmp58
+ %tmp60 = fadd <4 x float> %tmp59, undef
+ %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
+ %tmp62 = load void (i8*, i8*)** undef, align 4
+ call arm_aapcs_vfpcc void %tmp62(i8* sret undef, i8* undef) nounwind
+ %tmp63 = bitcast <4 x float> %tmp46 to i128
+ %tmp64 = bitcast <4 x float> %tmp54 to i128
+ %tmp65 = bitcast <4 x float> %tmp61 to i128
+ %tmp66 = lshr i128 %tmp63, 64
+ %tmp67 = trunc i128 %tmp66 to i64
+ %tmp68 = insertvalue [8 x i64] undef, i64 %tmp67, 1
+ %tmp69 = insertvalue [8 x i64] %tmp68, i64 undef, 2
+ %tmp70 = lshr i128 %tmp64, 64
+ %tmp71 = trunc i128 %tmp70 to i64
+ %tmp72 = insertvalue [8 x i64] %tmp69, i64 %tmp71, 3
+ %tmp73 = trunc i128 %tmp65 to i64
+ %tmp74 = insertvalue [8 x i64] %tmp72, i64 %tmp73, 4
+ %tmp75 = insertvalue [8 x i64] %tmp74, i64 undef, 5
+ %tmp76 = insertvalue [8 x i64] %tmp75, i64 undef, 6
+ %tmp77 = insertvalue [8 x i64] %tmp76, i64 undef, 7
+ call arm_aapcs_vfpcc void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
+ %tmp78 = call arm_aapcs_vfpcc i8* null(i8* null) nounwind
+ %tmp79 = bitcast i8* %tmp78 to i512*
+ %tmp80 = load i512* %tmp79, align 16
+ %tmp81 = lshr i512 %tmp80, 128
+ %tmp82 = trunc i512 %tmp80 to i128
+ %tmp83 = trunc i512 %tmp81 to i128
+ %tmp84 = bitcast i128 %tmp83 to <4 x float>
+ %tmp85 = bitcast <4 x float> %tmp84 to <2 x i64>
+ %tmp86 = shufflevector <2 x i64> %tmp85, <2 x i64> undef, <1 x i32> <i32 1>
+ %tmp87 = bitcast <1 x i64> %tmp86 to <2 x float>
+ %tmp88 = shufflevector <2 x float> %tmp87, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+ %tmp89 = fmul <4 x float> undef, %tmp88
+ %tmp90 = fadd <4 x float> %tmp89, undef
+ %tmp91 = fadd <4 x float> undef, %tmp90
+ store <4 x float> %tmp91, <4 x float>* undef, align 16, !tbaa !0
+ unreachable
+
+bb92: ; preds = %bb2
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @bar(i8* noalias nocapture sret, [8 x i64]) nounwind uwtable inlinehint
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
new file mode 100644
index 000000000000..6c7aaad7c692
--- /dev/null
+++ b/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -0,0 +1,26 @@
+; RUN: llc -verify-coalescing < %s
+; PR11868
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { <4 x float> }
+%1 = type { <4 x float> }
+
+@foo = external global %0, align 16
+
+define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
+ %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+ %5 = extractelement <4 x float> %4, i32 0
+ %6 = extractelement <4 x float> %4, i32 1
+ %7 = extractelement <4 x float> %4, i32 2
+ %8 = insertelement <4 x float> undef, float %5, i32 0
+ %9 = insertelement <4 x float> %8, float %6, i32 1
+ %10 = insertelement <4 x float> %9, float %7, i32 2
+ %11 = insertelement <4 x float> %10, float 0.000000e+00, i32 3
+ store <4 x float> %11, <4 x float>* undef, align 16
+ call arm_aapcs_vfpcc void @baz(%1* undef, float 0.000000e+00) nounwind
+ ret void
+}
+
+declare arm_aapcs_vfpcc void @baz(%1*, float)
diff --git a/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
new file mode 100644
index 000000000000..c9ea69124968
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-05-FPSCR-bug.ll
@@ -0,0 +1,36 @@
+; RUN: llc -march=arm -mcpu=cortex-a8 -verify-machineinstrs < %s
+; PR12165
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "arm-none-linux"
+
+define hidden void @_strtod_r() nounwind {
+ br i1 undef, label %1, label %2
+
+; <label>:1 ; preds = %0
+ br label %2
+
+; <label>:2 ; preds = %1, %0
+ br i1 undef, label %3, label %8
+
+; <label>:3 ; preds = %2
+ br i1 undef, label %4, label %7
+
+; <label>:4 ; preds = %3
+ %5 = call i32 @llvm.flt.rounds()
+ %6 = icmp eq i32 %5, 1
+ br i1 %6, label %8, label %7
+
+; <label>:7 ; preds = %4, %3
+ unreachable
+
+; <label>:8 ; preds = %4, %2
+ br i1 undef, label %9, label %10
+
+; <label>:9 ; preds = %8
+ br label %10
+
+; <label>:10 ; preds = %9, %8
+ ret void
+}
+
+declare i32 @llvm.flt.rounds() nounwind
diff --git a/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
new file mode 100644
index 000000000000..6206cd74d584
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
+; rdar://11035895
+
+; DAG combine incorrectly optimizes (i32 vextract (v4i16 load $addr), c) to
+; (i16 load $addr+c*sizeof(i16)). It should have issued an extload instead, i.e.
+; (i32 extload $addr+c*sizeof(i16)).
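+; (Here c = 2, so the bad combine would have been an i16 load at $addr+4.)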
+define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
+entry:
+; CHECK: vst1.32
+ %0 = load <3 x i16> * %srcA, align 8
+ %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
+ store <2 x i16> %1, <2 x i16> * %dst, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
new file mode 100644
index 000000000000..0ff4f510eb3e
--- /dev/null
+++ b/test/CodeGen/ARM/2012-03-26-FoldImmBug.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
+
+; ARM has a peephole optimization which looks for a def / use pair. The def
+; produces a 32-bit immediate which is consumed by the use. It tries to
+; fold the immediate by breaking it into two parts and folding them into the
+; immediate fields of two uses. e.g.
+; movw r2, #40885
+; movt r3, #46540
+; add r0, r0, r3
+; =>
+; add.w r0, r0, #3019898880
+; add.w r0, r0, #30146560
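+; (46540 << 16 = 0xB5CC0000 = 3050045440, split as 3019898880 + 30146560,
+; i.e. 0xB4000000 + 0x01CC0000 -- each half encodable as an immediate.)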
+;
+; However, this transformation is incorrect if the use also sets the flags, e.g.
+; movw r2, #40885
+; movt r3, #46540
+; adds r0, r0, r3
+; =>
+; add.w r0, r0, #3019898880
+; adds.w r0, r0, #30146560
+; Note the adds.w may not set the carry flag even if the original sequence
+; would.
+;
+; rdar://11116189
+define i64 @t(i64 %aInput) nounwind {
+; CHECK: t:
+; CHECK: movs [[REG:(r[0-9]+)]], #0
+; CHECK: movt [[REG]], #46540
+; CHECK: adds r{{[0-9]+}}, r{{[0-9]+}}, [[REG]]
+ %1 = mul i64 %aInput, 1000000
+ %2 = add i64 %1, -7952618389194932224
+ ret i64 %2
+}
diff --git a/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
new file mode 100644
index 000000000000..33ad187926bf
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-02-TwoAddrInstrCrash.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s
+; PR11861
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo() nounwind align 2 {
+ br i1 undef, label %5, label %1
+
+; <label>:1 ; preds = %0
+ %2 = shufflevector <1 x i64> zeroinitializer, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+ %3 = bitcast <2 x i64> %2 to <4 x float>
+ store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+ store <4 x float> zeroinitializer, <4 x float>* undef, align 16, !tbaa !0
+ store <4 x float> %3, <4 x float>* undef, align 16, !tbaa !0
+ %4 = insertelement <4 x float> %3, float 8.000000e+00, i32 2
+ store <4 x float> %4, <4 x float>* undef, align 16, !tbaa !0
+ unreachable
+
+; <label>:5 ; preds = %0
+ ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/2012-04-10-DAGCombine.ll b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
new file mode 100644
index 000000000000..6f50f279b5de
--- /dev/null
+++ b/test/CodeGen/ARM/2012-04-10-DAGCombine.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a9 -enable-unsafe-fp-math
+;target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+;target triple = "armv7-none-linux-gnueabi"
+
+define arm_aapcs_vfpcc void @foo(<4 x float> %arg) nounwind align 2 {
+bb4:
+ %tmp = extractelement <2 x float> undef, i32 0
+ br i1 undef, label %bb18, label %bb5
+
+bb5: ; preds = %bb4
+ %tmp6 = fadd float %tmp, -1.500000e+01
+ %tmp7 = fdiv float %tmp6, 2.000000e+01
+ %tmp8 = fadd float %tmp7, 1.000000e+00
+ %tmp9 = fdiv float 1.000000e+00, %tmp8
+ %tmp10 = fsub float 1.000000e+00, %tmp9
+ %tmp11 = fmul float %tmp10, 1.000000e+01
+ %tmp12 = fadd float %tmp11, 1.500000e+01
+ %tmp13 = fdiv float %tmp12, %tmp
+ %tmp14 = insertelement <2 x float> undef, float %tmp13, i32 0
+ %tmp15 = shufflevector <2 x float> %tmp14, <2 x float> undef, <4 x i32> zeroinitializer
+ %tmp16 = fmul <4 x float> zeroinitializer, %tmp15
+ %tmp17 = fadd <4 x float> %tmp16, %arg
+ store <4 x float> %tmp17, <4 x float>* undef, align 8, !tbaa !0
+ br label %bb18
+
+bb18: ; preds = %bb5, %bb4
+ ret void
+}
+
+!0 = metadata !{metadata !"omnipotent char", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 95edaad47e5f..1272e8efc26b 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv6-apple-ios -regalloc=basic | FileCheck %s
; rdar://8015977
; rdar://8020118
diff --git a/test/CodeGen/ARM/atomic-op.ll b/test/CodeGen/ARM/atomic-op.ll
index 02ce5a14691a..8967730835a5 100644
--- a/test/CodeGen/ARM/atomic-op.ll
+++ b/test/CodeGen/ARM/atomic-op.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -verify-machineinstrs | FileCheck %s
define void @func(i32 %argc, i8** %argv) nounwind {
entry:
@@ -61,7 +61,7 @@ entry:
; CHECK: strex
%7 = atomicrmw min i32* %val2, i32 16 monotonic
store i32 %7, i32* %old
- %neg = sub i32 0, 1 ; <i32> [#uses=1]
+ %neg = sub i32 0, 1
; CHECK: ldrex
; CHECK: cmp
; CHECK: strex
@@ -77,5 +77,85 @@ entry:
; CHECK: strex
%10 = atomicrmw max i32* %val2, i32 0 monotonic
store i32 %10, i32* %old
- ret void
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %11 = atomicrmw umin i32* %val2, i32 16 monotonic
+ store i32 %11, i32* %old
+ %uneg = sub i32 0, 1
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %12 = atomicrmw umin i32* %val2, i32 %uneg monotonic
+ store i32 %12, i32* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %13 = atomicrmw umax i32* %val2, i32 1 monotonic
+ store i32 %13, i32* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %14 = atomicrmw umax i32* %val2, i32 0 monotonic
+ store i32 %14, i32* %old
+
+ ret void
+}
+
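+; The same ldrex/cmp/strex loops must also cover the i16 case (and the i8
+; case in func3 below).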
+define void @func2() nounwind {
+entry:
+ %val = alloca i16
+ %old = alloca i16
+ store i16 31, i16* %val
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %0 = atomicrmw umin i16* %val, i16 16 monotonic
+ store i16 %0, i16* %old
+ %uneg = sub i16 0, 1
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %1 = atomicrmw umin i16* %val, i16 %uneg monotonic
+ store i16 %1, i16* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %2 = atomicrmw umax i16* %val, i16 1 monotonic
+ store i16 %2, i16* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %3 = atomicrmw umax i16* %val, i16 0 monotonic
+ store i16 %3, i16* %old
+ ret void
+}
+
+define void @func3() nounwind {
+entry:
+ %val = alloca i8
+ %old = alloca i8
+ store i8 31, i8* %val
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %0 = atomicrmw umin i8* %val, i8 16 monotonic
+ store i8 %0, i8* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %uneg = sub i8 0, 1
+ %1 = atomicrmw umin i8* %val, i8 %uneg monotonic
+ store i8 %1, i8* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %2 = atomicrmw umax i8* %val, i8 1 monotonic
+ store i8 %2, i8* %old
+ ; CHECK: ldrex
+ ; CHECK: cmp
+ ; CHECK: strex
+ %3 = atomicrmw umax i8* %val, i8 0 monotonic
+ store i8 %3, i8* %old
+ ret void
}
diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 92aff7007f23..1b385ab79c4e 100644
--- a/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -3,14 +3,48 @@
; dependency) when it isn't dependent on last CPSR defining instruction.
; rdar://8928208
-define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
+define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone {
entry:
-; CHECK: t:
-; CHECK: muls [[REG:(r[0-9]+)]], r2, r3
-; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r0, r1
-; CHECK-NEXT: muls r0, [[REG2]], [[REG]]
+; CHECK: t1:
+; CHECK: muls [[REG:(r[0-9]+)]], r3, r2
+; CHECK-NEXT: mul [[REG2:(r[0-9]+)]], r1, r0
+; CHECK-NEXT: muls r0, [[REG]], [[REG2]]
%0 = mul nsw i32 %a, %b
%1 = mul nsw i32 %c, %d
%2 = mul nsw i32 %0, %1
ret i32 %2
}
+
+; Avoid partial CPSR dependency via loop backedge.
+; rdar://10357570
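+; Because muls also writes the flags, using it inside the loop would create
+; a partial CPSR update carried around the backedge; the CHECK lines below
+; insist on the non-flag-setting mul.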
+define void @t2(i32* nocapture %ptr1, i32* %ptr2, i32 %c) nounwind {
+entry:
+; CHECK: t2:
+ %tobool7 = icmp eq i32* %ptr2, null
+ br i1 %tobool7, label %while.end, label %while.body
+
+while.body:
+; CHECK: while.body
+; CHECK: mul r{{[0-9]+}}
+; CHECK-NOT: muls
+ %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
+ %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
+ %0 = load i32* %ptr1.addr.09, align 4
+ %arrayidx1 = getelementptr inbounds i32* %ptr1.addr.09, i32 1
+ %1 = load i32* %arrayidx1, align 4
+ %arrayidx3 = getelementptr inbounds i32* %ptr1.addr.09, i32 2
+ %2 = load i32* %arrayidx3, align 4
+ %arrayidx4 = getelementptr inbounds i32* %ptr1.addr.09, i32 3
+ %3 = load i32* %arrayidx4, align 4
+ %add.ptr = getelementptr inbounds i32* %ptr1.addr.09, i32 4
+ %mul = mul i32 %1, %0
+ %mul5 = mul i32 %mul, %2
+ %mul6 = mul i32 %mul5, %3
+ store i32 %mul6, i32* %ptr2.addr.08, align 4
+ %incdec.ptr = getelementptr inbounds i32* %ptr2.addr.08, i32 -1
+ %tobool = icmp eq i32* %incdec.ptr, null
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+ ret void
+}
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index f78d9980befc..94edff5c0be5 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
+; RUN: llc < %s -mtriple=armv6-apple-ios -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKV6
; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic -mattr=+vfp2 -arm-tail-calls | FileCheck %s -check-prefix=CHECKELF
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -arm-tail-calls | FileCheck %s -check-prefix=CHECKT2D
; RUN: llc < %s -mtriple=thumbv7-apple-ios5.0 | FileCheck %s -check-prefix=CHECKT2D
; Enable tailcall optimization for iOS 5.0
@@ -96,3 +96,70 @@ bb:
tail call void @foo() nounwind
ret void
}
+
+; Make sure CodeGenPrepare duplicates ret instructions to enable tail calls.
+; rdar://11140249
+define i32 @t8(i32 %x) nounwind ssp {
+entry:
+; CHECKT2D: t8:
+; CHECKT2D-NOT: push
+ %and = and i32 %x, 1
+ %tobool = icmp eq i32 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+; CHECKT2D: bne.w _a
+ %call = tail call i32 @a(i32 %x) nounwind
+ br label %return
+
+if.end: ; preds = %entry
+ %and1 = and i32 %x, 2
+ %tobool2 = icmp eq i32 %and1, 0
+ br i1 %tobool2, label %if.end5, label %if.then3
+
+if.then3: ; preds = %if.end
+; CHECKT2D: bne.w _b
+ %call4 = tail call i32 @b(i32 %x) nounwind
+ br label %return
+
+if.end5: ; preds = %if.end
+; CHECKT2D: b.w _c
+ %call6 = tail call i32 @c(i32 %x) nounwind
+ br label %return
+
+return: ; preds = %if.end5, %if.then3, %if.then
+ %retval.0 = phi i32 [ %call, %if.then ], [ %call4, %if.then3 ], [ %call6, %if.end5 ]
+ ret i32 %retval.0
+}
+
+declare i32 @a(i32)
+
+declare i32 @b(i32)
+
+declare i32 @c(i32)
+
+; PR12419
+; rdar://11195178
+; Use the correct input chain for the tailcall node or else the call to
+; _ZN9MutexLockD1Ev would be lost.
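+; (The sdiv becomes the tail call to ___divsi3; both the constructor and
+; destructor calls must still be emitted ahead of it.)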
+%class.MutexLock = type { i8 }
+
+@x = external global i32, align 4
+
+define i32 @t9() nounwind {
+; CHECKT2D: t9:
+; CHECKT2D: blx __ZN9MutexLockC1Ev
+; CHECKT2D: blx __ZN9MutexLockD1Ev
+; CHECKT2D: b.w ___divsi3
+ %lock = alloca %class.MutexLock, align 1
+ %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
+ %2 = load i32* @x, align 4
+ %3 = sdiv i32 1000, %2
+ %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
+ ret i32 %3
+}
+
+declare %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
+
+declare %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock*) unnamed_addr nounwind align 2
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index 0f9543f0a19d..107e79a9e01e 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -26,7 +26,7 @@ define i32* @m_231b(i32, i32, i32*, i32*, i32*) nounwind {
; CHECKV4: bx r{{.*}}
BB0:
%5 = inttoptr i32 %0 to i32* ; <i32*> [#uses=1]
- %t35 = volatile load i32* %5 ; <i32> [#uses=1]
+ %t35 = load volatile i32* %5 ; <i32> [#uses=1]
%6 = inttoptr i32 %t35 to i32** ; <i32**> [#uses=1]
%7 = getelementptr i32** %6, i32 86 ; <i32**> [#uses=1]
%8 = load i32** %7 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/clz.ll b/test/CodeGen/ARM/clz.ll
index e381e0029819..5b6a584bbee8 100644
--- a/test/CodeGen/ARM/clz.ll
+++ b/test/CodeGen/ARM/clz.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
define i32 @test(i32 %x) {
; CHECK: test
; CHECK: clz r0, r0
- %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x )
+ %tmp.1 = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
ret i32 %tmp.1
}
diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll
index 91ef65925221..487ec690ea5d 100644
--- a/test/CodeGen/ARM/code-placement.ll
+++ b/test/CodeGen/ARM/code-placement.ll
@@ -12,9 +12,9 @@ entry:
br i1 %0, label %bb2, label %bb
bb:
-; CHECK: LBB0_2:
-; CHECK: bne LBB0_2
-; CHECK-NOT: b LBB0_2
+; CHECK: LBB0_1:
+; CHECK: bne LBB0_1
+; CHECK-NOT: b LBB0_1
; CHECK: bx lr
%list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
%next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
diff --git a/test/CodeGen/ARM/commute-movcc.ll b/test/CodeGen/ARM/commute-movcc.ll
new file mode 100644
index 000000000000..7316452cd617
--- /dev/null
+++ b/test/CodeGen/ARM/commute-movcc.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s
+
+; LLVM IR optimizers canonicalize icmp+select this way.
+; Make sure that TwoAddressInstructionPass can commute the corresponding
+; MOVCC instructions to avoid excessive copies in one of the if blocks.
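+; (A MOVCC's destination is tied to one of its source operands; commuting
+; lets the pass pick the operand that already lives in the destination
+; register, so no extra copy is needed.)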
+;
+; CHECK: %if.then
+; CHECK-NOT: mov
+; CHECK: movlo
+; CHECK: movlo
+; CHECK-NOT: mov
+
+; CHECK: %if.else
+; CHECK-NOT: mov
+; CHECK: movls
+; CHECK: movls
+; CHECK-NOT: mov
+
+; This is really an LSR test: Make sure that cmp is using the incremented
+; induction variable.
+; CHECK: %if.end8
+; CHECK: add{{(s|\.w)?}} [[IV:r[0-9]+]], {{.*}}#1
+; CHECK: cmp [[IV]], #
+
+define i32 @f(i32* nocapture %a, i32 %Pref) nounwind ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %if.end8
+ %i.012 = phi i32 [ 0, %entry ], [ %inc, %if.end8 ]
+ %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
+ %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %i.012
+ %0 = load i32* %arrayidx, align 4, !tbaa !0
+ %mul = mul i32 %0, %0
+ %sub = add nsw i32 %i.012, -5
+ %cmp2 = icmp eq i32 %sub, %Pref
+ br i1 %cmp2, label %if.else, label %if.then
+
+if.then: ; preds = %for.body
+ %cmp3 = icmp ult i32 %mul, %BestCost.011
+ %i.0.BestIdx.0 = select i1 %cmp3, i32 %i.012, i32 %BestIdx.010
+ %mul.BestCost.0 = select i1 %cmp3, i32 %mul, i32 %BestCost.011
+ br label %if.end8
+
+if.else: ; preds = %for.body
+ %cmp5 = icmp ugt i32 %mul, %BestCost.011
+ %BestIdx.0.i.0 = select i1 %cmp5, i32 %BestIdx.010, i32 %i.012
+ %BestCost.0.mul = select i1 %cmp5, i32 %BestCost.011, i32 %mul
+ br label %if.end8
+
+if.end8: ; preds = %if.else, %if.then
+ %BestIdx.1 = phi i32 [ %i.0.BestIdx.0, %if.then ], [ %BestIdx.0.i.0, %if.else ]
+ %BestCost.1 = phi i32 [ %mul.BestCost.0, %if.then ], [ %BestCost.0.mul, %if.else ]
+ store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+ %inc = add i32 %i.012, 1
+ %cmp = icmp eq i32 %inc, 11
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %if.end8
+ ret i32 %BestIdx.1
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/cse-call.ll b/test/CodeGen/ARM/cse-call.ll
new file mode 100644
index 000000000000..eff5de5a2428
--- /dev/null
+++ b/test/CodeGen/ARM/cse-call.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mcpu=arm1136jf-s -verify-machineinstrs | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "armv6-apple-ios0.0.0"
+
+; Don't CSE a cmp across a call that clobbers CPSR.
+;
+; CHECK: cmp
+; CHECK: S_trimzeros
+; CHECK: cmp
+; CHECK: strlen
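+;
+; The first cmp cannot be reused after S_trimzeros clobbers CPSR, hence the
+; second cmp before the strlen call.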
+
+@F_floatmul.man1 = external global [200 x i8], align 1
+@F_floatmul.man2 = external global [200 x i8], align 1
+
+declare i32 @strlen(i8* nocapture) nounwind readonly
+declare void @S_trimzeros(...)
+
+define i8* @F_floatmul(i8* %f1, i8* %f2) nounwind ssp {
+entry:
+ br i1 undef, label %while.end42, label %while.body37
+
+while.body37: ; preds = %while.body37, %entry
+ br i1 false, label %while.end42, label %while.body37
+
+while.end42: ; preds = %while.body37, %entry
+ %. = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0)
+ %.92 = select i1 undef, i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man2, i32 0, i32 0), i8* getelementptr inbounds ([200 x i8]* @F_floatmul.man1, i32 0, i32 0)
+ tail call void bitcast (void (...)* @S_trimzeros to void (i8*)*)(i8* %.92) nounwind
+ %call47 = tail call i32 @strlen(i8* %.) nounwind
+ unreachable
+}
diff --git a/test/CodeGen/ARM/cse-libcalls.ll b/test/CodeGen/ARM/cse-libcalls.ll
index 0dcf9ddc0bb1..1d011be93c3c 100644
--- a/test/CodeGen/ARM/cse-libcalls.ll
+++ b/test/CodeGen/ARM/cse-libcalls.ll
@@ -4,7 +4,7 @@ target triple = "i386-apple-darwin8"
; Without CSE of libcalls, there are two calls in the output instead of one.
-define i32 @u_f_nonbon(double %lambda) nounwind {
+define double @u_f_nonbon(double %lambda) nounwind {
entry:
%tmp19.i.i = load double* null, align 4 ; <double> [#uses=2]
%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00 ; <i1> [#uses=1]
@@ -26,5 +26,5 @@ bb502.loopexit.i: ; preds = %bb28.i
br i1 false, label %bb.nph53.i, label %bb508.i
bb508.i: ; preds = %bb502.loopexit.i, %entry
- ret i32 1
+ ret double %tmp10.i4
}
diff --git a/test/CodeGen/ARM/ctor_order.ll b/test/CodeGen/ARM/ctor_order.ll
new file mode 100644
index 000000000000..6419292280f1
--- /dev/null
+++ b/test/CodeGen/ARM/ctor_order.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=arm-linux-gnu | FileCheck %s -check-prefix=ELF
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=GNUEABI
+
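+; Two constructors with explicit priorities 151 and 152: f151 must come
+; first, and the ELF variants encode the priority in the section name
+; (inverted for .ctors: 65535 - 151 = 65384, 65535 - 152 = 65383).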
+; DARWIN: .section __DATA,__mod_init_func,mod_init_funcs
+; DARWIN: .long _f151
+; DARWIN-NEXT: .long _f152
+
+; ELF: .section .ctors.65384,"aw",%progbits
+; ELF: .long f151
+; ELF: .section .ctors.65383,"aw",%progbits
+; ELF: .long f152
+
+; GNUEABI: .section .init_array.151,"aw",%init_array
+; GNUEABI: .long f151
+; GNUEABI: .section .init_array.152,"aw",%init_array
+; GNUEABI: .long f152
+
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [ { i32, void ()* } { i32 151, void ()* @f151 }, { i32, void ()* } { i32 152, void ()* @f152 } ]
+
+define void @f151() {
+entry:
+ ret void
+}
+
+define void @f152() {
+entry:
+ ret void
+}
diff --git a/test/CodeGen/ARM/ctz.ll b/test/CodeGen/ARM/ctz.ll
index 1d2ced37b035..5ebca53b4692 100644
--- a/test/CodeGen/ARM/ctz.ll
+++ b/test/CodeGen/ARM/ctz.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
define i32 @f1(i32 %a) {
; CHECK: f1:
; CHECK: rbit
; CHECK: clz
- %tmp = call i32 @llvm.cttz.i32( i32 %a )
+ %tmp = call i32 @llvm.cttz.i32( i32 %a, i1 true )
ret i32 %tmp
}
diff --git a/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
new file mode 100644
index 000000000000..18f57ea41cd8
--- /dev/null
+++ b/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -0,0 +1,30 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
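+; The any-extend of the unsigned vector load must be turned into a zero
+; extend (vmovl.u16) so that no vand or uxth masking is left before the
+; vcvt.f32.u32.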
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+ ; CHECK: vldr
+ ; CHECK: vmovl.u16
+ ; CHECK-NOT: vand
+ %1 = load <4 x i16>* %in
+ ; CHECK: vcvt.f32.u32
+ %2 = uitofp <4 x i16> %1 to <4 x float>
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = extractelement <4 x float> %2, i32 1
+ %5 = extractelement <4 x float> %2, i32 2
+
+ ; CHECK: vadd.f32
+ %6 = fadd float %3, %4
+ %7 = fadd float %6, %5
+
+ ret float %7
+}
+
+define float @g(<4 x i16>* nocapture %in) {
+ ; CHECK: vldr
+ %1 = load <4 x i16>* %in
+ ; CHECK-NOT: uxth
+ %2 = extractelement <4 x i16> %1, i32 0
+ ; CHECK: vcvt.f32.u32
+ %3 = uitofp i16 %2 to float
+ ret float %3
+}
diff --git a/test/CodeGen/ARM/debug-info-arg.ll b/test/CodeGen/ARM/debug-info-arg.ll
index b0270f9b45fd..a7b44e6fe709 100644
--- a/test/CodeGen/ARM/debug-info-arg.ll
+++ b/test/CodeGen/ARM/debug-info-arg.ll
@@ -2,7 +2,7 @@
; Test to check argument y's debug info uses FI
; Radar 10048772
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
%struct.tag_s = type { i32, i32, i32 }
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
index 2c593160fd05..0ad0a15ca3d9 100644
--- a/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -1,8 +1,8 @@
; RUN: llc -O0 < %s | FileCheck %s
-; CHECK: @DEBUG_VALUE: mydata <- [sp+#4]+#0
+; CHECK: @DEBUG_VALUE: mydata <- [sp+#{{[0-9]+}}]+#0
; Radar 9331779
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
%0 = type opaque
%1 = type { [4 x i32] }
diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll
index 8c9095e3a9ea..325eea00c8d6 100644
--- a/test/CodeGen/ARM/debug-info-d16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-d16-reg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s - | FileCheck %s
+; RUN: llc < %s | FileCheck %s
; Radar 9309221
; Test dwarf reg no for d16
;CHECK: DW_OP_regx
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index e83a83d1f10a..97c9c66c58aa 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -3,13 +3,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: DW_OP_regx for Q register: D1
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
;CHECK-NEXT: DW_OP_piece 8
;CHECK-NEXT: byte 8
;CHECK-NEXT: DW_OP_regx for Q register: D2
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
;CHECK-NEXT: DW_OP_piece 8
;CHECK-NEXT: byte 8
diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll
index 548c9bdebf02..db41143fb3b1 100644
--- a/test/CodeGen/ARM/debug-info-s16-reg.ll
+++ b/test/CodeGen/ARM/debug-info-s16-reg.ll
@@ -2,8 +2,7 @@
; Radar 9309221
; Test dwarf reg no for s16
;CHECK: DW_OP_regx for S register
-;CHECK-NEXT: byte
-;CHECK-NEXT: byte
+;CHECK-NEXT: ascii
;CHECK-NEXT: DW_OP_bit_piece 32 0
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
diff --git a/test/CodeGen/ARM/debug-info-sreg2.ll b/test/CodeGen/ARM/debug-info-sreg2.ll
index ee777cec54c8..ae7af0afad50 100644
--- a/test/CodeGen/ARM/debug-info-sreg2.ll
+++ b/test/CodeGen/ARM/debug-info-sreg2.ll
@@ -4,11 +4,11 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
target triple = "thumbv7-apple-macosx10.6.7"
;CHECK: Ldebug_loc0:
+;CHECK-NEXT: .long Ltmp0
;CHECK-NEXT: .long Ltmp1
-;CHECK-NEXT: .long Ltmp2
-;CHECK-NEXT: Lset8 = Ltmp10-Ltmp9 @ Loc expr size
-;CHECK-NEXT: .short Lset8
-;CHECK-NEXT: Ltmp9:
+;CHECK-NEXT: Lset[[N:[0-9]+]] = Ltmp{{[0-9]+}}-Ltmp[[M:[0-9]+]] @ Loc expr size
+;CHECK-NEXT: .short Lset[[N]]
+;CHECK-NEXT: Ltmp[[M]]:
;CHECK-NEXT: .byte 144 @ DW_OP_regx for S register
define void @_Z3foov() optsize ssp {
diff --git a/test/CodeGen/ARM/dg.exp b/test/CodeGen/ARM/dg.exp
deleted file mode 100644
index 3ff359aab39b..000000000000
--- a/test/CodeGen/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/ARM/eh-resume-darwin.ll b/test/CodeGen/ARM/eh-resume-darwin.ll
index e4755085de7d..d1252f4c9867 100644
--- a/test/CodeGen/ARM/eh-resume-darwin.ll
+++ b/test/CodeGen/ARM/eh-resume-darwin.ll
@@ -3,12 +3,6 @@ target triple = "armv6-apple-macosx10.6"
declare void @func()
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
-declare void @llvm.eh.resume(i8*, i32)
-
declare i32 @__gxx_personality_sj0(...)
define void @test0() {
@@ -20,10 +14,9 @@ cont:
ret void
lpad:
- %exn = call i8* @llvm.eh.exception()
- %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
- call void @llvm.eh.resume(i8* %exn, i32 %sel) noreturn
- unreachable
+ %exn = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ resume { i8*, i32 } %exn
}
; CHECK: __Unwind_SjLj_Resume
diff --git a/test/CodeGen/ARM/ehabi-unwind.ll b/test/CodeGen/ARM/ehabi-unwind.ll
new file mode 100644
index 000000000000..fd7d0e63f3b8
--- /dev/null
+++ b/test/CodeGen/ARM/ehabi-unwind.ll
@@ -0,0 +1,16 @@
+; Test that the EHABI unwind instruction generator does not encounter any
+; unfamiliar instructions.
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -disable-fp-elim
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi
+; RUN: llc < %s -mtriple=thumbv7 -arm-enable-ehabi -arm-enable-ehabi-descriptors
+
+define void @_Z1fv() nounwind {
+entry:
+ ret void
+}
+
+define void @_Z1gv() nounwind {
+entry:
+ call void @_Z1fv()
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
new file mode 100644
index 000000000000..dbb634df0a1e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -0,0 +1,65 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+%struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] }
+%struct.B = type { i32, [2 x [2 x [2 x %struct.A]]] }
+
+@arr = common global [2 x [2 x [2 x [2 x [2 x i32]]]]] zeroinitializer, align 4
+@A = common global [3 x [3 x %struct.A]] zeroinitializer, align 4
+@B = common global [2 x [2 x [2 x %struct.B]]] zeroinitializer, align 4
+
+define i32* @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #124
+; THUMB: adds r0, #124
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
+
+define i32* @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
+; ARM: movw r1, #1148
+; ARM: add r0, r0, r1
+; THUMB: addw r0, r0, #1148
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
+
+define i32* @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
+; ARM: add r0, r0, #140
+; THUMB: adds r0, #140
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
+
+define i32* @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+ %addr = alloca i32*, align 4
+ store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
+; ARM-NOT: movw r{{[0-9]}}, #1060
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #132
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #36
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #24
+; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
+; ARM: movw r{{[0-9]}}, #1284
+; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
+ %0 = load i32** %addr, align 4
+ ret i32* %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll
new file mode 100644
index 000000000000..723383e04b8e
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-binary.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test add with non-legal types
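+; (i1/i8/i16 are not legal ARM register types, so the operands below must be
+; promoted first.)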
+
+define void @add_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: add_i1
+; THUMB: add_i1
+ %a.addr = alloca i1, align 4
+ %0 = add i1 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+ store i1 %0, i1* %a.addr, align 4
+ ret void
+}
+
+define void @add_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: add_i8
+; THUMB: add_i8
+ %a.addr = alloca i8, align 4
+ %0 = add i8 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+ store i8 %0, i8* %a.addr, align 4
+ ret void
+}
+
+define void @add_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: add_i16
+; THUMB: add_i16
+ %a.addr = alloca i16, align 4
+ %0 = add i16 %a, %b
+; ARM: add r0, r0, r1
+; THUMB: add r0, r1
+ store i16 %0, i16* %a.addr, align 4
+ ret void
+}
+
+; Test or with non-legal types
+
+define void @or_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: or_i1
+; THUMB: or_i1
+ %a.addr = alloca i1, align 4
+ %0 = or i1 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+ store i1 %0, i1* %a.addr, align 4
+ ret void
+}
+
+define void @or_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: or_i8
+; THUMB: or_i8
+ %a.addr = alloca i8, align 4
+ %0 = or i8 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+ store i8 %0, i8* %a.addr, align 4
+ ret void
+}
+
+define void @or_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: or_i16
+; THUMB: or_i16
+ %a.addr = alloca i16, align 4
+ %0 = or i16 %a, %b
+; ARM: orr r0, r0, r1
+; THUMB: orrs r0, r1
+ store i16 %0, i16* %a.addr, align 4
+ ret void
+}
+
+; Test sub with non-legal types
+
+define void @sub_i1(i1 %a, i1 %b) nounwind ssp {
+entry:
+; ARM: sub_i1
+; THUMB: sub_i1
+ %a.addr = alloca i1, align 4
+ %0 = sub i1 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+ store i1 %0, i1* %a.addr, align 4
+ ret void
+}
+
+define void @sub_i8(i8 %a, i8 %b) nounwind ssp {
+entry:
+; ARM: sub_i8
+; THUMB: sub_i8
+ %a.addr = alloca i8, align 4
+ %0 = sub i8 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+ store i8 %0, i8* %a.addr, align 4
+ ret void
+}
+
+define void @sub_i16(i16 %a, i16 %b) nounwind ssp {
+entry:
+; ARM: sub_i16
+; THUMB: sub_i16
+ %a.addr = alloca i16, align 4
+ %0 = sub i16 %a, %b
+; ARM: sub r0, r0, r1
+; THUMB: subs r0, r0, r1
+ store i16 %0, i16* %a.addr, align 4
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll
new file mode 100644
index 000000000000..7c532d5fba38
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-const.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp {
+entry:
+; THUMB: t1:
+; ARM: t1:
+ %x = add i32 %a, %b
+ br i1 1, label %if.then, label %if.else
+; THUMB-NOT: b LBB0_1
+; ARM-NOT: b LBB0_1
+
+if.then: ; preds = %entry
+ call void @foo1()
+ br label %if.end7
+
+if.else: ; preds = %entry
+ br i1 0, label %if.then2, label %if.else3
+; THUMB: b LBB0_4
+; ARM: b LBB0_4
+
+if.then2: ; preds = %if.else
+ call void @foo2()
+ br label %if.end6
+
+if.else3: ; preds = %if.else
+ %y = sub i32 %a, %b
+ br i1 1, label %if.then5, label %if.end
+; THUMB-NOT: b LBB0_5
+; ARM-NOT: b LBB0_5
+
+if.then5: ; preds = %if.else3
+ call void @foo1()
+ br label %if.end
+
+if.end: ; preds = %if.then5, %if.else3
+ br label %if.end6
+
+if.end6: ; preds = %if.end, %if.then2
+ br label %if.end7
+
+if.end7: ; preds = %if.end6, %if.then
+ ret i32 0
+}
+
+declare void @foo1()
+
+declare void @foo2()
diff --git a/test/CodeGen/ARM/fast-isel-br-phi.ll b/test/CodeGen/ARM/fast-isel-br-phi.ll
new file mode 100644
index 000000000000..a0aba694e43c
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-br-phi.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios
+
+; This test ensures HandlePHINodesInSuccessorBlocks() is able to promote basic
+; non-legal integer types (i.e., i1, i8, i16).
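+; For example, the i8 phi in %cond.end8 below must be promoted to a legal
+; width before it can be passed to @fooi8.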
+
+declare void @fooi8(i8)
+declare void @fooi16(i16)
+
+define void @foo(i1 %cmp) nounwind ssp {
+entry:
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true: ; preds = %entry
+ br label %cond.end
+
+cond.false: ; preds = %entry
+ br label %cond.end
+
+cond.end: ; preds = %cond.false, %cond.true
+ %cond = phi i1 [ 0, %cond.true ], [ 1, %cond.false ]
+ br i1 %cond, label %cond.true8, label %cond.false8
+
+cond.true8: ; preds = %cond.end
+ br label %cond.end8
+
+cond.false8: ; preds = %cond.end
+ br label %cond.end8
+
+cond.end8: ; preds = %cond.false8, %cond.true8
+ %cond8 = phi i8 [ 0, %cond.true8 ], [ 1, %cond.false8 ]
+ call void @fooi8(i8 %cond8)
+ br i1 0, label %cond.true16, label %cond.false16
+
+cond.true16: ; preds = %cond.end8
+ br label %cond.end16
+
+cond.false16: ; preds = %cond.end8
+ br label %cond.end16
+
+cond.end16: ; preds = %cond.false16, %cond.true16
+ %cond16 = phi i16 [ 0, %cond.true16 ], [ 1, %cond.false16 ]
+ call void @fooi16(i16 %cond16)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll
new file mode 100644
index 000000000000..dd460b2a0361
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-call.ll
@@ -0,0 +1,128 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t0(i1 zeroext %a) nounwind {
+ %1 = zext i1 %a to i32
+ ret i32 %1
+}
+
+define i32 @t1(i8 signext %a) nounwind {
+ %1 = sext i8 %a to i32
+ ret i32 %1
+}
+
+define i32 @t2(i8 zeroext %a) nounwind {
+ %1 = zext i8 %a to i32
+ ret i32 %1
+}
+
+define i32 @t3(i16 signext %a) nounwind {
+ %1 = sext i16 %a to i32
+ ret i32 %1
+}
+
+define i32 @t4(i16 zeroext %a) nounwind {
+ %1 = zext i16 %a to i32
+ ret i32 %1
+}
+
+define void @foo(i8 %a, i16 %b) nounwind {
+; ARM: foo
+; THUMB: foo
+;; Materialize i1 1
+; ARM: movw r2, #1
+;; zero-ext
+; ARM: and r2, r2, #1
+; THUMB: and r2, r2, #1
+ %1 = call i32 @t0(i1 zeroext 1)
+; ARM: sxtb r2, r1
+; ARM: mov r0, r2
+; THUMB: sxtb r2, r1
+; THUMB: mov r0, r2
+ %2 = call i32 @t1(i8 signext %a)
+; ARM: uxtb r2, r1
+; ARM: mov r0, r2
+; THUMB: uxtb r2, r1
+; THUMB: mov r0, r2
+ %3 = call i32 @t2(i8 zeroext %a)
+; ARM: sxth r2, r1
+; ARM: mov r0, r2
+; THUMB: sxth r2, r1
+; THUMB: mov r0, r2
+ %4 = call i32 @t3(i16 signext %b)
+; ARM: uxth r2, r1
+; ARM: mov r0, r2
+; THUMB: uxth r2, r1
+; THUMB: mov r0, r2
+ %5 = call i32 @t4(i16 zeroext %b)
+
+;; A few tests to check constant materialization.
+;; Note: i1 1 was already materialized with the t1 call.
+; ARM: movw r1, #255
+ %6 = call i32 @t2(i8 zeroext 255)
+; ARM: movw r1, #65535
+; THUMB: movw r1, #65535
+ %7 = call i32 @t4(i16 zeroext 65535)
+ ret void
+}
+
+define void @foo2() nounwind {
+ %1 = call signext i16 @t5()
+ %2 = call zeroext i16 @t6()
+ %3 = call signext i8 @t7()
+ %4 = call zeroext i8 @t8()
+ %5 = call zeroext i1 @t9()
+ ret void
+}
+
+declare signext i16 @t5()
+declare zeroext i16 @t6()
+declare signext i8 @t7()
+declare zeroext i8 @t8()
+declare zeroext i1 @t9()
+
+define i32 @t10(i32 %argc, i8** nocapture %argv) {
+entry:
+; ARM: @t10
+; ARM: movw r0, #0
+; ARM: movw r1, #248
+; ARM: movw r2, #187
+; ARM: movw r3, #28
+; ARM: movw r9, #40
+; ARM: movw r12, #186
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: uxtb r2, r2
+; ARM: uxtb r3, r3
+; ARM: uxtb r9, r9
+; ARM: str r9, [sp]
+; ARM: uxtb r9, r12
+; ARM: str r9, [sp, #4]
+; ARM: bl _bar
+; THUMB: @t10
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: movs r1, #248
+; THUMB: movt r1, #0
+; THUMB: movs r2, #187
+; THUMB: movt r2, #0
+; THUMB: movs r3, #28
+; THUMB: movt r3, #0
+; THUMB: movw r9, #40
+; THUMB: movt r9, #0
+; THUMB: movw r12, #186
+; THUMB: movt r12, #0
+; THUMB: uxtb r0, r0
+; THUMB: uxtb r1, r1
+; THUMB: uxtb r2, r2
+; THUMB: uxtb r3, r3
+; THUMB: uxtb.w r9, r9
+; THUMB: str.w r9, [sp]
+; THUMB: uxtb.w r9, r12
+; THUMB: str.w r9, [sp, #4]
+; THUMB: bl _bar
+ %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70)
+ ret i32 0
+}
+
+declare i32 @bar(i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext, i8 zeroext)
diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
new file mode 100644
index 000000000000..660156aa48bd
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll
@@ -0,0 +1,250 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1a(float %a) uwtable ssp {
+entry:
+; ARM: t1a
+; THUMB: t1a
+ %cmp = fcmp oeq float %a, 0.000000e+00
+; ARM: vcmpe.f32 s{{[0-9]+}}, #0
+; THUMB: vcmpe.f32 s{{[0-9]+}}, #0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+declare void @foo()
+
+; Shouldn't be able to encode -0.0 imm.
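+; (vcmpe's immediate form only encodes +0.0, so the -0.0 constant is loaded
+; with vldr and compared register-to-register.)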
+define void @t1b(float %a) uwtable ssp {
+entry:
+; ARM: t1b
+; THUMB: t1b
+ %cmp = fcmp oeq float %a, -0.000000e+00
+; ARM: vldr
+; ARM: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+; THUMB: vldr
+; THUMB: vcmpe.f32 s{{[0-9]+}}, s{{[0-9]+}}
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t2a(double %a) uwtable ssp {
+entry:
+; ARM: t2a
+; THUMB: t2a
+ %cmp = fcmp oeq double %a, 0.000000e+00
+; ARM: vcmpe.f64 d{{[0-9]+}}, #0
+; THUMB: vcmpe.f64 d{{[0-9]+}}, #0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; Shouldn't be able to encode -0.0 imm.
+define void @t2b(double %a) uwtable ssp {
+entry:
+; ARM: t2b
+; THUMB: t2b
+ %cmp = fcmp oeq double %a, -0.000000e+00
+; ARM: vldr
+; ARM: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+; THUMB: vldr
+; THUMB: vcmpe.f64 d{{[0-9]+}}, d{{[0-9]+}}
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t4(i8 signext %a) uwtable ssp {
+entry:
+; ARM: t4
+; THUMB: t4
+ %cmp = icmp eq i8 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t5(i8 zeroext %a) uwtable ssp {
+entry:
+; ARM: t5
+; THUMB: t5
+ %cmp = icmp eq i8 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t6(i16 signext %a) uwtable ssp {
+entry:
+; ARM: t6
+; THUMB: t6
+ %cmp = icmp eq i16 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t7(i16 zeroext %a) uwtable ssp {
+entry:
+; ARM: t7
+; THUMB: t7
+ %cmp = icmp eq i16 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t8(i32 %a) uwtable ssp {
+entry:
+; ARM: t8
+; THUMB: t8
+ %cmp = icmp eq i32 %a, -1
+; ARM: cmn r{{[0-9]}}, #1
+; THUMB: cmn.w r{{[0-9]}}, #1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t9(i32 %a) uwtable ssp {
+entry:
+; ARM: t9
+; THUMB: t9
+ %cmp = icmp eq i32 %a, 1
+; ARM: cmp r{{[0-9]}}, #1
+; THUMB: cmp r{{[0-9]}}, #1
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t10(i32 %a) uwtable ssp {
+entry:
+; ARM: t10
+; THUMB: t10
+ %cmp = icmp eq i32 %a, 384
+; ARM: cmp r{{[0-9]}}, #384
+; THUMB: cmp.w r{{[0-9]}}, #384
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t11(i32 %a) uwtable ssp {
+entry:
+; ARM: t11
+; THUMB: t11
+ %cmp = icmp eq i32 %a, 4096
+; ARM: cmp r{{[0-9]}}, #4096
+; THUMB: cmp.w r{{[0-9]}}, #4096
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define void @t12(i8 %a) uwtable ssp {
+entry:
+; ARM: t12
+; THUMB: t12
+ %cmp = icmp ugt i8 %a, -113
+; ARM: cmp r{{[0-9]}}, #143
+; THUMB: cmp r{{[0-9]}}, #143
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+; rdar://11038907
+; When comparing against LONG_MIN/INT_MIN, use a cmp instruction.
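+; (cmn would compare against the negated immediate, and -INT_MIN is not
+; representable in 32 bits.)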
+define void @t13() nounwind ssp {
+entry:
+; ARM: t13
+; THUMB: t13
+ %cmp = icmp slt i32 -123, -2147483648
+; ARM: cmp r{{[0-9]}}, #-2147483648
+; THUMB: cmp.w r{{[0-9]}}, #-2147483648
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret void
+
+if.end: ; preds = %entry
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll
new file mode 100644
index 000000000000..686ccad029d8
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-conversion.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; Test sitofp
+
+define void @sitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: sitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = sitofp i32 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @sitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: sitofp_single_i16
+; ARM: sxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i16
+; THUMB: sxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = sitofp i16 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @sitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: sitofp_single_i8
+; ARM: sxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.s32 s0, s0
+; THUMB: sitofp_single_i8
+; THUMB: sxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.s32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = sitofp i8 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @sitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = sitofp i32 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @sitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i16
+; ARM: sxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i16
+; THUMB: sxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = sitofp i16 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @sitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: sitofp_double_i8
+; ARM: sxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.s32 d16, s0
+; THUMB: sitofp_double_i8
+; THUMB: sxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.s32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = sitofp i8 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+; Test uitofp
+
+define void @uitofp_single_i32(i32 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = uitofp i32 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @uitofp_single_i16(i16 %a, float %b) nounwind ssp {
+entry:
+; ARM: uitofp_single_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = uitofp i16 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @uitofp_single_i8(i8 %a) nounwind ssp {
+entry:
+; ARM: uitofp_single_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f32.u32 s0, s0
+; THUMB: uitofp_single_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f32.u32 s0, s0
+ %b.addr = alloca float, align 4
+ %conv = uitofp i8 %a to float
+ store float %conv, float* %b.addr, align 4
+ ret void
+}
+
+define void @uitofp_double_i32(i32 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i32
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i32
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = uitofp i32 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @uitofp_double_i16(i16 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i16
+; ARM: uxth r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i16
+; THUMB: uxth r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = uitofp i16 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+define void @uitofp_double_i8(i8 %a, double %b) nounwind ssp {
+entry:
+; ARM: uitofp_double_i8
+; ARM: uxtb r0, r0
+; ARM: vmov s0, r0
+; ARM: vcvt.f64.u32 d16, s0
+; THUMB: uitofp_double_i8
+; THUMB: uxtb r0, r0
+; THUMB: vmov s0, r0
+; THUMB: vcvt.f64.u32 d16, s0
+ %b.addr = alloca double, align 8
+ %conv = uitofp i8 %a to double
+ store double %conv, double* %b.addr, align 8
+ ret void
+}
+
+; Test fptosi
+
+define void @fptosi_float(float %a) nounwind ssp {
+entry:
+; ARM: fptosi_float
+; ARM: vcvt.s32.f32 s0, s0
+; THUMB: fptosi_float
+; THUMB: vcvt.s32.f32 s0, s0
+ %b.addr = alloca i32, align 4
+ %conv = fptosi float %a to i32
+ store i32 %conv, i32* %b.addr, align 4
+ ret void
+}
+
+define void @fptosi_double(double %a) nounwind ssp {
+entry:
+; ARM: fptosi_double
+; ARM: vcvt.s32.f64 s0, d16
+; THUMB: fptosi_double
+; THUMB: vcvt.s32.f64 s0, d16
+ %b.addr = alloca i32, align 8
+ %conv = fptosi double %a to i32
+ store i32 %conv, i32* %b.addr, align 8
+ ret void
+}
+
+; Test fptoui
+
+define void @fptoui_float(float %a) nounwind ssp {
+entry:
+; ARM: fptoui_float
+; ARM: vcvt.u32.f32 s0, s0
+; THUMB: fptoui_float
+; THUMB: vcvt.u32.f32 s0, s0
+ %b.addr = alloca i32, align 4
+ %conv = fptoui float %a to i32
+ store i32 %conv, i32* %b.addr, align 4
+ ret void
+}
+
+define void @fptoui_double(double %a) nounwind ssp {
+entry:
+; ARM: fptoui_double
+; ARM: vcvt.u32.f64 s0, d16
+; THUMB: fptoui_double
+; THUMB: vcvt.u32.f64 s0, d16
+ %b.addr = alloca i32, align 8
+ %conv = fptoui double %a to i32
+ store i32 %conv, i32* %b.addr, align 8
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll
new file mode 100644
index 000000000000..7e147c7b4d7d
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-deadcode.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+; The target-specific selector can't properly handle the double because it
+; isn't being passed via a register, so the materialized arguments become
+; dead code.
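+; The THUMB-NOT lines below verify that those dead materialization
+; instructions are deleted.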
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+; THUMB: main
+ call void @printArgsNoRet(i32 1, float 0x4000CCCCC0000000, i8 signext 99, double 4.100000e+00)
+; THUMB: blx _printArgsNoRet
+; THUMB-NOT: ldr
+; THUMB-NOT: vldr
+; THUMB-NOT: vmov
+; THUMB-NOT: ldr
+; THUMB-NOT: sxtb
+; THUMB: movs r0, #0
+; THUMB: movt r0, #0
+; THUMB: pop
+ ret i32 0
+}
+
+declare void @printArgsNoRet(i32 %a1, float %a2, i8 signext %a3, double %a4)
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll
new file mode 100644
index 000000000000..61bd18504c5c
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-fold.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+
+@a = global i8 1, align 1
+@b = global i16 2, align 2
+
+define void @t1() nounwind uwtable ssp {
+; ARM: t1
+; ARM: ldrb
+; ARM-NOT: uxtb
+; THUMB: t1
+; THUMB: ldrb
+; THUMB-NOT: uxtb
+ %1 = load i8* @a, align 1
+ call void @foo1(i8 zeroext %1)
+ ret void
+}
+
+define void @t2() nounwind uwtable ssp {
+; ARM: t2
+; ARM: ldrh
+; ARM-NOT: uxth
+; THUMB: t2
+; THUMB: ldrh
+; THUMB-NOT: uxth
+ %1 = load i16* @b, align 2
+ call void @foo2(i16 zeroext %1)
+ ret void
+}
+
+declare void @foo1(i8 zeroext)
+declare void @foo2(i16 zeroext)
+
+define i32 @t3() nounwind uwtable ssp {
+; ARM: t3
+; ARM: ldrb
+; ARM-NOT: uxtb
+; THUMB: t3
+; THUMB: ldrb
+; THUMB-NOT: uxtb
+ %1 = load i8* @a, align 1
+ %2 = zext i8 %1 to i32
+ ret i32 %2
+}
+
+define i32 @t4() nounwind uwtable ssp {
+; ARM: t4
+; ARM: ldrh
+; ARM-NOT: uxth
+; THUMB: t4
+; THUMB: ldrh
+; THUMB-NOT: uxth
+ %1 = load i16* @b, align 2
+ %2 = zext i16 %1 to i32
+ ret i32 %2
+}
+
+define i32 @t5() nounwind uwtable ssp {
+; ARM: t5
+; ARM: ldrsh
+; ARM-NOT: sxth
+; THUMB: t5
+; THUMB: ldrsh
+; THUMB-NOT: sxth
+ %1 = load i16* @b, align 2
+ %2 = sext i16 %1 to i32
+ ret i32 %2
+}
+
+define i32 @t6() nounwind uwtable ssp {
+; ARM: t6
+; ARM: ldrsb
+; ARM-NOT: sxtb
+; THUMB: t6
+; THUMB: ldrsb
+; THUMB-NOT: sxtb
+ %1 = load i8* @a, align 2
+ %2 = sext i8 %1 to i32
+ ret i32 %2
+}
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
new file mode 100644
index 000000000000..8764bef7dab9
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
+entry:
+; ARM: icmp_i16_unsigned
+; ARM: uxth r0, r0
+; ARM: uxth r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i16_unsigned
+; THUMB: uxth r0, r0
+; THUMB: uxth r1, r1
+; THUMB: cmp r0, r1
+ %cmp = icmp ult i16 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define i32 @icmp_i8_signed(i8 %a, i8 %b) nounwind {
+entry:
+; ARM: icmp_i8_signed
+; ARM: sxtb r0, r0
+; ARM: sxtb r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i8_signed
+; THUMB: sxtb r0, r0
+; THUMB: sxtb r1, r1
+; THUMB: cmp r0, r1
+ %cmp = icmp sgt i8 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
+
+define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind {
+entry:
+; ARM: icmp_i1_unsigned
+; ARM: and r0, r0, #1
+; ARM: and r1, r1, #1
+; ARM: cmp r0, r1
+; THUMB: icmp_i1_unsigned
+; THUMB: and r0, r0, #1
+; THUMB: and r1, r1, #1
+; THUMB: cmp r0, r1
+ %cmp = icmp ult i1 %a, %b
+ %conv2 = zext i1 %cmp to i32
+ ret i32 %conv2
+}
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
new file mode 100644
index 000000000000..be8035ec794d
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define void @t1(i8* %x) {
+entry:
+; ARM: t1
+; THUMB: t1
+ br label %L0
+
+L0:
+ br label %L1
+
+L1:
+ indirectbr i8* %x, [ label %L0, label %L1 ]
+; ARM: bx r0
+; THUMB: mov pc, r0
+}
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
new file mode 100644
index 000000000000..e6bdfa78d49b
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -0,0 +1,110 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+@message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1
+@temp = common global [60 x i8] zeroinitializer, align 1
+
+define void @t1() nounwind ssp {
+; ARM: t1
+; ARM: movw r0, :lower16:_message1
+; ARM: movt r0, :upper16:_message1
+; ARM: add r0, r0, #5
+; ARM: movw r1, #64
+; ARM: movw r2, #10
+; ARM: uxtb r1, r1
+; ARM: bl _memset
+; THUMB: t1
+; THUMB: movw r0, :lower16:_message1
+; THUMB: movt r0, :upper16:_message1
+; THUMB: adds r0, #5
+; THUMB: movs r1, #64
+; THUMB: movt r1, #0
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: uxtb r1, r1
+; THUMB: bl _memset
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @t2() nounwind ssp {
+; ARM: t2
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #17
+; ARM: str r0, [sp] @ 4-byte Spill
+; ARM: mov r0, r1
+; ARM: ldr r1, [sp] @ 4-byte Reload
+; ARM: bl _memcpy
+; THUMB: t2
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #17
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memcpy
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @t3() nounwind ssp {
+; ARM: t3
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: add r1, r0, #4
+; ARM: add r0, r0, #16
+; ARM: movw r2, #10
+; ARM: mov r0, r1
+; ARM: bl _memmove
+; THUMB: t3
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: adds r1, r0, #4
+; THUMB: adds r0, #16
+; THUMB: movs r2, #10
+; THUMB: movt r2, #0
+; THUMB: mov r0, r1
+; THUMB: bl _memmove
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ ret void
+}
+
+define void @t4() nounwind ssp {
+; ARM: t4
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldr r1, [r0, #16]
+; ARM: str r1, [r0, #4]
+; ARM: ldr r1, [r0, #20]
+; ARM: str r1, [r0, #8]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
+; ARM: bx lr
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldr r1, [r0, #16]
+; THUMB: str r1, [r0, #4]
+; THUMB: ldr r1, [r0, #20]
+; THUMB: str r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
+; THUMB: bx lr
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
new file mode 100644
index 000000000000..dfb8c53735a3
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t1
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 1
+ %0 = load i32* %add.ptr, align 4
+; ARM: ldr r{{[0-9]}}, [r0, #4]
+ ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t2
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 63
+ %0 = load i32* %add.ptr, align 4
+; ARM: ldr.w r{{[0-9]}}, [r0, #252]
+ ret i32 %0
+}
+
+define zeroext i16 @t3(i16* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t3
+ %add.ptr = getelementptr inbounds i16* %ptr, i16 1
+ %0 = load i16* %add.ptr, align 4
+; ARM: ldrh r{{[0-9]}}, [r0, #2]
+ ret i16 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t4
+ %add.ptr = getelementptr inbounds i16* %ptr, i16 63
+ %0 = load i16* %add.ptr, align 4
+; ARM: ldrh.w r{{[0-9]}}, [r0, #126]
+ ret i16 %0
+}
+
+define zeroext i8 @t5(i8* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t5
+ %add.ptr = getelementptr inbounds i8* %ptr, i8 1
+ %0 = load i8* %add.ptr, align 4
+; ARM: ldrb r{{[0-9]}}, [r0, #1]
+ ret i8 %0
+}
+
+define zeroext i8 @t6(i8* nocapture %ptr) nounwind readonly {
+entry:
+; ARM: t6
+ %add.ptr = getelementptr inbounds i8* %ptr, i8 63
+ %0 = load i8* %add.ptr, align 4
+; ARM: ldrb.w r{{[0-9]}}, [r0, #63]
+ ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
new file mode 100644
index 000000000000..2a88678da767
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -0,0 +1,168 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t1
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+ %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-4]
+ ret i32 %0
+}
+
+define i32 @t2(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t2
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+ %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0, #-252]
+ ret i32 %0
+}
+
+define i32 @t3(i32* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t3
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+ %0 = load i32* %add.ptr, align 4, !tbaa !0
+; THUMB: ldr r{{[0-9]}}, [r0]
+ ret i32 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t4
+ %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+ %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
+ ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t5
+ %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+ %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
+ ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t6
+ %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+ %0 = load i16* %add.ptr, align 2, !tbaa !3
+; THUMB: ldrh r{{[0-9]}}, [r0]
+ ret i16 %0
+}
+
+define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t7
+ %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+ %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
+ ret i8 %0
+}
+
+define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t8
+ %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+ %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
+ ret i8 %0
+}
+
+define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly {
+entry:
+; THUMB: t9
+ %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+ %0 = load i8* %add.ptr, align 1, !tbaa !1
+; THUMB: ldrb r{{[0-9]}}, [r0]
+ ret i8 %0
+}
+
+define void @t10(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t10
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 -1
+ store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-4]
+ ret void
+}
+
+define void @t11(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t11
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 -63
+ store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0, #-252]
+ ret void
+}
+
+define void @t12(i32* nocapture %ptr) nounwind {
+entry:
+; THUMB: t12
+ %add.ptr = getelementptr inbounds i32* %ptr, i32 -64
+ store i32 0, i32* %add.ptr, align 4, !tbaa !0
+; THUMB: str r{{[0-9]}}, [r0]
+ ret void
+}
+
+define void @t13(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t13
+ %add.ptr = getelementptr inbounds i16* %ptr, i32 -1
+ store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-2]
+ ret void
+}
+
+define void @t14(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t14
+ %add.ptr = getelementptr inbounds i16* %ptr, i32 -127
+ store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0, #-254]
+ ret void
+}
+
+define void @t15(i16* nocapture %ptr) nounwind {
+entry:
+; THUMB: t15
+ %add.ptr = getelementptr inbounds i16* %ptr, i32 -128
+ store i16 0, i16* %add.ptr, align 2, !tbaa !3
+; THUMB: strh r{{[0-9]}}, [r0]
+ ret void
+}
+
+define void @t16(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t16
+ %add.ptr = getelementptr inbounds i8* %ptr, i32 -1
+ store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-1]
+ ret void
+}
+
+define void @t17(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t17
+ %add.ptr = getelementptr inbounds i8* %ptr, i32 -255
+ store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0, #-255]
+ ret void
+}
+
+define void @t18(i8* nocapture %ptr) nounwind {
+entry:
+; THUMB: t18
+ %add.ptr = getelementptr inbounds i8* %ptr, i32 -256
+ store i8 0, i8* %add.ptr, align 1, !tbaa !1
+; THUMB: strb r{{[0-9]}}, [r0]
+ ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"short", metadata !1}
diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
new file mode 100644
index 000000000000..e8cc2b238dff
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; rdar://10418009
+
+define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t1
+ %add.ptr = getelementptr inbounds i16* %a, i64 -8
+ %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-16]
+ ret i16 %0
+}
+
+define zeroext i16 @t2(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t2
+ %add.ptr = getelementptr inbounds i16* %a, i64 -16
+ %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-32]
+ ret i16 %0
+}
+
+define zeroext i16 @t3(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t3
+ %add.ptr = getelementptr inbounds i16* %a, i64 -127
+ %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #-254]
+ ret i16 %0
+}
+
+define zeroext i16 @t4(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t4
+ %add.ptr = getelementptr inbounds i16* %a, i64 -128
+ %0 = load i16* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrh r0, [r0]
+ ret i16 %0
+}
+
+define zeroext i16 @t5(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t5
+ %add.ptr = getelementptr inbounds i16* %a, i64 8
+ %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #16]
+ ret i16 %0
+}
+
+define zeroext i16 @t6(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t6
+ %add.ptr = getelementptr inbounds i16* %a, i64 16
+ %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #32]
+ ret i16 %0
+}
+
+define zeroext i16 @t7(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t7
+ %add.ptr = getelementptr inbounds i16* %a, i64 127
+ %0 = load i16* %add.ptr, align 2
+; ARM: ldrh r0, [r0, #254]
+ ret i16 %0
+}
+
+define zeroext i16 @t8(i16* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t8
+ %add.ptr = getelementptr inbounds i16* %a, i64 128
+ %0 = load i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: ldrh r0, [r0]
+ ret i16 %0
+}
+
+define void @t9(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t9
+ %add.ptr = getelementptr inbounds i16* %a, i64 -8
+ store i16 0, i16* %add.ptr, align 2
+; ARM: strh r1, [r0, #-16]
+ ret void
+}
+
+; mvn r1, #255
+; strh r2, [r0, r1]
+define void @t10(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t10
+ %add.ptr = getelementptr inbounds i16* %a, i64 -128
+ store i16 0, i16* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: strh r{{[1-9]}}, [r0]
+ ret void
+}
+
+define void @t11(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t11
+ %add.ptr = getelementptr inbounds i16* %a, i64 8
+ store i16 0, i16* %add.ptr, align 2
+; ARM: strh r{{[1-9]}}, [r0, #16]
+ ret void
+}
+
+; mov r1, #256
+; strh r2, [r0, r1]
+define void @t12(i16* nocapture %a) nounwind uwtable ssp {
+entry:
+; ARM: t12
+ %add.ptr = getelementptr inbounds i16* %a, i64 128
+ store i16 0, i16* %add.ptr, align 2
+; ARM: add r0, r0, #256
+; ARM: strh r{{[1-9]}}, [r0]
+ ret void
+}
+
+define signext i8 @t13(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t13
+ %add.ptr = getelementptr inbounds i8* %a, i64 -8
+ %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-8]
+ ret i8 %0
+}
+
+define signext i8 @t14(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t14
+ %add.ptr = getelementptr inbounds i8* %a, i64 -255
+ %0 = load i8* %add.ptr, align 2
+; ARM: ldrsb r0, [r0, #-255]
+ ret i8 %0
+}
+
+define signext i8 @t15(i8* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+; ARM: t15
+ %add.ptr = getelementptr inbounds i8* %a, i64 -256
+ %0 = load i8* %add.ptr, align 2
+; ARM: mvn r{{[1-9]}}, #255
+; ARM: add r0, r0, r{{[1-9]}}
+; ARM: ldrsb r0, [r0]
+ ret i8 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll
new file mode 100644
index 000000000000..b180e439dd6f
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-mvn.ll
@@ -0,0 +1,107 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; rdar://10412592
+
+; Note: The Thumb code is being generated by the target-independent selector.
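+; It therefore builds each constant with a movw/movt pair rather than a
+; single mvn.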
+
+define void @t1() nounwind {
+entry:
+; ARM: t1
+; THUMB: t1
+; ARM: mvn r0, #0
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #65535
+ call void @foo(i32 -1)
+ ret void
+}
+
+declare void @foo(i32)
+
+define void @t2() nounwind {
+entry:
+; ARM: t2
+; THUMB: t2
+; ARM: mvn r0, #233
+; THUMB: movw r0, #65302
+; THUMB: movt r0, #65535
+ call void @foo(i32 -234)
+ ret void
+}
+
+define void @t3() nounwind {
+entry:
+; ARM: t3
+; THUMB: t3
+; ARM: mvn r0, #256
+; THUMB: movw r0, #65279
+; THUMB: movt r0, #65535
+ call void @foo(i32 -257)
+ ret void
+}
+
+; Load from constant pool
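+; (-258 fits neither the mov nor the mvn rotated-immediate encodings.)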
+define void @t4() nounwind {
+entry:
+; ARM: t4
+; THUMB: t4
+; ARM: ldr r0
+; THUMB: movw r0, #65278
+; THUMB: movt r0, #65535
+ call void @foo(i32 -258)
+ ret void
+}
+
+define void @t5() nounwind {
+entry:
+; ARM: t5
+; THUMB: t5
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+ call void @foo(i32 -65281)
+ ret void
+}
+
+define void @t6() nounwind {
+entry:
+; ARM: t6
+; THUMB: t6
+; ARM: mvn r0, #978944
+; THUMB: movw r0, #4095
+; THUMB: movt r0, #65521
+ call void @foo(i32 -978945)
+ ret void
+}
+
+define void @t7() nounwind {
+entry:
+; ARM: t7
+; THUMB: t7
+; ARM: mvn r0, #267386880
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #61455
+ call void @foo(i32 -267386881)
+ ret void
+}
+
+define void @t8() nounwind {
+entry:
+; ARM: t8
+; THUMB: t8
+; ARM: mvn r0, #65280
+; THUMB: movs r0, #255
+; THUMB: movt r0, #65535
+ call void @foo(i32 -65281)
+ ret void
+}
+
+define void @t9() nounwind {
+entry:
+; ARM: t9
+; THUMB: t9
+; ARM: mvn r0, #2130706432
+; THUMB: movw r0, #65535
+; THUMB: movt r0, #33023
+ call void @foo(i32 -2130706433)
+ ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index 08dcc64c9c84..e50c3a4954e1 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=linearscan < %s
+; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
; This isn't exactly a useful set of command-line options, but check that it
; doesn't crash. (It was crashing because a register was getting redefined.)
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll
new file mode 100644
index 000000000000..689b169ee32f
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-ret.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s
+
+; Sign-extend of i1 currently not supported by fast-isel
+;define signext i1 @ret0(i1 signext %a) nounwind uwtable ssp {
+;entry:
+; ret i1 %a
+;}
+
+define zeroext i1 @ret1(i1 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret1
+; CHECK: and r0, r0, #1
+; CHECK: bx lr
+ ret i1 %a
+}
+
+define signext i8 @ret2(i8 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret2
+; CHECK: sxtb r0, r0
+; CHECK: bx lr
+ ret i8 %a
+}
+
+define zeroext i8 @ret3(i8 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret3
+; CHECK: uxtb r0, r0
+; CHECK: bx lr
+ ret i8 %a
+}
+
+define signext i16 @ret4(i16 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret4
+; CHECK: sxth r0, r0
+; CHECK: bx lr
+ ret i16 %a
+}
+
+define zeroext i16 @ret5(i16 signext %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret5
+; CHECK: uxth r0, r0
+; CHECK: bx lr
+ ret i16 %a
+}
+
+define i16 @ret6(i16 %a) nounwind uwtable ssp {
+entry:
+; CHECK: ret6
+; CHECK-NOT: uxth
+; CHECK-NOT: sxth
+; CHECK: bx lr
+ ret i16 %a
+}
diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll
new file mode 100644
index 000000000000..b83a73366948
--- /dev/null
+++ b/test/CodeGen/ARM/fast-isel-select.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+
+define i32 @t1(i1 %c) nounwind readnone {
+entry:
+; ARM: t1
+; ARM: movw r{{[1-9]}}, #10
+; ARM: cmp r0, #0
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t1
+; THUMB: movs r{{[1-9]}}, #10
+; THUMB: movt r{{[1-9]}}, #0
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 10, i32 20
+ ret i32 %0
+}
+
+define i32 @t2(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t2
+; ARM: cmp r0, #0
+; ARM: moveq r{{[1-9]}}, #20
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t2
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: moveq r{{[1-9]}}, #20
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 %a, i32 20
+ ret i32 %0
+}
+
+define i32 @t3(i1 %c, i32 %a, i32 %b) nounwind readnone {
+entry:
+; ARM: t3
+; ARM: cmp r0, #0
+; ARM: movne r{{[1-9]}}, r{{[1-9]}}
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t3
+; THUMB: cmp r0, #0
+; THUMB: it ne
+; THUMB: movne r{{[1-9]}}, r{{[1-9]}}
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 %a, i32 %b
+ ret i32 %0
+}
+
+define i32 @t4(i1 %c) nounwind readnone {
+entry:
+; ARM: t4
+; ARM: mvn r{{[1-9]}}, #9
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #0
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t4
+; THUMB: movw r{{[1-9]}}, #65526
+; THUMB: movt r{{[1-9]}}, #65535
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #0
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 -10, i32 -1
+ ret i32 %0
+}
+
+define i32 @t5(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t5
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #1
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t5
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #1
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 %a, i32 -2
+ ret i32 %0
+}
+
+; Check one large negative immediate.
+define i32 @t6(i1 %c, i32 %a) nounwind readnone {
+entry:
+; ARM: t6
+; ARM: cmp r0, #0
+; ARM: mvneq r{{[1-9]}}, #978944
+; ARM: mov r0, r{{[1-9]}}
+; THUMB: t6
+; THUMB: cmp r0, #0
+; THUMB: it eq
+; THUMB: mvneq r{{[1-9]}}, #978944
+; THUMB: mov r0, r{{[1-9]}}
+ %0 = select i1 %c, i32 %a, i32 -978945
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 465e85f9a832..417e2d9e410a 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
; Very basic fast-isel functionality.
define i32 @add(i32 %a, i32 %b) nounwind {
@@ -142,19 +142,87 @@ define void @test4() {
store i32 %b, i32* @test4g
ret void
-; THUMB: ldr.n r0, LCPI4_1
+; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr
; THUMB: ldr r0, [r0]
-; THUMB: ldr r0, [r0]
-; THUMB: adds r0, #1
-; THUMB: ldr.n r1, LCPI4_0
-; THUMB: ldr r1, [r1]
-; THUMB: str r0, [r1]
+; THUMB: ldr r1, [r0]
+; THUMB: adds r1, #1
+; THUMB: str r1, [r0]
-; ARM: ldr r0, LCPI4_1
-; ARM: ldr r0, [r0]
+; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr
+; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr
; ARM: ldr r0, [r0]
-; ARM: add r0, r0, #1
-; ARM: ldr r1, LCPI4_0
-; ARM: ldr r1, [r1]
-; ARM: str r0, [r1]
+; ARM: ldr r1, [r0]
+; ARM: add r1, r1, #1
+; ARM: str r1, [r0]
+}
+
+; Check unaligned stores
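+; The packed struct below leaves the float field with alignment 1; an
+; align-1 store cannot use vstr, so the value is moved to a core register
+; and stored with str.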
+%struct.anon = type <{ float }>
+
+@a = common global %struct.anon* null, align 4
+
+define void @unaligned_store(float %x, float %y) nounwind {
+entry:
+; ARM: @unaligned_store
+; ARM: vmov r1, s0
+; ARM: str r1, [r0]
+
+; THUMB: @unaligned_store
+; THUMB: vmov r1, s0
+; THUMB: str r1, [r0]
+
+ %add = fadd float %x, %y
+ %0 = load %struct.anon** @a, align 4
+ %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0
+ store float %add, float* %x1, align 1
+ ret void
+}
+
+; Doublewords require only word-alignment.
+; rdar://10528060
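+; The align-4 store to @foo_unpacked below can therefore still use vstr.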
+%struct.anon.0 = type { double }
+
+@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4
+
+define void @test5(double %a, double %b) nounwind {
+entry:
+; ARM: @test5
+; THUMB: @test5
+ %add = fadd double %a, %b
+ store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4
+; ARM: vstr d16, [r0]
+; THUMB: vstr d16, [r0]
+ ret void
+}
+
+; Check unaligned loads of floats
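+; The float sits at offset 2 here, so it is loaded with an integer ldr and
+; transferred to a VFP register with vmov.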
+%class.TAlignTest = type <{ i16, float }>
+
+define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 {
+entry:
+; ARM: @test6
+; THUMB: @test6
+ %0 = alloca %class.TAlignTest*, align 4
+ store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
+ %1 = load %class.TAlignTest** %0
+ %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1
+ %3 = load float* %2, align 1
+ %4 = fcmp une float %3, 0.000000e+00
+; ARM: ldr r0, [r0, #2]
+; ARM: vmov s0, r0
+; ARM: vcmpe.f32 s0, #0
+; THUMB: ldr.w r0, [r0, #2]
+; THUMB: vmov s0, r0
+; THUMB: vcmpe.f32 s0, #0
+ ret i1 %4
+}
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+ %rem = urem i32 %a, 32
+ ret i32 %rem
}
diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll
index c4dbeb9ee50f..27fa2b093d89 100644
--- a/test/CodeGen/ARM/fcopysign.ll
+++ b/test/CodeGen/ARM/fcopysign.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
-; RUN: llc < %s -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
+; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
; rdar://8984306
define float @test1(float %x, float %y) nounwind {
@@ -40,28 +40,12 @@ entry:
ret double %1
}
-; rdar://9059537
-define i32 @test4() ssp {
-entry:
-; SOFT: test4:
-; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00
-; This S-reg must be the first sub-reg of the last D-reg on vbsl.
-; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]]
-; SOFT: vshr.u64 [[REG4]], [[REG4]], #32
-; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000
-; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}}
- %call80 = tail call double @copysign(double 1.000000e+00, double undef)
- %conv81 = fptrunc double %call80 to float
- %tmp88 = bitcast float %conv81 to i32
- ret i32 %tmp88
-}
-
; rdar://9287902
-define float @test5() nounwind {
+define float @test4() nounwind {
entry:
-; SOFT: test5:
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
+; SOFT: test4:
; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
+; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
; SOFT: vbsl [[REG6]], [[REG7]],
%0 = tail call double (...)* @bar() nounwind
diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll
index 227e4e8aaa24..1ba561dd70b0 100644
--- a/test/CodeGen/ARM/fold-const.ll
+++ b/test/CodeGen/ARM/fold-const.ll
@@ -3,7 +3,7 @@
define i32 @f(i32 %a) nounwind readnone optsize ssp {
entry:
%conv = zext i32 %a to i64
- %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv)
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv, i1 true)
; CHECK: clz
; CHECK-NOT: adds
%cast = trunc i64 %tmp1 to i32
@@ -11,4 +11,4 @@ entry:
ret i32 %sub
}
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index ac023d19ec35..93601cf9d6c9 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -42,7 +42,7 @@ entry:
define double @h(double* %v) {
;CHECK: h:
-;CHECK: vldr.64
+;CHECK: vldr
;CHECK-NEXT: vmov
entry:
%tmp = load double* %v ; <double> [#uses=1]
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
index 7c0dd0e12a79..2d8f7108e0ec 100644
--- a/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -1,24 +1,16 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck %s
; rdar://7461510
+; rdar://10964603
+; Only fold an fcmp into an integer compare when one of the operands is
+; known to be zero.
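+; t1 compares two loaded values, so it must keep the VFP compare even under
+; -enable-unsafe-fp-math.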
define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
entry:
-; FINITE: t1:
-; FINITE-NOT: vldr
-; FINITE: ldr
-; FINITE: ldr
-; FINITE: cmp r0, r1
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: beq
-
-; NAN: t1:
-; NAN: vldr.32 s0,
-; NAN: vldr.32 s1,
-; NAN: vcmpe.f32 s1, s0
-; NAN: vmrs apsr_nzcv, fpscr
-; NAN: beq
+; CHECK: t1:
+; CHECK: vldr [[S0:s[0-9]+]],
+; CHECK: vldr [[S1:s[0-9]+]],
+; CHECK: vcmpe.f32 [[S1]], [[S0]]
+; CHECK: vmrs APSR_nzcv, fpscr
+; CHECK: beq
%0 = load float* %a
%1 = load float* %b
%2 = fcmp une float %0, %1
@@ -33,17 +25,21 @@ bb2:
ret i32 %4
}
+; If one side is zero, the other side's sign bit is masked off to allow
+; +0.0 == -0.0
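+; The bfc below clears the sign bit of the high word before the compare.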
define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
entry:
-; FINITE: t2:
-; FINITE-NOT: vldr
-; FINITE: ldrd r0, r1, [r0]
-; FINITE-NOT: b LBB
-; FINITE: cmp r0, #0
-; FINITE: cmpeq r1, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t2:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG1:(r[0-9]+)]], [r0]
+; CHECK: ldr [[REG2:(r[0-9]+)]], [r0, #4]
+; CHECK-NOT: b LBB
+; CHECK: cmp [[REG1]], #0
+; CHECK: bfc [[REG2]], #31, #1
+; CHECK: cmpeq [[REG2]], #0
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
%0 = load double* %a
%1 = fcmp oeq double %0, 0.000000e+00
br i1 %1, label %bb1, label %bb2
@@ -59,13 +55,14 @@ bb2:
define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
entry:
-; FINITE: t3:
-; FINITE-NOT: vldr
-; FINITE: ldr r0, [r0]
-; FINITE: cmp r0, #0
-; FINITE-NOT: vcmpe.f32
-; FINITE-NOT: vmrs
-; FINITE: bne
+; CHECK: t3:
+; CHECK-NOT: vldr
+; CHECK: ldr [[REG3:(r[0-9]+)]], [r0]
+; CHECK: mvn [[REG4:(r[0-9]+)]], #-2147483648
+; CHECK: tst [[REG3]], [[REG4]]
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: bne
%0 = load float* %a
%1 = fcmp oeq float %0, 0.000000e+00
br i1 %1, label %bb1, label %bb2
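
The identity t2 and t3 rely on: +0.0 and -0.0 differ only in the sign bit (0x00000000 vs 0x80000000), so an oeq compare against zero can clear bit 31 and test the remaining bits as an integer. A minimal IR sketch of that equivalence, with a hypothetical @is_zero (NaN has a nonzero payload, so both sides agree there too):

    define i1 @is_zero(float %x) nounwind readnone {
    entry:
      %bits = bitcast float %x to i32
      ; clear the sign bit (0x7FFFFFFF) so -0.0 compares equal to +0.0
      %mag = and i32 %bits, 2147483647
      %cmp = icmp eq i32 %mag, 0
      ret i1 %cmp
    }
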
diff --git a/test/CodeGen/ARM/fpcmp_ueq.ll b/test/CodeGen/ARM/fpcmp_ueq.ll
index 2e6b3e3167ae..4a4c5b1c8b05 100644
--- a/test/CodeGen/ARM/fpcmp_ueq.ll
+++ b/test/CodeGen/ARM/fpcmp_ueq.ll
@@ -5,7 +5,7 @@ define i32 @f7(float %a, float %b) {
entry:
; CHECK: f7:
; CHECK: vcmpe.f32
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
; CHECK: movweq
; CHECK-NOT: vmrs
; CHECK: movwvs
diff --git a/test/CodeGen/ARM/fpmem.ll b/test/CodeGen/ARM/fpmem.ll
index 38339334b445..8faa57896a8d 100644
--- a/test/CodeGen/ARM/fpmem.ll
+++ b/test/CodeGen/ARM/fpmem.ll
@@ -8,7 +8,7 @@ define float @f1(float %a) {
define float @f2(float* %v, float %u) {
; CHECK: f2:
-; CHECK: vldr.32{{.*}}[
+; CHECK: vldr{{.*}}[
%tmp = load float* %v ; <float> [#uses=1]
%tmp1 = fadd float %tmp, %u ; <float> [#uses=1]
ret float %tmp1
@@ -16,7 +16,7 @@ define float @f2(float* %v, float %u) {
define float @f2offset(float* %v, float %u) {
; CHECK: f2offset:
-; CHECK: vldr.32{{.*}}, #4]
+; CHECK: vldr{{.*}}, #4]
%addr = getelementptr float* %v, i32 1
%tmp = load float* %addr
%tmp1 = fadd float %tmp, %u
@@ -25,7 +25,7 @@ define float @f2offset(float* %v, float %u) {
define float @f2noffset(float* %v, float %u) {
; CHECK: f2noffset:
-; CHECK: vldr.32{{.*}}, #-4]
+; CHECK: vldr{{.*}}, #-4]
%addr = getelementptr float* %v, i32 -1
%tmp = load float* %addr
%tmp1 = fadd float %tmp, %u
@@ -34,7 +34,7 @@ define float @f2noffset(float* %v, float %u) {
define void @f3(float %a, float %b, float* %v) {
; CHECK: f3:
-; CHECK: vstr.32{{.*}}[
+; CHECK: vstr{{.*}}[
%tmp = fadd float %a, %b ; <float> [#uses=1]
store float %tmp, float* %v
ret void
diff --git a/test/CodeGen/ARM/fusedMAC.ll b/test/CodeGen/ARM/fusedMAC.ll
new file mode 100644
index 000000000000..a8b3999d2bf5
--- /dev/null
+++ b/test/CodeGen/ARM/fusedMAC.ll
@@ -0,0 +1,185 @@
+; RUN: llc < %s -march=arm -mattr=+neon,+vfp4 | FileCheck %s
+; Check generated fused MAC and MLS.
+
+define double @fusedMACTest1(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest1:
+;CHECK: vfma.f64
+ %1 = fmul double %d1, %d2
+ %2 = fadd double %1, %d3
+ ret double %2
+}
+
+define float @fusedMACTest2(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest2:
+;CHECK: vfma.f32
+ %1 = fmul float %f1, %f2
+ %2 = fadd float %1, %f3
+ ret float %2
+}
+
+define double @fusedMACTest3(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest3:
+;CHECK: vfms.f64
+ %1 = fmul double %d2, %d3
+ %2 = fsub double %d1, %1
+ ret double %2
+}
+
+define float @fusedMACTest4(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest4:
+;CHECK: vfms.f32
+ %1 = fmul float %f2, %f3
+ %2 = fsub float %f1, %1
+ ret float %2
+}
+
+define double @fusedMACTest5(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest5:
+;CHECK: vfnma.f64
+ %1 = fmul double %d1, %d2
+ %2 = fsub double -0.0, %1
+ %3 = fsub double %2, %d3
+ ret double %3
+}
+
+define float @fusedMACTest6(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest6:
+;CHECK: vfnma.f32
+ %1 = fmul float %f1, %f2
+ %2 = fsub float -0.0, %1
+ %3 = fsub float %2, %f3
+ ret float %3
+}
+
+define double @fusedMACTest7(double %d1, double %d2, double %d3) {
+;CHECK: fusedMACTest7:
+;CHECK: vfnms.f64
+ %1 = fmul double %d1, %d2
+ %2 = fsub double %1, %d3
+ ret double %2
+}
+
+define float @fusedMACTest8(float %f1, float %f2, float %f3) {
+;CHECK: fusedMACTest8:
+;CHECK: vfnms.f32
+ %1 = fmul float %f1, %f2
+ %2 = fsub float %1, %f3
+ ret float %2
+}
+
+define <2 x float> @fusedMACTest9(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest9:
+;CHECK: vfma.f32
+ %mul = fmul <2 x float> %a, %b
+ %add = fadd <2 x float> %mul, %a
+ ret <2 x float> %add
+}
+
+define <2 x float> @fusedMACTest10(<2 x float> %a, <2 x float> %b) {
+;CHECK: fusedMACTest10:
+;CHECK: vfms.f32
+ %mul = fmul <2 x float> %a, %b
+ %sub = fsub <2 x float> %a, %mul
+ ret <2 x float> %sub
+}
+
+define <4 x float> @fusedMACTest11(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest11:
+;CHECK: vfma.f32
+ %mul = fmul <4 x float> %a, %b
+ %add = fadd <4 x float> %mul, %a
+ ret <4 x float> %add
+}
+
+define <4 x float> @fusedMACTest12(<4 x float> %a, <4 x float> %b) {
+;CHECK: fusedMACTest12:
+;CHECK: vfms.f32
+ %mul = fmul <4 x float> %a, %b
+ %sub = fsub <4 x float> %a, %mul
+ ret <4 x float> %sub
+}
+
+define float @test_fma_f32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f32
+; CHECK: vfma.f32
+ %tmp1 = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+ ret float %tmp1
+}
+
+define double @test_fma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_f64
+; CHECK: vfma.f64
+ %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+ ret double %tmp1
+}
+
+define <2 x float> @test_fma_v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fma_v2f32
+; CHECK: vfma.f32
+ %tmp1 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
+ ret <2 x float> %tmp1
+}
+
+define double @test_fms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64
+; CHECK: vfms.f64
+ %tmp1 = fsub double -0.0, %a
+ %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+ ret double %tmp2
+}
+
+define double @test_fms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fms_f64_2
+; CHECK: vfms.f64
+ %tmp1 = fsub double -0.0, %b
+ %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+ %tmp3 = fsub double -0.0, %tmp2
+ ret double %tmp3
+}
+
+define double @test_fnms_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64
+; CHECK: vfnms.f64
+ %tmp1 = fsub double -0.0, %a
+ %tmp2 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %c) nounwind readnone
+ %tmp3 = fsub double -0.0, %tmp2
+ ret double %tmp3
+}
+
+define double @test_fnms_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnms_f64_2
+; CHECK: vfnms.f64
+ %tmp1 = fsub double -0.0, %b
+ %tmp2 = tail call double @llvm.fma.f64(double %a, double %tmp1, double %c) nounwind readnone
+ ret double %tmp2
+}
+
+define double @test_fnma_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64
+; CHECK: vfnma.f64
+ %tmp1 = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+ %tmp2 = fsub double -0.0, %tmp1
+ ret double %tmp2
+}
+
+define double @test_fnma_f64_2(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+; CHECK: test_fnma_f64_2
+; CHECK: vfnma.f64
+ %tmp1 = fsub double -0.0, %a
+ %tmp2 = fsub double -0.0, %c
+ %tmp3 = tail call double @llvm.fma.f64(double %tmp1, double %b, double %tmp2) nounwind readnone
+ ret double %tmp3
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
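
Summarizing the shapes checked above: vfma computes c + a*b, vfms computes c - a*b, vfnms computes a*b - c, and vfnma computes -(a*b) - c. Operand negation reaches the selector as an fsub from -0.0; a single-precision sketch, by analogy with test_fms_f64 above (hypothetical function name):

    define float @fms_sketch(float %a, float %b, float %c) nounwind readnone {
    entry:
      ; fma(-a, b, c) = c - a*b, expected to select vfms.f32
      %na = fsub float -0.0, %a
      %r = tail call float @llvm.fma.f32(float %na, float %b, float %c) nounwind readnone
      ret float %r
    }
    declare float @llvm.fma.f32(float, float, float) nounwind readnone
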
diff --git a/test/CodeGen/ARM/global-merge.ll b/test/CodeGen/ARM/global-merge.ll
index 28bf2214740a..1732df3fa5ef 100644
--- a/test/CodeGen/ARM/global-merge.ll
+++ b/test/CodeGen/ARM/global-merge.ll
@@ -14,7 +14,7 @@
; offset. Having the starting offset in range is not sufficient.
; When this works properly, @g3 is placed in a separate chunk of merged globals.
; CHECK: _MergedGlobals1:
-@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ]
+@g3 = internal global [30 x i32] [ i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ], align 4
; Global variables that can be placed in BSS should be kept together in a
; separate pool of merged globals.
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index 5e7e3f2a92eb..eb71149d83a9 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -70,6 +70,5 @@ define i32 @test1() {
; LinuxPIC: .align 2
; LinuxPIC: .LCPI0_0:
; LinuxPIC: .long _GLOBAL_OFFSET_TABLE_-(.LPC0_0+8)
-; LinuxPIC: .align 2
; LinuxPIC: .LCPI0_1:
; LinuxPIC: .long G(GOT)
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index 9f46ae078d83..893b4266ac3c 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
; RUN: grep mov | count 2
-; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-ios | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index b073a05fc9c5..cd870bb5d4b2 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,15 +1,17 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
-; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
define i32 @t1(i32 %a, i32 %b) {
+; CHECK: t1:
%tmp2 = icmp eq i32 %a, 0
br i1 %tmp2, label %cond_false, label %cond_true
cond_true:
+; CHECK: subeq r0, r1, #1
%tmp5 = add i32 %b, 1
ret i32 %tmp5
cond_false:
+; CHECK: addne r0, r1, #1
%tmp7 = add i32 %b, -1
ret i32 %tmp7
}
diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll
index 18f87bfc2e71..a5082d836587 100644
--- a/test/CodeGen/ARM/ifcvt10.ll
+++ b/test/CodeGen/ARM/ifcvt10.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a9 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios -mcpu=cortex-a9 | FileCheck %s
; rdar://8402126
; Make sure if-converter is not predicating vldmia and ldmia. These are
; micro-coded and would have long issue latency even if predicated on
diff --git a/test/CodeGen/ARM/ifcvt11.ll b/test/CodeGen/ARM/ifcvt11.ll
index 63f8557d555b..0f142eef7a3c 100644
--- a/test/CodeGen/ARM/ifcvt11.ll
+++ b/test/CodeGen/ARM/ifcvt11.ll
@@ -18,7 +18,7 @@ bb.nph: ; preds = %entry
bb: ; preds = %bb4, %bb.nph
; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
%r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
%n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
%scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
@@ -33,7 +33,7 @@ bb1: ; preds = %bb
; CHECK-NOT: vcmpemi
; CHECK-NOT: vmrsmi
; CHECK: vcmpe.f64
-; CHECK: vmrs apsr_nzcv, fpscr
+; CHECK: vmrs APSR_nzcv, fpscr
%scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
%6 = load double* %scevgep12, align 4
%7 = fcmp uge double %3, %6
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index 3e2c578dd062..eef4de050b35 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: cmp r2, #1
+; CHECK: cmpne r2, #7
switch i32 %c, label %cond_next [
i32 1, label %cond_true
i32 7, label %cond_true
]
cond_true:
+; CHECK: addne r0
+; CHECK: bxne
%tmp12 = add i32 %a, 1
%tmp1518 = add i32 %tmp12, %b
ret i32 %tmp1518
diff --git a/test/CodeGen/ARM/ifcvt5.ll b/test/CodeGen/ARM/ifcvt5.ll
index 3615055f8b29..95f5c97f2a9a 100644
--- a/test/CodeGen/ARM/ifcvt5.ll
+++ b/test/CodeGen/ARM/ifcvt5.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
@x = external global i32* ; <i32**> [#uses=1]
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 232765768550..a00dedaee670 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
define void @foo(i32 %X, i32 %Y) {
entry:
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index cb5243c9062b..2fcc45f4af9c 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -110,3 +110,13 @@ entry:
call void asm "str $1, $0", "=*Q,r"(i8** %f.addr, i32 %g) nounwind
ret void
}
+
+; Radar 10551006
+
+define <4 x i32> @t11(i32* %p) nounwind {
+entry:
+; CHECK: t11
+; CHECK: vld1.s32 {d16[], d17[]}, [r0]
+ %0 = tail call <4 x i32> asm "vld1.s32 {${0:e}[], ${0:f}[]}, [$1]", "=w,r"(i32* %p) nounwind
+ ret <4 x i32> %0
+}
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 1d323228cd1f..d188fae70340 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=arm -mattr=+v6
-; RUN: llc < %s -mtriple=arm-apple-darwin -mattr=+v6 |\
+; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+v6 |\
; RUN: grep mov | count 3
define i32 @test(i32 %x) {
diff --git a/test/CodeGen/ARM/ldrd-memoper.ll b/test/CodeGen/ARM/ldrd-memoper.ll
new file mode 100644
index 000000000000..f1a1121f6aa4
--- /dev/null
+++ b/test/CodeGen/ARM/ldrd-memoper.ll
@@ -0,0 +1,15 @@
+; RUN: llc %s -o /dev/null -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -debug-only=arm-ldst-opt 2> %t
+; RUN: FileCheck %s < %t
+; REQUIRES: asserts
+; PR8113: ARMLoadStoreOptimizer must preserve memoperands.
+
+@b = external global i64*
+
+; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4]
+define i64 @t(i64 %a) nounwind readonly {
+entry:
+ %0 = load i64** @b, align 4
+ %1 = load i64* %0, align 4
+ %2 = mul i64 %1, %a
+ ret i64 %2
+}
diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll
index 8010f20689be..3f8fd75f49f9 100644
--- a/test/CodeGen/ARM/ldrd.ll
+++ b/test/CodeGen/ARM/ldrd.ll
@@ -1,24 +1,70 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5
-; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI
-; rdar://r6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=fast | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-m3 -regalloc=fast | FileCheck %s -check-prefix=M3
+; rdar://6949835
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY
-; Magic ARM pair hints works best with linearscan.
+; Magic ARM pair hints work best with linearscan / fast.
+
+; Cortex-M3 errata 602117: LDRD with base in list may result in incorrect base
+; register when interrupted or faulted.
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
-;V6: ldrd r2, r3, [r2]
-
-;V5: ldr r{{[0-9]+}}, [r2]
-;V5: ldr r{{[0-9]+}}, [r2, #4]
+; A8: t:
+; A8: ldrd r2, r3, [r2]
-;EABI: ldr r{{[0-9]+}}, [r2]
-;EABI: ldr r{{[0-9]+}}, [r2, #4]
+; M3: t:
+; M3-NOT: ldrd
+; M3: ldm.w r2, {r2, r3}
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
ret i64 %2
}
+
+; rdar://10435045 mixed LDRi8/LDRi12
+;
+; In this case, LSR generates a sequence of LDRi8/LDRi12. We should be
+; able to generate an LDRD pair here, but this is highly sensitive to
+; regalloc hinting, so this doubles as a register allocation
+; test. RABasic currently does a better job within the inner loop
+; because of its *lack* of hinting ability, whereas RAGreedy keeps
+; R0/R1/R2 live as the three arguments, forcing the LDRD's odd
+; destination into R3. We then sensibly split the LDRD again rather than
+; evict another live range or use callee-saved regs. Sorry if the test
+; is sensitive to regalloc changes, but it is an interesting case.
+;
+; BASIC: @f
+; BASIC: %bb
+; BASIC: ldrd
+; BASIC: str
+; GREEDY: @f
+; GREEDY: %bb
+; GREEDY: ldrd
+; GREEDY: str
+define void @f(i32* nocapture %a, i32* nocapture %b, i32 %n) nounwind {
+entry:
+ %0 = add nsw i32 %n, -1 ; <i32> [#uses=2]
+ %1 = icmp sgt i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb, label %return
+
+bb: ; preds = %bb, %entry
+ %i.03 = phi i32 [ %tmp, %bb ], [ 0, %entry ] ; <i32> [#uses=3]
+ %scevgep = getelementptr i32* %a, i32 %i.03 ; <i32*> [#uses=1]
+ %scevgep4 = getelementptr i32* %b, i32 %i.03 ; <i32*> [#uses=1]
+ %tmp = add i32 %i.03, 1 ; <i32> [#uses=3]
+ %scevgep5 = getelementptr i32* %a, i32 %tmp ; <i32*> [#uses=1]
+ %2 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
+ %3 = load i32* %scevgep5, align 4 ; <i32> [#uses=1]
+ %4 = add nsw i32 %3, %2 ; <i32> [#uses=1]
+ store i32 %4, i32* %scevgep4, align 4
+ %exitcond = icmp eq i32 %tmp, %0 ; <i1> [#uses=1]
+ br i1 %exitcond, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
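
On the erratum: ldrd r2, r3, [r2] has its base register in the destination list, the exact form 602117 flags as unsafe on Cortex-M3 if the load is interrupted or faults. Hence the M3 CHECK lines above expect the load-multiple form instead, a sketch of which (taken from the CHECK lines) is:

    ; unsafe on Cortex-M3 (erratum 602117): base r2 is also a destination
    ;   ldrd r2, r3, [r2]
    ; expected M3 replacement, as checked above:
    ;   ldm.w r2, {r2, r3}
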
diff --git a/test/CodeGen/ARM/lit.local.cfg b/test/CodeGen/ARM/lit.local.cfg
new file mode 100644
index 000000000000..cb77b09ef4ad
--- /dev/null
+++ b/test/CodeGen/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
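
lit evaluates a lit.local.cfg like this once per directory: config.suffixes selects which files count as tests, and setting config.unsupported = True when ARM is absent from targets_to_build makes lit report the directory's tests as unsupported instead of running (and failing) them on builds without the ARM backend.
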
diff --git a/test/CodeGen/ARM/load_i1_select.ll b/test/CodeGen/ARM/load_i1_select.ll
new file mode 100644
index 000000000000..bdd408164992
--- /dev/null
+++ b/test/CodeGen/ARM/load_i1_select.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; Codegen should only compare one bit of the loaded value.
+; rdar://10887484
+
+; CHECK: foo:
+; CHECK: ldrb r[[R0:[0-9]+]], [r0]
+; CHECK: tst.w r[[R0]], #1
+define void @foo(i8* %call, double* %p) nounwind {
+entry:
+ %tmp2 = load i8* %call
+ %tmp3 = trunc i8 %tmp2 to i1
+ %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
+ store double %cond, double* %p
+ ret void
+}
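
The one-bit test follows from trunc semantics: trunc i8 to i1 keeps only bit 0, so nothing else can influence the select. A sketch of the equivalent explicit masking (hypothetical @foo_masked):

    define void @foo_masked(i8* %call, double* %p) nounwind {
    entry:
      %tmp2 = load i8* %call
      ; only bit 0 of the loaded byte is observable through the i1 trunc
      %bit = and i8 %tmp2, 1
      %tobool = icmp ne i8 %bit, 0
      %cond = select i1 %tobool, double 2.000000e+00, double 1.000000e+00
      store double %cond, double* %p
      ret void
    }
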
diff --git a/test/CodeGen/ARM/log2_not_readnone.ll b/test/CodeGen/ARM/log2_not_readnone.ll
new file mode 100644
index 000000000000..8068abdff908
--- /dev/null
+++ b/test/CodeGen/ARM/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=arm-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+ ; CHECK: bl log2
+ %1 = call double @log2(double 0.000000e+00)
+ ; CHECK: bl exp2
+ %2 = call double @exp2(double 0.000000e+00)
+ ret void
+}
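
For contrast, a declaration carrying readnone asserts there is no errno (or other) side effect to preserve, so a call through it may legally be rewritten to the intrinsic; a sketch:

    ; with readnone, a call to @log2 may be turned into @llvm.log2.f64
    declare double @log2(double) nounwind readnone
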
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index d5aac2e3ddaf..a99a7ec86c1e 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -25,8 +25,8 @@ define i32 @f2(i64 %x, i64 %y) {
; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: asrge r0, r1, r2
%a = ashr i64 %x, %y
%b = trunc i64 %a to i32
@@ -38,8 +38,8 @@ define i32 @f3(i64 %x, i64 %y) {
; CHECK: lsr{{.*}}r2
; CHECK-NEXT: rsb r3, r2, #32
; CHECK-NEXT: sub r2, r2, #32
-; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: orr r0, r0, r1, lsl r3
+; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: lsrge r0, r1, r2
%a = lshr i64 %x, %y
%b = trunc i64 %a to i32
diff --git a/test/CodeGen/ARM/lsr-icmp-imm.ll b/test/CodeGen/ARM/lsr-icmp-imm.ll
new file mode 100644
index 000000000000..5283f5747d96
--- /dev/null
+++ b/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -0,0 +1,33 @@
+; RUN: llc -mtriple=thumbv7-apple-ios -disable-code-place < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios -disable-code-place < %s | FileCheck %s
+
+; LSR should compare against the post-incremented induction variable.
+; In this case, the immediate value is -2 which requires a cmn instruction.
+;
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: sub{{.*}}[[IV:r[0-9]+]], #2
+; CHECK: cmn{{.*}}[[IV]], #2
+; CHECK: bne
+define i32 @f(i32* nocapture %a, i32 %i) nounwind readonly ssp {
+entry:
+ %cmp3 = icmp eq i32 %i, -2
+ br i1 %cmp3, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %bi.06 = phi i32 [ %i.addr.0.bi.0, %for.body ], [ 0, %entry ]
+ %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ]
+ %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %i.addr.05
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, %b.04
+ %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04
+ %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06
+ %sub = add nsw i32 %i.addr.05, -2
+ %cmp = icmp eq i32 %i.addr.05, 0
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+ ret i32 %bi.0.lcssa
+}
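
Why cmn rather than cmp: -2 is not encodable as a compare immediate, but CMN sets flags from rX + imm, which equals rX - (-imm), so the sequence above is exactly a compare against -2:

    ; cmn rX, #2   @ flags(rX + 2) = flags(rX - (-2))
    ; bne ...      @ loop while rX != -2
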
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
deleted file mode 100644
index 47379016cf14..000000000000
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ /dev/null
@@ -1,640 +0,0 @@
-; RUN: llc -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -enable-lsr-nested < %s | FileCheck %s
-
-; LSR should recognize that this is an unrolled loop which can use
-; constant offset addressing, so that each of the following stores
-; uses the same register.
-
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64]
-; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96]
-
-; We can also save a register in the outer loop, but that requires
-; performing LSR on the outer loop.
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-
-%0 = type { %1*, %3*, %6*, i8*, i32, i32, %8*, i32, i32, i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i32, i32, i32, i32, i32, [64 x i32]*, [4 x %9*], [4 x %10*], [4 x %10*], i32, %11*, i32, i32, [16 x i8], [16 x i8], [16 x i8], i32, i32, i8, i8, i8, i16, i16, i32, i8, i32, %12*, i32, i32, i32, i32, i8*, i32, [4 x %11*], i32, i32, i32, [10 x i32], i32, i32, i32, i32, i32, %13*, %14*, %15*, %16*, %17*, %18*, %19*, %20*, %21*, %22*, %23* }
-%1 = type { void (%2*)*, void (%2*, i32)*, void (%2*)*, void (%2*, i8*)*, void (%2*)*, i32, %7, i32, i32, i8**, i32, i8**, i32, i32 }
-%2 = type { %1*, %3*, %6*, i8*, i32, i32 }
-%3 = type { i8* (%2*, i32, i32)*, i8* (%2*, i32, i32)*, i8** (%2*, i32, i32, i32)*, [64 x i16]** (%2*, i32, i32, i32)*, %4* (%2*, i32, i32, i32, i32, i32)*, %5* (%2*, i32, i32, i32, i32, i32)*, void (%2*)*, i8** (%2*, %4*, i32, i32, i32)*, [64 x i16]** (%2*, %5*, i32, i32, i32)*, void (%2*, i32)*, void (%2*)*, i32, i32 }
-%4 = type opaque
-%5 = type opaque
-%6 = type { void (%2*)*, i32, i32, i32, i32 }
-%7 = type { [8 x i32], [12 x i32] }
-%8 = type { i8*, i32, void (%0*)*, i32 (%0*)*, void (%0*, i32)*, i32 (%0*, i32)*, void (%0*)* }
-%9 = type { [64 x i16], i32 }
-%10 = type { [17 x i8], [256 x i8], i32 }
-%11 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %9*, i8* }
-%12 = type { %12*, i8, i32, i32, i8* }
-%13 = type { void (%0*)*, void (%0*)*, i32 }
-%14 = type { void (%0*, i32)*, void (%0*, i8**, i32*, i32)* }
-%15 = type { void (%0*)*, i32 (%0*)*, void (%0*)*, i32 (%0*, i8***)*, %5** }
-%16 = type { void (%0*, i32)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)* }
-%17 = type { i32 (%0*)*, void (%0*)*, void (%0*)*, void (%0*)*, i32, i32 }
-%18 = type { void (%0*)*, i32 (%0*)*, i32 (%0*)*, i32, i32, i32, i32 }
-%19 = type { void (%0*)*, i32 (%0*, [64 x i16]**)*, i32 }
-%20 = type { void (%0*)*, [10 x void (%0*, %11*, i16*, i8**, i32)*] }
-%21 = type { void (%0*)*, void (%0*, i8***, i32*, i32, i8**, i32*, i32)*, i32 }
-%22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
-%23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
-
-define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
-bb:
- %t = alloca [64 x float], align 4
- %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
- %t6 = load i8** %t5, align 4
- %t7 = getelementptr inbounds %11* %a1, i32 0, i32 20
- %t8 = load i8** %t7, align 4
- br label %bb9
-
-bb9:
- %t10 = phi i32 [ 0, %bb ], [ %t157, %bb156 ]
- %t11 = add i32 %t10, 8
- %t12 = getelementptr [64 x float]* %t, i32 0, i32 %t11
- %t13 = add i32 %t10, 16
- %t14 = getelementptr [64 x float]* %t, i32 0, i32 %t13
- %t15 = add i32 %t10, 24
- %t16 = getelementptr [64 x float]* %t, i32 0, i32 %t15
- %t17 = add i32 %t10, 32
- %t18 = getelementptr [64 x float]* %t, i32 0, i32 %t17
- %t19 = add i32 %t10, 40
- %t20 = getelementptr [64 x float]* %t, i32 0, i32 %t19
- %t21 = add i32 %t10, 48
- %t22 = getelementptr [64 x float]* %t, i32 0, i32 %t21
- %t23 = add i32 %t10, 56
- %t24 = getelementptr [64 x float]* %t, i32 0, i32 %t23
- %t25 = getelementptr [64 x float]* %t, i32 0, i32 %t10
- %t26 = shl i32 %t10, 5
- %t27 = or i32 %t26, 8
- %t28 = getelementptr i8* %t8, i32 %t27
- %t29 = bitcast i8* %t28 to float*
- %t30 = or i32 %t26, 16
- %t31 = getelementptr i8* %t8, i32 %t30
- %t32 = bitcast i8* %t31 to float*
- %t33 = or i32 %t26, 24
- %t34 = getelementptr i8* %t8, i32 %t33
- %t35 = bitcast i8* %t34 to float*
- %t36 = or i32 %t26, 4
- %t37 = getelementptr i8* %t8, i32 %t36
- %t38 = bitcast i8* %t37 to float*
- %t39 = or i32 %t26, 12
- %t40 = getelementptr i8* %t8, i32 %t39
- %t41 = bitcast i8* %t40 to float*
- %t42 = or i32 %t26, 20
- %t43 = getelementptr i8* %t8, i32 %t42
- %t44 = bitcast i8* %t43 to float*
- %t45 = or i32 %t26, 28
- %t46 = getelementptr i8* %t8, i32 %t45
- %t47 = bitcast i8* %t46 to float*
- %t48 = getelementptr i8* %t8, i32 %t26
- %t49 = bitcast i8* %t48 to float*
- %t50 = shl i32 %t10, 3
- %t51 = or i32 %t50, 1
- %t52 = getelementptr i16* %a2, i32 %t51
- %t53 = or i32 %t50, 2
- %t54 = getelementptr i16* %a2, i32 %t53
- %t55 = or i32 %t50, 3
- %t56 = getelementptr i16* %a2, i32 %t55
- %t57 = or i32 %t50, 4
- %t58 = getelementptr i16* %a2, i32 %t57
- %t59 = or i32 %t50, 5
- %t60 = getelementptr i16* %a2, i32 %t59
- %t61 = or i32 %t50, 6
- %t62 = getelementptr i16* %a2, i32 %t61
- %t63 = or i32 %t50, 7
- %t64 = getelementptr i16* %a2, i32 %t63
- %t65 = getelementptr i16* %a2, i32 %t50
- %t66 = load i16* %t52, align 2
- %t67 = icmp eq i16 %t66, 0
- %t68 = load i16* %t54, align 2
- %t69 = icmp eq i16 %t68, 0
- %t70 = and i1 %t67, %t69
- br i1 %t70, label %bb71, label %bb91
-
-bb71:
- %t72 = load i16* %t56, align 2
- %t73 = icmp eq i16 %t72, 0
- br i1 %t73, label %bb74, label %bb91
-
-bb74:
- %t75 = load i16* %t58, align 2
- %t76 = icmp eq i16 %t75, 0
- br i1 %t76, label %bb77, label %bb91
-
-bb77:
- %t78 = load i16* %t60, align 2
- %t79 = icmp eq i16 %t78, 0
- br i1 %t79, label %bb80, label %bb91
-
-bb80:
- %t81 = load i16* %t62, align 2
- %t82 = icmp eq i16 %t81, 0
- br i1 %t82, label %bb83, label %bb91
-
-bb83:
- %t84 = load i16* %t64, align 2
- %t85 = icmp eq i16 %t84, 0
- br i1 %t85, label %bb86, label %bb91
-
-bb86:
- %t87 = load i16* %t65, align 2
- %t88 = sitofp i16 %t87 to float
- %t89 = load float* %t49, align 4
- %t90 = fmul float %t88, %t89
- store float %t90, float* %t25, align 4
- store float %t90, float* %t12, align 4
- store float %t90, float* %t14, align 4
- store float %t90, float* %t16, align 4
- store float %t90, float* %t18, align 4
- store float %t90, float* %t20, align 4
- store float %t90, float* %t22, align 4
- store float %t90, float* %t24, align 4
- br label %bb156
-
-bb91:
- %t92 = load i16* %t65, align 2
- %t93 = sitofp i16 %t92 to float
- %t94 = load float* %t49, align 4
- %t95 = fmul float %t93, %t94
- %t96 = sitofp i16 %t68 to float
- %t97 = load float* %t29, align 4
- %t98 = fmul float %t96, %t97
- %t99 = load i16* %t58, align 2
- %t100 = sitofp i16 %t99 to float
- %t101 = load float* %t32, align 4
- %t102 = fmul float %t100, %t101
- %t103 = load i16* %t62, align 2
- %t104 = sitofp i16 %t103 to float
- %t105 = load float* %t35, align 4
- %t106 = fmul float %t104, %t105
- %t107 = fadd float %t95, %t102
- %t108 = fsub float %t95, %t102
- %t109 = fadd float %t98, %t106
- %t110 = fsub float %t98, %t106
- %t111 = fmul float %t110, 0x3FF6A09E60000000
- %t112 = fsub float %t111, %t109
- %t113 = fadd float %t107, %t109
- %t114 = fsub float %t107, %t109
- %t115 = fadd float %t108, %t112
- %t116 = fsub float %t108, %t112
- %t117 = sitofp i16 %t66 to float
- %t118 = load float* %t38, align 4
- %t119 = fmul float %t117, %t118
- %t120 = load i16* %t56, align 2
- %t121 = sitofp i16 %t120 to float
- %t122 = load float* %t41, align 4
- %t123 = fmul float %t121, %t122
- %t124 = load i16* %t60, align 2
- %t125 = sitofp i16 %t124 to float
- %t126 = load float* %t44, align 4
- %t127 = fmul float %t125, %t126
- %t128 = load i16* %t64, align 2
- %t129 = sitofp i16 %t128 to float
- %t130 = load float* %t47, align 4
- %t131 = fmul float %t129, %t130
- %t132 = fadd float %t127, %t123
- %t133 = fsub float %t127, %t123
- %t134 = fadd float %t119, %t131
- %t135 = fsub float %t119, %t131
- %t136 = fadd float %t134, %t132
- %t137 = fsub float %t134, %t132
- %t138 = fmul float %t137, 0x3FF6A09E60000000
- %t139 = fadd float %t133, %t135
- %t140 = fmul float %t139, 0x3FFD906BC0000000
- %t141 = fmul float %t135, 0x3FF1517A80000000
- %t142 = fsub float %t141, %t140
- %t143 = fmul float %t133, 0xC004E7AEA0000000
- %t144 = fadd float %t143, %t140
- %t145 = fsub float %t144, %t136
- %t146 = fsub float %t138, %t145
- %t147 = fadd float %t142, %t146
- %t148 = fadd float %t113, %t136
- store float %t148, float* %t25, align 4
- %t149 = fsub float %t113, %t136
- store float %t149, float* %t24, align 4
- %t150 = fadd float %t115, %t145
- store float %t150, float* %t12, align 4
- %t151 = fsub float %t115, %t145
- store float %t151, float* %t22, align 4
- %t152 = fadd float %t116, %t146
- store float %t152, float* %t14, align 4
- %t153 = fsub float %t116, %t146
- store float %t153, float* %t20, align 4
- %t154 = fadd float %t114, %t147
- store float %t154, float* %t18, align 4
- %t155 = fsub float %t114, %t147
- store float %t155, float* %t16, align 4
- br label %bb156
-
-bb156:
- %t157 = add i32 %t10, 1
- %t158 = icmp eq i32 %t157, 8
- br i1 %t158, label %bb159, label %bb9
-
-bb159:
- %t160 = add i32 %a4, 7
- %t161 = add i32 %a4, 1
- %t162 = add i32 %a4, 6
- %t163 = add i32 %a4, 2
- %t164 = add i32 %a4, 5
- %t165 = add i32 %a4, 4
- %t166 = add i32 %a4, 3
- br label %bb167
-
-bb167:
- %t168 = phi i32 [ 0, %bb159 ], [ %t293, %bb167 ]
- %t169 = getelementptr i8** %a3, i32 %t168
- %t170 = shl i32 %t168, 3
- %t171 = or i32 %t170, 4
- %t172 = getelementptr [64 x float]* %t, i32 0, i32 %t171
- %t173 = or i32 %t170, 2
- %t174 = getelementptr [64 x float]* %t, i32 0, i32 %t173
- %t175 = or i32 %t170, 6
- %t176 = getelementptr [64 x float]* %t, i32 0, i32 %t175
- %t177 = or i32 %t170, 5
- %t178 = getelementptr [64 x float]* %t, i32 0, i32 %t177
- %t179 = or i32 %t170, 3
- %t180 = getelementptr [64 x float]* %t, i32 0, i32 %t179
- %t181 = or i32 %t170, 1
- %t182 = getelementptr [64 x float]* %t, i32 0, i32 %t181
- %t183 = or i32 %t170, 7
- %t184 = getelementptr [64 x float]* %t, i32 0, i32 %t183
- %t185 = getelementptr [64 x float]* %t, i32 0, i32 %t170
- %t186 = load i8** %t169, align 4
- %t187 = getelementptr inbounds i8* %t186, i32 %a4
- %t188 = load float* %t185, align 4
- %t189 = load float* %t172, align 4
- %t190 = fadd float %t188, %t189
- %t191 = fsub float %t188, %t189
- %t192 = load float* %t174, align 4
- %t193 = load float* %t176, align 4
- %t194 = fadd float %t192, %t193
- %t195 = fsub float %t192, %t193
- %t196 = fmul float %t195, 0x3FF6A09E60000000
- %t197 = fsub float %t196, %t194
- %t198 = fadd float %t190, %t194
- %t199 = fsub float %t190, %t194
- %t200 = fadd float %t191, %t197
- %t201 = fsub float %t191, %t197
- %t202 = load float* %t178, align 4
- %t203 = load float* %t180, align 4
- %t204 = fadd float %t202, %t203
- %t205 = fsub float %t202, %t203
- %t206 = load float* %t182, align 4
- %t207 = load float* %t184, align 4
- %t208 = fadd float %t206, %t207
- %t209 = fsub float %t206, %t207
- %t210 = fadd float %t208, %t204
- %t211 = fsub float %t208, %t204
- %t212 = fmul float %t211, 0x3FF6A09E60000000
- %t213 = fadd float %t205, %t209
- %t214 = fmul float %t213, 0x3FFD906BC0000000
- %t215 = fmul float %t209, 0x3FF1517A80000000
- %t216 = fsub float %t215, %t214
- %t217 = fmul float %t205, 0xC004E7AEA0000000
- %t218 = fadd float %t217, %t214
- %t219 = fsub float %t218, %t210
- %t220 = fsub float %t212, %t219
- %t221 = fadd float %t216, %t220
- %t222 = fadd float %t198, %t210
- %t223 = fptosi float %t222 to i32
- %t224 = add nsw i32 %t223, 4
- %t225 = lshr i32 %t224, 3
- %t226 = and i32 %t225, 1023
- %t227 = add i32 %t226, 128
- %t228 = getelementptr inbounds i8* %t6, i32 %t227
- %t229 = load i8* %t228, align 1
- store i8 %t229, i8* %t187, align 1
- %t230 = fsub float %t198, %t210
- %t231 = fptosi float %t230 to i32
- %t232 = add nsw i32 %t231, 4
- %t233 = lshr i32 %t232, 3
- %t234 = and i32 %t233, 1023
- %t235 = add i32 %t234, 128
- %t236 = getelementptr inbounds i8* %t6, i32 %t235
- %t237 = load i8* %t236, align 1
- %t238 = getelementptr inbounds i8* %t186, i32 %t160
- store i8 %t237, i8* %t238, align 1
- %t239 = fadd float %t200, %t219
- %t240 = fptosi float %t239 to i32
- %t241 = add nsw i32 %t240, 4
- %t242 = lshr i32 %t241, 3
- %t243 = and i32 %t242, 1023
- %t244 = add i32 %t243, 128
- %t245 = getelementptr inbounds i8* %t6, i32 %t244
- %t246 = load i8* %t245, align 1
- %t247 = getelementptr inbounds i8* %t186, i32 %t161
- store i8 %t246, i8* %t247, align 1
- %t248 = fsub float %t200, %t219
- %t249 = fptosi float %t248 to i32
- %t250 = add nsw i32 %t249, 4
- %t251 = lshr i32 %t250, 3
- %t252 = and i32 %t251, 1023
- %t253 = add i32 %t252, 128
- %t254 = getelementptr inbounds i8* %t6, i32 %t253
- %t255 = load i8* %t254, align 1
- %t256 = getelementptr inbounds i8* %t186, i32 %t162
- store i8 %t255, i8* %t256, align 1
- %t257 = fadd float %t201, %t220
- %t258 = fptosi float %t257 to i32
- %t259 = add nsw i32 %t258, 4
- %t260 = lshr i32 %t259, 3
- %t261 = and i32 %t260, 1023
- %t262 = add i32 %t261, 128
- %t263 = getelementptr inbounds i8* %t6, i32 %t262
- %t264 = load i8* %t263, align 1
- %t265 = getelementptr inbounds i8* %t186, i32 %t163
- store i8 %t264, i8* %t265, align 1
- %t266 = fsub float %t201, %t220
- %t267 = fptosi float %t266 to i32
- %t268 = add nsw i32 %t267, 4
- %t269 = lshr i32 %t268, 3
- %t270 = and i32 %t269, 1023
- %t271 = add i32 %t270, 128
- %t272 = getelementptr inbounds i8* %t6, i32 %t271
- %t273 = load i8* %t272, align 1
- %t274 = getelementptr inbounds i8* %t186, i32 %t164
- store i8 %t273, i8* %t274, align 1
- %t275 = fadd float %t199, %t221
- %t276 = fptosi float %t275 to i32
- %t277 = add nsw i32 %t276, 4
- %t278 = lshr i32 %t277, 3
- %t279 = and i32 %t278, 1023
- %t280 = add i32 %t279, 128
- %t281 = getelementptr inbounds i8* %t6, i32 %t280
- %t282 = load i8* %t281, align 1
- %t283 = getelementptr inbounds i8* %t186, i32 %t165
- store i8 %t282, i8* %t283, align 1
- %t284 = fsub float %t199, %t221
- %t285 = fptosi float %t284 to i32
- %t286 = add nsw i32 %t285, 4
- %t287 = lshr i32 %t286, 3
- %t288 = and i32 %t287, 1023
- %t289 = add i32 %t288, 128
- %t290 = getelementptr inbounds i8* %t6, i32 %t289
- %t291 = load i8* %t290, align 1
- %t292 = getelementptr inbounds i8* %t186, i32 %t166
- store i8 %t291, i8* %t292, align 1
- %t293 = add nsw i32 %t168, 1
- %t294 = icmp eq i32 %t293, 8
- br i1 %t294, label %bb295, label %bb167
-
-bb295:
- ret void
-}
-
-%struct.ct_data_s = type { %union.anon, %union.anon }
-%struct.gz_header = type { i32, i32, i32, i32, i8*, i32, i32, i8*, i32, i8*, i32, i32, i32 }
-%struct.internal_state = type { %struct.z_stream*, i32, i8*, i32, i8*, i32, i32, %struct.gz_header*, i32, i8, i32, i32, i32, i32, i8*, i32, i16*, i16*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [573 x %struct.ct_data_s], [61 x %struct.ct_data_s], [39 x %struct.ct_data_s], %struct.tree_desc_s, %struct.tree_desc_s, %struct.tree_desc_s, [16 x i16], [573 x i32], i32, i32, [573 x i8], i8*, i32, i32, i16*, i32, i32, i32, i32, i16, i32 }
-%struct.static_tree_desc = type { i32 }
-%struct.tree_desc_s = type { %struct.ct_data_s*, i32, %struct.static_tree_desc* }
-%struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
-%union.anon = type { i16 }
-
-define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
-entry:
- %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; <i32*> [#uses=1]
- %1 = load i32* %0, align 4 ; <i32> [#uses=2]
- %2 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 14 ; <i8**> [#uses=1]
- %3 = load i8** %2, align 4 ; <i8*> [#uses=27]
- %4 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 27 ; <i32*> [#uses=1]
- %5 = load i32* %4, align 4 ; <i32> [#uses=17]
- %6 = getelementptr inbounds i8* %3, i32 %5 ; <i8*> [#uses=1]
- %7 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 30 ; <i32*> [#uses=1]
- %8 = load i32* %7, align 4 ; <i32> [#uses=4]
- %9 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 36 ; <i32*> [#uses=1]
- %10 = load i32* %9, align 4 ; <i32> [#uses=2]
- %11 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 11 ; <i32*> [#uses=1]
- %12 = load i32* %11, align 4 ; <i32> [#uses=2]
- %13 = add i32 %12, -262 ; <i32> [#uses=1]
- %14 = icmp ugt i32 %5, %13 ; <i1> [#uses=1]
- br i1 %14, label %bb, label %bb2
-
-bb: ; preds = %entry
- %15 = add i32 %5, 262 ; <i32> [#uses=1]
- %16 = sub i32 %15, %12 ; <i32> [#uses=1]
- br label %bb2
-
-bb2: ; preds = %bb, %entry
- %iftmp.48.0 = phi i32 [ %16, %bb ], [ 0, %entry ] ; <i32> [#uses=1]
- %17 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 16 ; <i16**> [#uses=1]
- %18 = load i16** %17, align 4 ; <i16*> [#uses=1]
- %19 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 13 ; <i32*> [#uses=1]
- %20 = load i32* %19, align 4 ; <i32> [#uses=1]
- %.sum = add i32 %5, 258 ; <i32> [#uses=2]
- %21 = getelementptr inbounds i8* %3, i32 %.sum ; <i8*> [#uses=1]
- %22 = add nsw i32 %5, -1 ; <i32> [#uses=1]
- %.sum30 = add i32 %22, %8 ; <i32> [#uses=1]
- %23 = getelementptr inbounds i8* %3, i32 %.sum30 ; <i8*> [#uses=1]
- %24 = load i8* %23, align 1 ; <i8> [#uses=1]
- %.sum31 = add i32 %8, %5 ; <i32> [#uses=1]
- %25 = getelementptr inbounds i8* %3, i32 %.sum31 ; <i8*> [#uses=1]
- %26 = load i8* %25, align 1 ; <i8> [#uses=1]
- %27 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 35 ; <i32*> [#uses=1]
- %28 = load i32* %27, align 4 ; <i32> [#uses=1]
- %29 = lshr i32 %1, 2 ; <i32> [#uses=1]
- %30 = icmp ult i32 %8, %28 ; <i1> [#uses=1]
- %. = select i1 %30, i32 %1, i32 %29 ; <i32> [#uses=1]
- %31 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 29 ; <i32*> [#uses=1]
- %32 = load i32* %31, align 4 ; <i32> [#uses=4]
- %33 = icmp ugt i32 %10, %32 ; <i1> [#uses=1]
- %nice_match.0.ph = select i1 %33, i32 %32, i32 %10 ; <i32> [#uses=1]
- %34 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 28 ; <i32*> [#uses=1]
- %35 = ptrtoint i8* %21 to i32 ; <i32> [#uses=1]
- %36 = add nsw i32 %5, 257 ; <i32> [#uses=1]
- %tmp81 = add i32 %., -1 ; <i32> [#uses=1]
- br label %bb6
-
-bb6: ; preds = %bb24, %bb2
- %indvar78 = phi i32 [ 0, %bb2 ], [ %indvar.next79, %bb24 ] ; <i32> [#uses=2]
- %best_len.2 = phi i32 [ %8, %bb2 ], [ %best_len.0, %bb24 ] ; <i32> [#uses=8]
- %scan_end1.1 = phi i8 [ %24, %bb2 ], [ %scan_end1.0, %bb24 ] ; <i8> [#uses=6]
- %cur_match_addr.0 = phi i32 [ %cur_match, %bb2 ], [ %90, %bb24 ] ; <i32> [#uses=14]
- %scan_end.1 = phi i8 [ %26, %bb2 ], [ %scan_end.0, %bb24 ] ; <i8> [#uses=6]
- %37 = getelementptr inbounds i8* %3, i32 %cur_match_addr.0 ; <i8*> [#uses=1]
- %.sum32 = add i32 %cur_match_addr.0, %best_len.2 ; <i32> [#uses=1]
- %38 = getelementptr inbounds i8* %3, i32 %.sum32 ; <i8*> [#uses=1]
- %39 = load i8* %38, align 1 ; <i8> [#uses=1]
- %40 = icmp eq i8 %39, %scan_end.1 ; <i1> [#uses=1]
- br i1 %40, label %bb7, label %bb23
-
-bb7: ; preds = %bb6
- %41 = add nsw i32 %best_len.2, -1 ; <i32> [#uses=1]
- %.sum33 = add i32 %41, %cur_match_addr.0 ; <i32> [#uses=1]
- %42 = getelementptr inbounds i8* %3, i32 %.sum33 ; <i8*> [#uses=1]
- %43 = load i8* %42, align 1 ; <i8> [#uses=1]
- %44 = icmp eq i8 %43, %scan_end1.1 ; <i1> [#uses=1]
- br i1 %44, label %bb8, label %bb23
-
-bb8: ; preds = %bb7
- %45 = load i8* %37, align 1 ; <i8> [#uses=1]
- %46 = load i8* %6, align 1 ; <i8> [#uses=1]
- %47 = icmp eq i8 %45, %46 ; <i1> [#uses=1]
- br i1 %47, label %bb9, label %bb23
-
-bb9: ; preds = %bb8
- %.sum34 = add i32 %cur_match_addr.0, 1 ; <i32> [#uses=1]
- %48 = getelementptr inbounds i8* %3, i32 %.sum34 ; <i8*> [#uses=1]
- %49 = load i8* %48, align 1 ; <i8> [#uses=1]
- %.sum88 = add i32 %5, 1 ; <i32> [#uses=1]
- %50 = getelementptr inbounds i8* %3, i32 %.sum88 ; <i8*> [#uses=1]
- %51 = load i8* %50, align 1 ; <i8> [#uses=1]
- %52 = icmp eq i8 %49, %51 ; <i1> [#uses=1]
- br i1 %52, label %bb10, label %bb23
-
-bb10: ; preds = %bb9
- %tmp39 = add i32 %cur_match_addr.0, 10 ; <i32> [#uses=1]
- %tmp41 = add i32 %cur_match_addr.0, 9 ; <i32> [#uses=1]
- %tmp44 = add i32 %cur_match_addr.0, 8 ; <i32> [#uses=1]
- %tmp47 = add i32 %cur_match_addr.0, 7 ; <i32> [#uses=1]
- %tmp50 = add i32 %cur_match_addr.0, 6 ; <i32> [#uses=1]
- %tmp53 = add i32 %cur_match_addr.0, 5 ; <i32> [#uses=1]
- %tmp56 = add i32 %cur_match_addr.0, 4 ; <i32> [#uses=1]
- %tmp59 = add i32 %cur_match_addr.0, 3 ; <i32> [#uses=1]
- br label %bb11
-
-bb11: ; preds = %bb18, %bb10
- %indvar = phi i32 [ %indvar.next, %bb18 ], [ 0, %bb10 ] ; <i32> [#uses=2]
- %tmp = shl i32 %indvar, 3 ; <i32> [#uses=16]
- %tmp40 = add i32 %tmp39, %tmp ; <i32> [#uses=1]
- %scevgep = getelementptr i8* %3, i32 %tmp40 ; <i8*> [#uses=1]
- %tmp42 = add i32 %tmp41, %tmp ; <i32> [#uses=1]
- %scevgep43 = getelementptr i8* %3, i32 %tmp42 ; <i8*> [#uses=1]
- %tmp45 = add i32 %tmp44, %tmp ; <i32> [#uses=1]
- %scevgep46 = getelementptr i8* %3, i32 %tmp45 ; <i8*> [#uses=1]
- %tmp48 = add i32 %tmp47, %tmp ; <i32> [#uses=1]
- %scevgep49 = getelementptr i8* %3, i32 %tmp48 ; <i8*> [#uses=1]
- %tmp51 = add i32 %tmp50, %tmp ; <i32> [#uses=1]
- %scevgep52 = getelementptr i8* %3, i32 %tmp51 ; <i8*> [#uses=1]
- %tmp54 = add i32 %tmp53, %tmp ; <i32> [#uses=1]
- %scevgep55 = getelementptr i8* %3, i32 %tmp54 ; <i8*> [#uses=1]
- %tmp60 = add i32 %tmp59, %tmp ; <i32> [#uses=1]
- %scevgep61 = getelementptr i8* %3, i32 %tmp60 ; <i8*> [#uses=1]
- %tmp62 = add i32 %tmp, 10 ; <i32> [#uses=1]
- %.sum89 = add i32 %5, %tmp62 ; <i32> [#uses=2]
- %scevgep63 = getelementptr i8* %3, i32 %.sum89 ; <i8*> [#uses=2]
- %tmp64 = add i32 %tmp, 9 ; <i32> [#uses=1]
- %.sum90 = add i32 %5, %tmp64 ; <i32> [#uses=1]
- %scevgep65 = getelementptr i8* %3, i32 %.sum90 ; <i8*> [#uses=2]
- %tmp66 = add i32 %tmp, 8 ; <i32> [#uses=1]
- %.sum91 = add i32 %5, %tmp66 ; <i32> [#uses=1]
- %scevgep67 = getelementptr i8* %3, i32 %.sum91 ; <i8*> [#uses=2]
- %tmp6883 = or i32 %tmp, 7 ; <i32> [#uses=1]
- %.sum92 = add i32 %5, %tmp6883 ; <i32> [#uses=1]
- %scevgep69 = getelementptr i8* %3, i32 %.sum92 ; <i8*> [#uses=2]
- %tmp7084 = or i32 %tmp, 6 ; <i32> [#uses=1]
- %.sum93 = add i32 %5, %tmp7084 ; <i32> [#uses=1]
- %scevgep71 = getelementptr i8* %3, i32 %.sum93 ; <i8*> [#uses=2]
- %tmp7285 = or i32 %tmp, 5 ; <i32> [#uses=1]
- %.sum94 = add i32 %5, %tmp7285 ; <i32> [#uses=1]
- %scevgep73 = getelementptr i8* %3, i32 %.sum94 ; <i8*> [#uses=2]
- %tmp7486 = or i32 %tmp, 4 ; <i32> [#uses=1]
- %.sum95 = add i32 %5, %tmp7486 ; <i32> [#uses=1]
- %scevgep75 = getelementptr i8* %3, i32 %.sum95 ; <i8*> [#uses=2]
- %tmp7687 = or i32 %tmp, 3 ; <i32> [#uses=1]
- %.sum96 = add i32 %5, %tmp7687 ; <i32> [#uses=1]
- %scevgep77 = getelementptr i8* %3, i32 %.sum96 ; <i8*> [#uses=2]
- %53 = load i8* %scevgep77, align 1 ; <i8> [#uses=1]
- %54 = load i8* %scevgep61, align 1 ; <i8> [#uses=1]
- %55 = icmp eq i8 %53, %54 ; <i1> [#uses=1]
- br i1 %55, label %bb12, label %bb20
-
-bb12: ; preds = %bb11
- %tmp57 = add i32 %tmp56, %tmp ; <i32> [#uses=1]
- %scevgep58 = getelementptr i8* %3, i32 %tmp57 ; <i8*> [#uses=1]
- %56 = load i8* %scevgep75, align 1 ; <i8> [#uses=1]
- %57 = load i8* %scevgep58, align 1 ; <i8> [#uses=1]
- %58 = icmp eq i8 %56, %57 ; <i1> [#uses=1]
- br i1 %58, label %bb13, label %bb20
-
-bb13: ; preds = %bb12
- %59 = load i8* %scevgep73, align 1 ; <i8> [#uses=1]
- %60 = load i8* %scevgep55, align 1 ; <i8> [#uses=1]
- %61 = icmp eq i8 %59, %60 ; <i1> [#uses=1]
- br i1 %61, label %bb14, label %bb20
-
-bb14: ; preds = %bb13
- %62 = load i8* %scevgep71, align 1 ; <i8> [#uses=1]
- %63 = load i8* %scevgep52, align 1 ; <i8> [#uses=1]
- %64 = icmp eq i8 %62, %63 ; <i1> [#uses=1]
- br i1 %64, label %bb15, label %bb20
-
-bb15: ; preds = %bb14
- %65 = load i8* %scevgep69, align 1 ; <i8> [#uses=1]
- %66 = load i8* %scevgep49, align 1 ; <i8> [#uses=1]
- %67 = icmp eq i8 %65, %66 ; <i1> [#uses=1]
- br i1 %67, label %bb16, label %bb20
-
-bb16: ; preds = %bb15
- %68 = load i8* %scevgep67, align 1 ; <i8> [#uses=1]
- %69 = load i8* %scevgep46, align 1 ; <i8> [#uses=1]
- %70 = icmp eq i8 %68, %69 ; <i1> [#uses=1]
- br i1 %70, label %bb17, label %bb20
-
-bb17: ; preds = %bb16
- %71 = load i8* %scevgep65, align 1 ; <i8> [#uses=1]
- %72 = load i8* %scevgep43, align 1 ; <i8> [#uses=1]
- %73 = icmp eq i8 %71, %72 ; <i1> [#uses=1]
- br i1 %73, label %bb18, label %bb20
-
-bb18: ; preds = %bb17
- %74 = load i8* %scevgep63, align 1 ; <i8> [#uses=1]
- %75 = load i8* %scevgep, align 1 ; <i8> [#uses=1]
- %76 = icmp eq i8 %74, %75 ; <i1> [#uses=1]
- %77 = icmp slt i32 %.sum89, %.sum ; <i1> [#uses=1]
- %or.cond = and i1 %76, %77 ; <i1> [#uses=1]
- %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
- br i1 %or.cond, label %bb11, label %bb20
-
-bb20: ; preds = %bb18, %bb17, %bb16, %bb15, %bb14, %bb13, %bb12, %bb11
- %scan.3 = phi i8* [ %scevgep77, %bb11 ], [ %scevgep75, %bb12 ], [ %scevgep73, %bb13 ], [ %scevgep71, %bb14 ], [ %scevgep69, %bb15 ], [ %scevgep67, %bb16 ], [ %scevgep65, %bb17 ], [ %scevgep63, %bb18 ] ; <i8*> [#uses=1]
- %78 = ptrtoint i8* %scan.3 to i32 ; <i32> [#uses=1]
- %79 = sub nsw i32 %78, %35 ; <i32> [#uses=2]
- %80 = add i32 %79, 258 ; <i32> [#uses=5]
- %81 = icmp sgt i32 %80, %best_len.2 ; <i1> [#uses=1]
- br i1 %81, label %bb21, label %bb23
-
-bb21: ; preds = %bb20
- store i32 %cur_match_addr.0, i32* %34, align 4
- %82 = icmp slt i32 %80, %nice_match.0.ph ; <i1> [#uses=1]
- br i1 %82, label %bb22, label %bb25
-
-bb22: ; preds = %bb21
- %.sum37 = add i32 %36, %79 ; <i32> [#uses=1]
- %83 = getelementptr inbounds i8* %3, i32 %.sum37 ; <i8*> [#uses=1]
- %84 = load i8* %83, align 1 ; <i8> [#uses=1]
- %.sum38 = add i32 %80, %5 ; <i32> [#uses=1]
- %85 = getelementptr inbounds i8* %3, i32 %.sum38 ; <i8*> [#uses=1]
- %86 = load i8* %85, align 1 ; <i8> [#uses=1]
- br label %bb23
-
-bb23: ; preds = %bb22, %bb20, %bb9, %bb8, %bb7, %bb6
- %best_len.0 = phi i32 [ %best_len.2, %bb6 ], [ %best_len.2, %bb7 ], [ %best_len.2, %bb8 ], [ %best_len.2, %bb9 ], [ %80, %bb22 ], [ %best_len.2, %bb20 ] ; <i32> [#uses=3]
- %scan_end1.0 = phi i8 [ %scan_end1.1, %bb6 ], [ %scan_end1.1, %bb7 ], [ %scan_end1.1, %bb8 ], [ %scan_end1.1, %bb9 ], [ %84, %bb22 ], [ %scan_end1.1, %bb20 ] ; <i8> [#uses=1]
- %scan_end.0 = phi i8 [ %scan_end.1, %bb6 ], [ %scan_end.1, %bb7 ], [ %scan_end.1, %bb8 ], [ %scan_end.1, %bb9 ], [ %86, %bb22 ], [ %scan_end.1, %bb20 ] ; <i8> [#uses=1]
- %87 = and i32 %cur_match_addr.0, %20 ; <i32> [#uses=1]
- %88 = getelementptr inbounds i16* %18, i32 %87 ; <i16*> [#uses=1]
- %89 = load i16* %88, align 2 ; <i16> [#uses=1]
- %90 = zext i16 %89 to i32 ; <i32> [#uses=2]
- %91 = icmp ugt i32 %90, %iftmp.48.0 ; <i1> [#uses=1]
- br i1 %91, label %bb24, label %bb25
-
-bb24: ; preds = %bb23
-
-; LSR should use count-down iteration to avoid requiring the trip count
-; in a register.
-
-; CHECK: @ %bb24
-; CHECK: subs{{.*}} {{(r[0-9]+)|(lr)}}, #1
-; CHECK: bne.w
-
- %92 = icmp eq i32 %tmp81, %indvar78 ; <i1> [#uses=1]
- %indvar.next79 = add i32 %indvar78, 1 ; <i32> [#uses=1]
- br i1 %92, label %bb25, label %bb6
-
-bb25: ; preds = %bb24, %bb23, %bb21
- %best_len.1 = phi i32 [ %best_len.0, %bb23 ], [ %best_len.0, %bb24 ], [ %80, %bb21 ] ; <i32> [#uses=2]
- %93 = icmp ugt i32 %best_len.1, %32 ; <i1> [#uses=1]
- %merge = select i1 %93, i32 %32, i32 %best_len.1 ; <i32> [#uses=1]
- ret i32 %merge
-}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index bf26a9670a79..5b4cf9d81606 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -12,7 +12,7 @@
; CHECK: add
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-macosx10.7.0"
+target triple = "thumbv7-apple-ios"
%struct.partition_entry = type { i32, i32, i64, i64 }
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index c77402f3bc1f..3ac7d77d6f79 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
;rdar://8003725
@G1 = external global i32
@@ -6,13 +6,42 @@
define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
entry:
+; CHECK: f1:
; CHECK: cmp
; CHECK: moveq
; CHECK-NOT: cmp
-; CHECK: moveq
+; CHECK: mov{{eq|ne}}
%tmp1 = icmp eq i32 %cond1, 0
%tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
%tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
%tmp4 = add i32 %tmp2, %tmp3
ret i32 %tmp4
}
+
+@foo = external global i32
+@bar = external global [250 x i8], align 1
+
+; CSE of cmp across BB boundary
+; rdar://10660865
+define void @f2() nounwind ssp {
+entry:
+; CHECK: f2:
+; CHECK: cmp
+; CHECK: poplt
+; CHECK-NOT: cmp
+; CHECK: movle
+ %0 = load i32* @foo, align 4
+ %cmp28 = icmp sgt i32 %0, 0
+ br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
+
+for.body.lr.ph: ; preds = %entry
+ %1 = icmp sgt i32 %0, 1
+ %smax = select i1 %1, i32 %0, i32 1
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([250 x i8]* @bar, i32 0, i32 0), i8 0, i32 %smax, i32 1, i1 false)
+ unreachable
+
+for.cond1.preheader: ; preds = %entry
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
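
In f2 a single compare against #1 can serve both blocks: %0 > 0 is the inverse of %0 < 1, giving the predicated early return (poplt), and the smax select becomes movle (when %0 <= 1, smax(%0, 1) = 1), so machine CSE can drop the second compare. An illustrative shape of the expected output; the register choices are assumptions:

    ; cmp   r0, #1
    ; poplt {...}      @ %0 < 1, i.e. !(%0 > 0): early-return path
    ; movle r0, #1     @ %0 <= 1: smax(%0, 1) = 1
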
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 30b9f59a8b3d..dc772827f270 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s
-; The ARM magic hinting works best with linear scan.
; CHECK: ldrd
; CHECK: strd
; CHECK: ldrb
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index aeda02282b99..fe0056c42a11 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7m-darwin-eabi -o - | FileCheck %s --check-prefix=DARWIN
; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
@from = common global [500 x i32] zeroinitializer, align 4
@@ -18,6 +19,8 @@ entry:
; EABI memset swaps arguments
; CHECK: mov r1, #0
; CHECK: memset
+ ; DARWIN: movs r1, #0
+ ; DARWIN: memset
; EABI: mov r2, #0
; EABI: __aeabi_memset
call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
diff --git a/test/CodeGen/ARM/mul_const.ll b/test/CodeGen/ARM/mul_const.ll
index 3cb8a8e816f6..c50a23354678 100644
--- a/test/CodeGen/ARM/mul_const.ll
+++ b/test/CodeGen/ARM/mul_const.ll
@@ -41,3 +41,45 @@ entry:
ret i32 %0
}
+define i32 @tn9(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn9:
+; CHECK: add r0, r0, r0, lsl #3
+; CHECK: rsb r0, r0, #0
+ %0 = mul i32 %v, -9
+ ret i32 %0
+}
+
+define i32 @tn7(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn7:
+; CHECK: sub r0, r0, r0, lsl #3
+ %0 = mul i32 %v, -7
+ ret i32 %0
+}
+
+define i32 @tn5(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn5:
+; CHECK: add r0, r0, r0, lsl #2
+; CHECK: rsb r0, r0, #0
+ %0 = mul i32 %v, -5
+ ret i32 %0
+}
+
+define i32 @tn3(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn3:
+; CHECK: sub r0, r0, r0, lsl #2
+ %0 = mul i32 %v, -3
+ ret i32 %0
+}
+
+define i32 @tn12288(i32 %v) nounwind readnone {
+entry:
+; CHECK: tn12288:
+; CHECK: sub r0, r0, r0, lsl #2
+; CHECK: lsl{{.*}}#12
+ %0 = mul i32 %v, -12288
+ ret i32 %0
+}
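
The expected sequences follow from shift-and-add identities: -9x = -(x + 8x), hence add r0, r0, r0, lsl #3 then rsb r0, r0, #0; -7x = x - 8x, a single sub; -5x = -(x + 4x); -3x = x - 4x; and -12288x = (x - 4x) << 12, since -12288 = -3 * 4096.
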
diff --git a/test/CodeGen/ARM/neon_ld1.ll b/test/CodeGen/ARM/neon_ld1.ll
index c78872a4bca2..b892d2db67d6 100644
--- a/test/CodeGen/ARM/neon_ld1.ll
+++ b/test/CodeGen/ARM/neon_ld1.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldr.64 | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstr.64
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; CHECK: t1
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vadd.i16 d
+; CHECK: vstr d
define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
entry:
%0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
@@ -12,6 +15,11 @@ entry:
ret void
}
+; CHECK: t2
+; CHECK: vldr d
+; CHECK: vldr d
+; CHECK: vsub.i16 d
+; CHECK: vmov r0, r1, d
define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
entry:
%0 = load <4 x i16>* %a, align 8 ; <<4 x i16>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_ld2.ll b/test/CodeGen/ARM/neon_ld2.ll
index 130277b31c36..944bfe060298 100644
--- a/test/CodeGen/ARM/neon_ld2.ll
+++ b/test/CodeGen/ARM/neon_ld2.ll
@@ -1,7 +1,10 @@
-; RUN: llc < %s -march=arm -mattr=+neon | grep vldmia | count 4
-; RUN: llc < %s -march=arm -mattr=+neon | grep vstmia | count 1
-; RUN: llc < %s -march=arm -mattr=+neon | grep vmov | count 2
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; CHECK: t1
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vadd.i64 q
+; CHECK: vstmia
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
@@ -12,6 +15,12 @@ entry:
ret void
}
+; CHECK: t2
+; CHECK: vldmia
+; CHECK: vldmia
+; CHECK: vsub.i64 q
+; CHECK: vmov r0, r1, d
+; CHECK: vmov r2, r3, d
define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
entry:
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
diff --git a/test/CodeGen/ARM/neon_spill.ll b/test/CodeGen/ARM/neon_spill.ll
new file mode 100644
index 000000000000..277bd05ba3b6
--- /dev/null
+++ b/test/CodeGen/ARM/neon_spill.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -verify-machineinstrs
+; RUN: llc < %s -verify-machineinstrs -O0
+; PR12177
+;
+; This test case spills a QQQQ register.
+;
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
+target triple = "armv7-none-linux-gnueabi"
+
+%0 = type { %1*, i32, i32, i32, i8 }
+%1 = type { i32 (...)** }
+%2 = type { i8*, i8*, i8*, i32 }
+%3 = type { %4 }
+%4 = type { i32 (...)**, %2, %4*, i8, i8 }
+
+declare arm_aapcs_vfpcc void @func1(%0*, float* nocapture, float* nocapture, %2*) nounwind
+
+declare arm_aapcs_vfpcc %0** @func2()
+
+declare arm_aapcs_vfpcc %2* @func3(%2*, %2*, i32)
+
+declare arm_aapcs_vfpcc %2** @func4()
+
+define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
+ call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+ %2 = call arm_aapcs_vfpcc %0** @func2() nounwind
+ %3 = load %0** %2, align 4, !tbaa !0
+ store float 0.000000e+00, float* undef, align 4
+ %4 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
+ call arm_aapcs_vfpcc void @func1(%0* %3, float* undef, float* undef, %2* undef)
+ %5 = call arm_aapcs_vfpcc %0** @func2() nounwind
+ store float 1.000000e+00, float* undef, align 4
+ call arm_aapcs_vfpcc void @func1(%0* undef, float* undef, float* undef, %2* undef)
+ store float 1.500000e+01, float* undef, align 4
+ %6 = call arm_aapcs_vfpcc %2** @func4() nounwind
+ %7 = call arm_aapcs_vfpcc %2* @func3(%2* undef, %2* undef, i32 2971) nounwind
+ %8 = fadd float undef, -1.000000e+05
+ store float %8, float* undef, align 16, !tbaa !3
+ %9 = call arm_aapcs_vfpcc i32 @rand() nounwind
+ %10 = fmul float undef, 2.000000e+05
+ %11 = fadd float %10, -1.000000e+05
+ store float %11, float* undef, align 4, !tbaa !3
+ call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
+ ret void
+}
+
+declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
+
+declare arm_aapcs_vfpcc i32 @rand()
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/ARM/odr_comdat.ll b/test/CodeGen/ARM/odr_comdat.ll
new file mode 100644
index 000000000000..e28b5788ef6d
--- /dev/null
+++ b/test/CodeGen/ARM/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ARMGNUEABI
+
+; Checking that a comdat group gets generated correctly for a static member
+; of an instantiated C++ template.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled
+; name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; ARMGNUEABI: .section .bss._ZN1CIiE1iE,"aGw",%nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; ARMGNUEABI: .section .data._ZN1CIiE1jE,"aGw",%progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
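For reference, one plausible C++ translation unit that yields the two weak_odr globals checked above; `_ZN1CIiE1iE` and `_ZN1CIiE1jE` demangle to `C<int>::i` and `C<int>::j`. The class and member names are inferred from the mangling; the original source is not part of the test:

```cpp
// Compile with, e.g.: clang++ -c --target=arm-linux-gnueabi odr_comdat.cpp
template <typename T> struct C {
  static int i;  // uninitialized: emitted into a .bss COMDAT group
  static int j;  // initialized:   emitted into a .data COMDAT group
};
template <typename T> int C<T>::i;
template <typename T> int C<T>::j = 12;

// Explicit instantiation forces both statics to be emitted for T = int.
template struct C<int>;
```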
diff --git a/test/CodeGen/ARM/opt-shuff-tstore.ll b/test/CodeGen/ARM/opt-shuff-tstore.ll
new file mode 100644
index 000000000000..b4da5524289f
--- /dev/null
+++ b/test/CodeGen/ARM/opt-shuff-tstore.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a9 -mtriple=arm-linux-unknown -promote-elements -mattr=+neon < %s | FileCheck %s
+
+; CHECK: func_4_8
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_4_8(<4 x i8> %param, <4 x i8>* %p) {
+ %r = add <4 x i8> %param, <i8 1, i8 2, i8 3, i8 4>
+ store <4 x i8> %r, <4 x i8>* %p
+ ret void
+}
+
+; CHECK: func_2_16
+; CHECK: vst1.32
+; CHECK-NEXT: bx lr
+define void @func_2_16(<2 x i16> %param, <2 x i16>* %p) {
+ %r = add <2 x i16> %param, <i16 1, i16 2>
+ store <2 x i16> %r, <2 x i16>* %p
+ ret void
+}
diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll
index e670a5be3bca..e72d51f06d4c 100644
--- a/test/CodeGen/ARM/peephole-bitcast.ll
+++ b/test/CodeGen/ARM/peephole-bitcast.ll
@@ -1,4 +1,6 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; XFAIL: *
+; PR11364
; vmov s0, r0 + vmov r0, s0 should have been optimized away.
; rdar://9104514
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
new file mode 100644
index 000000000000..d1d0ee5f3e7b
--- /dev/null
+++ b/test/CodeGen/ARM/reg_asc_order.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; Check that memcpy gets lowered to ldm/stm, at least in this very simple case.
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+;CHECK: ldm
+;CHECK: stm
+ %0 = bitcast %struct.Foo* %a to i8*
+ %1 = bitcast %struct.Foo* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
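`_Z10CopyStructP3FooS0_` demangles to `CopyStruct(Foo*, Foo*)`; a plausible C++ source for it, assuming the usual clang lowering of a small aggregate copy to `llvm.memcpy`:

```cpp
struct Foo { int a, b, c, d; };

// A 16-byte, 4-byte-aligned aggregate copy: clang emits llvm.memcpy for the
// assignment, which the ARM backend can then lower to one ldm/stm pair.
void CopyStruct(Foo *a, Foo *b) { *a = *b; }
```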
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 3a1921111280..05794e4ebddb 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -155,7 +155,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: t6:
-; CHECK: vldr.64
+; CHECK: vldr
; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]}
%tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
@@ -240,7 +240,7 @@ bb14: ; preds = %bb6
; PR7157
define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
; CHECK: t9:
-; CHECK: vldr.64
+; CHECK: vldr
; CHECK-NOT: vmov d{{.*}}, d16
; CHECK: vmov.i32 d17
; CHECK-NEXT: vstmia r0, {d16, d17}
@@ -272,8 +272,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
define arm_aapcs_vfpcc i32 @t10() nounwind {
entry:
; CHECK: t10:
-; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
-; CHECK: vmul.f32 q8, q8, d0[0]
+; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3f000000
+; CHECK: vmul.f32 q8, q8, d[[DREG:[0-1]+]]
; CHECK: vadd.f32 q8, q8, q8
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index ea44c28fb707..6bb67431198a 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -112,11 +112,11 @@ entry:
ret i32 %conv3
}
+; rdar://10750814
define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
entry:
; CHECK: test9
-; CHECK: rev r0, r0
-; CHECK: lsr r0, r0, #16
+; CHECK: rev16 r0, r0
%conv = zext i16 %v to i32
%shr4 = lshr i32 %conv, 8
%shl = shl nuw nsw i32 %conv, 8
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index f43dde52bbfd..c9ac66acbfd8 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -64,14 +64,14 @@ define i32 @t4(i32 %a, i32 %b, i32 %x) nounwind {
entry:
; ARM: t4:
; ARM: ldr
-; ARM: movlt
+; ARM: mov{{lt|ge}}
; ARMT2: t4:
; ARMT2: movwlt [[R0:r[0-9]+]], #65365
; ARMT2: movtlt [[R0]], #65365
; THUMB2: t4:
-; THUMB2: mvnlt.w [[R0:r[0-9]+]], #11141290
+; THUMB2: mvnlt [[R0:r[0-9]+]], #11141290
%0 = icmp slt i32 %a, %b
%1 = select i1 %0, i32 4283826005, i32 %x
ret i32 %1
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index f1bd7ee53f88..3e07da841a5a 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -76,12 +76,12 @@ define double @f7(double %a, double %b) {
; block generated, odds are good that we have close to the ideal code for this:
;
; CHECK-NEON: _f8:
-; CHECK-NEON: adr r2, LCPI7_0
-; CHECK-NEON-NEXT: movw r3, #1123
-; CHECK-NEON-NEXT: adds r1, r2, #4
-; CHECK-NEON-NEXT: cmp r0, r3
+; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0
+; CHECK-NEON-NEXT: movw [[R3:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: adds {{r.*}}, [[R2]], #4
+; CHECK-NEON-NEXT: cmp r0, [[R3]]
; CHECK-NEON-NEXT: it ne
-; CHECK-NEON-NEXT: movne r1, r2
+; CHECK-NEON-NEXT: movne {{r.*}}, [[R2]]
; CHECK-NEON-NEXT: ldr
; CHECK-NEON: bx
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 8a3133adf3eb..ca2e18a63949 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -58,3 +58,49 @@ define i32 @t4(i32 %a, i32 %b, i32 %x, i32 %y) nounwind {
%s = or i32 %z, %y
ret i32 %s
}
+
+define i32 @t5(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: moveq
+; ARM: orreq r2, r2, #1
+
+; T2: t5:
+; T2-NOT: moveq
+; T2: orreq r2, r2, #1
+ %tmp1 = icmp eq i32 %a, %b
+ %tmp2 = zext i1 %tmp1 to i32
+ %tmp3 = or i32 %tmp2, %c
+ ret i32 %tmp3
+}
+
+define i32 @t6(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+; ARM: t6:
+; ARM-NOT: movge
+; ARM: eorlt r3, r3, r2
+
+; T2: t6:
+; T2-NOT: movge
+; T2: eorlt.w r3, r3, r2
+ %cond = icmp slt i32 %a, %b
+ %tmp1 = select i1 %cond, i32 %c, i32 0
+ %tmp2 = xor i32 %tmp1, %d
+ ret i32 %tmp2
+}
+
+define i32 @t7(i32 %a, i32 %b, i32 %c) nounwind {
+entry:
+; ARM: t7:
+; ARM-NOT: lsleq
+; ARM: andeq r2, r2, r2, lsl #1
+
+; T2: t7:
+; T2-NOT: lsleq.w
+; T2: andeq.w r2, r2, r2, lsl #1
+ %tmp1 = shl i32 %c, 1
+ %cond = icmp eq i32 %a, %b
+ %tmp2 = select i1 %cond, i32 %tmp1, i32 -1
+ %tmp3 = and i32 %c, %tmp2
+ ret i32 %tmp3
+}
+
diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll
index 964cef084f17..eb971ff72e74 100644
--- a/test/CodeGen/ARM/shifter_operand.ll
+++ b/test/CodeGen/ARM/shifter_operand.ll
@@ -55,11 +55,15 @@ define fastcc void @test4(i16 %addr) nounwind {
entry:
; A8: test4:
; A8: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A8-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
; A8: str [[REG]], [r0, r1, lsl #2]
+; A8-NOT: str [[REG]], [r0]
; A9: test4:
; A9: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]
+; A9-NOT: ldr [[REG:r[0-9]+]], [r0, r1, lsl #2]!
; A9: str [[REG]], [r0, r1, lsl #2]
+; A9-NOT: str [[REG]], [r0]
%0 = tail call i8* (...)* @malloc(i32 undef) nounwind
%1 = bitcast i8* %0 to i32*
%2 = sext i16 %addr to i32
diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll
index bf4e55cb06c4..057ea11389ac 100644
--- a/test/CodeGen/ARM/spill-q.ll
+++ b/test/CodeGen/ARM/spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
define void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic {{.*}}, #15
; CHECK: vst1.64 {{.*}}sp, :128
; CHECK: vld1.64 {{.*}}sp, :128
entry:
diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll
index f4e3a44d56e3..983ba455e7b7 100644
--- a/test/CodeGen/ARM/str_pre-2.ll
+++ b/test/CodeGen/ARM/str_pre-2.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s
; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s
; The greedy register allocator uses a single CSR here, invalidating the test.
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 993d7ec7505b..03ae12c6dea0 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -12,13 +12,13 @@ target triple = "thumbv7-apple-ios"
;
; CHECK: f1
; CHECK: vmov s1, r0
-; CHECK: vldr.32 s0, LCPI
+; CHECK: vldr s0, LCPI
; The vector must be spilled:
-; CHECK: vstr.64 d0,
+; CHECK: vstr d0,
; CHECK: asm clobber d0
; And reloaded after the asm:
-; CHECK: vldr.64 [[D16:d[0-9]+]],
-; CHECK: vstr.64 [[D16]], [r1]
+; CHECK: vldr [[D16:d[0-9]+]],
+; CHECK: vstr [[D16]], [r1]
define void @f1(float %x, <2 x float>* %p) {
%v1 = insertelement <2 x float> undef, float %x, i32 1
%v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0
@@ -37,13 +37,13 @@ define void @f1(float %x, <2 x float>* %p) {
; virtual register. It doesn't read the old value.
;
; CHECK: f2
-; CHECK: vldr.32 s0, LCPI
+; CHECK: vldr s0, LCPI
; The vector must not be spilled:
-; CHECK-NOT: vstr.64
+; CHECK-NOT: vstr
; CHECK: asm clobber d0
; But instead rematerialize after the asm:
-; CHECK: vldr.32 [[S0:s[0-9]+]], LCPI
-; CHECK: vstr.64 [[D0:d[0-9]+]], [r0]
+; CHECK: vldr [[S0:s[0-9]+]], LCPI
+; CHECK: vstr [[D0:d[0-9]+]], [r0]
define void @f2(<2 x float>* %p) {
%v2 = insertelement <2 x float> undef, float 0x400921FB60000000, i32 0
%y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind
diff --git a/test/CodeGen/ARM/tail-dup.ll b/test/CodeGen/ARM/tail-dup.ll
new file mode 100644
index 000000000000..e015bf098ff8
--- /dev/null
+++ b/test/CodeGen/ARM/tail-dup.ll
@@ -0,0 +1,44 @@
+; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=dynamic-no-pic -mcpu=cortex-a8 -asm-verbose=false | FileCheck %s
+
+; We should be able to tail-duplicate the basic block containing the indirectbr
+; into all of its predecessors.
+; CHECK: fn:
+; CHECK: mov pc
+; CHECK: mov pc
+; CHECK: mov pc
+
+@fn.codetable = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@fn, %RETURN), i8* blockaddress(@fn, %INCREMENT), i8* blockaddress(@fn, %DECREMENT)], align 4
+
+define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
+entry:
+ %0 = load i32* %opcodes, align 4, !tbaa !0
+ %arrayidx = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %0
+ br label %indirectgoto
+
+INCREMENT: ; preds = %indirectgoto
+ %inc = add nsw i32 %result.0, 1
+ %1 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+ %arrayidx2 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %1
+ br label %indirectgoto
+
+DECREMENT: ; preds = %indirectgoto
+ %dec = add nsw i32 %result.0, -1
+ %2 = load i32* %opcodes.addr.0, align 4, !tbaa !0
+ %arrayidx4 = getelementptr inbounds [3 x i8*]* @fn.codetable, i32 0, i32 %2
+ br label %indirectgoto
+
+indirectgoto: ; preds = %DECREMENT, %INCREMENT, %entry
+ %result.0 = phi i32 [ 0, %entry ], [ %dec, %DECREMENT ], [ %inc, %INCREMENT ]
+ %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
+ %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
+ %opcodes.addr.0 = getelementptr inbounds i32* %opcodes.pn, i32 1
+ %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+ indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
+
+RETURN: ; preds = %indirectgoto
+ ret i32 %result.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
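The IR above has the shape clang produces for a computed-goto dispatch loop. A hypothetical GNU C++ rendering (the `&&label` extension is supported by GCC and Clang) that illustrates why tail duplication yields one indirect branch, i.e. one `mov pc`, per handler:

```cpp
// Each handler ends by jumping through the table, so after tail duplication
// every predecessor of the dispatch block carries its own indirect branch.
int fn(const int *opcodes) {
  static void *const codetable[] = {&&RETURN, &&INCREMENT, &&DECREMENT};
  int result = 0;
  goto *codetable[*opcodes++];
INCREMENT:
  ++result;
  goto *codetable[*opcodes++];
DECREMENT:
  --result;
  goto *codetable[*opcodes++];
RETURN:
  return result;
}
```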
diff --git a/test/CodeGen/ARM/test-sharedidx.ll b/test/CodeGen/ARM/test-sharedidx.ll
new file mode 100644
index 000000000000..93340c300cd4
--- /dev/null
+++ b/test/CodeGen/ARM/test-sharedidx.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -mcpu=cortex-a9 -stress-ivchain | FileCheck %s
+; REQUIRES: asserts
+
+; @sharedidx is an unrolled variant of this loop:
+; for (unsigned long i = 0; i < len; i += s) {
+; c[i] = a[i] + b[i];
+; }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; rdar://10674430
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+; CHECK: sharedidx:
+ %cmp8 = icmp eq i32 %len, 0
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body.3
+; CHECK: %for.body
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+ %0 = load i8* %arrayidx, align 1
+ %conv6 = zext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+ %1 = load i8* %arrayidx1, align 1
+ %conv27 = zext i8 %1 to i32
+ %add = add nsw i32 %conv27, %conv6
+ %conv3 = trunc i32 %add to i8
+ %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+ store i8 %conv3, i8* %arrayidx4, align 1
+ %add5 = add i32 %i.09, %s
+ %cmp = icmp ult i32 %add5, %len
+ br i1 %cmp, label %for.body.1, label %for.end
+
+for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+ ret void
+
+for.body.1: ; preds = %for.body
+; CHECK: %for.body.1
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+ %2 = load i8* %arrayidx.1, align 1
+ %conv6.1 = zext i8 %2 to i32
+ %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+ %3 = load i8* %arrayidx1.1, align 1
+ %conv27.1 = zext i8 %3 to i32
+ %add.1 = add nsw i32 %conv27.1, %conv6.1
+ %conv3.1 = trunc i32 %add.1 to i8
+ %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+ store i8 %conv3.1, i8* %arrayidx4.1, align 1
+ %add5.1 = add i32 %add5, %s
+ %cmp.1 = icmp ult i32 %add5.1, %len
+ br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2: ; preds = %for.body.1
+; CHECK: %for.body.2
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+ %4 = load i8* %arrayidx.2, align 1
+ %conv6.2 = zext i8 %4 to i32
+ %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+ %5 = load i8* %arrayidx1.2, align 1
+ %conv27.2 = zext i8 %5 to i32
+ %add.2 = add nsw i32 %conv27.2, %conv6.2
+ %conv3.2 = trunc i32 %add.2 to i8
+ %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+ store i8 %conv3.2, i8* %arrayidx4.2, align 1
+ %add5.2 = add i32 %add5.1, %s
+ %cmp.2 = icmp ult i32 %add5.2, %len
+ br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3: ; preds = %for.body.2
+; CHECK: %for.body.3
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+ %6 = load i8* %arrayidx.3, align 1
+ %conv6.3 = zext i8 %6 to i32
+ %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+ %7 = load i8* %arrayidx1.3, align 1
+ %conv27.3 = zext i8 %7 to i32
+ %add.3 = add nsw i32 %conv27.3, %conv6.3
+ %conv3.3 = trunc i32 %add.3 to i8
+ %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+ store i8 %conv3.3, i8* %arrayidx4.3, align 1
+ %add5.3 = add i32 %add5.2, %s
+ %cmp.3 = icmp ult i32 %add5.3, %len
+ br i1 %cmp.3, label %for.body, label %for.end
+}
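For orientation, a hand-unrolled-by-4 C++ rendering of the loop from the comment above, matching the four `for.body*` blocks (illustrative only):

```cpp
// Four copies of the body share the induction variable 'i'; with
// -stress-ivchain the backend rewrites them into chained post-increment
// loads (the "ldrb ..., [...]!" patterns the CHECK lines expect).
void sharedidx(const unsigned char *a, const unsigned char *b,
               unsigned char *c, unsigned s, unsigned len) {
  for (unsigned i = 0; i < len;) {
    c[i] = a[i] + b[i]; i += s; if (i >= len) return;
    c[i] = a[i] + b[i]; i += s; if (i >= len) return;
    c[i] = a[i] + b[i]; i += s; if (i >= len) return;
    c[i] = a[i] + b[i]; i += s;
  }
}
```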
diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll
index 14e668efb1da..f157dbdb970c 100644
--- a/test/CodeGen/ARM/vbsl-constant.ll
+++ b/test/CodeGen/ARM/vbsl-constant.ll
@@ -2,8 +2,8 @@
define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
;CHECK: v_bsli8:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
;CHECK: vbsl
%tmp1 = load <8 x i8>* %A
%tmp2 = load <8 x i8>* %B
@@ -16,8 +16,8 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
;CHECK: v_bsli16:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
;CHECK: vbsl
%tmp1 = load <4 x i16>* %A
%tmp2 = load <4 x i16>* %B
@@ -30,8 +30,8 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
;CHECK: v_bsli32:
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
;CHECK: vbsl
%tmp1 = load <2 x i32>* %A
%tmp2 = load <2 x i32>* %B
@@ -44,9 +44,9 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
;CHECK: v_bsli64:
-;CHECK: vldr.64
-;CHECK: vldr.64
-;CHECK: vldr.64
+;CHECK: vldr
+;CHECK: vldr
+;CHECK: vldr
;CHECK: vbsl
%tmp1 = load <1 x i64>* %A
%tmp2 = load <1 x i64>* %B
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
index 13873932abd7..7fddbed1ed51 100644
--- a/test/CodeGen/ARM/vdiv_combine.ll
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -8,7 +8,7 @@ declare void @foo_int32x4_t(<4 x i32>)
; Test signed conversion.
; CHECK: t1
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
define void @t1() nounwind {
entry:
%tmp = load i32* @iin, align 4, !tbaa !3
@@ -24,7 +24,7 @@ declare void @foo_float32x2_t(<2 x float>)
; Test unsigned conversion.
; CHECK: t2
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
define void @t2() nounwind {
entry:
%tmp = load i32* @uin, align 4, !tbaa !3
@@ -38,7 +38,7 @@ entry:
; Test which should not fold due to non-power of 2.
; CHECK: t3
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
define void @t3() nounwind {
entry:
%tmp = load i32* @iin, align 4, !tbaa !3
@@ -52,7 +52,7 @@ entry:
; Test which should not fold due to power of 2 out of range.
; CHECK: t4
-; CHECK: vdiv
+; CHECK: {{vdiv|vmul}}
define void @t4() nounwind {
entry:
%tmp = load i32* @iin, align 4, !tbaa !3
@@ -66,7 +66,7 @@ entry:
; Test case where const is max power of 2 (i.e., 2^32).
; CHECK: t5
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
define void @t5() nounwind {
entry:
%tmp = load i32* @iin, align 4, !tbaa !3
@@ -80,7 +80,7 @@ entry:
; Test quadword.
; CHECK: t6
-; CHECK-NOT: vdiv
+; CHECK-NOT: {{vdiv|vmul}}
define void @t6() nounwind {
entry:
%tmp = load i32* @iin, align 4, !tbaa !3
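The combine these tests guard replaces an int-to-float conversion followed by a divide with a single fixed-point `vcvt` taking n fractional bits, which is only possible when the divisor is a power of two whose exponent is in range for the instruction. A small C++ check of the underlying identity (assumes the default round-to-nearest FP environment):

```cpp
#include <cassert>
#include <cmath>

int main() {
  const int n = 3; // divide by 2^3 = 8
  for (int x : {0, 1, -7, 1024, -99999}) {
    float via_div = static_cast<float>(x) / (1 << n);
    // Fixed-point view: interpret x as having n fractional bits, i.e. x * 2^-n.
    float via_cvt = std::ldexp(static_cast<float>(x), -n);
    assert(via_div == via_cvt); // exact: a power-of-two divide only shifts the exponent
  }
}
```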
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index e99fac1f1e67..05332e4d8c5b 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -254,7 +254,7 @@ entry:
;CHECK: redundantVdup:
;CHECK: vmov.i8
;CHECK-NOT: vdup.8
-;CHECK: vstr.64
+;CHECK: vstr
define void @redundantVdup(<8 x i8>* %ptr) nounwind {
%1 = insertelement <8 x i8> undef, i8 -128, i32 0
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll
index 81bdc44863b7..a38a0feae042 100644
--- a/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -80,7 +80,7 @@ declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
; so they are not split up into i32 values. Radar 8755338.
define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_buildvector
-; CHECK: vldr.64
+; CHECK: vldr
%t0 = load i64* %ptr, align 4
%t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
store <2 x i64> %t1, <2 x i64>* %vp
@@ -89,7 +89,7 @@ define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_insertelement
-; CHECK: vldr.64
+; CHECK: vldr
%t0 = load i64* %ptr, align 4
%vec = load <2 x i64>* %vp
%t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
@@ -99,7 +99,7 @@ define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
; CHECK: i64_extractelement
-; CHECK: vstr.64
+; CHECK: vstr
%vec = load <2 x i64>* %vp
%t1 = extractelement <2 x i64> %vec, i32 0
store i64 %t1, i64* %ptr
@@ -123,3 +123,13 @@ define void @orVec(<3 x i8>* %A) nounwind {
ret void
}
+; The following test was hitting an assertion in the DAG combiner when
+; constant folding the multiply because the "sext undef" was translated to
+; a BUILD_VECTOR with i32 0 operands, which did not match the i16 operands
+; of the other BUILD_VECTOR.
+define i16 @foldBuildVectors() {
+ %1 = sext <8 x i8> undef to <8 x i16>
+ %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %3 = extractelement <8 x i16> %2, i32 0
+ ret i16 %3
+}
diff --git a/test/CodeGen/ARM/vector-extend-narrow.ll b/test/CodeGen/ARM/vector-extend-narrow.ll
new file mode 100644
index 000000000000..5e9239f25632
--- /dev/null
+++ b/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -0,0 +1,46 @@
+; RUN: llc -mtriple armv7 %s -o - | FileCheck %s
+
+; CHECK: f:
+define float @f(<4 x i16>* nocapture %in) {
+ ; CHECK: vldr
+ ; CHECK: vmovl.u16
+ %1 = load <4 x i16>* %in
+ ; CHECK: vcvt.f32.u32
+ %2 = uitofp <4 x i16> %1 to <4 x float>
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = extractelement <4 x float> %2, i32 1
+ %5 = extractelement <4 x float> %2, i32 2
+
+ ; CHECK: vadd.f32
+ %6 = fadd float %3, %4
+ %7 = fadd float %6, %5
+
+ ret float %7
+}
+
+; CHECK: g:
+define float @g(<4 x i8>* nocapture %in) {
+ ; CHECK: vldr
+ ; CHECK: vmovl.u8
+ ; CHECK: vmovl.u16
+ %1 = load <4 x i8>* %in
+ ; CHECK: vcvt.f32.u32
+ %2 = uitofp <4 x i8> %1 to <4 x float>
+ %3 = extractelement <4 x float> %2, i32 0
+ %4 = extractelement <4 x float> %2, i32 1
+ %5 = extractelement <4 x float> %2, i32 2
+
+ ; CHECK: vadd.f32
+ %6 = fadd float %3, %4
+ %7 = fadd float %6, %5
+
+ ret float %7
+}
+
+; CHECK: h:
+define <4 x i8> @h(<4 x float> %v) {
+ ; CHECK: vcvt.{{[us]}}32.f32
+ ; CHECK: vmovn.i32
+ %1 = fptoui <4 x float> %v to <4 x i8>
+ ret <4 x i8> %1
+}
diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll
index 65b5913e40a4..e224bdfe25a5 100644
--- a/test/CodeGen/ARM/vext.ll
+++ b/test/CodeGen/ARM/vext.ll
@@ -138,7 +138,7 @@ define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; Make sure this doesn't crash
define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind {
; CHECK: test_elem_mismatch:
-; CHECK: vstr.64
+; CHECK: vstr
%tmp0 = load <2 x i64>* %src, align 16
%tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32>
%tmp2 = extractelement <4 x i32> %tmp1, i32 0
diff --git a/test/CodeGen/ARM/vlddup.ll b/test/CodeGen/ARM/vlddup.ll
index d0e9ac3ad3c4..61d73c15f31f 100644
--- a/test/CodeGen/ARM/vlddup.ll
+++ b/test/CodeGen/ARM/vlddup.ll
@@ -32,7 +32,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK: vld1dupf:
-;CHECK: vld1.32 {d16[]}, [r0]
+;CHECK: vld1.32 {d16[]}, [r0, :32]
%tmp0 = load float* %A
%tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -51,7 +51,7 @@ define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK: vld1dupQf:
-;CHECK: vld1.32 {d16[], d17[]}, [r0]
+;CHECK: vld1.32 {d16[], d17[]}, [r0, :32]
%tmp0 = load float* %A
%tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll
index 0d7d4ec2949f..7bd0cbda02b1 100644
--- a/test/CodeGen/ARM/vldlane.ll
+++ b/test/CodeGen/ARM/vldlane.ll
@@ -31,9 +31,19 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
ret <2 x i32> %tmp3
}
+define <2 x i32> @vld1lanei32a32(i32* %A, <2 x i32>* %B) nounwind {
+;CHECK: vld1lanei32a32:
+;Check the alignment value. Legal values are none or :32.
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
+ %tmp1 = load <2 x i32>* %B
+ %tmp2 = load i32* %A, align 4
+ %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
+ ret <2 x i32> %tmp3
+}
+
define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vld1lanef:
-;CHECK: vld1.32 {d16[1]}, [r0]
+;CHECK: vld1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x float>* %B
%tmp2 = load float* %A, align 4
%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
@@ -69,7 +79,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
;CHECK: vld1laneQf:
-;CHECK: vld1.32 {d16[0]}, [r0]
+;CHECK: vld1.32 {d16[0]}, [r0, :32]
%tmp1 = load <4 x float>* %B
%tmp2 = load float* %A
%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index a86be32bd203..0c2387960b4e 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -56,13 +56,13 @@ define <2 x i32> @v_movi32d() nounwind {
define <2 x i32> @v_movi32e() nounwind {
;CHECK: v_movi32e:
-;CHECK: vmov.i32 d{{.*}}, #0x20FF
+;CHECK: vmov.i32 d{{.*}}, #0x20ff
ret <2 x i32> < i32 8447, i32 8447 >
}
define <2 x i32> @v_movi32f() nounwind {
;CHECK: v_movi32f:
-;CHECK: vmov.i32 d{{.*}}, #0x20FFFF
+;CHECK: vmov.i32 d{{.*}}, #0x20ffff
ret <2 x i32> < i32 2162687, i32 2162687 >
}
@@ -92,19 +92,19 @@ define <2 x i32> @v_mvni32d() nounwind {
define <2 x i32> @v_mvni32e() nounwind {
;CHECK: v_mvni32e:
-;CHECK: vmvn.i32 d{{.*}}, #0x20FF
+;CHECK: vmvn.i32 d{{.*}}, #0x20ff
ret <2 x i32> < i32 4294958848, i32 4294958848 >
}
define <2 x i32> @v_mvni32f() nounwind {
;CHECK: v_mvni32f:
-;CHECK: vmvn.i32 d{{.*}}, #0x20FFFF
+;CHECK: vmvn.i32 d{{.*}}, #0x20ffff
ret <2 x i32> < i32 4292804608, i32 4292804608 >
}
define <1 x i64> @v_movi64() nounwind {
;CHECK: v_movi64:
-;CHECK: vmov.i64 d{{.*}}, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 d{{.*}}, #0xff0000ff0000ffff
ret <1 x i64> < i64 18374687574888349695 >
}
@@ -152,19 +152,19 @@ define <4 x i32> @v_movQi32d() nounwind {
define <4 x i32> @v_movQi32e() nounwind {
;CHECK: v_movQi32e:
-;CHECK: vmov.i32 q{{.*}}, #0x20FF
+;CHECK: vmov.i32 q{{.*}}, #0x20ff
ret <4 x i32> < i32 8447, i32 8447, i32 8447, i32 8447 >
}
define <4 x i32> @v_movQi32f() nounwind {
;CHECK: v_movQi32f:
-;CHECK: vmov.i32 q{{.*}}, #0x20FFFF
+;CHECK: vmov.i32 q{{.*}}, #0x20ffff
ret <4 x i32> < i32 2162687, i32 2162687, i32 2162687, i32 2162687 >
}
define <2 x i64> @v_movQi64() nounwind {
;CHECK: v_movQi64:
-;CHECK: vmov.i64 q{{.*}}, #0xFF0000FF0000FFFF
+;CHECK: vmov.i64 q{{.*}}, #0xff0000ff0000ffff
ret <2 x i64> < i64 18374687574888349695, i64 18374687574888349695 >
}
@@ -182,7 +182,7 @@ entry:
define void @vdupnneg75(%struct.int8x8_t* noalias nocapture sret %agg.result) nounwind {
entry:
;CHECK: vdupnneg75:
-;CHECK: vmov.i8 d{{.*}}, #0xB5
+;CHECK: vmov.i8 d{{.*}}, #0xb5
%0 = getelementptr inbounds %struct.int8x8_t* %agg.result, i32 0, i32 0 ; <<8 x i8>*> [#uses=1]
store <8 x i8> <i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75, i8 -75>, <8 x i8>* %0, align 8
ret void
@@ -353,3 +353,48 @@ define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
store <4 x i16> %tmp2, <4 x i16>* %b, align 8
ret void
}
+
+; Use vmov.f32 to materialize f32 immediate splats
+; rdar://10437054
+define void @v_mov_v2f32(<2 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v2f32:
+;CHECK: vmov.f32 d{{.*}}, #-1.600000e+01
+ store <2 x float> <float -1.600000e+01, float -1.600000e+01>, <2 x float>* %p, align 4
+ ret void
+}
+
+define void @v_mov_v4f32(<4 x float>* nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32:
+;CHECK: vmov.f32 q{{.*}}, #3.100000e+01
+ store <4 x float> <float 3.100000e+01, float 3.100000e+01, float 3.100000e+01, float 3.100000e+01>, <4 x float>* %p, align 4
+ ret void
+}
+
+define void @v_mov_v4f32_undef(<4 x float> * nocapture %p) nounwind {
+entry:
+;CHECK: v_mov_v4f32_undef:
+;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
+ %a = load <4 x float> *%p
+ %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
+ store <4 x float> %b, <4 x float> *%p
+ ret void
+}
+
+; Vector any_extends must be selected as either vmovl.u or vmovl.s.
+; rdar://10723651
+define void @any_extend(<4 x i1> %x, <4 x i32> %y) nounwind ssp {
+entry:
+;CHECK: any_extend
+;CHECK: vmovl
+ %and.i186 = zext <4 x i1> %x to <4 x i32>
+ %add.i185 = sub <4 x i32> %and.i186, %y
+ %sub.i = sub <4 x i32> %add.i185, zeroinitializer
+ %add.i = add <4 x i32> %sub.i, zeroinitializer
+ %vmovn.i = trunc <4 x i32> %add.i to <4 x i16>
+ tail call void @llvm.arm.neon.vst1.v4i16(i8* undef, <4 x i16> %vmovn.i, i32 2)
+ unreachable
+}
+
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 1780d6e66be7..61d89bbae835 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -514,3 +514,14 @@ entry:
store <8 x i8> %10, <8 x i8>* %11, align 8
ret void
}
+
+; If one operand has a zero-extend and the other a sign-extend, vmull
+; cannot be used.
+define i16 @vmullWithInconsistentExtensions(<8 x i8> %vec) {
+; CHECK: vmullWithInconsistentExtensions
+; CHECK-NOT: vmull.s8
+ %1 = sext <8 x i8> %vec to <8 x i16>
+ %2 = mul <8 x i16> %1, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+ %3 = extractelement <8 x i16> %2, i32 0
+ ret i16 %3
+}
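To see why the mixed case cannot use `vmull`: NEON's widening multiplies extend both operands the same way (`vmull.s8` sign-extends both, `vmull.u8` zero-extends both), so when one operand needs sext and the other zext the products can differ. A small C++ illustration:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int8_t x = 3;
  uint8_t c = 0xFF; // 255 when zero-extended, -1 when sign-extended
  int16_t widened     = int16_t(x) * int16_t(uint16_t(c)); // sext(x) * zext(c) = 765
  int16_t as_vmull_s8 = int16_t(x) * int16_t(int8_t(c));   // sext both: 3 * -1 = -3
  assert(widened == 765 && as_vmull_s8 == -3);             // the two lowerings disagree
}
```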
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll
index 34acd1678ae3..122ec0357fbe 100644
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -148,11 +148,11 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
ret void
}
-; vrev <4 x i16> should use VREV32 and not VREV64
+; The type <2 x i16> is legalized to <2 x i32> and needs to be trunc-stored
+; to <2 x i16> when stored to memory.
define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
; CHECK: test_vrev64:
-; CHECK: vext.16
-; CHECK: vrev32.16
+; CHECK: vst1.32
entry:
%0 = bitcast <4 x i16>* %source to <8 x i16>*
%tmp2 = load <8 x i16>* %0, align 4
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index 915a84b67767..fb05a20f6695 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -110,6 +110,24 @@ define void @vst2Qf(float* %A, <4 x float>* %B) nounwind {
ret void
}
+define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
+;CHECK: vst2update
+;CHECK: vst2.16 {d16, d17}, [r0]!
+ %tmp1 = load <4 x i16>* %B
+ tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
+ %t5 = getelementptr inbounds i8* %out, i32 16
+ ret i8* %t5
+}
+
+define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
+;CHECK: vst2update2
+;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
+ %tmp1 = load <4 x float>* %this
+ call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
+ %tmp2 = getelementptr inbounds i8* %out, i32 32
+ ret i8* %tmp2
+}
+
declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind
diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll
index 08b72325ed9e..758b355736d0 100644
--- a/test/CodeGen/ARM/vstlane.ll
+++ b/test/CodeGen/ARM/vstlane.ll
@@ -45,7 +45,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
;CHECK: vst1lanef:
-;CHECK: vst1.32 {d16[1]}, [r0]
+;CHECK: vst1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x float>* %B
%tmp2 = extractelement <2 x float> %tmp1, i32 1
store float %tmp2, float* %A
@@ -358,6 +358,13 @@ define void @vst4laneQf(float* %A, <4 x float>* %B) nounwind {
ret void
}
+; Make sure this doesn't crash; PR10258
+define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
+;CHECK: variable_insertelement:
+ %r = insertelement <8 x i16> %a, i16 %b, i32 %c
+ ret <8 x i16> %r
+}
+
declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
diff --git a/test/CodeGen/ARM/widen-vmovs.ll b/test/CodeGen/ARM/widen-vmovs.ll
index 8fd99ba7af42..2cffda317902 100644
--- a/test/CodeGen/ARM/widen-vmovs.ll
+++ b/test/CodeGen/ARM/widen-vmovs.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs | FileCheck %s
target triple = "thumbv7-apple-ios"
-; The 0.0 constant is loaded from the constant pool and kept in a register.
+; The 1.0e+10 constant is loaded from the constant pool and kept in a register.
; CHECK: %entry
-; CHECK: vldr.32 s
+; CHECK: vldr s
; The float loop variable is initialized with a vmovs from the constant register.
; The vmovs is first widened to a vmovd, and then converted to a vorr because of the v2f32 vadd.f32.
; CHECK: vorr [[DL:d[0-9]+]], [[DN:d[0-9]+]]
@@ -24,8 +24,8 @@ for.body4:
br label %for.body.i
for.body.i:
- %tmp3.i = phi float [ 0.000000e+00, %for.body4 ], [ %add.i, %for.body.i ]
- %add.i = fadd float %tmp3.i, 0.000000e+00
+ %tmp3.i = phi float [ 1.000000e+10, %for.body4 ], [ %add.i, %for.body.i ]
+ %add.i = fadd float %tmp3.i, 1.000000e+10
%exitcond.i = icmp eq i32 undef, 41
br i1 %exitcond.i, label %rInnerproduct.exit, label %for.body.i
diff --git a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll b/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
deleted file mode 100644
index 4b3d022c1d8d..000000000000
--- a/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; This shouldn't crash
-; RUN: llc < %s -march=alpha
-
-@.str_4 = external global [44 x i8] ; <[44 x i8]*> [#uses=0]
-
-declare void @printf(i32, ...)
-
-define void @main() {
-entry:
- %tmp.11861 = icmp slt i64 0, 1 ; <i1> [#uses=1]
- %tmp.19466 = icmp slt i64 0, 1 ; <i1> [#uses=1]
- %tmp.21571 = icmp slt i64 0, 1 ; <i1> [#uses=1]
- %tmp.36796 = icmp slt i64 0, 1 ; <i1> [#uses=1]
- br i1 %tmp.11861, label %loopexit.2, label %no_exit.2
-
-no_exit.2: ; preds = %entry
- ret void
-
-loopexit.2: ; preds = %entry
- br i1 %tmp.19466, label %loopexit.3, label %no_exit.3.preheader
-
-no_exit.3.preheader: ; preds = %loopexit.2
- ret void
-
-loopexit.3: ; preds = %loopexit.2
- br i1 %tmp.21571, label %no_exit.6, label %no_exit.4
-
-no_exit.4: ; preds = %loopexit.3
- ret void
-
-no_exit.6: ; preds = %no_exit.6, %loopexit.3
- %tmp.30793 = icmp sgt i64 0, 0 ; <i1> [#uses=1]
- br i1 %tmp.30793, label %loopexit.6, label %no_exit.6
-
-loopexit.6: ; preds = %no_exit.6
- %Z.1 = select i1 %tmp.36796, double 1.000000e+00, double 0x3FEFFF7CEDE74EAE; <double> [#uses=2]
- tail call void (i32, ...)* @printf( i32 0, i64 0, i64 0, i64 0, double 1.000000e+00, double 1.000000e+00, double %Z.1, double %Z.1 )
- ret void
-}
-
diff --git a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll b/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
deleted file mode 100644
index 65d2a8d02ac8..000000000000
--- a/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; The global symbol should be legalized
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
- %struct.LIST_HELP = type { %struct.LIST_HELP*, i8* }
- %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] }
- %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@clause_SORT = external global [21 x %struct.LIST_HELP*] ; <[21 x %struct.LIST_HELP*]*> [#uses=0]
-@ia_in = external global %struct._IO_FILE* ; <%struct._IO_FILE**> [#uses=1]
-@multvec_j = external global [100 x i32] ; <[100 x i32]*> [#uses=0]
-
-define void @main(i32 %argc) {
-clock_Init.exit:
- %tmp.5.i575 = load i32* null ; <i32> [#uses=1]
- %tmp.309 = icmp eq i32 %tmp.5.i575, 0 ; <i1> [#uses=1]
- br i1 %tmp.309, label %UnifiedReturnBlock, label %then.17
-
-then.17: ; preds = %clock_Init.exit
- store %struct._IO_FILE* null, %struct._IO_FILE** @ia_in
- %savedstack = call i8* @llvm.stacksave( ) ; <i8*> [#uses=0]
- ret void
-
-UnifiedReturnBlock: ; preds = %clock_Init.exit
- ret void
-}
-
-declare i8* @llvm.stacksave()
diff --git a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll b/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
deleted file mode 100644
index 45587f08fd6c..000000000000
--- a/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; This shouldn't crash
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev6-unknown-linux-gnu"
-deplibs = [ "c", "crtend", "stdc++" ]
- %struct.__va_list_tag = type { i8*, i32 }
-
-define i32 @emit_library_call_value(i32 %nargs, ...) {
-entry:
- %tmp.223 = va_arg %struct.__va_list_tag* null, i32 ; <i32> [#uses=1]
- ret i32 %tmp.223
-}
-
diff --git a/test/CodeGen/Alpha/2006-04-04-zextload.ll b/test/CodeGen/Alpha/2006-04-04-zextload.ll
deleted file mode 100644
index 671d39e1bb7b..000000000000
--- a/test/CodeGen/Alpha/2006-04-04-zextload.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
- %struct._Callback_list = type { %struct._Callback_list*, void (i32, %struct.ios_base*, i32)*, i32, i32 }
- %struct._Impl = type { i32, %struct.facet**, i64, %struct.facet**, i8** }
- %struct._Words = type { i8*, i64 }
- %"struct.__codecvt_abstract_base<char,char,__mbstate_t>" = type { %struct.facet }
- %"struct.basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %struct.locale }
- %struct.facet = type { i32 (...)**, i32 }
- %struct.ios_base = type { i32 (...)**, i64, i64, i32, i32, i32, %struct._Callback_list*, %struct._Words, [8 x %struct._Words], i32, %struct._Words*, %struct.locale }
- %struct.locale = type { %struct._Impl* }
- %"struct.ostreambuf_iterator<char,std::char_traits<char> >" = type { %"struct.basic_streambuf<char,std::char_traits<char> >"*, i1 }
-
-define void @_ZNKSt7num_putIcSt19ostreambuf_iteratorIcSt11char_traitsIcEEE15_M_insert_floatIdEES3_S3_RSt8ios_baseccT_() {
-entry:
- %tmp234 = icmp eq i8 0, 0 ; <i1> [#uses=1]
- br i1 %tmp234, label %cond_next243, label %cond_true235
-
-cond_true235: ; preds = %entry
- ret void
-
-cond_next243: ; preds = %entry
- %tmp428 = load i64* null ; <i64> [#uses=1]
- %tmp428.upgrd.1 = trunc i64 %tmp428 to i32 ; <i32> [#uses=1]
- %tmp429 = alloca i8, i32 %tmp428.upgrd.1 ; <i8*> [#uses=0]
- unreachable
-}
-
-
diff --git a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll b/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
deleted file mode 100644
index 5d31bc3798dc..000000000000
--- a/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-
-define i32 @_ZN9__gnu_cxx18__exchange_and_addEPVii(i32* %__mem, i32 %__val) {
-entry:
- %__tmp = alloca i32, align 4 ; <i32*> [#uses=1]
- %tmp3 = call i32 asm sideeffect "\0A$$Lxadd_0:\0A\09ldl_l $0,$3\0A\09addl $0,$4,$1\0A\09stl_c $1,$2\0A\09beq $1,$$Lxadd_0\0A\09mb", "=&r,=*&r,=*m,m,r"( i32* %__tmp, i32* %__mem, i32* %__mem, i32 %__val ) ; <i32> [#uses=1]
- ret i32 %tmp3
-}
-
-define void @_ZN9__gnu_cxx12__atomic_addEPVii(i32* %__mem, i32 %__val) {
-entry:
- %tmp2 = call i32 asm sideeffect "\0A$$Ladd_1:\0A\09ldl_l $0,$2\0A\09addl $0,$3,$0\0A\09stl_c $0,$1\0A\09beq $0,$$Ladd_1\0A\09mb", "=&r,=*m,m,r"( i32* %__mem, i32* %__mem, i32 %__val ) ; <i32> [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/Alpha/2006-11-01-vastart.ll b/test/CodeGen/Alpha/2006-11-01-vastart.ll
deleted file mode 100644
index 14e0bccc8482..000000000000
--- a/test/CodeGen/Alpha/2006-11-01-vastart.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
- %struct.va_list = type { i8*, i32, i32 }
-
-define void @yyerror(i32, ...) {
-entry:
- %va.upgrd.1 = bitcast %struct.va_list* null to i8* ; <i8*> [#uses=1]
- call void @llvm.va_start( i8* %va.upgrd.1 )
- ret void
-}
-
-declare void @llvm.va_start(i8*)
-
diff --git a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll b/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
deleted file mode 100644
index b537e250ad86..000000000000
--- a/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-;FIXME: this should produce no mul inst. But not crashing will have to do for now
-
-; ModuleID = 'Output/bugpoint-train/bugpoint-reduced-simplified.bc'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define fastcc i32 @getcount(i32 %s) {
-cond_next43: ; preds = %bb27
- %tmp431 = mul i32 %s, -3
- ret i32 %tmp431
-}
diff --git a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll b/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
deleted file mode 100644
index 1a4b40e2da2c..000000000000
--- a/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define i64 @__mulvdi3(i64 %a, i64 %b) nounwind {
-entry:
- %0 = sext i64 %a to i128 ; <i128> [#uses=1]
- %1 = sext i64 %b to i128 ; <i128> [#uses=1]
- %2 = mul i128 %1, %0 ; <i128> [#uses=2]
- %3 = lshr i128 %2, 64 ; <i128> [#uses=1]
- %4 = trunc i128 %3 to i64 ; <i64> [#uses=1]
- %5 = trunc i128 %2 to i64 ; <i64> [#uses=1]
- %6 = icmp eq i64 %4, 0 ; <i1> [#uses=1]
- br i1 %6, label %bb1, label %bb
-
-bb: ; preds = %entry
- unreachable
-
-bb1: ; preds = %entry
- ret i64 %5
-}
diff --git a/test/CodeGen/Alpha/2008-11-12-Add128.ll b/test/CodeGen/Alpha/2008-11-12-Add128.ll
deleted file mode 100644
index 8b9b603fe6fe..000000000000
--- a/test/CodeGen/Alpha/2008-11-12-Add128.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s
-; PR3044
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f128:128:128"
-target triple = "alphaev6-unknown-linux-gnu"
-
-define i128 @__mulvti3(i128 %u, i128 %v) nounwind {
-entry:
- %0 = load i128* null, align 16 ; <i128> [#uses=1]
- %1 = load i64* null, align 8 ; <i64> [#uses=1]
- %2 = zext i64 %1 to i128 ; <i128> [#uses=1]
- %3 = add i128 %2, %0 ; <i128> [#uses=1]
- store i128 %3, i128* null, align 16
- unreachable
-}
diff --git a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll b/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
deleted file mode 100644
index cfbf7fcdfd90..000000000000
--- a/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-define i1 @a(float %x) {
- %r = fcmp ult float %x, 1.0
- ret i1 %r
-}
diff --git a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index 4590f1245ae9..000000000000
--- a/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=alpha -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll b/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
deleted file mode 100644
index b838ec949eae..000000000000
--- a/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=alpha | FileCheck %s
-
-define fastcc i64 @getcount(i64 %s) {
- %tmp431 = mul i64 %s, 12884901888
- ret i64 %tmp431
-}
-
-; CHECK: sll $16,33,$0
-; CHECK-NEXT: sll $16,32,$1
-; CHECK-NEXT: addq $0,$1,$0
-
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
deleted file mode 100644
index 8a926954c3b1..000000000000
--- a/test/CodeGen/Alpha/add.ll
+++ /dev/null
@@ -1,178 +0,0 @@
-;test all the shifted and signextending adds and subs with and without consts
-;
-; RUN: llc < %s -march=alpha -o %t.s
-; RUN: grep { addl} %t.s | count 2
-; RUN: grep { addq} %t.s | count 2
-; RUN: grep { subl} %t.s | count 2
-; RUN: grep { subq} %t.s | count 2
-;
-; RUN: grep {s4addl} %t.s | count 2
-; RUN: grep {s8addl} %t.s | count 2
-; RUN: grep {s4addq} %t.s | count 2
-; RUN: grep {s8addq} %t.s | count 2
-;
-; RUN: grep {s4subl} %t.s | count 2
-; RUN: grep {s8subl} %t.s | count 2
-; RUN: grep {s4subq} %t.s | count 2
-; RUN: grep {s8subq} %t.s | count 2
-
-
-define signext i32 @al(i32 signext %x.s, i32 signext %y.s) {
-entry:
- %tmp.3.s = add i32 %y.s, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i32 @ali(i32 signext %x.s) {
-entry:
- %tmp.3.s = add i32 100, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i64 @aq(i64 signext %x.s, i64 signext %y.s) {
-entry:
- %tmp.3.s = add i64 %y.s, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define i64 @aqi(i64 %x.s) {
-entry:
- %tmp.3.s = add i64 100, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define signext i32 @sl(i32 signext %x.s, i32 signext %y.s) {
-entry:
- %tmp.3.s = sub i32 %y.s, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i32 @sli(i32 signext %x.s) {
-entry:
- %tmp.3.s = sub i32 %x.s, 100 ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define i64 @sq(i64 %x.s, i64 %y.s) {
-entry:
- %tmp.3.s = sub i64 %y.s, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define i64 @sqi(i64 %x.s) {
-entry:
- %tmp.3.s = sub i64 %x.s, 100 ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define signext i32 @a4l(i32 signext %x.s, i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
- %tmp.3.s = add i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i32 @a8l(i32 signext %x.s, i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
- %tmp.3.s = add i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define i64 @a4q(i64 %x.s, i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1]
- %tmp.3.s = add i64 %tmp.1.s, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define i64 @a8q(i64 %x.s, i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1]
- %tmp.3.s = add i64 %tmp.1.s, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define signext i32 @a4li(i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
- %tmp.3.s = add i32 100, %tmp.1.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i32 @a8li(i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
- %tmp.3.s = add i32 100, %tmp.1.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define i64 @a4qi(i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1]
- %tmp.3.s = add i64 100, %tmp.1.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define i64 @a8qi(i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1]
- %tmp.3.s = add i64 100, %tmp.1.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define signext i32 @s4l(i32 signext %x.s, i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
- %tmp.3.s = sub i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i32 @s8l(i32 signext %x.s, i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
- %tmp.3.s = sub i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define i64 @s4q(i64 %x.s, i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1]
- %tmp.3.s = sub i64 %tmp.1.s, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define i64 @s8q(i64 %x.s, i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1]
- %tmp.3.s = sub i64 %tmp.1.s, %x.s ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define signext i32 @s4li(i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
- %tmp.3.s = sub i32 %tmp.1.s, 100 ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define signext i32 @s8li(i32 signext %y.s) {
-entry:
- %tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
- %tmp.3.s = sub i32 %tmp.1.s, 100 ; <i32> [#uses=1]
- ret i32 %tmp.3.s
-}
-
-define i64 @s4qi(i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 2 ; <i64> [#uses=1]
- %tmp.3.s = sub i64 %tmp.1.s, 100 ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
-
-define i64 @s8qi(i64 %y.s) {
-entry:
- %tmp.1.s = shl i64 %y.s, 3 ; <i64> [#uses=1]
- %tmp.3.s = sub i64 %tmp.1.s, 100 ; <i64> [#uses=1]
- ret i64 %tmp.3.s
-}
diff --git a/test/CodeGen/Alpha/add128.ll b/test/CodeGen/Alpha/add128.ll
deleted file mode 100644
index fa3b949fc7b8..000000000000
--- a/test/CodeGen/Alpha/add128.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;test for ADDC and ADDE expansion
-;
-; RUN: llc < %s -march=alpha
-
-define i128 @add128(i128 %x, i128 %y) {
-entry:
- %tmp = add i128 %y, %x
- ret i128 %tmp
-}
diff --git a/test/CodeGen/Alpha/bic.ll b/test/CodeGen/Alpha/bic.ll
deleted file mode 100644
index 9f0035097b0e..000000000000
--- a/test/CodeGen/Alpha/bic.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the bic instruction
-; RUN: llc < %s -march=alpha | grep {bic}
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
- %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1]
- %tmp.2 = and i64 %y, %tmp.1 ; <i64> [#uses=1]
- ret i64 %tmp.2
-}
diff --git a/test/CodeGen/Alpha/bsr.ll b/test/CodeGen/Alpha/bsr.ll
deleted file mode 100644
index 14f6b46c5490..000000000000
--- a/test/CodeGen/Alpha/bsr.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; Make sure this testcase codegens the bsr instruction
-; RUN: llc < %s -march=alpha | grep bsr
-
-define internal i64 @abc(i32 %x) {
- %tmp.2 = add i32 %x, -1 ; <i32> [#uses=1]
- %tmp.0 = call i64 @abc( i32 %tmp.2 ) ; <i64> [#uses=1]
- %tmp.5 = add i32 %x, -2 ; <i32> [#uses=1]
- %tmp.3 = call i64 @abc( i32 %tmp.5 ) ; <i64> [#uses=1]
- %tmp.6 = add i64 %tmp.0, %tmp.3 ; <i64> [#uses=1]
- ret i64 %tmp.6
-}
-
diff --git a/test/CodeGen/Alpha/call_adj.ll b/test/CodeGen/Alpha/call_adj.ll
deleted file mode 100644
index 24e97a92b86b..000000000000
--- a/test/CodeGen/Alpha/call_adj.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-;All this should do is not crash
-;RUN: llc < %s -march=alpha
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-
-define void @_ZNSt13basic_filebufIcSt11char_traitsIcEE22_M_convert_to_externalEPcl(i32 %f) {
-entry:
- %tmp49 = alloca i8, i32 %f ; <i8*> [#uses=0]
- %tmp = call i32 null( i8* null, i8* null, i8* null, i8* null, i8* null, i8* null, i8* null ) ; <i32> [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/Alpha/cmov.ll b/test/CodeGen/Alpha/cmov.ll
deleted file mode 100644
index 9b655f03efdc..000000000000
--- a/test/CodeGen/Alpha/cmov.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=alpha | not grep cmovlt
-; RUN: llc < %s -march=alpha | grep cmoveq
-
-define i64 @cmov_lt(i64 %a, i64 %c) {
-entry:
- %tmp.1 = icmp slt i64 %c, 0 ; <i1> [#uses=1]
- %retval = select i1 %tmp.1, i64 %a, i64 10 ; <i64> [#uses=1]
- ret i64 %retval
-}
-
-define i64 @cmov_const(i64 %a, i64 %b, i64 %c) {
-entry:
- %tmp.1 = icmp slt i64 %a, %b ; <i1> [#uses=1]
- %retval = select i1 %tmp.1, i64 %c, i64 10 ; <i64> [#uses=1]
- ret i64 %retval
-}
-
-define i64 @cmov_lt2(i64 %a, i64 %c) {
-entry:
- %tmp.1 = icmp sgt i64 %c, 0 ; <i1> [#uses=1]
- %retval = select i1 %tmp.1, i64 10, i64 %a ; <i64> [#uses=1]
- ret i64 %retval
-}
diff --git a/test/CodeGen/Alpha/cmpbge.ll b/test/CodeGen/Alpha/cmpbge.ll
deleted file mode 100644
index e88d2eec75e1..000000000000
--- a/test/CodeGen/Alpha/cmpbge.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=alpha | grep cmpbge | count 2
-
-define i1 @test1(i64 %A, i64 %B) {
- %C = and i64 %A, 255 ; <i64> [#uses=1]
- %D = and i64 %B, 255 ; <i64> [#uses=1]
- %E = icmp uge i64 %C, %D ; <i1> [#uses=1]
- ret i1 %E
-}
-
-define i1 @test2(i64 %a, i64 %B) {
- %A = shl i64 %a, 1 ; <i64> [#uses=1]
- %C = and i64 %A, 254 ; <i64> [#uses=1]
- %D = and i64 %B, 255 ; <i64> [#uses=1]
- %E = icmp uge i64 %C, %D ; <i1> [#uses=1]
- ret i1 %E
-}
diff --git a/test/CodeGen/Alpha/ctlz.ll b/test/CodeGen/Alpha/ctlz.ll
deleted file mode 100644
index aa1588aa39e8..000000000000
--- a/test/CodeGen/Alpha/ctlz.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; Make sure this testcase codegens to the ctlz instruction
-; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctlz
-; RUN: llc < %s -march=alpha -mattr=+CIX | grep -i ctlz
-; RUN: llc < %s -march=alpha -mcpu=ev6 | not grep -i ctlz
-; RUN: llc < %s -march=alpha -mattr=-CIX | not grep -i ctlz
-
-declare i8 @llvm.ctlz.i8(i8)
-
-define i32 @bar(i8 %x) {
-entry:
- %tmp.1 = call i8 @llvm.ctlz.i8( i8 %x )
- %tmp.2 = sext i8 %tmp.1 to i32
- ret i32 %tmp.2
-}
diff --git a/test/CodeGen/Alpha/ctlz_e.ll b/test/CodeGen/Alpha/ctlz_e.ll
deleted file mode 100644
index 230e096b08d2..000000000000
--- a/test/CodeGen/Alpha/ctlz_e.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; Make sure this testcase does not use ctpop
-; RUN: llc < %s -march=alpha | not grep -i ctpop
-
-declare i64 @llvm.ctlz.i64(i64)
-
-define i64 @bar(i64 %x) {
-entry:
- %tmp.1 = call i64 @llvm.ctlz.i64( i64 %x ) ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/ctpop.ll b/test/CodeGen/Alpha/ctpop.ll
deleted file mode 100644
index f887882cec2f..000000000000
--- a/test/CodeGen/Alpha/ctpop.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; Make sure this testcase codegens to the ctpop instruction
-; RUN: llc < %s -march=alpha -mcpu=ev67 | grep -i ctpop
-; RUN: llc < %s -march=alpha -mattr=+CIX | \
-; RUN: grep -i ctpop
-; RUN: llc < %s -march=alpha -mcpu=ev6 | \
-; RUN: not grep -i ctpop
-; RUN: llc < %s -march=alpha -mattr=-CIX | \
-; RUN: not grep -i ctpop
-
-declare i64 @llvm.ctpop.i64(i64)
-
-define i64 @bar(i64 %x) {
-entry:
- %tmp.1 = call i64 @llvm.ctpop.i64( i64 %x ) ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/dg.exp b/test/CodeGen/Alpha/dg.exp
deleted file mode 100644
index fb9f710b295f..000000000000
--- a/test/CodeGen/Alpha/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Alpha] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/CodeGen/Alpha/eqv.ll b/test/CodeGen/Alpha/eqv.ll
deleted file mode 100644
index b3413d6b5dce..000000000000
--- a/test/CodeGen/Alpha/eqv.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the eqv instruction
-; RUN: llc < %s -march=alpha | grep eqv
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
- %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1]
- %tmp.2 = xor i64 %y, %tmp.1 ; <i64> [#uses=1]
- ret i64 %tmp.2
-}
-
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
deleted file mode 100644
index 35b1d08dbd57..000000000000
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the ctpop instruction
-; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
-
-
-define signext i32 @foo(i32 signext %x) {
-entry:
- %tmp.1 = add i32 %x, -1 ; <int> [#uses=1]
- ret i32 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/illegal-element-type.ll b/test/CodeGen/Alpha/illegal-element-type.ll
deleted file mode 100644
index 4cf80dee57b7..000000000000
--- a/test/CodeGen/Alpha/illegal-element-type.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -mtriple=alphaev6-unknown-linux-gnu
-
-define void @foo() {
-entry:
- br label %bb
-
-bb: ; preds = %bb, %entry
- br i1 false, label %bb26, label %bb
-
-bb19: ; preds = %bb26
- ret void
-
-bb26: ; preds = %bb
- br i1 false, label %bb30, label %bb19
-
-bb30: ; preds = %bb26
- br label %bb45
-
-bb45: ; preds = %bb45, %bb30
- %V.0 = phi <8 x i16> [ %tmp42, %bb45 ], [ zeroinitializer, %bb30 ] ; <<8 x i16>> [#uses=1]
- %tmp42 = mul <8 x i16> zeroinitializer, %V.0 ; <<8 x i16>> [#uses=1]
- br label %bb45
-}
diff --git a/test/CodeGen/Alpha/jmp_table.ll b/test/CodeGen/Alpha/jmp_table.ll
deleted file mode 100644
index 917c9327dc16..000000000000
--- a/test/CodeGen/Alpha/jmp_table.ll
+++ /dev/null
@@ -1,99 +0,0 @@
-; try to check that we have the most important instructions, which shouldn't
-; appear otherwise
-; RUN: llc < %s -march=alpha | grep jmp
-; RUN: llc < %s -march=alpha | grep gprel32
-; RUN: llc < %s -march=alpha | grep ldl
-; RUN: llc < %s -march=alpha | grep rodata
-; END.
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-@str = internal constant [2 x i8] c"1\00" ; <[2 x i8]*> [#uses=1]
-@str1 = internal constant [2 x i8] c"2\00" ; <[2 x i8]*> [#uses=1]
-@str2 = internal constant [2 x i8] c"3\00" ; <[2 x i8]*> [#uses=1]
-@str3 = internal constant [2 x i8] c"4\00" ; <[2 x i8]*> [#uses=1]
-@str4 = internal constant [2 x i8] c"5\00" ; <[2 x i8]*> [#uses=1]
-@str5 = internal constant [2 x i8] c"6\00" ; <[2 x i8]*> [#uses=1]
-@str6 = internal constant [2 x i8] c"7\00" ; <[2 x i8]*> [#uses=1]
-@str7 = internal constant [2 x i8] c"8\00" ; <[2 x i8]*> [#uses=1]
-
-define i32 @main(i32 %x, i8** %y) {
-entry:
- %x_addr = alloca i32 ; <i32*> [#uses=2]
- %y_addr = alloca i8** ; <i8***> [#uses=1]
- %retval = alloca i32, align 4 ; <i32*> [#uses=2]
- %tmp = alloca i32, align 4 ; <i32*> [#uses=2]
- %foo = alloca i8*, align 8 ; <i8**> [#uses=9]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 %x, i32* %x_addr
- store i8** %y, i8*** %y_addr
- %tmp.upgrd.1 = load i32* %x_addr ; <i32> [#uses=1]
- switch i32 %tmp.upgrd.1, label %bb15 [
- i32 1, label %bb
- i32 2, label %bb1
- i32 3, label %bb3
- i32 4, label %bb5
- i32 5, label %bb7
- i32 6, label %bb9
- i32 7, label %bb11
- i32 8, label %bb13
- ]
-
-bb: ; preds = %entry
- %tmp.upgrd.2 = getelementptr [2 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp.upgrd.2, i8** %foo
- br label %bb16
-
-bb1: ; preds = %entry
- %tmp2 = getelementptr [2 x i8]* @str1, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp2, i8** %foo
- br label %bb16
-
-bb3: ; preds = %entry
- %tmp4 = getelementptr [2 x i8]* @str2, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp4, i8** %foo
- br label %bb16
-
-bb5: ; preds = %entry
- %tmp6 = getelementptr [2 x i8]* @str3, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp6, i8** %foo
- br label %bb16
-
-bb7: ; preds = %entry
- %tmp8 = getelementptr [2 x i8]* @str4, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp8, i8** %foo
- br label %bb16
-
-bb9: ; preds = %entry
- %tmp10 = getelementptr [2 x i8]* @str5, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp10, i8** %foo
- br label %bb16
-
-bb11: ; preds = %entry
- %tmp12 = getelementptr [2 x i8]* @str6, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp12, i8** %foo
- br label %bb16
-
-bb13: ; preds = %entry
- %tmp14 = getelementptr [2 x i8]* @str7, i32 0, i64 0 ; <i8*> [#uses=1]
- store i8* %tmp14, i8** %foo
- br label %bb16
-
-bb15: ; preds = %entry
- br label %bb16
-
-bb16: ; preds = %bb15, %bb13, %bb11, %bb9, %bb7, %bb5, %bb3, %bb1, %bb
- %tmp17 = load i8** %foo ; <i8*> [#uses=1]
- %tmp18 = call i32 (...)* @print( i8* %tmp17 ) ; <i32> [#uses=0]
- store i32 0, i32* %tmp
- %tmp19 = load i32* %tmp ; <i32> [#uses=1]
- store i32 %tmp19, i32* %retval
- br label %return
-
-return: ; preds = %bb16
- %retval.upgrd.3 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval.upgrd.3
-}
-
-declare i32 @print(...)
-
diff --git a/test/CodeGen/Alpha/mb.ll b/test/CodeGen/Alpha/mb.ll
deleted file mode 100644
index 3268c541b23b..000000000000
--- a/test/CodeGen/Alpha/mb.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=alpha | grep mb
-
-define void @test() {
- fence seq_cst
- ret void
-}
diff --git a/test/CodeGen/Alpha/mul128.ll b/test/CodeGen/Alpha/mul128.ll
deleted file mode 100644
index daf8409409dd..000000000000
--- a/test/CodeGen/Alpha/mul128.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=alpha
-
-define i128 @__mulvdi3(i128 %a, i128 %b) nounwind {
-entry:
- %r = mul i128 %a, %b
- ret i128 %r
-}
diff --git a/test/CodeGen/Alpha/mul5.ll b/test/CodeGen/Alpha/mul5.ll
deleted file mode 100644
index 4075dd6289eb..000000000000
--- a/test/CodeGen/Alpha/mul5.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Make sure this testcase does not use mulq
-; RUN: llc < %s -march=alpha | not grep -i mul
-
-define i64 @foo1(i64 %x) {
-entry:
- %tmp.1 = mul i64 %x, 9 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
-define i64 @foo3(i64 %x) {
-entry:
- %tmp.1 = mul i64 %x, 259 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
-define i64 @foo4l(i64 %x) {
-entry:
- %tmp.1 = mul i64 %x, 260 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
-define i64 @foo8l(i64 %x) {
-entry:
- %tmp.1 = mul i64 %x, 768 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
-define i64 @bar(i64 %x) {
-entry:
- %tmp.1 = mul i64 %x, 5 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/neg1.ll b/test/CodeGen/Alpha/neg1.ll
deleted file mode 100644
index 0db767f68e51..000000000000
--- a/test/CodeGen/Alpha/neg1.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; Make sure this testcase codegens to the lda -1 instruction
-; RUN: llc < %s -march=alpha | grep {\\-1}
-
-define i64 @bar() {
-entry:
- ret i64 -1
-}
diff --git a/test/CodeGen/Alpha/not.ll b/test/CodeGen/Alpha/not.ll
deleted file mode 100644
index 4f0a5c2946ef..000000000000
--- a/test/CodeGen/Alpha/not.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; Make sure this testcase codegens to the ornot instruction
-; RUN: llc < %s -march=alpha | grep eqv
-
-define i64 @bar(i64 %x) {
-entry:
- %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/ornot.ll b/test/CodeGen/Alpha/ornot.ll
deleted file mode 100644
index f930e345ce42..000000000000
--- a/test/CodeGen/Alpha/ornot.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the ornot instruction
-; RUN: llc < %s -march=alpha | grep ornot
-
-define i64 @bar(i64 %x, i64 %y) {
-entry:
- %tmp.1 = xor i64 %x, -1 ; <i64> [#uses=1]
- %tmp.2 = or i64 %y, %tmp.1 ; <i64> [#uses=1]
- ret i64 %tmp.2
-}
-
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
deleted file mode 100644
index f8d30940c0c2..000000000000
--- a/test/CodeGen/Alpha/private.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; Test to make sure that the 'private' is used correctly.
-;
-; RUN: llc < %s -march=alpha > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep bsr.*\\\$\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep ldah.*\\\$baz %t
-
-define private void @foo() {
- ret void
-}
-
-@baz = private global i32 4
-
-define i32 @bar() {
- call void @foo()
- %1 = load i32* @baz, align 4
- ret i32 %1
-}
diff --git a/test/CodeGen/Alpha/rpcc.ll b/test/CodeGen/Alpha/rpcc.ll
deleted file mode 100644
index d6665b5d8d6f..000000000000
--- a/test/CodeGen/Alpha/rpcc.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=alpha | grep rpcc
-
-declare i64 @llvm.readcyclecounter()
-
-define i64 @foo() {
-entry:
- %tmp.1 = call i64 @llvm.readcyclecounter( ) ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/srl_and.ll b/test/CodeGen/Alpha/srl_and.ll
deleted file mode 100644
index 3042ef3d0237..000000000000
--- a/test/CodeGen/Alpha/srl_and.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; Make sure this testcase codegens to the zapnot instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @foo(i64 %y) {
-entry:
- %tmp = lshr i64 %y, 3 ; <i64> [#uses=1]
- %tmp2 = and i64 %tmp, 8191 ; <i64> [#uses=1]
- ret i64 %tmp2
-}
-
diff --git a/test/CodeGen/Alpha/sub128.ll b/test/CodeGen/Alpha/sub128.ll
deleted file mode 100644
index d26404bfe024..000000000000
--- a/test/CodeGen/Alpha/sub128.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-;test for SUBC and SUBE expansion
-;
-; RUN: llc < %s -march=alpha
-
-define i128 @sub128(i128 %x, i128 %y) {
-entry:
- %tmp = sub i128 %y, %x
- ret i128 %tmp
-}
diff --git a/test/CodeGen/Alpha/weak.ll b/test/CodeGen/Alpha/weak.ll
deleted file mode 100644
index ff04de9ef467..000000000000
--- a/test/CodeGen/Alpha/weak.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=alpha | grep .weak.*f
-; RUN: llc < %s -march=alpha | grep .weak.*h
-
-define weak i32 @f() {
-entry:
- unreachable
-}
-
-define void @g() {
-entry:
- tail call void @h( )
- ret void
-}
-
-declare extern_weak void @h()
-
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
deleted file mode 100644
index a47035e7f0e6..000000000000
--- a/test/CodeGen/Alpha/zapnot.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the bic instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-
-define zeroext i16 @foo(i64 %y) {
-entry:
- %tmp.1 = trunc i64 %y to i16 ; <ushort> [#uses=1]
- ret i16 %tmp.1
-}
diff --git a/test/CodeGen/Alpha/zapnot2.ll b/test/CodeGen/Alpha/zapnot2.ll
deleted file mode 100644
index cd3caae41d5a..000000000000
--- a/test/CodeGen/Alpha/zapnot2.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Make sure this testcase codegens to the zapnot instruction
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @bar(i64 %x) {
-entry:
- %tmp.1 = and i64 %x, 16711935 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/Alpha/zapnot3.ll b/test/CodeGen/Alpha/zapnot3.ll
deleted file mode 100644
index f02961f1eaec..000000000000
--- a/test/CodeGen/Alpha/zapnot3.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=alpha | grep zapnot
-
-;demanded bits mess up this mask in a hard to fix way
-;define i64 @foo(i64 %y) {
-; %tmp = and i64 %y, 65535
-; %tmp2 = shr i64 %tmp, i8 3
-; ret i64 %tmp2
-;}
-
-define i64 @foo2(i64 %y) {
- %tmp = lshr i64 %y, 3 ; <i64> [#uses=1]
- %tmp2 = and i64 %tmp, 8191 ; <i64> [#uses=1]
- ret i64 %tmp2
-}
-
diff --git a/test/CodeGen/Alpha/zapnot4.ll b/test/CodeGen/Alpha/zapnot4.ll
deleted file mode 100644
index 89beeef2d810..000000000000
--- a/test/CodeGen/Alpha/zapnot4.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=alpha | grep zapnot
-
-define i64 @foo(i64 %y) {
- %tmp = shl i64 %y, 3 ; <i64> [#uses=1]
- %tmp2 = and i64 %tmp, 65535 ; <i64> [#uses=1]
- ret i64 %tmp2
-}
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
deleted file mode 100644
index 50fccb440990..000000000000
--- a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs
-; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy
-
-; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a
-; super-register is illegally extended.
-
-define i16 @f(i16 %x1, i16 %x2, i16 %x3, i16 %x4) {
- %y1 = add i16 %x1, 1
- %y2 = add i16 %x2, 2
- %y3 = add i16 %x3, 3
- %y4 = add i16 %x4, 4
- %z12 = add i16 %y1, %y2
- %z34 = add i16 %y3, %y4
- %p = add i16 %z12, %z34
- ret i16 %p
-}
diff --git a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll b/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
deleted file mode 100644
index e5d1637a50cb..000000000000
--- a/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
-
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
-
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
-
-define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
- %a = call i8 @llvm.cttz.i8(i8 %A) ; <i8> [#uses=1]
- %b = call i16 @llvm.cttz.i16(i16 %B) ; <i16> [#uses=1]
- %d = call i64 @llvm.cttz.i64(i64 %D) ; <i64> [#uses=1]
- store i8 %a, i8* %AP
- store i16 %b, i16* %BP
- store i64 %d, i64* %DP
- ret void
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll b/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
deleted file mode 100644
index 0b731dccd19f..000000000000
--- a/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; When joining live intervals of sub-registers, an MBB live-in list is not
-; updated properly. The register scavenger asserts on an undefined register.
-
-define i32 @foo(i8 %bar) {
-entry:
- switch i8 %bar, label %bb1203 [
- i8 117, label %bb1204
- i8 85, label %bb1204
- i8 106, label %bb1204
- ]
-
-bb1203: ; preds = %entry
- ret i32 1
-
-bb1204: ; preds = %entry, %entry, %entry
- ret i32 2
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll b/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
deleted file mode 100644
index dcc3ea0dec88..000000000000
--- a/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; LocalRewriter can forget to transfer a <def,dead> flag when setting up call
-; argument registers. This then causes register scavenger asserts.
-
-declare i32 @printf(i8*, i32, float)
-
-define i32 @testissue(i32 %i, float %x, float %y) {
- br label %bb1
-
-bb1: ; preds = %bb1, %0
- %x2 = fmul float %x, 5.000000e-01 ; <float> [#uses=1]
- %y2 = fmul float %y, 0x3FECCCCCC0000000 ; <float> [#uses=1]
- %z2 = fadd float %x2, %y2 ; <float> [#uses=1]
- %z3 = fadd float undef, %z2 ; <float> [#uses=1]
- %i1 = shl i32 %i, 3 ; <i32> [#uses=1]
- %j1 = add i32 %i, 7 ; <i32> [#uses=1]
- %m1 = add i32 %i1, %j1 ; <i32> [#uses=2]
- %b = icmp sle i32 %m1, 6 ; <i1> [#uses=1]
- br i1 %b, label %bb1, label %bb2
-
-bb2: ; preds = %bb1
- %1 = call i32 @printf(i8* undef, i32 %m1, float %z3); <i32> [#uses=0]
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
deleted file mode 100644
index b6cd2d40d1af..000000000000
--- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; An undef argument causes a setugt node to escape instruction selection.
-
-define void @bugt() {
-cond_next305:
- %tmp306307 = trunc i32 undef to i8 ; <i8> [#uses=1]
- %tmp308 = icmp ugt i8 %tmp306307, 6 ; <i1> [#uses=1]
- br i1 %tmp308, label %bb311, label %bb314
-
-bb311: ; preds = %cond_next305
- unreachable
-
-bb314: ; preds = %cond_next305
- ret void
-}
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
deleted file mode 100644
index 8dcf3f84e962..000000000000
--- a/test/CodeGen/Blackfin/add-overflow.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
- %0 = type { i24, i1 } ; type %0
-
-define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
-entry:
- %t = call %0 @llvm.uadd.with.overflow.i24(i24 %v1, i24 %v2) ; <%0> [#uses=1]
- %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
- br i1 %obit, label %carry, label %normal
-
-normal: ; preds = %entry
- ret i1 true
-
-carry: ; preds = %entry
- ret i1 false
-}
-
-declare %0 @llvm.uadd.with.overflow.i24(i24, i24) nounwind
diff --git a/test/CodeGen/Blackfin/add.ll b/test/CodeGen/Blackfin/add.ll
deleted file mode 100644
index 3311c03199ee..000000000000
--- a/test/CodeGen/Blackfin/add.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-define i32 @add(i32 %A, i32 %B) {
- %R = add i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/addsub-i128.ll b/test/CodeGen/Blackfin/addsub-i128.ll
deleted file mode 100644
index dd5610120b4d..000000000000
--- a/test/CodeGen/Blackfin/addsub-i128.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; These functions have just the right size to annoy the register scavenger: They
-; use all the scratch registers, but not all the callee-saved registers.
-
-define void @test_add(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
- %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1]
- %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1]
- %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1]
- %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1]
- %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1]
- %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1]
- %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1]
- %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1]
- %tmp15 = add i128 %tmp12, %tmp5 ; <i128> [#uses=2]
- %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1]
- store i64 %tmp1617, i64* %RL
- %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1]
- %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1]
- store i64 %tmp2122, i64* %RH
- ret void
-}
-
-define void @test_sub(i64 %AL, i64 %AH, i64 %BL, i64 %BH, i64* %RL, i64* %RH) {
-entry:
- %tmp1 = zext i64 %AL to i128 ; <i128> [#uses=1]
- %tmp23 = zext i64 %AH to i128 ; <i128> [#uses=1]
- %tmp4 = shl i128 %tmp23, 64 ; <i128> [#uses=1]
- %tmp5 = or i128 %tmp4, %tmp1 ; <i128> [#uses=1]
- %tmp67 = zext i64 %BL to i128 ; <i128> [#uses=1]
- %tmp89 = zext i64 %BH to i128 ; <i128> [#uses=1]
- %tmp11 = shl i128 %tmp89, 64 ; <i128> [#uses=1]
- %tmp12 = or i128 %tmp11, %tmp67 ; <i128> [#uses=1]
- %tmp15 = sub i128 %tmp5, %tmp12 ; <i128> [#uses=2]
- %tmp1617 = trunc i128 %tmp15 to i64 ; <i64> [#uses=1]
- store i64 %tmp1617, i64* %RL
- %tmp21 = lshr i128 %tmp15, 64 ; <i128> [#uses=1]
- %tmp2122 = trunc i128 %tmp21 to i64 ; <i64> [#uses=1]
- store i64 %tmp2122, i64* %RH
- ret void
-}
diff --git a/test/CodeGen/Blackfin/basic-i1.ll b/test/CodeGen/Blackfin/basic-i1.ll
deleted file mode 100644
index c63adaba06cf..000000000000
--- a/test/CodeGen/Blackfin/basic-i1.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-define i1 @add(i1 %A, i1 %B) {
- %R = add i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @sub(i1 %A, i1 %B) {
- %R = sub i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @mul(i1 %A, i1 %B) {
- %R = mul i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @sdiv(i1 %A, i1 %B) {
- %R = sdiv i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @udiv(i1 %A, i1 %B) {
- %R = udiv i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @srem(i1 %A, i1 %B) {
- %R = srem i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @urem(i1 %A, i1 %B) {
- %R = urem i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @and(i1 %A, i1 %B) {
- %R = and i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @or(i1 %A, i1 %B) {
- %R = or i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
-
-define i1 @xor(i1 %A, i1 %B) {
- %R = xor i1 %A, %B ; <i1> [#uses=1]
- ret i1 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i16.ll b/test/CodeGen/Blackfin/basic-i16.ll
deleted file mode 100644
index 541e9a8dc948..000000000000
--- a/test/CodeGen/Blackfin/basic-i16.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i16 @add(i16 %A, i16 %B) {
- %R = add i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @sub(i16 %A, i16 %B) {
- %R = sub i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @mul(i16 %A, i16 %B) {
- %R = mul i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @sdiv(i16 %A, i16 %B) {
- %R = sdiv i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @udiv(i16 %A, i16 %B) {
- %R = udiv i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @srem(i16 %A, i16 %B) {
- %R = srem i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @urem(i16 %A, i16 %B) {
- %R = urem i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i32.ll b/test/CodeGen/Blackfin/basic-i32.ll
deleted file mode 100644
index 4b5dbfcb957e..000000000000
--- a/test/CodeGen/Blackfin/basic-i32.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i32 @add(i32 %A, i32 %B) {
- %R = add i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @sub(i32 %A, i32 %B) {
- %R = sub i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @mul(i32 %A, i32 %B) {
- %R = mul i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @sdiv(i32 %A, i32 %B) {
- %R = sdiv i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @udiv(i32 %A, i32 %B) {
- %R = udiv i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @srem(i32 %A, i32 %B) {
- %R = srem i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @urem(i32 %A, i32 %B) {
- %R = urem i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @and(i32 %A, i32 %B) {
- %R = and i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @or(i32 %A, i32 %B) {
- %R = or i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define i32 @xor(i32 %A, i32 %B) {
- %R = xor i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i64.ll b/test/CodeGen/Blackfin/basic-i64.ll
deleted file mode 100644
index d4dd8e2703bf..000000000000
--- a/test/CodeGen/Blackfin/basic-i64.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i64 @add(i64 %A, i64 %B) {
- %R = add i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @sub(i64 %A, i64 %B) {
- %R = sub i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @mul(i64 %A, i64 %B) {
- %R = mul i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @sdiv(i64 %A, i64 %B) {
- %R = sdiv i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @udiv(i64 %A, i64 %B) {
- %R = udiv i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @srem(i64 %A, i64 %B) {
- %R = srem i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @urem(i64 %A, i64 %B) {
- %R = urem i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @and(i64 %A, i64 %B) {
- %R = and i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @or(i64 %A, i64 %B) {
- %R = or i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
-
-define i64 @xor(i64 %A, i64 %B) {
- %R = xor i64 %A, %B ; <i64> [#uses=1]
- ret i64 %R
-}
diff --git a/test/CodeGen/Blackfin/basic-i8.ll b/test/CodeGen/Blackfin/basic-i8.ll
deleted file mode 100644
index 2c7ce9d1015a..000000000000
--- a/test/CodeGen/Blackfin/basic-i8.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i8 @add(i8 %A, i8 %B) {
- %R = add i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @sub(i8 %A, i8 %B) {
- %R = sub i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @mul(i8 %A, i8 %B) {
- %R = mul i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @sdiv(i8 %A, i8 %B) {
- %R = sdiv i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @udiv(i8 %A, i8 %B) {
- %R = udiv i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @srem(i8 %A, i8 %B) {
- %R = srem i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @urem(i8 %A, i8 %B) {
- %R = urem i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @and(i8 %A, i8 %B) {
- %R = and i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @or(i8 %A, i8 %B) {
- %R = or i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
-
-define i8 @xor(i8 %A, i8 %B) {
- %R = xor i8 %A, %B ; <i8> [#uses=1]
- ret i8 %R
-}
diff --git a/test/CodeGen/Blackfin/basictest.ll b/test/CodeGen/Blackfin/basictest.ll
deleted file mode 100644
index 85040df0fde5..000000000000
--- a/test/CodeGen/Blackfin/basictest.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define void @void(i32, i32) {
- add i32 0, 0 ; <i32>:3 [#uses=2]
- sub i32 0, 4 ; <i32>:4 [#uses=2]
- br label %5
-
-; <label>:5 ; preds = %5, %2
- add i32 %0, %1 ; <i32>:6 [#uses=2]
- sub i32 %6, %4 ; <i32>:7 [#uses=1]
- icmp sle i32 %7, %3 ; <i1>:8 [#uses=1]
- br i1 %8, label %9, label %5
-
-; <label>:9 ; preds = %5
- add i32 %0, %1 ; <i32>:10 [#uses=0]
- sub i32 %6, %4 ; <i32>:11 [#uses=1]
- icmp sle i32 %11, %3 ; <i1>:12 [#uses=0]
- ret void
-}
diff --git a/test/CodeGen/Blackfin/cmp-small-imm.ll b/test/CodeGen/Blackfin/cmp-small-imm.ll
deleted file mode 100644
index e1732a8f806b..000000000000
--- a/test/CodeGen/Blackfin/cmp-small-imm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-define i1 @cmp3(i32 %A) {
- %R = icmp uge i32 %A, 2
- ret i1 %R
-}
diff --git a/test/CodeGen/Blackfin/cmp64.ll b/test/CodeGen/Blackfin/cmp64.ll
deleted file mode 100644
index 6c4f9c5bd7fd..000000000000
--- a/test/CodeGen/Blackfin/cmp64.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; This test tries to use a JustCC register as a data operand for MOVEcc. It
-; copies (JustCC -> DP), failing because JustCC can only be copied to D.
-; The proper solution would be to restrict the virtual register to D only.
-
-define i32 @main() {
-entry:
- br label %loopentry
-
-loopentry:
- %done = icmp sle i64 undef, 5
- br i1 %done, label %loopentry, label %exit.1
-
-exit.1:
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/ct32.ll b/test/CodeGen/Blackfin/ct32.ll
deleted file mode 100644
index 363286d4b2f5..000000000000
--- a/test/CodeGen/Blackfin/ct32.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
-declare i32 @llvm.ctpop.i32(i32)
-
-define i32 @ctlztest(i32 %B) {
- %b = call i32 @llvm.ctlz.i32( i32 %B )
- ret i32 %b
-}
-
-define i32 @cttztest(i32 %B) {
- %b = call i32 @llvm.cttz.i32( i32 %B )
- ret i32 %b
-}
-
-define i32 @ctpoptest(i32 %B) {
- %b = call i32 @llvm.ctpop.i32( i32 %B )
- ret i32 %b
-}
diff --git a/test/CodeGen/Blackfin/ct64.ll b/test/CodeGen/Blackfin/ct64.ll
deleted file mode 100644
index 75024343ea4e..000000000000
--- a/test/CodeGen/Blackfin/ct64.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i64 @llvm.ctlz.i64(i64)
-declare i64 @llvm.cttz.i64(i64)
-declare i64 @llvm.ctpop.i64(i64)
-
-define i64 @ctlztest(i64 %B) {
- %b = call i64 @llvm.ctlz.i64( i64 %B )
- ret i64 %b
-}
-
-define i64 @cttztest(i64 %B) {
- %b = call i64 @llvm.cttz.i64( i64 %B )
- ret i64 %b
-}
-
-define i64 @ctpoptest(i64 %B) {
- %b = call i64 @llvm.ctpop.i64( i64 %B )
- ret i64 %b
-}
diff --git a/test/CodeGen/Blackfin/ctlz16.ll b/test/CodeGen/Blackfin/ctlz16.ll
deleted file mode 100644
index eb4af232cfef..000000000000
--- a/test/CodeGen/Blackfin/ctlz16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.ctlz.i16(i16)
-
-define i16 @ctlztest(i16 %B) {
- %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-define i16 @ctlztest_z(i16 zeroext %B) {
- %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-
-define i16 @ctlztest_s(i16 signext %B) {
- %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/ctlz64.ll b/test/CodeGen/Blackfin/ctlz64.ll
deleted file mode 100644
index 3e22f8843553..000000000000
--- a/test/CodeGen/Blackfin/ctlz64.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-@.str = external constant [14 x i8] ; <[14 x i8]*> [#uses=1]
-
-define i32 @main(i64 %arg) nounwind {
-entry:
- %tmp47 = tail call i64 @llvm.cttz.i64(i64 %arg) ; <i64> [#uses=1]
- %tmp48 = trunc i64 %tmp47 to i32 ; <i32> [#uses=1]
- %tmp40 = tail call i32 (i8*, ...)* @printf(i8* noalias getelementptr ([14 x i8]* @.str, i32 0, i32 0), i64 %arg, i32 0, i32 %tmp48, i32 0) nounwind ; <i32> [#uses=0]
- ret i32 0
-}
-
-declare i32 @printf(i8* noalias, ...) nounwind
-
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Blackfin/ctpop16.ll b/test/CodeGen/Blackfin/ctpop16.ll
deleted file mode 100644
index 8b6c07ef28a8..000000000000
--- a/test/CodeGen/Blackfin/ctpop16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.ctpop.i16(i16)
-
-define i16 @ctpoptest(i16 %B) {
- %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-define i16 @ctpoptest_z(i16 zeroext %B) {
- %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-
-define i16 @ctpoptest_s(i16 signext %B) {
- %b = call i16 @llvm.ctpop.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/cttz16.ll b/test/CodeGen/Blackfin/cttz16.ll
deleted file mode 100644
index 510882ad41fa..000000000000
--- a/test/CodeGen/Blackfin/cttz16.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i16 @llvm.cttz.i16(i16)
-
-define i16 @cttztest(i16 %B) {
- %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-define i16 @cttztest_z(i16 zeroext %B) {
- %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-
-define i16 @cttztest_s(i16 signext %B) {
- %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- ret i16 %b
-}
-
diff --git a/test/CodeGen/Blackfin/cycles.ll b/test/CodeGen/Blackfin/cycles.ll
deleted file mode 100644
index 6451c747bd70..000000000000
--- a/test/CodeGen/Blackfin/cycles.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=bfin | FileCheck %s
-
-declare i64 @llvm.readcyclecounter()
-
-; CHECK: cycles
-; CHECK: cycles2
-define i64 @cyc64() {
- %tmp.1 = call i64 @llvm.readcyclecounter()
- ret i64 %tmp.1
-}
-
-; CHECK: cycles
-define i32@cyc32() {
- %tmp.1 = call i64 @llvm.readcyclecounter()
- %s = trunc i64 %tmp.1 to i32
- ret i32 %s
-}
diff --git a/test/CodeGen/Blackfin/dg.exp b/test/CodeGen/Blackfin/dg.exp
deleted file mode 100644
index 5fdbe5feb087..000000000000
--- a/test/CodeGen/Blackfin/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Blackfin] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Blackfin/double-cast.ll b/test/CodeGen/Blackfin/double-cast.ll
deleted file mode 100644
index 815ca797d752..000000000000
--- a/test/CodeGen/Blackfin/double-cast.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
- %1 = call i32 (i8*, ...)* @printf(i8* undef, double undef)
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/frameindex.ll b/test/CodeGen/Blackfin/frameindex.ll
deleted file mode 100644
index 7e677fbf18cf..000000000000
--- a/test/CodeGen/Blackfin/frameindex.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-declare i32 @SIM(i8*, i8*, i32, i32, i32, [256 x i32]*, i32, i32, i32)
-
-define void @foo() {
-bb0:
- %V = alloca [256 x i32], i32 256 ; <[256 x i32]*> [#uses=1]
- %0 = call i32 @SIM(i8* null, i8* null, i32 0, i32 0, i32 0, [256 x i32]* %V, i32 0, i32 0, i32 2) ; <i32> [#uses=0]
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i17mem.ll b/test/CodeGen/Blackfin/i17mem.ll
deleted file mode 100644
index bc5ade7416fa..000000000000
--- a/test/CodeGen/Blackfin/i17mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i17_l = external global i17 ; <i17*> [#uses=1]
-@i17_s = external global i17 ; <i17*> [#uses=1]
-
-define void @i17_ls() nounwind {
- %tmp = load i17* @i17_l ; <i17> [#uses=1]
- store i17 %tmp, i17* @i17_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i1mem.ll b/test/CodeGen/Blackfin/i1mem.ll
deleted file mode 100644
index cb03e3d7fcb0..000000000000
--- a/test/CodeGen/Blackfin/i1mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i1_l = external global i1 ; <i1*> [#uses=1]
-@i1_s = external global i1 ; <i1*> [#uses=1]
-
-define void @i1_ls() nounwind {
- %tmp = load i1* @i1_l ; <i1> [#uses=1]
- store i1 %tmp, i1* @i1_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i1ops.ll b/test/CodeGen/Blackfin/i1ops.ll
deleted file mode 100644
index 6b5612cc4997..000000000000
--- a/test/CodeGen/Blackfin/i1ops.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-define i32 @adj(i32 %d.1, i32 %ct.1) {
-entry:
- %tmp.22.not = trunc i32 %ct.1 to i1 ; <i1> [#uses=1]
- %tmp.221 = xor i1 %tmp.22.not, true ; <i1> [#uses=1]
- %tmp.26 = or i1 false, %tmp.221 ; <i1> [#uses=1]
- %tmp.27 = zext i1 %tmp.26 to i32 ; <i32> [#uses=1]
- ret i32 %tmp.27
-}
diff --git a/test/CodeGen/Blackfin/i216mem.ll b/test/CodeGen/Blackfin/i216mem.ll
deleted file mode 100644
index 9f8cf48e8756..000000000000
--- a/test/CodeGen/Blackfin/i216mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i216_l = external global i216 ; <i216*> [#uses=1]
-@i216_s = external global i216 ; <i216*> [#uses=1]
-
-define void @i216_ls() nounwind {
- %tmp = load i216* @i216_l ; <i216> [#uses=1]
- store i216 %tmp, i216* @i216_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i248mem.ll b/test/CodeGen/Blackfin/i248mem.ll
deleted file mode 100644
index db23f541adcb..000000000000
--- a/test/CodeGen/Blackfin/i248mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin
-@i248_l = external global i248 ; <i248*> [#uses=1]
-@i248_s = external global i248 ; <i248*> [#uses=1]
-
-define void @i248_ls() nounwind {
- %tmp = load i248* @i248_l ; <i248> [#uses=1]
- store i248 %tmp, i248* @i248_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i256mem.ll b/test/CodeGen/Blackfin/i256mem.ll
deleted file mode 100644
index bc5ade7416fa..000000000000
--- a/test/CodeGen/Blackfin/i256mem.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i17_l = external global i17 ; <i17*> [#uses=1]
-@i17_s = external global i17 ; <i17*> [#uses=1]
-
-define void @i17_ls() nounwind {
- %tmp = load i17* @i17_l ; <i17> [#uses=1]
- store i17 %tmp, i17* @i17_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i256param.ll b/test/CodeGen/Blackfin/i256param.ll
deleted file mode 100644
index df74c9a6e0e8..000000000000
--- a/test/CodeGen/Blackfin/i256param.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i256_s = external global i256 ; <i256*> [#uses=1]
-
-define void @i256_ls(i256 %x) nounwind {
- store i256 %x, i256* @i256_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i56param.ll b/test/CodeGen/Blackfin/i56param.ll
deleted file mode 100644
index ca0256391b1f..000000000000
--- a/test/CodeGen/Blackfin/i56param.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@i56_l = external global i56 ; <i56*> [#uses=1]
-@i56_s = external global i56 ; <i56*> [#uses=1]
-
-define void @i56_ls(i56 %x) nounwind {
- store i56 %x, i56* @i56_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/i8mem.ll b/test/CodeGen/Blackfin/i8mem.ll
deleted file mode 100644
index ea3a67e4994c..000000000000
--- a/test/CodeGen/Blackfin/i8mem.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-@i8_l = external global i8 ; <i8*> [#uses=1]
-@i8_s = external global i8 ; <i8*> [#uses=1]
-
-define void @i8_ls() nounwind {
- %tmp = load i8* @i8_l ; <i8> [#uses=1]
- store i8 %tmp, i8* @i8_s
- ret void
-}
diff --git a/test/CodeGen/Blackfin/inline-asm.ll b/test/CodeGen/Blackfin/inline-asm.ll
deleted file mode 100644
index d623f6bd95aa..000000000000
--- a/test/CodeGen/Blackfin/inline-asm.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=bfin | FileCheck %s
-
-; Standard "r"
-; CHECK: r0 = r0 + r1;
-define i32 @add_r(i32 %A, i32 %B) {
- %R = call i32 asm "$0 = $1 + $2;", "=r,r,r"( i32 %A, i32 %B ) nounwind
- ret i32 %R
-}
-
-; Target "d"
-; CHECK: r0 = r0 - r1;
-define i32 @add_d(i32 %A, i32 %B) {
- %R = call i32 asm "$0 = $1 - $2;", "=d,d,d"( i32 %A, i32 %B ) nounwind
- ret i32 %R
-}
-
-; Target "a" for P-regs
-; CHECK: p0 = (p0 + p1) << 1;
-define i32 @add_a(i32 %A, i32 %B) {
- %R = call i32 asm "$0 = ($1 + $2) << 1;", "=a,a,a"( i32 %A, i32 %B ) nounwind
- ret i32 %R
-}
-
-; Target "z" for P0, P1, P2. This is not a real regclass
-; CHECK: p0 = (p0 + p1) << 2;
-define i32 @add_Z(i32 %A, i32 %B) {
- %R = call i32 asm "$0 = ($1 + $2) << 2;", "=z,z,z"( i32 %A, i32 %B ) nounwind
- ret i32 %R
-}
-
-; Target "C" for CC. This is a single register
-; CHECK: cc = p0 < p1;
-; CHECK: r0 = cc;
-define i32 @add_C(i32 %A, i32 %B) {
- %R = call i32 asm "$0 = $1 < $2;", "=C,z,z"( i32 %A, i32 %B ) nounwind
- ret i32 %R
-}
-
diff --git a/test/CodeGen/Blackfin/int-setcc.ll b/test/CodeGen/Blackfin/int-setcc.ll
deleted file mode 100644
index 6bd9f86a999c..000000000000
--- a/test/CodeGen/Blackfin/int-setcc.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define fastcc void @Evaluate() {
-entry:
- br i1 false, label %cond_false186, label %cond_true
-
-cond_true: ; preds = %entry
- ret void
-
-cond_false186: ; preds = %entry
- br i1 false, label %cond_true293, label %bb203
-
-bb203: ; preds = %cond_false186
- ret void
-
-cond_true293: ; preds = %cond_false186
- br i1 false, label %cond_true298, label %cond_next317
-
-cond_true298: ; preds = %cond_true293
- br i1 false, label %cond_next518, label %cond_true397.preheader
-
-cond_next317: ; preds = %cond_true293
- ret void
-
-cond_true397.preheader: ; preds = %cond_true298
- ret void
-
-cond_next518: ; preds = %cond_true298
- br i1 false, label %bb1069, label %cond_true522
-
-cond_true522: ; preds = %cond_next518
- ret void
-
-bb1069: ; preds = %cond_next518
- br i1 false, label %cond_next1131, label %bb1096
-
-bb1096: ; preds = %bb1069
- ret void
-
-cond_next1131: ; preds = %bb1069
- br i1 false, label %cond_next1207, label %cond_true1150
-
-cond_true1150: ; preds = %cond_next1131
- ret void
-
-cond_next1207: ; preds = %cond_next1131
- br i1 false, label %cond_next1219, label %cond_true1211
-
-cond_true1211: ; preds = %cond_next1207
- ret void
-
-cond_next1219: ; preds = %cond_next1207
- br i1 false, label %cond_true1223, label %cond_next1283
-
-cond_true1223: ; preds = %cond_next1219
- br i1 false, label %cond_true1254, label %cond_true1264
-
-cond_true1254: ; preds = %cond_true1223
- br i1 false, label %bb1567, label %cond_true1369.preheader
-
-cond_true1264: ; preds = %cond_true1223
- ret void
-
-cond_next1283: ; preds = %cond_next1219
- ret void
-
-cond_true1369.preheader: ; preds = %cond_true1254
- ret void
-
-bb1567: ; preds = %cond_true1254
- %tmp1605 = load i8* null ; <i8> [#uses=1]
- %tmp1606 = icmp eq i8 %tmp1605, 0 ; <i1> [#uses=1]
- br i1 %tmp1606, label %cond_next1637, label %cond_true1607
-
-cond_true1607: ; preds = %bb1567
- ret void
-
-cond_next1637: ; preds = %bb1567
- ret void
-}
diff --git a/test/CodeGen/Blackfin/invalid-apint.ll b/test/CodeGen/Blackfin/invalid-apint.ll
deleted file mode 100644
index a8c01ba65f88..000000000000
--- a/test/CodeGen/Blackfin/invalid-apint.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; Assertion failed: (width < BitWidth && "Invalid APInt Truncate request"),
-; function trunc, file APInt.cpp, line 956.
-
-@str2 = external global [29 x i8]
-
-define void @printArgsNoRet(i32 %a1, float %a2, i8 %a3, double %a4, i8* %a5, i32 %a6, float %a7, i8 %a8, double %a9, i8* %a10, i32 %a11, float %a12, i8 %a13, double %a14, i8* %a15) {
-entry:
- %tmp17 = sext i8 %a13 to i32
- %tmp23 = call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @str2, i32 0, i64 0), i32 %a11, double 0.000000e+00, i32 %tmp17, double %a14, i32 0)
- ret void
-}
-
-declare i32 @printf(i8*, ...)
diff --git a/test/CodeGen/Blackfin/jumptable.ll b/test/CodeGen/Blackfin/jumptable.ll
deleted file mode 100644
index 263533c00097..000000000000
--- a/test/CodeGen/Blackfin/jumptable.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
-
-; CHECK: .section .rodata
-; CHECK: JTI0_0:
-; CHECK: .long .BB0_1
-
-define i32 @oper(i32 %op, i32 %A, i32 %B) {
-entry:
- switch i32 %op, label %bbx [
- i32 1 , label %bb1
- i32 2 , label %bb2
- i32 3 , label %bb3
- i32 4 , label %bb4
- i32 5 , label %bb5
- i32 6 , label %bb6
- i32 7 , label %bb7
- i32 8 , label %bb8
- i32 9 , label %bb9
- i32 10, label %bb10
- ]
-bb1:
- %R1 = add i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R1
-bb2:
- %R2 = sub i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R2
-bb3:
- %R3 = mul i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R3
-bb4:
- %R4 = sdiv i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R4
-bb5:
- %R5 = udiv i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R5
-bb6:
- %R6 = srem i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R6
-bb7:
- %R7 = urem i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R7
-bb8:
- %R8 = and i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R8
-bb9:
- %R9 = or i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R9
-bb10:
- %R10 = xor i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R10
-bbx:
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/large-switch.ll b/test/CodeGen/Blackfin/large-switch.ll
deleted file mode 100644
index 02d32ef85f12..000000000000
--- a/test/CodeGen/Blackfin/large-switch.ll
+++ /dev/null
@@ -1,187 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; The switch expansion uses a dynamic shl, and it produces a jumptable
-
-define void @athlon_fp_unit_ready_cost() {
-entry:
- switch i32 0, label %UnifiedReturnBlock [
- i32 -1, label %bb2063
- i32 19, label %bb2035
- i32 20, label %bb2035
- i32 21, label %bb2035
- i32 23, label %bb2035
- i32 24, label %bb2035
- i32 27, label %bb2035
- i32 32, label %bb2035
- i32 33, label %bb1994
- i32 35, label %bb2035
- i32 36, label %bb1994
- i32 90, label %bb1948
- i32 94, label %bb1948
- i32 95, label %bb1948
- i32 133, label %bb1419
- i32 135, label %bb1238
- i32 136, label %bb1238
- i32 137, label %bb1238
- i32 138, label %bb1238
- i32 139, label %bb1201
- i32 140, label %bb1201
- i32 141, label %bb1154
- i32 142, label %bb1126
- i32 144, label %bb1201
- i32 145, label %bb1126
- i32 146, label %bb1201
- i32 147, label %bb1126
- i32 148, label %bb1201
- i32 149, label %bb1126
- i32 150, label %bb1201
- i32 151, label %bb1126
- i32 152, label %bb1096
- i32 153, label %bb1096
- i32 154, label %bb1096
- i32 157, label %bb1096
- i32 158, label %bb1096
- i32 159, label %bb1096
- i32 162, label %bb1096
- i32 163, label %bb1096
- i32 164, label %bb1096
- i32 167, label %bb1201
- i32 168, label %bb1201
- i32 170, label %bb1201
- i32 171, label %bb1201
- i32 173, label %bb1201
- i32 174, label %bb1201
- i32 176, label %bb1201
- i32 177, label %bb1201
- i32 179, label %bb993
- i32 180, label %bb993
- i32 181, label %bb993
- i32 182, label %bb993
- i32 183, label %bb993
- i32 184, label %bb993
- i32 365, label %bb1126
- i32 366, label %bb1126
- i32 367, label %bb1126
- i32 368, label %bb1126
- i32 369, label %bb1126
- i32 370, label %bb1126
- i32 371, label %bb1126
- i32 372, label %bb1126
- i32 373, label %bb1126
- i32 384, label %bb1126
- i32 385, label %bb1126
- i32 386, label %bb1126
- i32 387, label %bb1126
- i32 388, label %bb1126
- i32 389, label %bb1126
- i32 390, label %bb1126
- i32 391, label %bb1126
- i32 392, label %bb1126
- i32 525, label %bb919
- i32 526, label %bb839
- i32 528, label %bb919
- i32 529, label %bb839
- i32 532, label %cond_next6.i97
- i32 533, label %cond_next6.i81
- i32 534, label %bb495
- i32 536, label %cond_next6.i81
- i32 537, label %cond_next6.i81
- i32 538, label %bb396
- i32 539, label %bb288
- i32 541, label %bb396
- i32 542, label %bb396
- i32 543, label %bb396
- i32 544, label %bb396
- i32 545, label %bb189
- i32 546, label %cond_next6.i
- i32 547, label %bb189
- i32 548, label %cond_next6.i
- i32 549, label %bb189
- i32 550, label %cond_next6.i
- i32 551, label %bb189
- i32 552, label %cond_next6.i
- i32 553, label %bb189
- i32 554, label %cond_next6.i
- i32 555, label %bb189
- i32 556, label %cond_next6.i
- i32 557, label %bb189
- i32 558, label %cond_next6.i
- i32 618, label %bb40
- i32 619, label %bb18
- i32 620, label %bb40
- i32 621, label %bb10
- i32 622, label %bb10
- ]
-
-bb10:
- ret void
-
-bb18:
- ret void
-
-bb40:
- ret void
-
-cond_next6.i:
- ret void
-
-bb189:
- ret void
-
-bb288:
- ret void
-
-bb396:
- ret void
-
-bb495:
- ret void
-
-cond_next6.i81:
- ret void
-
-cond_next6.i97:
- ret void
-
-bb839:
- ret void
-
-bb919:
- ret void
-
-bb993:
- ret void
-
-bb1096:
- ret void
-
-bb1126:
- ret void
-
-bb1154:
- ret void
-
-bb1201:
- ret void
-
-bb1238:
- ret void
-
-bb1419:
- ret void
-
-bb1948:
- ret void
-
-bb1994:
- ret void
-
-bb2035:
- ret void
-
-bb2063:
- ret void
-
-UnifiedReturnBlock:
- ret void
-}
diff --git a/test/CodeGen/Blackfin/load-i16.ll b/test/CodeGen/Blackfin/load-i16.ll
deleted file mode 100644
index eb18d410d088..000000000000
--- a/test/CodeGen/Blackfin/load-i16.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
-; This somewhat contrived function heavily exercises register classes
-; It can trick -join-cross-class-copies into making illegal joins
-
-define void @f(i16** nocapture %p) nounwind readonly {
-entry:
- %tmp1 = load i16** %p ; <i16*> [#uses=1]
- %tmp2 = load i16* %tmp1 ; <i16> [#uses=1]
- %ptr = getelementptr i16* %tmp1, i16 %tmp2
- store i16 %tmp2, i16* %ptr
- ret void
-}
diff --git a/test/CodeGen/Blackfin/logic-i16.ll b/test/CodeGen/Blackfin/logic-i16.ll
deleted file mode 100644
index e44672ff4200..000000000000
--- a/test/CodeGen/Blackfin/logic-i16.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-define i16 @and(i16 %A, i16 %B) {
- %R = and i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @or(i16 %A, i16 %B) {
- %R = or i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
-
-define i16 @xor(i16 %A, i16 %B) {
- %R = xor i16 %A, %B ; <i16> [#uses=1]
- ret i16 %R
-}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
deleted file mode 100644
index 2df32ca354d8..000000000000
--- a/test/CodeGen/Blackfin/many-args.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-
- %0 = type { i32, float, float, float, float, float, float, float, float, float, float } ; type %0
- %struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
-
-define i32 @main(i32 %argc.1, i8** %argv.1) {
-entry:
- %tmp.218 = load float* null ; <float> [#uses=1]
- %tmp.219 = getelementptr %0* null, i64 0, i32 6 ; <float*> [#uses=1]
- %tmp.220 = load float* %tmp.219 ; <float> [#uses=1]
- %tmp.221 = getelementptr %0* null, i64 0, i32 7 ; <float*> [#uses=1]
- %tmp.222 = load float* %tmp.221 ; <float> [#uses=1]
- %tmp.223 = getelementptr %0* null, i64 0, i32 8 ; <float*> [#uses=1]
- %tmp.224 = load float* %tmp.223 ; <float> [#uses=1]
- %tmp.225 = getelementptr %0* null, i64 0, i32 9 ; <float*> [#uses=1]
- %tmp.226 = load float* %tmp.225 ; <float> [#uses=1]
- %tmp.227 = getelementptr %0* null, i64 0, i32 10 ; <float*> [#uses=1]
- %tmp.228 = load float* %tmp.227 ; <float> [#uses=1]
- call void @place_and_route(i32 0, i32 0, float 0.000000e+00, i32 0, i32 0, i8* null, i32 0, i32 0, i8* null, i8* null, i8* null, i8* null, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i32 0, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, i32 0, i32 0, i16 0, i16 0, i16 0, float 0.000000e+00, float 0.000000e+00, %struct..s_segment_inf* null, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228)
- ret i32 0
-}
-
-declare void @place_and_route(i32, i32, float, i32, i32, i8*, i32, i32, i8*, i8*, i8*, i8*, i32, i32, i32, float, float, float, float, float, float, float, float, float, i32, i32, i32, i32, i32, float, float, float, i32, i32, i16, i16, i16, float, float, %struct..s_segment_inf*, i32, float, float, float, float, float, float, float, float, float, float)
diff --git a/test/CodeGen/Blackfin/mulhu.ll b/test/CodeGen/Blackfin/mulhu.ll
deleted file mode 100644
index 72bacee33eb2..000000000000
--- a/test/CodeGen/Blackfin/mulhu.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
- %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
- %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
- %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
- %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
- %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
- %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
- %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
- %struct.cost_pair = type { %struct.iv_cand*, i32, %struct.bitmap_head_def* }
- %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
- %struct.def_operand_ptr = type { %struct.tree_node** }
- %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
- %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
- %struct.edge_def_insns = type { %struct.rtx_def* }
- %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
- %struct.eh_status = type opaque
- %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
- %struct.et_node = type opaque
- %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
- %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i1, i1, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
- %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
- %struct.initial_value_struct = type opaque
- %struct.iv = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i1, i1, i32 }
- %struct.iv_cand = type { i32, i1, i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.iv*, i32 }
- %struct.iv_use = type { i32, i32, %struct.iv*, %struct.tree_node*, %struct.tree_node**, %struct.bitmap_head_def*, i32, %struct.cost_pair*, %struct.iv_cand* }
- %struct.ivopts_data = type { %struct.loop*, %struct.htab*, i32, %struct.version_info*, %struct.bitmap_head_def*, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.bitmap_head_def*, i1 }
- %struct.lang_decl = type opaque
- %struct.language_function = type opaque
- %struct.location_t = type { i8*, i32 }
- %struct.loop = type { i32, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.lpt_decision, i32, i32, %struct.edge_def**, i32, %struct.basic_block_def*, %struct.basic_block_def*, i32, %struct.edge_def**, i32, %struct.edge_def**, i32, %struct.simple_bitmap_def*, i32, %struct.loop**, i32, %struct.loop*, %struct.loop*, %struct.loop*, %struct.loop*, i32, i8*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound*, %struct.edge_def*, i1 }
- %struct.lpt_decision = type { i32, i32 }
- %struct.machine_function = type { %struct.stack_local_entry*, i8*, %struct.rtx_def*, i32, i32, i32, i32, i32 }
- %struct.nb_iter_bound = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.nb_iter_bound* }
- %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
- %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
- %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
- %struct.rtx_def = type { i16, i8, i8, %struct.u }
- %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
- %struct.simple_bitmap_def = type { i32, i32, i32, [1 x i64] }
- %struct.stack_local_entry = type opaque
- %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
- %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
- %struct.temp_slot = type opaque
- %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
- %struct.tree_ann_d = type { %struct.stmt_ann_d }
- %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
- %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
- %struct.tree_decl_u1 = type { i64 }
- %struct.tree_decl_u2 = type { %struct.function* }
- %struct.tree_node = type { %struct.tree_decl }
- %struct.u = type { [1 x i64] }
- %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
- %struct.v_may_def_optype_d = type { i32, [1 x %struct.v_def_use_operand_type_t] }
- %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
- %struct.varasm_status = type opaque
- %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
- %struct.version_info = type { %struct.tree_node*, %struct.iv*, i1, i32, i1 }
- %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
-
-define i1 @determine_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand) {
-entry:
- switch i32 0, label %bb91 [
- i32 0, label %bb
- i32 1, label %bb6
- i32 3, label %cond_next135
- ]
-
-bb: ; preds = %entry
- ret i1 false
-
-bb6: ; preds = %entry
- br i1 false, label %bb87, label %cond_next27
-
-cond_next27: ; preds = %bb6
- br i1 false, label %cond_true30, label %cond_next55
-
-cond_true30: ; preds = %cond_next27
- br i1 false, label %cond_next41, label %cond_true35
-
-cond_true35: ; preds = %cond_true30
- ret i1 false
-
-cond_next41: ; preds = %cond_true30
- %tmp44 = call i32 @force_var_cost(%struct.ivopts_data* %data, %struct.tree_node* null, %struct.bitmap_head_def** null) ; <i32> [#uses=1]
- %tmp46 = udiv i32 %tmp44, 5 ; <i32> [#uses=1]
- call void @set_use_iv_cost(%struct.ivopts_data* %data, %struct.iv_use* %use, %struct.iv_cand* %cand, i32 %tmp46, %struct.bitmap_head_def* null)
- br label %bb87
-
-cond_next55: ; preds = %cond_next27
- ret i1 false
-
-bb87: ; preds = %cond_next41, %bb6
- ret i1 false
-
-bb91: ; preds = %entry
- ret i1 false
-
-cond_next135: ; preds = %entry
- ret i1 false
-}
-
-declare void @set_use_iv_cost(%struct.ivopts_data*, %struct.iv_use*, %struct.iv_cand*, i32, %struct.bitmap_head_def*)
-
-declare i32 @force_var_cost(%struct.ivopts_data*, %struct.tree_node*, %struct.bitmap_head_def**)
diff --git a/test/CodeGen/Blackfin/printf.ll b/test/CodeGen/Blackfin/printf.ll
deleted file mode 100644
index 9e54b73c8772..000000000000
--- a/test/CodeGen/Blackfin/printf.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-@.str_1 = external constant [42 x i8] ; <[42 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main(i32 %argc.1, i8** %argv.1) {
-entry:
- %tmp.16 = call i32 (i8*, ...)* @printf(i8* getelementptr ([42 x i8]* @.str_1, i64 0, i64 0), i32 0, i32 0, i64 0, i64 0)
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/printf2.ll b/test/CodeGen/Blackfin/printf2.ll
deleted file mode 100644
index 7ac7e8032bb6..000000000000
--- a/test/CodeGen/Blackfin/printf2.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
- %1 = call i32 (i8*, ...)* @printf(i8* undef, i1 undef)
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll
deleted file mode 100644
index 1ac14082907a..000000000000
--- a/test/CodeGen/Blackfin/promote-logic.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=bfin
-
-; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR
-; operation after LegalizeOps.
-
-define void @mng_display_bgr565() {
-entry:
- br i1 false, label %bb.preheader, label %return
-
-bb.preheader:
- br i1 false, label %cond_true48, label %cond_next80
-
-cond_true48:
- %tmp = load i8* null
- %tmp51 = zext i8 %tmp to i16
- %tmp99 = load i8* null
- %tmp54 = bitcast i8 %tmp99 to i8
- %tmp54.upgrd.1 = zext i8 %tmp54 to i32
- %tmp55 = lshr i32 %tmp54.upgrd.1, 3
- %tmp55.upgrd.2 = trunc i32 %tmp55 to i16
- %tmp52 = shl i16 %tmp51, 5
- %tmp56 = and i16 %tmp55.upgrd.2, 28
- %tmp57 = or i16 %tmp56, %tmp52
- %tmp60 = zext i16 %tmp57 to i32
- %tmp62 = xor i32 0, 65535
- %tmp63 = mul i32 %tmp60, %tmp62
- %tmp65 = add i32 0, %tmp63
- %tmp69 = add i32 0, %tmp65
- %tmp70 = lshr i32 %tmp69, 16
- %tmp70.upgrd.3 = trunc i32 %tmp70 to i16
- %tmp75 = lshr i16 %tmp70.upgrd.3, 8
- %tmp75.upgrd.4 = trunc i16 %tmp75 to i8
- %tmp76 = lshr i8 %tmp75.upgrd.4, 5
- store i8 %tmp76, i8* null
- ret void
-
-cond_next80:
- ret void
-
-return:
- ret void
-}
diff --git a/test/CodeGen/Blackfin/promote-setcc.ll b/test/CodeGen/Blackfin/promote-setcc.ll
deleted file mode 100644
index d344fadbf3d2..000000000000
--- a/test/CodeGen/Blackfin/promote-setcc.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=bfin > %t
-
-; The DAG combiner may sometimes create illegal i16 SETCC operations when run
-; after LegalizeOps. Try to tease out all the optimizations in
-; TargetLowering::SimplifySetCC.
-
-@x = external global i16
-@y = external global i16
-
-declare i16 @llvm.ctlz.i16(i16)
-
-; Case (srl (ctlz x), 5) == const
-; Note: ctlz is promoted, so this test does not catch the DAG combiner
-define i1 @srl_ctlz_const() {
- %x = load i16* @x
- %c = call i16 @llvm.ctlz.i16(i16 %x)
- %s = lshr i16 %c, 4
- %r = icmp eq i16 %s, 1
- ret i1 %r
-}
-
-; Case (zext x) == const
-define i1 @zext_const() {
- %x = load i16* @x
- %r = icmp ugt i16 %x, 1
- ret i1 %r
-}
-
-; Case (sext x) == const
-define i1 @sext_const() {
- %x = load i16* @x
- %y = add i16 %x, 1
- %x2 = sext i16 %y to i32
- %r = icmp ne i32 %x2, -1
- ret i1 %r
-}
-
diff --git a/test/CodeGen/Blackfin/sdiv.ll b/test/CodeGen/Blackfin/sdiv.ll
deleted file mode 100644
index 1426655ba0b9..000000000000
--- a/test/CodeGen/Blackfin/sdiv.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs
-define i32 @sdiv(i32 %A, i32 %B) {
- %R = sdiv i32 %A, %B ; <i32> [#uses=1]
- ret i32 %R
-}
diff --git a/test/CodeGen/Blackfin/simple-select.ll b/test/CodeGen/Blackfin/simple-select.ll
deleted file mode 100644
index 0f7f270967a6..000000000000
--- a/test/CodeGen/Blackfin/simple-select.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-declare i1 @foo()
-
-define i32 @test(i32* %A, i32* %B) {
- %a = load i32* %A
- %b = load i32* %B
- %cond = call i1 @foo()
- %c = select i1 %cond, i32 %a, i32 %b
- ret i32 %c
-}
diff --git a/test/CodeGen/Blackfin/switch.ll b/test/CodeGen/Blackfin/switch.ll
deleted file mode 100644
index 3680ec6e554b..000000000000
--- a/test/CodeGen/Blackfin/switch.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define i32 @foo(i32 %A, i32 %B, i32 %C) {
-entry:
- switch i32 %A, label %out [
- i32 1, label %bb
- i32 0, label %bb13
- ]
-
-bb: ; preds = %entry
- ret i32 1
-
-bb13: ; preds = %entry
- ret i32 1
-
-out: ; preds = %entry
- ret i32 0
-}
diff --git a/test/CodeGen/Blackfin/switch2.ll b/test/CodeGen/Blackfin/switch2.ll
deleted file mode 100644
index 7877bce9c372..000000000000
--- a/test/CodeGen/Blackfin/switch2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
-define i8* @FindChar(i8* %CurPtr) {
-entry:
- br label %bb
-
-bb: ; preds = %bb, %entry
- %tmp = load i8* null ; <i8> [#uses=1]
- switch i8 %tmp, label %bb [
- i8 0, label %bb7
- i8 120, label %bb7
- ]
-
-bb7: ; preds = %bb, %bb
- ret i8* null
-}
diff --git a/test/CodeGen/Blackfin/sync-intr.ll b/test/CodeGen/Blackfin/sync-intr.ll
deleted file mode 100644
index 0b103a3bf77a..000000000000
--- a/test/CodeGen/Blackfin/sync-intr.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs | FileCheck %s
-
-define void @f() nounwind {
-entry:
- ; CHECK-NOT: llvm.bfin
- ; CHECK: csync;
- call void @llvm.bfin.csync()
-
- ; CHECK-NOT: llvm.bfin
- ; CHECK: ssync;
- call void @llvm.bfin.ssync()
- ret void
-}
-
-declare void @llvm.bfin.csync() nounwind
-declare void @llvm.bfin.ssync() nounwind
diff --git a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll b/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
deleted file mode 100644
index 0b06041f5713..000000000000
--- a/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure that global variables do not collide if they have the same name,
-; but different types.
-
-@X = global i32 5 ; <i32*> [#uses=0]
-@X.upgrd.1 = global i64 7 ; <i64*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll b/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
deleted file mode 100644
index a9f54e467d7e..000000000000
--- a/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This case was emitting code that looked like this:
-; ...
-; llvm_BB1: /* no statement here */
-; }
-;
-; Which the Sun C compiler rejected, so now we are sure to put a return
-; instruction in there if the basic block is otherwise empty.
-;
-define void @test() {
- br label %BB1
-
-BB2: ; preds = %BB2
- br label %BB2
-
-BB1: ; preds = %0
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll b/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
deleted file mode 100644
index 2afb1a02bbad..000000000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Test const pointer refs & forward references
-
-@t3 = global i32* @t1 ; <i32**> [#uses=0]
-@t1 = global i32 4 ; <i32*> [#uses=1]
-
diff --git a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
deleted file mode 100644
index b71cf07dbf0c..000000000000
--- a/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-global i32* bitcast (float* @2 to i32*) ;; Forward numeric reference
-global float* @2 ;; Duplicate forward numeric reference
-global float 0.0
-
-@array = constant [2 x i32] [ i32 12, i32 52 ]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0)
diff --git a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll b/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
deleted file mode 100644
index b5a1f0b28b2c..000000000000
--- a/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@sptr1 = global [11 x i8]* @somestr ;; Forward ref to a constant
-@somestr = constant [11 x i8] c"hello world"
diff --git a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll b/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
deleted file mode 100644
index 10b9fe22847c..000000000000
--- a/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@fptr = global void ()* @f ;; Forward ref method defn
-declare void @f() ;; External method
-
diff --git a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll b/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
deleted file mode 100644
index 0827423e1ad0..000000000000
--- a/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-@array = constant [2 x i32] [ i32 12, i32 52 ] ; <[2 x i32]*> [#uses=1]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0) ; <i32**> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll b/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
deleted file mode 100644
index 59aafd55d4c1..000000000000
--- a/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C Writer bombs on this testcase because it tries to print the prototype
-; for the test function, which tries to print the argument name. The function
-; has not been incorporated into the slot calculator, so after it does the name
-; lookup, it tries a slot calculator lookup, which fails.
-
-define i32 @test(i32) {
- ret i32 0
-}
diff --git a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll b/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
deleted file mode 100644
index 6c4d62905b13..000000000000
--- a/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Indirect function call test... found by Joel & Brian
-;
-
-@taskArray = external global i32* ; <i32**> [#uses=1]
-
-define void @test(i32 %X) {
- %Y = add i32 %X, -1 ; <i32> [#uses=1]
- %cast100 = sext i32 %Y to i64 ; <i64> [#uses=1]
- %gep100 = getelementptr i32** @taskArray, i64 %cast100 ; <i32**> [#uses=1]
- %fooPtr = load i32** %gep100 ; <i32*> [#uses=1]
- %cast101 = bitcast i32* %fooPtr to void (i32)* ; <void (i32)*> [#uses=1]
- call void %cast101( i32 1000 )
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll b/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
deleted file mode 100644
index 1187a374601e..000000000000
--- a/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase fails because the C backend does not arrange to output the
-; contents of a structure type before it outputs the structure type itself.
-
-@Y = external global { { i32 } } ; <{ { i32 } }*> [#uses=0]
-@X = external global { float } ; <{ float }*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll b/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
deleted file mode 100644
index 021adb9c8873..000000000000
--- a/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @test() {
- %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll b/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
deleted file mode 100644
index e915cd2fb3f4..000000000000
--- a/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c
-
-
-declare void @foo(...)
-
-
diff --git a/test/CodeGen/CBackend/2002-10-16-External.ll b/test/CodeGen/CBackend/2002-10-16-External.ll
deleted file mode 100644
index 2cdd15cf185b..000000000000
--- a/test/CodeGen/CBackend/2002-10-16-External.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-@bob = external global i32 ; <i32*> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll b/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
deleted file mode 100644
index 82d594fc7e20..000000000000
--- a/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-@testString = internal constant [18 x i8] c"Escaped newline\5Cn\00" ; <[18 x i8]*> [#uses=1]
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() {
- call i32 (i8*, ...)* @printf( i8* getelementptr ([18 x i8]* @testString, i64 0, i64 0) ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll b/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
deleted file mode 100644
index 92d582d7f36d..000000000000
--- a/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Apparently this constant was unsigned in ISO C 90, but not in C 99.
-
-define i32 @foo() {
- ret i32 -2147483648
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll b/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
deleted file mode 100644
index a42dc27a1e70..000000000000
--- a/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This testcase breaks the C backend, because gcc doesn't like (...) functions
-; with no arguments at all.
-
-define void @test(i64 %Ptr) {
- %P = inttoptr i64 %Ptr to void (...)* ; <void (...)*> [#uses=1]
- call void (...)* %P( i64 %Ptr )
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll b/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
deleted file mode 100644
index 19c784022926..000000000000
--- a/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; The C backend was dying when there was no typename for a struct type!
-
-declare i32 @test(i32, { [32 x i32] }*)
diff --git a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll b/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
deleted file mode 100644
index 048e045b31e6..000000000000
--- a/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c
-
-%X = type { i32, float }
-
-define void @test() {
- getelementptr %X* null, i64 0, i32 1 ; <float*>:1 [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll b/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
deleted file mode 100644
index 6197b301fd4a..000000000000
--- a/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llc < %s -march=c
-
-; Make sure a hex constant does not continue into a following valid hexadecimal letter/number
-@version = global [3 x i8] c"\001\00"
diff --git a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll b/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
deleted file mode 100644
index f6177ea7db36..000000000000
--- a/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c
-
-@version = global [3 x i8] c"1\00\00"
diff --git a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll b/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
deleted file mode 100644
index f0b1bbc7f03b..000000000000
--- a/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare i32 @callee(i32, i32)
-
-define i32 @test(i32 %X) {
-; <label>:0
- %A = invoke i32 @callee( i32 %X, i32 5 )
- to label %Ok unwind label %Threw ; <i32> [#uses=1]
-
-Ok: ; preds = %Threw, %0
- %B = phi i32 [ %A, %0 ], [ -1, %Threw ] ; <i32> [#uses=1]
- ret i32 %B
-
-Threw: ; preds = %0
- br label %Ok
-}
-
diff --git a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll b/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
deleted file mode 100644
index 4bd1da25b355..000000000000
--- a/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c | grep common | grep X
-
-@X = linkonce global i32 5
diff --git a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll b/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
deleted file mode 100644
index 0fbb3feef137..000000000000
--- a/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-; This is a non-normal FP value: it's a nan.
-@NAN = global { float } { float 0x7FF8000000000000 } ; <{ float }*> [#uses=0]
-@NANs = global { float } { float 0x7FFC000000000000 } ; <{ float }*> [#uses=0]
diff --git a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll b/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
deleted file mode 100644
index 9195634b0fc4..000000000000
--- a/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c
-
-%A = type { i32, i8*, { i32, i32, i32, i32, i32, i32, i32, i32 }*, i16 }
-
-define void @test(%A*) {
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll b/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
deleted file mode 100644
index b4389ffab18c..000000000000
--- a/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c
-
-; reduced from DOOM.
- %union._XEvent = type { i32 }
-@.X_event_9 = global %union._XEvent zeroinitializer ; <%union._XEvent*> [#uses=1]
-
-define void @I_InitGraphics() {
-shortcirc_next.3:
- %tmp.319 = load i32* getelementptr ({ i32, i32 }* bitcast (%union._XEvent* @.X_event_9 to { i32, i32 }*), i64 0, i32 1) ; <i32> [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll b/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
deleted file mode 100644
index 6a2629124042..000000000000
--- a/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-@y = weak global i8 0 ; <i8*> [#uses=1]
-
-define i32 @testcaseshr() {
-entry:
- ret i32 lshr (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
-define i32 @testcaseshl() {
-entry:
- ret i32 shl (i32 ptrtoint (i8* @y to i32), i32 4)
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll b/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
deleted file mode 100644
index 142fbd84dd8d..000000000000
--- a/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=c | grep builtin_return_address
-
-declare i8* @llvm.returnaddress(i32)
-
-declare i8* @llvm.frameaddress(i32)
-
-define i8* @test1() {
- %X = call i8* @llvm.returnaddress( i32 0 ) ; <i8*> [#uses=1]
- ret i8* %X
-}
-
-define i8* @test2() {
- %X = call i8* @llvm.frameaddress( i32 0 ) ; <i8*> [#uses=1]
- ret i8* %X
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll b/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
deleted file mode 100644
index d1c6861c58d0..000000000000
--- a/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; The intrinsic lowering pass was lowering intrinsics like llvm.memcpy to
-; explicitly specified prototypes, inserting a new function if the old one
-; didn't exist. This caused there to be two external memcpy functions in
-; this testcase for example, which caused the CBE to mangle one, screwing
-; everything up. :( Test that this does not happen anymore.
-;
-; RUN: llc < %s -march=c | not grep _memcpy
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare float* @memcpy(i32*, i32, i32)
-
-define i32 @test(i8* %A, i8* %B, i32* %C) {
- call float* @memcpy( i32* %C, i32 4, i32 17 ) ; <float*>:1 [#uses=0]
- call void @llvm.memcpy.i32( i8* %A, i8* %B, i32 123, i32 14 )
- ret i32 7
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll b/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
deleted file mode 100644
index 6fceb0865741..000000000000
--- a/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; This is a non-normal FP value
-; RUN: llc < %s -march=c | grep FPConstant | grep static
-
-define float @func() {
- ret float 0xFFF0000000000000
-}
-
-define double @func2() {
- ret double 0xFF20000000000000
-}
-
diff --git a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll b/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
deleted file mode 100644
index cf59634e82c1..000000000000
--- a/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c | grep func1 | grep WEAK
-
-define linkonce i32 @func1() {
- ret i32 5
-}
-
diff --git a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll b/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
deleted file mode 100644
index 3ee23d1a909a..000000000000
--- a/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare void @llvm.va_end(i8*)
-
-define void @test() {
- %va.upgrd.1 = bitcast i8* null to i8* ; <i8*> [#uses=1]
- call void @llvm.va_end( i8* %va.upgrd.1 )
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll b/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
deleted file mode 100644
index af8f441c2229..000000000000
--- a/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; The CBE should not emit code that casts the function pointer. This causes
-; GCC to get testy and insert trap instructions instead of doing the right
-; thing. :(
-; RUN: llc < %s -march=c
-
-declare void @external(i8*)
-
-define i32 @test(i32* %X) {
- %RV = call i32 bitcast (void (i8*)* @external to i32 (i32*)*)( i32* %X ) ; <i32> [#uses=1]
- ret i32 %RV
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll b/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
deleted file mode 100644
index 78e9bacd9e77..000000000000
--- a/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | not grep extern.*msg
-; PR472
-
-@msg = internal global [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-
-define i8* @foo() {
-entry:
- ret i8* getelementptr ([6 x i8]* @msg, i32 0, i32 0)
-}
-
diff --git a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll b/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
deleted file mode 100644
index 57a9adc7e89a..000000000000
--- a/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
-define i32 @foo() {
- ret i32 and (i32 123456, i32 ptrtoint (i32 ()* @foo to i32))
-}
diff --git a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll b/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
deleted file mode 100644
index dd505af4831b..000000000000
--- a/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=c | grep volatile
-
-define void @test(i32* %P) {
- %X = volatile load i32* %P ; <i32> [#uses=1]
- volatile store i32 %X, i32* %P
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll b/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
deleted file mode 100644
index 808b8f91407a..000000000000
--- a/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=c | not grep -- --65535
-; PR596
-
-target datalayout = "e-p:32:32"
-target triple = "i686-pc-linux-gnu"
-
-declare void @func(i32)
-
-define void @funcb() {
-entry:
- %tmp.1 = sub i32 0, -65535 ; <i32> [#uses=1]
- call void @func( i32 %tmp.1 )
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-08-23-Fmod.ll b/test/CodeGen/CBackend/2005-08-23-Fmod.ll
deleted file mode 100644
index 6e650eb293fc..000000000000
--- a/test/CodeGen/CBackend/2005-08-23-Fmod.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c | grep fmod
-
-define double @test(double %A, double %B) {
- %C = frem double %A, %B ; <double> [#uses=1]
- ret double %C
-}
-
diff --git a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll b/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
deleted file mode 100644
index 99de837dc79a..000000000000
--- a/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* *volatile *\\*}
-
-@G = external global void ()* ; <void ()**> [#uses=2]
-
-define void @test() {
- volatile store void ()* @test, void ()** @G
- volatile load void ()** @G ; <void ()*>:1 [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll b/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
deleted file mode 100644
index c9df800d72d5..000000000000
--- a/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN: grep __BITCAST | count 14
-
-define i32 @test1(float %F) {
- %X = bitcast float %F to i32 ; <i32> [#uses=1]
- ret i32 %X
-}
-
-define float @test2(i32 %I) {
- %X = bitcast i32 %I to float ; <float> [#uses=1]
- ret float %X
-}
-
-define i64 @test3(double %D) {
- %X = bitcast double %D to i64 ; <i64> [#uses=1]
- ret i64 %X
-}
-
-define double @test4(i64 %L) {
- %X = bitcast i64 %L to double ; <double> [#uses=1]
- ret double %X
-}
-
-define double @test5(double %D) {
- %X = bitcast double %D to double ; <double> [#uses=1]
- %Y = fadd double %X, 2.000000e+00 ; <double> [#uses=1]
- %Z = bitcast double %Y to i64 ; <i64> [#uses=1]
- %res = bitcast i64 %Z to double ; <double> [#uses=1]
- ret double %res
-}
-
-define float @test6(float %F) {
- %X = bitcast float %F to float ; <float> [#uses=1]
- %Y = fadd float %X, 2.000000e+00 ; <float> [#uses=1]
- %Z = bitcast float %Y to i32 ; <i32> [#uses=1]
- %res = bitcast i32 %Z to float ; <float> [#uses=1]
- ret float %res
-}
-
-define i32 @main(i32 %argc, i8** %argv) {
- %a = call i32 @test1( float 0x400921FB40000000 ) ; <i32> [#uses=2]
- %b = call float @test2( i32 %a ) ; <float> [#uses=0]
- %c = call i64 @test3( double 0x400921FB4D12D84A ) ; <i64> [#uses=1]
- %d = call double @test4( i64 %c ) ; <double> [#uses=0]
- %e = call double @test5( double 7.000000e+00 ) ; <double> [#uses=0]
- %f = call float @test6( float 7.000000e+00 ) ; <float> [#uses=0]
- ret i32 %a
-}
-
diff --git a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll b/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
deleted file mode 100644
index da36e78e0b05..000000000000
--- a/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; For PR1099
-; RUN: llc < %s -march=c | grep {(llvm_cbe_tmp2 == llvm_cbe_b_2e_0_2e_0_2e_val)}
-
-target datalayout = "e-p:32:32"
-target triple = "i686-apple-darwin8"
- %struct.Connector = type { i16, i16, i8, i8, %struct.Connector*, i8* }
-
-
-define i1 @prune_match_entry_2E_ce(%struct.Connector* %a, i16 %b.0.0.val) {
-newFuncRoot:
- br label %entry.ce
-
-cond_next.exitStub: ; preds = %entry.ce
- ret i1 true
-
-entry.return_crit_edge.exitStub: ; preds = %entry.ce
- ret i1 false
-
-entry.ce: ; preds = %newFuncRoot
- %tmp1 = getelementptr %struct.Connector* %a, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp2 = load i16* %tmp1 ; <i16> [#uses=1]
- %tmp3 = icmp eq i16 %tmp2, %b.0.0.val ; <i1> [#uses=1]
- br i1 %tmp3, label %cond_next.exitStub, label %entry.return_crit_edge.exitStub
-}
-
-
diff --git a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll b/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
deleted file mode 100644
index 4f699b792e20..000000000000
--- a/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep __builtin_stack_save
-; RUN: llc < %s -march=c | grep __builtin_stack_restore
-; PR1028
-
-declare i8* @llvm.stacksave()
-declare void @llvm.stackrestore(i8*)
-
-define i8* @test() {
- %s = call i8* @llvm.stacksave()
- call void @llvm.stackrestore(i8* %s)
- ret i8* %s
-}
diff --git a/test/CodeGen/CBackend/2007-02-05-memset.ll b/test/CodeGen/CBackend/2007-02-05-memset.ll
deleted file mode 100644
index 7d508e424051..000000000000
--- a/test/CodeGen/CBackend/2007-02-05-memset.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=c
-; PR1181
-target datalayout = "e-p:64:64"
-target triple = "x86_64-apple-darwin8"
-
-
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
-define fastcc void @InitUser_data_unregistered() {
-entry:
- tail call void @llvm.memset.i64( i8* null, i8 0, i64 65496, i32 1 )
- ret void
-}
diff --git a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll b/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
deleted file mode 100644
index 7e1ff2a9dfa0..000000000000
--- a/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; PR1164
-; RUN: llc < %s -march=c | grep {llvm_cbe_A = \\*llvm_cbe_G;}
-; RUN: llc < %s -march=c | grep {llvm_cbe_B = \\*(&ltmp_0_1);}
-; RUN: llc < %s -march=c | grep {return (((unsigned int )(((unsigned int )llvm_cbe_A) + ((unsigned int )llvm_cbe_B))));}
-
-@G = global i32 123
-@ltmp_0_1 = global i32 123
-
-define i32 @test(i32 *%G) {
- %A = load i32* %G
- %B = load i32* @ltmp_0_1
- %C = add i32 %A, %B
- ret i32 %C
-}
diff --git a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll b/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
deleted file mode 100644
index c8bfdd6bcfc6..000000000000
--- a/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s -march=c | grep {packed}
-
- %struct.p = type <{ i16 }>
-
-define i32 @main() {
-entry:
- %t = alloca %struct.p, align 2
- ret i32 5
-}
diff --git a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll b/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
deleted file mode 100644
index 6e0cf6829296..000000000000
--- a/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=c | \
-; RUN: grep {struct __attribute__ ((packed, aligned(} | count 4
-
-define void @test(i32* %P) {
- %X = load i32* %P, align 1
- store i32 %X, i32* %P, align 1
- ret void
-}
-
-define void @test2(i32* %P) {
- %X = volatile load i32* %P, align 2
- volatile store i32 %X, i32* %P, align 2
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll b/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
deleted file mode 100644
index e9fa552433a5..000000000000
--- a/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=c | grep {llvm_cbe_t.*&1}
-define i32 @test(i32 %r) {
- %s = icmp eq i32 %r, 0
- %t = add i1 %s, %s
- %u = zext i1 %t to i32
- br i1 %t, label %A, label %B
-A:
-
- ret i32 %u
-B:
-
- %v = select i1 %t, i32 %r, i32 %u
- ret i32 %v
-}
diff --git a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll b/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
deleted file mode 100644
index b72b57343cd0..000000000000
--- a/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=c
-; PR2907
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
-target triple = "powerpc-apple-darwin9.5"
- %"struct.Point<0>" = type { %"struct.Tensor<1,0>" }
- %"struct.QGauss2<1>" = type { %"struct.Quadrature<0>" }
- %"struct.Quadrature<0>" = type { %struct.Subscriptor, i32, %"struct.std::vector<Point<0>,std::allocator<Point<0> > >", %"struct.std::vector<double,std::allocator<double> >" }
- %struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
- %"struct.Tensor<1,0>" = type { [1 x double] }
- %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" }
- %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >::_Vector_impl" = type { %"struct.Point<0>"*, %"struct.Point<0>"*, %"struct.Point<0>"* }
- %"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" }
- %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* }
- %"struct.std::type_info" = type { i32 (...)**, i8* }
- %"struct.std::vector<Point<0>,std::allocator<Point<0> > >" = type { %"struct.std::_Vector_base<Point<0>,std::allocator<Point<0> > >" }
- %"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" }
-
-define fastcc void @_ZN6QGaussILi1EEC1Ej(%"struct.QGauss2<1>"* %this, i32 %n) {
-entry:
- br label %bb4
-
-bb4: ; preds = %bb5.split, %bb4, %entry
- %0 = fcmp ogt ppc_fp128 0xM00000000000000000000000000000000, select (i1 fcmp olt (ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000)), ppc_fp128 fmul (ppc_fp128 0xM00000000000000010000000000000000, ppc_fp128 0xM40140000000000000000000000000000), ppc_fp128 fpext (double 0x3C447AE147AE147B to ppc_fp128)) ; <i1> [#uses=1]
- br i1 %0, label %bb4, label %bb5.split
-
-bb5.split: ; preds = %bb4
- %1 = getelementptr double* null, i32 0 ; <double*> [#uses=0]
- br label %bb4
-}
diff --git a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
deleted file mode 100644
index 0ae480dcfdbe..000000000000
--- a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=c
-; Check that uadd and sadd with overflow are handled by C Backend.
-
-%0 = type { i32, i1 } ; type %0
-
-define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind {
-entry:
- %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) ; <%0> [#uses=1]
- %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
- br i1 %obit, label %carry, label %normal
-
-normal: ; preds = %entry
- ret i1 true
-
-carry: ; preds = %entry
- ret i1 false
-}
-
-define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind {
-entry:
- %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) ; <%0> [#uses=1]
- %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
- br i1 %obit, label %carry, label %normal
-
-normal: ; preds = %entry
- ret i1 true
-
-carry: ; preds = %entry
- ret i1 false
-}
-
-declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
-
-declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind
-
diff --git a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
deleted file mode 100644
index 054a3cad900d..000000000000
--- a/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {"m"(llvm_cbe_newcw))}
-; PR2407
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-define void @foo() {
- %newcw = alloca i16 ; <i16*> [#uses=2]
- call void asm sideeffect "fldcw $0", "*m,~{dirflag},~{fpsr},~{flags}"( i16*
-%newcw ) nounwind
- ret void
-}
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
deleted file mode 100644
index 44e3a5eff4a4..000000000000
--- a/test/CodeGen/CBackend/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] && [llvm_supports_target CBackend] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/CodeGen/CBackend/dg.exp b/test/CodeGen/CBackend/dg.exp
deleted file mode 100644
index 9d789409d4a4..000000000000
--- a/test/CodeGen/CBackend/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CBackend] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CBackend/fneg.ll b/test/CodeGen/CBackend/fneg.ll
deleted file mode 100644
index 7dec3d9e09c2..000000000000
--- a/test/CodeGen/CBackend/fneg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=c
-
-define void @func() nounwind {
- entry:
- %0 = fsub double -0.0, undef
- ret void
-}
diff --git a/test/CodeGen/CBackend/pr2408.ll b/test/CodeGen/CBackend/pr2408.ll
deleted file mode 100644
index bf8477b7e6dd..000000000000
--- a/test/CodeGen/CBackend/pr2408.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=c | grep {\\* ((unsigned int )}
-; PR2408
-
-define i32 @a(i32 %a) {
-entry:
- %shr = ashr i32 %a, 0 ; <i32> [#uses=1]
- %shr2 = ashr i32 2, 0 ; <i32> [#uses=1]
- %mul = mul i32 %shr, %shr2 ; <i32> [#uses=1]
- %shr4 = ashr i32 2, 0 ; <i32> [#uses=1]
- %div = sdiv i32 %mul, %shr4 ; <i32> [#uses=1]
- ret i32 %div
-}
diff --git a/test/CodeGen/CBackend/vectors.ll b/test/CodeGen/CBackend/vectors.ll
deleted file mode 100644
index b7b76775f6c6..000000000000
--- a/test/CodeGen/CBackend/vectors.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=c
-@.str15 = external global [2 x i8]
-
-define <4 x i32> @foo(<4 x i32> %a, i32 %b) {
- %c = insertelement <4 x i32> %a, i32 1, i32 %b
-
- ret <4 x i32> %c
-}
-
-define i32 @test2(<4 x i32> %a, i32 %b) {
- %c = extractelement <4 x i32> %a, i32 1
-
- ret i32 %c
-}
-
-define <4 x float> @test3(<4 x float> %Y) {
- %Z = fadd <4 x float> %Y, %Y
- %X = shufflevector <4 x float> zeroinitializer, <4 x float> %Z, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >
- ret <4 x float> %X
-}
-
-define void @test4() {
- %x = alloca <4 x float>
- %tmp3.i16 = getelementptr <4 x float>* %x, i32 0, i32 0
- store float 1.0, float* %tmp3.i16
- ret void
-}
-
-define i32* @test5({i32, i32} * %P) {
- %x = getelementptr {i32, i32} * %P, i32 0, i32 1
- ret i32* %x
-}
-
-define i8* @test6() {
- ret i8* getelementptr ([2 x i8]* @.str15, i32 0, i32 0)
-}
-
diff --git a/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
new file mode 100644
index 000000000000..419f59476784
--- /dev/null
+++ b/test/CodeGen/CPP/2012-02-05-UnitVarCrash.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -march=cpp
+declare void @foo(<4 x i32>)
+define void @bar() {
+ call void @foo(<4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+ ret void
+}
diff --git a/test/CodeGen/CPP/dg.exp b/test/CodeGen/CPP/dg.exp
deleted file mode 100644
index 3276dcc32751..000000000000
--- a/test/CodeGen/CPP/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CppBackend] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CPP/lit.local.cfg b/test/CodeGen/CPP/lit.local.cfg
new file mode 100644
index 000000000000..4d4b4a4a7e21
--- /dev/null
+++ b/test/CodeGen/CPP/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'CppBackend' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index 72478a1ca62d..4203e91068d0 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -5,6 +5,9 @@
; RUN: grep andhi %t1.s | count 30
; RUN: grep andbi %t1.s | count 4
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/call.ll b/test/CodeGen/CellSPU/call.ll
index 559b266e59df..11cf770145ba 100644
--- a/test/CodeGen/CellSPU/call.ll
+++ b/test/CodeGen/CellSPU/call.ll
@@ -1,7 +1,3 @@
-; RUN: llc < %s -march=cellspu -regalloc=linearscan > %t1.s
-; RUN: grep brsl %t1.s | count 1
-; RUN: grep brasl %t1.s | count 2
-; RUN: grep stqd %t1.s | count 82
; RUN: llc < %s -march=cellspu | FileCheck %s
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
deleted file mode 100644
index 141361d5702b..000000000000
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=cellspu -asm-verbose=0 -regalloc=linearscan > %t1.s
-; RUN: llc < %s -march=cellspu -mattr=large_mem -asm-verbose=0 -regalloc=linearscan > %t2.s
-; RUN: grep bisl %t1.s | count 7
-; RUN: grep ila %t1.s | count 1
-; RUN: grep rotqby %t1.s | count 5
-; RUN: grep lqa %t1.s | count 1
-; RUN: grep lqd %t1.s | count 12
-; RUN: grep dispatch_tab %t1.s | count 5
-; RUN: grep bisl %t2.s | count 7
-; RUN: grep ilhu %t2.s | count 2
-; RUN: grep iohl %t2.s | count 2
-; RUN: grep rotqby %t2.s | count 5
-; RUN: grep lqd %t2.s | count 13
-; RUN: grep ilhu %t2.s | count 2
-; RUN: grep ai %t2.s | count 9
-; RUN: grep dispatch_tab %t2.s | count 6
-
-; ModuleID = 'call_indirect.bc'
-target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128"
-target triple = "spu-unknown-elf"
-
-@dispatch_tab = global [6 x void (i32, float)*] zeroinitializer, align 16
-
-define void @dispatcher(i32 %i_arg, float %f_arg) {
-entry:
- %tmp2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 0), align 16
- tail call void %tmp2( i32 %i_arg, float %f_arg )
- %tmp2.1 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 1), align 4
- tail call void %tmp2.1( i32 %i_arg, float %f_arg )
- %tmp2.2 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 2), align 4
- tail call void %tmp2.2( i32 %i_arg, float %f_arg )
- %tmp2.3 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 3), align 4
- tail call void %tmp2.3( i32 %i_arg, float %f_arg )
- %tmp2.4 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 4), align 4
- tail call void %tmp2.4( i32 %i_arg, float %f_arg )
- %tmp2.5 = load void (i32, float)** getelementptr ([6 x void (i32, float)*]* @dispatch_tab, i32 0, i32 5), align 4
- tail call void %tmp2.5( i32 %i_arg, float %f_arg )
- ret void
-}
-
-@ptr_list = internal global [1 x void ()*] [ void ()* inttoptr (i64 4294967295 to void ()*) ], align 4
-@ptr.a = internal global void ()** getelementptr ([1 x void ()*]* @ptr_list, i32 0, i32 1), align 16
-
-define void @double_indirect_call() {
- %a = load void ()*** @ptr.a, align 16
- %b = load void ()** %a, align 4
- tail call void %b()
- ret void
-}
diff --git a/test/CodeGen/CellSPU/dg.exp b/test/CodeGen/CellSPU/dg.exp
deleted file mode 100644
index d41647991a0a..000000000000
--- a/test/CodeGen/CellSPU/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target CellSPU] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/CellSPU/lit.local.cfg b/test/CodeGen/CellSPU/lit.local.cfg
new file mode 100644
index 000000000000..ea00867701b2
--- /dev/null
+++ b/test/CodeGen/CellSPU/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'CellSPU' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index b770cad8dfce..57ac709c5414 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -3,6 +3,10 @@
; RUN: grep and %t1.s | count 94
; RUN: grep xsbh %t1.s | count 2
; RUN: grep xshw %t1.s | count 4
+
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 4f1febbad79c..f329266a3c23 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -6,6 +6,9 @@
; RUN: grep orbi %t1.s | count 15
; RUN: FileCheck %s < %t1.s
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll
index b1219e6f56e5..977093527609 100644
--- a/test/CodeGen/CellSPU/rotate_ops.ll
+++ b/test/CodeGen/CellSPU/rotate_ops.ll
@@ -163,7 +163,7 @@ define i8 @rotri8(i8 %A) {
define <2 x float> @test1(<4 x float> %param )
{
; CHECK: test1
-; CHECK: rotqbyi
+; CHECK: shufb
%el = extractelement <4 x float> %param, i32 1
%vec1 = insertelement <1 x float> undef, float %el, i32 0
%rv = shufflevector <1 x float> %vec1, <1 x float> undef, <2 x i32><i32 0,i32 0>
diff --git a/test/CodeGen/CellSPU/select_bits.ll b/test/CodeGen/CellSPU/select_bits.ll
index c804256f513b..65e0aa6fa0b0 100644
--- a/test/CodeGen/CellSPU/select_bits.ll
+++ b/test/CodeGen/CellSPU/select_bits.ll
@@ -1,6 +1,9 @@
; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep selb %t1.s | count 56
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index 3252c776ecbf..f4aad44ed650 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,12 +1,12 @@
; RUN: llc < %s -march=cellspu > %t1.s
; RUN: grep {shlh } %t1.s | count 10
; RUN: grep {shlhi } %t1.s | count 3
-; RUN: grep {shl } %t1.s | count 11
+; RUN: grep {shl } %t1.s | count 10
; RUN: grep {shli } %t1.s | count 3
; RUN: grep {xshw } %t1.s | count 5
-; RUN: grep {and } %t1.s | count 14
-; RUN: grep {andi } %t1.s | count 2
-; RUN: grep {rotmi } %t1.s | count 2
+; RUN: grep {and } %t1.s | count 15
+; RUN: grep {andi } %t1.s | count 4
+; RUN: grep {rotmi } %t1.s | count 4
; RUN: grep {rotqmbyi } %t1.s | count 1
; RUN: grep {rotqmbii } %t1.s | count 2
; RUN: grep {rotqmby } %t1.s | count 1
@@ -342,3 +342,7 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) {
%rv = ashr <8 x i16> %val, %sh
ret <8 x i16> %rv
}
+
+define <2 x i64> @special_const() {
+ ret <2 x i64> <i64 4294967295, i64 4294967295>
+}
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll
index c88a258c26c7..973586bf6cf2 100644
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -1,12 +1,14 @@
; RUN: llc -O1 --march=cellspu < %s | FileCheck %s
+;CHECK: shuffle
define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
; CHECK: cwd {{\$.}}, 0($sp)
; CHECK: shufb {{\$., \$4, \$3, \$.}}
%val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
ret <4 x float> %val
}
-
+
+;CHECK: splat
define <4 x float> @splat(float %param1) {
; CHECK: lqa
; CHECK: shufb $3
@@ -16,6 +18,7 @@ define <4 x float> @splat(float %param1) {
ret <4 x float> %val
}
+;CHECK: test_insert
define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
%sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
;CHECK: lqa $6,
@@ -31,6 +34,7 @@ define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
ret void
}
+;CHECK: test_insert_1
define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
;CHECK: cwd $5, 4($sp)
;CHECK: shufb $3, $4, $3, $5
@@ -39,6 +43,7 @@ define <4 x float> @test_insert_1(<4 x float> %vparam, float %eltparam) {
ret <4 x float> %rv
}
+;CHECK: test_v2i32
define <2 x i32> @test_v2i32(<4 x i32>%vec)
{
;CHECK: rotqbyi $3, $3, 4
@@ -49,17 +54,14 @@ define <2 x i32> @test_v2i32(<4 x i32>%vec)
define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
{
-;CHECK: rotqbyi $3, $3, 8
-;CHECK: bi $lr
%rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
<4 x i32> <i32 2,i32 3,i32 0, i32 1>
ret <4 x i32> %rv
}
+;CHECK: test_v4i32_rot4
define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
{
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
%rv = shufflevector <4 x i32> %vec, <4 x i32> undef,
<4 x i32> <i32 1,i32 2,i32 3, i32 0>
ret <4 x i32> %rv
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index adbb5efa28ba..8c3275080c69 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -22,6 +22,9 @@
; RUN: grep shufb %t2.s | count 7
; RUN: grep stqd %t2.s | count 7
+; CellSPU legalization is over-sensitive to Legalize's traversal order.
+; XFAIL: *
+
; ModuleID = 'struct_1.bc'
target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
target triple = "spu"
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll
index 71d4aba63332..9c5b89613df9 100644
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -9,7 +9,8 @@ define %vec @test_ret(%vec %param)
define %vec @test_add(%vec %param)
{
-;CHECK: a {{\$.}}, $3, $3
+;CHECK: shufb
+;CHECK: addx
%1 = add %vec %param, %param
;CHECK: bi $lr
ret %vec %1
@@ -17,21 +18,14 @@ define %vec @test_add(%vec %param)
define %vec @test_sub(%vec %param)
{
-;CHECK: sf {{\$.}}, $4, $3
%1 = sub %vec %param, <i32 1, i32 1>
-
;CHECK: bi $lr
ret %vec %1
}
define %vec @test_mul(%vec %param)
{
-;CHECK: mpyu
-;CHECK: mpyh
-;CHECK: a {{\$., \$., \$.}}
-;CHECK: a {{\$., \$., \$.}}
%1 = mul %vec %param, %param
-
;CHECK: bi $lr
ret %vec %1
}
@@ -56,22 +50,12 @@ define i32 @test_extract() {
define void @test_store( %vec %val, %vec* %ptr)
{
-;CHECK: stqd $3, 0(${{.}})
-;CHECK: bi $lr
store %vec %val, %vec* %ptr
ret void
}
-;Alignment of <2 x i32> is not *directly* defined in the ABI
-;It probably is safe to interpret it as an array, thus having 8 byte
-;alignment (according to ABI). This tests that the size of
-;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
-;two arrays
define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
{
-; CHECK-NOT: ai $3, $3, 16
-; CHECK: ai $3, $3, 8
-; CHECK: bi $lr
%rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
ret <2 x i32>* %rv
}
diff --git a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
index 943ed88febe9..d67559e4d859 100644
--- a/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
+++ b/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
@@ -30,8 +30,6 @@ UnifiedUnreachableBlock: ; preds = %entry
declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
-
declare void @__cxa_end_catch()
declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/Generic/2008-02-04-Ctlz.ll b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
index 288bfd245da9..9f102066f2bb 100644
--- a/test/CodeGen/Generic/2008-02-04-Ctlz.ll
+++ b/test/CodeGen/Generic/2008-02-04-Ctlz.ll
@@ -4,8 +4,8 @@
define i32 @main(i64 %arg) nounwind {
entry:
- %tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg ) ; <i64> [#uses=1]
- %tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg ) ; <i64> [#uses=1]
+ %tmp37 = tail call i64 @llvm.ctlz.i64( i64 %arg, i1 true ) ; <i64> [#uses=1]
+ %tmp47 = tail call i64 @llvm.cttz.i64( i64 %arg, i1 true ) ; <i64> [#uses=1]
%tmp57 = tail call i64 @llvm.ctpop.i64( i64 %arg ) ; <i64> [#uses=1]
%tmp38 = trunc i64 %tmp37 to i32 ; <i32>:0 [#uses=1]
%tmp48 = trunc i64 %tmp47 to i32 ; <i32>:0 [#uses=1]
@@ -16,6 +16,6 @@ entry:
declare i32 @printf(i8* noalias , ...) nounwind
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
diff --git a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
index 3cbf4c5665b2..b483009976c2 100644
--- a/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
+++ b/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
@@ -15,8 +15,6 @@
%"struct.std::locale::facet" = type { i32 (...)**, i32 }
%union..0._15 = type { i32 }
-declare i8* @llvm.eh.exception() nounwind readonly
-
declare i8* @__cxa_begin_catch(i8*) nounwind
declare %"struct.std::ctype<char>"* @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale(%"struct.std::locale"*)
diff --git a/test/CodeGen/Generic/bool-vector.ll b/test/CodeGen/Generic/bool-vector.ll
deleted file mode 100644
index 4758697286a2..000000000000
--- a/test/CodeGen/Generic/bool-vector.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s
-; PR1845
-
-define void @boolVectorSelect(<4 x i1>* %boolVectorPtr) {
-Body:
- %castPtr = bitcast <4 x i1>* %boolVectorPtr to <4 x i1>*
- %someBools = load <4 x i1>* %castPtr, align 1 ; <<4 x i1>>
- %internal = alloca <4 x i1>, align 16 ; <<4 x i1>*> [#uses=1]
- store <4 x i1> %someBools, <4 x i1>* %internal, align 1
- ret void
-}
diff --git a/test/CodeGen/Generic/dbg-declare.ll b/test/CodeGen/Generic/dbg-declare.ll
new file mode 100644
index 000000000000..01f7d6d47916
--- /dev/null
+++ b/test/CodeGen/Generic/dbg-declare.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -O0
+; <rdar://problem/11134152>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @foo(i32* %x) nounwind uwtable ssp {
+entry:
+ %x.addr = alloca i32*, align 8
+ %saved_stack = alloca i8*
+ %cleanup.dest.slot = alloca i32
+ store i32* %x, i32** %x.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i32** %x.addr}, metadata !14), !dbg !15
+ %0 = load i32** %x.addr, align 8, !dbg !16
+ %1 = load i32* %0, align 4, !dbg !16
+ %2 = zext i32 %1 to i64, !dbg !16
+ %3 = call i8* @llvm.stacksave(), !dbg !16
+ store i8* %3, i8** %saved_stack, !dbg !16
+ %vla = alloca i8, i64 %2, align 16, !dbg !16
+ call void @llvm.dbg.declare(metadata !{i8* %vla}, metadata !18), !dbg !23
+ store i32 1, i32* %cleanup.dest.slot
+ %4 = load i8** %saved_stack, !dbg !24
+ call void @llvm.stackrestore(i8* %4), !dbg !24
+ ret i32 0, !dbg !25
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @llvm.stacksave() nounwind
+
+declare void @llvm.stackrestore(i8*) nounwind
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.1 (trunk 153698)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 6, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32*)* @foo, null, null, metadata !12} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"20020104-2.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786470, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_const_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!15 = metadata !{i32 5, i32 21, metadata !5, null}
+!16 = metadata !{i32 7, i32 13, metadata !17, null}
+!17 = metadata !{i32 786443, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!18 = metadata !{i32 786688, metadata !17, metadata !"a", metadata !6, i32 7, metadata !19, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 786465, i64 1, i64 0} ; [ DW_TAG_subrange_type ]
+!23 = metadata !{i32 7, i32 8, metadata !17, null}
+!24 = metadata !{i32 9, i32 1, metadata !17, null}
+!25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/Generic/dg.exp b/test/CodeGen/Generic/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/CodeGen/Generic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/CodeGen/Generic/lit.local.cfg b/test/CodeGen/Generic/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/CodeGen/Generic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/CodeGen/Generic/llvm-ct-intrinsics.ll b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
index 1db75497592f..abcdb9bbbc14 100644
--- a/test/CodeGen/Generic/llvm-ct-intrinsics.ll
+++ b/test/CodeGen/Generic/llvm-ct-intrinsics.ll
@@ -21,19 +21,19 @@ define void @ctpoptest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %C
ret void
}
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
- %a = call i8 @llvm.ctlz.i8( i8 %A ) ; <i8> [#uses=1]
- %b = call i16 @llvm.ctlz.i16( i16 %B ) ; <i16> [#uses=1]
- %c = call i32 @llvm.ctlz.i32( i32 %C ) ; <i32> [#uses=1]
- %d = call i64 @llvm.ctlz.i64( i64 %D ) ; <i64> [#uses=1]
+ %a = call i8 @llvm.ctlz.i8( i8 %A, i1 true ) ; <i8> [#uses=1]
+ %b = call i16 @llvm.ctlz.i16( i16 %B, i1 true ) ; <i16> [#uses=1]
+ %c = call i32 @llvm.ctlz.i32( i32 %C, i1 true ) ; <i32> [#uses=1]
+ %d = call i64 @llvm.ctlz.i64( i64 %D, i1 true ) ; <i64> [#uses=1]
store i8 %a, i8* %AP
store i16 %b, i16* %BP
store i32 %c, i32* %CP
@@ -41,19 +41,19 @@ define void @ctlztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP
ret void
}
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
define void @cttztest(i8 %A, i16 %B, i32 %C, i64 %D, i8* %AP, i16* %BP, i32* %CP, i64* %DP) {
- %a = call i8 @llvm.cttz.i8( i8 %A ) ; <i8> [#uses=1]
- %b = call i16 @llvm.cttz.i16( i16 %B ) ; <i16> [#uses=1]
- %c = call i32 @llvm.cttz.i32( i32 %C ) ; <i32> [#uses=1]
- %d = call i64 @llvm.cttz.i64( i64 %D ) ; <i64> [#uses=1]
+ %a = call i8 @llvm.cttz.i8( i8 %A, i1 true ) ; <i8> [#uses=1]
+ %b = call i16 @llvm.cttz.i16( i16 %B, i1 true ) ; <i16> [#uses=1]
+ %c = call i32 @llvm.cttz.i32( i32 %C, i1 true ) ; <i32> [#uses=1]
+ %d = call i64 @llvm.cttz.i64( i64 %D, i1 true ) ; <i64> [#uses=1]
store i8 %a, i8* %AP
store i16 %b, i16* %BP
store i32 %c, i32* %CP
diff --git a/test/CodeGen/Generic/pr12507.ll b/test/CodeGen/Generic/pr12507.ll
new file mode 100644
index 000000000000..c79335850600
--- /dev/null
+++ b/test/CodeGen/Generic/pr12507.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s
+
+@c = external global i32, align 4
+
+define void @foo(i160 %x) {
+entry:
+ %cmp.i = icmp ne i160 %x, 340282366920938463463374607431768211456
+ %conv.i = zext i1 %cmp.i to i32
+ %tobool.i = icmp eq i32 %conv.i, 0
+ br i1 %tobool.i, label %if.then.i, label %fn1.exit
+
+if.then.i:
+ store i32 0, i32* @c, align 4
+ br label %fn1.exit
+
+fn1.exit:
+ ret void
+}
diff --git a/test/CodeGen/Generic/select.ll b/test/CodeGen/Generic/select.ll
index 63052c1a2845..77636eb6e615 100644
--- a/test/CodeGen/Generic/select.ll
+++ b/test/CodeGen/Generic/select.ll
@@ -185,3 +185,11 @@ define i32 @checkFoldGEP(%Domain* %D, i64 %idx) {
ret i32 %reg820
}
+; Test case for scalarising a 1 element vselect
+;
+define <1 x i32> @checkScalariseVSELECT(<1 x i32> %a, <1 x i32> %b) {
+ %cond = icmp uge <1 x i32> %a, %b
+ %s = select <1 x i1> %cond, <1 x i32> %a, <1 x i32> %b
+ ret <1 x i32> %s
+}
+
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
new file mode 100644
index 000000000000..69002e0abcb1
--- /dev/null
+++ b/test/CodeGen/Hexagon/args.ll
@@ -0,0 +1,19 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = #7
+; CHECK: memw(r29 + #0) = r[[T0]]
+; CHECK: r0 = #1
+; CHECK: r1 = #2
+; CHECK: r2 = #3
+; CHECK: r3 = #4
+; CHECK: r4 = #5
+; CHECK: r5 = #6
+
+
+define void @foo() nounwind {
+entry:
+ call void @bar(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7)
+ ret void
+}
+
+declare void @bar(i32, i32, i32, i32, i32, i32, i32)
diff --git a/test/CodeGen/Hexagon/combine.ll b/test/CodeGen/Hexagon/combine.ll
new file mode 100644
index 000000000000..36abd74d762b
--- /dev/null
+++ b/test/CodeGen/Hexagon/combine.ll
@@ -0,0 +1,18 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: combine(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i64
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i64* @k, align 8
+ %conv = trunc i64 %1 to i32
+ %2 = call i64 @llvm.hexagon.A2.combinew(i32 %0, i32 %conv)
+ store i64 %2, i64* @k, align 8
+ ret void
+}
+
+declare i64 @llvm.hexagon.A2.combinew(i32, i32) nounwind readnone
diff --git a/test/CodeGen/Hexagon/double.ll b/test/CodeGen/Hexagon/double.ll
new file mode 100644
index 000000000000..04c2ec157eca
--- /dev/null
+++ b/test/CodeGen/Hexagon/double.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_adddf3
+; CHECK: __hexagon_subdf3
+
+define void @foo(double* %acc, double %num, double %num2) nounwind {
+entry:
+ %acc.addr = alloca double*, align 4
+ %num.addr = alloca double, align 8
+ %num2.addr = alloca double, align 8
+ store double* %acc, double** %acc.addr, align 4
+ store double %num, double* %num.addr, align 8
+ store double %num2, double* %num2.addr, align 8
+ %0 = load double** %acc.addr, align 4
+ %1 = load double* %0
+ %2 = load double* %num.addr, align 8
+ %add = fadd double %1, %2
+ %3 = load double* %num2.addr, align 8
+ %sub = fsub double %add, %3
+ %4 = load double** %acc.addr, align 4
+ store double %sub, double* %4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/float.ll b/test/CodeGen/Hexagon/float.ll
new file mode 100644
index 000000000000..51acf2e501ce
--- /dev/null
+++ b/test/CodeGen/Hexagon/float.ll
@@ -0,0 +1,23 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: __hexagon_addsf3
+; CHECK: __hexagon_subsf3
+
+define void @foo(float* %acc, float %num, float %num2) nounwind {
+entry:
+ %acc.addr = alloca float*, align 4
+ %num.addr = alloca float, align 4
+ %num2.addr = alloca float, align 4
+ store float* %acc, float** %acc.addr, align 4
+ store float %num, float* %num.addr, align 4
+ store float %num2, float* %num2.addr, align 4
+ %0 = load float** %acc.addr, align 4
+ %1 = load float* %0
+ %2 = load float* %num.addr, align 4
+ %add = fadd float %1, %2
+ %3 = load float* %num2.addr, align 4
+ %sub = fsub float %add, %3
+ %4 = load float** %acc.addr, align 4
+ store float %sub, float* %4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/frame.ll b/test/CodeGen/Hexagon/frame.ll
new file mode 100644
index 000000000000..c0a9fda46894
--- /dev/null
+++ b/test/CodeGen/Hexagon/frame.ll
@@ -0,0 +1,24 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@num2 = external global i32
+
+; CHECK: allocframe
+; CHECK: dealloc_return
+
+define i32 @foo() nounwind {
+entry:
+ %i = alloca i32, align 4
+ %0 = load i32* @num, align 4
+ store i32 %0, i32* %i, align 4
+ %1 = load i32* %i, align 4
+ %2 = load i32* @acc, align 4
+ %mul = mul nsw i32 %1, %2
+ %3 = load i32* @num2, align 4
+ %add = add nsw i32 %mul, %3
+ store i32 %add, i32* %i, align 4
+ %4 = load i32* %i, align 4
+ ret i32 %4
+}
diff --git a/test/CodeGen/Hexagon/lit.local.cfg b/test/CodeGen/Hexagon/lit.local.cfg
new file mode 100644
index 000000000000..24324b2792e3
--- /dev/null
+++ b/test/CodeGen/Hexagon/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Hexagon' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/Hexagon/mpy.ll b/test/CodeGen/Hexagon/mpy.ll
new file mode 100644
index 000000000000..afd6fc607188
--- /dev/null
+++ b/test/CodeGen/Hexagon/mpy.ll
@@ -0,0 +1,20 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: += mpyi
+
+define void @foo(i32 %acc, i32 %num, i32 %num2) nounwind {
+entry:
+ %acc.addr = alloca i32, align 4
+ %num.addr = alloca i32, align 4
+ %num2.addr = alloca i32, align 4
+ store i32 %acc, i32* %acc.addr, align 4
+ store i32 %num, i32* %num.addr, align 4
+ store i32 %num2, i32* %num2.addr, align 4
+ %0 = load i32* %num.addr, align 4
+ %1 = load i32* %acc.addr, align 4
+ %mul = mul nsw i32 %0, %1
+ %2 = load i32* %num2.addr, align 4
+ %add = add nsw i32 %mul, %2
+ store i32 %add, i32* %num.addr, align 4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/static.ll b/test/CodeGen/Hexagon/static.ll
new file mode 100644
index 000000000000..c63a3ba7fd74
--- /dev/null
+++ b/test/CodeGen/Hexagon/static.ll
@@ -0,0 +1,21 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
+
+@num = external global i32
+@acc = external global i32
+@val = external global i32
+
+; CHECK: CONST32(#num)
+; CHECK: CONST32(#acc)
+; CHECK: CONST32(#val)
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @num, align 4
+ %1 = load i32* @acc, align 4
+ %mul = mul nsw i32 %0, %1
+ %2 = load i32* @val, align 4
+ %add = add nsw i32 %mul, %2
+ store i32 %add, i32* @num, align 4
+ ret void
+}
diff --git a/test/CodeGen/Hexagon/struct_args.ll b/test/CodeGen/Hexagon/struct_args.ll
new file mode 100644
index 000000000000..2c962d0961d4
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args.ll
@@ -0,0 +1,16 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r1:0 = or(r{{[0-9]}}:{{[0-9]}}, r{{[0-9]}}:{{[0-9]}})
+
+%struct.small = type { i32, i32 }
+
+@s1 = common global %struct.small zeroinitializer, align 4
+
+define void @foo() nounwind {
+entry:
+ %0 = load i64* bitcast (%struct.small* @s1 to i64*), align 1
+ call void @bar(i64 %0)
+ ret void
+}
+
+declare void @bar(i64)
diff --git a/test/CodeGen/Hexagon/struct_args_large.ll b/test/CodeGen/Hexagon/struct_args_large.ll
new file mode 100644
index 000000000000..69de4f66a92b
--- /dev/null
+++ b/test/CodeGen/Hexagon/struct_args_large.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: r[[T0:[0-9]+]] = CONST32(#s2)
+; CHECK: r[[T1:[0-9]+]] = memw(r[[T0]] + #0)
+; CHECK: memw(r29 + #0) = r[[T1]]
+
+%struct.large = type { i64, i64 }
+
+@s2 = common global %struct.large zeroinitializer, align 8
+
+define void @foo() nounwind {
+entry:
+ call void @bar(%struct.large* byval @s2)
+ ret void
+}
+
+declare void @bar(%struct.large* byval)
diff --git a/test/CodeGen/Hexagon/vaddh.ll b/test/CodeGen/Hexagon/vaddh.ll
new file mode 100644
index 000000000000..788e4749f5a2
--- /dev/null
+++ b/test/CodeGen/Hexagon/vaddh.ll
@@ -0,0 +1,17 @@
+; RUN: true
+; DISABLED: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; CHECK: vaddh(r{{[0-9]+}}, r{{[0-9]+}})
+
+@j = external global i32
+@k = external global i32
+
+define void @foo() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @k, align 4
+ %2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
+ store i32 %2, i32* @k, align 4
+ ret void
+}
+
+declare i32 @llvm.hexagon.A2.svaddh(i32, i32) nounwind readnone
diff --git a/test/CodeGen/MBlaze/cc.ll b/test/CodeGen/MBlaze/cc.ll
index b1eb22aee9fd..827fd3272bd4 100644
--- a/test/CodeGen/MBlaze/cc.ll
+++ b/test/CodeGen/MBlaze/cc.ll
@@ -222,8 +222,8 @@ define void @testing() {
%tmp.12 = call i32 @params8_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i32 8)
- ; CHECK: {{swi? .*, r1, 28}}
; CHECK: {{swi? .*, r1, 32}}
+ ; CHECK: {{swi? .*, r1, 28}}
; CHECK: {{.* r5, .*, .*}}
; CHECK: {{.* r6, .*, .*}}
; CHECK: {{.* r7, .*, .*}}
@@ -235,9 +235,9 @@ define void @testing() {
%tmp.13 = call i32 @params9_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i32 8, i32 9)
- ; CHECK: {{swi? .*, r1, 28}}
- ; CHECK: {{swi? .*, r1, 32}}
; CHECK: {{swi? .*, r1, 36}}
+ ; CHECK: {{swi? .*, r1, 32}}
+ ; CHECK: {{swi? .*, r1, 28}}
; CHECK: {{.* r5, .*, .*}}
; CHECK: {{.* r6, .*, .*}}
; CHECK: {{.* r7, .*, .*}}
@@ -249,10 +249,10 @@ define void @testing() {
%tmp.14 = call i32 @params10_32bitret(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7, i32 8, i32 9, i32 10)
- ; CHECK: {{swi? .*, r1, 28}}
- ; CHECK: {{swi? .*, r1, 32}}
- ; CHECK: {{swi? .*, r1, 36}}
; CHECK: {{swi? .*, r1, 40}}
+ ; CHECK: {{swi? .*, r1, 36}}
+ ; CHECK: {{swi? .*, r1, 32}}
+ ; CHECK: {{swi? .*, r1, 28}}
; CHECK: {{.* r5, .*, .*}}
; CHECK: {{.* r6, .*, .*}}
; CHECK: {{.* r7, .*, .*}}
diff --git a/test/CodeGen/MBlaze/dg.exp b/test/CodeGen/MBlaze/dg.exp
deleted file mode 100644
index bfd5e4715745..000000000000
--- a/test/CodeGen/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MBlaze/div.ll b/test/CodeGen/MBlaze/div.ll
index fae9830619d9..621784a4f21c 100644
--- a/test/CodeGen/MBlaze/div.ll
+++ b/test/CodeGen/MBlaze/div.ll
@@ -13,14 +13,14 @@ define i8 @test_i8(i8 %a, i8 %b) {
; FUN-NOT: idiv
; FUN: brlid
; DIV-NOT: brlid
- ; DIV: idivu
+ ; DIV: idiv
%tmp.2 = sdiv i8 %a, %b
; FUN-NOT: idiv
; FUN: brlid
; DIV-NOT: brlid
- ; DIV-NOT: idivu
- ; DIV: idiv
+ ; DIV-NOT: idiv
+ ; DIV: idivu
%tmp.3 = add i8 %tmp.1, %tmp.2
ret i8 %tmp.3
@@ -36,14 +36,14 @@ define i16 @test_i16(i16 %a, i16 %b) {
; FUN-NOT: idiv
; FUN: brlid
; DIV-NOT: brlid
- ; DIV: idivu
+ ; DIV: idiv
%tmp.2 = sdiv i16 %a, %b
; FUN-NOT: idiv
; FUN: brlid
; DIV-NOT: brlid
- ; DIV-NOT: idivu
- ; DIV: idiv
+ ; DIV-NOT: idiv
+ ; DIV: idivu
%tmp.3 = add i16 %tmp.1, %tmp.2
ret i16 %tmp.3
@@ -59,14 +59,14 @@ define i32 @test_i32(i32 %a, i32 %b) {
; FUN-NOT: idiv
; FUN: brlid
; DIV-NOT: brlid
- ; DIV: idivu
+ ; DIV: idiv
%tmp.2 = sdiv i32 %a, %b
; FUN-NOT: idiv
; FUN: brlid
; DIV-NOT: brlid
- ; DIV-NOT: idivu
- ; DIV: idiv
+ ; DIV-NOT: idiv
+ ; DIV: idivu
%tmp.3 = add i32 %tmp.1, %tmp.2
ret i32 %tmp.3
diff --git a/test/CodeGen/MBlaze/lit.local.cfg b/test/CodeGen/MBlaze/lit.local.cfg
new file mode 100644
index 000000000000..e236200d7572
--- /dev/null
+++ b/test/CodeGen/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index f339373ffc75..4c7d2d092564 100644
--- a/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -7,9 +7,9 @@ target triple = "msp430-unknown-linux-gnu"
define void @uip_arp_arpin() nounwind {
entry:
- %tmp = volatile load i16* @uip_len ; <i16> [#uses=1]
+ %tmp = load volatile i16* @uip_len ; <i16> [#uses=1]
%cmp = icmp ult i16 %tmp, 42 ; <i1> [#uses=1]
- volatile store i16 0, i16* @uip_len
+ store volatile i16 0, i16* @uip_len
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
diff --git a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
index 088d3e1e7b37..e8c0d14afd21 100644
--- a/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
+++ b/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -6,8 +6,8 @@ target triple = "msp430-generic-generic"
define i16 @foo() nounwind readnone {
entry:
%result = alloca i16, align 1 ; <i16*> [#uses=2]
- volatile store i16 0, i16* %result
- %tmp = volatile load i16* %result ; <i16> [#uses=1]
+ store volatile i16 0, i16* %result
+ %tmp = load volatile i16* %result ; <i16> [#uses=1]
ret i16 %tmp
}
@@ -22,8 +22,8 @@ while.cond: ; preds = %while.cond, %entry
while.end: ; preds = %while.cond
%result.i = alloca i16, align 1 ; <i16*> [#uses=2]
- volatile store i16 0, i16* %result.i
- %tmp.i = volatile load i16* %result.i ; <i16> [#uses=0]
+ store volatile i16 0, i16* %result.i
+ %tmp.i = load volatile i16* %result.i ; <i16> [#uses=0]
ret i16 0
}
diff --git a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index 4d7d9b96c7d3..9fab4826e085 100644
--- a/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -11,10 +11,10 @@ entry:
%x.addr = alloca i8 ; <i8*> [#uses=2]
%tmp = alloca i8, align 1 ; <i8*> [#uses=2]
store i8 %x, i8* %x.addr
- %tmp1 = volatile load i8* @"\010x0021" ; <i8> [#uses=1]
+ %tmp1 = load volatile i8* @"\010x0021" ; <i8> [#uses=1]
store i8 %tmp1, i8* %tmp
%tmp2 = load i8* %x.addr ; <i8> [#uses=1]
- volatile store i8 %tmp2, i8* @"\010x0021"
+ store volatile i8 %tmp2, i8* @"\010x0021"
%tmp3 = load i8* %tmp ; <i8> [#uses=1]
store i8 %tmp3, i8* %retval
%0 = load i8* %retval ; <i8> [#uses=1]
diff --git a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
index 856eb9db3f6b..c1a186a637cf 100644
--- a/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
+++ b/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -4,9 +4,9 @@ define void @foo() nounwind {
entry:
%r = alloca i8 ; <i8*> [#uses=2]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- volatile load i8* %r, align 1 ; <i8>:0 [#uses=1]
+ load volatile i8* %r, align 1 ; <i8>:0 [#uses=1]
or i8 %0, 1 ; <i8>:1 [#uses=1]
- volatile store i8 %1, i8* %r, align 1
+ store volatile i8 %1, i8* %r, align 1
br label %return
return: ; preds = %entry
diff --git a/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index 4f9a7248bbf1..c7ecb5ab853f 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -32,7 +32,7 @@ define i8 @am3(i8 %x, i16 %n) nounwind {
; CHECK: bis.b bar(r14), r15
define i16 @am4(i16 %x) nounwind {
- %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16* inttoptr(i16 32 to i16*)
%2 = or i16 %1,%x
ret i16 %2
}
diff --git a/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index 17ebd873680b..727c29fc082b 100644
--- a/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -35,9 +35,9 @@ define void @am3(i16 %i, i8 %x) nounwind {
; CHECK: bis.b r14, bar(r15)
define void @am4(i16 %x) nounwind {
- %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16* inttoptr(i16 32 to i16*)
%2 = or i16 %x, %1
- volatile store i16 %2, i16* inttoptr(i16 32 to i16*)
+ store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
ret void
}
; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 6676b88cd14f..7cd345bd8f88 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -29,7 +29,7 @@ define i8 @am3(i16 %n) nounwind {
; CHECK: mov.b bar(r15), r15
define i16 @am4() nounwind {
- %1 = volatile load i16* inttoptr(i16 32 to i16*)
+ %1 = load volatile i16* inttoptr(i16 32 to i16*)
ret i16 %1
}
; CHECK: am4:
diff --git a/test/CodeGen/MSP430/AddrMode-mov-xr.ll b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
index 4b327b0578f2..5eeb02f72913 100644
--- a/test/CodeGen/MSP430/AddrMode-mov-xr.ll
+++ b/test/CodeGen/MSP430/AddrMode-mov-xr.ll
@@ -29,7 +29,7 @@ define void @am3(i16 %i, i8 %a) nounwind {
; CHECK: mov.b r14, bar(r15)
define void @am4(i16 %a) nounwind {
- volatile store i16 %a, i16* inttoptr(i16 32 to i16*)
+ store volatile i16 %a, i16* inttoptr(i16 32 to i16*)
ret void
}
; CHECK: am4:
diff --git a/test/CodeGen/MSP430/Inst16mm.ll b/test/CodeGen/MSP430/Inst16mm.ll
index 2337c2c0f241..d4ae811ac8f0 100644
--- a/test/CodeGen/MSP430/Inst16mm.ll
+++ b/test/CodeGen/MSP430/Inst16mm.ll
@@ -64,6 +64,6 @@ entry:
%0 = load i16* %retval ; <i16> [#uses=1]
ret i16 %0
; CHECK: mov2:
-; CHECK: mov.w 0(r1), 4(r1)
; CHECK: mov.w 2(r1), 6(r1)
+; CHECK: mov.w 0(r1), 4(r1)
}
diff --git a/test/CodeGen/MSP430/dg.exp b/test/CodeGen/MSP430/dg.exp
deleted file mode 100644
index e4ea13a40649..000000000000
--- a/test/CodeGen/MSP430/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MSP430] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/MSP430/indirectbr2.ll b/test/CodeGen/MSP430/indirectbr2.ll
index 93cfb2506bb4..dc2abf5cd0ff 100644
--- a/test/CodeGen/MSP430/indirectbr2.ll
+++ b/test/CodeGen/MSP430/indirectbr2.ll
@@ -5,7 +5,7 @@ define internal i16 @foo(i16 %i) nounwind {
entry:
%tmp1 = getelementptr inbounds [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
%gotovar.4.0 = load i8** %tmp1, align 4 ; <i8*> [#uses=1]
-; CHECK: mov.w .LC.0.2070(r15), pc
+; CHECK: mov.w .LC.0.2070(r12), pc
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
L5: ; preds = %bb2
diff --git a/test/CodeGen/MSP430/lit.local.cfg b/test/CodeGen/MSP430/lit.local.cfg
new file mode 100644
index 000000000000..972732ebad30
--- /dev/null
+++ b/test/CodeGen/MSP430/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MSP430' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/Mips/2008-06-05-Carry.ll b/test/CodeGen/Mips/2008-06-05-Carry.ll
index 9d8e391f874e..c61e1cdedea7 100644
--- a/test/CodeGen/Mips/2008-06-05-Carry.ll
+++ b/test/CodeGen/Mips/2008-06-05-Carry.ll
@@ -1,19 +1,22 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep subu %t | count 2
-; RUN: grep addu %t | count 4
-
-target datalayout =
-"e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i64 @add64(i64 %u, i64 %v) nounwind {
entry:
- %tmp2 = add i64 %u, %v
+; CHECK: addu
+; CHECK: sltu
+; CHECK: addu
+; CHECK: addu
+ %tmp2 = add i64 %u, %v
ret i64 %tmp2
}
define i64 @sub64(i64 %u, i64 %v) nounwind {
entry:
+; CHECK: sub64
+; CHECK: subu
+; CHECK: sltu
+; CHECK: addu
+; CHECK: subu
%tmp2 = sub i64 %u, %v
ret i64 %tmp2
}
diff --git a/test/CodeGen/Mips/2008-07-03-SRet.ll b/test/CodeGen/Mips/2008-07-03-SRet.ll
index b1d20d93f187..afec7f65d607 100644
--- a/test/CodeGen/Mips/2008-07-03-SRet.ll
+++ b/test/CodeGen/Mips/2008-07-03-SRet.ll
@@ -1,17 +1,18 @@
-; RUN: llc < %s -march=mips | grep {sw.*(\$4)} | count 3
+; RUN: llc -march=mips < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
- %struct.sret0 = type { i32, i32, i32 }
+%struct.sret0 = type { i32, i32, i32 }
define void @test0(%struct.sret0* noalias sret %agg.result, i32 %dummy) nounwind {
entry:
- getelementptr %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1]
- store i32 %dummy, i32* %0, align 4
- getelementptr %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1]
- store i32 %dummy, i32* %1, align 4
- getelementptr %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1]
- store i32 %dummy, i32* %2, align 4
- ret void
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+; CHECK: sw ${{[0-9]+}}, {{[0-9]+}}($4)
+ getelementptr %struct.sret0* %agg.result, i32 0, i32 0 ; <i32*>:0 [#uses=1]
+ store i32 %dummy, i32* %0, align 4
+ getelementptr %struct.sret0* %agg.result, i32 0, i32 1 ; <i32*>:1 [#uses=1]
+ store i32 %dummy, i32* %1, align 4
+ getelementptr %struct.sret0* %agg.result, i32 0, i32 2 ; <i32*>:2 [#uses=1]
+ store i32 %dummy, i32* %2, align 4
+ ret void
}
diff --git a/test/CodeGen/Mips/2008-07-07-Float2Int.ll b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
index d804c7dcf317..4c552361d9da 100644
--- a/test/CodeGen/Mips/2008-07-07-Float2Int.ll
+++ b/test/CodeGen/Mips/2008-07-07-Float2Int.ll
@@ -1,16 +1,17 @@
-; RUN: llc < %s -march=mips | grep trunc.w.s | count 3
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i32 @fptoint(float %a) nounwind {
entry:
- fptosi float %a to i32 ; <i32>:0 [#uses=1]
- ret i32 %0
+; CHECK: trunc.w.s
+ fptosi float %a to i32 ; <i32>:0 [#uses=1]
+ ret i32 %0
}
define i32 @fptouint(float %a) nounwind {
entry:
- fptoui float %a to i32 ; <i32>:0 [#uses=1]
- ret i32 %0
+; CHECK: fptouint
+; CHECK: trunc.w.s
+; CHECK: trunc.w.s
+ fptoui float %a to i32 ; <i32>:0 [#uses=1]
+ ret i32 %0
}
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index e0c745f34917..8479ad222d30 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -1,20 +1,16 @@
-; DISABLED: llc < %s -march=mips -o %t
-; DISABLED: grep seh %t | count 1
-; DISABLED: grep seb %t | count 1
-; RUN: false
-; XFAIL: *
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc < %s -march=mips -mcpu=mips32r2 | FileCheck %s
+; RUN: llc < %s -march=mips64 -mcpu=mips64r2 | FileCheck %s
define signext i8 @A(i8 %e.0, i8 signext %sum) nounwind {
entry:
+; CHECK: seb
add i8 %sum, %e.0 ; <i8>:0 [#uses=1]
ret i8 %0
}
define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind {
entry:
+; CHECK: seh
add i16 %sum, %e.0 ; <i16>:0 [#uses=1]
ret i16 %0
}
diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
index 94dfe35faba1..a8e54707ddb2 100644
--- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll
+++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2
-; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define float @F(float %a) nounwind {
+; CHECK: .rodata.cst4,"aM",@progbits
entry:
- fadd float %a, 0x4011333340000000 ; <float>:0 [#uses=1]
- fadd float %0, 0x4010666660000000 ; <float>:1 [#uses=1]
- ret float %1
+; CHECK: ($CPI0_{{[0-1]}})
+; CHECK: ($CPI0_{{[0,1]}})
+; CHECK: ($CPI0_{{[0,1]}})
+; CHECK: ($CPI0_{{[0,1]}})
+ fadd float %a, 0x4011333340000000 ; <float>:0 [#uses=1]
+ fadd float %0, 0x4010666660000000 ; <float>:1 [#uses=1]
+ ret float %1
}
diff --git a/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index 23ed64a96d8e..dbde742ad3fe 100644
--- a/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -1,17 +1,53 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mfhi %t | count 1
-; RUN: grep mflo %t | count 1
-; RUN: grep multu %t | count 1
+; RUN: llc -march=mips < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
- %struct.DWstruct = type { i32, i32 }
+%struct.DWstruct = type { i32, i32 }
define i32 @A0(i32 %u, i32 %v) nounwind {
entry:
- %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
- %asmresult = extractvalue %struct.DWstruct %asmtmp, 0
- %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1 ; <i32> [#uses=1]
+; CHECK: multu
+; CHECK: mflo
+; CHECK: mfhi
+ %asmtmp = tail call %struct.DWstruct asm "multu $2,$3", "={lo},={hi},d,d"( i32 %u, i32 %v ) nounwind
+ %asmresult = extractvalue %struct.DWstruct %asmtmp, 0
+ %asmresult1 = extractvalue %struct.DWstruct %asmtmp, 1 ; <i32> [#uses=1]
%res = add i32 %asmresult, %asmresult1
- ret i32 %res
+ ret i32 %res
}
+
+@gi2 = external global i32
+@gi1 = external global i32
+@gi0 = external global i32
+@gf0 = external global float
+@gf1 = external global float
+@gd0 = external global double
+@gd1 = external global double
+
+define void @foo0() nounwind {
+entry:
+; CHECK: addu
+ %0 = load i32* @gi1, align 4
+ %1 = load i32* @gi0, align 4
+ %2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
+ store i32 %2, i32* @gi2, align 4
+ ret void
+}
+
+define void @foo2() nounwind {
+entry:
+; CHECK: neg.s
+ %0 = load float* @gf1, align 4
+ %1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
+ store float %1, float* @gf0, align 4
+ ret void
+}
+
+define void @foo3() nounwind {
+entry:
+; CHECK: neg.d
+ %0 = load double* @gd1, align 8
+ %1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
+ store double %1, double* @gd0, align 8
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
index f8eb02855979..78a49ffbe444 100644
--- a/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
+++ b/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
@@ -1,18 +1,15 @@
-; RUN: llc < %s -march=mips -o %t
-; RUN: grep mtc1 %t | count 1
-; RUN: grep mfc1 %t | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define float @A(i32 %u) nounwind {
entry:
- bitcast i32 %u to float
- ret float %0
+; CHECK: mtc1
+ bitcast i32 %u to float
+ ret float %0
}
define i32 @B(float %u) nounwind {
entry:
- bitcast float %u to i32
- ret i32 %0
+; CHECK: mfc1
+ bitcast float %u to i32
+ ret i32 %0
}
diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll
index 6dd4af111cd9..0d94b19e4629 100644
--- a/test/CodeGen/Mips/2008-08-06-Alloca.ll
+++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll
@@ -1,17 +1,15 @@
-; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2
-; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i32 @twoalloca(i32 %size) nounwind {
entry:
- alloca i8, i32 %size ; <i8*>:0 [#uses=1]
- alloca i8, i32 %size ; <i8*>:1 [#uses=1]
- call i32 @foo( i8* %0 ) nounwind ; <i32>:2 [#uses=1]
- call i32 @foo( i8* %1 ) nounwind ; <i32>:3 [#uses=1]
- add i32 %3, %2 ; <i32>:4 [#uses=1]
- ret i32 %4
+; CHECK: subu ${{[0-9]+}}, $sp
+; CHECK: subu ${{[0-9]+}}, $sp
+ alloca i8, i32 %size ; <i8*>:0 [#uses=1]
+ alloca i8, i32 %size ; <i8*>:1 [#uses=1]
+ call i32 @foo( i8* %0 ) nounwind ; <i32>:2 [#uses=1]
+ call i32 @foo( i8* %1 ) nounwind ; <i32>:3 [#uses=1]
+ add i32 %3, %2 ; <i32>:4 [#uses=1]
+ ret i32 %4
}
declare i32 @foo(i8*)
diff --git a/test/CodeGen/Mips/2008-08-08-ctlz.ll b/test/CodeGen/Mips/2008-08-08-ctlz.ll
index fb3332329d6c..abd61de5a8d8 100644
--- a/test/CodeGen/Mips/2008-08-08-ctlz.ll
+++ b/test/CodeGen/Mips/2008-08-08-ctlz.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=mips | grep clz | count 1
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "mipsallegrexel-unknown-psp-elf"
+; RUN: llc -march=mips < %s | FileCheck %s
define i32 @A0(i32 %u) nounwind {
entry:
- call i32 @llvm.ctlz.i32( i32 %u )
+; CHECK: clz
+ call i32 @llvm.ctlz.i32( i32 %u, i1 true )
ret i32 %0
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
diff --git a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index f5188434670b..9c4838a87e51 100644
--- a/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s
+; RUN: llc -march=mips -soft-float < %s
; PR2667
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "psp"
%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] }
%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*, i8*, i32)*, i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)*, i32 (i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._reent*, i32 }
%struct.__sbuf = type { i8*, i32 }
diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
index b8d68269af42..2b2ee0fd7ad8 100644
--- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
+++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
@@ -1,10 +1,23 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-n32"
-target triple = "mips-unknown-linux"
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
define float @h() nounwind readnone {
entry:
-; CHECK: lw $2, %got($CPI0_0)($gp)
-; CHECK: lwc1 $f0, %lo($CPI0_0)($2)
+; PIC-O32: lw $[[R0:[0-9]+]], %got($CPI0_0)
+; PIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; STATIC-O32: lui $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-O32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N32: lw $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N32: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N32: lui $[[R0:[0-9]+]], %hi($CPI0_0)
+; STATIC-N32: lwc1 $f0, %lo($CPI0_0)($[[R0]])
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page($CPI0_0)
+; PIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
+; STATIC-N64: ld $[[R0:[0-9]+]], %got_page($CPI0_0)
+; STATIC-N64: lwc1 $f0, %got_ofst($CPI0_0)($[[R0]])
ret float 0x400B333340000000
}
diff --git a/test/CodeGen/Mips/2010-07-20-Switch.ll b/test/CodeGen/Mips/2010-07-20-Switch.ll
index 07fc10cae180..aaf6767a3bda 100644
--- a/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -1,13 +1,25 @@
-; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s
+; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc < %s -march=mips -relocation-model=pic | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=PIC-N64
define i32 @main() nounwind readnone {
entry:
%x = alloca i32, align 4 ; <i32*> [#uses=2]
- volatile store i32 2, i32* %x, align 4
- %0 = volatile load i32* %x, align 4 ; <i32> [#uses=1]
-; CHECK: lui $3, %hi($JTI0_0)
-; CHECK: sll $2, $2, 2
-; CHECK: addiu $3, $3, %lo($JTI0_0)
+ store volatile i32 2, i32* %x, align 4
+ %0 = load volatile i32* %x, align 4 ; <i32> [#uses=1]
+; STATIC-O32: lui $[[R0:[0-9]+]], %hi($JTI0_0)
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; STATIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: lw $[[R0:[0-9]+]], %got($JTI0_0)
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($JTI0_0)
+; PIC-O32: sll ${{[0-9]+}}, ${{[0-9]+}}, 2
+; PIC-O32: addu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-O32: jr $[[R1]]
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page($JTI0_0)
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($JTI0_0)
+; PIC-N64: dsll ${{[0-9]+}}, ${{[0-9]+}}, 3
+; PIC-N64: daddu $[[R1:[0-9]+]], ${{[0-9]+}}, $gp
+; PIC-N64: jr $[[R1]]
switch i32 %0, label %bb4 [
i32 0, label %bb5
i32 1, label %bb1
@@ -18,7 +30,7 @@ entry:
bb1: ; preds = %entry
ret i32 2
-; CHECK: $BB0_2
+; CHECK: STATIC-O32: $BB0_2
bb2: ; preds = %entry
ret i32 0
@@ -31,3 +43,23 @@ bb4: ; preds = %entry
bb5: ; preds = %entry
ret i32 1
}
+
+; STATIC-O32: .align 2
+; STATIC-O32: $JTI0_0:
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; STATIC-O32: .4byte
+; PIC-O32: .align 2
+; PIC-O32: $JTI0_0:
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-O32: .gpword
+; PIC-N64: .align 3
+; PIC-N64: $JTI0_0:
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+; PIC-N64: .gpdword
+
diff --git a/test/CodeGen/Mips/2010-11-09-CountLeading.ll b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
index c592b311782f..6174500d3e0b 100644
--- a/test/CodeGen/Mips/2010-11-09-CountLeading.ll
+++ b/test/CodeGen/Mips/2010-11-09-CountLeading.ll
@@ -3,16 +3,16 @@
; CHECK: clz $2, $4
define i32 @t1(i32 %X) nounwind readnone {
entry:
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
ret i32 %tmp1
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
; CHECK: clz $2, $4
define i32 @t2(i32 %X) nounwind readnone {
entry:
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %X, i1 true)
ret i32 %tmp1
}
@@ -20,7 +20,7 @@ entry:
define i32 @t3(i32 %X) nounwind readnone {
entry:
%neg = xor i32 %X, -1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
ret i32 %tmp1
}
@@ -28,6 +28,6 @@ entry:
define i32 @t4(i32 %X) nounwind readnone {
entry:
%neg = xor i32 %X, -1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg)
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %neg, i1 true)
ret i32 %tmp1
}
diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll
index 6de6b7781b24..7de7fa6f6bdb 100644
--- a/test/CodeGen/Mips/blockaddr.ll
+++ b/test/CodeGen/Mips/blockaddr.ll
@@ -1,5 +1,9 @@
-; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
-; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
@reg = common global i8* null, align 4
@@ -8,14 +12,30 @@ entry:
ret i8* %x
}
-; CHECK-PIC: lw $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
-; CHECK-PIC: lw $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])($gp)
-; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
-; CHECK-STATIC: lui $[[R2:[0-9]+]], %hi($tmp[[T0:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T0]])
-; CHECK-STATIC: lui $[[R3:[0-9]+]], %hi($tmp[[T1:[0-9]+]])
-; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T1]])
+; PIC-O32: lw $[[R0:[0-9]+]], %got($tmp[[T0:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp[[T0]])
+; PIC-O32: lw $[[R1:[0-9]+]], %got($tmp[[T1:[0-9]+]])
+; PIC-O32: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp[[T1]])
+; STATIC-O32: lui $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-O32: lui $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-O32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N32: lw $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N32: lw $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N32: addiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N32: lui $[[R2:[0-9]+]], %hi($tmp[[T2:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp[[T2]])
+; STATIC-N32: lui $[[R3:[0-9]+]], %hi($tmp[[T3:[0-9]+]])
+; STATIC-N32: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp[[T3]])
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page($tmp[[T0:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R0]], %got_ofst($tmp[[T0]])
+; PIC-N64: ld $[[R1:[0-9]+]], %got_page($tmp[[T1:[0-9]+]])
+; PIC-N64: daddiu ${{[0-9]+}}, $[[R1]], %got_ofst($tmp[[T1]])
+; STATIC-N64: ld $[[R2:[0-9]+]], %got_page($tmp[[T2:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R2]], %got_ofst($tmp[[T2]])
+; STATIC-N64: ld $[[R3:[0-9]+]], %got_page($tmp[[T3:[0-9]+]])
+; STATIC-N64: daddiu ${{[0-9]+}}, $[[R3]], %got_ofst($tmp[[T3]])
define void @f() nounwind {
entry:
%call = tail call i8* @dummy(i8* blockaddress(@f, %baz))
diff --git a/test/CodeGen/Mips/br-jmp.ll b/test/CodeGen/Mips/br-jmp.ll
new file mode 100644
index 000000000000..1b5513ab394d
--- /dev/null
+++ b/test/CodeGen/Mips/br-jmp.ll
@@ -0,0 +1,13 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC
+; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC
+
+define void @count(i32 %x, i32 %y, i32 %z) noreturn nounwind readnone {
+entry:
+ br label %bosco
+
+bosco: ; preds = %bosco, %entry
+ br label %bosco
+}
+
+; CHECK-PIC: b $BB0_1
+; CHECK-STATIC: j $BB0_1
diff --git a/test/CodeGen/Mips/bswap.ll b/test/CodeGen/Mips/bswap.ll
new file mode 100644
index 000000000000..a8fc2cdc7431
--- /dev/null
+++ b/test/CodeGen/Mips/bswap.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=MIPS32
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 | FileCheck %s -check-prefix=MIPS64
+
+define i32 @bswap32(i32 %x) nounwind readnone {
+entry:
+; MIPS32: bswap32:
+; MIPS32: wsbh $[[R0:[0-9]+]]
+; MIPS32: rotr ${{[0-9]+}}, $[[R0]], 16
+ %or.3 = call i32 @llvm.bswap.i32(i32 %x)
+ ret i32 %or.3
+}
+
+define i64 @bswap64(i64 %x) nounwind readnone {
+entry:
+; MIPS64: bswap64:
+; MIPS64: dsbh $[[R0:[0-9]+]]
+; MIPS64: dshd ${{[0-9]+}}, $[[R0]]
+ %or.7 = call i64 @llvm.bswap.i64(i64 %x)
+ ret i64 %or.7
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
+
diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll
index 7851ba90d6b1..03254a9a799a 100755
--- a/test/CodeGen/Mips/cmov.ll
+++ b/test/CodeGen/Mips/cmov.ll
@@ -1,11 +1,14 @@
-; RUN: llc -march=mips < %s | FileCheck %s
-; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s
+; RUN: llc -march=mips < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips -regalloc=basic < %s | FileCheck %s -check-prefix=O32
+; RUN: llc -march=mips64el -mcpu=mips64 -mattr=n64 < %s | FileCheck %s -check-prefix=N64
@i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4
@i3 = common global i32* null, align 4
-; CHECK: addiu ${{[0-9]+}}, $gp, %got(i1)
-; CHECK: lw ${{[0-9]+}}, %got(i3)($gp)
+; O32: lw ${{[0-9]+}}, %got(i3)($gp)
+; O32: addiu ${{[0-9]+}}, $gp, %got(i1)
+; N64: ld ${{[0-9]+}}, %got_disp(i3)($gp)
+; N64: daddiu ${{[0-9]+}}, $gp, %got_disp(i1)
define i32* @cmov1(i32 %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
@@ -17,10 +20,14 @@ entry:
@c = global i32 1, align 4
@d = global i32 0, align 4
-; CHECK: cmov2:
-; CHECK: addiu $[[R0:[0-9]+]], $gp, %got(c)
-; CHECK: addiu $[[R1:[0-9]+]], $gp, %got(d)
-; CHECK: movn $[[R1]], $[[R0]], ${{[0-9]+}}
+; O32: cmov2:
+; O32: addiu $[[R1:[0-9]+]], $gp, %got(d)
+; O32: addiu $[[R0:[0-9]+]], $gp, %got(c)
+; O32: movn $[[R1]], $[[R0]], ${{[0-9]+}}
+; N64: cmov2:
+; N64: daddiu $[[R1:[0-9]+]], $gp, %got_disp(d)
+; N64: daddiu $[[R0:[0-9]+]], $gp, %got_disp(c)
+; N64: movn $[[R1]], $[[R0]], ${{[0-9]+}}
define i32 @cmov2(i32 %s) nounwind readonly {
entry:
%tobool = icmp ne i32 %s, 0
diff --git a/test/CodeGen/Mips/cprestore.ll b/test/CodeGen/Mips/cprestore.ll
index 391f5c714dbb..57d022f47c82 100644
--- a/test/CodeGen/Mips/cprestore.ll
+++ b/test/CodeGen/Mips/cprestore.ll
@@ -1,11 +1,9 @@
-; DISABLED: llc -march=mipsel < %s | FileCheck %s
-; RUN: false
-
-; byval is currently unsupported.
-; XFAIL: *
+; RUN: llc -march=mipsel < %s | FileCheck %s
; CHECK: .set macro
+; CHECK: .set at
; CHECK-NEXT: .cprestore
+; CHECK: .set noat
; CHECK-NEXT: .set nomacro
%struct.S = type { [16384 x i32] }
diff --git a/test/CodeGen/Mips/dg.exp b/test/CodeGen/Mips/dg.exp
deleted file mode 100644
index adb2cac9a6b0..000000000000
--- a/test/CodeGen/Mips/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Mips] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Mips/eh.ll b/test/CodeGen/Mips/eh.ll
index 9cd34131a131..c3facdbc5556 100644
--- a/test/CodeGen/Mips/eh.ll
+++ b/test/CodeGen/Mips/eh.ll
@@ -10,15 +10,11 @@ entry:
; CHECK-EL: .cfi_def_cfa_offset
; CHECK-EL: sdc1 $f20
; CHECK-EL: sw $ra
-; CHECK-EL: sw $17
-; CHECK-EL: sw $16
; CHECK-EL: .cfi_offset 52, -8
; CHECK-EL: .cfi_offset 53, -4
; CHECK-EB: .cfi_offset 53, -8
; CHECK-EB: .cfi_offset 52, -4
; CHECK-EL: .cfi_offset 31, -12
-; CHECK-EL: .cfi_offset 17, -16
-; CHECK-EL: .cfi_offset 16, -20
; CHECK-EL: .cprestore
%exception = tail call i8* @__cxa_allocate_exception(i32 8) nounwind
@@ -58,16 +54,10 @@ unreachable: ; preds = %entry
declare i8* @__cxa_allocate_exception(i32)
-declare i8* @llvm.eh.exception() nounwind readonly
-
declare i32 @__gxx_personality_v0(...)
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare i32 @llvm.eh.typeid.for(i8*) nounwind
-declare void @llvm.eh.resume(i8*, i32)
-
declare void @__cxa_throw(i8*, i8*, i8*)
declare i8* @__cxa_begin_catch(i8*)
diff --git a/test/CodeGen/Mips/extins.ll b/test/CodeGen/Mips/extins.ll
index 69f53e503f6d..a164f7047b5c 100644
--- a/test/CodeGen/Mips/extins.ll
+++ b/test/CodeGen/Mips/extins.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
define i32 @ext0_5_9(i32 %s, i32 %pos, i32 %sz) nounwind readnone {
entry:
diff --git a/test/CodeGen/Mips/fabs.ll b/test/CodeGen/Mips/fabs.ll
new file mode 100644
index 000000000000..b296ab390d56
--- /dev/null
+++ b/test/CodeGen/Mips/fabs.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -enable-no-nans-fp-math | FileCheck %s -check-prefix=NO-NAN
+
+define float @foo0(float %a) nounwind readnone {
+entry:
+
+; 32: lui $[[T0:[0-9]+]], 32767
+; 32: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f0
+
+; 32R2: ins $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
+; NO-NAN: abs.s
+
+ %call = tail call float @fabsf(float %a) nounwind readnone
+ ret float %call
+}
+
+declare float @fabsf(float) nounwind readnone
+
+define double @foo1(double %a) nounwind readnone {
+entry:
+
+; 32: lui $[[T0:[0-9]+]], 32767
+; 32: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: mtc1 $[[AND]], $f1
+
+; 32R2: ins $[[INS:[0-9]+]], $zero, 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll $[[T1:[0-9]+]], ${{[0-9]+}}, 63
+; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and $[[AND:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dmtc1 $[[AND]], $f0
+
+; 64R2: dins $[[INS:[0-9]+]], $zero, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+; NO-NAN: abs.d
+
+ %call = tail call double @fabs(double %a) nounwind readnone
+ ret double %call
+}
+
+declare double @fabs(double) nounwind readnone
diff --git a/test/CodeGen/Mips/fcopysign-f32-f64.ll b/test/CodeGen/Mips/fcopysign-f32-f64.ll
new file mode 100644
index 000000000000..b36473d6f57a
--- /dev/null
+++ b/test/CodeGen/Mips/fcopysign-f32-f64.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
+
+declare double @copysign(double, double) nounwind readnone
+
+declare float @copysignf(float, float) nounwind readnone
+
+define float @func2(float %d, double %f) nounwind readnone {
+entry:
+; 64: func2
+; 64: lui $[[T0:[0-9]+]], 32767
+; 64: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 64: and $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 63
+; 64: sll $[[SLL:[0-9]+]], ${{[0-9]+}}, 31
+; 64: or $[[OR:[0-9]+]], $[[AND0]], $[[SLL]]
+; 64: mtc1 $[[OR]], $f0
+
+; 64R2: dext ${{[0-9]+}}, ${{[0-9]+}}, 63, 1
+; 64R2: ins $[[INS:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 64R2: mtc1 $[[INS]], $f0
+
+ %add = fadd float %d, 1.000000e+00
+ %conv = fptrunc double %f to float
+ %call = tail call float @copysignf(float %add, float %conv) nounwind readnone
+ ret float %call
+}
+
+define double @func3(double %d, float %f) nounwind readnone {
+entry:
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll $[[T1:[0-9]+]], $[[T0]], 63
+; 64: daddiu $[[MSK0:[0-9]+]], $[[T1]], -1
+; 64: and $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: srl ${{[0-9]+}}, ${{[0-9]+}}, 31
+; 64: dsll $[[DSLL:[0-9]+]], ${{[0-9]+}}, 63
+; 64: or $[[OR:[0-9]+]], $[[AND0]], $[[DSLL]]
+; 64: dmtc1 $[[OR]], $f0
+
+; 64R2: ext ${{[0-9]+}}, ${{[0-9]+}}, 31, 1
+; 64R2: dins $[[INS:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
+ %add = fadd double %d, 1.000000e+00
+ %conv = fpext float %f to double
+ %call = tail call double @copysign(double %add, double %conv) nounwind readnone
+ ret double %call
+}
+
diff --git a/test/CodeGen/Mips/fcopysign.ll b/test/CodeGen/Mips/fcopysign.ll
index 79f956d719c5..1c57eca3c9ec 100644
--- a/test/CodeGen/Mips/fcopysign.ll
+++ b/test/CodeGen/Mips/fcopysign.ll
@@ -1,34 +1,35 @@
-; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-EL
-; RUN: llc < %s -march=mips | FileCheck %s -check-prefix=CHECK-EB
+; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s -check-prefix=32
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=64
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2
define double @func0(double %d0, double %d1) nounwind readnone {
entry:
-; CHECK-EL: func0:
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: mfc1 $[[HI0:[0-9]+]], $f13
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[HI1:[0-9]+]], $f15
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EL: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EL: mfc1 $[[LO0:[0-9]+]], $f12
-; CHECK-EL: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EL: mtc1 $[[LO0]], $f0
-; CHECK-EL: mtc1 $[[OR]], $f1
;
-; CHECK-EB: lui $[[T0:[0-9]+]], 32767
-; CHECK-EB: lui $[[T1:[0-9]+]], 32768
-; CHECK-EB: mfc1 $[[HI0:[0-9]+]], $f12
-; CHECK-EB: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EB: mfc1 $[[HI1:[0-9]+]], $f14
-; CHECK-EB: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EB: and $[[AND0:[0-9]+]], $[[HI0]], $[[MSK0]]
-; CHECK-EB: and $[[AND1:[0-9]+]], $[[HI1]], $[[MSK1]]
-; CHECK-EB: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
-; CHECK-EB: mfc1 $[[LO0:[0-9]+]], $f13
-; CHECK-EB: mtc1 $[[OR]], $f0
-; CHECK-EB: mtc1 $[[LO0]], $f1
+; 32: lui $[[MSK1:[0-9]+]], 32768
+; 32: and $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui $[[T0:[0-9]+]], 32767
+; 32: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f1
+
+; 32R2: ext $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f1
+
+; 64: daddiu $[[T0:[0-9]+]], $zero, 1
+; 64: dsll $[[MSK1:[0-9]+]], $[[T0]], 63
+; 64: and $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 64: daddiu $[[MSK0:[0-9]+]], $[[MSK1]], -1
+; 64: and $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 64: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 64: dmtc1 $[[OR]], $f0
+
+; 64R2: dext $[[EXT:[0-9]+]], ${{[0-9]+}}, 63, 1
+; 64R2: dins $[[INS:[0-9]+]], $[[EXT]], 63, 1
+; 64R2: dmtc1 $[[INS]], $f0
+
%call = tail call double @copysign(double %d0, double %d1) nounwind readnone
ret double %call
}
@@ -37,19 +38,22 @@ declare double @copysign(double, double) nounwind readnone
define float @func1(float %f0, float %f1) nounwind readnone {
entry:
-; CHECK-EL: func1:
-; CHECK-EL: lui $[[T0:[0-9]+]], 32767
-; CHECK-EL: lui $[[T1:[0-9]+]], 32768
-; CHECK-EL: mfc1 $[[ARG0:[0-9]+]], $f12
-; CHECK-EL: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
-; CHECK-EL: mfc1 $[[ARG1:[0-9]+]], $f14
-; CHECK-EL: ori $[[MSK1:[0-9]+]], $[[T1]], 0
-; CHECK-EL: and $[[T2:[0-9]+]], $[[ARG0]], $[[MSK0]]
-; CHECK-EL: and $[[T3:[0-9]+]], $[[ARG1]], $[[MSK1]]
-; CHECK-EL: or $[[T4:[0-9]+]], $[[T2]], $[[T3]]
-; CHECK-EL: mtc1 $[[T4]], $f0
+
+; 32: lui $[[MSK1:[0-9]+]], 32768
+; 32: and $[[AND1:[0-9]+]], ${{[0-9]+}}, $[[MSK1]]
+; 32: lui $[[T0:[0-9]+]], 32767
+; 32: ori $[[MSK0:[0-9]+]], $[[T0]], 65535
+; 32: and $[[AND0:[0-9]+]], ${{[0-9]+}}, $[[MSK0]]
+; 32: or $[[OR:[0-9]+]], $[[AND0]], $[[AND1]]
+; 32: mtc1 $[[OR]], $f0
+
+; 32R2: ext $[[EXT:[0-9]+]], ${{[0-9]+}}, 31, 1
+; 32R2: ins $[[INS:[0-9]+]], $[[EXT]], 31, 1
+; 32R2: mtc1 $[[INS]], $f0
+
%call = tail call float @copysignf(float %f0, float %f1) nounwind readnone
ret float %call
}
declare float @copysignf(float, float) nounwind readnone
+
diff --git a/test/CodeGen/Mips/fmadd1.ll b/test/CodeGen/Mips/fmadd1.ll
new file mode 100644
index 000000000000..435b419368b3
--- /dev/null
+++ b/test/CodeGen/Mips/fmadd1.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -enable-no-nans-fp-math | FileCheck %s -check-prefix=32R2
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 -enable-no-nans-fp-math | FileCheck %s -check-prefix=64R2
+; RUN: llc < %s -march=mipsel -mcpu=mips32r2 | FileCheck %s -check-prefix=32R2NAN
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s -check-prefix=64R2NAN
+
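+; Check that multiply-add/sub patterns are fused into madd/msub on MIPS32r2
+; and MIPS64r2, and that the negated forms nmadd/nmsub are selected only
+; under -enable-no-nans-fp-math; otherwise the negation stays separate.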
+define float @FOO0float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: madd.s
+; 64R2: madd.s
+; 32R2NAN: madd.s
+; 64R2NAN: madd.s
+ %mul = fmul float %a, %b
+ %add = fadd float %mul, %c
+ %add1 = fadd float %add, 0.000000e+00
+ ret float %add1
+}
+
+define float @FOO1float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: msub.s
+; 64R2: msub.s
+; 32R2NAN: msub.s
+; 64R2NAN: msub.s
+ %mul = fmul float %a, %b
+ %sub = fsub float %mul, %c
+ %add = fadd float %sub, 0.000000e+00
+ ret float %add
+}
+
+define float @FOO2float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmadd.s
+; 64R2: nmadd.s
+; 32R2NAN: madd.s
+; 64R2NAN: madd.s
+ %mul = fmul float %a, %b
+ %add = fadd float %mul, %c
+ %sub = fsub float 0.000000e+00, %add
+ ret float %sub
+}
+
+define float @FOO3float(float %a, float %b, float %c) nounwind readnone {
+entry:
+; 32R2: nmsub.s
+; 64R2: nmsub.s
+; 32R2NAN: msub.s
+; 64R2NAN: msub.s
+ %mul = fmul float %a, %b
+ %sub = fsub float %mul, %c
+ %sub1 = fsub float 0.000000e+00, %sub
+ ret float %sub1
+}
+
+define double @FOO10double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: madd.d
+; 64R2: madd.d
+; 32R2NAN: madd.d
+; 64R2NAN: madd.d
+ %mul = fmul double %a, %b
+ %add = fadd double %mul, %c
+ %add1 = fadd double %add, 0.000000e+00
+ ret double %add1
+}
+
+define double @FOO11double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: msub.d
+; 64R2: msub.d
+; 32R2NAN: msub.d
+; 64R2NAN: msub.d
+ %mul = fmul double %a, %b
+ %sub = fsub double %mul, %c
+ %add = fadd double %sub, 0.000000e+00
+ ret double %add
+}
+
+define double @FOO12double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmadd.d
+; 64R2: nmadd.d
+; 32R2NAN: madd.d
+; 64R2NAN: madd.d
+ %mul = fmul double %a, %b
+ %add = fadd double %mul, %c
+ %sub = fsub double 0.000000e+00, %add
+ ret double %sub
+}
+
+define double @FOO13double(double %a, double %b, double %c) nounwind readnone {
+entry:
+; 32R2: nmsub.d
+; 64R2: nmsub.d
+; 32R2NAN: msub.d
+; 64R2NAN: msub.d
+ %mul = fmul double %a, %b
+ %sub = fsub double %mul, %c
+ %sub1 = fsub double 0.000000e+00, %sub
+ ret double %sub1
+}
diff --git a/test/CodeGen/Mips/fneg.ll b/test/CodeGen/Mips/fneg.ll
new file mode 100644
index 000000000000..b322abdaa23c
--- /dev/null
+++ b/test/CodeGen/Mips/fneg.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=mipsel -mcpu=mips32 | FileCheck %s
+
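+; fneg must not be lowered to neg.s/neg.d, which are arithmetic instructions
+; and therefore not IEEE-safe for NaN operands.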
+define float @foo0(i32 %a, float %d) nounwind readnone {
+entry:
+; CHECK-NOT: neg.s
+ %sub = fsub float -0.000000e+00, %d
+ ret float %sub
+}
+
+define double @foo1(i32 %a, double %d) nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK-NOT: neg.d
+; CHECK: jr
+ %sub = fsub double -0.000000e+00, %d
+ ret double %sub
+}
diff --git a/test/CodeGen/Mips/fp-indexed-ls.ll b/test/CodeGen/Mips/fp-indexed-ls.ll
new file mode 100644
index 000000000000..08bd6e72ae77
--- /dev/null
+++ b/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -0,0 +1,102 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
+
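+; Check that reg+reg addressing selects the indexed FP load/store
+; instructions (lwxc1/ldxc1, swxc1/sdxc1), that the unaligned variants
+; luxc1/suxc1 are used for under-aligned float accesses, and that
+; under-aligned double accesses avoid ldxc1/sdxc1 entirely.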
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+ %arrayidx = getelementptr inbounds float* %b, i32 %o
+ %0 = load float* %arrayidx, align 4
+ ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+ %arrayidx = getelementptr inbounds double* %b, i32 %o
+ %0 = load double* %arrayidx, align 8
+ ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+ %0 = load float* %arrayidx1, align 1
+ ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+ %0 = load float* @gf, align 4
+ %arrayidx = getelementptr inbounds float* %b, i32 %o
+ store float %0, float* %arrayidx, align 4
+ ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+ %0 = load double* @gd, align 8
+ %arrayidx = getelementptr inbounds double* %b, i32 %o
+ store double %0, double* %arrayidx, align 8
+ ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+ %0 = load float* @gf, align 4
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
+ store float %0, float* %arrayidx1, align 1
+ ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+ %0 = load double* %arrayidx1, align 1
+ ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+ %0 = load double* @gd, align 8
+ %arrayidx1 = getelementptr inbounds [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
+ store double %0, double* %arrayidx1, align 1
+ ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+ %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+ ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+ store float %f, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/fpcmp.ll b/test/CodeGen/Mips/fpcmp.ll
deleted file mode 100644
index 86545e347c14..000000000000
--- a/test/CodeGen/Mips/fpcmp.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS32
-
-@g1 = external global i32
-
-define i32 @f(float %f0, float %f1) nounwind {
-entry:
-; CHECK-MIPS32: c.olt.s
-; CHECK-MIPS32: movt
-; CHECK-MIPS32: c.olt.s
-; CHECK-MIPS32: movt
- %cmp = fcmp olt float %f0, %f1
- %conv = zext i1 %cmp to i32
- %tmp2 = load i32* @g1, align 4
- %add = add nsw i32 %tmp2, %conv
- store i32 %add, i32* @g1, align 4
- %cond = select i1 %cmp, i32 10, i32 20
- ret i32 %cond
-}
diff --git a/test/CodeGen/Mips/frem.ll b/test/CodeGen/Mips/frem.ll
new file mode 100644
index 000000000000..be222b2d9172
--- /dev/null
+++ b/test/CodeGen/Mips/frem.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=mipsel
+
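+; MIPS has no frem instruction; just make sure that lowering to the
+; fmodf/fmod libcalls does not crash llc.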
+define float @fmods(float %x, float %y) {
+entry:
+ %r = frem float %x, %y
+ ret float %r
+}
+
+define double @fmodd(double %x, double %y) {
+entry:
+ %r = frem double %x, %y
+ ret double %r
+}
diff --git a/test/CodeGen/Mips/global-address.ll b/test/CodeGen/Mips/global-address.ll
new file mode 100644
index 000000000000..0d49a7424ad6
--- /dev/null
+++ b/test/CodeGen/Mips/global-address.ll
@@ -0,0 +1,49 @@
+; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-O32
+; RUN: llc -march=mipsel -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-O32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n32 -relocation-model=static -mtriple=mipsel-linux-gnu < %s | FileCheck %s -check-prefix=STATIC-N32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=pic < %s | FileCheck %s -check-prefix=PIC-N64
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -relocation-model=static < %s | FileCheck %s -check-prefix=STATIC-N64
+
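+; Check global-address lowering for each ABI and relocation model: GOT
+; accesses (%got, %got_page/%got_ofst, %got_disp) under PIC, %hi/%lo pairs
+; under static. Note that static N64 still goes through the GOT.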
+@s1 = internal unnamed_addr global i32 8, align 4
+@g1 = external global i32
+
+define void @foo() nounwind {
+entry:
+; PIC-O32: lw $[[R0:[0-9]+]], %got(s1)
+; PIC-O32: lw ${{[0-9]+}}, %lo(s1)($[[R0]])
+; PIC-O32: lw ${{[0-9]+}}, %got(g1)
+; STATIC-O32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-O32: lw ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-O32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-O32: lw ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N32: lw $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N32: lw ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N32: lw ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N32: lui $[[R1:[0-9]+]], %hi(s1)
+; STATIC-N32: lw ${{[0-9]+}}, %lo(s1)($[[R1]])
+; STATIC-N32: lui $[[R2:[0-9]+]], %hi(g1)
+; STATIC-N32: lw ${{[0-9]+}}, %lo(g1)($[[R2]])
+
+; PIC-N64: ld $[[R0:[0-9]+]], %got_page(s1)
+; PIC-N64: lw ${{[0-9]+}}, %got_ofst(s1)($[[R0]])
+; PIC-N64: ld ${{[0-9]+}}, %got_disp(g1)
+; STATIC-N64: ld $[[R1:[0-9]+]], %got_page(s1)
+; STATIC-N64: lw ${{[0-9]+}}, %got_ofst(s1)($[[R1]])
+; STATIC-N64: ld ${{[0-9]+}}, %got_disp(g1)
+
+ %0 = load i32* @s1, align 4
+ tail call void @foo1(i32 %0) nounwind
+ %1 = load i32* @g1, align 4
+ store i32 %1, i32* @s1, align 4
+ %add = add nsw i32 %1, 2
+ store i32 %add, i32* @g1, align 4
+ ret void
+}
+
+declare void @foo1(i32)
+
diff --git a/test/CodeGen/Mips/global-pointer-reg.ll b/test/CodeGen/Mips/global-pointer-reg.ll
new file mode 100644
index 000000000000..174d1f9cbe90
--- /dev/null
+++ b/test/CodeGen/Mips/global-pointer-reg.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=mipsel -mips-fix-global-base-reg=false | FileCheck %s
+
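+; With -mips-fix-global-base-reg=false, $gp is computed in the function body
+; from _gp_disp and $25 instead of being set up with .cpload/.cprestore.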
+@g0 = external global i32
+@g1 = external global i32
+@g2 = external global i32
+
+define void @foo1() nounwind {
+entry:
+; CHECK-NOT: .cpload
+; CHECK-NOT: .cprestore
+; CHECK: lui $[[R0:[0-9]+]], %hi(_gp_disp)
+; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], %lo(_gp_disp)
+; CHECK: addu $[[GP:[0-9]+]], $[[R1]], $25
+; CHECK: lw ${{[0-9]+}}, %call16(foo2)($[[GP]])
+
+ tail call void @foo2(i32* @g0) nounwind
+ tail call void @foo2(i32* @g1) nounwind
+ tail call void @foo2(i32* @g2) nounwind
+ ret void
+}
+
+declare void @foo2(i32*)
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index 87cf2a63c5b5..8b1f71b69f19 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -4,21 +4,21 @@ define void @f1(i64 %ll1, float %f, i64 %ll, i32 %i, float %f2) nounwind {
entry:
; CHECK: addu $[[R1:[0-9]+]], $zero, $5
; CHECK: addu $[[R0:[0-9]+]], $zero, $4
-; CHECK: lw $25, %call16(ff1)
; CHECK: ori $6, ${{[0-9]+}}, 3855
; CHECK: ori $7, ${{[0-9]+}}, 22136
+; CHECK: lw $25, %call16(ff1)
; CHECK: jalr
tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
; CHECK: lw $25, %call16(ff2)
-; CHECK: lw $[[R2:[0-9]+]], 88($sp)
-; CHECK: lw $[[R3:[0-9]+]], 92($sp)
+; CHECK: lw $[[R2:[0-9]+]], 80($sp)
+; CHECK: lw $[[R3:[0-9]+]], 84($sp)
; CHECK: addu $4, $zero, $[[R2]]
; CHECK: addu $5, $zero, $[[R3]]
; CHECK: jalr $25
tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
%sub = add nsw i32 %i, -1
-; CHECK: sw $[[R0]], 24($sp)
; CHECK: sw $[[R1]], 28($sp)
+; CHECK: sw $[[R0]], 24($sp)
; CHECK: lw $25, %call16(ff3)
; CHECK: addu $6, $zero, $[[R2]]
; CHECK: addu $7, $zero, $[[R3]]
diff --git a/test/CodeGen/Mips/imm.ll b/test/CodeGen/Mips/imm.ll
new file mode 100644
index 000000000000..eea391e8707e
--- /dev/null
+++ b/test/CodeGen/Mips/imm.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
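+; Check 32-bit immediate materialization: lui+ori for a full 32-bit constant,
+; lui alone when the low half is zero, and a single addiu/ori for constants
+; that fit in 16 bits (signed and unsigned, respectively).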
+define i32 @foo0() nounwind readnone {
+entry:
+; CHECK: foo0
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: ori ${{[0-9]+}}, $[[R0]], 22136
+ ret i32 305419896
+}
+
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+ ret i32 305397760
+}
+
+define i32 @foo2() nounwind readnone {
+entry:
+; CHECK: foo2
+; CHECK: addiu ${{[0-9]+}}, $zero, 4660
+ ret i32 4660
+}
+
+define i32 @foo17() nounwind readnone {
+entry:
+; CHECK: foo17
+; CHECK: addiu ${{[0-9]+}}, $zero, -32204
+ ret i32 -32204
+}
+
+define i32 @foo18() nounwind readnone {
+entry:
+; CHECK: foo18
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+ ret i32 33332
+}
diff --git a/test/CodeGen/Mips/indirectcall.ll b/test/CodeGen/Mips/indirectcall.ll
new file mode 100644
index 000000000000..ac565d646674
--- /dev/null
+++ b/test/CodeGen/Mips/indirectcall.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=mipsel -relocation-model=static | FileCheck %s
+
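+; Indirect calls must use $25 as the target register, even in static mode.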
+define void @foo0(void (i32)* nocapture %f1) nounwind {
+entry:
+; CHECK: jalr $25
+ tail call void %f1(i32 13) nounwind
+ ret void
+}
diff --git a/test/CodeGen/Mips/inlineasm64.ll b/test/CodeGen/Mips/inlineasm64.ll
new file mode 100644
index 000000000000..dbce3c394e96
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasm64.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
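+; Check that i64 inline-asm operands with the 'r' constraint are assigned
+; 64-bit GPRs, so the asm's daddu operates on full 64-bit registers.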
+@gl2 = external global i64
+@gl1 = external global i64
+@gl0 = external global i64
+
+define void @foo1() nounwind {
+entry:
+; CHECK: foo1
+; CHECK: daddu
+ %0 = load i64* @gl1, align 8
+ %1 = load i64* @gl0, align 8
+ %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
+ store i64 %2, i64* @gl2, align 8
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
index b5db58a57e38..4b31a88b418a 100644
--- a/test/CodeGen/Mips/inlineasmmemop.ll
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -8,10 +8,10 @@ entry:
; CHECK: #APP
; CHECK: sw $4, 0($[[T0]])
; CHECK: #NO_APP
-; CHECK: lw $[[T1:[0-9]+]], %got(g1)($gp)
; CHECK: #APP
; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
; CHECK: #NO_APP
+; CHECK: lw $[[T1:[0-9]+]], %got(g1)($gp)
; CHECK: sw $[[T3]], 0($[[T1]])
%l1 = alloca i32, align 4
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index 579a319d5f7a..b7c9a9ccbb58 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -1,8 +1,4 @@
-; DISABLED: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
-; RUN: false
-
-; byval is currently unsupported.
-; XFAIL: *
+; RUN: llc -march=mipsel -mcpu=mips32r2 < %s | FileCheck %s
%struct.S1 = type { [65536 x i8] }
@@ -11,8 +7,8 @@
define void @f() nounwind {
entry:
; CHECK: lui $at, 65534
-; CHECK: addu $at, $sp, $at
-; CHECK: addiu $sp, $at, -24
+; CHECK: addiu $at, $at, -24
+; CHECK: addu $sp, $sp, $at
; CHECK: .cprestore 65536
%agg.tmp = alloca %struct.S1, align 1
diff --git a/test/CodeGen/Mips/lit.local.cfg b/test/CodeGen/Mips/lit.local.cfg
new file mode 100644
index 000000000000..0587d3243e6b
--- /dev/null
+++ b/test/CodeGen/Mips/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'Mips' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/Mips/mips64-fp-indexed-ls.ll b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
new file mode 100644
index 000000000000..09745fb8f61c
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-fp-indexed-ls.ll
@@ -0,0 +1,112 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 < %s | FileCheck %s
+
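+; N64 version of fp-indexed-ls.ll; the i32 index is zero-extended to i64
+; before being folded into the indexed FP load/store addressing mode.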
+%struct.S = type <{ [4 x float] }>
+%struct.S2 = type <{ [4 x double] }>
+%struct.S3 = type <{ i8, float }>
+
+@s = external global [4 x %struct.S]
+@gf = external global float
+@gd = external global double
+@s2 = external global [4 x %struct.S2]
+@s3 = external global %struct.S3
+
+define float @foo0(float* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: lwxc1
+ %idxprom = zext i32 %o to i64
+ %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+ %0 = load float* %arrayidx, align 4
+ ret float %0
+}
+
+define double @foo1(double* nocapture %b, i32 %o) nounwind readonly {
+entry:
+; CHECK: ldxc1
+ %idxprom = zext i32 %o to i64
+ %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+ %0 = load double* %arrayidx, align 8
+ ret double %0
+}
+
+define float @foo2(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: luxc1
+ %idxprom = zext i32 %c to i64
+ %idxprom1 = zext i32 %b to i64
+ %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+ %0 = load float* %arrayidx2, align 1
+ ret float %0
+}
+
+define void @foo3(float* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: swxc1
+ %0 = load float* @gf, align 4
+ %idxprom = zext i32 %o to i64
+ %arrayidx = getelementptr inbounds float* %b, i64 %idxprom
+ store float %0, float* %arrayidx, align 4
+ ret void
+}
+
+define void @foo4(double* nocapture %b, i32 %o) nounwind {
+entry:
+; CHECK: sdxc1
+ %0 = load double* @gd, align 8
+ %idxprom = zext i32 %o to i64
+ %arrayidx = getelementptr inbounds double* %b, i64 %idxprom
+ store double %0, double* %arrayidx, align 8
+ ret void
+}
+
+define void @foo5(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: suxc1
+ %0 = load float* @gf, align 4
+ %idxprom = zext i32 %c to i64
+ %idxprom1 = zext i32 %b to i64
+ %arrayidx2 = getelementptr inbounds [4 x %struct.S]* @s, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+ store float %0, float* %arrayidx2, align 1
+ ret void
+}
+
+define double @foo6(i32 %b, i32 %c) nounwind readonly {
+entry:
+; CHECK: foo6
+; CHECK-NOT: ldxc1
+ %idxprom = zext i32 %c to i64
+ %idxprom1 = zext i32 %b to i64
+ %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+ %0 = load double* %arrayidx2, align 1
+ ret double %0
+}
+
+define void @foo7(i32 %b, i32 %c) nounwind {
+entry:
+; CHECK: foo7
+; CHECK-NOT: sdxc1
+ %0 = load double* @gd, align 8
+ %idxprom = zext i32 %c to i64
+ %idxprom1 = zext i32 %b to i64
+ %arrayidx2 = getelementptr inbounds [4 x %struct.S2]* @s2, i64 0, i64 %idxprom1, i32 0, i64 %idxprom
+ store double %0, double* %arrayidx2, align 1
+ ret void
+}
+
+define float @foo8() nounwind readonly {
+entry:
+; CHECK: foo8
+; CHECK: luxc1
+ %0 = load float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+ ret float %0
+}
+
+define void @foo9(float %f) nounwind {
+entry:
+; CHECK: foo9
+; CHECK: suxc1
+ store float %f, float* getelementptr inbounds (%struct.S3* @s3, i64 0, i32 1), align 1
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/mips64countleading.ll b/test/CodeGen/Mips/mips64countleading.ll
new file mode 100644
index 000000000000..b2b67e51ade0
--- /dev/null
+++ b/test/CodeGen/Mips/mips64countleading.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
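+; Counting leading zeros/ones of an i64 should select dclz/dclo.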
+define i64 @t1(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclz
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
+ ret i64 %tmp1
+}
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+
+define i64 @t3(i64 %X) nounwind readnone {
+entry:
+; CHECK: dclo
+ %neg = xor i64 %X, -1
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
+ ret i64 %tmp1
+}
+
diff --git a/test/CodeGen/Mips/mips64directive.ll b/test/CodeGen/Mips/mips64directive.ll
new file mode 100644
index 000000000000..fa81b729e9c8
--- /dev/null
+++ b/test/CodeGen/Mips/mips64directive.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
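+; An i64 global must be emitted with the .8byte data directive.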
+@gl = global i64 1250999896321, align 8
+
+; CHECK: 8byte
+define i64 @foo1() nounwind readonly {
+entry:
+ %0 = load i64* @gl, align 8
+ ret i64 %0
+}
+
diff --git a/test/CodeGen/Mips/mips64ext.ll b/test/CodeGen/Mips/mips64ext.ll
new file mode 100644
index 000000000000..02a35f8e6ed7
--- /dev/null
+++ b/test/CodeGen/Mips/mips64ext.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
+define i64 @zext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: addiu $[[R0:[0-9]+]], ${{[0-9]+}}, 2
+; CHECK: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; CHECK: dsrl ${{[0-9]+}}, $[[R1]], 32
+ %add = add i32 %a, 2
+ %conv = zext i32 %add to i64
+ ret i64 %conv
+}
+
+define i64 @sext64_32(i32 %a) nounwind readnone {
+entry:
+; CHECK: sll ${{[0-9]+}}, ${{[0-9]+}}, 0
+ %conv = sext i32 %a to i64
+ ret i64 %conv
+}
+
+define i64 @i64_float(float %f) nounwind readnone {
+entry:
+; CHECK: trunc.l.s
+ %conv = fptosi float %f to i64
+ ret i64 %conv
+}
+
diff --git a/test/CodeGen/Mips/mips64extins.ll b/test/CodeGen/Mips/mips64extins.ll
new file mode 100644
index 000000000000..14f92ca86947
--- /dev/null
+++ b/test/CodeGen/Mips/mips64extins.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64r2 -mattr=n64 | FileCheck %s
+
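+; Shift-and-mask patterns should match the doubleword bit-field instructions;
+; positions and sizes beyond 32 exercise the dextm/dextu and dinsm/dinsu forms.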
+define i64 @dext(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 10
+ %shr = lshr i64 %i, 5
+ %and = and i64 %shr, 1023
+ ret i64 %and
+}
+
+define i64 @dextm(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 5, 34
+ %shr = lshr i64 %i, 5
+ %and = and i64 %shr, 17179869183
+ ret i64 %and
+}
+
+define i64 @dextu(i64 %i) nounwind readnone {
+entry:
+; CHECK: dext ${{[0-9]+}}, ${{[0-9]+}}, 34, 6
+ %shr = lshr i64 %i, 34
+ %and = and i64 %shr, 63
+ ret i64 %and
+}
+
+define i64 @dins(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 8, 10
+ %shl2 = shl i64 %j, 8
+ %and = and i64 %shl2, 261888
+ %and3 = and i64 %i, -261889
+ %or = or i64 %and3, %and
+ ret i64 %or
+}
+
+define i64 @dinsm(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 10, 33
+ %shl4 = shl i64 %j, 10
+ %and = and i64 %shl4, 8796093021184
+ %and5 = and i64 %i, -8796093021185
+ %or = or i64 %and5, %and
+ ret i64 %or
+}
+
+define i64 @dinsu(i64 %i, i64 %j) nounwind readnone {
+entry:
+; CHECK: dins ${{[0-9]+}}, ${{[0-9]+}}, 40, 13
+ %shl4 = shl i64 %j, 40
+ %and = and i64 %shl4, 9006099743113216
+ %and5 = and i64 %i, -9006099743113217
+ %or = or i64 %and5, %and
+ ret i64 %or
+}
diff --git a/test/CodeGen/Mips/mips64fpimm0.ll b/test/CodeGen/Mips/mips64fpimm0.ll
new file mode 100644
index 000000000000..17716da0c670
--- /dev/null
+++ b/test/CodeGen/Mips/mips64fpimm0.ll
@@ -0,0 +1,9 @@
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s
+
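+; +0.0 should be materialized by moving $zero into an FPR with dmtc1 rather
+; than being loaded from the constant pool.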
+define double @foo1() nounwind readnone {
+entry:
+; CHECK: dmtc1 $zero
+ ret double 0.000000e+00
+}
diff --git a/test/CodeGen/Mips/mips64fpldst.ll b/test/CodeGen/Mips/mips64fpldst.ll
index b8f3ca9d7985..24647b20bf2e 100644
--- a/test/CodeGen/Mips/mips64fpldst.ll
+++ b/test/CodeGen/Mips/mips64fpldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
@f0 = common global float 0.000000e+00, align 4
@d0 = common global double 0.000000e+00, align 8
@@ -12,7 +12,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
; CHECK-N64: lwc1 $f{{[0-9]+}}, 0($[[R0]])
; CHECK-N32: funcfl1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]])
%0 = load float* @f0, align 4
ret float %0
@@ -24,7 +24,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
; CHECK-N64: ldc1 $f{{[0-9]+}}, 0($[[R0]])
; CHECK-N32: funcfl2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]])
%0 = load double* @d0, align 8
ret double %0
@@ -36,7 +36,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(f0)
; CHECK-N64: swc1 $f{{[0-9]+}}, 0($[[R0]])
; CHECK-N32: funcfs1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(f0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]])
%0 = load float* @f1, align 4
store float %0, float* @f0, align 4
@@ -49,7 +49,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(d0)
; CHECK-N64: sdc1 $f{{[0-9]+}}, 0($[[R0]])
; CHECK-N32: funcfs2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(d0)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]])
%0 = load double* @d1, align 8
store double %0, double* @d0, align 8
diff --git a/test/CodeGen/Mips/mips64imm.ll b/test/CodeGen/Mips/mips64imm.ll
new file mode 100644
index 000000000000..1fc8636c480b
--- /dev/null
+++ b/test/CodeGen/Mips/mips64imm.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
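+; Check 64-bit immediate materialization: a single ori/daddiu for 16-bit
+; constants, lui+daddiu for 32-bit-range constants, and lui/daddiu/dsll
+; chains for full 64-bit constants.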
+define i32 @foo1() nounwind readnone {
+entry:
+; CHECK: foo1
+; CHECK: lui ${{[0-9]+}}, 4660
+; CHECK-NOT: ori
+ ret i32 305397760
+}
+
+define i64 @foo3() nounwind readnone {
+entry:
+; CHECK: foo3
+; CHECK: lui $[[R0:[0-9]+]], 4660
+; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 22136
+ ret i64 305419896
+}
+
+define i64 @foo6() nounwind readnone {
+entry:
+; CHECK: foo6
+; CHECK: ori ${{[0-9]+}}, $zero, 33332
+ ret i64 33332
+}
+
+define i64 @foo7() nounwind readnone {
+entry:
+; CHECK: foo7
+; CHECK: daddiu ${{[0-9]+}}, $zero, -32204
+ ret i64 -32204
+}
+
+define i64 @foo9() nounwind readnone {
+entry:
+; CHECK: foo9
+; CHECK: lui $[[R0:[0-9]+]], 583
+; CHECK: daddiu $[[R1:[0-9]+]], $[[R0]], -30001
+; CHECK: dsll $[[R2:[0-9]+]], $[[R1]], 18
+; CHECK: daddiu $[[R3:[0-9]+]], $[[R2]], 18441
+; CHECK: dsll $[[R4:[0-9]+]], $[[R3]], 17
+; CHECK: daddiu ${{[0-9]+}}, $[[R4]], 13398
+ ret i64 1311768467284833366
+}
+
+define i64 @foo10() nounwind readnone {
+entry:
+; CHECK: foo10
+; CHECK: lui $[[R0:[0-9]+]], 34661
+; CHECK: daddiu ${{[0-9]+}}, $[[R0]], 17185
+ ret i64 -8690466096928522240
+}
+
diff --git a/test/CodeGen/Mips/mips64instrs.ll b/test/CodeGen/Mips/mips64instrs.ll
index c9812a276992..041831149057 100644
--- a/test/CodeGen/Mips/mips64instrs.ll
+++ b/test/CodeGen/Mips/mips64instrs.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips64el -mcpu=mips64r1 < %s | FileCheck %s
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
define i64 @f0(i64 %a0, i64 %a1) nounwind readnone {
entry:
@@ -116,12 +116,12 @@ entry:
ret i64 %rem
}
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
define i64 @f18(i64 %X) nounwind readnone {
entry:
; CHECK: dclz $2, $4
- %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X)
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %X, i1 true)
ret i64 %tmp1
}
@@ -129,7 +129,7 @@ define i64 @f19(i64 %X) nounwind readnone {
entry:
; CHECK: dclo $2, $4
%neg = xor i64 %X, -1
- %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg)
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %neg, i1 true)
ret i64 %tmp1
}
diff --git a/test/CodeGen/Mips/mips64intldst.ll b/test/CodeGen/Mips/mips64intldst.ll
index fdf496b19189..0e310a8670f9 100644
--- a/test/CodeGen/Mips/mips64intldst.ll
+++ b/test/CodeGen/Mips/mips64intldst.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
-; RUN: llc < %s -march=mips64el -mcpu=mips64r1 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n64 | FileCheck %s -check-prefix=CHECK-N64
+; RUN: llc < %s -march=mips64el -mcpu=mips64 -mattr=n32 | FileCheck %s -check-prefix=CHECK-N32
@c = common global i8 0, align 4
@s = common global i16 0, align 4
@@ -16,7 +16,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
; CHECK-N64: lb ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: func1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]])
%0 = load i8* @c, align 4
%conv = sext i8 %0 to i64
@@ -29,7 +29,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
; CHECK-N64: lh ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: func2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]])
%0 = load i16* @s, align 4
%conv = sext i16 %0 to i64
@@ -42,7 +42,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
; CHECK-N64: lw ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: func3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]])
%0 = load i32* @i, align 4
%conv = sext i32 %0 to i64
@@ -55,7 +55,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
; CHECK-N64: ld ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: func4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]])
%0 = load i64* @l, align 8
ret i64 %0
@@ -67,7 +67,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(uc)
; CHECK-N64: lbu ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: ufunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(uc)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(uc)
; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]])
%0 = load i8* @uc, align 4
%conv = zext i8 %0 to i64
@@ -80,7 +80,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(us)
; CHECK-N64: lhu ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: ufunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(us)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(us)
; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]])
%0 = load i16* @us, align 4
%conv = zext i16 %0 to i64
@@ -93,7 +93,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(ui)
; CHECK-N64: lwu ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: ufunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(ui)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(ui)
; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]])
%0 = load i32* @ui, align 4
%conv = zext i32 %0 to i64
@@ -106,7 +106,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(c)
; CHECK-N64: sb ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: sfunc1
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(c)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]])
%0 = load i64* @l1, align 8
%conv = trunc i64 %0 to i8
@@ -120,7 +120,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(s)
; CHECK-N64: sh ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: sfunc2
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(s)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]])
%0 = load i64* @l1, align 8
%conv = trunc i64 %0 to i16
@@ -134,7 +134,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(i)
; CHECK-N64: sw ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: sfunc3
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(i)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]])
%0 = load i64* @l1, align 8
%conv = trunc i64 %0 to i32
@@ -148,7 +148,7 @@ entry:
; CHECK-N64: ld $[[R0:[0-9]+]], %got_disp(l)
; CHECK-N64: sd ${{[0-9]+}}, 0($[[R0]])
; CHECK-N32: sfunc4
-; CHECK-N32: lw $[[R0:[0-9]+]], %got(l)
+; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]])
%0 = load i64* @l1, align 8
store i64 %0, i64* @l, align 8
diff --git a/test/CodeGen/Mips/mips64lea.ll b/test/CodeGen/Mips/mips64lea.ll
new file mode 100644
index 000000000000..54d504f92266
--- /dev/null
+++ b/test/CodeGen/Mips/mips64lea.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
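+; Taking the address of a stack object on mips64 should be a single daddiu
+; off $sp.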
+define void @foo3() nounwind {
+entry:
+; CHECK: daddiu ${{[0-9]+}}, $sp
+ %a = alloca i32, align 4
+ call void @foo1(i32* %a) nounwind
+ ret void
+}
+
+declare void @foo1(i32*)
+
diff --git a/test/CodeGen/Mips/mips64muldiv.ll b/test/CodeGen/Mips/mips64muldiv.ll
new file mode 100644
index 000000000000..fd036a2ca9fb
--- /dev/null
+++ b/test/CodeGen/Mips/mips64muldiv.ll
@@ -0,0 +1,52 @@
+; RUN: llc -march=mips64el -mcpu=mips64 < %s | FileCheck %s
+
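+; 64-bit mul/div/rem map onto dmult/ddiv/ddivu, with the product or quotient
+; read from LO and the remainder from HI; signed division by a constant
+; becomes a multiply-by-magic-number sequence (dmult + mfhi).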
+define i64 @m0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mflo
+ %mul = mul i64 %a1, %a0
+ ret i64 %mul
+}
+
+define i64 @m1(i64 %a) nounwind readnone {
+entry:
+; CHECK: dmult
+; CHECK: mfhi
+ %div = sdiv i64 %a, 3
+ ret i64 %div
+}
+
+define i64 @d0(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mflo
+ %div = udiv i64 %a0, %a1
+ ret i64 %div
+}
+
+define i64 @d1(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mflo
+ %div = sdiv i64 %a0, %a1
+ ret i64 %div
+}
+
+define i64 @d2(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddivu
+; CHECK: mfhi
+ %rem = urem i64 %a0, %a1
+ ret i64 %rem
+}
+
+define i64 @d3(i64 %a0, i64 %a1) nounwind readnone {
+entry:
+; CHECK: ddiv
+; CHECK: mfhi
+ %rem = srem i64 %a0, %a1
+ ret i64 %rem
+}
diff --git a/test/CodeGen/Mips/mips64shift.ll b/test/CodeGen/Mips/mips64shift.ll
index cc5e50856147..45d1c9532276 100644
--- a/test/CodeGen/Mips/mips64shift.ll
+++ b/test/CodeGen/Mips/mips64shift.ll
@@ -44,21 +44,21 @@ entry:
define i64 @f6(i64 %a0) nounwind readnone {
entry:
-; CHECK: dsll32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsll ${{[0-9]+}}, ${{[0-9]+}}, 40
%shl = shl i64 %a0, 40
ret i64 %shl
}
define i64 @f7(i64 %a0) nounwind readnone {
entry:
-; CHECK: dsra32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsra ${{[0-9]+}}, ${{[0-9]+}}, 40
%shr = ashr i64 %a0, 40
ret i64 %shr
}
define i64 @f8(i64 %a0) nounwind readnone {
entry:
-; CHECK: dsrl32 ${{[0-9]+}}, ${{[0-9]+}}, 8
+; CHECK: dsrl ${{[0-9]+}}, ${{[0-9]+}}, 40
%shr = lshr i64 %a0, 40
ret i64 %shr
}
@@ -94,7 +94,7 @@ entry:
define i64 @f12(i64 %a0) nounwind readnone {
entry:
-; CHECK: drotr32 ${{[0-9]+}}, ${{[0-9]+}}, 22
+; CHECK: drotr ${{[0-9]+}}, ${{[0-9]+}}, 54
%shl = shl i64 %a0, 10
%shr = lshr i64 %a0, 54
%or = or i64 %shl, %shr
diff --git a/test/CodeGen/Mips/mipslopat.ll b/test/CodeGen/Mips/mipslopat.ll
index 02798285b499..1f433b9870ce 100644
--- a/test/CodeGen/Mips/mipslopat.ll
+++ b/test/CodeGen/Mips/mipslopat.ll
@@ -6,7 +6,7 @@
define void @simple_vol_file() nounwind {
entry:
- %tmp = volatile load i32** @stat_vol_ptr_int, align 4
+ %tmp = load volatile i32** @stat_vol_ptr_int, align 4
%0 = bitcast i32* %tmp to i8*
call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1)
%tmp1 = load i32** @stat_ptr_vol_int, align 4
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index e6734808ab77..c5cbc7a66b8c 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -12,20 +12,20 @@ define void @f1() nounwind {
entry:
; CHECK: lw $[[R1:[0-9]+]], %got(f1.s1)($gp)
; CHECK: addiu $[[R0:[0-9]+]], $[[R1]], %lo(f1.s1)
-; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]])
-; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]])
-; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]])
-; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]])
-; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]])
; CHECK: lw $[[R6:[0-9]+]], 28($[[R0]])
-; CHECK: sw $[[R2]], 16($sp)
-; CHECK: sw $[[R7]], 20($sp)
-; CHECK: sw $[[R3]], 24($sp)
-; CHECK: sw $[[R4]], 28($sp)
-; CHECK: sw $[[R5]], 32($sp)
+; CHECK: lw $[[R5:[0-9]+]], 24($[[R0]])
+; CHECK: lw $[[R4:[0-9]+]], 20($[[R0]])
+; CHECK: lw $[[R3:[0-9]+]], 16($[[R0]])
+; CHECK: lw $[[R7:[0-9]+]], 12($[[R0]])
+; CHECK: lw $[[R2:[0-9]+]], 8($[[R0]])
; CHECK: sw $[[R6]], 36($sp)
-; CHECK: lw $6, %lo(f1.s1)($[[R1]])
+; CHECK: sw $[[R5]], 32($sp)
+; CHECK: sw $[[R4]], 28($sp)
+; CHECK: sw $[[R3]], 24($sp)
+; CHECK: sw $[[R7]], 20($sp)
+; CHECK: sw $[[R2]], 16($sp)
; CHECK: lw $7, 4($[[R0]])
+; CHECK: lw $6, %lo(f1.s1)($[[R1]])
%agg.tmp10 = alloca %struct.S3, align 4
call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
call void @callee2(%struct.S2* byval @f1.s2) nounwind
@@ -44,20 +44,20 @@ declare void @callee3(float, %struct.S3* byval, %struct.S1* byval)
define void @f2(float %f, %struct.S1* nocapture byval %s1) nounwind {
entry:
; CHECK: addiu $sp, $sp, -56
-; CHECK: sw $6, 64($sp)
; CHECK: sw $7, 68($sp)
+; CHECK: sw $6, 64($sp)
+; CHECK: lw $4, 88($sp)
; CHECK: ldc1 $f[[F0:[0-9]+]], 80($sp)
+; CHECK: lw $[[R3:[0-9]+]], 72($sp)
+; CHECK: lw $[[R4:[0-9]+]], 76($sp)
; CHECK: lw $[[R2:[0-9]+]], 68($sp)
; CHECK: lh $[[R1:[0-9]+]], 66($sp)
; CHECK: lb $[[R0:[0-9]+]], 64($sp)
-; CHECK: lw $[[R3:[0-9]+]], 72($sp)
-; CHECK: lw $[[R4:[0-9]+]], 76($sp)
-; CHECK: lw $4, 88($sp)
-; CHECK: sw $[[R3]], 16($sp)
-; CHECK: sw $[[R4]], 20($sp)
-; CHECK: sw $[[R2]], 24($sp)
-; CHECK: sw $[[R1]], 28($sp)
; CHECK: sw $[[R0]], 32($sp)
+; CHECK: sw $[[R1]], 28($sp)
+; CHECK: sw $[[R2]], 24($sp)
+; CHECK: sw $[[R4]], 20($sp)
+; CHECK: sw $[[R3]], 16($sp)
; CHECK: mfc1 $6, $f[[F0]]
%i2 = getelementptr inbounds %struct.S1* %s1, i32 0, i32 5
@@ -81,12 +81,12 @@ declare void @callee4(i32, double, i64, i32, i16 signext, i8 signext, float)
define void @f3(%struct.S2* nocapture byval %s2) nounwind {
entry:
; CHECK: addiu $sp, $sp, -56
-; CHECK: sw $4, 56($sp)
-; CHECK: sw $5, 60($sp)
-; CHECK: sw $6, 64($sp)
; CHECK: sw $7, 68($sp)
-; CHECK: lw $[[R0:[0-9]+]], 68($sp)
+; CHECK: sw $6, 64($sp)
+; CHECK: sw $5, 60($sp)
+; CHECK: sw $4, 56($sp)
; CHECK: lw $4, 56($sp)
+; CHECK: lw $[[R0:[0-9]+]], 68($sp)
; CHECK: sw $[[R0]], 24($sp)
%arrayidx = getelementptr inbounds %struct.S2* %s2, i32 0, i32 0, i32 0
@@ -100,14 +100,14 @@ entry:
define void @f4(float %f, %struct.S3* nocapture byval %s3, %struct.S1* nocapture byval %s1) nounwind {
entry:
; CHECK: addiu $sp, $sp, -56
-; CHECK: sw $5, 60($sp)
-; CHECK: sw $6, 64($sp)
; CHECK: sw $7, 68($sp)
+; CHECK: sw $6, 64($sp)
+; CHECK: sw $5, 60($sp)
+; CHECK: lw $4, 68($sp)
; CHECK: lw $[[R1:[0-9]+]], 88($sp)
; CHECK: lb $[[R0:[0-9]+]], 60($sp)
-; CHECK: lw $4, 68($sp)
-; CHECK: sw $[[R1]], 24($sp)
; CHECK: sw $[[R0]], 32($sp)
+; CHECK: sw $[[R1]], 24($sp)
%i = getelementptr inbounds %struct.S1* %s1, i32 0, i32 2
%tmp = load i32* %i, align 4, !tbaa !0
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 4cc48f098a9e..d1a67fd9f4bf 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -1,19 +1,20 @@
; Test to make sure that 'private' linkage is handled correctly.
;
-; RUN: llc < %s -march=mips > %t
-; RUN: grep \\\$foo: %t
-; RUN: grep call.*\\\$foo %t
-; RUN: grep \\\$baz: %t
-; RUN: grep lw.*\\\$baz %t
+; RUN: llc -march=mips < %s | FileCheck %s
define private void @foo() {
- ret void
+; CHECK: foo:
+ ret void
}
@baz = private global i32 4
define i32 @bar() {
- call void @foo()
- %1 = load i32* @baz, align 4
- ret i32 %1
+; CHECK: bar:
+; CHECK: call16($foo)
+; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
+; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
+ call void @foo()
+ %1 = load i32* @baz, align 4
+ ret i32 %1
}
diff --git a/test/CodeGen/Mips/rotate.ll b/test/CodeGen/Mips/rotate.ll
index 8e27f4aad6eb..4f3cfb7df41c 100644
--- a/test/CodeGen/Mips/rotate.ll
+++ b/test/CodeGen/Mips/rotate.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s
+; RUN: llc -march=mips -mcpu=mips32r2 < %s | FileCheck %s
; CHECK: rotrv $2, $4
define i32 @rot0(i32 %a, i32 %b) nounwind readnone {
diff --git a/test/CodeGen/Mips/swzero.ll b/test/CodeGen/Mips/swzero.ll
new file mode 100644
index 000000000000..da1e036eb997
--- /dev/null
+++ b/test/CodeGen/Mips/swzero.ll
@@ -0,0 +1,21 @@
+; RUN: llc -march=mipsel < %s | FileCheck %s
+
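+; Stores of the constant 0 should use $zero directly as the source register,
+; both for aligned sw and for the unaligned usw expansion.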
+%struct.unaligned = type <{ i32 }>
+
+define void @zero_u(%struct.unaligned* nocapture %p) nounwind {
+entry:
+; CHECK: usw $zero
+ %x = getelementptr inbounds %struct.unaligned* %p, i32 0, i32 0
+ store i32 0, i32* %x, align 1
+ ret void
+}
+
+define void @zero_a(i32* nocapture %p) nounwind {
+entry:
+; CHECK: sw $zero
+ store i32 0, i32* %p, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/tls.ll b/test/CodeGen/Mips/tls.ll
index b0474b4c4434..a3c4768bb4b5 100644
--- a/test/CodeGen/Mips/tls.ll
+++ b/test/CodeGen/Mips/tls.ll
@@ -1,7 +1,8 @@
; RUN: llc -march=mipsel < %s | FileCheck %s -check-prefix=PIC
; RUN: llc -march=mipsel -relocation-model=static < %s \
; RUN: | FileCheck %s -check-prefix=STATIC
-
+; RUN: llc -march=mipsel -relocation-model=static < %s \
+; RUN: -mips-fix-global-base-reg=false | FileCheck %s -check-prefix=STATICGP
@t1 = thread_local global i32 0, align 4
@@ -39,8 +40,32 @@ entry:
; PIC: jalr $25
; PIC: lw $2, 0($2)
+; STATICGP: lui $[[R0:[0-9]+]], %hi(__gnu_local_gp)
+; STATICGP: addiu $[[GP:[0-9]+]], $[[R0]], %lo(__gnu_local_gp)
+; STATICGP: lw ${{[0-9]+}}, %gottprel(t2)($[[GP]])
+; STATIC: lui $gp, %hi(__gnu_local_gp)
+; STATIC: addiu $gp, $gp, %lo(__gnu_local_gp)
; STATIC: rdhwr $3, $29
; STATIC: lw $[[R0:[0-9]+]], %gottprel(t2)($gp)
; STATIC: addu $[[R1:[0-9]+]], $3, $[[R0]]
; STATIC: lw $2, 0($[[R1]])
}
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+; CHECK: f3:
+
+; PIC: addiu $4, $gp, %tlsldm(f3.i)
+; PIC: jalr $25
+; PIC: lui $[[R0:[0-9]+]], %dtprel_hi(f3.i)
+; PIC: addu $[[R1:[0-9]+]], $[[R0]], $2
+; PIC: lw ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
+
+ %0 = load i32* @f3.i, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @f3.i, align 4
+ ret i32 %inc
+}
+
diff --git a/test/CodeGen/Mips/unalignedload.ll b/test/CodeGen/Mips/unalignedload.ll
index 433e896d194b..6a087ba46e64 100644
--- a/test/CodeGen/Mips/unalignedload.ll
+++ b/test/CodeGen/Mips/unalignedload.ll
@@ -9,27 +9,27 @@
define void @foo1() nounwind {
entry:
-; CHECK-EL: lw $25, %call16(foo2)
; CHECK-EL: ulhu $4, 2
+; CHECK-EL: lw $25, %call16(foo2)
; CHECK-EL: lw $[[R0:[0-9]+]], %got(s4)
; CHECK-EL: lbu $[[R1:[0-9]+]], 6($[[R0]])
-; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
; CHECK-EL: sll $[[R3:[0-9]+]], $[[R1]], 16
+; CHECK-EL: ulhu $[[R2:[0-9]+]], 4($[[R0]])
+; CHECK-EL: or $5, $[[R2]], $[[R3]]
; CHECK-EL: ulw $4, 0($[[R0]])
; CHECK-EL: lw $25, %call16(foo4)
-; CHECK-EL: or $5, $[[R2]], $[[R3]]
; CHECK-EB: ulhu $[[R0:[0-9]+]], 2
-; CHECK-EB: lw $25, %call16(foo2)
; CHECK-EB: sll $4, $[[R0]], 16
+; CHECK-EB: lw $25, %call16(foo2)
; CHECK-EB: lw $[[R1:[0-9]+]], %got(s4)
-; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
; CHECK-EB: lbu $[[R3:[0-9]+]], 6($[[R1]])
-; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
; CHECK-EB: sll $[[R5:[0-9]+]], $[[R3]], 8
+; CHECK-EB: ulhu $[[R2:[0-9]+]], 4($[[R1]])
+; CHECK-EB: sll $[[R4:[0-9]+]], $[[R2]], 16
+; CHECK-EB: or $5, $[[R4]], $[[R5]]
; CHECK-EB: ulw $4, 0($[[R1]])
; CHECK-EB: lw $25, %call16(foo4)
-; CHECK-EB: or $5, $[[R4]], $[[R5]]
tail call void @foo2(%struct.S1* byval getelementptr inbounds (%struct.S2* @s2, i32 0, i32 1)) nounwind
tail call void @foo4(%struct.S4* byval @s4) nounwind
diff --git a/test/CodeGen/Mips/zeroreg.ll b/test/CodeGen/Mips/zeroreg.ll
new file mode 100644
index 000000000000..b890e1dba9fc
--- /dev/null
+++ b/test/CodeGen/Mips/zeroreg.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=mipsel | FileCheck %s
+
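+; Selects against zero should use movn/movz with $zero rather than
+; materializing the constant 0 with addiu.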
+@g1 = external global i32
+
+define i32 @foo0(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK: movn
+ %tobool = icmp ne i32 %s, 0
+ %0 = load i32* @g1, align 4, !tbaa !0
+ %cond = select i1 %tobool, i32 0, i32 %0
+ ret i32 %cond
+}
+
+define i32 @foo1(i32 %s) nounwind readonly {
+entry:
+; CHECK-NOT: addiu
+; CHECK: movz
+ %tobool = icmp ne i32 %s, 0
+ %0 = load i32* @g1, align 4, !tbaa !0
+ %cond = select i1 %tobool, i32 %0, i32 0
+ ret i32 %cond
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
index a643d251f074..f55070af2223 100644
--- a/test/CodeGen/PTX/cvt.ll
+++ b/test/CodeGen/PTX/cvt.ll
@@ -172,9 +172,9 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
; f32
define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: mov.b32 %f0, 1065353216;
-; CHECK: mov.b32 %f1, 0;
-; CHECK: selp.f32 %ret{{[0-9]+}}, %f0, %f1, %p{{[0-9]+}};
+; CHECK: mov.b32 %f0, 0;
+; CHECK: mov.b32 %f1, 1065353216;
+; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
; CHECK: ret;
%a = uitofp i1 %x to float
ret float %a
@@ -232,9 +232,9 @@ define ptx_device float @cvt_f32_s64(i64 %x) {
; f64
define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: mov.b64 %fd0, 4575657221408423936;
-; CHECK: mov.b64 %fd1, 0;
-; CHECK: selp.f64 %ret{{[0-9]+}}, %fd0, %fd1, %p{{[0-9]+}};
+; CHECK: mov.b64 %fd0, 0;
+; CHECK: mov.b64 %fd1, 4575657221408423936;
+; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
; CHECK: ret;
%a = uitofp i1 %x to double
ret double %a
diff --git a/test/CodeGen/PTX/dg.exp b/test/CodeGen/PTX/dg.exp
deleted file mode 100644
index 2c304b57741e..000000000000
--- a/test/CodeGen/PTX/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PTX] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
index 81fd33a28f62..e55820dfb0ea 100644
--- a/test/CodeGen/PTX/ld.ll
+++ b/test/CodeGen/PTX/ld.ll
@@ -1,48 +1,48 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
-;CHECK: .extern .global .b8 array_i16[20];
+;CHECK: .extern .global .b16 array_i16[10];
@array_i16 = external global [10 x i16]
-;CHECK: .extern .const .b8 array_constant_i16[20];
+;CHECK: .extern .const .b16 array_constant_i16[10];
@array_constant_i16 = external addrspace(1) constant [10 x i16]
-;CHECK: .extern .shared .b8 array_shared_i16[20];
+;CHECK: .extern .shared .b16 array_shared_i16[10];
@array_shared_i16 = external addrspace(4) global [10 x i16]
-;CHECK: .extern .global .b8 array_i32[40];
+;CHECK: .extern .global .b32 array_i32[10];
@array_i32 = external global [10 x i32]
-;CHECK: .extern .const .b8 array_constant_i32[40];
+;CHECK: .extern .const .b32 array_constant_i32[10];
@array_constant_i32 = external addrspace(1) constant [10 x i32]
-;CHECK: .extern .shared .b8 array_shared_i32[40];
+;CHECK: .extern .shared .b32 array_shared_i32[10];
@array_shared_i32 = external addrspace(4) global [10 x i32]
-;CHECK: .extern .global .b8 array_i64[80];
+;CHECK: .extern .global .b64 array_i64[10];
@array_i64 = external global [10 x i64]
-;CHECK: .extern .const .b8 array_constant_i64[80];
+;CHECK: .extern .const .b64 array_constant_i64[10];
@array_constant_i64 = external addrspace(1) constant [10 x i64]
-;CHECK: .extern .shared .b8 array_shared_i64[80];
+;CHECK: .extern .shared .b64 array_shared_i64[10];
@array_shared_i64 = external addrspace(4) global [10 x i64]
-;CHECK: .extern .global .b8 array_float[40];
+;CHECK: .extern .global .b32 array_float[10];
@array_float = external global [10 x float]
-;CHECK: .extern .const .b8 array_constant_float[40];
+;CHECK: .extern .const .b32 array_constant_float[10];
@array_constant_float = external addrspace(1) constant [10 x float]
-;CHECK: .extern .shared .b8 array_shared_float[40];
+;CHECK: .extern .shared .b32 array_shared_float[10];
@array_shared_float = external addrspace(4) global [10 x float]
-;CHECK: .extern .global .b8 array_double[80];
+;CHECK: .extern .global .b64 array_double[10];
@array_double = external global [10 x double]
-;CHECK: .extern .const .b8 array_constant_double[80];
+;CHECK: .extern .const .b64 array_constant_double[10];
@array_constant_double = external addrspace(1) constant [10 x double]
-;CHECK: .extern .shared .b8 array_shared_double[80];
+;CHECK: .extern .shared .b64 array_shared_double[10];
@array_shared_double = external addrspace(4) global [10 x double]
diff --git a/test/CodeGen/PTX/lit.local.cfg b/test/CodeGen/PTX/lit.local.cfg
new file mode 100644
index 000000000000..e748f7f05b31
--- /dev/null
+++ b/test/CodeGen/PTX/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'PTX' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/PTX/mad-disabling.ll b/test/CodeGen/PTX/mad-disabling.ll
index ad7b3417ec48..603c3ba69f79 100644
--- a/test/CodeGen/PTX/mad-disabling.ll
+++ b/test/CodeGen/PTX/mad-disabling.ll
@@ -1,8 +1,13 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | grep "mad"
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | grep -v "mad"
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
entry:
+; FMA: mad.rn.f32
+; MUL: mul.rn.f32
+; MUL: add.rn.f32
%a = fmul float %x, %y
%b = fadd float %a, %z
ret float %b
@@ -10,6 +15,9 @@ entry:
define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
entry:
+; FMA: mad.rn.f64
+; MUL: mul.rn.f64
+; MUL: add.rn.f64
%a = fmul double %x, %y
%b = fadd double %a, %z
ret double %b
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index 75555a7c5e98..9e501be03eeb 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -31,31 +31,31 @@ define ptx_device double @t1_f64() {
}
define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.b16 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i16 %x
}
define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.b32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i32 %x
}
define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.b64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i64 %x
}
define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret float %x
}
define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 %ret{{[0-9]+}}, %param{{[0-9]+}};
+; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret double %x
}
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
index 09015dae1dbc..377f17379fe1 100644
--- a/test/CodeGen/PTX/parameter-order.ll
+++ b/test/CodeGen/PTX/parameter-order.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
-; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}}, .reg .b32 %param{{[0-9]+}})
+; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
%result = sub i32 %b, %c
diff --git a/test/CodeGen/PTX/printf.ll b/test/CodeGen/PTX/printf.ll
new file mode 100644
index 000000000000..f901b2055f0d
--- /dev/null
+++ b/test/CodeGen/PTX/printf.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
+
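+; printf calls are lowered to calls to vprintf, passing the format string and
+; a pointer to a local buffer holding the variadic arguments; the buffer
+; address is converted to the generic address space with cvta.local.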
+declare i32 @printf(i8*, ...)
+
+@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
+
+define ptx_device void @t1_printf() {
+; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
+; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
+; CHECK: ret;
+ %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
+ ret void
+}
+
+@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
+
+define ptx_device void @t2_printf() {
+; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
+; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
+; CHECK: cvta.local.u64 %rd{{[0-9]+}}, __local{{[0-9]+}};
+; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
+; CHECK: ret;
+ %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
+ ret void
+}
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
index 63ef58c4fb6c..c7943630c435 100644
--- a/test/CodeGen/PTX/st.ll
+++ b/test/CodeGen/PTX/st.ll
@@ -1,48 +1,48 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
-;CHECK: .extern .global .b8 array_i16[20];
+;CHECK: .extern .global .b16 array_i16[10];
@array_i16 = external global [10 x i16]
-;CHECK: .extern .const .b8 array_constant_i16[20];
+;CHECK: .extern .const .b16 array_constant_i16[10];
@array_constant_i16 = external addrspace(1) constant [10 x i16]
-;CHECK: .extern .shared .b8 array_shared_i16[20];
+;CHECK: .extern .shared .b16 array_shared_i16[10];
@array_shared_i16 = external addrspace(4) global [10 x i16]
-;CHECK: .extern .global .b8 array_i32[40];
+;CHECK: .extern .global .b32 array_i32[10];
@array_i32 = external global [10 x i32]
-;CHECK: .extern .const .b8 array_constant_i32[40];
+;CHECK: .extern .const .b32 array_constant_i32[10];
@array_constant_i32 = external addrspace(1) constant [10 x i32]
-;CHECK: .extern .shared .b8 array_shared_i32[40];
+;CHECK: .extern .shared .b32 array_shared_i32[10];
@array_shared_i32 = external addrspace(4) global [10 x i32]
-;CHECK: .extern .global .b8 array_i64[80];
+;CHECK: .extern .global .b64 array_i64[10];
@array_i64 = external global [10 x i64]
-;CHECK: .extern .const .b8 array_constant_i64[80];
+;CHECK: .extern .const .b64 array_constant_i64[10];
@array_constant_i64 = external addrspace(1) constant [10 x i64]
-;CHECK: .extern .shared .b8 array_shared_i64[80];
+;CHECK: .extern .shared .b64 array_shared_i64[10];
@array_shared_i64 = external addrspace(4) global [10 x i64]
-;CHECK: .extern .global .b8 array_float[40];
+;CHECK: .extern .global .b32 array_float[10];
@array_float = external global [10 x float]
-;CHECK: .extern .const .b8 array_constant_float[40];
+;CHECK: .extern .const .b32 array_constant_float[10];
@array_constant_float = external addrspace(1) constant [10 x float]
-;CHECK: .extern .shared .b8 array_shared_float[40];
+;CHECK: .extern .shared .b32 array_shared_float[10];
@array_shared_float = external addrspace(4) global [10 x float]
-;CHECK: .extern .global .b8 array_double[80];
+;CHECK: .extern .global .b64 array_double[10];
@array_double = external global [10 x double]
-;CHECK: .extern .const .b8 array_constant_double[80];
+;CHECK: .extern .const .b64 array_constant_double[10];
@array_constant_double = external addrspace(1) constant [10 x double]
-;CHECK: .extern .shared .b8 array_shared_double[80];
+;CHECK: .extern .shared .b64 array_shared_double[10];
@array_shared_double = external addrspace(4) global [10 x double]
diff --git a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll b/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
deleted file mode 100644
index 57ed250abc09..000000000000
--- a/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=ppc32 -combiner-alias-analysis | grep f5
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.2.0"
- %struct.Point = type { double, double, double }
-
-define void @offset(%struct.Point* %pt, double %x, double %y, double %z) {
-entry:
- %tmp = getelementptr %struct.Point* %pt, i32 0, i32 0 ; <double*> [#uses=2]
- %tmp.upgrd.1 = load double* %tmp ; <double> [#uses=1]
- %tmp2 = fadd double %tmp.upgrd.1, %x ; <double> [#uses=1]
- store double %tmp2, double* %tmp
- %tmp6 = getelementptr %struct.Point* %pt, i32 0, i32 1 ; <double*> [#uses=2]
- %tmp7 = load double* %tmp6 ; <double> [#uses=1]
- %tmp9 = fadd double %tmp7, %y ; <double> [#uses=1]
- store double %tmp9, double* %tmp6
- %tmp13 = getelementptr %struct.Point* %pt, i32 0, i32 2 ; <double*> [#uses=2]
- %tmp14 = load double* %tmp13 ; <double> [#uses=1]
- %tmp16 = fadd double %tmp14, %z ; <double> [#uses=1]
- store double %tmp16, double* %tmp13
- ret void
-}
-
diff --git a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index cca9e658ad5f..3620b0e6340a 100644
--- a/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -2,11 +2,11 @@
define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
%tmp19 = load i64* %t
- %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19 ) ; <i64> [#uses=1]
+ %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19, i1 true ) ; <i64> [#uses=1]
%tmp23 = trunc i64 %tmp22 to i32
%tmp89 = add i32 %tmp23, -64 ; <i32> [#uses=1]
%tmp90 = add i32 %tmp89, 0 ; <i32> [#uses=1]
ret i32 %tmp90
}
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index e50fac4472a9..d10291e190b9 100644
--- a/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin -enable-ppc32-regscavenger
+; RUN: llc < %s -mtriple=powerpc-apple-darwin
declare i8* @bar(i32)
diff --git a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index 9f35b8346c68..fb8cdcea63aa 100644
--- a/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc32 -enable-ppc32-regscavenger
+; RUN: llc < %s -march=ppc32
%struct._cpp_strbuf = type { i8*, i32, i32 }
%struct.cpp_string = type { i32, i8* }
diff --git a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
index dd425f59822b..f256bca81885 100644
--- a/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
+++ b/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ppc64 -enable-ppc64-regscavenger
+; RUN: llc < %s -march=ppc64
define i16 @test(i8* %d1, i16* %d2) {
%tmp237 = call i16 asm "lhbrx $0, $2, $1", "=r,r,bO,m"( i8* %d1, i32 0, i16* %d2 )
diff --git a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index 7b6d4916c1a8..e7a1cf69c693 100644
--- a/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -12,7 +12,7 @@ declare void @IODelay(i32)
define i32 @_Z14ProgramByWordsPvyy(i8* %buffer, i64 %Offset, i64 %bufferSize) nounwind {
entry:
- volatile store i8 -1, i8* null, align 1
+ store volatile i8 -1, i8* null, align 1
%tmp28 = icmp eq i8 0, 0 ; <i1> [#uses=1]
br i1 %tmp28, label %bb107, label %bb
@@ -43,7 +43,7 @@ bb68: ; preds = %bb31
%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32 ; <i32> [#uses=1]
%tmp202122.i = inttoptr i32 %tmp2021.i to i8* ; <i8*> [#uses=1]
tail call void @IODelay( i32 500 ) nounwind
- %tmp53.i = volatile load i16* null, align 2 ; <i16> [#uses=2]
+ %tmp53.i = load volatile i16* null, align 2 ; <i16> [#uses=2]
%tmp5455.i = zext i16 %tmp53.i to i32 ; <i32> [#uses=1]
br i1 false, label %bb.i, label %bb65.i
@@ -59,7 +59,7 @@ bb70.i: ; preds = %bb65.i
ret i32 0
_Z24unlock_then_erase_sectory.exit: ; preds = %bb65.i
- volatile store i8 -1, i8* %tmp202122.i, align 1
+ store volatile i8 -1, i8* %tmp202122.i, align 1
%tmp93 = add i64 0, %Pos.0.reg2mem.0 ; <i64> [#uses=2]
%tmp98 = add i64 0, %Offset ; <i64> [#uses=1]
%tmp100 = icmp ugt i64 %tmp98, %tmp93 ; <i1> [#uses=1]
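
The volatile rewrites in this file are purely syntactic: the keyword moved from before the opcode to after it, with unchanged semantics. Side by side:

    store volatile i8 -1, i8* %p, align 1    ; was: volatile store i8 -1, i8* %p, align 1
    %v = load volatile i16* %q, align 2      ; was: %v = volatile load i16* %q, align 2
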
diff --git a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
index b73382e6ebfd..974a99a52cb5 100644
--- a/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
+++ b/test/CodeGen/PowerPC/2010-02-12-saveCR.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
; ModuleID = 'hh.c'
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32"
target triple = "powerpc-apple-darwin9.6"
@@ -7,21 +7,27 @@ target triple = "powerpc-apple-darwin9.6"
define void @foo() nounwind {
entry:
;CHECK: mfcr r2
+;CHECK: lis r3, 1
;CHECK: rlwinm r2, r2, 8, 0, 31
-;CHECK: lis r0, 1
-;CHECK: ori r0, r0, 34540
-;CHECK: stwx r2, r1, r0
+;CHECK: ori r3, r3, 34524
+;CHECK: stwx r2, r1, r3
+; Make sure that the register scavenger returns the same temporary register.
+;CHECK: mfcr r2
+;CHECK: lis r3, 1
+;CHECK: rlwinm r2, r2, 12, 0, 31
+;CHECK: ori r3, r3, 34520
+;CHECK: stwx r2, r1, r3
%x = alloca [100000 x i8] ; <[100000 x i8]*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%x1 = bitcast [100000 x i8]* %x to i8* ; <i8*> [#uses=1]
call void @bar(i8* %x1) nounwind
- call void asm sideeffect "", "~{cr2}"() nounwind
+ call void asm sideeffect "", "~{cr2},~{cr3}"() nounwind
br label %return
return: ; preds = %entry
-;CHECK: lis r0, 1
-;CHECK: ori r0, r0, 34540
-;CHECK: lwzx r2, r1, r0
+;CHECK: lis r3, 1
+;CHECK: ori r3, r3, 34524
+;CHECK: lwzx r2, r1, r3
;CHECK: rlwinm r2, r2, 24, 0, 31
;CHECK: mtcrf 32, r2
ret void
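
Context for the updated checks: the [100000 x i8] alloca pushes the CR spill slot past the +/-32767-byte reach of a 16-bit displacement, so the offset is first materialized in a register and stwx/lwzx index off it:

    lis  r3, 1          ; r3 = 65536
    ori  r3, r3, 34524  ; r3 = 65536 + 34524 = 100060
    stwx r2, r1, r3     ; spill the CR copy at r1 + 100060

The second mfcr/stwx group, per the comment in the test, additionally pins that the register scavenger hands back the same temporary (r3) for the second CR field.
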
diff --git a/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
new file mode 100644
index 000000000000..6161b55edee9
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+
+@.str11 = private unnamed_addr constant [6 x i8] c"s122 \00", align 1
+@.str152 = private unnamed_addr constant [14 x i8] c"S122\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s122(i32 %n1, i32 %n3) nounwind {
+entry:
+ %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str11, i64 0, i64 0))
+ %call1 = tail call i64 @clock() nounwind
+ %sub = add nsw i32 %n1, -1
+ %cmp316 = icmp slt i32 %sub, 32000
+ br i1 %cmp316, label %entry.split.us, label %for.end.7
+
+entry.split.us: ; preds = %entry
+ %0 = sext i32 %sub to i64
+ %1 = sext i32 %n3 to i64
+ br label %for.body4.lr.ph.us
+
+for.body4.us: ; preds = %for.body4.lr.ph.us, %for.body4.us
+ %indvars.iv20 = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next21, %for.body4.us ]
+ %indvars.iv = phi i64 [ %0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
+ %indvars.iv.next21 = add i64 %indvars.iv20, 1
+ %sub5.us = sub i64 31999, %indvars.iv20
+ %sext = shl i64 %sub5.us, 32
+ %idxprom.us = ashr exact i64 %sext, 32
+ %arrayidx.us = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us
+ %2 = load float* %arrayidx.us, align 4, !tbaa !5
+ %arrayidx7.us = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv
+ %3 = load float* %arrayidx7.us, align 4, !tbaa !5
+ %add8.us = fadd float %3, %2
+ store float %add8.us, float* %arrayidx7.us, align 4, !tbaa !5
+ %indvars.iv.next = add i64 %indvars.iv, %1
+ %4 = trunc i64 %indvars.iv.next to i32
+ %cmp3.us = icmp slt i32 %4, 32000
+ br i1 %cmp3.us, label %for.body4.us, label %for.body4.lr.ph.us.1
+
+for.body4.lr.ph.us: ; preds = %entry.split.us, %for.end.us.4
+ %nl.019.us = phi i32 [ 0, %entry.split.us ], [ %inc.us.4, %for.end.us.4 ]
+ br label %for.body4.us
+
+for.end12: ; preds = %for.end.7, %for.end.us.4
+ %call13 = tail call i64 @clock() nounwind
+ %sub14 = sub nsw i64 %call13, %call1
+ %conv = sitofp i64 %sub14 to double
+ %div = fdiv double %conv, 1.000000e+06
+ %call15 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str152, i64 0, i64 0), double %div) nounwind
+ tail call void @check(i32 1)
+ ret i32 0
+
+for.body4.lr.ph.us.1: ; preds = %for.body4.us
+ %call10.us = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.1
+
+for.body4.us.1: ; preds = %for.body4.us.1, %for.body4.lr.ph.us.1
+ %indvars.iv20.1 = phi i64 [ 0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next21.1, %for.body4.us.1 ]
+ %indvars.iv.1 = phi i64 [ %0, %for.body4.lr.ph.us.1 ], [ %indvars.iv.next.1, %for.body4.us.1 ]
+ %indvars.iv.next21.1 = add i64 %indvars.iv20.1, 1
+ %sub5.us.1 = sub i64 31999, %indvars.iv20.1
+ %sext23 = shl i64 %sub5.us.1, 32
+ %idxprom.us.1 = ashr exact i64 %sext23, 32
+ %arrayidx.us.1 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.1
+ %5 = load float* %arrayidx.us.1, align 4, !tbaa !5
+ %arrayidx7.us.1 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.1
+ %6 = load float* %arrayidx7.us.1, align 4, !tbaa !5
+ %add8.us.1 = fadd float %6, %5
+ store float %add8.us.1, float* %arrayidx7.us.1, align 4, !tbaa !5
+ %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
+ %7 = trunc i64 %indvars.iv.next.1 to i32
+ %cmp3.us.1 = icmp slt i32 %7, 32000
+ br i1 %cmp3.us.1, label %for.body4.us.1, label %for.body4.lr.ph.us.2
+
+for.body4.lr.ph.us.2: ; preds = %for.body4.us.1
+ %call10.us.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.2
+
+for.body4.us.2: ; preds = %for.body4.us.2, %for.body4.lr.ph.us.2
+ %indvars.iv20.2 = phi i64 [ 0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next21.2, %for.body4.us.2 ]
+ %indvars.iv.2 = phi i64 [ %0, %for.body4.lr.ph.us.2 ], [ %indvars.iv.next.2, %for.body4.us.2 ]
+ %indvars.iv.next21.2 = add i64 %indvars.iv20.2, 1
+ %sub5.us.2 = sub i64 31999, %indvars.iv20.2
+ %sext24 = shl i64 %sub5.us.2, 32
+ %idxprom.us.2 = ashr exact i64 %sext24, 32
+ %arrayidx.us.2 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.2
+ %8 = load float* %arrayidx.us.2, align 4, !tbaa !5
+ %arrayidx7.us.2 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.2
+ %9 = load float* %arrayidx7.us.2, align 4, !tbaa !5
+ %add8.us.2 = fadd float %9, %8
+ store float %add8.us.2, float* %arrayidx7.us.2, align 4, !tbaa !5
+ %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
+ %10 = trunc i64 %indvars.iv.next.2 to i32
+ %cmp3.us.2 = icmp slt i32 %10, 32000
+ br i1 %cmp3.us.2, label %for.body4.us.2, label %for.body4.lr.ph.us.3
+
+for.body4.lr.ph.us.3: ; preds = %for.body4.us.2
+ %call10.us.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.3
+
+for.body4.us.3: ; preds = %for.body4.us.3, %for.body4.lr.ph.us.3
+ %indvars.iv20.3 = phi i64 [ 0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next21.3, %for.body4.us.3 ]
+ %indvars.iv.3 = phi i64 [ %0, %for.body4.lr.ph.us.3 ], [ %indvars.iv.next.3, %for.body4.us.3 ]
+ %indvars.iv.next21.3 = add i64 %indvars.iv20.3, 1
+ %sub5.us.3 = sub i64 31999, %indvars.iv20.3
+ %sext25 = shl i64 %sub5.us.3, 32
+ %idxprom.us.3 = ashr exact i64 %sext25, 32
+ %arrayidx.us.3 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.3
+ %11 = load float* %arrayidx.us.3, align 4, !tbaa !5
+ %arrayidx7.us.3 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.3
+ %12 = load float* %arrayidx7.us.3, align 4, !tbaa !5
+ %add8.us.3 = fadd float %12, %11
+ store float %add8.us.3, float* %arrayidx7.us.3, align 4, !tbaa !5
+ %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
+ %13 = trunc i64 %indvars.iv.next.3 to i32
+ %cmp3.us.3 = icmp slt i32 %13, 32000
+ br i1 %cmp3.us.3, label %for.body4.us.3, label %for.body4.lr.ph.us.4
+
+for.body4.lr.ph.us.4: ; preds = %for.body4.us.3
+ %call10.us.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ br label %for.body4.us.4
+
+for.body4.us.4: ; preds = %for.body4.us.4, %for.body4.lr.ph.us.4
+ %indvars.iv20.4 = phi i64 [ 0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next21.4, %for.body4.us.4 ]
+ %indvars.iv.4 = phi i64 [ %0, %for.body4.lr.ph.us.4 ], [ %indvars.iv.next.4, %for.body4.us.4 ]
+ %indvars.iv.next21.4 = add i64 %indvars.iv20.4, 1
+ %sub5.us.4 = sub i64 31999, %indvars.iv20.4
+ %sext26 = shl i64 %sub5.us.4, 32
+ %idxprom.us.4 = ashr exact i64 %sext26, 32
+ %arrayidx.us.4 = getelementptr inbounds [32000 x float]* @b, i64 0, i64 %idxprom.us.4
+ %14 = load float* %arrayidx.us.4, align 4, !tbaa !5
+ %arrayidx7.us.4 = getelementptr inbounds [32000 x float]* @a, i64 0, i64 %indvars.iv.4
+ %15 = load float* %arrayidx7.us.4, align 4, !tbaa !5
+ %add8.us.4 = fadd float %15, %14
+ store float %add8.us.4, float* %arrayidx7.us.4, align 4, !tbaa !5
+ %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
+ %16 = trunc i64 %indvars.iv.next.4 to i32
+ %cmp3.us.4 = icmp slt i32 %16, 32000
+ br i1 %cmp3.us.4, label %for.body4.us.4, label %for.end.us.4
+
+for.end.us.4: ; preds = %for.body4.us.4
+ %call10.us.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %inc.us.4 = add nsw i32 %nl.019.us, 5
+ %exitcond.4 = icmp eq i32 %inc.us.4, 200000
+ br i1 %exitcond.4, label %for.end12, label %for.body4.lr.ph.us
+
+for.end.7: ; preds = %entry, %for.end.7
+ %nl.019 = phi i32 [ %inc.7, %for.end.7 ], [ 0, %entry ]
+ %call10 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.1 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.2 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.3 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.4 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.5 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.6 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %call10.7 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float 0.000000e+00) nounwind
+ %inc.7 = add nsw i32 %nl.019, 8
+ %exitcond.7 = icmp eq i32 %inc.7, 200000
+ br i1 %exitcond.7, label %for.end12, label %for.end.7
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
new file mode 100644
index 000000000000..52bf6c7e5017
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s -mtriple=powerpc-apple-darwin -mcpu=g4 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+; ModuleID = 'tsc.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = common global [32000 x float] zeroinitializer, align 16
+@b = common global [32000 x float] zeroinitializer, align 16
+@c = common global [32000 x float] zeroinitializer, align 16
+@d = common global [32000 x float] zeroinitializer, align 16
+@e = common global [32000 x float] zeroinitializer, align 16
+@aa = common global [256 x [256 x float]] zeroinitializer, align 16
+@bb = common global [256 x [256 x float]] zeroinitializer, align 16
+@cc = common global [256 x [256 x float]] zeroinitializer, align 16
+@temp = common global float 0.000000e+00, align 4
+
+@.str81 = private unnamed_addr constant [6 x i8] c"s3110\00", align 1
+@.str235 = private unnamed_addr constant [15 x i8] c"S3110\09 %.2f \09\09\00", align 1
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @init(i8* %name) nounwind
+declare i64 @clock() nounwind
+declare i32 @dummy(float*, float*, float*, float*, float*, [256 x float]*, [256 x float]*, [256 x float]*, float)
+declare void @check(i32 %name) nounwind
+
+; CHECK: mfcr
+; CHECK: mtcr
+
+define i32 @s3110() nounwind {
+entry:
+ %call = tail call i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str81, i64 0, i64 0))
+ %call1 = tail call i64 @clock() nounwind
+ br label %for.body
+
+for.body: ; preds = %for.end17, %entry
+ %nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
+ %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16, !tbaa !5
+ br label %for.cond5.preheader
+
+for.cond5.preheader: ; preds = %for.inc15, %for.body
+ %indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.inc15 ]
+ %max.139 = phi float [ %0, %for.body ], [ %max.3.15, %for.inc15 ]
+ %xindex.138 = phi i32 [ 0, %for.body ], [ %xindex.3.15, %for.inc15 ]
+ %yindex.137 = phi i32 [ 0, %for.body ], [ %yindex.3.15, %for.inc15 ]
+ br label %for.body7
+
+for.body7: ; preds = %for.body7, %for.cond5.preheader
+ %indvars.iv = phi i64 [ 0, %for.cond5.preheader ], [ %indvars.iv.next.15, %for.body7 ]
+ %max.235 = phi float [ %max.139, %for.cond5.preheader ], [ %max.3.15, %for.body7 ]
+ %xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
+ %yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
+ %arrayidx9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
+ %1 = load float* %arrayidx9, align 16, !tbaa !5
+ %cmp10 = fcmp ogt float %1, %max.235
+ %2 = trunc i64 %indvars.iv to i32
+ %yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
+ %3 = trunc i64 %indvars.iv42 to i32
+ %xindex.3 = select i1 %cmp10, i32 %3, i32 %xindex.234
+ %max.3 = select i1 %cmp10, float %1, float %max.235
+ %indvars.iv.next45 = or i64 %indvars.iv, 1
+ %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
+ %4 = load float* %arrayidx9.1, align 4, !tbaa !5
+ %cmp10.1 = fcmp ogt float %4, %max.3
+ %5 = trunc i64 %indvars.iv.next45 to i32
+ %yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
+ %xindex.3.1 = select i1 %cmp10.1, i32 %3, i32 %xindex.3
+ %max.3.1 = select i1 %cmp10.1, float %4, float %max.3
+ %indvars.iv.next.146 = or i64 %indvars.iv, 2
+ %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
+ %6 = load float* %arrayidx9.2, align 8, !tbaa !5
+ %cmp10.2 = fcmp ogt float %6, %max.3.1
+ %7 = trunc i64 %indvars.iv.next.146 to i32
+ %yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
+ %xindex.3.2 = select i1 %cmp10.2, i32 %3, i32 %xindex.3.1
+ %max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
+ %indvars.iv.next.247 = or i64 %indvars.iv, 3
+ %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
+ %8 = load float* %arrayidx9.3, align 4, !tbaa !5
+ %cmp10.3 = fcmp ogt float %8, %max.3.2
+ %9 = trunc i64 %indvars.iv.next.247 to i32
+ %yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
+ %xindex.3.3 = select i1 %cmp10.3, i32 %3, i32 %xindex.3.2
+ %max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
+ %indvars.iv.next.348 = or i64 %indvars.iv, 4
+ %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
+ %10 = load float* %arrayidx9.4, align 16, !tbaa !5
+ %cmp10.4 = fcmp ogt float %10, %max.3.3
+ %11 = trunc i64 %indvars.iv.next.348 to i32
+ %yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
+ %xindex.3.4 = select i1 %cmp10.4, i32 %3, i32 %xindex.3.3
+ %max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
+ %indvars.iv.next.449 = or i64 %indvars.iv, 5
+ %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
+ %12 = load float* %arrayidx9.5, align 4, !tbaa !5
+ %cmp10.5 = fcmp ogt float %12, %max.3.4
+ %13 = trunc i64 %indvars.iv.next.449 to i32
+ %yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
+ %xindex.3.5 = select i1 %cmp10.5, i32 %3, i32 %xindex.3.4
+ %max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
+ %indvars.iv.next.550 = or i64 %indvars.iv, 6
+ %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
+ %14 = load float* %arrayidx9.6, align 8, !tbaa !5
+ %cmp10.6 = fcmp ogt float %14, %max.3.5
+ %15 = trunc i64 %indvars.iv.next.550 to i32
+ %yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
+ %xindex.3.6 = select i1 %cmp10.6, i32 %3, i32 %xindex.3.5
+ %max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
+ %indvars.iv.next.651 = or i64 %indvars.iv, 7
+ %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
+ %16 = load float* %arrayidx9.7, align 4, !tbaa !5
+ %cmp10.7 = fcmp ogt float %16, %max.3.6
+ %17 = trunc i64 %indvars.iv.next.651 to i32
+ %yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
+ %xindex.3.7 = select i1 %cmp10.7, i32 %3, i32 %xindex.3.6
+ %max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
+ %indvars.iv.next.752 = or i64 %indvars.iv, 8
+ %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
+ %18 = load float* %arrayidx9.8, align 16, !tbaa !5
+ %cmp10.8 = fcmp ogt float %18, %max.3.7
+ %19 = trunc i64 %indvars.iv.next.752 to i32
+ %yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
+ %xindex.3.8 = select i1 %cmp10.8, i32 %3, i32 %xindex.3.7
+ %max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
+ %indvars.iv.next.853 = or i64 %indvars.iv, 9
+ %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
+ %20 = load float* %arrayidx9.9, align 4, !tbaa !5
+ %cmp10.9 = fcmp ogt float %20, %max.3.8
+ %21 = trunc i64 %indvars.iv.next.853 to i32
+ %yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
+ %xindex.3.9 = select i1 %cmp10.9, i32 %3, i32 %xindex.3.8
+ %max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
+ %indvars.iv.next.954 = or i64 %indvars.iv, 10
+ %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
+ %22 = load float* %arrayidx9.10, align 8, !tbaa !5
+ %cmp10.10 = fcmp ogt float %22, %max.3.9
+ %23 = trunc i64 %indvars.iv.next.954 to i32
+ %yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
+ %xindex.3.10 = select i1 %cmp10.10, i32 %3, i32 %xindex.3.9
+ %max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
+ %indvars.iv.next.1055 = or i64 %indvars.iv, 11
+ %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
+ %24 = load float* %arrayidx9.11, align 4, !tbaa !5
+ %cmp10.11 = fcmp ogt float %24, %max.3.10
+ %25 = trunc i64 %indvars.iv.next.1055 to i32
+ %yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
+ %xindex.3.11 = select i1 %cmp10.11, i32 %3, i32 %xindex.3.10
+ %max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
+ %indvars.iv.next.1156 = or i64 %indvars.iv, 12
+ %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
+ %26 = load float* %arrayidx9.12, align 16, !tbaa !5
+ %cmp10.12 = fcmp ogt float %26, %max.3.11
+ %27 = trunc i64 %indvars.iv.next.1156 to i32
+ %yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
+ %xindex.3.12 = select i1 %cmp10.12, i32 %3, i32 %xindex.3.11
+ %max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
+ %indvars.iv.next.1257 = or i64 %indvars.iv, 13
+ %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
+ %28 = load float* %arrayidx9.13, align 4, !tbaa !5
+ %cmp10.13 = fcmp ogt float %28, %max.3.12
+ %29 = trunc i64 %indvars.iv.next.1257 to i32
+ %yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
+ %xindex.3.13 = select i1 %cmp10.13, i32 %3, i32 %xindex.3.12
+ %max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
+ %indvars.iv.next.1358 = or i64 %indvars.iv, 14
+ %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
+ %30 = load float* %arrayidx9.14, align 8, !tbaa !5
+ %cmp10.14 = fcmp ogt float %30, %max.3.13
+ %31 = trunc i64 %indvars.iv.next.1358 to i32
+ %yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
+ %xindex.3.14 = select i1 %cmp10.14, i32 %3, i32 %xindex.3.13
+ %max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
+ %indvars.iv.next.1459 = or i64 %indvars.iv, 15
+ %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
+ %32 = load float* %arrayidx9.15, align 4, !tbaa !5
+ %cmp10.15 = fcmp ogt float %32, %max.3.14
+ %33 = trunc i64 %indvars.iv.next.1459 to i32
+ %yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
+ %xindex.3.15 = select i1 %cmp10.15, i32 %3, i32 %xindex.3.14
+ %max.3.15 = select i1 %cmp10.15, float %32, float %max.3.14
+ %indvars.iv.next.15 = add i64 %indvars.iv, 16
+ %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+ %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 256
+ br i1 %exitcond.15, label %for.inc15, label %for.body7
+
+for.inc15: ; preds = %for.body7
+ %indvars.iv.next43 = add i64 %indvars.iv42, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next43 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %for.end17, label %for.cond5.preheader
+
+for.end17: ; preds = %for.inc15
+ %conv = sitofp i32 %xindex.3.15 to float
+ %add = fadd float %max.3.15, %conv
+ %conv18 = sitofp i32 %yindex.3.15 to float
+ %add19 = fadd float %add, %conv18
+ %call20 = tail call i32 @dummy(float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @c, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @d, i64 0, i64 0), float* getelementptr inbounds ([32000 x float]* @e, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @bb, i64 0, i64 0), [256 x float]* getelementptr inbounds ([256 x [256 x float]]* @cc, i64 0, i64 0), float %add19) nounwind
+ %inc22 = add nsw i32 %nl.041, 1
+ %exitcond44 = icmp eq i32 %inc22, 78100
+ br i1 %exitcond44, label %for.end23, label %for.body
+
+for.end23: ; preds = %for.end17
+ %call24 = tail call i64 @clock() nounwind
+ %sub = sub nsw i64 %call24, %call1
+ %conv25 = sitofp i64 %sub to double
+ %div = fdiv double %conv25, 1.000000e+06
+ %call26 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str235, i64 0, i64 0), double %div) nounwind
+ %add29 = fadd float %add, 1.000000e+00
+ %add31 = fadd float %add29, %conv18
+ %add32 = fadd float %add31, 1.000000e+00
+ store float %add32, float* @temp, align 4, !tbaa !5
+ tail call void @check(i32 -1)
+ ret i32 0
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+declare i32 @puts(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!4 = metadata !{metadata !"int", metadata !1}
+!5 = metadata !{metadata !"float", metadata !1}
diff --git a/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
new file mode 100644
index 000000000000..a18829e1bce8
--- /dev/null
+++ b/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g4 | FileCheck %s
+
+define void @test(i32* nocapture %x, i64* %xx, i32* %yp) nounwind uwtable ssp {
+entry:
+ %yy = load i32* %yp
+ %y = add i32 %yy, 1
+ %z = zext i32 %y to i64
+ %z2 = shl i64 %z, 32
+ store i64 %z2, i64* %xx, align 4
+ ret void
+
+; CHECK: test:
+; CHECK: sldi {{.*}}, {{.*}}, 32
+; Note: it's okay if someday CodeGen gets smart enough to optimize out
+; the shift.
+}
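
A hedged reading of this regression test: %z2 = shl (zext %y), 32 moves all 32 low bits of %y into the high half of the stored i64, so the add feeding the zext is fully demanded; a demanded-bits analysis that reasoned only about the discarded low half could wrongly delete it. The CHECK therefore pins the sldi {{.*}}, {{.*}}, 32, while the in-file note leaves room for smarter codegen later.
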
diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll
index 466ae8034195..28dd08c7fed1 100644
--- a/test/CodeGen/PowerPC/Frames-alloca.ll
+++ b/test/CodeGen/PowerPC/Frames-alloca.ll
@@ -2,9 +2,9 @@
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC64
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-NOFP
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC64-NOFP
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS
-; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 | FileCheck %s -check-prefix=PPC32-RS
+; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim | FileCheck %s -check-prefix=PPC32-RS-NOFP
; CHECK-PPC32: stw r31, -4(r1)
; CHECK-PPC32: lwz r1, 0(r1)
diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll
index 302d3df28436..d07fea726770 100644
--- a/test/CodeGen/PowerPC/Frames-large.ll
+++ b/test/CodeGen/PowerPC/Frames-large.ll
@@ -21,12 +21,14 @@ define i32* @f1() nounwind {
; PPC32-NOFP: lwz r1, 0(r1)
; PPC32-NOFP: blr
+
; PPC32-FP: _f1:
-; PPC32-FP: stw r31, -4(r1)
; PPC32-FP: lis r0, -1
+; PPC32-FP: stw r31, -4(r1)
; PPC32-FP: ori r0, r0, 32704
; PPC32-FP: stwux r1, r1, r0
-; ...
+; PPC32-FP: mr r31, r1
+; PPC32-FP: addi r3, r31, 64
; PPC32-FP: lwz r1, 0(r1)
; PPC32-FP: lwz r31, -4(r1)
; PPC32-FP: blr
@@ -42,11 +44,12 @@ define i32* @f1() nounwind {
; PPC64-FP: _f1:
-; PPC64-FP: std r31, -8(r1)
; PPC64-FP: lis r0, -1
+; PPC64-FP: std r31, -8(r1)
; PPC64-FP: ori r0, r0, 32640
; PPC64-FP: stdux r1, r1, r0
-; ...
+; PPC64-FP: mr r31, r1
+; PPC64-FP: addi r3, r31, 124
; PPC64-FP: ld r1, 0(r1)
; PPC64-FP: ld r31, -8(r1)
; PPC64-FP: blr
diff --git a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
index b10a99686763..7b0d69cb3bea 100644
--- a/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
+++ b/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | \
-; RUN: grep {stw r3, 32751}
+; RUN: grep {stw r4, 32751}
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep {stw r3, 32751}
+; RUN: grep {stw r4, 32751}
; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin | \
-; RUN: grep {std r3, 9024}
+; RUN: grep {std r4, 9024}
define void @test() nounwind {
store i32 0, i32* inttoptr (i64 48725999 to i32*)
diff --git a/test/CodeGen/PowerPC/a2-fp-basic.ll b/test/CodeGen/PowerPC/a2-fp-basic.ll
new file mode 100644
index 000000000000..932ad7a63ce4
--- /dev/null
+++ b/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+ %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+ %a.real = load double* %a.realp
+ %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+ %a.imag = load double* %a.imagp
+ %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+ %b.real = load double* %b.realp
+ %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+ %b.imag = load double* %b.imagp
+ %mul.rl = fmul double %a.real, %b.real
+ %mul.rr = fmul double %a.imag, %b.imag
+ %mul.r = fsub double %mul.rl, %mul.rr
+ %mul.il = fmul double %a.imag, %b.real
+ %mul.ir = fmul double %a.real, %b.imag
+ %mul.i = fadd double %mul.il, %mul.ir
+ %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+ %c.real = load double* %c.realp
+ %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+ %c.imag = load double* %c.imagp
+ %add.r = fadd double %mul.r, %c.real
+ %add.i = fadd double %mul.i, %c.imag
+ %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+ %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+ store double %add.r, double* %real
+ store double %add.i, double* %imag
+ ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/big-endian-formal-args.ll b/test/CodeGen/PowerPC/big-endian-formal-args.ll
index 318ccb034100..9a456b6ecc51 100644
--- a/test/CodeGen/PowerPC/big-endian-formal-args.ll
+++ b/test/CodeGen/PowerPC/big-endian-formal-args.ll
@@ -2,8 +2,8 @@
declare void @bar(i64 %x, i64 %y)
-; CHECK: li 4, 2
; CHECK: li {{[53]}}, 0
+; CHECK: li 4, 2
; CHECK: li 6, 3
; CHECK: mr {{[53]}}, {{[53]}}
diff --git a/test/CodeGen/PowerPC/bl8_elf_nop.ll b/test/CodeGen/PowerPC/bl8_elf_nop.ll
new file mode 100644
index 000000000000..386c59e32238
--- /dev/null
+++ b/test/CodeGen/PowerPC/bl8_elf_nop.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare i32 @clock() nounwind
+
+define i32 @func() {
+entry:
+ %call = call i32 @clock() nounwind
+ %call2 = add i32 %call, 7
+ ret i32 %call2
+}
+
+; CHECK: bl clock
+; CHECK-NEXT: nop
+
diff --git a/test/CodeGen/PowerPC/can-lower-ret.ll b/test/CodeGen/PowerPC/can-lower-ret.ll
new file mode 100644
index 000000000000..acf4104c0bb3
--- /dev/null
+++ b/test/CodeGen/PowerPC/can-lower-ret.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -mcpu=ppc
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=ppc64
+
+define <4 x float> @foo1(<2 x float> %a, <2 x float> %b) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+ %1 = shufflevector <2 x float> %b, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %2 = shufflevector <4 x float> %0, <4 x float> %1, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+ ret <4 x float> %2
+}
+
+define <4 x double> @foo2(<2 x double> %a, <2 x double> %b) nounwind readnone {
+entry:
+ %0 = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
+ %1 = shufflevector <2 x double> %b, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %2 = shufflevector <4 x double> %0, <4 x double> %1, <4 x i32> <i32 0, i32 4, i32 2, i32 5>
+ ret <4 x double> %2
+}
+
diff --git a/test/CodeGen/PowerPC/cttz.ll b/test/CodeGen/PowerPC/cttz.ll
index ab493a068a32..1d365d47a877 100644
--- a/test/CodeGen/PowerPC/cttz.ll
+++ b/test/CodeGen/PowerPC/cttz.ll
@@ -1,11 +1,11 @@
; Make sure this testcase does not use ctpop
; RUN: llc < %s -march=ppc32 | grep -i cntlzw
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
define i32 @bar(i32 %x) {
entry:
- %tmp.1 = call i32 @llvm.cttz.i32( i32 %x ) ; <i32> [#uses=1]
+ %tmp.1 = call i32 @llvm.cttz.i32( i32 %x, i1 true ) ; <i32> [#uses=1]
ret i32 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/dbg.ll b/test/CodeGen/PowerPC/dbg.ll
new file mode 100644
index 000000000000..e161cb05686f
--- /dev/null
+++ b/test/CodeGen/PowerPC/dbg.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -break-anti-dependencies=all -march=ppc64 -mcpu=g5 | FileCheck %s
+; CHECK: main:
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readnone {
+entry:
+ tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !15), !dbg !17
+ tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !16), !dbg !18
+ %add = add nsw i32 %argc, 1, !dbg !19
+ ret i32 %add, !dbg !19
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"dbg.c", metadata !"/src", metadata !"clang version 3.1", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !13} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"dbg.c", metadata !"/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!13 = metadata !{metadata !14}
+!14 = metadata !{metadata !15, metadata !16}
+!15 = metadata !{i32 721153, metadata !5, metadata !"argc", metadata !6, i32 16777217, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!16 = metadata !{i32 721153, metadata !5, metadata !"argv", metadata !6, i32 33554433, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 1, i32 14, metadata !5, null}
+!18 = metadata !{i32 1, i32 26, metadata !5, null}
+!19 = metadata !{i32 2, i32 3, metadata !20, null}
+!20 = metadata !{i32 720907, metadata !5, i32 1, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+
diff --git a/test/CodeGen/PowerPC/dg.exp b/test/CodeGen/PowerPC/dg.exp
deleted file mode 100644
index 9e50b558aa34..000000000000
--- a/test/CodeGen/PowerPC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target PowerPC] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll
index 29c620e32409..4b6f88bb4a00 100644
--- a/test/CodeGen/PowerPC/indirectbr.ll
+++ b/test/CodeGen/PowerPC/indirectbr.ll
@@ -17,10 +17,22 @@ entry:
bb2: ; preds = %entry, %bb3
%gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; <i8*> [#uses=1]
; PIC: mtctr
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
+; PIC-NEXT: li
; PIC-NEXT: bctr
; STATIC: mtctr
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
+; STATIC-NEXT: li
; STATIC-NEXT: bctr
; PPC64: mtctr
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
+; PPC64-NEXT: li
; PPC64-NEXT: bctr
indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
@@ -47,8 +59,8 @@ L2: ; preds = %L3, %bb2
L1: ; preds = %L2, %bb2
%res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; <i32> [#uses=1]
-; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb)
+; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb)
; PIC: add r[[R2:[0-9]+]], r[[R0]], r[[R1]]
; PIC: stw r[[R2]]
; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0)
diff --git a/test/CodeGen/PowerPC/lit.local.cfg b/test/CodeGen/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..4019eca0bb88
--- /dev/null
+++ b/test/CodeGen/PowerPC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'PowerPC' in targets:
+ config.unsupported = True
+
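
The dg.exp deletion above pairs with this new lit.local.cfg (SPARC gets the same treatment below): the test tree is migrating from DejaGNU driver scripts to llvm-lit configuration. The gate is equivalent; the Tcl llvm_supports_target check becomes the targets_to_build membership test, and config.suffixes takes over the *.{ll,c,cpp} glob. Assuming a configured build tree, the directory can then be run directly with the lit driver, e.g.:

    llvm-lit test/CodeGen/PowerPC
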
diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll
deleted file mode 100644
index 604299183f95..000000000000
--- a/test/CodeGen/PowerPC/ppc32-vaarg.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: llc -O0 < %s | FileCheck %s
-;ModuleID = 'test.c'
-target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
-target triple = "powerpc-unknown-freebsd9.0"
-
-%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }
-
-@var1 = common global i64 0, align 8
-@var2 = common global double 0.0, align 8
-@var3 = common global i32 0, align 4
-
-define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind {
- entry:
- %x = va_arg %struct.__va_list_tag* %ap, i64; Get from r5,r6
-; CHECK: lbz 4, 0(3)
-; CHECK-NEXT: lwz 5, 4(3)
-; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31
-; CHECK-NEXT: cmplwi 0, 6, 0
-; CHECK-NEXT: addi 6, 4, 1
-; CHECK-NEXT: stw 3, -4(1)
-; CHECK-NEXT: stw 6, -8(1)
-; CHECK-NEXT: stw 4, -12(1)
-; CHECK-NEXT: stw 5, -16(1)
-; CHECK-NEXT: bne 0, .LBB0_2
-; CHECK-NEXT: # BB#1: # %entry
-; CHECK-NEXT: lwz 3, -12(1)
-; CHECK-NEXT: stw 3, -8(1)
-; CHECK-NEXT: .LBB0_2: # %entry
-; CHECK-NEXT: lwz 3, -8(1)
-; CHECK-NEXT: lwz 4, -4(1)
-; CHECK-NEXT: lwz 5, 8(4)
-; CHECK-NEXT: slwi 6, 3, 2
-; CHECK-NEXT: addi 7, 3, 2
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: lwz 3, -16(1)
-; CHECK-NEXT: addi 8, 3, 4
-; CHECK-NEXT: add 5, 5, 6
-; CHECK-NEXT: mfcr 0 # cr0
-; CHECK-NEXT: stw 0, -20(1)
-; CHECK-NEXT: stw 5, -24(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: stw 7, -32(1)
-; CHECK-NEXT: stw 8, -36(1)
-; CHECK-NEXT: blt 0, .LBB0_4
-; CHECK-NEXT: # BB#3: # %entry
-; CHECK-NEXT: lwz 3, -36(1)
-; CHECK-NEXT: stw 3, -28(1)
-; CHECK-NEXT: .LBB0_4: # %entry
-; CHECK-NEXT: lwz 3, -28(1)
-; CHECK-NEXT: lwz 4, -32(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: lwz 4, -24(1)
-; CHECK-NEXT: lwz 0, -20(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 3, -40(1)
-; CHECK-NEXT: stw 4, -44(1)
-; CHECK-NEXT: blt 0, .LBB0_6
-; CHECK-NEXT: # BB#5: # %entry
-; CHECK-NEXT: lwz 3, -16(1)
-; CHECK-NEXT: stw 3, -44(1)
-; CHECK-NEXT: .LBB0_6: # %entry
-; CHECK-NEXT: lwz 3, -44(1)
-; CHECK-NEXT: lwz 4, -40(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
- store i64 %x, i64* @var1, align 8
-; CHECK-NEXT: lis 4, var1@ha
-; CHECK-NEXT: lwz 6, 4(3)
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: la 7, var1@l(4)
-; CHECK-NEXT: stw 3, var1@l(4)
-; CHECK-NEXT: stw 6, 4(7)
- %y = va_arg %struct.__va_list_tag* %ap, double; From f1
-; CHECK-NEXT: lbz 3, 1(5)
-; CHECK-NEXT: lwz 4, 4(5)
-; CHECK-NEXT: lwz 6, 8(5)
-; CHECK-NEXT: slwi 7, 3, 3
-; CHECK-NEXT: add 6, 6, 7
-; CHECK-NEXT: addi 7, 3, 1
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 3, 4, 8
-; CHECK-NEXT: addi 6, 6, 32
-; CHECK-NEXT: mr 8, 4
-; CHECK-NEXT: mfcr 0 # cr0
-; CHECK-NEXT: stw 0, -48(1)
-; CHECK-NEXT: stw 4, -52(1)
-; CHECK-NEXT: stw 6, -56(1)
-; CHECK-NEXT: stw 7, -60(1)
-; CHECK-NEXT: stw 3, -64(1)
-; CHECK-NEXT: stw 8, -68(1)
-; CHECK-NEXT: blt 0, .LBB0_8
-; CHECK-NEXT: # BB#7: # %entry
-; CHECK-NEXT: lwz 3, -64(1)
-; CHECK-NEXT: stw 3, -68(1)
-; CHECK-NEXT: .LBB0_8: # %entry
-; CHECK-NEXT: lwz 3, -68(1)
-; CHECK-NEXT: lwz 4, -60(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 1(5)
-; CHECK-NEXT: lwz 4, -56(1)
-; CHECK-NEXT: lwz 0, -48(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 4, -72(1)
-; CHECK-NEXT: stw 3, -76(1)
-; CHECK-NEXT: blt 0, .LBB0_10
-; CHECK-NEXT: # BB#9: # %entry
-; CHECK-NEXT: lwz 3, -52(1)
-; CHECK-NEXT: stw 3, -72(1)
-; CHECK-NEXT: .LBB0_10: # %entry
-; CHECK-NEXT: lwz 3, -72(1)
-; CHECK-NEXT: lwz 4, -76(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-; CHECK-NEXT: lfd 0, 0(3)
- store double %y, double* @var2, align 8
-; CHECK-NEXT: lis 3, var2@ha
-; CHECK-NEXT: stfd 0, var2@l(3)
- %z = va_arg %struct.__va_list_tag* %ap, i32; From r7
-; CHECK-NEXT: lbz 3, 0(5)
-; CHECK-NEXT: lwz 4, 4(5)
-; CHECK-NEXT: lwz 6, 8(5)
-; CHECK-NEXT: slwi 7, 3, 2
-; CHECK-NEXT: addi 8, 3, 1
-; CHECK-NEXT: cmpwi 0, 3, 8
-; CHECK-NEXT: addi 3, 4, 4
-; CHECK-NEXT: add 6, 6, 7
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: stw 6, -80(1)
-; CHECK-NEXT: stw 8, -84(1)
-; CHECK-NEXT: stw 3, -88(1)
-; CHECK-NEXT: stw 4, -92(1)
-; CHECK-NEXT: stw 7, -96(1)
-; CHECK-NEXT: mfcr 0 # cr0
-; CHECK-NEXT: stw 0, -100(1)
-; CHECK-NEXT: blt 0, .LBB0_12
-; CHECK-NEXT: # BB#11: # %entry
-; CHECK-NEXT: lwz 3, -88(1)
-; CHECK-NEXT: stw 3, -96(1)
-; CHECK-NEXT: .LBB0_12: # %entry
-; CHECK-NEXT: lwz 3, -96(1)
-; CHECK-NEXT: lwz 4, -84(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stb 4, 0(5)
-; CHECK-NEXT: lwz 4, -80(1)
-; CHECK-NEXT: lwz 0, -100(1)
-; CHECK-NEXT: mtcrf 128, 0
-; CHECK-NEXT: stw 4, -104(1)
-; CHECK-NEXT: stw 3, -108(1)
-; CHECK-NEXT: blt 0, .LBB0_14
-; CHECK-NEXT: # BB#13: # %entry
-; CHECK-NEXT: lwz 3, -92(1)
-; CHECK-NEXT: stw 3, -104(1)
-; CHECK-NEXT: .LBB0_14: # %entry
-; CHECK-NEXT: lwz 3, -104(1)
-; CHECK-NEXT: lwz 4, -108(1)
-; CHECK-NEXT: lwz 5, -4(1)
-; CHECK-NEXT: stw 4, 4(5)
-; CHECK-NEXT: lwz 3, 0(3)
- store i32 %z, i32* @var3, align 4
-; CHECK-NEXT: lis 4, var3@ha
-; CHECK-NEXT: stw 3, var3@l(4)
- ret void
-; CHECK-NEXT: stw 5, -112(1)
-; CHECK-NEXT: blr
-}
-
diff --git a/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
new file mode 100644
index 000000000000..1fad2fa3aaf5
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s
+
+%0 = type { double, double }
+
+define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
+entry:
+ %a.realp = getelementptr inbounds %0* %a, i32 0, i32 0
+ %a.real = load double* %a.realp
+ %a.imagp = getelementptr inbounds %0* %a, i32 0, i32 1
+ %a.imag = load double* %a.imagp
+ %b.realp = getelementptr inbounds %0* %b, i32 0, i32 0
+ %b.real = load double* %b.realp
+ %b.imagp = getelementptr inbounds %0* %b, i32 0, i32 1
+ %b.imag = load double* %b.imagp
+ %mul.rl = fmul double %a.real, %b.real
+ %mul.rr = fmul double %a.imag, %b.imag
+ %mul.r = fsub double %mul.rl, %mul.rr
+ %mul.il = fmul double %a.imag, %b.real
+ %mul.ir = fmul double %a.real, %b.imag
+ %mul.i = fadd double %mul.il, %mul.ir
+ %c.realp = getelementptr inbounds %0* %c, i32 0, i32 0
+ %c.real = load double* %c.realp
+ %c.imagp = getelementptr inbounds %0* %c, i32 0, i32 1
+ %c.imag = load double* %c.imagp
+ %add.r = fadd double %mul.r, %c.real
+ %add.i = fadd double %mul.i, %c.imag
+ %real = getelementptr inbounds %0* %agg.result, i32 0, i32 0
+ %imag = getelementptr inbounds %0* %agg.result, i32 0, i32 1
+ store double %add.r, double* %real
+ store double %add.i, double* %imag
+ ret void
+; CHECK: fmadd
+}
diff --git a/test/CodeGen/PowerPC/ppc440-msync.ll b/test/CodeGen/PowerPC/ppc440-msync.ll
new file mode 100644
index 000000000000..1274173256cf
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc440-msync.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=ppc32 | FileCheck %s
+; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s -check-prefix=BE-CHK
+
+define i32 @has_a_fence(i32 %a, i32 %b) nounwind {
+entry:
+ fence acquire
+ %cond = icmp eq i32 %a, %b
+ br i1 %cond, label %IfEqual, label %IfUnequal
+
+IfEqual:
+ fence release
+; CHECK: sync
+; CHECK-NOT: msync
+; BE-CHK: msync
+ br label %end
+
+IfUnequal:
+ fence release
+; CHECK: sync
+; CHECK-NOT: msync
+; BE-CHK: msync
+ ret i32 0
+
+end:
+ ret i32 1
+}
+
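
On the two prefixes above: generic ppc32 lowers fence to sync, while the -mcpu=440 run expects msync, the Book E encoding of the full memory barrier used by the 440 core; the CHECK-NOT: msync lines guard the generic path against picking up the embedded spelling.
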
diff --git a/test/CodeGen/PowerPC/ppc64-ind-call.ll b/test/CodeGen/PowerPC/ppc64-ind-call.ll
new file mode 100644
index 000000000000..d5c4d468c656
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-ind-call.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=ppc64 | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+ %call.i75 = call zeroext i8 undef(i8* undef, i8 zeroext 10)
+ unreachable
+}
+
+; CHECK: @test1
+; CHECK: ld 11, 0(3)
+; CHECK: ld 2, 8(3)
+; CHECK: bctrl
+; CHECK: ld 2, 40(1)
+
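
The checks above encode the 64-bit ELF ABI's indirect-call sequence: a function pointer names a function descriptor, so the callee's entry address is loaded from offset 0 (ld 11, 0(3)) and its TOC pointer from offset 8 (ld 2, 8(3)) before the bctrl, and the caller's own TOC is reloaded afterwards from its save slot at 40(r1). A sketch of the descriptor layout this assumes:

    .section .opd,"aw",@progbits
    callee:
        .quad .L.callee       ; entry point in .text
        .quad .TOC.@tocbase   ; TOC base for the callee
        .quad 0               ; environment pointer (unused from C)
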
diff --git a/test/CodeGen/PowerPC/ppc64-linux-func-size.ll b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
new file mode 100644
index 000000000000..e5aa1f169f64
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-linux-func-size.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 | FileCheck %s
+
+; CHECK: .section .opd,"aw",@progbits
+; CHECK-NEXT: test1:
+; CHECK-NEXT: .align 3
+; CHECK-NEXT: .quad .L.test1
+; CHECK-NEXT: .quad .TOC.@tocbase
+; CHECK-NEXT: .text
+; CHECK-NEXT: .L.test1:
+
+define i32 @test1(i32 %a) nounwind {
+entry:
+ ret i32 %a
+}
+
+; Until recently, binutils accepted the .size directive as:
+; .size test1, .Ltmp0-test1
+; however, using this directive with recent binutils will result in the error:
+; .size expression for XXX does not evaluate to a constant
+; so we must use the label which actually tags the start of the function.
+; CHECK: .size test1, .Ltmp0-.L.test1
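
Concretely, the directive this pins looks like the following (labels as in the checks above):

    .L.test1:              ; code start, in .text
        ...
    .Ltmp0:                ; end-of-function label
        .size test1, .Ltmp0-.L.test1

Because test1 itself labels the descriptor in .opd, the old operand .Ltmp0-test1 subtracted symbols from different sections, which newer binutils rejects as a non-constant expression; subtracting the .text-local .L.test1 keeps the difference constant.
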
diff --git a/test/CodeGen/PowerPC/ppc64-prefetch.ll b/test/CodeGen/PowerPC/ppc64-prefetch.ll
new file mode 100644
index 000000000000..b2f37097f920
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-prefetch.ll
@@ -0,0 +1,15 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define void @test1(i8* %a, ...) nounwind {
+entry:
+ call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
+ ret void
+}
+
+declare void @llvm.prefetch(i8*, i32, i32, i32)
+
+; CHECK: @test1
+; CHECK: dcbt
+
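
For reference, llvm.prefetch's operands are (address, rw, locality, cache type): rw is 0 for a read and 1 for a write, locality ranges over 0-3, and cache type selects instruction (0) or data (1) cache. The call in this test is thus a read prefetch of data with maximal temporal locality, which the PPC64 backend maps onto dcbt (data cache block touch):

    call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)  ; read, locality 3, data cache -> dcbt
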
diff --git a/test/CodeGen/PowerPC/ppc64-vaarg-int.ll b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
new file mode 100644
index 000000000000..5a63b01badc9
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-vaarg-int.ll
@@ -0,0 +1,20 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+; RUN: llc < %s | FileCheck %s
+
+define i32 @intvaarg(i32 %a, ...) nounwind {
+entry:
+ %va = alloca i8*, align 8
+ %va1 = bitcast i8** %va to i8*
+ call void @llvm.va_start(i8* %va1)
+ %0 = va_arg i8** %va, i32
+ %sub = sub nsw i32 %a, %0
+ ret i32 %sub
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+; CHECK: @intvaarg
+; Make sure that the va pointer is incremented by 8 (not 4).
+; CHECK: addi{{.*}}, 8
+
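
As a sketch of what the CHECK line above pins down: under the 64-bit PowerPC ELF ABI every fixed-size argument occupies a full 8-byte stack slot, so va_arg must advance the va pointer by 8 even when only an i32 is consumed. Registers and offsets below are illustrative, not the exact llc output:

        ld      4, 0(3)         # r3 holds &va; load the current argument pointer
        lwa     5, 4(4)         # big-endian: the i32 sits in the high-addressed
                                #   half of its 8-byte slot, hence offset 4
        addi    4, 4, 8         # advance by one full doubleword slot (8, not 4)
        std     4, 0(3)         # write the updated pointer back
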
diff --git a/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
new file mode 100644
index 000000000000..aa7de1618ebb
--- /dev/null
+++ b/test/CodeGen/SPARC/2011-12-03-TailDuplication.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=sparc <%s
+
+define void @foo(i32 %a) nounwind {
+entry:
+ br i1 undef, label %return, label %else.0
+
+else.0:
+ br i1 undef, label %if.end.0, label %return
+
+if.end.0:
+ br i1 undef, label %if.then.1, label %else.1
+
+else.1:
+ %0 = bitcast i8* undef to i8**
+ br label %else.1.2
+
+if.then.1:
+ br i1 undef, label %return, label %return
+
+else.1.2:
+ br i1 undef, label %return, label %return
+
+return:
+ ret void
+}
diff --git a/test/CodeGen/SPARC/dg.exp b/test/CodeGen/SPARC/dg.exp
deleted file mode 100644
index 6c0a9975fe41..000000000000
--- a/test/CodeGen/SPARC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target Sparc] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/SPARC/lit.local.cfg b/test/CodeGen/SPARC/lit.local.cfg
new file mode 100644
index 000000000000..786fee9e6610
--- /dev/null
+++ b/test/CodeGen/SPARC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'Sparc' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/SystemZ/00-RetVoid.ll b/test/CodeGen/SystemZ/00-RetVoid.ll
deleted file mode 100644
index 6f3cbac738f8..000000000000
--- a/test/CodeGen/SystemZ/00-RetVoid.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define void @foo() {
-entry:
- ret void
-}
diff --git a/test/CodeGen/SystemZ/01-RetArg.ll b/test/CodeGen/SystemZ/01-RetArg.ll
deleted file mode 100644
index 8e1ff49c26fa..000000000000
--- a/test/CodeGen/SystemZ/01-RetArg.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- ret i64 %b
-}
diff --git a/test/CodeGen/SystemZ/01-RetImm.ll b/test/CodeGen/SystemZ/01-RetImm.ll
deleted file mode 100644
index 8b99e68dc7e1..000000000000
--- a/test/CodeGen/SystemZ/01-RetImm.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lghi | count 1
-; RUN: llc < %s -march=systemz | grep llill | count 1
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep llihl | count 1
-; RUN: llc < %s -march=systemz | grep llihh | count 1
-; RUN: llc < %s -march=systemz | grep lgfi | count 1
-; RUN: llc < %s -march=systemz | grep llilf | count 1
-; RUN: llc < %s -march=systemz | grep llihf | count 1
-
-
-define i64 @foo1() {
-entry:
- ret i64 1
-}
-
-define i64 @foo2() {
-entry:
- ret i64 65535
-}
-
-define i64 @foo3() {
-entry:
- ret i64 131072
-}
-
-define i64 @foo4() {
-entry:
- ret i64 8589934592
-}
-
-define i64 @foo5() {
-entry:
- ret i64 562949953421312
-}
-
-define i64 @foo6() {
-entry:
- ret i64 65537
-}
-
-define i64 @foo7() {
-entry:
- ret i64 4294967295
-}
-
-define i64 @foo8() {
-entry:
- ret i64 281483566645248
-}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
deleted file mode 100644
index ee9e5e9b5c32..000000000000
--- a/test/CodeGen/SystemZ/02-MemArith.ll
+++ /dev/null
@@ -1,133 +0,0 @@
-; RUN: llc < %s -march=systemz | FileCheck %s
-
-define signext i32 @foo1(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo1:
-; CHECK: a %r2, 4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = add i32 %a, %c
- ret i32 %d
-}
-
-define signext i32 @foo2(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo2:
-; CHECK: ay %r2, -4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = add i32 %a, %c
- ret i32 %d
-}
-
-define signext i64 @foo3(i64 %a, i64 *%b, i64 %idx) {
-; CHECK: foo3:
-; CHECK: ag %r2, 8(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
- %c = load i64* %ptr
- %d = add i64 %a, %c
- ret i64 %d
-}
-
-define signext i32 @foo4(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo4:
-; CHECK: n %r2, 4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = and i32 %a, %c
- ret i32 %d
-}
-
-define signext i32 @foo5(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo5:
-; CHECK: ny %r2, -4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = and i32 %a, %c
- ret i32 %d
-}
-
-define signext i64 @foo6(i64 %a, i64 *%b, i64 %idx) {
-; CHECK: foo6:
-; CHECK: ng %r2, 8(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
- %c = load i64* %ptr
- %d = and i64 %a, %c
- ret i64 %d
-}
-
-define signext i32 @foo7(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo7:
-; CHECK: o %r2, 4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = or i32 %a, %c
- ret i32 %d
-}
-
-define signext i32 @foo8(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo8:
-; CHECK: oy %r2, -4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = or i32 %a, %c
- ret i32 %d
-}
-
-define signext i64 @foo9(i64 %a, i64 *%b, i64 %idx) {
-; CHECK: foo9:
-; CHECK: og %r2, 8(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
- %c = load i64* %ptr
- %d = or i64 %a, %c
- ret i64 %d
-}
-
-define signext i32 @foo10(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo10:
-; CHECK: x %r2, 4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = xor i32 %a, %c
- ret i32 %d
-}
-
-define signext i32 @foo11(i32 %a, i32 *%b, i64 %idx) {
-; CHECK: foo11:
-; CHECK: xy %r2, -4(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, -1 ; <i64> [#uses=1]
- %ptr = getelementptr i32* %b, i64 %idx2 ; <i32*> [#uses=1]
- %c = load i32* %ptr
- %d = xor i32 %a, %c
- ret i32 %d
-}
-
-define signext i64 @foo12(i64 %a, i64 *%b, i64 %idx) {
-; CHECK: foo12:
-; CHECK: xg %r2, 8(%r1,%r3)
-entry:
- %idx2 = add i64 %idx, 1 ; <i64> [#uses=1]
- %ptr = getelementptr i64* %b, i64 %idx2 ; <i64*> [#uses=1]
- %c = load i64* %ptr
- %d = xor i64 %a, %c
- ret i64 %d
-}
diff --git a/test/CodeGen/SystemZ/02-RetAdd.ll b/test/CodeGen/SystemZ/02-RetAdd.ll
deleted file mode 100644
index d5dfa220ad23..000000000000
--- a/test/CodeGen/SystemZ/02-RetAdd.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = add i64 %a, %b
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAddImm.ll b/test/CodeGen/SystemZ/02-RetAddImm.ll
deleted file mode 100644
index 40f6cce936bd..000000000000
--- a/test/CodeGen/SystemZ/02-RetAddImm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = add i64 %a, 1
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAnd.ll b/test/CodeGen/SystemZ/02-RetAnd.ll
deleted file mode 100644
index b568a57f8bee..000000000000
--- a/test/CodeGen/SystemZ/02-RetAnd.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = and i64 %a, %b
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetAndImm.ll b/test/CodeGen/SystemZ/02-RetAndImm.ll
deleted file mode 100644
index 53c5e54528bd..000000000000
--- a/test/CodeGen/SystemZ/02-RetAndImm.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr | count 4
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep llihl | count 1
-; RUN: llc < %s -march=systemz | grep llihh | count 1
-
-define i64 @foo1(i64 %a, i64 %b) {
-entry:
- %c = and i64 %a, 1
- ret i64 %c
-}
-
-define i64 @foo2(i64 %a, i64 %b) {
-entry:
- %c = and i64 %a, 131072
- ret i64 %c
-}
-
-define i64 @foo3(i64 %a, i64 %b) {
-entry:
- %c = and i64 %a, 8589934592
- ret i64 %c
-}
-
-define i64 @foo4(i64 %a, i64 %b) {
-entry:
- %c = and i64 %a, 562949953421312
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetNeg.ll b/test/CodeGen/SystemZ/02-RetNeg.ll
deleted file mode 100644
index 3f6ba2f27fd4..000000000000
--- a/test/CodeGen/SystemZ/02-RetNeg.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lcgr | count 1
-
-define i64 @foo(i64 %a) {
-entry:
- %c = sub i64 0, %a
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetOr.ll b/test/CodeGen/SystemZ/02-RetOr.ll
deleted file mode 100644
index a1ddb63d04ab..000000000000
--- a/test/CodeGen/SystemZ/02-RetOr.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = or i64 %a, %b
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetOrImm.ll b/test/CodeGen/SystemZ/02-RetOrImm.ll
deleted file mode 100644
index 68cd24d07f44..000000000000
--- a/test/CodeGen/SystemZ/02-RetOrImm.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s -march=systemz | grep oill | count 1
-; RUN: llc < %s -march=systemz | grep oilh | count 1
-; RUN: llc < %s -march=systemz | grep oihl | count 1
-; RUN: llc < %s -march=systemz | grep oihh | count 1
-
-define i64 @foo1(i64 %a, i64 %b) {
-entry:
- %c = or i64 %a, 1
- ret i64 %c
-}
-
-define i64 @foo2(i64 %a, i64 %b) {
-entry:
- %c = or i64 %a, 131072
- ret i64 %c
-}
-
-define i64 @foo3(i64 %a, i64 %b) {
-entry:
- %c = or i64 %a, 8589934592
- ret i64 %c
-}
-
-define i64 @foo4(i64 %a, i64 %b) {
-entry:
- %c = or i64 %a, 562949953421312
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetSub.ll b/test/CodeGen/SystemZ/02-RetSub.ll
deleted file mode 100644
index 98e1861365fd..000000000000
--- a/test/CodeGen/SystemZ/02-RetSub.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = sub i64 %a, %b
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetSubImm.ll b/test/CodeGen/SystemZ/02-RetSubImm.ll
deleted file mode 100644
index 8479fbf8656e..000000000000
--- a/test/CodeGen/SystemZ/02-RetSubImm.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llc < %s -march=systemz
-
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = sub i64 %a, 1
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetXor.ll b/test/CodeGen/SystemZ/02-RetXor.ll
deleted file mode 100644
index 4d1adf2f367c..000000000000
--- a/test/CodeGen/SystemZ/02-RetXor.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = xor i64 %a, %b
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/02-RetXorImm.ll b/test/CodeGen/SystemZ/02-RetXorImm.ll
deleted file mode 100644
index 473bbf74f5bc..000000000000
--- a/test/CodeGen/SystemZ/02-RetXorImm.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=systemz
-define i64 @foo(i64 %a, i64 %b) {
-entry:
- %c = xor i64 %a, 1
- ret i64 %c
-}
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
deleted file mode 100644
index 0a7f5ee22650..000000000000
--- a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ahi | count 3
-; RUN: llc < %s -march=systemz | grep afi | count 3
-; RUN: llc < %s -march=systemz | grep lgfr | count 4
-; RUN: llc < %s -march=systemz | grep llgfr | count 2
-
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, 1
- ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, 1
- ret i32 %c
-}
-
-define zeroext i32 @foo4(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, 131072
- ret i32 %c
-}
-
-define signext i32 @foo5(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, 131072
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
deleted file mode 100644
index 337bb3fcb866..000000000000
--- a/test/CodeGen/SystemZ/03-RetAddSubreg.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ar | count 3
-; RUN: llc < %s -march=systemz | grep lgfr | count 2
-; RUN: llc < %s -march=systemz | grep llgfr | count 1
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, %b
- ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, %b
- ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = add i32 %a, %b
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
deleted file mode 100644
index c5326ab536b8..000000000000
--- a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, 1
- ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, 131072
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
deleted file mode 100644
index 75dc90a9b00e..000000000000
--- a/test/CodeGen/SystemZ/03-RetAndSubreg.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ngr | count 3
-; RUN: llc < %s -march=systemz | grep nihf | count 1
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, %b
- ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, %b
- ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = and i32 %a, %b
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
deleted file mode 100644
index 476821a61452..000000000000
--- a/test/CodeGen/SystemZ/03-RetArgSubreg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lgr | count 2
-; RUN: llc < %s -march=systemz | grep nihf | count 1
-; RUN: llc < %s -march=systemz | grep lgfr | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- ret i32 %b
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b) {
-entry:
- ret i32 %b
-}
-
-define signext i32 @foo2(i32 %a, i32 %b) {
-entry:
- ret i32 %b
-}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
deleted file mode 100644
index 70da913edead..000000000000
--- a/test/CodeGen/SystemZ/03-RetImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lghi | count 2
-; RUN: llc < %s -march=systemz | grep llill | count 1
-; RUN: llc < %s -march=systemz | grep llilh | count 1
-; RUN: llc < %s -march=systemz | grep lgfi | count 1
-; RUN: llc < %s -march=systemz | grep llilf | count 2
-
-
-define i32 @foo1() {
-entry:
- ret i32 1
-}
-
-define i32 @foo2() {
-entry:
- ret i32 65535
-}
-
-define i32 @foo3() {
-entry:
- ret i32 131072
-}
-
-define i32 @foo4() {
-entry:
- ret i32 65537
-}
-
-define i32 @foo5() {
-entry:
- ret i32 4294967295
-}
-
-define zeroext i32 @foo6() {
-entry:
- ret i32 4294967295
-}
-
-define signext i32 @foo7() {
-entry:
- ret i32 4294967295
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll b/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
deleted file mode 100644
index 87ebcc1f0a4f..000000000000
--- a/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc < %s -march=systemz | grep lcr | count 1
-
-define i32 @foo(i32 %a) {
-entry:
- %c = sub i32 0, %a
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
deleted file mode 100644
index 99adea8abbcb..000000000000
--- a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: llc < %s -march=systemz | grep oill | count 3
-; RUN: llc < %s -march=systemz | grep oilh | count 3
-; RUN: llc < %s -march=systemz | grep oilf | count 3
-; RUN: llc < %s -march=systemz | grep llgfr | count 3
-; RUN: llc < %s -march=systemz | grep lgfr | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 1
- ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 131072
- ret i32 %c
-}
-
-define i32 @foo7(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 123456
- ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 1
- ret i32 %c
-}
-
-define zeroext i32 @foo8(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 123456
- ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 131072
- ret i32 %c
-}
-
-define signext i32 @foo9(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, 123456
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
deleted file mode 100644
index 7dab5cacedf3..000000000000
--- a/test/CodeGen/SystemZ/03-RetOrSubreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ogr | count 3
-; RUN: llc < %s -march=systemz | grep nihf | count 1
-; RUN: llc < %s -march=systemz | grep lgfr | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, %b
- ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, %b
- ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = or i32 %a, %b
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
deleted file mode 100644
index 21ea9b583799..000000000000
--- a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s -march=systemz | grep ahi | count 3
-; RUN: llc < %s -march=systemz | grep afi | count 3
-; RUN: llc < %s -march=systemz | grep lgfr | count 4
-; RUN: llc < %s -march=systemz | grep llgfr | count 2
-
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, 1
- ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, 131072
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
deleted file mode 100644
index 24b7631d385e..000000000000
--- a/test/CodeGen/SystemZ/03-RetSubSubreg.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s -march=systemz | grep sr | count 3
-; RUN: llc < %s -march=systemz | grep llgfr | count 1
-; RUN: llc < %s -march=systemz | grep lgfr | count 2
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, %b
- ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, %b
- ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = sub i32 %a, %b
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
deleted file mode 100644
index 70ee45415d17..000000000000
--- a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -march=systemz | grep xilf | count 9
-; RUN: llc < %s -march=systemz | grep llgfr | count 3
-; RUN: llc < %s -march=systemz | grep lgfr | count 6
-
-define i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 1
- ret i32 %c
-}
-
-define i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 131072
- ret i32 %c
-}
-
-define i32 @foo7(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 123456
- ret i32 %c
-}
-
-define zeroext i32 @foo3(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 1
- ret i32 %c
-}
-
-define zeroext i32 @foo8(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 123456
- ret i32 %c
-}
-
-define signext i32 @foo4(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 131072
- ret i32 %c
-}
-
-define zeroext i32 @foo5(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 1
- ret i32 %c
-}
-
-define signext i32 @foo6(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 131072
- ret i32 %c
-}
-
-define signext i32 @foo9(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, 123456
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
deleted file mode 100644
index 02c4a2a87f7b..000000000000
--- a/test/CodeGen/SystemZ/03-RetXorSubreg.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=systemz | grep xgr | count 3
-; RUN: llc < %s -march=systemz | grep nihf | count 1
-; RUN: llc < %s -march=systemz | grep lgfr | count 1
-
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, %b
- ret i32 %c
-}
-
-define zeroext i32 @foo1(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, %b
- ret i32 %c
-}
-
-define signext i32 @foo2(i32 %a, i32 %b) {
-entry:
- %c = xor i32 %a, %b
- ret i32 %c
-}
-
diff --git a/test/CodeGen/SystemZ/04-RetShifts.ll b/test/CodeGen/SystemZ/04-RetShifts.ll
deleted file mode 100644
index cccdc4737f76..000000000000
--- a/test/CodeGen/SystemZ/04-RetShifts.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc < %s -march=systemz | grep sra | count 6
-; RUN: llc < %s -march=systemz | grep srag | count 3
-; RUN: llc < %s -march=systemz | grep srl | count 6
-; RUN: llc < %s -march=systemz | grep srlg | count 3
-; RUN: llc < %s -march=systemz | grep sll | count 6
-; RUN: llc < %s -march=systemz | grep sllg | count 3
-
-define signext i32 @foo1(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %add = add i32 %idx, 1 ; <i32> [#uses=1]
- %shr = ashr i32 %a, %add ; <i32> [#uses=1]
- ret i32 %shr
-}
-
-define signext i32 @foo2(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %add = add i32 %idx, 1 ; <i32> [#uses=1]
- %shr = shl i32 %a, %add ; <i32> [#uses=1]
- ret i32 %shr
-}
-
-define signext i32 @foo3(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %add = add i32 %idx, 1 ; <i32> [#uses=1]
- %shr = lshr i32 %a, %add ; <i32> [#uses=1]
- ret i32 %shr
-}
-
-define signext i64 @foo4(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %add = add i64 %idx, 1 ; <i64> [#uses=1]
- %shr = ashr i64 %a, %add ; <i64> [#uses=1]
- ret i64 %shr
-}
-
-define signext i64 @foo5(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %add = add i64 %idx, 1 ; <i64> [#uses=1]
- %shr = shl i64 %a, %add ; <i64> [#uses=1]
- ret i64 %shr
-}
-
-define signext i64 @foo6(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %add = add i64 %idx, 1 ; <i64> [#uses=1]
- %shr = lshr i64 %a, %add ; <i64> [#uses=1]
- ret i64 %shr
-}
-
-define signext i32 @foo7(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %shr = ashr i32 %a, 1
- ret i32 %shr
-}
-
-define signext i32 @foo8(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %shr = shl i32 %a, 1
- ret i32 %shr
-}
-
-define signext i32 @foo9(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %shr = lshr i32 %a, 1
- ret i32 %shr
-}
-
-define signext i32 @foo10(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %shr = ashr i32 %a, %idx
- ret i32 %shr
-}
-
-define signext i32 @foo11(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %shr = shl i32 %a, %idx
- ret i32 %shr
-}
-
-define signext i32 @foo12(i32 %a, i32 %idx) nounwind readnone {
-entry:
- %shr = lshr i32 %a, %idx
- ret i32 %shr
-}
-
-define signext i64 @foo13(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %shr = ashr i64 %a, 1
- ret i64 %shr
-}
-
-define signext i64 @foo14(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %shr = shl i64 %a, 1
- ret i64 %shr
-}
-
-define signext i64 @foo15(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %shr = lshr i64 %a, 1
- ret i64 %shr
-}
-
-define signext i64 @foo16(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %shr = ashr i64 %a, %idx
- ret i64 %shr
-}
-
-define signext i64 @foo17(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %shr = shl i64 %a, %idx
- ret i64 %shr
-}
-
-define signext i64 @foo18(i64 %a, i64 %idx) nounwind readnone {
-entry:
- %shr = lshr i64 %a, %idx
- ret i64 %shr
-}
-
diff --git a/test/CodeGen/SystemZ/05-LoadAddr.ll b/test/CodeGen/SystemZ/05-LoadAddr.ll
deleted file mode 100644
index cf0264283939..000000000000
--- a/test/CodeGen/SystemZ/05-LoadAddr.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s | grep lay | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64* @foo(i64* %a, i64 %idx) nounwind readnone {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
- ret i64* %add.ptr2
-}
diff --git a/test/CodeGen/SystemZ/05-MemImmStores.ll b/test/CodeGen/SystemZ/05-MemImmStores.ll
deleted file mode 100644
index 3cf21ccd931a..000000000000
--- a/test/CodeGen/SystemZ/05-MemImmStores.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s -mattr=+z10 | grep mvghi | count 1
-; RUN: llc < %s -mattr=+z10 | grep mvhi | count 1
-; RUN: llc < %s -mattr=+z10 | grep mvhhi | count 1
-; RUN: llc < %s | grep mvi | count 2
-; RUN: llc < %s | grep mviy | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo1(i64* nocapture %a, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i64* %a, i64 1 ; <i64*> [#uses=1]
- store i64 1, i64* %add.ptr
- ret void
-}
-
-define void @foo2(i32* nocapture %a, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i32* %a, i64 1 ; <i32*> [#uses=1]
- store i32 2, i32* %add.ptr
- ret void
-}
-
-define void @foo3(i16* nocapture %a, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i16* %a, i64 1 ; <i16*> [#uses=1]
- store i16 3, i16* %add.ptr
- ret void
-}
-
-define void @foo4(i8* nocapture %a, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i8* %a, i64 1 ; <i8*> [#uses=1]
- store i8 4, i8* %add.ptr
- ret void
-}
-
-define void @foo5(i8* nocapture %a, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i8* %a, i64 -1 ; <i8*> [#uses=1]
- store i8 4, i8* %add.ptr
- ret void
-}
-
-define void @foo6(i16* nocapture %a, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i16* %a, i64 -1 ; <i16*> [#uses=1]
- store i16 3, i16* %add.ptr
- ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores.ll b/test/CodeGen/SystemZ/05-MemLoadsStores.ll
deleted file mode 100644
index eabeb0a42254..000000000000
--- a/test/CodeGen/SystemZ/05-MemLoadsStores.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: llc < %s | grep ly | count 2
-; RUN: llc < %s | grep sty | count 2
-; RUN: llc < %s | grep {l %} | count 2
-; RUN: llc < %s | grep {st %} | count 2
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo1(i32* nocapture %foo, i32* nocapture %bar) nounwind {
-entry:
- %tmp1 = load i32* %foo ; <i32> [#uses=1]
- store i32 %tmp1, i32* %bar
- ret void
-}
-
-define void @foo2(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i32* %foo, i64 1 ; <i32*> [#uses=1]
- %tmp1 = load i32* %add.ptr ; <i32> [#uses=1]
- %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
- store i32 %tmp1, i32* %add.ptr5
- ret void
-}
-
-define void @foo3(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
- %sub.ptr = getelementptr i32* %foo, i64 -1 ; <i32*> [#uses=1]
- %tmp1 = load i32* %sub.ptr ; <i32> [#uses=1]
- %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
- %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1]
- store i32 %tmp1, i32* %add.ptr
- ret void
-}
-
-define void @foo4(i32* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i32* %foo, i64 8192 ; <i32*> [#uses=1]
- %tmp1 = load i32* %add.ptr ; <i32> [#uses=1]
- %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
- %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
- store i32 %tmp1, i32* %add.ptr5
- ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll b/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
deleted file mode 100644
index 53bb641cf1eb..000000000000
--- a/test/CodeGen/SystemZ/05-MemLoadsStores16.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: llc < %s | grep {sthy.%} | count 2
-; RUN: llc < %s | grep {lhy.%} | count 2
-; RUN: llc < %s | grep {lh.%} | count 6
-; RUN: llc < %s | grep {sth.%} | count 2
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo1(i16* nocapture %foo, i16* nocapture %bar) nounwind {
-entry:
- %tmp1 = load i16* %foo ; <i16> [#uses=1]
- store i16 %tmp1, i16* %bar
- ret void
-}
-
-define void @foo2(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1]
- %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
- %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1]
- store i16 %tmp1, i16* %add.ptr5
- ret void
-}
-
-define void @foo3(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
- %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1]
- %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1]
- %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
- %add.ptr = getelementptr i16* %bar, i64 %sub.ptr3.sum ; <i16*> [#uses=1]
- store i16 %tmp1, i16* %add.ptr
- ret void
-}
-
-define void @foo4(i16* nocapture %foo, i16* nocapture %bar, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1]
- %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
- %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
- %add.ptr5 = getelementptr i16* %bar, i64 %add.ptr3.sum ; <i16*> [#uses=1]
- store i16 %tmp1, i16* %add.ptr5
- ret void
-}
-
-define void @foo5(i16* nocapture %foo, i32* nocapture %bar) nounwind {
-entry:
- %tmp1 = load i16* %foo ; <i16> [#uses=1]
- %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
- store i32 %conv, i32* %bar
- ret void
-}
-
-define void @foo6(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i16* %foo, i64 1 ; <i16*> [#uses=1]
- %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
- %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
- %add.ptr3.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
- store i32 %conv, i32* %add.ptr5
- ret void
-}
-
-define void @foo7(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
- %sub.ptr = getelementptr i16* %foo, i64 -1 ; <i16*> [#uses=1]
- %tmp1 = load i16* %sub.ptr ; <i16> [#uses=1]
- %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
- %sub.ptr3.sum = add i64 %idx, -1 ; <i64> [#uses=1]
- %add.ptr = getelementptr i32* %bar, i64 %sub.ptr3.sum ; <i32*> [#uses=1]
- store i32 %conv, i32* %add.ptr
- ret void
-}
-
-define void @foo8(i16* nocapture %foo, i32* nocapture %bar, i64 %idx) nounwind {
-entry:
- %add.ptr = getelementptr i16* %foo, i64 8192 ; <i16*> [#uses=1]
- %tmp1 = load i16* %add.ptr ; <i16> [#uses=1]
- %conv = sext i16 %tmp1 to i32 ; <i32> [#uses=1]
- %add.ptr3.sum = add i64 %idx, 8192 ; <i64> [#uses=1]
- %add.ptr5 = getelementptr i32* %bar, i64 %add.ptr3.sum ; <i32*> [#uses=1]
- store i32 %conv, i32* %add.ptr5
- ret void
-}
diff --git a/test/CodeGen/SystemZ/05-MemRegLoads.ll b/test/CodeGen/SystemZ/05-MemRegLoads.ll
deleted file mode 100644
index f690a4889962..000000000000
--- a/test/CodeGen/SystemZ/05-MemRegLoads.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc < %s -march=systemz | not grep aghi
-; RUN: llc < %s -march=systemz | grep llgf | count 1
-; RUN: llc < %s -march=systemz | grep llgh | count 1
-; RUN: llc < %s -march=systemz | grep llgc | count 1
-; RUN: llc < %s -march=systemz | grep lgf | count 2
-; RUN: llc < %s -march=systemz | grep lgh | count 2
-; RUN: llc < %s -march=systemz | grep lgb | count 1
-
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define zeroext i64 @foo1(i64* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
- %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1]
- ret i64 %tmp3
-}
-
-define zeroext i32 @foo2(i32* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
- %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1]
- ret i32 %tmp3
-}
-
-define zeroext i16 @foo3(i16* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
- %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1]
- ret i16 %tmp3
-}
-
-define zeroext i8 @foo4(i8* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
- %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1]
- ret i8 %tmp3
-}
-
-define signext i64 @foo5(i64* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
- %tmp3 = load i64* %add.ptr2 ; <i64> [#uses=1]
- ret i64 %tmp3
-}
-
-define signext i32 @foo6(i32* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
- %tmp3 = load i32* %add.ptr2 ; <i32> [#uses=1]
- ret i32 %tmp3
-}
-
-define signext i16 @foo7(i16* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
- %tmp3 = load i16* %add.ptr2 ; <i16> [#uses=1]
- ret i16 %tmp3
-}
-
-define signext i8 @foo8(i8* nocapture %a, i64 %idx) nounwind readonly {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
- %tmp3 = load i8* %add.ptr2 ; <i8> [#uses=1]
- ret i8 %tmp3
-}
diff --git a/test/CodeGen/SystemZ/05-MemRegStores.ll b/test/CodeGen/SystemZ/05-MemRegStores.ll
deleted file mode 100644
index b851c3fa6e00..000000000000
--- a/test/CodeGen/SystemZ/05-MemRegStores.ll
+++ /dev/null
@@ -1,79 +0,0 @@
-; RUN: llc < %s | not grep aghi
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo1(i64* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-
-; CHECK: foo1:
-; CHECK: stg %r4, 8(%r1,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i64* %a, i64 %add.ptr.sum ; <i64*> [#uses=1]
- store i64 %val, i64* %add.ptr2
- ret void
-}
-
-define void @foo2(i32* nocapture %a, i64 %idx, i32 %val) nounwind {
-entry:
-; CHECK: foo2:
-; CHECK: st %r4, 4(%r1,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
- store i32 %val, i32* %add.ptr2
- ret void
-}
-
-define void @foo3(i16* nocapture %a, i64 %idx, i16 zeroext %val) nounwind {
-entry:
-; CHECK: foo3:
-; CHECK: sth %r4, 2(%r1,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
- store i16 %val, i16* %add.ptr2
- ret void
-}
-
-define void @foo4(i8* nocapture %a, i64 %idx, i8 zeroext %val) nounwind {
-entry:
-; CHECK: foo4:
-; CHECK: stc %r4, 1(%r3,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
- store i8 %val, i8* %add.ptr2
- ret void
-}
-
-define void @foo5(i8* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo5:
-; CHECK: stc %r4, 1(%r3,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i8* %a, i64 %add.ptr.sum ; <i8*> [#uses=1]
- %conv = trunc i64 %val to i8 ; <i8> [#uses=1]
- store i8 %conv, i8* %add.ptr2
- ret void
-}
-
-define void @foo6(i16* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo6:
-; CHECK: sth %r4, 2(%r1,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i16* %a, i64 %add.ptr.sum ; <i16*> [#uses=1]
- %conv = trunc i64 %val to i16 ; <i16> [#uses=1]
- store i16 %conv, i16* %add.ptr2
- ret void
-}
-
-define void @foo7(i32* nocapture %a, i64 %idx, i64 %val) nounwind {
-entry:
-; CHECK: foo7:
-; CHECK: st %r4, 4(%r1,%r2)
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i32* %a, i64 %add.ptr.sum ; <i32*> [#uses=1]
- %conv = trunc i64 %val to i32 ; <i32> [#uses=1]
- store i32 %conv, i32* %add.ptr2
- ret void
-}
diff --git a/test/CodeGen/SystemZ/06-CallViaStack.ll b/test/CodeGen/SystemZ/06-CallViaStack.ll
deleted file mode 100644
index e904f49de15f..000000000000
--- a/test/CodeGen/SystemZ/06-CallViaStack.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s | grep 168 | count 1
-; RUN: llc < %s | grep 160 | count 3
-; RUN: llc < %s | grep 328 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g) nounwind {
-entry:
- %a = alloca i64, align 8 ; <i64*> [#uses=3]
- store i64 %g, i64* %a
- call void @bar(i64* %a) nounwind
- %tmp1 = load i64* %a ; <i64> [#uses=1]
- ret i64 %tmp1
-}
-
-declare void @bar(i64*)
diff --git a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll b/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
deleted file mode 100644
index c71da9b4418c..000000000000
--- a/test/CodeGen/SystemZ/06-FrameIdxLoad.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s | grep 160 | count 1
-; RUN: llc < %s | grep 168 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
-entry:
- ret i64 %f
-}
-
-define i64 @bar(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64* %g) nounwind readnone {
-entry:
- %conv = ptrtoint i64* %g to i64 ; <i64> [#uses=1]
- ret i64 %conv
-}
diff --git a/test/CodeGen/SystemZ/06-LocalFrame.ll b/test/CodeGen/SystemZ/06-LocalFrame.ll
deleted file mode 100644
index d89b0dfc76c9..000000000000
--- a/test/CodeGen/SystemZ/06-LocalFrame.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s | grep 160 | count 1
-; RUN: llc < %s | grep 328 | count 1
-; RUN: llc < %s | grep 168 | count 1
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define noalias i64* @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f) nounwind readnone {
-entry:
- %g = alloca i64, align 8 ; <i64*> [#uses=1]
- %add.ptr = getelementptr i64* %g, i64 %f ; <i64*> [#uses=1]
- ret i64* %add.ptr
-}
diff --git a/test/CodeGen/SystemZ/06-SimpleCall.ll b/test/CodeGen/SystemZ/06-SimpleCall.ll
deleted file mode 100644
index fd4b5029c731..000000000000
--- a/test/CodeGen/SystemZ/06-SimpleCall.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo() nounwind {
-entry:
- tail call void @bar() nounwind
- ret void
-}
-
-declare void @bar()
diff --git a/test/CodeGen/SystemZ/07-BrCond.ll b/test/CodeGen/SystemZ/07-BrCond.ll
deleted file mode 100644
index 859971713aa3..000000000000
--- a/test/CodeGen/SystemZ/07-BrCond.ll
+++ /dev/null
@@ -1,141 +0,0 @@
-; RUN: llc < %s | grep je | count 1
-; RUN: llc < %s | grep jne | count 1
-; RUN: llc < %s | grep jhe | count 2
-; RUN: llc < %s | grep jle | count 2
-; RUN: llc < %s | grep jh | count 4
-; RUN: llc < %s | grep jl | count 4
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-declare void @bar()
-
-define void @foo1(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo2(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp ugt i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo3(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo4(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo5(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp eq i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo6(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo7(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo8(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp sgt i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo9(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp slt i64 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/SystemZ/07-BrCond32.ll b/test/CodeGen/SystemZ/07-BrCond32.ll
deleted file mode 100644
index 8ece5ac09840..000000000000
--- a/test/CodeGen/SystemZ/07-BrCond32.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; RUN: llc < %s | grep je | count 1
-; RUN: llc < %s | grep jne | count 1
-; RUN: llc < %s | grep jhe | count 2
-; RUN: llc < %s | grep jle | count 2
-; RUN: llc < %s | grep jh | count 4
-; RUN: llc < %s | grep jl | count 4
-
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-declare void @bar()
-
-define void @foo1(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo2(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp ugt i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo3(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp ult i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo4(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo5(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp eq i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo6(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo7(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo8(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp sgt i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo9(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp slt i32 %a, %b ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/SystemZ/07-BrUnCond.ll b/test/CodeGen/SystemZ/07-BrUnCond.ll
deleted file mode 100644
index ac6067abbee0..000000000000
--- a/test/CodeGen/SystemZ/07-BrUnCond.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-
-define void @foo() noreturn nounwind {
-entry:
- tail call void @baz() nounwind
- br label %l1
-
-l1: ; preds = %entry, %l1
- tail call void @bar() nounwind
- br label %l1
-}
-
-declare void @bar()
-
-declare void @baz()
diff --git a/test/CodeGen/SystemZ/07-CmpImm.ll b/test/CodeGen/SystemZ/07-CmpImm.ll
deleted file mode 100644
index 4d0ebda0c035..000000000000
--- a/test/CodeGen/SystemZ/07-CmpImm.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; RUN: llc < %s | grep cgfi | count 8
-; RUN: llc < %s | grep clgfi | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-declare void @bar()
-
-define void @foo1(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo2(i64 %a, i64 %b) nounwind {
-entry:
- %cmp = icmp ugt i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo3(i64 %a) nounwind {
-entry:
- %cmp = icmp eq i64 %a, 0 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo4(i64 %a) nounwind {
-entry:
- %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo5(i64 %a) nounwind {
-entry:
- %cmp = icmp eq i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo6(i64 %a) nounwind {
-entry:
- %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo7(i64 %a) nounwind {
-entry:
- %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo8(i64 %a) nounwind {
-entry:
- %cmp = icmp sgt i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo9(i64 %a) nounwind {
-entry:
- %cmp = icmp slt i64 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/SystemZ/07-CmpImm32.ll b/test/CodeGen/SystemZ/07-CmpImm32.ll
deleted file mode 100644
index add34faafd3f..000000000000
--- a/test/CodeGen/SystemZ/07-CmpImm32.ll
+++ /dev/null
@@ -1,139 +0,0 @@
-; RUN: llc < %s | grep jl | count 3
-; RUN: llc < %s | grep jh | count 3
-; RUN: llc < %s | grep je | count 2
-; RUN: llc < %s | grep jne | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define void @foo(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-declare void @bar()
-
-define void @foo1(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo2(i32 %a, i32 %b) nounwind {
-entry:
- %cmp = icmp ugt i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo3(i32 %a) nounwind {
-entry:
- %cmp = icmp eq i32 %a, 0 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo4(i32 %a) nounwind {
-entry:
- %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo5(i32 %a) nounwind {
-entry:
- %cmp = icmp eq i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo6(i32 %a) nounwind {
-entry:
- %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo7(i32 %a) nounwind {
-entry:
- %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.then, label %if.end
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo8(i32 %a) nounwind {
-entry:
- %cmp = icmp sgt i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
-
-define void @foo9(i32 %a) nounwind {
-entry:
- %cmp = icmp slt i32 %a, 1 ; <i1> [#uses=1]
- br i1 %cmp, label %if.end, label %if.then
-
-if.then: ; preds = %entry
- tail call void @bar() nounwind
- ret void
-
-if.end: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/SystemZ/07-SelectCC.ll b/test/CodeGen/SystemZ/07-SelectCC.ll
deleted file mode 100644
index aa4b36e7d5d4..000000000000
--- a/test/CodeGen/SystemZ/07-SelectCC.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s | grep clgr
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b) nounwind readnone {
-entry:
- %cmp = icmp ult i64 %a, %b ; <i1> [#uses=1]
- %cond = select i1 %cmp, i64 %a, i64 %b ; <i64> [#uses=1]
- ret i64 %cond
-}
diff --git a/test/CodeGen/SystemZ/08-DivRem.ll b/test/CodeGen/SystemZ/08-DivRem.ll
deleted file mode 100644
index ff1e441882a0..000000000000
--- a/test/CodeGen/SystemZ/08-DivRem.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s | grep dsgr | count 2
-; RUN: llc < %s | grep dsgfr | count 2
-; RUN: llc < %s | grep dlr | count 2
-; RUN: llc < %s | grep dlgr | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @div(i64 %a, i64 %b) nounwind readnone {
-entry:
- %div = sdiv i64 %a, %b ; <i64> [#uses=1]
- ret i64 %div
-}
-
-define i32 @div1(i32 %a, i32 %b) nounwind readnone {
-entry:
- %div = sdiv i32 %a, %b ; <i32> [#uses=1]
- ret i32 %div
-}
-
-define i64 @div2(i64 %a, i64 %b) nounwind readnone {
-entry:
- %div = udiv i64 %a, %b ; <i64> [#uses=1]
- ret i64 %div
-}
-
-define i32 @div3(i32 %a, i32 %b) nounwind readnone {
-entry:
- %div = udiv i32 %a, %b ; <i32> [#uses=1]
- ret i32 %div
-}
-
-define i64 @rem(i64 %a, i64 %b) nounwind readnone {
-entry:
- %rem = srem i64 %a, %b ; <i64> [#uses=1]
- ret i64 %rem
-}
-
-define i32 @rem1(i32 %a, i32 %b) nounwind readnone {
-entry:
- %rem = srem i32 %a, %b ; <i32> [#uses=1]
- ret i32 %rem
-}
-
-define i64 @rem2(i64 %a, i64 %b) nounwind readnone {
-entry:
- %rem = urem i64 %a, %b ; <i64> [#uses=1]
- ret i64 %rem
-}
-
-define i32 @rem3(i32 %a, i32 %b) nounwind readnone {
-entry:
- %rem = urem i32 %a, %b ; <i32> [#uses=1]
- ret i32 %rem
-}
diff --git a/test/CodeGen/SystemZ/08-DivRemMemOp.ll b/test/CodeGen/SystemZ/08-DivRemMemOp.ll
deleted file mode 100644
index d6ec0e7440ac..000000000000
--- a/test/CodeGen/SystemZ/08-DivRemMemOp.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: llc < %s | grep {dsgf.%} | count 2
-; RUN: llc < %s | grep {dsg.%} | count 2
-; RUN: llc < %s | grep {dl.%} | count 2
-; RUN: llc < %s | grep dlg | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @div(i64 %a, i64* %b) nounwind readnone {
-entry:
- %b1 = load i64* %b
- %div = sdiv i64 %a, %b1
- ret i64 %div
-}
-
-define i64 @div1(i64 %a, i64* %b) nounwind readnone {
-entry:
- %b1 = load i64* %b
- %div = udiv i64 %a, %b1
- ret i64 %div
-}
-
-define i64 @rem(i64 %a, i64* %b) nounwind readnone {
-entry:
- %b1 = load i64* %b
- %div = srem i64 %a, %b1
- ret i64 %div
-}
-
-define i64 @rem1(i64 %a, i64* %b) nounwind readnone {
-entry:
- %b1 = load i64* %b
- %div = urem i64 %a, %b1
- ret i64 %div
-}
-
-define i32 @div2(i32 %a, i32* %b) nounwind readnone {
-entry:
- %b1 = load i32* %b
- %div = sdiv i32 %a, %b1
- ret i32 %div
-}
-
-define i32 @div3(i32 %a, i32* %b) nounwind readnone {
-entry:
- %b1 = load i32* %b
- %div = udiv i32 %a, %b1
- ret i32 %div
-}
-
-define i32 @rem2(i32 %a, i32* %b) nounwind readnone {
-entry:
- %b1 = load i32* %b
- %div = srem i32 %a, %b1
- ret i32 %div
-}
-
-define i32 @rem3(i32 %a, i32* %b) nounwind readnone {
-entry:
- %b1 = load i32* %b
- %div = urem i32 %a, %b1
- ret i32 %div
-}
-
diff --git a/test/CodeGen/SystemZ/08-SimpleMuls.ll b/test/CodeGen/SystemZ/08-SimpleMuls.ll
deleted file mode 100644
index 1ab88d6ee7dd..000000000000
--- a/test/CodeGen/SystemZ/08-SimpleMuls.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s | grep msgr | count 2
-; RUN: llc < %s | grep msr | count 2
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i64 @foo(i64 %a, i64 %b) nounwind readnone {
-entry:
- %mul = mul i64 %b, %a ; <i64> [#uses=1]
- ret i64 %mul
-}
-
-define i64 @foo2(i64 %a, i64 %b) nounwind readnone {
-entry:
- %mul = mul i64 %b, %a ; <i64> [#uses=1]
- ret i64 %mul
-}
-
-define i32 @foo3(i32 %a, i32 %b) nounwind readnone {
-entry:
- %mul = mul i32 %b, %a ; <i32> [#uses=1]
- ret i32 %mul
-}
-
-define i32 @foo4(i32 %a, i32 %b) nounwind readnone {
-entry:
- %mul = mul i32 %b, %a ; <i32> [#uses=1]
- ret i32 %mul
-}
diff --git a/test/CodeGen/SystemZ/09-DynamicAlloca.ll b/test/CodeGen/SystemZ/09-DynamicAlloca.ll
deleted file mode 100644
index 30810ce6eb90..000000000000
--- a/test/CodeGen/SystemZ/09-DynamicAlloca.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @foo(i64 %N) nounwind {
-entry:
- %N3 = trunc i64 %N to i32 ; <i32> [#uses=1]
- %vla = alloca i8, i32 %N3, align 2 ; <i8*> [#uses=1]
- call void @bar(i8* %vla) nounwind
- ret void
-}
-
-declare void @bar(i8*)
diff --git a/test/CodeGen/SystemZ/09-Globals.ll b/test/CodeGen/SystemZ/09-Globals.ll
deleted file mode 100644
index 50a26e2a451a..000000000000
--- a/test/CodeGen/SystemZ/09-Globals.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s | grep larl | count 3
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
-@bar = common global i64 0, align 8 ; <i64*> [#uses=3]
-
-define i64 @foo() nounwind readonly {
-entry:
- %tmp = load i64* @bar ; <i64> [#uses=1]
- ret i64 %tmp
-}
-
-define i64* @foo2() nounwind readnone {
-entry:
- ret i64* @bar
-}
-
-define i64* @foo3(i64 %idx) nounwind readnone {
-entry:
- %add.ptr.sum = add i64 %idx, 1 ; <i64> [#uses=1]
- %add.ptr2 = getelementptr i64* @bar, i64 %add.ptr.sum ; <i64*> [#uses=1]
- ret i64* %add.ptr2
-}
diff --git a/test/CodeGen/SystemZ/09-Switches.ll b/test/CodeGen/SystemZ/09-Switches.ll
deleted file mode 100644
index 32aaa62a58cf..000000000000
--- a/test/CodeGen/SystemZ/09-Switches.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: llc < %s -march=systemz | grep larl
-
-define i32 @main(i32 %tmp158) {
-entry:
- switch i32 %tmp158, label %bb336 [
- i32 -2147483648, label %bb338
- i32 -2147483647, label %bb338
- i32 -2147483646, label %bb338
- i32 120, label %bb338
- i32 121, label %bb339
- i32 122, label %bb340
- i32 123, label %bb341
- i32 124, label %bb342
- i32 125, label %bb343
- i32 126, label %bb336
- i32 1024, label %bb338
- i32 0, label %bb338
- i32 1, label %bb338
- i32 2, label %bb338
- i32 3, label %bb338
- i32 4, label %bb338
- i32 5, label %bb338
- ]
-bb336:
- ret i32 10
-bb338:
- ret i32 11
-bb339:
- ret i32 12
-bb340:
- ret i32 13
-bb341:
- ret i32 14
-bb342:
- ret i32 15
-bb343:
- ret i32 18
-
-}
diff --git a/test/CodeGen/SystemZ/10-FuncsPic.ll b/test/CodeGen/SystemZ/10-FuncsPic.ll
deleted file mode 100644
index f291e5ff42b6..000000000000
--- a/test/CodeGen/SystemZ/10-FuncsPic.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 3
-; RUN: llc < %s -relocation-model=pic | grep PLT | count 1
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-@ptr = external global void (...)* ; <void (...)**> [#uses=2]
-
-define void @foo1() nounwind {
-entry:
- store void (...)* @func, void (...)** @ptr
- ret void
-}
-
-declare void @func(...)
-
-define void @foo2() nounwind {
-entry:
- tail call void (...)* @func() nounwind
- ret void
-}
-
-define void @foo3() nounwind {
-entry:
- %tmp = load void (...)** @ptr ; <void (...)*> [#uses=1]
- tail call void (...)* %tmp() nounwind
- ret void
-}
diff --git a/test/CodeGen/SystemZ/10-GlobalsPic.ll b/test/CodeGen/SystemZ/10-GlobalsPic.ll
deleted file mode 100644
index c581ad9c4578..000000000000
--- a/test/CodeGen/SystemZ/10-GlobalsPic.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -relocation-model=pic | grep GOTENT | count 6
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-@src = external global i32 ; <i32*> [#uses=2]
-@dst = external global i32 ; <i32*> [#uses=2]
-@ptr = external global i32* ; <i32**> [#uses=2]
-
-define void @foo1() nounwind {
-entry:
- %tmp = load i32* @src ; <i32> [#uses=1]
- store i32 %tmp, i32* @dst
- ret void
-}
-
-define void @foo2() nounwind {
-entry:
- store i32* @dst, i32** @ptr
- ret void
-}
-
-define void @foo3() nounwind {
-entry:
- %tmp = load i32* @src ; <i32> [#uses=1]
- %tmp1 = load i32** @ptr ; <i32*> [#uses=1]
- %arrayidx = getelementptr i32* %tmp1, i64 1 ; <i32*> [#uses=1]
- store i32 %tmp, i32* %arrayidx
- ret void
-}
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
deleted file mode 100644
index 1aa9c6799870..000000000000
--- a/test/CodeGen/SystemZ/11-BSwap.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-
-define zeroext i16 @foo(i16 zeroext %a) {
- %res = tail call i16 @llvm.bswap.i16(i16 %a)
- ret i16 %res
-}
-
-define zeroext i32 @foo2(i32 zeroext %a) {
-; CHECK: foo2:
-; CHECK: lrvr [[R1:%r.]], %r2
- %res = tail call i32 @llvm.bswap.i32(i32 %a)
- ret i32 %res
-}
-
-define zeroext i64 @foo3(i64 %a) {
-; CHECK: foo3:
-; CHECK: lrvgr %r2, %r2
- %res = tail call i64 @llvm.bswap.i64(i64 %a)
- ret i64 %res
-}
-
-define zeroext i16 @foo4(i16* %b) {
- %a = load i16* %b
- %res = tail call i16 @llvm.bswap.i16(i16 %a)
- ret i16 %res
-}
-
-define zeroext i32 @foo5(i32* %b) {
-; CHECK: foo5:
-; CHECK: lrv [[R1:%r.]], 0(%r2)
- %a = load i32* %b
- %res = tail call i32 @llvm.bswap.i32(i32 %a)
- ret i32 %res
-}
-
-define i64 @foo6(i64* %b) {
-; CHECK: foo6:
-; CHECK: lrvg %r2, 0(%r2)
- %a = load i64* %b
- %res = tail call i64 @llvm.bswap.i64(i64 %a)
- ret i64 %res
-}
-
-define void @foo7(i16 %a, i16* %b) {
- %res = tail call i16 @llvm.bswap.i16(i16 %a)
- store i16 %res, i16* %b
- ret void
-}
-
-define void @foo8(i32 %a, i32* %b) {
-; CHECK: foo8:
-; CHECK: strv %r2, 0(%r3)
- %res = tail call i32 @llvm.bswap.i32(i32 %a)
- store i32 %res, i32* %b
- ret void
-}
-
-define void @foo9(i64 %a, i64* %b) {
-; CHECK: foo9:
-; CHECK: strvg %r2, 0(%r3)
- %res = tail call i64 @llvm.bswap.i64(i64 %a)
- store i64 %res, i64* %b
- ret void
-}
-
-declare i16 @llvm.bswap.i16(i16) nounwind readnone
-declare i32 @llvm.bswap.i32(i32) nounwind readnone
-declare i64 @llvm.bswap.i64(i64) nounwind readnone
-
diff --git a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll b/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
deleted file mode 100644
index 65f8e14a9ee1..000000000000
--- a/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-unknown-linux-gnu"
-
-define i32 @main() nounwind {
-entry:
- %call = call i32 (...)* @random() nounwind ; <i32> [#uses=0]
- unreachable
-}
-
-declare i32 @random(...)
diff --git a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll b/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
deleted file mode 100644
index 3cfa97dfc2ba..000000000000
--- a/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=systemz | grep nilf | count 1
-; RUN: llc < %s -march=systemz | grep nill | count 1
-
-define i32 @gnu_dev_major(i64 %__dev) nounwind readnone {
-entry:
- %shr = lshr i64 %__dev, 8 ; <i64> [#uses=1]
- %shr8 = trunc i64 %shr to i32 ; <i32> [#uses=1]
- %shr2 = lshr i64 %__dev, 32 ; <i64> [#uses=1]
- %conv = trunc i64 %shr2 to i32 ; <i32> [#uses=1]
- %and3 = and i32 %conv, -4096 ; <i32> [#uses=1]
- %and6 = and i32 %shr8, 4095 ; <i32> [#uses=1]
- %conv5 = or i32 %and6, %and3 ; <i32> [#uses=1]
- ret i32 %conv5
-}
diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
deleted file mode 100644
index 54424e18f68b..000000000000
--- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=systemz | grep rll
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone {
-entry:
- %shl = shl i32 %x, 1 ; <i32> [#uses=1]
- %sub = sub i32 32, 1 ; <i32> [#uses=1]
- %shr = lshr i32 %x, %sub ; <i32> [#uses=1]
- %or = or i32 %shr, %shl ; <i32> [#uses=1]
- ret i32 %or
-}
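The deleted test above only exercises a rotate by a constant amount. Here is a minimal sketch (not part of the patch) of the variable-amount form of the same shl/lshr/or idiom, which the DAG combiner can likewise match as a rotate; note the pattern is only well defined for amounts in 1..31, since the complementary shift degenerates to a shift by 32 when %n is 0.

define i32 @rotl_var(i32 %x, i32 %n) nounwind readnone {
entry:
  %shl = shl i32 %x, %n                   ; x << n
  %sub = sub i32 32, %n                   ; complementary shift amount
  %shr = lshr i32 %x, %sub                ; x >> (32 - n), undefined when n == 0
  %or = or i32 %shl, %shr                 ; recognizable as rotate-left by n
  ret i32 %or
}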
diff --git a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll b/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
deleted file mode 100644
index 5f6ec50df6c4..000000000000
--- a/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128"
-target triple = "s390x-ibm-linux"
- %struct.re_pattern_buffer = type <{ i8*, i64, i64, i64, i8*, i8*, i64, i8, i8, i8, i8, i8, i8, i8, i8 }>
- %struct.re_registers = type <{ i32, i8, i8, i8, i8, i32*, i32* }>
-
-define i32 @xre_search_2(%struct.re_pattern_buffer* nocapture %bufp, i8* %string1, i32 %size1, i8* %string2, i32 %size2, i32 %startpos, i32 %range, %struct.re_registers* %regs, i32 %stop) nounwind {
-entry:
- %cmp17.i = icmp slt i32 undef, %startpos ; <i1> [#uses=1]
- %or.cond.i = or i1 undef, %cmp17.i ; <i1> [#uses=1]
- br i1 %or.cond.i, label %byte_re_search_2.exit, label %if.then20.i
-
-if.then20.i: ; preds = %entry
- ret i32 -2
-
-byte_re_search_2.exit: ; preds = %entry
- ret i32 -1
-}
diff --git a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll b/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
deleted file mode 100644
index 89b22251eb23..000000000000
--- a/test/CodeGen/SystemZ/2009-07-04-Shl32.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define void @compdecomp(i8* nocapture %data, i64 %data_len) nounwind {
-entry:
- br label %for.body38
-
-for.body38: ; preds = %for.body38, %entry
- br i1 undef, label %for.cond220, label %for.body38
-
-for.cond220: ; preds = %for.cond220, %for.body38
- br i1 false, label %for.cond220, label %for.end297
-
-for.end297: ; preds = %for.cond220
- %tmp334 = load i8* undef ; <i8> [#uses=1]
- %conv343 = zext i8 %tmp334 to i32 ; <i32> [#uses=1]
- %sub344 = add i32 %conv343, -1 ; <i32> [#uses=1]
- %shl345 = shl i32 1, %sub344 ; <i32> [#uses=1]
- %conv346 = sext i32 %shl345 to i64 ; <i64> [#uses=1]
- br label %for.body356
-
-for.body356: ; preds = %for.body356, %for.end297
- %mask.1633 = phi i64 [ %conv346, %for.end297 ], [ undef, %for.body356 ] ; <i64> [#uses=0]
- br label %for.body356
-}
diff --git a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll b/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
deleted file mode 100644
index 68ccb848980c..000000000000
--- a/test/CodeGen/SystemZ/2009-07-05-Shifts.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define signext i32 @bit_place_piece(i32 signext %col, i32 signext %player, i64* nocapture %b1, i64* nocapture %b2) nounwind {
-entry:
- br i1 undef, label %for.body, label %return
-
-for.body: ; preds = %entry
- %add = add i32 0, %col ; <i32> [#uses=1]
- %sh_prom = zext i32 %add to i64 ; <i64> [#uses=1]
- %shl = shl i64 1, %sh_prom ; <i64> [#uses=1]
- br i1 undef, label %if.then13, label %if.else
-
-if.then13: ; preds = %for.body
- ret i32 0
-
-if.else: ; preds = %for.body
- %or34 = or i64 undef, %shl ; <i64> [#uses=0]
- ret i32 0
-
-return: ; preds = %entry
- ret i32 1
-}
diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
deleted file mode 100644
index 92f54675b72b..000000000000
--- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; RUN: llc < %s -regalloc=basic | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) nounwind
-
-declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind
-
-define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind {
-; CHECK: lg %r{{[0-9]+}}, 328(%r15)
-
-newFuncRoot:
- br label %bb3
-
-bb4.exitStub: ; preds = %bb3
- store double %call, double* %call.out
- ret void
-
-bb3: ; preds = %newFuncRoot
- tail call void @rdft(i32 signext %nfft, i32 signext -1, double* %arrayidx44.reload, i32* %ip, double* %w) nounwind
- %call = tail call double @mp_mul_d2i_test(i32 signext %radix, i32 signext %nfft, double* %tmpfft) ; <double> [#uses=1]
- br label %bb4.exitStub
-}
diff --git a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll b/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
deleted file mode 100644
index f4e176eb4421..000000000000
--- a/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define float @foo(i32 signext %a) {
-entry:
- %b = bitcast i32 %a to float
- ret float %b
-}
-
-define i32 @bar(float %a) {
-entry:
- %b = bitcast float %a to i32
- ret i32 %b
-}
diff --git a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll b/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
deleted file mode 100644
index 63fd8553b32e..000000000000
--- a/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llc < %s
-
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16"
-target triple = "s390x-ibm-linux"
-
-define signext i32 @dfg_parse() nounwind {
-entry:
- br i1 undef, label %if.then2208, label %if.else2360
-
-if.then2208: ; preds = %entry
- br i1 undef, label %bb.nph3189, label %for.end2270
-
-bb.nph3189: ; preds = %if.then2208
- unreachable
-
-for.end2270: ; preds = %if.then2208
- %call2279 = call i64 @strlen(i8* undef) nounwind ; <i64> [#uses=1]
- %add2281 = add i64 0, %call2279 ; <i64> [#uses=1]
- %tmp2283 = trunc i64 %add2281 to i32 ; <i32> [#uses=1]
- %tmp2284 = alloca i8, i32 %tmp2283, align 2 ; <i8*> [#uses=1]
- %yyd.0.i2561.13 = getelementptr i8* %tmp2284, i64 13 ; <i8*> [#uses=1]
- store i8 117, i8* %yyd.0.i2561.13
- br label %while.cond.i2558
-
-while.cond.i2558: ; preds = %while.cond.i2558, %for.end2270
- br label %while.cond.i2558
-
-if.else2360: ; preds = %entry
- unreachable
-}
-
-declare i64 @strlen(i8* nocapture) nounwind readonly
diff --git a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll b/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
deleted file mode 100644
index f7686f14da9e..000000000000
--- a/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
-target triple = "s390x-ibm-linux-gnu"
-
-@__JCR_LIST__ = internal global [0 x i8*] zeroinitializer, section ".jcr", align 8 ; <[0 x i8*]*> [#uses=1]
-
-define internal void @frame_dummy() nounwind {
-entry:
- %asmtmp = tail call void (i8*)* (void (i8*)*)* asm "", "=r,0"(void (i8*)* @_Jv_RegisterClasses) nounwind ; <void (i8*)*> [#uses=2]
- %0 = icmp eq void (i8*)* %asmtmp, null ; <i1> [#uses=1]
- br i1 %0, label %return, label %bb3
-
-bb3: ; preds = %entry
- tail call void %asmtmp(i8* bitcast ([0 x i8*]* @__JCR_LIST__ to i8*)) nounwind
- ret void
-
-return: ; preds = %entry
- ret void
-}
-
-declare extern_weak void @_Jv_RegisterClasses(i8*)
diff --git a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll b/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
deleted file mode 100644
index fde7d9d281c2..000000000000
--- a/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:16:16-f128:128:128"
-target triple = "s390x-ibm-linux-gnu"
-
-define double @foo(double %a, double %b) nounwind {
-entry:
-; CHECK: cpsdr %f0, %f2, %f0
- %0 = tail call double @copysign(double %a, double %b) nounwind readnone
- ret double %0
-}
-
-define float @bar(float %a, float %b) nounwind {
-entry:
-; CHECK: cpsdr %f0, %f2, %f0
- %0 = tail call float @copysignf(float %a, float %b) nounwind readnone
- ret float %0
-}
-
-
-declare double @copysign(double, double) nounwind readnone
-declare float @copysignf(float, float) nounwind readnone
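For reference, the operation checked above can also be written with integer bit manipulation instead of the libm calls; this is a sketch only, not part of the patch, and whether this form would still be selected to cpsdr is not something the test asserts.

define double @copysign_bits(double %a, double %b) nounwind readnone {
entry:
  %abits = bitcast double %a to i64
  %bbits = bitcast double %b to i64
  %mag = and i64 %abits, 9223372036854775807    ; magnitude of %a (sign cleared)
  %sgn = and i64 %bbits, -9223372036854775808   ; sign bit of %b
  %bits = or i64 %mag, %sgn
  %res = bitcast i64 %bits to double
  ret double %res
}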
diff --git a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll b/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
deleted file mode 100644
index d730beca245b..000000000000
--- a/test/CodeGen/SystemZ/2010-01-04-DivMem.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: llc < %s
-target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16-n32:64"
-target triple = "s390x-elf"
-
-@REGISTER = external global [10 x i32] ; <[10 x i32]*> [#uses=2]
-
-define void @DIVR_P(i32 signext %PRINT_EFFECT) nounwind {
-entry:
- %REG1 = alloca i32, align 4 ; <i32*> [#uses=2]
- %REG2 = alloca i32, align 4 ; <i32*> [#uses=2]
- %call = call signext i32 (...)* @FORMAT2(i32* %REG1, i32* %REG2) nounwind ; <i32> [#uses=0]
- %tmp = load i32* %REG1 ; <i32> [#uses=1]
- %idxprom = sext i32 %tmp to i64 ; <i64> [#uses=1]
- %arrayidx = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom ; <i32*> [#uses=2]
- %tmp1 = load i32* %arrayidx ; <i32> [#uses=2]
- %tmp2 = load i32* %REG2 ; <i32> [#uses=1]
- %idxprom3 = sext i32 %tmp2 to i64 ; <i64> [#uses=1]
- %arrayidx4 = getelementptr inbounds [10 x i32]* @REGISTER, i64 0, i64 %idxprom3 ; <i32*> [#uses=3]
- %tmp5 = load i32* %arrayidx4 ; <i32> [#uses=3]
- %cmp6 = icmp sgt i32 %tmp5, 8388607 ; <i1> [#uses=1]
- %REG2_SIGN.0 = select i1 %cmp6, i32 -1, i32 1 ; <i32> [#uses=2]
- %cmp10 = icmp eq i32 %REG2_SIGN.0, 1 ; <i1> [#uses=1]
- %not.cmp = icmp slt i32 %tmp1, 8388608 ; <i1> [#uses=2]
- %or.cond = and i1 %cmp10, %not.cmp ; <i1> [#uses=1]
- br i1 %or.cond, label %if.then13, label %if.end25
-
-if.then13: ; preds = %entry
- %div = sdiv i32 %tmp5, %tmp1 ; <i32> [#uses=2]
- store i32 %div, i32* %arrayidx4
- br label %if.end25
-
-if.end25: ; preds = %if.then13, %entry
- %tmp35 = phi i32 [ %div, %if.then13 ], [ %tmp5, %entry ] ; <i32> [#uses=1]
- %cmp27 = icmp eq i32 %REG2_SIGN.0, -1 ; <i1> [#uses=1]
- %or.cond46 = and i1 %cmp27, %not.cmp ; <i1> [#uses=1]
- br i1 %or.cond46, label %if.then31, label %if.end45
-
-if.then31: ; preds = %if.end25
- %sub = sub i32 16777216, %tmp35 ; <i32> [#uses=1]
- %tmp39 = load i32* %arrayidx ; <i32> [#uses=1]
- %div40 = udiv i32 %sub, %tmp39 ; <i32> [#uses=1]
- %sub41 = sub i32 16777216, %div40 ; <i32> [#uses=1]
- store i32 %sub41, i32* %arrayidx4
- ret void
-
-if.end45: ; preds = %if.end25
- ret void
-}
-
-declare signext i32 @FORMAT2(...)
diff --git a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll b/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
deleted file mode 100644
index c2877ac55ed1..000000000000
--- a/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc -O0 -march=systemz -asm-verbose < %s | FileCheck %s
-; Check that DEBUG_VALUE comments come through on a variety of targets.
-
-define i32 @main() nounwind ssp {
-entry:
-; CHECK: DEBUG_VALUE
- call void @llvm.dbg.value(metadata !6, i64 0, metadata !7), !dbg !9
- ret i32 0, !dbg !10
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.sp = !{!0}
-
-!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 589865, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"/tmp/x.c", metadata !"/Users/manav", metadata !"clang version 2.9 (trunk 120996)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 589860, metadata !2, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 0}
-!7 = metadata !{i32 590080, metadata !8, metadata !"i", metadata !1, i32 3, metadata !5, i32 0} ; [ DW_TAG_auto_variable ]
-!8 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ]
-!9 = metadata !{i32 3, i32 11, metadata !8, null}
-!10 = metadata !{i32 4, i32 2, metadata !8, null}
-
diff --git a/test/CodeGen/SystemZ/dg.exp b/test/CodeGen/SystemZ/dg.exp
deleted file mode 100644
index e9624bac68e9..000000000000
--- a/test/CodeGen/SystemZ/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target SystemZ] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
index 2890c22ce6ca..ed55bb5dcf89 100644
--- a/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
+++ b/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -1,11 +1,7 @@
-; DISABLED: llc -mtriple=thumbv6-apple-darwin < %s
-; RUN: false
+; RUN: llc -mtriple=thumbv6-apple-darwin < %s
; rdar://problem/9416774
; ModuleID = 'reduced.ll'
-; byval is currently unsupported.
-; XFAIL: *
-
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-ios"
diff --git a/test/CodeGen/Thumb/dg.exp b/test/CodeGen/Thumb/dg.exp
deleted file mode 100644
index 3ff359aab39b..000000000000
--- a/test/CodeGen/Thumb/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
index fbacabaedc35..f8c438c6e0a4 100644
--- a/test/CodeGen/Thumb/large-stack.ll
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-apple-ios | FileCheck %s
define void @test1() {
; CHECK: test1:
diff --git a/test/CodeGen/Thumb/lit.local.cfg b/test/CodeGen/Thumb/lit.local.cfg
new file mode 100644
index 000000000000..cb77b09ef4ad
--- /dev/null
+++ b/test/CodeGen/Thumb/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/Thumb/vargs.ll b/test/CodeGen/Thumb/vargs.ll
index c2ba208e4ae2..50a1a0728846 100644
--- a/test/CodeGen/Thumb/vargs.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -13,9 +13,9 @@ entry:
bb: ; preds = %bb, %entry
%a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ] ; <i32> [#uses=2]
- %tmp = volatile load i8** %va ; <i8*> [#uses=2]
+ %tmp = load volatile i8** %va ; <i8*> [#uses=2]
%tmp2 = getelementptr i8* %tmp, i32 4 ; <i8*> [#uses=1]
- volatile store i8* %tmp2, i8** %va
+ store volatile i8* %tmp2, i8** %va
%tmp5 = add i32 %a_addr.0, -1 ; <i32> [#uses=1]
%tmp.upgrd.2 = icmp eq i32 %a_addr.0, 1 ; <i1> [#uses=1]
br i1 %tmp.upgrd.2, label %bb7, label %bb
diff --git a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 4e1394ff2732..4616dcfe3e4e 100644
--- a/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mattr=+vfp2,+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+vfp2,+thumb2 | FileCheck %s
; rdar://7076238
@"\01LC" = external constant [36 x i8], align 1 ; <[36 x i8]*> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index 034a28f003db..524e5a6b7b68 100644
--- a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -5,7 +5,7 @@ define void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y
entry:
; -- The loop following the load should only use a single add-literation
; instruction.
-; CHECK: ldr.64
+; CHECK: vldr
; CHECK: adds r{{[0-9]+.*}}#1
; CHECK-NOT: adds
; CHECK: subsections_via_symbols
diff --git a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index bb734aca4e68..fcf1bae796b0 100644
--- a/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -21,7 +21,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
define void @test(%s1* %this, i32 %format, i32 %w, i32 %h, i32 %levels, i32* %s, i8* %data, i32* nocapture %rowbytes, void (i8*, i8*)* %release, i8* %info) nounwind {
entry:
%tmp1 = getelementptr inbounds %s1* %this, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0
- volatile store i32 1, i32* %tmp1, align 4
+ store volatile i32 1, i32* %tmp1, align 4
%tmp12 = getelementptr inbounds %s1* %this, i32 0, i32 1
store i32 %levels, i32* %tmp12, align 4
%tmp13 = getelementptr inbounds %s1* %this, i32 0, i32 3
@@ -46,7 +46,7 @@ entry:
%tmp24 = shl i32 %flags.0, 16
%asmtmp.i.i.i = tail call %0 asm sideeffect "\0A0:\09ldrex $1, [$2]\0A\09orr $1, $1, $3\0A\09strex $0, $1, [$2]\0A\09cmp $0, #0\0A\09bne 0b", "=&r,=&r,r,r,~{memory},~{cc}"(i32* %tmp1, i32 %tmp24) nounwind
%tmp25 = getelementptr inbounds %s1* %this, i32 0, i32 2, i32 0, i32 0
- volatile store i32 1, i32* %tmp25, align 4
+ store volatile i32 1, i32* %tmp25, align 4
%tmp26 = icmp eq i32 %levels, 0
br i1 %tmp26, label %return, label %bb4
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 01fb0a581a5b..06762bad854f 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -23,7 +23,7 @@ entry:
%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
; Constant pool load followed by add.
; Then clobber the loaded register, not the sum.
-; CHECK: vldr.64 [[LDR:d.*]],
+; CHECK: vldr [[LDR:d.*]],
; CHECK: LPC0_0:
; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
; CHECK-NOT: vmov.f64 [[ADD]]
diff --git a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
index d2140a10048d..5cb266b11b0c 100644
--- a/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
+++ b/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll
@@ -1,5 +1,5 @@
; rdar://8465407
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
%struct.buf = type opaque
diff --git a/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
new file mode 100644
index 000000000000..dadbdc5ced2f
--- /dev/null
+++ b/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8
+
+%struct.LIST_NODE.0.16 = type { %struct.LIST_NODE.0.16*, i8* }
+
+define %struct.LIST_NODE.0.16* @list_AssocListPair(%struct.LIST_NODE.0.16* %List, i8* %Key) nounwind readonly {
+entry:
+ br label %bb3
+
+bb: ; preds = %bb3
+ %Scan.0.idx7.val = load i8** undef, align 4
+ %.idx = getelementptr i8* %Scan.0.idx7.val, i32 4
+ %0 = bitcast i8* %.idx to i8**
+ %.idx.val = load i8** %0, align 4
+ %1 = icmp eq i8* %.idx.val, %Key
+ br i1 %1, label %bb5, label %bb2
+
+bb2: ; preds = %bb
+ %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4
+ br label %bb3
+
+bb3: ; preds = %bb2, %entry
+ %Scan.0 = phi %struct.LIST_NODE.0.16* [ %List, %entry ], [ %Scan.0.idx8.val, %bb2 ]
+ %2 = icmp eq %struct.LIST_NODE.0.16* %Scan.0, null
+ br i1 %2, label %bb5, label %bb
+
+bb5: ; preds = %bb3, %bb
+ ret %struct.LIST_NODE.0.16* null
+}
diff --git a/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
new file mode 100644
index 000000000000..4acdd9e19ed8
--- /dev/null
+++ b/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -0,0 +1,103 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s
+; rdar://10676853
+
+%struct.Dict_node_struct = type { i8*, %struct.Word_file_struct*, %struct.Exp_struct*, %struct.Dict_node_struct*, %struct.Dict_node_struct* }
+%struct.Word_file_struct = type { [60 x i8], i32, %struct.Word_file_struct* }
+%struct.Exp_struct = type { i8, i8, i8, i8, %union.anon }
+%union.anon = type { %struct.E_list_struct* }
+%struct.E_list_struct = type { %struct.E_list_struct*, %struct.Exp_struct* }
+
+@lookup_list = external hidden unnamed_addr global %struct.Dict_node_struct*, align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %dn, i8* nocapture %s) nounwind ssp {
+; CHECK: rdictionary_lookup:
+entry:
+ br label %tailrecurse
+
+tailrecurse: ; preds = %if.then10, %entry
+ %dn.tr = phi %struct.Dict_node_struct* [ %dn, %entry ], [ %9, %if.then10 ]
+ %cmp = icmp eq %struct.Dict_node_struct* %dn.tr, null
+ br i1 %cmp, label %if.end11, label %if.end
+
+if.end: ; preds = %tailrecurse
+ %string = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
+ %0 = load i8** %string, align 4
+ br label %while.cond.i
+
+while.cond.i: ; preds = %while.body.i, %if.end
+ %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
+ %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
+ %2 = load i8* %1, align 1
+ %cmp.i = icmp eq i8 %2, 0
+ %.pre.i = load i8* %storemerge.i, align 1
+ br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
+
+land.end.i: ; preds = %while.cond.i
+ %cmp4.i = icmp eq i8 %2, %.pre.i
+ br i1 %cmp4.i, label %while.body.i, label %while.end.i
+
+while.body.i: ; preds = %land.end.i
+ %incdec.ptr.i = getelementptr inbounds i8* %1, i32 1
+ %incdec.ptr6.i = getelementptr inbounds i8* %storemerge.i, i32 1
+ br label %while.cond.i
+
+while.end.i: ; preds = %land.end.i
+ %cmp8.i = icmp eq i8 %2, 42
+ br i1 %cmp8.i, label %if.end3, label %lor.lhs.false.i
+
+lor.lhs.false.i: ; preds = %while.end.i, %while.cond.i
+ %3 = phi i8 [ %2, %while.end.i ], [ 0, %while.cond.i ]
+ %cmp11.i = icmp eq i8 %.pre.i, 42
+ br i1 %cmp11.i, label %if.end3, label %dict_match.exit
+
+dict_match.exit: ; preds = %lor.lhs.false.i
+ %cmp14.i = icmp eq i8 %3, 46
+ %conv16.i = sext i8 %3 to i32
+ %.conv16.i = select i1 %cmp14.i, i32 0, i32 %conv16.i
+ %cmp18.i = icmp eq i8 %.pre.i, 46
+ %conv22.i = sext i8 %.pre.i to i32
+ %cond24.i = select i1 %cmp18.i, i32 0, i32 %conv22.i
+ %sub.i = sub nsw i32 %.conv16.i, %cond24.i
+ %cmp1 = icmp sgt i32 %sub.i, -1
+ br i1 %cmp1, label %if.end3, label %if.then10
+
+if.end3: ; preds = %dict_match.exit, %lor.lhs.false.i, %while.end.i
+; CHECK: %if.end3
+; CHECK: cmp
+; CHECK-NOT: cbnz
+ %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
+ %right = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
+ %4 = load %struct.Dict_node_struct** %right, align 4
+ tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
+ %cmp4 = icmp eq i32 %storemerge1.i3, 0
+ br i1 %cmp4, label %if.then5, label %if.end8
+
+if.then5: ; preds = %if.end3
+ %call6 = tail call fastcc i8* @xalloc(i32 20)
+ %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
+ %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
+ %7 = load %struct.Dict_node_struct** @lookup_list, align 4
+ %right7 = getelementptr inbounds i8* %call6, i32 16
+ %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
+ store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
+ store %struct.Dict_node_struct* %5, %struct.Dict_node_struct** @lookup_list, align 4
+ br label %if.then10
+
+if.end8: ; preds = %if.end3
+ %cmp9 = icmp slt i32 %storemerge1.i3, 1
+ br i1 %cmp9, label %if.then10, label %if.end11
+
+if.then10: ; preds = %if.end8, %if.then5, %dict_match.exit
+ %left = getelementptr inbounds %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
+ %9 = load %struct.Dict_node_struct** %left, align 4
+ br label %tailrecurse
+
+if.end11: ; preds = %if.end8, %tailrecurse
+ ret void
+}
+
+; Materializable
+declare hidden fastcc i8* @xalloc(i32) nounwind ssp
diff --git a/test/CodeGen/Thumb2/aligned-constants.ll b/test/CodeGen/Thumb2/aligned-constants.ll
new file mode 100644
index 000000000000..16b3a193c9d7
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-constants.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; The double in the constant pool is 8-byte aligned, forcing the function
+; alignment.
+; CHECK: .align 3
+; CHECK: func
+;
+; Constant pool with 8-byte entry before 4-byte entry:
+; CHECK: .align 3
+; CHECK: LCPI
+; CHECK: .long 2370821947
+; CHECK: .long 1080815255
+; CHECK: LCPI
+; CHECK: .long 1123477881
+define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
+entry:
+ %0 = load float* %x, align 4
+ %add = fadd float %0, 0x405EDD2F20000000
+ store float %add, float* %x, align 4
+ %1 = load double* %y, align 4
+ %add1 = fadd double %1, 2.234560e+02
+ store double %add1, double* %y, align 4
+ ret void
+}
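As a counterpoint to the new test above, a sketch (not part of the patch) of a float-only variant: with no 8-byte constant-pool entry, nothing should force the .align 3 function alignment.

; Sketch only; the expectation of default alignment is an inference from the
; comment in the test above, not a checked-in CHECK line.
define void @func_float_only(float* nocapture %x) nounwind ssp {
entry:
  %0 = load float* %x, align 4
  %add = fadd float %0, 0x405EDD2F20000000     ; 4-byte constant-pool entry
  store float %add, float* %x, align 4
  ret void
}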
diff --git a/test/CodeGen/Thumb2/aligned-spill.ll b/test/CodeGen/Thumb2/aligned-spill.ll
new file mode 100644
index 000000000000..c98ca8098583
--- /dev/null
+++ b/test/CodeGen/Thumb2/aligned-spill.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; CHECK: f
+; This function is forced to spill a double.
+; Verify that the spill slot is properly aligned.
+;
+; The caller-saved r4 is used as a scratch register for stack realignment.
+; CHECK: push {r4, r7, lr}
+; CHECK: bic r4, r4, #7
+; CHECK: mov sp, r4
+define void @f(double* nocapture %p) nounwind ssp {
+entry:
+ %0 = load double* %p, align 4
+ tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
+ tail call void @g() nounwind
+ store double %0, double* %p, align 4
+ ret void
+}
+
+; NEON: f
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #64
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13, d14, d15}, [r4, :128]
+; Stack pointer adjustment for the stack frame contents.
+; This could legally happen before the spills.
+; Since the spill slot is only 8 bytes, technically it would be fine to only
+; subtract #8 here. That would leave sp less aligned than some stack slots,
+; and would probably blow MFI's mind.
+; NEON: sub sp, #16
+; The epilog is free to use another scratch register than r4.
+; NEON: add r[[R4:[0-9]+]], sp, #16
+; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]], :128]!
+; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]], :128]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+declare void @g()
+
+; Spill 7 d-registers.
+define void @f7(double* nocapture %p) nounwind ssp {
+entry:
+ tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
+ ret void
+}
+
+; NEON: f7
+; NEON: push {r4, r7, lr}
+; NEON: sub.w r4, sp, #56
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9, d10, d11}, [r4, :128]!
+; NEON: vst1.64 {d12, d13}, [r4, :128]
+; NEON: vstr d14, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9, d10, d11},
+; NEON: vld1.64 {d12, d13},
+; NEON: vldr d14,
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: pop
+
+; Spill 7 d-registers, leave a hole.
+define void @f3plus4(double* nocapture %p) nounwind ssp {
+entry:
+ tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
+ ret void
+}
+
+; Aligned spilling only works for contiguous ranges starting from d8.
+; The rest goes to the standard vpush instructions.
+; NEON: f3plus4
+; NEON: push {r4, r7, lr}
+; NEON: vpush {d12, d13, d14, d15}
+; NEON: sub.w r4, sp, #24
+; NEON: bic r4, r4, #15
+; Stack pointer must be updated before the spills.
+; NEON: mov sp, r4
+; NEON: vst1.64 {d8, d9}, [r4, :128]
+; NEON: vstr d10, [r4, #16]
+; Epilog
+; NEON: vld1.64 {d8, d9},
+; NEON: vldr d10, [{{.*}}, #16]
+; The stack pointer restore must happen after the reloads.
+; NEON: mov sp,
+; NEON: vpop {d12, d13, d14, d15}
+; NEON: pop
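One further variant as a sketch, not part of the patch: the comment above says aligned spilling only works for contiguous ranges starting from d8, so a clobber set that omits d8 entirely should presumably be handled by plain vpush/vpop with no aligned vst1/vld1 at all.

; Sketch only; the vpush/vpop fallback is an inference from the comment above.
define void @f_no_d8() nounwind ssp {
entry:
  tail call void asm sideeffect "", "~{d9},~{d10},~{d11}"() nounwind
  ret void
}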
diff --git a/test/CodeGen/Thumb2/constant-islands.ll b/test/CodeGen/Thumb2/constant-islands.ll
new file mode 100644
index 000000000000..19d23851da8a
--- /dev/null
+++ b/test/CodeGen/Thumb2/constant-islands.ll
@@ -0,0 +1,1400 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O0 -filetype=obj -o %t.o
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+; RUN: llc < %s -march=thumb -mcpu=cortex-a8 -O2 -filetype=obj -o %t.o
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios"
+
+; This function comes from the Bullet test. It is quite big, and exercises the
+; constant island pass a bit. It has caused failures, including
+; <rdar://problem/10670199>
+;
+; It is unlikely that this code will continue to create the exact conditions
+; that broke the arm constant island pass in the past, but it is still useful to
+; force the pass to split basic blocks etc.
+;
+; The run lines above force the integrated assembler to be enabled so it can
+; catch any illegal displacements. Other than that, we depend on the constant
+; island pass assertions.
+
+%class.btVector3 = type { [4 x float] }
+%class.btTransform = type { %class.btMatrix3x3, %class.btVector3 }
+%class.btMatrix3x3 = type { [3 x %class.btVector3] }
+%class.btCapsuleShape = type { %class.btConvexInternalShape, i32 }
+%class.btConvexInternalShape = type { %class.btConvexShape, %class.btVector3, %class.btVector3, float, float }
+%class.btConvexShape = type { %class.btCollisionShape }
+%class.btCollisionShape = type { i32 (...)**, i32, i8* }
+%class.RagDoll = type { i32 (...)**, %class.btDynamicsWorld*, [11 x %class.btCollisionShape*], [11 x %class.btRigidBody*], [10 x %class.btTypedConstraint*] }
+%class.btDynamicsWorld = type { %class.btCollisionWorld, void (%class.btDynamicsWorld*, float)*, void (%class.btDynamicsWorld*, float)*, i8*, %struct.btContactSolverInfo }
+%class.btCollisionWorld = type { i32 (...)**, %class.btAlignedObjectArray, %class.btDispatcher*, %struct.btDispatcherInfo, %class.btStackAlloc*, %class.btBroadphaseInterface*, %class.btIDebugDraw*, i8 }
+%class.btAlignedObjectArray = type { %class.btAlignedAllocator, i32, i32, %class.btCollisionObject**, i8 }
+%class.btAlignedAllocator = type { i8 }
+%class.btCollisionObject = type { i32 (...)**, %class.btTransform, %class.btTransform, %class.btVector3, %class.btVector3, %class.btVector3, i8, float, %struct.btBroadphaseProxy*, %class.btCollisionShape*, %class.btCollisionShape*, i32, i32, i32, i32, float, float, float, i8*, i32, float, float, float, i8, [7 x i8] }
+%struct.btBroadphaseProxy = type { i8*, i16, i16, i8*, i32, %class.btVector3, %class.btVector3 }
+%class.btDispatcher = type { i32 (...)** }
+%struct.btDispatcherInfo = type { float, i32, i32, float, i8, %class.btIDebugDraw*, i8, i8, i8, float, i8, float, %class.btStackAlloc* }
+%class.btIDebugDraw = type { i32 (...)** }
+%class.btStackAlloc = type opaque
+%class.btBroadphaseInterface = type { i32 (...)** }
+%struct.btContactSolverInfo = type { %struct.btContactSolverInfoData }
+%struct.btContactSolverInfoData = type { float, float, float, float, float, i32, float, float, float, float, float, i32, float, float, float, i32, i32 }
+%class.btRigidBody = type { %class.btCollisionObject, %class.btMatrix3x3, %class.btVector3, %class.btVector3, float, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float, float, i8, float, float, float, float, float, float, %class.btMotionState*, %class.btAlignedObjectArray.22, i32, i32, i32 }
+%class.btMotionState = type { i32 (...)** }
+%class.btAlignedObjectArray.22 = type { %class.btAlignedAllocator.23, i32, i32, %class.btTypedConstraint**, i8 }
+%class.btAlignedAllocator.23 = type { i8 }
+%class.btTypedConstraint = type { i32 (...)**, %struct.btTypedObject, i32, i32, i8, %class.btRigidBody*, %class.btRigidBody*, float, float, %class.btVector3, %class.btVector3, %class.btVector3 }
+%struct.btTypedObject = type { i32 }
+%class.btHingeConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, float, float, float, float, float, i8, i8, i8, i8, i8, float }
+%class.btJacobianEntry = type { %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, %class.btVector3, float }
+%class.btConeTwistConstraint = type { %class.btTypedConstraint, [3 x %class.btJacobianEntry], %class.btTransform, %class.btTransform, float, float, float, float, float, float, float, float, %class.btVector3, %class.btVector3, float, float, float, float, float, float, float, float, i8, i8, i8, i8, float, float, %class.btVector3, i8, i8, %class.btQuaternion, float, %class.btVector3 }
+%class.btQuaternion = type { %class.btQuadWord }
+%class.btQuadWord = type { [4 x float] }
+
+@_ZTV7RagDoll = external unnamed_addr constant [4 x i8*]
+
+declare noalias i8* @_Znwm(i32)
+
+declare i32 @__gxx_personality_sj0(...)
+
+declare void @_ZdlPv(i8*) nounwind
+
+declare %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3*, float*, float*, float*) unnamed_addr inlinehint ssp align 2
+
+declare void @_ZSt9terminatev()
+
+declare %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform*) unnamed_addr ssp align 2
+
+declare void @_ZN11btTransform11setIdentityEv(%class.btTransform*) ssp align 2
+
+declare void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform*, %class.btVector3*) nounwind inlinehint ssp align 2
+
+declare i8* @_ZN13btConvexShapenwEm(i32) inlinehint ssp align 2
+
+declare void @_ZN13btConvexShapedlEPv(i8*) inlinehint ssp align 2
+
+declare %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape*, float, float)
+
+declare %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform*) nounwind inlinehint ssp align 2
+
+define %class.RagDoll* @_ZN7RagDollC2EP15btDynamicsWorldRK9btVector3f(%class.RagDoll* %this, %class.btDynamicsWorld* %ownerWorld, %class.btVector3* %positionOffset, float %scale) unnamed_addr ssp align 2 {
+entry:
+ %retval = alloca %class.RagDoll*, align 4
+ %this.addr = alloca %class.RagDoll*, align 4
+ %ownerWorld.addr = alloca %class.btDynamicsWorld*, align 4
+ %positionOffset.addr = alloca %class.btVector3*, align 4
+ %scale.addr = alloca float, align 4
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ %offset = alloca %class.btTransform, align 4
+ %transform = alloca %class.btTransform, align 4
+ %ref.tmp = alloca %class.btVector3, align 4
+ %ref.tmp97 = alloca %class.btVector3, align 4
+ %ref.tmp98 = alloca float, align 4
+ %ref.tmp99 = alloca float, align 4
+ %ref.tmp100 = alloca float, align 4
+ %ref.tmp102 = alloca %class.btTransform, align 4
+ %ref.tmp107 = alloca %class.btVector3, align 4
+ %ref.tmp108 = alloca %class.btVector3, align 4
+ %ref.tmp109 = alloca float, align 4
+ %ref.tmp110 = alloca float, align 4
+ %ref.tmp111 = alloca float, align 4
+ %ref.tmp113 = alloca %class.btTransform, align 4
+ %ref.tmp119 = alloca %class.btVector3, align 4
+ %ref.tmp120 = alloca %class.btVector3, align 4
+ %ref.tmp121 = alloca float, align 4
+ %ref.tmp122 = alloca float, align 4
+ %ref.tmp123 = alloca float, align 4
+ %ref.tmp125 = alloca %class.btTransform, align 4
+ %ref.tmp131 = alloca %class.btVector3, align 4
+ %ref.tmp132 = alloca %class.btVector3, align 4
+ %ref.tmp133 = alloca float, align 4
+ %ref.tmp134 = alloca float, align 4
+ %ref.tmp135 = alloca float, align 4
+ %ref.tmp137 = alloca %class.btTransform, align 4
+ %ref.tmp143 = alloca %class.btVector3, align 4
+ %ref.tmp144 = alloca %class.btVector3, align 4
+ %ref.tmp145 = alloca float, align 4
+ %ref.tmp146 = alloca float, align 4
+ %ref.tmp147 = alloca float, align 4
+ %ref.tmp149 = alloca %class.btTransform, align 4
+ %ref.tmp155 = alloca %class.btVector3, align 4
+ %ref.tmp156 = alloca %class.btVector3, align 4
+ %ref.tmp157 = alloca float, align 4
+ %ref.tmp158 = alloca float, align 4
+ %ref.tmp159 = alloca float, align 4
+ %ref.tmp161 = alloca %class.btTransform, align 4
+ %ref.tmp167 = alloca %class.btVector3, align 4
+ %ref.tmp168 = alloca %class.btVector3, align 4
+ %ref.tmp169 = alloca float, align 4
+ %ref.tmp170 = alloca float, align 4
+ %ref.tmp171 = alloca float, align 4
+ %ref.tmp173 = alloca %class.btTransform, align 4
+ %ref.tmp179 = alloca %class.btVector3, align 4
+ %ref.tmp180 = alloca %class.btVector3, align 4
+ %ref.tmp181 = alloca float, align 4
+ %ref.tmp182 = alloca float, align 4
+ %ref.tmp183 = alloca float, align 4
+ %ref.tmp186 = alloca %class.btTransform, align 4
+ %ref.tmp192 = alloca %class.btVector3, align 4
+ %ref.tmp193 = alloca %class.btVector3, align 4
+ %ref.tmp194 = alloca float, align 4
+ %ref.tmp195 = alloca float, align 4
+ %ref.tmp196 = alloca float, align 4
+ %ref.tmp199 = alloca %class.btTransform, align 4
+ %ref.tmp205 = alloca %class.btVector3, align 4
+ %ref.tmp206 = alloca %class.btVector3, align 4
+ %ref.tmp207 = alloca float, align 4
+ %ref.tmp208 = alloca float, align 4
+ %ref.tmp209 = alloca float, align 4
+ %ref.tmp212 = alloca %class.btTransform, align 4
+ %ref.tmp218 = alloca %class.btVector3, align 4
+ %ref.tmp219 = alloca %class.btVector3, align 4
+ %ref.tmp220 = alloca float, align 4
+ %ref.tmp221 = alloca float, align 4
+ %ref.tmp222 = alloca float, align 4
+ %ref.tmp225 = alloca %class.btTransform, align 4
+ %i = alloca i32, align 4
+ %hingeC = alloca %class.btHingeConstraint*, align 4
+ %coneC = alloca %class.btConeTwistConstraint*, align 4
+ %localA = alloca %class.btTransform, align 4
+ %localB = alloca %class.btTransform, align 4
+ %ref.tmp240 = alloca %class.btVector3, align 4
+ %ref.tmp241 = alloca %class.btVector3, align 4
+ %ref.tmp242 = alloca float, align 4
+ %ref.tmp243 = alloca float, align 4
+ %ref.tmp244 = alloca float, align 4
+ %ref.tmp247 = alloca %class.btVector3, align 4
+ %ref.tmp248 = alloca %class.btVector3, align 4
+ %ref.tmp249 = alloca float, align 4
+ %ref.tmp250 = alloca float, align 4
+ %ref.tmp251 = alloca float, align 4
+ %ref.tmp266 = alloca %class.btVector3, align 4
+ %ref.tmp267 = alloca %class.btVector3, align 4
+ %ref.tmp268 = alloca float, align 4
+ %ref.tmp269 = alloca float, align 4
+ %ref.tmp270 = alloca float, align 4
+ %ref.tmp273 = alloca %class.btVector3, align 4
+ %ref.tmp274 = alloca %class.btVector3, align 4
+ %ref.tmp275 = alloca float, align 4
+ %ref.tmp276 = alloca float, align 4
+ %ref.tmp277 = alloca float, align 4
+ %ref.tmp295 = alloca %class.btVector3, align 4
+ %ref.tmp296 = alloca %class.btVector3, align 4
+ %ref.tmp297 = alloca float, align 4
+ %ref.tmp298 = alloca float, align 4
+ %ref.tmp299 = alloca float, align 4
+ %ref.tmp302 = alloca %class.btVector3, align 4
+ %ref.tmp303 = alloca %class.btVector3, align 4
+ %ref.tmp304 = alloca float, align 4
+ %ref.tmp305 = alloca float, align 4
+ %ref.tmp306 = alloca float, align 4
+ %ref.tmp324 = alloca %class.btVector3, align 4
+ %ref.tmp325 = alloca %class.btVector3, align 4
+ %ref.tmp326 = alloca float, align 4
+ %ref.tmp327 = alloca float, align 4
+ %ref.tmp328 = alloca float, align 4
+ %ref.tmp331 = alloca %class.btVector3, align 4
+ %ref.tmp332 = alloca %class.btVector3, align 4
+ %ref.tmp333 = alloca float, align 4
+ %ref.tmp334 = alloca float, align 4
+ %ref.tmp335 = alloca float, align 4
+ %ref.tmp353 = alloca %class.btVector3, align 4
+ %ref.tmp354 = alloca %class.btVector3, align 4
+ %ref.tmp355 = alloca float, align 4
+ %ref.tmp356 = alloca float, align 4
+ %ref.tmp357 = alloca float, align 4
+ %ref.tmp360 = alloca %class.btVector3, align 4
+ %ref.tmp361 = alloca %class.btVector3, align 4
+ %ref.tmp362 = alloca float, align 4
+ %ref.tmp363 = alloca float, align 4
+ %ref.tmp364 = alloca float, align 4
+ %ref.tmp382 = alloca %class.btVector3, align 4
+ %ref.tmp383 = alloca %class.btVector3, align 4
+ %ref.tmp384 = alloca float, align 4
+ %ref.tmp385 = alloca float, align 4
+ %ref.tmp386 = alloca float, align 4
+ %ref.tmp389 = alloca %class.btVector3, align 4
+ %ref.tmp390 = alloca %class.btVector3, align 4
+ %ref.tmp391 = alloca float, align 4
+ %ref.tmp392 = alloca float, align 4
+ %ref.tmp393 = alloca float, align 4
+ %ref.tmp411 = alloca %class.btVector3, align 4
+ %ref.tmp412 = alloca %class.btVector3, align 4
+ %ref.tmp413 = alloca float, align 4
+ %ref.tmp414 = alloca float, align 4
+ %ref.tmp415 = alloca float, align 4
+ %ref.tmp418 = alloca %class.btVector3, align 4
+ %ref.tmp419 = alloca %class.btVector3, align 4
+ %ref.tmp420 = alloca float, align 4
+ %ref.tmp421 = alloca float, align 4
+ %ref.tmp422 = alloca float, align 4
+ %ref.tmp440 = alloca %class.btVector3, align 4
+ %ref.tmp441 = alloca %class.btVector3, align 4
+ %ref.tmp442 = alloca float, align 4
+ %ref.tmp443 = alloca float, align 4
+ %ref.tmp444 = alloca float, align 4
+ %ref.tmp447 = alloca %class.btVector3, align 4
+ %ref.tmp448 = alloca %class.btVector3, align 4
+ %ref.tmp449 = alloca float, align 4
+ %ref.tmp450 = alloca float, align 4
+ %ref.tmp451 = alloca float, align 4
+ %ref.tmp469 = alloca %class.btVector3, align 4
+ %ref.tmp470 = alloca %class.btVector3, align 4
+ %ref.tmp471 = alloca float, align 4
+ %ref.tmp472 = alloca float, align 4
+ %ref.tmp473 = alloca float, align 4
+ %ref.tmp476 = alloca %class.btVector3, align 4
+ %ref.tmp477 = alloca %class.btVector3, align 4
+ %ref.tmp478 = alloca float, align 4
+ %ref.tmp479 = alloca float, align 4
+ %ref.tmp480 = alloca float, align 4
+ %ref.tmp498 = alloca %class.btVector3, align 4
+ %ref.tmp499 = alloca %class.btVector3, align 4
+ %ref.tmp500 = alloca float, align 4
+ %ref.tmp501 = alloca float, align 4
+ %ref.tmp502 = alloca float, align 4
+ %ref.tmp505 = alloca %class.btVector3, align 4
+ %ref.tmp506 = alloca %class.btVector3, align 4
+ %ref.tmp507 = alloca float, align 4
+ %ref.tmp508 = alloca float, align 4
+ %ref.tmp509 = alloca float, align 4
+ store %class.RagDoll* %this, %class.RagDoll** %this.addr, align 4
+ store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
+ store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
+ store float %scale, float* %scale.addr, align 4
+ %this1 = load %class.RagDoll** %this.addr
+ store %class.RagDoll* %this1, %class.RagDoll** %retval
+ %0 = bitcast %class.RagDoll* %this1 to i8***
+ store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
+ %m_ownerWorld = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+ store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
+ %call = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %2 = bitcast i8* %call to %class.btCapsuleShape*
+ %3 = load float* %scale.addr, align 4
+ %mul = fmul float 0x3FC3333340000000, %3
+ %4 = load float* %scale.addr, align 4
+ %mul2 = fmul float 0x3FC99999A0000000, %4
+ %call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %5 = bitcast %class.btCapsuleShape* %2 to %class.btCollisionShape*
+ %m_shapes = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes, i32 0, i32 0
+ store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
+ %call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %6 = bitcast i8* %call5 to %class.btCapsuleShape*
+ %7 = load float* %scale.addr, align 4
+ %mul6 = fmul float 0x3FC3333340000000, %7
+ %8 = load float* %scale.addr, align 4
+ %mul7 = fmul float 0x3FD1EB8520000000, %8
+ %call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
+ to label %invoke.cont9 unwind label %lpad8
+
+invoke.cont9: ; preds = %invoke.cont
+ %9 = bitcast %class.btCapsuleShape* %6 to %class.btCollisionShape*
+ %m_shapes12 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx13 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes12, i32 0, i32 1
+ store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
+ %call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %10 = bitcast i8* %call14 to %class.btCapsuleShape*
+ %11 = load float* %scale.addr, align 4
+ %mul15 = fmul float 0x3FB99999A0000000, %11
+ %12 = load float* %scale.addr, align 4
+ %mul16 = fmul float 0x3FA99999A0000000, %12
+ %call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
+ to label %invoke.cont18 unwind label %lpad17
+
+invoke.cont18: ; preds = %invoke.cont9
+ %13 = bitcast %class.btCapsuleShape* %10 to %class.btCollisionShape*
+ %m_shapes21 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx22 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes21, i32 0, i32 2
+ store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
+ %call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %14 = bitcast i8* %call23 to %class.btCapsuleShape*
+ %15 = load float* %scale.addr, align 4
+ %mul24 = fmul float 0x3FB1EB8520000000, %15
+ %16 = load float* %scale.addr, align 4
+ %mul25 = fmul float 0x3FDCCCCCC0000000, %16
+ %call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
+ to label %invoke.cont27 unwind label %lpad26
+
+invoke.cont27: ; preds = %invoke.cont18
+ %17 = bitcast %class.btCapsuleShape* %14 to %class.btCollisionShape*
+ %m_shapes30 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx31 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes30, i32 0, i32 3
+ store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
+ %call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %18 = bitcast i8* %call32 to %class.btCapsuleShape*
+ %19 = load float* %scale.addr, align 4
+ %mul33 = fmul float 0x3FA99999A0000000, %19
+ %20 = load float* %scale.addr, align 4
+ %mul34 = fmul float 0x3FD7AE1480000000, %20
+ %call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
+ to label %invoke.cont36 unwind label %lpad35
+
+invoke.cont36: ; preds = %invoke.cont27
+ %21 = bitcast %class.btCapsuleShape* %18 to %class.btCollisionShape*
+ %m_shapes39 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx40 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes39, i32 0, i32 4
+ store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
+ %call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %22 = bitcast i8* %call41 to %class.btCapsuleShape*
+ %23 = load float* %scale.addr, align 4
+ %mul42 = fmul float 0x3FB1EB8520000000, %23
+ %24 = load float* %scale.addr, align 4
+ %mul43 = fmul float 0x3FDCCCCCC0000000, %24
+ %call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
+ to label %invoke.cont45 unwind label %lpad44
+
+invoke.cont45: ; preds = %invoke.cont36
+ %25 = bitcast %class.btCapsuleShape* %22 to %class.btCollisionShape*
+ %m_shapes48 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx49 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes48, i32 0, i32 5
+ store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
+ %call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %26 = bitcast i8* %call50 to %class.btCapsuleShape*
+ %27 = load float* %scale.addr, align 4
+ %mul51 = fmul float 0x3FA99999A0000000, %27
+ %28 = load float* %scale.addr, align 4
+ %mul52 = fmul float 0x3FD7AE1480000000, %28
+ %call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
+ to label %invoke.cont54 unwind label %lpad53
+
+invoke.cont54: ; preds = %invoke.cont45
+ %29 = bitcast %class.btCapsuleShape* %26 to %class.btCollisionShape*
+ %m_shapes57 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx58 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes57, i32 0, i32 6
+ store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
+ %call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %30 = bitcast i8* %call59 to %class.btCapsuleShape*
+ %31 = load float* %scale.addr, align 4
+ %mul60 = fmul float 0x3FA99999A0000000, %31
+ %32 = load float* %scale.addr, align 4
+ %mul61 = fmul float 0x3FD51EB860000000, %32
+ %call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
+ to label %invoke.cont63 unwind label %lpad62
+
+invoke.cont63: ; preds = %invoke.cont54
+ %33 = bitcast %class.btCapsuleShape* %30 to %class.btCollisionShape*
+ %m_shapes66 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx67 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes66, i32 0, i32 7
+ store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
+ %call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %34 = bitcast i8* %call68 to %class.btCapsuleShape*
+ %35 = load float* %scale.addr, align 4
+ %mul69 = fmul float 0x3FA47AE140000000, %35
+ %36 = load float* %scale.addr, align 4
+ %mul70 = fmul float 2.500000e-01, %36
+ %call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
+ to label %invoke.cont72 unwind label %lpad71
+
+invoke.cont72: ; preds = %invoke.cont63
+ %37 = bitcast %class.btCapsuleShape* %34 to %class.btCollisionShape*
+ %m_shapes75 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx76 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes75, i32 0, i32 8
+ store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
+ %call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %38 = bitcast i8* %call77 to %class.btCapsuleShape*
+ %39 = load float* %scale.addr, align 4
+ %mul78 = fmul float 0x3FA99999A0000000, %39
+ %40 = load float* %scale.addr, align 4
+ %mul79 = fmul float 0x3FD51EB860000000, %40
+ %call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
+ to label %invoke.cont81 unwind label %lpad80
+
+invoke.cont81: ; preds = %invoke.cont72
+ %41 = bitcast %class.btCapsuleShape* %38 to %class.btCollisionShape*
+ %m_shapes84 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx85 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes84, i32 0, i32 9
+ store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
+ %call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
+ %42 = bitcast i8* %call86 to %class.btCapsuleShape*
+ %43 = load float* %scale.addr, align 4
+ %mul87 = fmul float 0x3FA47AE140000000, %43
+ %44 = load float* %scale.addr, align 4
+ %mul88 = fmul float 2.500000e-01, %44
+ %call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
+ to label %invoke.cont90 unwind label %lpad89
+
+invoke.cont90: ; preds = %invoke.cont81
+ %45 = bitcast %class.btCapsuleShape* %42 to %class.btCollisionShape*
+ %m_shapes93 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx94 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes93, i32 0, i32 10
+ store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
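+ ; All shapes are in place; now build the rigid bodies. 'offset' carries the
+ ; caller-supplied positionOffset, 'transform' positions each body part, and
+ ; localCreateRigidBody stores each result into m_bodies[0..10] with mass 1.0.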
+ %call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
+ %46 = load %class.btVector3** %positionOffset.addr, align 4
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
+ %call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0.000000e+00, float* %ref.tmp98, align 4
+ store float 1.000000e+00, float* %ref.tmp99, align 4
+ store float 0.000000e+00, float* %ref.tmp100, align 4
+ %call101 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp97, float* %ref.tmp98, float* %ref.tmp99, float* %ref.tmp100)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp, float* %scale.addr, %class.btVector3* %ref.tmp97)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes103 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
+ %47 = load %class.btCollisionShape** %arrayidx104, align 4
+ %call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
+ %m_bodies = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
+ store %class.btRigidBody* %call105, %class.btRigidBody** %arrayidx106, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0.000000e+00, float* %ref.tmp109, align 4
+ store float 0x3FF3333340000000, float* %ref.tmp110, align 4
+ store float 0.000000e+00, float* %ref.tmp111, align 4
+ %call112 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp108, float* %ref.tmp109, float* %ref.tmp110, float* %ref.tmp111)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp107, float* %scale.addr, %class.btVector3* %ref.tmp108)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp107)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes114 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
+ %48 = load %class.btCollisionShape** %arrayidx115, align 4
+ %call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
+ %m_bodies117 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
+ store %class.btRigidBody* %call116, %class.btRigidBody** %arrayidx118, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0.000000e+00, float* %ref.tmp121, align 4
+ store float 0x3FF99999A0000000, float* %ref.tmp122, align 4
+ store float 0.000000e+00, float* %ref.tmp123, align 4
+ %call124 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp120, float* %ref.tmp121, float* %ref.tmp122, float* %ref.tmp123)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp119, float* %scale.addr, %class.btVector3* %ref.tmp120)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp119)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes126 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
+ %49 = load %class.btCollisionShape** %arrayidx127, align 4
+ %call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
+ %m_bodies129 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
+ store %class.btRigidBody* %call128, %class.btRigidBody** %arrayidx130, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0xBFC70A3D80000000, float* %ref.tmp133, align 4
+ store float 0x3FE4CCCCC0000000, float* %ref.tmp134, align 4
+ store float 0.000000e+00, float* %ref.tmp135, align 4
+ %call136 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp132, float* %ref.tmp133, float* %ref.tmp134, float* %ref.tmp135)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp131, float* %scale.addr, %class.btVector3* %ref.tmp132)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp131)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes138 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
+ %50 = load %class.btCollisionShape** %arrayidx139, align 4
+ %call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
+ %m_bodies141 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
+ store %class.btRigidBody* %call140, %class.btRigidBody** %arrayidx142, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0xBFC70A3D80000000, float* %ref.tmp145, align 4
+ store float 0x3FC99999A0000000, float* %ref.tmp146, align 4
+ store float 0.000000e+00, float* %ref.tmp147, align 4
+ %call148 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp144, float* %ref.tmp145, float* %ref.tmp146, float* %ref.tmp147)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp143, float* %scale.addr, %class.btVector3* %ref.tmp144)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp143)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes150 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
+ %51 = load %class.btCollisionShape** %arrayidx151, align 4
+ %call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
+ %m_bodies153 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
+ store %class.btRigidBody* %call152, %class.btRigidBody** %arrayidx154, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0x3FC70A3D80000000, float* %ref.tmp157, align 4
+ store float 0x3FE4CCCCC0000000, float* %ref.tmp158, align 4
+ store float 0.000000e+00, float* %ref.tmp159, align 4
+ %call160 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp156, float* %ref.tmp157, float* %ref.tmp158, float* %ref.tmp159)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp155, float* %scale.addr, %class.btVector3* %ref.tmp156)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp155)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes162 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
+ %52 = load %class.btCollisionShape** %arrayidx163, align 4
+ %call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
+ %m_bodies165 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
+ store %class.btRigidBody* %call164, %class.btRigidBody** %arrayidx166, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0x3FC70A3D80000000, float* %ref.tmp169, align 4
+ store float 0x3FC99999A0000000, float* %ref.tmp170, align 4
+ store float 0.000000e+00, float* %ref.tmp171, align 4
+ %call172 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp168, float* %ref.tmp169, float* %ref.tmp170, float* %ref.tmp171)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp167, float* %scale.addr, %class.btVector3* %ref.tmp168)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp167)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes174 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
+ %53 = load %class.btCollisionShape** %arrayidx175, align 4
+ %call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
+ %m_bodies177 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
+ store %class.btRigidBody* %call176, %class.btRigidBody** %arrayidx178, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0xBFD6666660000000, float* %ref.tmp181, align 4
+ store float 0x3FF7333340000000, float* %ref.tmp182, align 4
+ store float 0.000000e+00, float* %ref.tmp183, align 4
+ %call184 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp180, float* %ref.tmp181, float* %ref.tmp182, float* %ref.tmp183)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp179, float* %scale.addr, %class.btVector3* %ref.tmp180)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp179)
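+ ; The arm bodies additionally get a rotated basis: 0x3FF921FB60000000 is
+ ; pi/2 as a float literal (0xBFF921FB60000000 below is -pi/2).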
+ %call185 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call185, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes187 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
+ %54 = load %class.btCollisionShape** %arrayidx188, align 4
+ %call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
+ %m_bodies190 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
+ store %class.btRigidBody* %call189, %class.btRigidBody** %arrayidx191, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0xBFE6666660000000, float* %ref.tmp194, align 4
+ store float 0x3FF7333340000000, float* %ref.tmp195, align 4
+ store float 0.000000e+00, float* %ref.tmp196, align 4
+ %call197 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp193, float* %ref.tmp194, float* %ref.tmp195, float* %ref.tmp196)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp192, float* %scale.addr, %class.btVector3* %ref.tmp193)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp192)
+ %call198 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call198, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes200 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
+ %55 = load %class.btCollisionShape** %arrayidx201, align 4
+ %call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
+ %m_bodies203 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
+ store %class.btRigidBody* %call202, %class.btRigidBody** %arrayidx204, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0x3FD6666660000000, float* %ref.tmp207, align 4
+ store float 0x3FF7333340000000, float* %ref.tmp208, align 4
+ store float 0.000000e+00, float* %ref.tmp209, align 4
+ %call210 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp206, float* %ref.tmp207, float* %ref.tmp208, float* %ref.tmp209)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp205, float* %scale.addr, %class.btVector3* %ref.tmp206)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp205)
+ %call211 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call211, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes213 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
+ %56 = load %class.btCollisionShape** %arrayidx214, align 4
+ %call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
+ %m_bodies216 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
+ store %class.btRigidBody* %call215, %class.btRigidBody** %arrayidx217, align 4
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
+ store float 0x3FE6666660000000, float* %ref.tmp220, align 4
+ store float 0x3FF7333340000000, float* %ref.tmp221, align 4
+ store float 0.000000e+00, float* %ref.tmp222, align 4
+ %call223 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp219, float* %ref.tmp220, float* %ref.tmp221, float* %ref.tmp222)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp218, float* %scale.addr, %class.btVector3* %ref.tmp219)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %transform, %class.btVector3* %ref.tmp218)
+ %call224 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %transform)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call224, float 0.000000e+00, float 0.000000e+00, float 0xBFF921FB60000000)
+ call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
+ %m_shapes226 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 2
+ %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
+ %57 = load %class.btCollisionShape** %arrayidx227, align 4
+ %call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
+ %m_bodies229 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
+ store %class.btRigidBody* %call228, %class.btRigidBody** %arrayidx230, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
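+; Loop i = 0..10 over m_bodies: apply uniform damping, a deactivation time,
+; and sleeping thresholds to every body part.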
+for.cond: ; preds = %for.inc, %invoke.cont90
+ %58 = load i32* %i, align 4
+ %cmp = icmp slt i32 %58, 11
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %59 = load i32* %i, align 4
+ %m_bodies231 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
+ %60 = load %class.btRigidBody** %arrayidx232, align 4
+ call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
+ %61 = load i32* %i, align 4
+ %m_bodies233 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
+ %62 = load %class.btRigidBody** %arrayidx234, align 4
+ %63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
+ call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
+ %64 = load i32* %i, align 4
+ %m_bodies235 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
+ %65 = load %class.btRigidBody** %arrayidx236, align 4
+ call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %66 = load i32* %i, align 4
+ %inc = add nsw i32 %66, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
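+; Landing pads for the 11 btCapsuleShape constructor invokes: each cleanup pad
+; saves the exception value and selector, frees the just-allocated storage via
+; btConvexShape::operator delete, and branches to the common eh.resume block.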
+lpad: ; preds = %entry
+ %67 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %68 = extractvalue { i8*, i32 } %67, 0
+ store i8* %68, i8** %exn.slot
+ %69 = extractvalue { i8*, i32 } %67, 1
+ store i32 %69, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call)
+ to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4: ; preds = %lpad
+ br label %eh.resume
+
+lpad8: ; preds = %invoke.cont
+ %70 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %71 = extractvalue { i8*, i32 } %70, 0
+ store i8* %71, i8** %exn.slot
+ %72 = extractvalue { i8*, i32 } %70, 1
+ store i32 %72, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call5)
+ to label %invoke.cont11 unwind label %terminate.lpad
+
+invoke.cont11: ; preds = %lpad8
+ br label %eh.resume
+
+lpad17: ; preds = %invoke.cont9
+ %73 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %74 = extractvalue { i8*, i32 } %73, 0
+ store i8* %74, i8** %exn.slot
+ %75 = extractvalue { i8*, i32 } %73, 1
+ store i32 %75, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call14)
+ to label %invoke.cont20 unwind label %terminate.lpad
+
+invoke.cont20: ; preds = %lpad17
+ br label %eh.resume
+
+lpad26: ; preds = %invoke.cont18
+ %76 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %77 = extractvalue { i8*, i32 } %76, 0
+ store i8* %77, i8** %exn.slot
+ %78 = extractvalue { i8*, i32 } %76, 1
+ store i32 %78, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call23)
+ to label %invoke.cont29 unwind label %terminate.lpad
+
+invoke.cont29: ; preds = %lpad26
+ br label %eh.resume
+
+lpad35: ; preds = %invoke.cont27
+ %79 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %80 = extractvalue { i8*, i32 } %79, 0
+ store i8* %80, i8** %exn.slot
+ %81 = extractvalue { i8*, i32 } %79, 1
+ store i32 %81, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call32)
+ to label %invoke.cont38 unwind label %terminate.lpad
+
+invoke.cont38: ; preds = %lpad35
+ br label %eh.resume
+
+lpad44: ; preds = %invoke.cont36
+ %82 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %83 = extractvalue { i8*, i32 } %82, 0
+ store i8* %83, i8** %exn.slot
+ %84 = extractvalue { i8*, i32 } %82, 1
+ store i32 %84, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call41)
+ to label %invoke.cont47 unwind label %terminate.lpad
+
+invoke.cont47: ; preds = %lpad44
+ br label %eh.resume
+
+lpad53: ; preds = %invoke.cont45
+ %85 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %86 = extractvalue { i8*, i32 } %85, 0
+ store i8* %86, i8** %exn.slot
+ %87 = extractvalue { i8*, i32 } %85, 1
+ store i32 %87, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call50)
+ to label %invoke.cont56 unwind label %terminate.lpad
+
+invoke.cont56: ; preds = %lpad53
+ br label %eh.resume
+
+lpad62: ; preds = %invoke.cont54
+ %88 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %89 = extractvalue { i8*, i32 } %88, 0
+ store i8* %89, i8** %exn.slot
+ %90 = extractvalue { i8*, i32 } %88, 1
+ store i32 %90, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call59)
+ to label %invoke.cont65 unwind label %terminate.lpad
+
+invoke.cont65: ; preds = %lpad62
+ br label %eh.resume
+
+lpad71: ; preds = %invoke.cont63
+ %91 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %92 = extractvalue { i8*, i32 } %91, 0
+ store i8* %92, i8** %exn.slot
+ %93 = extractvalue { i8*, i32 } %91, 1
+ store i32 %93, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call68)
+ to label %invoke.cont74 unwind label %terminate.lpad
+
+invoke.cont74: ; preds = %lpad71
+ br label %eh.resume
+
+lpad80: ; preds = %invoke.cont72
+ %94 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %95 = extractvalue { i8*, i32 } %94, 0
+ store i8* %95, i8** %exn.slot
+ %96 = extractvalue { i8*, i32 } %94, 1
+ store i32 %96, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call77)
+ to label %invoke.cont83 unwind label %terminate.lpad
+
+invoke.cont83: ; preds = %lpad80
+ br label %eh.resume
+
+lpad89: ; preds = %invoke.cont81
+ %97 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %98 = extractvalue { i8*, i32 } %97, 0
+ store i8* %98, i8** %exn.slot
+ %99 = extractvalue { i8*, i32 } %97, 1
+ store i32 %99, i32* %ehselector.slot
+ invoke void @_ZN13btConvexShapedlEPv(i8* %call86)
+ to label %invoke.cont92 unwind label %terminate.lpad
+
+invoke.cont92: ; preds = %lpad89
+ br label %eh.resume
+
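+; After the loop, construct the joints: a localA/localB frame pair is set up
+; per joint, then a btHingeConstraint or btConeTwistConstraint is allocated
+; with operator new (_Znwm), constructed in place, and stored in m_joints[0..9].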
+for.end: ; preds = %for.cond
+ %call237 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localA)
+ %call238 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %localB)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call239 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call239, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp242, align 4
+ store float 0x3FC3333340000000, float* %ref.tmp243, align 4
+ store float 0.000000e+00, float* %ref.tmp244, align 4
+ %call245 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp241, float* %ref.tmp242, float* %ref.tmp243, float* %ref.tmp244)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp240, float* %scale.addr, %class.btVector3* %ref.tmp241)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp240)
+ %call246 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call246, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp249, align 4
+ store float 0xBFC3333340000000, float* %ref.tmp250, align 4
+ store float 0.000000e+00, float* %ref.tmp251, align 4
+ %call252 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp248, float* %ref.tmp249, float* %ref.tmp250, float* %ref.tmp251)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp247, float* %scale.addr, %class.btVector3* %ref.tmp248)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp247)
+ %call253 = call noalias i8* @_Znwm(i32 780)
+ %100 = bitcast i8* %call253 to %class.btHingeConstraint*
+ %m_bodies254 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
+ %101 = load %class.btRigidBody** %arrayidx255, align 4
+ %m_bodies256 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
+ %102 = load %class.btRigidBody** %arrayidx257, align 4
+ %call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+ to label %invoke.cont259 unwind label %lpad258
+
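+; Pelvis-spine hinge: set its limits, record it in m_joints[0], then register
+; it with the owning world through vtable slot 10 of btDynamicsWorld --
+; presumably addConstraint(constraint, disableCollisionsBetweenLinkedBodies).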
+invoke.cont259: ; preds = %for.end
+ store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
+ %103 = load %class.btHingeConstraint** %hingeC, align 4
+ call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+ %104 = load %class.btHingeConstraint** %hingeC, align 4
+ %105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
+ %m_joints = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
+ store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
+ %m_ownerWorld262 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+ %107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+ %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
+ %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+ %m_joints263 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
+ %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+ call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call265 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call265, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+ store float 0.000000e+00, float* %ref.tmp268, align 4
+ store float 0x3FD3333340000000, float* %ref.tmp269, align 4
+ store float 0.000000e+00, float* %ref.tmp270, align 4
+ %call271 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp267, float* %ref.tmp268, float* %ref.tmp269, float* %ref.tmp270)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp266, float* %scale.addr, %class.btVector3* %ref.tmp267)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp266)
+ %call272 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call272, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+ store float 0.000000e+00, float* %ref.tmp275, align 4
+ store float 0xBFC1EB8520000000, float* %ref.tmp276, align 4
+ store float 0.000000e+00, float* %ref.tmp277, align 4
+ %call278 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp274, float* %ref.tmp275, float* %ref.tmp276, float* %ref.tmp277)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp273, float* %scale.addr, %class.btVector3* %ref.tmp274)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp273)
+ %call279 = call noalias i8* @_Znwm(i32 628)
+ %110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
+ %m_bodies280 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
+ %111 = load %class.btRigidBody** %arrayidx281, align 4
+ %m_bodies282 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
+ %112 = load %class.btRigidBody** %arrayidx283, align 4
+ %call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
+ to label %invoke.cont285 unwind label %lpad284
+
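+; Spine-head cone twist (m_joints[1]); the remaining blocks repeat the same
+; build/setLimit/addConstraint sequence for the hip, knee, shoulder, and elbow
+; joints, with angles given as hex float literals (e.g. 0xC00F6A7A20000000 is
+; -5*pi/4 as a float).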
+invoke.cont285: ; preds = %invoke.cont259
+ store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
+ %113 = load %class.btConeTwistConstraint** %coneC, align 4
+ call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+ %114 = load %class.btConeTwistConstraint** %coneC, align 4
+ %115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
+ %m_joints287 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
+ store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
+ %m_ownerWorld289 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+ %117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+ %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
+ %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+ %m_joints292 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
+ %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+ call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call294 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call294, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+ store float 0xBFC70A3D80000000, float* %ref.tmp297, align 4
+ store float 0xBFB99999A0000000, float* %ref.tmp298, align 4
+ store float 0.000000e+00, float* %ref.tmp299, align 4
+ %call300 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp296, float* %ref.tmp297, float* %ref.tmp298, float* %ref.tmp299)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp295, float* %scale.addr, %class.btVector3* %ref.tmp296)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp295)
+ %call301 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call301, float 0.000000e+00, float 0.000000e+00, float 0xC00F6A7A20000000)
+ store float 0.000000e+00, float* %ref.tmp304, align 4
+ store float 0x3FCCCCCCC0000000, float* %ref.tmp305, align 4
+ store float 0.000000e+00, float* %ref.tmp306, align 4
+ %call307 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp303, float* %ref.tmp304, float* %ref.tmp305, float* %ref.tmp306)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp302, float* %scale.addr, %class.btVector3* %ref.tmp303)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp302)
+ %call308 = call noalias i8* @_Znwm(i32 628)
+ %120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
+ %m_bodies309 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
+ %121 = load %class.btRigidBody** %arrayidx310, align 4
+ %m_bodies311 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
+ %122 = load %class.btRigidBody** %arrayidx312, align 4
+ %call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
+ to label %invoke.cont314 unwind label %lpad313
+
+invoke.cont314: ; preds = %invoke.cont285
+ store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
+ %123 = load %class.btConeTwistConstraint** %coneC, align 4
+ call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+ %124 = load %class.btConeTwistConstraint** %coneC, align 4
+ %125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
+ %m_joints316 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
+ store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
+ %m_ownerWorld318 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+ %127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+ %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
+ %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+ %m_joints321 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
+ %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+ call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call323 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call323, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp326, align 4
+ store float 0xBFCCCCCCC0000000, float* %ref.tmp327, align 4
+ store float 0.000000e+00, float* %ref.tmp328, align 4
+ %call329 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp325, float* %ref.tmp326, float* %ref.tmp327, float* %ref.tmp328)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp324, float* %scale.addr, %class.btVector3* %ref.tmp325)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp324)
+ %call330 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call330, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp333, align 4
+ store float 0x3FC7AE1480000000, float* %ref.tmp334, align 4
+ store float 0.000000e+00, float* %ref.tmp335, align 4
+ %call336 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp332, float* %ref.tmp333, float* %ref.tmp334, float* %ref.tmp335)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp331, float* %scale.addr, %class.btVector3* %ref.tmp332)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp331)
+ %call337 = call noalias i8* @_Znwm(i32 780)
+ %130 = bitcast i8* %call337 to %class.btHingeConstraint*
+ %m_bodies338 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
+ %131 = load %class.btRigidBody** %arrayidx339, align 4
+ %m_bodies340 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
+ %132 = load %class.btRigidBody** %arrayidx341, align 4
+ %call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+ to label %invoke.cont343 unwind label %lpad342
+
+invoke.cont343: ; preds = %invoke.cont314
+ store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
+ %133 = load %class.btHingeConstraint** %hingeC, align 4
+ call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+ %134 = load %class.btHingeConstraint** %hingeC, align 4
+ %135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
+ %m_joints345 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
+ store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
+ %m_ownerWorld347 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+ %137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+ %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
+ %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+ %m_joints350 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
+ %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+ call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call352 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call352, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+ store float 0x3FC70A3D80000000, float* %ref.tmp355, align 4
+ store float 0xBFB99999A0000000, float* %ref.tmp356, align 4
+ store float 0.000000e+00, float* %ref.tmp357, align 4
+ %call358 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp354, float* %ref.tmp355, float* %ref.tmp356, float* %ref.tmp357)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp353, float* %scale.addr, %class.btVector3* %ref.tmp354)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp353)
+ %call359 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call359, float 0.000000e+00, float 0.000000e+00, float 0x3FE921FB60000000)
+ store float 0.000000e+00, float* %ref.tmp362, align 4
+ store float 0x3FCCCCCCC0000000, float* %ref.tmp363, align 4
+ store float 0.000000e+00, float* %ref.tmp364, align 4
+ %call365 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp361, float* %ref.tmp362, float* %ref.tmp363, float* %ref.tmp364)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp360, float* %scale.addr, %class.btVector3* %ref.tmp361)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp360)
+ %call366 = call noalias i8* @_Znwm(i32 628)
+ %140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
+ %m_bodies367 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
+ %141 = load %class.btRigidBody** %arrayidx368, align 4
+ %m_bodies369 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
+ %142 = load %class.btRigidBody** %arrayidx370, align 4
+ %call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
+ to label %invoke.cont372 unwind label %lpad371
+
+invoke.cont372: ; preds = %invoke.cont343
+ store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
+ %143 = load %class.btConeTwistConstraint** %coneC, align 4
+ call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+ %144 = load %class.btConeTwistConstraint** %coneC, align 4
+ %145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
+ %m_joints374 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
+ store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
+ %m_ownerWorld376 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+ %147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+ %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
+ %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+ %m_joints379 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
+ %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+ call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call381 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call381, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp384, align 4
+ store float 0xBFCCCCCCC0000000, float* %ref.tmp385, align 4
+ store float 0.000000e+00, float* %ref.tmp386, align 4
+ %call387 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp383, float* %ref.tmp384, float* %ref.tmp385, float* %ref.tmp386)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp382, float* %scale.addr, %class.btVector3* %ref.tmp383)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp382)
+ %call388 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call388, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp391, align 4
+ store float 0x3FC7AE1480000000, float* %ref.tmp392, align 4
+ store float 0.000000e+00, float* %ref.tmp393, align 4
+ %call394 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp390, float* %ref.tmp391, float* %ref.tmp392, float* %ref.tmp393)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp389, float* %scale.addr, %class.btVector3* %ref.tmp390)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp389)
+ %call395 = call noalias i8* @_Znwm(i32 780)
+ %150 = bitcast i8* %call395 to %class.btHingeConstraint*
+ %m_bodies396 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
+ %151 = load %class.btRigidBody** %arrayidx397, align 4
+ %m_bodies398 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
+ %152 = load %class.btRigidBody** %arrayidx399, align 4
+ %call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+ to label %invoke.cont401 unwind label %lpad400
+
+invoke.cont401: ; preds = %invoke.cont372
+ store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
+ %153 = load %class.btHingeConstraint** %hingeC, align 4
+ call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+ %154 = load %class.btHingeConstraint** %hingeC, align 4
+ %155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
+ %m_joints403 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
+ store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
+ %m_ownerWorld405 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+ %157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+ %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
+ %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+ %m_joints408 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
+ %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+ call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call410 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call410, float 0.000000e+00, float 0.000000e+00, float 0x400921FB60000000)
+ store float 0xBFC99999A0000000, float* %ref.tmp413, align 4
+ store float 0x3FC3333340000000, float* %ref.tmp414, align 4
+ store float 0.000000e+00, float* %ref.tmp415, align 4
+ %call416 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp412, float* %ref.tmp413, float* %ref.tmp414, float* %ref.tmp415)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp411, float* %scale.addr, %class.btVector3* %ref.tmp412)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp411)
+ %call417 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call417, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+ store float 0.000000e+00, float* %ref.tmp420, align 4
+ store float 0xBFC70A3D80000000, float* %ref.tmp421, align 4
+ store float 0.000000e+00, float* %ref.tmp422, align 4
+ %call423 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp419, float* %ref.tmp420, float* %ref.tmp421, float* %ref.tmp422)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp418, float* %scale.addr, %class.btVector3* %ref.tmp419)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp418)
+ %call424 = call noalias i8* @_Znwm(i32 628)
+ %160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
+ %m_bodies425 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
+ %161 = load %class.btRigidBody** %arrayidx426, align 4
+ %m_bodies427 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
+ %162 = load %class.btRigidBody** %arrayidx428, align 4
+ %call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
+ to label %invoke.cont430 unwind label %lpad429
+
+invoke.cont430: ; preds = %invoke.cont401
+ store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
+ %163 = load %class.btConeTwistConstraint** %coneC, align 4
+ call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+ %164 = load %class.btConeTwistConstraint** %coneC, align 4
+ %165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
+ %m_joints432 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
+ store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
+ %m_ownerWorld434 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+ %167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+ %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
+ %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+ %m_joints437 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
+ %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+ call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call439 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call439, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp442, align 4
+ store float 0x3FC70A3D80000000, float* %ref.tmp443, align 4
+ store float 0.000000e+00, float* %ref.tmp444, align 4
+ %call445 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp441, float* %ref.tmp442, float* %ref.tmp443, float* %ref.tmp444)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp440, float* %scale.addr, %class.btVector3* %ref.tmp441)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp440)
+ %call446 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call446, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp449, align 4
+ store float 0xBFC1EB8520000000, float* %ref.tmp450, align 4
+ store float 0.000000e+00, float* %ref.tmp451, align 4
+ %call452 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp448, float* %ref.tmp449, float* %ref.tmp450, float* %ref.tmp451)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp447, float* %scale.addr, %class.btVector3* %ref.tmp448)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp447)
+ %call453 = call noalias i8* @_Znwm(i32 780)
+ %170 = bitcast i8* %call453 to %class.btHingeConstraint*
+ %m_bodies454 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
+ %171 = load %class.btRigidBody** %arrayidx455, align 4
+ %m_bodies456 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
+ %172 = load %class.btRigidBody** %arrayidx457, align 4
+ %call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+ to label %invoke.cont459 unwind label %lpad458
+
+invoke.cont459: ; preds = %invoke.cont430
+ store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
+ %173 = load %class.btHingeConstraint** %hingeC, align 4
+ call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+ %174 = load %class.btHingeConstraint** %hingeC, align 4
+ %175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
+ %m_joints461 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
+ store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
+ %m_ownerWorld463 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+ %177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+ %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
+ %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+ %m_joints466 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
+ %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+ call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call468 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call468, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
+ store float 0x3FC99999A0000000, float* %ref.tmp471, align 4
+ store float 0x3FC3333340000000, float* %ref.tmp472, align 4
+ store float 0.000000e+00, float* %ref.tmp473, align 4
+ %call474 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp470, float* %ref.tmp471, float* %ref.tmp472, float* %ref.tmp473)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp469, float* %scale.addr, %class.btVector3* %ref.tmp470)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp469)
+ %call475 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call475, float 0.000000e+00, float 0.000000e+00, float 0x3FF921FB60000000)
+ store float 0.000000e+00, float* %ref.tmp478, align 4
+ store float 0xBFC70A3D80000000, float* %ref.tmp479, align 4
+ store float 0.000000e+00, float* %ref.tmp480, align 4
+ %call481 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp477, float* %ref.tmp478, float* %ref.tmp479, float* %ref.tmp480)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp476, float* %scale.addr, %class.btVector3* %ref.tmp477)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp476)
+ %call482 = call noalias i8* @_Znwm(i32 628)
+ %180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
+ %m_bodies483 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
+ %181 = load %class.btRigidBody** %arrayidx484, align 4
+ %m_bodies485 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
+ %182 = load %class.btRigidBody** %arrayidx486, align 4
+ %call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
+ to label %invoke.cont488 unwind label %lpad487
+
+invoke.cont488: ; preds = %invoke.cont459
+ store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
+ %183 = load %class.btConeTwistConstraint** %coneC, align 4
+ call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
+ %184 = load %class.btConeTwistConstraint** %coneC, align 4
+ %185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
+ %m_joints490 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
+ store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
+ %m_ownerWorld492 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+ %187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+ %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
+ %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+ %m_joints495 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
+ %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+ call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
+ call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
+ %call497 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localA)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call497, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp500, align 4
+ store float 0x3FC70A3D80000000, float* %ref.tmp501, align 4
+ store float 0.000000e+00, float* %ref.tmp502, align 4
+ %call503 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp499, float* %ref.tmp500, float* %ref.tmp501, float* %ref.tmp502)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp498, float* %scale.addr, %class.btVector3* %ref.tmp499)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localA, %class.btVector3* %ref.tmp498)
+ %call504 = call %class.btMatrix3x3* @_ZN11btTransform8getBasisEv(%class.btTransform* %localB)
+ call void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3* %call504, float 0.000000e+00, float 0x3FF921FB60000000, float 0.000000e+00)
+ store float 0.000000e+00, float* %ref.tmp507, align 4
+ store float 0xBFC1EB8520000000, float* %ref.tmp508, align 4
+ store float 0.000000e+00, float* %ref.tmp509, align 4
+ %call510 = call %class.btVector3* @_ZN9btVector3C1ERKfS1_S1_(%class.btVector3* %ref.tmp506, float* %ref.tmp507, float* %ref.tmp508, float* %ref.tmp509)
+ call void @_ZmlRKfRK9btVector3(%class.btVector3* sret %ref.tmp505, float* %scale.addr, %class.btVector3* %ref.tmp506)
+ call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %localB, %class.btVector3* %ref.tmp505)
+ %call511 = call noalias i8* @_Znwm(i32 780)
+ %190 = bitcast i8* %call511 to %class.btHingeConstraint*
+ %m_bodies512 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
+ %191 = load %class.btRigidBody** %arrayidx513, align 4
+ %m_bodies514 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 3
+ %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
+ %192 = load %class.btRigidBody** %arrayidx515, align 4
+ %call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
+ to label %invoke.cont517 unwind label %lpad516
+
+invoke.cont517: ; preds = %invoke.cont488
+ store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
+ %193 = load %class.btHingeConstraint** %hingeC, align 4
+ call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
+ %194 = load %class.btHingeConstraint** %hingeC, align 4
+ %195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
+ %m_joints519 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
+ store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
+ %m_ownerWorld521 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 1
+ %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+ %197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
+ %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+ %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
+ %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+ %m_joints524 = getelementptr inbounds %class.RagDoll* %this1, i32 0, i32 4
+ %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
+ %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+ call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
+ %200 = load %class.RagDoll** %retval
+ ret %class.RagDoll* %200
+
+lpad258: ; preds = %for.end
+ %201 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %202 = extractvalue { i8*, i32 } %201, 0
+ store i8* %202, i8** %exn.slot
+ %203 = extractvalue { i8*, i32 } %201, 1
+ store i32 %203, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call253) nounwind
+ br label %eh.resume
+
+lpad284: ; preds = %invoke.cont259
+ %204 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %205 = extractvalue { i8*, i32 } %204, 0
+ store i8* %205, i8** %exn.slot
+ %206 = extractvalue { i8*, i32 } %204, 1
+ store i32 %206, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call279) nounwind
+ br label %eh.resume
+
+lpad313: ; preds = %invoke.cont285
+ %207 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %208 = extractvalue { i8*, i32 } %207, 0
+ store i8* %208, i8** %exn.slot
+ %209 = extractvalue { i8*, i32 } %207, 1
+ store i32 %209, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call308) nounwind
+ br label %eh.resume
+
+lpad342: ; preds = %invoke.cont314
+ %210 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %211 = extractvalue { i8*, i32 } %210, 0
+ store i8* %211, i8** %exn.slot
+ %212 = extractvalue { i8*, i32 } %210, 1
+ store i32 %212, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call337) nounwind
+ br label %eh.resume
+
+lpad371: ; preds = %invoke.cont343
+ %213 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %214 = extractvalue { i8*, i32 } %213, 0
+ store i8* %214, i8** %exn.slot
+ %215 = extractvalue { i8*, i32 } %213, 1
+ store i32 %215, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call366) nounwind
+ br label %eh.resume
+
+lpad400: ; preds = %invoke.cont372
+ %216 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %217 = extractvalue { i8*, i32 } %216, 0
+ store i8* %217, i8** %exn.slot
+ %218 = extractvalue { i8*, i32 } %216, 1
+ store i32 %218, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call395) nounwind
+ br label %eh.resume
+
+lpad429: ; preds = %invoke.cont401
+ %219 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %220 = extractvalue { i8*, i32 } %219, 0
+ store i8* %220, i8** %exn.slot
+ %221 = extractvalue { i8*, i32 } %219, 1
+ store i32 %221, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call424) nounwind
+ br label %eh.resume
+
+lpad458: ; preds = %invoke.cont430
+ %222 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %223 = extractvalue { i8*, i32 } %222, 0
+ store i8* %223, i8** %exn.slot
+ %224 = extractvalue { i8*, i32 } %222, 1
+ store i32 %224, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call453) nounwind
+ br label %eh.resume
+
+lpad487: ; preds = %invoke.cont459
+ %225 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %226 = extractvalue { i8*, i32 } %225, 0
+ store i8* %226, i8** %exn.slot
+ %227 = extractvalue { i8*, i32 } %225, 1
+ store i32 %227, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call482) nounwind
+ br label %eh.resume
+
+lpad516: ; preds = %invoke.cont488
+ %228 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ cleanup
+ %229 = extractvalue { i8*, i32 } %228, 0
+ store i8* %229, i8** %exn.slot
+ %230 = extractvalue { i8*, i32 } %228, 1
+ store i32 %230, i32* %ehselector.slot
+ call void @_ZdlPv(i8* %call511) nounwind
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
+ %exn = load i8** %exn.slot
+ %sel = load i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val526
+
+terminate.lpad: ; preds = %lpad89, %lpad80, %lpad71, %lpad62, %lpad53, %lpad44, %lpad35, %lpad26, %lpad17, %lpad8, %lpad
+ %231 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+ catch i8* null
+ call void @_ZSt9terminatev() noreturn nounwind
+ unreachable
+}
+
+declare void @_ZmlRKfRK9btVector3(%class.btVector3* noalias sret, float*, %class.btVector3*) inlinehint ssp
+
+declare %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll*, float, %class.btTransform*, %class.btCollisionShape*) ssp align 2
+
+declare void @_ZNK11btTransformmlERKS_(%class.btTransform* noalias sret, %class.btTransform*, %class.btTransform*) inlinehint ssp align 2
+
+declare void @_ZN11btMatrix3x311setEulerZYXEfff(%class.btMatrix3x3*, float, float, float) ssp align 2
+
+declare void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody*, float, float)
+
+declare void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject*, float) nounwind ssp align 2
+
+declare void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody*, float, float) nounwind ssp align 2
+
+declare %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*, i1 zeroext)
+
+declare void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint*, float, float, float, float, float) ssp align 2
+
+declare %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint*, %class.btRigidBody*, %class.btRigidBody*, %class.btTransform*, %class.btTransform*)
+
+declare void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint*, float, float, float, float, float, float) nounwind ssp align 2
diff --git a/test/CodeGen/Thumb2/crash.ll b/test/CodeGen/Thumb2/crash.ll
index d8b51ec82ded..cb4d08058f41 100644
--- a/test/CodeGen/Thumb2/crash.ll
+++ b/test/CodeGen/Thumb2/crash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -verify-machineinstrs
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
@@ -47,3 +47,32 @@ bb2: ; preds = %bb
tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
ret i32 0
}
+
+; PR12389
+; Make sure the DPair register class can spill.
+define void @pr12389(i8* %p) nounwind ssp {
+entry:
+ %vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %p, i32 1)
+ tail call void asm sideeffect "", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15}"() nounwind
+ tail call void @llvm.arm.neon.vst1.v4f32(i8* %p, <4 x float> %vld1, i32 1)
+ ret void
+}
+
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
+
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
+
+; <rdar://problem/11101911>
+; When an strd is expanded into two str instructions, make sure the first str
+; doesn't kill the base register. This can happen if the base register is the
+; same as the data register.
+%class = type { i8*, %class*, i32 }
+define void @f11101911(%class* %this, i32 %num) ssp align 2 {
+entry:
+ %p1 = getelementptr inbounds %class* %this, i32 0, i32 1
+ %p2 = getelementptr inbounds %class* %this, i32 0, i32 2
+ tail call void asm sideeffect "", "~{r1},~{r3},~{r5},~{r11},~{r13}"() nounwind
+ store %class* %this, %class** %p1, align 4
+ store i32 %num, i32* %p2, align 4
+ ret void
+}
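A sketch of the strd hazard exercised by f11101911 above (hand-written for illustration, not taken from compiler output; register choices are assumptions):

; A Thumb2 strd whose base register equals its first data register, e.g.
;   strd r0, r1, [r0]
; expands into two single stores:
;   str r0, [r0]       ; the first str must NOT mark r0 as killed,
;   str r1, [r0, #4]   ; since the second str still reads r0 as its base.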
diff --git a/test/CodeGen/Thumb2/dg.exp b/test/CodeGen/Thumb2/dg.exp
deleted file mode 100644
index 3ff359aab39b..000000000000
--- a/test/CodeGen/Thumb2/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll
new file mode 100644
index 000000000000..aef6f8560641
--- /dev/null
+++ b/test/CodeGen/Thumb2/large-call.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios0.0.0"
+
+; This test case would clobber the outgoing call arguments by writing to the
+; emergency spill slot at [sp, #4] without adjusting the stack pointer first.
+
+; CHECK: main
+; CHECK: vmov.f64
+; Adjust SP for the large call
+; CHECK: sub sp,
+; CHECK: mov [[FR:r[0-9]+]], sp
+; Store to call frame + #4
+; CHECK: str{{.*\[}}[[FR]], #4]
+; Don't clobber that store until the call.
+; CHECK-NOT: [sp, #4]
+; CHECK: variadic
+
+define i32 @main() ssp {
+entry:
+ %d = alloca double, align 8
+ store double 1.000000e+00, double* %d, align 8
+ %0 = load double* %d, align 8
+ call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
+ ret i32 0
+}
+
+declare void @variadic(i8*, i8*, i8*, ...)
+
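A sketch of the failure mode the large-call test above guards against (the instruction sequence is illustrative, assuming a register-scavenger spill to the emergency slot):

; Correct: the stack pointer is adjusted before outgoing arguments are stored.
;   sub sp, #N          ; reserve the call frame first
;   mov r4, sp
;   str r0, [r4, #4]    ; argument store lands inside the reserved frame
; Without the sp adjustment, an emergency spill to [sp, #4] would overwrite
; the outgoing argument before the variadic call executes.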
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index 4597ba56b0d1..36544d16d6f4 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim | FileCheck %s
; rdar://7352504
; Make sure we use "str r9, [sp, #+28]" instead of "sub.w r4, r7, #256" followed by "str r9, [r4, #-32]".
@@ -46,10 +46,10 @@ bb119: ; preds = %bb20, %bb20
bb420: ; preds = %bb20, %bb20
; CHECK: bb420
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #4]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #8]
-; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp, #24]
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
+; CHECK: str{{(.w)?}} r{{[0-9]+}}, [sp
store %union.rec* null, %union.rec** @zz_hold, align 4
store %union.rec* null, %union.rec** @zz_res, align 4
store %union.rec* %x, %union.rec** @zz_hold, align 4
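Restating the rdar://7352504 requirement above as a sketch (the instructions are the ones quoted in the test's own comment):

; Preferred: one store with a positive 12-bit immediate offset
;   str r9, [sp, #+28]
; rather than materializing a second base register for a negative offset:
;   sub.w r4, r7, #256
;   str   r9, [r4, #-32]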
diff --git a/test/CodeGen/Thumb2/lit.local.cfg b/test/CodeGen/Thumb2/lit.local.cfg
new file mode 100644
index 000000000000..cb77b09ef4ad
--- /dev/null
+++ b/test/CodeGen/Thumb2/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index 9ff114e2b6f2..9aaa821698c1 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -3,11 +3,6 @@
; This now reduces to a single induction variable.
-; TODO: It still gets a GPR shuffle at the end of the loop
-; This is because something in instruction selection has decided
-; that comparing the pre-incremented value with zero is better
-; than comparing the post-incremented value with -4.
-
@G = external global i32 ; <i32*> [#uses=2]
@array = external global i32* ; <i32**> [#uses=1]
@@ -20,9 +15,9 @@ entry:
bb: ; preds = %bb, %entry
; CHECK: LBB0_1:
-; CHECK: cmp [[R2:r[0-9]+]], #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
-; CHECK: mov [[R2]], [[REGISTER]]
+; CHECK: subs [[R2:r[0-9]+]], #1
+; CHECK: cmp.w [[R2]], #-1
+; CHECK: bne LBB0_1
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
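The CHECK rewrite above encodes the codegen improvement; side by side (register names illustrative):

; Before: the pre-incremented value was compared with zero, costing a shuffle.
;   cmp r2, #0
;   sub r3, r2, #1
;   mov r2, r3
; After: a single induction variable, comparing the post-decremented value.
;   subs  r2, #1
;   cmp.w r2, #-1
;   bne   LBB0_1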
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index 46937fc84b05..82857425a9d7 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -51,12 +51,11 @@ return: ; preds = %bb, %entry
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
entry:
; CHECK: t2:
-; CHECK: mov.w [[R3:r[0-9]+]], #1065353216
-; CHECK: vdup.32 q{{.*}}, [[R3]]
+; CHECK: vmov.f32 q{{.*}}, #1.000000e+00
br i1 undef, label %bb1, label %bb2
bb1:
-; CHECK-NEXT: %bb1
+; CHECK: %bb1
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]
%tmp1 = shl i32 %indvar, 2
%gep1 = getelementptr i8* %ptr1, i32 %tmp1
@@ -95,8 +94,8 @@ bb.nph:
bb: ; preds = %bb, %bb.nph
; CHECK: bb
-; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
; CHECK: eor.w
+; CHECK: eor.w {{(r[0-9])|(lr)}}, {{(r[0-9])|(lr)}}, [[REGISTER]]
; CHECK-NOT: eor
; CHECK: and
%data_addr.013 = phi i8 [ %data, %bb.nph ], [ %8, %bb ] ; <i8> [#uses=2]
diff --git a/test/CodeGen/Thumb2/thumb2-cbnz.ll b/test/CodeGen/Thumb2/thumb2-cbnz.ll
index 0992fa8be343..893bd0fdaef4 100644
--- a/test/CodeGen/Thumb2/thumb2-cbnz.ll
+++ b/test/CodeGen/Thumb2/thumb2-cbnz.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s
; rdar://7354379
-declare double @floor(double) nounwind readnone
+declare double @foo(double) nounwind readnone
define void @t(i32 %c, double %b) {
entry:
@@ -24,9 +24,8 @@ bb7: ; preds = %bb3
bb9: ; preds = %bb7
; CHECK: cmp r0, #0
-; CHECK: cmp r0, #0
; CHECK-NEXT: cbnz
- %0 = tail call double @floor(double %b) nounwind readnone ; <double> [#uses=0]
+ %0 = tail call double @foo(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
bb11: ; preds = %bb9, %bb7
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index 00a54a0f1952..f7e966535d2f 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -3,8 +3,8 @@
define i32 @f1(i32 %a) {
; CHECK: f1:
; CHECK: clz r
- %tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
+ %tmp = tail call i32 @llvm.ctlz.i32(i32 %a, i1 true)
ret i32 %tmp
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
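The ctlz change above reflects the intrinsic's two-operand form: the added i1 flag states whether a zero input yields an undefined result. A minimal sketch of the new form (function name is illustrative):

declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone

define i32 @count_leading_zeros(i32 %x) {
entry:
  ; i1 true: the result is undefined when %x is 0, which lets targets
  ; such as Thumb2 emit a bare clz with no zero check.
  %n = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  ret i32 %n
}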
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index 2c5734881d53..f577f79d6917 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios | FileCheck %s
define void @foo(i32 %X, i32 %Y) {
entry:
diff --git a/test/CodeGen/Thumb2/thumb2-ldm.ll b/test/CodeGen/Thumb2/thumb2-ldm.ll
index 4f2b7c18f9ce..b2328e780074 100644
--- a/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mattr=+thumb2 | FileCheck %s
@X = external global [0 x i32] ; <[0 x i32]*> [#uses=5]
diff --git a/test/CodeGen/Thumb2/thumb2-ldrd.ll b/test/CodeGen/Thumb2/thumb2-ldrd.ll
index d3b781dbc0d4..2e83ea146cd0 100644
--- a/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -1,10 +1,11 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
@b = external global i64*
define i64 @t(i64 %a) nounwind readonly {
entry:
-;CHECK: ldrd r2, r3, [r2]
+; CHECK: ldrd
+; CHECK: umull
%0 = load i64** @b, align 4
%1 = load i64* %0, align 4
%2 = mul i64 %1, %a
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
index 24c45c53fc87..58f9add0fc60 100644
--- a/test/CodeGen/Thumb2/thumb2-mls.ll
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -15,5 +15,5 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) {
ret i32 %tmp2
}
; CHECK: f2:
-; CHECK: muls r0, r0, r1
+; CHECK: muls r0, r1, r0
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index bb97d978cf20..ac059bdaf05d 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -2,7 +2,7 @@
define i32 @f1(i32 %a, i32 %b, i32 %c) {
; CHECK: f1:
-; CHECK: muls r0, r0, r1
+; CHECK: muls r0, r1, r0
%tmp = mul i32 %a, %b
ret i32 %tmp
}
diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll
index ceefabbbfa21..74729fd4150f 100644
--- a/test/CodeGen/Thumb2/thumb2-select_xform.ll
+++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll
@@ -3,8 +3,8 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; CHECK: t1
; CHECK: mvn r0, #-2147483648
-; CHECK: add r0, r1
; CHECK: cmp r2, #10
+; CHECK: add r0, r1
; CHECK: it gt
; CHECK: movgt r0, r1
%tmp1 = icmp sgt i32 %c, 10
diff --git a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll b/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
deleted file mode 100644
index c62fee1bd263..000000000000
--- a/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; Linear scan does not currently coalesce any two variables that have
-; overlapping live intervals. When two overlapping intervals have the same
-; value, they can be joined though.
-;
-; RUN: llc < %s -march=x86 -regalloc=linearscan | \
-; RUN: not grep {mov %\[A-Z\]\\\{2,3\\\}, %\[A-Z\]\\\{2,3\\\}}
-
-define i64 @test(i64 %x) {
-entry:
- %tmp.1 = mul i64 %x, 4294967297 ; <i64> [#uses=1]
- ret i64 %tmp.1
-}
-
diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll
index a871ea198cf9..38bca283b132 100644
--- a/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
-; RUN: grep {asm-printer} | grep 34
+; RUN: grep {asm-printer} | grep 35
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@@ -30,7 +30,7 @@ cond_true: ; preds = %cond_true, %entry
%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp88 = add <4 x i32> %tmp87, %tmp77 ; <<4 x i32>> [#uses=2]
%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64> ; <<2 x i64>> [#uses=1]
- %tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 ) ; <<4 x i32>> [#uses=1]
+ %tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 ) ; <<4 x i32>> [#uses=1]
%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64> ; <<2 x i64>> [#uses=2]
%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 > ; <<2 x i64>> [#uses=1]
%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1]
@@ -48,4 +48,4 @@ return: ; preds = %cond_true, %entry
ret void
}
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 6f8b89c3240d..24aa5b98d0bb 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,5 +1,5 @@
; PR1075
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
define float @foo(float %x) nounwind {
%tmp1 = fmul float %x, 3.000000e+00
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index d1fc70d83679..7d21b71ac373 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -10,9 +10,10 @@ entry:
invoke void @raise()
to label %eh_then unwind label %unwind
-unwind: ; preds = %entry
- %eh_ptr = tail call i8* @llvm.eh.exception()
- %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error)
+unwind: ; preds = %entry
+ %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
+ catch i8* @error
+ %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
%eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
%tmp2 = icmp eq i32 %eh_select, %eh_typeid
br i1 %tmp2, label %eh_then, label %Unwind
@@ -21,16 +22,11 @@ eh_then: ; preds = %unwind, %entry
ret void
Unwind: ; preds = %unwind
- %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr)
- unreachable
+ resume { i8*, i32 } %eh_ptr
}
declare void @raise()
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare i32 @llvm.eh.typeid.for(i8*) nounwind
declare i32 @__gnat_eh_personality(...)
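The conversion above follows the general recipe for migrating pre-landingpad EH code; a minimal sketch, assuming a generic personality @pers and typeinfo @ti:

lpad:
  %lp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @pers to i8*)
          catch i8* @ti
  %exn = extractvalue { i8*, i32 } %lp, 0    ; replaces llvm.eh.exception
  %sel = extractvalue { i8*, i32 } %lp, 1    ; replaces llvm.eh.selector
  ; ... dispatch on %sel, and on the unhandled path:
  resume { i8*, i32 } %lp                    ; replaces calling @_Unwind_Resume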
diff --git a/test/CodeGen/X86/2007-11-06-InstrSched.ll b/test/CodeGen/X86/2007-11-06-InstrSched.ll
index f6db0d0379e7..838a0c35646f 100644
--- a/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | not grep lea
+; RUN: llc < %s -march=x86 -mcpu=generic -mattr=+sse2 | not grep lea
define float @foo(i32* %x, float* %y, i32 %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index 265d96854851..2e95082afa9c 100644
--- a/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep {(%esp)} | count 2
+; RUN: llc < %s -march=x86 -mcpu=generic | grep {(%esp)} | count 2
; PR1872
%struct.c34007g__designated___XUB = type { i32, i32, i32, i32 }
diff --git a/test/CodeGen/X86/2008-01-16-Trampoline.ll b/test/CodeGen/X86/2008-01-16-Trampoline.ll
deleted file mode 100644
index 704b2bab4a26..000000000000
--- a/test/CodeGen/X86/2008-01-16-Trampoline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86
-; RUN: llc < %s -march=x86-64
-
- %struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets = type { i32, i32, void (i32, i32)*, i8 (i32, i32)* }
-
-define fastcc i32 @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets.5146(i64 %table.0.0, i64 %table.0.1, i32 %last, i32 %pos) {
-entry:
- %tramp22 = call i8* @llvm.init.trampoline( i8* null, i8* bitcast (void (%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets*, i32, i32)* @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177 to i8*), i8* null ) ; <i8*> [#uses=0]
- unreachable
-}
-
-declare void @gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets__move.5177(%struct.FRAME.gnat__perfect_hash_generators__select_char_position__build_identical_keys_sets* nest , i32, i32) nounwind
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
diff --git a/test/CodeGen/X86/2008-02-22-ReMatBug.ll b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
deleted file mode 100644
index 8f4d353f28d3..000000000000
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of re-materialization} | grep 2
-; rdar://5761454
-
- %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
-
-define %struct.quad_struct* @MakeTree(i32 %size, i32 %center_x, i32 %center_y, i32 %lo_proc, i32 %hi_proc, %struct.quad_struct* %parent, i32 %ct, i32 %level) nounwind {
-entry:
- br i1 true, label %bb43.i, label %bb.i
-
-bb.i: ; preds = %entry
- ret %struct.quad_struct* null
-
-bb43.i: ; preds = %entry
- br i1 true, label %CheckOutside.exit40.i, label %bb11.i38.i
-
-bb11.i38.i: ; preds = %bb43.i
- ret %struct.quad_struct* null
-
-CheckOutside.exit40.i: ; preds = %bb43.i
- br i1 true, label %CheckOutside.exit30.i, label %bb11.i28.i
-
-bb11.i28.i: ; preds = %CheckOutside.exit40.i
- ret %struct.quad_struct* null
-
-CheckOutside.exit30.i: ; preds = %CheckOutside.exit40.i
- br i1 true, label %CheckOutside.exit20.i, label %bb11.i18.i
-
-bb11.i18.i: ; preds = %CheckOutside.exit30.i
- ret %struct.quad_struct* null
-
-CheckOutside.exit20.i: ; preds = %CheckOutside.exit30.i
- br i1 true, label %bb34, label %bb11.i8.i
-
-bb11.i8.i: ; preds = %CheckOutside.exit20.i
- ret %struct.quad_struct* null
-
-bb34: ; preds = %CheckOutside.exit20.i
- %tmp15.reg2mem.0 = sdiv i32 %size, 2 ; <i32> [#uses=7]
- %tmp85 = sub i32 %center_y, %tmp15.reg2mem.0 ; <i32> [#uses=2]
- %tmp88 = sub i32 %center_x, %tmp15.reg2mem.0 ; <i32> [#uses=2]
- %tmp92 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp85, i32 0, i32 %hi_proc, %struct.quad_struct* null, i32 2, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
- %tmp99 = add i32 0, %hi_proc ; <i32> [#uses=1]
- %tmp100 = sdiv i32 %tmp99, 2 ; <i32> [#uses=1]
- %tmp110 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp85, i32 0, i32 %tmp100, %struct.quad_struct* null, i32 3, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
- %tmp122 = add i32 %tmp15.reg2mem.0, %center_y ; <i32> [#uses=2]
- %tmp129 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 0, i32 %tmp122, i32 0, i32 0, %struct.quad_struct* null, i32 1, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
- %tmp147 = tail call %struct.quad_struct* @MakeTree( i32 %tmp15.reg2mem.0, i32 %tmp88, i32 %tmp122, i32 %lo_proc, i32 0, %struct.quad_struct* null, i32 0, i32 0 ) nounwind ; <%struct.quad_struct*> [#uses=0]
- unreachable
-}
diff --git a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll b/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
deleted file mode 100644
index 33d658ca01f9..000000000000
--- a/test/CodeGen/X86/2008-03-18-CoalescerBug.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan | grep movss | count 1
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim -regalloc=linearscan -stats |& grep {Number of re-materialization} | grep 1
-
- %struct..0objc_object = type opaque
- %struct.OhBoy = type { }
- %struct.BooHoo = type { i32 }
- %struct.objc_selector = type opaque
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.OhBoy*, %struct.objc_selector*, i32, %struct.BooHoo*)* @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]" to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define void @"-[MessageHeaderDisplay adjustFontSizeBy:viewingState:]"(%struct.OhBoy* %self, %struct.objc_selector* %_cmd, i32 %delta, %struct.BooHoo* %viewingState) nounwind {
-entry:
- %tmp19 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp24 = tail call float bitcast (void (%struct..0objc_object*, ...)* @objc_msgSend_fpret to float (%struct..0objc_object*, %struct.objc_selector*)*)( %struct..0objc_object* null, %struct.objc_selector* null ) nounwind ; <float> [#uses=2]
- %tmp30 = icmp sgt i32 %delta, 0 ; <i1> [#uses=1]
- br i1 %tmp30, label %bb33, label %bb87.preheader
-bb33: ; preds = %entry
- %tmp28 = fadd float 0.000000e+00, %tmp24 ; <float> [#uses=1]
- %tmp35 = fcmp ogt float %tmp28, 1.800000e+01 ; <i1> [#uses=1]
- br i1 %tmp35, label %bb38, label %bb87.preheader
-bb38: ; preds = %bb33
- %tmp53 = add i32 %tmp19, %delta ; <i32> [#uses=2]
- br label %bb43
-bb43: ; preds = %bb38
- store i32 %tmp53, i32* null, align 4
- ret void
-bb50: ; preds = %bb38
- %tmp56 = fsub float 1.800000e+01, %tmp24 ; <float> [#uses=1]
- %tmp57 = fcmp ugt float 0.000000e+00, %tmp56 ; <i1> [#uses=1]
- br i1 %tmp57, label %bb64, label %bb87.preheader
-bb64: ; preds = %bb50
- ret void
-bb87.preheader: ; preds = %bb50, %bb33, %entry
- %usableDelta.0 = phi i32 [ %delta, %entry ], [ %delta, %bb33 ], [ %tmp53, %bb50 ] ; <i32> [#uses=1]
- %tmp100 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* null, %struct.objc_selector* null, %struct..0objc_object* null ) nounwind ; <%struct..0objc_object*> [#uses=2]
- %tmp106 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null ) nounwind ; <%struct..0objc_object*> [#uses=0]
- %umax = select i1 false, i32 1, i32 0 ; <i32> [#uses=1]
- br label %bb108
-bb108: ; preds = %bb108, %bb87.preheader
- %attachmentIndex.0.reg2mem.0 = phi i32 [ 0, %bb87.preheader ], [ %indvar.next, %bb108 ] ; <i32> [#uses=2]
- %tmp114 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp100, %struct.objc_selector* null, i32 %attachmentIndex.0.reg2mem.0 ) nounwind ; <%struct..0objc_object*> [#uses=1]
- %tmp121 = tail call %struct..0objc_object* (%struct..0objc_object*, %struct.objc_selector*, ...)* @objc_msgSend( %struct..0objc_object* %tmp114, %struct.objc_selector* null, i32 %usableDelta.0 ) nounwind ; <%struct..0objc_object*> [#uses=0]
- %indvar.next = add i32 %attachmentIndex.0.reg2mem.0, 1 ; <i32> [#uses=2]
- %exitcond = icmp eq i32 %indvar.next, %umax ; <i1> [#uses=1]
- br i1 %exitcond, label %bb130, label %bb108
-bb130: ; preds = %bb108
- ret void
-}
-
-declare %struct..0objc_object* @objc_msgSend(%struct..0objc_object*, %struct.objc_selector*, ...)
-
-declare void @objc_msgSend_fpret(%struct..0objc_object*, ...)
diff --git a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index e5dda4ac754c..ac167b009a8d 100644
--- a/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -O0 -fast-isel=false -regalloc=linearscan | grep mov | count 5
+; RUN: llc < %s -march=x86 -O0 -fast-isel=false -optimize-regalloc -regalloc=basic | grep mov | count 5
; PR2343
%llvm.dbg.anchor.type = type { i32, i32 }
diff --git a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
index 0d11546889ac..c068f8ac632c 100644
--- a/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
+++ b/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll
@@ -2,8 +2,6 @@
@_ZTVN10Evaluation10GridOutputILi3EEE = external constant [5 x i32 (...)*] ; <[5 x i32 (...)*]*> [#uses=1]
-declare i8* @llvm.eh.exception() nounwind
-
declare i8* @_Znwm(i32)
declare i8* @__cxa_begin_catch(i8*) nounwind
diff --git a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index 90af3870bd44..a6234d377df3 100644
--- a/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 | not grep movsd
; RUN: llc < %s -march=x86 | grep movw
; RUN: llc < %s -march=x86 | grep addw
-; These transforms are turned off for volatile loads and stores.
+; These transforms are turned off for "load volatile" and "store volatile".
; Check that they weren't turned off for all loads and stores!
@atomic = global double 0.000000e+00 ; <double*> [#uses=1]
diff --git a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 86652826aeac..037559edaf57 100644
--- a/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -8,13 +8,13 @@
define i16 @f(i64 %x, double %y) {
%b = bitcast i64 %x to double ; <double> [#uses=1]
- volatile store double %b, double* @atomic ; one processor operation only
- volatile store double 0.000000e+00, double* @atomic2 ; one processor operation only
+ store volatile double %b, double* @atomic ; one processor operation only
+ store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only
%b2 = bitcast double %y to i64 ; <i64> [#uses=1]
- volatile store i64 %b2, i64* @anything ; may transform to store of double
- %l = volatile load i32* @ioport ; must not narrow
+ store volatile i64 %b2, i64* @anything ; may transform to store of double
+ %l = load volatile i32* @ioport ; must not narrow
%t = trunc i32 %l to i16 ; <i16> [#uses=1]
- %l2 = volatile load i32* @ioport ; must not narrow
+ %l2 = load volatile i32* @ioport ; must not narrow
%tmp = lshr i32 %l2, 16 ; <i32> [#uses=1]
%t2 = trunc i32 %tmp to i16 ; <i16> [#uses=1]
%f = add i16 %t, %t2 ; <i16> [#uses=1]
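The repeated edit in this and the following hunks is purely syntactic: the volatile marker moves from a prefix of the whole instruction to a modifier on the opcode itself. A minimal sketch of both spellings, old form first:

    ; old syntax, removed by this patch
    %v = volatile load i32* @ioport
    volatile store i32 %v, i32* @ioport
    ; new syntax
    %v2 = load volatile i32* @ioport
    store volatile i32 %v2, i32* @ioport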
diff --git a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
index 101b3c5cfdbb..f0d46a0252c3 100644
--- a/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
+++ b/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mcpu=core2 | grep pxor | count 2
-; RUN: llc < %s -mcpu=core2 | not grep movapd
+; RUN: llc < %s -mcpu=core2 | grep xorps | count 2
+; RUN: llc < %s -mcpu=core2 | not grep movap
; PR2715
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
index 2dc1deaf1738..757f1ff68253 100644
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
; originally from PR2687, but things don't work that way any more.
diff --git a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 511c7b5ebcb6..6867ae798087 100644
--- a/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -march=x86 -regalloc=linearscan | FileCheck %s
; RUN: llc < %s -march=x86 -regalloc=fast | FileCheck %s
; RUN: llc < %s -march=x86 -regalloc=basic | FileCheck %s
; RUN: llc < %s -march=x86 -regalloc=greedy | FileCheck %s
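This hunk and several others drop RUN lines that pass -regalloc=linearscan, which is consistent with the linear scan allocator being removed from llc around this time (an inference from the pattern of edits, not stated in the patch itself). The surviving allocators are exercised side by side; a hypothetical skeleton in the same style:

    ; RUN: llc < %s -march=x86 -regalloc=fast   | FileCheck %s
    ; RUN: llc < %s -march=x86 -regalloc=basic  | FileCheck %s
    ; RUN: llc < %s -march=x86 -regalloc=greedy | FileCheck %s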
diff --git a/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index 935c4c55f046..f35245bb2af7 100644
--- a/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -6,7 +6,7 @@
define i32 @main() nounwind {
entry:
- %0 = volatile load i32* @g_407, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32* @g_407, align 4 ; <i32> [#uses=1]
%1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
%2 = tail call i32 @func_45(i8 zeroext %1) nounwind ; <i32> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/X86/2008-12-16-BadShift.ll b/test/CodeGen/X86/2008-12-16-BadShift.ll
deleted file mode 100644
index 6c70c5ba5322..000000000000
--- a/test/CodeGen/X86/2008-12-16-BadShift.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s | not grep shrl
-; Note: this test is really trying to make sure that the shift
-; returns the right result; shrl is most likely wrong,
-; but if CodeGen starts legitimately using an shrl here,
-; please adjust the test appropriately.
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-@.str = internal constant [6 x i8] c"%lld\0A\00" ; <[6 x i8]*> [#uses=1]
-
-define i64 @mebbe_shift(i32 %xx, i32 %test) nounwind {
-entry:
- %conv = zext i32 %xx to i64 ; <i64> [#uses=1]
- %tobool = icmp ne i32 %test, 0 ; <i1> [#uses=1]
- %shl = select i1 %tobool, i64 3, i64 0 ; <i64> [#uses=1]
- %x.0 = shl i64 %conv, %shl ; <i64> [#uses=1]
- ret i64 %x.0
-}
-
diff --git a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
index 75e0b8a958b0..435adbbebfad 100644
--- a/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
; PR3149
; Make sure the copy after inline asm is not coalesced away.
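Many RUN lines in this patch gain -mcpu=generic. The likely motivation (an assumption, the diff does not say): without an explicit CPU, llc can pick up the host machine's scheduling model, so the exact instruction sequence matched by FileCheck may differ between build machines; pinning the generic model keeps the checked output stable. The change is always of this shape:

    ; before: schedules with whatever CPU model llc defaults to
    ; RUN: llc < %s -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s
    ; after: pinned to the generic scheduling model
    ; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin -asm-verbose=0 | FileCheck %s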
diff --git a/test/CodeGen/X86/2009-01-31-BigShift2.ll b/test/CodeGen/X86/2009-01-31-BigShift2.ll
index 9d240844afba..3e425536d1b9 100644
--- a/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -6,6 +6,6 @@ define void @test(<8 x double>* %P, i64* %Q) nounwind {
%B = bitcast <8 x double> %A to i512 ; <i512> [#uses=1]
%C = lshr i512 %B, 448 ; <i512> [#uses=1]
%D = trunc i512 %C to i64 ; <i64> [#uses=1]
- volatile store i64 %D, i64* %Q
+ store volatile i64 %D, i64* %Q
ret void
}
diff --git a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll b/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
deleted file mode 100644
index a46a20b1da65..000000000000
--- a/test/CodeGen/X86/2009-02-05-CoalescerBug.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 -o %t
-; RUN: grep movss %t | count 2
-; RUN: grep movaps %t | count 2
-; RUN: grep movdqa %t | count 2
-
-define i1 @t([2 x float]* %y, [2 x float]* %w, i32, [2 x float]* %x.pn59, i32 %smax190, i32 %j.1180, <4 x float> %wu.2179, <4 x float> %wr.2178, <4 x float>* %tmp89.out, <4 x float>* %tmp107.out, i32* %indvar.next218.out) nounwind {
-newFuncRoot:
- %tmp82 = insertelement <4 x float> %wr.2178, float 0.000000e+00, i32 0 ; <<4 x float>> [#uses=1]
- %tmp85 = insertelement <4 x float> %tmp82, float 0.000000e+00, i32 1 ; <<4 x float>> [#uses=1]
- %tmp87 = insertelement <4 x float> %tmp85, float 0.000000e+00, i32 2 ; <<4 x float>> [#uses=1]
- %tmp89 = insertelement <4 x float> %tmp87, float 0.000000e+00, i32 3 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp89, <4 x float>* %tmp89.out
- ret i1 false
-}
diff --git a/test/CodeGen/X86/2009-03-16-SpillerBug.ll b/test/CodeGen/X86/2009-03-16-SpillerBug.ll
deleted file mode 100644
index 951e191cd293..000000000000
--- a/test/CodeGen/X86/2009-03-16-SpillerBug.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -regalloc=linearscan -stats |& grep virtregrewriter | not grep {stores unfolded}
-; rdar://6682365
-
-; Do not clobber a register if another spill slot is available in it and it's marked "do not clobber".
-
- %struct.CAST_KEY = type { [32 x i32], i32 }
-@CAST_S_table0 = constant [2 x i32] [i32 821772500, i32 -1616838901], align 32 ; <[2 x i32]*> [#uses=0]
-@CAST_S_table4 = constant [2 x i32] [i32 2127105028, i32 745436345], align 32 ; <[2 x i32]*> [#uses=6]
-@CAST_S_table5 = constant [2 x i32] [i32 -151351395, i32 749497569], align 32 ; <[2 x i32]*> [#uses=5]
-@CAST_S_table6 = constant [2 x i32] [i32 -2048901095, i32 858518887], align 32 ; <[2 x i32]*> [#uses=4]
-@CAST_S_table7 = constant [2 x i32] [i32 -501862387, i32 -1143078916], align 32 ; <[2 x i32]*> [#uses=5]
-@CAST_S_table1 = constant [2 x i32] [i32 522195092, i32 -284448933], align 32 ; <[2 x i32]*> [#uses=0]
-@CAST_S_table2 = constant [2 x i32] [i32 -1913667008, i32 637164959], align 32 ; <[2 x i32]*> [#uses=0]
-@CAST_S_table3 = constant [2 x i32] [i32 -1649212384, i32 532081118], align 32 ; <[2 x i32]*> [#uses=0]
-
-define void @CAST_set_key(%struct.CAST_KEY* nocapture %key, i32 %len, i8* nocapture %data) nounwind ssp {
-bb1.thread:
- %0 = getelementptr [16 x i32]* null, i32 0, i32 5 ; <i32*> [#uses=1]
- %1 = getelementptr [16 x i32]* null, i32 0, i32 8 ; <i32*> [#uses=1]
- %2 = load i32* null, align 4 ; <i32> [#uses=1]
- %3 = shl i32 %2, 24 ; <i32> [#uses=1]
- %4 = load i32* null, align 4 ; <i32> [#uses=1]
- %5 = shl i32 %4, 16 ; <i32> [#uses=1]
- %6 = load i32* null, align 4 ; <i32> [#uses=1]
- %7 = or i32 %5, %3 ; <i32> [#uses=1]
- %8 = or i32 %7, %6 ; <i32> [#uses=1]
- %9 = or i32 %8, 0 ; <i32> [#uses=1]
- %10 = load i32* null, align 4 ; <i32> [#uses=1]
- %11 = shl i32 %10, 24 ; <i32> [#uses=1]
- %12 = load i32* %0, align 4 ; <i32> [#uses=1]
- %13 = shl i32 %12, 16 ; <i32> [#uses=1]
- %14 = load i32* null, align 4 ; <i32> [#uses=1]
- %15 = or i32 %13, %11 ; <i32> [#uses=1]
- %16 = or i32 %15, %14 ; <i32> [#uses=1]
- %17 = or i32 %16, 0 ; <i32> [#uses=1]
- br label %bb11
-
-bb11: ; preds = %bb11, %bb1.thread
- %18 = phi i32 [ %110, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=1]
- %19 = phi i32 [ %112, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=0]
- %20 = phi i32 [ 0, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=0]
- %21 = phi i32 [ %113, %bb11 ], [ 0, %bb1.thread ] ; <i32> [#uses=1]
- %X.0.0 = phi i32 [ %9, %bb1.thread ], [ %92, %bb11 ] ; <i32> [#uses=0]
- %X.1.0 = phi i32 [ %17, %bb1.thread ], [ 0, %bb11 ] ; <i32> [#uses=0]
- %22 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %21 ; <i32*> [#uses=0]
- %23 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %18 ; <i32*> [#uses=0]
- %24 = load i32* null, align 4 ; <i32> [#uses=1]
- %25 = xor i32 0, %24 ; <i32> [#uses=1]
- %26 = xor i32 %25, 0 ; <i32> [#uses=1]
- %27 = xor i32 %26, 0 ; <i32> [#uses=4]
- %28 = and i32 %27, 255 ; <i32> [#uses=2]
- %29 = lshr i32 %27, 8 ; <i32> [#uses=1]
- %30 = and i32 %29, 255 ; <i32> [#uses=2]
- %31 = lshr i32 %27, 16 ; <i32> [#uses=1]
- %32 = and i32 %31, 255 ; <i32> [#uses=1]
- %33 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %28 ; <i32*> [#uses=1]
- %34 = load i32* %33, align 4 ; <i32> [#uses=2]
- %35 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %30 ; <i32*> [#uses=1]
- %36 = load i32* %35, align 4 ; <i32> [#uses=2]
- %37 = xor i32 %34, 0 ; <i32> [#uses=1]
- %38 = xor i32 %37, %36 ; <i32> [#uses=1]
- %39 = xor i32 %38, 0 ; <i32> [#uses=1]
- %40 = xor i32 %39, 0 ; <i32> [#uses=1]
- %41 = xor i32 %40, 0 ; <i32> [#uses=3]
- %42 = lshr i32 %41, 8 ; <i32> [#uses=1]
- %43 = and i32 %42, 255 ; <i32> [#uses=2]
- %44 = lshr i32 %41, 16 ; <i32> [#uses=1]
- %45 = and i32 %44, 255 ; <i32> [#uses=1]
- %46 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %43 ; <i32*> [#uses=1]
- %47 = load i32* %46, align 4 ; <i32> [#uses=1]
- %48 = load i32* null, align 4 ; <i32> [#uses=1]
- %49 = xor i32 %47, 0 ; <i32> [#uses=1]
- %50 = xor i32 %49, %48 ; <i32> [#uses=1]
- %51 = xor i32 %50, 0 ; <i32> [#uses=1]
- %52 = xor i32 %51, 0 ; <i32> [#uses=1]
- %53 = xor i32 %52, 0 ; <i32> [#uses=2]
- %54 = and i32 %53, 255 ; <i32> [#uses=1]
- %55 = lshr i32 %53, 24 ; <i32> [#uses=1]
- %56 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %55 ; <i32*> [#uses=1]
- %57 = load i32* %56, align 4 ; <i32> [#uses=1]
- %58 = xor i32 0, %57 ; <i32> [#uses=1]
- %59 = xor i32 %58, 0 ; <i32> [#uses=1]
- %60 = xor i32 %59, 0 ; <i32> [#uses=1]
- store i32 %60, i32* null, align 4
- %61 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0 ; <i32*> [#uses=1]
- %62 = load i32* %61, align 4 ; <i32> [#uses=1]
- %63 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %54 ; <i32*> [#uses=1]
- %64 = load i32* %63, align 4 ; <i32> [#uses=1]
- %65 = xor i32 0, %64 ; <i32> [#uses=1]
- %66 = xor i32 %65, 0 ; <i32> [#uses=1]
- store i32 %66, i32* null, align 4
- %67 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %45 ; <i32*> [#uses=1]
- %68 = load i32* %67, align 4 ; <i32> [#uses=1]
- %69 = xor i32 %36, %34 ; <i32> [#uses=1]
- %70 = xor i32 %69, 0 ; <i32> [#uses=1]
- %71 = xor i32 %70, %68 ; <i32> [#uses=1]
- %72 = xor i32 %71, 0 ; <i32> [#uses=1]
- store i32 %72, i32* null, align 4
- %73 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %32 ; <i32*> [#uses=1]
- %74 = load i32* %73, align 4 ; <i32> [#uses=2]
- %75 = load i32* null, align 4 ; <i32> [#uses=1]
- %76 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %43 ; <i32*> [#uses=1]
- %77 = load i32* %76, align 4 ; <i32> [#uses=1]
- %78 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 0 ; <i32*> [#uses=1]
- %79 = load i32* %78, align 4 ; <i32> [#uses=1]
- %80 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %30 ; <i32*> [#uses=1]
- %81 = load i32* %80, align 4 ; <i32> [#uses=2]
- %82 = xor i32 %75, %74 ; <i32> [#uses=1]
- %83 = xor i32 %82, %77 ; <i32> [#uses=1]
- %84 = xor i32 %83, %79 ; <i32> [#uses=1]
- %85 = xor i32 %84, %81 ; <i32> [#uses=1]
- store i32 %85, i32* null, align 4
- %86 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 %28 ; <i32*> [#uses=1]
- %87 = load i32* %86, align 4 ; <i32> [#uses=1]
- %88 = xor i32 %74, %41 ; <i32> [#uses=1]
- %89 = xor i32 %88, %87 ; <i32> [#uses=1]
- %90 = xor i32 %89, 0 ; <i32> [#uses=1]
- %91 = xor i32 %90, %81 ; <i32> [#uses=1]
- %92 = xor i32 %91, 0 ; <i32> [#uses=3]
- %93 = lshr i32 %92, 16 ; <i32> [#uses=1]
- %94 = and i32 %93, 255 ; <i32> [#uses=1]
- store i32 %94, i32* null, align 4
- %95 = lshr i32 %92, 24 ; <i32> [#uses=2]
- %96 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 %95 ; <i32*> [#uses=1]
- %97 = load i32* %96, align 4 ; <i32> [#uses=1]
- %98 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0 ; <i32*> [#uses=1]
- %99 = load i32* %98, align 4 ; <i32> [#uses=1]
- %100 = load i32* null, align 4 ; <i32> [#uses=0]
- %101 = xor i32 %97, 0 ; <i32> [#uses=1]
- %102 = xor i32 %101, %99 ; <i32> [#uses=1]
- %103 = xor i32 %102, 0 ; <i32> [#uses=1]
- %104 = xor i32 %103, 0 ; <i32> [#uses=0]
- store i32 0, i32* null, align 4
- %105 = xor i32 0, %27 ; <i32> [#uses=1]
- %106 = xor i32 %105, 0 ; <i32> [#uses=1]
- %107 = xor i32 %106, 0 ; <i32> [#uses=1]
- %108 = xor i32 %107, 0 ; <i32> [#uses=1]
- %109 = xor i32 %108, %62 ; <i32> [#uses=3]
- %110 = and i32 %109, 255 ; <i32> [#uses=1]
- %111 = lshr i32 %109, 16 ; <i32> [#uses=1]
- %112 = and i32 %111, 255 ; <i32> [#uses=1]
- %113 = lshr i32 %109, 24 ; <i32> [#uses=3]
- store i32 %113, i32* %1, align 4
- %114 = load i32* null, align 4 ; <i32> [#uses=1]
- %115 = xor i32 0, %114 ; <i32> [#uses=1]
- %116 = xor i32 %115, 0 ; <i32> [#uses=1]
- %117 = xor i32 %116, 0 ; <i32> [#uses=1]
- %K.0.sum42 = or i32 0, 12 ; <i32> [#uses=1]
- %118 = getelementptr [32 x i32]* null, i32 0, i32 %K.0.sum42 ; <i32*> [#uses=1]
- store i32 %117, i32* %118, align 4
- %119 = getelementptr [2 x i32]* @CAST_S_table5, i32 0, i32 0 ; <i32*> [#uses=0]
- store i32 0, i32* null, align 4
- %120 = getelementptr [2 x i32]* @CAST_S_table6, i32 0, i32 %113 ; <i32*> [#uses=1]
- %121 = load i32* %120, align 4 ; <i32> [#uses=1]
- %122 = xor i32 0, %121 ; <i32> [#uses=1]
- store i32 %122, i32* null, align 4
- %123 = getelementptr [2 x i32]* @CAST_S_table4, i32 0, i32 0 ; <i32*> [#uses=1]
- %124 = load i32* %123, align 4 ; <i32> [#uses=1]
- %125 = getelementptr [2 x i32]* @CAST_S_table7, i32 0, i32 %95 ; <i32*> [#uses=1]
- %126 = load i32* %125, align 4 ; <i32> [#uses=1]
- %127 = xor i32 0, %124 ; <i32> [#uses=1]
- %128 = xor i32 %127, 0 ; <i32> [#uses=1]
- %129 = xor i32 %128, %126 ; <i32> [#uses=1]
- %130 = xor i32 %129, 0 ; <i32> [#uses=1]
- store i32 %130, i32* null, align 4
- br label %bb11
-}
diff --git a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index 90dabb8ab635..8bbdb0e82f78 100644
--- a/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -9,30 +9,30 @@
@X = external global i64 ; <i64*> [#uses=25]
define fastcc i64 @foo() nounwind {
- %tmp = volatile load i64* @X ; <i64> [#uses=7]
- %tmp1 = volatile load i64* @X ; <i64> [#uses=5]
- %tmp2 = volatile load i64* @X ; <i64> [#uses=3]
- %tmp3 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp4 = volatile load i64* @X ; <i64> [#uses=5]
- %tmp5 = volatile load i64* @X ; <i64> [#uses=3]
- %tmp6 = volatile load i64* @X ; <i64> [#uses=2]
- %tmp7 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp8 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp9 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp10 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp11 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp12 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp13 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp14 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp15 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp16 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp17 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp18 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp19 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp20 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp21 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp22 = volatile load i64* @X ; <i64> [#uses=1]
- %tmp23 = volatile load i64* @X ; <i64> [#uses=1]
+ %tmp = load volatile i64* @X ; <i64> [#uses=7]
+ %tmp1 = load volatile i64* @X ; <i64> [#uses=5]
+ %tmp2 = load volatile i64* @X ; <i64> [#uses=3]
+ %tmp3 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp4 = load volatile i64* @X ; <i64> [#uses=5]
+ %tmp5 = load volatile i64* @X ; <i64> [#uses=3]
+ %tmp6 = load volatile i64* @X ; <i64> [#uses=2]
+ %tmp7 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp8 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp9 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp10 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp11 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp12 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp13 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp14 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp15 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp16 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp17 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp18 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp19 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp20 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp21 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp22 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp23 = load volatile i64* @X ; <i64> [#uses=1]
%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8) ; <i64> [#uses=1]
%tmp25 = add i64 %tmp6, %tmp5 ; <i64> [#uses=1]
%tmp26 = add i64 %tmp25, %tmp4 ; <i64> [#uses=1]
@@ -229,7 +229,7 @@ define fastcc i64 @foo() nounwind {
%tmp217 = add i64 %tmp205, %tmp215 ; <i64> [#uses=1]
%tmp218 = add i64 %tmp217, %tmp211 ; <i64> [#uses=1]
%tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23) ; <i64> [#uses=2]
- volatile store i64 %tmp219, i64* @X, align 8
+ store volatile i64 %tmp219, i64* @X, align 8
%tmp220 = add i64 %tmp203, %tmp190 ; <i64> [#uses=1]
%tmp221 = add i64 %tmp220, %tmp216 ; <i64> [#uses=1]
%tmp222 = add i64 %tmp219, %tmp177 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
index 620e0f366740..9f5a8c53be18 100644
--- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
+++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll
@@ -1,11 +1,10 @@
; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
-; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \
+; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \
; RUN: FileCheck %s
; rdar://6808032
; CHECK: pextrw $14
; CHECK-NEXT: shrl $8
-; CHECK-NEXT: (%ebp)
; CHECK-NEXT: pinsrw
define void @update(i8** %args_list) nounwind {
diff --git a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index a5e28c074867..c2cd89c33ee8 100644
--- a/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -12,7 +12,7 @@ entry:
br label %bb
bb: ; preds = %bb.i, %bb, %entry
- %2 = volatile load i32* @g_9, align 4 ; <i32> [#uses=2]
+ %2 = load volatile i32* @g_9, align 4 ; <i32> [#uses=2]
%3 = icmp sgt i32 %2, 1 ; <i1> [#uses=1]
%4 = and i1 %3, %1 ; <i1> [#uses=1]
br i1 %4, label %bb.i, label %bb
diff --git a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
index 12bd28518762..1259cf47b2bc 100644
--- a/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ b/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-mingw32 < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
; CHECK: subq $40, %rsp
; CHECK: movaps %xmm8, (%rsp)
; CHECK: movaps %xmm7, 16(%rsp)
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index 5c514805e485..5f5d5cccf714 100644
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 2
+; RUN: grep movzbl %t1 | count 1
; RUN: grep movd %t1 | count 4
define <4 x i16> @a(i32* %x1) nounwind {
diff --git a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
index 07ef53e09d8e..66caedfc0692 100644
--- a/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
+++ b/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx | grep movl | count 2
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 -mattr=+mmx | grep movd | count 2
define i64 @a(i32 %a, i32 %b) nounwind readnone {
entry:
diff --git a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll b/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
deleted file mode 100644
index 3e5bd348ecd9..000000000000
--- a/test/CodeGen/X86/2009-07-17-StackColoringBug.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llc < %s -mtriple=i386-pc-linux-gnu -disable-fp-elim -color-ss-with-regs | not grep dil
-; PR4552
-
-target triple = "i386-pc-linux-gnu"
-@g_8 = internal global i32 0 ; <i32*> [#uses=1]
-@g_72 = internal global i32 0 ; <i32*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8, i8)* @uint84 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define i32 @uint84(i32 %p_15, i8 signext %p_17, i8 signext %p_19) nounwind {
-entry:
- %g_72.promoted = load i32* @g_72 ; <i32> [#uses=1]
- %g_8.promoted = load i32* @g_8 ; <i32> [#uses=1]
- br label %bb
-
-bb: ; preds = %func_40.exit, %entry
- %g_8.tmp.1 = phi i32 [ %g_8.promoted, %entry ], [ %g_8.tmp.0, %func_40.exit ] ; <i32> [#uses=3]
- %g_72.tmp.1 = phi i32 [ %g_72.promoted, %entry ], [ %g_72.tmp.0, %func_40.exit ] ; <i32> [#uses=3]
- %retval12.i4.i.i = trunc i32 %g_8.tmp.1 to i8 ; <i8> [#uses=2]
- %0 = trunc i32 %g_72.tmp.1 to i8 ; <i8> [#uses=2]
- %1 = mul i8 %retval12.i4.i.i, %0 ; <i8> [#uses=1]
- %2 = icmp eq i8 %1, 0 ; <i1> [#uses=1]
- br i1 %2, label %bb2.i.i, label %bb.i.i
-
-bb.i.i: ; preds = %bb
- %3 = sext i8 %0 to i32 ; <i32> [#uses=1]
- %4 = and i32 %3, 50295 ; <i32> [#uses=1]
- %5 = icmp eq i32 %4, 0 ; <i1> [#uses=1]
- br i1 %5, label %bb2.i.i, label %func_55.exit.i
-
-bb2.i.i: ; preds = %bb.i.i, %bb
- br label %func_55.exit.i
-
-func_55.exit.i: ; preds = %bb2.i.i, %bb.i.i
- %g_72.tmp.2 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1]
- %6 = phi i32 [ 1, %bb2.i.i ], [ %g_72.tmp.1, %bb.i.i ] ; <i32> [#uses=1]
- %7 = trunc i32 %6 to i8 ; <i8> [#uses=2]
- %8 = mul i8 %7, %retval12.i4.i.i ; <i8> [#uses=1]
- %9 = icmp eq i8 %8, 0 ; <i1> [#uses=1]
- br i1 %9, label %bb2.i4.i, label %bb.i3.i
-
-bb.i3.i: ; preds = %func_55.exit.i
- %10 = sext i8 %7 to i32 ; <i32> [#uses=1]
- %11 = and i32 %10, 50295 ; <i32> [#uses=1]
- %12 = icmp eq i32 %11, 0 ; <i1> [#uses=1]
- br i1 %12, label %bb2.i4.i, label %func_40.exit
-
-bb2.i4.i: ; preds = %bb.i3.i, %func_55.exit.i
- br label %func_40.exit
-
-func_40.exit: ; preds = %bb2.i4.i, %bb.i3.i
- %g_72.tmp.0 = phi i32 [ 1, %bb2.i4.i ], [ %g_72.tmp.2, %bb.i3.i ] ; <i32> [#uses=1]
- %phitmp = icmp sgt i32 %g_8.tmp.1, 0 ; <i1> [#uses=1]
- %g_8.tmp.0 = select i1 %phitmp, i32 %g_8.tmp.1, i32 1 ; <i32> [#uses=1]
- br label %bb
-}
diff --git a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index 790fd88c46dd..410a42a42878 100644
--- a/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -41,18 +41,18 @@ bb3: ; preds = %bb2, %bb
br i1 undef, label %bb5, label %bb4
bb4: ; preds = %bb3
- %17 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %17 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
br label %bb5
bb5: ; preds = %bb4, %bb3
- %18 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %18 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
%19 = sext i8 undef to i16 ; <i16> [#uses=1]
%20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
br i1 undef, label %return, label %bb6.preheader
bb6.preheader: ; preds = %bb5
%21 = sext i8 %p_52 to i32 ; <i32> [#uses=1]
- %22 = volatile load i32* @uint8, align 4 ; <i32> [#uses=0]
+ %22 = load volatile i32* @uint8, align 4 ; <i32> [#uses=0]
%23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
unreachable
diff --git a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index f6ac2ba60647..d4a74c9e7e7a 100644
--- a/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
+; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
; Check that lowered arguments do not overwrite the return address before it is moved.
; Bug 6225
;
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
index 69787c78cfd6..5372bc522785 100644
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,32 +1,35 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
; There are no MMX operations here, so we use XMM or i64.
+; CHECK: ti8
define void @ti8(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <8 x i8>
%tmp2 = bitcast double %b to <8 x i8>
%tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK: paddb %xmm1, %xmm0
+; CHECK: paddw
store <8 x i8> %tmp3, <8 x i8>* null
ret void
}
+; CHECK: ti16
define void @ti16(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <4 x i16>
%tmp2 = bitcast double %b to <4 x i16>
%tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK: paddw %xmm1, %xmm0
+; CHECK: paddd
store <4 x i16> %tmp3, <4 x i16>* null
ret void
}
+; CHECK: ti32
define void @ti32(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to <2 x i32>
%tmp2 = bitcast double %b to <2 x i32>
%tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK: paddd %xmm1, %xmm0
+; CHECK: paddq
store <2 x i32> %tmp3, <2 x i32>* null
ret void
}
@@ -55,6 +58,7 @@ entry:
ret void
}
+; CHECK: ti16a
define void @ti16a(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to x86_mmx
@@ -66,6 +70,7 @@ entry:
ret void
}
+; CHECK: ti32a
define void @ti32a(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to x86_mmx
@@ -77,6 +82,7 @@ entry:
ret void
}
+; CHECK: ti64a
define void @ti64a(double %a, double %b) nounwind {
entry:
%tmp1 = bitcast double %a to x86_mmx
diff --git a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index 7af58dc38399..cbf5502e1c05 100644
--- a/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -30,14 +30,16 @@ invoke.cont: ; preds = %entry
br label %finally
terminate.handler: ; preds = %match.end
- %exc = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
- %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0]
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
call void @_ZSt9terminatev() noreturn nounwind
unreachable
try.handler: ; preds = %entry
- %exc1 = call i8* @llvm.eh.exception() ; <i8*> [#uses=3]
- %selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* bitcast (i8** @_ZTIi to i8*), i8* null) ; <i32> [#uses=1]
+ %exc1.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %exc1 = extractvalue { i8*, i32 } %exc1.ptr, 0
+ %selector = extractvalue { i8*, i32 } %exc1.ptr, 1
%2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) ; <i32> [#uses=1]
%3 = icmp eq i32 %selector, %2 ; <i1> [#uses=1]
br i1 %3, label %match, label %catch.next
@@ -55,9 +57,10 @@ invoke.cont2: ; preds = %match
br label %match.end
match.handler: ; preds = %match
- %exc3 = call i8* @llvm.eh.exception() ; <i8*> [#uses=2]
- %7 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc3, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0) ; <i32> [#uses=0]
- store i8* %exc3, i8** %_rethrow
+ %exc3 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %7 = extractvalue { i8*, i32 } %exc3, 0
+ store i8* %7, i8** %_rethrow
store i32 2, i32* %cleanup.dst
br label %match.end
@@ -124,10 +127,6 @@ declare void @_Z6throwsv() ssp
declare i32 @__gxx_personality_v0(...)
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare void @_ZSt9terminatev()
declare void @_Unwind_Resume_or_Rethrow(i8*)
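These exception-handling hunks track the LLVM 3.0 EH rewrite: the llvm.eh.exception / llvm.eh.selector intrinsic pair is folded into a single landingpad instruction whose { i8*, i32 } result carries the exception pointer and the type selector. The converted blocks in this patch all follow the same shape:

    lpad:
      %lp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
              catch i8* null
      %exn = extractvalue { i8*, i32 } %lp, 0   ; exception pointer
      %sel = extractvalue { i8*, i32 } %lp, 1   ; type selector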
diff --git a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
index 5accfd74c0a9..e0c2c6c9ca7f 100644
--- a/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
+++ b/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -mcpu=generic | FileCheck %s
; PR6941
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
deleted file mode 100644
index 2ba12dfc5680..000000000000
--- a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: llc -O2 -mtriple=i386-apple-darwin <%s | FileCheck %s
-; Use DW_FORM_addr for DW_AT_entry_pc.
-; Radar 8094785
-
-; CHECK: .byte 17 ## DW_TAG_compile_unit
-; CHECK-NEXT: .byte 1 ## DW_CHILDREN_yes
-; CHECK-NEXT: .byte 37 ## DW_AT_producer
-; CHECK-NEXT: .byte 8 ## DW_FORM_string
-; CHECK-NEXT: .byte 19 ## DW_AT_language
-; CHECK-NEXT: .byte 5 ## DW_FORM_data2
-; CHECK-NEXT: .byte 3 ## DW_AT_name
-; CHECK-NEXT: .byte 8 ## DW_FORM_string
-; CHECK-NEXT: .byte 82 ## DW_AT_entry_pc
-; CHECK-NEXT: .byte 1 ## DW_FORM_addr
-; CHECK-NEXT: .byte 16 ## DW_AT_stmt_list
-; CHECK-NEXT: .byte 6 ## DW_FORM_data4
-; CHECK-NEXT: .byte 27 ## DW_AT_comp_dir
-; CHECK-NEXT: .byte 8 ## DW_FORM_string
-; CHECK-NEXT: .byte 225 ## DW_AT_APPLE_optimized
-
-%struct.a = type { i32, %struct.a* }
-
-@ret = common global i32 0 ; <i32*> [#uses=2]
-
-define void @foo(i32 %x) nounwind noinline ssp {
-entry:
- tail call void @llvm.dbg.value(metadata !{i32 %x}, i64 0, metadata !21), !dbg !28
- store i32 %x, i32* @ret, align 4, !dbg !29
- ret void, !dbg !31
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-define i8* @bar(%struct.a* %b) nounwind noinline ssp {
-entry:
- tail call void @llvm.dbg.value(metadata !{%struct.a* %b}, i64 0, metadata !22), !dbg !32
- %0 = getelementptr inbounds %struct.a* %b, i64 0, i32 0, !dbg !33 ; <i32*> [#uses=1]
- %1 = load i32* %0, align 8, !dbg !33 ; <i32> [#uses=1]
- tail call void @foo(i32 %1) nounwind noinline ssp, !dbg !33
- %2 = bitcast %struct.a* %b to i8*, !dbg !35 ; <i8*> [#uses=1]
- ret i8* %2, !dbg !35
-}
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-entry:
- %e = alloca %struct.a, align 8 ; <%struct.a*> [#uses=4]
- call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !23), !dbg !36
- call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !24), !dbg !36
- call void @llvm.dbg.declare(metadata !{%struct.a* %e}, metadata !25), !dbg !37
- %0 = getelementptr inbounds %struct.a* %e, i64 0, i32 0, !dbg !38 ; <i32*> [#uses=1]
- store i32 4, i32* %0, align 8, !dbg !38
- %1 = getelementptr inbounds %struct.a* %e, i64 0, i32 1, !dbg !39 ; <%struct.a**> [#uses=1]
- store %struct.a* %e, %struct.a** %1, align 8, !dbg !39
- %2 = call i8* @bar(%struct.a* %e) nounwind noinline ssp, !dbg !40 ; <i8*> [#uses=0]
- %3 = load i32* @ret, align 4, !dbg !41 ; <i32> [#uses=1]
- ret i32 %3, !dbg !41
-}
-
-!llvm.dbg.sp = !{!0, !6, !15}
-!llvm.dbg.lv.foo = !{!21}
-!llvm.dbg.lv.bar = !{!22}
-!llvm.dbg.lv.main = !{!23, !24, !25}
-!llvm.dbg.gv = !{!27}
-
-!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"foo", metadata !1, i32 34, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, void (i32)* @foo} ; [ DW_TAG_subprogram ]
-!1 = metadata !{i32 524329, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"2010-06-28-DbgEntryPC.c", metadata !"/Users/yash/clean/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!6 = metadata !{i32 524334, i32 0, metadata !1, metadata !"bar", metadata !"bar", metadata !"bar", metadata !1, i32 38, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i8* (%struct.a*)* @bar} ; [ DW_TAG_subprogram ]
-!7 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9, metadata !10}
-!9 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
-!10 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 524307, metadata !1, metadata !"a", metadata !1, i32 23, i64 128, i64 64, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_structure_type ]
-!12 = metadata !{metadata !13, metadata !14}
-!13 = metadata !{i32 524301, metadata !11, metadata !"c", metadata !1, i32 24, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_member ]
-!14 = metadata !{i32 524301, metadata !11, metadata !"d", metadata !1, i32 25, i64 64, i64 64, i64 64, i32 0, metadata !10} ; [ DW_TAG_member ]
-!15 = metadata !{i32 524334, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 43, metadata !16, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ]
-!16 = metadata !{i32 524309, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !17, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!17 = metadata !{metadata !5, metadata !5, metadata !18}
-!18 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !19} ; [ DW_TAG_pointer_type ]
-!19 = metadata !{i32 524303, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ]
-!20 = metadata !{i32 524324, metadata !1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
-!21 = metadata !{i32 524545, metadata !0, metadata !"x", metadata !1, i32 33, metadata !5} ; [ DW_TAG_arg_variable ]
-!22 = metadata !{i32 524545, metadata !6, metadata !"b", metadata !1, i32 38, metadata !10} ; [ DW_TAG_arg_variable ]
-!23 = metadata !{i32 524545, metadata !15, metadata !"argc", metadata !1, i32 43, metadata !5} ; [ DW_TAG_arg_variable ]
-!24 = metadata !{i32 524545, metadata !15, metadata !"argv", metadata !1, i32 43, metadata !18} ; [ DW_TAG_arg_variable ]
-!25 = metadata !{i32 524544, metadata !26, metadata !"e", metadata !1, i32 44, metadata !11} ; [ DW_TAG_auto_variable ]
-!26 = metadata !{i32 524299, metadata !15, i32 43, i32 0} ; [ DW_TAG_lexical_block ]
-!27 = metadata !{i32 524340, i32 0, metadata !1, metadata !"ret", metadata !"ret", metadata !"", metadata !1, i32 28, metadata !5, i1 false, i1 true, i32* @ret} ; [ DW_TAG_variable ]
-!28 = metadata !{i32 33, i32 0, metadata !0, null}
-!29 = metadata !{i32 35, i32 0, metadata !30, null}
-!30 = metadata !{i32 524299, metadata !0, i32 34, i32 0} ; [ DW_TAG_lexical_block ]
-!31 = metadata !{i32 36, i32 0, metadata !30, null}
-!32 = metadata !{i32 38, i32 0, metadata !6, null}
-!33 = metadata !{i32 39, i32 0, metadata !34, null}
-!34 = metadata !{i32 524299, metadata !6, i32 38, i32 0} ; [ DW_TAG_lexical_block ]
-!35 = metadata !{i32 40, i32 0, metadata !34, null}
-!36 = metadata !{i32 43, i32 0, metadata !15, null}
-!37 = metadata !{i32 44, i32 0, metadata !26, null}
-!38 = metadata !{i32 45, i32 0, metadata !26, null}
-!39 = metadata !{i32 46, i32 0, metadata !26, null}
-!40 = metadata !{i32 48, i32 0, metadata !26, null}
-!41 = metadata !{i32 49, i32 0, metadata !26, null}
diff --git a/test/CodeGen/X86/2010-08-04-MingWCrash.ll b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
index 98a0887c0e69..61f527b0470c 100644
--- a/test/CodeGen/X86/2010-08-04-MingWCrash.ll
+++ b/test/CodeGen/X86/2010-08-04-MingWCrash.ll
@@ -10,14 +10,15 @@ bb1:
ret void
lpad:
- %exn = tail call i8* @llvm.eh.exception() nounwind
- %eh.selector = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1, i8* null) nounwind
+ %exn.ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ %exn = extractvalue { i8*, i32 } %exn.ptr, 0
+ %eh.selector = extractvalue { i8*, i32 } %exn.ptr, 1
%ehspec.fails = icmp slt i32 %eh.selector, 0
br i1 %ehspec.fails, label %ehspec.unexpected, label %cleanup
cleanup:
- tail call void @_Unwind_Resume_or_Rethrow(i8* %exn) noreturn nounwind
- unreachable
+ resume { i8*, i32 } %exn.ptr
ehspec.unexpected:
tail call void @__cxa_call_unexpected(i8* %exn) noreturn nounwind
@@ -26,12 +27,8 @@ ehspec.unexpected:
declare noalias i8* @malloc()
-declare i8* @llvm.eh.exception() nounwind readonly
-
declare i32 @__gxx_personality_v0(...)
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare void @_Unwind_Resume_or_Rethrow(i8*)
declare void @__cxa_call_unexpected(i8*)
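Likewise, rethrowing changes form in the hunk above: the explicit libcall to _Unwind_Resume_or_Rethrow followed by unreachable becomes the resume terminator, which continues unwinding with the landingpad's aggregate value:

    cleanup:
      resume { i8*, i32 } %exn.ptr   ; replaces the libcall + unreachable pair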
diff --git a/test/CodeGen/X86/2010-08-10-DbgConstant.ll b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
index d98ef14e108b..b3cc35d723f7 100644
--- a/test/CodeGen/X86/2010-08-10-DbgConstant.ll
+++ b/test/CodeGen/X86/2010-08-10-DbgConstant.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=x86 -O0 < %s | FileCheck %s
+; RUN: llc -mtriple=i686-linux -O0 < %s | FileCheck %s
; CHECK: DW_TAG_constant
-; CHECK-NEXT: ascii "ro" #{{#?}} DW_AT_name
+; CHECK-NEXT: .long .Lstring3 #{{#?}} DW_AT_name
define void @foo() nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
index 7f134113b2e3..166dcf259989 100644
--- a/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
+++ b/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
@@ -4,8 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.0.0"
; Check debug info for variable z_s
-;CHECK: .ascii "z_s" ## DW_AT_name
-;CHECK-NEXT: .byte 0
+;CHECK: .long Lset13
;CHECK-NEXT: ## DW_AT_decl_file
;CHECK-NEXT: ## DW_AT_decl_line
;CHECK-NEXT: ## DW_AT_type
diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll
index 72c79d27d026..4d5f8d7857c0 100644
--- a/test/CodeGen/X86/2011-08-29-InitOrder.ll
+++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll
@@ -3,22 +3,28 @@
; PR5329
@llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }]
-; CHECK-DEFAULT: construct_3
-; CHECK-DEFAULT: construct_2
-; CHECK-DEFAULT: construct_1
+; CHECK-DEFAULT: .section .ctors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_1
+; CHECK-DEFAULT: .section .ctors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_2
+; CHECK-DEFAULT: .section .ctors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long construct_3
-; CHECK-DARWIN: construct_1
-; CHECK-DARWIN: construct_2
-; CHECK-DARWIN: construct_3
+; CHECK-DARWIN: .long _construct_1
+; CHECK-DARWIN-NEXT: .long _construct_2
+; CHECK-DARWIN-NEXT: .long _construct_3
@llvm.global_dtors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @destruct_2 }, { i32, void ()* } { i32 1000, void ()* @destruct_1 }, { i32, void ()* } { i32 3000, void ()* @destruct_3 }]
-; CHECK-DEFAULT: destruct_3
-; CHECK-DEFAULT: destruct_2
-; CHECK-DEFAULT: destruct_1
+; CHECK-DEFAULT: .section .dtors.64535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_1
+; CHECK-DEFAULT: .section .dtors.63535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_2
+; CHECK-DEFAULT: .section .dtors.62535,"aw",@progbits
+; CHECK-DEFAULT: .long destruct_3
-; CHECK-DARWIN: destruct_1
-; CHECK-DARWIN: destruct_2
-; CHECK-DARWIN: destruct_3
+; CHECK-DARWIN: .long _destruct_1
+; CHECK-DARWIN-NEXT: .long _destruct_2
+; CHECK-DARWIN-NEXT: .long _destruct_3
declare void @construct_1()
declare void @construct_2()
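The new CHECK-DEFAULT lines make the priority-to-section mapping explicit; from the values in this hunk, the ELF section suffix is 65535 minus the constructor priority, so lower-priority entries land in higher-numbered .ctors sections:

    65535 - 1000 = 64535  ->  .ctors.64535  (construct_1)
    65535 - 2000 = 63535  ->  .ctors.63535  (construct_2)
    65535 - 3000 = 62535  ->  .ctors.62535  (construct_3)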
diff --git a/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
new file mode 100644
index 000000000000..8c09d97f08d2
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -0,0 +1,29 @@
+; RUN: llc -march=x86 -fast-isel -mattr=+sse < %s | FileCheck %s
+; <rdar://problem/10215997>
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+define void @vectortest() nounwind ssp {
+entry:
+ %p1 = alloca <4 x float>, align 16
+ %p2 = alloca <4 x float>, align 16
+ %p3 = alloca <4 x float>, align 16
+ %p4 = alloca <4 x float>, align 16
+ %p5 = alloca <4 x float>, align 16
+ store <4 x float> <float 0x3FF19999A0000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF6666660000000>, <4 x float>* %p1, align 16
+ store <4 x float> <float 0x4000CCCCC0000000, float 0x40019999A0000000, float 0x4002666660000000, float 0x4003333340000000>, <4 x float>* %p2, align 16
+ store <4 x float> <float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000>, <4 x float>* %p3, align 16
+ store <4 x float> <float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000>, <4 x float>* %p4, align 16
+ store <4 x float> <float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000>, <4 x float>* %p5, align 16
+ %0 = load <4 x float>* %p1, align 16
+ %1 = load <4 x float>* %p2, align 16
+ %2 = load <4 x float>* %p3, align 16
+ %3 = load <4 x float>* %p4, align 16
+ %4 = load <4 x float>* %p5, align 16
+; CHECK: movaps {{%xmm[0-7]}}, (%esp)
+; CHECK-NEXT: calll _dovectortest
+ call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
+ ret void
+}
+
+declare void @dovectortest(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>)
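A note on the CHECK pair in this new test (an inference, not spelled out in the file): the Darwin i386 convention passes the first few <4 x float> arguments in XMM registers, so with five vector arguments the last one has to be written to the outgoing-argument stack slot, which is exactly the movaps-to-(%esp) followed by the call that the test pins down:

    ; CHECK: movaps {{%xmm[0-7]}}, (%esp)   ; vector argument that did not fit in a register
    ; CHECK-NEXT: calll _dovectortest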
diff --git a/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
new file mode 100644
index 000000000000..a7207537de21
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i8:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%union.anon = type { <2 x i8> }
+
+@i = global <2 x i8> <i8 150, i8 100>, align 8
+@j = global <2 x i8> <i8 10, i8 13>, align 8
+@res = common global %union.anon zeroinitializer, align 8
+
+; Make sure we load the constants i and j starting at offset zero.
+; Also make sure that we sign-extend them.
+; Based on /gcc-4_2-testsuite/src/gcc.c-torture/execute/pr23135.c
+
+; CHECK: main
+define i32 @main() nounwind uwtable {
+entry:
+; CHECK: movsbq j(%rip), %
+; CHECK: movsbq i(%rip), %
+ %0 = load <2 x i8>* @i, align 8
+ %1 = load <2 x i8>* @j, align 8
+ %div = sdiv <2 x i8> %1, %0
+ store <2 x i8> %div, <2 x i8>* getelementptr inbounds (%union.anon* @res, i32 0, i32 0), align 8
+ ret i32 0
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
new file mode 100644
index 000000000000..e08c5b28c5ec
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we don't crash when legalizing vselect and vsetcc and that
+; we are able to generate vector blend instructions.
+
+; CHECK: simple_widen
+; CHECK: blend
+; CHECK: ret
+define void @simple_widen() {
+entry:
+ %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+ store <2 x float> %0, <2 x float>* undef
+ ret void
+}
+
+; CHECK: complex_inreg_work
+; CHECK: blend
+; CHECK: ret
+
+define void @complex_inreg_work() {
+entry:
+ %0 = fcmp oeq <2 x float> undef, undef
+ %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+ store <2 x float> %1, <2 x float>* undef
+ ret void
+}
+
+; CHECK: zero_test
+; CHECK: blend
+; CHECK: ret
+
+define void @zero_test() {
+entry:
+ %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
+ store <2 x float> %0, <2 x float>* undef
+ ret void
+}
+
+; CHECK: full_test
+; CHECK: blend
+; CHECK: ret
+
+define void @full_test() {
+ entry:
+ %Cy300 = alloca <4 x float>
+ %Cy11a = alloca <2 x float>
+ %Cy118 = alloca <2 x float>
+ %Cy119 = alloca <2 x float>
+ br label %B1
+
+ B1: ; preds = %entry
+ %0 = load <2 x float>* %Cy119
+ %1 = fptosi <2 x float> %0 to <2 x i32>
+ %2 = sitofp <2 x i32> %1 to <2 x float>
+ %3 = fcmp ogt <2 x float> %0, zeroinitializer
+ %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
+ %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
+ %6 = fcmp oeq <2 x float> %2, %0
+ %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
+ store <2 x float> %7, <2 x float>* %Cy118
+ %8 = load <2 x float>* %Cy118
+ store <2 x float> %8, <2 x float>* %Cy11a
+ ret void
+}
+
+
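The blend CHECKs in this new test depend on the chosen CPU: corei7 implies SSE4.1-class blend instructions (an assumption about the -mcpu flag, consistent with the other corei7 tests added in this patch), so once a <2 x i1> select is widened to four lanes it can be matched to a single blend rather than an and/andnot/or sequence. The pattern being exercised is simply:

    %m = fcmp oeq <2 x float> %a, %b
    %r = select <2 x i1> %m, <2 x float> %a, <2 x float> %c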
diff --git a/test/CodeGen/X86/2011-10-21-widen-cmp.ll b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
new file mode 100644
index 000000000000..2fe645b07815
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-21-widen-cmp.ll
@@ -0,0 +1,45 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that a <4 x float> compare is generated and that we are
+; not stuck in an endless loop.
+
+; CHECK: cmp_2_floats
+; CHECK: cmpordps
+; CHECK: ret
+
+define void @cmp_2_floats() {
+entry:
+ %0 = fcmp oeq <2 x float> undef, undef
+ %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+ store <2 x float> %1, <2 x float>* undef
+ ret void
+}
+
+; CHECK: cmp_2_doubles
+; CHECK: cmpordpd
+; CHECK: blendvpd
+; CHECK: ret
+define void @cmp_2_doubles() {
+entry:
+ %0 = fcmp oeq <2 x double> undef, undef
+ %1 = select <2 x i1> %0, <2 x double> undef, <2 x double> undef
+ store <2 x double> %1, <2 x double>* undef
+ ret void
+}
+
+; CHECK: mp_11193
+; CHECK: psraw $15
+; CHECK: ret
+define void @mp_11193(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET)
+nounwind {
+allocas:
+ %bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00, float 9.000000e+00, float 1.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+ %t = extractelement <8 x i1> %bincmp, i32 0
+ %ft = sitofp i1 %t to float
+ %pp = bitcast <8 x float>* %RET to float*
+ store float %ft, float* %pp
+ ret void
+}
+
diff --git a/test/CodeGen/X86/2011-10-27-tstore.ll b/test/CodeGen/X86/2011-10-27-tstore.ll
new file mode 100644
index 000000000000..6e83f6713ae4
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: ltstore
+;CHECK: movq
+;CHECK: movq
+;CHECK: ret
+define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
+entry:
+ %in = load <4 x i32>* %pA
+ %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ store <2 x i32> %j, <2 x i32>* %pB
+ ret void
+}
+
diff --git a/test/CodeGen/X86/2011-10-30-padd.ll b/test/CodeGen/X86/2011-10-30-padd.ll
new file mode 100644
index 000000000000..180ca15a0ee2
--- /dev/null
+++ b/test/CodeGen/X86/2011-10-30-padd.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s
+
+;CHECK: addXX_test
+;CHECK: padd
+;CHECK: ret
+
+
+define <16 x i8> @addXX_test(<16 x i8> %a) {
+ %b = add <16 x i8> %a, %a
+ ret <16 x i8> %b
+}
+
+;CHECK: instcombine_test
+;CHECK: padd
+;CHECK: ret
+define <16 x i8> @instcombine_test(<16 x i8> %a) {
+ %b = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %b
+}
+
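Both functions above should select to padd for the same reason: SSE has no byte-element vector shift, and a left shift by one is just x + x, so the splat-by-1 shl is canonicalized to an add before instruction selection:

    ; the shl in @instcombine_test is equivalent to
    %b = add <16 x i8> %a, %a   ; selected as paddb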
diff --git a/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll b/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll
new file mode 100644
index 000000000000..d3164707a35d
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-07-LegalizeBuildVector.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; We don't really care what this outputs; just make sure it's somewhat sane.
+; CHECK: legalize_test
+; CHECK: vmovups
+define void @legalize_test(i32 %x, <8 x i32>* %p) nounwind {
+entry:
+ %t1 = insertelement <8 x i32> <i32 undef, i32 undef, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>, i32 %x, i32 0
+ %t2 = shufflevector <8 x i32> %t1, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %int2float = sitofp <8 x i32> %t2 to <8 x float>
+ %blendAsInt.i821 = bitcast <8 x float> %int2float to <8 x i32>
+ store <8 x i32> %blendAsInt.i821, <8 x i32>* %p, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
new file mode 100644
index 000000000000..8174109378de
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11"
+
+; This test would create a vpand %ymm instruction that is only legal in AVX2.
+; CHECK-NOT: vpand %ymm
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+
+define void @ShadeTile() nounwind {
+allocas:
+ br i1 undef, label %if_then, label %if_else
+
+if_then: ; preds = %allocas
+ unreachable
+
+if_else: ; preds = %allocas
+ br i1 undef, label %for_loop156.lr.ph, label %if_exit
+
+for_loop156.lr.ph: ; preds = %if_else
+ %val_6.i21244 = load i16* undef, align 2
+ %0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6
+ %val_7.i21248 = load i16* undef, align 2
+ %1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7
+ %uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32>
+ %bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, <i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744>
+ %bitop8.i20209 = and <8 x i32> %uint2uint32.i20206, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023>
+ %bitop12.i20211 = lshr <8 x i32> %bitop5.i20208, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+ %binop13.i20212 = add <8 x i32> %bitop12.i20211, <i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
+ %bitop15.i20213 = shl <8 x i32> %binop13.i20212, <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+ %bitop17.i20214 = shl <8 x i32> %bitop8.i20209, <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+ %bitop20.i20215 = or <8 x i32> undef, %bitop15.i20213
+ %bitop22.i20216 = or <8 x i32> %bitop20.i20215, %bitop17.i20214
+ %int_to_float_bitcast.i.i.i20217 = bitcast <8 x i32> %bitop22.i20216 to <8 x float>
+ %binop401 = fmul <8 x float> undef, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+ %binop402 = fadd <8 x float> %binop401, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+ %binop403 = fmul <8 x float> zeroinitializer, %binop402
+ %binop406 = fmul <8 x float> %int_to_float_bitcast.i.i.i20217, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+ %binop407 = fadd <8 x float> %binop406, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+ %binop408 = fmul <8 x float> zeroinitializer, %binop407
+ %binop411 = fsub <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, undef
+ %val_4.i21290 = load i16* undef, align 2
+ %2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4
+ %val_5.i21294 = load i16* undef, align 2
+ %3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5
+ %val_6.i21298 = load i16* undef, align 2
+ %4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6
+ %ptr_7.i21301 = inttoptr i64 undef to i16*
+ %val_7.i21302 = load i16* %ptr_7.i21301, align 2
+ %5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7
+ %uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32>
+ %structelement561 = load i8** undef, align 8
+ %ptr2int563 = ptrtoint i8* %structelement561 to i64
+ %smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7
+ %new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer
+ %val_5.i21509 = load i8* null, align 1
+ %6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5
+ %7 = insertelement <8 x i8> %6, i8 undef, i32 6
+ %iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7
+ %ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8*
+ %val_7.i21517 = load i8* %ptr_7.i21516, align 1
+ %8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7
+ %uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float>
+ %binop.i20246 = fmul <8 x float> %uint2float.i20245, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
+ br i1 undef, label %for_loop594.lr.ph, label %for_exit595
+
+if_exit: ; preds = %if_else
+ ret void
+
+for_loop594.lr.ph: ; preds = %for_loop156.lr.ph
+ %bitop8.i20221 = and <8 x i32> %uint2uint32.i20218, <i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023, i32 1023>
+ br i1 undef, label %cif_test_all730, label %cif_mask_mixed1552
+
+for_exit595: ; preds = %for_loop156.lr.ph
+ unreachable
+
+cif_test_all730: ; preds = %for_loop594.lr.ph
+ %binop11.i20545 = fmul <8 x float> %binop408, zeroinitializer
+ %binop12.i20546 = fadd <8 x float> undef, %binop11.i20545
+ %binop15.i20547 = fmul <8 x float> %binop411, undef
+ %binop16.i20548 = fadd <8 x float> %binop12.i20546, %binop15.i20547
+ %bincmp774 = fcmp ogt <8 x float> %binop16.i20548, zeroinitializer
+ %val_to_boolvec32775 = sext <8 x i1> %bincmp774 to <8 x i32>
+ %floatmask.i20549 = bitcast <8 x i32> %val_to_boolvec32775 to <8 x float>
+ %v.i20550 = tail call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i20549) nounwind readnone
+ %cond = icmp eq i32 %v.i20550, 255
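+ ; movmskps on a 256-bit vector packs the eight lane sign bits into bits 0-7,
+ ; so a result of 255 means the comparison was true in every lane.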
+ br i1 %cond, label %cif_test_all794, label %cif_test_mixed
+
+cif_test_all794: ; preds = %cif_test_all730
+ %binop.i20572 = fmul <8 x float> %binop403, undef
+ unreachable
+
+cif_test_mixed: ; preds = %cif_test_all730
+ %binop1207 = fmul <8 x float> %binop.i20246, undef
+ unreachable
+
+cif_mask_mixed1552: ; preds = %for_loop594.lr.ph
+ unreachable
+}
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
new file mode 100644
index 000000000000..0a949eb29b89
--- /dev/null
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "x86_64-apple-macosx10.6.6"
+
+; Test that the order of the pblendvb operands is correct
+; CHECK: select_func
+; CHECK: pblendvb %xmm1, %xmm2
+; CHECK: ret
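+; (SSE4.1 pblendvb takes its mask implicitly in xmm0 and copies bytes from the
+; source operand wherever the mask byte's sign bit is set, so a swapped operand
+; order silently selects the wrong input.)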
+
+define void @select_func() {
+entry:
+ %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
+ %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
+ %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
+ %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
+ %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
+ %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+ %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
+ store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
new file mode 100644
index 000000000000..fcaabddd2cc5
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-AVXVectorExtractCombine.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11494
+
+define void @test(<4 x i32>* nocapture %p) nounwind {
+ ; CHECK: test:
+ ; CHECK: vpxor %xmm0, %xmm0, %xmm0
+ ; CHECK-NEXT: vpmaxsd {{.*}}, %xmm0, %xmm0
+ ; CHECK-NEXT: vmovdqu %xmm0, (%rdi)
+ ; CHECK-NEXT: ret
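+ ; The two shuffles widen the pmaxsd result into lanes 4-7 and then pull the
+ ; same four lanes back out, so the combiner should fold the round trip away
+ ; and store the 128-bit result directly.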
+ %a = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> <i32 -8, i32 -9, i32 -10, i32 -11>, <4 x i32> zeroinitializer) nounwind
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
+ %c = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ store <4 x i32> %c, <4 x i32>* %p, align 1
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
new file mode 100644
index 000000000000..7a4126f4ae2a
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-06-BitcastVectorGlobal.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR11495
+
+; CHECK: 1311768467463790320
+@v = global <2 x float> bitcast (<1 x i64> <i64 1311768467463790320> to <2 x float>), align 8
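+; (1311768467463790320 is 0x123456789ABCDEF0; the check verifies that the
+; vector-to-vector bitcast in the initializer is folded to that plain integer
+; constant when the global is emitted.)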
diff --git a/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
new file mode 100644
index 000000000000..1561784dee32
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7-avx -mattr=+avx
+; Various missing patterns causing crashes.
+; rdar://10538793
+
+define void @t1() nounwind {
+entry:
+ br label %loop.cond
+
+loop.cond: ; preds = %t1.exit, %entry
+ br i1 false, label %return, label %loop
+
+loop: ; preds = %loop.cond
+ br i1 undef, label %0, label %t1.exit
+
+; <label>:0 ; preds = %loop
+ %1 = load <16 x i32> addrspace(1)* undef, align 64
+ %2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
+ store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+ br label %t1.exit
+
+t1.exit: ; preds = %0, %loop
+ br label %loop.cond
+
+return: ; preds = %loop.cond
+ ret void
+}
+
+define void @t2() nounwind {
+ br i1 undef, label %1, label %4
+
+; <label>:1 ; preds = %0
+ %2 = load <16 x i32> addrspace(1)* undef, align 64
+ %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
+ store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+ br label %4
+
+; <label>:4 ; preds = %1, %0
+ ret void
+}
+
+define void @t3() nounwind {
+entry:
+ br label %loop.cond
+
+loop.cond: ; preds = %t2.exit, %entry
+ br i1 false, label %return, label %loop
+
+loop: ; preds = %loop.cond
+ br i1 undef, label %0, label %t2.exit
+
+; <label>:0 ; preds = %loop
+ %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
+ %2 = load <16 x i32> addrspace(1)* undef, align 64
+ %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+ store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
+ br label %t2.exit
+
+t2.exit: ; preds = %0, %loop
+ br label %loop.cond
+
+return: ; preds = %loop.cond
+ ret void
+}
+
+define <3 x i64> @t4() nounwind {
+entry:
+ %0 = load <2 x i64> addrspace(1)* undef, align 16
+ %1 = extractelement <2 x i64> %0, i32 0
+ %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
+ ret <3 x i64> %2
+}
+
+define void @t5() nounwind {
+entry:
+ %0 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %1 = shufflevector <8 x i64> <i64 0, i64 0, i64 0, i64 undef, i64 undef, i64 0, i64 0, i64 0>, <8 x i64> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 9, i32 8, i32 5, i32 6, i32 7>
+ store <8 x i64> %1, <8 x i64> addrspace(1)* undef, align 64
+
+ ret void
+}
diff --git a/test/CodeGen/X86/2011-12-15-vec_shift.ll b/test/CodeGen/X86/2011-12-15-vec_shift.ll
new file mode 100644
index 000000000000..6f9188c44268
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-15-vec_shift.ll
@@ -0,0 +1,19 @@
+; RUN: llc -march=x86-64 -mattr=+sse41 < %s | FileCheck %s -check-prefix=CHECK-W-SSE4
+; RUN: llc -march=x86-64 -mattr=-sse41 < %s | FileCheck %s -check-prefix=CHECK-WO-SSE4
+; Test case for r146671
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+define <16 x i8> @shift(<16 x i8> %a, <16 x i8> %b) nounwind {
+ ; Make sure operands to pblend are in the right order.
+ ; CHECK-W-SSE4: psllw $4, [[REG1:%xmm.]]
+ ; CHECK-W-SSE4: pblendvb [[REG1]],{{ %xmm.}}
+ ; CHECK-W-SSE4: psllw $2
+
+ ; Make sure we're masking and pcmp'ing the VSELECT condition vector.
+ ; CHECK-WO-SSE4: psllw $5, [[REG1:%xmm.]]
+ ; CHECK-WO-SSE4: pand [[REG1]], [[REG2:%xmm.]]
+ ; CHECK-WO-SSE4: pcmpeqb {{%xmm., }}[[REG2]]
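+ ; (Without pblendvb the vector select is presumably emulated with and/andn
+ ; masks, so the sign-bit condition first has to be widened to a full 0/-1
+ ; byte mask, hence the pand and pcmpeqb.)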
+ %1 = shl <16 x i8> %a, %b
+ ret <16 x i8> %1
+}
diff --git a/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
new file mode 100644
index 000000000000..39c213f00ab8
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -0,0 +1,16 @@
+; RUN: llc -march=x86-64 -mattr=-sse42,+sse41 < %s | FileCheck %s
+; Make sure we don't load from the location pointed to by %p
+; twice: doing so has non-obvious performance implications, and
+; the relevant transformation doesn't know how to update
+; the chains correctly.
+; PR10747
+
+; CHECK: test:
+; CHECK: pextrd $2, %xmm
+define <4 x i32> @test(<4 x i32>* %p) {
+ %v = load <4 x i32>* %p
+ %e = extractelement <4 x i32> %v, i32 2
+ %cmp = icmp eq i32 %e, 3
+ %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
+ ret <4 x i32> %sel
+}
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
new file mode 100644
index 000000000000..dbc122ac6e40
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+; ModuleID = '<stdin>'
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin11.2.0"
+
+; CHECK: @foo8
+; CHECK: psll
+; CHECK: psraw
+; CHECK: pblendvb
+; CHECK: ret
+define void @foo8(float* nocapture %RET) nounwind {
+allocas:
+ %resultvec.i = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, <8 x i8> <i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100, i8 100>
+ %uint2float = uitofp <8 x i8> %resultvec.i to <8 x float>
+ %ptr = bitcast float* %RET to <8 x float>*
+ store <8 x float> %uint2float, <8 x float>* %ptr, align 4
+ ret void
+}
+
+
diff --git a/test/CodeGen/X86/2011-12-8-bitcastintprom.ll b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
new file mode 100644
index 000000000000..e2b3ebcf76eb
--- /dev/null
+++ b/test/CodeGen/X86/2011-12-8-bitcastintprom.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 | FileCheck %s
+
+; Make sure that the conversion from v4i8 to v2i16 is not a simple bitcast.
+; CHECK: prom_bug
+; CHECK: shufb
+; CHECK: movd
+; CHECK: movw
+; CHECK: ret
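+; (In registers <4 x i8> is promoted to wider elements, so reinterpreting it
+; as <2 x i16> needs a real repacking shuffle and narrow stores, not just a
+; type change.)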
+define void @prom_bug(<4 x i8> %t, i16* %p) {
+ %r = bitcast <4 x i8> %t to <2 x i16>
+ %o = extractelement <2 x i16> %r, i32 0
+ store i16 %o, i16* %p
+ ret void
+}
+
diff --git a/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
new file mode 100644
index 000000000000..75efcf5ac47b
--- /dev/null
+++ b/test/CodeGen/X86/2011-20-21-zext-ui2fp.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that the booleans are converted using zext rather than sext.
+; The 0x1 mask in the output shows that only the low bit of each lane is used.
+
+;CHECK: 0x1
+;CHECK: ui_to_fp_conv
+;CHECK: ret
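+; (zext turns each i1 into 0 or 1, which converts to 0.0 or 1.0; sext would
+; yield 0 or -1 and therefore the wrong float values.)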
+define void @ui_to_fp_conv(<8 x float> * nocapture %aFOO, <8 x float>* nocapture %RET) nounwind {
+allocas:
+ %bincmp = fcmp olt <8 x float> <float 1.000000e+00, float 1.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> , <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+ %bool2float = uitofp <8 x i1> %bincmp to <8 x float>
+ store <8 x float> %bool2float, <8 x float>* %RET, align 4
+ ret void
+}
+
+
+
diff --git a/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
new file mode 100644
index 000000000000..832a8eb8144c
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -0,0 +1,155 @@
+; RUN: llc < %s -disable-fp-elim
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7"
+
+; This test case has a landing pad with two predecessors, and a variable that
+; is undef on the first edge while carrying the first function return value on
+; the second edge.
+;
+; Live range splitting tries to isolate the block containing the first function
+; call, and it is important that the last split point is after the function call
+; so the return value can spill.
+;
+; <rdar://problem/10664933>
+
+@Exception = external unnamed_addr constant { i8*, i8* }
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define void @f(i32* nocapture %arg, i32* nocapture %arg1, i32* nocapture %arg2, i32* nocapture %arg3, i32 %arg4, i32 %arg5) optsize ssp {
+bb:
+ br i1 undef, label %bb6, label %bb7
+
+bb6: ; preds = %bb
+ %tmp = select i1 false, i32 0, i32 undef
+ br label %bb7
+
+bb7: ; preds = %bb6, %bb
+ %tmp8 = phi i32 [ %tmp, %bb6 ], [ 0, %bb ]
+ %tmp9 = shl i32 %tmp8, 2
+ %tmp10 = invoke noalias i8* @_Znam(i32 undef) optsize
+ to label %bb11 unwind label %bb20
+
+bb11: ; preds = %bb7
+ %tmp12 = ptrtoint i8* %tmp10 to i32
+ %tmp13 = bitcast i8* %tmp10 to i32*
+ %tmp14 = shl i32 %tmp8, 2
+ %tmp15 = getelementptr i32* %tmp13, i32 undef
+ %tmp16 = getelementptr i32* %tmp13, i32 undef
+ %tmp17 = zext i32 %tmp9 to i64
+ %tmp18 = add i64 %tmp17, -1
+ %tmp19 = icmp ugt i64 %tmp18, 4294967295
+ br i1 %tmp19, label %bb29, label %bb31
+
+bb20: ; preds = %bb43, %bb41, %bb29, %bb7
+ %tmp21 = phi i32 [ undef, %bb7 ], [ %tmp12, %bb43 ], [ %tmp12, %bb29 ], [ %tmp12, %bb41 ]
+ %tmp22 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8* }* @Exception to i8*)
+ br i1 undef, label %bb23, label %bb69
+
+bb23: ; preds = %bb38, %bb20
+ %tmp24 = phi i32 [ %tmp12, %bb38 ], [ %tmp21, %bb20 ]
+ %tmp25 = icmp eq i32 %tmp24, 0
+ br i1 %tmp25, label %bb28, label %bb26
+
+bb26: ; preds = %bb23
+ %tmp27 = inttoptr i32 %tmp24 to i8*
+ br label %bb28
+
+bb28: ; preds = %bb26, %bb23
+ ret void
+
+bb29: ; preds = %bb11
+ invoke void @OnOverFlow() optsize
+ to label %bb30 unwind label %bb20
+
+bb30: ; preds = %bb29
+ unreachable
+
+bb31: ; preds = %bb11
+ %tmp32 = bitcast i32* %tmp15 to i8*
+ %tmp33 = zext i32 %tmp8 to i64
+ %tmp34 = add i64 %tmp33, -1
+ %tmp35 = icmp ugt i64 %tmp34, 4294967295
+ %tmp36 = icmp sgt i32 %tmp8, 0
+ %tmp37 = add i32 %tmp9, -4
+ br label %bb38
+
+bb38: ; preds = %bb67, %bb31
+ %tmp39 = phi i32 [ %tmp68, %bb67 ], [ undef, %bb31 ]
+ %tmp40 = icmp sgt i32 %tmp39, undef
+ br i1 %tmp40, label %bb41, label %bb23
+
+bb41: ; preds = %bb38
+ invoke void @Pjii(i32* %tmp16, i32 0, i32 %tmp8) optsize
+ to label %bb42 unwind label %bb20
+
+bb42: ; preds = %bb41
+ tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+ br i1 %tmp35, label %bb43, label %bb45
+
+bb43: ; preds = %bb42
+ invoke void @OnOverFlow() optsize
+ to label %bb44 unwind label %bb20
+
+bb44: ; preds = %bb43
+ unreachable
+
+bb45: ; preds = %bb57, %bb42
+ %tmp46 = phi i32 [ %tmp58, %bb57 ], [ 255, %bb42 ]
+ %tmp47 = icmp slt i32 undef, 0
+ br i1 %tmp47, label %bb48, label %bb59
+
+bb48: ; preds = %bb45
+ tail call void @llvm.memset.p0i8.i32(i8* %tmp32, i8 0, i32 %tmp9, i32 1, i1 false) nounwind
+ br i1 %tmp36, label %bb49, label %bb57
+
+bb49: ; preds = %bb49, %bb48
+ %tmp50 = phi i32 [ %tmp55, %bb49 ], [ 0, %bb48 ]
+ %tmp51 = add i32 %tmp50, undef
+ %tmp52 = add i32 %tmp50, undef
+ %tmp53 = getelementptr i32* %tmp13, i32 %tmp52
+ %tmp54 = load i32* %tmp53, align 4, !tbaa !0
+ %tmp55 = add i32 %tmp50, 1
+ %tmp56 = icmp eq i32 %tmp55, %tmp8
+ br i1 %tmp56, label %bb57, label %bb49
+
+bb57: ; preds = %bb49, %bb48
+ %tmp58 = add i32 %tmp46, -1
+ br label %bb45
+
+bb59: ; preds = %bb45
+ %tmp60 = ashr i32 %tmp46, 31
+ tail call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 %tmp37, i32 1, i1 false) nounwind
+ br i1 %tmp36, label %bb61, label %bb67
+
+bb61: ; preds = %bb61, %bb59
+ %tmp62 = phi i32 [ %tmp65, %bb61 ], [ 0, %bb59 ]
+ %tmp63 = add i32 %tmp62, %tmp14
+ %tmp64 = getelementptr i32* %tmp13, i32 %tmp63
+ store i32 0, i32* %tmp64, align 4, !tbaa !0
+ %tmp65 = add i32 %tmp62, 1
+ %tmp66 = icmp eq i32 %tmp65, %tmp8
+ br i1 %tmp66, label %bb67, label %bb61
+
+bb67: ; preds = %bb61, %bb59
+ %tmp68 = add i32 %tmp39, -1
+ br label %bb38
+
+bb69: ; preds = %bb20
+ resume { i8*, i32 } %tmp22
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare noalias i8* @_Znam(i32) optsize
+
+declare void @Pjii(i32*, i32, i32) optsize
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare void @OnOverFlow() noreturn optsize ssp align 2
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/X86/2012-01-11-split-cv.ll b/test/CodeGen/X86/2012-01-11-split-cv.ll
new file mode 100644
index 000000000000..6b9007291901
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+;CHECK: add18i16
+define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
+;CHECK: vmovups
+ %b = load <18 x i16>* %bp, align 16
+ %x = add <18 x i16> zeroinitializer, %b
+ store <18 x i16> %x, <18 x i16>* %ret, align 16
+;CHECK: ret
+ ret void
+}
+
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
new file mode 100644
index 000000000000..fa8e80f0bdef
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: endless_loop
+define void @endless_loop() {
+entry:
+ %0 = load <8 x i32> addrspace(1)* undef, align 32
+ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
+ store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+ ret void
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
new file mode 100644
index 000000000000..a883d7938b55
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -mtriple=i686-linux -mattr=-sse | FileCheck %s
+; PR11768
+
+@ptr = external global i8*
+
+define void @baz() nounwind ssp {
+entry:
+ %0 = load i8** @ptr, align 4
+ %cmp = icmp eq i8* %0, null
+ fence seq_cst
+ br i1 %cmp, label %if.then, label %if.else
+
+; Make sure the fence comes before the comparison: with SSE disabled there is
+; no mfence, so the fence is lowered to a lock-prefixed orl, and that
+; instruction clobbers EFLAGS.
+
+; CHECK: lock
+; CHECK-NEXT: orl {{.*}}, (%esp)
+; CHECK-NEXT: cmpl $0
+
+if.then: ; preds = %entry
+ tail call void bitcast (void (...)* @foo to void ()*)() nounwind
+ br label %if.end
+
+if.else: ; preds = %entry
+ tail call void bitcast (void (...)* @bar to void ()*)() nounwind
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+declare void @foo(...)
+
+declare void @bar(...)
diff --git a/test/CodeGen/X86/2012-01-18-vbitcast.ll b/test/CodeGen/X86/2012-01-18-vbitcast.ll
new file mode 100644
index 000000000000..8a3ccc8dfda5
--- /dev/null
+++ b/test/CodeGen/X86/2012-01-18-vbitcast.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s
+
+;CHECK: vcast
+define <2 x i32> @vcast(<2 x float> %a, <2 x float> %b) {
+;CHECK: pshufd
+;CHECK: pshufd
+ %af = bitcast <2 x float> %a to <2 x i32>
+ %bf = bitcast <2 x float> %b to <2 x i32>
+ %x = sub <2 x i32> %af, %bf
+;CHECK: psubq
+ ret <2 x i32> %x
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/2012-02-12-dagco.ll b/test/CodeGen/X86/2012-02-12-dagco.ll
new file mode 100644
index 000000000000..13723a229943
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we do not crash on this one.
+define void @dagco_crash() {
+entry:
+ %srcval.i411.i = load <4 x i64>* undef, align 1
+ %0 = extractelement <4 x i64> %srcval.i411.i, i32 3
+ %srcval.i409.i = load <2 x i64>* undef, align 1
+ %1 = extractelement <2 x i64> %srcval.i409.i, i32 0
+ %2 = insertelement <8 x i64> undef, i64 %0, i32 5
+ %3 = insertelement <8 x i64> %2, i64 %1, i32 6
+ %4 = insertelement <8 x i64> %3, i64 undef, i32 7
+ store <8 x i64> %4, <8 x i64> addrspace(1)* undef, align 64
+ unreachable
+}
+
diff --git a/test/CodeGen/X86/2012-02-14-scalar.ll b/test/CodeGen/X86/2012-02-14-scalar.ll
new file mode 100644
index 000000000000..1dc076b3e0d3
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-14-scalar.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx
+target triple = "x86_64-unknown-linux-gnu"
+; Make sure we do not crash on this one.
+define void @autogen_28112_5000() {
+BB:
+ %S17 = icmp sgt <1 x i64> undef, undef
+ %E19 = extractelement <1 x i1> %S17, i32 0
+ br label %CF
+
+CF: ; preds = %CF, %BB
+ %S23 = select i1 %E19, i8 undef, i8 undef
+ br label %CF
+}
diff --git a/test/CodeGen/X86/2012-02-20-MachineCPBug.ll b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
new file mode 100644
index 000000000000..557d49d82f84
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-20-MachineCPBug.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=i386-apple-macosx -mattr=+sse | FileCheck %s
+; PR11940: Do not optimize away movb %al, %ch
+
+%struct.APInt = type { i64* }
+
+declare noalias i8* @calloc(i32, i32) nounwind
+
+define void @bug(%struct.APInt* noalias nocapture sret %agg.result, %struct.APInt* nocapture %this, i32 %rotateAmt) nounwind align 2 {
+entry:
+; CHECK: bug:
+ %call = tail call i8* @calloc(i32 1, i32 32)
+ %call.i = tail call i8* @calloc(i32 1, i32 32) nounwind
+ %0 = bitcast i8* %call.i to i64*
+ %rem.i = and i32 %rotateAmt, 63
+ %div.i = lshr i32 %rotateAmt, 6
+ %cmp.i = icmp eq i32 %rem.i, 0
+ br i1 %cmp.i, label %for.cond.preheader.i, label %if.end.i
+
+for.cond.preheader.i: ; preds = %entry
+ %sub.i = sub i32 4, %div.i
+ %cmp23.i = icmp eq i32 %div.i, 4
+ br i1 %cmp23.i, label %for.body9.lr.ph.i, label %for.body.lr.ph.i
+
+for.body.lr.ph.i: ; preds = %for.cond.preheader.i
+ %pVal.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+ %.pre5.i = load i64** %pVal.i, align 4
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i, %for.body.lr.ph.i
+ %i.04.i = phi i32 [ 0, %for.body.lr.ph.i ], [ %inc.i, %for.body.i ]
+ %add.i = add i32 %i.04.i, %div.i
+ %arrayidx.i = getelementptr inbounds i64* %.pre5.i, i32 %add.i
+ %1 = load i64* %arrayidx.i, align 4
+ %arrayidx3.i = getelementptr inbounds i64* %0, i32 %i.04.i
+ store i64 %1, i64* %arrayidx3.i, align 4
+ %inc.i = add i32 %i.04.i, 1
+ %cmp2.i = icmp ult i32 %inc.i, %sub.i
+ br i1 %cmp2.i, label %for.body.i, label %if.end.i
+
+if.end.i: ; preds = %for.body.i, %entry
+ %cmp81.i = icmp eq i32 %div.i, 3
+ br i1 %cmp81.i, label %_ZNK5APInt4lshrEj.exit, label %for.body9.lr.ph.i
+
+for.body9.lr.ph.i: ; preds = %if.end.i, %for.cond.preheader.i
+ %sub58.i = sub i32 3, %div.i
+ %pVal11.i = getelementptr inbounds %struct.APInt* %this, i32 0, i32 0
+ %sh_prom.i = zext i32 %rem.i to i64
+ %sub17.i = sub i32 64, %rem.i
+ %sh_prom18.i = zext i32 %sub17.i to i64
+ %.pre.i = load i64** %pVal11.i, align 4
+ br label %for.body9.i
+
+for.body9.i: ; preds = %for.body9.i, %for.body9.lr.ph.i
+; CHECK: %for.body9.i
+; CHECK: movb
+; CHECK: shrdl
+ %i6.02.i = phi i32 [ 0, %for.body9.lr.ph.i ], [ %inc21.i, %for.body9.i ]
+ %add10.i = add i32 %i6.02.i, %div.i
+ %arrayidx12.i = getelementptr inbounds i64* %.pre.i, i32 %add10.i
+ %2 = load i64* %arrayidx12.i, align 4
+ %shr.i = lshr i64 %2, %sh_prom.i
+ %add14.i = add i32 %add10.i, 1
+ %arrayidx16.i = getelementptr inbounds i64* %.pre.i, i32 %add14.i
+ %3 = load i64* %arrayidx16.i, align 4
+ %shl.i = shl i64 %3, %sh_prom18.i
+ %or.i = or i64 %shl.i, %shr.i
+ %arrayidx19.i = getelementptr inbounds i64* %0, i32 %i6.02.i
+ store i64 %or.i, i64* %arrayidx19.i, align 4
+ %inc21.i = add i32 %i6.02.i, 1
+ %cmp8.i = icmp ult i32 %inc21.i, %sub58.i
+ br i1 %cmp8.i, label %for.body9.i, label %_ZNK5APInt4lshrEj.exit
+
+_ZNK5APInt4lshrEj.exit: ; preds = %for.body9.i, %if.end.i
+ %call.i1 = tail call i8* @calloc(i32 1, i32 32) nounwind
+ %4 = getelementptr inbounds %struct.APInt* %agg.result, i32 0, i32 0
+ store i64* %0, i64** %4, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
new file mode 100644
index 000000000000..a55c77bd2266
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-23-mmx-inlineasm.ll
@@ -0,0 +1,12 @@
+; RUN: llc -march=x86 -mcpu=i686 -mattr=+mmx < %s | FileCheck %s
+; <rdar://problem/10106006>
+
+define void @func() nounwind ssp {
+; CHECK: psrlw %mm0, %mm1
+entry:
+ call void asm sideeffect "psrlw $0, %mm1", "y,~{dirflag},~{fpsr},~{flags}"(i32 8) nounwind
+ unreachable
+
+bb367: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
new file mode 100644
index 000000000000..bdce85325f37
--- /dev/null
+++ b/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -0,0 +1,58 @@
+; RUN: llc -O1 <%s
+; PR12138
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.0"
+
+%struct.S0 = type { i8, i32 }
+
+@d = external global [2 x [2 x %struct.S0]], align 4
+@c = external global i32, align 4
+@e = external global i32, align 4
+@b = external global i32, align 4
+@a = external global i32, align 4
+
+define void @fn2() nounwind optsize ssp {
+entry:
+ store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
+ %0 = load i32* @c, align 4
+ %tobool2 = icmp eq i32 %0, 0
+ %1 = load i32* @a, align 4
+ %tobool4 = icmp eq i32 %1, 0
+ br label %for.cond
+
+for.cond: ; preds = %if.end, %entry
+ %f.1.0 = phi i32 [ undef, %entry ], [ %sub, %if.end ]
+ %g.0 = phi i64 [ 0, %entry ], [ %ins, %if.end ]
+ %tobool = icmp eq i32 %f.1.0, 0
+ br i1 %tobool, label %for.end, label %for.body
+
+for.body: ; preds = %for.cond
+ %2 = lshr i64 %g.0, 32
+ %conv = trunc i64 %2 to i16
+ br i1 %tobool2, label %lor.rhs, label %lor.end
+
+lor.rhs: ; preds = %for.body
+ store i32 1, i32* @e, align 4
+ br label %lor.end
+
+lor.end: ; preds = %lor.rhs, %for.body
+ %xor.i = xor i16 %conv, 1
+ %p1.lobit.i8 = lshr i64 %g.0, 47
+ %p1.lobit.i8.tr = trunc i64 %p1.lobit.i8 to i16
+ %p1.lobit.i = and i16 %p1.lobit.i8.tr, 1
+ %and.i = and i16 %p1.lobit.i, %xor.i
+ %3 = xor i16 %and.i, 1
+ %sub.conv.i = sub i16 %conv, %3
+ %conv3 = sext i16 %sub.conv.i to i32
+ store i32 %conv3, i32* @b, align 4
+ br i1 %tobool4, label %if.end, label %for.end
+
+if.end: ; preds = %lor.end
+ %mask = and i64 %g.0, -256
+ %ins = or i64 %mask, 1
+ %sub = add nsw i32 %f.1.0, -1
+ br label %for.cond
+
+for.end: ; preds = %lor.end, %for.cond
+ ret void
+}
diff --git a/test/CodeGen/X86/2012-03-15-build_vector_wl.ll b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
new file mode 100644
index 000000000000..fec17e9f4aca
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-15-build_vector_wl.ll
@@ -0,0 +1,10 @@
+
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: build_vector_again
+define <4 x i8> @build_vector_again(<16 x i8> %in) nounwind readnone {
+entry:
+ %out = shufflevector <16 x i8> %in, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: shufb
+ ret <4 x i8> %out
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
new file mode 100644
index 000000000000..d24647e06448
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-20-LargeConstantExpr.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/11070338>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: _.memset_pattern:
+; CHECK-NEXT: .quad 4575657222473777152
+; CHECK-NEXT: .quad 4575657222473777152
+
+@.memset_pattern = internal unnamed_addr constant i128 or (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 shl (i128 zext (i64 bitcast (<2 x float> <float 1.000000e+00, float 1.000000e+00> to i64) to i128), i128 64)), align 16
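+; (4575657222473777152 is 0x3F8000003F800000, two copies of 1.0f, so the two
+; .quad directives show the i128 constant expression was folded correctly.)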
+
+define void @foo(i8* %a, i64 %b) {
+ call void @memset_pattern16(i8* %a, i8* bitcast (i128* @.memset_pattern to i8*), i64 %b)
+ ret void
+}
+
+declare void @memset_pattern16(i8*, i8*, i64)
diff --git a/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
new file mode 100644
index 000000000000..101eccabbd49
--- /dev/null
+++ b/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats |& \
+; RUN: not grep {Number of machine instructions hoisted out of loops post regalloc}
+
+; rdar://11095580
+
+%struct.ref_s = type { %union.color_sample, i16, i16 }
+%union.color_sample = type { i64 }
+
+@table = external global [3891 x i64]
+
+declare i32 @foo()
+
+define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp {
+entry:
+ %call = tail call i32 @foo()
+ %tmp = ashr i32 %call, 31
+ %0 = and i32 %tmp, 1396
+ %index9 = add i32 %0, 2397
+ indirectbr i8* undef, [label %return, label %if.end]
+
+if.end: ; preds = %entry
+ %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2
+ %tmp6 = load i16* %size5, align 2
+ %tobool1 = icmp eq i16 %tmp6, 0
+ %1 = select i1 %tobool1, i32 1396, i32 -1910
+ %index10 = add i32 %index9, %1
+ indirectbr i8* undef, [label %return, label %while.body.lr.ph]
+
+while.body.lr.ph: ; preds = %if.end
+ %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
+ %tmp9 = load %struct.ref_s** %refs, align 8
+ %tmp4 = zext i16 %tmp6 to i64
+ %index13 = add i32 %index10, 1658
+ %2 = sext i32 %index13 to i64
+ %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
+ %blockaddress14 = load i64* %3, align 8
+ %4 = inttoptr i64 %blockaddress14 to i8*
+ indirectbr i8* %4, [label %while.body]
+
+while.body: ; preds = %while.body, %while.body.lr.ph
+ %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
+ %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+ %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1
+ store i16 32, i16* %type_attrs, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond5 = icmp eq i64 %indvar.next, %tmp4
+ %tmp7 = select i1 %exitcond5, i32 1648, i32 0
+ %index15 = add i32 %index7, %tmp7
+ %tmp8 = select i1 %exitcond5, i64 13, i64 0
+ %5 = sext i32 %index15 to i64
+ %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
+ %blockaddress16 = load i64* %6, align 8
+ %7 = inttoptr i64 %blockaddress16 to i8*
+ indirectbr i8* %7, [label %return, label %while.body]
+
+return: ; preds = %while.body, %if.end, %entry
+ %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
new file mode 100644
index 000000000000..2d9016508876
--- /dev/null
+++ b/test/CodeGen/X86/2012-04-09-TwoAddrPassBug.ll
@@ -0,0 +1,34 @@
+; RUN: llc -O1 -verify-coalescing < %s
+; PR12495
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+define void @func(i8* nocapture) nounwind uwtable ssp align 2 {
+ br i1 undef, label %4, label %2
+
+; <label>:2 ; preds = %1
+ %3 = tail call double @foo() nounwind
+ br label %4
+
+; <label>:4 ; preds = %2, %1
+ %5 = phi double [ %3, %2 ], [ 0.000000e+00, %1 ]
+ %6 = fsub double %5, undef
+ %7 = fcmp olt double %6, 0.000000e+00
+ %8 = select i1 %7, double 0.000000e+00, double %6
+ %9 = fcmp olt double undef, 0.000000e+00
+ %10 = fcmp olt double %8, undef
+ %11 = or i1 %9, %10
+ br i1 %11, label %12, label %14
+
+; <label>:12 ; preds = %4
+ %13 = tail call double @fmod(double %8, double 0.000000e+00) nounwind
+ unreachable
+
+; <label>:14 ; preds = %4
+ ret void
+}
+
+declare double @foo()
+
+declare double @fmod(double, double)
diff --git a/test/CodeGen/X86/2012-1-10-buildvector.ll b/test/CodeGen/X86/2012-1-10-buildvector.ll
new file mode 100644
index 000000000000..ff6be369dc57
--- /dev/null
+++ b/test/CodeGen/X86/2012-1-10-buildvector.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+;CHECK: bad_cast
+define void @bad_cast() {
+entry:
+ %vext.i = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
+ %vecinit8.i = shufflevector <3 x i64> zeroinitializer, <3 x i64> %vext.i, <3 x i32> <i32 0, i32 3, i32 4>
+ store <3 x i64> %vecinit8.i, <3 x i64>* undef, align 32
+;CHECK: ret
+ ret void
+}
+
+
+;CHECK: bad_insert
+define void @bad_insert(i32 %t) {
+entry:
+;CHECK: vpinsrd
+ %v2 = insertelement <8 x i32> zeroinitializer, i32 %t, i32 0
+ store <8 x i32> %v2, <8 x i32> addrspace(1)* undef, align 32
+;CHECK: ret
+ ret void
+}
+
diff --git a/test/CodeGen/X86/GC/dg.exp b/test/CodeGen/X86/GC/dg.exp
deleted file mode 100644
index 629a14773615..000000000000
--- a/test/CodeGen/X86/GC/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/GC/lit.local.cfg b/test/CodeGen/X86/GC/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/CodeGen/X86/GC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/X86/SwizzleShuff.ll b/test/CodeGen/X86/SwizzleShuff.ll
new file mode 100644
index 000000000000..100817a676e8
--- /dev/null
+++ b/test/CodeGen/X86/SwizzleShuff.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; Check that we perform a scalar XOR on i32.
+
+; CHECK: pull_bitcast
+; CHECK: xorl
+; CHECK: ret
+define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
+ %A = load <4 x i8>* %pA
+ %B = load <4 x i8>* %pB
+ %C = xor <4 x i8> %A, %B
+ store <4 x i8> %C, <4 x i8>* %pA
+ ret void
+}
+
+; CHECK: multi_use_swizzle
+; CHECK: mov
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: shuf
+; CHECK-NEXT: xor
+; CHECK-NEXT: ret
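+; (The swizzle %S has two users, so it must be materialized once and the two
+; dependent shuffles emitted separately instead of being folded into each use.)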
+define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
+ %A = load <4 x i32>* %pA
+ %B = load <4 x i32>* %pB
+ %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
+ %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
+ %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
+ %R = xor <4 x i32> %S1, %S2
+ ret <4 x i32> %R
+}
+
+; CHECK: pull_bitcast2
+; CHECK: xorl
+; CHECK: ret
+define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
+ %A = load <4 x i8>* %pA
+ store <4 x i8> %A, <4 x i8>* %pC
+ %B = load <4 x i8>* %pB
+ %C = xor <4 x i8> %A, %B
+ store <4 x i8> %C, <4 x i8>* %pA
+ ret <4 x i8> %C
+}
+
+
+
+; CHECK: reverse_1
+; CHECK-NOT: shuf
+; CHECK: ret
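+; (Applying the same <1,0,3,2> permutation twice is the identity, so both
+; shuffles should fold away.)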
+define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
+ %A = load <4 x i32>* %pA
+ %B = load <4 x i32>* %pB
+ %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ ret <4 x i32> %S1
+}
+
+
+; CHECK: no_reverse_shuff
+; CHECK: shuf
+; CHECK: ret
+define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
+ %A = load <4 x i32>* %pA
+ %B = load <4 x i32>* %pB
+ %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+ ret <4 x i32> %S1
+}
diff --git a/test/CodeGen/X86/abi-isel.ll b/test/CodeGen/X86/abi-isel.ll
index 5068d2929dbf..658ccaa71dc0 100644
--- a/test/CodeGen/X86/abi-isel.ll
+++ b/test/CodeGen/X86/abi-isel.ll
@@ -1,16 +1,16 @@
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
-; RUN: llc < %s -asm-verbose=0 -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
+; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
@src = external global [131072 x i32]
@dst = external global [131072 x i32]
diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll
index 7bf527ab744d..8e871f4aeb4d 100644
--- a/test/CodeGen/X86/add.ll
+++ b/test/CodeGen/X86/add.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -join-physregs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -join-physregs | FileCheck %s -check-prefix=X64
; Some of these tests depend on -join-physregs to commute instructions.
diff --git a/test/CodeGen/X86/apm.ll b/test/CodeGen/X86/apm.ll
index b514cf6427d5..aaedf18481b5 100644
--- a/test/CodeGen/X86/apm.ll
+++ b/test/CodeGen/X86/apm.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse3 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse3 | FileCheck %s -check-prefix=WIN64
; PR8573
; CHECK: foo:
diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll
new file mode 100644
index 000000000000..59427880a71d
--- /dev/null
+++ b/test/CodeGen/X86/atom-lea-sp.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=atom %s
+; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s
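+; (On Atom, adjusting the stack pointer with lea instead of sub/add avoids an
+; EFLAGS dependence, which is presumably why the atom-prefixed checks expect
+; lea while the core2 checks expect the usual subl.)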
+
+declare void @use_arr(i8*)
+declare void @many_params(i32, i32, i32, i32, i32, i32)
+
+define void @test1() nounwind {
+; atom: test1:
+; atom: leal -1052(%esp), %esp
+; atom-NOT: sub
+; atom: call
+; atom: leal 1052(%esp), %esp
+
+; CHECK: test1:
+; CHECK: subl
+; CHECK: call
+; CHECK-NOT: lea
+ %arr = alloca [1024 x i8], align 16
+ %arr_ptr = getelementptr inbounds [1024 x i8]* %arr, i8 0, i8 0
+ call void @use_arr(i8* %arr_ptr)
+ ret void
+}
+
+define void @test2() nounwind {
+; atom: test2:
+; atom: leal -28(%esp), %esp
+; atom: call
+; atom: leal 28(%esp), %esp
+
+; CHECK: test2:
+; CHECK-NOT: lea
+ call void @many_params(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
+ ret void
+}
+
+define void @test3() nounwind {
+; atom: test3:
+; atom: leal -8(%esp), %esp
+; atom: leal 8(%esp), %esp
+
+; CHECK: test3:
+; CHECK-NOT: lea
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ store i32 0, i32* %x, align 4
+ ret void
+}
+
diff --git a/test/CodeGen/X86/atom-sched.ll b/test/CodeGen/X86/atom-sched.ll
new file mode 100644
index 000000000000..2301dfc020ad
--- /dev/null
+++ b/test/CodeGen/X86/atom-sched.ll
@@ -0,0 +1,28 @@
+; RUN: llc <%s -O2 -mcpu=atom -march=x86 -relocation-model=static | FileCheck -check-prefix=atom %s
+; RUN: llc <%s -O2 -mcpu=core2 -march=x86 -relocation-model=static | FileCheck %s
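+; (The atom checks presumably reflect Atom's in-order scheduler keeping the
+; two multiplies adjacent, while the core2 schedule interleaves the store
+; between them.)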
+
+@a = common global i32 0, align 4
+@b = common global i32 0, align 4
+@c = common global i32 0, align 4
+@d = common global i32 0, align 4
+@e = common global i32 0, align 4
+@f = common global i32 0, align 4
+
+define void @func() nounwind uwtable {
+; atom: imull
+; atom-NOT: movl
+; atom: imull
+; CHECK: imull
+; CHECK: movl
+; CHECK: imull
+entry:
+ %0 = load i32* @b, align 4
+ %1 = load i32* @c, align 4
+ %mul = mul nsw i32 %0, %1
+ store i32 %mul, i32* @a, align 4
+ %2 = load i32* @e, align 4
+ %3 = load i32* @f, align 4
+ %mul1 = mul nsw i32 %2, %3
+ store i32 %mul1, i32* @d, align 4
+ ret void
+}
diff --git a/test/CodeGen/X86/avx-arith.ll b/test/CodeGen/X86/avx-arith.ll
index 59988ca8b68d..4aa337033df6 100644
--- a/test/CodeGen/X86/avx-arith.ll
+++ b/test/CodeGen/X86/avx-arith.ll
@@ -259,3 +259,14 @@ define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
ret <4 x i64> %x
}
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
+
+define <4 x float> @int_sqrt_ss() {
+; CHECK: int_sqrt_ss
+; CHECK: vsqrtss
+ %x0 = load float addrspace(1)* undef, align 8
+ %x1 = insertelement <4 x float> undef, float %x0, i32 0
+ %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
+ ret <4 x float> %x2
+}
+
diff --git a/test/CodeGen/X86/avx-basic.ll b/test/CodeGen/X86/avx-basic.ll
index 0a46b0828a8c..8ad0fa82b58f 100644
--- a/test/CodeGen/X86/avx-basic.ll
+++ b/test/CodeGen/X86/avx-basic.ll
@@ -6,7 +6,7 @@
define void @zero128() nounwind ssp {
entry:
- ; CHECK: vpxor
+ ; CHECK: vxorps
; CHECK: vmovaps
store <4 x float> zeroinitializer, <4 x float>* @z, align 16
ret void
@@ -105,3 +105,19 @@ allocas:
ret <8 x i32> %updatedret.i30.i
}
+;;;; Don't crash on fneg
+; rdar://10566486
+; CHECK: fneg
+; CHECK: vxorps
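+; (fsub -0.0, x is the canonical fneg and lowers to an XOR with the sign-bit
+; mask; the <16 x float> operand just has to be split legally, not crash.)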
+define <16 x float> @fneg(<16 x float> addrspace(1)* nocapture %out) nounwind {
+ %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+ ret <16 x float> %1
+}
+
+;;; Don't crash on build vector
+; CHECK: @build_vec_16x16
+; CHECK: vmovd
+define <16 x i16> @build_vec_16x16(i16 %a) nounwind readonly {
+ %res = insertelement <16 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, i16 %a, i32 0
+ ret <16 x i16> %res
+}
diff --git a/test/CodeGen/X86/avx-cast.ll b/test/CodeGen/X86/avx-cast.ll
index d6d2415ea059..32d450cac9f9 100644
--- a/test/CodeGen/X86/avx-cast.ll
+++ b/test/CodeGen/X86/avx-cast.ll
@@ -16,7 +16,7 @@ entry:
ret <4 x double> %shuffle.i
}
-; CHECK: vpxor
+; CHECK: vxorps
; CHECK-NEXT: vinsertf128 $0
define <4 x i64> @castC(<2 x i64> %m) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/avx-cvt.ll b/test/CodeGen/X86/avx-cvt.ll
index 6c0bd58074d4..d0a7fe01009e 100644
--- a/test/CodeGen/X86/avx-cvt.ll
+++ b/test/CodeGen/X86/avx-cvt.ll
@@ -18,7 +18,7 @@ define <4 x double> @sitofp01(<4 x i32> %a) {
ret <4 x double> %b
}
-; CHECK: vcvtpd2dqy %ymm
+; CHECK: vcvttpd2dqy %ymm
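+; (fptosi truncates toward zero, so the truncating vcvttpd2dq form is required;
+; plain vcvtpd2dq rounds according to the current MXCSR mode.)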
define <4 x i32> @fptosi01(<4 x double> %a) {
%b = fptosi <4 x double> %a to <4 x i32>
ret <4 x i32> %b
diff --git a/test/CodeGen/X86/avx-fp2int.ll b/test/CodeGen/X86/avx-fp2int.ll
new file mode 100755
index 000000000000..a3aadde2bdd1
--- /dev/null
+++ b/test/CodeGen/X86/avx-fp2int.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+;; Check that FP_TO_SINT and FP_TO_UINT are lowered to truncating converts.
+
+; CHECK: test1:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+; CHECK: test2:
+; CHECK: vcvttpd2dqy
+; CHECK: ret
+
+define <4 x i8> @test1(<4 x double> %d) {
+ %c = fptoui <4 x double> %d to <4 x i8>
+ ret <4 x i8> %c
+}
+define <4 x i8> @test2(<4 x double> %d) {
+ %c = fptosi <4 x double> %d to <4 x i8>
+ ret <4 x i8> %c
+}
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 5201688686d3..b33493252a5f 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -245,34 +245,6 @@ define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
-define <16 x i8> @test_x86_sse2_loadu_dq(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovups
- %res = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a0) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readonly
-
-
-define <2 x double> @test_x86_sse2_loadu_pd(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovups
- %res = call <2 x double> @llvm.x86.sse2.loadu.pd(i8* %a0) ; <<2 x double>> [#uses=1]
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.sse2.loadu.pd(i8*) nounwind readonly
-
-
-define void @test_x86_sse2_maskmov_dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
- ; CHECK: pushl
- ; CHECK: movl
- ; CHECK: vmaskmovdqu
- ; CHECK: popl
- call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
- ret void
-}
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
-
define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) {
; CHECK: vmaxpd
@@ -314,25 +286,10 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) {
declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
-define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
- ; CHECK: movl
- ; CHECK: vmovntdq
- call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
- ret void
-}
-declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
-
-
-define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
- ; CHECK: movl
- ; CHECK: vmovntpd
- call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
- ret void
-}
-declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_mul_sd
; CHECK: vmulsd
%res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -412,54 +369,6 @@ define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
-define <16 x i8> @test_x86_sse2_pcmpeq_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpeqb
- %res = call <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpeq_d(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcmpeqd
- %res = call <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpeq.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpeq_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcmpeqw
- %res = call <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpeq.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
-define <16 x i8> @test_x86_sse2_pcmpgt_b(<16 x i8> %a0, <16 x i8> %a1) {
- ; CHECK: vpcmpgtb
- %res = call <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
-}
-declare <16 x i8> @llvm.x86.sse2.pcmpgt.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse2_pcmpgt_d(<4 x i32> %a0, <4 x i32> %a1) {
- ; CHECK: vpcmpgtd
- %res = call <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse2_pcmpgt_w(<8 x i16> %a0, <8 x i16> %a1) {
- ; CHECK: vpcmpgtw
- %res = call <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse2.pcmpgt.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-
define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vpmaddwd
%res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1]
@@ -749,6 +658,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+ ; CHECK: test_x86_sse2_storel_dq
; CHECK: movl
; CHECK: vmovq
call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +668,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+ ; CHECK: test_x86_sse2_storeu_dq
; CHECK: movl
; CHECK: vmovdqu
call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +678,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_storeu_pd
; CHECK: movl
; CHECK: vmovupd
- call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+ %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+ call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK: test_x86_sse2_sub_sd
; CHECK: vsubsd
%res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
ret <2 x double> %res
@@ -955,21 +869,13 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse41_movntdqa(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovntdqa
- %res = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readonly
-
-define <16 x i8> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
+define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vmpsadbw
- %res = call <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<16 x i8>> [#uses=1]
- ret <16 x i8> %res
+ %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i32 7) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
}
-declare <16 x i8> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i32) nounwind readnone
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
@@ -996,14 +902,6 @@ define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i32) nounwind readnone
-define <2 x i64> @test_x86_sse41_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcmpeqq
- %res = call <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pcmpeqq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; CHECK: vphminposuw
%res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
@@ -1180,33 +1078,33 @@ define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
-define i32 @test_x86_sse41_ptestc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vptest
; CHECK: sbbl
- %res = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
-define i32 @test_x86_sse41_ptestnzc(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vptest
; CHECK: seta
; CHECK: movzbl
- %res = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
-define i32 @test_x86_sse41_ptestz(<4 x float> %a0, <4 x float> %a1) {
+define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK: vptest
; CHECK: sete
; CHECK: movzbl
- %res = call i32 @llvm.x86.sse41.ptestz(<4 x float> %a0, <4 x float> %a1) ; <i32> [#uses=1]
+ %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
ret i32 %res
}
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
@@ -1317,14 +1215,6 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) {
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
-define <2 x i64> @test_x86_sse42_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
- ; CHECK: vpcmpgtq
- %res = call <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse42.pcmpgtq(<2 x i64>, <2 x i64>) nounwind readnone
-
-
define i32 @test_x86_sse42_pcmpistri128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpcmpistri
; CHECK: movl
@@ -1512,14 +1402,6 @@ define void @test_x86_sse_ldmxcsr(i8* %a0) {
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind
-define <4 x float> @test_x86_sse_loadu_ps(i8* %a0) {
- ; CHECK: movl
- ; CHECK: vmovups
- %res = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a0) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readonly
-
define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmaxps
@@ -1561,14 +1443,6 @@ define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) {
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
-define void @test_x86_sse_movnt_ps(i8* %a0, <4 x float> %a1) {
- ; CHECK: movl
- ; CHECK: vmovntps
- call void @llvm.x86.sse.movnt.ps(i8* %a0, <4 x float> %a1)
- ret void
-}
-declare void @llvm.x86.sse.movnt.ps(i8*, <4 x float>) nounwind
-
define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vmulss
@@ -1743,12 +1617,12 @@ define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-define <4 x i32> @test_x86_ssse3_phadd_sw_128(<4 x i32> %a0, <4 x i32> %a1) {
+define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK: vphaddsw
- %res = call <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
+ %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
}
-declare <4 x i32> @llvm.x86.ssse3.phadd.sw.128(<4 x i32>, <4 x i32>) nounwind readnone
+declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
@@ -1783,12 +1657,12 @@ define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
-define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
+define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK: vpmaddubsw
- %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
+ %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
ret <8 x i16> %res
}
-declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
+declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
@@ -1892,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) {
%res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
+
+define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vcmpeqps
+ %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpltps
+ %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpleps
+ %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpunordps
+ %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpneqps
+ %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpnltps
+ %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpnleps
+ %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpordps
+ %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpeq_uqps
+ %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpngeps
+ %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpngtps
+ %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpfalseps
+ %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpneq_oqps
+ %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpgeps
+ %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpgtps
+ %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmptrueps
+ %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpeq_osps
+ %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmplt_oqps
+ %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmple_oqps
+ %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpunord_sps
+ %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpneq_usps
+ %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpnlt_uqps
+ %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpnle_uqps
+ %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpord_sps
+ %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpeq_usps
+ %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpnge_uqps
+ %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpngt_uqps
+ %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpfalse_osps
+ %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpneq_osps
+ %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpge_oqps
+ %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmpgt_oqps
+ %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1]
+ ; CHECK: vcmptrue_usps
+ %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
@@ -2007,30 +1949,6 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(i8* %a0) {
declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
-define <32 x i8> @test_x86_avx_loadu_dq_256(i8* %a0) {
- ; CHECK: vmovdqu
- %res = call <32 x i8> @llvm.x86.avx.loadu.dq.256(i8* %a0) ; <<32 x i8>> [#uses=1]
- ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx.loadu.dq.256(i8*) nounwind readonly
-
-
-define <4 x double> @test_x86_avx_loadu_pd_256(i8* %a0) {
- ; CHECK: vmovupd
- %res = call <4 x double> @llvm.x86.avx.loadu.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
- ret <4 x double> %res
-}
-declare <4 x double> @llvm.x86.avx.loadu.pd.256(i8*) nounwind readonly
-
-
-define <8 x float> @test_x86_avx_loadu_ps_256(i8* %a0) {
- ; CHECK: vmovups
- %res = call <8 x float> @llvm.x86.avx.loadu.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
- ret <8 x float> %res
-}
-declare <8 x float> @llvm.x86.avx.loadu.ps.256(i8*) nounwind readonly
-
-
define <2 x double> @test_x86_avx_maskload_pd(i8* %a0, <2 x double> %a1) {
; CHECK: vmaskmovpd
%res = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
@@ -2143,29 +2061,10 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) {
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
-define void @test_x86_avx_movnt_dq_256(i8* %a0, <4 x i64> %a1) {
- ; CHECK: vmovntdq
- call void @llvm.x86.avx.movnt.dq.256(i8* %a0, <4 x i64> %a1)
- ret void
-}
-declare void @llvm.x86.avx.movnt.dq.256(i8*, <4 x i64>) nounwind
-define void @test_x86_avx_movnt_pd_256(i8* %a0, <4 x double> %a1) {
- ; CHECK: vmovntpd
- call void @llvm.x86.avx.movnt.pd.256(i8* %a0, <4 x double> %a1)
- ret void
-}
-declare void @llvm.x86.avx.movnt.pd.256(i8*, <4 x double>) nounwind
-define void @test_x86_avx_movnt_ps_256(i8* %a0, <8 x float> %a1) {
- ; CHECK: vmovntps
- call void @llvm.x86.avx.movnt.ps.256(i8* %a0, <8 x float> %a1)
- ret void
-}
-declare void @llvm.x86.avx.movnt.ps.256(i8*, <8 x float>) nounwind
-
define i32 @test_x86_avx_ptestc_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK: vptest
@@ -2245,8 +2144,11 @@ declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
- ; CHECK: vmovdqu
- call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a1)
+  ; FIXME: unfortunately, the execution domain fix pass changes this to vmovups, and it's hard to force with no 256-bit integer instructions
+ ; CHECK: vmovups
+ ; add operation forces the execution domain.
+ %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
ret void
}
declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
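+; A rough sketch of the domain-pinning idiom used above (hypothetical %v, %p):
+;   %v2 = add <32 x i8> %v, <i8 1, ...>   ; integer op keeps %v2 in the integer domain
+;   call void @llvm.x86.avx.storeu.dq.256(i8* %p, <32 x i8> %v2)
+; With no 256-bit integer move available on AVX1, the domain fix pass still
+; emits vmovups for the store itself, as the FIXME above notes.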
@@ -2254,7 +2156,9 @@ declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
; CHECK: vmovupd
- call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a1)
+ ; add operation forces the execution domain.
+ %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
+ call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
ret void
}
declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
@@ -2292,20 +2196,20 @@ define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
-define <4 x float> @test_x86_avx_vbroadcastss(i8* %a0) {
+define <4 x float> @test_x86_avx_vbroadcast_ss(i8* %a0) {
; CHECK: vbroadcastss
- %res = call <4 x float> @llvm.x86.avx.vbroadcastss(i8* %a0) ; <<4 x float>> [#uses=1]
+ %res = call <4 x float> @llvm.x86.avx.vbroadcast.ss(i8* %a0) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.avx.vbroadcastss(i8*) nounwind readonly
+declare <4 x float> @llvm.x86.avx.vbroadcast.ss(i8*) nounwind readonly
-define <8 x float> @test_x86_avx_vbroadcastss_256(i8* %a0) {
+define <8 x float> @test_x86_avx_vbroadcast_ss_256(i8* %a0) {
; CHECK: vbroadcastss
- %res = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %a0) ; <<8 x float>> [#uses=1]
+ %res = call <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8* %a0) ; <<8 x float>> [#uses=1]
ret <8 x float> %res
}
-declare <8 x float> @llvm.x86.avx.vbroadcastss.256(i8*) nounwind readonly
+declare <8 x float> @llvm.x86.avx.vbroadcast.ss.256(i8*) nounwind readonly
define <2 x double> @test_x86_avx_vextractf128_pd_256(<4 x double> %a0) {
@@ -2433,6 +2337,12 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) {
%res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1) ; <<4 x float>> [#uses=1]
ret <4 x float> %res
}
+define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
+ ; CHECK: vpermilps
+ %a2 = load <4 x i32>* %a1
+ %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
@@ -2575,4 +2485,73 @@ define void @test_x86_avx_vzeroupper() {
}
declare void @llvm.x86.avx.vzeroupper() nounwind
+; Make sure instructions that have no AVX equivalents but are associated with SSEX feature flags still work
+; CHECK: monitor
+define void @monitor(i8* %P, i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H)
+ ret void
+}
+declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind
+
+; CHECK: mwait
+define void @mwait(i32 %E, i32 %H) nounwind {
+entry:
+ tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H)
+ ret void
+}
+declare void @llvm.x86.sse3.mwait(i32, i32) nounwind
+
+; CHECK: sfence
+define void @sfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse.sfence()
+ ret void
+}
+declare void @llvm.x86.sse.sfence() nounwind
+
+; CHECK: lfence
+define void @lfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse2.lfence()
+ ret void
+}
+declare void @llvm.x86.sse2.lfence() nounwind
+
+; CHECK: mfence
+define void @mfence() nounwind {
+entry:
+ tail call void @llvm.x86.sse2.mfence()
+ ret void
+}
+declare void @llvm.x86.sse2.mfence() nounwind
+
+; CHECK: clflush
+define void @clflush(i8* %p) nounwind {
+entry:
+ tail call void @llvm.x86.sse2.clflush(i8* %p)
+ ret void
+}
+declare void @llvm.x86.sse2.clflush(i8*) nounwind
+
+; CHECK: crc32b
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+ ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+
+; CHECK: crc32w
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+ ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+
+; CHECK: crc32l
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
index 07a63efd71fc..c9fc66a8a791 100644
--- a/test/CodeGen/X86/avx-load-store.ll
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -25,20 +25,26 @@ declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
;;
;; The two tests below check that we must fold load + scalar_to_vector
-;; + ins_subvec+ zext into only a single vmovss or vmovsd
+;; + ins_subvec + zext into only a single vmovss, vmovsd, or vinsertps from memory
-; CHECK: vmovss (%
+; CHECK: mov00
define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
%val = load float* %ptr
+; CHECK: vinsertps
+; CHECK: vinsertf128
%i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
ret <8 x float> %i0
+; CHECK: ret
}
-; CHECK: vmovsd (%
+; CHECK: mov01
define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
%val = load double* %ptr
+; CHECK: vmovlpd
+; CHECK: vinsertf128
%i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
ret <4 x double> %i0
+; CHECK: ret
}
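+; Rationale: a VEX-encoded vmovss/vmovsd load zeroes all remaining lanes of
+; the destination, so inserting a loaded scalar into a zero vector needs no
+; explicit vxorps; the vinsertps/vmovlpd + vinsertf128 sequences checked here
+; build the same zero-extended value in two steps.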
; CHECK: vmovaps %ymm
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 518c09c8695d..115cefb1b5eb 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -7,7 +7,9 @@ entry:
%1 = bitcast <4 x double> %y to <4 x i64>
%and.i = and <4 x i64> %0, %1
%2 = bitcast <4 x i64> %and.i to <4 x double>
- ret <4 x double> %2
+ ; add forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
}
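+; Without the trailing fadd, the and is reachable only through bitcasts and
+; could be selected in either domain; the FP consumer makes vandpd (the FP
+; domain form) the profitable choice over vpand.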
; CHECK: vandpd LCP{{.*}}(%rip)
@@ -16,7 +18,9 @@ entry:
%0 = bitcast <4 x double> %y to <4 x i64>
%and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
%1 = bitcast <4 x i64> %and.i to <4 x double>
- ret <4 x double> %1
+ ; add forces execution domain
+ %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %2
}
; CHECK: vandps
@@ -45,7 +49,9 @@ entry:
%1 = bitcast <4 x double> %y to <4 x i64>
%xor.i = xor <4 x i64> %0, %1
%2 = bitcast <4 x i64> %xor.i to <4 x double>
- ret <4 x double> %2
+ ; add forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
}
; CHECK: vxorpd LCP{{.*}}(%rip)
@@ -54,7 +60,9 @@ entry:
%0 = bitcast <4 x double> %y to <4 x i64>
%xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
%1 = bitcast <4 x i64> %xor.i to <4 x double>
- ret <4 x double> %1
+ ; add forces execution domain
+ %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %2
}
; CHECK: vxorps
@@ -83,7 +91,9 @@ entry:
%1 = bitcast <4 x double> %y to <4 x i64>
%or.i = or <4 x i64> %0, %1
%2 = bitcast <4 x i64> %or.i to <4 x double>
- ret <4 x double> %2
+ ; add forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
}
; CHECK: vorpd LCP{{.*}}(%rip)
@@ -92,7 +102,9 @@ entry:
%0 = bitcast <4 x double> %y to <4 x i64>
%or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
%1 = bitcast <4 x i64> %or.i to <4 x double>
- ret <4 x double> %1
+ ; add forces execution domain
+ %2 = fadd <4 x double> %1, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %2
}
; CHECK: vorps
@@ -122,7 +134,9 @@ entry:
%1 = bitcast <4 x double> %y to <4 x i64>
%and.i = and <4 x i64> %1, %neg.i
%2 = bitcast <4 x i64> %and.i to <4 x double>
- ret <4 x double> %2
+ ; add forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
}
; CHECK: vandnpd (%
@@ -134,7 +148,9 @@ entry:
%1 = bitcast <4 x double> %tmp2 to <4 x i64>
%and.i = and <4 x i64> %1, %neg.i
%2 = bitcast <4 x i64> %and.i to <4 x double>
- ret <4 x double> %2
+ ; add forces execution domain
+ %3 = fadd <4 x double> %2, <double 0x0, double 0x0, double 0x0, double 0x0>
+ ret <4 x double> %3
}
; CHECK: vandnps
@@ -165,7 +181,9 @@ entry:
; CHECK: vpandn %xmm
define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
- %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
%x = and <2 x i64> %a, %y
ret <2 x i64> %x
}
@@ -173,7 +191,9 @@ entry:
; CHECK: vpand %xmm
define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
entry:
- %x = and <2 x i64> %a, %b
+ ; Force the execution domain with an add.
+ %a2 = add <2 x i64> %a, <i64 1, i64 1>
+ %x = and <2 x i64> %a2, %b
ret <2 x i64> %x
}
diff --git a/test/CodeGen/X86/avx-minmax.ll b/test/CodeGen/X86/avx-minmax.ll
index f36ba7b62a2c..7c5882010945 100644
--- a/test/CodeGen/X86/avx-minmax.ll
+++ b/test/CodeGen/X86/avx-minmax.ll
@@ -33,7 +33,7 @@ define <4 x float> @minps(<4 x float> %x, <4 x float> %y) {
}
; UNSAFE: vmaxpd:
-; UNSAFE: vmaxpd %ymm
+; UNSAFE: vmaxpd {{.+}}, %ymm
define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
%max_is_x = fcmp oge <4 x double> %x, %y
%max = select <4 x i1> %max_is_x, <4 x double> %x, <4 x double> %y
@@ -41,7 +41,7 @@ define <4 x double> @vmaxpd(<4 x double> %x, <4 x double> %y) {
}
; UNSAFE: vminpd:
-; UNSAFE: vminpd %ymm
+; UNSAFE: vminpd {{.+}}, %ymm
define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
%min_is_x = fcmp ole <4 x double> %x, %y
%min = select <4 x i1> %min_is_x, <4 x double> %x, <4 x double> %y
@@ -49,7 +49,7 @@ define <4 x double> @vminpd(<4 x double> %x, <4 x double> %y) {
}
; UNSAFE: vmaxps:
-; UNSAFE: vmaxps %ymm
+; UNSAFE: vmaxps {{.+}}, %ymm
define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
%max_is_x = fcmp oge <8 x float> %x, %y
%max = select <8 x i1> %max_is_x, <8 x float> %x, <8 x float> %y
@@ -57,7 +57,7 @@ define <8 x float> @vmaxps(<8 x float> %x, <8 x float> %y) {
}
; UNSAFE: vminps:
-; UNSAFE: vminps %ymm
+; UNSAFE: vminps {{.+}}, %ymm
define <8 x float> @vminps(<8 x float> %x, <8 x float> %y) {
%min_is_x = fcmp ole <8 x float> %x, %y
%min = select <8 x i1> %min_is_x, <8 x float> %x, <8 x float> %y
diff --git a/test/CodeGen/X86/avx-sext.ll b/test/CodeGen/X86/avx-sext.ll
new file mode 100755
index 000000000000..3713a8c37799
--- /dev/null
+++ b/test/CodeGen/X86/avx-sext.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_8i16_to_8i32
+;CHECK: vpmovsxwd
+
+ %B = sext <8 x i16> %A to <8 x i32>
+ ret <8 x i32>%B
+}
+
+define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: sext_4i32_to_4i64
+;CHECK: vpmovsxdq
+
+ %B = sext <4 x i32> %A to <4 x i64>
+ ret <4 x i64>%B
+}
diff --git a/test/CodeGen/X86/avx-shift.ll b/test/CodeGen/X86/avx-shift.ll
index 3ea39a2358e0..681747b844a0 100644
--- a/test/CodeGen/X86/avx-shift.ll
+++ b/test/CodeGen/X86/avx-shift.ll
@@ -62,6 +62,45 @@ define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
ret <16 x i16> %s
}
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+define <32 x i8> @vshift09(<32 x i8> %a) nounwind readnone {
+ %s = ashr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+ ret <32 x i8> %s
+}
+
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: pcmpgtb
+define <32 x i8> @vshift10(<32 x i8> %a) nounwind readnone {
+ %s = ashr <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ret <32 x i8> %s
+}
+
+; CHECK: vpsrlw
+; CHECK: pand
+; CHECK: vpsrlw
+; CHECK: pand
+define <32 x i8> @vshift11(<32 x i8> %a) nounwind readnone {
+ %s = lshr <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+ ret <32 x i8> %s
+}
+
+; CHECK: vpsllw
+; CHECK: pand
+; CHECK: vpsllw
+; CHECK: pand
+define <32 x i8> @vshift12(<32 x i8> %a) nounwind readnone {
+ %s = shl <32 x i8> %a, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+ ret <32 x i8> %s
+}
+
;;; Support variable shifts
; CHECK: _vshift08
; CHECK: vextractf128 $1
@@ -73,3 +112,27 @@ define <8 x i32> @vshift08(<8 x i32> %a) nounwind {
ret <8 x i32> %bitop
}
+;;; Uses shifts for sign extension
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+ %b = trunc <16 x i16> %a to <16 x i8>
+ %c = sext <16 x i8> %b to <16 x i16>
+ ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+ %b = trunc <8 x i32> %a to <8 x i16>
+ %c = sext <8 x i16> %b to <8 x i32>
+ ret <8 x i32> %c
+}
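+; The trunc+sext pairs above have no single 256-bit instruction on AVX1, so
+; they lower to the classic in-register sign-extension idiom, applied per
+; 128-bit half; a scalar sketch of the same trick (hypothetical %x):
+;   %t = shl i32 %x, 16
+;   %r = ashr i32 %t, 16   ; %r is the sign-extended low 16 bits of %x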
diff --git a/test/CodeGen/X86/avx-shuffle-x86_32.ll b/test/CodeGen/X86/avx-shuffle-x86_32.ll
new file mode 100755
index 000000000000..5268ec3a56cd
--- /dev/null
+++ b/test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i64> @test1(<4 x i64> %a) nounwind {
+ %b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i64>%b
+ ; CHECK: test1:
+ ; CHECK: vinsertf128
+ }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 0db334dd9940..16c447be1727 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,5 +6,199 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
ret <4 x float> %b
; CHECK: test1:
; CHECK: vshufps
-; CHECK: vpshufd
+; CHECK: vpermilps
+}
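+; vpermilps is the FP-domain counterpart of vpshufd; preferring it here
+; avoids a domain crossing after the preceding vshufps.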
+
+; rdar://10538417
+define <3 x i64> @test2(<2 x i64> %v) nounwind readnone {
+; CHECK: test2:
+; CHECK: vinsertf128
+ %1 = shufflevector <2 x i64> %v, <2 x i64> %v, <3 x i32> <i32 0, i32 1, i32 undef>
+ %2 = shufflevector <3 x i64> zeroinitializer, <3 x i64> %1, <3 x i32> <i32 3, i32 4, i32 2>
+ ret <3 x i64> %2
+; CHECK: ret
+}
+
+define <4 x i64> @test3(<4 x i64> %a, <4 x i64> %b) nounwind {
+ %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
+ ret <4 x i64> %c
+; CHECK: test3:
+; CHECK: vperm2f128
+; CHECK: ret
+}
+
+define <8 x float> @test4(float %a) nounwind {
+ %b = insertelement <8 x float> zeroinitializer, float %a, i32 0
+ ret <8 x float> %b
+; CHECK: test4:
+; CHECK: vinsertf128
+}
+
+; rdar://10594409
+define <8 x float> @test5(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+ %0 = bitcast float* %f to <4 x float>*
+ %1 = load <4 x float>* %0, align 16
+; CHECK: test5
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+ %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x float> %shuffle.i
+}
+
+define <4 x double> @test6(double* nocapture %d) nounwind uwtable readonly ssp {
+entry:
+ %0 = bitcast double* %d to <2 x double>*
+ %1 = load <2 x double>* %0, align 16
+; CHECK: test6
+; CHECK: vmovaps
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+ %shuffle.i = shufflevector <2 x double> %1, <2 x double> <double 0.000000e+00, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+ ret <4 x double> %shuffle.i
+}
+
+define <16 x i16> @test7(<4 x i16> %a) nounwind {
+; CHECK: test7
+ %b = shufflevector <4 x i16> %a, <4 x i16> undef, <16 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: ret
+ ret <16 x i16> %b
+}
+
+; CHECK: test8
+define void @test8() {
+entry:
+ %0 = load <16 x i64> addrspace(1)* null, align 128
+ %1 = shufflevector <16 x i64> <i64 undef, i64 undef, i64 0, i64 undef, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i64> %0, <16 x i32> <i32 17, i32 18, i32 2, i32 undef, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 undef, i32 undef, i32 26>
+ %2 = shufflevector <16 x i64> %1, <16 x i64> %0, <16 x i32> <i32 0, i32 1, i32 2, i32 30, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 undef, i32 11, i32 undef, i32 22, i32 20, i32 15>
+ store <16 x i64> %2, <16 x i64> addrspace(1)* undef, align 128
+; CHECK: ret
+ ret void
+}
+
+; Extract a value from a shufflevector.
+define i32 @test9(<4 x i32> %a) nounwind {
+; CHECK: test9
+; CHECK: vpextrd
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4>
+ %r = extractelement <8 x i32> %b, i32 2
+; CHECK: ret
+ ret i32 %r
+}
+
+; Extract a value which is the result of an undef mask.
+define i32 @test10(<4 x i32> %a) nounwind {
+; CHECK: @test10
+; CHECK-NEXT: #
+; CHECK-NEXT: ret
+ %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %r = extractelement <8 x i32> %b, i32 2
+ ret i32 %r
+}
+
+define <4 x float> @test11(<4 x float> %a) nounwind {
+; check: test11
+; check: vpermilps $27
+ %tmp1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %tmp1
+}
+
+define <4 x float> @test12(<4 x float>* %a) nounwind {
+; CHECK: test12
+; CHECK: vpermilps $27, (
+ %tmp0 = load <4 x float>* %a
+ %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %tmp1
+}
+
+define <4 x i32> @test13(<4 x i32> %a) nounwind {
+; check: test13
+; check: vpshufd $27
+ %tmp1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %tmp1
+}
+
+define <4 x i32> @test14(<4 x i32>* %a) nounwind {
+; CHECK: test14
+; CHECK: vpshufd $27, (
+ %tmp0 = load <4 x i32>* %a
+ %tmp1 = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %tmp1
+}
+
+; CHECK: test15
+; CHECK: vpshufd $8
+; CHECK: ret
+define <4 x i32> @test15(<2 x i32>%x) nounwind readnone {
+ %x1 = shufflevector <2 x i32> %x, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ ret <4 x i32>%x1
+}
+
+; rdar://10974078
+define <8 x float> @test16(float* nocapture %f) nounwind uwtable readonly ssp {
+entry:
+ %0 = bitcast float* %f to <4 x float>*
+ %1 = load <4 x float>* %0, align 8
+; CHECK: test16
+; CHECK: vmovups
+; CHECK-NOT: vxorps
+; CHECK-NOT: vinsertf128
+ %shuffle.i = shufflevector <4 x float> %1, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+ ret <8 x float> %shuffle.i
+}
+
+; PR12413
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+; CHECK: vpshufb
+define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
+entry:
+ %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2, <32 x i32> <i32 0,
+i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32
+22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32
+42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32
+62>
+ ret <32 x i8> %0
+}
+
+; CHECK: blend1
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend1(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i32> %t
+}
+
+; CHECK: blend2
+; CHECK: vblendps
+; CHECK: ret
+define <4 x i32> @blend2(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t
+}
+
+; CHECK: blend2a
+; CHECK: vblendps
+; CHECK: ret
+define <4 x float> @blend2a(<4 x float> %a, <4 x float> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %t
+}
+
+; CHECK: blend3
+; CHECK-NOT: vblendps
+; CHECK: ret
+define <4 x i32> @blend3(<4 x i32> %a, <4 x i32> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 2, i32 7>
+ ret <4 x i32> %t
+}
+
+; CHECK: blend4
+; CHECK: vblendpd
+; CHECK: ret
+define <4 x i64> @blend4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+ %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
+ ret <4 x i64> %t
}
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index af20b90322e1..94bcddd97592 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -32,7 +32,7 @@ entry:
ret <4 x i64> %vecinit6.i
}
-; CHECK: vshufpd $0
+; CHECK: vpermilpd $0
; CHECK-NEXT: vinsertf128 $1
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
entry:
@@ -47,7 +47,7 @@ entry:
; shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
; To:
; shuffle (vload ptr), undef, <1, 1, 1, 1>
-; CHECK: vmovdqa
+; CHECK: vmovaps
; CHECK-NEXT: vinsertf128 $1
; CHECK-NEXT: vpermilps $-1
define <8 x float> @funcE() nounwind {
diff --git a/test/CodeGen/X86/avx-trunc.ll b/test/CodeGen/X86/avx-trunc.ll
new file mode 100755
index 000000000000..d0077366444d
--- /dev/null
+++ b/test/CodeGen/X86/avx-trunc.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp {
+; CHECK: trunc_64_32
+; CHECK: pshufd
+ %B = trunc <4 x i64> %A to <4 x i32>
+ ret <4 x i32>%B
+}
+define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp {
+; CHECK: trunc_32_16
+; CHECK: pshufb
+ %B = trunc <8 x i32> %A to <8 x i16>
+ ret <8 x i16>%B
+}
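+; 256-bit truncates have no single AVX1 instruction either; they lower to
+; per-lane shuffles (pshufd/pshufb) that gather the low half of each element
+; into one 128-bit result.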
+
diff --git a/test/CodeGen/X86/avx-unpack.ll b/test/CodeGen/X86/avx-unpack.ll
index d420101339f5..20f534532263 100644
--- a/test/CodeGen/X86/avx-unpack.ll
+++ b/test/CodeGen/X86/avx-unpack.ll
@@ -67,6 +67,15 @@ entry:
ret <8 x i32> %shuffle.i
}
+; CHECK: vunpckhps (%
+define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
+entry:
+ %a = load <8 x i32>* %src1
+ %b = load <8 x i32>* %src2
+ %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i32> %shuffle.i
+}
+
; CHECK: vunpckhpd
define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
entry:
@@ -74,6 +83,15 @@ entry:
ret <4 x i64> %shuffle.i
}
+; CHECK: vunpckhpd (%
+define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
+entry:
+ %a = load <4 x i64>* %src1
+ %b = load <4 x i64>* %src2
+ %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i64> %shuffle.i
+}
+
; CHECK: vunpcklps
define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
entry:
@@ -81,9 +99,63 @@ entry:
ret <8 x i32> %shuffle.i
}
+; CHECK: vunpcklps (%
+define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
+entry:
+ %a = load <8 x i32>* %src1
+ %b = load <8 x i32>* %src2
+ %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+ ret <8 x i32> %shuffle.i
+}
+
; CHECK: vunpcklpd
define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
entry:
%shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %shuffle.i
}
+
+; CHECK: vunpcklpd (%
+define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
+entry:
+ %a = load <4 x i64>* %src1
+ %b = load <4 x i64>* %src2
+ %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+; CHECK: vpunpckhwd
+; CHECK: vinsertf128
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+; CHECK: vpunpcklwd
+; CHECK: vinsertf128
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+ ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+; CHECK: vpunpckhbw
+; CHECK: vinsertf128
+define <32 x i8> @unpackhbw_undef(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+ ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+; CHECK: vpunpcklbw
+; CHECK: vinsertf128
+define <32 x i8> @unpacklbw_undef(<32 x i8> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+ ret <32 x i8> %shuffle.i
+}
diff --git a/test/CodeGen/X86/avx-varargs-x86_64.ll b/test/CodeGen/X86/avx-varargs-x86_64.ll
new file mode 100644
index 000000000000..b0932bdfced9
--- /dev/null
+++ b/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; <rdar://problem/10463281>
+; Check that the <8 x float> is passed on the stack.
+
+@x = common global <8 x float> zeroinitializer, align 32
+declare i32 @f(i32, ...)
+
+; CHECK: test1:
+; CHECK: vmovaps %ymm0, (%rsp)
+define void @test1() nounwind uwtable ssp {
+entry:
+ %0 = load <8 x float>* @x, align 32
+ %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
+ ret void
+}
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 89b41884401e..148ae7329f4b 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,7 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-; XFAIL: *
-
-; xfail this file for now because of PR8156, when it gets solved merge this with avx-splat.ll
; CHECK: vbroadcastsd (%
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
@@ -50,7 +47,7 @@ entry:
;;;; 128-bit versions
; CHECK: vbroadcastss (%
-define <4 x float> @E(float* %ptr) nounwind uwtable readnone ssp {
+define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
entry:
%q = load float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -60,6 +57,19 @@ entry:
ret <4 x float> %vecinit6.i
}
+
+; CHECK: _e2
+; CHECK-NOT: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+ %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
+ %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
+ %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+ %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+ ret <4 x float> %vecinit6.i
+}
+
+
; CHECK: vbroadcastss (%
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
entry:
@@ -74,7 +84,7 @@ entry:
; Unsupported vbroadcasts
; CHECK: _G
-; CHECK-NOT: vbroadcastsd (%
+; CHECK-NOT: broadcast (%
; CHECK: ret
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
entry:
@@ -85,10 +95,20 @@ entry:
}
; CHECK: _H
-; CHECK-NOT: vbroadcastss
+; CHECK-NOT: broadcast
; CHECK: ret
define <4 x i32> @H(<4 x i32> %a) {
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
ret <4 x i32> %x
}
+; CHECK: _I
+; CHECK-NOT: broadcast (%
+; CHECK: ret
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load double* %ptr, align 4
+ %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+ %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+ ret <2 x double> %vecinit2.i
+}
diff --git a/test/CodeGen/X86/avx-vextractf128.ll b/test/CodeGen/X86/avx-vextractf128.ll
index dccf901b2599..fe0f6caed36a 100644
--- a/test/CodeGen/X86/avx-vextractf128.ll
+++ b/test/CodeGen/X86/avx-vextractf128.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: @A
; CHECK-NOT: vunpck
; CHECK: vextractf128 $1
define <8 x float> @A(<8 x float> %a) nounwind uwtable readnone ssp {
@@ -8,6 +9,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: @B
; CHECK-NOT: vunpck
; CHECK: vextractf128 $1
define <4 x double> @B(<4 x double> %a) nounwind uwtable readnone ssp {
@@ -16,3 +18,89 @@ entry:
ret <4 x double> %shuffle
}
+; CHECK: @t0
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t0(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+ %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+ %1 = bitcast float* %addr to <4 x float>*
+ store <4 x float> %0, <4 x float>* %1, align 16
+ ret void
+}
+
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+
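+; t0-t5 all check the same fold: a vextractf128 whose only use is a store
+; (whether a plain aligned/unaligned store or one of the storeu intrinsics)
+; should become a single vextractf128 with a memory destination, not an
+; extract to register followed by a separate move.
+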
+; CHECK: @t1
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t1(float* %addr, <8 x float> %a) nounwind uwtable ssp {
+entry:
+ %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a, i8 0)
+ %1 = bitcast float* %addr to i8*
+ tail call void @llvm.x86.sse.storeu.ps(i8* %1, <4 x float> %0)
+ ret void
+}
+
+declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
+
+; CHECK: @t2
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t2(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+ %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+ %1 = bitcast double* %addr to <2 x double>*
+ store <2 x double> %0, <2 x double>* %1, align 16
+ ret void
+}
+
+declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
+
+; CHECK: @t3
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovups %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t3(double* %addr, <4 x double> %a) nounwind uwtable ssp {
+entry:
+ %0 = tail call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a, i8 0)
+ %1 = bitcast double* %addr to i8*
+ tail call void @llvm.x86.sse2.storeu.pd(i8* %1, <2 x double> %0)
+ ret void
+}
+
+declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
+
+; CHECK: @t4
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovaps %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t4(<2 x i64>* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+ %0 = bitcast <4 x i64> %a to <8 x i32>
+ %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+ %2 = bitcast <4 x i32> %1 to <2 x i64>
+ store <2 x i64> %2, <2 x i64>* %addr, align 16
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
+
+; CHECK: @t5
+; CHECK-NOT: vextractf128 $0, %ymm0, %xmm0
+; CHECK-NOT: vmovdqu %xmm0, (%rdi)
+; CHECK: vextractf128 $0, %ymm0, (%rdi)
+define void @t5(<2 x i64>* %addr, <4 x i64> %a) nounwind uwtable ssp {
+entry:
+ %0 = bitcast <4 x i64> %a to <8 x i32>
+ %1 = tail call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %0, i8 0)
+ %2 = bitcast <2 x i64>* %addr to i8*
+ %3 = bitcast <4 x i32> %1 to <16 x i8>
+ tail call void @llvm.x86.sse2.storeu.dq(i8* %2, <16 x i8> %3)
+ ret void
+}
+
+declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
diff --git a/test/CodeGen/X86/avx-vinsertf128.ll b/test/CodeGen/X86/avx-vinsertf128.ll
index cda1331da326..9a954fe8047e 100644
--- a/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/test/CodeGen/X86/avx-vinsertf128.ll
@@ -56,3 +56,76 @@ define <8 x i32> @DAGCombineB(<8 x i32> %v1, <8 x i32> %v2) nounwind readonly {
%2 = add <8 x i32> %1, %v1
ret <8 x i32> %2
}
+
+; CHECK: insert_pd
+define <4 x double> @insert_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vinsertf128
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+
+; CHECK: insert_undef_pd
+define <4 x double> @insert_undef_pd(<4 x double> %a0, <2 x double> %a1) {
+; CHECK: vmovaps %ymm1, %ymm0
+%res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> undef, <2 x double> %a1, i8 0)
+ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
+
+
+; CHECK: insert_ps
+define <8 x float> @insert_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+
+; CHECK: insert_undef_ps
+define <8 x float> @insert_undef_ps(<8 x float> %a0, <4 x float> %a1) {
+; CHECK: vmovaps %ymm1, %ymm0
+%res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %a1, i8 0)
+ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
+
+
+; CHECK: insert_si
+define <8 x i32> @insert_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vinsertf128
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+
+; CHECK: insert_undef_si
+define <8 x i32> @insert_undef_si(<8 x i32> %a0, <4 x i32> %a1) {
+; CHECK: vmovaps %ymm1, %ymm0
+%res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> undef, <4 x i32> %a1, i8 0)
+ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
+
+; rdar://10643481
+; CHECK: vinsertf128_combine
+define <8 x float> @vinsertf128_combine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovaps
+; CHECK: vinsertf128
+entry:
+ %add.ptr = getelementptr inbounds float* %f, i64 4
+ %0 = bitcast float* %add.ptr to <4 x float>*
+ %1 = load <4 x float>* %0, align 16
+ %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+ ret <8 x float> %2
+}
+
+; rdar://11076953
+; CHECK: vinsertf128_ucombine
+define <8 x float> @vinsertf128_ucombine(float* nocapture %f) nounwind uwtable readonly ssp {
+; CHECK-NOT: vmovups
+; CHECK: vinsertf128
+entry:
+ %add.ptr = getelementptr inbounds float* %f, i64 4
+ %0 = bitcast float* %add.ptr to <4 x float>*
+ %1 = load <4 x float>* %0, align 8
+ %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
+ ret <8 x float> %2
+}
diff --git a/test/CodeGen/X86/avx-vperm2f128.ll b/test/CodeGen/X86/avx-vperm2f128.ll
index 3550a908231e..caa21e5bacfe 100644
--- a/test/CodeGen/X86/avx-vperm2f128.ll
+++ b/test/CodeGen/X86/avx-vperm2f128.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; CHECK: _A
; CHECK: vperm2f128 $1
define <8 x float> @A(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -7,6 +8,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _B
; CHECK: vperm2f128 $48
define <8 x float> @B(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -14,6 +16,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _C
; CHECK: vperm2f128 $0
define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -21,6 +24,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _D
; CHECK: vperm2f128 $17
define <8 x float> @D(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -28,6 +32,7 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: _E
; CHECK: vperm2f128 $17
define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
entry:
@@ -35,7 +40,8 @@ entry:
ret <32 x i8> %shuffle
}
-; CHECK: vperm2f128 $33
+; CHECK: _E2
+; CHECK: vperm2f128 $3
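+; ($3 = 0b0011: the low 2-bit field selects b's high lane for the low half,
+; and the high field selects a's low lane, matching the <6, 7, 0, 1> mask.)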
define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
@@ -44,6 +50,7 @@ entry:
;;;; Cases with undef indices mixed in the mask
+; CHECK: _F
; CHECK: vperm2f128 $33
define <8 x float> @F(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
diff --git a/test/CodeGen/X86/avx-vpermil.ll b/test/CodeGen/X86/avx-vpermil.ll
index 49b2f540a2c7..cb904b93313a 100644
--- a/test/CodeGen/X86/avx-vpermil.ll
+++ b/test/CodeGen/X86/avx-vpermil.ll
@@ -28,6 +28,14 @@ entry:
ret <4 x i64> %shuffle
}
+; CHECK: vpermilpd
+define <4 x i64> @funcQ(<4 x i64>* %a) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x i64>* %a
+ %shuffle = shufflevector <4 x i64> %a2, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
+ ret <4 x i64> %shuffle
+}
+
; vpermil should match masks like this: <u,3,1,2,4,u,5,6>. Check that the
; target-specific mask was correctly generated.
; CHECK: vpermilps $-100
@@ -37,7 +45,8 @@ entry:
ret <8 x float> %shuffle
}
-; CHECK-NOT: vpermilps
+; CHECK: palignr
+; CHECK: palignr
define <8 x float> @funcF(<8 x float> %a) nounwind uwtable readnone ssp {
entry:
%shuffle = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
diff --git a/test/CodeGen/X86/avx-vshufp.ll b/test/CodeGen/X86/avx-vshufp.ll
index f06548dc3d6d..45883b717380 100644
--- a/test/CodeGen/X86/avx-vshufp.ll
+++ b/test/CodeGen/X86/avx-vshufp.ll
@@ -7,6 +7,31 @@ entry:
ret <8 x float> %shuffle
}
+; CHECK: vshufps $-53, (%{{.*}}), %ymm
+define <8 x float> @A2(<8 x float>* %a, <8 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <8 x float>* %a
+ %b2 = load <8 x float>* %b
+ %shuffle = shufflevector <8 x float> %a2, <8 x float> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+ ret <8 x float> %shuffle
+}
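+; Immediate decode for reference: $-53 is 0xCB = 0b11001011; read low-to-high
+; in 2-bit fields it selects a[3], a[2], b[0], b[3] within each 128-bit lane,
+; which is exactly the <3, 2, 8, 11, 7, 6, 12, 15> mask these tests use.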
+
+; CHECK: vshufps $-53, %ymm
+define <8 x i32> @A3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+ ret <8 x i32> %shuffle
+}
+
+; CHECK: vshufps $-53, (%{{.*}}), %ymm
+define <8 x i32> @A4(<8 x i32>* %a, <8 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <8 x i32>* %a
+ %b2 = load <8 x i32>* %b
+ %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b2, <8 x i32> <i32 3, i32 2, i32 8, i32 11, i32 7, i32 6, i32 12, i32 15>
+ ret <8 x i32> %shuffle
+}
+
; CHECK: vshufpd $10, %ymm
define <4 x double> @B(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
entry:
@@ -14,6 +39,31 @@ entry:
ret <4 x double> %shuffle
}
+; CHECK: vshufpd $10, (%{{.*}}), %ymm
+define <4 x double> @B2(<4 x double>* %a, <4 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x double>* %a
+ %b2 = load <4 x double>* %b
+ %shuffle = shufflevector <4 x double> %a2, <4 x double> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x double> %shuffle
+}
+
+; CHECK: vshufpd $10, %ymm
+define <4 x i64> @B3(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i64> %shuffle
+}
+
+; CHECK: vshufpd $10, (%{{.*}}), %ymm
+define <4 x i64> @B4(<4 x i64>* %a, <4 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x i64>* %a
+ %b2 = load <4 x i64>* %b
+ %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x i64> %shuffle
+}
+
; CHECK: vshufps $-53, %ymm
define <8 x float> @C(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
entry:
@@ -27,3 +77,81 @@ entry:
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 undef>
ret <4 x double> %shuffle
}
+
+; CHECK: vshufps $-55, %ymm
+define <8 x float> @E(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 10, i32 0, i32 3, i32 13, i32 14, i32 4, i32 7>
+ ret <8 x float> %shuffle
+}
+
+; CHECK: vshufpd $8, %ymm
+define <4 x double> @F(<4 x double> %a, <4 x double> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
+ ret <4 x double> %shuffle
+}
+
+; CHECK: vshufps $-53, %xmm
+define <4 x float> @A128(<4 x float> %a, <4 x float> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+ ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps $-53, (%{{.*}}), %xmm
+define <4 x float> @A2128(<4 x float>* %a, <4 x float>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x float>* %a
+ %b2 = load <4 x float>* %b
+ %shuffle = shufflevector <4 x float> %a2, <4 x float> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+ ret <4 x float> %shuffle
+}
+
+; CHECK: vshufps $-53, %xmm
+define <4 x i32> @A3128(<4 x i32> %a, <4 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+ ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufps $-53, (%{{.*}}), %xmm
+define <4 x i32> @A4128(<4 x i32>* %a, <4 x i32>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <4 x i32>* %a
+ %b2 = load <4 x i32>* %b
+ %shuffle = shufflevector <4 x i32> %a2, <4 x i32> %b2, <4 x i32> <i32 3, i32 2, i32 4, i32 7>
+ ret <4 x i32> %shuffle
+}
+
+; CHECK: vshufpd $1, %xmm
+define <2 x double> @B128(<2 x double> %a, <2 x double> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd $1, (%{{.*}}), %xmm
+define <2 x double> @B2128(<2 x double>* %a, <2 x double>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <2 x double>* %a
+ %b2 = load <2 x double>* %b
+ %shuffle = shufflevector <2 x double> %a2, <2 x double> %b2, <2 x i32> <i32 1, i32 2>
+ ret <2 x double> %shuffle
+}
+
+; CHECK: vshufpd $1, %xmm
+define <2 x i64> @B3128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; CHECK: vshufpd $1, (%{{.*}}), %xmm
+define <2 x i64> @B4128(<2 x i64>* %a, <2 x i64>* %b) nounwind uwtable readnone ssp {
+entry:
+ %a2 = load <2 x i64>* %a
+ %b2 = load <2 x i64>* %b
+ %shuffle = shufflevector <2 x i64> %a2, <2 x i64> %b2, <2 x i32> <i32 1, i32 2>
+ ret <2 x i64> %shuffle
+}
diff --git a/test/CodeGen/X86/avx-vzeroupper.ll b/test/CodeGen/X86/avx-vzeroupper.ll
index eaf236c6c77b..bf4ab5be1512 100644
--- a/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/test/CodeGen/X86/avx-vzeroupper.ll
@@ -1,26 +1,83 @@
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
-define <4 x float> @do_sse_local(<4 x float> %a) nounwind uwtable readnone ssp {
-entry:
- %add.i = fadd <4 x float> %a, %a
- ret <4 x float> %add.i
-}
+declare <4 x float> @do_sse(<4 x float>)
+declare <8 x float> @do_avx(<8 x float>)
+declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
+@x = common global <4 x float> zeroinitializer, align 16
+@g = common global <8 x float> zeroinitializer, align 32
+
+;; Basic check - don't emit any vzeroupper instruction
; CHECK: _test00
define <4 x float> @test00(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
entry:
+ ; CHECK-NOT: vzeroupper
%add.i = fadd <4 x float> %a, %b
+ %call3 = call <4 x float> @do_sse(<4 x float> %add.i) nounwind
+ ; CHECK: ret
+ ret <4 x float> %call3
+}
+
+;; Check 256-bit parameter passing
+
+; CHECK: _test01
+define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounwind uwtable ssp {
+entry:
+ %tmp = load <4 x float>* @x, align 16
; CHECK: vzeroupper
; CHECK-NEXT: callq _do_sse
- %call3 = tail call <4 x float> @do_sse(<4 x float> %add.i) nounwind
- %sub.i = fsub <4 x float> %call3, %add.i
+ %call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
+ store <4 x float> %call, <4 x float>* @x, align 16
; CHECK-NOT: vzeroupper
- ; CHECK: callq _do_sse_local
- %call8 = tail call <4 x float> @do_sse_local(<4 x float> %sub.i)
+ ; CHECK: callq _do_sse
+ %call2 = tail call <4 x float> @do_sse(<4 x float> %call) nounwind
+ store <4 x float> %call2, <4 x float>* @x, align 16
+ ; CHECK: ret
+ ret <8 x float> %c
+}
+
+;; Test that the pass converges and that vzeroupper is issued only when necessary;
+;; for this function it should be emitted only once
+
+; CHECK: _test02
+define <4 x float> @test02(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+ %add.i = fadd <4 x float> %a, %b
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ ; CHECK: LBB
+ ; CHECK-NOT: vzeroupper
+ %i.018 = phi i32 [ 0, %entry ], [ %1, %for.body ]
+ %c.017 = phi <4 x float> [ %add.i, %entry ], [ %call14, %for.body ]
+ ; CHECK: callq _do_sse
+ %call5 = tail call <4 x float> @do_sse(<4 x float> %c.017) nounwind
+ ; CHECK-NEXT: callq _do_sse
+ %call7 = tail call <4 x float> @do_sse(<4 x float> %call5) nounwind
+ %tmp11 = load <8 x float>* @g, align 32
+ %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %tmp11, i8 1) nounwind
; CHECK: vzeroupper
- ; CHECK-NEXT: jmp _do_sse
- %call10 = tail call <4 x float> @do_sse(<4 x float> %call8) nounwind
- ret <4 x float> %call10
+ ; CHECK-NEXT: callq _do_sse
+ %call14 = tail call <4 x float> @do_sse(<4 x float> %0) nounwind
+ %1 = add nsw i32 %i.018, 1
+ %exitcond = icmp eq i32 %1, 4
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret <4 x float> %call14
}
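+
+;; (As the CHECK lines in test02 suggest, the upper ymm state is treated as
+;; clean after each call, so the back-to-back do_sse calls need no vzeroupper;
+;; only the 256-bit load of @g dirties the upper halves again.)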
-declare <4 x float> @do_sse(<4 x float>)
+;; Check that we also emit vzeroupper before returning from a function.
+
+; CHECK: _test03
+define <4 x float> @test03(<4 x float> %a, <4 x float> %b) nounwind uwtable ssp {
+entry:
+ %shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ; CHECK-NOT: vzeroupper
+ ; CHECK: call
+ %call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
+ %shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ; CHECK: vzeroupper
+ ; CHECK: ret
+ ret <4 x float> %shuf2
+}
diff --git a/test/CodeGen/X86/avx-win64-args.ll b/test/CodeGen/X86/avx-win64-args.ll
new file mode 100755
index 000000000000..85b2634a532b
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64-args.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+target triple = "x86_64-pc-win32"
+
+declare <8 x float> @foo(<8 x float>, i32)
+
+define <8 x float> @test1(<8 x float> %x, <8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+; CHECK: test1
+; CHECK: leaq {{.*}}, %rcx
+; CHECK: movl {{.*}}, %edx
+; CHECK: call
+; CHECK: ret
+ %x1 = fadd <8 x float> %x, %y
+ %call = call <8 x float> @foo(<8 x float> %x1, i32 1) nounwind
+ %y1 = fsub <8 x float> %call, %y
+ ret <8 x float> %y1
+}
+
diff --git a/test/CodeGen/X86/avx-win64.ll b/test/CodeGen/X86/avx-win64.ll
new file mode 100644
index 000000000000..dc6bd594450f
--- /dev/null
+++ b/test/CodeGen/X86/avx-win64.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; PR11862
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-win32"
+
+; This function has ymm registers live across a win64 call.
+; The ymm6-15 registers are still call-clobbered even though xmm6-15 are
+; callee-saved (only the low 128 bits survive a call). Verify that the live
+; values are spilled and reloaded rather than kept in callee-saved registers.
+
+; CHECK: f___vyf
+; CHECK: pushq %rbp
+; CHECK: vmovmsk
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: vmovaps %ymm{{.*}}(%r
+; CHECK: call
+; Two reloads. It's OK if these get folded.
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: vmovaps {{.*\(%r.*}}, %ymm
+; CHECK: blend
+define <8 x float> @f___vyf(<8 x float> %x, <8 x i32> %__mask) nounwind readnone {
+allocas:
+ %bincmp = fcmp oeq <8 x float> %x, zeroinitializer
+ %val_to_boolvec32 = sext <8 x i1> %bincmp to <8 x i32>
+ %"~test" = xor <8 x i32> %val_to_boolvec32, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %"internal_mask&function_mask25" = and <8 x i32> %"~test", %__mask
+ %floatmask.i46 = bitcast <8 x i32> %"internal_mask&function_mask25" to <8 x float>
+ %v.i47 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask.i46) nounwind readnone
+ %any_mm_cmp27 = icmp eq i32 %v.i47, 0
+ br i1 %any_mm_cmp27, label %safe_if_after_false, label %safe_if_run_false
+
+safe_if_run_false: ; preds = %allocas
+ %binop = fadd <8 x float> %x, <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+ %calltmp = call <8 x float> @f___vyf(<8 x float> %binop, <8 x i32> %"internal_mask&function_mask25")
+ %binop33 = fadd <8 x float> %calltmp, %x
+ %mask_as_float.i48 = bitcast <8 x i32> %"~test" to <8 x float>
+ %blend.i52 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %x, <8 x float> %binop33, <8 x float> %mask_as_float.i48) nounwind
+ br label %safe_if_after_false
+
+safe_if_after_false: ; preds = %safe_if_run_false, %allocas
+ %0 = phi <8 x float> [ %x, %allocas ], [ %blend.i52, %safe_if_run_false ]
+ ret <8 x float> %0
+}
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x float>) nounwind readonly
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x float>, <8 x float>) nounwind
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
diff --git a/test/CodeGen/X86/avx-zext.ll b/test/CodeGen/X86/avx-zext.ll
new file mode 100755
index 000000000000..b630e9d14612
--- /dev/null
+++ b/test/CodeGen/X86/avx-zext.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_8i16_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: ret
+
+ %B = zext <8 x i16> %A to <8 x i32>
+ ret <8 x i32>%B
+}
+
+define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp {
+;CHECK: zext_4i32_to_4i64
+;CHECK: vpunpckhdq
+;CHECK: ret
+
+ %B = zext <4 x i32> %A to <4 x i64>
+ ret <4 x i64>%B
+}
+
+
+define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) {
+;CHECK: zext_8i8_to_8i32
+;CHECK: vpunpckhwd
+;CHECK: vpunpcklwd
+;CHECK: vinsertf128
+;CHECK: ret
+ %t = zext <8 x i8> %z to <8 x i32>
+ ret <8 x i32> %t
+}
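+
+; (Note on zext_8i8_to_8i32: AVX1 has no 256-bit integer unpacks, so the
+; extension is done as two 128-bit vpunpck halves combined with vinsertf128,
+; which is what the CHECK lines pin down.)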
diff --git a/test/CodeGen/X86/avx2-arith.ll b/test/CodeGen/X86/avx2-arith.ll
new file mode 100644
index 000000000000..09f953835820
--- /dev/null
+++ b/test/CodeGen/X86/avx2-arith.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpaddq %ymm
+define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = add <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
+
+; CHECK: vpaddd %ymm
+define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = add <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+; CHECK: vpaddw %ymm
+define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = add <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+; CHECK: vpaddb %ymm
+define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = add <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
+; CHECK: vpsubq %ymm
+define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = sub <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
+
+; CHECK: vpsubd %ymm
+define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = sub <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+; CHECK: vpsubw %ymm
+define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = sub <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+; CHECK: vpsubb %ymm
+define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %x = sub <32 x i8> %i, %j
+ ret <32 x i8> %x
+}
+
+; CHECK: vpmulld %ymm
+define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %x = mul <8 x i32> %i, %j
+ ret <8 x i32> %x
+}
+
+; CHECK: vpmullw %ymm
+define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %x = mul <16 x i16> %i, %j
+ ret <16 x i16> %x
+}
+
+; CHECK: vpmuludq %ymm
+; CHECK-NEXT: vpsrlq $32, %ymm
+; CHECK-NEXT: vpmuludq %ymm
+; CHECK-NEXT: vpsllq $32, %ymm
+; CHECK-NEXT: vpaddq %ymm
+; CHECK-NEXT: vpsrlq $32, %ymm
+; CHECK-NEXT: vpmuludq %ymm
+; CHECK-NEXT: vpsllq $32, %ymm
+; CHECK-NEXT: vpaddq %ymm
+define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %x = mul <4 x i64> %i, %j
+ ret <4 x i64> %x
+}
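+
+; (A reading of the CHECK sequence above, not asserted by the test itself:
+; with a = a_hi*2^32 + a_lo and b = b_hi*2^32 + b_lo, the product mod 2^64 is
+; a_lo*b_lo + (a_lo*b_hi << 32) + (a_hi*b_lo << 32), hence three vpmuludq
+; multiplies, two vpsrlq shifts to extract high halves, two vpsllq shifts to
+; position the cross products, and two vpaddq adds.)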
+
diff --git a/test/CodeGen/X86/avx2-cmp.ll b/test/CodeGen/X86/avx2-cmp.ll
new file mode 100644
index 000000000000..df30d9efed13
--- /dev/null
+++ b/test/CodeGen/X86/avx2-cmp.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpcmpgtd %ymm
+define <8 x i32> @int256-cmp(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %bincmp = icmp slt <8 x i32> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i32>
+ ret <8 x i32> %x
+}
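+; (The icmp-slt plus sign-extend idiom is the canonical IR form of an
+; all-ones/all-zeros compare mask; the backend matches it to a single
+; vpcmpgt with the operands swapped, since slt(i, j) is gt(j, i).)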
+
+; CHECK: vpcmpgtq %ymm
+define <4 x i64> @v4i64-cmp(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %bincmp = icmp slt <4 x i64> %i, %j
+ %x = sext <4 x i1> %bincmp to <4 x i64>
+ ret <4 x i64> %x
+}
+
+; CHECK: vpcmpgtw %ymm
+define <16 x i16> @v16i16-cmp(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %bincmp = icmp slt <16 x i16> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i16>
+ ret <16 x i16> %x
+}
+
+; CHECK: vpcmpgtb %ymm
+define <32 x i8> @v32i8-cmp(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %bincmp = icmp slt <32 x i8> %i, %j
+ %x = sext <32 x i1> %bincmp to <32 x i8>
+ ret <32 x i8> %x
+}
+
+; CHECK: vpcmpeqd %ymm
+define <8 x i32> @int256-cmpeq(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
+ %bincmp = icmp eq <8 x i32> %i, %j
+ %x = sext <8 x i1> %bincmp to <8 x i32>
+ ret <8 x i32> %x
+}
+
+; CHECK: vpcmpeqq %ymm
+define <4 x i64> @v4i64-cmpeq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
+ %bincmp = icmp eq <4 x i64> %i, %j
+ %x = sext <4 x i1> %bincmp to <4 x i64>
+ ret <4 x i64> %x
+}
+
+; CHECK: vpcmpeqw %ymm
+define <16 x i16> @v16i16-cmpeq(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
+ %bincmp = icmp eq <16 x i16> %i, %j
+ %x = sext <16 x i1> %bincmp to <16 x i16>
+ ret <16 x i16> %x
+}
+
+; CHECK: vpcmpeqb %ymm
+define <32 x i8> @v32i8-cmpeq(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
+ %bincmp = icmp eq <32 x i8> %i, %j
+ %x = sext <32 x i1> %bincmp to <32 x i8>
+ ret <32 x i8> %x
+}
+
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
new file mode 100644
index 000000000000..1fb41c02b9e9
--- /dev/null
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -0,0 +1,994 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=core-avx2 -mattr=avx2 | FileCheck %s
+
+define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpackssdw
+ %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpacksswb
+ %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpackuswb
+ %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpaddsb
+ %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpaddsw
+ %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpaddusb
+ %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpaddusw
+ %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpavgb
+ %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpavgw
+ %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpmaddwd
+ %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpmaxsw
+ %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpmaxub
+ %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpminsw
+ %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpminub
+ %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
+ ; CHECK: vpmovmskb
+ %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
+ ret i32 %res
+}
+declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpmulhw
+ %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpmulhuw
+ %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmulu_dq(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpmuludq
+ %res = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpsadbw
+ %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpslld
+ %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq(<4 x i64> %a0) {
+ ; CHECK: vpslldq
+ %res = call <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_dq_bs(<4 x i64> %a0) {
+ ; CHECK: vpslldq
+ %res = call <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsllq
+ %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsllw
+ %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
+ ; CHECK: vpslld
+ %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
+ ; CHECK: vpsllq
+ %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
+ ; CHECK: vpsllw
+ %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrad
+ %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsraw
+ %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
+ ; CHECK: vpsrad
+ %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
+ ; CHECK: vpsraw
+ %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrld
+ %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq(<4 x i64> %a0) {
+ ; CHECK: vpsrldq
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_dq_bs(<4 x i64> %a0) {
+ ; CHECK: vpsrldq
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.dq.bs(<4 x i64>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsrlq
+ %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpsrlw
+ %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
+ ; CHECK: vpsrld
+ %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
+ ; CHECK: vpsrlq
+ %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
+ ; CHECK: vpsrlw
+ %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpsubsb
+ %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpsubsw
+ %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpsubusb
+ %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpsubusw
+ %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
+ ; CHECK: vpabsb
+ %res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
+ ; CHECK: vpabsd
+ %res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
+ ; CHECK: vpabsw
+ %res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vphaddd
+ %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vphaddsw
+ %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vphaddw
+ %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vphsubd
+ %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vphsubsw
+ %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vphsubw
+ %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpmaddubsw
+ %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpmulhrsw
+ %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpshufb
+ %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpsignb
+ %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpsignd
+ %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpsignw
+ %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_movntdqa(i8* %a0) {
+ ; CHECK: movl
+ ; CHECK: vmovntdqa
+ %res = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vmpsadbw
+ %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i32) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpackusdw
+ %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
+ ; CHECK: vpblendvb
+ %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpblendw
+ %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 7) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpmaxsb
+ %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpmaxsd
+ %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpmaxud
+ %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpmaxuw
+ %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
+ ; CHECK: vpminsb
+ %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpminsd
+ %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpminud
+ %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
+ ; CHECK: vpminuw
+ %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbd
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbq
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
+ ; CHECK: vpmovsxbw
+ %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
+ ; CHECK: vpmovsxdq
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
+ ; CHECK: vpmovsxwd
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
+ ; CHECK: vpmovsxwq
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbd
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbq
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
+ ; CHECK: vpmovzxbw
+ %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
+ ; CHECK: vpmovzxdq
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
+ ; CHECK: vpmovzxwd
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
+ ; CHECK: vpmovzxwq
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpmuldq
+ %res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vbroadcasti128(i8* %a0) {
+ ; CHECK: vbroadcasti128
+ %res = call <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8* %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vbroadcasti128(i8*) nounwind readonly
+
+define <4 x double> @test_x86_avx2_vbroadcast_sd_pd_256(<2 x double> %a0) {
+ ; CHECK: vbroadcastsd
+ %res = call <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double> %a0) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.vbroadcast.sd.pd.256(<2 x double>) nounwind readonly
+
+
+define <4 x float> @test_x86_avx2_vbroadcast_ss_ps(<4 x float> %a0) {
+ ; CHECK: vbroadcastss
+ %res = call <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.avx2.vbroadcast.ss.ps(<4 x float>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
+ ; CHECK: vbroadcastss
+ %res = call <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpblendd
+ %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpblendd
+ %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
+
+
+define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
+ ; CHECK: vpbroadcastb
+ %res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readonly
+
+
+define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
+ ; CHECK: vpbroadcastb
+ %res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readonly
+
+
+define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
+ ; CHECK: vpbroadcastw
+ %res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readonly
+
+
+define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
+ ; CHECK: vpbroadcastw
+ %res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
+ ; CHECK: vpbroadcastd
+ %res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
+ ; CHECK: vpbroadcastd
+ %res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
+ ; CHECK: vpbroadcastq
+ %res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
+ ; CHECK: vpbroadcastq
+ %res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpermd
+ %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
+
+
+define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x float> %a1) {
+ ; CHECK: vpermps
+ %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x float> %a1) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x float>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_permq(<4 x i64> %a0) {
+ ; CHECK: vpermq
+ %res = call <4 x i64> @llvm.x86.avx2.permq(<4 x i64> %a0, i8 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.permq(<4 x i64>, i8) nounwind readonly
+
+
+define <4 x double> @test_x86_avx2_permpd(<4 x double> %a0) {
+ ; CHECK: vpermpd
+ %res = call <4 x double> @llvm.x86.avx2.permpd(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.avx2.permpd(<4 x double>, i8) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_vperm2i128(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vperm2i128
+ %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readonly
+
+
+define <2 x i64> @test_x86_avx2_vextracti128(<4 x i64> %a0) {
+ ; CHECK: vextracti128
+ %res = call <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64> %a0, i8 7) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.vextracti128(<4 x i64>, i8) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vinserti128
+ %res = call <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64> %a0, <2 x i64> %a1, i8 7) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
+ ; CHECK: vpmaskmovq
+ %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
+ ; CHECK: vpmaskmovq
+ %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
+ ; CHECK: vpmaskmovd
+ %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
+ ; CHECK: vpmaskmovd
+ %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
+
+
+define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmaskmovq
+ call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
+ ; CHECK: vpmaskmovq
+ call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmaskmovd
+ call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
+ ; CHECK: vpmaskmovd
+ call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
+ ret void
+}
+declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
+
+
+define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsllvd
+ %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpsllvd
+ %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsllvq
+ %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vpsllvq
+ %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsrlvd
+ %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpsrlvd
+ %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpsrlvq
+ %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
+ ; CHECK: vpsrlvq
+ %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
+
+
+define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpsravd
+ %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
+ ; CHECK: vpsravd
+ %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
+
+; This is checked here because the execution-dependency fix pass makes it hard
+; to test in AVX mode, where we don't have 256-bit integer instructions.
+define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
+ ; CHECK: vmovdqu
+ ; The add operation forces the integer execution domain.
+ %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
+ ret void
+}
+declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
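+
+; (A note on the add above: keeping the value in the integer execution domain
+; is what makes the unaligned store come out as vmovdqu rather than vmovups,
+; which is the behavior this test pins down.)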
diff --git a/test/CodeGen/X86/avx2-logic.ll b/test/CodeGen/X86/avx2-logic.ll
new file mode 100644
index 000000000000..13ebaa6f8797
--- /dev/null
+++ b/test/CodeGen/X86/avx2-logic.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpandn
+; CHECK: vpandn %ymm
+; CHECK: ret
+define <4 x i64> @vpandn(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %x = and <4 x i64> %a, %y
+ ret <4 x i64> %x
+}
+
+; CHECK: vpand
+; CHECK: vpand %ymm
+; CHECK: ret
+define <4 x i64> @vpand(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = and <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+; CHECK: vpor
+; CHECK: vpor %ymm
+; CHECK: ret
+define <4 x i64> @vpor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = or <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+; CHECK: vpxor
+; CHECK: vpxor %ymm
+; CHECK: ret
+define <4 x i64> @vpxor(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ ; Force the execution domain with an add.
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %x = xor <4 x i64> %a2, %b
+ ret <4 x i64> %x
+}
+
+; CHECK: vpblendvb
+; CHECK: vpblendvb %ymm
+; CHECK: ret
+define <32 x i8> @vpblendvb(<32 x i8> %x, <32 x i8> %y) {
+ %min_is_x = icmp ult <32 x i8> %x, %y
+ %min = select <32 x i1> %min_is_x, <32 x i8> %x, <32 x i8> %y
+ ret <32 x i8> %min
+}
+
+define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
+entry:
+; CHECK: signd:
+; CHECK: psignd
+; CHECK-NOT: sub
+; CHECK: ret
+ %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <8 x i32> zeroinitializer, %a
+ %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <8 x i32> %a, %0
+ %2 = and <8 x i32> %b.lobit, %sub
+ %cond = or <8 x i32> %1, %2
+ ret <8 x i32> %cond
+}
+
+define <8 x i32> @blendvb(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) nounwind {
+entry:
+; CHECK: blendvb:
+; CHECK: pblendvb
+; CHECK: ret
+ %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %sub = sub nsw <8 x i32> zeroinitializer, %a
+ %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = and <8 x i32> %c, %0
+ %2 = and <8 x i32> %a, %b.lobit
+ %cond = or <8 x i32> %1, %2
+ ret <8 x i32> %cond
+}
+
+define <8 x i32> @allOnes() nounwind {
+; CHECK: vpcmpeqd
+; CHECK-NOT: vinsert
+ ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <16 x i16> @allOnes2() nounwind {
+; CHECK: vpcmpeqd
+; CHECK-NOT: vinsert
+ ret <16 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+}
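+
+; (Both all-ones constants can be materialized by a single 256-bit vpcmpeqd of
+; a register against itself, which sets every bit; the CHECK-NOT lines guard
+; against building the constant from two 128-bit halves with vinsert.)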
diff --git a/test/CodeGen/X86/avx2-nontemporal.ll b/test/CodeGen/X86/avx2-nontemporal.ll
new file mode 100644
index 000000000000..0768aae48e8c
--- /dev/null
+++ b/test/CodeGen/X86/avx2-nontemporal.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -march=x86 -mattr=+avx2 | FileCheck %s
+
+define void @f(<8 x float> %A, i8* %B, <4 x double> %C, i32 %D, <4 x i64> %E) {
+; CHECK: vmovntps
+ %cast = bitcast i8* %B to <8 x float>*
+ %A2 = fadd <8 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+ store <8 x float> %A2, <8 x float>* %cast, align 16, !nontemporal !0
+; CHECK: vmovntdq
+ %cast1 = bitcast i8* %B to <4 x i64>*
+ %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
+ store <4 x i64> %E2, <4 x i64>* %cast1, align 16, !nontemporal !0
+; CHECK: vmovntpd
+ %cast2 = bitcast i8* %B to <4 x double>*
+ %C2 = fadd <4 x double> %C, <double 0x0, double 0x0, double 0x0, double 0x4200000000000000>
+ store <4 x double> %C2, <4 x double>* %cast2, align 16, !nontemporal !0
+; CHECK: movnti
+ %cast3 = bitcast i8* %B to i32*
+ store i32 %D, i32* %cast3, align 16, !nontemporal !0
+ ret void
+}
+
+!0 = metadata !{i32 1}
diff --git a/test/CodeGen/X86/avx2-palignr.ll b/test/CodeGen/X86/avx2-palignr.ll
new file mode 100644
index 000000000000..53b9da32ae8e
--- /dev/null
+++ b/test/CodeGen/X86/avx2-palignr.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <8 x i32> @test1(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test1:
+; CHECK: vpalignr $4
+ %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+ ret <8 x i32> %C
+}
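+; (vpalignr $4 shifts a per-lane concatenation of the two sources right by
+; 4 bytes, i.e. one i32 element, so within each 128-bit lane the result is the
+; last three elements of A followed by the first element of B, matching the
+; shuffle mask above.)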
+
+define <8 x i32> @test2(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test2:
+; CHECK: vpalignr $4
+ %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 undef, i32 12>
+ ret <8 x i32> %C
+}
+
+define <8 x i32> @test3(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test3:
+; CHECK: vpalignr $4
+ %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
+ ret <8 x i32> %C
+}
+
+define <8 x i32> @test4(<8 x i32> %A, <8 x i32> %B) nounwind {
+; CHECK: test4:
+; CHECK: vpalignr $8
+ %C = shufflevector <8 x i32> %A, <8 x i32> %B, <8 x i32> <i32 10, i32 11, i32 undef, i32 1, i32 14, i32 15, i32 4, i32 5>
+ ret <8 x i32> %C
+}
+
+define <16 x i16> @test5(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test5:
+; CHECK: vpalignr $6
+ %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 undef, i32 6, i32 7, i32 16, i32 17, i32 18, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+ ret <16 x i16> %C
+}
+
+define <16 x i16> @test6(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test6:
+; CHECK: vpalignr $6
+ %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 12, i32 13, i32 undef, i32 15, i32 24, i32 25, i32 26>
+ ret <16 x i16> %C
+}
+
+define <16 x i16> @test7(<16 x i16> %A, <16 x i16> %B) nounwind {
+; CHECK: test7:
+; CHECK: vpalignr $6
+ %C = shufflevector <16 x i16> %A, <16 x i16> %B, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i16> %C
+}
+
+define <32 x i8> @test8(<32 x i8> %A, <32 x i8> %B) nounwind {
+; CHECK: test8:
+; CHECK: palignr $5
+ %C = shufflevector <32 x i8> %A, <32 x i8> %B, <32 x i32> <i32 5, i32 6, i32 7, i32 undef, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52>
+ ret <32 x i8> %C
+}
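
The $N immediates checked above follow directly from the shuffle masks: within each 128-bit lane, vpalignr shifts the byte concatenation of its two sources right by N bytes, so a mask beginning at element 1 of an i32 vector is a $4. A minimal 128-bit sketch (illustrative, not from the patch):

define <4 x i32> @palignr_sketch(<4 x i32> %A, <4 x i32> %B) nounwind {
  ; elements 1-3 of %A followed by element 0 of %B: one i32 (4 bytes)
  ; of shift across the concatenation, hence palignr $4
  %C = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ret <4 x i32> %C
}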
diff --git a/test/CodeGen/X86/avx2-phaddsub.ll b/test/CodeGen/X86/avx2-phaddsub.ll
new file mode 100644
index 000000000000..4eac71d08b4f
--- /dev/null
+++ b/test/CodeGen/X86/avx2-phaddsub.ll
@@ -0,0 +1,73 @@
+; RUN: llc < %s -march=x86-64 -mattr=+avx2 | FileCheck %s
+
+; CHECK: phaddw1:
+; CHECK: vphaddw
+define <16 x i16> @phaddw1(<16 x i16> %x, <16 x i16> %y) {
+ %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+ %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+ %r = add <16 x i16> %a, %b
+ ret <16 x i16> %r
+}
+
+; CHECK: phaddw2:
+; CHECK: vphaddw
+define <16 x i16> @phaddw2(<16 x i16> %x, <16 x i16> %y) {
+ %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+ %b = shufflevector <16 x i16> %y, <16 x i16> %x, <16 x i32> <i32 16, i32 18, i32 20, i32 22, i32 0, i32 2, i32 4, i32 6, i32 24, i32 26, i32 28, i32 30, i32 8, i32 10, i32 12, i32 14>
+ %r = add <16 x i16> %a, %b
+ ret <16 x i16> %r
+}
+
+; CHECK: phaddd1:
+; CHECK: vphaddd
+define <8 x i32> @phaddd1(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = add <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phaddd2:
+; CHECK: vphaddd
+define <8 x i32> @phaddd2(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+ %b = shufflevector <8 x i32> %y, <8 x i32> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+ %r = add <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phaddd3:
+; CHECK: vphaddd
+define <8 x i32> @phaddd3(<8 x i32> %x) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+ %r = add <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phsubw1:
+; CHECK: vphsubw
+define <16 x i16> @phsubw1(<16 x i16> %x, <16 x i16> %y) {
+ %a = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
+ %b = shufflevector <16 x i16> %x, <16 x i16> %y, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 23, i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
+ %r = sub <16 x i16> %a, %b
+ ret <16 x i16> %r
+}
+
+; CHECK: phsubd1:
+; CHECK: vphsubd
+define <8 x i32> @phsubd1(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = sub <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
+
+; CHECK: phsubd2:
+; CHECK: vphsubd
+define <8 x i32> @phsubd2(<8 x i32> %x, <8 x i32> %y) {
+ %a = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 0, i32 undef, i32 8, i32 undef, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 1, i32 undef, i32 9, i32 11, i32 5, i32 7, i32 undef, i32 15>
+ %r = sub <8 x i32> %a, %b
+ ret <8 x i32> %r
+}
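
Every test above feeds the add/sub with one shuffle that collects the even-index elements and one that collects the odd-index elements; that pairing is the shape the horizontal-op matcher recognizes. A minimal 128-bit sketch (illustrative, not from the patch):

define <4 x i32> @phaddd_sketch(<4 x i32> %x, <4 x i32> %y) nounwind {
  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  ; sums adjacent pairs: x0+x1, x2+x3, y0+y1, y2+y3 -> phaddd
  %r = add <4 x i32> %a, %b
  ret <4 x i32> %r
}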
diff --git a/test/CodeGen/X86/avx2-shift.ll b/test/CodeGen/X86/avx2-shift.ll
new file mode 100644
index 000000000000..1f192a0d323c
--- /dev/null
+++ b/test/CodeGen/X86/avx2-shift.ll
@@ -0,0 +1,268 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: variable_shl0
+; CHECK: psllvd
+; CHECK: ret
+define <4 x i32> @variable_shl0(<4 x i32> %x, <4 x i32> %y) {
+ %k = shl <4 x i32> %x, %y
+ ret <4 x i32> %k
+}
+; CHECK: variable_shl1
+; CHECK: psllvd
+; CHECK: ret
+define <8 x i32> @variable_shl1(<8 x i32> %x, <8 x i32> %y) {
+ %k = shl <8 x i32> %x, %y
+ ret <8 x i32> %k
+}
+; CHECK: variable_shl2
+; CHECK: psllvq
+; CHECK: ret
+define <2 x i64> @variable_shl2(<2 x i64> %x, <2 x i64> %y) {
+ %k = shl <2 x i64> %x, %y
+ ret <2 x i64> %k
+}
+; CHECK: variable_shl3
+; CHECK: psllvq
+; CHECK: ret
+define <4 x i64> @variable_shl3(<4 x i64> %x, <4 x i64> %y) {
+ %k = shl <4 x i64> %x, %y
+ ret <4 x i64> %k
+}
+; CHECK: variable_srl0
+; CHECK: psrlvd
+; CHECK: ret
+define <4 x i32> @variable_srl0(<4 x i32> %x, <4 x i32> %y) {
+ %k = lshr <4 x i32> %x, %y
+ ret <4 x i32> %k
+}
+; CHECK: variable_srl1
+; CHECK: psrlvd
+; CHECK: ret
+define <8 x i32> @variable_srl1(<8 x i32> %x, <8 x i32> %y) {
+ %k = lshr <8 x i32> %x, %y
+ ret <8 x i32> %k
+}
+; CHECK: variable_srl2
+; CHECK: psrlvq
+; CHECK: ret
+define <2 x i64> @variable_srl2(<2 x i64> %x, <2 x i64> %y) {
+ %k = lshr <2 x i64> %x, %y
+ ret <2 x i64> %k
+}
+; CHECK: variable_srl3
+; CHECK: psrlvq
+; CHECK: ret
+define <4 x i64> @variable_srl3(<4 x i64> %x, <4 x i64> %y) {
+ %k = lshr <4 x i64> %x, %y
+ ret <4 x i64> %k
+}
+
+; CHECK: variable_sra0
+; CHECK: vpsravd
+; CHECK: ret
+define <4 x i32> @variable_sra0(<4 x i32> %x, <4 x i32> %y) {
+ %k = ashr <4 x i32> %x, %y
+ ret <4 x i32> %k
+}
+; CHECK: variable_sra1
+; CHECK: vpsravd
+; CHECK: ret
+define <8 x i32> @variable_sra1(<8 x i32> %x, <8 x i32> %y) {
+ %k = ashr <8 x i32> %x, %y
+ ret <8 x i32> %k
+}
+
+;;; Shift left
+; CHECK: vpslld
+define <8 x i32> @vshift00(<8 x i32> %a) nounwind readnone {
+ %s = shl <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsllw
+define <16 x i16> @vshift01(<16 x i16> %a) nounwind readnone {
+ %s = shl <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: vpsllq
+define <4 x i64> @vshift02(<4 x i64> %a) nounwind readnone {
+ %s = shl <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %s
+}
+
+;;; Logical Shift right
+; CHECK: vpsrld
+define <8 x i32> @vshift03(<8 x i32> %a) nounwind readnone {
+ %s = lshr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsrlw
+define <16 x i16> @vshift04(<16 x i16> %a) nounwind readnone {
+ %s = lshr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: vpsrlq
+define <4 x i64> @vshift05(<4 x i64> %a) nounwind readnone {
+ %s = lshr <4 x i64> %a, <i64 2, i64 2, i64 2, i64 2>
+ ret <4 x i64> %s
+}
+
+;;; Arithmetic Shift right
+; CHECK: vpsrad
+define <8 x i32> @vshift06(<8 x i32> %a) nounwind readnone {
+ %s = ashr <8 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+ ret <8 x i32> %s
+}
+
+; CHECK: vpsraw
+define <16 x i16> @vshift07(<16 x i16> %a) nounwind readnone {
+ %s = ashr <16 x i16> %a, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+ ret <16 x i16> %s
+}
+
+; CHECK: variable_sra0_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
+ %y1 = load <4 x i32>* %y
+ %k = ashr <4 x i32> %x, %y1
+ ret <4 x i32> %k
+}
+
+; CHECK: variable_sra1_load
+; CHECK: vpsravd (%
+; CHECK: ret
+define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
+ %y1 = load <8 x i32>* %y
+ %k = ashr <8 x i32> %x, %y1
+ ret <8 x i32> %k
+}
+
+; CHECK: variable_shl0_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
+ %y1 = load <4 x i32>* %y
+ %k = shl <4 x i32> %x, %y1
+ ret <4 x i32> %k
+}
+; CHECK: variable_shl1_load
+; CHECK: vpsllvd (%
+; CHECK: ret
+define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
+ %y1 = load <8 x i32>* %y
+ %k = shl <8 x i32> %x, %y1
+ ret <8 x i32> %k
+}
+; CHECK: variable_shl2_load
+; CHECK: vpsllvq (%
+; CHECK: ret
+define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
+ %y1 = load <2 x i64>* %y
+ %k = shl <2 x i64> %x, %y1
+ ret <2 x i64> %k
+}
+; CHECK: variable_shl3_load
+; CHECK: vpsllvq (%
+; CHECK: ret
+define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
+ %y1 = load <4 x i64>* %y
+ %k = shl <4 x i64> %x, %y1
+ ret <4 x i64> %k
+}
+; CHECK: variable_srl0_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
+ %y1 = load <4 x i32>* %y
+ %k = lshr <4 x i32> %x, %y1
+ ret <4 x i32> %k
+}
+; CHECK: variable_srl1_load
+; CHECK: vpsrlvd (%
+; CHECK: ret
+define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
+ %y1 = load <8 x i32>* %y
+ %k = lshr <8 x i32> %x, %y1
+ ret <8 x i32> %k
+}
+; CHECK: variable_srl2_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
+ %y1 = load <2 x i64>* %y
+ %k = lshr <2 x i64> %x, %y1
+ ret <2 x i64> %k
+}
+; CHECK: variable_srl3_load
+; CHECK: vpsrlvq (%
+; CHECK: ret
+define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
+ %y1 = load <4 x i64>* %y
+ %k = lshr <4 x i64> %x, %y1
+ ret <4 x i64> %k
+}
+
+define <32 x i8> @shl9(<32 x i8> %A) nounwind {
+ %B = shl <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <32 x i8> %B
+; CHECK: shl9:
+; CHECK: vpsllw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @shr9(<32 x i8> %A) nounwind {
+ %B = lshr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <32 x i8> %B
+; CHECK: shr9:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8_7(<32 x i8> %A) nounwind {
+ %B = ashr <32 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ret <32 x i8> %B
+; CHECK: sra_v32i8_7:
+; CHECK: vpxor
+; CHECK: vpcmpgtb
+; CHECK: ret
+}
+
+define <32 x i8> @sra_v32i8(<32 x i8> %A) nounwind {
+ %B = ashr <32 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <32 x i8> %B
+; CHECK: sra_v32i8:
+; CHECK: vpsrlw $3
+; CHECK: vpand
+; CHECK: vpxor
+; CHECK: vpsubb
+; CHECK: ret
+}
+
+; CHECK: _sext_v16i16
+; CHECK: vpsllw
+; CHECK: vpsraw
+; CHECK-NOT: vinsertf128
+define <16 x i16> @sext_v16i16(<16 x i16> %a) nounwind {
+ %b = trunc <16 x i16> %a to <16 x i8>
+ %c = sext <16 x i8> %b to <16 x i16>
+ ret <16 x i16> %c
+}
+
+; CHECK: _sext_v8i32
+; CHECK: vpslld
+; CHECK: vpsrad
+; CHECK-NOT: vinsertf128
+define <8 x i32> @sext_v8i32(<8 x i32> %a) nounwind {
+ %b = trunc <8 x i32> %a to <8 x i16>
+ %c = sext <8 x i16> %b to <8 x i32>
+ ret <8 x i32> %c
+}
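
The v32i8 cases above exist because x86 has no byte-granularity shift: a constant shl/lshr is emulated with a word shift plus a mask, and ashr needs further fixup (or, for a shift by 7, just a signed compare against zero). A hedged 128-bit sketch of the computation the CHECK lines expect (illustrative, not from the patch):

define <16 x i8> @shl_bytes_sketch(<16 x i8> %A) nounwind {
  %w = bitcast <16 x i8> %A to <8 x i16>
  %s = shl <8 x i16> %w, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %b = bitcast <8 x i16> %s to <16 x i8>
  ; masking with 0xF8 (-8) clears the bits carried across byte boundaries
  %r = and <16 x i8> %b, <i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8>
  ret <16 x i8> %r
}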
diff --git a/test/CodeGen/X86/avx2-unpack.ll b/test/CodeGen/X86/avx2-unpack.ll
new file mode 100644
index 000000000000..6d17443489ae
--- /dev/null
+++ b/test/CodeGen/X86/avx2-unpack.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckldq
+define <8 x i32> @unpacklodq1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
+ ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpcklqdq
+define <4 x i64> @unpacklqdq1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd(<16 x i16> %src1, <16 x i16> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+ ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpckhbw
+define <32 x i8> @unpackhbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+ ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpcklbw
+define <32 x i8> @unpacklbw(<32 x i8> %src1, <32 x i8> %src2) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <32 x i8> %src1, <32 x i8> %src2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
+ ret <32 x i8> %shuffle.i
+}
+
+; CHECK: vpunpckhdq
+define <8 x i32> @unpackhidq1_undef(<8 x i32> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <8 x i32> %src1, <8 x i32> %src1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
+ ret <8 x i32> %shuffle.i
+}
+
+; CHECK: vpunpckhqdq
+define <4 x i64> @unpackhiqdq1_undef(<4 x i64> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i64> %shuffle.i
+}
+
+; CHECK: vpunpckhwd
+define <16 x i16> @unpackhwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+ ret <16 x i16> %shuffle.i
+}
+
+; CHECK: vpunpcklwd
+define <16 x i16> @unpacklwd_undef(<16 x i16> %src1) nounwind uwtable readnone ssp {
+entry:
+ %shuffle.i = shufflevector <16 x i16> %src1, <16 x i16> %src1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
+ ret <16 x i16> %shuffle.i
+}
+
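All of the masks above interleave corresponding halves of the two sources within each 128-bit lane, which is exactly the unpack pattern. A minimal 128-bit sketch (illustrative, not from the patch):

define <4 x i32> @unpacklo_sketch(<4 x i32> %a, <4 x i32> %b) nounwind {
  ; interleave the low halves: a0 b0 a1 b1 -> punpckldq
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i32> %r
}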
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
new file mode 100644
index 000000000000..1a78414761ca
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpbroadcastb (%
+define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i8* %ptr, align 4
+ %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
+ %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
+ %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
+ %q3 = insertelement <16 x i8> %q2, i8 %q, i32 3
+ %q4 = insertelement <16 x i8> %q3, i8 %q, i32 4
+ %q5 = insertelement <16 x i8> %q4, i8 %q, i32 5
+ %q6 = insertelement <16 x i8> %q5, i8 %q, i32 6
+ %q7 = insertelement <16 x i8> %q6, i8 %q, i32 7
+ %q8 = insertelement <16 x i8> %q7, i8 %q, i32 8
+ %q9 = insertelement <16 x i8> %q8, i8 %q, i32 9
+ %qa = insertelement <16 x i8> %q9, i8 %q, i32 10
+ %qb = insertelement <16 x i8> %qa, i8 %q, i32 11
+ %qc = insertelement <16 x i8> %qb, i8 %q, i32 12
+ %qd = insertelement <16 x i8> %qc, i8 %q, i32 13
+ %qe = insertelement <16 x i8> %qd, i8 %q, i32 14
+ %qf = insertelement <16 x i8> %qe, i8 %q, i32 15
+ ret <16 x i8> %qf
+}
+; CHECK: vpbroadcastb (%
+define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i8* %ptr, align 4
+ %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
+ %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
+ %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
+ %q3 = insertelement <32 x i8> %q2, i8 %q, i32 3
+ %q4 = insertelement <32 x i8> %q3, i8 %q, i32 4
+ %q5 = insertelement <32 x i8> %q4, i8 %q, i32 5
+ %q6 = insertelement <32 x i8> %q5, i8 %q, i32 6
+ %q7 = insertelement <32 x i8> %q6, i8 %q, i32 7
+ %q8 = insertelement <32 x i8> %q7, i8 %q, i32 8
+ %q9 = insertelement <32 x i8> %q8, i8 %q, i32 9
+ %qa = insertelement <32 x i8> %q9, i8 %q, i32 10
+ %qb = insertelement <32 x i8> %qa, i8 %q, i32 11
+ %qc = insertelement <32 x i8> %qb, i8 %q, i32 12
+ %qd = insertelement <32 x i8> %qc, i8 %q, i32 13
+ %qe = insertelement <32 x i8> %qd, i8 %q, i32 14
+ %qf = insertelement <32 x i8> %qe, i8 %q, i32 15
+
+ %q20 = insertelement <32 x i8> %qf, i8 %q, i32 16
+ %q21 = insertelement <32 x i8> %q20, i8 %q, i32 17
+ %q22 = insertelement <32 x i8> %q21, i8 %q, i32 18
+ %q23 = insertelement <32 x i8> %q22, i8 %q, i32 19
+ %q24 = insertelement <32 x i8> %q23, i8 %q, i32 20
+ %q25 = insertelement <32 x i8> %q24, i8 %q, i32 21
+ %q26 = insertelement <32 x i8> %q25, i8 %q, i32 22
+ %q27 = insertelement <32 x i8> %q26, i8 %q, i32 23
+ %q28 = insertelement <32 x i8> %q27, i8 %q, i32 24
+ %q29 = insertelement <32 x i8> %q28, i8 %q, i32 25
+ %q2a = insertelement <32 x i8> %q29, i8 %q, i32 26
+ %q2b = insertelement <32 x i8> %q2a, i8 %q, i32 27
+ %q2c = insertelement <32 x i8> %q2b, i8 %q, i32 28
+ %q2d = insertelement <32 x i8> %q2c, i8 %q, i32 29
+ %q2e = insertelement <32 x i8> %q2d, i8 %q, i32 30
+ %q2f = insertelement <32 x i8> %q2e, i8 %q, i32 31
+ ret <32 x i8> %q2f
+}
+; CHECK: vpbroadcastw (%
+
+define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i16* %ptr, align 4
+ %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
+ %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
+ %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
+ %q3 = insertelement <8 x i16> %q2, i16 %q, i32 3
+ %q4 = insertelement <8 x i16> %q3, i16 %q, i32 4
+ %q5 = insertelement <8 x i16> %q4, i16 %q, i32 5
+ %q6 = insertelement <8 x i16> %q5, i16 %q, i32 6
+ %q7 = insertelement <8 x i16> %q6, i16 %q, i32 7
+ ret <8 x i16> %q7
+}
+; CHECK: vpbroadcastw (%
+define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i16* %ptr, align 4
+ %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
+ %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
+ %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
+ %q3 = insertelement <16 x i16> %q2, i16 %q, i32 3
+ %q4 = insertelement <16 x i16> %q3, i16 %q, i32 4
+ %q5 = insertelement <16 x i16> %q4, i16 %q, i32 5
+ %q6 = insertelement <16 x i16> %q5, i16 %q, i32 6
+ %q7 = insertelement <16 x i16> %q6, i16 %q, i32 7
+ %q8 = insertelement <16 x i16> %q7, i16 %q, i32 8
+ %q9 = insertelement <16 x i16> %q8, i16 %q, i32 9
+ %qa = insertelement <16 x i16> %q9, i16 %q, i32 10
+ %qb = insertelement <16 x i16> %qa, i16 %q, i32 11
+ %qc = insertelement <16 x i16> %qb, i16 %q, i32 12
+ %qd = insertelement <16 x i16> %qc, i16 %q, i32 13
+ %qe = insertelement <16 x i16> %qd, i16 %q, i32 14
+ %qf = insertelement <16 x i16> %qe, i16 %q, i32 15
+ ret <16 x i16> %qf
+}
+; CHECK: vpbroadcastd (%
+define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i32* %ptr, align 4
+ %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
+ %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
+ %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
+ %q3 = insertelement <4 x i32> %q2, i32 %q, i32 3
+ ret <4 x i32> %q3
+}
+; CHECK: vpbroadcastd (%
+define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i32* %ptr, align 4
+ %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
+ %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
+ %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
+ %q3 = insertelement <8 x i32> %q2, i32 %q, i32 3
+ %q4 = insertelement <8 x i32> %q3, i32 %q, i32 4
+ %q5 = insertelement <8 x i32> %q4, i32 %q, i32 5
+ %q6 = insertelement <8 x i32> %q5, i32 %q, i32 6
+ %q7 = insertelement <8 x i32> %q6, i32 %q, i32 7
+ ret <8 x i32> %q7
+}
+; CHECK: vpbroadcastq (%
+define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i64* %ptr, align 4
+ %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
+ %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
+ ret <2 x i64> %q1
+}
+; CHECK: vpbroadcastq (%
+define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load i64* %ptr, align 4
+ %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
+ %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
+ %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
+ %q3 = insertelement <4 x i64> %q2, i64 %q, i32 3
+ ret <4 x i64> %q3
+}
+
+; Make sure that we still don't support broadcasting a double into a 128-bit
+; vector; this used to crash.
+define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
+entry:
+ %q = load double* %ptr, align 4
+ %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
+ %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
+ ret <2 x double> %vecinit2.i
+}
+
+; CHECK: V111
+; CHECK: vpbroadcastd
+; CHECK: ret
+define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
+entry:
+ %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <8 x i32> %g
+}
+
+; CHECK: _e2
+; CHECK: vbroadcastss
+; CHECK: ret
+define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
+ %vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
+ %vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
+ %vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
+ %vecinit6.i = insertelement <4 x float> %vecinit4.i, float 0xbf80000000000000, i32 3
+ ret <4 x float> %vecinit6.i
+}
+
+; CHECK: _e4
+; CHECK-NOT: broadcast
+; CHECK: ret
+define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
+ %vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
+ %vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
+ %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
+ %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 52, i32 3
+ %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 52, i32 3
+ %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 52, i32 3
+ %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 52, i32 3
+ %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 52, i32 3
+ ret <8 x i8> %vecinit7.i
+}
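
The tests above spell each splat out as a full insertelement chain. The same load-and-splat is more commonly written with a zero shuffle mask; whether the broadcast matcher handles that form as well is an assumption here, since the patch only tests the chain form:

define <4 x i32> @splat_sketch(i32* %p) nounwind {
  %q = load i32* %p
  %v = insertelement <4 x i32> undef, i32 %q, i32 0
  ; a zeroinitializer mask replicates lane 0 into every lane
  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %s
}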
diff --git a/test/CodeGen/X86/avx2-vperm2i128.ll b/test/CodeGen/X86/avx2-vperm2i128.ll
new file mode 100644
index 000000000000..1937db5d7c16
--- /dev/null
+++ b/test/CodeGen/X86/avx2-vperm2i128.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vperm2i128 $17
+define <32 x i8> @E(<32 x i8> %a, <32 x i8> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <32 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %shuffle = shufflevector <32 x i8> %a2, <32 x i8> %b, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <32 x i8> %shuffle
+}
+
+; CHECK: vperm2i128 $3
+define <4 x i64> @E2(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
+ %shuffle = shufflevector <4 x i64> %a2, <4 x i64> %b, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
+ ret <4 x i64> %shuffle
+}
+
+; CHECK: vperm2i128 $49
+define <8 x i32> @E3(<8 x i32> %a, <8 x i32> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <8 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ %shuffle = shufflevector <8 x i32> %a2, <8 x i32> %b, <8 x i32> <i32 undef, i32 5, i32 undef, i32 7, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i32> %shuffle
+}
+
+; CHECK: vperm2i128 $2
+define <16 x i16> @E4(<16 x i16> %a, <16 x i16> %b) nounwind uwtable readnone ssp {
+entry:
+ ; add forces execution domain
+ %a2 = add <16 x i16> %a, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shuffle = shufflevector <16 x i16> %a2, <16 x i16> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i16> %shuffle
+}
+
+; CHECK: vperm2i128 $2, (%
+define <16 x i16> @E5(<16 x i16>* %a, <16 x i16>* %b) nounwind uwtable readnone ssp {
+entry:
+ %c = load <16 x i16>* %a
+ %d = load <16 x i16>* %b
+ %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <16 x i16> %shuffle
+}
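
The immediates checked above decode as lane selectors: bits [1:0] pick the result's low 128-bit lane from {src1.lo, src1.hi, src2.lo, src2.hi} = {0,1,2,3} and bits [5:4] pick the high lane, so $17 (0x11) duplicates src1's high lane and $3 places src2's high lane below src1's low lane. An illustrative sketch (not from the patch; the $18 immediate is inferred from that encoding):

define <4 x i64> @vperm2i128_sketch(<4 x i64> %a, <4 x i64> %b) nounwind {
  ; low lane from %b.lo (selector 2), high lane from %a.hi (selector 1):
  ; imm = 2 | (1 << 4) = 0x12 = $18
  %s = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x i64> %s
}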
diff --git a/test/CodeGen/X86/bc-extract.ll b/test/CodeGen/X86/bc-extract.ll
index ac972a8e2e5b..ceabcb71a78b 100644
--- a/test/CodeGen/X86/bc-extract.ll
+++ b/test/CodeGen/X86/bc-extract.ll
@@ -11,7 +11,7 @@ entry:
define float @extractFloat2() nounwind {
entry:
- ; CHECK: pxor %xmm0, %xmm0
+ ; CHECK: xorps %xmm0, %xmm0
%tmp4 = bitcast <1 x double> <double 0x000000003F800000> to <2 x float>
%tmp5 = extractelement <2 x float> %tmp4, i32 1
ret float %tmp5
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
new file mode 100644
index 000000000000..3a10c70ada85
--- /dev/null
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -promote-elements -mattr=+sse41 | FileCheck %s
+
+
+; In this test we check that the sign-extension of the mask bit is performed
+; by shifting the needed bit to the MSB rather than by using shl+sra.
+
+;CHECK: vsel_float
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
+ %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
+ ret <4 x float> %vsel
+}
+
+;CHECK: vsel_4xi8
+;CHECK: pslld
+;CHECK-NEXT: blendvps
+;CHECK: ret
+define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
+ %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i8> %v1, <4 x i8> %v2
+ ret <4 x i8> %vsel
+}
+
+
+; We do not have native support for v8i16 blends and we have to use the
+; pblendvb instruction or a sequence of NAND/OR/AND. Make sure that we do
+; not reduce the mask in this case.
+;CHECK: vsel_8xi16
+;CHECK: psllw
+;CHECK: psraw
+;CHECK: pblendvb
+;CHECK: ret
+define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
+ %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
+ ret <8 x i16> %vsel
+}
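
A small worked sketch of the point made in the comment at the top of this file (illustrative, not from the patch): blendvps consults only each lane's sign bit, so a constant i1 mask never needs a real sign extension.

define <4 x i32> @msb_mask_sketch() nounwind {
  ; <1,0,0,0> shifted left by 31 yields <0x80000000,0,0,0>; the single
  ; pslld suffices, no psrad (sign-extension) step is required
  %m = shl <4 x i32> <i32 1, i32 0, i32 0, i32 0>, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %m
}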
diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll
new file mode 100644
index 000000000000..167d522d47d8
--- /dev/null
+++ b/test/CodeGen/X86/block-placement.ll
@@ -0,0 +1,930 @@
+; RUN: llc -mtriple=i686-linux -enable-block-placement < %s | FileCheck %s
+
+declare void @error(i32 %i, i32 %a, i32 %b)
+
+define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) {
+; Test a chain of ifs, where the block guarded by the if is error handling code
+; that is not expected to run.
+; CHECK: test_ifchains:
+; CHECK: %entry
+; CHECK: %else1
+; CHECK: %else2
+; CHECK: %else3
+; CHECK: %else4
+; CHECK: %exit
+; CHECK: %then1
+; CHECK: %then2
+; CHECK: %then3
+; CHECK: %then4
+; CHECK: %then5
+
+entry:
+ %gep1 = getelementptr i32* %a, i32 1
+ %val1 = load i32* %gep1
+ %cond1 = icmp ugt i32 %val1, 1
+ br i1 %cond1, label %then1, label %else1, !prof !0
+
+then1:
+ call void @error(i32 %i, i32 1, i32 %b)
+ br label %else1
+
+else1:
+ %gep2 = getelementptr i32* %a, i32 2
+ %val2 = load i32* %gep2
+ %cond2 = icmp ugt i32 %val2, 2
+ br i1 %cond2, label %then2, label %else2, !prof !0
+
+then2:
+ call void @error(i32 %i, i32 1, i32 %b)
+ br label %else2
+
+else2:
+ %gep3 = getelementptr i32* %a, i32 3
+ %val3 = load i32* %gep3
+ %cond3 = icmp ugt i32 %val3, 3
+ br i1 %cond3, label %then3, label %else3, !prof !0
+
+then3:
+ call void @error(i32 %i, i32 1, i32 %b)
+ br label %else3
+
+else3:
+ %gep4 = getelementptr i32* %a, i32 4
+ %val4 = load i32* %gep4
+ %cond4 = icmp ugt i32 %val4, 4
+ br i1 %cond4, label %then4, label %else4, !prof !0
+
+then4:
+ call void @error(i32 %i, i32 1, i32 %b)
+ br label %else4
+
+else4:
+ %gep5 = getelementptr i32* %a, i32 3
+ %val5 = load i32* %gep5
+ %cond5 = icmp ugt i32 %val5, 3
+ br i1 %cond5, label %then5, label %exit, !prof !0
+
+then5:
+ call void @error(i32 %i, i32 1, i32 %b)
+ br label %exit
+
+exit:
+ ret i32 %b
+}
+
+define i32 @test_loop_cold_blocks(i32 %i, i32* %a) {
+; Check that we sink cold loop blocks after the hot loop body.
+; CHECK: test_loop_cold_blocks:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %unlikely1
+; CHECK: %unlikely2
+; CHECK: %exit
+
+entry:
+ br label %body1
+
+body1:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body3 ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body3 ]
+ %unlikelycond1 = icmp slt i32 %base, 42
+ br i1 %unlikelycond1, label %unlikely1, label %body2, !prof !0
+
+unlikely1:
+ call void @error(i32 %i, i32 1, i32 %base)
+ br label %body2
+
+body2:
+ %unlikelycond2 = icmp sgt i32 %base, 21
+ br i1 %unlikelycond2, label %unlikely2, label %body3, !prof !0
+
+unlikely2:
+ call void @error(i32 %i, i32 2, i32 %base)
+ br label %body3
+
+body3:
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body1
+
+exit:
+ ret i32 %sum
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+
+define i32 @test_loop_early_exits(i32 %i, i32* %a) {
+; Check that we sink early exit blocks out of loop bodies.
+; CHECK: test_loop_early_exits:
+; CHECK: %entry
+; CHECK: %body2
+; CHECK: %body3
+; CHECK: %body4
+; CHECK: %body1
+; CHECK: %bail1
+; CHECK: %bail2
+; CHECK: %bail3
+; CHECK: %exit
+
+entry:
+ br label %body1
+
+body1:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body4 ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body4 ]
+ %bailcond1 = icmp eq i32 %base, 42
+ br i1 %bailcond1, label %bail1, label %body2
+
+bail1:
+ ret i32 -1
+
+body2:
+ %bailcond2 = icmp eq i32 %base, 43
+ br i1 %bailcond2, label %bail2, label %body3
+
+bail2:
+ ret i32 -2
+
+body3:
+ %bailcond3 = icmp eq i32 %base, 44
+ br i1 %bailcond3, label %bail3, label %body4
+
+bail3:
+ ret i32 -3
+
+body4:
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body1
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @test_loop_rotate(i32 %i, i32* %a) {
+; Check that we rotate conditional exits from the loop to the bottom of the
+; loop, eliminating unconditional branches to the top.
+; CHECK: test_loop_rotate:
+; CHECK: %entry
+; CHECK: %body1
+; CHECK: %body0
+; CHECK: %exit
+
+entry:
+ br label %body0
+
+body0:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body1
+
+body1:
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %bailcond1 = icmp eq i32 %sum, 42
+ br label %body0
+
+exit:
+ ret i32 %base
+}
+
+define void @test_loop_rotate_reversed_blocks() {
+; This test case (greatly reduced from an Olden benchmark) ensures that the loop
+; rotate implementation doesn't assume that loops are laid out in a particular
+; order. The first loop will get split into two basic blocks, with the loop
+; header coming after the loop latch.
+;
+; CHECK: test_loop_rotate_reversed_blocks
+; CHECK: %entry
+; Look for a jump into the middle of the loop, and no branches mid-way.
+; CHECK: jmp
+; CHECK: %loop1
+; CHECK-NOT: j{{\w*}} .LBB{{.*}}
+; CHECK: %loop1
+; CHECK: je
+
+entry:
+ %cond1 = load volatile i1* undef
+ br i1 %cond1, label %loop2.preheader, label %loop1
+
+loop1:
+ call i32 @f()
+ %cond2 = load volatile i1* undef
+ br i1 %cond2, label %loop2.preheader, label %loop1
+
+loop2.preheader:
+ call i32 @f()
+ %cond3 = load volatile i1* undef
+ br i1 %cond3, label %exit, label %loop2
+
+loop2:
+ call i32 @f()
+ %cond4 = load volatile i1* undef
+ br i1 %cond4, label %exit, label %loop2
+
+exit:
+ ret void
+}
+
+define i32 @test_loop_align(i32 %i, i32* %a) {
+; Check that we provide basic loop body alignment with the block placement
+; pass.
+; CHECK: test_loop_align:
+; CHECK: %entry
+; CHECK: .align [[ALIGN:[0-9]+]],
+; CHECK-NEXT: %body
+; CHECK: %exit
+
+entry:
+ br label %body
+
+body:
+ %iv = phi i32 [ 0, %entry ], [ %next, %body ]
+ %base = phi i32 [ 0, %entry ], [ %sum, %body ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %0 = load i32* %arrayidx
+ %sum = add nsw i32 %0, %base
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %body
+
+exit:
+ ret i32 %sum
+}
+
+define i32 @test_nested_loop_align(i32 %i, i32* %a, i32* %b) {
+; Check that we provide nested loop body alignment.
+; CHECK: test_nested_loop_align:
+; CHECK: %entry
+; CHECK: .align [[ALIGN]],
+; CHECK-NEXT: %loop.body.1
+; CHECK: .align [[ALIGN]],
+; CHECK-NEXT: %inner.loop.body
+; CHECK-NOT: .align
+; CHECK: %exit
+
+entry:
+ br label %loop.body.1
+
+loop.body.1:
+ %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
+ %arrayidx = getelementptr inbounds i32* %a, i32 %iv
+ %bidx = load i32* %arrayidx
+ br label %inner.loop.body
+
+inner.loop.body:
+ %inner.iv = phi i32 [ 0, %loop.body.1 ], [ %inner.next, %inner.loop.body ]
+ %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
+ %scaled_idx = mul i32 %bidx, %iv
+ %inner.arrayidx = getelementptr inbounds i32* %b, i32 %scaled_idx
+ %0 = load i32* %inner.arrayidx
+ %sum = add nsw i32 %0, %base
+ %inner.next = add i32 %iv, 1
+ %inner.exitcond = icmp eq i32 %inner.next, %i
+ br i1 %inner.exitcond, label %loop.body.2, label %inner.loop.body
+
+loop.body.2:
+ %next = add i32 %iv, 1
+ %exitcond = icmp eq i32 %next, %i
+ br i1 %exitcond, label %exit, label %loop.body.1
+
+exit:
+ ret i32 %sum
+}
+
+define void @unnatural_cfg1() {
+; Test that we can handle a loop with an inner unnatural loop at the end of
+; a function. This is a gross CFG reduced from single-source GCC.
+; CHECK: unnatural_cfg1
+; CHECK: %entry
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.body3
+
+entry:
+ br label %loop.header
+
+loop.header:
+ br label %loop.body1
+
+loop.body1:
+ br i1 undef, label %loop.body3, label %loop.body2
+
+loop.body2:
+ %ptr = load i32** undef, align 4
+ br label %loop.body3
+
+loop.body3:
+ %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
+ %bcmyptr = bitcast i32* %myptr to i32*
+ %val = load i32* %bcmyptr, align 4
+ %comp = icmp eq i32 %val, 48
+ br i1 %comp, label %loop.body4, label %loop.body5
+
+loop.body4:
+ br i1 undef, label %loop.header, label %loop.body5
+
+loop.body5:
+ %ptr2 = load i32** undef, align 4
+ br label %loop.body3
+}
+
+define void @unnatural_cfg2() {
+; Test that we can handle a loop with a nested natural loop *and* an unnatural
+; loop. This was reduced from a crash on block placement when run over
+; single-source GCC.
+; CHECK: unnatural_cfg2
+; CHECK: %entry
+; CHECK: %loop.body1
+; CHECK: %loop.body2
+; CHECK: %loop.header
+; CHECK: %loop.body3
+; CHECK: %loop.inner1.begin
+; The end block is folded with %loop.body3...
+; CHECK-NOT: %loop.inner1.end
+; CHECK: %loop.body4
+; CHECK: %loop.inner2.begin
+; The loop.inner2.end block is folded
+; CHECK: %bail
+
+entry:
+ br label %loop.header
+
+loop.header:
+ %comp0 = icmp eq i32* undef, null
+ br i1 %comp0, label %bail, label %loop.body1
+
+loop.body1:
+ %val0 = load i32** undef, align 4
+ br i1 undef, label %loop.body2, label %loop.inner1.begin
+
+loop.body2:
+ br i1 undef, label %loop.body4, label %loop.body3
+
+loop.body3:
+ %ptr1 = getelementptr inbounds i32* %val0, i32 0
+ %castptr1 = bitcast i32* %ptr1 to i32**
+ %val1 = load i32** %castptr1, align 4
+ br label %loop.inner1.begin
+
+loop.inner1.begin:
+ %valphi = phi i32* [ %val2, %loop.inner1.end ], [ %val1, %loop.body3 ], [ %val0, %loop.body1 ]
+ %castval = bitcast i32* %valphi to i32*
+ %comp1 = icmp eq i32 undef, 48
+ br i1 %comp1, label %loop.inner1.end, label %loop.body4
+
+loop.inner1.end:
+ %ptr2 = getelementptr inbounds i32* %valphi, i32 0
+ %castptr2 = bitcast i32* %ptr2 to i32**
+ %val2 = load i32** %castptr2, align 4
+ br label %loop.inner1.begin
+
+loop.body4.dead:
+ br label %loop.body4
+
+loop.body4:
+ %comp2 = icmp ult i32 undef, 3
+ br i1 %comp2, label %loop.inner2.begin, label %loop.end
+
+loop.inner2.begin:
+ br i1 false, label %loop.end, label %loop.inner2.end
+
+loop.inner2.end:
+ %comp3 = icmp eq i32 undef, 1769472
+ br i1 %comp3, label %loop.end, label %loop.inner2.begin
+
+loop.end:
+ br label %loop.header
+
+bail:
+ unreachable
+}
+
+define i32 @problematic_switch() {
+; This function's CFG caused overflow in the machine branch probability
+; calculation, triggering asserts. Make sure we don't crash on it.
+; CHECK: problematic_switch
+
+entry:
+ switch i32 undef, label %exit [
+ i32 879, label %bogus
+ i32 877, label %step
+ i32 876, label %step
+ i32 875, label %step
+ i32 874, label %step
+ i32 873, label %step
+ i32 872, label %step
+ i32 868, label %step
+ i32 867, label %step
+ i32 866, label %step
+ i32 861, label %step
+ i32 860, label %step
+ i32 856, label %step
+ i32 855, label %step
+ i32 854, label %step
+ i32 831, label %step
+ i32 830, label %step
+ i32 829, label %step
+ i32 828, label %step
+ i32 815, label %step
+ i32 814, label %step
+ i32 811, label %step
+ i32 806, label %step
+ i32 805, label %step
+ i32 804, label %step
+ i32 803, label %step
+ i32 802, label %step
+ i32 801, label %step
+ i32 800, label %step
+ i32 799, label %step
+ i32 798, label %step
+ i32 797, label %step
+ i32 796, label %step
+ i32 795, label %step
+ ]
+bogus:
+ unreachable
+step:
+ br label %exit
+exit:
+ %merge = phi i32 [ 3, %step ], [ 6, %entry ]
+ ret i32 %merge
+}
+
+define void @fpcmp_unanalyzable_branch(i1 %cond) {
+; This function's CFG contains an unanalyzable branch that is likely to be
+; split due to having a different high-probability predecessor.
+; CHECK: fpcmp_unanalyzable_branch
+; CHECK: %entry
+; CHECK: %exit
+; CHECK-NOT: %if.then
+; CHECK-NOT: %if.end
+; CHECK-NOT: jne
+; CHECK-NOT: jnp
+; CHECK: jne
+; CHECK-NEXT: jnp
+; CHECK-NEXT: %if.then
+
+entry:
+; Note that this branch must be strongly biased toward
+; 'entry.if.then_crit_edge' to ensure that we would try to form a chain for
+; 'entry' -> 'entry.if.then_crit_edge' -> 'if.then'. It is the last edge in that
+; chain which would violate the unanalyzable branch in 'exit', but we won't even
+; try this trick unless 'if.then' is believed to almost always be reached from
+; 'entry.if.then_crit_edge'.
+ br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
+
+entry.if.then_crit_edge:
+ %.pre14 = load i8* undef, align 1, !tbaa !0
+ br label %if.then
+
+lor.lhs.false:
+ br i1 undef, label %if.end, label %exit
+
+exit:
+ %cmp.i = fcmp une double 0.000000e+00, undef
+ br i1 %cmp.i, label %if.then, label %if.end
+
+if.then:
+ %0 = phi i8 [ %.pre14, %entry.if.then_crit_edge ], [ undef, %exit ]
+ %1 = and i8 %0, 1
+ store i8 %1, i8* undef, align 4, !tbaa !0
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 1000, i32 1}
+
+declare i32 @f()
+declare i32 @g()
+declare i32 @h(i32 %x)
+
+define i32 @test_global_cfg_break_profitability() {
+; Check that our metrics for the profitability of a CFG break are global rather
+; than local. A successor may be very hot, but if the current block isn't, it
+; doesn't matter. Within this test the 'then' block is slightly warmer than the
+; 'else' block, but not nearly enough to merit merging it with the exit block
+; even though the probability of 'then' branching to the 'exit' block is very
+; high.
+; CHECK: test_global_cfg_break_profitability
+; CHECK: calll {{_?}}f
+; CHECK: calll {{_?}}g
+; CHECK: calll {{_?}}h
+; CHECK: ret
+
+entry:
+ br i1 undef, label %then, label %else, !prof !2
+
+then:
+ %then.result = call i32 @f()
+ br label %exit
+
+else:
+ %else.result = call i32 @g()
+ br label %exit
+
+exit:
+ %result = phi i32 [ %then.result, %then ], [ %else.result, %else ]
+ %result2 = call i32 @h(i32 %result)
+ ret i32 %result
+}
+
+!2 = metadata !{metadata !"branch_weights", i32 3, i32 1}
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @test_eh_lpad_successor() {
+; Sometimes the landing pad ends up as the first successor of an invoke block.
+; When this happens, a strange result used to fall out of updateTerminators: we
+; didn't correctly locate the fallthrough successor, assuming blindly that the
+; first one was the fallthrough successor. As a result, we would add an
+; erroneous jump to the landing pad thinking *that* was the default successor.
+; CHECK: test_eh_lpad_successor
+; CHECK: %entry
+; CHECK-NOT: jmp
+; CHECK: %loop
+
+entry:
+ invoke i32 @f() to label %preheader unwind label %lpad
+
+preheader:
+ br label %loop
+
+lpad:
+ %lpad.val = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ resume { i8*, i32 } %lpad.val
+
+loop:
+ br label %loop
+}
+
+declare void @fake_throw() noreturn
+
+define void @test_eh_throw() {
+; For blocks containing a 'throw' (or similar functionality), we have
+; a no-return invoke. In this case, only EH successors will exist, and
+; fallthrough simply won't occur. Make sure we don't crash trying to update
+; terminators for such constructs.
+;
+; CHECK: test_eh_throw
+; CHECK: %entry
+; CHECK: %cleanup
+
+entry:
+ invoke void @fake_throw() to label %continue unwind label %cleanup
+
+continue:
+ unreachable
+
+cleanup:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ unreachable
+}
+
+define void @test_unnatural_cfg_backwards_inner_loop() {
+; Test that when we encounter an unnatural CFG structure after having formed
+; a chain for an inner loop which happened to be laid out backwards, we don't
+; attempt to merge onto the wrong end of the inner loop just because we find it
+; first. This was reduced from a crasher in GCC's single source.
+;
+; CHECK: test_unnatural_cfg_backwards_inner_loop
+; CHECK: %entry
+; CHECK: %body
+; CHECK: %loop2b
+; CHECK: %loop1
+; CHECK: %loop2a
+
+entry:
+ br i1 undef, label %loop2a, label %body
+
+body:
+ br label %loop2a
+
+loop1:
+ %next.load = load i32** undef
+ br i1 %comp.a, label %loop2a, label %loop2b
+
+loop2a:
+ %var = phi i32* [ null, %entry ], [ null, %body ], [ %next.phi, %loop1 ]
+ %next.var = phi i32* [ null, %entry ], [ undef, %body ], [ %next.load, %loop1 ]
+ %comp.a = icmp eq i32* %var, null
+ br label %loop3
+
+loop2b:
+ %gep = getelementptr inbounds i32* %var.phi, i32 0
+ %next.ptr = bitcast i32* %gep to i32**
+ store i32* %next.phi, i32** %next.ptr
+ br label %loop3
+
+loop3:
+ %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ]
+ %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ]
+ br label %loop1
+}
+
+define void @unanalyzable_branch_to_loop_header() {
+; Ensure that we can handle unanalyzable branches into loop headers. We
+; pre-form chains for unanalyzable branches, and will find the tail end of that
+; at the start of the loop. This function uses floating point comparison
+; fallthrough because that happens to always produce unanalyzable branches on
+; x86.
+;
+; CHECK: unanalyzable_branch_to_loop_header
+; CHECK: %entry
+; CHECK: %loop
+; CHECK: %exit
+
+entry:
+ %cmp = fcmp une double 0.000000e+00, undef
+ br i1 %cmp, label %loop, label %exit
+
+loop:
+ %cond = icmp eq i8 undef, 42
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @unanalyzable_branch_to_best_succ(i1 %cond) {
+; Ensure that we can handle unanalyzable branches where the destination block
+; gets selected as the optimal successor to merge.
+;
+; CHECK: unanalyzable_branch_to_best_succ
+; CHECK: %entry
+; CHECK: %foo
+; CHECK: %bar
+; CHECK: %exit
+
+entry:
+ ; Bias this branch toward bar to ensure we form that chain.
+ br i1 %cond, label %bar, label %foo, !prof !1
+
+foo:
+ %cmp = fcmp une double 0.000000e+00, undef
+ br i1 %cmp, label %bar, label %exit
+
+bar:
+ call i32 @f()
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @unanalyzable_branch_to_free_block(float %x) {
+; Ensure that we can handle unanalyzable branches where the destination block
+; gets selected as the best free block in the CFG.
+;
+; CHECK: unanalyzable_branch_to_free_block
+; CHECK: %entry
+; CHECK: %a
+; CHECK: %b
+; CHECK: %c
+; CHECK: %exit
+
+entry:
+ br i1 undef, label %a, label %b
+
+a:
+ call i32 @f()
+ br label %c
+
+b:
+ %cmp = fcmp une float %x, undef
+ br i1 %cmp, label %c, label %exit
+
+c:
+ call i32 @g()
+ br label %exit
+
+exit:
+ ret void
+}
+
+define void @many_unanalyzable_branches() {
+; Ensure that we don't crash as we're building up many unanalyzable branches,
+; blocks, and loops.
+;
+; CHECK: many_unanalyzable_branches
+; CHECK: %entry
+; CHECK: %exit
+
+entry:
+ br label %0
+
+ %val0 = load volatile float* undef
+ %cmp0 = fcmp une float %val0, undef
+ br i1 %cmp0, label %1, label %0
+ %val1 = load volatile float* undef
+ %cmp1 = fcmp une float %val1, undef
+ br i1 %cmp1, label %2, label %1
+ %val2 = load volatile float* undef
+ %cmp2 = fcmp une float %val2, undef
+ br i1 %cmp2, label %3, label %2
+ %val3 = load volatile float* undef
+ %cmp3 = fcmp une float %val3, undef
+ br i1 %cmp3, label %4, label %3
+ %val4 = load volatile float* undef
+ %cmp4 = fcmp une float %val4, undef
+ br i1 %cmp4, label %5, label %4
+ %val5 = load volatile float* undef
+ %cmp5 = fcmp une float %val5, undef
+ br i1 %cmp5, label %6, label %5
+ %val6 = load volatile float* undef
+ %cmp6 = fcmp une float %val6, undef
+ br i1 %cmp6, label %7, label %6
+ %val7 = load volatile float* undef
+ %cmp7 = fcmp une float %val7, undef
+ br i1 %cmp7, label %8, label %7
+ %val8 = load volatile float* undef
+ %cmp8 = fcmp une float %val8, undef
+ br i1 %cmp8, label %9, label %8
+ %val9 = load volatile float* undef
+ %cmp9 = fcmp une float %val9, undef
+ br i1 %cmp9, label %10, label %9
+ %val10 = load volatile float* undef
+ %cmp10 = fcmp une float %val10, undef
+ br i1 %cmp10, label %11, label %10
+ %val11 = load volatile float* undef
+ %cmp11 = fcmp une float %val11, undef
+ br i1 %cmp11, label %12, label %11
+ %val12 = load volatile float* undef
+ %cmp12 = fcmp une float %val12, undef
+ br i1 %cmp12, label %13, label %12
+ %val13 = load volatile float* undef
+ %cmp13 = fcmp une float %val13, undef
+ br i1 %cmp13, label %14, label %13
+ %val14 = load volatile float* undef
+ %cmp14 = fcmp une float %val14, undef
+ br i1 %cmp14, label %15, label %14
+ %val15 = load volatile float* undef
+ %cmp15 = fcmp une float %val15, undef
+ br i1 %cmp15, label %16, label %15
+ %val16 = load volatile float* undef
+ %cmp16 = fcmp une float %val16, undef
+ br i1 %cmp16, label %17, label %16
+ %val17 = load volatile float* undef
+ %cmp17 = fcmp une float %val17, undef
+ br i1 %cmp17, label %18, label %17
+ %val18 = load volatile float* undef
+ %cmp18 = fcmp une float %val18, undef
+ br i1 %cmp18, label %19, label %18
+ %val19 = load volatile float* undef
+ %cmp19 = fcmp une float %val19, undef
+ br i1 %cmp19, label %20, label %19
+ %val20 = load volatile float* undef
+ %cmp20 = fcmp une float %val20, undef
+ br i1 %cmp20, label %21, label %20
+ %val21 = load volatile float* undef
+ %cmp21 = fcmp une float %val21, undef
+ br i1 %cmp21, label %22, label %21
+ %val22 = load volatile float* undef
+ %cmp22 = fcmp une float %val22, undef
+ br i1 %cmp22, label %23, label %22
+ %val23 = load volatile float* undef
+ %cmp23 = fcmp une float %val23, undef
+ br i1 %cmp23, label %24, label %23
+ %val24 = load volatile float* undef
+ %cmp24 = fcmp une float %val24, undef
+ br i1 %cmp24, label %25, label %24
+ %val25 = load volatile float* undef
+ %cmp25 = fcmp une float %val25, undef
+ br i1 %cmp25, label %26, label %25
+ %val26 = load volatile float* undef
+ %cmp26 = fcmp une float %val26, undef
+ br i1 %cmp26, label %27, label %26
+ %val27 = load volatile float* undef
+ %cmp27 = fcmp une float %val27, undef
+ br i1 %cmp27, label %28, label %27
+ %val28 = load volatile float* undef
+ %cmp28 = fcmp une float %val28, undef
+ br i1 %cmp28, label %29, label %28
+ %val29 = load volatile float* undef
+ %cmp29 = fcmp une float %val29, undef
+ br i1 %cmp29, label %30, label %29
+ %val30 = load volatile float* undef
+ %cmp30 = fcmp une float %val30, undef
+ br i1 %cmp30, label %31, label %30
+ %val31 = load volatile float* undef
+ %cmp31 = fcmp une float %val31, undef
+ br i1 %cmp31, label %32, label %31
+ %val32 = load volatile float* undef
+ %cmp32 = fcmp une float %val32, undef
+ br i1 %cmp32, label %33, label %32
+ %val33 = load volatile float* undef
+ %cmp33 = fcmp une float %val33, undef
+ br i1 %cmp33, label %34, label %33
+ %val34 = load volatile float* undef
+ %cmp34 = fcmp une float %val34, undef
+ br i1 %cmp34, label %35, label %34
+ %val35 = load volatile float* undef
+ %cmp35 = fcmp une float %val35, undef
+ br i1 %cmp35, label %36, label %35
+ %val36 = load volatile float* undef
+ %cmp36 = fcmp une float %val36, undef
+ br i1 %cmp36, label %37, label %36
+ %val37 = load volatile float* undef
+ %cmp37 = fcmp une float %val37, undef
+ br i1 %cmp37, label %38, label %37
+ %val38 = load volatile float* undef
+ %cmp38 = fcmp une float %val38, undef
+ br i1 %cmp38, label %39, label %38
+ %val39 = load volatile float* undef
+ %cmp39 = fcmp une float %val39, undef
+ br i1 %cmp39, label %40, label %39
+ %val40 = load volatile float* undef
+ %cmp40 = fcmp une float %val40, undef
+ br i1 %cmp40, label %41, label %40
+ %val41 = load volatile float* undef
+ %cmp41 = fcmp une float %val41, undef
+ br i1 %cmp41, label %42, label %41
+ %val42 = load volatile float* undef
+ %cmp42 = fcmp une float %val42, undef
+ br i1 %cmp42, label %43, label %42
+ %val43 = load volatile float* undef
+ %cmp43 = fcmp une float %val43, undef
+ br i1 %cmp43, label %44, label %43
+ %val44 = load volatile float* undef
+ %cmp44 = fcmp une float %val44, undef
+ br i1 %cmp44, label %45, label %44
+ %val45 = load volatile float* undef
+ %cmp45 = fcmp une float %val45, undef
+ br i1 %cmp45, label %46, label %45
+ %val46 = load volatile float* undef
+ %cmp46 = fcmp une float %val46, undef
+ br i1 %cmp46, label %47, label %46
+ %val47 = load volatile float* undef
+ %cmp47 = fcmp une float %val47, undef
+ br i1 %cmp47, label %48, label %47
+ %val48 = load volatile float* undef
+ %cmp48 = fcmp une float %val48, undef
+ br i1 %cmp48, label %49, label %48
+ %val49 = load volatile float* undef
+ %cmp49 = fcmp une float %val49, undef
+ br i1 %cmp49, label %50, label %49
+ %val50 = load volatile float* undef
+ %cmp50 = fcmp une float %val50, undef
+ br i1 %cmp50, label %51, label %50
+ %val51 = load volatile float* undef
+ %cmp51 = fcmp une float %val51, undef
+ br i1 %cmp51, label %52, label %51
+ %val52 = load volatile float* undef
+ %cmp52 = fcmp une float %val52, undef
+ br i1 %cmp52, label %53, label %52
+ %val53 = load volatile float* undef
+ %cmp53 = fcmp une float %val53, undef
+ br i1 %cmp53, label %54, label %53
+ %val54 = load volatile float* undef
+ %cmp54 = fcmp une float %val54, undef
+ br i1 %cmp54, label %55, label %54
+ %val55 = load volatile float* undef
+ %cmp55 = fcmp une float %val55, undef
+ br i1 %cmp55, label %56, label %55
+ %val56 = load volatile float* undef
+ %cmp56 = fcmp une float %val56, undef
+ br i1 %cmp56, label %57, label %56
+ %val57 = load volatile float* undef
+ %cmp57 = fcmp une float %val57, undef
+ br i1 %cmp57, label %58, label %57
+ %val58 = load volatile float* undef
+ %cmp58 = fcmp une float %val58, undef
+ br i1 %cmp58, label %59, label %58
+ %val59 = load volatile float* undef
+ %cmp59 = fcmp une float %val59, undef
+ br i1 %cmp59, label %60, label %59
+ %val60 = load volatile float* undef
+ %cmp60 = fcmp une float %val60, undef
+ br i1 %cmp60, label %61, label %60
+ %val61 = load volatile float* undef
+ %cmp61 = fcmp une float %val61, undef
+ br i1 %cmp61, label %62, label %61
+ %val62 = load volatile float* undef
+ %cmp62 = fcmp une float %val62, undef
+ br i1 %cmp62, label %63, label %62
+ %val63 = load volatile float* undef
+ %cmp63 = fcmp une float %val63, undef
+ br i1 %cmp63, label %64, label %63
+ %val64 = load volatile float* undef
+ %cmp64 = fcmp une float %val64, undef
+ br i1 %cmp64, label %65, label %64
+
+ br label %exit
+exit:
+ ret void
+}
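
Most of these placement tests are driven by !prof branch weights. A minimal sketch of the idiom (illustrative, not from the patch):

define void @prof_sketch(i1 %c) {
entry:
  ; weights 4:64 mark %cold as the unlikely successor, so placement
  ; should sink it out of the fall-through path
  br i1 %c, label %cold, label %hot, !prof !3
cold:
  br label %hot
hot:
  ret void
}
!3 = metadata !{metadata !"branch_weights", i32 4, i32 64}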
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 88c09e3acdc8..43c47c0fa8a5 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -1,40 +1,65 @@
-; RUN: llc < %s -march=x86-64 -mattr=+bmi | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=+bmi,+bmi2 | FileCheck %s
-define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
- ret i32 %tmp
+declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
+
+define i8 @t1(i8 %x) nounwind {
+ %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 false )
+ ret i8 %tmp
; CHECK: t1:
; CHECK: tzcntl
}
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
-
define i16 @t2(i16 %x) nounwind {
- %tmp = tail call i16 @llvm.cttz.i16( i16 %x )
- ret i16 %tmp
+ %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 false )
+ ret i16 %tmp
; CHECK: t2:
; CHECK: tzcntw
}
-declare i16 @llvm.cttz.i16(i16) nounwind readnone
-
-define i64 @t3(i64 %x) nounwind {
- %tmp = tail call i64 @llvm.cttz.i64( i64 %x )
- ret i64 %tmp
+define i32 @t3(i32 %x) nounwind {
+ %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 false )
+ ret i32 %tmp
; CHECK: t3:
+; CHECK: tzcntl
+}
+
+define i64 @t4(i64 %x) nounwind {
+ %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 false )
+ ret i64 %tmp
+; CHECK: t4:
; CHECK: tzcntq
}
-declare i64 @llvm.cttz.i64(i64) nounwind readnone
+define i8 @t5(i8 %x) nounwind {
+ %tmp = tail call i8 @llvm.cttz.i8( i8 %x, i1 true )
+ ret i8 %tmp
+; CHECK: t5:
+; CHECK: tzcntl
+}
-define i8 @t4(i8 %x) nounwind {
- %tmp = tail call i8 @llvm.cttz.i8( i8 %x )
- ret i8 %tmp
-; CHECK: t4:
+define i16 @t6(i16 %x) nounwind {
+ %tmp = tail call i16 @llvm.cttz.i16( i16 %x, i1 true )
+ ret i16 %tmp
+; CHECK: t6:
; CHECK: tzcntw
}
-declare i8 @llvm.cttz.i8(i8) nounwind readnone
+define i32 @t7(i32 %x) nounwind {
+ %tmp = tail call i32 @llvm.cttz.i32( i32 %x, i1 true )
+ ret i32 %tmp
+; CHECK: t7:
+; CHECK: tzcntl
+}
+
+define i64 @t8(i64 %x) nounwind {
+ %tmp = tail call i64 @llvm.cttz.i64( i64 %x, i1 true )
+ ret i64 %tmp
+; CHECK: t8:
+; CHECK: tzcntq
+}
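+
+; The i1 second operand of llvm.cttz is the "is_zero_undef" flag: with
+; 'i1 false' the result must be defined (the bit width) for a zero input,
+; while 'i1 true' makes a zero input undef. Illustrative sketch, not part
+; of this patch: without tzcnt, a defined-at-zero cttz needs a guard like
+;   %z = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+;   %c = icmp eq i32 %x, 0
+;   %r = select i1 %c, i32 32, i32 %z
+; tzcnt needs no guard because tzcnt(0) already returns the operand size,
+; which is why both flag values above lower to a bare tzcnt.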
define i32 @andn32(i32 %x, i32 %y) nounwind readnone {
%tmp1 = xor i32 %x, -1
@@ -51,3 +76,124 @@ define i64 @andn64(i64 %x, i64 %y) nounwind readnone {
; CHECK: andn64:
; CHECK: andnq
}
+
+define i32 @bextr32(i32 %x, i32 %y) nounwind readnone {
+ %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x, i32 %y)
+ ret i32 %tmp
+; CHECK: bextr32:
+; CHECK: bextrl
+}
+
+declare i32 @llvm.x86.bmi.bextr.32(i32, i32) nounwind readnone
+
+define i64 @bextr64(i64 %x, i64 %y) nounwind readnone {
+ %tmp = tail call i64 @llvm.x86.bmi.bextr.64(i64 %x, i64 %y)
+ ret i64 %tmp
+; CHECK: bextr64:
+; CHECK: bextrq
+}
+
+declare i64 @llvm.x86.bmi.bextr.64(i64, i64) nounwind readnone
+
+define i32 @bzhi32(i32 %x, i32 %y) nounwind readnone {
+ %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x, i32 %y)
+ ret i32 %tmp
+; CHECK: bzhi32:
+; CHECK: bzhil
+}
+
+declare i32 @llvm.x86.bmi.bzhi.32(i32, i32) nounwind readnone
+
+define i64 @bzhi64(i64 %x, i64 %y) nounwind readnone {
+ %tmp = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %x, i64 %y)
+ ret i64 %tmp
+; CHECK: bzhi64:
+; CHECK: bzhiq
+}
+
+declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
+
+define i32 @blsi32(i32 %x) nounwind readnone {
+ %tmp = sub i32 0, %x
+ %tmp2 = and i32 %x, %tmp
+ ret i32 %tmp2
+; CHECK: blsi32:
+; CHECK: blsil
+}
+
+define i64 @blsi64(i64 %x) nounwind readnone {
+ %tmp = sub i64 0, %x
+ %tmp2 = and i64 %tmp, %x
+ ret i64 %tmp2
+; CHECK: blsi64:
+; CHECK: blsiq
+}
+
+define i32 @blsmsk32(i32 %x) nounwind readnone {
+ %tmp = sub i32 %x, 1
+ %tmp2 = xor i32 %x, %tmp
+ ret i32 %tmp2
+; CHECK: blsmsk32:
+; CHECK: blsmskl
+}
+
+define i64 @blsmsk64(i64 %x) nounwind readnone {
+ %tmp = sub i64 %x, 1
+ %tmp2 = xor i64 %tmp, %x
+ ret i64 %tmp2
+; CHECK: blsmsk64:
+; CHECK: blsmskq
+}
+
+define i32 @blsr32(i32 %x) nounwind readnone {
+ %tmp = sub i32 %x, 1
+ %tmp2 = and i32 %x, %tmp
+ ret i32 %tmp2
+; CHECK: blsr32:
+; CHECK: blsrl
+}
+
+define i64 @blsr64(i64 %x) nounwind readnone {
+ %tmp = sub i64 %x, 1
+ %tmp2 = and i64 %tmp, %x
+ ret i64 %tmp2
+; CHECK: blsr64:
+; CHECK: blsrq
+}
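+
+; The three patterns above are the canonical BMI1 idioms. For reference,
+; with x = 0b01100: blsi = x & -x = 0b00100 (isolate the lowest set bit),
+; blsmsk = x ^ (x-1) = 0b00111 (mask up to and including the lowest set
+; bit), and blsr = x & (x-1) = 0b01000 (clear the lowest set bit). The
+; 32- and 64-bit variants deliberately commute the operands to check that
+; the pattern matching is commutative.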
+
+define i32 @pdep32(i32 %x, i32 %y) nounwind readnone {
+ %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y)
+ ret i32 %tmp
+; CHECK: pdep32:
+; CHECK: pdepl
+}
+
+declare i32 @llvm.x86.bmi.pdep.32(i32, i32) nounwind readnone
+
+define i64 @pdep64(i64 %x, i64 %y) nounwind readnone {
+ %tmp = tail call i64 @llvm.x86.bmi.pdep.64(i64 %x, i64 %y)
+ ret i64 %tmp
+; CHECK: pdep64:
+; CHECK: pdepq
+}
+
+declare i64 @llvm.x86.bmi.pdep.64(i64, i64) nounwind readnone
+
+define i32 @pext32(i32 %x, i32 %y) nounwind readnone {
+ %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y)
+ ret i32 %tmp
+; CHECK: pext32:
+; CHECK: pextl
+}
+
+declare i32 @llvm.x86.bmi.pext.32(i32, i32) nounwind readnone
+
+define i64 @pext64(i64 %x, i64 %y) nounwind readnone {
+ %tmp = tail call i64 @llvm.x86.bmi.pext.64(i64 %x, i64 %y)
+ ret i64 %tmp
+; CHECK: pext64:
+; CHECK: pextq
+}
+
+declare i64 @llvm.x86.bmi.pext.64(i64, i64) nounwind readnone
+
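+; For reference: bextr (BMI1) extracts the bit field [start, start+len),
+; where start is bits 7:0 and len is bits 15:8 of the second operand;
+; bzhi (BMI2) keeps the low n bits and zeroes the rest; pdep/pext (BMI2)
+; scatter/gather the low bits of the source to/from the set-bit positions
+; of the mask. bzhi, pdep and pext are why the RUN line now passes +bmi2
+; as well as +bmi.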
diff --git a/test/CodeGen/X86/brcond.ll b/test/CodeGen/X86/brcond.ll
index 5cdc1000f3cd..44670c802b41 100644
--- a/test/CodeGen/X86/brcond.ll
+++ b/test/CodeGen/X86/brcond.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=core2 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=penryn | FileCheck %s
+
; rdar://7475489
define i32 @test1(i32 %a, i32 %b) nounwind ssp {
@@ -106,3 +107,4 @@ bb2: ; preds = %entry, %bb1
%.0 = fptrunc double %.0.in to float ; <float> [#uses=1]
ret float %.0
}
+
diff --git a/test/CodeGen/X86/btq.ll b/test/CodeGen/X86/btq.ll
new file mode 100644
index 000000000000..9c137a7239bb
--- /dev/null
+++ b/test/CodeGen/X86/btq.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+declare void @bar()
+
+define void @test1(i64 %foo) nounwind {
+ %and = and i64 %foo, 4294967296
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: test1:
+; CHECK: btq $32
+
+if.then:
+ tail call void @bar() nounwind
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+define void @test2(i64 %foo) nounwind {
+ %and = and i64 %foo, 2147483648
+ %tobool = icmp eq i64 %and, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+; CHECK: test2:
+; CHECK: testl $-2147483648
+
+if.then:
+ tail call void @bar() nounwind
+ br label %if.end
+
+if.end:
+ ret void
+}
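+
+; For reference: 4294967296 is 1<<32, so testing it needs either a bt or a
+; 64-bit immediate; since 'test r/m64, imm32' sign-extends its 32-bit
+; immediate, bit 32 is not encodable and btq $32 is used instead.
+; 2147483648 is 1<<31, which does fit as the 32-bit immediate -2147483648,
+; so a plain testl on the low half suffices and no bt is needed.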
diff --git a/test/CodeGen/X86/byval6.ll b/test/CodeGen/X86/byval6.ll
index b060369a182e..2d3990167f2e 100644
--- a/test/CodeGen/X86/byval6.ll
+++ b/test/CodeGen/X86/byval6.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep add | not grep 16
+; RUN: llc < %s -mcpu=generic -march=x86 | grep add | not grep 16
%struct.W = type { x86_fp80, x86_fp80 }
@B = global %struct.W { x86_fp80 0xK4001A000000000000000, x86_fp80 0xK4001C000000000000000 }, align 32
diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll
new file mode 100644
index 000000000000..7420ce730475
--- /dev/null
+++ b/test/CodeGen/X86/cfstring.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; Make sure that the string ends up in the correct section.
+
+; CHECK: .section __TEXT,__cstring
+; CHECK-NEXT: l_.str3:
+
+; CHECK: .section __DATA,__cfstring
+; CHECK-NEXT: .align 4
+; CHECK-NEXT: L__unnamed_cfstring_4:
+; CHECK-NEXT: .quad ___CFConstantStringClassReference
+; CHECK-NEXT: .long 1992
+; CHECK-NEXT: .space 4
+; CHECK-NEXT: .quad l_.str3
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .space 4
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+ %stack = alloca i8*, align 4
+ %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+ store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+ ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
deleted file mode 100644
index 439f7b0d4f6c..000000000000
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | FileCheck %s
-;
-; Nested LSR is required to optimize this case.
-; We do not expect to see this form of IR without -enable-iv-rewrite.
-
-define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
-; CHECK: borf:
-; CHECK-NOT: inc
-; CHECK-NOT: leal 1(
-; CHECK-NOT: leal -1(
-; CHECK: decl
-; CHECK-NEXT: cmpl $-478
-; CHECK: ret
-
-bb4.thread:
- br label %bb2.outer
-
-bb2.outer: ; preds = %bb4, %bb4.thread
- %indvar18 = phi i32 [ 0, %bb4.thread ], [ %indvar.next28, %bb4 ] ; <i32> [#uses=3]
- %tmp34 = mul i32 %indvar18, 65535 ; <i32> [#uses=1]
- %i.0.reg2mem.0.ph = add i32 %tmp34, 639 ; <i32> [#uses=1]
- %0 = and i32 %i.0.reg2mem.0.ph, 65535 ; <i32> [#uses=1]
- %1 = mul i32 %0, 480 ; <i32> [#uses=1]
- %tmp20 = mul i32 %indvar18, -478 ; <i32> [#uses=1]
- br label %bb2
-
-bb2: ; preds = %bb2, %bb2.outer
- %indvar = phi i32 [ 0, %bb2.outer ], [ %indvar.next, %bb2 ] ; <i32> [#uses=3]
- %ctg2 = getelementptr i8* %out, i32 %tmp20 ; <i8*> [#uses=1]
- %tmp21 = ptrtoint i8* %ctg2 to i32 ; <i32> [#uses=1]
- %tmp23 = sub i32 %tmp21, %indvar ; <i32> [#uses=1]
- %out_addr.0.reg2mem.0 = inttoptr i32 %tmp23 to i8* ; <i8*> [#uses=1]
- %tmp25 = mul i32 %indvar, 65535 ; <i32> [#uses=1]
- %j.0.reg2mem.0 = add i32 %tmp25, 479 ; <i32> [#uses=1]
- %2 = and i32 %j.0.reg2mem.0, 65535 ; <i32> [#uses=1]
- %3 = add i32 %1, %2 ; <i32> [#uses=9]
- %4 = add i32 %3, -481 ; <i32> [#uses=1]
- %5 = getelementptr i8* %in, i32 %4 ; <i8*> [#uses=1]
- %6 = load i8* %5, align 1 ; <i8> [#uses=1]
- %7 = add i32 %3, -480 ; <i32> [#uses=1]
- %8 = getelementptr i8* %in, i32 %7 ; <i8*> [#uses=1]
- %9 = load i8* %8, align 1 ; <i8> [#uses=1]
- %10 = add i32 %3, -479 ; <i32> [#uses=1]
- %11 = getelementptr i8* %in, i32 %10 ; <i8*> [#uses=1]
- %12 = load i8* %11, align 1 ; <i8> [#uses=1]
- %13 = add i32 %3, -1 ; <i32> [#uses=1]
- %14 = getelementptr i8* %in, i32 %13 ; <i8*> [#uses=1]
- %15 = load i8* %14, align 1 ; <i8> [#uses=1]
- %16 = getelementptr i8* %in, i32 %3 ; <i8*> [#uses=1]
- %17 = load i8* %16, align 1 ; <i8> [#uses=1]
- %18 = add i32 %3, 1 ; <i32> [#uses=1]
- %19 = getelementptr i8* %in, i32 %18 ; <i8*> [#uses=1]
- %20 = load i8* %19, align 1 ; <i8> [#uses=1]
- %21 = add i32 %3, 481 ; <i32> [#uses=1]
- %22 = getelementptr i8* %in, i32 %21 ; <i8*> [#uses=1]
- %23 = load i8* %22, align 1 ; <i8> [#uses=1]
- %24 = add i32 %3, 480 ; <i32> [#uses=1]
- %25 = getelementptr i8* %in, i32 %24 ; <i8*> [#uses=1]
- %26 = load i8* %25, align 1 ; <i8> [#uses=1]
- %27 = add i32 %3, 479 ; <i32> [#uses=1]
- %28 = getelementptr i8* %in, i32 %27 ; <i8*> [#uses=1]
- %29 = load i8* %28, align 1 ; <i8> [#uses=1]
- %30 = add i8 %9, %6 ; <i8> [#uses=1]
- %31 = add i8 %30, %12 ; <i8> [#uses=1]
- %32 = add i8 %31, %15 ; <i8> [#uses=1]
- %33 = add i8 %32, %17 ; <i8> [#uses=1]
- %34 = add i8 %33, %20 ; <i8> [#uses=1]
- %35 = add i8 %34, %23 ; <i8> [#uses=1]
- %36 = add i8 %35, %26 ; <i8> [#uses=1]
- %37 = add i8 %36, %29 ; <i8> [#uses=1]
- store i8 %37, i8* %out_addr.0.reg2mem.0, align 1
- %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
- %exitcond = icmp eq i32 %indvar.next, 478 ; <i1> [#uses=1]
- br i1 %exitcond, label %bb4, label %bb2
-
-bb4: ; preds = %bb2
- %indvar.next28 = add i32 %indvar18, 1 ; <i32> [#uses=2]
- %exitcond29 = icmp eq i32 %indvar.next28, 638 ; <i1> [#uses=1]
- br i1 %exitcond29, label %return, label %bb2.outer
-
-return: ; preds = %bb4
- ret void
-}
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index 8b53ae2817c6..1c5c113a7232 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -3,6 +3,10 @@
; Nested LSR is required to optimize this case.
; We do not expect to see this form of IR without -enable-iv-rewrite.
+; XFAILed for now because the scheduler two-address hack has been disabled;
+; it now generates a 'leal -1' rather than a 'decq'.
+; XFAIL: *
+
define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
; CHECK: borf:
; CHECK-NOT: inc
diff --git a/test/CodeGen/X86/clz.ll b/test/CodeGen/X86/clz.ll
index d76fab4123bd..763079f3446f 100644
--- a/test/CodeGen/X86/clz.ll
+++ b/test/CodeGen/X86/clz.ll
@@ -1,48 +1,141 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=yonah | FileCheck %s
-define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
- ret i32 %tmp
-; CHECK: t1:
-; CHECK: bsrl
-; CHECK: cmov
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+
+define i8 @cttz_i8(i8 %x) {
+ %tmp = call i8 @llvm.cttz.i8( i8 %x, i1 true )
+ ret i8 %tmp
+; CHECK: cttz_i8:
+; CHECK: bsfl
+; CHECK-NOT: cmov
+; CHECK: ret
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+define i16 @cttz_i16(i16 %x) {
+ %tmp = call i16 @llvm.cttz.i16( i16 %x, i1 true )
+ ret i16 %tmp
+; CHECK: cttz_i16:
+; CHECK: bsfw
+; CHECK-NOT: cmov
+; CHECK: ret
+}
-define i32 @t2(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.cttz.i32( i32 %x )
- ret i32 %tmp
-; CHECK: t2:
+define i32 @cttz_i32(i32 %x) {
+ %tmp = call i32 @llvm.cttz.i32( i32 %x, i1 true )
+ ret i32 %tmp
+; CHECK: cttz_i32:
; CHECK: bsfl
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: ret
+}
+
+define i64 @cttz_i64(i64 %x) {
+ %tmp = call i64 @llvm.cttz.i64( i64 %x, i1 true )
+ ret i64 %tmp
+; CHECK: cttz_i64:
+; CHECK: bsfq
+; CHECK-NOT: cmov
+; CHECK: ret
}
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
+define i8 @ctlz_i8(i8 %x) {
+entry:
+ %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
+ ret i8 %tmp2
+; CHECK: ctlz_i8:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $7,
+; CHECK: ret
+}
-define i16 @t3(i16 %x, i16 %y) nounwind {
+define i16 @ctlz_i16(i16 %x) {
entry:
- %tmp1 = add i16 %x, %y
- %tmp2 = tail call i16 @llvm.ctlz.i16( i16 %tmp1 ) ; <i16> [#uses=1]
- ret i16 %tmp2
-; CHECK: t3:
+ %tmp2 = call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+ ret i16 %tmp2
+; CHECK: ctlz_i16:
; CHECK: bsrw
-; CHECK: cmov
+; CHECK-NOT: cmov
+; CHECK: xorl $15,
+; CHECK: ret
+}
+
+define i32 @ctlz_i32(i32 %x) {
+ %tmp = call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+ ret i32 %tmp
+; CHECK: ctlz_i32:
+; CHECK: bsrl
+; CHECK-NOT: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+}
+
+define i64 @ctlz_i64(i64 %x) {
+ %tmp = call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+ ret i64 %tmp
+; CHECK: ctlz_i64:
+; CHECK: bsrq
+; CHECK-NOT: cmov
+; CHECK: xorq $63,
+; CHECK: ret
}
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone
+define i32 @ctlz_i32_cmov(i32 %n) {
+entry:
+; Generate a cmov to handle zero inputs when necessary.
+; CHECK: ctlz_i32_cmov:
+; CHECK: bsrl
+; CHECK: cmov
+; CHECK: xorl $31,
+; CHECK: ret
+ %tmp1 = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+ ret i32 %tmp1
+}
+define i32 @ctlz_i32_fold_cmov(i32 %n) {
+entry:
; Don't generate the cmovne when the source is known non-zero (and bsr would
; not set ZF).
; rdar://9490949
-
-define i32 @t4(i32 %n) nounwind {
-entry:
-; CHECK: t4:
+; CHECK: ctlz_i32_fold_cmov:
; CHECK: bsrl
; CHECK-NOT: cmov
+; CHECK: xorl $31,
; CHECK: ret
%or = or i32 %n, 1
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %or)
+ %tmp1 = call i32 @llvm.ctlz.i32(i32 %or, i1 false)
ret i32 %tmp1
}
+
+define i32 @ctlz_bsr(i32 %n) {
+entry:
+; Don't generate any xors when a 'ctlz' intrinsic is actually used to compute
+; the most significant bit, which is what 'bsr' does natively.
+; CHECK: ctlz_bsr:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 true)
+ %bsr = xor i32 %ctlz, 31
+ ret i32 %bsr
+}
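+
+; Why the xor folds away: for a nonzero i32, ctlz(x) = 31 - bsr(x), and
+; for values in [0, 31] subtracting from 31 (all ones) is the same as
+; xor with 31. So 'xor (ctlz x), 31' is exactly bsr, and no xorl needs to
+; appear in the output.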
+
+define i32 @ctlz_bsr_cmov(i32 %n) {
+entry:
+; Same as ctlz_bsr, but ensure this happens even when there is a potential
+; zero.
+; CHECK: ctlz_bsr_cmov:
+; CHECK: bsrl
+; CHECK-NOT: xorl
+; CHECK: ret
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %n, i1 false)
+ %bsr = xor i32 %ctlz, 31
+ ret i32 %bsr
+}
diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll
index 7a8d6e6a8a36..2e7ffbfd546d 100644
--- a/test/CodeGen/X86/cmov.ll
+++ b/test/CodeGen/X86/cmov.ll
@@ -84,7 +84,7 @@ entry:
br i1 %3, label %func_4.exit.i, label %bb.i.i.i
bb.i.i.i: ; preds = %entry
- %4 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ %4 = load volatile i8* @g_100, align 1 ; <i8> [#uses=0]
br label %func_4.exit.i
; CHECK: test4:
@@ -101,7 +101,7 @@ func_4.exit.i: ; preds = %bb.i.i.i, %entry
br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
bb.i.i: ; preds = %func_4.exit.i
- %5 = volatile load i8* @g_100, align 1 ; <i8> [#uses=0]
+ %5 = load volatile i8* @g_100, align 1 ; <i8> [#uses=0]
br label %func_1.exit
func_1.exit: ; preds = %bb.i.i, %func_4.exit.i
diff --git a/test/CodeGen/X86/cmpxchg16b.ll b/test/CodeGen/X86/cmpxchg16b.ll
index ba1c4ef9e225..edbd0bc9ded5 100644
--- a/test/CodeGen/X86/cmpxchg16b.ll
+++ b/test/CodeGen/X86/cmpxchg16b.ll
@@ -3,7 +3,7 @@
; Basic 128-bit cmpxchg
define void @t1(i128* nocapture %p) nounwind ssp {
entry:
-; CHECK movl $1, %ebx
+; CHECK: movl $1, %ebx
; CHECK: lock
; CHECK-NEXT: cmpxchg16b
%r = cmpxchg i128* %p, i128 0, i128 1 seq_cst
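+; For reference: cmpxchg16b implicitly compares RDX:RAX against the memory
+; operand and, on success, stores RCX:RBX. Exchanging 0 for 1 therefore
+; needs RBX = 1, which is the 'movl $1, %ebx' checked above.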
diff --git a/test/CodeGen/X86/coalescer-commute1.ll b/test/CodeGen/X86/coalescer-commute1.ll
index 8aa0bfdd51fb..d9e0778102cb 100644
--- a/test/CodeGen/X86/coalescer-commute1.ll
+++ b/test/CodeGen/X86/coalescer-commute1.ll
@@ -21,6 +21,6 @@ bb: ; preds = %bb, %entry
br i1 %exitcond, label %bb13, label %bb
bb13: ; preds = %bb
- volatile store float %tmp6, float* @G, align 4
+ store volatile float %tmp6, float* @G, align 4
ret void
}
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 153145728f58..cf6e27d15972 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -6,16 +6,16 @@
; Chain and flag folding issues.
define i32 @test1() nounwind ssp {
entry:
- %tmp5.i = volatile load i32* undef ; <i32> [#uses=1]
+ %tmp5.i = load volatile i32* undef ; <i32> [#uses=1]
%conv.i = zext i32 %tmp5.i to i64 ; <i64> [#uses=1]
- %tmp12.i = volatile load i32* undef ; <i32> [#uses=1]
+ %tmp12.i = load volatile i32* undef ; <i32> [#uses=1]
%conv13.i = zext i32 %tmp12.i to i64 ; <i64> [#uses=1]
%shl.i = shl i64 %conv13.i, 32 ; <i64> [#uses=1]
%or.i = or i64 %shl.i, %conv.i ; <i64> [#uses=1]
%add16.i = add i64 %or.i, 256 ; <i64> [#uses=1]
%shr.i = lshr i64 %add16.i, 8 ; <i64> [#uses=1]
%conv19.i = trunc i64 %shr.i to i32 ; <i32> [#uses=1]
- volatile store i32 %conv19.i, i32* undef
+ store volatile i32 %conv19.i, i32* undef
ret i32 undef
}
diff --git a/test/CodeGen/X86/dbg-file-name.ll b/test/CodeGen/X86/dbg-file-name.ll
index 3a849aa54383..adf985461055 100644
--- a/test/CodeGen/X86/dbg-file-name.ll
+++ b/test/CodeGen/X86/dbg-file-name.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple x86_64-apple-darwin10.0.0 < %s | FileCheck %s
+; RUN: llc -enable-dwarf-directory -mtriple x86_64-apple-darwin10.0.0 < %s | FileCheck %s
; Radar 8884898
-; CHECK: file 1 "/Users/manav/one/two{{/|\\\\}}simple.c"
+; CHECK: file 1 "simple.c"
declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/dbg-inline.ll b/test/CodeGen/X86/dbg-inline.ll
deleted file mode 100644
index 523c62e778cd..000000000000
--- a/test/CodeGen/X86/dbg-inline.ll
+++ /dev/null
@@ -1,140 +0,0 @@
-; RUN: llc < %s | FileCheck %s
-; Radar 7881628, 9747970
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.7.0"
-
-%class.APFloat = type { i32 }
-
-define i32 @_ZNK7APFloat9partCountEv(%class.APFloat* nocapture %this) nounwind uwtable readonly optsize ssp align 2 {
-entry:
- tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !28), !dbg !41
- %prec = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !42
- %tmp = load i32* %prec, align 4, !dbg !42, !tbaa !44
- tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !47), !dbg !48
- %add.i = add i32 %tmp, 42, !dbg !49
- ret i32 %add.i, !dbg !42
-}
-
-define zeroext i1 @_ZNK7APFloat14bitwiseIsEqualERKS_(%class.APFloat* %this, %class.APFloat* %rhs) uwtable optsize ssp align 2 {
-entry:
- tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !29), !dbg !51
- tail call void @llvm.dbg.value(metadata !{%class.APFloat* %rhs}, i64 0, metadata !30), !dbg !52
- tail call void @llvm.dbg.value(metadata !{%class.APFloat* %this}, i64 0, metadata !53), !dbg !55
- %prec.i = getelementptr inbounds %class.APFloat* %this, i64 0, i32 0, !dbg !56
-;CHECK: DW_TAG_inlined_subroutine
-;CHECK: DW_AT_abstract_origin
-;CHECK: DW_AT_ranges
- %tmp.i = load i32* %prec.i, align 4, !dbg !56, !tbaa !44
- tail call void @llvm.dbg.value(metadata !{i32 %tmp.i}, i64 0, metadata !57), !dbg !58
- %add.i.i = add i32 %tmp.i, 42, !dbg !59
- tail call void @llvm.dbg.value(metadata !{i32 %add.i.i}, i64 0, metadata !31), !dbg !54
- %call2 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %this) optsize, !dbg !60
- tail call void @llvm.dbg.value(metadata !{i64* %call2}, i64 0, metadata !34), !dbg !60
- %call3 = tail call i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat* %rhs) optsize, !dbg !61
- tail call void @llvm.dbg.value(metadata !{i64* %call3}, i64 0, metadata !37), !dbg !61
- %tmp = zext i32 %add.i.i to i64
- br label %for.cond, !dbg !62
-
-for.cond: ; preds = %for.inc, %entry
- %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
- %tmp13 = sub i64 %tmp, %indvar, !dbg !62
- %i.0 = trunc i64 %tmp13 to i32, !dbg !62
- %cmp = icmp sgt i32 %i.0, 0, !dbg !62
- br i1 %cmp, label %for.body, label %return, !dbg !62
-
-for.body: ; preds = %for.cond
- %p.0 = getelementptr i64* %call2, i64 %indvar, !dbg !63
- %tmp6 = load i64* %p.0, align 8, !dbg !63, !tbaa !66
- %tmp8 = load i64* %call3, align 8, !dbg !63, !tbaa !66
- %cmp9 = icmp eq i64 %tmp6, %tmp8, !dbg !63
- br i1 %cmp9, label %for.inc, label %return, !dbg !63
-
-for.inc: ; preds = %for.body
- %indvar.next = add i64 %indvar, 1, !dbg !67
- br label %for.cond, !dbg !67
-
-return: ; preds = %for.cond, %for.body
- %retval.0 = phi i1 [ false, %for.body ], [ true, %for.cond ]
- ret i1 %retval.0, !dbg !68
-}
-
-declare i64* @_ZNK7APFloat16significandPartsEv(%class.APFloat*) optsize
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.dbg.sp = !{!1, !7, !12, !23, !24, !25}
-!llvm.dbg.lv._ZNK7APFloat9partCountEv = !{!28}
-!llvm.dbg.lv._ZNK7APFloat14bitwiseIsEqualERKS_ = !{!29, !30, !31, !34, !37}
-!llvm.dbg.lv._ZL16partCountForBitsj = !{!38}
-!llvm.dbg.gv = !{!39}
-
-!0 = metadata !{i32 655377, i32 0, i32 4, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 136149)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{i32 655406, i32 0, metadata !2, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 8, metadata !19, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!2 = metadata !{i32 655362, metadata !0, metadata !"APFloat", metadata !3, i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !4, i32 0, null, null} ; [ DW_TAG_class_type ]
-!3 = metadata !{i32 655401, metadata !"/Volumes/Athwagate/R9747970/apf.cc", metadata !"/private/tmp", metadata !0} ; [ DW_TAG_file_type ]
-!4 = metadata !{metadata !5, metadata !1, metadata !7, metadata !12}
-!5 = metadata !{i32 655373, metadata !2, metadata !"prec", metadata !3, i32 13, i64 32, i64 32, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ]
-!6 = metadata !{i32 655396, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!7 = metadata !{i32 655406, i32 0, metadata !2, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 9, metadata !8, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!8 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !9, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!9 = metadata !{metadata !6, metadata !10}
-!10 = metadata !{i32 655375, metadata !0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
-!11 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !2} ; [ DW_TAG_const_type ]
-!12 = metadata !{i32 655406, i32 0, metadata !2, metadata !"significandParts", metadata !"significandParts", metadata !"_ZNK7APFloat16significandPartsEv", metadata !3, i32 11, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ]
-!13 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!14 = metadata !{metadata !15, metadata !10}
-!15 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !16} ; [ DW_TAG_pointer_type ]
-!16 = metadata !{i32 655382, metadata !0, metadata !"integerPart", metadata !3, i32 2, i64 0, i64 0, i64 0, i32 0, metadata !17} ; [ DW_TAG_typedef ]
-!17 = metadata !{i32 655382, metadata !0, metadata !"uint64_t", metadata !3, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !18} ; [ DW_TAG_typedef ]
-!18 = metadata !{i32 655396, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
-!19 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !20, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!20 = metadata !{metadata !21, metadata !10, metadata !22}
-!21 = metadata !{i32 655396, metadata !0, metadata !"bool", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
-!22 = metadata !{i32 655376, metadata !0, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_reference_type ]
-!23 = metadata !{i32 655406, i32 0, metadata !0, metadata !"partCount", metadata !"partCount", metadata !"_ZNK7APFloat9partCountEv", metadata !3, i32 23, metadata !8, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (%class.APFloat*)* @_ZNK7APFloat9partCountEv, null, metadata !7} ; [ DW_TAG_subprogram ]
-!24 = metadata !{i32 655406, i32 0, metadata !0, metadata !"bitwiseIsEqual", metadata !"bitwiseIsEqual", metadata !"_ZNK7APFloat14bitwiseIsEqualERKS_", metadata !3, i32 28, metadata !19, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i1 (%class.APFloat*, %class.APFloat*)* @_ZNK7APFloat14bitwiseIsEqualERKS_, null, metadata !1} ; [ DW_TAG_subprogram ]
-!25 = metadata !{i32 655406, i32 0, metadata !3, metadata !"partCountForBits", metadata !"partCountForBits", metadata !"", metadata !3, i32 17, metadata !26, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, null} ; [ DW_TAG_subprogram ]
-!26 = metadata !{i32 655381, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !27, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!27 = metadata !{metadata !6}
-!28 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!29 = metadata !{i32 655617, metadata !24, metadata !"this", metadata !3, i32 16777244, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
-!30 = metadata !{i32 655617, metadata !24, metadata !"rhs", metadata !3, i32 33554460, metadata !22, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!31 = metadata !{i32 655616, metadata !32, metadata !"i", metadata !3, i32 29, metadata !33, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!32 = metadata !{i32 655371, metadata !24, i32 28, i32 56, metadata !3, i32 1} ; [ DW_TAG_lexical_block ]
-!33 = metadata !{i32 655396, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!34 = metadata !{i32 655616, metadata !32, metadata !"p", metadata !3, i32 30, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!35 = metadata !{i32 655375, metadata !0, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !36} ; [ DW_TAG_pointer_type ]
-!36 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !16} ; [ DW_TAG_const_type ]
-!37 = metadata !{i32 655616, metadata !32, metadata !"q", metadata !3, i32 31, metadata !35, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!38 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
-!39 = metadata !{i32 655412, i32 0, metadata !3, metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !"integerPartWidth", metadata !3, i32 3, metadata !40, i32 1, i32 1, i32 42} ; [ DW_TAG_variable ]
-!40 = metadata !{i32 655398, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_const_type ]
-!41 = metadata !{i32 22, i32 23, metadata !23, null}
-!42 = metadata !{i32 24, i32 10, metadata !43, null}
-!43 = metadata !{i32 655371, metadata !23, i32 23, i32 1, metadata !3, i32 0} ; [ DW_TAG_lexical_block ]
-!44 = metadata !{metadata !"int", metadata !45}
-!45 = metadata !{metadata !"omnipotent char", metadata !46}
-!46 = metadata !{metadata !"Simple C/C++ TBAA", null}
-!47 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !42} ; [ DW_TAG_arg_variable ]
-!48 = metadata !{i32 16, i32 58, metadata !25, metadata !42}
-!49 = metadata !{i32 18, i32 3, metadata !50, metadata !42}
-!50 = metadata !{i32 655371, metadata !25, i32 17, i32 1, metadata !3, i32 4} ; [ DW_TAG_lexical_block ]
-!51 = metadata !{i32 28, i32 15, metadata !24, null}
-!52 = metadata !{i32 28, i32 45, metadata !24, null}
-!53 = metadata !{i32 655617, metadata !23, metadata !"this", metadata !3, i32 16777238, metadata !10, i32 64, metadata !54} ; [ DW_TAG_arg_variable ]
-!54 = metadata !{i32 29, i32 10, metadata !32, null}
-!55 = metadata !{i32 22, i32 23, metadata !23, metadata !54}
-!56 = metadata !{i32 24, i32 10, metadata !43, metadata !54}
-!57 = metadata !{i32 655617, metadata !25, metadata !"bits", metadata !3, i32 16777232, metadata !6, i32 0, metadata !56} ; [ DW_TAG_arg_variable ]
-!58 = metadata !{i32 16, i32 58, metadata !25, metadata !56}
-!59 = metadata !{i32 18, i32 3, metadata !50, metadata !56}
-!60 = metadata !{i32 30, i32 24, metadata !32, null}
-!61 = metadata !{i32 31, i32 24, metadata !32, null}
-!62 = metadata !{i32 32, i32 3, metadata !32, null}
-!63 = metadata !{i32 33, i32 5, metadata !64, null}
-!64 = metadata !{i32 655371, metadata !65, i32 32, i32 25, metadata !3, i32 3} ; [ DW_TAG_lexical_block ]
-!65 = metadata !{i32 655371, metadata !32, i32 32, i32 3, metadata !3, i32 2} ; [ DW_TAG_lexical_block ]
-!66 = metadata !{metadata !"long long", metadata !45}
-!67 = metadata !{i32 32, i32 15, metadata !65, null}
-!68 = metadata !{i32 37, i32 1, metadata !32, null}
diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll
index afe1729d506f..c35935f015ac 100644
--- a/test/CodeGen/X86/dbg-merge-loc-entry.ll
+++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8"
;CHECK-NEXT: .short Lset
;CHECK-NEXT: Ltmp
;CHECK-NEXT: .byte 85 ## DW_OP_reg5
-;CHECK-NEXT: Ltmp7
+;CHECK-NEXT: Ltmp5
;CHECK-NEXT: .quad 0
;CHECK-NEXT: .quad 0
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
new file mode 100644
index 000000000000..788910c7fe72
--- /dev/null
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -0,0 +1,37 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; Radar 10464995
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@s = common global [4294967296 x i8] zeroinitializer, align 16
+;CHECK: .long 4294967295
+
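+; The array has 2^32 (4294967296) elements, so the DWARF subrange upper
+; bound is count - 1 = 4294967295; the .long check above guards against
+; that value being truncated to a smaller type.
+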
+define void @bar() nounwind uwtable ssp {
+entry:
+ store i8 97, i8* getelementptr inbounds ([4294967296 x i8]* @s, i32 0, i64 0), align 1, !dbg !18
+ ret void, !dbg !20
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"small.c", metadata !"/private/tmp", metadata !"clang version 3.1 (trunk 144833)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !11} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @bar, null, null, metadata !9} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"small.c", metadata !"/private/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{metadata !10}
+!10 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 720948, i32 0, null, metadata !"s", metadata !"s", metadata !"", metadata !6, i32 2, metadata !14, i32 0, i32 1, [4294967296 x i8]* @s} ; [ DW_TAG_variable ]
+!14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
+!15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ]
+!18 = metadata !{i32 5, i32 3, metadata !19, null}
+!19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!20 = metadata !{i32 6, i32 1, metadata !19, null}
diff --git a/test/CodeGen/X86/dbg-value-inlined-parameter.ll b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
index 481c4ba4a49a..d248a4130355 100644
--- a/test/CodeGen/X86/dbg-value-inlined-parameter.ll
+++ b/test/CodeGen/X86/dbg-value-inlined-parameter.ll
@@ -8,7 +8,7 @@
;CHECK-NEXT: DW_AT_call_file
;CHECK-NEXT: DW_AT_call_line
;CHECK-NEXT: DW_TAG_formal_parameter
-;CHECK-NEXT: .ascii "sp" ## DW_AT_name
+;CHECK-NEXT: Lstring11-Lsection_str ## DW_AT_name
%struct.S1 = type { float*, i32 }
diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll
index a0e4d16246ff..05e29ecff03f 100644
--- a/test/CodeGen/X86/dbg-value-location.ll
+++ b/test/CodeGen/X86/dbg-value-location.ll
@@ -4,8 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.0.0"
;Radar 8950491
-;CHECK: .ascii "var" ## DW_AT_name
-;CHECK-NEXT: .byte 0
+;CHECK: .long Lset5
;CHECK-NEXT: ## DW_AT_decl_file
;CHECK-NEXT: ## DW_AT_decl_line
;CHECK-NEXT: ## DW_AT_type
diff --git a/test/CodeGen/X86/dg.exp b/test/CodeGen/X86/dg.exp
deleted file mode 100644
index 629a14773615..000000000000
--- a/test/CodeGen/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 87c1be51f1ac..e577ecb85aa8 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -asm-verbose=0 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/dwarf-comp-dir.ll b/test/CodeGen/X86/dwarf-comp-dir.ll
new file mode 100644
index 000000000000..c64752c9522b
--- /dev/null
+++ b/test/CodeGen/X86/dwarf-comp-dir.ll
@@ -0,0 +1,16 @@
+; RUN: llc %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"empty.c", metadata !"/home/nlewycky", metadata !"clang version 3.1 (trunk 143523)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+
+; The important part of the following check is that dir = #0.
+; Dir Mod Time File Len File Name
+; ---- ---------- ---------- ---------------------------
+; CHECK: file_names[ 1] 0 0x00000000 0x00000000 empty.c
diff --git a/test/CodeGen/X86/empty-functions.ll b/test/CodeGen/X86/empty-functions.ll
index 874c53ac3b05..ac5174db5fc5 100644
--- a/test/CodeGen/X86/empty-functions.ll
+++ b/test/CodeGen/X86/empty-functions.ll
@@ -6,14 +6,11 @@ entry:
unreachable
}
; CHECK-NO-FP: _func:
-; CHECK-NO-FP-NEXT: :
; CHECK-NO-FP-NEXT: .cfi_startproc
; CHECK-NO-FP: nop
-; CHECK-NO-FP-NEXT: :
; CHECK-NO-FP-NEXT: .cfi_endproc
; CHECK-FP: _func:
-; CHECK-FP-NEXT: :
; CHECK-FP-NEXT: .cfi_startproc
; CHECK-FP-NEXT: :
; CHECK-FP-NEXT: pushq %rbp
@@ -25,5 +22,4 @@ entry:
; CHECK-FP-NEXT: :
; CHECK-FP-NEXT: .cfi_def_cfa_register %rbp
; CHECK-FP-NEXT: nop
-; CHECK-FP-NEXT: :
; CHECK-FP-NEXT: .cfi_endproc
diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll
index 52dcb61d87f8..0f16a64ccd79 100644
--- a/test/CodeGen/X86/epilogue.ll
+++ b/test/CodeGen/X86/epilogue.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86 | grep {movl %ebp}
+; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea
+; RUN: llc < %s -mcpu=generic -march=x86 | grep {movl %ebp}
declare void @bar(<2 x i64>* %n)
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll
new file mode 100644
index 000000000000..2135f9409cfe
--- /dev/null
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -0,0 +1,32 @@
+; RUN: llc < %s -march=x86 -mattr=+avx,+f16c | FileCheck %s
+
+define <4 x float> @test_x86_vcvtph2ps_128(<8 x i16> %a0) {
+ ; CHECK: vcvtph2ps
+ %res = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) nounwind readonly
+
+
+define <8 x float> @test_x86_vcvtph2ps_256(<8 x i16> %a0) {
+ ; CHECK: vcvtph2ps
+ %res = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0) ; <<8 x float>> [#uses=1]
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) nounwind readonly
+
+
+define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0) {
+ ; CHECK: vcvtps2ph
+ %res = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) nounwind readonly
+
+
+define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
+ ; CHECK: vcvtps2ph
+ %res = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32) nounwind readonly
diff --git a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
index e15182120094..e4982f054954 100644
--- a/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
+++ b/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | \
+; RUN: llc < %s -mcpu=generic -march=x86 -x86-asm-syntax=intel | \
; RUN: grep {add ESP, 8}
target triple = "i686-pc-linux-gnu"
diff --git a/test/CodeGen/X86/fast-isel-bc.ll b/test/CodeGen/X86/fast-isel-bc.ll
index 4abc3b5b3c85..8ac15cdbc03f 100644
--- a/test/CodeGen/X86/fast-isel-bc.ll
+++ b/test/CodeGen/X86/fast-isel-bc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O0 -regalloc=linearscan -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
+; RUN: llc < %s -O0 -march=x86-64 -mattr=+mmx,+sse2 | FileCheck %s
; PR4684
target datalayout =
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 91d1f5d1518c..f0375f86028c 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -82,9 +82,8 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
ret i64 %v11
; X64: test5:
; X64: movslq %e[[A1]], %rax
-; X64-NEXT: movq (%r[[A0]],%rax), %rax
-; X64-NEXT: addq %{{rdx|r8}}, %rax
-; X64-NEXT: ret
+; X64-NEXT: (%r[[A0]],%rax),
+; X64: ret
}
; PR9500, rdar://9156159 - Don't do non-local address mode folding,
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index 6a5a10295fbc..d8f4663c94e6 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -82,7 +82,7 @@ entry:
ret i64 %mul
; CHECK: test6:
-; CHECK: leaq (,%rdi,8), %rax
+; CHECK: shlq $3, %rdi
}
define i32 @test7(i32 %x) nounwind ssp {
@@ -90,7 +90,7 @@ entry:
%mul = mul nsw i32 %x, 8
ret i32 %mul
; CHECK: test7:
-; CHECK: leal (,%rdi,8), %eax
+; CHECK: shll $3, %edi
}
@@ -225,18 +225,20 @@ if.else: ; preds = %entry
; CHECK-NEXT: je
}
-; Check that 0.0 is materialized using pxor
+; Check that 0.0 is materialized using xorps
define void @test18(float* %p1) {
store float 0.0, float* %p1
ret void
; CHECK: test18:
-; CHECK: pxor
+; CHECK: xorps
}
+
+; Without any type hints, doubles use the smaller xorps instead of xorpd.
define void @test19(double* %p1) {
store double 0.0, double* %p1
ret void
; CHECK: test19:
-; CHECK: pxor
+; CHECK: xorps
}
; Check that we fast-isel sret
@@ -252,12 +254,12 @@ entry:
}
declare void @test20sret(%struct.a* sret)
-; Check that -0.0 is not materialized using pxor
+; Check that -0.0 is not materialized using xor
define void @test21(double* %p1) {
store double -0.0, double* %p1
ret void
; CHECK: test21:
-; CHECK-NOT: pxor
+; CHECK-NOT: xor
; CHECK: movsd LCPI
}
diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll
index 19972f74b2ba..b9598bb465ce 100644
--- a/test/CodeGen/X86/fast-isel-x86.ll
+++ b/test/CodeGen/X86/fast-isel-x86.ll
@@ -1,4 +1,4 @@
-; RUN: llc -fast-isel -O0 -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -fast-isel -O0 -mcpu=generic -mtriple=i386-apple-darwin10 -relocation-model=pic < %s | FileCheck %s
; This should use flds to set the return value.
; CHECK: test0:
diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll
index 8391860756a4..c88d52968dd8 100644
--- a/test/CodeGen/X86/fast-isel.ll
+++ b/test/CodeGen/X86/fast-isel.ll
@@ -99,7 +99,6 @@ define void @load_store_i1(i1* %p, i1* %q) nounwind {
ret void
}
-
@crash_test1x = external global <2 x i32>, align 8
define void @crash_test1() nounwind ssp {
@@ -108,3 +107,13 @@ define void @crash_test1() nounwind ssp {
ret void
}
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+define i64* @life() nounwind {
+ %a1 = alloca i64*, align 8
+ %a2 = bitcast i64** %a1 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind
+ %a3 = load i64** %a1, align 8
+ ret i64* %a3
+}
+
diff --git a/test/CodeGen/X86/fdiv.ll b/test/CodeGen/X86/fdiv.ll
new file mode 100644
index 000000000000..0749682e2f68
--- /dev/null
+++ b/test/CodeGen/X86/fdiv.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86-64 -enable-unsafe-fp-math | FileCheck %s
+
+define double @exact(double %x) {
+; Exact division by a constant converted to multiplication.
+; CHECK: @exact
+; CHECK: mulsd
+ %div = fdiv double %x, 2.0
+ ret double %div
+}
+
+define double @inexact(double %x) {
+; Inexact division by a constant converted to multiplication.
+; CHECK: @inexact
+; CHECK: mulsd
+ %div = fdiv double %x, 0x41DFFFFFFFC00000
+ ret double %div
+}
+
+define double @funky(double %x) {
+; No conversion to multiplication when there is no usable reciprocal
+; (the divisor here is 0.0).
+; CHECK: @funky
+; CHECK: divsd
+ %div = fdiv double %x, 0.0
+ ret double %div
+}
+
+define double @denormal1(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal1
+; CHECK: divsd
+ %div = fdiv double %x, 0x7FD0000000000001
+ ret double %div
+}
+
+define double @denormal2(double %x) {
+; Don't generate multiplication by a denormal.
+; CHECK: @denormal2
+; CHECK: divsd
+ %div = fdiv double %x, 0x7FEFFFFFFFFFFFFF
+ ret double %div
+}
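+
+; For reference: under -enable-unsafe-fp-math, 'fdiv x, C' becomes
+; 'fmul x, 1/C' even when 1/C is inexact, but not when the reciprocal is
+; unusable. 1/2.0 = 0.5 exactly (a power of two). 0x7FEFFFFFFFFFFFFF is
+; the largest finite double (~1.80e308); its reciprocal (~5.6e-309) falls
+; below the smallest normal (~2.2e-308), i.e. is denormal, so the division
+; is kept.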
diff --git a/test/CodeGen/X86/fltused.ll b/test/CodeGen/X86/fltused.ll
index 2ffcb966782a..81511a33f5cb 100644
--- a/test/CodeGen/X86/fltused.ll
+++ b/test/CodeGen/X86/fltused.ll
@@ -4,6 +4,8 @@
; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
@.str = private constant [4 x i8] c"%f\0A\00"
diff --git a/test/CodeGen/X86/fltused_function_pointer.ll b/test/CodeGen/X86/fltused_function_pointer.ll
new file mode 100644
index 000000000000..cfe484a8c258
--- /dev/null
+++ b/test/CodeGen/X86/fltused_function_pointer.ll
@@ -0,0 +1,19 @@
+; The purpose of this test is to verify that the fltused symbol is emitted
+; when any function is called with floating-point arguments on Windows, and
+; that it is not emitted otherwise.
+
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+; RUN: llc < %s -O0 -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -O0 -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN64
+
+@.str = private constant [4 x i8] c"%f\0A\00"
+
+define i32 @foo(i32 (i8*, ...)* %f) nounwind {
+entry:
+ %call = tail call i32 (i8*, ...)* %f(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double 1.000000e+000) nounwind
+ ret i32 0
+}
+
+; WIN32: .globl __fltused
+; WIN64: .globl _fltused
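+
+; For reference: _fltused is defined by the MSVC CRT; the compiler emits a
+; reference to it whenever floating point is used so the linker pulls in
+; the FP support code. On x86-32, C symbols get a leading underscore in the
+; assembly, hence __fltused there versus _fltused on x86-64. The -O0 RUN
+; lines check that the fast-isel path emits the reference too.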
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
new file mode 100644
index 000000000000..5ed03ef01f3c
--- /dev/null
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -0,0 +1,295 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4 | FileCheck %s
+
+; VFMADD
+define < 4 x float > @test_x86_fma4_vfmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddss
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+ ; CHECK: vfmaddss (%{{.*}})
+ %x = load float *%a2
+ %y = insertelement <4 x float> undef, float %x, i32 0
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
+ %x = load float *%a1
+ %y = insertelement <4 x float> undef, float %x, i32 0
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+ ; CHECK: vfmaddsd (%{{.*}})
+ %x = load double *%a2
+ %y = insertelement <2 x double> undef, double %x, i32 0
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
+ %x = load double *%a1
+ %y = insertelement <2 x double> undef, double %x, i32 0
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
+ ; CHECK: vfmaddps (%{{.*}})
+ %x = load <4 x float>* %a2
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+define < 4 x float > @test_x86_fma4_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+ %x = load <4 x float>* %a1
+ %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
+ ; CHECK: vfmaddpd (%{{.*}})
+ %x = load <2 x double>* %a2
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+define < 2 x double > @test_x86_fma4_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+ %x = load <2 x double>* %a1
+ %res = call < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmaddps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmaddpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUB
+define < 4 x float > @test_x86_fma4_vfmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmsubss
+ %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmsubsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmsubps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmsubpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmsubps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmsubpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) ; <i64> [#uses=1]
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMADD
+define < 4 x float > @test_x86_fma4_vfnmadd_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmaddss
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmaddsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmaddps
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) ; <i64> [#uses=1]
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmaddpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfnmaddps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) ; <i64> [#uses=1]
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfnmaddpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFNMSUB
+define < 4 x float > @test_x86_fma4_vfnmsub_ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmsubss
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmsubsd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 4 x float > @test_x86_fma4_vfnmsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfnmsubps
+ %res = call < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfnmsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfnmsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfnmsubpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfnmsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfnmsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfnmsubps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfnmsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfnmsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfnmsubpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfnmsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMADDSUB
+define < 4 x float > @test_x86_fma4_vfmaddsub_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmaddsubps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmaddsub.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmaddsub_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmaddsubpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmaddsub.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmaddsub_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmaddsubps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmaddsub.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmaddsub_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmaddsubpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmaddsub.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
+
+; VFMSUBADD
+define < 4 x float > @test_x86_fma4_vfmsubadd_ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2) {
+ ; CHECK: vfmsubaddps
+ %res = call < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %a2)
+ ret < 4 x float > %res
+}
+declare < 4 x float > @llvm.x86.fma4.vfmsubadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+
+define < 2 x double > @test_x86_fma4_vfmsubadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
+ ; CHECK: vfmsubaddpd
+ %res = call < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2)
+ ret < 2 x double > %res
+}
+declare < 2 x double > @llvm.x86.fma4.vfmsubadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+
+define < 8 x float > @test_x86_fma4_vfmsubadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
+ ; CHECK: vfmsubaddps
+ ; CHECK: ymm
+ %res = call < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2)
+ ret < 8 x float > %res
+}
+declare < 8 x float > @llvm.x86.fma4.vfmsubadd.ps.256(< 8 x float >, < 8 x float >, < 8 x float >) nounwind readnone
+
+define < 4 x double > @test_x86_fma4_vfmsubadd_pd_256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2) {
+ ; CHECK: vfmsubaddpd
+ ; CHECK: ymm
+ %res = call < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double > %a0, < 4 x double > %a1, < 4 x double > %a2)
+ ret < 4 x double > %res
+}
+declare < 4 x double > @llvm.x86.fma4.vfmsubadd.pd.256(< 4 x double >, < 4 x double >, < 4 x double >) nounwind readnone
diff --git a/test/CodeGen/X86/fold-and-shift.ll b/test/CodeGen/X86/fold-and-shift.ll
index 9f79f7723b33..93baa0e0eee0 100644
--- a/test/CodeGen/X86/fold-and-shift.ll
+++ b/test/CodeGen/X86/fold-and-shift.ll
@@ -1,21 +1,77 @@
-; RUN: llc < %s -march=x86 | not grep and
+; RUN: llc < %s -march=x86 | FileCheck %s
define i32 @t1(i8* %X, i32 %i) {
+; CHECK: t1:
+; CHECK-NOT: and
+; CHECK: movzbl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
entry:
- %tmp2 = shl i32 %i, 2 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp2, 1020 ; <i32> [#uses=1]
- %tmp7 = getelementptr i8* %X, i32 %tmp4 ; <i8*> [#uses=1]
- %tmp78 = bitcast i8* %tmp7 to i32* ; <i32*> [#uses=1]
- %tmp9 = load i32* %tmp78, align 4 ; <i32> [#uses=1]
- ret i32 %tmp9
+ %tmp2 = shl i32 %i, 2
+ %tmp4 = and i32 %tmp2, 1020
+ %tmp7 = getelementptr i8* %X, i32 %tmp4
+ %tmp78 = bitcast i8* %tmp7 to i32*
+ %tmp9 = load i32* %tmp78
+ ret i32 %tmp9
}
define i32 @t2(i16* %X, i32 %i) {
+; CHECK: t2:
+; CHECK-NOT: and
+; CHECK: movzwl
+; CHECK: movl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+ %tmp2 = shl i32 %i, 1
+ %tmp4 = and i32 %tmp2, 131070
+ %tmp7 = getelementptr i16* %X, i32 %tmp4
+ %tmp78 = bitcast i16* %tmp7 to i32*
+ %tmp9 = load i32* %tmp78
+ ret i32 %tmp9
+}
+
+define i32 @t3(i16* %i.ptr, i32* %arr) {
+; This case is tricky. The lshr followed by a gep will produce an lshr followed
+; by an and to remove the low bits. This can be simplified by doing the lshr by
+; a greater constant and using the addressing mode to scale the result back up.
+; To make matters worse, because of the two-phase zext of %i and the reuse of
+; both values in the function, the DAG can get confused trying to re-use them,
+; which prevents easy analysis of the mask needed to match this. (A sketch of
+; the expected code follows this function.)
+; CHECK: t3:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
+entry:
+ %i = load i16* %i.ptr
+ %i.zext = zext i16 %i to i32
+ %index = lshr i32 %i.zext, 11
+ %val.ptr = getelementptr inbounds i32* %arr, i32 %index
+ %val = load i32* %val.ptr
+ %sum = add i32 %val, %i.zext
+ ret i32 %sum
+}
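+
+; A rough sketch of the intended lowering for @t3 (illustrative only; the
+; exact registers are an assumption): computing 4 * (%i.zext >> 11) directly
+; would be canonicalized to (%i.zext >> 9) & -4, which needs the explicit
+; mask. Shifting by 11 and folding the *4 into the scaled addressing mode
+; instead gives
+;   shrl $11, %ecx
+;   addl (%eax,%ecx,4), %eax
+; with no and at all.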
+
+define i32 @t4(i16* %i.ptr, i32* %arr) {
+; A version of @t3 that has more zero extends and more re-use of intermediate
+; values. This exercises slightly different bits of canonicalization.
+; CHECK: t4:
+; CHECK-NOT: and
+; CHECK: shrl
+; CHECK: addl (%{{...}},%{{...}},4),
+; CHECK: ret
+
entry:
- %tmp2 = shl i32 %i, 1 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp2, 131070 ; <i32> [#uses=1]
- %tmp7 = getelementptr i16* %X, i32 %tmp4 ; <i16*> [#uses=1]
- %tmp78 = bitcast i16* %tmp7 to i32* ; <i32*> [#uses=1]
- %tmp9 = load i32* %tmp78, align 4 ; <i32> [#uses=1]
- ret i32 %tmp9
+ %i = load i16* %i.ptr
+ %i.zext = zext i16 %i to i32
+ %index = lshr i32 %i.zext, 11
+ %index.zext = zext i32 %index to i64
+ %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+ %val = load i32* %val.ptr
+ %sum.1 = add i32 %val, %i.zext
+ %sum.2 = add i32 %sum.1, %index
+ ret i32 %sum.2
}
diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll
index 5525af25270f..e03cb7edb580 100644
--- a/test/CodeGen/X86/fold-load.ll
+++ b/test/CodeGen/X86/fold-load.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
%struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
%struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (...)*, void (...)*, i8*, i8 }
@stmt_obstack = external global %struct.obstack ; <%struct.obstack*> [#uses=1]
diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll
index 647bbdb7f0fd..1d315ffe359b 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-0.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll
@@ -1,5 +1,7 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
+; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
+
+; The i386 test has been disabled now that the scheduler 2-addr hack is disabled.
; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9f8d9903810d..9cf4607cf5b2 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -1,15 +1,14 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=basic | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=basic | FileCheck %s
; This testcase should need to spill the -1 value on both x86-32 and x86-64,
; so it shouldn't use pcmpeqd to materialize an all-ones vector; it
; should use a constant-pool load instead.
+;
+; RAGreedy defeats the test by splitting live ranges.
; Constant pool all-ones vector:
-; CHECK: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
-; CHECK-NEXT: .long 4294967295
+; CHECK: .space 16,255
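+; (.space 16,255 emits sixteen 0xff bytes, i.e. the same 128-bit all-ones
+; constant that the four .long 4294967295 directives spelled out.)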
; No pcmpeqd instructions, everybody uses the constant pool.
; CHECK: program_1:
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
index b9cb5d7894c7..df90254dbd27 100644
--- a/test/CodeGen/X86/fp-stack-O0.ll
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -10,7 +10,7 @@ declare i32 @x2(x86_fp80, x86_fp80) nounwind
; Pass arguments on the stack.
; CHECK-NEXT: movq %rsp, [[RCX:%r..]]
; Copy constant-pool value.
-; CHECK-NEXT: fldt LCPI
+; CHECK-NEXT: fldl LCPI
; CHECK-NEXT: fstpt 16([[RCX]])
; Copy x1 return value.
; CHECK-NEXT: fstpt ([[RCX]])
diff --git a/test/CodeGen/X86/fp-stack-ret-conv.ll b/test/CodeGen/X86/fp-stack-ret-conv.ll
index f220b24f90b0..3e26141eca6b 100644
--- a/test/CodeGen/X86/fp-stack-ret-conv.ll
+++ b/test/CodeGen/X86/fp-stack-ret-conv.ll
@@ -10,7 +10,7 @@ entry:
%tmp13 = tail call double @foo()
%tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1]
%tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1]
- volatile store double %tmp3940, double* %b
+ store volatile double %tmp3940, double* %b
ret void
}
diff --git a/test/CodeGen/X86/fsgsbase.ll b/test/CodeGen/X86/fsgsbase.ll
new file mode 100644
index 000000000000..0c22e3c7db29
--- /dev/null
+++ b/test/CodeGen/X86/fsgsbase.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86-64 -mcpu=core-avx-i -mattr=fsgsbase | FileCheck %s
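+;
+; The rdfsbase/rdgsbase/wrfsbase/wrgsbase intrinsics below correspond to the
+; FSGSBASE instructions (user-mode access to the FS and GS segment base
+; registers), which is why the run line needs -mattr=fsgsbase.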
+
+define i32 @test_x86_rdfsbase_32() {
+ ; CHECK: rdfsbasel
+ %res = call i32 @llvm.x86.rdfsbase.32()
+ ret i32 %res
+}
+declare i32 @llvm.x86.rdfsbase.32() nounwind readnone
+
+define i32 @test_x86_rdgsbase_32() {
+ ; CHECK: rdgsbasel
+ %res = call i32 @llvm.x86.rdgsbase.32()
+ ret i32 %res
+}
+declare i32 @llvm.x86.rdgsbase.32() nounwind readnone
+
+define i64 @test_x86_rdfsbase_64() {
+ ; CHECK: rdfsbaseq
+ %res = call i64 @llvm.x86.rdfsbase.64()
+ ret i64 %res
+}
+declare i64 @llvm.x86.rdfsbase.64() nounwind readnone
+
+define i64 @test_x86_rdgsbase_64() {
+ ; CHECK: rdgsbaseq
+ %res = call i64 @llvm.x86.rdgsbase.64()
+ ret i64 %res
+}
+declare i64 @llvm.x86.rdgsbase.64() nounwind readnone
+
+define void @test_x86_wrfsbase_32(i32 %x) {
+ ; CHECK: wrfsbasel
+ call void @llvm.x86.wrfsbase.32(i32 %x)
+ ret void
+}
+declare void @llvm.x86.wrfsbase.32(i32) nounwind readnone
+
+define void @test_x86_wrgsbase_32(i32 %x) {
+ ; CHECK: wrgsbasel
+ call void @llvm.x86.wrgsbase.32(i32 %x)
+ ret void
+}
+declare void @llvm.x86.wrgsbase.32(i32) nounwind readnone
+
+define void @test_x86_wrfsbase_64(i64 %x) {
+ ; CHECK: wrfsbaseq
+ call void @llvm.x86.wrfsbase.64(i64 %x)
+ ret void
+}
+declare void @llvm.x86.wrfsbase.64(i64) nounwind readnone
+
+define void @test_x86_wrgsbase_64(i64 %x) {
+ ; CHECK: wrgsbaseq
+ call void @llvm.x86.wrgsbase.64(i64 %x)
+ ret void
+}
+declare void @llvm.x86.wrgsbase.64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/gcc_except_table.ll b/test/CodeGen/X86/gcc_except_table.ll
new file mode 100644
index 000000000000..d89e9dca33d1
--- /dev/null
+++ b/test/CodeGen/X86/gcc_except_table.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple x86_64-apple-darwin %s -o - | FileCheck %s
+@_ZTIi = external constant i8*
+
+define i32 @main() uwtable optsize ssp {
+entry:
+ invoke void @_Z1fv() optsize
+ to label %try.cont unwind label %lpad
+
+lpad:
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ br label %eh.resume
+
+try.cont:
+ ret i32 0
+
+eh.resume:
+ resume { i8*, i32 } %0
+}
+
+declare void @_Z1fv() optsize
+
+declare i32 @__gxx_personality_v0(...)
+
+; CHECK: Leh_func_end0:
+; CHECK: GCC_except_table0
+; CHECK: = Leh_func_end0-
diff --git a/test/CodeGen/X86/haddsub.ll b/test/CodeGen/X86/haddsub.ll
index 91758ead636b..5f1f4fd8f76d 100644
--- a/test/CodeGen/X86/haddsub.ll
+++ b/test/CodeGen/X86/haddsub.ll
@@ -192,3 +192,94 @@ define <4 x float> @hsubps4(<4 x float> %x) {
%r = fsub <4 x float> %a, %b
ret <4 x float> %r
}
+
+; SSE3: vhaddps1:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps1:
+; AVX: vhaddps
+define <8 x float> @vhaddps1(<8 x float> %x, <8 x float> %y) {
+ %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = fadd <8 x float> %a, %b
+ ret <8 x float> %r
+}
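+
+; (For reference: haddps x, y computes <x0+x1, x2+x3, y0+y1, y2+y3>, and the
+; 256-bit form repeats that per 128-bit lane, so an fadd of the even-lane and
+; odd-lane shuffles above is exactly the hadd pattern. Without AVX the 256-bit
+; operation is split in two, hence the pair of haddps CHECKs for SSE3.)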
+
+; SSE3: vhaddps2:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps2:
+; AVX: vhaddps
+define <8 x float> @vhaddps2(<8 x float> %x, <8 x float> %y) {
+ %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 2, i32 9, i32 10, i32 5, i32 6, i32 13, i32 14>
+ %b = shufflevector <8 x float> %y, <8 x float> %x, <8 x i32> <i32 8, i32 11, i32 0, i32 3, i32 12, i32 15, i32 4, i32 7>
+ %r = fadd <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhaddps3:
+; SSE3-NOT: vhaddps
+; SSE3: haddps
+; SSE3: haddps
+; AVX: vhaddps3:
+; AVX: vhaddps
+define <8 x float> @vhaddps3(<8 x float> %x) {
+ %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+ %r = fadd <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhsubps1:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps1:
+; AVX: vhsubps
+define <8 x float> @vhsubps1(<8 x float> %x, <8 x float> %y) {
+ %a = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
+ %r = fsub <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhsubps3:
+; SSE3-NOT: vhsubps
+; SSE3: hsubps
+; SSE3: hsubps
+; AVX: vhsubps3:
+; AVX: vhsubps
+define <8 x float> @vhsubps3(<8 x float> %x) {
+ %a = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 8, i32 10, i32 4, i32 6, i32 undef, i32 14>
+ %b = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 9, i32 undef, i32 5, i32 7, i32 13, i32 15>
+ %r = fsub <8 x float> %a, %b
+ ret <8 x float> %r
+}
+
+; SSE3: vhaddpd1:
+; SSE3-NOT: vhaddpd
+; SSE3: haddpd
+; SSE3: haddpd
+; AVX: vhaddpd1:
+; AVX: vhaddpd
+define <4 x double> @vhaddpd1(<4 x double> %x, <4 x double> %y) {
+ %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %r = fadd <4 x double> %a, %b
+ ret <4 x double> %r
+}
+
+; SSE3: vhsubpd1:
+; SSE3-NOT: vhsubpd
+; SSE3: hsubpd
+; SSE3: hsubpd
+; AVX: vhsubpd1:
+; AVX: vhsubpd
+define <4 x double> @vhsubpd1(<4 x double> %x, <4 x double> %y) {
+ %a = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+ %b = shufflevector <4 x double> %x, <4 x double> %y, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ %r = fsub <4 x double> %a, %b
+ ret <4 x double> %r
+}
diff --git a/test/CodeGen/X86/hoist-invariant-load.ll b/test/CodeGen/X86/hoist-invariant-load.ll
new file mode 100644
index 000000000000..4289fa7cc254
--- /dev/null
+++ b/test/CodeGen/X86/hoist-invariant-load.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -stats -O2 |& grep "1 machine-licm"
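+;
+; The !invariant.load metadata on the selector-reference load below is what
+; lets MachineLICM hoist the load out of the 10000-iteration loop; the -stats
+; grep above then checks for exactly one machine-licm hoist.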
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+define void @test(i8* %x) uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
+ %call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0)
+ %inc = add i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/i128-sdiv.ll b/test/CodeGen/X86/i128-sdiv.ll
new file mode 100644
index 000000000000..ab5cdda0ce22
--- /dev/null
+++ b/test/CodeGen/X86/i128-sdiv.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; Make sure none of these crash, and that the power-of-two transformations
+; trigger correctly.
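+; (For reference: 73786976294838206464 is 2^66, so @test1 and @test2 should
+; lower to inline shift/add sequences, while @test3's divisor, -(2^66 + 3), is
+; not a power of two and is expected to fall back to a division libcall.)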
+
+define i128 @test1(i128 %x) {
+ ; CHECK: test1:
+ ; CHECK-NOT: call
+ %tmp = sdiv i128 %x, 73786976294838206464
+ ret i128 %tmp
+}
+
+define i128 @test2(i128 %x) {
+ ; CHECK: test2:
+ ; CHECK-NOT: call
+ %tmp = sdiv i128 %x, -73786976294838206464
+ ret i128 %tmp
+}
+
+define i128 @test3(i128 %x) {
+ ; CHECK: test3:
+ ; CHECK: call
+ %tmp = sdiv i128 %x, -73786976294838206467
+ ret i128 %tmp
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index c9a1c1c38c5b..2249618c8a9e 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=i386-apple-darwin | FileCheck %s
; There should be no stack manipulations between the inline asm and ret.
; CHECK: test1
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index 1c8e2f9eec85..fca68baac6ef 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64
+; RUN: llc < %s -march=x86-64 -mattr=+avx
; rdar://7066579
%0 = type { i64, i64, i64, i64, i64 } ; type %0
@@ -20,3 +20,18 @@ define void @test3(double %tmp) nounwind {
call void asm sideeffect "$0", "q"(double %tmp) nounwind
ret void
}
+
+; rdar://10392864
+define void @test4(i8 signext %val, i8 signext %a, i8 signext %b, i8 signext %c, i8 signext %d) nounwind {
+entry:
+ %0 = tail call { i8, i8, i8, i8, i8 } asm "foo $1, $2, $3, $4, $1\0Axchgb ${0:b}, ${0:h}", "=q,={ax},={bx},={cx},={dx},0,1,2,3,4,~{dirflag},~{fpsr},~{flags}"(i8 %val, i8 %a, i8 %b, i8 %c, i8 %d) nounwind
+ ret void
+}
+
+; rdar://10614894
+define <8 x float> @test5(<8 x float> %a, <8 x float> %b) nounwind {
+entry:
+ %0 = tail call <8 x float> asm "vperm2f128 $3, $2, $1, $0", "=x,x,x,i,~{dirflag},~{fpsr},~{flags}"(<8 x float> %a, <8 x float> %b, i32 16) nounwind
+ ret <8 x float> %0
+}
+
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
index 79b688551eb9..91576fb09ec2 100644
--- a/test/CodeGen/X86/inline-asm-tied.ll
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -regalloc=linearscan | grep {movl %edx, 4(%esp)} | count 2
+; RUN: llc < %s -mtriple=i386-apple-darwin9 -O0 -optimize-regalloc -regalloc=basic | FileCheck %s
; rdar://6992609
+; CHECK: movl [[EDX:%e..]], 4(%esp)
+; CHECK: movl [[EDX]], 4(%esp)
target triple = "i386-apple-darwin9.0"
@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll
deleted file mode 100644
index 8f79fb8cde27..000000000000
--- a/test/CodeGen/X86/iv-users-in-other-loops.ll
+++ /dev/null
@@ -1,300 +0,0 @@
-; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
-; RUN: not grep inc %t
-; RUN: grep dec %t | count 2
-; RUN: grep addq %t | count 12
-; RUN: not grep addb %t
-; RUN: not grep leaq %t
-; RUN: not grep leal %t
-; RUN: not grep movq %t
-
-; IV users in each of the loops from other loops shouldn't cause LSR
-; to insert new induction variables. Previously it would create a
-; flood of new induction variables.
-; Also, the loop reversal should kick in once.
-;
-; In this example, performing LSR on the entire loop nest,
-; as opposed to only the inner loop can further reduce induction variables,
-; and their related instructions and registers.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @foo(float* %A, i32 %IA, float* %B, i32 %IB, float* nocapture %C, i32 %N) nounwind {
-entry:
- %0 = xor i32 %IA, 1 ; <i32> [#uses=1]
- %1 = xor i32 %IB, 1 ; <i32> [#uses=1]
- %2 = or i32 %1, %0 ; <i32> [#uses=1]
- %3 = icmp eq i32 %2, 0 ; <i1> [#uses=1]
- br i1 %3, label %bb2, label %bb13
-
-bb: ; preds = %bb3
- %4 = load float* %A_addr.0, align 4 ; <float> [#uses=1]
- %5 = load float* %B_addr.0, align 4 ; <float> [#uses=1]
- %6 = fmul float %4, %5 ; <float> [#uses=1]
- %7 = fadd float %6, %Sum0.0 ; <float> [#uses=1]
- %indvar.next154 = add i64 %B_addr.0.rec, 1 ; <i64> [#uses=1]
- br label %bb2
-
-bb2: ; preds = %entry, %bb
- %B_addr.0.rec = phi i64 [ %indvar.next154, %bb ], [ 0, %entry ] ; <i64> [#uses=14]
- %Sum0.0 = phi float [ %7, %bb ], [ 0.000000e+00, %entry ] ; <float> [#uses=5]
- %indvar146 = trunc i64 %B_addr.0.rec to i32 ; <i32> [#uses=1]
- %N_addr.0 = sub i32 %N, %indvar146 ; <i32> [#uses=6]
- %A_addr.0 = getelementptr float* %A, i64 %B_addr.0.rec ; <float*> [#uses=4]
- %B_addr.0 = getelementptr float* %B, i64 %B_addr.0.rec ; <float*> [#uses=4]
- %8 = icmp sgt i32 %N_addr.0, 0 ; <i1> [#uses=1]
- br i1 %8, label %bb3, label %bb4
-
-bb3: ; preds = %bb2
- %9 = ptrtoint float* %A_addr.0 to i64 ; <i64> [#uses=1]
- %10 = and i64 %9, 15 ; <i64> [#uses=1]
- %11 = icmp eq i64 %10, 0 ; <i1> [#uses=1]
- br i1 %11, label %bb4, label %bb
-
-bb4: ; preds = %bb3, %bb2
- %12 = ptrtoint float* %B_addr.0 to i64 ; <i64> [#uses=1]
- %13 = and i64 %12, 15 ; <i64> [#uses=1]
- %14 = icmp eq i64 %13, 0 ; <i1> [#uses=1]
- %15 = icmp sgt i32 %N_addr.0, 15 ; <i1> [#uses=2]
- br i1 %14, label %bb6.preheader, label %bb10.preheader
-
-bb10.preheader: ; preds = %bb4
- br i1 %15, label %bb9, label %bb12.loopexit
-
-bb6.preheader: ; preds = %bb4
- br i1 %15, label %bb5, label %bb8.loopexit
-
-bb5: ; preds = %bb5, %bb6.preheader
- %indvar143 = phi i64 [ 0, %bb6.preheader ], [ %indvar.next144, %bb5 ] ; <i64> [#uses=3]
- %vSum0.072 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ] ; <<4 x float>> [#uses=1]
- %vSum1.070 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ] ; <<4 x float>> [#uses=1]
- %vSum2.069 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ] ; <<4 x float>> [#uses=1]
- %vSum3.067 = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ] ; <<4 x float>> [#uses=1]
- %indvar145 = trunc i64 %indvar143 to i32 ; <i32> [#uses=1]
- %tmp150 = mul i32 %indvar145, -16 ; <i32> [#uses=1]
- %N_addr.268 = add i32 %tmp150, %N_addr.0 ; <i32> [#uses=1]
- %A_addr.273.rec = shl i64 %indvar143, 4 ; <i64> [#uses=5]
- %B_addr.0.sum180 = add i64 %B_addr.0.rec, %A_addr.273.rec ; <i64> [#uses=2]
- %B_addr.271 = getelementptr float* %B, i64 %B_addr.0.sum180 ; <float*> [#uses=1]
- %A_addr.273 = getelementptr float* %A, i64 %B_addr.0.sum180 ; <float*> [#uses=1]
- tail call void asm sideeffect ";# foo", "~{dirflag},~{fpsr},~{flags}"() nounwind
- %16 = bitcast float* %A_addr.273 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %17 = load <4 x float>* %16, align 16 ; <<4 x float>> [#uses=1]
- %18 = bitcast float* %B_addr.271 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %19 = load <4 x float>* %18, align 16 ; <<4 x float>> [#uses=1]
- %20 = fmul <4 x float> %17, %19 ; <<4 x float>> [#uses=1]
- %21 = fadd <4 x float> %20, %vSum0.072 ; <<4 x float>> [#uses=2]
- %A_addr.273.sum163 = or i64 %A_addr.273.rec, 4 ; <i64> [#uses=1]
- %A_addr.0.sum175 = add i64 %B_addr.0.rec, %A_addr.273.sum163 ; <i64> [#uses=2]
- %22 = getelementptr float* %A, i64 %A_addr.0.sum175 ; <float*> [#uses=1]
- %23 = bitcast float* %22 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %24 = load <4 x float>* %23, align 16 ; <<4 x float>> [#uses=1]
- %25 = getelementptr float* %B, i64 %A_addr.0.sum175 ; <float*> [#uses=1]
- %26 = bitcast float* %25 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %27 = load <4 x float>* %26, align 16 ; <<4 x float>> [#uses=1]
- %28 = fmul <4 x float> %24, %27 ; <<4 x float>> [#uses=1]
- %29 = fadd <4 x float> %28, %vSum1.070 ; <<4 x float>> [#uses=2]
- %A_addr.273.sum161 = or i64 %A_addr.273.rec, 8 ; <i64> [#uses=1]
- %A_addr.0.sum174 = add i64 %B_addr.0.rec, %A_addr.273.sum161 ; <i64> [#uses=2]
- %30 = getelementptr float* %A, i64 %A_addr.0.sum174 ; <float*> [#uses=1]
- %31 = bitcast float* %30 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %32 = load <4 x float>* %31, align 16 ; <<4 x float>> [#uses=1]
- %33 = getelementptr float* %B, i64 %A_addr.0.sum174 ; <float*> [#uses=1]
- %34 = bitcast float* %33 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %35 = load <4 x float>* %34, align 16 ; <<4 x float>> [#uses=1]
- %36 = fmul <4 x float> %32, %35 ; <<4 x float>> [#uses=1]
- %37 = fadd <4 x float> %36, %vSum2.069 ; <<4 x float>> [#uses=2]
- %A_addr.273.sum159 = or i64 %A_addr.273.rec, 12 ; <i64> [#uses=1]
- %A_addr.0.sum173 = add i64 %B_addr.0.rec, %A_addr.273.sum159 ; <i64> [#uses=2]
- %38 = getelementptr float* %A, i64 %A_addr.0.sum173 ; <float*> [#uses=1]
- %39 = bitcast float* %38 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %40 = load <4 x float>* %39, align 16 ; <<4 x float>> [#uses=1]
- %41 = getelementptr float* %B, i64 %A_addr.0.sum173 ; <float*> [#uses=1]
- %42 = bitcast float* %41 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %43 = load <4 x float>* %42, align 16 ; <<4 x float>> [#uses=1]
- %44 = fmul <4 x float> %40, %43 ; <<4 x float>> [#uses=1]
- %45 = fadd <4 x float> %44, %vSum3.067 ; <<4 x float>> [#uses=2]
- %.rec83 = add i64 %A_addr.273.rec, 16 ; <i64> [#uses=1]
- %A_addr.0.sum172 = add i64 %B_addr.0.rec, %.rec83 ; <i64> [#uses=2]
- %46 = getelementptr float* %A, i64 %A_addr.0.sum172 ; <float*> [#uses=1]
- %47 = getelementptr float* %B, i64 %A_addr.0.sum172 ; <float*> [#uses=1]
- %48 = add i32 %N_addr.268, -16 ; <i32> [#uses=2]
- %49 = icmp sgt i32 %48, 15 ; <i1> [#uses=1]
- %indvar.next144 = add i64 %indvar143, 1 ; <i64> [#uses=1]
- br i1 %49, label %bb5, label %bb8.loopexit
-
-bb7: ; preds = %bb7, %bb8.loopexit
- %indvar130 = phi i64 [ 0, %bb8.loopexit ], [ %indvar.next131, %bb7 ] ; <i64> [#uses=3]
- %vSum0.260 = phi <4 x float> [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ] ; <<4 x float>> [#uses=1]
- %indvar132 = trunc i64 %indvar130 to i32 ; <i32> [#uses=1]
- %tmp133 = mul i32 %indvar132, -4 ; <i32> [#uses=1]
- %N_addr.358 = add i32 %tmp133, %N_addr.2.lcssa ; <i32> [#uses=1]
- %A_addr.361.rec = shl i64 %indvar130, 2 ; <i64> [#uses=3]
- %B_addr.359 = getelementptr float* %B_addr.2.lcssa, i64 %A_addr.361.rec ; <float*> [#uses=1]
- %A_addr.361 = getelementptr float* %A_addr.2.lcssa, i64 %A_addr.361.rec ; <float*> [#uses=1]
- %50 = bitcast float* %A_addr.361 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %51 = load <4 x float>* %50, align 16 ; <<4 x float>> [#uses=1]
- %52 = bitcast float* %B_addr.359 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %53 = load <4 x float>* %52, align 16 ; <<4 x float>> [#uses=1]
- %54 = fmul <4 x float> %51, %53 ; <<4 x float>> [#uses=1]
- %55 = fadd <4 x float> %54, %vSum0.260 ; <<4 x float>> [#uses=2]
- %.rec85 = add i64 %A_addr.361.rec, 4 ; <i64> [#uses=2]
- %56 = getelementptr float* %A_addr.2.lcssa, i64 %.rec85 ; <float*> [#uses=1]
- %57 = getelementptr float* %B_addr.2.lcssa, i64 %.rec85 ; <float*> [#uses=1]
- %58 = add i32 %N_addr.358, -4 ; <i32> [#uses=2]
- %59 = icmp sgt i32 %58, 3 ; <i1> [#uses=1]
- %indvar.next131 = add i64 %indvar130, 1 ; <i64> [#uses=1]
- br i1 %59, label %bb7, label %bb13
-
-bb8.loopexit: ; preds = %bb5, %bb6.preheader
- %A_addr.2.lcssa = phi float* [ %A_addr.0, %bb6.preheader ], [ %46, %bb5 ] ; <float*> [#uses=3]
- %vSum0.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %21, %bb5 ] ; <<4 x float>> [#uses=2]
- %B_addr.2.lcssa = phi float* [ %B_addr.0, %bb6.preheader ], [ %47, %bb5 ] ; <float*> [#uses=3]
- %vSum1.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %29, %bb5 ] ; <<4 x float>> [#uses=2]
- %vSum2.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %37, %bb5 ] ; <<4 x float>> [#uses=2]
- %N_addr.2.lcssa = phi i32 [ %N_addr.0, %bb6.preheader ], [ %48, %bb5 ] ; <i32> [#uses=3]
- %vSum3.0.lcssa = phi <4 x float> [ zeroinitializer, %bb6.preheader ], [ %45, %bb5 ] ; <<4 x float>> [#uses=2]
- %60 = icmp sgt i32 %N_addr.2.lcssa, 3 ; <i1> [#uses=1]
- br i1 %60, label %bb7, label %bb13
-
-bb9: ; preds = %bb9, %bb10.preheader
- %indvar106 = phi i64 [ 0, %bb10.preheader ], [ %indvar.next107, %bb9 ] ; <i64> [#uses=3]
- %vSum0.339 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ] ; <<4 x float>> [#uses=1]
- %vSum1.237 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ] ; <<4 x float>> [#uses=1]
- %vSum2.236 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ] ; <<4 x float>> [#uses=1]
- %vSum3.234 = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ] ; <<4 x float>> [#uses=1]
- %indvar108 = trunc i64 %indvar106 to i32 ; <i32> [#uses=1]
- %tmp113 = mul i32 %indvar108, -16 ; <i32> [#uses=1]
- %N_addr.435 = add i32 %tmp113, %N_addr.0 ; <i32> [#uses=1]
- %A_addr.440.rec = shl i64 %indvar106, 4 ; <i64> [#uses=5]
- %B_addr.0.sum = add i64 %B_addr.0.rec, %A_addr.440.rec ; <i64> [#uses=2]
- %B_addr.438 = getelementptr float* %B, i64 %B_addr.0.sum ; <float*> [#uses=1]
- %A_addr.440 = getelementptr float* %A, i64 %B_addr.0.sum ; <float*> [#uses=1]
- %61 = bitcast float* %B_addr.438 to <4 x float>* ; <i8*> [#uses=1]
- %62 = load <4 x float>* %61, align 1
- %B_addr.438.sum169 = or i64 %A_addr.440.rec, 4 ; <i64> [#uses=1]
- %B_addr.0.sum187 = add i64 %B_addr.0.rec, %B_addr.438.sum169 ; <i64> [#uses=2]
- %63 = getelementptr float* %B, i64 %B_addr.0.sum187 ; <float*> [#uses=1]
- %64 = bitcast float* %63 to <4 x float>* ; <i8*> [#uses=1]
- %65 = load <4 x float>* %64, align 1
- %B_addr.438.sum168 = or i64 %A_addr.440.rec, 8 ; <i64> [#uses=1]
- %B_addr.0.sum186 = add i64 %B_addr.0.rec, %B_addr.438.sum168 ; <i64> [#uses=2]
- %66 = getelementptr float* %B, i64 %B_addr.0.sum186 ; <float*> [#uses=1]
- %67 = bitcast float* %66 to <4 x float>* ; <i8*> [#uses=1]
- %68 = load <4 x float>* %67, align 1
- %B_addr.438.sum167 = or i64 %A_addr.440.rec, 12 ; <i64> [#uses=1]
- %B_addr.0.sum185 = add i64 %B_addr.0.rec, %B_addr.438.sum167 ; <i64> [#uses=2]
- %69 = getelementptr float* %B, i64 %B_addr.0.sum185 ; <float*> [#uses=1]
- %70 = bitcast float* %69 to <4 x float>* ; <i8*> [#uses=1]
- %71 = load <4 x float>* %70, align 1
- %72 = bitcast float* %A_addr.440 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %73 = load <4 x float>* %72, align 16 ; <<4 x float>> [#uses=1]
- %74 = fmul <4 x float> %73, %62 ; <<4 x float>> [#uses=1]
- %75 = fadd <4 x float> %74, %vSum0.339 ; <<4 x float>> [#uses=2]
- %76 = getelementptr float* %A, i64 %B_addr.0.sum187 ; <float*> [#uses=1]
- %77 = bitcast float* %76 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %78 = load <4 x float>* %77, align 16 ; <<4 x float>> [#uses=1]
- %79 = fmul <4 x float> %78, %65 ; <<4 x float>> [#uses=1]
- %80 = fadd <4 x float> %79, %vSum1.237 ; <<4 x float>> [#uses=2]
- %81 = getelementptr float* %A, i64 %B_addr.0.sum186 ; <float*> [#uses=1]
- %82 = bitcast float* %81 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %83 = load <4 x float>* %82, align 16 ; <<4 x float>> [#uses=1]
- %84 = fmul <4 x float> %83, %68 ; <<4 x float>> [#uses=1]
- %85 = fadd <4 x float> %84, %vSum2.236 ; <<4 x float>> [#uses=2]
- %86 = getelementptr float* %A, i64 %B_addr.0.sum185 ; <float*> [#uses=1]
- %87 = bitcast float* %86 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %88 = load <4 x float>* %87, align 16 ; <<4 x float>> [#uses=1]
- %89 = fmul <4 x float> %88, %71 ; <<4 x float>> [#uses=1]
- %90 = fadd <4 x float> %89, %vSum3.234 ; <<4 x float>> [#uses=2]
- %.rec89 = add i64 %A_addr.440.rec, 16 ; <i64> [#uses=1]
- %A_addr.0.sum170 = add i64 %B_addr.0.rec, %.rec89 ; <i64> [#uses=2]
- %91 = getelementptr float* %A, i64 %A_addr.0.sum170 ; <float*> [#uses=1]
- %92 = getelementptr float* %B, i64 %A_addr.0.sum170 ; <float*> [#uses=1]
- %93 = add i32 %N_addr.435, -16 ; <i32> [#uses=2]
- %94 = icmp sgt i32 %93, 15 ; <i1> [#uses=1]
- %indvar.next107 = add i64 %indvar106, 1 ; <i64> [#uses=1]
- br i1 %94, label %bb9, label %bb12.loopexit
-
-bb11: ; preds = %bb11, %bb12.loopexit
- %indvar = phi i64 [ 0, %bb12.loopexit ], [ %indvar.next, %bb11 ] ; <i64> [#uses=3]
- %vSum0.428 = phi <4 x float> [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ] ; <<4 x float>> [#uses=1]
- %indvar96 = trunc i64 %indvar to i32 ; <i32> [#uses=1]
- %tmp = mul i32 %indvar96, -4 ; <i32> [#uses=1]
- %N_addr.526 = add i32 %tmp, %N_addr.4.lcssa ; <i32> [#uses=1]
- %A_addr.529.rec = shl i64 %indvar, 2 ; <i64> [#uses=3]
- %B_addr.527 = getelementptr float* %B_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1]
- %A_addr.529 = getelementptr float* %A_addr.4.lcssa, i64 %A_addr.529.rec ; <float*> [#uses=1]
- %95 = bitcast float* %B_addr.527 to <4 x float>* ; <i8*> [#uses=1]
- %96 = load <4 x float>* %95, align 1
- %97 = bitcast float* %A_addr.529 to <4 x float>* ; <<4 x float>*> [#uses=1]
- %98 = load <4 x float>* %97, align 16 ; <<4 x float>> [#uses=1]
- %99 = fmul <4 x float> %98, %96 ; <<4 x float>> [#uses=1]
- %100 = fadd <4 x float> %99, %vSum0.428 ; <<4 x float>> [#uses=2]
- %.rec91 = add i64 %A_addr.529.rec, 4 ; <i64> [#uses=2]
- %101 = getelementptr float* %A_addr.4.lcssa, i64 %.rec91 ; <float*> [#uses=1]
- %102 = getelementptr float* %B_addr.4.lcssa, i64 %.rec91 ; <float*> [#uses=1]
- %103 = add i32 %N_addr.526, -4 ; <i32> [#uses=2]
- %104 = icmp sgt i32 %103, 3 ; <i1> [#uses=1]
- %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=1]
- br i1 %104, label %bb11, label %bb13
-
-bb12.loopexit: ; preds = %bb9, %bb10.preheader
- %A_addr.4.lcssa = phi float* [ %A_addr.0, %bb10.preheader ], [ %91, %bb9 ] ; <float*> [#uses=3]
- %vSum0.3.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %75, %bb9 ] ; <<4 x float>> [#uses=2]
- %B_addr.4.lcssa = phi float* [ %B_addr.0, %bb10.preheader ], [ %92, %bb9 ] ; <float*> [#uses=3]
- %vSum1.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %80, %bb9 ] ; <<4 x float>> [#uses=2]
- %vSum2.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %85, %bb9 ] ; <<4 x float>> [#uses=2]
- %N_addr.4.lcssa = phi i32 [ %N_addr.0, %bb10.preheader ], [ %93, %bb9 ] ; <i32> [#uses=3]
- %vSum3.2.lcssa = phi <4 x float> [ zeroinitializer, %bb10.preheader ], [ %90, %bb9 ] ; <<4 x float>> [#uses=2]
- %105 = icmp sgt i32 %N_addr.4.lcssa, 3 ; <i1> [#uses=1]
- br i1 %105, label %bb11, label %bb13
-
-bb13: ; preds = %bb12.loopexit, %bb11, %bb8.loopexit, %bb7, %entry
- %Sum0.1 = phi float [ 0.000000e+00, %entry ], [ %Sum0.0, %bb7 ], [ %Sum0.0, %bb8.loopexit ], [ %Sum0.0, %bb11 ], [ %Sum0.0, %bb12.loopexit ] ; <float> [#uses=1]
- %vSum3.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum3.0.lcssa, %bb7 ], [ %vSum3.0.lcssa, %bb8.loopexit ], [ %vSum3.2.lcssa, %bb11 ], [ %vSum3.2.lcssa, %bb12.loopexit ] ; <<4 x float>> [#uses=1]
- %N_addr.1 = phi i32 [ %N, %entry ], [ %N_addr.2.lcssa, %bb8.loopexit ], [ %58, %bb7 ], [ %N_addr.4.lcssa, %bb12.loopexit ], [ %103, %bb11 ] ; <i32> [#uses=2]
- %vSum2.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum2.0.lcssa, %bb7 ], [ %vSum2.0.lcssa, %bb8.loopexit ], [ %vSum2.2.lcssa, %bb11 ], [ %vSum2.2.lcssa, %bb12.loopexit ] ; <<4 x float>> [#uses=1]
- %vSum1.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum1.0.lcssa, %bb7 ], [ %vSum1.0.lcssa, %bb8.loopexit ], [ %vSum1.2.lcssa, %bb11 ], [ %vSum1.2.lcssa, %bb12.loopexit ] ; <<4 x float>> [#uses=1]
- %B_addr.1 = phi float* [ %B, %entry ], [ %B_addr.2.lcssa, %bb8.loopexit ], [ %57, %bb7 ], [ %B_addr.4.lcssa, %bb12.loopexit ], [ %102, %bb11 ] ; <float*> [#uses=1]
- %vSum0.1 = phi <4 x float> [ zeroinitializer, %entry ], [ %vSum0.0.lcssa, %bb8.loopexit ], [ %55, %bb7 ], [ %vSum0.3.lcssa, %bb12.loopexit ], [ %100, %bb11 ] ; <<4 x float>> [#uses=1]
- %A_addr.1 = phi float* [ %A, %entry ], [ %A_addr.2.lcssa, %bb8.loopexit ], [ %56, %bb7 ], [ %A_addr.4.lcssa, %bb12.loopexit ], [ %101, %bb11 ] ; <float*> [#uses=1]
- %106 = fadd <4 x float> %vSum0.1, %vSum2.1 ; <<4 x float>> [#uses=1]
- %107 = fadd <4 x float> %vSum1.1, %vSum3.1 ; <<4 x float>> [#uses=1]
- %108 = fadd <4 x float> %106, %107 ; <<4 x float>> [#uses=4]
- %tmp23 = extractelement <4 x float> %108, i32 0 ; <float> [#uses=1]
- %tmp21 = extractelement <4 x float> %108, i32 1 ; <float> [#uses=1]
- %109 = fadd float %tmp23, %tmp21 ; <float> [#uses=1]
- %tmp19 = extractelement <4 x float> %108, i32 2 ; <float> [#uses=1]
- %tmp17 = extractelement <4 x float> %108, i32 3 ; <float> [#uses=1]
- %110 = fadd float %tmp19, %tmp17 ; <float> [#uses=1]
- %111 = fadd float %109, %110 ; <float> [#uses=1]
- %Sum0.254 = fadd float %111, %Sum0.1 ; <float> [#uses=2]
- %112 = icmp sgt i32 %N_addr.1, 0 ; <i1> [#uses=1]
- br i1 %112, label %bb.nph56, label %bb16
-
-bb.nph56: ; preds = %bb13
- %tmp. = zext i32 %N_addr.1 to i64 ; <i64> [#uses=1]
- br label %bb14
-
-bb14: ; preds = %bb14, %bb.nph56
- %indvar117 = phi i64 [ 0, %bb.nph56 ], [ %indvar.next118, %bb14 ] ; <i64> [#uses=3]
- %Sum0.255 = phi float [ %Sum0.254, %bb.nph56 ], [ %Sum0.2, %bb14 ] ; <float> [#uses=1]
- %tmp.122 = sext i32 %IB to i64 ; <i64> [#uses=1]
- %B_addr.652.rec = mul i64 %indvar117, %tmp.122 ; <i64> [#uses=1]
- %tmp.124 = sext i32 %IA to i64 ; <i64> [#uses=1]
- %A_addr.653.rec = mul i64 %indvar117, %tmp.124 ; <i64> [#uses=1]
- %B_addr.652 = getelementptr float* %B_addr.1, i64 %B_addr.652.rec ; <float*> [#uses=1]
- %A_addr.653 = getelementptr float* %A_addr.1, i64 %A_addr.653.rec ; <float*> [#uses=1]
- %113 = load float* %A_addr.653, align 4 ; <float> [#uses=1]
- %114 = load float* %B_addr.652, align 4 ; <float> [#uses=1]
- %115 = fmul float %113, %114 ; <float> [#uses=1]
- %Sum0.2 = fadd float %115, %Sum0.255 ; <float> [#uses=2]
- %indvar.next118 = add i64 %indvar117, 1 ; <i64> [#uses=2]
- %exitcond = icmp eq i64 %indvar.next118, %tmp. ; <i1> [#uses=1]
- br i1 %exitcond, label %bb16, label %bb14
-
-bb16: ; preds = %bb14, %bb13
- %Sum0.2.lcssa = phi float [ %Sum0.254, %bb13 ], [ %Sum0.2, %bb14 ] ; <float> [#uses=1]
- store float %Sum0.2.lcssa, float* %C, align 4
- ret void
-}
diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll
index 5e8e16217363..dbd133cd9ab4 100644
--- a/test/CodeGen/X86/jump_sign.ll
+++ b/test/CodeGen/X86/jump_sign.ll
@@ -1,7 +1,9 @@
-; RUN: llc < %s -march=x86 | grep jns
+; RUN: llc < %s -march=x86 -mcpu=pentiumpro | FileCheck %s
define i32 @f(i32 %X) {
entry:
+; CHECK: f:
+; CHECK: jns
%tmp1 = add i32 %X, 1 ; <i32> [#uses=1]
%tmp = icmp slt i32 %tmp1, 0 ; <i1> [#uses=1]
br i1 %tmp, label %cond_true, label %cond_next
@@ -18,3 +20,15 @@ cond_next: ; preds = %cond_true, %entry
declare i32 @bar(...)
declare i32 @baz(...)
+
+; rdar://10633221
+define i32 @g(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: g:
+; CHECK-NOT: test
+; CHECK: cmovs
+ %sub = sub nsw i32 %a, %b
+ %cmp = icmp sgt i32 %sub, 0
+ %cond = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %cond
+}
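+
+; The subtraction itself sets SF, so no separate test instruction is needed:
+; when %a - %b is negative, cmovs replaces the difference with 0, implementing
+; max(a - b, 0) for the select above.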
diff --git a/test/CodeGen/X86/legalize-libcalls.ll b/test/CodeGen/X86/legalize-libcalls.ll
new file mode 100644
index 000000000000..879dc98ab20d
--- /dev/null
+++ b/test/CodeGen/X86/legalize-libcalls.ll
@@ -0,0 +1,35 @@
+; RUN: llc -march=x86 < %s
+; RUN: llc -march=x86-64 < %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+define float @MakeSphere(float %theta.079) nounwind {
+entry:
+ %add36 = fadd float %theta.079, undef
+ %call = call float @cosf(float %theta.079) nounwind readnone
+ %call45 = call float @sinf(float %theta.079) nounwind readnone
+ %call37 = call float @sinf(float %add36) nounwind readnone
+ store float %call, float* undef, align 8
+ store float %call37, float* undef, align 8
+ store float %call45, float* undef, align 8
+ ret float %add36
+}
+
+define hidden fastcc void @unroll_loop(i64 %storemerge32129) nounwind {
+entry:
+ call fastcc void @copy_rtx() nounwind
+ call fastcc void @copy_rtx() nounwind
+ %tmp225 = alloca i8, i64 %storemerge32129, align 8 ; [#uses=0 type=i8*]
+ %cmp651201 = icmp slt i64 %storemerge32129, 0 ; [#uses=1 type=i1]
+ br i1 %cmp651201, label %for.body653.lr.ph, label %if.end638.for.end659_crit_edge
+
+for.body653.lr.ph: ; preds = %entry
+ unreachable
+
+if.end638.for.end659_crit_edge: ; preds = %entry
+ unreachable
+}
+
+declare float @cosf(float) nounwind readnone
+declare float @sinf(float) nounwind readnone
+declare hidden fastcc void @copy_rtx() nounwind
diff --git a/test/CodeGen/X86/legalize-shift-64.ll b/test/CodeGen/X86/legalize-shift-64.ll
new file mode 100644
index 000000000000..c9f2fc27dbff
--- /dev/null
+++ b/test/CodeGen/X86/legalize-shift-64.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=generic -march=x86 < %s | FileCheck %s
+
+define i64 @test1(i32 %xx, i32 %test) nounwind {
+ %conv = zext i32 %xx to i64
+ %and = and i32 %test, 7
+ %sh_prom = zext i32 %and to i64
+ %shl = shl i64 %conv, %sh_prom
+ ret i64 %shl
+; CHECK: test1:
+; CHECK: shll %cl, %eax
+; CHECK: shrl %edx
+; CHECK: xorb $31
+; CHECK: shrl %cl, %edx
+}
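+
+; In the expansion above, %cl holds c = %test & 7, and the xorb $31 trick
+; relies on the identity (valid for 0 <= c < 32, guaranteed here by the and):
+;   (x >> 1) >> (31 ^ c) == x >> (32 - c)
+; Splitting the shift this way also sidesteps the out-of-range single shift
+; by 32 that c == 0 would otherwise require.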
+
+define i64 @test2(i64 %xx, i32 %test) nounwind {
+ %and = and i32 %test, 7
+ %sh_prom = zext i32 %and to i64
+ %shl = shl i64 %xx, %sh_prom
+ ret i64 %shl
+; CHECK: test2:
+; CHECK: shll %cl, %esi
+; CHECK: shrl %edx
+; CHECK: xorb $31
+; CHECK: shrl %cl, %edx
+; CHECK: orl %esi, %edx
+; CHECK: shll %cl, %eax
+}
+
+define i64 @test3(i64 %xx, i32 %test) nounwind {
+ %and = and i32 %test, 7
+ %sh_prom = zext i32 %and to i64
+ %shr = lshr i64 %xx, %sh_prom
+ ret i64 %shr
+; CHECK: test3:
+; CHECK: shrl %cl, %esi
+; CHECK: leal (%edx,%edx), %eax
+; CHECK: xorb $31, %cl
+; CHECK: shll %cl, %eax
+; CHECK: orl %esi, %eax
+; CHECK: shrl %cl, %edx
+}
+
+define i64 @test4(i64 %xx, i32 %test) nounwind {
+ %and = and i32 %test, 7
+ %sh_prom = zext i32 %and to i64
+ %shr = ashr i64 %xx, %sh_prom
+ ret i64 %shr
+; CHECK: test4:
+; CHECK: shrl %cl, %esi
+; CHECK: leal (%edx,%edx), %eax
+; CHECK: xorb $31, %cl
+; CHECK: shll %cl, %eax
+; CHECK: orl %esi, %eax
+; CHECK: sarl %cl, %edx
+}
diff --git a/test/CodeGen/X86/lit.local.cfg b/test/CodeGen/X86/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/CodeGen/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/X86/log2_not_readnone.ll b/test/CodeGen/X86/log2_not_readnone.ll
new file mode 100644
index 000000000000..5620835d7baf
--- /dev/null
+++ b/test/CodeGen/X86/log2_not_readnone.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=i386-linux-gnueabi %s -o - | FileCheck %s
+
+; Log2 and exp2 are string-matched to intrinsics. If they are not declared
+; readnone, they can't be changed to intrinsics (because they can change errno).
+
+declare double @log2(double)
+declare double @exp2(double)
+
+define void @f() {
+ ; CHECK: calll log2
+ %1 = call double @log2(double 0.000000e+00)
+ ; CHECK: calll exp2
+ %2 = call double @exp2(double 0.000000e+00)
+ ret void
+}
diff --git a/test/CodeGen/X86/loop-strength-reduce3.ll b/test/CodeGen/X86/loop-strength-reduce3.ll
deleted file mode 100644
index d6c265f329a1..000000000000
--- a/test/CodeGen/X86/loop-strength-reduce3.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep cmp | grep 240
-; RUN: llc < %s -march=x86 -enable-lsr-nested | grep inc | count 1
-
-define i32 @foo(i32 %A, i32 %B, i32 %C, i32 %D) nounwind {
-entry:
- %tmp2955 = icmp sgt i32 %C, 0 ; <i1> [#uses=1]
- br i1 %tmp2955, label %bb26.outer.us, label %bb40.split
-
-bb26.outer.us: ; preds = %bb26.bb32_crit_edge.us, %entry
- %i.044.0.ph.us = phi i32 [ 0, %entry ], [ %indvar.next57, %bb26.bb32_crit_edge.us ] ; <i32> [#uses=2]
- %k.1.ph.us = phi i32 [ 0, %entry ], [ %k.0.us, %bb26.bb32_crit_edge.us ] ; <i32> [#uses=1]
- %tmp3.us = mul i32 %i.044.0.ph.us, 6 ; <i32> [#uses=1]
- br label %bb1.us
-
-bb1.us: ; preds = %bb1.us, %bb26.outer.us
- %j.053.us = phi i32 [ 0, %bb26.outer.us ], [ %tmp25.us, %bb1.us ] ; <i32> [#uses=2]
- %k.154.us = phi i32 [ %k.1.ph.us, %bb26.outer.us ], [ %k.0.us, %bb1.us ] ; <i32> [#uses=1]
- %tmp5.us = add i32 %tmp3.us, %j.053.us ; <i32> [#uses=1]
- %tmp7.us = shl i32 %D, %tmp5.us ; <i32> [#uses=2]
- %tmp9.us = icmp eq i32 %tmp7.us, %B ; <i1> [#uses=1]
- %tmp910.us = zext i1 %tmp9.us to i32 ; <i32> [#uses=1]
- %tmp12.us = and i32 %tmp7.us, %A ; <i32> [#uses=1]
- %tmp19.us = and i32 %tmp12.us, %tmp910.us ; <i32> [#uses=1]
- %k.0.us = add i32 %tmp19.us, %k.154.us ; <i32> [#uses=3]
- %tmp25.us = add i32 %j.053.us, 1 ; <i32> [#uses=2]
- %tmp29.us = icmp slt i32 %tmp25.us, %C ; <i1> [#uses=1]
- br i1 %tmp29.us, label %bb1.us, label %bb26.bb32_crit_edge.us
-
-bb26.bb32_crit_edge.us: ; preds = %bb1.us
- %indvar.next57 = add i32 %i.044.0.ph.us, 1 ; <i32> [#uses=2]
- %exitcond = icmp eq i32 %indvar.next57, 40 ; <i1> [#uses=1]
- br i1 %exitcond, label %bb40.split, label %bb26.outer.us
-
-bb40.split: ; preds = %bb26.bb32_crit_edge.us, %entry
- %k.1.lcssa.lcssa.us-lcssa = phi i32 [ %k.0.us, %bb26.bb32_crit_edge.us ], [ 0, %entry ] ; <i32> [#uses=1]
- ret i32 %k.1.lcssa.lcssa.us-lcssa
-}
diff --git a/test/CodeGen/X86/loop-strength-reduce5.ll b/test/CodeGen/X86/loop-strength-reduce5.ll
index b07eeb6759a4..d50a66805db7 100644
--- a/test/CodeGen/X86/loop-strength-reduce5.ll
+++ b/test/CodeGen/X86/loop-strength-reduce5.ll
@@ -11,9 +11,9 @@ entry:
bb: ; preds = %bb, %entry
%i.014.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%tmp1 = trunc i32 %i.014.0 to i16 ; <i16> [#uses=2]
- volatile store i16 %tmp1, i16* @X, align 2
+ store volatile i16 %tmp1, i16* @X, align 2
%tmp34 = shl i16 %tmp1, 2 ; <i16> [#uses=1]
- volatile store i16 %tmp34, i16* @Y, align 2
+ store volatile i16 %tmp34, i16* @Y, align 2
%indvar.next = add i32 %i.014.0, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 938023ffe037..ebda9f201df9 100644
--- a/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -1,6 +1,8 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s
+; CHECK: t:
; CHECK: decq
+; CHECK-NEXT: movl (
; CHECK-NEXT: jne
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]
@@ -135,3 +137,44 @@ bb2: ; preds = %bb
store i8 %92, i8* %93, align 1
ret void
}
+
+; Check that DAGCombiner doesn't mess up the IV update when the exiting value
+; is equal to the stride.
+; It must not fold (cmp (add iv, 1), 1) --> (cmp iv, 0).
+
+; CHECK: f:
+; CHECK: %for.body
+; CHECK: incl [[IV:%e..]]
+; CHECK: cmpl $1, [[IV]]
+; CHECK: jne
+; CHECK: ret
+
+define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp {
+entry:
+ %cmp4 = icmp eq i32 %i, 1
+ br i1 %cmp4, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %0 = sext i32 %i to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
+ %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 4
+ %cmp1 = icmp ugt i32 %1, %b.05
+ %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
+ %2 = trunc i64 %indvars.iv to i32
+ %i.addr.0.bi.0 = select i1 %cmp1, i32 %2, i32 %bi.06
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %bi.0.lcssa = phi i32 [ 0, %entry ], [ %i.addr.0.bi.0, %for.body ]
+ ret i32 %bi.0.lcssa
+}
+
diff --git a/test/CodeGen/X86/lsr-nonaffine.ll b/test/CodeGen/X86/lsr-nonaffine.ll
index d0d2bbd67cf6..d825b5a76c09 100644
--- a/test/CodeGen/X86/lsr-nonaffine.ll
+++ b/test/CodeGen/X86/lsr-nonaffine.ll
@@ -19,7 +19,7 @@ entry:
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
- volatile store i64 %i, i64* %p
+ store volatile i64 %i, i64* %p
%i.next = add i64 %i, %s
%c = icmp slt i64 %i.next, %n
br i1 %c, label %loop, label %exit
diff --git a/test/CodeGen/X86/lsr-reuse.ll b/test/CodeGen/X86/lsr-reuse.ll
index 527a5a60e868..1311a73fd32c 100644
--- a/test/CodeGen/X86/lsr-reuse.ll
+++ b/test/CodeGen/X86/lsr-reuse.ll
@@ -1,4 +1,5 @@
; XFAIL: *
+; ...should pass. See PR12324: misched bringup
; RUN: llc < %s -march=x86-64 -O3 -asm-verbose=false | FileCheck %s
target datalayout = "e-p:64:64:64"
target triple = "x86_64-unknown-unknown"
diff --git a/test/CodeGen/X86/lsr-sort.ll b/test/CodeGen/X86/lsr-sort.ll
index 1f3b59a905b9..b85ddeb13b8d 100644
--- a/test/CodeGen/X86/lsr-sort.ll
+++ b/test/CodeGen/X86/lsr-sort.ll
@@ -12,7 +12,7 @@ entry:
bb: ; preds = %bb, %entry
%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%1 = trunc i32 %i.03 to i16 ; <i16> [#uses=1]
- volatile store i16 %1, i16* @X, align 2
+ store volatile i16 %1, i16* @X, align 2
%indvar.next = add i32 %i.03, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %N ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
diff --git a/test/CodeGen/X86/lzcnt.ll b/test/CodeGen/X86/lzcnt.ll
index e5a55abf1ab7..2faa24a9a544 100644
--- a/test/CodeGen/X86/lzcnt.ll
+++ b/test/CodeGen/X86/lzcnt.ll
@@ -1,38 +1,62 @@
; RUN: llc < %s -march=x86-64 -mattr=+lzcnt | FileCheck %s
-define i32 @t1(i32 %x) nounwind {
- %tmp = tail call i32 @llvm.ctlz.i32( i32 %x )
- ret i32 %tmp
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
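+
+; The new i1 operand on llvm.ctlz is the is_zero_undef flag: when it is true,
+; a zero input produces an undefined result. lzcnt is well defined for a zero
+; input either way, so the false and true variants below should all select
+; lzcnt instructions.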
+
+define i8 @t1(i8 %x) nounwind {
+ %tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 false )
+ ret i8 %tmp
; CHECK: t1:
; CHECK: lzcntl
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
-
define i16 @t2(i16 %x) nounwind {
- %tmp = tail call i16 @llvm.ctlz.i16( i16 %x )
+ %tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 false )
ret i16 %tmp
; CHECK: t2:
; CHECK: lzcntw
}
-declare i16 @llvm.ctlz.i16(i16) nounwind readnone
+define i32 @t3(i32 %x) nounwind {
+ %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 false )
+ ret i32 %tmp
+; CHECK: t3:
+; CHECK: lzcntl
+}
-define i64 @t3(i64 %x) nounwind {
- %tmp = tail call i64 @llvm.ctlz.i64( i64 %x )
+define i64 @t4(i64 %x) nounwind {
+ %tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 false )
ret i64 %tmp
-; CHECK: t3:
+; CHECK: t4:
; CHECK: lzcntq
}
-declare i64 @llvm.ctlz.i64(i64) nounwind readnone
-
-define i8 @t4(i8 %x) nounwind {
- %tmp = tail call i8 @llvm.ctlz.i8( i8 %x )
+define i8 @t5(i8 %x) nounwind {
+ %tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
ret i8 %tmp
-; CHECK: t4:
+; CHECK: t5:
+; CHECK: lzcntl
+}
+
+define i16 @t6(i16 %x) nounwind {
+ %tmp = tail call i16 @llvm.ctlz.i16( i16 %x, i1 true )
+ ret i16 %tmp
+; CHECK: t6:
; CHECK: lzcntw
}
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+define i32 @t7(i32 %x) nounwind {
+ %tmp = tail call i32 @llvm.ctlz.i32( i32 %x, i1 true )
+ ret i32 %tmp
+; CHECK: t7:
+; CHECK: lzcntl
+}
+define i64 @t8(i64 %x) nounwind {
+ %tmp = tail call i64 @llvm.ctlz.i64( i64 %x, i1 true )
+ ret i64 %tmp
+; CHECK: t8:
+; CHECK: lzcntq
+}
diff --git a/test/CodeGen/X86/machine-cp.ll b/test/CodeGen/X86/machine-cp.ll
new file mode 100644
index 000000000000..54fa01c38fde
--- /dev/null
+++ b/test/CodeGen/X86/machine-cp.ll
@@ -0,0 +1,36 @@
+; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
+
+; After tail duplication, two copies in an early exit BB can be cancelled out.
+; rdar://10640363
+define i32 @t1(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: jne
+ %cmp1 = icmp eq i32 %b, 0
+ br i1 %cmp1, label %while.end, label %while.body
+
+; CHECK: BB
+; CHECK-NOT: mov
+; CHECK: ret
+
+while.body: ; preds = %entry, %while.body
+ %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
+ %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
+ %rem = srem i32 %a.addr.03, %b.addr.02
+ %cmp = icmp eq i32 %rem, 0
+ br i1 %cmp, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
+ ret i32 %a.addr.0.lcssa
+}
+
+; Two movdqa (from phi-elimination) in the entry BB cancel out.
+; rdar://10428165
+define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
+entry:
+; CHECK: t2:
+; CHECK-NOT: movdqa
+ %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
+ ret <8 x i16> %tmp8
+}
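For reference, t1 above is Euclid's algorithm; a plausible C++ original (a sketch, the real source is not in the commit) is below. The a/b swap at the bottom of the loop is what lowers to the two copies that get cancelled in the early-exit block.

    // Hypothetical source for t1: the swap produces the copies that machine
    // copy propagation removes after tail duplication.
    int gcd(int a, int b) {
      while (b != 0) {
        int r = a % b;
        a = b;
        b = r;
      }
      return a;
    }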
diff --git a/test/CodeGen/X86/machine-cse.ll b/test/CodeGen/X86/machine-cse.ll
index d819fc8f6ecd..a757cde6abe9 100644
--- a/test/CodeGen/X86/machine-cse.ll
+++ b/test/CodeGen/X86/machine-cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-macosx < %s | FileCheck %s
; rdar://7610418
%ptr = type { i8* }
@@ -77,3 +77,25 @@ bb.nph743.us: ; preds = %for.body53.us, %if.
sw.bb307: ; preds = %sw.bb, %entry
ret void
}
+
+; CSE a physical-register-defining instruction across the MBB boundary.
+; rdar://10660865
+define i32 @cross_mbb_phys_cse(i32 %a, i32 %b) nounwind ssp {
+entry:
+; CHECK: cross_mbb_phys_cse:
+; CHECK: cmpl
+; CHECK: ja
+ %cmp = icmp ugt i32 %a, %b
+ br i1 %cmp, label %return, label %if.end
+
+if.end: ; preds = %entry
+; CHECK-NOT: cmpl
+; CHECK: sbbl
+ %cmp1 = icmp ult i32 %a, %b
+ %. = sext i1 %cmp1 to i32
+ br label %return
+
+return: ; preds = %if.end, %entry
+ %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ]
+ ret i32 %retval.0
+}
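The new test relies on one compare of a and b setting EFLAGS for both blocks: the ja in entry and the sbbl in if.end can share it, so the second cmpl is redundant. Roughly equivalent C++ (reconstructed from the IR, not from the commit):

    // cross_mbb_phys_cse, source form: a single unsigned compare should
    // feed both the branch (ja) and the -1/0 materialization (sbbl).
    int cross_mbb_phys_cse(unsigned a, unsigned b) {
      if (a > b)
        return 1;
      return -static_cast<int>(a < b); // sext i1 to i32: 0 or -1
    }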
diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll
index 0b4d73a683af..a7b036e9b658 100644
--- a/test/CodeGen/X86/masked-iv-safe.ll
+++ b/test/CodeGen/X86/masked-iv-safe.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movz %t
; RUN: not grep sar %t
; RUN: not grep shl %t
-; RUN: grep add %t | count 2
+; RUN: grep add %t | count 1
; RUN: grep inc %t | count 4
; RUN: grep dec %t | count 2
-; RUN: grep lea %t | count 2
+; RUN: grep lea %t | count 3
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.
diff --git a/test/CodeGen/X86/mcinst-avx-lowering.ll b/test/CodeGen/X86/mcinst-avx-lowering.ll
new file mode 100644
index 000000000000..41f96e8856c9
--- /dev/null
+++ b/test/CodeGen/X86/mcinst-avx-lowering.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10 -mattr=avx -show-mc-encoding < %s | FileCheck %s
+
+define i64 @t1(double %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t1
+ %0 = bitcast double %d_ivar to i64
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x7e,0xc0]
+ ret i64 %0
+}
+
+define double @t2(i64 %d_ivar) nounwind uwtable ssp {
+entry:
+; CHECK: t2
+ %0 = bitcast i64 %d_ivar to double
+; CHECK: vmovd
+; CHECK: encoding: [0xc4,0xe1,0xf9,0x6e,0xc7]
+ ret double %0
+}
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index f43b0bf509ca..86c6862a53fc 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -79,3 +79,16 @@ entry:
; LINUX movq
}
+
+@.str = private unnamed_addr constant [30 x i8] c"\00aaaaaaaaaaaaaaaaaaaaaaaaaaaa\00", align 1
+
+define void @test5(i8* nocapture %C) nounwind uwtable ssp {
+entry:
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([30 x i8]* @.str, i64 0, i64 0), i64 16, i32 1, i1 false)
+ ret void
+
+; DARWIN: movabsq $7016996765293437281
+; DARWIN: movabsq $7016996765293437184
+}
+
+
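The two movabsq immediates in test5 are just the copied bytes of @.str read as little-endian i64 halves; the values check out (verifiable in any C++ compiler):

    // Bytes 8..15 of the 16-byte copy are all 'a' (0x61); bytes 0..7 begin
    // with the leading '\0'.
    static_assert(0x6161616161616161ULL == 7016996765293437281ULL, "high half");
    static_assert(0x6161616161616100ULL == 7016996765293437184ULL, "low half");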
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
new file mode 100644
index 000000000000..8f2f6f7697df
--- /dev/null
+++ b/test/CodeGen/X86/misched-new.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -enable-misched -misched=shuffle -misched-bottomup < %s
+; REQUIRES: asserts
+;
+; Interesting MachineScheduler cases.
+;
+; FIXME: There should be an assert in the coalescer that we're not rematting
+; "not-quite-dead" copies, but that breaks a lot of tests <rdar://problem/11148682>.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; From oggenc.
+; After coalescing, we have a dead superreg (RAX) definition.
+;
+; CHECK: xorl %esi, %esi
+; CHECK: movl $32, %ecx
+; CHECK: rep;movsl
+define fastcc void @_preextrapolate_helper() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %for.cond.preheader, label %if.end
+
+for.cond.preheader: ; preds = %entry
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* null, i64 128, i32 4, i1 false) nounwind
+ unreachable
+
+if.end: ; preds = %entry
+ ret void
+}
diff --git a/test/CodeGen/X86/mmx-builtins.ll b/test/CodeGen/X86/mmx-builtins.ll
index 3ac0e4ee4b85..8b7200d2f78f 100644
--- a/test/CodeGen/X86/mmx-builtins.ll
+++ b/test/CodeGen/X86/mmx-builtins.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3 | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll
index 6062b505a569..d9c7c678d1b2 100644
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
+; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | grep pinsr
; PR2562
external global i16 ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll
deleted file mode 100644
index a7ce7d93920e..000000000000
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
-
- %struct.vS1024 = type { [8 x <4 x i32>] }
- %struct.vS512 = type { [4 x <4 x i32>] }
-
-declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
-
-define void @t() nounwind {
-entry:
- br label %bb554
-
-bb554: ; preds = %bb554, %entry
- %sum.0.reg2mem.0 = phi <1 x i64> [ %tmp562, %bb554 ], [ zeroinitializer, %entry ] ; <<1 x i64>> [#uses=1]
- %0 = load x86_mmx* null, align 8 ; <<1 x i64>> [#uses=2]
- %1 = bitcast x86_mmx %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
- %tmp555 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1]
- %2 = bitcast <2 x i32> %tmp555 to x86_mmx ; <<1 x i64>> [#uses=1]
- %3 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %0, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
- store <1 x i64> %sum.0.reg2mem.0, <1 x i64>* null
- %tmp3 = bitcast x86_mmx %2 to <1 x i64>
- %tmp558 = add <1 x i64> %sum.0.reg2mem.0, %tmp3 ; <<1 x i64>> [#uses=1]
- %tmp5 = bitcast <1 x i64> %tmp558 to x86_mmx
- %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %tmp5, i32 32) nounwind readnone ; <<1 x i64>> [#uses=1]
- %tmp6 = bitcast x86_mmx %4 to <1 x i64>
- %tmp7 = bitcast x86_mmx %3 to <1 x i64>
- %tmp562 = add <1 x i64> %tmp6, %tmp7 ; <<1 x i64>> [#uses=1]
- br label %bb554
-}
diff --git a/test/CodeGen/X86/mmx-vzmovl.ll b/test/CodeGen/X86/mmx-vzmovl.ll
deleted file mode 100644
index 191e261f616f..000000000000
--- a/test/CodeGen/X86/mmx-vzmovl.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep movq | count 2
-; There are no MMX operations here; this is promoted to XMM.
-
-define void @foo(<1 x i64>* %a, <1 x i64>* %b) nounwind {
-entry:
- %0 = load <1 x i64>* %a, align 8 ; <<1 x i64>> [#uses=1]
- %1 = bitcast <1 x i64> %0 to <2 x i32> ; <<2 x i32>> [#uses=1]
- %2 = and <2 x i32> %1, < i32 -1, i32 0 > ; <<2 x i32>> [#uses=1]
- %3 = bitcast <2 x i32> %2 to <1 x i64> ; <<1 x i64>> [#uses=1]
- store <1 x i64> %3, <1 x i64>* %b, align 8
- br label %bb2
-
-bb2: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/X86/movmsk.ll b/test/CodeGen/X86/movmsk.ll
index 2368548bfa86..928ad037c1ce 100644
--- a/test/CodeGen/X86/movmsk.ll
+++ b/test/CodeGen/X86/movmsk.ll
@@ -78,6 +78,22 @@ entry:
ret i32 %shr.i
}
+; PR11570
+define void @float_call_signbit(double %n) {
+entry:
+; FIXME: This should also use movmskps; we don't form the FGETSIGN node
+; in this case, though.
+; CHECK: float_call_signbit:
+; CHECK: movd %xmm0, %rdi
+; FIXME
+ %t0 = bitcast double %n to i64
+ %tobool.i.i.i.i = icmp slt i64 %t0, 0
+ tail call void @float_call_signbit_callee(i1 zeroext %tobool.i.i.i.i)
+ ret void
+}
+declare void @float_call_signbit_callee(i1 zeroext)
+
+
; rdar://10247336
; movmskp{s|d} only set low 4/2 bits, high bits are known zero
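The new float_call_signbit test is the "sign bit of a double" idiom: bit-cast to i64, test the sign. In source form, roughly (a reconstruction; std::bit_cast, C++20, stands in for whatever bit-punning the original used):

    #include <bit>
    #include <cstdint>
    void float_call_signbit_callee(bool sign); // external, as in the test
    // The sign test on the bit pattern should ideally become a movmsk
    // rather than a movd of the whole value into a GPR.
    void float_call_signbit(double n) {
      float_call_signbit_callee(std::bit_cast<std::int64_t>(n) < 0);
    }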
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 51a06112aada..4f7e28ace3cd 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,6 +1,10 @@
; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
; rdar://7236213
+; XFAILed now that the scheduler's 2-address hack is disabled: a lea is generated.
+; The code isn't any worse, though.
+; XFAIL: *
+
; CodeGen shouldn't require any lea instructions inside the marked loop.
; It should properly set up post-increment uses and do coalescing for
; the induction variables.
diff --git a/test/CodeGen/X86/nancvt.ll b/test/CodeGen/X86/nancvt.ll
index 82b73319ad14..8036710b225a 100644
--- a/test/CodeGen/X86/nancvt.ll
+++ b/test/CodeGen/X86/nancvt.ll
@@ -52,8 +52,8 @@ bb: ; preds = %bb23
%tmp17 = ashr i64 %tmp16, %.cast ; <i64> [#uses=1]
%tmp1718 = trunc i64 %tmp17 to i32 ; <i32> [#uses=1]
%tmp19 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp1718, i32* @var
- volatile store i32 %tmp13, i32* @var
+ store volatile i32 %tmp1718, i32* @var
+ store volatile i32 %tmp13, i32* @var
%tmp21 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp22 = add i32 %tmp21, 1 ; <i32> [#uses=1]
store i32 %tmp22, i32* %i, align 4
@@ -86,7 +86,7 @@ bb28: ; preds = %bb46
%tmp3940 = bitcast float* %tmp39 to i32* ; <i32*> [#uses=1]
%tmp41 = load i32* %tmp3940, align 4 ; <i32> [#uses=1]
%tmp42 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp41, i32* @var
+ store volatile i32 %tmp41, i32* @var
%tmp44 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp45 = add i32 %tmp44, 1 ; <i32> [#uses=1]
store i32 %tmp45, i32* %i, align 4
@@ -127,8 +127,8 @@ bb52: ; preds = %bb78
%tmp72 = ashr i64 %tmp70, %.cast71 ; <i64> [#uses=1]
%tmp7273 = trunc i64 %tmp72 to i32 ; <i32> [#uses=1]
%tmp74 = getelementptr [10 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp7273, i32* @var
- volatile store i32 %tmp66, i32* @var
+ store volatile i32 %tmp7273, i32* @var
+ store volatile i32 %tmp66, i32* @var
%tmp76 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp77 = add i32 %tmp76, 1 ; <i32> [#uses=1]
store i32 %tmp77, i32* %i, align 4
@@ -161,7 +161,7 @@ bb84: ; preds = %bb101
%tmp9495 = bitcast float* %tmp94 to i32* ; <i32*> [#uses=1]
%tmp96 = load i32* %tmp9495, align 4 ; <i32> [#uses=1]
%tmp97 = getelementptr [6 x i8]* @.str1, i32 0, i32 0 ; <i8*> [#uses=1]
- volatile store i32 %tmp96, i32* @var
+ store volatile i32 %tmp96, i32* @var
%tmp99 = load i32* %i, align 4 ; <i32> [#uses=1]
%tmp100 = add i32 %tmp99, 1 ; <i32> [#uses=1]
store i32 %tmp100, i32* %i, align 4
diff --git a/test/CodeGen/X86/narrow-shl-load.ll b/test/CodeGen/X86/narrow-shl-load.ll
index ef27cbc3418c..7822453add4f 100644
--- a/test/CodeGen/X86/narrow-shl-load.ll
+++ b/test/CodeGen/X86/narrow-shl-load.ll
@@ -67,7 +67,7 @@ declare void @exit(i32) noreturn
; DAG Combiner can't fold this into a load of the 1'th byte.
; PR8757
define i32 @test3(i32 *%P) nounwind ssp {
- volatile store i32 128, i32* %P
+ store volatile i32 128, i32* %P
%tmp4.pre = load i32* %P
%phitmp = trunc i32 %tmp4.pre to i16
%phitmp13 = shl i16 %phitmp, 8
diff --git a/test/CodeGen/X86/negate-add-zero.ll b/test/CodeGen/X86/negate-add-zero.ll
index c3f412e09ae8..92850f22eaa5 100644
--- a/test/CodeGen/X86/negate-add-zero.ll
+++ b/test/CodeGen/X86/negate-add-zero.ll
@@ -486,10 +486,6 @@ declare void @_ZN7CDSListIP9HingeNodeEC1Eii(%"struct.CDSList<HingeNode*>"*, i32,
declare i8* @_Znwm(i32)
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
declare void @_ZdlPv(i8*) nounwind
diff --git a/test/CodeGen/X86/no-cfi.ll b/test/CodeGen/X86/no-cfi.ll
index f9985d458512..5bb9bb2d4f67 100644
--- a/test/CodeGen/X86/no-cfi.ll
+++ b/test/CodeGen/X86/no-cfi.ll
@@ -24,15 +24,11 @@ invoke.cont:
ret void
lpad:
- %exn = call i8* @llvm.eh.exception() nounwind
- %eh.selector = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) nounwind
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ catch i8* null
ret void
}
declare i32 @foo()
-declare i8* @llvm.eh.exception() nounwind readonly
-
declare i32 @__gxx_personality_v0(...)
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
diff --git a/test/CodeGen/X86/nontemporal.ll b/test/CodeGen/X86/nontemporal.ll
index 1d095359b61c..ae04435ac39c 100644
--- a/test/CodeGen/X86/nontemporal.ll
+++ b/test/CodeGen/X86/nontemporal.ll
@@ -3,13 +3,16 @@
define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
; CHECK: movntps
%cast = bitcast i8* %B to <4 x float>*
- store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+ %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+ store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
; CHECK: movntdq
%cast1 = bitcast i8* %B to <2 x i64>*
- store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+ %E2 = add <2 x i64> %E, <i64 1, i64 2>
+ store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
; CHECK: movntpd
%cast2 = bitcast i8* %B to <2 x double>*
- store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+ %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+ store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
; CHECK: movnti
%cast3 = bitcast i8* %B to i32*
store i32 %D, i32* %cast3, align 16, !nontemporal !0
diff --git a/test/CodeGen/X86/null-streamer.ll b/test/CodeGen/X86/null-streamer.ll
new file mode 100644
index 000000000000..7c0e82f08f93
--- /dev/null
+++ b/test/CodeGen/X86/null-streamer.ll
@@ -0,0 +1,11 @@
+; Check that the MCNullStreamer operates correctly, at least on a minimal test case.
+;
+; RUN: llc -filetype=null -o %t -march=x86 %s
+
+define void @f0() {
+ ret void
+}
+
+define void @f1() {
+ ret void
+}
diff --git a/test/CodeGen/X86/objc-gc-module-flags.ll b/test/CodeGen/X86/objc-gc-module-flags.ll
new file mode 100644
index 000000000000..8cb2c036a4f7
--- /dev/null
+++ b/test/CodeGen/X86/objc-gc-module-flags.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; CHECK: .section __DATA,__objc_imageinfo,regular,no_dead_strip
+; CHECK-NEXT: L_OBJC_IMAGE_INFO:
+; CHECK-NEXT: .long 0
+; CHECK-NEXT: .long 2
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 1, metadata !"Objective-C Garbage Collection", i32 2}
diff --git a/test/CodeGen/X86/object-size.ll b/test/CodeGen/X86/object-size.ll
index 0493edc8d090..8f1eabde7423 100644
--- a/test/CodeGen/X86/object-size.ll
+++ b/test/CodeGen/X86/object-size.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -regalloc=linearscan < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; RUN: llc -O0 < %s -march=x86-64 | FileCheck %s -check-prefix=X64
; ModuleID = 'ts.c'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -12,8 +12,8 @@ entry:
%tmp = load i8** @p ; <i8*> [#uses=1]
%0 = call i64 @llvm.objectsize.i64(i8* %tmp, i1 0) ; <i64> [#uses=1]
%cmp = icmp ne i64 %0, -1 ; <i1> [#uses=1]
-; X64: movabsq $-1, %rax
-; X64: cmpq $-1, %rax
+; X64: movabsq $-1, [[RAX:%r..]]
+; X64: cmpq $-1, [[RAX]]
br i1 %cmp, label %cond.true, label %cond.false
cond.true: ; preds = %entry
diff --git a/test/CodeGen/X86/odr_comdat.ll b/test/CodeGen/X86/odr_comdat.ll
new file mode 100644
index 000000000000..547334c045a3
--- /dev/null
+++ b/test/CodeGen/X86/odr_comdat.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=X86LINUX
+
+; Check that a comdat group is generated correctly for a static member
+; of an instantiated C++ template.
+; see http://sourcery.mentor.com/public/cxx-abi/abi.html#vague-itemplate
+; section 5.2.6 Instantiated templates
+; "Any static member data object is emitted in a COMDAT identified by its mangled
+; name, in any object file with a reference to its name symbol."
+
+; Case 1: variable is not explicitly initialized, and ends up in a .bss section
+; X86LINUX: .section .bss._ZN1CIiE1iE,"aGw",@nobits,_ZN1CIiE1iE,comdat
+@_ZN1CIiE1iE = weak_odr global i32 0, align 4
+
+; Case 2: variable is explicitly initialized, and ends up in a .data section
+; X86LINUX: .section .data._ZN1CIiE1jE,"aGw",@progbits,_ZN1CIiE1jE,comdat
+@_ZN1CIiE1jE = weak_odr global i32 12, align 4
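The mangled names decode as C<int>::i and C<int>::j. A minimal C++ source producing both globals (a sketch, assuming an explicit instantiation):

    // Static data members of an instantiated template: one zero-initialized
    // (lands in a .bss comdat), one explicitly initialized (.data comdat).
    template <typename T> struct C {
      static int i; // _ZN1CIiE1iE when T = int
      static int j; // _ZN1CIiE1jE
    };
    template <typename T> int C<T>::i;
    template <typename T> int C<T>::j = 12;
    template struct C<int>; // force the instantiation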
diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll
index e42aa9d6244c..d092916ac6fc 100644
--- a/test/CodeGen/X86/optimize-max-3.ll
+++ b/test/CodeGen/X86/optimize-max-3.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s
; LSR's OptimizeMax should eliminate the select (max).
diff --git a/test/CodeGen/X86/overlap-shift.ll b/test/CodeGen/X86/overlap-shift.ll
index c1fc041e7d9b..d185af16b90b 100644
--- a/test/CodeGen/X86/overlap-shift.ll
+++ b/test/CodeGen/X86/overlap-shift.ll
@@ -13,7 +13,7 @@
define i32 @test1(i32 %X) {
%Z = shl i32 %X, 2 ; <i32> [#uses=1]
- volatile store i32 %Z, i32* @G
+ store volatile i32 %Z, i32* @G
ret i32 %X
}
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index 528c4bcc74df..a3799807b384 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -post-RA-scheduler=false | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 -post-RA-scheduler=false | FileCheck %s
; rdar://7226797
; LLVM should omit the testl and use the flags result from the orl.
diff --git a/test/CodeGen/X86/peep-vector-extract-insert.ll b/test/CodeGen/X86/peep-vector-extract-insert.ll
index 5e18044e7e1b..d48a3318262c 100644
--- a/test/CodeGen/X86/peep-vector-extract-insert.ll
+++ b/test/CodeGen/X86/peep-vector-extract-insert.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {pxor %xmm0, %xmm0} | count 2
+; RUN: llc < %s -march=x86-64 | grep {xorps %xmm0, %xmm0} | count 2
define float @foo(<4 x float> %a) {
%b = insertelement <4 x float> %a, float 0.0, i32 3
diff --git a/test/CodeGen/X86/personality_size.ll b/test/CodeGen/X86/personality_size.ll
new file mode 100644
index 000000000000..30a5d39e4afc
--- /dev/null
+++ b/test/CodeGen/X86/personality_size.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=x86_64-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -relocation-model=pic -disable-cfi -mtriple=i386-pc-solaris2.11 -disable-cgp-branch-opts | FileCheck %s -check-prefix=X32
+; PR1632
+
+define void @_Z1fv() {
+entry:
+ invoke void @_Z1gv()
+ to label %return unwind label %unwind
+
+unwind: ; preds = %entry
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ ret void
+
+return: ; preds = %eh_then, %entry
+ ret void
+}
+
+declare void @_Z1gv()
+
+declare i32 @__gxx_personality_v0(...)
+
+; X64: .size DW.ref.__gxx_personality_v0, 8
+; X64: .quad __gxx_personality_v0
+
+; X32: .size DW.ref.__gxx_personality_v0, 4
+; X32: .long __gxx_personality_v0
+
diff --git a/test/CodeGen/X86/phaddsub.ll b/test/CodeGen/X86/phaddsub.ll
new file mode 100644
index 000000000000..62d85f7ee7c7
--- /dev/null
+++ b/test/CodeGen/X86/phaddsub.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -march=x86-64 -mattr=+ssse3,-avx | FileCheck %s -check-prefix=SSSE3
+; RUN: llc < %s -march=x86-64 -mattr=-ssse3,+avx | FileCheck %s -check-prefix=AVX
+
+; SSSE3: phaddw1:
+; SSSE3-NOT: vphaddw
+; SSSE3: phaddw
+; AVX: phaddw1:
+; AVX: vphaddw
+define <8 x i16> @phaddw1(<8 x i16> %x, <8 x i16> %y) {
+ %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %r = add <8 x i16> %a, %b
+ ret <8 x i16> %r
+}
+
+; SSSE3: phaddw2:
+; SSSE3-NOT: vphaddw
+; SSSE3: phaddw
+; AVX: phaddw2:
+; AVX: vphaddw
+define <8 x i16> @phaddw2(<8 x i16> %x, <8 x i16> %y) {
+ %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 2, i32 5, i32 6, i32 9, i32 10, i32 13, i32 14>
+ %b = shufflevector <8 x i16> %y, <8 x i16> %x, <8 x i32> <i32 8, i32 11, i32 12, i32 15, i32 0, i32 3, i32 4, i32 7>
+ %r = add <8 x i16> %a, %b
+ ret <8 x i16> %r
+}
+
+; SSSE3: phaddd1:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd1:
+; AVX: vphaddd
+define <4 x i32> @phaddd1(<4 x i32> %x, <4 x i32> %y) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phaddd2:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd2:
+; AVX: vphaddd
+define <4 x i32> @phaddd2(<4 x i32> %x, <4 x i32> %y) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 2, i32 5, i32 6>
+ %b = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 4, i32 7, i32 0, i32 3>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phaddd3:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd3:
+; AVX: vphaddd
+define <4 x i32> @phaddd3(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phaddd4:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd4:
+; AVX: vphaddd
+define <4 x i32> @phaddd4(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phaddd5:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd5:
+; AVX: vphaddd
+define <4 x i32> @phaddd5(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 undef, i32 undef>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 undef, i32 undef>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phaddd6:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd6:
+; AVX: vphaddd
+define <4 x i32> @phaddd6(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phaddd7:
+; SSSE3-NOT: vphaddd
+; SSSE3: phaddd
+; AVX: phaddd7:
+; AVX: vphaddd
+define <4 x i32> @phaddd7(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 undef>
+ %r = add <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phsubw1:
+; SSSE3-NOT: vphsubw
+; SSSE3: phsubw
+; AVX: phsubw1:
+; AVX: vphsubw
+define <8 x i16> @phsubw1(<8 x i16> %x, <8 x i16> %y) {
+ %a = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %b = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %r = sub <8 x i16> %a, %b
+ ret <8 x i16> %r
+}
+
+; SSSE3: phsubd1:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd1:
+; AVX: vphsubd
+define <4 x i32> @phsubd1(<4 x i32> %x, <4 x i32> %y) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %b = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %r = sub <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phsubd2:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd2:
+; AVX: vphsubd
+define <4 x i32> @phsubd2(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 5, i32 7>
+ %r = sub <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phsubd3:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd3:
+; AVX: vphsubd
+define <4 x i32> @phsubd3(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 2, i32 undef, i32 undef>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
+ %r = sub <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
+
+; SSSE3: phsubd4:
+; SSSE3-NOT: vphsubd
+; SSSE3: phsubd
+; AVX: phsubd4:
+; AVX: vphsubd
+define <4 x i32> @phsubd4(<4 x i32> %x) {
+ %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+ %b = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ %r = sub <4 x i32> %a, %b
+ ret <4 x i32> %r
+}
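Every function above follows one shape: two shufflevectors split the even and odd lanes, and the add/sub of those halves is exactly a pairwise horizontal operation. Scalar reference semantics for the 8 x i16 case (a sketch, not part of the commit):

    // What phaddw computes: pairwise sums, first-operand pairs in the low
    // half of the result, second-operand pairs in the high half.
    void phaddw_ref(short r[8], const short x[8], const short y[8]) {
      for (int i = 0; i < 4; ++i) {
        r[i]     = static_cast<short>(x[2 * i] + x[2 * i + 1]);
        r[i + 4] = static_cast<short>(y[2 * i] + y[2 * i + 1]);
      }
    }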
diff --git a/test/CodeGen/X86/pic.ll b/test/CodeGen/X86/pic.ll
index fb60ac2a60b9..fc0630991c9c 100644
--- a/test/CodeGen/X86/pic.ll
+++ b/test/CodeGen/X86/pic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mcpu=generic -mtriple=i686-pc-linux-gnu -relocation-model=pic -asm-verbose=false -post-RA-scheduler=false | FileCheck %s -check-prefix=LINUX
@ptr = external global i32*
@dst = external global i32
diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll
new file mode 100644
index 000000000000..cc1df2fffcc5
--- /dev/null
+++ b/test/CodeGen/X86/pointer-vector.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: SHUFF0
+define <8 x i32*> @SHUFF0(<4 x i32*> %ptrv) nounwind {
+entry:
+ %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <8 x i32> <i32 2, i32 7, i32 1, i32 2, i32 4, i32 5, i32 1, i32 1>
+;CHECK: pshufd
+ ret <8 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF1
+define <4 x i32*> @SHUFF1(<4 x i32*> %ptrv) nounwind {
+entry:
+ %G = shufflevector <4 x i32*> %ptrv, <4 x i32*> %ptrv, <4 x i32> <i32 2, i32 7, i32 7, i32 2>
+;CHECK: pshufd
+ ret <4 x i32*> %G
+;CHECK: ret
+}
+
+;CHECK: SHUFF3
+define <4 x i8*> @SHUFF3(<4 x i8*> %ptrv) nounwind {
+entry:
+ %G = shufflevector <4 x i8*> %ptrv, <4 x i8*> undef, <4 x i32> <i32 2, i32 7, i32 1, i32 2>
+;CHECK: pshufd
+ ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD0
+define <4 x i8*> @LOAD0(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movaps
+ ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD1
+define <4 x i8*> @LOAD1(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movdqa
+;CHECK: pshufd
+;CHECK: movdqa
+ %T = shufflevector <4 x i8*> %G, <4 x i8*> %G, <4 x i32> <i32 7, i32 1, i32 4, i32 3>
+ store <4 x i8*> %T, <4 x i8*>* %p
+ ret <4 x i8*> %G
+;CHECK: ret
+}
+
+;CHECK: LOAD2
+define <4 x i8*> @LOAD2(<4 x i8*>* %p) nounwind {
+entry:
+ %I = alloca <4 x i8*>
+;CHECK: sub
+ %G = load <4 x i8*>* %p
+;CHECK: movaps
+ store <4 x i8*> %G, <4 x i8*>* %I
+;CHECK: movaps
+ %Z = load <4 x i8*>* %I
+ ret <4 x i8*> %Z
+;CHECK: add
+;CHECK: ret
+}
+
+;CHECK: INT2PTR0
+define <4 x i32> @INT2PTR0(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movl
+;CHECK: movaps
+ %K = ptrtoint <4 x i8*> %G to <4 x i32>
+;CHECK: ret
+ ret <4 x i32> %K
+}
+
+;CHECK: INT2PTR1
+define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
+entry:
+ %G = load <4 x i8>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pshufb
+;CHECK: pand
+ %K = inttoptr <4 x i8> %G to <4 x i32*>
+;CHECK: ret
+ ret <4 x i32*> %K
+}
+
+;CHECK: BITCAST0
+define <4 x i32*> @BITCAST0(<4 x i8*>* %p) nounwind {
+entry:
+ %G = load <4 x i8*>* %p
+;CHECK: movl
+ %T = bitcast <4 x i8*> %G to <4 x i32*>
+;CHECK: movaps
+;CHECK: ret
+ ret <4 x i32*> %T
+}
+
+;CHECK: BITCAST1
+define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
+entry:
+ %G = load <2 x i8*>* %p
+;CHECK: movl
+;CHECK: movd
+;CHECK: pinsrd
+ %T = bitcast <2 x i8*> %G to <2 x i32*>
+;CHECK: ret
+ ret <2 x i32*> %T
+}
+
+;CHECK: ICMP0
+define <4 x i32> @ICMP0(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+ %g0 = load <4 x i8*>* %p0
+ %g1 = load <4 x i8*>* %p1
+ %k = icmp sgt <4 x i8*> %g0, %g1
+ ;CHECK: pcmpgtd
+ %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+ ret <4 x i32> %j
+ ;CHECK: ret
+}
+
+;CHECK: ICMP1
+define <4 x i32> @ICMP1(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
+entry:
+ %g0 = load <4 x i8*>* %p0
+ %g1 = load <4 x i8*>* %p1
+ %k = icmp eq <4 x i8*> %g0, %g1
+ ;CHECK: pcmpeqd
+ %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
+ ret <4 x i32> %j
+ ;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/pr11202.ll b/test/CodeGen/X86/pr11202.ll
new file mode 100644
index 000000000000..13070d1c600e
--- /dev/null
+++ b/test/CodeGen/X86/pr11202.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s
+
+@bb = constant [1 x i8*] [i8* blockaddress(@main, %l2)]
+
+define void @main() {
+entry:
+ br label %l1
+
+l1: ; preds = %l2, %entry
+ %a = zext i1 false to i32
+ br label %l2
+
+l2: ; preds = %l1
+ %b = zext i1 false to i32
+ br label %l1
+}
+
+; CHECK: .Ltmp0: # Address of block that was removed by CodeGen
+; CHECK: .quad .Ltmp0
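The blockaddress in @bb comes from the GNU labels-as-values extension; codegen then folds the l2 block away, so only the .Ltmp0 marker remains for the address to point at. A hypothetical source shape (the PR's original may differ):

    // &&l2 creates the blockaddress even though the block itself is folded.
    int main() {
      static void *const bb[] = { &&l2 };
      (void)bb;
    l1:
      goto l2;
    l2:
      goto l1; // infinite loop, as in the test
    }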
diff --git a/test/CodeGen/X86/pr11415.ll b/test/CodeGen/X86/pr11415.ll
new file mode 100644
index 000000000000..e1fa0326b762
--- /dev/null
+++ b/test/CodeGen/X86/pr11415.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple=x86_64-pc-linux %s -o - -regalloc=fast | FileCheck %s
+
+; We used to consider the early clobber in the second asm statement as
+; defining %0 before it was read. This caused us to omit the
+; movq -8(%rsp), %rdx
+
+; CHECK: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq %rcx, %rax
+; CHECK-NEXT: movq %rax, -8(%rsp)
+; CHECK-NEXT: movq -8(%rsp), %rdx
+; CHECK-NEXT: #APP
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: movq %rdx, %rax
+; CHECK-NEXT: movq %rdx, -8(%rsp)
+; CHECK-NEXT: ret
+
+define i64 @foo() {
+entry:
+ %0 = tail call i64 asm "", "={cx}"() nounwind
+ %1 = tail call i64 asm "", "=&r,0,r,~{rax}"(i64 %0, i64 %0) nounwind
+ ret i64 %1
+}
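In GCC-style inline asm the constraint string above reads roughly as below; whether a frontend accepts an input tied ("0") to an early-clobber ("&") output varies, so treat this as a hedged sketch of the shape, not a guaranteed-to-compile reproduction:

    // The "&" on the second asm's output is the early clobber that used to
    // make the allocator think %0 was dead before its reload.
    long foo() {
      long c, r;
      asm("" : "=c"(c));                           // result lands in %rcx
      asm("" : "=&r"(r) : "0"(c), "r"(c) : "rax"); // early-clobber output
      return r;
    }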
diff --git a/test/CodeGen/X86/pr12360.ll b/test/CodeGen/X86/pr12360.ll
new file mode 100644
index 000000000000..f29e50e29a3b
--- /dev/null
+++ b/test/CodeGen/X86/pr12360.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+define zeroext i1 @f1(i8* %x) {
+; CHECK: f1:
+; CHECK: movb (%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+ %0 = load i8* %x, align 1, !range !0
+ %tobool = trunc i8 %0 to i1
+ ret i1 %tobool
+}
+
+define zeroext i1 @f2(i8* %x) {
+; CHECK: f2:
+; CHECK: movb (%rdi), %al
+; CHECK-NEXT: ret
+
+entry:
+ %0 = load i8* %x, align 1, !range !0
+ %tobool = icmp ne i8 %0, 0
+ ret i1 %tobool
+}
+
+!0 = metadata !{i8 0, i8 2}
+
+
+; Check that we don't build a "trunc" from i1 to i1, which would assert.
+define zeroext i1 @f3(i1 %x) {
+; CHECK: f3:
+
+entry:
+ %tobool = icmp ne i1 %x, 0
+ ret i1 %tobool
+}
+
+; Check that we don't build a trunc when other bits are needed.
+define zeroext i1 @f4(i32 %x) {
+; CHECK: f4:
+; CHECK: and
+
+entry:
+ %y = and i32 %x, 32768
+ %z = icmp ne i32 %y, 0
+ ret i1 %z
+}
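The !range !{i8 0, i8 2} annotation is what clang attaches when loading a bool, which is why the i8-to-i1 truncation needs no masking. A minimal trigger (a sketch):

    // Loading a bool yields an i8 load with !range {0, 2}; the byte can be
    // returned in %al directly, with no 'and $1'.
    bool f1(bool *x) { return *x; }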
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index 945ec4c6b621..9b0ef83ab042 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -33,7 +33,7 @@ declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
define i32 @main() {
entry:
; CHECK: flds
- %tmp6 = volatile load float* @a ; <float> [#uses=1]
+ %tmp6 = load volatile float* @a ; <float> [#uses=1]
; CHECK: fstps (%esp)
; CHECK: tanf
%tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1]
@@ -41,7 +41,7 @@ entry:
; CHECK: fstp
; CHECK: fldl
- %tmp12 = volatile load double* @b ; <double> [#uses=1]
+ %tmp12 = load volatile double* @b ; <double> [#uses=1]
; CHECK: fstpl (%esp)
; CHECK: tan
%tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1]
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index 2a8bb358014d..02a36054d88c 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -15,17 +15,17 @@ define void @loop_2() nounwind {
; CHECK-NEXT: addl $3, (%{{.*}})
; CHECK-NEXT: ret
- %tmp = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1 = add i32 %tmp, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1, i32* @x, align 4
- %tmp.1 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ store volatile i32 %tmp1, i32* @x, align 4
+ %tmp.1 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.1 = add i32 %tmp.1, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.1, i32* @x, align 4
- %tmp.2 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ store volatile i32 %tmp1.1, i32* @x, align 4
+ %tmp.2 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.2 = add i32 %tmp.2, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.2, i32* @x, align 4
- %tmp.3 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ store volatile i32 %tmp1.2, i32* @x, align 4
+ %tmp.3 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
%tmp1.3 = add i32 %tmp.3, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.3, i32* @x, align 4
+ store volatile i32 %tmp1.3, i32* @x, align 4
ret void
}
diff --git a/test/CodeGen/X86/pr3495-2.ll b/test/CodeGen/X86/pr3495-2.ll
deleted file mode 100644
index a4204e528930..000000000000
--- a/test/CodeGen/X86/pr3495-2.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1
-; PR3495
-;
-; This test may not be testing what it was supposed to test.
-; It used to have two spills and four reloads, but now it only has one spill and one reload.
-
-target datalayout = "e-p:32:32:32"
-target triple = "i386-apple-darwin9.6"
- %struct.constraintVCGType = type { i32, i32, i32, i32 }
- %struct.nodeVCGType = type { %struct.constraintVCGType*, i32, i32, i32, %struct.constraintVCGType*, i32, i32, i32 }
-
-define fastcc void @SCC_DFSBelowVCG(%struct.nodeVCGType* %VCG, i32 %net, i32 %label) nounwind {
-entry:
- %0 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 5 ; <i32*> [#uses=2]
- %1 = load i32* %0, align 4 ; <i32> [#uses=1]
- %2 = icmp eq i32 %1, 0 ; <i1> [#uses=1]
- br i1 %2, label %bb5, label %bb.nph3
-
-bb.nph3: ; preds = %entry
- %3 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 4 ; <%struct.constraintVCGType**> [#uses=1]
- br label %bb
-
-bb: ; preds = %bb3, %bb.nph3
- %s.02 = phi i32 [ 0, %bb.nph3 ], [ %12, %bb3 ] ; <i32> [#uses=2]
- %4 = load %struct.constraintVCGType** %3, align 4 ; <%struct.constraintVCGType*> [#uses=1]
- %5 = icmp eq i32 0, 0 ; <i1> [#uses=1]
- br i1 %5, label %bb1, label %bb3
-
-bb1: ; preds = %bb
- %6 = getelementptr %struct.constraintVCGType* %4, i32 %s.02, i32 0 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=2]
- %8 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 7 ; <i32*> [#uses=1]
- %9 = load i32* %8, align 4 ; <i32> [#uses=1]
- %10 = icmp eq i32 %9, 0 ; <i1> [#uses=1]
- br i1 %10, label %bb2, label %bb3
-
-bb2: ; preds = %bb1
- %11 = getelementptr %struct.nodeVCGType* %VCG, i32 %7, i32 4 ; <%struct.constraintVCGType**> [#uses=0]
- br label %bb.i
-
-bb.i: ; preds = %bb.i, %bb2
- br label %bb.i
-
-bb3: ; preds = %bb1, %bb
- %12 = add i32 %s.02, 1 ; <i32> [#uses=2]
- %13 = load i32* %0, align 4 ; <i32> [#uses=1]
- %14 = icmp ugt i32 %13, %12 ; <i1> [#uses=1]
- br i1 %14, label %bb, label %bb5
-
-bb5: ; preds = %bb3, %entry
- %15 = getelementptr %struct.nodeVCGType* %VCG, i32 %net, i32 6 ; <i32*> [#uses=1]
- store i32 %label, i32* %15, align 4
- ret void
-}
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
deleted file mode 100644
index 7efd35b8b6d0..000000000000
--- a/test/CodeGen/X86/pr3495.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of loads added} | grep 2
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of spill slots allocated} | grep 1
-; RUN: llc < %s -march=x86 -stats -regalloc=linearscan -enable-lsr-nested |& grep {Number of machine instrs printed} | grep 34
-; PR3495
-;
-; Note: this should not spill at all with either good LSR or good regalloc.
-
-target triple = "i386-pc-linux-gnu"
-@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1]
-@rows = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=2]
-@up = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=2]
-@down = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=1]
-
-define i32 @queens(i32 %c) nounwind {
-entry:
- %tmp91 = add i32 %c, 1 ; <i32> [#uses=3]
- %tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91 ; <i32*> [#uses=1]
- br label %bb
-
-bb: ; preds = %bb569, %entry
- %r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ] ; <i32> [#uses=4]
- %tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0 ; <i32*> [#uses=1]
- %tmp28 = load i32* %tmp27, align 4 ; <i32> [#uses=1]
- %tmp29 = icmp eq i32 %tmp28, 0 ; <i1> [#uses=1]
- br i1 %tmp29, label %bb569, label %bb31
-
-bb31: ; preds = %bb
- %tmp35 = sub i32 %r25.0.reg2mem.0, 0 ; <i32> [#uses=1]
- %tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35 ; <i32*> [#uses=1]
- %tmp37 = load i32* %tmp36, align 4 ; <i32> [#uses=1]
- %tmp38 = icmp eq i32 %tmp37, 0 ; <i1> [#uses=1]
- br i1 %tmp38, label %bb569, label %bb41
-
-bb41: ; preds = %bb31
- %tmp54 = sub i32 %r25.0.reg2mem.0, %c ; <i32> [#uses=1]
- %tmp55 = add i32 %tmp54, 7 ; <i32> [#uses=1]
- %tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55 ; <i32*> [#uses=2]
- store i32 0, i32* %tmp62, align 4
- br label %bb92
-
-bb92: ; preds = %bb545, %bb41
- %r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ] ; <i32> [#uses=5]
- %tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0 ; <i32*> [#uses=1]
- %tmp95 = load i32* %tmp94, align 4 ; <i32> [#uses=0]
- %tmp112 = add i32 %r20.0.reg2mem.0, %tmp91 ; <i32> [#uses=1]
- %tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112 ; <i32*> [#uses=2]
- %tmp114 = load i32* %tmp113, align 4 ; <i32> [#uses=1]
- %tmp115 = icmp eq i32 %tmp114, 0 ; <i1> [#uses=1]
- br i1 %tmp115, label %bb545, label %bb118
-
-bb118: ; preds = %bb92
- %tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91 ; <i32> [#uses=0]
- store i32 0, i32* %tmp113, align 4
- store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4
- br label %bb142
-
-bb142: ; preds = %bb142, %bb118
- %k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ] ; <i32> [#uses=1]
- %indvar.next709 = add i32 %k18.0.reg2mem.0, 1 ; <i32> [#uses=2]
- %exitcond710 = icmp eq i32 %indvar.next709, 8 ; <i1> [#uses=1]
- br i1 %exitcond710, label %bb155, label %bb142
-
-bb155: ; preds = %bb142
- %tmp156 = tail call i32 @putchar(i32 10) nounwind ; <i32> [#uses=0]
- br label %bb545
-
-bb545: ; preds = %bb155, %bb92
- %indvar.next711 = add i32 %r20.0.reg2mem.0, 1 ; <i32> [#uses=2]
- %exitcond712 = icmp eq i32 %indvar.next711, 8 ; <i1> [#uses=1]
- br i1 %exitcond712, label %bb553, label %bb92
-
-bb553: ; preds = %bb545
- store i32 1, i32* %tmp62, align 4
- br label %bb569
-
-bb569: ; preds = %bb553, %bb31, %bb
- %indvar.next715 = add i32 %r25.0.reg2mem.0, 1 ; <i32> [#uses=1]
- br label %bb
-}
-
-declare i32 @putchar(i32)
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index ebe11a5e8e4a..ec2f302b1499 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -1,4 +1,7 @@
; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s
+
+; rdar://10538297
define void @t(i8* %ptr) nounwind {
entry:
diff --git a/test/CodeGen/X86/promote.ll b/test/CodeGen/X86/promote.ll
new file mode 100644
index 000000000000..8b30dc718b08
--- /dev/null
+++ b/test/CodeGen/X86/promote.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i8:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+
+; CHECK: mul_f
+define i32 @mul_f(<4 x i8>* %A) {
+entry:
+; CHECK: pmul
+; CHECK-NOT: mulb
+ %0 = load <4 x i8>* %A, align 8
+ %mul = mul <4 x i8> %0, %0
+ store <4 x i8> %mul, <4 x i8>* undef
+ ret i32 0
+; CHECK: ret
+}
+
+
+; CHECK: shuff_f
+define i32 @shuff_f(<4 x i8>* %A) {
+entry:
+; CHECK: pshufb
+; CHECK: paddd
+; CHECK: pshufb
+ %0 = load <4 x i8>* %A, align 8
+ %add = add <4 x i8> %0, %0
+ store <4 x i8> %add, <4 x i8>* undef
+ ret i32 0
+; CHECK: ret
+}
+
+; CHECK: bitcast_widen
+define <2 x float> @bitcast_widen(<4 x i32> %in) nounwind readnone {
+entry:
+; CHECK-NOT: pshufd
+ %x = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+ %y = bitcast <2 x i32> %x to <2 x float>
+ ret <2 x float> %y
+; CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/rd-mod-wr-eflags.ll b/test/CodeGen/X86/rd-mod-wr-eflags.ll
new file mode 100644
index 000000000000..faca3d7bacdb
--- /dev/null
+++ b/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -0,0 +1,179 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+%struct.obj = type { i64 }
+
+; CHECK: _Z7releaseP3obj
+define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp {
+entry:
+; CHECK: decq (%{{rdi|rcx}})
+; CHECK-NEXT: je
+ %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0
+ %0 = load i64* %refcnt, align 8, !tbaa !0
+ %dec = add i64 %0, -1
+ store i64 %dec, i64* %refcnt, align 8, !tbaa !0
+ %tobool = icmp eq i64 %dec, 0
+ br i1 %tobool, label %if.end, label %return
+
+if.end: ; preds = %entry
+ %1 = bitcast %struct.obj* %o to i8*
+ tail call void @free(i8* %1)
+ br label %return
+
+return: ; preds = %entry, %if.end
+ ret void
+}
+
+@c = common global i64 0, align 8
+@a = common global i32 0, align 4
+@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1
+@b = common global i32 0, align 4
+
+; CHECK: test
+define i32 @test() nounwind uwtable ssp {
+entry:
+; CHECK: decq
+; CHECK-NOT: decq
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %dec.i, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+; CHECK: test2
+define i32 @test2() nounwind uwtable ssp {
+entry:
+; CHECK-NOT: decq ({{.*}})
+%0 = load i64* @c, align 8, !tbaa !0
+%dec.i = add nsw i64 %0, -1
+store i64 %dec.i, i64* @c, align 8, !tbaa !0
+%tobool.i = icmp ne i64 %0, 0
+%lor.ext.i = zext i1 %tobool.i to i32
+store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3
+%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind
+ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+declare void @free(i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"long", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
+
+%struct.obj2 = type { i64, i32, i16, i8 }
+
+declare void @other(%struct.obj2*) nounwind
+
+; CHECK: example_dec
+define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit dec
+entry:
+ %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load
+ %0 = load i64* %s64, align 8
+; CHECK: decq ({{.*}})
+ %dec = add i64 %0, -1
+ store i64 %dec, i64* %s64, align 8
+ %tobool = icmp eq i64 %dec, 0
+ br i1 %tobool, label %if.end, label %return
+
+; 32 bit dec
+if.end:
+ %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load
+ %1 = load i32* %s32, align 4
+; CHECK: decl {{[0-9][0-9]*}}({{.*}})
+ %dec1 = add i32 %1, -1
+ store i32 %dec1, i32* %s32, align 4
+ %tobool2 = icmp eq i32 %dec1, 0
+ br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit dec
+if.end1:
+ %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load
+ %2 = load i16* %s16, align 2
+; CHECK: decw {{[0-9][0-9]*}}({{.*}})
+ %dec2 = add i16 %2, -1
+ store i16 %dec2, i16* %s16, align 2
+ %tobool3 = icmp eq i16 %dec2, 0
+ br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit dec
+if.end2:
+ %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load
+ %3 = load i8* %s8
+; CHECK: decb {{[0-9][0-9]*}}({{.*}})
+ %dec3 = add i8 %3, -1
+ store i8 %dec3, i8* %s8
+ %tobool4 = icmp eq i8 %dec3, 0
+ br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+ tail call void @other(%struct.obj2* %o) nounwind
+ br label %return
+
+return: ; preds = %if.end4, %if.end, %entry
+ ret void
+}
+
+; CHECK: example_inc
+define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp {
+; 64 bit inc
+entry:
+ %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0
+; CHECK-NOT: load
+ %0 = load i64* %s64, align 8
+; CHECK: incq ({{.*}})
+ %inc = add i64 %0, 1
+ store i64 %inc, i64* %s64, align 8
+ %tobool = icmp eq i64 %inc, 0
+ br i1 %tobool, label %if.end, label %return
+
+; 32 bit inc
+if.end:
+ %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1
+; CHECK-NOT: load
+ %1 = load i32* %s32, align 4
+; CHECK: incl {{[0-9][0-9]*}}({{.*}})
+ %inc1 = add i32 %1, 1
+ store i32 %inc1, i32* %s32, align 4
+ %tobool2 = icmp eq i32 %inc1, 0
+ br i1 %tobool2, label %if.end1, label %return
+
+; 16 bit inc
+if.end1:
+ %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2
+; CHECK-NOT: load
+ %2 = load i16* %s16, align 2
+; CHECK: incw {{[0-9][0-9]*}}({{.*}})
+ %inc2 = add i16 %2, 1
+ store i16 %inc2, i16* %s16, align 2
+ %tobool3 = icmp eq i16 %inc2, 0
+ br i1 %tobool3, label %if.end2, label %return
+
+; 8 bit inc
+if.end2:
+ %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3
+; CHECK-NOT: load
+ %3 = load i8* %s8
+; CHECK: incb {{[0-9][0-9]*}}({{.*}})
+ %inc3 = add i8 %3, 1
+ store i8 %inc3, i8* %s8
+ %tobool4 = icmp eq i8 %inc3, 0
+ br i1 %tobool4, label %if.end4, label %return
+
+if.end4:
+ tail call void @other(%struct.obj2* %o) nounwind
+ br label %return
+
+return:
+ ret void
+}
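_Z7releaseP3obj demangles to release(obj*); in source form the file's pattern is the classic refcount release, where the branch consumes the flags of the memory decrement itself:

    #include <cstdlib>
    struct obj { long refcnt; };
    // Sketch of the source: 'decq (%rdi); je' with no separate cmp/test,
    // and likewise for the 32/16/8-bit fields in example_dec/example_inc.
    void release(obj *o) {
      if (--o->refcnt == 0)
        std::free(o);
    }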
diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll
index d93697123596..d99a7a4bc4ad 100644
--- a/test/CodeGen/X86/red-zone.ll
+++ b/test/CodeGen/X86/red-zone.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
; First without noredzone.
; CHECK: f0:
diff --git a/test/CodeGen/X86/red-zone2.ll b/test/CodeGen/X86/red-zone2.ll
index 9557d17150ec..f09216319e8d 100644
--- a/test/CodeGen/X86/red-zone2.ll
+++ b/test/CodeGen/X86/red-zone2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 > %t
+; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
; RUN: grep subq %t | count 1
; RUN: grep addq %t | count 1
diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll
index 87f65ed6247a..6759115639b2 100644
--- a/test/CodeGen/X86/reghinting.ll
+++ b/test/CodeGen/X86/reghinting.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-macosx | FileCheck %s
; PR10221
;; The registers %x and %y must both spill across the finit call.
diff --git a/test/CodeGen/X86/remat-scalar-zero.ll b/test/CodeGen/X86/remat-scalar-zero.ll
index f6f0ed10b514..75f438d26cd0 100644
--- a/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/test/CodeGen/X86/remat-scalar-zero.ll
@@ -1,4 +1,5 @@
; XFAIL: *
+; ...should pass. See PR12324: misched bringup
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu > %t
; RUN: not grep xor %t
; RUN: not grep movap %t
diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll
new file mode 100644
index 000000000000..0dd74ea0791e
--- /dev/null
+++ b/test/CodeGen/X86/rounding-ops.ll
@@ -0,0 +1,132 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s
+; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s
+
+define float @test1(float %x) nounwind {
+ %call = tail call float @floorf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test1:
+; CHECK-SSE: roundss $1
+
+; CHECK-AVX: test1:
+; CHECK-AVX: vroundss $1
+}
+
+declare float @floorf(float) nounwind readnone
+
+define double @test2(double %x) nounwind {
+ %call = tail call double @floor(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test2:
+; CHECK-SSE: roundsd $1
+
+; CHECK-AVX: test2:
+; CHECK-AVX: vroundsd $1
+}
+
+declare double @floor(double) nounwind readnone
+
+define float @test3(float %x) nounwind {
+ %call = tail call float @nearbyintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test3:
+; CHECK-SSE: roundss $12
+
+; CHECK-AVX: test3:
+; CHECK-AVX: vroundss $12
+}
+
+declare float @nearbyintf(float) nounwind readnone
+
+define double @test4(double %x) nounwind {
+ %call = tail call double @nearbyint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test4:
+; CHECK-SSE: roundsd $12
+
+; CHECK-AVX: test4:
+; CHECK-AVX: vroundsd $12
+}
+
+declare double @nearbyint(double) nounwind readnone
+
+define float @test5(float %x) nounwind {
+ %call = tail call float @ceilf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test5:
+; CHECK-SSE: roundss $2
+
+; CHECK-AVX: test5:
+; CHECK-AVX: vroundss $2
+}
+
+declare float @ceilf(float) nounwind readnone
+
+define double @test6(double %x) nounwind {
+ %call = tail call double @ceil(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test6:
+; CHECK-SSE: roundsd $2
+
+; CHECK-AVX: test6:
+; CHECK-AVX: vroundsd $2
+}
+
+declare double @ceil(double) nounwind readnone
+
+define float @test7(float %x) nounwind {
+ %call = tail call float @rintf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test7:
+; CHECK-SSE: roundss $4
+
+; CHECK-AVX: test7:
+; CHECK-AVX: vroundss $4
+}
+
+declare float @rintf(float) nounwind readnone
+
+define double @test8(double %x) nounwind {
+ %call = tail call double @rint(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test8:
+; CHECK-SSE: roundsd $4
+
+; CHECK-AVX: test8:
+; CHECK-AVX: vroundsd $4
+}
+
+declare double @rint(double) nounwind readnone
+
+define float @test9(float %x) nounwind {
+ %call = tail call float @truncf(float %x) nounwind readnone
+ ret float %call
+
+; CHECK-SSE: test9:
+; CHECK-SSE: roundss $3
+
+; CHECK-AVX: test9:
+; CHECK-AVX: vroundss $3
+}
+
+declare float @truncf(float) nounwind readnone
+
+define double @test10(double %x) nounwind {
+ %call = tail call double @trunc(double %x) nounwind readnone
+ ret double %call
+
+; CHECK-SSE: test10:
+; CHECK-SSE: roundsd $3
+
+; CHECK-AVX: test10:
+; CHECK-AVX: vroundsd $3
+}
+
+declare double @trunc(double) nounwind readnone
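The immediates follow the SSE4.1 ROUNDSS/ROUNDSD encoding: bits 1:0 pick the mode (00 nearest, 01 down, 10 up, 11 truncate), bit 2 means "use MXCSR.RC instead", and bit 3 suppresses the inexact exception; hence $1 = floor, $2 = ceil, $3 = trunc, $4 = rint, and $12 = nearbyint. The same constants through the intrinsics (a sketch; the _MM_FROUND_* macros spell these values):

    #include <smmintrin.h> // SSE4.1
    __m128 floor_ss(__m128 x)     { return _mm_round_ss(x, x, 0x1); } // round down
    __m128 ceil_ss(__m128 x)      { return _mm_round_ss(x, x, 0x2); } // round up
    __m128 trunc_ss(__m128 x)     { return _mm_round_ss(x, x, 0x3); } // toward zero
    __m128 rint_ss(__m128 x)      { return _mm_round_ss(x, x, 0x4); } // MXCSR mode
    __m128 nearbyint_ss(__m128 x) { return _mm_round_ss(x, x, 0xC); } // + quiet inexact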
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll
index adc58ac34b9e..e99ea9356a64 100644
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -3,9 +3,10 @@
; Verify that when widening a divide/remainder operation, we only generate
; one divide/rem per element, since divide/remainder can trap.
+; CHECK: vectorDiv
define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
-; CHECK: idivl
-; CHECK: idivl
+; CHECK: idivq
+; CHECK: idivq
; CHECK-NOT: idivl
; CHECK: ret
entry:
@@ -32,6 +33,7 @@ entry:
ret void
}
+; CHECK: test_char_div
define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: idivb
; CHECK: idivb
@@ -42,6 +44,7 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
ret <3 x i8> %div.r
}
+; CHECK: test_uchar_div
define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
; CHECK: divb
; CHECK: divb
@@ -52,6 +55,7 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
ret <3 x i8> %div.r
}
+; CHECK: test_short_div
define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
; CHECK: idivw
; CHECK: idivw
@@ -64,17 +68,19 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
ret <5 x i16> %div.r
}
+; CHECK: test_ushort_div
define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK-NOT: divw
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK-NOT: divl
; CHECK: ret
%div.r = udiv <4 x i16> %num, %div
ret <4 x i16> %div.r
}
+; CHECK: test_uint_div
define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
; CHECK: divl
; CHECK: divl
@@ -85,6 +91,7 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
ret <3 x i32> %div.r
}
+; CHECK: test_long_div
define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: idivq
; CHECK: idivq
@@ -95,6 +102,7 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
ret <3 x i64> %div.r
}
+; CHECK: test_ulong_div
define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
; CHECK: divq
; CHECK: divq
@@ -105,18 +113,19 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
ret <3 x i64> %div.r
}
-
+; CHECK: test_char_rem
define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK-NOT: idivb
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
; CHECK: ret
%rem.r = srem <4 x i8> %num, %rem
ret <4 x i8> %rem.r
}
+; CHECK: test_short_rem
define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
; CHECK: idivw
; CHECK: idivw
@@ -129,6 +138,7 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
ret <5 x i16> %rem.r
}
+; CHECK: test_uint_rem
define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
; CHECK: idivl
; CHECK: idivl
@@ -141,6 +151,7 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
}
+; CHECK: test_ulong_rem
define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
; CHECK: divq
; CHECK: divq
@@ -153,6 +164,7 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
ret <5 x i64> %rem.r
}
+; CHECK: test_int_div
define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
; CHECK: idivl
; CHECK: idivl
diff --git a/test/CodeGen/X86/segmented-stacks-dynamic.ll b/test/CodeGen/X86/segmented-stacks-dynamic.ll
new file mode 100644
index 000000000000..5ce08aa51c76
--- /dev/null
+++ b/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -0,0 +1,64 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define i32 @test_basic(i32 %l) {
+ %mem = alloca i32, i32 %l
+ call void @dummy_use (i32* %mem, i32 %l)
+ %terminate = icmp eq i32 %l, 0
+ br i1 %terminate, label %true, label %false
+
+true:
+ ret i32 0
+
+false:
+ %newlen = sub i32 %l, 1
+ %retvalue = call i32 @test_basic(i32 %newlen)
+ ret i32 %retvalue
+
+; X32: test_basic:
+
+; X32: cmpl %gs:48, %esp
+; X32-NEXT: ja .LBB0_2
+
+; X32: pushl $4
+; X32-NEXT: pushl $12
+; X32-NEXT: calll __morestack
+; X32-NEXT: ret
+
+; X32: movl %esp, %eax
+; X32-NEXT: subl %ecx, %eax
+; X32-NEXT: cmpl %eax, %gs:48
+
+; X32: movl %eax, %esp
+
+; X32: subl $12, %esp
+; X32-NEXT: pushl %ecx
+; X32-NEXT: calll __morestack_allocate_stack_space
+; X32-NEXT: addl $16, %esp
+
+; X64: test_basic:
+
+; X64: cmpq %fs:112, %rsp
+; X64-NEXT: ja .LBB0_2
+
+; X64: movabsq $24, %r10
+; X64-NEXT: movabsq $0, %r11
+; X64-NEXT: callq __morestack
+; X64-NEXT: ret
+
+; X64: movq %rsp, %rdi
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: cmpq %rdi, %fs:112
+
+; X64: movq %rdi, %rsp
+
+; X64: movq %rax, %rdi
+; X64-NEXT: callq __morestack_allocate_stack_space
+; X64-NEXT: movq %rax, %rdi
+
+}
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index ecdb00d5d1ef..5407b87418f3 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,60 +1,97 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-FreeBSD
+
+; We used to crash with filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -segmented-stacks -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-freebsd -segmented-stacks -filetype=obj
+
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-solaris -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-Solaris
+; RUN: not llc < %s -mcpu=generic -mtriple=x86_64-mingw32 -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X64-MinGW
+; RUN: not llc < %s -mcpu=generic -mtriple=i686-freebsd -segmented-stacks 2> %t.log
+; RUN: FileCheck %s -input-file=%t.log -check-prefix=X32-FreeBSD
+
+; X64-Solaris: Segmented stacks not supported on this platform
+; X64-MinGW: Segmented stacks not supported on this platform
+; X32-FreeBSD: Segmented stacks not supported on FreeBSD i386
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
-define i32 @test_basic(i32 %l) {
- %mem = alloca i32, i32 %l
- call void @dummy_use (i32* %mem, i32 %l)
- %terminate = icmp eq i32 %l, 0
- br i1 %terminate, label %true, label %false
+define void @test_basic() {
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
+ ret void
+
+; X32-Linux: test_basic:
+
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB0_2
-true:
- ret i32 0
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
-false:
- %newlen = sub i32 %l, 1
- %retvalue = call i32 @test_basic(i32 %newlen)
- ret i32 %retvalue
+; X64-Linux: test_basic:
-; X32: test_basic:
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB0_2
-; X32: leal -12(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
-; X32: subl $8, %esp
-; X32-NEXT: pushl $4
-; X32-NEXT: pushl $12
-; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
-; X32-NEXT: ret
+; X32-Darwin: test_basic:
-; X32: movl %eax, %esp
+; X32-Darwin: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja LBB0_2
-; X32: subl $12, %esp
-; X32-NEXT: pushl %ecx
-; X32-NEXT: calll __morestack_allocate_stack_space
-; X32-NEXT: addl $16, %esp
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: test_basic:
+; X64-Darwin: test_basic:
-; X64: leaq -24(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB0_2
-; X64: movabsq $24, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
-; X64: movq %rsp, %rax
-; X64-NEXT: subq %rcx, %rax
-; X64-NEXT: cmpq %rax, %fs:112
+; X32-MinGW: test_basic:
-; X64: movq %rax, %rsp
+; X32-MinGW: cmpl %fs:20, %esp
+; X32-MinGW-NEXT: ja LBB0_2
-; X64: movq %rcx, %rdi
-; X64-NEXT: callq __morestack_allocate_stack_space
+; X32-MinGW: pushl $0
+; X32-MinGW-NEXT: pushl $48
+; X32-MinGW-NEXT: calll ___morestack
+; X32-MinGW-NEXT: ret
+
+; X64-FreeBSD: test_basic:
+
+; X64-FreeBSD: cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT: ja .LBB0_2
+
+; X64-FreeBSD: movabsq $40, %r10
+; X64-FreeBSD-NEXT: movabsq $0, %r11
+; X64-FreeBSD-NEXT: callq __morestack
+; X64-FreeBSD-NEXT: ret
}
@@ -63,25 +100,286 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
 %addend = load i32 * %closure
%result = add i32 %other, %addend
ret i32 %result
-; X32: leal (%esp), %edx
-; X32-NEXT: cmpl %gs:48, %edx
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB1_2
+
+; X32-Linux: pushl $4
+; X32-Linux-NEXT: pushl $0
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB1_2
+
+; X64-Linux: movq %r10, %rax
+; X64-Linux-NEXT: movabsq $0, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+; X64-Linux-NEXT: movq %rax, %r10
+
+; X32-Darwin: movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja LBB1_2
+
+; X32-Darwin: pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB1_2
+
+; X64-Darwin: movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
+
+; X32-MinGW: cmpl %fs:20, %esp
+; X32-MinGW-NEXT: ja LBB1_2
+
+; X32-MinGW: pushl $4
+; X32-MinGW-NEXT: pushl $0
+; X32-MinGW-NEXT: calll ___morestack
+; X32-MinGW-NEXT: ret
+
+; X64-FreeBSD: cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT: ja .LBB1_2
+
+; X64-FreeBSD: movq %r10, %rax
+; X64-FreeBSD-NEXT: movabsq $0, %r10
+; X64-FreeBSD-NEXT: movabsq $0, %r11
+; X64-FreeBSD-NEXT: callq __morestack
+; X64-FreeBSD-NEXT: ret
+; X64-FreeBSD-NEXT: movq %rax, %r10
+
+}
+
+define void @test_large() {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 0)
+ ret void
+
+; X32-Linux: leal -40012(%esp), %ecx
+; X32-Linux-NEXT: cmpl %gs:48, %ecx
+; X32-Linux-NEXT: ja .LBB2_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB2_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja LBB2_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB2_2
+
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW: leal -40008(%esp), %ecx
+; X32-MinGW-NEXT: cmpl %fs:20, %ecx
+; X32-MinGW-NEXT: ja LBB2_2
+
+; X32-MinGW: pushl $0
+; X32-MinGW-NEXT: pushl $40008
+; X32-MinGW-NEXT: calll ___morestack
+; X32-MinGW-NEXT: ret
+
+; X64-FreeBSD: leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT: cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT: ja .LBB2_2
+
+; X64-FreeBSD: movabsq $40008, %r10
+; X64-FreeBSD-NEXT: movabsq $0, %r11
+; X64-FreeBSD-NEXT: callq __morestack
+; X64-FreeBSD-NEXT: ret
+
+}
+
+define fastcc void @test_fastcc() {
+ %mem = alloca i32, i32 10
+ call void @dummy_use (i32* %mem, i32 10)
+ ret void
+
+; X32-Linux: test_fastcc:
+
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB3_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc:
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB3_2
+
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_fastcc:
+
+; X32-Darwin: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja LBB3_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_fastcc:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB3_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW: test_fastcc:
+
+; X32-MinGW: cmpl %fs:20, %esp
+; X32-MinGW-NEXT: ja LBB3_2
+
+; X32-MinGW: pushl $0
+; X32-MinGW-NEXT: pushl $48
+; X32-MinGW-NEXT: calll ___morestack
+; X32-MinGW-NEXT: ret
+
+; X64-FreeBSD: test_fastcc:
+
+; X64-FreeBSD: cmpq %fs:24, %rsp
+; X64-FreeBSD-NEXT: ja .LBB3_2
+
+; X64-FreeBSD: movabsq $40, %r10
+; X64-FreeBSD-NEXT: movabsq $0, %r11
+; X64-FreeBSD-NEXT: callq __morestack
+; X64-FreeBSD-NEXT: ret
+
+}
+
+define fastcc void @test_fastcc_large() {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 0)
+ ret void
+
+; X32-Linux: test_fastcc_large:
+
+; X32-Linux: leal -40012(%esp), %eax
+; X32-Linux-NEXT: cmpl %gs:48, %eax
+; X32-Linux-NEXT: ja .LBB4_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc_large:
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB4_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_fastcc_large:
+
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja LBB4_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_fastcc_large:
+
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB4_2
+
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+; X32-MinGW: test_fastcc_large:
+
+; X32-MinGW: leal -40008(%esp), %eax
+; X32-MinGW-NEXT: cmpl %fs:20, %eax
+; X32-MinGW-NEXT: ja LBB4_2
+
+; X32-MinGW: pushl $0
+; X32-MinGW-NEXT: pushl $40008
+; X32-MinGW-NEXT: calll ___morestack
+; X32-MinGW-NEXT: ret
+
+; X64-FreeBSD: test_fastcc_large:
+
+; X64-FreeBSD: leaq -40008(%rsp), %r11
+; X64-FreeBSD-NEXT: cmpq %fs:24, %r11
+; X64-FreeBSD-NEXT: ja .LBB4_2
+
+; X64-FreeBSD: movabsq $40008, %r10
+; X64-FreeBSD-NEXT: movabsq $0, %r11
+; X64-FreeBSD-NEXT: callq __morestack
+; X64-FreeBSD-NEXT: ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 %a)
+ ret void
+; This is testing that the Mac implementation preserves ecx
-; X32: subl $8, %esp
-; X32-NEXT: pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: addl $8, %esp
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc_large_with_ecx_arg:
-; X64: leaq (%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja LBB5_2
-; X64: movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64: movq %rax, %r10
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
}
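
The prologue being checked has the same shape on every supported target: compare the (possibly adjusted) stack pointer against a per-thread stack limit at a fixed offset from the thread pointer (%gs:48 on 32-bit Linux, %fs:112 on 64-bit Linux, %gs:816 on 64-bit Darwin, %fs:20 on 32-bit MinGW, %fs:24 on 64-bit FreeBSD), and on failure call __morestack with the frame size and incoming-argument size, pushed as immediates on 32-bit targets and passed in %r10/%r11 on 64-bit ones. A minimal reproduction of the 64-bit Linux shape, with the frame size left as a regex since it depends on the final layout:

; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -segmented-stacks | FileCheck %s
define void @needs_check() {
  %mem = alloca i32, i32 10
  call void @dummy_use(i32* %mem, i32 10)
  ret void
; CHECK: cmpq %fs:112, %rsp
; CHECK: movabsq ${{[0-9]+}}, %r10
; CHECK-NEXT: movabsq $0, %r11
; CHECK-NEXT: callq __morestack
}
declare void @dummy_use(i32*, i32)
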
diff --git a/test/CodeGen/X86/sext-subreg.ll b/test/CodeGen/X86/sext-subreg.ll
index b2b9f8121fd6..a128af9950f0 100644
--- a/test/CodeGen/X86/sext-subreg.ll
+++ b/test/CodeGen/X86/sext-subreg.ll
@@ -8,10 +8,10 @@ define i64 @t(i64 %A, i64 %B, i32* %P, i64 *%P2) nounwind {
; CHECK: movl %eax
%C = add i64 %A, %B
%D = trunc i64 %C to i32
- volatile store i32 %D, i32* %P
+ store volatile i32 %D, i32* %P
%E = shl i64 %C, 32
%F = ashr i64 %E, 32
- volatile store i64 %F, i64 *%P2
- volatile store i32 %D, i32* %P
+ store volatile i64 %F, i64 *%P2
+ store volatile i32 %D, i32* %P
ret i64 undef
}
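
This is a pure syntax migration: volatile moved from a prefix on the whole instruction to a keyword on the memory operation itself, so the older volatile store / volatile load spellings become:

  store volatile i32 %D, i32* %P
  %tmp1 = load volatile double* %tmp, align 8

The semantics are unchanged; the same rewrite appears in stack-align.ll, store-empty-member.ll, and tail-opts.ll below.
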
diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll
index fd278c2239f0..b747cc5580ca 100644
--- a/test/CodeGen/X86/shift-and.ll
+++ b/test/CodeGen/X86/shift-and.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | grep and | count 1
+; RUN: llc < %s -march=x86 | grep and | count 2
; RUN: llc < %s -march=x86-64 | not grep and
define i32 @t1(i32 %t, i32 %val) nounwind {
@@ -7,9 +7,15 @@ define i32 @t1(i32 %t, i32 %val) nounwind {
ret i32 %res
}
+define i32 @t2(i32 %t, i32 %val) nounwind {
+ %shamt = and i32 %t, 63
+ %res = shl i32 %val, %shamt
+ ret i32 %res
+}
+
@X = internal global i16 0
-define void @t2(i16 %t) nounwind {
+define void @t3(i16 %t) nounwind {
%shamt = and i16 %t, 31
%tmp = load i16* @X
%tmp1 = ashr i16 %tmp, %shamt
@@ -17,8 +23,14 @@ define void @t2(i16 %t) nounwind {
ret void
}
-define i64 @t3(i64 %t, i64 %val) nounwind {
+define i64 @t4(i64 %t, i64 %val) nounwind {
%shamt = and i64 %t, 63
%res = lshr i64 %val, %shamt
ret i64 %res
}
+
+define i64 @t5(i64 %t, i64 %val) nounwind {
+ %shamt = and i64 %t, 191
+ %res = lshr i64 %val, %shamt
+ ret i64 %res
+}
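
The fold being exercised relies on x86's own shift semantics: the hardware uses only the low 5 bits of the count for 32-bit shifts and the low 6 bits for 64-bit shifts, so an and that only clears higher bits of the shift amount is dead and can be folded into the shift. The new t2 and t5 use masks (63 on an i32 shift, 191 on an i64 shift) wider than the type's natural bitwidth-1 mask, and the updated grep count pins down exactly how many and instructions survive on 32-bit x86 while x86-64 still folds them all. Restating the pre-existing t1 case with the reasoning spelled out in comments:

define i32 @t1(i32 %t, i32 %val) nounwind {
  %shamt = and i32 %t, 31      ; redundant: a 32-bit shll already masks %cl to 5 bits
  %res = shl i32 %val, %shamt
  ret i32 %res                 ; expected codegen: movl + shll, no andl
}
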
diff --git a/test/CodeGen/X86/shift-combine.ll b/test/CodeGen/X86/shift-combine.ll
index e443ac19a80f..51f83036c23d 100644
--- a/test/CodeGen/X86/shift-combine.ll
+++ b/test/CodeGen/X86/shift-combine.ll
@@ -1,15 +1,19 @@
-; RUN: llc < %s | not grep shrl
+; RUN: llc -march=x86 < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
-@array = weak global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1]
+@array = weak global [4 x i32] zeroinitializer
+
+define i32 @test_lshr_and(i32 %x) {
+; CHECK: test_lshr_and:
+; CHECK-NOT: shrl
+; CHECK: andl $12,
+; CHECK: movl {{.*}}array{{.*}},
+; CHECK: ret
-define i32 @foo(i32 %x) {
entry:
- %tmp2 = lshr i32 %x, 2 ; <i32> [#uses=1]
- %tmp3 = and i32 %tmp2, 3 ; <i32> [#uses=1]
- %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3 ; <i32*> [#uses=1]
- %tmp5 = load i32* %tmp4, align 4 ; <i32> [#uses=1]
- ret i32 %tmp5
+ %tmp2 = lshr i32 %x, 2
+ %tmp3 = and i32 %tmp2, 3
+ %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3
+ %tmp5 = load i32* %tmp4, align 4
+ ret i32 %tmp5
}
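
The CHECK lines encode a small piece of address arithmetic: the element index (%x >> 2) & 3 is scaled by 4 (the i32 element size) when it becomes a byte offset into @array, and the scaling cancels the shift:

  byte_offset = ((x >> 2) & 3) * 4
              = ((x >> 2) << 2) & (3 << 2)
              = (x & ~3) & 12
              = x & 12              ; bits 2-3 of x, already in place

so the lowered code needs only andl $12 and a direct indexed load from array, with no shrl anywhere.
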
diff --git a/test/CodeGen/X86/shift-folding.ll b/test/CodeGen/X86/shift-folding.ll
index d9c3061ff687..3ea601147bb0 100644
--- a/test/CodeGen/X86/shift-folding.ll
+++ b/test/CodeGen/X86/shift-folding.ll
@@ -1,28 +1,70 @@
-; RUN: llc < %s -march=x86 | \
-; RUN: grep {s\[ah\]\[rl\]l} | count 1
-
-define i32* @test1(i32* %P, i32 %X) nounwind {
- %Y = lshr i32 %X, 2 ; <i32> [#uses=1]
- %gep.upgrd.1 = zext i32 %Y to i64 ; <i64> [#uses=1]
- %P2 = getelementptr i32* %P, i64 %gep.upgrd.1 ; <i32*> [#uses=1]
- ret i32* %P2
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32* @test1(i32* %P, i32 %X) {
+; CHECK: test1:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+ %Y = lshr i32 %X, 2
+ %gep.upgrd.1 = zext i32 %Y to i64
+ %P2 = getelementptr i32* %P, i64 %gep.upgrd.1
+ ret i32* %P2
}
-define i32* @test2(i32* %P, i32 %X) nounwind {
- %Y = shl i32 %X, 2 ; <i32> [#uses=1]
- %gep.upgrd.2 = zext i32 %Y to i64 ; <i64> [#uses=1]
- %P2 = getelementptr i32* %P, i64 %gep.upgrd.2 ; <i32*> [#uses=1]
- ret i32* %P2
+define i32* @test2(i32* %P, i32 %X) {
+; CHECK: test2:
+; CHECK: shll $4
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+ %Y = shl i32 %X, 2
+ %gep.upgrd.2 = zext i32 %Y to i64
+ %P2 = getelementptr i32* %P, i64 %gep.upgrd.2
+ ret i32* %P2
}
-define i32* @test3(i32* %P, i32 %X) nounwind {
- %Y = ashr i32 %X, 2 ; <i32> [#uses=1]
- %P2 = getelementptr i32* %P, i32 %Y ; <i32*> [#uses=1]
- ret i32* %P2
+define i32* @test3(i32* %P, i32 %X) {
+; CHECK: test3:
+; CHECK-NOT: shrl
+; CHECK-NOT: shll
+; CHECK: ret
+
+entry:
+ %Y = ashr i32 %X, 2
+ %P2 = getelementptr i32* %P, i32 %Y
+ ret i32* %P2
}
-define fastcc i32 @test4(i32* %d) nounwind {
+define fastcc i32 @test4(i32* %d) {
+; CHECK: test4:
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
%tmp4 = load i32* %d
%tmp512 = lshr i32 %tmp4, 24
ret i32 %tmp512
}
+
+define i64 @test5(i16 %i, i32* %arr) {
+; Ensure that we don't fold away shifts which have multiple uses, as they are
+; just re-introduced for the second use.
+; CHECK: test5:
+; CHECK-NOT: shrl
+; CHECK: shrl $11
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
+ %i.zext = zext i16 %i to i32
+ %index = lshr i32 %i.zext, 11
+ %index.zext = zext i32 %index to i64
+ %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+ %val = load i32* %val.ptr
+ %val.zext = zext i32 %val to i64
+ %sum = add i64 %val.zext, %index.zext
+ ret i64 %sum
+}
diff --git a/test/CodeGen/X86/shl-i64.ll b/test/CodeGen/X86/shl-i64.ll
new file mode 100644
index 000000000000..f00058a8a886
--- /dev/null
+++ b/test/CodeGen/X86/shl-i64.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86 -mattr=+sse2 < %s | FileCheck %s
+
+; Make sure that we don't generate an illegal i64 extract after LegalizeType.
+; CHECK: shll
+
+
+define void @test_cl(<4 x i64>* %dst, <4 x i64>* %src, i32 %idx) {
+entry:
+ %arrayidx = getelementptr inbounds <4 x i64> * %src, i32 %idx
+ %0 = load <4 x i64> * %arrayidx, align 32
+ %arrayidx1 = getelementptr inbounds <4 x i64> * %dst, i32 %idx
+ %1 = load <4 x i64> * %arrayidx1, align 32
+ %2 = extractelement <4 x i64> %1, i32 0
+ %and = and i64 %2, 63
+ %3 = insertelement <4 x i64> undef, i64 %and, i32 0
+ %splat = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> zeroinitializer
+ %shl = shl <4 x i64> %0, %splat
+ store <4 x i64> %shl, <4 x i64> * %arrayidx1, align 32
+ ret void
+}
diff --git a/test/CodeGen/X86/sibcall-5.ll b/test/CodeGen/X86/sibcall-5.ll
index 9d74121b4301..937817e45647 100644
--- a/test/CodeGen/X86/sibcall-5.ll
+++ b/test/CodeGen/X86/sibcall-5.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=i386-apple-darwin8 -mattr=+sse2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-sse3 | FileCheck %s --check-prefix=X64_BAD
; Sibcall optimization of expanded libcalls.
; rdar://8707777
@@ -29,3 +30,31 @@ entry:
declare float @sinf(float) nounwind readonly
declare double @sin(double) nounwind readonly
+
+; rdar://10930395
+%0 = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_2" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+define hidden { double, double } @foo2(%0* %self, i8* nocapture %_cmd) uwtable optsize ssp {
+; X64_BAD: foo
+; X64_BAD: call
+; X64_BAD: call
+; X64_BAD: call
+ %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+ %2 = bitcast %0* %self to i8*
+ %3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
+ %4 = extractvalue { double, double } %3, 0
+ %5 = extractvalue { double, double } %3, 1
+ %6 = tail call double @floor(double %4) optsize
+ %7 = tail call double @floor(double %5) optsize
+ %insert.i.i = insertvalue { double, double } undef, double %6, 0
+ %insert5.i.i = insertvalue { double, double } %insert.i.i, double %7, 1
+ ret { double, double } %insert5.i.i
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare double @floor(double) optsize
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
index 2b13029896ee..81a072fb396a 100644
--- a/test/CodeGen/X86/splat-scalar-load.ll
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
; rdar://7434544
-define <2 x i64> @t2() nounwind ssp {
+define <2 x i64> @t2() nounwind {
entry:
; CHECK: t2:
; CHECK: pshufd $85, (%esp), %xmm0
diff --git a/test/CodeGen/X86/sret.ll b/test/CodeGen/X86/sret.ll
deleted file mode 100644
index b9455300bdbb..000000000000
--- a/test/CodeGen/X86/sret.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llc < %s -march=x86 | grep ret | grep 4
-
- %struct.foo = type { [4 x i32] }
-
-define void @bar(%struct.foo* noalias sret %agg.result) nounwind {
-entry:
- %tmp1 = getelementptr %struct.foo* %agg.result, i32 0, i32 0
- %tmp3 = getelementptr [4 x i32]* %tmp1, i32 0, i32 0
- store i32 1, i32* %tmp3, align 8
- ret void
-}
-
-@dst = external global i32
-
-define void @foo() nounwind {
- %memtmp = alloca %struct.foo, align 4
- call void @bar( %struct.foo* sret %memtmp ) nounwind
- %tmp4 = getelementptr %struct.foo* %memtmp, i32 0, i32 0
- %tmp5 = getelementptr [4 x i32]* %tmp4, i32 0, i32 0
- %tmp6 = load i32* %tmp5
- store i32 %tmp6, i32* @dst
- ret void
-}
diff --git a/test/CodeGen/X86/sse-align-3.ll b/test/CodeGen/X86/sse-align-3.ll
index 04f216176c36..b6b0471e913f 100644
--- a/test/CodeGen/X86/sse-align-3.ll
+++ b/test/CodeGen/X86/sse-align-3.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; CHECK-NOT: movapd
; CHECK: movaps
-; CHECK-NOT: movaps
-; CHECK: movapd
+; CHECK-NOT: movapd
+; CHECK: movaps
; CHECK-NOT: movap
define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {
diff --git a/test/CodeGen/X86/sse-domains.ll b/test/CodeGen/X86/sse-domains.ll
new file mode 100644
index 000000000000..d1e07c856364
--- /dev/null
+++ b/test/CodeGen/X86/sse-domains.ll
@@ -0,0 +1,87 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7"
+
+; CHECK: f
+;
+; This function contains load / store / and operations that all can execute in
+; any domain. The only domain-specific operation is the %add = shl... operation
+; which is <4 x i32>.
+;
+; The paddd instruction can only influence the other operations through the loop
+; back-edge. Check that everything is still moved into the integer domain.
+
+define void @f(<4 x i32>* nocapture %p, i32 %n) nounwind uwtable ssp {
+entry:
+ br label %while.body
+
+; Materialize a zeroinitializer and a constant-pool load in the integer domain.
+; The order is not important.
+; CHECK: pxor
+; CHECK: movdqa
+
+; The instructions in the loop must all be integer domain as well.
+; CHECK: while.body
+; CHECK: pand
+; CHECK: movdqa
+; CHECK: movdqa
+; Finally, the controlling integer-only instruction.
+; CHECK: paddd
+while.body:
+ %p.addr.04 = phi <4 x i32>* [ %incdec.ptr, %while.body ], [ %p, %entry ]
+ %n.addr.03 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
+ %x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ]
+ %dec = add nsw i32 %n.addr.03, -1
+ %and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
+ %incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1
+ store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
+ %0 = load <4 x i32>* %incdec.ptr, align 16
+ %add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end:
+ ret void
+}
+
+; CHECK: f2
+; CHECK: for.body
+;
+; This loop contains two cvtsi2ss instructions that update the same xmm
+; register. Verify that the execution dependency fix pass breaks those
+; dependencies by inserting xorps instructions.
+;
+; If the register allocator chooses different registers for the two cvtsi2ss
+; instructions, they are still dependent on themselves.
+; CHECK: xorps [[XMM1:%xmm[0-9]+]]
+; CHECK: , [[XMM1]]
+; CHECK: cvtsi2ss %{{.*}}, [[XMM1]]
+; CHECK: xorps [[XMM2:%xmm[0-9]+]]
+; CHECK: , [[XMM2]]
+; CHECK: cvtsi2ss %{{.*}}, [[XMM2]]
+;
+define float @f2(i32 %m) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i32 %m, 0
+ br i1 %tobool3, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %m.addr.07 = phi i32 [ %dec, %for.body ], [ %m, %entry ]
+ %s1.06 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
+ %s2.05 = phi float [ %add2, %for.body ], [ 0.000000e+00, %entry ]
+ %n.04 = phi i32 [ %inc, %for.body ], [ 1, %entry ]
+ %conv = sitofp i32 %n.04 to float
+ %add = fadd float %s1.06, %conv
+ %conv1 = sitofp i32 %m.addr.07 to float
+ %add2 = fadd float %s2.05, %conv1
+ %inc = add nsw i32 %n.04, 1
+ %dec = add nsw i32 %m.addr.07, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %s1.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+ %s2.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add2, %for.body ]
+ %sub = fsub float %s1.0.lcssa, %s2.0.lcssa
+ ret float %sub
+}
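
Both functions in this new test exercise the execution-dependency fix pass. In @f, the bitwise ops and moves have equivalent encodings in the FP and integer domains, and because the one domain-committed instruction in the loop is the integer paddd, the pass is expected to pull everything else (pxor, pand, movdqa) into the integer domain with it. In @f2, the issue is a false dependency: cvtsi2ss merges its result into the low 32 bits of the destination register and leaves the rest untouched, so each loop iteration would otherwise wait on the register's previous contents. The checked fix is the classic zeroing idiom:

;   cvtsi2ss %eax, %xmm1        ; merges into %xmm1 -> depends on the old %xmm1
; versus
;   xorps %xmm1, %xmm1          ; recognized zeroing idiom, breaks the dep chain
;   cvtsi2ss %eax, %xmm1
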
diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll
index af1a73b8f12e..11124409f058 100644
--- a/test/CodeGen/X86/sse-minmax.ll
+++ b/test/CodeGen/X86/sse-minmax.ll
@@ -140,15 +140,15 @@ define double @ole_inverse(double %x, double %y) nounwind {
}
; CHECK: x_ogt:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ogt:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ogt:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ogt(double %x) nounwind {
@@ -158,15 +158,15 @@ define double @x_ogt(double %x) nounwind {
}
; CHECK: x_olt:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_olt:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_olt:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_olt(double %x) nounwind {
@@ -176,17 +176,17 @@ define double @x_olt(double %x) nounwind {
}
; CHECK: x_ogt_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ogt_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ogt_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -197,17 +197,17 @@ define double @x_ogt_inverse(double %x) nounwind {
}
; CHECK: x_olt_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_olt_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_olt_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -220,11 +220,11 @@ define double @x_olt_inverse(double %x) nounwind {
; CHECK: x_oge:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_oge:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_oge:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_oge(double %x) nounwind {
@@ -236,11 +236,11 @@ define double @x_oge(double %x) nounwind {
; CHECK: x_ole:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ole:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ole:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ole(double %x) nounwind {
@@ -252,12 +252,12 @@ define double @x_ole(double %x) nounwind {
; CHECK: x_oge_inverse:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_oge_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_oge_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -270,12 +270,12 @@ define double @x_oge_inverse(double %x) nounwind {
; CHECK: x_ole_inverse:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ole_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ole_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -414,11 +414,11 @@ define double @ule_inverse(double %x, double %y) nounwind {
; CHECK: x_ugt:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ugt:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ugt:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ugt(double %x) nounwind {
@@ -430,11 +430,11 @@ define double @x_ugt(double %x) nounwind {
; CHECK: x_ult:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_ult:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ult:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ult(double %x) nounwind {
@@ -446,12 +446,12 @@ define double @x_ult(double %x) nounwind {
; CHECK: x_ugt_inverse:
; CHECK: ucomisd %xmm0, %xmm1
; UNSAFE: x_ugt_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ugt_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -464,12 +464,12 @@ define double @x_ugt_inverse(double %x) nounwind {
; CHECK: x_ult_inverse:
; CHECK: ucomisd %xmm1, %xmm0
; UNSAFE: x_ult_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ult_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -480,16 +480,16 @@ define double @x_ult_inverse(double %x) nounwind {
}
; CHECK: x_uge:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_uge:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_uge:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_uge(double %x) nounwind {
@@ -499,16 +499,16 @@ define double @x_uge(double %x) nounwind {
}
; CHECK: x_ule:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm0, %xmm1
; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ule:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ule:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm1, %xmm0
; FINITE-NEXT: ret
define double @x_ule(double %x) nounwind {
@@ -518,16 +518,16 @@ define double @x_ule(double %x) nounwind {
}
; CHECK: x_uge_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: minsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_uge_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: minsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_uge_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: minsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
@@ -538,16 +538,16 @@ define double @x_uge_inverse(double %x) nounwind {
}
; CHECK: x_ule_inverse:
-; CHECK-NEXT: pxor %xmm1, %xmm1
+; CHECK-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; CHECK-NEXT: maxsd %xmm1, %xmm0
; CHECK-NEXT: ret
; UNSAFE: x_ule_inverse:
-; UNSAFE-NEXT: pxor %xmm1, %xmm1
+; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; UNSAFE-NEXT: maxsd %xmm0, %xmm1
; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; UNSAFE-NEXT: ret
; FINITE: x_ule_inverse:
-; FINITE-NEXT: pxor %xmm1, %xmm1
+; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1
; FINITE-NEXT: maxsd %xmm0, %xmm1
; FINITE-NEXT: movap{{[sd]}} %xmm1, %xmm0
; FINITE-NEXT: ret
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 56b099ec42e6..2f4317bf294c 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41 | FileCheck %s
-
-
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
+; RUN: llc < %s -march=x86 -mcpu=yonah -mattr=+sse2,-sse41 | FileCheck %s
+; CHECK: vsel_float
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
%A = load <4 x float>* %v1
%B = load <4 x float>* %v2
@@ -11,8 +13,11 @@ define void@vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
ret void
}
-; currently (xor v4i32) is defined as illegal, so we scalarize the code.
-
+; CHECK: vsel_i32
+; CHECK: pandn
+; CHECK: pand
+; CHECK: por
+; CHECK: ret
define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
%A = load <4 x i32>* %v1
%B = load <4 x i32>* %v2
@@ -21,9 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
ret void
}
+; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_i64
-; CHECK: pxor
-; CHECK: pand
+; CHECK: xorps
+; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
@@ -36,14 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
ret void
}
+; Without forcing instructions, fall back to the preferred PS domain.
; CHECK: vsel_double
-; CHECK: pxor
-; CHECK: pand
+; CHECK: xorps
+; CHECK: andps
; CHECK: andnps
; CHECK: orps
; CHECK: ret
-
define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
%A = load <4 x double>* %v1
%B = load <4 x double>* %v2
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 70e0a8a177e0..36a0fd91bd87 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -98,7 +98,7 @@ define void @test7() nounwind {
ret void
; CHECK: test7:
-; CHECK: pxor %xmm0, %xmm0
+; CHECK: xorps %xmm0, %xmm0
; CHECK: movaps %xmm0, 0
}
@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
%tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
ret <2 x double> %tmp7
; CHECK: test11:
-; CHECK: movapd 4(%esp), %xmm0
+; CHECK: movaps 4(%esp), %xmm0
}
define void @test12() nounwind {
@@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp27
; CHECK: test14:
-; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
-; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
+; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
+; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]]
; CHECK: movlhps [[X2]], [[X0]]
}
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 8b3a317ffb78..5ea1b4dff1c1 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -16,10 +16,8 @@ entry:
ret void
; X64: t0:
-; X64: movddup (%rsi), %xmm0
-; X64: pshuflw $0, %xmm0, %xmm0
-; X64: xorl %eax, %eax
-; X64: pinsrw $0, %eax, %xmm0
+; X64: movdqa (%rsi), %xmm0
+; X64: pslldq $2, %xmm0
; X64: movdqa %xmm0, (%rdi)
; X64: ret
}
@@ -31,9 +29,8 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
ret <8 x i16> %tmp3
; X64: t1:
-; X64: movl (%rsi), %eax
; X64: movdqa (%rdi), %xmm0
-; X64: pinsrw $0, %eax, %xmm0
+; X64: pinsrw $0, (%rsi), %xmm0
; X64: ret
}
@@ -167,12 +164,12 @@ define internal void @t10() nounwind {
store <4 x i16> %6, <4 x i16>* @g2, align 8
ret void
; X64: t10:
-; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax
-; X64: unpcklpd [[X1:%xmm[0-9]+]]
-; X64: pshuflw $8, [[X1]], [[X2:%xmm[0-9]+]]
-; X64: pinsrw $2, %eax, [[X2]]
+; X64: pextrw $4, [[X0:%xmm[0-9]+]], %ecx
; X64: pextrw $6, [[X0]], %eax
-; X64: pinsrw $3, %eax, [[X2]]
+; X64: movlhps [[X0]], [[X0]]
+; X64: pshuflw $8, [[X0]], [[X0]]
+; X64: pinsrw $2, %ecx, [[X0]]
+; X64: pinsrw $3, %eax, [[X0]]
}
@@ -229,7 +226,7 @@ entry:
}
-
+; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
entry:
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
@@ -250,13 +247,11 @@ entry:
%tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
ret <16 x i8> %tmp9
; X64: t16:
-; X64: pinsrw $0, %eax, [[X1:%xmm[0-9]+]]
-; X64: pextrw $8, [[X0:%xmm[0-9]+]], %eax
-; X64: pinsrw $1, %eax, [[X1]]
-; X64: pextrw $1, [[X1]], %ecx
-; X64: movd [[X1]], %edx
-; X64: pinsrw $0, %edx, %xmm
-; X64: pinsrw $1, %eax, %xmm
+; X64: pextrw $8, %xmm0, %eax
+; X64: pslldq $2, %xmm0
+; X64: movd %xmm0, %ecx
+; X64: pextrw $1, %xmm0, %edx
+; X64: pinsrw $0, %ecx, %xmm0
; X64: ret
}
diff --git a/test/CodeGen/X86/sse41.ll b/test/CodeGen/X86/sse41.ll
index 2ac4cb435a75..54264b16aea0 100644
--- a/test/CodeGen/X86/sse41.ll
+++ b/test/CodeGen/X86/sse41.ll
@@ -183,8 +183,8 @@ define <4 x float> @insertps_3(<4 x float> %t1, <4 x float> %t2) nounwind {
; X64: insertps $0, %xmm1, %xmm0
}
-define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
- %tmp1 = call i32 @llvm.x86.sse41.ptestz(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
; X32: _ptestz_1:
; X32: ptest %xmm1, %xmm0
@@ -195,8 +195,8 @@ define i32 @ptestz_1(<4 x float> %t1, <4 x float> %t2) nounwind {
; X64: sete %al
}
-define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
- %tmp1 = call i32 @llvm.x86.sse41.ptestc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
; X32: _ptestz_2:
; X32: ptest %xmm1, %xmm0
@@ -207,8 +207,8 @@ define i32 @ptestz_2(<4 x float> %t1, <4 x float> %t2) nounwind {
; X64: sbbl %eax
}
-define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
- %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<4 x float> %t1, <4 x float> %t2) nounwind readnone
+define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind {
+ %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone
ret i32 %tmp1
; X32: _ptestz_3:
; X32: ptest %xmm1, %xmm0
@@ -220,9 +220,9 @@ define i32 @ptestz_3(<4 x float> %t1, <4 x float> %t2) nounwind {
}
-declare i32 @llvm.x86.sse41.ptestz(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>) nounwind readnone
-declare i32 @llvm.x86.sse41.ptestnzc(<4 x float>, <4 x float>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
+declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone
; This used to compile to insertps $0 + insertps $16. insertps $0 is always
; pointless.
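
The ptest intrinsics were retyped from <4 x float> to <2 x i64>, matching PTEST's integer semantics; the generated ptest %xmm1, %xmm0 is unchanged. A caller that still has FP vectors would now go through a bitcast, along these lines (a sketch; the function name is illustrative):

declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone

define i32 @ptestz_float(<4 x float> %t1, <4 x float> %t2) nounwind {
  %a = bitcast <4 x float> %t1 to <2 x i64>
  %b = bitcast <4 x float> %t2 to <2 x i64>
  %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a, <2 x i64> %b) nounwind readnone
  ret i32 %tmp1
}
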
diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll
index 793c0267124c..f6c13ec0adf7 100644
--- a/test/CodeGen/X86/stack-align.ll
+++ b/test/CodeGen/X86/stack-align.ll
@@ -11,13 +11,13 @@ define void @test({ double, double }* byval %z, double* %P) nounwind {
entry:
%tmp3 = load double* @G, align 16 ; <double> [#uses=1]
%tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1]
- volatile store double %tmp4, double* %P
+ store volatile double %tmp4, double* %P
%tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp1 = volatile load double* %tmp, align 8 ; <double> [#uses=1]
+ %tmp1 = load volatile double* %tmp, align 8 ; <double> [#uses=1]
%tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1]
; CHECK: andpd{{.*}}4(%esp), %xmm
%tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1]
- volatile store double %tmp6, double* %P, align 8
+ store volatile double %tmp6, double* %P, align 8
ret void
}
diff --git a/test/CodeGen/X86/stack-align2.ll b/test/CodeGen/X86/stack-align2.ll
new file mode 100644
index 000000000000..18cce7266d13
--- /dev/null
+++ b/test/CodeGen/X86/stack-align2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mcpu=generic -mtriple=i386-linux | FileCheck %s -check-prefix=LINUX-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i386-netbsd | FileCheck %s -check-prefix=NETBSD-I386
+; RUN: llc < %s -mcpu=generic -mtriple=i686-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-I386
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s -check-prefix=LINUX-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-netbsd | FileCheck %s -check-prefix=NETBSD-X86_64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin8 | FileCheck %s -check-prefix=DARWIN-X86_64
+
+define i32 @test() nounwind {
+entry:
+ call void @test2()
+ ret i32 0
+
+; LINUX-I386: subl $12, %esp
+; DARWIN-I386: subl $12, %esp
+; NETBSD-I386-NOT: subl {{.*}}, %esp
+
+; LINUX-X86_64: pushq %{{.*}}
+; LINUX-X86_64-NOT: subq {{.*}}, %rsp
+; DARWIN-X86_64: pushq %{{.*}}
+; DARWIN-X86_64-NOT: subq {{.*}}, %rsp
+; NETBSD-X86_64: pushq %{{.*}}
+; NETBSD-X86_64-NOT: subq {{.*}}, %rsp
+}
+
+declare void @test2()
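
What the prefixes pin down is each ABI's stack-alignment contract at call sites. On i386 Linux and Darwin the stack must be 16-byte aligned when the call executes, so even this trivial function reserves space; NetBSD/i386 only guarantees word alignment, hence the NOT check. All three x86-64 targets are 16-byte aligned, and there the single pushq already restores the invariant, so no subq should appear:

;   i386, 16-byte ABI:   4 (return address) + 12 (subl)  = 16 at the call
;   x86-64, all targets: 8 (return address) + 8  (pushq) = 16, no subq needed
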
diff --git a/test/CodeGen/X86/store-empty-member.ll b/test/CodeGen/X86/store-empty-member.ll
index 37f86c60fae5..aea85b94d414 100644
--- a/test/CodeGen/X86/store-empty-member.ll
+++ b/test/CodeGen/X86/store-empty-member.ll
@@ -9,6 +9,6 @@
define void @foo() nounwind {
%1 = alloca %testType
- volatile store %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
+ store volatile %testType {i32 1, [0 x i32] zeroinitializer, i32 2}, %testType* %1
ret void
}
diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll
index 11686227ab9c..8313166a90cc 100644
--- a/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/test/CodeGen/X86/store_op_load_fold2.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT
+; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL
target datalayout = "e-p:32:32"
%struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
@@ -16,9 +17,14 @@ cond_true2732.preheader: ; preds = %entry
store i64 %tmp2676.us.us, i64* %tmp2666
ret i32 0
-; CHECK: and {{E..}}, DWORD PTR [360]
-; CHECK: and DWORD PTR [356], {{E..}}
-; CHECK: mov DWORD PTR [360], {{E..}}
+; INTEL: and {{E..}}, DWORD PTR [360]
+; INTEL: and DWORD PTR [356], {{E..}}
+; FIXME: mov DWORD PTR [360], {{E..}}
+; The above line comes out as 'mov 360, EAX', but when the register is ECX it works?
+
+; ATT: andl 360, %{{e..}}
+; ATT: andl %{{e..}}, 356
+; ATT: movl %{{e..}}, 360
}
diff --git a/test/CodeGen/X86/stride-reuse.ll b/test/CodeGen/X86/stride-reuse.ll
index 1251a2400555..81de22ca4e35 100644
--- a/test/CodeGen/X86/stride-reuse.ll
+++ b/test/CodeGen/X86/stride-reuse.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
; CHECK-NOT: lea
@B = external global [1000 x float], align 32
diff --git a/test/CodeGen/X86/sub-with-overflow.ll b/test/CodeGen/X86/sub-with-overflow.ll
index 4522e917d315..749b5db480f5 100644
--- a/test/CodeGen/X86/sub-with-overflow.ll
+++ b/test/CodeGen/X86/sub-with-overflow.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s
@ok = internal constant [4 x i8] c"%d\0A\00"
@no = internal constant [4 x i8] c"no\0A\00"
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
index c5a105cb587f..c68a8c6bf845 100644
--- a/test/CodeGen/X86/tail-dup-addr.ll
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -2,8 +2,8 @@
; Test that we don't drop a block that has its address taken.
+; CHECK: Ltmp0: ## Block address taken
; CHECK: Ltmp1: ## Block address taken
-; CHECK: Ltmp2: ## Block address taken
@a = common global i32 0, align 4
@p = common global i8* null, align 8
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index d6c16ca0078e..f1b9f20082f5 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -314,7 +314,7 @@ bby:
]
bb7:
- volatile store i32 0, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
unreachable
bbx:
@@ -323,7 +323,7 @@ bbx:
]
bb12:
- volatile store i32 0, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
unreachable
return:
@@ -352,8 +352,8 @@ bby:
]
bb7:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
bbx:
@@ -362,8 +362,8 @@ bbx:
]
bb12:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
return:
@@ -390,8 +390,8 @@ bby:
]
bb7:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
bbx:
@@ -400,8 +400,8 @@ bbx:
]
bb12:
- volatile store i32 0, i32* @XYZ
- volatile store i32 1, i32* @XYZ
+ store volatile i32 0, i32* @XYZ
+ store volatile i32 1, i32* @XYZ
unreachable
return:
diff --git a/test/CodeGen/X86/tailcall-disable.ll b/test/CodeGen/X86/tailcall-disable.ll
new file mode 100644
index 000000000000..b628f5e537f0
--- /dev/null
+++ b/test/CodeGen/X86/tailcall-disable.ll
@@ -0,0 +1,40 @@
+; RUN: llc -disable-tail-calls < %s | FileCheck --check-prefix=CALL %s
+; RUN: llc < %s | FileCheck --check-prefix=JMP %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @helper() nounwind {
+entry:
+ ret i32 7
+}
+
+define i32 @test1() nounwind {
+entry:
+ %call = tail call i32 @helper()
+ ret i32 %call
+}
+
+; CALL: test1:
+; CALL-NOT: ret
+; CALL: callq helper
+; CALL: ret
+
+; JMP: test1:
+; JMP-NOT: ret
+; JMP: jmp helper # TAILCALL
+
+define i32 @test2() nounwind {
+entry:
+ %call = tail call i32 @test2()
+ ret i32 %call
+}
+
+; CALL: test2:
+; CALL-NOT: ret
+; CALL: callq test2
+; CALL: ret
+
+; JMP: test2:
+; JMP-NOT: ret
+; JMP: jmp test2 # TAILCALL
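
The same input is checked under both prefixes: by default both the external and the self-recursive tail call lower to jmp (the # TAILCALL marker is an assembler comment emitted by the asm printer), while -disable-tail-calls forces them back to callq + ret. Reproducing both behaviours from one file, as the RUN lines do:

;   llc -disable-tail-calls tailcall-disable.ll -o -   # callq helper / callq test2, then ret
;   llc tailcall-disable.ll -o -                       # jmp helper  # TAILCALL
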
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 7ecf379cd9c5..762160202c2d 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-linux -tailcallopt | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -tailcallopt | FileCheck %s
; FIXME: Win64 does not support byval.
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index c18c7aa2d432..bff5f9924f66 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
-; RUN: llc < %s -tailcallopt -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-linux -post-RA-scheduler=true | FileCheck %s
+; RUN: llc < %s -tailcallopt -mcpu=generic -mtriple=x86_64-win32 -post-RA-scheduler=true | FileCheck %s
; FIXME: Redundant unused stack allocation could be eliminated.
; CHECK: subq ${{24|72|80}}, %rsp
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll
new file mode 100644
index 000000000000..a7be48355f69
--- /dev/null
+++ b/test/CodeGen/X86/thiscall-struct-return.ll
@@ -0,0 +1,47 @@
+; RUN: llc < %s -mtriple=i386-PC-Win32 | FileCheck %s
+
+%class.C = type { i8 }
+%struct.S = type { i32 }
+%struct.M = type { i32, i32 }
+
+declare void @_ZN1CC1Ev(%class.C* %this) unnamed_addr nounwind align 2
+declare x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* noalias sret %agg.result, %class.C* %this) nounwind align 2
+declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result, %class.C* %this) nounwind align 2
+
+define void @testv() nounwind {
+; CHECK: testv:
+; CHECK: leal
+; CHECK-NEXT: movl %esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C5SmallEv
+entry:
+ %c = alloca %class.C, align 1
+ %tmp = alloca %struct.S, align 4
+ call void @_ZN1CC1Ev(%class.C* %c)
+ ; This call should put the return structure as a pointer
+ ; into EAX instead of returning directly in EAX. The this
+ ; pointer should go into ECX
+ call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %tmp, %class.C* %c)
+ ret void
+}
+
+define void @test2v() nounwind {
+; CHECK: test2v:
+; CHECK: leal
+; CHECK-NEXT: movl %esi, (%esp)
+; CHECK-NEXT: calll _ZN1CC1Ev
+; CHECK: leal 8(%esp), %eax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: calll _ZNK1C6MediumEv
+entry:
+ %c = alloca %class.C, align 1
+ %tmp = alloca %struct.M, align 4
+ call void @_ZN1CC1Ev(%class.C* %c)
+ ; This call should put the return structure as a pointer
+ ; into EAX instead of returning directly in EAX/EDX. The this
+ ; pointer should go into ECX
+ call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %tmp, %class.C* %c)
+ ret void
+}
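
Both calls combine x86_thiscallcc with sret, and the CHECK lines assert the resulting register assignment: the hidden return-slot pointer is materialized into EAX (leal 8(%esp), %eax) and this travels in ECX (movl %esi, %ecx), rather than the value being returned directly in EAX or EAX:EDX. Demangled (standard Itanium mangling), the externals make the intent clearer:

;   _ZN1CC1Ev       ->  C::C()             (constructor)
;   _ZNK1C5SmallEv  ->  C::Small() const   (returns the 4-byte %struct.S via sret)
;   _ZNK1C6MediumEv ->  C::Medium() const  (returns the 8-byte %struct.M via sret)
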
diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll
new file mode 100644
index 000000000000..e2e58a541a4c
--- /dev/null
+++ b/test/CodeGen/X86/tls-pie.ll
@@ -0,0 +1,63 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \
+; RUN: | FileCheck -check-prefix=X64 %s
+
+@i = thread_local global i32 15
+@i2 = external thread_local global i32
+
+define i32 @f1() {
+; X32: f1:
+; X32: movl %gs:i@NTPOFF, %eax
+; X32-NEXT: ret
+; X64: f1:
+; X64: movl %fs:i@TPOFF, %eax
+; X64-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32: f2:
+; X32: movl %gs:0, %eax
+; X32-NEXT: leal i@NTPOFF(%eax), %eax
+; X32-NEXT: ret
+; X64: f2:
+; X64: movq %fs:0, %rax
+; X64-NEXT: leaq i@TPOFF(%rax), %rax
+; X64-NEXT: ret
+
+entry:
+ ret i32* @i
+}
+
+define i32 @f3() {
+; X32: f3:
+; X32: movl i2@INDNTPOFF, %eax
+; X32-NEXT: movl %gs:(%eax), %eax
+; X32-NEXT: ret
+; X64: f3:
+; X64: movq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: movl %fs:(%rax), %eax
+; X64-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32: f4:
+; X32: movl %gs:0, %eax
+; X32-NEXT: addl i2@INDNTPOFF, %eax
+; X32-NEXT: ret
+; X64: f4:
+; X64: movq %fs:0, %rax
+; X64-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64-NEXT: ret
+
+entry:
+ ret i32* @i2
+}
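
With -relocation-model=pic -enable-pie the variables are known to live in the main executable, so the expensive general-dynamic TLS sequence is unnecessary: the locally defined @i is accessed with the local-exec model (a link-time-constant offset from the thread pointer) and the merely external @i2 with initial-exec (its offset loaded through a GOT slot). The CHECK lines spell out the mapping:

;   @i  (defined here)  local-exec:    %gs:i@NTPOFF (x86),   %fs:i@TPOFF (x86-64)
;   @i2 (external)      initial-exec:  i2@INDNTPOFF (x86),   i2@GOTTPOFF(%rip) (x86-64)
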
diff --git a/test/CodeGen/X86/tls.ll b/test/CodeGen/X86/tls.ll
new file mode 100644
index 000000000000..e8a79bfa6ee3
--- /dev/null
+++ b/test/CodeGen/X86/tls.ll
@@ -0,0 +1,329 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu | FileCheck -check-prefix=X32_LINUX %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu | FileCheck -check-prefix=X64_LINUX %s
+; RUN: llc < %s -march=x86 -mtriple=x86-pc-win32 | FileCheck -check-prefix=X32_WIN %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-pc-win32 | FileCheck -check-prefix=X64_WIN %s
+
+@i1 = thread_local global i32 15
+@i2 = external thread_local global i32
+@i3 = internal thread_local global i32 15
+@i4 = hidden thread_local global i32 15
+@i5 = external hidden thread_local global i32
+@s1 = thread_local global i16 15
+@b1 = thread_local global i8 0
+
+define i32 @f1() {
+; X32_LINUX: f1:
+; X32_LINUX: movl %gs:i1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f1:
+; X64_LINUX: movl %fs:i1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f1:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f1:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i1
+ ret i32 %tmp1
+}
+
+define i32* @f2() {
+; X32_LINUX: f2:
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i1@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f2:
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i1@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f2:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f2:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i1@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+ ret i32* @i1
+}
+
+define i32 @f3() nounwind {
+; X32_LINUX: f3:
+; X32_LINUX: movl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: movl %gs:(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f3:
+; X64_LINUX: movq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: movl %fs:(%rax), %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f3:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f3:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i2@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i2
+ ret i32 %tmp1
+}
+
+define i32* @f4() {
+; X32_LINUX: f4:
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX-NEXT: addl i2@INDNTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f4:
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX-NEXT: addq i2@GOTTPOFF(%rip), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f4:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i2@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f4:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i2@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+ ret i32* @i2
+}
+
+define i32 @f5() nounwind {
+; X32_LINUX: f5:
+; X32_LINUX: movl %gs:i3@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f5:
+; X64_LINUX: movl %fs:i3@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f5:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movl _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f5:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movl i3@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i3
+ ret i32 %tmp1
+}
+
+define i32* @f6() {
+; X32_LINUX: f6:
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i3@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f6:
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i3@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f6:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: leal _i3@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f6:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: leaq i3@SECREL(%rax), %rax
+; X64_WIN-NEXT: ret
+
+entry:
+ ret i32* @i3
+}
+
+define i32 @f7() {
+; X32_LINUX: f7:
+; X32_LINUX: movl %gs:i4@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f7:
+; X64_LINUX: movl %fs:i4@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i4
+ ret i32 %tmp1
+}
+
+define i32* @f8() {
+; X32_LINUX: f8:
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i4@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f8:
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i4@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+ ret i32* @i4
+}
+
+define i32 @f9() {
+; X32_LINUX: f9:
+; X32_LINUX: movl %gs:i5@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f9:
+; X64_LINUX: movl %fs:i5@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+
+entry:
+ %tmp1 = load i32* @i5
+ ret i32 %tmp1
+}
+
+define i32* @f10() {
+; X32_LINUX: f10:
+; X32_LINUX: movl %gs:0, %eax
+; X32_LINUX-NEXT: leal i5@NTPOFF(%eax), %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f10:
+; X64_LINUX: movq %fs:0, %rax
+; X64_LINUX-NEXT: leaq i5@TPOFF(%rax), %rax
+; X64_LINUX-NEXT: ret
+
+entry:
+ ret i32* @i5
+}
+
+define i16 @f11() {
+; X32_LINUX: f11:
+; X32_LINUX: movzwl %gs:s1@NTPOFF, %eax
+; Why is this kill line here, but nowhere else?
+; X32_LINUX-NEXT: # kill
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f11:
+; X64_LINUX: movzwl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: # kill
+; X64_LINUX-NEXT: ret
+; X32_WIN: f11:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movzwl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: # kill
+; X32_WIN-NEXT: ret
+; X64_WIN: f11:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movzwl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: # kill
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i16* @s1
+ ret i16 %tmp1
+}
+
+define i32 @f12() {
+; X32_LINUX: f12:
+; X32_LINUX: movswl %gs:s1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f12:
+; X64_LINUX: movswl %fs:s1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f12:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movswl _s1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f12:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movswl s1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i16* @s1
+ %tmp2 = sext i16 %tmp1 to i32
+ ret i32 %tmp2
+}
+
+define i8 @f13() {
+; X32_LINUX: f13:
+; X32_LINUX: movb %gs:b1@NTPOFF, %al
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f13:
+; X64_LINUX: movb %fs:b1@TPOFF, %al
+; X64_LINUX-NEXT: ret
+; X32_WIN: f13:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movb _b1@SECREL(%eax), %al
+; X32_WIN-NEXT: ret
+; X64_WIN: f13:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movb b1@SECREL(%rax), %al
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i8* @b1
+ ret i8 %tmp1
+}
+
+define i32 @f14() {
+; X32_LINUX: f14:
+; X32_LINUX: movsbl %gs:b1@NTPOFF, %eax
+; X32_LINUX-NEXT: ret
+; X64_LINUX: f14:
+; X64_LINUX: movsbl %fs:b1@TPOFF, %eax
+; X64_LINUX-NEXT: ret
+; X32_WIN: f14:
+; X32_WIN: movl __tls_index, %eax
+; X32_WIN-NEXT: movl %fs:__tls_array, %ecx
+; X32_WIN-NEXT: movl (%ecx,%eax,4), %eax
+; X32_WIN-NEXT: movsbl _b1@SECREL(%eax), %eax
+; X32_WIN-NEXT: ret
+; X64_WIN: f14:
+; X64_WIN: movl _tls_index(%rip), %eax
+; X64_WIN-NEXT: movq %gs:88, %rcx
+; X64_WIN-NEXT: movq (%rcx,%rax,8), %rax
+; X64_WIN-NEXT: movsbl b1@SECREL(%rax), %eax
+; X64_WIN-NEXT: ret
+
+entry:
+ %tmp1 = load i8* @b1
+ %tmp2 = sext i8 %tmp1 to i32
+ ret i32 %tmp2
+}
+
diff --git a/test/CodeGen/X86/tls1.ll b/test/CodeGen/X86/tls1.ll
deleted file mode 100644
index 0cae5c4f2888..000000000000
--- a/test/CodeGen/X86/tls1.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
-
-@i = thread_local global i32 15
-
-define i32 @f() nounwind {
-entry:
- %tmp1 = load i32* @i
- ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls10.ll b/test/CodeGen/X86/tls10.ll
deleted file mode 100644
index fb61596d09ca..000000000000
--- a/test/CodeGen/X86/tls10.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:0, %eax} %t
-; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq %fs:0, %rax} %t2
-; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32* @f() {
-entry:
- ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls11.ll b/test/CodeGen/X86/tls11.ll
deleted file mode 100644
index 514a168c5387..000000000000
--- a/test/CodeGen/X86/tls11.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movzwl %gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movzwl %fs:i@TPOFF, %eax} %t2
-
-@i = thread_local global i16 15
-
-define i16 @f() {
-entry:
- %tmp1 = load i16* @i
- ret i16 %tmp1
-}
diff --git a/test/CodeGen/X86/tls12.ll b/test/CodeGen/X86/tls12.ll
deleted file mode 100644
index c29f6adacd20..000000000000
--- a/test/CodeGen/X86/tls12.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movb %gs:i@NTPOFF, %al} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movb %fs:i@TPOFF, %al} %t2
-
-@i = thread_local global i8 15
-
-define i8 @f() {
-entry:
- %tmp1 = load i8* @i
- ret i8 %tmp1
-}
diff --git a/test/CodeGen/X86/tls13.ll b/test/CodeGen/X86/tls13.ll
deleted file mode 100644
index 08778ec2ce8b..000000000000
--- a/test/CodeGen/X86/tls13.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movswl %gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzwl %gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movswl %fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzwl %fs:j@TPOFF, %edi} %t2
-
-@i = thread_local global i16 0
-@j = thread_local global i16 0
-
-define void @f() nounwind optsize {
-entry:
- %0 = load i16* @i, align 2
- %1 = sext i16 %0 to i32
- tail call void @g(i32 %1) nounwind
- %2 = load i16* @j, align 2
- %3 = zext i16 %2 to i32
- tail call void @h(i32 %3) nounwind
- ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
diff --git a/test/CodeGen/X86/tls14.ll b/test/CodeGen/X86/tls14.ll
deleted file mode 100644
index 88426dd43d50..000000000000
--- a/test/CodeGen/X86/tls14.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movsbl %gs:i@NTPOFF, %eax} %t
-; RUN: grep {movzbl %gs:j@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movsbl %fs:i@TPOFF, %edi} %t2
-; RUN: grep {movzbl %fs:j@TPOFF, %edi} %t2
-
-@i = thread_local global i8 0
-@j = thread_local global i8 0
-
-define void @f() nounwind optsize {
-entry:
- %0 = load i8* @i, align 2
- %1 = sext i8 %0 to i32
- tail call void @g(i32 %1) nounwind
- %2 = load i8* @j, align 2
- %3 = zext i8 %2 to i32
- tail call void @h(i32 %3) nounwind
- ret void
-}
-
-declare void @g(i32)
-
-declare void @h(i32)
diff --git a/test/CodeGen/X86/tls15.ll b/test/CodeGen/X86/tls15.ll
deleted file mode 100644
index 7abf070d3fd2..000000000000
--- a/test/CodeGen/X86/tls15.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:0, %eax} %t | count 1
-; RUN: grep {leal i@NTPOFF(%eax), %ecx} %t
-; RUN: grep {leal j@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq %fs:0, %rax} %t2 | count 1
-; RUN: grep {leaq i@TPOFF(%rax), %rcx} %t2
-; RUN: grep {leaq j@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 0
-@j = thread_local global i32 0
-
-define void @f(i32** %a, i32** %b) {
-entry:
- store i32* @i, i32** %a, align 8
- store i32* @j, i32** %b, align 8
- ret void
-}
diff --git a/test/CodeGen/X86/tls2.ll b/test/CodeGen/X86/tls2.ll
deleted file mode 100644
index 5a94296afefc..000000000000
--- a/test/CodeGen/X86/tls2.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:0, %eax} %t
-; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq %fs:0, %rax} %t2
-; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
-
-@i = thread_local global i32 15
-
-define i32* @f() {
-entry:
- ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
deleted file mode 100644
index 7327cc41777e..000000000000
--- a/test/CodeGen/X86/tls3.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl i@INDNTPOFF, %eax} %t
-; RUN: grep {movl %gs:(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq i@GOTTPOFF(%rip), %rax} %t2
-; RUN: grep {movl %fs:(%rax), %eax} %t2
-
-@i = external thread_local global i32 ; <i32*> [#uses=2]
-
-define i32 @f() nounwind {
-entry:
- %tmp1 = load i32* @i ; <i32> [#uses=1]
- ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls4.ll b/test/CodeGen/X86/tls4.ll
deleted file mode 100644
index d2e40e389bd5..000000000000
--- a/test/CodeGen/X86/tls4.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:0, %eax} %t
-; RUN: grep {addl i@INDNTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq %fs:0, %rax} %t2
-; RUN: grep {addq i@GOTTPOFF(%rip), %rax} %t2
-
-@i = external thread_local global i32 ; <i32*> [#uses=2]
-
-define i32* @f() {
-entry:
- ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls5.ll b/test/CodeGen/X86/tls5.ll
deleted file mode 100644
index 4d2cc02b5028..000000000000
--- a/test/CodeGen/X86/tls5.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
-
-@i = internal thread_local global i32 15
-
-define i32 @f() {
-entry:
- %tmp1 = load i32* @i
- ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls6.ll b/test/CodeGen/X86/tls6.ll
deleted file mode 100644
index 505106ee14ed..000000000000
--- a/test/CodeGen/X86/tls6.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:0, %eax} %t
-; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq %fs:0, %rax} %t2
-; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
-
-@i = internal thread_local global i32 15
-
-define i32* @f() {
-entry:
- ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls7.ll b/test/CodeGen/X86/tls7.ll
deleted file mode 100644
index e9116e772090..000000000000
--- a/test/CodeGen/X86/tls7.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32 @f() {
-entry:
- %tmp1 = load i32* @i
- ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/tls8.ll b/test/CodeGen/X86/tls8.ll
deleted file mode 100644
index 375af94920f5..000000000000
--- a/test/CodeGen/X86/tls8.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:0, %eax} %t
-; RUN: grep {leal i@NTPOFF(%eax), %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movq %fs:0, %rax} %t2
-; RUN: grep {leaq i@TPOFF(%rax), %rax} %t2
-
-@i = hidden thread_local global i32 15
-
-define i32* @f() {
-entry:
- ret i32* @i
-}
diff --git a/test/CodeGen/X86/tls9.ll b/test/CodeGen/X86/tls9.ll
deleted file mode 100644
index 7d08df84a9fa..000000000000
--- a/test/CodeGen/X86/tls9.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu > %t
-; RUN: grep {movl %gs:i@NTPOFF, %eax} %t
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu > %t2
-; RUN: grep {movl %fs:i@TPOFF, %eax} %t2
-
-@i = external hidden thread_local global i32
-
-define i32 @f() nounwind {
-entry:
- %tmp1 = load i32* @i
- ret i32 %tmp1
-}
diff --git a/test/CodeGen/X86/twoaddr-lea.ll b/test/CodeGen/X86/twoaddr-lea.ll
index a1d797feeac4..9d58019b1a99 100644
--- a/test/CodeGen/X86/twoaddr-lea.ll
+++ b/test/CodeGen/X86/twoaddr-lea.ll
@@ -5,7 +5,7 @@
;; allocator turns the shift into an LEA. This also occurs for ADD.
; Check that the shift gets turned into an LEA.
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-apple-darwin | FileCheck %s
@G = external global i32
@@ -14,7 +14,7 @@ define i32 @test1(i32 %X) nounwind {
; CHECK-NOT: mov
; CHECK: leal 1(%rdi)
%Z = add i32 %X, 1
- volatile store i32 %Z, i32* @G
+ store volatile i32 %Z, i32* @G
ret i32 %X
}
diff --git a/test/CodeGen/X86/uint64-to-float.ll b/test/CodeGen/X86/uint64-to-float.ll
index 1dbbdcf89279..e853e7717f12 100644
--- a/test/CodeGen/X86/uint64-to-float.ll
+++ b/test/CodeGen/X86/uint64-to-float.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
; Verify that we are using the efficient uitofp --> sitofp lowering illustrated
; by the compiler_rt implementation of __floatundisf.
; <rdar://problem/8493982>
@@ -6,37 +6,12 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
-; FIXME: This test could generate this code:
-;
-; ## BB#0: ## %entry
-; testq %rdi, %rdi
-; jns LBB0_2
-; ## BB#1:
-; movq %rdi, %rax
-; shrq %rax
-; andq $1, %rdi
-; orq %rax, %rdi
-; cvtsi2ssq %rdi, %xmm0
-; addss %xmm0, %xmm0
-; ret
-; LBB0_2: ## %entry
-; cvtsi2ssq %rdi, %xmm0
-; ret
-;
-; The blocks come from lowering:
-;
-; %vreg7<def> = CMOV_FR32 %vreg6<kill>, %vreg5<kill>, 15, %EFLAGS<imp-use>; FR32:%vreg7,%vreg6,%vreg5
-;
-; If the instruction had an EFLAGS<kill> flag, it wouldn't need to mark EFLAGS
-; as live-in on the new blocks, and machine sinking would be able to sink
-; everything below the test.
-
-; CHECK: shrq
-; CHECK: andq
-; CHECK-NEXT: orq
; CHECK: testq %rdi, %rdi
; CHECK-NEXT: jns LBB0_2
-; CHECK: cvtsi2ss
+; CHECK: shrq
+; CHECK-NEXT: andq
+; CHECK-NEXT: orq
+; CHECK-NEXT: cvtsi2ss
; CHECK: LBB0_2
; CHECK-NEXT: cvtsi2ss
define float @test(i64 %a) {
diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll
index eeebceea71d4..b066297ff1b4 100644
--- a/test/CodeGen/X86/unreachable-stack-protector.ll
+++ b/test/CodeGen/X86/unreachable-stack-protector.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/CodeGen/X86/utf16-cfstrings.ll b/test/CodeGen/X86/utf16-cfstrings.ll
new file mode 100644
index 000000000000..af76a333e8a6
--- /dev/null
+++ b/test/CodeGen/X86/utf16-cfstrings.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple x86_64-apple-macosx10 | FileCheck %s
+; <rdar://problem/10655949>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str = internal unnamed_addr constant [5 x i16] [i16 252, i16 98, i16 101, i16 114, i16 0], align 2
+@_unnamed_cfstring_ = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 2000, i8* bitcast ([5 x i16]* @.str to i8*), i64 4 }, section "__DATA,__cfstring"
+
+; CHECK: .section __TEXT,__ustring
+; CHECK-NEXT: .align 1
+; CHECK-NEXT: _.str:
+; CHECK-NEXT: .short 252 ## 0xfc
+; CHECK-NEXT: .short 98 ## 0x62
+; CHECK-NEXT: .short 101 ## 0x65
+; CHECK-NEXT: .short 114 ## 0x72
+; CHECK-NEXT: .short 0 ## 0x0
+
+define i32 @main() uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ call void (%0*, ...)* @NSLog(%0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_ to %0*))
+ ret i32 0
+}
+
+declare void @NSLog(%0*, ...)
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+
+!0 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!1 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!2 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!3 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/CodeGen/X86/utf8.ll b/test/CodeGen/X86/utf8.ll
new file mode 100644
index 000000000000..67bc5ae2fd41
--- /dev/null
+++ b/test/CodeGen/X86/utf8.ll
@@ -0,0 +1,4 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+; CHECK: iΔ
+@"i\CE\94" = common global i32 0, align 4
diff --git a/test/CodeGen/X86/v-binop-widen.ll b/test/CodeGen/X86/v-binop-widen.ll
index 3bee7007749c..8655c6c8ea54 100644
--- a/test/CodeGen/X86/v-binop-widen.ll
+++ b/test/CodeGen/X86/v-binop-widen.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s
+; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
; CHECK: divss
; CHECK: divps
; CHECK: divps
diff --git a/test/CodeGen/X86/vec_call.ll b/test/CodeGen/X86/vec_call.ll
index b3efc7b16b7d..f2fc7e7d9d5d 100644
--- a/test/CodeGen/X86/vec_call.ll
+++ b/test/CodeGen/X86/vec_call.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
; RUN: grep {subl.*60}
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin8 | \
; RUN: grep {movaps.*32}
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
index 04bb7254fb08..91777f7aa6b4 100644
--- a/test/CodeGen/X86/vec_compare-2.ll
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mcpu=penryn | FileCheck %s
+; RUN: llc < %s -mtriple=i686-linux -mcpu=penryn | FileCheck %s
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
@@ -8,9 +8,12 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
entry:
+; CHECK: cfi_def_cfa_offset
; CHECK-NOT: set
-; CHECK: pcmpgt
-; CHECK: blendvps
+; CHECK: movzwl
+; CHECK: movzwl
+; CHECK: pshufd
+; CHECK: pshufb
%shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
%cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
%sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_ctbits.ll b/test/CodeGen/X86/vec_ctbits.ll
index f0158d643c17..bddd53514643 100644
--- a/test/CodeGen/X86/vec_ctbits.ll
+++ b/test/CodeGen/X86/vec_ctbits.ll
@@ -1,15 +1,15 @@
; RUN: llc < %s -march=x86-64
-declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>)
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>)
+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
define <2 x i64> @footz(<2 x i64> %a) nounwind {
- %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a)
+ %c = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
}
define <2 x i64> @foolz(<2 x i64> %a) nounwind {
- %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a)
+ %c = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
ret <2 x i64> %c
}
define <2 x i64> @foopop(<2 x i64> %a) nounwind {
diff --git a/test/CodeGen/X86/vec_extract-sse4.ll b/test/CodeGen/X86/vec_extract-sse4.ll
index f4876543d329..42d7f27f7d60 100644
--- a/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/test/CodeGen/X86/vec_extract-sse4.ll
@@ -2,7 +2,7 @@
; RUN: not grep extractps %t
; RUN: not grep pextrd %t
; RUN: not grep pshufd %t
-; RUN: grep movss %t | count 2
+; RUN: not grep movss %t
define void @t1(float* %R, <4 x float>* %P1) nounwind {
%X = load <4 x float>* %P1
diff --git a/test/CodeGen/X86/vec_fpext.ll b/test/CodeGen/X86/vec_fpext.ll
new file mode 100644
index 000000000000..05b263e2e0c4
--- /dev/null
+++ b/test/CodeGen/X86/vec_fpext.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse41,-avx | FileCheck %s
+
+; PR11674
+define void @fpext_frommem(<2 x float>* %in, <2 x double>* %out) {
+entry:
+; TODO: We should be able to generate cvtps2pd for the load.
+; For now, just check that we generate something sane.
+; CHECK: cvtss2sd
+; CHECK: cvtss2sd
+ %0 = load <2 x float>* %in, align 8
+ %1 = fpext <2 x float> %0 to <2 x double>
+ store <2 x double> %1, <2 x double>* %out, align 1
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_return.ll b/test/CodeGen/X86/vec_return.ll
index 676be9b7179c..2cf5dc6caa77 100644
--- a/test/CodeGen/X86/vec_return.ll
+++ b/test/CodeGen/X86/vec_return.ll
@@ -1,12 +1,16 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 > %t
-; RUN: grep pxor %t | count 1
-; RUN: grep movaps %t | count 1
-; RUN: not grep shuf %t
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; Without any typed operations, always use the smaller xorps.
+; CHECK: test
+; CHECK: xorps
define <2 x double> @test() {
ret <2 x double> zeroinitializer
}
+; Prefer a constant pool load here.
+; CHECK: test2
+; CHECK-NOT: shuf
+; CHECK: movaps {{.*}}CPI
define <4 x i32> @test2() nounwind {
ret <4 x i32> < i32 0, i32 0, i32 1, i32 0 >
}
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index fc06b9514e43..b6b8ba6f846a 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind {
entry:
diff --git a/test/CodeGen/X86/vec_shuffle-23.ll b/test/CodeGen/X86/vec_shuffle-23.ll
index 05a3a1e9d276..24687359cc5a 100644
--- a/test/CodeGen/X86/vec_shuffle-23.ll
+++ b/test/CodeGen/X86/vec_shuffle-23.ll
@@ -5,7 +5,7 @@ define i32 @t() nounwind {
entry:
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
- volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+ store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
%tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp, <4 x i32>* %b
%tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-24.ll b/test/CodeGen/X86/vec_shuffle-24.ll
index 1b104deb3055..d038dafaf294 100644
--- a/test/CodeGen/X86/vec_shuffle-24.ll
+++ b/test/CodeGen/X86/vec_shuffle-24.ll
@@ -5,7 +5,7 @@ entry:
; CHECK: punpckldq
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
- volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
+ store volatile <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
%tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp, <4 x i32>* %b
%tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll
index 950040a124a5..430aa046afab 100644
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -4,10 +4,10 @@
define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp {
entry:
-; CHECK: movaps ({{%rdi|%rcx}}), %xmm0
-; CHECK: movaps %xmm0, %xmm1
-; CHECK-NEXT: movss %xmm2, %xmm1
-; CHECK-NEXT: shufps $36, %xmm1, %xmm0
+; CHECK: movaps ({{%rdi|%rcx}}), %[[XMM0:xmm[0-9]+]]
+; CHECK: movaps %[[XMM0]], %[[XMM1:xmm[0-9]+]]
+; CHECK-NEXT: movss %xmm{{[0-9]+}}, %[[XMM1]]
+; CHECK-NEXT: shufps $36, %[[XMM1]], %[[XMM0]]
%0 = load <4 x i32>* undef, align 16
%1 = load <4 x i32>* %a0, align 16
%2 = shufflevector <4 x i32> %1, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
@@ -26,10 +26,12 @@ entry:
define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
entry:
-; CHECK: movaps 32({{%rdi|%rcx}}), %xmm0
-; CHECK-NEXT: movaps 48({{%rdi|%rcx}}), %xmm1
-; CHECK-NEXT: movss %xmm1, %xmm0
-; CHECK-NEXT: movq %xmm0, ({{%rsi|%rdx}})
+; CHECK: t02
+; CHECK: movaps
+; CHECK: shufps
+; CHECK: pshufd
+; CHECK: movq
+; CHECK: ret
%0 = bitcast <8 x i32>* %source to <4 x i32>*
%arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
%tmp2 = load <4 x i32>* %arrayidx, align 16
diff --git a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll
index 69a2ede758ae..96ef883c4e1e 100644
--- a/test/CodeGen/X86/vec_shuffle-38.ll
+++ b/test/CodeGen/X86/vec_shuffle-38.ll
@@ -46,10 +46,9 @@ entry:
; rdar://10119696
; CHECK: f
-define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind uwtable readonly ssp {
+define <4 x float> @f(<4 x float> %x, double* nocapture %y) nounwind readonly ssp {
entry:
- ; CHECK: movsd (%
- ; CHECK-NEXT: movsd %xmm
+ ; CHECK: movlps (%{{rdi|rdx}}), %xmm0
%u110.i = load double* %y, align 1
%tmp8.i = insertelement <2 x double> undef, double %u110.i, i32 0
%tmp9.i = bitcast <2 x double> %tmp8.i to <4 x float>
@@ -57,3 +56,22 @@ entry:
ret <4 x float> %shuffle.i
}
+define <4 x float> @loadhpi2(%struct.Float2* nocapture %vHiCoefPtr_0, %struct.Float2* nocapture %vLoCoefPtr_0, i32 %s) nounwind readonly ssp {
+entry:
+; CHECK: loadhpi2
+; CHECK: movhps (
+; CHECK-NOT: movlhps
+ %0 = bitcast %struct.Float2* %vHiCoefPtr_0 to <1 x i64>*
+ %idx.ext = sext i32 %s to i64
+ %add.ptr = getelementptr inbounds <1 x i64>* %0, i64 %idx.ext
+ %add.ptr.val = load <1 x i64>* %add.ptr, align 1
+ %1 = bitcast <1 x i64> %add.ptr.val to <2 x float>
+ %shuffle.i = shufflevector <2 x float> %1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %2 = bitcast %struct.Float2* %vLoCoefPtr_0 to <1 x i64>*
+ %add.ptr2 = getelementptr inbounds <1 x i64>* %2, i64 %idx.ext
+ %add.ptr2.val = load <1 x i64>* %add.ptr2, align 1
+ %3 = bitcast <1 x i64> %add.ptr2.val to <2 x float>
+ %shuffle.i4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %shuffle1.i5 = shufflevector <4 x float> %shuffle.i, <4 x float> %shuffle.i4, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x float> %shuffle1.i5
+}
diff --git a/test/CodeGen/X86/vec_shuffle-39.ll b/test/CodeGen/X86/vec_shuffle-39.ll
new file mode 100644
index 000000000000..55531e305cb8
--- /dev/null
+++ b/test/CodeGen/X86/vec_shuffle-39.ll
@@ -0,0 +1,86 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; rdar://10050222, rdar://10134392
+
+define <4 x float> @t1(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+ %p.val = load <1 x i64>* %p, align 1
+ %0 = bitcast <1 x i64> %p.val to <2 x float>
+ %shuffle.i = shufflevector <2 x float> %0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %shuffle1.i = shufflevector <4 x float> %a, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x float> %shuffle1.i
+}
+
+define <4 x float> @t1a(<4 x float> %a, <1 x i64>* nocapture %p) nounwind {
+entry:
+; CHECK: t1a:
+; CHECK: movlps (%rdi), %xmm0
+; CHECK: ret
+ %0 = bitcast <1 x i64>* %p to double*
+ %1 = load double* %0
+ %2 = insertelement <2 x double> undef, double %1, i32 0
+ %3 = bitcast <2 x double> %2 to <4 x float>
+ %4 = shufflevector <4 x float> %a, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+ ret <4 x float> %4
+}
+
+define void @t2(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+ %cast.i = bitcast <4 x float> %a to <2 x i64>
+ %extract.i = extractelement <2 x i64> %cast.i, i32 0
+ %0 = getelementptr inbounds <1 x i64>* %p, i64 0, i64 0
+ store i64 %extract.i, i64* %0, align 8
+ ret void
+}
+
+define void @t2a(<1 x i64>* nocapture %p, <4 x float> %a) nounwind {
+entry:
+; CHECK: t2a:
+; CHECK: movlps %xmm0, (%rdi)
+; CHECK: ret
+ %0 = bitcast <1 x i64>* %p to double*
+ %1 = bitcast <4 x float> %a to <2 x double>
+ %2 = extractelement <2 x double> %1, i32 0
+ store double %2, double* %0
+ ret void
+}
+
+; rdar://10436044
+define <2 x double> @t3() nounwind readonly {
+bb:
+; CHECK: t3:
+; CHECK: punpcklqdq %xmm1, %xmm0
+; CHECK: movq (%rax), %xmm1
+; CHECK: movsd %xmm1, %xmm0
+ %tmp0 = load i128* null, align 1
+ %tmp1 = load <2 x i32>* undef, align 8
+ %tmp2 = bitcast i128 %tmp0 to <16 x i8>
+ %tmp3 = bitcast <2 x i32> %tmp1 to i64
+ %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
+ %tmp5 = bitcast <16 x i8> %tmp2 to <2 x double>
+ %tmp6 = bitcast <2 x i64> %tmp4 to <2 x double>
+ %tmp7 = shufflevector <2 x double> %tmp5, <2 x double> %tmp6, <2 x i32> <i32 2, i32 1>
+ ret <2 x double> %tmp7
+}
+
+; rdar://10450317
+define <2 x i64> @t4() nounwind readonly {
+bb:
+; CHECK: t4:
+; CHECK: punpcklqdq %xmm0, %xmm1
+; CHECK: movq (%rax), %xmm0
+; CHECK: movsd %xmm1, %xmm0
+ %tmp0 = load i128* null, align 1
+ %tmp1 = load <2 x i32>* undef, align 8
+ %tmp2 = bitcast i128 %tmp0 to <16 x i8>
+ %tmp3 = bitcast <2 x i32> %tmp1 to i64
+ %tmp4 = insertelement <2 x i64> undef, i64 %tmp3, i32 0
+ %tmp5 = bitcast <16 x i8> %tmp2 to <2 x i64>
+ %tmp6 = shufflevector <2 x i64> %tmp4, <2 x i64> %tmp5, <2 x i32> <i32 2, i32 1>
+ ret <2 x i64> %tmp6
+}
diff --git a/test/CodeGen/X86/vec_shuffle.ll b/test/CodeGen/X86/vec_shuffle.ll
index 2a48de22098f..65995984859b 100644
--- a/test/CodeGen/X86/vec_shuffle.ll
+++ b/test/CodeGen/X86/vec_shuffle.ll
@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: grep movupd %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -mtriple=i686-linux -mcpu=core2 | FileCheck %s
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
%tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
%tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
ret void
}
+; CHECK: test_v2sd
+; CHECK: movups 8(%esp)
+; CHECK: movaps
define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
%tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
%tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
ret void
}
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
%tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1]
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8]
diff --git a/test/CodeGen/X86/vec_udiv_to_shift.ll b/test/CodeGen/X86/vec_udiv_to_shift.ll
new file mode 100644
index 000000000000..6edfcc0c3fa7
--- /dev/null
+++ b/test/CodeGen/X86/vec_udiv_to_shift.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define <8 x i16> @udiv_vec8x16(<8 x i16> %var) {
+entry:
+; CHECK: lshr <8 x i16> %var, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+%0 = udiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+ret <8 x i16> %0
+}
+
+define <4 x i32> @udiv_vec4x32(<4 x i32> %var) {
+entry:
+; CHECK: lshr <4 x i32> %var, <i32 4, i32 4, i32 4, i32 4>
+%0 = udiv <4 x i32> %var, <i32 16, i32 16, i32 16, i32 16>
+ret <4 x i32> %0
+}
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 4d1f05629b41..682a0dfca806 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -1,5 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; CHECK: foo
; CHECK: xorps
define void @foo(<4 x float>* %P) {
%T = load <4 x float>* %P ; <<4 x float>> [#uses=1]
@@ -8,6 +9,7 @@ define void @foo(<4 x float>* %P) {
ret void
}
+; CHECK: bar
; CHECK: pxor
define void @bar(<4 x i32>* %P) {
%T = load <4 x i32>* %P ; <<4 x i32>> [#uses=1]
@@ -16,3 +18,13 @@ define void @bar(<4 x i32>* %P) {
ret void
}
+; Without any type hints from operations, we fall back to the smaller xorps.
+; The IR type <4 x i32> is ignored.
+; CHECK: untyped_zero
+; CHECK: xorps
+; CHECK: movaps
+define void @untyped_zero(<4 x i32>* %p) {
+entry:
+ store <4 x i32> zeroinitializer, <4 x i32>* %p, align 16
+ ret void
+}
diff --git a/test/CodeGen/X86/vec_zero_cse.ll b/test/CodeGen/X86/vec_zero_cse.ll
index 8aa50945e635..41ea0245ed86 100644
--- a/test/CodeGen/X86/vec_zero_cse.ll
+++ b/test/CodeGen/X86/vec_zero_cse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pxor | count 1
+; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep xorps | count 1
; RUN: llc < %s -relocation-model=static -march=x86 -mcpu=yonah | grep pcmpeqd | count 1
; 64-bit stores here do not use MMX.
diff --git a/test/CodeGen/X86/vector-gep.ll b/test/CodeGen/X86/vector-gep.ll
new file mode 100644
index 000000000000..3476e36c646f
--- /dev/null
+++ b/test/CodeGen/X86/vector-gep.ll
@@ -0,0 +1,88 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx | FileCheck %s
+; RUN: opt -instsimplify %s -disable-output
+
+;CHECK: AGEP0:
+define <4 x i32*> @AGEP0(i32* %ptr) nounwind {
+entry:
+ %vecinit.i = insertelement <4 x i32*> undef, i32* %ptr, i32 0
+ %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32 1
+ %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32 2
+ %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32 3
+;CHECK: pslld $2
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+;CHECK: pslld $2
+;CHECK: padd
+ %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32 19, i32 233>
+ ret <4 x i32*> %A3
+;CHECK: ret
+}
+
+;CHECK: AGEP1:
+define i32 @AGEP1(<4 x i32*> %param) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ %k = extractelement <4 x i32*> %A2, i32 3
+ %v = load i32* %k
+ ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP2:
+define i32 @AGEP2(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+ %k = extractelement <4 x i32*> %A2, i32 3
+ %v = load i32* %k
+ ret i32 %v
+;CHECK: ret
+}
+
+;CHECK: AGEP3:
+define <4 x i32*> @AGEP3(<4 x i32*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK: pslld $2
+;CHECK: padd
+ %A2 = getelementptr <4 x i32*> %param, <4 x i32> %off
+ %v = alloca i32
+ %k = insertelement <4 x i32*> %A2, i32* %v, i32 3
+ ret <4 x i32*> %k
+;CHECK: ret
+}
+
+;CHECK: AGEP4:
+define <4 x i16*> @AGEP4(<4 x i16*> %param, <4 x i32> %off) nounwind {
+entry:
+; Multiply offset by two (add it to itself).
+;CHECK: padd
+; Add the base to the offset.
+;CHECK: padd
+ %A = getelementptr <4 x i16*> %param, <4 x i32> %off
+ ret <4 x i16*> %A
+;CHECK: ret
+}
+
+;CHECK: AGEP5:
+define <4 x i8*> @AGEP5(<4 x i8*> %param, <4 x i8> %off) nounwind {
+entry:
+;CHECK: paddd
+ %A = getelementptr <4 x i8*> %param, <4 x i8> %off
+ ret <4 x i8*> %A
+;CHECK: ret
+}
+
+
+; The size of each element is 1 byte. No need to multiply by element size.
+;CHECK: AGEP6:
+define <4 x i8*> @AGEP6(<4 x i8*> %param, <4 x i32> %off) nounwind {
+entry:
+;CHECK-NOT: pslld
+ %A = getelementptr <4 x i8*> %param, <4 x i32> %off
+ ret <4 x i8*> %A
+;CHECK: ret
+}
+
diff --git a/test/CodeGen/X86/vector-variable-idx2.ll b/test/CodeGen/X86/vector-variable-idx2.ll
new file mode 100644
index 000000000000..d47df90e7e64
--- /dev/null
+++ b/test/CodeGen/X86/vector-variable-idx2.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse41
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+define i64 @__builtin_ia32_vec_ext_v2di(<2 x i64> %a, i32 %i) nounwind {
+ %1 = alloca <2 x i64>, align 16
+ %2 = alloca i32, align 4
+ store <2 x i64> %a, <2 x i64>* %1, align 16
+ store i32 %i, i32* %2, align 4
+ %3 = load <2 x i64>* %1, align 16
+ %4 = load i32* %2, align 4
+ %5 = extractelement <2 x i64> %3, i32 %4
+ ret i64 %5
+}
+
+define <2 x i64> @__builtin_ia32_vec_int_v2di(<2 x i64> %a, i32 %i) nounwind {
+ %1 = alloca <2 x i64>, align 16
+ %2 = alloca i32, align 4
+ store <2 x i64> %a, <2 x i64>* %1, align 16
+ store i32 %i, i32* %2, align 4
+ %3 = load <2 x i64>* %1, align 16
+ %4 = load i32* %2, align 4
+ %5 = insertelement <2 x i64> %3, i64 1, i32 %4
+ ret <2 x i64> %5
+}
diff --git a/test/CodeGen/X86/volatile.ll b/test/CodeGen/X86/volatile.ll
index 2e5742afdf85..1a82014536e7 100644
--- a/test/CodeGen/X86/volatile.ll
+++ b/test/CodeGen/X86/volatile.ll
@@ -4,14 +4,14 @@
@x = external global double
define void @foo() nounwind {
- %a = volatile load double* @x
- volatile store double 0.0, double* @x
- volatile store double 0.0, double* @x
- %b = volatile load double* @x
+ %a = load volatile double* @x
+ store volatile double 0.0, double* @x
+ store volatile double 0.0, double* @x
+ %b = load volatile double* @x
ret void
}
define void @bar() nounwind {
- %c = volatile load double* @x
+ %c = load volatile double* @x
ret void
}
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll
index 97dacfdf09e0..ee98806c0f8b 100644
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-
-define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
-; CHECK: andb
+define void @t0(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
+; CHECK: t0
+; CHECK: pand
+; CHECK: ret
%cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
%cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
%t1 = and <2 x i1> %cmp1, %cmp2
@@ -12,7 +13,9 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind reado
}
define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK: t2
+; CHECK-NOT: pand
+; CHECK: ret
%cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
%cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
%t1 = and <3 x i1> %cmp1, %cmp2
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll
index 4b8016dc7132..661cde8bda3b 100644
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,12 +1,10 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-
-; Widen a v3i8 to v16i8 to use a vector add
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 | FileCheck %s
define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
entry:
; CHECK-NOT: pextrw
-; CHECK: paddb
-; CHECK: pextrb
+; CHECK: add
+
%dst.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2]
%src.addr = alloca <3 x i8>* ; <<3 x i8>**> [#uses=2]
%n.addr = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll
index 03b3fea01f6c..d35abc308173 100644
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: paddb
+; CHECK: padd
; CHECK: pand
; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index 057492377a27..f55b184f3acc 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: paddw
-; CHECK: pextrw
-; CHECK: movd
+; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
+; CHECK: incl
+; CHECK: incl
+; CHECK: incl
+; CHECK: addl
; Widen a v3i16 to v8i16 to do a vector add
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll
index 1eace9e024e0..4330aae8ec82 100644
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,5 +1,5 @@
; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
-; CHECK: paddw
+; CHECK: paddd
; CHECK: pextrd
; CHECK: movd
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll
index 8e1adf58f869..5ea54267692a 100644
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,16 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-
-; v8i8 that is widen to v16i8 then split
-; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
-; Unfortunately, we don't split the store so we don't get the code we want.
+; CHECK: psraw
+; CHECK: psraw
define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
entry:
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll
index f6810cda9e35..51f1c887b00d 100644
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: pshufd
-; CHECK: paddd
+; CHECK: paddq
; truncate v2i64 to v2i32
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll
index 80f3a492c494..affd796ffc3f 100644
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: cvtsi2ss
+; CHECK-NOT: cvtsi2ss
; unsigned to float v7i16 to v7f32
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll
index c91627cd27a3..4aeec9136d0e 100644
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -1,18 +1,18 @@
-; RUN: llc < %s -o - -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -o - -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64
; PR4891
; Both loads should happen before either store.
-; CHECK: movl (%rdi), %[[R1:...]]
-; CHECK: movl (%rsi), %[[R2:...]]
-; CHECK: movl %[[R2]], (%rdi)
-; CHECK: movl %[[R1]], (%rsi)
+; CHECK: movd ({{.*}}), {{.*}}
+; CHECK: movd ({{.*}}), {{.*}}
+; CHECK: movd {{.*}}, ({{.*}})
+; CHECK: movd {{.*}}, ({{.*}})
-; WIN64: movl (%rcx), %[[R1:...]]
-; WIN64: movl (%rdx), %[[R2:...]]
-; WIN64: movl %[[R2]], (%rcx)
-; WIN64: movl %[[R1]], (%rdx)
+; WIN64: movd ({{.*}}), {{.*}}
+; WIN64: movd ({{.*}}), {{.*}}
+; WIN64: movd {{.*}}, ({{.*}})
+; WIN64: movd {{.*}}, ({{.*}})
define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
entry:
diff --git a/test/CodeGen/X86/widen_load-1.ll b/test/CodeGen/X86/widen_load-1.ll
index 639617f17774..9705d149ddcc 100644
--- a/test/CodeGen/X86/widen_load-1.ll
+++ b/test/CodeGen/X86/widen_load-1.ll
@@ -1,5 +1,6 @@
; RUN: llc %s -o - -march=x86-64 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
; PR4891
+; PR5626
; This load should be before the call, not after.
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll
index 642206316c6b..79aa00050254 100644
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -o - -march=x86-64 -mattr=+sse42 | FileCheck %s
+; RUN: llc < %s -o - -mcpu=generic -march=x86-64 -mattr=+sse42 | FileCheck %s
; Test based on pr5626 to load/store
;
%i32vec3 = type <3 x i32>
+; CHECK: add3i32
define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: movdqa
; CHECK: paddd
@@ -16,6 +17,7 @@ define void @add3i32(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
ret void
}
+; CHECK: add3i32_2
define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
; CHECK: movq
; CHECK: pinsrd
@@ -32,6 +34,7 @@ define void @add3i32_2(%i32vec3* sret %ret, %i32vec3* %ap, %i32vec3* %bp) {
}
%i32vec7 = type <7 x i32>
+; CHECK: add7i32
define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
; CHECK: movdqa
; CHECK: movdqa
@@ -47,6 +50,7 @@ define void @add7i32(%i32vec7* sret %ret, %i32vec7* %ap, %i32vec7* %bp) {
ret void
}
+; CHECK: add12i32
%i32vec12 = type <12 x i32>
define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
; CHECK: movdqa
@@ -66,12 +70,14 @@ define void @add12i32(%i32vec12* sret %ret, %i32vec12* %ap, %i32vec12* %bp) {
}
+; CHECK: add3i16
%i16vec3 = type <3 x i16>
define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: movd
-; CHECK: pextrw
+; CHECK: add3i16
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+; CHECK: ret
%a = load %i16vec3* %ap, align 16
%b = load %i16vec3* %bp, align 16
%x = add %i16vec3 %a, %b
@@ -79,10 +85,11 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
ret void
}
+; CHECK: add4i16
%i16vec4 = type <4 x i16>
define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
+; CHECK: add4i16
+; CHECK: paddd
; CHECK: movq
%a = load %i16vec4* %ap, align 16
%b = load %i16vec4* %bp, align 16
@@ -91,6 +98,7 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
ret void
}
+; CHECK: add12i16
%i16vec12 = type <12 x i16>
define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
; CHECK: movdqa
@@ -106,6 +114,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
ret void
}
+; CHECK: add18i16
%i16vec18 = type <18 x i16>
define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
; CHECK: movdqa
@@ -125,12 +134,13 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
}
+; CHECK: add3i8
%i8vec3 = type <3 x i8>
define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: movb
+; CHECK: addb
+; CHECK: addb
+; CHECK: addb
+; CHECK: ret
%a = load %i8vec3* %ap, align 16
%b = load %i8vec3* %bp, align 16
%x = add %i8vec3 %a, %b
@@ -138,6 +148,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
ret void
}
+; CHECK: add31i8:
%i8vec31 = type <31 x i8>
define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
; CHECK: movdqa
@@ -147,6 +158,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
; CHECK: movq
; CHECK: pextrb
; CHECK: pextrw
+; CHECK: ret
%a = load %i8vec31* %ap, align 16
%b = load %i8vec31* %bp, align 16
%x = add %i8vec31 %a, %b
@@ -155,9 +167,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
}
+; CHECK: rot
%i8vec3pack = type { <3 x i8>, i8 }
define %i8vec3pack @rot() nounwind {
-; CHECK: shrb
+; CHECK: movd {{-?[0-9]+}}(%rsp), {{%xmm[0-9]}}
entry:
%X = alloca %i8vec3pack, align 4
%rot = alloca %i8vec3pack, align 4
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll
index 8e951b77ca6a..7bebb274f6ec 100644
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -10,6 +10,7 @@ entry:
%val = fadd <3 x float> %x, %src2
store <3 x float> %val, <3 x float>* %dst.addr
ret void
+; CHECK: ret
}
@@ -23,6 +24,7 @@ entry:
%val = fadd <3 x float> %x, %src2
store <3 x float> %val, <3 x float>* %dst.addr
ret void
+; CHECK: ret
}
; Example of when widening a v3float operation causes the DAG to replace a node
@@ -31,7 +33,7 @@ entry:
define void @shuf3(<4 x float> %tmp10, <4 x float> %vecinit15, <4 x float>* %dst) nounwind {
entry:
; CHECK: shuf3:
-; CHECK: pshufd
+; CHECK: shufps
%shuffle.i.i.i12 = shufflevector <4 x float> %tmp10, <4 x float> %vecinit15, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
%tmp25.i.i = shufflevector <4 x float> %shuffle.i.i.i12, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
%tmp1.i.i = shufflevector <3 x float> %tmp25.i.i, <3 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -45,12 +47,23 @@ entry:
%shuffle.i.i.i21 = shufflevector <4 x float> %tmp2.i18, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
store <4 x float> %shuffle.i.i.i21, <4 x float>* %dst
ret void
+; CHECK: ret
}
; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
; CHECK: shuf4:
-; CHECK: punpckldq
+; CHECK-NOT: punpckldq
%vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %vshuf
+; CHECK: ret
+}
+
+; PR11389: another CONCAT_VECTORS case
+define void @shuf5(<8 x i8>* %p) nounwind {
+; CHECK: shuf5:
+ %v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> undef, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ store <8 x i8> %v, <8 x i8>* %p, align 8
+ ret void
+; CHECK: ret
}
diff --git a/test/CodeGen/X86/win32_sret.ll b/test/CodeGen/X86/win32_sret.ll
new file mode 100644
index 000000000000..878c6db99286
--- /dev/null
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN_X32
+; RUN: llc < %s -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X32
+; RUN: llc < %s -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
+
+; The SysV ABI used by most Unixes and Mingw on x86 specifies that an sret pointer
+; is callee-cleanup. However, in MSVC's cdecl calling convention, sret pointer
+; arguments are caller-cleanup like normal arguments.
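+;
+; As a rough illustration (assumed, not checked by this test), a call to
+; @sret1 differs only in who pops the sret slot:
+;
+;   pushl %eax          ; pass the sret pointer
+;   calll _sret1
+;   addl $4, %esp       ; MSVC cdecl: the caller pops the sret slot
+;
+; On Linux and MinGW the callee's "ret $4" has already popped the slot,
+; so the caller omits the addl.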
+
+define void @sret1(i8* sret) nounwind {
+entry:
+; WIN_X32: {{ret$}}
+; MINGW_X32: ret $4
+; LINUX: ret $4
+ ret void
+}
+
+define void @sret2(i32* sret %x, i32 %y) nounwind {
+entry:
+; WIN_X32: {{ret$}}
+; MINGW_X32: ret $4
+; LINUX: ret $4
+ store i32 %y, i32* %x
+ ret void
+}
+
diff --git a/test/CodeGen/X86/win64_alloca_dynalloca.ll b/test/CodeGen/X86/win64_alloca_dynalloca.ll
index e39d007b6fab..a961c6af1884 100644
--- a/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -join-physregs -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
-; RUN: llc < %s -join-physregs -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -join-physregs -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; PR8777
; PR8778
diff --git a/test/CodeGen/X86/win64_vararg.ll b/test/CodeGen/X86/win64_vararg.ll
index efe8bcacbeae..52bc50922c26 100644
--- a/test/CodeGen/X86/win64_vararg.ll
+++ b/test/CodeGen/X86/win64_vararg.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-pc-win32 | FileCheck %s
; Verify that the var arg parameters which are passed in registers are stored
; in home stack slots allocated by the caller and that AP is correctly
diff --git a/test/CodeGen/X86/win_ftol2.ll b/test/CodeGen/X86/win_ftol2.ll
new file mode 100644
index 000000000000..596b4262e6b0
--- /dev/null
+++ b/test/CodeGen/X86/win_ftol2.ll
@@ -0,0 +1,149 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL
+; RUN: llc < %s -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=COMPILERRT
+; RUN: llc < %s -mattr=-sse -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=FTOL_2
+
+; Win32 targets use the MSVCRT _ftol2 runtime function for fptoui to i64. This
+; function has a nonstandard calling convention: the input value is expected on
+; the x87 stack instead of the call stack, and it is popped by the callee.
+; Mingw32 uses normal cdecl compiler-rt functions.
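+;
+; A minimal call sequence sketch (assumed, not part of the checks below):
+;
+;   fldl 4(%esp)        ; argument travels on the x87 stack
+;   calll __ftol2       ; callee pops %st(0); result comes back in %edx:%eax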
+
+define i64 @double_ui64(double %x) nounwind {
+entry:
+; COMPILERRT: @double_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @double_ui64
+; FTOL: fldl
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+ %0 = fptoui double %x to i64
+ ret i64 %0
+}
+
+define i64 @float_ui64(float %x) nounwind {
+entry:
+; COMPILERRT: @float_ui64
+; COMPILERRT-NOT: calll __ftol2
+; FTOL: @float_ui64
+; FTOL: flds
+; FTOL: calll __ftol2
+; FTOL-NOT: fstp
+ %0 = fptoui float %x to i64
+ ret i64 %0
+}
+
+define i64 @double_ui64_2(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_2
+; FTOL: @double_ui64_2
+; FTOL_2: @double_ui64_2
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %2 %1
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+ %1 = fdiv double %x, %y
+ %2 = fsub double %x, %z
+ %3 = fptoui double %1 to i64
+ %4 = fptoui double %2 to i64
+ %5 = sub i64 %3, %4
+ ret i64 %5
+}
+
+define i64 @double_ui64_3(double %x, double %y, double %z) nounwind {
+; COMPILERRT: @double_ui64_3
+; FTOL: @double_ui64_3
+; FTOL_2: @double_ui64_3
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %z
+; FTOL_2: fldl
+;; stack is %y %z
+; FTOL_2: fldl
+;; stack is %x %y %z
+; FTOL_2: fdiv %st(0), %st(1)
+;; stack is %x %1 %z
+; FTOL_2: fsubp %st(2)
+;; stack is %1 %2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+;; stack is %1 %2 (still)
+; FTOL_2: calll __ftol2
+; FTOL_2-NOT: fxch
+; FTOL_2-NOT: fld
+; FTOL_2-NOT: fst
+; FTOL_2: calll __ftol2
+;; stack is empty
+
+ %1 = fdiv double %x, %y
+ %2 = fsub double %x, %z
+ %3 = fptoui double %1 to i64
+ %4 = fptoui double %2 to i64
+ %5 = sub i64 %4, %3
+ ret i64 %5
+}
+
+define {double, i64} @double_ui64_4(double %x, double %y) nounwind {
+; COMPILERRT: @double_ui64_4
+; FTOL: @double_ui64_4
+; FTOL_2: @double_ui64_4
+;; stack is empty
+; FTOL_2: fldl
+;; stack is %y
+; FTOL_2: fldl
+;; stack is %x %y
+; FTOL_2: fxch
+;; stack is %y %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+; FTOL_2: fld %st(0)
+;; stack is %x %x
+; FTOL_2: calll __ftol2
+;; stack is %x
+
+ %1 = fptoui double %x to i64
+ %2 = fptoui double %y to i64
+ %3 = sub i64 %1, %2
+ %4 = insertvalue {double, i64} undef, double %x, 0
+ %5 = insertvalue {double, i64} %4, i64 %3, 1
+ ret {double, i64} %5
+}
+
+define i32 @double_ui32_5(double %X) {
+; FTOL: @double_ui32_5
+; FTOL: calll __ftol2
+ %tmp.1 = fptoui double %X to i32
+ ret i32 %tmp.1
+}
+
+define i64 @double_ui64_5(double %X) {
+; FTOL: @double_ui64_5
+; FTOL: calll __ftol2
+ %tmp.1 = fptoui double %X to i64
+ ret i64 %tmp.1
+}
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll
index fdf68f92a927..20bccab8ff78 100644
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -6,8 +6,9 @@
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK: shl4
+; CHECK: padd
; CHECK: pslld
-; CHECK-NEXT: pslld
+; CHECK: ret
%B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
%K = xor <4 x i32> %B, %C
@@ -19,6 +20,7 @@ entry:
; CHECK: shr4
; CHECK: psrld
; CHECK-NEXT: psrld
+; CHECK: ret
%B = lshr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = lshr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
%K = xor <4 x i32> %B, %C
@@ -30,6 +32,7 @@ entry:
; CHECK: sra4
; CHECK: psrad
; CHECK-NEXT: psrad
+; CHECK: ret
%B = ashr <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = ashr <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
%K = xor <4 x i32> %B, %C
@@ -41,6 +44,7 @@ entry:
; CHECK: shl2
; CHECK: psllq
; CHECK-NEXT: psllq
+; CHECK: ret
%B = shl <2 x i64> %A, < i64 2, i64 2>
%C = shl <2 x i64> %A, < i64 9, i64 9>
%K = xor <2 x i64> %B, %C
@@ -52,6 +56,7 @@ entry:
; CHECK: shr2
; CHECK: psrlq
; CHECK-NEXT: psrlq
+; CHECK: ret
%B = lshr <2 x i64> %A, < i64 8, i64 8>
%C = lshr <2 x i64> %A, < i64 1, i64 1>
%K = xor <2 x i64> %B, %C
@@ -62,8 +67,9 @@ entry:
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK: shl8
+; CHECK: padd
; CHECK: psllw
-; CHECK-NEXT: psllw
+; CHECK: ret
%B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -75,6 +81,7 @@ entry:
; CHECK: shr8
; CHECK: psrlw
; CHECK-NEXT: psrlw
+; CHECK: ret
%B = lshr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = lshr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -86,6 +93,7 @@ entry:
; CHECK: sra8
; CHECK: psraw
; CHECK-NEXT: psraw
+; CHECK: ret
%B = ashr <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = ashr <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -100,6 +108,7 @@ entry:
; CHECK: sll8_nosplat
; CHECK-NOT: psll
; CHECK-NOT: psll
+; CHECK: ret
%B = shl <8 x i16> %A, < i16 1, i16 2, i16 3, i16 6, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 9, i16 7, i16 5, i16 1, i16 4, i16 1, i16 1, i16 1>
%K = xor <8 x i16> %B, %C
@@ -112,6 +121,7 @@ entry:
; CHECK: shr2_nosplat
; CHECK-NOT: psrlq
; CHECK-NOT: psrlq
+; CHECK: ret
%B = lshr <2 x i64> %A, < i64 8, i64 1>
%C = lshr <2 x i64> %A, < i64 1, i64 0>
%K = xor <2 x i64> %B, %C
@@ -124,7 +134,8 @@ entry:
define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
entry:
; CHECK: shl2_other
-; CHECK-not: psllq
+; CHECK: psllq
+; CHECK: ret
%B = shl <2 x i32> %A, < i32 2, i32 2>
%C = shl <2 x i32> %A, < i32 9, i32 9>
%K = xor <2 x i32> %B, %C
@@ -134,9 +145,48 @@ entry:
define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
entry:
; CHECK: shr2_other
-; CHECK-NOT: psrlq
+; CHECK: psrlq
+; CHECK: ret
%B = lshr <2 x i32> %A, < i32 8, i32 8>
%C = lshr <2 x i32> %A, < i32 1, i32 1>
%K = xor <2 x i32> %B, %C
ret <2 x i32> %K
}
+
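+; SSE2 has no byte-element shift instructions, so the v16i8 shifts below are
+; assumed to lower to a 16-bit shift plus a mask of the bits shifted across
+; byte boundaries; arithmetic right shifts additionally need a sign-fixup
+; (or a pcmpgtb compare against zero for a shift by 7).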
+define <16 x i8> @shl9(<16 x i8> %A) nounwind {
+ %B = shl <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %B
+; CHECK: shl9:
+; CHECK: psllw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @shr9(<16 x i8> %A) nounwind {
+ %B = lshr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %B
+; CHECK: shr9:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8_7(<16 x i8> %A) nounwind {
+ %B = ashr <16 x i8> %A, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ret <16 x i8> %B
+; CHECK: sra_v16i8_7:
+; CHECK: pxor
+; CHECK: pcmpgtb
+; CHECK: ret
+}
+
+define <16 x i8> @sra_v16i8(<16 x i8> %A) nounwind {
+ %B = ashr <16 x i8> %A, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+ ret <16 x i8> %B
+; CHECK: sra_v16i8:
+; CHECK: psrlw $3
+; CHECK: pand
+; CHECK: pxor
+; CHECK: psubb
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
new file mode 100644
index 000000000000..a2521b0a66db
--- /dev/null
+++ b/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -0,0 +1,969 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -march=x86-64 -mattr=+avx,+fma4,+xop | FileCheck %s
+
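+; Each test below assumes the intrinsic selects the matching XOP instruction;
+; the *_mem/_mr/_rm variants additionally expect the load to be folded into
+; the instruction rather than emitted as a separate vmovaps.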
+define <2 x double> @test_int_x86_xop_vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
+ ; CHECK: vpermil2pd
+ %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 1) ; [#uses=1]
+ ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ %vec = load <2 x double>* %a1
+ %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ; [#uses=1]
+ ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ %vec = load <2 x double>* %a2
+ %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ; [#uses=1]
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
+ ; CHECK: vpermil2pd
+ ; CHECK: ymm
+ %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 2) ;
+ ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_mr(<4 x double> %a0, <4 x double>* %a1, <4 x double> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ ; CHECK: ymm
+ %vec = load <4 x double>* %a1
+ %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
+ ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vpermil2pd_256_rm(<4 x double> %a0, <4 x double> %a1, <4 x double>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpermil2pd
+ ; CHECK: ymm
+ %vec = load <4 x double>* %a2
+ %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
+ ; CHECK: vpermil2ps
+ %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 3) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
+ ; CHECK: vpermil2ps
+ ; CHECK: ymm
+ %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 4) ;
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
+ ; CHECK: vpcmov
+ %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
+
+define <4 x i64> @test_int_x86_xop_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) ;
+ ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_mr(<4 x i64> %a0, <4 x i64>* %a1, <4 x i64> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %vec = load <4 x i64>* %a1
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
+ ret <4 x i64> %res
+}
+define <4 x i64> @test_int_x86_xop_vpcmov_256_rm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpcmov
+ ; CHECK: ymm
+ %vec = load <4 x i64>* %a2
+ %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpcomb
+ %vec = load <16 x i8>* %a1
+ %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomequb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomequd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomequq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomequw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomged(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgew(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomled(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomleub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomleud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomlew(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomltub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomltud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomltw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomned(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomneub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomneud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomnew(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomb
+ %res = call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomd
+ %res = call <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpcomub
+ %res = call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpcomud
+ %res = call <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpcomuq
+ %res = call <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomuw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpcomw
+ %res = call <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddbd(<16 x i8> %a0) {
+ ; CHECK: vphaddbd
+ %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddbq(<16 x i8> %a0) {
+ ; CHECK: vphaddbq
+ %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddbw(<16 x i8> %a0) {
+ ; CHECK: vphaddbw
+ %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadddq(<4 x i32> %a0) {
+ ; CHECK: vphadddq
+ %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddubd(<16 x i8> %a0) {
+ ; CHECK: vphaddubd
+ %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddubq(<16 x i8> %a0) {
+ ; CHECK: vphaddubq
+ %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphaddubw(<16 x i8> %a0) {
+ ; CHECK: vphaddubw
+ %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddudq(<4 x i32> %a0) {
+ ; CHECK: vphaddudq
+ %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphadduwd(<8 x i16> %a0) {
+ ; CHECK: vphadduwd
+ %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphadduwq(<8 x i16> %a0) {
+ ; CHECK: vphadduwq
+ %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphaddwd(<8 x i16> %a0) {
+ ; CHECK: vphaddwd
+ %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphaddwq(<8 x i16> %a0) {
+ ; CHECK: vphaddwq
+ %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vphsubbw(<16 x i8> %a0) {
+ ; CHECK: vphsubbw
+ %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vphsubdq(<4 x i32> %a0) {
+ ; CHECK: vphsubdq
+ %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) ;
+ ret <2 x i64> %res
+}
+define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vphsubdq
+ %vec = load <4 x i32>* %a0
+ %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vphsubwd(<8 x i16> %a0) {
+ ; CHECK: vphsubwd
+ %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) ;
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vphsubwd
+ %vec = load <8 x i16>* %a0
+ %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacsdd
+ %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacsdqh
+ %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacsdql
+ %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacssdd
+ %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacssdqh
+ %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) {
+ ; CHECK: vpmacssdql
+ %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacsswd
+ %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ; CHECK: vpmacssww
+ %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmacswd
+ %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) {
+ ; CHECK: vpmacsww
+ %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmadcsswd
+ %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) {
+ ; CHECK: vpmadcswd
+ %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpmadcswd
+ %vec = load <8 x i16>* %a1
+ %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
+ ; CHECK: vpperm
+ %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ;
+ ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpperm
+ %vec = load <16 x i8>* %a2
+ %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
+ ret <16 x i8> %res
+}
+define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpperm
+ %vec = load <16 x i8>* %a1
+ %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vprotb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vprotb
+ %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vprotd(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vprotd
+ %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vprotq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vprotq
+ %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vprotw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vprotw
+ %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshab(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpshab
+ %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshad(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpshad
+ %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshaq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpshaq
+ %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshaw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpshaw
+ %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <16 x i8> @test_int_x86_xop_vpshlb(<16 x i8> %a0, <16 x i8> %a1) {
+ ; CHECK: vpshlb
+ %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) ;
+ ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone
+
+define <4 x i32> @test_int_x86_xop_vpshld(<4 x i32> %a0, <4 x i32> %a1) {
+ ; CHECK: vpshld
+ %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) ;
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <2 x i64> @test_int_x86_xop_vpshlq(<2 x i64> %a0, <2 x i64> %a1) {
+ ; CHECK: vpshlq
+ %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) ;
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i16> @test_int_x86_xop_vpshlw(<8 x i16> %a0, <8 x i16> %a1) {
+ ; CHECK: vpshlw
+ %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpshlw
+ %vec = load <8 x i16>* %a1
+ %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
+ ret <8 x i16> %res
+}
+define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vpshlw
+ %vec = load <8 x i16>* %a0
+ %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ss(<4 x float> %a0, <4 x float> %a1) {
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczss
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %a1) ;
+ ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(<4 x float> %a0, float* %a1) {
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczss
+ %elem = load float* %a1
+ %vec = insertelement <4 x float> undef, float %elem, i32 0
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0, <4 x float> %vec) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>, <4 x float>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_sd(<2 x double> %a0, <2 x double> %a1) {
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczsd
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %a1) ;
+ ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(<2 x double> %a0, double* %a1) {
+ ; CHECK-NOT: mov
+ ; CHECK: vfrczsd
+ %elem = load double* %a1
+ %vec = insertelement <2 x double> undef, double %elem, i32 0
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0, <2 x double> %vec) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>, <2 x double>) nounwind readnone
+
+define <2 x double> @test_int_x86_xop_vfrcz_pd(<2 x double> %a0) {
+ ; CHECK: vfrczpd
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) ;
+ ret <2 x double> %res
+}
+define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczpd
+ %vec = load <2 x double>* %a0
+ %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
+ ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone
+
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256(<4 x double> %a0) {
+ ; CHECK: vfrczpd
+ ; CHECK: ymm
+ %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) ;
+ ret <4 x double> %res
+}
+define <4 x double> @test_int_x86_xop_vfrcz_pd_256_mem(<4 x double>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczpd
+ ; CHECK: ymm
+ %vec = load <4 x double>* %a0
+ %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
+ ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone
+
+define <4 x float> @test_int_x86_xop_vfrcz_ps(<4 x float> %a0) {
+ ; CHECK: vfrczps
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) ;
+ ret <4 x float> %res
+}
+define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczps
+ %vec = load <4 x float>* %a0
+ %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
+ ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone
+
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256(<8 x float> %a0) {
+ ; CHECK: vfrczps
+ ; CHECK: ymm
+ %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) ;
+ ret <8 x float> %res
+}
+define <8 x float> @test_int_x86_xop_vfrcz_ps_256_mem(<8 x float>* %a0) {
+ ; CHECK-NOT: vmovaps
+ ; CHECK: vfrczps
+ ; CHECK: ymm
+ %vec = load <8 x float>* %a0
+ %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
+ ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone
+
diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll
index 178c59dbaa97..ddc4cab14a4c 100644
--- a/test/CodeGen/X86/xor.ll
+++ b/test/CodeGen/X86/xor.ll
@@ -8,7 +8,7 @@ define <4 x i32> @test1() nounwind {
ret <4 x i32> %tmp
; X32: test1:
-; X32: pxor %xmm0, %xmm0
+; X32: xorps %xmm0, %xmm0
; X32: ret
}
diff --git a/test/CodeGen/X86/zero-remat.ll b/test/CodeGen/X86/zero-remat.ll
index 447007439fbb..4242530f7731 100644
--- a/test/CodeGen/X86/zero-remat.ll
+++ b/test/CodeGen/X86/zero-remat.ll
@@ -16,9 +16,9 @@ define double @foo() nounwind {
;CHECK-32: ret
;CHECK-64: foo:
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: ret
}
@@ -33,8 +33,8 @@ define float @foof() nounwind {
;CHECK-32: ret
;CHECK-64: foof:
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: call
-;CHECK-64: pxor
+;CHECK-64: xorps
;CHECK-64: ret
}
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
index b3f5cdbb88d1..ff93c68ff35a 100644
--- a/test/CodeGen/X86/zext-fold.ll
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
+; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
;; Simple case
define i32 @test1(i8 %x) nounwind readnone {
@@ -34,7 +34,7 @@ define void @test3(i8 %x) nounwind readnone {
ret void
}
; CHECK: test3
-; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]]
+; CHECK: movzbl {{[0-9]+}}(%esp), [[REGISTER:%e[a-z]{2}]]
; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
; CHECK-NEXT: andl $224, [[REGISTER]]
; CHECK-NEXT: movl [[REGISTER]], (%esp)
diff --git a/test/CodeGen/X86/zext-sext.ll b/test/CodeGen/X86/zext-sext.ll
index cea9e9c854db..6432ae38ff3a 100644
--- a/test/CodeGen/X86/zext-sext.ll
+++ b/test/CodeGen/X86/zext-sext.ll
@@ -1,4 +1,5 @@
; XFAIL: *
+; ...should pass. See PR12324: misched bringup
; RUN: llc < %s -march=x86-64 | FileCheck %s
; <rdar://problem/8006248>
diff --git a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
index 7d6d7bac3d6e..84e21e46348d 100644
--- a/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
+++ b/test/CodeGen/XCore/2011-08-01-DynamicAllocBug.ll
@@ -16,5 +16,5 @@ allocas:
; CHECK: f:
; CHECK: ldaw [[REGISTER:r[0-9]+]], {{r[0-9]+}}[-r1]
; CHECK: set sp, [[REGISTER]]
-; CHECK extsp 1
-; CHECK bl g
+; CHECK: extsp 1
+; CHECK: bl g
diff --git a/test/CodeGen/XCore/cos.ll b/test/CodeGen/XCore/cos.ll
deleted file mode 100644
index 8211f85b9bc2..000000000000
--- a/test/CodeGen/XCore/cos.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl cosf" %t1.s | count 1
-; RUN: grep "bl cos" %t1.s | count 2
-declare double @llvm.cos.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.cos.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.cos.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.cos.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/dg.exp b/test/CodeGen/XCore/dg.exp
deleted file mode 100644
index 7110eabb3a53..000000000000
--- a/test/CodeGen/XCore/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target XCore] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
-}
diff --git a/test/CodeGen/XCore/exp.ll b/test/CodeGen/XCore/exp.ll
deleted file mode 100644
index d23d484ed62e..000000000000
--- a/test/CodeGen/XCore/exp.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl expf" %t1.s | count 1
-; RUN: grep "bl exp" %t1.s | count 2
-declare double @llvm.exp.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.exp.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.exp.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.exp.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/exp2.ll b/test/CodeGen/XCore/exp2.ll
deleted file mode 100644
index 4c4d17f4bbf7..000000000000
--- a/test/CodeGen/XCore/exp2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl exp2f" %t1.s | count 1
-; RUN: grep "bl exp2" %t1.s | count 2
-declare double @llvm.exp2.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.exp2.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.exp2.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.exp2.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/float-intrinsics.ll b/test/CodeGen/XCore/float-intrinsics.ll
new file mode 100644
index 000000000000..69a40f3c79bf
--- /dev/null
+++ b/test/CodeGen/XCore/float-intrinsics.ll
@@ -0,0 +1,171 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
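+; XCore has no hardware floating point, so each intrinsic below is expected
+; to lower to a call ("bl") to the corresponding libm routine, or to the
+; compiler-rt __powi* helpers for llvm.powi.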
+declare double @llvm.cos.f64(double)
+declare double @llvm.exp.f64(double)
+declare double @llvm.exp2.f64(double)
+declare double @llvm.log.f64(double)
+declare double @llvm.log10.f64(double)
+declare double @llvm.log2.f64(double)
+declare double @llvm.pow.f64(double, double)
+declare double @llvm.powi.f64(double, i32)
+declare double @llvm.sin.f64(double)
+declare double @llvm.sqrt.f64(double)
+
+define double @cos(double %F) {
+; CHECK: cos:
+; CHECK: bl cos
+ %result = call double @llvm.cos.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.cos.f32(float)
+
+; CHECK: cosf:
+; CHECK: bl cosf
+define float @cosf(float %F) {
+ %result = call float @llvm.cos.f32(float %F)
+ ret float %result
+}
+
+define double @exp(double %F) {
+; CHECK: exp:
+; CHECK: bl exp
+ %result = call double @llvm.exp.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.exp.f32(float)
+
+define float @expf(float %F) {
+; CHECK: expf:
+; CHECK: bl expf
+ %result = call float @llvm.exp.f32(float %F)
+ ret float %result
+}
+
+define double @exp2(double %F) {
+; CHECK: exp2:
+; CHECK: bl exp2
+ %result = call double @llvm.exp2.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.exp2.f32(float)
+
+define float @exp2f(float %F) {
+; CHECK: exp2f:
+; CHECK: bl exp2f
+ %result = call float @llvm.exp2.f32(float %F)
+ ret float %result
+}
+
+define double @log(double %F) {
+; CHECK: log:
+; CHECK: bl log
+ %result = call double @llvm.log.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.log.f32(float)
+
+define float @logf(float %F) {
+; CHECK: logf:
+; CHECK: bl logf
+ %result = call float @llvm.log.f32(float %F)
+ ret float %result
+}
+
+define double @log10(double %F) {
+; CHECK: log10:
+; CHECK: bl log10
+ %result = call double @llvm.log10.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.log10.f32(float)
+
+define float @log10f(float %F) {
+; CHECK: log10f:
+; CHECK: bl log10f
+ %result = call float @llvm.log10.f32(float %F)
+ ret float %result
+}
+
+define double @log2(double %F) {
+; CHECK: log2:
+; CHECK: bl log2
+ %result = call double @llvm.log2.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.log2.f32(float)
+
+define float @log2f(float %F) {
+; CHECK: log2f:
+; CHECK: bl log2f
+ %result = call float @llvm.log2.f32(float %F)
+ ret float %result
+}
+
+define double @pow(double %F, double %power) {
+; CHECK: pow:
+; CHECK: bl pow
+ %result = call double @llvm.pow.f64(double %F, double %power)
+ ret double %result
+}
+
+declare float @llvm.pow.f32(float, float)
+
+define float @powf(float %F, float %power) {
+; CHECK: powf:
+; CHECK: bl powf
+ %result = call float @llvm.pow.f32(float %F, float %power)
+ ret float %result
+}
+
+define double @powi(double %F, i32 %power) {
+; CHECK: powi:
+; CHECK: bl __powidf2
+ %result = call double @llvm.powi.f64(double %F, i32 %power)
+ ret double %result
+}
+
+declare float @llvm.powi.f32(float, i32)
+
+define float @powif(float %F, i32 %power) {
+; CHECK: powif:
+; CHECK: bl __powisf2
+ %result = call float @llvm.powi.f32(float %F, i32 %power)
+ ret float %result
+}
+
+define double @sin(double %F) {
+; CHECK: sin:
+; CHECK: bl sin
+ %result = call double @llvm.sin.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.sin.f32(float)
+
+define float @sinf(float %F) {
+; CHECK: sinf:
+; CHECK: bl sinf
+ %result = call float @llvm.sin.f32(float %F)
+ ret float %result
+}
+
+define double @sqrt(double %F) {
+; CHECK: sqrt:
+; CHECK: bl sqrt
+ %result = call double @llvm.sqrt.f64(double %F)
+ ret double %result
+}
+
+declare float @llvm.sqrt.f32(float)
+
+define float @sqrtf(float %F) {
+; CHECK: sqrtf:
+; CHECK: bl sqrtf
+ %result = call float @llvm.sqrt.f32(float %F)
+ ret float %result
+}
diff --git a/test/CodeGen/XCore/fneg.ll b/test/CodeGen/XCore/fneg.ll
index e3dd3dd45c23..d442a19712f3 100644
--- a/test/CodeGen/XCore/fneg.ll
+++ b/test/CodeGen/XCore/fneg.ll
@@ -1,6 +1,8 @@
-; RUN: llc < %s -march=xcore | grep "xor" | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
define i1 @test(double %F) nounwind {
entry:
+; CHECK: test:
+; CHECK: xor
%0 = fsub double -0.000000e+00, %F
%1 = fcmp olt double 0.000000e+00, %0
ret i1 %1
diff --git a/test/CodeGen/XCore/getid.ll b/test/CodeGen/XCore/getid.ll
index ecab65c0e92e..ec46071b546c 100644
--- a/test/CodeGen/XCore/getid.ll
+++ b/test/CodeGen/XCore/getid.ll
@@ -1,8 +1,10 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "get r11, id" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
declare i32 @llvm.xcore.getid()
define i32 @test() {
+; CHECK: test:
+; CHECK: get r11, id
+; CHECK-NEXT: mov r0, r11
%result = call i32 @llvm.xcore.getid()
ret i32 %result
}
diff --git a/test/CodeGen/XCore/global_negative_offset.ll b/test/CodeGen/XCore/global_negative_offset.ll
new file mode 100644
index 000000000000..0328fb0460f3
--- /dev/null
+++ b/test/CodeGen/XCore/global_negative_offset.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=xcore | FileCheck %s
+
+; Don't fold negative offsets into cp / dp accesses to avoid a relocation
+; error if the address + addend is less than the start of the cp / dp.
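+;
+; For example, the getelementptr at index -1 below computes the address a - 4;
+; folding that into a single cp-relative access would need a negative addend
+; that could relocate below the start of the constant pool, so the code keeps
+; an explicit subtraction (ldaw followed by sub, as checked below).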
+
+@a = external constant [0 x i32], section ".cp.rodata"
+@b = external global [0 x i32]
+
+define i32 *@f() nounwind {
+entry:
+; CHECK: f:
+; CHECK: ldaw r11, cp[a]
+; CHECK: sub r0, r11, 4
+ %0 = getelementptr [0 x i32]* @a, i32 0, i32 -1
+ ret i32* %0
+}
+
+define i32 *@g() nounwind {
+entry:
+; CHECK: g:
+; CHECK: ldaw [[REG:r[0-9]+]], dp[b]
+; CHECK: sub r0, [[REG]], 4
+ %0 = getelementptr [0 x i32]* @b, i32 0, i32 -1
+ ret i32* %0
+}
diff --git a/test/CodeGen/XCore/ladd_lsub_combine.ll b/test/CodeGen/XCore/ladd_lsub_combine.ll
index a693ee22291a..cd89966bcde7 100644
--- a/test/CodeGen/XCore/ladd_lsub_combine.ll
+++ b/test/CodeGen/XCore/ladd_lsub_combine.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=xcore | FileCheck %s
+; RUN: llc -march=xcore < %s | FileCheck %s
; Only needs one ladd
define i64 @f1(i32 %x, i32 %y) nounwind {
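This one-line change drops the redundant llvm-as stage: llc parses textual IR directly, so piping the file through the assembler first only slowed the test down. The two spellings, side by side (shown as RUN-line comments):

; RUN: llvm-as < %s | llc -march=xcore | FileCheck %s   ; old: assemble to bitcode, then compile
; RUN: llc -march=xcore < %s | FileCheck %s             ; new: llc reads the .ll file directly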
diff --git a/test/CodeGen/XCore/licm-ldwcp.ll b/test/CodeGen/XCore/licm-ldwcp.ll
index 4884f70e736b..794c6bb64e39 100644
--- a/test/CodeGen/XCore/licm-ldwcp.ll
+++ b/test/CodeGen/XCore/licm-ldwcp.ll
@@ -13,6 +13,6 @@ entry:
br label %bb
bb: ; preds = %bb, %entry
- volatile store i32 525509670, i32* %p, align 4
+ store volatile i32 525509670, i32* %p, align 4
br label %bb
}
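The "volatile store" to "store volatile" rewrite here (and in scavenging.ll and 2010-05-03-OriginDIE.ll below) tracks the LLVM IR syntax change of this era that moved volatile to a keyword position after the instruction name; the old prefix spelling was being phased out of the textual format. Side by side, assuming an i32* %p in scope (the old form is shown as a comment since it no longer parses under the new grammar):

; volatile store i32 42, i32* %p, align 4   ; old prefix form
store volatile i32 42, i32* %p, align 4     ; new keyword form, same semantics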
diff --git a/test/CodeGen/XCore/lit.local.cfg b/test/CodeGen/XCore/lit.local.cfg
new file mode 100644
index 000000000000..f8726af57f79
--- /dev/null
+++ b/test/CodeGen/XCore/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if 'XCore' not in targets:
+ config.unsupported = True
+
diff --git a/test/CodeGen/XCore/load.ll b/test/CodeGen/XCore/load.ll
index adfea212a279..faff03b1e70d 100644
--- a/test/CodeGen/XCore/load.ll
+++ b/test/CodeGen/XCore/load.ll
@@ -1,15 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: not grep add %t1.s
-; RUN: not grep ldaw %t1.s
-; RUN: not grep lda16 %t1.s
-; RUN: not grep zext %t1.s
-; RUN: not grep sext %t1.s
-; RUN: grep "ldw" %t1.s | count 2
-; RUN: grep "ld16s" %t1.s | count 1
-; RUN: grep "ld8u" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
define i32 @load32(i32* %p, i32 %offset) nounwind {
entry:
+; CHECK: load32:
+; CHECK: ldw r0, r0[r1]
%0 = getelementptr i32* %p, i32 %offset
%1 = load i32* %0, align 4
ret i32 %1
@@ -17,6 +11,8 @@ entry:
define i32 @load32_imm(i32* %p) nounwind {
entry:
+; CHECK: load32_imm:
+; CHECK: ldw r0, r0[11]
%0 = getelementptr i32* %p, i32 11
%1 = load i32* %0, align 4
ret i32 %1
@@ -24,6 +20,9 @@ entry:
define i32 @load16(i16* %p, i32 %offset) nounwind {
entry:
+; CHECK: load16:
+; CHECK: ld16s r0, r0[r1]
+; CHECK-NOT: sext
%0 = getelementptr i16* %p, i32 %offset
%1 = load i16* %0, align 2
%2 = sext i16 %1 to i32
@@ -32,6 +31,9 @@ entry:
define i32 @load8(i8* %p, i32 %offset) nounwind {
entry:
+; CHECK: load8:
+; CHECK: ld8u r0, r0[r1]
+; CHECK-NOT: zext
%0 = getelementptr i8* %p, i32 %offset
%1 = load i8* %0, align 1
%2 = zext i8 %1 to i32
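The CHECK-NOT lines take over from the old "not grep" RUN lines, with a scoping difference worth noting: CHECK-NOT asserts the pattern is absent only between the surrounding positive matches (or from the last match to end of input), not across the whole file. Sketch for the load16 case (the matched output in the comments is an assumption):

; CHECK: ld16s r0, r0[r1]   ; positive match
; CHECK-NOT: sext           ; no "sext" may appear after the ld16s match,
                            ; up to the next positive CHECK or end of input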
diff --git a/test/CodeGen/XCore/log.ll b/test/CodeGen/XCore/log.ll
deleted file mode 100644
index a08471f48e4a..000000000000
--- a/test/CodeGen/XCore/log.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl logf" %t1.s | count 1
-; RUN: grep "bl log" %t1.s | count 2
-declare double @llvm.log.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.log.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.log.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.log.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/log10.ll b/test/CodeGen/XCore/log10.ll
deleted file mode 100644
index a72b8bfaf6b9..000000000000
--- a/test/CodeGen/XCore/log10.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl log10f" %t1.s | count 1
-; RUN: grep "bl log10" %t1.s | count 2
-declare double @llvm.log10.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.log10.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.log10.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.log10.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/log2.ll b/test/CodeGen/XCore/log2.ll
deleted file mode 100644
index d257433a01a7..000000000000
--- a/test/CodeGen/XCore/log2.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl log2f" %t1.s | count 1
-; RUN: grep "bl log2" %t1.s | count 2
-declare double @llvm.log2.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.log2.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.log2.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.log2.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/pow.ll b/test/CodeGen/XCore/pow.ll
deleted file mode 100644
index b461185b7fde..000000000000
--- a/test/CodeGen/XCore/pow.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl powf" %t1.s | count 1
-; RUN: grep "bl pow" %t1.s | count 2
-declare double @llvm.pow.f64(double, double)
-
-define double @test(double %F, double %power) {
- %result = call double @llvm.pow.f64(double %F, double %power)
- ret double %result
-}
-
-declare float @llvm.pow.f32(float, float)
-
-define float @testf(float %F, float %power) {
- %result = call float @llvm.pow.f32(float %F, float %power)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/powi.ll b/test/CodeGen/XCore/powi.ll
deleted file mode 100644
index de31cbed00c0..000000000000
--- a/test/CodeGen/XCore/powi.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl __powidf2" %t1.s | count 1
-; RUN: grep "bl __powisf2" %t1.s | count 1
-declare double @llvm.powi.f64(double, i32)
-
-define double @test(double %F, i32 %power) {
- %result = call double @llvm.powi.f64(double %F, i32 %power)
- ret double %result
-}
-
-declare float @llvm.powi.f32(float, i32)
-
-define float @testf(float %F, i32 %power) {
- %result = call float @llvm.powi.f32(float %F, i32 %power)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index 537d63b903a0..80b7db4ce3a2 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -1,19 +1,21 @@
; Test to make sure that 'private' linkage is used correctly.
;
-; RUN: llc < %s -march=xcore > %t
-; RUN: grep .Lfoo: %t
-; RUN: grep bl.*\.Lfoo %t
-; RUN: grep .Lbaz: %t
-; RUN: grep ldw.*\.Lbaz %t
+; RUN: llc < %s -march=xcore | FileCheck %s
define private void @foo() {
+; CHECK: .Lfoo:
ret void
}
@baz = private global i32 4
define i32 @bar() {
+; CHECK: bar:
+; CHECK: bl .Lfoo
+; CHECK: ldw r0, dp[.Lbaz]
call void @foo()
%1 = load i32* @baz, align 4
ret i32 %1
}
+
+; CHECK: .Lbaz:
diff --git a/test/CodeGen/XCore/scavenging.ll b/test/CodeGen/XCore/scavenging.ll
index 3181e96116b6..5b612d0f9b59 100644
--- a/test/CodeGen/XCore/scavenging.ll
+++ b/test/CodeGen/XCore/scavenging.ll
@@ -18,32 +18,32 @@ entry:
%x = alloca [100 x i32], align 4 ; <[100 x i32]*> [#uses=2]
%0 = load i32* @size, align 4 ; <i32> [#uses=1]
%1 = alloca i32, i32 %0, align 4 ; <i32*> [#uses=1]
- %2 = volatile load i32* @g0, align 4 ; <i32> [#uses=1]
- %3 = volatile load i32* @g1, align 4 ; <i32> [#uses=1]
- %4 = volatile load i32* @g2, align 4 ; <i32> [#uses=1]
- %5 = volatile load i32* @g3, align 4 ; <i32> [#uses=1]
- %6 = volatile load i32* @g4, align 4 ; <i32> [#uses=1]
- %7 = volatile load i32* @g5, align 4 ; <i32> [#uses=1]
- %8 = volatile load i32* @g6, align 4 ; <i32> [#uses=1]
- %9 = volatile load i32* @g7, align 4 ; <i32> [#uses=1]
- %10 = volatile load i32* @g8, align 4 ; <i32> [#uses=1]
- %11 = volatile load i32* @g9, align 4 ; <i32> [#uses=1]
- %12 = volatile load i32* @g10, align 4 ; <i32> [#uses=1]
- %13 = volatile load i32* @g11, align 4 ; <i32> [#uses=2]
+ %2 = load volatile i32* @g0, align 4 ; <i32> [#uses=1]
+ %3 = load volatile i32* @g1, align 4 ; <i32> [#uses=1]
+ %4 = load volatile i32* @g2, align 4 ; <i32> [#uses=1]
+ %5 = load volatile i32* @g3, align 4 ; <i32> [#uses=1]
+ %6 = load volatile i32* @g4, align 4 ; <i32> [#uses=1]
+ %7 = load volatile i32* @g5, align 4 ; <i32> [#uses=1]
+ %8 = load volatile i32* @g6, align 4 ; <i32> [#uses=1]
+ %9 = load volatile i32* @g7, align 4 ; <i32> [#uses=1]
+ %10 = load volatile i32* @g8, align 4 ; <i32> [#uses=1]
+ %11 = load volatile i32* @g9, align 4 ; <i32> [#uses=1]
+ %12 = load volatile i32* @g10, align 4 ; <i32> [#uses=1]
+ %13 = load volatile i32* @g11, align 4 ; <i32> [#uses=2]
%14 = getelementptr [100 x i32]* %x, i32 0, i32 50 ; <i32*> [#uses=1]
store i32 %13, i32* %14, align 4
- volatile store i32 %13, i32* @g11, align 4
- volatile store i32 %12, i32* @g10, align 4
- volatile store i32 %11, i32* @g9, align 4
- volatile store i32 %10, i32* @g8, align 4
- volatile store i32 %9, i32* @g7, align 4
- volatile store i32 %8, i32* @g6, align 4
- volatile store i32 %7, i32* @g5, align 4
- volatile store i32 %6, i32* @g4, align 4
- volatile store i32 %5, i32* @g3, align 4
- volatile store i32 %4, i32* @g2, align 4
- volatile store i32 %3, i32* @g1, align 4
- volatile store i32 %2, i32* @g0, align 4
+ store volatile i32 %13, i32* @g11, align 4
+ store volatile i32 %12, i32* @g10, align 4
+ store volatile i32 %11, i32* @g9, align 4
+ store volatile i32 %10, i32* @g8, align 4
+ store volatile i32 %9, i32* @g7, align 4
+ store volatile i32 %8, i32* @g6, align 4
+ store volatile i32 %7, i32* @g5, align 4
+ store volatile i32 %6, i32* @g4, align 4
+ store volatile i32 %5, i32* @g3, align 4
+ store volatile i32 %4, i32* @g2, align 4
+ store volatile i32 %3, i32* @g1, align 4
+ store volatile i32 %2, i32* @g0, align 4
%x1 = getelementptr [100 x i32]* %x, i32 0, i32 0 ; <i32*> [#uses=1]
call void @g(i32* %x1, i32* %1) nounwind
ret void
diff --git a/test/CodeGen/XCore/sin.ll b/test/CodeGen/XCore/sin.ll
deleted file mode 100644
index ced026f1d3e1..000000000000
--- a/test/CodeGen/XCore/sin.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl sinf" %t1.s | count 1
-; RUN: grep "bl sin" %t1.s | count 2
-declare double @llvm.sin.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.sin.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.sin.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.sin.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/sqrt.ll b/test/CodeGen/XCore/sqrt.ll
deleted file mode 100644
index 364d1a14c6ae..000000000000
--- a/test/CodeGen/XCore/sqrt.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl sqrtf" %t1.s | count 1
-; RUN: grep "bl sqrt" %t1.s | count 2
-declare double @llvm.sqrt.f64(double)
-
-define double @test(double %F) {
- %result = call double @llvm.sqrt.f64(double %F)
- ret double %result
-}
-
-declare float @llvm.sqrt.f32(float)
-
-define float @testf(float %F) {
- %result = call float @llvm.sqrt.f32(float %F)
- ret float %result
-}
diff --git a/test/CodeGen/XCore/store.ll b/test/CodeGen/XCore/store.ll
index 2213743ff897..836b1254d67a 100644
--- a/test/CodeGen/XCore/store.ll
+++ b/test/CodeGen/XCore/store.ll
@@ -1,13 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: not grep add %t1.s
-; RUN: not grep ldaw %t1.s
-; RUN: not grep lda16 %t1.s
-; RUN: grep "stw" %t1.s | count 2
-; RUN: grep "st16" %t1.s | count 1
-; RUN: grep "st8" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
define void @store32(i32* %p, i32 %offset, i32 %val) nounwind {
entry:
+; CHECK: store32:
+; CHECK: stw r2, r0[r1]
%0 = getelementptr i32* %p, i32 %offset
store i32 %val, i32* %0, align 4
ret void
@@ -15,6 +11,8 @@ entry:
define void @store32_imm(i32* %p, i32 %val) nounwind {
entry:
+; CHECK: store32_imm:
+; CHECK: stw r1, r0[11]
%0 = getelementptr i32* %p, i32 11
store i32 %val, i32* %0, align 4
ret void
@@ -22,6 +20,8 @@ entry:
define void @store16(i16* %p, i32 %offset, i16 %val) nounwind {
entry:
+; CHECK: store16:
+; CHECK: st16 r2, r0[r1]
%0 = getelementptr i16* %p, i32 %offset
store i16 %val, i16* %0, align 2
ret void
@@ -29,6 +29,8 @@ entry:
define void @store8(i8* %p, i32 %offset, i8 %val) nounwind {
entry:
+; CHECK: store8:
+; CHECK: st8 r2, r0[r1]
%0 = getelementptr i8* %p, i32 %offset
store i8 %val, i8* %0, align 1
ret void
diff --git a/test/CodeGen/XCore/trap.ll b/test/CodeGen/XCore/trap.ll
index 45f886d332aa..eb71cb6acb6e 100644
--- a/test/CodeGen/XCore/trap.ll
+++ b/test/CodeGen/XCore/trap.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "ecallf" %t1.s | count 1
-; RUN: grep "ldc" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
define i32 @test() noreturn nounwind {
entry:
+; CHECK: test:
+; CHECK: ldc
+; CHECK: ecallf
tail call void @llvm.trap( )
unreachable
}
diff --git a/test/CodeGen/XCore/unaligned_store_combine.ll b/test/CodeGen/XCore/unaligned_store_combine.ll
index 493ca6a975f8..c997b78ee6bd 100644
--- a/test/CodeGen/XCore/unaligned_store_combine.ll
+++ b/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -1,11 +1,12 @@
-; RUN: llc < %s -march=xcore > %t1.s
-; RUN: grep "bl memmove" %t1.s | count 1
-; RUN: grep "ldc r., 8" %t1.s | count 1
+; RUN: llc < %s -march=xcore | FileCheck %s
; An unaligned load / store pair should be combined into a memmove
; of size 8.
define void @f(i64* %dst, i64* %src) nounwind {
entry:
+; CHECK: f:
+; CHECK: ldc r2, 8
+; CHECK: bl memmove
%0 = load i64* %src, align 1
store i64 %0, i64* %dst, align 1
ret void
diff --git a/test/DebugInfo/2009-01-15-dbg_declare.ll b/test/DebugInfo/2009-01-15-dbg_declare.ll
deleted file mode 100644
index ab404afbd8a7..000000000000
--- a/test/DebugInfo/2009-01-15-dbg_declare.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc %s -o /dev/null
-
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }*, i8*, i8* }
-@llvm.dbg.variable24 = external constant %llvm.dbg.variable.type ; <%llvm.dbg.variable.type*> [#uses=1]
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-define i32 @isascii(i32 %_c) nounwind {
-entry:
- %j = alloca i32
- %0 = bitcast i32* %j to { }*
- call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable24 to { }*))
- unreachable
-}
-
-
diff --git a/test/DebugInfo/2010-04-25-CU-entry_pc.ll b/test/DebugInfo/2010-04-25-CU-entry_pc.ll
deleted file mode 100644
index de099b6b9c50..000000000000
--- a/test/DebugInfo/2010-04-25-CU-entry_pc.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llc < %s | grep entry_pc | count 2
-@i = global i32 1 ; <i32*> [#uses=0]
-
-!llvm.dbg.gv = !{!0}
-
-!0 = metadata !{i32 524340, i32 0, metadata !1, metadata !"i", metadata !"i", metadata !"", metadata !1, i32 1, metadata !3, i1 false, i1 true, i32* @i} ; [ DW_TAG_variable ]
-!1 = metadata !{i32 524329, metadata !"b.c", metadata !"/tmp", metadata !2} ; [ DW_TAG_file_type ]
-!2 = metadata !{i32 524305, i32 0, i32 1, metadata !"b.c", metadata !"/tmp", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!3 = metadata !{i32 524324, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 0e1d1fddc412..94bddc092f4a 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -19,7 +19,7 @@ entry:
%0 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
%1 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
- %a9 = volatile load i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
+ %a9 = load volatile i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
%a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
%a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
%a12 = load i64* %a11, align 4, !dbg !7 ; <i64> [#uses=1]
@@ -29,7 +29,7 @@ entry:
call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind
call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind
call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind
- %a13 = volatile load i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
+ %a13 = load volatile i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
%a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
%a15 = add i64 %a10, %a14, !dbg !7 ; <i64> [#uses=1]
%a16 = sub i64 %a15, %a14 ; <i64> [#uses=1]
diff --git a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 001e9389cca1..2557c9c63dea 100644
--- a/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -2,7 +2,6 @@
; Check struct X for dead variable xyz from inlined function foo.
; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_sibling
; CHECK-NEXT: DW_AT_name
diff --git a/test/DebugInfo/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/2011-09-26-GlobalVarContext.ll
deleted file mode 100644
index 3e9fa88fc77f..000000000000
--- a/test/DebugInfo/2011-09-26-GlobalVarContext.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: llc -asm-verbose %s -o - | FileCheck %s
-
-; ModuleID = 'test.c'
-
-@GLOBAL = common global i32 0, align 4
-
-define i32 @f() nounwind {
- %LOCAL = alloca i32, align 4
- call void @llvm.dbg.declare(metadata !{i32* %LOCAL}, metadata !15), !dbg !17
- %1 = load i32* @GLOBAL, align 4, !dbg !18
- store i32 %1, i32* %LOCAL, align 4, !dbg !18
- %2 = load i32* @GLOBAL, align 4, !dbg !19
- ret i32 %2, !dbg !19
-}
-
-declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-
-!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
-!1 = metadata !{metadata !2}
-!2 = metadata !{i32 0}
-!3 = metadata !{metadata !4}
-!4 = metadata !{metadata !5}
-!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
-!6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ]
-!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
-!8 = metadata !{metadata !9}
-!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!10 = metadata !{metadata !11}
-!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
-!12 = metadata !{metadata !13}
-!13 = metadata !{metadata !14}
-!14 = metadata !{i32 720948, i32 0, null, metadata !"GLOBAL", metadata !"GLOBAL", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLOBAL} ; [ DW_TAG_variable ]
-!15 = metadata !{i32 721152, metadata !16, metadata !"LOCAL", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
-!16 = metadata !{i32 720907, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
-!17 = metadata !{i32 4, i32 9, metadata !16, null}
-!18 = metadata !{i32 4, i32 23, metadata !16, null}
-!19 = metadata !{i32 5, i32 5, metadata !16, null}
-
-; CHECK: .ascii "GLOBAL"
-; CHECK: .byte 1
-; CHECK: .byte 1
-
-; CHECK: .ascii "LOCAL"
-; CHECK: .byte 1
-; CHECK: .byte 4
diff --git a/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
new file mode 100644
index 000000000000..934fa81435ad
--- /dev/null
+++ b/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -asm-verbose %s -o - | FileCheck %s
+
+; ModuleID = 'test.c'
+
+@GLB = common global i32 0, align 4
+
+define i32 @f() nounwind {
+ %LOC = alloca i32, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %LOC}, metadata !15), !dbg !17
+ %1 = load i32* @GLB, align 4, !dbg !18
+ store i32 %1, i32* %LOC, align 4, !dbg !18
+ %2 = load i32* @GLB, align 4, !dbg !19
+ ret i32 %2, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", metadata !"clang version 3.0 (trunk)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !12} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 false, i32 ()* @f, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"test.c", metadata !"/work/llvm/vanilla/test/DebugInfo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{metadata !14}
+!14 = metadata !{i32 720948, i32 0, null, metadata !"GLB", metadata !"GLB", metadata !"", metadata !6, i32 1, metadata !9, i32 0, i32 1, i32* @GLB} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 721152, metadata !16, metadata !"LOC", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!16 = metadata !{i32 720907, metadata !5, i32 3, i32 9, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!17 = metadata !{i32 4, i32 9, metadata !16, null}
+!18 = metadata !{i32 4, i32 23, metadata !16, null}
+!19 = metadata !{i32 5, i32 5, metadata !16, null}
+
+; CHECK: .long .Lstring3
+; CHECK: .byte 1
+; CHECK: .byte 1
+
+; CHECK: .long .Lstring6
+; CHECK: .byte 1
+; CHECK: .byte 4
diff --git a/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
new file mode 100644
index 000000000000..6e201695636e
--- /dev/null
+++ b/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -0,0 +1,172 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: b_ref
+; CHECK-NOT: AT_bit_size
+
+%struct.bar = type { %struct.baz, %struct.baz* }
+%struct.baz = type { i32 }
+
+define i32 @main(i32 %argc, i8** %argv) uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca i8**, align 8
+ %myBar = alloca %struct.bar, align 8
+ store i32 0, i32* %retval
+ store i32 %argc, i32* %argc.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %argc.addr}, metadata !49), !dbg !50
+ store i8** %argv, i8*** %argv.addr, align 8
+ call void @llvm.dbg.declare(metadata !{i8*** %argv.addr}, metadata !51), !dbg !52
+ call void @llvm.dbg.declare(metadata !{%struct.bar* %myBar}, metadata !53), !dbg !55
+ call void @_ZN3barC1Ei(%struct.bar* %myBar, i32 1), !dbg !56
+ ret i32 0, !dbg !57
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN3barC1Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %struct.bar*, align 8
+ %x.addr = alloca i32, align 4
+ store %struct.bar* %this, %struct.bar** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !58), !dbg !59
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !60), !dbg !61
+ %this1 = load %struct.bar** %this.addr
+ %0 = load i32* %x.addr, align 4, !dbg !62
+ call void @_ZN3barC2Ei(%struct.bar* %this1, i32 %0), !dbg !62
+ ret void, !dbg !62
+}
+
+define linkonce_odr void @_ZN3barC2Ei(%struct.bar* %this, i32 %x) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %struct.bar*, align 8
+ %x.addr = alloca i32, align 4
+ store %struct.bar* %this, %struct.bar** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.bar** %this.addr}, metadata !63), !dbg !64
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !65), !dbg !66
+ %this1 = load %struct.bar** %this.addr
+ %b = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
+ %0 = load i32* %x.addr, align 4, !dbg !67
+ call void @_ZN3bazC1Ei(%struct.baz* %b, i32 %0), !dbg !67
+ %1 = getelementptr inbounds %struct.bar* %this1, i32 0, i32 1, !dbg !67
+ %b2 = getelementptr inbounds %struct.bar* %this1, i32 0, i32 0, !dbg !67
+ store %struct.baz* %b2, %struct.baz** %1, align 8, !dbg !67
+ ret void, !dbg !68
+}
+
+define linkonce_odr void @_ZN3bazC1Ei(%struct.baz* %this, i32 %a) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %struct.baz*, align 8
+ %a.addr = alloca i32, align 4
+ store %struct.baz* %this, %struct.baz** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !70), !dbg !71
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !72), !dbg !73
+ %this1 = load %struct.baz** %this.addr
+ %0 = load i32* %a.addr, align 4, !dbg !74
+ call void @_ZN3bazC2Ei(%struct.baz* %this1, i32 %0), !dbg !74
+ ret void, !dbg !74
+}
+
+define linkonce_odr void @_ZN3bazC2Ei(%struct.baz* %this, i32 %a) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %struct.baz*, align 8
+ %a.addr = alloca i32, align 4
+ store %struct.baz* %this, %struct.baz** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.baz** %this.addr}, metadata !75), !dbg !76
+ store i32 %a, i32* %a.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %a.addr}, metadata !77), !dbg !78
+ %this1 = load %struct.baz** %this.addr
+ %h = getelementptr inbounds %struct.baz* %this1, i32 0, i32 0, !dbg !79
+ %0 = load i32* %a.addr, align 4, !dbg !79
+ store i32 %0, i32* %h, align 4, !dbg !79
+ ret void, !dbg !80
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", metadata !"clang version 3.1 (trunk 146596)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !3, metadata !27, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !9}
+!5 = metadata !{i32 720898, null, metadata !"bar", metadata !6, i32 9, i64 128, i64 64, i32 0, i32 0, null, metadata !7, i32 0, null, null} ; [ DW_TAG_class_type ]
+!6 = metadata !{i32 720937, metadata !"main.cpp", metadata !"/Users/echristo/tmp/bad-struct-ref", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !8, metadata !19, metadata !21}
+!8 = metadata !{i32 720909, metadata !5, metadata !"b", metadata !6, i32 11, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!9 = metadata !{i32 720898, null, metadata !"baz", metadata !6, i32 3, i64 32, i64 32, i32 0, i32 0, null, metadata !10, i32 0, null, null} ; [ DW_TAG_class_type ]
+!10 = metadata !{metadata !11, metadata !13}
+!11 = metadata !{i32 720909, metadata !9, metadata !"h", metadata !6, i32 5, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_member ]
+!12 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!13 = metadata !{i32 720942, i32 0, metadata !9, metadata !"baz", metadata !"baz", metadata !"", metadata !6, i32 6, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !17} ; [ DW_TAG_subprogram ]
+!14 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !15, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!15 = metadata !{null, metadata !16, metadata !12}
+!16 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !9} ; [ DW_TAG_pointer_type ]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!19 = metadata !{i32 720909, metadata !5, metadata !"b_ref", metadata !6, i32 12, i64 64, i64 64, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 720912, null, null, null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !9} ; [ DW_TAG_reference_type ]
+!21 = metadata !{i32 720942, i32 0, metadata !5, metadata !"bar", metadata !"bar", metadata !"", metadata !6, i32 13, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{null, metadata !24, metadata !12}
+!24 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !5} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!27 = metadata !{metadata !28}
+!28 = metadata !{metadata !29, metadata !37, metadata !40, metadata !43, metadata !46}
+!29 = metadata !{i32 720942, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 17, metadata !30, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, i32 (i32, i8**)* @main, null, null, metadata !35} ; [ DW_TAG_subprogram ]
+!30 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !31, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!31 = metadata !{metadata !12, metadata !12, metadata !32}
+!32 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !33} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !34} ; [ DW_TAG_pointer_type ]
+!34 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!35 = metadata !{metadata !36}
+!36 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!37 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC1Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC1Ei, null, metadata !21, metadata !38} ; [ DW_TAG_subprogram ]
+!38 = metadata !{metadata !39}
+!39 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!40 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3barC2Ei", metadata !6, i32 13, metadata !22, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.bar*, i32)* @_ZN3barC2Ei, null, metadata !21, metadata !41} ; [ DW_TAG_subprogram ]
+!41 = metadata !{metadata !42}
+!42 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!43 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC1Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC1Ei, null, metadata !13, metadata !44} ; [ DW_TAG_subprogram ]
+!44 = metadata !{metadata !45}
+!45 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!46 = metadata !{i32 720942, i32 0, null, metadata !"baz", metadata !"baz", metadata !"_ZN3bazC2Ei", metadata !6, i32 6, metadata !14, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.baz*, i32)* @_ZN3bazC2Ei, null, metadata !13, metadata !47} ; [ DW_TAG_subprogram ]
+!47 = metadata !{metadata !48}
+!48 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!49 = metadata !{i32 721153, metadata !29, metadata !"argc", metadata !6, i32 16777232, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!50 = metadata !{i32 16, i32 14, metadata !29, null}
+!51 = metadata !{i32 721153, metadata !29, metadata !"argv", metadata !6, i32 33554448, metadata !32, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!52 = metadata !{i32 16, i32 27, metadata !29, null}
+!53 = metadata !{i32 721152, metadata !54, metadata !"myBar", metadata !6, i32 18, metadata !5, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!54 = metadata !{i32 720907, metadata !29, i32 17, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!55 = metadata !{i32 18, i32 9, metadata !54, null}
+!56 = metadata !{i32 18, i32 17, metadata !54, null}
+!57 = metadata !{i32 19, i32 5, metadata !54, null}
+!58 = metadata !{i32 721153, metadata !37, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!59 = metadata !{i32 13, i32 5, metadata !37, null}
+!60 = metadata !{i32 721153, metadata !37, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!61 = metadata !{i32 13, i32 13, metadata !37, null}
+!62 = metadata !{i32 13, i32 34, metadata !37, null}
+!63 = metadata !{i32 721153, metadata !40, metadata !"this", metadata !6, i32 16777229, metadata !24, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!64 = metadata !{i32 13, i32 5, metadata !40, null}
+!65 = metadata !{i32 721153, metadata !40, metadata !"x", metadata !6, i32 33554445, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!66 = metadata !{i32 13, i32 13, metadata !40, null}
+!67 = metadata !{i32 13, i32 33, metadata !40, null}
+!68 = metadata !{i32 13, i32 34, metadata !69, null}
+!69 = metadata !{i32 720907, metadata !40, i32 13, i32 33, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!70 = metadata !{i32 721153, metadata !43, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!71 = metadata !{i32 6, i32 5, metadata !43, null}
+!72 = metadata !{i32 721153, metadata !43, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!73 = metadata !{i32 6, i32 13, metadata !43, null}
+!74 = metadata !{i32 6, i32 24, metadata !43, null}
+!75 = metadata !{i32 721153, metadata !46, metadata !"this", metadata !6, i32 16777222, metadata !16, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!76 = metadata !{i32 6, i32 5, metadata !46, null}
+!77 = metadata !{i32 721153, metadata !46, metadata !"a", metadata !6, i32 33554438, metadata !12, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!78 = metadata !{i32 6, i32 13, metadata !46, null}
+!79 = metadata !{i32 6, i32 23, metadata !46, null}
+!80 = metadata !{i32 6, i32 24, metadata !81, null}
+!81 = metadata !{i32 720907, metadata !46, i32 6, i32 23, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_AT_byte_size.ll b/test/DebugInfo/X86/DW_AT_byte_size.ll
new file mode 100644
index 000000000000..59280e027f35
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Checks that we don't emit a size for a pointer type.
+; CHECK: DW_TAG_pointer_type
+; CHECK-NEXT: DW_AT_type
+; CHECK-NOT: DW_AT_byte_size
+
+%struct.A = type { i32 }
+
+define i32 @_Z3fooP1A(%struct.A* %a) nounwind uwtable ssp {
+entry:
+ %a.addr = alloca %struct.A*, align 8
+ store %struct.A* %a, %struct.A** %a.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.A** %a.addr}, metadata !16), !dbg !17
+ %0 = load %struct.A** %a.addr, align 8, !dbg !18
+ %b = getelementptr inbounds %struct.A* %0, i32 0, i32 0, !dbg !18
+ %1 = load i32* %b, align 4, !dbg !18
+ ret i32 %1, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 150996)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"_Z3fooP1A", metadata !6, i32 3, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (%struct.A*)* @_Z3fooP1A, null, null, metadata !14} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !12, i32 0, null, null} ; [ DW_TAG_class_type ]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786445, metadata !11, metadata !"b", metadata !6, i32 1, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786689, metadata !5, metadata !"a", metadata !6, i32 16777219, metadata !10, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!17 = metadata !{i32 3, i32 13, metadata !5, null}
+!18 = metadata !{i32 4, i32 3, metadata !19, null}
+!19 = metadata !{i32 786443, metadata !5, i32 3, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_AT_specification.ll b/test/DebugInfo/X86/DW_AT_specification.ll
new file mode 100644
index 000000000000..078b740a4170
--- /dev/null
+++ b/test/DebugInfo/X86/DW_AT_specification.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Test that the DW_AT_specification is a back edge in the file.
+
+; CHECK: 0x0000003a: DW_TAG_subprogram [5] *
+; CHECK: 0x00000060: DW_AT_specification [DW_FORM_ref4] (cu + 0x003a => {0x0000003a})
+
+
+@_ZZN3foo3barEvE1x = constant i32 0, align 4
+
+define void @_ZN3foo3barEv() {
+entry:
+ ret void, !dbg !25
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"<unknown>", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !18} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void ()* @_ZN3foo3barEv, null, metadata !11, metadata !16} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"nsNativeAppSupportBase.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/toolkit/library", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 720915, null, metadata !"foo", metadata !6, i32 1, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!11 = metadata !{i32 720942, i32 0, metadata !12, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !14} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !13, i32 0, null, null} ; [ DW_TAG_class_type ]
+!13 = metadata !{metadata !11}
+!14 = metadata !{metadata !15}
+!15 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!18 = metadata !{metadata !19}
+!19 = metadata !{metadata !20}
+!20 = metadata !{i32 720948, i32 0, metadata !5, metadata !"x", metadata !"x", metadata !"", metadata !6, i32 5, metadata !21, i32 1, i32 1, i32* @_ZZN3foo3barEvE1x} ; [ DW_TAG_variable ]
+!21 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !22} ; [ DW_TAG_const_type ]
+!22 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 6, i32 1, metadata !26, null}
+!26 = metadata !{i32 720907, metadata !5, i32 4, i32 17, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
diff --git a/test/DebugInfo/X86/DW_TAG_friend.ll b/test/DebugInfo/X86/DW_TAG_friend.ll
new file mode 100644
index 000000000000..a0dcec32e691
--- /dev/null
+++ b/test/DebugInfo/X86/DW_TAG_friend.ll
@@ -0,0 +1,47 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that the DW_TAG_friend entry is present and is followed by a DW_AT_friend that refers back to the befriended class.
+
+; CHECK: 0x00000032: DW_TAG_class_type [4]
+; CHECK: 0x00000077: DW_TAG_class_type [4]
+; CHECK: 0x000000a0: DW_TAG_friend [9]
+; CHECK: 0x000000a1: DW_AT_friend [DW_FORM_ref4] (cu + 0x0032 => {0x00000032})
+
+
+%class.A = type { i32 }
+%class.B = type { i32 }
+
+@a = global %class.A zeroinitializer, align 4
+@b = global %class.B zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153413) (llvm/trunk 153428)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !17}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 10, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ]
+!8 = metadata !{metadata !9, metadata !11}
+!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !12, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !15} ; [ DW_TAG_subprogram ]
+!12 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !13, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!13 = metadata !{null, metadata !14}
+!14 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !7} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{metadata !16}
+!16 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786484, i32 0, null, metadata !"b", metadata !"b", metadata !"", metadata !6, i32 11, metadata !18, i32 0, i32 1, %class.B* @b} ; [ DW_TAG_variable ]
+!18 = metadata !{i32 786434, null, metadata !"B", metadata !6, i32 5, i64 32, i64 32, i32 0, i32 0, null, metadata !19, i32 0, null, null} ; [ DW_TAG_class_type ]
+!19 = metadata !{metadata !20, metadata !21, metadata !27}
+!20 = metadata !{i32 786445, metadata !18, metadata !"b", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ]
+!21 = metadata !{i32 786478, i32 0, metadata !18, metadata !"B", metadata !"B", metadata !"", metadata !6, i32 5, metadata !22, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !25} ; [ DW_TAG_subprogram ]
+!22 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !23, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!23 = metadata !{null, metadata !24}
+!24 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !18} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!27 = metadata !{i32 786474, metadata !18, null, metadata !6, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_friend ]
diff --git a/test/DebugInfo/X86/block-capture.ll b/test/DebugInfo/X86/block-capture.ll
new file mode 100644
index 000000000000..4953c421cd32
--- /dev/null
+++ b/test/DebugInfo/X86/block-capture.ll
@@ -0,0 +1,127 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Checks that we emit debug info for the block variable declaration.
+; CHECK: 0x00000030: DW_TAG_subprogram [3]
+; CHECK: 0x0000005b: DW_TAG_variable [5]
+; CHECK: 0x0000005c: DW_AT_name [DW_FORM_strp] ( .debug_str[0x000000e6] = "block")
+; CHECK: 0x00000066: DW_AT_location [DW_FORM_data4] (0x00000023)
+
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_generic = type { i8*, i32, i32, i8*, %struct.__block_descriptor* }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define hidden void @__foo_block_invoke_0(i8* %.block_descriptor) uwtable ssp {
+entry:
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !39), !dbg !51
+ %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>*, !dbg !52
+ call void @llvm.dbg.declare(metadata !{<{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block}, metadata !53), !dbg !54
+ %block.capture.addr = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void ()* }>* %block, i32 0, i32 5, !dbg !55
+ %0 = load void ()** %block.capture.addr, align 8, !dbg !55
+ %block.literal = bitcast void ()* %0 to %struct.__block_literal_generic*, !dbg !55
+ %1 = getelementptr inbounds %struct.__block_literal_generic* %block.literal, i32 0, i32 3, !dbg !55
+ %2 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !55
+ %3 = load i8** %1, !dbg !55
+ %4 = bitcast i8* %3 to void (i8*)*, !dbg !55
+ invoke void %4(i8* %2)
+ to label %invoke.cont unwind label %lpad, !dbg !55
+
+invoke.cont: ; preds = %entry
+ br label %eh.cont, !dbg !58
+
+eh.cont: ; preds = %catch, %invoke.cont
+ ret void, !dbg !61
+
+lpad: ; preds = %entry
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ catch i8* null, !dbg !55
+ %6 = extractvalue { i8*, i32 } %5, 0, !dbg !55
+ store i8* %6, i8** %exn.slot, !dbg !55
+ %7 = extractvalue { i8*, i32 } %5, 1, !dbg !55
+ store i32 %7, i32* %ehselector.slot, !dbg !55
+ br label %catch, !dbg !55
+
+catch: ; preds = %lpad
+ %exn = load i8** %exn.slot, !dbg !62
+ %exn.adjusted = call i8* @objc_begin_catch(i8* %exn) nounwind, !dbg !62
+ call void @objc_end_catch(), !dbg !58
+ br label %eh.cont, !dbg !58
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare i8* @objc_begin_catch(i8*)
+
+declare void @objc_end_catch()
+
+declare i32 @__objc_personality_v0(...)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!35, !36, !37, !38}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 151227)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !28, metadata !31, metadata !34}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 786454, null, metadata !"dispatch_block_t", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!10 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_generic", metadata !6, i32 5, i64 256, i64 0, i32 0, i32 8, null, metadata !12, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{metadata !13, metadata !15, metadata !17, metadata !18, metadata !19}
+!13 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!14 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!15 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!16 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!17 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!18 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 0, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!19 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 5, i64 64, i64 64, i64 192, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ]
+!21 = metadata !{i32 786451, metadata !6, metadata !"__block_descriptor", metadata !6, i32 5, i64 128, i64 0, i32 0, i32 8, null, metadata !22, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!22 = metadata !{metadata !23, metadata !25}
+!23 = metadata !{i32 786445, metadata !6, metadata !"reserved", metadata !6, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 786468, null, metadata !"long unsigned int", null, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!25 = metadata !{i32 786445, metadata !6, metadata !"Size", metadata !6, i32 0, i64 64, i64 64, i64 64, i32 0, metadata !24} ; [ DW_TAG_member ]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!28 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__foo_block_invoke_0", metadata !"__foo_block_invoke_0", metadata !"", metadata !6, i32 7, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @__foo_block_invoke_0, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!29 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!30 = metadata !{null, metadata !14}
+!31 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__copy_helper_block_", metadata !"__copy_helper_block_", metadata !"", metadata !6, i32 10, metadata !32, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!32 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!33 = metadata !{null, metadata !14, metadata !14}
+!34 = metadata !{i32 786478, i32 0, metadata !6, metadata !"__destroy_helper_block_", metadata !"__destroy_helper_block_", metadata !"", metadata !6, i32 10, metadata !29, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !26} ; [ DW_TAG_subprogram ]
+!35 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!36 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!37 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!38 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
+!39 = metadata !{i32 786689, metadata !28, metadata !".block_descriptor", metadata !6, i32 16777223, metadata !40, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!40 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 0, i64 0, i32 0, metadata !41} ; [ DW_TAG_pointer_type ]
+!41 = metadata !{i32 786451, metadata !6, metadata !"__block_literal_1", metadata !6, i32 7, i64 320, i64 64, i32 0, i32 0, null, metadata !42, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!42 = metadata !{metadata !43, metadata !44, metadata !45, metadata !46, metadata !47, metadata !50}
+!43 = metadata !{i32 786445, metadata !6, metadata !"__isa", metadata !6, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !14} ; [ DW_TAG_member ]
+!44 = metadata !{i32 786445, metadata !6, metadata !"__flags", metadata !6, i32 7, i64 32, i64 32, i64 64, i32 0, metadata !16} ; [ DW_TAG_member ]
+!45 = metadata !{i32 786445, metadata !6, metadata !"__reserved", metadata !6, i32 7, i64 32, i64 32, i64 96, i32 0, metadata !16} ; [ DW_TAG_member ]
+!46 = metadata !{i32 786445, metadata !6, metadata !"__FuncPtr", metadata !6, i32 7, i64 64, i64 64, i64 128, i32 0, metadata !14} ; [ DW_TAG_member ]
+!47 = metadata !{i32 786445, metadata !6, metadata !"__descriptor", metadata !6, i32 7, i64 64, i64 64, i64 192, i32 0, metadata !48} ; [ DW_TAG_member ]
+!48 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !49} ; [ DW_TAG_pointer_type ]
+!49 = metadata !{i32 786451, null, metadata !"__block_descriptor_withcopydispose", metadata !6, i32 7, i32 0, i32 0, i32 0, i32 4, null, null, i32 0} ; [ DW_TAG_structure_type ]
+!50 = metadata !{i32 786445, metadata !6, metadata !"block", metadata !6, i32 7, i64 64, i64 64, i64 256, i32 0, metadata !9} ; [ DW_TAG_member ]
+!51 = metadata !{i32 7, i32 18, metadata !28, null}
+!52 = metadata !{i32 7, i32 19, metadata !28, null}
+!53 = metadata !{i32 786688, metadata !28, metadata !"block", metadata !6, i32 5, metadata !9, i32 0, i32 0, i64 1, i64 32} ; [ DW_TAG_auto_variable ]
+!54 = metadata !{i32 5, i32 27, metadata !28, null}
+!55 = metadata !{i32 8, i32 22, metadata !56, null}
+!56 = metadata !{i32 786443, metadata !57, i32 7, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!57 = metadata !{i32 786443, metadata !28, i32 7, i32 19, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!58 = metadata !{i32 10, i32 20, metadata !59, null}
+!59 = metadata !{i32 786443, metadata !60, i32 9, i32 35, metadata !6, i32 4} ; [ DW_TAG_lexical_block ]
+!60 = metadata !{i32 786443, metadata !57, i32 9, i32 35, metadata !6, i32 3} ; [ DW_TAG_lexical_block ]
+!61 = metadata !{i32 10, i32 21, metadata !28, null}
+!62 = metadata !{i32 9, i32 20, metadata !56, null}
diff --git a/test/DebugInfo/X86/concrete_out_of_line.ll b/test/DebugInfo/X86/concrete_out_of_line.ll
new file mode 100644
index 000000000000..a22707189b08
--- /dev/null
+++ b/test/DebugInfo/X86/concrete_out_of_line.ll
@@ -0,0 +1,96 @@
+; RUN: llc -mtriple=x86_64-linux %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Test that we add DW_AT_inline even when we only have concrete out-of-line
+; instances.
+
+; First check that we have a TAG_subprogram at a given offset and that it has
+; AT_inline.
+
+; CHECK: 0x00000134: DW_TAG_subprogram [18]
+; CHECK-NEXT: DW_AT_MIPS_linkage_name
+; CHECK-NEXT: DW_AT_specification
+; CHECK-NEXT: DW_AT_inline
+
+
+; Then check that a TAG_subprogram refers to it with AT_abstract_origin.
+
+; CHECK: 0x00000184: DW_TAG_subprogram [20]
+; CHECK-NEXT: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x0134 => {0x00000134})
+
+define i32 @_ZN17nsAutoRefCnt7ReleaseEv() {
+entry:
+ store i32 1, i32* null, align 4, !dbg !50
+ tail call void @_Z8moz_freePv(i8* null) nounwind, !dbg !54
+ ret i32 0
+}
+
+define void @_ZN17nsAutoRefCntD1Ev() {
+entry:
+ tail call void @_Z8moz_freePv(i8* null) nounwind, !dbg !57
+ ret void
+}
+
+declare void @_Z8moz_freePv(i8*)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"nsAutoRefCnt.cpp", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", metadata !"clang version 3.1 ()", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !47} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !23, metadata !27, metadata !31}
+!5 = metadata !{i32 720942, i32 0, null, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 14, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !12, metadata !20} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"nsAutoRefCnt.ii", metadata !"/Users/espindola/mozilla-central/obj-x86_64-apple-darwin11.2.0/netwerk/base/src", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !10}
+!9 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !11} ; [ DW_TAG_pointer_type ]
+!11 = metadata !{i32 720915, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!12 = metadata !{i32 720942, i32 0, metadata !13, metadata !"Release", metadata !"Release", metadata !"_ZN17nsAutoRefCnt7ReleaseEv", metadata !6, i32 11, metadata !7, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 10, i64 8, i64 8, i32 0, i32 0, null, metadata !14, i32 0, null, null} ; [ DW_TAG_class_type ]
+!14 = metadata !{metadata !12, metadata !15}
+!15 = metadata !{i32 720942, i32 0, metadata !13, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"", metadata !6, i32 12, metadata !16, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!16 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!17 = metadata !{null, metadata !10}
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!20 = metadata !{metadata !21}
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 721153, metadata !5, metadata !"this", metadata !6, i32 16777230, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD1Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !24} ; [ DW_TAG_subprogram ]
+!24 = metadata !{metadata !25}
+!25 = metadata !{metadata !26}
+!26 = metadata !{i32 721153, metadata !23, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!27 = metadata !{i32 720942, i32 0, null, metadata !"~nsAutoRefCnt", metadata !"~nsAutoRefCnt", metadata !"_ZN17nsAutoRefCntD2Ev", metadata !6, i32 18, metadata !16, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32* null, null, metadata !15, metadata !28} ; [ DW_TAG_subprogram ]
+!28 = metadata !{metadata !29}
+!29 = metadata !{metadata !30}
+!30 = metadata !{i32 721153, metadata !27, metadata !"this", metadata !6, i32 16777234, metadata !10, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!31 = metadata !{i32 720942, i32 0, null, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null, metadata !36, metadata !43} ; [ DW_TAG_subprogram ]
+!32 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !33, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!33 = metadata !{metadata !9, metadata !34, metadata !9}
+!34 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !35} ; [ DW_TAG_pointer_type ]
+!35 = metadata !{i32 720915, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!36 = metadata !{i32 720942, i32 0, metadata !37, metadata !"operator=", metadata !"operator=", metadata !"_ZN12nsAutoRefCntaSEi", metadata !6, i32 4, metadata !32, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!37 = metadata !{i32 720898, null, metadata !"nsAutoRefCnt", metadata !6, i32 2, i64 32, i64 32, i32 0, i32 0, null, metadata !38, i32 0, null, null} ; [ DW_TAG_class_type ]
+!38 = metadata !{metadata !39, metadata !40, metadata !36}
+!39 = metadata !{i32 720909, metadata !37, metadata !"mValue", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ]
+!40 = metadata !{i32 720942, i32 0, metadata !37, metadata !"nsAutoRefCnt", metadata !"nsAutoRefCnt", metadata !"", metadata !6, i32 3, metadata !41, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 true, null, null, i32 0, metadata !18} ; [ DW_TAG_subprogram ]
+!41 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !42, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!42 = metadata !{null, metadata !34}
+!43 = metadata !{metadata !44}
+!44 = metadata !{metadata !45, metadata !46}
+!45 = metadata !{i32 721153, metadata !31, metadata !"this", metadata !6, i32 16777220, metadata !34, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!46 = metadata !{i32 721153, metadata !31, metadata !"aValue", metadata !6, i32 33554436, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!47 = metadata !{metadata !48}
+!48 = metadata !{metadata !49}
+!49 = metadata !{i32 720948, i32 0, null, metadata !"mRefCnt", metadata !"mRefCnt", metadata !"", metadata !6, i32 9, metadata !37, i32 0, i32 1, i32* null} ; [ DW_TAG_variable ]
+!50 = metadata !{i32 5, i32 5, metadata !51, metadata !52}
+!51 = metadata !{i32 720907, metadata !31, i32 4, i32 29, metadata !6, i32 2} ; [ DW_TAG_lexical_block ]
+!52 = metadata !{i32 15, i32 0, metadata !53, null}
+!53 = metadata !{i32 720907, metadata !5, i32 14, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!54 = metadata !{i32 19, i32 3, metadata !55, metadata !56}
+!55 = metadata !{i32 720907, metadata !27, i32 18, i32 41, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
+!56 = metadata !{i32 18, i32 41, metadata !23, metadata !52}
+!57 = metadata !{i32 19, i32 3, metadata !55, metadata !58}
+!58 = metadata !{i32 18, i32 41, metadata !23, null}
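The reduced testcase above came out of Mozilla's nsAutoRefCnt; the shape of source that produces "concrete out-of-line instances" is a member function declared inside the class and defined outside it. A minimal hedged C++ sketch (anything beyond the names visible in the metadata and mangled symbols is hypothetical):

    // Sketch only; the IR above is a heavy reduction of the real code.
    void moz_free(void *);            // _Z8moz_freePv in the IR

    struct nsAutoRefCnt {
      int Release();                  // in-class declaration: the DIE that
      ~nsAutoRefCnt();                // DW_AT_specification points back to
      int mValue;
    };

    // Out-of-line definitions. When the compiler emits only concrete
    // out-of-line instances (e.g. the D1/D2 destructor variants), the
    // abstract DIE still needs DW_AT_inline, which the CHECKs verify.
    int nsAutoRefCnt::Release() { moz_free(0); return 0; }
    nsAutoRefCnt::~nsAutoRefCnt() { moz_free(0); }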
diff --git a/test/DebugInfo/X86/dg.exp b/test/DebugInfo/X86/dg.exp
deleted file mode 100644
index 7b7bd4e73807..000000000000
--- a/test/DebugInfo/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/DebugInfo/X86/ending-run.ll b/test/DebugInfo/X86/ending-run.ll
new file mode 100644
index 000000000000..6935c47d0cf6
--- /dev/null
+++ b/test/DebugInfo/X86/ending-run.ll
@@ -0,0 +1,49 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that the line table starts at 7, not 4, but that the first
+; statement isn't until line 8.
+
+; CHECK-NOT: 0x0000000000000000 7 0 1 0 is_stmt
+; CHECK: 0x0000000000000000 7 0 1 0
+; CHECK: 0x0000000000000004 8 18 1 0 is_stmt prologue_end
+
+define i32 @callee(i32 %x) nounwind uwtable ssp {
+entry:
+ %x.addr = alloca i32, align 4
+ %y = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !12), !dbg !13
+ call void @llvm.dbg.declare(metadata !{i32* %y}, metadata !14), !dbg !16
+ %0 = load i32* %x.addr, align 4, !dbg !17
+ %1 = load i32* %x.addr, align 4, !dbg !17
+ %mul = mul nsw i32 %0, %1, !dbg !17
+ store i32 %mul, i32* %y, align 4, !dbg !17
+ %2 = load i32* %y, align 4, !dbg !18
+ %sub = sub nsw i32 %2, 2, !dbg !18
+ ret i32 %sub, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153921) (llvm/trunk 153916)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"callee", metadata !"callee", metadata !"", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 (i32)* @callee, null, null, metadata !10, i32 7} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"ending-run.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9, metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786689, metadata !5, metadata !"x", metadata !6, i32 16777221, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!13 = metadata !{i32 5, i32 5, metadata !5, null}
+!14 = metadata !{i32 786688, metadata !15, metadata !"y", metadata !6, i32 8, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
+!15 = metadata !{i32 786443, metadata !5, i32 7, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!16 = metadata !{i32 8, i32 9, metadata !15, null}
+!17 = metadata !{i32 8, i32 18, metadata !15, null}
+!18 = metadata !{i32 9, i32 5, metadata !15, null}
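A hedged reconstruction of the C source implied by the metadata; the line numbers are the whole point of the test, everything else is hypothetical:

    int callee(         /* line 4: DW_AT_decl_line of the subprogram */
        int x)          /* line 5: the parameter's decl line */
    /* line 6 */
    {                   /* line 7: scope line; the line table starts here */
      int y = x * x;    /* line 8, col 18: first is_stmt, prologue_end */
      return y - 2;     /* line 9 */
    }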
diff --git a/test/DebugInfo/X86/lit.local.cfg b/test/DebugInfo/X86/lit.local.cfg
new file mode 100644
index 000000000000..0d694da8df5a
--- /dev/null
+++ b/test/DebugInfo/X86/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
diff --git a/test/DebugInfo/X86/low-pc-cu.ll b/test/DebugInfo/X86/low-pc-cu.ll
new file mode 100644
index 000000000000..f9d9b9171493
--- /dev/null
+++ b/test/DebugInfo/X86/low-pc-cu.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Check that we use DW_AT_low_pc
+
+; CHECK: DW_TAG_compile_unit [1]
+; CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+; CHECK: DW_TAG_subprogram [2]
+
+define i32 @_Z1qv() nounwind uwtable readnone ssp {
+entry:
+ ret i32 undef, !dbg !13
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 153454) (llvm/trunk 153471)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !12}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"q", metadata !"q", metadata !"_Z1qv", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @_Z1qv, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.cpp", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{i32 786468} ; [ DW_TAG_base_type ]
+!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"t", metadata !"t", metadata !"", metadata !6, i32 2, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, null, null, null, metadata !10} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 7, i32 1, metadata !14, null}
+!14 = metadata !{i32 786443, metadata !5, i32 5, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
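A hedged sketch of what this was reduced from (bodies hypothetical; the IR returns undef): with a single code-bearing function in the unit, the compile unit can carry a plain DW_AT_low_pc of 0x0 rather than a range list, which is what the CHECK lines pin down.

    static int t() { return 0; }   // !12's 't': no machine code attached
    int q() { return t(); }        // _Z1qv, the only code in the CU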
diff --git a/test/DebugInfo/X86/objc-fwd-decl.ll b/test/DebugInfo/X86/objc-fwd-decl.ll
new file mode 100644
index 000000000000..1a815f936c17
--- /dev/null
+++ b/test/DebugInfo/X86/objc-fwd-decl.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: 0x00000027: DW_TAG_structure_type
+; CHECK: 0x0000002c: DW_AT_declaration
+; CHECK: 0x0000002d: DW_AT_APPLE_runtime_class
+
+%0 = type opaque
+
+@a = common global %0* null, align 8
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10, !11, !12}
+
+!0 = metadata !{i32 786449, i32 0, i32 16, metadata !"foo.m", metadata !"/Users/echristo", metadata !"clang version 3.1 (trunk 152054 trunk 152094)", i1 true, i1 false, metadata !"", i32 2, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 3, metadata !7, i32 0, i32 1, %0** @a} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"foo.m", metadata !"/Users/echristo", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 786451, null, metadata !"FooBarBaz", metadata !6, i32 1, i32 0, i32 0, i32 0, i32 4, null, null, i32 16} ; [ DW_TAG_structure_type ]
+!9 = metadata !{i32 1, metadata !"Objective-C Version", i32 2}
+!10 = metadata !{i32 1, metadata !"Objective-C Image Info Version", i32 0}
+!11 = metadata !{i32 1, metadata !"Objective-C Image Info Section", metadata !"__DATA, __objc_imageinfo, regular, no_dead_strip"}
+!12 = metadata !{i32 4, metadata !"Objective-C Garbage Collection", i32 0}
diff --git a/test/DebugInfo/X86/pointer-type-size.ll b/test/DebugInfo/X86/pointer-type-size.ll
new file mode 100644
index 000000000000..f11fbe4cc5f5
--- /dev/null
+++ b/test/DebugInfo/X86/pointer-type-size.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=x86_64-apple-macosx10.7 %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; CHECK: ptr
+; CHECK-NOT: AT_bit_size
+
+%struct.crass = type { i8* }
+
+@crass = common global %struct.crass zeroinitializer, align 8
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"foo.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 147882)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"crass", metadata !"crass", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %struct.crass* @crass} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !"foo.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720915, null, metadata !"crass", metadata !6, i32 1, i64 64, i64 64, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 720909, metadata !7, metadata !"ptr", metadata !6, i32 1, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 720934, null, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !11} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
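Reading !9 through !12 back into C gives roughly the following; the DW_TAG_const_type wrapper (!10) has size 0, and the test checks that no bogus DW_AT_bit_size leaks from it onto the 'ptr' member:

    struct crass {
      char *const ptr;   /* size-0 const DIE wrapping a 64-bit pointer */
    };
    struct crass crass;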
diff --git a/test/DebugInfo/X86/pr11300.ll b/test/DebugInfo/X86/pr11300.ll
new file mode 100644
index 000000000000..5a001eea75a1
--- /dev/null
+++ b/test/DebugInfo/X86/pr11300.ll
@@ -0,0 +1,65 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; test that the DW_AT_specification is a back edge in the file.
+
+; CHECK: 0x0000005c: DW_TAG_subprogram [5]
+; CHECK: 0x0000007c: DW_AT_specification [DW_FORM_ref4] (cu + 0x005c => {0x0000005c})
+
+%struct.foo = type { i8 }
+
+define void @_Z3zedP3foo(%struct.foo* %x) uwtable {
+entry:
+ %x.addr = alloca %struct.foo*, align 8
+ store %struct.foo* %x, %struct.foo** %x.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.foo** %x.addr}, metadata !23), !dbg !24
+ %0 = load %struct.foo** %x.addr, align 8, !dbg !25
+ call void @_ZN3foo3barEv(%struct.foo* %0), !dbg !25
+ ret void, !dbg !27
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define linkonce_odr void @_ZN3foo3barEv(%struct.foo* %this) nounwind uwtable align 2 {
+entry:
+ %this.addr = alloca %struct.foo*, align 8
+ store %struct.foo* %this, %struct.foo** %this.addr, align 8
+ call void @llvm.dbg.declare(metadata !{%struct.foo** %this.addr}, metadata !28), !dbg !29
+ %this1 = load %struct.foo** %this.addr
+ ret void, !dbg !30
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 4, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", metadata !"clang version 3.0 ()", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !20}
+!5 = metadata !{i32 720942, i32 0, metadata !6, metadata !"zed", metadata !"zed", metadata !"_Z3zedP3foo", metadata !6, i32 4, metadata !7, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_Z3zedP3foo, null, null, metadata !18} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 720937, metadata !"/home/espindola/llvm/test.cc", metadata !"/home/espindola/tmpfs/build", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null, metadata !9}
+!9 = metadata !{i32 720911, null, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 720898, null, metadata !"foo", metadata !6, i32 1, i64 8, i64 8, i32 0, i32 0, null, metadata !11, i32 0, null, null} ; [ DW_TAG_class_type ]
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 720942, i32 0, metadata !10, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 false, i32 0, i32 0, null, i32 256, i1 false, null, null, i32 0, metadata !16} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{null, metadata !15}
+!15 = metadata !{i32 720911, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
+!16 = metadata !{metadata !17}
+!17 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!20 = metadata !{i32 720942, i32 0, null, metadata !"bar", metadata !"bar", metadata !"_ZN3foo3barEv", metadata !6, i32 2, metadata !13, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (%struct.foo*)* @_ZN3foo3barEv, null, metadata !12, metadata !21} ; [ DW_TAG_subprogram ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 720932} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 721153, metadata !5, metadata !"x", metadata !6, i32 16777220, metadata !9, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!24 = metadata !{i32 4, i32 15, metadata !5, null}
+!25 = metadata !{i32 4, i32 20, metadata !26, null}
+!26 = metadata !{i32 720907, metadata !5, i32 4, i32 18, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!27 = metadata !{i32 4, i32 30, metadata !26, null}
+!28 = metadata !{i32 721153, metadata !20, metadata !"this", metadata !6, i32 16777218, metadata !15, i32 64, i32 0} ; [ DW_TAG_arg_variable ]
+!29 = metadata !{i32 2, i32 8, metadata !20, null}
+!30 = metadata !{i32 2, i32 15, metadata !31, null}
+!31 = metadata !{i32 720907, metadata !20, i32 2, i32 14, metadata !6, i32 1} ; [ DW_TAG_lexical_block ]
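A hedged sketch of the source: 'bar' is declared inside 'foo' (the DIE at 0x5c) and its linkonce_odr definition appears later in the unit, so the definition's DW_AT_specification (read at 0x7c) must be a backward reference within the CU:

    struct foo {
      void bar() {}                // in-class body => linkonce_odr definition
    };
    void zed(foo *x) { x->bar(); }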
diff --git a/test/DebugInfo/X86/stringpool.ll b/test/DebugInfo/X86/stringpool.ll
new file mode 100644
index 000000000000..2cd100156aad
--- /dev/null
+++ b/test/DebugInfo/X86/stringpool.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LINUX
+; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s --check-prefix=DARWIN
+
+@yyyy = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 720913, i32 0, i32 12, metadata !"z.c", metadata !"/home/nicholas", metadata !"clang version 3.1 (trunk 143009)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 720948, i32 0, null, metadata !"yyyy", metadata !"yyyy", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @yyyy} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 720937, metadata !"z.c", metadata !"/home/nicholas", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+
+; Verify that we refer to 'yyyy' with a relocation.
+; LINUX: .long .Lstring3 # DW_AT_name
+; LINUX-NEXT: .long 39 # DW_AT_type
+; LINUX-NEXT: .byte 1 # DW_AT_external
+; LINUX-NEXT: .byte 1 # DW_AT_decl_file
+; LINUX-NEXT: .byte 1 # DW_AT_decl_line
+; LINUX-NEXT: .byte 9 # DW_AT_location
+; LINUX-NEXT: .byte 3
+; LINUX-NEXT: .quad yyyy
+
+; Verify that we refer to 'yyyy' without a relocation.
+; DARWIN: Lset5 = Lstring3-Lsection_str ## DW_AT_name
+; DARWIN-NEXT: .long Lset5
+; DARWIN-NEXT: .long 39 ## DW_AT_type
+; DARWIN-NEXT: .byte 1 ## DW_AT_external
+; DARWIN-NEXT: .byte 1 ## DW_AT_decl_file
+; DARWIN-NEXT: .byte 1 ## DW_AT_decl_line
+; DARWIN-NEXT: .byte 9 ## DW_AT_location
+; DARWIN-NEXT: .byte 3
+; DARWIN-NEXT: .quad _yyyy
+
+; Verify that "yyyy" ended up in the stringpool.
+; LINUX: .section .debug_str,"MS",@progbits,1
+; LINUX-NOT: .section
+; LINUX: yyyy
+; DARWIN: .section __DWARF,__debug_str,regular,debug
+; DARWIN-NOT: .section
+; DARWIN: yyyy
diff --git a/test/DebugInfo/X86/struct-loc.ll b/test/DebugInfo/X86/struct-loc.ll
new file mode 100644
index 000000000000..9a047388207a
--- /dev/null
+++ b/test/DebugInfo/X86/struct-loc.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o %t -filetype=obj
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; Make sure that structures have a decl file and decl line attached.
+; CHECK: DW_TAG_structure_type [3]
+; CHECK: DW_AT_decl_file
+; CHECK: DW_AT_decl_line
+; CHECK: DW_TAG_member
+
+%struct.foo = type { i32 }
+
+@f = common global %struct.foo zeroinitializer, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", metadata !"clang version 3.1 (trunk 152837) (llvm/trunk 152845)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"f", metadata !"f", metadata !"", metadata !6, i32 5, metadata !7, i32 0, i32 1, %struct.foo* @f} ; [ DW_TAG_variable ]
+!6 = metadata !{i32 786473, metadata !"struct_bug.c", metadata !"/Users/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 32, i64 32, i32 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786445, metadata !7, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_member ]
+!10 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
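A hedged C equivalent, with the line numbers the test cares about:

    struct foo {    /* line 1: decl_file/decl_line checked on the DIE */
      int a;        /* line 2 */
    };

    struct foo f;   /* line 5 */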
diff --git a/test/DebugInfo/bug_null_debuginfo.ll b/test/DebugInfo/bug_null_debuginfo.ll
new file mode 100644
index 000000000000..a7fdf70d71c7
--- /dev/null
+++ b/test/DebugInfo/bug_null_debuginfo.ll
@@ -0,0 +1,6 @@
+; RUN: llc
+
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{null, null, null}
diff --git a/test/DebugInfo/dg.exp b/test/DebugInfo/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/DebugInfo/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/DebugInfo/lit.local.cfg b/test/DebugInfo/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/DebugInfo/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/ExecutionEngine/2002-12-16-ArgTest.ll b/test/ExecutionEngine/2002-12-16-ArgTest.ll
index eba58ccca423..eb2fe8c04832 100644
--- a/test/ExecutionEngine/2002-12-16-ArgTest.ll
+++ b/test/ExecutionEngine/2002-12-16-ArgTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
@.LC0 = internal global [10 x i8] c"argc: %d\0A\00" ; <[10 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
index 577226b5318b..3182193453ae 100644
--- a/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
+++ b/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
define i32 @foo(i32 %X, i32 %Y, double %A) {
diff --git a/test/ExecutionEngine/2003-01-04-LoopTest.ll b/test/ExecutionEngine/2003-01-04-LoopTest.ll
index 61b0a1bd58ee..3e27e0607ba9 100644
--- a/test/ExecutionEngine/2003-01-04-LoopTest.ll
+++ b/test/ExecutionEngine/2003-01-04-LoopTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
define i32 @main() {
diff --git a/test/ExecutionEngine/2003-01-04-PhiTest.ll b/test/ExecutionEngine/2003-01-04-PhiTest.ll
index 2bc70d749f50..48576e7c83e6 100644
--- a/test/ExecutionEngine/2003-01-04-PhiTest.ll
+++ b/test/ExecutionEngine/2003-01-04-PhiTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
; <label>:0
diff --git a/test/ExecutionEngine/2003-01-09-SARTest.ll b/test/ExecutionEngine/2003-01-09-SARTest.ll
index 560cd3eae9a2..ed58e1184377 100644
--- a/test/ExecutionEngine/2003-01-09-SARTest.ll
+++ b/test/ExecutionEngine/2003-01-09-SARTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; We were accidentally inverting the signedness of right shifts. Whoops.
diff --git a/test/ExecutionEngine/2003-01-10-FUCOM.ll b/test/ExecutionEngine/2003-01-10-FUCOM.ll
index 8512f634323f..4960e5969005 100644
--- a/test/ExecutionEngine/2003-01-10-FUCOM.ll
+++ b/test/ExecutionEngine/2003-01-10-FUCOM.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
%X = fadd double 0.000000e+00, 1.000000e+00 ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
index df150373b727..80e19ba19320 100644
--- a/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
+++ b/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
define i32 @bar(i8* %X) {
diff --git a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
index 26429a05090f..6f61aa68b67e 100644
--- a/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
+++ b/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
@@ -1,6 +1,6 @@
; This testcase should return with an exit code of 1.
;
-; RUN: not lli %s
+; RUN: not %lli %s
; XFAIL: arm
@test = global i64 0 ; <i64*> [#uses=1]
diff --git a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
index 566f3ae3693b..236be18d96ef 100644
--- a/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
+++ b/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s test
+; RUN: %lli %s test
; XFAIL: arm
declare i32 @puts(i8*)
diff --git a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
index bcdb11468dca..45279adbe57b 100644
--- a/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
+++ b/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
target datalayout = "e-p:32:32"
diff --git a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
index 37dae861c983..4342aa440975 100644
--- a/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
+++ b/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; Testcase distilled from 256.bzip2.
diff --git a/test/ExecutionEngine/2003-06-05-PHIBug.ll b/test/ExecutionEngine/2003-06-05-PHIBug.ll
index f7bd8b77244f..03b66c43a1ce 100644
--- a/test/ExecutionEngine/2003-06-05-PHIBug.ll
+++ b/test/ExecutionEngine/2003-06-05-PHIBug.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; Testcase distilled from 256.bzip2.
diff --git a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
index 6c2f34095f60..22dd4ccb44cf 100644
--- a/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
+++ b/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
; This testcase failed to work because two variable sized allocas confused the
diff --git a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
index 29cbaac4deb6..60dc3d6b7d43 100644
--- a/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
+++ b/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
;
diff --git a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
index 6711d4db245a..04a5e1741bb0 100644
--- a/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
+++ b/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
; This testcase exposes a bug in the local register allocator where it runs out
diff --git a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
index fe182116ec9e..6e48c60db262 100644
--- a/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
+++ b/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
@A = global i32 0 ; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/2005-12-02-TailCallBug.ll b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
index 874ce39e662f..8523b5e3f5b8 100644
--- a/test/ExecutionEngine/2005-12-02-TailCallBug.ll
+++ b/test/ExecutionEngine/2005-12-02-TailCallBug.ll
@@ -1,5 +1,5 @@
; PR672
-; RUN: lli %s
+; RUN: %lli %s
; XFAIL: arm
define i32 @main() {
diff --git a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
index c0dc4cf61abb..418361163fcf 100644
--- a/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
+++ b/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter %s
+; RUN: %lli -force-interpreter %s
; PR1836
define i32 @main() {
diff --git a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
index 07cc659cd040..0ab02747ba1d 100644
--- a/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
+++ b/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s | grep 1
+; RUN: %lli -force-interpreter=true %s | grep 1
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
diff --git a/test/ExecutionEngine/2010-01-15-UndefValue.ll b/test/ExecutionEngine/2010-01-15-UndefValue.ll
index 6e7a392125e1..01cb21f4206d 100644
--- a/test/ExecutionEngine/2010-01-15-UndefValue.ll
+++ b/test/ExecutionEngine/2010-01-15-UndefValue.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s
+; RUN: %lli -force-interpreter=true %s
define i32 @main() {
%a = add i32 0, undef
diff --git a/test/ExecutionEngine/dg.exp b/test/ExecutionEngine/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/ExecutionEngine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/ExecutionEngine/fpbitcast.ll b/test/ExecutionEngine/fpbitcast.ll
index 47cbb02db180..fa84be441010 100644
--- a/test/ExecutionEngine/fpbitcast.ll
+++ b/test/ExecutionEngine/fpbitcast.ll
@@ -1,4 +1,4 @@
-; RUN: lli -force-interpreter=true %s | grep 40091eb8
+; RUN: %lli -force-interpreter=true %s | grep 40091eb8
;
define i32 @test(double %x) {
entry:
diff --git a/test/ExecutionEngine/hello.ll b/test/ExecutionEngine/hello.ll
index 92c26a6c365c..f2c4a7f414d9 100644
--- a/test/ExecutionEngine/hello.ll
+++ b/test/ExecutionEngine/hello.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
@.LC0 = internal global [12 x i8] c"Hello World\00" ; <[12 x i8]*> [#uses=1]
diff --git a/test/ExecutionEngine/hello2.ll b/test/ExecutionEngine/hello2.ll
index 10557ab5336f..155ed41d7ca6 100644
--- a/test/ExecutionEngine/hello2.ll
+++ b/test/ExecutionEngine/hello2.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
@X = global i32 7 ; <i32*> [#uses=0]
diff --git a/test/ExecutionEngine/lit.local.cfg b/test/ExecutionEngine/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/ExecutionEngine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/ExecutionEngine/simplesttest.ll b/test/ExecutionEngine/simplesttest.ll
index ad38485d6e02..85c171532752 100644
--- a/test/ExecutionEngine/simplesttest.ll
+++ b/test/ExecutionEngine/simplesttest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
ret i32 0
diff --git a/test/ExecutionEngine/simpletest.ll b/test/ExecutionEngine/simpletest.ll
index 797b359c29aa..83f9b8405902 100644
--- a/test/ExecutionEngine/simpletest.ll
+++ b/test/ExecutionEngine/simpletest.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
define i32 @bar() {
diff --git a/test/ExecutionEngine/stubs.ll b/test/ExecutionEngine/stubs.ll
index 2039ab5a6a3c..b40e4be63d41 100644
--- a/test/ExecutionEngine/stubs.ll
+++ b/test/ExecutionEngine/stubs.ll
@@ -1,4 +1,4 @@
-; RUN: lli -disable-lazy-compilation=false %s
+; RUN: %lli -disable-lazy-compilation=false %s
; XFAIL: arm
define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/test-arith.ll b/test/ExecutionEngine/test-arith.ll
index 354ecd24bc83..79f989f7265b 100644
--- a/test/ExecutionEngine/test-arith.ll
+++ b/test/ExecutionEngine/test-arith.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
%A = add i8 0, 12 ; <i8> [#uses=1]
diff --git a/test/ExecutionEngine/test-branch.ll b/test/ExecutionEngine/test-branch.ll
index 7d4fd5605922..3ae55d069b3d 100644
--- a/test/ExecutionEngine/test-branch.ll
+++ b/test/ExecutionEngine/test-branch.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; test unconditional branch
define i32 @main() {
diff --git a/test/ExecutionEngine/test-call-no-external-funcs.ll b/test/ExecutionEngine/test-call-no-external-funcs.ll
new file mode 100644
index 000000000000..b2dd5325f114
--- /dev/null
+++ b/test/ExecutionEngine/test-call-no-external-funcs.ll
@@ -0,0 +1,15 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+define i32 @_Z14func_exit_codev() nounwind uwtable {
+entry:
+ ret i32 0
+}
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ %call = call i32 @_Z14func_exit_codev()
+ ret i32 %call
+}
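Roughly the C++ this corresponds to (names beyond the mangled _Z14func_exit_codev are hypothetical): the point is that lazy JIT compilation must work when main calls only functions defined in the same module, with no external symbols to resolve.

    int func_exit_code() { return 0; }
    int main() { return func_exit_code(); }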
diff --git a/test/ExecutionEngine/test-call.ll b/test/ExecutionEngine/test-call.ll
index c4131a20f795..3fd39fe094f3 100644
--- a/test/ExecutionEngine/test-call.ll
+++ b/test/ExecutionEngine/test-call.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
declare void @exit(i32)
diff --git a/test/ExecutionEngine/test-cast.ll b/test/ExecutionEngine/test-cast.ll
index f41448cc60aa..667fa80a4897 100644
--- a/test/ExecutionEngine/test-cast.ll
+++ b/test/ExecutionEngine/test-cast.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @foo() {
ret i32 0
diff --git a/test/ExecutionEngine/test-common-symbols.ll b/test/ExecutionEngine/test-common-symbols.ll
new file mode 100644
index 000000000000..4dd926512ae6
--- /dev/null
+++ b/test/ExecutionEngine/test-common-symbols.ll
@@ -0,0 +1,89 @@
+; RUN: %lli -O0 -disable-lazy-compilation=false %s
+; XFAIL: arm
+
+; The intention of this test is to verify that symbols mapped to COMMON in ELF
+; work as expected.
+;
+; Compiled from this C code:
+;
+; int zero_int;
+; double zero_double;
+; int zero_arr[10];
+;
+; int main()
+; {
+; zero_arr[zero_int + 5] = 40;
+;
+; if (zero_double < 1.0)
+; zero_arr[zero_int + 2] = 70;
+;
+; for (int i = 1; i < 10; ++i) {
+; zero_arr[i] = zero_arr[i - 1] + zero_arr[i];
+; }
+; return zero_arr[9] == 110 ? 0 : -1;
+; }
+
+@zero_int = common global i32 0, align 4
+@zero_arr = common global [10 x i32] zeroinitializer, align 16
+@zero_double = common global double 0.000000e+00, align 8
+
+define i32 @main() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ %0 = load i32* @zero_int, align 4
+ %add = add nsw i32 %0, 5
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom
+ store i32 40, i32* %arrayidx, align 4
+ %1 = load double* @zero_double, align 8
+ %cmp = fcmp olt double %1, 1.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %2 = load i32* @zero_int, align 4
+ %add1 = add nsw i32 %2, 2
+ %idxprom2 = sext i32 %add1 to i64
+ %arrayidx3 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
+ store i32 70, i32* %arrayidx3, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ store i32 1, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %if.end
+ %3 = load i32* %i, align 4
+ %cmp4 = icmp slt i32 %3, 10
+ br i1 %cmp4, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %4 = load i32* %i, align 4
+ %sub = sub nsw i32 %4, 1
+ %idxprom5 = sext i32 %sub to i64
+ %arrayidx6 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
+ %5 = load i32* %arrayidx6, align 4
+ %6 = load i32* %i, align 4
+ %idxprom7 = sext i32 %6 to i64
+ %arrayidx8 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
+ %7 = load i32* %arrayidx8, align 4
+ %add9 = add nsw i32 %5, %7
+ %8 = load i32* %i, align 4
+ %idxprom10 = sext i32 %8 to i64
+ %arrayidx11 = getelementptr inbounds [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
+ store i32 %add9, i32* %arrayidx11, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %9 = load i32* %i, align 4
+ %inc = add nsw i32 %9, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+ %cmp12 = icmp eq i32 %10, 110
+ %cond = select i1 %cmp12, i32 0, i32 -1
+ ret i32 %cond
+}
diff --git a/test/ExecutionEngine/test-constantexpr.ll b/test/ExecutionEngine/test-constantexpr.ll
index d6d90e3e1982..d01479a86cdc 100644
--- a/test/ExecutionEngine/test-constantexpr.ll
+++ b/test/ExecutionEngine/test-constantexpr.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; This tests to make sure that we can evaluate weird constant expressions
diff --git a/test/ExecutionEngine/test-fp-no-external-funcs.ll b/test/ExecutionEngine/test-fp-no-external-funcs.ll
new file mode 100644
index 000000000000..61b12c2abeb7
--- /dev/null
+++ b/test/ExecutionEngine/test-fp-no-external-funcs.ll
@@ -0,0 +1,21 @@
+; RUN: %lli %s > /dev/null
+
+define double @test(double* %DP, double %Arg) {
+ %D = load double* %DP ; <double> [#uses=1]
+ %V = fadd double %D, 1.000000e+00 ; <double> [#uses=2]
+ %W = fsub double %V, %V ; <double> [#uses=3]
+ %X = fmul double %W, %W ; <double> [#uses=2]
+ %Y = fdiv double %X, %X ; <double> [#uses=2]
+ %Q = fadd double %Y, %Arg ; <double> [#uses=1]
+ %R = bitcast double %Q to double ; <double> [#uses=1]
+ store double %Q, double* %DP
+ ret double %Y
+}
+
+define i32 @main() {
+ %X = alloca double ; <double*> [#uses=2]
+ store double 0.000000e+00, double* %X
+ call double @test( double* %X, double 2.000000e+00 ) ; <double>:1 [#uses=0]
+ ret i32 0
+}
+
diff --git a/test/ExecutionEngine/test-fp.ll b/test/ExecutionEngine/test-fp.ll
index f653660fb832..2bf0210d8b00 100644
--- a/test/ExecutionEngine/test-fp.ll
+++ b/test/ExecutionEngine/test-fp.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define double @test(double* %DP, double %Arg) {
%D = load double* %DP ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/test-global-init-nonzero.ll b/test/ExecutionEngine/test-global-init-nonzero.ll
new file mode 100644
index 000000000000..ef2d37b89199
--- /dev/null
+++ b/test/ExecutionEngine/test-global-init-nonzero.ll
@@ -0,0 +1,35 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+@count = global i32 1, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 49
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* @count, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* @count, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %2 = load i32* %i, align 4
+ %inc1 = add nsw i32 %2, 1
+ store i32 %inc1, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %3 = load i32* @count, align 4
+ %sub = sub nsw i32 %3, 50
+ ret i32 %sub
+}
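A hedged C equivalent: the nonzero initializer moves 'count' out of common/.bss into initialized data, exercising a different global-mapping path in the JIT than test-common-symbols.ll. Starting at 1 and adding 49 yields 50, so main returns 0 on success.

    int count = 1;
    int main() {
      for (int i = 0; i < 49; ++i)
        ++count;
      return count - 50;
    }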
diff --git a/test/ExecutionEngine/test-global.ll b/test/ExecutionEngine/test-global.ll
new file mode 100644
index 000000000000..2ea50dea99ae
--- /dev/null
+++ b/test/ExecutionEngine/test-global.ll
@@ -0,0 +1,35 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+@count = global i32 0, align 4
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* @count, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* @count, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %2 = load i32* %i, align 4
+ %inc1 = add nsw i32 %2, 1
+ store i32 %inc1, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %3 = load i32* @count, align 4
+ %sub = sub nsw i32 %3, 50
+ ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-loadstore.ll b/test/ExecutionEngine/test-loadstore.ll
index 7eb57cbf0102..75743146c6eb 100644
--- a/test/ExecutionEngine/test-loadstore.ll
+++ b/test/ExecutionEngine/test-loadstore.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; XFAIL: arm
define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
diff --git a/test/ExecutionEngine/test-local.ll b/test/ExecutionEngine/test-local.ll
new file mode 100644
index 000000000000..240b174ac2fe
--- /dev/null
+++ b/test/ExecutionEngine/test-local.ll
@@ -0,0 +1,35 @@
+; RUN: %lli %s > /dev/null
+; XFAIL: arm
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ %count = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 0, i32* %retval
+ store i32 0, i32* %count, align 4
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 50
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %count, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %count, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %2 = load i32* %i, align 4
+ %inc1 = add nsw i32 %2, 1
+ store i32 %inc1, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %3 = load i32* %count, align 4
+ %sub = sub nsw i32 %3, 50
+ ret i32 %sub
+}
diff --git a/test/ExecutionEngine/test-logical.ll b/test/ExecutionEngine/test-logical.ll
index 710763a30bd8..05b381bb53c2 100644
--- a/test/ExecutionEngine/test-logical.ll
+++ b/test/ExecutionEngine/test-logical.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
%A = and i8 4, 8 ; <i8> [#uses=2]
diff --git a/test/ExecutionEngine/test-loop.ll b/test/ExecutionEngine/test-loop.ll
index f0e6f7a6f9c1..e951a14ed2e9 100644
--- a/test/ExecutionEngine/test-loop.ll
+++ b/test/ExecutionEngine/test-loop.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
; <label>:0
diff --git a/test/ExecutionEngine/test-phi.ll b/test/ExecutionEngine/test-phi.ll
index c5848a8b5cce..c5bdfd513edc 100644
--- a/test/ExecutionEngine/test-phi.ll
+++ b/test/ExecutionEngine/test-phi.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; test phi node
@Y = global i32 6 ; <i32*> [#uses=1]
diff --git a/test/ExecutionEngine/test-ret.ll b/test/ExecutionEngine/test-ret.ll
index beec39960751..025f53e5cb9e 100644
--- a/test/ExecutionEngine/test-ret.ll
+++ b/test/ExecutionEngine/test-ret.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
; test return instructions
define void @test1() {
diff --git a/test/ExecutionEngine/test-return.ll b/test/ExecutionEngine/test-return.ll
new file mode 100644
index 000000000000..d464a4b72d80
--- /dev/null
+++ b/test/ExecutionEngine/test-return.ll
@@ -0,0 +1,8 @@
+; RUN: %lli %s > /dev/null
+
+define i32 @main() nounwind uwtable {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval
+ ret i32 0
+}
diff --git a/test/ExecutionEngine/test-setcond-fp.ll b/test/ExecutionEngine/test-setcond-fp.ll
index d1d6d05b3352..68276e617a4a 100644
--- a/test/ExecutionEngine/test-setcond-fp.ll
+++ b/test/ExecutionEngine/test-setcond-fp.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
diff --git a/test/ExecutionEngine/test-setcond-int.ll b/test/ExecutionEngine/test-setcond-int.ll
index f59d325a5b59..48dc02198ec0 100644
--- a/test/ExecutionEngine/test-setcond-int.ll
+++ b/test/ExecutionEngine/test-setcond-int.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
%int1 = add i32 0, 0 ; <i32> [#uses=6]
diff --git a/test/ExecutionEngine/test-shift.ll b/test/ExecutionEngine/test-shift.ll
index d0fb90a42750..590e2620689f 100644
--- a/test/ExecutionEngine/test-shift.ll
+++ b/test/ExecutionEngine/test-shift.ll
@@ -1,4 +1,4 @@
-; RUN: lli %s > /dev/null
+; RUN: %lli %s > /dev/null
define i32 @main() {
%shamt = add i8 0, 1 ; <i8> [#uses=8]
diff --git a/test/Feature/const_pv.ll b/test/Feature/const_pv.ll
new file mode 100644
index 000000000000..6fd6abdccf08
--- /dev/null
+++ b/test/Feature/const_pv.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as %s -disable-output
+@G = constant <3 x i64> ptrtoint (<3 x i8*> <i8* null, i8* null, i8* null> to <3 x i64>)
+
+@G1 = global i8 zeroinitializer
+@g = constant <2 x i8*> getelementptr (<2 x i8*> <i8* @G1, i8* @G1>, <2 x i32> <i32 0, i32 0>)
+
+@t = constant <2 x i1> icmp ule (<2 x i32> ptrtoint (<2 x i8*> zeroinitializer to <2 x i32>), <2 x i32> zeroinitializer)
+
diff --git a/test/Feature/dg.exp b/test/Feature/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Feature/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Feature/float.ll b/test/Feature/float.ll
index 6c6c5dd53970..b875afe98047 100644
--- a/test/Feature/float.ll
+++ b/test/Feature/float.ll
@@ -2,5 +2,6 @@
; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
; RUN: diff %t1.ll %t2.ll
+@H1 = global half 0x4010000000000000
@F1 = global float 0x4010000000000000
@D1 = global double 0x4010000000000000
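All three globals here use the same 64-bit hex form: LLVM assembly spells such constants with the IEEE-754 double bit pattern, provided the value is exactly representable at the narrower width. Decoding 0x4010000000000000 (sign s = 0, exponent field e = 0x401 = 1025, mantissa m = 0):

    \[
      v = (-1)^{s}\,\Bigl(1 + \frac{m}{2^{52}}\Bigr)\,2^{\,e-1023}
        = 1 \cdot 2^{\,1025-1023} = 4.0
    \]

so H1, F1, and D1 each denote the value 4.0 in their respective widths.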
diff --git a/test/Feature/global_pv.ll b/test/Feature/global_pv.ll
new file mode 100644
index 000000000000..d257ec077ab9
--- /dev/null
+++ b/test/Feature/global_pv.ll
@@ -0,0 +1,14 @@
+; RUN: opt -instcombine -S -o - %s | llvm-as
+; RUN: opt -instcombine -globalopt -S -o - %s | llvm-as
+@G1 = global i32 zeroinitializer
+@G2 = global i32 zeroinitializer
+@g = global <2 x i32*> zeroinitializer
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @test }]
+define internal void @test() {
+ %A = insertelement <2 x i32*> undef, i32* @G1, i32 0
+ %B = insertelement <2 x i32*> %A, i32* @G2, i32 1
+ store <2 x i32*> %B, <2 x i32*>* @g
+ ret void
+}
+
diff --git a/test/Feature/intrinsics.ll b/test/Feature/intrinsics.ll
index 2dd6b53e7c99..c4e3db6174a6 100644
--- a/test/Feature/intrinsics.ll
+++ b/test/Feature/intrinsics.ll
@@ -6,7 +6,6 @@ declare i1 @llvm.isunordered.f32(float, float)
declare i1 @llvm.isunordered.f64(double, double)
-declare void @llvm.prefetch(i8*, i32, i32)
declare i8 @llvm.ctpop.i8(i8)
@@ -16,21 +15,21 @@ declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
-declare i8 @llvm.cttz.i8(i8)
+declare i8 @llvm.cttz.i8(i8, i1)
-declare i16 @llvm.cttz.i16(i16)
+declare i16 @llvm.cttz.i16(i16, i1)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
-declare i64 @llvm.cttz.i64(i64)
+declare i64 @llvm.cttz.i64(i64, i1)
-declare i8 @llvm.ctlz.i8(i8)
+declare i8 @llvm.ctlz.i8(i8, i1)
-declare i16 @llvm.ctlz.i16(i16)
+declare i16 @llvm.ctlz.i16(i16, i1)
-declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
-declare i64 @llvm.ctlz.i64(i64)
+declare i64 @llvm.ctlz.i64(i64, i1)
declare float @llvm.sqrt.f32(float)
@@ -41,21 +40,20 @@ declare double @llvm.sqrt.f64(double)
define void @libm() {
fcmp uno float 1.000000e+00, 2.000000e+00 ; <i1>:1 [#uses=0]
fcmp uno double 3.000000e+00, 4.000000e+00 ; <i1>:2 [#uses=0]
- call void @llvm.prefetch( i8* null, i32 1, i32 3 )
call float @llvm.sqrt.f32( float 5.000000e+00 ) ; <float>:3 [#uses=0]
call double @llvm.sqrt.f64( double 6.000000e+00 ) ; <double>:4 [#uses=0]
call i8 @llvm.ctpop.i8( i8 10 ) ; <i32>:5 [#uses=0]
call i16 @llvm.ctpop.i16( i16 11 ) ; <i32>:6 [#uses=0]
call i32 @llvm.ctpop.i32( i32 12 ) ; <i32>:7 [#uses=0]
call i64 @llvm.ctpop.i64( i64 13 ) ; <i32>:8 [#uses=0]
- call i8 @llvm.ctlz.i8( i8 14 ) ; <i32>:9 [#uses=0]
- call i16 @llvm.ctlz.i16( i16 15 ) ; <i32>:10 [#uses=0]
- call i32 @llvm.ctlz.i32( i32 16 ) ; <i32>:11 [#uses=0]
- call i64 @llvm.ctlz.i64( i64 17 ) ; <i32>:12 [#uses=0]
- call i8 @llvm.cttz.i8( i8 18 ) ; <i32>:13 [#uses=0]
- call i16 @llvm.cttz.i16( i16 19 ) ; <i32>:14 [#uses=0]
- call i32 @llvm.cttz.i32( i32 20 ) ; <i32>:15 [#uses=0]
- call i64 @llvm.cttz.i64( i64 21 ) ; <i32>:16 [#uses=0]
+ call i8 @llvm.ctlz.i8( i8 14, i1 true ) ; <i32>:9 [#uses=0]
+ call i16 @llvm.ctlz.i16( i16 15, i1 true ) ; <i32>:10 [#uses=0]
+ call i32 @llvm.ctlz.i32( i32 16, i1 true ) ; <i32>:11 [#uses=0]
+ call i64 @llvm.ctlz.i64( i64 17, i1 true ) ; <i32>:12 [#uses=0]
+ call i8 @llvm.cttz.i8( i8 18, i1 true ) ; <i32>:13 [#uses=0]
+ call i16 @llvm.cttz.i16( i16 19, i1 true ) ; <i32>:14 [#uses=0]
+ call i32 @llvm.cttz.i32( i32 20, i1 true ) ; <i32>:15 [#uses=0]
+ call i64 @llvm.cttz.i64( i64 21, i1 true ) ; <i32>:16 [#uses=0]
ret void
}
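The new second operand is the is_zero_undef flag these intrinsics grew: i1 true means the result is undefined when the input is zero, matching GCC-style builtins and letting targets use a bare BSF/BSR-style instruction; i1 false requires the defined result (the operand's bit width) for zero inputs. A hedged C++ illustration, assuming a GCC/Clang-compatible compiler:

    #include <cstdio>

    int main() {
      unsigned x = 0x10;
      // __builtin_ctz(0) is undefined, exactly the contract of
      // llvm.cttz(x, i1 true); frontends needing a defined result
      // for zero would emit llvm.cttz(x, i1 false) instead.
      std::printf("%d\n", __builtin_ctz(x));  // prints 4
      return 0;
    }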
diff --git a/test/Feature/lit.local.cfg b/test/Feature/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Feature/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Feature/llvm2cpp.exp b/test/Feature/llvm2cpp.exp
deleted file mode 100644
index de0126ce3239..000000000000
--- a/test/Feature/llvm2cpp.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm2cpp.exp
-
-llvm2cpp-test [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
new file mode 100644
index 000000000000..35c5c4a0bba4
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -asan -S | llc -o /dev/null
+; The bug manifests as a reg alloc failure:
+; error: ran out of registers during register allocation
+; ModuleID = 'z.o'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-linux-gnu"
+
+%struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], [6 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, i32 (i8*, i8*, i32, i32)*, void (i8*, i8*, i32, i32*, i32*, i32*)*, void (i8*, i8*, i8*, i32, i32)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32*)*, void (i8*, i32, i32*)*, void (i8*, i8*, i32, i16*, i16*)*, void (float*, float*, i32)*, void ([256 x float]*, [2 x float]*, i32, i32, i32)*, void (i32*, i32, i32, double*)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32)*, void (float*, float*, float*, float*, float, i32)*, void (float*, i32*, float, i32)*, void (float*, float*, float, float, i32)*, void (float*, float*, float, i32)*, [2 x void (float*, float*, float**, float, i32)*], [2 x void (float*, float**, float, i32)*], float (float*, float*, i32)*, void (float*, float*, i32)*, void (i16*, float*, i32)*, void (i16*, float**, i32, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i8**, i32*, i16*, i32, i8*)*, void (i8*, i32*, i16*, i32, i8*)*, void (i16*, i16*, i16*, i16*, i16*, i16*, i32)*, void (i16*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer_s*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i32*, i32*, i32, i32, i32, i32, i32, i32*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, void (i8*, i32)*, void (i8*, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, [16 x void (i8*, i8*, i32, i32)*], [16 x void (i8*, i8*, i32, i32)*], [12 x void (i8*, i8*, i32)*], void (i8*, i8*, i32, i32*, i32*, i32)*, void (i16*, i16*, i32)*, void (i16*, i16*, i32)*, i32 (i16*, i16*, i32, i32)*, [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*] }
+%struct.slice_buffer_s = type opaque
+%struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8*, i32*, i32, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, float, i64, i32, i64, i64, float, float, %struct.AVHWAccel*, i32, i8*, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*, i32, i32)*, i8*, i32*, i32)*, i32, i32, i32, i32, i32, i32, i8*, float, float, float, float, i32, i32, i32, float, float, float, i32, i32, i32, i32, [4 x i32], i8*, i32, i32, i32, i32 }
+%struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
+%struct.AVOption = type opaque
+%struct.AVRational = type { i32, i32 }
+%struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], i64, i8* }
+%struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
+%struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, %struct.AVPacket*)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32*, i8*, i32*, i32*, i64* }
+%struct.AVPacket = type { i64, i64, i8*, i32, i32, i32, i32, void (%struct.AVPacket*)*, i8*, i64, i64 }
+%struct.RcOverride = type { i32, i32, i32, float }
+%struct.AVPaletteControl = type { i32, [256 x i32] }
+%struct.AVHWAccel = type { i8*, i32, i32, i32, i32, %struct.AVHWAccel*, i32 (%struct.AVCodecContext*, i8*, i32)*, i32 (%struct.AVCodecContext*, i8*, i32)*, i32 (%struct.AVCodecContext*)*, i32 }
+
+@firtable = internal unnamed_addr constant [9 x i8*] [i8* @ff_mlp_firorder_0, i8* @ff_mlp_firorder_1, i8* @ff_mlp_firorder_2, i8* @ff_mlp_firorder_3, i8* @ff_mlp_firorder_4, i8* @ff_mlp_firorder_5, i8* @ff_mlp_firorder_6, i8* @ff_mlp_firorder_7, i8* @ff_mlp_firorder_8], align 4
+@iirtable = internal unnamed_addr constant [5 x i8*] [i8* @ff_mlp_iirorder_0, i8* @ff_mlp_iirorder_1, i8* @ff_mlp_iirorder_2, i8* @ff_mlp_iirorder_3, i8* @ff_mlp_iirorder_4], align 4
+@ff_mlp_iirorder_0 = external global i8
+@ff_mlp_iirorder_1 = external global i8
+@ff_mlp_iirorder_2 = external global i8
+@ff_mlp_iirorder_3 = external global i8
+@ff_mlp_iirorder_4 = external global i8
+@ff_mlp_firorder_0 = external global i8
+@ff_mlp_firorder_1 = external global i8
+@ff_mlp_firorder_2 = external global i8
+@ff_mlp_firorder_3 = external global i8
+@ff_mlp_firorder_4 = external global i8
+@ff_mlp_firorder_5 = external global i8
+@ff_mlp_firorder_6 = external global i8
+@ff_mlp_firorder_7 = external global i8
+@ff_mlp_firorder_8 = external global i8
+
+define void @ff_mlp_init_x86(%struct.DSPContext* nocapture %c, %struct.AVCodecContext* nocapture %avctx) nounwind address_safety {
+entry:
+ %mlp_filter_channel = getelementptr inbounds %struct.DSPContext* %c, i32 0, i32 131
+ store void (i32*, i32*, i32, i32, i32, i32, i32, i32*)* @mlp_filter_channel_x86, void (i32*, i32*, i32, i32, i32, i32, i32, i32*)** %mlp_filter_channel, align 4, !tbaa !0
+ ret void
+}
+
+define internal void @mlp_filter_channel_x86(i32* %state, i32* %coeff, i32 %firorder, i32 %iirorder, i32 %filter_shift, i32 %mask, i32 %blocksize, i32* %sample_buffer) nounwind address_safety {
+entry:
+ %filter_shift.addr = alloca i32, align 4
+ %mask.addr = alloca i32, align 4
+ %blocksize.addr = alloca i32, align 4
+ %firjump = alloca i8*, align 4
+ %iirjump = alloca i8*, align 4
+ store i32 %filter_shift, i32* %filter_shift.addr, align 4, !tbaa !3
+ store i32 %mask, i32* %mask.addr, align 4, !tbaa !3
+ %arrayidx = getelementptr inbounds [9 x i8*]* @firtable, i32 0, i32 %firorder
+ %0 = load i8** %arrayidx, align 4, !tbaa !0
+ store i8* %0, i8** %firjump, align 4, !tbaa !0
+ %arrayidx1 = getelementptr inbounds [5 x i8*]* @iirtable, i32 0, i32 %iirorder
+ %1 = load i8** %arrayidx1, align 4, !tbaa !0
+ store i8* %1, i8** %iirjump, align 4, !tbaa !0
+ %sub = sub nsw i32 0, %blocksize
+ store i32 %sub, i32* %blocksize.addr, align 4, !tbaa !3
+ %2 = call { i32*, i32*, i32* } asm sideeffect "1: \0A\09xor %esi, %esi\0A\09xor %ecx, %ecx\0A\09jmp *$5 \0A\09ff_mlp_firorder_8: \0A\09mov 0x1c+0($0), %eax\0A\09imull 0x1c+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_7: \0A\09mov 0x18+0($0), %eax\0A\09imull 0x18+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_6: \0A\09mov 0x14+0($0), %eax\0A\09imull 0x14+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_5: \0A\09mov 0x10+0($0), %eax\0A\09imull 0x10+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_4: \0A\09mov 0x0c+0($0), %eax\0A\09imull 0x0c+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_3: \0A\09mov 0x08+0($0), %eax\0A\09imull 0x08+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_2: \0A\09mov 0x04+0($0), %eax\0A\09imull 0x04+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_1: \0A\09mov 0x00+0($0), %eax\0A\09imull 0x00+0($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_firorder_0:\0A\09jmp *$6 \0A\09ff_mlp_iirorder_4: \0A\09mov 0x0c+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x0c+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_3: \0A\09mov 0x08+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x08+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_2: \0A\09mov 0x04+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x04+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_1: \0A\09mov 0x00+4*(8 + (40 * 4))($0), %eax\0A\09imull 0x00+4* 8($1) \0A\09add %eax , %esi\0A\09adc %edx , %ecx\0A\09ff_mlp_iirorder_0:\0A\09mov %ecx, %edx\0A\09mov %esi, %eax\0A\09movzbl $7 , %ecx\0A\09shrd %cl, %edx, %eax\0A\09mov %eax ,%edx \0A\09add ($2) ,%eax \0A\09and $4 ,%eax \0A\09sub $$4 , $0 \0A\09mov %eax, ($0) \0A\09mov %eax, ($2) \0A\09add $$4* 8 , $2 \0A\09sub %edx ,%eax \0A\09mov %eax,4*(8 + (40 * 4))($0) \0A\09incl $3 \0A\09js 1b \0A\09", "=r,=r,=r,=*m,*m,*m,*m,*m,0,1,2,*m,~{eax},~{edx},~{esi},~{ecx},~{dirflag},~{fpsr},~{flags}"(i32* %blocksize.addr, i32* %mask.addr, i8** %firjump, i8** %iirjump, i32* %filter_shift.addr, i32* %state, i32* %coeff, i32* %sample_buffer, i32* %blocksize.addr) nounwind, !srcloc !4
+ ret void
+}
+
+!0 = metadata !{metadata !"any pointer", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
+!4 = metadata !{i32 156132, i32 156164, i32 156205, i32 156238, i32 156282, i32 156332, i32 156370, i32 156408, i32 156447, i32 156486, i32 156536, i32 156574, i32 156612, i32 156651, i32 156690, i32 156740, i32 156778, i32 156816, i32 156855, i32 156894, i32 156944, i32 156982, i32 157020, i32 157059, i32 157098, i32 157148, i32 157186, i32 157224, i32 157263, i32 157302, i32 157352, i32 157390, i32 157428, i32 157467, i32 157506, i32 157556, i32 157594, i32 157632, i32 157671, i32 157710, i32 157760, i32 157798, i32 157836, i32 157875, i32 157914, i32 157952, i32 157996, i32 158046, i32 158099, i32 158140, i32 158179, i32 158218, i32 158268, i32 158321, i32 158362, i32 158401, i32 158440, i32 158490, i32 158543, i32 158584, i32 158623, i32 158662, i32 158712, i32 158765, i32 158806, i32 158845, i32 158884, i32 158922, i32 158963, i32 158996, i32 159029, i32 159062, i32 159109, i32 159154, i32 159199, i32 159243, i32 159286, i32 159329, i32 159375, i32 159422, i32 159478, i32 159522, i32 159566}
diff --git a/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
new file mode 100644
index 000000000000..b05ed3c77c16
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
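+# Walk up to the root lit configuration, which records which targets were
+# enabled at configure time (targets_to_build).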
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
new file mode 100644
index 000000000000..c0fe15e9fcec
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -basicaa -gvn -asan -S | FileCheck %s
+; ASAN conflicts with load widening iff the widened load accesses data out of bounds
+; (while the original unwidened loads do not).
+; http://code.google.com/p/address-sanitizer/issues/detail?id=20#c1
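+;
+; A hypothetical C original for the tests below (assumed, not from the commit):
+;   struct { int i; char a, b, c; } f;   /* 7 bytes of data, 4-byte aligned */
+;   int bad(void) { return f.a + f.c; }  /* reads bytes 4 and 6 */
+; Widening the two i8 loads into a single i32 load would also read byte 7,
+; which is past the end of the global, so ASan must keep two 1-byte checks.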
+
+
+; 32-bit little endian target.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+%struct_of_7_bytes_4_aligned = type { i32, i8, i8, i8}
+
+@f = global %struct_of_7_bytes_4_aligned zeroinitializer, align 4
+
+; Accessing bytes 4 and 6: not OK to widen to an i32 load when address_safety is set.
+
+define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone address_safety {
+entry:
+ %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+ %conv = zext i8 %tmp to i32
+ %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 3), align 1
+ %conv2 = zext i8 %tmp1 to i32
+ %add = add nsw i32 %conv, %conv2
+ ret i32 %add
+; CHECK: @test_widening_bad
+; CHECK: __asan_report_load1
+; CHECK: __asan_report_load1
+; CHECK-NOT: __asan_report
+; We cannot simply use "CHECK: ret" here because the __asan_report_load1 calls live after the ret.
+; CHECK: end_test_widening_bad
+}
+
+define void @end_test_widening_bad() {
+ entry:
+ ret void
+}
+
+; Accessing bytes 4 and 5. OK to widen to i16.
+
+define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone address_safety {
+entry:
+ %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+ %conv = zext i8 %tmp to i32
+ %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 2), align 1
+ %conv2 = zext i8 %tmp1 to i32
+ %add = add nsw i32 %conv, %conv2
+ ret i32 %add
+; CHECK: @test_widening_ok
+; CHECK: __asan_report_load2
+; CHECK-NOT: __asan_report
+; CHECK: end_test_widening_ok
+}
+
+define void @end_test_widening_ok() {
+ entry:
+ ret void
+}
diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll
new file mode 100644
index 000000000000..1687877849c4
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; no action should be taken for these globals
+@v1 = linkonce_odr constant i8 1
+; CHECK-NOT: __asan_register_globals
diff --git a/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll b/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll
new file mode 100644
index 000000000000..89644d4a943f
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll
@@ -0,0 +1,6 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; no action should be taken for thread locals
+@xxx = thread_local global i32 0, align 4
+; CHECK-NOT: __asan_register_globals
diff --git a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
new file mode 100644
index 000000000000..80f1b1c74cd1
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+; AddressSanitizer must insert __asan_handle_no_return
+; before every noreturn call.
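+; (Rationale: a noreturn callee never returns through the normal epilogue, so
+; the runtime has to unpoison stack memory that would otherwise stay poisoned.)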
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @MyNoReturnFunc(i32) noreturn
+
+define i32 @_Z5ChildPv(i8* nocapture %arg) uwtable address_safety {
+entry:
+ call void @MyNoReturnFunc(i32 1) noreturn
+ unreachable
+}
+
+; CHECK: call void @__asan_handle_no_return
+; CHECK-NEXT: call void @MyNoReturnFunc
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
new file mode 100644
index 000000000000..ba8d65a4fa4b
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+@xxx = global i32 0, align 4
+
+; If a global is present, __asan_[un]register_globals should be called from
+; module ctor/dtor
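+; (registration lets the runtime poison the global's redzones at startup and
+; unpoison them again on shutdown)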
+
+; CHECK: llvm.global_dtors
+; CHECK: llvm.global_ctors
+
+; CHECK: define internal void @asan.module_ctor
+; CHECK-NOT: ret
+; CHECK: call void @__asan_register_globals
+; CHECK: ret
+
+; CHECK: define internal void @asan.module_dtor
+; CHECK-NOT: ret
+; CHECK: call void @__asan_unregister_globals
+; CHECK: ret
diff --git a/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
new file mode 100644
index 000000000000..633bf9ae78c0
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
@@ -0,0 +1,25 @@
+; Test that AddressSanitizer instruments "(*a)++" only once.
+; RUN: opt < %s -asan -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1
+; RUN: opt < %s -asan -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0
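+;
+; A hypothetical C original (assumed): void IncrementMe(int *a) { (*a)++; }
+; The load and the store access the same address, so a single check should
+; cover both when -asan-opt=1 is given.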
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define void @IncrementMe(i32* %a) address_safety {
+entry:
+ %tmp1 = load i32* %a, align 4
+ %tmp2 = add i32 %tmp1, 1
+ store i32 %tmp2, i32* %a, align 4
+ ret void
+}
+
+; With optimizations enabled we should see only one call to __asan_report_*
+; OPT1: IncrementMe
+; OPT1: __asan_report_
+; OPT1-NOT: __asan_report_
+; OPT1: asan.module_ctor
+
+; Without optimizations we should see two calls to __asan_report_*
+; OPT0: IncrementMe
+; OPT0: __asan_report_
+; OPT0: __asan_report_
+; OPT0: asan.module_ctor
diff --git a/test/Instrumentation/AddressSanitizer/lit.local.cfg b/test/Instrumentation/AddressSanitizer/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/AddressSanitizer/test64.ll b/test/Instrumentation/AddressSanitizer/test64.ll
new file mode 100644
index 000000000000..fc27de914dee
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/test64.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -asan -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define i32 @read_4_bytes(i32* %a) address_safety {
+entry:
+ %tmp1 = load i32* %a, align 4
+ ret i32 %tmp1
+}
+; CHECK: @read_4_bytes
+; CHECK-NOT: ret
+; CHECK: lshr {{.*}} 3
+; Check for ASan's shadow offset on 64-bit targets (2^44)
+; CHECK-NEXT: 17592186044416
+; CHECK: ret
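+;
+; The shadow address is computed as (Addr >> 3) + Offset; the lshr by 3 and
+; the 2^44 constant checked above are exactly that computation on x86-64.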
diff --git a/test/Instrumentation/ThreadSanitizer/lit.local.cfg b/test/Instrumentation/ThreadSanitizer/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/ThreadSanitizer/read_before_write.ll b/test/Instrumentation/ThreadSanitizer/read_before_write.ll
new file mode 100644
index 000000000000..482362aa7dce
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/read_before_write.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
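+; A read followed in the same basic block by a write to the same address
+; needs no instrumentation of its own: the write's check subsumes it. An
+; intervening call may synchronize, so it disables this optimization.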
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable {
+entry:
+ %0 = load i32* %ptr, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %ptr, align 4
+ ret void
+}
+; CHECK: define void @IncrementMe
+; CHECK-NOT: __tsan_read
+; CHECK: __tsan_write
+; CHECK: ret void
+
+define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable {
+entry:
+ %0 = load i32* %ptr, align 4
+ %inc = add nsw i32 %0, 1
+ call void @foo()
+ store i32 %inc, i32* %ptr, align 4
+ ret void
+}
+
+; CHECK: define void @IncrementMeWithCallInBetween
+; CHECK: __tsan_read
+; CHECK: __tsan_write
+; CHECK: ret void
+
+declare void @foo()
+
diff --git a/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
new file mode 100644
index 000000000000..a08453ac4a94
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that tsan does not instrument reads from constant globals.
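+; A constant global is never stored to, so loads from it cannot be part of a
+; data race.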
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@const_global = external constant i32
+define i32 @read_from_const_global() nounwind uwtable readnone {
+entry:
+ %0 = load i32* @const_global, align 4
+ ret i32 %0
+}
+; CHECK: define i32 @read_from_const_global
+; CHECK-NOT: __tsan
+; CHECK: ret i32
+
+@non_const_global = global i32 0, align 4
+define i32 @read_from_non_const_global() nounwind uwtable readonly {
+entry:
+ %0 = load i32* @non_const_global, align 4
+ ret i32 %0
+}
+
+; CHECK: define i32 @read_from_non_const_global
+; CHECK: __tsan_read
+; CHECK: ret i32
+
+@const_global_array = external constant [10 x i32]
+define i32 @read_from_const_global_array(i32 %idx) nounwind uwtable readnone {
+entry:
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds [10 x i32]* @const_global_array, i64 0, i64 %idxprom
+ %0 = load i32* %arrayidx, align 4
+ ret i32 %0
+}
+
+; CHECK: define i32 @read_from_const_global_array
+; CHECK-NOT: __tsan
+; CHECK: ret i32
+
+%struct.Foo = type { i32 (...)** }
+define void @call_virtual_func(%struct.Foo* %f) uwtable {
+entry:
+ %0 = bitcast %struct.Foo* %f to void (%struct.Foo*)***
+ %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !3
+ %1 = load void (%struct.Foo*)** %vtable, align 8
+ call void %1(%struct.Foo* %f)
+ ret void
+}
+
+; CHECK: define void @call_virtual_func
+; CHECK: __tsan_read
+; CHECK: = load
+; CHECK-NOT: __tsan_read
+; CHECK: = load
+; CHECK: ret void
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"vtable pointer", metadata !2}
+
diff --git a/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
new file mode 100644
index 000000000000..33c703b4c9bd
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @read_4_bytes(i32* %a) {
+entry:
+ %tmp1 = load i32* %a, align 4
+ ret i32 %tmp1
+}
+
+; CHECK: @llvm.global_ctors = {{.*}}@__tsan_init
+
+; CHECK: define i32 @read_4_bytes(i32* %a) {
+; CHECK: call void @__tsan_func_entry(i8* %0)
+; CHECK-NEXT: %1 = bitcast i32* %a to i8*
+; CHECK-NEXT: call void @__tsan_read4(i8* %1)
+; CHECK-NEXT: %tmp1 = load i32* %a, align 4
+; CHECK-NEXT: call void @__tsan_func_exit()
+; CHECK: ret i32
+
+
diff --git a/test/Instrumentation/ThreadSanitizer/vptr_update.ll b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
new file mode 100644
index 000000000000..f31865901b57
--- /dev/null
+++ b/test/Instrumentation/ThreadSanitizer/vptr_update.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -tsan -S | FileCheck %s
+; Check that vtable pointer updates are instrumented as __tsan_vptr_update rather than as a plain write.
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+define void @Foo(i8** nocapture %a, i8* %b) nounwind uwtable {
+entry:
+; CHECK: call void @__tsan_vptr_update
+ store i8* %b, i8** %a, align 8, !tbaa !0
+ ret void
+}
+!0 = metadata !{metadata !"vtable pointer", metadata !1}
+!1 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
diff --git a/test/Integer/dg.exp b/test/Integer/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Integer/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Integer/lit.local.cfg b/test/Integer/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Integer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/2004-05-07-TypeResolution1.ll b/test/Linker/2004-05-07-TypeResolution1.ll
index f0ade337138a..4cff9ace430a 100644
--- a/test/Linker/2004-05-07-TypeResolution1.ll
+++ b/test/Linker/2004-05-07-TypeResolution1.ll
@@ -30,6 +30,6 @@ declare void @func(%struct2*)
define void @tty_init() {
entry:
- volatile store void (%struct2*)* @func, void (%struct2*)** getelementptr (%struct1* @driver1, i64 0, i32 1)
+ store volatile void (%struct2*)* @func, void (%struct2*)** getelementptr (%struct1* @driver1, i64 0, i32 1)
ret void
}
diff --git a/test/Linker/2004-05-07-TypeResolution2.ll b/test/Linker/2004-05-07-TypeResolution2.ll
index 74fe39f4d9f4..380712717817 100644
--- a/test/Linker/2004-05-07-TypeResolution2.ll
+++ b/test/Linker/2004-05-07-TypeResolution2.ll
@@ -9,7 +9,7 @@ target datalayout = "e-p:32:32"
define internal void @f1(%struct1* %tty) {
loopentry.preheader:
%tmp.2.i.i = getelementptr %struct1* %tty, i64 0, i32 1 ; <void (%struct2*)**> [#uses=1]
- %tmp.3.i.i = volatile load void (%struct2*)** %tmp.2.i.i ; <void (%struct2*)*> [#uses=0]
+ %tmp.3.i.i = load volatile void (%struct2*)** %tmp.2.i.i ; <void (%struct2*)*> [#uses=0]
ret void
}
diff --git a/test/Linker/2011-08-18-unique-debug-type.ll b/test/Linker/2011-08-18-unique-debug-type.ll
index 4ef0e0e391e1..696fdb3108b8 100644
--- a/test/Linker/2011-08-18-unique-debug-type.ll
+++ b/test/Linker/2011-08-18-unique-debug-type.ll
@@ -1,5 +1,5 @@
-; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | grep "int" | count 1
+; RUN: llvm-link %s %p/2011-08-18-unique-debug-type2.ll -S -o - | grep "int" | grep -v "^; ModuleID" | count 1
; Test to check only one MDNode for "int" after linking.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.0"
diff --git a/test/Linker/dg.exp b/test/Linker/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Linker/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Linker/link-type-names.ll b/test/Linker/link-type-names.ll
new file mode 100644
index 000000000000..bfc3b64361dc
--- /dev/null
+++ b/test/Linker/link-type-names.ll
@@ -0,0 +1,10 @@
+; RUN: echo "%X = type { i32 } @G2 = global %X { i32 4 }" > %t.ll
+; RUN: llvm-link %s %t.ll -S | FileCheck %s
+; PR11464
+
+%X = type { i32 }
+@G = global %X { i32 4 }
+
+
+; CHECK: @G = global %X { i32 4 }
+; CHECK: @G2 = global %X { i32 4 }
diff --git a/test/Linker/lit.local.cfg b/test/Linker/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Linker/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Linker/module-flags-1-a.ll b/test/Linker/module-flags-1-a.ll
new file mode 100644
index 000000000000..973aa80822c5
--- /dev/null
+++ b/test/Linker/module-flags-1-a.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-link %s %p/module-flags-1-b.ll -S -o - | sort | FileCheck %s
+
+; Test basic functionality of module flags.
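+; Behavior codes (the first operand): 1 = Error (values must agree),
+; 2 = Warning, 3 = Require, 4 = Override.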
+
+; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
+; CHECK: !1 = metadata !{i32 1, metadata !"qux", i32 42}
+; CHECK: !2 = metadata !{i32 1, metadata !"mux", metadata !3}
+; CHECK: !3 = metadata !{metadata !"hello world", i32 927}
+; CHECK: !4 = metadata !{i32 2, metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 2, metadata !"bar", i32 42 }
+!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+
+!llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-1-b.ll b/test/Linker/module-flags-1-b.ll
new file mode 100644
index 000000000000..bf3f5e555508
--- /dev/null
+++ b/test/Linker/module-flags-1-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-1-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"qux", i32 42 }
+!2 = metadata !{ i32 1, metadata !"mux", metadata !{ metadata !"hello world", i32 927 } }
+
+!llvm.module.flags = !{ !0, !1, !2 }
diff --git a/test/Linker/module-flags-2-a.ll b/test/Linker/module-flags-2-a.ll
new file mode 100644
index 000000000000..3ae02889d16b
--- /dev/null
+++ b/test/Linker/module-flags-2-a.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-link %s %p/module-flags-2-b.ll -S -o - | sort | FileCheck %s
+
+; Test the 'override' behavior.
+
+; CHECK: !0 = metadata !{i32 4, metadata !"foo", i32 37}
+; CHECK: !llvm.module.flags = !{!0}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 927 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-2-b.ll b/test/Linker/module-flags-2-b.ll
new file mode 100644
index 000000000000..ab55e4b997c0
--- /dev/null
+++ b/test/Linker/module-flags-2-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-2-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-3-a.ll b/test/Linker/module-flags-3-a.ll
new file mode 100644
index 000000000000..4233a0a7a5b1
--- /dev/null
+++ b/test/Linker/module-flags-3-a.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-link %s %p/module-flags-3-b.ll -S -o - | sort | FileCheck %s
+
+; Test 'require' behavior.
+
+; CHECK: !0 = metadata !{i32 1, metadata !"foo", i32 37}
+; CHECK: !1 = metadata !{i32 3, metadata !"foo", metadata !2}
+; CHECK: !2 = metadata !{metadata !"bar", i32 42}
+; CHECK: !3 = metadata !{i32 1, metadata !"bar", i32 42}
+; CHECK: !llvm.module.flags = !{!0, !1, !3}
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"bar", i32 42 }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-3-b.ll b/test/Linker/module-flags-3-b.ll
new file mode 100644
index 000000000000..76be80263376
--- /dev/null
+++ b/test/Linker/module-flags-3-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-3-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 3, metadata !"foo",
+ metadata !{ metadata !"bar", i32 42 }
+}
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-4-a.ll b/test/Linker/module-flags-4-a.ll
new file mode 100644
index 000000000000..f411a569d830
--- /dev/null
+++ b/test/Linker/module-flags-4-a.ll
@@ -0,0 +1,10 @@
+; RUN: not llvm-link %s %p/module-flags-4-b.ll -S -o - |& FileCheck %s
+
+; Test 'require' error.
+
+; CHECK: linking module flags 'bar': does not have the required value
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+!1 = metadata !{ i32 1, metadata !"bar", i32 927 }
+
+!llvm.module.flags = !{ !0, !1 }
diff --git a/test/Linker/module-flags-4-b.ll b/test/Linker/module-flags-4-b.ll
new file mode 100644
index 000000000000..3a460bbeb0ba
--- /dev/null
+++ b/test/Linker/module-flags-4-b.ll
@@ -0,0 +1,8 @@
+; This file is used with module-flags-4-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 3, metadata !"foo",
+ metadata !{ metadata !"bar", i32 42 }
+}
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-a.ll b/test/Linker/module-flags-5-a.ll
new file mode 100644
index 000000000000..2e59ecca702c
--- /dev/null
+++ b/test/Linker/module-flags-5-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-5-b.ll -S -o - |& FileCheck %s
+
+; Test the 'override' error.
+
+; CHECK: linking module flags 'foo': IDs have conflicting override values
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 927 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-5-b.ll b/test/Linker/module-flags-5-b.ll
new file mode 100644
index 000000000000..1e99b207544e
--- /dev/null
+++ b/test/Linker/module-flags-5-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-5-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 4, metadata !"foo", i32 37 } ; Override the "foo" value.
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-a.ll b/test/Linker/module-flags-6-a.ll
new file mode 100644
index 000000000000..c3e0225814f5
--- /dev/null
+++ b/test/Linker/module-flags-6-a.ll
@@ -0,0 +1,9 @@
+; RUN: not llvm-link %s %p/module-flags-6-b.ll -S -o - |& FileCheck %s
+
+; Test module flags error messages.
+
+; CHECK: linking module flags 'foo': IDs have conflicting values
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 37 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/module-flags-6-b.ll b/test/Linker/module-flags-6-b.ll
new file mode 100644
index 000000000000..2bc5a96045bd
--- /dev/null
+++ b/test/Linker/module-flags-6-b.ll
@@ -0,0 +1,6 @@
+; This file is used with module-flags-6-a.ll
+; RUN: true
+
+!0 = metadata !{ i32 1, metadata !"foo", i32 38 }
+
+!llvm.module.flags = !{ !0 }
diff --git a/test/Linker/multiple-merged-structs.ll b/test/Linker/multiple-merged-structs.ll
new file mode 100644
index 000000000000..348cd89bbc4c
--- /dev/null
+++ b/test/Linker/multiple-merged-structs.ll
@@ -0,0 +1,19 @@
+; RUN: echo {%bug_type = type opaque \
+; RUN: declare i32 @bug_a(%bug_type*) \
+; RUN: declare i32 @bug_b(%bug_type*) } > %t.ll
+; RUN: llvm-link %t.ll %s
+; PR11464
+
+%bug_type = type { %bug_type* }
+%bar = type { i32 }
+
+define i32 @bug_a(%bug_type* %fp) nounwind uwtable {
+entry:
+ %d_stream = getelementptr inbounds %bug_type* %fp, i64 0, i32 0
+ ret i32 0
+}
+
+define i32 @bug_b(%bar* %a) nounwind uwtable {
+entry:
+ ret i32 0
+}
diff --git a/test/Linker/visibility1.ll b/test/Linker/visibility1.ll
new file mode 100644
index 000000000000..131f6d59b5e6
--- /dev/null
+++ b/test/Linker/visibility1.ll
@@ -0,0 +1,46 @@
+; RUN: llvm-link %s %p/visibility2.ll -S | FileCheck %s
+; RUN: llvm-link %p/visibility2.ll %s -S | FileCheck %s
+
+; The values in this file are strong, the ones in visibility2.ll are weak,
+; but we should still get the visibility from them.
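+; (the more restrictive visibility of the two declarations wins, regardless
+; of which definition is chosen: hidden beats protected beats default)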
+
+; Variables
+; CHECK: @v1 = hidden global i32 0
+@v1 = global i32 0
+
+; CHECK: @v2 = protected global i32 0
+@v2 = global i32 0
+
+; CHECK: @v3 = hidden global i32 0
+@v3 = protected global i32 0
+
+
+; Aliases
+; CHECK: @a1 = hidden alias i32* @v1
+@a1 = alias i32* @v1
+
+; CHECK: @a2 = protected alias i32* @v2
+@a2 = alias i32* @v2
+
+; CHECK: @a3 = hidden alias i32* @v3
+@a3 = protected alias i32* @v3
+
+
+; Functions
+; CHECK: define hidden void @f1()
+define void @f1() {
+entry:
+ ret void
+}
+
+; CHECK: define protected void @f2()
+define void @f2() {
+entry:
+ ret void
+}
+
+; CHECK: define hidden void @f3()
+define protected void @f3() {
+entry:
+ ret void
+}
diff --git a/test/Linker/visibility2.ll b/test/Linker/visibility2.ll
new file mode 100644
index 000000000000..e6363ca2f386
--- /dev/null
+++ b/test/Linker/visibility2.ll
@@ -0,0 +1,27 @@
+; This file is used by visibility1.ll, so it doesn't actually do anything itself
+;
+; RUN: true
+
+; Variables
+@v1 = weak hidden global i32 0
+@v2 = weak protected global i32 0
+@v3 = weak hidden global i32 0
+
+; Aliases
+@a1 = hidden alias weak i32* @v1
+@a2 = protected alias weak i32* @v2
+@a3 = hidden alias weak i32* @v3
+
+; Functions
+define weak hidden void @f1() {
+entry:
+ ret void
+}
+define weak protected void @f2() {
+entry:
+ ret void
+}
+define weak hidden void @f3() {
+entry:
+ ret void
+}
diff --git a/test/MC/ARM/arm-aliases.s b/test/MC/ARM/arm-aliases.s
new file mode 100644
index 000000000000..d4ea0dfcb569
--- /dev/null
+++ b/test/MC/ARM/arm-aliases.s
@@ -0,0 +1,17 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+ .syntax unified
+
+@ Shift-by-zero should canonicalize to no shift at all (lsl #0 encoding)
+ add r1, r2, r3, lsl #0
+ sub r1, r2, r3, ror #0
+ eor r1, r2, r3, lsr #0
+ orr r1, r2, r3, asr #0
+ and r1, r2, r3, ror #0
+ bic r1, r2, r3, lsl #0
+
+@ CHECK: add r1, r2, r3 @ encoding: [0x03,0x10,0x82,0xe0]
+@ CHECK: sub r1, r2, r3 @ encoding: [0x03,0x10,0x42,0xe0]
+@ CHECK: eor r1, r2, r3 @ encoding: [0x03,0x10,0x22,0xe0]
+@ CHECK: orr r1, r2, r3 @ encoding: [0x03,0x10,0x82,0xe1]
+@ CHECK: and r1, r2, r3 @ encoding: [0x03,0x10,0x02,0xe0]
+@ CHECK: bic r1, r2, r3 @ encoding: [0x03,0x10,0xc2,0xe1]
diff --git a/test/MC/ARM/arm-it-block.s b/test/MC/ARM/arm-it-block.s
new file mode 100644
index 000000000000..e5e549128bd5
--- /dev/null
+++ b/test/MC/ARM/arm-it-block.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+ .syntax unified
+ .globl _func
+
+_func:
+@ CHECK: _func:
+ it eq
+ moveq r2, r3
+@ In ARM mode, 'it' is parsed for compatibility but produces no encoding.
+@ CHECK-NOT: it
+@ CHECK: moveq r2, r3 @ encoding: [0x03,0x20,0xa0,0x01]
diff --git a/test/MC/ARM/arm-memory-instructions.s b/test/MC/ARM/arm-memory-instructions.s
index 783ac28ced95..d8d9130b690b 100644
--- a/test/MC/ARM/arm-memory-instructions.s
+++ b/test/MC/ARM/arm-memory-instructions.s
@@ -130,8 +130,13 @@ _func:
@------------------------------------------------------------------------------
-@ FIXME: LDRD (label)
+@ LDRD (label)
@------------------------------------------------------------------------------
+ ldrd r2, r3, Lbaz
+Lbaz: .quad 0
+
+@ CHECK: ldrd r2, r3, Lbaz @ encoding: [0xd0'A',0x20'A',0x4f'A',0xe1'A']
+
@------------------------------------------------------------------------------
@ LDRD (register)
diff --git a/test/MC/ARM/arm_fixups.s b/test/MC/ARM/arm_fixups.s
index aba0cd824dbc..74dfb99ef2a7 100644
--- a/test/MC/ARM/arm_fixups.s
+++ b/test/MC/ARM/arm_fixups.s
@@ -3,7 +3,7 @@
bl _printf
@ CHECK: bl _printf @ encoding: [A,A,A,0xeb]
-@ CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_uncondbranch
+@ CHECK: @ fixup A - offset: 0, value: _printf, kind: fixup_arm_uncondbl
mov r9, :lower16:(_foo)
movw r9, :lower16:(_foo)
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
index 55d9f0261950..4788ac731b19 100644
--- a/test/MC/ARM/basic-arm-instructions.s
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -133,9 +133,9 @@ Lforward:
adr r2, #-3
@ CHECK: Lback:
-@ CHECK: adr r2, Lback @ encoding: [0bAAAAAAA0,0x20'A',0x0f'A',0b1110001A]
+@ CHECK: adr r2, Lback @ encoding: [A,0x20'A',0x0f'A',0xe2'A']
@ CHECK: @ fixup A - offset: 0, value: Lback, kind: fixup_arm_adr_pcrel_12
-@ CHECK: adr r3, Lforward @ encoding: [0bAAAAAAA0,0x30'A',0x0f'A',0b1110001A]
+@ CHECK: adr r3, Lforward @ encoding: [A,0x30'A',0x0f'A',0xe2'A']
@ CHECK: @ fixup A - offset: 0, value: Lforward, kind: fixup_arm_adr_pcrel_12
@ CHECK: Lforward:
@ CHECK: adr r2, #3 @ encoding: [0x03,0x20,0x8f,0xe2]
@@ -153,6 +153,7 @@ Lforward:
add r4, r5, r6, asr #5
add r4, r5, r6, ror #5
add r6, r7, r8, lsl r9
+ add r4, r4, r3, asl r9
add r6, r7, r8, lsr r9
add r6, r7, r8, asr r9
add r6, r7, r8, ror r9
@@ -172,6 +173,9 @@ Lforward:
add r6, r7, ror r9
add r4, r5, rrx
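+@ add with a negative immediate canonicalizes to sub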
+ add r0, #-4
+ add r4, r5, #-21
+
@ CHECK: add r4, r5, #61440 @ encoding: [0x0f,0x4a,0x85,0xe2]
@ CHECK: add r4, r5, r6 @ encoding: [0x06,0x40,0x85,0xe0]
@ CHECK: add r4, r5, r6, lsl #5 @ encoding: [0x86,0x42,0x85,0xe0]
@@ -180,12 +184,12 @@ Lforward:
@ CHECK: add r4, r5, r6, asr #5 @ encoding: [0xc6,0x42,0x85,0xe0]
@ CHECK: add r4, r5, r6, ror #5 @ encoding: [0xe6,0x42,0x85,0xe0]
@ CHECK: add r6, r7, r8, lsl r9 @ encoding: [0x18,0x69,0x87,0xe0]
+@ CHECK: add r4, r4, r3, lsl r9 @ encoding: [0x13,0x49,0x84,0xe0]
@ CHECK: add r6, r7, r8, lsr r9 @ encoding: [0x38,0x69,0x87,0xe0]
@ CHECK: add r6, r7, r8, asr r9 @ encoding: [0x58,0x69,0x87,0xe0]
@ CHECK: add r6, r7, r8, ror r9 @ encoding: [0x78,0x69,0x87,0xe0]
@ CHECK: add r4, r5, r6, rrx @ encoding: [0x66,0x40,0x85,0xe0]
-
@ CHECK: add r5, r5, #61440 @ encoding: [0x0f,0x5a,0x85,0xe2]
@ CHECK: add r4, r4, r5 @ encoding: [0x05,0x40,0x84,0xe0]
@ CHECK: add r4, r4, r5, lsl #5 @ encoding: [0x85,0x42,0x84,0xe0]
@@ -199,6 +203,9 @@ Lforward:
@ CHECK: add r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x86,0xe0]
@ CHECK: add r4, r4, r5, rrx @ encoding: [0x65,0x40,0x84,0xe0]
+@ CHECK: sub r0, r0, #4 @ encoding: [0x04,0x00,0x40,0xe2]
+@ CHECK: sub r4, r5, #21 @ encoding: [0x15,0x40,0x45,0xe2]
+
@------------------------------------------------------------------------------
@ AND
@@ -215,6 +222,7 @@ Lforward:
and r6, r7, r8, asr r2
and r6, r7, r8, ror r2
and r10, r1, r6, rrx
+ and r2, r3, #0x7fffffff
@ destination register is optional
and r1, #0xf
@@ -242,6 +250,7 @@ Lforward:
@ CHECK: and r6, r7, r8, asr r2 @ encoding: [0x58,0x62,0x07,0xe0]
@ CHECK: and r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0x07,0xe0]
@ CHECK: and r10, r1, r6, rrx @ encoding: [0x66,0xa0,0x01,0xe0]
+@ CHECK: bic r2, r3, #-2147483648 @ encoding: [0x02,0x21,0xc3,0xe3]
@ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2]
@ CHECK: and r10, r10, r1 @ encoding: [0x01,0xa0,0x0a,0xe0]
@@ -257,8 +266,19 @@ Lforward:
@ CHECK: and r10, r10, r1, rrx @ encoding: [0x61,0xa0,0x0a,0xe0]
@------------------------------------------------------------------------------
-@ FIXME: ASR
+@ ASR
@------------------------------------------------------------------------------
+ asr r2, r4, #32
+ asr r2, r4, #2
+ asr r2, r4, #0
+ asr r4, #2
+
+@ CHECK: asr r2, r4, #32 @ encoding: [0x44,0x20,0xa0,0xe1]
+@ CHECK: asr r2, r4, #2 @ encoding: [0x44,0x21,0xa0,0xe1]
+@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: asr r4, r4, #2 @ encoding: [0x44,0x41,0xa0,0xe1]
+
+
@------------------------------------------------------------------------------
@ B
@------------------------------------------------------------------------------
@@ -362,15 +382,18 @@ Lforward:
@------------------------------------------------------------------------------
bl _bar
+ bleq _bar
blx _bar
blls #28634268
blx #32424576
blx #16212288
@ CHECK: bl _bar @ encoding: [A,A,A,0xeb]
-@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbl
+@ CHECK: bleq _bar @ encoding: [A,A,A,0x0b]
+@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_arm_condbl
@ CHECK: blx _bar @ encoding: [A,A,A,0xfa]
- @ fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+ @ fixup A - offset: 0, value: _bar, kind: fixup_arm_blx
@ CHECK: blls #28634268 @ encoding: [0x27,0x3b,0x6d,0x9b]
@ CHECK: blx #32424576 @ encoding: [0xa0,0xb0,0x7b,0xfa]
@ CHECK: blx #16212288 @ encoding: [0x50,0xd8,0x3d,0xfa]
@@ -473,6 +496,8 @@ Lforward:
cmp r7, r8, asr r2
cmp r7, r8, ror r2
cmp r1, r6, rrx
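+@ cmp with a negative immediate canonicalizes to cmn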
+ cmp r0, #-2
+ cmp lr, #0
@ CHECK: cmp r1, #15 @ encoding: [0x0f,0x00,0x51,0xe3]
@ CHECK: cmp r1, r6 @ encoding: [0x06,0x00,0x51,0xe1]
@@ -486,6 +511,8 @@ Lforward:
@ CHECK: cmp r7, r8, asr r2 @ encoding: [0x58,0x02,0x57,0xe1]
@ CHECK: cmp r7, r8, ror r2 @ encoding: [0x78,0x02,0x57,0xe1]
@ CHECK: cmp r1, r6, rrx @ encoding: [0x66,0x00,0x51,0xe1]
+@ CHECK: cmn r0, #2 @ encoding: [0x02,0x00,0x70,0xe3]
+@ CHECK: cmp lr, #0 @ encoding: [0x00,0x00,0x5e,0xe3]
@------------------------------------------------------------------------------
@@ -744,6 +771,10 @@ Lforward:
ldmda r2!, {r1,r3-r6,sp}
ldmdb r2!, {r1,r3-r6,sp}
+ @ system version ('^' loads user-mode registers; with pc in the list it also restores CPSR)
+ ldm r0, {r0, r2, lr}^
+ ldm sp!, {r0-r3, pc}^
+
@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
@ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9]
@@ -755,6 +786,8 @@ Lforward:
@ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
@ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
@ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
+@ CHECK: ldm r0, {lr, r0, r2} ^ @ encoding: [0x05,0x40,0xd0,0xe8]
+@ CHECK: ldm sp!, {pc, r0, r1, r2, r3} ^ @ encoding: [0x0f,0x80,0xfd,0xe8]
@------------------------------------------------------------------------------
@@ -780,11 +813,32 @@ Lforward:
@ CHECK: ldrhthi r8, [r11], #0 @ encoding: [0xb0,0x80,0xfb,0x80]
@------------------------------------------------------------------------------
-@ FIXME: LSL
+@ LSL
@------------------------------------------------------------------------------
+ lsl r2, r4, #31
+ lsl r2, r4, #1
+ lsl r2, r4, #0
+ lsl r4, #1
+
+@ CHECK: lsl r2, r4, #31 @ encoding: [0x84,0x2f,0xa0,0xe1]
+@ CHECK: lsl r2, r4, #1 @ encoding: [0x84,0x20,0xa0,0xe1]
+@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: lsl r4, r4, #1 @ encoding: [0x84,0x40,0xa0,0xe1]
+
+
@------------------------------------------------------------------------------
-@ FIXME: LSR
+@ LSR
@------------------------------------------------------------------------------
+ lsr r2, r4, #32
+ lsr r2, r4, #2
+ lsr r2, r4, #0
+ lsr r4, #2
+
+@ CHECK: lsr r2, r4, #32 @ encoding: [0x24,0x20,0xa0,0xe1]
+@ CHECK: lsr r2, r4, #2 @ encoding: [0x24,0x21,0xa0,0xe1]
+@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: lsr r4, r4, #2 @ encoding: [0x24,0x41,0xa0,0xe1]
+
@------------------------------------------------------------------------------
@ MCR/MCR2
@@ -855,11 +909,28 @@ Lforward:
movs r2, r3
moveq r2, r3
movseq r2, r3
+ mov r12, r8, lsl #(2 - 2)
+ lsl r2, r3, #(2 - 2)
+ mov r12, r8, lsr #(2 - 2)
+ lsr r2, r3, #(2 - 2)
+ mov r12, r8, asr #(2 - 2)
+ asr r2, r3, #(2 - 2)
+ mov r12, r8, ror #(2 - 2)
+ ror r2, r3, #(2 - 2)
@ CHECK: mov r2, r3 @ encoding: [0x03,0x20,0xa0,0xe1]
@ CHECK: movs r2, r3 @ encoding: [0x03,0x20,0xb0,0xe1]
@ CHECK: moveq r2, r3 @ encoding: [0x03,0x20,0xa0,0x01]
@ CHECK: movseq r2, r3 @ encoding: [0x03,0x20,0xb0,0x01]
+@ CHECK: mov r12, r8 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov r2, r3 @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov r12, r8 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov r2, r3 @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov r12, r8 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov r2, r3 @ encoding: [0x03,0x20,0xa0,0xe1]
+@ CHECK: mov r12, r8 @ encoding: [0x08,0xc0,0xa0,0xe1]
+@ CHECK: mov r2, r3 @ encoding: [0x03,0x20,0xa0,0xe1]
+
@------------------------------------------------------------------------------
@ MOVT
@@ -975,6 +1046,7 @@ Lforward:
muls r5, r6, r7
mulgt r5, r6, r7
mulsle r5, r6, r7
+ mul r11, r5
@ CHECK: mul r5, r6, r7 @ encoding: [0x96,0x07,0x05,0xe0]
@ CHECK: muls r5, r6, r7 @ encoding: [0x96,0x07,0x15,0xe0]
@@ -1038,6 +1110,14 @@ Lforward:
@ CHECK: mvnslt r5, r6, ror r7 @ encoding: [0x76,0x57,0xf0,0xb1]
@------------------------------------------------------------------------------
+@ NEG
+@------------------------------------------------------------------------------
+ neg r5, r8
+
+@ CHECK: rsb r5, r8, #0 @ encoding: [0x00,0x50,0x68,0xe2]
+
+
+@------------------------------------------------------------------------------
@ NOP
@------------------------------------------------------------------------------
nop
@@ -1313,6 +1393,20 @@ Lforward:
@------------------------------------------------------------------------------
+@ ROR
+@------------------------------------------------------------------------------
+ ror r2, r4, #31
+ ror r2, r4, #1
+ ror r2, r4, #0
+ ror r4, #1
+
+@ CHECK: ror r2, r4, #31 @ encoding: [0xe4,0x2f,0xa0,0xe1]
+@ CHECK: ror r2, r4, #1 @ encoding: [0xe4,0x20,0xa0,0xe1]
+@ CHECK: mov r2, r4 @ encoding: [0x04,0x20,0xa0,0xe1]
+@ CHECK: ror r4, r4, #1 @ encoding: [0xe4,0x40,0xa0,0xe1]
+
+
+@------------------------------------------------------------------------------
@ RSB
@------------------------------------------------------------------------------
rsb r4, r5, #0xf000
diff --git a/test/MC/ARM/basic-thumb-instructions.s b/test/MC/ARM/basic-thumb-instructions.s
index 0fa52b098746..bc2605c16ec9 100644
--- a/test/MC/ARM/basic-thumb-instructions.s
+++ b/test/MC/ARM/basic-thumb-instructions.s
@@ -59,12 +59,16 @@ _func:
add sp, sp, #4
add r2, sp, #8
add r2, sp, #1020
+ add sp, sp, #-8
+ add sp, #-8
@ CHECK: add sp, #4 @ encoding: [0x01,0xb0]
@ CHECK: add sp, #508 @ encoding: [0x7f,0xb0]
@ CHECK: add sp, #4 @ encoding: [0x01,0xb0]
@ CHECK: add r2, sp, #8 @ encoding: [0x02,0xaa]
@ CHECK: add r2, sp, #1020 @ encoding: [0xff,0xaa]
+@ CHECK: sub sp, #8 @ encoding: [0x82,0xb0]
+@ CHECK: sub sp, #8 @ encoding: [0x82,0xb0]
@------------------------------------------------------------------------------
@@ -93,10 +97,16 @@ _func:
asrs r2, r3, #32
asrs r2, r3, #5
asrs r2, r3, #1
+ asrs r5, #21
+ asrs r5, r5, #21
+ asrs r3, r5, #21
@ CHECK: asrs r2, r3, #32 @ encoding: [0x1a,0x10]
@ CHECK: asrs r2, r3, #5 @ encoding: [0x5a,0x11]
@ CHECK: asrs r2, r3, #1 @ encoding: [0x5a,0x10]
+@ CHECK: asrs r5, r5, #21 @ encoding: [0x6d,0x15]
+@ CHECK: asrs r5, r5, #21 @ encoding: [0x6d,0x15]
+@ CHECK: asrs r3, r5, #21 @ encoding: [0x6b,0x15]
@------------------------------------------------------------------------------
@@ -315,9 +325,15 @@ _func:
@------------------------------------------------------------------------------
lsls r4, r5, #0
lsls r4, r5, #4
+ lsls r3, #12
+ lsls r3, r3, #12
+ lsls r1, r3, #12
@ CHECK: lsls r4, r5, #0 @ encoding: [0x2c,0x00]
@ CHECK: lsls r4, r5, #4 @ encoding: [0x2c,0x01]
+@ CHECK: lsls r3, r3, #12 @ encoding: [0x1b,0x03]
+@ CHECK: lsls r3, r3, #12 @ encoding: [0x1b,0x03]
+@ CHECK: lsls r1, r3, #12 @ encoding: [0x19,0x03]
@------------------------------------------------------------------------------
@@ -333,9 +349,15 @@ _func:
@------------------------------------------------------------------------------
lsrs r1, r3, #1
lsrs r1, r3, #32
+ lsrs r4, #20
+ lsrs r4, r4, #20
+ lsrs r2, r4, #20
@ CHECK: lsrs r1, r3, #1 @ encoding: [0x59,0x08]
@ CHECK: lsrs r1, r3, #32 @ encoding: [0x19,0x08]
+@ CHECK: lsrs r4, r4, #20 @ encoding: [0x24,0x0d]
+@ CHECK: lsrs r4, r4, #20 @ encoding: [0x24,0x0d]
+@ CHECK: lsrs r2, r4, #20 @ encoding: [0x22,0x0d]
@------------------------------------------------------------------------------
@@ -372,9 +394,11 @@ _func:
@ MUL
@------------------------------------------------------------------------------
muls r1, r2, r1
+ muls r2, r2, r3
muls r3, r4
@ CHECK: muls r1, r2, r1 @ encoding: [0x51,0x43]
+@ CHECK: muls r2, r3, r2 @ encoding: [0x5a,0x43]
@ CHECK: muls r3, r4, r3 @ encoding: [0x63,0x43]
diff --git a/test/MC/ARM/basic-thumb2-instructions.s b/test/MC/ARM/basic-thumb2-instructions.s
index 68815dab016f..d2e208bc53b5 100644
--- a/test/MC/ARM/basic-thumb2-instructions.s
+++ b/test/MC/ARM/basic-thumb2-instructions.s
@@ -73,6 +73,10 @@ _func:
add r12, r6, #0x100
addw r12, r6, #0x100
adds r1, r2, #0x1f0
+ add r2, #1
+ add r0, r0, #32
+ adds r2, r2, #56
+ adds r2, #56
@ CHECK: itet eq @ encoding: [0x0a,0xbf]
@ CHECK: addeq r1, r2, #4 @ encoding: [0x11,0x1d]
@@ -85,6 +89,10 @@ _func:
@ CHECK: add.w r12, r6, #256 @ encoding: [0x06,0xf5,0x80,0x7c]
@ CHECK: addw r12, r6, #256 @ encoding: [0x06,0xf2,0x00,0x1c]
@ CHECK: adds.w r1, r2, #496 @ encoding: [0x12,0xf5,0xf8,0x71]
+@ CHECK: add.w r2, r2, #1 @ encoding: [0x02,0xf1,0x01,0x02]
+@ CHECK: add.w r0, r0, #32 @ encoding: [0x00,0xf1,0x20,0x00]
+@ CHECK: adds r2, #56 @ encoding: [0x38,0x32]
+@ CHECK: adds r2, #56 @ encoding: [0x38,0x32]
@------------------------------------------------------------------------------
@@ -95,12 +103,16 @@ _func:
adds r7, r3, r1, lsl #31
adds.w r0, r3, r6, lsr #25
add.w r4, r8, r1, ror #12
+ add r10, r8
+ add r10, r10, r8
@ CHECK: add.w r1, r2, r8 @ encoding: [0x02,0xeb,0x08,0x01]
@ CHECK: add.w r5, r9, r2, asr #32 @ encoding: [0x09,0xeb,0x22,0x05]
@ CHECK: adds.w r7, r3, r1, lsl #31 @ encoding: [0x13,0xeb,0xc1,0x77]
@ CHECK: adds.w r0, r3, r6, lsr #25 @ encoding: [0x13,0xeb,0x56,0x60]
@ CHECK: add.w r4, r8, r1, ror #12 @ encoding: [0x08,0xeb,0x31,0x34]
+@ CHECK: add r10, r8 @ encoding: [0xc2,0x44]
+@ CHECK: add r10, r8 @ encoding: [0xc2,0x44]
@------------------------------------------------------------------------------
@@ -360,6 +372,8 @@ _func:
cmp sp, r6, lsr #1
cmp r2, r5, asr #24
cmp r1, r4, ror #15
+ cmp r2, #-2
+ cmp r9, #1
@ CHECK: cmp.w r5, #65280 @ encoding: [0xb5,0xf5,0x7f,0x4f]
@ CHECK: cmp.w r4, r12 @ encoding: [0xb4,0xeb,0x0c,0x0f]
@@ -368,6 +382,8 @@ _func:
@ CHECK: cmp.w sp, r6, lsr #1 @ encoding: [0xbd,0xeb,0x56,0x0f]
@ CHECK: cmp.w r2, r5, asr #24 @ encoding: [0xb2,0xeb,0x25,0x6f]
@ CHECK: cmp.w r1, r4, ror #15 @ encoding: [0xb1,0xeb,0xf4,0x3f]
+@ CHECK: cmn.w r2, #2 @ encoding: [0x12,0xf1,0x02,0x0f]
+@ CHECK: cmp.w r9, #1 @ encoding: [0xb9,0xf1,0x01,0x0f]
@------------------------------------------------------------------------------
@@ -573,6 +589,7 @@ _func:
ldm r4, {r5, r6}
ldm r5!, {r3, r8}
ldmfd r5!, {r3, r8}
+ ldmia sp!, {r4-r11, pc}
@ CHECK: ldm.w r4, {r4, r5, r8, r9} @ encoding: [0x94,0xe8,0x30,0x03]
@ CHECK: ldm.w r4, {r5, r6} @ encoding: [0x94,0xe8,0x60,0x00]
@@ -590,6 +607,7 @@ _func:
@ CHECK: ldm.w r4, {r5, r6} @ encoding: [0x94,0xe8,0x60,0x00]
@ CHECK: ldm.w r5!, {r3, r8} @ encoding: [0xb5,0xe8,0x08,0x01]
@ CHECK: ldm.w r5!, {r3, r8} @ encoding: [0xb5,0xe8,0x08,0x01]
+@ CHECK: pop.w {pc, r4, r5, r6, r7, r8, r9, r10, r11} @ encoding: [0xbd,0xe8,0xf0,0x8f]
@------------------------------------------------------------------------------
@@ -599,11 +617,15 @@ _func:
ldmdb r4, {r5, r6}
ldmdb r5!, {r3, r8}
ldmea r5!, {r3, r8}
+ ldmdb.w r4, {r5, r6}
+ ldmdb.w r5!, {r3, r8}
@ CHECK: ldmdb r4, {r4, r5, r8, r9} @ encoding: [0x14,0xe9,0x30,0x03]
@ CHECK: ldmdb r4, {r5, r6} @ encoding: [0x14,0xe9,0x60,0x00]
@ CHECK: ldmdb r5!, {r3, r8} @ encoding: [0x35,0xe9,0x08,0x01]
@ CHECK: ldmdb r5!, {r3, r8} @ encoding: [0x35,0xe9,0x08,0x01]
+@ CHECK: ldmdb r4, {r5, r6} @ encoding: [0x14,0xe9,0x60,0x00]
+@ CHECK: ldmdb r5!, {r3, r8} @ encoding: [0x35,0xe9,0x08,0x01]
@------------------------------------------------------------------------------
@@ -638,9 +660,12 @@ _func:
@ LDR(literal)
@------------------------------------------------------------------------------
ldr.w r5, _foo
+ ldr lr, (_strcmp-4)
@ CHECK: ldr.w r5, _foo @ encoding: [0x5f'A',0xf8'A',A,0x50'A']
- @ fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
+@ CHECK: @ fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
+@ CHECK: ldr.w lr, _strcmp-4 @ encoding: [0x5f'A',0xf8'A',A,0xe0'A']
+@ CHECK: @ fixup A - offset: 0, value: _strcmp-4, kind: fixup_t2_ldst_pcrel_12
@------------------------------------------------------------------------------
@@ -813,7 +838,7 @@ _func:
@------------------------------------------------------------------------------
ldrh r5, _bar
-@ CHECK: ldrh.w r5, _bar @ encoding: [0xbf'A',0xf8'A',A,0x50'A']
+@ CHECK: ldrh.w r5, _bar @ encoding: [0x3f'A',0xf8'A',A,0x50'A']
@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_ldst_pcrel_12
@@ -882,7 +907,7 @@ _func:
@------------------------------------------------------------------------------
ldrsb r5, _bar
-@ CHECK: ldrsb.w r5, _bar @ encoding: [0x9f'A',0xf9'A',A,0x50'A']
+@ CHECK: ldrsb.w r5, _bar @ encoding: [0x1f'A',0xf9'A',A,0x50'A']
@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_ldst_pcrel_12
@@ -951,7 +976,7 @@ _func:
@------------------------------------------------------------------------------
ldrsh r5, _bar
-@ CHECK: ldrsh.w r5, _bar @ encoding: [0xbf'A',0xf9'A',A,0x50'A']
+@ CHECK: ldrsh.w r5, _bar @ encoding: [0x3f'A',0xf9'A',A,0x50'A']
@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_t2_ldst_pcrel_12
@ TEMPORARILY DISABLED:
@@ -1066,9 +1091,13 @@ _func:
@------------------------------------------------------------------------------
mcr p7, #1, r5, c1, c1, #4
mcr2 p7, #1, r5, c1, c1, #4
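+@ the trailing opc2 operand is optional and defaults to #0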
+ mcr p14, #0, r4, c0, c5
+ mcr2 p4, #2, r2, c1, c3
@ CHECK: mcr p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xee,0x91,0x57]
@ CHECK: mcr2 p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xfe,0x91,0x57]
+@ CHECK: mcr p14, #0, r4, c0, c5, #0 @ encoding: [0x00,0xee,0x15,0x4e]
+@ CHECK: mcr2 p4, #2, r2, c1, c3, #0 @ encoding: [0x41,0xfe,0x13,0x24]
@------------------------------------------------------------------------------
@@ -1108,6 +1137,12 @@ _func:
moveq r1, #12
movne.w r1, #12
mov.w r6, #450
+ it lo
+ movlo r1, #-1
+
+ @ alias for mvn
+ mov r3, #-3
+
@ CHECK: movs r1, #21 @ encoding: [0x15,0x21]
@ CHECK: movs.w r1, #21 @ encoding: [0x5f,0xf0,0x15,0x01]
@@ -1123,6 +1158,48 @@ _func:
@ CHECK: moveq r1, #12 @ encoding: [0x0c,0x21]
@ CHECK: movne.w r1, #12 @ encoding: [0x4f,0xf0,0x0c,0x01]
@ CHECK: mov.w r6, #450 @ encoding: [0x4f,0xf4,0xe1,0x76]
+@ CHECK: it lo @ encoding: [0x38,0xbf]
+@ CHECK: movlo.w r1, #-1 @ encoding: [0x4f,0xf0,0xff,0x31]
+@ CHECK: mvn r3, #2 @ encoding: [0x6f,0xf0,0x02,0x03]
+
+@------------------------------------------------------------------------------
+@ MOV(shifted register)
+@------------------------------------------------------------------------------
+ mov r6, r2, lsl #16
+ mov r6, r2, lsr #16
+ movs r6, r2, asr #32
+ movs r6, r2, ror #5
+ movs r4, r4, lsl r5
+ movs r4, r4, lsr r5
+ movs r4, r4, asr r5
+ movs r4, r4, ror r5
+ mov r4, r4, lsl r5
+ movs r4, r4, ror r8
+ movs r4, r5, lsr r6
+ itttt eq
+ moveq r4, r4, lsl r5
+ moveq r4, r4, lsr r5
+ moveq r4, r4, asr r5
+ moveq r4, r4, ror r5
+ mov r4, r4, rrx
+
+@ CHECK: lsl.w r6, r2, #16 @ encoding: [0x4f,0xea,0x02,0x46]
+@ CHECK: lsr.w r6, r2, #16 @ encoding: [0x4f,0xea,0x12,0x46]
+@ CHECK: asrs r6, r2, #32 @ encoding: [0x16,0x10]
+@ CHECK: rors.w r6, r2, #5 @ encoding: [0x5f,0xea,0x72,0x16]
+@ CHECK: lsls r4, r5 @ encoding: [0xac,0x40]
+@ CHECK: lsrs r4, r5 @ encoding: [0xec,0x40]
+@ CHECK: asrs r4, r5 @ encoding: [0x2c,0x41]
+@ CHECK: rors r4, r5 @ encoding: [0xec,0x41]
+@ CHECK: lsl.w r4, r4, r5 @ encoding: [0x04,0xfa,0x05,0xf4]
+@ CHECK: rors.w r4, r4, r8 @ encoding: [0x74,0xfa,0x08,0xf4]
+@ CHECK: lsrs.w r4, r5, r6 @ encoding: [0x35,0xfa,0x06,0xf4]
+@ CHECK: itttt eq @ encoding: [0x01,0xbf]
+@ CHECK: lsleq r4, r5 @ encoding: [0xac,0x40]
+@ CHECK: lsreq r4, r5 @ encoding: [0xec,0x40]
+@ CHECK: asreq r4, r5 @ encoding: [0x2c,0x41]
+@ CHECK: roreq r4, r5 @ encoding: [0xec,0x41]
+@ CHECK: rrx r4, r4 @ encoding: [0x4f,0xea,0x34,0x04]
@------------------------------------------------------------------------------
@@ -1143,9 +1220,13 @@ _func:
@------------------------------------------------------------------------------
mrc p14, #0, r1, c1, c2, #4
mrc2 p14, #0, r1, c1, c2, #4
+ mrc p11, #1, r1, c2, c2
+ mrc2 p12, #3, r3, c3, c4
@ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
@ CHECK: mrc2 p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xfe,0x92,0x1e]
+@ CHECK: mrc p11, #1, r1, c2, c2, #0 @ encoding: [0x32,0xee,0x12,0x1b]
+@ CHECK: mrc2 p12, #3, r3, c3, c4, #0 @ encoding: [0x73,0xfe,0x14,0x3c]
@------------------------------------------------------------------------------
@@ -1187,6 +1268,7 @@ _func:
msr spsr_fc, r0
msr SPSR_fsxc, r5
msr cpsr_fsxc, r8
+ msr cpsr, r3
@ CHECK: msr APSR_nzcvq, r1 @ encoding: [0x81,0xf3,0x00,0x88]
@ CHECK: msr APSR_g, r2 @ encoding: [0x82,0xf3,0x00,0x84]
@@ -1202,6 +1284,7 @@ _func:
@ CHECK: msr SPSR_fc, r0 @ encoding: [0x90,0xf3,0x00,0x89]
@ CHECK: msr SPSR_fsxc, r5 @ encoding: [0x95,0xf3,0x00,0x8f]
@ CHECK: msr CPSR_fsxc, r8 @ encoding: [0x88,0xf3,0x00,0x8f]
+@ CHECK: msr CPSR_fc, r3 @ encoding: [0x83,0xf3,0x00,0x89]
@------------------------------------------------------------------------------
@@ -1212,12 +1295,18 @@ _func:
mul r3, r4, r6
it eq
muleq r3, r4, r5
+ it le
+ mulle r4, r4, r8
+ mul r5, r6
@ CHECK: muls r3, r4, r3 @ encoding: [0x63,0x43]
@ CHECK: mul r3, r4, r3 @ encoding: [0x04,0xfb,0x03,0xf3]
@ CHECK: mul r3, r4, r6 @ encoding: [0x04,0xfb,0x06,0xf3]
@ CHECK: it eq @ encoding: [0x08,0xbf]
@ CHECK: muleq r3, r4, r5 @ encoding: [0x04,0xfb,0x05,0xf3]
+@ CHECK: it le @ encoding: [0xd8,0xbf]
+@ CHECK: mulle r4, r4, r8 @ encoding: [0x04,0xfb,0x08,0xf4]
+@ CHECK: mul r5, r6, r5 @ encoding: [0x06,0xfb,0x05,0xf5]
@------------------------------------------------------------------------------
@@ -1228,7 +1317,7 @@ _func:
mvns r0, #0x3fc0000
itte eq
mvnseq r1, #12
- mvneq r1, #12
+ mvneq.w r1, #12
mvnne r1, #12
@ CHECK: mvns r8, #21 @ encoding: [0x7f,0xf0,0x15,0x08]
@@ -1247,7 +1336,7 @@ _func:
mvns r2, r3
mvn r5, r6, lsl #19
mvn r5, r6, lsr #9
- mvn r5, r6, asr #4
+ mvn.w r5, r6, asr #4
mvn r5, r6, ror #6
mvn r5, r6, rrx
it eq
@@ -1264,6 +1353,16 @@ _func:
@ CHECK: mvneq r2, r3 @ encoding: [0xda,0x43]
@------------------------------------------------------------------------------
+@ NEG
+@------------------------------------------------------------------------------
+ neg r5, r2
+ neg r5, r8
+
+@ CHECK: rsb.w r5, r2, #0 @ encoding: [0xc2,0xf1,0x00,0x05]
+@ CHECK: rsb.w r5, r8, #0 @ encoding: [0xc8,0xf1,0x00,0x05]
+
+
+@------------------------------------------------------------------------------
@ NOP
@------------------------------------------------------------------------------
nop.w
@@ -1343,20 +1442,24 @@ _func:
pld [r6, #33]
pld [r6, #257]
pld [r7, #257]
+ pld [r1, #0]
+ pld [r1, #-0]
@ CHECK: pld [r5, #-4] @ encoding: [0x15,0xf8,0x04,0xfc]
@ CHECK: pld [r6, #32] @ encoding: [0x96,0xf8,0x20,0xf0]
@ CHECK: pld [r6, #33] @ encoding: [0x96,0xf8,0x21,0xf0]
@ CHECK: pld [r6, #257] @ encoding: [0x96,0xf8,0x01,0xf1]
@ CHECK: pld [r7, #257] @ encoding: [0x97,0xf8,0x01,0xf1]
+@ CHECK: pld [r1] @ encoding: [0x91,0xf8,0x00,0xf0]
+@ CHECK: pld [r1, #-0] @ encoding: [0x11,0xf8,0x00,0xfc]
@------------------------------------------------------------------------------
@ PLD(literal)
@------------------------------------------------------------------------------
- pld _foo
+@ pld _foo
-@ CHECK: pld _foo @ encoding: [0x9f'A',0xf8'A',A,0xf0'A']
+@ FIXME: pld _foo @ encoding: [0x9f'A',0xf8'A',A,0xf0'A']
@ fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
@@ -1396,10 +1499,10 @@ _func:
@------------------------------------------------------------------------------
@ PLI(literal)
@------------------------------------------------------------------------------
- pli _foo
+@ pli _foo
-@ CHECK: pli _foo @ encoding: [0x9f'A',0xf9'A',A,0xf0'A']
+@ FIXME: pli _foo @ encoding: [0x9f'A',0xf9'A',A,0xf0'A']
@ fixup A - offset: 0, value: _foo, kind: fixup_t2_ldst_pcrel_12
@@ -1420,6 +1523,21 @@ _func:
@ CHECK: pli [sp, r2, lsl #1] @ encoding: [0x1d,0xf9,0x12,0xf0]
@ CHECK: pli [sp, r2] @ encoding: [0x1d,0xf9,0x02,0xf0]
+@------------------------------------------------------------------------------
+@ POP (alias)
+@------------------------------------------------------------------------------
+ pop {r2, r9}
+
+@ CHECK: pop.w {r2, r9} @ encoding: [0xbd,0xe8,0x04,0x02]
+
+
+@------------------------------------------------------------------------------
+@ PUSH (alias)
+@------------------------------------------------------------------------------
+ push {r2, r9}
+
+@ CHECK: push.w {r2, r9} @ encoding: [0x2d,0xe9,0x04,0x02]
+
@------------------------------------------------------------------------------
@ QADD/QADD16/QADD8
@@ -1609,11 +1727,19 @@ _func:
rsbs r3, r12, #0xf
rsb r1, #0xff
rsb r1, r1, #0xff
+ rsb r11, r11, #0
+ rsb r9, #0
+ rsbs r3, r1, #0
+ rsb r3, r1, #0
@ CHECK: rsb.w r2, r5, #1044480 @ encoding: [0xc5,0xf5,0x7f,0x22]
@ CHECK: rsbs.w r3, r12, #15 @ encoding: [0xdc,0xf1,0x0f,0x03]
@ CHECK: rsb.w r1, r1, #255 @ encoding: [0xc1,0xf1,0xff,0x01]
@ CHECK: rsb.w r1, r1, #255 @ encoding: [0xc1,0xf1,0xff,0x01]
+@ CHECK: rsb.w r11, r11, #0 @ encoding: [0xcb,0xf1,0x00,0x0b]
+@ CHECK: rsb.w r9, r9, #0 @ encoding: [0xc9,0xf1,0x00,0x09]
+@ CHECK: rsbs r3, r1, #0 @ encoding: [0x4b,0x42]
+@ CHECK: rsb.w r3, r1, #0 @ encoding: [0xc1,0xf1,0x00,0x03]
@------------------------------------------------------------------------------
@@ -2287,11 +2413,13 @@ _func:
stmdb r4, {r5, r6}
stmdb r5!, {r3, r8}
stmea r5!, {r3, r8}
+ stmdb.w r5, {r0, r1}
@ CHECK: stmdb r4, {r4, r5, r8, r9} @ encoding: [0x04,0xe9,0x30,0x03]
@ CHECK: stmdb r4, {r5, r6} @ encoding: [0x04,0xe9,0x60,0x00]
@ CHECK: stmdb r5!, {r3, r8} @ encoding: [0x25,0xe9,0x08,0x01]
@ CHECK: stm.w r5!, {r3, r8} @ encoding: [0xa5,0xe8,0x08,0x01]
+@ CHECK: stmdb r5, {r0, r1} @ encoding: [0x05,0xe9,0x03,0x00]
@------------------------------------------------------------------------------
@@ -2526,6 +2654,10 @@ _func:
sub r12, r6, #0x100
subw r12, r6, #0x100
subs r1, r2, #0x1f0
+ sub r2, #1
+ sub r0, r0, #32
+ subs r2, r2, #56
+ subs r2, #56
@ CHECK: itet eq @ encoding: [0x0a,0xbf]
@ CHECK: subeq r1, r2, #4 @ encoding: [0x11,0x1f]
@@ -2538,6 +2670,10 @@ _func:
@ CHECK: sub.w r12, r6, #256 @ encoding: [0xa6,0xf5,0x80,0x7c]
@ CHECK: subw r12, r6, #256 @ encoding: [0xa6,0xf2,0x00,0x1c]
@ CHECK: subs.w r1, r2, #496 @ encoding: [0xb2,0xf5,0xf8,0x71]
+@ CHECK: sub.w r2, r2, #1 @ encoding: [0xa2,0xf1,0x01,0x02]
+@ CHECK: sub.w r0, r0, #32 @ encoding: [0xa0,0xf1,0x20,0x00]
+@ CHECK: subs r2, #56 @ encoding: [0x38,0x3a]
+@ CHECK: subs r2, #56 @ encoding: [0x38,0x3a]
@------------------------------------------------------------------------------
@@ -2550,6 +2686,12 @@ _func:
sub r4, r5, r6, asr #5
sub r4, r5, r6, ror #5
sub.w r5, r2, r12, rrx
+ sub r2, sp, ip
+ sub sp, sp, ip
+ sub sp, ip
+ sub.w r2, sp, ip
+ sub.w sp, sp, ip
+ sub.w sp, ip
@ CHECK: sub.w r4, r5, r6 @ encoding: [0xa5,0xeb,0x06,0x04]
@ CHECK: sub.w r4, r5, r6, lsl #5 @ encoding: [0xa5,0xeb,0x46,0x14]
@@ -2558,6 +2700,12 @@ _func:
@ CHECK: sub.w r4, r5, r6, asr #5 @ encoding: [0xa5,0xeb,0x66,0x14]
@ CHECK: sub.w r4, r5, r6, ror #5 @ encoding: [0xa5,0xeb,0x76,0x14]
@ CHECK: sub.w r5, r2, r12, rrx @ encoding: [0xa2,0xeb,0x3c,0x05]
+@ CHECK: sub.w r2, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x02]
+@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d]
+@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d]
+@ CHECK: sub.w r2, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x02]
+@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d]
+@ CHECK: sub.w sp, sp, r12 @ encoding: [0xad,0xeb,0x0c,0x0d]
@------------------------------------------------------------------------------
@@ -3211,3 +3359,30 @@ _func:
@ CHECK: wfelt @ encoding: [0x20,0xbf]
@ CHECK: wfige @ encoding: [0x30,0xbf]
@ CHECK: yieldlt @ encoding: [0x10,0xbf]
+
+
+@------------------------------------------------------------------------------
+@ Alternate syntax for LDR*(literal) encodings
+@------------------------------------------------------------------------------
+ ldr r11, [pc, #-22]
+ ldrb r11, [pc, #-22]
+ ldrh r11, [pc, #-22]
+ ldrsb r11, [pc, #-22]
+ ldrsh r11, [pc, #-22]
+
+ ldr.w r11, [pc, #-22]
+ ldrb.w r11, [pc, #-22]
+ ldrh.w r11, [pc, #-22]
+ ldrsb.w r11, [pc, #-22]
+ ldrsh.w r11, [pc, #-22]
+
+@ CHECK: ldr.w r11, [pc, #-22] @ encoding: [0x5f,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w r11, [pc, #-22] @ encoding: [0x1f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w r11, [pc, #-22] @ encoding: [0x3f,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #-22] @ encoding: [0x1f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #-22] @ encoding: [0x3f,0xf9,0x16,0xb0]
+@ CHECK: ldr.w r11, [pc, #-22] @ encoding: [0x5f,0xf8,0x16,0xb0]
+@ CHECK: ldrb.w r11, [pc, #-22] @ encoding: [0x1f,0xf8,0x16,0xb0]
+@ CHECK: ldrh.w r11, [pc, #-22] @ encoding: [0x3f,0xf8,0x16,0xb0]
+@ CHECK: ldrsb.w r11, [pc, #-22] @ encoding: [0x1f,0xf9,0x16,0xb0]
+@ CHECK: ldrsh.w r11, [pc, #-22] @ encoding: [0x3f,0xf9,0x16,0xb0]
diff --git a/test/MC/ARM/cxx-global-constructor.ll b/test/MC/ARM/cxx-global-constructor.ll
new file mode 100644
index 000000000000..e06d2c73ed93
--- /dev/null
+++ b/test/MC/ARM/cxx-global-constructor.ll
@@ -0,0 +1,12 @@
+; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic \
+; RUN: -filetype=obj -o - | elf-dump --dump-section-data | FileCheck %s
+
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @f }]
+
+define void @f() {
+ ret void
+}
+
+; Check for a relocation of type R_ARM_TARGET1.
+; CHECK: ('r_type', 0x26)
diff --git a/test/MC/ARM/darwin-ARM-reloc.s b/test/MC/ARM/darwin-ARM-reloc.s
deleted file mode 100644
index 86b45e07bf38..000000000000
--- a/test/MC/ARM/darwin-ARM-reloc.s
+++ /dev/null
@@ -1,171 +0,0 @@
-@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
-@ RUN: FileCheck < %t.dump %s
-
- .syntax unified
- .text
-_f0:
- bl _printf
-
-_f1:
- bl _f0
-
- .data
-_d0:
-Ld0_0:
- .long Lsc0_0 - Ld0_0
-
- .section __TEXT,__cstring,cstring_literals
-Lsc0_0:
- .long 0
-
-@ CHECK: ('cputype', 12)
-@ CHECK: ('cpusubtype', 9)
-@ CHECK: ('filetype', 1)
-@ CHECK: ('num_load_commands', 3)
-@ CHECK: ('load_commands_size', 364)
-@ CHECK: ('flag', 0)
-@ CHECK: ('load_commands', [
-@ CHECK: # Load Command 0
-@ CHECK: (('command', 1)
-@ CHECK: ('size', 260)
-@ CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('vm_addr', 0)
-@ CHECK: ('vm_size', 16)
-@ CHECK: ('file_offset', 392)
-@ CHECK: ('file_size', 16)
-@ CHECK: ('maxprot', 7)
-@ CHECK: ('initprot', 7)
-@ CHECK: ('num_sections', 3)
-@ CHECK: ('flags', 0)
-@ CHECK: ('sections', [
-@ CHECK: # Section 0
-@ CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 0)
-@ CHECK: ('size', 8)
-@ CHECK: ('offset', 392)
-@ CHECK: ('alignment', 0)
-@ CHECK: ('reloc_offset', 408)
-@ CHECK: ('num_reloc', 2)
-@ CHECK: ('flags', 0x80000400)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0x4),
-@ CHECK: ('word-1', 0x55000001)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0x0),
-@ CHECK: ('word-1', 0x5d000003)),
-@ CHECK: ])
-@ CHECK: ('_section_data', 'feffffeb fdffffeb')
-@ CHECK: # Section 1
-@ CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 8)
-@ CHECK: ('size', 4)
-@ CHECK: ('offset', 400)
-@ CHECK: ('alignment', 0)
-@ CHECK: ('reloc_offset', 424)
-@ CHECK: ('num_reloc', 2)
-@ CHECK: ('flags', 0x0)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: # Relocation 0
-@ CHECK: (('word-0', 0xa2000000),
-@ CHECK: ('word-1', 0xc)),
-@ CHECK: # Relocation 1
-@ CHECK: (('word-0', 0xa1000000),
-@ CHECK: ('word-1', 0x8)),
-@ CHECK: ])
-@ CHECK: ('_section_data', '04000000')
-@ CHECK: # Section 2
-@ CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
-@ CHECK: ('address', 12)
-@ CHECK: ('size', 4)
-@ CHECK: ('offset', 404)
-@ CHECK: ('alignment', 0)
-@ CHECK: ('reloc_offset', 0)
-@ CHECK: ('num_reloc', 0)
-@ CHECK: ('flags', 0x2)
-@ CHECK: ('reserved1', 0)
-@ CHECK: ('reserved2', 0)
-@ CHECK: ),
-@ CHECK: ('_relocations', [
-@ CHECK: ])
-@ CHECK: ('_section_data', '00000000')
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: # Load Command 1
-@ CHECK: (('command', 2)
-@ CHECK: ('size', 24)
-@ CHECK: ('symoff', 440)
-@ CHECK: ('nsyms', 4)
-@ CHECK: ('stroff', 488)
-@ CHECK: ('strsize', 24)
-@ CHECK: ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
-@ CHECK: ('_symbols', [
-@ CHECK: # Symbol 0
-@ CHECK: (('n_strx', 9)
-@ CHECK: ('n_type', 0xe)
-@ CHECK: ('n_sect', 1)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 0)
-@ CHECK: ('_string', '_f0')
-@ CHECK: ),
-@ CHECK: # Symbol 1
-@ CHECK: (('n_strx', 13)
-@ CHECK: ('n_type', 0xe)
-@ CHECK: ('n_sect', 1)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 4)
-@ CHECK: ('_string', '_f1')
-@ CHECK: ),
-@ CHECK: # Symbol 2
-@ CHECK: (('n_strx', 17)
-@ CHECK: ('n_type', 0xe)
-@ CHECK: ('n_sect', 2)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 8)
-@ CHECK: ('_string', '_d0')
-@ CHECK: ),
-@ CHECK: # Symbol 3
-@ CHECK: (('n_strx', 1)
-@ CHECK: ('n_type', 0x1)
-@ CHECK: ('n_sect', 0)
-@ CHECK: ('n_desc', 0)
-@ CHECK: ('n_value', 0)
-@ CHECK: ('_string', '_printf')
-@ CHECK: ),
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: # Load Command 2
-@ CHECK: (('command', 11)
-@ CHECK: ('size', 80)
-@ CHECK: ('ilocalsym', 0)
-@ CHECK: ('nlocalsym', 3)
-@ CHECK: ('iextdefsym', 3)
-@ CHECK: ('nextdefsym', 0)
-@ CHECK: ('iundefsym', 3)
-@ CHECK: ('nundefsym', 1)
-@ CHECK: ('tocoff', 0)
-@ CHECK: ('ntoc', 0)
-@ CHECK: ('modtaboff', 0)
-@ CHECK: ('nmodtab', 0)
-@ CHECK: ('extrefsymoff', 0)
-@ CHECK: ('nextrefsyms', 0)
-@ CHECK: ('indirectsymoff', 0)
-@ CHECK: ('nindirectsyms', 0)
-@ CHECK: ('extreloff', 0)
-@ CHECK: ('nextrel', 0)
-@ CHECK: ('locreloff', 0)
-@ CHECK: ('nlocrel', 0)
-@ CHECK: ('_indirect_symbols', [
-@ CHECK: ])
-@ CHECK: ),
-@ CHECK: ])
diff --git a/test/MC/ARM/dg.exp b/test/MC/ARM/dg.exp
deleted file mode 100644
index 055fa2507d3c..000000000000
--- a/test/MC/ARM/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
index f722dd7c070e..7da79c31dc35 100644
--- a/test/MC/ARM/diagnostics.s
+++ b/test/MC/ARM/diagnostics.s
@@ -216,7 +216,7 @@
@ Out of order STM registers
stmda sp!, {r5, r2}
-@ CHECK-ERRORS: error: register list not in ascending order
+@ CHECK-ERRORS: warning: register list not in ascending order
@ CHECK-ERRORS: stmda sp!, {r5, r2}
@ CHECK-ERRORS: ^
diff --git a/test/MC/ARM/dot-req.s b/test/MC/ARM/dot-req.s
new file mode 100644
index 000000000000..3b4cf5c80c00
--- /dev/null
+++ b/test/MC/ARM/dot-req.s
@@ -0,0 +1,11 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+ .syntax unified
+bar:
+fred .req r5
+ mov r11, fred
+.unreq fred
+fred .req r6
+ mov r1, fred
+
+@ CHECK: mov r11, r5 @ encoding: [0x05,0xb0,0xa0,0xe1]
+@ CHECK: mov r1, r6 @ encoding: [0x06,0x10,0xa0,0xe1]
diff --git a/test/MC/ARM/elf-reloc-01.ll b/test/MC/ARM/elf-reloc-01.ll
index e6efe7eb94c5..6899d92b50f8 100644
--- a/test/MC/ARM/elf-reloc-01.ll
+++ b/test/MC/ARM/elf-reloc-01.ll
@@ -42,12 +42,12 @@ entry:
]
bb: ; preds = %entry
- volatile store i32 11, i32* @var_tls, align 4
- volatile store double 2.200000e+01, double* @var_tls_double, align 8
- volatile store i32 33, i32* @var_static, align 4
- volatile store double 4.400000e+01, double* @var_static_double, align 8
- volatile store i32 55, i32* @var_global, align 4
- volatile store double 6.600000e+01, double* @var_global_double, align 8
+ store volatile i32 11, i32* @var_tls, align 4
+ store volatile double 2.200000e+01, double* @var_tls_double, align 8
+ store volatile i32 33, i32* @var_static, align 4
+ store volatile double 4.400000e+01, double* @var_static_double, align 8
+ store volatile i32 55, i32* @var_global, align 4
+ store volatile double 6.600000e+01, double* @var_global_double, align 8
br label %bb3
bb2: ; preds = %entry
diff --git a/test/MC/ARM/elf-reloc-condcall.s b/test/MC/ARM/elf-reloc-condcall.s
new file mode 100644
index 000000000000..dcc62d33c2f0
--- /dev/null
+++ b/test/MC/ARM/elf-reloc-condcall.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=armv7-linux-gnueabi -filetype=obj %s -o - | \
+// RUN: elf-dump | FileCheck -check-prefix=OBJ %s
+
+ bleq some_label
+ bl some_label
+ blx some_label
+// OBJ: .rel.text
+
+// OBJ: 'r_offset', 0x00000000
+// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1d
+
+// OBJ: 'r_offset', 0x00000004
+// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1c
+
+// OBJ: 'r_offset', 0x00000008
+// OBJ-NEXT: 'r_sym', 0x000004
+// OBJ-NEXT: 'r_type', 0x1c
+
+// OBJ: .symtab
+// OBJ: Symbol 4
+// OBJ-NEXT: some_label
\ No newline at end of file
diff --git a/test/MC/ARM/elf-thumbfunc-reloc.s b/test/MC/ARM/elf-thumbfunc-reloc.s
new file mode 100644
index 000000000000..4a311dd51131
--- /dev/null
+++ b/test/MC/ARM/elf-thumbfunc-reloc.s
@@ -0,0 +1,23 @@
+@@ Test st_value bit 0 of a Thumb function
+@ RUN: llvm-mc %s -triple=arm-freebsd-eabi -filetype=obj -o - | \
+@ RUN: elf-dump | FileCheck %s
+
+
+ .syntax unified
+ .text
+ .globl f
+ .align 2
+ .type f,%function
+ .code 16
+ .thumb_func
+f:
+ push {r7, lr}
+ mov r7, sp
+ bl g
+ pop {r7, pc}
+
+@@ Make sure an R_ARM_THM_CALL relocation is generated for the call to g
+@CHECK: ('_relocations', [
+@CHECK: (('r_offset', 0x00000004)
+@CHECK-NEXT: ('r_sym', 0x{{[0-9a-fA-F]+}})
+@CHECK-NEXT: ('r_type', 0x0a)
diff --git a/test/MC/ARM/lit.local.cfg b/test/MC/ARM/lit.local.cfg
new file mode 100644
index 000000000000..57009139616f
--- /dev/null
+++ b/test/MC/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/MC/ARM/mode-switch.s b/test/MC/ARM/mode-switch.s
index 9d4995439e82..afcc082ba60d 100644
--- a/test/MC/ARM/mode-switch.s
+++ b/test/MC/ARM/mode-switch.s
@@ -13,3 +13,14 @@
.code 16
adds r0, r0, r1
@ CHECK: adds r0, r0, r1 @ encoding: [0x40,0x18]
+
+.arm
+ add r0, r0, r1
+@ CHECK: add r0, r0, r1 @ encoding: [0x01,0x00,0x80,0xe0]
+
+.thumb
+ add.w r0, r0, r1
+ adds r0, r0, r1
+
+@ CHECK: add.w r0, r0, r1 @ encoding: [0x00,0xeb,0x01,0x00]
+@ CHECK: adds r0, r0, r1 @ encoding: [0x40,0x18]
diff --git a/test/MC/ARM/neon-add-encoding.s b/test/MC/ARM/neon-add-encoding.s
index e425397b7901..1fdfa4c13dfb 100644
--- a/test/MC/ARM/neon-add-encoding.s
+++ b/test/MC/ARM/neon-add-encoding.s
@@ -90,39 +90,81 @@
@ CHECK: vrhadd.u32 q8, q8, q9 @ encoding: [0xe2,0x01,0x60,0xf3]
vrhadd.u32 q8, q8, q9
-@ CHECK: vqadd.s8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf2]
vqadd.s8 d16, d16, d17
-@ CHECK: vqadd.s16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf2]
vqadd.s16 d16, d16, d17
-@ CHECK: vqadd.s32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf2]
vqadd.s32 d16, d16, d17
-@ CHECK: vqadd.s64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf2]
vqadd.s64 d16, d16, d17
-@ CHECK: vqadd.u8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf3]
vqadd.u8 d16, d16, d17
-@ CHECK: vqadd.u16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf3]
vqadd.u16 d16, d16, d17
-@ CHECK: vqadd.u32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf3]
vqadd.u32 d16, d16, d17
-@ CHECK: vqadd.u64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf3]
vqadd.u64 d16, d16, d17
-@ CHECK: vqadd.s8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf2]
+
+@ CHECK: vqadd.s8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf3]
+
vqadd.s8 q8, q8, q9
-@ CHECK: vqadd.s16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf2]
vqadd.s16 q8, q8, q9
-@ CHECK: vqadd.s32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf2]
vqadd.s32 q8, q8, q9
-@ CHECK: vqadd.s64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf2]
vqadd.s64 q8, q8, q9
-@ CHECK: vqadd.u8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf3]
vqadd.u8 q8, q8, q9
-@ CHECK: vqadd.u16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf3]
vqadd.u16 q8, q8, q9
-@ CHECK: vqadd.u32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf3]
vqadd.u32 q8, q8, q9
-@ CHECK: vqadd.u64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf3]
vqadd.u64 q8, q8, q9
+@ CHECK: vqadd.s8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf3]
+
+
+@ Two-operand variants.
+ vqadd.s8 d16, d17
+ vqadd.s16 d16, d17
+ vqadd.s32 d16, d17
+ vqadd.s64 d16, d17
+ vqadd.u8 d16, d17
+ vqadd.u16 d16, d17
+ vqadd.u32 d16, d17
+ vqadd.u64 d16, d17
+
+@ CHECK: vqadd.s8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 d16, d16, d17 @ encoding: [0xb1,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 d16, d16, d17 @ encoding: [0xb1,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 d16, d16, d17 @ encoding: [0xb1,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 d16, d16, d17 @ encoding: [0xb1,0x00,0x70,0xf3]
+
+ vqadd.s8 q8, q9
+ vqadd.s16 q8, q9
+ vqadd.s32 q8, q9
+ vqadd.s64 q8, q9
+ vqadd.u8 q8, q9
+ vqadd.u16 q8, q9
+ vqadd.u32 q8, q9
+ vqadd.u64 q8, q9
+
+@ CHECK: vqadd.s8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf2]
+@ CHECK: vqadd.s16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf2]
+@ CHECK: vqadd.s32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf2]
+@ CHECK: vqadd.s64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf2]
+@ CHECK: vqadd.u8 q8, q8, q9 @ encoding: [0xf2,0x00,0x40,0xf3]
+@ CHECK: vqadd.u16 q8, q8, q9 @ encoding: [0xf2,0x00,0x50,0xf3]
+@ CHECK: vqadd.u32 q8, q8, q9 @ encoding: [0xf2,0x00,0x60,0xf3]
+@ CHECK: vqadd.u64 q8, q8, q9 @ encoding: [0xf2,0x00,0x70,0xf3]
+
+
@ CHECK: vaddhn.i16 d16, q8, q9 @ encoding: [0xa2,0x04,0xc0,0xf2]
vaddhn.i16 d16, q8, q9
@ CHECK: vaddhn.i32 d16, q8, q9 @ encoding: [0xa2,0x04,0xd0,0xf2]
@@ -135,3 +177,43 @@
vraddhn.i32 d16, q8, q9
@ CHECK: vraddhn.i64 d16, q8, q9 @ encoding: [0xa2,0x04,0xe0,0xf3]
vraddhn.i64 d16, q8, q9
+
+
+@ Two-operand variants
+
+ vadd.i8 d6, d5
+ vadd.i16 d7, d1
+ vadd.i32 d8, d2
+ vadd.i64 d9, d3
+
+ vadd.i8 q6, q5
+ vadd.i16 q7, q1
+ vadd.i32 q8, q2
+ vadd.i64 q9, q3
+
+@ CHECK: vadd.i8 d6, d6, d5 @ encoding: [0x05,0x68,0x06,0xf2]
+@ CHECK: vadd.i16 d7, d7, d1 @ encoding: [0x01,0x78,0x17,0xf2]
+@ CHECK: vadd.i32 d8, d8, d2 @ encoding: [0x02,0x88,0x28,0xf2]
+@ CHECK: vadd.i64 d9, d9, d3 @ encoding: [0x03,0x98,0x39,0xf2]
+
+@ CHECK: vadd.i8 q6, q6, q5 @ encoding: [0x4a,0xc8,0x0c,0xf2]
+@ CHECK: vadd.i16 q7, q7, q1 @ encoding: [0x42,0xe8,0x1e,0xf2]
+@ CHECK: vadd.i32 q8, q8, q2 @ encoding: [0xc4,0x08,0x60,0xf2]
+@ CHECK: vadd.i64 q9, q9, q3 @ encoding: [0xc6,0x28,0x72,0xf2]
+
+
+ vaddw.s8 q6, d5
+ vaddw.s16 q7, d1
+ vaddw.s32 q8, d2
+
+ vaddw.u8 q6, d5
+ vaddw.u16 q7, d1
+ vaddw.u32 q8, d2
+
+@ CHECK: vaddw.s8 q6, q6, d5 @ encoding: [0x05,0xc1,0x8c,0xf2]
+@ CHECK: vaddw.s16 q7, q7, d1 @ encoding: [0x01,0xe1,0x9e,0xf2]
+@ CHECK: vaddw.s32 q8, q8, d2 @ encoding: [0x82,0x01,0xe0,0xf2]
+
+@ CHECK: vaddw.u8 q6, q6, d5 @ encoding: [0x05,0xc1,0x8c,0xf3]
+@ CHECK: vaddw.u16 q7, q7, d1 @ encoding: [0x01,0xe1,0x9e,0xf3]
+@ CHECK: vaddw.u32 q8, q8, d2 @ encoding: [0x82,0x01,0xe0,0xf3]
diff --git a/test/MC/ARM/neon-bitwise-encoding.s b/test/MC/ARM/neon-bitwise-encoding.s
index 81e2c4d099bf..2ce9bccf6727 100644
--- a/test/MC/ARM/neon-bitwise-encoding.s
+++ b/test/MC/ARM/neon-bitwise-encoding.s
@@ -22,9 +22,9 @@
vorr.i32 q8, #0x1000000
vorr.i32 q8, #0x0
-@ FIXME: vorr.i32 d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2]
-@ FIXME: vorr.i32 q8, #0x1000000 @ encoding: [0x51,0x07,0xc0,0xf2]
-@ FIXME: vorr.i32 q8, #0x0 @ encoding: [0x50,0x01,0xc0,0xf2]
+@ CHECK: vorr.i32 d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2]
+@ CHECK: vorr.i32 q8, #0x1000000 @ encoding: [0x51,0x07,0xc0,0xf2]
+@ CHECK: vorr.i32 q8, #0x0 @ encoding: [0x50,0x01,0xc0,0xf2]
vbic d16, d17, d16
vbic q8, q8, q9
@@ -33,8 +33,8 @@
@ CHECK: vbic d16, d17, d16 @ encoding: [0xb0,0x01,0x51,0xf2]
@ CHECK: vbic q8, q8, q9 @ encoding: [0xf2,0x01,0x50,0xf2]
-@ FIXME: vbic.i32 d16, #0xFF000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
-@ FIXME: vbic.i32 q8, #0xFF000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
+@ CHECK: vbic.i32 d16, #0xff000000 @ encoding: [0x3f,0x07,0xc7,0xf3]
+@ CHECK: vbic.i32 q8, #0xff000000 @ encoding: [0x7f,0x07,0xc7,0xf3]
vorn d16, d17, d16
vorn q8, q8, q9
@@ -53,3 +53,211 @@
@ CHECK: vbsl d18, d17, d16 @ encoding: [0xb0,0x21,0x51,0xf3]
@ CHECK: vbsl q8, q10, q9 @ encoding: [0xf2,0x01,0x54,0xf3]
+
+
+@ Size suffixes are optional.
+ veor q4, q7, q3
+ veor.8 q4, q7, q3
+ veor.16 q4, q7, q3
+ veor.32 q4, q7, q3
+ veor.64 q4, q7, q3
+
+ veor.i8 q4, q7, q3
+ veor.i16 q4, q7, q3
+ veor.i32 q4, q7, q3
+ veor.i64 q4, q7, q3
+
+ veor.s8 q4, q7, q3
+ veor.s16 q4, q7, q3
+ veor.s32 q4, q7, q3
+ veor.s64 q4, q7, q3
+
+ veor.u8 q4, q7, q3
+ veor.u16 q4, q7, q3
+ veor.u32 q4, q7, q3
+ veor.u64 q4, q7, q3
+
+ veor.p8 q4, q7, q3
+ veor.p16 q4, q7, q3
+ veor.f32 q4, q7, q3
+ veor.f64 q4, q7, q3
+
+ veor.f q4, q7, q3
+ veor.d q4, q7, q3
+
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+@ CHECK: veor q4, q7, q3 @ encoding: [0x56,0x81,0x0e,0xf3]
+
+
+ vand d4, d7, d3
+ vand.8 d4, d7, d3
+ vand.16 d4, d7, d3
+ vand.32 d4, d7, d3
+ vand.64 d4, d7, d3
+
+ vand.i8 d4, d7, d3
+ vand.i16 d4, d7, d3
+ vand.i32 d4, d7, d3
+ vand.i64 d4, d7, d3
+
+ vand.s8 d4, d7, d3
+ vand.s16 d4, d7, d3
+ vand.s32 d4, d7, d3
+ vand.s64 d4, d7, d3
+
+ vand.u8 d4, d7, d3
+ vand.u16 d4, d7, d3
+ vand.u32 d4, d7, d3
+ vand.u64 d4, d7, d3
+
+ vand.p8 d4, d7, d3
+ vand.p16 d4, d7, d3
+ vand.f32 d4, d7, d3
+ vand.f64 d4, d7, d3
+
+ vand.f d4, d7, d3
+ vand.d d4, d7, d3
+
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+@ CHECK: vand d4, d7, d3 @ encoding: [0x13,0x41,0x07,0xf2]
+
+ vorr d4, d7, d3
+ vorr.8 d4, d7, d3
+ vorr.16 d4, d7, d3
+ vorr.32 d4, d7, d3
+ vorr.64 d4, d7, d3
+
+ vorr.i8 d4, d7, d3
+ vorr.i16 d4, d7, d3
+ vorr.i32 d4, d7, d3
+ vorr.i64 d4, d7, d3
+
+ vorr.s8 d4, d7, d3
+ vorr.s16 d4, d7, d3
+ vorr.s32 q4, q7, q3
+ vorr.s64 q4, q7, q3
+
+ vorr.u8 q4, q7, q3
+ vorr.u16 q4, q7, q3
+ vorr.u32 q4, q7, q3
+ vorr.u64 q4, q7, q3
+
+ vorr.p8 q4, q7, q3
+ vorr.p16 q4, q7, q3
+ vorr.f32 q4, q7, q3
+ vorr.f64 q4, q7, q3
+
+ vorr.f q4, q7, q3
+ vorr.d q4, q7, q3
+
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr d4, d7, d3 @ encoding: [0x13,0x41,0x27,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+@ CHECK: vorr q4, q7, q3 @ encoding: [0x56,0x81,0x2e,0xf2]
+
+@ Two-operand aliases
+ vand.s8 q6, q5
+ vand.s16 q7, q1
+ vand.s32 q8, q2
+ vand.f64 q8, q2
+
+ veor.8 q6, q5
+ veor.p16 q7, q1
+ veor.u32 q8, q2
+ veor.d q8, q2
+
+ veor.i8 q6, q5
+ veor.16 q7, q1
+ veor.f q8, q2
+ veor.i64 q8, q2
+
+@ CHECK: vand q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf2]
+@ CHECK: vand q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf2]
+@ CHECK: vand q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf2]
+@ CHECK: vand q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf2]
+
+@ CHECK: veor q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+
+@ CHECK: veor q6, q6, q5 @ encoding: [0x5a,0xc1,0x0c,0xf3]
+@ CHECK: veor q7, q7, q1 @ encoding: [0x52,0xe1,0x0e,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
+@ CHECK: veor q8, q8, q2 @ encoding: [0xd4,0x01,0x40,0xf3]
diff --git a/test/MC/ARM/neon-cmp-encoding.s b/test/MC/ARM/neon-cmp-encoding.s
index d94e2f72079f..b3aedb8e52ec 100644
--- a/test/MC/ARM/neon-cmp-encoding.s
+++ b/test/MC/ARM/neon-cmp-encoding.s
@@ -111,3 +111,66 @@
@ CHECK: vcle.s8 d16, d16, #0 @ encoding: [0xa0,0x01,0xf1,0xf3]
@ CHECK: vcgt.s8 d16, d16, #0 @ encoding: [0x20,0x00,0xf1,0xf3]
@ CHECK: vclt.s8 d16, d16, #0 @ encoding: [0x20,0x02,0xf1,0xf3]
+
+
+ vclt.s8 q12, q13, q3
+ vclt.s16 q12, q13, q3
+ vclt.s32 q12, q13, q3
+ vclt.u8 q12, q13, q3
+ vclt.u16 q12, q13, q3
+ vclt.u32 q12, q13, q3
+ vclt.f32 q12, q13, q3
+
+ vclt.s8 d12, d13, d3
+ vclt.s16 d12, d13, d3
+ vclt.s32 d12, d13, d3
+ vclt.u8 d12, d13, d3
+ vclt.u16 d12, d13, d3
+ vclt.u32 d12, d13, d3
+ vclt.f32 d12, d13, d3
+
+@ CHECK: vcgt.s8 q12, q3, q13 @ encoding: [0x6a,0x83,0x46,0xf2]
+@ CHECK: vcgt.s16 q12, q3, q13 @ encoding: [0x6a,0x83,0x56,0xf2]
+@ CHECK: vcgt.s32 q12, q3, q13 @ encoding: [0x6a,0x83,0x66,0xf2]
+@ CHECK: vcgt.u8 q12, q3, q13 @ encoding: [0x6a,0x83,0x46,0xf3]
+@ CHECK: vcgt.u16 q12, q3, q13 @ encoding: [0x6a,0x83,0x56,0xf3]
+@ CHECK: vcgt.u32 q12, q3, q13 @ encoding: [0x6a,0x83,0x66,0xf3]
+@ CHECK: vcgt.f32 q12, q3, q13 @ encoding: [0x6a,0x8e,0x66,0xf3]
+
+@ CHECK: vcgt.s8 d12, d3, d13 @ encoding: [0x0d,0xc3,0x03,0xf2]
+@ CHECK: vcgt.s16 d12, d3, d13 @ encoding: [0x0d,0xc3,0x13,0xf2]
+@ CHECK: vcgt.s32 d12, d3, d13 @ encoding: [0x0d,0xc3,0x23,0xf2]
+@ CHECK: vcgt.u8 d12, d3, d13 @ encoding: [0x0d,0xc3,0x03,0xf3]
+@ CHECK: vcgt.u16 d12, d3, d13 @ encoding: [0x0d,0xc3,0x13,0xf3]
+@ CHECK: vcgt.u32 d12, d3, d13 @ encoding: [0x0d,0xc3,0x23,0xf3]
+@ CHECK: vcgt.f32 d12, d3, d13 @ encoding: [0x0d,0xce,0x23,0xf3]
+
+ vcle.s8 d16, d16, d17
+ vcle.s16 d16, d16, d17
+ vcle.s32 d16, d16, d17
+ vcle.u8 d16, d16, d17
+ vcle.u16 d16, d16, d17
+ vcle.u32 d16, d16, d17
+ vcle.f32 d16, d16, d17
+ vcle.s8 q8, q8, q9
+ vcle.s16 q8, q8, q9
+ vcle.s32 q8, q8, q9
+ vcle.u8 q8, q8, q9
+ vcle.u16 q8, q8, q9
+ vcle.u32 q8, q8, q9
+ vcle.f32 q8, q8, q9
+
+@ CHECK: vcge.s8 d16, d17, d16 @ encoding: [0xb0,0x03,0x41,0xf2]
+@ CHECK: vcge.s16 d16, d17, d16 @ encoding: [0xb0,0x03,0x51,0xf2]
+@ CHECK: vcge.s32 d16, d17, d16 @ encoding: [0xb0,0x03,0x61,0xf2]
+@ CHECK: vcge.u8 d16, d17, d16 @ encoding: [0xb0,0x03,0x41,0xf3]
+@ CHECK: vcge.u16 d16, d17, d16 @ encoding: [0xb0,0x03,0x51,0xf3]
+@ CHECK: vcge.u32 d16, d17, d16 @ encoding: [0xb0,0x03,0x61,0xf3]
+@ CHECK: vcge.f32 d16, d17, d16 @ encoding: [0xa0,0x0e,0x41,0xf3]
+@ CHECK: vcge.s8 q8, q9, q8 @ encoding: [0xf0,0x03,0x42,0xf2]
+@ CHECK: vcge.s16 q8, q9, q8 @ encoding: [0xf0,0x03,0x52,0xf2]
+@ CHECK: vcge.s32 q8, q9, q8 @ encoding: [0xf0,0x03,0x62,0xf2]
+@ CHECK: vcge.u8 q8, q9, q8 @ encoding: [0xf0,0x03,0x42,0xf3]
+@ CHECK: vcge.u16 q8, q9, q8 @ encoding: [0xf0,0x03,0x52,0xf3]
+@ CHECK: vcge.u32 q8, q9, q8 @ encoding: [0xf0,0x03,0x62,0xf3]
+@ CHECK: vcge.f32 q8, q9, q8 @ encoding: [0xe0,0x0e,0x42,0xf3]
diff --git a/test/MC/ARM/neon-minmax-encoding.s b/test/MC/ARM/neon-minmax-encoding.s
index 2d0d8c9b8aeb..b1eb258b0757 100644
--- a/test/MC/ARM/neon-minmax-encoding.s
+++ b/test/MC/ARM/neon-minmax-encoding.s
@@ -1,58 +1,124 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ CHECK: vmin.s8 d16, d16, d17 @ encoding: [0xb1,0x06,0x40,0xf2]
- vmin.s8 d16, d16, d17
-@ CHECK: vmin.s16 d16, d16, d17 @ encoding: [0xb1,0x06,0x50,0xf2]
- vmin.s16 d16, d16, d17
-@ CHECK: vmin.s32 d16, d16, d17 @ encoding: [0xb1,0x06,0x60,0xf2]
- vmin.s32 d16, d16, d17
-@ CHECK: vmin.u8 d16, d16, d17 @ encoding: [0xb1,0x06,0x40,0xf3]
- vmin.u8 d16, d16, d17
-@ CHECK: vmin.u16 d16, d16, d17 @ encoding: [0xb1,0x06,0x50,0xf3]
- vmin.u16 d16, d16, d17
-@ CHECK: vmin.u32 d16, d16, d17 @ encoding: [0xb1,0x06,0x60,0xf3]
- vmin.u32 d16, d16, d17
-@ CHECK: vmin.f32 d16, d16, d17 @ encoding: [0xa1,0x0f,0x60,0xf2]
- vmin.f32 d16, d16, d17
-@ CHECK: vmin.s8 q8, q8, q9 @ encoding: [0xf2,0x06,0x40,0xf2]
- vmin.s8 q8, q8, q9
-@ CHECK: vmin.s16 q8, q8, q9 @ encoding: [0xf2,0x06,0x50,0xf2]
- vmin.s16 q8, q8, q9
-@ CHECK: vmin.s32 q8, q8, q9 @ encoding: [0xf2,0x06,0x60,0xf2]
- vmin.s32 q8, q8, q9
-@ CHECK: vmin.u8 q8, q8, q9 @ encoding: [0xf2,0x06,0x40,0xf3]
- vmin.u8 q8, q8, q9
-@ CHECK: vmin.u16 q8, q8, q9 @ encoding: [0xf2,0x06,0x50,0xf3]
- vmin.u16 q8, q8, q9
-@ CHECK: vmin.u32 q8, q8, q9 @ encoding: [0xf2,0x06,0x60,0xf3]
- vmin.u32 q8, q8, q9
-@ CHECK: vmin.f32 q8, q8, q9 @ encoding: [0xe2,0x0f,0x60,0xf2]
- vmin.f32 q8, q8, q9
-@ CHECK: vmax.s8 d16, d16, d17 @ encoding: [0xa1,0x06,0x40,0xf2]
- vmax.s8 d16, d16, d17
-@ CHECK: vmax.s16 d16, d16, d17 @ encoding: [0xa1,0x06,0x50,0xf2]
- vmax.s16 d16, d16, d17
-@ CHECK: vmax.s32 d16, d16, d17 @ encoding: [0xa1,0x06,0x60,0xf2]
- vmax.s32 d16, d16, d17
-@ CHECK: vmax.u8 d16, d16, d17 @ encoding: [0xa1,0x06,0x40,0xf3]
- vmax.u8 d16, d16, d17
-@ CHECK: vmax.u16 d16, d16, d17 @ encoding: [0xa1,0x06,0x50,0xf3]
- vmax.u16 d16, d16, d17
-@ CHECK: vmax.u32 d16, d16, d17 @ encoding: [0xa1,0x06,0x60,0xf3]
- vmax.u32 d16, d16, d17
-@ CHECK: vmax.f32 d16, d16, d17 @ encoding: [0xa1,0x0f,0x40,0xf2]
- vmax.f32 d16, d16, d17
-@ CHECK: vmax.s8 q8, q8, q9 @ encoding: [0xe2,0x06,0x40,0xf2]
- vmax.s8 q8, q8, q9
-@ CHECK: vmax.s16 q8, q8, q9 @ encoding: [0xe2,0x06,0x50,0xf2]
- vmax.s16 q8, q8, q9
+ vmax.s8 d1, d2, d3
+ vmax.s16 d4, d5, d6
+ vmax.s32 d7, d8, d9
+ vmax.u8 d10, d11, d12
+ vmax.u16 d13, d14, d15
+ vmax.u32 d16, d17, d18
+ vmax.f32 d19, d20, d21
+
+ vmax.s8 d2, d3
+ vmax.s16 d5, d6
+ vmax.s32 d8, d9
+ vmax.u8 d11, d12
+ vmax.u16 d14, d15
+ vmax.u32 d17, d18
+ vmax.f32 d20, d21
+
+ vmax.s8 q1, q2, q3
+ vmax.s16 q4, q5, q6
+ vmax.s32 q7, q8, q9
+ vmax.u8 q10, q11, q12
+ vmax.u16 q13, q14, q15
+ vmax.u32 q6, q7, q8
+ vmax.f32 q9, q5, q1
+
+ vmax.s8 q2, q3
+ vmax.s16 q5, q6
+ vmax.s32 q8, q9
+ vmax.u8 q11, q2
+ vmax.u16 q4, q5
+ vmax.u32 q7, q8
+ vmax.f32 q2, q1
+
+@ CHECK: vmax.s8 d1, d2, d3 @ encoding: [0x03,0x16,0x02,0xf2]
+@ CHECK: vmax.s16 d4, d5, d6 @ encoding: [0x06,0x46,0x15,0xf2]
+@ CHECK: vmax.s32 d7, d8, d9 @ encoding: [0x09,0x76,0x28,0xf2]
+@ CHECK: vmax.u8 d10, d11, d12 @ encoding: [0x0c,0xa6,0x0b,0xf3]
+@ CHECK: vmax.u16 d13, d14, d15 @ encoding: [0x0f,0xd6,0x1e,0xf3]
+@ CHECK: vmax.u32 d16, d17, d18 @ encoding: [0xa2,0x06,0x61,0xf3]
+@ CHECK: vmax.f32 d19, d20, d21 @ encoding: [0xa5,0x3f,0x44,0xf2]
+@ CHECK: vmax.s8 d2, d2, d3 @ encoding: [0x03,0x26,0x02,0xf2]
+@ CHECK: vmax.s16 d5, d5, d6 @ encoding: [0x06,0x56,0x15,0xf2]
+@ CHECK: vmax.s32 d8, d8, d9 @ encoding: [0x09,0x86,0x28,0xf2]
+@ CHECK: vmax.u8 d11, d11, d12 @ encoding: [0x0c,0xb6,0x0b,0xf3]
+@ CHECK: vmax.u16 d14, d14, d15 @ encoding: [0x0f,0xe6,0x1e,0xf3]
+@ CHECK: vmax.u32 d17, d17, d18 @ encoding: [0xa2,0x16,0x61,0xf3]
+@ CHECK: vmax.f32 d20, d20, d21 @ encoding: [0xa5,0x4f,0x44,0xf2]
+@ CHECK: vmax.s8 q1, q2, q3 @ encoding: [0x46,0x26,0x04,0xf2]
+@ CHECK: vmax.s16 q4, q5, q6 @ encoding: [0x4c,0x86,0x1a,0xf2]
+@ CHECK: vmax.s32 q7, q8, q9 @ encoding: [0xe2,0xe6,0x20,0xf2]
+@ CHECK: vmax.u8 q10, q11, q12 @ encoding: [0xe8,0x46,0x46,0xf3]
+@ CHECK: vmax.u16 q13, q14, q15 @ encoding: [0xee,0xa6,0x5c,0xf3]
+@ CHECK: vmax.u32 q6, q7, q8 @ encoding: [0x60,0xc6,0x2e,0xf3]
+@ CHECK: vmax.f32 q9, q5, q1 @ encoding: [0x42,0x2f,0x4a,0xf2]
+@ CHECK: vmax.s8 q2, q2, q3 @ encoding: [0x46,0x46,0x04,0xf2]
+@ CHECK: vmax.s16 q5, q5, q6 @ encoding: [0x4c,0xa6,0x1a,0xf2]
@ CHECK: vmax.s32 q8, q8, q9 @ encoding: [0xe2,0x06,0x60,0xf2]
- vmax.s32 q8, q8, q9
-@ CHECK: vmax.u8 q8, q8, q9 @ encoding: [0xe2,0x06,0x40,0xf3]
- vmax.u8 q8, q8, q9
-@ CHECK: vmax.u16 q8, q8, q9 @ encoding: [0xe2,0x06,0x50,0xf3]
- vmax.u16 q8, q8, q9
-@ CHECK: vmax.u32 q8, q8, q9 @ encoding: [0xe2,0x06,0x60,0xf3]
- vmax.u32 q8, q8, q9
-@ CHECK: vmax.f32 q8, q8, q9 @ encoding: [0xe2,0x0f,0x40,0xf2]
- vmax.f32 q8, q8, q9
+@ CHECK: vmax.u8 q11, q11, q2 @ encoding: [0xc4,0x66,0x46,0xf3]
+@ CHECK: vmax.u16 q4, q4, q5 @ encoding: [0x4a,0x86,0x18,0xf3]
+@ CHECK: vmax.u32 q7, q7, q8 @ encoding: [0x60,0xe6,0x2e,0xf3]
+@ CHECK: vmax.f32 q2, q2, q1 @ encoding: [0x42,0x4f,0x04,0xf2]
+
+
+ vmin.s8 d1, d2, d3
+ vmin.s16 d4, d5, d6
+ vmin.s32 d7, d8, d9
+ vmin.u8 d10, d11, d12
+ vmin.u16 d13, d14, d15
+ vmin.u32 d16, d17, d18
+ vmin.f32 d19, d20, d21
+
+ vmin.s8 d2, d3
+ vmin.s16 d5, d6
+ vmin.s32 d8, d9
+ vmin.u8 d11, d12
+ vmin.u16 d14, d15
+ vmin.u32 d17, d18
+ vmin.f32 d20, d21
+
+ vmin.s8 q1, q2, q3
+ vmin.s16 q4, q5, q6
+ vmin.s32 q7, q8, q9
+ vmin.u8 q10, q11, q12
+ vmin.u16 q13, q14, q15
+ vmin.u32 q6, q7, q8
+ vmin.f32 q9, q5, q1
+
+ vmin.s8 q2, q3
+ vmin.s16 q5, q6
+ vmin.s32 q8, q9
+ vmin.u8 q11, q2
+ vmin.u16 q4, q5
+ vmin.u32 q7, q8
+ vmin.f32 q2, q1
+
+@ CHECK: vmin.s8 d1, d2, d3 @ encoding: [0x13,0x16,0x02,0xf2]
+@ CHECK: vmin.s16 d4, d5, d6 @ encoding: [0x16,0x46,0x15,0xf2]
+@ CHECK: vmin.s32 d7, d8, d9 @ encoding: [0x19,0x76,0x28,0xf2]
+@ CHECK: vmin.u8 d10, d11, d12 @ encoding: [0x1c,0xa6,0x0b,0xf3]
+@ CHECK: vmin.u16 d13, d14, d15 @ encoding: [0x1f,0xd6,0x1e,0xf3]
+@ CHECK: vmin.u32 d16, d17, d18 @ encoding: [0xb2,0x06,0x61,0xf3]
+@ CHECK: vmin.f32 d19, d20, d21 @ encoding: [0xa5,0x3f,0x64,0xf2]
+@ CHECK: vmin.s8 d2, d2, d3 @ encoding: [0x13,0x26,0x02,0xf2]
+@ CHECK: vmin.s16 d5, d5, d6 @ encoding: [0x16,0x56,0x15,0xf2]
+@ CHECK: vmin.s32 d8, d8, d9 @ encoding: [0x19,0x86,0x28,0xf2]
+@ CHECK: vmin.u8 d11, d11, d12 @ encoding: [0x1c,0xb6,0x0b,0xf3]
+@ CHECK: vmin.u16 d14, d14, d15 @ encoding: [0x1f,0xe6,0x1e,0xf3]
+@ CHECK: vmin.u32 d17, d17, d18 @ encoding: [0xb2,0x16,0x61,0xf3]
+@ CHECK: vmin.f32 d20, d20, d21 @ encoding: [0xa5,0x4f,0x64,0xf2]
+@ CHECK: vmin.s8 q1, q2, q3 @ encoding: [0x56,0x26,0x04,0xf2]
+@ CHECK: vmin.s16 q4, q5, q6 @ encoding: [0x5c,0x86,0x1a,0xf2]
+@ CHECK: vmin.s32 q7, q8, q9 @ encoding: [0xf2,0xe6,0x20,0xf2]
+@ CHECK: vmin.u8 q10, q11, q12 @ encoding: [0xf8,0x46,0x46,0xf3]
+@ CHECK: vmin.u16 q13, q14, q15 @ encoding: [0xfe,0xa6,0x5c,0xf3]
+@ CHECK: vmin.u32 q6, q7, q8 @ encoding: [0x70,0xc6,0x2e,0xf3]
+@ CHECK: vmin.f32 q9, q5, q1 @ encoding: [0x42,0x2f,0x6a,0xf2]
+@ CHECK: vmin.s8 q2, q2, q3 @ encoding: [0x56,0x46,0x04,0xf2]
+@ CHECK: vmin.s16 q5, q5, q6 @ encoding: [0x5c,0xa6,0x1a,0xf2]
+@ CHECK: vmin.s32 q8, q8, q9 @ encoding: [0xf2,0x06,0x60,0xf2]
+@ CHECK: vmin.u8 q11, q11, q2 @ encoding: [0xd4,0x66,0x46,0xf3]
+@ CHECK: vmin.u16 q4, q4, q5 @ encoding: [0x5a,0x86,0x18,0xf3]
+@ CHECK: vmin.u32 q7, q7, q8 @ encoding: [0x70,0xe6,0x2e,0xf3]
+@ CHECK: vmin.f32 q2, q2, q1 @ encoding: [0x42,0x4f,0x24,0xf2]
diff --git a/test/MC/ARM/neon-mov-encoding.s b/test/MC/ARM/neon-mov-encoding.s
index 02eec1215ab7..6f26a13c3ea9 100644
--- a/test/MC/ARM/neon-mov-encoding.s
+++ b/test/MC/ARM/neon-mov-encoding.s
@@ -1,5 +1,4 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
vmov.i8 d16, #0x8
vmov.i16 d16, #0x10
@@ -19,9 +18,9 @@
@ CHECK: vmov.i32 d16, #0x2000 @ encoding: [0x10,0x02,0xc2,0xf2]
@ CHECK: vmov.i32 d16, #0x200000 @ encoding: [0x10,0x04,0xc2,0xf2]
@ CHECK: vmov.i32 d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xf2]
-@ CHECK: vmov.i32 d16, #0x20FF @ encoding: [0x10,0x0c,0xc2,0xf2]
-@ CHECK: vmov.i32 d16, #0x20FFFF @ encoding: [0x10,0x0d,0xc2,0xf2]
-@ CHECK: vmov.i64 d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xf3]
+@ CHECK: vmov.i32 d16, #0x20ff @ encoding: [0x10,0x0c,0xc2,0xf2]
+@ CHECK: vmov.i32 d16, #0x20ffff @ encoding: [0x10,0x0d,0xc2,0xf2]
+@ CHECK: vmov.i64 d16, #0xff0000ff0000ffff @ encoding: [0x33,0x0e,0xc1,0xf3]
@@ -43,9 +42,9 @@
@ CHECK: vmov.i32 q8, #0x2000 @ encoding: [0x50,0x02,0xc2,0xf2]
@ CHECK: vmov.i32 q8, #0x200000 @ encoding: [0x50,0x04,0xc2,0xf2]
@ CHECK: vmov.i32 q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xf2]
-@ CHECK: vmov.i32 q8, #0x20FF @ encoding: [0x50,0x0c,0xc2,0xf2]
-@ CHECK: vmov.i32 q8, #0x20FFFF @ encoding: [0x50,0x0d,0xc2,0xf2]
-@ CHECK: vmov.i64 q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xf3]
+@ CHECK: vmov.i32 q8, #0x20ff @ encoding: [0x50,0x0c,0xc2,0xf2]
+@ CHECK: vmov.i32 q8, #0x20ffff @ encoding: [0x50,0x0d,0xc2,0xf2]
+@ CHECK: vmov.i64 q8, #0xff0000ff0000ffff @ encoding: [0x73,0x0e,0xc1,0xf3]
vmvn.i16 d16, #0x10
vmvn.i16 d16, #0x1000
@@ -62,8 +61,8 @@
@ CHECK: vmvn.i32 d16, #0x2000 @ encoding: [0x30,0x02,0xc2,0xf2]
@ CHECK: vmvn.i32 d16, #0x200000 @ encoding: [0x30,0x04,0xc2,0xf2]
@ CHECK: vmvn.i32 d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xf2]
-@ CHECK: vmvn.i32 d16, #0x20FF @ encoding: [0x30,0x0c,0xc2,0xf2]
-@ CHECK: vmvn.i32 d16, #0x20FFFF @ encoding: [0x30,0x0d,0xc2,0xf2]
+@ CHECK: vmvn.i32 d16, #0x20ff @ encoding: [0x30,0x0c,0xc2,0xf2]
+@ CHECK: vmvn.i32 d16, #0x20ffff @ encoding: [0x30,0x0d,0xc2,0xf2]
vmovl.s8 q8, d16
vmovl.s16 q8, d16
diff --git a/test/MC/ARM/neon-mul-accum-encoding.s b/test/MC/ARM/neon-mul-accum-encoding.s
index ed9ceb3ecb00..e71ad7121cc6 100644
--- a/test/MC/ARM/neon-mul-accum-encoding.s
+++ b/test/MC/ARM/neon-mul-accum-encoding.s
@@ -1,66 +1,94 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ CHECK: vmla.i8 d16, d18, d17 @ encoding: [0xa1,0x09,0x42,0xf2]
vmla.i8 d16, d18, d17
-@ CHECK: vmla.i16 d16, d18, d17 @ encoding: [0xa1,0x09,0x52,0xf2]
vmla.i16 d16, d18, d17
-@ CHECK: vmla.i32 d16, d18, d17 @ encoding: [0xa1,0x09,0x62,0xf2]
vmla.i32 d16, d18, d17
-@ CHECK: vmla.f32 d16, d18, d17 @ encoding: [0xb1,0x0d,0x42,0xf2]
vmla.f32 d16, d18, d17
-@ CHECK: vmla.i8 q9, q8, q10 @ encoding: [0xe4,0x29,0x40,0xf2]
vmla.i8 q9, q8, q10
-@ CHECK: vmla.i16 q9, q8, q10 @ encoding: [0xe4,0x29,0x50,0xf2]
vmla.i16 q9, q8, q10
-@ CHECK: vmla.i32 q9, q8, q10 @ encoding: [0xe4,0x29,0x60,0xf2]
vmla.i32 q9, q8, q10
-@ CHECK: vmla.f32 q9, q8, q10 @ encoding: [0xf4,0x2d,0x40,0xf2]
vmla.f32 q9, q8, q10
-@ CHECK: vmlal.s8 q8, d19, d18 @ encoding: [0xa2,0x08,0xc3,0xf2]
+ vmla.i32 q12, q8, d3[0]
+
+@ CHECK: vmla.i8 d16, d18, d17 @ encoding: [0xa1,0x09,0x42,0xf2]
+@ CHECK: vmla.i16 d16, d18, d17 @ encoding: [0xa1,0x09,0x52,0xf2]
+@ CHECK: vmla.i32 d16, d18, d17 @ encoding: [0xa1,0x09,0x62,0xf2]
+@ CHECK: vmla.f32 d16, d18, d17 @ encoding: [0xb1,0x0d,0x42,0xf2]
+@ CHECK: vmla.i8 q9, q8, q10 @ encoding: [0xe4,0x29,0x40,0xf2]
+@ CHECK: vmla.i16 q9, q8, q10 @ encoding: [0xe4,0x29,0x50,0xf2]
+@ CHECK: vmla.i32 q9, q8, q10 @ encoding: [0xe4,0x29,0x60,0xf2]
+@ CHECK: vmla.f32 q9, q8, q10 @ encoding: [0xf4,0x2d,0x40,0xf2]
+@ CHECK: vmla.i32 q12, q8, d3[0] @ encoding: [0xc3,0x80,0xe0,0xf3]
+
+
vmlal.s8 q8, d19, d18
-@ CHECK: vmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x08,0xd3,0xf2]
vmlal.s16 q8, d19, d18
-@ CHECK: vmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x08,0xe3,0xf2]
vmlal.s32 q8, d19, d18
-@ CHECK: vmlal.u8 q8, d19, d18 @ encoding: [0xa2,0x08,0xc3,0xf3]
vmlal.u8 q8, d19, d18
-@ CHECK: vmlal.u16 q8, d19, d18 @ encoding: [0xa2,0x08,0xd3,0xf3]
vmlal.u16 q8, d19, d18
-@ CHECK: vmlal.u32 q8, d19, d18 @ encoding: [0xa2,0x08,0xe3,0xf3]
vmlal.u32 q8, d19, d18
-@ CHECK: vqdmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x09,0xd3,0xf2]
+
+@ CHECK: vmlal.s8 q8, d19, d18 @ encoding: [0xa2,0x08,0xc3,0xf2]
+@ CHECK: vmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x08,0xd3,0xf2]
+@ CHECK: vmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x08,0xe3,0xf2]
+@ CHECK: vmlal.u8 q8, d19, d18 @ encoding: [0xa2,0x08,0xc3,0xf3]
+@ CHECK: vmlal.u16 q8, d19, d18 @ encoding: [0xa2,0x08,0xd3,0xf3]
+@ CHECK: vmlal.u32 q8, d19, d18 @ encoding: [0xa2,0x08,0xe3,0xf3]
+
+
vqdmlal.s16 q8, d19, d18
-@ CHECK: vqdmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x09,0xe3,0xf2]
vqdmlal.s32 q8, d19, d18
-@ CHECK: vmls.i8 d16, d18, d17 @ encoding: [0xa1,0x09,0x42,0xf3]
+ vqdmlal.s16 q11, d11, d7[0]
+ vqdmlal.s16 q11, d11, d7[1]
+ vqdmlal.s16 q11, d11, d7[2]
+ vqdmlal.s16 q11, d11, d7[3]
+
+@ CHECK: vqdmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x09,0xd3,0xf2]
+@ CHECK: vqdmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x09,0xe3,0xf2]
+@ CHECK: vqdmlal.s16 q11, d11, d7[0] @ encoding: [0x47,0x63,0xdb,0xf2]
+@ CHECK: vqdmlal.s16 q11, d11, d7[1] @ encoding: [0x4f,0x63,0xdb,0xf2]
+@ CHECK: vqdmlal.s16 q11, d11, d7[2] @ encoding: [0x67,0x63,0xdb,0xf2]
+@ CHECK: vqdmlal.s16 q11, d11, d7[3] @ encoding: [0x6f,0x63,0xdb,0xf2]
+
+
vmls.i8 d16, d18, d17
-@ CHECK: vmls.i16 d16, d18, d17 @ encoding: [0xa1,0x09,0x52,0xf3]
vmls.i16 d16, d18, d17
-@ CHECK: vmls.i32 d16, d18, d17 @ encoding: [0xa1,0x09,0x62,0xf3]
vmls.i32 d16, d18, d17
-@ CHECK: vmls.f32 d16, d18, d17 @ encoding: [0xb1,0x0d,0x62,0xf2]
vmls.f32 d16, d18, d17
-@ CHECK: vmls.i8 q9, q8, q10 @ encoding: [0xe4,0x29,0x40,0xf3]
vmls.i8 q9, q8, q10
-@ CHECK: vmls.i16 q9, q8, q10 @ encoding: [0xe4,0x29,0x50,0xf3]
vmls.i16 q9, q8, q10
-@ CHECK: vmls.i32 q9, q8, q10 @ encoding: [0xe4,0x29,0x60,0xf3]
vmls.i32 q9, q8, q10
-@ CHECK: vmls.f32 q9, q8, q10 @ encoding: [0xf4,0x2d,0x60,0xf2]
vmls.f32 q9, q8, q10
-@ CHECK: vmlsl.s8 q8, d19, d18 @ encoding: [0xa2,0x0a,0xc3,0xf2]
+ vmls.i16 q4, q12, d6[2]
+
+@ CHECK: vmls.i8 d16, d18, d17 @ encoding: [0xa1,0x09,0x42,0xf3]
+@ CHECK: vmls.i16 d16, d18, d17 @ encoding: [0xa1,0x09,0x52,0xf3]
+@ CHECK: vmls.i32 d16, d18, d17 @ encoding: [0xa1,0x09,0x62,0xf3]
+@ CHECK: vmls.f32 d16, d18, d17 @ encoding: [0xb1,0x0d,0x62,0xf2]
+@ CHECK: vmls.i8 q9, q8, q10 @ encoding: [0xe4,0x29,0x40,0xf3]
+@ CHECK: vmls.i16 q9, q8, q10 @ encoding: [0xe4,0x29,0x50,0xf3]
+@ CHECK: vmls.i32 q9, q8, q10 @ encoding: [0xe4,0x29,0x60,0xf3]
+@ CHECK: vmls.f32 q9, q8, q10 @ encoding: [0xf4,0x2d,0x60,0xf2]
+@ CHECK: vmls.i16 q4, q12, d6[2] @ encoding: [0xe6,0x84,0x98,0xf3]
+
+
vmlsl.s8 q8, d19, d18
-@ CHECK: vmlsl.s16 q8, d19, d18 @ encoding: [0xa2,0x0a,0xd3,0xf2]
vmlsl.s16 q8, d19, d18
-@ CHECK: vmlsl.s32 q8, d19, d18 @ encoding: [0xa2,0x0a,0xe3,0xf2]
vmlsl.s32 q8, d19, d18
-@ CHECK: vmlsl.u8 q8, d19, d18 @ encoding: [0xa2,0x0a,0xc3,0xf3]
vmlsl.u8 q8, d19, d18
-@ CHECK: vmlsl.u16 q8, d19, d18 @ encoding: [0xa2,0x0a,0xd3,0xf3]
vmlsl.u16 q8, d19, d18
-@ CHECK: vmlsl.u32 q8, d19, d18 @ encoding: [0xa2,0x0a,0xe3,0xf3]
vmlsl.u32 q8, d19, d18
-@ CHECK: vqdmlsl.s16 q8, d19, d18 @ encoding: [0xa2,0x0b,0xd3,0xf2]
+
+@ CHECK: vmlsl.s8 q8, d19, d18 @ encoding: [0xa2,0x0a,0xc3,0xf2]
+@ CHECK: vmlsl.s16 q8, d19, d18 @ encoding: [0xa2,0x0a,0xd3,0xf2]
+@ CHECK: vmlsl.s32 q8, d19, d18 @ encoding: [0xa2,0x0a,0xe3,0xf2]
+@ CHECK: vmlsl.u8 q8, d19, d18 @ encoding: [0xa2,0x0a,0xc3,0xf3]
+@ CHECK: vmlsl.u16 q8, d19, d18 @ encoding: [0xa2,0x0a,0xd3,0xf3]
+@ CHECK: vmlsl.u32 q8, d19, d18 @ encoding: [0xa2,0x0a,0xe3,0xf3]
+
+
vqdmlsl.s16 q8, d19, d18
-@ CHECK: vqdmlsl.s32 q8, d19, d18 @ encoding: [0xa2,0x0b,0xe3,0xf2]
vqdmlsl.s32 q8, d19, d18
+
+@ CHECK: vqdmlsl.s16 q8, d19, d18 @ encoding: [0xa2,0x0b,0xd3,0xf2]
+@ CHECK: vqdmlsl.s32 q8, d19, d18 @ encoding: [0xa2,0x0b,0xe3,0xf2]
diff --git a/test/MC/ARM/neon-mul-encoding.s b/test/MC/ARM/neon-mul-encoding.s
index 4dc78036c025..d6bc1f3291f3 100644
--- a/test/MC/ARM/neon-mul-encoding.s
+++ b/test/MC/ARM/neon-mul-encoding.s
@@ -1,82 +1,168 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
- vmla.i8 d16, d18, d17
- vmla.i16 d16, d18, d17
- vmla.i32 d16, d18, d17
- vmla.f32 d16, d18, d17
- vmla.i8 q9, q8, q10
- vmla.i16 q9, q8, q10
- vmla.i32 q9, q8, q10
- vmla.f32 q9, q8, q10
-
-@ CHECK: vmla.i8 d16, d18, d17 @ encoding: [0xa1,0x09,0x42,0xf2]
-@ CHECK: vmla.i16 d16, d18, d17 @ encoding: [0xa1,0x09,0x52,0xf2]
-@ CHECK: vmla.i32 d16, d18, d17 @ encoding: [0xa1,0x09,0x62,0xf2]
-@ CHECK: vmla.f32 d16, d18, d17 @ encoding: [0xb1,0x0d,0x42,0xf2]
-@ CHECK: vmla.i8 q9, q8, q10 @ encoding: [0xe4,0x29,0x40,0xf2]
-@ CHECK: vmla.i16 q9, q8, q10 @ encoding: [0xe4,0x29,0x50,0xf2]
-@ CHECK: vmla.i32 q9, q8, q10 @ encoding: [0xe4,0x29,0x60,0xf2]
-@ CHECK: vmla.f32 q9, q8, q10 @ encoding: [0xf4,0x2d,0x40,0xf2]
-
-
- vmlal.s8 q8, d19, d18
- vmlal.s16 q8, d19, d18
- vmlal.s32 q8, d19, d18
- vmlal.u8 q8, d19, d18
- vmlal.u16 q8, d19, d18
- vmlal.u32 q8, d19, d18
-
-@ CHECK: vmlal.s8 q8, d19, d18 @ encoding: [0xa2,0x08,0xc3,0xf2]
-@ CHECK: vmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x08,0xd3,0xf2]
-@ CHECK: vmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x08,0xe3,0xf2]
-@ CHECK: vmlal.u8 q8, d19, d18 @ encoding: [0xa2,0x08,0xc3,0xf3]
-@ CHECK: vmlal.u16 q8, d19, d18 @ encoding: [0xa2,0x08,0xd3,0xf3]
-@ CHECK: vmlal.u32 q8, d19, d18 @ encoding: [0xa2,0x08,0xe3,0xf3]
-
-
- vqdmlal.s16 q8, d19, d18
- vqdmlal.s32 q8, d19, d18
-
-@ CHECK: vqdmlal.s16 q8, d19, d18 @ encoding: [0xa2,0x09,0xd3,0xf2]
-@ CHECK: vqdmlal.s32 q8, d19, d18 @ encoding: [0xa2,0x09,0xe3,0xf2]
-
-
- vmls.i8 d16, d18, d17
- vmls.i16 d16, d18, d17
- vmls.i32 d16, d18, d17
- vmls.f32 d16, d18, d17
- vmls.i8 q9, q8, q10
- vmls.i16 q9, q8, q10
- vmls.i32 q9, q8, q10
- vmls.f32 q9, q8, q10
-
-@ CHECK: vmls.i8 d16, d18, d17 @ encoding: [0xa1,0x09,0x42,0xf3]
-@ CHECK: vmls.i16 d16, d18, d17 @ encoding: [0xa1,0x09,0x52,0xf3]
-@ CHECK: vmls.i32 d16, d18, d17 @ encoding: [0xa1,0x09,0x62,0xf3]
-@ CHECK: vmls.f32 d16, d18, d17 @ encoding: [0xb1,0x0d,0x62,0xf2]
-@ CHECK: vmls.i8 q9, q8, q10 @ encoding: [0xe4,0x29,0x40,0xf3]
-@ CHECK: vmls.i16 q9, q8, q10 @ encoding: [0xe4,0x29,0x50,0xf3]
-@ CHECK: vmls.i32 q9, q8, q10 @ encoding: [0xe4,0x29,0x60,0xf3]
-@ CHECK: vmls.f32 q9, q8, q10 @ encoding: [0xf4,0x2d,0x60,0xf2]
-
-
- vmlsl.s8 q8, d19, d18
- vmlsl.s16 q8, d19, d18
- vmlsl.s32 q8, d19, d18
- vmlsl.u8 q8, d19, d18
- vmlsl.u16 q8, d19, d18
- vmlsl.u32 q8, d19, d18
-
-@ CHECK: vmlsl.s8 q8, d19, d18 @ encoding: [0xa2,0x0a,0xc3,0xf2]
-@ CHECK: vmlsl.s16 q8, d19, d18 @ encoding: [0xa2,0x0a,0xd3,0xf2]
-@ CHECK: vmlsl.s32 q8, d19, d18 @ encoding: [0xa2,0x0a,0xe3,0xf2]
-@ CHECK: vmlsl.u8 q8, d19, d18 @ encoding: [0xa2,0x0a,0xc3,0xf3]
-@ CHECK: vmlsl.u16 q8, d19, d18 @ encoding: [0xa2,0x0a,0xd3,0xf3]
-@ CHECK: vmlsl.u32 q8, d19, d18 @ encoding: [0xa2,0x0a,0xe3,0xf3]
-
-
- vqdmlsl.s16 q8, d19, d18
- vqdmlsl.s32 q8, d19, d18
-
-@ CHECK: vqdmlsl.s16 q8, d19, d18 @ encoding: [0xa2,0x0b,0xd3,0xf2]
-@ CHECK: vqdmlsl.s32 q8, d19, d18 @ encoding: [0xa2,0x0b,0xe3,0xf2]
+ vmul.i8 d16, d16, d17
+ vmul.i16 d16, d16, d17
+ vmul.i32 d16, d16, d17
+ vmul.f32 d16, d16, d17
+ vmul.i8 q8, q8, q9
+ vmul.i16 q8, q8, q9
+ vmul.i32 q8, q8, q9
+ vmul.f32 q8, q8, q9
+ vmul.p8 d16, d16, d17
+ vmul.p8 q8, q8, q9
+ vmul.i16 d18, d8, d0[3]
+
+ vmul.i8 d16, d17
+ vmul.i16 d16, d17
+ vmul.i32 d16, d17
+ vmul.f32 d16, d17
+ vmul.i8 q8, q9
+ vmul.i16 q8, q9
+ vmul.i32 q8, q9
+ vmul.f32 q8, q9
+ vmul.p8 d16, d17
+ vmul.p8 q8, q9
+
+@ CHECK: vmul.i8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf2]
+@ CHECK: vmul.i16 d16, d16, d17 @ encoding: [0xb1,0x09,0x50,0xf2]
+@ CHECK: vmul.i32 d16, d16, d17 @ encoding: [0xb1,0x09,0x60,0xf2]
+@ CHECK: vmul.f32 d16, d16, d17 @ encoding: [0xb1,0x0d,0x40,0xf3]
+@ CHECK: vmul.i8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf2]
+@ CHECK: vmul.i16 q8, q8, q9 @ encoding: [0xf2,0x09,0x50,0xf2]
+@ CHECK: vmul.i32 q8, q8, q9 @ encoding: [0xf2,0x09,0x60,0xf2]
+@ CHECK: vmul.f32 q8, q8, q9 @ encoding: [0xf2,0x0d,0x40,0xf3]
+@ CHECK: vmul.p8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf3]
+@ CHECK: vmul.p8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf3]
+@ CHECK: vmul.i16 d18, d8, d0[3] @ encoding: [0x68,0x28,0xd8,0xf2]
+
+@ CHECK: vmul.i8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf2]
+@ CHECK: vmul.i16 d16, d16, d17 @ encoding: [0xb1,0x09,0x50,0xf2]
+@ CHECK: vmul.i32 d16, d16, d17 @ encoding: [0xb1,0x09,0x60,0xf2]
+@ CHECK: vmul.f32 d16, d16, d17 @ encoding: [0xb1,0x0d,0x40,0xf3]
+@ CHECK: vmul.i8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf2]
+@ CHECK: vmul.i16 q8, q8, q9 @ encoding: [0xf2,0x09,0x50,0xf2]
+@ CHECK: vmul.i32 q8, q8, q9 @ encoding: [0xf2,0x09,0x60,0xf2]
+@ CHECK: vmul.f32 q8, q8, q9 @ encoding: [0xf2,0x0d,0x40,0xf3]
+@ CHECK: vmul.p8 d16, d16, d17 @ encoding: [0xb1,0x09,0x40,0xf3]
+@ CHECK: vmul.p8 q8, q8, q9 @ encoding: [0xf2,0x09,0x40,0xf3]
+
+
+ vqdmulh.s16 d16, d16, d17
+ vqdmulh.s32 d16, d16, d17
+ vqdmulh.s16 q8, q8, q9
+ vqdmulh.s32 q8, q8, q9
+ vqdmulh.s16 d16, d17
+ vqdmulh.s32 d16, d17
+ vqdmulh.s16 q8, q9
+ vqdmulh.s32 q8, q9
+ vqdmulh.s16 d11, d2, d3[0]
+
+@ CHECK: vqdmulh.s16 d16, d16, d17 @ encoding: [0xa1,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32 d16, d16, d17 @ encoding: [0xa1,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16 q8, q8, q9 @ encoding: [0xe2,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32 q8, q8, q9 @ encoding: [0xe2,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16 d16, d16, d17 @ encoding: [0xa1,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32 d16, d16, d17 @ encoding: [0xa1,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16 q8, q8, q9 @ encoding: [0xe2,0x0b,0x50,0xf2]
+@ CHECK: vqdmulh.s32 q8, q8, q9 @ encoding: [0xe2,0x0b,0x60,0xf2]
+@ CHECK: vqdmulh.s16 d11, d2, d3[0] @ encoding: [0x43,0xbc,0x92,0xf2]
+
+
+ vqrdmulh.s16 d16, d16, d17
+ vqrdmulh.s32 d16, d16, d17
+ vqrdmulh.s16 q8, q8, q9
+ vqrdmulh.s32 q8, q8, q9
+
+@ CHECK: vqrdmulh.s16 d16, d16, d17 @ encoding: [0xa1,0x0b,0x50,0xf3]
+@ CHECK: vqrdmulh.s32 d16, d16, d17 @ encoding: [0xa1,0x0b,0x60,0xf3]
+@ CHECK: vqrdmulh.s16 q8, q8, q9 @ encoding: [0xe2,0x0b,0x50,0xf3]
+@ CHECK: vqrdmulh.s32 q8, q8, q9 @ encoding: [0xe2,0x0b,0x60,0xf3]
+
+
+ vmull.s8 q8, d16, d17
+ vmull.s16 q8, d16, d17
+ vmull.s32 q8, d16, d17
+ vmull.u8 q8, d16, d17
+ vmull.u16 q8, d16, d17
+ vmull.u32 q8, d16, d17
+ vmull.p8 q8, d16, d17
+
+@ CHECK: vmull.s8 q8, d16, d17 @ encoding: [0xa1,0x0c,0xc0,0xf2]
+@ CHECK: vmull.s16 q8, d16, d17 @ encoding: [0xa1,0x0c,0xd0,0xf2]
+@ CHECK: vmull.s32 q8, d16, d17 @ encoding: [0xa1,0x0c,0xe0,0xf2]
+@ CHECK: vmull.u8 q8, d16, d17 @ encoding: [0xa1,0x0c,0xc0,0xf3]
+@ CHECK: vmull.u16 q8, d16, d17 @ encoding: [0xa1,0x0c,0xd0,0xf3]
+@ CHECK: vmull.u32 q8, d16, d17 @ encoding: [0xa1,0x0c,0xe0,0xf3]
+@ CHECK: vmull.p8 q8, d16, d17 @ encoding: [0xa1,0x0e,0xc0,0xf2]
+
+
+ vqdmull.s16 q8, d16, d17
+ vqdmull.s32 q8, d16, d17
+
+@ CHECK: vqdmull.s16 q8, d16, d17 @ encoding: [0xa1,0x0d,0xd0,0xf2]
+@ CHECK: vqdmull.s32 q8, d16, d17 @ encoding: [0xa1,0x0d,0xe0,0xf2]
+
+
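+@ Multiply by scalar: the d<m>[x] operand selects lane x of d<m> and uses it
+@ for every lane of the product. Signed/unsigned integer suffixes are
+@ accepted here but encode identically to the plain .iN forms.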
+ vmul.i16 d0, d4[2]
+ vmul.s16 d1, d7[3]
+ vmul.u16 d2, d1[1]
+ vmul.i32 d3, d2[0]
+ vmul.s32 d4, d3[1]
+ vmul.u32 d5, d4[0]
+ vmul.f32 d6, d5[1]
+
+ vmul.i16 q0, d4[2]
+ vmul.s16 q1, d7[3]
+ vmul.u16 q2, d1[1]
+ vmul.i32 q3, d2[0]
+ vmul.s32 q4, d3[1]
+ vmul.u32 q5, d4[0]
+ vmul.f32 q6, d5[1]
+
+ vmul.i16 d9, d0, d4[2]
+ vmul.s16 d8, d1, d7[3]
+ vmul.u16 d7, d2, d1[1]
+ vmul.i32 d6, d3, d2[0]
+ vmul.s32 d5, d4, d3[1]
+ vmul.u32 d4, d5, d4[0]
+ vmul.f32 d3, d6, d5[1]
+
+ vmul.i16 q9, q0, d4[2]
+ vmul.s16 q8, q1, d7[3]
+ vmul.u16 q7, q2, d1[1]
+ vmul.i32 q6, q3, d2[0]
+ vmul.s32 q5, q4, d3[1]
+ vmul.u32 q4, q5, d4[0]
+ vmul.f32 q3, q6, d5[1]
+
+@ CHECK: vmul.i16 d0, d0, d4[2] @ encoding: [0x64,0x08,0x90,0xf2]
+@ CHECK: vmul.i16 d1, d1, d7[3] @ encoding: [0x6f,0x18,0x91,0xf2]
+@ CHECK: vmul.i16 d2, d2, d1[1] @ encoding: [0x49,0x28,0x92,0xf2]
+@ CHECK: vmul.i32 d3, d3, d2[0] @ encoding: [0x42,0x38,0xa3,0xf2]
+@ CHECK: vmul.i32 d4, d4, d3[1] @ encoding: [0x63,0x48,0xa4,0xf2]
+@ CHECK: vmul.i32 d5, d5, d4[0] @ encoding: [0x44,0x58,0xa5,0xf2]
+@ CHECK: vmul.f32 d6, d6, d5[1] @ encoding: [0x65,0x69,0xa6,0xf2]
+
+@ CHECK: vmul.i16 q0, q0, d4[2] @ encoding: [0x64,0x08,0x90,0xf3]
+@ CHECK: vmul.i16 q1, q1, d7[3] @ encoding: [0x6f,0x28,0x92,0xf3]
+@ CHECK: vmul.i16 q2, q2, d1[1] @ encoding: [0x49,0x48,0x94,0xf3]
+@ CHECK: vmul.i32 q3, q3, d2[0] @ encoding: [0x42,0x68,0xa6,0xf3]
+@ CHECK: vmul.i32 q4, q4, d3[1] @ encoding: [0x63,0x88,0xa8,0xf3]
+@ CHECK: vmul.i32 q5, q5, d4[0] @ encoding: [0x44,0xa8,0xaa,0xf3]
+@ CHECK: vmul.f32 q6, q6, d5[1] @ encoding: [0x65,0xc9,0xac,0xf3]
+
+@ CHECK: vmul.i16 d9, d0, d4[2] @ encoding: [0x64,0x98,0x90,0xf2]
+@ CHECK: vmul.i16 d8, d1, d7[3] @ encoding: [0x6f,0x88,0x91,0xf2]
+@ CHECK: vmul.i16 d7, d2, d1[1] @ encoding: [0x49,0x78,0x92,0xf2]
+@ CHECK: vmul.i32 d6, d3, d2[0] @ encoding: [0x42,0x68,0xa3,0xf2]
+@ CHECK: vmul.i32 d5, d4, d3[1] @ encoding: [0x63,0x58,0xa4,0xf2]
+@ CHECK: vmul.i32 d4, d5, d4[0] @ encoding: [0x44,0x48,0xa5,0xf2]
+@ CHECK: vmul.f32 d3, d6, d5[1] @ encoding: [0x65,0x39,0xa6,0xf2]
+
+@ CHECK: vmul.i16 q9, q0, d4[2] @ encoding: [0x64,0x28,0xd0,0xf3]
+@ CHECK: vmul.i16 q8, q1, d7[3] @ encoding: [0x6f,0x08,0xd2,0xf3]
+@ CHECK: vmul.i16 q7, q2, d1[1] @ encoding: [0x49,0xe8,0x94,0xf3]
+@ CHECK: vmul.i32 q6, q3, d2[0] @ encoding: [0x42,0xc8,0xa6,0xf3]
+@ CHECK: vmul.i32 q5, q4, d3[1] @ encoding: [0x63,0xa8,0xa8,0xf3]
+@ CHECK: vmul.i32 q4, q5, d4[0] @ encoding: [0x44,0x88,0xaa,0xf3]
+@ CHECK: vmul.f32 q3, q6, d5[1] @ encoding: [0x65,0x69,0xac,0xf3]
diff --git a/test/MC/ARM/neon-pairwise-encoding.s b/test/MC/ARM/neon-pairwise-encoding.s
index 65c47bd64aeb..b1e86aa58edb 100644
--- a/test/MC/ARM/neon-pairwise-encoding.s
+++ b/test/MC/ARM/neon-pairwise-encoding.s
@@ -8,6 +8,16 @@
vpadd.i32 d16, d17, d16
@ CHECK: vpadd.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x40,0xf3]
vpadd.f32 d16, d16, d17
+
+@ CHECK: vpadd.i8 d17, d17, d16 @ encoding: [0xb0,0x1b,0x41,0xf2]
+ vpadd.i8 d17, d16
+@ CHECK: vpadd.i16 d17, d17, d16 @ encoding: [0xb0,0x1b,0x51,0xf2]
+ vpadd.i16 d17, d16
+@ CHECK: vpadd.i32 d17, d17, d16 @ encoding: [0xb0,0x1b,0x61,0xf2]
+ vpadd.i32 d17, d16
+@ CHECK: vpadd.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x40,0xf3]
+ vpadd.f32 d16, d17
+
@ CHECK: vpaddl.s8 d16, d16 @ encoding: [0x20,0x02,0xf0,0xf3]
vpaddl.s8 d16, d16
@ CHECK: vpaddl.s16 d16, d16 @ encoding: [0x20,0x02,0xf4,0xf3]
diff --git a/test/MC/ARM/neon-shift-encoding.s b/test/MC/ARM/neon-shift-encoding.s
index a7a1b8386044..54ed173c92ee 100644
--- a/test/MC/ARM/neon-shift-encoding.s
+++ b/test/MC/ARM/neon-shift-encoding.s
@@ -1,134 +1,255 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
_foo:
-@ CHECK: vshl.u8 d16, d17, d16 @ encoding: [0xa1,0x04,0x40,0xf3]
vshl.u8 d16, d17, d16
-@ CHECK: vshl.u16 d16, d17, d16 @ encoding: [0xa1,0x04,0x50,0xf3]
vshl.u16 d16, d17, d16
-@ CHECK: vshl.u32 d16, d17, d16 @ encoding: [0xa1,0x04,0x60,0xf3]
vshl.u32 d16, d17, d16
-@ CHECK: vshl.u64 d16, d17, d16 @ encoding: [0xa1,0x04,0x70,0xf3]
vshl.u64 d16, d17, d16
-@ CHECK: vshl.i8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf2]
vshl.i8 d16, d16, #7
-@ CHECK: vshl.i16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf2]
vshl.i16 d16, d16, #15
-@ CHECK: vshl.i32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf2]
vshl.i32 d16, d16, #31
-@ CHECK: vshl.i64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf2]
vshl.i64 d16, d16, #63
-@ CHECK: vshl.u8 q8, q9, q8 @ encoding: [0xe2,0x04,0x40,0xf3]
vshl.u8 q8, q9, q8
-@ CHECK: vshl.u16 q8, q9, q8 @ encoding: [0xe2,0x04,0x50,0xf3]
vshl.u16 q8, q9, q8
-@ CHECK: vshl.u32 q8, q9, q8 @ encoding: [0xe2,0x04,0x60,0xf3]
vshl.u32 q8, q9, q8
-@ CHECK: vshl.u64 q8, q9, q8 @ encoding: [0xe2,0x04,0x70,0xf3]
vshl.u64 q8, q9, q8
-@ CHECK: vshl.i8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf2]
vshl.i8 q8, q8, #7
-@ CHECK: vshl.i16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf2]
vshl.i16 q8, q8, #15
-@ CHECK: vshl.i32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf2]
vshl.i32 q8, q8, #31
-@ CHECK: vshl.i64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf2]
vshl.i64 q8, q8, #63
-@ CHECK: vshr.u8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf3]
+
+@ CHECK: vshl.u8 d16, d17, d16 @ encoding: [0xa1,0x04,0x40,0xf3]
+@ CHECK: vshl.u16 d16, d17, d16 @ encoding: [0xa1,0x04,0x50,0xf3]
+@ CHECK: vshl.u32 d16, d17, d16 @ encoding: [0xa1,0x04,0x60,0xf3]
+@ CHECK: vshl.u64 d16, d17, d16 @ encoding: [0xa1,0x04,0x70,0xf3]
+@ CHECK: vshl.i8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf2]
+@ CHECK: vshl.i16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf2]
+@ CHECK: vshl.i32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf2]
+@ CHECK: vshl.i64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf2]
+@ CHECK: vshl.u8 q8, q9, q8 @ encoding: [0xe2,0x04,0x40,0xf3]
+@ CHECK: vshl.u16 q8, q9, q8 @ encoding: [0xe2,0x04,0x50,0xf3]
+@ CHECK: vshl.u32 q8, q9, q8 @ encoding: [0xe2,0x04,0x60,0xf3]
+@ CHECK: vshl.u64 q8, q9, q8 @ encoding: [0xe2,0x04,0x70,0xf3]
+@ CHECK: vshl.i8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf2]
+@ CHECK: vshl.i16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf2]
+@ CHECK: vshl.i32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf2]
+@ CHECK: vshl.i64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf2]
+
+
vshr.u8 d16, d16, #7
-@ CHECK: vshr.u16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf3]
vshr.u16 d16, d16, #15
-@ CHECK: vshr.u32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf3]
vshr.u32 d16, d16, #31
-@ CHECK: vshr.u64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf3]
vshr.u64 d16, d16, #63
-@ CHECK: vshr.u8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf3]
vshr.u8 q8, q8, #7
-@ CHECK: vshr.u16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf3]
vshr.u16 q8, q8, #15
-@ CHECK: vshr.u32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf3]
vshr.u32 q8, q8, #31
-@ CHECK: vshr.u64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf3]
vshr.u64 q8, q8, #63
-@ CHECK: vshr.s8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf2]
vshr.s8 d16, d16, #7
-@ CHECK: vshr.s16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf2]
vshr.s16 d16, d16, #15
-@ CHECK: vshr.s32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf2]
vshr.s32 d16, d16, #31
-@ CHECK: vshr.s64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf2]
vshr.s64 d16, d16, #63
-@ CHECK: vshr.s8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf2]
vshr.s8 q8, q8, #7
-@ CHECK: vshr.s16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf2]
vshr.s16 q8, q8, #15
-@ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2]
vshr.s32 q8, q8, #31
-@ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2]
vshr.s64 q8, q8, #63
-@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3]
- vsra.u8 d16, d16, #7
-@ CHECK: vsra.u16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf3]
- vsra.u16 d16, d16, #15
-@ CHECK: vsra.u32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf3]
- vsra.u32 d16, d16, #31
-@ CHECK: vsra.u64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf3]
- vsra.u64 d16, d16, #63
-@ CHECK: vsra.u8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf3]
- vsra.u8 q8, q8, #7
-@ CHECK: vsra.u16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf3]
- vsra.u16 q8, q8, #15
-@ CHECK: vsra.u32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf3]
- vsra.u32 q8, q8, #31
-@ CHECK: vsra.u64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf3]
- vsra.u64 q8, q8, #63
-@ CHECK: vsra.s8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf2]
- vsra.s8 d16, d16, #7
-@ CHECK: vsra.s16 d16, d16, #15 @ encoding: [0x30,0x01,0xd1,0xf2]
- vsra.s16 d16, d16, #15
-@ CHECK: vsra.s32 d16, d16, #31 @ encoding: [0x30,0x01,0xe1,0xf2]
- vsra.s32 d16, d16, #31
-@ CHECK: vsra.s64 d16, d16, #63 @ encoding: [0xb0,0x01,0xc1,0xf2]
- vsra.s64 d16, d16, #63
-@ CHECK: vsra.s8 q8, q8, #7 @ encoding: [0x70,0x01,0xc9,0xf2]
- vsra.s8 q8, q8, #7
-@ CHECK: vsra.s16 q8, q8, #15 @ encoding: [0x70,0x01,0xd1,0xf2]
- vsra.s16 q8, q8, #15
-@ CHECK: vsra.s32 q8, q8, #31 @ encoding: [0x70,0x01,0xe1,0xf2]
- vsra.s32 q8, q8, #31
-@ CHECK: vsra.s64 q8, q8, #63 @ encoding: [0xf0,0x01,0xc1,0xf2]
- vsra.s64 q8, q8, #63
-@ CHECK: vsri.8 d16, d16, #7 @ encoding: [0x30,0x04,0xc9,0xf3]
- vsri.8 d16, d16, #7
-@ CHECK: vsri.16 d16, d16, #15 @ encoding: [0x30,0x04,0xd1,0xf3]
- vsri.16 d16, d16, #15
-@ CHECK: vsri.32 d16, d16, #31 @ encoding: [0x30,0x04,0xe1,0xf3]
- vsri.32 d16, d16, #31
-@ CHECK: vsri.64 d16, d16, #63 @ encoding: [0xb0,0x04,0xc1,0xf3]
- vsri.64 d16, d16, #63
-@ CHECK: vsri.8 q8, q8, #7 @ encoding: [0x70,0x04,0xc9,0xf3]
- vsri.8 q8, q8, #7
-@ CHECK: vsri.16 q8, q8, #15 @ encoding: [0x70,0x04,0xd1,0xf3]
- vsri.16 q8, q8, #15
-@ CHECK: vsri.32 q8, q8, #31 @ encoding: [0x70,0x04,0xe1,0xf3]
- vsri.32 q8, q8, #31
-@ CHECK: vsri.64 q8, q8, #63 @ encoding: [0xf0,0x04,0xc1,0xf3]
- vsri.64 q8, q8, #63
-@ CHECK: vsli.8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf3]
- vsli.8 d16, d16, #7
-@ CHECK: vsli.16 d16, d16, #15 @ encoding: [0x30,0x05,0xdf,0xf3]
- vsli.16 d16, d16, #15
-@ CHECK: vsli.32 d16, d16, #31 @ encoding: [0x30,0x05,0xff,0xf3]
- vsli.32 d16, d16, #31
-@ CHECK: vsli.64 d16, d16, #63 @ encoding: [0xb0,0x05,0xff,0xf3]
- vsli.64 d16, d16, #63
-@ CHECK: vsli.8 q8, q8, #7 @ encoding: [0x70,0x05,0xcf,0xf3]
- vsli.8 q8, q8, #7
-@ CHECK: vsli.16 q8, q8, #15 @ encoding: [0x70,0x05,0xdf,0xf3]
- vsli.16 q8, q8, #15
-@ CHECK: vsli.32 q8, q8, #31 @ encoding: [0x70,0x05,0xff,0xf3]
- vsli.32 q8, q8, #31
-@ CHECK: vsli.64 q8, q8, #63 @ encoding: [0xf0,0x05,0xff,0xf3]
- vsli.64 q8, q8, #63
+
+@ CHECK: vshr.u8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf3]
+@ CHECK: vshr.u8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf3]
+@ CHECK: vshr.s8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf2]
+@ CHECK: vshr.s8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2]
+
+@ Implied destination operand variants.
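+@ (the named register serves as both the destination and the shifted source,
+@ so "vshr.u8 d16, #7" assembles as "vshr.u8 d16, d16, #7")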
+ vshr.u8 d16, #7
+ vshr.u16 d16, #15
+ vshr.u32 d16, #31
+ vshr.u64 d16, #63
+ vshr.u8 q8, #7
+ vshr.u16 q8, #15
+ vshr.u32 q8, #31
+ vshr.u64 q8, #63
+ vshr.s8 d16, #7
+ vshr.s16 d16, #15
+ vshr.s32 d16, #31
+ vshr.s64 d16, #63
+ vshr.s8 q8, #7
+ vshr.s16 q8, #15
+ vshr.s32 q8, #31
+ vshr.s64 q8, #63
+
+@ CHECK: vshr.u8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf3]
+@ CHECK: vshr.u8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf3]
+@ CHECK: vshr.u16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf3]
+@ CHECK: vshr.u32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf3]
+@ CHECK: vshr.u64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf3]
+@ CHECK: vshr.s8 d16, d16, #7 @ encoding: [0x30,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 d16, d16, #15 @ encoding: [0x30,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 d16, d16, #31 @ encoding: [0x30,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 d16, d16, #63 @ encoding: [0xb0,0x00,0xc1,0xf2]
+@ CHECK: vshr.s8 q8, q8, #7 @ encoding: [0x70,0x00,0xc9,0xf2]
+@ CHECK: vshr.s16 q8, q8, #15 @ encoding: [0x70,0x00,0xd1,0xf2]
+@ CHECK: vshr.s32 q8, q8, #31 @ encoding: [0x70,0x00,0xe1,0xf2]
+@ CHECK: vshr.s64 q8, q8, #63 @ encoding: [0xf0,0x00,0xc1,0xf2]
+
+
+ vsra.s8 d16, d6, #7
+ vsra.s16 d26, d18, #15
+ vsra.s32 d11, d10, #31
+ vsra.s64 d12, d19, #63
+ vsra.s8 q1, q8, #7
+ vsra.s16 q2, q7, #15
+ vsra.s32 q3, q6, #31
+ vsra.s64 q4, q5, #63
+
+ vsra.s8 d16, #7
+ vsra.s16 d15, #15
+ vsra.s32 d14, #31
+ vsra.s64 d13, #63
+ vsra.s8 q4, #7
+ vsra.s16 q5, #15
+ vsra.s32 q6, #31
+ vsra.s64 q7, #63
+
+@ CHECK: vsra.s8 d16, d6, #7 @ encoding: [0x16,0x01,0xc9,0xf2]
+@ CHECK: vsra.s16 d26, d18, #15 @ encoding: [0x32,0xa1,0xd1,0xf2]
+@ CHECK: vsra.s32 d11, d10, #31 @ encoding: [0x1a,0xb1,0xa1,0xf2]
+@ CHECK: vsra.s64 d12, d19, #63 @ encoding: [0xb3,0xc1,0x81,0xf2]
+@ CHECK: vsra.s8 q1, q8, #7 @ encoding: [0x70,0x21,0x89,0xf2]
+@ CHECK: vsra.s16 q2, q7, #15 @ encoding: [0x5e,0x41,0x91,0xf2]
+@ CHECK: vsra.s32 q3, q6, #31 @ encoding: [0x5c,0x61,0xa1,0xf2]
+@ CHECK: vsra.s64 q4, q5, #63 @ encoding: [0xda,0x81,0x81,0xf2]
+@ CHECK: vsra.s8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf2]
+@ CHECK: vsra.s16 d15, d15, #15 @ encoding: [0x1f,0xf1,0x91,0xf2]
+@ CHECK: vsra.s32 d14, d14, #31 @ encoding: [0x1e,0xe1,0xa1,0xf2]
+@ CHECK: vsra.s64 d13, d13, #63 @ encoding: [0x9d,0xd1,0x81,0xf2]
+@ CHECK: vsra.s8 q4, q4, #7 @ encoding: [0x58,0x81,0x89,0xf2]
+@ CHECK: vsra.s16 q5, q5, #15 @ encoding: [0x5a,0xa1,0x91,0xf2]
+@ CHECK: vsra.s32 q6, q6, #31 @ encoding: [0x5c,0xc1,0xa1,0xf2]
+@ CHECK: vsra.s64 q7, q7, #63 @ encoding: [0xde,0xe1,0x81,0xf2]
+
+
+ vsra.u8 d16, d6, #7
+ vsra.u16 d26, d18, #15
+ vsra.u32 d11, d10, #31
+ vsra.u64 d12, d19, #63
+ vsra.u8 q1, q8, #7
+ vsra.u16 q2, q7, #15
+ vsra.u32 q3, q6, #31
+ vsra.u64 q4, q5, #63
+
+ vsra.u8 d16, #7
+ vsra.u16 d15, #15
+ vsra.u32 d14, #31
+ vsra.u64 d13, #63
+ vsra.u8 q4, #7
+ vsra.u16 q5, #15
+ vsra.u32 q6, #31
+ vsra.u64 q7, #63
+
+@ CHECK: vsra.u8 d16, d6, #7 @ encoding: [0x16,0x01,0xc9,0xf3]
+@ CHECK: vsra.u16 d26, d18, #15 @ encoding: [0x32,0xa1,0xd1,0xf3]
+@ CHECK: vsra.u32 d11, d10, #31 @ encoding: [0x1a,0xb1,0xa1,0xf3]
+@ CHECK: vsra.u64 d12, d19, #63 @ encoding: [0xb3,0xc1,0x81,0xf3]
+@ CHECK: vsra.u8 q1, q8, #7 @ encoding: [0x70,0x21,0x89,0xf3]
+@ CHECK: vsra.u16 q2, q7, #15 @ encoding: [0x5e,0x41,0x91,0xf3]
+@ CHECK: vsra.u32 q3, q6, #31 @ encoding: [0x5c,0x61,0xa1,0xf3]
+@ CHECK: vsra.u64 q4, q5, #63 @ encoding: [0xda,0x81,0x81,0xf3]
+@ CHECK: vsra.u8 d16, d16, #7 @ encoding: [0x30,0x01,0xc9,0xf3]
+@ CHECK: vsra.u16 d15, d15, #15 @ encoding: [0x1f,0xf1,0x91,0xf3]
+@ CHECK: vsra.u32 d14, d14, #31 @ encoding: [0x1e,0xe1,0xa1,0xf3]
+@ CHECK: vsra.u64 d13, d13, #63 @ encoding: [0x9d,0xd1,0x81,0xf3]
+@ CHECK: vsra.u8 q4, q4, #7 @ encoding: [0x58,0x81,0x89,0xf3]
+@ CHECK: vsra.u16 q5, q5, #15 @ encoding: [0x5a,0xa1,0x91,0xf3]
+@ CHECK: vsra.u32 q6, q6, #31 @ encoding: [0x5c,0xc1,0xa1,0xf3]
+@ CHECK: vsra.u64 q7, q7, #63 @ encoding: [0xde,0xe1,0x81,0xf3]
+
+
+ vsri.8 d16, d6, #7
+ vsri.16 d26, d18, #15
+ vsri.32 d11, d10, #31
+ vsri.64 d12, d19, #63
+ vsri.8 q1, q8, #7
+ vsri.16 q2, q7, #15
+ vsri.32 q3, q6, #31
+ vsri.64 q4, q5, #63
+
+ vsri.8 d16, #7
+ vsri.16 d15, #15
+ vsri.32 d14, #31
+ vsri.64 d13, #63
+ vsri.8 q4, #7
+ vsri.16 q5, #15
+ vsri.32 q6, #31
+ vsri.64 q7, #63
+
+@ CHECK: vsri.8 d16, d6, #7 @ encoding: [0x16,0x04,0xc9,0xf3]
+@ CHECK: vsri.16 d26, d18, #15 @ encoding: [0x32,0xa4,0xd1,0xf3]
+@ CHECK: vsri.32 d11, d10, #31 @ encoding: [0x1a,0xb4,0xa1,0xf3]
+@ CHECK: vsri.64 d12, d19, #63 @ encoding: [0xb3,0xc4,0x81,0xf3]
+@ CHECK: vsri.8 q1, q8, #7 @ encoding: [0x70,0x24,0x89,0xf3]
+@ CHECK: vsri.16 q2, q7, #15 @ encoding: [0x5e,0x44,0x91,0xf3]
+@ CHECK: vsri.32 q3, q6, #31 @ encoding: [0x5c,0x64,0xa1,0xf3]
+@ CHECK: vsri.64 q4, q5, #63 @ encoding: [0xda,0x84,0x81,0xf3]
+@ CHECK: vsri.8 d16, d16, #7 @ encoding: [0x30,0x04,0xc9,0xf3]
+@ CHECK: vsri.16 d15, d15, #15 @ encoding: [0x1f,0xf4,0x91,0xf3]
+@ CHECK: vsri.32 d14, d14, #31 @ encoding: [0x1e,0xe4,0xa1,0xf3]
+@ CHECK: vsri.64 d13, d13, #63 @ encoding: [0x9d,0xd4,0x81,0xf3]
+@ CHECK: vsri.8 q4, q4, #7 @ encoding: [0x58,0x84,0x89,0xf3]
+@ CHECK: vsri.16 q5, q5, #15 @ encoding: [0x5a,0xa4,0x91,0xf3]
+@ CHECK: vsri.32 q6, q6, #31 @ encoding: [0x5c,0xc4,0xa1,0xf3]
+@ CHECK: vsri.64 q7, q7, #63 @ encoding: [0xde,0xe4,0x81,0xf3]
+
+
+ vsli.8 d16, d6, #7
+ vsli.16 d26, d18, #15
+ vsli.32 d11, d10, #31
+ vsli.64 d12, d19, #63
+ vsli.8 q1, q8, #7
+ vsli.16 q2, q7, #15
+ vsli.32 q3, q6, #31
+ vsli.64 q4, q5, #63
+
+ vsli.8 d16, #7
+ vsli.16 d15, #15
+ vsli.32 d14, #31
+ vsli.64 d13, #63
+ vsli.8 q4, #7
+ vsli.16 q5, #15
+ vsli.32 q6, #31
+ vsli.64 q7, #63
+
+@ CHECK: vsli.8 d16, d6, #7 @ encoding: [0x16,0x05,0xcf,0xf3]
+@ CHECK: vsli.16 d26, d18, #15 @ encoding: [0x32,0xa5,0xdf,0xf3]
+@ CHECK: vsli.32 d11, d10, #31 @ encoding: [0x1a,0xb5,0xbf,0xf3]
+@ CHECK: vsli.64 d12, d19, #63 @ encoding: [0xb3,0xc5,0xbf,0xf3]
+@ CHECK: vsli.8 q1, q8, #7 @ encoding: [0x70,0x25,0x8f,0xf3]
+@ CHECK: vsli.16 q2, q7, #15 @ encoding: [0x5e,0x45,0x9f,0xf3]
+@ CHECK: vsli.32 q3, q6, #31 @ encoding: [0x5c,0x65,0xbf,0xf3]
+@ CHECK: vsli.64 q4, q5, #63 @ encoding: [0xda,0x85,0xbf,0xf3]
+@ CHECK: vsli.8 d16, d16, #7 @ encoding: [0x30,0x05,0xcf,0xf3]
+@ CHECK: vsli.16 d15, d15, #15 @ encoding: [0x1f,0xf5,0x9f,0xf3]
+@ CHECK: vsli.32 d14, d14, #31 @ encoding: [0x1e,0xe5,0xbf,0xf3]
+@ CHECK: vsli.64 d13, d13, #63 @ encoding: [0x9d,0xd5,0xbf,0xf3]
+@ CHECK: vsli.8 q4, q4, #7 @ encoding: [0x58,0x85,0x8f,0xf3]
+@ CHECK: vsli.16 q5, q5, #15 @ encoding: [0x5a,0xa5,0x9f,0xf3]
+@ CHECK: vsli.32 q6, q6, #31 @ encoding: [0x5c,0xc5,0xbf,0xf3]
+@ CHECK: vsli.64 q7, q7, #63 @ encoding: [0xde,0xe5,0xbf,0xf3]
+
+
@ CHECK: vshll.s8 q8, d16, #7 @ encoding: [0x30,0x0a,0xcf,0xf2]
vshll.s8 q8, d16, #7
@ CHECK: vshll.s16 q8, d16, #15 @ encoding: [0x30,0x0a,0xdf,0xf2]
@@ -235,3 +356,134 @@ _foo:
vqrshrn.u32 d16, q8, #13
@ CHECK: vqrshrn.u64 d16, q8, #13 @ encoding: [0x70,0x09,0xf3,0xf3]
vqrshrn.u64 d16, q8, #13
+
+@ Optional destination operand variants.
+ vshl.s8 q4, q5
+ vshl.s16 q4, q5
+ vshl.s32 q4, q5
+ vshl.s64 q4, q5
+
+ vshl.u8 q4, q5
+ vshl.u16 q4, q5
+ vshl.u32 q4, q5
+ vshl.u64 q4, q5
+
+ vshl.s8 d4, d5
+ vshl.s16 d4, d5
+ vshl.s32 d4, d5
+ vshl.s64 d4, d5
+
+ vshl.u8 d4, d5
+ vshl.u16 d4, d5
+ vshl.u32 d4, d5
+ vshl.u64 d4, d5
+
+@ CHECK: vshl.s8 q4, q4, q5 @ encoding: [0x48,0x84,0x0a,0xf2]
+@ CHECK: vshl.s16 q4, q4, q5 @ encoding: [0x48,0x84,0x1a,0xf2]
+@ CHECK: vshl.s32 q4, q4, q5 @ encoding: [0x48,0x84,0x2a,0xf2]
+@ CHECK: vshl.s64 q4, q4, q5 @ encoding: [0x48,0x84,0x3a,0xf2]
+
+@ CHECK: vshl.u8 q4, q4, q5 @ encoding: [0x48,0x84,0x0a,0xf3]
+@ CHECK: vshl.u16 q4, q4, q5 @ encoding: [0x48,0x84,0x1a,0xf3]
+@ CHECK: vshl.u32 q4, q4, q5 @ encoding: [0x48,0x84,0x2a,0xf3]
+@ CHECK: vshl.u64 q4, q4, q5 @ encoding: [0x48,0x84,0x3a,0xf3]
+
+@ CHECK: vshl.s8 d4, d4, d5 @ encoding: [0x04,0x44,0x05,0xf2]
+@ CHECK: vshl.s16 d4, d4, d5 @ encoding: [0x04,0x44,0x15,0xf2]
+@ CHECK: vshl.s32 d4, d4, d5 @ encoding: [0x04,0x44,0x25,0xf2]
+@ CHECK: vshl.s64 d4, d4, d5 @ encoding: [0x04,0x44,0x35,0xf2]
+
+@ CHECK: vshl.u8 d4, d4, d5 @ encoding: [0x04,0x44,0x05,0xf3]
+@ CHECK: vshl.u16 d4, d4, d5 @ encoding: [0x04,0x44,0x15,0xf3]
+@ CHECK: vshl.u32 d4, d4, d5 @ encoding: [0x04,0x44,0x25,0xf3]
+@ CHECK: vshl.u64 d4, d4, d5 @ encoding: [0x04,0x44,0x35,0xf3]
+
+ vshl.s8 q4, #2
+ vshl.s16 q4, #14
+ vshl.s32 q4, #27
+ vshl.s64 q4, #35
+
+ vshl.s8 d4, #6
+ vshl.u16 d4, #10
+ vshl.s32 d4, #17
+ vshl.u64 d4, #43
+
+@ CHECK: vshl.i8 q4, q4, #2 @ encoding: [0x58,0x85,0x8a,0xf2]
+@ CHECK: vshl.i16 q4, q4, #14 @ encoding: [0x58,0x85,0x9e,0xf2]
+@ CHECK: vshl.i32 q4, q4, #27 @ encoding: [0x58,0x85,0xbb,0xf2]
+@ CHECK: vshl.i64 q4, q4, #35 @ encoding: [0xd8,0x85,0xa3,0xf2]
+
+@ CHECK: vshl.i8 d4, d4, #6 @ encoding: [0x14,0x45,0x8e,0xf2]
+@ CHECK: vshl.i16 d4, d4, #10 @ encoding: [0x14,0x45,0x9a,0xf2]
+@ CHECK: vshl.i32 d4, d4, #17 @ encoding: [0x14,0x45,0xb1,0xf2]
+@ CHECK: vshl.i64 d4, d4, #43 @ encoding: [0x94,0x45,0xab,0xf2]
+
+
+@ Two-operand forms.
+ vshr.s8 d15, #8
+ vshr.s16 d12, #16
+ vshr.s32 d13, #32
+ vshr.s64 d14, #64
+ vshr.u8 d16, #8
+ vshr.u16 d17, #16
+ vshr.u32 d6, #32
+ vshr.u64 d10, #64
+ vshr.s8 q1, #8
+ vshr.s16 q2, #16
+ vshr.s32 q3, #32
+ vshr.s64 q4, #64
+ vshr.u8 q5, #8
+ vshr.u16 q6, #16
+ vshr.u32 q7, #32
+ vshr.u64 q8, #64
+
+@ CHECK: vshr.s8 d15, d15, #8 @ encoding: [0x1f,0xf0,0x88,0xf2]
+@ CHECK: vshr.s16 d12, d12, #16 @ encoding: [0x1c,0xc0,0x90,0xf2]
+@ CHECK: vshr.s32 d13, d13, #32 @ encoding: [0x1d,0xd0,0xa0,0xf2]
+@ CHECK: vshr.s64 d14, d14, #64 @ encoding: [0x9e,0xe0,0x80,0xf2]
+@ CHECK: vshr.u8 d16, d16, #8 @ encoding: [0x30,0x00,0xc8,0xf3]
+@ CHECK: vshr.u16 d17, d17, #16 @ encoding: [0x31,0x10,0xd0,0xf3]
+@ CHECK: vshr.u32 d6, d6, #32 @ encoding: [0x16,0x60,0xa0,0xf3]
+@ CHECK: vshr.u64 d10, d10, #64 @ encoding: [0x9a,0xa0,0x80,0xf3]
+@ CHECK: vshr.s8 q1, q1, #8 @ encoding: [0x52,0x20,0x88,0xf2]
+@ CHECK: vshr.s16 q2, q2, #16 @ encoding: [0x54,0x40,0x90,0xf2]
+@ CHECK: vshr.s32 q3, q3, #32 @ encoding: [0x56,0x60,0xa0,0xf2]
+@ CHECK: vshr.s64 q4, q4, #64 @ encoding: [0xd8,0x80,0x80,0xf2]
+@ CHECK: vshr.u8 q5, q5, #8 @ encoding: [0x5a,0xa0,0x88,0xf3]
+@ CHECK: vshr.u16 q6, q6, #16 @ encoding: [0x5c,0xc0,0x90,0xf3]
+@ CHECK: vshr.u32 q7, q7, #32 @ encoding: [0x5e,0xe0,0xa0,0xf3]
+@ CHECK: vshr.u64 q8, q8, #64 @ encoding: [0xf0,0x00,0xc0,0xf3]
+
+ vrshr.s8 d15, #8
+ vrshr.s16 d12, #16
+ vrshr.s32 d13, #32
+ vrshr.s64 d14, #64
+ vrshr.u8 d16, #8
+ vrshr.u16 d17, #16
+ vrshr.u32 d6, #32
+ vrshr.u64 d10, #64
+ vrshr.s8 q1, #8
+ vrshr.s16 q2, #16
+ vrshr.s32 q3, #32
+ vrshr.s64 q4, #64
+ vrshr.u8 q5, #8
+ vrshr.u16 q6, #16
+ vrshr.u32 q7, #32
+ vrshr.u64 q8, #64
+
+@ CHECK: vrshr.s8 d15, d15, #8 @ encoding: [0x1f,0xf2,0x88,0xf2]
+@ CHECK: vrshr.s16 d12, d12, #16 @ encoding: [0x1c,0xc2,0x90,0xf2]
+@ CHECK: vrshr.s32 d13, d13, #32 @ encoding: [0x1d,0xd2,0xa0,0xf2]
+@ CHECK: vrshr.s64 d14, d14, #64 @ encoding: [0x9e,0xe2,0x80,0xf2]
+@ CHECK: vrshr.u8 d16, d16, #8 @ encoding: [0x30,0x02,0xc8,0xf3]
+@ CHECK: vrshr.u16 d17, d17, #16 @ encoding: [0x31,0x12,0xd0,0xf3]
+@ CHECK: vrshr.u32 d6, d6, #32 @ encoding: [0x16,0x62,0xa0,0xf3]
+@ CHECK: vrshr.u64 d10, d10, #64 @ encoding: [0x9a,0xa2,0x80,0xf3]
+@ CHECK: vrshr.s8 q1, q1, #8 @ encoding: [0x52,0x22,0x88,0xf2]
+@ CHECK: vrshr.s16 q2, q2, #16 @ encoding: [0x54,0x42,0x90,0xf2]
+@ CHECK: vrshr.s32 q3, q3, #32 @ encoding: [0x56,0x62,0xa0,0xf2]
+@ CHECK: vrshr.s64 q4, q4, #64 @ encoding: [0xd8,0x82,0x80,0xf2]
+@ CHECK: vrshr.u8 q5, q5, #8 @ encoding: [0x5a,0xa2,0x88,0xf3]
+@ CHECK: vrshr.u16 q6, q6, #16 @ encoding: [0x5c,0xc2,0x90,0xf3]
+@ CHECK: vrshr.u32 q7, q7, #32 @ encoding: [0x5e,0xe2,0xa0,0xf3]
+@ CHECK: vrshr.u64 q8, q8, #64 @ encoding: [0xf0,0x02,0xc0,0xf3]
diff --git a/test/MC/ARM/neon-shiftaccum-encoding.s b/test/MC/ARM/neon-shiftaccum-encoding.s
deleted file mode 100644
index 0dc630d395dc..000000000000
--- a/test/MC/ARM/neon-shiftaccum-encoding.s
+++ /dev/null
@@ -1,98 +0,0 @@
-@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-
-@ CHECK: vsra.s8 d17, d16, #8 @ encoding: [0x30,0x11,0xc8,0xf2]
- vsra.s8 d17, d16, #8
-@ CHECK: vsra.s16 d17, d16, #16 @ encoding: [0x30,0x11,0xd0,0xf2]
- vsra.s16 d17, d16, #16
-@ CHECK: vsra.s32 d17, d16, #32 @ encoding: [0x30,0x11,0xe0,0xf2]
- vsra.s32 d17, d16, #32
-@ CHECK: vsra.s64 d17, d16, #64 @ encoding: [0xb0,0x11,0xc0,0xf2]
- vsra.s64 d17, d16, #64
-@ CHECK: vsra.s8 q8, q9, #8 @ encoding: [0x72,0x01,0xc8,0xf2]
- vsra.s8 q8, q9, #8
-@ CHECK: vsra.s16 q8, q9, #16 @ encoding: [0x72,0x01,0xd0,0xf2]
- vsra.s16 q8, q9, #16
-@ CHECK: vsra.s32 q8, q9, #32 @ encoding: [0x72,0x01,0xe0,0xf2]
- vsra.s32 q8, q9, #32
-@ CHECK: vsra.s64 q8, q9, #64 @ encoding: [0xf2,0x01,0xc0,0xf2]
- vsra.s64 q8, q9, #64
-@ CHECK: vsra.u8 d17, d16, #8 @ encoding: [0x30,0x11,0xc8,0xf3]
- vsra.u8 d17, d16, #8
-@ CHECK: vsra.u16 d17, d16, #16 @ encoding: [0x30,0x11,0xd0,0xf3]
- vsra.u16 d17, d16, #16
-@ CHECK: vsra.u32 d17, d16, #32 @ encoding: [0x30,0x11,0xe0,0xf3]
- vsra.u32 d17, d16, #32
-@ CHECK: vsra.u64 d17, d16, #64 @ encoding: [0xb0,0x11,0xc0,0xf3]
- vsra.u64 d17, d16, #64
-@ CHECK: vsra.u8 q8, q9, #8 @ encoding: [0x72,0x01,0xc8,0xf3]
- vsra.u8 q8, q9, #8
-@ CHECK: vsra.u16 q8, q9, #16 @ encoding: [0x72,0x01,0xd0,0xf3]
- vsra.u16 q8, q9, #16
-@ CHECK: vsra.u32 q8, q9, #32 @ encoding: [0x72,0x01,0xe0,0xf3]
- vsra.u32 q8, q9, #32
-@ CHECK: vsra.u64 q8, q9, #64 @ encoding: [0xf2,0x01,0xc0,0xf3]
- vsra.u64 q8, q9, #64
-@ CHECK: vrsra.s8 d17, d16, #8 @ encoding: [0x30,0x13,0xc8,0xf2]
- vrsra.s8 d17, d16, #8
-@ CHECK: vrsra.s16 d17, d16, #16 @ encoding: [0x30,0x13,0xd0,0xf2]
- vrsra.s16 d17, d16, #16
-@ CHECK: vrsra.s32 d17, d16, #32 @ encoding: [0x30,0x13,0xe0,0xf2]
- vrsra.s32 d17, d16, #32
-@ CHECK: vrsra.s64 d17, d16, #64 @ encoding: [0xb0,0x13,0xc0,0xf2]
- vrsra.s64 d17, d16, #64
-@ CHECK: vrsra.u8 d17, d16, #8 @ encoding: [0x30,0x13,0xc8,0xf3]
- vrsra.u8 d17, d16, #8
-@ CHECK: vrsra.u16 d17, d16, #16 @ encoding: [0x30,0x13,0xd0,0xf3]
- vrsra.u16 d17, d16, #16
-@ CHECK: vrsra.u32 d17, d16, #32 @ encoding: [0x30,0x13,0xe0,0xf3]
- vrsra.u32 d17, d16, #32
-@ CHECK: vrsra.u64 d17, d16, #64 @ encoding: [0xb0,0x13,0xc0,0xf3]
- vrsra.u64 d17, d16, #64
-@ CHECK: vrsra.s8 q8, q9, #8 @ encoding: [0x72,0x03,0xc8,0xf2]
- vrsra.s8 q8, q9, #8
-@ CHECK: vrsra.s16 q8, q9, #16 @ encoding: [0x72,0x03,0xd0,0xf2]
- vrsra.s16 q8, q9, #16
-@ CHECK: vrsra.s32 q8, q9, #32 @ encoding: [0x72,0x03,0xe0,0xf2]
- vrsra.s32 q8, q9, #32
-@ CHECK: vrsra.s64 q8, q9, #64 @ encoding: [0xf2,0x03,0xc0,0xf2]
- vrsra.s64 q8, q9, #64
-@ CHECK: vrsra.u8 q8, q9, #8 @ encoding: [0x72,0x03,0xc8,0xf3]
- vrsra.u8 q8, q9, #8
-@ CHECK: vrsra.u16 q8, q9, #16 @ encoding: [0x72,0x03,0xd0,0xf3]
- vrsra.u16 q8, q9, #16
-@ CHECK: vrsra.u32 q8, q9, #32 @ encoding: [0x72,0x03,0xe0,0xf3]
- vrsra.u32 q8, q9, #32
-@ CHECK: vrsra.u64 q8, q9, #64 @ encoding: [0xf2,0x03,0xc0,0xf3]
- vrsra.u64 q8, q9, #64
-@ CHECK: vsli.8 d17, d16, #7 @ encoding: [0x30,0x15,0xcf,0xf3]
- vsli.8 d17, d16, #7
-@ CHECK: vsli.16 d17, d16, #15 @ encoding: [0x30,0x15,0xdf,0xf3]
- vsli.16 d17, d16, #15
-@ CHECK: vsli.32 d17, d16, #31 @ encoding: [0x30,0x15,0xff,0xf3]
- vsli.32 d17, d16, #31
-@ CHECK: vsli.64 d17, d16, #63 @ encoding: [0xb0,0x15,0xff,0xf3]
- vsli.64 d17, d16, #63
-@ CHECK: vsli.8 q9, q8, #7 @ encoding: [0x70,0x25,0xcf,0xf3]
- vsli.8 q9, q8, #7
-@ CHECK: vsli.16 q9, q8, #15 @ encoding: [0x70,0x25,0xdf,0xf3]
- vsli.16 q9, q8, #15
-@ CHECK: vsli.32 q9, q8, #31 @ encoding: [0x70,0x25,0xff,0xf3]
- vsli.32 q9, q8, #31
-@ CHECK: vsli.64 q9, q8, #63 @ encoding: [0xf0,0x25,0xff,0xf3]
- vsli.64 q9, q8, #63
-@ CHECK: vsri.8 d17, d16, #8 @ encoding: [0x30,0x14,0xc8,0xf3]
- vsri.8 d17, d16, #8
-@ CHECK: vsri.16 d17, d16, #16 @ encoding: [0x30,0x14,0xd0,0xf3]
- vsri.16 d17, d16, #16
-@ CHECK: vsri.32 d17, d16, #32 @ encoding: [0x30,0x14,0xe0,0xf3]
- vsri.32 d17, d16, #32
-@ CHECK: vsri.64 d17, d16, #64 @ encoding: [0xb0,0x14,0xc0,0xf3]
- vsri.64 d17, d16, #64
-@ CHECK: vsri.8 q9, q8, #8 @ encoding: [0x70,0x24,0xc8,0xf3]
- vsri.8 q9, q8, #8
-@ CHECK: vsri.16 q9, q8, #16 @ encoding: [0x70,0x24,0xd0,0xf3]
- vsri.16 q9, q8, #16
-@ CHECK: vsri.32 q9, q8, #32 @ encoding: [0x70,0x24,0xe0,0xf3]
- vsri.32 q9, q8, #32
-@ CHECK: vsri.64 q9, q8, #64 @ encoding: [0xf0,0x24,0xc0,0xf3]
- vsri.64 q9, q8, #64
diff --git a/test/MC/ARM/neon-shuffle-encoding.s b/test/MC/ARM/neon-shuffle-encoding.s
index ce7eb66a08a7..0f07d9f9968c 100644
--- a/test/MC/ARM/neon-shuffle-encoding.s
+++ b/test/MC/ARM/neon-shuffle-encoding.s
@@ -1,46 +1,136 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ CHECK: vext.8 d16, d17, d16, #3 @ encoding: [0xa0,0x03,0xf1,0xf2]
vext.8 d16, d17, d16, #3
-@ CHECK: vext.8 d16, d17, d16, #5 @ encoding: [0xa0,0x05,0xf1,0xf2]
vext.8 d16, d17, d16, #5
-@ CHECK: vext.8 q8, q9, q8, #3 @ encoding: [0xe0,0x03,0xf2,0xf2]
vext.8 q8, q9, q8, #3
-@ CHECK: vext.8 q8, q9, q8, #7 @ encoding: [0xe0,0x07,0xf2,0xf2]
vext.8 q8, q9, q8, #7
-@ CHECK: vext.16 d16, d17, d16, #3 @ encoding: [0xa0,0x06,0xf1,0xf2]
vext.16 d16, d17, d16, #3
-@ CHECK: vext.32 q8, q9, q8, #3 @ encoding: [0xe0,0x0c,0xf2,0xf2]
vext.32 q8, q9, q8, #3
-@ CHECK: vtrn.8 d17, d16 @ encoding: [0xa0,0x10,0xf2,0xf3]
+ vext.64 q8, q9, q8, #1
+
+ vext.8 d17, d16, #3
+ vext.8 d7, d11, #5
+ vext.8 q3, q8, #3
+ vext.8 q9, q4, #7
+ vext.16 d1, d26, #3
+ vext.32 q5, q8, #3
+ vext.64 q5, q8, #1
+
+
+@ CHECK: vext.8 d16, d17, d16, #3 @ encoding: [0xa0,0x03,0xf1,0xf2]
+@ CHECK: vext.8 d16, d17, d16, #5 @ encoding: [0xa0,0x05,0xf1,0xf2]
+@ CHECK: vext.8 q8, q9, q8, #3 @ encoding: [0xe0,0x03,0xf2,0xf2]
+@ CHECK: vext.8 q8, q9, q8, #7 @ encoding: [0xe0,0x07,0xf2,0xf2]
+@ CHECK: vext.16 d16, d17, d16, #3 @ encoding: [0xa0,0x06,0xf1,0xf2]
+@ CHECK: vext.32 q8, q9, q8, #3 @ encoding: [0xe0,0x0c,0xf2,0xf2]
+@ CHECK: vext.64 q8, q9, q8, #1 @ encoding: [0xe0,0x08,0xf2,0xf2]
+
+@ CHECK: vext.8 d17, d17, d16, #3 @ encoding: [0xa0,0x13,0xf1,0xf2]
+@ CHECK: vext.8 d7, d7, d11, #5 @ encoding: [0x0b,0x75,0xb7,0xf2]
+@ CHECK: vext.8 q3, q3, q8, #3 @ encoding: [0x60,0x63,0xb6,0xf2]
+@ CHECK: vext.8 q9, q9, q4, #7 @ encoding: [0xc8,0x27,0xf2,0xf2]
+@ CHECK: vext.16 d1, d1, d26, #3 @ encoding: [0x2a,0x16,0xb1,0xf2]
+@ CHECK: vext.32 q5, q5, q8, #3 @ encoding: [0x60,0xac,0xba,0xf2]
+@ CHECK: vext.64 q5, q5, q8, #1 @ encoding: [0x60,0xa8,0xba,0xf2]
+
+
vtrn.8 d17, d16
-@ CHECK: vtrn.16 d17, d16 @ encoding: [0xa0,0x10,0xf6,0xf3]
vtrn.16 d17, d16
-@ CHECK: vtrn.32 d17, d16 @ encoding: [0xa0,0x10,0xfa,0xf3]
vtrn.32 d17, d16
-@ CHECK: vtrn.8 q9, q8 @ encoding: [0xe0,0x20,0xf2,0xf3]
vtrn.8 q9, q8
-@ CHECK: vtrn.16 q9, q8 @ encoding: [0xe0,0x20,0xf6,0xf3]
vtrn.16 q9, q8
-@ CHECK: vtrn.32 q9, q8 @ encoding: [0xe0,0x20,0xfa,0xf3]
vtrn.32 q9, q8
-@ CHECK: vuzp.8 d17, d16 @ encoding: [0x20,0x11,0xf2,0xf3]
+
+@ CHECK: vtrn.8 d17, d16 @ encoding: [0xa0,0x10,0xf2,0xf3]
+@ CHECK: vtrn.16 d17, d16 @ encoding: [0xa0,0x10,0xf6,0xf3]
+@ CHECK: vtrn.32 d17, d16 @ encoding: [0xa0,0x10,0xfa,0xf3]
+@ CHECK: vtrn.8 q9, q8 @ encoding: [0xe0,0x20,0xf2,0xf3]
+@ CHECK: vtrn.16 q9, q8 @ encoding: [0xe0,0x20,0xf6,0xf3]
+@ CHECK: vtrn.32 q9, q8 @ encoding: [0xe0,0x20,0xfa,0xf3]
+
+
vuzp.8 d17, d16
-@ CHECK: vuzp.16 d17, d16 @ encoding: [0x20,0x11,0xf6,0xf3]
vuzp.16 d17, d16
-@ CHECK: vuzp.8 q9, q8 @ encoding: [0x60,0x21,0xf2,0xf3]
vuzp.8 q9, q8
-@ CHECK: vuzp.16 q9, q8 @ encoding: [0x60,0x21,0xf6,0xf3]
vuzp.16 q9, q8
-@ CHECK: vuzp.32 q9, q8 @ encoding: [0x60,0x21,0xfa,0xf3]
vuzp.32 q9, q8
-@ CHECK: vzip.8 d17, d16 @ encoding: [0xa0,0x11,0xf2,0xf3]
vzip.8 d17, d16
-@ CHECK: vzip.16 d17, d16 @ encoding: [0xa0,0x11,0xf6,0xf3]
vzip.16 d17, d16
-@ CHECK: vzip.8 q9, q8 @ encoding: [0xe0,0x21,0xf2,0xf3]
vzip.8 q9, q8
-@ CHECK: vzip.16 q9, q8 @ encoding: [0xe0,0x21,0xf6,0xf3]
vzip.16 q9, q8
-@ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3]
vzip.32 q9, q8
+ vzip.32 d2, d3
+ vuzp.32 d2, d3
+
+@ CHECK: vuzp.8 d17, d16 @ encoding: [0x20,0x11,0xf2,0xf3]
+@ CHECK: vuzp.16 d17, d16 @ encoding: [0x20,0x11,0xf6,0xf3]
+@ CHECK: vuzp.8 q9, q8 @ encoding: [0x60,0x21,0xf2,0xf3]
+@ CHECK: vuzp.16 q9, q8 @ encoding: [0x60,0x21,0xf6,0xf3]
+@ CHECK: vuzp.32 q9, q8 @ encoding: [0x60,0x21,0xfa,0xf3]
+@ CHECK: vzip.8 d17, d16 @ encoding: [0xa0,0x11,0xf2,0xf3]
+@ CHECK: vzip.16 d17, d16 @ encoding: [0xa0,0x11,0xf6,0xf3]
+@ CHECK: vzip.8 q9, q8 @ encoding: [0xe0,0x21,0xf2,0xf3]
+@ CHECK: vzip.16 q9, q8 @ encoding: [0xe0,0x21,0xf6,0xf3]
+@ CHECK: vzip.32 q9, q8 @ encoding: [0xe0,0x21,0xfa,0xf3]
+@ CHECK: vtrn.32 d2, d3 @ encoding: [0x83,0x20,0xba,0xf3]
+@ CHECK: vtrn.32 d2, d3 @ encoding: [0x83,0x20,0xba,0xf3]
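+@ (zipping or unzipping a pair of two-element vectors is the same permutation
+@ as transposing them, so vzip.32/vuzp.32 on d-registers assemble as vtrn.32)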
+
+
+@ VTRN alternate size suffixes
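+@ (the data-type suffix only fixes the element size: .i8, .u8 and .p8 all
+@ encode identically to .8, and .f is accepted as shorthand for .f32)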
+
+ vtrn.8 d3, d9
+ vtrn.i8 d3, d9
+ vtrn.u8 d3, d9
+ vtrn.p8 d3, d9
+ vtrn.16 d3, d9
+ vtrn.i16 d3, d9
+ vtrn.u16 d3, d9
+ vtrn.p16 d3, d9
+ vtrn.32 d3, d9
+ vtrn.i32 d3, d9
+ vtrn.u32 d3, d9
+ vtrn.f32 d3, d9
+ vtrn.f d3, d9
+
+ vtrn.8 q14, q6
+ vtrn.i8 q14, q6
+ vtrn.u8 q14, q6
+ vtrn.p8 q14, q6
+ vtrn.16 q14, q6
+ vtrn.i16 q14, q6
+ vtrn.u16 q14, q6
+ vtrn.p16 q14, q6
+ vtrn.32 q14, q6
+ vtrn.i32 q14, q6
+ vtrn.u32 q14, q6
+ vtrn.f32 q14, q6
+ vtrn.f q14, q6
+
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.8 d3, d9 @ encoding: [0x89,0x30,0xb2,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.16 d3, d9 @ encoding: [0x89,0x30,0xb6,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.32 d3, d9 @ encoding: [0x89,0x30,0xba,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.8 q14, q6 @ encoding: [0xcc,0xc0,0xf2,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.16 q14, q6 @ encoding: [0xcc,0xc0,0xf6,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+@ CHECK: vtrn.32 q14, q6 @ encoding: [0xcc,0xc0,0xfa,0xf3]
+
diff --git a/test/MC/ARM/neon-sub-encoding.s b/test/MC/ARM/neon-sub-encoding.s
index 241a01ffd4d4..0622e192bc62 100644
--- a/test/MC/ARM/neon-sub-encoding.s
+++ b/test/MC/ARM/neon-sub-encoding.s
@@ -1,25 +1,51 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ CHECK: vsub.i8 d16, d17, d16 @ encoding: [0xa0,0x08,0x41,0xf3]
vsub.i8 d16, d17, d16
-@ CHECK: vsub.i16 d16, d17, d16 @ encoding: [0xa0,0x08,0x51,0xf3]
vsub.i16 d16, d17, d16
-@ CHECK: vsub.i32 d16, d17, d16 @ encoding: [0xa0,0x08,0x61,0xf3]
vsub.i32 d16, d17, d16
-@ CHECK: vsub.i64 d16, d17, d16 @ encoding: [0xa0,0x08,0x71,0xf3]
vsub.i64 d16, d17, d16
-@ CHECK: vsub.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x60,0xf2]
vsub.f32 d16, d16, d17
-@ CHECK: vsub.i8 q8, q8, q9 @ encoding: [0xe2,0x08,0x40,0xf3]
vsub.i8 q8, q8, q9
-@ CHECK: vsub.i16 q8, q8, q9 @ encoding: [0xe2,0x08,0x50,0xf3]
vsub.i16 q8, q8, q9
-@ CHECK: vsub.i32 q8, q8, q9 @ encoding: [0xe2,0x08,0x60,0xf3]
vsub.i32 q8, q8, q9
-@ CHECK: vsub.i64 q8, q8, q9 @ encoding: [0xe2,0x08,0x70,0xf3]
vsub.i64 q8, q8, q9
-@ CHECK: vsub.f32 q8, q8, q9 @ encoding: [0xe2,0x0d,0x60,0xf2]
vsub.f32 q8, q8, q9
+
+ vsub.i8 d13, d21
+ vsub.i16 d14, d22
+ vsub.i32 d15, d23
+ vsub.i64 d16, d24
+ vsub.f32 d17, d25
+ vsub.i8 q1, q10
+ vsub.i16 q2, q9
+ vsub.i32 q3, q8
+ vsub.i64 q4, q7
+ vsub.f32 q5, q6
+
+@ CHECK: vsub.i8 d16, d17, d16 @ encoding: [0xa0,0x08,0x41,0xf3]
+@ CHECK: vsub.i16 d16, d17, d16 @ encoding: [0xa0,0x08,0x51,0xf3]
+@ CHECK: vsub.i32 d16, d17, d16 @ encoding: [0xa0,0x08,0x61,0xf3]
+@ CHECK: vsub.i64 d16, d17, d16 @ encoding: [0xa0,0x08,0x71,0xf3]
+@ CHECK: vsub.f32 d16, d16, d17 @ encoding: [0xa1,0x0d,0x60,0xf2]
+@ CHECK: vsub.i8 q8, q8, q9 @ encoding: [0xe2,0x08,0x40,0xf3]
+@ CHECK: vsub.i16 q8, q8, q9 @ encoding: [0xe2,0x08,0x50,0xf3]
+@ CHECK: vsub.i32 q8, q8, q9 @ encoding: [0xe2,0x08,0x60,0xf3]
+@ CHECK: vsub.i64 q8, q8, q9 @ encoding: [0xe2,0x08,0x70,0xf3]
+@ CHECK: vsub.f32 q8, q8, q9 @ encoding: [0xe2,0x0d,0x60,0xf2]
+
+@ CHECK: vsub.i8 d13, d13, d21 @ encoding: [0x25,0xd8,0x0d,0xf3]
+@ CHECK: vsub.i16 d14, d14, d22 @ encoding: [0x26,0xe8,0x1e,0xf3]
+@ CHECK: vsub.i32 d15, d15, d23 @ encoding: [0x27,0xf8,0x2f,0xf3]
+@ CHECK: vsub.i64 d16, d16, d24 @ encoding: [0xa8,0x08,0x70,0xf3]
+@ CHECK: vsub.f32 d17, d17, d25 @ encoding: [0xa9,0x1d,0x61,0xf2]
+@ CHECK: vsub.i8 q1, q1, q10 @ encoding: [0x64,0x28,0x02,0xf3]
+@ CHECK: vsub.i16 q2, q2, q9 @ encoding: [0x62,0x48,0x14,0xf3]
+@ CHECK: vsub.i32 q3, q3, q8 @ encoding: [0x60,0x68,0x26,0xf3]
+@ CHECK: vsub.i64 q4, q4, q7 @ encoding: [0x4e,0x88,0x38,0xf3]
+@ CHECK: vsub.f32 q5, q5, q6 @ encoding: [0x4c,0xad,0x2a,0xf2]
+
+
+
@ CHECK: vsubl.s8 q8, d17, d16 @ encoding: [0xa0,0x02,0xc1,0xf2]
vsubl.s8 q8, d17, d16
@ CHECK: vsubl.s16 q8, d17, d16 @ encoding: [0xa0,0x02,0xd1,0xf2]
diff --git a/test/MC/ARM/neon-table-encoding.s b/test/MC/ARM/neon-table-encoding.s
index 7bf47c782f8a..343ae83cba88 100644
--- a/test/MC/ARM/neon-table-encoding.s
+++ b/test/MC/ARM/neon-table-encoding.s
@@ -1,19 +1,22 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
-@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xa0,0x08,0xf1,0xf3]
vtbl.8 d16, {d17}, d16
-@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xa2,0x09,0xf0,0xf3]
vtbl.8 d16, {d16, d17}, d18
-@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
vtbl.8 d16, {d16, d17, d18}, d20
-@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
vtbl.8 d16, {d16, d17, d18, d19}, d20
-@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xe1,0x28,0xf0,0xf3]
+
+@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xa0,0x08,0xf1,0xf3]
+@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xa2,0x09,0xf0,0xf3]
+@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xf3]
+@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xf3]
+
+
vtbx.8 d18, {d16}, d17
-@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xe2,0x39,0xf0,0xf3]
vtbx.8 d19, {d16, d17}, d18
-@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
vtbx.8 d20, {d16, d17, d18}, d21
-@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
vtbx.8 d20, {d16, d17, d18, d19}, d21
+
+@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xe1,0x28,0xf0,0xf3]
+@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xe2,0x39,0xf0,0xf3]
+@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xf3]
+@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xf3]
diff --git a/test/MC/ARM/neon-vld-encoding.s b/test/MC/ARM/neon-vld-encoding.s
index 55c88686e003..3cc6bf11cf5e 100644
--- a/test/MC/ARM/neon-vld-encoding.s
+++ b/test/MC/ARM/neon-vld-encoding.s
@@ -1,5 +1,4 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
-@ XFAIL: *
vld1.8 {d16}, [r0, :64]
vld1.16 {d16}, [r0]
@@ -9,15 +8,107 @@
vld1.16 {d16, d17}, [r0, :128]
vld1.32 {d16, d17}, [r0]
vld1.64 {d16, d17}, [r0]
+ vld1.8 {d1, d2, d3}, [r3]
+ vld1.16 {d4, d5, d6}, [r3, :64]
+ vld1.32 {d5, d6, d7}, [r3]
+ vld1.64 {d6, d7, d8}, [r3, :64]
+ vld1.8 {d1, d2, d3, d4}, [r3]
+ vld1.16 {d4, d5, d6, d7}, [r3, :64]
+ vld1.32 {d5, d6, d7, d8}, [r3]
+ vld1.64 {d6, d7, d8, d9}, [r3, :64]
-@ CHECK: vld1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x60,0xf4]
-@ CHECK: vld1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x60,0xf4]
-@ CHECK: vld1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x60,0xf4]
-@ CHECK: vld1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x60,0xf4]
-@ CHECK: vld1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x60,0xf4]
-@ CHECK: vld1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf4]
-@ CHECK: vld1.32 {d16, d17}, [r0]@ encoding: [0x8f,0x0a,0x60,0xf4]
-@ CHECK: vld1.64 {d16, d17}, [r0]@ encoding: [0xcf,0x0a,0x60,0xf4]
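+
+@ Writeback and post-indexed variants: "!" adds the transfer size to the base
+@ register after the access, and ", rN" adds register rN instead.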
+ vld1.8 {d16}, [r0, :64]!
+ vld1.16 {d16}, [r0]!
+ vld1.32 {d16}, [r0]!
+ vld1.64 {d16}, [r0]!
+ vld1.8 {d16, d17}, [r0, :64]!
+ vld1.16 {d16, d17}, [r0, :128]!
+ vld1.32 {d16, d17}, [r0]!
+ vld1.64 {d16, d17}, [r0]!
+
+ vld1.8 {d16}, [r0, :64], r5
+ vld1.16 {d16}, [r0], r5
+ vld1.32 {d16}, [r0], r5
+ vld1.64 {d16}, [r0], r5
+ vld1.8 {d16, d17}, [r0, :64], r5
+ vld1.16 {d16, d17}, [r0, :128], r5
+ vld1.32 {d16, d17}, [r0], r5
+ vld1.64 {d16, d17}, [r0], r5
+
+ vld1.8 {d1, d2, d3}, [r3]!
+ vld1.16 {d4, d5, d6}, [r3, :64]!
+ vld1.32 {d5, d6, d7}, [r3]!
+ vld1.64 {d6, d7, d8}, [r3, :64]!
+
+ vld1.8 {d1, d2, d3}, [r3], r6
+ vld1.16 {d4, d5, d6}, [r3, :64], r6
+ vld1.32 {d5, d6, d7}, [r3], r6
+ vld1.64 {d6, d7, d8}, [r3, :64], r6
+
+ vld1.8 {d1, d2, d3, d4}, [r3]!
+ vld1.16 {d4, d5, d6, d7}, [r3, :64]!
+ vld1.32 {d5, d6, d7, d8}, [r3]!
+ vld1.64 {d6, d7, d8, d9}, [r3, :64]!
+
+ vld1.8 {d1, d2, d3, d4}, [r3], r8
+ vld1.16 {d4, d5, d6, d7}, [r3, :64], r8
+ vld1.32 {d5, d6, d7, d8}, [r3], r8
+ vld1.64 {d6, d7, d8, d9}, [r3, :64], r8
+
+@ CHECK: vld1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x60,0xf4]
+@ CHECK: vld1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x60,0xf4]
+@ CHECK: vld1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x60,0xf4]
+@ CHECK: vld1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x60,0xf4]
+@ CHECK: vld1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x60,0xf4]
+@ CHECK: vld1.8 {d1, d2, d3}, [r3] @ encoding: [0x0f,0x16,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64] @ encoding: [0x5f,0x46,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7}, [r3] @ encoding: [0x8f,0x56,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64] @ encoding: [0xdf,0x66,0x23,0xf4]
+@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3] @ encoding: [0x0f,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64] @ encoding: [0x5f,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3] @ encoding: [0x8f,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64] @ encoding: [0xdf,0x62,0x23,0xf4]
+
+@ CHECK: vld1.8 {d16}, [r0, :64]! @ encoding: [0x1d,0x07,0x60,0xf4]
+@ CHECK: vld1.16 {d16}, [r0]! @ encoding: [0x4d,0x07,0x60,0xf4]
+@ CHECK: vld1.32 {d16}, [r0]! @ encoding: [0x8d,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d16}, [r0]! @ encoding: [0xcd,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0, :64]! @ encoding: [0x1d,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x0a,0x60,0xf4]
+@ CHECK: vld1.32 {d16, d17}, [r0]! @ encoding: [0x8d,0x0a,0x60,0xf4]
+@ CHECK: vld1.64 {d16, d17}, [r0]! @ encoding: [0xcd,0x0a,0x60,0xf4]
+
+@ CHECK: vld1.8 {d16}, [r0, :64], r5 @ encoding: [0x15,0x07,0x60,0xf4]
+@ CHECK: vld1.16 {d16}, [r0], r5 @ encoding: [0x45,0x07,0x60,0xf4]
+@ CHECK: vld1.32 {d16}, [r0], r5 @ encoding: [0x85,0x07,0x60,0xf4]
+@ CHECK: vld1.64 {d16}, [r0], r5 @ encoding: [0xc5,0x07,0x60,0xf4]
+@ CHECK: vld1.8 {d16, d17}, [r0, :64], r5 @ encoding: [0x15,0x0a,0x60,0xf4]
+@ CHECK: vld1.16 {d16, d17}, [r0, :128], r5 @ encoding: [0x65,0x0a,0x60,0xf4]
+@ CHECK: vld1.32 {d16, d17}, [r0], r5 @ encoding: [0x85,0x0a,0x60,0xf4]
+@ CHECK: vld1.64 {d16, d17}, [r0], r5 @ encoding: [0xc5,0x0a,0x60,0xf4]
+
+@ CHECK: vld1.8 {d1, d2, d3}, [r3]! @ encoding: [0x0d,0x16,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64]! @ encoding: [0x5d,0x46,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7}, [r3]! @ encoding: [0x8d,0x56,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64]! @ encoding: [0xdd,0x66,0x23,0xf4]
+
+@ CHECK: vld1.8 {d1, d2, d3}, [r3], r6 @ encoding: [0x06,0x16,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6}, [r3, :64], r6 @ encoding: [0x56,0x46,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7}, [r3], r6 @ encoding: [0x86,0x56,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8}, [r3, :64], r6 @ encoding: [0xd6,0x66,0x23,0xf4]
+
+@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3]! @ encoding: [0x0d,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64]! @ encoding: [0x5d,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3]! @ encoding: [0x8d,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64]! @ encoding: [0xdd,0x62,0x23,0xf4]
+
+@ CHECK: vld1.8 {d1, d2, d3, d4}, [r3], r8 @ encoding: [0x08,0x12,0x23,0xf4]
+@ CHECK: vld1.16 {d4, d5, d6, d7}, [r3, :64], r8 @ encoding: [0x58,0x42,0x23,0xf4]
+@ CHECK: vld1.32 {d5, d6, d7, d8}, [r3], r8 @ encoding: [0x88,0x52,0x23,0xf4]
+@ CHECK: vld1.64 {d6, d7, d8, d9}, [r3, :64], r8 @ encoding: [0xd8,0x62,0x23,0xf4]
vld2.8 {d16, d17}, [r0, :64]
@@ -27,63 +118,154 @@
vld2.16 {d16, d17, d18, d19}, [r0, :128]
vld2.32 {d16, d17, d18, d19}, [r0, :256]
+ vld2.8 {d19, d20}, [r0, :64]!
+ vld2.16 {d16, d17}, [r0, :128]!
+ vld2.32 {q10}, [r0]!
+ vld2.8 {d4-d7}, [r0, :64]!
+ vld2.16 {d1, d2, d3, d4}, [r0, :128]!
+ vld2.32 {q7, q8}, [r0, :256]!
+
+ vld2.8 {d19, d20}, [r0, :64], r6
+ vld2.16 {d16, d17}, [r0, :128], r6
+ vld2.32 {q10}, [r0], r6
+ vld2.8 {d4-d7}, [r0, :64], r6
+ vld2.16 {d1, d2, d3, d4}, [r0, :128], r6
+ vld2.32 {q7, q8}, [r0, :256], r6
+
@ CHECK: vld2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x60,0xf4]
@ CHECK: vld2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x60,0xf4]
-@ CHECK: vld2.32 {d16, d17}, [r0]@ encoding: [0x8f,0x08,0x60,0xf4]
-@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64]@ encoding: [0x1f,0x03,0x60,0xf4]
+@ CHECK: vld2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x60,0xf4]
+@ CHECK: vld2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x60,0xf4]
@ CHECK: vld2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x60,0xf4]
@ CHECK: vld2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x60,0xf4]
+@ CHECK: vld2.8 {d19, d20}, [r0, :64]! @ encoding: [0x1d,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128]! @ encoding: [0x6d,0x08,0x60,0xf4]
+@ CHECK: vld2.32 {d20, d21}, [r0]! @ encoding: [0x8d,0x48,0x60,0xf4]
+@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64]! @ encoding: [0x1d,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128]! @ encoding: [0x6d,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256]! @ encoding: [0xbd,0xe3,0x20,0xf4]
+
+@ CHECK: vld2.8 {d19, d20}, [r0, :64], r6 @ encoding: [0x16,0x38,0x60,0xf4]
+@ CHECK: vld2.16 {d16, d17}, [r0, :128], r6 @ encoding: [0x66,0x08,0x60,0xf4]
+@ CHECK: vld2.32 {d20, d21}, [r0], r6 @ encoding: [0x86,0x48,0x60,0xf4]
+@ CHECK: vld2.8 {d4, d5, d6, d7}, [r0, :64], r6 @ encoding: [0x16,0x43,0x20,0xf4]
+@ CHECK: vld2.16 {d1, d2, d3, d4}, [r0, :128], r6 @ encoding: [0x66,0x13,0x20,0xf4]
+@ CHECK: vld2.32 {d14, d15, d16, d17}, [r0, :256], r6 @ encoding: [0xb6,0xe3,0x20,0xf4]
+
+
+ vld3.8 {d16, d17, d18}, [r1]
+ vld3.16 {d6, d7, d8}, [r2]
+ vld3.32 {d1, d2, d3}, [r3]
+ vld3.8 {d16, d18, d20}, [r0, :64]
+ vld3.u16 {d27, d29, d31}, [r4]
+ vld3.i32 {d6, d8, d10}, [r5]
- vld3.8 {d16, d17, d18}, [r0, :64]
- vld3.16 {d16, d17, d18}, [r0]
- vld3.32 {d16, d17, d18}, [r0]
- vld3.8 {d16, d18, d20}, [r0, :64]!
- vld3.8 {d17, d19, d21}, [r0, :64]!
- vld3.16 {d16, d18, d20}, [r0]!
- vld3.16 {d17, d19, d21}, [r0]!
- vld3.32 {d16, d18, d20}, [r0]!
- vld3.32 {d17, d19, d21}, [r0]!
+ vld3.i8 {d12, d13, d14}, [r6], r1
+ vld3.i16 {d11, d12, d13}, [r7], r2
+ vld3.u32 {d2, d3, d4}, [r8], r3
+ vld3.8 {d4, d6, d8}, [r9], r4
+ vld3.u16 {d14, d16, d18}, [r9], r4
+ vld3.i32 {d16, d18, d20}, [r10], r5
-@ CHECK: vld3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x60,0xf4]
-@ CHECK: vld3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x60,0xf4]
-@ CHECK: vld3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x60,0xf4]
+ vld3.p8 {d6, d7, d8}, [r8]!
+ vld3.16 {d9, d10, d11}, [r7]!
+ vld3.f32 {d1, d2, d3}, [r6]!
+ vld3.8 {d16, d18, d20}, [r0, :64]!
+ vld3.p16 {d20, d22, d24}, [r5]!
+ vld3.32 {d5, d7, d9}, [r4]!
+
+
+@ CHECK: vld3.8 {d16, d17, d18}, [r1] @ encoding: [0x0f,0x04,0x61,0xf4]
+@ CHECK: vld3.16 {d6, d7, d8}, [r2] @ encoding: [0x4f,0x64,0x22,0xf4]
+@ CHECK: vld3.32 {d1, d2, d3}, [r3] @ encoding: [0x8f,0x14,0x23,0xf4]
+@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x60,0xf4]
+@ CHECK: vld3.16 {d27, d29, d31}, [r4] @ encoding: [0x4f,0xb5,0x64,0xf4]
+@ CHECK: vld3.32 {d6, d8, d10}, [r5] @ encoding: [0x8f,0x65,0x25,0xf4]
+@ CHECK: vld3.8 {d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x26,0xf4]
+@ CHECK: vld3.16 {d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x27,0xf4]
+@ CHECK: vld3.32 {d2, d3, d4}, [r8], r3 @ encoding: [0x83,0x24,0x28,0xf4]
+@ CHECK: vld3.8 {d4, d6, d8}, [r9], r4 @ encoding: [0x04,0x45,0x29,0xf4]
+@ CHECK: vld3.16 {d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x29,0xf4]
+@ CHECK: vld3.32 {d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x6a,0xf4]
+@ CHECK: vld3.8 {d6, d7, d8}, [r8]! @ encoding: [0x0d,0x64,0x28,0xf4]
+@ CHECK: vld3.16 {d9, d10, d11}, [r7]! @ encoding: [0x4d,0x94,0x27,0xf4]
+@ CHECK: vld3.32 {d1, d2, d3}, [r6]! @ encoding: [0x8d,0x14,0x26,0xf4]
@ CHECK: vld3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x60,0xf4]
-@ CHECK: vld3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x60,0xf4]
-@ CHECK: vld3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x60,0xf4]
-@ CHECK: vld3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x60,0xf4]
-@ CHECK: vld3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x60,0xf4]
-@ CHECK: vld3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x60,0xf4]
-
-
- vld4.8 {d16, d17, d18, d19}, [r0, :64]
- vld4.16 {d16, d17, d18, d19}, [r0, :128]
- vld4.32 {d16, d17, d18, d19}, [r0, :256]
- vld4.8 {d16, d18, d20, d22}, [r0, :256]!
- vld4.8 {d17, d19, d21, d23}, [r0, :256]!
- vld4.16 {d16, d18, d20, d22}, [r0]!
- vld4.16 {d17, d19, d21, d23}, [r0]!
- vld4.32 {d16, d18, d20, d22}, [r0]!
- vld4.32 {d17, d19, d21, d23}, [r0]!
-
-@ CHECK: vld4.8 {d16, d17, d18, d19}, [r0, :64]@ encoding: [0x1f,0x00,0x60,0xf4]
-@ CHECK: vld4.16 {d16, d17, d18, d19}, [r0,:128]@ encoding:[0x6f,0x00,0x60,0xf4]
-@ CHECK: vld4.32 {d16, d17, d18, d19}, [r0,:256]@ encoding:[0xbf,0x00,0x60,0xf4]
-@ CHECK: vld4.8 {d16, d18, d20, d22}, [r0,:256]!@ encoding:[0x3d,0x01,0x60,0xf4]
-@ CHECK: vld4.8 {d17, d19, d21, d23}, [r0,:256]!@ encoding:[0x3d,0x11,0x60,0xf4]
-@ CHECK: vld4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x60,0xf4]
-@ CHECK: vld4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x60,0xf4]
-@ CHECK: vld4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x60,0xf4]
-@ CHECK: vld4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x60,0xf4]
+@ CHECK: vld3.16 {d20, d22, d24}, [r5]! @ encoding: [0x4d,0x45,0x65,0xf4]
+@ CHECK: vld3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x24,0xf4]
+
+
+ vld4.8 {d16, d17, d18, d19}, [r1, :64]
+ vld4.16 {d16, d17, d18, d19}, [r2, :128]
+ vld4.32 {d16, d17, d18, d19}, [r3, :256]
+ vld4.8 {d17, d19, d21, d23}, [r5, :256]
+ vld4.16 {d17, d19, d21, d23}, [r7]
+ vld4.32 {d16, d18, d20, d22}, [r8]
+
+ vld4.s8 {d16, d17, d18, d19}, [r1, :64]!
+ vld4.s16 {d16, d17, d18, d19}, [r2, :128]!
+ vld4.s32 {d16, d17, d18, d19}, [r3, :256]!
+ vld4.u8 {d17, d19, d21, d23}, [r5, :256]!
+ vld4.u16 {d17, d19, d21, d23}, [r7]!
+ vld4.u32 {d16, d18, d20, d22}, [r8]!
+
+ vld4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+ vld4.p16 {d16, d17, d18, d19}, [r2], r7
+ vld4.f32 {d16, d17, d18, d19}, [r3, :64], r5
+ vld4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+ vld4.i16 {d16, d18, d20, d22}, [r6], r3
+ vld4.i32 {d17, d19, d21, d23}, [r9], r4
+
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x65,0xf4]
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x67,0xf4]
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x68,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x65,0xf4]
+@ CHECK: vld4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x67,0xf4]
+@ CHECK: vld4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x68,0xf4]
+@ CHECK: vld4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x61,0xf4]
+@ CHECK: vld4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x62,0xf4]
+@ CHECK: vld4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x63,0xf4]
+@ CHECK: vld4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x64,0xf4]
+@ CHECK: vld4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x66,0xf4]
+@ CHECK: vld4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x69,0xf4]
+
+
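+@ All-lanes forms: a "[]" lane specifier replicates the loaded element into
+@ every lane of the listed registers.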
+ vld1.8 {d4[]}, [r1]
+ vld1.8 {d4[]}, [r1]!
+ vld1.8 {d4[]}, [r1], r3
+ vld1.8 {d4[], d5[]}, [r1]
+ vld1.8 {d4[], d5[]}, [r1]!
+ vld1.8 {d4[], d5[]}, [r1], r3
+@ CHECK: vld1.8 {d4[]}, [r1] @ encoding: [0x0f,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[]}, [r1]! @ encoding: [0x0d,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[]}, [r1], r3 @ encoding: [0x03,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[], d5[]}, [r1] @ encoding: [0x2f,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[], d5[]}, [r1]! @ encoding: [0x2d,0x4c,0xa1,0xf4]
+@ CHECK: vld1.8 {d4[], d5[]}, [r1], r3 @ encoding: [0x23,0x4c,0xa1,0xf4]
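+
+@ Single-lane forms load one element into the named lane and leave the
+@ other lanes of the register unchanged.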
vld1.8 {d16[3]}, [r0]
vld1.16 {d16[2]}, [r0, :16]
vld1.32 {d16[1]}, [r0, :32]
+ vld1.p8 d12[6], [r2]!
+ vld1.i8 d12[6], [r2], r2
+ vld1.u16 d12[3], [r2]!
+ vld1.16 d12[2], [r2], r2
@ CHECK: vld1.8 {d16[3]}, [r0] @ encoding: [0x6f,0x00,0xe0,0xf4]
@ CHECK: vld1.16 {d16[2]}, [r0, :16] @ encoding: [0x9f,0x04,0xe0,0xf4]
@ CHECK: vld1.32 {d16[1]}, [r0, :32] @ encoding: [0xbf,0x08,0xe0,0xf4]
+@ CHECK: vld1.8 {d12[6]}, [r2]! @ encoding: [0xcd,0xc0,0xa2,0xf4]
+@ CHECK: vld1.8 {d12[6]}, [r2], r2 @ encoding: [0xc2,0xc0,0xa2,0xf4]
+@ CHECK: vld1.16 {d12[3]}, [r2]! @ encoding: [0xcd,0xc4,0xa2,0xf4]
+@ CHECK: vld1.16 {d12[2]}, [r2], r2 @ encoding: [0x82,0xc4,0xa2,0xf4]
vld2.8 {d16[1], d17[1]}, [r0, :16]
@@ -91,35 +273,225 @@
vld2.32 {d16[1], d17[1]}, [r0]
vld2.16 {d17[1], d19[1]}, [r0]
vld2.32 {d17[0], d19[0]}, [r0, :64]
+ vld2.32 {d17[0], d19[0]}, [r0, :64]!
+ vld2.8 {d2[4], d3[4]}, [r2], r3
+ vld2.8 {d2[4], d3[4]}, [r2]!
+ vld2.8 {d2[4], d3[4]}, [r2]
+ vld2.32 {d22[], d23[]}, [r1]
+ vld2.32 {d22[], d24[]}, [r1]
+ vld2.32 {d10[ ],d11[ ]}, [r3]!
+ vld2.32 {d14[ ],d16[ ]}, [r4]!
+ vld2.32 {d22[ ],d23[ ]}, [r5], r4
+ vld2.32 {d22[ ],d24[ ]}, [r6], r4
@ CHECK: vld2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xe0,0xf4]
@ CHECK: vld2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xe0,0xf4]
@ CHECK: vld2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xe0,0xf4]
@ CHECK: vld2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xe0,0xf4]
@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xe0,0xf4]
+@ CHECK: vld2.32 {d17[0], d19[0]}, [r0, :64]! @ encoding: [0x5d,0x19,0xe0,0xf4]
+@ CHECK: vld2.8 {d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0xa2,0xf4]
+@ CHECK: vld2.8 {d2[4], d3[4]}, [r2]! @ encoding: [0x8d,0x21,0xa2,0xf4]
+@ CHECK: vld2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0xa2,0xf4]
+@ CHECK: vld2.32 {d22[], d23[]}, [r1] @ encoding: [0x8f,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r1] @ encoding: [0xaf,0x6d,0xe1,0xf4]
+@ CHECK: vld2.32 {d10[], d11[]}, [r3]! @ encoding: [0x8d,0xad,0xa3,0xf4]
+@ CHECK: vld2.32 {d14[], d16[]}, [r4]! @ encoding: [0xad,0xed,0xa4,0xf4]
+@ CHECK: vld2.32 {d22[], d23[]}, [r5], r4 @ encoding: [0x84,0x6d,0xe5,0xf4]
+@ CHECK: vld2.32 {d22[], d24[]}, [r6], r4 @ encoding: [0xa4,0x6d,0xe6,0xf4]
+
+
+ vld3.8 {d16[1], d17[1], d18[1]}, [r1]
+ vld3.16 {d6[1], d7[1], d8[1]}, [r2]
+ vld3.32 {d1[1], d2[1], d3[1]}, [r3]
+ vld3.u16 {d27[2], d29[2], d31[2]}, [r4]
+ vld3.i32 {d6[0], d8[0], d10[0]}, [r5]
+
+ vld3.i8 {d12[3], d13[3], d14[3]}, [r6], r1
+ vld3.i16 {d11[2], d12[2], d13[2]}, [r7], r2
+ vld3.u32 {d2[1], d3[1], d4[1]}, [r8], r3
+ vld3.u16 {d14[2], d16[2], d18[2]}, [r9], r4
+ vld3.i32 {d16[0], d18[0], d20[0]}, [r10], r5
+
+ vld3.p8 {d6[6], d7[6], d8[6]}, [r8]!
+ vld3.16 {d9[2], d10[2], d11[2]}, [r7]!
+ vld3.f32 {d1[1], d2[1], d3[1]}, [r6]!
+ vld3.p16 {d20[2], d22[2], d24[2]}, [r5]!
+ vld3.32 {d5[0], d7[0], d9[0]}, [r4]!
+
+@ CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xe1,0xf4]
+@ CHECK: vld3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0xa2,0xf4]
+@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0xa3,0xf4]
+@ CHECK: vld3.16 {d27[2], d29[2], d31[2]}, [r4] @ encoding: [0xaf,0xb6,0xe4,0xf4]
+@ CHECK: vld3.32 {d6[0], d8[0], d10[0]}, [r5] @ encoding: [0x4f,0x6a,0xa5,0xf4]
+@ CHECK: vld3.8 {d12[3], d13[3], d14[3]}, [r6], r1 @ encoding: [0x61,0xc2,0xa6,0xf4]
+@ CHECK: vld3.16 {d11[2], d12[2], d13[2]}, [r7], r2 @ encoding: [0x82,0xb6,0xa7,0xf4]
+@ CHECK: vld3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0xa8,0xf4]
+@ CHECK: vld3.16 {d14[2], d16[2], d18[2]}, [r9], r4 @ encoding: [0xa4,0xe6,0xa9,0xf4]
+@ CHECK: vld3.32 {d16[0], d18[0], d20[0]}, [r10], r5 @ encoding: [0x45,0x0a,0xea,0xf4]
+@ CHECK: vld3.8 {d6[6], d7[6], d8[6]}, [r8]! @ encoding: [0xcd,0x62,0xa8,0xf4]
+@ CHECK: vld3.16 {d9[2], d10[2], d11[2]}, [r7]! @ encoding: [0x8d,0x96,0xa7,0xf4]
+@ CHECK: vld3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0xa6,0xf4]
+@ CHECK: vld3.16 {d20[2], d21[2], d22[2]}, [r5]! @ encoding: [0xad,0x46,0xe5,0xf4]
+@ CHECK: vld3.32 {d5[0], d7[0], d9[0]}, [r4]! @ encoding: [0x4d,0x5a,0xa4,0xf4]
+
+
+ vld3.8 {d16[], d17[], d18[]}, [r1]
+ vld3.16 {d16[], d17[], d18[]}, [r2]
+ vld3.32 {d16[], d17[], d18[]}, [r3]
+ vld3.8 {d17[], d19[], d21[]}, [r7]
+ vld3.16 {d17[], d19[], d21[]}, [r7]
+ vld3.32 {d16[], d18[], d20[]}, [r8]
+
+ vld3.s8 {d16[], d17[], d18[]}, [r1]!
+ vld3.s16 {d16[], d17[], d18[]}, [r2]!
+ vld3.s32 {d16[], d17[], d18[]}, [r3]!
+ vld3.u8 {d17[], d19[], d21[]}, [r7]!
+ vld3.u16 {d17[], d19[], d21[]}, [r7]!
+ vld3.u32 {d16[], d18[], d20[]}, [r8]!
+
+ vld3.p8 {d16[], d17[], d18[]}, [r1], r8
+ vld3.p16 {d16[], d17[], d18[]}, [r2], r7
+ vld3.f32 {d16[], d17[], d18[]}, [r3], r5
+ vld3.i8 {d16[], d18[], d20[]}, [r6], r3
+ vld3.i16 {d16[], d18[], d20[]}, [r6], r3
+ vld3.i32 {d17[], d19[], d21[]}, [r9], r4
+
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1] @ encoding: [0x0f,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2] @ encoding: [0x4f,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3] @ encoding: [0x8f,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d17[], d19[], d21[]}, [r7] @ encoding: [0x2f,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d19[], d21[]}, [r7] @ encoding: [0x6f,0x1e,0xe7,0xf4]
+@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8] @ encoding: [0xaf,0x0e,0xe8,0xf4]
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1]! @ encoding: [0x0d,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2]! @ encoding: [0x4d,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3]! @ encoding: [0x8d,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x2d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.16 {d17[], d18[], d19[]}, [r7]! @ encoding: [0x6d,0x1e,0xe7,0xf4]
+@ CHECK: vld3.32 {d16[], d18[], d20[]}, [r8]! @ encoding: [0xad,0x0e,0xe8,0xf4]
+@ CHECK: vld3.8 {d16[], d17[], d18[]}, [r1], r8 @ encoding: [0x08,0x0e,0xe1,0xf4]
+@ CHECK: vld3.16 {d16[], d17[], d18[]}, [r2], r7 @ encoding: [0x47,0x0e,0xe2,0xf4]
+@ CHECK: vld3.32 {d16[], d17[], d18[]}, [r3], r5 @ encoding: [0x85,0x0e,0xe3,0xf4]
+@ CHECK: vld3.8 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x23,0x0e,0xe6,0xf4]
+@ CHECK: vld3.16 {d16[], d18[], d20[]}, [r6], r3 @ encoding: [0x63,0x0e,0xe6,0xf4]
+@ CHECK: vld3.32 {d17[], d19[], d21[]}, [r9], r4 @ encoding: [0xa4,0x1e,0xe9,0xf4]
+
+
+ vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+ vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+ vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+ vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+ vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
+
+ vld4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+ vld4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+ vld4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+ vld4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+ vld4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
+
+ vld4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+ vld4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+ vld4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+ vld4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+ vld4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
+
+@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xe3,0xf4]
+@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xe6,0xf4]
+@ CHECK: vld4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xe9,0xf4]
+
+
+ vld4.8 {d16[], d17[], d18[], d19[]}, [r1]
+ vld4.16 {d16[], d17[], d18[], d19[]}, [r2]
+ vld4.32 {d16[], d17[], d18[], d19[]}, [r3]
+ vld4.8 {d17[], d19[], d21[], d23[]}, [r7]
+ vld4.16 {d17[], d19[], d21[], d23[]}, [r7]
+ vld4.32 {d16[], d18[], d20[], d22[]}, [r8]
+
+ vld4.s8 {d16[], d17[], d18[], d19[]}, [r1]!
+ vld4.s16 {d16[], d17[], d18[], d19[]}, [r2]!
+ vld4.s32 {d16[], d17[], d18[], d19[]}, [r3]!
+ vld4.u8 {d17[], d19[], d21[], d23[]}, [r7]!
+ vld4.u16 {d17[], d19[], d21[], d23[]}, [r7]!
+ vld4.u32 {d16[], d18[], d20[], d22[]}, [r8]!
+
+ vld4.p8 {d16[], d17[], d18[], d19[]}, [r1], r8
+ vld4.p16 {d16[], d17[], d18[], d19[]}, [r2], r7
+ vld4.f32 {d16[], d17[], d18[], d19[]}, [r3], r5
+ vld4.i8 {d16[], d18[], d20[], d22[]}, [r6], r3
+ vld4.i16 {d16[], d18[], d20[], d22[]}, [r6], r3
+ vld4.i32 {d17[], d19[], d21[], d23[]}, [r9], r4
+
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1] @ encoding: [0x0f,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2] @ encoding: [0x4f,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3] @ encoding: [0x8f,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x2f,0x1f,0xe7,0xf4]
+@ CHECK: vld4.16 {d17[], d19[], d21[], d23[]}, [r7] @ encoding: [0x6f,0x1f,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8] @ encoding: [0xaf,0x0f,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1]! @ encoding: [0x0d,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2]! @ encoding: [0x4d,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3]! @ encoding: [0x8d,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x2d,0x1f,0xe7,0xf4]
+@ CHECK: vld4.16 {d17[], d18[], d19[], d20[]}, [r7]! @ encoding: [0x6d,0x1f,0xe7,0xf4]
+@ CHECK: vld4.32 {d16[], d18[], d20[], d22[]}, [r8]! @ encoding: [0xad,0x0f,0xe8,0xf4]
+@ CHECK: vld4.8 {d16[], d17[], d18[], d19[]}, [r1], r8 @ encoding: [0x08,0x0f,0xe1,0xf4]
+@ CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r2], r7 @ encoding: [0x47,0x0f,0xe2,0xf4]
+@ CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r3], r5 @ encoding: [0x85,0x0f,0xe3,0xf4]
+@ CHECK: vld4.8 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x23,0x0f,0xe6,0xf4]
+@ CHECK: vld4.16 {d16[], d18[], d20[], d22[]}, [r6], r3 @ encoding: [0x63,0x0f,0xe6,0xf4]
+@ CHECK: vld4.32 {d17[], d19[], d21[], d23[]}, [r9], r4 @ encoding: [0xa4,0x1f,0xe9,0xf4]
+
+@ Handle 'Q' registers in register lists as if the sub-reg D regs were
+@ specified instead.
+ vld1.8 {q3}, [r9]
+ vld1.8 {q3, q4}, [r9]
+
+@ CHECK: vld1.8 {d6, d7}, [r9] @ encoding: [0x0f,0x6a,0x29,0xf4]
+@ CHECK: vld1.8 {d6, d7, d8, d9}, [r9] @ encoding: [0x0f,0x62,0x29,0xf4]
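+@ (Each Qn overlaps D(2n) and D(2n+1), so {q3} is the same list as
+@ {d6, d7} and {q3, q4} the same as {d6, d7, d8, d9}.)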
+
+
+@ Spot-check additional size-suffix aliases.
+ vld1.8 {d2}, [r2]
+ vld1.p8 {d2}, [r2]
+ vld1.u8 {d2}, [r2]
+
+ vld1.8 {q2}, [r2]
+ vld1.p8 {q2}, [r2]
+ vld1.u8 {q2}, [r2]
+ vld1.f32 {q2}, [r2]
+
+ vld1.u8 {d2, d3, d4}, [r2]
+ vld1.i32 {d2, d3, d4}, [r2]
+ vld1.f64 {d2, d3, d4}, [r2]
+@ CHECK: vld1.8 {d2}, [r2] @ encoding: [0x0f,0x27,0x22,0xf4]
+@ CHECK: vld1.8 {d2}, [r2] @ encoding: [0x0f,0x27,0x22,0xf4]
+@ CHECK: vld1.8 {d2}, [r2] @ encoding: [0x0f,0x27,0x22,0xf4]
- vld3.8 {d16[1], d17[1], d18[1]}, [r0]
- vld3.16 {d16[1], d17[1], d18[1]}, [r0]
- vld3.32 {d16[1], d17[1], d18[1]}, [r0]
- vld3.16 {d16[1], d18[1], d20[1]}, [r0]
- vld3.32 {d17[1], d19[1], d21[1]}, [r0]
+@ CHECK: vld1.8 {d4, d5}, [r2] @ encoding: [0x0f,0x4a,0x22,0xf4]
+@ CHECK: vld1.8 {d4, d5}, [r2] @ encoding: [0x0f,0x4a,0x22,0xf4]
+@ CHECK: vld1.8 {d4, d5}, [r2] @ encoding: [0x0f,0x4a,0x22,0xf4]
+@ CHECK: vld1.32 {d4, d5}, [r2] @ encoding: [0x8f,0x4a,0x22,0xf4]
-@ CHECK: vld3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xe0,0xf4]
-@ CHECK: vld3.16 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x4f,0x06,0xe0,0xf4]
-@ CHECK: vld3.32 {d16[1], d17[1], d18[1]}, [r0]@ encoding: [0x8f,0x0a,0xe0,0xf4]
-@ CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r0]@ encoding: [0x6f,0x06,0xe0,0xf4]
-@ CHECK: vld3.32 {d17[1], d19[1], d21[1]}, [r0]@ encoding: [0xcf,0x1a,0xe0,0xf4]
+@ CHECK: vld1.8 {d2, d3, d4}, [r2] @ encoding: [0x0f,0x26,0x22,0xf4]
+@ CHECK: vld1.32 {d2, d3, d4}, [r2] @ encoding: [0x8f,0x26,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4}, [r2] @ encoding: [0xcf,0x26,0x22,0xf4]
- vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
- vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
- vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
- vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64]
- vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
+@ Register lists can use the range syntax, just like VLDM.
+ vld1.f64 {d2-d5}, [r2,:128]!
+ vld1.f64 {d2,d3,d4,d5}, [r2,:128]!
-@ CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xe0,0xf4]
-@ CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xe0,0xf4]
-@ CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xe0,0xf4]
-@ CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [r0, :64] @ encoding: [0x7f,0x07,0xe0,0xf4]
-@ CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xe0,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
+@ CHECK: vld1.64 {d2, d3, d4, d5}, [r2, :128]! @ encoding: [0xed,0x22,0x22,0xf4]
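+@ (For VLD/VST only the element width in the size suffix matters, so the
+@ s/u/i/p/f type variants above all print in the canonical .8/.32/.64
+@ forms, and d2-d5 is equivalent to listing d2,d3,d4,d5 explicitly.)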
diff --git a/test/MC/ARM/neon-vst-encoding.s b/test/MC/ARM/neon-vst-encoding.s
index c595aa2d5a4d..f5feca4c8c12 100644
--- a/test/MC/ARM/neon-vst-encoding.s
+++ b/test/MC/ARM/neon-vst-encoding.s
@@ -1,101 +1,278 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
-@ XFAIL: *
+
+ vst1.8 {d16}, [r0, :64]
+ vst1.16 {d16}, [r0]
+ vst1.32 {d16}, [r0]
+ vst1.64 {d16}, [r0]
+ vst1.8 {d16, d17}, [r0, :64]
+ vst1.16 {d16, d17}, [r0, :128]
+ vst1.32 {d16, d17}, [r0]
+ vst1.64 {d16, d17}, [r0]
+ vst1.8 {d16, d17, d18}, [r0, :64]
+ vst1.8 {d16, d17, d18}, [r0, :64]!
+ vst1.8 {d16, d17, d18}, [r0], r3
+ vst1.8 {d16, d17, d18, d19}, [r0, :64]
+ vst1.16 {d16, d17, d18, d19}, [r1, :64]!
+ vst1.64 {d16, d17, d18, d19}, [r3], r2
@ CHECK: vst1.8 {d16}, [r0, :64] @ encoding: [0x1f,0x07,0x40,0xf4]
- vst1.8 {d16}, [r0, :64]
-@ CHECK: vst1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x40,0xf4]
- vst1.16 {d16}, [r0]
-@ CHECK: vst1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x40,0xf4]
- vst1.32 {d16}, [r0]
-@ CHECK: vst1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x40,0xf4]
- vst1.64 {d16}, [r0]
+@ CHECK: vst1.16 {d16}, [r0] @ encoding: [0x4f,0x07,0x40,0xf4]
+@ CHECK: vst1.32 {d16}, [r0] @ encoding: [0x8f,0x07,0x40,0xf4]
+@ CHECK: vst1.64 {d16}, [r0] @ encoding: [0xcf,0x07,0x40,0xf4]
@ CHECK: vst1.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x0a,0x40,0xf4]
- vst1.8 {d16, d17}, [r0, :64]
-@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
- vst1.16 {d16, d17}, [r0, :128]
-@ CHECK: vst1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x40,0xf4]
- vst1.32 {d16, d17}, [r0]
-@ CHECK: vst1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x40,0xf4]
- vst1.64 {d16, d17}, [r0]
+@ CHECK: vst1.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x0a,0x40,0xf4]
+@ CHECK: vst1.32 {d16, d17}, [r0] @ encoding: [0x8f,0x0a,0x40,0xf4]
+@ CHECK: vst1.64 {d16, d17}, [r0] @ encoding: [0xcf,0x0a,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0, :64]! @ encoding: [0x1d,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18}, [r0], r3 @ encoding: [0x03,0x06,0x40,0xf4]
+@ CHECK: vst1.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x02,0x40,0xf4]
+@ CHECK: vst1.16 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x5d,0x02,0x41,0xf4]
+@ CHECK: vst1.64 {d16, d17, d18, d19}, [r3], r2 @ encoding: [0xc2,0x02,0x43,0xf4]
+
+
+ vst2.8 {d16, d17}, [r0, :64]
+ vst2.16 {d16, d17}, [r0, :128]
+ vst2.32 {d16, d17}, [r0]
+ vst2.8 {d16, d17, d18, d19}, [r0, :64]
+ vst2.16 {d16, d17, d18, d19}, [r0, :128]
+ vst2.32 {d16, d17, d18, d19}, [r0, :256]
+ vst2.8 {d16, d17}, [r0, :64]!
+ vst2.16 {q15}, [r0, :128]!
+ vst2.32 {d14, d15}, [r0]!
+ vst2.8 {d16, d17, d18, d19}, [r0, :64]!
+ vst2.16 {d18-d21}, [r0, :128]!
+ vst2.32 {q4, q5}, [r0, :256]!
@ CHECK: vst2.8 {d16, d17}, [r0, :64] @ encoding: [0x1f,0x08,0x40,0xf4]
- vst2.8 {d16, d17}, [r0, :64]
-@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
- vst2.16 {d16, d17}, [r0, :128]
-@ CHECK: vst2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x40,0xf4]
- vst2.32 {d16, d17}, [r0]
+@ CHECK: vst2.16 {d16, d17}, [r0, :128] @ encoding: [0x6f,0x08,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17}, [r0] @ encoding: [0x8f,0x08,0x40,0xf4]
@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x03,0x40,0xf4]
- vst2.8 {d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
- vst2.16 {d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
- vst2.32 {d16, d17, d18, d19}, [r0, :256]
-
-@ CHECK: vst3.8 {d16, d17, d18}, [r0, :64] @ encoding: [0x1f,0x04,0x40,0xf4]
- vst3.8 {d16, d17, d18}, [r0, :64]
-@ CHECK: vst3.16 {d16, d17, d18}, [r0] @ encoding: [0x4f,0x04,0x40,0xf4]
- vst3.16 {d16, d17, d18}, [r0]
-@ CHECK: vst3.32 {d16, d17, d18}, [r0] @ encoding: [0x8f,0x04,0x40,0xf4]
- vst3.32 {d16, d17, d18}, [r0]
+@ CHECK: vst2.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x03,0x40,0xf4]
+@ CHECK: vst2.32 {d16, d17, d18, d19}, [r0, :256] @ encoding: [0xbf,0x03,0x40,0xf4]
+@ CHECK: vst2.8 {d16, d17}, [r0, :64]! @ encoding: [0x1d,0x08,0x40,0xf4]
+@ CHECK: vst2.16 {d30, d31}, [r0, :128]! @ encoding: [0x6d,0xe8,0x40,0xf4]
+@ CHECK: vst2.32 {d14, d15}, [r0]! @ encoding: [0x8d,0xe8,0x00,0xf4]
+@ CHECK: vst2.8 {d16, d17, d18, d19}, [r0, :64]! @ encoding: [0x1d,0x03,0x40,0xf4]
+@ CHECK: vst2.16 {d18, d19, d20, d21}, [r0, :128]! @ encoding: [0x6d,0x23,0x40,0xf4]
+@ CHECK: vst2.32 {d8, d9, d10, d11}, [r0, :256]! @ encoding: [0xbd,0x83,0x00,0xf4]
+
+
+ vst3.8 {d16, d17, d18}, [r1]
+ vst3.16 {d6, d7, d8}, [r2]
+ vst3.32 {d1, d2, d3}, [r3]
+ vst3.8 {d16, d18, d20}, [r0, :64]
+ vst3.u16 {d27, d29, d31}, [r4]
+ vst3.i32 {d6, d8, d10}, [r5]
+
+ vst3.i8 {d12, d13, d14}, [r6], r1
+ vst3.i16 {d11, d12, d13}, [r7], r2
+ vst3.u32 {d2, d3, d4}, [r8], r3
+ vst3.8 {d4, d6, d8}, [r9], r4
+ vst3.u16 {d14, d16, d18}, [r9], r4
+ vst3.i32 {d16, d18, d20}, [r10], r5
+
+ vst3.p8 {d6, d7, d8}, [r8]!
+ vst3.16 {d9, d10, d11}, [r7]!
+ vst3.f32 {d1, d2, d3}, [r6]!
+ vst3.8 {d16, d18, d20}, [r0, :64]!
+ vst3.p16 {d20, d22, d24}, [r5]!
+ vst3.32 {d5, d7, d9}, [r4]!
+
+@ CHECK: vst3.8 {d16, d17, d18}, [r1] @ encoding: [0x0f,0x04,0x41,0xf4]
+@ CHECK: vst3.16 {d6, d7, d8}, [r2] @ encoding: [0x4f,0x64,0x02,0xf4]
+@ CHECK: vst3.32 {d1, d2, d3}, [r3] @ encoding: [0x8f,0x14,0x03,0xf4]
+@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64] @ encoding: [0x1f,0x05,0x40,0xf4]
+@ CHECK: vst3.16 {d27, d29, d31}, [r4] @ encoding: [0x4f,0xb5,0x44,0xf4]
+@ CHECK: vst3.32 {d6, d8, d10}, [r5] @ encoding: [0x8f,0x65,0x05,0xf4]
+@ CHECK: vst3.8 {d12, d13, d14}, [r6], r1 @ encoding: [0x01,0xc4,0x06,0xf4]
+@ CHECK: vst3.16 {d11, d12, d13}, [r7], r2 @ encoding: [0x42,0xb4,0x07,0xf4]
+@ CHECK: vst3.32 {d2, d3, d4}, [r8], r3 @ encoding: [0x83,0x24,0x08,0xf4]
+@ CHECK: vst3.8 {d4, d6, d8}, [r9], r4 @ encoding: [0x04,0x45,0x09,0xf4]
+@ CHECK: vst3.16 {d14, d16, d18}, [r9], r4 @ encoding: [0x44,0xe5,0x09,0xf4]
+@ CHECK: vst3.32 {d16, d18, d20}, [r10], r5 @ encoding: [0x85,0x05,0x4a,0xf4]
+@ CHECK: vst3.8 {d6, d7, d8}, [r8]! @ encoding: [0x0d,0x64,0x08,0xf4]
+@ CHECK: vst3.16 {d9, d10, d11}, [r7]! @ encoding: [0x4d,0x94,0x07,0xf4]
+@ CHECK: vst3.32 {d1, d2, d3}, [r6]! @ encoding: [0x8d,0x14,0x06,0xf4]
@ CHECK: vst3.8 {d16, d18, d20}, [r0, :64]! @ encoding: [0x1d,0x05,0x40,0xf4]
- vst3.8 {d16, d18, d20}, [r0, :64]!
-@ CHECK: vst3.8 {d17, d19, d21}, [r0, :64]! @ encoding: [0x1d,0x15,0x40,0xf4]
- vst3.8 {d17, d19, d21}, [r0, :64]!
-@ CHECK: vst3.16 {d16, d18, d20}, [r0]! @ encoding: [0x4d,0x05,0x40,0xf4]
- vst3.16 {d16, d18, d20}, [r0]!
-@ CHECK: vst3.16 {d17, d19, d21}, [r0]! @ encoding: [0x4d,0x15,0x40,0xf4]
- vst3.16 {d17, d19, d21}, [r0]!
-@ CHECK: vst3.32 {d16, d18, d20}, [r0]! @ encoding: [0x8d,0x05,0x40,0xf4]
- vst3.32 {d16, d18, d20}, [r0]!
-@ CHECK: vst3.32 {d17, d19, d21}, [r0]! @ encoding: [0x8d,0x15,0x40,0xf4]
- vst3.32 {d17, d19, d21}, [r0]!
-
-@ CHECK: vst4.8 {d16, d17, d18, d19}, [r0, :64] @ encoding: [0x1f,0x00,0x40,0xf4]
- vst4.8 {d16, d17, d18, d19}, [r0, :64]
-@ CHECK: vst4.16 {d16, d17, d18, d19}, [r0, :128] @ encoding: [0x6f,0x00,0x40,0xf4]
- vst4.16 {d16, d17, d18, d19}, [r0, :128]
-@ CHECK: vst4.8 {d16, d18, d20, d22}, [r0, :256]! @ encoding: [0x3d,0x01,0x40,0xf4]
- vst4.8 {d16, d18, d20, d22}, [r0, :256]!
-@ CHECK: vst4.8 {d17, d19, d21, d23}, [r0, :256]! @ encoding: [0x3d,0x11,0x40,0xf4]
- vst4.8 {d17, d19, d21, d23}, [r0, :256]!
-@ CHECK: vst4.16 {d16, d18, d20, d22}, [r0]! @ encoding: [0x4d,0x01,0x40,0xf4]
- vst4.16 {d16, d18, d20, d22}, [r0]!
-@ CHECK: vst4.16 {d17, d19, d21, d23}, [r0]! @ encoding: [0x4d,0x11,0x40,0xf4]
- vst4.16 {d17, d19, d21, d23}, [r0]!
-@ CHECK: vst4.32 {d16, d18, d20, d22}, [r0]! @ encoding: [0x8d,0x01,0x40,0xf4]
- vst4.32 {d16, d18, d20, d22}, [r0]!
-@ CHECK: vst4.32 {d17, d19, d21, d23}, [r0]! @ encoding: [0x8d,0x11,0x40,0xf4]
- vst4.32 {d17, d19, d21, d23}, [r0]!
+@ CHECK: vst3.16 {d20, d22, d24}, [r5]! @ encoding: [0x4d,0x45,0x45,0xf4]
+@ CHECK: vst3.32 {d5, d7, d9}, [r4]! @ encoding: [0x8d,0x55,0x04,0xf4]
+
+
+ vst4.8 {d16, d17, d18, d19}, [r1, :64]
+ vst4.16 {d16, d17, d18, d19}, [r2, :128]
+ vst4.32 {d16, d17, d18, d19}, [r3, :256]
+ vst4.8 {d17, d19, d21, d23}, [r5, :256]
+ vst4.16 {d17, d19, d21, d23}, [r7]
+ vst4.32 {d16, d18, d20, d22}, [r8]
+
+ vst4.s8 {d16, d17, d18, d19}, [r1, :64]!
+ vst4.s16 {d16, d17, d18, d19}, [r2, :128]!
+ vst4.s32 {d16, d17, d18, d19}, [r3, :256]!
+ vst4.u8 {d17, d19, d21, d23}, [r5, :256]!
+ vst4.u16 {d17, d19, d21, d23}, [r7]!
+ vst4.u32 {d16, d18, d20, d22}, [r8]!
+
+ vst4.p8 {d16, d17, d18, d19}, [r1, :64], r8
+ vst4.p16 {d16, d17, d18, d19}, [r2], r7
+ vst4.f32 {d16, d17, d18, d19}, [r3, :64], r5
+ vst4.i8 {d16, d18, d20, d22}, [r4, :256], r2
+ vst4.i16 {d16, d18, d20, d22}, [r6], r3
+ vst4.i32 {d17, d19, d21, d23}, [r9], r4
+
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64] @ encoding: [0x1f,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128] @ encoding: [0x6f,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256] @ encoding: [0xbf,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256] @ encoding: [0x3f,0x11,0x45,0xf4]
+@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7] @ encoding: [0x4f,0x11,0x47,0xf4]
+@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8] @ encoding: [0x8f,0x01,0x48,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64]! @ encoding: [0x1d,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2, :128]! @ encoding: [0x6d,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :256]! @ encoding: [0xbd,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d17, d19, d21, d23}, [r5, :256]! @ encoding: [0x3d,0x11,0x45,0xf4]
+@ CHECK: vst4.16 {d17, d19, d21, d23}, [r7]! @ encoding: [0x4d,0x11,0x47,0xf4]
+@ CHECK: vst4.32 {d16, d18, d20, d22}, [r8]! @ encoding: [0x8d,0x01,0x48,0xf4]
+@ CHECK: vst4.8 {d16, d17, d18, d19}, [r1, :64], r8 @ encoding: [0x18,0x00,0x41,0xf4]
+@ CHECK: vst4.16 {d16, d17, d18, d19}, [r2], r7 @ encoding: [0x47,0x00,0x42,0xf4]
+@ CHECK: vst4.32 {d16, d17, d18, d19}, [r3, :64], r5 @ encoding: [0x95,0x00,0x43,0xf4]
+@ CHECK: vst4.8 {d16, d18, d20, d22}, [r4, :256], r2 @ encoding: [0x32,0x01,0x44,0xf4]
+@ CHECK: vst4.16 {d16, d18, d20, d22}, [r6], r3 @ encoding: [0x43,0x01,0x46,0xf4]
+@ CHECK: vst4.32 {d17, d19, d21, d23}, [r9], r4 @ encoding: [0x84,0x11,0x49,0xf4]
+
+
+ vst2.8 {d16[1], d17[1]}, [r0, :16]
+ vst2.p16 {d16[1], d17[1]}, [r0, :32]
+ vst2.i32 {d16[1], d17[1]}, [r0]
+ vst2.u16 {d17[1], d19[1]}, [r0]
+ vst2.f32 {d17[0], d19[0]}, [r0, :64]
+
+ vst2.8 {d2[4], d3[4]}, [r2], r3
+ vst2.u8 {d2[4], d3[4]}, [r2]!
+ vst2.p8 {d2[4], d3[4]}, [r2]
+
+ vst2.16 {d17[1], d19[1]}, [r0]
+ vst2.32 {d17[0], d19[0]}, [r0, :64]
+ vst2.i16 {d7[1], d9[1]}, [r1]!
+ vst2.32 {d6[0], d8[0]}, [r2, :64]!
+ vst2.16 {d2[1], d4[1]}, [r3], r5
+ vst2.u32 {d5[0], d7[0]}, [r4, :64], r7
@ CHECK: vst2.8 {d16[1], d17[1]}, [r0, :16] @ encoding: [0x3f,0x01,0xc0,0xf4]
- vst2.8 {d16[1], d17[1]}, [r0, :16]
-@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
- vst2.16 {d16[1], d17[1]}, [r0, :32]
-@ CHECK: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf4]
- vst2.32 {d16[1], d17[1]}, [r0]
-@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4]
- vst2.16 {d17[1], d19[1]}, [r0]
-@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
- vst2.32 {d17[0], d19[0]}, [r0, :64]
-
-@ CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x2f,0x02,0xc0,0xf4]
- vst3.8 {d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x4f,0x06,0xc0,0xf4]
- vst3.16 {d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.32 {d16[1], d17[1], d18[1]}, [r0] @ encoding: [0x8f,0x0a,0xc0,0xf4]
- vst3.32 {d16[1], d17[1], d18[1]}, [r0]
-@ CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0] @ encoding: [0xaf,0x16,0xc0,0xf4]
- vst3.16 {d17[2], d19[2], d21[2]}, [r0]
-@ CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r0] @ encoding: [0x4f,0x0a,0xc0,0xf4]
- vst3.32 {d16[0], d18[0], d20[0]}, [r0]
-
-@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32] @ encoding: [0x3f,0x03,0xc0,0xf4]
- vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0, :32]
-@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0] @ encoding: [0x4f,0x07,0xc0,0xf4]
- vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
-@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128] @ encoding: [0xaf,0x0b,0xc0,0xf4]
- vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0, :128]
-@ CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64] @ encoding: [0xff,0x17,0xc0,0xf4]
- vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
-@ CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf4]
- vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
+@ CHECK: vst2.16 {d16[1], d17[1]}, [r0, :32] @ encoding: [0x5f,0x05,0xc0,0xf4]
+@ CHECK: vst2.32 {d16[1], d17[1]}, [r0] @ encoding: [0x8f,0x09,0xc0,0xf4]
+@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+
+@ CHECK: vst2.8 {d2[4], d3[4]}, [r2], r3 @ encoding: [0x83,0x21,0x82,0xf4]
+@ CHECK: vst2.8 {d2[4], d3[4]}, [r2]! @ encoding: [0x8d,0x21,0x82,0xf4]
+@ CHECK: vst2.8 {d2[4], d3[4]}, [r2] @ encoding: [0x8f,0x21,0x82,0xf4]
+
+@ CHECK: vst2.16 {d17[1], d19[1]}, [r0] @ encoding: [0x6f,0x15,0xc0,0xf4]
+@ CHECK: vst2.32 {d17[0], d19[0]}, [r0, :64] @ encoding: [0x5f,0x19,0xc0,0xf4]
+@ CHECK: vst2.16 {d7[1], d9[1]}, [r1]! @ encoding: [0x6d,0x75,0x81,0xf4]
+@ CHECK: vst2.32 {d6[0], d8[0]}, [r2, :64]! @ encoding: [0x5d,0x69,0x82,0xf4]
+@ CHECK: vst2.16 {d2[1], d4[1]}, [r3], r5 @ encoding: [0x65,0x25,0x83,0xf4]
+@ CHECK: vst2.32 {d5[0], d7[0]}, [r4, :64], r7 @ encoding: [0x57,0x59,0x84,0xf4]
+
+
+ vst3.8 {d16[1], d17[1], d18[1]}, [r1]
+ vst3.16 {d6[1], d7[1], d8[1]}, [r2]
+ vst3.32 {d1[1], d2[1], d3[1]}, [r3]
+ vst3.u16 {d27[1], d29[1], d31[1]}, [r4]
+ vst3.i32 {d6[1], d8[1], d10[1]}, [r5]
+
+ vst3.i8 {d12[1], d13[1], d14[1]}, [r6], r1
+ vst3.i16 {d11[1], d12[1], d13[1]}, [r7], r2
+ vst3.u32 {d2[1], d3[1], d4[1]}, [r8], r3
+ vst3.u16 {d14[1], d16[1], d18[1]}, [r9], r4
+ vst3.i32 {d16[1], d18[1], d20[1]}, [r10], r5
+
+ vst3.p8 {d6[1], d7[1], d8[1]}, [r8]!
+ vst3.16 {d9[1], d10[1], d11[1]}, [r7]!
+ vst3.f32 {d1[1], d2[1], d3[1]}, [r6]!
+ vst3.p16 {d20[1], d22[1], d24[1]}, [r5]!
+ vst3.32 {d5[1], d7[1], d9[1]}, [r4]!
+
+@ CHECK: vst3.8 {d16[1], d17[1], d18[1]}, [r1] @ encoding: [0x2f,0x02,0xc1,0xf4]
+@ CHECK: vst3.16 {d6[1], d7[1], d8[1]}, [r2] @ encoding: [0x4f,0x66,0x82,0xf4]
+@ CHECK: vst3.32 {d1[1], d2[1], d3[1]}, [r3] @ encoding: [0x8f,0x1a,0x83,0xf4]
+@ CHECK: vst3.16 {d27[1], d29[1], d31[1]}, [r4] @ encoding: [0x6f,0xb6,0xc4,0xf4]
+@ CHECK: vst3.32 {d6[1], d8[1], d10[1]}, [r5] @ encoding: [0xcf,0x6a,0x85,0xf4]
+@ CHECK: vst3.8 {d12[1], d13[1], d14[1]}, [r6], r1 @ encoding: [0x21,0xc2,0x86,0xf4]
+@ CHECK: vst3.16 {d11[1], d12[1], d13[1]}, [r7], r2 @ encoding: [0x42,0xb6,0x87,0xf4]
+@ CHECK: vst3.32 {d2[1], d3[1], d4[1]}, [r8], r3 @ encoding: [0x83,0x2a,0x88,0xf4]
+@ CHECK: vst3.16 {d14[1], d16[1], d18[1]}, [r9], r4 @ encoding: [0x64,0xe6,0x89,0xf4]
+@ CHECK: vst3.32 {d16[1], d18[1], d20[1]}, [r10], r5 @ encoding: [0xc5,0x0a,0xca,0xf4]
+@ CHECK: vst3.8 {d6[1], d7[1], d8[1]}, [r8]! @ encoding: [0x2d,0x62,0x88,0xf4]
+@ CHECK: vst3.16 {d9[1], d10[1], d11[1]}, [r7]! @ encoding: [0x4d,0x96,0x87,0xf4]
+@ CHECK: vst3.32 {d1[1], d2[1], d3[1]}, [r6]! @ encoding: [0x8d,0x1a,0x86,0xf4]
+@ CHECK: vst3.16 {d20[1], d21[1], d22[1]}, [r5]! @ encoding: [0x6d,0x46,0xc5,0xf4]
+@ CHECK: vst3.32 {d5[1], d7[1], d9[1]}, [r4]! @ encoding: [0xcd,0x5a,0x84,0xf4]
+
+
+ vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1]
+ vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2]
+ vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3]
+ vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7]
+ vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]
+
+ vst4.s8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]!
+ vst4.s16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]!
+ vst4.s32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]!
+ vst4.u16 {d17[1], d19[1], d21[1], d23[1]}, [r7]!
+ vst4.u32 {d16[1], d18[1], d20[1], d22[1]}, [r8]!
+
+ vst4.p8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8
+ vst4.p16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7
+ vst4.f32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5
+ vst4.i16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3
+ vst4.i32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4
+
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1] @ encoding: [0x2f,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2] @ encoding: [0x4f,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3] @ encoding: [0x8f,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d19[1], d21[1], d23[1]}, [r7] @ encoding: [0x6f,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8] @ encoding: [0xcf,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32]! @ encoding: [0x3d,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2, :64]! @ encoding: [0x5d,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :128]! @ encoding: [0xad,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d17[1], d18[1], d19[1], d20[1]}, [r7]! @ encoding: [0x6d,0x17,0xc7,0xf4]
+@ CHECK: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r8]! @ encoding: [0xcd,0x0b,0xc8,0xf4]
+@ CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1, :32], r8 @ encoding: [0x38,0x03,0xc1,0xf4]
+@ CHECK: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r2], r7 @ encoding: [0x47,0x07,0xc2,0xf4]
+@ CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r3, :64], r5 @ encoding: [0x95,0x0b,0xc3,0xf4]
+@ CHECK: vst4.16 {d16[1], d18[1], d20[1], d22[1]}, [r6], r3 @ encoding: [0x63,0x07,0xc6,0xf4]
+@ CHECK: vst4.32 {d17[1], d19[1], d21[1], d23[1]}, [r9], r4 @ encoding: [0xc4,0x1b,0xc9,0xf4]
+
+
+@ Spot-check additional size-suffix aliases.
+
+ vst1.8 {d2}, [r2]
+ vst1.p8 {d2}, [r2]
+ vst1.u8 {d2}, [r2]
+
+ vst1.8 {q2}, [r2]
+ vst1.p8 {q2}, [r2]
+ vst1.u8 {q2}, [r2]
+ vst1.f32 {q2}, [r2]
+
+@ CHECK: vst1.8 {d2}, [r2] @ encoding: [0x0f,0x27,0x02,0xf4]
+@ CHECK: vst1.8 {d2}, [r2] @ encoding: [0x0f,0x27,0x02,0xf4]
+@ CHECK: vst1.8 {d2}, [r2] @ encoding: [0x0f,0x27,0x02,0xf4]
+
+@ CHECK: vst1.8 {d4, d5}, [r2] @ encoding: [0x0f,0x4a,0x02,0xf4]
+@ CHECK: vst1.8 {d4, d5}, [r2] @ encoding: [0x0f,0x4a,0x02,0xf4]
+@ CHECK: vst1.8 {d4, d5}, [r2] @ encoding: [0x0f,0x4a,0x02,0xf4]
+@ CHECK: vst1.32 {d4, d5}, [r2] @ encoding: [0x8f,0x4a,0x02,0xf4]
+
+@ rdar://11082188
+ vst2.8 {d8, d10}, [r4]
+@ CHECK: vst2.8 {d8, d10}, [r4] @ encoding: [0x0f,0x89,0x04,0xf4]
+
+ vst1.32 {d9[1]}, [r3, :32]
+ vst1.32 {d27[1]}, [r9, :32]!
+ vst1.32 {d27[1]}, [r3, :32], r5
+@ CHECK: vst1.32 {d9[1]}, [r3, :32] @ encoding: [0xbf,0x98,0x83,0xf4]
+@ CHECK: vst1.32 {d27[1]}, [r9, :32]! @ encoding: [0xbd,0xb8,0xc9,0xf4]
+@ CHECK: vst1.32 {d27[1]}, [r3, :32], r5 @ encoding: [0xb5,0xb8,0xc3,0xf4]
+
diff --git a/test/MC/ARM/neon-vswp.s b/test/MC/ARM/neon-vswp.s
new file mode 100644
index 000000000000..2138eedf4c25
--- /dev/null
+++ b/test/MC/ARM/neon-vswp.s
@@ -0,0 +1,7 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple armv7-apple-darwin -show-encoding < %s | FileCheck %s
+
+vswp d1, d2
+vswp q1, q2
+
+@ CHECK: vswp d1, d2 @ encoding: [0x02,0x10,0xb2,0xf3]
+@ CHECK: vswp q1, q2 @ encoding: [0x44,0x20,0xb2,0xf3]
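+@ (VSWP exchanges the contents of its two operands; the Q form swaps the
+@ underlying D-register pairs, here d2,d3 with d4,d5.)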
diff --git a/test/MC/ARM/neont2-minmax-encoding.s b/test/MC/ARM/neont2-minmax-encoding.s
index 7e86d45bb14a..9ecadce8dce2 100644
--- a/test/MC/ARM/neont2-minmax-encoding.s
+++ b/test/MC/ARM/neont2-minmax-encoding.s
@@ -2,59 +2,125 @@
.code 16
-@ CHECK: vmin.s8 d16, d16, d17 @ encoding: [0x40,0xef,0xb1,0x06]
- vmin.s8 d16, d16, d17
-@ CHECK: vmin.s16 d16, d16, d17 @ encoding: [0x50,0xef,0xb1,0x06]
- vmin.s16 d16, d16, d17
-@ CHECK: vmin.s32 d16, d16, d17 @ encoding: [0x60,0xef,0xb1,0x06]
- vmin.s32 d16, d16, d17
-@ CHECK: vmin.u8 d16, d16, d17 @ encoding: [0x40,0xff,0xb1,0x06]
- vmin.u8 d16, d16, d17
-@ CHECK: vmin.u16 d16, d16, d17 @ encoding: [0x50,0xff,0xb1,0x06]
- vmin.u16 d16, d16, d17
-@ CHECK: vmin.u32 d16, d16, d17 @ encoding: [0x60,0xff,0xb1,0x06]
- vmin.u32 d16, d16, d17
-@ CHECK: vmin.f32 d16, d16, d17 @ encoding: [0x60,0xef,0xa1,0x0f]
- vmin.f32 d16, d16, d17
-@ CHECK: vmin.s8 q8, q8, q9 @ encoding: [0x40,0xef,0xf2,0x06]
- vmin.s8 q8, q8, q9
-@ CHECK: vmin.s16 q8, q8, q9 @ encoding: [0x50,0xef,0xf2,0x06]
- vmin.s16 q8, q8, q9
-@ CHECK: vmin.s32 q8, q8, q9 @ encoding: [0x60,0xef,0xf2,0x06]
- vmin.s32 q8, q8, q9
-@ CHECK: vmin.u8 q8, q8, q9 @ encoding: [0x40,0xff,0xf2,0x06]
- vmin.u8 q8, q8, q9
-@ CHECK: vmin.u16 q8, q8, q9 @ encoding: [0x50,0xff,0xf2,0x06]
- vmin.u16 q8, q8, q9
-@ CHECK: vmin.u32 q8, q8, q9 @ encoding: [0x60,0xff,0xf2,0x06]
- vmin.u32 q8, q8, q9
-@ CHECK: vmin.f32 q8, q8, q9 @ encoding: [0x60,0xef,0xe2,0x0f]
- vmin.f32 q8, q8, q9
-@ CHECK: vmax.s8 d16, d16, d17 @ encoding: [0x40,0xef,0xa1,0x06]
- vmax.s8 d16, d16, d17
-@ CHECK: vmax.s16 d16, d16, d17 @ encoding: [0x50,0xef,0xa1,0x06]
- vmax.s16 d16, d16, d17
-@ CHECK: vmax.s32 d16, d16, d17 @ encoding: [0x60,0xef,0xa1,0x06]
- vmax.s32 d16, d16, d17
-@ CHECK: vmax.u8 d16, d16, d17 @ encoding: [0x40,0xff,0xa1,0x06]
- vmax.u8 d16, d16, d17
-@ CHECK: vmax.u16 d16, d16, d17 @ encoding: [0x50,0xff,0xa1,0x06]
- vmax.u16 d16, d16, d17
-@ CHECK: vmax.u32 d16, d16, d17 @ encoding: [0x60,0xff,0xa1,0x06]
- vmax.u32 d16, d16, d17
-@ CHECK: vmax.f32 d16, d16, d17 @ encoding: [0x40,0xef,0xa1,0x0f]
- vmax.f32 d16, d16, d17
-@ CHECK: vmax.s8 q8, q8, q9 @ encoding: [0x40,0xef,0xe2,0x06]
- vmax.s8 q8, q8, q9
-@ CHECK: vmax.s16 q8, q8, q9 @ encoding: [0x50,0xef,0xe2,0x06]
- vmax.s16 q8, q8, q9
+ vmax.s8 d1, d2, d3
+ vmax.s16 d4, d5, d6
+ vmax.s32 d7, d8, d9
+ vmax.u8 d10, d11, d12
+ vmax.u16 d13, d14, d15
+ vmax.u32 d16, d17, d18
+ vmax.f32 d19, d20, d21
+
+ vmax.s8 d2, d3
+ vmax.s16 d5, d6
+ vmax.s32 d8, d9
+ vmax.u8 d11, d12
+ vmax.u16 d14, d15
+ vmax.u32 d17, d18
+ vmax.f32 d20, d21
+
+ vmax.s8 q1, q2, q3
+ vmax.s16 q4, q5, q6
+ vmax.s32 q7, q8, q9
+ vmax.u8 q10, q11, q12
+ vmax.u16 q13, q14, q15
+ vmax.u32 q6, q7, q8
+ vmax.f32 q9, q5, q1
+
+ vmax.s8 q2, q3
+ vmax.s16 q5, q6
+ vmax.s32 q8, q9
+ vmax.u8 q11, q2
+ vmax.u16 q4, q5
+ vmax.u32 q7, q8
+ vmax.f32 q2, q1
+
+@ CHECK: vmax.s8 d1, d2, d3 @ encoding: [0x02,0xef,0x03,0x16]
+@ CHECK: vmax.s16 d4, d5, d6 @ encoding: [0x15,0xef,0x06,0x46]
+@ CHECK: vmax.s32 d7, d8, d9 @ encoding: [0x28,0xef,0x09,0x76]
+@ CHECK: vmax.u8 d10, d11, d12 @ encoding: [0x0b,0xff,0x0c,0xa6]
+@ CHECK: vmax.u16 d13, d14, d15 @ encoding: [0x1e,0xff,0x0f,0xd6]
+@ CHECK: vmax.u32 d16, d17, d18 @ encoding: [0x61,0xff,0xa2,0x06]
+@ CHECK: vmax.f32 d19, d20, d21 @ encoding: [0x44,0xef,0xa5,0x3f]
+@ CHECK: vmax.s8 d2, d2, d3 @ encoding: [0x02,0xef,0x03,0x26]
+@ CHECK: vmax.s16 d5, d5, d6 @ encoding: [0x15,0xef,0x06,0x56]
+@ CHECK: vmax.s32 d8, d8, d9 @ encoding: [0x28,0xef,0x09,0x86]
+@ CHECK: vmax.u8 d11, d11, d12 @ encoding: [0x0b,0xff,0x0c,0xb6]
+@ CHECK: vmax.u16 d14, d14, d15 @ encoding: [0x1e,0xff,0x0f,0xe6]
+@ CHECK: vmax.u32 d17, d17, d18 @ encoding: [0x61,0xff,0xa2,0x16]
+@ CHECK: vmax.f32 d20, d20, d21 @ encoding: [0x44,0xef,0xa5,0x4f]
+@ CHECK: vmax.s8 q1, q2, q3 @ encoding: [0x04,0xef,0x46,0x26]
+@ CHECK: vmax.s16 q4, q5, q6 @ encoding: [0x1a,0xef,0x4c,0x86]
+@ CHECK: vmax.s32 q7, q8, q9 @ encoding: [0x20,0xef,0xe2,0xe6]
+@ CHECK: vmax.u8 q10, q11, q12 @ encoding: [0x46,0xff,0xe8,0x46]
+@ CHECK: vmax.u16 q13, q14, q15 @ encoding: [0x5c,0xff,0xee,0xa6]
+@ CHECK: vmax.u32 q6, q7, q8 @ encoding: [0x2e,0xff,0x60,0xc6]
+@ CHECK: vmax.f32 q9, q5, q1 @ encoding: [0x4a,0xef,0x42,0x2f]
+@ CHECK: vmax.s8 q2, q2, q3 @ encoding: [0x04,0xef,0x46,0x46]
+@ CHECK: vmax.s16 q5, q5, q6 @ encoding: [0x1a,0xef,0x4c,0xa6]
@ CHECK: vmax.s32 q8, q8, q9 @ encoding: [0x60,0xef,0xe2,0x06]
- vmax.s32 q8, q8, q9
-@ CHECK: vmax.u8 q8, q8, q9 @ encoding: [0x40,0xff,0xe2,0x06]
- vmax.u8 q8, q8, q9
-@ CHECK: vmax.u16 q8, q8, q9 @ encoding: [0x50,0xff,0xe2,0x06]
- vmax.u16 q8, q8, q9
-@ CHECK: vmax.u32 q8, q8, q9 @ encoding: [0x60,0xff,0xe2,0x06]
- vmax.u32 q8, q8, q9
-@ CHECK: vmax.f32 q8, q8, q9 @ encoding: [0x40,0xef,0xe2,0x0f]
- vmax.f32 q8, q8, q9
+@ CHECK: vmax.u8 q11, q11, q2 @ encoding: [0x46,0xff,0xc4,0x66]
+@ CHECK: vmax.u16 q4, q4, q5 @ encoding: [0x18,0xff,0x4a,0x86]
+@ CHECK: vmax.u32 q7, q7, q8 @ encoding: [0x2e,0xff,0x60,0xe6]
+@ CHECK: vmax.f32 q2, q2, q1 @ encoding: [0x04,0xef,0x42,0x4f]
+
+
+ vmin.s8 d1, d2, d3
+ vmin.s16 d4, d5, d6
+ vmin.s32 d7, d8, d9
+ vmin.u8 d10, d11, d12
+ vmin.u16 d13, d14, d15
+ vmin.u32 d16, d17, d18
+ vmin.f32 d19, d20, d21
+
+ vmin.s8 d2, d3
+ vmin.s16 d5, d6
+ vmin.s32 d8, d9
+ vmin.u8 d11, d12
+ vmin.u16 d14, d15
+ vmin.u32 d17, d18
+ vmin.f32 d20, d21
+
+ vmin.s8 q1, q2, q3
+ vmin.s16 q4, q5, q6
+ vmin.s32 q7, q8, q9
+ vmin.u8 q10, q11, q12
+ vmin.u16 q13, q14, q15
+ vmin.u32 q6, q7, q8
+ vmin.f32 q9, q5, q1
+
+ vmin.s8 q2, q3
+ vmin.s16 q5, q6
+ vmin.s32 q8, q9
+ vmin.u8 q11, q2
+ vmin.u16 q4, q5
+ vmin.u32 q7, q8
+ vmin.f32 q2, q1
+
+@ CHECK: vmin.s8 d1, d2, d3 @ encoding: [0x02,0xef,0x13,0x16]
+@ CHECK: vmin.s16 d4, d5, d6 @ encoding: [0x15,0xef,0x16,0x46]
+@ CHECK: vmin.s32 d7, d8, d9 @ encoding: [0x28,0xef,0x19,0x76]
+@ CHECK: vmin.u8 d10, d11, d12 @ encoding: [0x0b,0xff,0x1c,0xa6]
+@ CHECK: vmin.u16 d13, d14, d15 @ encoding: [0x1e,0xff,0x1f,0xd6]
+@ CHECK: vmin.u32 d16, d17, d18 @ encoding: [0x61,0xff,0xb2,0x06]
+@ CHECK: vmin.f32 d19, d20, d21 @ encoding: [0x64,0xef,0xa5,0x3f]
+@ CHECK: vmin.s8 d2, d2, d3 @ encoding: [0x02,0xef,0x13,0x26]
+@ CHECK: vmin.s16 d5, d5, d6 @ encoding: [0x15,0xef,0x16,0x56]
+@ CHECK: vmin.s32 d8, d8, d9 @ encoding: [0x28,0xef,0x19,0x86]
+@ CHECK: vmin.u8 d11, d11, d12 @ encoding: [0x0b,0xff,0x1c,0xb6]
+@ CHECK: vmin.u16 d14, d14, d15 @ encoding: [0x1e,0xff,0x1f,0xe6]
+@ CHECK: vmin.u32 d17, d17, d18 @ encoding: [0x61,0xff,0xb2,0x16]
+@ CHECK: vmin.f32 d20, d20, d21 @ encoding: [0x64,0xef,0xa5,0x4f]
+@ CHECK: vmin.s8 q1, q2, q3 @ encoding: [0x04,0xef,0x56,0x26]
+@ CHECK: vmin.s16 q4, q5, q6 @ encoding: [0x1a,0xef,0x5c,0x86]
+@ CHECK: vmin.s32 q7, q8, q9 @ encoding: [0x20,0xef,0xf2,0xe6]
+@ CHECK: vmin.u8 q10, q11, q12 @ encoding: [0x46,0xff,0xf8,0x46]
+@ CHECK: vmin.u16 q13, q14, q15 @ encoding: [0x5c,0xff,0xfe,0xa6]
+@ CHECK: vmin.u32 q6, q7, q8 @ encoding: [0x2e,0xff,0x70,0xc6]
+@ CHECK: vmin.f32 q9, q5, q1 @ encoding: [0x6a,0xef,0x42,0x2f]
+@ CHECK: vmin.s8 q2, q2, q3 @ encoding: [0x04,0xef,0x56,0x46]
+@ CHECK: vmin.s16 q5, q5, q6 @ encoding: [0x1a,0xef,0x5c,0xa6]
+@ CHECK: vmin.s32 q8, q8, q9 @ encoding: [0x60,0xef,0xf2,0x06]
+@ CHECK: vmin.u8 q11, q11, q2 @ encoding: [0x46,0xff,0xd4,0x66]
+@ CHECK: vmin.u16 q4, q4, q5 @ encoding: [0x18,0xff,0x5a,0x86]
+@ CHECK: vmin.u32 q7, q7, q8 @ encoding: [0x2e,0xff,0x70,0xe6]
+@ CHECK: vmin.f32 q2, q2, q1 @ encoding: [0x24,0xef,0x42,0x4f]
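+@ (The two-operand forms are aliases in which the destination doubles as
+@ the first source, e.g. "vmax.s8 d2, d3" prints as "vmax.s8 d2, d2, d3".)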
diff --git a/test/MC/ARM/neont2-mov-encoding.s b/test/MC/ARM/neont2-mov-encoding.s
index ababbb795729..43df3498cb5b 100644
--- a/test/MC/ARM/neont2-mov-encoding.s
+++ b/test/MC/ARM/neont2-mov-encoding.s
@@ -1,119 +1,131 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
.code 16
-@ CHECK: vmov.i8 d16, #0x8 @ encoding: [0x18,0x0e,0xc0,0xef]
vmov.i8 d16, #0x8
-@ CHECK: vmov.i16 d16, #0x10 @ encoding: [0x10,0x08,0xc1,0xef]
vmov.i16 d16, #0x10
-@ CHECK: vmov.i16 d16, #0x1000 @ encoding: [0x10,0x0a,0xc1,0xef]
vmov.i16 d16, #0x1000
-@ CHECK: vmov.i32 d16, #0x20 @ encoding: [0x10,0x00,0xc2,0xef]
vmov.i32 d16, #0x20
-@ CHECK: vmov.i32 d16, #0x2000 @ encoding: [0x10,0x02,0xc2,0xef]
vmov.i32 d16, #0x2000
-@ CHECK: vmov.i32 d16, #0x200000 @ encoding: [0x10,0x04,0xc2,0xef]
vmov.i32 d16, #0x200000
-@ CHECK: vmov.i32 d16, #0x20000000 @ encoding: [0x10,0x06,0xc2,0xef]
vmov.i32 d16, #0x20000000
-@ CHECK: vmov.i32 d16, #0x20FF @ encoding: [0x10,0x0c,0xc2,0xef]
vmov.i32 d16, #0x20FF
-@ CHECK: vmov.i32 d16, #0x20FFFF @ encoding: [0x10,0x0d,0xc2,0xef]
vmov.i32 d16, #0x20FFFF
-@ CHECK: vmov.i64 d16, #0xFF0000FF0000FFFF @ encoding: [0x33,0x0e,0xc1,0xff]
vmov.i64 d16, #0xFF0000FF0000FFFF
-@ CHECK: vmov.i8 q8, #0x8 @ encoding: [0x58,0x0e,0xc0,0xef]
+
+@ CHECK: vmov.i8 d16, #0x8 @ encoding: [0xc0,0xef,0x18,0x0e]
+@ CHECK: vmov.i16 d16, #0x10 @ encoding: [0xc1,0xef,0x10,0x08]
+@ CHECK: vmov.i16 d16, #0x1000 @ encoding: [0xc1,0xef,0x10,0x0a]
+@ CHECK: vmov.i32 d16, #0x20 @ encoding: [0xc2,0xef,0x10,0x00]
+@ CHECK: vmov.i32 d16, #0x2000 @ encoding: [0xc2,0xef,0x10,0x02]
+@ CHECK: vmov.i32 d16, #0x200000 @ encoding: [0xc2,0xef,0x10,0x04]
+@ CHECK: vmov.i32 d16, #0x20000000 @ encoding: [0xc2,0xef,0x10,0x06]
+@ CHECK: vmov.i32 d16, #0x20ff @ encoding: [0xc2,0xef,0x10,0x0c]
+@ CHECK: vmov.i32 d16, #0x20ffff @ encoding: [0xc2,0xef,0x10,0x0d]
+@ CHECK: vmov.i64 d16, #0xff0000ff0000ffff @ encoding: [0xc1,0xff,0x33,0x0e]
+
+
vmov.i8 q8, #0x8
-@ CHECK: vmov.i16 q8, #0x10 @ encoding: [0x50,0x08,0xc1,0xef]
vmov.i16 q8, #0x10
-@ CHECK: vmov.i16 q8, #0x1000 @ encoding: [0x50,0x0a,0xc1,0xef]
vmov.i16 q8, #0x1000
-@ CHECK: vmov.i32 q8, #0x20 @ encoding: [0x50,0x00,0xc2,0xef]
vmov.i32 q8, #0x20
-@ CHECK: vmov.i32 q8, #0x2000 @ encoding: [0x50,0x02,0xc2,0xef]
vmov.i32 q8, #0x2000
-@ CHECK: vmov.i32 q8, #0x200000 @ encoding: [0x50,0x04,0xc2,0xef]
vmov.i32 q8, #0x200000
-@ CHECK: vmov.i32 q8, #0x20000000 @ encoding: [0x50,0x06,0xc2,0xef]
vmov.i32 q8, #0x20000000
-@ CHECK: vmov.i32 q8, #0x20FF @ encoding: [0x50,0x0c,0xc2,0xef]
vmov.i32 q8, #0x20FF
-@ CHECK: vmov.i32 q8, #0x20FFFF @ encoding: [0x50,0x0d,0xc2,0xef]
vmov.i32 q8, #0x20FFFF
-@ CHECK: vmov.i64 q8, #0xFF0000FF0000FFFF @ encoding: [0x73,0x0e,0xc1,0xff]
vmov.i64 q8, #0xFF0000FF0000FFFF
-@ CHECK: vmvn.i16 d16, #0x10 @ encoding: [0x30,0x08,0xc1,0xef]
+
+@ CHECK: vmov.i8 q8, #0x8 @ encoding: [0xc0,0xef,0x58,0x0e]
+@ CHECK: vmov.i16 q8, #0x10 @ encoding: [0xc1,0xef,0x50,0x08]
+@ CHECK: vmov.i16 q8, #0x1000 @ encoding: [0xc1,0xef,0x50,0x0a]
+@ CHECK: vmov.i32 q8, #0x20 @ encoding: [0xc2,0xef,0x50,0x00]
+@ CHECK: vmov.i32 q8, #0x2000 @ encoding: [0xc2,0xef,0x50,0x02]
+@ CHECK: vmov.i32 q8, #0x200000 @ encoding: [0xc2,0xef,0x50,0x04]
+@ CHECK: vmov.i32 q8, #0x20000000 @ encoding: [0xc2,0xef,0x50,0x06]
+@ CHECK: vmov.i32 q8, #0x20ff @ encoding: [0xc2,0xef,0x50,0x0c]
+@ CHECK: vmov.i32 q8, #0x20ffff @ encoding: [0xc2,0xef,0x50,0x0d]
+@ CHECK: vmov.i64 q8, #0xff0000ff0000ffff @ encoding: [0xc1,0xff,0x73,0x0e]
+
+
vmvn.i16 d16, #0x10
-@ CHECK: vmvn.i16 d16, #0x1000 @ encoding: [0x30,0x0a,0xc1,0xef]
vmvn.i16 d16, #0x1000
-@ CHECK: vmvn.i32 d16, #0x20 @ encoding: [0x30,0x00,0xc2,0xef]
vmvn.i32 d16, #0x20
-@ CHECK: vmvn.i32 d16, #0x2000 @ encoding: [0x30,0x02,0xc2,0xef]
vmvn.i32 d16, #0x2000
-@ CHECK: vmvn.i32 d16, #0x200000 @ encoding: [0x30,0x04,0xc2,0xef]
vmvn.i32 d16, #0x200000
-@ CHECK: vmvn.i32 d16, #0x20000000 @ encoding: [0x30,0x06,0xc2,0xef]
vmvn.i32 d16, #0x20000000
-@ CHECK: vmvn.i32 d16, #0x20FF @ encoding: [0x30,0x0c,0xc2,0xef]
vmvn.i32 d16, #0x20FF
-@ CHECK: vmvn.i32 d16, #0x20FFFF @ encoding: [0x30,0x0d,0xc2,0xef]
vmvn.i32 d16, #0x20FFFF
-@ CHECK: vmovl.s8 q8, d16 @ encoding: [0x30,0x0a,0xc8,0xef]
+
+@ CHECK: vmvn.i16 d16, #0x10 @ encoding: [0xc1,0xef,0x30,0x08]
+@ CHECK: vmvn.i16 d16, #0x1000 @ encoding: [0xc1,0xef,0x30,0x0a]
+@ CHECK: vmvn.i32 d16, #0x20 @ encoding: [0xc2,0xef,0x30,0x00]
+@ CHECK: vmvn.i32 d16, #0x2000 @ encoding: [0xc2,0xef,0x30,0x02]
+@ CHECK: vmvn.i32 d16, #0x200000 @ encoding: [0xc2,0xef,0x30,0x04]
+@ CHECK: vmvn.i32 d16, #0x20000000 @ encoding: [0xc2,0xef,0x30,0x06]
+@ CHECK: vmvn.i32 d16, #0x20ff @ encoding: [0xc2,0xef,0x30,0x0c]
+@ CHECK: vmvn.i32 d16, #0x20ffff @ encoding: [0xc2,0xef,0x30,0x0d]
+
+
vmovl.s8 q8, d16
-@ CHECK: vmovl.s16 q8, d16 @ encoding: [0x30,0x0a,0xd0,0xef]
vmovl.s16 q8, d16
-@ CHECK: vmovl.s32 q8, d16 @ encoding: [0x30,0x0a,0xe0,0xef]
vmovl.s32 q8, d16
-@ CHECK: vmovl.u8 q8, d16 @ encoding: [0x30,0x0a,0xc8,0xff]
vmovl.u8 q8, d16
-@ CHECK: vmovl.u16 q8, d16 @ encoding: [0x30,0x0a,0xd0,0xff]
vmovl.u16 q8, d16
-@ CHECK: vmovl.u32 q8, d16 @ encoding: [0x30,0x0a,0xe0,0xff]
vmovl.u32 q8, d16
-@ CHECK: vmovn.i16 d16, q8 @ encoding: [0x20,0x02,0xf2,0xff]
vmovn.i16 d16, q8
-@ CHECK: vmovn.i32 d16, q8 @ encoding: [0x20,0x02,0xf6,0xff]
vmovn.i32 d16, q8
-@ CHECK: vmovn.i64 d16, q8 @ encoding: [0x20,0x02,0xfa,0xff]
vmovn.i64 d16, q8
-@ CHECK: vqmovn.s16 d16, q8 @ encoding: [0xa0,0x02,0xf2,0xff]
vqmovn.s16 d16, q8
-@ CHECK: vqmovn.s32 d16, q8 @ encoding: [0xa0,0x02,0xf6,0xff]
vqmovn.s32 d16, q8
-@ CHECK: vqmovn.s64 d16, q8 @ encoding: [0xa0,0x02,0xfa,0xff]
vqmovn.s64 d16, q8
-@ CHECK: vqmovn.u16 d16, q8 @ encoding: [0xe0,0x02,0xf2,0xff]
vqmovn.u16 d16, q8
-@ CHECK: vqmovn.u32 d16, q8 @ encoding: [0xe0,0x02,0xf6,0xff]
vqmovn.u32 d16, q8
-@ CHECK: vqmovn.u64 d16, q8 @ encoding: [0xe0,0x02,0xfa,0xff]
vqmovn.u64 d16, q8
-@ CHECK: vqmovun.s16 d16, q8 @ encoding: [0x60,0x02,0xf2,0xff]
vqmovun.s16 d16, q8
-@ CHECK: vqmovun.s32 d16, q8 @ encoding: [0x60,0x02,0xf6,0xff]
vqmovun.s32 d16, q8
-@ CHECK: vqmovun.s64 d16, q8 @ encoding: [0x60,0x02,0xfa,0xff]
vqmovun.s64 d16, q8
-@ CHECK: vmov.s8 r0, d16[1] @ encoding: [0xb0,0x0b,0x50,0xee]
+
+@ CHECK: vmovl.s8 q8, d16 @ encoding: [0xc8,0xef,0x30,0x0a]
+@ CHECK: vmovl.s16 q8, d16 @ encoding: [0xd0,0xef,0x30,0x0a]
+@ CHECK: vmovl.s32 q8, d16 @ encoding: [0xe0,0xef,0x30,0x0a]
+@ CHECK: vmovl.u8 q8, d16 @ encoding: [0xc8,0xff,0x30,0x0a]
+@ CHECK: vmovl.u16 q8, d16 @ encoding: [0xd0,0xff,0x30,0x0a]
+@ CHECK: vmovl.u32 q8, d16 @ encoding: [0xe0,0xff,0x30,0x0a]
+@ CHECK: vmovn.i16 d16, q8 @ encoding: [0xf2,0xff,0x20,0x02]
+@ CHECK: vmovn.i32 d16, q8 @ encoding: [0xf6,0xff,0x20,0x02]
+@ CHECK: vmovn.i64 d16, q8 @ encoding: [0xfa,0xff,0x20,0x02]
+@ CHECK: vqmovn.s16 d16, q8 @ encoding: [0xf2,0xff,0xa0,0x02]
+@ CHECK: vqmovn.s32 d16, q8 @ encoding: [0xf6,0xff,0xa0,0x02]
+@ CHECK: vqmovn.s64 d16, q8 @ encoding: [0xfa,0xff,0xa0,0x02]
+@ CHECK: vqmovn.u16 d16, q8 @ encoding: [0xf2,0xff,0xe0,0x02]
+@ CHECK: vqmovn.u32 d16, q8 @ encoding: [0xf6,0xff,0xe0,0x02]
+@ CHECK: vqmovn.u64 d16, q8 @ encoding: [0xfa,0xff,0xe0,0x02]
+@ CHECK: vqmovun.s16 d16, q8 @ encoding: [0xf2,0xff,0x60,0x02]
+@ CHECK: vqmovun.s32 d16, q8 @ encoding: [0xf6,0xff,0x60,0x02]
+@ CHECK: vqmovun.s64 d16, q8 @ encoding: [0xfa,0xff,0x60,0x02]
+
+
vmov.s8 r0, d16[1]
-@ CHECK: vmov.s16 r0, d16[1] @ encoding: [0xf0,0x0b,0x10,0xee]
vmov.s16 r0, d16[1]
-@ CHECK: vmov.u8 r0, d16[1] @ encoding: [0xb0,0x0b,0xd0,0xee]
vmov.u8 r0, d16[1]
-@ CHECK: vmov.u16 r0, d16[1] @ encoding: [0xf0,0x0b,0x90,0xee]
vmov.u16 r0, d16[1]
-@ CHECK: vmov.32 r0, d16[1] @ encoding: [0x90,0x0b,0x30,0xee]
vmov.32 r0, d16[1]
-@ CHECK: vmov.8 d16[1], r1 @ encoding: [0xb0,0x1b,0x40,0xee]
vmov.8 d16[1], r1
-@ CHECK: vmov.16 d16[1], r1 @ encoding: [0xf0,0x1b,0x00,0xee]
vmov.16 d16[1], r1
-@ CHECK: vmov.32 d16[1], r1 @ encoding: [0x90,0x1b,0x20,0xee]
vmov.32 d16[1], r1
-@ CHECK: vmov.8 d18[1], r1 @ encoding: [0xb0,0x1b,0x42,0xee]
vmov.8 d18[1], r1
-@ CHECK: vmov.16 d18[1], r1 @ encoding: [0xf0,0x1b,0x02,0xee]
vmov.16 d18[1], r1
-@ CHECK: vmov.32 d18[1], r1 @ encoding: [0x90,0x1b,0x22,0xee]
vmov.32 d18[1], r1
+
+@ CHECK: vmov.s8 r0, d16[1] @ encoding: [0x50,0xee,0xb0,0x0b]
+@ CHECK: vmov.s16 r0, d16[1] @ encoding: [0x10,0xee,0xf0,0x0b]
+@ CHECK: vmov.u8 r0, d16[1] @ encoding: [0xd0,0xee,0xb0,0x0b]
+@ CHECK: vmov.u16 r0, d16[1] @ encoding: [0x90,0xee,0xf0,0x0b]
+@ CHECK: vmov.32 r0, d16[1] @ encoding: [0x30,0xee,0x90,0x0b]
+@ CHECK: vmov.8 d16[1], r1 @ encoding: [0x40,0xee,0xb0,0x1b]
+@ CHECK: vmov.16 d16[1], r1 @ encoding: [0x00,0xee,0xf0,0x1b]
+@ CHECK: vmov.32 d16[1], r1 @ encoding: [0x20,0xee,0x90,0x1b]
+@ CHECK: vmov.8 d18[1], r1 @ encoding: [0x42,0xee,0xb0,0x1b]
+@ CHECK: vmov.16 d18[1], r1 @ encoding: [0x02,0xee,0xf0,0x1b]
+@ CHECK: vmov.32 d18[1], r1 @ encoding: [0x22,0xee,0x90,0x1b]
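+@ (Thumb2 encodings are emitted as two halfwords, so the expected byte
+@ order here lists the leading halfword first, e.g. [0xc0,0xef,0x18,0x0e]
+@ rather than the ARM-mode order [0x18,0x0e,0xc0,0xef].)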
diff --git a/test/MC/ARM/neont2-mul-accum-encoding.s b/test/MC/ARM/neont2-mul-accum-encoding.s
index be4bf79bca58..bc6a4d4cd1f8 100644
--- a/test/MC/ARM/neont2-mul-accum-encoding.s
+++ b/test/MC/ARM/neont2-mul-accum-encoding.s
@@ -10,6 +10,7 @@
vmla.i16 q9, q8, q10
vmla.i32 q9, q8, q10
vmla.f32 q9, q8, q10
+ vmla.i32 q12, q8, d3[0]
@ CHECK: vmla.i8 d16, d18, d17 @ encoding: [0x42,0xef,0xa1,0x09]
@ CHECK: vmla.i16 d16, d18, d17 @ encoding: [0x52,0xef,0xa1,0x09]
@@ -19,6 +20,7 @@
@ CHECK: vmla.i16 q9, q8, q10 @ encoding: [0x50,0xef,0xe4,0x29]
@ CHECK: vmla.i32 q9, q8, q10 @ encoding: [0x60,0xef,0xe4,0x29]
@ CHECK: vmla.f32 q9, q8, q10 @ encoding: [0x40,0xef,0xf4,0x2d]
+@ CHECK: vmla.i32 q12, q8, d3[0] @ encoding: [0xe0,0xff,0xc3,0x80]
vmlal.s8 q8, d19, d18
@@ -27,6 +29,7 @@
vmlal.u8 q8, d19, d18
vmlal.u16 q8, d19, d18
vmlal.u32 q8, d19, d18
+ vmlal.s32 q0, d5, d10[0]
@ CHECK: vmlal.s8 q8, d19, d18 @ encoding: [0xc3,0xef,0xa2,0x08]
@ CHECK: vmlal.s16 q8, d19, d18 @ encoding: [0xd3,0xef,0xa2,0x08]
@@ -34,13 +37,22 @@
@ CHECK: vmlal.u8 q8, d19, d18 @ encoding: [0xc3,0xff,0xa2,0x08]
@ CHECK: vmlal.u16 q8, d19, d18 @ encoding: [0xd3,0xff,0xa2,0x08]
@ CHECK: vmlal.u32 q8, d19, d18 @ encoding: [0xe3,0xff,0xa2,0x08]
+@ CHECK: vmlal.s32 q0, d5, d10[0] @ encoding: [0xa5,0xef,0x4a,0x02]
vqdmlal.s16 q8, d19, d18
vqdmlal.s32 q8, d19, d18
+ vqdmlal.s16 q11, d11, d7[0]
+ vqdmlal.s16 q11, d11, d7[1]
+ vqdmlal.s16 q11, d11, d7[2]
+ vqdmlal.s16 q11, d11, d7[3]
@ CHECK: vqdmlal.s16 q8, d19, d18 @ encoding: [0xd3,0xef,0xa2,0x09]
@ CHECK: vqdmlal.s32 q8, d19, d18 @ encoding: [0xe3,0xef,0xa2,0x09]
+@ CHECK: vqdmlal.s16 q11, d11, d7[0] @ encoding: [0xdb,0xef,0x47,0x63]
+@ CHECK: vqdmlal.s16 q11, d11, d7[1] @ encoding: [0xdb,0xef,0x4f,0x63]
+@ CHECK: vqdmlal.s16 q11, d11, d7[2] @ encoding: [0xdb,0xef,0x67,0x63]
+@ CHECK: vqdmlal.s16 q11, d11, d7[3] @ encoding: [0xdb,0xef,0x6f,0x63]
vmls.i8 d16, d18, d17
@@ -51,6 +63,7 @@
vmls.i16 q9, q8, q10
vmls.i32 q9, q8, q10
vmls.f32 q9, q8, q10
+ vmls.i16 q4, q12, d6[2]
@ CHECK: vmls.i8 d16, d18, d17 @ encoding: [0x42,0xff,0xa1,0x09]
@ CHECK: vmls.i16 d16, d18, d17 @ encoding: [0x52,0xff,0xa1,0x09]
@@ -60,6 +73,7 @@
@ CHECK: vmls.i16 q9, q8, q10 @ encoding: [0x50,0xff,0xe4,0x29]
@ CHECK: vmls.i32 q9, q8, q10 @ encoding: [0x60,0xff,0xe4,0x29]
@ CHECK: vmls.f32 q9, q8, q10 @ encoding: [0x60,0xef,0xf4,0x2d]
+@ CHECK: vmls.i16 q4, q12, d6[2] @ encoding: [0x98,0xff,0xe6,0x84]
vmlsl.s8 q8, d19, d18
@@ -68,6 +82,7 @@
vmlsl.u8 q8, d19, d18
vmlsl.u16 q8, d19, d18
vmlsl.u32 q8, d19, d18
+ vmlsl.u16 q11, d25, d1[3]
@ CHECK: vmlsl.s8 q8, d19, d18 @ encoding: [0xc3,0xef,0xa2,0x0a]
@ CHECK: vmlsl.s16 q8, d19, d18 @ encoding: [0xd3,0xef,0xa2,0x0a]
@@ -75,6 +90,7 @@
@ CHECK: vmlsl.u8 q8, d19, d18 @ encoding: [0xc3,0xff,0xa2,0x0a]
@ CHECK: vmlsl.u16 q8, d19, d18 @ encoding: [0xd3,0xff,0xa2,0x0a]
@ CHECK: vmlsl.u32 q8, d19, d18 @ encoding: [0xe3,0xff,0xa2,0x0a]
+@ CHECK: vmlsl.u16 q11, d25, d1[3] @ encoding: [0xd9,0xff,0xe9,0x66]
vqdmlsl.s16 q8, d19, d18
diff --git a/test/MC/ARM/neont2-mul-encoding.s b/test/MC/ARM/neont2-mul-encoding.s
index 93ecabb50bb3..dfbb66712fa5 100644
--- a/test/MC/ARM/neont2-mul-encoding.s
+++ b/test/MC/ARM/neont2-mul-encoding.s
@@ -2,57 +2,77 @@
.code 16
-@ CHECK: vmul.i8 d16, d16, d17 @ encoding: [0x40,0xef,0xb1,0x09]
vmul.i8 d16, d16, d17
-@ CHECK: vmul.i16 d16, d16, d17 @ encoding: [0x50,0xef,0xb1,0x09]
vmul.i16 d16, d16, d17
-@ CHECK: vmul.i32 d16, d16, d17 @ encoding: [0x60,0xef,0xb1,0x09]
vmul.i32 d16, d16, d17
-@ CHECK: vmul.f32 d16, d16, d17 @ encoding: [0x40,0xff,0xb1,0x0d]
vmul.f32 d16, d16, d17
-@ CHECK: vmul.i8 q8, q8, q9 @ encoding: [0x40,0xef,0xf2,0x09]
vmul.i8 q8, q8, q9
-@ CHECK: vmul.i16 q8, q8, q9 @ encoding: [0x50,0xef,0xf2,0x09]
vmul.i16 q8, q8, q9
-@ CHECK: vmul.i32 q8, q8, q9 @ encoding: [0x60,0xef,0xf2,0x09]
vmul.i32 q8, q8, q9
-@ CHECK: vmul.f32 q8, q8, q9 @ encoding: [0x40,0xff,0xf2,0x0d]
vmul.f32 q8, q8, q9
-@ CHECK: vmul.p8 d16, d16, d17 @ encoding: [0x40,0xff,0xb1,0x09]
vmul.p8 d16, d16, d17
-@ CHECK: vmul.p8 q8, q8, q9 @ encoding: [0x40,0xff,0xf2,0x09]
vmul.p8 q8, q8, q9
-@ CHECK: vqdmulh.s16 d16, d16, d17 @ encoding: [0x50,0xef,0xa1,0x0b]
+ vmul.i16 d18, d8, d0[3]
+
+@ CHECK: vmul.i8 d16, d16, d17 @ encoding: [0x40,0xef,0xb1,0x09]
+@ CHECK: vmul.i16 d16, d16, d17 @ encoding: [0x50,0xef,0xb1,0x09]
+@ CHECK: vmul.i32 d16, d16, d17 @ encoding: [0x60,0xef,0xb1,0x09]
+@ CHECK: vmul.f32 d16, d16, d17 @ encoding: [0x40,0xff,0xb1,0x0d]
+@ CHECK: vmul.i8 q8, q8, q9 @ encoding: [0x40,0xef,0xf2,0x09]
+@ CHECK: vmul.i16 q8, q8, q9 @ encoding: [0x50,0xef,0xf2,0x09]
+@ CHECK: vmul.i32 q8, q8, q9 @ encoding: [0x60,0xef,0xf2,0x09]
+@ CHECK: vmul.f32 q8, q8, q9 @ encoding: [0x40,0xff,0xf2,0x0d]
+@ CHECK: vmul.p8 d16, d16, d17 @ encoding: [0x40,0xff,0xb1,0x09]
+@ CHECK: vmul.p8 q8, q8, q9 @ encoding: [0x40,0xff,0xf2,0x09]
+@ CHECK: vmul.i16 d18, d8, d0[3] @ encoding: [0xd8,0xef,0x68,0x28]
+
+
vqdmulh.s16 d16, d16, d17
-@ CHECK: vqdmulh.s32 d16, d16, d17 @ encoding: [0x60,0xef,0xa1,0x0b]
vqdmulh.s32 d16, d16, d17
-@ CHECK: vqdmulh.s16 q8, q8, q9 @ encoding: [0x50,0xef,0xe2,0x0b]
vqdmulh.s16 q8, q8, q9
-@ CHECK: vqdmulh.s32 q8, q8, q9 @ encoding: [0x60,0xef,0xe2,0x0b]
vqdmulh.s32 q8, q8, q9
-@ CHECK: vqrdmulh.s16 d16, d16, d17 @ encoding: [0x50,0xff,0xa1,0x0b]
+ vqdmulh.s16 d11, d2, d3[0]
+
+@ CHECK: vqdmulh.s16 d16, d16, d17 @ encoding: [0x50,0xef,0xa1,0x0b]
+@ CHECK: vqdmulh.s32 d16, d16, d17 @ encoding: [0x60,0xef,0xa1,0x0b]
+@ CHECK: vqdmulh.s16 q8, q8, q9 @ encoding: [0x50,0xef,0xe2,0x0b]
+@ CHECK: vqdmulh.s32 q8, q8, q9 @ encoding: [0x60,0xef,0xe2,0x0b]
+@ CHECK: vqdmulh.s16 d11, d2, d3[0] @ encoding: [0x92,0xef,0x43,0xbc]
+
+
vqrdmulh.s16 d16, d16, d17
-@ CHECK: vqrdmulh.s32 d16, d16, d17 @ encoding: [0x60,0xff,0xa1,0x0b]
vqrdmulh.s32 d16, d16, d17
-@ CHECK: vqrdmulh.s16 q8, q8, q9 @ encoding: [0x50,0xff,0xe2,0x0b]
vqrdmulh.s16 q8, q8, q9
-@ CHECK: vqrdmulh.s32 q8, q8, q9 @ encoding: [0x60,0xff,0xe2,0x0b]
vqrdmulh.s32 q8, q8, q9
-@ CHECK: vmull.s8 q8, d16, d17 @ encoding: [0xc0,0xef,0xa1,0x0c]
+
+@ CHECK: vqrdmulh.s16 d16, d16, d17 @ encoding: [0x50,0xff,0xa1,0x0b]
+@ CHECK: vqrdmulh.s32 d16, d16, d17 @ encoding: [0x60,0xff,0xa1,0x0b]
+@ CHECK: vqrdmulh.s16 q8, q8, q9 @ encoding: [0x50,0xff,0xe2,0x0b]
+@ CHECK: vqrdmulh.s32 q8, q8, q9 @ encoding: [0x60,0xff,0xe2,0x0b]
+
+
vmull.s8 q8, d16, d17
-@ CHECK: vmull.s16 q8, d16, d17 @ encoding: [0xd0,0xef,0xa1,0x0c]
vmull.s16 q8, d16, d17
-@ CHECK: vmull.s32 q8, d16, d17 @ encoding: [0xe0,0xef,0xa1,0x0c]
vmull.s32 q8, d16, d17
-@ CHECK: vmull.u8 q8, d16, d17 @ encoding: [0xc0,0xff,0xa1,0x0c]
vmull.u8 q8, d16, d17
-@ CHECK: vmull.u16 q8, d16, d17 @ encoding: [0xd0,0xff,0xa1,0x0c]
vmull.u16 q8, d16, d17
-@ CHECK: vmull.u32 q8, d16, d17 @ encoding: [0xe0,0xff,0xa1,0x0c]
vmull.u32 q8, d16, d17
-@ CHECK: vmull.p8 q8, d16, d17 @ encoding: [0xc0,0xef,0xa1,0x0e]
vmull.p8 q8, d16, d17
-@ CHECK: vqdmull.s16 q8, d16, d17 @ encoding: [0xd0,0xef,0xa1,0x0d]
+
+@ CHECK: vmull.s8 q8, d16, d17 @ encoding: [0xc0,0xef,0xa1,0x0c]
+@ CHECK: vmull.s16 q8, d16, d17 @ encoding: [0xd0,0xef,0xa1,0x0c]
+@ CHECK: vmull.s32 q8, d16, d17 @ encoding: [0xe0,0xef,0xa1,0x0c]
+@ CHECK: vmull.u8 q8, d16, d17 @ encoding: [0xc0,0xff,0xa1,0x0c]
+@ CHECK: vmull.u16 q8, d16, d17 @ encoding: [0xd0,0xff,0xa1,0x0c]
+@ CHECK: vmull.u32 q8, d16, d17 @ encoding: [0xe0,0xff,0xa1,0x0c]
+@ CHECK: vmull.p8 q8, d16, d17 @ encoding: [0xc0,0xef,0xa1,0x0e]
+
+
vqdmull.s16 q8, d16, d17
-@ CHECK: vqdmull.s32 q8, d16, d17 @ encoding: [0xe0,0xef,0xa1,0x0d]
vqdmull.s32 q8, d16, d17
+ vqdmull.s16 q1, d7, d1[1]
+
+@ CHECK: vqdmull.s16 q8, d16, d17 @ encoding: [0xd0,0xef,0xa1,0x0d]
+@ CHECK: vqdmull.s32 q8, d16, d17 @ encoding: [0xe0,0xef,0xa1,0x0d]
+@ CHECK: vqdmull.s16 q1, d7, d1[1] @ encoding: [0x97,0xef,0x49,0x2b]
+
diff --git a/test/MC/ARM/neont2-table-encoding.s b/test/MC/ARM/neont2-table-encoding.s
index 46fb9345fbb6..9bfcc74bb6c0 100644
--- a/test/MC/ARM/neont2-table-encoding.s
+++ b/test/MC/ARM/neont2-table-encoding.s
@@ -1,21 +1,24 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
-@ XFAIL: *
.code 16
-@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xa0,0x08,0xf1,0xff]
vtbl.8 d16, {d17}, d16
-@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xa2,0x09,0xf0,0xff]
vtbl.8 d16, {d16, d17}, d18
-@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xa4,0x0a,0xf0,0xff]
vtbl.8 d16, {d16, d17, d18}, d20
-@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xa4,0x0b,0xf0,0xff]
vtbl.8 d16, {d16, d17, d18, d19}, d20
-@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xe1,0x28,0xf0,0xff]
+
+@ CHECK: vtbl.8 d16, {d17}, d16 @ encoding: [0xf1,0xff,0xa0,0x08]
+@ CHECK: vtbl.8 d16, {d16, d17}, d18 @ encoding: [0xf0,0xff,0xa2,0x09]
+@ CHECK: vtbl.8 d16, {d16, d17, d18}, d20 @ encoding: [0xf0,0xff,0xa4,0x0a]
+@ CHECK: vtbl.8 d16, {d16, d17, d18, d19}, d20 @ encoding: [0xf0,0xff,0xa4,0x0b]
+
+
vtbx.8 d18, {d16}, d17
-@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xe2,0x39,0xf0,0xff]
vtbx.8 d19, {d16, d17}, d18
-@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xe5,0x4a,0xf0,0xff]
vtbx.8 d20, {d16, d17, d18}, d21
-@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xe5,0x4b,0xf0,0xff]
vtbx.8 d20, {d16, d17, d18, d19}, d21
+
+@ CHECK: vtbx.8 d18, {d16}, d17 @ encoding: [0xf0,0xff,0xe1,0x28]
+@ CHECK: vtbx.8 d19, {d16, d17}, d18 @ encoding: [0xf0,0xff,0xe2,0x39]
+@ CHECK: vtbx.8 d20, {d16, d17, d18}, d21 @ encoding: [0xf0,0xff,0xe5,0x4a]
+@ CHECK: vtbx.8 d20, {d16, d17, d18, d19}, d21 @ encoding: [0xf0,0xff,0xe5,0x4b]
diff --git a/test/MC/ARM/neont2-vst-encoding.s b/test/MC/ARM/neont2-vst-encoding.s
index 1722f12a00f6..b50d8b63c1c2 100644
--- a/test/MC/ARM/neont2-vst-encoding.s
+++ b/test/MC/ARM/neont2-vst-encoding.s
@@ -101,3 +101,7 @@
vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
@ CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0] @ encoding: [0x4f,0x1b,0xc0,0xf9]
vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
+
+@ rdar://11082188
+ vst2.8 {d8, d10}, [r4]
+@ CHECK: vst2.8 {d8, d10}, [r4] @ encoding: [0x04,0xf9,0x0f,0x89]
diff --git a/test/MC/ARM/nop-armv6t2-padding.s b/test/MC/ARM/nop-armv6t2-padding.s
deleted file mode 100644
index 0e257186caa3..000000000000
--- a/test/MC/ARM/nop-armv6t2-padding.s
+++ /dev/null
@@ -1,10 +0,0 @@
-@ RUN: llvm-mc -triple armv6t2-apple-darwin %s -filetype=obj -o %t.obj
-@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
-@ RUN: FileCheck %s < %t.dump
-
-x:
- add r0, r1, r2
- .align 4
- add r0, r1, r2
-
-@ CHECK: ('_section_data', '020081e0 007820e3 007820e3 007820e3 020081e0')
diff --git a/test/MC/ARM/pr11877.s b/test/MC/ARM/pr11877.s
new file mode 100644
index 000000000000..da3f6ad1d8ee
--- /dev/null
+++ b/test/MC/ARM/pr11877.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple arm-unknown-unknown %s
+
+i:
+ .long g
+g = h
+h = i
diff --git a/test/MC/ARM/prefetch.ll b/test/MC/ARM/prefetch.ll
deleted file mode 100644
index e77fdb121249..000000000000
--- a/test/MC/ARM/prefetch.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mattr=+v7,+mp -show-mc-encoding | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+v7 -show-mc-encoding | FileCheck %s -check-prefix=T2
-; rdar://8924681
-
-define void @t1(i8* %ptr) nounwind {
-entry:
-; ARM: t1:
-; ARM: pldw [r0] @ encoding: [0x00,0xf0,0x90,0xf5]
-; ARM: pld [r0] @ encoding: [0x00,0xf0,0xd0,0xf5]
-
-; T2: t1:
-; T2: pld [r0] @ encoding: [0x90,0xf8,0x00,0xf0]
- tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
- ret void
-}
-
-define void @t2(i8* %ptr) nounwind {
-entry:
-; ARM: t2:
-; ARM: pld [r0, #1023] @ encoding: [0xff,0xf3,0xd0,0xf5]
-
-; T2: t2:
-; T2: pld [r0, #1023] @ encoding: [0x90,0xf8,0xff,0xf3]
- %tmp = getelementptr i8* %ptr, i32 1023
- tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
- ret void
-}
-
-define void @t3(i32 %base, i32 %offset) nounwind {
-entry:
-; ARM: t3:
-; ARM: pld [r0, r1, lsr #2] @ encoding: [0x21,0xf1,0xd0,0xf7]
-
-; T2: t3:
-; T2: pld [r0, r1] @ encoding: [0x10,0xf8,0x01,0xf0]
- %tmp1 = lshr i32 %offset, 2
- %tmp2 = add i32 %base, %tmp1
- %tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
- ret void
-}
-
-define void @t4(i32 %base, i32 %offset) nounwind {
-entry:
-; ARM: t4:
-; ARM: pld [r0, r1, lsl #2] @ encoding: [0x01,0xf1,0xd0,0xf7]
-
-; T2: t4:
-; T2: pld [r0, r1, lsl #2] @ encoding: [0x10,0xf8,0x21,0xf0]
- %tmp1 = shl i32 %offset, 2
- %tmp2 = add i32 %base, %tmp1
- %tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
- ret void
-}
-
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
diff --git a/test/MC/ARM/simple-fp-encoding.s b/test/MC/ARM/simple-fp-encoding.s
index e7d452a28495..b592f1ee499d 100644
--- a/test/MC/ARM/simple-fp-encoding.s
+++ b/test/MC/ARM/simple-fp-encoding.s
@@ -21,9 +21,15 @@
@ CHECK: vmul.f64 d16, d17, d16 @ encoding: [0xa0,0x0b,0x61,0xee]
vmul.f64 d16, d17, d16
+@ CHECK: vmul.f64 d20, d20, d17 @ encoding: [0xa1,0x4b,0x64,0xee]
+ vmul.f64 d20, d17
+
@ CHECK: vmul.f32 s0, s1, s0 @ encoding: [0x80,0x0a,0x20,0xee]
vmul.f32 s0, s1, s0
+@ CHECK: vmul.f32 s11, s11, s21 @ encoding: [0xaa,0x5a,0x65,0xee]
+ vmul.f32 s11, s21
+
@ CHECK: vnmul.f64 d16, d17, d16 @ encoding: [0xe0,0x0b,0x61,0xee]
vnmul.f64 d16, d17, d16
@@ -114,10 +120,21 @@
@ CHECK: vnmls.f32 s1, s2, s0 @ encoding: [0x00,0x0a,0x51,0xee]
vnmls.f32 s1, s2, s0
-@ CHECK: vmrs apsr_nzcv, fpscr @ encoding: [0x10,0xfa,0xf1,0xee]
-@ CHECK: vmrs apsr_nzcv, fpscr @ encoding: [0x10,0xfa,0xf1,0xee]
+ vmrs APSR_nzcv, fpscr
vmrs apsr_nzcv, fpscr
fmstat
+ vmrs r2, fpsid
+ vmrs r3, FPSID
+ vmrs r4, mvfr0
+ vmrs r5, MVFR1
+
+@ CHECK: vmrs APSR_nzcv, fpscr @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs APSR_nzcv, fpscr @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs APSR_nzcv, fpscr @ encoding: [0x10,0xfa,0xf1,0xee]
+@ CHECK: vmrs r2, fpsid @ encoding: [0x10,0x2a,0xf0,0xee]
+@ CHECK: vmrs r3, fpsid @ encoding: [0x10,0x3a,0xf0,0xee]
+@ CHECK: vmrs r4, mvfr0 @ encoding: [0x10,0x4a,0xf7,0xee]
+@ CHECK: vmrs r5, mvfr1 @ encoding: [0x10,0x5a,0xf6,0xee]
@ CHECK: vnegne.f64 d16, d16 @ encoding: [0x60,0x0b,0xf1,0x1e]
vnegne.f64 d16, d16
@@ -127,6 +144,16 @@
vmovne s0, r0
vmoveq s0, r1
+ vmov.f32 r1, s2
+ vmov.f32 s4, r3
+ vmov.f64 r1, r5, d2
+ vmov.f64 d4, r3, r9
+
+@ CHECK: vmov r1, s2 @ encoding: [0x10,0x1a,0x11,0xee]
+@ CHECK: vmov s4, r3 @ encoding: [0x10,0x3a,0x02,0xee]
+@ CHECK: vmov r1, r5, d2 @ encoding: [0x12,0x1b,0x55,0xec]
+@ CHECK: vmov d4, r3, r9 @ encoding: [0x14,0x3b,0x49,0xec]
+
@ CHECK: vmrs r0, fpscr @ encoding: [0x10,0x0a,0xf1,0xee]
vmrs r0, fpscr
@ CHECK: vmrs r0, fpexc @ encoding: [0x10,0x0a,0xf8,0xee]
@@ -172,52 +199,62 @@
@ CHECK: vmov r0, r1, d16 @ encoding: [0x30,0x0b,0x51,0xec]
vmov r0, r1, d16
-@ CHECK: vldr.64 d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed]
+@ CHECK: vldr d17, [r0] @ encoding: [0x00,0x1b,0xd0,0xed]
+@ CHECK: vldr s0, [lr] @ encoding: [0x00,0x0a,0x9e,0xed]
+@ CHECK: vldr d0, [lr] @ encoding: [0x00,0x0b,0x9e,0xed]
+
vldr.64 d17, [r0]
+ vldr.i32 s0, [lr]
+ vldr.d d0, [lr]
-@ CHECK: vldr.64 d1, [r2, #32] @ encoding: [0x08,0x1b,0x92,0xed]
-@ CHECK: vldr.64 d1, [r2, #-32] @ encoding: [0x08,0x1b,0x12,0xed]
+@ CHECK: vldr d1, [r2, #32] @ encoding: [0x08,0x1b,0x92,0xed]
+@ CHECK: vldr d1, [r2, #-32] @ encoding: [0x08,0x1b,0x12,0xed]
vldr.64 d1, [r2, #32]
- vldr.64 d1, [r2, #-32]
+ vldr.f64 d1, [r2, #-32]
-@ CHECK: vldr.64 d2, [r3] @ encoding: [0x00,0x2b,0x93,0xed]
+@ CHECK: vldr d2, [r3] @ encoding: [0x00,0x2b,0x93,0xed]
vldr.64 d2, [r3]
-@ CHECK: vldr.64 d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed]
-@ CHECK: vldr.64 d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed]
-@ CHECK: vldr.64 d3, [pc, #-0] @ encoding: [0x00,0x3b,0x1f,0xed]
+@ CHECK: vldr d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed]
+@ CHECK: vldr d3, [pc] @ encoding: [0x00,0x3b,0x9f,0xed]
+@ CHECK: vldr d3, [pc, #-0] @ encoding: [0x00,0x3b,0x1f,0xed]
vldr.64 d3, [pc]
vldr.64 d3, [pc,#0]
vldr.64 d3, [pc,#-0]
-@ CHECK: vldr.32 s13, [r0] @ encoding: [0x00,0x6a,0xd0,0xed]
+@ CHECK: vldr s13, [r0] @ encoding: [0x00,0x6a,0xd0,0xed]
vldr.32 s13, [r0]
-@ CHECK: vldr.32 s1, [r2, #32] @ encoding: [0x08,0x0a,0xd2,0xed]
-@ CHECK: vldr.32 s1, [r2, #-32] @ encoding: [0x08,0x0a,0x52,0xed]
+@ CHECK: vldr s1, [r2, #32] @ encoding: [0x08,0x0a,0xd2,0xed]
+@ CHECK: vldr s1, [r2, #-32] @ encoding: [0x08,0x0a,0x52,0xed]
vldr.32 s1, [r2, #32]
vldr.32 s1, [r2, #-32]
-@ CHECK: vldr.32 s2, [r3] @ encoding: [0x00,0x1a,0x93,0xed]
+@ CHECK: vldr s2, [r3] @ encoding: [0x00,0x1a,0x93,0xed]
vldr.32 s2, [r3]
-@ CHECK: vldr.32 s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed]
-@ CHECK: vldr.32 s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed]
-@ CHECK: vldr.32 s5, [pc, #-0] @ encoding: [0x00,0x2a,0x5f,0xed]
+@ CHECK: vldr s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed]
+@ CHECK: vldr s5, [pc] @ encoding: [0x00,0x2a,0xdf,0xed]
+@ CHECK: vldr s5, [pc, #-0] @ encoding: [0x00,0x2a,0x5f,0xed]
vldr.32 s5, [pc]
vldr.32 s5, [pc,#0]
vldr.32 s5, [pc,#-0]
-@ CHECK: vstr.64 d4, [r1] @ encoding: [0x00,0x4b,0x81,0xed]
-@ CHECK: vstr.64 d4, [r1, #24] @ encoding: [0x06,0x4b,0x81,0xed]
-@ CHECK: vstr.64 d4, [r1, #-24] @ encoding: [0x06,0x4b,0x01,0xed]
+@ CHECK: vstr d4, [r1] @ encoding: [0x00,0x4b,0x81,0xed]
+@ CHECK: vstr d4, [r1, #24] @ encoding: [0x06,0x4b,0x81,0xed]
+@ CHECK: vstr d4, [r1, #-24] @ encoding: [0x06,0x4b,0x01,0xed]
+@ CHECK: vstr s0, [lr] @ encoding: [0x00,0x0a,0x8e,0xed]
+@ CHECK: vstr d0, [lr] @ encoding: [0x00,0x0b,0x8e,0xed]
+
vstr.64 d4, [r1]
vstr.64 d4, [r1, #24]
vstr.64 d4, [r1, #-24]
+ vstr s0, [lr]
+ vstr d0, [lr]
-@ CHECK: vstr.32 s4, [r1] @ encoding: [0x00,0x2a,0x81,0xed]
-@ CHECK: vstr.32 s4, [r1, #24] @ encoding: [0x06,0x2a,0x81,0xed]
-@ CHECK: vstr.32 s4, [r1, #-24] @ encoding: [0x06,0x2a,0x01,0xed]
+@ CHECK: vstr s4, [r1] @ encoding: [0x00,0x2a,0x81,0xed]
+@ CHECK: vstr s4, [r1, #24] @ encoding: [0x06,0x2a,0x81,0xed]
+@ CHECK: vstr s4, [r1, #-24] @ encoding: [0x06,0x2a,0x01,0xed]
vstr.32 s4, [r1]
vstr.32 s4, [r1, #24]
vstr.32 s4, [r1, #-24]
@@ -229,8 +266,10 @@
@ CHECK: vstmia r1, {d2, d3, d4, d5, d6, d7} @ encoding: [0x0c,0x2b,0x81,0xec]
@ CHECK: vstmia r1, {s2, s3, s4, s5, s6, s7} @ encoding: [0x06,0x1a,0x81,0xec]
+@ CHECK: vpush {d8, d9, d10, d11, d12, d13, d14, d15} @ encoding: [0x10,0x8b,0x2d,0xed]
vstmia r1, {d2,d3-d6,d7}
vstmia r1, {s2,s3-s6,s7}
+ vstmdb sp!, {q4-q7}
@ CHECK: vcvtr.s32.f64 s0, d0 @ encoding: [0x40,0x0b,0xbd,0xee]
@ CHECK: vcvtr.s32.f32 s0, s1 @ encoding: [0x60,0x0a,0xbd,0xee]
@@ -243,3 +282,76 @@
@ CHECK: vmovne s25, s26, r2, r5
vmovne s25, s26, r2, r5 @ encoding: [0x39,0x2a,0x45,0x1c]
+
+@ VMOV w/ optional data type suffix.
+ vmov.32 s1, r8
+ vmov.s16 s2, r4
+ vmov.16 s3, r6
+ vmov.u32 s4, r1
+ vmov.p8 s5, r2
+ vmov.8 s6, r3
+
+ vmov.32 r1, s8
+ vmov.s16 r2, s4
+ vmov.16 r3, s6
+ vmov.u32 r4, s1
+ vmov.p8 r5, s2
+ vmov.8 r6, s3
+
+@ CHECK: vmov s1, r8 @ encoding: [0x90,0x8a,0x00,0xee]
+@ CHECK: vmov s2, r4 @ encoding: [0x10,0x4a,0x01,0xee]
+@ CHECK: vmov s3, r6 @ encoding: [0x90,0x6a,0x01,0xee]
+@ CHECK: vmov s4, r1 @ encoding: [0x10,0x1a,0x02,0xee]
+@ CHECK: vmov s5, r2 @ encoding: [0x90,0x2a,0x02,0xee]
+@ CHECK: vmov s6, r3 @ encoding: [0x10,0x3a,0x03,0xee]
+@ CHECK: vmov r1, s8 @ encoding: [0x10,0x1a,0x14,0xee]
+@ CHECK: vmov r2, s4 @ encoding: [0x10,0x2a,0x12,0xee]
+@ CHECK: vmov r3, s6 @ encoding: [0x10,0x3a,0x13,0xee]
+@ CHECK: vmov r4, s1 @ encoding: [0x90,0x4a,0x10,0xee]
+@ CHECK: vmov r5, s2 @ encoding: [0x10,0x5a,0x11,0xee]
+@ CHECK: vmov r6, s3 @ encoding: [0x90,0x6a,0x11,0xee]
+
+
+@ VCVT (between floating-point and fixed-point)
+ vcvt.f32.u32 s0, s0, #20
+ vcvt.f64.s32 d0, d0, #32
+ vcvt.f32.u16 s0, s0, #1
+ vcvt.f64.s16 d0, d0, #16
+ vcvt.f32.s32 s1, s1, #20
+ vcvt.f64.u32 d20, d20, #32
+ vcvt.f32.s16 s17, s17, #1
+ vcvt.f64.u16 d23, d23, #16
+ vcvt.u32.f32 s12, s12, #20
+ vcvt.s32.f64 d2, d2, #32
+ vcvt.u16.f32 s28, s28, #1
+ vcvt.s16.f64 d15, d15, #16
+ vcvt.s32.f32 s1, s1, #20
+ vcvt.u32.f64 d20, d20, #32
+ vcvt.s16.f32 s17, s17, #1
+ vcvt.u16.f64 d23, d23, #16
+
+@ CHECK: vcvt.f32.u32 s0, s0, #20 @ encoding: [0xc6,0x0a,0xbb,0xee]
+@ CHECK: vcvt.f64.s32 d0, d0, #32 @ encoding: [0xc0,0x0b,0xba,0xee]
+@ CHECK: vcvt.f32.u16 s0, s0, #1 @ encoding: [0x67,0x0a,0xbb,0xee]
+@ CHECK: vcvt.f64.s16 d0, d0, #16 @ encoding: [0x40,0x0b,0xba,0xee]
+@ CHECK: vcvt.f32.s32 s1, s1, #20 @ encoding: [0xc6,0x0a,0xfa,0xee]
+@ CHECK: vcvt.f64.u32 d20, d20, #32 @ encoding: [0xc0,0x4b,0xfb,0xee]
+@ CHECK: vcvt.f32.s16 s17, s17, #1 @ encoding: [0x67,0x8a,0xfa,0xee]
+@ CHECK: vcvt.f64.u16 d23, d23, #16 @ encoding: [0x40,0x7b,0xfb,0xee]
+
+@ CHECK: vcvt.u32.f32 s12, s12, #20 @ encoding: [0xc6,0x6a,0xbf,0xee]
+@ CHECK: vcvt.s32.f64 d2, d2, #32 @ encoding: [0xc0,0x2b,0xbe,0xee]
+@ CHECK: vcvt.u16.f32 s28, s28, #1 @ encoding: [0x67,0xea,0xbf,0xee]
+@ CHECK: vcvt.s16.f64 d15, d15, #16 @ encoding: [0x40,0xfb,0xbe,0xee]
+@ CHECK: vcvt.s32.f32 s1, s1, #20 @ encoding: [0xc6,0x0a,0xfe,0xee]
+@ CHECK: vcvt.u32.f64 d20, d20, #32 @ encoding: [0xc0,0x4b,0xff,0xee]
+@ CHECK: vcvt.s16.f32 s17, s17, #1 @ encoding: [0x67,0x8a,0xfe,0xee]
+@ CHECK: vcvt.u16.f64 d23, d23, #16 @ encoding: [0x40,0x7b,0xff,0xee]
+
+
+@ Use NEON to load some f32 immediates that don't fit the f8 representation.
+ vmov.f32 d4, #0.0
+ vmov.f32 d4, #32.0
+
+@ CHECK: vmov.i32 d4, #0x0 @ encoding: [0x10,0x40,0x80,0xf2]
+@ CHECK: vmov.i32 d4, #0x42000000 @ encoding: [0x12,0x46,0x84,0xf2]
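The two CHECK lines above expect integer splats because neither 0.0 nor 32.0
fits the 8-bit floating-point immediate form, so the assembler falls back to
vmov.i32 with the raw IEEE-754 bit pattern. A minimal sketch for verifying the
expected constants (plain Python written for this note, not part of the test
suite):

    import struct

    def f32_bits(value):
        # IEEE-754 single-precision bit pattern of `value`.
        return struct.unpack('<I', struct.pack('<f', value))[0]

    # Neither constant fits the 8-bit fp immediate, hence vmov.i32.
    assert f32_bits(0.0) == 0x00000000
    assert f32_bits(32.0) == 0x42000000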
diff --git a/test/MC/ARM/thumb-diagnostics.s b/test/MC/ARM/thumb-diagnostics.s
index d02c27e1ae02..99d7e38c7ed4 100644
--- a/test/MC/ARM/thumb-diagnostics.s
+++ b/test/MC/ARM/thumb-diagnostics.s
@@ -24,13 +24,9 @@
@ Out of range immediates for ASR instruction.
asrs r2, r3, #33
- asrs r2, r3, #0
@ CHECK-ERRORS: error: invalid operand for instruction
@ CHECK-ERRORS: asrs r2, r3, #33
@ CHECK-ERRORS: ^
-@ CHECK-ERRORS: error: invalid operand for instruction
-@ CHECK-ERRORS: asrs r2, r3, #0
-@ CHECK-ERRORS: ^
@ Out of range immediates for BKPT instruction.
bkpt #256
@@ -125,10 +121,10 @@ error: invalid operand for instruction
add sp, #3
add sp, sp, #512
add r2, sp, #1024
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
@ CHECK-ERRORS: add sp, #-1
@ CHECK-ERRORS: ^
-@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
@ CHECK-ERRORS: add sp, #3
@ CHECK-ERRORS: ^
@ CHECK-ERRORS: error: instruction requires a CPU feature not currently enabled
diff --git a/test/MC/ARM/vfp4.s b/test/MC/ARM/vfp4.s
new file mode 100644
index 000000000000..cc87a38112ba
--- /dev/null
+++ b/test/MC/ARM/vfp4.s
@@ -0,0 +1,50 @@
+@ RUN: llvm-mc < %s -triple armv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=ARM
+@ RUN: llvm-mc < %s -triple thumbv7-unknown-unknown -show-encoding -mattr=+neon,+vfp4 | FileCheck %s --check-prefix=THUMB
+
+@ ARM: vfma.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xe2,0xee]
+@ THUMB: vfma.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xa1,0x0b]
+vfma.f64 d16, d18, d17
+
+@ ARM: vfma.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0xa2,0xee]
+@ THUMB: vfma.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x00,0x1a]
+vfma.f32 s2, s4, s0
+
+@ ARM: vfma.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x42,0xf2]
+@ THUMB: vfma.f32 d16, d18, d17 @ encoding: [0x42,0xef,0xb1,0x0c]
+vfma.f32 d16, d18, d17
+
+@ ARM: vfma.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x08,0xf2]
+@ THUMB: vfma.f32 q2, q4, q0 @ encoding: [0x08,0xef,0x50,0x4c]
+vfma.f32 q2, q4, q0
+
+@ ARM: vfnma.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xd2,0xee]
+@ THUMB: vfnma.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xe1,0x0b]
+vfnma.f64 d16, d18, d17
+
+@ ARM: vfnma.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0x92,0xee]
+@ THUMB: vfnma.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x40,0x1a]
+vfnma.f32 s2, s4, s0
+
+@ ARM: vfms.f64 d16, d18, d17 @ encoding: [0xe1,0x0b,0xe2,0xee]
+@ THUMB: vfms.f64 d16, d18, d17 @ encoding: [0xe2,0xee,0xe1,0x0b]
+vfms.f64 d16, d18, d17
+
+@ ARM: vfms.f32 s2, s4, s0 @ encoding: [0x40,0x1a,0xa2,0xee]
+@ THUMB: vfms.f32 s2, s4, s0 @ encoding: [0xa2,0xee,0x40,0x1a]
+vfms.f32 s2, s4, s0
+
+@ ARM: vfms.f32 d16, d18, d17 @ encoding: [0xb1,0x0c,0x62,0xf2]
+@ THUMB: vfms.f32 d16, d18, d17 @ encoding: [0x62,0xef,0xb1,0x0c]
+vfms.f32 d16, d18, d17
+
+@ ARM: vfms.f32 q2, q4, q0 @ encoding: [0x50,0x4c,0x28,0xf2]
+@ THUMB: vfms.f32 q2, q4, q0 @ encoding: [0x28,0xef,0x50,0x4c]
+vfms.f32 q2, q4, q0
+
+@ ARM: vfnms.f64 d16, d18, d17 @ encoding: [0xa1,0x0b,0xd2,0xee]
+@ THUMB: vfnms.f64 d16, d18, d17 @ encoding: [0xd2,0xee,0xa1,0x0b]
+vfnms.f64 d16, d18, d17
+
+@ ARM: vfnms.f32 s2, s4, s0 @ encoding: [0x00,0x1a,0x92,0xee]
+@ THUMB: vfnms.f32 s2, s4, s0 @ encoding: [0x92,0xee,0x00,0x1a]
+vfnms.f32 s2, s4, s0
diff --git a/test/MC/ARM/vpush-vpop.s b/test/MC/ARM/vpush-vpop.s
index 1212c83cfc07..4fb4decd11e2 100644
--- a/test/MC/ARM/vpush-vpop.s
+++ b/test/MC/ARM/vpush-vpop.s
@@ -7,6 +7,21 @@ foo:
vpush {s8, s9, s10, s11, s12}
vpop {d8, d9, d10, d11, d12}
vpop {s8, s9, s10, s11, s12}
+@ optional size suffix
+ vpush.s8 {d8, d9, d10, d11, d12}
+ vpush.16 {s8, s9, s10, s11, s12}
+ vpop.f32 {d8, d9, d10, d11, d12}
+ vpop.64 {s8, s9, s10, s11, s12}
+
+@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
+@ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
+@ CHECK-THUMB: vpop {d8, d9, d10, d11, d12} @ encoding: [0xbd,0xec,0x0a,0x8b]
+@ CHECK-THUMB: vpop {s8, s9, s10, s11, s12} @ encoding: [0xbd,0xec,0x05,0x4a]
+
+@ CHECK-ARM: vpush {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0x2d,0xed]
+@ CHECK-ARM: vpush {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0x2d,0xed]
+@ CHECK-ARM: vpop {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0xbd,0xec]
+@ CHECK-ARM: vpop {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0xbd,0xec]
@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
@ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
diff --git a/test/MC/AsmParser/2011-09-06-NoNewline.s b/test/MC/AsmParser/2011-09-06-NoNewline.s
deleted file mode 100644
index 33e1dbb9709f..000000000000
--- a/test/MC/AsmParser/2011-09-06-NoNewline.s
+++ /dev/null
@@ -1,7 +0,0 @@
-// RUN: llvm-mc -triple i386-unknown-unknown %s
-movl %gs:8, %eax
-// RUN: llvm-mc -triple i386-unknown-unknown %s
-movl %gs:8, %eax
-// RUN: llvm-mc -triple i386-unknown-unknown %s
-movl %gs:8, %eax
- \ No newline at end of file
diff --git a/test/MC/AsmParser/cfi-unfinished-frame.s b/test/MC/AsmParser/cfi-unfinished-frame.s
new file mode 100644
index 000000000000..1182d52baedf
--- /dev/null
+++ b/test/MC/AsmParser/cfi-unfinished-frame.s
@@ -0,0 +1,5 @@
+// RUN: not llvm-mc -filetype=asm -triple x86_64-pc-linux-gnu %s -o %t 2>%t.out
+// RUN: FileCheck -input-file=%t.out %s
+
+.cfi_startproc
+// CHECK: Unfinished frame
diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp
deleted file mode 100644
index a6d81da5b716..000000000000
--- a/test/MC/AsmParser/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
diff --git a/test/MC/AsmParser/directive_file-errors.s b/test/MC/AsmParser/directive_file-errors.s
new file mode 100644
index 000000000000..5ae2bbe80051
--- /dev/null
+++ b/test/MC/AsmParser/directive_file-errors.s
@@ -0,0 +1,9 @@
+// RUN: not llvm-mc -g -triple i386-unknown-unknown %s 2> %t.err | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERRORS %s < %t.err
+// Test for Bug 11740
+
+ .file "hello"
+ .file 1 "world"
+
+// CHECK: .file "hello"
+// CHECK-ERRORS:6:9: error: input can't have .file dwarf directives when -g is used to generate dwarf debug info for assembly code
diff --git a/test/MC/AsmParser/directive_file.s b/test/MC/AsmParser/directive_file.s
index 3160d5c2bfd7..121890e69a4a 100644
--- a/test/MC/AsmParser/directive_file.s
+++ b/test/MC/AsmParser/directive_file.s
@@ -2,7 +2,8 @@
.file "hello"
.file 1 "world"
+ .file 2 "directory" "file"
# CHECK: .file "hello"
# CHECK: .file 1 "world"
-
+# CHECK: .file 2 "directory" "file"
diff --git a/test/MC/AsmParser/directive_incbin.s b/test/MC/AsmParser/directive_incbin.s
new file mode 100644
index 000000000000..55f9c7951ffa
--- /dev/null
+++ b/test/MC/AsmParser/directive_incbin.s
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -triple i386-unknown-unknown %s -I %p | FileCheck %s
+
+.data
+.incbin "incbin_abcd"
+
+# CHECK: .ascii "abcd\n"
diff --git a/test/MC/AsmParser/incbin_abcd b/test/MC/AsmParser/incbin_abcd
new file mode 100644
index 000000000000..acbe86c7c895
--- /dev/null
+++ b/test/MC/AsmParser/incbin_abcd
@@ -0,0 +1 @@
+abcd
diff --git a/test/MC/AsmParser/lit.local.cfg b/test/MC/AsmParser/lit.local.cfg
new file mode 100644
index 000000000000..6c49f08b7496
--- /dev/null
+++ b/test/MC/AsmParser/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/AsmParser/macro-args.s b/test/MC/AsmParser/macro-args.s
index 808b6eb48803..4b878999e424 100644
--- a/test/MC/AsmParser/macro-args.s
+++ b/test/MC/AsmParser/macro-args.s
@@ -8,3 +8,13 @@
GET is_sse, %eax
// CHECK: movl is_sse@GOTOFF(%ebx), %eax
+
+.macro bar
+ .long $n
+.endm
+
+bar 1, 2, 3
+bar
+
+// CHECK: .long 3
+// CHECK: .long 0
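The added CHECK lines pin down the expansion of $n in a macro body: it is
replaced by the number of arguments supplied at the invocation, so
"bar 1, 2, 3" expands to ".long 3" and a bare "bar" to ".long 0". A toy model
of just that substitution (hypothetical Python, not the actual MC macro
expander):

    def expand_bar(args):
        # ".long $n" with $n replaced by the argument count.
        return '.long %d' % len(args)

    print(expand_bar(['1', '2', '3']))  # .long 3
    print(expand_bar([]))               # .long 0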
diff --git a/test/MC/AsmParser/pr11865.s b/test/MC/AsmParser/pr11865.s
new file mode 100644
index 000000000000..1c03e117d982
--- /dev/null
+++ b/test/MC/AsmParser/pr11865.s
@@ -0,0 +1,6 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s
+
+i:
+ .long g
+g = h
+h = i
diff --git a/test/MC/AsmParser/variables-invalid.s b/test/MC/AsmParser/variables-invalid.s
index 9656889c5b1a..c466d422b969 100644
--- a/test/MC/AsmParser/variables-invalid.s
+++ b/test/MC/AsmParser/variables-invalid.s
@@ -2,7 +2,7 @@
// RUN: FileCheck --input-file %t %s
.data
-// CHECK: invalid assignment to 't0_v0'
+// CHECK: Recursive use of 't0_v0'
t0_v0 = t0_v0 + 1
t1_v1 = 1
@@ -13,5 +13,16 @@ t2_s0:
t2_s0 = 2
t3_s0 = t2_s0 + 1
+ .long t3_s0
// CHECK: invalid reassignment of non-absolute variable 't3_s0'
t3_s0 = 1
+
+
+// CHECK: Recursive use of 't4_s2'
+ t4_s0 = t4_s1
+ t4_s1 = t4_s2
+ t4_s2 = t4_s0
+
+// CHECK: Recursive use of 't5_s1'
+ t5_s0 = t5_s1 + 1
+ t5_s1 = t5_s0
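The t4 and t5 cases exercise cycle detection for variables defined in terms of
each other. One way to picture the check (a rough Python sketch, not the
actual MC implementation; the real diagnostic may name a different symbol in
the cycle) is a resolver that remembers which symbols are currently being
evaluated:

    def resolve(name, table, visiting=None):
        # `table` maps a symbol to an int or to another symbol's name.
        visiting = visiting if visiting is not None else set()
        if name in visiting:
            raise ValueError("Recursive use of '%s'" % name)
        visiting.add(name)
        value = table[name]
        return value if isinstance(value, int) else resolve(value, table, visiting)

    # t4_s0 = t4_s1, t4_s1 = t4_s2, t4_s2 = t4_s0 forms a cycle:
    try:
        resolve('t4_s0', {'t4_s0': 't4_s1', 't4_s1': 't4_s2', 't4_s2': 't4_s0'})
    except ValueError as err:
        print(err)  # Recursive use of 't4_s0'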
diff --git a/test/MC/COFF/dg.exp b/test/MC/COFF/dg.exp
deleted file mode 100644
index d46d700975e5..000000000000
--- a/test/MC/COFF/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
-}
diff --git a/test/MC/COFF/global_ctors.ll b/test/MC/COFF/global_ctors.ll
new file mode 100644
index 000000000000..4d6b1c7d9913
--- /dev/null
+++ b/test/MC/COFF/global_ctors.ll
@@ -0,0 +1,28 @@
+; Test that global ctors are emitted into the proper COFF section for the
+; target. Mingw uses .ctors, whereas MSVC uses .CRT$XC*.
+; RUN: llc < %s -mtriple i686-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple x86_64-pc-win32 | FileCheck %s --check-prefix WIN32
+; RUN: llc < %s -mtriple i686-pc-mingw32 | FileCheck %s --check-prefix MINGW32
+; RUN: llc < %s -mtriple x86_64-pc-mingw32 | FileCheck %s --check-prefix MINGW32
+
+@.str = private unnamed_addr constant [13 x i8] c"constructing\00", align 1
+@.str2 = private unnamed_addr constant [5 x i8] c"main\00", align 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @a_global_ctor }]
+
+declare i32 @puts(i8*)
+
+define void @a_global_ctor() nounwind {
+ %1 = call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0))
+ ret void
+}
+
+define i32 @main() nounwind {
+ %1 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @.str2, i32 0, i32 0))
+ ret i32 0
+}
+
+; WIN32: .section .CRT$XCU,"r"
+; WIN32: a_global_ctor
+; MINGW32: .section .ctors,"w"
+; MINGW32: a_global_ctor
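The four RUN lines drive the same module through both Windows flavors because
the section holding static constructors differs, as the header comment says:
MSVC-style targets use .CRT$XC*, MinGW the traditional .ctors. A compressed
sketch of that split (hypothetical helper; the environment-string test is an
assumption, not how llc actually dispatches):

    def ctor_section(triple):
        # MinGW keeps .ctors; MSVC-style win32 targets use .CRT$XCU.
        env = triple.split('-')[-1]
        return '.ctors' if 'mingw' in env else '.CRT$XCU'

    print(ctor_section('i686-pc-win32'))      # .CRT$XCU
    print(ctor_section('x86_64-pc-mingw32'))  # .ctors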
diff --git a/test/MC/COFF/lit.local.cfg b/test/MC/COFF/lit.local.cfg
new file mode 100644
index 000000000000..41a8434f9993
--- /dev/null
+++ b/test/MC/COFF/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s', '.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/COFF/secrel32.s b/test/MC/COFF/secrel32.s
new file mode 100644
index 000000000000..ce148db9000c
--- /dev/null
+++ b/test/MC/COFF/secrel32.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 %s | coff-dump.py | FileCheck %s
+
+// check that we produce the correct relocation for .secrel32
+
+Lfoo:
+ .secrel32 Lfoo
+
+// CHECK: Relocations = [
+// CHECK-NEXT: 0 = {
+// CHECK-NEXT: VirtualAddress = 0x0
+// CHECK-NEXT: SymbolTableIndex = 0
+// CHECK-NEXT: Type = IMAGE_REL_I386_SECREL (11)
+// CHECK-NEXT: SymbolName = .text
+// CHECK-NEXT: }
diff --git a/test/MC/COFF/symbol-mangling.ll b/test/MC/COFF/symbol-mangling.ll
new file mode 100644
index 000000000000..f1b4b4becd0e
--- /dev/null
+++ b/test/MC/COFF/symbol-mangling.ll
@@ -0,0 +1,17 @@
+; The purpose of this test is to see if the MC layer properly handles symbol
+; names needing quoting on MS/Windows. This code is generated by clang when
+; using -cxx-abi microsoft.
+
+; RUN: llc -filetype=asm -mtriple i686-pc-win32 %s -o - | FileCheck %s
+
+; CHECK: ?sayhi@A@@QBEXXZ
+
+%struct.A = type {}
+
+define i32 @main() {
+entry:
+ tail call void @"\01?sayhi@A@@QBEXXZ"(%struct.A* null)
+ ret i32 0
+}
+
+declare void @"\01?sayhi@A@@QBEXXZ"(%struct.A*)
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index 69a094dd681c..ce1446b02b10 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -201,7 +201,7 @@
0x20 0x51 0x17 0xe6
# CHECK: strdeq r2, r3, [r0], -r8
-0xf8 0x24 0x00 0x00
+0xf8 0x20 0x00 0x00
# CHECK: ldrdeq r2, r3, [r0], -r12
0xdc 0x24 0x00 0x00
@@ -215,7 +215,7 @@
# CHECK: vldmdb r2!, {s7, s8, s9, s10, s11}
0x05 0x3a 0x72 0xed
-# CHECK: vldr.32 s23, [r2, #660]
+# CHECK: vldr s23, [r2, #660]
0xa5 0xba 0xd2 0xed
# CHECK: strtvc r5, [r3], r0, lsr #20
@@ -317,3 +317,7 @@
# CHECK: stc2l p0, c0, [r2], #-96
0x18 0x0 0x62 0xfc
+
+# CHECK: ldmgt sp!, {r9}
+0x00 0x02 0xbd 0xc8
+
diff --git a/test/MC/Disassembler/ARM/dg.exp b/test/MC/Disassembler/ARM/dg.exp
deleted file mode 100644
index fc2f17a6fbaa..000000000000
--- a/test/MC/Disassembler/ARM/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target ARM] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/ARM/fp-encoding.txt b/test/MC/Disassembler/ARM/fp-encoding.txt
index f3e026138a0a..9095b84ce18b 100644
--- a/test/MC/Disassembler/ARM/fp-encoding.txt
+++ b/test/MC/Disassembler/ARM/fp-encoding.txt
@@ -152,46 +152,46 @@
# CHECK: vmov r0, r1, d16
0x00 0x1b 0xd0 0xed
-# CHECK: vldr.64 d17, [r0]
+# CHECK: vldr d17, [r0]
0x08 0x1b 0x92 0xed
0x08 0x1b 0x12 0xed
-# CHECK: vldr.64 d1, [r2, #32]
-# CHECK: vldr.64 d1, [r2, #-32]
+# CHECK: vldr d1, [r2, #32]
+# CHECK: vldr d1, [r2, #-32]
0x00 0x2b 0x93 0xed
-# CHECK: vldr.64 d2, [r3]
+# CHECK: vldr d2, [r3]
0x00 0x3b 0x9f 0xed
-# CHECK: vldr.64 d3, [pc]
+# CHECK: vldr d3, [pc]
0x00 0x6a 0xd0 0xed
-# CHECK: vldr.32 s13, [r0]
+# CHECK: vldr s13, [r0]
0x08 0x0a 0xd2 0xed
0x08 0x0a 0x52 0xed
-# CHECK: vldr.32 s1, [r2, #32]
-# CHECK: vldr.32 s1, [r2, #-32]
+# CHECK: vldr s1, [r2, #32]
+# CHECK: vldr s1, [r2, #-32]
0x00 0x1a 0x93 0xed
-# CHECK: vldr.32 s2, [r3]
+# CHECK: vldr s2, [r3]
0x00 0x2a 0xdf 0xed
-# CHECK: vldr.32 s5, [pc]
+# CHECK: vldr s5, [pc]
0x00 0x4b 0x81 0xed
0x06 0x4b 0x81 0xed
0x06 0x4b 0x01 0xed
-# CHECK: vstr.64 d4, [r1]
-# CHECK: vstr.64 d4, [r1, #24]
-# CHECK: vstr.64 d4, [r1, #-24]
+# CHECK: vstr d4, [r1]
+# CHECK: vstr d4, [r1, #24]
+# CHECK: vstr d4, [r1, #-24]
0x00 0x2a 0x81 0xed
0x06 0x2a 0x81 0xed
0x06 0x2a 0x01 0xed
-# CHECK: vstr.32 s4, [r1]
-# CHECK: vstr.32 s4, [r1, #24]
-# CHECK: vstr.32 s4, [r1, #-24]
+# CHECK: vstr s4, [r1]
+# CHECK: vstr s4, [r1, #24]
+# CHECK: vstr s4, [r1, #-24]
0x0c 0x2b 0x91 0xec
0x06 0x1a 0x91 0xec
diff --git a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
index 6fdb55e691d4..8146b5cb4461 100644
--- a/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
+++ b/test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt
@@ -1,4 +1,4 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {potentially undefined instruction encoding}
# invalid (imod, M, iflags) combination
-0x93 0x1c 0x02 0xf1
+0x93 0x00 0x02 0xf1
diff --git a/test/MC/Disassembler/ARM/invalid-IT-CC15.txt b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
new file mode 100644
index 000000000000..17e25ead42f1
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-IT-CC15.txt
@@ -0,0 +1,18 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv7-unknown-unknown |& grep und
+# rdar://10841671
+
+0xe3 0xbf
+0xdf 0xed 0x61 0x3b
+0x71 0xee 0xe0 0x1b
+0x72 0xee 0xa3 0x2b
+0xdf 0xed 0x60 0x0b
+
+# This test deals with an undefined condition code value of 15 in the above
+# sequence of junk bytes, and makes sure the disassembler does not abort when
+# printing the final instruction in this list.
+#
+# ittte al
+# vldr d19, [pc, #388]
+# vsub.f64 d17, d17, d16
+# vadd.f64 d18, d18, d19
+# vldr<und> d16, [pc, #384]
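The comment above reconstructs why condition code 15 appears: 0xbfe3 is an IT
instruction whose mask places an else slot after an AL first condition, and
inverting AL (0b1110) yields the undefined value 15. A small decode sketch
(plain Python written for this note, mirroring the ARM ARM's IT mask rules)
shows how 0xbfe3 reads back as ittte al:

    def decode_it(hw):
        # IT: bits[15:8] = 0xbf, bits[7:4] = firstcond, bits[3:0] = mask.
        assert hw & 0xff00 == 0xbf00
        firstcond, mask = (hw >> 4) & 0xf, hw & 0xf
        lowest = next(i for i in range(4) if (mask >> i) & 1)
        suffix = ''
        for i in (3, 2, 1)[:3 - lowest]:
            # T if the mask bit matches firstcond<0>, E if inverted.
            suffix += 't' if ((mask >> i) & 1) == (firstcond & 1) else 'e'
        return 'it' + suffix, firstcond

    print(decode_it(0xbfe3))  # ('ittte', 14); the 'e' slot gets cond 15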
diff --git a/test/MC/Disassembler/ARM/invalid-LDRD-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRD-arm.txt
deleted file mode 100644
index f8f23ed02b88..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDRD-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| X: X: X: 1| X: X: X: X| 1: 1: X: 1| X: X: X: X|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.68 LDRD (register)
-# if Rt{0} = 1 then UNDEFINED;
-0xd0 0x10 0x00 0x00
diff --git a/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt b/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt
deleted file mode 100644
index 067dcb36a7ef..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LDRT-arm.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=0 Name=PHI Format=(42)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 0: 1: 1| 0: 1: 1: 1| 0: 1: 0: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# The bytes have Inst{4} = 1, so it's not an LDRT Encoding A2 instruction.
-0x10 0x51 0x37 0xe6
-
-
diff --git a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt b/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
deleted file mode 100644
index 6a1f11faf232..000000000000
--- a/test/MC/Disassembler/ARM/invalid-LSL-regform.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.89 LSL (register)
-# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-0x12 0xf1 0xa0 0xe1
diff --git a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt b/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
deleted file mode 100644
index 096b909bc6d1..000000000000
--- a/test/MC/Disassembler/ARM/invalid-RSC-arm.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 0: 0: 1: 1| 0: 0: 0: 0| 1: 1: 1: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-# if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
-0x5f 0xf8 0xe4 0x30
diff --git a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt b/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
deleted file mode 100644
index b236f8ef4d25..000000000000
--- a/test/MC/Disassembler/ARM/invalid-SSAT-arm.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 0: 1: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# A8.6.183 SSAT
-# if d == 15 || n == 15 then UNPREDICTABLE;
-0x1a 0xf4 0xa0 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt b/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
deleted file mode 100644
index d3998bdc09a8..000000000000
--- a/test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
-# -------------------------------------------------------------------------------------------------
-#
-# if t == 15 then UNPREDICTABLE
-0x00 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt b/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
deleted file mode 100644
index fb3e71106c9d..000000000000
--- a/test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& grep {invalid instruction encoding}
-
-# Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
-# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
-# -------------------------------------------------------------------------------------------------
-# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 0: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 1: 1: 1|
-# -------------------------------------------------------------------------------------------------
-#
-# DPFrm with bad reg specifier(s)
-#
-# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
-0x9f 0x5f 0x66 0xe6
diff --git a/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
new file mode 100644
index 000000000000..8ff3a2b9bd59
--- /dev/null
+++ b/test/MC/Disassembler/ARM/invalid-VST1d8Twb_register-thumb.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=thumb-apple-darwin9 |& grep {invalid instruction encoding}
+
+# Opcode=1839 Name=VST1d8Twb_register Format=ARM_FORMAT_NLdSt(30)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 1| 1: 0: 0: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 1: 1: 0| 0: 0: 1: 0| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.391 VST1 (multiple single elements)
+# This encoding looks like: vst1.8 {d0,d1,d2}, [r0, :128]
+# But an alignment of 128, encoded in bits 5-4 as align = 0b10, is available
+# only if <list> contains two or four registers. rdar://11220250
+0x00 0xf9 0x2f 0x06
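Reading the bytes 0x00 0xf9 0x2f 0x06 as two little-endian halfwords gives the
word 0xf900062f: the type field (bits 11:8 = 0b0110) selects a three-register
list and the align field (bits 5:4 = 0b10) requests 128-bit alignment, which
the constraint above reserves for lists of two or four registers. A sketch of
just that check (hypothetical Python; the type-to-register-count mapping is
assumed from the ARM ARM VST1 encoding table):

    def vst1_align_ok(insn):
        nregs = {0b0111: 1, 0b1010: 2, 0b0110: 3, 0b0010: 4}[(insn >> 8) & 0xf]
        align = (insn >> 4) & 0x3
        # align = 0b10 (128-bit) is only available with 2 or 4 registers.
        return not (align == 0b10 and nregs not in (2, 4))

    print(vst1_align_ok(0xf900062f))  # False -> invalid instruction encoding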
diff --git a/test/MC/Disassembler/ARM/ldrd-armv4.txt b/test/MC/Disassembler/ARM/ldrd-armv4.txt
new file mode 100644
index 000000000000..bb87ade7869b
--- /dev/null
+++ b/test/MC/Disassembler/ARM/ldrd-armv4.txt
@@ -0,0 +1,15 @@
+# RUN: llvm-mc --disassemble %s -triple=armv4-linux-gnueabi |& FileCheck %s -check-prefix=V4
+# RUN: llvm-mc --disassemble %s -triple=armv5te-linux-gnueabi |& FileCheck %s -check-prefix=V5TE
+
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| X: X: X: 1| X: X: X: X| 1: 1: X: 1| X: X: X: X|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.68 LDRD (register)
+# if Rt{0} = 1 then UNDEFINED;
+
+# V4: invalid instruction encoding
+# V5TE: ldrd
+0xd0 0x10 0x00 0x01
+
diff --git a/test/MC/Disassembler/ARM/lit.local.cfg b/test/MC/Disassembler/ARM/lit.local.cfg
new file mode 100644
index 000000000000..22a76e5cab2d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index 1e03debefab6..f44c2a02cf06 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -30,7 +30,7 @@
# CHECK: vorr d0, d15, d15
0x1f 0x01 0x2f 0xf2
-# CHECK: vmov.i64 q6, #0xFF00FF00FF
+# CHECK: vmov.i64 q6, #0xff00ff00ff
0x75 0xce 0x81 0xf2
# CHECK: vmvn.i32 d0, #0x0
@@ -69,10 +69,10 @@
# CHECK: vpop {d8}
0x02 0x8b 0xbd 0xec
-# CHECK: vorr.i32 q15, #0x4F0000
+# CHECK: vorr.i32 q15, #0x4f0000
0x5f 0xe5 0xc4 0xf2
-# CHECK: vbic.i32 q2, #0xA900
+# CHECK: vbic.i32 q2, #0xa900
0x79 0x43 0x82 0xf3
# CHECK: vst2.32 {d16, d18}, [r2, :64], r2
diff --git a/test/MC/Disassembler/ARM/neon.txt b/test/MC/Disassembler/ARM/neon.txt
index 5d2df93ed780..58fe20eaa275 100644
--- a/test/MC/Disassembler/ARM/neon.txt
+++ b/test/MC/Disassembler/ARM/neon.txt
@@ -307,9 +307,9 @@
0xf2 0x01 0x50 0xf2
# CHECK: vbic q8, q8, q9
0x3f 0x07 0xc7 0xf3
-# CHECK: vbic.i32 d16, #0xFF000000
+# CHECK: vbic.i32 d16, #0xff000000
0x7f 0x07 0xc7 0xf3
-# CHECK: vbic.i32 q8, #0xFF000000
+# CHECK: vbic.i32 q8, #0xff000000
0xb0 0x01 0x71 0xf2
# CHECK: vorn d16, d17, d16
@@ -587,11 +587,11 @@
0x10 0x06 0xc2 0xf2
# CHECK: vmov.i32 d16, #0x20000000
0x10 0x0c 0xc2 0xf2
-# CHECK: vmov.i32 d16, #0x20FF
+# CHECK: vmov.i32 d16, #0x20ff
0x10 0x0d 0xc2 0xf2
-# CHECK: vmov.i32 d16, #0x20FFFF
+# CHECK: vmov.i32 d16, #0x20ffff
0x33 0x0e 0xc1 0xf3
-# CHECK: vmov.i64 d16, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64 d16, #0xff0000ff0000ffff
0x58 0x0e 0xc0 0xf2
# CHECK: vmov.i8 q8, #0x8
0x50 0x08 0xc1 0xf2
@@ -607,11 +607,11 @@
0x50 0x06 0xc2 0xf2
# CHECK: vmov.i32 q8, #0x20000000
0x50 0x0c 0xc2 0xf2
-# CHECK: vmov.i32 q8, #0x20FF
+# CHECK: vmov.i32 q8, #0x20ff
0x50 0x0d 0xc2 0xf2
-# CHECK: vmov.i32 q8, #0x20FFFF
+# CHECK: vmov.i32 q8, #0x20ffff
0x73 0x0e 0xc1 0xf3
-# CHECK: vmov.i64 q8, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64 q8, #0xff0000ff0000ffff
0x30 0x08 0xc1 0xf2
# CHECK: vmvn.i16 d16, #0x10
0x30 0x0a 0xc1 0xf2
@@ -625,9 +625,9 @@
0x30 0x06 0xc2 0xf2
# CHECK: vmvn.i32 d16, #0x20000000
0x30 0x0c 0xc2 0xf2
-# CHECK: vmvn.i32 d16, #0x20FF
+# CHECK: vmvn.i32 d16, #0x20ff
0x30 0x0d 0xc2 0xf2
-# CHECK: vmvn.i32 d16, #0x20FFFF
+# CHECK: vmvn.i32 d16, #0x20ffff
0x30 0x0a 0xc8 0xf2
# CHECK: vmovl.s8 q8, d16
0x30 0x0a 0xd0 0xf2
@@ -1856,3 +1856,390 @@
0xe9 0x1a 0xb2 0x4e
# CHECK: vcvttmi.f32.f16 s2, s19
+
+0x1d 0x76 0x66 0xf4
+# CHECK: vld1.8 {d23, d24, d25}, [r6, :64]!
+0x9d 0x62 0x6f 0xf4
+# CHECK: vld1.32 {d22, d23, d24, d25}, [pc, :64]!
+0x9d 0xaa 0x41 0xf4
+# CHECK: vst1.32 {d26, d27}, [r1, :64]!
+
+0x10 0x0f 0x83 0xf2
+0x50 0x0f 0x83 0xf2
+# CHECK: vmov.f32 d0, #1.600000e+01
+# CHECK: vmov.f32 q0, #1.600000e+01
+
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK: vld2.8 {d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]
+
+# rdar://11034702
+0x0d 0x87 0x04 0xf4
+# CHECK: vst1.8 {d8}, [r4]!
+0x4d 0x87 0x04 0xf4
+# CHECK: vst1.16 {d8}, [r4]!
+0x8d 0x87 0x04 0xf4
+# CHECK: vst1.32 {d8}, [r4]!
+0xcd 0x87 0x04 0xf4
+# CHECK: vst1.64 {d8}, [r4]!
+0x06 0x87 0x04 0xf4
+# CHECK: vst1.8 {d8}, [r4], r6
+0x46 0x87 0x04 0xf4
+# CHECK: vst1.16 {d8}, [r4], r6
+0x86 0x87 0x04 0xf4
+# CHECK: vst1.32 {d8}, [r4], r6
+0xc6 0x87 0x04 0xf4
+# CHECK: vst1.64 {d8}, [r4], r6
+
+0x0d 0x8a 0x04 0xf4
+# CHECK: vst1.8 {d8, d9}, [r4]!
+0x4d 0x8a 0x04 0xf4
+# CHECK: vst1.16 {d8, d9}, [r4]!
+0x8d 0x8a 0x04 0xf4
+# CHECK: vst1.32 {d8, d9}, [r4]!
+0xcd 0x8a 0x04 0xf4
+# CHECK: vst1.64 {d8, d9}, [r4]!
+0x06 0x8a 0x04 0xf4
+# CHECK: vst1.8 {d8, d9}, [r4], r6
+0x46 0x8a 0x04 0xf4
+# CHECK: vst1.16 {d8, d9}, [r4], r6
+0x86 0x8a 0x04 0xf4
+# CHECK: vst1.32 {d8, d9}, [r4], r6
+0xc6 0x8a 0x04 0xf4
+# CHECK: vst1.64 {d8, d9}, [r4], r6
+
+0x0d 0x86 0x04 0xf4
+# CHECK: vst1.8 {d8, d9, d10}, [r4]!
+0x4d 0x86 0x04 0xf4
+# CHECK: vst1.16 {d8, d9, d10}, [r4]!
+0x8d 0x86 0x04 0xf4
+# CHECK: vst1.32 {d8, d9, d10}, [r4]!
+0xcd 0x86 0x04 0xf4
+# CHECK: vst1.64 {d8, d9, d10}, [r4]!
+0x06 0x86 0x04 0xf4
+# CHECK: vst1.8 {d8, d9, d10}, [r4], r6
+0x46 0x86 0x04 0xf4
+# CHECK: vst1.16 {d8, d9, d10}, [r4], r6
+0x86 0x86 0x04 0xf4
+# CHECK: vst1.32 {d8, d9, d10}, [r4], r6
+0xc6 0x86 0x04 0xf4
+# CHECK: vst1.64 {d8, d9, d10}, [r4], r6
+
+0x0d 0x82 0x04 0xf4
+# CHECK: vst1.8 {d8, d9, d10, d11}, [r4]!
+0x4d 0x82 0x04 0xf4
+# CHECK: vst1.16 {d8, d9, d10, d11}, [r4]!
+0x8d 0x82 0x04 0xf4
+# CHECK: vst1.32 {d8, d9, d10, d11}, [r4]!
+0xcd 0x82 0x04 0xf4
+# CHECK: vst1.64 {d8, d9, d10, d11}, [r4]!
+0x06 0x82 0x04 0xf4
+# CHECK: vst1.8 {d8, d9, d10, d11}, [r4], r6
+0x46 0x82 0x04 0xf4
+# CHECK: vst1.16 {d8, d9, d10, d11}, [r4], r6
+0x86 0x82 0x04 0xf4
+# CHECK: vst1.32 {d8, d9, d10, d11}, [r4], r6
+0xc6 0x82 0x04 0xf4
+# CHECK: vst1.64 {d8, d9, d10, d11}, [r4], r6
+
+0x0d 0x88 0x04 0xf4
+# CHECK: vst2.8 {d8, d9}, [r4]!
+0x4d 0x88 0x04 0xf4
+# CHECK: vst2.16 {d8, d9}, [r4]!
+0x8d 0x88 0x04 0xf4
+# CHECK: vst2.32 {d8, d9}, [r4]!
+0x06 0x88 0x04 0xf4
+# CHECK: vst2.8 {d8, d9}, [r4], r6
+0x46 0x88 0x04 0xf4
+# CHECK: vst2.16 {d8, d9}, [r4], r6
+0x86 0x88 0x04 0xf4
+# CHECK: vst2.32 {d8, d9}, [r4], r6
+
+0x0d 0x89 0x04 0xf4
+# CHECK: vst2.8 {d8, d10}, [r4]!
+0x4d 0x89 0x04 0xf4
+# CHECK: vst2.16 {d8, d10}, [r4]!
+0x8d 0x89 0x04 0xf4
+# CHECK: vst2.32 {d8, d10}, [r4]!
+0x06 0x89 0x04 0xf4
+# CHECK: vst2.8 {d8, d10}, [r4], r6
+0x46 0x89 0x04 0xf4
+# CHECK: vst2.16 {d8, d10}, [r4], r6
+0x86 0x89 0x04 0xf4
+# CHECK: vst2.32 {d8, d10}, [r4], r6
+
+0x0d 0x84 0x04 0xf4
+# CHECK: vst3.8 {d8, d9, d10}, [r4]!
+0x4d 0x84 0x04 0xf4
+# CHECK: vst3.16 {d8, d9, d10}, [r4]!
+0x8d 0x84 0x04 0xf4
+# CHECK: vst3.32 {d8, d9, d10}, [r4]!
+0x06 0x85 0x04 0xf4
+# CHECK: vst3.8 {d8, d10, d12}, [r4], r6
+0x46 0x85 0x04 0xf4
+# CHECK: vst3.16 {d8, d10, d12}, [r4], r6
+0x86 0x85 0x04 0xf4
+# CHECK: vst3.32 {d8, d10, d12}, [r4], r6
+
+0x0d 0x80 0x04 0xf4
+# CHECK: vst4.8 {d8, d9, d10, d11}, [r4]!
+0x4d 0x80 0x04 0xf4
+# CHECK: vst4.16 {d8, d9, d10, d11}, [r4]!
+0x8d 0x80 0x04 0xf4
+# CHECK: vst4.32 {d8, d9, d10, d11}, [r4]!
+0x06 0x81 0x04 0xf4
+# CHECK: vst4.8 {d8, d10, d12, d14}, [r4], r6
+0x46 0x81 0x04 0xf4
+# CHECK: vst4.16 {d8, d10, d12, d14}, [r4], r6
+0x86 0x81 0x04 0xf4
+# CHECK: vst4.32 {d8, d10, d12, d14}, [r4], r6
+
+0x4f 0x8a 0x04 0xf4
+# CHECK: vst1.16 {d8, d9}, [r4]
+0x8f 0x8a 0x04 0xf4
+# CHECK: vst1.32 {d8, d9}, [r4]
+0xcf 0x8a 0x04 0xf4
+# CHECK: vst1.64 {d8, d9}, [r4]
+0x0f 0x8a 0x04 0xf4
+# CHECK: vst1.8 {d8, d9}, [r4]
+
+0x4f 0x88 0x04 0xf4
+# CHECK: vst2.16 {d8, d9}, [r4]
+0x8f 0x88 0x04 0xf4
+# CHECK: vst2.32 {d8, d9}, [r4]
+0x0f 0x88 0x04 0xf4
+# CHECK: vst2.8 {d8, d9}, [r4]
+
+0x4d 0x88 0x04 0xf4
+# CHECK: vst2.16 {d8, d9}, [r4]!
+0x46 0x88 0x04 0xf4
+# CHECK: vst2.16 {d8, d9}, [r4], r6
+0x8d 0x88 0x04 0xf4
+# CHECK: vst2.32 {d8, d9}, [r4]!
+0x86 0x88 0x04 0xf4
+# CHECK: vst2.32 {d8, d9}, [r4], r6
+0x0d 0x88 0x04 0xf4
+# CHECK: vst2.8 {d8, d9}, [r4]!
+0x06 0x88 0x04 0xf4
+# CHECK: vst2.8 {d8, d9}, [r4], r6
+
+0x4f 0x89 0x04 0xf4
+# CHECK: vst2.16 {d8, d10}, [r4]
+0x8f 0x89 0x04 0xf4
+# CHECK: vst2.32 {d8, d10}, [r4]
+0x0f 0x89 0x04 0xf4
+# CHECK: vst2.8 {d8, d10}, [r4]
+
+0x0f 0x84 0x04 0xf4
+# CHECK: vst3.8 {d8, d9, d10}, [r4]
+0x4f 0x84 0x04 0xf4
+# CHECK: vst3.16 {d8, d9, d10}, [r4]
+0x8f 0x84 0x04 0xf4
+# CHECK: vst3.32 {d8, d9, d10}, [r4]
+
+0x0f 0x80 0x04 0xf4
+# CHECK: vst4.8 {d8, d9, d10, d11}, [r4]
+0x4f 0x80 0x04 0xf4
+# CHECK: vst4.16 {d8, d9, d10, d11}, [r4]
+0x8f 0x80 0x04 0xf4
+# CHECK: vst4.32 {d8, d9, d10, d11}, [r4]
+
+0x0f 0x85 0x04 0xf4
+# CHECK: vst3.8 {d8, d10, d12}, [r4]
+0x4f 0x85 0x04 0xf4
+# CHECK: vst3.16 {d8, d10, d12}, [r4]
+0x8f 0x85 0x04 0xf4
+# CHECK: vst3.32 {d8, d10, d12}, [r4]
+
+0x0f 0x81 0x04 0xf4
+# CHECK: vst4.8 {d8, d10, d12, d14}, [r4]
+0x4f 0x81 0x04 0xf4
+# CHECK: vst4.16 {d8, d10, d12, d14}, [r4]
+0x8f 0x81 0x04 0xf4
+# CHECK: vst4.32 {d8, d10, d12, d14}, [r4]
+
+# rdar://11204059
+0x0d 0x87 0x24 0xf4
+# CHECK: vld1.8 {d8}, [r4]!
+0x4d 0x87 0x24 0xf4
+# CHECK: vld1.16 {d8}, [r4]!
+0x8d 0x87 0x24 0xf4
+# CHECK: vld1.32 {d8}, [r4]!
+0xcd 0x87 0x24 0xf4
+# CHECK: vld1.64 {d8}, [r4]!
+0x06 0x87 0x24 0xf4
+# CHECK: vld1.8 {d8}, [r4], r6
+0x46 0x87 0x24 0xf4
+# CHECK: vld1.16 {d8}, [r4], r6
+0x86 0x87 0x24 0xf4
+# CHECK: vld1.32 {d8}, [r4], r6
+0xc6 0x87 0x24 0xf4
+# CHECK: vld1.64 {d8}, [r4], r6
+0x0d 0x8a 0x24 0xf4
+# CHECK: vld1.8 {d8, d9}, [r4]!
+0x4d 0x8a 0x24 0xf4
+# CHECK: vld1.16 {d8, d9}, [r4]!
+0x8d 0x8a 0x24 0xf4
+# CHECK: vld1.32 {d8, d9}, [r4]!
+0xcd 0x8a 0x24 0xf4
+# CHECK: vld1.64 {d8, d9}, [r4]!
+0x06 0x8a 0x24 0xf4
+# CHECK: vld1.8 {d8, d9}, [r4], r6
+0x46 0x8a 0x24 0xf4
+# CHECK: vld1.16 {d8, d9}, [r4], r6
+0x86 0x8a 0x24 0xf4
+# CHECK: vld1.32 {d8, d9}, [r4], r6
+0xc6 0x8a 0x24 0xf4
+# CHECK: vld1.64 {d8, d9}, [r4], r6
+0x0d 0x86 0x24 0xf4
+# CHECK: vld1.8 {d8, d9, d10}, [r4]!
+0x4d 0x86 0x24 0xf4
+# CHECK: vld1.16 {d8, d9, d10}, [r4]!
+0x8d 0x86 0x24 0xf4
+# CHECK: vld1.32 {d8, d9, d10}, [r4]!
+0xcd 0x86 0x24 0xf4
+# CHECK: vld1.64 {d8, d9, d10}, [r4]!
+0x06 0x86 0x24 0xf4
+# CHECK: vld1.8 {d8, d9, d10}, [r4], r6
+0x46 0x86 0x24 0xf4
+# CHECK: vld1.16 {d8, d9, d10}, [r4], r6
+0x86 0x86 0x24 0xf4
+# CHECK: vld1.32 {d8, d9, d10}, [r4], r6
+0xc6 0x86 0x24 0xf4
+# CHECK: vld1.64 {d8, d9, d10}, [r4], r6
+0x0d 0x82 0x24 0xf4
+# CHECK: vld1.8 {d8, d9, d10, d11}, [r4]!
+0x4d 0x82 0x24 0xf4
+# CHECK: vld1.16 {d8, d9, d10, d11}, [r4]!
+0x8d 0x82 0x24 0xf4
+# CHECK: vld1.32 {d8, d9, d10, d11}, [r4]!
+0xcd 0x82 0x24 0xf4
+# CHECK: vld1.64 {d8, d9, d10, d11}, [r4]!
+0x06 0x82 0x24 0xf4
+# CHECK: vld1.8 {d8, d9, d10, d11}, [r4], r6
+0x46 0x82 0x24 0xf4
+# CHECK: vld1.16 {d8, d9, d10, d11}, [r4], r6
+0x86 0x82 0x24 0xf4
+# CHECK: vld1.32 {d8, d9, d10, d11}, [r4], r6
+0xc6 0x82 0x24 0xf4
+# CHECK: vld1.64 {d8, d9, d10, d11}, [r4], r6
+0x0d 0x88 0x24 0xf4
+# CHECK: vld2.8 {d8, d9}, [r4]!
+0x4d 0x88 0x24 0xf4
+# CHECK: vld2.16 {d8, d9}, [r4]!
+0x8d 0x88 0x24 0xf4
+# CHECK: vld2.32 {d8, d9}, [r4]!
+0x06 0x88 0x24 0xf4
+# CHECK: vld2.8 {d8, d9}, [r4], r6
+0x46 0x88 0x24 0xf4
+# CHECK: vld2.16 {d8, d9}, [r4], r6
+0x86 0x88 0x24 0xf4
+# CHECK: vld2.32 {d8, d9}, [r4], r6
+0x0d 0x89 0x24 0xf4
+# CHECK: vld2.8 {d8, d10}, [r4]!
+0x4d 0x89 0x24 0xf4
+# CHECK: vld2.16 {d8, d10}, [r4]!
+0x8d 0x89 0x24 0xf4
+# CHECK: vld2.32 {d8, d10}, [r4]!
+0x06 0x89 0x24 0xf4
+# CHECK: vld2.8 {d8, d10}, [r4], r6
+0x46 0x89 0x24 0xf4
+# CHECK: vld2.16 {d8, d10}, [r4], r6
+0x86 0x89 0x24 0xf4
+# CHECK: vld2.32 {d8, d10}, [r4], r6
+0x0d 0x84 0x24 0xf4
+# CHECK: vld3.8 {d8, d9, d10}, [r4]!
+0x4d 0x84 0x24 0xf4
+# CHECK: vld3.16 {d8, d9, d10}, [r4]!
+0x8d 0x84 0x24 0xf4
+# CHECK: vld3.32 {d8, d9, d10}, [r4]!
+0x06 0x85 0x24 0xf4
+# CHECK: vld3.8 {d8, d10, d12}, [r4], r6
+0x46 0x85 0x24 0xf4
+# CHECK: vld3.16 {d8, d10, d12}, [r4], r6
+0x86 0x85 0x24 0xf4
+# CHECK: vld3.32 {d8, d10, d12}, [r4], r6
+0x0d 0x80 0x24 0xf4
+# CHECK: vld4.8 {d8, d9, d10, d11}, [r4]!
+0x4d 0x80 0x24 0xf4
+# CHECK: vld4.16 {d8, d9, d10, d11}, [r4]!
+0x8d 0x80 0x24 0xf4
+# CHECK: vld4.32 {d8, d9, d10, d11}, [r4]!
+0x06 0x81 0x24 0xf4
+# CHECK: vld4.8 {d8, d10, d12, d14}, [r4], r6
+0x46 0x81 0x24 0xf4
+# CHECK: vld4.16 {d8, d10, d12, d14}, [r4], r6
+0x86 0x81 0x24 0xf4
+# CHECK: vld4.32 {d8, d10, d12, d14}, [r4], r6
+0x4f 0x8a 0x24 0xf4
+# CHECK: vld1.16 {d8, d9}, [r4]
+0x8f 0x8a 0x24 0xf4
+# CHECK: vld1.32 {d8, d9}, [r4]
+0xcf 0x8a 0x24 0xf4
+# CHECK: vld1.64 {d8, d9}, [r4]
+0x0f 0x8a 0x24 0xf4
+# CHECK: vld1.8 {d8, d9}, [r4]
+0x4f 0x88 0x24 0xf4
+# CHECK: vld2.16 {d8, d9}, [r4]
+0x8f 0x88 0x24 0xf4
+# CHECK: vld2.32 {d8, d9}, [r4]
+0x0f 0x88 0x24 0xf4
+# CHECK: vld2.8 {d8, d9}, [r4]
+0x4d 0x88 0x24 0xf4
+# CHECK: vld2.16 {d8, d9}, [r4]!
+0x46 0x88 0x24 0xf4
+# CHECK: vld2.16 {d8, d9}, [r4], r6
+0x8d 0x88 0x24 0xf4
+# CHECK: vld2.32 {d8, d9}, [r4]!
+0x86 0x88 0x24 0xf4
+# CHECK: vld2.32 {d8, d9}, [r4], r6
+0x0d 0x88 0x24 0xf4
+# CHECK: vld2.8 {d8, d9}, [r4]!
+0x06 0x88 0x24 0xf4
+# CHECK: vld2.8 {d8, d9}, [r4], r6
+0x4f 0x89 0x24 0xf4
+# CHECK: vld2.16 {d8, d10}, [r4]
+0x8f 0x89 0x24 0xf4
+# CHECK: vld2.32 {d8, d10}, [r4]
+0x0f 0x89 0x24 0xf4
+# CHECK: vld2.8 {d8, d10}, [r4]
+0x4d 0x83 0x24 0xf4
+# CHECK: vld2.16 {d8, d9, d10, d11}, [r4]!
+0x46 0x83 0x24 0xf4
+# CHECK: vld2.16 {d8, d9, d10, d11}, [r4], r6
+0x8d 0x83 0x24 0xf4
+# CHECK: vld2.32 {d8, d9, d10, d11}, [r4]!
+0x86 0x83 0x24 0xf4
+# CHECK: vld2.32 {d8, d9, d10, d11}, [r4], r6
+0x0d 0x83 0x24 0xf4
+# CHECK: vld2.8 {d8, d9, d10, d11}, [r4]!
+0x06 0x83 0x24 0xf4
+# CHECK: vld2.8 {d8, d9, d10, d11}, [r4], r6
+0x0f 0x84 0x24 0xf4
+# CHECK: vld3.8 {d8, d9, d10}, [r4]
+0x4f 0x84 0x24 0xf4
+# CHECK: vld3.16 {d8, d9, d10}, [r4]
+0x8f 0x84 0x24 0xf4
+# CHECK: vld3.32 {d8, d9, d10}, [r4]
+0x0f 0x80 0x24 0xf4
+# CHECK: vld4.8 {d8, d9, d10, d11}, [r4]
+0x4f 0x80 0x24 0xf4
+# CHECK: vld4.16 {d8, d9, d10, d11}, [r4]
+0x8f 0x80 0x24 0xf4
+# CHECK: vld4.32 {d8, d9, d10, d11}, [r4]
+0x0f 0x85 0x24 0xf4
+# CHECK: vld3.8 {d8, d10, d12}, [r4]
+0x4f 0x85 0x24 0xf4
+# CHECK: vld3.16 {d8, d10, d12}, [r4]
+0x8f 0x85 0x24 0xf4
+# CHECK: vld3.32 {d8, d10, d12}, [r4]
+0x0f 0x81 0x24 0xf4
+# CHECK: vld4.8 {d8, d10, d12, d14}, [r4]
+0x4f 0x81 0x24 0xf4
+# CHECK: vld4.16 {d8, d10, d12, d14}, [r4]
+0x8f 0x81 0x24 0xf4
+# CHECK: vld4.32 {d8, d10, d12, d14}, [r4]
diff --git a/test/MC/Disassembler/ARM/neont2.txt b/test/MC/Disassembler/ARM/neont2.txt
index 577703c804b7..efe7e60ddaed 100644
--- a/test/MC/Disassembler/ARM/neont2.txt
+++ b/test/MC/Disassembler/ARM/neont2.txt
@@ -301,9 +301,9 @@
0x50 0xef 0xf2 0x01
# CHECK: vbic q8, q8, q9
0xc7 0xff 0x3f 0x07
-# CHECK: vbic.i32 d16, #0xFF000000
+# CHECK: vbic.i32 d16, #0xff000000
0xc7 0xff 0x7f 0x07
-# CHECK: vbic.i32 q8, #0xFF000000
+# CHECK: vbic.i32 q8, #0xff000000
0x71 0xef 0xb0 0x01
# CHECK: vorn d16, d17, d16
@@ -486,11 +486,11 @@
0xc2 0xef 0x10 0x06
# CHECK: vmov.i32 d16, #0x20000000
0xc2 0xef 0x10 0x0c
-# CHECK: vmov.i32 d16, #0x20FF
+# CHECK: vmov.i32 d16, #0x20ff
0xc2 0xef 0x10 0x0d
-# CHECK: vmov.i32 d16, #0x20FFFF
+# CHECK: vmov.i32 d16, #0x20ffff
0xc1 0xff 0x33 0x0e
-# CHECK: vmov.i64 d16, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64 d16, #0xff0000ff0000ffff
0xc0 0xef 0x58 0x0e
# CHECK: vmov.i8 q8, #0x8
0xc1 0xef 0x50 0x08
@@ -506,11 +506,11 @@
0xc2 0xef 0x50 0x06
# CHECK: vmov.i32 q8, #0x20000000
0xc2 0xef 0x50 0x0c
-# CHECK: vmov.i32 q8, #0x20FF
+# CHECK: vmov.i32 q8, #0x20ff
0xc2 0xef 0x50 0x0d
-# CHECK: vmov.i32 q8, #0x20FFFF
+# CHECK: vmov.i32 q8, #0x20ffff
0xc1 0xff 0x73 0x0e
-# CHECK: vmov.i64 q8, #0xFF0000FF0000FFFF
+# CHECK: vmov.i64 q8, #0xff0000ff0000ffff
0xc1 0xef 0x30 0x08
# CHECK: vmvn.i16 d16, #0x10
0xc1 0xef 0x30 0x0a
@@ -524,9 +524,9 @@
0xc2 0xef 0x30 0x06
# CHECK: vmvn.i32 d16, #0x20000000
0xc2 0xef 0x30 0x0c
-# CHECK: vmvn.i32 d16, #0x20FF
+# CHECK: vmvn.i32 d16, #0x20ff
0xc2 0xef 0x30 0x0d
-# CHECK: vmvn.i32 d16, #0x20FFFF
+# CHECK: vmvn.i32 d16, #0x20ffff
0xc8 0xef 0x30 0x0a
# CHECK: vmovl.s8 q8, d16
0xd0 0xef 0x30 0x0a
@@ -1584,3 +1584,379 @@
# CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0, :64]
0xc0 0xf9 0x4f 0x1b
# CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
+
+0x63 0xf9 0x37 0xc9
+# CHECK: vld2.8 {d28, d30}, [r3, :256], r7
+
+# rdar://10798451
+0xe7 0xf9 0x32 0x1d
+# CHECK: vld2.8 {d17[], d19[]}, [r7, :16], r2
+0xe7 0xf9 0x3d 0x1d
+# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]!
+0xe7 0xf9 0x3f 0x1d
+# CHECK: vld2.8 {d17[], d19[]}, [r7, :16]
+
+# rdar://11034702
+0x04 0xf9 0x0d 0x87
+# CHECK: vst1.8 {d8}, [r4]!
+0x04 0xf9 0x4d 0x87
+# CHECK: vst1.16 {d8}, [r4]!
+0x04 0xf9 0x8d 0x87
+# CHECK: vst1.32 {d8}, [r4]!
+0x04 0xf9 0xcd 0x87
+# CHECK: vst1.64 {d8}, [r4]!
+0x04 0xf9 0x06 0x87
+# CHECK: vst1.8 {d8}, [r4], r6
+0x04 0xf9 0x46 0x87
+# CHECK: vst1.16 {d8}, [r4], r6
+0x04 0xf9 0x86 0x87
+# CHECK: vst1.32 {d8}, [r4], r6
+0x04 0xf9 0xc6 0x87
+# CHECK: vst1.64 {d8}, [r4], r6
+
+0x04 0xf9 0x0d 0x8a
+# CHECK: vst1.8 {d8, d9}, [r4]!
+0x04 0xf9 0x4d 0x8a
+# CHECK: vst1.16 {d8, d9}, [r4]!
+0x04 0xf9 0x8d 0x8a
+# CHECK: vst1.32 {d8, d9}, [r4]!
+0x04 0xf9 0xcd 0x8a
+# CHECK: vst1.64 {d8, d9}, [r4]!
+0x04 0xf9 0x06 0x8a
+# CHECK: vst1.8 {d8, d9}, [r4], r6
+0x04 0xf9 0x46 0x8a
+# CHECK: vst1.16 {d8, d9}, [r4], r6
+0x04 0xf9 0x86 0x8a
+# CHECK: vst1.32 {d8, d9}, [r4], r6
+0x04 0xf9 0xc6 0x8a
+# CHECK: vst1.64 {d8, d9}, [r4], r6
+
+0x04 0xf9 0x0d 0x86
+# CHECK: vst1.8 {d8, d9, d10}, [r4]!
+0x04 0xf9 0x4d 0x86
+# CHECK: vst1.16 {d8, d9, d10}, [r4]!
+0x04 0xf9 0x8d 0x86
+# CHECK: vst1.32 {d8, d9, d10}, [r4]!
+0x04 0xf9 0xcd 0x86
+# CHECK: vst1.64 {d8, d9, d10}, [r4]!
+0x04 0xf9 0x06 0x86
+# CHECK: vst1.8 {d8, d9, d10}, [r4], r6
+0x04 0xf9 0x46 0x86
+# CHECK: vst1.16 {d8, d9, d10}, [r4], r6
+0x04 0xf9 0x86 0x86
+# CHECK: vst1.32 {d8, d9, d10}, [r4], r6
+0x04 0xf9 0xc6 0x86
+# CHECK: vst1.64 {d8, d9, d10}, [r4], r6
+
+0x04 0xf9 0x0d 0x82
+# CHECK: vst1.8 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x4d 0x82
+# CHECK: vst1.16 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x8d 0x82
+# CHECK: vst1.32 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0xcd 0x82
+# CHECK: vst1.64 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x06 0x82
+# CHECK: vst1.8 {d8, d9, d10, d11}, [r4], r6
+0x04 0xf9 0x46 0x82
+# CHECK: vst1.16 {d8, d9, d10, d11}, [r4], r6
+0x04 0xf9 0x86 0x82
+# CHECK: vst1.32 {d8, d9, d10, d11}, [r4], r6
+0x04 0xf9 0xc6 0x82
+# CHECK: vst1.64 {d8, d9, d10, d11}, [r4], r6
+
+0x04 0xf9 0x0d 0x88
+# CHECK: vst2.8 {d8, d9}, [r4]!
+0x04 0xf9 0x4d 0x88
+# CHECK: vst2.16 {d8, d9}, [r4]!
+0x04 0xf9 0x8d 0x88
+# CHECK: vst2.32 {d8, d9}, [r4]!
+0x04 0xf9 0x06 0x88
+# CHECK: vst2.8 {d8, d9}, [r4], r6
+0x04 0xf9 0x46 0x88
+# CHECK: vst2.16 {d8, d9}, [r4], r6
+0x04 0xf9 0x86 0x88
+# CHECK: vst2.32 {d8, d9}, [r4], r6
+
+0x04 0xf9 0x0d 0x89
+# CHECK: vst2.8 {d8, d10}, [r4]!
+0x04 0xf9 0x4d 0x89
+# CHECK: vst2.16 {d8, d10}, [r4]!
+0x04 0xf9 0x8d 0x89
+# CHECK: vst2.32 {d8, d10}, [r4]!
+0x04 0xf9 0x06 0x89
+# CHECK: vst2.8 {d8, d10}, [r4], r6
+0x04 0xf9 0x46 0x89
+# CHECK: vst2.16 {d8, d10}, [r4], r6
+0x04 0xf9 0x86 0x89
+# CHECK: vst2.32 {d8, d10}, [r4], r6
+
+0x04 0xf9 0x0d 0x84
+# CHECK: vst3.8 {d8, d9, d10}, [r4]!
+0x04 0xf9 0x4d 0x84
+# CHECK: vst3.16 {d8, d9, d10}, [r4]!
+0x04 0xf9 0x8d 0x84
+# CHECK: vst3.32 {d8, d9, d10}, [r4]!
+0x04 0xf9 0x06 0x85
+# CHECK: vst3.8 {d8, d10, d12}, [r4], r6
+0x04 0xf9 0x46 0x85
+# CHECK: vst3.16 {d8, d10, d12}, [r4], r6
+0x04 0xf9 0x86 0x85
+# CHECK: vst3.32 {d8, d10, d12}, [r4], r6
+
+0x04 0xf9 0x0d 0x80
+# CHECK: vst4.8 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x4d 0x80
+# CHECK: vst4.16 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x8d 0x80
+# CHECK: vst4.32 {d8, d9, d10, d11}, [r4]!
+0x04 0xf9 0x06 0x81
+# CHECK: vst4.8 {d8, d10, d12, d14}, [r4], r6
+0x04 0xf9 0x46 0x81
+# CHECK: vst4.16 {d8, d10, d12, d14}, [r4], r6
+0x04 0xf9 0x86 0x81
+# CHECK: vst4.32 {d8, d10, d12, d14}, [r4], r6
+
+0x04 0xf9 0x4f 0x8a
+# CHECK: vst1.16 {d8, d9}, [r4]
+0x04 0xf9 0x8f 0x8a
+# CHECK: vst1.32 {d8, d9}, [r4]
+0x04 0xf9 0xcf 0x8a
+# CHECK: vst1.64 {d8, d9}, [r4]
+0x04 0xf9 0x0f 0x8a
+# CHECK: vst1.8 {d8, d9}, [r4]
+0x04 0xf9 0x4f 0x88
+# CHECK: vst2.16 {d8, d9}, [r4]
+0x04 0xf9 0x8f 0x88
+# CHECK: vst2.32 {d8, d9}, [r4]
+0x04 0xf9 0x0f 0x88
+# CHECK: vst2.8 {d8, d9}, [r4]
+0x04 0xf9 0x4d 0x88
+# CHECK: vst2.16 {d8, d9}, [r4]!
+0x04 0xf9 0x46 0x88
+# CHECK: vst2.16 {d8, d9}, [r4], r6
+0x04 0xf9 0x8d 0x88
+# CHECK: vst2.32 {d8, d9}, [r4]!
+0x04 0xf9 0x86 0x88
+# CHECK: vst2.32 {d8, d9}, [r4], r6
+0x04 0xf9 0x0d 0x88
+# CHECK: vst2.8 {d8, d9}, [r4]!
+0x04 0xf9 0x06 0x88
+# CHECK: vst2.8 {d8, d9}, [r4], r6
+
+0x04 0xf9 0x4f 0x89
+# CHECK: vst2.16 {d8, d10}, [r4]
+0x04 0xf9 0x8f 0x89
+# CHECK: vst2.32 {d8, d10}, [r4]
+0x04 0xf9 0x0f 0x89
+# CHECK: vst2.8 {d8, d10}, [r4]
+
+0x04 0xf9 0x0f 0x84
+# CHECK: vst3.8 {d8, d9, d10}, [r4]
+0x04 0xf9 0x4f 0x84
+# CHECK: vst3.16 {d8, d9, d10}, [r4]
+0x04 0xf9 0x8f 0x84
+# CHECK: vst3.32 {d8, d9, d10}, [r4]
+
+0x04 0xf9 0x0f 0x80
+# CHECK: vst4.8 {d8, d9, d10, d11}, [r4]
+0x04 0xf9 0x4f 0x80
+# CHECK: vst4.16 {d8, d9, d10, d11}, [r4]
+0x04 0xf9 0x8f 0x80
+# CHECK: vst4.32 {d8, d9, d10, d11}, [r4]
+
+0x04 0xf9 0x0f 0x85
+# CHECK: vst3.8 {d8, d10, d12}, [r4]
+0x04 0xf9 0x4f 0x85
+# CHECK: vst3.16 {d8, d10, d12}, [r4]
+0x04 0xf9 0x8f 0x85
+# CHECK: vst3.32 {d8, d10, d12}, [r4]
+
+0x04 0xf9 0x0f 0x81
+# CHECK: vst4.8 {d8, d10, d12, d14}, [r4]
+0x04 0xf9 0x4f 0x81
+# CHECK: vst4.16 {d8, d10, d12, d14}, [r4]
+0x04 0xf9 0x8f 0x81
+# CHECK: vst4.32 {d8, d10, d12, d14}, [r4]
+
+# rdar://11204059
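+# Mirror coverage of the store cases above: the vld1-vld4 writeback forms.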
+0x24 0xf9 0x0d 0x87
+# CHECK: vld1.8 {d8}, [r4]!
+0x24 0xf9 0x4d 0x87
+# CHECK: vld1.16 {d8}, [r4]!
+0x24 0xf9 0x8d 0x87
+# CHECK: vld1.32 {d8}, [r4]!
+0x24 0xf9 0xcd 0x87
+# CHECK: vld1.64 {d8}, [r4]!
+0x24 0xf9 0x06 0x87
+# CHECK: vld1.8 {d8}, [r4], r6
+0x24 0xf9 0x46 0x87
+# CHECK: vld1.16 {d8}, [r4], r6
+0x24 0xf9 0x86 0x87
+# CHECK: vld1.32 {d8}, [r4], r6
+0x24 0xf9 0xc6 0x87
+# CHECK: vld1.64 {d8}, [r4], r6
+0x24 0xf9 0x0d 0x8a
+# CHECK: vld1.8 {d8, d9}, [r4]!
+0x24 0xf9 0x4d 0x8a
+# CHECK: vld1.16 {d8, d9}, [r4]!
+0x24 0xf9 0x8d 0x8a
+# CHECK: vld1.32 {d8, d9}, [r4]!
+0x24 0xf9 0xcd 0x8a
+# CHECK: vld1.64 {d8, d9}, [r4]!
+0x24 0xf9 0x06 0x8a
+# CHECK: vld1.8 {d8, d9}, [r4], r6
+0x24 0xf9 0x46 0x8a
+# CHECK: vld1.16 {d8, d9}, [r4], r6
+0x24 0xf9 0x86 0x8a
+# CHECK: vld1.32 {d8, d9}, [r4], r6
+0x24 0xf9 0xc6 0x8a
+# CHECK: vld1.64 {d8, d9}, [r4], r6
+0x24 0xf9 0x0d 0x86
+# CHECK: vld1.8 {d8, d9, d10}, [r4]!
+0x24 0xf9 0x4d 0x86
+# CHECK: vld1.16 {d8, d9, d10}, [r4]!
+0x24 0xf9 0x8d 0x86
+# CHECK: vld1.32 {d8, d9, d10}, [r4]!
+0x24 0xf9 0xcd 0x86
+# CHECK: vld1.64 {d8, d9, d10}, [r4]!
+0x24 0xf9 0x06 0x86
+# CHECK: vld1.8 {d8, d9, d10}, [r4], r6
+0x24 0xf9 0x46 0x86
+# CHECK: vld1.16 {d8, d9, d10}, [r4], r6
+0x24 0xf9 0x86 0x86
+# CHECK: vld1.32 {d8, d9, d10}, [r4], r6
+0x24 0xf9 0xc6 0x86
+# CHECK: vld1.64 {d8, d9, d10}, [r4], r6
+0x24 0xf9 0x0d 0x82
+# CHECK: vld1.8 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x4d 0x82
+# CHECK: vld1.16 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x8d 0x82
+# CHECK: vld1.32 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0xcd 0x82
+# CHECK: vld1.64 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x06 0x82
+# CHECK: vld1.8 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0x46 0x82
+# CHECK: vld1.16 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0x86 0x82
+# CHECK: vld1.32 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0xc6 0x82
+# CHECK: vld1.64 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0x0d 0x88
+# CHECK: vld2.8 {d8, d9}, [r4]!
+0x24 0xf9 0x4d 0x88
+# CHECK: vld2.16 {d8, d9}, [r4]!
+0x24 0xf9 0x8d 0x88
+# CHECK: vld2.32 {d8, d9}, [r4]!
+0x24 0xf9 0x06 0x88
+# CHECK: vld2.8 {d8, d9}, [r4], r6
+0x24 0xf9 0x46 0x88
+# CHECK: vld2.16 {d8, d9}, [r4], r6
+0x24 0xf9 0x86 0x88
+# CHECK: vld2.32 {d8, d9}, [r4], r6
+0x24 0xf9 0x0d 0x89
+# CHECK: vld2.8 {d8, d10}, [r4]!
+0x24 0xf9 0x4d 0x89
+# CHECK: vld2.16 {d8, d10}, [r4]!
+0x24 0xf9 0x8d 0x89
+# CHECK: vld2.32 {d8, d10}, [r4]!
+0x24 0xf9 0x06 0x89
+# CHECK: vld2.8 {d8, d10}, [r4], r6
+0x24 0xf9 0x46 0x89
+# CHECK: vld2.16 {d8, d10}, [r4], r6
+0x24 0xf9 0x86 0x89
+# CHECK: vld2.32 {d8, d10}, [r4], r6
+0x24 0xf9 0x0d 0x84
+# CHECK: vld3.8 {d8, d9, d10}, [r4]!
+0x24 0xf9 0x4d 0x84
+# CHECK: vld3.16 {d8, d9, d10}, [r4]!
+0x24 0xf9 0x8d 0x84
+# CHECK: vld3.32 {d8, d9, d10}, [r4]!
+0x24 0xf9 0x06 0x85
+# CHECK: vld3.8 {d8, d10, d12}, [r4], r6
+0x24 0xf9 0x46 0x85
+# CHECK: vld3.16 {d8, d10, d12}, [r4], r6
+0x24 0xf9 0x86 0x85
+# CHECK: vld3.32 {d8, d10, d12}, [r4], r6
+0x24 0xf9 0x0d 0x80
+# CHECK: vld4.8 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x4d 0x80
+# CHECK: vld4.16 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x8d 0x80
+# CHECK: vld4.32 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x06 0x81
+# CHECK: vld4.8 {d8, d10, d12, d14}, [r4], r6
+0x24 0xf9 0x46 0x81
+# CHECK: vld4.16 {d8, d10, d12, d14}, [r4], r6
+0x24 0xf9 0x86 0x81
+# CHECK: vld4.32 {d8, d10, d12, d14}, [r4], r6
+0x24 0xf9 0x4f 0x8a
+# CHECK: vld1.16 {d8, d9}, [r4]
+0x24 0xf9 0x8f 0x8a
+# CHECK: vld1.32 {d8, d9}, [r4]
+0x24 0xf9 0xcf 0x8a
+# CHECK: vld1.64 {d8, d9}, [r4]
+0x24 0xf9 0x0f 0x8a
+# CHECK: vld1.8 {d8, d9}, [r4]
+0x24 0xf9 0x4f 0x88
+# CHECK: vld2.16 {d8, d9}, [r4]
+0x24 0xf9 0x8f 0x88
+# CHECK: vld2.32 {d8, d9}, [r4]
+0x24 0xf9 0x0f 0x88
+# CHECK: vld2.8 {d8, d9}, [r4]
+0x24 0xf9 0x4d 0x88
+# CHECK: vld2.16 {d8, d9}, [r4]!
+0x24 0xf9 0x46 0x88
+# CHECK: vld2.16 {d8, d9}, [r4], r6
+0x24 0xf9 0x8d 0x88
+# CHECK: vld2.32 {d8, d9}, [r4]!
+0x24 0xf9 0x86 0x88
+# CHECK: vld2.32 {d8, d9}, [r4], r6
+0x24 0xf9 0x0d 0x88
+# CHECK: vld2.8 {d8, d9}, [r4]!
+0x24 0xf9 0x06 0x88
+# CHECK: vld2.8 {d8, d9}, [r4], r6
+0x24 0xf9 0x4f 0x89
+# CHECK: vld2.16 {d8, d10}, [r4]
+0x24 0xf9 0x8f 0x89
+# CHECK: vld2.32 {d8, d10}, [r4]
+0x24 0xf9 0x0f 0x89
+# CHECK: vld2.8 {d8, d10}, [r4]
+0x24 0xf9 0x4d 0x83
+# CHECK: vld2.16 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x46 0x83
+# CHECK: vld2.16 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0x8d 0x83
+# CHECK: vld2.32 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x86 0x83
+# CHECK: vld2.32 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0x0d 0x83
+# CHECK: vld2.8 {d8, d9, d10, d11}, [r4]!
+0x24 0xf9 0x06 0x83
+# CHECK: vld2.8 {d8, d9, d10, d11}, [r4], r6
+0x24 0xf9 0x0f 0x84
+# CHECK: vld3.8 {d8, d9, d10}, [r4]
+0x24 0xf9 0x4f 0x84
+# CHECK: vld3.16 {d8, d9, d10}, [r4]
+0x24 0xf9 0x8f 0x84
+# CHECK: vld3.32 {d8, d9, d10}, [r4]
+0x24 0xf9 0x0f 0x80
+# CHECK: vld4.8 {d8, d9, d10, d11}, [r4]
+0x24 0xf9 0x4f 0x80
+# CHECK: vld4.16 {d8, d9, d10, d11}, [r4]
+0x24 0xf9 0x8f 0x80
+# CHECK: vld4.32 {d8, d9, d10, d11}, [r4]
+0x24 0xf9 0x0f 0x85
+# CHECK: vld3.8 {d8, d10, d12}, [r4]
+0x24 0xf9 0x4f 0x85
+# CHECK: vld3.16 {d8, d10, d12}, [r4]
+0x24 0xf9 0x8f 0x85
+# CHECK: vld3.32 {d8, d10, d12}, [r4]
+0x24 0xf9 0x0f 0x81
+# CHECK: vld4.8 {d8, d10, d12, d14}, [r4]
+0x24 0xf9 0x4f 0x81
+# CHECK: vld4.16 {d8, d10, d12, d14}, [r4]
+0x24 0xf9 0x8f 0x81
+# CHECK: vld4.32 {d8, d10, d12, d14}, [r4]
diff --git a/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
new file mode 100644
index 000000000000..275bae2fa28e
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-ADC-arm.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
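+# Each word below places pc (r15) in a register field of an ADC form that the
+# ARM ARM marks UNPREDICTABLE; the disassembler should warn rather than reject.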
+# CHECK: potentially undefined
+# CHECK: 0x1f 0x12 0xb0 0x00
+0x1f 0x12 0xb0 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x13 0xf2 0xb0 0x00
+0x13 0xf2 0xb0 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x13 0x1f 0xb0 0x00
+0x13 0x1f 0xb0 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x13 0x12 0xbf 0x00
+0x13 0x12 0xbf 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
new file mode 100644
index 000000000000..635b66ea43f0
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-ADDREXT3-arm.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0xd1 0xf1 0x5f 0x01
+0xd1 0xf1 0x5f 0x01
+# CHECK: potentially undefined
+# CHECK: 0xf1 0xf1 0x5f 0x01
+0xf1 0xf1 0x5f 0x01
+# CHECK: potentially undefined
+# CHECK: 0xf1 0xf1 0x5f 0x01
+0xf1 0xf1 0x5f 0x01
+# CHECK: potentially undefined
+# CHECK: 0xd1 0xe1 0x4f 0x01
+0xd1 0xe1 0x4f 0x01
+
+
diff --git a/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
new file mode 100644
index 000000000000..ed5e350c13c8
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-LDR-arm.txt
@@ -0,0 +1,22 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
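+# Each word below pushes an LDR-family encoding into an UNPREDICTABLE corner
+# (pc in a register field, or a disallowed addressing/writeback combination).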
+# CHECK: potentially undefined
+# CHECK: 0xff 0x00 0xb9 0x00
+0xff 0x00 0xb9 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0xf0 0xb9 0x00
+0xfb 0xf0 0xb9 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0x01 0xb9 0x00
+0xfb 0x01 0xb9 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0x00 0xbf 0x00
+0xfb 0x00 0xbf 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0xfb 0x90 0xb9 0x00
+0xfb 0x90 0xb9 0x00
+
diff --git a/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
new file mode 100644
index 000000000000..a8f54f7127f2
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-LDRD-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0| X: X: X: 1| X: X: X: X| 1: 1: X: 1| X: X: X: X|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.68 LDRD (register)
+# if Rt{0} = 1 then UNDEFINED;
+
+# CHECK: potentially undefined
+# CHECK: 0xd0 0x10 0x00 0x00
+0xd0 0x10 0x00 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
new file mode 100644
index 000000000000..f7d6bc6edcf6
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-LSL-regform.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=196 Name=MOVs Format=ARM_FORMAT_DPSOREGFRM(5)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 0: 0: 1| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 0: 0: 1| 0: 0: 0: 1| 0: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.89 LSL (register)
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
+0x12 0xf1 0xa0 0xe1
diff --git a/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
new file mode 100644
index 000000000000..3db86cc44fb6
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-MUL-arm.txt
@@ -0,0 +1,17 @@
+# RUN: llvm-mc --disassemble %s -triple=armv7-linux-gnueabi |& FileCheck %s
+
+# CHECK: potentially undefined
+# CHECK: 0x93 0x12 0x01 0x00
+0x93 0x12 0x01 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x92 0x0f 0x01 0x00
+0x92 0x0f 0x01 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x9f 0x02 0x01 0x00
+0x9f 0x02 0x01 0x00
+
+# CHECK: potentially undefined
+# CHECK: 0x92 0x01 0x0f 0x00
+0x92 0x01 0x0f 0x00
diff --git a/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
new file mode 100644
index 000000000000..5b1361094990
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-RSC-arm.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=261 Name=RSCrs Format=ARM_FORMAT_DPSOREGFRM(5)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 0: 0: 1: 1| 0: 0: 0: 0| 1: 1: 1: 0| 0: 1: 0: 0| 1: 1: 1: 1| 1: 0: 0: 0| 0: 1: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+# if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
+0x5f 0xf8 0xe4 0x30
diff --git a/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
new file mode 100644
index 000000000000..8ec49cad3499
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-SHADD16-arm.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# CHECK: warning: potentially undefined
+# CHECK: shadd16 r5, r7, r0
+0x10 0x51 0x37 0xe6
+
+
diff --git a/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
new file mode 100644
index 000000000000..874378ed024c
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-SSAT-arm.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=322 Name=SSAT Format=ARM_FORMAT_SATFRM(13)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 1: 0: 1: 0| 0: 0: 0: 0| 1: 1: 1: 1| 0: 1: 0: 0| 0: 0: 0: 1| 1: 0: 1: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# A8.6.183 SSAT
+# if d == 15 || n == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
+0x1a 0xf4 0xa0 0xe6
diff --git a/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
new file mode 100644
index 000000000000..fef6125d283d
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-STRBrs-arm.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=355 Name=STRBrs Format=ARM_FORMAT_STFRM(7)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 1| 1: 1: 0: 0| 1: 1: 1: 1| 1: 1: 1: 1| 0: 0: 0: 0| 0: 0: 0: 0| 0: 0: 0: 0|
+# -------------------------------------------------------------------------------------------------
+#
+# if t == 15 then UNPREDICTABLE;
+
+# CHECK: warning: potentially undefined instruction encoding
+0x00 0xf0 0xcf 0xe7
diff --git a/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
new file mode 100644
index 000000000000..4c4c9abed2fc
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-UQADD8-arm.txt
@@ -0,0 +1,16 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 |& FileCheck %s
+
+# Opcode=426 Name=UQADD8 Format=ARM_FORMAT_DPFRM(4)
+# 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+# -------------------------------------------------------------------------------------------------
+# | 1: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 1: 0| 0: 1: 0: 1| 1: 1: 1: 1| 1: 0: 0: 1| 1: 1: 1: 1|
+# -------------------------------------------------------------------------------------------------
+#
+# DPFrm with bad reg specifier(s)
+#
+# if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+0x9f 0x5f 0x66 0xe6
+
+# CHECK: warning: potentially undefined
+# CHECK: uqadd8 r5, r6, pc
+
diff --git a/test/MC/Disassembler/ARM/unpredictables-thumb.txt b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
new file mode 100644
index 000000000000..e7645f0a59e3
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictables-thumb.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc --disassemble %s -triple=thumbv7 |& FileCheck %s
+
+0x01 0x47
+# CHECK: 3:1: warning: potentially undefined
+# CHECK: bx r0
diff --git a/test/MC/Disassembler/ARM/vfp4.txt b/test/MC/Disassembler/ARM/vfp4.txt
new file mode 100644
index 000000000000..4f2c73211833
--- /dev/null
+++ b/test/MC/Disassembler/ARM/vfp4.txt
@@ -0,0 +1,37 @@
+# RUN: llvm-mc < %s -triple thumbv7-unknown-unknown --disassemble -mattr=+neon,+vfp4 | FileCheck %s
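+# vfma/vfms/vfnma/vfnms are the VFPv4 fused multiply-accumulate family; these
+# words only decode with -mattr=+neon,+vfp4 as in the RUN line above.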
+
+# CHECK: vfma.f64 d16, d18, d17
+0xe2 0xee 0xa1 0x0b
+
+# CHECK: vfma.f32 s2, s4, s0
+0xa2 0xee 0x00 0x1a
+
+# CHECK: vfma.f32 d16, d18, d17
+0x42 0xef 0xb1 0x0c
+
+# CHECK: vfma.f32 q2, q4, q0
+0x08 0xef 0x50 0x4c
+
+# CHECK: vfnms.f64 d16, d18, d17
+0xd2 0xee 0xa1 0x0b
+
+# CHECK: vfnms.f32 s2, s4, s0
+0x92 0xee 0x00 0x1a
+
+# CHECK: vfms.f64 d16, d18, d17
+0xe2 0xee 0xe1 0x0b
+
+# CHECK: vfms.f32 s2, s4, s0
+0xa2 0xee 0x40 0x1a
+
+# CHECK: vfms.f32 d16, d18, d17
+0x62 0xef 0xb1 0x0c
+
+# CHECK: vfms.f32 q2, q4, q0
+0x28 0xef 0x50 0x4c
+
+# CHECK: vfnma.f64 d16, d18, d17
+0xd2 0xee 0xe1 0x0b
+
+# CHECK: vfnma.f32 s2, s4, s0
+0x92 0xee 0x40 0x1a
diff --git a/test/MC/Disassembler/MBlaze/dg.exp b/test/MC/Disassembler/MBlaze/dg.exp
deleted file mode 100644
index 0be99a34235d..000000000000
--- a/test/MC/Disassembler/MBlaze/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/MBlaze/lit.local.cfg b/test/MC/Disassembler/MBlaze/lit.local.cfg
new file mode 100644
index 000000000000..3955b4e167a5
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/Disassembler/MBlaze/mblaze_mbar.txt b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
new file mode 100644
index 000000000000..6beba86b15ba
--- /dev/null
+++ b/test/MC/Disassembler/MBlaze/mblaze_mbar.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc --disassemble %s -triple=mblaze-unknown-unknown | FileCheck %s
+
+################################################################################
+# Memory Barrier instructions
+################################################################################
+
+# CHECK: mbar 0
+0xB8 0x02 0x00 0x04
+
+# CHECK: mbar 1
+0xB8 0x22 0x00 0x04
+
+# CHECK: mbar 2
+0xB8 0x42 0x00 0x04
diff --git a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
index 1268378fa0f8..cb19ee0427bd 100644
--- a/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
+++ b/test/MC/Disassembler/MBlaze/mblaze_pattern.txt
@@ -12,3 +12,6 @@
# CHECK: pcmpeq r0, r1, r2
0x88 0x01 0x14 0x00
+
+# CHECK: clz r0, r1
+0x90 0x01 0x00 0xE0
diff --git a/test/MC/Disassembler/X86/dg.exp b/test/MC/Disassembler/X86/dg.exp
deleted file mode 100644
index a4d0e7c718c8..000000000000
--- a/test/MC/Disassembler/X86/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{txt}]]
-}
-
diff --git a/test/MC/Disassembler/X86/intel-syntax.txt b/test/MC/Disassembler/X86/intel-syntax.txt
index 54b242d7b2ef..3391e45cc000 100644
--- a/test/MC/Disassembler/X86/intel-syntax.txt
+++ b/test/MC/Disassembler/X86/intel-syntax.txt
@@ -77,3 +77,31 @@
# CHECK: test RAX, 0
0x48 0xa9 0x00 0x00 0x00 0x00
+
+# CHECK: sysret
+0x48 0x0f 0x07
+
+# CHECK: sysret
+0x0f 0x07
+
+# CHECK: sysexit
+0x48 0x0f 0x35
+
+# CHECK: sysexit
+0x0f 0x35
+
+# CHECK: iret
+0x66 0xcf
+
+# CHECK: iretd
+0xcf
+
+# CHECK: iretq
+0x48 0xcf
+
+# CHECK: ret
+0x66 0xc3
+
+# CHECK: retf
+0x66 0xcb
+
diff --git a/test/MC/Disassembler/X86/invalid-cmp-imm.txt b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
new file mode 100644
index 000000000000..bf8699b24f26
--- /dev/null
+++ b/test/MC/Disassembler/X86/invalid-cmp-imm.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 |& grep {invalid instruction encoding}
+
+# This instruction would decode as cmpordps if the immediate byte was less than 8.
+0x0f 0xc2 0xc7 0x08
+# This instruction would decode as cmpordpd if the immediate byte was less than 8.
+0x66 0x0f 0xc2 0xc7 0x08
+# This instruction would decode as cmpordss if the immediate byte was less than 8.
+0xf3 0x0f 0xc2 0xc7 0x08
+# This instruction would decode as cmpordsd if the immediate byte was less than 8.
+0xf2 0x0f 0xc2 0xc7 0x08
diff --git a/test/MC/Disassembler/X86/lit.local.cfg b/test/MC/Disassembler/X86/lit.local.cfg
new file mode 100644
index 000000000000..6211b3e53853
--- /dev/null
+++ b/test/MC/Disassembler/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.txt']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/Disassembler/X86/simple-tests.txt b/test/MC/Disassembler/X86/simple-tests.txt
index 2dc918cb0dca..c0e77d0698bd 100644
--- a/test/MC/Disassembler/X86/simple-tests.txt
+++ b/test/MC/Disassembler/X86/simple-tests.txt
@@ -6,6 +6,11 @@
# CHECK: int $33
0xCD 0x21
+# CHECK: jrcxz -127
+0xe3 0x81
+
+# CHECK: jecxz -127
+0x67 0xe3 0x81
# CHECK: addb %al, (%rax)
0 0
@@ -28,6 +33,9 @@
# CHECK: vmcall
0x0f 0x01 0xc1
+# CHECK: vmfunc
+0x0f 0x01 0xd4
+
# CHECK: vmlaunch
0x0f 0x01 0xc2
@@ -52,6 +60,30 @@
# CHECK: vmptrst
0x0f 0xc7 0x38
+# CHECK: vmrun
+0x0f 0x01 0xd8
+
+# CHECK: vmmcall
+0x0f 0x01 0xd9
+
+# CHECK: vmload
+0x0f 0x01 0xda
+
+# CHECK: vmsave
+0x0f 0x01 0xdb
+
+# CHECK: stgi
+0x0f 0x01 0xdc
+
+# CHECK: clgi
+0x0f 0x01 0xdd
+
+# CHECK: skinit
+0x0f 0x01 0xde
+
+# CHECK: invlpga
+0x0f 0x01 0xdf
+
# CHECK: movl $0, -4(%rbp)
0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
@@ -309,6 +341,9 @@
# CHECK: invvpid (%rax), %rax
0x66 0x0f 0x38 0x81 0x00
+# CHECK: invpcid (%rax), %rax
+0x66 0x0f 0x38 0x82 0x00
+
# CHECK: nop
0x90
@@ -518,3 +553,179 @@
# CHECK: andnq (%rax), %r15, %rax
0xc4 0xe2 0x80 0xf2 0x00
+
+# CHECK: blsrl (%rax), %r15d
+0xc4 0xe2 0x00 0xf3 0x08
+
+# CHECK: blsrq (%rax), %r15
+0xc4 0xe2 0x80 0xf3 0x08
+
+# CHECK: blsmskl (%rax), %r15d
+0xc4 0xe2 0x00 0xf3 0x10
+
+# CHECK: blsmskq (%rax), %r15
+0xc4 0xe2 0x80 0xf3 0x10
+
+# CHECK: blsil (%rax), %r15d
+0xc4 0xe2 0x00 0xf3 0x18
+
+# CHECK: blsiq (%rax), %r15
+0xc4 0xe2 0x80 0xf3 0x18
+
+# CHECK: bextrl %r12d, (%rax), %r10d
+0xc4 0x62 0x18 0xf7 0x10
+
+# CHECK: bextrl %r12d, %r11d, %r10d
+0xc4 0x42 0x18 0xf7 0xd3
+
+# CHECK: bextrq %r12, (%rax), %r10
+0xc4 0x62 0x98 0xf7 0x10
+
+# CHECK: bextrq %r12, %r11, %r10
+0xc4 0x42 0x98 0xf7 0xd3
+
+# CHECK: bzhil %r12d, (%rax), %r10d
+0xc4 0x62 0x18 0xf5 0x10
+
+# CHECK: bzhil %r12d, %r11d, %r10d
+0xc4 0x42 0x18 0xf5 0xd3
+
+# CHECK: bzhiq %r12, (%rax), %r10
+0xc4 0x62 0x98 0xf5 0x10
+
+# CHECK: bzhiq %r12, %r11, %r10
+0xc4 0x42 0x98 0xf5 0xd3
+
+# CHECK: pextl %r12d, %r11d, %r10d
+0xc4 0x42 0x22 0xf5 0xd4
+
+# CHECK: pextl (%rax), %r11d, %r10d
+0xc4 0x62 0x22 0xf5 0x10
+
+# CHECK: pextq %r12, %r11, %r10
+0xc4 0x42 0xa2 0xf5 0xd4
+
+# CHECK: pextq (%rax), %r11, %r10
+0xc4 0x62 0xa2 0xf5 0x10
+
+# CHECK: pdepl %r12d, %r11d, %r10d
+0xc4 0x42 0x23 0xf5 0xd4
+
+# CHECK: pdepl (%rax), %r11d, %r10d
+0xc4 0x62 0x23 0xf5 0x10
+
+# CHECK: pdepq %r12, %r11, %r10
+0xc4 0x42 0xa3 0xf5 0xd4
+
+# CHECK: pdepq (%rax), %r11, %r10
+0xc4 0x62 0xa3 0xf5 0x10
+
+# CHECK: mulxl %r12d, %r11d, %r10d
+0xc4 0x42 0x23 0xf6 0xd4
+
+# CHECK: mulxl (%rax), %r11d, %r10d
+0xc4 0x62 0x23 0xf6 0x10
+
+# CHECK: mulxq %r12, %r11, %r10
+0xc4 0x42 0xa3 0xf6 0xd4
+
+# CHECK: mulxq (%rax), %r11, %r10
+0xc4 0x62 0xa3 0xf6 0x10
+
+# CHECK: rorxl $1, %r12d, %r10d
+0xc4 0x43 0x7b 0xf0 0xd4 0x01
+
+# CHECK: rorxl $31, (%rax), %r10d
+0xc4 0x63 0x7b 0xf0 0x10 0x1f
+
+# CHECK: rorxq $1, %r12, %r10
+0xc4 0x43 0xfb 0xf0 0xd4 0x01
+
+# CHECK: rorxq $63, (%rax), %r10
+0xc4 0x63 0xfb 0xf0 0x10 0x3f
+
+# CHECK: shlxl %r12d, (%rax), %r10d
+0xc4 0x62 0x19 0xf7 0x10
+
+# CHECK: shlxl %r12d, %r11d, %r10d
+0xc4 0x42 0x19 0xf7 0xd3
+
+# CHECK: shlxq %r12, (%rax), %r10
+0xc4 0x62 0x99 0xf7 0x10
+
+# CHECK: shlxq %r12, %r11, %r10
+0xc4 0x42 0x99 0xf7 0xd3
+
+# CHECK: sarxl %r12d, (%rax), %r10d
+0xc4 0x62 0x1a 0xf7 0x10
+
+# CHECK: sarxl %r12d, %r11d, %r10d
+0xc4 0x42 0x1a 0xf7 0xd3
+
+# CHECK: sarxq %r12, (%rax), %r10
+0xc4 0x62 0x9a 0xf7 0x10
+
+# CHECK: sarxq %r12, %r11, %r10
+0xc4 0x42 0x9a 0xf7 0xd3
+
+# CHECK: shrxl %r12d, (%rax), %r10d
+0xc4 0x62 0x1b 0xf7 0x10
+
+# CHECK: shrxl %r12d, %r11d, %r10d
+0xc4 0x42 0x1b 0xf7 0xd3
+
+# CHECK: shrxq %r12, (%rax), %r10
+0xc4 0x62 0x9b 0xf7 0x10
+
+# CHECK: shrxq %r12, %r11, %r10
+0xc4 0x42 0x9b 0xf7 0xd3
+
+# CHECK: vfmadd132ps %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x19 0x98 0xd3
+
+# CHECK: vfmadd132pd %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x99 0x98 0xd3
+
+# CHECK: vfmadd132ps %ymm11, %ymm12, %ymm10
+0xc4 0x42 0x1d 0x98 0xd3
+
+# CHECK: vfmadd132pd %ymm11, %ymm12, %ymm10
+0xc4 0x42 0x9d 0x98 0xd3
+
+# CHECK: vfmadd132ps (%rax), %xmm12, %xmm10
+0xc4 0x62 0x19 0x98 0x10
+
+# CHECK: vfmadd132pd (%rax), %xmm12, %xmm10
+0xc4 0x62 0x99 0x98 0x10
+
+# CHECK: vfmadd132ps (%rax), %ymm12, %ymm10
+0xc4 0x62 0x1d 0x98 0x10
+
+# CHECK: vfmadd132pd (%rax), %ymm12, %ymm10
+0xc4 0x62 0x9d 0x98 0x10
+
+# CHECK: vfmadd132ss %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x19 0x99 0xd3
+
+# CHECK: vfmadd132sd %xmm11, %xmm12, %xmm10
+0xc4 0x42 0x99 0x99 0xd3
+
+# CHECK: vfmadd132ss (%rax), %xmm12, %xmm10
+0xc4 0x62 0x19 0x99 0x10
+
+# CHECK: vfmadd132sd (%rax), %xmm12, %xmm10
+0xc4 0x62 0x99 0x99 0x10
+
+# CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+0xc4 0xe3 0xf9 0x6a 0x01 0x10
+
+# CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
+0xc4 0xe3 0x79 0x6a 0x01 0x10
+
+# CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+0xc4 0xe3 0xe1 0x48 0x40 0x04 0x21
+
+# rdar://8812056 lldb doesn't print the x86 lock prefix when disassembling
+# CHECK: lock
+# CHECK-NEXT: xaddq %rcx, %rbx
+0xf0 0x48 0x0f 0xc1 0xcb
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
index c4437ba35d74..739fa6a843f9 100644
--- a/test/MC/Disassembler/X86/x86-32.txt
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -11,6 +11,12 @@
# CHECK: calll
0xff 0xd0
+# CHECK: jecxz -127
+0xe3 0x81
+
+# CHECK: jcxz -127
+0x67 0xe3 0x81
+
# CHECK: incl
0x40
@@ -63,6 +69,9 @@
# CHECK: vmcall
0x0f 0x01 0xc1
+# CHECK: vmfunc
+0x0f 0x01 0xd4
+
# CHECK: vmlaunch
0x0f 0x01 0xc2
@@ -87,6 +96,30 @@
# CHECK: vmptrst
0x0f 0xc7 0x38
+# CHECK: vmrun
+0x0f 0x01 0xd8
+
+# CHECK: vmmcall
+0x0f 0x01 0xd9
+
+# CHECK: vmload
+0x0f 0x01 0xda
+
+# CHECK: vmsave
+0x0f 0x01 0xdb
+
+# CHECK: stgi
+0x0f 0x01 0xdc
+
+# CHECK: clgi
+0x0f 0x01 0xdd
+
+# CHECK: skinit
+0x0f 0x01 0xde
+
+# CHECK: invlpga
+0x0f 0x01 0xdf
+
# CHECK: movl $0, -4(%ebp)
0xc7 0x45 0xfc 0x00 0x00 0x00 0x00
@@ -328,6 +361,9 @@
# CHECK: invvpid (%eax), %eax
0x66 0x0f 0x38 0x81 0x00
+# CHECK: invpcid (%eax), %eax
+0x66 0x0f 0x38 0x82 0x00
+
# CHECK: nop
0x90
@@ -385,6 +421,18 @@
# CHECK: movl %eax, 0
0xa3 0x00 0x00 0x00 0x00
+# CHECK: cmpordpd %xmm7, %xmm0
+0x66 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordps %xmm7, %xmm0
+0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordsd %xmm7, %xmm0
+0xf2 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordss %xmm7, %xmm0
+0xf3 0x0f 0xc2 0xc7 0x07
+
# CHECK: vaddps %xmm3, %xmm7, %xmm0
0xc4 0xe1 0x00 0x58 0xc3
@@ -495,3 +543,72 @@
# CHECK: andnl (%eax), %edi, %eax
0xc4 0xe2 0x80 0xf2 0x00
+
+# CHECK: blsrl (%eax), %edi
+0xc4 0xe2 0x40 0xf3 0x08
+
+# CHECK: blsmskl (%eax), %edi
+0xc4 0xe2 0x40 0xf3 0x10
+
+# CHECK: blsil (%eax), %edi
+0xc4 0xe2 0x40 0xf3 0x18
+
+# CHECK: bextrl %esi, (%eax), %edx
+0xc4 0xe2 0x08 0xf7 0x10
+
+# CHECK: bextrl %esi, %ebx, %edx
+0xc4 0xe2 0x08 0xf7 0xd3
+
+# CHECK: bzhil %esi, (%eax), %edx
+0xc4 0xe2 0x08 0xf5 0x10
+
+# CHECK: bzhil %esi, %ebx, %edx
+0xc4 0xe2 0x08 0xf5 0xd3
+
+# CHECK: pextl %esp, %ecx, %edx
+0xc4 0xe2 0x72 0xf5 0xd4
+
+# CHECK: pextl (%eax), %ecx, %edx
+0xc4 0xe2 0x72 0xf5 0x10
+
+# CHECK: pdepl %esp, %ecx, %edx
+0xc4 0xe2 0x73 0xf5 0xd4
+
+# CHECK: pdepl (%eax), %ecx, %edx
+0xc4 0xe2 0x73 0xf5 0x10
+
+# CHECK: mulxl %esp, %ecx, %edx
+0xc4 0xe2 0x73 0xf6 0xd4
+
+# CHECK: mulxl (%eax), %ecx, %edx
+0xc4 0xe2 0x73 0xf6 0x10
+
+# CHECK: mulxl %esp, %ecx, %edx
+0xc4 0xe2 0xf3 0xf6 0xd4
+
+# CHECK: mulxl (%eax), %ecx, %edx
+0xc4 0xe2 0xf3 0xf6 0x10
+
+# CHECK: rorxl $1, %esp, %edx
+0xc4 0xe3 0x7b 0xf0 0xd4 0x01
+
+# CHECK: rorxl $31, (%eax), %edx
+0xc4 0xe3 0x7b 0xf0 0x10 0x1f
+
+# CHECK: shlxl %esi, (%eax), %edx
+0xc4 0xe2 0x09 0xf7 0x10
+
+# CHECK: shlxl %esi, %ebx, %edx
+0xc4 0xe2 0x09 0xf7 0xd3
+
+# CHECK: sarxl %esi, (%eax), %edx
+0xc4 0xe2 0x0a 0xf7 0x10
+
+# CHECK: sarxl %esi, %ebx, %edx
+0xc4 0xe2 0x0a 0xf7 0xd3
+
+# CHECK: shrxl %esi, (%eax), %edx
+0xc4 0xe2 0x0b 0xf7 0x10
+
+# CHECK: shrxl %esi, %ebx, %edx
+0xc4 0xe2 0x0b 0xf7 0xd3
diff --git a/test/MC/Disassembler/X86/x86-64.txt b/test/MC/Disassembler/X86/x86-64.txt
new file mode 100644
index 000000000000..f4b8f46fa2e5
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-64.txt
@@ -0,0 +1,63 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s
+
+# Coverage
+
+# CHECK: vcmptrue_usps
+0xc5 0x04 0xc2 0xc7 0x1f
+
+# CHECK: vcmptrue_uspd
+0xc5 0x05 0xc2 0xc7 0x1f
+
+# CHECK: vcmptrue_usss
+0xc5 0x06 0xc2 0xc7 0x1f
+
+# CHECK: vcmptrue_ussd
+0xc5 0x07 0xc2 0xc7 0x1f
+
+# CHECK: vcmpeq_uqps
+0xc5 0x04 0xc2 0xc7 0x08
+
+# CHECK: vcmpeq_uqpd
+0xc5 0x05 0xc2 0xc7 0x08
+
+# CHECK: vcmpeq_uqss
+0xc5 0x06 0xc2 0xc7 0x08
+
+# CHECK: vcmpeq_uqsd
+0xc5 0x07 0xc2 0xc7 0x08
+
+# CHECK: vcmpeqps
+0xc5 0x04 0xc2 0xc7 0x00
+
+# CHECK: vcmpeqpd
+0xc5 0x05 0xc2 0xc7 0x00
+
+# CHECK: vcmpeqss
+0xc5 0x06 0xc2 0xc7 0x00
+
+# CHECK: vcmpeqsd
+0xc5 0x07 0xc2 0xc7 0x00
+
+# CHECK: cmpeqps
+0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpeqpd
+0x66 0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpeqss
+0xf3 0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpeqsd
+0xf2 0x0f 0xc2 0xc7 0x00
+
+# CHECK: cmpordps
+0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordpd
+0x66 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordss
+0xf3 0x0f 0xc2 0xc7 0x07
+
+# CHECK: cmpordsd
+0xf2 0x0f 0xc2 0xc7 0x07
diff --git a/test/MC/ELF/cfi-escape.s b/test/MC/ELF/cfi-escape.s
new file mode 100644
index 000000000000..3a5af00901cf
--- /dev/null
+++ b/test/MC/ELF/cfi-escape.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_escape 0x15, 7, 0x7f # DW_CFA_val_offset_sf, %esp, 8/-8
+ nop
+ .cfi_endproc
+
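+// .cfi_escape copies its operands into the FDE as raw bytes, so 0x15 0x07 0x7f
+// appears verbatim in the .eh_frame section data checked below.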
+// CHECK: # Section 4
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT: ('sh_size', 0x0000000000000030)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 00411507 7f000000')
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 5
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000004)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT: ('sh_size', 0x0000000000000018)
+// CHECK-NEXT: ('sh_link', 0x00000007)
+// CHECK-NEXT: ('sh_info', 0x00000004)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-NEXT: ('r_sym', 0x00000002)
+// CHECK-NEXT: ('r_type', 0x00000002)
+// CHECK-NEXT: ('r_addend', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-restore.s b/test/MC/ELF/cfi-restore.s
new file mode 100644
index 000000000000..0fc3129c713c
--- /dev/null
+++ b/test/MC/ELF/cfi-restore.s
@@ -0,0 +1,42 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ nop
+ .cfi_restore %rbp
+ nop
+ .cfi_endproc
+
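+// DW_CFA_restore encodes the register in its low six bits (0xc0 | reg); %rbp
+// is DWARF register 6 on x86-64, hence the 'c6' byte in the FDE data below.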
+// CHECK: # Section 4
+// CHECK-NEXT: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000048)
+// CHECK-NEXT: ('sh_size', 0x0000000000000030)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5200 01781001 1b0c0708 90010000 14000000 1c000000 00000000 02000000 0041c600 00000000')
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Section 5
+// CHECK-NEXT: (('sh_name', 0x0000000c) # '.rela.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000004)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000390)
+// CHECK-NEXT: ('sh_size', 0x0000000000000018)
+// CHECK-NEXT: ('sh_link', 0x00000007)
+// CHECK-NEXT: ('sh_info', 0x00000004)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x0000000000000020)
+// CHECK-NEXT: ('r_sym', 0x00000002)
+// CHECK-NEXT: ('r_type', 0x00000002)
+// CHECK-NEXT: ('r_addend', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ])
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/cfi-signal-frame.s b/test/MC/ELF/cfi-signal-frame.s
new file mode 100644
index 000000000000..cf6d16073abe
--- /dev/null
+++ b/test/MC/ELF/cfi-signal-frame.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+f:
+ .cfi_startproc
+ .cfi_signal_frame
+ .cfi_endproc
+
+g:
+ .cfi_startproc
+ .cfi_endproc
+
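+// .cfi_signal_frame adds 'S' to the CIE augmentation string, so f uses a
+// 'zRS' CIE while g gets a second, plain 'zR' CIE in the section data below.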
+// CHECK: (('sh_name', 0x00000011) # '.eh_frame'
+// CHECK-NEXT: ('sh_type', 0x00000001)
+// CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+// CHECK-NEXT: ('sh_addr', 0x0000000000000000)
+// CHECK-NEXT: ('sh_offset', 0x0000000000000040)
+// CHECK-NEXT: ('sh_size', 0x0000000000000058)
+// CHECK-NEXT: ('sh_link', 0x00000000)
+// CHECK-NEXT: ('sh_info', 0x00000000)
+// CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+// CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+// CHECK-NEXT: ('_section_data', '14000000 00000000 017a5253 00017810 011b0c07 08900100 10000000 1c000000 00000000 00000000 00000000 14000000 00000000 017a5200 01781001 1b0c0708 90010000 10000000 1c000000 00000000 00000000 00000000')
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/dg.exp b/test/MC/ELF/dg.exp
deleted file mode 100644
index d46d700975e5..000000000000
--- a/test/MC/ELF/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,s}]]
-}
diff --git a/test/MC/ELF/gen-dwarf.s b/test/MC/ELF/gen-dwarf.s
new file mode 100644
index 000000000000..b090e0802b10
--- /dev/null
+++ b/test/MC/ELF/gen-dwarf.s
@@ -0,0 +1,70 @@
+// RUN: llvm-mc -g -triple i686-pc-linux-gnu %s -filetype=obj -o - | elf-dump | FileCheck %s
+
+
+// Test that on ELF the debug info has a relocation to debug_abbrev and one
+// to debug_line.
+
+
+ .text
+ .globl foo
+ .type foo, @function
+ .align 4
+foo:
+ ret
+ .size foo, .-foo
+
+// Section 4 is .debug_line
+// CHECK: # Section 4
+// CHECK-NEXT: # '.debug_line'
+
+
+
+// The two relocations, one to symbol 6 and one to 4
+// CHECK: # '.rel.debug_info'
+// CHECK-NEXT: ('sh_type',
+// CHECK-NEXT: ('sh_flags'
+// CHECK-NEXT: ('sh_addr',
+// CHECK-NEXT: ('sh_offset',
+// CHECK-NEXT: ('sh_size',
+// CHECK-NEXT: ('sh_link',
+// CHECK-NEXT: ('sh_info',
+// CHECK-NEXT: ('sh_addralign',
+// CHECK-NEXT: ('sh_entsize',
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: # Relocation 0
+// CHECK-NEXT: (('r_offset', 0x00000006)
+// CHECK-NEXT: ('r_sym', 0x000006)
+// CHECK-NEXT: ('r_type', 0x01)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Relocation 1
+// CHECK-NEXT: (('r_offset', 0x0000000c)
+// CHECK-NEXT: ('r_sym', 0x000004)
+// CHECK-NEXT: ('r_type', 0x01)
+// CHECK-NEXT: ),
+
+
+// Section 8 is .debug_abbrev
+// CHECK: # Section 8
+// CHECK-NEXT: (('sh_name', 0x00000001) # '.debug_abbrev'
+
+// Symbol 4 is section 4 (.debug_line)
+// CHECK: # Symbol 4
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x0)
+// CHECK-NEXT: ('st_type', 0x3)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0004)
+// CHECK-NEXT: ),
+
+// Symbol 6 is section 8 (.debug_abbrev)
+// CHECK: # Symbol 6
+// CHECK-NEXT: (('st_name', 0x00000000) # ''
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x0)
+// CHECK-NEXT: ('st_type', 0x3)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0008)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/global-offset.s b/test/MC/ELF/global-offset.s
index 8cc5dbb8d822..81ae5d785df9 100644
--- a/test/MC/ELF/global-offset.s
+++ b/test/MC/ELF/global-offset.s
@@ -6,6 +6,10 @@
addl $_GLOBAL_OFFSET_TABLE_, %ebx
leal _GLOBAL_OFFSET_TABLE_(%ebx), %ebx
+// But not in this case
+foo:
+ addl _GLOBAL_OFFSET_TABLE_-foo,%ebx
+
// CHECK: ('sh_name', 0x00000005) # '.text'
// CHECK-NEXT: ('sh_type',
// CHECK-NEXT: ('sh_flags',
@@ -16,4 +20,4 @@
// CHECK-NEXT: ('sh_info',
// CHECK-NEXT: ('sh_addralign',
// CHECK-NEXT: ('sh_entsize',
-// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000')
+// CHECK-NEXT: ('_section_data', '81c30200 00008d9b 02000000 031d0200 0000')
diff --git a/test/MC/ELF/lit.local.cfg b/test/MC/ELF/lit.local.cfg
new file mode 100644
index 000000000000..56bf00859572
--- /dev/null
+++ b/test/MC/ELF/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/ELF/many-section.s b/test/MC/ELF/many-section.s
index e7e723ad9059..b729e668168e 100644
--- a/test/MC/ELF/many-section.s
+++ b/test/MC/ELF/many-section.s
@@ -1,5 +1,5 @@
// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o %t
-// RUN: llvm-nm %t | FileCheck %s
+// RUN: llvm-nm -a %t | FileCheck %s
// CHECK: s000a
// CHECK-NOT: U
diff --git a/test/MC/ELF/relocation-386.s b/test/MC/ELF/relocation-386.s
index 442176307fa4..85da2eb8c949 100644
--- a/test/MC/ELF/relocation-386.s
+++ b/test/MC/ELF/relocation-386.s
@@ -160,6 +160,18 @@
// CHECK-NEXT: ('r_sym', 0x00000d)
// CHECK-NEXT: ('r_type', 0x21)
// CHECK-NEXT: ),
+// Relocation 25 (_GLOBAL_OFFSET_TABLE_-bar2) is of type R_386_GOTPC.
+// CHECK-NEXT: Relocation 25
+// CHECK-NEXT: (('r_offset', 0x00000094)
+// CHECK-NEXT: ('r_sym', 0x00000b)
+// CHECK-NEXT: ('r_type', 0x0a)
+// CHECK-NEXT: ),
+// Relocation 26 (und_symbol-bar2) is of type R_386_PC32
+// CHECK-NEXT: Relocation 26
+// CHECK-NEXT: (('r_offset', 0x0000009a)
+// CHECK-NEXT: ('r_sym', 0x00000e)
+// CHECK-NEXT: ('r_type', 0x02)
+// CHECK-NEXT: ),
// Section 4 is bss
// CHECK: # Section 4
@@ -225,6 +237,8 @@ bar2:
movl zed@DTPOFF(%eax), %eax
pushl $bar
addl foo@GOTTPOFF(%edx), %eax
+ subl _GLOBAL_OFFSET_TABLE_-bar2, %ebx
+ leal und_symbol-bar2(%edx),%ecx
.section zedsec,"awT",@progbits
zed:
diff --git a/test/MC/ELF/tls-i386.s b/test/MC/ELF/tls-i386.s
index 197418d93ce4..922d4c6e6c2c 100644
--- a/test/MC/ELF/tls-i386.s
+++ b/test/MC/ELF/tls-i386.s
@@ -9,6 +9,13 @@
movl foo5@TPOFF(%eax), %eax
movl foo6@DTPOFF(%eax), %eax
movl foo7@INDNTPOFF, %eax
+ .long foo8@NTPOFF
+ .long foo9@GOTNTPOFF
+ .long fooA@TLSGD
+ .long fooB@TLSLDM
+ .long fooC@TPOFF
+ .long fooD@DTPOFF
+ .long fooE@INDNTPOFF
// CHECK: (('st_name', 0x00000001) # 'foo1'
// CHECK-NEXT: ('st_value', 0x00000000)
@@ -72,3 +79,67 @@
// CHECK-NEXT: ('st_other', 0x00)
// CHECK-NEXT: ('st_shndx', 0x0000)
// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 12
+// CHECK-NEXT: (('st_name', 0x00000024) # 'foo8'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 13
+// CHECK-NEXT: (('st_name', 0x00000029) # 'foo9'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 14
+// CHECK-NEXT: (('st_name', 0x0000002e) # 'fooA'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 15
+// CHECK-NEXT: (('st_name', 0x00000033) # 'fooB'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 16
+// CHECK-NEXT: (('st_name', 0x00000038) # 'fooC'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 17
+// CHECK-NEXT: (('st_name', 0x0000003d) # 'fooD'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 18
+// CHECK-NEXT: (('st_name', 0x00000042) # 'fooE'
+// CHECK-NEXT: ('st_value', 0x00000000)
+// CHECK-NEXT: ('st_size', 0x00000000)
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ),
+
diff --git a/test/MC/ELF/tls.s b/test/MC/ELF/tls.s
index d6d7de6ff2cf..fe2bb4e73307 100644
--- a/test/MC/ELF/tls.s
+++ b/test/MC/ELF/tls.s
@@ -5,12 +5,14 @@
leaq foo1@TLSGD(%rip), %rdi
leaq foo2@GOTTPOFF(%rip), %rdi
leaq foo3@TLSLD(%rip), %rdi
-
+ .long foo4@GOTTPOFF
+ .long foo5@TLSLD
+ .long foo6@TLSGD
.section .zed,"awT",@progbits
foobar:
.long 43
-// CHECK: (('st_name', 0x00000010) # 'foobar'
+// CHECK: (('st_name', 0x0000001f) # 'foobar'
// CHECK-NEXT: ('st_bind', 0x0)
// CHECK-NEXT: ('st_type', 0x6)
// CHECK-NEXT: ('st_other', 0x00)
@@ -46,3 +48,30 @@ foobar:
// CHECK-NEXT: ('st_value', 0x0000000000000000)
// CHECK-NEXT: ('st_size', 0x0000000000000000)
// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 10
+// CHECK-NEXT: (('st_name', 0x00000010) # 'foo4'
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ('st_value', 0x0000000000000000)
+// CHECK-NEXT: ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 11
+// CHECK-NEXT: (('st_name', 0x00000015) # 'foo5'
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ('st_value', 0x0000000000000000)
+// CHECK-NEXT: ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 12
+// CHECK-NEXT: (('st_name', 0x0000001a) # 'foo6'
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0x6)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0000)
+// CHECK-NEXT: ('st_value', 0x0000000000000000)
+// CHECK-NEXT: ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
diff --git a/test/MC/ELF/type.s b/test/MC/ELF/type.s
index 2b25a6b69f69..ec53e4ffa524 100644
--- a/test/MC/ELF/type.s
+++ b/test/MC/ELF/type.s
@@ -12,6 +12,10 @@ bar:
// Test that gnu_unique_object is accepted.
.type zed,@gnu_unique_object
+ifunc:
+ .global ifunc
+ .type ifunc,@gnu_indirect_function
+
// CHECK: # Symbol 4
// CHECK-NEXT: (('st_name', 0x00000005) # 'bar'
// CHECK-NEXT: ('st_bind', 0x1)
@@ -30,3 +34,13 @@ bar:
// CHECK-NEXT: ('st_value', 0x0000000000000000)
// CHECK-NEXT: ('st_size', 0x0000000000000000)
// CHECK-NEXT: ),
+// CHECK-NEXT: # Symbol 6
+// CHECK-NEXT: (('st_name', 0x00000009) # 'ifunc'
+// CHECK-NEXT: ('st_bind', 0x1)
+// CHECK-NEXT: ('st_type', 0xa)
+// CHECK-NEXT: ('st_other', 0x00)
+// CHECK-NEXT: ('st_shndx', 0x0001)
+// CHECK-NEXT: ('st_value', 0x0000000000000000)
+// CHECK-NEXT: ('st_size', 0x0000000000000000)
+// CHECK-NEXT: ),
+
diff --git a/test/MC/MBlaze/dg.exp b/test/MC/MBlaze/dg.exp
deleted file mode 100644
index 0c4e78e88dc4..000000000000
--- a/test/MC/MBlaze/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target MBlaze] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/MBlaze/lit.local.cfg b/test/MC/MBlaze/lit.local.cfg
new file mode 100644
index 000000000000..b0e1d850ff96
--- /dev/null
+++ b/test/MC/MBlaze/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'MBlaze' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/darwin-ARM-reloc.s b/test/MC/MachO/ARM/darwin-ARM-reloc.s
new file mode 100644
index 000000000000..b98c80c46e8e
--- /dev/null
+++ b/test/MC/MachO/ARM/darwin-ARM-reloc.s
@@ -0,0 +1,173 @@
+@ RUN: llvm-mc -n -triple armv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+ .syntax unified
+ .text
+_f0:
+ bl _printf
+
+_f1:
+ bl _f0
+
+ .data
+_d0:
+Ld0_0:
+ .long Lsc0_0 - Ld0_0
+
+ .section __TEXT,__cstring,cstring_literals
+Lsc0_0:
+ .long 0
+
+ .subsections_via_symbols
+
+@ CHECK: ('cputype', 12)
+@ CHECK: ('cpusubtype', 9)
+@ CHECK: ('filetype', 1)
+@ CHECK: ('num_load_commands', 3)
+@ CHECK: ('load_commands_size', 364)
+@ CHECK: ('flag', 8192)
+@ CHECK: ('load_commands', [
+@ CHECK: # Load Command 0
+@ CHECK: (('command', 1)
+@ CHECK: ('size', 260)
+@ CHECK: ('segment_name', '\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('vm_addr', 0)
+@ CHECK: ('vm_size', 16)
+@ CHECK: ('file_offset', 392)
+@ CHECK: ('file_size', 16)
+@ CHECK: ('maxprot', 7)
+@ CHECK: ('initprot', 7)
+@ CHECK: ('num_sections', 3)
+@ CHECK: ('flags', 0)
+@ CHECK: ('sections', [
+@ CHECK: # Section 0
+@ CHECK: (('section_name', '__text\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('address', 0)
+@ CHECK: ('size', 8)
+@ CHECK: ('offset', 392)
+@ CHECK: ('alignment', 0)
+@ CHECK: ('reloc_offset', 408)
+@ CHECK: ('num_reloc', 2)
+@ CHECK: ('flags', 0x80000400)
+@ CHECK: ('reserved1', 0)
+@ CHECK: ('reserved2', 0)
+@ CHECK: ),
+@ CHECK: ('_relocations', [
+@ CHECK: # Relocation 0
+@ CHECK: (('word-0', 0x4),
+@ CHECK: ('word-1', 0x55000001)),
+@ CHECK: # Relocation 1
+@ CHECK: (('word-0', 0x0),
+@ CHECK: ('word-1', 0x5d000003)),
+@ CHECK: ])
+@ CHECK: ('_section_data', 'feffffeb fdffffeb')
+@ CHECK: # Section 1
+@ CHECK: (('section_name', '__data\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('segment_name', '__DATA\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('address', 8)
+@ CHECK: ('size', 4)
+@ CHECK: ('offset', 400)
+@ CHECK: ('alignment', 0)
+@ CHECK: ('reloc_offset', 424)
+@ CHECK: ('num_reloc', 2)
+@ CHECK: ('flags', 0x0)
+@ CHECK: ('reserved1', 0)
+@ CHECK: ('reserved2', 0)
+@ CHECK: ),
+@ CHECK: ('_relocations', [
+@ CHECK: # Relocation 0
+@ CHECK: (('word-0', 0xa2000000),
+@ CHECK: ('word-1', 0xc)),
+@ CHECK: # Relocation 1
+@ CHECK: (('word-0', 0xa1000000),
+@ CHECK: ('word-1', 0x8)),
+@ CHECK: ])
+@ CHECK: ('_section_data', '04000000')
+@ CHECK: # Section 2
+@ CHECK: (('section_name', '__cstring\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('segment_name', '__TEXT\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+@ CHECK: ('address', 12)
+@ CHECK: ('size', 4)
+@ CHECK: ('offset', 404)
+@ CHECK: ('alignment', 0)
+@ CHECK: ('reloc_offset', 0)
+@ CHECK: ('num_reloc', 0)
+@ CHECK: ('flags', 0x2)
+@ CHECK: ('reserved1', 0)
+@ CHECK: ('reserved2', 0)
+@ CHECK: ),
+@ CHECK: ('_relocations', [
+@ CHECK: ])
+@ CHECK: ('_section_data', '00000000')
+@ CHECK: ])
+@ CHECK: ),
+@ CHECK: # Load Command 1
+@ CHECK: (('command', 2)
+@ CHECK: ('size', 24)
+@ CHECK: ('symoff', 440)
+@ CHECK: ('nsyms', 4)
+@ CHECK: ('stroff', 488)
+@ CHECK: ('strsize', 24)
+@ CHECK: ('_string_data', '\x00_printf\x00_f0\x00_f1\x00_d0\x00\x00\x00\x00')
+@ CHECK: ('_symbols', [
+@ CHECK: # Symbol 0
+@ CHECK: (('n_strx', 9)
+@ CHECK: ('n_type', 0xe)
+@ CHECK: ('n_sect', 1)
+@ CHECK: ('n_desc', 0)
+@ CHECK: ('n_value', 0)
+@ CHECK: ('_string', '_f0')
+@ CHECK: ),
+@ CHECK: # Symbol 1
+@ CHECK: (('n_strx', 13)
+@ CHECK: ('n_type', 0xe)
+@ CHECK: ('n_sect', 1)
+@ CHECK: ('n_desc', 0)
+@ CHECK: ('n_value', 4)
+@ CHECK: ('_string', '_f1')
+@ CHECK: ),
+@ CHECK: # Symbol 2
+@ CHECK: (('n_strx', 17)
+@ CHECK: ('n_type', 0xe)
+@ CHECK: ('n_sect', 2)
+@ CHECK: ('n_desc', 0)
+@ CHECK: ('n_value', 8)
+@ CHECK: ('_string', '_d0')
+@ CHECK: ),
+@ CHECK: # Symbol 3
+@ CHECK: (('n_strx', 1)
+@ CHECK: ('n_type', 0x1)
+@ CHECK: ('n_sect', 0)
+@ CHECK: ('n_desc', 0)
+@ CHECK: ('n_value', 0)
+@ CHECK: ('_string', '_printf')
+@ CHECK: ),
+@ CHECK: ])
+@ CHECK: ),
+@ CHECK: # Load Command 2
+@ CHECK: (('command', 11)
+@ CHECK: ('size', 80)
+@ CHECK: ('ilocalsym', 0)
+@ CHECK: ('nlocalsym', 3)
+@ CHECK: ('iextdefsym', 3)
+@ CHECK: ('nextdefsym', 0)
+@ CHECK: ('iundefsym', 3)
+@ CHECK: ('nundefsym', 1)
+@ CHECK: ('tocoff', 0)
+@ CHECK: ('ntoc', 0)
+@ CHECK: ('modtaboff', 0)
+@ CHECK: ('nmodtab', 0)
+@ CHECK: ('extrefsymoff', 0)
+@ CHECK: ('nextrefsyms', 0)
+@ CHECK: ('indirectsymoff', 0)
+@ CHECK: ('nindirectsyms', 0)
+@ CHECK: ('extreloff', 0)
+@ CHECK: ('nextrel', 0)
+@ CHECK: ('locreloff', 0)
+@ CHECK: ('nlocrel', 0)
+@ CHECK: ('_indirect_symbols', [
+@ CHECK: ])
+@ CHECK: ),
+@ CHECK: ])
diff --git a/test/MC/ARM/darwin-Thumb-reloc.s b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
index 567573d9ef19..567573d9ef19 100644
--- a/test/MC/ARM/darwin-Thumb-reloc.s
+++ b/test/MC/MachO/ARM/darwin-Thumb-reloc.s
diff --git a/test/MC/MachO/ARM/empty-function-nop.ll b/test/MC/MachO/ARM/empty-function-nop.ll
new file mode 100644
index 000000000000..ef86ebc2a267
--- /dev/null
+++ b/test/MC/MachO/ARM/empty-function-nop.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -filetype=obj -mtriple=thumbv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T1 %s
+; RUN: llc < %s -filetype=obj -mtriple=thumbv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-T2 %s
+; RUN: llc < %s -filetype=obj -mtriple=armv6-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARM %s
+; RUN: llc < %s -filetype=obj -mtriple=armv7-apple-darwin -o - | macho-dump --dump-section-data | FileCheck -check-prefix=CHECK-ARMV7 %s
+
+; Empty functions need a NOP in them for MachO to prevent DWARF FDEs from
+; getting all mucked up. See lib/CodeGen/AsmPrinter/AsmPrinter.cpp for
+; details.
+define internal fastcc void @empty_function() {
+ unreachable
+}
+; CHECK-T1: ('_section_data', 'c046')
+; CHECK-T2: ('_section_data', '00bf')
+; CHECK-ARM: ('_section_data', '0000a0e1')
+; CHECK-ARMV7: ('_section_data', '00f020e3')
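+; The emitted NOP differs per target: c046 is Thumb1 'mov r8, r8', 00bf the
+; Thumb2 NOP hint, 0000a0e1 ARM 'mov r0, r0', and 00f020e3 the ARMv7 NOP.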
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
new file mode 100644
index 000000000000..89764637feb0
--- /dev/null
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if not 'ARM' in targets:
+ config.unsupported = True
+
diff --git a/test/MC/MachO/ARM/no-subsections-reloc.s b/test/MC/MachO/ARM/no-subsections-reloc.s
new file mode 100644
index 000000000000..7701c59c6805
--- /dev/null
+++ b/test/MC/MachO/ARM/no-subsections-reloc.s
@@ -0,0 +1,18 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+@ When not using subsections-via-symbols, references to non-local symbols
+@ in the same section can be resolved at assembly time w/o relocations.
+
+ .syntax unified
+ .text
+ .thumb
+ .thumb_func _foo
+_foo:
+ ldr r3, bar
+bar:
+ .long 0
+
+@ CHECK: 'num_reloc', 0
+@ CHECK: '_section_data', 'dff80030 00000000'
diff --git a/test/MC/ARM/nop-armv4-padding.s b/test/MC/MachO/ARM/nop-armv4-padding.s
index 8f646dbb396a..8f646dbb396a 100644
--- a/test/MC/ARM/nop-armv4-padding.s
+++ b/test/MC/MachO/ARM/nop-armv4-padding.s
diff --git a/test/MC/MachO/ARM/nop-armv6t2-padding.s b/test/MC/MachO/ARM/nop-armv6t2-padding.s
new file mode 100644
index 000000000000..c38ad2d7c57c
--- /dev/null
+++ b/test/MC/MachO/ARM/nop-armv6t2-padding.s
@@ -0,0 +1,10 @@
+@ RUN: llvm-mc -triple armv6t2-apple-darwin %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck %s < %t.dump
+
+x:
+ add r0, r1, r2
+ .align 4
+ add r0, r1, r2
+
+@ CHECK: ('_section_data', '020081e0 00f020e3 00f020e3 00f020e3 020081e0')
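+@ .align 4 requests a 16-byte boundary here, so three architected NOPs
+@ (e320f000) fill the 12 bytes between the two adds.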
diff --git a/test/MC/ARM/nop-thumb-padding.s b/test/MC/MachO/ARM/nop-thumb-padding.s
index 1e173f1a42d9..1e173f1a42d9 100644
--- a/test/MC/ARM/nop-thumb-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb-padding.s
diff --git a/test/MC/ARM/nop-thumb2-padding.s b/test/MC/MachO/ARM/nop-thumb2-padding.s
index a8aa3a1168ef..a8aa3a1168ef 100644
--- a/test/MC/ARM/nop-thumb2-padding.s
+++ b/test/MC/MachO/ARM/nop-thumb2-padding.s
diff --git a/test/MC/MachO/ARM/relax-thumb-ldr-literal.s b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
new file mode 100644
index 000000000000..8d26f6d2e2d7
--- /dev/null
+++ b/test/MC/MachO/ARM/relax-thumb-ldr-literal.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+
+ .syntax unified
+ .text
+ .thumb
+ .thumb_func _foo
+_foo:
+ ldr r2, (_foo - 4)
+
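+@ A backwards (negative-offset) literal load cannot use the 16-bit encoding,
+@ so it relaxes to the 32-bit LDR (literal) form, f85f 2008 ('5ff80820').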
+@ CHECK: ('num_reloc', 0)
+@ CHECK: ('_section_data', '5ff80820')
diff --git a/test/MC/MachO/ARM/relax-thumb2-branches.s b/test/MC/MachO/ARM/relax-thumb2-branches.s
new file mode 100644
index 000000000000..7916d424078c
--- /dev/null
+++ b/test/MC/MachO/ARM/relax-thumb2-branches.s
@@ -0,0 +1,14 @@
+@ RUN: llvm-mc -triple=thumbv7-apple-darwin -show-encoding %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
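+@ A narrow (16-bit) conditional branch reaches at most +254 bytes from its PC;
+@ the 258-byte gap puts Lfoo out of narrow range while Lbaz at +254 still fits.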
+
+ ble Lfoo @ wide encoding
+
+ .space 258
+Lfoo:
+ nop
+
+ ble Lbaz @ narrow encoding
+ .space 256
+Lbaz:
+
+@ CHECK: '_section_data', '40f38180
+@ CHECK: 000000bf 7fdd
diff --git a/test/MC/MachO/ARM/static-movt-relocs.s b/test/MC/MachO/ARM/static-movt-relocs.s
new file mode 100644
index 000000000000..dce56832929d
--- /dev/null
+++ b/test/MC/MachO/ARM/static-movt-relocs.s
@@ -0,0 +1,23 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+ .thumb
+ .thumb_func foo
+foo:
+ movw r0, :lower16:(bar + 16)
+ movt r0, :upper16:(bar + 16)
+ bx r0
+
+
+@ CHECK: ('_relocations', [
+@ CHECK: # Relocation 0
+@ CHECK: (('word-0', 0x4),
+@ CHECK: ('word-1', 0x8e000001)),
+@ CHECK: # Relocation 1
+@ CHECK: (('word-0', 0x10),
+@ CHECK: ('word-1', 0x16ffffff)),
+@ CHECK: # Relocation 2
+@ CHECK: (('word-0', 0x0),
+@ CHECK: ('word-1', 0x8c000001)),
+@ CHECK: # Relocation 3
+@ CHECK: (('word-0', 0x0),
+@ CHECK: ('word-1', 0x14ffffff)),
+@ CHECK: ])
diff --git a/test/MC/MachO/ARM/thumb2-function-relative-load.s b/test/MC/MachO/ARM/thumb2-function-relative-load.s
new file mode 100644
index 000000000000..622007dc1657
--- /dev/null
+++ b/test/MC/MachO/ARM/thumb2-function-relative-load.s
@@ -0,0 +1,13 @@
+@ RUN: llvm-mc -n -triple thumbv7-apple-darwin10 %s -filetype=obj -o %t.obj
+@ RUN: macho-dump --dump-section-data < %t.obj > %t.dump
+@ RUN: FileCheck < %t.dump %s
+ .syntax unified
+ .text
+ .thumb
+ .thumb_func _foo
+_foo:
+ ldr lr, (_foo - 4)
+
+ .subsections_via_symbols
+
+@ CHECK: ('_section_data', '5ff808e0')
diff --git a/test/MC/ARM/thumb2-movt-fixup.s b/test/MC/MachO/ARM/thumb2-movt-fixup.s
index ddd95b54791e..ddd95b54791e 100644
--- a/test/MC/ARM/thumb2-movt-fixup.s
+++ b/test/MC/MachO/ARM/thumb2-movt-fixup.s
diff --git a/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
new file mode 100644
index 000000000000..49cfa418162c
--- /dev/null
+++ b/test/MC/MachO/darwin-x86_64-diff-reloc-assign.s
@@ -0,0 +1,27 @@
+// RUN: llvm-mc -triple x86_64-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+// Test case for rdar://10743265
+
+// Check that this expression does not crash the assembler and that it
+// produces two relocation entries:
+// Relocation information (__TEXT,__text) 2 entries
+// address pcrel length extern type scattered symbolnum/value
+// 00000000 False long True SUB False _base
+// 00000000 False long True UNSIGND False _start_ap_2
+
+_base = .
+
+.long (0x2000) + _start_ap_2 - _base
+.word 0
+
+_start_ap_2:
+ cli
+
+// CHECK: ('_relocations', [
+// CHECK: # Relocation 0
+// CHECK: (('word-0', 0x0),
+// CHECK: ('word-1', 0x5c000000)),
+// CHECK: # Relocation 1
+// CHECK: (('word-0', 0x0),
+// CHECK: ('word-1', 0xc000001)),
+// CHECK: ])
diff --git a/test/MC/MachO/dg.exp b/test/MC/MachO/dg.exp
deleted file mode 100644
index ca6aefe9c53d..000000000000
--- a/test/MC/MachO/dg.exp
+++ /dev/null
@@ -1,6 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
-}
-
diff --git a/test/MC/MachO/file.s b/test/MC/MachO/file.s
new file mode 100644
index 000000000000..0168747ae19a
--- /dev/null
+++ b/test/MC/MachO/file.s
@@ -0,0 +1,24 @@
+// RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | macho-dump --dump-section-data | FileCheck %s
+
+ .file 1 "dir/foo"
+ nop
+
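+// A .file directive by itself should be enough to emit a DWARF line table
+// into a __debug_line section of the __DWARF segment.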
+// CHECK: ('_section_data', '90')
+// CHECK-NEXT: # Section 1
+// CHECK-NEXT: (('section_name', '__debug_line\x00\x00\x00\x00')
+// CHECK-NEXT: ('segment_name', '__DWARF\x00\x00\x00\x00\x00\x00\x00\x00\x00')
+// CHECK-NEXT: ('address', 1)
+// CHECK-NEXT: ('size', 45)
+// CHECK-NEXT: ('offset', 221)
+// CHECK-NEXT: ('alignment', 0)
+// CHECK-NEXT: ('reloc_offset', 0)
+// CHECK-NEXT: ('num_reloc', 0)
+// CHECK-NEXT: ('flags', 0x2000000)
+// CHECK-NEXT: ('reserved1', 0)
+// CHECK-NEXT: ('reserved2', 0)
+// CHECK-NEXT: ),
+// CHECK-NEXT: ('_relocations', [
+// CHECK-NEXT: ])
+// CHECK-NEXT: ('_section_data', '29000000 02001e00 00000101 fb0e0d00 01010101 00000001 00000164 69720000 666f6f00 01000000 02000001 01')
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
new file mode 100644
index 000000000000..4fbc32d295b7
--- /dev/null
+++ b/test/MC/MachO/gen-dwarf.s
@@ -0,0 +1,122 @@
+// RUN: llvm-mc -g -triple i386-apple-darwin10 %s -filetype=obj -o %t
+// RUN: llvm-dwarfdump %t | FileCheck %s
+
+.globl _bar
+_bar:
+ movl $0, %eax
+L1: leave
+ ret
+_foo:
+_baz:
+ nop
+.data
+_x: .long 1
+
+// CHECK: file format Mach-O 32-bit i386
+
+// CHECK: .debug_abbrev contents:
+// CHECK: Abbrev table for offset: 0x00000000
+// CHECK: [1] DW_TAG_compile_unit DW_CHILDREN_yes
+// CHECK: DW_AT_stmt_list DW_FORM_data4
+// CHECK: DW_AT_low_pc DW_FORM_addr
+// CHECK: DW_AT_high_pc DW_FORM_addr
+// CHECK: DW_AT_name DW_FORM_string
+// CHECK: DW_AT_comp_dir DW_FORM_string
+// CHECK: DW_AT_producer DW_FORM_string
+// CHECK: DW_AT_language DW_FORM_data2
+
+// CHECK: [2] DW_TAG_label DW_CHILDREN_yes
+// CHECK: DW_AT_name DW_FORM_string
+// CHECK: DW_AT_decl_file DW_FORM_data4
+// CHECK: DW_AT_decl_line DW_FORM_data4
+// CHECK: DW_AT_low_pc DW_FORM_addr
+// CHECK: DW_AT_prototyped DW_FORM_flag
+
+// CHECK: [3] DW_TAG_unspecified_parameters DW_CHILDREN_no
+
+
+// CHECK: .debug_info contents:
+
+// We don't check the leading addresses at which these entries appear.
+// CHECK: DW_TAG_compile_unit [1] *
+// CHECK: DW_AT_stmt_list [DW_FORM_data4] (0x00000000)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+// CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000000000008)
+// We don't check the file name, as it is under a temp directory
+// CHECK: DW_AT_name [DW_FORM_string]
+// We don't check DW_AT_comp_dir, which is the current working directory
+// CHECK: DW_AT_producer [DW_FORM_string] ("llvm-mc (based on {{.*}})")
+// CHECK: DW_AT_language [DW_FORM_data2] (0x8001)
+
+// CHECK: DW_TAG_label [2] *
+// CHECK: DW_AT_name [DW_FORM_string] ("bar")
+// CHECK: DW_AT_decl_file [DW_FORM_data4] (0x00000001)
+// CHECK: DW_AT_decl_line [DW_FORM_data4] (0x00000005)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000000)
+// CHECK: DW_AT_prototyped [DW_FORM_flag] (0x00)
+
+// CHECK: DW_TAG_unspecified_parameters [3]
+
+// CHECK: NULL
+
+// CHECK: DW_TAG_label [2] *
+// CHECK: DW_AT_name [DW_FORM_string] ("foo")
+// CHECK: DW_AT_decl_file [DW_FORM_data4] (0x00000001)
+// CHECK: DW_AT_decl_line [DW_FORM_data4] (0x00000009)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000007)
+// CHECK: DW_AT_prototyped [DW_FORM_flag] (0x00)
+
+// CHECK: DW_TAG_unspecified_parameters [3]
+
+// CHECK: NULL
+
+// CHECK: DW_TAG_label [2] *
+// CHECK: DW_AT_name [DW_FORM_string] ("baz")
+// CHECK: DW_AT_decl_file [DW_FORM_data4] (0x00000001)
+// CHECK: DW_AT_decl_line [DW_FORM_data4] (0x0000000a)
+// CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000007)
+// CHECK: DW_AT_prototyped [DW_FORM_flag] (0x00)
+
+// CHECK: DW_TAG_unspecified_parameters [3]
+
+// CHECK: NULL
+
+// CHECK: NULL
+
+// CHECK: .debug_aranges contents:
+// CHECK: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
+
+// CHECK: .debug_lines contents:
+// CHECK: Line table prologue:
+// We don't check the total_length, as it includes the lengths of temp paths
+// CHECK: version: 2
+// We don't check the prologue_length, as it also includes the lengths of temp paths
+// CHECK: min_inst_length: 1
+// CHECK: default_is_stmt: 1
+// CHECK: line_base: -5
+// CHECK: line_range: 14
+// CHECK: opcode_base: 13
+// CHECK: standard_opcode_lengths[DW_LNS_copy] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_advance_pc] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_advance_line] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_file] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_column] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_negate_stmt] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_basic_block] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_const_add_pc] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 1
+// CHECK: standard_opcode_lengths[DW_LNS_set_prologue_end] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_epilogue_begin] = 0
+// CHECK: standard_opcode_lengths[DW_LNS_set_isa] = 1
+// We don't check include_directories as it has a temp path
+// CHECK: Dir Mod Time File Len File Name
+// CHECK: ---- ---------- ---------- ---------------------------
+// CHECK: file_names[ 1] 1 0x00000000 0x00000000 gen-dwarf.s
+
+// CHECK: Address Line Column File ISA Flags
+// CHECK: ------------------ ------ ------ ------ --- -------------
+// CHECK: 0x0000000000000000 6 0 1 0 is_stmt
+// CHECK: 0x0000000000000005 7 0 1 0 is_stmt
+// CHECK: 0x0000000000000006 8 0 1 0 is_stmt
+// CHECK: 0x0000000000000007 11 0 1 0 is_stmt
+// CHECK: 0x0000000000000008 11 0 1 0 is_stmt end_sequence
diff --git a/test/MC/MachO/lit.local.cfg b/test/MC/MachO/lit.local.cfg
new file mode 100644
index 000000000000..6c49f08b7496
--- /dev/null
+++ b/test/MC/MachO/lit.local.cfg
@@ -0,0 +1,7 @@
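+# These tests require the X86 target.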
+config.suffixes = ['.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/MC/MachO/reloc-pcrel-offset.s b/test/MC/MachO/reloc-pcrel-offset.s
index e0f12bf4ba2f..e113e9616cc0 100644
--- a/test/MC/MachO/reloc-pcrel-offset.s
+++ b/test/MC/MachO/reloc-pcrel-offset.s
@@ -11,4 +11,7 @@
.text
_a:
+_b:
call _a
+
+ .subsections_via_symbols
diff --git a/test/MC/MachO/reloc-pcrel.s b/test/MC/MachO/reloc-pcrel.s
index fff7cc0ada04..11334150368a 100644
--- a/test/MC/MachO/reloc-pcrel.s
+++ b/test/MC/MachO/reloc-pcrel.s
@@ -8,13 +8,13 @@
// CHECK: ('word-1', 0x6)),
// CHECK: # Relocation 2
// CHECK: (('word-0', 0x40),
-// CHECK: ('word-1', 0xd000002)),
+// CHECK: ('word-1', 0xd000003)),
// CHECK: # Relocation 3
// CHECK: (('word-0', 0x3b),
-// CHECK: ('word-1', 0xd000002)),
+// CHECK: ('word-1', 0xd000003)),
// CHECK: # Relocation 4
// CHECK: (('word-0', 0x36),
-// CHECK: ('word-1', 0xd000002)),
+// CHECK: ('word-1', 0xd000003)),
// CHECK: # Relocation 5
// CHECK: (('word-0', 0xe0000031),
// CHECK: ('word-1', 0x4)),
@@ -36,15 +36,16 @@
// CHECK-NEXT: ])
xorl %eax,%eax
-
+
.globl _a
_a:
xorl %eax,%eax
_b:
+_d:
xorl %eax,%eax
L0:
xorl %eax,%eax
-L1:
+L1:
call L0
call L0 - 1
@@ -60,3 +61,5 @@ L1:
call _c + 1
// call _a - L0
call _b - L0
+
+ .subsections_via_symbols
diff --git a/test/MC/Mips/elf-bigendian.ll b/test/MC/Mips/elf-bigendian.ll
new file mode 100644
index 000000000000..875ba3ba965e
--- /dev/null
+++ b/test/MC/Mips/elf-bigendian.ll
@@ -0,0 +1,45 @@
+; RUN: llc -filetype=obj -mtriple mips-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+; Check that this is big endian.
+; CHECK: ('e_indent[EI_DATA]', 0x02)
+
+; Make sure that a section table (text) entry is correct.
+; CHECK: (('sh_name', 0x{{[0]*}}5) # '.text'
+; CHECK-NEXT: ('sh_type', 0x{{[0]*}}1)
+; CHECK-NEXT: ('sh_flags', 0x{{[0]*}}6)
+; CHECK-NEXT: ('sh_addr', 0x{{[0-9a-f]+}})
+; CHECK-NEXT: ('sh_offset', 0x{{[0-9a-f]+}})
+; CHECK-NEXT: ('sh_size', 0x{{[0-9a-f]+}})
+; CHECK-NEXT: ('sh_link', 0x{{[0]+}})
+; CHECK-NEXT: ('sh_info', 0x{{[0]+}})
+; CHECK-NEXT: ('sh_addralign', 0x{{[0]*}}4)
+; CHECK-NEXT: ('sh_entsize', 0x{{[0]+}})
+
+; Check that at least the first 3 instructions are correct: the GP prologue.
+; CHECK-NEXT: ('_section_data', '3c1c0000 279c0000 0399e021 {{[0-9a-f]*}}')
+
+; ModuleID = '../br1.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-n32"
+target triple = "mips-unknown-linux"
+
+@x = global i32 1, align 4
+@str = private unnamed_addr constant [4 x i8] c"goo\00"
+@str2 = private unnamed_addr constant [4 x i8] c"foo\00"
+
+define i32 @main() nounwind {
+entry:
+ %0 = load i32* @x, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %foo
+
+if.end: ; preds = %entry
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i32 0, i32 0))
+ br label %foo
+
+foo: ; preds = %entry, %if.end
+ %puts2 = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str2, i32 0, i32 0))
+ ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
diff --git a/test/MC/Mips/elf-relsym.ll b/test/MC/Mips/elf-relsym.ll
new file mode 100644
index 000000000000..0f74437ec5d4
--- /dev/null
+++ b/test/MC/Mips/elf-relsym.ll
@@ -0,0 +1,29 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+; Check that the appropriate symbols were created.
+
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$.str1'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_0'
+; CHECK: (('st_name', 0x{{[0-9|a-f]+}}) # '$CPI0_1'
+
+@.str = private unnamed_addr constant [6 x i8] c"abcde\00", align 1
+@gc1 = external global i8*
+@.str1 = private unnamed_addr constant [5 x i8] c"fghi\00", align 1
+@gc2 = external global i8*
+@gd1 = external global double
+@gd2 = external global double
+
+define void @foo1() nounwind {
+entry:
+ store i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8** @gc1, align 4
+ store i8* getelementptr inbounds ([5 x i8]* @.str1, i32 0, i32 0), i8** @gc2, align 4
+ %0 = load double* @gd1, align 8
+ %add = fadd double %0, 2.500000e+00
+ store double %add, double* @gd1, align 8
+ %1 = load double* @gd2, align 8
+ %add1 = fadd double %1, 4.500000e+00
+ store double %add1, double* @gd2, align 8
+ ret void
+}
+
diff --git a/test/MC/Mips/elf-tls.ll b/test/MC/Mips/elf-tls.ll
new file mode 100644
index 000000000000..b4183b835779
--- /dev/null
+++ b/test/MC/Mips/elf-tls.ll
@@ -0,0 +1,36 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck %s
+
+; Check that the appropriate relocations were created.
+
+; CHECK: ('r_type', 0x2b)
+; CHECK: ('r_type', 0x2c)
+; CHECK: ('r_type', 0x2d)
+
+@t1 = thread_local global i32 0, align 4
+
+define i32 @f1() nounwind {
+entry:
+ %tmp = load i32* @t1, align 4
+ ret i32 %tmp
+
+}
+
+
+@t2 = external thread_local global i32
+
+define i32 @f2() nounwind {
+entry:
+ %tmp = load i32* @t2, align 4
+ ret i32 %tmp
+
+}
+
+@f3.i = internal thread_local unnamed_addr global i32 1, align 4
+
+define i32 @f3() nounwind {
+entry:
+ %0 = load i32* @f3.i, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @f3.i, align 4
+ ret i32 %inc
+}
diff --git a/test/MC/Mips/elf_basic.s b/test/MC/Mips/elf_basic.s
new file mode 100644
index 000000000000..7a79fa066be4
--- /dev/null
+++ b/test/MC/Mips/elf_basic.s
@@ -0,0 +1,32 @@
+// 32 bit big endian
+// RUN: llvm-mc -filetype=obj -triple mips-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE32 %s
+// 32 bit little endian
+// RUN: llvm-mc -filetype=obj -triple mipsel-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-LE32 %s
+// 64 bit big endian
+// RUN: llvm-mc -filetype=obj -arch=mips64 -triple mips64-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-BE64 %s
+// 64 bit little endian
+// RUN: llvm-mc -filetype=obj -arch=mips64el -triple mips64el-unknown-linux %s -o - | elf-dump --dump-section-data | FileCheck -check-prefix=CHECK-LE64 %s
+
+// Check that we produce 32-bit objects in each endianness.
+
+// This is 32 bit.
+// CHECK-BE32: ('e_indent[EI_CLASS]', 0x01)
+// This is big endian.
+// CHECK-BE32: ('e_indent[EI_DATA]', 0x02)
+
+// This is 32 bit.
+// CHECK-LE32: ('e_indent[EI_CLASS]', 0x01)
+// This is little endian.
+// CHECK-LE32: ('e_indent[EI_DATA]', 0x01)
+
+// Check that we produce 64-bit objects in each endianness.
+
+// This is 64 bit.
+// CHECK-BE64: ('e_indent[EI_CLASS]', 0x02)
+// This is big endian.
+// CHECK-BE64: ('e_indent[EI_DATA]', 0x02)
+
+// This is 64 bit.
+// CHECK-LE64: ('e_indent[EI_CLASS]', 0x02)
+// This is little endian.
+// CHECK-LE64: ('e_indent[EI_DATA]', 0x01)
diff --git a/test/MC/Mips/lit.local.cfg b/test/MC/Mips/lit.local.cfg
new file mode 100644
index 000000000000..d2e3b28dbd86
--- /dev/null
+++ b/test/MC/Mips/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
+targets = set(config.root.targets_to_build.split())
+if 'Mips' not in targets:
+ config.unsupported = True
+
diff --git a/test/MC/Mips/pr11877.s b/test/MC/Mips/pr11877.s
new file mode 100644
index 000000000000..d354ce4c554d
--- /dev/null
+++ b/test/MC/Mips/pr11877.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple mips-unknown-unknown %s
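+// A chain of symbol assignments (g = h, h = i) must assemble without error.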
+
+i:
+ .long g
+g = h
+h = i
diff --git a/test/MC/X86/2011-09-06-NoNewline.s b/test/MC/X86/2011-09-06-NoNewline.s
new file mode 100644
index 000000000000..bc681a38e718
--- /dev/null
+++ b/test/MC/X86/2011-09-06-NoNewline.s
@@ -0,0 +1,3 @@
+// RUN: llvm-mc -triple i386-unknown-unknown %s
+// PR10869
+movl %gs:8, %eax
\ No newline at end of file
diff --git a/test/MC/X86/address-size.s b/test/MC/X86/address-size.s
new file mode 100644
index 000000000000..b105b40ec568
--- /dev/null
+++ b/test/MC/X86/address-size.s
@@ -0,0 +1,15 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
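+// Using the non-default address width for the current mode should emit the
+// 0x67 address-size override prefix.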
+ .code64
+ movb $0x0, (%esi)
+// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+ movb $0x0, (%rsi)
+// CHECK: encoding: [0xc6,0x06,0x00]
+
+ .code32
+ movb $0x0, (%si)
+// CHECK: encoding: [0x67,0xc6,0x06,0x00]
+ movb $0x0, (%esi)
+// CHECK: encoding: [0xc6,0x06,0x00]
diff --git a/test/MC/X86/dg.exp b/test/MC/X86/dg.exp
deleted file mode 100644
index ec87b695b7ef..000000000000
--- a/test/MC/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
-}
diff --git a/test/MC/X86/intel-syntax-2.s b/test/MC/X86/intel-syntax-2.s
new file mode 100644
index 000000000000..ca4afc317398
--- /dev/null
+++ b/test/MC/X86/intel-syntax-2.s
@@ -0,0 +1,7 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown %s | FileCheck %s
+
+ .intel_syntax
+_test:
+// CHECK: movl $257, -4(%rsp)
+ mov DWORD PTR [RSP - 4], 257
+
diff --git a/test/MC/X86/intel-syntax-encoding.s b/test/MC/X86/intel-syntax-encoding.s
new file mode 100644
index 000000000000..03b05511649a
--- /dev/null
+++ b/test/MC/X86/intel-syntax-encoding.s
@@ -0,0 +1,58 @@
+// RUN: llvm-mc -x86-asm-syntax=intel -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
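+// Immediates in [-128, 127] should use the sign-extended imm8 forms (opcode 0x83).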
+// CHECK: encoding: [0x66,0x83,0xf0,0x0c]
+ xor ax, 12
+// CHECK: encoding: [0x83,0xf0,0x0c]
+ xor eax, 12
+// CHECK: encoding: [0x48,0x83,0xf0,0x0c]
+ xor rax, 12
+
+// CHECK: encoding: [0x66,0x83,0xc8,0x0c]
+ or ax, 12
+// CHECK: encoding: [0x83,0xc8,0x0c]
+ or eax, 12
+// CHECK: encoding: [0x48,0x83,0xc8,0x0c]
+ or rax, 12
+
+// CHECK: encoding: [0x66,0x83,0xf8,0x0c]
+ cmp ax, 12
+// CHECK: encoding: [0x83,0xf8,0x0c]
+ cmp eax, 12
+// CHECK: encoding: [0x48,0x83,0xf8,0x0c]
+ cmp rax, 12
+
+// CHECK: encoding: [0x48,0x89,0x44,0x24,0xf0]
+ mov QWORD PTR [RSP - 16], RAX
+
+// CHECK: encoding: [0x66,0x83,0xc0,0xf4]
+ add ax, -12
+// CHECK: encoding: [0x83,0xc0,0xf4]
+ add eax, -12
+// CHECK: encoding: [0x48,0x83,0xc0,0xf4]
+ add rax, -12
+
+LBB0_3:
+// CHECK: encoding: [0xeb,A]
+ jmp LBB0_3
+// CHECK: encoding: [0xf2,0x0f,0x10,0x2c,0x25,0xf8,0xff,0xff,0xff]
+ movsd XMM5, QWORD PTR [-8]
+
+// CHECK: encoding: [0xd1,0xe7]
+ shl EDI, 1
+
+// CHECK: encoding: [0x0f,0xc2,0xd1,0x01]
+ cmpltps XMM2, XMM1
+
+// CHECK: encoding: [0xc3]
+ ret
+
+// CHECK: encoding: [0xcb]
+ retf
+
+// CHECK: encoding: [0xc2,0x08,0x00]
+ ret 8
+
+// CHECK: encoding: [0xca,0x08,0x00]
+ retf 8
+
diff --git a/test/MC/X86/intel-syntax.s b/test/MC/X86/intel-syntax.s
new file mode 100644
index 000000000000..7cd56777b0e9
--- /dev/null
+++ b/test/MC/X86/intel-syntax.s
@@ -0,0 +1,66 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
+
+_test:
+ xor EAX, EAX
+ ret
+
+_main:
+// CHECK: movl $257, -4(%rsp)
+ mov DWORD PTR [RSP - 4], 257
+// CHECK: movl $258, 4(%rsp)
+ mov DWORD PTR [RSP + 4], 258
+// CHECK: movq $123, -16(%rsp)
+ mov QWORD PTR [RSP - 16], 123
+// CHECK: movb $97, -17(%rsp)
+ mov BYTE PTR [RSP - 17], 97
+// CHECK: movl -4(%rsp), %eax
+ mov EAX, DWORD PTR [RSP - 4]
+// CHECK: movq (%rsp), %rax
+ mov RAX, QWORD PTR [RSP]
+// CHECK: movl $-4, -4(%rsp)
+ mov DWORD PTR [RSP - 4], -4
+// CHECK: movq 0, %rcx
+ mov RCX, QWORD PTR [0]
+// CHECK: movl -24(%rsp,%rax,4), %eax
+ mov EAX, DWORD PTR [RSP + 4*RAX - 24]
+// CHECK: movb %dil, (%rdx,%rcx)
+ mov BYTE PTR [RDX + RCX], DIL
+// CHECK: movzwl 2(%rcx), %edi
+ movzx EDI, WORD PTR [RCX + 2]
+// CHECK: callq _test
+ call _test
+// CHECK: andw $12, %ax
+ and ax, 12
+// CHECK: andw $-12, %ax
+ and ax, -12
+// CHECK: andw $257, %ax
+ and ax, 257
+// CHECK: andw $-257, %ax
+ and ax, -257
+// CHECK: andl $12, %eax
+ and eax, 12
+// CHECK: andl $-12, %eax
+ and eax, -12
+// CHECK: andl $257, %eax
+ and eax, 257
+// CHECK: andl $-257, %eax
+ and eax, -257
+// CHECK: andq $12, %rax
+ and rax, 12
+// CHECK: andq $-12, %rax
+ and rax, -12
+// CHECK: andq $257, %rax
+ and rax, 257
+// CHECK: andq $-257, %rax
+ and rax, -257
+// CHECK: fld %st(0)
+ fld ST(0)
+// CHECK: movl %fs:(%rdi), %eax
+ mov EAX, DWORD PTR FS:[RDI]
+// CHECK: leal (,%rdi,4), %r8d
+ lea R8D, DWORD PTR [4*RDI]
+// CHECK: movl _fnan(,%ecx,4), %ecx
+ mov ECX, DWORD PTR [4*ECX + _fnan]
+// CHECK: movq %fs:320, %rax
+ mov RAX, QWORD PTR FS:[320]
+ ret
diff --git a/test/MC/X86/lit.local.cfg b/test/MC/X86/lit.local.cfg
new file mode 100644
index 000000000000..eee568e8fdc2
--- /dev/null
+++ b/test/MC/X86/lit.local.cfg
@@ -0,0 +1,13 @@
+config.suffixes = ['.ll', '.c', '.cpp', '.s']
+
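+# Walk up to the root config, which records the set of built targets.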
+def getRoot(config):
+ if not config.parent:
+ return config
+ return getRoot(config.parent)
+
+root = getRoot(config)
+
+targets = set(root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index 0954ce2b02cd..6c27b8590b52 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -500,6 +500,9 @@
// CHECK: sysexit
sysexit
+// CHECK: sysexitl
+ sysexitl
+
// CHECK: ud2
ud2
@@ -4417,6 +4420,10 @@
// CHECK: encoding: [0x0f,0x35]
sysexit
+// CHECK: sysexitl
+// CHECK: encoding: [0x0f,0x35]
+ sysexitl
+
// CHECK: fxsave 3735928559(%ebx,%ecx,8)
// CHECK: encoding: [0x0f,0xae,0x84,0xcb,0xef,0xbe,0xad,0xde]
fxsave 0xdeadbeef(%ebx,%ecx,8)
@@ -18401,6 +18408,9 @@
// CHECK: vmcall
vmcall
+// CHECK: vmfunc
+ vmfunc
+
// CHECK: vmclear 3735928559(%ebx,%ecx,8)
vmclear 0xdeadbeef(%ebx,%ecx,8)
@@ -18458,6 +18468,30 @@
// CHECK: vmxon 305419896
vmxon 0x12345678
+// CHECK: vmrun %eax
+ vmrun %eax
+
+// CHECK: vmmcall
+ vmmcall
+
+// CHECK: vmload %eax
+ vmload %eax
+
+// CHECK: vmsave %eax
+ vmsave %eax
+
+// CHECK: stgi
+ stgi
+
+// CHECK: clgi
+ clgi
+
+// CHECK: skinit %eax
+ skinit %eax
+
+// CHECK: invlpga %ecx, %eax
+ invlpga %ecx, %eax
+
// CHECK: phaddw 3735928559(%ebx,%ecx,8), %mm3
phaddw 0xdeadbeef(%ebx,%ecx,8),%mm3
diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s
index 19f14450fee9..57a00378d319 100644
--- a/test/MC/X86/x86-32.s
+++ b/test/MC/X86/x86-32.s
@@ -28,6 +28,9 @@
vmcall
// CHECK: vmcall
// CHECK: encoding: [0x0f,0x01,0xc1]
+ vmfunc
+// CHECK: vmfunc
+// CHECK: encoding: [0x0f,0x01,0xd4]
vmlaunch
// CHECK: vmlaunch
// CHECK: encoding: [0x0f,0x01,0xc2]
@@ -41,7 +44,32 @@
// CHECK: swapgs
// CHECK: encoding: [0x0f,0x01,0xf8]
-rdtscp
+ vmrun %eax
+// CHECK: vmrun %eax
+// CHECK: encoding: [0x0f,0x01,0xd8]
+ vmmcall
+// CHECK: vmmcall
+// CHECK: encoding: [0x0f,0x01,0xd9]
+ vmload %eax
+// CHECK: vmload %eax
+// CHECK: encoding: [0x0f,0x01,0xda]
+ vmsave %eax
+// CHECK: vmsave %eax
+// CHECK: encoding: [0x0f,0x01,0xdb]
+ stgi
+// CHECK: stgi
+// CHECK: encoding: [0x0f,0x01,0xdc]
+ clgi
+// CHECK: clgi
+// CHECK: encoding: [0x0f,0x01,0xdd]
+ skinit %eax
+// CHECK: skinit %eax
+// CHECK: encoding: [0x0f,0x01,0xde]
+ invlpga %ecx, %eax
+// CHECK: invlpga %ecx, %eax
+// CHECK: encoding: [0x0f,0x01,0xdf]
+
+ rdtscp
// CHECK: rdtscp
// CHECK: encoding: [0x0f,0x01,0xf9]
@@ -69,9 +97,9 @@ rdtscp
sal $1, %eax
// moffset forms of moves, rdar://7947184
-movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,A,A,A,A]
-movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,A,A,A,A]
-movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,A,A,A,A]
+movb 0, %al // CHECK: movb 0, %al # encoding: [0xa0,0x00,0x00,0x00,0x00]
+movw 0, %ax // CHECK: movw 0, %ax # encoding: [0x66,0xa1,0x00,0x00,0x00,0x00]
+movl 0, %eax // CHECK: movl 0, %eax # encoding: [0xa1,0x00,0x00,0x00,0x00]
// rdar://7973775
into
@@ -962,3 +990,11 @@ xchgl %ecx, %eax
// CHECK: xchgl %ecx, %eax
// CHECK: encoding: [0x91]
xchgl %eax, %ecx
+
+// CHECK: retw
+// CHECK: encoding: [0x66,0xc3]
+retw
+
+// CHECK: lretw
+// CHECK: encoding: [0x66,0xcb]
+lretw
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index a9cdaa495f05..6a2d5bba6b70 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -50,6 +50,9 @@
// CHECK: ret
ret
+// CHECK: retw
+ retw
+
// FIXME: Check that this matches SUB32ri8
// CHECK: subl $1, %eax
subl $1, %eax
@@ -339,15 +342,28 @@ rclb $1, %bl // CHECK: rclb %bl # encoding: [0xd0,0xd3]
rclb $2, %bl // CHECK: rclb $2, %bl # encoding: [0xc0,0xd3,0x02]
// rdar://8418316
-// CHECK: shldw $1, %bx, %bx
-// CHECK: shldw $1, %bx, %bx
-// CHECK: shrdw $1, %bx, %bx
-// CHECK: shrdw $1, %bx, %bx
-
-shld %bx,%bx
-shld $1, %bx,%bx
-shrd %bx,%bx
-shrd $1, %bx,%bx
+// PR12173
+// CHECK: shldw %cl, %bx, %dx
+// CHECK: shldw %cl, %bx, %dx
+// CHECK: shldw $1, %bx, %dx
+// CHECK: shldw %cl, %bx, (%rax)
+// CHECK: shldw %cl, %bx, (%rax)
+// CHECK: shrdw %cl, %bx, %dx
+// CHECK: shrdw %cl, %bx, %dx
+// CHECK: shrdw $1, %bx, %dx
+// CHECK: shrdw %cl, %bx, (%rax)
+// CHECK: shrdw %cl, %bx, (%rax)
+
+shld %bx, %dx
+shld %cl, %bx, %dx
+shld $1, %bx, %dx
+shld %bx, (%rax)
+shld %cl, %bx, (%rax)
+shrd %bx, %dx
+shrd %cl, %bx, %dx
+shrd $1, %bx, %dx
+shrd %bx, (%rax)
+shrd %cl, %bx, (%rax)
// CHECK: sldtl %ecx
// CHECK: encoding: [0x0f,0x00,0xc1]
@@ -459,6 +475,7 @@ cwtl // CHECK: cwtl
cbw // CHECK: cbtw
cwd // CHECK: cwtd
cdq // CHECK: cltd
+cqo // CHECK: cqto
// rdar://8456378 and PR7557 - fstsw
fstsw %ax
@@ -827,6 +844,7 @@ iretq
lretq // CHECK: lretq # encoding: [0x48,0xcb]
lretl // CHECK: lretl # encoding: [0xcb]
lret // CHECK: lretl # encoding: [0xcb]
+lretw // CHECK: lretw # encoding: [0x66,0xcb]
// rdar://8403907
sysret
@@ -1039,6 +1057,9 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
movsl
movsl %ds:(%rsi), %es:(%rdi)
movsl (%rsi), %es:(%rdi)
+// rdar://10883092
+// CHECK: movsd
+ movsl (%rsi), (%rdi)
// CHECK: movsq # encoding: [0x48,0xa5]
// CHECK: movsq
@@ -1191,3 +1212,15 @@ xchgl %ecx, %eax
// CHECK: xchgl %ecx, %eax
// CHECK: encoding: [0x91]
xchgl %eax, %ecx
+
+// CHECK: sysexit
+// CHECK: encoding: [0x0f,0x35]
+sysexit
+
+// CHECK: sysexitl
+// CHECK: encoding: [0x0f,0x35]
+sysexitl
+
+// CHECK: sysexitq
+// CHECK: encoding: [0x48,0x0f,0x35]
+sysexitq
diff --git a/test/MC/X86/x86_64-avx-encoding.s b/test/MC/X86/x86_64-avx-encoding.s
index d3b226f205db..bd5559a52810 100644
--- a/test/MC/X86/x86_64-avx-encoding.s
+++ b/test/MC/X86/x86_64-avx-encoding.s
@@ -600,6 +600,776 @@
// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x03]
vcmpunordsd -4(%rbx,%rcx,8), %xmm12, %xmm13
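+// The predicate-suffixed aliases below (vcmpeq_uqps, vcmpngeps, ...) should
+// print back as vcmpps/vcmppd/vcmpss/vcmpsd with the matching immediate.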
+// CHECK: vcmpps $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x08]
+ vcmpeq_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x09]
+ vcmpngeps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0a]
+ vcmpngtps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0b]
+ vcmpfalseps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0c]
+ vcmpneq_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0d]
+ vcmpgeps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0e]
+ vcmpgtps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x0f]
+ vcmptrueps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x10]
+ vcmpeq_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x11]
+ vcmplt_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x12]
+ vcmple_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x13]
+ vcmpunord_sps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x14]
+ vcmpneq_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x15]
+ vcmpnlt_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x16]
+ vcmpnle_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x17]
+ vcmpord_sps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x18]
+ vcmpeq_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x19]
+ vcmpnge_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1a]
+ vcmpngt_uqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1b]
+ vcmpfalse_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1c]
+ vcmpneq_osps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1d]
+ vcmpge_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1e]
+ vcmpgt_oqps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x18,0xc2,0xeb,0x1f]
+ vcmptrue_usps %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpps $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x08]
+ vcmpeq_uqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x09]
+ vcmpngeps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0a]
+ vcmpngtps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0b]
+ vcmpfalseps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0c]
+ vcmpneq_oqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0d]
+ vcmpgeps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x0e]
+ vcmpgtps -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x0f]
+ vcmptrueps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x10]
+ vcmpeq_osps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x11]
+ vcmplt_oqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x12]
+ vcmple_oqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x13]
+ vcmpunord_sps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x14]
+ vcmpneq_usps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x15]
+ vcmpnlt_uqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x16]
+ vcmpnle_uqps -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x17]
+ vcmpord_sps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x18]
+ vcmpeq_usps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x19]
+ vcmpnge_uqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1a]
+ vcmpngt_uqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1b]
+ vcmpfalse_osps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1c]
+ vcmpneq_osps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1d]
+ vcmpge_oqps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpps $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc8,0xc2,0x54,0xcb,0xfc,0x1e]
+ vcmpgt_oqps -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpps $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x18,0xc2,0x6c,0xcb,0xfc,0x1f]
+ vcmptrue_usps -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x08]
+ vcmpeq_uqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x09]
+ vcmpngepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0a]
+ vcmpngtpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0b]
+ vcmpfalsepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0c]
+ vcmpneq_oqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0d]
+ vcmpgepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0e]
+ vcmpgtpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x0f]
+ vcmptruepd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x10]
+ vcmpeq_ospd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x11]
+ vcmplt_oqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x12]
+ vcmple_oqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x13]
+ vcmpunord_spd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x14]
+ vcmpneq_uspd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x15]
+ vcmpnlt_uqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x16]
+ vcmpnle_uqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x17]
+ vcmpord_spd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x18]
+ vcmpeq_uspd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x19]
+ vcmpnge_uqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1a]
+ vcmpngt_uqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1b]
+ vcmpfalse_ospd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1c]
+ vcmpneq_ospd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1d]
+ vcmpge_oqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1e]
+ vcmpgt_oqpd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x19,0xc2,0xeb,0x1f]
+ vcmptrue_uspd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmppd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x08]
+ vcmpeq_uqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x09]
+ vcmpngepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0a]
+ vcmpngtpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0b]
+ vcmpfalsepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0c]
+ vcmpneq_oqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0d]
+ vcmpgepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x0e]
+ vcmpgtpd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x0f]
+ vcmptruepd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x10]
+ vcmpeq_ospd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x11]
+ vcmplt_oqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x12]
+ vcmple_oqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x13]
+ vcmpunord_spd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x14]
+ vcmpneq_uspd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x15]
+ vcmpnlt_uqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x16]
+ vcmpnle_uqpd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x17]
+ vcmpord_spd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x18]
+ vcmpeq_uspd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x19]
+ vcmpnge_uqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1a]
+ vcmpngt_uqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1b]
+ vcmpfalse_ospd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1c]
+ vcmpneq_ospd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1d]
+ vcmpge_oqpd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmppd $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xc9,0xc2,0x54,0xcb,0xfc,0x1e]
+ vcmpgt_oqpd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmppd $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x19,0xc2,0x6c,0xcb,0xfc,0x1f]
+ vcmptrue_uspd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x08]
+ vcmpeq_uqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x09]
+ vcmpngess %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0a]
+ vcmpngtss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0b]
+ vcmpfalsess %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0c]
+ vcmpneq_oqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0d]
+ vcmpgess %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0e]
+ vcmpgtss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x0f]
+ vcmptruess %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x10]
+ vcmpeq_osss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x11]
+ vcmplt_oqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x12]
+ vcmple_oqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x13]
+ vcmpunord_sss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x14]
+ vcmpneq_usss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x15]
+ vcmpnlt_uqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x16]
+ vcmpnle_uqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x17]
+ vcmpord_sss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x18]
+ vcmpeq_usss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x19]
+ vcmpnge_uqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1a]
+ vcmpngt_uqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1b]
+ vcmpfalse_osss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1c]
+ vcmpneq_osss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1d]
+ vcmpge_oqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1e]
+ vcmpgt_oqss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1a,0xc2,0xeb,0x1f]
+ vcmptrue_usss %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpss $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x08]
+ vcmpeq_uqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x09]
+ vcmpngess -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0a]
+ vcmpngtss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0b]
+ vcmpfalsess -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0c]
+ vcmpneq_oqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0d]
+ vcmpgess -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x0e]
+ vcmpgtss -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x0f]
+ vcmptruess -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x10]
+ vcmpeq_osss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x11]
+ vcmplt_oqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x12]
+ vcmple_oqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x13]
+ vcmpunord_sss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x14]
+ vcmpneq_usss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x15]
+ vcmpnlt_uqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x16]
+ vcmpnle_uqss -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x17]
+ vcmpord_sss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x18]
+ vcmpeq_usss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x19]
+ vcmpnge_uqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1a]
+ vcmpngt_uqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1b]
+ vcmpfalse_osss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1c]
+ vcmpneq_osss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1d]
+ vcmpge_oqss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpss $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xca,0xc2,0x54,0xcb,0xfc,0x1e]
+ vcmpgt_oqss -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpss $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1a,0xc2,0x6c,0xcb,0xfc,0x1f]
+ vcmptrue_usss -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $8, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x08]
+ vcmpeq_uqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $9, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x09]
+ vcmpngesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $10, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0a]
+ vcmpngtsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $11, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0b]
+ vcmpfalsesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $12, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0c]
+ vcmpneq_oqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $13, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0d]
+ vcmpgesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $14, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0e]
+ vcmpgtsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $15, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x0f]
+ vcmptruesd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $16, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x10]
+ vcmpeq_ossd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $17, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x11]
+ vcmplt_oqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $18, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x12]
+ vcmple_oqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $19, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x13]
+ vcmpunord_ssd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $20, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x14]
+ vcmpneq_ussd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $21, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x15]
+ vcmpnlt_uqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $22, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x16]
+ vcmpnle_uqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $23, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x17]
+ vcmpord_ssd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $24, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x18]
+ vcmpeq_ussd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $25, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x19]
+ vcmpnge_uqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $26, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1a]
+ vcmpngt_uqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $27, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1b]
+ vcmpfalse_ossd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $28, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1c]
+ vcmpneq_ossd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $29, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1d]
+ vcmpge_oqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $30, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1e]
+ vcmpgt_oqsd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $31, %xmm11, %xmm12, %xmm13
+// CHECK: encoding: [0xc4,0x41,0x1b,0xc2,0xeb,0x1f]
+ vcmptrue_ussd %xmm11, %xmm12, %xmm13
+
+// CHECK: vcmpsd $8, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x08]
+ vcmpeq_uqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $9, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x09]
+ vcmpngesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $10, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0a]
+ vcmpngtsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $11, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0b]
+ vcmpfalsesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $12, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0c]
+ vcmpneq_oqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $13, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0d]
+ vcmpgesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $14, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x0e]
+ vcmpgtsd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $15, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x0f]
+ vcmptruesd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $16, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x10]
+ vcmpeq_ossd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $17, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x11]
+ vcmplt_oqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $18, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x12]
+ vcmple_oqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $19, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x13]
+ vcmpunord_ssd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $20, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x14]
+ vcmpneq_ussd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $21, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x15]
+ vcmpnlt_uqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $22, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x16]
+ vcmpnle_uqsd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $23, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x17]
+ vcmpord_ssd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $24, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x18]
+ vcmpeq_ussd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $25, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x19]
+ vcmpnge_uqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $26, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1a]
+ vcmpngt_uqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $27, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1b]
+ vcmpfalse_ossd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $28, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1c]
+ vcmpneq_ossd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $29, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1d]
+ vcmpge_oqsd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
+// CHECK: vcmpsd $30, -4(%rbx,%rcx,8), %xmm6, %xmm2
+// CHECK: encoding: [0xc5,0xcb,0xc2,0x54,0xcb,0xfc,0x1e]
+ vcmpgt_oqsd -4(%rbx,%rcx,8), %xmm6, %xmm2
+
+// CHECK: vcmpsd $31, -4(%rbx,%rcx,8), %xmm12, %xmm13
+// CHECK: encoding: [0xc5,0x1b,0xc2,0x6c,0xcb,0xfc,0x1f]
+ vcmptrue_ussd -4(%rbx,%rcx,8), %xmm12, %xmm13
+
// CHECK: vucomiss %xmm11, %xmm12
// CHECK: encoding: [0xc4,0x41,0x78,0x2e,0xe3]
vucomiss %xmm11, %xmm12
@@ -3346,3 +4114,10 @@ vdivpd -4(%rcx,%rbx,8), %xmm10, %xmm11
_foo:
nop
vpshufb _foo(%rip), %xmm0, %xmm0
+
+// CHECK: vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x4a,0x05,A,A,A,A,0x10]
+// CHECK: fixup A - offset: 5, value: _foo2-5
+_foo2:
+ nop
+ vblendvps %ymm1, _foo2(%rip), %ymm0, %ymm0
diff --git a/test/MC/X86/x86_64-bmi-encoding.s b/test/MC/X86/x86_64-bmi-encoding.s
new file mode 100644
index 000000000000..3e69d4af0b79
--- /dev/null
+++ b/test/MC/X86/x86_64-bmi-encoding.s
@@ -0,0 +1,202 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: blsmskl %r11d, %r10d
+// CHECK: encoding: [0xc4,0xc2,0x28,0xf3,0xd3]
+ blsmskl %r11d, %r10d
+
+// CHECK: blsmskq %r11, %r10
+// CHECK: encoding: [0xc4,0xc2,0xa8,0xf3,0xd3]
+ blsmskq %r11, %r10
+
+// CHECK: blsmskl (%rax), %r10d
+// CHECK: encoding: [0xc4,0xe2,0x28,0xf3,0x10]
+ blsmskl (%rax), %r10d
+
+// CHECK: blsmskq (%rax), %r10
+// CHECK: encoding: [0xc4,0xe2,0xa8,0xf3,0x10]
+ blsmskq (%rax), %r10
+
+// CHECK: blsil %r11d, %r10d
+// CHECK: encoding: [0xc4,0xc2,0x28,0xf3,0xdb]
+ blsil %r11d, %r10d
+
+// CHECK: blsiq %r11, %r10
+// CHECK: encoding: [0xc4,0xc2,0xa8,0xf3,0xdb]
+ blsiq %r11, %r10
+
+// CHECK: blsil (%rax), %r10d
+// CHECK: encoding: [0xc4,0xe2,0x28,0xf3,0x18]
+ blsil (%rax), %r10d
+
+// CHECK: blsiq (%rax), %r10
+// CHECK: encoding: [0xc4,0xe2,0xa8,0xf3,0x18]
+ blsiq (%rax), %r10
+
+// CHECK: blsrl %r11d, %r10d
+// CHECK: encoding: [0xc4,0xc2,0x28,0xf3,0xcb]
+ blsrl %r11d, %r10d
+
+// CHECK: blsrq %r11, %r10
+// CHECK: encoding: [0xc4,0xc2,0xa8,0xf3,0xcb]
+ blsrq %r11, %r10
+
+// CHECK: blsrl (%rax), %r10d
+// CHECK: encoding: [0xc4,0xe2,0x28,0xf3,0x08]
+ blsrl (%rax), %r10d
+
+// CHECK: blsrq (%rax), %r10
+// CHECK: encoding: [0xc4,0xe2,0xa8,0xf3,0x08]
+ blsrq (%rax), %r10
+
+// CHECK: andnl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x20,0xf2,0x10]
+ andnl (%rax), %r11d, %r10d
+
+// CHECK: andnq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa0,0xf2,0x10]
+ andnq (%rax), %r11, %r10
+
+// CHECK: bextrl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x18,0xf7,0x10]
+ bextrl %r12d, (%rax), %r10d
+
+// CHECK: bextrl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x18,0xf7,0xd3]
+ bextrl %r12d, %r11d, %r10d
+
+// CHECK: bextrq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x98,0xf7,0x10]
+ bextrq %r12, (%rax), %r10
+
+// CHECK: bextrq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x98,0xf7,0xd3]
+ bextrq %r12, %r11, %r10
+
+// CHECK: bzhil %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x18,0xf5,0x10]
+ bzhil %r12d, (%rax), %r10d
+
+// CHECK: bzhil %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x18,0xf5,0xd3]
+ bzhil %r12d, %r11d, %r10d
+
+// CHECK: bzhiq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x98,0xf5,0x10]
+ bzhiq %r12, (%rax), %r10
+
+// CHECK: bzhiq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x98,0xf5,0xd3]
+ bzhiq %r12, %r11, %r10
+
+// CHECK: pextl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x22,0xf5,0xd4]
+ pextl %r12d, %r11d, %r10d
+
+// CHECK: pextl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x22,0xf5,0x10]
+ pextl (%rax), %r11d, %r10d
+
+// CHECK: pextq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0xa2,0xf5,0xd4]
+ pextq %r12, %r11, %r10
+
+// CHECK: pextq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa2,0xf5,0x10]
+ pextq (%rax), %r11, %r10
+
+// CHECK: pdepl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x23,0xf5,0xd4]
+ pdepl %r12d, %r11d, %r10d
+
+// CHECK: pdepl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x23,0xf5,0x10]
+ pdepl (%rax), %r11d, %r10d
+
+// CHECK: pdepq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0xa3,0xf5,0xd4]
+ pdepq %r12, %r11, %r10
+
+// CHECK: pdepq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa3,0xf5,0x10]
+ pdepq (%rax), %r11, %r10
+
+// CHECK: mulxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x23,0xf6,0xd4]
+ mulxl %r12d, %r11d, %r10d
+
+// CHECK: mulxl (%rax), %r11d, %r10d
+// CHECK: encoding: [0xc4,0x62,0x23,0xf6,0x10]
+ mulxl (%rax), %r11d, %r10d
+
+// CHECK: mulxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0xa3,0xf6,0xd4]
+ mulxq %r12, %r11, %r10
+
+// CHECK: mulxq (%rax), %r11, %r10
+// CHECK: encoding: [0xc4,0x62,0xa3,0xf6,0x10]
+ mulxq (%rax), %r11, %r10
+
+// CHECK: rorxl $10, %r12d, %r10d
+// CHECK: encoding: [0xc4,0x43,0x7b,0xf0,0xd4,0x0a]
+ rorxl $10, %r12d, %r10d
+
+// CHECK: rorxl $31, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x63,0x7b,0xf0,0x10,0x1f]
+ rorxl $31, (%rax), %r10d
+
+// CHECK: rorxq $1, %r12, %r10
+// CHECK: encoding: [0xc4,0x43,0xfb,0xf0,0xd4,0x01]
+ rorxq $1, %r12, %r10
+
+// CHECK: rorxq $63, (%rax), %r10
+// CHECK: encoding: [0xc4,0x63,0xfb,0xf0,0x10,0x3f]
+ rorxq $63, (%rax), %r10
+
+// CHECK: shlxl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x19,0xf7,0x10]
+ shlxl %r12d, (%rax), %r10d
+
+// CHECK: shlxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x19,0xf7,0xd3]
+ shlxl %r12d, %r11d, %r10d
+
+// CHECK: shlxq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x99,0xf7,0x10]
+ shlxq %r12, (%rax), %r10
+
+// CHECK: shlxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x99,0xf7,0xd3]
+ shlxq %r12, %r11, %r10
+
+// CHECK: sarxl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x1a,0xf7,0x10]
+ sarxl %r12d, (%rax), %r10d
+
+// CHECK: sarxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x1a,0xf7,0xd3]
+ sarxl %r12d, %r11d, %r10d
+
+// CHECK: sarxq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x9a,0xf7,0x10]
+ sarxq %r12, (%rax), %r10
+
+// CHECK: sarxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x9a,0xf7,0xd3]
+ sarxq %r12, %r11, %r10
+
+// CHECK: shrxl %r12d, (%rax), %r10d
+// CHECK: encoding: [0xc4,0x62,0x1b,0xf7,0x10]
+ shrxl %r12d, (%rax), %r10d
+
+// CHECK: shrxl %r12d, %r11d, %r10d
+// CHECK: encoding: [0xc4,0x42,0x1b,0xf7,0xd3]
+ shrxl %r12d, %r11d, %r10d
+
+// CHECK: shrxq %r12, (%rax), %r10
+// CHECK: encoding: [0xc4,0x62,0x9b,0xf7,0x10]
+ shrxq %r12, (%rax), %r10
+
+// CHECK: shrxq %r12, %r11, %r10
+// CHECK: encoding: [0xc4,0x42,0x9b,0xf7,0xd3]
+ shrxq %r12, %r11, %r10
+
diff --git a/test/MC/X86/x86_64-fma4-encoding.s b/test/MC/X86/x86_64-fma4-encoding.s
new file mode 100644
index 000000000000..805fc23cf4cf
--- /dev/null
+++ b/test/MC/X86/x86_64-fma4-encoding.s
@@ -0,0 +1,391 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// vfmadd
+// CHECK: vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0x01,0x10]
+ vfmaddss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6a,0x01,0x10]
+ vfmaddss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
+ vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
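+
+// Editorial note, not part of the original test: in the three vfmaddss
+// forms above, the memory operand (when present) always sits in ModRM.rm,
+// and the VEX.W bit says which source that is. W=1 (second VEX byte 0xf9)
+// makes rm the last Intel-order source, as in the first form; W=0 (0x79)
+// makes it the middle source, as in the second form. The remaining
+// register source rides in the trailing imm8: 0x10 selects %xmm1 in its
+// high nibble.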
+
+// CHECK: vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0x01,0x10]
+ vfmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6b,0x01,0x10]
+ vfmaddsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+ vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0x01,0x10]
+ vfmaddps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x68,0x01,0x10]
+ vfmaddps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+ vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0x01,0x10]
+ vfmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x69,0x01,0x10]
+ vfmaddpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+ vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0x01,0x10]
+ vfmaddps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x68,0x01,0x10]
+ vfmaddps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+ vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0x01,0x10]
+ vfmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x69,0x01,0x10]
+ vfmaddpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+ vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmsub
+// CHECK: vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0x01,0x10]
+ vfmsubss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6e,0x01,0x10]
+ vfmsubss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10]
+ vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0x01,0x10]
+ vfmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6f,0x01,0x10]
+ vfmsubsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10]
+ vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0x01,0x10]
+ vfmsubps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6c,0x01,0x10]
+ vfmsubps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+ vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0x01,0x10]
+ vfmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x6d,0x01,0x10]
+ vfmsubpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+ vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0x01,0x10]
+ vfmsubps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x6c,0x01,0x10]
+ vfmsubps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+ vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0x01,0x10]
+ vfmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x6d,0x01,0x10]
+ vfmsubpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+ vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfnmadd
+// CHECK: vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0x01,0x10]
+ vfnmaddss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7a,0x01,0x10]
+ vfnmaddss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10]
+ vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0x01,0x10]
+ vfnmaddsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7b,0x01,0x10]
+ vfnmaddsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10]
+ vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0x01,0x10]
+ vfnmaddps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x78,0x01,0x10]
+ vfnmaddps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+ vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0x01,0x10]
+ vfnmaddpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x79,0x01,0x10]
+ vfnmaddpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+ vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0x01,0x10]
+ vfnmaddps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x78,0x01,0x10]
+ vfnmaddps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+ vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0x01,0x10]
+ vfnmaddpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmaddpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x79,0x01,0x10]
+ vfnmaddpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+ vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfnmsub
+// CHECK: vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0x01,0x10]
+ vfnmsubss (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubss %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7e,0x01,0x10]
+ vfnmsubss %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10]
+ vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0x01,0x10]
+ vfnmsubsd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubsd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7f,0x01,0x10]
+ vfnmsubsd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10]
+ vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0x01,0x10]
+ vfnmsubps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7c,0x01,0x10]
+ vfnmsubps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+ vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0x01,0x10]
+ vfnmsubpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x7d,0x01,0x10]
+ vfnmsubpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+ vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0x01,0x10]
+ vfnmsubps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x7c,0x01,0x10]
+ vfnmsubps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+ vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0x01,0x10]
+ vfnmsubpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfnmsubpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x7d,0x01,0x10]
+ vfnmsubpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+ vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmaddsub
+// CHECK: vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0x01,0x10]
+ vfmaddsubps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5c,0x01,0x10]
+ vfmaddsubps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+ vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0x01,0x10]
+ vfmaddsubpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5d,0x01,0x10]
+ vfmaddsubpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+ vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0x01,0x10]
+ vfmaddsubps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5c,0x01,0x10]
+ vfmaddsubps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+ vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0x01,0x10]
+ vfmaddsubpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmaddsubpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5d,0x01,0x10]
+ vfmaddsubpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+ vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
+
+// vfmsubadd
+// CHECK: vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0x01,0x10]
+ vfmsubaddps (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddps %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5e,0x01,0x10]
+ vfmsubaddps %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+ vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0x01,0x10]
+ vfmsubaddpd (%rcx), %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddpd %xmm1, (%rcx), %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x79,0x5f,0x01,0x10]
+ vfmsubaddpd %xmm1, (%rcx),%xmm0, %xmm0
+
+// CHECK: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+ vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
+
+// CHECK: vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0x01,0x10]
+ vfmsubaddps (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddps %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5e,0x01,0x10]
+ vfmsubaddps %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+ vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0x01,0x10]
+ vfmsubaddpd (%rcx), %ymm1, %ymm0, %ymm0
+
+// CHECK: vfmsubaddpd %ymm1, (%rcx), %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x7d,0x5f,0x01,0x10]
+ vfmsubaddpd %ymm1, (%rcx),%ymm0, %ymm0
+
+// CHECK: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+ vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
diff --git a/test/MC/X86/x86_64-xop-encoding.s b/test/MC/X86/x86_64-xop-encoding.s
new file mode 100644
index 000000000000..1137b71df04b
--- /dev/null
+++ b/test/MC/X86/x86_64-xop-encoding.s
@@ -0,0 +1,584 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+//////////////////////////
+// 2 operand instructions
+/////////////////////////
+
+// vphsubwd
+// CHECK: vphsubwd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0x0c,0x01]
+ vphsubwd (%rcx,%rax), %xmm1
+// CHECK: vphsubwd %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe2,0xc8]
+ vphsubwd %xmm0, %xmm1
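+
+// Editorial note, not part of the original test: XOP encodings open with
+// the 0x8f escape byte rather than VEX's 0xc4. In the bytes above, 0xe9
+// selects XOP opcode map 9 (inverted R/X/B = 111, map_select = 01001),
+// and 0x78 is the W/vvvv/L/pp byte with vvvv unused (all ones). The
+// immediate-carrying forms later in this file switch to map 8 (0xe8).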
+
+// vphsubdq
+// CHECK: vphsubdq (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0x0c,0x01]
+ vphsubdq (%rcx,%rax), %xmm1
+// CHECK: vphsubdq %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe3,0xc8]
+ vphsubdq %xmm0, %xmm1
+
+// vphsubbw
+// CHECK: vphsubbw (%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0x08]
+ vphsubbw (%rax), %xmm1
+// CHECK: vphsubbw %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xe1,0xca]
+ vphsubbw %xmm2, %xmm1
+
+// vphaddwq
+// CHECK: vphaddwq (%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0x21]
+ vphaddwq (%rcx), %xmm4
+// CHECK: vphaddwq %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc7,0xd6]
+ vphaddwq %xmm6, %xmm2
+
+// vphaddwd
+// CHECK: vphaddwd (%rdx,%rax), %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0x3c,0x02]
+ vphaddwd (%rdx,%rax), %xmm7
+// CHECK: vphaddwd %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc6,0xe3]
+ vphaddwd %xmm3, %xmm4
+
+// vphadduwq
+// CHECK: vphadduwq (%rcx,%rax), %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0x34,0x01]
+ vphadduwq (%rcx,%rax), %xmm6
+// CHECK: vphadduwq %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd7,0xc7]
+ vphadduwq %xmm7, %xmm0
+
+// vphadduwd
+// CHECK: vphadduwd (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0x28]
+ vphadduwd (%rax), %xmm5
+// CHECK: vphadduwd %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd6,0xca]
+ vphadduwd %xmm2, %xmm1
+
+// vphaddudq
+// CHECK: vphaddudq 8(%rcx,%rax), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0x64,0x01,0x08]
+ vphaddudq 8(%rcx,%rax), %xmm4
+// CHECK: vphaddudq %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xdb,0xd6]
+ vphaddudq %xmm6, %xmm2
+
+// vphaddubw
+// CHECK: vphaddubw (%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0x19]
+ vphaddubw (%rcx), %xmm3
+// CHECK: vphaddubw %xmm5, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd1,0xc5]
+ vphaddubw %xmm5, %xmm0
+
+// vphaddubq
+// CHECK: vphaddubq (%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0x21]
+ vphaddubq (%rcx), %xmm4
+// CHECK: vphaddubq %xmm2, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd3,0xd2]
+ vphaddubq %xmm2, %xmm2
+
+// vphaddubd
+// CHECK: vphaddubd (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0x28]
+ vphaddubd (%rax), %xmm5
+// CHECK: vphaddubd %xmm5, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0xd2,0xfd]
+ vphaddubd %xmm5, %xmm7
+
+// vphadddq
+// CHECK: vphadddq (%rdx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0x22]
+ vphadddq (%rdx), %xmm4
+// CHECK: vphadddq %xmm4, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0xcb,0xec]
+ vphadddq %xmm4, %xmm5
+
+// vphaddbw
+// CHECK: vphaddbw (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0x0c,0x01]
+ vphaddbw (%rcx,%rax), %xmm1
+// CHECK: vphaddbw %xmm5, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc1,0xf5]
+ vphaddbw %xmm5, %xmm6
+
+// vphaddbq
+// CHECK: vphaddbq (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0x0c,0x01]
+ vphaddbq (%rcx,%rax), %xmm1
+// CHECK: vphaddbq %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc3,0xc2]
+ vphaddbq %xmm2, %xmm0
+
+// vphaddbd
+// CHECK: vphaddbd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0x0c,0x01]
+ vphaddbd (%rcx,%rax), %xmm1
+// CHECK: vphaddbd %xmm1, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0xc2,0xd9]
+ vphaddbd %xmm1, %xmm3
+
+// vfrczss
+// CHECK: vfrczss (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0x0c,0x01]
+ vfrczss (%rcx,%rax), %xmm1
+// CHECK: vfrczss %xmm5, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0x78,0x82,0xfd]
+ vfrczss %xmm5, %xmm7
+
+// vfrczsd
+// CHECK: vfrczsd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0x0c,0x01]
+ vfrczsd (%rcx,%rax), %xmm1
+// CHECK: vfrczsd %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0x83,0xc7]
+ vfrczsd %xmm7, %xmm0
+
+// vfrczps
+// CHECK: vfrczps 4(%rax), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x58,0x04]
+ vfrczps 4(%rax), %xmm3
+// CHECK: vfrczps %xmm6, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0xee]
+ vfrczps %xmm6, %xmm5
+// CHECK: vfrczps (%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x80,0x09]
+ vfrczps (%rcx), %xmm1
+// CHECK: vfrczps %ymm2, %ymm4
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x80,0xe2]
+ vfrczps %ymm2, %ymm4
+
+// vfrczpd
+// CHECK: vfrczpd (%rcx,%rax), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0x0c,0x01]
+ vfrczpd (%rcx,%rax), %xmm1
+// CHECK: vfrczpd %xmm7, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x78,0x81,0xc7]
+ vfrczpd %xmm7, %xmm0
+// CHECK: vfrczpd (%rcx,%rax), %ymm2
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0x14,0x01]
+ vfrczpd (%rcx,%rax), %ymm2
+// CHECK: vfrczpd %ymm5, %ymm3
+// CHECK: encoding: [0x8f,0xe9,0x7c,0x81,0xdd]
+ vfrczpd %ymm5, %ymm3
+
+
+
+//////////////////////////
+// 3 operand instructions
+/////////////////////////
+
+// vpshlw
+// CHECK: vpshlw %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0xd1]
+ vpshlw %xmm0, %xmm1, %xmm2
+// CHECK: vpshlw (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x95,0x10]
+ vpshlw (%rax), %xmm1, %xmm2
+// CHECK: vpshlw %xmm0, (%rax,%rcx), %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x95,0x14,0x08]
+ vpshlw %xmm0, (%rax,%rcx), %xmm2
+
+// vpshlq
+// CHECK: vpshlq %xmm2, %xmm4, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x68,0x97,0xf4]
+ vpshlq %xmm2, %xmm4, %xmm6
+// CHECK: vpshlq (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x97,0x09]
+ vpshlq (%rcx), %xmm2, %xmm1
+// CHECK: vpshlq %xmm5, (%rdx,%rcx), %xmm6
+// CHECK: encoding: [0x8f,0xe9,0x50,0x97,0x34,0x0a]
+ vpshlq %xmm5, (%rdx,%rcx), %xmm6
+
+// vpshld
+// CHECK: vpshld %xmm7, %xmm5, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x40,0x96,0xdd]
+ vpshld %xmm7, %xmm5, %xmm3
+// CHECK: vpshld 4(%rax), %xmm3, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0xe0,0x96,0x58,0x04]
+ vpshld 4(%rax), %xmm3, %xmm3
+// CHECK: vpshld %xmm1, (%rax,%rcx), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x70,0x96,0x2c,0x08]
+ vpshld %xmm1, (%rax,%rcx), %xmm5
+
+// vpshlb
+// CHECK: vpshlb %xmm1, %xmm2, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x70,0x94,0xda]
+ vpshlb %xmm1, %xmm2, %xmm3
+// CHECK: vpshlb (%rcx), %xmm0, %xmm7
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x94,0x39]
+ vpshlb (%rcx), %xmm0, %xmm7
+// CHECK: vpshlb %xmm2, (%rax,%rdx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x68,0x94,0x1c,0x10]
+ vpshlb %xmm2, (%rax,%rdx), %xmm3
+
+// vpshaw
+// CHECK: vpshaw %xmm7, %xmm5, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x40,0x99,0xdd]
+ vpshaw %xmm7, %xmm5, %xmm3
+// CHECK: vpshaw (%rax), %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x99,0x08]
+ vpshaw (%rax), %xmm2, %xmm1
+// CHECK: vpshaw %xmm0, 8(%rax,%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x78,0x99,0x5c,0x08,0x08]
+ vpshaw %xmm0, 8(%rax,%rcx), %xmm3
+
+// vpshaq
+// CHECK: vpshaq %xmm4, %xmm4, %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x58,0x9b,0xe4]
+ vpshaq %xmm4, %xmm4, %xmm4
+// CHECK: vpshaq (%rcx), %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x9b,0x01]
+ vpshaq (%rcx), %xmm2, %xmm0
+// CHECK: vpshaq %xmm6, (%rax,%rcx), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x48,0x9b,0x2c,0x08]
+ vpshaq %xmm6, (%rax,%rcx), %xmm5
+
+// vpshad
+// CHECK: vpshad %xmm5, %xmm4, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x50,0x9a,0xc4]
+ vpshad %xmm5, %xmm4, %xmm0
+// CHECK: vpshad (%rax), %xmm2, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x9a,0x28]
+ vpshad (%rax), %xmm2, %xmm5
+// CHECK: vpshad %xmm2, (%rax), %xmm5
+// CHECK: encoding: [0x8f,0xe9,0x68,0x9a,0x28]
+ vpshad %xmm2, (%rax), %xmm5
+
+// vpshab
+// CHECK: vpshab %xmm1, %xmm1, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0x70,0x98,0xc1]
+ vpshab %xmm1, %xmm1, %xmm0
+// CHECK: vpshab (%rcx), %xmm4, %xmm0
+// CHECK: encoding: [0x8f,0xe9,0xd8,0x98,0x01]
+ vpshab (%rcx), %xmm4, %xmm0
+// CHECK: vpshab %xmm5, (%rcx), %xmm3
+// CHECK: encoding: [0x8f,0xe9,0x50,0x98,0x19]
+ vpshab %xmm5, (%rcx), %xmm3
+
+// vprotw
+// CHECK: vprotw (%rax), %xmm3, %xmm6
+// CHECK: encoding: [0x8f,0xe9,0xe0,0x91,0x30]
+ vprotw (%rax), %xmm3, %xmm6
+// CHECK: vprotw %xmm5, (%rax,%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe9,0x50,0x91,0x0c,0x08]
+ vprotw %xmm5, (%rax,%rcx), %xmm1
+// CHECK: vprotw %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x91,0xd1]
+ vprotw %xmm0, %xmm1, %xmm2
+// CHECK: vprotw $42, (%rcx), %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x09,0x2a]
+ vprotw $42, (%rcx), %xmm1
+// CHECK: vprotw $41, (%rax), %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0x20,0x29]
+ vprotw $41, (%rax), %xmm4
+// CHECK: vprotw $40, %xmm1, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc1,0xd9,0x28]
+ vprotw $40, %xmm1, %xmm3
+
+// vprotq
+// CHECK: vprotq (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x10]
+ vprotq (%rax), %xmm1, %xmm2
+// CHECK: vprotq (%rax,%rcx), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0xf0,0x93,0x14,0x08]
+ vprotq (%rax,%rcx), %xmm1, %xmm2
+// CHECK: vprotq %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x78,0x93,0xd1]
+ vprotq %xmm0, %xmm1, %xmm2
+// CHECK: vprotq $42, (%rax), %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x10,0x2a]
+ vprotq $42, (%rax), %xmm2
+// CHECK: vprotq $42, (%rax,%rcx), %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0x14,0x08,0x2a]
+ vprotq $42, (%rax,%rcx), %xmm2
+// CHECK: vprotq $42, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc3,0xd1,0x2a]
+ vprotq $42, %xmm1, %xmm2
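+
+// Editorial note, not part of the original test: the immediate rotate
+// forms above drop to XOP map 8 (second byte 0xe8) and append the rotate
+// count as a trailing imm8 (0x2a = 42), whereas the variable-count forms
+// stay on map 9 (0xe9) and take the count from a register or memory
+// operand.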
+
+// vprotd
+// CHECK: vprotd (%rax), %xmm0, %xmm3
+// CHECK: encoding: [0x8f,0xe9,0xf8,0x92,0x18]
+ vprotd (%rax), %xmm0, %xmm3
+// CHECK: vprotd %xmm2, (%rax,%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x68,0x92,0x24,0x08]
+ vprotd %xmm2, (%rax,%rcx), %xmm4
+// CHECK: vprotd %xmm5, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x50,0x92,0xd3]
+ vprotd %xmm5, %xmm3, %xmm2
+// CHECK: vprotd $43, (%rcx), %xmm6
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x31,0x2b]
+ vprotd $43, (%rcx), %xmm6
+// CHECK: vprotd $44, (%rax,%rcx), %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0x3c,0x08,0x2c]
+ vprotd $44, (%rax,%rcx), %xmm7
+// CHECK: vprotd $45, %xmm4, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc2,0xe4,0x2d]
+ vprotd $45, %xmm4, %xmm4
+
+// vprotb
+// CHECK: vprotb (%rcx), %xmm2, %xmm5
+// CHECK: encoding: [0x8f,0xe9,0xe8,0x90,0x29]
+ vprotb (%rcx), %xmm2, %xmm5
+// CHECK: vprotb %xmm5, (%rax,%rcx), %xmm4
+// CHECK: encoding: [0x8f,0xe9,0x50,0x90,0x24,0x08]
+ vprotb %xmm5, (%rax,%rcx), %xmm4
+// CHECK: vprotb %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe9,0x58,0x90,0xd3]
+ vprotb %xmm4, %xmm3, %xmm2
+// CHECK: vprotb $46, (%rax), %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x18,0x2e]
+ vprotb $46, (%rax), %xmm3
+// CHECK: vprotb $47, (%rax,%rcx), %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0x3c,0x08,0x2f]
+ vprotb $47, (%rax,%rcx), %xmm7
+// CHECK: vprotb $48, %xmm5, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x78,0xc0,0xed,0x30]
+ vprotb $48, %xmm5, %xmm5
+
+//////////////////////////
+// 4 operand instructions
+/////////////////////////
+
+// vpmadcswd
+// CHECK: vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0xe2,0x10]
+ vpmadcswd %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpmadcswd %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xb6,0x20,0x10]
+ vpmadcswd %xmm1, (%rax), %xmm3, %xmm4
+
+// vpmadcsswd
+// CHECK: vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x48,0xa6,0xe4,0x10]
+ vpmadcsswd %xmm1, %xmm4, %xmm6, %xmm4
+// CHECK: vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa6,0x24,0x08,0x10]
+ vpmadcsswd %xmm1, (%rax,%rcx), %xmm3, %xmm4
+
+// vpmacsww
+// CHECK: vpmacsww %xmm0, %xmm2, %xmm5, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x50,0x95,0xe2,0x00]
+ vpmacsww %xmm0, %xmm2, %xmm5, %xmm4
+// CHECK: vpmacsww %xmm1, (%rax), %xmm6, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x48,0x95,0x20,0x10]
+ vpmacsww %xmm1, (%rax), %xmm6, %xmm4
+
+// vpmacswd
+// CHECK: vpmacswd %xmm4, %xmm5, %xmm6, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x48,0x96,0xfd,0x40]
+ vpmacswd %xmm4, %xmm5, %xmm6, %xmm7
+// CHECK: vpmacswd %xmm0, (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0x96,0x10,0x00]
+ vpmacswd %xmm0, (%rax), %xmm1, %xmm2
+
+// vpmacssww
+// CHECK: vpmacssww %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x68,0x85,0xcb,0x40]
+ vpmacssww %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: vpmacssww %xmm6, (%rcx), %xmm7, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x40,0x85,0x39,0x60]
+ vpmacssww %xmm6, (%rcx), %xmm7, %xmm7
+
+// vpmacsswd
+// CHECK: vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0x86,0xd2,0x40]
+ vpmacsswd %xmm4, %xmm2, %xmm4, %xmm2
+// CHECK: vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x70,0x86,0x44,0x08,0x08,0x00]
+ vpmacsswd %xmm0, 8(%rax,%rcx), %xmm1, %xmm0
+
+// vpmacssdql
+// CHECK: vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x68,0x87,0xe1,0x10]
+ vpmacssdql %xmm1, %xmm1, %xmm2, %xmm4
+// CHECK: vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x48,0x87,0x29,0x70]
+ vpmacssdql %xmm7, (%rcx), %xmm6, %xmm5
+
+// vpmacssdqh
+// CHECK: vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x78,0x8f,0xca,0x30]
+ vpmacssdqh %xmm3, %xmm2, %xmm0, %xmm1
+// CHECK: vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x68,0x8f,0x1c,0x08,0x70]
+ vpmacssdqh %xmm7, (%rax,%rcx), %xmm2, %xmm3
+
+// vpmacssdd
+// CHECK: vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0x8e,0xea,0x20]
+ vpmacssdd %xmm2, %xmm2, %xmm3, %xmm5
+// CHECK: vpmacssdd %xmm4, (%rax), %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0x8e,0x10,0x40]
+ vpmacssdd %xmm4, (%rax), %xmm1, %xmm2
+
+// vpmacsdql
+// CHECK: vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x48,0x97,0xf8,0x30]
+ vpmacsdql %xmm3, %xmm0, %xmm6, %xmm7
+// CHECK: vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0x97,0x69,0x08,0x50]
+ vpmacsdql %xmm5, 8(%rcx), %xmm3, %xmm5
+
+// vpmacsdqh
+// CHECK: vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x60,0x9f,0xd5,0x70]
+ vpmacsdqh %xmm7, %xmm5, %xmm3, %xmm2
+// CHECK: vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x68,0x9f,0x40,0x04,0x50]
+ vpmacsdqh %xmm5, 4(%rax), %xmm2, %xmm0
+
+// vpmacsdd
+// CHECK: vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0xd6,0x40]
+ vpmacsdd %xmm4, %xmm6, %xmm4, %xmm2
+// CHECK: vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x58,0x9e,0x1c,0x08,0x40]
+ vpmacsdd %xmm4, (%rax,%rcx), %xmm4, %xmm3
+
+// vpcomw
+// CHECK: vpcomw $42, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0xe2,0x2a]
+ vpcomw $42, %xmm2, %xmm3, %xmm4
+// CHECK: vpcomw $42, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcd,0x20,0x2a]
+ vpcomw $42, (%rax), %xmm3, %xmm4
+
+// vpcomuw
+// CHECK: vpcomuw $43, %xmm1, %xmm3, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x60,0xed,0xe9,0x2b]
+ vpcomuw $43, %xmm1, %xmm3, %xmm5
+// CHECK: vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6
+// CHECK: encoding: [0x8f,0xe8,0x78,0xed,0x34,0x08,0x2c]
+ vpcomuw $44, (%rax,%rcx), %xmm0, %xmm6
+
+// vpcomuq
+// CHECK: vpcomuq $45, %xmm3, %xmm3, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0xfb,0x2d]
+ vpcomuq $45, %xmm3, %xmm3, %xmm7
+// CHECK: vpcomuq $46, (%rax), %xmm3, %xmm1
+// CHECK: encoding: [0x8f,0xe8,0x60,0xef,0x08,0x2e]
+ vpcomuq $46, (%rax), %xmm3, %xmm1
+
+// vpcomud
+// CHECK: vpcomud $47, %xmm0, %xmm1, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x70,0xee,0xd0,0x2f]
+ vpcomud $47, %xmm0, %xmm1, %xmm2
+// CHECK: vpcomud $48, 4(%rax), %xmm6, %xmm3
+// CHECK: encoding: [0x8f,0xe8,0x48,0xee,0x58,0x04,0x30]
+ vpcomud $48, 4(%rax), %xmm6, %xmm3
+
+// vpcomub
+// CHECK: vpcomub $49, %xmm3, %xmm4, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x58,0xec,0xeb,0x31]
+ vpcomub $49, %xmm3, %xmm4, %xmm5
+// CHECK: vpcomub $50, (%rcx), %xmm6, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x48,0xec,0x11,0x32]
+ vpcomub $50, (%rcx), %xmm6, %xmm2
+
+// vpcomq
+// CHECK: vpcomq $51, %xmm3, %xmm0, %xmm5
+// CHECK: encoding: [0x8f,0xe8,0x78,0xcf,0xeb,0x33]
+ vpcomq $51, %xmm3, %xmm0, %xmm5
+// CHECK: vpcomq $52, (%rax), %xmm1, %xmm7
+// CHECK: encoding: [0x8f,0xe8,0x70,0xcf,0x38,0x34]
+ vpcomq $52, (%rax), %xmm1, %xmm7
+
+// vpcomd
+// CHECK: vpcomd $53, %xmm3, %xmm3, %xmm0
+// CHECK: encoding: [0x8f,0xe8,0x60,0xce,0xc3,0x35]
+ vpcomd $53, %xmm3, %xmm3, %xmm0
+// CHECK: vpcomd $54, (%rcx), %xmm2, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x68,0xce,0x11,0x36]
+ vpcomd $54, (%rcx), %xmm2, %xmm2
+
+// vpcomb
+// CHECK: vpcomb $55, %xmm6, %xmm4, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x58,0xcc,0xd6,0x37]
+ vpcomb $55, %xmm6, %xmm4, %xmm2
+// CHECK: vpcomb $56, 8(%rax), %xmm3, %xmm2
+// CHECK: encoding: [0x8f,0xe8,0x60,0xcc,0x50,0x08,0x38]
+ vpcomb $56, 8(%rax), %xmm3, %xmm2
+
+
+// vpperm
+// CHECK: vpperm %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0xe2,0x10]
+ vpperm %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpperm (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0xe0,0xa3,0x20,0x20]
+ vpperm (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: vpperm %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa3,0x20,0x10]
+ vpperm %xmm1, (%rax), %xmm3, %xmm4
+
+// vpcmov
+// CHECK: vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0xe2,0x10]
+ vpcmov %xmm1, %xmm2, %xmm3, %xmm4
+// CHECK: vpcmov (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0xe0,0xa2,0x20,0x20]
+ vpcmov (%rax), %xmm2, %xmm3, %xmm4
+// CHECK: vpcmov %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0x8f,0xe8,0x60,0xa2,0x20,0x10]
+ vpcmov %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0xe2,0x10]
+ vpcmov %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: vpcmov (%rax), %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0xe4,0xa2,0x20,0x20]
+ vpcmov (%rax), %ymm2, %ymm3, %ymm4
+// CHECK: vpcmov %ymm1, (%rax), %ymm3, %ymm4
+// CHECK: encoding: [0x8f,0xe8,0x64,0xa2,0x20,0x10]
+ vpcmov %ymm1, (%rax), %ymm3, %ymm4
+
+
+//////////////////////////
+// 5 operand instructions
+/////////////////////////
+// vpermil2pd
+// CHECK: vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7
+// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0xfa,0x51]
+ vpermil2pd $1, %xmm5, %xmm2, %xmm1, %xmm7
+// CHECK: vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4
+// CHECK: encoding: [0xc4,0xe3,0xe1,0x49,0x20,0x32]
+ vpermil2pd $2, (%rax), %xmm3, %xmm3, %xmm4
+// CHECK: vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6
+// CHECK: encoding: [0xc4,0xe3,0xdd,0x49,0x70,0x08,0x03]
+ vpermil2pd $3, 8(%rax), %ymm0, %ymm4, %ymm6
+// CHECK: vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0x71,0x49,0x04,0x08,0x30]
+ vpermil2pd $0, %xmm3, (%rax,%rcx), %xmm1, %xmm0
+// CHECK: vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0xe2,0x11]
+ vpermil2pd $1, %ymm1, %ymm2, %ymm3, %ymm4
+// CHECK: vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4
+// CHECK: encoding: [0xc4,0xe3,0x65,0x49,0x20,0x12]
+ vpermil2pd $2, %ymm1, (%rax), %ymm3, %ymm4
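+
+// Editorial note, not part of the original test: vpermil2pd packs two
+// fields into its final byte -- one of the register sources in the high
+// nibble and the 2-bit selector immediate in the low bits. In the first
+// form above, 0x51 encodes %xmm5 and $1; in the memory form, 0x32 encodes
+// %xmm3 and $2.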
+
+// vpermil2ps
+// CHECK: vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0xc4,0xe3,0x69,0x48,0xcb,0x40]
+ vpermil2ps $0, %xmm4, %xmm3, %xmm2, %xmm1
+// CHECK: vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+// CHECK: encoding: [0xc4,0xe3,0xe1,0x48,0x40,0x04,0x21]
+ vpermil2ps $1, 4(%rax), %xmm2, %xmm3, %xmm0
+// CHECK: vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+// CHECK: encoding: [0xc4,0xe3,0xd5,0x48,0x30,0x12]
+ vpermil2ps $2, (%rax), %ymm1, %ymm5, %ymm6
+// CHECK: vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: encoding: [0xc4,0xe3,0x61,0x48,0x20,0x13]
+ vpermil2ps $3, %xmm1, (%rax), %xmm3, %xmm4
+// CHECK: vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+// CHECK: encoding: [0xc4,0xe3,0x6d,0x48,0xd4,0x40]
+ vpermil2ps $0, %ymm4, %ymm4, %ymm2, %ymm2
+// CHECK: vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+// CHECK: encoding: [0xc4,0xe3,0x75,0x49,0x40,0x04,0x11]
+ vpermil2pd $1, %ymm1, 4(%rax), %ymm1, %ymm0
+
diff --git a/test/MC/X86/x86_errors.s b/test/MC/X86/x86_errors.s
index e0a2c676d3df..f161e06cb580 100644
--- a/test/MC/X86/x86_errors.s
+++ b/test/MC/X86/x86_errors.s
@@ -15,3 +15,16 @@ addl $0, 0(%rax)
# 8 "test.s"
movi $8,%eax
+
+movl 0(%rax), 0(%edx) // error: invalid operand for instruction
+
+// 32: error: instruction requires a CPU feature not currently enabled
+sysexitq
+
+// rdar://10710167
+// 64: error: expected scale expression
+lea (%rsp, %rbp, $4), %rax
+
+// rdar://10423777
+// 64: error: index register is 32-bit, but base register is 64-bit
+movq (%rsi,%ecx),%xmm0
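+
+// Editorial note, not part of the original test: the "32:" and "64:"
+// prefixes above are FileCheck check prefixes; they presumably pair with
+// 32-bit and 64-bit RUN lines earlier in this file (outside this hunk),
+// so each diagnostic is only asserted for the matching target width.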
diff --git a/test/Makefile b/test/Makefile
index c0bc36c54dbc..a4e53f8d03f8 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -59,7 +59,7 @@ extra-lit-site-cfgs::
ifneq ($(strip $(filter check-local-all,$(MAKECMDGOALS))),)
ifndef TESTSUITE
-ifeq ($(shell test -d $(PROJ_SRC_DIR)/../tools/clang && echo OK), OK)
+ifeq ($(shell test -f $(PROJ_OBJ_DIR)/../tools/clang/Makefile && echo OK), OK)
LIT_ALL_TESTSUITES += $(PROJ_OBJ_DIR)/../tools/clang/test
# Force creation of Clang's lit.site.cfg.
@@ -121,11 +121,6 @@ DSYMUTIL=dsymutil
else
DSYMUTIL=true
endif
-ifdef TargetCommonOpts
-BUGPOINT_TOPTS="-gcc-tool-args $(TargetCommonOpts)"
-else
-BUGPOINT_TOPTS=""
-endif
ifneq ($(OCAMLOPT),)
CC_FOR_OCAMLOPT := $(shell $(OCAMLOPT) -config | grep native_c_compiler | sed -e 's/native_c_compiler: //')
@@ -140,30 +135,18 @@ site.exp: FORCE
@echo '# Do not edit!' >> site.tmp
@echo 'set target_triplet "$(TARGET_TRIPLE)"' >> site.tmp
@echo 'set TARGETS_TO_BUILD "$(TARGETS_TO_BUILD)"' >> site.tmp
- @echo 'set llvmgcc_langs "$(LLVMGCC_LANGS)"' >> site.tmp
- @echo 'set llvmtoolsdir "$(ToolDir)"' >>site.tmp
- @echo 'set llvmlibsdir "$(LibDir)"' >>site.tmp
@echo 'set llvmshlibdir "$(SharedLibDir)"' >>site.tmp
@echo 'set llvm_bindings "$(BINDINGS_TO_BUILD)"' >> site.tmp
@echo 'set srcroot "$(LLVM_SRC_ROOT)"' >>site.tmp
@echo 'set objroot "$(LLVM_OBJ_ROOT)"' >>site.tmp
@echo 'set srcdir "$(LLVM_SRC_ROOT)/test"' >>site.tmp
@echo 'set objdir "$(LLVM_OBJ_ROOT)/test"' >>site.tmp
- @echo 'set gccpath "$(CC)"' >>site.tmp
- @echo 'set gxxpath "$(CXX)"' >>site.tmp
- @echo 'set compile_c "' $(CC) $(CPP.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c '"' >>site.tmp
- @echo 'set compile_cxx "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) -c -x c++ '"' >> site.tmp
@echo 'set link "' $(CXX) $(CPP.Flags) $(CXX.Flags) $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) '"' >>site.tmp
- @echo 'set llvmgcc "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp
- @echo 'set llvmgxx "$(LLVMGCC) $(TargetCommonOpts) $(EXTRA_OPTIONS)"' >> site.tmp
- @echo 'set bugpoint_topts $(BUGPOINT_TOPTS)' >> site.tmp
@echo 'set shlibext "$(SHLIBEXT)"' >> site.tmp
@echo 'set ocamlopt "$(OCAMLOPT) -cc \"$(CXX_FOR_OCAMLOPT)\" -I $(LibDir)/ocaml"' >> site.tmp
@echo 'set valgrind "$(VALGRIND)"' >> site.tmp
@echo 'set grep "$(GREP)"' >>site.tmp
@echo 'set gas "$(GAS)"' >>site.tmp
- @echo 'set llvmdsymutil "$(DSYMUTIL)"' >>site.tmp
- @echo 'set emitir "$(LLVMCC_EMITIR_FLAG)"' >>site.tmp
@echo '## All variables above are generated by configure. Do Not Edit ## ' >>site.tmp
@test ! -f site.exp || \
sed '1,/^## All variables above are.*##/ d' site.exp >> site.tmp
@@ -186,6 +169,8 @@ lit.site.cfg: site.exp
@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
+ @$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
+ @$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
@-rm -f lit.tmp
diff --git a/test/Object/TestObjectFiles/archive-test.a-coff-i386 b/test/Object/Inputs/archive-test.a-coff-i386
index 846cd636f1ed..846cd636f1ed 100644
--- a/test/Object/TestObjectFiles/archive-test.a-coff-i386
+++ b/test/Object/Inputs/archive-test.a-coff-i386
Binary files differ
diff --git a/test/Object/Inputs/elf-versioning-test.i386 b/test/Object/Inputs/elf-versioning-test.i386
new file mode 100755
index 000000000000..c7c1eac4977e
--- /dev/null
+++ b/test/Object/Inputs/elf-versioning-test.i386
Binary files differ
diff --git a/test/Object/Inputs/elf-versioning-test.x86_64 b/test/Object/Inputs/elf-versioning-test.x86_64
new file mode 100755
index 000000000000..cba79baeb4c6
--- /dev/null
+++ b/test/Object/Inputs/elf-versioning-test.x86_64
Binary files differ
diff --git a/test/Object/Inputs/elfver.S b/test/Object/Inputs/elfver.S
new file mode 100644
index 000000000000..ba63279c7a6b
--- /dev/null
+++ b/test/Object/Inputs/elfver.S
@@ -0,0 +1,31 @@
+# Compile with:
+# ARGS="-shared -nostdlib -Wl,--version-script=elfver.script"
+# clang $ARGS -m32 elfver.S -lc -o elf-versioning-test.i386
+# clang $ARGS -m64 elfver.S -lc -o elf-versioning-test.x86_64
+
+# Also, strip off non-dynamic symbols:
+# strip elf-versioning-test.i386
+# strip elf-versioning-test.x86_64
+
+#ifdef __i386__
+.symver _puts, puts@GLIBC_2.0
+#else
+.symver _puts, puts@GLIBC_2.2.5
+#endif
+call _puts@PLT
+
+.symver foo1, foo@VER1
+.globl foo1
+.type foo1, @function
+foo1:
+ ret
+
+.symver foo2, foo@@VER2
+.globl foo2
+.type foo2, @function
+foo2:
+ ret
+
+.globl unversioned_define
+.type unversioned_define, @function
+unversioned_define:
diff --git a/test/Object/Inputs/elfver.script b/test/Object/Inputs/elfver.script
new file mode 100644
index 000000000000..1316fcb84c8d
--- /dev/null
+++ b/test/Object/Inputs/elfver.script
@@ -0,0 +1,10 @@
+VER1 {
+ global:
+ foo;
+};
+
+VER2 {
+ global:
+ foo;
+} VER1;
+
diff --git a/test/Object/Inputs/shared-object-test.elf-i386 b/test/Object/Inputs/shared-object-test.elf-i386
new file mode 100644
index 000000000000..fb639155d760
--- /dev/null
+++ b/test/Object/Inputs/shared-object-test.elf-i386
Binary files differ
diff --git a/test/Object/Inputs/shared-object-test.elf-x86-64 b/test/Object/Inputs/shared-object-test.elf-x86-64
new file mode 100644
index 000000000000..92667f58d656
--- /dev/null
+++ b/test/Object/Inputs/shared-object-test.elf-x86-64
Binary files differ
diff --git a/test/Object/Inputs/shared.ll b/test/Object/Inputs/shared.ll
new file mode 100644
index 000000000000..1a62d560b93a
--- /dev/null
+++ b/test/Object/Inputs/shared.ll
@@ -0,0 +1,33 @@
+; How to make the shared objects from this file:
+;
+; LDARGS="--unresolved-symbols=ignore-all -soname=libfoo.so --no-as-needed -lc -lm"
+;
+; X86-32 ELF:
+; llc -mtriple=i386-linux-gnu shared.ll -filetype=obj -o tmp32.o -relocation-model=pic
+; ld -melf_i386 -shared tmp32.o -o shared-object-test.elf-i386 $LDARGS
+;
+; X86-64 ELF:
+; llc -mtriple=x86_64-linux-gnu shared.ll -filetype=obj -o tmp64.o -relocation-model=pic
+; ld -melf_x86_64 -shared tmp64.o -o shared-object-test.elf-x86-64 $LDARGS
+
+@defined_sym = global i32 1, align 4
+
+@tls_sym = thread_local global i32 2, align 4
+
+@undef_sym = external global i32
+
+@undef_tls_sym = external thread_local global i32
+
+@common_sym = common global i32 0, align 4
+
+define i32 @global_func() nounwind uwtable {
+entry:
+ ret i32 0
+}
+
+declare i32 @undef_func(...)
+
+define internal i32 @local_func() nounwind uwtable {
+entry:
+ ret i32 0
+}
diff --git a/test/Object/TestObjectFiles/trivial-object-test.coff-i386 b/test/Object/Inputs/trivial-object-test.coff-i386
index 8cfd9949b1bd..8cfd9949b1bd 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.coff-i386
+++ b/test/Object/Inputs/trivial-object-test.coff-i386
Binary files differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64 b/test/Object/Inputs/trivial-object-test.coff-x86-64
index 077591482cea..077591482cea 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64
+++ b/test/Object/Inputs/trivial-object-test.coff-x86-64
Binary files differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.elf-i386 b/test/Object/Inputs/trivial-object-test.elf-i386
index 1a0ea40dfe12..1a0ea40dfe12 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.elf-i386
+++ b/test/Object/Inputs/trivial-object-test.elf-i386
Binary files differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64 b/test/Object/Inputs/trivial-object-test.elf-x86-64
index 889f5d96a699..889f5d96a699 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64
+++ b/test/Object/Inputs/trivial-object-test.elf-x86-64
Binary files differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.macho-i386 b/test/Object/Inputs/trivial-object-test.macho-i386
index 099bd1ebf23b..099bd1ebf23b 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.macho-i386
+++ b/test/Object/Inputs/trivial-object-test.macho-i386
Binary files differ
diff --git a/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64 b/test/Object/Inputs/trivial-object-test.macho-x86-64
index 93eeb5deceb0..93eeb5deceb0 100644
--- a/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64
+++ b/test/Object/Inputs/trivial-object-test.macho-x86-64
Binary files differ
diff --git a/test/Object/Inputs/trivial.ll b/test/Object/Inputs/trivial.ll
new file mode 100644
index 000000000000..25ece7611a31
--- /dev/null
+++ b/test/Object/Inputs/trivial.ll
@@ -0,0 +1,12 @@
+@.str = private unnamed_addr constant [13 x i8] c"Hello World\0A\00", align 1
+
+define i32 @main() nounwind {
+entry:
+ %call = tail call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+ tail call void bitcast (void (...)* @SomeOtherFunction to void ()*)() nounwind
+ ret i32 0
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare void @SomeOtherFunction(...)
diff --git a/test/Object/TestObjectFiles/archive-test.a-bitcode b/test/Object/TestObjectFiles/archive-test.a-bitcode
deleted file mode 100644
index 3aeb34fa3342..000000000000
--- a/test/Object/TestObjectFiles/archive-test.a-bitcode
+++ /dev/null
Binary files differ
diff --git a/test/Object/X86/lit.local.cfg b/test/Object/X86/lit.local.cfg
new file mode 100644
index 000000000000..6a29e9250f3c
--- /dev/null
+++ b/test/Object/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.test']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
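+# Editorial note: config.root.targets_to_build is filled in from the
+# @TARGETS_TO_BUILD@ substitution added to test/Makefile in this same
+# patch, so these X86 object tests are skipped when the X86 backend is
+# not built.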
diff --git a/test/Object/X86/objdump-disassembly-inline-relocations.test b/test/Object/X86/objdump-disassembly-inline-relocations.test
new file mode 100644
index 000000000000..a5875f6a2f96
--- /dev/null
+++ b/test/Object/X86/objdump-disassembly-inline-relocations.test
@@ -0,0 +1,32 @@
+RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -d -r %p/../Inputs/trivial-object-test.coff-x86-64 \
+RUN: | FileCheck %s -check-prefix COFF-x86-64
+
+COFF-i386: file format COFF-i386
+COFF-i386: Disassembly of section .text:
+COFF-i386: 0: 83 ec 0c subl $12, %esp
+COFF-i386: 3: c7 44 24 08 00 00 00 00 movl $0, 8(%esp)
+COFF-i386: b: c7 04 24 00 00 00 00 movl $0, (%esp)
+COFF-i386: e: IMAGE_REL_I386_DIR32 L_.str
+COFF-i386: 12: e8 00 00 00 00 calll 0
+COFF-i386: 13: IMAGE_REL_I386_REL32 _puts
+COFF-i386: 17: e8 00 00 00 00 calll 0
+COFF-i386: 18: IMAGE_REL_I386_REL32 _SomeOtherFunction
+COFF-i386: 1c: 8b 44 24 08 movl 8(%esp), %eax
+COFF-i386: 20: 83 c4 0c addl $12, %esp
+COFF-i386: 23: c3 ret
+
+COFF-x86-64: file format COFF-x86-64
+COFF-x86-64: Disassembly of section .text:
+COFF-x86-64: 0: 48 83 ec 28 subq $40, %rsp
+COFF-x86-64: 4: c7 44 24 24 00 00 00 00 movl $0, 36(%rsp)
+COFF-x86-64: c: 48 8d 0d 00 00 00 00 leaq (%rip), %rcx
+COFF-x86-64: f: IMAGE_REL_AMD64_REL32 L.str
+COFF-x86-64: 13: e8 00 00 00 00 callq 0
+COFF-x86-64: 14: IMAGE_REL_AMD64_REL32 puts
+COFF-x86-64: 18: e8 00 00 00 00 callq 0
+COFF-x86-64: 19: IMAGE_REL_AMD64_REL32 SomeOtherFunction
+COFF-x86-64: 1d: 8b 44 24 24 movl 36(%rsp), %eax
+COFF-x86-64: 21: 48 83 c4 28 addq $40, %rsp
+COFF-x86-64: 25: c3 ret
diff --git a/test/Object/X86/objdump-trivial-object.test b/test/Object/X86/objdump-trivial-object.test
new file mode 100644
index 000000000000..8f9ea974d275
--- /dev/null
+++ b/test/Object/X86/objdump-trivial-object.test
@@ -0,0 +1,54 @@
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.coff-x86-64 \
+RUN: | FileCheck %s -check-prefix COFF-x86-64
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -d %p/../Inputs/trivial-object-test.elf-x86-64 \
+RUN: | FileCheck %s -check-prefix ELF-x86-64
+
+COFF-i386: file format COFF-i386
+COFF-i386: Disassembly of section .text:
+COFF-i386: 0: 83 ec 0c subl $12, %esp
+COFF-i386: 3: c7 44 24 08 00 00 00 00 movl $0, 8(%esp)
+COFF-i386: b: c7 04 24 00 00 00 00 movl $0, (%esp)
+COFF-i386: 12: e8 00 00 00 00 calll 0
+COFF-i386: 17: e8 00 00 00 00 calll 0
+COFF-i386: 1c: 8b 44 24 08 movl 8(%esp), %eax
+COFF-i386: 20: 83 c4 0c addl $12, %esp
+COFF-i386: 23: c3 ret
+
+COFF-x86-64: file format COFF-x86-64
+COFF-x86-64: Disassembly of section .text:
+COFF-x86-64: 0: 48 83 ec 28 subq $40, %rsp
+COFF-x86-64: 4: c7 44 24 24 00 00 00 00 movl $0, 36(%rsp)
+COFF-x86-64: c: 48 8d 0d 00 00 00 00 leaq (%rip), %rcx
+COFF-x86-64: 13: e8 00 00 00 00 callq 0
+COFF-x86-64: 18: e8 00 00 00 00 callq 0
+COFF-x86-64: 1d: 8b 44 24 24 movl 36(%rsp), %eax
+COFF-x86-64: 21: 48 83 c4 28 addq $40, %rsp
+COFF-x86-64: 25: c3 ret
+
+
+ELF-i386: file format ELF32-i386
+ELF-i386: Disassembly of section .text:
+ELF-i386: 0: 83 ec 0c subl $12, %esp
+ELF-i386: 3: c7 44 24 08 00 00 00 00 movl $0, 8(%esp)
+ELF-i386: b: c7 04 24 00 00 00 00 movl $0, (%esp)
+ELF-i386: 12: e8 fc ff ff ff calll -4
+ELF-i386: 17: e8 fc ff ff ff calll -4
+ELF-i386: 1c: 8b 44 24 08 movl 8(%esp), %eax
+ELF-i386: 20: 83 c4 0c addl $12, %esp
+ELF-i386: 23: c3 ret
+
+ELF-x86-64: file format ELF64-x86-64
+ELF-x86-64: Disassembly of section .text:
+ELF-x86-64: 0: 48 83 ec 08 subq $8, %rsp
+ELF-x86-64: 4: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp)
+ELF-x86-64: c: bf 00 00 00 00 movl $0, %edi
+ELF-x86-64: 11: e8 00 00 00 00 callq 0
+ELF-x86-64: 16: 30 c0 xorb %al, %al
+ELF-x86-64: 18: e8 00 00 00 00 callq 0
+ELF-x86-64: 1d: 8b 44 24 04 movl 4(%rsp), %eax
+ELF-x86-64: 21: 48 83 c4 08 addq $8, %rsp
+ELF-x86-64: 25: c3 ret
diff --git a/test/Object/dg.exp b/test/Object/dg.exp
deleted file mode 100644
index be82c513920e..000000000000
--- a/test/Object/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{test}]]
diff --git a/test/Object/lit.local.cfg b/test/Object/lit.local.cfg
new file mode 100644
index 000000000000..df9b335dd131
--- /dev/null
+++ b/test/Object/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.test']
diff --git a/test/Object/nm-archive.test b/test/Object/nm-archive.test
index da6144ee712d..2d96b73a2714 100644
--- a/test/Object/nm-archive.test
+++ b/test/Object/nm-archive.test
@@ -1,7 +1,8 @@
-RUN: llvm-nm %p/TestObjectFiles/archive-test.a-coff-i386 \
+RUN: llvm-nm %p/Inputs/archive-test.a-coff-i386 \
RUN: | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/TestObjectFiles/archive-test.a-bitcode \
-RUN: | FileCheck %s -check-prefix BITCODE
+RUN: llvm-as %p/Inputs/trivial.ll -o=%t1
+RUN: llvm-ar rcs %t2 %t1
+RUN: llvm-nm %t2 | FileCheck %s -check-prefix BITCODE
COFF: trivial-object-test.coff-i386:
diff --git a/test/Object/nm-shared-object.test b/test/Object/nm-shared-object.test
new file mode 100644
index 000000000000..b361df535553
--- /dev/null
+++ b/test/Object/nm-shared-object.test
@@ -0,0 +1,15 @@
+RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-nm -D %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: | FileCheck %s -check-prefix ELF
+
+; Note: tls_sym should be 'D' (not '?'), but TLS is not
+; yet recognized by ObjectFile.
+
+ELF: {{[0-9a-f]+}} A __bss_start
+ELF: {{[0-9a-f]+}} A _edata
+ELF: {{[0-9a-f]+}} A _end
+ELF: {{[0-9a-f]+}} B common_sym
+ELF: {{[0-9a-f]+}} D defined_sym
+ELF: {{[0-9a-f]+}} T global_func
+ELF: ? tls_sym
diff --git a/test/Object/nm-trivial-object.test b/test/Object/nm-trivial-object.test
index 6de1780a1608..e5635ab4758e 100644
--- a/test/Object/nm-trivial-object.test
+++ b/test/Object/nm-trivial-object.test
@@ -1,11 +1,15 @@
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.coff-i386 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.coff-i386 \
RUN: | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.coff-x86-64 \
RUN: | FileCheck %s -check-prefix COFF
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.elf-i386 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF
-RUN: llvm-nm %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+RUN: llvm-nm %p/Inputs/trivial-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-nm %p/Inputs/trivial-object-test.macho-i386 \
+RUN: | FileCheck %s -check-prefix macho
+RUN: llvm-nm %p/Inputs/trivial-object-test.macho-x86-64 \
+RUN: | FileCheck %s -check-prefix macho64
COFF: 00000000 d .data
COFF: 00000000 t .text
@@ -17,3 +21,13 @@ COFF: U {{_?}}puts
ELF: U SomeOtherFunction
ELF: 00000000 T main
ELF: U puts
+
+
+macho: 00000000 U _SomeOtherFunction
+macho: 00000000 s _main
+macho: 00000000 U _puts
+
+macho64: 00000028 s L_.str
+macho64: 00000000 u _SomeOtherFunction
+macho64: 00000000 s _main
+macho64: 00000000 u _puts
\ No newline at end of file
diff --git a/test/Object/objdump-disassembly-inline-relocations.test b/test/Object/objdump-disassembly-inline-relocations.test
deleted file mode 100644
index 91f2e48f6230..000000000000
--- a/test/Object/objdump-disassembly-inline-relocations.test
+++ /dev/null
@@ -1,32 +0,0 @@
-RUN: llvm-objdump -d -r %p/TestObjectFiles/trivial-object-test.coff-i386 \
-RUN: | FileCheck %s -check-prefix COFF-i386
-RUN: llvm-objdump -d -r %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
-RUN: | FileCheck %s -check-prefix COFF-x86-64
-
-COFF-i386: file format COFF-i386
-COFF-i386: Disassembly of section .text:
-COFF-i386: 0: 83 ec 0c subl $12, %esp
-COFF-i386: 3: c7 44 24 08 00 00 00 00 movl $0, 8(%esp)
-COFF-i386: b: c7 04 24 00 00 00 00 movl $0, (%esp)
-COFF-i386: e: IMAGE_REL_I386_DIR32 L_.str
-COFF-i386: 12: e8 00 00 00 00 calll 0
-COFF-i386: 13: IMAGE_REL_I386_REL32 _puts
-COFF-i386: 17: e8 00 00 00 00 calll 0
-COFF-i386: 18: IMAGE_REL_I386_REL32 _SomeOtherFunction
-COFF-i386: 1c: 8b 44 24 08 movl 8(%esp), %eax
-COFF-i386: 20: 83 c4 0c addl $12, %esp
-COFF-i386: 23: c3 ret
-
-COFF-x86-64: file format COFF-x86-64
-COFF-x86-64: Disassembly of section .text:
-COFF-x86-64: 0: 48 83 ec 28 subq $40, %rsp
-COFF-x86-64: 4: c7 44 24 24 00 00 00 00 movl $0, 36(%rsp)
-COFF-x86-64: c: 48 8d 0d 00 00 00 00 leaq (%rip), %rcx
-COFF-x86-64: f: IMAGE_REL_AMD64_REL32 L.str
-COFF-x86-64: 13: e8 00 00 00 00 callq 0
-COFF-x86-64: 14: IMAGE_REL_AMD64_REL32 puts
-COFF-x86-64: 18: e8 00 00 00 00 callq 0
-COFF-x86-64: 19: IMAGE_REL_AMD64_REL32 SomeOtherFunction
-COFF-x86-64: 1d: 8b 44 24 24 movl 36(%rsp), %eax
-COFF-x86-64: 21: 48 83 c4 28 addq $40, %rsp
-COFF-x86-64: 25: c3 ret
diff --git a/test/Object/objdump-file-header.test b/test/Object/objdump-file-header.test
new file mode 100644
index 000000000000..3fce3f40719e
--- /dev/null
+++ b/test/Object/objdump-file-header.test
@@ -0,0 +1,18 @@
+RUN: llvm-objdump -f %p/Inputs/trivial-object-test.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -f %p/Inputs/trivial-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386
+
+XFAIL: *
+
+COFF-i386: : file format
+COFF-i386: architecture: i386
+COFF-i386: HAS_RELOC
+COFF-i386: HAS_SYMS
+COFF-i386: start address 0x
+
+ELF-i386: : file format elf
+ELF-i386: architecture: i386
+ELF-i386: HAS_RELOC
+ELF-i386: HAS_SYMS
+ELF-i386: start address 0x
diff --git a/test/Object/objdump-relocations.test b/test/Object/objdump-relocations.test
index 2dcdb432ef61..c4b564e7231d 100644
--- a/test/Object/objdump-relocations.test
+++ b/test/Object/objdump-relocations.test
@@ -1,10 +1,10 @@
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.coff-i386 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.coff-i386 \
RUN: | FileCheck %s -check-prefix COFF-i386
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.coff-x86-64 \
RUN: | FileCheck %s -check-prefix COFF-x86-64
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.elf-i386 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-i386 \
RUN: | FileCheck %s -check-prefix ELF-i386
-RUN: llvm-objdump -r %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+RUN: llvm-objdump -r %p/Inputs/trivial-object-test.elf-x86-64 \
RUN: | FileCheck %s -check-prefix ELF-x86-64
COFF-i386: .text
diff --git a/test/Object/objdump-section-content.test b/test/Object/objdump-section-content.test
new file mode 100644
index 000000000000..581e75eb5acc
--- /dev/null
+++ b/test/Object/objdump-section-content.test
@@ -0,0 +1,20 @@
+RUN: llvm-objdump -s %p/Inputs/trivial-object-test.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -s %p/Inputs/trivial-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386
+
+COFF-i386: trivial-object-test.coff-i386: file format
+COFF-i386: Contents of section .text:
+COFF-i386: 0000 83ec0cc7 44240800 000000c7 04240000 ....D$.......$..
+COFF-i386: 0010 0000e800 000000e8 00000000 8b442408 .............D$.
+COFF-i386: 0020 83c40cc3 ....
+COFF-i386: Contents of section .data:
+COFF-i386: 0000 48656c6c 6f20576f 726c6421 00 Hello World!.
+
+ELF-i386: trivial-object-test.elf-i386: file format
+ELF-i386: Contents of section .text:
+ELF-i386: 0000 83ec0cc7 44240800 000000c7 04240000 ....D$.......$..
+ELF-i386: 0010 0000e8fc ffffffe8 fcffffff 8b442408 .............D$.
+ELF-i386: 0020 83c40cc3 ....
+ELF-i386: Contents of section .rodata.str1.1:
+ELF-i386: 0024 48656c6c 6f20576f 726c6421 00 Hello World!.
diff --git a/test/Object/objdump-sectionheaders.test b/test/Object/objdump-sectionheaders.test
index 4515d00e618d..a417d07a81cc 100644
--- a/test/Object/objdump-sectionheaders.test
+++ b/test/Object/objdump-sectionheaders.test
@@ -1,4 +1,4 @@
-; RUN: llvm-objdump -h %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
+; RUN: llvm-objdump -h %p/Inputs/trivial-object-test.elf-x86-64 \
; RUN: | FileCheck %s
; To verify this, use readelf -S, not objdump -h. Binutils objdump filters the
diff --git a/test/Object/objdump-symbol-table.test b/test/Object/objdump-symbol-table.test
new file mode 100644
index 000000000000..8a0f44042d98
--- /dev/null
+++ b/test/Object/objdump-symbol-table.test
@@ -0,0 +1,33 @@
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.coff-i386 \
+RUN: | FileCheck %s -check-prefix COFF-i386
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF-i386
+RUN: llvm-objdump -t %p/Inputs/trivial-object-test.macho-i386 \
+RUN: | FileCheck %s -check-prefix macho-i386
+
+COFF-i386: trivial-object-test.coff-i386: file format
+COFF-i386: SYMBOL TABLE:
+COFF-i386: [ 0](sec 1)(fl 0x00)(ty 0)(scl 3) (nx 1) 0x00000000 .text
+COFF-i386: AUX scnlen 0x24 nreloc 3 nlnno 0 checksum 0x0 assoc 1 comdat 0
+COFF-i386: [ 2](sec 2)(fl 0x00)(ty 0)(scl 3) (nx 1) 0x00000000 .data
+COFF-i386: AUX scnlen 0xd nreloc 0 nlnno 0 checksum 0x0 assoc 2 comdat 0
+COFF-i386: [ 4](sec 1)(fl 0x00)(ty 200)(scl 2) (nx 0) 0x00000000 _main
+COFF-i386: [ 5](sec 2)(fl 0x00)(ty 0)(scl 3) (nx 0) 0x00000000 L_.str
+COFF-i386: [ 6](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 _puts
+COFF-i386: [ 7](sec 0)(fl 0x00)(ty 0)(scl 2) (nx 0) 0x00000000 _SomeOtherFunction
+
+ELF-i386: trivial-object-test.elf-i386: file format
+ELF-i386: SYMBOL TABLE:
+ELF-i386: 00000000 l df *ABS* 00000000 trivial-object-test.s
+ELF-i386: 00000000 l d .text 00000000 .text
+ELF-i386: 00000024 l d .rodata.str1.1 00000000 .rodata.str1.1
+ELF-i386: 00000031 l d .note.GNU-stack 00000000 .note.GNU-stack
+ELF-i386: 00000000 g F .text 00000024 main
+ELF-i386: 00000000 *UND* 00000000 SomeOtherFunction
+ELF-i386: 00000000 *UND* 00000000 puts
+
+macho-i386: trivial-object-test.macho-i386: file format Mach-O 32-bit i386
+macho-i386: SYMBOL TABLE:
+macho-i386: 00000000 g F __TEXT,__text 00000024 _main
+macho-i386: 00000000 *UND* 00000000 _SomeOtherFunction
+macho-i386: 00000000 *UND* 00000000 _puts
\ No newline at end of file
diff --git a/test/Object/objdump-trivial-object.test b/test/Object/objdump-trivial-object.test
deleted file mode 100644
index c4855fdfd682..000000000000
--- a/test/Object/objdump-trivial-object.test
+++ /dev/null
@@ -1,54 +0,0 @@
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.coff-i386 \
-RUN: | FileCheck %s -check-prefix COFF-i386
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.coff-x86-64 \
-RUN: | FileCheck %s -check-prefix COFF-x86-64
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.elf-i386 \
-RUN: | FileCheck %s -check-prefix ELF-i386
-RUN: llvm-objdump -d %p/TestObjectFiles/trivial-object-test.elf-x86-64 \
-RUN: | FileCheck %s -check-prefix ELF-x86-64
-
-COFF-i386: file format COFF-i386
-COFF-i386: Disassembly of section .text:
-COFF-i386: 0: 83 ec 0c subl $12, %esp
-COFF-i386: 3: c7 44 24 08 00 00 00 00 movl $0, 8(%esp)
-COFF-i386: b: c7 04 24 00 00 00 00 movl $0, (%esp)
-COFF-i386: 12: e8 00 00 00 00 calll 0
-COFF-i386: 17: e8 00 00 00 00 calll 0
-COFF-i386: 1c: 8b 44 24 08 movl 8(%esp), %eax
-COFF-i386: 20: 83 c4 0c addl $12, %esp
-COFF-i386: 23: c3 ret
-
-COFF-x86-64: file format COFF-x86-64
-COFF-x86-64: Disassembly of section .text:
-COFF-x86-64: 0: 48 83 ec 28 subq $40, %rsp
-COFF-x86-64: 4: c7 44 24 24 00 00 00 00 movl $0, 36(%rsp)
-COFF-x86-64: c: 48 8d 0d 00 00 00 00 leaq (%rip), %rcx
-COFF-x86-64: 13: e8 00 00 00 00 callq 0
-COFF-x86-64: 18: e8 00 00 00 00 callq 0
-COFF-x86-64: 1d: 8b 44 24 24 movl 36(%rsp), %eax
-COFF-x86-64: 21: 48 83 c4 28 addq $40, %rsp
-COFF-x86-64: 25: c3 ret
-
-
-ELF-i386: file format ELF32-i386
-ELF-i386: Disassembly of section .text:
-ELF-i386: 0: 83 ec 0c subl $12, %esp
-ELF-i386: 3: c7 44 24 08 00 00 00 00 movl $0, 8(%esp)
-ELF-i386: b: c7 04 24 00 00 00 00 movl $0, (%esp)
-ELF-i386: 12: e8 fc ff ff ff calll -4
-ELF-i386: 17: e8 fc ff ff ff calll -4
-ELF-i386: 1c: 8b 44 24 08 movl 8(%esp), %eax
-ELF-i386: 20: 83 c4 0c addl $12, %esp
-ELF-i386: 23: c3 ret
-
-ELF-x86-64: file format ELF64-x86-64
-ELF-x86-64: Disassembly of section .text:
-ELF-x86-64: 0: 48 83 ec 08 subq $8, %rsp
-ELF-x86-64: 4: c7 44 24 04 00 00 00 00 movl $0, 4(%rsp)
-ELF-x86-64: c: bf 00 00 00 00 movl $0, %edi
-ELF-x86-64: 11: e8 00 00 00 00 callq 0
-ELF-x86-64: 16: 30 c0 xorb %al, %al
-ELF-x86-64: 18: e8 00 00 00 00 callq 0
-ELF-x86-64: 1d: 8b 44 24 04 movl 4(%rsp), %eax
-ELF-x86-64: 21: 48 83 c4 08 addq $8, %rsp
-ELF-x86-64: 25: c3 ret
diff --git a/test/Object/readobj-elf-versioning.test b/test/Object/readobj-elf-versioning.test
new file mode 100644
index 000000000000..0906f344e2c5
--- /dev/null
+++ b/test/Object/readobj-elf-versioning.test
@@ -0,0 +1,15 @@
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.i386 \
+RUN: | FileCheck %s -check-prefix ELF32
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/elf-versioning-test.x86_64 \
+RUN: | FileCheck %s -check-prefix ELF64
+
+ELF: foo@@VER2 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: foo@VER1 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: unversioned_define FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+
+ELF32: puts@GLIBC_2.0 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
+ELF64: puts@GLIBC_2.2.5 FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} undef,global
diff --git a/test/Object/readobj-shared-object.test b/test/Object/readobj-shared-object.test
new file mode 100644
index 000000000000..3b5457ce0737
--- /dev/null
+++ b/test/Object/readobj-shared-object.test
@@ -0,0 +1,59 @@
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-i386 \
+RUN: | FileCheck %s -check-prefix ELF32
+
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: | FileCheck %s -check-prefix ELF
+RUN: llvm-readobj %p/Inputs/shared-object-test.elf-x86-64 \
+RUN: | FileCheck %s -check-prefix ELF64
+
+ELF64:File Format : ELF64-x86-64
+ELF64:Arch : x86_64
+ELF64:Address Size: 64 bits
+ELF64:Load Name : libfoo.so
+
+ELF32:File Format : ELF32-i386
+ELF32:Arch : i386
+ELF32:Address Size: 32 bits
+ELF32:Load Name : libfoo.so
+
+ELF:Symbols:
+ELF: .dynsym DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .dynstr DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .text DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .eh_frame DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .tdata DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .dynamic DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .got.plt DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .data DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: .bss DBG {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} formatspecific
+ELF: shared.ll FILE {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute,formatspecific
+ELF: local_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}}
+ELF: _GLOBAL_OFFSET_TABLE_ DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute
+ELF: _DYNAMIC DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} absolute
+ELF: common_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: tls_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal
+ELF: defined_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
+ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
+ELF: global_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
+ELF: Total: 21
+
+ELF:Dynamic Symbols:
+ELF: common_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: tls_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,threadlocal
+ELF: defined_sym DATA {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: __bss_start ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
+ELF: _end ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
+ELF: global_func FUNC {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global
+ELF: _edata ? {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} global,absolute
+ELF: Total: {{[0-9a-f]+}}
+
+ELF:Libraries needed:
+ELF: libc.so.6
+ELF: libm.so.6
+ELF: Total: 2
+
+
diff --git a/test/Other/2009-03-31-CallGraph.ll b/test/Other/2009-03-31-CallGraph.ll
index d6653ecbe8ef..864903cffba2 100644
--- a/test/Other/2009-03-31-CallGraph.ll
+++ b/test/Other/2009-03-31-CallGraph.ll
@@ -15,6 +15,8 @@ ok2:
unreachable
lpad2:
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
unreachable
}
@@ -29,3 +31,4 @@ declare void @f6() nounwind
declare void @f8()
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Other/X86/dg.exp b/test/Other/X86/dg.exp
deleted file mode 100644
index 7b7bd4e73807..000000000000
--- a/test/Other/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Other/X86/lit.local.cfg b/test/Other/X86/lit.local.cfg
new file mode 100644
index 000000000000..da2db5a45f9c
--- /dev/null
+++ b/test/Other/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index e4521d5184aa..d28c178588bb 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -102,14 +102,17 @@
@N = constant i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
@O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
-; Fold GEP of a GEP. Theoretically some of these cases could be folded
-; without using targetdata, however that's not implemented yet.
+; Fold GEP of a GEP. Very simple cases are folded without targetdata.
+; PLAIN: @Y = global [3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 2)
; PLAIN: @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; OPT: @Y = global [3 x { i32, i32 }]* getelementptr ([3 x { i32, i32 }]* @ext, i64 2)
; OPT: @Z = global i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; TO: @Y = global [3 x { i32, i32 }]* getelementptr ([3 x { i32, i32 }]* @ext, i64 2)
; TO: @Z = global i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
@ext = external global [3 x { i32, i32 }]
+@Y = global [3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 1), i64 1)
@Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
; Duplicate all of the above as function return values rather than
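To make the new @Y case concrete: both GEPs step over whole [3 x { i32, i32 }] elements of @ext, so folding the inner GEP into the outer one is pure index arithmetic and needs no target data. A toy illustration of exactly that (not the constant folder itself):

    # Toy model of the fold checked above: gep(gep(@ext, 1), 1) ==> gep(@ext, 2).
    def fold_gep_of_gep(inner_index, outer_index):
        # Only valid when both GEPs index the same pointee type by whole
        # elements; anything finer-grained needs element sizes (targetdata).
        return inner_index + outer_index

    assert fold_gep_of_gep(1, 1) == 2  # matches the folded @Y in the CHECK lines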
diff --git a/test/Other/dg.exp b/test/Other/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Other/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Other/lint.ll b/test/Other/lint.ll
index 4aa984e2e1b7..ca2b1a336a18 100644
--- a/test/Other/lint.ll
+++ b/test/Other/lint.ll
@@ -151,7 +151,7 @@ entry:
exit:
%t3 = phi i32* [ %t4, %exit ]
%t4 = bitcast i32* %t3 to i32*
- %x = volatile load i32* %t3
+ %x = load volatile i32* %t3
br label %exit
}
diff --git a/test/Other/lit.local.cfg b/test/Other/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Other/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/TableGen/Dag.td b/test/TableGen/Dag.td
index 9ed2301cef04..7ceb4e74b2ff 100644
--- a/test/TableGen/Dag.td
+++ b/test/TableGen/Dag.td
@@ -60,6 +60,7 @@ def VAL3 : bar<foo1, somedef1>;
// CHECK-NEXT: dag Dag1 = (somedef1 1);
// CHECK-NEXT: dag Dag2 = (somedef1 2);
// CHECK-NEXT: dag Dag3 = (somedef1 2);
+// CHECK-NEXT: NAME = ?
// CHECK-NEXT: }
@@ -68,4 +69,5 @@ def VAL4 : bar<foo2, somedef2>;
// CHECK-NEXT: dag Dag1 = (somedef1 1);
// CHECK-NEXT: dag Dag2 = (somedef2 2);
// CHECK-NEXT: dag Dag3 = (somedef2 2);
+// CHECK-NEXT: NAME = ?
// CHECK-NEXT: }
diff --git a/test/TableGen/ForeachList.td b/test/TableGen/ForeachList.td
new file mode 100644
index 000000000000..99b7e14c2d5f
--- /dev/null
+++ b/test/TableGen/ForeachList.td
@@ -0,0 +1,76 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Register<string name, int idx> {
+ string Name = name;
+ int Index = idx;
+}
+
+foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in {
+ def R#i : Register<"R"#i, i>;
+ def F#i : Register<"F"#i, i>;
+}
+
+// CHECK: def F0
+// CHECK: string Name = "F0";
+// CHECK: int Index = 0;
+
+// CHECK: def F1
+// CHECK: string Name = "F1";
+// CHECK: int Index = 1;
+
+// CHECK: def F2
+// CHECK: string Name = "F2";
+// CHECK: int Index = 2;
+
+// CHECK: def F3
+// CHECK: string Name = "F3";
+// CHECK: int Index = 3;
+
+// CHECK: def F4
+// CHECK: string Name = "F4";
+// CHECK: int Index = 4;
+
+// CHECK: def F5
+// CHECK: string Name = "F5";
+// CHECK: int Index = 5;
+
+// CHECK: def F6
+// CHECK: string Name = "F6";
+// CHECK: int Index = 6;
+
+// CHECK: def F7
+// CHECK: string Name = "F7";
+// CHECK: int Index = 7;
+
+// CHECK: def R0
+// CHECK: string Name = "R0";
+// CHECK: int Index = 0;
+
+// CHECK: def R1
+// CHECK: string Name = "R1";
+// CHECK: int Index = 1;
+
+// CHECK: def R2
+// CHECK: string Name = "R2";
+// CHECK: int Index = 2;
+
+// CHECK: def R3
+// CHECK: string Name = "R3";
+// CHECK: int Index = 3;
+
+// CHECK: def R4
+// CHECK: string Name = "R4";
+// CHECK: int Index = 4;
+
+// CHECK: def R5
+// CHECK: string Name = "R5";
+// CHECK: int Index = 5;
+
+// CHECK: def R6
+// CHECK: string Name = "R6";
+// CHECK: int Index = 6;
+
+// CHECK: def R7
+// CHECK: string Name = "R7";
+// CHECK: int Index = 7;
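The expected records are just the loop body instantiated once per list element; the dump lists records sorted by name, which is why the F defs precede the R defs in the CHECK lines. Mirrored in Python (a sketch of the expansion, not of TableGen's evaluator):

    # Sketch: what the foreach over [0..7] expands to for the R and F banks.
    records = [{'def': bank + str(i), 'Name': bank + str(i), 'Index': i}
               for bank in ('R', 'F') for i in range(8)]
    assert len(records) == 16
    assert {'def': 'F3', 'Name': 'F3', 'Index': 3} in records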
diff --git a/test/TableGen/ForeachLoop.td b/test/TableGen/ForeachLoop.td
new file mode 100644
index 000000000000..e2defe9cfea6
--- /dev/null
+++ b/test/TableGen/ForeachLoop.td
@@ -0,0 +1,43 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Register<string name, int idx> {
+ string Name = name;
+ int Index = idx;
+}
+
+foreach i = [0, 1, 2, 3, 4, 5, 6, 7] in
+ def R#i : Register<"R"#i, i>;
+
+
+// CHECK: def R0
+// CHECK: string Name = "R0";
+// CHECK: int Index = 0;
+
+// CHECK: def R1
+// CHECK: string Name = "R1";
+// CHECK: int Index = 1;
+
+// CHECK: def R2
+// CHECK: string Name = "R2";
+// CHECK: int Index = 2;
+
+// CHECK: def R3
+// CHECK: string Name = "R3";
+// CHECK: int Index = 3;
+
+// CHECK: def R4
+// CHECK: string Name = "R4";
+// CHECK: int Index = 4;
+
+// CHECK: def R5
+// CHECK: string Name = "R5";
+// CHECK: int Index = 5;
+
+// CHECK: def R6
+// CHECK: string Name = "R6";
+// CHECK: int Index = 6;
+
+// CHECK: def R7
+// CHECK: string Name = "R7";
+// CHECK: int Index = 7;
diff --git a/test/TableGen/NestedForeach.td b/test/TableGen/NestedForeach.td
new file mode 100644
index 000000000000..e8c16f720d0e
--- /dev/null
+++ b/test/TableGen/NestedForeach.td
@@ -0,0 +1,74 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Droid<string series, int release, string model, int patchlevel> {
+ string Series = series;
+ int Release = release;
+ string Model = model;
+ int Patchlevel = patchlevel;
+}
+
+foreach S = ["R", "C"] in {
+ foreach R = [2, 3, 4] in {
+ foreach M = ["D", "P", "Q"] in {
+ foreach P = [0, 2, 4] in {
+ def S#R#M#P : Droid<S, R, M, P>;
+ }
+ }
+ }
+}
+
+// CHECK: def C2D0
+// CHECK: def C2D2
+// CHECK: def C2D4
+// CHECK: def C2P0
+// CHECK: def C2P2
+// CHECK: def C2P4
+// CHECK: def C2Q0
+// CHECK: def C2Q2
+// CHECK: def C2Q4
+// CHECK: def C3D0
+// CHECK: def C3D2
+// CHECK: def C3D4
+// CHECK: def C3P0
+// CHECK: def C3P2
+// CHECK: def C3P4
+// CHECK: def C3Q0
+// CHECK: def C3Q2
+// CHECK: def C3Q4
+// CHECK: def C4D0
+// CHECK: def C4D2
+// CHECK: def C4D4
+// CHECK: def C4P0
+// CHECK: def C4P2
+// CHECK: def C4P4
+// CHECK: def C4Q0
+// CHECK: def C4Q2
+// CHECK: def C4Q4
+// CHECK: def R2D0
+// CHECK: def R2D2
+// CHECK: def R2D4
+// CHECK: def R2P0
+// CHECK: def R2P2
+// CHECK: def R2P4
+// CHECK: def R2Q0
+// CHECK: def R2Q2
+// CHECK: def R2Q4
+// CHECK: def R3D0
+// CHECK: def R3D2
+// CHECK: def R3D4
+// CHECK: def R3P0
+// CHECK: def R3P2
+// CHECK: def R3P4
+// CHECK: def R3Q0
+// CHECK: def R3Q2
+// CHECK: def R3Q4
+// CHECK: def R4D0
+// CHECK: def R4D2
+// CHECK: def R4D4
+// CHECK: def R4P0
+// CHECK: def R4P2
+// CHECK: def R4P4
+// CHECK: def R4Q0
+// CHECK: def R4Q2
+// CHECK: def R4Q4
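The four nested loops emit one Droid per element of the cross product, 2 x 3 x 3 x 3 = 54 records. The same enumeration in Python (illustrative only):

    # Sketch: the cross product behind the 54 CHECK'd defs.
    from itertools import product
    names = sorted('%s%d%s%d' % (s, r, m, p)
                   for s, r, m, p in product('RC', (2, 3, 4), 'DPQ', (0, 2, 4)))
    assert len(names) == 54
    assert names[0] == 'C2D0' and names[-1] == 'R4Q4'  # matches the CHECK order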
diff --git a/test/TableGen/Paste.td b/test/TableGen/Paste.td
new file mode 100644
index 000000000000..a7e2a5b318ba
--- /dev/null
+++ b/test/TableGen/Paste.td
@@ -0,0 +1,36 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Instr<int i> {
+ int index = i;
+}
+
+multiclass Test {
+ def Vx#NAME#PS : Instr<0>;
+ def Vx#NAME#PD : Instr<1>;
+ def Vy#NAME#PS : Instr<2>;
+ def Vy#NAME#PD : Instr<3>;
+}
+
+defm ADD : Test;
+defm SUB : Test;
+
+// CHECK: VxADDPD
+// CHECK: index = 1;
+// CHECK: VxADDPS
+// CHECK: index = 0;
+
+// CHECK: VxSUBPD
+// CHECK: index = 1;
+// CHECK: VxSUBPS
+// CHECK: index = 0;
+
+// CHECK: VyADDPD
+// CHECK: index = 3;
+// CHECK: VyADDPS
+// CHECK: index = 2;
+
+// CHECK: VySUBPD
+// CHECK: index = 3;
+// CHECK: VySUBPS
+// CHECK: index = 2;
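Within a multiclass, # pastes the defm instantiation name into each def, so defm ADD and defm SUB each stamp out four Instrs. The naming scheme, modelled in Python (NAME resolution itself is tblgen's job):

    # Sketch: names produced by 'defm ADD : Test' (likewise for SUB).
    def expand(name):
        return ['%s%s%s' % (pre, name, suf)
                for pre in ('Vx', 'Vy') for suf in ('PS', 'PD')]

    assert expand('ADD') == ['VxADDPS', 'VxADDPD', 'VyADDPS', 'VyADDPD']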
diff --git a/test/TableGen/SetTheory.td b/test/TableGen/SetTheory.td
index a4acea907d80..4d85aa3e6f22 100644
--- a/test/TableGen/SetTheory.td
+++ b/test/TableGen/SetTheory.td
@@ -165,3 +165,10 @@ def S9d : Set<(sequence "S%ua", 7, 9)>;
// CHECK: S9b = [ e7 e6 e5 e4 e3 ]
// CHECK: S9c = [ e0 ]
// CHECK: S9d = [ a b c d e0 e3 e6 e9 e4 e5 e7 ]
+
+// The 'interleave' operator is almost the inverse of 'decimate'.
+def interleave;
+def T0a : Set<(interleave S9a, S9b)>;
+def T0b : Set<(interleave S8e, S8d)>;
+// CHECK: T0a = [ e3 e7 e4 e6 e5 ]
+// CHECK: T0b = [ e0 e1 e2 e3 e4 e5 e6 e7 e8 e9 ]
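As the comment says, interleave is roughly decimate's inverse: it walks both operand sets in lockstep, emitting a[0], b[0], a[1], b[1], and so on, and since sets are duplicate-free only first occurrences survive. A Python model, with S8e/S8d contents assumed to be the even and odd subsequences (they are defined earlier in this .td file, outside this hunk):

    # Sketch of the 'interleave' set operator (ordered, duplicate-free output).
    def interleave(a, b):
        out = []
        for i in range(max(len(a), len(b))):
            for src in (a, b):
                if i < len(src) and src[i] not in out:
                    out.append(src[i])
        return out

    evens = ['e0', 'e2', 'e4', 'e6', 'e8']  # assumed S8e
    odds  = ['e1', 'e3', 'e5', 'e7', 'e9']  # assumed S8d
    assert interleave(evens, odds) == ['e%d' % i for i in range(10)]  # T0b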
diff --git a/test/TableGen/SiblingForeach.td b/test/TableGen/SiblingForeach.td
new file mode 100644
index 000000000000..a11f6f87b427
--- /dev/null
+++ b/test/TableGen/SiblingForeach.td
@@ -0,0 +1,277 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Set<int i = 0, int j = 0, int k = 0> {
+ int I = i;
+ int J = j;
+ int K = k;
+}
+
+foreach i = [1, 2, 3] in {
+ def I1_#i : Set<i>;
+ foreach j = [1, 2, 3] in {
+ def I1_#i#_J1_#j : Set<i, j>;
+ }
+ def I2_#i : Set<i>;
+ foreach j = [4, 5, 6] in {
+ foreach k = [1, 2, 3] in {
+ def I3_#i#_J2_#j#_K1_#k : Set<i, j, k>;
+ }
+ def I4_#i#_J3_#j : Set<i, j>;
+ }
+}
+
+// CHECK: def I1_1
+// CHECK: int I = 1;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_1
+// CHECK: int I = 1;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_2
+// CHECK: int I = 1;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_1_J1_3
+// CHECK: int I = 1;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2
+// CHECK: int I = 2;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_1
+// CHECK: int I = 2;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_2
+// CHECK: int I = 2;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_2_J1_3
+// CHECK: int I = 2;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3
+// CHECK: int I = 3;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_1
+// CHECK: int I = 3;
+// CHECK: int J = 1;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_2
+// CHECK: int I = 3;
+// CHECK: int J = 2;
+// CHECK: int K = 0;
+
+// CHECK: def I1_3_J1_3
+// CHECK: int I = 3;
+// CHECK: int J = 3;
+// CHECK: int K = 0;
+
+// CHECK: def I2_1
+// CHECK: int I = 1;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I2_2
+// CHECK: int I = 2;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I2_3
+// CHECK: int I = 3;
+// CHECK: int J = 0;
+// CHECK: int K = 0;
+
+// CHECK: def I3_1_J2_4_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_4_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_4_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_1_J2_5_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_5_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_5_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_1_J2_6_K1_1
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_1_J2_6_K1_2
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_1_J2_6_K1_3
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_4_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_4_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_4_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_5_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_5_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_5_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_2_J2_6_K1_1
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_2_J2_6_K1_2
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_2_J2_6_K1_3
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_4_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_4_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_4_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_5_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_5_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_5_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 3;
+
+// CHECK: def I3_3_J2_6_K1_1
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 1;
+
+// CHECK: def I3_3_J2_6_K1_2
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 2;
+
+// CHECK: def I3_3_J2_6_K1_3
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 3;
+
+// CHECK: def I4_1_J3_4
+// CHECK: int I = 1;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_1_J3_5
+// CHECK: int I = 1;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_1_J3_6
+// CHECK: int I = 1;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_4
+// CHECK: int I = 2;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_5
+// CHECK: int I = 2;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_2_J3_6
+// CHECK: int I = 2;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_4
+// CHECK: int I = 3;
+// CHECK: int J = 4;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_5
+// CHECK: int I = 3;
+// CHECK: int J = 5;
+// CHECK: int K = 0;
+
+// CHECK: def I4_3_J3_6
+// CHECK: int I = 3;
+// CHECK: int J = 6;
+// CHECK: int K = 0;
diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td
new file mode 100644
index 000000000000..9c502f475507
--- /dev/null
+++ b/test/TableGen/TwoLevelName.td
@@ -0,0 +1,46 @@
+// RUN: llvm-tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class Type<string name, int length, int width> {
+ string Name = name;
+ int Length = length;
+ int Width = width;
+}
+
+multiclass OT1<string ss, int l, int w> {
+ def _#NAME# : Type<ss, l, w>;
+}
+multiclass OT2<string ss, int w> {
+ defm v1#NAME# : OT1<!strconcat( "v1", ss), 1, w>;
+ defm v2#NAME# : OT1<!strconcat( "v2", ss), 2, w>;
+ defm v3#NAME# : OT1<!strconcat( "v3", ss), 3, w>;
+ defm v4#NAME# : OT1<!strconcat( "v4", ss), 4, w>;
+ defm v8#NAME# : OT1<!strconcat( "v8", ss), 8, w>;
+ defm v16#NAME# : OT1<!strconcat("v16", ss), 16, w>;
+}
+
+defm i8 : OT2<"i8", 8>;
+
+// CHECK: _v16i8
+// CHECK: Length = 16
+// CHECK: Width = 8
+
+// CHECK: _v1i8
+// CHECK: Length = 1
+// CHECK: Width = 8
+
+// CHECK: _v2i8
+// CHECK: Length = 2
+// CHECK: Width = 8
+
+// CHECK: _v3i8
+// CHECK: Length = 3
+// CHECK: Width = 8
+
+// CHECK: _v4i8
+// CHECK: Length = 4
+// CHECK: Width = 8
+
+// CHECK: _v8i8
+// CHECK: Length = 8
+// CHECK: Width = 8
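The two multiclass levels concatenate their NAMEs: defm i8 : OT2 instantiates the inner defms as v1i8 through v16i8, and OT1's def _#NAME# prepends the underscore. Sketched in Python:

    # Sketch: record names from 'defm i8 : OT2<"i8", 8>'.
    outer = 'i8'
    names = ['_v%d%s' % (n, outer) for n in (1, 2, 3, 4, 8, 16)]
    assert names == ['_v1i8', '_v2i8', '_v3i8', '_v4i8', '_v8i8', '_v16i8']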
diff --git a/test/TableGen/dg.exp b/test/TableGen/dg.exp
deleted file mode 100644
index f7d275ad8cb1..000000000000
--- a/test/TableGen/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{td}]]
diff --git a/test/TableGen/foreach.td b/test/TableGen/foreach.td
index cbcade921b91..814ae6ef9363 100644
--- a/test/TableGen/foreach.td
+++ b/test/TableGen/foreach.td
@@ -1,6 +1,6 @@
// RUN: llvm-tblgen %s | grep {Jr} | count 2
// RUN: llvm-tblgen %s | grep {Sr} | count 2
-// RUN: llvm-tblgen %s | grep {NAME} | count 1
+// RUN: llvm-tblgen %s | grep {"NAME"} | count 1
// XFAIL: vg_leak
// Variables for foreach
diff --git a/test/TableGen/lit.local.cfg b/test/TableGen/lit.local.cfg
new file mode 100644
index 000000000000..9a4a0144f720
--- /dev/null
+++ b/test/TableGen/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.td']
diff --git a/test/Transforms/ADCE/dg.exp b/test/Transforms/ADCE/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/ADCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ADCE/lit.local.cfg b/test/Transforms/ADCE/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/ADCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ArgumentPromotion/dg.exp b/test/Transforms/ArgumentPromotion/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/ArgumentPromotion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ArgumentPromotion/lit.local.cfg b/test/Transforms/ArgumentPromotion/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
new file mode 100644
index 000000000000..32a91ceee007
--- /dev/null
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -0,0 +1,112 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; This test checks the non-trivial pairing-induced cycle avoidance. Without
+; this cycle avoidance, the algorithm would want to select the pairs:
+; %div77 = fdiv double %sub74, %mul76.v.r1 <-> %div125 = fdiv double %mul121, %mul76.v.r2 (div125 depends on mul117)
+; %add84 = fadd double %sub83, 2.000000e+00 <-> %add127 = fadd double %mul126, 1.000000e+00 (add127 depends on div77)
+; %mul95 = fmul double %sub45.v.r1, %sub36.v.r1 <-> %mul88 = fmul double %sub36.v.r1, %sub87 (mul88 depends on add84)
+; %mul117 = fmul double %sub39.v.r1, %sub116 <-> %mul97 = fmul double %mul96, %sub39.v.r1 (mul97 depends on mul95)
+; and so a dependency cycle would be created.
+
+declare double @fabs(double) nounwind readnone
+define void @test1(double %a, double %b, double %c, double %add80, double %mul1, double %mul2.v.r1, double %mul73, double %sub, double %sub65, double %F.0, i32 %n.0, double %Bnm3.0, double %Bnm2.0, double %Bnm1.0, double %Anm3.0, double %Anm2.0, double %Anm1.0) {
+entry:
+ br label %go
+go:
+ %conv = sitofp i32 %n.0 to double
+ %add35 = fadd double %conv, %a
+ %sub36 = fadd double %add35, -1.000000e+00
+ %add38 = fadd double %conv, %b
+ %sub39 = fadd double %add38, -1.000000e+00
+ %add41 = fadd double %conv, %c
+ %sub42 = fadd double %add41, -1.000000e+00
+ %sub45 = fadd double %add35, -2.000000e+00
+ %sub48 = fadd double %add38, -2.000000e+00
+ %sub51 = fadd double %add41, -2.000000e+00
+ %mul52 = shl nsw i32 %n.0, 1
+ %sub53 = add nsw i32 %mul52, -1
+ %conv54 = sitofp i32 %sub53 to double
+ %sub56 = add nsw i32 %mul52, -3
+ %conv57 = sitofp i32 %sub56 to double
+ %sub59 = add nsw i32 %mul52, -5
+ %conv60 = sitofp i32 %sub59 to double
+ %mul61 = mul nsw i32 %n.0, %n.0
+ %conv62 = sitofp i32 %mul61 to double
+ %mul63 = fmul double %conv62, 3.000000e+00
+ %mul67 = fmul double %sub65, %conv
+ %add68 = fadd double %mul63, %mul67
+ %add69 = fadd double %add68, 2.000000e+00
+ %sub71 = fsub double %add69, %mul2.v.r1
+ %sub74 = fsub double %sub71, %mul73
+ %mul75 = fmul double %conv57, 2.000000e+00
+ %mul76 = fmul double %mul75, %sub42
+ %div77 = fdiv double %sub74, %mul76
+ %mul82 = fmul double %add80, %conv
+ %sub83 = fsub double %mul63, %mul82
+ %add84 = fadd double %sub83, 2.000000e+00
+ %sub86 = fsub double %add84, %mul2.v.r1
+ %sub87 = fsub double -0.000000e+00, %sub86
+ %mul88 = fmul double %sub36, %sub87
+ %mul89 = fmul double %mul88, %sub39
+ %mul90 = fmul double %conv54, 4.000000e+00
+ %mul91 = fmul double %mul90, %conv57
+ %mul92 = fmul double %mul91, %sub51
+ %mul93 = fmul double %mul92, %sub42
+ %div94 = fdiv double %mul89, %mul93
+ %mul95 = fmul double %sub45, %sub36
+ %mul96 = fmul double %mul95, %sub48
+ %mul97 = fmul double %mul96, %sub39
+ %sub99 = fsub double %conv, %a
+ %sub100 = fadd double %sub99, -2.000000e+00
+ %mul101 = fmul double %mul97, %sub100
+ %sub103 = fsub double %conv, %b
+ %sub104 = fadd double %sub103, -2.000000e+00
+ %mul105 = fmul double %mul101, %sub104
+ %mul106 = fmul double %conv57, 8.000000e+00
+ %mul107 = fmul double %mul106, %conv57
+ %mul108 = fmul double %mul107, %conv60
+ %sub111 = fadd double %add41, -3.000000e+00
+ %mul112 = fmul double %mul108, %sub111
+ %mul113 = fmul double %mul112, %sub51
+ %mul114 = fmul double %mul113, %sub42
+ %div115 = fdiv double %mul105, %mul114
+ %sub116 = fsub double -0.000000e+00, %sub36
+ %mul117 = fmul double %sub39, %sub116
+ %sub119 = fsub double %conv, %c
+ %sub120 = fadd double %sub119, -1.000000e+00
+ %mul121 = fmul double %mul117, %sub120
+ %mul123 = fmul double %mul75, %sub51
+ %mul124 = fmul double %mul123, %sub42
+ %div125 = fdiv double %mul121, %mul124
+ %mul126 = fmul double %div77, %sub
+ %add127 = fadd double %mul126, 1.000000e+00
+ %mul128 = fmul double %add127, %Anm1.0
+ %mul129 = fmul double %div94, %sub
+ %add130 = fadd double %div125, %mul129
+ %mul131 = fmul double %add130, %sub
+ %mul132 = fmul double %mul131, %Anm2.0
+ %add133 = fadd double %mul128, %mul132
+ %mul134 = fmul double %div115, %mul1
+ %mul135 = fmul double %mul134, %Anm3.0
+ %add136 = fadd double %add133, %mul135
+ %mul139 = fmul double %add127, %Bnm1.0
+ %mul143 = fmul double %mul131, %Bnm2.0
+ %add144 = fadd double %mul139, %mul143
+ %mul146 = fmul double %mul134, %Bnm3.0
+ %add147 = fadd double %add144, %mul146
+ %div148 = fdiv double %add136, %add147
+ %sub149 = fsub double %F.0, %div148
+ %div150 = fdiv double %sub149, %F.0
+ %call = tail call double @fabs(double %div150) nounwind readnone
+ %cmp = fcmp olt double %call, 0x3CB0000000000000
+ %cmp152 = icmp sgt i32 %n.0, 20000
+ %or.cond = or i1 %cmp, %cmp152
+ br i1 %or.cond, label %done, label %go
+done:
+ ret void
+; CHECK: @test1
+; CHECK: go:
+; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
+; FIXME: When tree pruning is deterministic, include the entire output.
+}
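The comment at the top of this test enumerates four candidate pairs whose dependencies form a loop, so fusing all four would make each fused instruction transitively depend on itself. The shape of the check, sketched abstractly in Python (an illustration of the idea, not BBVectorize's actual algorithm):

    # Sketch: a candidate pair must be rejected if fusing it closes a
    # dependency cycle. Edges below come from the comment at the top.
    deps = {'div77/div125': ['mul117/mul97'],
            'add84/add127': ['div77/div125'],
            'mul95/mul88':  ['add84/add127'],
            'mul117/mul97': ['mul95/mul88']}

    def reaches(start, target, graph, seen=frozenset()):
        for n in graph.get(start, ()):
            if n == target or (n not in seen and
                               reaches(n, target, graph, seen | {n})):
                return True
        return False

    # Fusing any one of the four pairs would let it depend on itself:
    assert all(reaches(p, p, deps) for p in deps)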
diff --git a/test/Transforms/BBVectorize/func-alias.ll b/test/Transforms/BBVectorize/func-alias.ll
new file mode 100644
index 000000000000..9d0cc07c1593
--- /dev/null
+++ b/test/Transforms/BBVectorize/func-alias.ll
@@ -0,0 +1,244 @@
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -bb-vectorize -bb-vectorize-req-chain-depth=2 -instcombine -gvn -S | FileCheck %s
+; The chain length is set to 2 so that this will do some vectorization; check that the order of the function calls is unchanged.
+
+%struct.descriptor_dimension = type { i64, i64, i64 }
+%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* }
+%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] }
+%"struct.array4_real(kind=4)" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%"struct.array4_integer(kind=4).73" = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+%struct.array4_unknown = type { i8*, i64, i64, [4 x %struct.descriptor_dimension] }
+
+@.cst4 = external unnamed_addr constant [11 x i8], align 8
+@.cst823 = external unnamed_addr constant [214 x i8], align 64
+@j.4580 = external global i32
+@j1.4581 = external global i32
+@nty1.4590 = external global [2 x i8]
+@nty2.4591 = external global [2 x i8]
+@xr1.4592 = external global float
+@xr2.4593 = external global float
+@yr1.4594 = external global float
+@yr2.4595 = external global float
+
+@__main1_MOD_iave = external unnamed_addr global i32
+@__main1_MOD_igrp = external global i32
+@__main1_MOD_iounit = external global i32
+@__main1_MOD_ityp = external global i32
+@__main1_MOD_mclmsg = external unnamed_addr global %struct.array4_unknown, align 32
+@__main1_MOD_mxdate = external unnamed_addr global %"struct.array4_integer(kind=4).73", align 32
+@__main1_MOD_rmxval = external unnamed_addr global %"struct.array4_real(kind=4)", align 32
+
+declare void @_gfortran_st_write(%struct.__st_parameter_dt*)
+declare void @_gfortran_st_write_done(%struct.__st_parameter_dt*)
+declare void @_gfortran_transfer_character_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_integer_write(%struct.__st_parameter_dt*, i8*, i32)
+declare void @_gfortran_transfer_real_write(%struct.__st_parameter_dt*, i8*, i32)
+
+define i1 @"prtmax__<bb 3>_<bb 34>"(%struct.__st_parameter_dt* %memtmp3, i32 %D.4627_188.reload) nounwind {
+; CHECK: prtmax__
+newFuncRoot:
+ br label %"<bb 34>"
+
+codeRepl80.exitStub: ; preds = %"<bb 34>"
+ ret i1 true
+
+"<bb 34>.<bb 25>_crit_edge.exitStub": ; preds = %"<bb 34>"
+ ret i1 false
+
+"<bb 34>": ; preds = %newFuncRoot
+ %tmp128 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+ %tmp129 = getelementptr inbounds %struct.__st_parameter_common* %tmp128, i32 0, i32 2
+ store i8* getelementptr inbounds ([11 x i8]* @.cst4, i64 0, i64 0), i8** %tmp129, align 8
+ %tmp130 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+ %tmp131 = getelementptr inbounds %struct.__st_parameter_common* %tmp130, i32 0, i32 3
+ store i32 31495, i32* %tmp131, align 4
+ %tmp132 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 5
+ store i8* getelementptr inbounds ([214 x i8]* @.cst823, i64 0, i64 0), i8** %tmp132, align 8
+ %tmp133 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 6
+ store i32 214, i32* %tmp133, align 4
+ %tmp134 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+ %tmp135 = getelementptr inbounds %struct.__st_parameter_common* %tmp134, i32 0, i32 0
+ store i32 4096, i32* %tmp135, align 4
+ %iounit.8748_288 = load i32* @__main1_MOD_iounit, align 4
+ %tmp136 = getelementptr inbounds %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
+ %tmp137 = getelementptr inbounds %struct.__st_parameter_common* %tmp136, i32 0, i32 1
+ store i32 %iounit.8748_288, i32* %tmp137, align 4
+ call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+ %D.75807_289 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+ %j.8758_290 = load i32* @j.4580, align 4
+ %D.75760_291 = sext i32 %j.8758_290 to i64
+ %iave.8736_292 = load i32* @__main1_MOD_iave, align 4
+ %D.75620_293 = sext i32 %iave.8736_292 to i64
+ %D.75808_294 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+ %D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
+ %igrp.8737_296 = load i32* @__main1_MOD_igrp, align 4
+ %D.75635_297 = sext i32 %igrp.8737_296 to i64
+ %D.75810_298 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+ %D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
+ %D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
+ %D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
+ %ityp.8750_302 = load i32* @__main1_MOD_ityp, align 4
+ %D.75704_303 = sext i32 %ityp.8750_302 to i64
+ %D.75814_304 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+ %D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
+ %D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
+ %D.75817_307 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+ %D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
+ %tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
+ %tmp139 = bitcast [0 x float]* %tmp138 to float*
+ %D.75819_309 = getelementptr inbounds float* %tmp139, i64 %D.75818_308
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+ %D.75820_310 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+ %j.8758_311 = load i32* @j.4580, align 4
+ %D.75760_312 = sext i32 %j.8758_311 to i64
+ %iave.8736_313 = load i32* @__main1_MOD_iave, align 4
+ %D.75620_314 = sext i32 %iave.8736_313 to i64
+ %D.75821_315 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+ %D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
+ %igrp.8737_317 = load i32* @__main1_MOD_igrp, align 4
+ %D.75635_318 = sext i32 %igrp.8737_317 to i64
+ %D.75823_319 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+ %D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
+ %D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
+ %D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
+ %ityp.8750_323 = load i32* @__main1_MOD_ityp, align 4
+ %D.75704_324 = sext i32 %ityp.8750_323 to i64
+ %D.75827_325 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+ %D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
+ %D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
+ %D.75830_328 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+ %D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
+ %tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
+ %tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
+ %D.75832_330 = getelementptr inbounds [1 x i8]* %tmp141, i64 %D.75831_329
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+ %D.75833_331 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+ %j.8758_332 = load i32* @j.4580, align 4
+ %D.75760_333 = sext i32 %j.8758_332 to i64
+ %iave.8736_334 = load i32* @__main1_MOD_iave, align 4
+ %D.75620_335 = sext i32 %iave.8736_334 to i64
+ %D.75834_336 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+ %D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
+ %igrp.8737_338 = load i32* @__main1_MOD_igrp, align 4
+ %D.75635_339 = sext i32 %igrp.8737_338 to i64
+ %D.75836_340 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+ %D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
+ %D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
+ %D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
+ %ityp.8750_344 = load i32* @__main1_MOD_ityp, align 4
+ %D.75704_345 = sext i32 %ityp.8750_344 to i64
+ %D.75840_346 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+ %D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
+ %D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
+ %D.75843_349 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+ %D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
+ %tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
+ %tmp143 = bitcast [0 x i32]* %tmp142 to i32*
+ %D.75845_351 = getelementptr inbounds i32* %tmp143, i64 %D.75844_350
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75845_351, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr1.4592, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr1.4594, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty1.4590, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+ %D.75807_352 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+ %j1.8760_353 = load i32* @j1.4581, align 4
+ %D.75773_354 = sext i32 %j1.8760_353 to i64
+ %iave.8736_355 = load i32* @__main1_MOD_iave, align 4
+ %D.75620_356 = sext i32 %iave.8736_355 to i64
+ %D.75808_357 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+ %D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
+ %igrp.8737_359 = load i32* @__main1_MOD_igrp, align 4
+ %D.75635_360 = sext i32 %igrp.8737_359 to i64
+ %D.75810_361 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+ %D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
+ %D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
+ %D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
+ %ityp.8750_365 = load i32* @__main1_MOD_ityp, align 4
+ %D.75704_366 = sext i32 %ityp.8750_365 to i64
+ %D.75814_367 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+ %D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
+ %D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
+ %D.75817_370 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+ %D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
+ %tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
+ %tmp145 = bitcast [0 x float]* %tmp144 to float*
+ %D.75849_372 = getelementptr inbounds float* %tmp145, i64 %D.75848_371
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+ %D.75820_373 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+ %j1.8760_374 = load i32* @j1.4581, align 4
+ %D.75773_375 = sext i32 %j1.8760_374 to i64
+ %iave.8736_376 = load i32* @__main1_MOD_iave, align 4
+ %D.75620_377 = sext i32 %iave.8736_376 to i64
+ %D.75821_378 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+ %D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
+ %igrp.8737_380 = load i32* @__main1_MOD_igrp, align 4
+ %D.75635_381 = sext i32 %igrp.8737_380 to i64
+ %D.75823_382 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+ %D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
+ %D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
+ %D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
+ %ityp.8750_386 = load i32* @__main1_MOD_ityp, align 4
+ %D.75704_387 = sext i32 %ityp.8750_386 to i64
+ %D.75827_388 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+ %D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
+ %D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
+ %D.75830_391 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+ %D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
+ %tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
+ %tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
+ %D.75853_393 = getelementptr inbounds [1 x i8]* %tmp147, i64 %D.75852_392
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
+; CHECK: @_gfortran_transfer_character_write
+ %D.75833_394 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+ %j1.8760_395 = load i32* @j1.4581, align 4
+ %D.75773_396 = sext i32 %j1.8760_395 to i64
+ %iave.8736_397 = load i32* @__main1_MOD_iave, align 4
+ %D.75620_398 = sext i32 %iave.8736_397 to i64
+ %D.75834_399 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+ %D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
+ %igrp.8737_401 = load i32* @__main1_MOD_igrp, align 4
+ %D.75635_402 = sext i32 %igrp.8737_401 to i64
+ %D.75836_403 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+ %D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
+ %D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
+ %D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
+ %ityp.8750_407 = load i32* @__main1_MOD_ityp, align 4
+ %D.75704_408 = sext i32 %ityp.8750_407 to i64
+ %D.75840_409 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+ %D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
+ %D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
+ %D.75843_412 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+ %D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
+ %tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
+ %tmp149 = bitcast [0 x i32]* %tmp148 to i32*
+ %D.75857_414 = getelementptr inbounds i32* %tmp149, i64 %D.75856_413
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* %D.75857_414, i32 4) nounwind
+; CHECK: @_gfortran_transfer_integer_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @xr2.4593, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* @yr2.4595, i32 4) nounwind
+; CHECK: @_gfortran_transfer_real_write
+ call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [2 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [2 x i8]* @nty2.4591, i32 2) nounwind
+; CHECK: @_gfortran_transfer_character_write
+ call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
+; CHECK: @_gfortran_st_write_done
+ %j.8758_415 = load i32* @j.4580, align 4
+ %D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
+ %j.8758_417 = load i32* @j.4580, align 4
+ %j.8770_418 = add nsw i32 %j.8758_417, 1
+ store i32 %j.8770_418, i32* @j.4580, align 4
+ %tmp150 = icmp ne i1 %D.4634_416, false
+ br i1 %tmp150, label %codeRepl80.exitStub, label %"<bb 34>.<bb 25>_crit_edge.exitStub"
+}
+
diff --git a/test/Transforms/BBVectorize/ld1.ll b/test/Transforms/BBVectorize/ld1.ll
new file mode 100644
index 000000000000..cea225d076e1
--- /dev/null
+++ b/test/Transforms/BBVectorize/ld1.ll
@@ -0,0 +1,41 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %i2 = load double* %c, align 8
+ %add = fadd double %mul, %i2
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %arrayidx6 = getelementptr inbounds double* %c, i64 1
+ %i5 = load double* %arrayidx6, align 8
+ %add7 = fadd double %mul5, %i5
+ %mul9 = fmul double %add, %i1
+ %add11 = fadd double %mul9, %i2
+ %mul13 = fmul double %add7, %i4
+ %add15 = fadd double %mul13, %i5
+ %mul16 = fmul double %add11, %add15
+ ret double %mul16
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %i2 = load <2 x double>* %i2.v.i0, align 8
+; CHECK: %add = fadd <2 x double> %mul, %i2
+; CHECK: %mul9 = fmul <2 x double> %add, %i1
+; CHECK: %add11 = fadd <2 x double> %mul9, %i2
+; CHECK: %add11.v.r1 = extractelement <2 x double> %add11, i32 0
+; CHECK: %add11.v.r2 = extractelement <2 x double> %add11, i32 1
+; CHECK: %mul16 = fmul double %add11.v.r1, %add11.v.r2
+; CHECK: ret double %mul16
+}
+
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
new file mode 100644
index 000000000000..bebc91ad91a0
--- /dev/null
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -0,0 +1,93 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; The second check covers the use of alias analysis (with loop unrolling).
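+; Without unrolling only part of the loop body pairs up; after partial unrolling
+; the two iterations pair with each other, so even the loads and the store become
+; <2 x double> operations (CHECK-UNRL).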
+
+define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
+entry:
+ br label %for.body
+; CHECK: @test1
+; CHECK-UNRL: @test1
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+ %1 = load double* %arrayidx2, align 8
+ %mul = fmul double %0, %0
+ %mul3 = fmul double %0, %1
+ %add = fadd double %mul, %mul3
+ %add4 = fadd double %1, %1
+ %add5 = fadd double %add4, %0
+ %mul6 = fmul double %0, %add5
+ %add7 = fadd double %add, %mul6
+ %mul8 = fmul double %1, %1
+ %add9 = fadd double %0, %0
+ %add10 = fadd double %add9, %0
+ %mul11 = fmul double %mul8, %add10
+ %add12 = fadd double %add7, %mul11
+ %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+ store double %add12, double* %arrayidx14, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 10
+ br i1 %exitcond, label %for.end, label %for.body
+; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK: %0 = load double* %arrayidx, align 8
+; CHECK: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK: %1 = load double* %arrayidx2, align 8
+; CHECK: %mul = fmul double %0, %0
+; CHECK: %mul3 = fmul double %0, %1
+; CHECK: %add = fadd double %mul, %mul3
+; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
+; CHECK: %mul8 = fmul double %1, %1
+; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
+; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
+; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0
+; CHECK: %add5.v.i1.2 = insertelement <2 x double> %add5.v.i1.1, double %0, i32 1
+; CHECK: %add5 = fadd <2 x double> %add4, %add5.v.i1.2
+; CHECK: %mul6.v.i0.2 = insertelement <2 x double> %add5.v.i1.1, double %mul8, i32 1
+; CHECK: %mul6 = fmul <2 x double> %mul6.v.i0.2, %add5
+; CHECK: %mul6.v.r1 = extractelement <2 x double> %mul6, i32 0
+; CHECK: %mul6.v.r2 = extractelement <2 x double> %mul6, i32 1
+; CHECK: %add7 = fadd double %add, %mul6.v.r1
+; CHECK: %add12 = fadd double %add7, %mul6.v.r2
+; CHECK: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK: store double %add12, double* %arrayidx14, align 8
+; CHECK: %indvars.iv.next = add i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp eq i32 %lftr.wideiv, 10
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK-UNRL: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
+; CHECK-UNRL: %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+; CHECK-UNRL: %0 = bitcast double* %arrayidx to <2 x double>*
+; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
+; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+; CHECK-UNRL: %2 = load <2 x double>* %0, align 8
+; CHECK-UNRL: %3 = load <2 x double>* %1, align 8
+; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
+; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
+; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
+; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
+; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
+; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
+; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
+; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
+; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
+; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
+; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
+; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
+; CHECK-UNRL: %4 = bitcast double* %arrayidx14 to <2 x double>*
+; CHECK-UNRL: store <2 x double> %add12, <2 x double>* %4, align 8
+; CHECK-UNRL: %indvars.iv.next.1 = add i64 %indvars.iv, 2
+; CHECK-UNRL: %lftr.wideiv.1 = trunc i64 %indvars.iv.next.1 to i32
+; CHECK-UNRL: %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 10
+; CHECK-UNRL: br i1 %exitcond.1, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/BBVectorize/mem-op-depth.ll b/test/Transforms/BBVectorize/mem-op-depth.ll
new file mode 100644
index 000000000000..84f16bd2f47d
--- /dev/null
+++ b/test/Transforms/BBVectorize/mem-op-depth.ll
@@ -0,0 +1,22 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=6 -instcombine -gvn -S | FileCheck %s
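+; The four contiguous float loads and stores below should collapse into single
+; <4 x float> accesses even at the required chain depth of 6.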
+
+@A = common global [1024 x float] zeroinitializer, align 16
+@B = common global [1024 x float] zeroinitializer, align 16
+
+define i32 @test1() nounwind {
+; CHECK: @test1
+ %V1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+ %V2 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
+ %V3 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
+ %V4 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 3), align 4
+; CHECK: %V1 = load <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
+ store float %V1, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 0), align 16
+ store float %V2, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 1), align 4
+ store float %V3, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 2), align 8
+ store float %V4, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 3), align 4
+; CHECK-NEXT: store <4 x float> %V1, <4 x float>* bitcast ([1024 x float]* @B to <4 x float>*), align 16
+ ret i32 0
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/BBVectorize/req-depth.ll b/test/Transforms/BBVectorize/req-depth.ll
new file mode 100644
index 000000000000..8c9cc3c188e3
--- /dev/null
+++ b/test/Transforms/BBVectorize/req-depth.ll
@@ -0,0 +1,17 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 3 -S | FileCheck %s -check-prefix=CHECK-RD3
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth 2 -S | FileCheck %s -check-prefix=CHECK-RD2
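+; The fsub/fmul chain below has pairable depth 2, so it must stay scalar when the
+; required chain depth is 3 (CHECK-RD3) and vectorize when it is 2 (CHECK-RD2).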
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+ %R = fmul double %Y1, %Y2
+ ret double %R
+; CHECK-RD3: @test1
+; CHECK-RD2: @test1
+; CHECK-RD3-NOT: <2 x double>
+; CHECK-RD2: <2 x double>
+}
+
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
new file mode 100644
index 000000000000..d9945b563077
--- /dev/null
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -0,0 +1,46 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-search-limit=4 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-SL4
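+; The default search limit is large enough to find the (Z1, Z2) pair beyond the
+; long scalar chain below; with -bb-vectorize-search-limit=4 the search gives up
+; first and nothing is vectorized (CHECK-SL4).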
+
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK-SL4: @test1
+; CHECK-SL4-NOT: <2 x double>
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+ %Z1 = fadd double %Y1, %B1
+ ; Here we have a dependency chain: the short search limit will not
+ ; see past this chain and so will not see the second part of the
+ ; pair to vectorize.
+ %mul41 = fmul double %Z1, %Y2
+ %sub48 = fsub double %Z1, %mul41
+ %mul62 = fmul double %Z1, %sub48
+ %sub69 = fsub double %Z1, %mul62
+ %mul83 = fmul double %Z1, %sub69
+ %sub90 = fsub double %Z1, %mul83
+ %mul104 = fmul double %Z1, %sub90
+ %sub111 = fsub double %Z1, %mul104
+ %mul125 = fmul double %Z1, %sub111
+ %sub132 = fsub double %Z1, %mul125
+ %mul146 = fmul double %Z1, %sub132
+ %sub153 = fsub double %Z1, %mul146
+ ; end of chain.
+ %Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+ %R1 = fdiv double %Z1, %Z2
+ %R = fmul double %R1, %sub153
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R1 = fdiv double %Z1.v.r1, %Z1.v.r2
+ ret double %R
+; CHECK: ret double %R
+}
+
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
new file mode 100644
index 000000000000..68449771436e
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -0,0 +1,103 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
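+; These tests verify that paired calls to vectorizable intrinsics are fused into
+; their <2 x double> counterparts (fma, cos, powi), and that powi calls with
+; different power operands are not paired (test4).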
+
+declare double @llvm.fma.f64(double, double, double)
+declare double @llvm.cos.f64(double)
+declare double @llvm.powi.f64(double, i32)
+
+; Basic depth-3 chain with fma
+define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, double %C2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.fma.f64(double %X1, double %A1, double %C1)
+ %Y2 = call double @llvm.fma.f64(double %X2, double %A2, double %C2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
+; CHECK: %Y1.v.i2.2 = insertelement <2 x double> %Y1.v.i2.1, double %C2, i32 1
+; CHECK: %Y1 = call <2 x double> @llvm.fma.v2f64(<2 x double> %X1, <2 x double> %X1.v.i0.2, <2 x double> %Y1.v.i2.2)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with cos
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.cos.f64(double %X1)
+ %Y2 = call double @llvm.cos.f64(double %X2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi
+define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain with powi (different powers: should not vectorize)
+define double @test4(double %A1, double %A2, double %B1, double %B2, i32 %P) {
+
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %P2 = add i32 %P, 1
+ %Y1 = call double @llvm.powi.f64(double %X1, i32 %P)
+ %Y2 = call double @llvm.powi.f64(double %X2, i32 %P2)
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test4
+; CHECK-NOT: <2 x double>
+; CHECK: ret double %R
+}
+
+; CHECK: declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) nounwind readonly
+; CHECK: declare <2 x double> @llvm.powi.v2f64(<2 x double>, i32) nounwind readonly
+
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
new file mode 100644
index 000000000000..a5397eeb1f96
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -0,0 +1,110 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
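+; The CHECK-AO run adds -bb-vectorize-aligned-only: the align-8 double accesses
+; would need align 16 as <2 x double> and so stay scalar, though the align-8
+; <2 x float> store in @test3 is still acceptable.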
+
+; Simple 3-pair chain with loads and stores
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ ret void
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test1
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with extending loads and stores
+define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
+entry:
+ %i0f = load float* %a, align 4
+ %i0 = fpext float %i0f to double
+ %i1f = load float* %b, align 4
+ %i1 = fpext float %i1f to double
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds float* %a, i64 1
+ %i3f = load float* %arrayidx3, align 4
+ %i3 = fpext float %i3f to double
+ %arrayidx4 = getelementptr inbounds float* %b, i64 1
+ %i4f = load float* %arrayidx4, align 4
+ %i4 = fpext float %i4f to double
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ ret void
+; CHECK: @test2
+; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
+; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
+; CHECK: %i0f = load <2 x float>* %i0f.v.i0, align 4
+; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
+; CHECK: %i1f = load <2 x float>* %i1f.v.i0, align 4
+; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test2
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple chain with loads and truncating stores
+define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %mulf = fptrunc double %mul to float
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %mul5f = fptrunc double %mul5 to float
+ store float %mulf, float* %c, align 8
+ %arrayidx5 = getelementptr inbounds float* %c, i64 1
+ store float %mul5f, float* %arrayidx5, align 4
+ ret void
+; CHECK: @test3
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK: %0 = bitcast float* %c to <2 x float>*
+; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test3
+; CHECK-AO: %i0 = load double* %a, align 8
+; CHECK-AO: %i1 = load double* %b, align 8
+; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
+; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
+; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
+; CHECK-AO: %i3 = load double* %arrayidx3, align 8
+; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1
+; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
+; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
+; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
+; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
+; CHECK-AO: %0 = bitcast float* %c to <2 x float>*
+; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
+; CHECK-AO: ret void
+}
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
new file mode 100644
index 000000000000..904d766bb673
--- /dev/null
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -0,0 +1,152 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
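+; Each test pairs two independent depth-3 scalar chains; the variants below
+; exercise permuted and splatted last pairs, which require shuffles rather than
+; plain extracts.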
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test1
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+ %R = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+ ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair permuted)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test2
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+ %Z1 = fadd double %Y2, %B1
+ %Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+ %R = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+ ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair first splat)
+define double @test3(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test3
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+ %Z1 = fadd double %Y2, %B1
+ %Z2 = fadd double %Y2, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+ %R = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+ ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair second splat)
+define double @test4(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test4
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y1, %B2
+; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+ %R = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+ ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain
+define <2 x float> @test5(<2 x float> %A1, <2 x float> %A2, <2 x float> %B1, <2 x float> %B2) {
+; CHECK: @test5
+; CHECK: %X1.v.i1 = shufflevector <2 x float> %B1, <2 x float> %B2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: %X1.v.i0 = shufflevector <2 x float> %A1, <2 x float> %A2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %X1 = fsub <2 x float> %A1, %B1
+ %X2 = fsub <2 x float> %A2, %B2
+; CHECK: %X1 = fsub <4 x float> %X1.v.i0, %X1.v.i1
+ %Y1 = fmul <2 x float> %X1, %A1
+ %Y2 = fmul <2 x float> %X2, %A2
+; CHECK: %Y1 = fmul <4 x float> %X1, %X1.v.i0
+ %Z1 = fadd <2 x float> %Y1, %B1
+ %Z2 = fadd <2 x float> %Y2, %B2
+; CHECK: %Z1 = fadd <4 x float> %Y1, %X1.v.i1
+ %R = fmul <2 x float> %Z1, %Z2
+; CHECK: %Z1.v.r1 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <4 x float> %Z1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+; CHECK: %R = fmul <2 x float> %Z1.v.r1, %Z1.v.r2
+ ret <2 x float> %R
+; CHECK: ret <2 x float> %R
+}
+
+; Basic chain with shuffles
+define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
+; CHECK: @test6
+; CHECK: %X1.v.i1 = shufflevector <8 x i8> %B1, <8 x i8> %B2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %X1.v.i0 = shufflevector <8 x i8> %A1, <8 x i8> %A2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %X1 = sub <8 x i8> %A1, %B1
+ %X2 = sub <8 x i8> %A2, %B2
+; CHECK: %X1 = sub <16 x i8> %X1.v.i0, %X1.v.i1
+ %Y1 = mul <8 x i8> %X1, %A1
+ %Y2 = mul <8 x i8> %X2, %A2
+; CHECK: %Y1 = mul <16 x i8> %X1, %X1.v.i0
+ %Z1 = add <8 x i8> %Y1, %B1
+ %Z2 = add <8 x i8> %Y2, %B2
+; CHECK: %Z1 = add <16 x i8> %Y1, %X1.v.i1
+ %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
+ %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
+; CHECK: %Z1.v.r2 = shufflevector <16 x i8> %Z1, <16 x i8> undef, <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 undef, i32 13, i32 undef, i32 15>
+; CHECK: %Q1.v.i1 = shufflevector <8 x i8> %Z1.v.r2, <8 x i8> undef, <16 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK: %Q1 = shufflevector <16 x i8> %Z1, <16 x i8> %Q1.v.i1, <16 x i32> <i32 23, i32 16, i32 6, i32 1, i32 21, i32 18, i32 4, i32 3, i32 14, i32 15, i32 8, i32 9, i32 10, i32 12, i32 12, i32 9>
+ %R = mul <8 x i8> %Q1, %Q2
+; CHECK: %Q1.v.r1 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK: %Q1.v.r2 = shufflevector <16 x i8> %Q1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK: %R = mul <8 x i8> %Q1.v.r1, %Q1.v.r2
+ ret <8 x i8> %R
+; CHECK: ret <8 x i8> %R
+}
+
+
diff --git a/test/Transforms/BlockPlacement/dg.exp b/test/Transforms/BlockPlacement/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/BlockPlacement/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/BlockPlacement/lit.local.cfg b/test/Transforms/BlockPlacement/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/BlockPlacement/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeExtractor/dg.exp b/test/Transforms/CodeExtractor/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/CodeExtractor/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/CodeExtractor/lit.local.cfg b/test/Transforms/CodeExtractor/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/CodeExtractor/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CodeGenPrepare/dg.exp b/test/Transforms/CodeGenPrepare/dg.exp
deleted file mode 100644
index de42dad163fd..000000000000
--- a/test/Transforms/CodeGenPrepare/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CodeGenPrepare/lit.local.cfg b/test/Transforms/CodeGenPrepare/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ConstProp/2007-11-23-cttz.ll b/test/Transforms/ConstProp/2007-11-23-cttz.ll
index 37cda303713b..a28c9b0a2f13 100644
--- a/test/Transforms/ConstProp/2007-11-23-cttz.ll
+++ b/test/Transforms/ConstProp/2007-11-23-cttz.ll
@@ -1,8 +1,8 @@
; RUN: opt < %s -constprop -S | grep {ret i13 13}
; PR1816
-declare i13 @llvm.cttz.i13(i13)
+declare i13 @llvm.cttz.i13(i13, i1)
define i13 @test() {
- %X = call i13 @llvm.cttz.i13(i13 0)
+ %X = call i13 @llvm.cttz.i13(i13 0, i1 true)
ret i13 %X
}
diff --git a/test/Transforms/ConstProp/basictest.ll b/test/Transforms/ConstProp/basictest.ll
index d0d0a5bb3352..09e6e7db437a 100644
--- a/test/Transforms/ConstProp/basictest.ll
+++ b/test/Transforms/ConstProp/basictest.ll
@@ -1,5 +1,8 @@
; RUN: opt < %s -constprop -die -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
; This is a basic sanity check for constant propagation. The add instruction
; should be eliminated.
define i32 @test1(i1 %B) {
@@ -40,3 +43,11 @@ define i1 @TNAN() {
%C = or i1 %A, %B
ret i1 %C
}
+
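+; A splat of 2^30 in each i32 lane, bitcast to i128, should fold to the matching
+; integer constant.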
+define i128 @vector_to_int_cast() {
+ %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
+ ret i128 %A
+; CHECK: @vector_to_int_cast
+; CHECK: ret i128 85070591750041656499021422275829170176
+}
+ 
\ No newline at end of file
diff --git a/test/Transforms/ConstProp/bswap.ll b/test/Transforms/ConstProp/bswap.ll
index 9fce309b7887..a68fdcd4581e 100644
--- a/test/Transforms/ConstProp/bswap.ll
+++ b/test/Transforms/ConstProp/bswap.ll
@@ -1,6 +1,6 @@
; bswap should be constant folded when it is passed a constant argument
-; RUN: opt < %s -constprop -S | not grep call
+; RUN: opt < %s -constprop -S | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
@@ -8,18 +8,34 @@ declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
+declare i80 @llvm.bswap.i80(i80)
+
+; CHECK: define i16 @W
define i16 @W() {
+ ; CHECK: ret i16 256
%Z = call i16 @llvm.bswap.i16( i16 1 ) ; <i16> [#uses=1]
ret i16 %Z
}
+; CHECK: define i32 @X
define i32 @X() {
+ ; CHECK: ret i32 16777216
%Z = call i32 @llvm.bswap.i32( i32 1 ) ; <i32> [#uses=1]
ret i32 %Z
}
+; CHECK: define i64 @Y
define i64 @Y() {
+ ; CHECK: ret i64 72057594037927936
%Z = call i64 @llvm.bswap.i64( i64 1 ) ; <i64> [#uses=1]
ret i64 %Z
}
+; CHECK: define i80 @Z
+define i80 @Z() {
+ ; CHECK: ret i80 -450681596205739728166896
+ ; 0xA0908070605040302010
+ %Z = call i80 @llvm.bswap.i80( i80 76151636403560493650080 )
+ ; 0x102030405060708090A0
+ ret i80 %Z
+}
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index 3b6010a0dc32..7a405a539c51 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -constprop -S | FileCheck %s
+; RUN: opt < %s -constprop -disable-simplify-libcalls -S | FileCheck %s --check-prefix=FNOBUILTIN
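+; The FNOBUILTIN run passes -disable-simplify-libcalls, so the libm calls below
+; must remain unfolded in that mode.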
declare double @cos(double)
@@ -59,3 +60,90 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone
declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>) nounwind readnone
+
+define double @test_intrinsic_pow() nounwind uwtable ssp {
+entry:
+; CHECK: @test_intrinsic_pow
+; CHECK-NOT: call
+ %0 = call double @llvm.pow.f64(double 1.500000e+00, double 3.000000e+00)
+ ret double %0
+}
+declare double @llvm.pow.f64(double, double) nounwind readonly
+
+; Shouldn't fold because of -fno-builtin
+define double @sin_() nounwind uwtable ssp {
+; FNOBUILTIN: @sin_
+; FNOBUILTIN: %1 = call double @sin(double 3.000000e+00)
+ %1 = call double @sin(double 3.000000e+00)
+ ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define double @sqrt_() nounwind uwtable ssp {
+; FNOBUILTIN: @sqrt_
+; FNOBUILTIN: %1 = call double @sqrt(double 3.000000e+00)
+ %1 = call double @sqrt(double 3.000000e+00)
+ ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define float @sqrtf_() nounwind uwtable ssp {
+; FNOBUILTIN: @sqrtf_
+; FNOBUILTIN: %1 = call float @sqrtf(float 3.000000e+00)
+ %1 = call float @sqrtf(float 3.000000e+00)
+ ret float %1
+}
+declare float @sqrtf(float)
+
+; Shouldn't fold because of -fno-builtin
+define float @sinf_() nounwind uwtable ssp {
+; FNOBUILTIN: @sinf_
+; FNOBUILTIN: %1 = call float @sinf(float 3.000000e+00)
+ %1 = call float @sinf(float 3.000000e+00)
+ ret float %1
+}
+declare float @sinf(float)
+
+; Shouldn't fold because of -fno-builtin
+define double @tan_() nounwind uwtable ssp {
+; FNOBUILTIN: @tan_
+; FNOBUILTIN: %1 = call double @tan(double 3.000000e+00)
+ %1 = call double @tan(double 3.000000e+00)
+ ret double %1
+}
+
+; Shouldn't fold because of -fno-builtin
+define double @tanh_() nounwind uwtable ssp {
+; FNOBUILTIN: @tanh_
+; FNOBUILTIN: %1 = call double @tanh(double 3.000000e+00)
+ %1 = call double @tanh(double 3.000000e+00)
+ ret double %1
+}
+declare double @tanh(double)
+
+; Shouldn't fold because of -fno-builtin
+define double @pow_() nounwind uwtable ssp {
+; FNOBUILTIN: @pow_
+; FNOBUILTIN: %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
+ %1 = call double @pow(double 3.000000e+00, double 3.000000e+00)
+ ret double %1
+}
+declare double @pow(double, double)
+
+; Shouldn't fold because of -fno-builtin
+define double @fmod_() nounwind uwtable ssp {
+; FNOBUILTIN: @fmod_
+; FNOBUILTIN: %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
+ %1 = call double @fmod(double 3.000000e+00, double 3.000000e+00)
+ ret double %1
+}
+declare double @fmod(double, double)
+
+; Shouldn't fold because of -fno-builtin
+define double @atan2_() nounwind uwtable ssp {
+; FNOBUILTIN: @atan2_
+; FNOBUILTIN: %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
+ %1 = call double @atan2(double 3.000000e+00, double 3.000000e+00)
+ ret double %1
+}
+declare double @atan2(double, double)
diff --git a/test/Transforms/ConstProp/dg.exp b/test/Transforms/ConstProp/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/ConstProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstProp/lit.local.cfg b/test/Transforms/ConstProp/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/ConstProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ConstantMerge/dg.exp b/test/Transforms/ConstantMerge/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/ConstantMerge/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ConstantMerge/linker-private.ll b/test/Transforms/ConstantMerge/linker-private.ll
new file mode 100644
index 000000000000..eba7880e8af7
--- /dev/null
+++ b/test/Transforms/ConstantMerge/linker-private.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -constmerge -S | FileCheck %s
+; <rdar://problem/10564621>
+
+%0 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i32 }
+
+; CHECK: @.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+@isLogVisible = global i8 0, align 1
+@__CFConstantStringClassReference = external global [0 x i32]
+@.str3 = linker_private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+@_unnamed_cfstring_4 = private constant %struct.NSConstantString { i32* getelementptr inbounds ([0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0), i32 1992, i8* getelementptr inbounds ([1 x i8]* @.str3, i32 0, i32 0), i32 0 }, section "__DATA,__cfstring"
+@null.array = weak_odr constant [1 x i8] zeroinitializer, align 1
+
+define linkonce_odr void @bar() nounwind ssp align 2 {
+entry:
+ %stack = alloca i8*, align 4
+ %call = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* null, i8* null, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_4 to %0*))
+ store i8* getelementptr inbounds ([1 x i8]* @null.array, i32 0, i32 0), i8** %stack, align 4
+ ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
diff --git a/test/Transforms/ConstantMerge/lit.local.cfg b/test/Transforms/ConstantMerge/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/ConstantMerge/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 270c048e2f98..475cd8d772e6 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -79,4 +79,103 @@ Impossible:
LessThanOrEqualToTwo:
ret i32 0
-}
\ No newline at end of file
+}
+
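+; In the switches below, the dominating comparison constrains %s, so unreachable
+; cases can be dropped (switch1) or the whole switch folded to an unconditional
+; branch (switch2-switch4).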
+define i32 @switch1(i32 %s) {
+; CHECK: @switch1
+entry:
+ %cmp = icmp slt i32 %s, 0
+ br i1 %cmp, label %negative, label %out
+
+negative:
+ switch i32 %s, label %out [
+; CHECK: switch i32 %s, label %out
+ i32 0, label %out
+; CHECK-NOT: i32 0
+ i32 1, label %out
+; CHECK-NOT: i32 1
+ i32 -1, label %next
+; CHECK: i32 -1, label %next
+ i32 -2, label %next
+; CHECK: i32 -2, label %next
+ i32 2, label %out
+; CHECK-NOT: i32 2
+ i32 3, label %out
+; CHECK-NOT: i32 3
+ ]
+
+out:
+ %p = phi i32 [ 1, %entry ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ], [ -1, %negative ]
+ ret i32 %p
+
+next:
+ %q = phi i32 [ 0, %negative ], [ 0, %negative ]
+ ret i32 %q
+}
+
+define i32 @switch2(i32 %s) {
+; CHECK: @switch2
+entry:
+ %cmp = icmp sgt i32 %s, 0
+ br i1 %cmp, label %positive, label %out
+
+positive:
+ switch i32 %s, label %out [
+ i32 0, label %out
+ i32 -1, label %next
+ i32 -2, label %next
+ ]
+; CHECK: br label %out
+
+out:
+ %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+ ret i32 %p
+
+next:
+ %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+ ret i32 %q
+}
+
+define i32 @switch3(i32 %s) {
+; CHECK: @switch3
+entry:
+ %cmp = icmp sgt i32 %s, 0
+ br i1 %cmp, label %positive, label %out
+
+positive:
+ switch i32 %s, label %out [
+ i32 -1, label %out
+ i32 -2, label %next
+ i32 -3, label %next
+ ]
+; CHECK: br label %out
+
+out:
+ %p = phi i32 [ -1, %entry ], [ 1, %positive ], [ 1, %positive ]
+ ret i32 %p
+
+next:
+ %q = phi i32 [ 0, %positive ], [ 0, %positive ]
+ ret i32 %q
+}
+
+define void @switch4(i32 %s) {
+; CHECK: @switch4
+entry:
+ %cmp = icmp eq i32 %s, 0
+ br i1 %cmp, label %zero, label %out
+
+zero:
+ switch i32 %s, label %out [
+ i32 0, label %next
+ i32 1, label %out
+ i32 -1, label %out
+ ]
+; CHECK: br label %next
+
+out:
+ ret void
+
+next:
+ ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/dg.exp b/test/Transforms/CorrelatedValuePropagation/dg.exp
deleted file mode 100644
index de42dad163fd..000000000000
--- a/test/Transforms/CorrelatedValuePropagation/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/CorrelatedValuePropagation/lit.local.cfg b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
new file mode 100644
index 000000000000..9b70ed200eda
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -0,0 +1,43 @@
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
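+; In both tests the guarding branch implies 8 <= %a < 16, so the comparison in
+; %then is known false and folds away.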
+
+declare i32 @foo()
+
+define i32 @test1(i32 %a) nounwind {
+ %a.off = add i32 %a, -8
+ %cmp = icmp ult i32 %a.off, 8
+ br i1 %cmp, label %then, label %else
+
+then:
+ %dead = icmp eq i32 %a, 7
+ br i1 %dead, label %end, label %else
+
+else:
+ ret i32 1
+
+end:
+ ret i32 2
+
+; CHECK: @test1
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
+
+define i32 @test2(i32 %a) nounwind {
+ %a.off = add i32 %a, -8
+ %cmp = icmp ult i32 %a.off, 8
+ br i1 %cmp, label %then, label %else
+
+then:
+ %dead = icmp ugt i32 %a, 15
+ br i1 %dead, label %end, label %else
+
+else:
+ ret i32 1
+
+end:
+ ret i32 2
+
+; CHECK: @test2
+; CHECK: then:
+; CHECK-NEXT: br i1 false, label %end, label %else
+}
diff --git a/test/Transforms/DeadArgElim/deadexternal.ll b/test/Transforms/DeadArgElim/deadexternal.ll
index b2d63ec77209..e3fe1bbb548b 100644
--- a/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/test/Transforms/DeadArgElim/deadexternal.ll
@@ -30,10 +30,10 @@ entry:
define void @h() {
entry:
%i = alloca i32, align 4
- volatile store i32 10, i32* %i, align 4
+ store volatile i32 10, i32* %i, align 4
; CHECK: %tmp = load volatile i32* %i, align 4
; CHECK-NEXT: call void @f(i32 undef)
- %tmp = volatile load i32* %i, align 4
+ %tmp = load volatile i32* %i, align 4
call void @f(i32 %tmp)
ret void
}
diff --git a/test/Transforms/DeadArgElim/dg.exp b/test/Transforms/DeadArgElim/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/DeadArgElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadArgElim/lit.local.cfg b/test/Transforms/DeadArgElim/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/DeadArgElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
new file mode 100644
index 000000000000..ed53eb524c20
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll
@@ -0,0 +1,95 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
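+; DSE should shorten a memset or memcpy whose trailing bytes are overwritten by a
+; later store, and must leave the call intact when the store does not cover the
+; tail (the dontwrite* tests).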
+
+%struct.vec2 = type { <4 x i32>, <4 x i32> }
+%struct.vec2plusi = type { <4 x i32>, <4 x i32>, i32 }
+
+@glob1 = global %struct.vec2 zeroinitializer, align 16
+@glob2 = global %struct.vec2plusi zeroinitializer, align 16
+
+define void @write24to28(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write24to28
+entry:
+ %arrayidx0 = getelementptr inbounds i32* %p, i64 1
+ %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 24, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+ %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+ store i32 1, i32* %arrayidx1, align 4
+ ret void
+}
+
+define void @write28to32(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write28to32
+entry:
+ %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+ %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+ store i32 1, i32* %arrayidx1, align 4
+ ret void
+}
+
+define void @dontwrite28to32memset(i32* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite28to32memset
+entry:
+ %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 16, i1 false)
+ %arrayidx1 = getelementptr inbounds i32* %p, i64 7
+ store i32 1, i32* %arrayidx1, align 4
+ ret void
+}
+
+define void @write32to36(%struct.vec2plusi* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write32to36
+entry:
+ %0 = bitcast %struct.vec2plusi* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 32, i32 16, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2plusi* @glob2 to i8*), i64 36, i32 16, i1 false)
+ %c = getelementptr inbounds %struct.vec2plusi* %p, i64 0, i32 2
+ store i32 1, i32* %c, align 4
+ ret void
+}
+
+define void @write16to32(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK: @write16to32
+entry:
+ %0 = bitcast %struct.vec2* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 16, i32 16, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+ %c = getelementptr inbounds %struct.vec2* %p, i64 0, i32 1
+ store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32>* %c, align 4
+ ret void
+}
+
+define void @dontwrite28to32memcpy(%struct.vec2* nocapture %p) nounwind uwtable ssp {
+; CHECK: @dontwrite28to32memcpy
+entry:
+ %0 = bitcast %struct.vec2* %p to i8*
+; CHECK: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.vec2* @glob1 to i8*), i64 32, i32 16, i1 false)
+ %arrayidx1 = getelementptr inbounds %struct.vec2* %p, i64 0, i32 0, i64 7
+ store i32 1, i32* %arrayidx1, align 4
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+%struct.trapframe = type { i64, i64, i64 }
+
+; bugzilla 11455 - make sure negative GEPs don't break this optimization
+; CHECK: @cpu_lwp_fork
+define void @cpu_lwp_fork(%struct.trapframe* %md_regs, i64 %pcb_rsp0) nounwind uwtable noinline ssp {
+entry:
+ %0 = inttoptr i64 %pcb_rsp0 to %struct.trapframe*
+ %add.ptr = getelementptr inbounds %struct.trapframe* %0, i64 -1
+ %1 = bitcast %struct.trapframe* %add.ptr to i8*
+ %2 = bitcast %struct.trapframe* %md_regs to i8*
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 24, i32 1, i1 false)
+ %tf_trapno = getelementptr inbounds %struct.trapframe* %0, i64 -1, i32 1
+ store i64 3, i64* %tf_trapno, align 8
+ ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/dg.exp b/test/Transforms/DeadStoreElimination/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/DeadStoreElimination/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/DeadStoreElimination/dominate.ll b/test/Transforms/DeadStoreElimination/dominate.ll
new file mode 100644
index 000000000000..284fea4234fc
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/dominate.ll
@@ -0,0 +1,25 @@
+; RUN: opt %s -dse -disable-output
+; test that we don't crash
+declare void @bar()
+
+define void @foo() {
+bb1:
+ %memtmp3.i = alloca [21 x i8], align 1
+ %0 = getelementptr inbounds [21 x i8]* %memtmp3.i, i64 0, i64 0
+ br label %bb3
+
+bb2:
+ call void @llvm.lifetime.end(i64 -1, i8* %0)
+ br label %bb3
+
+bb3:
+ call void @bar()
+ call void @llvm.lifetime.end(i64 -1, i8* %0)
+ br label %bb4
+
+bb4:
+ ret void
+
+}
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index aa3f0ab938e2..a5fbdc76387e 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -2,6 +2,9 @@
target datalayout = "e-p:64:64:64"
+declare void @free(i8* nocapture)
+declare noalias i8* @malloc(i64)
+
; CHECK: @test
; CHECK-NEXT: bitcast
; CHECK-NEXT: @free
@@ -26,10 +29,10 @@ define void @test2({i32, i32}* %P) {
ret void
}
-; CHECK: @test4
+; CHECK: @test3
; CHECK-NOT: store
; CHECK: ret void
-define void @test4() {
+define void @test3() {
%m = call i8* @malloc(i64 24)
store i8 0, i8* %m
%m1 = getelementptr i8* %m, i64 1
@@ -38,5 +41,30 @@ define void @test4() {
ret void
}
-declare void @free(i8*)
-declare i8* @malloc(i64)
+; PR11240
+; CHECK: @test4
+; CHECK-NOT: store
+; CHECK: ret void
+define void @test4(i1 %x) nounwind {
+entry:
+ %alloc1 = tail call noalias i8* @malloc(i64 4) nounwind
+ br i1 %x, label %skipinit1, label %init1
+
+init1:
+ store i8 1, i8* %alloc1
+ br label %skipinit1
+
+skipinit1:
+ tail call void @free(i8* %alloc1) nounwind
+ ret void
+}
+
+; CHECK: @test5
+define void @test5() {
+ br label %bb
+
+bb:
+ tail call void @free(i8* undef) nounwind
+ br label %bb
+}
+
diff --git a/test/Transforms/DeadStoreElimination/lit.local.cfg b/test/Transforms/DeadStoreElimination/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/DeadStoreElimination/pr11390.ll b/test/Transforms/DeadStoreElimination/pr11390.ll
new file mode 100644
index 000000000000..2ce6eea365aa
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/pr11390.ll
@@ -0,0 +1,38 @@
+; RUN: opt -basicaa -dse -S -o - %s | FileCheck %s
+; PR11390
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
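+; The store of '.' (i8 46) between the two memcpys is live and must not be
+; eliminated.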
+
+define fastcc void @cat_domain(i8* nocapture %name, i8* nocapture %domain, i8**
+nocapture %s) nounwind uwtable {
+entry:
+ %call = tail call i64 @strlen(i8* %name) nounwind readonly
+ %call1 = tail call i64 @strlen(i8* %domain) nounwind readonly
+ %add = add i64 %call, 1
+ %add2 = add i64 %add, %call1
+ %add3 = add i64 %add2, 1
+ %call4 = tail call noalias i8* @malloc(i64 %add3) nounwind
+ store i8* %call4, i8** %s, align 8
+ %tobool = icmp eq i8* %call4, null
+ br i1 %tobool, label %return, label %if.end
+
+if.end: ; preds = %entry
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %call4, i8* %name, i64 %call, i32 1, i1 false)
+ %arrayidx = getelementptr inbounds i8* %call4, i64 %call
+ store i8 46, i8* %arrayidx, align 1
+; CHECK: store i8 46
+ %add.ptr5 = getelementptr inbounds i8* %call4, i64 %add
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %add.ptr5, i8* %domain, i64 %call1, i32 1, i1 false)
+ %arrayidx8 = getelementptr inbounds i8* %call4, i64 %add2
+ store i8 0, i8* %arrayidx8, align 1
+ br label %return
+
+return: ; preds = %if.end, %entry
+ ret void
+}
+
+declare i64 @strlen(i8* nocapture) nounwind readonly
+
+declare noalias i8* @malloc(i64) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index ec2f15737a37..81eb5a8c705e 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -251,3 +251,10 @@ bb:
; CHECK: call void @test19f
}
+define void @test20() {
+ %m = call i8* @malloc(i32 24)
+ store i8 0, i8* %m
+ ret void
+}
+; CHECK: @test20
+; CHECK-NEXT: ret void
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 57b1697ff4de..32c302c9205b 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -10,22 +10,22 @@ define void @test1(i8 %V, i32 *%P) {
%C = zext i8 %V to i32
%D = zext i8 %V to i32 ;; CSE
- volatile store i32 %C, i32* %P
- volatile store i32 %D, i32* %P
+ store volatile i32 %C, i32* %P
+ store volatile i32 %D, i32* %P
; CHECK-NEXT: %C = zext i8 %V to i32
; CHECK-NEXT: store volatile i32 %C
; CHECK-NEXT: store volatile i32 %C
%E = add i32 %C, %C
%F = add i32 %C, %C
- volatile store i32 %E, i32* %P
- volatile store i32 %F, i32* %P
+ store volatile i32 %E, i32* %P
+ store volatile i32 %F, i32* %P
; CHECK-NEXT: %E = add i32 %C, %C
; CHECK-NEXT: store volatile i32 %E
; CHECK-NEXT: store volatile i32 %E
%G = add nuw i32 %C, %C ;; not a CSE with E
- volatile store i32 %G, i32* %P
+ store volatile i32 %G, i32* %P
; CHECK-NEXT: %G = add nuw i32 %C, %C
; CHECK-NEXT: store volatile i32 %G
ret void
diff --git a/test/Transforms/EarlyCSE/dg.exp b/test/Transforms/EarlyCSE/dg.exp
deleted file mode 100644
index de42dad163fd..000000000000
--- a/test/Transforms/EarlyCSE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/EarlyCSE/instsimplify-dom.ll b/test/Transforms/EarlyCSE/instsimplify-dom.ll
new file mode 100644
index 000000000000..36dffec1c63c
--- /dev/null
+++ b/test/Transforms/EarlyCSE/instsimplify-dom.ll
@@ -0,0 +1,19 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+; PR12231
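+; Block %xxx is unreachable, so the phi can only take the undef value; the test
+; just checks that EarlyCSE still emits the function.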
+
+declare i32 @f()
+
+define i32 @fn() {
+entry:
+ br label %lbl_1215
+
+lbl_1215:
+ %ins34 = phi i32 [ %ins35, %xxx ], [ undef, %entry ]
+ ret i32 %ins34
+
+xxx:
+ %ins35 = call i32 @f()
+ br label %lbl_1215
+}
+
+; CHECK: define i32 @fn
diff --git a/test/Transforms/EarlyCSE/lit.local.cfg b/test/Transforms/EarlyCSE/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/EarlyCSE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
index 85df09ebd7fe..b7e4d1f87261 100644
--- a/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
+++ b/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
@@ -4,6 +4,6 @@
@g = global i32 0 ; <i32*> [#uses=1]
define i32 @f() {
- %t = volatile load i32* @g ; <i32> [#uses=1]
+ %t = load volatile i32* @g ; <i32> [#uses=1]
ret i32 %t
}
diff --git a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll b/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
deleted file mode 100644
index e2bab19e7efd..000000000000
--- a/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
+++ /dev/null
@@ -1,105 +0,0 @@
-; RUN: opt < %s -functionattrs -S | not grep {nocapture *%%q}
-; RUN: opt < %s -functionattrs -S | grep {nocapture *%%p} | count 6
-@g = global i32* null ; <i32**> [#uses=1]
-
-define i32* @c1(i32* %q) {
- ret i32* %q
-}
-
-define void @c2(i32* %q) {
- store i32* %q, i32** @g
- ret void
-}
-
-define void @c3(i32* %q) {
- call void @c2(i32* %q)
- ret void
-}
-
-define i1 @c4(i32* %q, i32 %bitno) {
- %tmp = ptrtoint i32* %q to i32
- %tmp2 = lshr i32 %tmp, %bitno
- %bit = trunc i32 %tmp2 to i1
- br i1 %bit, label %l1, label %l0
-l0:
- ret i1 0 ; escaping value not caught by def-use chaining.
-l1:
- ret i1 1 ; escaping value not caught by def-use chaining.
-}
-
-@lookup_table = global [2 x i1] [ i1 0, i1 1 ]
-
-define i1 @c5(i32* %q, i32 %bitno) {
- %tmp = ptrtoint i32* %q to i32
- %tmp2 = lshr i32 %tmp, %bitno
- %bit = and i32 %tmp2, 1
- ; subtle escape mechanism follows
- %lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
- %val = load i1* %lookup
- ret i1 %val
-}
-
-declare void @throw_if_bit_set(i8*, i8) readonly
-define i1 @c6(i8* %q, i8 %bit) {
- invoke void @throw_if_bit_set(i8* %q, i8 %bit)
- to label %ret0 unwind label %ret1
-ret0:
- ret i1 0
-ret1:
- %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
- cleanup
- ret i1 1
-}
-
-declare i32 @__gxx_personality_v0(...)
-
-define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
- %tmp = ptrtoint i32* %q to i32
- %tmp2 = lshr i32 %tmp, %bitno
- %bit = and i32 %tmp2, 1
- %lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
- ret i1* %lookup
-}
-
-define i1 @c7(i32* %q, i32 %bitno) {
- %ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
- %val = load i1* %ptr
- ret i1 %val
-}
-
-
-define i32 @nc1(i32* %q, i32* %p, i1 %b) {
-e:
- br label %l
-l:
- %x = phi i32* [ %p, %e ]
- %y = phi i32* [ %q, %e ]
- %tmp = bitcast i32* %x to i32* ; <i32*> [#uses=2]
- %tmp2 = select i1 %b, i32* %tmp, i32* %y
- %val = load i32* %tmp2 ; <i32> [#uses=1]
- store i32 0, i32* %tmp
- store i32* %y, i32** @g
- ret i32 %val
-}
-
-define void @nc2(i32* %p, i32* %q) {
- %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; <i32> [#uses=0]
- ret void
-}
-
-define void @nc3(void ()* %p) {
- call void %p()
- ret void
-}
-
-declare void @external(i8*) readonly nounwind
-define void @nc4(i8* %p) {
- call void @external(i8* %p)
- ret void
-}
-
-define void @nc5(void (i8*)* %f, i8* %p) {
- call void %f(i8* %p) readonly nounwind
- call void %f(i8* nocapture %p)
- ret void
-}
diff --git a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
index f21fabc493c7..93991d21a1fa 100644
--- a/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
+++ b/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -5,6 +5,6 @@
define void @foo() {
; CHECK: void @foo() {
- %tmp = volatile load i32* @g
+ %tmp = load volatile i32* @g
ret void
}
diff --git a/test/Transforms/FunctionAttrs/dg.exp b/test/Transforms/FunctionAttrs/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/FunctionAttrs/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/FunctionAttrs/lit.local.cfg b/test/Transforms/FunctionAttrs/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
new file mode 100644
index 000000000000..3027acd35c7d
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -0,0 +1,178 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+@g = global i32* null ; <i32**> [#uses=1]
+
+; CHECK: define i32* @c1(i32* %q)
+define i32* @c1(i32* %q) {
+ ret i32* %q
+}
+
+; CHECK: define void @c2(i32* %q)
+define void @c2(i32* %q) {
+ store i32* %q, i32** @g
+ ret void
+}
+
+; CHECK: define void @c3(i32* %q)
+define void @c3(i32* %q) {
+ call void @c2(i32* %q)
+ ret void
+}
+
+; CHECK: define i1 @c4(i32* %q, i32 %bitno)
+define i1 @c4(i32* %q, i32 %bitno) {
+ %tmp = ptrtoint i32* %q to i32
+ %tmp2 = lshr i32 %tmp, %bitno
+ %bit = trunc i32 %tmp2 to i1
+ br i1 %bit, label %l1, label %l0
+l0:
+ ret i1 0 ; escaping value not caught by def-use chaining.
+l1:
+ ret i1 1 ; escaping value not caught by def-use chaining.
+}
+
+@lookup_table = global [2 x i1] [ i1 0, i1 1 ]
+
+; CHECK: define i1 @c5(i32* %q, i32 %bitno)
+define i1 @c5(i32* %q, i32 %bitno) {
+ %tmp = ptrtoint i32* %q to i32
+ %tmp2 = lshr i32 %tmp, %bitno
+ %bit = and i32 %tmp2, 1
+ ; subtle escape mechanism follows
+ %lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
+ %val = load i1* %lookup
+ ret i1 %val
+}
+
+declare void @throw_if_bit_set(i8*, i8) readonly
+
+; CHECK: define i1 @c6(i8* %q, i8 %bit)
+define i1 @c6(i8* %q, i8 %bit) {
+ invoke void @throw_if_bit_set(i8* %q, i8 %bit)
+ to label %ret0 unwind label %ret1
+ret0:
+ ret i1 0
+ret1:
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ ret i1 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+define i1* @lookup_bit(i32* %q, i32 %bitno) readnone nounwind {
+ %tmp = ptrtoint i32* %q to i32
+ %tmp2 = lshr i32 %tmp, %bitno
+ %bit = and i32 %tmp2, 1
+ %lookup = getelementptr [2 x i1]* @lookup_table, i32 0, i32 %bit
+ ret i1* %lookup
+}
+
+; CHECK: define i1 @c7(i32* %q, i32 %bitno)
+define i1 @c7(i32* %q, i32 %bitno) {
+ %ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
+ %val = load i1* %ptr
+ ret i1 %val
+}
+
+
+; CHECK: define i32 @nc1(i32* %q, i32* nocapture %p, i1 %b)
+define i32 @nc1(i32* %q, i32* %p, i1 %b) {
+e:
+ br label %l
+l:
+ %x = phi i32* [ %p, %e ]
+ %y = phi i32* [ %q, %e ]
+ %tmp = bitcast i32* %x to i32* ; <i32*> [#uses=2]
+ %tmp2 = select i1 %b, i32* %tmp, i32* %y
+ %val = load i32* %tmp2 ; <i32> [#uses=1]
+ store i32 0, i32* %tmp
+ store i32* %y, i32** @g
+ ret i32 %val
+}
+
+; CHECK: define void @nc2(i32* nocapture %p, i32* %q)
+define void @nc2(i32* %p, i32* %q) {
+ %1 = call i32 @nc1(i32* %q, i32* %p, i1 0) ; <i32> [#uses=0]
+ ret void
+}
+
+; CHECK: define void @nc3(void ()* nocapture %p)
+define void @nc3(void ()* %p) {
+ call void %p()
+ ret void
+}
+
+declare void @external(i8*) readonly nounwind
+; CHECK: define void @nc4(i8* nocapture %p)
+define void @nc4(i8* %p) {
+ call void @external(i8* %p)
+ ret void
+}
+
+; CHECK: define void @nc5(void (i8*)* nocapture %f, i8* nocapture %p)
+define void @nc5(void (i8*)* %f, i8* %p) {
+ call void %f(i8* %p) readonly nounwind
+ call void %f(i8* nocapture %p)
+ ret void
+}
+
+; CHECK: define void @test1_1(i8* nocapture %x1_1, i8* %y1_1)
+define void @test1_1(i8* %x1_1, i8* %y1_1) {
+ call i8* @test1_2(i8* %x1_1, i8* %y1_1)
+ store i32* null, i32** @g
+ ret void
+}
+
+; CHECK: define i8* @test1_2(i8* nocapture %x1_2, i8* %y1_2)
+define i8* @test1_2(i8* %x1_2, i8* %y1_2) {
+ call void @test1_1(i8* %x1_2, i8* %y1_2)
+ store i32* null, i32** @g
+ ret i8* %y1_2
+}
+
+; CHECK: define void @test2(i8* nocapture %x2)
+define void @test2(i8* %x2) {
+ call void @test2(i8* %x2)
+ store i32* null, i32** @g
+ ret void
+}
+
+; CHECK: define void @test3(i8* nocapture %x3, i8* nocapture %y3, i8* nocapture %z3)
+define void @test3(i8* %x3, i8* %y3, i8* %z3) {
+ call void @test3(i8* %z3, i8* %y3, i8* %x3)
+ store i32* null, i32** @g
+ ret void
+}
+
+; CHECK: define void @test4_1(i8* %x4_1)
+define void @test4_1(i8* %x4_1) {
+ call i8* @test4_2(i8* %x4_1, i8* %x4_1, i8* %x4_1)
+ store i32* null, i32** @g
+ ret void
+}
+
+; CHECK: define i8* @test4_2(i8* nocapture %x4_2, i8* %y4_2, i8* nocapture %z4_2)
+define i8* @test4_2(i8* %x4_2, i8* %y4_2, i8* %z4_2) {
+ call void @test4_1(i8* null)
+ store i32* null, i32** @g
+ ret i8* %y4_2
+}
+
+declare i8* @test5_1(i8* %x5_1)
+
+; CHECK: define void @test5_2(i8* %x5_2)
+define void @test5_2(i8* %x5_2) {
+ call i8* @test5_1(i8* %x5_2)
+ store i32* null, i32** @g
+ ret void
+}
+
+declare void @test6_1(i8* %x6_1, i8* nocapture %y6_1, ...)
+
+; CHECK: define void @test6_2(i8* %x6_2, i8* nocapture %y6_2, i8* %z6_2)
+define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) {
+ call void (i8*, i8*, ...)* @test6_1(i8* %x6_2, i8* %y6_2, i8* %z6_2)
+ store i32* null, i32** @g
+ ret void
+}
+
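
As background for the CHECK lines above: `nocapture` on a pointer argument asserts the callee does not retain a copy of the pointer itself beyond the call — it may still read and write through it. Storing the pointer (rather than the pointee) somewhere that outlives the call is what defeats the inference. A two-function sketch, with invented names:

  @slot = global i32* null

  ; Only dereferences %p: functionattrs may mark %p nocapture.
  define i32 @uses_pointee(i32* %p) {
    %v = load i32* %p
    ret i32 %v
  }

  ; Stores %p itself to a global: %p escapes, so no nocapture.
  define void @leaks_pointer(i32* %p) {
    store i32* %p, i32** @slot
    ret void
  }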
diff --git a/test/Transforms/GVN/commute.ll b/test/Transforms/GVN/commute.ll
new file mode 100644
index 000000000000..cf4fb7f17291
--- /dev/null
+++ b/test/Transforms/GVN/commute.ll
@@ -0,0 +1,23 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare void @use(i32, i32)
+
+define void @foo(i32 %x, i32 %y) {
+ ; CHECK: @foo
+ %add1 = add i32 %x, %y
+ %add2 = add i32 %y, %x
+ call void @use(i32 %add1, i32 %add2)
+ ; CHECK: @use(i32 %add1, i32 %add1)
+ ret void
+}
+
+declare void @vse(i1, i1)
+
+define void @bar(i32 %x, i32 %y) {
+ ; CHECK: @bar
+ %cmp1 = icmp ult i32 %x, %y
+ %cmp2 = icmp ugt i32 %y, %x
+ call void @vse(i1 %cmp1, i1 %cmp2)
+ ; CHECK: @vse(i1 %cmp1, i1 %cmp1)
+ ret void
+}
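
Both tests lean on identities GVN is expected to apply while value numbering: add is commutative, and an icmp with swapped operands is the same value under the swapped predicate (ult %x, %y is ugt %y, %x). A reduced sketch with invented names:

  define i1 @commute_sketch(i32 %a, i32 %b) {
    %s1 = add i32 %a, %b       ; same value number as %s2
    %s2 = add i32 %b, %a
    %c1 = icmp ult i32 %a, %b  ; same value number as %c2
    %c2 = icmp ugt i32 %b, %a
    %r = xor i1 %c1, %c2       ; GVN should reduce this to false
    ret i1 %r
  }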
diff --git a/test/Transforms/GVN/condprop.ll b/test/Transforms/GVN/condprop.ll
index 0b31b01b7b14..9c28955801f7 100644
--- a/test/Transforms/GVN/condprop.ll
+++ b/test/Transforms/GVN/condprop.ll
@@ -55,25 +55,6 @@ return: ; preds = %bb8
}
declare void @foo(i1)
-
-; CHECK: @test2
-define void @test2(i1 %x, i1 %y) {
- %z = or i1 %x, %y
- br i1 %z, label %true, label %false
-true:
-; CHECK: true:
- %z2 = or i1 %x, %y
- call void @foo(i1 %z2)
-; CHECK: call void @foo(i1 true)
- br label %true
-false:
-; CHECK: false:
- %z3 = or i1 %x, %y
- call void @foo(i1 %z3)
-; CHECK: call void @foo(i1 false)
- br label %false
-}
-
declare void @bar(i32)
; CHECK: @test3
@@ -130,3 +111,141 @@ case3:
; CHECK: call void @bar(i32 %x)
ret void
}
+
+; CHECK: @test5
+define i1 @test5(i32 %x, i32 %y) {
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ %cmp2 = icmp ne i32 %x, %y
+; CHECK: ret i1 false
+ ret i1 %cmp2
+
+different:
+ %cmp3 = icmp eq i32 %x, %y
+; CHECK: ret i1 false
+ ret i1 %cmp3
+}
+
+; CHECK: @test6
+define i1 @test6(i32 %x, i32 %y) {
+ %cmp2 = icmp ne i32 %x, %y
+ %cmp = icmp eq i32 %x, %y
+ %cmp3 = icmp eq i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+ ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+ ret i1 %cmp3
+}
+
+; CHECK: @test7
+define i1 @test7(i32 %x, i32 %y) {
+ %cmp = icmp sgt i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ %cmp2 = icmp sle i32 %x, %y
+; CHECK: ret i1 false
+ ret i1 %cmp2
+
+different:
+ %cmp3 = icmp sgt i32 %x, %y
+; CHECK: ret i1 false
+ ret i1 %cmp3
+}
+
+; CHECK: @test8
+define i1 @test8(i32 %x, i32 %y) {
+ %cmp2 = icmp sle i32 %x, %y
+ %cmp = icmp sgt i32 %x, %y
+ %cmp3 = icmp sgt i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+; CHECK: ret i1 false
+ ret i1 %cmp2
+
+different:
+; CHECK: ret i1 false
+ ret i1 %cmp3
+}
+
+; PR1768
+; CHECK: @test9
+define i32 @test9(i32 %i, i32 %j) {
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+ %diff = sub i32 %i, %j
+ ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+ ret i32 5
+; CHECK: ret i32 5
+}
+
+; PR1768
+; CHECK: @test10
+define i32 @test10(i32 %j, i32 %i) {
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+ %diff = sub i32 %i, %j
+ ret i32 %diff
+; CHECK: ret i32 0
+
+ret:
+ ret i32 5
+; CHECK: ret i32 5
+}
+
+declare i32 @yogibar()
+
+; CHECK: @test11
+define i32 @test11(i32 %x) {
+ %v0 = call i32 @yogibar()
+ %v1 = call i32 @yogibar()
+ %cmp = icmp eq i32 %v0, %v1
+ br i1 %cmp, label %cond_true, label %next
+
+cond_true:
+ ret i32 %v1
+; CHECK: ret i32 %v0
+
+next:
+ %cmp2 = icmp eq i32 %x, %v0
+ br i1 %cmp2, label %cond_true2, label %next2
+
+cond_true2:
+ ret i32 %v0
+; CHECK: ret i32 %x
+
+next2:
+ ret i32 0
+}
+
+; CHECK: @test12
+define i32 @test12(i32 %x) {
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:
+ br label %ret
+
+cond_false:
+ br label %ret
+
+ret:
+ %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ]
+; CHECK: %res = phi i32 [ 0, %cond_true ], [ %x, %cond_false ]
+ ret i32 %res
+}
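
The added tests share one mechanism: when a block is reachable only via the true (or false) edge of a branch, GVN may treat the branch condition as known there, so comparisons equivalent or contradictory to it fold, and in the equality case either operand can stand in for the other — which is why test9 folds %i - %j to 0 and test11 canonicalizes %v1 to %v0. A reduced sketch (names invented):

  define i32 @equality_sketch(i32 %x, i32 %y) {
  entry:
    %cmp = icmp eq i32 %x, %y
    br i1 %cmp, label %same, label %diff

  same:                        ; dominated by the true edge: %x == %y here
    %d = sub i32 %x, %y
    ret i32 %d                 ; foldable to ret i32 0

  diff:
    ret i32 1
  }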
diff --git a/test/Transforms/GVN/dg.exp b/test/Transforms/GVN/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/GVN/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GVN/lit.local.cfg b/test/Transforms/GVN/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/GVN/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GVN/pre-compare.ll b/test/Transforms/GVN/pre-compare.ll
new file mode 100644
index 000000000000..18d0c2e1085c
--- /dev/null
+++ b/test/Transforms/GVN/pre-compare.ll
@@ -0,0 +1,68 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+; C source:
+;
+; void f(int x) {
+; if (x != 1)
+; puts (x == 2 ? "a" : "b");
+; for (;;) {
+; puts("step 1");
+; if (x == 2)
+; continue;
+; printf("step 2: %d\n", x);
+; }
+; }
+;
+; If we PRE %cmp3, CodeGenPrepare won't be able to sink the compare down to its
+; uses, and we are forced to keep both %x and %cmp3 in registers in the loop.
+;
+; It is just as cheap to recompute the icmp against %x as it is to compare a
+; GPR against 0. On x86-64, the br i1 %cmp3 becomes:
+;
+; testb %r12b, %r12b
+; jne LBB0_3
+;
+; The sunk icmp is:
+;
+; cmpl $2, %ebx
+; je LBB0_3
+;
+; This is just as good, and it doesn't require a separate register.
+;
+; CHECK-NOT: phi i1
+
+@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str1 = private unnamed_addr constant [2 x i8] c"b\00", align 1
+@.str2 = private unnamed_addr constant [7 x i8] c"step 1\00", align 1
+@.str3 = private unnamed_addr constant [12 x i8] c"step 2: %d\0A\00", align 1
+
+define void @f(i32 %x) noreturn nounwind uwtable ssp {
+entry:
+ %cmp = icmp eq i32 %x, 1
+ br i1 %cmp, label %for.cond.preheader, label %if.then
+
+if.then: ; preds = %entry
+ %cmp1 = icmp eq i32 %x, 2
+ %cond = select i1 %cmp1, i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str1, i64 0, i64 0)
+ %call = tail call i32 @puts(i8* %cond) nounwind
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry, %if.then
+ %cmp3 = icmp eq i32 %x, 2
+ br label %for.cond
+
+for.cond: ; preds = %for.cond.backedge, %for.cond.preheader
+ %call2 = tail call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0)) nounwind
+ br i1 %cmp3, label %for.cond.backedge, label %if.end5
+
+if.end5: ; preds = %for.cond
+ %call6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str3, i64 0, i64 0), i32 %x) nounwind
+ br label %for.cond.backedge
+
+for.cond.backedge: ; preds = %if.end5, %for.cond
+ br label %for.cond
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+declare i32 @printf(i8* nocapture, ...) nounwind
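
To make the CHECK-NOT concrete: had PRE fired on %cmp3, the flag would arrive in the preheader as an i1 phi — roughly the shape below (hand-written, not compiler output) — and that i1 would stay live around the loop alongside %x, which is the extra register the comment objects to:

  for.cond.preheader:
    %cmp3 = phi i1 [ %cmp1, %if.then ], [ false, %entry ]

Declining the PRE keeps only %x live and lets CodeGenPrepare re-materialize the compare next to its branch.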
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 2f0d2eb367cc..9e08004ea476 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -26,6 +26,15 @@ define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
ret i8 %Y
}
+;; No PR filed, crashed in CaptureTracker.
+declare void @helper()
+define void @crash1() {
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind
+ %tmp = load i8* bitcast (void ()* @helper to i8*)
+ %x = icmp eq i8 %tmp, 15
+ ret void
+}
+
;;===----------------------------------------------------------------------===;;
;; Store -> Load and Load -> Load forwarding where src and dst are different
@@ -642,3 +651,28 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+;;===----------------------------------------------------------------------===;;
+;; Load -> Store dependency which isn't interfered with by a call that happens
+;; before the pointer was captured.
+;;===----------------------------------------------------------------------===;;
+
+%class.X = type { [8 x i8] }
+
+@_ZTV1X = weak_odr constant [5 x i8*] zeroinitializer
+@_ZTV1Y = weak_odr constant [5 x i8*] zeroinitializer
+
+declare void @use()
+declare void @use3(i8***, i8**)
+
+; PR8908
+define void @test_escape1() nounwind {
+ %x = alloca i8**, align 8
+ store i8** getelementptr inbounds ([5 x i8*]* @_ZTV1X, i64 0, i64 2), i8*** %x, align 8
+ call void @use() nounwind
+ %DEAD = load i8*** %x, align 8
+ call void @use3(i8*** %x, i8** %DEAD) nounwind
+ ret void
+; CHECK: test_escape1
+; CHECK-NOT: DEAD
+; CHECK: ret
+}
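
The PR8908 test is about ordering relative to capture: a call can only clobber memory through pointers it could have obtained, and @use() runs before %x is ever passed to anything, so the store still reaches the load. A reduced sketch of the same forwarding (names invented):

  declare void @opaque()

  define i32 @forward_across_call() {
    %slot = alloca i32
    store i32 42, i32* %slot
    call void @opaque()      ; %slot has not escaped yet: cannot be clobbered
    %v = load i32* %slot     ; forwardable to 42
    ret i32 %v
  }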
diff --git a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll b/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
deleted file mode 100644
index 29864f825285..000000000000
--- a/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
+++ /dev/null
@@ -1,264 +0,0 @@
-; RUN: opt < %s -globaldce | llc -O0 -o /dev/null
-
-%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
-%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>" = type { i32 }
-%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
-%struct.pthread_attr_t = type { i64, [48 x i8] }
-%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
-
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel ; <i32 (i64)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_init ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)* @pthread_mutexattr_settype ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)* @pthread_mutexattr_destroy ; <i32 (%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)*> [#uses=0]
-
-define weak void @_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_(i8* %__t) {
-entry:
- tail call void @llvm.dbg.func.start(metadata !0)
- tail call void @llvm.dbg.stoppoint(i32 240, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !0)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !8)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !8)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !11)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !11)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !12)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !12)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !13)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !13)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !14)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !14)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !15)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !15)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !16)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !16)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !17)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !17)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !18)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !18)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !19)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !19)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !20)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !20)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !21)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !21)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !22)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !22)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !23)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !23)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !24)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !24)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !25)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !25)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !26)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !26)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !27)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !27)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !28)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !28)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !29)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !29)
- ret void
-}
-
-define weak void @_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv() {
-entry:
- tail call void @llvm.dbg.func.start(metadata !30)
- tail call void @llvm.dbg.stoppoint(i32 63, i32 0, metadata !2)
- tail call void @llvm.dbg.region.end(metadata !30)
- ret void
-}
-
-declare void @llvm.dbg.func.start(metadata) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, metadata) nounwind readnone
-
-declare void @llvm.dbg.region.end(metadata) nounwind readnone
-
-declare extern_weak i32 @pthread_once(i32*, void ()*)
-
-declare extern_weak i8* @pthread_getspecific(i32)
-
-declare extern_weak i32 @pthread_setspecific(i32, i8*)
-
-declare extern_weak i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare extern_weak i32 @pthread_cancel(i64)
-
-declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare extern_weak i32 @pthread_key_delete(i32)
-
-declare extern_weak i32 @pthread_mutexattr_init(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-declare extern_weak i32 @pthread_mutexattr_settype(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*, i32)
-
-declare extern_weak i32 @pthread_mutexattr_destroy(%"struct.__gnu_cxx::_ConvertibleConcept<unsigned int,unsigned int>"*)
-
-!0 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__aux_require_boolean_expr<bool>", metadata !"__aux_require_boolean_expr<bool>", metadata !"_ZN9__gnu_cxx26__aux_require_boolean_exprIbEEvRKT_", metadata !2, i32 239, metadata !3, i1 false, i1 true}
-!1 = metadata !{i32 458769, i32 0, i32 4, metadata !"concept-inst.cc", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/src/../../../../llvm-gcc.src/libstdc++-v3/src", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}
-!2 = metadata !{i32 458769, i32 0, i32 4, metadata !"boost_concept_check.h", metadata !"/home/buildbot/buildslave/llvm-x86_64-linux-selfhost/llvm-gcc.obj/x86_64-unknown-linux-gnu/libstdc++-v3/include/bits", metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build)", i1 false, i1 true, metadata !"", i32 0}
-!3 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}
-!4 = metadata !{null, metadata !5}
-!5 = metadata !{i32 458768, metadata !1, metadata !"", metadata !1, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !6}
-!6 = metadata !{i32 458790, metadata !1, metadata !"", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !7}
-!7 = metadata !{i32 458788, metadata !1, metadata !"bool", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2}
-!8 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"__function_requires<__gnu_cxx::_ConvertibleConcept<unsigned int, unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_19_ConvertibleConceptIjjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!9 = metadata !{i32 458773, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !10, i32 0}
-!10 = metadata !{null}
-!11 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!12 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!13 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!14 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_InputIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_21_InputIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!15 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!16 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!17 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIiEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!18 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIlEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!19 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<long long int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIxEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!20 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"__function_requires<__gnu_cxx::_LessThanComparableConcept<unsigned int> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_26_LessThanComparableConceptIjEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!21 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<char, std::char_traits<char> >, char> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIcSt11char_traitsIcEEcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!22 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"__function_requires<__gnu_cxx::_OutputIteratorConcept<std::ostreambuf_iterator<wchar_t, std::char_traits<wchar_t> >, wchar_t> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_22_OutputIteratorConceptISt19ostreambuf_iteratorIwSt11char_traitsIwEEwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!23 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!24 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const char*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKcEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!25 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!26 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<char*, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPcSsEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!27 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<const wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPKwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!28 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<__gnu_cxx::__normal_iterator<wchar_t*, std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > > > >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptINS_17__normal_iteratorIPwSbIwSt11char_traitsIwESaIwEEEEEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!29 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
-!30 = metadata !{i32 458798, i32 0, metadata !1, metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"__function_requires<__gnu_cxx::_RandomAccessIteratorConcept<const wchar_t*> >", metadata !"_ZN9__gnu_cxx19__function_requiresINS_28_RandomAccessIteratorConceptIPKwEEEEvv", metadata !2, i32 61, metadata !9, i1 false, i1 true}
diff --git a/test/Transforms/GlobalDCE/dg.exp b/test/Transforms/GlobalDCE/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/GlobalDCE/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalDCE/lit.local.cfg b/test/Transforms/GlobalDCE/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/GlobalDCE/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index a6803abc5d36..588d5c9a6844 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -3,7 +3,7 @@
define double @foo() nounwind {
entry:
- %tmp1 = volatile load double* @t0.1441, align 8 ; <double> [#uses=2]
+ %tmp1 = load volatile double* @t0.1441, align 8 ; <double> [#uses=2]
%tmp4 = fmul double %tmp1, %tmp1 ; <double> [#uses=1]
ret double %tmp4
}
diff --git a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll b/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
deleted file mode 100644
index 0e70c49adf14..000000000000
--- a/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -globalopt -S | grep { nest } | count 1
- %struct.FRAME.nest = type { i32, i32 (i32)* }
- %struct.__builtin_trampoline = type { [10 x i8] }
-@.str = internal constant [7 x i8] c"%d %d\0A\00" ; <[7 x i8]*> [#uses=1]
-
-define i32 @process(i32 (i32)* %func) nounwind {
-entry:
- %tmp2 = tail call i32 %func( i32 1 ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp2
-}
-
-define internal fastcc i32 @g.1478(%struct.FRAME.nest* nest %CHAIN.1, i32 %m) nounwind {
-entry:
- %tmp3 = getelementptr %struct.FRAME.nest* %CHAIN.1, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp4 = load i32* %tmp3, align 4 ; <i32> [#uses=1]
- %tmp7 = icmp eq i32 %tmp4, %m ; <i1> [#uses=1]
- %tmp78 = zext i1 %tmp7 to i32 ; <i32> [#uses=1]
- ret i32 %tmp78
-}
-
-define internal i32 @f.1481(%struct.FRAME.nest* nest %CHAIN.2, i32 %m) nounwind {
-entry:
- %tmp4 = tail call fastcc i32 @g.1478( %struct.FRAME.nest* nest %CHAIN.2, i32 %m ) nounwind ; <i32> [#uses=1]
- %tmp6 = getelementptr %struct.FRAME.nest* %CHAIN.2, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp7 = load i32* %tmp6, align 4 ; <i32> [#uses=1]
- %tmp9 = icmp eq i32 %tmp4, %tmp7 ; <i1> [#uses=1]
- %tmp910 = zext i1 %tmp9 to i32 ; <i32> [#uses=1]
- ret i32 %tmp910
-}
-
-define i32 @nest(i32 %n) nounwind {
-entry:
- %TRAMP.316 = alloca [10 x i8] ; <[10 x i8]*> [#uses=1]
- %FRAME.0 = alloca %struct.FRAME.nest ; <%struct.FRAME.nest*> [#uses=3]
- %TRAMP.316.sub = getelementptr [10 x i8]* %TRAMP.316, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 %n, i32* %tmp3, align 8
- %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.316.sub, i8* bitcast (i32 (%struct.FRAME.nest*, i32)* @f.1481 to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32)**> [#uses=1]
- %tmp89 = bitcast i8* %tramp to i32 (i32)* ; <i32 (i32)*> [#uses=2]
- store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 4
- %tmp13 = call i32 @process( i32 (i32)* %tmp89 ) nounwind ; <i32> [#uses=1]
- ret i32 %tmp13
-}
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
-
-define i32 @main() nounwind {
-entry:
- %tmp = tail call i32 @nest( i32 2 ) nounwind ; <i32> [#uses=1]
- %tmp1 = tail call i32 @nest( i32 1 ) nounwind ; <i32> [#uses=1]
- %tmp3 = tail call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([7 x i8]* @.str, i32 0, i32 0), i32 %tmp1, i32 %tmp ) nounwind ; <i32> [#uses=0]
- ret i32 undef
-}
-
-declare i32 @printf(i8*, ...) nounwind
diff --git a/test/Transforms/GlobalOpt/atomic.ll b/test/Transforms/GlobalOpt/atomic.ll
new file mode 100644
index 000000000000..4c3f4395a0ea
--- /dev/null
+++ b/test/Transforms/GlobalOpt/atomic.ll
@@ -0,0 +1,10 @@
+; RUN: opt -globalopt < %s -S -o - | FileCheck %s
+
+@GV1 = internal global i64 1
+; CHECK: @GV1 = internal unnamed_addr constant i64 1
+
+define void @test1() {
+entry:
+ %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 834bd0012e7a..af8fa324db8c 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -6,3 +6,46 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; CHECK: @A = global i1 false
@A = global i1 icmp ne (i64 sub nsw (i64 ptrtoint (i8* getelementptr inbounds ([3 x i8]* @.str91250, i64 0, i64 1) to i64), i64 ptrtoint ([3 x i8]* @.str91250 to i64)), i64 1)
+
+; PR11352
+
+@xs = global [2 x i32] zeroinitializer, align 4
+; CHECK: @xs = global [2 x i32] [i32 1, i32 1]
+
+define internal void @test1() {
+entry:
+ store i32 1, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 0)
+ %0 = load i32* getelementptr inbounds ([2 x i32]* @xs, i32 0, i64 0), align 4
+ store i32 %0, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 1)
+ ret void
+}
+
+; PR12060
+
+%closure = type { i32 }
+
+@f = internal global %closure zeroinitializer, align 4
+@m = global i32 0, align 4
+; CHECK-NOT: @f
+; CHECK: @m = global i32 13
+
+define internal i32 @test2_helper(%closure* %this, i32 %b) {
+entry:
+ %0 = getelementptr inbounds %closure* %this, i32 0, i32 0
+ %1 = load i32* %0, align 4
+ %add = add nsw i32 %1, %b
+ ret i32 %add
+}
+
+define internal void @test2() {
+entry:
+ store i32 4, i32* getelementptr inbounds (%closure* @f, i32 0, i32 0)
+ %call = call i32 @test2_helper(%closure* @f, i32 9)
+ store i32 %call, i32* @m, align 4
+ ret void
+}
+
+@llvm.global_ctors = appending constant
+ [2 x { i32, void ()* }]
+ [{ i32, void ()* } { i32 65535, void ()* @test1 },
+ { i32, void ()* } { i32 65535, void ()* @test2 }]
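
Both PR tests exercise the same machinery: globalopt interprets the functions named in @llvm.global_ctors at compile time, and when a ctor evaluates completely it commits the final stores into the globals' initializers and deletes the ctor. A minimal sketch of the pattern (names invented):

  @counter = internal global i32 0

  define internal void @set_counter() {
    store i32 7, i32* @counter   ; fully evaluable at compile time
    ret void
  }

  @llvm.global_ctors = appending constant [1 x { i32, void ()* }]
    [{ i32, void ()* } { i32 65535, void ()* @set_counter }]

After -globalopt, the initializer of @counter may simply read i32 7, with @set_counter and its ctor entry gone.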
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
index 204f979ed3e1..e3bc473f52ad 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
@@ -4,20 +4,31 @@ target triple = "x86_64-apple-darwin10.0.0"
%0 = type { i32, void ()* }
%struct.foo = type { i32* }
+%struct.bar = type { i128 }
@G = global i32 0, align 4
@H = global i32 0, align 4
@X = global %struct.foo zeroinitializer, align 8
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @init }]
+@X2 = global %struct.bar zeroinitializer, align 8
+@llvm.global_ctors = appending global [2 x %0] [%0 { i32 65535, void ()* @init1 }, %0 { i32 65535, void ()* @init2 }]
; PR8710 - GlobalOpt shouldn't change the global's initializer to have this
; arbitrary constant expression, the code generator can't handle it.
-define internal void @init() {
+define internal void @init1() {
entry:
%tmp = getelementptr inbounds %struct.foo* @X, i32 0, i32 0
store i32* inttoptr (i64 sdiv (i64 ptrtoint (i32* @G to i64), i64 ptrtoint (i32* @H to i64)) to i32*), i32** %tmp, align 8
ret void
}
-
-; CHECK: @init
+; CHECK: @init1
; CHECK: store i32*
+
+; PR11705 - ptrtoint isn't safe in general in global initializers.
+define internal void @init2() {
+entry:
+ %tmp = getelementptr inbounds %struct.bar* @X2, i32 0, i32 0
+ store i128 ptrtoint (i32* @G to i128), i128* %tmp, align 16
+ ret void
+}
+; CHECK: @init2
+; CHECK: store i128
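
Both ctors are kept for the same underlying reason: a successful evaluation would leave the global initialized with a constant expression that static object emission cannot represent. The i128 case makes this vivid — there is no 128-bit relocation for an address — so an initializer like the following (hypothetical, for illustration) must never be produced:

  @G = external global i32
  ; Valid IR, but not emittable as static data: it would need a 128-bit
  ; relocation against @G.
  @bad = global i128 ptrtoint (i32* @G to i128)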
diff --git a/test/Transforms/GlobalOpt/cxx-dtor.ll b/test/Transforms/GlobalOpt/cxx-dtor.ll
index 22635620baad..7c6ae78d1bc0 100644
--- a/test/Transforms/GlobalOpt/cxx-dtor.ll
+++ b/test/Transforms/GlobalOpt/cxx-dtor.ll
@@ -2,6 +2,7 @@
%0 = type { i32, void ()* }
%struct.A = type { i8 }
+%struct.B = type { }
@a = global %struct.A zeroinitializer, align 1
@__dso_handle = external global i8*
@@ -15,13 +16,14 @@ define internal void @__cxx_global_var_init() nounwind section "__TEXT,__StaticI
}
define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind align 2 {
- call void @_ZN1AD2Ev(%struct.A* %this)
+ %t = bitcast %struct.A* %this to %struct.B*
+ call void @_ZN1BD1Ev(%struct.B* %t)
ret void
}
declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
-define linkonce_odr void @_ZN1AD2Ev(%struct.A* %this) nounwind align 2 {
+define linkonce_odr void @_ZN1BD1Ev(%struct.B* %this) nounwind align 2 {
ret void
}
diff --git a/test/Transforms/GlobalOpt/deadfunction.ll b/test/Transforms/GlobalOpt/deadfunction.ll
new file mode 100644
index 000000000000..5e003c63f77d
--- /dev/null
+++ b/test/Transforms/GlobalOpt/deadfunction.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+; CHECK-NOT: test
+
+declare void @aa()
+declare void @bb()
+
+; Test that we can erase a function which has a blockaddress referring to it
+@test.x = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@test, %a), i8* blockaddress(@test, %b), i8* blockaddress(@test, %c)], align 16
+define internal void @test(i32 %n) nounwind noinline {
+entry:
+ %idxprom = sext i32 %n to i64
+ %arrayidx = getelementptr inbounds [3 x i8*]* @test.x, i64 0, i64 %idxprom
+ %0 = load i8** %arrayidx, align 8
+ indirectbr i8* %0, [label %a, label %b, label %c]
+
+a:
+ tail call void @aa() nounwind
+ br label %b
+
+b:
+ tail call void @bb() nounwind
+ br label %c
+
+c:
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/dg.exp b/test/Transforms/GlobalOpt/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/GlobalOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/GlobalOpt/invariant.ll b/test/Transforms/GlobalOpt/invariant.ll
new file mode 100644
index 000000000000..6b9919347890
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invariant.ll
@@ -0,0 +1,59 @@
+; RUN: opt -globalopt -S -o - < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare {}* @llvm.invariant.start(i64 %size, i8* nocapture %ptr)
+
+define void @test1(i8* %ptr) {
+ call {}* @llvm.invariant.start(i64 4, i8* %ptr)
+ ret void
+}
+
+@object1 = global i32 0
+; CHECK: @object1 = constant i32 -1
+define void @ctor1() {
+ store i32 -1, i32* @object1
+ %A = bitcast i32* @object1 to i8*
+ call void @test1(i8* %A)
+ ret void
+}
+
+
+@object2 = global i32 0
+; CHECK: @object2 = global i32 0
+define void @ctor2() {
+ store i32 -1, i32* @object2
+ %A = bitcast i32* @object2 to i8*
+ %B = call {}* @llvm.invariant.start(i64 4, i8* %A)
+ %C = bitcast {}* %B to i8*
+ ret void
+}
+
+
+@object3 = global i32 0
+; CHECK: @object3 = global i32 -1
+define void @ctor3() {
+ store i32 -1, i32* @object3
+ %A = bitcast i32* @object3 to i8*
+ call {}* @llvm.invariant.start(i64 3, i8* %A)
+ ret void
+}
+
+
+@object4 = global i32 0
+; CHECK: @object4 = global i32 -1
+define void @ctor4() {
+ store i32 -1, i32* @object4
+ %A = bitcast i32* @object4 to i8*
+ call {}* @llvm.invariant.start(i64 -1, i8* %A)
+ ret void
+}
+
+
+@llvm.global_ctors = appending constant
+ [4 x { i32, void ()* }]
+ [ { i32, void ()* } { i32 65535, void ()* @ctor1 },
+ { i32, void ()* } { i32 65535, void ()* @ctor2 },
+ { i32, void ()* } { i32 65535, void ()* @ctor3 },
+ { i32, void ()* } { i32 65535, void ()* @ctor4 } ]
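
Reading the four CHECK lines together brackets what llvm.invariant.start buys here: the intrinsic promises the covered bytes never change after the call, so only @object1 — where the size covers the whole object and the returned token is discarded — is promoted to constant; a partial (i64 3) or unknown (i64 -1) size still lets the evaluated store land in the initializer but blocks the constant marking, and a ctor that actually uses the returned token (@ctor2) is not evaluated at all. A sketch of the qualifying shape (names invented):

  @val = global i32 0

  declare {}* @llvm.invariant.start(i64, i8* nocapture)

  define void @freeze_val() {
    store i32 5, i32* @val
    %p = bitcast i32* @val to i8*
    ; Full 4-byte coverage, token discarded: @val may become constant i32 5.
    call {}* @llvm.invariant.start(i64 4, i8* %p)
    ret void
  }

  @llvm.global_ctors = appending constant [1 x { i32, void ()* }]
    [{ i32, void ()* } { i32 65535, void ()* @freeze_val }]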
diff --git a/test/Transforms/GlobalOpt/invoke.ll b/test/Transforms/GlobalOpt/invoke.ll
new file mode 100644
index 000000000000..c1f499c38a3c
--- /dev/null
+++ b/test/Transforms/GlobalOpt/invoke.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -globalopt < %s | FileCheck %s
+; rdar://11022897
+
+; Globalopt should be able to evaluate an invoke.
+; CHECK: @tmp = global i32 1
+
+@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
+@tmp = global i32 0
+
+define i32 @one() {
+ ret i32 1
+}
+
+define void @_GLOBAL__I_a() {
+bb:
+ %tmp1 = invoke i32 @one()
+ to label %bb2 unwind label %bb4
+
+bb2: ; preds = %bb
+ store i32 %tmp1, i32* @tmp
+ ret void
+
+bb4: ; preds = %bb
+ %tmp5 = landingpad { i8*, i32 } personality i8* undef
+ filter [0 x i8*] zeroinitializer
+ unreachable
+}
diff --git a/test/Transforms/GlobalOpt/lit.local.cfg b/test/Transforms/GlobalOpt/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/GlobalOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
new file mode 100644
index 000000000000..d613601e8d78
--- /dev/null
+++ b/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -S -globalopt | FileCheck %s
+
+@zero = internal global [10 x i32] zeroinitializer
+
+define i32 @test1(i64 %idx) nounwind {
+ %arrayidx = getelementptr inbounds [10 x i32]* @zero, i64 0, i64 %idx
+ %l = load i32* %arrayidx
+ ret i32 %l
+; CHECK: @test1
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/IPConstantProp/dangling-block-address.ll b/test/Transforms/IPConstantProp/dangling-block-address.ll
index 0489dfa796f3..bb101333f801 100644
--- a/test/Transforms/IPConstantProp/dangling-block-address.ll
+++ b/test/Transforms/IPConstantProp/dangling-block-address.ll
@@ -12,7 +12,7 @@
define void @foo(i32 %x) nounwind readnone {
entry:
%b = alloca i32, align 4 ; <i32*> [#uses=1]
- volatile store i32 -1, i32* %b
+ store volatile i32 -1, i32* %b
ret void
}
diff --git a/test/Transforms/IPConstantProp/dg.exp b/test/Transforms/IPConstantProp/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/IPConstantProp/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IPConstantProp/lit.local.cfg b/test/Transforms/IPConstantProp/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/IPConstantProp/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
index 77354f75106a..af9f1b38f1a9 100644
--- a/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
+++ b/test/Transforms/IndVarSimplify/2011-09-10-widen-nsw.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
; Test WidenIV::GetExtendedOperandRecurrence.
; add219 should be extended to i64 because it is nsw, even though its
; sext cannot be hoisted outside the loop.
@@ -19,7 +19,7 @@ for.body153: ; preds = %for.body153, %for.b
; CHECK: add nsw i64 %indvars.iv, 1
for.body170: ; preds = %for.body170, %for.body153
- %i2.19 = phi i32 [ %add249, %for.body170 ], [ undef, %for.body153 ]
+ %i2.19 = phi i32 [ %add249, %for.body170 ], [ 0, %for.body153 ]
%add219 = add nsw i32 %i2.19, 1
%idxprom220 = sext i32 %add219 to i64
%add249 = add nsw i32 %i2.19, %shl132
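
The RUN-line change drops -enable-iv-rewrite=false, apparently because that behavior had become the default, and the phi's undef incoming value becomes 0 so the test no longer depends on how undef folds. The widening itself rests on an algebraic fact: for an add marked nsw, sign extension distributes over it — sext(a +nsw 1) = sext(a) + 1 — so WidenIV can keep a single i64 recurrence and delete the in-loop sext. Schematic before/after (fragments, not a full module):

  ; before: 32-bit recurrence plus a sign extension at each use
  %i.next = add nsw i32 %i, 1
  %idx = sext i32 %i.next to i64
  ; after: the recurrence itself is widened and the sext disappears
  %iv.next = add nsw i64 %iv, 1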
diff --git a/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
new file mode 100644
index 000000000000..76c90e0cddd9
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-10-27-lftrnull.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; rdar://10359193: assert "IndVar type must match IVInit type"
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-darwin"
+
+; CHECK: @test
+; CHECK: if.end.i126:
+; CHECK: %exitcond = icmp ne i8* %incdec.ptr.i, getelementptr (i8* null, i32 undef)
+define void @test() nounwind {
+entry:
+ br label %while.cond
+
+while.cond:
+ br i1 undef, label %while.end, label %while.body
+
+while.body: ; preds = %while.cond
+ br i1 undef, label %if.then165, label %while.cond
+
+if.then165: ; preds = %while.body
+ br i1 undef, label %while.cond, label %for.body.lr.ph.i81
+
+for.body.lr.ph.i81: ; preds = %if.then165
+ br label %for.body.i86
+
+for.body.i86: ; preds = %for.end.i129, %for.body.lr.ph.i81
+ %cmp196.i = icmp ult i32 0, undef
+ br i1 %cmp196.i, label %for.body21.lr.ph.i, label %for.end.i129
+
+for.body21.lr.ph.i: ; preds = %for.body.i86
+ br label %for.body21.i
+
+for.body21.i:
+ %destYPixelPtr.010.i = phi i8* [ null, %for.body21.lr.ph.i ], [ %incdec.ptr.i, %if.end.i126 ]
+ %x.09.i = phi i32 [ 0, %for.body21.lr.ph.i ], [ %inc.i125, %if.end.i126 ]
+ br i1 undef, label %if.end.i126, label %if.else.i124
+
+if.else.i124: ; preds = %for.body21.i
+ store i8 undef, i8* %destYPixelPtr.010.i, align 1
+ br label %if.end.i126
+
+if.end.i126: ; preds = %if.else.i124, %for.body21.i
+ %incdec.ptr.i = getelementptr inbounds i8* %destYPixelPtr.010.i, i32 1
+ %inc.i125 = add i32 %x.09.i, 1
+ %cmp19.i = icmp ult i32 %inc.i125, undef
+ br i1 %cmp19.i, label %for.body21.i, label %for.end.i129
+
+for.end.i129: ; preds = %if.end.i126, %for.body.i86
+ br i1 undef, label %for.body.i86, label %while.cond
+
+while.end: ; preds = %while.cond
+ br label %bail
+
+bail: ; preds = %while.end, %lor.lhs.false44, %lor.lhs.false41, %if.end29, %if.end
+ unreachable
+
+return: ; preds = %lor.lhs.false20, %lor.lhs.false12, %lor.lhs.false, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
new file mode 100644
index 000000000000..c0c508f02ecd
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -0,0 +1,140 @@
+; RUN: opt < %s -indvars -S "-default-data-layout=e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" | FileCheck %s
+; RUN: opt < %s -indvars -S "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" | FileCheck %s
+;
+; PR11279: Assertion !IVLimit->getType()->isPointerTy()
+;
+; Test LinearFunctionTestReplace of a pointer-type loop counter. Note
+; that BECount may or may not be a pointer type. A pointer type
+; BECount doesn't really make sense, but that's what falls out of
+; SCEV. Since it's an i8*, it has unit stride so we never adjust the
+; SCEV expression in a way that would convert it to an integer type.
+
+; CHECK: @testnullptrptr
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testnullptrptr(i8* %buf, i8* %end) nounwind {
+ br label %loopguard
+
+loopguard:
+ %guard = icmp ult i8* null, %end
+ br i1 %guard, label %preheader, label %exit
+
+preheader:
+ br label %loop
+
+loop:
+ %p.01.us.us = phi i8* [ null, %preheader ], [ %gep, %loop ]
+ %s = phi i8 [0, %preheader], [%snext, %loop]
+ %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+ %snext = load i8* %gep
+ %cmp = icmp ult i8* %gep, %end
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i8 %snext
+}
+
+; CHECK: @testptrptr
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testptrptr(i8* %buf, i8* %end) nounwind {
+ br label %loopguard
+
+loopguard:
+ %guard = icmp ult i8* %buf, %end
+ br i1 %guard, label %preheader, label %exit
+
+preheader:
+ br label %loop
+
+loop:
+ %p.01.us.us = phi i8* [ %buf, %preheader ], [ %gep, %loop ]
+ %s = phi i8 [0, %preheader], [%snext, %loop]
+ %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+ %snext = load i8* %gep
+ %cmp = icmp ult i8* %gep, %end
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i8 %snext
+}
+
+; CHECK: @testnullptrint
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testnullptrint(i8* %buf, i8* %end) nounwind {
+ br label %loopguard
+
+loopguard:
+ %bi = ptrtoint i8* %buf to i32
+ %ei = ptrtoint i8* %end to i32
+ %cnt = sub i32 %ei, %bi
+ %guard = icmp ult i32 0, %cnt
+ br i1 %guard, label %preheader, label %exit
+
+preheader:
+ br label %loop
+
+loop:
+ %p.01.us.us = phi i8* [ null, %preheader ], [ %gep, %loop ]
+ %iv = phi i32 [ 0, %preheader ], [ %ivnext, %loop ]
+ %s = phi i8 [0, %preheader], [%snext, %loop]
+ %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+ %snext = load i8* %gep
+ %ivnext = add i32 %iv, 1
+ %cmp = icmp ult i32 %ivnext, %cnt
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i8 %snext
+}
+
+; CHECK: @testptrint
+; CHECK: loop:
+; CHECK: icmp ne
+define i8 @testptrint(i8* %buf, i8* %end) nounwind {
+ br label %loopguard
+
+loopguard:
+ %bi = ptrtoint i8* %buf to i32
+ %ei = ptrtoint i8* %end to i32
+ %cnt = sub i32 %ei, %bi
+ %guard = icmp ult i32 %bi, %cnt
+ br i1 %guard, label %preheader, label %exit
+
+preheader:
+ br label %loop
+
+loop:
+ %p.01.us.us = phi i8* [ %buf, %preheader ], [ %gep, %loop ]
+ %iv = phi i32 [ %bi, %preheader ], [ %ivnext, %loop ]
+ %s = phi i8 [0, %preheader], [%snext, %loop]
+ %gep = getelementptr inbounds i8* %p.01.us.us, i64 1
+ %snext = load i8* %gep
+ %ivnext = add i32 %iv, 1
+ %cmp = icmp ult i32 %ivnext, %cnt
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i8 %snext
+}
+
+; IV and BECount have two different pointer types here.
+define void @testnullptr([512 x i8]* %base) nounwind {
+entry:
+ %add.ptr1603 = getelementptr [512 x i8]* %base, i64 0, i64 512
+ br label %preheader
+
+preheader:
+ %cmp1604192 = icmp ult i8* undef, %add.ptr1603
+ br i1 %cmp1604192, label %for.body, label %for.end1609
+
+for.body:
+ %r.17193 = phi i8* [ %incdec.ptr1608, %for.body ], [ null, %preheader ]
+ %incdec.ptr1608 = getelementptr i8* %r.17193, i64 1
+ %cmp1604 = icmp ult i8* %incdec.ptr1608, %add.ptr1603
+ br i1 %cmp1604, label %for.body, label %for.end1609
+
+for.end1609:
+ unreachable
+}
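
The icmp ne patterns that the CHECK lines above insist on are the essence of
LinearFunctionTestReplace. A rough C-level sketch of the rewrite follows
(illustrative only -- the names are invented, indvars operates on the IR, and
a guard is assumed to prove the loop is entered with unit stride):

    /* Before LFTR: the latch exits on an ordered comparison. */
    void before(char *buf, char *end) {
      for (char *p = buf; p < end; ++p)   /* icmp ult */
        *p = 0;
    }

    /* After LFTR: the exit test becomes an equality test against a limit
       derived from the backedge-taken count, which later passes and the
       backends handle more easily. */
    void after(char *buf, char *end) {
      char *p = buf;
      do {
        *p = 0;
        p += 1;
      } while (p != end);                 /* icmp ne */
    }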
diff --git a/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
new file mode 100644
index 000000000000..c74d04e6a5d8
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+;
+; Prior to the fix for PR11375, indvars would replace %firstIV with a
+; loop-invariant gep computed in the preheader. This was incorrect
+; because it was based on the minimum "ExitNotTaken" count. If the
+; final loop test is skipped (odd number of elements) then the early
+; exit would be taken and the loop invariant value would be incorrect.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: if.end:
+; CHECK: phi i32* [ %first.lcssa, %early.exit ]
+define i32 @test(i32* %first, i32* %last) uwtable ssp {
+entry:
+ br i1 undef, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br i1 undef, label %if.end, label %do.body
+
+do.body: ; preds = %if.else, %if.then
+ %firstIV = phi i32* [ %incdec.ptr2, %if.else ], [ %first, %if.then ]
+ %incdec.ptr1 = getelementptr inbounds i32* %firstIV, i64 1
+ %cmp1 = icmp eq i32* %incdec.ptr1, %last
+ br i1 %cmp1, label %early.exit, label %if.else
+
+if.else: ; preds = %do.body
+ %incdec.ptr2 = getelementptr inbounds i32* %firstIV, i64 2
+ %cmp2 = icmp eq i32* %incdec.ptr2, %last
+ br i1 %cmp2, label %if.end, label %do.body
+
+early.exit:
+ %first.lcssa = phi i32* [ %firstIV, %do.body ]
+ br label %if.end
+
+if.end:
+ %tmp = phi i32* [ %first.lcssa, %early.exit ], [ %first, %if.then ], [ %first, %entry ], [ undef, %if.else ]
+ %val = load i32* %tmp
+ ret i32 %val
+}
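
A hypothetical C analogue of the hazard described above: the loop strides by
2 but can exit after a single bump, so the exit value of the pointer IV
depends on which exit is taken.

    int scan(int *first, int *last) {
      int *p = first;
      do {
        if (p + 1 == last)
          return *p;         /* early exit: p holds the pre-bump value */
        p += 2;
      } while (p != last);   /* normal exit */
      return p[-1];
    }

Replacing the early-exit use of p with one loop-invariant expression computed
from the minimum "ExitNotTaken" count is exactly the PR11375 bug: it yields
the wrong pointer whenever last - first is odd and the early exit fires.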
diff --git a/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
new file mode 100644
index 000000000000..ccf259597e30
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2011-11-17-selfphi.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+; PR11350: Check that SimplifyIndvar handles a cycle of useless self-phis.
+
+; CHECK: @test
+; CHECK-NOT: lcssa = phi
+define void @test() nounwind {
+entry:
+ br label %for.cond.preheader
+
+for.cond.preheader: ; preds = %entry
+ br label %for.cond.outer
+
+for.cond.outer: ; preds = %for.cond.preheader, %for.end
+ %p_41.addr.0.ph = phi i32 [ %p_41.addr.1.lcssa, %for.end ], [ 1, %for.cond.preheader ]
+ br label %for.cond
+
+for.cond:
+ br i1 true, label %for.end, label %for.ph
+
+for.ph: ; preds = %for.cond4.preheader
+ br label %for.end
+
+for.end:
+ %p_41.addr.1.lcssa = phi i32 [ undef, %for.ph ], [ %p_41.addr.0.ph, %for.cond ]
+ %p_68.lobit.i = lshr i32 %p_41.addr.1.lcssa, 31
+ %cmp7 = icmp eq i32 %p_41.addr.1.lcssa, 0
+ %conv8 = zext i1 %cmp7 to i32
+ br label %for.cond.outer
+}
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
index 154de6fea4b6..c0932982ee5e 100644
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
;
; PR1301
diff --git a/test/Transforms/IndVarSimplify/addrec-gep.ll b/test/Transforms/IndVarSimplify/addrec-gep.ll
deleted file mode 100644
index b62d093960c3..000000000000
--- a/test/Transforms/IndVarSimplify/addrec-gep.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: getelementptr
-; CHECK: mul {{.*}}, 37
-; CHECK: add {{.*}}, 5203
-; CHECK-NOT: cast
-
-; This test tests several things. The load and store should use the
-; same address instead of having it computed twice, and SCEVExpander should
-; be able to reconstruct the full getelementptr, despite it having a few
-; obstacles set in its way.
-
-target datalayout = "e-p:64:64:64-n32:64"
-
-define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
-entry:
- %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
- br i1 %tmp, label %bb.nph3, label %return
-
-bb.nph: ; preds = %bb2.preheader
- %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
- %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
- br label %bb1
-
-bb1: ; preds = %bb2, %bb.nph
- %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
- %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
- %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
- %z0 = add i64 %tmp3, 5203
- %tmp5 = getelementptr double* %p, i64 %z0 ; <double*> [#uses=1]
- %tmp6 = load double* %tmp5, align 8 ; <double> [#uses=1]
- %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
- %z1 = add i64 %tmp4, 5203
- %tmp8 = getelementptr double* %p, i64 %z1 ; <double*> [#uses=1]
- store double %tmp7, double* %tmp8, align 8
- %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
- br label %bb2
-
-bb2: ; preds = %bb1
- %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
- br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
-
-bb2.bb3_crit_edge: ; preds = %bb2
- br label %bb3
-
-bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
- %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
- br label %bb4
-
-bb4: ; preds = %bb3
- %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
- br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
-
-bb4.return_crit_edge: ; preds = %bb4
- br label %bb4.return_crit_edge.split
-
-bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
- br label %return
-
-bb.nph3: ; preds = %entry
- %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
- %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
- %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
- %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
- %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
- br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
-
-bb.nph3.split: ; preds = %bb.nph3
- br label %bb2.preheader
-
-bb2.preheader: ; preds = %bb.nph3.split, %bb4
- %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
- br i1 true, label %bb.nph, label %bb3
-
-return: ; preds = %bb4.return_crit_edge.split, %entry
- ret void
-}
diff --git a/test/Transforms/IndVarSimplify/avoid-i0.ll b/test/Transforms/IndVarSimplify/avoid-i0.ll
index 59661fa2e88d..22f2e4b718c2 100644
--- a/test/Transforms/IndVarSimplify/avoid-i0.ll
+++ b/test/Transforms/IndVarSimplify/avoid-i0.ll
@@ -90,7 +90,7 @@ entry:
br label %bb4
bb: ; preds = %bb4
- %0 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32* @x, align 4 ; <i32> [#uses=1]
store i32 %0, i32* %vol.0, align 4
store i32 0, i32* %l_52, align 4
br label %bb2
diff --git a/test/Transforms/IndVarSimplify/complex-scev.ll b/test/Transforms/IndVarSimplify/complex-scev.ll
deleted file mode 100644
index 395377e3a48e..000000000000
--- a/test/Transforms/IndVarSimplify/complex-scev.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; The i induction variable looks like a wrap-around, but it really is just
-; a simple affine IV. Make sure that indvars eliminates it.
-
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: phi
-; CHECK-NOT: phi
-
-define void @foo() {
-entry:
- br label %bb6
-
-bb6: ; preds = %cond_true, %entry
- %j.0 = phi i32 [ 1, %entry ], [ %tmp5, %cond_true ] ; <i32> [#uses=3]
- %i.0 = phi i32 [ 0, %entry ], [ %j.0, %cond_true ] ; <i32> [#uses=1]
- %tmp7 = call i32 (...)* @foo2( ) ; <i32> [#uses=1]
- %tmp = icmp ne i32 %tmp7, 0 ; <i1> [#uses=1]
- br i1 %tmp, label %cond_true, label %return
-
-cond_true: ; preds = %bb6
- %tmp2 = call i32 (...)* @bar( i32 %i.0, i32 %j.0 ) ; <i32> [#uses=0]
- %tmp5 = add i32 %j.0, 1 ; <i32> [#uses=1]
- br label %bb6
-
-return: ; preds = %bb6
- ret void
-}
-
-declare i32 @bar(...)
-
-declare i32 @foo2(...)
-
diff --git a/test/Transforms/IndVarSimplify/dg.exp b/test/Transforms/IndVarSimplify/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/IndVarSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/IndVarSimplify/elim-extend.ll b/test/Transforms/IndVarSimplify/elim-extend.ll
index 43c162fed7f1..ad5679f3e7a4 100644
--- a/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
diff --git a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll b/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
deleted file mode 100644
index 7e1e2a31768c..000000000000
--- a/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
+++ /dev/null
@@ -1,68 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: define void @foo
-; CHECK: mul
-; CHECK: mul
-; CHECK: mul
-; CHECK: add
-; CHECK: sub
-; CHECK: define void @bar
-; CHECK: mul
-; CHECK: mul
-; CHECK: mul
-; CHECK: add
-; CHECK: sub
-
-define void @foo(i64 %n, i64 %m, i64 %o, double* nocapture %p) nounwind {
-entry:
- %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
- br i1 %tmp, label %bb.nph, label %return
-
-bb.nph: ; preds = %entry
- %tmp1 = mul i64 %n, 37 ; <i64> [#uses=1]
- %tmp2 = mul i64 %tmp1, %m ; <i64> [#uses=1]
- %tmp3 = mul i64 %tmp2, %o ; <i64> [#uses=1]
- br label %bb
-
-bb: ; preds = %bb, %bb.nph
- %i.01 = phi i64 [ %tmp3, %bb.nph ], [ %tmp13, %bb ] ; <i64> [#uses=3]
- %tmp9 = getelementptr double* %p, i64 %i.01 ; <double*> [#uses=1]
- %tmp10 = load double* %tmp9, align 8 ; <double> [#uses=1]
- %tmp11 = fdiv double %tmp10, 2.100000e+00 ; <double> [#uses=1]
- store double %tmp11, double* %tmp9, align 8
- %tmp13 = add i64 %i.01, 1 ; <i64> [#uses=2]
- %tmp14 = icmp slt i64 %tmp13, %n ; <i1> [#uses=1]
- br i1 %tmp14, label %bb, label %return.loopexit
-
-return.loopexit: ; preds = %bb
- br label %return
-
-return: ; preds = %return.loopexit, %entry
- ret void
-}
-define void @bar(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
-entry:
- %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
- br i1 %tmp, label %bb.nph, label %return
-
-bb.nph: ; preds = %entry
- %tmp1 = mul i64 %n, %q ; <i64> [#uses=1]
- %tmp2 = mul i64 %tmp1, %m ; <i64> [#uses=1]
- %tmp3 = mul i64 %tmp2, %o ; <i64> [#uses=1]
- br label %bb
-
-bb: ; preds = %bb, %bb.nph
- %i.01 = phi i64 [ %tmp3, %bb.nph ], [ %tmp13, %bb ] ; <i64> [#uses=3]
- %tmp9 = getelementptr double* %p, i64 %i.01 ; <double*> [#uses=1]
- %tmp10 = load double* %tmp9, align 8 ; <double> [#uses=1]
- %tmp11 = fdiv double %tmp10, 2.100000e+00 ; <double> [#uses=1]
- store double %tmp11, double* %tmp9, align 8
- %tmp13 = add i64 %i.01, 1 ; <i64> [#uses=2]
- %tmp14 = icmp slt i64 %tmp13, %n ; <i1> [#uses=1]
- br i1 %tmp14, label %bb, label %return.loopexit
-
-return.loopexit: ; preds = %bb
- br label %return
-
-return: ; preds = %return.loopexit, %entry
- ret void
-}
diff --git a/test/Transforms/IndVarSimplify/iv-fold.ll b/test/Transforms/IndVarSimplify/iv-fold.ll
index 2e191184225c..e0b05cdb31f5 100644
--- a/test/Transforms/IndVarSimplify/iv-fold.ll
+++ b/test/Transforms/IndVarSimplify/iv-fold.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 646e6c00ce6f..2e0f70ce461a 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
; CHECK-NOT: and
; CHECK-NOT: zext
diff --git a/test/Transforms/IndVarSimplify/lftr-reuse.ll b/test/Transforms/IndVarSimplify/lftr-reuse.ll
index 490eee9c221d..9abfe13407fc 100644
--- a/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
;
; Make sure that indvars can perform LFTR without a canonical IV.
diff --git a/test/Transforms/IndVarSimplify/lit.local.cfg b/test/Transforms/IndVarSimplify/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
index 269478a5ed03..c3619f640b33 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -1,8 +1,14 @@
; RUN: opt < %s -indvars -S \
; RUN: | grep {%b.1 = phi i32 \\\[ 2, %bb \\\], \\\[ 1, %bb2 \\\]}
-
+;
; This loop has multiple exits, and the value of %b1 depends on which
; exit is taken. Indvars should correctly compute the exit values.
+;
+; XFAIL: *
+; Indvars does not currently replace loop invariant values unless all
+; loop exits have the same exit value. We could handle some cases,
+; such as this, by making getSCEVAtScope() sensitive to a particular
+; loop exit. See PR11388.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-pc-linux-gnu"
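
The limitation behind this XFAIL (and the identical one in loop_evaluate9.ll
below) is easy to restate in C; cond1 and cond2 are hypothetical predicates:

    int cond1(int), cond2(int);   /* hypothetical exit conditions */

    int f(void) {
      int b;
      for (int i = 0;; ++i) {
        if (cond1(i)) { b = 2; break; }  /* this exit always sees b == 2 */
        if (cond2(i)) { b = 1; break; }  /* this exit always sees b == 1 */
      }
      /* b is constant on each exit individually, but no single replacement
         value is correct for both exits, so indvars must leave the phi
         alone until getSCEVAtScope() can be queried per exit (PR11388). */
      return b;
    }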
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate9.ll b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
index 8184a73f89eb..9f3bcaf21bed 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate9.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate9.ll
@@ -2,8 +2,13 @@
; RUN: grep {\[%\]tmp7 = icmp eq i8 -28, -28} %t
; RUN: grep {\[%\]tmp8 = icmp eq i8 63, 63} %t
; PR4477
-
; Indvars should compute the exit values in loop.
+;
+; XFAIL: *
+; Indvars does not currently replace loop invariant values unless all
+; loop exits have the same exit value. We could handle some cases,
+; such as this, by making getSCEVAtScope() sensitive to a particular
+; loop exit. See PR11388.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 9c2abd0f31c7..bfdd000e38eb 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s
;
; Make sure that indvars isn't inserting canonical IVs.
; This is kinda hard to do until linear function test replacement is removed.
@@ -333,9 +333,9 @@ entry:
; CHECK: loop:
; CHECK: phi %structIF*
-; CHECK: phi i32*
-; CHECK: getelementptr inbounds
+; CHECK-NOT: phi
; CHECK: getelementptr inbounds
+; CHECK-NOT: getelementptr
; CHECK: exit:
loop:
%ptr.iv = phi %structIF* [ %ptr.inc, %latch ], [ %base, %entry ]
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll b/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
deleted file mode 100644
index 251d34ec383a..000000000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK-NOT: {{inttoptr|ptrtoint}}
-; CHECK: scevgep
-; CHECK-NOT: {{inttoptr|ptrtoint}}
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
-
-; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
-
-define void @foo(i8* %p) nounwind {
-entry:
- br i1 true, label %bb.nph, label %for.end
-
-for.cond:
- %phitmp = icmp slt i64 %inc, 20
- br i1 %phitmp, label %for.body, label %for.cond.for.end_crit_edge
-
-for.cond.for.end_crit_edge:
- br label %for.end
-
-bb.nph:
- br label %for.body
-
-for.body:
- %storemerge1 = phi i64 [ %inc, %for.cond ], [ 0, %bb.nph ]
- %call = tail call i64 @bar() nounwind
- %call2 = tail call i64 @car() nounwind
- %conv = trunc i64 %call2 to i8
- %conv3 = sext i8 %conv to i64
- %add = add nsw i64 %call, %storemerge1
- %add4 = add nsw i64 %add, %conv3
- %arrayidx = getelementptr inbounds i8* %p, i64 %add4
- store i8 0, i8* %arrayidx
- %inc = add nsw i64 %storemerge1, 1
- br label %for.cond
-
-for.end:
- ret void
-}
-
-declare i64 @bar()
-
-declare i64 @car()
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll b/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
deleted file mode 100644
index cdcaaa0c9254..000000000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
+++ /dev/null
@@ -1,76 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; No explicit integer multiplications!
-; No i8* arithmetic or pointer casting anywhere!
-; CHECK-NOT: = {{= mul|i8\*|bitcast|inttoptr|ptrtoint}}
-; Exactly one getelementptr for each load+store.
-; Each getelementptr using %struct.Q* %s as a base and not i8*.
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK: getelementptr %struct.Q* %s,
-; CHECK-NOT: = {{= mul|i8\*|bitcast|inttoptr|ptrtoint}}
-
-; FIXME: This test should pass with or without TargetData. Until opt
-; supports running tests without targetdata, just hardware this in.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
-
-%struct.Q = type { [10 x %struct.N] }
-%struct.N = type { %struct.S }
-%struct.S = type { [100 x double], [100 x double] }
-
-define void @foo(%struct.Q* %s, i64 %n) nounwind {
-entry:
- br label %bb1
-
-bb1:
- %i = phi i64 [ 2, %entry ], [ %i.next, %bb ]
- %j = phi i64 [ 0, %entry ], [ %j.next, %bb ]
- %t5 = icmp slt i64 %i, %n
- br i1 %t5, label %bb, label %return
-
-bb:
- %t0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
- %t1 = load double* %t0, align 8
- %t2 = fmul double %t1, 3.200000e+00
- %t3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %i
- store double %t2, double* %t3, align 8
-
- %s0 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
- %s1 = load double* %s0, align 8
- %s2 = fmul double %s1, 3.200000e+00
- %s3 = getelementptr inbounds %struct.Q* %s, i64 13, i32 0, i64 7, i32 0, i32 1, i64 %i
- store double %s2, double* %s3, align 8
-
- %u0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
- %u1 = load double* %u0, align 8
- %u2 = fmul double %u1, 3.200000e+00
- %u3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 7, i32 0, i32 1, i64 %j
- store double %u2, double* %u3, align 8
-
- %v0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
- %v1 = load double* %v0, align 8
- %v2 = fmul double %v1, 3.200000e+00
- %v3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 1, i64 %i
- store double %v2, double* %v3, align 8
-
- %w0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
- %w1 = load double* %w0, align 8
- %w2 = fmul double %w1, 3.200000e+00
- %w3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 0, i32 0, i32 0, i64 %j
- store double %w2, double* %w3, align 8
-
- %x0 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
- %x1 = load double* %x0, align 8
- %x2 = fmul double %x1, 3.200000e+00
- %x3 = getelementptr inbounds %struct.Q* %s, i64 0, i32 0, i64 3, i32 0, i32 0, i64 %i
- store double %x2, double* %x3, align 8
-
- %i.next = add i64 %i, 1
- %j.next = add i64 %j, 1
- br label %bb1
-
-return:
- ret void
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll b/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
deleted file mode 100644
index 2f3100fcafbd..000000000000
--- a/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK: %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %0, i64 1
-target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
-
-; Indvars shouldn't expand this to
-; %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 0, i64 %tmp, i64 19
-; or something. That's valid, but more obscure.
-
-define void @foo([3 x [3 x double]]* noalias %p) nounwind {
-entry:
- br label %loop
-
-loop:
- %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
- %ip = add i64 %i, 1
- %p.2.ip.1 = getelementptr [3 x [3 x double]]* %p, i64 2, i64 %ip, i64 1
- volatile store double 0.0, double* %p.2.ip.1
- %i.next = add i64 %i, 1
- br label %loop
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-gep.ll b/test/Transforms/IndVarSimplify/preserve-gep.ll
deleted file mode 100644
index fec8a2895988..000000000000
--- a/test/Transforms/IndVarSimplify/preserve-gep.ll
+++ /dev/null
@@ -1,39 +0,0 @@
-; RUN: opt < %s -indvars -S -enable-iv-rewrite | FileCheck %s
-; CHECK-NOT: {{ptrtoint|inttoptr}}
-; CHECK: getelementptr
-; CHECK-NOT: {{ptrtoint|inttoptr|getelementptr}}
-
-; Indvars shouldn't leave getelementptrs expanded out as
-; inttoptr+ptrtoint in its output in common cases.
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n32:64"
-target triple = "x86_64-unknown-linux-gnu"
- %struct.Foo = type { i32, i32, [10 x i32], i32 }
-
-define void @me(%struct.Foo* nocapture %Bar) nounwind {
-entry:
- br i1 false, label %return, label %bb.nph
-
-bb.nph: ; preds = %entry
- br label %bb
-
-bb: ; preds = %bb1, %bb.nph
- %i.01 = phi i64 [ %4, %bb1 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
- %0 = getelementptr %struct.Foo* %Bar, i64 %i.01, i32 2, i64 3 ; <i32*> [#uses=1]
- %1 = load i32* %0, align 4 ; <i32> [#uses=1]
- %2 = mul i32 %1, 113 ; <i32> [#uses=1]
- %3 = getelementptr %struct.Foo* %Bar, i64 %i.01, i32 2, i64 3 ; <i32*> [#uses=1]
- store i32 %2, i32* %3, align 4
- %4 = add i64 %i.01, 1 ; <i64> [#uses=2]
- br label %bb1
-
-bb1: ; preds = %bb
- %phitmp = icmp sgt i64 %4, 19999 ; <i1> [#uses=1]
- br i1 %phitmp, label %bb1.return_crit_edge, label %bb
-
-bb1.return_crit_edge: ; preds = %bb1
- br label %return
-
-return: ; preds = %bb1.return_crit_edge, %entry
- ret void
-}
diff --git a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
index 22e209283b76..f619e8d127ec 100644
--- a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
+++ b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -indvars -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -S | FileCheck %s
; Indvars should insert a 64-bit induction variable to eliminate the
; sext for the addressing, however it shouldn't eliminate the sext
diff --git a/test/Transforms/IndVarSimplify/sink-alloca.ll b/test/Transforms/IndVarSimplify/sink-alloca.ll
index 3a6c683e7cec..64207d823d2c 100644
--- a/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/test/Transforms/IndVarSimplify/sink-alloca.ll
@@ -1,15 +1,10 @@
; RUN: opt < %s -indvars -S | FileCheck %s
-; PR4775
-
-; Indvars shouldn't sink the alloca out of the entry block, even though
-; it's not used until after the loop.
-
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin10.0"
-@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 ()* @main to i8*)],
-section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
+; PR4775
+; Indvars shouldn't sink the alloca out of the entry block, even though
+; it's not used until after the loop.
define i32 @main() nounwind {
; CHECK: entry:
; CHECK-NEXT: %result.i = alloca i32, align 4
@@ -23,9 +18,39 @@ while.cond: ; preds = %while.cond, %entry
br i1 %tobool, label %while.end, label %while.cond
while.end: ; preds = %while.cond
- volatile store i32 0, i32* %result.i
- %tmp.i = volatile load i32* %result.i ; <i32> [#uses=0]
+ store volatile i32 0, i32* %result.i
+ %tmp.i = load volatile i32* %result.i ; <i32> [#uses=0]
ret i32 0
}
-
declare i32 @bar()
+
+; <rdar://problem/10352360>
+; Indvars shouldn't sink the first alloca between the stacksave and stackrestore
+; intrinsics.
+declare i8* @a(...)
+declare i8* @llvm.stacksave() nounwind
+declare void @llvm.stackrestore(i8*) nounwind
+define void @h(i64 %n) nounwind uwtable ssp {
+; CHECK: entry:
+; CHECK-NEXT: %vla = alloca i8*
+; CHECK-NEXT: %savedstack = call i8* @llvm.stacksave()
+entry:
+ %vla = alloca i8*, i64 %n, align 16
+ %savedstack = call i8* @llvm.stacksave() nounwind
+ %vla.i = alloca i8*, i64 %n, align 16
+ br label %for.body.i
+
+for.body.i:
+ %indvars.iv37.i = phi i64 [ %indvars.iv.next38.i, %for.body.i ], [ 0, %entry ]
+ %call.i = call i8* (...)* @a() nounwind
+ %arrayidx.i = getelementptr inbounds i8** %vla.i, i64 %indvars.iv37.i
+ store i8* %call.i, i8** %arrayidx.i, align 8
+ %indvars.iv.next38.i = add i64 %indvars.iv37.i, 1
+ %exitcond5 = icmp eq i64 %indvars.iv.next38.i, %n
+ br i1 %exitcond5, label %g.exit, label %for.body.i
+
+g.exit:
+ call void @llvm.stackrestore(i8* %savedstack) nounwind
+ %call1 = call i8* (...)* @a(i8** %vla) nounwind
+ ret void
+}
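
Why the ordering pinned down by the CHECK-NEXT lines matters, rendered as
hypothetical C. The llvm_stacksave/llvm_stackrestore declarations merely
stand in for the intrinsics and are not a real API:

    void *llvm_stacksave(void);        /* stand-in for @llvm.stacksave */
    void llvm_stackrestore(void *);    /* stand-in for @llvm.stackrestore */
    char *a(char **);

    void h(long n) {
      char **vla = __builtin_alloca(n * sizeof(char *)); /* must stay here */
      void *saved = llvm_stacksave();
      char **vla_i = __builtin_alloca(n * sizeof(char *));
      vla_i[0] = 0;              /* stands in for the loop that fills vla_i */
      llvm_stackrestore(saved);  /* releases vla_i; had the first alloca been
                                    sunk below the save, this would release
                                    vla as well, before its use below */
      a(vla);                    /* vla is still live here */
    }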
diff --git a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
index fc906cdcfb15..fb9ef22549c0 100644
--- a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
+++ b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
@@ -1,5 +1,4 @@
; RUN: opt < %s -indvars -instcombine -S | FileCheck %s
-; RUN: opt < %s -indvars -enable-iv-rewrite=false -instcombine -S | FileCheck %s
;
; Test that -indvars can reduce variable stride IVs. If it can reduce variable
; stride iv's, it will make %iv. and %m.0.0 isomorphic to each other without
diff --git a/test/Transforms/Inline/2007-06-06-NoInline.ll b/test/Transforms/Inline/2007-06-06-NoInline.ll
deleted file mode 100644
index d5a7953ffb07..000000000000
--- a/test/Transforms/Inline/2007-06-06-NoInline.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -inline -S | grep "define internal i32 @bar"
-@llvm.noinline = appending global [1 x i8*] [ i8* bitcast (i32 (i32, i32)* @bar to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define internal i32 @bar(i32 %x, i32 %y) {
-entry:
- %x_addr = alloca i32 ; <i32*> [#uses=2]
- %y_addr = alloca i32 ; <i32*> [#uses=2]
- %retval = alloca i32, align 4 ; <i32*> [#uses=2]
- %tmp = alloca i32, align 4 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 %x, i32* %x_addr
- store i32 %y, i32* %y_addr
- %tmp1 = load i32* %x_addr ; <i32> [#uses=1]
- %tmp2 = load i32* %y_addr ; <i32> [#uses=1]
- %tmp3 = add i32 %tmp1, %tmp2 ; <i32> [#uses=1]
- store i32 %tmp3, i32* %tmp
- %tmp4 = load i32* %tmp ; <i32> [#uses=1]
- store i32 %tmp4, i32* %retval
- br label %return
-
-return: ; preds = %entry
- %retval5 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval5
-}
-
-define i32 @foo(i32 %a, i32 %b) {
-entry:
- %a_addr = alloca i32 ; <i32*> [#uses=2]
- %b_addr = alloca i32 ; <i32*> [#uses=2]
- %retval = alloca i32, align 4 ; <i32*> [#uses=2]
- %tmp = alloca i32, align 4 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 %a, i32* %a_addr
- store i32 %b, i32* %b_addr
- %tmp1 = load i32* %b_addr ; <i32> [#uses=1]
- %tmp2 = load i32* %a_addr ; <i32> [#uses=1]
- %tmp3 = call i32 @bar( i32 %tmp1, i32 %tmp2 ) ; <i32> [#uses=1]
- store i32 %tmp3, i32* %tmp
- %tmp4 = load i32* %tmp ; <i32> [#uses=1]
- store i32 %tmp4, i32* %retval
- br label %return
-
-return: ; preds = %entry
- %retval5 = load i32* %retval ; <i32> [#uses=1]
- ret i32 %retval5
-}
diff --git a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll b/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
deleted file mode 100644
index 39095c407281..000000000000
--- a/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -inline-threshold=0 -inline -S | not grep call
-
-define i32 @fn2() alwaysinline {
- ret i32 1
-}
-
-define i32 @fn3() {
- %r = call i32 @fn2()
- ret i32 %r
-}
diff --git a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll b/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
deleted file mode 100644
index 11e501274d3b..000000000000
--- a/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -always-inline -S | not grep call
-
-; Ensure that threshold doesn't disrupt always inline.
-; RUN: opt < %s -inline-threshold=-2000000001 -always-inline -S | not grep call
-
-
-define internal i32 @if0() alwaysinline {
- ret i32 1
-}
-
-define i32 @f0() {
- %r = call i32 @if0()
- ret i32 %r
-}
diff --git a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll b/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
deleted file mode 100644
index bc9787b82345..000000000000
--- a/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: opt < %s -always-inline -S | grep {@foo}
-; Ensure that foo is not removed by always inliner
-; PR 2945
-
-define internal i32 @foo() nounwind {
- ret i32 0
-}
diff --git a/test/Transforms/Inline/alloca-bonus.ll b/test/Transforms/Inline/alloca-bonus.ll
new file mode 100644
index 000000000000..d04d54e3a538
--- /dev/null
+++ b/test/Transforms/Inline/alloca-bonus.ll
@@ -0,0 +1,155 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+
+@glbl = external global i32
+
+define void @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call void @inner1
+ %ptr = alloca i32
+ call void @inner1(i32* %ptr)
+ ret void
+}
+
+define void @inner1(i32 *%ptr) {
+ %A = load i32* %ptr
+ store i32 0, i32* %ptr
+ %C = getelementptr inbounds i32* %ptr, i32 0
+ %D = getelementptr inbounds i32* %ptr, i32 1
+ %E = bitcast i32* %ptr to i8*
+ %F = select i1 false, i32* %ptr, i32* @glbl
+ call void @llvm.lifetime.start(i64 0, i8* %E)
+ ret void
+}
+
+define void @outer2() {
+; CHECK: @outer2
+; CHECK: call void @inner2
+ %ptr = alloca i32
+ call void @inner2(i32* %ptr)
+ ret void
+}
+
+; %D poisons this call because scalar-repl can't handle that instruction.
+define void @inner2(i32 *%ptr) {
+ %A = load i32* %ptr
+ store i32 0, i32* %ptr
+ %C = getelementptr inbounds i32* %ptr, i32 0
+ %D = getelementptr inbounds i32* %ptr, i32 %A
+ %E = bitcast i32* %ptr to i8*
+ %F = select i1 false, i32* %ptr, i32* @glbl
+ call void @llvm.lifetime.start(i64 0, i8* %E)
+ ret void
+}
+
+define void @outer3() {
+; CHECK: @outer3
+; CHECK-NOT: call void @inner3
+ %ptr = alloca i32
+ call void @inner3(i32* %ptr, i1 undef)
+ ret void
+}
+
+define void @inner3(i32 *%ptr, i1 %x) {
+ %A = icmp eq i32* %ptr, null
+ %B = and i1 %x, %A
+ br i1 %A, label %bb.true, label %bb.false
+bb.true:
+ ; This block mustn't be counted in the inline cost.
+ %t1 = load i32* %ptr
+ %t2 = add i32 %t1, 1
+ %t3 = add i32 %t2, 1
+ %t4 = add i32 %t3, 1
+ %t5 = add i32 %t4, 1
+ %t6 = add i32 %t5, 1
+ %t7 = add i32 %t6, 1
+ %t8 = add i32 %t7, 1
+ %t9 = add i32 %t8, 1
+ %t10 = add i32 %t9, 1
+ %t11 = add i32 %t10, 1
+ %t12 = add i32 %t11, 1
+ %t13 = add i32 %t12, 1
+ %t14 = add i32 %t13, 1
+ %t15 = add i32 %t14, 1
+ %t16 = add i32 %t15, 1
+ %t17 = add i32 %t16, 1
+ %t18 = add i32 %t17, 1
+ %t19 = add i32 %t18, 1
+ %t20 = add i32 %t19, 1
+ ret void
+bb.false:
+ ret void
+}
+
+define void @outer4(i32 %A) {
+; CHECK: @outer4
+; CHECK-NOT: call void @inner4
+ %ptr = alloca i32
+ call void @inner4(i32* %ptr, i32 %A)
+ ret void
+}
+
+; %B poisons this call because scalar-repl can't handle that instruction.
+; However, we still want to detect that the icmp and branch *can* be handled.
+define void @inner4(i32 *%ptr, i32 %A) {
+ %B = getelementptr inbounds i32* %ptr, i32 %A
+ %C = icmp eq i32* %ptr, null
+ br i1 %C, label %bb.true, label %bb.false
+bb.true:
+ ; This block mustn't be counted in the inline cost.
+ %t1 = load i32* %ptr
+ %t2 = add i32 %t1, 1
+ %t3 = add i32 %t2, 1
+ %t4 = add i32 %t3, 1
+ %t5 = add i32 %t4, 1
+ %t6 = add i32 %t5, 1
+ %t7 = add i32 %t6, 1
+ %t8 = add i32 %t7, 1
+ %t9 = add i32 %t8, 1
+ %t10 = add i32 %t9, 1
+ %t11 = add i32 %t10, 1
+ %t12 = add i32 %t11, 1
+ %t13 = add i32 %t12, 1
+ %t14 = add i32 %t13, 1
+ %t15 = add i32 %t14, 1
+ %t16 = add i32 %t15, 1
+ %t17 = add i32 %t16, 1
+ %t18 = add i32 %t17, 1
+ %t19 = add i32 %t18, 1
+ %t20 = add i32 %t19, 1
+ ret void
+bb.false:
+ ret void
+}
+
+define void @outer5() {
+; CHECK: @outer5
+; CHECK-NOT: call void @inner5
+ %ptr = alloca i32
+ call void @inner5(i1 false, i32* %ptr)
+ ret void
+}
+
+; %D poisons this call because scalar-repl can't handle that instruction.
+; However, if the flag is set appropriately, the poisoning instruction is
+; inside dead code and so shouldn't be counted.
+define void @inner5(i1 %flag, i32 *%ptr) {
+ %A = load i32* %ptr
+ store i32 0, i32* %ptr
+ %C = getelementptr inbounds i32* %ptr, i32 0
+ br i1 %flag, label %if.then, label %exit
+
+if.then:
+ %D = getelementptr inbounds i32* %ptr, i32 %A
+ %E = bitcast i32* %ptr to i8*
+ %F = select i1 false, i32* %ptr, i32* @glbl
+ call void @llvm.lifetime.start(i64 0, i8* %E)
+ ret void
+
+exit:
+ ret void
+}
+
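
What these @outer/@inner pairs probe, in hypothetical C terms: when the
argument at a call site is an alloca, the inline-cost analysis discounts
callee instructions that scalar-repl could later delete, so the bodies above
fit under the tiny -inline-threshold=8 only while every use of the pointer
stays analyzable:

    static int inner(int *p) {   /* every use of p is SROA-friendly */
      *p = 0;
      return *p + 1;
    }

    int outer(void) {
      int x;            /* becomes the alloca at the call site */
      return inner(&x); /* after inlining, x is promoted to a register and
                           inner's loads and stores disappear, so they are
                           not charged against the inline threshold */
    }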
diff --git a/test/Transforms/Inline/always-inline.ll b/test/Transforms/Inline/always-inline.ll
new file mode 100644
index 000000000000..e0be41fa6657
--- /dev/null
+++ b/test/Transforms/Inline/always-inline.ll
@@ -0,0 +1,125 @@
+; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s
+;
+; Ensure the threshold has no impact on these decisions.
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s
+; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s
+
+define i32 @inner1() alwaysinline {
+ ret i32 1
+}
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret
+
+ %r = call i32 @inner1()
+ ret i32 %r
+}
+
+; The always inliner can't DCE internal functions. PR2945
+; CHECK: @pr2945
+define internal i32 @pr2945() nounwind {
+ ret i32 0
+}
+
+define internal void @inner2(i32 %N) alwaysinline {
+ %P = alloca i32, i32 %N
+ ret void
+}
+define void @outer2(i32 %N) {
+; The always inliner (unlike the normal one) should be willing to inline
+; a function with a dynamic alloca into one without a dynamic alloca.
+; rdar://6655932
+;
+; CHECK: @outer2
+; CHECK-NOT: call void @inner2
+; CHECK: alloca i32, i32 %N
+; CHECK-NOT: call void @inner2
+; CHECK: ret void
+
+ call void @inner2( i32 %N )
+ ret void
+}
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @inner3() alwaysinline {
+entry:
+ %call = call i32 @a() returns_twice
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+define i32 @outer3() {
+entry:
+; CHECK: @outer3
+; CHECK-NOT: call i32 @a
+; CHECK: ret
+
+ %call = call i32 @inner3()
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+
+define i32 @inner4() alwaysinline returns_twice {
+entry:
+ %call = call i32 @b() returns_twice
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+
+define i32 @outer4() {
+entry:
+; CHECK: @outer4
+; CHECK: call i32 @b()
+; CHECK: ret
+
+ %call = call i32 @inner4() returns_twice
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+
+define i32 @inner5(i8* %addr) alwaysinline {
+entry:
+ indirectbr i8* %addr, [ label %one, label %two ]
+
+one:
+ ret i32 42
+
+two:
+ ret i32 44
+}
+define i32 @outer5(i32 %x) {
+; CHECK: @outer5
+; CHECK: call i32 @inner5
+; CHECK: ret
+
+ %cmp = icmp slt i32 %x, 42
+ %addr = select i1 %cmp, i8* blockaddress(@inner5, %one), i8* blockaddress(@inner5, %two)
+ %call = call i32 @inner5(i8* %addr)
+ ret i32 %call
+}
+
+define void @inner6(i32 %x) alwaysinline {
+entry:
+ %icmp = icmp slt i32 %x, 0
+ br i1 %icmp, label %return, label %bb
+
+bb:
+ %sub = sub nsw i32 %x, 1
+ call void @inner6(i32 %sub)
+ ret void
+
+return:
+ ret void
+}
+define void @outer6() {
+; CHECK: @outer6
+; CHECK: call void @inner6(i32 42)
+; CHECK: ret
+
+entry:
+ call void @inner6(i32 42)
+ ret void
+}
+
diff --git a/test/Transforms/Inline/always_inline_dyn_alloca.ll b/test/Transforms/Inline/always_inline_dyn_alloca.ll
deleted file mode 100644
index 25cfc49f1a84..000000000000
--- a/test/Transforms/Inline/always_inline_dyn_alloca.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -inline -S | not grep callee
-; rdar://6655932
-
-; If callee is marked alwaysinline, inline it! Even if callee has dynamic
-; alloca and caller does not,
-
-define internal void @callee(i32 %N) alwaysinline {
- %P = alloca i32, i32 %N
- ret void
-}
-
-define void @foo(i32 %N) {
- call void @callee( i32 %N )
- ret void
-}
diff --git a/test/Transforms/Inline/blockaddress.ll b/test/Transforms/Inline/blockaddress.ll
new file mode 100644
index 000000000000..4206312d7743
--- /dev/null
+++ b/test/Transforms/Inline/blockaddress.ll
@@ -0,0 +1,27 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+; PR10162
+
+; Make sure the blockaddress is mapped correctly when doit is inlined
+; CHECK: store i8* blockaddress(@f, %here.i), i8** @ptr1, align 8
+
+@i = global i32 1, align 4
+@ptr1 = common global i8* null, align 8
+
+define void @doit(i8** nocapture %pptr, i32 %cond) nounwind uwtable {
+entry:
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.end, label %here
+
+here:
+ store i8* blockaddress(@doit, %here), i8** %pptr, align 8
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+define void @f(i32 %cond) nounwind uwtable {
+entry:
+ call void @doit(i8** @ptr1, i32 %cond)
+ ret void
+}
diff --git a/test/Transforms/Inline/dg.exp b/test/Transforms/Inline/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/Inline/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Inline/dynamic_alloca_test.ll b/test/Transforms/Inline/dynamic_alloca_test.ll
index 0286535efec1..15a5c66815d2 100644
--- a/test/Transforms/Inline/dynamic_alloca_test.ll
+++ b/test/Transforms/Inline/dynamic_alloca_test.ll
@@ -3,33 +3,43 @@
; Functions with dynamic allocas can only be inlined into functions that
; already have dynamic allocas.
-; RUN: opt < %s -inline -S | \
-; RUN: grep llvm.stacksave
-; RUN: opt < %s -inline -S | not grep callee
-
+; RUN: opt < %s -inline -S | FileCheck %s
+;
+; FIXME: This test is xfailed because the inline cost rewrite disabled *all*
+; inlining of functions which contain a dynamic alloca. It should be re-enabled
+; once that functionality is restored.
+; XFAIL: *
declare void @ext(i32*)
define internal void @callee(i32 %N) {
- %P = alloca i32, i32 %N ; <i32*> [#uses=1]
- call void @ext( i32* %P )
- ret void
+ %P = alloca i32, i32 %N
+ call void @ext(i32* %P)
+ ret void
}
define void @foo(i32 %N) {
-; <label>:0
- %P = alloca i32, i32 %N ; <i32*> [#uses=1]
- call void @ext( i32* %P )
- br label %Loop
-
-Loop: ; preds = %Loop, %0
- %count = phi i32 [ 0, %0 ], [ %next, %Loop ] ; <i32> [#uses=2]
- %next = add i32 %count, 1 ; <i32> [#uses=1]
- call void @callee( i32 %N )
- %cond = icmp eq i32 %count, 100000 ; <i1> [#uses=1]
- br i1 %cond, label %out, label %Loop
-
-out: ; preds = %Loop
- ret void
+; CHECK: @foo
+; CHECK: alloca i32, i32 %{{.*}}
+; CHECK: call i8* @llvm.stacksave()
+; CHECK: alloca i32, i32 %{{.*}}
+; CHECK: call void @ext
+; CHECK: call void @llvm.stackrestore
+; CHECK: ret
+
+entry:
+ %P = alloca i32, i32 %N
+ call void @ext(i32* %P)
+ br label %loop
+
+loop:
+ %count = phi i32 [ 0, %entry ], [ %next, %loop ]
+ %next = add i32 %count, 1
+ call void @callee(i32 %N)
+ %cond = icmp eq i32 %count, 100000
+ br i1 %cond, label %out, label %loop
+
+out:
+ ret void
}
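
The CHECK lines above encode the required stack discipline; in hypothetical C
(the stand-in declarations model the intrinsics, not a real API), each
inlined iteration must release its dynamic allocation:

    void ext(int *);
    void *llvm_stacksave(void);      /* stand-in for @llvm.stacksave */
    void llvm_stackrestore(void *);  /* stand-in for @llvm.stackrestore */

    void foo_after_inlining(int n) {
      int *p0 = __builtin_alloca(n * sizeof(int));
      ext(p0);
      for (int count = 0; count != 100001; ++count) {
        void *saved = llvm_stacksave();              /* inlined prologue */
        int *p = __builtin_alloca(n * sizeof(int));  /* callee's alloca */
        ext(p);
        llvm_stackrestore(saved);  /* without this, stack usage would grow
                                      by n ints on every iteration */
      }
    }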
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 462c29a85cef..1f34113a1809 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -23,15 +23,11 @@ invcont:
ret i32 %retval
lpad:
- %eh_ptr = call i8* @llvm.eh.exception()
- %eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ catch i8* null
unreachable
}
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare i32 @__gxx_personality_v0(...)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index 4c6472194210..3898aa7044ac 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -1,10 +1,8 @@
; Test that the inliner doesn't leave around dead allocas, and that it folds
; uncond branches away after it is done specializing.
-; RUN: opt < %s -inline -S | \
-; RUN: not grep {alloca.*uses=0}
-; RUN: opt < %s -inline -S | \
-; RUN: not grep {br label}
+; RUN: opt < %s -inline -S | FileCheck %s
+
@A = weak global i32 0 ; <i32*> [#uses=1]
@B = weak global i32 0 ; <i32*> [#uses=1]
@C = weak global i32 0 ; <i32*> [#uses=1]
@@ -54,6 +52,18 @@ UnifiedReturnBlock: ; preds = %cond_next13
declare void @ext(i32*)
define void @test() {
+; CHECK: @test
+; CHECK-NOT: ret
+;
+; FIXME: This should be a CHECK-NOT, but currently we have a bug that causes us
+; to not nuke unused allocas.
+; CHECK: alloca
+; CHECK-NOT: ret
+;
+; No branches should survive the inliner's cleanup.
+; CHECK-NOT: br
+; CHECK: ret void
+
entry:
tail call fastcc void @foo( i32 1 )
tail call fastcc void @foo( i32 2 )
@@ -61,3 +71,143 @@ entry:
tail call fastcc void @foo( i32 8 )
ret void
}
+
+declare void @f(i32 %x)
+
+define void @inner2(i32 %x, i32 %y, i32 %z, i1 %b) {
+entry:
+ %cmp1 = icmp ne i32 %x, 0
+ br i1 %cmp1, label %then1, label %end1
+
+then1:
+ call void @f(i32 %x)
+ br label %end1
+
+end1:
+ %x2 = and i32 %x, %z
+ %cmp2 = icmp sgt i32 %x2, 1
+ br i1 %cmp2, label %then2, label %end2
+
+then2:
+ call void @f(i32 %x2)
+ br label %end2
+
+end2:
+ %y2 = or i32 %y, %z
+ %cmp3 = icmp sgt i32 %y2, 0
+ br i1 %cmp3, label %then3, label %end3
+
+then3:
+ call void @f(i32 %y2)
+ br label %end3
+
+end3:
+ br i1 %b, label %end3.1, label %end3.2
+
+end3.1:
+ %x3.1 = or i32 %x, 10
+ br label %end3.3
+
+end3.2:
+ %x3.2 = or i32 %x, 10
+ br label %end3.3
+
+end3.3:
+ %x3.3 = phi i32 [ %x3.1, %end3.1 ], [ %x3.2, %end3.2 ]
+ %cmp4 = icmp slt i32 %x3.3, 1
+ br i1 %cmp4, label %then4, label %end4
+
+then4:
+ call void @f(i32 %x3.3)
+ br label %end4
+
+end4:
+ ret void
+}
+
+define void @outer2(i32 %z, i1 %b) {
+; Ensure that after inlining and cleanup, none of the blocks containing a
+; call to @f survive.
+; CHECK: define void @outer2
+; CHECK-NOT: call
+; CHECK: ret void
+
+entry:
+ call void @inner2(i32 0, i32 -1, i32 %z, i1 %b)
+ ret void
+}
+
+define void @PR12470_inner(i16 signext %p1) nounwind uwtable {
+entry:
+ br i1 undef, label %cond.true, label %cond.false
+
+cond.true:
+ br label %cond.end
+
+cond.false:
+ %conv = sext i16 %p1 to i32
+ br label %cond.end
+
+cond.end:
+ %cond = phi i32 [ undef, %cond.true ], [ 0, %cond.false ]
+ %tobool = icmp eq i32 %cond, 0
+ br i1 %tobool, label %if.end5, label %if.then
+
+if.then:
+ ret void
+
+if.end5:
+ ret void
+}
+
+define void @PR12470_outer() {
+; This previously crashed during inliner cleanup while folding inner return
+; instructions. Check that we don't crash and that we produce a function with
+; a single return instruction due to merging the returns of the inlined
+; function.
+; CHECK: define void @PR12470_outer
+; CHECK-NOT: call
+; CHECK: ret void
+; CHECK-NOT: ret void
+; CHECK: }
+
+entry:
+ call void @PR12470_inner(i16 signext 1)
+ ret void
+}
+
+define void @crasher_inner() nounwind uwtable {
+entry:
+ br i1 false, label %for.end28, label %for.body6
+
+for.body6:
+ br i1 undef, label %for.body6, label %for.cond12.for.inc26_crit_edge
+
+for.cond12.for.inc26_crit_edge:
+ br label %for.body6.1
+
+for.end28:
+ ret void
+
+for.body6.1:
+ br i1 undef, label %for.body6.1, label %for.cond12.for.inc26_crit_edge.1
+
+for.cond12.for.inc26_crit_edge.1:
+ br label %for.body6.2
+
+for.body6.2:
+ br i1 undef, label %for.body6.2, label %for.cond12.for.inc26_crit_edge.2
+
+for.cond12.for.inc26_crit_edge.2:
+ br label %for.end28
+}
+
+define void @crasher_outer() {
+; CHECK: @crasher_outer
+; CHECK-NOT: call
+; CHECK: ret void
+; CHECK-NOT: ret
+; CHECK: }
+entry:
+ tail call void @crasher_inner()
+ ret void
+}
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 537c69b305cb..dc35b60ba39c 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -1,14 +1,112 @@
-; RUN: opt < %s -inline -S | not grep callee
-; RUN: opt < %s -inline -S | not grep div
+; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s
+define internal i32 @callee1(i32 %A, i32 %B) {
+ %C = sdiv i32 %A, %B
+ ret i32 %C
+}
+
+define i32 @caller1() {
+; CHECK: define i32 @caller1
+; CHECK-NEXT: ret i32 3
+
+ %X = call i32 @callee1( i32 10, i32 3 )
+ ret i32 %X
+}
+
+define i32 @caller2() {
+; Check that we can constant-prop through instructions after inlining callee21
+; to get constants in the inlined callsite to callee22.
+; FIXME: Currently, the threshold is fixed at 20 because we don't perform
+; *recursive* cost analysis to realize that the nested call site will definitely
+; inline and be cheap. We should eventually do that and lower the threshold here
+; to 1.
+;
+; CHECK: @caller2
+; CHECK-NOT: call void @callee2
+; CHECK: ret
+
+ %x = call i32 @callee21(i32 42, i32 48)
+ ret i32 %x
+}
+
+define i32 @callee21(i32 %x, i32 %y) {
+ %sub = sub i32 %y, %x
+ %result = call i32 @callee22(i32 %sub)
+ ret i32 %result
+}
-define internal i32 @callee(i32 %A, i32 %B) {
- %C = sdiv i32 %A, %B ; <i32> [#uses=1]
- ret i32 %C
+declare i8* @getptr()
+
+define i32 @callee22(i32 %x) {
+ %icmp = icmp ugt i32 %x, 42
+ br i1 %icmp, label %bb.true, label %bb.false
+bb.true:
+ ; This block mustn't be counted in the inline cost.
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ %x4 = add i32 %x3, 1
+ %x5 = add i32 %x4, 1
+ %x6 = add i32 %x5, 1
+ %x7 = add i32 %x6, 1
+ %x8 = add i32 %x7, 1
+
+ ret i32 %x8
+bb.false:
+ ret i32 %x
}
-define i32 @test() {
- %X = call i32 @callee( i32 10, i32 3 ) ; <i32> [#uses=1]
- ret i32 %X
+define i32 @caller3() {
+; Check that even if the expensive path is hidden behind several basic blocks,
+; it doesn't count toward the inline cost when constant-prop proves those paths
+; dead.
+;
+; CHECK: @caller3
+; CHECK-NOT: call
+; CHECK: ret i32 6
+
+entry:
+ %x = call i32 @callee3(i32 42, i32 48)
+ ret i32 %x
}
+define i32 @callee3(i32 %x, i32 %y) {
+ %sub = sub i32 %y, %x
+ %icmp = icmp ugt i32 %sub, 42
+ br i1 %icmp, label %bb.true, label %bb.false
+
+bb.true:
+ %icmp2 = icmp ult i32 %sub, 64
+ br i1 %icmp2, label %bb.true.true, label %bb.true.false
+
+bb.true.true:
+ ; This block mustn't be counted in the inline cost.
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ %x4 = add i32 %x3, 1
+ %x5 = add i32 %x4, 1
+ %x6 = add i32 %x5, 1
+ %x7 = add i32 %x6, 1
+ %x8 = add i32 %x7, 1
+ br label %bb.merge
+
+bb.true.false:
+ ; This block mustn't be counted in the inline cost.
+ %y1 = add i32 %y, 1
+ %y2 = add i32 %y1, 1
+ %y3 = add i32 %y2, 1
+ %y4 = add i32 %y3, 1
+ %y5 = add i32 %y4, 1
+ %y6 = add i32 %y5, 1
+ %y7 = add i32 %y6, 1
+ %y8 = add i32 %y7, 1
+ br label %bb.merge
+
+bb.merge:
+ %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ]
+ ret i32 %result
+
+bb.false:
+ ret i32 %sub
+}
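
To see where the FileCheck constants come from, fold the calls by hand:

    ; caller1: callee1(10, 3)  =>  sdiv i32 10, 3             =>  ret i32 3
    ; caller3: callee3(42, 48) =>  %sub  = sub i32 48, 42 = 6
    ;                              %icmp = icmp ugt i32 6, 42 is false
    ;                              only bb.false survives     =>  ret i32 6

Because the bb.true.* chains are provably dead, their adds are never charged,
which is why @callee3 still fits under the threshold of 20.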
diff --git a/test/Transforms/Inline/inline_returns_twice.ll b/test/Transforms/Inline/inline_returns_twice.ll
new file mode 100644
index 000000000000..ab2e954af151
--- /dev/null
+++ b/test/Transforms/Inline/inline_returns_twice.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; Check that functions containing "returns_twice" calls are inlined only if
+; they are themselves marked returns_twice (see the setjmp sketch after this
+; test).
+
+declare i32 @a() returns_twice
+declare i32 @b() returns_twice
+
+define i32 @f() {
+entry:
+ %call = call i32 @a() returns_twice
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+
+define i32 @g() {
+entry:
+; CHECK: define i32 @g
+; CHECK: call i32 @f()
+; CHECK-NOT: call i32 @a()
+ %call = call i32 @f()
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+
+define i32 @h() returns_twice {
+entry:
+ %call = call i32 @b() returns_twice
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
+
+define i32 @i() {
+entry:
+; CHECK: define i32 @i
+; CHECK: call i32 @b()
+; CHECK-NOT: call i32 @h()
+ %call = call i32 @h() returns_twice
+ %add = add nsw i32 1, %call
+ ret i32 %add
+}
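
The rule these tests pin down exists because of setjmp-like functions; a
small conforming C illustration (the names are hypothetical):

    #include <setjmp.h>

    static jmp_buf env;

    int twice(void) {       /* behaves like a returns_twice callee */
      if (setjmp(env) != 0)
        return 1;           /* second return, reached via longjmp(env, 1) */
      return 0;             /* first, normal return */
    }

Inlining such a body into a frame that was not compiled to survive a second
return can clobber the caller's locals when the longjmp fires. Hence @f
above, which calls @a() returns_twice without being marked returns_twice
itself, is not inlined into @g, while @h, which carries the attribute, may be
inlined into @i, exposing the direct call to @b.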
diff --git a/test/Transforms/Inline/lit.local.cfg b/test/Transforms/Inline/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Inline/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index 1d5ebbbf0fa9..6cde0e27fd1e 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -17,7 +17,7 @@ entry:
bb: ; preds = %entry
%1 = sub nsw i32 %x, 1 ; <i32> [#uses=1]
call void @foo(i32 %1) nounwind ssp
- volatile store i32 1, i32* @g, align 4
+ store volatile i32 1, i32* @g, align 4
ret void
return: ; preds = %entry
@@ -42,7 +42,7 @@ entry:
%0 = bitcast i8* %Bar to void (i32, i8*, i8*)*
%1 = sub nsw i32 %x, 1
call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind
- volatile store i32 42, i32* @g, align 4
+ store volatile i32 42, i32* @g, align 4
ret void
}
@@ -54,7 +54,7 @@ entry:
bb: ; preds = %entry
%1 = bitcast i8* %Foo to void (i32, i8*, i8*)* ; <void (i32, i8*, i8*)*> [#uses=1]
call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind
- volatile store i32 13, i32* @g, align 4
+ store volatile i32 13, i32* @g, align 4
ret void
return: ; preds = %entry
@@ -71,3 +71,40 @@ entry:
call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp
ret void
}
+
+
+; Check that a recursive function, when called with a constant that makes the
+; recursive path dead code, can actually be inlined.
+define i32 @fib(i32 %i) {
+entry:
+ %is.zero = icmp eq i32 %i, 0
+ br i1 %is.zero, label %zero.then, label %zero.else
+
+zero.then:
+ ret i32 0
+
+zero.else:
+ %is.one = icmp eq i32 %i, 1
+ br i1 %is.one, label %one.then, label %one.else
+
+one.then:
+ ret i32 1
+
+one.else:
+ %i1 = sub i32 %i, 1
+ %f1 = call i32 @fib(i32 %i1)
+ %i2 = sub i32 %i, 2
+ %f2 = call i32 @fib(i32 %i2)
+ %f = add i32 %f1, %f2
+ ret i32 %f
+}
+
+define i32 @fib_caller() {
+; CHECK: @fib_caller
+; CHECK-NOT: call
+; CHECK: ret
+ %f1 = call i32 @fib(i32 0)
+ %f2 = call i32 @fib(i32 1)
+ %result = add i32 %f1, %f2
+ ret i32 %result
+}
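
Folding the constant calls by hand shows why no calls survive:

    ; fib(0): %is.zero is true               =>  ret i32 0
    ; fib(1): %is.zero false, %is.one true   =>  ret i32 1
    ; fib_caller: %result = add i32 0, 1     =>  ret i32 1

With these arguments the recursive one.else arm is dead, so the usual refusal
to inline recursive functions need not apply.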
diff --git a/test/Transforms/Inline/ptr-diff.ll b/test/Transforms/Inline/ptr-diff.ll
new file mode 100644
index 000000000000..60fc3e2a3326
--- /dev/null
+++ b/test/Transforms/Inline/ptr-diff.ll
@@ -0,0 +1,58 @@
+; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s
+
+target datalayout = "p:32:32"
+
+define i32 @outer1() {
+; CHECK: @outer1
+; CHECK-NOT: call
+; CHECK: ret i32
+
+ %ptr = alloca i32
+ %ptr1 = getelementptr inbounds i32* %ptr, i32 0
+ %ptr2 = getelementptr inbounds i32* %ptr, i32 42
+ %result = call i32 @inner1(i32* %ptr1, i32* %ptr2)
+ ret i32 %result
+}
+
+define i32 @inner1(i32* %begin, i32* %end) {
+ %begin.i = ptrtoint i32* %begin to i32
+ %end.i = ptrtoint i32* %end to i32
+ %distance = sub i32 %end.i, %begin.i
+ %icmp = icmp sle i32 %distance, 42
+ br i1 %icmp, label %then, label %else
+
+then:
+ ret i32 3
+
+else:
+ %t = load i32* %begin
+ ret i32 %t
+}
+
+define i32 @outer2(i32* %ptr) {
+; Test that plain (non-inbounds) GEPs disable this fold -- it isn't safe in
+; general, as wrapping changes the behavior of less-than and greater-than
+; comparisons (see the C sketch after this test).
+; CHECK: @outer2
+; CHECK: call i32 @inner2
+; CHECK: ret i32
+
+ %ptr1 = getelementptr i32* %ptr, i32 0
+ %ptr2 = getelementptr i32* %ptr, i32 42
+ %result = call i32 @inner2(i32* %ptr1, i32* %ptr2)
+ ret i32 %result
+}
+
+define i32 @inner2(i32* %begin, i32* %end) {
+ %begin.i = ptrtoint i32* %begin to i32
+ %end.i = ptrtoint i32* %end to i32
+ %distance = sub i32 %end.i, %begin.i
+ %icmp = icmp sle i32 %distance, 42
+ br i1 %icmp, label %then, label %else
+
+then:
+ ret i32 3
+
+else:
+ %t = load i32* %begin
+ ret i32 %t
+}
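
Why inbounds matters to the fold, as a hypothetical C rendering under the
p:32:32 layout assumed above:

    #include <stdint.h>

    int inner(int *begin, int *end) {   /* mirrors @inner1 and @inner2 */
      int32_t d = (int32_t)((uintptr_t)end - (uintptr_t)begin);
      if (d <= 42)
        return 3;
      return *begin;
    }

    int outer1_folded(void) {
      int buf[43] = { 7 };
      /* With inbounds arithmetic the distance is exactly 42 * 4 == 168
         bytes, so d <= 42 is provably false, the branch folds away, and
         the inlined body reduces to the load (7 here).  Plain GEPs may
         wrap, so no such proof exists and @inner2 remains a call. */
      return inner(&buf[0], &buf[42]);
    }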
diff --git a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
index 32979191f853..7f7390809c77 100644
--- a/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
+++ b/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
@@ -2,6 +2,6 @@
define void @test(i32* %P) {
; Dead but not deletable!
- %X = volatile load i32* %P ; <i32> [#uses=0]
+ %X = load volatile i32* %P ; <i32> [#uses=0]
ret void
}
diff --git a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
deleted file mode 100644
index 6190aa92805b..000000000000
--- a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep {call i32 @f}
-
- %struct.FRAME.nest = type { i32, i32 (i32)* }
- %struct.__builtin_trampoline = type { [10 x i8] }
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
-
-declare i32 @f(%struct.FRAME.nest* nest , i32 )
-
-define i32 @nest(i32 %n) {
-entry:
- %FRAME.0 = alloca %struct.FRAME.nest, align 8 ; <%struct.FRAME.nest*> [#uses=3]
- %TRAMP.216 = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
- %TRAMP.216.sub = getelementptr [10 x i8]* %TRAMP.216, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 %n, i32* %tmp3, align 8
- %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* , i32)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32)**> [#uses=1]
- %tmp89 = bitcast i8* %tramp to i32 (i32)* ; <i32 (i32)*> [#uses=2]
- store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 8
- %tmp2.i = call i32 %tmp89( i32 1 ) ; <i32> [#uses=1]
- ret i32 %tmp2.i
-}
diff --git a/test/Transforms/InstCombine/2007-10-28-stacksave.ll b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
index 76bceb6879bb..4c5c367bcfae 100644
--- a/test/Transforms/InstCombine/2007-10-28-stacksave.ll
+++ b/test/Transforms/InstCombine/2007-10-28-stacksave.ll
@@ -26,7 +26,7 @@ lab: ; preds = %cleanup31, %entry
%tmp21 = getelementptr i32* %tmp1819, i32 0 ; <i32*> [#uses=1]
store i32 1, i32* %tmp21, align 4
%tmp2223 = bitcast i32* %tmp1819 to i8* ; <i8*> [#uses=1]
- volatile store i8* %tmp2223, i8** @p, align 4
+ store volatile i8* %tmp2223, i8** @p, align 4
%tmp25 = add i32 %n.0, 1 ; <i32> [#uses=2]
%tmp27 = icmp sle i32 %tmp25, 999999 ; <i1> [#uses=1]
%tmp2728 = zext i1 %tmp27 to i8 ; <i8> [#uses=1]
diff --git a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll b/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
deleted file mode 100644
index 6401dfd0c11e..000000000000
--- a/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-
- %struct.FRAME.nest = type { i32, i32 (i32*)* }
- %struct.__builtin_trampoline = type { [10 x i8] }
-
-declare i8* @llvm.init.trampoline(i8*, i8*, i8*) nounwind
-
-declare i32 @f(%struct.FRAME.nest* nest , i32*)
-
-define i32 @nest(i32 %n) {
-entry:
- %FRAME.0 = alloca %struct.FRAME.nest, align 8 ; <%struct.FRAME.nest*> [#uses=3]
- %TRAMP.216 = alloca [10 x i8], align 16 ; <[10 x i8]*> [#uses=1]
- %TRAMP.216.sub = getelementptr [10 x i8]* %TRAMP.216, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 %n, i32* %tmp3, align 8
- %FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest*, i32*)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32*)**> [#uses=1]
- %tmp89 = bitcast i8* %tramp to i32 (i32*)* ; <i32 (i32*)*> [#uses=2]
- store i32 (i32*)* %tmp89, i32 (i32*)** %tmp7, align 8
- %tmp2.i = call i32 %tmp89( i32* nest null ) ; <i32> [#uses=1]
- ret i32 %tmp2.i
-}
diff --git a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
index 6847f5ed0534..de08c32fb40e 100644
--- a/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
+++ b/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
@@ -2,7 +2,7 @@
define void @test() {
%votf = alloca <4 x float> ; <<4 x float>*> [#uses=1]
- volatile store <4 x float> zeroinitializer, <4 x float>* %votf, align 16
+ store volatile <4 x float> zeroinitializer, <4 x float>* %votf, align 16
ret void
}
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
index a24f3071c9ac..1286e3d63b27 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -6,17 +6,17 @@ target triple = "i386-apple-darwin8"
define i32 @main() nounwind {
entry:
%tmp93 = icmp slt i32 0, 10 ; <i1> [#uses=0]
- %tmp34 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp34 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
%b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ] ; <i32> [#uses=1]
%tmp3.reg2mem.0 = phi i32 [ %tmp34, %entry ], [ %tmp3, %bb ] ; <i32> [#uses=1]
%tmp4 = add i32 %tmp3.reg2mem.0, 5 ; <i32> [#uses=1]
- volatile store i32 %tmp4, i32* @g_1, align 4
+ store volatile i32 %tmp4, i32* @g_1, align 4
%tmp6 = add i32 %b.0.reg2mem.0, 1 ; <i32> [#uses=2]
%tmp9 = icmp slt i32 %tmp6, 10 ; <i1> [#uses=1]
- %tmp3 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp3 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br i1 %tmp9, label %bb, label %bb11
bb11: ; preds = %bb
diff --git a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
index 5fb11ffb32da..ebbd3a743f1c 100644
--- a/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -7,11 +7,11 @@ target triple = "i386-apple-darwin8"
define i32 @main(i32 %i) nounwind {
entry:
%tmp93 = icmp slt i32 %i, 10 ; <i1> [#uses=0]
- %tmp34 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp34 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br i1 %tmp93, label %bb11, label %bb
bb: ; preds = %bb, %entry
- %tmp3 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp3 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br label %bb11
bb11: ; preds = %bb
diff --git a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
index 830783455189..4f4709b6f27a 100644
--- a/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
+++ b/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
@@ -10,7 +10,7 @@ entry:
%tmp2752 = alloca i32 ; <i32*> [#uses=2]
%tmpcast53 = bitcast i32* %tmp2752 to i8* ; <i8*> [#uses=1]
store i32 2, i32* %tmp2752, align 4
- volatile store i8* %tmpcast53, i8** @p, align 4
+ store volatile i8* %tmpcast53, i8** @p, align 4
br label %bb44
bb: ; preds = %bb44
@@ -29,7 +29,7 @@ bb44: ; preds = %bb44, %entry
store i32 1, i32* %tmp27, align 4
%tmp34 = getelementptr i32* %tmp27, i32 %tmp4 ; <i32*> [#uses=1]
store i32 2, i32* %tmp34, align 4
- volatile store i8* %tmpcast, i8** @p, align 4
+ store volatile i8* %tmpcast, i8** @p, align 4
%exitcond = icmp eq i32 %tmp3857, 999999 ; <i1> [#uses=1]
br i1 %exitcond, label %bb, label %bb44
}
diff --git a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
index 81044083c602..1ed53237aab3 100644
--- a/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
+++ b/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -7,17 +7,17 @@ target triple = "i386-apple-darwin8"
define i32 @main() nounwind {
entry:
%tmp93 = icmp slt i32 0, 10 ; <i1> [#uses=0]
- %tmp34 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp34 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
%b.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp6, %bb ] ; <i32> [#uses=1]
%tmp3.reg2mem.0 = phi i32 [ %tmp3, %bb ], [ %tmp34, %entry ]
%tmp4 = add i32 %tmp3.reg2mem.0, 5 ; <i32> [#uses=1]
- volatile store i32 %tmp4, i32* @g_1, align 4
+ store volatile i32 %tmp4, i32* @g_1, align 4
%tmp6 = add i32 %b.0.reg2mem.0, 1 ; <i32> [#uses=2]
%tmp9 = icmp slt i32 %tmp6, 10 ; <i1> [#uses=1]
- %tmp3 = volatile load i32* @g_1, align 4 ; <i32> [#uses=1]
+ %tmp3 = load volatile i32* @g_1, align 4 ; <i32> [#uses=1]
br i1 %tmp9, label %bb, label %bb11
bb11: ; preds = %bb
diff --git a/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
new file mode 100644
index 000000000000..abab9dc57029
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-01-11-OpaqueBitcastCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -disable-output
+
+%opaque_struct = type opaque
+
+@G = external global [0 x %opaque_struct]
+
+declare void @foo(%opaque_struct*)
+
+define void @bar() {
+ call void @foo(%opaque_struct* bitcast ([0 x %opaque_struct]* @G to %opaque_struct*))
+ ret void
+}
diff --git a/test/Transforms/InstCombine/2012-02-13-FCmp.ll b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
new file mode 100644
index 000000000000..39b05946515a
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-13-FCmp.ll
@@ -0,0 +1,35 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+; Radar 10803727
+@.str = private unnamed_addr constant [35 x i8] c"\0Ain_range input (should be 0): %f\0A\00", align 1
+@.str1 = external hidden unnamed_addr constant [35 x i8], align 1
+
+declare i32 @printf(i8*, ...)
+define i64 @_Z8tempCastj(i32 %val) uwtable ssp {
+entry:
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str1, i64 0, i64 0), i32 %val)
+ %conv = uitofp i32 %val to double
+ %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str, i64 0, i64 0), double %conv)
+ %cmp.i = fcmp oge double %conv, -1.000000e+00
+ br i1 %cmp.i, label %land.rhs.i, label %if.end.critedge
+; CHECK: br i1 true, label %land.rhs.i, label %if.end.critedge
+
+land.rhs.i: ; preds = %entry
+ %cmp1.i = fcmp olt double %conv, 1.000000e+00
+ br i1 %cmp1.i, label %if.then, label %if.end
+
+if.then: ; preds = %land.rhs.i
+ %add = fadd double %conv, 5.000000e-01
+ %conv3 = fptosi double %add to i64
+ br label %return
+
+if.end.critedge: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.end.critedge, %land.rhs.i
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %retval.0 = phi i64 [ %conv3, %if.then ], [ -1, %if.end ]
+ ret i64 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-02-28-ICmp.ll b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
new file mode 100644
index 000000000000..82cf85fa4cd8
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-02-28-ICmp.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; <rdar://problem/10803154>
+
+; There should be no transformation.
+; CHECK: %a = trunc i32 %x to i8
+; CHECK: %b = icmp ne i8 %a, 0
+; CHECK: %c = and i32 %x, 16711680
+; CHECK: %d = icmp ne i32 %c, 0
+; CHECK: %e = and i1 %b, %d
+; CHECK: ret i1 %e
+
+define i1 @f1(i32 %x) {
+ %a = trunc i32 %x to i8
+ %b = icmp ne i8 %a, 0
+ %c = and i32 %x, 16711680
+ %d = icmp ne i32 %c, 0
+ %e = and i1 %b, %d
+ ret i1 %e
+}
diff --git a/test/Transforms/InstCombine/2012-03-10-InstCombine.ll b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
new file mode 100644
index 000000000000..58ccf12e6cf4
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-03-10-InstCombine.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -instcombine | FileCheck %s
+
+; Derived from gcc.c-torture/execute/frame-address.c
+
+; CHECK: @func
+; CHECK: return:
+; CHECK-NOT: ret i32 0
+; CHECK: ret i32 %retval
+
+define i32 @func(i8* %c, i8* %f) nounwind uwtable readnone noinline ssp {
+entry:
+ %d = alloca i8, align 1
+ store i8 0, i8* %d, align 1
+ %cmp = icmp ugt i8* %d, %c
+ br i1 %cmp, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ %cmp2 = icmp ule i8* %d, %f
+ %not.cmp1 = icmp uge i8* %c, %f
+ %.cmp2 = and i1 %cmp2, %not.cmp1
+ %land.ext = zext i1 %.cmp2 to i32
+ br label %return
+
+if.else: ; preds = %entry
+ %cmp5 = icmp uge i8* %d, %f
+ %not.cmp3 = icmp ule i8* %c, %f
+ %.cmp5 = and i1 %cmp5, %not.cmp3
+ %land.ext7 = zext i1 %.cmp5 to i32
+ br label %return
+
+return: ; preds = %if.else, %if.then
+ %retval.0 = phi i32 [ %land.ext, %if.then ], [ %land.ext7, %if.else ]
+ ret i32 %retval.0
+}
+
diff --git a/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
new file mode 100644
index 000000000000..c1602da4c84d
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-3-15-or-xor-constant.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; PR12234
+
+@g = extern_weak global i32
+define i32 @function(i32 %x) nounwind {
+entry:
+ %xor = xor i32 %x, 1
+ store volatile i32 %xor, i32* inttoptr (i64 1 to i32*), align 4
+ %or4 = or i32 or (i32 zext (i1 icmp eq (i32* @g, i32* null) to i32), i32 1), %xor
+ ret i32 %or4
+}
+; CHECK: define i32 @function
diff --git a/test/Transforms/InstCombine/LandingPadClauses.ll b/test/Transforms/InstCombine/LandingPadClauses.ll
index 055bdcc81b56..de3b2d34fb94 100644
--- a/test/Transforms/InstCombine/LandingPadClauses.ll
+++ b/test/Transforms/InstCombine/LandingPadClauses.ll
@@ -6,6 +6,7 @@
declare i32 @generic_personality(i32, i64, i8*, i8*)
declare i32 @__gxx_personality_v0(i32, i64, i8*, i8*)
+declare i32 @__objc_personality_v0(i32, i64, i8*, i8*)
declare void @bar()
@@ -179,3 +180,54 @@ lpad.d:
; CHECK-NEXT: null
; CHECK-NEXT: unreachable
}
+
+define void @foo_objc() {
+; CHECK: @foo_objc
+ invoke void @bar()
+ to label %cont.a unwind label %lpad.a
+cont.a:
+ invoke void @bar()
+ to label %cont.b unwind label %lpad.b
+cont.b:
+ invoke void @bar()
+ to label %cont.c unwind label %lpad.c
+cont.c:
+ invoke void @bar()
+ to label %cont.d unwind label %lpad.d
+cont.d:
+ ret void
+
+lpad.a:
+ %a = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+ catch i32* null
+ catch i32* @T1
+ unreachable
+; CHECK: %a = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+
+lpad.b:
+ %b = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+ filter [1 x i32*] zeroinitializer
+ unreachable
+; CHECK: %b = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.c:
+ %c = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+ filter [2 x i32*] [i32* @T1, i32* null]
+ unreachable
+; CHECK: %c = landingpad
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: unreachable
+
+lpad.d:
+ %d = landingpad { i8*, i32 } personality i32 (i32, i64, i8*, i8*)* @__objc_personality_v0
+ cleanup
+ catch i32* null
+ unreachable
+; CHECK: %d = landingpad
+; CHECK-NEXT: null
+; CHECK-NEXT: unreachable
+}
diff --git a/test/Transforms/InstCombine/align-external.ll b/test/Transforms/InstCombine/align-external.ll
index 6e8ad87f19e0..d4a5d429912b 100644
--- a/test/Transforms/InstCombine/align-external.ll
+++ b/test/Transforms/InstCombine/align-external.ll
@@ -1,7 +1,7 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-; Don't assume that external global variables have their preferred
-; alignment. They may only have the ABI minimum alignment.
+; Don't assume that external global variables or those with weak linkage have
+; their preferred alignment. They may only have the ABI minimum alignment.
; CHECK: %s = shl i64 %a, 3
; CHECK: %r = or i64 %s, ptrtoint (i32* @A to i64)
@@ -11,7 +11,7 @@
target datalayout = "-i32:8:32"
@A = external global i32
-@B = external global i32
+@B = weak_odr global i32 0
define i64 @foo(i64 %a) {
%t = ptrtoint i32* @A to i64
@@ -20,3 +20,10 @@ define i64 @foo(i64 %a) {
%q = add i64 %r, 1
ret i64 %q
}
+
+define i32 @bar() {
+; CHECK: @bar
+ %r = load i32* @B, align 1
+; CHECK: align 1
+ ret i32 %r
+}
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index e4d136734546..ef7185cc81e0 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -44,3 +44,47 @@ define i32* @test4(i32 %n) {
%A = alloca i32, i32 %n
ret i32* %A
}
+
+; Allocas which are only used by GEPs, bitcasts, and stores (transitively)
+; should be deleted.
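+; (Even the ordered atomic stores are dead: the alloca never escapes, so no
+; other thread can observe them.)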
+define void @test5() {
+; CHECK: @test5
+; CHECK-NOT: alloca
+; CHECK-NOT: store
+; CHECK: ret
+
+entry:
+ %a = alloca { i32 }
+ %b = alloca i32*
+ %a.1 = getelementptr { i32 }* %a, i32 0, i32 0
+ store i32 123, i32* %a.1
+ store i32* %a.1, i32** %b
+ %b.1 = bitcast i32** %b to i32*
+ store i32 123, i32* %b.1
+ %a.2 = getelementptr { i32 }* %a, i32 0, i32 0
+ store atomic i32 2, i32* %a.2 unordered, align 4
+ %a.3 = getelementptr { i32 }* %a, i32 0, i32 0
+ store atomic i32 3, i32* %a.3 release, align 4
+ %a.4 = getelementptr { i32 }* %a, i32 0, i32 0
+ store atomic i32 4, i32* %a.4 seq_cst, align 4
+ ret void
+}
+
+declare void @f(i32* %p)
+
+; Check that we don't delete allocas in some erroneous cases.
+define void @test6() {
+; CHECK: @test6
+; CHECK-NOT: ret
+; CHECK: alloca
+; CHECK-NEXT: alloca
+; CHECK: ret
+
+entry:
+ %a = alloca { i32 }
+ %b = alloca i32
+ %a.1 = getelementptr { i32 }* %a, i32 0, i32 0
+ store volatile i32 123, i32* %a.1
+ tail call void @f(i32* %b)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/apint-shl-trunc.ll b/test/Transforms/InstCombine/apint-shl-trunc.ll
index 8163e6d527d7..f2dc7d5130a9 100644
--- a/test/Transforms/InstCombine/apint-shl-trunc.ll
+++ b/test/Transforms/InstCombine/apint-shl-trunc.ll
@@ -1,13 +1,24 @@
-; RUN: opt < %s -instcombine -S | grep shl
-; END.
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i1 @test0(i39 %X, i39 %A) {
+; CHECK: @test0
+; CHECK: %[[V1:.*]] = shl i39 1, %A
+; CHECK: %[[V2:.*]] = and i39 %[[V1]], %X
+; CHECK: %[[V3:.*]] = icmp ne i39 %[[V2]], 0
+; CHECK: ret i1 %[[V3]]
+
%B = lshr i39 %X, %A
%D = trunc i39 %B to i1
ret i1 %D
}
define i1 @test1(i799 %X, i799 %A) {
+; CHECK: @test1
+; CHECK: %[[V1:.*]] = shl i799 1, %A
+; CHECK: %[[V2:.*]] = and i799 %[[V1]], %X
+; CHECK: %[[V3:.*]] = icmp ne i799 %[[V2]], 0
+; CHECK: ret i1 %[[V3]]
+
%B = lshr i799 %X, %A
%D = trunc i799 %B to i1
ret i1 %D
diff --git a/test/Transforms/InstCombine/bitcount.ll b/test/Transforms/InstCombine/bitcount.ll
index f75ca2df69d1..a6fd83742c28 100644
--- a/test/Transforms/InstCombine/bitcount.ll
+++ b/test/Transforms/InstCombine/bitcount.ll
@@ -4,13 +4,13 @@
; RUN: grep -v declare | not grep llvm.ct
declare i31 @llvm.ctpop.i31(i31 %val)
-declare i32 @llvm.cttz.i32(i32 %val)
-declare i33 @llvm.ctlz.i33(i33 %val)
+declare i32 @llvm.cttz.i32(i32 %val, i1)
+declare i33 @llvm.ctlz.i33(i33 %val, i1)
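+; The new trailing i1 operand is the 'is_zero_undef' flag: when it is true,
+; the result of the intrinsic is undefined for a zero input.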
define i32 @test(i32 %A) {
%c1 = call i31 @llvm.ctpop.i31(i31 12415124)
- %c2 = call i32 @llvm.cttz.i32(i32 87359874)
- %c3 = call i33 @llvm.ctlz.i33(i33 87359874)
+ %c2 = call i32 @llvm.cttz.i32(i32 87359874, i1 true)
+ %c3 = call i33 @llvm.ctlz.i33(i33 87359874, i1 true)
%t1 = zext i31 %c1 to i32
%t3 = trunc i33 %c3 to i32
%r1 = add i32 %t1, %c2
diff --git a/test/Transforms/InstCombine/constant-fold-gep.ll b/test/Transforms/InstCombine/constant-fold-gep.ll
index c679226d4a9d..e5b16ea0ffdc 100644
--- a/test/Transforms/InstCombine/constant-fold-gep.ll
+++ b/test/Transforms/InstCombine/constant-fold-gep.ll
@@ -9,7 +9,7 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@Y = internal global [3 x %struct.X] zeroinitializer
define void @frob() {
-; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 16
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0), align 4
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 1), align 4
@@ -33,7 +33,7 @@ define void @frob() {
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 10), align 4
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 2), align 4
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 11), align 4
-; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 0), align 16
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 12), align 4
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 1), align 4
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 13), align 4
@@ -47,7 +47,7 @@ define void @frob() {
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 17), align 8
; CHECK: store i32 1, i32* getelementptr inbounds ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 0), align 8
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 18), align 8
-; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 8
+; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 2, i64 0, i32 0, i64 0), align 16
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 36), align 8
; CHECK: store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 1, i64 0, i32 0, i64 1), align 8
store i32 1, i32* getelementptr ([3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 19), align 8
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
index 54a77aab45f9..d5af5321dec3 100644
--- a/test/Transforms/InstCombine/crash.ll
+++ b/test/Transforms/InstCombine/crash.ll
@@ -165,20 +165,19 @@ entry:
br i1 %tobool, label %cond.end, label %cond.false
terminate.handler: ; preds = %ehcleanup
- %exc = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
- %0 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 1) ; <i32> [#uses=0]
+ %exc = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
call void @_ZSt9terminatev() noreturn nounwind
unreachable
ehcleanup: ; preds = %cond.false
- %exc1 = call i8* @llvm.eh.exception() ; <i8*> [#uses=2]
- %1 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exc1, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null) ; <i32> [#uses=0]
+ %exc1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
invoke void @_ZN6UStackD1Ev(%class.UStack* %breaks)
to label %cont unwind label %terminate.handler
cont: ; preds = %ehcleanup
- call void @_Unwind_Resume_or_Rethrow(i8* %exc1)
- unreachable
+ resume { i8*, i32 } %exc1
cond.false: ; preds = %entry
%tmp4 = getelementptr inbounds %class.RuleBasedBreakIterator* %this, i32 0, i32 0 ; <i64 ()**> [#uses=1]
@@ -199,10 +198,6 @@ declare void @_ZN6UStackD1Ev(%class.UStack*)
declare i32 @__gxx_personality_v0(...)
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare void @_ZSt9terminatev()
declare void @_Unwind_Resume_or_Rethrow(i8*)
diff --git a/test/Transforms/InstCombine/dg.exp b/test/Transforms/InstCombine/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/InstCombine/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstCombine/extractvalue.ll b/test/Transforms/InstCombine/extractvalue.ll
index cf36b8f23717..5e4c67778224 100644
--- a/test/Transforms/InstCombine/extractvalue.ll
+++ b/test/Transforms/InstCombine/extractvalue.ll
@@ -88,7 +88,7 @@ define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
; CHECK-NEXT: ret
define i32 @nogep-multiuse({i32, i32}* %pair) {
; The load should be left unchanged since both parts are needed.
- %L = volatile load {i32, i32}* %pair
+ %L = load volatile {i32, i32}* %pair
%LHS = extractvalue {i32, i32} %L, 0
%RHS = extractvalue {i32, i32} %L, 1
%R = add i32 %LHS, %RHS
@@ -100,8 +100,8 @@ define i32 @nogep-multiuse({i32, i32}* %pair) {
; CHECK-NEXT: extractvalue
; CHECK-NEXT: ret
define i32 @nogep-volatile({i32, i32}* %pair) {
- ; The volatile load should be left unchanged.
- %L = volatile load {i32, i32}* %pair
+ ; The load volatile should be left unchanged.
+ %L = load volatile {i32, i32}* %pair
%E = extractvalue {i32, i32} %L, 1
ret i32 %E
}
diff --git a/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll b/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
new file mode 100644
index 000000000000..bd92b4a29c05
--- /dev/null
+++ b/test/Transforms/InstCombine/fold-sqrt-sqrtf.ll
@@ -0,0 +1,17 @@
+; RUN: opt -instcombine -S -disable-simplify-libcalls < %s | FileCheck %s
+; rdar://10466410
+
+; Instcombine tries to fold (fptrunc (sqrt (fpext x))) -> (sqrtf x), but this
+; shouldn't fold when sqrtf isn't available.
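+; (Here -disable-simplify-libcalls stands in for a target where sqrtf is
+; unavailable.)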
+define float @foo(float %f) uwtable ssp {
+entry:
+; CHECK: %conv = fpext float %f to double
+; CHECK: %call = tail call double @sqrt(double %conv)
+; CHECK: %conv1 = fptrunc double %call to float
+ %conv = fpext float %f to double
+ %call = tail call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+}
+
+declare double @sqrt(double)
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 77ca62cfec67..a9ae221d8f96 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -121,8 +121,8 @@ define i1 @test12(i1 %A) {
%B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
ret i1 %B
; CHECK: @test12
-; CHECK-NEXT: %B = select i1
-; CHECK-NEXT: ret i1 %B
+; CHECK-NEXT: = xor i1 %A, true
+; CHECK-NEXT: ret i1
}
; PR6481
@@ -524,7 +524,7 @@ define i1 @test53(i32 %a, i32 %b) nounwind {
; CHECK: @test54
; CHECK-NEXT: %and = and i8 %a, -64
-; CHECK-NEXT icmp eq i8 %and, -128
+; CHECK-NEXT: icmp eq i8 %and, -128
define i1 @test54(i8 %a) nounwind {
%ext = zext i8 %a to i32
%and = and i32 %ext, 192
@@ -559,3 +559,81 @@ define i1 @test57(i32 %a) {
call void @foo(i32 %and)
ret i1 %cmp
}
+
+; rdar://problem/10482509
+; CHECK: @cmpabs1
+; CHECK-NEXT: icmp ne
+define zeroext i1 @cmpabs1(i64 %val) {
+ %sub = sub nsw i64 0, %val
+ %cmp = icmp slt i64 %val, 0
+ %sub.val = select i1 %cmp, i64 %sub, i64 %val
+ %tobool = icmp ne i64 %sub.val, 0
+ ret i1 %tobool
+}
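+; |%val| is zero exactly when %val is zero, so the abs/nabs select idiom in
+; both functions folds down to a single icmp ne.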
+
+; CHECK: @cmpabs2
+; CHECK-NEXT: icmp ne
+define zeroext i1 @cmpabs2(i64 %val) {
+ %sub = sub nsw i64 0, %val
+ %cmp = icmp slt i64 %val, 0
+ %sub.val = select i1 %cmp, i64 %val, i64 %sub
+ %tobool = icmp ne i64 %sub.val, 0
+ ret i1 %tobool
+}
+
+; CHECK: @test58
+; CHECK-NEXT: call i32 @test58_d(i64 36029346783166592)
+define void @test58() nounwind {
+ %cast = bitcast <1 x i64> <i64 36029346783166592> to i64
+ %call = call i32 @test58_d( i64 %cast) nounwind
+ ret void
+}
+declare i32 @test58_d(i64)
+
+define i1 @test59(i8* %foo) {
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32* %bit, i64 2
+ %gep2 = getelementptr inbounds i8* %foo, i64 10
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ %use = ptrtoint i8* %cast1 to i64
+ %call = call i32 @test58_d(i64 %use) nounwind
+ ret i1 %cmp
+; CHECK: @test59
+; CHECK: ret i1 true
+}
+
+define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8* %foo, i64 %j
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+; CHECK: @test60
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr i32* %bit, i64 %i
+ %gep2 = getelementptr i8* %foo, i64 %j
+ %cast1 = bitcast i32* %gep1 to i8*
+ %cmp = icmp ult i8* %cast1, %gep2
+ ret i1 %cmp
+; Don't transform non-inbounds GEPs.
+; CHECK: @test61
+; CHECK: icmp ult i8* %cast1, %gep2
+; CHECK-NEXT: ret i1
+}
+
+define i1 @test62(i8* %a) {
+ %arrayidx1 = getelementptr inbounds i8* %a, i64 1
+ %arrayidx2 = getelementptr inbounds i8* %a, i64 10
+ %cmp = icmp slt i8* %arrayidx1, %arrayidx2
+ ret i1 %cmp
+; CHECK: @test62
+; CHECK-NEXT: ret i1 true
+}
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index f033e510368b..382e6b38574d 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -5,10 +5,10 @@
declare %overflow.result @llvm.uadd.with.overflow.i8(i8, i8)
declare %overflow.result @llvm.umul.with.overflow.i8(i8, i8)
declare double @llvm.powi.f64(double, i32) nounwind readonly
-declare i32 @llvm.cttz.i32(i32) nounwind readnone
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
-declare i8 @llvm.ctlz.i8(i8) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
define i8 @uaddtest1(i8 %A, i8 %B) {
%x = call %overflow.result @llvm.uadd.with.overflow.i8(i8 %A, i8 %B)
@@ -142,13 +142,13 @@ define i32 @umultest4(i32 %n) nounwind {
define void @powi(double %V, double *%P) {
entry:
%A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind
- volatile store double %A, double* %P
+ store volatile double %A, double* %P
%B = tail call double @llvm.powi.f64(double %V, i32 0) nounwind
- volatile store double %B, double* %P
+ store volatile double %B, double* %P
%C = tail call double @llvm.powi.f64(double %V, i32 1) nounwind
- volatile store double %C, double* %P
+ store volatile double %C, double* %P
ret void
; CHECK: @powi
; CHECK: %A = fdiv double 1.0{{.*}}, %V
@@ -161,7 +161,7 @@ define i32 @cttz(i32 %a) {
entry:
%or = or i32 %a, 8
%and = and i32 %or, -8
- %count = tail call i32 @llvm.cttz.i32(i32 %and) nounwind readnone
+ %count = tail call i32 @llvm.cttz.i32(i32 %and, i1 true) nounwind readnone
ret i32 %count
; CHECK: @cttz
; CHECK-NEXT: entry:
@@ -172,7 +172,7 @@ define i8 @ctlz(i8 %a) {
entry:
%or = or i8 %a, 32
%and = and i8 %or, 63
- %count = tail call i8 @llvm.ctlz.i8(i8 %and) nounwind readnone
+ %count = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) nounwind readnone
ret i8 %count
; CHECK: @ctlz
; CHECK-NEXT: entry:
@@ -181,15 +181,15 @@ entry:
define void @cmp.simplify(i32 %a, i32 %b, i1* %c) {
entry:
- %lz = tail call i32 @llvm.ctlz.i32(i32 %a) nounwind readnone
+ %lz = tail call i32 @llvm.ctlz.i32(i32 %a, i1 false) nounwind readnone
%lz.cmp = icmp eq i32 %lz, 32
- volatile store i1 %lz.cmp, i1* %c
- %tz = tail call i32 @llvm.cttz.i32(i32 %a) nounwind readnone
+ store volatile i1 %lz.cmp, i1* %c
+ %tz = tail call i32 @llvm.cttz.i32(i32 %a, i1 false) nounwind readnone
%tz.cmp = icmp ne i32 %tz, 32
- volatile store i1 %tz.cmp, i1* %c
+ store volatile i1 %tz.cmp, i1* %c
%pop = tail call i32 @llvm.ctpop.i32(i32 %b) nounwind readnone
%pop.cmp = icmp eq i32 %pop, 0
- volatile store i1 %pop.cmp, i1* %c
+ store volatile i1 %pop.cmp, i1* %c
ret void
; CHECK: @cmp.simplify
; CHECK-NEXT: entry:
@@ -201,16 +201,22 @@ entry:
; CHECK-NEXT: store volatile i1 %pop.cmp, i1* %c
}
-
-define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
- %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x) ; <i32> [#uses=1]
- %shr3 = lshr i32 %tmp1, 5 ; <i32> [#uses=1]
+define i32 @cttz_simplify1a(i32 %x) nounwind readnone ssp {
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
+ %shr3 = lshr i32 %tmp1, 5
ret i32 %shr3
-
-; CHECK: @cttz_simplify1
+
+; CHECK: @cttz_simplify1a
; CHECK: icmp eq i32 %x, 0
-; CHECK-NEXT: zext i1
+; CHECK-NEXT: zext i1
; CHECK-NEXT: ret i32
}
+define i32 @cttz_simplify1b(i32 %x) nounwind readnone ssp {
+ %tmp1 = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true)
+ %shr3 = lshr i32 %tmp1, 5
+ ret i32 %shr3
+; CHECK: @cttz_simplify1b
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/lit.local.cfg b/test/Transforms/InstCombine/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/InstCombine/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 53a56434aede..edb530585ce1 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,116 +1,184 @@
; This test makes sure that mul instructions are properly eliminated.
-; RUN: opt < %s -instcombine -S | not grep mul
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
+; CHECK: @test1
%B = mul i32 %A, 1 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: ret i32 %A
}
define i32 @test2(i32 %A) {
+; CHECK: @test2
; Should convert to an add instruction
%B = mul i32 %A, 2 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: shl i32 %A, 1
}
define i32 @test3(i32 %A) {
+; CHECK: @test3
; This should disappear entirely
%B = mul i32 %A, 0 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: ret i32 0
}
define double @test4(double %A) {
+; CHECK: @test4
; This is safe for FP
%B = fmul double 1.000000e+00, %A ; <double> [#uses=1]
ret double %B
+; CHECK: ret double %A
}
define i32 @test5(i32 %A) {
+; CHECK: @test5
%B = mul i32 %A, 8 ; <i32> [#uses=1]
ret i32 %B
+; CHECK: shl i32 %A, 3
}
define i8 @test6(i8 %A) {
+; CHECK: @test6
%B = mul i8 %A, 8 ; <i8> [#uses=1]
%C = mul i8 %B, 8 ; <i8> [#uses=1]
ret i8 %C
+; CHECK: shl i8 %A, 6
}
define i32 @test7(i32 %i) {
+; CHECK: @test7
%tmp = mul i32 %i, -1 ; <i32> [#uses=1]
ret i32 %tmp
+; CHECK: sub i32 0, %i
}
define i64 @test8(i64 %i) {
- ; tmp = sub 0, %i
+; CHECK: @test8
%j = mul i64 %i, -1 ; <i64> [#uses=1]
ret i64 %j
+; CHECK: sub i64 0, %i
}
define i32 @test9(i32 %i) {
- ; %j = sub 0, %i
+; CHECK: @test9
%j = mul i32 %i, -1 ; <i32> [#uses=1]
ret i32 %j
+; CHECK: sub i32 0, %i
}
define i32 @test10(i32 %a, i32 %b) {
+; CHECK: @test10
%c = icmp slt i32 %a, 0 ; <i1> [#uses=1]
%d = zext i1 %c to i32 ; <i32> [#uses=1]
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST10]], %b
+; CHECK-NEXT: ret i32 %e
}
define i32 @test11(i32 %a, i32 %b) {
+; CHECK: @test11
%c = icmp sle i32 %a, -1 ; <i1> [#uses=1]
%d = zext i1 %c to i32 ; <i32> [#uses=1]
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST11]], %b
+; CHECK-NEXT: ret i32 %e
}
-define i32 @test12(i8 %a, i32 %b) {
- %c = icmp ugt i8 %a, 127 ; <i1> [#uses=1]
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK: @test12
+ %c = icmp ugt i32 %a, 2147483647 ; <i1> [#uses=1]
%d = zext i1 %c to i32 ; <i32> [#uses=1]
- ; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST12]], %b
+; CHECK-NEXT: ret i32 %e
+
}
; PR2642
define internal void @test13(<4 x float>*) {
+; CHECK: @test13
load <4 x float>* %0, align 1
fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
store <4 x float> %3, <4 x float>* %0, align 1
ret void
+; CHECK-NEXT: ret void
}
define <16 x i8> @test14(<16 x i8> %a) {
+; CHECK: @test14
%b = mul <16 x i8> %a, zeroinitializer
ret <16 x i8> %b
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
}
; rdar://7293527
define i32 @test15(i32 %A, i32 %B) {
+; CHECK: @test15
entry:
%shl = shl i32 1, %B
%m = mul i32 %shl, %A
ret i32 %m
+; CHECK: shl i32 %A, %B
}
; X * Y (when Y is 0 or 1) --> x & (0-Y)
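; (When Y is 0 or 1, 0-Y is 0 or all-ones, so the 'and' yields 0 or X, exactly
; matching the multiply.)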
define i32 @test16(i32 %b, i1 %c) {
+; CHECK: @test16
%d = zext i1 %c to i32 ; <i32> [#uses=1]
; e = b & (a >> 31)
%e = mul i32 %d, %b ; <i32> [#uses=1]
ret i32 %e
+; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
+; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK-NEXT: ret i32 %e
}
; X * Y (when Y is 0 or 1) --> x & (0-Y)
define i32 @test17(i32 %a, i32 %b) {
+; CHECK: @test17
%a.lobit = lshr i32 %a, 31
%e = mul i32 %a.lobit, %b
ret i32 %e
+; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST17]], %b
+; CHECK-NEXT: ret i32 %e
+}
+
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK: @test18
+ %C = and i32 %A, 1
+ %D = and i32 %B, 1
+
+ %E = mul i32 %C, %D
+ %F = and i32 %E, 16
+ ret i32 %F
+; CHECK-NEXT: ret i32 0
}
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK: @test19
+ %C = and i32 %A, 1
+ %D = and i32 %B, 1
+; It would be nice if we also started proving that this doesn't overflow.
+ %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+ %F = extractvalue {i32, i1} %E, 0
+ %G = extractvalue {i32, i1} %E, 1
+ call void @use(i1 %G)
+ %H = and i32 %F, 16
+ ret i32 %H
+; CHECK: ret i32 0
+}
diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll
index 9123283988de..81ceef8c41cf 100644
--- a/test/Transforms/InstCombine/overflow.ll
+++ b/test/Transforms/InstCombine/overflow.ll
@@ -130,4 +130,26 @@ entry:
ret i64 %Q
}
+; CHECK: @test8
+; PR11438
+; This is @test1, but the operands are not sign-extended. Make sure
+; we don't transform this case.
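+; (Without the sign extensions the i64 operands are unconstrained, so this
+; check is not equivalent to a 32-bit signed-add overflow test.)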
+define i32 @test8(i64 %a, i64 %b) nounwind ssp {
+entry:
+; CHECK-NOT: llvm.sadd
+; CHECK: add i64 %a, %b
+; CHECK-NOT: llvm.sadd
+; CHECK: ret
+ %add = add i64 %a, %b
+ %add.off = add i64 %add, 2147483648
+ %0 = icmp ugt i64 %add.off, 4294967295
+ br i1 %0, label %if.then, label %if.end
+
+if.then:
+ tail call void @throwAnExceptionOrWhatever() nounwind
+ br label %if.end
+if.end:
+ %conv9 = trunc i64 %add to i32
+ ret i32 %conv9
+}
diff --git a/test/Transforms/InstCombine/pr12251.ll b/test/Transforms/InstCombine/pr12251.ll
new file mode 100644
index 000000000000..74a41eb7d227
--- /dev/null
+++ b/test/Transforms/InstCombine/pr12251.ll
@@ -0,0 +1,15 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define zeroext i1 @_Z3fooPb(i8* nocapture %x) {
+entry:
+ %a = load i8* %x, align 1, !range !0
+ %b = and i8 %a, 1
+ %tobool = icmp ne i8 %b, 0
+ ret i1 %tobool
+}
+
+; CHECK: %a = load i8* %x, align 1, !range !0
+; CHECK-NEXT: %tobool = icmp ne i8 %a, 0
+; CHECK-NEXT: ret i1 %tobool
+
+!0 = metadata !{i8 0, i8 2}
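+; The !range metadata restricts %a to [0, 2), i.e. 0 or 1, so the 'and' with 1
+; is a no-op and can be dropped.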
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 46615613eb9c..4baae2618dde 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -809,3 +809,23 @@ define i32 @test61(i32* %ptr) {
; CHECK: @test61
; CHECK: ret i32 10
}
+
+define i1 @test62(i1 %A, i1 %B) {
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %not, i1 %B
+ ret i1 %C
+; CHECK: @test62
+; CHECK: %not = xor i1 %A, true
+; CHECK: %C = and i1 %not, %B
+; CHECK: ret i1 %C
+}
+
+define i1 @test63(i1 %A, i1 %B) {
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %B, i1 %not
+ ret i1 %C
+; CHECK: @test63
+; CHECK: %not = xor i1 %A, true
+; CHECK: %C = or i1 %B, %not
+; CHECK: ret i1 %C
+}
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
index f49a2efb39d8..f1987973f462 100644
--- a/test/Transforms/InstCombine/sext.ll
+++ b/test/Transforms/InstCombine/sext.ll
@@ -3,8 +3,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
declare i32 @llvm.ctpop.i32(i32)
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
define i64 @test1(i32 %x) {
%t = call i32 @llvm.ctpop.i32(i32 %x)
@@ -16,7 +16,7 @@ define i64 @test1(i32 %x) {
}
define i64 @test2(i32 %x) {
- %t = call i32 @llvm.ctlz.i32(i32 %x)
+ %t = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
%s = sext i32 %t to i64
ret i64 %s
@@ -25,7 +25,7 @@ define i64 @test2(i32 %x) {
}
define i64 @test3(i32 %x) {
- %t = call i32 @llvm.cttz.i32(i32 %x)
+ %t = call i32 @llvm.cttz.i32(i32 %x, i1 true)
%s = sext i32 %t to i64
ret i64 %s
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 132d51a660bb..52310e34e09d 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -542,3 +542,75 @@ define i32 @test45(i32 %a) nounwind {
; CHECK-NEXT: %y = lshr i32 %a, 5
; CHECK-NEXT: ret i32 %y
}
+
+define i32 @test46(i32 %a) {
+ %y = ashr exact i32 %a, 3
+ %z = shl i32 %y, 1
+ ret i32 %z
+; CHECK: @test46
+; CHECK-NEXT: %z = ashr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
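+; The 'exact' flag guarantees the bits shifted out are zero, so the shr/shl
+; pair combines into a single shift by the difference of the two amounts.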
+
+define i32 @test47(i32 %a) {
+ %y = lshr exact i32 %a, 3
+ %z = shl i32 %y, 1
+ ret i32 %z
+; CHECK: @test47
+; CHECK-NEXT: %z = lshr exact i32 %a, 2
+; CHECK-NEXT: ret i32 %z
+}
+
+define i32 @test48(i32 %x) {
+ %A = lshr exact i32 %x, 1
+ %B = shl i32 %A, 3
+ ret i32 %B
+; CHECK: @test48
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test49(i32 %x) {
+ %A = ashr exact i32 %x, 1
+ %B = shl i32 %A, 3
+ ret i32 %B
+; CHECK: @test49
+; CHECK-NEXT: %B = shl i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test50(i32 %x) {
+ %A = shl nsw i32 %x, 1
+ %B = ashr i32 %A, 3
+ ret i32 %B
+; CHECK: @test50
+; CHECK-NEXT: %B = ashr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test51(i32 %x) {
+ %A = shl nuw i32 %x, 1
+ %B = lshr i32 %A, 3
+ ret i32 %B
+; CHECK: @test51
+; CHECK-NEXT: %B = lshr i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test52(i32 %x) {
+ %A = shl nsw i32 %x, 3
+ %B = ashr i32 %A, 1
+ ret i32 %B
+; CHECK: @test52
+; CHECK-NEXT: %B = shl nsw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
+
+define i32 @test53(i32 %x) {
+ %A = shl nuw i32 %x, 3
+ %B = lshr i32 %A, 1
+ ret i32 %B
+; CHECK: @test53
+; CHECK-NEXT: %B = shl nuw i32 %x, 2
+; CHECK-NEXT: ret i32 %B
+}
diff --git a/test/Transforms/InstCombine/sign-test-and-or.ll b/test/Transforms/InstCombine/sign-test-and-or.ll
index 47f5f3051e46..a6066d80020d 100644
--- a/test/Transforms/InstCombine/sign-test-and-or.ll
+++ b/test/Transforms/InstCombine/sign-test-and-or.ll
@@ -77,3 +77,103 @@ if.then:
if.end:
ret void
}
+
+define void @test5(i32 %a) nounwind {
+ %and = and i32 %a, 134217728
+ %1 = icmp eq i32 %and, 0
+ %2 = icmp sgt i32 %a, -1
+ %or.cond = and i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test5
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test6(i32 %a) nounwind {
+ %1 = icmp sgt i32 %a, -1
+ %and = and i32 %a, 134217728
+ %2 = icmp eq i32 %and, 0
+ %or.cond = and i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test6
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test7(i32 %a) nounwind {
+ %and = and i32 %a, 134217728
+ %1 = icmp ne i32 %and, 0
+ %2 = icmp slt i32 %a, 0
+ %or.cond = or i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test7
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.then
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test8(i32 %a) nounwind {
+ %1 = icmp slt i32 %a, 0
+ %and = and i32 %a, 134217728
+ %2 = icmp ne i32 %and, 0
+ %or.cond = or i1 %1, %2
+ br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test8
+; CHECK-NEXT: %1 = and i32 %a, -2013265920
+; CHECK-NEXT: %2 = icmp eq i32 %1, 0
+; CHECK-NEXT: br i1 %2, label %if.end, label %if.then
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
+
+define void @test9(i32 %a) nounwind {
+ %1 = and i32 %a, 1073741824
+ %2 = icmp ne i32 %1, 0
+ %3 = icmp sgt i32 %a, -1
+ %or.cond = and i1 %2, %3
+ br i1 %or.cond, label %if.then, label %if.end
+
+; CHECK: @test9
+; CHECK-NEXT: %1 = and i32 %a, -1073741824
+; CHECK-NEXT: %2 = icmp eq i32 %1, 1073741824
+; CHECK-NEXT: br i1 %2, label %if.then, label %if.end
+
+if.then:
+ tail call void @foo() nounwind
+ ret void
+
+if.end:
+ ret void
+}
diff --git a/test/Transforms/InstCombine/sub-xor.ll b/test/Transforms/InstCombine/sub-xor.ll
new file mode 100644
index 000000000000..279e4aca9de4
--- /dev/null
+++ b/test/Transforms/InstCombine/sub-xor.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define i32 @test1(i32 %x) nounwind {
+ %and = and i32 %x, 31
+ %sub = sub i32 63, %and
+ ret i32 %sub
+
+; CHECK: @test1
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: xor i32 %and, 63
+; CHECK-NEXT: ret
+}
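+; 63 is 0b111111 and covers every bit %and can have set, so the subtraction
+; never borrows and is equivalent to an xor.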
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+
+define i32 @test2(i32 %x) nounwind {
+ %count = tail call i32 @llvm.ctlz.i32(i32 %x, i1 true) nounwind readnone
+ %sub = sub i32 31, %count
+ ret i32 %sub
+
+; CHECK: @test2
+; CHECK-NEXT: ctlz
+; CHECK-NEXT: xor i32 %count, 31
+; CHECK-NEXT: ret
+}
+
+define i32 @test3(i32 %x) nounwind {
+ %and = and i32 %x, 31
+ %sub = xor i32 31, %and
+ %add = add i32 %sub, 42
+ ret i32 %add
+
+; CHECK: @test3
+; CHECK-NEXT: and i32 %x, 31
+; CHECK-NEXT: sub i32 73, %and
+; CHECK-NEXT: ret
+}
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 37de3281358e..b71ec8c98f83 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -301,3 +301,29 @@ define i32 @test28(i32 %x, i32 %y, i32 %z) {
; CHECK-NEXT: add i32
; CHECK-NEXT: ret i32
}
+
+define i64 @test29(i8* %foo, i64 %i, i64 %j) {
+ %gep1 = getelementptr inbounds i8* %foo, i64 %i
+ %gep2 = getelementptr inbounds i8* %foo, i64 %j
+ %cast1 = ptrtoint i8* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ ret i64 %sub
+; CHECK: @test29
+; CHECK-NEXT: sub i64 %i, %j
+; CHECK-NEXT: ret i64
+}
+
+define i64 @test30(i8* %foo, i64 %i, i64 %j) {
+ %bit = bitcast i8* %foo to i32*
+ %gep1 = getelementptr inbounds i32* %bit, i64 %i
+ %gep2 = getelementptr inbounds i8* %foo, i64 %j
+ %cast1 = ptrtoint i32* %gep1 to i64
+ %cast2 = ptrtoint i8* %gep2 to i64
+ %sub = sub i64 %cast1, %cast2
+ ret i64 %sub
+; CHECK: @test30
+; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
+; CHECK-NEXT: sub i64 %gep1.idx, %j
+; CHECK-NEXT: ret i64
+}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 896cb881760b..8f78c2e6bd50 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -98,6 +98,17 @@ define <4 x i8> @test9a(<16 x i8> %tmp6) nounwind {
ret <4 x i8> %tmp9
}
+; Test fold of two shuffles where the first shuffle's input vectors are a
+; different length than the second's.
+define <4 x i8> @test9b(<4 x i8> %tmp6, <4 x i8> %tmp7) nounwind {
+; CHECK: @test9b
+; CHECK-NEXT: shufflevector
+; CHECK-NEXT: ret
+ %tmp1 = shufflevector <4 x i8> %tmp6, <4 x i8> %tmp7, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 2, i32 3> ; <<4 x i8>> [#uses=1]
+ %tmp9 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; <<4 x i8>> [#uses=1]
+ ret <4 x i8> %tmp9
+}
+
; Redundant vector splats should be removed. Radar 8597790.
define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
; CHECK: @test10
@@ -107,3 +118,38 @@ define <4 x i32> @test10(<4 x i32> %tmp5) nounwind {
%tmp7 = shufflevector <4 x i32> %tmp6, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %tmp7
}
+
+; Test fold of two shuffles where the two input shufflevectors' first operand
+; is the same.
+define <8 x i8> @test11(<16 x i8> %tmp6) nounwind {
+; CHECK: @test11
+; CHECK-NEXT: shufflevector <16 x i8> %tmp6, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret
+ %tmp1 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x i8>> [#uses=1]
+ %tmp2 = shufflevector <16 x i8> %tmp6, <16 x i8> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; <<4 x i8>> [#uses=1]
+ %tmp3 = shufflevector <4 x i8> %tmp1, <4 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i8>> [#uses=1]
+ ret <8 x i8> %tmp3
+}
+
+; Test fold of two shuffles where the first shufflevector's inputs are
+; the same as the second's.
+define <8 x i8> @test12(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
+; CHECK: @test12
+; CHECK-NEXT: shufflevector <8 x i8> %tmp6, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12>
+; CHECK-NEXT: ret
+ %tmp1 = shufflevector <8 x i8> %tmp6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7> ; <<8 x i8>> [#uses=1]
+ %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9, i32 8, i32 11, i32 12> ; <<8 x i8>> [#uses=1]
+ ret <8 x i8> %tmp3
+}
+
+; Test fold of two shuffles where the first shufflevector's inputs are
+; the same as the second's.
+define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
+; CHECK: @test12a
+; CHECK-NEXT: shufflevector <8 x i8> %tmp2, <8 x i8> %tmp6, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11>
+; CHECK-NEXT: ret
+ %tmp1 = shufflevector <8 x i8> %tmp6, <8 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 undef, i32 7> ; <<8 x i8>> [#uses=1]
+ %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp1, <8 x i32> <i32 0, i32 3, i32 1, i32 4, i32 8, i32 9, i32 10, i32 11> ; <<8 x i8>> [#uses=1]
+ ret <8 x i8> %tmp3
+}
+
diff --git a/test/Transforms/InstCombine/vector_gep1.ll b/test/Transforms/InstCombine/vector_gep1.ll
new file mode 100644
index 000000000000..652362299562
--- /dev/null
+++ b/test/Transforms/InstCombine/vector_gep1.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine %s -disable-output
+; RUN: opt -instsimplify %s -disable-output
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@G1 = global i8 zeroinitializer
+
+define <2 x i1> @test(<2 x i8*> %a, <2 x i8*> %b) {
+ %A = icmp eq <2 x i8*> %a, %b
+ ret <2 x i1> %A
+}
+
+define <2 x i1> @test2(<2 x i8*> %a) {
+ %A = inttoptr <2 x i32> <i32 1, i32 2> to <2 x i8*>
+ %B = icmp ult <2 x i8*> %A, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define <2 x i1> @test3(<2 x i8*> %a) {
+ %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 1, i32 0>
+ %B = icmp ult <2 x i8*> %g, zeroinitializer
+ ret <2 x i1> %B
+}
+
+define <1 x i1> @test4(<1 x i8*> %a) {
+ %g = getelementptr <1 x i8*> %a, <1 x i32> <i32 1>
+ %B = icmp ult <1 x i8*> %g, zeroinitializer
+ ret <1 x i1> %B
+}
+
+define <2 x i1> @test5(<2 x i8*> %a) {
+ %w = getelementptr <2 x i8*> %a, <2 x i32> zeroinitializer
+ %e = getelementptr <2 x i8*> %w, <2 x i32> <i32 5, i32 9>
+ %g = getelementptr <2 x i8*> %e, <2 x i32> <i32 1, i32 0>
+ %B = icmp ult <2 x i8*> %g, zeroinitializer
+ ret <2 x i1> %B
+}
diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll
index 0518e5aa0262..22566781180d 100644
--- a/test/Transforms/InstCombine/volatile_store.ll
+++ b/test/Transforms/InstCombine/volatile_store.ll
@@ -5,8 +5,8 @@
define void @self_assign_1() {
entry:
- %tmp = volatile load i32* @x ; <i32> [#uses=1]
- volatile store i32 %tmp, i32* @x
+ %tmp = load volatile i32* @x ; <i32> [#uses=1]
+ store volatile i32 %tmp, i32* @x
br label %return
return: ; preds = %entry
diff --git a/test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll b/test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll
new file mode 100644
index 000000000000..a10081a42dd5
--- /dev/null
+++ b/test/Transforms/InstSimplify/2011-10-27-BinOpCrash.ll
@@ -0,0 +1,12 @@
+; RUN: opt < %s -instcombine
+
+@_ZN11xercesc_2_5L11gDigitCharsE = external constant [32 x i16], align 2
+@_ZN11xercesc_2_5L10gBaseCharsE = external constant [354 x i16], align 2
+@_ZN11xercesc_2_5L17gIdeographicCharsE = external constant [7 x i16], align 2
+@_ZN11xercesc_2_5L15gCombiningCharsE = external constant [163 x i16], align 2
+
+define i32 @_ZN11xercesc_2_515XMLRangeFactory11buildRangesEv(i32 %x) {
+ %a = add i32 %x, add (i32 add (i32 ashr (i32 add (i32 mul (i32 ptrtoint ([32 x i16]* @_ZN11xercesc_2_5L11gDigitCharsE to i32), i32 -1), i32 ptrtoint (i16* getelementptr inbounds ([32 x i16]* @_ZN11xercesc_2_5L11gDigitCharsE, i32 0, i32 30) to i32)), i32 1), i32 ashr (i32 add (i32 mul (i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32), i32 -1), i32 ptrtoint (i16* getelementptr inbounds ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32)), i32 1)), i32 8)
+ %b = add i32 %a, %x
+ ret i32 %b
+}
diff --git a/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll b/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll
new file mode 100644
index 000000000000..6166536726ae
--- /dev/null
+++ b/test/Transforms/InstSimplify/2011-11-23-MaskedBitsCrash.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -instsimplify
+
+; The mul can be proved to always overflow (turning a negative value
+; into a positive one) and thus results in undefined behaviour. At
+; the same time we were deducing from the nsw flag that that mul could
+; be assumed to have a negative value (since if not it has an undefined
+; value, which can be taken to be negative). We were reporting the mul
+; as being both positive and negative, firing an assertion!
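+; An illustrative trace: bit 0 of %0 is always set, so %1 is always
+; 0x80000000; the nsw mul then wraps to 0 and %4 evaluates to false.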
+define i1 @test1(i32 %a) {
+entry:
+ %0 = or i32 %a, 1
+ %1 = shl i32 %0, 31
+ %2 = mul nsw i32 %1, 4
+ %3 = and i32 %2, -4
+ %4 = icmp ne i32 %3, 0
+ ret i1 %4
+}
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
new file mode 100644
index 000000000000..33a4d6b02d63
--- /dev/null
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define i64 @pow2(i32 %x) {
+; CHECK: @pow2
+ %negx = sub i32 0, %x
+ %x2 = and i32 %x, %negx
+ %e = zext i32 %x2 to i64
+ %nege = sub i64 0, %e
+ %e2 = and i64 %e, %nege
+ ret i64 %e2
+; CHECK: ret i64 %e
+}
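+; %x2 = %x & -%x has at most one bit set, and and-with-negate is idempotent on
+; such values, so %e2 simplifies back to %e (likewise for %sh = 2 << %x below).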
+
+define i64 @pow2b(i32 %x) {
+; CHECK: @pow2b
+ %sh = shl i32 2, %x
+ %e = zext i32 %sh to i64
+ %nege = sub i64 0, %e
+ %e2 = and i64 %e, %nege
+ ret i64 %e2
+; CHECK: ret i64 %e
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 2cbd641a7426..ced74bd4be9b 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -10,6 +10,161 @@ define i1 @ptrtoint() {
; CHECK: ret i1 false
}
+define i1 @bitcast() {
+; CHECK: @bitcast
+ %a = alloca i32
+ %b = alloca i64
+ %x = bitcast i32* %a to i8*
+ %y = bitcast i64* %b to i8*
+ %cmp = icmp eq i8* %x, %y
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep() {
+; CHECK: @gep
+ %a = alloca [3 x i8], align 8
+ %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+ %cmp = icmp eq i8* %x, null
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep2() {
+; CHECK: @gep2
+ %a = alloca [3 x i8], align 8
+ %x = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+ %y = getelementptr inbounds [3 x i8]* %a, i32 0, i32 0
+ %cmp = icmp eq i8* %x, %y
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 true
+}
+
+; PR11238
+%gept = type { i32, i32 }
+@gepy = global %gept zeroinitializer, align 8
+@gepz = extern_weak global %gept
+
+define i1 @gep3() {
+; CHECK: @gep3
+ %x = alloca %gept, align 8
+ %a = getelementptr %gept* %x, i64 0, i32 0
+ %b = getelementptr %gept* %x, i64 0, i32 1
+ %equal = icmp eq i32* %a, %b
+ ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep4() {
+; CHECK: @gep4
+ %x = alloca %gept, align 8
+ %a = getelementptr %gept* @gepy, i64 0, i32 0
+ %b = getelementptr %gept* @gepy, i64 0, i32 1
+ %equal = icmp eq i32* %a, %b
+ ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep5() {
+; CHECK: @gep5
+ %x = alloca %gept, align 8
+ %a = getelementptr inbounds %gept* %x, i64 0, i32 1
+ %b = getelementptr %gept* @gepy, i64 0, i32 0
+ %equal = icmp eq i32* %a, %b
+ ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep6(%gept* %x) {
+; Same as @gep3 but potentially null.
+; CHECK: @gep6
+ %a = getelementptr %gept* %x, i64 0, i32 0
+ %b = getelementptr %gept* %x, i64 0, i32 1
+ %equal = icmp eq i32* %a, %b
+ ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep7(%gept* %x) {
+; CHECK: @gep7
+ %a = getelementptr %gept* %x, i64 0, i32 0
+ %b = getelementptr %gept* @gepz, i64 0, i32 0
+ %equal = icmp eq i32* %a, %b
+ ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
+define i1 @gep8(%gept* %x) {
+; CHECK: @gep8
+ %a = getelementptr %gept* %x, i32 1
+ %b = getelementptr %gept* %x, i32 -1
+ %equal = icmp ugt %gept* %a, %b
+ ret i1 %equal
+; CHECK: ret i1 %equal
+}
+
+define i1 @gep9(i8* %ptr) {
+; CHECK: @gep9
+; CHECK-NOT: ret
+; CHECK: ret i1 true
+
+entry:
+ %first1 = getelementptr inbounds i8* %ptr, i32 0
+ %first2 = getelementptr inbounds i8* %first1, i32 1
+ %first3 = getelementptr inbounds i8* %first2, i32 2
+ %first4 = getelementptr inbounds i8* %first3, i32 4
+ %last1 = getelementptr inbounds i8* %first2, i32 48
+ %last2 = getelementptr inbounds i8* %last1, i32 8
+ %last3 = getelementptr inbounds i8* %last2, i32 -4
+ %last4 = getelementptr inbounds i8* %last3, i32 -4
+ %first.int = ptrtoint i8* %first4 to i32
+ %last.int = ptrtoint i8* %last4 to i32
+ %cmp = icmp ne i32 %last.int, %first.int
+ ret i1 %cmp
+}
+
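+; Here both chains accumulate to %ptr+42, so the two addresses are always
+; equal.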
+define i1 @gep10(i8* %ptr) {
+; CHECK: @gep10
+; CHECK-NOT: ret
+; CHECK: ret i1 true
+
+entry:
+ %first1 = getelementptr inbounds i8* %ptr, i32 -2
+ %first2 = getelementptr inbounds i8* %first1, i32 44
+ %last1 = getelementptr inbounds i8* %ptr, i32 48
+ %last2 = getelementptr inbounds i8* %last1, i32 -6
+ %first.int = ptrtoint i8* %first2 to i32
+ %last.int = ptrtoint i8* %last2 to i32
+ %cmp = icmp eq i32 %last.int, %first.int
+ ret i1 %cmp
+}
+
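+; %first1 is %ptr-2 and %last2 is %ptr+42; inbounds arithmetic lets the
+; unsigned compare fold to true, but the signed order of the two addresses is
+; unknown, so @gep12 keeps its compare.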
+define i1 @gep11(i8* %ptr) {
+; CHECK: @gep11
+; CHECK-NOT: ret
+; CHECK: ret i1 true
+
+entry:
+ %first1 = getelementptr inbounds i8* %ptr, i32 -2
+ %last1 = getelementptr inbounds i8* %ptr, i32 48
+ %last2 = getelementptr inbounds i8* %last1, i32 -6
+ %cmp = icmp ult i8* %first1, %last2
+ ret i1 %cmp
+}
+
+define i1 @gep12(i8* %ptr) {
+; CHECK: @gep12
+; CHECK-NOT: ret
+; CHECK: ret i1 %cmp
+
+entry:
+ %first1 = getelementptr inbounds i8* %ptr, i32 -2
+ %last1 = getelementptr inbounds i8* %ptr, i32 48
+ %last2 = getelementptr inbounds i8* %last1, i32 -6
+ %cmp = icmp slt i8* %first1, %last2
+ ret i1 %cmp
+}
+
define i1 @zext(i32 %x) {
; CHECK: @zext
%e1 = zext i32 %x to i64
@@ -204,6 +359,24 @@ define i1 @select4(i1 %cond) {
; CHECK: ret i1 %cond
}
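+; When %x is zero the select yields 1, otherwise it yields the non-zero %x,
+; so %s can never be zero.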
+define i1 @select5(i32 %x) {
+; CHECK: @select5
+ %c = icmp eq i32 %x, 0
+ %s = select i1 %c, i32 1, i32 %x
+ %c2 = icmp eq i32 %s, 0
+ ret i1 %c2
+; CHECK: ret i1 false
+}
+
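+; Both arms here are also non-zero under the select condition, but this form
+; is not currently simplified.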
+define i1 @select6(i32 %x) {
+; CHECK: @select6
+ %c = icmp sgt i32 %x, 0
+ %s = select i1 %c, i32 %x, i32 4
+ %c2 = icmp eq i32 %s, 0
+ ret i1 %c2
+; CHECK: ret i1 %c2
+}
+
define i1 @urem1(i32 %X, i32 %Y) {
; CHECK: @urem1
%A = urem i32 %X, %Y
@@ -300,6 +473,40 @@ define i1 @udiv2(i32 %X, i32 %Y, i32 %Z) {
; CHECK: ret i1 true
}
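+; An unsigned quotient never exceeds its dividend, so the comparisons in
+; @udiv3, @udiv4 and @udiv5 all fold to constants.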
+define i1 @udiv3(i32 %X, i32 %Y) {
+; CHECK: @udiv3
+ %A = udiv i32 %X, %Y
+ %C = icmp ugt i32 %A, %X
+ ret i1 %C
+; CHECK: ret i1 false
+}
+
+define i1 @udiv4(i32 %X, i32 %Y) {
+; CHECK: @udiv4
+ %A = udiv i32 %X, %Y
+ %C = icmp ule i32 %A, %X
+ ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @udiv5(i32 %X) {
+; CHECK: @udiv5
+ %A = udiv i32 123, %X
+ %C = icmp ugt i32 %A, 124
+ ret i1 %C
+; CHECK: ret i1 false
+}
+
+; PR11340
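+; "udiv 1, %X" is 1 when %X is 1 and 0 otherwise, so the compare depends on
+; %X and must not be folded.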
+define i1 @udiv6(i32 %X) nounwind {
+; CHECK: @udiv6
+ %A = udiv i32 1, %X
+ %C = icmp eq i32 %A, 0
+ ret i1 %C
+; CHECK: ret i1 %C
+}
+
+
define i1 @sdiv1(i32 %X) {
; CHECK: @sdiv1
%A = sdiv i32 %X, 1000000
@@ -323,3 +530,71 @@ define i1 @and1(i32 %X) {
ret i1 %B
; CHECK: ret i1 false
}
+
+define i1 @mul1(i32 %X) {
+; CHECK: @mul1
+; Square of a non-zero number is non-zero if there is no overflow.
+ %Y = or i32 %X, 1
+ %M = mul nuw i32 %Y, %Y
+ %C = icmp eq i32 %M, 0
+ ret i1 %C
+; CHECK: ret i1 false
+}
+
+define i1 @mul2(i32 %X) {
+; CHECK: @mul2
+; Square of a non-zero number is positive if there is no signed overflow.
+ %Y = or i32 %X, 1
+ %M = mul nsw i32 %Y, %Y
+ %C = icmp sgt i32 %M, 0
+ ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @mul3(i32 %X, i32 %Y) {
+; CHECK: @mul3
+; Product of non-negative numbers is non-negative if there is no signed overflow.
+ %XX = mul nsw i32 %X, %X
+ %YY = mul nsw i32 %Y, %Y
+ %M = mul nsw i32 %XX, %YY
+ %C = icmp sge i32 %M, 0
+ ret i1 %C
+; CHECK: ret i1 true
+}
+
+define <2 x i1> @vectorselect1(<2 x i1> %cond) {
+; CHECK: @vectorselect1
+ %invert = xor <2 x i1> %cond, <i1 1, i1 1>
+ %s = select <2 x i1> %invert, <2 x i32> <i32 0, i32 0>, <2 x i32> <i32 1, i32 1>
+ %c = icmp ne <2 x i32> %s, <i32 0, i32 0>
+ ret <2 x i1> %c
+; CHECK: ret <2 x i1> %cond
+}
+
+; PR11948
+define <2 x i1> @vectorselectcrash(i32 %arg1) {
+ %tobool40 = icmp ne i32 %arg1, 0
+ %cond43 = select i1 %tobool40, <2 x i16> <i16 -5, i16 66>, <2 x i16> <i16 46, i16 1>
+ %cmp45 = icmp ugt <2 x i16> %cond43, <i16 73, i16 21>
+ ret <2 x i1> %cmp45
+}
+
+; PR12013
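+; An inbounds GEP of a non-null alloca cannot itself be null.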
+define i1 @alloca_compare(i64 %idx) {
+ %sv = alloca { i32, i32, [124 x i32] }
+ %1 = getelementptr inbounds { i32, i32, [124 x i32] }* %sv, i32 0, i32 2, i64 %idx
+ %2 = icmp eq i32* %1, null
+ ret i1 %2
+ ; CHECK: alloca_compare
+ ; CHECK: ret i1 false
+}
+
+; PR12075
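+; The GEP below defines itself in terms of itself; it sits in unreachable
+; code, and simplification must not chase the cycle forever.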
+define i1 @infinite_gep() {
+ ret i1 1
+
+unreachableblock:
+ %X = getelementptr i32 *%X, i32 1
+ %Y = icmp eq i32* %X, null
+ ret i1 %Y
+}
diff --git a/test/Transforms/InstSimplify/dg.exp b/test/Transforms/InstSimplify/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/InstSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/InstSimplify/lit.local.cfg b/test/Transforms/InstSimplify/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/InstSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/InstSimplify/phi.ll b/test/Transforms/InstSimplify/phi.ll
new file mode 100644
index 000000000000..05cd40d90210
--- /dev/null
+++ b/test/Transforms/InstSimplify/phi.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; PR12189
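+; Both phi operands compute the same "or %x, 10"; the or leaves the low bit
+; of %x untouched, so the parity test must not fold to a constant.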
+define i1 @test1(i32 %x) {
+; CHECK: @test1
+ br i1 true, label %a, label %b
+
+a:
+ %aa = or i32 %x, 10
+ br label %c
+
+b:
+ %bb = or i32 %x, 10
+ br label %c
+
+c:
+ %cc = phi i32 [ %bb, %b ], [%aa, %a ]
+ %d = urem i32 %cc, 2
+ %e = icmp eq i32 %d, 0
+ ret i1 %e
+; CHECK: ret i1 %e
+}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
new file mode 100644
index 000000000000..1eb1fd4c097e
--- /dev/null
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
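+; Subtracting the ptrtoints of two inbounds GEPs off the same base folds to
+; the difference of their constant offsets.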
+define i64 @ptrdiff1(i8* %ptr) {
+; CHECK: @ptrdiff1
+; CHECK-NEXT: ret i64 42
+
+ %first = getelementptr inbounds i8* %ptr, i32 0
+ %last = getelementptr inbounds i8* %ptr, i32 42
+ %first.int = ptrtoint i8* %first to i64
+ %last.int = ptrtoint i8* %last to i64
+ %diff = sub i64 %last.int, %first.int
+ ret i64 %diff
+}
+
+define i64 @ptrdiff2(i8* %ptr) {
+; CHECK: @ptrdiff2
+; CHECK-NEXT: ret i64 42
+
+ %first1 = getelementptr inbounds i8* %ptr, i32 0
+ %first2 = getelementptr inbounds i8* %first1, i32 1
+ %first3 = getelementptr inbounds i8* %first2, i32 2
+ %first4 = getelementptr inbounds i8* %first3, i32 4
+ %last1 = getelementptr inbounds i8* %first2, i32 48
+ %last2 = getelementptr inbounds i8* %last1, i32 8
+ %last3 = getelementptr inbounds i8* %last2, i32 -4
+ %last4 = getelementptr inbounds i8* %last3, i32 -4
+ %first.int = ptrtoint i8* %first4 to i64
+ %last.int = ptrtoint i8* %last4 to i64
+ %diff = sub i64 %last.int, %first.int
+ ret i64 %diff
+}
+
+define i64 @ptrdiff3(i8* %ptr) {
+; Don't bother with non-inbounds GEPs.
+; CHECK: @ptrdiff3
+; CHECK: getelementptr
+; CHECK: sub
+; CHECK: ret
+
+ %first = getelementptr i8* %ptr, i32 0
+ %last = getelementptr i8* %ptr, i32 42
+ %first.int = ptrtoint i8* %first to i64
+ %last.int = ptrtoint i8* %last to i64
+ %diff = sub i64 %last.int, %first.int
+ ret i64 %diff
+}
diff --git a/test/Transforms/InstSimplify/reassociate.ll b/test/Transforms/InstSimplify/reassociate.ll
index 3c8169e5e283..e659e6f42c8d 100644
--- a/test/Transforms/InstSimplify/reassociate.ll
+++ b/test/Transforms/InstSimplify/reassociate.ll
@@ -184,3 +184,12 @@ define i32 @udiv5(i32 %x, i32 %y) {
; CHECK: ret i32 %x
}
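+; Truncation distributes over addition, so trunc(x+1) - trunc(x) is always 1.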
+define i16 @trunc1(i32 %x) {
+; CHECK: @trunc1
+ %y = add i32 %x, 1
+ %tx = trunc i32 %x to i16
+ %ty = trunc i32 %y to i16
+ %d = sub i16 %ty, %tx
+ ret i16 %d
+; CHECK: ret i16 1
+}
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
index 8134cc848749..23cd50f92b40 100644
--- a/test/Transforms/InstSimplify/undef.ll
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -84,6 +84,13 @@ define i64 @test11() {
ret i64 %r
}
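+; A shift by an undef amount may exceed the bit width, so each of the shifts
+; below folds to undef.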
+; @test11b
+; CHECK: ret i64 undef
+define i64 @test11b(i64 %a) {
+ %r = shl i64 %a, undef
+ ret i64 %r
+}
+
; @test12
; CHECK: ret i64 undef
define i64 @test12() {
@@ -91,6 +98,13 @@ define i64 @test12() {
ret i64 %r
}
+; @test12b
+; CHECK: ret i64 undef
+define i64 @test12b(i64 %a) {
+ %r = ashr i64 %a, undef
+ ret i64 %r
+}
+
; @test13
; CHECK: ret i64 undef
define i64 @test13() {
@@ -98,6 +112,13 @@ define i64 @test13() {
ret i64 %r
}
+; @test13b
+; CHECK: ret i64 undef
+define i64 @test13b(i64 %a) {
+ %r = lshr i64 %a, undef
+ ret i64 %r
+}
+
; @test14
; CHECK: ret i1 undef
define i1 @test14() {
@@ -125,3 +146,10 @@ define i64 @test17(i64 %a) {
%r = select i1 undef, i64 undef, i64 %a
ret i64 %r
}
+
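+; A call through an undef function pointer returns undef.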
+; @test18
+; CHECK: ret i64 undef
+define i64 @test18(i64 %a) {
+ %r = call i64 (i64)* undef(i64 %a)
+ ret i64 %r
+}
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
new file mode 100644
index 000000000000..f65260e00f54
--- /dev/null
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -0,0 +1,8 @@
+;RUN: opt -instsimplify %s -disable-output
+declare void @helper(<2 x i8*>)
+define void @test(<2 x i8*> %a) {
+ %A = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 0>
+ call void @helper(<2 x i8*> %A)
+ ret void
+}
+
diff --git a/test/Transforms/Internalize/dg.exp b/test/Transforms/Internalize/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/Internalize/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Internalize/lit.local.cfg b/test/Transforms/Internalize/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Internalize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
index 46aaa00380e3..e80bae578a93 100644
--- a/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
+++ b/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
@@ -15,7 +15,7 @@ for.cond1177:
br i1 %cmp1179, label %for.cond1177, label %land.rhs1320
land.rhs1320:
- %tmp1324 = volatile load i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
+ %tmp1324 = load volatile i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1, !tbaa !0
br label %if.end.i
if.end.i:
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index 2115dd384007..b9c03544db81 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -399,7 +399,7 @@ if.then237:
br label %lbl_664
lbl_596: ; preds = %lbl_664, %for.end37
- volatile store i64 undef, i64* undef, align 4
+ store volatile i64 undef, i64* undef, align 4
br label %for.cond111
for.cond111: ; preds = %safe_sub_func_int64_t_s_s.exit, %lbl_596
diff --git a/test/Transforms/JumpThreading/dg.exp b/test/Transforms/JumpThreading/dg.exp
deleted file mode 100644
index de42dad163fd..000000000000
--- a/test/Transforms/JumpThreading/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]]
diff --git a/test/Transforms/JumpThreading/lit.local.cfg b/test/Transforms/JumpThreading/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/JumpThreading/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/JumpThreading/no-irreducible-loops.ll b/test/Transforms/JumpThreading/no-irreducible-loops.ll
index 7c7fe3929ae0..a4914f96348e 100644
--- a/test/Transforms/JumpThreading/no-irreducible-loops.ll
+++ b/test/Transforms/JumpThreading/no-irreducible-loops.ll
@@ -17,11 +17,11 @@ bb: ; preds = %bb4
br i1 %0, label %bb1, label %bb2
bb1: ; preds = %bb
- volatile store i32 1000, i32* @v1, align 4
+ store volatile i32 1000, i32* @v1, align 4
br label %bb3
bb2: ; preds = %bb
- volatile store i32 1001, i32* @v1, align 4
+ store volatile i32 1001, i32* @v1, align 4
br label %bb3
bb3: ; preds = %bb2, %bb1
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index cce23ea319c8..78d36e7053c9 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,12 +1,12 @@
; RUN: opt < %s -jump-threading -S | FileCheck %s
-; rdar://6402033
-; Test that we can thread through the block with the partially redundant load (%2).
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
-define i32 @foo(i32* %P) nounwind {
-; CHECK: foo
+; Test that we can thread through the block with the partially redundant load (%2).
+; rdar://6402033
+define i32 @test1(i32* %P) nounwind {
+; CHECK: @test1
entry:
%0 = tail call i32 (...)* @f1() nounwind ; <i32> [#uses=1]
%1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
@@ -39,3 +39,43 @@ bb3: ; preds = %bb1
declare i32 @f1(...)
declare i32 @f2(...)
+
+
+;; Check that we preserve TBAA information.
+; rdar://11039258
+
+define i32 @test2(i32* %P) nounwind {
+; CHECK: @test2
+entry:
+ %0 = tail call i32 (...)* @f1() nounwind ; <i32> [#uses=1]
+ %1 = icmp eq i32 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %bb1, label %bb
+
+bb: ; preds = %entry
+; CHECK: bb1.thread:
+; CHECK: store{{.*}}, !tbaa !0
+; CHECK: br label %bb3
+ store i32 42, i32* %P, align 4, !tbaa !0
+ br label %bb1
+
+bb1: ; preds = %entry, %bb
+ %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+ %2 = load i32* %P, align 4, !tbaa !0
+ %3 = icmp sgt i32 %2, 36
+ br i1 %3, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %4 = tail call i32 (...)* @f2() nounwind
+ ret i32 %res.0
+
+bb3: ; preds = %bb1
+; CHECK: bb3:
+; CHECK: %res.01 = phi i32 [ 1, %bb1.thread ], [ 0, %bb1 ]
+; CHECK: ret i32 %res.01
+ ret i32 %res.0
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+
diff --git a/test/Transforms/LCSSA/dg.exp b/test/Transforms/LCSSA/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LCSSA/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LCSSA/lit.local.cfg b/test/Transforms/LCSSA/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LCSSA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
index 17383c2ebb63..4df6ea758103 100644
--- a/test/Transforms/LICM/2007-05-22-VolatileSink.ll
+++ b/test/Transforms/LICM/2007-05-22-VolatileSink.ll
@@ -10,7 +10,7 @@ entry:
br label %bb6
bb: ; preds = %bb6
- %tmp2 = volatile load i32* %DataIn ; <i32> [#uses=1]
+ %tmp2 = load volatile i32* %DataIn ; <i32> [#uses=1]
%tmp3 = getelementptr [64 x i32]* %buffer, i32 0, i32 %i.0 ; <i32*> [#uses=1]
store i32 %tmp2, i32* %tmp3
%tmp5 = add i32 %i.0, 1 ; <i32> [#uses=1]
@@ -28,7 +28,7 @@ bb12: ; preds = %bb22
%tmp16 = add i32 %tmp14, %i.1 ; <i32> [#uses=1]
%tmp17 = getelementptr [64 x i32]* %buffer, i32 0, i32 %tmp16 ; <i32*> [#uses=1]
%tmp18 = load i32* %tmp17 ; <i32> [#uses=1]
- volatile store i32 %tmp18, i32* %DataOut
+ store volatile i32 %tmp18, i32* %DataOut
%tmp21 = add i32 %j.1, 1 ; <i32> [#uses=1]
br label %bb22
diff --git a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
index fd114f4ccc11..2bbc6ab0414a 100644
--- a/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
+++ b/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
@@ -19,7 +19,7 @@ for.body4.lr.ph:
for.body4:
%l_612.11 = phi i32* [ undef, %for.body4.lr.ph ], [ %call19, %for.body4 ]
- %tmp7 = volatile load i16* @g_39, align 2
+ %tmp7 = load volatile i16* @g_39, align 2
%call = call i32** @func_108(i32*** undef)
%call19 = call i32* @func_84(i32** %call)
br i1 false, label %for.body4, label %for.cond.loopexit
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index ff7fa0b19a82..de41d008a746 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -68,7 +68,7 @@ define void @test4() noreturn nounwind {
br label %1
; <label>:1 ; preds = %1, %0
- volatile store i32* @g_47, i32** undef, align 8
+ store volatile i32* @g_47, i32** undef, align 8
store i32 undef, i32* @g_47, align 4
br label %1
}
diff --git a/test/Transforms/LICM/dg.exp b/test/Transforms/LICM/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LICM/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LICM/hoist-invariant-load.ll b/test/Transforms/LICM/hoist-invariant-load.ll
new file mode 100644
index 000000000000..4e100d3aee34
--- /dev/null
+++ b/test/Transforms/LICM/hoist-invariant-load.ll
@@ -0,0 +1,39 @@
+; RUN: opt < %s -licm -stats -S |& grep "1 licm"
+
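+; The !invariant.load metadata on the selector-reference load lets LICM hoist
+; it out of the loop; the stats grep expects the pass to report exactly one
+; hoist.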
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+define void @test(i8* %x) uwtable ssp {
+entry:
+ %x.addr = alloca i8*, align 8
+ %i = alloca i32, align 4
+ store i8* %x, i8** %x.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp ult i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i8** %x.addr, align 8
+ %2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !invariant.load !0
+ %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %1, i8* %2)
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %3 = load i32* %i, align 4
+ %inc = add i32 %3, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+!0 = metadata !{}
diff --git a/test/Transforms/LICM/lit.local.cfg b/test/Transforms/LICM/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LICM/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 9aefc4f87eac..05a64d632274 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -59,7 +59,7 @@ define void @test3(i32 %i) {
br label %Loop
Loop:
; Should not promote this to a register
- %x = volatile load i32* @X
+ %x = load volatile i32* @X
%x2 = add i32 %x, 1
store i32 %x2, i32* @X
br i1 true, label %Out, label %Loop
@@ -133,7 +133,7 @@ Loop: ; preds = %Loop, %0
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
store i32 %x2, i32* @X
- volatile store i32* @X, i32** %P2
+ store volatile i32* @X, i32** %P2
%Next = add i32 %j, 1 ; <i32> [#uses=2]
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
new file mode 100644
index 000000000000..507b193e6b1c
--- /dev/null
+++ b/test/Transforms/LICM/speculate.ll
@@ -0,0 +1,167 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+
+; UDiv is safe to speculate if the denominator is known non-zero.
+
+; CHECK: @safe_udiv
+; CHECK: %div = udiv i64 %x, %or
+; CHECK-NEXT: br label %for.body
+
+define void @safe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %or = or i64 %m, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = udiv i64 %x, %or
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; UDiv is unsafe to speculate if the denominator is not known non-zero.
+
+; CHECK: @unsafe_udiv
+; CHECK-NOT: udiv
+; CHECK: for.body:
+
+define void @unsafe_udiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = udiv i64 %x, %m
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; SDiv is safe to speculate if the denominator is known non-zero and
+; known to have at least one zero bit.
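+; (Non-zero rules out division by zero; a zero bit rules out -1, the only
+; divisor for which INT_MIN / d overflows.)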
+
+; CHECK: @safe_sdiv
+; CHECK: %div = sdiv i64 %x, %or
+; CHECK-NEXT: br label %for.body
+
+define void @safe_sdiv(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %and = and i64 %m, -3
+ %or = or i64 %and, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = sdiv i64 %x, %or
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; SDiv is unsafe to speculate if the denominator is not known non-zero.
+
+; CHECK: @unsafe_sdiv_a
+; CHECK-NOT: sdiv
+; CHECK: for.body:
+
+define void @unsafe_sdiv_a(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %or = or i64 %m, 1
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = sdiv i64 %x, %or
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; SDiv is unsafe to speculate if the denominator is not known to have a zero bit.
+
+; CHECK: @unsafe_sdiv_b
+; CHECK-NOT: sdiv
+; CHECK: for.body:
+
+define void @unsafe_sdiv_b(i64 %x, i64 %m, i64 %n, i32* %p, i64* %q) nounwind {
+entry:
+ %and = and i64 %m, -3
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %p, i64 %i.02
+ %0 = load i32* %arrayidx, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %for.inc, label %if.then
+
+if.then: ; preds = %for.body
+ %div = sdiv i64 %x, %and
+ %arrayidx1 = getelementptr inbounds i64* %q, i64 %i.02
+ store i64 %div, i64* %arrayidx1, align 8
+ br label %for.inc
+
+for.inc: ; preds = %if.then, %for.body
+ %inc = add i64 %i.02, 1
+ %cmp = icmp slt i64 %inc, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopDeletion/dg.exp b/test/Transforms/LoopDeletion/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopDeletion/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopDeletion/lit.local.cfg b/test/Transforms/LoopDeletion/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopDeletion/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopIdiom/dg.exp b/test/Transforms/LoopIdiom/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopIdiom/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopIdiom/lit.local.cfg b/test/Transforms/LoopIdiom/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopIdiom/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/alloca.ll b/test/Transforms/LoopRotate/alloca.ll
new file mode 100644
index 000000000000..fd217ea8dcf0
--- /dev/null
+++ b/test/Transforms/LoopRotate/alloca.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-rotate -S | FileCheck %s
+
+; Test alloca in -loop-rotate.
+
+; We expect a different value for %ptr on each iteration (per the definition
+; of alloca), i.e. each call to @use must be paired with its own alloca.
+
+; CHECK: call void @use(i8* %
+; CHECK: %ptr = alloca i8
+
+@e = global i16 10
+
+declare void @use(i8*)
+
+define void @test() {
+entry:
+ %end = load i16* @e
+ br label %loop
+
+loop:
+ %n.phi = phi i16 [ %n, %loop.fin ], [ 0, %entry ]
+ %ptr = alloca i8
+ %cond = icmp eq i16 %n.phi, %end
+ br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+ %n = add i16 %n.phi, 1
+ call void @use(i8* %ptr)
+ br label %loop
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index 92871780a4da..b32ee82d3a57 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -1,11 +1,13 @@
; RUN: opt -S -loop-rotate %s | FileCheck %s
-; CHECK: entry
-; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
-
declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp {
+; CHECK: define i32 @tak
+; CHECK: entry
+; CHECK-NEXT: call void @llvm.dbg.value(metadata !{i32 %x}
+
entry:
br label %tailrecurse
@@ -35,7 +37,45 @@ return: ; preds = %if.end
ret i32 %z.tr, !dbg !17
}
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+@channelColumns = external global i64
+@horzPlane = external global i8*, align 8
+
+define void @FindFreeHorzSeg(i64 %startCol, i64 %row, i64* %rowStart) {
+; Ensure that the loop increment basic block is rotated into the tail of the
+; body, even though it contains a debug intrinsic call.
+; CHECK: define void @FindFreeHorzSeg
+; CHECK: %dec = add
+; CHECK-NEXT: tail call void @llvm.dbg.value
+; CHECK-NEXT: br i1 %tobool, label %for.cond, label %for.end
+
+entry:
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i64 [ %startCol, %entry ], [ %dec, %for.inc ]
+ %cmp = icmp eq i64 %i.0, 0
+ br i1 %cmp, label %for.end, label %for.body
+
+for.body:
+ %0 = load i64* @channelColumns, align 8
+ %mul = mul i64 %0, %row
+ %add = add i64 %mul, %i.0
+ %1 = load i8** @horzPlane, align 8
+ %arrayidx = getelementptr inbounds i8* %1, i64 %add
+ %2 = load i8* %arrayidx, align 1
+ %tobool = icmp eq i8 %2, 0
+ br i1 %tobool, label %for.inc, label %for.end
+
+for.inc:
+ %dec = add i64 %i.0, -1
+ tail call void @llvm.dbg.value(metadata !{i64 %dec}, i64 0, metadata undef)
+ br label %for.cond
+
+for.end:
+ %add1 = add i64 %i.0, 1
+ store i64 %add1, i64* %rowStart, align 8
+ ret void
+}
!llvm.dbg.sp = !{!0}
diff --git a/test/Transforms/LoopRotate/dg.exp b/test/Transforms/LoopRotate/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopRotate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopRotate/lit.local.cfg b/test/Transforms/LoopRotate/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopRotate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopRotate/simplifylatch.ll b/test/Transforms/LoopRotate/simplifylatch.ll
new file mode 100644
index 000000000000..f4227245f74b
--- /dev/null
+++ b/test/Transforms/LoopRotate/simplifylatch.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info | FileCheck %s
+; PR2624: unroll multiple exits
+
+@mode_table = global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1]
+
+; CHECK: @f
+; CHECK-NOT: bb4
+define i8 @f() {
+entry:
+ tail call i32 @fegetround( ) ; <i32>:0 [#uses=1]
+ br label %bb
+
+bb: ; preds = %bb4, %entry
+ %mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ] ; <i8> [#uses=4]
+ zext i8 %mode.0 to i32 ; <i32>:1 [#uses=1]
+ getelementptr [4 x i32]* @mode_table, i32 0, i32 %1 ; <i32*>:2 [#uses=1]
+ load i32* %2, align 4 ; <i32>:3 [#uses=1]
+ icmp eq i32 %3, %0 ; <i1>:4 [#uses=1]
+ br i1 %4, label %bb1, label %bb2
+
+bb1: ; preds = %bb
+ ret i8 %mode.0
+
+bb2: ; preds = %bb
+ icmp eq i8 %mode.0, 1 ; <i1>:5 [#uses=1]
+ br i1 %5, label %bb5, label %bb4
+
+bb4: ; preds = %bb2
+ %indvar.next = add i8 %mode.0, 1 ; <i8> [#uses=1]
+ br label %bb
+
+bb5: ; preds = %bb2
+ tail call void @raise_exception( ) noreturn
+ unreachable
+}
+
+declare i32 @fegetround()
+
+declare void @raise_exception() noreturn
diff --git a/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll b/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll
new file mode 100644
index 000000000000..173a5825767b
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2011-12-14-LandingpadHeader.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -loop-simplify -S | FileCheck %s
+; PR11575
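+; Forming a preheader for the loop headed by %catch requires splitting its
+; landingpad, since a landingpad must be the first non-PHI instruction in its
+; block.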
+
+@catchtypeinfo = external unnamed_addr constant { i8*, i8*, i8* }
+
+define void @main() uwtable ssp {
+entry:
+ invoke void @f1()
+ to label %try.cont19 unwind label %catch
+
+; CHECK: catch.preheader:
+; CHECK-NEXT: landingpad
+; CHECK: br label %catch
+
+; CHECK: catch.split-lp:
+; CHECK-NEXT: landingpad
+; CHECK: br label %catch
+
+catch: ; preds = %if.else, %entry
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast ({ i8*, i8*, i8* }* @catchtypeinfo to i8*)
+ invoke void @f3()
+ to label %if.else unwind label %eh.resume
+
+if.else: ; preds = %catch
+ invoke void @f2()
+ to label %try.cont19 unwind label %catch
+
+try.cont19: ; preds = %if.else, %entry
+ ret void
+
+eh.resume: ; preds = %catch
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ catch i8* bitcast ({ i8*, i8*, i8* }* @catchtypeinfo to i8*)
+ resume { i8*, i32 } undef
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @f1()
+
+declare void @f2()
+
+declare void @f3()
diff --git a/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll b/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll
new file mode 100644
index 000000000000..9c805da485d7
--- /dev/null
+++ b/test/Transforms/LoopSimplify/2012-03-20-indirectbr.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -loop-simplify -S | FileCheck %s
+
+; Make sure the preheader exists.
+; CHECK: sw.bb103:
+; CHECK: indirectbr {{.*}}label %while.cond112
+; CHECK: while.cond112:
+; But the tail is not split.
+; CHECK: for.body:
+; CHECK: indirectbr {{.*}}label %while.cond112
+define fastcc void @build_regex_nfa() nounwind uwtable ssp {
+entry:
+ indirectbr i8* blockaddress(@build_regex_nfa, %while.cond), [label %while.cond]
+
+while.cond: ; preds = %if.then439, %entry
+ indirectbr i8* blockaddress(@build_regex_nfa, %sw.bb103), [label %do.body785, label %sw.bb103]
+
+sw.bb103: ; preds = %while.body
+ indirectbr i8* blockaddress(@build_regex_nfa, %while.cond112), [label %while.cond112]
+
+while.cond112: ; preds = %for.body, %for.cond.preheader, %sw.bb103
+ %pc.0 = phi i8 [ -1, %sw.bb103 ], [ 0, %for.body ], [ %pc.0, %for.cond.preheader ]
+ indirectbr i8* blockaddress(@build_regex_nfa, %Lsetdone), [label %sw.bb118, label %Lsetdone]
+
+sw.bb118: ; preds = %while.cond112
+ indirectbr i8* blockaddress(@build_regex_nfa, %for.cond.preheader), [label %Lerror.loopexit, label %for.cond.preheader]
+
+for.cond.preheader: ; preds = %sw.bb118
+ indirectbr i8* blockaddress(@build_regex_nfa, %for.body), [label %while.cond112, label %for.body]
+
+for.body: ; preds = %for.body, %for.cond.preheader
+ indirectbr i8* blockaddress(@build_regex_nfa, %for.body), [label %while.cond112, label %for.body]
+
+Lsetdone: ; preds = %while.cond112
+ unreachable
+
+do.body785: ; preds = %while.cond, %while.body
+ ret void
+
+Lerror.loopexit: ; preds = %sw.bb118
+ unreachable
+}
diff --git a/test/Transforms/LoopSimplify/dg.exp b/test/Transforms/LoopSimplify/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopSimplify/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopSimplify/lit.local.cfg b/test/Transforms/LoopSimplify/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopSimplify/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
index 90477d106974..ce56bd31018c 100644
--- a/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
+++ b/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
@@ -10,7 +10,7 @@ entry:
bb: ; preds = %bb, %entry
%l_2.0.reg2mem.0 = phi i16 [ 0, %entry ], [ %t1, %bb ] ; <i16> [#uses=2]
%t0 = shl i16 %l_2.0.reg2mem.0, 1 ; <i16>:0 [#uses=1]
- volatile store i16 %t0, i16* @g_3, align 2
+ store volatile i16 %t0, i16* @g_3, align 2
%t1 = add i16 %l_2.0.reg2mem.0, -3 ; <i16>:1 [#uses=2]
%t2 = icmp slt i16 %t1, 1 ; <i1>:2 [#uses=1]
br i1 %t2, label %bb, label %return
@@ -22,7 +22,7 @@ return: ; preds = %bb
define i32 @main() nounwind {
entry:
tail call void @func_1( ) nounwind
- volatile load i16* @g_3, align 2 ; <i16>:0 [#uses=1]
+ load volatile i16* @g_3, align 2 ; <i16>:0 [#uses=1]
zext i16 %0 to i32 ; <i32>:1 [#uses=1]
tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %1 ) nounwind ; <i32>:2 [#uses=0]
ret i32 0
diff --git a/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
new file mode 100644
index 000000000000..392a8bcf89db
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
@@ -0,0 +1,39 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; PR11571: handle a postinc user outside of for.body7 that requires
+; recursive expansion of a quadratic recurrence within for.body7. LSR
+; needs to forget that for.body7 is a postinc loop during expansion.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
+target triple = "i386-unknown-freebsd10.0"
+
+@b = external global [121 x i32]
+
+; CHECK: @vb
+; Outer recurrence:
+; CHECK: %lsr.iv1 = phi [121 x i32]*
+; Inner recurrence:
+; CHECK: %lsr.iv = phi i32
+; Outer step (relative to inner recurrence):
+; CHECK: %scevgep = getelementptr i1* %{{.*}}, i32 %lsr.iv
+; Outer use:
+; CHECK: %lsr.iv3 = phi [121 x i32]* [ %lsr.iv1, %for.body43.preheader ]
+define void @vb() nounwind {
+for.cond.preheader:
+ br label %for.body7
+
+for.body7:
+ %indvars.iv77 = phi i32 [ %indvars.iv.next78, %for.body7 ], [ 1, %for.cond.preheader ]
+ %bf.072 = phi i32 [ %t1, %for.body7 ], [ 0, %for.cond.preheader ]
+ %t1 = add i32 %bf.072, %indvars.iv77
+ %indvars.iv.next78 = add i32 %indvars.iv77, 1
+ br i1 undef, label %for.body43, label %for.body7
+
+for.body43:
+ %bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ]
+ %inc44 = add nsw i32 %bf.459, 1
+ %arrayidx45 = getelementptr inbounds [121 x i32]* @b, i32 0, i32 %bf.459
+ %t2 = load i32* %arrayidx45, align 4
+ br label %for.body43
+}
+
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
new file mode 100644
index 000000000000..d7f5723188c2
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-02-nopreheader.ll
@@ -0,0 +1,88 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10619599> "SelectionDAGBuilder shouldn't visit PHI nodes!" assert.
+; <rdar://10655343> SCEVExpander segfault on simple test case
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-f128:128:128-n8:16:32"
+target triple = "i386-apple-darwin"
+
+; LSR should convert the inner loop (bb7.us) IV (j.01.us) into float*.
+; This involves a nested AddRec; the outer AddRec's loop-invariant components
+; cannot find a preheader, so they should be expanded in the loop header
+; (bb7.lr.ph.us) below the existing phi i.12.us.
+; Currently, LSR won't kick in on such loops.
+; CHECK: @nopreheader
+; CHECK: bb7.us:
+; CHECK-NOT: phi float*
+; CHECK: %j.01.us = phi i32
+; CHECK-NOT: phi float*
+define void @nopreheader(float* nocapture %a, i32 %n) nounwind {
+entry:
+ %0 = sdiv i32 %n, undef
+ indirectbr i8* undef, [label %bb10.preheader]
+
+bb10.preheader: ; preds = %bb4
+ indirectbr i8* undef, [label %bb8.preheader.lr.ph, label %return]
+
+bb8.preheader.lr.ph: ; preds = %bb10.preheader
+ indirectbr i8* null, [label %bb7.lr.ph.us, label %bb9]
+
+bb7.lr.ph.us: ; preds = %bb9.us, %bb8.preheader.lr.ph
+ %i.12.us = phi i32 [ %2, %bb9.us ], [ 0, %bb8.preheader.lr.ph ]
+ %tmp30 = mul i32 %0, %i.12.us
+ indirectbr i8* undef, [label %bb7.us]
+
+bb7.us: ; preds = %bb7.lr.ph.us, %bb7.us
+ %j.01.us = phi i32 [ 0, %bb7.lr.ph.us ], [ %1, %bb7.us ]
+ %tmp31 = add i32 %tmp30, %j.01.us
+ %scevgep9 = getelementptr float* %a, i32 %tmp31
+ store float undef, float* %scevgep9, align 1
+ %1 = add nsw i32 %j.01.us, 1
+ indirectbr i8* undef, [label %bb9.us, label %bb7.us]
+
+bb9.us: ; preds = %bb7.us
+ %2 = add nsw i32 %i.12.us, 1
+ indirectbr i8* undef, [label %bb7.lr.ph.us, label %return]
+
+bb9: ; preds = %bb9, %bb8.preheader.lr.ph
+ indirectbr i8* undef, [label %bb9, label %return]
+
+return: ; preds = %bb9, %bb9.us, %bb10.preheader
+ ret void
+}
+
+; In this case, SCEVExpander simply cannot materialize the AddRecExpr
+; that LSR picks. We must detect that %bb8.preheader does not have a
+; preheader and avoid performing LSR on %bb7.
+; CHECK: @nopreheader2
+; CHECK: bb7:
+; CHECK: %indvar = phi i32
+define fastcc void @nopreheader2([200 x i32]* nocapture %Array2) nounwind {
+entry:
+ indirectbr i8* undef, [label %bb]
+
+bb: ; preds = %bb, %entry
+ indirectbr i8* undef, [label %bb3, label %bb]
+
+bb3: ; preds = %bb3, %bb
+ indirectbr i8* undef, [label %bb8.preheader, label %bb3]
+
+bb8.preheader: ; preds = %bb9, %bb3
+ %indvar5 = phi i32 [ %indvar.next6, %bb9 ], [ 0, %bb3 ]
+ %tmp26 = add i32 %indvar5, 13
+ indirectbr i8* null, [label %bb7]
+
+bb7: ; preds = %bb8.preheader, %bb7
+ %indvar = phi i32 [ 0, %bb8.preheader ], [ %indvar.next, %bb7 ]
+ %scevgep = getelementptr [200 x i32]* %Array2, i32 %tmp26, i32 %indvar
+ store i32 undef, i32* %scevgep, align 4
+ %indvar.next = add i32 %indvar, 1
+ indirectbr i8* undef, [label %bb9, label %bb7]
+
+bb9: ; preds = %bb7
+ %indvar.next6 = add i32 %indvar5, 1
+ indirectbr i8* undef, [label %return, label %bb8.preheader]
+
+return: ; preds = %bb9
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
new file mode 100644
index 000000000000..3036a7e38bbf
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-01-16-nopreheader.ll
@@ -0,0 +1,113 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://10701050> "Cannot split an edge from an IndirectBrInst" assert.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; while.cond197 dominates the simplified loop while.cond238 but has no
+; preheader.
+;
+; CHECK: @nopreheader
+; CHECK: %while.cond238
+; CHECK: phi i64
+; CHECK-NOT: phi
+; CHECK: indirectbr
+define void @nopreheader(i8* %end) nounwind {
+entry:
+ br label %while.cond179
+
+while.cond179: ; preds = %if.end434, %if.end369, %if.end277, %if.end165
+ %s.1 = phi i8* [ undef, %if.end434 ], [ %incdec.ptr356, %if.end348 ], [ undef, %entry ]
+ indirectbr i8* undef, [label %land.rhs184, label %while.end453]
+
+land.rhs184: ; preds = %while.cond179
+ indirectbr i8* undef, [label %while.end453, label %while.cond197]
+
+while.cond197: ; preds = %land.rhs202, %land.rhs184
+ %0 = phi i64 [ %indvar.next11, %land.rhs202 ], [ 0, %land.rhs184 ]
+ indirectbr i8* undef, [label %land.rhs202, label %while.end215]
+
+land.rhs202: ; preds = %while.cond197
+ %indvar.next11 = add i64 %0, 1
+ indirectbr i8* undef, [label %while.end215, label %while.cond197]
+
+while.end215: ; preds = %land.rhs202, %while.cond197
+ indirectbr i8* undef, [label %PREMATURE, label %if.end221]
+
+if.end221: ; preds = %while.end215
+ indirectbr i8* undef, [label %while.cond238.preheader, label %lor.lhs.false227]
+
+lor.lhs.false227: ; preds = %if.end221
+ indirectbr i8* undef, [label %while.cond238.preheader, label %if.else]
+
+while.cond238.preheader: ; preds = %lor.lhs.false227, %if.end221
+ %tmp16 = add i64 %0, 2
+ indirectbr i8* undef, [label %while.cond238]
+
+while.cond238: ; preds = %land.rhs243, %while.cond238.preheader
+ %1 = phi i64 [ %indvar.next15, %land.rhs243 ], [ 0, %while.cond238.preheader ]
+ %tmp36 = add i64 %tmp16, %1
+ %s.3 = getelementptr i8* %s.1, i64 %tmp36
+ %cmp241 = icmp ult i8* %s.3, %end
+ indirectbr i8* undef, [label %land.rhs243, label %while.end256]
+
+land.rhs243: ; preds = %while.cond238
+ %indvar.next15 = add i64 %1, 1
+ indirectbr i8* undef, [label %while.end256, label %while.cond238]
+
+while.end256: ; preds = %land.rhs243, %while.cond238
+ indirectbr i8* undef, [label %PREMATURE]
+
+if.else: ; preds = %lor.lhs.false227
+ indirectbr i8* undef, [label %if.then297, label %if.else386]
+
+if.then297: ; preds = %if.else
+ indirectbr i8* undef, [label %PREMATURE, label %if.end307]
+
+if.end307: ; preds = %if.then297
+ indirectbr i8* undef, [label %if.end314, label %FAIL]
+
+if.end314: ; preds = %if.end307
+ indirectbr i8* undef, [label %if.end340]
+
+if.end340: ; preds = %while.end334
+ indirectbr i8* undef, [label %PREMATURE, label %if.end348]
+
+if.end348: ; preds = %if.end340
+ %incdec.ptr356 = getelementptr inbounds i8* undef, i64 2
+ indirectbr i8* undef, [label %while.cond179]
+
+if.else386: ; preds = %if.else
+ indirectbr i8* undef, [label %while.end453, label %if.end434]
+
+if.end434: ; preds = %if.then428, %if.end421
+ indirectbr i8* undef, [label %while.cond179]
+
+while.end453: ; preds = %if.else386, %land.rhs184, %while.cond179
+ indirectbr i8* undef, [label %PREMATURE, label %if.end459]
+
+if.end459: ; preds = %while.end453
+ indirectbr i8* undef, [label %if.then465, label %FAIL]
+
+if.then465: ; preds = %if.end459
+ indirectbr i8* undef, [label %return, label %if.then479]
+
+if.then479: ; preds = %if.then465
+ indirectbr i8* undef, [label %return]
+
+FAIL: ; preds = %if.end459, %if.end307, %land.lhs.true142, %land.lhs.true131, %while.end
+ indirectbr i8* undef, [label %DECL_FAIL]
+
+PREMATURE: ; preds = %while.end453, %while.end415, %if.end340, %while.end334, %if.then297, %while.end256, %while.end215
+ indirectbr i8* undef, [label %return, label %if.then495]
+
+if.then495: ; preds = %PREMATURE
+ indirectbr i8* undef, [label %return]
+
+DECL_FAIL: ; preds = %if.then488, %FAIL, %land.lhs.true99, %lor.lhs.false, %if.end83, %if.then39, %if.end
+ indirectbr i8* undef, [label %return]
+
+return: ; preds = %if.then512, %if.end504, %DECL_FAIL, %if.then495, %PREMATURE, %if.then479, %if.then465, %if.then69, %if.end52, %if.end19, %if.then
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
new file mode 100644
index 000000000000..0172492edc99
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
@@ -0,0 +1,155 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+;
+; <rdar://problem/11049788> Segmentation fault: 11 in LoopStrengthReduce
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; IVUsers should not consider tmp128 a valid user because it is not in a
+; simplified loop nest.
+; CHECK: @nopreheader
+; CHECK: for.cond:
+; CHECK: %tmp128 = add i64 %0, %indvar65
+define void @nopreheader(i8* %cmd) nounwind ssp {
+entry:
+ indirectbr i8* undef, [label %while.cond]
+
+while.cond: ; preds = %while.body, %entry
+ %0 = phi i64 [ %indvar.next48, %while.body ], [ 0, %entry ]
+ indirectbr i8* undef, [label %while.end, label %while.body]
+
+while.body: ; preds = %lor.rhs, %lor.lhs.false17, %lor.lhs.false11, %lor.lhs.false, %land.rhs
+ %indvar.next48 = add i64 %0, 1
+ indirectbr i8* undef, [label %while.cond]
+
+while.end: ; preds = %lor.rhs, %while.cond
+ indirectbr i8* undef, [label %if.end152]
+
+if.end152: ; preds = %lor.lhs.false144, %if.end110
+ indirectbr i8* undef, [label %lor.lhs.false184, label %for.cond]
+
+lor.lhs.false184: ; preds = %lor.lhs.false177
+ indirectbr i8* undef, [label %return, label %for.cond]
+
+for.cond: ; preds = %for.inc, %lor.lhs.false184, %if.end152
+ %indvar65 = phi i64 [ %indvar.next66, %for.inc ], [ 0, %lor.lhs.false184 ], [ 0, %if.end152 ]
+ %tmp128 = add i64 %0, %indvar65
+ %s.4 = getelementptr i8* %cmd, i64 %tmp128
+ %tmp195 = load i8* %s.4, align 1
+ indirectbr i8* undef, [label %return, label %land.rhs198]
+
+land.rhs198: ; preds = %for.cond
+ indirectbr i8* undef, [label %return, label %for.inc]
+
+for.inc: ; preds = %lor.rhs234, %land.lhs.true228, %land.lhs.true216, %land.lhs.true204
+ %indvar.next66 = add i64 %indvar65, 1
+ indirectbr i8* undef, [label %for.cond]
+
+return: ; preds = %if.end677, %doshell, %if.then96
+ ret void
+}
+
+; Another case with a dominating loop that does not contain the IV
+; User. Just make sure it doesn't assert.
+define void @nopreheader2() nounwind ssp {
+entry:
+ indirectbr i8* undef, [label %while.cond, label %return]
+
+while.cond: ; preds = %while.cond.backedge, %entry
+ indirectbr i8* undef, [label %while.cond.backedge, label %lor.rhs]
+
+lor.rhs: ; preds = %while.cond
+ indirectbr i8* undef, [label %while.cond.backedge, label %while.end]
+
+while.cond.backedge: ; preds = %lor.rhs, %while.cond
+ indirectbr i8* undef, [label %while.cond]
+
+while.end: ; preds = %lor.rhs
+ indirectbr i8* undef, [label %if.then18, label %return]
+
+if.then18: ; preds = %while.end
+ indirectbr i8* undef, [label %if.end35, label %lor.lhs.false]
+
+lor.lhs.false: ; preds = %if.then18
+ indirectbr i8* undef, [label %if.end35, label %return]
+
+if.end35: ; preds = %lor.lhs.false, %if.then18
+ indirectbr i8* undef, [label %while.cond36]
+
+while.cond36: ; preds = %while.body49, %if.end35
+ %0 = phi i64 [ %indvar.next13, %while.body49 ], [ 0, %if.end35 ]
+ indirectbr i8* undef, [label %while.body49, label %lor.rhs42]
+
+lor.rhs42: ; preds = %while.cond36
+ indirectbr i8* undef, [label %while.body49, label %while.end52]
+
+while.body49: ; preds = %lor.rhs42, %while.cond36
+ %indvar.next13 = add i64 %0, 1
+ indirectbr i8* undef, [label %while.cond36]
+
+while.end52: ; preds = %lor.rhs42
+ indirectbr i8* undef, [label %land.lhs.true, label %return]
+
+land.lhs.true: ; preds = %while.end52
+ indirectbr i8* undef, [label %while.cond66.preheader, label %return]
+
+while.cond66.preheader: ; preds = %land.lhs.true
+ indirectbr i8* undef, [label %while.cond66]
+
+while.cond66: ; preds = %while.body77, %while.cond66.preheader
+ indirectbr i8* undef, [label %land.rhs, label %while.cond81.preheader]
+
+land.rhs: ; preds = %while.cond66
+ indirectbr i8* undef, [label %while.body77, label %while.cond81.preheader]
+
+while.cond81.preheader: ; preds = %land.rhs, %while.cond66
+ %tmp45 = add i64 undef, %0
+ %tmp46 = add i64 %tmp45, undef
+ indirectbr i8* undef, [label %while.cond81]
+
+while.body77: ; preds = %land.rhs
+ indirectbr i8* undef, [label %while.cond66]
+
+while.cond81: ; preds = %while.body94, %while.cond81.preheader
+ %tmp25 = add i64 %tmp46, undef
+ indirectbr i8* undef, [label %while.body94, label %lor.rhs87]
+
+lor.rhs87: ; preds = %while.cond81
+ indirectbr i8* undef, [label %while.body94, label %return]
+
+while.body94: ; preds = %lor.rhs87, %while.cond81
+ indirectbr i8* undef, [label %while.cond81]
+
+return: ; preds = %if.end216, %land.lhs.true183, %land.lhs.true, %while.end52, %lor.lhs.false, %while.end, %entry
+ ret void
+}
+
+; Test a phi operand IV User dominated by a no-preheader loop.
+define void @nopreheader3() nounwind uwtable ssp align 2 {
+entry:
+ indirectbr i8* blockaddress(@nopreheader3, %if.end10), [label %if.end22, label %if.end10]
+
+if.end10: ; preds = %entry
+ indirectbr i8* blockaddress(@nopreheader3, %if.end6.i), [label %if.end22, label %if.end6.i]
+
+if.end6.i: ; preds = %if.end10
+ indirectbr i8* blockaddress(@nopreheader3, %while.cond2.preheader.i.i), [label %if.then12, label %while.cond2.preheader.i.i]
+
+while.cond2.preheader.i.i: ; preds = %while.end.i18.i, %if.end6.i
+ indirectbr i8* blockaddress(@nopreheader3, %while.cond2.i.i), [label %while.cond2.i.i]
+
+while.cond2.i.i: ; preds = %while.cond2.i.i, %while.cond2.preheader.i.i
+ %i1.1.i14.i = phi i32 [ %add.i15.i, %while.cond2.i.i ], [ undef, %while.cond2.preheader.i.i ]
+ %add.i15.i = add nsw i32 %i1.1.i14.i, undef
+ indirectbr i8* blockaddress(@nopreheader3, %while.end.i18.i), [label %while.cond2.i.i, label %while.end.i18.i]
+
+while.end.i18.i: ; preds = %while.cond2.i.i
+ indirectbr i8* blockaddress(@nopreheader3, %while.cond2.preheader.i.i), [label %if.then12, label %while.cond2.preheader.i.i]
+
+if.then12: ; preds = %while.end.i18.i, %if.end6.i
+ %i1.0.lcssa.i.i = phi i32 [ undef, %if.end6.i ], [ %i1.1.i14.i, %while.end.i18.i ]
+ indirectbr i8* blockaddress(@nopreheader3, %if.end22), [label %if.end22]
+
+if.end22: ; preds = %if.then12, %if.end10, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll b/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
new file mode 100644
index 000000000000..c9b11a9feed0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -loop-reduce -S
+; PR11950: isHighCostExpansion crashes on ConstExpr
+;
+; The crash happened during IVChain analysis (CollectChains). We don't
+; really care how LSR decides to transform this loop, so we don't
+; check it. As long as the analysis doesn't crash we're ok.
+target datalayout = "e-p:64:64:64-n32:64"
+
+%struct.this_structure_s.0.5 = type { [6144 x [8 x i32]], [6144 x [8 x i32]], [6147 x [4 x i32]], [8 x i32], [2 x i8*], [2 x i8*], [6144 x i8], [6144 x i32], [6144 x i32], [4 x [4 x i8]] }
+
+define internal fastcc void @someFunction(%struct.this_structure_s.0.5* nocapture %scratch, i32 %stage, i32 %cbSize) nounwind {
+entry:
+ %0 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 4, i32 %stage
+ %1 = load i8** %0, align 4
+ %2 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 5, i32 %stage
+ %3 = load i8** %2, align 4
+ %4 = getelementptr inbounds %struct.this_structure_s.0.5* %scratch, i32 0, i32 2, i32 0, i32 0
+ %tmp11 = shl i32 %stage, 1
+ %tmp1325 = or i32 %tmp11, 1
+ br label %__label_D_1608
+
+__label_D_1608: ; preds = %__label_D_1608, %entry
+ %i.12 = phi i32 [ 0, %entry ], [ %10, %__label_D_1608 ]
+ %tmp = shl i32 %i.12, 2
+ %lvar_g.13 = getelementptr i32* %4, i32 %tmp
+ %tmp626 = or i32 %tmp, 1
+ %scevgep = getelementptr i32* %4, i32 %tmp626
+ %tmp727 = or i32 %tmp, 2
+ %scevgep8 = getelementptr i32* %4, i32 %tmp727
+ %tmp928 = or i32 %tmp, 3
+ %scevgep10 = getelementptr i32* %4, i32 %tmp928
+ %scevgep12 = getelementptr %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp11, i32 %i.12
+ %scevgep14 = getelementptr %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp1325, i32 %i.12
+ %5 = load i8* %scevgep12, align 1
+ %6 = sext i8 %5 to i32
+ %7 = load i8* %scevgep14, align 1
+ %8 = sext i8 %7 to i32
+ store i32 0, i32* %lvar_g.13, align 4
+ store i32 %8, i32* %scevgep, align 4
+ store i32 %6, i32* %scevgep8, align 4
+ %9 = add nsw i32 %8, %6
+ store i32 %9, i32* %scevgep10, align 4
+ %10 = add nsw i32 %i.12, 1
+ %exitcond = icmp eq i32 %10, 3
+ br i1 %exitcond, label %return, label %__label_D_1608
+
+return: ; preds = %__label_D_1608
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
new file mode 100644
index 000000000000..9189d79e2fb6
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -0,0 +1,292 @@
+; RUN: llc < %s -O3 -march=thumb -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; A9: @simple
+; no expensive address computation in the preheader
+; A9: lsl
+; A9-NOT: lsl
+; A9: %loop
+; no complex address modes
+; A9-NOT: lsl
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+ br label %loop
+loop:
+ %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+ %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+ %v = load i32* %iv
+ %iv1 = getelementptr inbounds i32* %iv, i32 %x
+ %v1 = load i32* %iv1
+ %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+ %v2 = load i32* %iv2
+ %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+ %v3 = load i32* %iv3
+ %s1 = add i32 %s, %v
+ %s2 = add i32 %s1, %v1
+ %s3 = add i32 %s2, %v2
+ %s4 = add i32 %s3, %v3
+ %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+ %cmp = icmp eq i32* %iv4, %b
+ br i1 %cmp, label %exit, label %loop
+exit:
+ ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; A9: @user
+; stride multiples computed in the preheader
+; A9: lsl
+; A9: lsl
+; A9: %loop
+; complex address modes
+; A9: lsl
+; A9: lsl
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+ br label %loop
+loop:
+ %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+ %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+ %v = load i32* %iv
+ %iv1 = getelementptr inbounds i32* %iv, i32 %x
+ %v1 = load i32* %iv1
+ %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+ %v2 = load i32* %iv2
+ %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+ %v3 = load i32* %iv3
+ %s1 = add i32 %s, %v
+ %s2 = add i32 %s1, %v1
+ %s3 = add i32 %s2, %v2
+ %s4 = add i32 %s3, %v3
+ %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+ store i32 %s4, i32* %iv
+ %cmp = icmp eq i32* %iv4, %b
+ br i1 %cmp, label %exit, label %loop
+exit:
+ ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; A9: extrastride:
+; no spills
+; A9-NOT: str
+; only one stride multiple in the preheader
+; A9: lsl
+; A9-NOT: {{str r|lsl}}
+; A9: %for.body{{$}}
+; no complex address modes or reloads
+; A9-NOT: {{ldr .*[sp]|lsl}}
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ %cmp8 = icmp eq i32 %z, 0
+ br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+ %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+ %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+ %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+ %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+ %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+ %0 = bitcast i8* %main.addr.011 to i32*
+ %1 = load i32* %0, align 4
+ %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+ %2 = bitcast i8* %add.ptr to i32*
+ %3 = load i32* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+ %4 = bitcast i8* %add.ptr1 to i32*
+ %5 = load i32* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+ %6 = bitcast i8* %add.ptr2 to i32*
+ %7 = load i32* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+ %8 = bitcast i8* %add.ptr3 to i32*
+ %9 = load i32* %8, align 4
+ %add = add i32 %3, %1
+ %add4 = add i32 %add, %5
+ %add5 = add i32 %add4, %7
+ %add6 = add i32 %add5, %9
+ store i32 %add6, i32* %res.addr.09, align 4
+ %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+ %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+ %inc = add i32 %i.010, 1
+ %cmp = icmp eq i32 %inc, %z
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
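
A reconstructed C source for @extrastride (hypothetical; extrastride_c and the memcpy-based loads are ours, standing in for the bitcast-and-load sequences above): five i32 loads at byte offsets 0, s, 2s, 3s, 4s, after which the base advances by the combined stride 5*s + x while res advances by y elements.

    #include <string.h>

    void extrastride_c(char *base, int s, int *res, int x, int y, int z)
    {
        for (int i = 0; i < z; ++i) {
            int sum = 0;
            for (int k = 0; k < 5; ++k) {
                int v;
                memcpy(&v, base + k * s, sizeof v);  /* i32 load at k*s */
                sum += v;
            }
            *res = sum;
            base += 5 * s + x;   /* total IV stride: s*5 + x */
            res += y;
        }
    }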
+
+; @foldedidx is an unrolled variant of this loop:
+; for (unsigned long i = 0; i < len; i += s) {
+; c[i] = a[i] + b[i];
+; }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; A9: foldedidx:
+; A9: ldrb.w {{r[0-9]|lr}}, [{{r[0-9]|lr}}, #3]
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+ %0 = load i8* %arrayidx, align 1
+ %conv5 = zext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+ %1 = load i8* %arrayidx1, align 1
+ %conv26 = zext i8 %1 to i32
+ %add = add nsw i32 %conv26, %conv5
+ %conv3 = trunc i32 %add to i8
+ %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+ store i8 %conv3, i8* %arrayidx4, align 1
+ %inc1 = or i32 %i.07, 1
+ %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+ %2 = load i8* %arrayidx.1, align 1
+ %conv5.1 = zext i8 %2 to i32
+ %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+ %3 = load i8* %arrayidx1.1, align 1
+ %conv26.1 = zext i8 %3 to i32
+ %add.1 = add nsw i32 %conv26.1, %conv5.1
+ %conv3.1 = trunc i32 %add.1 to i8
+ %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+ store i8 %conv3.1, i8* %arrayidx4.1, align 1
+ %inc.12 = or i32 %i.07, 2
+ %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+ %4 = load i8* %arrayidx.2, align 1
+ %conv5.2 = zext i8 %4 to i32
+ %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+ %5 = load i8* %arrayidx1.2, align 1
+ %conv26.2 = zext i8 %5 to i32
+ %add.2 = add nsw i32 %conv26.2, %conv5.2
+ %conv3.2 = trunc i32 %add.2 to i8
+ %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+ store i8 %conv3.2, i8* %arrayidx4.2, align 1
+ %inc.23 = or i32 %i.07, 3
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+ %6 = load i8* %arrayidx.3, align 1
+ %conv5.3 = zext i8 %6 to i32
+ %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+ %7 = load i8* %arrayidx1.3, align 1
+ %conv26.3 = zext i8 %7 to i32
+ %add.3 = add nsw i32 %conv26.3, %conv5.3
+ %conv3.3 = trunc i32 %add.3 to i8
+ %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+ store i8 %conv3.3, i8* %arrayidx4.3, align 1
+ %inc.3 = add nsw i32 %i.07, 4
+ %exitcond.3 = icmp eq i32 %inc.3, 400
+ br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; @testNeon is an important example of the need for ivchains.
+;
+; Currently we have three extra add.w's that keep the store address
+; live past the next increment because ISEL is unfortunately undoing
+; the store chain. ISEL also fails to convert the stores to
+; post-increment addressing. However, the loads should use
+; post-increment addressing, no add's or add.w's beyond the three
+; mentioned. Most importantly, there should be no spills or reloads!
+;
+; A9: testNeon:
+; A9: %.lr.ph
+; A9-NOT: lsl.w
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9: add.w r
+; A9-NOT: {{ldr|str|adds|add r}}
+; A9-NOT: add.w r
+; A9: bne
+define hidden void @testNeon(i8* %ref_data, i32 %ref_stride, i32 %limit, <16 x i8>* nocapture %data) nounwind optsize {
+ %1 = icmp sgt i32 %limit, 0
+ br i1 %1, label %.lr.ph, label %45
+
+.lr.ph: ; preds = %0
+ %2 = shl nsw i32 %ref_stride, 1
+ %3 = mul nsw i32 %ref_stride, 3
+ %4 = shl nsw i32 %ref_stride, 2
+ %5 = mul nsw i32 %ref_stride, 5
+ %6 = mul nsw i32 %ref_stride, 6
+ %7 = mul nsw i32 %ref_stride, 7
+ %8 = shl nsw i32 %ref_stride, 3
+ %9 = sub i32 0, %8
+ %10 = mul i32 %limit, -64
+ br label %11
+
+; <label>:11 ; preds = %11, %.lr.ph
+ %.05 = phi i8* [ %ref_data, %.lr.ph ], [ %42, %11 ]
+ %counter.04 = phi i32 [ 0, %.lr.ph ], [ %44, %11 ]
+ %result.03 = phi <16 x i8> [ zeroinitializer, %.lr.ph ], [ %41, %11 ]
+ %.012 = phi <16 x i8>* [ %data, %.lr.ph ], [ %43, %11 ]
+ %12 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %.05, i32 1) nounwind
+ %13 = getelementptr inbounds i8* %.05, i32 %ref_stride
+ %14 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %13, i32 1) nounwind
+ %15 = shufflevector <1 x i64> %12, <1 x i64> %14, <2 x i32> <i32 0, i32 1>
+ %16 = bitcast <2 x i64> %15 to <16 x i8>
+ %17 = getelementptr inbounds <16 x i8>* %.012, i32 1
+ store <16 x i8> %16, <16 x i8>* %.012, align 4
+ %18 = getelementptr inbounds i8* %.05, i32 %2
+ %19 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %18, i32 1) nounwind
+ %20 = getelementptr inbounds i8* %.05, i32 %3
+ %21 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %20, i32 1) nounwind
+ %22 = shufflevector <1 x i64> %19, <1 x i64> %21, <2 x i32> <i32 0, i32 1>
+ %23 = bitcast <2 x i64> %22 to <16 x i8>
+ %24 = getelementptr inbounds <16 x i8>* %.012, i32 2
+ store <16 x i8> %23, <16 x i8>* %17, align 4
+ %25 = getelementptr inbounds i8* %.05, i32 %4
+ %26 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %25, i32 1) nounwind
+ %27 = getelementptr inbounds i8* %.05, i32 %5
+ %28 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %27, i32 1) nounwind
+ %29 = shufflevector <1 x i64> %26, <1 x i64> %28, <2 x i32> <i32 0, i32 1>
+ %30 = bitcast <2 x i64> %29 to <16 x i8>
+ %31 = getelementptr inbounds <16 x i8>* %.012, i32 3
+ store <16 x i8> %30, <16 x i8>* %24, align 4
+ %32 = getelementptr inbounds i8* %.05, i32 %6
+ %33 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %32, i32 1) nounwind
+ %34 = getelementptr inbounds i8* %.05, i32 %7
+ %35 = tail call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %34, i32 1) nounwind
+ %36 = shufflevector <1 x i64> %33, <1 x i64> %35, <2 x i32> <i32 0, i32 1>
+ %37 = bitcast <2 x i64> %36 to <16 x i8>
+ store <16 x i8> %37, <16 x i8>* %31, align 4
+ %38 = add <16 x i8> %16, %23
+ %39 = add <16 x i8> %38, %30
+ %40 = add <16 x i8> %39, %37
+ %41 = add <16 x i8> %result.03, %40
+ %42 = getelementptr i8* %.05, i32 %9
+ %43 = getelementptr inbounds <16 x i8>* %.012, i32 -64
+ %44 = add nsw i32 %counter.04, 1
+ %exitcond = icmp eq i32 %44, %limit
+ br i1 %exitcond, label %._crit_edge, label %11
+
+._crit_edge: ; preds = %11
+ %scevgep = getelementptr <16 x i8>* %data, i32 %10
+ br label %45
+
+; <label>:45 ; preds = %._crit_edge, %0
+ %result.0.lcssa = phi <16 x i8> [ %41, %._crit_edge ], [ zeroinitializer, %0 ]
+ %.01.lcssa = phi <16 x i8>* [ %scevgep, %._crit_edge ], [ %data, %0 ]
+ store <16 x i8> %result.0.lcssa, <16 x i8>* %.01.lcssa, align 4
+ ret void
+}
+
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly
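
The post-increment pattern the @testNeon comment asks for, sketched in hedged C (illustrative only; sum_rows is ours and scalarizes what the test does with NEON vld1): each chained access reads through one running pointer and then bumps it by a fixed step, the shape ARM folds into post-increment addressing such as vld1 {d0}, [r0]!.

    unsigned sum_rows(const unsigned char *p, int stride, int rows)
    {
        unsigned acc = 0;
        for (int i = 0; i < rows; ++i) {
            acc += *p;     /* access through the chain's pointer ... */
            p += stride;   /* ... then post-increment it             */
        }
        return acc;
    }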
diff --git a/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
new file mode 100644
index 000000000000..bac2ffab31d9
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if 'ARM' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
new file mode 100644
index 000000000000..cb23ad01a497
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s | FileCheck %s
+;
+; PR11431: handle a phi operand that is replaced by a postinc user.
+; LSR first expands %t3 to %t2 in %phi
+; LSR then expands %t2 in %phi into two decrements, one on each loop exit.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i1 @check() nounwind
+
+; Check that LSR did something close to the behavior at the time of the bug.
+; CHECK: @sqlite3DropTriggerPtr
+; CHECK: incq %rax
+; CHECK: jne
+; CHECK: decq %rax
+; CHECK: ret
+define i64 @sqlite3DropTriggerPtr() nounwind {
+bb:
+ %cmp = call zeroext i1 @check()
+ br label %bb1
+
+bb1: ; preds = %bb4, %bb
+ %t0 = phi i64 [ 0, %bb ], [ %t3, %bb4 ]
+ %t2 = phi i64 [ 1, %bb ], [ %t5, %bb4 ]
+ %t3 = add nsw i64 %t0, 1
+ br i1 %cmp, label %bb4, label %bb8
+
+bb4: ; preds = %bb1
+ %t5 = add nsw i64 %t2, 1
+ br i1 %cmp, label %bb1, label %bb8
+
+bb8: ; preds = %bb8, %bb4
+ %phi = phi i64 [ %t3, %bb1 ], [ %t2, %bb4 ]
+ ret i64 %phi
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
new file mode 100644
index 000000000000..510865096272
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -0,0 +1,96 @@
+; RUN: llc < %s | FileCheck %s
+;
+; Test LSR's ability to prune formulae that refer to nonexistent
+; AddRecs in other loops.
+;
+; Unable to reduce this case further because it requires LSR to exceed
+; ComplexityLimit.
+;
+; We really just want to ensure that LSR can process this loop without
+; finding an unsatisfactory solution and bailing out. I've added
+; dummyout, an obvious candidate for postinc replacement so we can
+; verify that LSR removes it.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-darwin"
+
+; CHECK: @test
+; CHECK: # %for.body{{$}}
+; dummyiv copy should be removed
+; CHECK-NOT: movq
+; CHECK: # %for.cond19.preheader
+; dummycnt should be removed
+; CHECK-NOT: incq
+; CHECK: # %for.body23{{$}}
+define i64 @test(i64 %count, float* nocapture %srcrow, i32* nocapture %destrow) nounwind uwtable ssp {
+entry:
+ %cmp34 = icmp eq i64 %count, 0
+ br i1 %cmp34, label %for.end29, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %dummyiv = phi i64 [ %dummycnt, %for.body ], [ 0, %entry ]
+ %indvars.iv39 = phi i64 [ %indvars.iv.next40, %for.body ], [ 0, %entry ]
+ %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
+ %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
+ %incdec.ptr = getelementptr inbounds float* %p.035, i64 1
+ %0 = load float* %incdec.ptr, align 4
+ %incdec.ptr2 = getelementptr inbounds float* %p.035, i64 2
+ %1 = load float* %incdec.ptr2, align 4
+ %incdec.ptr3 = getelementptr inbounds float* %p.035, i64 3
+ %2 = load float* %incdec.ptr3, align 4
+ %incdec.ptr4 = getelementptr inbounds float* %p.035, i64 4
+ %3 = load float* %incdec.ptr4, align 4
+ %4 = load i32* %dp.036, align 4
+ %conv5 = fptoui float %0 to i32
+ %or = or i32 %4, %conv5
+ %arrayidx6 = getelementptr inbounds i32* %dp.036, i64 1
+ %5 = load i32* %arrayidx6, align 4
+ %conv7 = fptoui float %1 to i32
+ %or8 = or i32 %5, %conv7
+ %arrayidx9 = getelementptr inbounds i32* %dp.036, i64 2
+ %6 = load i32* %arrayidx9, align 4
+ %conv10 = fptoui float %2 to i32
+ %or11 = or i32 %6, %conv10
+ %arrayidx12 = getelementptr inbounds i32* %dp.036, i64 3
+ %7 = load i32* %arrayidx12, align 4
+ %conv13 = fptoui float %3 to i32
+ %or14 = or i32 %7, %conv13
+ store i32 %or, i32* %dp.036, align 4
+ store i32 %or8, i32* %arrayidx6, align 4
+ store i32 %or11, i32* %arrayidx9, align 4
+ store i32 %or14, i32* %arrayidx12, align 4
+ %add.ptr = getelementptr inbounds i32* %dp.036, i64 4
+ %indvars.iv.next40 = add i64 %indvars.iv39, 4
+ %dummycnt = add i64 %dummyiv, 1
+ %cmp = icmp ult i64 %indvars.iv.next40, %count
+ br i1 %cmp, label %for.body, label %for.cond19.preheader
+
+for.cond19.preheader: ; preds = %for.body
+ %dummyout = add i64 %dummyiv, 1
+ %rem = and i64 %count, 3
+ %cmp2130 = icmp eq i64 %rem, 0
+ br i1 %cmp2130, label %for.end29, label %for.body23.lr.ph
+
+for.body23.lr.ph: ; preds = %for.cond19.preheader
+ %8 = and i64 %count, 3
+ br label %for.body23
+
+for.body23: ; preds = %for.body23, %for.body23.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body23.lr.ph ], [ %indvars.iv.next, %for.body23 ]
+ %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
+ %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
+ %incdec.ptr24 = getelementptr inbounds float* %p.131, i64 1
+ %9 = load float* %incdec.ptr24, align 4
+ %10 = load i32* %dp.132, align 4
+ %conv25 = fptoui float %9 to i32
+ %or26 = or i32 %10, %conv25
+ store i32 %or26, i32* %dp.132, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %incdec.ptr28 = getelementptr inbounds i32* %dp.132, i64 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %8
+ br i1 %exitcond, label %for.end29, label %for.body23
+
+for.end29: ; preds = %entry, %for.body23, %for.cond19.preheader
+ %result = phi i64 [ 0, %entry ], [ %dummyout, %for.body23 ], [ %dummyout, %for.cond19.preheader ]
+ ret i64 %result
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
new file mode 100644
index 000000000000..2dcaab82a1a5
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -0,0 +1,92 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s
+
+declare i1 @check() nounwind
+declare i1 @foo(i8*, i8*, i8*) nounwind
+
+; Check that redundant phi elimination ran
+; CHECK: @test
+; CHECK: %while.body.i
+; CHECK: movs
+; CHECK-NOT: movs
+; CHECK: %for.end.i
+define i32 @test(i8* %base) nounwind uwtable ssp {
+entry:
+ br label %while.body.lr.ph.i
+
+while.body.lr.ph.i: ; preds = %cond.true.i
+ br label %while.body.i
+
+while.body.i: ; preds = %cond.true29.i, %while.body.lr.ph.i
+ %indvars.iv7.i = phi i64 [ 16, %while.body.lr.ph.i ], [ %indvars.iv.next8.i, %cond.true29.i ]
+ %i.05.i = phi i64 [ 0, %while.body.lr.ph.i ], [ %indvars.iv7.i, %cond.true29.i ]
+ %sext.i = shl i64 %i.05.i, 32
+ %idx.ext.i = ashr exact i64 %sext.i, 32
+ %add.ptr.sum.i = add i64 %idx.ext.i, 16
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i, %while.body.i
+ %indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
+ %add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
+ %arrayidx22.i = getelementptr inbounds i8* %base, i64 %add.ptr.sum
+ %0 = load i8* %arrayidx22.i, align 1
+ %indvars.iv.next.i = add i64 %indvars.iv.i, 1
+ %cmp = call i1 @check() nounwind
+ br i1 %cmp, label %for.end.i, label %for.body.i
+
+for.end.i: ; preds = %for.body.i
+ %add.ptr.i144 = getelementptr inbounds i8* %base, i64 %add.ptr.sum.i
+ %cmp2 = tail call i1 @foo(i8* %add.ptr.i144, i8* %add.ptr.i144, i8* undef) nounwind
+ br i1 %cmp2, label %cond.true29.i, label %cond.false35.i
+
+cond.true29.i: ; preds = %for.end.i
+ %indvars.iv.next8.i = add i64 %indvars.iv7.i, 16
+ br i1 false, label %exit, label %while.body.i
+
+cond.false35.i: ; preds = %for.end.i
+ unreachable
+
+exit: ; preds = %cond.true29.i, %cond.true.i
+ ret i32 0
+}
+
+%struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771 = type { i32, i32, i32 }
+
+@tags = external global [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], align 16
+
+; PR11782: SCEVExpander assert
+;
+; Test phi reuse after LSR that requires SCEVExpander to hoist an
+; interesting GEP.
+;
+; CHECK: @test2
+; CHECK: %entry
+; CHECK-NOT: mov
+; CHECK: jne
+define void @test2(i32 %n) nounwind uwtable {
+entry:
+ br i1 undef, label %while.end, label %for.cond468
+
+for.cond468: ; preds = %if.then477, %entry
+ %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
+ %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
+ %k.0 = load i32* %k.0.in, align 4
+ %0 = trunc i64 %indvars.iv1163 to i32
+ %cmp469 = icmp slt i32 %0, %n
+ br i1 %cmp469, label %for.body471, label %for.inc498
+
+for.body471: ; preds = %for.cond468
+ %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
+ %1 = load i32* %first, align 4
+ br i1 undef, label %if.then477, label %for.inc498
+
+if.then477: ; preds = %for.body471
+ %last = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 2
+ %indvars.iv.next1164 = add i64 %indvars.iv1163, 1
+ br label %for.cond468
+
+for.inc498: ; preds = %for.inc498, %for.body471, %for.cond468
+ br label %for.inc498
+
+while.end: ; preds = %entry
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/X86/dg.exp b/test/Transforms/LoopStrengthReduce/X86/dg.exp
deleted file mode 100644
index 7b7bd4e73807..000000000000
--- a/test/Transforms/LoopStrengthReduce/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
new file mode 100644
index 000000000000..e42b67fd35af
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -0,0 +1,300 @@
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+
+; @simple is the most basic chain of address induction variables. Chaining
+; saves at least one register and avoids complex addressing and setup
+; code.
+;
+; X64: @simple
+; %x * 4
+; X64: shlq $2
+; no other address computation in the preheader
+; X64-NEXT: xorl
+; X64-NEXT: .align
+; X64: %loop
+; no complex address modes
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @simple
+; no expensive address computation in the preheader
+; X32-NOT: imul
+; X32: %loop
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+ br label %loop
+loop:
+ %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+ %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+ %v = load i32* %iv
+ %iv1 = getelementptr inbounds i32* %iv, i32 %x
+ %v1 = load i32* %iv1
+ %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+ %v2 = load i32* %iv2
+ %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+ %v3 = load i32* %iv3
+ %s1 = add i32 %s, %v
+ %s2 = add i32 %s1, %v1
+ %s3 = add i32 %s2, %v2
+ %s4 = add i32 %s3, %v3
+ %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+ %cmp = icmp eq i32* %iv4, %b
+ br i1 %cmp, label %exit, label %loop
+exit:
+ ret i32 %s4
+}
+
+; @user is not currently chained because the IV is live across memory ops.
+;
+; X64: @user
+; X64: shlq $4
+; X64: lea
+; X64: lea
+; X64: %loop
+; complex address modes
+; X64: (%{{[^)]+}},%{{[^)]+}},
+;
+; X32: @user
+; expensive address computation in the preheader
+; X32: imul
+; X32: %loop
+; complex address modes
+; X32: (%{{[^)]+}},%{{[^)]+}},
+define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+entry:
+ br label %loop
+loop:
+ %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
+ %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
+ %v = load i32* %iv
+ %iv1 = getelementptr inbounds i32* %iv, i32 %x
+ %v1 = load i32* %iv1
+ %iv2 = getelementptr inbounds i32* %iv1, i32 %x
+ %v2 = load i32* %iv2
+ %iv3 = getelementptr inbounds i32* %iv2, i32 %x
+ %v3 = load i32* %iv3
+ %s1 = add i32 %s, %v
+ %s2 = add i32 %s1, %v1
+ %s3 = add i32 %s2, %v2
+ %s4 = add i32 %s3, %v3
+ %iv4 = getelementptr inbounds i32* %iv3, i32 %x
+ store i32 %s4, i32* %iv
+ %cmp = icmp eq i32* %iv4, %b
+ br i1 %cmp, label %exit, label %loop
+exit:
+ ret i32 %s4
+}
+
+; @extrastride is a slightly more interesting case of a single
+; complete chain with multiple strides. The test case IR is what LSR
+; used to do, and exactly what we don't want to do. LSR's new IV
+; chaining feature should now undo the damage.
+;
+; X64: extrastride:
+; We currently don't handle this on X64 because the sexts cause
+; strange increment expressions like this:
+; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
+;
+; X32: extrastride:
+; no spills in the preheader
+; X32-NOT: mov{{.*}}(%esp){{$}}
+; X32: %for.body{{$}}
+; no complex address modes
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+; no reloads
+; X32-NOT: (%esp)
+define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ %cmp8 = icmp eq i32 %z, 0
+ br i1 %cmp8, label %for.end, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ %add.ptr.sum = shl i32 %main_stride, 1 ; s*2
+ %add.ptr1.sum = add i32 %add.ptr.sum, %main_stride ; s*3
+ %add.ptr2.sum = add i32 %x, %main_stride ; s + x
+ %add.ptr4.sum = shl i32 %main_stride, 2 ; s*4
+ %add.ptr3.sum = add i32 %add.ptr2.sum, %add.ptr4.sum ; total IV stride = s*5+x
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %main.addr.011 = phi i8* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
+ %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
+ %0 = bitcast i8* %main.addr.011 to i32*
+ %1 = load i32* %0, align 4
+ %add.ptr = getelementptr inbounds i8* %main.addr.011, i32 %main_stride
+ %2 = bitcast i8* %add.ptr to i32*
+ %3 = load i32* %2, align 4
+ %add.ptr1 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr.sum
+ %4 = bitcast i8* %add.ptr1 to i32*
+ %5 = load i32* %4, align 4
+ %add.ptr2 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr1.sum
+ %6 = bitcast i8* %add.ptr2 to i32*
+ %7 = load i32* %6, align 4
+ %add.ptr3 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr4.sum
+ %8 = bitcast i8* %add.ptr3 to i32*
+ %9 = load i32* %8, align 4
+ %add = add i32 %3, %1
+ %add4 = add i32 %add, %5
+ %add5 = add i32 %add4, %7
+ %add6 = add i32 %add5, %9
+ store i32 %add6, i32* %res.addr.09, align 4
+ %add.ptr6 = getelementptr inbounds i8* %main.addr.011, i32 %add.ptr3.sum
+ %add.ptr7 = getelementptr inbounds i32* %res.addr.09, i32 %y
+ %inc = add i32 %i.010, 1
+ %cmp = icmp eq i32 %inc, %z
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; @foldedidx is an unrolled variant of this loop:
+; for (unsigned long i = 0; i < len; i += s) {
+; c[i] = a[i] + b[i];
+; }
+; where 's' can be folded into the addressing mode.
+; Consequently, we should *not* form any chains.
+;
+; X64: foldedidx:
+; X64: movzbl -3(
+;
+; X32: foldedidx:
+; X32: movzbl -3(
+define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
+ %arrayidx = getelementptr inbounds i8* %a, i32 %i.07
+ %0 = load i8* %arrayidx, align 1
+ %conv5 = zext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.07
+ %1 = load i8* %arrayidx1, align 1
+ %conv26 = zext i8 %1 to i32
+ %add = add nsw i32 %conv26, %conv5
+ %conv3 = trunc i32 %add to i8
+ %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.07
+ store i8 %conv3, i8* %arrayidx4, align 1
+ %inc1 = or i32 %i.07, 1
+ %arrayidx.1 = getelementptr inbounds i8* %a, i32 %inc1
+ %2 = load i8* %arrayidx.1, align 1
+ %conv5.1 = zext i8 %2 to i32
+ %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %inc1
+ %3 = load i8* %arrayidx1.1, align 1
+ %conv26.1 = zext i8 %3 to i32
+ %add.1 = add nsw i32 %conv26.1, %conv5.1
+ %conv3.1 = trunc i32 %add.1 to i8
+ %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %inc1
+ store i8 %conv3.1, i8* %arrayidx4.1, align 1
+ %inc.12 = or i32 %i.07, 2
+ %arrayidx.2 = getelementptr inbounds i8* %a, i32 %inc.12
+ %4 = load i8* %arrayidx.2, align 1
+ %conv5.2 = zext i8 %4 to i32
+ %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %inc.12
+ %5 = load i8* %arrayidx1.2, align 1
+ %conv26.2 = zext i8 %5 to i32
+ %add.2 = add nsw i32 %conv26.2, %conv5.2
+ %conv3.2 = trunc i32 %add.2 to i8
+ %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %inc.12
+ store i8 %conv3.2, i8* %arrayidx4.2, align 1
+ %inc.23 = or i32 %i.07, 3
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %inc.23
+ %6 = load i8* %arrayidx.3, align 1
+ %conv5.3 = zext i8 %6 to i32
+ %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %inc.23
+ %7 = load i8* %arrayidx1.3, align 1
+ %conv26.3 = zext i8 %7 to i32
+ %add.3 = add nsw i32 %conv26.3, %conv5.3
+ %conv3.3 = trunc i32 %add.3 to i8
+ %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %inc.23
+ store i8 %conv3.3, i8* %arrayidx4.3, align 1
+ %inc.3 = add nsw i32 %i.07, 4
+ %exitcond.3 = icmp eq i32 %inc.3, 400
+ br i1 %exitcond.3, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
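
For reference, the C loop this unrolled IR corresponds to (reconstructed from the IR; foldedidx_c is our name, and the trip count 400 and unit stride are taken from the code above). Because the stride is the constant 1, the offsets 1..3 fold into x86 displacements, which is why the checks expect forms like movzbl -3(...) and why no chain should be built:

    void foldedidx_c(unsigned char *a, unsigned char *b, unsigned char *c)
    {
        for (int i = 0; i < 400; i += 4) {
            c[i]     = (unsigned char)(a[i]     + b[i]);
            c[i + 1] = (unsigned char)(a[i + 1] + b[i + 1]);
            c[i + 2] = (unsigned char)(a[i + 2] + b[i + 2]);
            c[i + 3] = (unsigned char)(a[i + 3] + b[i + 3]);
        }
    }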
+
+; @multioper tests instructions with multiple IV user operands. We
+; should be able to chain them independently of each other.
+;
+; X64: @multioper
+; X64: %for.body
+; X64: movl %{{.*}},4)
+; X64-NEXT: leal 1(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 2(
+; X64-NEXT: movl %{{.*}},4)
+; X64-NEXT: leal 3(
+; X64-NEXT: movl %{{.*}},4)
+;
+; X32: @multioper
+; X32: %for.body
+; X32: movl %{{.*}},4)
+; X32-NEXT: leal 1(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 2(
+; X32-NEXT: movl %{{.*}},4)
+; X32-NEXT: leal 3(
+; X32-NEXT: movl %{{.*}},4)
+define void @multioper(i32* %a, i32 %n) nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %p = phi i32* [ %p.next, %for.body ], [ %a, %entry ]
+ %i = phi i32 [ %inc4, %for.body ], [ 0, %entry ]
+ store i32 %i, i32* %p, align 4
+ %inc1 = or i32 %i, 1
+ %add.ptr.i1 = getelementptr inbounds i32* %p, i32 1
+ store i32 %inc1, i32* %add.ptr.i1, align 4
+ %inc2 = add nsw i32 %i, 2
+ %add.ptr.i2 = getelementptr inbounds i32* %p, i32 2
+ store i32 %inc2, i32* %add.ptr.i2, align 4
+ %inc3 = add nsw i32 %i, 3
+ %add.ptr.i3 = getelementptr inbounds i32* %p, i32 3
+ store i32 %inc3, i32* %add.ptr.i3, align 4
+ %p.next = getelementptr inbounds i32* %p, i32 4
+ %inc4 = add nsw i32 %i, 4
+ %cmp = icmp slt i32 %inc4, %n
+ br i1 %cmp, label %for.body, label %exit
+
+exit:
+ ret void
+}
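
A hypothetical C analogue of @multioper (multioper_c is ours): each store consumes the IV twice, once through the address p and once as the stored value i, giving two logically independent chains that LSR should advance separately.

    void multioper_c(int *a, int n)
    {
        int *p = a;
        for (int i = 0; i < n; i += 4, p += 4) {
            p[0] = i;       /* IV feeds both the address and the value */
            p[1] = i + 1;
            p[2] = i + 2;
            p[3] = i + 3;
        }
    }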
+
+; @testCmpZero has a ICmpZero LSR use that should not be hidden from
+; LSR. Profitable chains should have more than one nonzero increment
+; anyway.
+;
+; X32: @testCmpZero
+; X32: %for.body82.us
+; X32: dec
+; X32: jne
+define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
+entry:
+ %dest0 = getelementptr inbounds i8* %src, i32 %srcidx
+ %source0 = getelementptr inbounds i8* %dst, i32 %dstidx
+ %add.ptr79.us.sum = add i32 %srcidx, %len
+ %lftr.limit = getelementptr i8* %src, i32 %add.ptr79.us.sum
+ br label %for.body82.us
+
+for.body82.us:
+ %dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
+ %source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
+ %0 = bitcast i8* %source to i32*
+ %1 = load i32* %0, align 4
+ %trunc = trunc i32 %1 to i8
+ %add.ptr83.us = getelementptr inbounds i8* %source, i32 4
+ %incdec.ptr91.us = getelementptr inbounds i8* %dest, i32 1
+ store i8 %trunc, i8* %dest, align 1
+ %exitcond = icmp eq i8* %incdec.ptr91.us, %lftr.limit
+ br i1 %exitcond, label %return, label %for.body82.us
+
+return:
+ ret void
+}
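
What the ICmpZero use class means here, as a hedged before/after sketch in C (pack_before and pack_after are ours; the truncating i32-to-i8 copy mirrors the loop above): LSR may rewrite an exit test against a computed limit into a count-down to zero, which x86 shrinks to the dec/jne pair the checks expect.

    /* before: exit test compares the destination against a limit pointer */
    void pack_before(unsigned char *dst, const int *src, int len)
    {
        unsigned char *limit = dst + len;
        while (dst != limit)
            *dst++ = (unsigned char)*src++;
    }

    /* after the ICmpZero rewrite: count len down to zero instead */
    void pack_after(unsigned char *dst, const int *src, int len)
    {
        for (int n = len; n != 0; --n)   /* dec/jne on x86 */
            *dst++ = (unsigned char)*src++;
    }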
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
new file mode 100644
index 000000000000..d8e0aa9dc805
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -0,0 +1,96 @@
+; REQUIRES: asserts
+; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -march=x86 -mcpu=core2 -stress-ivchain | FileCheck %s -check-prefix=X32
+
+; @sharedidx is an unrolled variant of this loop:
+; for (unsigned long i = 0; i < len; i += s) {
+; c[i] = a[i] + b[i];
+; }
+; where 's' cannot be folded into the addressing mode.
+;
+; This is not quite profitable to chain. But with -stress-ivchain, we
+; can form three address chains in place of the shared induction
+; variable.
+
+; X64: sharedidx:
+; X64: %for.body.preheader
+; X64-NOT: leal ({{.*}},4)
+; X64: %for.body.1
+
+; X32: sharedidx:
+; X32: %for.body.2
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: add
+; X32: %for.body.3
+define void @sharedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c, i32 %s, i32 %len) nounwind ssp {
+entry:
+ %cmp8 = icmp eq i32 %len, 0
+ br i1 %cmp8, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body.3
+ %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8* %a, i32 %i.09
+ %0 = load i8* %arrayidx, align 1
+ %conv6 = zext i8 %0 to i32
+ %arrayidx1 = getelementptr inbounds i8* %b, i32 %i.09
+ %1 = load i8* %arrayidx1, align 1
+ %conv27 = zext i8 %1 to i32
+ %add = add nsw i32 %conv27, %conv6
+ %conv3 = trunc i32 %add to i8
+ %arrayidx4 = getelementptr inbounds i8* %c, i32 %i.09
+ store i8 %conv3, i8* %arrayidx4, align 1
+ %add5 = add i32 %i.09, %s
+ %cmp = icmp ult i32 %add5, %len
+ br i1 %cmp, label %for.body.1, label %for.end
+
+for.end: ; preds = %for.body, %for.body.1, %for.body.2, %for.body.3, %entry
+ ret void
+
+for.body.1: ; preds = %for.body
+ %arrayidx.1 = getelementptr inbounds i8* %a, i32 %add5
+ %2 = load i8* %arrayidx.1, align 1
+ %conv6.1 = zext i8 %2 to i32
+ %arrayidx1.1 = getelementptr inbounds i8* %b, i32 %add5
+ %3 = load i8* %arrayidx1.1, align 1
+ %conv27.1 = zext i8 %3 to i32
+ %add.1 = add nsw i32 %conv27.1, %conv6.1
+ %conv3.1 = trunc i32 %add.1 to i8
+ %arrayidx4.1 = getelementptr inbounds i8* %c, i32 %add5
+ store i8 %conv3.1, i8* %arrayidx4.1, align 1
+ %add5.1 = add i32 %add5, %s
+ %cmp.1 = icmp ult i32 %add5.1, %len
+ br i1 %cmp.1, label %for.body.2, label %for.end
+
+for.body.2: ; preds = %for.body.1
+ %arrayidx.2 = getelementptr inbounds i8* %a, i32 %add5.1
+ %4 = load i8* %arrayidx.2, align 1
+ %conv6.2 = zext i8 %4 to i32
+ %arrayidx1.2 = getelementptr inbounds i8* %b, i32 %add5.1
+ %5 = load i8* %arrayidx1.2, align 1
+ %conv27.2 = zext i8 %5 to i32
+ %add.2 = add nsw i32 %conv27.2, %conv6.2
+ %conv3.2 = trunc i32 %add.2 to i8
+ %arrayidx4.2 = getelementptr inbounds i8* %c, i32 %add5.1
+ store i8 %conv3.2, i8* %arrayidx4.2, align 1
+ %add5.2 = add i32 %add5.1, %s
+ %cmp.2 = icmp ult i32 %add5.2, %len
+ br i1 %cmp.2, label %for.body.3, label %for.end
+
+for.body.3: ; preds = %for.body.2
+ %arrayidx.3 = getelementptr inbounds i8* %a, i32 %add5.2
+ %6 = load i8* %arrayidx.3, align 1
+ %conv6.3 = zext i8 %6 to i32
+ %arrayidx1.3 = getelementptr inbounds i8* %b, i32 %add5.2
+ %7 = load i8* %arrayidx1.3, align 1
+ %conv27.3 = zext i8 %7 to i32
+ %add.3 = add nsw i32 %conv27.3, %conv6.3
+ %conv3.3 = trunc i32 %add.3 to i8
+ %arrayidx4.3 = getelementptr inbounds i8* %c, i32 %add5.2
+ store i8 %conv3.3, i8* %arrayidx4.3, align 1
+ %add5.3 = add i32 %add5.2, %s
+ %cmp.3 = icmp ult i32 %add5.3, %len
+ br i1 %cmp.3, label %for.body, label %for.end
+}
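
What -stress-ivchain forms for @sharedidx, sketched in C (illustrative; sharedidx_chained is ours): instead of one shared index i feeding all three arrays, each array gets its own running pointer, i.e. the "three address chains" the comment mentions.

    void sharedidx_chained(unsigned char *a, unsigned char *b,
                           unsigned char *c, int s, unsigned len)
    {
        unsigned char *pa = a, *pb = b, *pc = c;   /* three chains */
        for (unsigned i = 0; i < len; i += s, pa += s, pb += s, pc += s)
            *pc = (unsigned char)(*pa + *pb);
    }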
diff --git a/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
new file mode 100644
index 000000000000..da2db5a45f9c
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if 'X86' not in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopStrengthReduce/addrec-gep.ll b/test/Transforms/LoopStrengthReduce/addrec-gep.ll
new file mode 100644
index 000000000000..3e4e369657b0
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/addrec-gep.ll
@@ -0,0 +1,82 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK: bb1:
+; CHECK: load double* [[IV:%[^,]+]]
+; CHECK: store double {{.*}}, double* [[IV]]
+; CHECK: getelementptr double*
+; CHECK-NOT: cast
+; CHECK: br {{.*}} label %bb1
+
+; This test checks several things. The load and store should use the
+; same address instead of having it computed twice, and SCEVExpander should
+; be able to reconstruct the full getelementptr, despite it having a few
+; obstacles set in its way.
+; We only check that the inner loop (bb1-bb2) is "reduced" because LSR
+; currently only operates on inner loops.
+
+target datalayout = "e-p:64:64:64-n32:64"
+
+define void @foo(i64 %n, i64 %m, i64 %o, i64 %q, double* nocapture %p) nounwind {
+entry:
+ %tmp = icmp sgt i64 %n, 0 ; <i1> [#uses=1]
+ br i1 %tmp, label %bb.nph3, label %return
+
+bb.nph: ; preds = %bb2.preheader
+ %tmp1 = mul i64 %tmp16, %i.02 ; <i64> [#uses=1]
+ %tmp2 = mul i64 %tmp19, %i.02 ; <i64> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb2, %bb.nph
+ %j.01 = phi i64 [ %tmp9, %bb2 ], [ 0, %bb.nph ] ; <i64> [#uses=3]
+ %tmp3 = add i64 %j.01, %tmp1 ; <i64> [#uses=1]
+ %tmp4 = add i64 %j.01, %tmp2 ; <i64> [#uses=1]
+ %z0 = add i64 %tmp3, 5203
+ %tmp5 = getelementptr double* %p, i64 %z0 ; <double*> [#uses=1]
+ %tmp6 = load double* %tmp5, align 8 ; <double> [#uses=1]
+ %tmp7 = fdiv double %tmp6, 2.100000e+00 ; <double> [#uses=1]
+ %z1 = add i64 %tmp4, 5203
+ %tmp8 = getelementptr double* %p, i64 %z1 ; <double*> [#uses=1]
+ store double %tmp7, double* %tmp8, align 8
+ %tmp9 = add i64 %j.01, 1 ; <i64> [#uses=2]
+ br label %bb2
+
+bb2: ; preds = %bb1
+ %tmp10 = icmp slt i64 %tmp9, %m ; <i1> [#uses=1]
+ br i1 %tmp10, label %bb1, label %bb2.bb3_crit_edge
+
+bb2.bb3_crit_edge: ; preds = %bb2
+ br label %bb3
+
+bb3: ; preds = %bb2.preheader, %bb2.bb3_crit_edge
+ %tmp11 = add i64 %i.02, 1 ; <i64> [#uses=2]
+ br label %bb4
+
+bb4: ; preds = %bb3
+ %tmp12 = icmp slt i64 %tmp11, %n ; <i1> [#uses=1]
+ br i1 %tmp12, label %bb2.preheader, label %bb4.return_crit_edge
+
+bb4.return_crit_edge: ; preds = %bb4
+ br label %bb4.return_crit_edge.split
+
+bb4.return_crit_edge.split: ; preds = %bb.nph3, %bb4.return_crit_edge
+ br label %return
+
+bb.nph3: ; preds = %entry
+ %tmp13 = icmp sgt i64 %m, 0 ; <i1> [#uses=1]
+ %tmp14 = mul i64 %n, 37 ; <i64> [#uses=1]
+ %tmp15 = mul i64 %tmp14, %o ; <i64> [#uses=1]
+ %tmp16 = mul i64 %tmp15, %q ; <i64> [#uses=1]
+ %tmp17 = mul i64 %n, 37 ; <i64> [#uses=1]
+ %tmp18 = mul i64 %tmp17, %o ; <i64> [#uses=1]
+ %tmp19 = mul i64 %tmp18, %q ; <i64> [#uses=1]
+ br i1 %tmp13, label %bb.nph3.split, label %bb4.return_crit_edge.split
+
+bb.nph3.split: ; preds = %bb.nph3
+ br label %bb2.preheader
+
+bb2.preheader: ; preds = %bb.nph3.split, %bb4
+ %i.02 = phi i64 [ %tmp11, %bb4 ], [ 0, %bb.nph3.split ] ; <i64> [#uses=3]
+ br i1 true, label %bb.nph, label %bb3
+
+return: ; preds = %bb4.return_crit_edge.split, %entry
+ ret void
+}
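
A reconstructed C source for the loop nest above (hypothetical; foo_c and stride are our names): both the load and the store address the same element p[i*stride + j + 5203], and the IR computes stride = 37*n*o*q twice (%tmp16 and %tmp19). After -loop-reduce, SCEVExpander should emit that address once, as a plain getelementptr with no inttoptr/ptrtoint casts.

    void foo_c(long n, long m, long o, long q, double *p)
    {
        long stride = 37 * n * o * q;   /* %tmp16 == %tmp19 above */
        for (long i = 0; i < n; ++i)
            for (long j = 0; j < m; ++j)
                p[i * stride + j + 5203] /= 2.1;
    }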
diff --git a/test/Transforms/LoopStrengthReduce/dg.exp b/test/Transforms/LoopStrengthReduce/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopStrengthReduce/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopStrengthReduce/dominate-assert.ll b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
new file mode 100644
index 000000000000..b87bf620decf
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/dominate-assert.ll
@@ -0,0 +1,70 @@
+; RUN: opt -loop-reduce %s
+; we used to crash on this one
+
+declare i8* @_Znwm()
+declare i32 @__gxx_personality_v0(...)
+declare void @g()
+define void @f() {
+bb0:
+ br label %bb1
+bb1:
+ %v0 = phi i64 [ 0, %bb0 ], [ %v1, %bb1 ]
+ %v1 = add nsw i64 %v0, 1
+ br i1 undef, label %bb2, label %bb1
+bb2:
+ %v2 = icmp eq i64 %v0, 0
+ br i1 %v2, label %bb6, label %bb3
+bb3:
+ %v3 = invoke noalias i8* @_Znwm()
+ to label %bb5 unwind label %bb4
+bb4:
+ %v4 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %bb9
+bb5:
+ %v5 = bitcast i8* %v3 to i32**
+ %add.ptr.i = getelementptr inbounds i32** %v5, i64 %v0
+ br label %bb6
+bb6:
+ %v6 = phi i32** [ null, %bb2 ], [ %add.ptr.i, %bb5 ]
+ invoke void @g()
+ to label %bb7 unwind label %bb8
+bb7:
+ unreachable
+bb8:
+ %v7 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ br label %bb9
+bb9:
+ resume { i8*, i32 } zeroinitializer
+}
+
+
+define void @h() {
+bb1:
+ invoke void @g() optsize
+ to label %bb2 unwind label %bb5
+bb2:
+ %arrayctor.cur = phi i8* [ undef, %bb1 ], [ %arrayctor.next, %bb3 ]
+ invoke void @g() optsize
+ to label %bb3 unwind label %bb6
+bb3:
+ %arrayctor.next = getelementptr inbounds i8* %arrayctor.cur, i64 1
+ br label %bb2
+bb4:
+ ret void
+bb5:
+ %tmp = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ invoke void @g() optsize
+ to label %bb4 unwind label %bb7
+bb6:
+ %tmp1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %arraydestroy.isempty = icmp eq i8* undef, %arrayctor.cur
+ ret void
+bb7:
+ %lpad.nonloopexit = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* null
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
index abbfda6e9255..ad4959be340e 100644
--- a/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
+++ b/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll
@@ -9,7 +9,7 @@ entry:
br label %no_exit
no_exit: ; preds = %no_exit, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %no_exit ] ; <i32> [#uses=1]
- volatile store float 0.000000e+00, float* %D
+ store volatile float 0.000000e+00, float* %D
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
; CHECK: icmp
; CHECK-NEXT: br i1
diff --git a/test/Transforms/LoopStrengthReduce/ivchain.ll b/test/Transforms/LoopStrengthReduce/ivchain.ll
new file mode 100644
index 000000000000..ce7ad198de49
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/ivchain.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+;
+; PR11782: bad cast to AddRecExpr.
+; A sign extend feeds an IVUser and cannot be hoisted into the AddRec.
+; CollectIVChains should bail out on this case.
+
+%struct = type { i8*, i8*, i16, i64, i16, i16, i16, i64, i64, i16, i8*, i64, i64, i64 }
+
+; CHECK: @test
+; CHECK: for.body:
+; CHECK: lsr.iv = phi %struct
+; CHECK: br
+define i32 @test(i8* %h, i32 %more) nounwind uwtable {
+entry:
+ br i1 undef, label %land.end238, label %return
+
+land.end238: ; preds = %if.end229
+ br label %for.body
+
+for.body: ; preds = %sw.epilog, %land.end238
+ %fbh.0 = phi %struct* [ undef, %land.end238 ], [ %incdec.ptr, %sw.epilog ]
+ %column_n.0 = phi i16 [ 0, %land.end238 ], [ %inc601, %sw.epilog ]
+ %conv250 = sext i16 %column_n.0 to i32
+ %add257 = add nsw i32 %conv250, 1
+ %conv258 = trunc i32 %add257 to i16
+ %cmp263 = icmp ult i16 undef, 2
+ br label %if.end388
+
+if.end388: ; preds = %if.then380, %if.else356
+ %ColLength = getelementptr inbounds %struct* %fbh.0, i64 0, i32 7
+ %call405 = call signext i16 @SQLColAttribute(i8* undef, i16 zeroext %conv258, i16 zeroext 1003, i8* null, i16 signext 0, i16* null, i64* %ColLength) nounwind
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb542, %sw.bb523, %if.end475
+ %inc601 = add i16 %column_n.0, 1
+ %incdec.ptr = getelementptr inbounds %struct* %fbh.0, i64 1
+ br label %for.body
+
+return: ; preds = %entry
+ ret i32 1
+}
+
+declare signext i16 @SQLColAttribute(i8*, i16 zeroext, i16 zeroext, i8*, i16 signext, i16*, i64*)
diff --git a/test/Transforms/LoopStrengthReduce/lit.local.cfg b/test/Transforms/LoopStrengthReduce/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 27609157ec8c..96904c66e640 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -1,15 +1,15 @@
; RUN: opt -loop-reduce -S < %s | FileCheck %s
; PR9939
-; LSR should property handle the post-inc offset when folding the
+; LSR should properly handle the post-inc offset when folding the
; non-IV operand of an icmp into the IV.
-; CHECK: %5 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: %6 = lshr i64 %5, 1
-; CHECK: %7 = mul i64 %6, 2
+; CHECK: %4 = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: %5 = lshr i64 %4, 1
+; CHECK: %6 = mul i64 %5, 2
; CHECK: br label %for.body
; CHECK: for.body:
-; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %7, %for.body.lr.ph ]
+; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ %6, %for.body.lr.ph ]
; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2
; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16*
; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null
diff --git a/test/Transforms/LoopStrengthReduce/pr12018.ll b/test/Transforms/LoopStrengthReduce/pr12018.ll
new file mode 100644
index 000000000000..ee7b1e8883e6
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12018.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+
+%struct.nsTArray = type { i8 }
+%struct.nsTArrayHeader = type { i32 }
+
+define void @_Z6foobarR8nsTArray(%struct.nsTArray* %aValues, i32 %foo, %struct.nsTArrayHeader* %bar) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %_ZN8nsTArray9ElementAtEi.exit, %entry
+ %i.06 = phi i32 [ %add, %_ZN8nsTArray9ElementAtEi.exit ], [ 0, %entry ]
+ %call.i = call %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev() nounwind
+ %add.ptr.i = getelementptr inbounds %struct.nsTArrayHeader* %call.i, i32 1
+ %tmp = bitcast %struct.nsTArrayHeader* %add.ptr.i to %struct.nsTArray*
+ %arrayidx = getelementptr inbounds %struct.nsTArray* %tmp, i32 %i.06
+ %add = add nsw i32 %i.06, 1
+ call void @llvm.dbg.value(metadata !{%struct.nsTArray* %aValues}, i64 0, metadata !0) nounwind
+ br label %_ZN8nsTArray9ElementAtEi.exit
+
+_ZN8nsTArray9ElementAtEi.exit: ; preds = %for.body
+ %arrayidx.i = getelementptr inbounds %struct.nsTArray* %tmp, i32 %add
+ call void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray* %arrayidx, %struct.nsTArray* %arrayidx.i) nounwind
+ %cmp = icmp slt i32 %add, %foo
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %_ZN8nsTArray9ElementAtEi.exit
+ ret void
+}
+
+declare void @_ZN11nsTArray15ComputeDistanceERKS_Rd(%struct.nsTArray*, %struct.nsTArray*)
+
+declare %struct.nsTArrayHeader* @_ZN8nsTArray4Hdr2Ev()
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i32 786689} ; [ DW_TAG_arg_variable ]
diff --git a/test/Transforms/LoopStrengthReduce/pr12048.ll b/test/Transforms/LoopStrengthReduce/pr12048.ll
new file mode 100644
index 000000000000..7e0f2ad7a3c7
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr12048.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-reduce
+
+define void @resolve_name() nounwind uwtable ssp {
+ br label %while.cond40.preheader
+while.cond132.while.cond.loopexit_crit_edge:
+ br label %while.cond40.preheader
+while.cond40.preheader:
+ br label %while.cond40
+while.cond40:
+ %indvars.iv194 = phi i8* [ null, %while.cond40.preheader ], [ %scevgep, %while.body51 ]
+ %tmp.1 = phi i8* [ undef, %while.cond40.preheader ], [ %incdec.ptr, %while.body51 ]
+ switch i8 undef, label %while.body51 [
+ i8 0, label %if.then59
+ ]
+while.body51: ; preds = %land.end50
+ %incdec.ptr = getelementptr inbounds i8* %tmp.1, i64 1
+ %scevgep = getelementptr i8* %indvars.iv194, i64 1
+ br label %while.cond40
+if.then59: ; preds = %while.end
+ br i1 undef, label %if.then64, label %if.end113
+if.then64: ; preds = %if.then59
+ %incdec.ptr88.tmp.2 = select i1 undef, i8* undef, i8* undef
+ br label %if.end113
+if.end113: ; preds = %if.then64, %if.then59
+ %tmp.4 = phi i8* [ %incdec.ptr88.tmp.2, %if.then64 ], [ undef, %if.then59 ]
+ %tmp.4195 = ptrtoint i8* %tmp.4 to i64
+ br label %while.cond132.preheader
+while.cond132.preheader: ; preds = %if.end113
+ %cmp133173 = icmp eq i8* %tmp.1, %tmp.4
+ br i1 %cmp133173, label %while.cond40.preheader, label %while.body139.lr.ph
+while.body139.lr.ph: ; preds = %while.cond132.preheader
+ %scevgep198 = getelementptr i8* %indvars.iv194, i64 0
+ %scevgep198199 = ptrtoint i8* %scevgep198 to i64
+ br label %while.body139
+while.body139: ; preds = %while.body139, %while.body139.lr.ph
+ %start_of_var.0177 = phi i8* [ %tmp.1, %while.body139.lr.ph ], [ null, %while.body139 ]
+ br i1 undef, label %while.cond132.while.cond.loopexit_crit_edge, label %while.body139
+}
diff --git a/test/Transforms/LoopStrengthReduce/pr3399.ll b/test/Transforms/LoopStrengthReduce/pr3399.ll
index b809007fea8f..26c5002fdecf 100644
--- a/test/Transforms/LoopStrengthReduce/pr3399.ll
+++ b/test/Transforms/LoopStrengthReduce/pr3399.ll
@@ -13,7 +13,7 @@ bb: ; preds = %bb5, %bb5.thread
bb1: ; preds = %bb
%l_2.0.reg2mem.0 = sub i32 0, %indvar ; <i32> [#uses=1]
- %0 = volatile load i32* @g_53, align 4 ; <i32> [#uses=1]
+ %0 = load volatile i32* @g_53, align 4 ; <i32> [#uses=1]
%1 = trunc i32 %l_2.0.reg2mem.0 to i16 ; <i16> [#uses=1]
%2 = trunc i32 %0 to i16 ; <i16> [#uses=1]
%3 = mul i16 %2, %1 ; <i16> [#uses=1]
diff --git a/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll b/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll
new file mode 100644
index 000000000000..f90d03080056
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/preserve-gep-loop-variant.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+; CHECK-NOT: {{inttoptr|ptrtoint}}
+; CHECK: scevgep
+; CHECK-NOT: {{inttoptr|ptrtoint}}
+target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n32:64"
+
+; Indvars shouldn't need inttoptr/ptrtoint to expand an address here.
+
+define void @foo(i8* %p) nounwind {
+entry:
+ br i1 true, label %bb.nph, label %for.end
+
+for.cond:
+ %phitmp = icmp slt i64 %inc, 20
+ br i1 %phitmp, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:
+ br label %for.end
+
+bb.nph:
+ br label %for.body
+
+for.body:
+ %storemerge1 = phi i64 [ %inc, %for.cond ], [ 0, %bb.nph ]
+ %call = tail call i64 @bar() nounwind
+ %call2 = tail call i64 @car() nounwind
+ %conv = trunc i64 %call2 to i8
+ %conv3 = sext i8 %conv to i64
+ %add = add nsw i64 %call, %storemerge1
+ %add4 = add nsw i64 %add, %conv3
+ %arrayidx = getelementptr inbounds i8* %p, i64 %add4
+ store i8 0, i8* %arrayidx
+ %inc = add nsw i64 %storemerge1, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+declare i64 @bar()
+
+declare i64 @car()
diff --git a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
index 59551d5c720c..a43a4ffc557c 100644
--- a/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
+++ b/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S < %s -loop-unroll -unroll-count=4 -enable-iv-rewrite=false | FileCheck %s
+; RUN: opt -S < %s -loop-unroll -unroll-count=4 | FileCheck %s
;
; Test induction variable simplification after loop unrolling. It should
; expose nice opportunities for GVN.
diff --git a/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll b/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll
new file mode 100644
index 000000000000..8946a23c200f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/2012-04-09-unroll-indirectbr.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -S -loop-unroll -simplifycfg | FileCheck %s
+; PR12513: Loop unrolling breaks with indirect branches.
+; If loop unrolling attempts to transform this loop, it replaces the
+; indirectbr successors. SimplifyCFG then considers them to be unreachable.
+declare void @subtract() nounwind uwtable
+
+; CHECK-NOT: unreachable
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
+entry:
+ %vals19 = alloca [5 x i32], align 16
+ %x20 = alloca i32, align 4
+ store i32 135, i32* %x20, align 4
+ br label %for.body
+
+for.body: ; preds = %entry, %call2_termjoin
+ %indvars.iv = phi i64 [ 0, %entry ], [ %joinphi15.in.in, %call2_termjoin ]
+ %a6 = call coldcc i8* @funca(i8* blockaddress(@main, %for.body_code), i8* blockaddress(@main, %for.body_codeprime)) nounwind
+ indirectbr i8* %a6, [label %for.body_code, label %for.body_codeprime]
+
+for.body_code: ; preds = %for.body
+ call void @subtract()
+ br label %call2_termjoin
+
+call2_termjoin: ; preds = %for.body_codeprime, %for.body_code
+ %joinphi15.in.in = add i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %joinphi15.in.in, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %call2_termjoin
+ ret i32 0
+
+for.body_codeprime: ; preds = %for.body
+ call void @subtract_v2(i64 %indvars.iv)
+ br label %call2_termjoin
+}
+
+declare coldcc i8* @funca(i8*, i8*) readonly
+
+declare void @subtract_v2(i64) nounwind uwtable
diff --git a/test/Transforms/LoopUnroll/dg.exp b/test/Transforms/LoopUnroll/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopUnroll/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnroll/lit.local.cfg b/test/Transforms/LoopUnroll/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopUnroll/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
new file mode 100644
index 000000000000..3179d55e978a
--- /dev/null
+++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
+; Loop size = 3. When the function has the optsize attribute, the
+; OptSizeUnrollThreshold (50) is used instead of the default, so the loop
+; should be unrolled 16 times: 3 * 16 = 48 < 50, while 3 * 32 = 96 would
+; exceed the threshold.
+define void @unroll_opt_for_size() nounwind optsize {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: icmp
diff --git a/test/Transforms/LoopUnroll/pr11361.ll b/test/Transforms/LoopUnroll/pr11361.ll
new file mode 100644
index 000000000000..7ce7f5fe4600
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr11361.ll
@@ -0,0 +1,42 @@
+; RUN: opt < %s -loop-unroll -disable-output
+; PR11361
+
+; This tests for an iterator invalidation issue.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @func_1() nounwind uwtable {
+entry:
+ br label %for.cond8.preheader
+
+for.cond8.preheader: ; preds = %for.inc15, %entry
+ %l_1264.04 = phi i32 [ 0, %entry ], [ %add.i, %for.inc15 ]
+ %l_1330.0.03 = phi i80 [ undef, %entry ], [ %ins.lcssa, %for.inc15 ]
+ br label %for.body9
+
+for.body9: ; preds = %for.body9, %for.cond8.preheader
+ %l_1330.0.12 = phi i80 [ %l_1330.0.03, %for.cond8.preheader ], [ %ins, %for.body9 ]
+ %storemerge1 = phi i32 [ 7, %for.cond8.preheader ], [ %sub, %for.body9 ]
+ %tmp = lshr i80 %l_1330.0.12, 8
+ %tmp1 = trunc i80 %tmp to i8
+ %inc12 = add i8 %tmp1, 1
+ %tmp2 = zext i8 %inc12 to i80
+ %tmp3 = shl nuw nsw i80 %tmp2, 8
+ %mask = and i80 %l_1330.0.12, -65281
+ %ins = or i80 %tmp3, %mask
+ %sub = add nsw i32 %storemerge1, -1
+ %tobool = icmp eq i32 %sub, 0
+ br i1 %tobool, label %for.inc15, label %for.body9
+
+for.inc15: ; preds = %for.body9
+ %ins.lcssa = phi i80 [ %ins, %for.body9 ]
+ %sext = shl i32 %l_1264.04, 24
+ %conv.i = ashr exact i32 %sext, 24
+ %add.i = add nsw i32 %conv.i, 1
+ %cmp = icmp slt i32 %add.i, 3
+ br i1 %cmp, label %for.cond8.preheader, label %for.end16
+
+for.end16: ; preds = %for.inc15
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
new file mode 100644
index 000000000000..d8bbea9f1073
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -0,0 +1,109 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
+
+; Tests for unrolling loops with run-time trip counts
+
+; CHECK: unr.cmp{{.*}}:
+; CHECK: for.body.unr{{.*}}:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
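
The control-flow shape runtime unrolling produces for @test, as a hedged C sketch (test_c is ours; the unroll factor 8 is inferred from the %exitcond.7 check above): peel n % 8 iterations in a prologue guarded by unr.cmp, then run the 8x-unrolled body.

    int test_c(const int *a, int n)
    {
        int sum = 0, i = 0;
        int rem = n % 8;            /* unr.cmp guards the prologue  */
        for (; i != rem; ++i)       /* for.body.unr: peeled copies  */
            sum += a[i];
        for (; i != n; i += 8) {    /* for.body: 8x-unrolled loop   */
            sum += a[i];     sum += a[i + 1];
            sum += a[i + 2]; sum += a[i + 3];
            sum += a[i + 4]; sum += a[i + 5];
            sum += a[i + 6]; sum += a[i + 7];
        }
        return sum;
    }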
+
+
+; Still try to completely unroll loops with compile-time trip counts,
+; even if -unroll-runtime is specified
+
+; CHECK: for.body:
+; CHECK-NOT: for.body.unr:
+
+define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.01
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 5
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %add
+}
+
+; This is test 2007-05-09-UnknownTripCount.ll, which can now be unrolled
+; if the -unroll-runtime option is turned on.
+
+; CHECK: bb72.2:
+
+define void @foo(i32 %trips) {
+entry:
+ br label %cond_true.outer
+
+cond_true.outer:
+ %indvar1.ph = phi i32 [ 0, %entry ], [ %indvar.next2, %bb72 ]
+ br label %bb72
+
+bb72:
+ %indvar.next2 = add i32 %indvar1.ph, 1
+ %exitcond3 = icmp eq i32 %indvar.next2, %trips
+ br i1 %exitcond3, label %cond_true138, label %cond_true.outer
+
+cond_true138:
+ ret void
+}
+
+
+; Test run-time unrolling for a loop that counts down by 2.
+
+; CHECK: for.body.unr:
+; CHECK: br i1 %cmp.7, label %for.cond.for.end_crit_edge{{.*}}, label %for.body
+
+define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
+entry:
+ %cmp2 = icmp eq i32 %len, 0
+ br i1 %cmp2, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ]
+ %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
+ %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %incdec.ptr = getelementptr inbounds i16* %p.addr.05, i64 1
+ %0 = load i16* %p.addr.05, align 2
+ %conv = zext i16 %0 to i32
+ %add = add i32 %conv, %res.03
+ %sub = add nsw i32 %len.addr.04, -2
+ %cmp = icmp eq i32 %sub, 0
+ br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ %phitmp = trunc i32 %add to i16
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ]
+ ret i16 %res.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
new file mode 100644
index 000000000000..ad99b8cd9c66
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=4 | FileCheck %s
+
+; This tests that setting the unroll count via -unroll-count works.
+
+; CHECK: unr.cmp:
+; CHECK: for.body.unr:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
new file mode 100644
index 000000000000..cbc7af58ff5b
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 -unroll-runtime -unroll-count=8 | FileCheck %s
+
+; Choose a smaller, power-of-two unroll count if the loop is too large.
+; This test makes sure we don't unroll by an 'odd' (non-power-of-two) count.
+
+; CHECK: unr.cmp:
+; CHECK: for.body.unr:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
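Restricting the reduced count to a power of two keeps the split cheap: the remainder is a mask of the trip count rather than a division, and the prolog needs at most count-1 iterations. A hedged C++ sketch of clamping a requested count under a size budget (clampUnrollCount is hypothetical; the pass's real cost model is more involved):

    // Halve the count until the unrolled body fits the threshold; halving
    // a power-of-two count preserves power-of-two-ness.
    unsigned clampUnrollCount(unsigned requested, unsigned loopSize,
                              unsigned threshold) {
      unsigned count = requested;  // assumed to be a power of two
      while (count > 1 && count * loopSize > threshold)
        count >>= 1;               // e.g. 8 -> 4 for this test
      return count;
    }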
diff --git a/test/Transforms/LoopUnroll/runtime-loop3.ll b/test/Transforms/LoopUnroll/runtime-loop3.ll
new file mode 100644
index 000000000000..55cf22373ece
--- /dev/null
+++ b/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS
+
+; Test that nested loops can be unrolled. We need to increase the threshold to do it.
+
+; STATS: 2 loop-unroll - Number of loops unrolled (completely or otherwise)
+
+define i32 @nested(i32* nocapture %a, i32 %n, i32 %m) nounwind uwtable readonly {
+entry:
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.cond1.preheader.lr.ph, label %for.end7
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %cmp28 = icmp sgt i32 %m, 0
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc5, %for.cond1.preheader.lr.ph
+ %indvars.iv16 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next17, %for.inc5 ]
+ %sum.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %sum.1.lcssa, %for.inc5 ]
+ br i1 %cmp28, label %for.body3, label %for.inc5
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.cond1.preheader ]
+ %sum.19 = phi i32 [ %add4, %for.body3 ], [ %sum.012, %for.cond1.preheader ]
+ %0 = add nsw i64 %indvars.iv, %indvars.iv16
+ %arrayidx = getelementptr inbounds i32* %a, i64 %0
+ %1 = load i32* %arrayidx, align 4
+ %add4 = add nsw i32 %1, %sum.19
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %m
+ br i1 %exitcond, label %for.inc5, label %for.body3
+
+for.inc5: ; preds = %for.body3, %for.cond1.preheader
+ %sum.1.lcssa = phi i32 [ %sum.012, %for.cond1.preheader ], [ %add4, %for.body3 ]
+ %indvars.iv.next17 = add i64 %indvars.iv16, 1
+ %lftr.wideiv18 = trunc i64 %indvars.iv.next17 to i32
+ %exitcond19 = icmp eq i32 %lftr.wideiv18, %n
+ br i1 %exitcond19, label %for.end7, label %for.cond1.preheader
+
+for.end7: ; preds = %for.inc5, %entry
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.inc5 ]
+ ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll
index 217c8cea56f7..5a9cacda443c 100644
--- a/test/Transforms/LoopUnroll/unloop.ll
+++ b/test/Transforms/LoopUnroll/unloop.ll
@@ -427,3 +427,44 @@ if.end2413: ; preds = %defchar
return: ; preds = %sw.bb304
ret void
}
+
+; PR11335: the most deeply nested block should be removed from the outer loop.
+; CHECK: @removeSubloopBlocks2
+; CHECK: for.cond3:
+; CHECK-NOT: br
+; CHECK: ret void
+define void @removeSubloopBlocks2() nounwind {
+entry:
+ %tobool.i = icmp ne i32 undef, 0
+ br label %lbl_616
+
+lbl_616.loopexit: ; preds = %for.cond
+ br label %lbl_616
+
+lbl_616: ; preds = %lbl_616.loopexit, %entry
+ br label %for.cond
+
+for.cond: ; preds = %for.cond3, %lbl_616
+ br i1 false, label %for.cond1.preheader, label %lbl_616.loopexit
+
+for.cond1.preheader: ; preds = %for.cond
+ br label %for.cond1
+
+for.cond1.loopexit: ; preds = %for.cond.i
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1.loopexit, %for.cond1.preheader
+ br i1 false, label %for.body2, label %for.cond3
+
+for.body2: ; preds = %for.cond1
+ br label %for.cond.i
+
+for.cond.i: ; preds = %for.cond.i, %for.body2
+ br i1 %tobool.i, label %for.cond.i, label %for.cond1.loopexit
+
+for.cond3: ; preds = %for.cond1
+ br i1 false, label %for.cond, label %if.end
+
+if.end: ; preds = %for.cond3
+ ret void
+}
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
new file mode 100644
index 000000000000..8389fe46436f
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -0,0 +1,91 @@
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 2 loop-unswitch - Number of switches unswitched
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK-NEXT: %var_val.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %default.us-lcssa.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: %2 = icmp eq i32 %c, 2
+; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK-NEXT: br label %.split.split
+
+; CHECK: .split.split.us: ; preds = %.split
+; CHECK-NEXT: br label %loop_begin.us1
+
+; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
+; CHECK-NEXT: %var_val.us2 = load i32* %var
+; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
+; CHECK-NEXT: i32 1, label %inc.us3
+; CHECK-NEXT: i32 2, label %dec.us4
+; CHECK-NEXT: ]
+
+; CHECK: dec.us4: ; preds = %loop_begin.us1
+; CHECK-NEXT: call void @decf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us5
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT: %var_val = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [
+; CHECK-NEXT: i32 1, label %inc
+; CHECK-NEXT: i32 2, label %dec
+; CHECK-NEXT: ]
+
+; CHECK: inc: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc.split
+
+; CHECK: dec: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable6, label %dec.split
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+ call void @decf() noreturn nounwind
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
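Unswitching hoists the loop-invariant switch on %c out of the loop by testing %c once up front and branching to a specialized clone per interesting case value, which is why the CHECK lines above expect switches with constant operands (switch i32 1, switch i32 2) inside the clones. A rough source-level C++ rendering of before and after (illustrative only; incf/decf mirror the test's declarations):

    extern "C" void incf();  // mirrors the test's declarations
    extern "C" void decf();

    // Before: the switch on the invariant c executes every iteration.
    void before(int c, volatile int *var) {
      for (;;) {
        (void)*var;
        if (c == 1)      incf();
        else if (c == 2) decf();
        else             return;  // default: exit the loop
      }
    }

    // After unswitching twice: c is tested once, each clone is straight-line.
    void after(int c, volatile int *var) {
      if (c == 1) {
        for (;;) { (void)*var; incf(); }  // clone with the switch folded to 1
      } else if (c == 2) {
        for (;;) { (void)*var; decf(); }  // clone with the switch folded to 2
      } else {
        (void)*var;                       // default path exits immediately
      }
    }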
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
new file mode 100644
index 000000000000..05d98d513e0c
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 13 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 13 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 1 loop-unswitch - Number of switches unswitched
+
+; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK: switch i32 1, label %second_switch.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: second_switch.us: ; preds = %loop_begin.us
+; CHECK-NEXT: switch i32 %d, label %default.us [
+; CHECK-NEXT: i32 1, label %inc.us
+; CHECK-NEXT: ]
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split
+; CHECK: switch i32 %c, label %second_switch [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable, label %inc
+
+; CHECK: second_switch: ; preds = %loop_begin
+; CHECK-NEXT: switch i32 %d, label %default [
+; CHECK-NEXT: i32 1, label %inc
+; CHECK-NEXT: ]
+
+; CHECK: inc: ; preds = %loop_begin.inc_crit_edge, %second_switch
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+ %d = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %second_switch [
+ i32 1, label %inc
+ ]
+
+second_switch:
+ switch i32 %d, label %default [
+ i32 1, label %inc
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+
+default:
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
new file mode 100644
index 000000000000..1b186d6becbb
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -0,0 +1,138 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 3 loop-unswitch - Number of switches unswitched
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: %2 = icmp eq i32 %d, 1
+; CHECK-NEXT: br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge
+
+; CHECK: .split.us..split.us.split_crit_edge: ; preds = %.split.us
+; CHECK-NEXT: br label %.split.us.split
+
+; CHECK: .split.us.split.us: ; preds = %.split.us
+; CHECK-NEXT: br label %loop_begin.us.us
+
+; CHECK: loop_begin.us.us: ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
+; CHECK-NEXT: %var_val.us.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %second_switch.us.us [
+; CHECK-NEXT: i32 1, label %inc.us.us
+
+; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us.us
+
+; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
+; CHECK-NEXT: switch i32 1, label %default.us.us [
+; CHECK-NEXT: i32 1, label %inc.us.us
+
+; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us.split
+; CHECK-NEXT: %var_val.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %second_switch.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: second_switch.us: ; preds = %loop_begin.us
+; CHECK-NEXT: switch i32 %d, label %default.us [
+; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us
+; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: %3 = icmp eq i32 %d, 1
+; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK-NEXT: br label %.split.split
+
+; CHECK: .split.split.us: ; preds = %.split
+; CHECK-NEXT: br label %loop_begin.us1
+
+; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
+; CHECK-NEXT: %var_val.us2 = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
+; CHECK-NEXT: ]
+
+; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us6
+
+; CHECK: second_switch.us4: ; preds = %loop_begin.us1
+; CHECK-NEXT: switch i32 1, label %default.us5 [
+; CHECK-NEXT: i32 1, label %inc.us3
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT: %var_val = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %second_switch [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc
+
+; CHECK: second_switch: ; preds = %loop_begin
+; CHECK-NEXT: switch i32 %d, label %default [
+; CHECK-NEXT: i32 1, label %second_switch.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: second_switch.inc_crit_edge: ; preds = %second_switch
+; CHECK-NEXT: br i1 true, label %us-unreachable7, label %inc
+
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+ %d = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %second_switch [
+ i32 1, label %inc
+ ]
+
+second_switch:
+ switch i32 %d, label %default [
+ i32 1, label %inc
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+
+default:
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll b/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll
new file mode 100644
index 000000000000..c92f0a2be3fc
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2012-04-02-IndirectBr.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -S -loop-unswitch -verify-loop-info -verify-dom-info | FileCheck %s
+; PR12343: -loop-unswitch crash on indirect branch
+
+; CHECK: %0 = icmp eq i64 undef, 0
+; CHECK-NEXT: br i1 %0, label %"5", label %"4"
+
+; CHECK: "5": ; preds = %entry
+; CHECK-NEXT: br label %"16"
+
+; CHECK: "16": ; preds = %"22", %"5"
+; CHECK-NEXT: indirectbr i8* undef, [label %"22", label %"33"]
+
+; CHECK: "22": ; preds = %"16"
+; CHECK-NEXT: br i1 %0, label %"16", label %"26"
+
+; CHECK: "26": ; preds = %"22"
+; CHECK-NEXT: unreachable
+
+define void @foo() {
+entry:
+ %0 = icmp eq i64 undef, 0
+ br i1 %0, label %"5", label %"4"
+
+"4": ; preds = %entry
+ unreachable
+
+"5": ; preds = %entry
+ br label %"16"
+
+"16": ; preds = %"22", %"5"
+ indirectbr i8* undef, [label %"22", label %"33"]
+
+"22": ; preds = %"16"
+ br i1 %0, label %"16", label %"26"
+
+"26": ; preds = %"22"
+ unreachable
+
+"33": ; preds = %"16"
+ unreachable
+}
diff --git a/test/Transforms/LoopUnswitch/dg.exp b/test/Transforms/LoopUnswitch/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LoopUnswitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LoopUnswitch/lit.local.cfg b/test/Transforms/LoopUnswitch/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerAtomic/dg.exp b/test/Transforms/LowerAtomic/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LowerAtomic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerAtomic/lit.local.cfg b/test/Transforms/LowerAtomic/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LowerAtomic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerExpectIntrinsic/dg.exp b/test/Transforms/LowerExpectIntrinsic/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LowerExpectIntrinsic/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerExpectIntrinsic/lit.local.cfg b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/LowerInvoke/dg.exp b/test/Transforms/LowerInvoke/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LowerInvoke/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerInvoke/lit.local.cfg b/test/Transforms/LowerInvoke/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LowerInvoke/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LowerSwitch/dg.exp b/test/Transforms/LowerSwitch/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/LowerSwitch/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerSwitch/lit.local.cfg b/test/Transforms/LowerSwitch/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LowerSwitch/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index 52a837531296..ea0d515498cf 100644
--- a/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -std-compile-opts -S | grep volatile | count 3
; PR1520
-; Don't promote volatile loads/stores. This is really needed to handle setjmp/lonjmp properly.
+; Don't promote volatile loads/stores. This is really needed to handle setjmp/longjmp properly.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i686-pc-linux-gnu"
@@ -14,7 +14,7 @@ entry:
%v = alloca i32, align 4 ; <i32*> [#uses=3]
%tmp = alloca i32, align 4 ; <i32*> [#uses=3]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- volatile store i32 0, i32* %v, align 4
+ store volatile i32 0, i32* %v, align 4
%tmp1 = call i32 @_setjmp( %struct.__jmp_buf_tag* getelementptr ([1 x %struct.__jmp_buf_tag]* @j, i32 0, i32 0) ) ; <i32> [#uses=1]
%tmp2 = icmp ne i32 %tmp1, 0 ; <i1> [#uses=1]
%tmp23 = zext i1 %tmp2 to i8 ; <i8> [#uses=1]
@@ -22,12 +22,12 @@ entry:
br i1 %toBool, label %bb, label %bb5
bb: ; preds = %entry
- %tmp4 = volatile load i32* %v, align 4 ; <i32> [#uses=1]
+ %tmp4 = load volatile i32* %v, align 4 ; <i32> [#uses=1]
store i32 %tmp4, i32* %tmp, align 4
br label %bb6
bb5: ; preds = %entry
- volatile store i32 1, i32* %v, align 4
+ store volatile i32 1, i32* %v, align 4
call void @g( )
store i32 0, i32* %tmp, align 4
br label %bb6
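The reason promotion must be blocked: locals modified between setjmp and a later longjmp have indeterminate values after the jump unless they are volatile-qualified, so rewriting the volatile accesses into SSA values would silently drop that guarantee. A minimal C++ sketch of the pattern this test encodes (assumes only the standard <csetjmp>; g mirrors the test's external call):

    #include <csetjmp>

    static std::jmp_buf j;
    extern "C" void g();  // may longjmp(j, 1), like the callee in the test

    // v must stay in memory: without 'volatile', its value after the
    // longjmp would be indeterminate, so mem2reg may not promote it.
    int f() {
      volatile int v = 0;
      if (setjmp(j))
        return v;  // must observe the volatile store below
      v = 1;
      g();         // may longjmp back to the setjmp above
      return 0;
    }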
diff --git a/test/Transforms/Mem2Reg/dg.exp b/test/Transforms/Mem2Reg/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/Mem2Reg/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Mem2Reg/lit.local.cfg b/test/Transforms/Mem2Reg/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Mem2Reg/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/dg.exp b/test/Transforms/MemCpyOpt/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/MemCpyOpt/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index 1ac97e9e6b91..8832f897b089 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -57,8 +57,8 @@ entry:
declare i32 @bar(...)
+%struct.MV = type { i16, i16 }
- %struct.MV = type { i16, i16 }
define void @test2() nounwind {
entry:
@@ -220,3 +220,31 @@ entry:
; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 24, i32 1, i1 false)
}
+; More aggressive heuristic
+; rdar://9892684
+define void @test7(i32* nocapture %c) nounwind optsize {
+ store i32 -1, i32* %c, align 4
+ %1 = getelementptr inbounds i32* %c, i32 1
+ store i32 -1, i32* %1, align 4
+ %2 = getelementptr inbounds i32* %c, i32 2
+ store i32 -1, i32* %2, align 4
+ %3 = getelementptr inbounds i32* %c, i32 3
+ store i32 -1, i32* %3, align 4
+ %4 = getelementptr inbounds i32* %c, i32 4
+ store i32 -1, i32* %4, align 4
+; CHECK: @test7
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %5, i8 -1, i64 20, i32 4, i1 false)
+ ret void
+}
+
+%struct.test8 = type { [4 x i32] }
+
+define void @test8() {
+entry:
+ %memtmp = alloca %struct.test8, align 16
+ %0 = bitcast %struct.test8* %memtmp to <4 x i32>*
+ store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+ ret void
+; CHECK: @test8
+; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
+}
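test7 above works because i32 -1 is byte-wise uniform (0xFF), so five adjacent stores cover 20 identical bytes and can collapse into one llvm.memset, as the CHECK line expects. A C++ equivalence sketch (function names are hypothetical):

    #include <cstring>

    // What the five scalar stores in test7 compute...
    void test7_stores(int *c) {
      for (int i = 0; i < 5; ++i)
        c[i] = -1;                            // each i32 is bytes ff ff ff ff
    }

    // ...and the single call MemCpyOpt forms from them.
    void test7_memset(int *c) {
      std::memset(c, 0xFF, 5 * sizeof(int));  // 20 identical bytes
    }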
diff --git a/test/Transforms/MemCpyOpt/lit.local.cfg b/test/Transforms/MemCpyOpt/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 71d4d4e8a11f..63d0ebf5c137 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -59,7 +59,7 @@ define void @test3(%0* noalias sret %agg.result) nounwind {
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
ret void
; CHECK: @test3
-; CHECK-NEXT: %agg.result2 = bitcast
+; CHECK-NEXT: %agg.result1 = bitcast
; CHECK-NEXT: call void @llvm.memcpy
; CHECK-NEXT: ret void
}
@@ -130,3 +130,21 @@ declare i32 @g(%struct.p* byval align 8)
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+; PR11142 - When looking for a memcpy-memcpy dependency, don't get stuck on
+; instructions between the memcpy calls that only affect the destination pointer.
+@test8.str = internal constant [7 x i8] c"ABCDEF\00"
+
+define void @test8() {
+; CHECK: test8
+; CHECK-NOT: memcpy
+ %A = tail call i8* @malloc(i32 10)
+ %B = getelementptr inbounds i8* %A, i64 2
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %B, i8* getelementptr inbounds ([7 x i8]* @test8.str, i64 0, i64 0), i32 7, i32 1, i1 false)
+ %C = tail call i8* @malloc(i32 10)
+ %D = getelementptr inbounds i8* %C, i64 2
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %D, i8* %B, i32 7, i32 1, i1 false)
+ ret void
+; CHECK: ret void
+}
+
+declare noalias i8* @malloc(i32)
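In test8 the only instructions between the two copies compute the second destination pointer, so the dependency walk can step over them, observe that %B still holds the bytes of @test8.str, and redirect the second copy to the original source. A hedged C++ sketch of that shape (illustrative; malloc error handling omitted):

    #include <cstdlib>
    #include <cstring>

    static const char str[7] = "ABCDEF";  // like @test8.str

    void test8_like() {
      char *A = static_cast<char *>(std::malloc(10));
      char *B = A + 2;
      std::memcpy(B, str, 7);
      char *C = static_cast<char *>(std::malloc(10));
      char *D = C + 2;        // only affects the destination pointer
      std::memcpy(D, B, 7);   // rewritable to: memcpy(D, str, 7)
      std::free(A);
      std::free(C);
    }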
diff --git a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
index 201903e99545..e3e52b401af5 100644
--- a/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
+++ b/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -75,10 +75,12 @@ bb2: ; preds = %bb1, %invcont
ret void
lpad: ; preds = %bb
- %eh_ptr = call i8* @llvm.eh.exception()
- store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+ store i8* %exn, i8** %eh_exception
%eh_ptr4 = load i8** %eh_exception
- %eh_select5 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0)
+ %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
store i32 %eh_select5, i32* %eh_selector
%eh_select = load i32* %eh_selector
store i32 %eh_select, i32* %save_filt.150, align 4
@@ -199,10 +201,12 @@ bb2: ; preds = %bb1, %invcont
ret void
lpad: ; preds = %bb
- %eh_ptr = call i8* @llvm.eh.exception()
- store i8* %eh_ptr, i8** %eh_exception
+ %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %exn = extractvalue { i8*, i32 } %eh_ptr, 0
+ store i8* %exn, i8** %eh_exception
%eh_ptr4 = load i8** %eh_exception
- %eh_select5 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr4, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i32 0)
+ %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
store i32 %eh_select5, i32* %eh_selector
%eh_select = load i32* %eh_selector
store i32 %eh_select, i32* %save_filt.148, align 4
@@ -220,10 +224,6 @@ lpad: ; preds = %bb
unreachable
}
-declare i8* @llvm.eh.exception() nounwind readonly
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-
declare i32 @__gxx_personality_v0(...)
declare void @_Unwind_Resume_or_Rethrow()
diff --git a/test/Transforms/MergeFunc/dg.exp b/test/Transforms/MergeFunc/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/MergeFunc/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/MergeFunc/lit.local.cfg b/test/Transforms/MergeFunc/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/MergeFunc/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/apelim.ll b/test/Transforms/ObjCARC/apelim.ll
new file mode 100644
index 000000000000..8c7b5b1e654f
--- /dev/null
+++ b/test/Transforms/ObjCARC/apelim.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -objc-arc-apelim < %s | FileCheck %s
+; rdar://10227311
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_x }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_y }]
+
+@x = global i32 0
+
+declare i32 @bar() nounwind
+
+define i32 @foo() nounwind {
+entry:
+ ret i32 5
+}
+
+define internal void @__cxx_global_var_init() {
+entry:
+ %call = call i32 @foo()
+ store i32 %call, i32* @x, align 4
+ ret void
+}
+
+define internal void @__dxx_global_var_init() {
+entry:
+ %call = call i32 @bar()
+ store i32 %call, i32* @x, align 4
+ ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_x()
+; CHECK-NOT: @objc
+; CHECK: }
+define internal void @_GLOBAL__I_x() {
+entry:
+ %0 = call i8* @objc_autoreleasePoolPush() nounwind
+ call void @__cxx_global_var_init()
+ call void @objc_autoreleasePoolPop(i8* %0) nounwind
+ ret void
+}
+
+; CHECK: define internal void @_GLOBAL__I_y()
+; CHECK: %0 = call i8* @objc_autoreleasePoolPush() nounwind
+; CHECK: call void @objc_autoreleasePoolPop(i8* %0) nounwind
+; CHECK: }
+define internal void @_GLOBAL__I_y() {
+entry:
+ %0 = call i8* @objc_autoreleasePoolPush() nounwind
+ call void @__dxx_global_var_init()
+ call void @objc_autoreleasePoolPop(i8* %0) nounwind
+ ret void
+}
+
+declare i8* @objc_autoreleasePoolPush()
+declare void @objc_autoreleasePoolPop(i8*)
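The pass can prove @_GLOBAL__I_x never autoreleases (its callee just stores a constant), so the pool push/pop around it is dead; @_GLOBAL__I_y calls the opaque @bar and must keep its pool. A C++ sketch of that reasoning (signatures rendered with void* in place of id; otherwise mirrors the test):

    extern "C" void *objc_autoreleasePoolPush();
    extern "C" void objc_autoreleasePoolPop(void *);
    extern "C" int bar();  // opaque, like @bar: may autorelease

    static int x;

    static void init_x() { x = 5; }  // like @__cxx_global_var_init via @foo

    void global_init_x() {
      void *p = objc_autoreleasePoolPush();  // dead pair: body is ObjC-free
      init_x();
      objc_autoreleasePoolPop(p);
    }

    void global_init_y() {
      void *p = objc_autoreleasePoolPush();  // must stay: bar() is opaque
      x = bar();
      objc_autoreleasePoolPop(p);
    }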
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 575cf42d4e65..ba2f778a28ec 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -3,10 +3,12 @@
target datalayout = "e-p:64:64:64"
declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
declare void @objc_release(i8*)
declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
declare void @objc_autoreleasePoolPop(i8*)
-declare void @objc_autoreleasePoolPush()
+declare i8* @objc_autoreleasePoolPush()
declare i8* @objc_retainBlock(i8*)
declare i8* @objc_retainedObject(i8*)
@@ -86,6 +88,37 @@ alt_return:
ret void
}
+; Don't do partial elimination into two different CFG diamonds.
+
+; CHECK: define void @test1b(
+; CHECK: entry:
+; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NOT: @objc_
+; CHECK: if.end5:
+; CHECK: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test1b(i8* %x, i1 %p, i1 %q) {
+entry:
+ tail call i8* @objc_retain(i8* %x) nounwind
+ br i1 %p, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void @callee()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ br i1 %q, label %if.then3, label %if.end5
+
+if.then3: ; preds = %if.end
+ tail call void @use_pointer(i8* %x)
+ br label %if.end5
+
+if.end5: ; preds = %if.then3, %if.end
+ tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
; Like test0 but the pointer is passed to an intervening call,
; so the optimization is not safe.
@@ -136,7 +169,7 @@ entry:
loop:
%c = bitcast i32* %x to i8*
call void @objc_release(i8* %c) nounwind
- %j = volatile load i1* %q
+ %j = load volatile i1* %q
br i1 %j, label %loop, label %return
return:
@@ -159,7 +192,7 @@ entry:
loop:
%a = bitcast i32* %x to i8*
%0 = call i8* @objc_retain(i8* %a) nounwind
- %j = volatile load i1* %q
+ %j = load volatile i1* %q
br i1 %j, label %loop, label %return
return:
@@ -495,7 +528,7 @@ entry:
define void @test13d(i8* %x, i64 %n) {
entry:
call i8* @objc_retain(i8* %x) nounwind
- call void @objc_autoreleasePoolPush()
+ call i8* @objc_autoreleasePoolPush()
call i8* @objc_retain(i8* %x) nounwind
call void @use_pointer(i8* %x)
call void @use_pointer(i8* %x)
@@ -755,7 +788,7 @@ C:
@__block_holder_tmp_1 = external constant %block1
define void @test23() {
entry:
- %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+ %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind, !clang.arc.copy_on_escape !0
call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
@@ -770,13 +803,28 @@ entry:
; CHECK: }
define void @test23b(i8* %p) {
entry:
- %0 = call i8* @objc_retainBlock(i8* %p) nounwind
+ %0 = call i8* @objc_retainBlock(i8* %p) nounwind, !clang.arc.copy_on_escape !0
call void @callee()
call void @use_pointer(i8* %p)
call void @objc_release(i8* %p) nounwind
ret void
}
+; Don't optimize objc_retainBlock, because there's no copy_on_escape metadata.
+
+; CHECK: define void @test23c(
+; CHECK: @objc_retainBlock
+; CHECK: @objc_release
+; CHECK: }
+define void @test23c() {
+entry:
+ %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+ call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+ call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+ call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+ ret void
+}
+
; Any call can decrement a retain count.
; CHECK: define void @test24(
@@ -1354,7 +1402,7 @@ entry:
; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
; CHECK-NEXT: call void @use_pointer(i8* %p)
; CHECK-NEXT: call void @use_pointer(i8* %p)
-; CHECK-NEXT: call void @objc_autoreleasePoolPush()
+; CHECK-NEXT: call i8* @objc_autoreleasePoolPush()
; CHECK-NEXT: ret void
; CHECK-NEXT: }
define void @test43b(i8* %p) {
@@ -1364,7 +1412,7 @@ entry:
call i8* @objc_retain(i8* %p)
call void @use_pointer(i8* %p)
call void @use_pointer(i8* %p)
- call void @objc_autoreleasePoolPush()
+ call i8* @objc_autoreleasePoolPush()
call void @objc_release(i8* %p)
ret void
}
@@ -1497,9 +1545,11 @@ define void @test52(i8** %zz, i8** %pp) {
; Like test52, but the pointer has function type, so it's assumed to
; be not reference counted.
+; Oops. That's wrong. Clang sometimes uses function types gratuitously.
+; See rdar://10551239.
; CHECK: define void @test53(
-; CHECK-NOT: @objc_
+; CHECK: @objc_
; CHECK: }
define void @test53(void ()** %zz, i8** %pp) {
%p = load i8** %pp
@@ -1673,6 +1723,154 @@ define void @test61() {
ret void
}
+; Delete a retain matched by releases when one is inside the loop and the
+; other is outside the loop.
+
+; CHECK: define void @test62(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test62(i8* %x, i1* %p) nounwind {
+entry:
+ br label %loop
+
+loop:
+ call i8* @objc_retain(i8* %x)
+ %q = load i1* %p
+ br i1 %q, label %loop.more, label %exit
+
+loop.more:
+ call void @objc_release(i8* %x)
+ br label %loop
+
+exit:
+ call void @objc_release(i8* %x)
+ ret void
+}
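test62 is deletable because every path leaving the retain reaches exactly one matching release: the back edge releases before re-retaining, and the exit releases once. test63 and test64 each drop one of those releases, leaving some path unbalanced, so nothing may be deleted there. A C++ sketch of the balanced shape (illustrative; void* stands in for id):

    extern "C" void *objc_retain(void *);
    extern "C" void objc_release(void *);

    // Shape of @test62: each path out of the retain hits exactly one
    // release, so the whole retain/release web is balanced and deletable.
    void test62_like(void *x, volatile bool *p) {
      for (;;) {
        objc_retain(x);
        if (!*p)
          break;
        objc_release(x);  // back-edge path (loop.more)
      }
      objc_release(x);    // exit path
    }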
+
+; Like test62 but with no release in exit.
+; Don't delete anything!
+
+; CHECK: define void @test63(
+; CHECK: loop:
+; CHECK: tail call i8* @objc_retain(i8* %x)
+; CHECK: loop.more:
+; CHECK: call void @objc_release(i8* %x)
+; CHECK: }
+define void @test63(i8* %x, i1* %p) nounwind {
+entry:
+ br label %loop
+
+loop:
+ call i8* @objc_retain(i8* %x)
+ %q = load i1* %p
+ br i1 %q, label %loop.more, label %exit
+
+loop.more:
+ call void @objc_release(i8* %x)
+ br label %loop
+
+exit:
+ ret void
+}
+
+; Like test62 but with no release in loop.more.
+; Don't delete anything!
+
+; CHECK: define void @test64(
+; CHECK: loop:
+; CHECK: tail call i8* @objc_retain(i8* %x)
+; CHECK: exit:
+; CHECK: call void @objc_release(i8* %x)
+; CHECK: }
+define void @test64(i8* %x, i1* %p) nounwind {
+entry:
+ br label %loop
+
+loop:
+ call i8* @objc_retain(i8* %x)
+ %q = load i1* %p
+ br i1 %q, label %loop.more, label %exit
+
+loop.more:
+ br label %loop
+
+exit:
+ call void @objc_release(i8* %x)
+ ret void
+}
+
+; Move an autorelease past a phi with a null.
+
+; CHECK: define i8* @test65(
+; CHECK: if.then:
+; CHECK: call i8* @objc_autorelease(
+; CHECK: return:
+; CHECK-NOT: @objc_autorelease
+; CHECK: }
+define i8* @test65(i1 %x) {
+entry:
+ br i1 %x, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %c = call i8* @returner()
+ %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind
+ br label %return
+
+return: ; preds = %if.then, %entry
+ %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+ %q = call i8* @objc_autorelease(i8* %retval) nounwind
+ ret i8* %retval
+}
+
+; Don't move an autorelease past an autorelease pool boundary.
+
+; CHECK: define i8* @test65b(
+; CHECK: if.then:
+; CHECK-NOT: @objc_autorelease
+; CHECK: return:
+; CHECK: call i8* @objc_autorelease(
+; CHECK: }
+define i8* @test65b(i1 %x) {
+entry:
+ %t = call i8* @objc_autoreleasePoolPush()
+ br i1 %x, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %c = call i8* @returner()
+ %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind
+ br label %return
+
+return: ; preds = %if.then, %entry
+ %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+ call void @objc_autoreleasePoolPop(i8* %t)
+ %q = call i8* @objc_autorelease(i8* %retval) nounwind
+ ret i8* %retval
+}
+
+; Don't move an autoreleaseReturnValue, which would break
+; the RV optimization.
+
+; CHECK: define i8* @test65c(
+; CHECK: if.then:
+; CHECK-NOT: @objc_autorelease
+; CHECK: return:
+; CHECK: call i8* @objc_autoreleaseReturnValue(
+; CHECK: }
+define i8* @test65c(i1 %x) {
+entry:
+ br i1 %x, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %c = call i8* @returner()
+ %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %c) nounwind
+ br label %return
+
+return: ; preds = %if.then, %entry
+ %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+ %q = call i8* @objc_autoreleaseReturnValue(i8* %retval) nounwind
+ ret i8* %retval
+}
+
declare void @bar(i32 ()*)
; A few real-world testcases.
diff --git a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
index 4ad78e753472..4a9b3140f101 100644
--- a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
@@ -1,6 +1,6 @@
; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
-; CHECK: call void @objc_storeStrong(i8**
+; CHECK: tail call void @objc_storeStrong(i8**
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin11.0.0"
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index 25c93f411c8d..4ff0596fbbc6 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -9,7 +9,7 @@ declare void @objc_release(i8*)
; CHECK: define void @test0(
; CHECK: entry:
-; CHECK-NEXT: call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** @x, i8* %p) nounwind
; CHECK-NEXT: ret void
define void @test0(i8* %p) {
entry:
@@ -33,7 +33,7 @@ entry:
define void @test1(i8* %p) {
entry:
%0 = tail call i8* @objc_retain(i8* %p) nounwind
- %tmp = volatile load i8** @x, align 8
+ %tmp = load volatile i8** @x, align 8
store i8* %0, i8** @x, align 8
tail call void @objc_release(i8* %tmp) nounwind
ret void
@@ -53,7 +53,7 @@ define void @test2(i8* %p) {
entry:
%0 = tail call i8* @objc_retain(i8* %p) nounwind
%tmp = load i8** @x, align 8
- volatile store i8* %0, i8** @x, align 8
+ store volatile i8* %0, i8** @x, align 8
tail call void @objc_release(i8* %tmp) nounwind
ret void
}
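The pattern the contract pass fuses here is: retain the new value, load the old value from the slot, store the new value, release the old value; that sequence becomes one objc_storeStrong call, which the updated CHECK lines now also expect to be a tail call. A C++ sketch of both forms (signatures rendered with void*; x_slot is a stand-in for @x):

    extern "C" void *objc_retain(void *);
    extern "C" void objc_release(void *);
    extern "C" void objc_storeStrong(void **, void *);

    void **x_slot;  // stand-in for @x

    // The expanded pattern the pass matches...
    void store_expanded(void *p) {
      objc_retain(p);
      void *old = *x_slot;
      *x_slot = p;
      objc_release(old);
    }

    // ...and the single runtime call it contracts to.
    void store_contracted(void *p) {
      objc_storeStrong(x_slot, p);
    }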
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
index 04ae3ca505fc..c48f8a534fad 100644
--- a/test/Transforms/ObjCARC/contract.ll
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -143,3 +143,21 @@ define i8* @test7(i8* %p) {
%2 = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
ret i8* %p
}
+
+; Do the return value substitution for PHI nodes too.
+
+; CHECK: define i8* @test8(
+; CHECK: %retval = phi i8* [ %p, %if.then ], [ null, %entry ]
+; CHECK: }
+define i8* @test8(i1 %x, i8* %c) {
+entry:
+ br i1 %x, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %p = call i8* @objc_retain(i8* %c) nounwind
+ br label %return
+
+return: ; preds = %if.then, %entry
+ %retval = phi i8* [ %c, %if.then ], [ null, %entry ]
+ ret i8* %retval
+}
diff --git a/test/Transforms/ObjCARC/dg.exp b/test/Transforms/ObjCARC/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/ObjCARC/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
index cf971e458aaa..76e82a587b8d 100644
--- a/test/Transforms/ObjCARC/invoke.ll
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -2,9 +2,11 @@
declare i8* @objc_retain(i8*)
declare void @objc_release(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
declare i8* @objc_msgSend(i8*, i8*, ...)
declare void @use_pointer(i8*)
declare void @callee()
+declare i8* @returner()
; ARCOpt shouldn't try to move the releases to the block containing the invoke.
@@ -68,6 +70,149 @@ done:
ret void
}
+; The optimizer should ignore invoke unwind paths consistently.
+; PR12265
+
+; CHECK: define void @test2() {
+; CHECK: invoke.cont:
+; CHECK-NEXT: call i8* @objc_retain
+; CHECK-NOT: @objc
+; CHECK: finally.cont:
+; CHECK-NEXT: call void @objc_release
+; CHECK-NOT: @objc
+; CHECK: finally.rethrow:
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test2() {
+entry:
+ %call = invoke i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* ()*)()
+ to label %invoke.cont unwind label %finally.rethrow, !clang.arc.no_objc_arc_exceptions !0
+
+invoke.cont: ; preds = %entry
+ %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void ()*)(), !clang.arc.no_objc_arc_exceptions !0
+ invoke void @use_pointer(i8* %call)
+ to label %finally.cont unwind label %finally.rethrow, !clang.arc.no_objc_arc_exceptions !0
+
+finally.cont: ; preds = %invoke.cont
+ tail call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+ ret void
+
+finally.rethrow: ; preds = %invoke.cont, %entry
+ %tmp2 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ catch i8* null
+ unreachable
+}
+
+; Don't try to place code on invoke critical edges.
+
+; CHECK: define void @test3(
+; CHECK: if.end:
+; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: ret void
+define void @test3(i8* %p, i1 %b) {
+entry:
+ %0 = call i8* @objc_retain(i8* %p)
+ call void @callee()
+ br i1 %b, label %if.else, label %if.then
+
+if.then:
+ invoke void @use_pointer(i8* %p)
+ to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+if.else:
+ invoke void @use_pointer(i8* %p)
+ to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+lpad:
+ %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ cleanup
+ ret void
+
+if.end:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Like test3, but with ARC-relevant exception handling.
+
+; CHECK: define void @test4(
+; CHECK: lpad:
+; CHECK-NEXT: %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: call void @objc_release(i8* %p) nounwind
+; CHECK-NEXT: ret void
+define void @test4(i8* %p, i1 %b) {
+entry:
+ %0 = call i8* @objc_retain(i8* %p)
+ call void @callee()
+ br i1 %b, label %if.else, label %if.then
+
+if.then:
+ invoke void @use_pointer(i8* %p)
+ to label %if.end unwind label %lpad
+
+if.else:
+ invoke void @use_pointer(i8* %p)
+ to label %if.end unwind label %lpad
+
+lpad:
+ %r = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ cleanup
+ call void @objc_release(i8* %p)
+ ret void
+
+if.end:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't turn the retainAutoreleasedReturnValue into a plain retain, because it
+; is for an invoke which we can assume codegen will place immediately prior.
+
+; CHECK: define void @test5(
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+; CHECK: }
+define void @test5() {
+entry:
+ %z = invoke i8* @returner()
+ to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+lpad:
+ %r13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ cleanup
+ ret void
+
+if.end:
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+ ret void
+}
+
+; Like test5, but there's intervening code.
+
+; CHECK: define void @test6(
+; CHECK: call i8* @objc_retain(i8* %z)
+; CHECK: }
+define void @test6() {
+entry:
+ %z = invoke i8* @returner()
+ to label %if.end unwind label %lpad, !clang.arc.no_objc_arc_exceptions !0
+
+lpad:
+ %r13 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ cleanup
+ ret void
+
+if.end:
+ call void @callee()
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %z)
+ ret void
+}
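The constraint behind test5 and test6 is the return-value handshake: objc_retainAutoreleasedReturnValue only claims the autoreleased result when it executes immediately after the producing call or invoke, so once other code intervenes the pass must fall back to a plain objc_retain. A C++ sketch of the two shapes (illustrative; declarations mirror the test's, with void* in place of id):

    extern "C" void *objc_retain(void *);
    extern "C" void *objc_retainAutoreleasedReturnValue(void *);
    extern "C" void *returner();
    extern "C" void callee();

    void test5_like() {
      void *z = returner();
      objc_retainAutoreleasedReturnValue(z);  // adjacent: RV form is valid
    }

    void test6_like() {
      void *z = returner();
      callee();                               // intervening code
      objc_retain(z);                         // RV form must degrade to this
    }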
+
declare i32 @__gxx_personality_v0(...)
+declare i32 @__objc_personality_v0(...)
!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/lit.local.cfg b/test/Transforms/ObjCARC/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/ObjCARC/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index 9eada8a2ddba..a618a21d8bb3 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -484,12 +484,14 @@ forcoll.empty:
ret void
}
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test9b for the same testcase without a split backedge.
; CHECK: define void @test9(
; CHECK: call i8* @objc_retain
; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: call i8* @objc_retain
; CHECK: }
define void @test9() nounwind {
entry:
@@ -551,13 +553,79 @@ forcoll.empty:
ret void
}
-; Delete a nested retain+release pair.
+; Like test9, but without a split backedge. This we can optimize.
-; CHECK: define void @test10(
+; CHECK: define void @test9b(
; CHECK: call i8* @objc_retain
; CHECK: call i8* @objc_retain
; CHECK-NOT: @objc_retain
; CHECK: }
+define void @test9b() nounwind {
+entry:
+ %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+ %items.ptr = alloca [16 x i8*], align 8
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ %call1 = call i8* @returner()
+ %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+ %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+ %2 = call i8* @objc_retain(i8* %0) nounwind
+ %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %iszero = icmp eq i64 %call4, 0
+ br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+ %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+ %mutationsptr = load i64** %mutationsptr.ptr, align 8
+ %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+ br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+ %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+ %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+ %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+ br label %forcoll.loopbody
+
+forcoll.loopbody:
+ %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+ %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+ %statemutations = load i64* %mutationsptr5, align 8
+ %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+ br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+ call void @objc_enumerationMutation(i8* %2)
+ br label %forcoll.notmutated
+
+forcoll.notmutated:
+ %phitmp = add i64 %forcoll.index, 1
+ %exitcond = icmp eq i64 %phitmp, %umax
+ br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+ %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %4 = icmp eq i64 %call7, 0
+ br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+ call void @objc_release(i8* %2) nounwind
+ call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+ call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because of a split loop backedge.
+; See test10b for the same testcase without a split backedge.
+
+; CHECK: define void @test10(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK: }
define void @test10() nounwind {
entry:
%state.ptr = alloca %struct.__objcFastEnumerationState, align 8
@@ -618,3 +686,68 @@ forcoll.empty:
call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
ret void
}
+
+; Like test10, but without a split backedge. This we can optimize.
+
+; CHECK: define void @test10b(
+; CHECK: call i8* @objc_retain
+; CHECK: call i8* @objc_retain
+; CHECK-NOT: @objc_retain
+; CHECK: }
+define void @test10b() nounwind {
+entry:
+ %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
+ %items.ptr = alloca [16 x i8*], align 8
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ %call1 = call i8* @returner()
+ %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call1) nounwind
+ call void @callee()
+ %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
+ %2 = call i8* @objc_retain(i8* %0) nounwind
+ %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %iszero = icmp eq i64 %call4, 0
+ br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
+
+forcoll.loopinit:
+ %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
+ %mutationsptr = load i64** %mutationsptr.ptr, align 8
+ %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+ br label %forcoll.loopbody.outer
+
+forcoll.loopbody.outer:
+ %forcoll.count.ph = phi i64 [ %call4, %forcoll.loopinit ], [ %call7, %forcoll.refetch ]
+ %tmp9 = icmp ugt i64 %forcoll.count.ph, 1
+ %umax = select i1 %tmp9, i64 %forcoll.count.ph, i64 1
+ br label %forcoll.loopbody
+
+forcoll.loopbody:
+ %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
+ %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
+ %statemutations = load i64* %mutationsptr5, align 8
+ %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
+ br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
+
+forcoll.mutated:
+ call void @objc_enumerationMutation(i8* %2)
+ br label %forcoll.notmutated
+
+forcoll.notmutated:
+ %phitmp = add i64 %forcoll.index, 1
+ %exitcond = icmp eq i64 %phitmp, %umax
+ br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
+
+forcoll.refetch:
+ %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
+ %4 = icmp eq i64 %call7, 0
+ br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
+
+forcoll.empty:
+ call void @objc_release(i8* %2) nounwind
+ call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+ call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
new file mode 100644
index 000000000000..9728f6e0d94f
--- /dev/null
+++ b/test/Transforms/ObjCARC/no-objc-arc-exceptions.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+%struct.__block_byref_x = type { i8*, %struct.__block_byref_x*, i32, i32, i32 }
+%struct.__block_descriptor = type { i64, i64 }
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
+
+; The optimizer should make use of the !clang.arc.no_objc_arc_exceptions
+; metadata and eliminate the retainBlock+release pair here.
+; rdar://10803830.
+
+; CHECK: define void @test0(
+; CHECK-NOT: @objc
+; CHECK: }
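+; Without -fobjc-arc-exceptions, ARC makes no guarantee that releases run on
+; unwind paths; the metadata records that, so the optimizer may treat the
+; invoke below like an ordinary call when pairing the retainBlock with the
+; release on the normal path.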
+define void @test0() {
+entry:
+ %x = alloca %struct.__block_byref_x, align 8
+ %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+ %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+ store i8* null, i8** %byref.isa, align 8
+ %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+ store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+ %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+ store i32 0, i32* %byref.flags, align 8
+ %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+ store i32 32, i32* %byref.size, align 4
+ %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+ %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+ store i32 1107296256, i32* %block.flags, align 8
+ %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+ store i32 0, i32* %block.reserved, align 4
+ %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+ store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+ %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+ store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+ %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+ %t1 = bitcast %struct.__block_byref_x* %x to i8*
+ store i8* %t1, i8** %block.captured, align 8
+ %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+ %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+ %t4 = getelementptr inbounds i8* %t3, i64 16
+ %t5 = bitcast i8* %t4 to i8**
+ %t6 = load i8** %t5, align 8
+ %t7 = bitcast i8* %t6 to void (i8*)*
+ invoke void %t7(i8* %t3)
+ to label %invoke.cont unwind label %lpad, !clang.arc.no_objc_arc_exceptions !4
+
+invoke.cont: ; preds = %entry
+ call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+ call void @_Block_object_dispose(i8* %t1, i32 8)
+ ret void
+
+lpad: ; preds = %entry
+ %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ cleanup
+ call void @_Block_object_dispose(i8* %t1, i32 8)
+ resume { i8*, i32 } %t8
+}
+
+; There is no !clang.arc.no_objc_arc_exceptions
+; metadata here, so the optimizer shouldn't eliminate anything.
+
+; CHECK: define void @test0_no_metadata(
+; CHECK: call i8* @objc_retainBlock(
+; CHECK: invoke
+; CHECK: call void @objc_release(
+; CHECK: }
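+; Here the invoke may unwind to a path that never executes the release, so
+; the retainBlock+release pair has to stay.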
+define void @test0_no_metadata() {
+entry:
+ %x = alloca %struct.__block_byref_x, align 8
+ %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>, align 8
+ %byref.isa = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 0
+ store i8* null, i8** %byref.isa, align 8
+ %byref.forwarding = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 1
+ store %struct.__block_byref_x* %x, %struct.__block_byref_x** %byref.forwarding, align 8
+ %byref.flags = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 2
+ store i32 0, i32* %byref.flags, align 8
+ %byref.size = getelementptr inbounds %struct.__block_byref_x* %x, i64 0, i32 3
+ store i32 32, i32* %byref.size, align 4
+ %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+ %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 1
+ store i32 1107296256, i32* %block.flags, align 8
+ %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 2
+ store i32 0, i32* %block.reserved, align 4
+ %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 3
+ store i8* bitcast (void (i8*)* @__foo_block_invoke_0 to i8*), i8** %block.invoke, align 8
+ %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 4
+ store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+ %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block, i64 0, i32 5
+ %t1 = bitcast %struct.__block_byref_x* %x to i8*
+ store i8* %t1, i8** %block.captured, align 8
+ %t2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
+ %t3 = call i8* @objc_retainBlock(i8* %t2) nounwind, !clang.arc.copy_on_escape !4
+ %t4 = getelementptr inbounds i8* %t3, i64 16
+ %t5 = bitcast i8* %t4 to i8**
+ %t6 = load i8** %t5, align 8
+ %t7 = bitcast i8* %t6 to void (i8*)*
+ invoke void %t7(i8* %t3)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ call void @objc_release(i8* %t3) nounwind, !clang.imprecise_release !4
+ call void @_Block_object_dispose(i8* %t1, i32 8)
+ ret void
+
+lpad: ; preds = %entry
+ %t8 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*)
+ cleanup
+ call void @_Block_object_dispose(i8* %t1, i32 8)
+ resume { i8*, i32 } %t8
+}
+
+declare i8* @objc_retainBlock(i8*)
+declare void @objc_release(i8*)
+declare void @_Block_object_dispose(i8*, i32)
+declare i32 @__objc_personality_v0(...)
+declare void @__foo_block_invoke_0(i8* nocapture) uwtable ssp
+
+!4 = metadata !{}
diff --git a/test/Transforms/ObjCARC/pointer-types.ll b/test/Transforms/ObjCARC/pointer-types.ll
new file mode 100644
index 000000000000..6abc93986434
--- /dev/null
+++ b/test/Transforms/ObjCARC/pointer-types.ll
@@ -0,0 +1,31 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; Don't hoist @objc_release past a use of its pointer, even
+; if the use has function type, because clang uses function types
+; in dubious ways.
+; rdar://10551239
+
+; CHECK: define void @test0(
+; CHECK: %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
+; CHECK-NEXT: call void @use_fptr(void ()* %otherBlock)
+; CHECK-NEXT: %tmp11 = bitcast void ()* %otherBlock to i8*
+; CHECK-NEXT: call void @objc_release(i8* %tmp11)
+
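+; Releasing %otherBlock before @use_fptr runs could deallocate the block
+; while it is still about to be called, so the release must stay below the
+; use even though the use is through a function type.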
+define void @test0(i1 %tobool, void ()* %b1) {
+entry:
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %otherBlock = phi void ()* [ %b1, %if.then ], [ null, %entry ]
+ call void @use_fptr(void ()* %otherBlock)
+ %tmp11 = bitcast void ()* %otherBlock to i8*
+ call void @objc_release(i8* %tmp11) nounwind
+ ret void
+}
+
+declare void @use_fptr(void ()*)
+declare void @objc_release(i8*)
+
diff --git a/test/Transforms/ObjCARC/pr12270.ll b/test/Transforms/ObjCARC/pr12270.ll
new file mode 100644
index 000000000000..1faae5f68705
--- /dev/null
+++ b/test/Transforms/ObjCARC/pr12270.ll
@@ -0,0 +1,21 @@
+; RUN: opt -disable-output -objc-arc-contract %s
+; Test that we don't crash on unreachable code.
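+; The "return" block below has no predecessors, so its instructions sit in
+; unreachable code where the usual dominance assumptions may not hold.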
+%2 = type opaque
+
+define void @_i_Test__foo(%2 *%x) {
+entry:
+ unreachable
+
+return: ; No predecessors!
+ %bar = bitcast %2* %x to i8*
+ %foo = call i8* @objc_autoreleaseReturnValue(i8* %bar) nounwind
+ call void @callee()
+ call void @use_pointer(i8* %foo)
+ call void @objc_release(i8* %foo) nounwind
+ ret void
+}
+
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare void @objc_release(i8*)
+declare void @callee()
+declare void @use_pointer(i8*)
diff --git a/test/Transforms/ObjCARC/retain-block-alloca.ll b/test/Transforms/ObjCARC/retain-block-alloca.ll
index 468da9147adb..01f208704c7b 100644
--- a/test/Transforms/ObjCARC/retain-block-alloca.ll
+++ b/test/Transforms/ObjCARC/retain-block-alloca.ll
@@ -1,11 +1,6 @@
; RUN: opt -S -objc-arc < %s | FileCheck %s
; rdar://10209613
-; CHECK: define void @test
-; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
-; CHECK: @objc_msgSend
-; CHECK-NEXT: @objc_release(i8* %3)
-
%0 = type opaque
%struct.__block_descriptor = type { i64, i64 }
@@ -13,6 +8,10 @@
@__block_descriptor_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
@"\01L_OBJC_SELECTOR_REFERENCES_" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+; CHECK: define void @test(
+; CHECK: %3 = call i8* @objc_retainBlock(i8* %2) nounwind
+; CHECK: @objc_msgSend
+; CHECK-NEXT: @objc_release(i8* %3)
define void @test(%0* %array) uwtable {
entry:
%block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
@@ -41,6 +40,43 @@ entry:
ret void
}
+; Same as @test above, but the objc_retainBlock has a clang.arc.copy_on_escape
+; tag so it's safe to delete.
+
+; CHECK: define void @test_with_COE(
+; CHECK-NOT: @objc_retainBlock
+; CHECK: @objc_msgSend
+; CHECK: @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: }
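+; copy_on_escape promises the block is only copied if it escapes; this block
+; just lives on the stack for the duration of the objc_msgSend call, so the
+; retainBlock/release pair should be removable.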
+define void @test_with_COE(%0* %array) uwtable {
+entry:
+ %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, align 8
+ %0 = bitcast %0* %array to i8*
+ %1 = tail call i8* @objc_retain(i8* %0) nounwind
+ %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+ %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 1
+ store i32 1107296256, i32* %block.flags, align 8
+ %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 2
+ store i32 0, i32* %block.reserved, align 4
+ %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 3
+ store i8* bitcast (void (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
+ %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 4
+ store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+ %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block, i64 0, i32 5
+ store %0* %array, %0** %block.captured, align 8
+ %2 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %block to i8*
+ %3 = call i8* @objc_retainBlock(i8* %2) nounwind, !clang.arc.copy_on_escape !0
+ %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* %0, i8* %tmp2, i8* %3)
+ call void @objc_release(i8* %3) nounwind
+ %strongdestroy = load %0** %block.captured, align 8
+ %4 = bitcast %0* %strongdestroy to i8*
+ call void @objc_release(i8* %4) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
declare i8* @objc_retain(i8*)
declare void @__test_block_invoke_0(i8* nocapture) uwtable
diff --git a/test/Transforms/ObjCARC/retain-block-load.ll b/test/Transforms/ObjCARC/retain-block-load.ll
new file mode 100644
index 000000000000..a5170e323653
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block-load.ll
@@ -0,0 +1,51 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; rdar://10803830
+; The optimizer should be able to prove that the block does not
+; "escape", so the retainBlock+release pair can be eliminated.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.__block_descriptor = type { i64, i64 }
+
+@_NSConcreteStackBlock = external global i8*
+@__block_descriptor_tmp = external global { i64, i64, i8*, i8* }
+
+; CHECK: define void @test() {
+; CHECK-NOT: @objc
+; CHECK: declare i8* @objc_retainBlock(i8*)
+; CHECK: declare void @objc_release(i8*)
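+; The block's invoke function is loaded back out of the block itself and
+; called directly; the block pointer is never stored anywhere, which is what
+; lets the non-escape analysis fire.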
+
+define void @test() {
+entry:
+ %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, align 8
+ %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+ %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 1
+ store i32 1073741824, i32* %block.flags, align 8
+ %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 2
+ store i32 0, i32* %block.reserved, align 4
+ %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 3
+ store i8* bitcast (i32 (i8*)* @__test_block_invoke_0 to i8*), i8** %block.invoke, align 8
+ %block.descriptor = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 4
+ store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+ %block.captured = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block, i64 0, i32 5
+ store i32 4, i32* %block.captured, align 8
+ %tmp = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %block to i8*
+ %tmp1 = call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+ %tmp2 = getelementptr inbounds i8* %tmp1, i64 16
+ %tmp3 = bitcast i8* %tmp2 to i8**
+ %tmp4 = load i8** %tmp3, align 8
+ %tmp5 = bitcast i8* %tmp4 to i32 (i8*)*
+ %call = call i32 %tmp5(i8* %tmp1)
+ call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+declare i32 @__test_block_invoke_0(i8* nocapture %.block_descriptor) nounwind readonly
+
+declare i8* @objc_retainBlock(i8*)
+
+declare void @objc_release(i8*)
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/retain-block.ll b/test/Transforms/ObjCARC/retain-block.ll
new file mode 100644
index 000000000000..b3b62d300008
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-block.ll
@@ -0,0 +1,138 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+!0 = metadata !{}
+
+declare i8* @objc_retain(i8*)
+declare void @callee(i8)
+declare void @use_pointer(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_retainBlock(i8*)
+declare i8* @objc_autorelease(i8*)
+
+; Basic retainBlock+release elimination.
+
+; CHECK: define void @test0(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: }
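+; With copy_on_escape set and no escape, retainBlock can be treated like a
+; plain retain, and the pair around the use folds away.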
+define void @test0(i8* %tmp) {
+entry:
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Same as test0, but there's no copy_on_escape metadata, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_no_metadata(i8* %tmp) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_no_metadata(i8* %tmp) {
+entry:
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Same as test0, but the pointer escapes, so there's no
+; optimization possible.
+
+; CHECK: define void @test0_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
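+; The store to %z counts as an escape, so even with copy_on_escape the
+; block may be copied and the pair must stay.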
+define void @test0_escape(i8* %tmp, i8** %z) {
+entry:
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+ store i8* %tmp2, i8** %z
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Same as test0_escape, but there's no intervening call.
+
+; CHECK: define void @test0_just_escape(i8* %tmp, i8** %z) {
+; CHECK: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK: tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test0_just_escape(i8* %tmp, i8** %z) {
+entry:
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+ store i8* %tmp2, i8** %z
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Basic nested retainBlock+release elimination.
+
+; CHECK: define void @test1(i8* %tmp) {
+; CHECK-NOT: @objc
+; CHECK: tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK-NOT: @objc
+; CHECK: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1(i8* %tmp) {
+entry:
+ %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Same as test1, but there's no copy_on_escape metadata, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release.
+
+; CHECK: define void @test1_no_metadata(i8* %tmp) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_no_metadata(i8* %tmp) {
+entry:
+ %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; Same as test1, but the pointer escapes, so there's no
+; retainBlock+release optimization possible. But we can still eliminate
+; the outer retain+release.
+
+; CHECK: define void @test1_escape(i8* %tmp, i8** %z) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+; CHECK-NEXT: store i8* %tmp2, i8** %z
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: @use_pointer(i8* %tmp2)
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test1_escape(i8* %tmp, i8** %z) {
+entry:
+ %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+ %tmp2 = tail call i8* @objc_retainBlock(i8* %tmp) nounwind, !clang.arc.copy_on_escape !0
+ store i8* %tmp2, i8** %z
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @use_pointer(i8* %tmp2)
+ tail call void @objc_release(i8* %tmp2) nounwind, !clang.imprecise_release !0
+ tail call void @objc_release(i8* %tmp) nounwind, !clang.imprecise_release !0
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
index 41bde017e777..f876e51592b6 100644
--- a/test/Transforms/ObjCARC/retain-not-declared.ll
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -30,7 +30,7 @@ entry:
; CHECK: @test1(
; CHECK: @objc_retain(
-; CHECK: @objc_retain(
+; CHECK: @objc_retainAutoreleasedReturnValue(
; CHECK: @objc_release(
; CHECK: @objc_release(
; CHECK: }
diff --git a/test/Transforms/PhaseOrdering/dg.exp b/test/Transforms/PhaseOrdering/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/PhaseOrdering/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PhaseOrdering/lit.local.cfg b/test/Transforms/PhaseOrdering/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/PruneEH/dg.exp b/test/Transforms/PruneEH/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/PruneEH/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/PruneEH/lit.local.cfg b/test/Transforms/PruneEH/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/PruneEH/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/Reassociate/dg.exp b/test/Transforms/Reassociate/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/Reassociate/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Reassociate/lit.local.cfg b/test/Transforms/Reassociate/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Reassociate/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
index cd6cf9704a58..63f41dbc0240 100644
--- a/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
+++ b/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll
@@ -6,9 +6,9 @@ target triple = "i686-pc-linux-gnu"
define i32 @x(i32 %b) {
entry:
- %val = call i32 @llvm.cttz.i32(i32 undef)
+ %val = call i32 @llvm.cttz.i32(i32 undef, i1 true)
ret i32 %val
}
-declare i32 @llvm.cttz.i32(i32)
+declare i32 @llvm.cttz.i32(i32, i1)
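+; The trailing i1 is the is_zero_undef flag these intrinsics grew; passing
+; true says a zero input yields an undefined result.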
diff --git a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
index 7546bf5cc727..f62ed7048e05 100644
--- a/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
+++ b/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll
@@ -21,10 +21,6 @@ define internal i32 @f() {
declare i8* @__cxa_begin_catch(i8*) nounwind
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
declare void @__cxa_end_catch()
declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/SCCP/dg.exp b/test/Transforms/SCCP/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/SCCP/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SCCP/lit.local.cfg b/test/Transforms/SCCP/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/SCCP/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SCCP/phitest.ll b/test/Transforms/SCCP/phitest.ll
deleted file mode 100644
index 4c5c3dcc6904..000000000000
--- a/test/Transforms/SCCP/phitest.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -sccp -dce -simplifycfg -S | not grep br
-
-define i32 @test(i32 %param) {
-entry:
- %tmp.1 = icmp ne i32 %param, 0 ; <i1> [#uses=1]
- br i1 %tmp.1, label %endif.0, label %else
-else: ; preds = %entry
- br label %endif.0
-endif.0: ; preds = %else, %entry
- %a.0 = phi i32 [ 2, %else ], [ 3, %entry ] ; <i32> [#uses=1]
- %b.0 = phi i32 [ 3, %else ], [ 2, %entry ] ; <i32> [#uses=1]
- %tmp.5 = add i32 %a.0, %b.0 ; <i32> [#uses=1]
- %tmp.7 = icmp ne i32 %tmp.5, 5 ; <i1> [#uses=1]
- br i1 %tmp.7, label %UnifiedReturnBlock, label %endif.1
-endif.1: ; preds = %endif.0
- ret i32 0
-UnifiedReturnBlock: ; preds = %endif.0
- ret i32 2
-}
-
diff --git a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
deleted file mode 100644
index d71bcb9713be..000000000000
--- a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
+++ /dev/null
@@ -1,184 +0,0 @@
-; RUN: opt < %s -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9.6"
- %0 = type { } ; type %0
- %1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 } ; type %1
- %2 = type { i8*, i32 } ; type %2
- %3 = type opaque ; type %3
- %4 = type { i32 } ; type %4
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
- %llvm.dbg.composite.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0*, %0*, i32 }
- %llvm.dbg.derivedtype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0* }
- %llvm.dbg.subprogram.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1 }
- %llvm.dbg.subrange.type = type { i32, i64, i64 }
- %llvm.dbg.variable.type = type { i32, %0*, i8*, %0*, i32, %0* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-internal constant [8 x i8] c"PR491.c\00", section "llvm.metadata" ; <[8 x i8]*>:0 [#uses=1]
-internal constant [77 x i8] c"/Volumes/Nanpura/mainline/llvm/projects/llvm-test/SingleSource/Regression/C/\00", section "llvm.metadata" ; <[77 x i8]*>:1 [#uses=1]
-internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*>:2 [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 1, i8* getelementptr ([8 x i8]* @0, i32 0, i32 0), i8* getelementptr ([77 x i8]* @1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*>:3 [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([4 x i8]* @3, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-internal constant [5 x i8] c"char\00", section "llvm.metadata" ; <[5 x i8]*>:4 [#uses=1]
-@llvm.dbg.basictype5 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @4, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, i32 6 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype5 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype6 = internal constant %llvm.dbg.derivedtype.type { i32 458767, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-internal constant [13 x i8] c"unsigned int\00", section "llvm.metadata" ; <[13 x i8]*>:5 [#uses=1]
-@llvm.dbg.basictype8 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([13 x i8]* @5, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 7 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype6 to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype8 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-internal constant [12 x i8] c"assert_fail\00", section "llvm.metadata" ; <[12 x i8]*>:6 [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([12 x i8]* @6, i32 0, i32 0), i8* getelementptr ([12 x i8]* @6, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 4, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to %0*), i1 true, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=0]
-internal constant [2 x i8] c"l\00", section "llvm.metadata" ; <[2 x i8]*>:7 [#uses=1]
-@__stderrp = external global %1* ; <%1**> [#uses=4]
-internal constant [35 x i8] c"assertion failed in line %u: '%s'\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[35 x i8]*>:8 [#uses=1]
-@llvm.dbg.array13 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1]
-@llvm.dbg.composite14 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array13 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [5 x i8] c"test\00", section "llvm.metadata" ; <[5 x i8]*>:9 [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @9, i32 0, i32 0), i8* getelementptr ([5 x i8]* @9, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 10, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite14 to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-internal constant [9 x i8] c"long int\00", section "llvm.metadata" ; <[9 x i8]*>:10 [#uses=1]
-@llvm.dbg.basictype21 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([9 x i8]* @10, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.derivedtype22 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype21 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.subrange = internal constant %llvm.dbg.subrange.type { i32 458785, i64 0, i64 3 }, section "llvm.metadata" ; <%llvm.dbg.subrange.type*> [#uses=1]
-@llvm.dbg.array23 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.subrange.type* @llvm.dbg.subrange to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-internal constant [14 x i8] c"unsigned char\00", section "llvm.metadata" ; <[14 x i8]*>:11 [#uses=1]
-@llvm.dbg.basictype25 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([14 x i8]* @11, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 8, i64 8, i64 0, i32 0, i32 8 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.composite26 = internal constant %llvm.dbg.composite.type { i32 458753, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype25 to %0*), %0* bitcast ([1 x %0*]* @llvm.dbg.array23 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [2 x i8] c"c\00", section "llvm.metadata" ; <[2 x i8]*>:12 [#uses=1]
-@llvm.dbg.derivedtype28 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @12, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite26 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array29 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype28 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1]
-@llvm.dbg.composite30 = internal constant %llvm.dbg.composite.type { i32 458775, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, i64 32, i64 32, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array29 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [2 x i8] c"u\00", section "llvm.metadata" ; <[2 x i8]*>:13 [#uses=1]
-@llvm.dbg.variable32 = internal constant %llvm.dbg.variable.type { i32 459008, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*), i8* getelementptr ([2 x i8]* @13, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 20, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite30 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
-internal constant [11 x i8] c"u.l == 128\00", section "__TEXT,__cstring,cstring_literals" ; <[11 x i8]*>:14 [#uses=1]
-internal constant [8 x i8] c"u.l < 0\00", section "__TEXT,__cstring,cstring_literals" ; <[8 x i8]*>:15 [#uses=1]
-@llvm.dbg.array35 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1]
-@llvm.dbg.composite36 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([1 x %0*]* @llvm.dbg.array35 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-internal constant [5 x i8] c"main\00", section "llvm.metadata" ; <[5 x i8]*>:16 [#uses=1]
-@llvm.dbg.subprogram38 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @16, i32 0, i32 0), i8* getelementptr ([5 x i8]* @16, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 28, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite36 to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-
-declare void @llvm.dbg.func.start(%0*) nounwind readnone
-
-declare void @llvm.dbg.declare(%0*, %0*) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, %0*) nounwind readnone
-
-declare i32 @fprintf(%1* nocapture, i8* nocapture, ...) nounwind
-
-declare void @llvm.dbg.region.end(%0*) nounwind readnone
-
-define i32 @test(i32) nounwind {
-; <label>:1
- %2 = alloca %4, align 8 ; <%4*> [#uses=7]
- call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*))
- %3 = bitcast %4* %2 to %0* ; <%0*> [#uses=1]
- call void @llvm.dbg.declare(%0* %3, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable32 to %0*))
- call void @llvm.dbg.stoppoint(i32 21, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %4 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %4, align 8
- %5 = bitcast %4* %2 to i8* ; <i8*> [#uses=1]
- store i8 -128, i8* %5, align 8
- call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %6 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 8 ; <i32> [#uses=1]
- %8 = icmp eq i32 %7, 128 ; <i1> [#uses=1]
- br i1 %8, label %12, label %9
-
-; <label>:9 ; preds = %1
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %10 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %11 = call i32 (%1*, i8*, ...)* @fprintf(%1* %10, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 22, i8* getelementptr ([11 x i8]* @14, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %12
-
-; <label>:12 ; preds = %9, %1
- %.0 = phi i32 [ 0, %9 ], [ 1, %1 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %13 = and i32 %.0, %0 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 23, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %14 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %14, align 8
- %15 = bitcast %4* %2 to [4 x i8]* ; <[4 x i8]*> [#uses=1]
- %16 = getelementptr [4 x i8]* %15, i32 0, i32 3 ; <i8*> [#uses=1]
- store i8 -128, i8* %16, align 1
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %17 = getelementptr %4* %2, i32 0, i32 0 ; <i32*> [#uses=1]
- %18 = load i32* %17, align 8 ; <i32> [#uses=1]
- %19 = icmp slt i32 %18, 0 ; <i1> [#uses=1]
- br i1 %19, label %23, label %20
-
-; <label>:20 ; preds = %12
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %21 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %22 = call i32 (%1*, i8*, ...)* @fprintf(%1* %21, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 24, i8* getelementptr ([8 x i8]* @15, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %23
-
-; <label>:23 ; preds = %20, %12
- %.01 = phi i32 [ 0, %20 ], [ 1, %12 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %24 = and i32 %.01, %13 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 25, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to %0*))
- ret i32 %24
-}
-
-define i32 @main() nounwind {
-; <label>:0
- %1 = alloca %4, align 8 ; <%4*> [#uses=7]
- call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram38 to %0*))
- call void @llvm.dbg.stoppoint(i32 29, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- %2 = bitcast %4* %1 to %0* ; <%0*> [#uses=1]
- call void @llvm.dbg.declare(%0* %2, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable32 to %0*)) nounwind
- call void @llvm.dbg.stoppoint(i32 21, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %3 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %3, align 8
- %4 = bitcast %4* %1 to i8* ; <i8*> [#uses=1]
- store i8 -128, i8* %4, align 8
- call void @llvm.dbg.stoppoint(i32 22, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %5 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- %6 = load i32* %5, align 8 ; <i32> [#uses=1]
- %7 = icmp eq i32 %6, 128 ; <i1> [#uses=1]
- br i1 %7, label %11, label %8
-
-; <label>:8 ; preds = %0
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %9 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %10 = call i32 (%1*, i8*, ...)* @fprintf(%1* %9, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 22, i8* getelementptr ([11 x i8]* @14, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %11
-
-; <label>:11 ; preds = %8, %0
- %.0.i = phi i32 [ 0, %8 ], [ 1, %0 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 23, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %12 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 0, i32* %12, align 8
- %13 = bitcast %4* %1 to [4 x i8]* ; <[4 x i8]*> [#uses=1]
- %14 = getelementptr [4 x i8]* %13, i32 0, i32 3 ; <i8*> [#uses=1]
- store i8 -128, i8* %14, align 1
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %15 = getelementptr %4* %1, i32 0, i32 0 ; <i32*> [#uses=1]
- %16 = load i32* %15, align 8 ; <i32> [#uses=1]
- %17 = icmp slt i32 %16, 0 ; <i1> [#uses=1]
- br i1 %17, label %test.exit, label %18
-
-; <label>:18 ; preds = %11
- call void @llvm.dbg.stoppoint(i32 5, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %19 = load %1** @__stderrp, align 4 ; <%1*> [#uses=1]
- %20 = call i32 (%1*, i8*, ...)* @fprintf(%1* %19, i8* getelementptr ([35 x i8]* @8, i32 0, i32 0), i32 24, i8* getelementptr ([8 x i8]* @15, i32 0, i32 0)) nounwind ; <i32> [#uses=0]
- call void @llvm.dbg.stoppoint(i32 6, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- br label %test.exit
-
-test.exit: ; preds = %18, %11
- %.01.i = phi i32 [ 0, %18 ], [ 1, %11 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 24, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %21 = and i32 %.01.i, %.0.i ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 25, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*)) nounwind
- %tmp = xor i32 %21, 1 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 29, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*))
- call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram38 to %0*))
- ret i32 %tmp
-}
diff --git a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
new file mode 100644
index 000000000000..cd21ff5f0b51
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+%union.anon = type { <4 x float> }
+
+; CHECK: @test
+; CHECK-NOT: alloca
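+; The same alloca is accessed both as [4 x i32] and as <4 x float>;
+; promoting such a union used to crash scalarrepl, and it should now simply
+; be eliminated.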
+
+define void @test() nounwind {
+entry:
+ %u = alloca %union.anon, align 16
+ %u164 = bitcast %union.anon* %u to [4 x i32]*
+ %arrayidx165 = getelementptr inbounds [4 x i32]* %u164, i32 0, i32 0
+ store i32 undef, i32* %arrayidx165, align 4
+ %v186 = bitcast %union.anon* %u to <4 x float>*
+ store <4 x float> undef, <4 x float>* %v186, align 16
+ ret void
+}
diff --git a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
new file mode 100644
index 000000000000..da707b706d6d
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -S -scalarrepl | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+%struct.S = type { [2 x %struct.anon], double }
+%struct.anon = type {}
+
+; CHECK: @test()
+; CHECK-NOT: alloca
+; CHECK: ret double 1.0
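+; [2 x %struct.anon] is zero-sized, so the double lives at offset 0 and the
+; 8-byte memcpy is effectively a plain double copy scalarrepl can see
+; through.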
+
+define double @test() nounwind uwtable ssp {
+entry:
+ %retval = alloca %struct.S, align 8
+ %ret = alloca %struct.S, align 8
+ %b = getelementptr inbounds %struct.S* %ret, i32 0, i32 1
+ store double 1.000000e+00, double* %b, align 8
+ %0 = bitcast %struct.S* %retval to i8*
+ %1 = bitcast %struct.S* %ret to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
+ %2 = bitcast %struct.S* %retval to double*
+ %3 = load double* %2, align 1
+ ret double %3
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/debuginfo.ll b/test/Transforms/ScalarRepl/debuginfo.ll
deleted file mode 100644
index ae2c6cc8f631..000000000000
--- a/test/Transforms/ScalarRepl/debuginfo.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl-ssa -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
- %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* }
- %llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
- %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
- %struct.Sphere = type { %struct.Vec }
- %struct.Vec = type { i32, i32, i32 }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [6 x i8] c"r.cpp\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([6 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"Vec\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str6 = internal constant [2 x i8] c"y\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype7 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 32, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str8 = internal constant [2 x i8] c"z\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype9 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, i64 32, i64 32, i64 64, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype10 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype11 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 96, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype12 = internal constant %llvm.dbg.derivedtype.type { i32 458768, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype11 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype10 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite13 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite13 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array14 = internal constant [5 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype10 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata" ; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite15 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array14 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite15 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array17 = internal constant [5 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype7 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype9 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to { }*) ], section "llvm.metadata" ; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite18 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, i64 96, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array17 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype19 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array20 = internal constant [5 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata" ; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array20 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str21 = internal constant [13 x i8] c"__comp_ctor \00", section "llvm.metadata" ; <[13 x i8]*> [#uses=1]
-@.str22 = internal constant [14 x i8] c"_ZN3VecC1Eiii\00", section "llvm.metadata" ; <[14 x i8]*> [#uses=1]
-@llvm.dbg.array32 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite33 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array32 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str34 = internal constant [10 x i8] c"operator-\00", section "llvm.metadata" ; <[10 x i8]*> [#uses=1]
-@.str35 = internal constant [14 x i8] c"_ZmiRK3VecS1_\00", section "llvm.metadata" ; <[14 x i8]*> [#uses=1]
-@.str41 = internal constant [7 x i8] c"Sphere\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@.str43 = internal constant [7 x i8] c"center\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype44 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str43, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 14, i64 96, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype45 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite52 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array46 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype45 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite47 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array46 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@.str48 = internal constant [11 x i8] c"ray_sphere\00", section "llvm.metadata" ; <[11 x i8]*> [#uses=1]
-@.str49 = internal constant [30 x i8] c"_ZN6Sphere10ray_sphereERK3Vec\00", section "llvm.metadata" ; <[30 x i8]*> [#uses=1]
-@llvm.dbg.subprogram50 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([30 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 16, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite47 to { }*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array51 = internal constant [2 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype44 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*) ], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite52 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str41, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, i64 96, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array51 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype53 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite52 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array54 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype53 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*) ], section "llvm.metadata" ; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite55 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array54 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram56 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str48, i32 0, i32 0), i8* getelementptr ([30 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 16, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite55 to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str61 = internal constant [2 x i8] c"v\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable62 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*), i8* getelementptr ([2 x i8]* @.str61, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 17, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1]
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define i32 @_ZN6Sphere10ray_sphereERK3Vec(%struct.Sphere* %this, %struct.Vec* %Orig) nounwind {
-entry:
- %v = alloca %struct.Vec, align 8 ; <%struct.Vec*> [#uses=4]
- call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*))
- %0 = bitcast %struct.Vec* %v to { }* ; <{ }*> [#uses=1]
- call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable62 to { }*))
- %1 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 2 ; <i32*> [#uses=1]
- %2 = load i32* %1, align 4 ; <i32> [#uses=1]
- %3 = getelementptr %struct.Vec* %Orig, i32 0, i32 2 ; <i32*> [#uses=1]
- %4 = load i32* %3, align 4 ; <i32> [#uses=1]
- %5 = sub i32 %2, %4 ; <i32> [#uses=1]
- %6 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
- %8 = getelementptr %struct.Vec* %Orig, i32 0, i32 1 ; <i32*> [#uses=1]
- %9 = load i32* %8, align 4 ; <i32> [#uses=1]
- %10 = sub i32 %7, %9 ; <i32> [#uses=1]
- %11 = getelementptr %struct.Sphere* %this, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %12 = load i32* %11, align 4 ; <i32> [#uses=1]
- %13 = getelementptr %struct.Vec* %Orig, i32 0, i32 0 ; <i32*> [#uses=1]
- %14 = load i32* %13, align 4 ; <i32> [#uses=1]
- %15 = sub i32 %12, %14 ; <i32> [#uses=1]
- %16 = getelementptr %struct.Vec* %v, i32 0, i32 0 ; <i32*> [#uses=2]
- store i32 %15, i32* %16, align 8
- %17 = getelementptr %struct.Vec* %v, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %10, i32* %17, align 4
- %18 = getelementptr %struct.Vec* %v, i32 0, i32 2 ; <i32*> [#uses=1]
- store i32 %5, i32* %18, align 8
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
- call void @llvm.dbg.stoppoint(i32 9, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
- %19 = load i32* %16, align 8 ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 18, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram56 to { }*))
- ret i32 %19
-}
diff --git a/test/Transforms/ScalarRepl/dg.exp b/test/Transforms/ScalarRepl/dg.exp
deleted file mode 100644
index 39954d8a498d..000000000000
--- a/test/Transforms/ScalarRepl/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
diff --git a/test/Transforms/ScalarRepl/lit.local.cfg b/test/Transforms/ScalarRepl/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/ScalarRepl/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/ScalarRepl/negative-memset.ll b/test/Transforms/ScalarRepl/negative-memset.ll
new file mode 100644
index 000000000000..e52ab468fd2c
--- /dev/null
+++ b/test/Transforms/ScalarRepl/negative-memset.ll
@@ -0,0 +1,20 @@
+; PR12202
+; RUN: opt < %s -scalarrepl -S
+; Ensure that we do not hang or crash when feeding a negative value to memset
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
+target triple = "i686-pc-win32"
+
+define i32 @test() nounwind {
+entry:
+ %retval = alloca i32, align 4
+ %buff = alloca [1 x i8], align 1
+ store i32 0, i32* %retval
+ %0 = bitcast [1 x i8]* %buff to i8*
+ call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
+ %arraydecay = getelementptr inbounds [1 x i8]* %buff, i32 0, i32 0
+ call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false) ; Negative 8!
+ ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
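For context, a plausible C source behind this new test (a reconstruction; the authoritative reproducer is in PR12202) simply hands a negative length to memset:

    #include <string.h>

    int test(void) {
      char buff[1];
      memset(buff, 0, 1);
      memset(buff, -1, -8);  /* -8 converts to 0xFFFFFFF8 as a 32-bit size_t */
      return 0;
    }

The second call is meaningless at run time; the test only requires that -scalarrepl finish cleanly instead of hanging or crashing while folding a memset whose i32 length is negative.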
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
new file mode 100644
index 000000000000..cb5101c2dd8e
--- /dev/null
+++ b/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s
+; rdar://10589171
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.foo = type { i32, i32 }
+
+@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
+entry:
+ %f = alloca %struct.foo, align 4
+ %x.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 0
+ store i32 1, i32* %x.i, align 4
+ %y.i = getelementptr inbounds %struct.foo* %f, i64 0, i32 1
+ br label %while.cond.i
+
+; CHECK: while.cond.i:
+; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+; CHECK-NOT: phi
+while.cond.i: ; preds = %while.cond.backedge.i, %entry
+ %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
+ %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
+ %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
+ %cmp.i = icmp sgt i32 %left.0.i, 0
+ br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge
+
+while.cond.i.func.exit_crit_edge: ; preds = %while.cond.i
+ br label %func.exit
+
+while.body.i: ; preds = %while.cond.i
+ %dec.i = add nsw i32 %left.0.i, -1
+ switch i32 1, label %while.body.i.func.exit_crit_edge [
+ i32 0, label %while.cond.backedge.i
+ i32 1, label %sw.bb.i
+ ]
+
+while.body.i.func.exit_crit_edge: ; preds = %while.body.i
+ br label %func.exit
+
+sw.bb.i: ; preds = %while.body.i
+ %cmp2.i = icmp eq i32 %tmp, 1
+ br i1 %cmp2.i, label %if.then.i, label %if.end.i
+
+if.then.i: ; preds = %sw.bb.i
+ store i32 %pos.0.i, i32* %x.i, align 4
+ br label %if.end.i
+
+; CHECK: if.end.i:
+; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+; CHECK-NOT: phi
+if.end.i: ; preds = %if.then.i, %sw.bb.i
+ %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
+ store i32 %tmp1, i32* %y.i, align 4
+ br label %while.cond.backedge.i
+
+; CHECK: while.cond.backedge.i:
+; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+; CHECK-NOT: phi
+while.cond.backedge.i: ; preds = %if.end.i, %while.body.i
+ %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
+ %xtmp.i = add i32 %pos.0.i, 1
+ br label %while.cond.i
+
+; CHECK: func.exit:
+; CHECK-NOT: load
+; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) nounwind
+func.exit: ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
+ %tmp3 = load i32* %x.i, align 4
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
+ ret i32 0
+}
+
+declare i32 @printf(i8* nocapture, ...) nounwind
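What the CHECK lines in this new test pin down: -scalarrepl-ssa promotes both fields of the %struct.foo alloca to SSA form, threading the promoted value of the x field through the phi cycle %tmp -> %tmp1 -> %tmp2. Once promotion succeeds, the load in func.exit is redundant, so the CHECK-NOT: load plus the CHECK on %call insist that printf consume the phi %tmp directly.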
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
index ab276b043e3a..fadf1aa276c8 100644
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ b/test/Transforms/ScalarRepl/volatile.ll
@@ -4,9 +4,9 @@
define i32 @voltest(i32 %T) {
%A = alloca {i32, i32}
%B = getelementptr {i32,i32}* %A, i32 0, i32 0
- volatile store i32 %T, i32* %B
+ store volatile i32 %T, i32* %B
%C = getelementptr {i32,i32}* %A, i32 0, i32 1
- %X = volatile load i32* %C
+ %X = load volatile i32* %C
ret i32 %X
}
diff --git a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
index 568e61c6ac6d..e2765e5f50e6 100644
--- a/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
+++ b/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll
@@ -1,9 +1,14 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1 } | count 4
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
; PR3354
; Do not merge bb1 into the entry block, it might trap.
@G = extern_weak global i32
+; CHECK: @test(
+; CHECK: br i1 %tmp25
+; CHECK: bb1:
+; CHECK: sdiv
+
define i32 @test(i32 %tmp21, i32 %tmp24) {
%tmp25 = icmp sle i32 %tmp21, %tmp24
br i1 %tmp25, label %bb2, label %bb1
@@ -18,6 +23,11 @@ bb6:
ret i32 927
}
+; CHECK: @test2(
+; CHECK: br i1 %tmp34
+; CHECK: bb5:
+; CHECK: sdiv
+
define i32 @test2(i32 %tmp21, i32 %tmp24, i1 %tmp34) {
br i1 %tmp34, label %bb5, label %bb6
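Why bb1 must stay guarded (the PR3354 scenario): @G has extern_weak linkage, so it may resolve to null, and a division whose operand is derived from @G, e.g. something like sdiv i32 1, ptrtoint (i32* @G to i32) (illustrative only; the exact expression lies outside this hunk), can trap at run time. Merging bb1 into entry would execute that constant expression unconditionally, which is exactly what the new CHECK lines forbid by requiring the br i1 and keeping the sdiv inside bb1 (and inside bb5 for @test2).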
diff --git a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll b/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
deleted file mode 100644
index ebacf2fe9a9e..000000000000
--- a/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: opt %s -simplifycfg -disable-output
-; PR8445
-
-define void @test() {
- unwind
-}
diff --git a/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index 5cfc77ce08a5..a61867fe89c7 100644
--- a/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -1,7 +1,10 @@
-; RUN: opt < %s -simplifycfg -S | grep select
-; RUN: opt < %s -simplifycfg -S | grep br | count 2
+; RUN: opt < %s -simplifycfg -phi-node-folding-threshold=2 -S | FileCheck %s
-define i32 @t2(i32 %a, i32 %b, i32 %c) nounwind {
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @test1(i32 %a, i32 %b, i32 %c) nounwind {
+; CHECK: @test1
entry:
%tmp1 = icmp eq i32 %b, 0
br i1 %tmp1, label %bb1, label %bb3
@@ -9,6 +12,11 @@ entry:
bb1: ; preds = %entry
%tmp2 = icmp sgt i32 %c, 1
br i1 %tmp2, label %bb2, label %bb3
+; CHECK: bb1:
+; CHECK-NEXT: icmp sgt i32 %c, 1
+; CHECK-NEXT: add i32 %a, 1
+; CHECK-NEXT: select i1 %tmp2, i32 %tmp3, i32 %a
+; CHECK-NEXT: br label %bb3
bb2: ; preds = bb1
%tmp3 = add i32 %a, 1
@@ -19,3 +27,20 @@ bb3: ; preds = %bb2, %entry
%tmp5 = sub i32 %tmp4, 1
ret i32 %tmp5
}
+
+declare i8 @llvm.cttz.i8(i8, i1)
+
+define i8 @test2(i8 %a) {
+; CHECK: @test2
+ br i1 undef, label %bb_true, label %bb_false
+bb_true:
+ %b = tail call i8 @llvm.cttz.i8(i8 %a, i1 false)
+ br label %join
+bb_false:
+ br label %join
+join:
+ %c = phi i8 [%b, %bb_true], [%a, %bb_false]
+; CHECK: select
+ ret i8 %c
+}
+
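A rough C equivalent of @test1 (a hypothetical reconstruction, not taken from the commit):

    int test1(int a, int b, int c) {
      int t = a;
      if (b == 0 && c > 1)  /* the inner test is what gets if-converted */
        t = a + 1;
      return t - 1;
    }

Raising -phi-node-folding-threshold to 2 permits speculating the add from bb2 into bb1, so the inner diamond collapses into the icmp/add/select sequence the CHECK-NEXT lines require; @test2 checks that the same speculation is willing to hoist a side-effect-free intrinsic call (llvm.cttz.i8), replacing the phi with a select.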
diff --git a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll b/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
deleted file mode 100644
index 761f0d53905b..000000000000
--- a/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
- %llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
- %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([7 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [7 x i8] c"cond.c\00", section "llvm.metadata" ; <[7 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata" ; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5555) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=0]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable6 = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=0]
-@.str7 = internal constant [2 x i8] c"y\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 393238, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([6 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, i64 0, i64 0, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype9 to { }*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str8 = internal constant [6 x i8] c"uint1\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@llvm.dbg.basictype9 = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 7 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-
-define i32 @foo(i32 %x1, i1 zeroext %y2) nounwind {
-entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp4 = icmp eq i32 %x1, 0 ; <i1> [#uses=1]
- br i1 %tmp4, label %bb, label %bb14
-
-bb: ; preds = %entry
- call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %y2, label %bb14, label %bb10
-
-bb7: ; preds = %bb
- call void @llvm.dbg.stoppoint(i32 7, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp9 = call i32 @g1(i32 %x1) nounwind ; <i32> [#uses=1]
- ret i32 %tmp9
-
-bb10: ; preds = %bb
- call void @llvm.dbg.stoppoint(i32 8, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp12 = add i32 %x1, 1 ; <i32> [#uses=1]
- %tmp13 = call i32 @g2(i32 %tmp12) nounwind ; <i32> [#uses=1]
- ret i32 %tmp13
-
-bb14: ; preds = %entry
- call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp16 = call i32 @g1(i32 %x1) nounwind ; <i32> [#uses=1]
- call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- ret i32 %tmp16
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare i32 @g1(i32)
-
-declare i32 @g2(i32)
-
-declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/Transforms/SimplifyCFG/branch-fold.ll b/test/Transforms/SimplifyCFG/branch-fold.ll
index 266609b52a54..2b2968119188 100644
--- a/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -1,13 +1,19 @@
-; RUN: opt < %s -simplifycfg -S | grep {br i1} | count 1
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
define void @test(i32* %P, i32* %Q, i1 %A, i1 %B) {
+; CHECK: test
+; CHECK: br i1
+; CHECK-NOT: br i1
+; CHECK: ret
+; CHECK: ret
+
+entry:
br i1 %A, label %a, label %b
-a: ; preds = %0
+a:
br i1 %B, label %b, label %c
-b: ; preds = %a, %0
+b:
store i32 123, i32* %P
ret void
-c: ; preds = %a
+c:
ret void
}
-
diff --git a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll b/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
deleted file mode 100644
index 6a500de6b0a4..000000000000
--- a/test/Transforms/SimplifyCFG/branch_fold_dbg.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-; END.
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-
-define void @main() {
-entry:
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.14.i19 = icmp eq i32 0, 2 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.14.i19, label %endif.1.i20, label %read_min.exit
-endif.1.i20: ; preds = %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.9.i.i = icmp eq i8* null, null ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.9.i.i, label %then.i12.i, label %then.i.i
-then.i.i: ; preds = %endif.1.i20
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-then.i12.i: ; preds = %endif.1.i20
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.9.i4.i = icmp eq i8* null, null ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.9.i4.i, label %endif.2.i33, label %then.i5.i
-then.i5.i: ; preds = %then.i12.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-endif.2.i33: ; preds = %then.i12.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %loopexit.0.i40, label %no_exit.0.i35
-no_exit.0.i35: ; preds = %no_exit.0.i35, %endif.2.i33
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.130.i = icmp slt i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.130.i, label %loopexit.0.i40.loopexit, label %no_exit.0.i35
-loopexit.0.i40.loopexit: ; preds = %no_exit.0.i35
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %loopexit.0.i40
-loopexit.0.i40: ; preds = %loopexit.0.i40.loopexit, %endif.2.i33
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.341.i = icmp eq i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.341.i, label %loopentry.1.i, label %read_min.exit
-loopentry.1.i: ; preds = %loopexit.0.i40
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.347.i = icmp sgt i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.347.i, label %no_exit.1.i41, label %loopexit.2.i44
-no_exit.1.i41: ; preds = %endif.5.i, %loopentry.1.i
- %indvar.i42 = phi i32 [ %indvar.next.i, %endif.5.i ], [ 0, %loopentry.1.i ] ; <i32> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.355.i = icmp eq i32 0, 3 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.355.i, label %endif.5.i, label %read_min.exit
-endif.5.i: ; preds = %no_exit.1.i41
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.34773.i = icmp sgt i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %indvar.next.i = add i32 %indvar.i42, 1 ; <i32> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.34773.i, label %no_exit.1.i41, label %loopexit.1.i.loopexit
-loopexit.1.i.loopexit: ; preds = %endif.5.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-loopexit.2.i44: ; preds = %loopentry.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-read_min.exit: ; preds = %no_exit.1.i41, %loopexit.0.i40, %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.23 = icmp eq i32 0, 0 ; <i1> [#uses=1]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 %tmp.23, label %endif.1, label %then.1
-then.1: ; preds = %read_min.exit
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %endif.0.i, label %then.0.i
-then.0.i: ; preds = %then.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %endif.1.i, label %then.1.i
-endif.0.i: ; preds = %then.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %endif.1.i, label %then.1.i
-then.1.i: ; preds = %endif.0.i, %then.0.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %getfree.exit, label %then.2.i
-endif.1.i: ; preds = %endif.0.i, %then.0.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %getfree.exit, label %then.2.i
-then.2.i: ; preds = %endif.1.i, %then.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-getfree.exit: ; preds = %endif.1.i, %then.1.i
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-endif.1: ; preds = %read_min.exit
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.27.i = getelementptr i32* null, i32 0 ; <i32*> [#uses=0]
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %loopexit.0.i15, label %no_exit.0.i14
-no_exit.0.i14: ; preds = %endif.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-loopexit.0.i15: ; preds = %endif.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
-no_exit.1.i16: ; preds = %no_exit.1.i16, %loopexit.0.i15
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br i1 false, label %primal_start_artificial.exit, label %no_exit.1.i16
-primal_start_artificial.exit: ; preds = %no_exit.1.i16, %loopexit.0.i15
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- ret void
-}
diff --git a/test/Transforms/SimplifyCFG/dg.exp b/test/Transforms/SimplifyCFG/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/SimplifyCFG/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll b/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
deleted file mode 100644
index 6fbbb1b19f36..000000000000
--- a/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @bar(i32)
-
-define void @test(i1 %P, i32* %Q) {
- br i1 %P, label %T, label %F
-T: ; preds = %0
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- store i32 1, i32* %Q
- %A = load i32* %Q ; <i32> [#uses=1]
- call void @bar( i32 %A )
- ret void
-F: ; preds = %0
- store i32 1, i32* %Q
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %B = load i32* %Q ; <i32> [#uses=1]
- call void @bar( i32 %B )
- ret void
-}
-
diff --git a/test/Transforms/SimplifyCFG/lit.local.cfg b/test/Transforms/SimplifyCFG/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyCFG/multiple-phis.ll b/test/Transforms/SimplifyCFG/multiple-phis.ll
new file mode 100644
index 000000000000..78454232461a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/multiple-phis.ll
@@ -0,0 +1,39 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+
+; It's not worthwhile to if-convert one of the phi nodes and leave
+; the other behind, because that still requires a branch. If
+; SimplifyCFG if-converts one of the phis, it should do both.
+
+; CHECK: %div.high.addr.0 = select i1 %cmp1, i32 %div, i32 %high.addr.0
+; CHECK-NEXT: %low.0.add2 = select i1 %cmp1, i32 %low.0, i32 %add2
+; CHECK-NEXT: br label %while.cond
+
+define i32 @upper_bound(i32* %r, i32 %high, i32 %k) nounwind {
+entry:
+ br label %while.cond
+
+while.cond: ; preds = %if.then, %if.else, %entry
+ %high.addr.0 = phi i32 [ %high, %entry ], [ %div, %if.then ], [ %high.addr.0, %if.else ]
+ %low.0 = phi i32 [ 0, %entry ], [ %low.0, %if.then ], [ %add2, %if.else ]
+ %cmp = icmp ult i32 %low.0, %high.addr.0
+ br i1 %cmp, label %while.body, label %while.end
+
+while.body: ; preds = %while.cond
+ %add = add i32 %low.0, %high.addr.0
+ %div = udiv i32 %add, 2
+ %idxprom = zext i32 %div to i64
+ %arrayidx = getelementptr inbounds i32* %r, i64 %idxprom
+ %0 = load i32* %arrayidx
+ %cmp1 = icmp ult i32 %k, %0
+ br i1 %cmp1, label %if.then, label %if.else
+
+if.then: ; preds = %while.body
+ br label %while.cond
+
+if.else: ; preds = %while.body
+ %add2 = add i32 %div, 1
+ br label %while.cond
+
+while.end: ; preds = %while.cond
+ ret i32 %low.0
+}
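A plausible C original for @upper_bound, reconstructed from the IR above (hypothetical, not part of the commit):

    unsigned upper_bound(const unsigned *r, unsigned high, unsigned k) {
      unsigned low = 0;
      while (low < high) {
        unsigned mid = (low + high) / 2;  /* %div */
        if (k < r[mid])
          high = mid;                     /* %if.then */
        else
          low = mid + 1;                  /* %if.else */
      }
      return low;
    }

Both loop-carried variables are updated on each arm, so converting just one phi to a select would still leave the branch in place; the two back-to-back selects in the CHECK lines confirm the pass converts the pair together.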
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
new file mode 100644
index 000000000000..c7917857ee60
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -0,0 +1,88 @@
+; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
+
+declare void @helper(i32)
+
+define void @test1(i1 %a, i1 %b) {
+; CHECK: @test1
+entry:
+ br i1 %a, label %Y, label %X, !prof !0
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !0
+
+X:
+ %c = or i1 %b, false
+ br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
+define void @test2(i1 %a, i1 %b) {
+; CHECK: @test2
+entry:
+ br i1 %a, label %X, label %Y, !prof !1
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
+; CHECK-NOT: !prof
+
+X:
+ %c = or i1 %b, false
+ br i1 %c, label %Z, label %Y, !prof !2
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
+define void @test3(i1 %a, i1 %b) {
+; CHECK: @test3
+; CHECK-NOT: !prof
+entry:
+ br i1 %a, label %X, label %Y, !prof !1
+
+X:
+ %c = or i1 %b, false
+ br i1 %c, label %Z, label %Y
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
+define void @test4(i1 %a, i1 %b) {
+; CHECK: @test4
+; CHECK-NOT: !prof
+entry:
+ br i1 %a, label %X, label %Y
+
+X:
+ %c = or i1 %b, false
+ br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
+
+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
+; CHECK-NOT: !2
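The rewritten weights in the trailing CHECK lines can be reproduced by scaling the two !prof sets to a common denominator (a sketch of the arithmetic, not necessarily the pass's exact formula). @test1: entry sends Y:3, X:5 and X splits Z:1, Y:1, so after folding

    Z = 5*1           = 5
    Y = 3*(1+1) + 5*1 = 11      ->  !0 becomes {5, 11}

@test2: entry sends X:1, Y:1 and X splits Z:1, Y:2, so

    Z = 1*1           = 1
    Y = 1*(1+2) + 1*2 = 5       ->  !1 becomes {1, 5}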
diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll
index 009f05e5574c..7654d0271a9a 100644
--- a/test/Transforms/SimplifyCFG/select-gep.ll
+++ b/test/Transforms/SimplifyCFG/select-gep.ll
@@ -35,6 +35,6 @@ if.end:
ret i8* %x.addr
; CHECK: @test2
-; CHECK: %x.addr = select i1 %cmp, i8* %incdec.ptr, i8* %y
-; CHECK: ret i8* %x.addr
+; CHECK: %incdec.ptr.y = select i1 %cmp, i8* %incdec.ptr, i8* %y
+; CHECK: ret i8* %incdec.ptr.y
}
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index fc83ec207814..3b0c48be6e2d 100644
--- a/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -15,8 +15,8 @@ c:
ret i32 5
; CHECK: @test1
; CHECK: %cond = icmp eq i32 %i, 24
-; CHECK: %merge = select i1 %cond, i32 5, i32 0
-; CHECK: ret i32 %merge
+; CHECK: %. = select i1 %cond, i32 5, i32 0
+; CHECK: ret i32 %.
}
diff --git a/test/Transforms/SimplifyCFG/switch-on-const-select.ll b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
index 5494a651d47d..673a62bf035c 100644
--- a/test/Transforms/SimplifyCFG/switch-on-const-select.ll
+++ b/test/Transforms/SimplifyCFG/switch-on-const-select.ll
@@ -115,7 +115,7 @@ entry:
cont:
; CHECK: %lt = icmp slt i64 %x, %y
%lt = icmp slt i64 %x, %y
-; CHECK-NEXT: br i1 %lt, label %a, label %r
+; CHECK-NEXT: select i1 %lt, i32 -1, i32 1
%qux = select i1 %lt, i32 0, i32 2
switch i32 %qux, label %bees [
i32 0, label %a
diff --git a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll b/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
deleted file mode 100644
index 2723ec608e1d..000000000000
--- a/test/Transforms/SimplifyCFG/switch_formation.dbg.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | FileCheck %s
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define i1 @t({ i32, i32 }* %I) {
-; CHECK: @t
-; CHECK: %tmp.2.i.off = add i32 %tmp.2.i, -14
-; CHECK: %switch = icmp ult i32 %tmp.2.i.off, 6
-entry:
- %tmp.1.i = getelementptr { i32, i32 }* %I, i64 0, i32 1 ; <i32*> [#uses=1]
- %tmp.2.i = load i32* %tmp.1.i ; <i32> [#uses=6]
- %tmp.2 = icmp eq i32 %tmp.2.i, 14 ; <i1> [#uses=1]
- br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
-shortcirc_next.0: ; preds = %entry
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.6 = icmp eq i32 %tmp.2.i, 15 ; <i1> [#uses=1]
- br i1 %tmp.6, label %shortcirc_done.4, label %shortcirc_next.1
-shortcirc_next.1: ; preds = %shortcirc_next.0
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.11 = icmp eq i32 %tmp.2.i, 16 ; <i1> [#uses=1]
- br i1 %tmp.11, label %shortcirc_done.4, label %shortcirc_next.2
-shortcirc_next.2: ; preds = %shortcirc_next.1
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.16 = icmp eq i32 %tmp.2.i, 17 ; <i1> [#uses=1]
- br i1 %tmp.16, label %shortcirc_done.4, label %shortcirc_next.3
-shortcirc_next.3: ; preds = %shortcirc_next.2
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.21 = icmp eq i32 %tmp.2.i, 18 ; <i1> [#uses=1]
- br i1 %tmp.21, label %shortcirc_done.4, label %shortcirc_next.4
-shortcirc_next.4: ; preds = %shortcirc_next.3
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp.26 = icmp eq i32 %tmp.2.i, 19 ; <i1> [#uses=1]
- br label %UnifiedReturnBlock
-shortcirc_done.4: ; preds = %shortcirc_next.3, %shortcirc_next.2, %shortcirc_next.1, %shortcirc_next.0, %entry
- br label %UnifiedReturnBlock
-UnifiedReturnBlock: ; preds = %shortcirc_done.4, %shortcirc_next.4
- %UnifiedRetVal = phi i1 [ %tmp.26, %shortcirc_next.4 ], [ true, %shortcirc_done.4 ] ; <i1> [#uses=1]
- ret i1 %UnifiedRetVal
-}
-
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
deleted file mode 100644
index 343e169edc26..000000000000
--- a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN: not grep " switch"
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
- %llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
- %llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
- %llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [10 x i8] c"swithh2.c\00", section "llvm.metadata" ; <[10 x i8]*> [#uses=1]
-@.str1 = internal constant [38 x i8] c"/developer/home2/zsth/test/debug/tmp/\00", section "llvm.metadata" ; <[38 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5641) (LLVM build)\00", section "llvm.metadata" ; <[52 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([10 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([38 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 -1 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [2 x { }*] [{ }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata" ; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"foo\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"x\00", section "llvm.metadata" ; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=0]
-
-define i32 @foo(i32 %x) nounwind {
-entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- switch i32 %x, label %bb4 [
- i32 1, label %bb
- i32 2, label %bb1
- i32 3, label %bb2
- i32 4, label %bb3
- ]
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb
-
-bb: ; preds = %0, %entry
- call void @llvm.dbg.stoppoint(i32 3, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 3, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb1
-
-bb1: ; preds = %1, %entry
- call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb2
-
-bb2: ; preds = %2, %entry
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb3
-
-bb3: ; preds = %3, %entry
- call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb4
-
-bb4: ; preds = %4, %entry
- call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- switch i32 %x, label %bb7 [
- i32 5, label %bb5
- i32 6, label %bb6
- ]
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 10, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb5
-
-bb5: ; preds = %5, %bb4
- call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb6
-
-bb6: ; preds = %6, %bb4
- call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
- ; No predecessors!
- call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb7
-
-bb7: ; preds = %7, %bb4
- call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %bb8
-
-bb8: ; preds = %bb7, %bb6, %bb5, %bb3, %bb2, %bb1, %bb
- %.0 = phi i32 [ 4, %bb3 ], [ 3, %bb2 ], [ 2, %bb1 ], [ 1, %bb ], [ 6, %bb6 ], [ 5, %bb5 ], [ 0, %bb7 ] ; <i32> [#uses=1]
- call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- br label %return
-
-return: ; preds = %bb8
- call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
- ret i32 %.0
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
deleted file mode 100644
index 01041eb7db58..000000000000
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
-
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define i1 @qux(i8* %m, i8* %n, i8* %o, i8* %p) nounwind {
-entry:
- %tmp7 = icmp eq i8* %m, %n
- br i1 %tmp7, label %bb, label %UnifiedReturnBlock
-
-bb:
-call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %tmp15 = icmp eq i8* %o, %p
- br label %UnifiedReturnBlock
-
-UnifiedReturnBlock:
- %result = phi i1 [ 0, %entry ], [ %tmp15, %bb ]
- ret i1 %result
-}
diff --git a/test/Transforms/SimplifyCFG/unreachable-blocks.ll b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
new file mode 100644
index 000000000000..1df0eab6259f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/unreachable-blocks.ll
@@ -0,0 +1,28 @@
+; RUN: opt -simplifycfg < %s -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-pc-linux-gnu"
+
+; PR11825
+define void @test1() {
+entry:
+ br label %return
+
+while_block: ; preds = %and_if_cont2, %and_if_cont
+ %newlen = sub i32 %newlen, 1
+ %newptr = getelementptr i8* %newptr, i64 1
+ %test = icmp sgt i32 %newlen, 0
+ br i1 %test, label %and_if1, label %and_if_cont2
+
+and_if1: ; preds = %while_block
+ %char = load i8* %newptr
+ %test2 = icmp ule i8 %char, 32
+ br label %and_if_cont2
+
+and_if_cont2: ; preds = %and_if1, %while_block
+ %a18 = phi i1 [ %test, %while_block ], [ %test2, %and_if1 ]
+ br i1 %a18, label %while_block, label %return
+
+return: ; preds = %and_if_cont2, %and_if_cont
+ ret void
+}
diff --git a/test/Transforms/SimplifyLibCalls/PR7357.ll b/test/Transforms/SimplifyLibCalls/PR7357.ll
index 6d5c1d5047dc..3529a9cfb1b8 100644
--- a/test/Transforms/SimplifyLibCalls/PR7357.ll
+++ b/test/Transforms/SimplifyLibCalls/PR7357.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -simplify-libcalls -S | FileCheck %s
+; RUN: opt < %s "-default-data-layout=e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32" -simplify-libcalls -S | FileCheck %s
@.str1 = private constant [11 x i8] c"(){};[]&|:\00", align 4
; check that simplify libcalls will not replace a call with one calling
diff --git a/test/Transforms/SimplifyLibCalls/Printf.ll b/test/Transforms/SimplifyLibCalls/Printf.ll
index caea311ba142..489c993f2110 100644
--- a/test/Transforms/SimplifyLibCalls/Printf.ll
+++ b/test/Transforms/SimplifyLibCalls/Printf.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -simplify-libcalls -S -o %t
-; RUN: FileCheck < %t %s
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
@str = internal constant [13 x i8] c"hello world\0A\00" ; <[13 x i8]*> [#uses=1]
@str1 = internal constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
+; CHECK: private unnamed_addr constant [12 x i8] c"hello world\00"
+
declare i32 @printf(i8*, ...)
; CHECK: define void @f0
diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll
index 45b349d6840d..acd8aaf6e543 100644
--- a/test/Transforms/SimplifyLibCalls/StrLen.ll
+++ b/test/Transforms/SimplifyLibCalls/StrLen.ll
@@ -6,6 +6,7 @@ target datalayout = "e-p:32:32"
@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=3]
@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=3]
@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
+@nullstring = constant i8 0
declare i32 @strlen(i8*)
@@ -54,3 +55,8 @@ define i1 @test7() {
%ne_null = icmp ne i32 %null_l, 0 ; <i1> [#uses=1]
ret i1 %ne_null
}
+
+define i32 @test8() {
+ %len = tail call i32 @strlen(i8* @nullstring) nounwind
+ ret i32 %len
+}
diff --git a/test/Transforms/SimplifyLibCalls/cos.ll b/test/Transforms/SimplifyLibCalls/cos.ll
new file mode 100644
index 000000000000..6a8ce8c3881d
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/cos.ll
@@ -0,0 +1,14 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define double @foo(double %d) nounwind readnone {
+; CHECK: @foo
+ %1 = fsub double -0.000000e+00, %d
+ %2 = call double @cos(double %1) nounwind readnone
+; CHECK: call double @cos(double %d)
+ ret double %2
+}
+
+declare double @cos(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/dg.exp b/test/Transforms/SimplifyLibCalls/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/SimplifyLibCalls/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/SimplifyLibCalls/fwrite.ll b/test/Transforms/SimplifyLibCalls/fwrite.ll
new file mode 100644
index 000000000000..f0f3dcaac63e
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/fwrite.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+%FILE = type { i32 }
+
+@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
+
+define i64 @foo(%FILE* %f) {
+; CHECK: %retval = call i64 @fwrite
+ %retval = call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
+ ret i64 %retval
+}
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
diff --git a/test/Transforms/SimplifyLibCalls/lit.local.cfg b/test/Transforms/SimplifyLibCalls/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/SimplifyLibCalls/osx-names.ll b/test/Transforms/SimplifyLibCalls/osx-names.ll
new file mode 100644
index 000000000000..e321d1dd3171
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/osx-names.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; <rdar://problem/9815881>
+; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
+; Make sure we use the correct names.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.7.2"
+
+%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
+%struct.__sbuf = type { i8*, i32 }
+%struct.__sFILEX = type opaque
+
+@.str = private unnamed_addr constant [13 x i8] c"Hello world\0A\00", align 1
+@.str2 = private unnamed_addr constant [3 x i8] c"%s\00", align 1
+
+define void @test1(%struct.__sFILE* %stream) nounwind {
+; CHECK: define void @test1
+; CHECK: call i32 @"fwrite$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0)) nounwind
+ ret void
+}
+
+define void @test2(%struct.__sFILE* %stream, i8* %str) nounwind ssp {
+; CHECK: define void @test2
+; CHECK: call i32 @"fputs$UNIX2003"
+ %call = tail call i32 (%struct.__sFILE*, i8*, ...)* @fprintf(%struct.__sFILE* %stream, i8* getelementptr inbounds ([3 x i8]* @.str2, i32 0, i32 0), i8* %str) nounwind
+ ret void
+}
+
+declare i32 @fprintf(%struct.__sFILE*, i8*, ...) nounwind
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 23433728c13a..4c531d82e6ee 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -21,7 +21,7 @@ false:
ret i32 0
}
-; But don't sink volatile loads...
+; But don't sink load volatiles...
; CHECK: @foo2
; CHECK: load volatile
diff --git a/test/Transforms/Sink/dg.exp b/test/Transforms/Sink/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/Sink/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/Sink/lit.local.cfg b/test/Transforms/Sink/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/Sink/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/StripSymbols/dg.exp b/test/Transforms/StripSymbols/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/StripSymbols/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/StripSymbols/lit.local.cfg b/test/Transforms/StripSymbols/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/StripSymbols/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/dg.exp b/test/Transforms/TailCallElim/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/TailCallElim/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
index 899e11596688..a29b72e94239 100644
--- a/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -46,7 +46,7 @@ else: ; preds = %entry
}
; This load can't be safely moved above the call because that would change the
-; order in which the volatile loads are performed.
+; order in which the load volatiles are performed.
define fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind {
entry:
%tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
@@ -58,7 +58,7 @@ if: ; preds = %entry
else: ; preds = %entry
%tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
%tmp8 = call fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
- %tmp9 = volatile load i32* %a_arg ; <i32> [#uses=1]
+ %tmp9 = load volatile i32* %a_arg ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
ret i32 %tmp10
}
diff --git a/test/Transforms/TailCallElim/lit.local.cfg b/test/Transforms/TailCallElim/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/TailCallElim/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/TailCallElim/setjmp.ll b/test/Transforms/TailCallElim/setjmp.ll
index 7ef9cb360f53..4ce6ac7965e9 100644
--- a/test/Transforms/TailCallElim/setjmp.ll
+++ b/test/Transforms/TailCallElim/setjmp.ll
@@ -15,7 +15,7 @@ bb:
ret void
}
-declare i32 @setjmp(i32*)
+declare i32 @setjmp(i32*) returns_twice
; CHECK: foo2
; CHECK-NOT: tail call void @bar()
diff --git a/test/Transforms/TailDup/X86/dg.exp b/test/Transforms/TailDup/X86/dg.exp
deleted file mode 100644
index 7b7bd4e73807..000000000000
--- a/test/Transforms/TailDup/X86/dg.exp
+++ /dev/null
@@ -1,5 +0,0 @@
-load_lib llvm.exp
-
-if { [llvm_supports_target X86] } {
- RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll}]]
-}
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/TailDup/X86/lit.local.cfg
new file mode 100644
index 000000000000..da2db5a45f9c
--- /dev/null
+++ b/test/Transforms/TailDup/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/TailDup/dg.exp b/test/Transforms/TailDup/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Transforms/TailDup/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/TailDup/lit.local.cfg
new file mode 100644
index 000000000000..18c604aba567
--- /dev/null
+++ b/test/Transforms/TailDup/lit.local.cfg
@@ -0,0 +1,5 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
diff --git a/test/Unit/lit.site.cfg.in b/test/Unit/lit.site.cfg.in
index 9643507b1640..65e98d0af550 100644
--- a/test/Unit/lit.site.cfg.in
+++ b/test/Unit/lit.site.cfg.in
@@ -3,7 +3,6 @@
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
-config.llvmgcc_dir = "@LLVMGCCDIR@"
config.llvm_build_mode = "@LLVM_BUILD_MODE@"
config.enable_shared = @ENABLE_SHARED@
config.shlibdir = "@SHLIBDIR@"
diff --git a/test/Verifier/cttz-undef-arg.ll b/test/Verifier/cttz-undef-arg.ll
new file mode 100644
index 000000000000..48cd061d3221
--- /dev/null
+++ b/test/Verifier/cttz-undef-arg.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+
+define void @f(i32 %x, i1 %is_not_zero) {
+entry:
+; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK-NEXT: @llvm.ctlz.i32
+ call i32 @llvm.ctlz.i32(i32 %x, i1 %is_not_zero)
+
+; CHECK: is_zero_undef argument of bit counting intrinsics must be a constant int
+; CHECK-NEXT: @llvm.cttz.i32
+ call i32 @llvm.cttz.i32(i32 %x, i1 %is_not_zero)
+ ret void
+}
diff --git a/test/Verifier/dg.exp b/test/Verifier/dg.exp
deleted file mode 100644
index f2005891a59a..000000000000
--- a/test/Verifier/dg.exp
+++ /dev/null
@@ -1,3 +0,0 @@
-load_lib llvm.exp
-
-RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Verifier/fpaccuracy.ll b/test/Verifier/fpaccuracy.ll
new file mode 100644
index 000000000000..2fefde09f188
--- /dev/null
+++ b/test/Verifier/fpaccuracy.ll
@@ -0,0 +1,31 @@
+; RUN: not llvm-as < %s |& FileCheck %s
+
+define void @foo(i32 %i, float %f, <2 x float> %g) {
+ %s = add i32 %i, %i, !fpaccuracy !0
+; CHECK: fpaccuracy requires a floating point result!
+ %t = fadd float %f, %f, !fpaccuracy !1
+; CHECK: fpaccuracy takes one operand!
+ %u = fadd float %f, %f, !fpaccuracy !2
+; CHECK: fpaccuracy takes one operand!
+ %v = fadd float %f, %f, !fpaccuracy !3
+; CHECK: fpaccuracy ULPs not a floating point number!
+ %w = fadd float %f, %f, !fpaccuracy !0
+; Above line is correct.
+ %w2 = fadd <2 x float> %g, %g, !fpaccuracy !0
+; Above line is correct.
+ %x = fadd float %f, %f, !fpaccuracy !4
+; CHECK: fpaccuracy ULPs is negative!
+ %y = fadd float %f, %f, !fpaccuracy !5
+; CHECK: fpaccuracy ULPs is negative!
+ %z = fadd float %f, %f, !fpaccuracy !6
+; CHECK: fpaccuracy ULPs not a normal number!
+ ret void
+}
+
+!0 = metadata !{ float 1.0 }
+!1 = metadata !{ }
+!2 = metadata !{ float 1.0, float 1.0 }
+!3 = metadata !{ i32 1 }
+!4 = metadata !{ float -1.0 }
+!5 = metadata !{ float -0.0 }
+!6 = metadata !{ float 0x7FFFFFFF00000000 }
diff --git a/test/Verifier/lit.local.cfg b/test/Verifier/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Verifier/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Verifier/range-1.ll b/test/Verifier/range-1.ll
new file mode 100644
index 000000000000..611933a1ec30
--- /dev/null
+++ b/test/Verifier/range-1.ll
@@ -0,0 +1,78 @@
+; RUN: not llvm-as < %s -o /dev/null |& FileCheck %s
+
+define void @f1(i8* %x) {
+entry:
+ store i8 0, i8* %x, align 1, !range !0
+ ret void
+}
+!0 = metadata !{i8 0, i8 1}
+; CHECK: Ranges are only for loads!
+; CHECK-NEXT: store i8 0, i8* %x, align 1, !range !0
+
+define i8 @f2(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !1
+ ret i8 %y
+}
+!1 = metadata !{}
+; CHECK: It should have at least one range!
+; CHECK-NEXT: metadata
+
+define i8 @f3(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !2
+ ret i8 %y
+}
+!2 = metadata !{i8 0}
+; CHECK: Unfinished range!
+
+define i8 @f4(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !3
+ ret i8 %y
+}
+!3 = metadata !{double 0.0, i8 0}
+; CHECK: The lower limit must be an integer!
+
+define i8 @f5(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !4
+ ret i8 %y
+}
+!4 = metadata !{i8 0, double 0.0}
+; CHECK: The upper limit must be an integer!
+
+define i8 @f6(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !5
+ ret i8 %y
+}
+!5 = metadata !{i32 0, i8 0}
+; CHECK: Range types must match load type!
+; CHECK: %y = load
+
+define i8 @f7(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !6
+ ret i8 %y
+}
+!6 = metadata !{i8 0, i32 0}
+; CHECK: Range types must match load type!
+; CHECK: %y = load
+
+define i8 @f8(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !7
+ ret i8 %y
+}
+!7 = metadata !{i32 0, i32 0}
+; CHECK: Range types must match load type!
+; CHECK: %y = load
+
+define i8 @f9(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !8
+ ret i8 %y
+}
+!8 = metadata !{i8 0, i8 0}
+; CHECK: Range must not be empty!
diff --git a/test/Verifier/range-2.ll b/test/Verifier/range-2.ll
new file mode 100644
index 000000000000..ef542c8c9380
--- /dev/null
+++ b/test/Verifier/range-2.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as < %s -o /dev/null
+
+define i8 @f1(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !0
+ ret i8 %y
+}
+!0 = metadata !{i8 0, i8 1}
+
+define i8 @f2(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !1
+ ret i8 %y
+}
+!1 = metadata !{i8 255, i8 1}
+
+define i8 @f3(i8* %x) {
+entry:
+ %y = load i8* %x, align 1, !range !2
+ ret i8 %y
+}
+!2 = metadata !{i8 1, i8 3, i8 5, i8 42}
diff --git a/test/YAMLParser/LICENSE.txt b/test/YAMLParser/LICENSE.txt
new file mode 100644
index 000000000000..050ced23f688
--- /dev/null
+++ b/test/YAMLParser/LICENSE.txt
@@ -0,0 +1,19 @@
+Copyright (c) 2006 Kirill Simonov
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/test/YAMLParser/bool.data b/test/YAMLParser/bool.data
new file mode 100644
index 000000000000..e987a0ec1e32
--- /dev/null
+++ b/test/YAMLParser/bool.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- yes
+- NO
+- True
+- on
diff --git a/test/YAMLParser/construct-bool.data b/test/YAMLParser/construct-bool.data
new file mode 100644
index 000000000000..035ec0c85805
--- /dev/null
+++ b/test/YAMLParser/construct-bool.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: yes
+answer: NO
+logical: True
+option: on
+
+
+but:
+ y: is a string
+ n: is a string
diff --git a/test/YAMLParser/construct-custom.data b/test/YAMLParser/construct-custom.data
new file mode 100644
index 000000000000..cac95e0a5fb7
--- /dev/null
+++ b/test/YAMLParser/construct-custom.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- !tag1
+ x: 1
+- !tag1
+ x: 1
+ 'y': 2
+ z: 3
+- !tag2
+ 10
+- !tag2
+ =: 10
+ 'y': 20
+ z: 30
+- !tag3
+ x: 1
+- !tag3
+ x: 1
+ 'y': 2
+ z: 3
+- !tag3
+ =: 1
+ 'y': 2
+ z: 3
+- !foo
+ my-parameter: foo
+ my-another-parameter: [1,2,3]
diff --git a/test/YAMLParser/construct-float.data b/test/YAMLParser/construct-float.data
new file mode 100644
index 000000000000..07c51bdd833b
--- /dev/null
+++ b/test/YAMLParser/construct-float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 6.8523015e+5
+exponential: 685.230_15e+03
+fixed: 685_230.15
+sexagesimal: 190:20:30.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/construct-int.data b/test/YAMLParser/construct-int.data
new file mode 100644
index 000000000000..b14c37f7880e
--- /dev/null
+++ b/test/YAMLParser/construct-int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 685230
+decimal: +685_230
+octal: 02472256
+hexadecimal: 0x_0A_74_AE
+binary: 0b1010_0111_0100_1010_1110
+sexagesimal: 190:20:30
diff --git a/test/YAMLParser/construct-map.data b/test/YAMLParser/construct-map.data
new file mode 100644
index 000000000000..1b681206d177
--- /dev/null
+++ b/test/YAMLParser/construct-map.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+# Unordered set of key: value pairs.
+Block style: !!map
+ Clark : Evans
+ Brian : Ingerson
+ Oren : Ben-Kiki
+Flow style: !!map { Clark: Evans, Brian: Ingerson, Oren: Ben-Kiki }
diff --git a/test/YAMLParser/construct-merge.data b/test/YAMLParser/construct-merge.data
new file mode 100644
index 000000000000..0ebc9f612d5d
--- /dev/null
+++ b/test/YAMLParser/construct-merge.data
@@ -0,0 +1,29 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- &CENTER { x: 1, 'y': 2 }
+- &LEFT { x: 0, 'y': 2 }
+- &BIG { r: 10 }
+- &SMALL { r: 1 }
+
+# All the following maps are equal:
+
+- # Explicit keys
+ x: 1
+ 'y': 2
+ r: 10
+ label: center/big
+
+- # Merge one map
+ << : *CENTER
+ r: 10
+ label: center/big
+
+- # Merge multiple maps
+ << : [ *CENTER, *BIG ]
+ label: center/big
+
+- # Override
+ << : [ *BIG, *LEFT, *SMALL ]
+ x: 1
+ label: center/big
diff --git a/test/YAMLParser/construct-null.data b/test/YAMLParser/construct-null.data
new file mode 100644
index 000000000000..51f8b61e24ef
--- /dev/null
+++ b/test/YAMLParser/construct-null.data
@@ -0,0 +1,20 @@
+# RUN: yaml-bench -canonical %s
+
+# A document may be null.
+---
+---
+# This mapping has four keys,
+# one has a value.
+empty:
+canonical: ~
+english: null
+~: null key
+---
+# This sequence has five
+# entries, two have values.
+sparse:
+ - ~
+ - 2nd entry
+ -
+ - 4th entry
+ - Null
diff --git a/test/YAMLParser/construct-omap.data b/test/YAMLParser/construct-omap.data
new file mode 100644
index 000000000000..b96d6799c7d4
--- /dev/null
+++ b/test/YAMLParser/construct-omap.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed ordered map (dictionary).
+Bestiary: !!omap
+ - aardvark: African pig-like ant eater. Ugly.
+ - anteater: South-American ant eater. Two species.
+ - anaconda: South-American constrictor snake. Scaly.
+ # Etc.
+# Flow style
+Numbers: !!omap [ one: 1, two: 2, three : 3 ]
diff --git a/test/YAMLParser/construct-pairs.data b/test/YAMLParser/construct-pairs.data
new file mode 100644
index 000000000000..40f288d1d741
--- /dev/null
+++ b/test/YAMLParser/construct-pairs.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed pairs.
+Block tasks: !!pairs
+ - meeting: with team.
+ - meeting: with boss.
+ - break: lunch.
+ - meeting: with client.
+Flow tasks: !!pairs [ meeting: with team, meeting: with boss ]
diff --git a/test/YAMLParser/construct-seq.data b/test/YAMLParser/construct-seq.data
new file mode 100644
index 000000000000..f43fd39f8ed2
--- /dev/null
+++ b/test/YAMLParser/construct-seq.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+# Ordered sequence of nodes
+Block style: !!seq
+- Mercury # Rotates - no light/dark sides.
+- Venus # Deadliest. Aptly named.
+- Earth # Mostly dirt.
+- Mars # Seems empty.
+- Jupiter # The king.
+- Saturn # Pretty.
+- Uranus # Where the sun hardly shines.
+- Neptune # Boring. No rings.
+- Pluto # You call this a planet?
+Flow style: !!seq [ Mercury, Venus, Earth, Mars, # Rocks
+ Jupiter, Saturn, Uranus, Neptune, # Gas
+ Pluto ] # Overrated
+
diff --git a/test/YAMLParser/construct-set.data b/test/YAMLParser/construct-set.data
new file mode 100644
index 000000000000..3e9d095e714e
--- /dev/null
+++ b/test/YAMLParser/construct-set.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly typed set.
+baseball players: !!set
+ ? Mark McGwire
+ ? Sammy Sosa
+ ? Ken Griffey
+# Flow style
+baseball teams: !!set { Boston Red Sox, Detroit Tigers, New York Yankees }
diff --git a/test/YAMLParser/construct-str-ascii.data b/test/YAMLParser/construct-str-ascii.data
new file mode 100644
index 000000000000..24290ae8a99c
--- /dev/null
+++ b/test/YAMLParser/construct-str-ascii.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- !!str "ascii string"
diff --git a/test/YAMLParser/construct-str.data b/test/YAMLParser/construct-str.data
new file mode 100644
index 000000000000..dc1ce825cdc8
--- /dev/null
+++ b/test/YAMLParser/construct-str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+string: abcd
diff --git a/test/YAMLParser/construct-timestamp.data b/test/YAMLParser/construct-timestamp.data
new file mode 100644
index 000000000000..f262c2d02c0d
--- /dev/null
+++ b/test/YAMLParser/construct-timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 2001-12-15T02:59:43.1Z
+valid iso8601: 2001-12-14t21:59:43.10-05:00
+space separated: 2001-12-14 21:59:43.10 -5
+no time zone (Z): 2001-12-15 2:59:43.10
+date (00:00:00Z): 2002-12-14
diff --git a/test/YAMLParser/construct-value.data b/test/YAMLParser/construct-value.data
new file mode 100644
index 000000000000..fe01a0dc9049
--- /dev/null
+++ b/test/YAMLParser/construct-value.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+--- # Old schema
+link with:
+ - library1.dll
+ - library2.dll
+--- # New schema
+link with:
+ - = : library1.dll
+ version: 1.2
+ - = : library2.dll
+ version: 2.3
diff --git a/test/YAMLParser/duplicate-key.former-loader-error.data b/test/YAMLParser/duplicate-key.former-loader-error.data
new file mode 100644
index 000000000000..9272103fe657
--- /dev/null
+++ b/test/YAMLParser/duplicate-key.former-loader-error.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo: bar
+foo: baz
diff --git a/test/YAMLParser/duplicate-mapping-key.former-loader-error.data b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
new file mode 100644
index 000000000000..96d175d2ac1a
--- /dev/null
+++ b/test/YAMLParser/duplicate-mapping-key.former-loader-error.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+---
+&anchor foo:
+ foo: bar
+ *anchor: duplicate key
+ baz: bat
+ *anchor: duplicate key
diff --git a/test/YAMLParser/duplicate-merge-key.former-loader-error.data b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
new file mode 100644
index 000000000000..6b1276436ab1
--- /dev/null
+++ b/test/YAMLParser/duplicate-merge-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+<<: {x: 1, y: 2}
+foo: bar
+<<: {z: 3, t: 4}
diff --git a/test/YAMLParser/duplicate-value-key.former-loader-error.data b/test/YAMLParser/duplicate-value-key.former-loader-error.data
new file mode 100644
index 000000000000..dc20e0b275c6
--- /dev/null
+++ b/test/YAMLParser/duplicate-value-key.former-loader-error.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+=: 1
+foo: bar
+=: 2
diff --git a/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
new file mode 100644
index 000000000000..f5adedb1350f
--- /dev/null
+++ b/test/YAMLParser/emit-block-scalar-in-simple-key-context-bug.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+? |-
+ foo
+: |-
+ bar
diff --git a/test/YAMLParser/empty-document-bug.data b/test/YAMLParser/empty-document-bug.data
new file mode 100644
index 000000000000..fa131fe78efe
--- /dev/null
+++ b/test/YAMLParser/empty-document-bug.data
@@ -0,0 +1,2 @@
+# RUN: yaml-bench -canonical %s
+
diff --git a/test/YAMLParser/float.data b/test/YAMLParser/float.data
new file mode 100644
index 000000000000..c4de97037c83
--- /dev/null
+++ b/test/YAMLParser/float.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 6.8523015e+5
+- 685.230_15e+03
+- 685_230.15
+- 190:20:30.15
+- -.inf
+- .NaN
diff --git a/test/YAMLParser/int.data b/test/YAMLParser/int.data
new file mode 100644
index 000000000000..2651d096ff60
--- /dev/null
+++ b/test/YAMLParser/int.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 685230
+- +685_230
+- 02472256
+- 0x_0A_74_AE
+- 0b1010_0111_0100_1010_1110
+- 190:20:30
diff --git a/test/YAMLParser/invalid-single-quote-bug.data b/test/YAMLParser/invalid-single-quote-bug.data
new file mode 100644
index 000000000000..3722a003dfff
--- /dev/null
+++ b/test/YAMLParser/invalid-single-quote-bug.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- "foo 'bar'"
+- "foo\n'bar'"
diff --git a/test/YAMLParser/merge.data b/test/YAMLParser/merge.data
new file mode 100644
index 000000000000..86313596e198
--- /dev/null
+++ b/test/YAMLParser/merge.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- <<
diff --git a/test/YAMLParser/more-floats.data b/test/YAMLParser/more-floats.data
new file mode 100644
index 000000000000..668b31cd13ae
--- /dev/null
+++ b/test/YAMLParser/more-floats.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+[0.0, +1.0, -1.0, +.inf, -.inf, .nan, .nan]
diff --git a/test/YAMLParser/negative-float-bug.data b/test/YAMLParser/negative-float-bug.data
new file mode 100644
index 000000000000..0ba0ffee3010
--- /dev/null
+++ b/test/YAMLParser/negative-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+-1.0
diff --git a/test/YAMLParser/null.data b/test/YAMLParser/null.data
new file mode 100644
index 000000000000..a38d7fa6c522
--- /dev/null
+++ b/test/YAMLParser/null.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+-
+- ~
+- null
diff --git a/test/YAMLParser/resolver.data b/test/YAMLParser/resolver.data
new file mode 100644
index 000000000000..8cbba6328b68
--- /dev/null
+++ b/test/YAMLParser/resolver.data
@@ -0,0 +1,32 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"this scalar should be selected"
+---
+key11: !foo
+ key12:
+ is: [selected]
+ key22:
+ key13: [not, selected]
+ key23: [not, selected]
+ key32:
+ key31: [not, selected]
+ key32: [not, selected]
+ key33: {not: selected}
+key21: !bar
+ - not selected
+ - selected
+ - not selected
+key31: !baz
+ key12:
+ key13:
+ key14: {selected}
+ key23:
+ key14: [not, selected]
+ key33:
+ key14: {selected}
+ key24: {not: selected}
+ key22:
+ - key14: {selected}
+ key24: {not: selected}
+ - key14: {selected}
diff --git a/test/YAMLParser/run-parser-crash-bug.data b/test/YAMLParser/run-parser-crash-bug.data
new file mode 100644
index 000000000000..3ec910ce0477
--- /dev/null
+++ b/test/YAMLParser/run-parser-crash-bug.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+- Harry Potter and the Prisoner of Azkaban
+- Harry Potter and the Goblet of Fire
+- Harry Potter and the Order of the Phoenix
+---
+- Memoirs Found in a Bathtub
+- Snow Crash
+- Ghost World
diff --git a/test/YAMLParser/scan-document-end-bug.data b/test/YAMLParser/scan-document-end-bug.data
new file mode 100644
index 000000000000..7354caf8cd8b
--- /dev/null
+++ b/test/YAMLParser/scan-document-end-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Ticket #4
+---
+...
diff --git a/test/YAMLParser/scan-line-break-bug.data b/test/YAMLParser/scan-line-break-bug.data
new file mode 100644
index 000000000000..792973d3f56c
--- /dev/null
+++ b/test/YAMLParser/scan-line-break-bug.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+foo:
+ bar
+ baz
diff --git a/test/YAMLParser/single-dot-is-not-float-bug.data b/test/YAMLParser/single-dot-is-not-float-bug.data
new file mode 100644
index 000000000000..810a5936a894
--- /dev/null
+++ b/test/YAMLParser/single-dot-is-not-float-bug.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+.
diff --git a/test/YAMLParser/sloppy-indentation.data b/test/YAMLParser/sloppy-indentation.data
new file mode 100644
index 000000000000..2b2b62b14afa
--- /dev/null
+++ b/test/YAMLParser/sloppy-indentation.data
@@ -0,0 +1,19 @@
+# RUN: yaml-bench -canonical %s
+
+---
+in the block context:
+ indentation should be kept: {
+ but in the flow context: [
+it may be violated]
+}
+---
+the parser does not require scalars
+to be indented with at least one space
+...
+---
+"the parser does not require scalars
+to be indented with at least one space"
+---
+foo:
+ bar: 'quoted scalars
+may not adhere indentation'
diff --git a/test/YAMLParser/spec-02-01.data b/test/YAMLParser/spec-02-01.data
new file mode 100644
index 000000000000..dd15b2bc26a1
--- /dev/null
+++ b/test/YAMLParser/spec-02-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
diff --git a/test/YAMLParser/spec-02-02.data b/test/YAMLParser/spec-02-02.data
new file mode 100644
index 000000000000..a5695d5c27a5
--- /dev/null
+++ b/test/YAMLParser/spec-02-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+hr: 65 # Home runs
+avg: 0.278 # Batting average
+rbi: 147 # Runs Batted In
diff --git a/test/YAMLParser/spec-02-03.data b/test/YAMLParser/spec-02-03.data
new file mode 100644
index 000000000000..81f8d991f746
--- /dev/null
+++ b/test/YAMLParser/spec-02-03.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+american:
+ - Boston Red Sox
+ - Detroit Tigers
+ - New York Yankees
+national:
+ - New York Mets
+ - Chicago Cubs
+ - Atlanta Braves
diff --git a/test/YAMLParser/spec-02-04.data b/test/YAMLParser/spec-02-04.data
new file mode 100644
index 000000000000..44a218d5926b
--- /dev/null
+++ b/test/YAMLParser/spec-02-04.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+-
+ name: Mark McGwire
+ hr: 65
+ avg: 0.278
+-
+ name: Sammy Sosa
+ hr: 63
+ avg: 0.288
diff --git a/test/YAMLParser/spec-02-05.data b/test/YAMLParser/spec-02-05.data
new file mode 100644
index 000000000000..c9a4a7572f7a
--- /dev/null
+++ b/test/YAMLParser/spec-02-05.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- [name , hr, avg ]
+- [Mark McGwire, 65, 0.278]
+- [Sammy Sosa , 63, 0.288]
diff --git a/test/YAMLParser/spec-02-06.data b/test/YAMLParser/spec-02-06.data
new file mode 100644
index 000000000000..85c1e2bab8c4
--- /dev/null
+++ b/test/YAMLParser/spec-02-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+Mark McGwire: {hr: 65, avg: 0.278}
+Sammy Sosa: {
+ hr: 63,
+ avg: 0.288
+ }
diff --git a/test/YAMLParser/spec-02-07.data b/test/YAMLParser/spec-02-07.data
new file mode 100644
index 000000000000..c349662a98ac
--- /dev/null
+++ b/test/YAMLParser/spec-02-07.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+# Ranking of 1998 home runs
+---
+- Mark McGwire
+- Sammy Sosa
+- Ken Griffey
+
+# Team ranking
+---
+- Chicago Cubs
+- St Louis Cardinals
diff --git a/test/YAMLParser/spec-02-08.data b/test/YAMLParser/spec-02-08.data
new file mode 100644
index 000000000000..9746a43788a5
--- /dev/null
+++ b/test/YAMLParser/spec-02-08.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s
+
+---
+time: 20:03:20
+player: Sammy Sosa
+action: strike (miss)
+...
+---
+time: 20:03:47
+player: Sammy Sosa
+action: grand slam
+...
diff --git a/test/YAMLParser/spec-02-09.data b/test/YAMLParser/spec-02-09.data
new file mode 100644
index 000000000000..6aef9333799a
--- /dev/null
+++ b/test/YAMLParser/spec-02-09.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr: # 1998 hr ranking
+ - Mark McGwire
+ - Sammy Sosa
+rbi:
+ # 1998 rbi ranking
+ - Sammy Sosa
+ - Ken Griffey
diff --git a/test/YAMLParser/spec-02-10.data b/test/YAMLParser/spec-02-10.data
new file mode 100644
index 000000000000..0302fa750099
--- /dev/null
+++ b/test/YAMLParser/spec-02-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+hr:
+ - Mark McGwire
+ # Following node labeled SS
+ - &SS Sammy Sosa
+rbi:
+ - *SS # Subsequent occurrence
+ - Ken Griffey
diff --git a/test/YAMLParser/spec-02-11.data b/test/YAMLParser/spec-02-11.data
new file mode 100644
index 000000000000..d8cf863b2c1e
--- /dev/null
+++ b/test/YAMLParser/spec-02-11.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+? - Detroit Tigers
+ - Chicago cubs
+:
+ - 2001-07-23
+
+? [ New York Yankees,
+ Atlanta Braves ]
+: [ 2001-07-02, 2001-08-12,
+ 2001-08-14 ]
diff --git a/test/YAMLParser/spec-02-12.data b/test/YAMLParser/spec-02-12.data
new file mode 100644
index 000000000000..3b4d5370a939
--- /dev/null
+++ b/test/YAMLParser/spec-02-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+---
+# products purchased
+- item : Super Hoop
+ quantity: 1
+- item : Basketball
+ quantity: 4
+- item : Big Shoes
+ quantity: 1
diff --git a/test/YAMLParser/spec-02-13.data b/test/YAMLParser/spec-02-13.data
new file mode 100644
index 000000000000..2bbccbf5d7de
--- /dev/null
+++ b/test/YAMLParser/spec-02-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# ASCII Art
+--- |
+ \//||\/||
+ // || ||__
diff --git a/test/YAMLParser/spec-02-14.data b/test/YAMLParser/spec-02-14.data
new file mode 100644
index 000000000000..5a18ea213e6c
--- /dev/null
+++ b/test/YAMLParser/spec-02-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+---
+ Mark McGwire's
+ year was crippled
+ by a knee injury.
diff --git a/test/YAMLParser/spec-02-15.data b/test/YAMLParser/spec-02-15.data
new file mode 100644
index 000000000000..2a7fbe96ad40
--- /dev/null
+++ b/test/YAMLParser/spec-02-15.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ Sammy Sosa completed another
+ fine season with great stats.
+
+ 63 Home Runs
+ 0.288 Batting Average
+
+ What a year!
diff --git a/test/YAMLParser/spec-02-16.data b/test/YAMLParser/spec-02-16.data
new file mode 100644
index 000000000000..3a5792c76320
--- /dev/null
+++ b/test/YAMLParser/spec-02-16.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+name: Mark McGwire
+accomplishment: >
+ Mark set a major league
+ home run record in 1998.
+stats: |
+ 65 Home Runs
+ 0.278 Batting Average
diff --git a/test/YAMLParser/spec-02-17.data b/test/YAMLParser/spec-02-17.data
new file mode 100644
index 000000000000..2bcb60c8d933
--- /dev/null
+++ b/test/YAMLParser/spec-02-17.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+unicode: "Sosa did fine.\u263A"
+control: "\b1998\t1999\t2000\n"
+hexesc: "\x13\x10 is \r\n"
+
+single: '"Howdy!" he cried.'
+quoted: ' # not a ''comment''.'
+tie-fighter: '|\-*-/|'
+
+# CHECK: !!str "Sosa did fine.\u263A"
+# CHECK: !!str "\b1998\t1999\t2000\n"
+# CHECK: !!str "\x13\x10 is \r\n"
+# CHECK: !!str "\"Howdy!\" he cried."
+# CHECK: !!str " # not a 'comment'."
+# CHECK: !!str "|\\-*-/|"
diff --git a/test/YAMLParser/spec-02-18.data b/test/YAMLParser/spec-02-18.data
new file mode 100644
index 000000000000..625a4962e997
--- /dev/null
+++ b/test/YAMLParser/spec-02-18.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+plain:
+ This unquoted scalar
+ spans many lines.
+
+quoted: "So does this
+ quoted scalar.\n"
diff --git a/test/YAMLParser/spec-02-19.data b/test/YAMLParser/spec-02-19.data
new file mode 100644
index 000000000000..cb9df6dd1f43
--- /dev/null
+++ b/test/YAMLParser/spec-02-19.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 12345
+decimal: +12,345
+sexagesimal: 3:25:45
+octal: 014
+hexadecimal: 0xC
diff --git a/test/YAMLParser/spec-02-20.data b/test/YAMLParser/spec-02-20.data
new file mode 100644
index 000000000000..ed147986119d
--- /dev/null
+++ b/test/YAMLParser/spec-02-20.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 1.23015e+3
+exponential: 12.3015e+02
+sexagesimal: 20:30.15
+fixed: 1,230.15
+negative infinity: -.inf
+not a number: .NaN
diff --git a/test/YAMLParser/spec-02-21.data b/test/YAMLParser/spec-02-21.data
new file mode 100644
index 000000000000..ea979db065b1
--- /dev/null
+++ b/test/YAMLParser/spec-02-21.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+null: ~
+true: y
+false: n
+string: '12345'
diff --git a/test/YAMLParser/spec-02-22.data b/test/YAMLParser/spec-02-22.data
new file mode 100644
index 000000000000..77724f71066e
--- /dev/null
+++ b/test/YAMLParser/spec-02-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+canonical: 2001-12-15T02:59:43.1Z
+iso8601: 2001-12-14t21:59:43.10-05:00
+spaced: 2001-12-14 21:59:43.10 -5
+date: 2002-12-14
diff --git a/test/YAMLParser/spec-02-23.data b/test/YAMLParser/spec-02-23.data
new file mode 100644
index 000000000000..d08dfa755c6b
--- /dev/null
+++ b/test/YAMLParser/spec-02-23.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+not-date: !!str 2002-04-28
+
+picture: !!binary |
+ R0lGODlhDAAMAIQAAP//9/X
+ 17unp5WZmZgAAAOfn515eXv
+ Pz7Y6OjuDg4J+fn5OTk6enp
+ 56enmleECcgggoBADs=
+
+application specific tag: !something |
+ The semantics of the tag
+ above may be different for
+ different documents.
diff --git a/test/YAMLParser/spec-02-24.data b/test/YAMLParser/spec-02-24.data
new file mode 100644
index 000000000000..01ca7f5d122a
--- /dev/null
+++ b/test/YAMLParser/spec-02-24.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG ! tag:clarkevans.com,2002:
+--- !shape
+ # Use the ! handle for presenting
+ # tag:clarkevans.com,2002:circle
+- !circle
+ center: &ORIGIN {x: 73, y: 129}
+ radius: 7
+- !line
+ start: *ORIGIN
+ finish: { x: 89, y: 102 }
+- !label
+ start: *ORIGIN
+ color: 0xFFEEBB
+ text: Pretty vector drawing.
diff --git a/test/YAMLParser/spec-02-25.data b/test/YAMLParser/spec-02-25.data
new file mode 100644
index 000000000000..fbadfda97e36
--- /dev/null
+++ b/test/YAMLParser/spec-02-25.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# sets are represented as a
+# mapping where each key is
+# associated with the empty string
+--- !!set
+? Mark McGwire
+? Sammy Sosa
+? Ken Griff
diff --git a/test/YAMLParser/spec-02-26.data b/test/YAMLParser/spec-02-26.data
new file mode 100644
index 000000000000..257108e7e043
--- /dev/null
+++ b/test/YAMLParser/spec-02-26.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+# ordered maps are represented as
+# a sequence of mappings, with
+# each mapping having one key
+--- !!omap
+- Mark McGwire: 65
+- Sammy Sosa: 63
+- Ken Griffy: 58
diff --git a/test/YAMLParser/spec-02-27.data b/test/YAMLParser/spec-02-27.data
new file mode 100644
index 000000000000..a190ff19db0b
--- /dev/null
+++ b/test/YAMLParser/spec-02-27.data
@@ -0,0 +1,31 @@
+# RUN: yaml-bench -canonical %s
+
+--- !<tag:clarkevans.com,2002:invoice>
+invoice: 34843
+date : 2001-01-23
+bill-to: &id001
+ given : Chris
+ family : Dumars
+ address:
+ lines: |
+ 458 Walkman Dr.
+ Suite #292
+ city : Royal Oak
+ state : MI
+ postal : 48046
+ship-to: *id001
+product:
+ - sku : BL394D
+ quantity : 4
+ description : Basketball
+ price : 450.00
+ - sku : BL4438H
+ quantity : 1
+ description : Super Hoop
+ price : 2392.00
+tax : 251.42
+total: 4443.52
+comments:
+ Late afternoon is best.
+ Backup contact is Nancy
+ Billsmer @ 338-4338.
diff --git a/test/YAMLParser/spec-02-28.data b/test/YAMLParser/spec-02-28.data
new file mode 100644
index 000000000000..695c27f5d55a
--- /dev/null
+++ b/test/YAMLParser/spec-02-28.data
@@ -0,0 +1,28 @@
+# RUN: yaml-bench -canonical %s
+
+---
+Time: 2001-11-23 15:01:42 -5
+User: ed
+Warning:
+ This is an error message
+ for the log file
+---
+Time: 2001-11-23 15:02:31 -5
+User: ed
+Warning:
+ A slightly different error
+ message.
+---
+Date: 2001-11-23 15:03:17 -5
+User: ed
+Fatal:
+ Unknown variable "bar"
+Stack:
+ - file: TopClass.py
+ line: 23
+ code: |
+ x = MoreObject("345\n")
+ - file: MoreClass.py
+ line: 58
+ code: |-
+ foo = bar
diff --git a/test/YAMLParser/spec-05-01-utf8.data b/test/YAMLParser/spec-05-01-utf8.data
new file mode 100644
index 000000000000..349da06fab26
--- /dev/null
+++ b/test/YAMLParser/spec-05-01-utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-02-utf8.data b/test/YAMLParser/spec-05-02-utf8.data
new file mode 100644
index 000000000000..b306bdb719ef
--- /dev/null
+++ b/test/YAMLParser/spec-05-02-utf8.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+# Invalid use of BOM
+# inside a
+# document.
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-03.data b/test/YAMLParser/spec-05-03.data
new file mode 100644
index 000000000000..461e98d2c2b8
--- /dev/null
+++ b/test/YAMLParser/spec-05-03.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+sequence:
+- one
+- two
+mapping:
+ ? sky
+ : blue
+ ? sea : green
diff --git a/test/YAMLParser/spec-05-04.data b/test/YAMLParser/spec-05-04.data
new file mode 100644
index 000000000000..52850f435bf5
--- /dev/null
+++ b/test/YAMLParser/spec-05-04.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+sequence: [ one, two, ]
+mapping: { sky: blue, sea: green }
diff --git a/test/YAMLParser/spec-05-05.data b/test/YAMLParser/spec-05-05.data
new file mode 100644
index 000000000000..499ee8ffb8ff
--- /dev/null
+++ b/test/YAMLParser/spec-05-05.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+# Comment only.
diff --git a/test/YAMLParser/spec-05-06.data b/test/YAMLParser/spec-05-06.data
new file mode 100644
index 000000000000..729141acf4b3
--- /dev/null
+++ b/test/YAMLParser/spec-05-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+anchored: !local &anchor value
+alias: *anchor
diff --git a/test/YAMLParser/spec-05-07.data b/test/YAMLParser/spec-05-07.data
new file mode 100644
index 000000000000..fc80a0d4152e
--- /dev/null
+++ b/test/YAMLParser/spec-05-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+literal: |
+ text
+folded: >
+ text
diff --git a/test/YAMLParser/spec-05-08.data b/test/YAMLParser/spec-05-08.data
new file mode 100644
index 000000000000..9f2b7ece53fc
--- /dev/null
+++ b/test/YAMLParser/spec-05-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+single: 'text'
+double: "text"
diff --git a/test/YAMLParser/spec-05-09.data b/test/YAMLParser/spec-05-09.data
new file mode 100644
index 000000000000..fc061fb2982e
--- /dev/null
+++ b/test/YAMLParser/spec-05-09.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.1
+--- text
diff --git a/test/YAMLParser/spec-05-10.data b/test/YAMLParser/spec-05-10.data
new file mode 100644
index 000000000000..6788f0bfc31a
--- /dev/null
+++ b/test/YAMLParser/spec-05-10.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+commercial-at: @text
+grave-accent: `text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-11.data b/test/YAMLParser/spec-05-11.data
new file mode 100644
index 000000000000..7cba5562d5fb
--- /dev/null
+++ b/test/YAMLParser/spec-05-11.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+|
+ Generic line break (no glyph)
+ Generic line break (glyphed)… Line separator… Paragraph separator
diff --git a/test/YAMLParser/spec-05-12.data b/test/YAMLParser/spec-05-12.data
new file mode 100644
index 000000000000..7dadff76f8d7
--- /dev/null
+++ b/test/YAMLParser/spec-05-12.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently reject tabs as indentation.
+# XFAIL: *
+
+# Tabs do's and don'ts:
+# comment:
+quoted: "Quoted "
+block: |
+ void main() {
+ printf("Hello, world!\n");
+ }
+elsewhere: # separation
+ indentation, in plain scalar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-05-13.data b/test/YAMLParser/spec-05-13.data
new file mode 100644
index 000000000000..db62e866a755
--- /dev/null
+++ b/test/YAMLParser/spec-05-13.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ "Text containing
+ both space and
+ tab characters"
diff --git a/test/YAMLParser/spec-05-14.data b/test/YAMLParser/spec-05-14.data
new file mode 100644
index 000000000000..65451651b69e
--- /dev/null
+++ b/test/YAMLParser/spec-05-14.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+"Fun with \\
+\" \a \b \e \f \
+\n \r \t \v \0 \
+\ \_ \N \L \P \
+\x41 \u0041 \U00000041"
+
+# CHECK: !!str "Fun with \\\n\" \a \b \e \f \n \r \t \v \0 \_ \N \L \P A A A"
diff --git a/test/YAMLParser/spec-05-15.data b/test/YAMLParser/spec-05-15.data
new file mode 100644
index 000000000000..cd8421ad2792
--- /dev/null
+++ b/test/YAMLParser/spec-05-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+Bad escapes:
+ "\c
+ \xq-"
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-06-01.data b/test/YAMLParser/spec-06-01.data
new file mode 100644
index 000000000000..95b26bdb3856
--- /dev/null
+++ b/test/YAMLParser/spec-06-01.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+ # Leading comment line spaces are
+ # neither content nor indentation.
+
+Not indented:
+ By one space: |
+ By four
+ spaces
+ Flow style: [ # Leading spaces
+ By two, # in flow style
+ Also by two, # are neither
+# Tabs are not allowed:
+# Still by two # content nor
+ Still by two # content nor
+ ] # indentation.
diff --git a/test/YAMLParser/spec-06-02.data b/test/YAMLParser/spec-06-02.data
new file mode 100644
index 000000000000..40a15c9f3ea2
--- /dev/null
+++ b/test/YAMLParser/spec-06-02.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ # Comment
+
+
diff --git a/test/YAMLParser/spec-06-03.data b/test/YAMLParser/spec-06-03.data
new file mode 100644
index 000000000000..c1893ef08324
--- /dev/null
+++ b/test/YAMLParser/spec-06-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+key: # Comment
+ value
diff --git a/test/YAMLParser/spec-06-04.data b/test/YAMLParser/spec-06-04.data
new file mode 100644
index 000000000000..b61bcc6b9551
--- /dev/null
+++ b/test/YAMLParser/spec-06-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+key: # Comment
+ # lines
+ value
+
diff --git a/test/YAMLParser/spec-06-05.data b/test/YAMLParser/spec-06-05.data
new file mode 100644
index 000000000000..4bcaa5a81893
--- /dev/null
+++ b/test/YAMLParser/spec-06-05.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+{ first: Sammy, last: Sosa }:
+# Statistics:
+ hr: # Home runs
+ 65
+ avg: # Average
+ 0.278
diff --git a/test/YAMLParser/spec-06-06.data b/test/YAMLParser/spec-06-06.data
new file mode 100644
index 000000000000..67e39ddf8996
--- /dev/null
+++ b/test/YAMLParser/spec-06-06.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+plain: text
+ lines
+quoted: "text
+ lines"
+block: |
+ text
+ lines
diff --git a/test/YAMLParser/spec-06-07.data b/test/YAMLParser/spec-06-07.data
new file mode 100644
index 000000000000..451bd349e3ef
--- /dev/null
+++ b/test/YAMLParser/spec-06-07.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- foo
+
+ bar
+- |-
+ foo
+
+ bar
+
diff --git a/test/YAMLParser/spec-06-08.data b/test/YAMLParser/spec-06-08.data
new file mode 100644
index 000000000000..aa06f847ea36
--- /dev/null
+++ b/test/YAMLParser/spec-06-08.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+>-
+ specific… trimmed… … …… as… space
diff --git a/test/YAMLParser/spec-07-01.data b/test/YAMLParser/spec-07-01.data
new file mode 100644
index 000000000000..21bc5e59d59c
--- /dev/null
+++ b/test/YAMLParser/spec-07-01.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%FOO bar baz # Should be ignored
+ # with a warning.
+--- "foo"
diff --git a/test/YAMLParser/spec-07-02.data b/test/YAMLParser/spec-07-02.data
new file mode 100644
index 000000000000..bf0e758c880c
--- /dev/null
+++ b/test/YAMLParser/spec-07-02.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+%YAML 1.2 # Attempt parsing
+ # with a warning
+---
+"foo"
diff --git a/test/YAMLParser/spec-07-03.data b/test/YAMLParser/spec-07-03.data
new file mode 100644
index 000000000000..7ca948301691
--- /dev/null
+++ b/test/YAMLParser/spec-07-03.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+%YAML 1.1
+%YAML 1.1
+foo
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-04.data b/test/YAMLParser/spec-07-04.data
new file mode 100644
index 000000000000..beba7d06ecf4
--- /dev/null
+++ b/test/YAMLParser/spec-07-04.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !yaml! tag:yaml.org,2002:
+---
+!yaml!str "foo"
diff --git a/test/YAMLParser/spec-07-05.data b/test/YAMLParser/spec-07-05.data
new file mode 100644
index 000000000000..279b54afa142
--- /dev/null
+++ b/test/YAMLParser/spec-07-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently parse TAG directives.
+# XFAIL: *
+
+%TAG ! !foo
+%TAG ! !foo
+bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-07-06.data b/test/YAMLParser/spec-07-06.data
new file mode 100644
index 000000000000..9f27f91f3111
--- /dev/null
+++ b/test/YAMLParser/spec-07-06.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG ! !foo
+%TAG !yaml! tag:yaml.org,2002:
+---
+- !bar "baz"
+- !yaml!str "string"
diff --git a/test/YAMLParser/spec-07-07a.data b/test/YAMLParser/spec-07-07a.data
new file mode 100644
index 000000000000..e51f8f7d6947
--- /dev/null
+++ b/test/YAMLParser/spec-07-07a.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# Private application:
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-07b.data b/test/YAMLParser/spec-07-07b.data
new file mode 100644
index 000000000000..003d5755726b
--- /dev/null
+++ b/test/YAMLParser/spec-07-07b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Migrated to global:
+%TAG ! tag:ben-kiki.org,2000:app/
+---
+!foo "bar"
diff --git a/test/YAMLParser/spec-07-08.data b/test/YAMLParser/spec-07-08.data
new file mode 100644
index 000000000000..7197404b3849
--- /dev/null
+++ b/test/YAMLParser/spec-07-08.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicitly specify default settings:
+%TAG ! !
+%TAG !! tag:yaml.org,2002:
+# Named handles have no default:
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !foo "bar"
+- !!str "string"
+- !o!type "baz"
diff --git a/test/YAMLParser/spec-07-09.data b/test/YAMLParser/spec-07-09.data
new file mode 100644
index 000000000000..1f98ba041468
--- /dev/null
+++ b/test/YAMLParser/spec-07-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo
+...
+# Repeated end marker.
+...
+---
+bar
+# No end marker.
+---
+baz
+...
diff --git a/test/YAMLParser/spec-07-10.data b/test/YAMLParser/spec-07-10.data
new file mode 100644
index 000000000000..a1766834781a
--- /dev/null
+++ b/test/YAMLParser/spec-07-10.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+"Root flow
+ scalar"
+--- !!str >
+ Root block
+ scalar
+---
+# Root collection:
+foo : bar
+... # Is optional.
+---
+# Explicit document may be empty.
diff --git a/test/YAMLParser/spec-07-11.data b/test/YAMLParser/spec-07-11.data
new file mode 100644
index 000000000000..ce14b7ebe4dc
--- /dev/null
+++ b/test/YAMLParser/spec-07-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+# A stream may contain
+# no documents.
diff --git a/test/YAMLParser/spec-07-12a.data b/test/YAMLParser/spec-07-12a.data
new file mode 100644
index 000000000000..7327f8188e0b
--- /dev/null
+++ b/test/YAMLParser/spec-07-12a.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Implicit document. Root
+# collection (mapping) node.
+foo : bar
diff --git a/test/YAMLParser/spec-07-12b.data b/test/YAMLParser/spec-07-12b.data
new file mode 100644
index 000000000000..d759abea7d44
--- /dev/null
+++ b/test/YAMLParser/spec-07-12b.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Explicit document. Root
+# scalar (literal) node.
+--- |
+ Text content
diff --git a/test/YAMLParser/spec-07-13.data b/test/YAMLParser/spec-07-13.data
new file mode 100644
index 000000000000..ab74df101872
--- /dev/null
+++ b/test/YAMLParser/spec-07-13.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+! "First document"
+---
+!foo "No directives"
+%TAG ! !foo
+---
+!bar "With directives"
+%YAML 1.1
+---
+!baz "Reset settings"
diff --git a/test/YAMLParser/spec-08-01.data b/test/YAMLParser/spec-08-01.data
new file mode 100644
index 000000000000..5abbfa809491
--- /dev/null
+++ b/test/YAMLParser/spec-08-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!!str &a1 "foo" : !!str bar
+&a2 baz : *a1
diff --git a/test/YAMLParser/spec-08-02.data b/test/YAMLParser/spec-08-02.data
new file mode 100644
index 000000000000..8a75783a7095
--- /dev/null
+++ b/test/YAMLParser/spec-08-02.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-03.data b/test/YAMLParser/spec-08-03.data
new file mode 100644
index 000000000000..8c715305a810
--- /dev/null
+++ b/test/YAMLParser/spec-08-03.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+!<tag:yaml.org,2002:str> foo :
+ !<!bar> baz
diff --git a/test/YAMLParser/spec-08-04.data b/test/YAMLParser/spec-08-04.data
new file mode 100644
index 000000000000..f13538bc87e0
--- /dev/null
+++ b/test/YAMLParser/spec-08-04.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently look at the content of literal tags.
+# XFAIL: *
+
+- !<!> foo
+- !<$:?> bar
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-05.data b/test/YAMLParser/spec-08-05.data
new file mode 100644
index 000000000000..0613446c8974
--- /dev/null
+++ b/test/YAMLParser/spec-08-05.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !local foo
+- !!str bar
+- !o!type baz
diff --git a/test/YAMLParser/spec-08-06.data b/test/YAMLParser/spec-08-06.data
new file mode 100644
index 000000000000..a811bfdefe39
--- /dev/null
+++ b/test/YAMLParser/spec-08-06.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# We don't currently validate tags.
+# XFAIL: *
+
+%TAG !o! tag:ben-kiki.org,2000:
+---
+- !$a!b foo
+- !o! bar
+- !h!type baz
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-08-07.data b/test/YAMLParser/spec-08-07.data
new file mode 100644
index 000000000000..fc3f2df7f058
--- /dev/null
+++ b/test/YAMLParser/spec-08-07.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+# Assuming conventional resolution:
+- "12"
+- 12
+- ! 12
diff --git a/test/YAMLParser/spec-08-08.data b/test/YAMLParser/spec-08-08.data
new file mode 100644
index 000000000000..460029f6ace0
--- /dev/null
+++ b/test/YAMLParser/spec-08-08.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+foo:
+ "bar
+ baz"
+---
+"foo
+ bar"
+---
+foo
+ bar
+--- |
+ foo
+...
diff --git a/test/YAMLParser/spec-08-09.data b/test/YAMLParser/spec-08-09.data
new file mode 100644
index 000000000000..1c8258594310
--- /dev/null
+++ b/test/YAMLParser/spec-08-09.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+---
+scalars:
+ plain: !!str some text
+ quoted:
+ single: 'some text'
+ double: "some text"
+collections:
+ sequence: !!seq [ !!str entry,
+ # Mapping entry:
+ key: value ]
+ mapping: { key: value }
diff --git a/test/YAMLParser/spec-08-10.data b/test/YAMLParser/spec-08-10.data
new file mode 100644
index 000000000000..74054eb08832
--- /dev/null
+++ b/test/YAMLParser/spec-08-10.data
@@ -0,0 +1,17 @@
+# RUN: yaml-bench -canonical %s
+
+block styles:
+ scalars:
+ literal: !!str |
+ #!/usr/bin/perl
+ print "Hello, world!\n";
+ folded: >
+ This sentence
+ is false.
+ collections: !!map
+ sequence: !!seq # Entry:
+ - entry # Plain
+ # Mapping entry:
+ - key: value
+ mapping:
+ key: value
diff --git a/test/YAMLParser/spec-08-11.data b/test/YAMLParser/spec-08-11.data
new file mode 100644
index 000000000000..8a75783a7095
--- /dev/null
+++ b/test/YAMLParser/spec-08-11.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+First occurrence: &anchor Value
+Second occurrence: *anchor
diff --git a/test/YAMLParser/spec-08-12.data b/test/YAMLParser/spec-08-12.data
new file mode 100644
index 000000000000..69e78b42d276
--- /dev/null
+++ b/test/YAMLParser/spec-08-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+ Without properties,
+ &anchor "Anchored",
+ !!str 'Tagged',
+ *anchor, # Alias node
+ !!str , # Empty plain scalar
+ '', # Empty plain scalar
+]
diff --git a/test/YAMLParser/spec-08-13.data b/test/YAMLParser/spec-08-13.data
new file mode 100644
index 000000000000..931d56a0cfe7
--- /dev/null
+++ b/test/YAMLParser/spec-08-13.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+ ? foo :,
+ ? : bar,
+}
diff --git a/test/YAMLParser/spec-08-14.data b/test/YAMLParser/spec-08-14.data
new file mode 100644
index 000000000000..61c448351ae5
--- /dev/null
+++ b/test/YAMLParser/spec-08-14.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- "flow in block"
+- >
+ Block scalar
+- !!map # Block collection
+ foo : bar
diff --git a/test/YAMLParser/spec-08-15.data b/test/YAMLParser/spec-08-15.data
new file mode 100644
index 000000000000..f21e84a43141
--- /dev/null
+++ b/test/YAMLParser/spec-08-15.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty plain scalar
+- ? foo
+ :
+ ?
+ : bar
diff --git a/test/YAMLParser/spec-09-01.data b/test/YAMLParser/spec-09-01.data
new file mode 100644
index 000000000000..8999b4961626
--- /dev/null
+++ b/test/YAMLParser/spec-09-01.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+"simple key" : {
+ "also simple" : value,
+ ? "not a
+ simple key" : "any
+ value"
+}
diff --git a/test/YAMLParser/spec-09-02.data b/test/YAMLParser/spec-09-02.data
new file mode 100644
index 000000000000..f69037820ebd
--- /dev/null
+++ b/test/YAMLParser/spec-09-02.data
@@ -0,0 +1,14 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Indent trimming is not yet implemented.
+# XFAIL: *
+
+ "as space
+ trimmed
+
+ specific
+
+ escaped \
+ none"
+
+# CHECK: !!str "as space trimmed\nspecific\nescaped\tnone"
diff --git a/test/YAMLParser/spec-09-03.data b/test/YAMLParser/spec-09-03.data
new file mode 100644
index 000000000000..3fb0d8b184ab
--- /dev/null
+++ b/test/YAMLParser/spec-09-03.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- "
+ last"
+- "
+ last"
+- " first
+ last"
diff --git a/test/YAMLParser/spec-09-04.data b/test/YAMLParser/spec-09-04.data
new file mode 100644
index 000000000000..4178ec6befbd
--- /dev/null
+++ b/test/YAMLParser/spec-09-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+ "first
+ inner 1
+ \ inner 2 \
+ last"
diff --git a/test/YAMLParser/spec-09-05.data b/test/YAMLParser/spec-09-05.data
new file mode 100644
index 000000000000..e482d5366235
--- /dev/null
+++ b/test/YAMLParser/spec-09-05.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+- "first
+ "
+- "first
+
+ last"
+- "first
+ inner
+ \ last"
diff --git a/test/YAMLParser/spec-09-06.data b/test/YAMLParser/spec-09-06.data
new file mode 100644
index 000000000000..edc0cbba9004
--- /dev/null
+++ b/test/YAMLParser/spec-09-06.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'here''s to "quotes"'
diff --git a/test/YAMLParser/spec-09-07.data b/test/YAMLParser/spec-09-07.data
new file mode 100644
index 000000000000..3c010ca5b93b
--- /dev/null
+++ b/test/YAMLParser/spec-09-07.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+'simple key' : {
+ 'also simple' : value,
+ ? 'not a
+ simple key' : 'any
+ value'
+}
diff --git a/test/YAMLParser/spec-09-08.data b/test/YAMLParser/spec-09-08.data
new file mode 100644
index 000000000000..d114e58fcac1
--- /dev/null
+++ b/test/YAMLParser/spec-09-08.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+ 'as space … trimmed …… specific… none'
diff --git a/test/YAMLParser/spec-09-09.data b/test/YAMLParser/spec-09-09.data
new file mode 100644
index 000000000000..2fec1b536ef1
--- /dev/null
+++ b/test/YAMLParser/spec-09-09.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- '
+ last'
+- '
+ last'
+- ' first
+ last'
diff --git a/test/YAMLParser/spec-09-10.data b/test/YAMLParser/spec-09-10.data
new file mode 100644
index 000000000000..faabfb06b5ec
--- /dev/null
+++ b/test/YAMLParser/spec-09-10.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ 'first
+ inner
+ last'
diff --git a/test/YAMLParser/spec-09-11.data b/test/YAMLParser/spec-09-11.data
new file mode 100644
index 000000000000..3f487ad6b043
--- /dev/null
+++ b/test/YAMLParser/spec-09-11.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 'first
+ '
+- 'first
+
+ last'
diff --git a/test/YAMLParser/spec-09-12.data b/test/YAMLParser/spec-09-12.data
new file mode 100644
index 000000000000..d992c589cd69
--- /dev/null
+++ b/test/YAMLParser/spec-09-12.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+# Outside flow collection:
+- ::std::vector
+- Up, up, and away!
+- -123
+# Inside flow collection:
+- [ '::std::vector',
+ "Up, up, and away!",
+ -123 ]
diff --git a/test/YAMLParser/spec-09-13.data b/test/YAMLParser/spec-09-13.data
new file mode 100644
index 000000000000..d48f2d2c47ee
--- /dev/null
+++ b/test/YAMLParser/spec-09-13.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+simple key : {
+ also simple : value,
+ ? not a
+ simple key : any
+ value
+}
diff --git a/test/YAMLParser/spec-09-14.data b/test/YAMLParser/spec-09-14.data
new file mode 100644
index 000000000000..890f6bf2e718
--- /dev/null
+++ b/test/YAMLParser/spec-09-14.data
@@ -0,0 +1,21 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# Not quite sure why this doesn't fail.
+# XFAIL: *
+
+---
+--- ||| : foo
+... >>>: bar
+---
+[
+---
+,
+... ,
+{
+--- :
+... # Nested
+}
+]
+...
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-15.data b/test/YAMLParser/spec-09-15.data
new file mode 100644
index 000000000000..4111d1ba2cb1
--- /dev/null
+++ b/test/YAMLParser/spec-09-15.data
@@ -0,0 +1,15 @@
+# RUN: yaml-bench -canonical %s
+
+---
+"---" : foo
+...: bar
+---
+[
+---,
+...,
+{
+? ---
+: ...
+}
+]
+...
diff --git a/test/YAMLParser/spec-09-16.data b/test/YAMLParser/spec-09-16.data
new file mode 100644
index 000000000000..e595f47bece9
--- /dev/null
+++ b/test/YAMLParser/spec-09-16.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+# Tabs are confusing:
+# as space/trimmed/specific/none
+ as space … trimmed …… specific… none
diff --git a/test/YAMLParser/spec-09-17.data b/test/YAMLParser/spec-09-17.data
new file mode 100644
index 000000000000..1bacf4d68b1f
--- /dev/null
+++ b/test/YAMLParser/spec-09-17.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+ first line
+
+ more line
diff --git a/test/YAMLParser/spec-09-18.data b/test/YAMLParser/spec-09-18.data
new file mode 100644
index 000000000000..ac623f9973f7
--- /dev/null
+++ b/test/YAMLParser/spec-09-18.data
@@ -0,0 +1,11 @@
+# RUN: yaml-bench -canonical %s
+
+- | # Just the style
+ literal
+- >1 # Indentation indicator
+ folded
+- |+ # Chomping indicator
+ keep
+
+- >-1 # Both indicators
+ strip
diff --git a/test/YAMLParser/spec-09-19.data b/test/YAMLParser/spec-09-19.data
new file mode 100644
index 000000000000..52aa157137b2
--- /dev/null
+++ b/test/YAMLParser/spec-09-19.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ literal
+- >
+ folded
diff --git a/test/YAMLParser/spec-09-20.data b/test/YAMLParser/spec-09-20.data
new file mode 100644
index 000000000000..86fc7ab9a2ed
--- /dev/null
+++ b/test/YAMLParser/spec-09-20.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+- |
+ detected
+- >
+
+
+ # detected
+- |1
+ explicit
+- >
+
+ detected
diff --git a/test/YAMLParser/spec-09-21.data b/test/YAMLParser/spec-09-21.data
new file mode 100644
index 000000000000..2bcc28337f9f
--- /dev/null
+++ b/test/YAMLParser/spec-09-21.data
@@ -0,0 +1,12 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+
+- |
+
+ text
+- >
+ text
+ text
+- |1
+ text
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-09-22.data b/test/YAMLParser/spec-09-22.data
new file mode 100644
index 000000000000..b95faa50b5d0
--- /dev/null
+++ b/test/YAMLParser/spec-09-22.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+strip: |-
+ text…clip: |
+ text…keep: |+
+ text…
diff --git a/test/YAMLParser/spec-09-23.data b/test/YAMLParser/spec-09-23.data
new file mode 100644
index 000000000000..94f839818b6c
--- /dev/null
+++ b/test/YAMLParser/spec-09-23.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s
+
+ # Strip
+ # Comments:
+strip: |-
+ # text… … # Clip
+ # comments:
+…clip: |
+ # text… … # Keep
+ # comments:
+…keep: |+
+ # text… # Trail
+ # comments.
diff --git a/test/YAMLParser/spec-09-24.data b/test/YAMLParser/spec-09-24.data
new file mode 100644
index 000000000000..f08eae6a80ea
--- /dev/null
+++ b/test/YAMLParser/spec-09-24.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+strip: >-
+
+clip: >
+
+keep: |+
+
diff --git a/test/YAMLParser/spec-09-25.data b/test/YAMLParser/spec-09-25.data
new file mode 100644
index 000000000000..b15edb523d2f
--- /dev/null
+++ b/test/YAMLParser/spec-09-25.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+| # Simple block scalar
+ literal
+ text
diff --git a/test/YAMLParser/spec-09-26.data b/test/YAMLParser/spec-09-26.data
new file mode 100644
index 000000000000..286740ed39cc
--- /dev/null
+++ b/test/YAMLParser/spec-09-26.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+
+
+ literal
+
+ text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-27.data b/test/YAMLParser/spec-09-27.data
new file mode 100644
index 000000000000..286740ed39cc
--- /dev/null
+++ b/test/YAMLParser/spec-09-27.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+
+
+ literal
+
+ text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-28.data b/test/YAMLParser/spec-09-28.data
new file mode 100644
index 000000000000..286740ed39cc
--- /dev/null
+++ b/test/YAMLParser/spec-09-28.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+|
+
+
+ literal
+
+ text
+
+ # Comment
diff --git a/test/YAMLParser/spec-09-29.data b/test/YAMLParser/spec-09-29.data
new file mode 100644
index 000000000000..e8906ff64a15
--- /dev/null
+++ b/test/YAMLParser/spec-09-29.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+> # Simple folded scalar
+ folded
+ text
+ lines
diff --git a/test/YAMLParser/spec-09-30.data b/test/YAMLParser/spec-09-30.data
new file mode 100644
index 000000000000..a2d8bf495043
--- /dev/null
+++ b/test/YAMLParser/spec-09-30.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-31.data b/test/YAMLParser/spec-09-31.data
new file mode 100644
index 000000000000..a2d8bf495043
--- /dev/null
+++ b/test/YAMLParser/spec-09-31.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-32.data b/test/YAMLParser/spec-09-32.data
new file mode 100644
index 000000000000..a2d8bf495043
--- /dev/null
+++ b/test/YAMLParser/spec-09-32.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-09-33.data b/test/YAMLParser/spec-09-33.data
new file mode 100644
index 000000000000..a2d8bf495043
--- /dev/null
+++ b/test/YAMLParser/spec-09-33.data
@@ -0,0 +1,16 @@
+# RUN: yaml-bench -canonical %s
+
+>
+ folded
+ line
+
+ next
+ line
+
+ * bullet
+ * list
+
+ last
+ line
+
+# Comment
diff --git a/test/YAMLParser/spec-10-01.data b/test/YAMLParser/spec-10-01.data
new file mode 100644
index 000000000000..549a54db42fd
--- /dev/null
+++ b/test/YAMLParser/spec-10-01.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- [ inner, inner, ]
+- [inner,last]
diff --git a/test/YAMLParser/spec-10-02.data b/test/YAMLParser/spec-10-02.data
new file mode 100644
index 000000000000..662427a0c066
--- /dev/null
+++ b/test/YAMLParser/spec-10-02.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+[
+"double
+ quoted", 'single
+ quoted',
+plain
+ text, [ nested ],
+single: pair ,
+]
diff --git a/test/YAMLParser/spec-10-03.data b/test/YAMLParser/spec-10-03.data
new file mode 100644
index 000000000000..43f300e40c35
--- /dev/null
+++ b/test/YAMLParser/spec-10-03.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+ # sequence
+- one
+- two : three
diff --git a/test/YAMLParser/spec-10-04.data b/test/YAMLParser/spec-10-04.data
new file mode 100644
index 000000000000..733a570efe8a
--- /dev/null
+++ b/test/YAMLParser/spec-10-04.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+block:
+- one
+-
+ - two
diff --git a/test/YAMLParser/spec-10-05.data b/test/YAMLParser/spec-10-05.data
new file mode 100644
index 000000000000..3848b2a20060
--- /dev/null
+++ b/test/YAMLParser/spec-10-05.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+- # Empty
+- |
+ block node
+- - one # in-line
+ - two # sequence
+- one: two # in-line
+ # mapping
diff --git a/test/YAMLParser/spec-10-06.data b/test/YAMLParser/spec-10-06.data
new file mode 100644
index 000000000000..40efb2b916ca
--- /dev/null
+++ b/test/YAMLParser/spec-10-06.data
@@ -0,0 +1,4 @@
+# RUN: yaml-bench -canonical %s
+
+- { inner : entry , also: inner , }
+- {inner: entry,last : entry}
diff --git a/test/YAMLParser/spec-10-07.data b/test/YAMLParser/spec-10-07.data
new file mode 100644
index 000000000000..7aa350e40bb5
--- /dev/null
+++ b/test/YAMLParser/spec-10-07.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? : value, # Empty key
+? explicit
+ key: value,
+simple key : value,
+[ collection, simple, key ]: value
+}
diff --git a/test/YAMLParser/spec-10-08.data b/test/YAMLParser/spec-10-08.data
new file mode 100644
index 000000000000..5b981e983392
--- /dev/null
+++ b/test/YAMLParser/spec-10-08.data
@@ -0,0 +1,13 @@
+# RUN: yaml-bench -canonical %s |& FileCheck %s
+#
+# This fails because even without a key token, some contexts (in this case flow
+# maps) allow implicit null keys, which mix with this in weird ways.
+# XFAIL: *
+
+{
+multi-line
+ simple key : value,
+very long ...................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................(>1KB)................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................... key: value
+}
+
+# CHECK: error
diff --git a/test/YAMLParser/spec-10-09.data b/test/YAMLParser/spec-10-09.data
new file mode 100644
index 000000000000..a6b1fd00dde3
--- /dev/null
+++ b/test/YAMLParser/spec-10-09.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+{
+key : value,
+empty: # empty value↓
+}
diff --git a/test/YAMLParser/spec-10-10.data b/test/YAMLParser/spec-10-10.data
new file mode 100644
index 000000000000..c97901ddfbe0
--- /dev/null
+++ b/test/YAMLParser/spec-10-10.data
@@ -0,0 +1,10 @@
+# RUN: yaml-bench -canonical %s
+
+{
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3, # Empty value
+simple key1 : explicit value,
+simple key2 : , # Explicit empty
+simple key3, # Empty value
+}
diff --git a/test/YAMLParser/spec-10-11.data b/test/YAMLParser/spec-10-11.data
new file mode 100644
index 000000000000..51bd06f02025
--- /dev/null
+++ b/test/YAMLParser/spec-10-11.data
@@ -0,0 +1,9 @@
+# RUN: yaml-bench -canonical %s
+
+[
+? explicit key1 : explicit value,
+? explicit key2 : , # Explicit empty
+? explicit key3, # Implicit empty
+simple key1 : explicit value,
+simple key2 : , # Explicit empty
+]
diff --git a/test/YAMLParser/spec-10-12.data b/test/YAMLParser/spec-10-12.data
new file mode 100644
index 000000000000..65a90b3f2c5a
--- /dev/null
+++ b/test/YAMLParser/spec-10-12.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+block: # Block
+ # mapping
+ key: value
diff --git a/test/YAMLParser/spec-10-13.data b/test/YAMLParser/spec-10-13.data
new file mode 100644
index 000000000000..ccadeb1e7d5f
--- /dev/null
+++ b/test/YAMLParser/spec-10-13.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+? explicit key # implicit value
+? |
+ block key
+: - one # explicit in-line
+ - two # block value
diff --git a/test/YAMLParser/spec-10-14.data b/test/YAMLParser/spec-10-14.data
new file mode 100644
index 000000000000..866ec1f7b2c3
--- /dev/null
+++ b/test/YAMLParser/spec-10-14.data
@@ -0,0 +1,6 @@
+# RUN: yaml-bench -canonical %s
+
+plain key: # empty value
+"quoted key":
+- one # explicit next-line
+- two # block value
diff --git a/test/YAMLParser/spec-10-15.data b/test/YAMLParser/spec-10-15.data
new file mode 100644
index 000000000000..7d061bddd193
--- /dev/null
+++ b/test/YAMLParser/spec-10-15.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- sun: yellow
+- ? earth: blue
+ : moon: white
diff --git a/test/YAMLParser/str.data b/test/YAMLParser/str.data
new file mode 100644
index 000000000000..bf013b6f52c5
--- /dev/null
+++ b/test/YAMLParser/str.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- abcd
diff --git a/test/YAMLParser/timestamp-bugs.data b/test/YAMLParser/timestamp-bugs.data
new file mode 100644
index 000000000000..bf41a21b22d9
--- /dev/null
+++ b/test/YAMLParser/timestamp-bugs.data
@@ -0,0 +1,8 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-14 21:59:43.10 -5:30
+- 2001-12-14 21:59:43.10 +5:30
+- 2001-12-14 21:59:43.00101
+- 2001-12-14 21:59:43+1
+- 2001-12-14 21:59:43-1:30
+- 2005-07-08 17:35:04.517600
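
These cases exercise the loose offset forms YAML 1.1 allows in timestamps: single-digit hours and optional minutes, such as "+1" or "-5:30". Purely as an illustration of what a consumer has to normalize, and not code from yaml-bench (the function name is invented):

import re

def normalize_offset(ts):
    # Only strings with a time component can carry a trailing UTC offset;
    # a bare date such as "2002-12-14" must pass through untouched.
    m = re.match(r'(.*\d:\d{2}(?:\.\d+)?\s*)([+-])(\d{1,2})(?::(\d{2}))?$', ts)
    if not m:
        return ts
    head, sign = m.group(1), m.group(2)
    hours, minutes = int(m.group(3)), m.group(4) or "00"
    return head + "%s%02d:%s" % (sign, hours, minutes)

assert normalize_offset("2001-12-14 21:59:43.10 -5:30") == \
       "2001-12-14 21:59:43.10 -05:30"
assert normalize_offset("2002-12-14") == "2002-12-14"
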
diff --git a/test/YAMLParser/timestamp.data b/test/YAMLParser/timestamp.data
new file mode 100644
index 000000000000..79945451b54e
--- /dev/null
+++ b/test/YAMLParser/timestamp.data
@@ -0,0 +1,7 @@
+# RUN: yaml-bench -canonical %s
+
+- 2001-12-15T02:59:43.1Z
+- 2001-12-14t21:59:43.10-05:00
+- 2001-12-14 21:59:43.10 -5
+- 2001-12-15 2:59:43.10
+- 2002-12-14
diff --git a/test/YAMLParser/utf8-implicit.data b/test/YAMLParser/utf8-implicit.data
new file mode 100644
index 000000000000..ee2791fb062e
--- /dev/null
+++ b/test/YAMLParser/utf8-implicit.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- implicit UTF-8
diff --git a/test/YAMLParser/utf8.data b/test/YAMLParser/utf8.data
new file mode 100644
index 000000000000..3935e9d12179
--- /dev/null
+++ b/test/YAMLParser/utf8.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+--- UTF-8
diff --git a/test/YAMLParser/value.data b/test/YAMLParser/value.data
new file mode 100644
index 000000000000..311ccd4f22e9
--- /dev/null
+++ b/test/YAMLParser/value.data
@@ -0,0 +1,3 @@
+# RUN: yaml-bench -canonical %s
+
+- =
diff --git a/test/YAMLParser/yaml.data b/test/YAMLParser/yaml.data
new file mode 100644
index 000000000000..3ce5e4b73e28
--- /dev/null
+++ b/test/YAMLParser/yaml.data
@@ -0,0 +1,5 @@
+# RUN: yaml-bench -canonical %s
+
+- !!yaml '!'
+- !!yaml '&'
+- !!yaml '*'
diff --git a/test/lib/llvm.exp b/test/lib/llvm.exp
deleted file mode 100644
index 89be85cda842..000000000000
--- a/test/lib/llvm.exp
+++ /dev/null
@@ -1,285 +0,0 @@
-# This procedure executes one line of a test case's execution script.
-proc execOneLine { test PRS outcome lineno line } {
- set status 0
- set resultmsg ""
- set retval [ catch { eval exec -keepnewline -- $line } errmsg ]
- if { $retval != 0 } {
- set code [lindex $::errorCode 0]
- set lineno [expr $lineno + 1]
- if { $PRS != ""} {
- set PRS " for $PRS"
- }
- set errmsg " at line $lineno\nwhile running: $line\n$errmsg"
- switch "$code" {
- CHILDSTATUS {
- set status [lindex $::errorCode 2]
- if { $status != 0 } {
- set resultmsg "$test$PRS\nFailed with exit($status)$errmsg"
- }
- }
- CHILDKILLED {
- set signal [lindex $::errorCode 2]
- set resultmsg "$test$PRS\nFailed with signal($signal)$errmsg"
- }
- CHILDSUSP {
- set signal [lindex $::errorCode 2]
- set resultmsg "$test$PRS\nFailed with suspend($signal)$errmsg"
- }
- POSIX {
- set posixNum [lindex $::errorCode 1]
- set posixMsg [lindex $::errorCode 2]
- set resultmsg "$test$PRS\nFailed with posix($posixNum,$posixMsg)$errmsg"
- }
- NONE {
- # Any other error such as stderr output of a program, or syntax error in
- # the RUN line.
- set resultmsg "$test$PRS\nFailed with unknown error (or has stderr output)$errmsg"
- }
- default {
- set resultmsg "$test$PRS\nFailed with unknown error$errmsg"
- }
- }
- }
- return $resultmsg
-}
-
-# This procedure performs variable substitutions on the RUN: lines of a test
-# cases.
-proc substitute { line test tmpFile } {
- global srcroot objroot srcdir objdir subdir target_triplet
- global llvmgcc llvmgxx emitir ocamlopt
- global gccpath gxxpath compile_c compile_cxx link shlibext llvmlibsdir
- global llvmdsymutil valgrind grep gas bugpoint_topts
- set path [file join $srcdir $subdir]
-
- # Substitute all Tcl variables.
- set new_line [subst $line ]
-
- #replace %% with _#MARKER#_ to make the replacement of %% more predictable
- regsub -all {%%} $new_line {_#MARKER#_} new_line
- #replace %llvmgcc_only with actual path to llvmgcc
- regsub -all {%llvmgcc_only} $new_line "$llvmgcc" new_line
- #replace %llvmgcc with actual path to llvmgcc
- regsub -all {%llvmgcc} $new_line "$llvmgcc $emitir -w" new_line
- #replace %llvmgxx with actual path to llvmg++
- regsub -all {%llvmgxx} $new_line "$llvmgxx $emitir -w" new_line
- #replace %compile_cxx with C++ compilation command
- regsub -all {%compile_cxx} $new_line "$compile_cxx" new_line
- #replace %compile_c with C compilation command
- regsub -all {%compile_c} $new_line "$compile_c" new_line
- #replace %link with C++ link command
- regsub -all {%link} $new_line "$link" new_line
- #replace %shlibext with shared library extension
- regsub -all {%shlibext} $new_line "$shlibext" new_line
- #replace %ocamlopt with ocaml compiler command
- regsub -all {%ocamlopt} $new_line "$ocamlopt" new_line
- #replace %llvmdsymutil with dsymutil command
- regsub -all {%llvmdsymutil} $new_line "$llvmdsymutil" new_line
- #replace %llvmlibsdir with configure library directory
- regsub -all {%llvmlibsdir} $new_line "$llvmlibsdir" new_line
- #replace %bugpoint_topts with actual bugpoint target options
- regsub -all {%bugpoint_topts} $new_line "$bugpoint_topts" new_line
- #replace %p with path to source,
- regsub -all {%p} $new_line [file join $srcdir $subdir] new_line
- #replace %s with filename
- regsub -all {%s} $new_line $test new_line
- #replace %t with temp filenames
- regsub -all {%t} $new_line $tmpFile new_line
- #replace %abs_tmp with absolute temp filenames
- regsub -all {%abs_tmp} $new_line [file join [pwd] $tmpFile] new_line
- #replace _#MARKER#_ with %
- regsub -all {_#MARKER#_} $new_line % new_line
-
- #replace grep with GNU grep
- regsub -all { grep } $new_line " $grep " new_line
- #replace as with GNU as
- regsub -all {\| as } $new_line "| $gas " new_line
-
- #valgind related stuff
-# regsub -all {bugpoint } $new_line "$valgrind bugpoint " new_line
- regsub -all {llc } $new_line "$valgrind llc " new_line
- regsub -all {lli } $new_line "$valgrind lli " new_line
- regsub -all {llvm-ar } $new_line "$valgrind llvm-ar " new_line
- regsub -all {llvm-as } $new_line "$valgrind llvm-as " new_line
- regsub -all {llvm-bcanalyzer } $new_line "$valgrind llvm-bcanalyzer " new_line
- regsub -all {llvm-dis } $new_line "$valgrind llvm-dis " new_line
- regsub -all {llvm-extract } $new_line "$valgrind llvm-extract " new_line
- regsub -all {llvm-ld } $new_line "$valgrind llvm-ld " new_line
- regsub -all {llvm-link } $new_line "$valgrind llvm-link " new_line
- regsub -all {llvm-nm } $new_line "$valgrind llvm-nm " new_line
- regsub -all {llvm-prof } $new_line "$valgrind llvm-prof " new_line
- regsub -all {llvm-ranlib } $new_line "$valgrind llvm-ranlib " new_line
- regsub -all {([^a-zA-Z_-])opt } $new_line "\\1$valgrind opt " new_line
- regsub -all {^opt } $new_line "$valgrind opt " new_line
- regsub -all {llvm-tblgen } $new_line "$valgrind llvm-tblgen " new_line
- regsub -all "not $valgrind " $new_line "$valgrind not " new_line
-
- return $new_line
-}
-
-# This procedure runs the set of tests for the test_source_files array.
-proc RunLLVMTests { test_source_files } {
- global srcroot objroot srcdir objdir subdir target_triplet
- set timeout 60
-
- set path [file join $objdir $subdir]
-
- #Make Output Directory if it does not exist already
- if { [file exists path] } {
- cd $path
- } else {
- file mkdir $path
- cd $path
- }
-
- file mkdir Output
- cd Output
-
- foreach test $test_source_files {
- #Should figure out best way to set the timeout
- #set timeout 40
-
- set filename [file tail $test]
- verbose "ABOUT TO RUN: $filename" 2
- set outcome PASS
- set tmpFile "$filename.tmp"
-
- # Mark that it should not be XFAIL for this target.
- set targetPASS 0
-
- #set hasRunline bool to check if testcase has a runline
- set numLines 0
-
- # Open the test file and start reading lines
- set testFileId [ open $test r]
- set runline ""
- set PRNUMS ""
- foreach line [split [read $testFileId] \n] {
-
- # if its the END. line then stop parsing (optimization for big files)
- if {[regexp {END.[[:space:]]*$} $line match endofscript]} {
- break
-
- # if the line is continued, concatenate and continue the loop
- } elseif {[regexp {RUN: *(.+)(\\)$} $line match oneline suffix]} {
- set runline "$runline$oneline "
-
- # if its a terminating RUN: line then do substitution on the whole line
- # and then save the line.
- } elseif {[regexp {RUN: *(.+)$} $line match oneline suffix]} {
- set runline "$runline$oneline"
- set runline [ substitute $runline $test $tmpFile ]
- set lines($numLines) $runline
- set numLines [expr $numLines + 1]
- set runline ""
-
- # if its an PR line, save the problem report number
- } elseif {[regexp {PR([0-9]+)} $line match prnum]} {
- if {$PRNUMS == ""} {
- set PRNUMS "PR$prnum"
- } else {
- set PRNUMS "$PRNUMS,$prnum"
- }
- # if its an XFAIL line, see if we should be XFAILing or not.
- } elseif {[regexp {XFAIL:[ *](.+)} $line match targets]} {
- set targets
-
- #split up target if more then 1 specified
- foreach target [split $targets ,] {
- if { $target == "*" } {
- if {$targetPASS != 1} {
- set outcome XFAIL
- }
- } elseif { [regexp $target $target_triplet match] } {
- if {$targetPASS != 1} {
- set outcome XFAIL
- }
- }
- }
- } elseif {[regexp {XTARGET:[ *](.+)} $line match targets]} {
- set targets
-
- #split up target if more then 1 specified
- foreach target [split $targets ,] {
- if { [regexp {\*} $target match] } {
- set targetPASS 1
- set outcome PASS
- } elseif { [regexp $target $target_triplet match] } {
- set targetPASS 1
- set outcome PASS
- }
- }
- }
- }
-
- # Done reading the script
- close $testFileId
-
-
- if { $numLines == 0 } {
- fail "$test: \nDoes not have a RUN line\n"
- } else {
- set failed 0
- for { set i 0 } { $i < $numLines } { set i [ expr $i + 1 ] } {
- regsub ^.*RUN:(.*) $lines($i) \1 theLine
- set resultmsg [execOneLine $test $PRNUMS $outcome $i $theLine ]
- if { $resultmsg != "" } {
- if { $outcome == "XFAIL" } {
- xfail "$resultmsg"
- } else {
- fail "$resultmsg"
- }
- set failed 1
- break
- }
- }
- if { $failed } {
- continue
- } else {
- if { $PRNUMS != "" } {
- set PRNUMS " for $PRNUMS"
- }
- if { $outcome == "XFAIL" } {
- xpass "$test$PRNUMS"
- } else {
- pass "$test$PRNUMS"
- }
- }
- }
- }
-}
-
-# This procedure provides an interface to check the TARGETS_TO_BUILD makefile
-# variable to see if a particular target has been configured to build. This
-# helps avoid running tests for targets that aren't available.
-proc llvm_supports_target { tgtName } {
- global TARGETS_TO_BUILD
- foreach target [split $TARGETS_TO_BUILD] {
- if { [regexp $tgtName $target match] } {
- return 1
- }
- }
- return 0
-}
-
-proc llvm_supports_darwin_and_target { tgtName } {
- global target_triplet
- if { [ llvm_supports_target $tgtName ] } {
- if { [regexp darwin $target_triplet match] } {
- return 1
- }
- }
- return 0
-}
-
-# This procedure provides an interface to check the BINDINGS_TO_BUILD makefile
-# variable to see if a particular binding has been configured to build.
-proc llvm_supports_binding { name } {
- global llvm_bindings
- foreach item [split $llvm_bindings] {
- if { [regexp $name $item match] } {
- return 1
- }
- }
- return 0
-}
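
The deleted `substitute` proc above is the DejaGNU-era ancestor of lit's config.substitutions: every %s, %t, %p and friend in a RUN line was rewritten by hand with regsub. A rough Python equivalent, abbreviated and with an invented helper name, only to show what lit now does declaratively:

import os

def apply_substitutions(line, test_path, tmp_file, src_dir):
    # Protect %% first so a literal percent survives the other rewrites,
    # mirroring the _#MARKER#_ trick in the Tcl version.
    line = line.replace("%%", "\0")
    # Longest keys first, so a longer substitution is never split apart by
    # a shorter one that happens to be its prefix.
    for key, value in [("%abs_tmp", os.path.abspath(tmp_file)),
                       ("%s", test_path),
                       ("%t", tmp_file),
                       ("%p", src_dir)]:
        line = line.replace(key, value)
    return line.replace("\0", "%")
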
diff --git a/test/lib/llvm2cpp.exp b/test/lib/llvm2cpp.exp
deleted file mode 100644
index f4530338ee23..000000000000
--- a/test/lib/llvm2cpp.exp
+++ /dev/null
@@ -1,100 +0,0 @@
-# This file defines a tcl proc to assist with testing the llvm2cpp. There are
-# no llvm2cpp specific test cases. Instead, it utilizes all the existing test
-# cases and makes sure llvm2cpp can run them. The basic idea is that we find
-# all the LLVM Assembly (*.ll) files, run llvm2cpp on them to generate a C++
-# program, compile those programs, run them and see if what they produce matches
-# the original input to llvm2cpp.
-
-proc llvm2cpp-test { files } {
- global subdir llvmtoolsdir llvmlibsdir objdir srcdir objroot srcroot
- set timeout 30
- set path [file join $objdir $subdir]
- set llc [file join $llvmtoolsdir llc ]
- set llvmas [file join $llvmtoolsdir llvm-as ]
- set llvmdis [file join $llvmtoolsdir llvm-dis ]
-
- #Make Output Directory if it does not exist already
- if { [file exists path] } {
- cd $path
- } else {
- file mkdir $path
- cd $path
- }
-
- file mkdir Output
-
- foreach test $files {
-
- set filename [file tail $test]
- set generated [file join Output $filename.cpp]
- set executable [file join Output $filename.exe]
- set output [file join Output $filename.gen]
- set assembly [file join Output $filename.asm]
- set testname [file rootname $filename]
- set bytecode [file join Output $filename.bc]
-
- # Note that the stderr for llvm-as, etc. must be redirected to /dev/null
- # because otherwise exec will see the msgs and return 1 even though they
- # are only warnings. If real errors are generated on stderr then llvm-as
- # will return a non-zero retval anyway so we're good.
-
- # Scan the test file to see if there's an XFAIL file. If so, don't run it
- set retval [ catch {
- exec -keepnewline grep XFAIL $test 2>/dev/null } msg ]
- if { $retval == 0 } {
- continue;
- }
-
- # Run llvm-as/llvm-dis
- set pipeline llvm-as|llvm-dis
- set retval [ catch {
- exec -keepnewline $llvmas < $test -o - | $llvmdis -o $assembly 2>/dev/null } msg ]
-
- if { $retval != 0 } {
- fail "$test: $pipeline returned $retval\n$msg"
- continue
- }
-
- # Build bytecode for llvm2cpp input
- set retval [ catch {
- exec -keepnewline $llvmas < $assembly > $bytecode 2>/dev/null } msg ]
-
- if { $retval != 0 } {
- fail "$test: llvm-as returned $retval\n$msg"
- continue
- }
-
- set retval [ catch {
- exec -keepnewline $llc -march=cpp -o $generated < $bytecode 2>/dev/null } msg]
-
- if { $retval != 0 } {
- fail "$test: llvm2cpp returned $retval\n$msg"
- continue
- }
-
- set retval [ catch {
- exec -keepnewline gcc -g -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -o $executable $generated -I$srcroot/include -I$objroot/include -L$llvmlibsdir -lLLVMCore -lLLVMSupport -lLLVMSystem -lstdc++ } msg ]
- if { $retval != 0 } {
- fail "$test: gcc returned $retval\n$msg"
- continue
- }
-
- set retval [ catch { exec -keepnewline $executable > $output } msg ]
- if { $retval != 0 } {
- set execname [file tail $executable]
- fail "$test: $execname returned $retval:\n$msg"
- continue
- }
-
- set retval [ catch {
- exec -keepnewline diff $assembly $output } msg ]
-
- if { $retval != 0 } {
- fail "$test: diff returned $retval:\n$msg"
- continue
- }
- pass "$test"
- }
-}
-
-
diff --git a/test/lit.cfg b/test/lit.cfg
index 91abb636f479..c58935956a4f 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -42,18 +42,6 @@ if llvm_obj_root is not None:
# Tweak the PATH to include the scripts dir, the tools dir, and the llvm-gcc bin
# dir (if available).
if llvm_obj_root is not None:
- # Include llvm-gcc first, as the llvm-gcc binaryies will not appear
- # neither in the tools nor in the scripts dir. However it might be
- # possible, that some old llvm tools are in the llvm-gcc dir. Adding
- # llvm-gcc dir first ensures, that those will always be overwritten
- # by the new tools in llvm_tools_dir. So now outdated tools are used
- # for testing
- llvmgcc_dir = getattr(config, 'llvmgcc_dir', None)
- if llvmgcc_dir:
- path = os.path.pathsep.join((os.path.join(llvmgcc_dir, 'bin'),
- config.environment['PATH']))
- config.environment['PATH'] = path
-
llvm_src_root = getattr(config, 'llvm_src_root', None)
if not llvm_src_root:
lit.fatal('No LLVM source root set!')
@@ -153,22 +141,32 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
if m:
site_exp[m.group(1)] = m.group(2)
+# Provide target_triple for use in XFAIL and XTARGET.
+config.target_triple = site_exp['target_triplet']
+
+# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
+# triple so we can check it with XFAIL and XTARGET.
+config.target_triple += lit.valgrindTriple
+
+# Process jit implementation option
+jit_impl_cfg = lit.params.get('jit_impl', None)
+if jit_impl_cfg == 'mcjit':
+ # When running with mcjit, mangle -mcjit into target triple
+ # and add -use-mcjit flag to lli invocation
+ if 'i686' in config.target_triple:
+ config.target_triple += jit_impl_cfg + '-ia32'
+ elif 'x86_64' in config.target_triple:
+ config.target_triple += jit_impl_cfg + '-ia64'
+ else:
+ config.target_triple += jit_impl_cfg
+
+ config.substitutions.append( ('%lli', 'lli -use-mcjit') )
+else:
+ config.substitutions.append( ('%lli', 'lli') )
+
# Add substitutions.
-config.substitutions.append(('%llvmgcc_only', site_exp['llvmgcc']))
-for sub in ['llvmgcc', 'llvmgxx', 'emitir', 'compile_cxx', 'compile_c',
- 'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
- 'llvmshlibdir',
- 'bugpoint_topts']:
- if sub in ('llvmgcc', 'llvmgxx'):
- config.substitutions.append(('%' + sub,
- site_exp[sub] + ' %emitir -w'))
- # FIXME: This is a hack to avoid LLVMC tests failing due to a clang driver
- # warning when passing in "-fexceptions -fno-exceptions".
- elif sub == 'compile_cxx':
- config.substitutions.append(('%' + sub,
- site_exp[sub].replace('-fno-exceptions', '')))
- else:
- config.substitutions.append(('%' + sub, site_exp[sub]))
+for sub in ['link', 'shlibext', 'ocamlopt', 'llvmshlibdir']:
+ config.substitutions.append(('%' + sub, site_exp[sub]))
# For each occurrence of an llvm tool name as its own word, replace it
# with the full path to the build directory holding that tool. This
@@ -187,12 +185,14 @@ for pattern in [r"\bbugpoint\b(?!-)", r"(?<!/|-)\bclang\b(?!-)",
r"\bllc\b", r"\blli\b",
r"\bllvm-ar\b", r"\bllvm-as\b",
r"\bllvm-bcanalyzer\b", r"\bllvm-config\b",
- r"\bllvm-diff\b", r"\bllvm-dis\b",
+ r"\bllvm-cov\b", r"\bllvm-diff\b",
+ r"\bllvm-dis\b", r"\bllvm-dwarfdump\b",
r"\bllvm-extract\b", r"\bllvm-ld\b",
r"\bllvm-link\b", r"\bllvm-mc\b",
- r"\bllvm-nm\b", r"\bllvm-prof\b",
- r"\bllvm-ranlib\b", r"\bllvm-shlib\b",
- r"\bllvm-stub\b", r"\bllvm2cpp\b",
+ r"\bllvm-nm\b", r"\bllvm-objdump\b",
+ r"\bllvm-prof\b", r"\bllvm-ranlib\b",
+ r"\bllvm-rtdyld\b", r"\bllvm-shlib\b",
+ r"\bllvm-size\b", r"\bllvm-stub\b",
# Don't match '-llvmc'.
r"(?<!-)\bllvmc\b", r"\blto\b",
# Don't match '.opt', '-opt',
@@ -218,78 +218,6 @@ for pattern in [r"\bbugpoint\b(?!-)", r"(?<!/|-)\bclang\b(?!-)",
break
config.substitutions.append((pattern, substitution))
-excludes = []
-
-# Provide target_triple for use in XFAIL and XTARGET.
-config.target_triple = site_exp['target_triplet']
-
-# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
-# triple so we can check it with XFAIL and XTARGET.
-config.target_triple += lit.valgrindTriple
-
-# Provide llvm_supports_target for use in local configs.
-targets = set(site_exp["TARGETS_TO_BUILD"].split())
-def llvm_supports_target(name):
- return name in targets
-
-def llvm_supports_darwin_and_target(name):
- return 'darwin' in config.target_triple and llvm_supports_target(name)
-
-bindings = set([s.strip() for s in site_exp['llvm_bindings'].split(',')])
-def llvm_supports_binding(name):
- return name.strip() in bindings
-
-# Provide on_clone hook for reading 'dg.exp'.
-import os
-simpleLibData = re.compile(r"""load_lib llvm.exp
-
-RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]""",
- re.MULTILINE)
-conditionalLibData = re.compile(r"""load_lib llvm.exp
-
-if.*\[ ?(llvm[^ ]*) ([^ ]*) ?\].*{
- *RunLLVMTests \[lsort \[glob -nocomplain \$srcdir/\$subdir/\*\.(.*)\]\]
-\}""", re.MULTILINE)
-def on_clone(parent, cfg, for_path):
- def addSuffixes(match):
- if match[0] == '{' and match[-1] == '}':
- cfg.suffixes = ['.' + s for s in match[1:-1].split(',')]
- else:
- cfg.suffixes = ['.' + match]
-
- libPath = os.path.join(os.path.dirname(for_path),
- 'dg.exp')
- if not os.path.exists(libPath):
- cfg.unsupported = True
- return
-
- # Reset unsupported, in case we inherited it.
- cfg.unsupported = False
- lib = open(libPath).read().strip()
-
- # Check for a simple library.
- m = simpleLibData.match(lib)
- if m:
- addSuffixes(m.group(1))
- return
-
- # Check for a conditional test set.
- m = conditionalLibData.match(lib)
- if m:
- funcname,arg,match = m.groups()
- addSuffixes(match)
-
- func = globals().get(funcname)
- if not func:
- lit.error('unsupported predicate %r' % funcname)
- elif not func(arg):
- cfg.unsupported = True
- return
- # Otherwise, give up.
- lit.error('unable to understand %r:\n%s' % (libPath, lib))
-
-config.on_clone = on_clone
-
### Features
# Shell execution
@@ -306,5 +234,10 @@ else:
if loadable_module:
config.available_features.add('loadable_module')
-if config.enable_assertions:
+# llc knows whether it was compiled with -DNDEBUG.
+import subprocess
+llc_cmd = subprocess.Popen([os.path.join(llvm_tools_dir, 'llc'), '-version'],
+ stdout = subprocess.PIPE)
+if re.search(r'with assertions', llc_cmd.stdout.read()):
config.available_features.add('asserts')
+llc_cmd.wait()
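
The new probe above shells out to `llc -version` and searches the banner for "with assertions" to decide whether the `asserts` feature is available, so tests can gate on `REQUIRES: asserts`. A variant sketch of the same idea; communicate() reads stdout and reaps the child in one step, instead of the separate read()/wait() pair:

import os
import re
import subprocess

def detect_asserts(llvm_tools_dir):
    proc = subprocess.Popen([os.path.join(llvm_tools_dir, "llc"), "-version"],
                            stdout=subprocess.PIPE)
    out, _ = proc.communicate()
    # Asserts-enabled builds of llc advertise it in the version banner.
    return re.search(rb"with assertions", out) is not None
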
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index fe152ef499fb..8b81186211aa 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -3,11 +3,12 @@
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
-config.llvmgcc_dir = "@LLVMGCCDIR@"
config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
config.python_executable = "@PYTHON_EXECUTABLE@"
config.enable_shared = @ENABLE_SHARED@
config.enable_assertions = @ENABLE_ASSERTIONS@
+config.targets_to_build = "@TARGETS_TO_BUILD@"
+config.llvm_bindings = "@LLVM_BINDINGS@"
# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
diff --git a/test/site.exp.in b/test/site.exp.in
index 277d54995f6b..cfb2eac55055 100644
--- a/test/site.exp.in
+++ b/test/site.exp.in
@@ -2,27 +2,15 @@
# Do not edit!
set target_triplet "@TARGET_TRIPLE@"
set TARGETS_TO_BUILD "@TARGETS_TO_BUILD@"
-set llvmgcc_langs "@LLVMGCC_LANGS@"
-set llvmtoolsdir "@LLVM_TOOLS_DIR@"
-set llvmlibsdir "@LLVM_LIBS_DIR@"
set llvmshlibdir "@SHLIBDIR@"
set llvm_bindings "@LLVM_BINDINGS@"
set srcroot "@LLVM_SOURCE_DIR@"
set objroot "@LLVM_BINARY_DIR@"
set srcdir "@LLVM_SOURCE_DIR@"
set objdir "@LLVM_BINARY_DIR@"
-set gccpath "@GCCPATH@"
-set gxxpath "@GXXPATH@"
-set compile_c "@TEST_COMPILE_C_CMD@"
-set compile_cxx "@TEST_COMPILE_CXX_CMD@"
set link "@TEST_LINK_CMD@"
-set llvmgcc "@LLVMGCC@"
-set llvmgxx "@LLVMGXX@"
-set bugpoint_topts "@BUGPOINT_TOPTS@"
set shlibext "@SHLIBEXT@"
set ocamlopt "@OCAMLOPT@"
set valgrind "@VALGRIND@"
set grep "@GREP@"
set gas "@AS@"
-set llvmdsymutil "@DSYMUTIL@"
-set emitir "@LLVMCC_EMITIR_FLAG@"
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index e66648bee1e4..9668c764c689 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -11,11 +11,8 @@ if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/polly/CMakeLists.txt )
endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/polly/CMakeLists.txt )
if( NOT WIN32 OR MSYS OR CYGWIN )
- # It is useful to build llvm-config before the other tools, so we
- # have a fresh LibDeps.txt for regenerating the hard-coded library
- # dependencies. llvm-config/CMakeLists.txt takes care of this but we
- # must keep llvm-config as the first entry on the list of tools to
- # be built.
+ # We currently require 'sed' to build llvm-config, so don't try to build it
+ # on pure Win32.
add_subdirectory(llvm-config)
endif()
@@ -31,6 +28,7 @@ add_subdirectory(llvm-nm)
add_subdirectory(llvm-size)
add_subdirectory(llvm-ld)
+add_subdirectory(llvm-cov)
add_subdirectory(llvm-prof)
add_subdirectory(llvm-link)
add_subdirectory(lli)
@@ -39,6 +37,7 @@ add_subdirectory(llvm-extract)
add_subdirectory(llvm-diff)
add_subdirectory(macho-dump)
add_subdirectory(llvm-objdump)
+add_subdirectory(llvm-readobj)
add_subdirectory(llvm-rtdyld)
add_subdirectory(llvm-dwarfdump)
@@ -46,7 +45,7 @@ add_subdirectory(bugpoint)
add_subdirectory(bugpoint-passes)
add_subdirectory(llvm-bcanalyzer)
add_subdirectory(llvm-stub)
-add_subdirectory(edis)
+add_subdirectory(llvm-stress)
if( NOT WIN32 )
add_subdirectory(lto)
@@ -59,11 +58,14 @@ if( LLVM_ENABLE_PIC )
endif()
endif()
-if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
+set(LLVM_CLANG_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/clang" CACHE PATH "Path to Clang source directory")
+
+if (NOT ${LLVM_CLANG_SOURCE_DIR} STREQUAL ""
+ AND EXISTS ${LLVM_CLANG_SOURCE_DIR}/CMakeLists.txt)
option(LLVM_BUILD_CLANG "Whether to build Clang as part of LLVM" ON)
if (${LLVM_BUILD_CLANG})
- add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/clang )
+ add_subdirectory(${LLVM_CLANG_SOURCE_DIR} clang)
endif()
-endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
+endif ()
set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt
new file mode 100644
index 000000000000..aba990f52114
--- /dev/null
+++ b/tools/LLVMBuild.txt
@@ -0,0 +1,24 @@
+;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-ld llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size llvm-stub macho-dump opt
+
+[component_0]
+type = Group
+name = Tools
+parent = $ROOT
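
LLVMBuild.txt files like the one above are plain INI-style descriptions: a ';' comment banner, a [common] section whose subdirectories key lists child directories, and one [component_N] section per component. A minimal sketch of reading one with the standard configparser module, for illustration only; the real llvm-build utility ships its own reader:

import configparser

def read_llvmbuild(path):
    parser = configparser.ConfigParser()
    with open(path) as f:
        parser.read_file(f)  # ';' comment lines are skipped by default
    # [common] carries a whitespace-separated list of child directories.
    subdirs = parser.get("common", "subdirectories", fallback="").split()
    name = parser.get("component_0", "name", fallback=None)
    return subdirs, name
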
diff --git a/tools/Makefile b/tools/Makefile
index 68ce314c92a1..8bf091a72a08 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -9,8 +9,15 @@
LEVEL := ..
+include $(LEVEL)/Makefile.config
+
# Build clang if present.
-OPTIONAL_PARALLEL_DIRS := clang
+
+ifneq ($(CLANG_SRC_ROOT),)
+ OPTIONAL_PARALLEL_DIRS := $(CLANG_SRC_ROOT)
+else
+ OPTIONAL_PARALLEL_DIRS := clang
+endif
# Build LLDB if present. Note LLDB must be built last as it depends on the
# wider LLVM infrastructure (including Clang).
@@ -25,9 +32,9 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
llvm-ld llvm-prof llvm-link \
lli llvm-extract llvm-mc \
bugpoint llvm-bcanalyzer llvm-stub \
- llvm-diff macho-dump llvm-objdump \
+ llvm-diff macho-dump llvm-objdump llvm-readobj \
llvm-rtdyld llvm-dwarfdump llvm-cov \
- llvm-size
+ llvm-size llvm-stress
# Let users override the set of tools to build from the command line.
ifdef ONLY_TOOLS
@@ -36,9 +43,6 @@ ifdef ONLY_TOOLS
PARALLEL_DIRS := $(filter-out lldb,$(ONLY_TOOLS))
endif
-include $(LEVEL)/Makefile.config
-
-
# These libraries build as dynamic libraries (.dylib /.so), they can only be
# built if ENABLE_PIC is set.
ifndef ONLY_TOOLS
@@ -52,14 +56,6 @@ ifeq ($(ENABLE_PIC),1)
endif
PARALLEL_DIRS += bugpoint-passes
-
- # The edis library is only supported if ARM and/or X86 are enabled, and if
- # LLVM is being built PIC on platforms that support dylibs.
- ifneq ($(DISABLE_EDIS),1)
- ifneq ($(filter $(TARGETS_TO_BUILD), X86 ARM),)
- PARALLEL_DIRS += edis
- endif
- endif
endif
ifdef LLVM_HAS_POLLY
diff --git a/tools/bugpoint-passes/Makefile b/tools/bugpoint-passes/Makefile
index b4ad3e4ad3b0..61f96bc33859 100644
--- a/tools/bugpoint-passes/Makefile
+++ b/tools/bugpoint-passes/Makefile
@@ -7,10 +7,10 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-LIBRARYNAME = BugpointPasses
-LOADABLE_MODULE = 1
-USEDLIBS =
+LEVEL := ../..
+LIBRARYNAME := BugpointPasses
+LOADABLE_MODULE := 1
+USEDLIBS :=
# If we don't need RTTI or EH, there's no reason to export anything
# from this plugin.
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 677d17887f40..6b219bf08587 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -87,7 +87,7 @@ Module *llvm::ParseInputFile(const std::string &Filename,
SMDiagnostic Err;
Module *Result = ParseIRFile(Filename, Err, Ctxt);
if (!Result)
- Err.Print("bugpoint", errs());
+ Err.print("bugpoint", errs());
// If we don't have an override triple, use the first one to configure
// bugpoint, or use the host triple if none provided.
@@ -96,7 +96,7 @@ Module *llvm::ParseInputFile(const std::string &Filename,
Triple TheTriple(Result->getTargetTriple());
if (TheTriple.getTriple().empty())
- TheTriple.setTriple(sys::getHostTriple());
+ TheTriple.setTriple(sys::getDefaultTargetTriple());
TargetTriple.setTriple(TheTriple.getTriple());
}
diff --git a/tools/bugpoint/CMakeLists.txt b/tools/bugpoint/CMakeLists.txt
index e06feb100312..ee2235bf427e 100644
--- a/tools/bugpoint/CMakeLists.txt
+++ b/tools/bugpoint/CMakeLists.txt
@@ -1,5 +1,5 @@
set(LLVM_LINK_COMPONENTS asmparser instrumentation scalaropts ipo
- linker bitreader bitwriter)
+ linker bitreader bitwriter vectorize)
add_llvm_tool(bugpoint
BugDriver.cpp
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index f19ef6222f56..aed16f47e012 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -169,7 +169,7 @@ ReduceCrashingGlobalVariables::TestGlobalVariables(
return false;
}
-namespace llvm {
+namespace {
/// ReduceCrashingFunctions reducer - This works by removing functions and
/// seeing if the program still crashes. If it does, then keep the newer,
/// smaller program.
@@ -401,7 +401,8 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI)
for (BasicBlock::iterator I = FI->begin(), E = FI->end(); I != E;) {
Instruction *Inst = I++;
- if (!Instructions.count(Inst) && !isa<TerminatorInst>(Inst)) {
+ if (!Instructions.count(Inst) && !isa<TerminatorInst>(Inst) &&
+ !isa<LandingPadInst>(Inst)) {
if (!Inst->getType()->isVoidTy())
Inst->replaceAllUsesWith(UndefValue::get(Inst->getType()));
Inst->eraseFromParent();
@@ -568,12 +569,15 @@ static bool DebugACrash(BugDriver &BD,
for (Function::const_iterator BI = FI->begin(), E = FI->end(); BI != E;
++BI)
for (BasicBlock::const_iterator I = BI->begin(), E = --BI->end();
- I != E; ++I, ++CurInstructionNum)
+ I != E; ++I, ++CurInstructionNum) {
if (InstructionsToSkipBeforeDeleting) {
--InstructionsToSkipBeforeDeleting;
} else {
if (BugpointIsInterrupted) goto ExitLoops;
+ if (isa<LandingPadInst>(I))
+ continue;
+
outs() << "Checking instruction: " << *I;
Module *M = BD.deleteInstructionFromProgram(I, Simplification);
@@ -590,6 +594,7 @@ static bool DebugACrash(BugDriver &BD,
// one.
delete M;
}
+ }
if (InstructionsToSkipBeforeDeleting) {
InstructionsToSkipBeforeDeleting = 0;
diff --git a/tools/bugpoint/ExecutionDriver.cpp b/tools/bugpoint/ExecutionDriver.cpp
index 77c01ac552b3..218a559d21d7 100644
--- a/tools/bugpoint/ExecutionDriver.cpp
+++ b/tools/bugpoint/ExecutionDriver.cpp
@@ -28,8 +28,7 @@ namespace {
// for miscompilation.
//
enum OutputType {
- AutoPick, RunLLI, RunJIT, RunLLC, RunLLCIA, RunCBE, CBE_bug, LLC_Safe,
- CompileCustom, Custom
+ AutoPick, RunLLI, RunJIT, RunLLC, RunLLCIA, LLC_Safe, CompileCustom, Custom
};
cl::opt<double>
@@ -48,8 +47,6 @@ namespace {
clEnumValN(RunLLC, "run-llc", "Compile with LLC"),
clEnumValN(RunLLCIA, "run-llc-ia",
"Compile with LLC with integrated assembler"),
- clEnumValN(RunCBE, "run-cbe", "Compile with CBE"),
- clEnumValN(CBE_bug,"cbe-bug", "Find CBE bugs"),
clEnumValN(LLC_Safe, "llc-safe", "Use LLC for all"),
clEnumValN(CompileCustom, "compile-custom",
"Use -compile-command to define a command to "
@@ -64,7 +61,6 @@ namespace {
SafeInterpreterSel(cl::desc("Specify \"safe\" i.e. known-good backend:"),
cl::values(clEnumValN(AutoPick, "safe-auto", "Use best guess"),
clEnumValN(RunLLC, "safe-run-llc", "Compile with LLC"),
- clEnumValN(RunCBE, "safe-run-cbe", "Compile with CBE"),
clEnumValN(Custom, "safe-run-custom",
"Use -exec-command to define a command to execute "
"the bitcode. Useful for cross-compilation."),
@@ -154,10 +150,6 @@ bool BugDriver::initializeExecutionEnvironment() {
switch (InterpreterSel) {
case AutoPick:
- InterpreterSel = RunCBE;
- Interpreter =
- AbstractInterpreter::createCBE(getToolName(), Message, GCCBinary,
- &ToolArgv, &GCCToolArgv);
if (!Interpreter) {
InterpreterSel = RunJIT;
Interpreter = AbstractInterpreter::createJIT(getToolName(), Message,
@@ -195,12 +187,6 @@ bool BugDriver::initializeExecutionEnvironment() {
Interpreter = AbstractInterpreter::createJIT(getToolName(), Message,
&ToolArgv);
break;
- case RunCBE:
- case CBE_bug:
- Interpreter = AbstractInterpreter::createCBE(getToolName(), Message,
- GCCBinary, &ToolArgv,
- &GCCToolArgv);
- break;
case CompileCustom:
Interpreter =
AbstractInterpreter::createCustomCompiler(Message, CustomCompileCommand);
@@ -209,9 +195,6 @@ bool BugDriver::initializeExecutionEnvironment() {
Interpreter =
AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
break;
- default:
- Message = "Sorry, this back-end is not supported by bugpoint right now!\n";
- break;
}
if (!Interpreter)
errs() << Message;
@@ -224,17 +207,6 @@ bool BugDriver::initializeExecutionEnvironment() {
std::vector<std::string> SafeToolArgs = SafeToolArgv;
switch (SafeInterpreterSel) {
case AutoPick:
- // In "cbe-bug" mode, default to using LLC as the "safe" backend.
- if (!SafeInterpreter &&
- InterpreterSel == CBE_bug) {
- SafeInterpreterSel = RunLLC;
- SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- GCCBinary,
- &SafeToolArgs,
- &GCCToolArgv);
- }
-
// In "llc-safe" mode, default to using LLC as the "safe" backend.
if (!SafeInterpreter &&
InterpreterSel == LLC_Safe) {
@@ -246,17 +218,6 @@ bool BugDriver::initializeExecutionEnvironment() {
&GCCToolArgv);
}
- // Pick a backend that's different from the test backend. The JIT and
- // LLC backends share a lot of code, so prefer to use the CBE as the
- // safe back-end when testing them.
- if (!SafeInterpreter &&
- InterpreterSel != RunCBE) {
- SafeInterpreterSel = RunCBE;
- SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message,
- GCCBinary,
- &SafeToolArgs,
- &GCCToolArgv);
- }
if (!SafeInterpreter &&
InterpreterSel != RunLLC &&
InterpreterSel != RunJIT) {
@@ -280,11 +241,6 @@ bool BugDriver::initializeExecutionEnvironment() {
&GCCToolArgv,
SafeInterpreterSel == RunLLCIA);
break;
- case RunCBE:
- SafeInterpreter = AbstractInterpreter::createCBE(Path.c_str(), Message,
- GCCBinary, &SafeToolArgs,
- &GCCToolArgv);
- break;
case Custom:
SafeInterpreter =
AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
@@ -462,8 +418,8 @@ bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
errs() << Error;
if (Interpreter != SafeInterpreter) {
errs() << "*** There is a bug running the \"safe\" backend. Either"
- << " debug it (for example with the -run-cbe bugpoint option,"
- << " if CBE is being used as the \"safe\" backend), or fix the"
+ << " debug it (for example with the -run-jit bugpoint option,"
+ << " if JIT is being used as the \"safe\" backend), or fix the"
<< " error some other way.\n";
}
return false;
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 73b65ca94f70..ac8e15971157 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -47,7 +47,39 @@ namespace {
cl::opt<bool, true>
NoSCFG("disable-simplifycfg", cl::location(DisableSimplifyCFG),
cl::desc("Do not use the -simplifycfg pass to reduce testcases"));
-}
+
+ Function* globalInitUsesExternalBA(GlobalVariable* GV) {
+ if (!GV->hasInitializer())
+ return 0;
+
+ Constant *I = GV->getInitializer();
+
+ // walk the values used by the initializer
+ // (and recurse into things like ConstantExpr)
+ std::vector<Constant*> Todo;
+ std::set<Constant*> Done;
+ Todo.push_back(I);
+
+ while (!Todo.empty()) {
+ Constant* V = Todo.back();
+ Todo.pop_back();
+ Done.insert(V);
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(V)) {
+ Function *F = BA->getFunction();
+ if (F->isDeclaration())
+ return F;
+ }
+
+ for (User::op_iterator i = V->op_begin(), e = V->op_end(); i != e; ++i) {
+ Constant *C = dyn_cast<Constant>(*i);
+ if (C && !isa<GlobalValue>(C) && !Done.count(C))
+ Todo.push_back(C);
+ }
+ }
+ return 0;
+ }
+} // end anonymous namespace
/// deleteInstructionFromProgram - This method clones the current Program and
/// deletes the specified instruction from the cloned module. It then runs a
@@ -272,11 +304,6 @@ llvm::SplitFunctionsOutOfModule(Module *M,
ValueToValueMapTy NewVMap;
Module *New = CloneModule(M, NewVMap);
- // Make sure global initializers exist only in the safe module (CBE->.so)
- for (Module::global_iterator I = New->global_begin(), E = New->global_end();
- I != E; ++I)
- I->setInitializer(0); // Delete the initializer to make it external
-
// Remove the Test functions from the Safe module
std::set<Function *> TestFunctions;
for (unsigned i = 0, e = F.size(); i != e; ++i) {
@@ -295,6 +322,27 @@ llvm::SplitFunctionsOutOfModule(Module *M,
DeleteFunctionBody(I);
+ // Try to split the global initializers evenly
+ for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(NewVMap[I]);
+ if (Function *TestFn = globalInitUsesExternalBA(I)) {
+ if (Function *SafeFn = globalInitUsesExternalBA(GV)) {
+ errs() << "*** Error: when reducing functions, encountered "
+ "the global '";
+ WriteAsOperand(errs(), GV, false);
+ errs() << "' with an initializer that references blockaddresses "
+ "from safe function '" << SafeFn->getName()
+ << "' and from test function '" << TestFn->getName() << "'.\n";
+ exit(1);
+ }
+ I->setInitializer(0); // Delete the initializer to make it external
+ } else {
+ // If we keep it in the safe module, then delete it in the test module
+ GV->setInitializer(0);
+ }
+ }
+
// Make sure that there is a global ctor/dtor array in both halves of the
// module if they both have static ctor/dtor functions.
SplitStaticCtorDtor("llvm.global_ctors", M, New, NewVMap);
@@ -340,7 +388,7 @@ Module *BugDriver::ExtractMappedBlocksFromModule(const
// If the BB doesn't have a name, give it one so we have something to key
// off of.
if (!BB->hasName()) BB->setName("tmpbb");
- BlocksToNotExtractFile.os() << BB->getParent()->getNameStr() << " "
+ BlocksToNotExtractFile.os() << BB->getParent()->getName() << " "
<< BB->getName() << "\n";
}
BlocksToNotExtractFile.os().close();
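
The new globalInitUsesExternalBA() helper exists because a global's initializer may embed a blockaddress, which ties that initializer to whichever half of the split module ends up defining the referenced function. A hypothetical construction of such a global, using the C++ API of this vintage (Context and M are assumed to be a live LLVMContext and Module):

    // Build a global whose initializer points into a function body. After
    // a module split this initializer is only meaningful in the half that
    // defines test_fn -- exactly the case globalInitUsesExternalBA() flags.
    Function *F = Function::Create(
        FunctionType::get(Type::getVoidTy(Context), /*isVarArg=*/false),
        GlobalValue::ExternalLinkage, "test_fn", M);
    BasicBlock *BB = BasicBlock::Create(Context, "bb", F);
    ReturnInst::Create(Context, BB);          // give the block a terminator
    Constant *BA = BlockAddress::get(F, BB);  // address of %bb in @test_fn
    new GlobalVariable(*M, BA->getType(), /*isConstant=*/true,
                       GlobalValue::InternalLinkage, BA, "gv");
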
diff --git a/tools/bugpoint/LLVMBuild.txt b/tools/bugpoint/LLVMBuild.txt
new file mode 100644
index 000000000000..549d9d023395
--- /dev/null
+++ b/tools/bugpoint/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/bugpoint/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = bugpoint
+parent = Tools
+required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Linker Scalar
diff --git a/tools/bugpoint/Makefile b/tools/bugpoint/Makefile
index 5d287ef188ae..34f4bddb0185 100644
--- a/tools/bugpoint/Makefile
+++ b/tools/bugpoint/Makefile
@@ -6,11 +6,10 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = bugpoint
-
-LINK_COMPONENTS := asmparser instrumentation scalaropts ipo \
- linker bitreader bitwriter
+LEVEL := ../..
+TOOLNAME := bugpoint
+LINK_COMPONENTS := asmparser instrumentation scalaropts ipo linker bitreader \
+ bitwriter vectorize
include $(LEVEL)/Makefile.common
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 7ff16dbf958b..82a3a862a2d8 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -820,7 +820,8 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Don't forward functions which are external in the test module too.
if (TestFn && !TestFn->isDeclaration()) {
// 1. Add a string constant with its name to the global file
- Constant *InitArray = ConstantArray::get(F->getContext(), F->getName());
+ Constant *InitArray =
+ ConstantDataArray::getString(F->getContext(), F->getName());
GlobalVariable *funcName =
new GlobalVariable(*Safe, InitArray->getType(), true /*isConstant*/,
GlobalValue::InternalLinkage, InitArray,
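
This hunk tracks the removal of string-typed ConstantArray::get() in this release: string constants are now built as packed ConstantDataArray data. A sketch of the replacement call (Ctx is an assumed LLVMContext):

    // Old: ConstantArray::get(Ctx, F->getName());
    // New: a compact [N x i8] array, NUL-terminated by default.
    Constant *Str = ConstantDataArray::getString(Ctx, "name");
    Constant *Raw = ConstantDataArray::getString(Ctx, "name",
                                                 /*AddNull=*/false);
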
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index 336c83d7b1f3..fb090ee17697 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -85,8 +85,11 @@ void BugDriver::EmitProgressBitcode(const Module *M,
if (NoFlyer || PassesToRun.empty()) return;
outs() << "\n*** You can reproduce the problem with: ";
if (UseValgrind) outs() << "valgrind ";
- outs() << "opt " << Filename << " ";
- outs() << getPassesString(PassesToRun) << "\n";
+ outs() << "opt " << Filename;
+ for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
+ outs() << " -load " << PluginLoader::getPlugin(i);
+ }
+ outs() << " " << getPassesString(PassesToRun) << "\n";
}
cl::opt<bool> SilencePasses("silence-passes",
@@ -145,10 +148,9 @@ bool BugDriver::runPasses(Module *Program,
return 1;
}
- sys::Path tool = PrependMainExecutablePath("opt", getToolName(),
- (void*)"opt");
+ sys::Path tool = sys::Program::FindProgramByName("opt");
if (tool.empty()) {
- errs() << "Cannot find `opt' in executable directory!\n";
+ errs() << "Cannot find `opt' in PATH!\n";
return 1;
}
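
Two behavioral changes in this file: the "you can reproduce" line now replays every pass plugin given via -load, and bugpoint no longer requires `opt' to sit next to its own binary. A sketch contrasting the old and new lookup (PathV1-era API; getToolName() as in the removed code):

    // Old: search only the directory containing the bugpoint executable.
    sys::Path tool = PrependMainExecutablePath("opt", getToolName(),
                                               (void*)"opt");
    // New: the ordinary PATH search, so any installed `opt' is found.
    tool = sys::Program::FindProgramByName("opt");
    if (tool.empty())
      errs() << "Cannot find `opt' in PATH!\n";
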
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index 0d98262b4310..25a2baef7dc8 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -234,6 +234,8 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
Timeout, MemoryLimit, Error);
}
+void AbstractInterpreter::anchor() { }
+
// LLI create method - Try to find the LLI executable
AbstractInterpreter *AbstractInterpreter::createLLI(const char *Argv0,
std::string &Message,
@@ -621,94 +623,6 @@ AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
return 0;
}
-GCC::FileType CBE::OutputCode(const std::string &Bitcode,
- sys::Path &OutputCFile, std::string &Error,
- unsigned Timeout, unsigned MemoryLimit) {
- sys::Path uniqueFile(Bitcode+".cbe.c");
- std::string ErrMsg;
- if (uniqueFile.makeUnique(true, &ErrMsg)) {
- errs() << "Error making unique filename: " << ErrMsg << "\n";
- exit(1);
- }
- OutputCFile = uniqueFile;
- std::vector<const char *> LLCArgs;
- LLCArgs.push_back(LLCPath.c_str());
-
- // Add any extra LLC args.
- for (unsigned i = 0, e = ToolArgs.size(); i != e; ++i)
- LLCArgs.push_back(ToolArgs[i].c_str());
-
- LLCArgs.push_back("-o");
- LLCArgs.push_back(OutputCFile.c_str()); // Output to the C file
- LLCArgs.push_back("-march=c"); // Output C language
- LLCArgs.push_back(Bitcode.c_str()); // This is the input bitcode
- LLCArgs.push_back(0);
-
- outs() << "<cbe>"; outs().flush();
- DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = LLCArgs.size()-1; i != e; ++i)
- errs() << " " << LLCArgs[i];
- errs() << "\n";
- );
- if (RunProgramWithTimeout(LLCPath, &LLCArgs[0], sys::Path(), sys::Path(),
- sys::Path(), Timeout, MemoryLimit))
- Error = ProcessFailure(LLCPath, &LLCArgs[0], Timeout, MemoryLimit);
- return GCC::CFile;
-}
-
-void CBE::compileProgram(const std::string &Bitcode, std::string *Error,
- unsigned Timeout, unsigned MemoryLimit) {
- sys::Path OutputCFile;
- OutputCode(Bitcode, OutputCFile, *Error, Timeout, MemoryLimit);
- OutputCFile.eraseFromDisk();
-}
-
-int CBE::ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &ArgsForGCC,
- const std::vector<std::string> &SharedLibs,
- unsigned Timeout,
- unsigned MemoryLimit) {
- sys::Path OutputCFile;
- OutputCode(Bitcode, OutputCFile, *Error, Timeout, MemoryLimit);
-
- FileRemover CFileRemove(OutputCFile.str(), !SaveTemps);
-
- std::vector<std::string> GCCArgs(ArgsForGCC);
- GCCArgs.insert(GCCArgs.end(), SharedLibs.begin(), SharedLibs.end());
-
- return gcc->ExecuteProgram(OutputCFile.str(), Args, GCC::CFile,
- InputFile, OutputFile, Error, GCCArgs,
- Timeout, MemoryLimit);
-}
-
-/// createCBE - Try to find the 'llc' executable
-///
-CBE *AbstractInterpreter::createCBE(const char *Argv0,
- std::string &Message,
- const std::string &GCCBinary,
- const std::vector<std::string> *Args,
- const std::vector<std::string> *GCCArgs) {
- sys::Path LLCPath =
- PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t)&createCBE);
- if (LLCPath.isEmpty()) {
- Message =
- "Cannot find `llc' in executable directory!\n";
- return 0;
- }
-
- Message = "Found llc: " + LLCPath.str() + "\n";
- GCC *gcc = GCC::create(Message, GCCBinary, GCCArgs);
- if (!gcc) {
- errs() << Message << "\n";
- exit(1);
- }
- return new CBE(LLCPath, gcc, Args);
-}
-
//===---------------------------------------------------------------------===//
// GCC abstraction
//
@@ -920,8 +834,7 @@ int GCC::MakeSharedObject(const std::string &InputFile, FileType fileType,
} else
GCCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
- if ((TargetTriple.getArch() == Triple::alpha) ||
- (TargetTriple.getArch() == Triple::x86_64))
+ if (TargetTriple.getArch() == Triple::x86_64)
GCCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
if (TargetTriple.getArch() == Triple::sparc)
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index cfa8acf6b240..7b93394fd8ca 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -86,6 +86,7 @@ public:
/// complexity behind a simple interface.
///
class AbstractInterpreter {
+ virtual void anchor();
public:
static CBE *createCBE(const char *Argv0, std::string &Message,
const std::string &GCCBinary,
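
The out-of-line anchor() added here is the usual vtable-anchoring idiom: a class whose virtual functions are all inline would otherwise get its vtable and RTTI emitted weakly into every translation unit that uses it. Distilled:

    // Header: declare one deliberately non-inline virtual method...
    class AbstractInterpreter {
      virtual void anchor();   // no inline body
    public:
      virtual ~AbstractInterpreter() {}
    };
    // One .cpp file (ToolRunner.cpp above): ...and define it exactly once,
    // pinning the vtable to a single object file.
    void AbstractInterpreter::anchor() {}
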
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 6a87521a17b6..8f15b026a511 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -120,6 +120,7 @@ int main(int argc, char **argv) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
initializeScalarOpts(Registry);
+ initializeVectorization(Registry);
initializeIPO(Registry);
initializeAnalysis(Registry);
initializeIPA(Registry);
diff --git a/tools/edis/CMakeLists.txt b/tools/edis/CMakeLists.txt
deleted file mode 100644
index 1e162f93183f..000000000000
--- a/tools/edis/CMakeLists.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-include_directories(${CMAKE_CURRENT_BINARY_DIR})
-
-set(SOURCES
- ../../include/llvm-c/EnhancedDisassembly.h
- EDMain.cpp
- )
-
-set(EDIS_DEPENDS LLVMMCDisassembler LLVMMCParser)
-if( LLVM_TARGETS_TO_BUILD MATCHES X86 )
- list(APPEND EDIS_DEPENDS LLVMX86AsmPrinter LLVMX86AsmParser LLVMX86Disassembler LLVMX86Desc)
-endif()
-if( LLVM_TARGETS_TO_BUILD MATCHES ARM )
- list(APPEND EDIS_DEPENDS LLVMARMAsmPrinter LLVMARMAsmParser LLVMARMDisassembler LLVMARMDesc)
-endif()
-
-add_llvm_library(EnhancedDisassembly ${SOURCES})
-set_property(TARGET EnhancedDisassembly PROPERTY
- OUTPUT_NAME "EnhancedDisassembly")
-
-add_llvm_library_dependencies(EnhancedDisassembly
- ${EDIS_DEPENDS})
diff --git a/tools/edis/EDMain.cpp b/tools/edis/EDMain.cpp
deleted file mode 100644
index 16855b3f45d8..000000000000
--- a/tools/edis/EDMain.cpp
+++ /dev/null
@@ -1,284 +0,0 @@
-//===-- EDMain.cpp - LLVM Enhanced Disassembly C API ----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the enhanced disassembler's public C API.
-//
-//===----------------------------------------------------------------------===//
-
-// FIXME: This code isn't layered right, the headers should be moved to
-// include llvm/MC/MCDisassembler or something.
-#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
-#include "../../lib/MC/MCDisassembler/EDInst.h"
-#include "../../lib/MC/MCDisassembler/EDOperand.h"
-#include "../../lib/MC/MCDisassembler/EDToken.h"
-#include "llvm-c/EnhancedDisassembly.h"
-using namespace llvm;
-
-int EDGetDisassembler(EDDisassemblerRef *disassembler,
- const char *triple,
- EDAssemblySyntax_t syntax) {
- EDDisassembler::initialize();
-
- EDDisassembler::AssemblySyntax Syntax;
- switch (syntax) {
- default: assert(0 && "Unknown assembly syntax!");
- case kEDAssemblySyntaxX86Intel:
- Syntax = EDDisassembler::kEDAssemblySyntaxX86Intel;
- break;
- case kEDAssemblySyntaxX86ATT:
- Syntax = EDDisassembler::kEDAssemblySyntaxX86ATT;
- break;
- case kEDAssemblySyntaxARMUAL:
- Syntax = EDDisassembler::kEDAssemblySyntaxARMUAL;
- break;
- }
-
- EDDisassemblerRef ret = EDDisassembler::getDisassembler(triple, Syntax);
-
- if (!ret)
- return -1;
- *disassembler = ret;
- return 0;
-}
-
-int EDGetRegisterName(const char** regName,
- EDDisassemblerRef disassembler,
- unsigned regID) {
- const char *name = ((EDDisassembler*)disassembler)->nameWithRegisterID(regID);
- if (!name)
- return -1;
- *regName = name;
- return 0;
-}
-
-int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
- unsigned regID) {
- return ((EDDisassembler*)disassembler)->registerIsStackPointer(regID) ? 1 : 0;
-}
-
-int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
- unsigned regID) {
- return ((EDDisassembler*)disassembler)->registerIsProgramCounter(regID) ? 1:0;
-}
-
-unsigned int EDCreateInsts(EDInstRef *insts,
- unsigned int count,
- EDDisassemblerRef disassembler,
- ::EDByteReaderCallback byteReader,
- uint64_t address,
- void *arg) {
- unsigned int index;
-
- for (index = 0; index < count; ++index) {
- EDInst *inst = ((EDDisassembler*)disassembler)->createInst(byteReader,
- address, arg);
-
- if (!inst)
- return index;
-
- insts[index] = inst;
- address += inst->byteSize();
- }
-
- return count;
-}
-
-void EDReleaseInst(EDInstRef inst) {
- delete ((EDInst*)inst);
-}
-
-int EDInstByteSize(EDInstRef inst) {
- return ((EDInst*)inst)->byteSize();
-}
-
-int EDGetInstString(const char **buf,
- EDInstRef inst) {
- return ((EDInst*)inst)->getString(*buf);
-}
-
-int EDInstID(unsigned *instID, EDInstRef inst) {
- *instID = ((EDInst*)inst)->instID();
- return 0;
-}
-
-int EDInstIsBranch(EDInstRef inst) {
- return ((EDInst*)inst)->isBranch();
-}
-
-int EDInstIsMove(EDInstRef inst) {
- return ((EDInst*)inst)->isMove();
-}
-
-int EDBranchTargetID(EDInstRef inst) {
- return ((EDInst*)inst)->branchTargetID();
-}
-
-int EDMoveSourceID(EDInstRef inst) {
- return ((EDInst*)inst)->moveSourceID();
-}
-
-int EDMoveTargetID(EDInstRef inst) {
- return ((EDInst*)inst)->moveTargetID();
-}
-
-int EDNumTokens(EDInstRef inst) {
- return ((EDInst*)inst)->numTokens();
-}
-
-int EDGetToken(EDTokenRef *token,
- EDInstRef inst,
- int index) {
- return ((EDInst*)inst)->getToken(*(EDToken**)token, index);
-}
-
-int EDGetTokenString(const char **buf,
- EDTokenRef token) {
- return ((EDToken*)token)->getString(*buf);
-}
-
-int EDOperandIndexForToken(EDTokenRef token) {
- return ((EDToken*)token)->operandID();
-}
-
-int EDTokenIsWhitespace(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenWhitespace;
-}
-
-int EDTokenIsPunctuation(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenPunctuation;
-}
-
-int EDTokenIsOpcode(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenOpcode;
-}
-
-int EDTokenIsLiteral(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenLiteral;
-}
-
-int EDTokenIsRegister(EDTokenRef token) {
- return ((EDToken*)token)->type() == EDToken::kTokenRegister;
-}
-
-int EDTokenIsNegativeLiteral(EDTokenRef token) {
- if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
- return -1;
-
- return ((EDToken*)token)->literalSign();
-}
-
-int EDLiteralTokenAbsoluteValue(uint64_t *value, EDTokenRef token) {
- if (((EDToken*)token)->type() != EDToken::kTokenLiteral)
- return -1;
-
- return ((EDToken*)token)->literalAbsoluteValue(*value);
-}
-
-int EDRegisterTokenValue(unsigned *registerID,
- EDTokenRef token) {
- if (((EDToken*)token)->type() != EDToken::kTokenRegister)
- return -1;
-
- return ((EDToken*)token)->registerID(*registerID);
-}
-
-int EDNumOperands(EDInstRef inst) {
- return ((EDInst*)inst)->numOperands();
-}
-
-int EDGetOperand(EDOperandRef *operand,
- EDInstRef inst,
- int index) {
- return ((EDInst*)inst)->getOperand(*(EDOperand**)operand, index);
-}
-
-int EDOperandIsRegister(EDOperandRef operand) {
- return ((EDOperand*)operand)->isRegister();
-}
-
-int EDOperandIsImmediate(EDOperandRef operand) {
- return ((EDOperand*)operand)->isImmediate();
-}
-
-int EDOperandIsMemory(EDOperandRef operand) {
- return ((EDOperand*)operand)->isMemory();
-}
-
-int EDRegisterOperandValue(unsigned *value, EDOperandRef operand) {
- if (!((EDOperand*)operand)->isRegister())
- return -1;
- *value = ((EDOperand*)operand)->regVal();
- return 0;
-}
-
-int EDImmediateOperandValue(uint64_t *value, EDOperandRef operand) {
- if (!((EDOperand*)operand)->isImmediate())
- return -1;
- *value = ((EDOperand*)operand)->immediateVal();
- return 0;
-}
-
-int EDEvaluateOperand(uint64_t *result, EDOperandRef operand,
- ::EDRegisterReaderCallback regReader, void *arg) {
- return ((EDOperand*)operand)->evaluate(*result, regReader, arg);
-}
-
-#ifdef __BLOCKS__
-
-struct ByteReaderWrapper {
- EDByteBlock_t byteBlock;
-};
-
-static int readerWrapperCallback(uint8_t *byte,
- uint64_t address,
- void *arg) {
- struct ByteReaderWrapper *wrapper = (struct ByteReaderWrapper *)arg;
- return wrapper->byteBlock(byte, address);
-}
-
-unsigned int EDBlockCreateInsts(EDInstRef *insts,
- int count,
- EDDisassemblerRef disassembler,
- EDByteBlock_t byteBlock,
- uint64_t address) {
- struct ByteReaderWrapper wrapper;
- wrapper.byteBlock = byteBlock;
-
- return EDCreateInsts(insts,
- count,
- disassembler,
- readerWrapperCallback,
- address,
- (void*)&wrapper);
-}
-
-int EDBlockEvaluateOperand(uint64_t *result, EDOperandRef operand,
- EDRegisterBlock_t regBlock) {
- return ((EDOperand*)operand)->evaluate(*result, regBlock);
-}
-
-int EDBlockVisitTokens(EDInstRef inst, ::EDTokenVisitor_t visitor) {
- return ((EDInst*)inst)->visitTokens((llvm::EDTokenVisitor_t)visitor);
-}
-
-#else
-
-extern "C" unsigned int EDBlockCreateInsts() {
- return 0;
-}
-
-extern "C" int EDBlockEvaluateOperand() {
- return -1;
-}
-
-extern "C" int EDBlockVisitTokens() {
- return -1;
-}
-
-#endif
diff --git a/tools/edis/Makefile b/tools/edis/Makefile
deleted file mode 100644
index 3fcb408c8817..000000000000
--- a/tools/edis/Makefile
+++ /dev/null
@@ -1,53 +0,0 @@
-##===- tools/edis/Makefile -----------------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../..
-LIBRARYNAME = EnhancedDisassembly
-LINK_LIBS_IN_SHARED = 1
-
-EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/EnhancedDisassembly.exports
-
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_COMPONENTS := mcdisassembler
-
-# If the X86 target is enabled, link in the asmprinter and disassembler.
-ifneq ($(filter $(TARGETS_TO_BUILD), X86),)
-LINK_COMPONENTS += x86asmprinter x86disassembler
-endif
-
-# If the ARM target is enabled, link in the asmprinter and disassembler.
-ifneq ($(filter $(TARGETS_TO_BUILD), ARM),)
-LINK_COMPONENTS += armasmprinter armdisassembler
-endif
-
-include $(LEVEL)/Makefile.common
-
-ifeq ($(HOST_OS),Darwin)
- # extra options to override libtool defaults
- LLVMLibsOptions := $(LLVMLibsOptions) \
- -Wl,-dead_strip
-
- ifdef EDIS_VERSION
- LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version -Wl,$(EDIS_VERSION) \
- -Wl,-compatibility_version -Wl,1
- endif
-
- # Mac OS X 10.4 and earlier tools do not allow a second -install_name on command line
- DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
- ifneq ($(DARWIN_VERS),8)
- LLVMLibsOptions := $(LLVMLibsOptions) \
- -Wl,-install_name \
- -Wl,"@rpath/lib$(LIBRARYNAME)$(SHLIBEXT)"
- endif
-endif
-
diff --git a/tools/gold/CMakeLists.txt b/tools/gold/CMakeLists.txt
index eb4b6e6aae53..2cc132ff78e4 100644
--- a/tools/gold/CMakeLists.txt
+++ b/tools/gold/CMakeLists.txt
@@ -40,6 +40,7 @@ else()
set_property(SOURCE gold-plugin.cpp APPEND PROPERTY
OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/exportsfile)
- target_link_libraries(LLVMgold LTO -Wl,--version-script,exportsfile)
+ target_link_libraries(LLVMgold LTO
+ -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/exportsfile)
add_dependencies(LLVMgold gold_exports)
endif()
diff --git a/tools/gold/Makefile b/tools/gold/Makefile
index 759406f7b1b3..02f66d73eedc 100644
--- a/tools/gold/Makefile
+++ b/tools/gold/Makefile
@@ -7,8 +7,12 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-LIBRARYNAME = LLVMgold
+LEVEL := ../..
+LIBRARYNAME := LLVMgold
+LINK_COMPONENTS := support
+LINK_LIBS_IN_SHARED := 1
+SHARED_LIBRARY := 1
+LOADABLE_MODULE := 1
EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports
@@ -17,15 +21,9 @@ EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports
# early so we can set up LINK_COMPONENTS before including Makefile.rules
include $(LEVEL)/Makefile.config
-LINK_LIBS_IN_SHARED=1
-SHARED_LIBRARY = 1
-LOADABLE_MODULE = 1
-
-LINK_COMPONENTS := support
-
# Because off_t is used in the public API, the largefile parts are required for
# ABI compatibility.
-CXXFLAGS+=-I$(BINUTILS_INCDIR) -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
-CXXFLAGS+=$(SharedLibDir)/$(SharedPrefix)LTO$(SHLIBEXT)
+CXXFLAGS += -I$(BINUTILS_INCDIR) -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
+CXXFLAGS += -L$(SharedLibDir)/$(SharedPrefix) -lLTO
include $(LEVEL)/Makefile.common
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 6f547b3a30ca..cfd84c0f67d7 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
+#include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H
#include "plugin-api.h"
#include "llvm-c/lto.h"
diff --git a/tools/llc/LLVMBuild.txt b/tools/llc/LLVMBuild.txt
new file mode 100644
index 000000000000..8c8794f62069
--- /dev/null
+++ b/tools/llc/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llc/LLVMBuild.txt --------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llc
+parent = Tools
+required_libraries = AsmParser BitReader all-targets
diff --git a/tools/llc/Makefile b/tools/llc/Makefile
index 7319aada489e..b32d5575d53e 100644
--- a/tools/llc/Makefile
+++ b/tools/llc/Makefile
@@ -7,15 +7,9 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llc
+LEVEL := ../..
+TOOLNAME := llc
+LINK_COMPONENTS := all-targets bitreader asmparser
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) bitreader asmparser
-
-include $(LLVM_SRC_ROOT)/Makefile.rules
+include $(LEVEL)/Makefile.common
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index d29bd9bc69a9..9e30ac198b20 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -21,7 +21,6 @@
#include "llvm/Support/IRReader.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
-#include "llvm/Config/config.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -133,11 +132,147 @@ cl::opt<bool> DisableDotLoc("disable-dot-loc", cl::Hidden,
cl::opt<bool> DisableCFI("disable-cfi", cl::Hidden,
cl::desc("Do not use .cfi_* directives"));
+cl::opt<bool> EnableDwarfDirectory("enable-dwarf-directory", cl::Hidden,
+ cl::desc("Use .file directives with an explicit directory."));
+
static cl::opt<bool>
DisableRedZone("disable-red-zone",
cl::desc("Do not emit code that uses the red zone."),
cl::init(false));
+static cl::opt<bool>
+EnableFPMAD("enable-fp-mad",
+ cl::desc("Enable less precise MAD instructions to be generated"),
+ cl::init(false));
+
+static cl::opt<bool>
+PrintCode("print-machineinstrs",
+ cl::desc("Print generated machine code"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableFPElim("disable-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableFPElimNonLeaf("disable-non-leaf-fp-elim",
+ cl::desc("Disable frame pointer elimination optimization for non-leaf funcs"),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableExcessPrecision("disable-excess-fp-precision",
+ cl::desc("Disable optimizations that may increase FP precision"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableUnsafeFPMath("enable-unsafe-fp-math",
+ cl::desc("Enable optimizations that may decrease FP precision"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
+ cl::Hidden,
+ cl::desc("Force codegen to assume rounding mode can change dynamically"),
+ cl::init(false));
+
+static cl::opt<bool>
+GenerateSoftFloatCalls("soft-float",
+ cl::desc("Generate software floating point library calls"),
+ cl::init(false));
+
+static cl::opt<llvm::FloatABI::ABIType>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
+
+static cl::opt<bool>
+DontPlaceZerosInBSS("nozero-initialized-in-bss",
+ cl::desc("Don't place zero-initialized symbols into bss section"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableJITExceptionHandling("jit-enable-eh",
+ cl::desc("Emit exception handling information"),
+ cl::init(false));
+
+// In debug builds, make this default to true.
+#ifdef NDEBUG
+#define EMIT_DEBUG false
+#else
+#define EMIT_DEBUG true
+#endif
+static cl::opt<bool>
+EmitJitDebugInfo("jit-emit-debug",
+ cl::desc("Emit debug information to debugger"),
+ cl::init(EMIT_DEBUG));
+#undef EMIT_DEBUG
+
+static cl::opt<bool>
+EmitJitDebugInfoToDisk("jit-emit-debug-to-disk",
+ cl::Hidden,
+ cl::desc("Emit debug info objfiles to disk"),
+ cl::init(false));
+
+static cl::opt<bool>
+EnableGuaranteedTailCallOpt("tailcallopt",
+ cl::desc("Turn fastcc calls into tail calls by (potentially) changing ABI."),
+ cl::init(false));
+
+static cl::opt<bool>
+DisableTailCalls("disable-tail-calls",
+ cl::desc("Never emit tail calls"),
+ cl::init(false));
+
+static cl::opt<unsigned>
+OverrideStackAlignment("stack-alignment",
+ cl::desc("Override default stack alignment"),
+ cl::init(0));
+
+static cl::opt<bool>
+EnableRealignStack("realign-stack",
+ cl::desc("Realign stack if needed"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableSwitchTables(cl::Hidden, "disable-jump-tables",
+ cl::desc("Do not generate jump tables."),
+ cl::init(false));
+
+static cl::opt<std::string>
+TrapFuncName("trap-func", cl::Hidden,
+ cl::desc("Emit a call to trap function rather than a trap instruction"),
+ cl::init(""));
+
+static cl::opt<bool>
+EnablePIE("enable-pie",
+ cl::desc("Assume the creation of a position independent executable."),
+ cl::init(false));
+
+static cl::opt<bool>
+SegmentedStacks("segmented-stacks",
+ cl::desc("Use segmented stacks if possible."),
+ cl::init(false));
+
+
// GetFileNameRoot - Helper function to get the basename of a filename.
static inline std::string
GetFileNameRoot(const std::string &InputFilename) {
@@ -166,7 +301,6 @@ static tool_output_file *GetOutputStream(const char *TargetName,
OutputFilename = GetFileNameRoot(InputFilename);
switch (FileType) {
- default: assert(0 && "Unknown file type");
case TargetMachine::CGFT_AssemblyFile:
if (TargetName[0] == 'c') {
if (TargetName[1] == 0)
@@ -194,7 +328,6 @@ static tool_output_file *GetOutputStream(const char *TargetName,
// Decide if we need "binary" output.
bool Binary = false;
switch (FileType) {
- default: assert(0 && "Unknown file type");
case TargetMachine::CGFT_AssemblyFile:
break;
case TargetMachine::CGFT_ObjectFile:
@@ -247,7 +380,7 @@ int main(int argc, char **argv) {
M.reset(ParseIRFile(InputFilename, Err, Context));
if (M.get() == 0) {
- Err.Print(argv[0], errs());
+ Err.print(argv[0], errs());
return 1;
}
Module &mod = *M.get();
@@ -258,7 +391,7 @@ int main(int argc, char **argv) {
Triple TheTriple(mod.getTargetTriple());
if (TheTriple.getTriple().empty())
- TheTriple.setTriple(sys::getHostTriple());
+ TheTriple.setTriple(sys::getDefaultTargetTriple());
// Allocate target machine. First, check whether the user has explicitly
// specified an architecture to compile for. If so we have to look it up by
@@ -303,10 +436,49 @@ int main(int argc, char **argv) {
FeaturesStr = Features.getString();
}
+ CodeGenOpt::Level OLvl = CodeGenOpt::Default;
+ switch (OptLevel) {
+ default:
+ errs() << argv[0] << ": invalid optimization level.\n";
+ return 1;
+ case ' ': break;
+ case '0': OLvl = CodeGenOpt::None; break;
+ case '1': OLvl = CodeGenOpt::Less; break;
+ case '2': OLvl = CodeGenOpt::Default; break;
+ case '3': OLvl = CodeGenOpt::Aggressive; break;
+ }
+
+ TargetOptions Options;
+ Options.LessPreciseFPMADOption = EnableFPMAD;
+ Options.PrintMachineCode = PrintCode;
+ Options.NoFramePointerElim = DisableFPElim;
+ Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf;
+ Options.NoExcessFPPrecision = DisableExcessPrecision;
+ Options.UnsafeFPMath = EnableUnsafeFPMath;
+ Options.NoInfsFPMath = EnableNoInfsFPMath;
+ Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+ Options.HonorSignDependentRoundingFPMathOption =
+ EnableHonorSignDependentRoundingFPMath;
+ Options.UseSoftFloat = GenerateSoftFloatCalls;
+ if (FloatABIForCalls != FloatABI::Default)
+ Options.FloatABIType = FloatABIForCalls;
+ Options.NoZerosInBSS = DontPlaceZerosInBSS;
+ Options.JITExceptionHandling = EnableJITExceptionHandling;
+ Options.JITEmitDebugInfo = EmitJitDebugInfo;
+ Options.JITEmitDebugInfoToDisk = EmitJitDebugInfoToDisk;
+ Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;
+ Options.DisableTailCalls = DisableTailCalls;
+ Options.StackAlignmentOverride = OverrideStackAlignment;
+ Options.RealignStack = EnableRealignStack;
+ Options.DisableJumpTables = DisableSwitchTables;
+ Options.TrapFuncName = TrapFuncName;
+ Options.PositionIndependentExecutable = EnablePIE;
+ Options.EnableSegmentedStacks = SegmentedStacks;
+
std::auto_ptr<TargetMachine>
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
- MCPU, FeaturesStr,
- RelocModel, CMModel));
+ MCPU, FeaturesStr, Options,
+ RelocModel, CMModel, OLvl));
assert(target.get() && "Could not allocate target machine!");
TargetMachine &Target = *target.get();
@@ -316,6 +488,12 @@ int main(int argc, char **argv) {
if (DisableCFI)
Target.setMCUseCFI(false);
+ if (EnableDwarfDirectory)
+ Target.setMCUseDwarfDirectory(true);
+
+ if (GenerateSoftFloatCalls)
+ FloatABIForCalls = FloatABI::Soft;
+
// Disable .loc support for older OS X versions.
if (TheTriple.isMacOSX() &&
TheTriple.isMacOSXVersionLT(10, 6))
@@ -326,18 +504,6 @@ int main(int argc, char **argv) {
(GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]));
if (!Out) return 1;
- CodeGenOpt::Level OLvl = CodeGenOpt::Default;
- switch (OptLevel) {
- default:
- errs() << argv[0] << ": invalid optimization level.\n";
- return 1;
- case ' ': break;
- case '0': OLvl = CodeGenOpt::None; break;
- case '1': OLvl = CodeGenOpt::Less; break;
- case '2': OLvl = CodeGenOpt::Default; break;
- case '3': OLvl = CodeGenOpt::Aggressive; break;
- }
-
// Build up all of the passes that we want to do to the module.
PassManager PM;
@@ -362,7 +528,7 @@ int main(int argc, char **argv) {
formatted_raw_ostream FOS(Out->os());
// Ask the target to add backend passes as necessary.
- if (Target.addPassesToEmitFile(PM, FOS, FileType, OLvl, NoVerify)) {
+ if (Target.addPassesToEmitFile(PM, FOS, FileType, NoVerify)) {
errs() << argv[0] << ": target does not support generation of this"
<< " file type!\n";
return 1;
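
The large llc.cpp hunk tracks an API change in this release: per-function codegen knobs moved out of library-global cl::opt state into a TargetOptions bag that is handed to createTargetMachine() together with the optimization level. A trimmed sketch of the new call, using only names from the hunks above:

    TargetOptions Options;
    Options.UnsafeFPMath = EnableUnsafeFPMath;  // -enable-unsafe-fp-math
    Options.NoFramePointerElim = DisableFPElim; // -disable-fp-elim
    Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt;

    std::auto_ptr<TargetMachine> target(
        TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU,
                                       FeaturesStr, Options,
                                       RelocModel, CMModel, OLvl));
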
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index 9378ef255466..a5d2e61ea24c 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -1,5 +1,22 @@
+
+link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
+
set(LLVM_LINK_COMPONENTS mcjit jit interpreter nativecodegen bitreader asmparser selectiondag)
+if( LLVM_USE_OPROFILE )
+ set(LLVM_LINK_COMPONENTS
+ ${LLVM_LINK_COMPONENTS}
+ OProfileJIT
+ )
+endif( LLVM_USE_OPROFILE )
+
+if( LLVM_USE_INTEL_JITEVENTS )
+ set(LLVM_LINK_COMPONENTS
+ ${LLVM_LINK_COMPONENTS}
+ IntelJITEvents
+ )
+endif( LLVM_USE_INTEL_JITEVENTS )
+
add_llvm_tool(lli
lli.cpp
)
diff --git a/tools/lli/LLVMBuild.txt b/tools/lli/LLVMBuild.txt
new file mode 100644
index 000000000000..4eb82bd9e1c5
--- /dev/null
+++ b/tools/lli/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/lli/LLVMBuild.txt --------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = lli
+parent = Tools
+required_libraries = AsmParser BitReader Interpreter JIT MCJIT NativeCodeGen SelectionDAG
diff --git a/tools/lli/Makefile b/tools/lli/Makefile
index 80aa82b4d681..100fc2e415aa 100644
--- a/tools/lli/Makefile
+++ b/tools/lli/Makefile
@@ -7,9 +7,23 @@
#
##===----------------------------------------------------------------------===##
-LEVEL := ../..
+LEVEL := ../..
TOOLNAME := lli
+
+include $(LEVEL)/Makefile.config
+
LINK_COMPONENTS := mcjit jit interpreter nativecodegen bitreader asmparser selectiondag
-# Enable JIT support
-include $(LEVEL)/Makefile.common
+# If Intel JIT Events support is configured, link against the LLVM Intel JIT
+# Events interface library
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+ LINK_COMPONENTS += inteljitevents
+endif
+
+# If oprofile support is configured, link against the LLVM oprofile interface
+# library
+ifeq ($(USE_OPROFILE), 1)
+ LINK_COMPONENTS += oprofilejit
+endif
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 50c7a498f1cd..efcc1f5870ca 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -23,6 +23,7 @@
#include "llvm/ExecutionEngine/Interpreter.h"
#include "llvm/ExecutionEngine/JIT.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/IRReader.h"
@@ -94,12 +95,12 @@ namespace {
"of the executable"),
cl::value_desc("function"),
cl::init("main"));
-
+
cl::opt<std::string>
FakeArgv0("fake-argv0",
cl::desc("Override the 'argv[0]' value passed into the executing"
" program"), cl::value_desc("executable"));
-
+
cl::opt<bool>
DisableCoreFiles("disable-core-files", cl::Hidden,
cl::desc("Disable emission of core files if possible"));
@@ -158,7 +159,7 @@ static void do_shutdown() {
int main(int argc, char **argv, char * const *envp) {
sys::PrintStackTraceOnErrorSignal();
PrettyStackTraceProgram X(argc, argv);
-
+
LLVMContext &Context = getGlobalContext();
atexit(do_shutdown); // Call llvm_shutdown() on exit.
@@ -173,12 +174,12 @@ int main(int argc, char **argv, char * const *envp) {
// If the user doesn't want core files, disable them.
if (DisableCoreFiles)
sys::Process::PreventCoreFiles();
-
+
// Load the bitcode...
SMDiagnostic Err;
Module *Mod = ParseIRFile(InputFile, Err, Context);
if (!Mod) {
- Err.Print(argv[0], errs());
+ Err.print(argv[0], errs());
return 1;
}
@@ -199,6 +200,8 @@ int main(int argc, char **argv, char * const *envp) {
builder.setRelocationModel(RelocModel);
builder.setCodeModel(CMModel);
builder.setErrorStr(&ErrorMsg);
+ builder.setJITMemoryManager(ForceInterpreter ? 0 :
+ JITMemoryManager::CreateDefaultMemManager());
builder.setEngineKind(ForceInterpreter
? EngineKind::Interpreter
: EngineKind::JIT);
@@ -207,9 +210,11 @@ int main(int argc, char **argv, char * const *envp) {
if (!TargetTriple.empty())
Mod->setTargetTriple(Triple::normalize(TargetTriple));
- // Enable MCJIT, if desired.
- if (UseMCJIT)
+ // Enable MCJIT if desired.
+ if (UseMCJIT && !ForceInterpreter) {
builder.setUseMCJIT(true);
+ builder.setJITMemoryManager(JITMemoryManager::CreateDefaultMemManager());
+ }
CodeGenOpt::Level OLvl = CodeGenOpt::Default;
switch (OptLevel) {
@@ -233,7 +238,12 @@ int main(int argc, char **argv, char * const *envp) {
exit(1);
}
- EE->RegisterJITEventListener(createOProfileJITEventListener());
+ // The following functions have no effect if their respective profiling
+ // support wasn't enabled in the build configuration.
+ EE->RegisterJITEventListener(
+ JITEventListener::createOProfileJITEventListener());
+ EE->RegisterJITEventListener(
+ JITEventListener::createIntelJITEventListener());
EE->DisableLazyCompilation(NoLazyCompilation);
@@ -262,15 +272,15 @@ int main(int argc, char **argv, char * const *envp) {
return -1;
}
- // If the program doesn't explicitly call exit, we will need the Exit
- // function later on to make an explicit call, so get the function now.
+ // If the program doesn't explicitly call exit, we will need the Exit
+ // function later on to make an explicit call, so get the function now.
Constant *Exit = Mod->getOrInsertFunction("exit", Type::getVoidTy(Context),
Type::getInt32Ty(Context),
NULL);
-
+
// Reset errno to zero on entry to main.
errno = 0;
-
+
// Run static constructors.
EE->runStaticConstructorsDestructors(false);
@@ -287,8 +297,8 @@ int main(int argc, char **argv, char * const *envp) {
// Run static destructors.
EE->runStaticConstructorsDestructors(true);
-
- // If the program didn't call exit explicitly, we should call it now.
+
+ // If the program didn't call exit explicitly, we should call it now.
// This ensures that any atexit handlers get called correctly.
if (Function *ExitF = dyn_cast<Function>(Exit)) {
std::vector<GenericValue> Args;
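
lli's engine construction now supplies an explicit JIT memory manager (and none when interpreting), and enables MCJIT only when the interpreter has not been forced. A condensed sketch of the resulting EngineBuilder setup, with all names taken from the hunks above:

    EngineBuilder builder(Mod);
    builder.setErrorStr(&ErrorMsg);
    builder.setJITMemoryManager(ForceInterpreter ? 0 :
                                JITMemoryManager::CreateDefaultMemManager());
    builder.setEngineKind(ForceInterpreter ? EngineKind::Interpreter
                                           : EngineKind::JIT);
    if (UseMCJIT && !ForceInterpreter)
      builder.setUseMCJIT(true);  // MCJIT implies the JIT path
    ExecutionEngine *EE = builder.create();
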
diff --git a/tools/llvm-ar/LLVMBuild.txt b/tools/llvm-ar/LLVMBuild.txt
new file mode 100644
index 000000000000..1f61a3201488
--- /dev/null
+++ b/tools/llvm-ar/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-ar/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-ar
+parent = Tools
+required_libraries = Archive
diff --git a/tools/llvm-ar/Makefile b/tools/llvm-ar/Makefile
index e4fe4e8ca39a..6ee6f34942d7 100644
--- a/tools/llvm-ar/Makefile
+++ b/tools/llvm-ar/Makefile
@@ -6,20 +6,13 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-ar
-LINK_COMPONENTS = archive
+LEVEL := ../..
+TOOLNAME := llvm-ar
+LINK_COMPONENTS := archive
REQUIRES_EH := 1
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
-
-check-local::
- $(Echo) Checking llvm-ar
- $(Verb) $(ToolDir)/llvm-ar zRrS nada.a .
- $(Verb) $(ToolDir)/llvm-ar tv nada.a | \
- grep Debug/llvm-ar.d >/dev/null 2>&1
- $(Verb) $(RM) -f nada.a
diff --git a/tools/llvm-as/LLVMBuild.txt b/tools/llvm-as/LLVMBuild.txt
new file mode 100644
index 000000000000..542470bbdd8b
--- /dev/null
+++ b/tools/llvm-as/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-as/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-as
+parent = Tools
+required_libraries = AsmParser BitWriter
diff --git a/tools/llvm-as/Makefile b/tools/llvm-as/Makefile
index e1e5853a7b6a..dfd71b295a9a 100644
--- a/tools/llvm-as/Makefile
+++ b/tools/llvm-as/Makefile
@@ -7,11 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-as
+LEVEL := ../..
+TOOLNAME := llvm-as
LINK_COMPONENTS := asmparser bitwriter
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index c1661cdcb196..1def9a4a2d70 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -96,7 +96,7 @@ int main(int argc, char **argv) {
SMDiagnostic Err;
std::auto_ptr<Module> M(ParseAssemblyFile(InputFilename, Err, Context));
if (M.get() == 0) {
- Err.Print(argv[0], errs());
+ Err.print(argv[0], errs());
return 1;
}
diff --git a/tools/llvm-bcanalyzer/LLVMBuild.txt b/tools/llvm-bcanalyzer/LLVMBuild.txt
new file mode 100644
index 000000000000..ee77a7d33ab6
--- /dev/null
+++ b/tools/llvm-bcanalyzer/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-bcanalyzer/LLVMBuild.txt --------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-bcanalyzer
+parent = Tools
+required_libraries = BitReader
diff --git a/tools/llvm-bcanalyzer/Makefile b/tools/llvm-bcanalyzer/Makefile
index 488387d5da24..2fc61dbd62a7 100644
--- a/tools/llvm-bcanalyzer/Makefile
+++ b/tools/llvm-bcanalyzer/Makefile
@@ -6,12 +6,12 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-bcanalyzer
+LEVEL := ../..
+TOOLNAME := llvm-bcanalyzer
LINK_COMPONENTS := bitreader
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 4ada64a5285f..d6300878d510 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -102,14 +102,13 @@ static const char *GetBlockName(unsigned BlockID,
default: return 0;
case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
- case bitc::TYPE_BLOCK_ID_OLD: return "TYPE_BLOCK_ID_OLD";
case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD: return "TYPE_SYMTAB_OLD";
case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
+ case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID";
}
}
@@ -163,7 +162,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default: return 0;
case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
}
- case bitc::TYPE_BLOCK_ID_OLD:
case bitc::TYPE_BLOCK_ID_NEW:
switch (CodeID) {
default: return 0;
@@ -175,8 +173,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::TYPE_CODE_OPAQUE: return "OPAQUE";
case bitc::TYPE_CODE_INTEGER: return "INTEGER";
case bitc::TYPE_CODE_POINTER: return "POINTER";
- case bitc::TYPE_CODE_FUNCTION: return "FUNCTION";
- case bitc::TYPE_CODE_STRUCT_OLD: return "STRUCT_OLD";
case bitc::TYPE_CODE_ARRAY: return "ARRAY";
case bitc::TYPE_CODE_VECTOR: return "VECTOR";
case bitc::TYPE_CODE_X86_FP80: return "X86_FP80";
@@ -186,6 +182,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::TYPE_CODE_STRUCT_ANON: return "STRUCT_ANON";
case bitc::TYPE_CODE_STRUCT_NAME: return "STRUCT_NAME";
case bitc::TYPE_CODE_STRUCT_NAMED: return "STRUCT_NAMED";
+ case bitc::TYPE_CODE_FUNCTION: return "FUNCTION";
}
case bitc::CONSTANTS_BLOCK_ID:
@@ -211,6 +208,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::CST_CODE_CE_CMP: return "CE_CMP";
case bitc::CST_CODE_INLINEASM: return "INLINEASM";
case bitc::CST_CODE_CE_SHUFVEC_EX: return "CE_SHUFVEC_EX";
+ case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS";
+ case bitc::CST_CODE_DATA: return "DATA";
}
case bitc::FUNCTION_BLOCK_ID:
switch (CodeID) {
@@ -231,7 +230,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::FUNC_CODE_INST_BR: return "INST_BR";
case bitc::FUNC_CODE_INST_SWITCH: return "INST_SWITCH";
case bitc::FUNC_CODE_INST_INVOKE: return "INST_INVOKE";
- case bitc::FUNC_CODE_INST_UNWIND: return "INST_UNWIND";
case bitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE";
case bitc::FUNC_CODE_INST_PHI: return "INST_PHI";
@@ -247,11 +245,6 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::FUNC_CODE_INST_CALL: return "INST_CALL";
case bitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC";
}
- case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
- switch (CodeID) {
- default: return 0;
- case bitc::TST_CODE_ENTRY: return "ENTRY";
- }
case bitc::VALUE_SYMTAB_BLOCK_ID:
switch (CodeID) {
default: return 0;
@@ -273,6 +266,11 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::METADATA_FN_NODE: return "METADATA_FN_NODE";
case bitc::METADATA_NAMED_NODE: return "METADATA_NAMED_NODE";
}
+ case bitc::USELIST_BLOCK_ID:
+ switch (CodeID) {
+ default: return 0;
+ case bitc::USELIST_CODE_ENTRY: return "USELIST_CODE_ENTRY";
+ }
}
}
@@ -333,7 +331,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
// BLOCKINFO is a special part of the stream.
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- if (Dump) errs() << Indent << "<BLOCKINFO_BLOCK/>\n";
+ if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n";
if (Stream.ReadBlockInfoBlock())
return Error("Malformed BlockInfoBlock");
uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
@@ -347,16 +345,16 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
const char *BlockName = 0;
if (Dump) {
- errs() << Indent << "<";
+ outs() << Indent << "<";
if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader())))
- errs() << BlockName;
+ outs() << BlockName;
else
- errs() << "UnknownBlock" << BlockID;
+ outs() << "UnknownBlock" << BlockID;
if (NonSymbolic && BlockName)
- errs() << " BlockID=" << BlockID;
+ outs() << " BlockID=" << BlockID;
- errs() << " NumWords=" << NumWords
+ outs() << " NumWords=" << NumWords
<< " BlockCodeSize=" << Stream.GetAbbrevIDWidth() << ">\n";
}
@@ -378,11 +376,11 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
BlockStats.NumBits += BlockBitEnd-BlockBitStart;
if (Dump) {
- errs() << Indent << "</";
+ outs() << Indent << "</";
if (BlockName)
- errs() << BlockName << ">\n";
+ outs() << BlockName << ">\n";
else
- errs() << "UnknownBlock" << BlockID << ">\n";
+ outs() << "UnknownBlock" << BlockID << ">\n";
}
return false;
}
@@ -424,25 +422,25 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
BlockStats.CodeFreq[Code].NumAbbrev++;
if (Dump) {
- errs() << Indent << " <";
+ outs() << Indent << " <";
if (const char *CodeName =
GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
- errs() << CodeName;
+ outs() << CodeName;
else
- errs() << "UnknownCode" << Code;
+ outs() << "UnknownCode" << Code;
if (NonSymbolic &&
GetCodeName(Code, BlockID, *Stream.getBitStreamReader()))
- errs() << " codeid=" << Code;
+ outs() << " codeid=" << Code;
if (AbbrevID != bitc::UNABBREV_RECORD)
- errs() << " abbrevid=" << AbbrevID;
+ outs() << " abbrevid=" << AbbrevID;
for (unsigned i = 0, e = Record.size(); i != e; ++i)
- errs() << " op" << i << "=" << (int64_t)Record[i];
+ outs() << " op" << i << "=" << (int64_t)Record[i];
- errs() << "/>";
+ outs() << "/>";
if (BlobStart) {
- errs() << " blob data = ";
+ outs() << " blob data = ";
bool BlobIsPrintable = true;
for (unsigned i = 0; i != BlobLen; ++i)
if (!isprint(BlobStart[i])) {
@@ -451,12 +449,12 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned IndentLevel) {
}
if (BlobIsPrintable)
- errs() << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
+ outs() << "'" << std::string(BlobStart, BlobStart+BlobLen) <<"'";
else
- errs() << "unprintable, " << BlobLen << " bytes.";
+ outs() << "unprintable, " << BlobLen << " bytes.";
}
- errs() << "\n";
+ outs() << "\n";
}
break;
@@ -485,13 +483,13 @@ static int AnalyzeBitcode() {
if (MemBuf->getBufferSize() & 3)
return Error("Bitcode stream should be a multiple of 4 bytes in length");
- unsigned char *BufPtr = (unsigned char *)MemBuf->getBufferStart();
- unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
+ const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
+ const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize();
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, EndBufPtr))
- if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr))
+ if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
return Error("Invalid bitcode wrapper header");
BitstreamReader StreamFile(BufPtr, EndBufPtr);
@@ -527,59 +525,58 @@ static int AnalyzeBitcode() {
++NumTopBlocks;
}
- if (Dump) errs() << "\n\n";
+ if (Dump) outs() << "\n\n";
uint64_t BufferSizeBits = (EndBufPtr-BufPtr)*CHAR_BIT;
// Print a summary of the read file.
- errs() << "Summary of " << InputFilename << ":\n";
- errs() << " Total size: ";
+ outs() << "Summary of " << InputFilename << ":\n";
+ outs() << " Total size: ";
PrintSize(BufferSizeBits);
- errs() << "\n";
- errs() << " Stream type: ";
+ outs() << "\n";
+ outs() << " Stream type: ";
switch (CurStreamType) {
- default: assert(0 && "Unknown bitstream type");
- case UnknownBitstream: errs() << "unknown\n"; break;
- case LLVMIRBitstream: errs() << "LLVM IR\n"; break;
+ case UnknownBitstream: outs() << "unknown\n"; break;
+ case LLVMIRBitstream: outs() << "LLVM IR\n"; break;
}
- errs() << " # Toplevel Blocks: " << NumTopBlocks << "\n";
- errs() << "\n";
+ outs() << " # Toplevel Blocks: " << NumTopBlocks << "\n";
+ outs() << "\n";
// Emit per-block stats.
- errs() << "Per-block Summary:\n";
+ outs() << "Per-block Summary:\n";
for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
E = BlockIDStats.end(); I != E; ++I) {
- errs() << " Block ID #" << I->first;
+ outs() << " Block ID #" << I->first;
if (const char *BlockName = GetBlockName(I->first, StreamFile))
- errs() << " (" << BlockName << ")";
- errs() << ":\n";
+ outs() << " (" << BlockName << ")";
+ outs() << ":\n";
const PerBlockIDStats &Stats = I->second;
- errs() << " Num Instances: " << Stats.NumInstances << "\n";
- errs() << " Total Size: ";
+ outs() << " Num Instances: " << Stats.NumInstances << "\n";
+ outs() << " Total Size: ";
PrintSize(Stats.NumBits);
- errs() << "\n";
+ outs() << "\n";
double pct = (Stats.NumBits * 100.0) / BufferSizeBits;
errs() << " Percent of file: " << format("%2.4f%%", pct) << "\n";
if (Stats.NumInstances > 1) {
- errs() << " Average Size: ";
+ outs() << " Average Size: ";
PrintSize(Stats.NumBits/(double)Stats.NumInstances);
- errs() << "\n";
- errs() << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
+ outs() << "\n";
+ outs() << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/"
<< Stats.NumSubBlocks/(double)Stats.NumInstances << "\n";
- errs() << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
+ outs() << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/"
<< Stats.NumAbbrevs/(double)Stats.NumInstances << "\n";
- errs() << " Tot/Avg Records: " << Stats.NumRecords << "/"
+ outs() << " Tot/Avg Records: " << Stats.NumRecords << "/"
<< Stats.NumRecords/(double)Stats.NumInstances << "\n";
} else {
- errs() << " Num SubBlocks: " << Stats.NumSubBlocks << "\n";
- errs() << " Num Abbrevs: " << Stats.NumAbbrevs << "\n";
- errs() << " Num Records: " << Stats.NumRecords << "\n";
+ outs() << " Num SubBlocks: " << Stats.NumSubBlocks << "\n";
+ outs() << " Num Abbrevs: " << Stats.NumAbbrevs << "\n";
+ outs() << " Num Records: " << Stats.NumRecords << "\n";
}
if (Stats.NumRecords) {
double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords;
- errs() << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
+ outs() << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n";
}
- errs() << "\n";
+ outs() << "\n";
// Print a histogram of the codes we see.
if (!NoHistogram && !Stats.CodeFreq.empty()) {
@@ -590,7 +587,7 @@ static int AnalyzeBitcode() {
std::stable_sort(FreqPairs.begin(), FreqPairs.end());
std::reverse(FreqPairs.begin(), FreqPairs.end());
- errs() << "\tRecord Histogram:\n";
+ outs() << "\tRecord Histogram:\n";
fprintf(stderr, "\t\t Count # Bits %% Abv Record Kind\n");
for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) {
const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second];
@@ -610,7 +607,7 @@ static int AnalyzeBitcode() {
else
fprintf(stderr, "UnknownCode%d\n", FreqPairs[i].second);
}
- errs() << "\n";
+ outs() << "\n";
}
}
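
The sweep from errs() to outs() in llvm-bcanalyzer moves the analysis report onto stdout, where it can be piped or redirected independently of real diagnostics. The convention in a nutshell (raw_ostream API; the messages are illustrative):

    // Program output: stdout, buffered, redirectable.
    outs() << "Summary of input.bc:\n";
    // Diagnostics: stderr, so they survive `llvm-bcanalyzer ... > report'.
    errs() << "error: malformed bitcode stream\n";
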
diff --git a/tools/llvm-config/BuildVariables.inc.in b/tools/llvm-config/BuildVariables.inc.in
new file mode 100644
index 000000000000..fe87afb82190
--- /dev/null
+++ b/tools/llvm-config/BuildVariables.inc.in
@@ -0,0 +1,27 @@
+//===-- BuildVariables.inc.in - llvm-config build variables -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is configured by the build system to define the variables
+// llvm-config wants to report to the user, but which can only be determined at
+// build time.
+//
+// The non .in variant of this file has been autogenerated by the LLVM build. Do
+// not edit!
+//
+//===----------------------------------------------------------------------===//
+
+#define LLVM_SRC_ROOT "@LLVM_SRC_ROOT@"
+#define LLVM_OBJ_ROOT "@LLVM_OBJ_ROOT@"
+#define LLVM_CPPFLAGS "@LLVM_CPPFLAGS@"
+#define LLVM_CFLAGS "@LLVM_CFLAGS@"
+#define LLVM_LDFLAGS "@LLVM_LDFLAGS@"
+#define LLVM_CXXFLAGS "@LLVM_CXXFLAGS@"
+#define LLVM_BUILDMODE "@LLVM_BUILDMODE@"
+#define LLVM_TARGETS_BUILT "@LLVM_TARGETS_BUILT@"
+#define LLVM_SYSTEM_LIBS "@LLVM_SYSTEM_LIBS@"
diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
index 60168623f13b..5ad58bf9df47 100644
--- a/tools/llvm-config/CMakeLists.txt
+++ b/tools/llvm-config/CMakeLists.txt
@@ -1,140 +1,42 @@
-include(TestBigEndian)
+set(LLVM_LINK_COMPONENTS support)
-include(FindPerl)
-if( NOT PERL_FOUND )
- message(FATAL_ERROR "Perl required but not found!")
-endif( NOT PERL_FOUND )
-
-set(PERL ${PERL_EXECUTABLE})
-set(VERSION PACKAGE_VERSION)
-set(PREFIX ${CMAKE_INSTALL_PREFIX})
-set(abs_top_srcdir ${LLVM_MAIN_SRC_DIR})
-set(abs_top_builddir ${LLVM_BINARY_DIR})
-execute_process(COMMAND date
- OUTPUT_VARIABLE LLVM_CONFIGTIME
- OUTPUT_STRIP_TRAILING_WHITESPACE)
-# LLVM_ON_UNIX and LLVM_ON_WIN32 already set.
-# those are set to blank by `autoconf' on MinGW, so it seems they are not required:
-#set(LLVMGCCDIR "")
-#set(LLVMGCC "")
-#set(LLVMGXX "")
-test_big_endian(IS_BIG_ENDIAN)
-if( IS_BIG_ENDIAN )
- set(ENDIAN "big")
-else( IS_BIG_ENDIAN )
- set(ENDIAN "little")
-endif( IS_BIG_ENDIAN )
-set(SHLIBEXT ${LTDL_SHLIB_EXT})
-#EXEEXT already set.
-set(OS "${CMAKE_SYSTEM}")
-set(target "${TARGET_TRIPLE}")
-set(ARCH "${LLVM_NATIVE_ARCH}")
+# We need to generate the BuildVariables.inc file containing values which are
+# only defined under certain build modes. Unfortunately, that precludes doing
+# this inside CMake, so we have to shell out to sed. For now, that means we
+# can't expect to build llvm-config on Windows.
+set(BUILDVARIABLES_SRCPATH ${CMAKE_CURRENT_SOURCE_DIR}/BuildVariables.inc.in)
+set(BUILDVARIABLES_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.inc)
+set(SEDSCRIPT_OBJPATH ${CMAKE_CURRENT_BINARY_DIR}/BuildVariables.configure.sed)
+# Compute the substitution values for various items.
get_system_libs(LLVM_SYSTEM_LIBS_LIST)
foreach(l ${LLVM_SYSTEM_LIBS_LIST})
- set(LLVM_SYSTEM_LIBS ${LLVM_SYSTEM_LIBS} "-l${l}")
+ set(SYSTEM_LIBS ${SYSTEM_LIBS} "-l${l}")
endforeach()
-
-foreach(c ${LLVM_TARGETS_TO_BUILD})
- set(TARGETS_BUILT "${TARGETS_BUILT} ${c}")
-endforeach(c)
-set(TARGETS_TO_BUILD ${TARGETS_BUILT})
-set(TARGET_HAS_JIT "1") # TODO
-
-# Avoids replacement at config-time:
-set(LLVM_CPPFLAGS "@LLVM_CPPFLAGS@")
-set(LLVM_CFLAGS "@LLVM_CFLAGS@")
-set(LLVM_CXXFLAGS "@LLVM_CXXFLAGS@")
-set(LLVM_LDFLAGS "@LLVM_LDFLAGS@")
-set(LIBS "@LIBS@")
-set(LLVM_BUILDMODE "@LLVM_BUILDMODE@")
-
-configure_file(
- ${CMAKE_CURRENT_SOURCE_DIR}/llvm-config.in.in
- ${CMAKE_CURRENT_BINARY_DIR}/llvm-config.in
- @ONLY
-)
-
-set(LIBDEPS LibDeps.txt)
-set(LIBDEPS_TMP LibDeps.txt.tmp)
-set(FINAL_LIBDEPS FinalLibDeps.txt)
-set(LLVM_CONFIG ${LLVM_TOOLS_BINARY_DIR}/llvm-config)
-set(LLVM_CONFIG_IN ${CMAKE_CURRENT_BINARY_DIR}/llvm-config.in)
-
-if( CMAKE_CROSSCOMPILING )
- set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
-endif()
-
-find_program(NM_PATH nm PATH_SUFFIXES /bin)
-
-if( NOT NM_PATH )
- message(FATAL_ERROR "`nm' not found")
-endif()
-
-get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
-
-add_custom_command(OUTPUT ${LIBDEPS_TMP}
- COMMAND ${PERL_EXECUTABLE} ${LLVM_MAIN_SRC_DIR}/utils/GenLibDeps.pl -flat ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/${CMAKE_CFG_INTDIR} ${NM_PATH} > ${LIBDEPS_TMP}
- DEPENDS ${llvm_libs}
- COMMENT "Regenerating ${LIBDEPS_TMP}")
-
-add_custom_command(OUTPUT ${LIBDEPS}
- COMMAND ${CMAKE_COMMAND} -E copy_if_different ${LIBDEPS_TMP} ${LIBDEPS}
- DEPENDS ${LIBDEPS_TMP}
- COMMENT "Updating ${LIBDEPS} if necessary...")
-
-# This must stop the build if find-cycles.pl returns error:
-add_custom_command(OUTPUT ${FINAL_LIBDEPS}
- COMMAND ${CMAKE_COMMAND} -E remove -f ${FINAL_LIBDEPS} ${FINAL_LIBDEPS}.tmp
- COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/find-cycles.pl < ${LIBDEPS} > ${FINAL_LIBDEPS}.tmp
- COMMAND ${CMAKE_COMMAND} -E copy ${FINAL_LIBDEPS}.tmp ${FINAL_LIBDEPS}
- DEPENDS ${LIBDEPS}
- COMMENT "Checking for cyclic dependencies between LLVM libraries.")
-
set(C_FLGS "${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
set(CXX_FLGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
set(CPP_FLGS "${CMAKE_CPP_FLAGS} ${CMAKE_CPP_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${LLVM_DEFINITIONS}")
-# We don't want certain flags on the output of
-# llvm-config --cflags --cxxflags
-macro(remove_option_from_llvm_config option)
- llvm_replace_compiler_option(C_FLGS "${option}" "")
- llvm_replace_compiler_option(CXX_FLGS "${option}" "")
- llvm_replace_compiler_option(CPP_FLGS "${option}" "")
-endmacro(remove_option_from_llvm_config)
-remove_option_from_llvm_config("-pedantic")
-remove_option_from_llvm_config("-Wall")
-remove_option_from_llvm_config("-W")
-
-add_custom_command(OUTPUT ${LLVM_CONFIG}
- COMMAND echo s!@LLVM_CPPFLAGS@!${CPP_FLGS}! > temp.sed
- COMMAND echo s!@LLVM_CFLAGS@!${C_FLGS}! >> temp.sed
- COMMAND echo s!@LLVM_CXXFLAGS@!${CXX_FLGS}! >> temp.sed
+add_custom_command(OUTPUT ${BUILDVARIABLES_OBJPATH}
+ COMMAND echo s!@LLVM_SRC_ROOT@!${LLVM_MAIN_SRC_DIR}! > ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_OBJ_ROOT@!${LLVM_BINARY_DIR}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_CPPFLAGS@!${CPP_FLGS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_CFLAGS@!${C_FLGS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_CXXFLAGS@!${CXX_FLGS}! >> ${SEDSCRIPT_OBJPATH}
# TODO: Use general flags for linking! not just for shared libs:
- COMMAND echo s!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}! >> temp.sed
- COMMAND echo s!@LIBS@!${LLVM_SYSTEM_LIBS}! >> temp.sed
- COMMAND echo s!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}! >> temp.sed
- COMMAND sed -f temp.sed < ${LLVM_CONFIG_IN} > ${LLVM_CONFIG}
- COMMAND ${CMAKE_COMMAND} -E remove -f temp.sed
- COMMAND cat ${FINAL_LIBDEPS} >> ${LLVM_CONFIG}
- COMMAND chmod +x ${LLVM_CONFIG}
+ COMMAND echo s!@LLVM_LDFLAGS@!${CMAKE_SHARED_LINKER_FLAGS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_BUILDMODE@!${CMAKE_BUILD_TYPE}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_SYSTEM_LIBS@!${SYSTEM_LIBS}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND echo s!@LLVM_TARGETS_BUILT@!${LLVM_TARGETS_TO_BUILD}! >> ${SEDSCRIPT_OBJPATH}
+ COMMAND sed -f ${SEDSCRIPT_OBJPATH} < ${BUILDVARIABLES_SRCPATH} > ${BUILDVARIABLES_OBJPATH}
VERBATIM
- DEPENDS ${FINAL_LIBDEPS} ${LLVM_CONFIG_IN}
- COMMENT "Building llvm-config script."
+ COMMENT "Building BuildVariables.inc include."
)
-add_custom_target(llvm-config.target ALL
- DEPENDS ${LLVM_CONFIG})
-
-add_dependencies( llvm-config.target ${llvm_libs} )
-
-# Make sure that llvm-config builds before the llvm tools, so we have
-# LibDeps.txt and can use it for updating the hard-coded library
-# dependencies on cmake/modules/LLVMLibDeps.cmake when the tools'
-# build fail due to outdated dependencies:
-set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} llvm-config.target)
+# Add the llvm-config tool.
+add_llvm_tool(llvm-config
+ llvm-config.cpp
+ )
-install(FILES ${LLVM_CONFIG}
- PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE
- WORLD_READ WORLD_EXECUTE
- DESTINATION bin)
+# Add the dependency on the generation step.
+add_file_dependencies(${CMAKE_CURRENT_SOURCE_DIR}/llvm-config.cpp ${BUILDVARIABLES_OBJPATH})
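For illustration, the generated BuildVariables.inc is simply the .in template
above with each @VAR@ placeholder substituted by the sed script. A
hypothetical result for a Debug tree might look like this (all values are
made-up examples, not taken from the patch):

    #define LLVM_SRC_ROOT "/home/user/llvm"
    #define LLVM_OBJ_ROOT "/home/user/llvm/build"
    #define LLVM_CPPFLAGS "-D_GNU_SOURCE -D__STDC_LIMIT_MACROS"
    #define LLVM_CFLAGS "-g -O0 -Wall"
    #define LLVM_LDFLAGS ""
    #define LLVM_CXXFLAGS "-g -O0 -Wall -fno-exceptions"
    #define LLVM_BUILDMODE "Debug"
    #define LLVM_TARGETS_BUILT "X86 ARM"
    #define LLVM_SYSTEM_LIBS "-lpthread -ldl -lm"

llvm-config.cpp #includes this header directly, which is why the
add_file_dependencies() call above makes the source file depend on the
generation step.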
diff --git a/tools/llvm-config/Makefile b/tools/llvm-config/Makefile
index c7f7b3234d64..3f11730a37d7 100644
--- a/tools/llvm-config/Makefile
+++ b/tools/llvm-config/Makefile
@@ -1,5 +1,5 @@
-##===- tools/llvm-config/Makefile --------------------------*- Makefile -*-===##
-#
+##===- tools/llvm-config/Makefile --------------------------*- Makefile -*-===##
+#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
@@ -7,97 +7,42 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
+LEVEL := ../..
+TOOLNAME := llvm-config
+USEDLIBS := LLVMSupport.a
-EXTRA_DIST = LibDeps.txt FinalLibDeps.txt llvm-config.in.in find-cycles.pl
+# We generate sources in the build directory; make sure it is in the include
+# paths.
+INCLUDE_BUILD_DIR := 1
-include $(LEVEL)/Makefile.common
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
-# If we don't have Perl, we can't generate the library dependencies upon which
-# llvm-config depends. Therefore, only if we detect perl will we do anything
-# useful.
-ifeq ($(HAVE_PERL),1)
+# Note that we have to use lazy expansion here.
+BUILDVARIABLES_SRCPATH = $(PROJ_SRC_ROOT)/tools/$(TOOLNAME)/BuildVariables.inc.in
+BUILDVARIABLES_OBJPATH = $(ObjDir)/BuildVariables.inc
+BUILT_SOURCES = $(BUILDVARIABLES_OBJPATH)
+
+include $(LEVEL)/Makefile.common
# Combine preprocessor flags (except for -I) and CXX flags.
-SUB_CPPFLAGS = ${CPP.BaseFlags}
-SUB_CFLAGS = ${CPP.BaseFlags} ${C.Flags}
-SUB_CXXFLAGS = ${CPP.BaseFlags} ${CXX.Flags}
+SUB_CPPFLAGS := ${CPP.BaseFlags}
+SUB_CFLAGS := ${CPP.BaseFlags} ${C.Flags}
+SUB_CXXFLAGS := ${CPP.BaseFlags} ${CXX.Flags}
# This is blank for now. We need to be careful about adding stuff here:
# LDFLAGS tend not to be portable, and we don't currently require the
# user to use libtool when linking against LLVM.
-SUB_LDFLAGS =
-
-FinalLibDeps = $(PROJ_OBJ_DIR)/FinalLibDeps.txt
-LibDeps = $(PROJ_OBJ_DIR)/LibDeps.txt
-LibDepsTemp = $(PROJ_OBJ_DIR)/LibDeps.txt.tmp
-GenLibDeps = $(PROJ_SRC_ROOT)/utils/GenLibDeps.pl
-
-$(LibDepsTemp): $(GenLibDeps) $(LibDir) $(wildcard $(LibDir)/*.a $(LibDir)/*.o)
- $(Echo) "Regenerating LibDeps.txt.tmp"
- $(Verb) $(PERL) $(GenLibDeps) -flat $(LibDir) "$(NM_PATH)" > $(LibDepsTemp)
-
-$(LibDeps): $(LibDepsTemp)
- $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \
- $(EchoCmd) Updated LibDeps.txt because dependencies changed )
-
-# Find all the cyclic dependencies between various LLVM libraries, so we
-# don't have to process them at runtime.
-$(FinalLibDeps): find-cycles.pl $(LibDeps)
- $(Echo) "Checking for cyclic dependencies between LLVM libraries."
- $(Verb) $(PERL) $< < $(LibDeps) > $@ || rm -f $@
-
-# Rerun our configure substitutions as needed.
-ConfigInIn = $(PROJ_SRC_DIR)/llvm-config.in.in
-llvm-config.in: $(ConfigInIn) $(ConfigStatusScript)
- $(Verb) cd $(PROJ_OBJ_ROOT) ; \
- $(ConfigStatusScript) tools/llvm-config/llvm-config.in
-
-llvm-config-perobj: llvm-config.in $(GenLibDeps) $(LibDir) $(wildcard $(LibDir)/*.a)
- $(Echo) "Generating llvm-config-perobj"
- $(Verb) $(PERL) $(GenLibDeps) -perobj -flat $(LibDir) "$(NM_PATH)" >PerobjDeps.txt
- $(Echo) "Checking for cyclic dependencies between LLVM objects."
- $(Verb) $(PERL) $(PROJ_SRC_DIR)/find-cycles.pl < PerobjDepsIncl.txt > PerobjDepsInclFinal.txt || rm -f $@
- $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
- > temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CFLAGS@/$(subst /,\/,$(SUB_CFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CXXFLAGS@/$(subst /,\/,$(SUB_CXXFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_LDFLAGS@/$(subst /,\/,$(SUB_LDFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_BUILDMODE@/$(subst /,\/,$(BuildMode))/' \
- >> temp.sed
- $(Verb) $(SED) -f temp.sed < $< > $@
- $(Verb) $(RM) temp.sed
- $(Verb) cat PerobjDepsFinal.txt >> $@
- $(Verb) chmod +x $@
+SUB_LDFLAGS :=
-llvm-config-perobjincl: llvm-config.in $(GenLibDeps) $(LibDir) $(wildcard $(LibDir)/*.a)
- $(Echo) "Generating llvm-config-perobjincl"
- $(Verb) $(PERL) $(GenLibDeps) -perobj -perobjincl -flat $(LibDir) "$(NM_PATH)" >PerobjDepsIncl.txt
- $(Echo) "Checking for cyclic dependencies between LLVM objects."
- $(Verb) $(PERL) $(PROJ_SRC_DIR)/find-cycles.pl < PerobjDepsIncl.txt > PerobjDepsInclFinal.txt
- $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
- > temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CFLAGS@/$(subst /,\/,$(SUB_CFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_CXXFLAGS@/$(subst /,\/,$(SUB_CXXFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_LDFLAGS@/$(subst /,\/,$(SUB_LDFLAGS))/' \
- >> temp.sed
- $(Verb) $(ECHO) 's/@LLVM_BUILDMODE@/$(subst /,\/,$(BuildMode))/' \
- >> temp.sed
- $(Verb) $(SED) -f temp.sed < $< > $@
- $(Verb) $(RM) temp.sed
- $(Verb) cat PerobjDepsInclFinal.txt >> $@
- $(Verb) chmod +x $@
-
-# Build our final script.
-$(ToolDir)/llvm-config: llvm-config.in $(FinalLibDeps)
- $(Echo) "Building llvm-config script."
- $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
+$(ObjDir)/BuildVariables.inc: $(BUILDVARIABLES_SRCPATH) Makefile $(ObjDir)/.dir
+ $(Echo) "Building llvm-config BuildVariables.inc file."
+ $(Verb) $(ECHO) 's/@LLVM_SRC_ROOT@/$(subst /,\/,$(LLVM_SRC_ROOT))/' \
> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_OBJ_ROOT@/$(subst /,\/,$(LLVM_OBJ_ROOT))/' \
+ >> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_CPPFLAGS@/$(subst /,\/,$(SUB_CPPFLAGS))/' \
+ >> temp.sed
$(Verb) $(ECHO) 's/@LLVM_CFLAGS@/$(subst /,\/,$(SUB_CFLAGS))/' \
>> temp.sed
$(Verb) $(ECHO) 's/@LLVM_CXXFLAGS@/$(subst /,\/,$(SUB_CXXFLAGS))/' \
@@ -106,26 +51,9 @@ $(ToolDir)/llvm-config: llvm-config.in $(FinalLibDeps)
>> temp.sed
$(Verb) $(ECHO) 's/@LLVM_BUILDMODE@/$(subst /,\/,$(BuildMode))/' \
>> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_SYSTEM_LIBS@/$(subst /,\/,$(LIBS))/' \
+ >> temp.sed
+ $(Verb) $(ECHO) 's/@LLVM_TARGETS_BUILT@/$(subst /,\/,$(TARGETS_TO_BUILD))/' \
+ >> temp.sed
$(Verb) $(SED) -f temp.sed < $< > $@
$(Verb) $(RM) temp.sed
- $(Verb) cat $(FinalLibDeps) >> $@
- $(Verb) chmod +x $@
-
-else
-# We don't have perl, just generate a dummy llvm-config
-$(ToolDir)/llvm-config:
- $(Echo) "Building place holder llvm-config script."
- $(Verb) $(ECHO) 'echo llvm-config: Perl not found so llvm-config could not be generated' >> $@
- $(Verb) chmod +x $@
-
-endif
-# Hook into the standard Makefile rules.
-all-local:: $(ToolDir)/llvm-config
-clean-local::
- $(Verb) $(RM) -f $(ToolDir)/llvm-config llvm-config.in $(FinalLibDeps) \
- $(LibDeps) GenLibDeps.out
-install-local:: all-local
- $(Echo) Installing llvm-config
- $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_bindir)
- $(Verb) $(ScriptInstall) $(ToolDir)/llvm-config $(DESTDIR)$(PROJ_bindir)
-
diff --git a/tools/llvm-config/find-cycles.pl b/tools/llvm-config/find-cycles.pl
deleted file mode 100755
index 5cbf5b4b2776..000000000000
--- a/tools/llvm-config/find-cycles.pl
+++ /dev/null
@@ -1,170 +0,0 @@
-#!/usr/bin/perl
-#
-# Program: find-cycles.pl
-#
-# Synopsis: Given a list of possibly cyclic dependencies, merge all the
-# cycles. This makes it possible to topologically sort the
-# dependencies between different parts of LLVM.
-#
-# Syntax: find-cycles.pl < LibDeps.txt > FinalLibDeps.txt
-#
-# Input: cycmem1: cycmem2 dep1 dep2
-# cycmem2: cycmem1 dep3 dep4
-# boring: dep4
-#
-# Output: cycmem1 cycmem2: dep1 dep2 dep3 dep4
-# boring: dep4
-#
-# This file was written by Eric Kidd, and is placed into the public domain.
-#
-
-use 5.006;
-use strict;
-use warnings;
-
-my %DEPS;
-my @CYCLES;
-sub find_all_cycles;
-
-# Read our dependency information.
-while (<>) {
- chomp;
- my ($module, $dependency_str) = /^\s*([^:]+):\s*(.*)\s*$/;
- die "Malformed data: $_" unless defined $dependency_str;
- my @dependencies = split(/ /, $dependency_str);
- $DEPS{$module} = \@dependencies;
-}
-
-# Partition our raw dependencies into sets of cyclically-connected nodes.
-find_all_cycles();
-
-# Print out the finished cycles, with their dependencies.
-my @output;
-my $cycles_found = 0;
-foreach my $cycle (@CYCLES) {
- my @modules = sort keys %{$cycle};
-
- # Merge the dependencies of all modules in this cycle.
- my %dependencies;
- foreach my $module (@modules) {
- @dependencies{@{$DEPS{$module}}} = 1;
- }
-
- # Prune the known cyclic dependencies.
- foreach my $module (@modules) {
- delete $dependencies{$module};
- }
-
- # Warn about possible linker problems.
- my @archives = grep(/\.a$/, @modules);
- if (@archives > 1) {
- $cycles_found = $cycles_found + 1;
- print STDERR "find-cycles.pl: Circular dependency between *.a files:\n";
- print STDERR "find-cycles.pl: ", join(' ', @archives), "\n";
- push @modules, @archives; # WORKAROUND: Duplicate *.a files. Ick.
- } elsif (@modules > 1) {
- $cycles_found = $cycles_found + 1;
- print STDERR "find-cycles.pl: Circular dependency between *.o files:\n";
- print STDERR "find-cycles.pl: ", join(' ', @modules), "\n";
- push @modules, @modules; # WORKAROUND: Duplicate *.o files. Ick.
- }
-
- # Add to our output. (@modules is already as sorted as we need it to be.)
- push @output, (join(' ', @modules) . ': ' .
- join(' ', sort keys %dependencies) . "\n");
-}
-print sort @output;
-
-exit $cycles_found;
-
-#==========================================================================
-# Depedency Cycle Support
-#==========================================================================
-# For now, we have cycles in our dependency graph. Ideally, each cycle
-# would be collapsed down to a single *.a file, saving us all this work.
-#
-# To understand this code, you'll need a working knowledge of Perl 5,
-# and possibly some quality time with 'man perlref'.
-
-my %SEEN;
-my %CYCLES;
-sub find_cycles ($@);
-sub found_cycles ($@);
-
-sub find_all_cycles {
- # Find all multi-item cycles.
- my @modules = sort keys %DEPS;
- foreach my $module (@modules) { find_cycles($module); }
-
- # Build fake one-item "cycles" for the remaining modules, so we can
- # treat them uniformly.
- foreach my $module (@modules) {
- unless (defined $CYCLES{$module}) {
- my %cycle = ($module, 1);
- $CYCLES{$module} = \%cycle;
- }
- }
-
- # Find all our unique cycles. We have to do this the hard way because
- # we apparently can't store hash references as hash keys without making
- # 'strict refs' sad.
- my %seen;
- foreach my $cycle (values %CYCLES) {
- unless ($seen{$cycle}) {
- $seen{$cycle} = 1;
- push @CYCLES, $cycle;
- }
- }
-}
-
-# Walk through our graph depth-first (keeping a trail in @path), and report
-# any cycles we find.
-sub find_cycles ($@) {
- my ($module, @path) = @_;
- if (str_in_list($module, @path)) {
- found_cycle($module, @path);
- } else {
- return if defined $SEEN{$module};
- $SEEN{$module} = 1;
- foreach my $dep (@{$DEPS{$module}}) {
- find_cycles($dep, @path, $module);
- }
- }
-}
-
-# Give a cycle, attempt to merge it with pre-existing cycle data.
-sub found_cycle ($@) {
- my ($module, @path) = @_;
-
- # Pop any modules which aren't part of our cycle.
- while ($path[0] ne $module) { shift @path; }
- #print join("->", @path, $module) . "\n";
-
- # Collect the modules in our cycle into a hash.
- my %cycle;
- foreach my $item (@path) {
- $cycle{$item} = 1;
- if (defined $CYCLES{$item}) {
- # Looks like we intersect with an existing cycle, so merge
- # all those in, too.
- foreach my $old_item (keys %{$CYCLES{$item}}) {
- $cycle{$old_item} = 1;
- }
- }
- }
-
- # Update our global cycle table.
- my $cycle_ref = \%cycle;
- foreach my $item (keys %cycle) {
- $CYCLES{$item} = $cycle_ref;
- }
- #print join(":", sort keys %cycle) . "\n";
-}
-
-sub str_in_list ($@) {
- my ($str, @list) = @_;
- foreach my $item (@list) {
- return 1 if ($item eq $str);
- }
- return 0;
-}
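The deleted find-cycles.pl collapsed cyclically-dependent libraries into
single link groups so the remaining graph could be topologically sorted; with
this patch that work happens ahead of time, when LibraryDependencies.inc is
generated. For reference, a simplified C++ reconstruction of the script's
found_cycle step (an illustrative sketch, not code from the patch):

    #include <map>
    #include <memory>
    #include <set>
    #include <string>
    #include <vector>

    using Cycle = std::set<std::string>;

    // Merge every module on the cyclic portion of `path` into one shared
    // Cycle record, coalescing with any previously discovered cycles.
    static void foundCycle(const std::string &module,
                           std::vector<std::string> path,
                           std::map<std::string,
                                    std::shared_ptr<Cycle>> &cycles) {
      // Pop any leading path entries which aren't part of this cycle.
      while (!path.empty() && path.front() != module)
        path.erase(path.begin());

      auto merged = std::make_shared<Cycle>();
      for (const std::string &item : path) {
        merged->insert(item);
        auto it = cycles.find(item);
        if (it != cycles.end())                 // intersects an old cycle:
          merged->insert(it->second->begin(),   // merge its members in too
                         it->second->end());
      }
      for (const std::string &item : *merged)
        cycles[item] = merged;  // every member now shares one cycle record
    }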
diff --git a/tools/llvm-config/llvm-config.cpp b/tools/llvm-config/llvm-config.cpp
new file mode 100644
index 000000000000..79fd7f8c5aec
--- /dev/null
+++ b/tools/llvm-config/llvm-config.cpp
@@ -0,0 +1,342 @@
+//===-- llvm-config.cpp - LLVM project configuration utility --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool encapsulates information about an LLVM project configuration for
+// use by other projects' build environments (to determine installed path,
+// available features, required libraries, etc.).
+//
+// Note that although this tool *may* be used by some parts of LLVM's own build
+// (e.g., the Makefiles use it to compute required libraries when linking
+// tools), it is primarily designed to support external projects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+// Include the build-time variables we can report to the user. This is
+// generated at build time from BuildVariables.inc.in by the build system.
+#include "BuildVariables.inc"
+
+// Include the component table. This creates an array of struct
+// AvailableComponent entries, which record the component name, library name,
+// and required components for all of the available libraries.
+//
+// Not all components define a library; we also use "library groups" as a way
+// to create entries for pseudo groups like x86 or all-targets.
+#include "LibraryDependencies.inc"
+
+/// \brief Traverse a single component, adding to the topological ordering in
+/// \arg RequiredLibs.
+///
+/// \param Name - The component to traverse.
+/// \param ComponentMap - A prebuilt map of component names to descriptors.
+/// \param VisitedComponents [in] [out] - The set of already visited components.
+/// \param RequiredLibs [out] - The ordered list of required libraries.
+static void VisitComponent(StringRef Name,
+ const StringMap<AvailableComponent*> &ComponentMap,
+ std::set<AvailableComponent*> &VisitedComponents,
+ std::vector<StringRef> &RequiredLibs) {
+ // Look up the component.
+ AvailableComponent *AC = ComponentMap.lookup(Name);
+ assert(AC && "Invalid component name!");
+
+ // Add to the visited table.
+ if (!VisitedComponents.insert(AC).second) {
+ // We are done if the component has already been visited.
+ return;
+ }
+
+ // Otherwise, visit all the dependencies.
+ for (unsigned i = 0; AC->RequiredLibraries[i]; ++i) {
+ VisitComponent(AC->RequiredLibraries[i], ComponentMap, VisitedComponents,
+ RequiredLibs);
+ }
+
+ // Add to the required library list.
+ if (AC->Library)
+ RequiredLibs.push_back(AC->Library);
+}
+
+/// \brief Compute the list of required libraries for a given list of
+/// components, in an order suitable for passing to a linker (that is, libraries
+/// appear prior to their dependencies).
+///
+/// \param Components - The names of the components to find libraries for.
+/// \param RequiredLibs [out] - On return, the ordered list of libraries that
+/// are required to link the given components.
+void ComputeLibsForComponents(const std::vector<StringRef> &Components,
+ std::vector<StringRef> &RequiredLibs) {
+ std::set<AvailableComponent*> VisitedComponents;
+
+ // Build a map of component names to information.
+ StringMap<AvailableComponent*> ComponentMap;
+ for (unsigned i = 0; i != array_lengthof(AvailableComponents); ++i) {
+ AvailableComponent *AC = &AvailableComponents[i];
+ ComponentMap[AC->Name] = AC;
+ }
+
+ // Visit the components.
+ for (unsigned i = 0, e = Components.size(); i != e; ++i) {
+ // Users are allowed to provide mixed-case component names.
+ std::string ComponentLower = Components[i].lower();
+
+ // Validate that the user supplied a valid component name.
+ if (!ComponentMap.count(ComponentLower)) {
+ llvm::errs() << "llvm-config: unknown component name: " << Components[i]
+ << "\n";
+ exit(1);
+ }
+
+ VisitComponent(ComponentLower, ComponentMap, VisitedComponents,
+ RequiredLibs);
+ }
+
+ // The list is now ordered with leaves first; we want the libraries printed
+ // in the reverse order of dependency.
+ std::reverse(RequiredLibs.begin(), RequiredLibs.end());
+}
+
+/* *** */
+
+void usage() {
+ errs() << "\
+usage: llvm-config <OPTION>... [<COMPONENT>...]\n\
+\n\
+Get various configuration information needed to compile programs which use\n\
+LLVM. Typically called from 'configure' scripts. Examples:\n\
+ llvm-config --cxxflags\n\
+ llvm-config --ldflags\n\
+ llvm-config --libs engine bcreader scalaropts\n\
+\n\
+Options:\n\
+ --version Print LLVM version.\n\
+ --prefix Print the installation prefix.\n\
+ --src-root Print the source root LLVM was built from.\n\
+ --obj-root Print the object root used to build LLVM.\n\
+ --bindir Directory containing LLVM executables.\n\
+ --includedir Directory containing LLVM headers.\n\
+ --libdir Directory containing LLVM libraries.\n\
+ --cppflags C preprocessor flags for files that include LLVM headers.\n\
+ --cflags C compiler flags for files that include LLVM headers.\n\
+ --cxxflags C++ compiler flags for files that include LLVM headers.\n\
+ --ldflags Print Linker flags.\n\
+ --libs Libraries needed to link against LLVM components.\n\
+ --libnames Bare library names for in-tree builds.\n\
+ --libfiles Fully qualified library filenames for makefile depends.\n\
+ --components List of all possible components.\n\
+ --targets-built List of all targets currently built.\n\
+ --host-target Target triple used to configure LLVM.\n\
+ --build-mode Print build mode of LLVM tree (e.g. Debug or Release).\n\
+Typical components:\n\
+ all All LLVM libraries (default).\n\
+ engine Either a native JIT or a bitcode interpreter.\n";
+ exit(1);
+}
+
+/// \brief Compute the path to the main executable.
+llvm::sys::Path GetExecutablePath(const char *Argv0) {
+ // This just needs to be some symbol in the binary; C++ doesn't
+ // allow taking the address of ::main however.
+ void *P = (void*) (intptr_t) GetExecutablePath;
+ return llvm::sys::Path::GetMainExecutable(Argv0, P);
+}
+
+int main(int argc, char **argv) {
+ std::vector<StringRef> Components;
+ bool PrintLibs = false, PrintLibNames = false, PrintLibFiles = false;
+ bool HasAnyOption = false;
+
+ // llvm-config is designed to support being run both from a development tree
+ // and from an installed path. We try to auto-detect which case we are in so
+ // that we can report the correct information when run from a development
+ // tree.
+ bool IsInDevelopmentTree;
+ enum { MakefileStyle, CMakeStyle, CMakeBuildModeStyle } DevelopmentTreeLayout;
+ llvm::SmallString<256> CurrentPath(GetExecutablePath(argv[0]).str());
+ std::string CurrentExecPrefix;
+ std::string ActiveObjRoot;
+
+ // Create an absolute path, and pop up one directory (we expect to be inside a
+ // bin dir).
+ sys::fs::make_absolute(CurrentPath);
+ CurrentExecPrefix = sys::path::parent_path(
+ sys::path::parent_path(CurrentPath)).str();
+
+ // Check to see if we are inside a development tree by comparing to possible
+ // locations (prefix style or CMake style). This could be wrong in the face of
+ // symbolic links, but is good enough.
+ if (CurrentExecPrefix == std::string(LLVM_OBJ_ROOT) + "/" + LLVM_BUILDMODE) {
+ IsInDevelopmentTree = true;
+ DevelopmentTreeLayout = MakefileStyle;
+
+ // If we are in a development tree, then check if we are in a BuildTools
+ // directory. This indicates we are built for the build triple, but we
+ // always want to provide information for the host triple.
+ if (sys::path::filename(LLVM_OBJ_ROOT) == "BuildTools") {
+ ActiveObjRoot = sys::path::parent_path(LLVM_OBJ_ROOT);
+ } else {
+ ActiveObjRoot = LLVM_OBJ_ROOT;
+ }
+ } else if (CurrentExecPrefix == std::string(LLVM_OBJ_ROOT)) {
+ IsInDevelopmentTree = true;
+ DevelopmentTreeLayout = CMakeStyle;
+ ActiveObjRoot = LLVM_OBJ_ROOT;
+ } else if (CurrentExecPrefix == std::string(LLVM_OBJ_ROOT) + "/bin") {
+ IsInDevelopmentTree = true;
+ DevelopmentTreeLayout = CMakeBuildModeStyle;
+ ActiveObjRoot = LLVM_OBJ_ROOT;
+ } else {
+ IsInDevelopmentTree = false;
+ DevelopmentTreeLayout = MakefileStyle; // Initialized to avoid warnings.
+ }
+
+ // Compute various directory locations based on the derived location
+ // information.
+ std::string ActivePrefix, ActiveBinDir, ActiveIncludeDir, ActiveLibDir;
+ std::string ActiveIncludeOption;
+ if (IsInDevelopmentTree) {
+ ActiveIncludeDir = std::string(LLVM_SRC_ROOT) + "/include";
+ ActivePrefix = CurrentExecPrefix;
+
+ // CMake organizes the products differently from a normal prefix-style
+ // layout.
+ switch (DevelopmentTreeLayout) {
+ case MakefileStyle:
+ ActiveBinDir = ActiveObjRoot + "/" + LLVM_BUILDMODE + "/bin";
+ ActiveLibDir = ActiveObjRoot + "/" + LLVM_BUILDMODE + "/lib";
+ break;
+ case CMakeStyle:
+ ActiveBinDir = ActiveObjRoot + "/bin";
+ ActiveLibDir = ActiveObjRoot + "/lib";
+ break;
+ case CMakeBuildModeStyle:
+ ActiveBinDir = ActiveObjRoot + "/bin/" + LLVM_BUILDMODE;
+ ActiveLibDir = ActiveObjRoot + "/lib/" + LLVM_BUILDMODE;
+ break;
+ }
+
+ // We need to include files from both the source and object trees.
+ ActiveIncludeOption = ("-I" + ActiveIncludeDir + " " +
+ "-I" + ActiveObjRoot + "/include");
+ } else {
+ ActivePrefix = CurrentExecPrefix;
+ ActiveIncludeDir = ActivePrefix + "/include";
+ ActiveBinDir = ActivePrefix + "/bin";
+ ActiveLibDir = ActivePrefix + "/lib";
+ ActiveIncludeOption = "-I" + ActiveIncludeDir;
+ }
+
+ raw_ostream &OS = outs();
+ for (int i = 1; i != argc; ++i) {
+ StringRef Arg = argv[i];
+
+ if (Arg.startswith("-")) {
+ HasAnyOption = true;
+ if (Arg == "--version") {
+ OS << PACKAGE_VERSION << '\n';
+ } else if (Arg == "--prefix") {
+ OS << ActivePrefix << '\n';
+ } else if (Arg == "--bindir") {
+ OS << ActiveBinDir << '\n';
+ } else if (Arg == "--includedir") {
+ OS << ActiveIncludeDir << '\n';
+ } else if (Arg == "--libdir") {
+ OS << ActiveLibDir << '\n';
+ } else if (Arg == "--cppflags") {
+ OS << ActiveIncludeOption << ' ' << LLVM_CPPFLAGS << '\n';
+ } else if (Arg == "--cflags") {
+ OS << ActiveIncludeOption << ' ' << LLVM_CFLAGS << '\n';
+ } else if (Arg == "--cxxflags") {
+ OS << ActiveIncludeOption << ' ' << LLVM_CXXFLAGS << '\n';
+ } else if (Arg == "--ldflags") {
+ OS << "-L" << ActiveLibDir << ' ' << LLVM_LDFLAGS
+ << ' ' << LLVM_SYSTEM_LIBS << '\n';
+ } else if (Arg == "--libs") {
+ PrintLibs = true;
+ } else if (Arg == "--libnames") {
+ PrintLibNames = true;
+ } else if (Arg == "--libfiles") {
+ PrintLibFiles = true;
+ } else if (Arg == "--components") {
+ for (unsigned j = 0; j != array_lengthof(AvailableComponents); ++j) {
+ OS << ' ';
+ OS << AvailableComponents[j].Name;
+ }
+ OS << '\n';
+ } else if (Arg == "--targets-built") {
+ OS << LLVM_TARGETS_BUILT << '\n';
+ } else if (Arg == "--host-target") {
+ OS << LLVM_DEFAULT_TARGET_TRIPLE << '\n';
+ } else if (Arg == "--build-mode") {
+ OS << LLVM_BUILDMODE << '\n';
+ } else if (Arg == "--obj-root") {
+ OS << LLVM_OBJ_ROOT << '\n';
+ } else if (Arg == "--src-root") {
+ OS << LLVM_SRC_ROOT << '\n';
+ } else {
+ usage();
+ }
+ } else {
+ Components.push_back(Arg);
+ }
+ }
+
+ if (!HasAnyOption)
+ usage();
+
+ if (PrintLibs || PrintLibNames || PrintLibFiles) {
+ // If no components were specified, default to "all".
+ if (Components.empty())
+ Components.push_back("all");
+
+ // Construct the list of all the required libraries.
+ std::vector<StringRef> RequiredLibs;
+ ComputeLibsForComponents(Components, RequiredLibs);
+
+ for (unsigned i = 0, e = RequiredLibs.size(); i != e; ++i) {
+ StringRef Lib = RequiredLibs[i];
+ if (i)
+ OS << ' ';
+
+ if (PrintLibNames) {
+ OS << Lib;
+ } else if (PrintLibFiles) {
+ OS << ActiveLibDir << '/' << Lib;
+ } else if (PrintLibs) {
+ // If this is a typical library name, include it using -l.
+ if (Lib.startswith("lib") && Lib.endswith(".a")) {
+ OS << "-l" << Lib.slice(3, Lib.size()-2);
+ continue;
+ }
+
+ // Otherwise, print the full path.
+ OS << ActiveLibDir << '/' << Lib;
+ }
+ }
+ OS << '\n';
+ } else if (!Components.empty()) {
+ errs() << "llvm-config: error: components given, but unused\n\n";
+ usage();
+ }
+
+ return 0;
+}
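The new tool also depends on a second generated header,
LibraryDependencies.inc, which is not part of this diff. Judging only from how
llvm-config.cpp uses it, its entries plausibly have this shape (the struct
layout and sample values below are illustrative guesses):

    struct AvailableComponent {
      const char *Name;                 // user-facing component name
      const char *Library;              // archive name, or null for a group
      const char *RequiredLibraries[8]; // null-terminated dependency list
    };

    static AvailableComponent AvailableComponents[] = {
      { "support", "libLLVMSupport.a", { 0 } },
      { "core",    "libLLVMCore.a",    { "support", 0 } },
      { "all-targets", 0,              { "x86", 0 } },  // a "library group"
    };

VisitComponent() walks RequiredLibraries depth-first and appends each library
after its dependencies, so the final std::reverse() in
ComputeLibsForComponents() yields the dependents-before-dependencies order
that linkers expect.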
diff --git a/tools/llvm-config/llvm-config.in.in b/tools/llvm-config/llvm-config.in.in
deleted file mode 100644
index 840a10e23a1e..000000000000
--- a/tools/llvm-config/llvm-config.in.in
+++ /dev/null
@@ -1,463 +0,0 @@
-#!@PERL@
-##===- tools/llvm-config ---------------------------------------*- perl -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-#
-# Synopsis: Prints out compiler options needed to build against an installed
-# copy of LLVM.
-#
-# Syntax: llvm-config OPTIONS... [COMPONENTS...]
-#
-##===----------------------------------------------------------------------===##
-
-use 5.006;
-use strict;
-use warnings;
-use Cwd 'abs_path';
-
-#---- begin autoconf values ----
-my $PACKAGE_NAME = q{@PACKAGE_NAME@};
-my $VERSION = q{@PACKAGE_VERSION@};
-my $PREFIX = q{@LLVM_PREFIX@};
-my $LLVM_CONFIGTIME = q{@LLVM_CONFIGTIME@};
-my $LLVM_SRC_ROOT = q{@abs_top_srcdir@};
-my $LLVM_OBJ_ROOT = q{@abs_top_builddir@};
-my $ARCH = lc(q{@ARCH@});
-my $TARGET_TRIPLE = q{@target@};
-my $TARGETS_TO_BUILD = q{@TARGETS_TO_BUILD@};
-my $TARGET_HAS_JIT = q{@TARGET_HAS_JIT@};
-my @TARGETS_BUILT = map { lc($_) } qw{@TARGETS_TO_BUILD@};
-#---- end autoconf values ----
-
-# Must pretend x86_64 architecture is really x86, otherwise the native backend
-# won't get linked in.
-$ARCH = "x86" if $ARCH eq "x86_64";
-
-#---- begin Makefile values ----
-my $CPPFLAGS = q{@LLVM_CPPFLAGS@};
-my $CFLAGS = q{@LLVM_CFLAGS@};
-my $CXXFLAGS = q{@LLVM_CXXFLAGS@};
-my $LDFLAGS = q{@LLVM_LDFLAGS@};
-my $SYSTEM_LIBS = q{@LIBS@};
-my $LLVM_BUILDMODE = q{@LLVM_BUILDMODE@};
-#---- end Makefile values ----
-
-# Figure out where llvm-config is being run from. Primarily, we care if it has
-# been installed, or is running from the build directory, which changes the
-# locations of some files.
-
-# Convert the current executable name into its directory (e.g. ".").
-my ($RUN_DIR) = ($0 =~ /^(.*)\/.*$/);
-
-# Turn the directory into an absolute directory on the file system, also pop up
-# from "bin" into the build or prefix dir.
-my $ABS_RUN_DIR = abs_path("$RUN_DIR/..");
-chomp($ABS_RUN_DIR);
-
-# Compute the absolute object directory build, e.g. "foo/llvm/Debug".
-my $ABS_OBJ_ROOT = "$LLVM_OBJ_ROOT/$LLVM_BUILDMODE";
-$ABS_OBJ_ROOT = abs_path("$ABS_OBJ_ROOT") if (-d $ABS_OBJ_ROOT);
-chomp($ABS_OBJ_ROOT);
-
-my $INCLUDEDIR = "$ABS_RUN_DIR/include";
-my $INCLUDEOPTION = "-I$INCLUDEDIR";
-my $LIBDIR = "$ABS_RUN_DIR/lib";
-my $BINDIR = "$ABS_RUN_DIR/bin";
-if ($ABS_RUN_DIR eq $ABS_OBJ_ROOT) {
- # If we are running out of the build directory, the include dir is in the
- # srcdir.
- $INCLUDEDIR = "$LLVM_SRC_ROOT/include";
- # We need include files from both the srcdir and objdir.
- $INCLUDEOPTION = "-I$INCLUDEDIR -I$LLVM_OBJ_ROOT/include"
-} else {
- # If installed, ignore the prefix the tree was configured with, use the
- # current prefix.
- $PREFIX = $ABS_RUN_DIR;
-}
-
-sub usage;
-sub fix_library_names (@);
-sub fix_library_files (@);
-sub expand_dependencies (@);
-sub name_map_entries;
-
-# Parse our command-line arguments.
-usage if @ARGV == 0;
-my @components;
-my $has_opt = 0;
-my $want_libs = 0;
-my $want_libnames = 0;
-my $want_libfiles = 0;
-my $want_components = 0;
-foreach my $arg (@ARGV) {
- if ($arg =~ /^-/) {
- if ($arg eq "--version") {
- $has_opt = 1; print "$VERSION\n";
- } elsif ($arg eq "--prefix") {
- $has_opt = 1; print "$PREFIX\n";
- } elsif ($arg eq "--bindir") {
- $has_opt = 1; print "$BINDIR\n";
- } elsif ($arg eq "--includedir") {
- $has_opt = 1; print "$INCLUDEDIR\n";
- } elsif ($arg eq "--libdir") {
- $has_opt = 1; print "$LIBDIR\n";
- } elsif ($arg eq "--cppflags") {
- $has_opt = 1; print "$INCLUDEOPTION $CPPFLAGS\n";
- } elsif ($arg eq "--cflags") {
- $has_opt = 1; print "$INCLUDEOPTION $CFLAGS\n";
- } elsif ($arg eq "--cxxflags") {
- $has_opt = 1; print "$INCLUDEOPTION $CXXFLAGS\n";
- } elsif ($arg eq "--ldflags") {
- $has_opt = 1; print "-L$LIBDIR $LDFLAGS $SYSTEM_LIBS\n";
- } elsif ($arg eq "--libs") {
- $has_opt = 1; $want_libs = 1;
- } elsif ($arg eq "--libnames") {
- $has_opt = 1; $want_libnames = 1;
- } elsif ($arg eq "--libfiles") {
- $has_opt = 1; $want_libfiles = 1;
- } elsif ($arg eq "--components") {
- $has_opt = 1; print join(' ', name_map_entries), "\n";
- } elsif ($arg eq "--targets-built") {
- $has_opt = 1; print join(' ', @TARGETS_BUILT), "\n";
- } elsif ($arg eq "--host-target") {
- $has_opt = 1; print "$TARGET_TRIPLE\n";
- } elsif ($arg eq "--build-mode") {
- $has_opt = 1; print "$LLVM_BUILDMODE\n";
- } elsif ($arg eq "--obj-root") {
- $has_opt = 1; print abs_path("$LLVM_OBJ_ROOT/");
- } elsif ($arg eq "--src-root") {
- $has_opt = 1; print abs_path("$LLVM_SRC_ROOT/");
- } else {
- usage();
- }
- } else {
- push @components, $arg;
- }
-}
-
-# If no options were specified, fail.
-usage unless $has_opt;
-
-# If no components were specified, default to 'all'.
-if (@components == 0) {
- push @components, 'all';
-}
-
-# Force component names to lower case.
-@components = map lc, @components;
-
-# Handle any arguments which require building our dependency graph.
-if ($want_libs || $want_libnames || $want_libfiles) {
- my @libs = expand_dependencies(@components);
- print join(' ', fix_library_names(@libs)), "\n" if ($want_libs);
- print join(' ', @libs), "\n" if ($want_libnames);
- print join(' ', fix_library_files(@libs)), "\n" if ($want_libfiles);
-}
-
-exit 0;
-
-#==========================================================================
-# Support Routines
-#==========================================================================
-
-sub usage {
- print STDERR <<__EOD__;
-Usage: llvm-config <OPTION>... [<COMPONENT>...]
-
-Get various configuration information needed to compile programs which use
-LLVM. Typically called from 'configure' scripts. Examples:
- llvm-config --cxxflags
- llvm-config --ldflags
- llvm-config --libs engine bcreader scalaropts
-
-Options:
- --version Print LLVM version.
- --prefix Print the installation prefix.
- --src-root Print the source root LLVM was built from.
- --obj-root Print the object root used to build LLVM.
- --bindir Directory containing LLVM executables.
- --includedir Directory containing LLVM headers.
- --libdir Directory containing LLVM libraries.
- --cppflags C preprocessor flags for files that include LLVM headers.
- --cflags C compiler flags for files that include LLVM headers.
- --cxxflags C++ compiler flags for files that include LLVM headers.
- --ldflags Print Linker flags.
- --libs Libraries needed to link against LLVM components.
- --libnames Bare library names for in-tree builds.
- --libfiles Fully qualified library filenames for makefile depends.
- --components List of all possible components.
- --targets-built List of all targets currently built.
- --host-target Target triple used to configure LLVM.
- --build-mode Print build mode of LLVM tree (e.g. Debug or Release).
-Typical components:
- all All LLVM libraries (default).
- backend Either a native backend or the C backend.
- engine Either a native JIT or a bitcode interpreter.
-__EOD__
- exit(1);
-}
-
-# Use -lfoo instead of libfoo.a whenever possible, and add directories to
-# files which can't be found using -L.
-sub fix_library_names (@) {
- my @libs = @_;
- my @result;
- foreach my $lib (@libs) {
- # Transform the bare library name appropriately.
- my ($basename) = ($lib =~ /^lib([^.]*)\.a/);
- if (defined $basename) {
- push @result, "-l$basename";
- } else {
- push @result, "$LIBDIR/$lib";
- }
- }
- return @result;
-}
-
-# Turn the list of libraries into a list of files.
-sub fix_library_files(@) {
- my @libs = @_;
- my @result;
- foreach my $lib (@libs) {
- # Transform the bare library name into a filename.
- push @result, "$LIBDIR/$lib";
- }
- return @result;
-}
-
-#==========================================================================
-# Library Dependency Analysis
-#==========================================================================
-# Given a few human-readable library names, find all their dependencies
-# and sort them into an order which the linker will like. If we packed
-# our libraries into fewer archives, we could make the linker do much
-# of this work for us.
-#
-# Libraries have two different types of names in this code: Human-friendly
-# "component" names entered on the command-line, and the raw file names
-# we use internally (and ultimately pass to the linker).
-#
-# To understand this code, you'll need a working knowledge of Perl 5,
-# and possibly some quality time with 'man perlref'.
-
-sub load_dependencies;
-sub build_name_map;
-sub have_native_backend;
-sub find_best_engine;
-sub expand_names (@);
-sub find_all_required_sets (@);
-sub find_all_required_sets_helper ($$@);
-
-# Each "set" contains one or more libraries which must be included as a
-# group (due to cyclic dependencies). Sets are represented as a Perl array
-# reference pointing to a list of internal library names.
-my @SETS;
-
-# Various mapping tables.
-my %LIB_TO_SET_MAP; # Maps internal library names to their sets.
-my %SET_DEPS; # Maps sets to a list of libraries they depend on.
-my %NAME_MAP; # Maps human-entered names to internal names.
-
-# Have our dependencies been loaded yet?
-my $DEPENDENCIES_LOADED = 0;
-
-# Given a list of human-friendly component names, translate them into a
-# complete set of linker arguments.
-sub expand_dependencies (@) {
- my @libs = @_;
- load_dependencies;
- my @required_sets = find_all_required_sets(expand_names(@libs));
- my @sorted_sets = topologically_sort_sets(@required_sets);
-
- # Expand the library sets into libraries.
- my @result;
- foreach my $set (@sorted_sets) { push @result, @{$set}; }
- return @result;
-}
-
-# Load in the raw dependency data stored at the end of this file.
-sub load_dependencies {
- return if $DEPENDENCIES_LOADED;
- $DEPENDENCIES_LOADED = 1;
- while (<DATA>) {
- # Parse our line.
- my ($libs, $deps) = /^\s*([^:]+):\s*(.*)\s*$/;
- die "Malformed dependency data" unless defined $deps;
- my @libs = split(' ', $libs);
- my @deps = split(' ', $deps);
-
- # Record our dependency data.
- my $set = \@libs;
- push @SETS, $set;
- foreach my $lib (@libs) { $LIB_TO_SET_MAP{$lib} = $set; }
- $SET_DEPS{$set} = \@deps;
- }
- build_name_map;
-}
-
-# Build a map converting human-friendly component names into internal
-# library names.
-sub build_name_map {
- # Add entries for all the actual libraries.
- foreach my $set (@SETS) {
- foreach my $lib (sort @$set) {
- my $short_name = $lib;
- $short_name =~ s/^(lib)?LLVM([^.]*)\..*$/$2/;
- $short_name =~ tr/A-Z/a-z/;
- $NAME_MAP{$short_name} = [$lib];
- }
- }
-
- # Add target-specific entries
- foreach my $target (@TARGETS_BUILT) {
- # FIXME: Temporary, until we don't switch all targets
- if (defined $NAME_MAP{$target.'asmprinter'}) {
- $NAME_MAP{$target} = [$target.'info',
- $target.'asmprinter',
- $target.'codegen']
- } elsif (defined $NAME_MAP{$target.'codegen'}) {
- $NAME_MAP{$target} = [$target.'info',
- $target.'codegen']
- } else {
- $NAME_MAP{$target} = [$target.'info',
- $NAME_MAP{$target}[0]]
- }
-
- if (defined $NAME_MAP{$target.'asmparser'}) {
- push @{$NAME_MAP{$target}},$target.'asmparser'
- }
-
- if (defined $NAME_MAP{$target.'disassembler'}) {
- push @{$NAME_MAP{$target}},$target.'disassembler'
- }
- }
-
- # Add virtual entries.
- $NAME_MAP{'native'} = have_native_backend() ? [$ARCH] : [];
- $NAME_MAP{'nativecodegen'} = have_native_backend() ? [$ARCH.'codegen'] : [];
- $NAME_MAP{'backend'} = have_native_backend() ? ['native'] : ['cbackend'];
- $NAME_MAP{'engine'} = find_best_engine;
- $NAME_MAP{'all'} = [name_map_entries]; # Must be last.
-}
-
-# Return true if we have a native backend to use.
-sub have_native_backend {
- my %BUILT;
- foreach my $target (@TARGETS_BUILT) { $BUILT{$target} = 1; }
- return defined $NAME_MAP{$ARCH} && defined $BUILT{$ARCH};
-}
-
-# Find a working subclass of ExecutionEngine for this platform.
-sub find_best_engine {
- if (have_native_backend && $TARGET_HAS_JIT) {
- return ['jit', 'native'];
- } else {
- return ['interpreter'];
- }
-}
-
-# Get all the human-friendly component names.
-sub name_map_entries {
- load_dependencies;
- return sort keys %NAME_MAP;
-}
-
-# Map human-readable names to internal library names.
-sub expand_names (@) {
- my @names = @_;
- my @result;
- foreach my $name (@names) {
- if (defined $LIB_TO_SET_MAP{$name}) {
- # We've hit bottom: An actual library name.
- push @result, $name;
- } elsif (defined $NAME_MAP{$name}) {
- # We've found a short name to expand.
- push @result, expand_names(@{$NAME_MAP{$name}});
- } else {
- print STDERR "llvm-config: unknown component name: $name\n";
- exit(1);
- }
- }
- return @result;
-}
-
-# Given a list of internal library names, return all sets of libraries which
-# will need to be included by the linker (in no particular order).
-sub find_all_required_sets (@) {
- my @libs = @_;
- my %sets_added;
- my @result;
- find_all_required_sets_helper(\%sets_added, \@result, @libs);
- return @result;
-}
-
-# Recursive closures are pretty broken in Perl, so we're going to separate
-# this function from find_all_required_sets and pass in the state we need
-# manually, as references. Yes, this is fairly unpleasant.
-sub find_all_required_sets_helper ($$@) {
- my ($sets_added, $result, @libs) = @_;
- foreach my $lib (@libs) {
- my $set = $LIB_TO_SET_MAP{$lib};
- next if defined $$sets_added{$set};
- $$sets_added{$set} = 1;
- push @$result, $set;
- find_all_required_sets_helper($sets_added, $result, @{$SET_DEPS{$set}});
- }
-}
-
-# Print a list of sets, with a label. Used for debugging.
-sub print_sets ($@) {
- my ($label, @sets) = @_;
- my @output;
- foreach my $set (@sets) { push @output, join(',', @$set); }
- print "$label: ", join(';', @output), "\n";
-}
-
-# Returns true if $lib is a key in $added.
-sub has_lib_been_added ($$) {
- my ($added, $lib) = @_;
- return defined $$added{$LIB_TO_SET_MAP{$lib}};
-}
-
-# Returns true if all the dependencies of $set appear in $added.
-sub have_all_deps_been_added ($$) {
- my ($added, $set) = @_;
- #print_sets(" Checking", $set);
- #print_sets(" Wants", $SET_DEPS{$set});
- foreach my $lib (@{$SET_DEPS{$set}}) {
- return 0 unless has_lib_been_added($added, $lib);
- }
- return 1;
-}
-
-# Given a list of sets, topologically sort them using dependencies.
-sub topologically_sort_sets (@) {
- my @sets = @_;
- my %added;
- my @result;
- SCAN: while (@sets) { # We'll delete items from @sets as we go.
- #print_sets("So far", reverse(@result));
- #print_sets("Remaining", @sets);
- for (my $i = 0; $i < @sets; ++$i) {
- my $set = $sets[$i];
- if (have_all_deps_been_added(\%added, $set)) {
- push @result, $set;
- $added{$set} = 1;
- #print "Removing $i.\n";
- splice(@sets, $i, 1);
- next SCAN; # Restart our scan.
- }
- }
- die "Can't find a library with no dependencies";
- }
- return reverse(@result);
-}
-
-# Our library dependency data will be added after the '__END__' token, and will
-# be read through the magic <DATA> filehandle.
-__END__
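The deleted topologically_sort_sets repeatedly rescanned the remaining sets
for one whose dependencies were all emitted, an O(n^2) selection over the
dependency graph. The replacement llvm-config.cpp gets the same ordering from
VisitComponent's post-order depth-first search. A compact sketch of why
post-order DFS is a topological sort (simplified, not code from the patch):

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Append N only after everything it depends on has been appended; the
    // resulting sequence is a topological order of an acyclic graph.
    static void visit(const std::string &N,
                      const std::map<std::string,
                                     std::vector<std::string>> &Deps,
                      std::set<std::string> &Seen,
                      std::vector<std::string> &Out) {
      if (!Seen.insert(N).second)
        return;  // already handled
      auto it = Deps.find(N);
      if (it != Deps.end())
        for (const std::string &D : it->second)
          visit(D, Deps, Seen, Out);
      Out.push_back(N);  // post-order: all dependencies are already in Out
    }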
diff --git a/tools/llvm-cov/LLVMBuild.txt b/tools/llvm-cov/LLVMBuild.txt
new file mode 100644
index 000000000000..87e00d170f90
--- /dev/null
+++ b/tools/llvm-cov/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-cov/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-cov
+parent = Tools
+required_libraries = Instrumentation
diff --git a/tools/llvm-cov/Makefile b/tools/llvm-cov/Makefile
index bd9fa2ad3d92..2d47ce4d4b65 100644
--- a/tools/llvm-cov/Makefile
+++ b/tools/llvm-cov/Makefile
@@ -7,12 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-
-TOOLNAME = llvm-cov
+LEVEL := ../..
+TOOLNAME := llvm-cov
LINK_COMPONENTS := instrumentation
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-diff/DiffConsumer.cpp b/tools/llvm-diff/DiffConsumer.cpp
index c23e8fb91a1b..05280392a47f 100644
--- a/tools/llvm-diff/DiffConsumer.cpp
+++ b/tools/llvm-diff/DiffConsumer.cpp
@@ -44,6 +44,8 @@ static void ComputeNumbering(Function *F, DenseMap<Value*,unsigned> &Numbering){
}
+void Consumer::anchor() { }
+
void DiffConsumer::printValue(Value *V, bool isL) {
if (V->hasName()) {
out << (isa<GlobalValue>(V) ? '@' : '%') << V->getName();
@@ -64,6 +66,10 @@ void DiffConsumer::printValue(Value *V, bool isL) {
}
return;
}
+ if (isa<Constant>(V)) {
+ out << *V;
+ return;
+ }
unsigned N = contexts.size();
while (N > 0) {
diff --git a/tools/llvm-diff/DiffConsumer.h b/tools/llvm-diff/DiffConsumer.h
index b95d42713a64..2060fe1c944f 100644
--- a/tools/llvm-diff/DiffConsumer.h
+++ b/tools/llvm-diff/DiffConsumer.h
@@ -29,6 +29,7 @@ namespace llvm {
/// The interface for consumers of difference data.
class Consumer {
+ virtual void anchor();
public:
/// Record that a local context has been entered. Left and
/// Right are IR "containers" of some sort which are being
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index b240d8c5da5d..a5a99f5b9c47 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -319,15 +319,19 @@ class FunctionDifferenceEngine {
bool Difference = false;
DenseMap<ConstantInt*,BasicBlock*> LCases;
- for (unsigned I = 1, E = LI->getNumCases(); I != E; ++I)
- LCases[LI->getCaseValue(I)] = LI->getSuccessor(I);
- for (unsigned I = 1, E = RI->getNumCases(); I != E; ++I) {
- ConstantInt *CaseValue = RI->getCaseValue(I);
+
+ for (SwitchInst::CaseIt I = LI->case_begin(), E = LI->case_end();
+ I != E; ++I)
+ LCases[I.getCaseValue()] = I.getCaseSuccessor();
+
+ for (SwitchInst::CaseIt I = RI->case_begin(), E = RI->case_end();
+ I != E; ++I) {
+ ConstantInt *CaseValue = I.getCaseValue();
BasicBlock *LCase = LCases[CaseValue];
if (LCase) {
- if (TryUnify) tryUnify(LCase, RI->getSuccessor(I));
+ if (TryUnify) tryUnify(LCase, I.getCaseSuccessor());
LCases.erase(CaseValue);
- } else if (!Difference) {
+ } else if (Complain || !Difference) {
if (Complain)
Engine.logf("right switch has extra case %r") << CaseValue;
Difference = true;
@@ -628,6 +632,8 @@ void FunctionDifferenceEngine::runBlockDiff(BasicBlock::iterator LStart,
}
+void DifferenceEngine::Oracle::anchor() { }
+
void DifferenceEngine::diff(Function *L, Function *R) {
Context C(*this, L, R);
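The switch-diffing change above tracks an LLVM API migration: switch cases
are no longer addressed by raw successor index (with index 0 implicitly the
default) but through SwitchInst::CaseIt iterators, which cover only the
non-default cases. A minimal sketch of the new pattern, assuming an existing
SwitchInst (illustrative, with the header path used by this tree):

    #include "llvm/Instructions.h"

    static void walkCases(llvm::SwitchInst *SI) {
      for (llvm::SwitchInst::CaseIt I = SI->case_begin(), E = SI->case_end();
           I != E; ++I) {
        llvm::ConstantInt *Val = I.getCaseValue();      // the case constant
        llvm::BasicBlock *Dest = I.getCaseSuccessor();  // its target block
        (void)Val; (void)Dest;  // a real consumer would act on these
      }
    }

This is also why the old loops started at index 1: they had to skip the
default destination by hand, whereas the iterator form never sees it.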
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
index 5b4f80b99e55..7ea79e430ff5 100644
--- a/tools/llvm-diff/DifferenceEngine.h
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -50,7 +50,9 @@ namespace llvm {
/// An oracle for answering whether two values are equivalent as
/// operands.
- struct Oracle {
+ class Oracle {
+ virtual void anchor();
+ public:
virtual bool operator()(Value *L, Value *R) = 0;
protected:
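Both Consumer::anchor() and Oracle::anchor() apply LLVM's usual vtable
anchoring idiom: give a class with virtual functions one out-of-line virtual
method so the compiler emits its vtable and RTTI in exactly one object file
rather than weakly in every translation unit that uses the class. A generic
sketch of the pattern (names are illustrative):

    // Widget.h
    class Widget {
      virtual void anchor();  // declared but never defined inline
    public:
      virtual bool run() = 0;
    };

    // Widget.cpp
    void Widget::anchor() { }  // the sole out-of-line virtual: vtable
                               // and type info are emitted here

This is the same issue Clang's -Wweak-vtables diagnoses when no such anchor
exists.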
diff --git a/tools/llvm-diff/LLVMBuild.txt b/tools/llvm-diff/LLVMBuild.txt
new file mode 100644
index 000000000000..fa06a03353bb
--- /dev/null
+++ b/tools/llvm-diff/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-diff/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-diff
+parent = Tools
+required_libraries = AsmParser BitReader
diff --git a/tools/llvm-diff/Makefile b/tools/llvm-diff/Makefile
index 58e49fa95962..f7fa7159c54f 100644
--- a/tools/llvm-diff/Makefile
+++ b/tools/llvm-diff/Makefile
@@ -7,11 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-diff
+LEVEL := ../..
+TOOLNAME := llvm-diff
LINK_COMPONENTS := asmparser bitreader
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index 76853f1e4330..774169bcde17 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -38,7 +38,7 @@ static Module *ReadModule(LLVMContext &Context, StringRef Name) {
SMDiagnostic Diag;
Module *M = ParseIRFile(Name, Diag, Context);
if (!M)
- Diag.Print("llvmdiff", errs());
+ Diag.print("llvm-diff", errs());
return M;
}
diff --git a/tools/llvm-dis/LLVMBuild.txt b/tools/llvm-dis/LLVMBuild.txt
new file mode 100644
index 000000000000..4525010c1fc9
--- /dev/null
+++ b/tools/llvm-dis/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-dis/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-dis
+parent = Tools
+required_libraries = Analysis BitReader
diff --git a/tools/llvm-dis/Makefile b/tools/llvm-dis/Makefile
index be711000861d..aeeeed0d68c9 100644
--- a/tools/llvm-dis/Makefile
+++ b/tools/llvm-dis/Makefile
@@ -6,12 +6,12 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-dis
+LEVEL := ../..
+TOOLNAME := llvm-dis
LINK_COMPONENTS := bitreader analysis
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 9020a527866c..6450ea6ac74b 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataStream.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -126,12 +127,19 @@ int main(int argc, char **argv) {
std::string ErrorMessage;
std::auto_ptr<Module> M;
- {
- OwningPtr<MemoryBuffer> BufferPtr;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr))
- ErrorMessage = ec.message();
+ // Use the bitcode streaming interface
+ DataStreamer *streamer = getDataFileStreamer(InputFilename, &ErrorMessage);
+ if (streamer) {
+ std::string DisplayFilename;
+ if (InputFilename == "-")
+ DisplayFilename = "<stdin>";
else
- M.reset(ParseBitcodeFile(BufferPtr.get(), Context, &ErrorMessage));
+ DisplayFilename = InputFilename;
+ M.reset(getStreamedBitcodeModule(DisplayFilename, streamer, Context,
+ &ErrorMessage));
+ if (M.get() != 0 && M->MaterializeAllPermanently(&ErrorMessage)) {
+ M.reset();
+ }
}
if (M.get() == 0) {
@@ -183,4 +191,3 @@ int main(int argc, char **argv) {
return 0;
}
-
diff --git a/tools/llvm-dwarfdump/LLVMBuild.txt b/tools/llvm-dwarfdump/LLVMBuild.txt
new file mode 100644
index 000000000000..28b7c4cda480
--- /dev/null
+++ b/tools/llvm-dwarfdump/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-dwarfdump/LLVMBuild.txt ---------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-dwarfdump
+parent = Tools
+required_libraries = DebugInfo Object
diff --git a/tools/llvm-dwarfdump/Makefile b/tools/llvm-dwarfdump/Makefile
index e61f27d298b9..7ca1a8d877d0 100644
--- a/tools/llvm-dwarfdump/Makefile
+++ b/tools/llvm-dwarfdump/Makefile
@@ -6,12 +6,12 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-dwarfdump
-LINK_COMPONENTS = DebugInfo Object
+LEVEL := ../..
+TOOLNAME := llvm-dwarfdump
+LINK_COMPONENTS := DebugInfo Object
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-extract/LLVMBuild.txt b/tools/llvm-extract/LLVMBuild.txt
new file mode 100644
index 000000000000..1b1a4c36cdd1
--- /dev/null
+++ b/tools/llvm-extract/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-extract/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-extract
+parent = Tools
+required_libraries = AsmParser BitReader BitWriter IPO
diff --git a/tools/llvm-extract/Makefile b/tools/llvm-extract/Makefile
index 5672aa3299a2..a1e93f5ce468 100644
--- a/tools/llvm-extract/Makefile
+++ b/tools/llvm-extract/Makefile
@@ -7,12 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-
-TOOLNAME = llvm-extract
+LEVEL := ../..
+TOOLNAME := llvm-extract
LINK_COMPONENTS := ipo bitreader bitwriter asmparser
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index f6227ee25553..2ed11c52b2b3 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -90,7 +90,7 @@ int main(int argc, char **argv) {
M.reset(getLazyIRFileModule(InputFilename, Err, Context));
if (M.get() == 0) {
- Err.Print(argv[0], errs());
+ Err.print(argv[0], errs());
return 1;
}
@@ -99,7 +99,7 @@ int main(int argc, char **argv) {
// Figure out which globals we should extract.
for (size_t i = 0, e = ExtractGlobals.size(); i != e; ++i) {
- GlobalValue *GV = M.get()->getNamedGlobal(ExtractGlobals[i]);
+ GlobalValue *GV = M->getNamedGlobal(ExtractGlobals[i]);
if (!GV) {
errs() << argv[0] << ": program doesn't contain global named '"
<< ExtractGlobals[i] << "'!\n";
@@ -117,8 +117,8 @@ int main(int argc, char **argv) {
"invalid regex: " << Error;
}
bool match = false;
- for (Module::global_iterator GV = M.get()->global_begin(),
- E = M.get()->global_end(); GV != E; GV++) {
+ for (Module::global_iterator GV = M->global_begin(),
+ E = M->global_end(); GV != E; GV++) {
if (RegEx.match(GV->getName())) {
GVs.insert(&*GV);
match = true;
@@ -133,7 +133,7 @@ int main(int argc, char **argv) {
// Figure out which functions we should extract.
for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) {
- GlobalValue *GV = M.get()->getFunction(ExtractFuncs[i]);
+ GlobalValue *GV = M->getFunction(ExtractFuncs[i]);
if (!GV) {
errs() << argv[0] << ": program doesn't contain function named '"
<< ExtractFuncs[i] << "'!\n";
@@ -151,7 +151,7 @@ int main(int argc, char **argv) {
"invalid regex: " << Error;
}
bool match = false;
- for (Module::iterator F = M.get()->begin(), E = M.get()->end(); F != E;
+ for (Module::iterator F = M->begin(), E = M->end(); F != E;
F++) {
if (RegEx.match(F->getName())) {
GVs.insert(&*F);
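The regex-extraction loops above now go through the Module handle directly
instead of M.get(). As a self-contained sketch of the same regex-driven
collection (collectMatches is an illustrative helper, not part of the patch):

  #include <string>
  #include "llvm/Module.h"
  #include "llvm/ADT/SetVector.h"
  #include "llvm/Support/Regex.h"

  // Gather globals whose names match Pattern, mirroring the loop above.
  // SetVector deduplicates while preserving first-seen order.
  static bool collectMatches(llvm::Module &M, llvm::StringRef Pattern,
                             llvm::SetVector<llvm::GlobalValue *> &GVs,
                             std::string &Error) {
    llvm::Regex RE(Pattern);
    if (!RE.isValid(Error))
      return false; // Error describes the bad regex.
    for (llvm::Module::global_iterator GV = M.global_begin(),
                                       E = M.global_end();
         GV != E; ++GV)
      if (RE.match(GV->getName()))
        GVs.insert(&*GV);
    return true;
  }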
diff --git a/tools/llvm-ld/CMakeLists.txt b/tools/llvm-ld/CMakeLists.txt
index 370bcb4abf52..d328a04b0ebe 100644
--- a/tools/llvm-ld/CMakeLists.txt
+++ b/tools/llvm-ld/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS ipo scalaropts linker archive bitwriter)
+set(LLVM_LINK_COMPONENTS ipo scalaropts linker archive bitwriter vectorize)
add_llvm_tool(llvm-ld
Optimize.cpp
diff --git a/tools/llvm-ld/LLVMBuild.txt b/tools/llvm-ld/LLVMBuild.txt
new file mode 100644
index 000000000000..eed0452dcf24
--- /dev/null
+++ b/tools/llvm-ld/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-ld/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-ld
+parent = Tools
+required_libraries = Archive BitWriter IPO Linker Scalar
diff --git a/tools/llvm-ld/Makefile b/tools/llvm-ld/Makefile
index 1ef9bf117450..8793ca9c1074 100644
--- a/tools/llvm-ld/Makefile
+++ b/tools/llvm-ld/Makefile
@@ -7,9 +7,8 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-
-TOOLNAME = llvm-ld
-LINK_COMPONENTS = ipo scalaropts linker archive bitwriter
+LEVEL := ../..
+TOOLNAME := llvm-ld
+LINK_COMPONENTS := ipo scalaropts linker archive bitwriter vectorize
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-ld/llvm-ld.cpp b/tools/llvm-ld/llvm-ld.cpp
index 6b4c3c7728e5..ecf0476eb291 100644
--- a/tools/llvm-ld/llvm-ld.cpp
+++ b/tools/llvm-ld/llvm-ld.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Config/config.h"
#include <memory>
#include <cstring>
using namespace llvm;
@@ -424,7 +423,7 @@ static void EmitShellScript(char **argv, Module *M) {
PrintAndExit(ErrMsg, M);
return;
-#endif
+#else
// Output the script to start the program...
std::string ErrorInfo;
@@ -470,6 +469,7 @@ static void EmitShellScript(char **argv, Module *M) {
}
Out2.os() << " " << BitcodeOutputFilename << " ${1+\"$@\"}\n";
Out2.keep();
+#endif
}
// BuildLinkItems -- This function generates a LinkItemList for the LinkItems
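The EmitShellScript hunk converts a dangling #endif into an #if/#else/#endif
pair, so the Unix shell-script emission is no longer compiled on Windows
hosts, where the function bails out early. Schematically (the guard macro
and helper names here are illustrative only):

  // After the fix: exactly one branch of the function is compiled per host.
  void EmitWrapper() {
  #if defined(_WIN32)
    ReportUnsupported(); // Windows has no shell-script wrappers.
    return;
  #else
    WriteShellScript();  // Compiled only when the guard is false.
  #endif
  }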
diff --git a/tools/llvm-link/LLVMBuild.txt b/tools/llvm-link/LLVMBuild.txt
new file mode 100644
index 000000000000..6399dede784e
--- /dev/null
+++ b/tools/llvm-link/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-link/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-link
+parent = Tools
+required_libraries = AsmParser BitReader BitWriter Linker
diff --git a/tools/llvm-link/Makefile b/tools/llvm-link/Makefile
index 26370187c55f..2553db0cd39c 100644
--- a/tools/llvm-link/Makefile
+++ b/tools/llvm-link/Makefile
@@ -6,12 +6,12 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-link
-LINK_COMPONENTS = linker bitreader bitwriter asmparser
+LEVEL := ../..
+TOOLNAME := llvm-link
+LINK_COMPONENTS := linker bitreader bitwriter asmparser
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index 95ad1ca5a17e..378a83368f37 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -69,7 +69,7 @@ static inline std::auto_ptr<Module> LoadFile(const char *argv0,
Result = ParseIRFile(FNStr, Err, Context);
if (Result) return std::auto_ptr<Module>(Result); // Load successful!
- Err.Print(argv0, errs());
+ Err.print(argv0, errs());
return std::auto_ptr<Module>();
}
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index a9381b591a1f..a8cd7c1c897f 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -21,6 +21,8 @@
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
@@ -72,14 +74,16 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
switch (S) {
case MCDisassembler::Fail:
SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
- "invalid instruction encoding", "warning");
+ SourceMgr::DK_Warning,
+ "invalid instruction encoding");
if (Size == 0)
Size = 1; // skip illegible bytes
break;
case MCDisassembler::SoftFail:
SM.PrintMessage(SMLoc::getFromPointer(Bytes[Index].second),
- "potentially undefined instruction encoding", "warning");
+ SourceMgr::DK_Warning,
+ "potentially undefined instruction encoding");
// Fall through
case MCDisassembler::Success:
@@ -125,8 +129,8 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
unsigned ByteVal;
if (Value.getAsInteger(0, ByteVal) || ByteVal > 255) {
// If we have an error, print it and skip to the end of line.
- SM.PrintMessage(SMLoc::getFromPointer(Value.data()),
- "invalid input token", "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Value.data()), SourceMgr::DK_Error,
+ "invalid input token");
Str = Str.substr(Str.find('\n'));
ByteArray.clear();
continue;
@@ -153,21 +157,34 @@ int Disassembler::disassemble(const Target &T,
return -1;
}
- OwningPtr<const MCSubtargetInfo> STI(T.createMCSubtargetInfo(Triple, Cpu, FeaturesStr));
+ OwningPtr<const MCSubtargetInfo> STI(T.createMCSubtargetInfo(Triple, Cpu,
+ FeaturesStr));
if (!STI) {
errs() << "error: no subtarget info for target " << Triple << "\n";
return -1;
}
-
+
OwningPtr<const MCDisassembler> DisAsm(T.createMCDisassembler(*STI));
if (!DisAsm) {
errs() << "error: no disassembler for target " << Triple << "\n";
return -1;
}
+ OwningPtr<const MCRegisterInfo> MRI(T.createMCRegInfo(Triple));
+ if (!MRI) {
+ errs() << "error: no register info for target " << Triple << "\n";
+ return -1;
+ }
+
+ OwningPtr<const MCInstrInfo> MII(T.createMCInstrInfo());
+ if (!MII) {
+ errs() << "error: no instruction info for target " << Triple << "\n";
+ return -1;
+ }
+
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
- OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(AsmPrinterVariant,
- *AsmInfo, *STI));
+ OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(AsmPrinterVariant, *AsmInfo,
+ *MII, *MRI, *STI));
if (!IP) {
errs() << "error: no instruction printer for target " << Triple << '\n';
return -1;
@@ -247,7 +264,6 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
break;
}
- EDDisassembler::initialize();
OwningPtr<EDDisassembler>
disassembler(EDDisassembler::getDisassembler(TS.c_str(), AS));
@@ -294,7 +310,6 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
Out << operandIndex << "-";
switch (token->type()) {
- default: Out << "?"; break;
case EDToken::kTokenWhitespace: Out << "w"; break;
case EDToken::kTokenPunctuation: Out << "p"; break;
case EDToken::kTokenOpcode: Out << "o"; break;
@@ -365,4 +380,3 @@ int Disassembler::disassembleEnhanced(const std::string &TS,
return 0;
}
-
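Both Disassembler.cpp hunks above track the widened createMCInstPrinter
signature, which now takes MCInstrInfo and MCRegisterInfo in addition to
MCAsmInfo and MCSubtargetInfo. A sketch of the full construction sequence
(printOneInst is an illustrative wrapper, not part of the patch):

  #include "llvm/ADT/OwningPtr.h"
  #include "llvm/MC/MCAsmInfo.h"
  #include "llvm/MC/MCInst.h"
  #include "llvm/MC/MCInstPrinter.h"
  #include "llvm/MC/MCInstrInfo.h"
  #include "llvm/MC/MCRegisterInfo.h"
  #include "llvm/MC/MCSubtargetInfo.h"
  #include "llvm/Support/TargetRegistry.h"
  #include "llvm/Support/raw_ostream.h"

  // Build the MC info objects a printer now depends on, then print one
  // instruction. All five info objects must outlive the printer.
  static bool printOneInst(const llvm::Target &T, llvm::StringRef TT,
                           const llvm::MCInst &Inst, llvm::raw_ostream &OS) {
    llvm::OwningPtr<const llvm::MCAsmInfo> MAI(T.createMCAsmInfo(TT));
    llvm::OwningPtr<const llvm::MCInstrInfo> MII(T.createMCInstrInfo());
    llvm::OwningPtr<const llvm::MCRegisterInfo> MRI(T.createMCRegInfo(TT));
    llvm::OwningPtr<const llvm::MCSubtargetInfo>
        STI(T.createMCSubtargetInfo(TT, "", ""));
    if (!MAI || !MII || !MRI || !STI)
      return false;
    llvm::OwningPtr<llvm::MCInstPrinter> IP(T.createMCInstPrinter(
        MAI->getAssemblerDialect(), *MAI, *MII, *MRI, *STI));
    if (!IP)
      return false;
    IP->printInst(&Inst, OS, ""); // Empty annotation string.
    return true;
  }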
diff --git a/tools/llvm-mc/LLVMBuild.txt b/tools/llvm-mc/LLVMBuild.txt
new file mode 100644
index 000000000000..dff5358f3858
--- /dev/null
+++ b/tools/llvm-mc/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-mc/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-mc
+parent = Tools
+required_libraries = MC MCDisassembler MCParser Support all-targets
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
index 934a6e4dd081..b147fadb5747 100644
--- a/tools/llvm-mc/Makefile
+++ b/tools/llvm-mc/Makefile
@@ -7,18 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-mc
+LEVEL := ../..
+TOOLNAME := llvm-mc
+LINK_COMPONENTS := all-targets MCDisassembler MCParser MC support
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
-
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) MCDisassembler MCParser MC support
-
-include $(LLVM_SRC_ROOT)/Makefile.rules
+TOOL_NO_EXPORTS := 1
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 5fb3fdf5b5f1..d882e01529de 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -70,9 +70,6 @@ RelaxAll("mc-relax-all", cl::desc("Relax all fixups"));
static cl::opt<bool>
NoExecStack("mc-no-exec-stack", cl::desc("File doesn't need an exec stack"));
-static cl::opt<bool>
-EnableLogging("enable-api-logging", cl::desc("Enable MC API logging"));
-
enum OutputFileType {
OFT_Null,
OFT_AssemblyFile,
@@ -152,6 +149,10 @@ NoInitialTextSection("n", cl::desc("Don't assume assembly file starts "
static cl::opt<bool>
SaveTempLabels("L", cl::desc("Don't discard temporary labels"));
+static cl::opt<bool>
+GenDwarfForAssembly("g", cl::desc("Generate dwarf debugging info for assembly "
+ "source files"));
+
enum ActionType {
AC_AsLex,
AC_Assemble,
@@ -175,7 +176,7 @@ Action(cl::desc("Action to perform:"),
static const Target *GetTarget(const char *ProgName) {
// Figure out the target triple.
if (TripleName.empty())
- TripleName = sys::getHostTriple();
+ TripleName = sys::getDefaultTargetTriple();
Triple TheTriple(Triple::normalize(TripleName));
const Target *TheTarget = 0;
@@ -230,6 +231,17 @@ static tool_output_file *GetOutputStream() {
return Out;
}
+static std::string DwarfDebugFlags;
+static void setDwarfDebugFlags(int argc, char **argv) {
+ if (!getenv("RC_DEBUG_OPTIONS"))
+ return;
+ for (int i = 0; i < argc; i++) {
+ DwarfDebugFlags += argv[i];
+ if (i + 1 < argc)
+ DwarfDebugFlags += " ";
+ }
+}
+
static int AsLexInput(const char *ProgName) {
OwningPtr<MemoryBuffer> BufferPtr;
if (error_code ec = MemoryBuffer::getFileOrSTDIN(InputFilename, BufferPtr)) {
@@ -267,7 +279,8 @@ static int AsLexInput(const char *ProgName) {
switch (Tok.getKind()) {
default:
- SrcMgr.PrintMessage(Lexer.getLoc(), "unknown token", "warning");
+ SrcMgr.PrintMessage(Lexer.getLoc(), SourceMgr::DK_Warning,
+ "unknown token");
Error = true;
break;
case AsmToken::Error:
@@ -370,12 +383,16 @@ static int AssembleInput(const char *ProgName) {
// FIXME: This is not pretty. MCContext has a ptr to MCObjectFileInfo and
// MCObjectFileInfo needs a MCContext reference in order to initialize itself.
OwningPtr<MCObjectFileInfo> MOFI(new MCObjectFileInfo());
- MCContext Ctx(*MAI, *MRI, MOFI.get());
+ MCContext Ctx(*MAI, *MRI, MOFI.get(), &SrcMgr);
MOFI->InitMCObjectFileInfo(TripleName, RelocModel, CMModel, Ctx);
if (SaveTempLabels)
Ctx.setAllowTemporaryLabels(false);
+ Ctx.setGenDwarfForAssembly(GenDwarfForAssembly);
+ if (!DwarfDebugFlags.empty())
+ Ctx.setDwarfDebugFlags(StringRef(DwarfDebugFlags));
+
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
if (MAttrs.size()) {
@@ -399,7 +416,7 @@ static int AssembleInput(const char *ProgName) {
// FIXME: There is a bit of code duplication with addPassesToEmitFile.
if (FileType == OFT_AssemblyFile) {
MCInstPrinter *IP =
- TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *STI);
+ TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI, *MCII, *MRI, *STI);
MCCodeEmitter *CE = 0;
MCAsmBackend *MAB = 0;
if (ShowEncoding) {
@@ -408,8 +425,10 @@ static int AssembleInput(const char *ProgName) {
}
Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/true,
/*useLoc*/ true,
- /*useCFI*/ true, IP, CE, MAB,
- ShowInst));
+ /*useCFI*/ true,
+ /*useDwarfDirectory*/ true,
+ IP, CE, MAB, ShowInst));
+
} else if (FileType == OFT_Null) {
Str.reset(createNullStreamer(Ctx));
} else {
@@ -421,10 +440,6 @@ static int AssembleInput(const char *ProgName) {
NoExecStack));
}
- if (EnableLogging) {
- Str.reset(createLoggingStreamer(Str.take(), errs()));
- }
-
OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr, Ctx,
*Str.get(), *MAI));
OwningPtr<MCTargetAsmParser> TAP(TheTarget->createMCAsmParser(*STI, *Parser));
@@ -497,11 +512,14 @@ int main(int argc, char **argv) {
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
+ // Register the target printer for --version.
+ cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
+
cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n");
TripleName = Triple::normalize(TripleName);
+ setDwarfDebugFlags(argc, argv);
switch (Action) {
- default:
case AC_AsLex:
return AsLexInput(argv[0]);
case AC_Assemble:
@@ -511,7 +529,4 @@ int main(int argc, char **argv) {
case AC_EDisassemble:
return DisassembleInput(argv[0], true);
}
-
- return 0;
}
-
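The llvm-mc changes above add a -g flag that drives Ctx.setGenDwarfForAssembly
and capture the full command line into the DWARF debug flags when
RC_DEBUG_OPTIONS is set. The helper below restates setDwarfDebugFlags as a
standalone function (captureDebugFlags is an illustrative name):

  #include <cstdlib>
  #include <string>

  // Join argv into one space-separated string for the DWARF debug-flags
  // attribute, but only when RC_DEBUG_OPTIONS is present in the environment.
  static std::string captureDebugFlags(int argc, char **argv) {
    std::string Flags;
    if (!std::getenv("RC_DEBUG_OPTIONS"))
      return Flags; // Feature is opt-in via the environment.
    for (int i = 0; i < argc; ++i) {
      Flags += argv[i];
      if (i + 1 < argc)
        Flags += ' ';
    }
    return Flags;
  }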
diff --git a/tools/llvm-nm/LLVMBuild.txt b/tools/llvm-nm/LLVMBuild.txt
new file mode 100644
index 000000000000..38ecbfd2e6f5
--- /dev/null
+++ b/tools/llvm-nm/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-nm/LLVMBuild.txt ----------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-nm
+parent = Tools
+required_libraries = Archive BitReader Object
diff --git a/tools/llvm-nm/Makefile b/tools/llvm-nm/Makefile
index 6bb4cd4acc23..d9cee989950d 100644
--- a/tools/llvm-nm/Makefile
+++ b/tools/llvm-nm/Makefile
@@ -6,12 +6,12 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-nm
-LINK_COMPONENTS = archive bitreader object
+LEVEL := ../..
+TOOLNAME := llvm-nm
+LINK_COMPONENTS := archive bitreader object
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index e79d72d1933c..8d9e51e56ebd 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -27,6 +27,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Program.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Format.h"
@@ -60,6 +61,12 @@ namespace {
cl::alias UndefinedOnly2("u", cl::desc("Alias for --undefined-only"),
cl::aliasopt(UndefinedOnly));
+ cl::opt<bool> DynamicSyms("dynamic",
+ cl::desc("Display the dynamic symbols instead "
+ "of normal symbols."));
+ cl::alias DynamicSyms2("D", cl::desc("Alias for --dynamic"),
+ cl::aliasopt(DynamicSyms));
+
cl::opt<bool> DefinedOnly("defined-only",
cl::desc("Show only defined symbols"));
@@ -110,6 +117,19 @@ namespace {
std::string ToolName;
}
+
+static void error(Twine message, Twine path = Twine()) {
+ errs() << ToolName << ": " << path << ": " << message << ".\n";
+}
+
+static bool error(error_code ec, Twine path = Twine()) {
+ if (ec) {
+ error(ec.message(), path);
+ return true;
+ }
+ return false;
+}
+
namespace {
struct NMSymbol {
uint64_t Address;
@@ -144,14 +164,6 @@ namespace {
StringRef CurrentFilename;
typedef std::vector<NMSymbol> SymbolListT;
SymbolListT SymbolList;
-
- bool error(error_code ec) {
- if (!ec) return false;
-
- outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
- outs().flush();
- return true;
- }
}
static void SortAndPrintSymbolList() {
@@ -192,9 +204,10 @@ static void SortAndPrintSymbolList() {
strcpy(SymbolSizeStr, " ");
if (i->Address != object::UnknownAddressOrSize)
- format("%08"PRIx64, i->Address).print(SymbolAddrStr, sizeof(SymbolAddrStr));
+ format("%08" PRIx64, i->Address).print(SymbolAddrStr,
+ sizeof(SymbolAddrStr));
if (i->Size != object::UnknownAddressOrSize)
- format("%08"PRIx64, i->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
+ format("%08" PRIx64, i->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
if (OutputFormat == posix) {
outs() << i->Name << " " << i->TypeChar << " "
@@ -271,13 +284,17 @@ static void DumpSymbolNamesFromModule(Module *M) {
static void DumpSymbolNamesFromObject(ObjectFile *obj) {
error_code ec;
- for (symbol_iterator i = obj->begin_symbols(),
- e = obj->end_symbols();
- i != e; i.increment(ec)) {
+ symbol_iterator ibegin = obj->begin_symbols();
+ symbol_iterator iend = obj->end_symbols();
+ if (DynamicSyms) {
+ ibegin = obj->begin_dynamic_symbols();
+ iend = obj->end_dynamic_symbols();
+ }
+ for (symbol_iterator i = ibegin; i != iend; i.increment(ec)) {
if (error(ec)) break;
- bool internal;
- if (error(i->isInternal(internal))) break;
- if (!DebugSyms && internal)
+ uint32_t symflags;
+ if (error(i->getFlags(symflags))) break;
+ if (!DebugSyms && (symflags & SymbolRef::SF_FormatSpecific))
continue;
NMSymbol s;
s.Size = object::UnknownAddressOrSize;
@@ -286,7 +303,7 @@ static void DumpSymbolNamesFromObject(ObjectFile *obj) {
if (error(i->getSize(s.Size))) break;
}
if (PrintAddress)
- if (error(i->getOffset(s.Address))) break;
+ if (error(i->getAddress(s.Address))) break;
if (error(i->getNMTypeChar(s.TypeChar))) break;
if (error(i->getName(s.Name))) break;
SymbolList.push_back(s);
@@ -297,38 +314,39 @@ static void DumpSymbolNamesFromObject(ObjectFile *obj) {
}
static void DumpSymbolNamesFromFile(std::string &Filename) {
+ if (Filename != "-" && !sys::fs::exists(Filename)) {
+ errs() << ToolName << ": '" << Filename << "': " << "No such file\n";
+ return;
+ }
+
+ OwningPtr<MemoryBuffer> Buffer;
+ if (error(MemoryBuffer::getFileOrSTDIN(Filename, Buffer), Filename))
+ return;
+
+ sys::fs::file_magic magic = sys::fs::identify_magic(Buffer->getBuffer());
+
LLVMContext &Context = getGlobalContext();
std::string ErrorMessage;
- sys::Path aPath(Filename);
- bool exists;
- if (sys::fs::exists(aPath.str(), exists) || !exists)
- errs() << ToolName << ": '" << Filename << "': " << "No such file\n";
- // Note: Currently we do not support reading an archive from stdin.
- if (Filename == "-" || aPath.isBitcodeFile()) {
- OwningPtr<MemoryBuffer> Buffer;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buffer))
- ErrorMessage = ec.message();
+ if (magic == sys::fs::file_magic::bitcode) {
Module *Result = 0;
- if (Buffer.get())
- Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
-
+ Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage);
if (Result) {
DumpSymbolNamesFromModule(Result);
delete Result;
- } else
- errs() << ToolName << ": " << Filename << ": " << ErrorMessage << "\n";
-
- } else if (aPath.isArchive()) {
- OwningPtr<Binary> arch;
- if (error_code ec = object::createBinary(aPath.str(), arch)) {
- errs() << ToolName << ": " << Filename << ": " << ec.message() << ".\n";
+ } else {
+ error(ErrorMessage, Filename);
return;
}
+ } else if (magic == sys::fs::file_magic::archive) {
+ OwningPtr<Binary> arch;
+ if (error(object::createBinary(Buffer.take(), arch), Filename))
+ return;
+
if (object::Archive *a = dyn_cast<object::Archive>(arch.get())) {
for (object::Archive::child_iterator i = a->begin_children(),
e = a->end_children(); i != e; ++i) {
OwningPtr<Binary> child;
- if (error_code ec = i->getAsBinary(child)) {
+ if (i->getAsBinary(child)) {
// Try opening it as a bitcode file.
OwningPtr<MemoryBuffer> buff(i->getBuffer());
Module *Result = 0;
@@ -347,12 +365,10 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
}
}
}
- } else if (aPath.isObjectFile()) {
+ } else if (magic.is_object()) {
OwningPtr<Binary> obj;
- if (error_code ec = object::createBinary(aPath.str(), obj)) {
- errs() << ToolName << ": " << Filename << ": " << ec.message() << ".\n";
+ if (error(object::createBinary(Buffer.take(), obj), Filename))
return;
- }
if (object::ObjectFile *o = dyn_cast<ObjectFile>(obj.get()))
DumpSymbolNamesFromObject(o);
} else {
@@ -370,6 +386,10 @@ int main(int argc, char **argv) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm symbol table dumper\n");
+ // llvm-nm only reads binary files.
+ if (error(sys::Program::ChangeStdinToBinary()))
+ return 1;
+
ToolName = argv[0];
if (BSDFormat) OutputFormat = bsd;
if (POSIXFormat) OutputFormat = posix;
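DumpSymbolNamesFromFile now reads the input once and sniffs its type from the
buffer's leading bytes rather than from sys::Path name heuristics, which lets
any recognized format arrive via stdin. The dispatch reduces to the sketch
below (classify is an illustrative helper; the real code parses instead of
returning a tag):

  #include <string>
  #include "llvm/ADT/OwningPtr.h"
  #include "llvm/Support/FileSystem.h"
  #include "llvm/Support/MemoryBuffer.h"

  // Identify a file's kind from its contents, mirroring the new flow.
  static int classify(const std::string &Path) {
    llvm::OwningPtr<llvm::MemoryBuffer> Buf;
    if (llvm::MemoryBuffer::getFileOrSTDIN(Path, Buf))
      return -1; // Could not read the file (or stdin).
    llvm::sys::fs::file_magic Magic =
        llvm::sys::fs::identify_magic(Buf->getBuffer());
    if (Magic == llvm::sys::fs::file_magic::bitcode)
      return 0; // LLVM bitcode: hand to ParseBitcodeFile.
    if (Magic == llvm::sys::fs::file_magic::archive)
      return 1; // Archive: walk members via object::createBinary.
    if (Magic.is_object())
      return 2; // Native object: use the ObjectFile readers.
    return 3;   // Unrecognized.
  }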
diff --git a/tools/llvm-objdump/LLVMBuild.txt b/tools/llvm-objdump/LLVMBuild.txt
new file mode 100644
index 000000000000..d16c501a6cca
--- /dev/null
+++ b/tools/llvm-objdump/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-objdump/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-objdump
+parent = Tools
+required_libraries = DebugInfo MC MCDisassembler MCParser Object all-targets
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 3f44b295d32c..0e7f3fdebebc 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -14,7 +14,7 @@
#include "llvm-objdump.h"
#include "MCFunction.h"
#include "llvm/Support/MachO.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachO.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/STLExtras.h"
@@ -26,6 +26,7 @@
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -53,27 +54,28 @@ static cl::opt<std::string>
static const Target *GetTarget(const MachOObject *MachOObj) {
// Figure out the target triple.
- llvm::Triple TT("unknown-unknown-unknown");
- switch (MachOObj->getHeader().CPUType) {
- case llvm::MachO::CPUTypeI386:
- TT.setArch(Triple::ArchType(Triple::x86));
- break;
- case llvm::MachO::CPUTypeX86_64:
- TT.setArch(Triple::ArchType(Triple::x86_64));
- break;
- case llvm::MachO::CPUTypeARM:
- TT.setArch(Triple::ArchType(Triple::arm));
- break;
- case llvm::MachO::CPUTypePowerPC:
- TT.setArch(Triple::ArchType(Triple::ppc));
- break;
- case llvm::MachO::CPUTypePowerPC64:
- TT.setArch(Triple::ArchType(Triple::ppc64));
- break;
+ if (TripleName.empty()) {
+ llvm::Triple TT("unknown-unknown-unknown");
+ switch (MachOObj->getHeader().CPUType) {
+ case llvm::MachO::CPUTypeI386:
+ TT.setArch(Triple::ArchType(Triple::x86));
+ break;
+ case llvm::MachO::CPUTypeX86_64:
+ TT.setArch(Triple::ArchType(Triple::x86_64));
+ break;
+ case llvm::MachO::CPUTypeARM:
+ TT.setArch(Triple::ArchType(Triple::arm));
+ break;
+ case llvm::MachO::CPUTypePowerPC:
+ TT.setArch(Triple::ArchType(Triple::ppc));
+ break;
+ case llvm::MachO::CPUTypePowerPC64:
+ TT.setArch(Triple::ArchType(Triple::ppc64));
+ break;
+ }
+ TripleName = TT.str();
}
- TripleName = TT.str();
-
// Get the target specific parser.
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
@@ -85,57 +87,43 @@ static const Target *GetTarget(const MachOObject *MachOObj) {
return 0;
}
-struct Section {
- char Name[16];
- uint64_t Address;
- uint64_t Size;
- uint32_t Offset;
- uint32_t NumRelocs;
- uint64_t RelocTableOffset;
-};
-
-struct Symbol {
- uint64_t Value;
- uint32_t StringIndex;
- uint8_t SectionIndex;
- bool operator<(const Symbol &RHS) const { return Value < RHS.Value; }
+struct SymbolSorter {
+ bool operator()(const SymbolRef &A, const SymbolRef &B) {
+ SymbolRef::Type AType, BType;
+ A.getType(AType);
+ B.getType(BType);
+
+ uint64_t AAddr, BAddr;
+ if (AType != SymbolRef::ST_Function)
+ AAddr = 0;
+ else
+ A.getAddress(AAddr);
+ if (BType != SymbolRef::ST_Function)
+ BAddr = 0;
+ else
+ B.getAddress(BAddr);
+ return AAddr < BAddr;
+ }
};
-template <typename T>
-static Section copySection(const T &Sect) {
- Section S;
- memcpy(S.Name, Sect->Name, 16);
- S.Address = Sect->Address;
- S.Size = Sect->Size;
- S.Offset = Sect->Offset;
- S.NumRelocs = Sect->NumRelocationTableEntries;
- S.RelocTableOffset = Sect->RelocationTableOffset;
- return S;
-}
-
-template <typename T>
-static Symbol copySymbol(const T &STE) {
- Symbol S;
- S.StringIndex = STE->StringIndex;
- S.SectionIndex = STE->SectionIndex;
- S.Value = STE->Value;
- return S;
-}
-
// Print additional information about an address, if available.
-static void DumpAddress(uint64_t Address, ArrayRef<Section> Sections,
+static void DumpAddress(uint64_t Address, ArrayRef<SectionRef> Sections,
MachOObject *MachOObj, raw_ostream &OS) {
for (unsigned i = 0; i != Sections.size(); ++i) {
- uint64_t addr = Address-Sections[i].Address;
- if (Sections[i].Address <= Address &&
- Sections[i].Address + Sections[i].Size > Address) {
- StringRef bytes = MachOObj->getData(Sections[i].Offset,
- Sections[i].Size);
+ uint64_t SectAddr = 0, SectSize = 0;
+ Sections[i].getAddress(SectAddr);
+ Sections[i].getSize(SectSize);
+ uint64_t addr = Address - SectAddr;
+ if (SectAddr <= Address &&
+ SectAddr + SectSize > Address) {
+ StringRef bytes, name;
+ Sections[i].getContents(bytes);
+ Sections[i].getName(name);
// Print constant strings.
- if (!strcmp(Sections[i].Name, "__cstring"))
+ if (!name.compare("__cstring"))
OS << '"' << bytes.substr(addr, bytes.find('\0', addr)) << '"';
// Print constant CFStrings.
- if (!strcmp(Sections[i].Name, "__cfstring"))
+ if (!name.compare("__cfstring"))
OS << "@\"" << bytes.substr(addr, bytes.find('\0', addr)) << '"';
}
}
@@ -212,59 +200,34 @@ static void emitDOTFile(const char *FileName, const MCFunction &f,
}
static void getSectionsAndSymbols(const macho::Header &Header,
- MachOObject *MachOObj,
+ MachOObjectFile *MachOObj,
InMemoryStruct<macho::SymtabLoadCommand> *SymtabLC,
- std::vector<Section> &Sections,
- std::vector<Symbol> &Symbols,
+ std::vector<SectionRef> &Sections,
+ std::vector<SymbolRef> &Symbols,
SmallVectorImpl<uint64_t> &FoundFns) {
- // Make a list of all symbols in the object file.
- for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
- const MachOObject::LoadCommandInfo &LCI = MachOObj->getLoadCommandInfo(i);
- if (LCI.Command.Type == macho::LCT_Segment) {
- InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
- MachOObj->ReadSegmentLoadCommand(LCI, SegmentLC);
-
- // Store the sections in this segment.
- for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section> Sect;
- MachOObj->ReadSection(LCI, SectNum, Sect);
- Sections.push_back(copySection(Sect));
+ error_code ec;
+ for (symbol_iterator SI = MachOObj->begin_symbols(),
+ SE = MachOObj->end_symbols(); SI != SE; SI.increment(ec))
+ Symbols.push_back(*SI);
+
+ for (section_iterator SI = MachOObj->begin_sections(),
+ SE = MachOObj->end_sections(); SI != SE; SI.increment(ec)) {
+ SectionRef SR = *SI;
+ StringRef SectName;
+ SR.getName(SectName);
+ Sections.push_back(*SI);
+ }
- }
- } else if (LCI.Command.Type == macho::LCT_Segment64) {
- InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
- MachOObj->ReadSegment64LoadCommand(LCI, Segment64LC);
-
- // Store the sections in this segment.
- for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections;
- ++SectNum) {
- InMemoryStruct<macho::Section64> Sect64;
- MachOObj->ReadSection64(LCI, SectNum, Sect64);
- Sections.push_back(copySection(Sect64));
- }
- } else if (LCI.Command.Type == macho::LCT_FunctionStarts) {
+ for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+ const MachOObject::LoadCommandInfo &LCI =
+ MachOObj->getObject()->getLoadCommandInfo(i);
+ if (LCI.Command.Type == macho::LCT_FunctionStarts) {
// We found a function starts segment, parse the addresses for later
// consumption.
InMemoryStruct<macho::LinkeditDataLoadCommand> LLC;
- MachOObj->ReadLinkeditDataLoadCommand(LCI, LLC);
+ MachOObj->getObject()->ReadLinkeditDataLoadCommand(LCI, LLC);
- MachOObj->ReadULEB128s(LLC->DataOffset, FoundFns);
- }
- }
- // Store the symbols.
- if (SymtabLC) {
- for (unsigned i = 0; i != (*SymtabLC)->NumSymbolTableEntries; ++i) {
- if (MachOObj->is64Bit()) {
- InMemoryStruct<macho::Symbol64TableEntry> STE;
- MachOObj->ReadSymbol64TableEntry((*SymtabLC)->SymbolTableOffset, i,
- STE);
- Symbols.push_back(copySymbol(STE));
- } else {
- InMemoryStruct<macho::SymbolTableEntry> STE;
- MachOObj->ReadSymbolTableEntry((*SymtabLC)->SymbolTableOffset, i,
- STE);
- Symbols.push_back(copySymbol(STE));
- }
+ MachOObj->getObject()->ReadULEB128s(LLC->DataOffset, FoundFns);
}
}
}
@@ -277,9 +240,11 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
return;
}
- OwningPtr<MachOObject> MachOObj(MachOObject::LoadFromBuffer(Buff.take()));
+ OwningPtr<MachOObjectFile> MachOOF(static_cast<MachOObjectFile*>(
+ ObjectFile::createMachOObjectFile(Buff.take())));
+ MachOObject *MachOObj = MachOOF->getObject();
- const Target *TheTarget = GetTarget(MachOObj.get());
+ const Target *TheTarget = GetTarget(MachOObj);
if (!TheTarget) {
// GetTarget prints out stuff.
return;
@@ -293,9 +258,11 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
OwningPtr<const MCSubtargetInfo>
STI(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
OwningPtr<const MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI));
+ OwningPtr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
- OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
- AsmPrinterVariant, *AsmInfo, *STI));
+ OwningPtr<MCInstPrinter>
+ IP(TheTarget->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, *InstrInfo,
+ *MRI, *STI));
if (!InstrAnalysis || !AsmInfo || !STI || !DisAsm || !IP) {
errs() << "error: couldn't initialize disassembler for target "
@@ -322,17 +289,17 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
MachOObj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
MachOObj->RegisterStringTable(*SymtabLC);
- std::vector<Section> Sections;
- std::vector<Symbol> Symbols;
+ std::vector<SectionRef> Sections;
+ std::vector<SymbolRef> Symbols;
SmallVector<uint64_t, 8> FoundFns;
- getSectionsAndSymbols(Header, MachOObj.get(), &SymtabLC, Sections, Symbols,
+ getSectionsAndSymbols(Header, MachOOF.get(), &SymtabLC, Sections, Symbols,
FoundFns);
// Make a copy of the unsorted symbol list. FIXME: duplication
- std::vector<Symbol> UnsortedSymbols(Symbols);
+ std::vector<SymbolRef> UnsortedSymbols(Symbols);
// Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
+ std::sort(Symbols.begin(), Symbols.end(), SymbolSorter());
#ifndef NDEBUG
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
@@ -343,12 +310,12 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
StringRef DebugAbbrevSection, DebugInfoSection, DebugArangesSection,
DebugLineSection, DebugStrSection;
OwningPtr<DIContext> diContext;
- OwningPtr<MachOObject> DSYMObj;
- MachOObject *DbgInfoObj = MachOObj.get();
+ OwningPtr<MachOObjectFile> DSYMObj;
+ MachOObject *DbgInfoObj = MachOObj;
// Try to find debug info and set up the DIContext for it.
if (UseDbg) {
- ArrayRef<Section> DebugSections = Sections;
- std::vector<Section> DSYMSections;
+ ArrayRef<SectionRef> DebugSections = Sections;
+ std::vector<SectionRef> DSYMSections;
// A separate DSym file path was specified, parse it as a macho file,
// get the sections and supply it to the section name parsing machinery.
@@ -358,34 +325,33 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
errs() << "llvm-objdump: " << Filename << ": " << ec.message() << '\n';
return;
}
- DSYMObj.reset(MachOObject::LoadFromBuffer(Buf.take()));
- const macho::Header &Header = DSYMObj->getHeader();
+ DSYMObj.reset(static_cast<MachOObjectFile*>(
+ ObjectFile::createMachOObjectFile(Buf.take())));
+ const macho::Header &Header = DSYMObj->getObject()->getHeader();
- std::vector<Symbol> Symbols;
+ std::vector<SymbolRef> Symbols;
SmallVector<uint64_t, 8> FoundFns;
getSectionsAndSymbols(Header, DSYMObj.get(), 0, DSYMSections, Symbols,
FoundFns);
DebugSections = DSYMSections;
- DbgInfoObj = DSYMObj.get();
+ DbgInfoObj = DSYMObj.get()->getObject();
}
// Find the named debug info sections.
for (unsigned SectIdx = 0; SectIdx != DebugSections.size(); SectIdx++) {
- if (!strcmp(DebugSections[SectIdx].Name, "__debug_abbrev"))
- DebugAbbrevSection = DbgInfoObj->getData(DebugSections[SectIdx].Offset,
- DebugSections[SectIdx].Size);
- else if (!strcmp(DebugSections[SectIdx].Name, "__debug_info"))
- DebugInfoSection = DbgInfoObj->getData(DebugSections[SectIdx].Offset,
- DebugSections[SectIdx].Size);
- else if (!strcmp(DebugSections[SectIdx].Name, "__debug_aranges"))
- DebugArangesSection = DbgInfoObj->getData(DebugSections[SectIdx].Offset,
- DebugSections[SectIdx].Size);
- else if (!strcmp(DebugSections[SectIdx].Name, "__debug_line"))
- DebugLineSection = DbgInfoObj->getData(DebugSections[SectIdx].Offset,
- DebugSections[SectIdx].Size);
- else if (!strcmp(DebugSections[SectIdx].Name, "__debug_str"))
- DebugStrSection = DbgInfoObj->getData(DebugSections[SectIdx].Offset,
- DebugSections[SectIdx].Size);
+ StringRef SectName;
+ if (!DebugSections[SectIdx].getName(SectName)) {
+ if (SectName.equals("__DWARF,__debug_abbrev"))
+ DebugSections[SectIdx].getContents(DebugAbbrevSection);
+ else if (SectName.equals("__DWARF,__debug_info"))
+ DebugSections[SectIdx].getContents(DebugInfoSection);
+ else if (SectName.equals("__DWARF,__debug_aranges"))
+ DebugSections[SectIdx].getContents(DebugArangesSection);
+ else if (SectName.equals("__DWARF,__debug_line"))
+ DebugSections[SectIdx].getContents(DebugLineSection);
+ else if (SectName.equals("__DWARF,__debug_str"))
+ DebugSections[SectIdx].getContents(DebugStrSection);
+ }
}
// Setup the DIContext.
@@ -401,68 +367,115 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
FunctionListTy Functions;
for (unsigned SectIdx = 0; SectIdx != Sections.size(); SectIdx++) {
- if (strcmp(Sections[SectIdx].Name, "__text"))
+ StringRef SectName;
+ if (Sections[SectIdx].getName(SectName) ||
+ SectName.compare("__TEXT,__text"))
continue; // Skip non-text sections
// Insert the functions from the function starts segment into our map.
- uint64_t VMAddr = Sections[SectIdx].Address - Sections[SectIdx].Offset;
- for (unsigned i = 0, e = FoundFns.size(); i != e; ++i)
- FunctionMap.insert(std::make_pair(FoundFns[i]+VMAddr, (MCFunction*)0));
+ uint64_t VMAddr;
+ Sections[SectIdx].getAddress(VMAddr);
+ for (unsigned i = 0, e = FoundFns.size(); i != e; ++i) {
+ StringRef SectBegin;
+ Sections[SectIdx].getContents(SectBegin);
+ uint64_t Offset = (uint64_t)SectBegin.data();
+ FunctionMap.insert(std::make_pair(VMAddr + FoundFns[i]-Offset,
+ (MCFunction*)0));
+ }
- StringRef Bytes = MachOObj->getData(Sections[SectIdx].Offset,
- Sections[SectIdx].Size);
+ StringRef Bytes;
+ Sections[SectIdx].getContents(Bytes);
StringRefMemoryObject memoryObject(Bytes);
bool symbolTableWorked = false;
// Parse relocations.
- std::vector<std::pair<uint64_t, uint32_t> > Relocs;
- for (unsigned j = 0; j != Sections[SectIdx].NumRelocs; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- MachOObj->ReadRelocationEntry(Sections[SectIdx].RelocTableOffset, j, RE);
- Relocs.push_back(std::make_pair(RE->Word0, RE->Word1 & 0xffffff));
+ std::vector<std::pair<uint64_t, SymbolRef> > Relocs;
+ error_code ec;
+ for (relocation_iterator RI = Sections[SectIdx].begin_relocations(),
+ RE = Sections[SectIdx].end_relocations(); RI != RE; RI.increment(ec)) {
+ uint64_t RelocOffset, SectionAddress;
+ RI->getAddress(RelocOffset);
+ Sections[SectIdx].getAddress(SectionAddress);
+ RelocOffset -= SectionAddress;
+
+ SymbolRef RelocSym;
+ RI->getSymbol(RelocSym);
+
+ Relocs.push_back(std::make_pair(RelocOffset, RelocSym));
}
array_pod_sort(Relocs.begin(), Relocs.end());
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
+ StringRef SymName;
+ Symbols[SymIdx].getName(SymName);
+
+ SymbolRef::Type ST;
+ Symbols[SymIdx].getType(ST);
+ if (ST != SymbolRef::ST_Function)
+ continue;
+
// Make sure the symbol is defined in this section.
- if ((unsigned)Symbols[SymIdx].SectionIndex - 1 != SectIdx)
+ bool containsSym = false;
+ Sections[SectIdx].containsSymbol(Symbols[SymIdx], containsSym);
+ if (!containsSym)
continue;
// Start at the address of the symbol relative to the section's address.
- uint64_t Start = Symbols[SymIdx].Value - Sections[SectIdx].Address;
+ uint64_t SectionAddress = 0;
+ uint64_t Start = 0;
+ Sections[SectIdx].getAddress(SectionAddress);
+ Symbols[SymIdx].getAddress(Start);
+ Start -= SectionAddress;
+
// Stop disassembling either at the beginning of the next symbol or at
// the end of the section.
- uint64_t End = (SymIdx+1 == Symbols.size() ||
- Symbols[SymIdx].SectionIndex != Symbols[SymIdx+1].SectionIndex) ?
- Sections[SectIdx].Size :
- Symbols[SymIdx+1].Value - Sections[SectIdx].Address;
- uint64_t Size;
+ bool containsNextSym = true;
+ uint64_t NextSym = 0;
+ uint64_t NextSymIdx = SymIdx+1;
+ while (Symbols.size() > NextSymIdx) {
+ SymbolRef::Type NextSymType;
+ Symbols[NextSymIdx].getType(NextSymType);
+ if (NextSymType == SymbolRef::ST_Function) {
+ Sections[SectIdx].containsSymbol(Symbols[NextSymIdx],
+ containsNextSym);
+ Symbols[NextSymIdx].getAddress(NextSym);
+ NextSym -= SectionAddress;
+ break;
+ }
+ ++NextSymIdx;
+ }
- if (Start >= End)
- continue;
+ uint64_t SectSize;
+ Sections[SectIdx].getSize(SectSize);
+ uint64_t End = containsNextSym ? NextSym : SectSize;
+ uint64_t Size;
symbolTableWorked = true;
if (!CFG) {
// Normal disassembly, print addresses, bytes and mnemonic form.
- outs() << MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex)
- << ":\n";
+ StringRef SymName;
+ Symbols[SymIdx].getName(SymName);
+
+ outs() << SymName << ":\n";
DILineInfo lastLine;
for (uint64_t Index = Start; Index < End; Index += Size) {
MCInst Inst;
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
DebugOut, nulls())) {
- outs() << format("%8llx:\t", Sections[SectIdx].Address + Index);
+ uint64_t SectAddress = 0;
+ Sections[SectIdx].getAddress(SectAddress);
+ outs() << format("%8" PRIx64 ":\t", SectAddress + Index);
+
DumpBytes(StringRef(Bytes.data() + Index, Size));
IP->printInst(&Inst, outs(), "");
// Print debug info.
if (diContext) {
DILineInfo dli =
- diContext->getLineInfoForAddress(Sections[SectIdx].Address +
- Index);
+ diContext->getLineInfoForAddress(SectAddress + Index);
// Print valid line info if it changed.
if (dli != lastLine && dli.getLine() != 0)
outs() << "\t## " << dli.getFileName() << ':'
@@ -478,20 +491,24 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
}
} else {
// Create CFG and use it for disassembly.
+ StringRef SymName;
+ Symbols[SymIdx].getName(SymName);
createMCFunctionAndSaveCalls(
- MachOObj->getStringAtIndex(Symbols[SymIdx].StringIndex),
- DisAsm.get(), memoryObject, Start, End, InstrAnalysis.get(),
- Start, DebugOut, FunctionMap, Functions);
+ SymName, DisAsm.get(), memoryObject, Start, End,
+ InstrAnalysis.get(), Start, DebugOut, FunctionMap, Functions);
}
}
if (CFG) {
if (!symbolTableWorked) {
// Reading the symbol table didn't work, create a big __TEXT symbol.
+ uint64_t SectSize = 0, SectAddress = 0;
+ Sections[SectIdx].getSize(SectSize);
+ Sections[SectIdx].getAddress(SectAddress);
createMCFunctionAndSaveCalls("__TEXT", DisAsm.get(), memoryObject,
- 0, Sections[SectIdx].Size,
+ 0, SectSize,
InstrAnalysis.get(),
- Sections[SectIdx].Offset, DebugOut,
+ SectAddress, DebugOut,
FunctionMap, Functions);
}
for (std::map<uint64_t, MCFunction*>::iterator mi = FunctionMap.begin(),
@@ -499,11 +516,14 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
if (mi->second == 0) {
// Create functions for the remaining callees we have gathered,
// but we didn't find a name for them.
+ uint64_t SectSize = 0;
+ Sections[SectIdx].getSize(SectSize);
+
SmallVector<uint64_t, 16> Calls;
MCFunction f =
MCFunction::createFunctionFromMC("unknown", DisAsm.get(),
memoryObject, mi->first,
- Sections[SectIdx].Size,
+ SectSize,
InstrAnalysis.get(), DebugOut,
Calls);
Functions.push_back(f);
@@ -535,13 +555,17 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
break;
}
+ uint64_t SectSize = 0, SectAddress;
+ Sections[SectIdx].getSize(SectSize);
+ Sections[SectIdx].getAddress(SectAddress);
+
// No predecessors, this is a data block. Print as .byte directives.
if (!hasPreds) {
- uint64_t End = llvm::next(fi) == fe ? Sections[SectIdx].Size :
+ uint64_t End = llvm::next(fi) == fe ? SectSize :
llvm::next(fi)->first;
outs() << "# " << End-fi->first << " bytes of data:\n";
for (unsigned pos = fi->first; pos != End; ++pos) {
- outs() << format("%8x:\t", Sections[SectIdx].Address + pos);
+ outs() << format("%8x:\t", SectAddress + pos);
DumpBytes(StringRef(Bytes.data() + pos, 1));
outs() << format("\t.byte 0x%02x\n", (uint8_t)Bytes[pos]);
}
@@ -558,13 +582,12 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
const MCDecodedInst &Inst = fi->second.getInsts()[ii];
// If there's a symbol at this address, print its name.
- if (FunctionMap.find(Sections[SectIdx].Address + Inst.Address) !=
+ if (FunctionMap.find(SectAddress + Inst.Address) !=
FunctionMap.end())
- outs() << FunctionMap[Sections[SectIdx].Address + Inst.Address]->
- getName() << ":\n";
+ outs() << FunctionMap[SectAddress + Inst.Address]->getName()
+ << ":\n";
- outs() << format("%8llx:\t", Sections[SectIdx].Address +
- Inst.Address);
+ outs() << format("%8" PRIx64 ":\t", SectAddress + Inst.Address);
DumpBytes(StringRef(Bytes.data() + Inst.Address, Inst.Size));
if (fi->second.contains(fi->first)) // Indent simple loops.
@@ -575,15 +598,15 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
// Look for relocations inside this instructions, if there is one
// print its target and additional information if available.
for (unsigned j = 0; j != Relocs.size(); ++j)
- if (Relocs[j].first >= Sections[SectIdx].Address + Inst.Address &&
- Relocs[j].first < Sections[SectIdx].Address + Inst.Address +
- Inst.Size) {
- outs() << "\t# "
- << MachOObj->getStringAtIndex(
- UnsortedSymbols[Relocs[j].second].StringIndex)
- << ' ';
- DumpAddress(UnsortedSymbols[Relocs[j].second].Value, Sections,
- MachOObj.get(), outs());
+ if (Relocs[j].first >= SectAddress + Inst.Address &&
+ Relocs[j].first < SectAddress + Inst.Address + Inst.Size) {
+ StringRef SymName;
+ uint64_t Addr;
+ Relocs[j].second.getAddress(Addr);
+ Relocs[j].second.getName(SymName);
+
+ outs() << "\t# " << SymName << ' ';
+ DumpAddress(Addr, Sections, MachOObj, outs());
}
// If this instructions contains an address, see if we can evaluate
@@ -592,13 +615,12 @@ void llvm::DisassembleInputMachO(StringRef Filename) {
Inst.Address,
Inst.Size);
if (targ != -1ULL)
- DumpAddress(targ, Sections, MachOObj.get(), outs());
+ DumpAddress(targ, Sections, MachOObj, outs());
// Print debug info.
if (diContext) {
DILineInfo dli =
- diContext->getLineInfoForAddress(Sections[SectIdx].Address +
- Inst.Address);
+ diContext->getLineInfoForAddress(SectAddress + Inst.Address);
// Print valid line info if it changed.
if (dli != lastLine && dli.getLine() != 0)
outs() << "\t## " << dli.getFileName() << ':'
diff --git a/tools/llvm-objdump/Makefile b/tools/llvm-objdump/Makefile
index 703bf6c8a4f1..4616b78adb2e 100644
--- a/tools/llvm-objdump/Makefile
+++ b/tools/llvm-objdump/Makefile
@@ -6,13 +6,12 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-objdump
-LINK_COMPONENTS = $(TARGETS_TO_BUILD) DebugInfo MC MCParser MCDisassembler \
- Object
+LEVEL := ../..
+TOOLNAME := llvm-objdump
+LINK_COMPONENTS := all-targets DebugInfo MC MCParser MCDisassembler Object
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 40c59bd8c369..5a6f94a1c281 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -16,14 +16,18 @@
#include "llvm-objdump.h"
#include "MCFunction.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -43,6 +47,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
#include <algorithm>
+#include <cctype>
#include <cstring>
using namespace llvm;
using namespace object;
@@ -61,6 +66,12 @@ static cl::opt<bool>
Relocations("r", cl::desc("Display the relocation entries in the file"));
static cl::opt<bool>
+SectionContents("s", cl::desc("Display the content of each section"));
+
+static cl::opt<bool>
+SymbolTable("t", cl::desc("Display the symbol table"));
+
+static cl::opt<bool>
MachO("macho", cl::desc("Use MachO specific object file parser"));
static cl::alias
MachOm("m", cl::desc("Alias for --macho"), cl::aliasopt(MachO));
@@ -118,6 +129,8 @@ static const Target *GetTarget(const ObjectFile *Obj = NULL) {
return 0;
}
+void llvm::StringRefMemoryObject::anchor() { }
+
void llvm::DumpBytes(StringRef bytes) {
static const char hex_rep[] = "0123456789abcdef";
// FIXME: The real way to do this is to figure out the longest instruction
@@ -158,10 +171,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
return;
}
- outs() << '\n';
- outs() << Obj->getFileName()
- << ":\tfile format " << Obj->getFileFormatName() << "\n\n";
-
error_code ec;
for (section_iterator i = Obj->begin_sections(),
e = Obj->end_sections();
@@ -182,7 +191,9 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
bool contains;
if (!error(i->containsSymbol(*si, contains)) && contains) {
uint64_t Address;
- if (error(si->getOffset(Address))) break;
+ if (error(si->getAddress(Address))) break;
+ Address -= SectionAddr;
+
StringRef Name;
if (error(si->getName(Name))) break;
Symbols.push_back(std::make_pair(Address, Name));
@@ -238,9 +249,21 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
return;
}
+ OwningPtr<const MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
+ if (!MRI) {
+ errs() << "error: no register info for target " << TripleName << "\n";
+ return;
+ }
+
+ OwningPtr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
+ if (!MII) {
+ errs() << "error: no instruction info for target " << TripleName << "\n";
+ return;
+ }
+
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
- AsmPrinterVariant, *AsmInfo, *STI));
+ AsmPrinterVariant, *AsmInfo, *MII, *MRI, *STI));
if (!IP) {
errs() << "error: no instruction printer for target " << TripleName
<< '\n';
@@ -285,7 +308,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (DisAsm->getInstruction(Inst, Size, memoryObject, Index,
DebugOut, nulls())) {
- outs() << format("%8"PRIx64":\t", SectionAddr + Index);
+ outs() << format("%8" PRIx64 ":\t", SectionAddr + Index);
DumpBytes(StringRef(Bytes.data() + Index, Size));
IP->printInst(&Inst, outs(), "");
outs() << "\n";
@@ -297,17 +320,23 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Print relocation for instruction.
while (rel_cur != rel_end) {
+ bool hidden = false;
uint64_t addr;
SmallString<16> name;
SmallString<32> val;
+
+ // If this relocation is hidden, skip it.
+ if (error(rel_cur->getHidden(hidden))) goto skip_print_rel;
+ if (hidden) goto skip_print_rel;
+
if (error(rel_cur->getAddress(addr))) goto skip_print_rel;
// Stop when rel_cur's address is past the current instruction.
- if (addr > Index + Size) break;
+ if (addr >= Index + Size) break;
if (error(rel_cur->getTypeName(name))) goto skip_print_rel;
if (error(rel_cur->getValueString(val))) goto skip_print_rel;
- outs() << format("\t\t\t%8"PRIx64": ", SectionAddr + addr) << name << "\t"
- << val << "\n";
+ outs() << format("\t\t\t%8" PRIx64 ": ", SectionAddr + addr) << name
+ << "\t" << val << "\n";
skip_print_rel:
++rel_cur;
@@ -332,9 +361,12 @@ static void PrintRelocations(const ObjectFile *o) {
ri != re; ri.increment(ec)) {
if (error(ec)) return;
+ bool hidden;
uint64_t address;
SmallString<32> relocname;
SmallString<32> valuestr;
+ if (error(ri->getHidden(hidden))) continue;
+ if (hidden) continue;
if (error(ri->getTypeName(relocname))) continue;
if (error(ri->getAddress(address))) continue;
if (error(ri->getValueString(valuestr))) continue;
@@ -364,19 +396,179 @@ static void PrintSectionHeaders(const ObjectFile *o) {
if (error(si->isBSS(BSS))) return;
std::string Type = (std::string(Text ? "TEXT " : "") +
(Data ? "DATA " : "") + (BSS ? "BSS" : ""));
- outs() << format("%3d %-13s %09"PRIx64" %017"PRIx64" %s\n", i, Name.str().c_str(), Size,
- Address, Type.c_str());
+ outs() << format("%3d %-13s %09" PRIx64 " %017" PRIx64 " %s\n",
+ i, Name.str().c_str(), Size, Address, Type.c_str());
++i;
}
}
+static void PrintSectionContents(const ObjectFile *o) {
+ error_code ec;
+ for (section_iterator si = o->begin_sections(),
+ se = o->end_sections();
+ si != se; si.increment(ec)) {
+ if (error(ec)) return;
+ StringRef Name;
+ StringRef Contents;
+ uint64_t BaseAddr;
+ if (error(si->getName(Name))) continue;
+ if (error(si->getContents(Contents))) continue;
+ if (error(si->getAddress(BaseAddr))) continue;
+
+ outs() << "Contents of section " << Name << ":\n";
+
+ // Dump out the content as hex and printable ascii characters.
+ for (std::size_t addr = 0, end = Contents.size(); addr < end; addr += 16) {
+ outs() << format(" %04" PRIx64 " ", BaseAddr + addr);
+ // Dump line of hex.
+ for (std::size_t i = 0; i < 16; ++i) {
+ if (i != 0 && i % 4 == 0)
+ outs() << ' ';
+ if (addr + i < end)
+ outs() << hexdigit((Contents[addr + i] >> 4) & 0xF, true)
+ << hexdigit(Contents[addr + i] & 0xF, true);
+ else
+ outs() << " ";
+ }
+ // Print ascii.
+ outs() << " ";
+ for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
+ if (std::isprint(Contents[addr + i] & 0xFF))
+ outs() << Contents[addr + i];
+ else
+ outs() << ".";
+ }
+ outs() << "\n";
+ }
+ }
+}
+
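// Note on the hexdump format produced by PrintSectionContents above: each
// 16-byte row is " <addr> XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX  <ascii>",
// with hexdigit() (from StringExtras.h) emitting lowercase digits and short
// rows padded with spaces so the ascii column stays aligned.
// Illustrative output only:
//
//   Contents of section __text:
//    0000 554889e5 4883ec10 c745fc00 000000eb  UH..H....E......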
+static void PrintCOFFSymbolTable(const COFFObjectFile *coff) {
+ const coff_file_header *header;
+ if (error(coff->getHeader(header))) return;
+ int aux_count = 0;
+ const coff_symbol *symbol = 0;
+ for (int i = 0, e = header->NumberOfSymbols; i != e; ++i) {
+ if (aux_count--) {
+ // Figure out which type of aux this is.
+ if (symbol->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC
+ && symbol->Value == 0) { // Section definition.
+ const coff_aux_section_definition *asd;
+ if (error(coff->getAuxSymbol<coff_aux_section_definition>(i, asd)))
+ return;
+ outs() << "AUX "
+ << format("scnlen 0x%x nreloc %d nlnno %d checksum 0x%x "
+ , unsigned(asd->Length)
+ , unsigned(asd->NumberOfRelocations)
+ , unsigned(asd->NumberOfLinenumbers)
+ , unsigned(asd->CheckSum))
+ << format("assoc %d comdat %d\n"
+ , unsigned(asd->Number)
+ , unsigned(asd->Selection));
+ } else {
+ outs() << "AUX Unknown\n";
+ }
+ } else {
+ StringRef name;
+ if (error(coff->getSymbol(i, symbol))) return;
+ if (error(coff->getSymbolName(symbol, name))) return;
+ outs() << "[" << format("%2d", i) << "]"
+ << "(sec " << format("%2d", int(symbol->SectionNumber)) << ")"
+ << "(fl 0x00)" // Flag bits, which COFF doesn't have.
+ << "(ty " << format("%3x", unsigned(symbol->Type)) << ")"
+ << "(scl " << format("%3x", unsigned(symbol->StorageClass)) << ") "
+ << "(nx " << unsigned(symbol->NumberOfAuxSymbols) << ") "
+ << "0x" << format("%08x", unsigned(symbol->Value)) << " "
+ << name << "\n";
+ aux_count = symbol->NumberOfAuxSymbols;
+ }
+ }
+}
+
+static void PrintSymbolTable(const ObjectFile *o) {
+ outs() << "SYMBOL TABLE:\n";
+
+ if (const COFFObjectFile *coff = dyn_cast<const COFFObjectFile>(o))
+ PrintCOFFSymbolTable(coff);
+ else {
+ error_code ec;
+ for (symbol_iterator si = o->begin_symbols(),
+ se = o->end_symbols(); si != se; si.increment(ec)) {
+ if (error(ec)) return;
+ StringRef Name;
+ uint64_t Address;
+ SymbolRef::Type Type;
+ uint64_t Size;
+ uint32_t Flags;
+ section_iterator Section = o->end_sections();
+ if (error(si->getName(Name))) continue;
+ if (error(si->getAddress(Address))) continue;
+ if (error(si->getFlags(Flags))) continue;
+ if (error(si->getType(Type))) continue;
+ if (error(si->getSize(Size))) continue;
+ if (error(si->getSection(Section))) continue;
+
+ bool Global = Flags & SymbolRef::SF_Global;
+ bool Weak = Flags & SymbolRef::SF_Weak;
+ bool Absolute = Flags & SymbolRef::SF_Absolute;
+
+ if (Address == UnknownAddressOrSize)
+ Address = 0;
+ if (Size == UnknownAddressOrSize)
+ Size = 0;
+ char GlobLoc = ' ';
+ if (Type != SymbolRef::ST_Unknown)
+ GlobLoc = Global ? 'g' : 'l';
+ char Debug = (Type == SymbolRef::ST_Debug || Type == SymbolRef::ST_File)
+ ? 'd' : ' ';
+ char FileFunc = ' ';
+ if (Type == SymbolRef::ST_File)
+ FileFunc = 'f';
+ else if (Type == SymbolRef::ST_Function)
+ FileFunc = 'F';
+
+ outs() << format("%08" PRIx64, Address) << " "
+ << GlobLoc // Local -> 'l', Global -> 'g', Neither -> ' '
+ << (Weak ? 'w' : ' ') // Weak?
+ << ' ' // Constructor. Not supported yet.
+ << ' ' // Warning. Not supported yet.
+ << ' ' // Indirect reference to another symbol.
+ << Debug // Debugging (d) or dynamic (D) symbol.
+ << FileFunc // Name of function (F), file (f) or object (O).
+ << ' ';
+ if (Absolute)
+ outs() << "*ABS*";
+ else if (Section == o->end_sections())
+ outs() << "*UND*";
+ else {
+ StringRef SectionName;
+ if (error(Section->getName(SectionName)))
+ SectionName = "";
+ outs() << SectionName;
+ }
+ outs() << '\t'
+ << format("%08" PRIx64 " ", Size)
+ << Name
+ << '\n';
+ }
+ }
+}
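Given the field order above (address, l/g, weak, three reserved columns, debug, file/function, section, size, name), a defined global function prints along these lines (values illustrative):

    00000000 g     F .text	0000001a main

which matches the binutils `objdump -t` layout the tool is imitating.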
+
static void DumpObject(const ObjectFile *o) {
+ outs() << '\n';
+ outs() << o->getFileName()
+ << ":\tfile format " << o->getFileFormatName() << "\n\n";
+
if (Disassemble)
DisassembleObject(o, Relocations);
if (Relocations && !Disassemble)
PrintRelocations(o);
if (SectionHeaders)
PrintSectionHeaders(o);
+ if (SectionContents)
+ PrintSectionContents(o);
+ if (SymbolTable)
+ PrintSymbolTable(o);
}
/// @brief Dump each object file in \a a.
@@ -385,8 +577,10 @@ static void DumpArchive(const Archive *a) {
e = a->end_children(); i != e; ++i) {
OwningPtr<Binary> child;
if (error_code ec = i->getAsBinary(child)) {
- errs() << ToolName << ": '" << a->getFileName() << "': " << ec.message()
- << ".\n";
+ // Ignore non-object files.
+ if (ec != object_error::invalid_file_type)
+ errs() << ToolName << ": '" << a->getFileName() << "': " << ec.message()
+ << ".\n";
continue;
}
if (ObjectFile *o = dyn_cast<ObjectFile>(child.get()))
@@ -447,7 +641,11 @@ int main(int argc, char **argv) {
if (InputFilenames.size() == 0)
InputFilenames.push_back("a.out");
- if (!Disassemble && !Relocations && !SectionHeaders) {
+ if (!Disassemble
+ && !Relocations
+ && !SectionHeaders
+ && !SectionContents
+ && !SymbolTable) {
cl::PrintHelpMessage();
return 2;
}
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index 75f852afbc7e..aa71b77c8abf 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -25,7 +25,7 @@ void DumpBytes(StringRef bytes);
void DisassembleInputMachO(StringRef Filename);
class StringRefMemoryObject : public MemoryObject {
-private:
+ virtual void anchor();
StringRef Bytes;
public:
StringRefMemoryObject(StringRef bytes) : Bytes(bytes) {}
diff --git a/tools/llvm-prof/LLVMBuild.txt b/tools/llvm-prof/LLVMBuild.txt
new file mode 100644
index 000000000000..d59127cc268f
--- /dev/null
+++ b/tools/llvm-prof/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-prof/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-prof
+parent = Tools
+required_libraries = Analysis BitReader
diff --git a/tools/llvm-prof/Makefile b/tools/llvm-prof/Makefile
index 86eb54d51bf4..f8297867652c 100644
--- a/tools/llvm-prof/Makefile
+++ b/tools/llvm-prof/Makefile
@@ -6,10 +6,10 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-prof
-LINK_COMPONENTS = bitreader analysis
+LEVEL := ../..
+TOOLNAME := llvm-prof
+LINK_COMPONENTS := bitreader analysis
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 9d0b46833bef..d9b671336407 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -200,9 +200,9 @@ bool ProfileInfoPrinterPass::runOnModule(Module &M) {
}
outs() << format("%3d", i+1) << ". "
- << format("%5.2g", FunctionCounts[i].second) << "/"
- << format("%g", TotalExecutions) << " "
- << FunctionCounts[i].first->getNameStr() << "\n";
+ << format("%5.2g", FunctionCounts[i].second) << "/"
+ << format("%g", TotalExecutions) << " "
+ << FunctionCounts[i].first->getName() << "\n";
}
std::set<Function*> FunctionsToPrint;
@@ -225,12 +225,12 @@ bool ProfileInfoPrinterPass::runOnModule(Module &M) {
for (unsigned i = 0; i != BlocksToPrint; ++i) {
if (Counts[i].second == 0) break;
Function *F = Counts[i].first->getParent();
- outs() << format("%3d", i+1) << ". "
- << format("%5g", Counts[i].second/(double)TotalExecutions*100) << "% "
- << format("%5.0f", Counts[i].second) << "/"
- << format("%g", TotalExecutions) << "\t"
- << F->getNameStr() << "() - "
- << Counts[i].first->getNameStr() << "\n";
+ outs() << format("%3d", i+1) << ". "
+ << format("%5g", Counts[i].second/(double)TotalExecutions*100)<<"% "
+ << format("%5.0f", Counts[i].second) << "/"
+ << format("%g", TotalExecutions) << "\t"
+ << F->getName() << "() - "
+ << Counts[i].first->getName() << "\n";
FunctionsToPrint.insert(F);
}
diff --git a/tools/llvm-ranlib/LLVMBuild.txt b/tools/llvm-ranlib/LLVMBuild.txt
new file mode 100644
index 000000000000..23015c54e552
--- /dev/null
+++ b/tools/llvm-ranlib/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-ranlib/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-ranlib
+parent = Tools
+required_libraries = Archive
diff --git a/tools/llvm-ranlib/Makefile b/tools/llvm-ranlib/Makefile
index 46a10e644662..36195f4399ec 100644
--- a/tools/llvm-ranlib/Makefile
+++ b/tools/llvm-ranlib/Makefile
@@ -7,12 +7,12 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-ranlib
-LINK_COMPONENTS = archive
+LEVEL := ../..
+TOOLNAME := llvm-ranlib
+LINK_COMPONENTS := archive
REQUIRES_EH := 1
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-readobj/CMakeLists.txt b/tools/llvm-readobj/CMakeLists.txt
new file mode 100644
index 000000000000..be80469f28d5
--- /dev/null
+++ b/tools/llvm-readobj/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS archive bitreader object)
+
+add_llvm_tool(llvm-readobj
+ llvm-readobj.cpp
+ )
diff --git a/tools/llvm-readobj/LLVMBuild.txt b/tools/llvm-readobj/LLVMBuild.txt
new file mode 100644
index 000000000000..c9f934f4b6fa
--- /dev/null
+++ b/tools/llvm-readobj/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-readobj/LLVMBuild.txt -----------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-readobj
+parent = Tools
+required_libraries = Archive BitReader Object
diff --git a/tools/llvm-readobj/Makefile b/tools/llvm-readobj/Makefile
new file mode 100644
index 000000000000..a7a7de356303
--- /dev/null
+++ b/tools/llvm-readobj/Makefile
@@ -0,0 +1,18 @@
+##===- tools/llvm-readobj/Makefile -------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-readobj
+LINK_COMPONENTS := archive bitreader object
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
new file mode 100644
index 000000000000..3be12899aea7
--- /dev/null
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -0,0 +1,218 @@
+//===- llvm-readobj.cpp - Dump contents of an Object File -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is a utility that works like traditional Unix "readelf",
+// except that it can handle any type of object file recognized by lib/Object.
+//
+// It makes use of the generic ObjectFile interface.
+//
+// Caution: This utility is new, experimental, unsupported, and incomplete.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/FormattedStream.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input object>"), cl::init(""));
+
+void DumpSymbolHeader() {
+ outs() << format(" %-32s", (const char*)"Name")
+ << format(" %-4s", (const char*)"Type")
+ << format(" %-16s", (const char*)"Address")
+ << format(" %-16s", (const char*)"Size")
+ << format(" %-16s", (const char*)"FileOffset")
+ << format(" %-26s", (const char*)"Flags")
+ << "\n";
+}
+
+const char *GetTypeStr(SymbolRef::Type Type) {
+ switch (Type) {
+ case SymbolRef::ST_Unknown: return "?";
+ case SymbolRef::ST_Data: return "DATA";
+ case SymbolRef::ST_Debug: return "DBG";
+ case SymbolRef::ST_File: return "FILE";
+ case SymbolRef::ST_Function: return "FUNC";
+ case SymbolRef::ST_Other: return "-";
+ }
+ return "INV";
+}
+
+std::string GetFlagStr(uint32_t Flags) {
+ std::string result;
+ if (Flags & SymbolRef::SF_Undefined)
+ result += "undef,";
+ if (Flags & SymbolRef::SF_Global)
+ result += "global,";
+ if (Flags & SymbolRef::SF_Weak)
+ result += "weak,";
+ if (Flags & SymbolRef::SF_Absolute)
+ result += "absolute,";
+ if (Flags & SymbolRef::SF_ThreadLocal)
+ result += "threadlocal,";
+ if (Flags & SymbolRef::SF_Common)
+ result += "common,";
+ if (Flags & SymbolRef::SF_FormatSpecific)
+ result += "formatspecific,";
+
+ // Remove trailing comma
+ if (result.size() > 0) {
+ result.erase(result.size() - 1);
+ }
+ return result;
+}
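A quick usage note: the bit tests above compose, so a weak global symbol yields both words with the trailing comma stripped.

    // Illustrative, assuming the GetFlagStr above:
    // GetFlagStr(SymbolRef::SF_Global | SymbolRef::SF_Weak) == "global,weak"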
+
+void DumpSymbol(const SymbolRef &Sym, const ObjectFile *obj, bool IsDynamic) {
+ StringRef Name;
+ SymbolRef::Type Type;
+ uint32_t Flags;
+ uint64_t Address;
+ uint64_t Size;
+ uint64_t FileOffset;
+ Sym.getName(Name);
+ Sym.getAddress(Address);
+ Sym.getSize(Size);
+ Sym.getFileOffset(FileOffset);
+ Sym.getType(Type);
+ Sym.getFlags(Flags);
+ std::string FullName = Name;
+
+ // If this is a dynamic symbol from an ELF object, append
+ // the symbol's version to the name.
+ if (IsDynamic && obj->isELF()) {
+ StringRef Version;
+ bool IsDefault;
+ GetELFSymbolVersion(obj, Sym, Version, IsDefault);
+ if (!Version.empty()) {
+ FullName += (IsDefault ? "@@" : "@");
+ FullName += Version;
+ }
+ }
+
+ // format() can't handle StringRefs
+ outs() << format(" %-32s", FullName.c_str())
+ << format(" %-4s", GetTypeStr(Type))
+ << format(" %16" PRIx64, Address)
+ << format(" %16" PRIx64, Size)
+ << format(" %16" PRIx64, FileOffset)
+ << " " << GetFlagStr(Flags)
+ << "\n";
+}
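The "@@" versus "@" choice follows the ELF symbol-versioning convention: "@@" marks the default version of a versioned dynamic symbol and "@" a non-default one, so a versioned libc symbol would render as, e.g., printf@@GLIBC_2.2.5 (example name only).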
+
+
+// Iterate through the normal symbols in the ObjectFile
+void DumpSymbols(const ObjectFile *obj) {
+ error_code ec;
+ uint32_t count = 0;
+ outs() << "Symbols:\n";
+ symbol_iterator it = obj->begin_symbols();
+ symbol_iterator ie = obj->end_symbols();
+ while (it != ie) {
+ DumpSymbol(*it, obj, false);
+ it.increment(ec);
+ if (ec)
+ report_fatal_error("Symbol iteration failed");
+ ++count;
+ }
+ outs() << " Total: " << count << "\n\n";
+}
+
+// Iterate through the dynamic symbols in the ObjectFile.
+void DumpDynamicSymbols(const ObjectFile *obj) {
+ error_code ec;
+ uint32_t count = 0;
+ outs() << "Dynamic Symbols:\n";
+ symbol_iterator it = obj->begin_dynamic_symbols();
+ symbol_iterator ie = obj->end_dynamic_symbols();
+ while (it != ie) {
+ DumpSymbol(*it, obj, true);
+ it.increment(ec);
+ if (ec)
+ report_fatal_error("Symbol iteration failed");
+ ++count;
+ }
+ outs() << " Total: " << count << "\n\n";
+}
+
+void DumpLibrary(const LibraryRef &lib) {
+ StringRef path;
+ lib.getPath(path);
+ outs() << " " << path << "\n";
+}
+
+// Iterate through needed libraries
+void DumpLibrariesNeeded(const ObjectFile *obj) {
+ error_code ec;
+ uint32_t count = 0;
+ library_iterator it = obj->begin_libraries_needed();
+ library_iterator ie = obj->end_libraries_needed();
+ outs() << "Libraries needed:\n";
+ while (it != ie) {
+ DumpLibrary(*it);
+ it.increment(ec);
+ if (ec)
+ report_fatal_error("Needed libraries iteration failed");
+ ++count;
+ }
+ outs() << " Total: " << count << "\n\n";
+}
+
+void DumpHeaders(const ObjectFile *obj) {
+ outs() << "File Format : " << obj->getFileFormatName() << "\n";
+ outs() << "Arch : "
+ << Triple::getArchTypeName((llvm::Triple::ArchType)obj->getArch())
+ << "\n";
+ outs() << "Address Size: " << (8*obj->getBytesInAddress()) << " bits\n";
+ outs() << "Load Name : " << obj->getLoadName() << "\n";
+ outs() << "\n";
+}
+
+int main(int argc, char** argv) {
+ error_code ec;
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+
+ cl::ParseCommandLineOptions(argc, argv,
+ "LLVM Object Reader\n");
+
+ if (InputFilename.empty()) {
+ errs() << "Please specify an input filename\n";
+ return 1;
+ }
+
+ // Open the object file
+ OwningPtr<MemoryBuffer> File;
+ if (MemoryBuffer::getFile(InputFilename, File)) {
+ errs() << InputFilename << ": Open failed\n";
+ return 1;
+ }
+
+ ObjectFile *obj = ObjectFile::createObjectFile(File.take());
+ if (!obj) {
+ errs() << InputFilename << ": Object type not recognized\n";
+ return 1;
+ }
+
+ DumpHeaders(obj);
+ DumpSymbols(obj);
+ DumpDynamicSymbols(obj);
+ DumpLibrariesNeeded(obj);
+ return 0;
+}
+
diff --git a/tools/llvm-rtdyld/LLVMBuild.txt b/tools/llvm-rtdyld/LLVMBuild.txt
new file mode 100644
index 000000000000..b36d13c75a0b
--- /dev/null
+++ b/tools/llvm-rtdyld/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-rtdyld/LLVMBuild.txt ------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-rtdyld
+parent = Tools
+required_libraries = JIT MC Object RuntimeDyld Support all-targets
diff --git a/tools/llvm-rtdyld/Makefile b/tools/llvm-rtdyld/Makefile
index 0d57277f08d8..30fbee0979bd 100644
--- a/tools/llvm-rtdyld/Makefile
+++ b/tools/llvm-rtdyld/Makefile
@@ -7,17 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-rtdyld
+LEVEL := ../..
+TOOLNAME := llvm-rtdyld
+LINK_COMPONENTS := all-targets support MC object RuntimeDyld JIT
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
+TOOL_NO_EXPORTS := 1
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) support MC object RuntimeDyld JIT
-
-include $(LLVM_SRC_ROOT)/Makefile.rules
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index ec9d6526ece1..01a7d1580786 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -51,22 +51,30 @@ EntryPoint("entry",
class TrivialMemoryManager : public RTDyldMemoryManager {
public:
SmallVector<sys::MemoryBlock, 16> FunctionMemory;
+ SmallVector<sys::MemoryBlock, 16> DataMemory;
+
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID);
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID);
+
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ return 0;
+ }
- uint8_t *startFunctionBody(const char *Name, uintptr_t &Size);
- void endFunctionBody(const char *Name, uint8_t *FunctionStart,
- uint8_t *FunctionEnd);
};
-uint8_t *TrivialMemoryManager::startFunctionBody(const char *Name,
- uintptr_t &Size) {
+uint8_t *TrivialMemoryManager::allocateCodeSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
return (uint8_t*)sys::Memory::AllocateRWX(Size, 0, 0).base();
}
-void TrivialMemoryManager::endFunctionBody(const char *Name,
- uint8_t *FunctionStart,
- uint8_t *FunctionEnd) {
- uintptr_t Size = FunctionEnd - FunctionStart + 1;
- FunctionMemory.push_back(sys::MemoryBlock(FunctionStart, Size));
+uint8_t *TrivialMemoryManager::allocateDataSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ return (uint8_t*)sys::Memory::AllocateRWX(Size, 0, 0).base();
}
static const char *ProgramName;
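Note that both allocators above ignore Alignment and SectionID and hand back fresh RWX pages on every call, which is fine for a test harness but leaks and over-maps in real use. A sketch of honoring the alignment request, assuming the same sys::Memory API and this file's existing includes (the helper is hypothetical, not part of the patch):

static uint8_t *allocateAlignedRWX(uintptr_t Size, unsigned Alignment) {
  if (Alignment == 0)
    Alignment = 16;
  // Over-allocate, then round the base address up to the alignment.
  sys::MemoryBlock MB = sys::Memory::AllocateRWX(Size + Alignment, 0, 0);
  uintptr_t Addr = (uintptr_t)MB.base();
  Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
  return (uint8_t*)Addr;
}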
@@ -142,10 +150,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm MC-JIT tool\n");
switch (Action) {
- default:
case AC_Execute:
return executeInput();
}
-
- return 0;
}
diff --git a/tools/llvm-shlib/Makefile b/tools/llvm-shlib/Makefile
index 0695c0070d36..2d2e2c55b80d 100644
--- a/tools/llvm-shlib/Makefile
+++ b/tools/llvm-shlib/Makefile
@@ -7,13 +7,13 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
+LEVEL := ../..
LIBRARYNAME = LLVM-$(LLVMVersion)
-NO_BUILD_ARCHIVE = 1
-LINK_LIBS_IN_SHARED = 1
-SHARED_LIBRARY = 1
+NO_BUILD_ARCHIVE := 1
+LINK_LIBS_IN_SHARED := 1
+SHARED_LIBRARY := 1
include $(LEVEL)/Makefile.config
@@ -63,13 +63,15 @@ ifeq ($(HOST_OS),Darwin)
endif
endif
-ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD))
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux FreeBSD OpenBSD GNU))
# Include everything from the .a's into the shared library.
LLVMLibsOptions := -Wl,--whole-archive $(LLVMLibsOptions) \
-Wl,--no-whole-archive
+ # Add soname to the library.
+ LLVMLibsOptions += -Wl,--soname,lib$(LIBRARYNAME)$(SHLIBEXT)
endif
-ifeq ($(HOST_OS),Linux)
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux GNU))
# Don't allow unresolved symbols.
LLVMLibsOptions += -Wl,--no-undefined
endif
diff --git a/tools/llvm-size/LLVMBuild.txt b/tools/llvm-size/LLVMBuild.txt
new file mode 100644
index 000000000000..b4c538a406be
--- /dev/null
+++ b/tools/llvm-size/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-size/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-size
+parent = Tools
+required_libraries = Object
diff --git a/tools/llvm-size/Makefile b/tools/llvm-size/Makefile
index 5d0e27ed16e4..0622eb108978 100644
--- a/tools/llvm-size/Makefile
+++ b/tools/llvm-size/Makefile
@@ -6,10 +6,10 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-size
-LINK_COMPONENTS = object
+LEVEL := ../..
+TOOLNAME := llvm-size
+LINK_COMPONENTS := object
# This tool has no plugins, optimize startup time.
TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp
index 70e5cb9439b3..462da40afb63 100644
--- a/tools/llvm-size/llvm-size.cpp
+++ b/tools/llvm-size/llvm-size.cpp
@@ -96,13 +96,13 @@ static void PrintObjectSectionSizes(ObjectFile *o) {
const char *radix_fmt = 0;
switch (Radix) {
case octal:
- radix_fmt = "llo";
+ radix_fmt = PRIo64;
break;
case decimal:
- radix_fmt = "llu";
+ radix_fmt = PRIu64;
break;
case hexadecimal:
- radix_fmt = "llx";
+ radix_fmt = PRIx64;
break;
}
if (OutputFormat == sysv) {
@@ -223,8 +223,8 @@ static void PrintObjectSectionSizes(ObjectFile *o) {
total_data,
total_bss);
fmtbuf.clear();
- fmt << "%7" << (Radix == octal ? "llo" : "llu") << " "
- << "%7llx ";
+ fmt << "%7" << (Radix == octal ? PRIo64 : PRIu64) << " "
+ << "%7" PRIx64 " ";
outs() << format(fmt.str().c_str(),
total,
total);
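The PRI macros fix a real portability bug: "%llx" assumes uint64_t is unsigned long long, but on common LP64 targets it is unsigned long, so the hard-coded length modifier is undefined behavior there. The <inttypes.h> macros expand to the right modifier per platform. A standalone illustration (not part of the patch):

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Total = 0xdeadbeefULL;
  // PRIx64 expands to "llx" or "lx" as the platform requires.
  std::printf("%7" PRIx64 "\n", Total);
  return 0;
}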
diff --git a/tools/llvm-stress/CMakeLists.txt b/tools/llvm-stress/CMakeLists.txt
new file mode 100644
index 000000000000..e2d07a5dda20
--- /dev/null
+++ b/tools/llvm-stress/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo)
+
+add_llvm_tool(llvm-stress
+ llvm-stress.cpp
+ )
diff --git a/tools/llvm-stress/LLVMBuild.txt b/tools/llvm-stress/LLVMBuild.txt
new file mode 100644
index 000000000000..f383d351dd36
--- /dev/null
+++ b/tools/llvm-stress/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-stress/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-stress
+parent = Tools
+required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar
diff --git a/tools/llvm-stress/Makefile b/tools/llvm-stress/Makefile
new file mode 100644
index 000000000000..90d57c3fa98a
--- /dev/null
+++ b/tools/llvm-stress/Makefile
@@ -0,0 +1,18 @@
+##===- tools/llvm-stress/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := llvm-stress
+LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
new file mode 100644
index 000000000000..d284ea5e42c9
--- /dev/null
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -0,0 +1,702 @@
+//===-- llvm-stress.cpp - Generate random LL files to stress-test LLVM ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is a utility that generates random .ll files to stress-test
+// different components in LLVM.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Constants.h"
+#include "llvm/Instruction.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include <memory>
+#include <sstream>
+#include <set>
+#include <vector>
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<unsigned> SeedCL("seed",
+ cl::desc("Seed used for randomness"), cl::init(0));
+static cl::opt<unsigned> SizeCL("size",
+ cl::desc("The estimated size of the generated function (# of instrs)"),
+ cl::init(100));
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("Override output filename"),
+ cl::value_desc("filename"));
+
+static cl::opt<bool> GenHalfFloat("generate-half-float",
+ cl::desc("Generate half-length floating-point values"), cl::init(false));
+static cl::opt<bool> GenX86FP80("generate-x86-fp80",
+ cl::desc("Generate 80-bit X86 floating-point values"), cl::init(false));
+static cl::opt<bool> GenFP128("generate-fp128",
+ cl::desc("Generate 128-bit floating-point values"), cl::init(false));
+static cl::opt<bool> GenPPCFP128("generate-ppc-fp128",
+ cl::desc("Generate 128-bit PPC floating-point values"), cl::init(false));
+static cl::opt<bool> GenX86MMX("generate-x86-mmx",
+ cl::desc("Generate X86 MMX floating-point values"), cl::init(false));
+
+/// A utility class to provide a pseudo-random number generator which is
+/// the same across all platforms. This is somewhat close to the libc
+/// implementation. Note: This is not a cryptographically secure pseudorandom
+/// number generator.
+class Random {
+public:
+ /// C'tor
+ Random(unsigned _seed):Seed(_seed) {}
+
+ /// Return a random integer, up to a
+ /// maximum of 2**19 - 1.
+ uint32_t Rand() {
+ uint32_t Val = Seed + 0x000b07a1;
+ Seed = (Val * 0x3c7c0ac1);
+ // Only lowest 19 bits are random-ish.
+ return Seed & 0x7ffff;
+ }
+
+ /// Return a random 32 bit integer.
+ uint32_t Rand32() {
+ uint32_t Val = Rand();
+ Val &= 0xffff;
+ return Val | (Rand() << 16);
+ }
+
+ /// Return a random 64 bit integer.
+ uint64_t Rand64() {
+ uint64_t Val = Rand32();
+ return Val | (uint64_t(Rand32()) << 32);
+ }
+private:
+ unsigned Seed;
+};
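Because the generator is pure integer arithmetic on a 32-bit seed, two instances seeded identically stay in lockstep on every platform, which is exactly what a reproducible fuzzer needs. A small determinism check, assuming the Random class above is in scope (illustrative only):

#include <cassert>

int main() {
  Random R1(42), R2(42);
  for (int i = 0; i < 4; ++i)
    assert(R1.Rand() == R2.Rand()); // Sequences stay in lockstep.
  return 0;
}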
+
+/// Generate an empty function with a default argument list.
+Function *GenEmptyFunction(Module *M) {
+ // Type Definitions
+ std::vector<Type*> ArgsTy;
+ // Define a few arguments
+ LLVMContext &Context = M->getContext();
+ ArgsTy.push_back(PointerType::get(IntegerType::getInt8Ty(Context), 0));
+ ArgsTy.push_back(PointerType::get(IntegerType::getInt32Ty(Context), 0));
+ ArgsTy.push_back(PointerType::get(IntegerType::getInt64Ty(Context), 0));
+ ArgsTy.push_back(IntegerType::getInt32Ty(Context));
+ ArgsTy.push_back(IntegerType::getInt64Ty(Context));
+ ArgsTy.push_back(IntegerType::getInt8Ty(Context));
+
+ FunctionType *FuncTy = FunctionType::get(Type::getVoidTy(Context), ArgsTy, 0);
+ // Pick a unique name that encodes the random seed.
+ std::stringstream ss;
+ ss<<"autogen_SD"<<SeedCL;
+ Function *Func = Function::Create(FuncTy, GlobalValue::ExternalLinkage,
+ ss.str(), M);
+
+ Func->setCallingConv(CallingConv::C);
+ return Func;
+}
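For seed 0, the emitted function's IR signature would look like this (illustrative; unnamed arguments print as bare types):

    define void @autogen_SD0(i8*, i32*, i64*, i32, i64, i8)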
+
+/// A base class, implementing utilities needed for
+/// modifying and adding new random instructions.
+struct Modifier {
+ /// Used to store the randomly generated values.
+ typedef std::vector<Value*> PieceTable;
+
+public:
+ /// C'tor
+ Modifier(BasicBlock *Block, PieceTable *PT, Random *R):
+ BB(Block),PT(PT),Ran(R),Context(BB->getContext()) {}
+ /// Add a new instruction.
+ virtual void Act() = 0;
+ /// Add N new instructions.
+ virtual void ActN(unsigned n) {
+ for (unsigned i=0; i<n; ++i)
+ Act();
+ }
+
+protected:
+ /// Return a random value from the list of known values.
+ Value *getRandomVal() {
+ assert(PT->size());
+ return PT->at(Ran->Rand() % PT->size());
+ }
+
+ Constant *getRandomConstant(Type *Tp) {
+ if (Tp->isIntegerTy()) {
+ if (Ran->Rand() & 1)
+ return ConstantInt::getAllOnesValue(Tp);
+ return ConstantInt::getNullValue(Tp);
+ } else if (Tp->isFloatingPointTy()) {
+ if (Ran->Rand() & 1)
+ return ConstantFP::getAllOnesValue(Tp);
+ return ConstantFP::getNullValue(Tp);
+ }
+ return UndefValue::get(Tp);
+ }
+
+ /// Return a random value with a known type.
+ Value *getRandomValue(Type *Tp) {
+ unsigned index = Ran->Rand();
+ for (unsigned i=0; i<PT->size(); ++i) {
+ Value *V = PT->at((index + i) % PT->size());
+ if (V->getType() == Tp)
+ return V;
+ }
+
+ // If the requested type was not found, generate a constant value.
+ if (Tp->isIntegerTy()) {
+ if (Ran->Rand() & 1)
+ return ConstantInt::getAllOnesValue(Tp);
+ return ConstantInt::getNullValue(Tp);
+ } else if (Tp->isFloatingPointTy()) {
+ if (Ran->Rand() & 1)
+ return ConstantFP::getAllOnesValue(Tp);
+ return ConstantFP::getNullValue(Tp);
+ } else if (Tp->isVectorTy()) {
+ VectorType *VTp = cast<VectorType>(Tp);
+
+ std::vector<Constant*> TempValues;
+ TempValues.reserve(VTp->getNumElements());
+ for (unsigned i = 0; i < VTp->getNumElements(); ++i)
+ TempValues.push_back(getRandomConstant(VTp->getScalarType()));
+
+ ArrayRef<Constant*> VectorValue(TempValues);
+ return ConstantVector::get(VectorValue);
+ }
+
+ return UndefValue::get(Tp);
+ }
+
+ /// Return a random value of any pointer type.
+ Value *getRandomPointerValue() {
+ unsigned index = Ran->Rand();
+ for (unsigned i=0; i<PT->size(); ++i) {
+ Value *V = PT->at((index + i) % PT->size());
+ if (V->getType()->isPointerTy())
+ return V;
+ }
+ return UndefValue::get(pickPointerType());
+ }
+
+ /// Return a random value of any vector type.
+ Value *getRandomVectorValue() {
+ unsigned index = Ran->Rand();
+ for (unsigned i=0; i<PT->size(); ++i) {
+ Value *V = PT->at((index + i) % PT->size());
+ if (V->getType()->isVectorTy())
+ return V;
+ }
+ return UndefValue::get(pickVectorType());
+ }
+
+ /// Pick a random type.
+ Type *pickType() {
+ return (Ran->Rand() & 1 ? pickVectorType() : pickScalarType());
+ }
+
+ /// Pick a random pointer type.
+ Type *pickPointerType() {
+ Type *Ty = pickType();
+ return PointerType::get(Ty, 0);
+ }
+
+ /// Pick a random vector type.
+ Type *pickVectorType(unsigned len = (unsigned)-1) {
+ // Pick a random vector width in the range 2**0 to 2**4.
+ // By adding two random values we generate a normal-like distribution
+ // around 2**2.
+ unsigned width = 1<<((Ran->Rand() % 3) + (Ran->Rand() % 3));
+ Type *Ty;
+
+ // Vectors of x86mmx are illegal; keep trying till we get something else.
+ do {
+ Ty = pickScalarType();
+ } while (Ty->isX86_MMXTy());
+
+ if (len != (unsigned)-1)
+ width = len;
+ return VectorType::get(Ty, width);
+ }
+
+ /// Pick a random scalar type.
+ Type *pickScalarType() {
+ Type *t = 0;
+ do {
+ switch (Ran->Rand() % 30) {
+ case 0: t = Type::getInt1Ty(Context); break;
+ case 1: t = Type::getInt8Ty(Context); break;
+ case 2: t = Type::getInt16Ty(Context); break;
+ case 3: case 4:
+ case 5: t = Type::getFloatTy(Context); break;
+ case 6: case 7:
+ case 8: t = Type::getDoubleTy(Context); break;
+ case 9: case 10:
+ case 11: t = Type::getInt32Ty(Context); break;
+ case 12: case 13:
+ case 14: t = Type::getInt64Ty(Context); break;
+ case 15: case 16:
+ case 17: if (GenHalfFloat) t = Type::getHalfTy(Context); break;
+ case 18: case 19:
+ case 20: if (GenX86FP80) t = Type::getX86_FP80Ty(Context); break;
+ case 21: case 22:
+ case 23: if (GenFP128) t = Type::getFP128Ty(Context); break;
+ case 24: case 25:
+ case 26: if (GenPPCFP128) t = Type::getPPC_FP128Ty(Context); break;
+ case 27: case 28:
+ case 29: if (GenX86MMX) t = Type::getX86_MMXTy(Context); break;
+ default: llvm_unreachable("Invalid scalar value");
+ }
+ } while (t == 0);
+
+ return t;
+ }
+
+ /// Basic block to populate
+ BasicBlock *BB;
+ /// Value table
+ PieceTable *PT;
+ /// Random number generator
+ Random *Ran;
+ /// Context
+ LLVMContext &Context;
+};
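On the vector-width comment above: the exponent is the sum of two independent draws from {0, 1, 2}, a triangular distribution over 0..4 peaking at 2, so width 4 is picked 3/9 of the time while widths 1 and 16 get 1/9 each. A standalone enumeration (illustrative):

#include <cstdio>

int main() {
  int Count[5] = {};
  for (int A = 0; A < 3; ++A)
    for (int B = 0; B < 3; ++B)
      ++Count[A + B];             // exponent = A + B
  for (int E = 0; E <= 4; ++E)
    std::printf("width %2d: %d/9\n", 1 << E, Count[E]);
  return 0;
}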
+
+struct LoadModifier: public Modifier {
+ LoadModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+ virtual void Act() {
+ // Try to use predefined pointers. If none exist, use an undef pointer value.
+ Value *Ptr = getRandomPointerValue();
+ Value *V = new LoadInst(Ptr, "L", BB->getTerminator());
+ PT->push_back(V);
+ }
+};
+
+struct StoreModifier: public Modifier {
+ StoreModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+ virtual void Act() {
+ // Try to use predefined pointers. If none exist, use an undef pointer value.
+ Value *Ptr = getRandomPointerValue();
+ Type *Tp = Ptr->getType();
+ Value *Val = getRandomValue(Tp->getContainedType(0));
+ Type *ValTy = Val->getType();
+
+ // Do not store vectors of i1s because they are unsupported
+ // by the codegen.
+ if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() == 1)
+ return;
+
+ new StoreInst(Val, Ptr, BB->getTerminator());
+ }
+};
+
+struct BinModifier: public Modifier {
+ BinModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+
+ virtual void Act() {
+ Value *Val0 = getRandomVal();
+ Value *Val1 = getRandomValue(Val0->getType());
+
+ // Don't handle pointer types.
+ if (Val0->getType()->isPointerTy() ||
+ Val1->getType()->isPointerTy())
+ return;
+
+ // Don't handle i1 types.
+ if (Val0->getType()->getScalarSizeInBits() == 1)
+ return;
+
+
+ bool isFloat = Val0->getType()->getScalarType()->isFloatingPointTy();
+ Instruction* Term = BB->getTerminator();
+ unsigned R = Ran->Rand() % (isFloat ? 7 : 13);
+ Instruction::BinaryOps Op;
+
+ switch (R) {
+ default: llvm_unreachable("Invalid BinOp");
+ case 0:{Op = (isFloat?Instruction::FAdd : Instruction::Add); break; }
+ case 1:{Op = (isFloat?Instruction::FSub : Instruction::Sub); break; }
+ case 2:{Op = (isFloat?Instruction::FMul : Instruction::Mul); break; }
+ case 3:{Op = (isFloat?Instruction::FDiv : Instruction::SDiv); break; }
+ case 4:{Op = (isFloat?Instruction::FDiv : Instruction::UDiv); break; }
+ case 5:{Op = (isFloat?Instruction::FRem : Instruction::SRem); break; }
+ case 6:{Op = (isFloat?Instruction::FRem : Instruction::URem); break; }
+ case 7: {Op = Instruction::Shl; break; }
+ case 8: {Op = Instruction::LShr; break; }
+ case 9: {Op = Instruction::AShr; break; }
+ case 10:{Op = Instruction::And; break; }
+ case 11:{Op = Instruction::Or; break; }
+ case 12:{Op = Instruction::Xor; break; }
+ }
+
+ PT->push_back(BinaryOperator::Create(Op, Val0, Val1, "B", Term));
+ }
+};
+
+/// Generate constant values.
+struct ConstModifier: public Modifier {
+ ConstModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+ virtual void Act() {
+ Type *Ty = pickType();
+
+ if (Ty->isVectorTy()) {
+ switch (Ran->Rand() % 2) {
+ case 0: if (Ty->getScalarType()->isIntegerTy())
+ return PT->push_back(ConstantVector::getAllOnesValue(Ty));
+ case 1: if (Ty->getScalarType()->isIntegerTy())
+ return PT->push_back(ConstantVector::getNullValue(Ty));
+ }
+ }
+
+ if (Ty->isFloatingPointTy()) {
+ // Generate 128 random bits, the size of the (currently)
+ // largest floating-point types.
+ uint64_t RandomBits[2];
+ for (unsigned i = 0; i < 2; ++i)
+ RandomBits[i] = Ran->Rand64();
+
+ APInt RandomInt(Ty->getPrimitiveSizeInBits(), makeArrayRef(RandomBits));
+
+ bool isIEEE = !Ty->isX86_FP80Ty() && !Ty->isPPC_FP128Ty();
+ APFloat RandomFloat(RandomInt, isIEEE);
+
+ if (Ran->Rand() & 1)
+ return PT->push_back(ConstantFP::getNullValue(Ty));
+ return PT->push_back(ConstantFP::get(Ty->getContext(), RandomFloat));
+ }
+
+ if (Ty->isIntegerTy()) {
+ switch (Ran->Rand() % 7) {
+ case 0: if (Ty->isIntegerTy())
+ return PT->push_back(ConstantInt::get(Ty,
+ APInt::getAllOnesValue(Ty->getPrimitiveSizeInBits())));
+ case 1: if (Ty->isIntegerTy())
+ return PT->push_back(ConstantInt::get(Ty,
+ APInt::getNullValue(Ty->getPrimitiveSizeInBits())));
+ case 2: case 3: case 4: case 5:
+ case 6: if (Ty->isIntegerTy())
+ PT->push_back(ConstantInt::get(Ty, Ran->Rand()));
+ }
+ }
+
+ }
+};
+
+struct AllocaModifier: public Modifier {
+ AllocaModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R){}
+
+ virtual void Act() {
+ Type *Tp = pickType();
+ PT->push_back(new AllocaInst(Tp, "A", BB->getFirstNonPHI()));
+ }
+};
+
+struct ExtractElementModifier: public Modifier {
+ ExtractElementModifier(BasicBlock *BB, PieceTable *PT, Random *R):
+ Modifier(BB, PT, R) {}
+
+ virtual void Act() {
+ Value *Val0 = getRandomVectorValue();
+ Value *V = ExtractElementInst::Create(Val0,
+ ConstantInt::get(Type::getInt32Ty(BB->getContext()),
+ Ran->Rand() % cast<VectorType>(Val0->getType())->getNumElements()),
+ "E", BB->getTerminator());
+ return PT->push_back(V);
+ }
+};
+
+struct ShuffModifier: public Modifier {
+ ShuffModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+ virtual void Act() {
+
+ Value *Val0 = getRandomVectorValue();
+ Value *Val1 = getRandomValue(Val0->getType());
+
+ unsigned Width = cast<VectorType>(Val0->getType())->getNumElements();
+ std::vector<Constant*> Idxs;
+
+ Type *I32 = Type::getInt32Ty(BB->getContext());
+ for (unsigned i=0; i<Width; ++i) {
+ Constant *CI = ConstantInt::get(I32, Ran->Rand() % (Width*2));
+ // Pick some undef values.
+ if (!(Ran->Rand() % 5))
+ CI = UndefValue::get(I32);
+ Idxs.push_back(CI);
+ }
+
+ Constant *Mask = ConstantVector::get(Idxs);
+
+ Value *V = new ShuffleVectorInst(Val0, Val1, Mask, "Shuff",
+ BB->getTerminator());
+ PT->push_back(V);
+ }
+};
+
+struct InsertElementModifier: public Modifier {
+ InsertElementModifier(BasicBlock *BB, PieceTable *PT, Random *R):
+ Modifier(BB, PT, R) {}
+
+ virtual void Act() {
+ Value *Val0 = getRandomVectorValue();
+ Value *Val1 = getRandomValue(Val0->getType()->getScalarType());
+
+ Value *V = InsertElementInst::Create(Val0, Val1,
+ ConstantInt::get(Type::getInt32Ty(BB->getContext()),
+ Ran->Rand() % cast<VectorType>(Val0->getType())->getNumElements()),
+ "I", BB->getTerminator());
+ return PT->push_back(V);
+ }
+
+};
+
+struct CastModifier: public Modifier {
+ CastModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+ virtual void Act() {
+
+ Value *V = getRandomVal();
+ Type *VTy = V->getType();
+ Type *DestTy = pickScalarType();
+
+ // For vector sources, pick a vector destination type of the same width.
+ if (VTy->isVectorTy()) {
+ VectorType *VecTy = cast<VectorType>(VTy);
+ DestTy = pickVectorType(VecTy->getNumElements());
+ }
+
+ // No need to cast between identical types.
+ if (VTy == DestTy) return;
+
+ // Pointers:
+ if (VTy->isPointerTy()) {
+ if (!DestTy->isPointerTy())
+ DestTy = PointerType::get(DestTy, 0);
+ return PT->push_back(
+ new BitCastInst(V, DestTy, "PC", BB->getTerminator()));
+ }
+
+ // Generate lots of bitcasts.
+ if ((Ran->Rand() & 1) &&
+ VTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
+ return PT->push_back(
+ new BitCastInst(V, DestTy, "BC", BB->getTerminator()));
+ }
+
+ // Both types are integers:
+ if (VTy->getScalarType()->isIntegerTy() &&
+ DestTy->getScalarType()->isIntegerTy()) {
+ if (VTy->getScalarType()->getPrimitiveSizeInBits() >
+ DestTy->getScalarType()->getPrimitiveSizeInBits()) {
+ return PT->push_back(
+ new TruncInst(V, DestTy, "Tr", BB->getTerminator()));
+ } else {
+ if (Ran->Rand() & 1)
+ return PT->push_back(
+ new ZExtInst(V, DestTy, "ZE", BB->getTerminator()));
+ return PT->push_back(new SExtInst(V, DestTy, "Se", BB->getTerminator()));
+ }
+ }
+
+ // Fp to int.
+ if (VTy->getScalarType()->isFloatingPointTy() &&
+ DestTy->getScalarType()->isIntegerTy()) {
+ if (Ran->Rand() & 1)
+ return PT->push_back(
+ new FPToSIInst(V, DestTy, "FC", BB->getTerminator()));
+ return PT->push_back(new FPToUIInst(V, DestTy, "FC", BB->getTerminator()));
+ }
+
+ // Int to fp.
+ if (VTy->getScalarType()->isIntegerTy() &&
+ DestTy->getScalarType()->isFloatingPointTy()) {
+ if (Ran->Rand() & 1)
+ return PT->push_back(
+ new SIToFPInst(V, DestTy, "FC", BB->getTerminator()));
+ return PT->push_back(new UIToFPInst(V, DestTy, "FC", BB->getTerminator()));
+
+ }
+
+ // Both floats.
+ if (VTy->getScalarType()->isFloatingPointTy() &&
+ DestTy->getScalarType()->isFloatingPointTy()) {
+ if (VTy->getScalarType()->getPrimitiveSizeInBits() >
+ DestTy->getScalarType()->getPrimitiveSizeInBits()) {
+ return PT->push_back(
+ new FPTruncInst(V, DestTy, "Tr", BB->getTerminator()));
+ } else {
+ return PT->push_back(
+ new FPExtInst(V, DestTy, "ZE", BB->getTerminator()));
+ }
+ }
+ }
+
+};
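Condensed, the opcode selection in CastModifier::Act above is:

    //   pointer -> (pointer-wrapped) dest : BitCast
    //   equal bit width, 50% of the time  : BitCast
    //   int -> narrower int               : Trunc
    //   int -> wider int                  : ZExt or SExt (coin flip)
    //   fp  -> int                        : FPToSI or FPToUI (coin flip)
    //   int -> fp                         : SIToFP or UIToFP (coin flip)
    //   fp  -> narrower fp                : FPTrunc
    //   fp  -> wider fp                   : FPExt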
+
+struct SelectModifier: public Modifier {
+ SelectModifier(BasicBlock *BB, PieceTable *PT, Random *R):
+ Modifier(BB, PT, R) {}
+
+ virtual void Act() {
+ // Try a bunch of different select configurations until a valid one is found.
+ Value *Val0 = getRandomVal();
+ Value *Val1 = getRandomValue(Val0->getType());
+
+ Type *CondTy = Type::getInt1Ty(Context);
+
+ // If the value type is a vector, and we allow vector select, then in 50%
+ // of the cases generate a vector select.
+ if (Val0->getType()->isVectorTy() && (Ran->Rand() & 1)) {
+ unsigned NumElem = cast<VectorType>(Val0->getType())->getNumElements();
+ CondTy = VectorType::get(CondTy, NumElem);
+ }
+
+ Value *Cond = getRandomValue(CondTy);
+ Value *V = SelectInst::Create(Cond, Val0, Val1, "Sl", BB->getTerminator());
+ return PT->push_back(V);
+ }
+};
+
+
+struct CmpModifier: public Modifier {
+ CmpModifier(BasicBlock *BB, PieceTable *PT, Random *R):Modifier(BB, PT, R) {}
+ virtual void Act() {
+
+ Value *Val0 = getRandomVal();
+ Value *Val1 = getRandomValue(Val0->getType());
+
+ if (Val0->getType()->isPointerTy()) return;
+ bool fp = Val0->getType()->getScalarType()->isFloatingPointTy();
+
+ int op;
+ if (fp) {
+ op = Ran->Rand() %
+ (CmpInst::LAST_FCMP_PREDICATE - CmpInst::FIRST_FCMP_PREDICATE) +
+ CmpInst::FIRST_FCMP_PREDICATE;
+ } else {
+ op = Ran->Rand() %
+ (CmpInst::LAST_ICMP_PREDICATE - CmpInst::FIRST_ICMP_PREDICATE) +
+ CmpInst::FIRST_ICMP_PREDICATE;
+ }
+
+ Value *V = CmpInst::Create(fp ? Instruction::FCmp : Instruction::ICmp,
+ op, Val0, Val1, "Cmp", BB->getTerminator());
+ return PT->push_back(V);
+ }
+};
+
+void FillFunction(Function *F) {
+ // Create a legal entry block.
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "BB", F);
+ ReturnInst::Create(F->getContext(), BB);
+
+ // Create the value table.
+ Modifier::PieceTable PT;
+ // Pick an initial seed value
+ Random R(SeedCL);
+
+ // Consider arguments as legal values.
+ for (Function::arg_iterator it = F->arg_begin(), e = F->arg_end();
+ it != e; ++it)
+ PT.push_back(it);
+
+ // List of modifiers which add new random instructions.
+ std::vector<Modifier*> Modifiers;
+ std::auto_ptr<Modifier> LM(new LoadModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> SM(new StoreModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> EE(new ExtractElementModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> SHM(new ShuffModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> IE(new InsertElementModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> BM(new BinModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> CM(new CastModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> SLM(new SelectModifier(BB, &PT, &R));
+ std::auto_ptr<Modifier> PM(new CmpModifier(BB, &PT, &R));
+ Modifiers.push_back(LM.get());
+ Modifiers.push_back(SM.get());
+ Modifiers.push_back(EE.get());
+ Modifiers.push_back(SHM.get());
+ Modifiers.push_back(IE.get());
+ Modifiers.push_back(BM.get());
+ Modifiers.push_back(CM.get());
+ Modifiers.push_back(SLM.get());
+ Modifiers.push_back(PM.get());
+
+ // Generate the random instructions
+ AllocaModifier AM(BB, &PT, &R); AM.ActN(5); // Throw in a few allocas
+ ConstModifier COM(BB, &PT, &R); COM.ActN(40); // Throw in a few constants
+
+ for (unsigned i=0; i< SizeCL / Modifiers.size(); ++i)
+ for (std::vector<Modifier*>::iterator it = Modifiers.begin(),
+ e = Modifiers.end(); it != e; ++it) {
+ (*it)->Act();
+ }
+
+ SM->ActN(5); // Throw in a few stores.
+}
+
+void IntroduceControlFlow(Function *F) {
+ std::set<Instruction*> BoolInst;
+ for (BasicBlock::iterator it = F->begin()->begin(),
+ e = F->begin()->end(); it != e; ++it) {
+ if (it->getType() == IntegerType::getInt1Ty(F->getContext()))
+ BoolInst.insert(it);
+ }
+
+ for (std::set<Instruction*>::iterator it = BoolInst.begin(),
+ e = BoolInst.end(); it != e; ++it) {
+ Instruction *Instr = *it;
+ BasicBlock *Curr = Instr->getParent();
BasicBlock::iterator Loc = Instr;
+ BasicBlock *Next = Curr->splitBasicBlock(Loc, "CF");
+ Instr->moveBefore(Curr->getTerminator());
+ if (Curr != &F->getEntryBlock()) {
+ BranchInst::Create(Curr, Next, Instr, Curr->getTerminator());
+ Curr->getTerminator()->eraseFromParent();
+ }
+ }
+}
+
+int main(int argc, char **argv) {
+ // Init LLVM, call llvm_shutdown() on exit, parse args, etc.
+ llvm::PrettyStackTraceProgram X(argc, argv);
+ cl::ParseCommandLineOptions(argc, argv, "llvm codegen stress-tester\n");
+ llvm_shutdown_obj Y;
+
+ std::auto_ptr<Module> M(new Module("/tmp/autogen.bc", getGlobalContext()));
+ Function *F = GenEmptyFunction(M.get());
+ FillFunction(F);
+ IntroduceControlFlow(F);
+
+ // Figure out what stream we are supposed to write to...
+ OwningPtr<tool_output_file> Out;
+ // Default to standard output.
+ if (OutputFilename.empty())
+ OutputFilename = "-";
+
+ std::string ErrorInfo;
+ Out.reset(new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
+ }
+
+ PassManager Passes;
+ Passes.add(createVerifierPass());
+ Passes.add(createPrintModulePass(&Out->os()));
+ Passes.run(*M.get());
+ Out->keep();
+
+ return 0;
+}
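Using only the options this file defines, a typical invocation looks like this (output goes to stdout when -o is omitted):

    llvm-stress -seed 42 -size 500 -o stress.ll
    llvm-stress -seed 42 -size 500 -generate-half-float -generate-x86-fp80

Identical seeds and flags reproduce the same module byte for byte, which is what makes the tool useful for bisecting codegen crashes.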
diff --git a/tools/llvm-stub/LLVMBuild.txt b/tools/llvm-stub/LLVMBuild.txt
new file mode 100644
index 000000000000..5c3534c106e5
--- /dev/null
+++ b/tools/llvm-stub/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/llvm-stub/LLVMBuild.txt --------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = llvm-stub
+parent = Tools
+required_libraries =
diff --git a/tools/llvm-stub/Makefile b/tools/llvm-stub/Makefile
index 7ffe14976bb5..077efa2d02f1 100644
--- a/tools/llvm-stub/Makefile
+++ b/tools/llvm-stub/Makefile
@@ -7,7 +7,9 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = llvm-stub
+LEVEL := ../..
+TOOLNAME := llvm-stub
+LINK_COMPONENTS := object
+
include $(LEVEL)/Makefile.common
diff --git a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
index 7e2c5f06e61f..911297609b0c 100644
--- a/tools/lto/CMakeLists.txt
+++ b/tools/lto/CMakeLists.txt
@@ -1,6 +1,6 @@
set(LLVM_LINK_COMPONENTS
${LLVM_TARGETS_TO_BUILD}
- ipo scalaropts linker bitreader bitwriter mcdisassembler)
+ ipo scalaropts linker bitreader bitwriter mcdisassembler vectorize)
add_definitions( -DLLVM_VERSION_INFO=\"${PACKAGE_VERSION}\" )
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 6c8dbad460c0..77c06a655b18 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -4,27 +4,26 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file implements the Link Time Optimization library. This library is
+// This file implements the Link Time Optimization library. This library is
// intended to be used by linker to optimize code at link time.
//
//===----------------------------------------------------------------------===//
-#include "LTOModule.h"
#include "LTOCodeGenerator.h"
+#include "LTOModule.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Linker.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/PassManager.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Config/config.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -33,67 +32,55 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SystemUtils.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Host.h"
-#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Config/config.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include <cstdlib>
-#include <unistd.h>
-#include <fcntl.h>
-
-
+#include "llvm/ADT/StringExtras.h"
using namespace llvm;
-static cl::opt<bool> DisableInline("disable-inlining",
+static cl::opt<bool> DisableInline("disable-inlining", cl::init(false),
cl::desc("Do not run the inliner pass"));
+static cl::opt<bool> DisableGVNLoadPRE("disable-gvn-loadpre", cl::init(false),
+ cl::desc("Do not run the GVN load PRE pass"));
-const char* LTOCodeGenerator::getVersionString()
-{
+const char* LTOCodeGenerator::getVersionString() {
#ifdef LLVM_VERSION_INFO
- return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO;
+ return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO;
#else
- return PACKAGE_NAME " version " PACKAGE_VERSION;
+ return PACKAGE_NAME " version " PACKAGE_VERSION;
#endif
}
-
-LTOCodeGenerator::LTOCodeGenerator()
- : _context(getGlobalContext()),
- _linker("LinkTimeOptimizer", "ld-temp.o", _context), _target(NULL),
- _emitDwarfDebugInfo(false), _scopeRestrictionsDone(false),
- _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
- _nativeObjectFile(NULL)
-{
- InitializeAllTargets();
- InitializeAllTargetMCs();
- InitializeAllAsmPrinters();
-}
-
-LTOCodeGenerator::~LTOCodeGenerator()
-{
- delete _target;
- delete _nativeObjectFile;
+LTOCodeGenerator::LTOCodeGenerator()
+ : _context(getGlobalContext()),
+ _linker("LinkTimeOptimizer", "ld-temp.o", _context), _target(NULL),
+ _emitDwarfDebugInfo(false), _scopeRestrictionsDone(false),
+ _runInternalizePass(false), _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC),
+ _nativeObjectFile(NULL) {
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmPrinters();
}
+LTOCodeGenerator::~LTOCodeGenerator() {
+ delete _target;
+ delete _nativeObjectFile;
+ for (std::vector<char*>::iterator I = _codegenOptions.begin(),
+ E = _codegenOptions.end(); I != E; ++I)
+ free(*I);
+}
-bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg)
-{
-
- if(mod->getLLVVMModule()->MaterializeAllPermanently(&errMsg))
- return true;
-
+bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
bool ret = _linker.LinkInModule(mod->getLLVVMModule(), &errMsg);
const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
@@ -102,55 +89,39 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg)
return ret;
}
-
-
-bool LTOCodeGenerator::setDebugInfo(lto_debug_model debug, std::string& errMsg)
-{
- switch (debug) {
- case LTO_DEBUG_MODEL_NONE:
- _emitDwarfDebugInfo = false;
- return false;
-
- case LTO_DEBUG_MODEL_DWARF:
- _emitDwarfDebugInfo = true;
- return false;
- }
- errMsg = "unknown debug format";
- return true;
-}
+bool LTOCodeGenerator::setDebugInfo(lto_debug_model debug,
+ std::string& errMsg) {
+ switch (debug) {
+ case LTO_DEBUG_MODEL_NONE:
+ _emitDwarfDebugInfo = false;
+ return false;
-bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model,
- std::string& errMsg)
-{
- switch (model) {
- case LTO_CODEGEN_PIC_MODEL_STATIC:
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
- _codeModel = model;
- return false;
- }
- errMsg = "unknown pic model";
- return true;
-}
-
-void LTOCodeGenerator::setCpu(const char* mCpu)
-{
- _mCpu = mCpu;
+ case LTO_DEBUG_MODEL_DWARF:
+ _emitDwarfDebugInfo = true;
+ return false;
+ }
+ llvm_unreachable("Unknown debug format!");
}
-void LTOCodeGenerator::addMustPreserveSymbol(const char* sym)
-{
- _mustPreserveSymbols[sym] = 1;
+bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model,
+ std::string& errMsg) {
+ switch (model) {
+ case LTO_CODEGEN_PIC_MODEL_STATIC:
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+ _codeModel = model;
+ return false;
+ }
+ llvm_unreachable("Unknown PIC model!");
}
-
bool LTOCodeGenerator::writeMergedModules(const char *path,
std::string &errMsg) {
if (determineTarget(errMsg))
return true;
- // mark which symbols can not be internalized
+ // mark which symbols can not be internalized
applyScopeRestrictions();
// create output file
@@ -162,7 +133,7 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
errMsg += path;
return true;
}
-
+
// write bitcode to it
WriteBitcodeToFile(_linker.getModule(), Out.os());
Out.os().close();
@@ -173,14 +144,12 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
Out.os().clear_error();
return true;
}
-
+
Out.keep();
return false;
}
-
-bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg)
-{
+bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg) {
// make unique temp .o file to put generated object file
sys::PathWithStatus uniqueObjPath("lto-llvm.o");
if ( uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg) ) {
@@ -194,12 +163,14 @@ bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg)
tool_output_file objFile(uniqueObjPath.c_str(), errMsg);
if (!errMsg.empty())
return true;
+
genResult = this->generateObjectFile(objFile.os(), errMsg);
objFile.os().close();
if (objFile.os().has_error()) {
objFile.os().clear_error();
return true;
}
+
objFile.keep();
if ( genResult ) {
uniqueObjPath.eraseFromDisk();
@@ -211,8 +182,7 @@ bool LTOCodeGenerator::compile_to_file(const char** name, std::string& errMsg)
return false;
}
-const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
-{
+const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg) {
const char *name;
if (compile_to_file(&name, errMsg))
return NULL;
@@ -238,47 +208,48 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg)
return _nativeObjectFile->getBufferStart();
}
-bool LTOCodeGenerator::determineTarget(std::string& errMsg)
-{
- if ( _target == NULL ) {
- std::string Triple = _linker.getModule()->getTargetTriple();
- if (Triple.empty())
- Triple = sys::getHostTriple();
-
- // create target machine from info for merged modules
- const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
- if ( march == NULL )
- return true;
-
- // The relocation model is actually a static member of TargetMachine
- // and needs to be set before the TargetMachine is instantiated.
- Reloc::Model RelocModel = Reloc::Default;
- switch( _codeModel ) {
- case LTO_CODEGEN_PIC_MODEL_STATIC:
- RelocModel = Reloc::Static;
- break;
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
- RelocModel = Reloc::PIC_;
- break;
- case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
- RelocModel = Reloc::DynamicNoPIC;
- break;
- }
-
- // construct LTOModule, hand over ownership of module and target
- SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
- std::string FeatureStr = Features.getString();
- _target = march->createTargetMachine(Triple, _mCpu, FeatureStr,
- RelocModel);
+bool LTOCodeGenerator::determineTarget(std::string& errMsg) {
+ if ( _target == NULL ) {
+ std::string Triple = _linker.getModule()->getTargetTriple();
+ if (Triple.empty())
+ Triple = sys::getDefaultTargetTriple();
+
+ // create target machine from info for merged modules
+ const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
+ if ( march == NULL )
+ return true;
+
+ // The relocation model is actually a static member of TargetMachine and
+ // needs to be set before the TargetMachine is instantiated.
+ Reloc::Model RelocModel = Reloc::Default;
+ switch( _codeModel ) {
+ case LTO_CODEGEN_PIC_MODEL_STATIC:
+ RelocModel = Reloc::Static;
+ break;
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
+ RelocModel = Reloc::PIC_;
+ break;
+ case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+ RelocModel = Reloc::DynamicNoPIC;
+ break;
}
- return false;
+
+ // construct LTOModule, hand over ownership of module and target
+ SubtargetFeatures Features;
+ Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
+ std::string FeatureStr = Features.getString();
+ TargetOptions Options;
+ _target = march->createTargetMachine(Triple, _mCpu, FeatureStr, Options,
+ RelocModel);
+ }
+ return false;
}
-void LTOCodeGenerator::applyRestriction(GlobalValue &GV,
- std::vector<const char*> &mustPreserveList,
- SmallPtrSet<GlobalValue*, 8> &asmUsed,
- Mangler &mangler) {
+void LTOCodeGenerator::
+applyRestriction(GlobalValue &GV,
+ std::vector<const char*> &mustPreserveList,
+ SmallPtrSet<GlobalValue*, 8> &asmUsed,
+ Mangler &mangler) {
SmallString<64> Buffer;
mangler.getNameWithPrefix(Buffer, &GV, false);
@@ -298,8 +269,8 @@ static void findUsedValues(GlobalVariable *LLVMUsed,
if (Inits == 0) return;
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
- if (GlobalValue *GV =
- dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
UsedValues.insert(GV);
}
@@ -311,8 +282,8 @@ void LTOCodeGenerator::applyScopeRestrictions() {
PassManager passes;
passes.add(createVerifierPass());
- // mark which symbols can not be internalized
- MCContext Context(*_target->getMCAsmInfo(), *_target->getRegisterInfo(), NULL);
+ // mark which symbols can not be internalized
+ MCContext Context(*_target->getMCAsmInfo(), *_target->getRegisterInfo(),NULL);
Mangler mangler(Context, *_target->getTargetData());
std::vector<const char*> mustPreserveList;
SmallPtrSet<GlobalValue*, 8> asmUsed;
@@ -320,7 +291,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
for (Module::iterator f = mergedModule->begin(),
e = mergedModule->end(); f != e; ++f)
applyRestriction(*f, mustPreserveList, asmUsed, mangler);
- for (Module::global_iterator v = mergedModule->global_begin(),
+ for (Module::global_iterator v = mergedModule->global_begin(),
e = mergedModule->global_end(); v != e; ++v)
applyRestriction(*v, mustPreserveList, asmUsed, mangler);
for (Module::alias_iterator a = mergedModule->alias_begin(),
@@ -355,81 +326,82 @@ void LTOCodeGenerator::applyScopeRestrictions() {
// apply scope restrictions
passes.run(*mergedModule);
-
+
_scopeRestrictionsDone = true;
}
/// Optimize merged modules using various IPO passes
bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
std::string &errMsg) {
- if ( this->determineTarget(errMsg) )
- return true;
+ if ( this->determineTarget(errMsg) )
+ return true;
- // mark which symbols can not be internalized
- this->applyScopeRestrictions();
+ Module* mergedModule = _linker.getModule();
- Module* mergedModule = _linker.getModule();
+ // if options were requested, set them
+ if ( !_codegenOptions.empty() )
+ cl::ParseCommandLineOptions(_codegenOptions.size(),
+ const_cast<char **>(&_codegenOptions[0]));
- // if options were requested, set them
- if ( !_codegenOptions.empty() )
- cl::ParseCommandLineOptions(_codegenOptions.size(),
- const_cast<char **>(&_codegenOptions[0]));
+ // mark which symbols can not be internalized
+ this->applyScopeRestrictions();
- // Instantiate the pass manager to organize the passes.
- PassManager passes;
+ // Instantiate the pass manager to organize the passes.
+ PassManager passes;
- // Start off with a verification pass.
- passes.add(createVerifierPass());
+ // Start off with a verification pass.
+ passes.add(createVerifierPass());
- // Add an appropriate TargetData instance for this module...
- passes.add(new TargetData(*_target->getTargetData()));
-
- PassManagerBuilder().populateLTOPassManager(passes, /*Internalize=*/ false,
- !DisableInline);
+ // Add an appropriate TargetData instance for this module...
+ passes.add(new TargetData(*_target->getTargetData()));
- // Make sure everything is still good.
- passes.add(createVerifierPass());
+ PassManagerBuilder().populateLTOPassManager(passes,
+ _runInternalizePass,
+ !DisableInline,
+ DisableGVNLoadPRE);
- FunctionPassManager *codeGenPasses = new FunctionPassManager(mergedModule);
+ // Make sure everything is still good.
+ passes.add(createVerifierPass());
- codeGenPasses->add(new TargetData(*_target->getTargetData()));
+ FunctionPassManager *codeGenPasses = new FunctionPassManager(mergedModule);
- formatted_raw_ostream Out(out);
+ codeGenPasses->add(new TargetData(*_target->getTargetData()));
- if (_target->addPassesToEmitFile(*codeGenPasses, Out,
- TargetMachine::CGFT_ObjectFile,
- CodeGenOpt::Aggressive)) {
- errMsg = "target file type not supported";
- return true;
- }
+ formatted_raw_ostream Out(out);
- // Run our queue of passes all at once now, efficiently.
- passes.run(*mergedModule);
+ if (_target->addPassesToEmitFile(*codeGenPasses, Out,
+ TargetMachine::CGFT_ObjectFile,
+ CodeGenOpt::Aggressive)) {
+ errMsg = "target file type not supported";
+ return true;
+ }
+
+ // Run our queue of passes all at once now, efficiently.
+ passes.run(*mergedModule);
- // Run the code generator, and write assembly file
- codeGenPasses->doInitialization();
+ // Run the code generator, and write assembly file
+ codeGenPasses->doInitialization();
- for (Module::iterator
- it = mergedModule->begin(), e = mergedModule->end(); it != e; ++it)
- if (!it->isDeclaration())
- codeGenPasses->run(*it);
+ for (Module::iterator
+ it = mergedModule->begin(), e = mergedModule->end(); it != e; ++it)
+ if (!it->isDeclaration())
+ codeGenPasses->run(*it);
- codeGenPasses->doFinalization();
- delete codeGenPasses;
+ codeGenPasses->doFinalization();
+ delete codeGenPasses;
- return false; // success
+ return false; // success
}
-
-/// Optimize merged modules using various IPO passes
-void LTOCodeGenerator::setCodeGenDebugOptions(const char* options)
-{
- for (std::pair<StringRef, StringRef> o = getToken(options);
- !o.first.empty(); o = getToken(o.second)) {
- // ParseCommandLineOptions() expects argv[0] to be program name.
- // Lazily add that.
- if ( _codegenOptions.empty() )
- _codegenOptions.push_back("libLTO");
- _codegenOptions.push_back(strdup(o.first.str().c_str()));
- }
+/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging
+/// LTO problems.
+void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) {
+ for (std::pair<StringRef, StringRef> o = getToken(options);
+ !o.first.empty(); o = getToken(o.second)) {
+ // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add
+ // that.
+ if ( _codegenOptions.empty() )
+ _codegenOptions.push_back(strdup("libLTO"));
+ _codegenOptions.push_back(strdup(o.first.str().c_str()));
+ }
}
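
A minimal caller-side sketch of the tokenization contract above; the option
string is illustrative, not part of this patch:

    std::string errMsg;
    LTOCodeGenerator cg;
    cg.setCodeGenDebugOptions("-debug-pass=Structure -time-passes");
    // getToken() splits on whitespace, so _codegenOptions now holds
    //   {"libLTO", "-debug-pass=Structure", "-time-passes"}
    // with "libLTO" standing in for argv[0]. generateObjectFile() hands this
    // array to cl::ParseCommandLineOptions(); every entry is strdup'ed here
    // and free'd by the destructor at the top of this file.
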
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index f8fd357df406..bac3e6efe909 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -4,71 +4,82 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file declares the LTOCodeGenerator class.
+// This file declares the LTOCodeGenerator class.
//
//===----------------------------------------------------------------------===//
-
#ifndef LTO_CODE_GENERATOR_H
#define LTO_CODE_GENERATOR_H
#include "llvm/Linker.h"
-#include "llvm/LLVMContext.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-
+#include "llvm-c/lto.h"
#include <string>
+namespace llvm {
+ class LLVMContext;
+ class GlobalValue;
+ class Mangler;
+ class MemoryBuffer;
+ class TargetMachine;
+ class raw_ostream;
+}
-//
-// C++ class which implements the opaque lto_code_gen_t
-//
-
+//===----------------------------------------------------------------------===//
+/// LTOCodeGenerator - C++ class which implements the opaque lto_code_gen_t
+/// type.
+///
struct LTOCodeGenerator {
- static const char* getVersionString();
-
- LTOCodeGenerator();
- ~LTOCodeGenerator();
-
- bool addModule(struct LTOModule*, std::string& errMsg);
- bool setDebugInfo(lto_debug_model, std::string& errMsg);
- bool setCodePICModel(lto_codegen_model, std::string& errMsg);
- void setCpu(const char *cpu);
- void addMustPreserveSymbol(const char* sym);
- bool writeMergedModules(const char* path,
- std::string& errMsg);
- bool compile_to_file(const char** name, std::string& errMsg);
- const void* compile(size_t* length, std::string& errMsg);
- void setCodeGenDebugOptions(const char *opts);
+ static const char *getVersionString();
+
+ LTOCodeGenerator();
+ ~LTOCodeGenerator();
+
+ bool addModule(struct LTOModule*, std::string &errMsg);
+ bool setDebugInfo(lto_debug_model, std::string &errMsg);
+ bool setCodePICModel(lto_codegen_model, std::string &errMsg);
+
+ void setCpu(const char* mCpu) { _mCpu = mCpu; }
+
+ void addMustPreserveSymbol(const char* sym) {
+ _mustPreserveSymbols[sym] = 1;
+ }
+
+ bool writeMergedModules(const char *path, std::string &errMsg);
+ bool compile_to_file(const char **name, std::string &errMsg);
+ const void *compile(size_t *length, std::string &errMsg);
+ void setCodeGenDebugOptions(const char *opts);
+
+ void enableInternalizePass() { _runInternalizePass = true; }
+
private:
- bool generateObjectFile(llvm::raw_ostream& out,
- std::string& errMsg);
- void applyScopeRestrictions();
- void applyRestriction(llvm::GlobalValue &GV,
- std::vector<const char*> &mustPreserveList,
+ bool generateObjectFile(llvm::raw_ostream &out, std::string &errMsg);
+ void applyScopeRestrictions();
+ void applyRestriction(llvm::GlobalValue &GV,
+ std::vector<const char*> &mustPreserveList,
llvm::SmallPtrSet<llvm::GlobalValue*, 8> &asmUsed,
- llvm::Mangler &mangler);
- bool determineTarget(std::string& errMsg);
-
- typedef llvm::StringMap<uint8_t> StringSet;
+ llvm::Mangler &mangler);
+ bool determineTarget(std::string &errMsg);
- llvm::LLVMContext& _context;
- llvm::Linker _linker;
- llvm::TargetMachine* _target;
- bool _emitDwarfDebugInfo;
- bool _scopeRestrictionsDone;
- lto_codegen_model _codeModel;
- StringSet _mustPreserveSymbols;
- StringSet _asmUndefinedRefs;
- llvm::MemoryBuffer* _nativeObjectFile;
- std::vector<const char*> _codegenOptions;
- std::string _mCpu;
- std::string _nativeObjectPath;
+ typedef llvm::StringMap<uint8_t> StringSet;
+
+ llvm::LLVMContext& _context;
+ llvm::Linker _linker;
+ llvm::TargetMachine* _target;
+ bool _emitDwarfDebugInfo;
+ bool _scopeRestrictionsDone;
+ bool _runInternalizePass;
+ lto_codegen_model _codeModel;
+ StringSet _mustPreserveSymbols;
+ StringSet _asmUndefinedRefs;
+ llvm::MemoryBuffer* _nativeObjectFile;
+ std::vector<char*> _codegenOptions;
+ std::string _mCpu;
+ std::string _nativeObjectPath;
};
#endif // LTO_CODE_GENERATOR_H
-
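
For orientation, a minimal sketch of driving the class declared above; the
input path, CPU string, and reportError helper are hypothetical:

    std::string errMsg;
    LTOCodeGenerator cg;
    cg.setCpu("core2");                          // hypothetical CPU string
    cg.addMustPreserveSymbol("_main");           // keep the entry point
    cg.enableInternalizePass();                  // whole-program optimization
    LTOModule *mod = LTOModule::makeLTOModule("a.o", errMsg);
    if (!mod || cg.addModule(mod, errMsg))
      return reportError(errMsg);                // hypothetical helper
    if (cg.setCodePICModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC, errMsg))
      return reportError(errMsg);
    size_t len;
    const void *obj = cg.compile(&len, errMsg);  // native object bytes or NULL
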
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 4ba8985e72a8..1dbd64bdc0be 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -13,39 +13,37 @@
//===----------------------------------------------------------------------===//
#include "LTOModule.h"
-
#include "llvm/Constants.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
+#include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
+LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t)
+ : _module(m), _target(t),
+ _context(*_target->getMCAsmInfo(), *_target->getRegisterInfo(), NULL),
+ _mangler(_context, *_target->getTargetData()) {}
+
+/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM
+/// bitcode.
bool LTOModule::isBitcodeFile(const void *mem, size_t length) {
return llvm::sys::IdentifyFileType((char*)mem, length)
== llvm::sys::Bitcode_FileType;
@@ -55,6 +53,8 @@ bool LTOModule::isBitcodeFile(const char *path) {
return llvm::sys::Path(path).isBitcodeFile();
}
+/// isBitcodeFileForTarget - Returns 'true' if the file (or memory contents) is
+/// LLVM bitcode for the specified triple.
bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length,
const char *triplePrefix) {
MemoryBuffer *buffer = makeBuffer(mem, length);
@@ -63,7 +63,6 @@ bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length,
return isTargetMatch(buffer, triplePrefix);
}
-
bool LTOModule::isBitcodeFileForTarget(const char *path,
const char *triplePrefix) {
OwningPtr<MemoryBuffer> buffer;
@@ -72,22 +71,17 @@ bool LTOModule::isBitcodeFileForTarget(const char *path,
return isTargetMatch(buffer.take(), triplePrefix);
}
-// Takes ownership of buffer.
+/// isTargetMatch - Returns 'true' if the memory buffer is for the specified
+/// target triple.
bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) {
std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext());
delete buffer;
- return (strncmp(Triple.c_str(), triplePrefix,
- strlen(triplePrefix)) == 0);
+ return strncmp(Triple.c_str(), triplePrefix, strlen(triplePrefix)) == 0;
}
-
-LTOModule::LTOModule(Module *m, TargetMachine *t)
- : _module(m), _target(t)
-{
-}
-
-LTOModule *LTOModule::makeLTOModule(const char *path,
- std::string &errMsg) {
+/// makeLTOModule - Create an LTOModule. N.B. These methods take ownership of
+/// the buffer.
+LTOModule *LTOModule::makeLTOModule(const char *path, std::string &errMsg) {
OwningPtr<MemoryBuffer> buffer;
if (error_code ec = MemoryBuffer::getFile(path, buffer)) {
errMsg = ec.message();
@@ -97,8 +91,7 @@ LTOModule *LTOModule::makeLTOModule(const char *path,
}
LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
- size_t size,
- std::string &errMsg) {
+ size_t size, std::string &errMsg) {
return makeLTOModule(fd, path, size, size, 0, errMsg);
}
@@ -116,13 +109,6 @@ LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
return makeLTOModule(buffer.take(), errMsg);
}
-/// makeBuffer - Create a MemoryBuffer from a memory range.
-MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
- const char *startPtr = (char*)mem;
- return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false);
-}
-
-
LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
std::string &errMsg) {
OwningPtr<MemoryBuffer> buffer(makeBuffer(mem, length));
@@ -151,7 +137,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
std::string Triple = m->getTargetTriple();
if (Triple.empty())
- Triple = sys::getHostTriple();
+ Triple = sys::getDefaultTargetTriple();
// find machine architecture for this module
const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
@@ -163,39 +149,33 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
std::string FeatureStr = Features.getString();
std::string CPU;
- TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr);
+ TargetOptions Options;
+ TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr,
+ Options);
LTOModule *Ret = new LTOModule(m.take(), target);
- bool Err = Ret->ParseSymbols(errMsg);
- if (Err) {
+ if (Ret->parseSymbols(errMsg)) {
delete Ret;
return NULL;
}
- return Ret;
-}
-
-const char *LTOModule::getTargetTriple() {
- return _module->getTargetTriple().c_str();
-}
-
-void LTOModule::setTargetTriple(const char *triple) {
- _module->setTargetTriple(triple);
+ return Ret;
}
-void LTOModule::addDefinedFunctionSymbol(Function *f, Mangler &mangler) {
- // add to list of defined symbols
- addDefinedSymbol(f, mangler, true);
+/// makeBuffer - Create a MemoryBuffer from a memory range.
+MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) {
+ const char *startPtr = (char*)mem;
+ return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false);
}
-// Get string that data pointer points to.
+/// objcClassNameFromExpression - Get string that the data pointer points to.
bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) {
if (ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) {
Constant *op = ce->getOperand(0);
if (GlobalVariable *gvn = dyn_cast<GlobalVariable>(op)) {
Constant *cn = gvn->getInitializer();
- if (ConstantArray *ca = dyn_cast<ConstantArray>(cn)) {
+ if (ConstantDataArray *ca = dyn_cast<ConstantDataArray>(cn)) {
if (ca->isCString()) {
- name = ".objc_class_name_" + ca->getAsCString();
+ name = ".objc_class_name_" + ca->getAsCString().str();
return true;
}
}
@@ -204,85 +184,92 @@ bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) {
return false;
}
-// Parse i386/ppc ObjC class data structure.
+/// addObjCClass - Parse i386/ppc ObjC class data structure.
void LTOModule::addObjCClass(GlobalVariable *clgv) {
- if (ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
- // second slot in __OBJC,__class is pointer to superclass name
- std::string superclassName;
- if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
- NameAndAttributes info;
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(superclassName);
- if (!entry.getValue().name) {
- const char *symbolName = entry.getKey().data();
- info.name = symbolName;
- info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
- entry.setValue(info);
- }
- }
- // third slot in __OBJC,__class is pointer to class name
- std::string className;
- if (objcClassNameFromExpression(c->getOperand(2), className)) {
- StringSet::value_type &entry =
- _defines.GetOrCreateValue(className);
- entry.setValue(1);
- NameAndAttributes info;
- info.name = entry.getKey().data();
- info.attributes = (lto_symbol_attributes)
- (LTO_SYMBOL_PERMISSIONS_DATA |
- LTO_SYMBOL_DEFINITION_REGULAR |
- LTO_SYMBOL_SCOPE_DEFAULT);
- _symbols.push_back(info);
- }
- }
-}
-
-
-// Parse i386/ppc ObjC category data structure.
-void LTOModule::addObjCCategory(GlobalVariable *clgv) {
- if (ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer())) {
- // second slot in __OBJC,__category is pointer to target class name
- std::string targetclassName;
- if (objcClassNameFromExpression(c->getOperand(1), targetclassName)) {
- NameAndAttributes info;
-
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(targetclassName);
-
- if (entry.getValue().name)
- return;
+ ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
+ if (!c) return;
+ // second slot in __OBJC,__class is pointer to superclass name
+ std::string superclassName;
+ if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
+ NameAndAttributes info;
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(superclassName);
+ if (!entry.getValue().name) {
const char *symbolName = entry.getKey().data();
info.name = symbolName;
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = false;
+ info.symbol = clgv;
entry.setValue(info);
}
}
+
+ // third slot in __OBJC,__class is pointer to class name
+ std::string className;
+ if (objcClassNameFromExpression(c->getOperand(2), className)) {
+ StringSet::value_type &entry = _defines.GetOrCreateValue(className);
+ entry.setValue(1);
+
+ NameAndAttributes info;
+ info.name = entry.getKey().data();
+ info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
+ LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
+ info.isFunction = false;
+ info.symbol = clgv;
+ _symbols.push_back(info);
+ }
}
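
The operand indices above follow the legacy i386/ppc ObjC metadata layout; an
illustrative (not authoritative) shape of the __OBJC,__class global being
walked:

    // struct objc_class {        // field names are illustrative only
    //   void *isa;               // operand 0
    //   char *super_class_name;  // operand 1 -> recorded as undefined ref
    //   char *class_name;        // operand 2 -> recorded as defined symbol
    //   ...
    // };
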
+/// addObjCCategory - Parse i386/ppc ObjC category data structure.
+void LTOModule::addObjCCategory(GlobalVariable *clgv) {
+ ConstantStruct *c = dyn_cast<ConstantStruct>(clgv->getInitializer());
+ if (!c) return;
-// Parse i386/ppc ObjC class list data structure.
-void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
+ // second slot in __OBJC,__category is pointer to target class name
std::string targetclassName;
- if (objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) {
- NameAndAttributes info;
+ if (!objcClassNameFromExpression(c->getOperand(1), targetclassName))
+ return;
- StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(targetclassName);
- if (entry.getValue().name)
- return;
+ NameAndAttributes info;
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(targetclassName);
- const char *symbolName = entry.getKey().data();
- info.name = symbolName;
- info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
- entry.setValue(info);
- }
+ if (entry.getValue().name)
+ return;
+
+ const char *symbolName = entry.getKey().data();
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = false;
+ info.symbol = clgv;
+ entry.setValue(info);
}
+/// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
+void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
+ std::string targetclassName;
+ if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName))
+ return;
+
+ NameAndAttributes info;
+ StringMap<NameAndAttributes>::value_type &entry =
+ _undefines.GetOrCreateValue(targetclassName);
+ if (entry.getValue().name)
+ return;
+
+ const char *symbolName = entry.getKey().data();
+ info.name = symbolName;
+ info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = false;
+ info.symbol = clgv;
+ entry.setValue(info);
+}
-void LTOModule::addDefinedDataSymbol(GlobalValue *v, Mangler &mangler) {
+/// addDefinedDataSymbol - Add a data symbol as defined to the list.
+void LTOModule::addDefinedDataSymbol(GlobalValue *v) {
// Add to list of defined symbols.
- addDefinedSymbol(v, mangler, false);
+ addDefinedSymbol(v, false);
// Special case i386/ppc ObjC data structures in magic sections:
// The issue is that the old ObjC object format did some strange
@@ -327,25 +314,30 @@ void LTOModule::addDefinedDataSymbol(GlobalValue *v, Mangler &mangler) {
}
}
+/// addDefinedFunctionSymbol - Add a function symbol as defined to the list.
+void LTOModule::addDefinedFunctionSymbol(Function *f) {
+ // add to list of defined symbols
+ addDefinedSymbol(f, true);
+}
-void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
- bool isFunction) {
+/// addDefinedSymbol - Add a defined symbol to the list.
+void LTOModule::addDefinedSymbol(GlobalValue *def, bool isFunction) {
// ignore all llvm.* symbols
if (def->getName().startswith("llvm."))
return;
// string is owned by _defines
SmallString<64> Buffer;
- mangler.getNameWithPrefix(Buffer, def, false);
+ _mangler.getNameWithPrefix(Buffer, def, false);
// set alignment part log2() can have rounding errors
uint32_t align = def->getAlignment();
uint32_t attr = align ? CountTrailingZeros_32(def->getAlignment()) : 0;
// set permissions part
- if (isFunction)
+ if (isFunction) {
attr |= LTO_SYMBOL_PERMISSIONS_CODE;
- else {
+ } else {
GlobalVariable *gv = dyn_cast<GlobalVariable>(def);
if (gv && gv->isConstant())
attr |= LTO_SYMBOL_PERMISSIONS_RODATA;
@@ -377,18 +369,24 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
else
attr |= LTO_SYMBOL_SCOPE_INTERNAL;
- // add to table of symbols
- NameAndAttributes info;
StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer);
entry.setValue(1);
+ // fill information structure
+ NameAndAttributes info;
StringRef Name = entry.getKey();
info.name = Name.data();
assert(info.name[Name.size()] == '\0');
- info.attributes = (lto_symbol_attributes)attr;
+ info.attributes = attr;
+ info.isFunction = isFunction;
+ info.symbol = def;
+
+ // add to table of symbols
_symbols.push_back(info);
}
+/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
+/// defined list.
void LTOModule::addAsmGlobalSymbol(const char *name,
lto_symbol_attributes scope) {
StringSet::value_type &entry = _defines.GetOrCreateValue(name);
@@ -398,15 +396,41 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
return;
entry.setValue(1);
- const char *symbolName = entry.getKey().data();
- uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR;
- attr |= scope;
- NameAndAttributes info;
- info.name = symbolName;
- info.attributes = (lto_symbol_attributes)attr;
- _symbols.push_back(info);
+
+ NameAndAttributes &info = _undefines[entry.getKey().data()];
+
+ if (info.symbol == 0) {
+ // FIXME: This is trying to take care of module ASM like this:
+ //
+ // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
+ //
+ // but is gross and its mother dresses it funny. Have the ASM parser give us
+ // more details for this type of situation so that we're not guessing so
+ // much.
+
+ // fill information structure
+ info.name = name;
+ info.attributes =
+ LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
+ info.isFunction = false;
+ info.symbol = 0;
+
+ // add to table of symbols
+ _symbols.push_back(info);
+ return;
+ }
+
+ if (info.isFunction)
+ addDefinedFunctionSymbol(cast<Function>(info.symbol));
+ else
+ addDefinedDataSymbol(info.symbol);
+
+ _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK;
+ _symbols.back().attributes |= scope;
}
+/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
+/// undefined list.
void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
StringMap<NameAndAttributes>::value_type &entry =
_undefines.GetOrCreateValue(name);
@@ -421,13 +445,16 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
NameAndAttributes info;
info.name = entry.getKey().data();
- info.attributes = (lto_symbol_attributes)attr;
+ info.attributes = attr;
+ info.isFunction = false;
+ info.symbol = 0;
entry.setValue(info);
}
-void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
- Mangler &mangler) {
+/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet to a
+/// list to be resolved later.
+void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, bool isFunc) {
// ignore all llvm.* symbols
if (decl->getName().startswith("llvm."))
return;
@@ -437,7 +464,7 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
return;
SmallString<64> name;
- mangler.getNameWithPrefix(name, decl, false);
+ _mangler.getNameWithPrefix(name, decl, false);
StringMap<NameAndAttributes>::value_type &entry =
_undefines.GetOrCreateValue(name);
@@ -449,19 +476,22 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
NameAndAttributes info;
info.name = entry.getKey().data();
+
if (decl->hasExternalWeakLinkage())
info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
else
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
+ info.isFunction = isFunc;
+ info.symbol = decl;
+
entry.setValue(info);
}
-
namespace {
class RecordStreamer : public MCStreamer {
public:
- enum State { NeverSeen, Global, Defined, DefinedGlobal, Used};
+ enum State { NeverSeen, Global, Defined, DefinedGlobal, Used };
private:
StringMap<State> Symbols;
@@ -550,14 +580,16 @@ namespace {
RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
- virtual void ChangeSection(const MCSection *Section) {}
- virtual void InitSections() {}
+ virtual void EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+ }
virtual void EmitLabel(MCSymbol *Symbol) {
Symbol->setSection(*getCurrentSection());
markDefined(*Symbol);
}
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
- virtual void EmitThumbFunc(MCSymbol *Func) {}
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
// FIXME: should we handle aliases?
markDefined(*Symbol);
@@ -566,20 +598,26 @@ namespace {
if (Attribute == MCSA_Global)
markGlobal(*Symbol);
}
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size , unsigned ByteAlignment) {
markDefined(*Symbol);
}
- virtual void EmitCOFFSymbolType(int Type) {}
- virtual void EndCOFFSymbolDef() {}
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
markDefined(*Symbol);
}
+
+ // Noop calls.
+ virtual void ChangeSection(const MCSection *Section) {}
+ virtual void InitSections() {}
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {}
+ virtual void EmitThumbFunc(MCSymbol *Func) {}
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+ virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {}
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+ virtual void EmitCOFFSymbolType(int Type) {}
+ virtual void EndCOFFSymbolDef() {}
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
@@ -595,35 +633,30 @@ namespace {
unsigned MaxBytesToEmit) {}
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {}
- virtual void EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value ) {}
+ virtual bool EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value ) { return false; }
virtual void EmitFileDirective(StringRef Filename) {}
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
const MCSymbol *Label,
unsigned PointerSize) {}
-
- virtual void EmitInstruction(const MCInst &Inst) {
- // Scan for values.
- for (unsigned i = Inst.getNumOperands(); i--; )
- if (Inst.getOperand(i).isExpr())
- AddValueSymbols(Inst.getOperand(i).getExpr());
- }
- virtual void Finish() {}
+ virtual void FinishImpl() {}
};
-}
+} // end anonymous namespace
-bool LTOModule::addAsmGlobalSymbols(MCContext &Context, std::string &errMsg) {
+/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the
+/// defined or undefined lists.
+bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) {
const std::string &inlineAsm = _module->getModuleInlineAsm();
if (inlineAsm.empty())
return false;
- OwningPtr<RecordStreamer> Streamer(new RecordStreamer(Context));
+ OwningPtr<RecordStreamer> Streamer(new RecordStreamer(_context));
MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm);
SourceMgr SrcMgr;
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
- Context, *Streamer,
+ _context, *Streamer,
*_target->getMCAsmInfo()));
OwningPtr<MCSubtargetInfo> STI(_target->getTarget().
createMCSubtargetInfo(_target->getTargetTriple(),
@@ -657,6 +690,7 @@ bool LTOModule::addAsmGlobalSymbols(MCContext &Context, std::string &errMsg) {
return false;
}
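
To make the RecordStreamer plumbing concrete, a sketch of the state
transitions that hypothetical module-level asm would trigger:

    // .globl _sym  -> EmitSymbolAttribute -> markGlobal:  NeverSeen -> Global
    // _sym:        -> EmitLabel           -> markDefined: Global -> DefinedGlobal
    //
    // The rest of addAsmGlobalSymbols() (not shown in this hunk) routes the
    // recorded states to the defined or undefined lists via
    // addAsmGlobalSymbol() and addAsmGlobalSymbolUndef().
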
+/// isDeclaration - Return 'true' if the global value is a declaration.
static bool isDeclaration(const GlobalValue &V) {
if (V.hasAvailableExternallyLinkage())
return true;
@@ -665,74 +699,49 @@ static bool isDeclaration(const GlobalValue &V) {
return V.isDeclaration();
}
-static bool isAliasToDeclaration(const GlobalAlias &V) {
- return isDeclaration(*V.getAliasedGlobal());
-}
-
-bool LTOModule::ParseSymbols(std::string &errMsg) {
- // Use mangler to add GlobalPrefix to names to match linker names.
- MCContext Context(*_target->getMCAsmInfo(), *_target->getRegisterInfo(),NULL);
- Mangler mangler(Context, *_target->getTargetData());
-
+/// parseSymbols - Parse the symbols from the module and module-level ASM and
+/// add them to either the defined or undefined lists.
+bool LTOModule::parseSymbols(std::string &errMsg) {
// add functions
- for (Module::iterator f = _module->begin(); f != _module->end(); ++f) {
+ for (Module::iterator f = _module->begin(), e = _module->end(); f != e; ++f) {
if (isDeclaration(*f))
- addPotentialUndefinedSymbol(f, mangler);
+ addPotentialUndefinedSymbol(f, true);
else
- addDefinedFunctionSymbol(f, mangler);
+ addDefinedFunctionSymbol(f);
}
// add data
for (Module::global_iterator v = _module->global_begin(),
e = _module->global_end(); v != e; ++v) {
if (isDeclaration(*v))
- addPotentialUndefinedSymbol(v, mangler);
+ addPotentialUndefinedSymbol(v, false);
else
- addDefinedDataSymbol(v, mangler);
+ addDefinedDataSymbol(v);
}
// add asm globals
- if (addAsmGlobalSymbols(Context, errMsg))
+ if (addAsmGlobalSymbols(errMsg))
return true;
// add aliases
- for (Module::alias_iterator i = _module->alias_begin(),
- e = _module->alias_end(); i != e; ++i) {
- if (isAliasToDeclaration(*i))
- addPotentialUndefinedSymbol(i, mangler);
+ for (Module::alias_iterator a = _module->alias_begin(),
+ e = _module->alias_end(); a != e; ++a) {
+ if (isDeclaration(*a->getAliasedGlobal()))
+ // Is an alias to a declaration.
+ addPotentialUndefinedSymbol(a, false);
else
- addDefinedDataSymbol(i, mangler);
+ addDefinedDataSymbol(a);
}
// make symbols for all undefines
- for (StringMap<NameAndAttributes>::iterator it=_undefines.begin();
- it != _undefines.end(); ++it) {
- // if this symbol also has a definition, then don't make an undefine
- // because it is a tentative definition
- if (_defines.count(it->getKey()) == 0) {
- NameAndAttributes info = it->getValue();
- _symbols.push_back(info);
- }
+ for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
+ e = _undefines.end(); u != e; ++u) {
+ // If this symbol also has a definition, then don't make an undefine because
+ // it is a tentative definition.
+ if (_defines.count(u->getKey())) continue;
+ NameAndAttributes info = u->getValue();
+ _symbols.push_back(info);
}
- return false;
-}
-
-uint32_t LTOModule::getSymbolCount() {
- return _symbols.size();
-}
-
-
-lto_symbol_attributes LTOModule::getSymbolAttributes(uint32_t index) {
- if (index < _symbols.size())
- return _symbols[index].attributes;
- else
- return lto_symbol_attributes(0);
-}
-
-const char *LTOModule::getSymbolName(uint32_t index) {
- if (index < _symbols.size())
- return _symbols[index].name;
- else
- return NULL;
+ return false;
}
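
A worked illustration of what parseSymbols() records, assuming hypothetical IR
input and Darwin-style '_' prefixes from the Mangler:

    // declare i32 @printf(i8*, ...) ; declaration -> addPotentialUndefinedSymbol
    // define i32 @main() { ... }    ; definition  -> addDefinedFunctionSymbol
    // @gv = global i32 0            ; definition  -> addDefinedDataSymbol
    //
    // _printf lands in _undefines; _main and _gv land in _defines/_symbols.
    // The final loop emits an undefined entry for _printf only because no
    // definition of the same name exists (the tentative-definition check).
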
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index ca08aea90adb..cafb927abfb1 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -4,10 +4,10 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file declares the LTOModule class.
+// This file declares the LTOModule class.
//
//===----------------------------------------------------------------------===//
@@ -15,110 +15,172 @@
#define LTO_MODULE_H
#include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/StringMap.h"
-
#include "llvm-c/lto.h"
-
#include <vector>
#include <string>
-
-// forward references to llvm classes
+// Forward references to llvm classes.
namespace llvm {
- class Mangler;
- class MemoryBuffer;
- class GlobalValue;
- class Value;
- class Function;
+ class Function;
+ class GlobalValue;
+ class MemoryBuffer;
+ class Value;
}
-
-//
-// C++ class which implements the opaque lto_module_t
-//
+//===----------------------------------------------------------------------===//
+/// LTOModule - C++ class which implements the opaque lto_module_t type.
+///
struct LTOModule {
-
- static bool isBitcodeFile(const void* mem, size_t length);
- static bool isBitcodeFile(const char* path);
-
- static bool isBitcodeFileForTarget(const void* mem,
- size_t length, const char* triplePrefix);
-
- static bool isBitcodeFileForTarget(const char* path,
- const char* triplePrefix);
-
- static LTOModule* makeLTOModule(const char* path,
- std::string& errMsg);
- static LTOModule* makeLTOModule(int fd, const char *path,
- size_t size,
- std::string& errMsg);
- static LTOModule* makeLTOModule(int fd, const char *path,
- size_t file_size,
- size_t map_size,
- off_t offset,
- std::string& errMsg);
- static LTOModule* makeLTOModule(const void* mem, size_t length,
- std::string& errMsg);
-
- const char* getTargetTriple();
- void setTargetTriple(const char*);
- uint32_t getSymbolCount();
- lto_symbol_attributes getSymbolAttributes(uint32_t index);
- const char* getSymbolName(uint32_t index);
-
- llvm::Module * getLLVVMModule() { return _module.get(); }
- const std::vector<const char*> &getAsmUndefinedRefs() {
- return _asm_undefines;
- }
+private:
+ typedef llvm::StringMap<uint8_t> StringSet;
+
+ struct NameAndAttributes {
+ const char *name;
+ uint32_t attributes;
+ bool isFunction;
+ llvm::GlobalValue *symbol;
+ };
+
+ llvm::OwningPtr<llvm::Module> _module;
+ llvm::OwningPtr<llvm::TargetMachine> _target;
+ std::vector<NameAndAttributes> _symbols;
+
+ // _defines and _undefines only needed to disambiguate tentative definitions
+ StringSet _defines;
+ llvm::StringMap<NameAndAttributes> _undefines;
+ std::vector<const char*> _asm_undefines;
+ llvm::MCContext _context;
+
+ // Use mangler to add GlobalPrefix to names to match linker names.
+ llvm::Mangler _mangler;
+
+ LTOModule(llvm::Module *m, llvm::TargetMachine *t);
+public:
+ /// isBitcodeFile - Returns 'true' if the file or memory contents is LLVM
+ /// bitcode.
+ static bool isBitcodeFile(const void *mem, size_t length);
+ static bool isBitcodeFile(const char *path);
+
+ /// isBitcodeFileForTarget - Returns 'true' if the file or memory contents
+ /// is LLVM bitcode for the specified triple.
+ static bool isBitcodeFileForTarget(const void *mem,
+ size_t length,
+ const char *triplePrefix);
+ static bool isBitcodeFileForTarget(const char *path,
+ const char *triplePrefix);
+
+ /// makeLTOModule - Create an LTOModule. N.B. These methods take ownership
+ /// of the buffer.
+ static LTOModule *makeLTOModule(const char* path,
+ std::string &errMsg);
+ static LTOModule *makeLTOModule(int fd, const char *path,
+ size_t size, std::string &errMsg);
+ static LTOModule *makeLTOModule(int fd, const char *path,
+ size_t file_size,
+ size_t map_size,
+ off_t offset,
+ std::string& errMsg);
+ static LTOModule *makeLTOModule(const void *mem, size_t length,
+ std::string &errMsg);
+
+ /// getTargetTriple - Return the Module's target triple.
+ const char *getTargetTriple() {
+ return _module->getTargetTriple().c_str();
+ }
+
+ /// setTargetTriple - Set the Module's target triple.
+ void setTargetTriple(const char *triple) {
+ _module->setTargetTriple(triple);
+ }
+
+ /// getSymbolCount - Get the number of symbols.
+ uint32_t getSymbolCount() {
+ return _symbols.size();
+ }
+
+ /// getSymbolAttributes - Get the attributes for a symbol at the specified
+ /// index.
+ lto_symbol_attributes getSymbolAttributes(uint32_t index) {
+ if (index < _symbols.size())
+ return lto_symbol_attributes(_symbols[index].attributes);
+ return lto_symbol_attributes(0);
+ }
+
+ /// getSymbolName - Get the name of the symbol at the specified index.
+ const char *getSymbolName(uint32_t index) {
+ if (index < _symbols.size())
+ return _symbols[index].name;
+ return NULL;
+ }
+
+ /// getLLVVMModule - Return the Module.
+ llvm::Module *getLLVVMModule() { return _module.get(); }
+
+ /// getAsmUndefinedRefs - Return the list of undefined references from
+ /// module-level inline ASM.
+ const std::vector<const char*> &getAsmUndefinedRefs() {
+ return _asm_undefines;
+ }
private:
- LTOModule(llvm::Module* m, llvm::TargetMachine* t);
-
- bool ParseSymbols(std::string &errMsg);
- void addDefinedSymbol(llvm::GlobalValue* def,
- llvm::Mangler& mangler,
- bool isFunction);
- void addPotentialUndefinedSymbol(llvm::GlobalValue* decl,
- llvm::Mangler &mangler);
- void addDefinedFunctionSymbol(llvm::Function* f,
- llvm::Mangler &mangler);
- void addDefinedDataSymbol(llvm::GlobalValue* v,
- llvm::Mangler &mangler);
- bool addAsmGlobalSymbols(llvm::MCContext &Context,
- std::string &errMsg);
- void addAsmGlobalSymbol(const char *,
- lto_symbol_attributes scope);
- void addAsmGlobalSymbolUndef(const char *);
- void addObjCClass(llvm::GlobalVariable* clgv);
- void addObjCCategory(llvm::GlobalVariable* clgv);
- void addObjCClassRef(llvm::GlobalVariable* clgv);
- bool objcClassNameFromExpression(llvm::Constant* c,
- std::string& name);
-
- static bool isTargetMatch(llvm::MemoryBuffer* memBuffer,
- const char* triplePrefix);
-
- static LTOModule* makeLTOModule(llvm::MemoryBuffer* buffer,
- std::string& errMsg);
- static llvm::MemoryBuffer* makeBuffer(const void* mem, size_t length);
-
- typedef llvm::StringMap<uint8_t> StringSet;
-
- struct NameAndAttributes {
- const char* name;
- lto_symbol_attributes attributes;
- };
-
- llvm::OwningPtr<llvm::Module> _module;
- llvm::OwningPtr<llvm::TargetMachine> _target;
- std::vector<NameAndAttributes> _symbols;
- // _defines and _undefines only needed to disambiguate tentative definitions
- StringSet _defines;
- llvm::StringMap<NameAndAttributes> _undefines;
- std::vector<const char*> _asm_undefines;
+ /// parseSymbols - Parse the symbols from the module and module-level ASM
+ /// and add them to either the defined or undefined lists.
+ bool parseSymbols(std::string &errMsg);
+
+ /// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet
+ /// to a list to be resolved later.
+ void addPotentialUndefinedSymbol(llvm::GlobalValue *dcl, bool isFunc);
+
+ /// addDefinedSymbol - Add a defined symbol to the list.
+ void addDefinedSymbol(llvm::GlobalValue *def, bool isFunction);
+
+ /// addDefinedFunctionSymbol - Add a function symbol as defined to the list.
+ void addDefinedFunctionSymbol(llvm::Function *f);
+
+ /// addDefinedDataSymbol - Add a data symbol as defined to the list.
+ void addDefinedDataSymbol(llvm::GlobalValue *v);
+
+ /// addAsmGlobalSymbols - Add global symbols from module-level ASM to the
+ /// defined or undefined lists.
+ bool addAsmGlobalSymbols(std::string &errMsg);
+
+ /// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
+ /// defined list.
+ void addAsmGlobalSymbol(const char *, lto_symbol_attributes scope);
+
+ /// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to
+ /// the undefined list.
+ void addAsmGlobalSymbolUndef(const char *);
+
+ /// addObjCClass - Parse i386/ppc ObjC class data structure.
+ void addObjCClass(llvm::GlobalVariable *clgv);
+
+ /// addObjCCategory - Parse i386/ppc ObjC category data structure.
+ void addObjCCategory(llvm::GlobalVariable *clgv);
+
+ /// addObjCClassRef - Parse i386/ppc ObjC class list data structure.
+ void addObjCClassRef(llvm::GlobalVariable *clgv);
+
+ /// objcClassNameFromExpression - Get string that the data pointer points
+ /// to.
+ bool objcClassNameFromExpression(llvm::Constant* c, std::string &name);
+
+ /// isTargetMatch - Returns 'true' if the memory buffer is for the specified
+ /// target triple.
+ static bool isTargetMatch(llvm::MemoryBuffer *memBuffer,
+ const char *triplePrefix);
+
+ /// makeLTOModule - Create an LTOModule (private version). N.B. This
+ /// method takes ownership of the buffer.
+ static LTOModule *makeLTOModule(llvm::MemoryBuffer *buffer,
+ std::string &errMsg);
+
+ /// makeBuffer - Create a MemoryBuffer from a memory range.
+ static llvm::MemoryBuffer *makeBuffer(const void *mem, size_t length);
};
#endif // LTO_MODULE_H
-
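
A minimal sketch of consuming the accessors declared above; the input path is
hypothetical and error handling is pared down:

    std::string errMsg;
    LTOModule *mod = LTOModule::makeLTOModule("input.o", errMsg);
    if (!mod) {
      fprintf(stderr, "lto: %s\n", errMsg.c_str());
      return 1;
    }
    for (uint32_t i = 0, e = mod->getSymbolCount(); i != e; ++i) {
      lto_symbol_attributes attr = mod->getSymbolAttributes(i);
      bool undef = (attr & LTO_SYMBOL_DEFINITION_MASK) ==
                   LTO_SYMBOL_DEFINITION_UNDEFINED;
      printf("%c %s\n", undef ? 'U' : 'D', mod->getSymbolName(i));
    }
    delete mod;  // what lto_module_dispose() does in the C API
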
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index 46925e77de2c..153fa031378d 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -7,22 +7,15 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-LIBRARYNAME = LTO
+LEVEL := ../..
+LIBRARYNAME := LTO
+LINK_COMPONENTS := all-targets ipo scalaropts linker bitreader bitwriter \
+ mcdisassembler vectorize
+LINK_LIBS_IN_SHARED := 1
+SHARED_LIBRARY := 1
EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/lto.exports
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_LIBS_IN_SHARED = 1
-SHARED_LIBRARY = 1
-
-LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader \
- bitwriter mcdisassembler
-
include $(LEVEL)/Makefile.common
ifdef LLVM_VERSION_INFO
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index dd658d17519d..addf7877c96e 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -4,10 +4,10 @@
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This file implements the Link Time Optimization library. This library is
+// This file implements the Link Time Optimization library. This library is
// intended to be used by a linker to optimize code at link time.
//
//===----------------------------------------------------------------------===//
@@ -19,292 +19,202 @@
#include "LTOCodeGenerator.h"
-// holds most recent error string
-// *** not thread safe ***
+// Holds most recent error string.
+// *** Not thread safe ***
static std::string sLastErrorString;
-
-
-//
-// returns a printable string
-//
-extern const char* lto_get_version()
-{
- return LTOCodeGenerator::getVersionString();
+/// lto_get_version - Returns a printable string.
+extern const char* lto_get_version() {
+ return LTOCodeGenerator::getVersionString();
}
-//
-// returns the last error string or NULL if last operation was successful
-//
-const char* lto_get_error_message()
-{
- return sLastErrorString.c_str();
+/// lto_get_error_message - Returns the last error string or NULL if last
+/// operation was successful.
+const char* lto_get_error_message() {
+ return sLastErrorString.c_str();
}
-
-
-//
-// validates if a file is a loadable object file
-//
-bool lto_module_is_object_file(const char* path)
-{
- return LTOModule::isBitcodeFile(path);
+/// lto_module_is_object_file - Validates if a file is a loadable object file.
+bool lto_module_is_object_file(const char* path) {
+ return LTOModule::isBitcodeFile(path);
}
-
-//
-// validates if a file is a loadable object file compilable for requested target
-//
-bool lto_module_is_object_file_for_target(const char* path,
- const char* target_triplet_prefix)
-{
- return LTOModule::isBitcodeFileForTarget(path, target_triplet_prefix);
+/// lto_module_is_object_file_for_target - Validates if a file is a loadable
+/// object file compilable for requested target.
+bool lto_module_is_object_file_for_target(const char* path,
+ const char* target_triplet_prefix) {
+ return LTOModule::isBitcodeFileForTarget(path, target_triplet_prefix);
}
-
-//
-// validates if a buffer is a loadable object file
-//
-bool lto_module_is_object_file_in_memory(const void* mem, size_t length)
-{
- return LTOModule::isBitcodeFile(mem, length);
+/// lto_module_is_object_file_in_memory - Validates if a buffer is a loadable
+/// object file.
+bool lto_module_is_object_file_in_memory(const void* mem, size_t length) {
+ return LTOModule::isBitcodeFile(mem, length);
}
-
-//
-// validates if a buffer is a loadable object file compilable for the target
-//
-bool lto_module_is_object_file_in_memory_for_target(const void* mem,
- size_t length, const char* target_triplet_prefix)
-{
- return LTOModule::isBitcodeFileForTarget(mem, length, target_triplet_prefix);
+/// lto_module_is_object_file_in_memory_for_target - Validates if a buffer is a
+/// loadable object file compilable for the target.
+bool
+lto_module_is_object_file_in_memory_for_target(const void* mem,
+ size_t length,
+ const char* target_triplet_prefix) {
+ return LTOModule::isBitcodeFileForTarget(mem, length, target_triplet_prefix);
}
-
-
-//
-// loads an object file from disk
-// returns NULL on error (check lto_get_error_message() for details)
-//
-lto_module_t lto_module_create(const char* path)
-{
- return LTOModule::makeLTOModule(path, sLastErrorString);
+/// lto_module_create - Loads an object file from disk. Returns NULL on error
+/// (check lto_get_error_message() for details).
+lto_module_t lto_module_create(const char* path) {
+ return LTOModule::makeLTOModule(path, sLastErrorString);
}
-//
-// loads an object file from disk
-// returns NULL on error (check lto_get_error_message() for details)
-//
-lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size)
-{
- return LTOModule::makeLTOModule(fd, path, size, sLastErrorString);
+/// lto_module_create_from_fd - Loads an object file from disk. Returns NULL on
+/// error (check lto_get_error_message() for details).
+lto_module_t lto_module_create_from_fd(int fd, const char *path, size_t size) {
+ return LTOModule::makeLTOModule(fd, path, size, sLastErrorString);
}
-//
-// loads an object file from disk
-// returns NULL on error (check lto_get_error_message() for details)
-//
+/// lto_module_create_from_fd_at_offset - Loads an object file from disk.
+/// Returns NULL on error (check lto_get_error_message() for details).
lto_module_t lto_module_create_from_fd_at_offset(int fd, const char *path,
size_t file_size,
size_t map_size,
- off_t offset)
-{
- return LTOModule::makeLTOModule(fd, path, file_size, map_size,
- offset, sLastErrorString);
+ off_t offset) {
+ return LTOModule::makeLTOModule(fd, path, file_size, map_size,
+ offset, sLastErrorString);
}
-//
-// loads an object file from memory
-// returns NULL on error (check lto_get_error_message() for details)
-//
-lto_module_t lto_module_create_from_memory(const void* mem, size_t length)
-{
- return LTOModule::makeLTOModule(mem, length, sLastErrorString);
+/// lto_module_create_from_memory - Loads an object file from memory. Returns
+/// NULL on error (check lto_get_error_message() for details).
+lto_module_t lto_module_create_from_memory(const void* mem, size_t length) {
+ return LTOModule::makeLTOModule(mem, length, sLastErrorString);
}
-
-//
-// frees all memory for a module
-// upon return the lto_module_t is no longer valid
-//
-void lto_module_dispose(lto_module_t mod)
-{
- delete mod;
+/// lto_module_dispose - Frees all memory for a module. Upon return the
+/// lto_module_t is no longer valid.
+void lto_module_dispose(lto_module_t mod) {
+ delete mod;
}
-
-//
-// returns triplet string which the object module was compiled under
-//
-const char* lto_module_get_target_triple(lto_module_t mod)
-{
- return mod->getTargetTriple();
+/// lto_module_get_target_triple - Returns triplet string which the object
+/// module was compiled under.
+const char* lto_module_get_target_triple(lto_module_t mod) {
+ return mod->getTargetTriple();
}
-//
-// sets triple string with which the object will be codegened.
-//
-void lto_module_set_target_triple(lto_module_t mod, const char *triple)
-{
- return mod->setTargetTriple(triple);
+/// lto_module_set_target_triple - Sets triple string with which the object will
+/// be codegened.
+void lto_module_set_target_triple(lto_module_t mod, const char *triple) {
+ return mod->setTargetTriple(triple);
}
-
-//
-// returns the number of symbols in the object module
-//
-unsigned int lto_module_get_num_symbols(lto_module_t mod)
-{
- return mod->getSymbolCount();
+/// lto_module_get_num_symbols - Returns the number of symbols in the object
+/// module.
+unsigned int lto_module_get_num_symbols(lto_module_t mod) {
+ return mod->getSymbolCount();
}
-//
-// returns the name of the ith symbol in the object module
-//
-const char* lto_module_get_symbol_name(lto_module_t mod, unsigned int index)
-{
- return mod->getSymbolName(index);
+/// lto_module_get_symbol_name - Returns the name of the ith symbol in the
+/// object module.
+const char* lto_module_get_symbol_name(lto_module_t mod, unsigned int index) {
+ return mod->getSymbolName(index);
}
-
-//
-// returns the attributes of the ith symbol in the object module
-//
-lto_symbol_attributes lto_module_get_symbol_attribute(lto_module_t mod,
- unsigned int index)
-{
- return mod->getSymbolAttributes(index);
+/// lto_module_get_symbol_attribute - Returns the attributes of the ith symbol
+/// in the object module.
+lto_symbol_attributes lto_module_get_symbol_attribute(lto_module_t mod,
+ unsigned int index) {
+ return mod->getSymbolAttributes(index);
}
-
-
-
-
-//
-// instantiates a code generator
-// returns NULL if there is an error
-//
-lto_code_gen_t lto_codegen_create(void)
-{
- return new LTOCodeGenerator();
+/// lto_codegen_create - Instantiates a code generator. Returns NULL if there
+/// is an error.
+lto_code_gen_t lto_codegen_create(void) {
+ return new LTOCodeGenerator();
}
-
-
-//
-// frees all memory for a code generator
-// upon return the lto_code_gen_t is no longer valid
-//
-void lto_codegen_dispose(lto_code_gen_t cg)
-{
- delete cg;
+/// lto_codegen_dispose - Frees all memory for a code generator. Upon return the
+/// lto_code_gen_t is no longer valid.
+void lto_codegen_dispose(lto_code_gen_t cg) {
+ delete cg;
}
-
-
-//
-// add an object module to the set of modules for which code will be generated
-// returns true on error (check lto_get_error_message() for details)
-//
-bool lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod)
-{
- return cg->addModule(mod, sLastErrorString);
+/// lto_codegen_add_module - Add an object module to the set of modules for
+/// which code will be generated. Returns true on error (check
+/// lto_get_error_message() for details).
+bool lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod) {
+ return cg->addModule(mod, sLastErrorString);
}
-
-//
-// sets what if any format of debug info should be generated
-// returns true on error (check lto_get_error_message() for details)
-//
-bool lto_codegen_set_debug_model(lto_code_gen_t cg, lto_debug_model debug)
-{
- return cg->setDebugInfo(debug, sLastErrorString);
+/// lto_codegen_set_debug_model - Sets what, if any, format of debug info should
+/// be generated. Returns true on error (check lto_get_error_message() for
+/// details).
+bool lto_codegen_set_debug_model(lto_code_gen_t cg, lto_debug_model debug) {
+ return cg->setDebugInfo(debug, sLastErrorString);
}
-
-//
-// sets what code model to generated
-// returns true on error (check lto_get_error_message() for details)
-//
-bool lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model model)
-{
+/// lto_codegen_set_pic_model - Sets which code model to generate. Returns true
+/// on error (check lto_get_error_message() for details).
+bool lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model model) {
return cg->setCodePICModel(model, sLastErrorString);
}
-//
-// sets the cpu to generate code for
-//
-void lto_codegen_set_cpu(lto_code_gen_t cg, const char* cpu)
-{
+/// lto_codegen_set_cpu - Sets the cpu to generate code for.
+void lto_codegen_set_cpu(lto_code_gen_t cg, const char *cpu) {
return cg->setCpu(cpu);
}
-//
-// sets the path to the assembler tool
-//
-void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
-{
+/// lto_codegen_set_assembler_path - Sets the path to the assembler tool.
+void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char *path) {
// In here only for backwards compatibility. We use MC now.
}
-
-//
-// sets extra arguments that libLTO should pass to the assembler
-//
-void lto_codegen_set_assembler_args(lto_code_gen_t cg, const char** args,
- int nargs)
-{
+/// lto_codegen_set_assembler_args - Sets extra arguments that libLTO should
+/// pass to the assembler.
+void lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args,
+ int nargs) {
// In here only for backwards compatibility. We use MC now.
}
-//
-// adds to a list of all global symbols that must exist in the final
-// generated code. If a function is not listed there, it might be
-// inlined into every usage and optimized away.
-//
-void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol)
-{
+/// lto_codegen_add_must_preserve_symbol - Adds to a list of all global symbols
+/// that must exist in the final generated code. If a function is not listed
+/// there, it might be inlined into every usage and optimized away.
+void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg,
+ const char *symbol) {
cg->addMustPreserveSymbol(symbol);
}
+/// lto_codegen_set_whole_program_optimization - Enables the internalize pass
+/// during LTO optimizations.
+void lto_codegen_set_whole_program_optimization(lto_code_gen_t cg) {
+ cg->enableInternalizePass();
+}
-//
-// writes a new file at the specified path that contains the
-// merged contents of all modules added so far.
-// returns true on error (check lto_get_error_message() for details)
-//
-bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path)
-{
+/// lto_codegen_write_merged_modules - Writes a new file at the specified path
+/// that contains the merged contents of all modules added so far. Returns true
+/// on error (check lto_get_error_message() for details).
+bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char *path) {
return cg->writeMergedModules(path, sLastErrorString);
}
-
-//
-// Generates code for all added modules into one native object file.
-// On success returns a pointer to a generated mach-o/ELF buffer and
-// length set to the buffer size. The buffer is owned by the
-// lto_code_gen_t and will be freed when lto_codegen_dispose()
-// is called, or lto_codegen_compile() is called again.
-// On failure, returns NULL (check lto_get_error_message() for details).
-//
-extern const void*
-lto_codegen_compile(lto_code_gen_t cg, size_t* length)
-{
+/// lto_codegen_compile - Generates code for all added modules into one native
+/// object file. On success returns a pointer to a generated mach-o/ELF buffer
+/// and length set to the buffer size. The buffer is owned by the lto_code_gen_t
+/// object and will be freed when lto_codegen_dispose() is called, or
+/// lto_codegen_compile() is called again. On failure, returns NULL (check
+/// lto_get_error_message() for details).
+const void *lto_codegen_compile(lto_code_gen_t cg, size_t *length) {
return cg->compile(length, sLastErrorString);
}
-extern bool
-lto_codegen_compile_to_file(lto_code_gen_t cg, const char **name)
-{
+/// lto_codegen_compile_to_file - Generates code for all added modules into one
+/// native object file. The name of the file is written to name. Returns true on
+/// error.
+bool lto_codegen_compile_to_file(lto_code_gen_t cg, const char **name) {
return cg->compile_to_file(name, sLastErrorString);
}
-
-//
-// Used to pass extra options to the code generator
-//
-extern void
-lto_codegen_debug_options(lto_code_gen_t cg, const char * opt)
-{
+/// lto_codegen_debug_options - Used to pass extra options to the code
+/// generator.
+void lto_codegen_debug_options(lto_code_gen_t cg, const char *opt) {
cg->setCodeGenDebugOptions(opt);
}
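Taken together, these entry points form the usual libLTO driver sequence: create modules, hand them to one code generator, pin the symbols that must survive, and compile. Below is a minimal sketch of such a caller, assuming only the C API documented above plus the llvm-c/lto.h declarations; the function name and the "main" symbol are this sketch's own choices (Mach-O targets would pass "_main"):

#include <stdbool.h>
#include <stdio.h>
#include "llvm-c/lto.h"

// Sketch of a libLTO client; buf/len are assumed to hold a bitcode file
// already read into memory by the caller. Returns 0 on success.
static int compileWithLTO(const void *buf, size_t len) {
  lto_module_t mod = lto_module_create_from_memory(buf, len);
  if (!mod) {
    fprintf(stderr, "lto: %s\n", lto_get_error_message());
    return 1;
  }
  lto_code_gen_t cg = lto_codegen_create();
  if (!cg || lto_codegen_add_module(cg, mod)) {
    fprintf(stderr, "lto: %s\n", lto_get_error_message());
    if (cg) lto_codegen_dispose(cg);
    lto_module_dispose(mod);
    return 1;
  }

  // Pin the entry point, then opt in to the internalize pass that the new
  // lto_codegen_set_whole_program_optimization() enables.
  lto_codegen_add_must_preserve_symbol(cg, "main");
  lto_codegen_set_whole_program_optimization(cg);

  const char *path = NULL;
  bool failed = lto_codegen_compile_to_file(cg, &path);
  if (failed)
    fprintf(stderr, "lto: %s\n", lto_get_error_message());
  else
    printf("native object written to %s\n", path);

  lto_codegen_dispose(cg);  // also frees any buffer from lto_codegen_compile()
  lto_module_dispose(mod);
  return failed ? 1 : 0;
}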
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index b900bfb594b1..f471f1ad6a27 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -27,6 +27,7 @@ lto_codegen_set_assembler_args
lto_codegen_set_assembler_path
lto_codegen_set_cpu
lto_codegen_compile_to_file
+lto_codegen_set_whole_program_optimization
LLVMCreateDisasm
LLVMDisasmDispose
LLVMDisasmInstruction
diff --git a/tools/macho-dump/LLVMBuild.txt b/tools/macho-dump/LLVMBuild.txt
new file mode 100644
index 000000000000..1ad9b84261c9
--- /dev/null
+++ b/tools/macho-dump/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/macho-dump/LLVMBuild.txt -------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = macho-dump
+parent = Tools
+required_libraries = Object Support
diff --git a/tools/macho-dump/Makefile b/tools/macho-dump/Makefile
index 638015e9289a..0843e982e1af 100644
--- a/tools/macho-dump/Makefile
+++ b/tools/macho-dump/Makefile
@@ -7,17 +7,11 @@
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = macho-dump
+LEVEL := ../..
+TOOLNAME := macho-dump
+LINK_COMPONENTS := support object
# This tool has no plugins, optimize startup time.
-TOOL_NO_EXPORTS = 1
-
-# Include this here so we can get the configuration of the targets
-# that have been configured for construction. We have to do this
-# early so we can set up LINK_COMPONENTS before including Makefile.rules
-include $(LEVEL)/Makefile.config
-
-LINK_COMPONENTS := support object
+TOOL_NO_EXPORTS := 1
-include $(LLVM_SRC_ROOT)/Makefile.rules
+include $(LEVEL)/Makefile.common
diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt
index 0570d0e04af7..7daf22aa9e3e 100644
--- a/tools/opt/CMakeLists.txt
+++ b/tools/opt/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo)
+set(LLVM_LINK_COMPONENTS bitreader asmparser bitwriter instrumentation scalaropts ipo vectorize)
add_llvm_tool(opt
AnalysisWrappers.cpp
diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt
new file mode 100644
index 000000000000..4de99f51c885
--- /dev/null
+++ b/tools/opt/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/opt/LLVMBuild.txt --------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = opt
+parent = Tools
+required_libraries = AsmParser BitReader BitWriter IPO Instrumentation Scalar
diff --git a/tools/opt/Makefile b/tools/opt/Makefile
index 726cad87123f..16d116da5dbd 100644
--- a/tools/opt/Makefile
+++ b/tools/opt/Makefile
@@ -6,9 +6,9 @@
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
-LEVEL = ../..
-TOOLNAME = opt
-LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo
+LEVEL := ../..
+TOOLNAME := opt
+LINK_COMPONENTS := bitreader bitwriter asmparser instrumentation scalaropts ipo vectorize
include $(LEVEL)/Makefile.common
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index 533f49ec2a87..11efdcdfd226 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -101,8 +101,8 @@ bool CallGraphSCC::runOnModule(Module &M) {
errs() << "\nSCC #" << ++sccNum << " : ";
for (std::vector<CallGraphNode*>::const_iterator I = nextSCC.begin(),
E = nextSCC.end(); I != E; ++I)
- errs() << ((*I)->getFunction() ? (*I)->getFunction()->getNameStr()
- : std::string("external node")) << ", ";
+ errs() << ((*I)->getFunction() ? (*I)->getFunction()->getName()
+ : "external node") << ", ";
if (nextSCC.size() == 1 && SCCI.hasLoop())
errs() << " (Has self-loop).";
}
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index ffd2c21736e5..30da863b4114 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -291,8 +291,8 @@ struct RegionPassPrinter : public RegionPass {
virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
if (!Quiet) {
Out << "Printing analysis '" << PassToPrint->getPassName() << "' for "
- << "region: '" << R->getNameStr() << "' in function '"
- << R->getEntry()->getParent()->getNameStr() << "':\n";
+ << "region: '" << R->getNameStr() << "' in function '"
+ << R->getEntry()->getParent()->getName() << "':\n";
}
// Get and print pass...
getAnalysisID<Pass>(PassToPrint->getTypeInfo()).print(Out,
@@ -407,6 +407,8 @@ static inline void addPass(PassManagerBase &PM, Pass *P) {
/// OptLevel - Optimization Level
static void AddOptimizationPasses(PassManagerBase &MPM,FunctionPassManager &FPM,
unsigned OptLevel) {
+ FPM.add(createVerifierPass()); // Verify that input is correct
+
PassManagerBuilder Builder;
Builder.OptLevel = OptLevel;
@@ -478,6 +480,7 @@ int main(int argc, char **argv) {
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
initializeScalarOpts(Registry);
+ initializeVectorization(Registry);
initializeIPO(Registry);
initializeAnalysis(Registry);
initializeIPA(Registry);
@@ -505,7 +508,7 @@ int main(int argc, char **argv) {
M.reset(ParseIRFile(InputFilename, Err, Context));
if (M.get() == 0) {
- Err.Print(argv[0], errs());
+ Err.print(argv[0], errs());
return 1;
}
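Reduced to a standalone sketch, the verifier-first ordering these hunks establish looks as follows; the header paths are assumptions matching the 3.x-era tree, and the function name is this sketch's own:

#include "llvm/Analysis/Verifier.h"                 // createVerifierPass()
#include "llvm/PassManager.h"                       // FunctionPassManager
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

using namespace llvm;

// Mirrors AddOptimizationPasses(): reject malformed IR up front, then let
// PassManagerBuilder populate the -O<N> pipeline behind the verifier.
static void buildFunctionPipeline(FunctionPassManager &FPM, unsigned OptLevel) {
  FPM.add(createVerifierPass()); // runs first: verify the input is correct

  PassManagerBuilder Builder;
  Builder.OptLevel = OptLevel;
  Builder.populateFunctionPassManager(FPM);
}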
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index b6e02e3a9a3e..cc207f764da2 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -653,4 +653,28 @@ TEST(APFloatTest, getLargest) {
EXPECT_EQ(1.7976931348623158e+308, APFloat::getLargest(APFloat::IEEEdouble).convertToDouble());
}
+TEST(APFloatTest, convert) {
+ bool losesInfo;
+ APFloat test(APFloat::IEEEdouble, "1.0");
+ test.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0f, test.convertToFloat());
+ EXPECT_FALSE(losesInfo);
+
+ test = APFloat(APFloat::x87DoubleExtended, "0x1p-53");
+ test.add(APFloat(APFloat::x87DoubleExtended, "1.0"), APFloat::rmNearestTiesToEven);
+ test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0, test.convertToDouble());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::IEEEquad, "0x1p-53");
+ test.add(APFloat(APFloat::IEEEquad, "1.0"), APFloat::rmNearestTiesToEven);
+ test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(1.0, test.convertToDouble());
+ EXPECT_TRUE(losesInfo);
+
+ test = APFloat(APFloat::x87DoubleExtended, "0xf.fffffffp+28");
+ test.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ EXPECT_EQ(4294967295.0, test.convertToDouble());
+ EXPECT_FALSE(losesInfo);
+}
}
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 490811deb8f9..89b8aa94e464 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -144,6 +144,12 @@ TEST(APIntTest, i1) {
EXPECT_EQ(zero, one.lshr(1));
EXPECT_EQ(zero, one.ashr(1));
+ // Rotates.
+ EXPECT_EQ(one, one.rotl(0));
+ EXPECT_EQ(one, one.rotl(1));
+ EXPECT_EQ(one, one.rotr(0));
+ EXPECT_EQ(one, one.rotr(1));
+
// Multiplies.
EXPECT_EQ(neg_one, neg_one * one);
EXPECT_EQ(neg_one, one * neg_one);
@@ -354,7 +360,7 @@ TEST(APIntTest, toString) {
APInt(8, 0).toString(S, 16, true, true);
EXPECT_EQ(S.str().str(), "0x0");
S.clear();
- APInt(8, 0).toString(S, 36, true, true);
+ APInt(8, 0).toString(S, 36, true, false);
EXPECT_EQ(S.str().str(), "0");
S.clear();
@@ -371,7 +377,7 @@ TEST(APIntTest, toString) {
APInt(8, 255, isSigned).toString(S, 16, isSigned, true);
EXPECT_EQ(S.str().str(), "0xFF");
S.clear();
- APInt(8, 255, isSigned).toString(S, 36, isSigned, true);
+ APInt(8, 255, isSigned).toString(S, 36, isSigned, false);
EXPECT_EQ(S.str().str(), "73");
S.clear();
@@ -388,7 +394,7 @@ TEST(APIntTest, toString) {
APInt(8, 255, isSigned).toString(S, 16, isSigned, true);
EXPECT_EQ(S.str().str(), "-0x1");
S.clear();
- APInt(8, 255, isSigned).toString(S, 36, isSigned, true);
+ APInt(8, 255, isSigned).toString(S, 36, isSigned, false);
EXPECT_EQ(S.str().str(), "-1");
S.clear();
}
@@ -450,4 +456,34 @@ TEST(APIntTest, mul_clear) {
EXPECT_EQ(ValA.toString(10, false), ValC.toString(10, false));
}
+TEST(APIntTest, Rotate) {
+ EXPECT_EQ(APInt(8, 1), APInt(8, 1).rotl(0));
+ EXPECT_EQ(APInt(8, 2), APInt(8, 1).rotl(1));
+ EXPECT_EQ(APInt(8, 4), APInt(8, 1).rotl(2));
+ EXPECT_EQ(APInt(8, 16), APInt(8, 1).rotl(4));
+ EXPECT_EQ(APInt(8, 1), APInt(8, 1).rotl(8));
+
+ EXPECT_EQ(APInt(8, 16), APInt(8, 16).rotl(0));
+ EXPECT_EQ(APInt(8, 32), APInt(8, 16).rotl(1));
+ EXPECT_EQ(APInt(8, 64), APInt(8, 16).rotl(2));
+ EXPECT_EQ(APInt(8, 1), APInt(8, 16).rotl(4));
+ EXPECT_EQ(APInt(8, 16), APInt(8, 16).rotl(8));
+
+ EXPECT_EQ(APInt(8, 16), APInt(8, 16).rotr(0));
+ EXPECT_EQ(APInt(8, 8), APInt(8, 16).rotr(1));
+ EXPECT_EQ(APInt(8, 4), APInt(8, 16).rotr(2));
+ EXPECT_EQ(APInt(8, 1), APInt(8, 16).rotr(4));
+ EXPECT_EQ(APInt(8, 16), APInt(8, 16).rotr(8));
+
+ EXPECT_EQ(APInt(8, 1), APInt(8, 1).rotr(0));
+ EXPECT_EQ(APInt(8, 128), APInt(8, 1).rotr(1));
+ EXPECT_EQ(APInt(8, 64), APInt(8, 1).rotr(2));
+ EXPECT_EQ(APInt(8, 16), APInt(8, 1).rotr(4));
+ EXPECT_EQ(APInt(8, 1), APInt(8, 1).rotr(8));
+
+ APInt Big(256, "00004000800000000000000000003fff8000000000000000", 16);
+ APInt Rot(256, "3fff80000000000000000000000000000000000040008000", 16);
+ EXPECT_EQ(Rot, Big.rotr(144));
+}
+
}
diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp
index fa663121a8a6..f733e13fdfc3 100644
--- a/unittests/ADT/BitVectorTest.cpp
+++ b/unittests/ADT/BitVectorTest.cpp
@@ -196,6 +196,52 @@ TEST(BitVectorTest, ProxyIndex) {
EXPECT_TRUE(Vec.none());
}
+TEST(BitVectorTest, PortableBitMask) {
+ BitVector A;
+ const uint32_t Mask1[] = { 0x80000000, 6, 5 };
+
+ A.resize(10);
+ A.setBitsInMask(Mask1, 3);
+ EXPECT_EQ(10u, A.size());
+ EXPECT_FALSE(A.test(0));
+
+ A.resize(32);
+ A.setBitsInMask(Mask1, 3);
+ EXPECT_FALSE(A.test(0));
+ EXPECT_TRUE(A.test(31));
+ EXPECT_EQ(1u, A.count());
+
+ A.resize(33);
+ A.setBitsInMask(Mask1, 1);
+ EXPECT_EQ(1u, A.count());
+ A.setBitsInMask(Mask1, 2);
+ EXPECT_EQ(1u, A.count());
+
+ A.resize(34);
+ A.setBitsInMask(Mask1, 2);
+ EXPECT_EQ(2u, A.count());
+
+ A.resize(65);
+ A.setBitsInMask(Mask1, 3);
+ EXPECT_EQ(4u, A.count());
+
+ A.setBitsNotInMask(Mask1, 1);
+ EXPECT_EQ(32u+3u, A.count());
+
+ A.setBitsNotInMask(Mask1, 3);
+ EXPECT_EQ(65u, A.count());
+
+ A.resize(96);
+ EXPECT_EQ(65u, A.count());
+
+ A.clear();
+ A.resize(128);
+ A.setBitsNotInMask(Mask1, 3);
+ EXPECT_EQ(96u-5u, A.count());
+
+ A.clearBitsNotInMask(Mask1, 1);
+ EXPECT_EQ(64-4u, A.count());
+}
}
#endif
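One detail the test relies on but states only implicitly: the mask is an array of 32-bit words, lowest word first, and mask bits beyond size() are ignored rather than growing the vector, which is why bit 31 only appears once the vector reaches 32 bits. A minimal sketch under those assumptions (names are this sketch's own):

#include <stdint.h>
#include "llvm/ADT/BitVector.h"

using namespace llvm;

// Mask words are 32-bit and little-endian in word order; out-of-range bits
// are ignored rather than growing the vector.
static unsigned maskDemo() {
  BitVector BV(40);
  const uint32_t Mask[] = { 0x80000000u, 0x1u };
  BV.setBitsInMask(Mask, 2); // sets bit 31 (word 0) and bit 32 (word 1)
  return BV.count();         // == 2
}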
diff --git a/unittests/ADT/DenseMapTest.cpp b/unittests/ADT/DenseMapTest.cpp
index afac651a6b2b..e0ee7782ccbb 100644
--- a/unittests/ADT/DenseMapTest.cpp
+++ b/unittests/ADT/DenseMapTest.cpp
@@ -176,4 +176,45 @@ TEST_F(DenseMapTest, ConstIteratorTest) {
EXPECT_TRUE(cit == cit2);
}
+// Key traits that allow lookup with either an unsigned or char* key.
+// In the latter case, "a" == 0, "b" == 1 and so on.
+struct TestDenseMapInfo {
+ static inline unsigned getEmptyKey() { return ~0; }
+ static inline unsigned getTombstoneKey() { return ~0U - 1; }
+ static unsigned getHashValue(const unsigned& Val) { return Val * 37U; }
+ static unsigned getHashValue(const char* Val) {
+ return (unsigned)(Val[0] - 'a') * 37U;
+ }
+ static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
+ return LHS == RHS;
+ }
+ static bool isEqual(const char* LHS, const unsigned& RHS) {
+ return (unsigned)(LHS[0] - 'a') == RHS;
+ }
+};
+
+// find_as() tests
+TEST_F(DenseMapTest, FindAsTest) {
+ DenseMap<unsigned, unsigned, TestDenseMapInfo> map;
+ map[0] = 1;
+ map[1] = 2;
+ map[2] = 3;
+
+ // Size tests
+ EXPECT_EQ(3u, map.size());
+
+ // Normal lookup tests
+ EXPECT_EQ(1, map.count(1));
+ EXPECT_EQ(1u, map.find(0)->second);
+ EXPECT_EQ(2u, map.find(1)->second);
+ EXPECT_EQ(3u, map.find(2)->second);
+ EXPECT_TRUE(map.find(3) == map.end());
+
+ // find_as() tests
+ EXPECT_EQ(1u, map.find_as("a")->second);
+ EXPECT_EQ(2u, map.find_as("b")->second);
+ EXPECT_EQ(3u, map.find_as("c")->second);
+ EXPECT_TRUE(map.find_as("d") == map.end());
+}
+
}
diff --git a/unittests/ADT/HashingTest.cpp b/unittests/ADT/HashingTest.cpp
new file mode 100644
index 000000000000..b148f144513c
--- /dev/null
+++ b/unittests/ADT/HashingTest.cpp
@@ -0,0 +1,424 @@
+//===- llvm/unittest/ADT/HashingTest.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Hashing.h unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Support/DataTypes.h"
+#include <deque>
+#include <list>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+// Helper for test code to print hash codes.
+void PrintTo(const hash_code &code, std::ostream *os) {
+ *os << static_cast<size_t>(code);
+}
+
+// Fake a type that is recognized as hashable data in order to test very
+// large objects.
+struct LargeTestInteger { uint64_t arr[8]; };
+
+struct NonPOD {
+ uint64_t x, y;
+ NonPOD(uint64_t x, uint64_t y) : x(x), y(y) {}
+ ~NonPOD() {}
+ friend hash_code hash_value(const NonPOD &obj) {
+ return hash_combine(obj.x, obj.y);
+ }
+};
+
+namespace hashing {
+namespace detail {
+template <> struct is_hashable_data<LargeTestInteger> : true_type {};
+} // namespace detail
+} // namespace hashing
+
+} // namespace llvm
+
+using namespace llvm;
+
+namespace {
+
+enum TestEnumeration {
+ TE_Foo = 42,
+ TE_Bar = 43
+};
+
+TEST(HashingTest, HashValueBasicTest) {
+ int x = 42, y = 43, c = 'x';
+ void *p = 0;
+ uint64_t i = 71;
+ const unsigned ci = 71;
+ volatile int vi = 71;
+ const volatile int cvi = 71;
+ uintptr_t addr = reinterpret_cast<uintptr_t>(&y);
+ EXPECT_EQ(hash_value(42), hash_value(x));
+ EXPECT_EQ(hash_value(42), hash_value(TE_Foo));
+ EXPECT_NE(hash_value(42), hash_value(y));
+ EXPECT_NE(hash_value(42), hash_value(TE_Bar));
+ EXPECT_NE(hash_value(42), hash_value(p));
+ EXPECT_EQ(hash_value(71), hash_value(i));
+ EXPECT_EQ(hash_value(71), hash_value(ci));
+ EXPECT_EQ(hash_value(71), hash_value(vi));
+ EXPECT_EQ(hash_value(71), hash_value(cvi));
+ EXPECT_EQ(hash_value(c), hash_value('x'));
+ EXPECT_EQ(hash_value('4'), hash_value('0' + 4));
+ EXPECT_EQ(hash_value(addr), hash_value(&y));
+}
+
+TEST(HashingTest, HashValueStdPair) {
+ EXPECT_EQ(hash_combine(42, 43), hash_value(std::make_pair(42, 43)));
+ EXPECT_NE(hash_combine(43, 42), hash_value(std::make_pair(42, 43)));
+ EXPECT_NE(hash_combine(42, 43), hash_value(std::make_pair(42ull, 43ull)));
+ EXPECT_NE(hash_combine(42, 43), hash_value(std::make_pair(42, 43ull)));
+ EXPECT_NE(hash_combine(42, 43), hash_value(std::make_pair(42ull, 43)));
+
+ // Note that pairs are implicitly flattened to a direct sequence of data and
+ // hashed efficiently as a consequence.
+ EXPECT_EQ(hash_combine(42, 43, 44),
+ hash_value(std::make_pair(42, std::make_pair(43, 44))));
+ EXPECT_EQ(hash_value(std::make_pair(42, std::make_pair(43, 44))),
+ hash_value(std::make_pair(std::make_pair(42, 43), 44)));
+
+ // Ensure that pairs which have padding bytes *inside* them don't get treated
+ // this way.
+ EXPECT_EQ(hash_combine('0', hash_combine(1ull, '2')),
+ hash_value(std::make_pair('0', std::make_pair(1ull, '2'))));
+
+ // Ensure that non-POD pairs don't explode the traits used.
+ NonPOD obj1(1, 2), obj2(3, 4), obj3(5, 6);
+ EXPECT_EQ(hash_combine(obj1, hash_combine(obj2, obj3)),
+ hash_value(std::make_pair(obj1, std::make_pair(obj2, obj3))));
+}
+
+TEST(HashingTest, HashValueStdString) {
+ std::string s = "Hello World!";
+ EXPECT_EQ(hash_combine_range(s.c_str(), s.c_str() + s.size()), hash_value(s));
+ EXPECT_EQ(hash_combine_range(s.c_str(), s.c_str() + s.size() - 1),
+ hash_value(s.substr(0, s.size() - 1)));
+ EXPECT_EQ(hash_combine_range(s.c_str() + 1, s.c_str() + s.size() - 1),
+ hash_value(s.substr(1, s.size() - 2)));
+
+ std::wstring ws = L"Hello Wide World!";
+ EXPECT_EQ(hash_combine_range(ws.c_str(), ws.c_str() + ws.size()),
+ hash_value(ws));
+ EXPECT_EQ(hash_combine_range(ws.c_str(), ws.c_str() + ws.size() - 1),
+ hash_value(ws.substr(0, ws.size() - 1)));
+ EXPECT_EQ(hash_combine_range(ws.c_str() + 1, ws.c_str() + ws.size() - 1),
+ hash_value(ws.substr(1, ws.size() - 2)));
+}
+
+template <typename T, size_t N> T *begin(T (&arr)[N]) { return arr; }
+template <typename T, size_t N> T *end(T (&arr)[N]) { return arr + N; }
+
+// Provide a dummy, hashable type designed for easy verification: its hash is
+// the same as its value.
+struct HashableDummy { size_t value; };
+hash_code hash_value(HashableDummy dummy) { return dummy.value; }
+
+TEST(HashingTest, HashCombineRangeBasicTest) {
+ // Leave this uninitialized in the hope that valgrind will catch bad reads.
+ int dummy;
+ hash_code dummy_hash = hash_combine_range(&dummy, &dummy);
+ EXPECT_NE(hash_code(0), dummy_hash);
+
+ const int arr1[] = { 1, 2, 3 };
+ hash_code arr1_hash = hash_combine_range(begin(arr1), end(arr1));
+ EXPECT_NE(dummy_hash, arr1_hash);
+ EXPECT_EQ(arr1_hash, hash_combine_range(begin(arr1), end(arr1)));
+
+ const std::vector<int> vec(begin(arr1), end(arr1));
+ EXPECT_EQ(arr1_hash, hash_combine_range(vec.begin(), vec.end()));
+
+ const std::list<int> list(begin(arr1), end(arr1));
+ EXPECT_EQ(arr1_hash, hash_combine_range(list.begin(), list.end()));
+
+ const std::deque<int> deque(begin(arr1), end(arr1));
+ EXPECT_EQ(arr1_hash, hash_combine_range(deque.begin(), deque.end()));
+
+ const int arr2[] = { 3, 2, 1 };
+ hash_code arr2_hash = hash_combine_range(begin(arr2), end(arr2));
+ EXPECT_NE(dummy_hash, arr2_hash);
+ EXPECT_NE(arr1_hash, arr2_hash);
+
+ const int arr3[] = { 1, 1, 2, 3 };
+ hash_code arr3_hash = hash_combine_range(begin(arr3), end(arr3));
+ EXPECT_NE(dummy_hash, arr3_hash);
+ EXPECT_NE(arr1_hash, arr3_hash);
+
+ const int arr4[] = { 1, 2, 3, 3 };
+ hash_code arr4_hash = hash_combine_range(begin(arr4), end(arr4));
+ EXPECT_NE(dummy_hash, arr4_hash);
+ EXPECT_NE(arr1_hash, arr4_hash);
+
+ const size_t arr5[] = { 1, 2, 3 };
+ const HashableDummy d_arr5[] = { {1}, {2}, {3} };
+ hash_code arr5_hash = hash_combine_range(begin(arr5), end(arr5));
+ hash_code d_arr5_hash = hash_combine_range(begin(d_arr5), end(d_arr5));
+ EXPECT_EQ(arr5_hash, d_arr5_hash);
+}
+
+TEST(HashingTest, HashCombineRangeLengthDiff) {
+ // Test that we compute different hash codes for sequences that differ
+ // only in their length.
+ std::map<size_t, size_t> code_to_size;
+ std::vector<char> all_one_c(256, '\xff');
+ for (unsigned Idx = 1, Size = all_one_c.size(); Idx < Size; ++Idx) {
+ hash_code code = hash_combine_range(&all_one_c[0], &all_one_c[0] + Idx);
+ std::map<size_t, size_t>::iterator
+ I = code_to_size.insert(std::make_pair(code, Idx)).first;
+ EXPECT_EQ(Idx, I->second);
+ }
+ code_to_size.clear();
+ std::vector<char> all_zero_c(256, '\0');
+ for (unsigned Idx = 1, Size = all_zero_c.size(); Idx < Size; ++Idx) {
+ hash_code code = hash_combine_range(&all_zero_c[0], &all_zero_c[0] + Idx);
+ std::map<size_t, size_t>::iterator
+ I = code_to_size.insert(std::make_pair(code, Idx)).first;
+ EXPECT_EQ(Idx, I->second);
+ }
+ code_to_size.clear();
+ std::vector<unsigned> all_one_int(512, -1);
+ for (unsigned Idx = 1, Size = all_one_int.size(); Idx < Size; ++Idx) {
+ hash_code code = hash_combine_range(&all_one_int[0], &all_one_int[0] + Idx);
+ std::map<size_t, size_t>::iterator
+ I = code_to_size.insert(std::make_pair(code, Idx)).first;
+ EXPECT_EQ(Idx, I->second);
+ }
+ code_to_size.clear();
+ std::vector<unsigned> all_zero_int(512, 0);
+ for (unsigned Idx = 1, Size = all_zero_int.size(); Idx < Size; ++Idx) {
+ hash_code code = hash_combine_range(&all_zero_int[0], &all_zero_int[0] + Idx);
+ std::map<size_t, size_t>::iterator
+ I = code_to_size.insert(std::make_pair(code, Idx)).first;
+ EXPECT_EQ(Idx, I->second);
+ }
+}
+
+TEST(HashingTest, HashCombineRangeGoldenTest) {
+ struct { const char *s; uint64_t hash; } golden_data[] = {
+#if SIZE_MAX == UINT64_MAX
+ { "a", 0xaeb6f9d5517c61f8ULL },
+ { "ab", 0x7ab1edb96be496b4ULL },
+ { "abc", 0xe38e60bf19c71a3fULL },
+ { "abcde", 0xd24461a66de97f6eULL },
+ { "abcdefgh", 0x4ef872ec411dec9dULL },
+ { "abcdefghijklm", 0xe8a865539f4eadfeULL },
+ { "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL },
+ { "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
+ { "abcdefghijklmnopqrstuvwxyzabcdef"
+ "abcdefghijklmnopqrstuvwxyzghijkl"
+ "abcdefghijklmnopqrstuvwxyzmnopqr"
+ "abcdefghijklmnopqrstuvwxyzstuvwx"
+ "abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
+ { "a", 0xaeb6f9d5517c61f8ULL },
+ { "aa", 0xf2b3b69a9736a1ebULL },
+ { "aaa", 0xf752eb6f07b1cafeULL },
+ { "aaaaa", 0x812bd21e1236954cULL },
+ { "aaaaaaaa", 0xff07a2cff08ac587ULL },
+ { "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL },
+ { "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL },
+ { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
+ { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
+ { "z", 0x1ba160d7e8f8785cULL },
+ { "zz", 0x2c5c03172f1285d7ULL },
+ { "zzz", 0x9d2c4f4b507a2ac3ULL },
+ { "zzzzz", 0x0f03b9031735693aULL },
+ { "zzzzzzzz", 0xe674147c8582c08eULL },
+ { "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL },
+ { "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL },
+ { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
+ { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
+ { "a", 0xaeb6f9d5517c61f8ULL },
+ { "ab", 0x7ab1edb96be496b4ULL },
+ { "aba", 0x3edb049950884d0aULL },
+ { "ababa", 0x8f2de9e73a97714bULL },
+ { "abababab", 0xee14a29ddf0ce54cULL },
+ { "ababababababa", 0x38b3ddaada2d52b4ULL },
+ { "ababababababababababa", 0xd3665364219f2b85ULL },
+ { "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
+ { "abababababababababababababababab"
+ "abababababababababababababababab"
+ "abababababababababababababababab"
+ "abababababababababababababababab"
+ "abababababababababababababababab", 0x840192d129f7a22bULL }
+#elif SIZE_MAX == UINT32_MAX
+ { "a", 0x000000004605f745ULL },
+ { "ab", 0x00000000d5f06301ULL },
+ { "abc", 0x00000000559fe1eeULL },
+ { "abcde", 0x00000000424028d7ULL },
+ { "abcdefgh", 0x000000007bb119f8ULL },
+ { "abcdefghijklm", 0x00000000edbca513ULL },
+ { "abcdefghijklmnopqrstu", 0x000000007c15712eULL },
+ { "abcdefghijklmnopqrstuvwxyzabcdef", 0x000000000b3aad66ULL },
+ { "abcdefghijklmnopqrstuvwxyzabcdef"
+ "abcdefghijklmnopqrstuvwxyzghijkl"
+ "abcdefghijklmnopqrstuvwxyzmnopqr"
+ "abcdefghijklmnopqrstuvwxyzstuvwx"
+ "abcdefghijklmnopqrstuvwxyzyzabcd", 0x000000008c758c8bULL },
+ { "a", 0x000000004605f745ULL },
+ { "aa", 0x00000000dc0a52daULL },
+ { "aaa", 0x00000000b309274fULL },
+ { "aaaaa", 0x00000000203b5ef6ULL },
+ { "aaaaaaaa", 0x00000000a429e18fULL },
+ { "aaaaaaaaaaaaa", 0x000000008662070bULL },
+ { "aaaaaaaaaaaaaaaaaaaaa", 0x000000003f11151cULL },
+ { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0x000000008600fe20ULL },
+ { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0x000000004e0e0804ULL },
+ { "z", 0x00000000c5e405e9ULL },
+ { "zz", 0x00000000a8d8a2c6ULL },
+ { "zzz", 0x00000000fc2af672ULL },
+ { "zzzzz", 0x0000000047d9efe6ULL },
+ { "zzzzzzzz", 0x0000000080d77794ULL },
+ { "zzzzzzzzzzzzz", 0x00000000405f93adULL },
+ { "zzzzzzzzzzzzzzzzzzzzz", 0x00000000fc72838dULL },
+ { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x000000007ce160f1ULL },
+ { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
+ "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x00000000aed9ed1bULL },
+ { "a", 0x000000004605f745ULL },
+ { "ab", 0x00000000d5f06301ULL },
+ { "aba", 0x00000000a85cd91bULL },
+ { "ababa", 0x000000009e3bb52eULL },
+ { "abababab", 0x000000002709b3b9ULL },
+ { "ababababababa", 0x000000003a234174ULL },
+ { "ababababababababababa", 0x000000005c63e5ceULL },
+ { "abababababababababababababababab", 0x0000000013f74334ULL },
+ { "abababababababababababababababab"
+ "abababababababababababababababab"
+ "abababababababababababababababab"
+ "abababababababababababababababab"
+ "abababababababababababababababab", 0x00000000c1a6f135ULL },
+#else
+#error This test only supports 64-bit and 32-bit systems.
+#endif
+ };
+ for (unsigned i = 0; i < sizeof(golden_data)/sizeof(*golden_data); ++i) {
+ StringRef str = golden_data[i].s;
+ hash_code hash = hash_combine_range(str.begin(), str.end());
+#if 0 // Enable this to generate paste-able text for the above structure.
+ std::string member_str = "\"" + str.str() + "\",";
+ fprintf(stderr, " { %-35s 0x%016llxULL },\n",
+ member_str.c_str(), static_cast<uint64_t>(hash));
+#endif
+ EXPECT_EQ(static_cast<size_t>(golden_data[i].hash),
+ static_cast<size_t>(hash));
+ }
+}
+
+TEST(HashingTest, HashCombineBasicTest) {
+ // Hashing a sequence of homogeneous types matches range hashing.
+ const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;
+ const int arr1[] = { i1, i2, i3, i4, i5, i6 };
+ EXPECT_EQ(hash_combine_range(arr1, arr1 + 1), hash_combine(i1));
+ EXPECT_EQ(hash_combine_range(arr1, arr1 + 2), hash_combine(i1, i2));
+ EXPECT_EQ(hash_combine_range(arr1, arr1 + 3), hash_combine(i1, i2, i3));
+ EXPECT_EQ(hash_combine_range(arr1, arr1 + 4), hash_combine(i1, i2, i3, i4));
+ EXPECT_EQ(hash_combine_range(arr1, arr1 + 5),
+ hash_combine(i1, i2, i3, i4, i5));
+ EXPECT_EQ(hash_combine_range(arr1, arr1 + 6),
+ hash_combine(i1, i2, i3, i4, i5, i6));
+
+ // Hashing a sequence of heterogeneous types which *happen* to all produce the
+ // same data for hashing produces the same as a range-based hash of the
+ // fundamental values.
+ const size_t s1 = 1024, s2 = 8888, s3 = 9000000;
+ const HashableDummy d1 = { 1024 }, d2 = { 8888 }, d3 = { 9000000 };
+ const size_t arr2[] = { s1, s2, s3 };
+ EXPECT_EQ(hash_combine_range(begin(arr2), end(arr2)),
+ hash_combine(s1, s2, s3));
+ EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(s1, s2, d3));
+ EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(s1, d2, s3));
+ EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(d1, s2, s3));
+ EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(d1, d2, s3));
+ EXPECT_EQ(hash_combine(s1, s2, s3), hash_combine(d1, d2, d3));
+
+ // Permuting values causes hashes to change.
+ EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i1, i1, i2));
+ EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i1, i2, i1));
+ EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i2, i1, i1));
+ EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i2, i2, i1));
+ EXPECT_NE(hash_combine(i1, i1, i1), hash_combine(i2, i2, i2));
+ EXPECT_NE(hash_combine(i2, i1, i1), hash_combine(i1, i1, i2));
+ EXPECT_NE(hash_combine(i1, i1, i2), hash_combine(i1, i2, i1));
+ EXPECT_NE(hash_combine(i1, i2, i1), hash_combine(i2, i1, i1));
+
+ // Changing type w/o changing value causes hashes to change.
+ EXPECT_NE(hash_combine(i1, i2, i3), hash_combine((char)i1, i2, i3));
+ EXPECT_NE(hash_combine(i1, i2, i3), hash_combine(i1, (char)i2, i3));
+ EXPECT_NE(hash_combine(i1, i2, i3), hash_combine(i1, i2, (char)i3));
+
+ // This is array of uint64, but it should have the exact same byte pattern as
+ // an array of LargeTestIntegers.
+ const uint64_t bigarr[] = {
+ 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
+ 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
+ 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL,
+ 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
+ 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
+ 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL,
+ 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
+ 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
+ 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL
+ };
+ // Hash a preposterously large integer, both aligned with the buffer and
+ // misaligned.
+ const LargeTestInteger li = { {
+ 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL,
+ 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL, 0xafafafafededededULL,
+ 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL
+ } };
+ // Rotate the storage from 'li'.
+ const LargeTestInteger l2 = { {
+ 0xacacacacbcbcbcbcULL, 0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL,
+ 0xfefefefededededeULL, 0xafafafafededededULL, 0xffffeeeeddddccccULL,
+ 0xaaaacbcbffffababULL, 0xaaaaaaaaababababULL
+ } };
+ const LargeTestInteger l3 = { {
+ 0xccddeeffeeddccbbULL, 0xdeadbeafdeadbeefULL, 0xfefefefededededeULL,
+ 0xafafafafededededULL, 0xffffeeeeddddccccULL, 0xaaaacbcbffffababULL,
+ 0xaaaaaaaaababababULL, 0xacacacacbcbcbcbcULL
+ } };
+ EXPECT_EQ(hash_combine_range(begin(bigarr), end(bigarr)),
+ hash_combine(li, li, li));
+ EXPECT_EQ(hash_combine_range(bigarr, bigarr + 9),
+ hash_combine(bigarr[0], l2));
+ EXPECT_EQ(hash_combine_range(bigarr, bigarr + 10),
+ hash_combine(bigarr[0], bigarr[1], l3));
+ EXPECT_EQ(hash_combine_range(bigarr, bigarr + 17),
+ hash_combine(li, bigarr[0], l2));
+ EXPECT_EQ(hash_combine_range(bigarr, bigarr + 18),
+ hash_combine(li, bigarr[0], bigarr[1], l3));
+ EXPECT_EQ(hash_combine_range(bigarr, bigarr + 18),
+ hash_combine(bigarr[0], l2, bigarr[9], l3));
+ EXPECT_EQ(hash_combine_range(bigarr, bigarr + 20),
+ hash_combine(bigarr[0], l2, bigarr[9], l3, bigarr[18], bigarr[19]));
+}
+
+}
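As the NonPOD fixture above shows in passing, the intended extension point is an ADL-visible hash_value overload. A minimal sketch; the Point type is hypothetical:

#include "llvm/ADT/Hashing.h"

struct Point { int x, y; };

// Found by argument-dependent lookup, so both a direct hash_value(P) call
// and hash_combine(..., P, ...) pick it up.
llvm::hash_code hash_value(const Point &P) {
  return llvm::hash_combine(P.x, P.y);
}

// Usage: two points with equal coordinates hash identically.
//   Point A = {1, 2}, B = {1, 2};
//   assert(hash_value(A) == hash_value(B));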
diff --git a/unittests/ADT/IntrusiveRefCntPtrTest.cpp b/unittests/ADT/IntrusiveRefCntPtrTest.cpp
new file mode 100644
index 000000000000..0c8c4ca16dd7
--- /dev/null
+++ b/unittests/ADT/IntrusiveRefCntPtrTest.cpp
@@ -0,0 +1,64 @@
+//===- unittest/ADT/IntrusiveRefCntPtrTest.cpp ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+struct VirtualRefCounted : public RefCountedBaseVPTR {
+ virtual void f() {}
+};
+
+// Run this test with valgrind to detect memory leaks.
+TEST(IntrusiveRefCntPtr, RefCountedBaseVPTRCopyDoesNotLeak) {
+ VirtualRefCounted *V1 = new VirtualRefCounted;
+ IntrusiveRefCntPtr<VirtualRefCounted> R1 = V1;
+ VirtualRefCounted *V2 = new VirtualRefCounted(*V1);
+ IntrusiveRefCntPtr<VirtualRefCounted> R2 = V2;
+}
+
+struct SimpleRefCounted : public RefCountedBase<SimpleRefCounted> {};
+
+// Run this test with valgrind to detect memory leaks.
+TEST(IntrusiveRefCntPtr, RefCountedBaseCopyDoesNotLeak) {
+ SimpleRefCounted *S1 = new SimpleRefCounted;
+ IntrusiveRefCntPtr<SimpleRefCounted> R1 = S1;
+ SimpleRefCounted *S2 = new SimpleRefCounted(*S1);
+ IntrusiveRefCntPtr<SimpleRefCounted> R2 = S2;
+}
+
+struct InterceptRefCounted : public RefCountedBase<InterceptRefCounted> {
+ InterceptRefCounted(bool *Released, bool *Retained)
+ : Released(Released), Retained(Retained) {}
+ bool * const Released;
+ bool * const Retained;
+};
+template <> struct IntrusiveRefCntPtrInfo<InterceptRefCounted> {
+ static void retain(InterceptRefCounted *I) {
+ *I->Retained = true;
+ I->Retain();
+ }
+ static void release(InterceptRefCounted *I) {
+ *I->Released = true;
+ I->Release();
+ }
+};
+TEST(IntrusiveRefCntPtr, UsesTraitsToRetainAndRelease) {
+ bool Released = false;
+ bool Retained = false;
+ {
+ InterceptRefCounted *I = new InterceptRefCounted(&Released, &Retained);
+ IntrusiveRefCntPtr<InterceptRefCounted> R = I;
+ }
+ EXPECT_TRUE(Released);
+ EXPECT_TRUE(Retained);
+}
+
+} // end namespace llvm
diff --git a/unittests/ADT/SmallPtrSetTest.cpp b/unittests/ADT/SmallPtrSetTest.cpp
new file mode 100644
index 000000000000..9114875e0035
--- /dev/null
+++ b/unittests/ADT/SmallPtrSetTest.cpp
@@ -0,0 +1,72 @@
+//===- llvm/unittest/ADT/SmallPtrSetTest.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SmallPtrSet unit tests.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+using namespace llvm;
+
+// SmallPtrSet swapping test.
+TEST(SmallPtrSetTest, SwapTest) {
+ int buf[10];
+
+ SmallPtrSet<int *, 2> a;
+ SmallPtrSet<int *, 2> b;
+
+ a.insert(&buf[0]);
+ a.insert(&buf[1]);
+ b.insert(&buf[2]);
+
+ std::swap(a, b);
+
+ EXPECT_EQ(1U, a.size());
+ EXPECT_EQ(2U, b.size());
+ EXPECT_TRUE(a.count(&buf[2]));
+ EXPECT_TRUE(b.count(&buf[0]));
+ EXPECT_TRUE(b.count(&buf[1]));
+
+ b.insert(&buf[3]);
+ std::swap(a, b);
+
+ EXPECT_EQ(3U, a.size());
+ EXPECT_EQ(1U, b.size());
+ EXPECT_TRUE(a.count(&buf[0]));
+ EXPECT_TRUE(a.count(&buf[1]));
+ EXPECT_TRUE(a.count(&buf[3]));
+ EXPECT_TRUE(b.count(&buf[2]));
+
+ std::swap(a, b);
+
+ EXPECT_EQ(1U, a.size());
+ EXPECT_EQ(3U, b.size());
+ EXPECT_TRUE(a.count(&buf[2]));
+ EXPECT_TRUE(b.count(&buf[0]));
+ EXPECT_TRUE(b.count(&buf[1]));
+ EXPECT_TRUE(b.count(&buf[3]));
+
+ a.insert(&buf[4]);
+ a.insert(&buf[5]);
+ a.insert(&buf[6]);
+
+ std::swap(b, a);
+
+ EXPECT_EQ(3U, a.size());
+ EXPECT_EQ(4U, b.size());
+ EXPECT_TRUE(b.count(&buf[2]));
+ EXPECT_TRUE(b.count(&buf[4]));
+ EXPECT_TRUE(b.count(&buf[5]));
+ EXPECT_TRUE(b.count(&buf[6]));
+ EXPECT_TRUE(a.count(&buf[0]));
+ EXPECT_TRUE(a.count(&buf[1]));
+ EXPECT_TRUE(a.count(&buf[3]));
+}
diff --git a/unittests/ADT/SmallStringTest.cpp b/unittests/ADT/SmallStringTest.cpp
index 099d8159c917..660ac44a8bca 100644
--- a/unittests/ADT/SmallStringTest.cpp
+++ b/unittests/ADT/SmallStringTest.cpp
@@ -44,5 +44,153 @@ TEST_F(SmallStringTest, EmptyStringTest) {
EXPECT_TRUE(theString.rbegin() == theString.rend());
}
+TEST_F(SmallStringTest, AssignRepeated) {
+ theString.assign(3, 'a');
+ EXPECT_EQ(3u, theString.size());
+ EXPECT_STREQ("aaa", theString.c_str());
+}
+TEST_F(SmallStringTest, AssignIterPair) {
+ StringRef abc = "abc";
+ theString.assign(abc.begin(), abc.end());
+ EXPECT_EQ(3u, theString.size());
+ EXPECT_STREQ("abc", theString.c_str());
+}
+
+TEST_F(SmallStringTest, AssignStringRef) {
+ StringRef abc = "abc";
+ theString.assign(abc);
+ EXPECT_EQ(3u, theString.size());
+ EXPECT_STREQ("abc", theString.c_str());
+}
+
+TEST_F(SmallStringTest, AssignSmallVector) {
+ StringRef abc = "abc";
+ SmallVector<char, 10> abcVec(abc.begin(), abc.end());
+ theString.assign(abcVec);
+ EXPECT_EQ(3u, theString.size());
+ EXPECT_STREQ("abc", theString.c_str());
+}
+
+TEST_F(SmallStringTest, AppendIterPair) {
+ StringRef abc = "abc";
+ theString.append(abc.begin(), abc.end());
+ theString.append(abc.begin(), abc.end());
+ EXPECT_EQ(6u, theString.size());
+ EXPECT_STREQ("abcabc", theString.c_str());
+}
+
+TEST_F(SmallStringTest, AppendStringRef) {
+ StringRef abc = "abc";
+ theString.append(abc);
+ theString.append(abc);
+ EXPECT_EQ(6u, theString.size());
+ EXPECT_STREQ("abcabc", theString.c_str());
+}
+
+TEST_F(SmallStringTest, AppendSmallVector) {
+ StringRef abc = "abc";
+ SmallVector<char, 10> abcVec(abc.begin(), abc.end());
+ theString.append(abcVec);
+ theString.append(abcVec);
+ EXPECT_EQ(6u, theString.size());
+ EXPECT_STREQ("abcabc", theString.c_str());
+}
+
+TEST_F(SmallStringTest, Substr) {
+ theString = "hello";
+ EXPECT_EQ("lo", theString.substr(3));
+ EXPECT_EQ("", theString.substr(100));
+ EXPECT_EQ("hello", theString.substr(0, 100));
+ EXPECT_EQ("o", theString.substr(4, 10));
+}
+
+TEST_F(SmallStringTest, Slice) {
+ theString = "hello";
+ EXPECT_EQ("l", theString.slice(2, 3));
+ EXPECT_EQ("ell", theString.slice(1, 4));
+ EXPECT_EQ("llo", theString.slice(2, 100));
+ EXPECT_EQ("", theString.slice(2, 1));
+ EXPECT_EQ("", theString.slice(10, 20));
+}
+
+TEST_F(SmallStringTest, Find) {
+ theString = "hello";
+ EXPECT_EQ(2U, theString.find('l'));
+ EXPECT_EQ(StringRef::npos, theString.find('z'));
+ EXPECT_EQ(StringRef::npos, theString.find("helloworld"));
+ EXPECT_EQ(0U, theString.find("hello"));
+ EXPECT_EQ(1U, theString.find("ello"));
+ EXPECT_EQ(StringRef::npos, theString.find("zz"));
+ EXPECT_EQ(2U, theString.find("ll", 2));
+ EXPECT_EQ(StringRef::npos, theString.find("ll", 3));
+ EXPECT_EQ(0U, theString.find(""));
+
+ EXPECT_EQ(3U, theString.rfind('l'));
+ EXPECT_EQ(StringRef::npos, theString.rfind('z'));
+ EXPECT_EQ(StringRef::npos, theString.rfind("helloworld"));
+ EXPECT_EQ(0U, theString.rfind("hello"));
+ EXPECT_EQ(1U, theString.rfind("ello"));
+ EXPECT_EQ(StringRef::npos, theString.rfind("zz"));
+
+ EXPECT_EQ(2U, theString.find_first_of('l'));
+ EXPECT_EQ(1U, theString.find_first_of("el"));
+ EXPECT_EQ(StringRef::npos, theString.find_first_of("xyz"));
+
+ EXPECT_EQ(1U, theString.find_first_not_of('h'));
+ EXPECT_EQ(4U, theString.find_first_not_of("hel"));
+ EXPECT_EQ(StringRef::npos, theString.find_first_not_of("hello"));
+
+ theString = "hellx xello hell ello world foo bar hello";
+ EXPECT_EQ(36U, theString.find("hello"));
+ EXPECT_EQ(28U, theString.find("foo"));
+ EXPECT_EQ(12U, theString.find("hell", 2));
+ EXPECT_EQ(0U, theString.find(""));
+}
+
+TEST_F(SmallStringTest, Count) {
+ theString = "hello";
+ EXPECT_EQ(2U, theString.count('l'));
+ EXPECT_EQ(1U, theString.count('o'));
+ EXPECT_EQ(0U, theString.count('z'));
+ EXPECT_EQ(0U, theString.count("helloworld"));
+ EXPECT_EQ(1U, theString.count("hello"));
+ EXPECT_EQ(1U, theString.count("ello"));
+ EXPECT_EQ(0U, theString.count("zz"));
+}
+
+TEST(StringRefTest, Comparisons) {
+ EXPECT_EQ(-1, SmallString<10>("aab").compare("aad"));
+ EXPECT_EQ( 0, SmallString<10>("aab").compare("aab"));
+ EXPECT_EQ( 1, SmallString<10>("aab").compare("aaa"));
+ EXPECT_EQ(-1, SmallString<10>("aab").compare("aabb"));
+ EXPECT_EQ( 1, SmallString<10>("aab").compare("aa"));
+ EXPECT_EQ( 1, SmallString<10>("\xFF").compare("\1"));
+
+ EXPECT_EQ(-1, SmallString<10>("AaB").compare_lower("aAd"));
+ EXPECT_EQ( 0, SmallString<10>("AaB").compare_lower("aab"));
+ EXPECT_EQ( 1, SmallString<10>("AaB").compare_lower("AAA"));
+ EXPECT_EQ(-1, SmallString<10>("AaB").compare_lower("aaBb"));
+ EXPECT_EQ( 1, SmallString<10>("AaB").compare_lower("aA"));
+ EXPECT_EQ( 1, SmallString<10>("\xFF").compare_lower("\1"));
+
+ EXPECT_EQ(-1, SmallString<10>("aab").compare_numeric("aad"));
+ EXPECT_EQ( 0, SmallString<10>("aab").compare_numeric("aab"));
+ EXPECT_EQ( 1, SmallString<10>("aab").compare_numeric("aaa"));
+ EXPECT_EQ(-1, SmallString<10>("aab").compare_numeric("aabb"));
+ EXPECT_EQ( 1, SmallString<10>("aab").compare_numeric("aa"));
+ EXPECT_EQ(-1, SmallString<10>("1").compare_numeric("10"));
+ EXPECT_EQ( 0, SmallString<10>("10").compare_numeric("10"));
+ EXPECT_EQ( 0, SmallString<10>("10a").compare_numeric("10a"));
+ EXPECT_EQ( 1, SmallString<10>("2").compare_numeric("1"));
+ EXPECT_EQ( 0, SmallString<10>("llvm_v1i64_ty").compare_numeric("llvm_v1i64_ty"));
+ EXPECT_EQ( 1, SmallString<10>("\xFF").compare_numeric("\1"));
+ EXPECT_EQ( 1, SmallString<10>("V16").compare_numeric("V1_q0"));
+ EXPECT_EQ(-1, SmallString<10>("V1_q0").compare_numeric("V16"));
+ EXPECT_EQ(-1, SmallString<10>("V8_q0").compare_numeric("V16"));
+ EXPECT_EQ( 1, SmallString<10>("V16").compare_numeric("V8_q0"));
+ EXPECT_EQ(-1, SmallString<10>("V1_q0").compare_numeric("V8_q0"));
+ EXPECT_EQ( 1, SmallString<10>("V8_q0").compare_numeric("V1_q0"));
+}
+
+}
diff --git a/unittests/ADT/SparseSetTest.cpp b/unittests/ADT/SparseSetTest.cpp
new file mode 100644
index 000000000000..a6ea7572ce44
--- /dev/null
+++ b/unittests/ADT/SparseSetTest.cpp
@@ -0,0 +1,186 @@
+//===------ ADT/SparseSetTest.cpp - SparseSet unit tests - -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SparseSet.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+typedef SparseSet<unsigned> USet;
+
+// Empty set tests.
+TEST(SparseSetTest, EmptySet) {
+ USet Set;
+ EXPECT_TRUE(Set.empty());
+ EXPECT_TRUE(Set.begin() == Set.end());
+ EXPECT_EQ(0u, Set.size());
+
+ Set.setUniverse(10);
+
+ // Lookups on empty set.
+ EXPECT_TRUE(Set.find(0) == Set.end());
+ EXPECT_TRUE(Set.find(9) == Set.end());
+
+ // Same thing on a const reference.
+ const USet &CSet = Set;
+ EXPECT_TRUE(CSet.empty());
+ EXPECT_TRUE(CSet.begin() == CSet.end());
+ EXPECT_EQ(0u, CSet.size());
+ EXPECT_TRUE(CSet.find(0) == CSet.end());
+ USet::const_iterator I = CSet.find(5);
+ EXPECT_TRUE(I == CSet.end());
+}
+
+// Single entry set tests.
+TEST(SparseSetTest, SingleEntrySet) {
+ USet Set;
+ Set.setUniverse(10);
+ std::pair<USet::iterator, bool> IP = Set.insert(5);
+ EXPECT_TRUE(IP.second);
+ EXPECT_TRUE(IP.first == Set.begin());
+
+ EXPECT_FALSE(Set.empty());
+ EXPECT_FALSE(Set.begin() == Set.end());
+ EXPECT_TRUE(Set.begin() + 1 == Set.end());
+ EXPECT_EQ(1u, Set.size());
+
+ EXPECT_TRUE(Set.find(0) == Set.end());
+ EXPECT_TRUE(Set.find(9) == Set.end());
+
+ EXPECT_FALSE(Set.count(0));
+ EXPECT_TRUE(Set.count(5));
+
+ // Redundant insert.
+ IP = Set.insert(5);
+ EXPECT_FALSE(IP.second);
+ EXPECT_TRUE(IP.first == Set.begin());
+
+ // Erase non-existent element.
+ EXPECT_FALSE(Set.erase(1));
+ EXPECT_EQ(1u, Set.size());
+ EXPECT_EQ(5u, *Set.begin());
+
+ // Erase iterator.
+ USet::iterator I = Set.find(5);
+ EXPECT_TRUE(I == Set.begin());
+ I = Set.erase(I);
+ EXPECT_TRUE(I == Set.end());
+ EXPECT_TRUE(Set.empty());
+}
+
+// Multiple entry set tests.
+TEST(SparseSetTest, MultipleEntrySet) {
+ USet Set;
+ Set.setUniverse(10);
+
+ Set.insert(5);
+ Set.insert(3);
+ Set.insert(2);
+ Set.insert(1);
+ Set.insert(4);
+ EXPECT_EQ(5u, Set.size());
+
+ // Without deletions, iteration order == insertion order.
+ USet::const_iterator I = Set.begin();
+ EXPECT_EQ(5u, *I);
+ ++I;
+ EXPECT_EQ(3u, *I);
+ ++I;
+ EXPECT_EQ(2u, *I);
+ ++I;
+ EXPECT_EQ(1u, *I);
+ ++I;
+ EXPECT_EQ(4u, *I);
+ ++I;
+ EXPECT_TRUE(I == Set.end());
+
+ // Redundant insert.
+ std::pair<USet::iterator, bool> IP = Set.insert(3);
+ EXPECT_FALSE(IP.second);
+ EXPECT_TRUE(IP.first == Set.begin() + 1);
+
+ // Erase last element by key.
+ EXPECT_TRUE(Set.erase(4));
+ EXPECT_EQ(4u, Set.size());
+ EXPECT_FALSE(Set.count(4));
+ EXPECT_FALSE(Set.erase(4));
+ EXPECT_EQ(4u, Set.size());
+ EXPECT_FALSE(Set.count(4));
+
+ // Erase first element by key.
+ EXPECT_TRUE(Set.count(5));
+ EXPECT_TRUE(Set.find(5) == Set.begin());
+ EXPECT_TRUE(Set.erase(5));
+ EXPECT_EQ(3u, Set.size());
+ EXPECT_FALSE(Set.count(5));
+ EXPECT_FALSE(Set.erase(5));
+ EXPECT_EQ(3u, Set.size());
+ EXPECT_FALSE(Set.count(5));
+
+ Set.insert(6);
+ Set.insert(7);
+ EXPECT_EQ(5u, Set.size());
+
+ // Erase last element by iterator.
+ I = Set.erase(Set.end() - 1);
+ EXPECT_TRUE(I == Set.end());
+ EXPECT_EQ(4u, Set.size());
+
+ // Erase second element by iterator.
+ I = Set.erase(Set.begin() + 1);
+ EXPECT_TRUE(I == Set.begin() + 1);
+
+ // Clear and resize the universe.
+ Set.clear();
+ EXPECT_FALSE(Set.count(5));
+ Set.setUniverse(1000);
+
+ // Add more than 256 elements.
+ for (unsigned i = 100; i != 800; ++i)
+ Set.insert(i);
+
+ for (unsigned i = 0; i != 10; ++i)
+ Set.erase(i);
+
+ for (unsigned i = 100; i != 800; ++i)
+ EXPECT_TRUE(Set.count(i));
+
+ EXPECT_FALSE(Set.count(99));
+ EXPECT_FALSE(Set.count(800));
+ EXPECT_EQ(700u, Set.size());
+}
+
+struct Alt {
+ unsigned Value;
+ explicit Alt(unsigned x) : Value(x) {}
+ unsigned getSparseSetKey() const { return Value - 1000; }
+};
+
+TEST(SparseSetTest, AltStructSet) {
+ typedef SparseSet<Alt> ASet;
+ ASet Set;
+ Set.setUniverse(10);
+ Set.insert(Alt(1005));
+
+ ASet::iterator I = Set.find(5);
+ ASSERT_TRUE(I == Set.begin());
+ EXPECT_EQ(1005u, I->Value);
+
+ Set.insert(Alt(1006));
+ Set.insert(Alt(1006));
+ I = Set.erase(Set.begin());
+ ASSERT_TRUE(I == Set.begin());
+ EXPECT_EQ(1006u, I->Value);
+
+ EXPECT_FALSE(Set.erase(5));
+ EXPECT_TRUE(Set.erase(6));
+}
+} // namespace
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index 8364eac82748..cc7a7fbe332d 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -9,6 +9,7 @@
#include "gtest/gtest.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -245,6 +246,12 @@ TEST(StringRefTest, Find) {
EXPECT_EQ(StringRef::npos, Str.find("zz"));
EXPECT_EQ(2U, Str.find("ll", 2));
EXPECT_EQ(StringRef::npos, Str.find("ll", 3));
+ EXPECT_EQ(0U, Str.find(""));
+ StringRef LongStr("hellx xello hell ello world foo bar hello");
+ EXPECT_EQ(36U, LongStr.find("hello"));
+ EXPECT_EQ(28U, LongStr.find("foo"));
+ EXPECT_EQ(12U, LongStr.find("hell", 2));
+ EXPECT_EQ(0U, LongStr.find(""));
EXPECT_EQ(3U, Str.rfind('l'));
EXPECT_EQ(StringRef::npos, Str.rfind('z'));
@@ -285,4 +292,140 @@ TEST(StringRefTest, Misc) {
EXPECT_EQ("hello", OS.str());
}
+TEST(StringRefTest, Hashing) {
+ EXPECT_EQ(hash_value(std::string()), hash_value(StringRef()));
+ EXPECT_EQ(hash_value(std::string()), hash_value(StringRef("")));
+ std::string S = "hello world";
+ hash_code H = hash_value(S);
+ EXPECT_EQ(H, hash_value(StringRef("hello world")));
+ EXPECT_EQ(H, hash_value(StringRef(S)));
+ EXPECT_NE(H, hash_value(StringRef("hello worl")));
+ EXPECT_EQ(hash_value(std::string("hello worl")),
+ hash_value(StringRef("hello worl")));
+ EXPECT_NE(H, hash_value(StringRef("hello world ")));
+ EXPECT_EQ(hash_value(std::string("hello world ")),
+ hash_value(StringRef("hello world ")));
+ EXPECT_EQ(H, hash_value(StringRef("hello world\0")));
+ EXPECT_NE(hash_value(std::string("ello worl")),
+ hash_value(StringRef("hello world").slice(1, -1)));
+}
+
+struct UnsignedPair {
+ const char *Str;
+ uint64_t Expected;
+} Unsigned[] =
+ { {"0", 0}
+ , {"255", 255}
+ , {"256", 256}
+ , {"65535", 65535}
+ , {"65536", 65536}
+ , {"4294967295", 4294967295ULL}
+ , {"4294967296", 4294967296ULL}
+ , {"18446744073709551615", 18446744073709551615ULL}
+ , {"042", 34}
+ , {"0x42", 66}
+ , {"0b101010", 42}
+ };
+
+struct SignedPair {
+ const char *Str;
+ int64_t Expected;
+} Signed[] =
+ { {"0", 0}
+ , {"-0", 0}
+ , {"127", 127}
+ , {"128", 128}
+ , {"-128", -128}
+ , {"-129", -129}
+ , {"32767", 32767}
+ , {"32768", 32768}
+ , {"-32768", -32768}
+ , {"-32769", -32769}
+ , {"2147483647", 2147483647LL}
+ , {"2147483648", 2147483648LL}
+ , {"-2147483648", -2147483648LL}
+ , {"-2147483649", -2147483649LL}
+ , {"-9223372036854775808", -(9223372036854775807LL) - 1}
+ , {"042", 34}
+ , {"0x42", 66}
+ , {"0b101010", 42}
+ , {"-042", -34}
+ , {"-0x42", -66}
+ , {"-0b101010", -42}
+ };
+
+TEST(StringRefTest, getAsInteger) {
+ uint8_t U8;
+ uint16_t U16;
+ uint32_t U32;
+ uint64_t U64;
+
+ for (size_t i = 0; i < array_lengthof(Unsigned); ++i) {
+ bool U8Success = StringRef(Unsigned[i].Str).getAsInteger(0, U8);
+ if (static_cast<uint8_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+ ASSERT_FALSE(U8Success);
+ EXPECT_EQ(U8, Unsigned[i].Expected);
+ } else {
+ ASSERT_TRUE(U8Success);
+ }
+ bool U16Success = StringRef(Unsigned[i].Str).getAsInteger(0, U16);
+ if (static_cast<uint16_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+ ASSERT_FALSE(U16Success);
+ EXPECT_EQ(U16, Unsigned[i].Expected);
+ } else {
+ ASSERT_TRUE(U16Success);
+ }
+ bool U32Success = StringRef(Unsigned[i].Str).getAsInteger(0, U32);
+ if (static_cast<uint32_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+ ASSERT_FALSE(U32Success);
+ EXPECT_EQ(U32, Unsigned[i].Expected);
+ } else {
+ ASSERT_TRUE(U32Success);
+ }
+ bool U64Success = StringRef(Unsigned[i].Str).getAsInteger(0, U64);
+ if (static_cast<uint64_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
+ ASSERT_FALSE(U64Success);
+ EXPECT_EQ(U64, Unsigned[i].Expected);
+ } else {
+ ASSERT_TRUE(U64Success);
+ }
+ }
+
+ int8_t S8;
+ int16_t S16;
+ int32_t S32;
+ int64_t S64;
+
+ for (size_t i = 0; i < array_lengthof(Signed); ++i) {
+ bool S8Success = StringRef(Signed[i].Str).getAsInteger(0, S8);
+ if (static_cast<int8_t>(Signed[i].Expected) == Signed[i].Expected) {
+ ASSERT_FALSE(S8Success);
+ EXPECT_EQ(S8, Signed[i].Expected);
+ } else {
+ ASSERT_TRUE(S8Success);
+ }
+ bool S16Success = StringRef(Signed[i].Str).getAsInteger(0, S16);
+ if (static_cast<int16_t>(Signed[i].Expected) == Signed[i].Expected) {
+ ASSERT_FALSE(S16Success);
+ EXPECT_EQ(S16, Signed[i].Expected);
+ } else {
+ ASSERT_TRUE(S16Success);
+ }
+ bool S32Success = StringRef(Signed[i].Str).getAsInteger(0, S32);
+ if (static_cast<int32_t>(Signed[i].Expected) == Signed[i].Expected) {
+ ASSERT_FALSE(S32Success);
+ EXPECT_EQ(S32, Signed[i].Expected);
+ } else {
+ ASSERT_TRUE(S32Success);
+ }
+ bool S64Success = StringRef(Signed[i].Str).getAsInteger(0, S64);
+ if (static_cast<int64_t>(Signed[i].Expected) == Signed[i].Expected) {
+ ASSERT_FALSE(S64Success);
+ EXPECT_EQ(S64, Signed[i].Expected);
+ } else {
+ ASSERT_TRUE(S64Success);
+ }
+ }
+}
+
} // end anonymous namespace
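The Unsigned and Signed tables bake in the radix-autodetection rules; spelled out as a minimal sketch (note the API returns true on failure, which is why the assertions above use ASSERT_FALSE on success):

#include <cassert>
#include "llvm/ADT/StringRef.h"

using namespace llvm;

// With radix 0, getAsInteger() infers the base from the prefix:
// "0x" -> 16, "0b" -> 2, a bare leading "0" -> 8, otherwise 10.
static void radixDemo() {
  unsigned V;
  assert(!StringRef("0x42").getAsInteger(0, V) && V == 66);     // hex
  assert(!StringRef("042").getAsInteger(0, V) && V == 34);      // octal
  assert(!StringRef("0b101010").getAsInteger(0, V) && V == 42); // binary
  assert(StringRef("9z").getAsInteger(0, V)); // non-digit => returns true
}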
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 160b69253b6f..479046e01d7f 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -87,6 +87,24 @@ TEST(TripleTest, ParsedIDs) {
EXPECT_EQ(Triple::Linux, T.getOS());
EXPECT_EQ(Triple::GNU, T.getEnvironment());
+ T = Triple("powerpc-bgp-linux");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::BGP, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("powerpc-bgp-cnk");
+ EXPECT_EQ(Triple::ppc, T.getArch());
+ EXPECT_EQ(Triple::BGP, T.getVendor());
+ EXPECT_EQ(Triple::CNK, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+ T = Triple("powerpc64-bgq-linux");
+ EXPECT_EQ(Triple::ppc64, T.getArch());
+ EXPECT_EQ(Triple::BGQ, T.getVendor());
+ EXPECT_EQ(Triple::Linux, T.getOS());
+ EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
T = Triple("powerpc-dunno-notsure");
EXPECT_EQ(Triple::ppc, T.getArch());
EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
@@ -154,7 +172,7 @@ TEST(TripleTest, Normalization) {
  // Check that normalizing a permuted set of valid components returns a
  // triple with the unpermuted components.
StringRef C[4];
- for (int Arch = 1+Triple::UnknownArch; Arch < Triple::InvalidArch; ++Arch) {
+ for (int Arch = 1+Triple::UnknownArch; Arch <= Triple::amdil; ++Arch) {
C[0] = Triple::getArchTypeName(Triple::ArchType(Arch));
for (int Vendor = 1+Triple::UnknownVendor; Vendor <= Triple::PC;
++Vendor) {
@@ -162,12 +180,6 @@ TEST(TripleTest, Normalization) {
for (int OS = 1+Triple::UnknownOS; OS <= Triple::Minix; ++OS) {
C[2] = Triple::getOSTypeName(Triple::OSType(OS));
- // If a value has multiple interpretations, then the permutation
- // test will inevitably fail. Currently this is only the case for
- // "psp" which parses as both an architecture and an O/S.
- if (OS == Triple::Psp)
- continue;
-
std::string E = Join(C[0], C[1], C[2]);
EXPECT_EQ(E, Triple::normalize(Join(C[0], C[1], C[2])));
@@ -212,9 +224,6 @@ TEST(TripleTest, Normalization) {
}
}
- EXPECT_EQ("a-b-psp", Triple::normalize("a-b-psp"));
- EXPECT_EQ("psp-b-c", Triple::normalize("psp-b-c"));
-
// Various real-world funky triples. The value returned by GCC's config.sub
// is given in the comment.
EXPECT_EQ("i386--mingw32", Triple::normalize("i386-mingw32")); // i386-pc-mingw32
@@ -267,4 +276,118 @@ TEST(TripleTest, MutateName) {
}
+TEST(TripleTest, BitWidthPredicates) {
+ Triple T;
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::arm);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::hexagon);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::mips);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::mips64);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_TRUE(T.isArch64Bit());
+
+ T.setArch(Triple::msp430);
+ EXPECT_TRUE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::ppc);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::ppc64);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_TRUE(T.isArch64Bit());
+
+ T.setArch(Triple::x86);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_TRUE(T.isArch32Bit());
+ EXPECT_FALSE(T.isArch64Bit());
+
+ T.setArch(Triple::x86_64);
+ EXPECT_FALSE(T.isArch16Bit());
+ EXPECT_FALSE(T.isArch32Bit());
+ EXPECT_TRUE(T.isArch64Bit());
+}
+
+TEST(TripleTest, BitWidthArchVariants) {
+ Triple T;
+ EXPECT_EQ(Triple::UnknownArch, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::UnknownArch, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::UnknownArch);
+ EXPECT_EQ(Triple::UnknownArch, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::UnknownArch, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::arm);
+ EXPECT_EQ(Triple::arm, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::UnknownArch, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::mips);
+ EXPECT_EQ(Triple::mips, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::mips64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::mipsel);
+ EXPECT_EQ(Triple::mipsel, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::mips64el, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::ppc);
+ EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::ptx32);
+ EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::sparc);
+ EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::sparcv9, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::x86);
+ EXPECT_EQ(Triple::x86, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::x86_64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::mips64);
+ EXPECT_EQ(Triple::mips, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::mips64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::mips64el);
+ EXPECT_EQ(Triple::mipsel, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::mips64el, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::ppc64);
+ EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::ptx64);
+ EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::sparcv9);
+ EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::sparcv9, T.get64BitArchVariant().getArch());
+
+ T.setArch(Triple::x86_64);
+ EXPECT_EQ(Triple::x86, T.get32BitArchVariant().getArch());
+ EXPECT_EQ(Triple::x86_64, T.get64BitArchVariant().getArch());
+}
+
}
diff --git a/unittests/ADT/VariadicFunctionTest.cpp b/unittests/ADT/VariadicFunctionTest.cpp
new file mode 100644
index 000000000000..cde31205966c
--- /dev/null
+++ b/unittests/ADT/VariadicFunctionTest.cpp
@@ -0,0 +1,110 @@
+//===----------- VariadicFunctionTest.cpp - VariadicFunction unit tests ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/VariadicFunction.h"
+
+using namespace llvm;
+namespace {
+
+// Defines a variadic function StringCat() to join strings.
+// StringCat()'s arguments and return value have class types.
+std::string StringCatImpl(ArrayRef<const std::string *> Args) {
+ std::string S;
+ for (unsigned i = 0, e = Args.size(); i < e; ++i)
+ S += *Args[i];
+ return S;
+}
+const VariadicFunction<std::string, std::string, StringCatImpl> StringCat = {};
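+// Each call to StringCat() packs pointers to its arguments into an
+// ArrayRef<const std::string *> and forwards them to StringCatImpl.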
+
+TEST(VariadicFunctionTest, WorksForClassTypes) {
+ EXPECT_EQ("", StringCat());
+ EXPECT_EQ("a", StringCat("a"));
+ EXPECT_EQ("abc", StringCat("a", "bc"));
+ EXPECT_EQ("0123456789abcdefghijklmnopqrstuv",
+ StringCat("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
+ "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
+ "k", "l", "m", "n", "o", "p", "q", "r", "s", "t",
+ "u", "v"));
+}
+
+// Defines a variadic function Sum(), whose arguments and return value
+// have primitive types.
+// The return type of SumImpl() is deliberately different from its
+// argument type, as we want to test that this works.
+long SumImpl(ArrayRef<const int *> Args) {
+ long Result = 0;
+ for (unsigned i = 0, e = Args.size(); i < e; ++i)
+ Result += *Args[i];
+ return Result;
+}
+const VariadicFunction<long, int, SumImpl> Sum = {};
+
+TEST(VariadicFunctionTest, WorksForPrimitiveTypes) {
+ EXPECT_EQ(0, Sum());
+ EXPECT_EQ(1, Sum(1));
+ EXPECT_EQ(12, Sum(10, 2));
+ EXPECT_EQ(1234567, Sum(1000000, 200000, 30000, 4000, 500, 60, 7));
+}
+
+// Appends an array of strings to *Dest and returns the number of
+// characters appended.
+int StringAppendImpl(std::string *Dest, ArrayRef<const std::string *> Args) {
+ int Chars = 0;
+ for (unsigned i = 0, e = Args.size(); i < e; ++i) {
+ Chars += Args[i]->size();
+ *Dest += *Args[i];
+ }
+ return Chars;
+}
+const VariadicFunction1<int, std::string *, std::string,
+ StringAppendImpl> StringAppend = {};
+
+TEST(VariadicFunction1Test, Works) {
+ std::string S0("hi");
+ EXPECT_EQ(0, StringAppend(&S0));
+ EXPECT_EQ("hi", S0);
+
+ std::string S1("bin");
+ EXPECT_EQ(2, StringAppend(&S1, "go"));
+ EXPECT_EQ("bingo", S1);
+
+ std::string S4("Fab4");
+ EXPECT_EQ(4 + 4 + 6 + 5,
+ StringAppend(&S4, "John", "Paul", "George", "Ringo"));
+ EXPECT_EQ("Fab4JohnPaulGeorgeRingo", S4);
+}
+
+// Counts how many optional arguments fall in the given range.
+// Returns the result in *NumInRange. We make the return type void
+// as we want to test that VariadicFunction* can handle it.
+void CountInRangeImpl(int *NumInRange, int Low, int High,
+ ArrayRef<const int *> Args) {
+ *NumInRange = 0;
+ for (unsigned i = 0, e = Args.size(); i < e; ++i)
+ if (Low <= *Args[i] && *Args[i] <= High)
+ ++(*NumInRange);
+}
+const VariadicFunction3<void, int *, int, int, int,
+ CountInRangeImpl> CountInRange = {};
+
+TEST(VariadicFunction3Test, Works) {
+ int N = -1;
+ CountInRange(&N, -100, 100);
+ EXPECT_EQ(0, N);
+
+ CountInRange(&N, -100, 100, 42);
+ EXPECT_EQ(1, N);
+
+ CountInRange(&N, -100, 100, 1, 999, -200, 42);
+ EXPECT_EQ(2, N);
+}
+
+} // namespace
diff --git a/unittests/Bitcode/BitReaderTest.cpp b/unittests/Bitcode/BitReaderTest.cpp
new file mode 100644
index 000000000000..68cfe2836a29
--- /dev/null
+++ b/unittests/Bitcode/BitReaderTest.cpp
@@ -0,0 +1,65 @@
+//===- llvm/unittest/Bitcode/BitReaderTest.cpp - Tests for BitReader ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+static Module *makeLLVMModule() {
+ Module* Mod = new Module("test-mem", getGlobalContext());
+
+ FunctionType* FuncTy =
+ FunctionType::get(Type::getVoidTy(Mod->getContext()), false);
+ Function* Func = Function::Create(FuncTy,GlobalValue::ExternalLinkage,
+ "func", Mod);
+
+ BasicBlock* Entry = BasicBlock::Create(Mod->getContext(), "entry", Func);
+ new UnreachableInst(Mod->getContext(), Entry);
+
+ BasicBlock* BB = BasicBlock::Create(Mod->getContext(), "bb", Func);
+ new UnreachableInst(Mod->getContext(), BB);
+
+ PointerType* Int8Ptr = Type::getInt8PtrTy(Mod->getContext());
+ new GlobalVariable(*Mod, Int8Ptr, /*isConstant=*/true,
+ GlobalValue::ExternalLinkage,
+ BlockAddress::get(BB), "table");
+
+ return Mod;
+}
+
+static void writeModuleToBuffer(SmallVectorImpl<char> &Buffer) {
+ Module *Mod = makeLLVMModule();
+ raw_svector_ostream OS(Buffer);
+ WriteBitcodeToFile(Mod, OS);
+}
+
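+// PR11677: a blockaddress constant in a global forces lazy materialization of
+// the function it points into; the verifier must accept the resulting module.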
+TEST(BitReaderTest, MaterializeFunctionsForBlockAddr) { // PR11677
+ SmallString<1024> Mem;
+ writeModuleToBuffer(Mem);
+ MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(Mem.str(), "test", false);
+ std::string errMsg;
+  Module *m = getLazyBitcodeModule(Buffer, getGlobalContext(), &errMsg);
+  ASSERT_TRUE(m != 0) << "Failed to parse bitcode: " << errMsg;
+ PassManager passes;
+ passes.add(createVerifierPass());
+ passes.run(*m);
+}
+
+}
+}
diff --git a/unittests/Bitcode/Makefile b/unittests/Bitcode/Makefile
new file mode 100644
index 000000000000..aa437e7e2cc5
--- /dev/null
+++ b/unittests/Bitcode/Makefile
@@ -0,0 +1,15 @@
+##===- unittests/Bitcode/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TESTNAME = Bitcode
+LINK_COMPONENTS := core support bitreader bitwriter
+
+include $(LEVEL)/Makefile.config
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 81d702981f82..5d691728d80f 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -60,19 +60,23 @@ add_llvm_unittest(ADT
ADT/DenseMapTest.cpp
ADT/DenseSetTest.cpp
ADT/FoldingSet.cpp
+ ADT/HashingTest.cpp
ADT/ilistTest.cpp
ADT/ImmutableSetTest.cpp
ADT/IntEqClassesTest.cpp
ADT/IntervalMapTest.cpp
+ ADT/IntrusiveRefCntPtrTest.cpp
ADT/PackedVectorTest.cpp
ADT/SmallBitVectorTest.cpp
ADT/SmallStringTest.cpp
ADT/SmallVectorTest.cpp
ADT/SparseBitVectorTest.cpp
+ ADT/SparseSetTest.cpp
ADT/StringMapTest.cpp
ADT/StringRefTest.cpp
ADT/TripleTest.cpp
ADT/TwineTest.cpp
+ ADT/VariadicFunctionTest.cpp
)
add_llvm_unittest(Analysis
@@ -83,11 +87,35 @@ add_llvm_unittest(ExecutionEngine
ExecutionEngine/ExecutionEngineTest.cpp
)
+if( LLVM_USE_INTEL_JITEVENTS )
+ include_directories( ${LLVM_INTEL_JITEVENTS_INCDIR} )
+ link_directories( ${LLVM_INTEL_JITEVENTS_LIBDIR} )
+ set(ProfileTestSources
+ ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
+ )
+ set(LLVM_LINK_COMPONENTS
+ ${LLVM_LINK_COMPONENTS}
+ IntelJITEvents
+ )
+endif( LLVM_USE_INTEL_JITEVENTS )
+
+if( LLVM_USE_OPROFILE )
+ set(ProfileTestSources
+ ${ProfileTestSources}
+ ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
+ )
+ set(LLVM_LINK_COMPONENTS
+ ${LLVM_LINK_COMPONENTS}
+ OProfileJIT
+ )
+endif( LLVM_USE_OPROFILE )
+
set(JITTestsSources
ExecutionEngine/JIT/JITEventListenerTest.cpp
ExecutionEngine/JIT/JITMemoryManagerTest.cpp
ExecutionEngine/JIT/JITTest.cpp
ExecutionEngine/JIT/MultiJITTest.cpp
+ ${ProfileTestSources}
)
if(MSVC)
@@ -96,7 +124,7 @@ endif()
add_llvm_unittest(ExecutionEngine/JIT ${JITTestsSources})
-if(MINGW)
+if(MINGW OR CYGWIN)
set_property(TARGET JITTests PROPERTY LINK_FLAGS -Wl,--export-all-symbols)
endif()
@@ -111,6 +139,7 @@ set(VMCoreSources
VMCore/PassManagerTest.cpp
VMCore/ValueMapTest.cpp
VMCore/VerifierTest.cpp
+ VMCore/DominatorTreeTest.cpp
)
# MSVC9 and 8 cannot compile ValueMapTest.cpp due to their bug.
@@ -121,6 +150,10 @@ endif()
add_llvm_unittest(VMCore ${VMCoreSources})
+add_llvm_unittest(Bitcode
+ Bitcode/BitReaderTest.cpp
+ )
+
set(LLVM_LINK_COMPONENTS
Support
Core
@@ -132,6 +165,7 @@ add_llvm_unittest(Support
Support/CommandLineTest.cpp
Support/ConstantRangeTest.cpp
Support/EndianTest.cpp
+ Support/JSONParserTest.cpp
Support/LeakDetectorTest.cpp
Support/MathExtrasTest.cpp
Support/Path.cpp
@@ -141,4 +175,5 @@ add_llvm_unittest(Support
Support/TimeValue.cpp
Support/TypeBuilderTest.cpp
Support/ValueHandleTest.cpp
+ Support/YAMLParserTest.cpp
)
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
index 4dcef20c6e77..74a2ccdd0663 100644
--- a/unittests/ExecutionEngine/ExecutionEngineTest.cpp
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -22,12 +22,13 @@ namespace {
class ExecutionEngineTest : public testing::Test {
protected:
ExecutionEngineTest()
- : M(new Module("<main>", getGlobalContext())),
- Engine(EngineBuilder(M).create()) {
+ : M(new Module("<main>", getGlobalContext())), Error(""),
+ Engine(EngineBuilder(M).setErrorStr(&Error).create()) {
}
virtual void SetUp() {
- ASSERT_TRUE(Engine.get() != NULL);
+ ASSERT_TRUE(Engine.get() != NULL) << "EngineBuilder returned error: '"
+ << Error << "'";
}
GlobalVariable *NewExtGlobal(Type *T, const Twine &Name) {
@@ -36,6 +37,7 @@ protected:
}
Module *const M;
+ std::string Error;
const OwningPtr<ExecutionEngine> Engine;
};
diff --git a/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
new file mode 100644
index 000000000000..8ed7a15be37c
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/IntelJITEventListenerTest.cpp
@@ -0,0 +1,110 @@
+//===- IntelJITEventListenerTest.cpp - Tests for IntelJITEventListener ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITEventListenerTestCommon.h"
+
+using namespace llvm;
+
+#include "llvm/ExecutionEngine/IntelJITEventsWrapper.h"
+
+#include <map>
+#include <list>
+
+namespace {
+
+// map of function ("method") IDs to source locations
+NativeCodeMap ReportedDebugFuncs;
+
+} // namespace
+
+/// Mock implementation of the Intel JIT API jitprofiling library.
+namespace test_jitprofiling {
+
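+// Mock callback: records line tables into ReportedDebugFuncs on
+// METHOD_LOAD_FINISHED and erases the entry again on METHOD_UNLOAD_START.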
+int NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) {
+ switch (EventType) {
+ case iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED: {
+ EXPECT_TRUE(0 != EventSpecificData);
+ iJIT_Method_Load* msg = static_cast<iJIT_Method_Load*>(EventSpecificData);
+
+ ReportedDebugFuncs[msg->method_id];
+
+ for(unsigned int i = 0; i < msg->line_number_size; ++i) {
+ EXPECT_TRUE(0 != msg->line_number_table);
+ std::pair<std::string, unsigned int> loc(
+ std::string(msg->source_file_name),
+ msg->line_number_table[i].LineNumber);
+ ReportedDebugFuncs[msg->method_id].push_back(loc);
+ }
+ }
+ break;
+ case iJVM_EVENT_TYPE_METHOD_UNLOAD_START: {
+ EXPECT_TRUE(0 != EventSpecificData);
+ unsigned int UnloadId
+ = *reinterpret_cast<unsigned int*>(EventSpecificData);
+ EXPECT_TRUE(1 == ReportedDebugFuncs.erase(UnloadId));
+  }
+  break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+iJIT_IsProfilingActiveFlags IsProfilingActive(void) {
+  // For testing, pretend we have an Intel Parallel Amplifier XE 2011
+  // instance attached.
+ return iJIT_SAMPLING_ON;
+}
+
+unsigned int GetNewMethodID(void) {
+ static unsigned int id = 0;
+ return ++id;
+}
+
+} //namespace test_jitprofiling
+
+class IntelJITEventListenerTest
+ : public JITEventListenerTestBase<IntelJITEventsWrapper> {
+public:
+ IntelJITEventListenerTest()
+ : JITEventListenerTestBase<IntelJITEventsWrapper>(
+ new IntelJITEventsWrapper(test_jitprofiling::NotifyEvent, 0,
+ test_jitprofiling::IsProfilingActive, 0, 0,
+ test_jitprofiling::GetNewMethodID))
+ {
+ EXPECT_TRUE(0 != MockWrapper);
+
+ Listener.reset(JITEventListener::createIntelJITEventListener(
+ MockWrapper.get()));
+ EXPECT_TRUE(0 != Listener);
+ EE->RegisterJITEventListener(Listener.get());
+ }
+};
+
+TEST_F(IntelJITEventListenerTest, NoDebugInfo) {
+ TestNoDebugInfo(ReportedDebugFuncs);
+}
+
+TEST_F(IntelJITEventListenerTest, SingleLine) {
+ TestSingleLine(ReportedDebugFuncs);
+}
+
+TEST_F(IntelJITEventListenerTest, MultipleLines) {
+ TestMultipleLines(ReportedDebugFuncs);
+}
+
+// This testcase is disabled because the Intel JIT API does not support a
+// single JITted function with source lines associated with multiple files.
+/*
+TEST_F(IntelJITEventListenerTest, MultipleFiles) {
+ TestMultipleFiles(ReportedDebugFuncs);
+}
+*/
+
+testing::Environment* const jit_env =
+ testing::AddGlobalTestEnvironment(new JITEnvironment);
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
new file mode 100644
index 000000000000..53608cbfce3d
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
@@ -0,0 +1,209 @@
+//===- JITEventListenerTestCommon.h - Helper for JITEventListener tests --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JIT_EVENT_LISTENER_TEST_COMMON_H
+#define JIT_EVENT_LISTENER_TEST_COMMON_H
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+
+#include <vector>
+#include <string>
+#include <utility>
+
+typedef std::vector<std::pair<std::string, unsigned int> > SourceLocations;
+typedef std::map<uint64_t, SourceLocations> NativeCodeMap;
+
+class JITEnvironment : public testing::Environment {
+ virtual void SetUp() {
+ // Required to create a JIT.
+ llvm::InitializeNativeTarget();
+ }
+};
+
+inline unsigned int getLine() {
+ return 12;
+}
+
+inline unsigned int getCol() {
+ return 0;
+}
+
+inline const char* getFilename() {
+ return "mock_source_file.cpp";
+}
+
+// Test fixture shared by tests for listener implementations
+template<typename WrapperT>
+class JITEventListenerTestBase : public testing::Test {
+protected:
+ llvm::OwningPtr<WrapperT> MockWrapper;
+ llvm::OwningPtr<llvm::JITEventListener> Listener;
+
+public:
+ llvm::Module* M;
+ llvm::MDNode* Scope;
+ llvm::ExecutionEngine* EE;
+ llvm::DIBuilder* DebugBuilder;
+ llvm::IRBuilder<> Builder;
+
+ JITEventListenerTestBase(WrapperT* w)
+ : MockWrapper(w)
+ , M(new llvm::Module("module", llvm::getGlobalContext()))
+ , EE(llvm::EngineBuilder(M)
+ .setEngineKind(llvm::EngineKind::JIT)
+ .setOptLevel(llvm::CodeGenOpt::None)
+ .create())
+ , DebugBuilder(new llvm::DIBuilder(*M))
+ , Builder(llvm::getGlobalContext())
+ {
+ DebugBuilder->createCompileUnit(llvm::dwarf::DW_LANG_C_plus_plus,
+ "JIT",
+ "JIT",
+ "JIT",
+ true,
+ "",
+ 1);
+
+ Scope = DebugBuilder->createFile(getFilename(), ".");
+ }
+
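+  // Builds an int32(int32) function named "id"; every debug location after
+  // the first adds one multiply-add pair tagged with that location.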
+ llvm::Function *buildFunction(const SourceLocations& DebugLocations) {
+ using namespace llvm;
+
+ LLVMContext& GlobalContext = getGlobalContext();
+
+ SourceLocations::const_iterator CurrentDebugLocation
+ = DebugLocations.begin();
+
+ if (CurrentDebugLocation != DebugLocations.end()) {
+ DebugLoc DebugLocation = DebugLoc::get(getLine(), getCol(),
+ DebugBuilder->createFile(CurrentDebugLocation->first, "."));
+ Builder.SetCurrentDebugLocation(DebugLocation);
+ CurrentDebugLocation++;
+ }
+
+ Function *Result = Function::Create(
+ TypeBuilder<int32_t(int32_t), false>::get(GlobalContext),
+ GlobalValue::ExternalLinkage, "id", M);
+ Value *Arg = Result->arg_begin();
+ BasicBlock *BB = BasicBlock::Create(M->getContext(), "entry", Result);
+ Builder.SetInsertPoint(BB);
+ Value* one = ConstantInt::get(GlobalContext, APInt(32, 1));
+ for(; CurrentDebugLocation != DebugLocations.end();
+ ++CurrentDebugLocation) {
+ Arg = Builder.CreateMul(Arg, Builder.CreateAdd(Arg, one));
+ Builder.SetCurrentDebugLocation(
+ DebugLoc::get(CurrentDebugLocation->second, 0,
+ DebugBuilder->createFile(CurrentDebugLocation->first, ".")));
+ }
+ Builder.CreateRet(Arg);
+ return Result;
+ }
+
+ void TestNoDebugInfo(NativeCodeMap& ReportedDebugFuncs) {
+ SourceLocations DebugLocations;
+ llvm::Function* f = buildFunction(DebugLocations);
+ EXPECT_TRUE(0 != f);
+
+    // Trigger JIT compilation and callbacks to our listener.
+ EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+ EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+
+ EE->freeMachineCodeForFunction(f);
+ EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+ }
+
+ void TestSingleLine(NativeCodeMap& ReportedDebugFuncs) {
+ SourceLocations DebugLocations;
+ DebugLocations.push_back(std::make_pair(std::string(getFilename()),
+ getLine()));
+ llvm::Function* f = buildFunction(DebugLocations);
+ EXPECT_TRUE(0 != f);
+
+ EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+ EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+ EXPECT_STREQ(ReportedDebugFuncs.begin()->second.begin()->first.c_str(),
+ getFilename());
+ EXPECT_EQ(ReportedDebugFuncs.begin()->second.begin()->second, getLine());
+
+ EE->freeMachineCodeForFunction(f);
+ EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+ }
+
+ void TestMultipleLines(NativeCodeMap& ReportedDebugFuncs) {
+ using namespace std;
+
+ SourceLocations DebugLocations;
+ unsigned int c = 5;
+ for(unsigned int i = 0; i < c; ++i) {
+ DebugLocations.push_back(make_pair(string(getFilename()), getLine() + i));
+ }
+
+ llvm::Function* f = buildFunction(DebugLocations);
+ EXPECT_TRUE(0 != f);
+
+ EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+ EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+ SourceLocations& FunctionInfo = ReportedDebugFuncs.begin()->second;
+ EXPECT_EQ(c, FunctionInfo.size());
+
+ int VerifyCount = 0;
+ for(SourceLocations::iterator i = FunctionInfo.begin();
+ i != FunctionInfo.end();
+ ++i) {
+ EXPECT_STREQ(i->first.c_str(), getFilename());
+ EXPECT_EQ(i->second, getLine() + VerifyCount);
+ VerifyCount++;
+ }
+
+ EE->freeMachineCodeForFunction(f);
+ EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+ }
+
+ void TestMultipleFiles(NativeCodeMap& ReportedDebugFuncs) {
+
+ std::string secondFilename("another_file.cpp");
+
+ SourceLocations DebugLocations;
+ DebugLocations.push_back(std::make_pair(std::string(getFilename()),
+ getLine()));
+ DebugLocations.push_back(std::make_pair(secondFilename, getLine()));
+ llvm::Function* f = buildFunction(DebugLocations);
+ EXPECT_TRUE(0 != f);
+
+ EXPECT_TRUE(0 != EE->getPointerToFunction(f));
+ EXPECT_TRUE(1 == ReportedDebugFuncs.size());
+ SourceLocations& FunctionInfo = ReportedDebugFuncs.begin()->second;
+ EXPECT_TRUE(2 == FunctionInfo.size());
+
+ EXPECT_STREQ(FunctionInfo.at(0).first.c_str(), getFilename());
+ EXPECT_STREQ(FunctionInfo.at(1).first.c_str(), secondFilename.c_str());
+
+ EXPECT_EQ(FunctionInfo.at(0).second, getLine());
+ EXPECT_EQ(FunctionInfo.at(1).second, getLine());
+
+ EE->freeMachineCodeForFunction(f);
+ EXPECT_TRUE(ReportedDebugFuncs.size() == 0);
+ }
+};
+
+#endif //JIT_EVENT_LISTENER_TEST_COMMON_H
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 2ef273020f9e..fa52321b32e0 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -64,6 +64,10 @@ public:
: Base(JITMemoryManager::CreateDefaultMemManager()) {
stubsAllocated = 0;
}
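+  // The newer JITMemoryManager hooks are plain pass-throughs to the
+  // default memory manager.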
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) {
+ return Base->getPointerToNamedFunction(Name, AbortOnFailure);
+ }
virtual void setMemoryWritable() { Base->setMemoryWritable(); }
virtual void setMemoryExecutable() { Base->setMemoryExecutable(); }
@@ -113,6 +117,14 @@ public:
EndFunctionBodyCall(F, FunctionStart, FunctionEnd));
Base->endFunctionBody(F, FunctionStart, FunctionEnd);
}
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return Base->allocateDataSection(Size, Alignment, SectionID);
+ }
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID) {
+ return Base->allocateCodeSection(Size, Alignment, SectionID);
+ }
virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
return Base->allocateSpace(Size, Alignment);
}
@@ -184,7 +196,7 @@ bool LoadAssemblyInto(Module *M, const char *assembly) {
NULL != ParseAssemblyString(assembly, M, Error, M->getContext());
std::string errMsg;
raw_string_ostream os(errMsg);
- Error.Print("", os);
+ Error.print("", os);
EXPECT_TRUE(success) << os.str();
return success;
}
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index f5abe75a8f68..c404fb002a62 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -12,6 +12,30 @@ TESTNAME = JIT
LINK_COMPONENTS := asmparser bitreader bitwriter core jit native support
include $(LEVEL)/Makefile.config
+
+SOURCES := JITEventListenerTest.cpp JITMemoryManagerTest.cpp JITTest.cpp MultiJITTest.cpp
+
+
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+ # Build the Intel JIT Events interface tests
+ SOURCES += IntelJITEventListenerTest.cpp
+
+ # Add the Intel JIT Events include directory
+ CPPFLAGS += -I$(INTEL_JITEVENTS_INCDIR)
+
+  # Link against the LLVM Intel JIT Events interface library
+ LINK_COMPONENTS += inteljitevents
+endif
+
+ifeq ($(USE_OPROFILE), 1)
+ # Build the OProfile JIT interface tests
+ SOURCES += OProfileJITEventListenerTest.cpp
+
+ # Link against the LLVM oprofile interface library
+ LINK_COMPONENTS += oprofilejit
+endif
+
+
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
# Permit these tests to use the JIT's symbolic lookup.
diff --git a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
index 91ea64aa53c9..5b99d5b676e2 100644
--- a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
@@ -26,7 +26,7 @@ bool LoadAssemblyInto(Module *M, const char *assembly) {
NULL != ParseAssemblyString(assembly, M, Error, M->getContext());
std::string errMsg;
raw_string_ostream os(errMsg);
- Error.Print("", os);
+ Error.print("", os);
EXPECT_TRUE(success) << os.str();
return success;
}
@@ -160,8 +160,21 @@ TEST(MultiJitTest, JitPool) {
EXPECT_EQ(getPointerToNamedFunction("foo2"), foo2);
// Symbol search
- EXPECT_EQ((intptr_t)getPointerToNamedFunction("getPointerToNamedFunction"),
- (intptr_t)&getPointerToNamedFunction);
+ intptr_t
+ sa = (intptr_t)getPointerToNamedFunction("getPointerToNamedFunction");
+ EXPECT_TRUE(sa != 0);
+ intptr_t fa = (intptr_t)&getPointerToNamedFunction;
+ EXPECT_TRUE(fa != 0);
+#ifdef __i386__
+ // getPointerToNamedFunction might be indirect jump on Win32 --enable-shared.
+ // FF 25 <disp32>: jmp *(pointer to IAT)
+ if (sa != fa && memcmp((char *)fa, "\xFF\x25", 2) == 0) {
+ fa = *(intptr_t *)(fa + 2); // Address to IAT
+ EXPECT_TRUE(fa != 0);
+ fa = *(intptr_t *)fa; // Bound value of IAT
+ }
+#endif
+ EXPECT_TRUE(sa == fa);
}
#endif // !defined(__arm__)
diff --git a/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
new file mode 100644
index 000000000000..9b0ee609923c
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
@@ -0,0 +1,166 @@
+//===- OProfileJITEventListenerTest.cpp - OProfileJITEventListener tests -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/OProfileWrapper.h"
+#include "JITEventListenerTestCommon.h"
+
+#include <map>
+#include <list>
+
+using namespace llvm;
+
+namespace {
+
+struct OprofileNativeFunction {
+ const char* Name;
+ uint64_t Addr;
+ const void* CodePtr;
+ unsigned int CodeSize;
+
+ OprofileNativeFunction(const char* name,
+ uint64_t addr,
+ const void* code,
+ unsigned int size)
+ : Name(name)
+ , Addr(addr)
+ , CodePtr(code)
+ , CodeSize(size) {
+ }
+};
+
+typedef std::list<OprofileNativeFunction> NativeFunctionList;
+typedef std::list<debug_line_info> NativeDebugList;
+NativeFunctionList NativeFunctions;
+
+NativeCodeMap ReportedDebugFuncs;
+
+} // namespace
+
+/// Mock implementation of the opagent library.
+namespace test_opagent {
+
+op_agent_t globalAgent = reinterpret_cast<op_agent_t>(42);
+
+op_agent_t open_agent()
+{
+ // return non-null op_agent_t
+ return globalAgent;
+}
+
+int close_agent(op_agent_t agent)
+{
+ EXPECT_EQ(globalAgent, agent);
+ return 0;
+}
+
+int write_native_code(op_agent_t agent,
+ const char* name,
+ uint64_t addr,
+ void const* code,
+ unsigned int size)
+{
+ EXPECT_EQ(globalAgent, agent);
+ OprofileNativeFunction func(name, addr, code, size);
+ NativeFunctions.push_back(func);
+
+  // Verify no other registration has taken place for the same address.
+ EXPECT_TRUE(ReportedDebugFuncs.find(addr) == ReportedDebugFuncs.end());
+
+ ReportedDebugFuncs[addr];
+ return 0;
+}
+
+int write_debug_line_info(op_agent_t agent,
+ void const* code,
+ size_t num_entries,
+ struct debug_line_info const* info)
+{
+ EXPECT_EQ(globalAgent, agent);
+
+  // Verify the code has been loaded first.
+ uint64_t addr = reinterpret_cast<uint64_t>(code);
+ NativeCodeMap::iterator i = ReportedDebugFuncs.find(addr);
+ EXPECT_TRUE(i != ReportedDebugFuncs.end());
+
+ NativeDebugList NativeInfo(info, info + num_entries);
+
+ SourceLocations locs;
+ for(NativeDebugList::iterator i = NativeInfo.begin();
+ i != NativeInfo.end();
+ ++i) {
+ locs.push_back(std::make_pair(std::string(i->filename), i->lineno));
+ }
+ ReportedDebugFuncs[addr] = locs;
+
+ return 0;
+}
+
+int unload_native_code(op_agent_t agent, uint64_t addr) {
+ EXPECT_EQ(globalAgent, agent);
+
+  // Verify that something for the given JIT addr has been loaded first.
+ NativeCodeMap::iterator i = ReportedDebugFuncs.find(addr);
+ EXPECT_TRUE(i != ReportedDebugFuncs.end());
+ ReportedDebugFuncs.erase(i);
+ return 0;
+}
+
+int version() {
+ return 1;
+}
+
+bool is_oprofile_running() {
+ return true;
+}
+
+} //namespace test_opagent
+
+class OProfileJITEventListenerTest
+: public JITEventListenerTestBase<OProfileWrapper>
+{
+public:
+ OProfileJITEventListenerTest()
+ : JITEventListenerTestBase<OProfileWrapper>(
+ new OProfileWrapper(test_opagent::open_agent,
+ test_opagent::close_agent,
+ test_opagent::write_native_code,
+ test_opagent::write_debug_line_info,
+ test_opagent::unload_native_code,
+ test_opagent::version,
+ test_opagent::version,
+ test_opagent::is_oprofile_running))
+ {
+ EXPECT_TRUE(0 != MockWrapper);
+
+ Listener.reset(JITEventListener::createOProfileJITEventListener(
+ MockWrapper.get()));
+ EXPECT_TRUE(0 != Listener);
+ EE->RegisterJITEventListener(Listener.get());
+ }
+};
+
+TEST_F(OProfileJITEventListenerTest, NoDebugInfo) {
+ TestNoDebugInfo(ReportedDebugFuncs);
+}
+
+TEST_F(OProfileJITEventListenerTest, SingleLine) {
+ TestSingleLine(ReportedDebugFuncs);
+}
+
+TEST_F(OProfileJITEventListenerTest, MultipleLines) {
+ TestMultipleLines(ReportedDebugFuncs);
+}
+
+TEST_F(OProfileJITEventListenerTest, MultipleFiles) {
+ TestMultipleFiles(ReportedDebugFuncs);
+}
+
+testing::Environment* const jit_env =
+ testing::AddGlobalTestEnvironment(new JITEnvironment);
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
index d4ef92ffb392..a0395cdad3bf 100644
--- a/unittests/ExecutionEngine/Makefile
+++ b/unittests/ExecutionEngine/Makefile
@@ -10,9 +10,7 @@
LEVEL = ../..
TESTNAME = ExecutionEngine
LINK_COMPONENTS := engine interpreter
-
-include $(LEVEL)/Makefile.config
-
PARALLEL_DIRS = JIT
+include $(LEVEL)/Makefile.config
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/Makefile b/unittests/Makefile
index 0401cd1c673a..27afccf02e36 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -9,7 +9,7 @@
LEVEL = ..
-PARALLEL_DIRS = ADT ExecutionEngine Support Transforms VMCore Analysis
+PARALLEL_DIRS = ADT ExecutionEngine Support Transforms VMCore Analysis Bitcode
include $(LEVEL)/Makefile.common
diff --git a/unittests/Makefile.unittest b/unittests/Makefile.unittest
index 580ad7d71918..bd32aed4b0a2 100644
--- a/unittests/Makefile.unittest
+++ b/unittests/Makefile.unittest
@@ -34,7 +34,7 @@ ifneq ($(HAVE_PTHREAD), 1)
CPP.Flags += -DGTEST_HAS_PTHREAD=0
endif
-TESTLIBS = -lGoogleTest -lUnitTestMain
+TESTLIBS = -lgtest -lgtest_main
ifeq ($(ENABLE_SHARED), 1)
ifneq (,$(RPATH))
diff --git a/unittests/Support/AllocatorTest.cpp b/unittests/Support/AllocatorTest.cpp
index 6c0fca90456e..8b463c11dfca 100644
--- a/unittests/Support/AllocatorTest.cpp
+++ b/unittests/Support/AllocatorTest.cpp
@@ -93,6 +93,14 @@ TEST(AllocatorTest, TestOverflow) {
EXPECT_EQ(2U, Alloc.GetNumSlabs());
}
+// Test allocating with a size larger than the initial slab size.
+TEST(AllocatorTest, TestSmallSlabSize) {
+ BumpPtrAllocator Alloc(128);
+
+ Alloc.Allocate(200, 0);
+ EXPECT_EQ(2U, Alloc.GetNumSlabs());
+}
+
// Mock slab allocator that returns slabs aligned on 4096 bytes. There is no
// easy portable way to do this, so this is kind of a hack.
class MockSlabAllocator : public SlabAllocator {
diff --git a/unittests/Support/BlockFrequencyTest.cpp b/unittests/Support/BlockFrequencyTest.cpp
index edeea9b357f5..df256424b82d 100644
--- a/unittests/Support/BlockFrequencyTest.cpp
+++ b/unittests/Support/BlockFrequencyTest.cpp
@@ -53,4 +53,33 @@ TEST(BlockFrequencyTest, MaxToMax) {
EXPECT_EQ(Freq.getFrequency(), UINT64_MAX);
}
+TEST(BlockFrequencyTest, ProbabilityCompare) {
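+  // A and B encode the same probability (4/5) at different scales, so every
+  // comparison must treat them as equal.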
+ BranchProbability A(4, 5);
+ BranchProbability B(4U << 29, 5U << 29);
+ BranchProbability C(3, 4);
+
+ EXPECT_TRUE(A == B);
+ EXPECT_FALSE(A != B);
+ EXPECT_FALSE(A < B);
+ EXPECT_FALSE(A > B);
+ EXPECT_TRUE(A <= B);
+ EXPECT_TRUE(A >= B);
+
+ EXPECT_FALSE(B == C);
+ EXPECT_TRUE(B != C);
+ EXPECT_FALSE(B < C);
+ EXPECT_TRUE(B > C);
+ EXPECT_FALSE(B <= C);
+ EXPECT_TRUE(B >= C);
+
+ BranchProbability BigZero(0, UINT32_MAX);
+ BranchProbability BigOne(UINT32_MAX, UINT32_MAX);
+ EXPECT_FALSE(BigZero == BigOne);
+ EXPECT_TRUE(BigZero != BigOne);
+ EXPECT_TRUE(BigZero < BigOne);
+ EXPECT_FALSE(BigZero > BigOne);
+ EXPECT_TRUE(BigZero <= BigOne);
+ EXPECT_FALSE(BigZero >= BigOne);
+}
+
}
diff --git a/unittests/Support/Casting.cpp b/unittests/Support/Casting.cpp
index ae84693bd636..ca0b40b1f55b 100644
--- a/unittests/Support/Casting.cpp
+++ b/unittests/Support/Casting.cpp
@@ -69,7 +69,9 @@ namespace {
const foo *null_foo = NULL;
+bar B;
extern bar &B1;
+bar &B1 = B;
extern const bar *B2;
// test various configurations of const
const bar &B3 = B1;
@@ -145,9 +147,6 @@ TEST(CastingTest, dyn_cast_or_null) {
//foo &F23 = cast_or_null<foo>(B1);
//const foo &F24 = cast_or_null<foo>(B3);
-
-bar B;
-bar &B1 = B;
const bar *B2 = &B;
} // anonymous namespace
diff --git a/unittests/Support/IRBuilderTest.cpp b/unittests/Support/IRBuilderTest.cpp
index 5d635ae361e0..b15de9ed3839 100644
--- a/unittests/Support/IRBuilderTest.cpp
+++ b/unittests/Support/IRBuilderTest.cpp
@@ -19,6 +19,7 @@
using namespace llvm;
+namespace {
class IRBuilderTest : public testing::Test {
protected:
virtual void SetUp() {
@@ -37,6 +38,7 @@ protected:
OwningPtr<Module> M;
BasicBlock *BB;
};
+}
TEST_F(IRBuilderTest, Lifetime) {
IRBuilder<> Builder(BB);
diff --git a/unittests/Support/JSONParserTest.cpp b/unittests/Support/JSONParserTest.cpp
new file mode 100644
index 000000000000..e9efb817c298
--- /dev/null
+++ b/unittests/Support/JSONParserTest.cpp
@@ -0,0 +1,191 @@
+//===- unittest/Support/JSONParserTest ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/JSONParser.h"
+#include "llvm/ADT/Twine.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+// Checks that the given input gives a parse error. Makes sure that an error
+// text is available and the parse fails.
+static void ExpectParseError(StringRef Message, StringRef Input) {
+ SourceMgr SM;
+ JSONParser Parser(Input, &SM);
+ EXPECT_FALSE(Parser.validate()) << Message << ": " << Input;
+ EXPECT_TRUE(Parser.failed()) << Message << ": " << Input;
+}
+
+// Checks that the given input can be parsed without error.
+static void ExpectParseSuccess(StringRef Message, StringRef Input) {
+ SourceMgr SM;
+ JSONParser Parser(Input, &SM);
+ EXPECT_TRUE(Parser.validate()) << Message << ": " << Input;
+}
+
+TEST(JSONParser, FailsOnEmptyString) {
+ ExpectParseError("Empty JSON text", "");
+}
+
+TEST(JSONParser, FailsIfStartsWithString) {
+ ExpectParseError("Top-level string", "\"x\"");
+}
+
+TEST(JSONParser, ParsesEmptyArray) {
+ ExpectParseSuccess("Empty array", "[]");
+}
+
+TEST(JSONParser, FailsIfNotClosingArray) {
+ ExpectParseError("Not closing array", "[");
+ ExpectParseError("Not closing array", " [ ");
+ ExpectParseError("Not closing array", " [x");
+}
+
+TEST(JSONParser, ParsesEmptyArrayWithWhitespace) {
+ ExpectParseSuccess("Array with spaces", " [ ] ");
+ ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n");
+}
+
+TEST(JSONParser, ParsesEmptyObject) {
+ ExpectParseSuccess("Empty object", "[{}]");
+}
+
+TEST(JSONParser, ParsesObject) {
+ ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]");
+}
+
+TEST(JSONParser, ParsesMultipleKeyValuePairsInObject) {
+ ExpectParseSuccess("Multiple key, value pairs",
+ "[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]");
+}
+
+TEST(JSONParser, FailsIfNotClosingObject) {
+ ExpectParseError("Missing close on empty", "[{]");
+ ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]");
+}
+
+TEST(JSONParser, FailsIfMissingColon) {
+ ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]");
+ ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]");
+}
+
+TEST(JSONParser, FailsOnMissingQuote) {
+ ExpectParseError("Missing open quote", "[{a\":\"b\"}]");
+ ExpectParseError("Missing closing quote", "[{\"a\":\"b}]");
+}
+
+TEST(JSONParser, ParsesEscapedQuotes) {
+ ExpectParseSuccess("Parses escaped string in key and value",
+ "[{\"a\":\"\\\"b\\\" \\\" \\\"\"}]");
+}
+
+TEST(JSONParser, ParsesEmptyString) {
+ ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]");
+}
+
+TEST(JSONParser, FailsOnMissingString) {
+ ExpectParseError("Missing value", "[{\"a\":}]");
+ ExpectParseError("Missing key", "[{:\"b\"}]");
+}
+
+TEST(JSONParser, ParsesMultipleObjects) {
+ ExpectParseSuccess(
+ "Multiple objects in array",
+ "["
+ " { \"a\" : \"b\" },"
+ " { \"a\" : \"b\" },"
+ " { \"a\" : \"b\" }"
+ "]");
+}
+
+TEST(JSONParser, FailsOnMissingComma) {
+ ExpectParseError(
+ "Missing comma",
+ "["
+ " { \"a\" : \"b\" }"
+ " { \"a\" : \"b\" }"
+ "]");
+}
+
+TEST(JSONParser, FailsOnSuperfluousComma) {
+ ExpectParseError("Superfluous comma in array", "[ { \"a\" : \"b\" }, ]");
+ ExpectParseError("Superfluous comma in object", "{ \"a\" : \"b\", }");
+}
+
+TEST(JSONParser, ParsesSpacesInBetweenTokens) {
+ ExpectParseSuccess(
+ "Various whitespace between tokens",
+ " \t \n\n \r [ \t \n\n \r"
+ " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+ " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r"
+ " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+ " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r");
+}
+
+TEST(JSONParser, ParsesArrayOfArrays) {
+ ExpectParseSuccess("Array of arrays", "[[]]");
+}
+
+TEST(JSONParser, HandlesEndOfFileGracefully) {
+ ExpectParseError("In string starting with EOF", "[\"");
+ ExpectParseError("In string hitting EOF", "[\" ");
+ ExpectParseError("In string escaping EOF", "[\" \\");
+ ExpectParseError("In array starting with EOF", "[");
+ ExpectParseError("In array element starting with EOF", "[[], ");
+ ExpectParseError("In array hitting EOF", "[[] ");
+ ExpectParseError("In array hitting EOF", "[[]");
+ ExpectParseError("In object hitting EOF", "{\"\"");
+}
+
+// Checks that the given string can be parsed into an identical string inside
+// of an array.
+static void ExpectCanParseString(StringRef String) {
+ std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+ SourceMgr SM;
+ JSONParser Parser(StringInArray, &SM);
+ const JSONArray *ParsedArray = dyn_cast<JSONArray>(Parser.parseRoot());
+ StringRef ParsedString =
+ dyn_cast<JSONString>(*ParsedArray->begin())->getRawText();
+ EXPECT_EQ(String, ParsedString.str());
+}
+
+// Checks that parsing the given string inside an array fails.
+static void ExpectCannotParseString(StringRef String) {
+ std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+ ExpectParseError((Twine("When parsing string \"") + String + "\"").str(),
+ StringInArray);
+}
+
+TEST(JSONParser, ParsesStrings) {
+ ExpectCanParseString("");
+ ExpectCannotParseString("\\");
+ ExpectCannotParseString("\"");
+ ExpectCanParseString(" ");
+ ExpectCanParseString("\\ ");
+ ExpectCanParseString("\\\"");
+ ExpectCannotParseString("\"\\");
+ ExpectCannotParseString(" \\");
+ ExpectCanParseString("\\\\");
+ ExpectCannotParseString("\\\\\\");
+ ExpectCanParseString("\\\\\\\\");
+ ExpectCanParseString("\\\" ");
+ ExpectCannotParseString("\\\\\" ");
+ ExpectCanParseString("\\\\\\\" ");
+ ExpectCanParseString(" \\\\ \\\" \\\\\\\" ");
+}
+
+TEST(JSONParser, WorksWithIteratorAlgorithms) {
+ SourceMgr SM;
+ JSONParser Parser("[\"1\", \"2\", \"3\", \"4\", \"5\", \"6\"]", &SM);
+ const JSONArray *Array = dyn_cast<JSONArray>(Parser.parseRoot());
+ EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
+}
+
+} // end namespace llvm
diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp
new file mode 100644
index 000000000000..bfeb0a7b6fba
--- /dev/null
+++ b/unittests/Support/ManagedStatic.cpp
@@ -0,0 +1,44 @@
+//===- llvm/unittest/Support/ManagedStatic.cpp - ManagedStatic tests ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Config/config.h"
+#ifdef HAVE_PTHREAD_H
+#include <pthread.h>
+#endif
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+#ifdef HAVE_PTHREAD_H
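+// test1::ms is lazily constructed on first dereference; two threads doing so
+// concurrently exercises ManagedStatic's thread-safe initialization.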
+namespace test1 {
+ llvm::ManagedStatic<int> ms;
+ void *helper(void*) {
+ *ms;
+ return NULL;
+ }
+}
+
+TEST(Initialize, MultipleThreads) {
+ // Run this test under tsan: http://code.google.com/p/data-race-test/
+
+ llvm_start_multithreaded();
+ pthread_t t1, t2;
+ pthread_create(&t1, NULL, test1::helper, NULL);
+ pthread_create(&t2, NULL, test1::helper, NULL);
+ pthread_join(t1, NULL);
+ pthread_join(t2, NULL);
+ llvm_stop_multithreaded();
+}
+#endif
+
+} // anonymous namespace
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 60d08bc92dbe..358dad0f838f 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -183,6 +183,11 @@ TEST_F(FileSystemTest, TempFiles) {
ASSERT_NO_ERROR(fs::unique_file("%%-%%-%%-%%.temp", FD2, TempPath2));
ASSERT_NE(TempPath.str(), TempPath2.str());
+ fs::file_status A, B;
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath), A));
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath2), B));
+ EXPECT_FALSE(fs::equivalent(A, B));
+
// Try to copy the first to the second.
EXPECT_EQ(
fs::copy_file(Twine(TempPath), Twine(TempPath2)), errc::file_exists);
@@ -204,6 +209,9 @@ TEST_F(FileSystemTest, TempFiles) {
bool equal;
ASSERT_NO_ERROR(fs::equivalent(Twine(TempPath), Twine(TempPath2), equal));
EXPECT_TRUE(equal);
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath), A));
+ ASSERT_NO_ERROR(fs::status(Twine(TempPath2), B));
+ EXPECT_TRUE(fs::equivalent(A, B));
// Remove Temp1.
::close(FileDescriptor);
@@ -223,6 +231,60 @@ TEST_F(FileSystemTest, DirectoryIteration) {
error_code ec;
for (fs::directory_iterator i(".", ec), e; i != e; i.increment(ec))
ASSERT_NO_ERROR(ec);
+
+ // Create a known hierarchy to recurse over.
+ bool existed;
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/a0/aa1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/a0/ab1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/dontlookhere/da1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/z0/za1", existed));
+ ASSERT_NO_ERROR(fs::create_directories(Twine(TestDirectory)
+ + "/recursive/pop/p1", existed));
+ typedef std::vector<std::string> v_t;
+ v_t visited;
+ for (fs::recursive_directory_iterator i(Twine(TestDirectory)
+ + "/recursive", ec), e; i != e; i.increment(ec)){
+ ASSERT_NO_ERROR(ec);
+ if (path::filename(i->path()) == "p1") {
+ i.pop();
+ // FIXME: recursive_directory_iterator should be more robust.
+ if (i == e) break;
+ }
+ if (path::filename(i->path()) == "dontlookhere")
+ i.no_push();
+ visited.push_back(path::filename(i->path()));
+ }
+ v_t::const_iterator a0 = std::find(visited.begin(), visited.end(), "a0");
+ v_t::const_iterator aa1 = std::find(visited.begin(), visited.end(), "aa1");
+ v_t::const_iterator ab1 = std::find(visited.begin(), visited.end(), "ab1");
+ v_t::const_iterator dontlookhere = std::find(visited.begin(), visited.end(),
+ "dontlookhere");
+ v_t::const_iterator da1 = std::find(visited.begin(), visited.end(), "da1");
+ v_t::const_iterator z0 = std::find(visited.begin(), visited.end(), "z0");
+ v_t::const_iterator za1 = std::find(visited.begin(), visited.end(), "za1");
+ v_t::const_iterator pop = std::find(visited.begin(), visited.end(), "pop");
+ v_t::const_iterator p1 = std::find(visited.begin(), visited.end(), "p1");
+
+ // Make sure that each path was visited correctly.
+ ASSERT_NE(a0, visited.end());
+ ASSERT_NE(aa1, visited.end());
+ ASSERT_NE(ab1, visited.end());
+ ASSERT_NE(dontlookhere, visited.end());
+ ASSERT_EQ(da1, visited.end()); // Not visited.
+ ASSERT_NE(z0, visited.end());
+ ASSERT_NE(za1, visited.end());
+ ASSERT_NE(pop, visited.end());
+ ASSERT_EQ(p1, visited.end()); // Not visited.
+
+ // Make sure that parents were visited before children. No other ordering
+ // guarantees can be made across siblings.
+ ASSERT_LT(a0, aa1);
+ ASSERT_LT(a0, ab1);
+ ASSERT_LT(z0, za1);
}
TEST_F(FileSystemTest, Magic) {
diff --git a/unittests/Support/YAMLParserTest.cpp b/unittests/Support/YAMLParserTest.cpp
new file mode 100644
index 000000000000..e88427ac09d3
--- /dev/null
+++ b/unittests/Support/YAMLParserTest.cpp
@@ -0,0 +1,179 @@
+//===- unittest/Support/YAMLParserTest ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/YAMLParser.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+
+// Checks that the given input gives a parse error. Makes sure that an error
+// text is available and the parse fails.
+static void ExpectParseError(StringRef Message, StringRef Input) {
+ SourceMgr SM;
+ yaml::Stream Stream(Input, SM);
+ EXPECT_FALSE(Stream.validate()) << Message << ": " << Input;
+ EXPECT_TRUE(Stream.failed()) << Message << ": " << Input;
+}
+
+// Checks that the given input can be parsed without error.
+static void ExpectParseSuccess(StringRef Message, StringRef Input) {
+ SourceMgr SM;
+ yaml::Stream Stream(Input, SM);
+ EXPECT_TRUE(Stream.validate()) << Message << ": " << Input;
+}
+
+TEST(YAMLParser, ParsesEmptyArray) {
+ ExpectParseSuccess("Empty array", "[]");
+}
+
+TEST(YAMLParser, FailsIfNotClosingArray) {
+ ExpectParseError("Not closing array", "[");
+ ExpectParseError("Not closing array", " [ ");
+ ExpectParseError("Not closing array", " [x");
+}
+
+TEST(YAMLParser, ParsesEmptyArrayWithWhitespace) {
+ ExpectParseSuccess("Array with spaces", " [ ] ");
+ ExpectParseSuccess("All whitespaces", "\t\r\n[\t\n \t\r ]\t\r \n\n");
+}
+
+TEST(YAMLParser, ParsesEmptyObject) {
+ ExpectParseSuccess("Empty object", "[{}]");
+}
+
+TEST(YAMLParser, ParsesObject) {
+ ExpectParseSuccess("Object with an entry", "[{\"a\":\"/b\"}]");
+}
+
+TEST(YAMLParser, ParsesMultipleKeyValuePairsInObject) {
+ ExpectParseSuccess("Multiple key, value pairs",
+ "[{\"a\":\"/b\",\"c\":\"d\",\"e\":\"f\"}]");
+}
+
+TEST(YAMLParser, FailsIfNotClosingObject) {
+ ExpectParseError("Missing close on empty", "[{]");
+ ExpectParseError("Missing close after pair", "[{\"a\":\"b\"]");
+}
+
+TEST(YAMLParser, FailsIfMissingColon) {
+ ExpectParseError("Missing colon between key and value", "[{\"a\"\"/b\"}]");
+ ExpectParseError("Missing colon between key and value", "[{\"a\" \"b\"}]");
+}
+
+TEST(YAMLParser, FailsOnMissingQuote) {
+ ExpectParseError("Missing open quote", "[{a\":\"b\"}]");
+ ExpectParseError("Missing closing quote", "[{\"a\":\"b}]");
+}
+
+TEST(YAMLParser, ParsesEscapedQuotes) {
+ ExpectParseSuccess("Parses escaped string in key and value",
+ "[{\"a\":\"\\\"b\\\" \\\" \\\"\"}]");
+}
+
+TEST(YAMLParser, ParsesEmptyString) {
+ ExpectParseSuccess("Parses empty string in value", "[{\"a\":\"\"}]");
+}
+
+TEST(YAMLParser, ParsesMultipleObjects) {
+ ExpectParseSuccess(
+ "Multiple objects in array",
+ "["
+ " { \"a\" : \"b\" },"
+ " { \"a\" : \"b\" },"
+ " { \"a\" : \"b\" }"
+ "]");
+}
+
+TEST(YAMLParser, FailsOnMissingComma) {
+ ExpectParseError(
+ "Missing comma",
+ "["
+ " { \"a\" : \"b\" }"
+ " { \"a\" : \"b\" }"
+ "]");
+}
+
+TEST(YAMLParser, ParsesSpacesInBetweenTokens) {
+ ExpectParseSuccess(
+ "Various whitespace between tokens",
+ " \t \n\n \r [ \t \n\n \r"
+ " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+ " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r,\t \n\n \r"
+ " \t \n\n \r { \t \n\n \r\"a\"\t \n\n \r :"
+ " \t \n\n \r \"b\"\t \n\n \r } \t \n\n \r]\t \n\n \r");
+}
+
+TEST(YAMLParser, ParsesArrayOfArrays) {
+ ExpectParseSuccess("Array of arrays", "[[]]");
+}
+
+TEST(YAMLParser, HandlesEndOfFileGracefully) {
+ ExpectParseError("In string starting with EOF", "[\"");
+ ExpectParseError("In string hitting EOF", "[\" ");
+ ExpectParseError("In string escaping EOF", "[\" \\");
+ ExpectParseError("In array starting with EOF", "[");
+ ExpectParseError("In array element starting with EOF", "[[], ");
+ ExpectParseError("In array hitting EOF", "[[] ");
+ ExpectParseError("In array hitting EOF", "[[]");
+ ExpectParseError("In object hitting EOF", "{\"\"");
+}
+
+// Checks that the given string can be parsed into an identical string inside
+// of an array.
+static void ExpectCanParseString(StringRef String) {
+ std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+ SourceMgr SM;
+ yaml::Stream Stream(StringInArray, SM);
+ yaml::SequenceNode *ParsedSequence
+ = dyn_cast<yaml::SequenceNode>(Stream.begin()->getRoot());
+ StringRef ParsedString
+ = dyn_cast<yaml::ScalarNode>(
+ static_cast<yaml::Node*>(ParsedSequence->begin()))->getRawValue();
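+  // getRawValue() includes the surrounding double quotes; strip them before
+  // comparing against the input string.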
+ ParsedString = ParsedString.substr(1, ParsedString.size() - 2);
+ EXPECT_EQ(String, ParsedString.str());
+}
+
+// Checks that parsing the given string inside an array fails.
+static void ExpectCannotParseString(StringRef String) {
+ std::string StringInArray = (llvm::Twine("[\"") + String + "\"]").str();
+ ExpectParseError((Twine("When parsing string \"") + String + "\"").str(),
+ StringInArray);
+}
+
+TEST(YAMLParser, ParsesStrings) {
+ ExpectCanParseString("");
+ ExpectCannotParseString("\\");
+ ExpectCannotParseString("\"");
+ ExpectCanParseString(" ");
+ ExpectCanParseString("\\ ");
+ ExpectCanParseString("\\\"");
+ ExpectCannotParseString("\"\\");
+ ExpectCannotParseString(" \\");
+ ExpectCanParseString("\\\\");
+ ExpectCannotParseString("\\\\\\");
+ ExpectCanParseString("\\\\\\\\");
+ ExpectCanParseString("\\\" ");
+ ExpectCannotParseString("\\\\\" ");
+ ExpectCanParseString("\\\\\\\" ");
+ ExpectCanParseString(" \\\\ \\\" \\\\\\\" ");
+}
+
+TEST(YAMLParser, WorksWithIteratorAlgorithms) {
+ SourceMgr SM;
+ yaml::Stream Stream("[\"1\", \"2\", \"3\", \"4\", \"5\", \"6\"]", SM);
+ yaml::SequenceNode *Array
+ = dyn_cast<yaml::SequenceNode>(Stream.begin()->getRoot());
+ EXPECT_EQ(6, std::distance(Array->begin(), Array->end()));
+}
+
+} // end namespace llvm
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index 1b858695b1d3..4243b2d39de7 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -17,6 +17,7 @@
using namespace llvm;
+namespace {
class CloneInstruction : public ::testing::Test {
protected:
virtual void SetUp() {
@@ -47,6 +48,7 @@ protected:
LLVMContext context;
Value *V;
};
+}
TEST_F(CloneInstruction, OverflowBits) {
V = new Argument(Type::getInt32Ty(context));
diff --git a/unittests/VMCore/DominatorTreeTest.cpp b/unittests/VMCore/DominatorTreeTest.cpp
new file mode 100644
index 000000000000..f6a90605a716
--- /dev/null
+++ b/unittests/VMCore/DominatorTreeTest.cpp
@@ -0,0 +1,195 @@
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace llvm {
+ void initializeDPassPass(PassRegistry&);
+
+ namespace {
+ struct DPass : public FunctionPass {
+ static char ID;
+ virtual bool runOnFunction(Function &F) {
+ DominatorTree *DT = &getAnalysis<DominatorTree>();
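+      // The expectations below encode the CFG of @f: bb0 dominates every
+      // block, bb3 is unreachable, and bb4 joins values through phi nodes.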
+ Function::iterator FI = F.begin();
+
+ BasicBlock *BB0 = FI++;
+ BasicBlock::iterator BBI = BB0->begin();
+ Instruction *Y1 = BBI++;
+ Instruction *Y2 = BBI++;
+ Instruction *Y3 = BBI++;
+
+ BasicBlock *BB1 = FI++;
+ BBI = BB1->begin();
+ Instruction *Y4 = BBI++;
+
+ BasicBlock *BB2 = FI++;
+ BBI = BB2->begin();
+ Instruction *Y5 = BBI++;
+
+ BasicBlock *BB3 = FI++;
+ BBI = BB3->begin();
+ Instruction *Y6 = BBI++;
+ Instruction *Y7 = BBI++;
+
+ BasicBlock *BB4 = FI++;
+ BBI = BB4->begin();
+ Instruction *Y8 = BBI++;
+ Instruction *Y9 = BBI++;
+
+ // Reachability
+ EXPECT_TRUE(DT->isReachableFromEntry(BB0));
+ EXPECT_TRUE(DT->isReachableFromEntry(BB1));
+ EXPECT_TRUE(DT->isReachableFromEntry(BB2));
+ EXPECT_FALSE(DT->isReachableFromEntry(BB3));
+ EXPECT_TRUE(DT->isReachableFromEntry(BB4));
+
+ // BB dominance
+ EXPECT_TRUE(DT->dominates(BB0, BB0));
+ EXPECT_TRUE(DT->dominates(BB0, BB1));
+ EXPECT_TRUE(DT->dominates(BB0, BB2));
+ EXPECT_TRUE(DT->dominates(BB0, BB3));
+ EXPECT_TRUE(DT->dominates(BB0, BB4));
+
+ EXPECT_FALSE(DT->dominates(BB1, BB0));
+ EXPECT_TRUE(DT->dominates(BB1, BB1));
+ EXPECT_FALSE(DT->dominates(BB1, BB2));
+ EXPECT_TRUE(DT->dominates(BB1, BB3));
+ EXPECT_FALSE(DT->dominates(BB1, BB4));
+
+ EXPECT_FALSE(DT->dominates(BB2, BB0));
+ EXPECT_FALSE(DT->dominates(BB2, BB1));
+ EXPECT_TRUE(DT->dominates(BB2, BB2));
+ EXPECT_TRUE(DT->dominates(BB2, BB3));
+ EXPECT_FALSE(DT->dominates(BB2, BB4));
+
+ EXPECT_FALSE(DT->dominates(BB3, BB0));
+ EXPECT_FALSE(DT->dominates(BB3, BB1));
+ EXPECT_FALSE(DT->dominates(BB3, BB2));
+ EXPECT_TRUE(DT->dominates(BB3, BB3));
+ EXPECT_FALSE(DT->dominates(BB3, BB4));
+
+ // BB proper dominance
+ EXPECT_FALSE(DT->properlyDominates(BB0, BB0));
+ EXPECT_TRUE(DT->properlyDominates(BB0, BB1));
+ EXPECT_TRUE(DT->properlyDominates(BB0, BB2));
+ EXPECT_TRUE(DT->properlyDominates(BB0, BB3));
+
+ EXPECT_FALSE(DT->properlyDominates(BB1, BB0));
+ EXPECT_FALSE(DT->properlyDominates(BB1, BB1));
+ EXPECT_FALSE(DT->properlyDominates(BB1, BB2));
+ EXPECT_TRUE(DT->properlyDominates(BB1, BB3));
+
+ EXPECT_FALSE(DT->properlyDominates(BB2, BB0));
+ EXPECT_FALSE(DT->properlyDominates(BB2, BB1));
+ EXPECT_FALSE(DT->properlyDominates(BB2, BB2));
+ EXPECT_TRUE(DT->properlyDominates(BB2, BB3));
+
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB0));
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB1));
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB2));
+ EXPECT_FALSE(DT->properlyDominates(BB3, BB3));
+
+ // Instruction dominance in the same reachable BB
+ EXPECT_FALSE(DT->dominates(Y1, Y1));
+ EXPECT_TRUE(DT->dominates(Y1, Y2));
+ EXPECT_FALSE(DT->dominates(Y2, Y1));
+ EXPECT_FALSE(DT->dominates(Y2, Y2));
+
+ // Instruction dominance in the same unreachable BB
+ EXPECT_TRUE(DT->dominates(Y6, Y6));
+ EXPECT_TRUE(DT->dominates(Y6, Y7));
+ EXPECT_TRUE(DT->dominates(Y7, Y6));
+ EXPECT_TRUE(DT->dominates(Y7, Y7));
+
+ // Invoke
+ EXPECT_TRUE(DT->dominates(Y3, Y4));
+ EXPECT_FALSE(DT->dominates(Y3, Y5));
+
+ // Phi
+ EXPECT_TRUE(DT->dominates(Y2, Y9));
+ EXPECT_FALSE(DT->dominates(Y3, Y9));
+ EXPECT_FALSE(DT->dominates(Y8, Y9));
+
+ // Anything dominates unreachable
+ EXPECT_TRUE(DT->dominates(Y1, Y6));
+ EXPECT_TRUE(DT->dominates(Y3, Y6));
+
+ // Unreachable doesn't dominate reachable
+ EXPECT_FALSE(DT->dominates(Y6, Y1));
+
+ // Instruction, BB dominance
+ EXPECT_FALSE(DT->dominates(Y1, BB0));
+ EXPECT_TRUE(DT->dominates(Y1, BB1));
+ EXPECT_TRUE(DT->dominates(Y1, BB2));
+ EXPECT_TRUE(DT->dominates(Y1, BB3));
+ EXPECT_TRUE(DT->dominates(Y1, BB4));
+
+ EXPECT_FALSE(DT->dominates(Y3, BB0));
+ EXPECT_TRUE(DT->dominates(Y3, BB1));
+ EXPECT_FALSE(DT->dominates(Y3, BB2));
+ EXPECT_TRUE(DT->dominates(Y3, BB3));
+ EXPECT_FALSE(DT->dominates(Y3, BB4));
+
+ EXPECT_TRUE(DT->dominates(Y6, BB3));
+
+ return false;
+ }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTree>();
+ }
+ DPass() : FunctionPass(ID) {
+ initializeDPassPass(*PassRegistry::getPassRegistry());
+ }
+ };
+ char DPass::ID = 0;
+
+
+ Module* makeLLVMModule(DPass *P) {
+ const char *ModuleString =
+ "declare i32 @g()\n" \
+ "define void @f(i32 %x) {\n" \
+ "bb0:\n" \
+ " %y1 = add i32 %x, 1\n" \
+ " %y2 = add i32 %x, 1\n" \
+ " %y3 = invoke i32 @g() to label %bb1 unwind label %bb2\n" \
+ "bb1:\n" \
+ " %y4 = add i32 %x, 1\n" \
+ " br label %bb4\n" \
+ "bb2:\n" \
+ " %y5 = landingpad i32 personality i32 ()* @g\n" \
+ " cleanup\n" \
+ " br label %bb4\n" \
+ "bb3:\n" \
+ " %y6 = add i32 %x, 1\n" \
+ " %y7 = add i32 %x, 1\n" \
+ " ret void\n" \
+ "bb4:\n" \
+ " %y8 = phi i32 [0, %bb2], [%y4, %bb1]\n"
+ " %y9 = phi i32 [0, %bb2], [%y4, %bb1]\n"
+ " ret void\n" \
+ "}\n";
+ LLVMContext &C = getGlobalContext();
+ SMDiagnostic Err;
+ return ParseAssemblyString(ModuleString, NULL, Err, C);
+ }
+
+ TEST(DominatorTree, Unreachable) {
+ DPass *P = new DPass();
+ Module *M = makeLLVMModule(P);
+ PassManager Passes;
+ Passes.add(P);
+ Passes.run(*M);
+ }
+ }
+}
+
+INITIALIZE_PASS_BEGIN(DPass, "dpass", "dpass", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DPass, "dpass", "dpass", false, false)
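
The expectations above are fixed by the control-flow graph that makeLLVMModule encodes: bb0 ends in an invoke whose normal edge goes to bb1 and whose unwind edge goes to bb2, both of which branch to bb4; bb3 has no predecessors, so it is unreachable from entry, which is why every reachable block is expected to dominate it and why Y6 and Y7 trivially dominate each other. Parsing the module text is also why the VMCore unit-test Makefile below gains asmparser in LINK_COMPONENTS. A hedged driver sketch using only calls from this test (it adds the Module cleanup the fixture omits):

    static bool runUnreachableDominanceChecks() {
      DPass *P = new DPass();
      Module *M = makeLLVMModule(P);
      if (!M)
        return false;            // the IR string failed to parse
      PassManager Passes;
      Passes.add(P);             // PassManager takes ownership of P
      Passes.run(*M);            // schedules DominatorTree, then runs DPass
      delete M;                  // the caller still owns the Module
      return true;
    }
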
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index f0197bb671ab..218a9a08c439 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -13,6 +13,8 @@
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
#include "gtest/gtest.h"
namespace llvm {
@@ -129,5 +131,100 @@ TEST(InstructionsTest, CastInst) {
EXPECT_EQ(CastInst::SExt, CastInst::getCastOpcode(c8, true, V8x64Ty, true));
}
+
+
+TEST(InstructionsTest, VectorGep) {
+ LLVMContext &C(getGlobalContext());
+
+ // Type Definitions
+ PointerType *Ptri8Ty = PointerType::get(IntegerType::get(C, 8), 0);
+ PointerType *Ptri32Ty = PointerType::get(IntegerType::get(C, 32), 0);
+
+ VectorType *V2xi8PTy = VectorType::get(Ptri8Ty, 2);
+ VectorType *V2xi32PTy = VectorType::get(Ptri32Ty, 2);
+
+ // Test different aspects of the vector-of-pointers type
+ // and GEPs which use this type.
+ ConstantInt *Ci32a = ConstantInt::get(C, APInt(32, 1492));
+ ConstantInt *Ci32b = ConstantInt::get(C, APInt(32, 1948));
+ std::vector<Constant*> ConstVa(2, Ci32a);
+ std::vector<Constant*> ConstVb(2, Ci32b);
+ Constant *C2xi32a = ConstantVector::get(ConstVa);
+ Constant *C2xi32b = ConstantVector::get(ConstVb);
+
+ CastInst *PtrVecA = new IntToPtrInst(C2xi32a, V2xi32PTy);
+ CastInst *PtrVecB = new IntToPtrInst(C2xi32b, V2xi32PTy);
+
+ ICmpInst *ICmp0 = new ICmpInst(ICmpInst::ICMP_SGT, PtrVecA, PtrVecB);
+ ICmpInst *ICmp1 = new ICmpInst(ICmpInst::ICMP_ULT, PtrVecA, PtrVecB);
+ EXPECT_NE(ICmp0, ICmp1); // suppress warning.
+
+ GetElementPtrInst *Gep0 = GetElementPtrInst::Create(PtrVecA, C2xi32a);
+ GetElementPtrInst *Gep1 = GetElementPtrInst::Create(PtrVecA, C2xi32b);
+ GetElementPtrInst *Gep2 = GetElementPtrInst::Create(PtrVecB, C2xi32a);
+ GetElementPtrInst *Gep3 = GetElementPtrInst::Create(PtrVecB, C2xi32b);
+
+ CastInst *BTC0 = new BitCastInst(Gep0, V2xi8PTy);
+ CastInst *BTC1 = new BitCastInst(Gep1, V2xi8PTy);
+ CastInst *BTC2 = new BitCastInst(Gep2, V2xi8PTy);
+ CastInst *BTC3 = new BitCastInst(Gep3, V2xi8PTy);
+
+ Value *S0 = BTC0->stripPointerCasts();
+ Value *S1 = BTC1->stripPointerCasts();
+ Value *S2 = BTC2->stripPointerCasts();
+ Value *S3 = BTC3->stripPointerCasts();
+
+ EXPECT_NE(S0, Gep0);
+ EXPECT_NE(S1, Gep1);
+ EXPECT_NE(S2, Gep2);
+ EXPECT_NE(S3, Gep3);
+
+ int64_t Offset;
+ TargetData TD("e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3"
+ "2:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80"
+ ":128:128-n8:16:32:64-S128");
+ // Make sure we don't crash
+ GetPointerBaseWithConstantOffset(Gep0, Offset, TD);
+ GetPointerBaseWithConstantOffset(Gep1, Offset, TD);
+ GetPointerBaseWithConstantOffset(Gep2, Offset, TD);
+ GetPointerBaseWithConstantOffset(Gep3, Offset, TD);
+
+ // Gep of Geps
+ GetElementPtrInst *GepII0 = GetElementPtrInst::Create(Gep0, C2xi32b);
+ GetElementPtrInst *GepII1 = GetElementPtrInst::Create(Gep1, C2xi32a);
+ GetElementPtrInst *GepII2 = GetElementPtrInst::Create(Gep2, C2xi32b);
+ GetElementPtrInst *GepII3 = GetElementPtrInst::Create(Gep3, C2xi32a);
+
+ EXPECT_EQ(GepII0->getNumIndices(), 1u);
+ EXPECT_EQ(GepII1->getNumIndices(), 1u);
+ EXPECT_EQ(GepII2->getNumIndices(), 1u);
+ EXPECT_EQ(GepII3->getNumIndices(), 1u);
+
+ EXPECT_FALSE(GepII0->hasAllZeroIndices());
+ EXPECT_FALSE(GepII1->hasAllZeroIndices());
+ EXPECT_FALSE(GepII2->hasAllZeroIndices());
+ EXPECT_FALSE(GepII3->hasAllZeroIndices());
+
+ delete GepII0;
+ delete GepII1;
+ delete GepII2;
+ delete GepII3;
+
+ delete BTC0;
+ delete BTC1;
+ delete BTC2;
+ delete BTC3;
+
+ delete Gep0;
+ delete Gep1;
+ delete Gep2;
+ delete Gep3;
+
+ delete ICmp0;
+ delete ICmp1;
+ delete PtrVecA;
+ delete PtrVecB;
+}
+
} // end anonymous namespace
} // end namespace llvm
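
TEST(InstructionsTest, VectorGep) exercises the vector-of-pointers form of getelementptr: the base operand is a <2 x i32*> vector, the index a <2 x i32> vector, and each lane of the result is the corresponding base pointer advanced by the corresponding index. A minimal construction sketch reusing the same calls (values illustrative; as in the test, the caller must delete the GEP before its base operand):

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // Builds the equivalent of:
    //   %gep = getelementptr <2 x i32*> %base, <2 x i32> <i32 4, i32 4>
    static GetElementPtrInst *buildVectorGep(LLVMContext &C) {
      VectorType *V2xi32PTy =
          VectorType::get(PointerType::get(IntegerType::get(C, 32), 0), 2);
      Constant *Four = ConstantInt::get(C, APInt(32, 4));
      Constant *Indices = ConstantVector::get(std::vector<Constant*>(2, Four));
      CastInst *Base = new IntToPtrInst(Indices, V2xi32PTy); // any <2 x i32*>
      return GetElementPtrInst::Create(Base, Indices);
    }
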
diff --git a/unittests/VMCore/Makefile b/unittests/VMCore/Makefile
index 1b2b69c6d60b..df55065e1916 100644
--- a/unittests/VMCore/Makefile
+++ b/unittests/VMCore/Makefile
@@ -9,7 +9,7 @@
LEVEL = ../..
TESTNAME = VMCore
-LINK_COMPONENTS := core support target ipa
+LINK_COMPONENTS := core support target ipa asmparser
include $(LEVEL)/Makefile.config
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index 12ac2e704c8e..08927a2ff526 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -90,13 +90,20 @@ TEST_F(MDNodeTest, Simple) {
MDNode *n1 = MDNode::get(Context, V);
Value *const c1 = n1;
MDNode *n2 = MDNode::get(Context, c1);
+ Value *const c2 = n2;
MDNode *n3 = MDNode::get(Context, V);
+ MDNode *n4 = MDNode::getIfExists(Context, V);
+ MDNode *n5 = MDNode::getIfExists(Context, c1);
+ MDNode *n6 = MDNode::getIfExists(Context, c2);
EXPECT_NE(n1, n2);
#ifdef ENABLE_MDNODE_UNIQUING
EXPECT_EQ(n1, n3);
#else
(void) n3;
#endif
+ EXPECT_EQ(n4, n1);
+ EXPECT_EQ(n5, n2);
+ EXPECT_EQ(n6, (Value*)0);
EXPECT_EQ(3u, n1->getNumOperands());
EXPECT_EQ(s1, n1->getOperand(0));
diff --git a/unittests/VMCore/ValueMapTest.cpp b/unittests/VMCore/ValueMapTest.cpp
index b4939208e7d4..9bed37dff33e 100644
--- a/unittests/VMCore/ValueMapTest.cpp
+++ b/unittests/VMCore/ValueMapTest.cpp
@@ -12,7 +12,7 @@
#include "llvm/Instructions.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#include "gtest/gtest.h"
@@ -195,7 +195,7 @@ struct LockMutex : ValueMapConfig<KeyT> {
}
static sys::Mutex *getMutex(const ExtraData &Data) { return Data.M; }
};
-#if ENABLE_THREADS
+#if LLVM_ENABLE_THREADS
TYPED_TEST(ValueMapTest, LocksMutex) {
sys::Mutex M(false); // Not recursive.
bool CalledRAUW = false, CalledDeleted = false;
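
Both hunks above track the same rename: llvm/Config/config.h is the private, unprefixed autoconf header, while llvm/Config/llvm-config.h is the installable header whose macros carry the LLVM_ prefix. Code meant to compile out of tree therefore guards threading support like this:

    #include "llvm/Config/llvm-config.h"  // public macros, LLVM_-prefixed

    #if LLVM_ENABLE_THREADS
    // code that may take locks, e.g. the sys::Mutex used by LocksMutex above
    #endif
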
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index f2255948658e..33f04ce64779 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -117,8 +117,9 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
// Check that there is something on the line.
if (PatternStr.empty()) {
- SM.PrintMessage(PatternLoc, "found empty check string with prefix '" +
- CheckPrefix+":'", "error");
+ SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
+ "found empty check string with prefix '" +
+ CheckPrefix+":'");
return true;
}
@@ -144,7 +145,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
size_t End = PatternStr.find("}}");
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
- "found start of regex string with no end '}}'","error");
+ SourceMgr::DK_Error,
+ "found start of regex string with no end '}}'");
return true;
}
@@ -173,7 +175,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
size_t End = PatternStr.find("]]");
if (End == StringRef::npos) {
SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
- "invalid named regex reference, no ]] found", "error");
+ SourceMgr::DK_Error,
+ "invalid named regex reference, no ]] found");
return true;
}
@@ -185,8 +188,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
StringRef Name = MatchStr.substr(0, NameEnd);
if (Name.empty()) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
- "invalid name in named regex: empty name", "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "invalid name in named regex: empty name");
return true;
}
@@ -194,14 +197,14 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) {
for (unsigned i = 0, e = Name.size(); i != e; ++i)
if (Name[i] != '_' && !isalnum(Name[i])) {
SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
- "invalid name in named regex", "error");
+ SourceMgr::DK_Error, "invalid name in named regex");
return true;
}
// Name can't start with a digit.
if (isdigit(Name[0])) {
- SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
- "invalid name in named regex", "error");
+ SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
+ "invalid name in named regex");
return true;
}
@@ -266,8 +269,8 @@ bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
Regex R(RegexStr);
std::string Error;
if (!R.isValid(Error)) {
- SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()),
- "invalid regex: " + Error, "error");
+ SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), SourceMgr::DK_Error,
+ "invalid regex: " + Error);
return true;
}
@@ -383,8 +386,8 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
OS.write_escaped(it->second) << "\"";
}
- SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), OS.str(), "note",
- /*ShowLine=*/false);
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ OS.str());
}
}
@@ -422,7 +425,7 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
// line.
if (Best && Best != StringRef::npos && BestQuality < 50) {
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
- "possible intended match here", "note");
+ SourceMgr::DK_Note, "possible intended match here");
// FIXME: If we wanted to be really friendly we would show why the match
// failed, as it can be hard to spot simple one character differences.
@@ -566,8 +569,9 @@ static bool ReadCheckFile(SourceMgr &SM,
// Verify that CHECK-NEXT lines have at least one CHECK line before them.
if (IsCheckNext && CheckStrings.empty()) {
SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
+ SourceMgr::DK_Error,
"found '"+CheckPrefix+"-NEXT:' without previous '"+
- CheckPrefix+ ": line", "error");
+ CheckPrefix+ ": line");
return true;
}
@@ -607,15 +611,15 @@ static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
StringRef Buffer,
StringMap<StringRef> &VariableTable) {
// Otherwise, we have an error, emit an error message.
- SM.PrintMessage(CheckStr.Loc, "expected string not found in input",
- "error");
+ SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error,
+ "expected string not found in input");
// Print the "scanning from here" line. If the current position is at the
// end of a line, advance to the start of the next line.
Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
- SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), "scanning from here",
- "note");
+ SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
+ "scanning from here");
// Allow the pattern to print additional information if desired.
CheckStr.Pat.PrintFailureInfo(SM, Buffer, VariableTable);
@@ -710,25 +714,22 @@ int main(int argc, char **argv) {
unsigned NumNewLines = CountNumNewlinesBetween(SkippedRegion);
if (NumNewLines == 0) {
- SM.PrintMessage(CheckStr.Loc,
- CheckPrefix+"-NEXT: is on the same line as previous match",
- "error");
+ SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error,
+ CheckPrefix+"-NEXT: is on the same line as previous match");
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
- "'next' match was here", "note");
- SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
- "previous match was here", "note");
+ SourceMgr::DK_Note, "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch), SourceMgr::DK_Note,
+ "previous match was here");
return 1;
}
if (NumNewLines != 1) {
- SM.PrintMessage(CheckStr.Loc,
- CheckPrefix+
- "-NEXT: is not on the line after the previous match",
- "error");
+ SM.PrintMessage(CheckStr.Loc, SourceMgr::DK_Error, CheckPrefix+
+ "-NEXT: is not on the line after the previous match");
SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
- "'next' match was here", "note");
- SM.PrintMessage(SMLoc::getFromPointer(LastMatch),
- "previous match was here", "note");
+ SourceMgr::DK_Note, "'next' match was here");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch), SourceMgr::DK_Note,
+ "previous match was here");
return 1;
}
}
@@ -743,10 +744,10 @@ int main(int argc, char **argv) {
VariableTable);
if (Pos == StringRef::npos) continue;
- SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos),
- CheckPrefix+"-NOT: string occurred!", "error");
- SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first,
- CheckPrefix+"-NOT: pattern specified here", "note");
+ SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), SourceMgr::DK_Error,
+ CheckPrefix+"-NOT: string occurred!");
+ SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, SourceMgr::DK_Note,
+ CheckPrefix+"-NOT: pattern specified here");
return 1;
}
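
Every FileCheck change above is the same mechanical migration: SourceMgr::PrintMessage no longer takes the diagnostic kind as a trailing string, it takes a SourceMgr::DiagKind enumerator ahead of the message, and the old ShowLine flag is dropped at these call sites. Side by side, assuming an SMLoc Loc and a SourceMgr SM in scope:

    // Old signature: kind as a trailing string literal.
    SM.PrintMessage(Loc, "expected string not found in input", "error");

    // New signature: kind as an enumerator, before the message text.
    SM.PrintMessage(Loc, SourceMgr::DK_Error,
                    "expected string not found in input");
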
diff --git a/utils/GenLibDeps.pl b/utils/GenLibDeps.pl
index 0cd9e6ae0001..656250c7e3d9 100755
--- a/utils/GenLibDeps.pl
+++ b/utils/GenLibDeps.pl
@@ -96,7 +96,6 @@ if ($PEROBJ) {
$libpath =~ s/^AsmPrinter/CodeGen\/AsmPrinter/;
$libpath =~ s/^BitReader/Bitcode\/Reader/;
$libpath =~ s/^BitWriter/Bitcode\/Writer/;
- $libpath =~ s/^CBackend/Target\/CBackend/;
$libpath =~ s/^CppBackend/Target\/CppBackend/;
$libpath =~ s/^MSIL/Target\/MSIL/;
$libpath =~ s/^Core/VMCore/;
@@ -138,7 +137,6 @@ if ($PEROBJ) {
$libpath =~ s/^AsmPrinter/CodeGen\/AsmPrinter/;
$libpath =~ s/^BitReader/Bitcode\/Reader/;
$libpath =~ s/^BitWriter/Bitcode\/Writer/;
- $libpath =~ s/^CBackend/Target\/CBackend/;
$libpath =~ s/^CppBackend/Target\/CppBackend/;
$libpath =~ s/^MSIL/Target\/MSIL/;
$libpath =~ s/^Core/VMCore/;
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
index 1ddae0bc8bb1..70713b25bf25 100644
--- a/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -211,19 +211,6 @@ static error_code GetFileNameFromHandle(HANDLE FileHandle,
}
}
-static std::string QuoteProgramPathIfNeeded(StringRef Command) {
- if (Command.find_first_of(' ') == StringRef::npos)
- return Command;
- else {
- std::string ret;
- ret.reserve(Command.size() + 3);
- ret.push_back('"');
- ret.append(Command.begin(), Command.end());
- ret.push_back('"');
- return ret;
- }
-}
-
/// @brief Find program using shell lookup rules.
/// @param Program This is either an absolute path, relative path, or simply a
/// program name. Look in PATH for any programs that match. If no
@@ -269,39 +256,6 @@ static std::string FindProgram(const std::string &Program, error_code &ec) {
return PathName;
}
-static error_code EnableDebugPrivileges() {
- HANDLE TokenHandle;
- BOOL success = ::OpenProcessToken(::GetCurrentProcess(),
- TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY,
- &TokenHandle);
- if (!success)
- return windows_error(::GetLastError());
-
- TokenScopedHandle Token(TokenHandle);
- TOKEN_PRIVILEGES TokenPrivileges;
- LUID LocallyUniqueID;
-
- success = ::LookupPrivilegeValueA(NULL,
- SE_DEBUG_NAME,
- &LocallyUniqueID);
- if (!success)
- return windows_error(::GetLastError());
-
- TokenPrivileges.PrivilegeCount = 1;
- TokenPrivileges.Privileges[0].Luid = LocallyUniqueID;
- TokenPrivileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-
- success = ::AdjustTokenPrivileges(Token,
- FALSE,
- &TokenPrivileges,
- sizeof(TOKEN_PRIVILEGES),
- NULL,
- NULL);
- // The value of success is basically useless. Either way we are just returning
- // the value of ::GetLastError().
- return windows_error(::GetLastError());
-}
-
static StringRef ExceptionCodeToString(DWORD ExceptionCode) {
switch(ExceptionCode) {
case EXCEPTION_ACCESS_VIOLATION: return "EXCEPTION_ACCESS_VIOLATION";
diff --git a/utils/LLVMBuild.txt b/utils/LLVMBuild.txt
new file mode 100644
index 000000000000..382bfd31447b
--- /dev/null
+++ b/utils/LLVMBuild.txt
@@ -0,0 +1,29 @@
+;===- ./utils/LLVMBuild.txt ------------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[common]
+subdirectories = TableGen unittest
+
+[component_0]
+type = Group
+name = BuildTools
+parent = $ROOT
+
+[component_1]
+type = Group
+name = UtilityTools
+parent = $ROOT
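
The two Group components only reserve places in the component tree; an individual utility then names one of them as its parent from its own LLVMBuild.txt. A hypothetical stanza for a tool in this directory (FileCheck used for illustration) would read:

    [component_0]
    type = BuildTool
    name = FileCheck
    parent = BuildTools
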
diff --git a/utils/Makefile b/utils/Makefile
index 9d4dc5c2f90b..b98376006edf 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -9,14 +9,11 @@
LEVEL = ..
PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
- count fpcmp llvm-lit not unittest
+ count fpcmp llvm-lit not unittest json-bench
-EXTRA_DIST := cgiplotNLT.pl check-each-file codegen-diff countloc.sh \
+EXTRA_DIST := check-each-file codegen-diff countloc.sh \
DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
- getsrcs.sh importNLT.pl llvmdo llvmgrep llvm-native-gcc \
- llvm-native-gxx makellvm NightlyTest.gnuplot NightlyTest.pl \
- NightlyTestTemplate.html NLT.schema \
- parseNLT.pl plotNLT.pl profile.pl \
- webNLT.pl vim
+ getsrcs.sh llvmdo llvmgrep llvm-native-gcc \
+ llvm-native-gxx makellvm profile.pl vim
include $(LEVEL)/Makefile.common
diff --git a/utils/NLT.schema b/utils/NLT.schema
deleted file mode 100644
index 4bcddbc9f7f6..000000000000
--- a/utils/NLT.schema
+++ /dev/null
@@ -1,8 +0,0 @@
-CREATE TABLE `Tests` (
- `NAME` varchar(255) NOT NULL default '',
- `RUN` date NOT NULL default '0000-00-00',
- `TEST` varchar(32) NOT NULL default '',
- `VALUE` double NOT NULL default '0',
- KEY `name_index` (`NAME`)
-) ENGINE=MyISAM DEFAULT CHARSET=latin1
-
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
deleted file mode 100755
index da806e9c0e32..000000000000
--- a/utils/NewNightlyTest.pl
+++ /dev/null
@@ -1,836 +0,0 @@
-#!/usr/bin/perl
-use POSIX qw(strftime);
-use File::Copy;
-use File::Find;
-use Socket;
-
-#
-# Program: NewNightlyTest.pl
-#
-# Synopsis: Perform a series of tests which are designed to be run nightly.
-# This is used to keep track of the status of the LLVM tree, tracking
-# regressions and performance changes. Submits this information
-# to llvm.org where it is placed into the nightlytestresults database.
-#
-# Syntax: NightlyTest.pl [OPTIONS] [CVSROOT BUILDDIR WEBDIR]
-# where
-# OPTIONS may include one or more of the following:
-#
-# MAIN OPTIONS:
-# -config LLVMPATH If specified, use an existing LLVM build and only run and
-# report the test information. The LLVMCONFIG argument should
-# be the path to the llvm-config executable in the LLVM build.
-# This should be the first argument if given. NOT YET
-# IMPLEMENTED.
-# -nickname NAME The NAME argument specifies the nickname this script
-# will submit to the nightlytest results repository.
-# -nouname Don't include uname data (machine will be identified by nickname only).
-# -submit-server Specifies a server to submit the test results to. If this
-# option is not specified it defaults to
-# llvm.org. This is basically just the address of the
-# webserver
-# -submit-script Specifies which script to call on the submit server. If
-# this option is not specified it defaults to
-# /nightlytest/NightlyTestAccept.php. This is basically
-# everything after the www.yourserver.org.
-# -submit-aux If specified, an auxiliary script to run in addition to the
-# normal submit script. The script will be passed the path to
-# the "sentdata.txt" file as its sole argument.
-# -nosubmit Do not report the test results back to a submit server.
-#
-#
-# BUILD OPTIONS (not used with -config):
-# -nocheckout Do not create, checkout, update, or configure
-# the source tree.
-# -noremove Do not remove the BUILDDIR after it has been built.
-# -noremoveresults Do not remove the WEBDIR after it has been built.
-# -noclean Do not run 'make clean' before building.
-# -nobuild Do not build llvm. If tests are enabled perform them
-# on the llvm build specified in the build directory
-# -release Build an LLVM Release+Asserts version
-# -release-asserts Build an LLVM Release version
-# -disable-bindings Disable building LLVM bindings.
-# -with-clang Checkout Clang source into tools/clang.
-# -compileflags Next argument specifies extra options passed to make when
-# building LLVM.
-# -use-gmake Use gmake instead of the default make command to build
-# llvm and run tests.
-# -llvmgccdir Next argument specifies the llvm-gcc install prefix.
-#
-# TESTING OPTIONS:
-# -notest Do not even attempt to run the test programs.
-# -nodejagnu Do not run feature or regression tests
-# -enable-llcbeta Enable testing of beta features in llc.
-# -enable-lli Enable testing of lli (interpreter) features, default is off
-# -disable-pic Disable building with Position Independent Code.
-# -disable-llc Disable LLC tests in the nightly tester.
-# -disable-jit Disable JIT tests in the nightly tester.
-# -disable-cbe Disable C backend tests in the nightly tester.
-# -disable-lto Disable link time optimization.
-# -test-cflags Next argument specifies the C compilation options that
-# override the default when running the testsuite.
-# -test-cxxflags Next argument specifies the C++ compilation options that
-# override the default when running the testsuite.
-# -extraflags Next argument specifies extra options that are passed to
-# compile the tests.
-# -noexternals Do not run the external tests (for cases where povray
-# or SPEC are not installed)
-# -with-externals Specify a directory where the external tests are located.
-#
-# OTHER OPTIONS:
-# -parallel Run parallel jobs with GNU Make (see -parallel-jobs).
-# -parallel-jobs The number of parallel Make jobs to use (default is two).
-# -parallel-test Allow parallel execution of llvm-test
-# -verbose Turn on some debug output
-# -nice Checkout/Configure/Build with "nice" to reduce impact
-# on busy servers.
-# -f2c Next argument specifies path to F2C utility
-# -gccpath Path to gcc/g++ used to build LLVM
-# -target Specify the target triplet
-# -cflags Next argument specifies the C compilation options that
-# override the default.
-# -cxxflags Next argument specifies the C++ compilation options that
-# override the default.
-# -ldflags Next argument specifies the linker options that override
-# the default.
-#
-# CVSROOT is ignored, it is passed for backwards compatibility.
-# BUILDDIR is the directory where sources for this test run will be checked out
-# AND objects for this test run will be built. This directory MUST NOT
-# exist before the script is run; it will be created by the svn checkout
-# process and erased (unless -noremove is specified; see above.)
-# WEBDIR is the directory into which the test results web page will be written,
-# AND in which the "index.html" is assumed to be a symlink to the most recent
-# copy of the results. This directory will be created if it does not exist.
-# LLVMGCCDIR is the directory in which the LLVM GCC Front End is installed
-# to. This is the same as you would have for a normal LLVM build.
-#
-##############################################################
-#
-# Getting environment variables
-#
-##############################################################
-my $HOME = $ENV{'HOME'};
-my $SVNURL = $ENV{"SVNURL"};
-$SVNURL = 'http://llvm.org/svn/llvm-project' unless $SVNURL;
-my $TestSVNURL = $ENV{"TestSVNURL"};
-$TestSVNURL = 'http://llvm.org/svn/llvm-project' unless $TestSVNURL;
-my $BuildDir = $ENV{'BUILDDIR'};
-my $WebDir = $ENV{'WEBDIR'};
-
-##############################################################
-#
-# Calculate the date prefix...
-#
-##############################################################
-use POSIX;
-@TIME = localtime;
-my $DATE = strftime("%Y-%m-%d_%H-%M-%S", localtime());
-
-##############################################################
-#
-# Parse arguments...
-#
-##############################################################
-$CONFIG_PATH="";
-$CONFIGUREARGS="";
-$nickname="";
-$NOTEST=0;
-$MAKECMD="make";
-$SUBMITSERVER = "llvm.org";
-$SUBMITSCRIPT = "/nightlytest/NightlyTestAccept.php";
-$SUBMITAUX="";
-$SUBMIT = 1;
-$PARALLELJOBS = "2";
-my $TESTFLAGS="";
-
-if ($ENV{'LLVMGCCDIR'}) {
- $CONFIGUREARGS .= " --with-llvmgccdir=" . $ENV{'LLVMGCCDIR'};
- $LLVMGCCPATH = $ENV{'LLVMGCCDIR'} . '/bin';
-}
-else {
- $LLVMGCCPATH = "";
-}
-
-while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
- shift;
- last if /^--$/; # Stop processing arguments on --
-
- # List command line options here...
- if (/^-config$/) { $CONFIG_PATH = "$ARGV[0]"; shift; next; }
- if (/^-nocheckout$/) { $NOCHECKOUT = 1; next; }
- if (/^-noclean$/) { $NOCLEAN = 1; next; }
- if (/^-noremove$/) { $NOREMOVE = 1; next; }
- if (/^-noremoveatend$/) { $NOREMOVEATEND = 1; next; }
- if (/^-noremoveresults$/){ $NOREMOVERESULTS = 1; next; }
- if (/^-notest$/) { $NOTEST = 1; next; }
- if (/^-norunningtests$/) { next; } # Backward compatibility, ignored.
- if (/^-parallel-jobs$/) { $PARALLELJOBS = "$ARGV[0]"; shift; next;}
- if (/^-parallel$/) { $MAKEOPTS = "$MAKEOPTS -j$PARALLELJOBS"; next; }
- if (/^-parallel-test$/) { $PROGTESTOPTS .= " ENABLE_PARALLEL_REPORT=1"; next; }
- if (/^-with-clang$/) { $WITHCLANG = 1; next; }
- if (/^-release$/) { $MAKEOPTS = "$MAKEOPTS ENABLE_OPTIMIZED=1 ".
- "OPTIMIZE_OPTION=-O2"; next;}
- if (/^-release-asserts$/){ $MAKEOPTS = "$MAKEOPTS ENABLE_OPTIMIZED=1 ".
- "DISABLE_ASSERTIONS=1 ".
- "OPTIMIZE_OPTION=-O2"; next;}
- if (/^-enable-llcbeta$/) { $PROGTESTOPTS .= " ENABLE_LLCBETA=1"; next; }
- if (/^-disable-pic$/) { $CONFIGUREARGS .= " --enable-pic=no"; next; }
- if (/^-enable-lli$/) { $PROGTESTOPTS .= " ENABLE_LLI=1";
- $CONFIGUREARGS .= " --enable-lli"; next; }
- if (/^-disable-llc$/) { $PROGTESTOPTS .= " DISABLE_LLC=1";
- $CONFIGUREARGS .= " --disable-llc_diffs"; next; }
- if (/^-disable-jit$/) { $PROGTESTOPTS .= " DISABLE_JIT=1";
- $CONFIGUREARGS .= " --disable-jit"; next; }
- if (/^-disable-bindings$/) { $CONFIGUREARGS .= " --disable-bindings"; next; }
- if (/^-disable-cbe$/) { $PROGTESTOPTS .= " DISABLE_CBE=1"; next; }
- if (/^-disable-lto$/) { $PROGTESTOPTS .= " DISABLE_LTO=1"; next; }
- if (/^-test-opts$/) { $PROGTESTOPTS .= " $ARGV[0]"; shift; next; }
- if (/^-verbose$/) { $VERBOSE = 1; next; }
- if (/^-teelogs$/) { $TEELOGS = 1; next; }
- if (/^-nice$/) { $NICE = "nice "; next; }
- if (/^-f2c$/) { $CONFIGUREARGS .= " --with-f2c=$ARGV[0]";
- shift; next; }
- if (/^-with-externals$/) { $CONFIGUREARGS .= " --with-externals=$ARGV[0]";
- shift; next; }
- if (/^-configure-args$/) { $CONFIGUREARGS .= " $ARGV[0]";
- shift; next; }
- if (/^-submit-server/) { $SUBMITSERVER = "$ARGV[0]"; shift; next; }
- if (/^-submit-script/) { $SUBMITSCRIPT = "$ARGV[0]"; shift; next; }
- if (/^-submit-aux/) { $SUBMITAUX = "$ARGV[0]"; shift; next; }
- if (/^-nosubmit$/) { $SUBMIT = 0; next; }
- if (/^-nickname$/) { $nickname = "$ARGV[0]"; shift; next; }
- if (/^-gccpath/) { $CONFIGUREARGS .=
- " CC=$ARGV[0]/gcc CXX=$ARGV[0]/g++";
- $GCCPATH=$ARGV[0]; shift; next; }
- else { $GCCPATH=""; }
- if (/^-target/) { $CONFIGUREARGS .= " --target=$ARGV[0]";
- shift; next; }
- if (/^-cflags/) { $MAKEOPTS = "$MAKEOPTS C.Flags=\'$ARGV[0]\'";
- shift; next; }
- if (/^-cxxflags/) { $MAKEOPTS = "$MAKEOPTS CXX.Flags=\'$ARGV[0]\'";
- shift; next; }
- if (/^-ldflags/) { $MAKEOPTS = "$MAKEOPTS LD.Flags=\'$ARGV[0]\'";
- shift; next; }
- if (/^-test-cflags/) { $TESTFLAGS = "$TESTFLAGS CFLAGS=\'$ARGV[0]\'";
- shift; next; }
- if (/^-test-cxxflags/) { $TESTFLAGS = "$TESTFLAGS CXXFLAGS=\'$ARGV[0]\'";
- shift; next; }
- if (/^-compileflags/) { $MAKEOPTS = "$MAKEOPTS $ARGV[0]"; shift; next; }
- if (/^-llvmgccdir/) { $CONFIGUREARGS .= " --with-llvmgccdir=\'$ARGV[0]\'";
- $LLVMGCCPATH = $ARGV[0] . '/bin';
- shift; next;}
- if (/^-noexternals$/) { $NOEXTERNALS = 1; next; }
- if (/^-nouname$/) { $NOUNAME = 1; next; }
- if (/^-use-gmake/) { $MAKECMD = "gmake"; shift; next; }
- if (/^-extraflags/) { $CONFIGUREARGS .=
- " --with-extra-options=\'$ARGV[0]\'"; shift; next;}
- if (/^-noexternals$/) { $NOEXTERNALS = 1; next; }
- if (/^-nodejagnu$/) { next; }
- if (/^-nobuild$/) { $NOBUILD = 1; next; }
- print "Unknown option: $_ : ignoring!\n";
-}
-
-if ($CONFIGUREARGS !~ /--disable-jit/) {
- $CONFIGUREARGS .= " --enable-jit";
-}
-
-if (@ARGV != 0 and @ARGV != 3) {
- die "error: must specify 0 or 3 options!";
-}
-
-if (@ARGV == 3) {
- if ($CONFIG_PATH ne "") {
- die "error: arguments are unsupported in -config mode,";
- }
-
- # ARGV[0] used to be the CVS root, ignored for backward compatibility.
- $BuildDir = $ARGV[1];
- $WebDir = $ARGV[2];
-}
-
-if ($CONFIG_PATH ne "") {
- $BuildDir = "";
- $SVNURL = $TestSVNURL = "";
- if ($WebDir eq "") {
- die("please specify a web directory");
- }
-} else {
- if ($BuildDir eq "" or
- $WebDir eq "") {
- die("please specify a build directory, and a web directory");
- }
-}
-
-if ($nickname eq "") {
- die ("Please invoke NewNightlyTest.pl with command line option " .
- "\"-nickname <nickname>\"");
-}
-
-my $LLVMSrcDir = $ENV{'LLVMSRCDIR'};
-$LLVMSrcDir = "$BuildDir/llvm" unless $LLVMSrcDir;
-my $LLVMObjDir = $ENV{'LLVMOBJDIR'};
-$LLVMObjDir = "$BuildDir/llvm" unless $LLVMObjDir;
-my $LLVMTestDir = $ENV{'LLVMTESTDIR'};
-$LLVMTestDir = "$BuildDir/llvm/projects/llvm-test" unless $LLVMTestDir;
-
-##############################################################
-#
-# Define the file names we'll use
-#
-##############################################################
-
-my $Prefix = "$WebDir/$DATE";
-my $SingleSourceLog = "$Prefix-SingleSource-ProgramTest.txt.gz";
-my $MultiSourceLog = "$Prefix-MultiSource-ProgramTest.txt.gz";
-my $ExternalLog = "$Prefix-External-ProgramTest.txt.gz";
-
-# These are only valid in non-config mode.
-my $ConfigureLog = "", $BuildLog = "", $COLog = "";
-my $DejagnuLog = "", $DejagnuSum = "", $DejagnuLog = "";
-
-# Are we in config mode?
-my $ConfigMode = 0;
-
-##############################################################
-#
-# Helper functions
-#
-##############################################################
-
-sub GetDir {
- my $Suffix = shift;
- opendir DH, $WebDir;
- my @Result = reverse sort grep !/$DATE/, grep /[-0-9]+$Suffix/, readdir DH;
- closedir DH;
- return @Result;
-}
-
-sub RunLoggedCommand {
- my $Command = shift;
- my $Log = shift;
- my $Title = shift;
- if ($TEELOGS) {
- if ($VERBOSE) {
- print "$Title\n";
- print "$Command 2>&1 | tee $Log\n";
- }
- system "$Command 2>&1 | tee $Log";
- } else {
- if ($VERBOSE) {
- print "$Title\n";
- print "$Command > $Log 2>&1\n";
- }
- system "$Command > $Log 2>&1";
- }
-}
-
-sub RunAppendingLoggedCommand {
- my $Command = shift;
- my $Log = shift;
- my $Title = shift;
- if ($TEELOGS) {
- if ($VERBOSE) {
- print "$Title\n";
- print "$Command 2>&1 | tee -a $Log\n";
- }
- system "$Command 2>&1 | tee -a $Log";
- } else {
- if ($VERBOSE) {
- print "$Title\n";
- print "$Command >> $Log 2>&1\n";
- }
- system "$Command >> $Log 2>&1";
- }
-}
-
-sub GetRegex { # (Regex with ()'s, value)
- if ($_[1] =~ /$_[0]/m) {
- return $1;
- }
- return "0";
-}
-
-sub ChangeDir { # directory, logical name
- my ($dir,$name) = @_;
- chomp($dir);
- if ( $VERBOSE ) { print "Changing To: $name ($dir)\n"; }
- $result = chdir($dir);
- if (!$result) {
- print "ERROR!!! Cannot change directory to: $name ($dir) because $!\n";
- return false;
- }
- return true;
-}
-
-sub ReadFile {
- if (open (FILE, $_[0])) {
- undef $/;
- my $Ret = <FILE>;
- close FILE;
- $/ = '\n';
- return $Ret;
- } else {
- print "Could not open file '$_[0]' for reading!\n";
- return "";
- }
-}
-
-sub WriteFile { # (filename, contents)
- open (FILE, ">$_[0]") or die "Could not open file '$_[0]' for writing!\n";
- print FILE $_[1];
- close FILE;
-}
-
-sub CopyFile { #filename, newfile
- my ($file, $newfile) = @_;
- chomp($file);
- if ($VERBOSE) { print "Copying $file to $newfile\n"; }
- copy($file, $newfile);
-}
-
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-#
-# This function acts as a mini web browser submitting data
-# to our central server via the post method
-#
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sub WriteSentData {
- $variables = $_[0];
-
- # Write out the "...-sentdata.txt" file.
-
- my $sentdata="";
- foreach $x (keys (%$variables)){
- $value = $variables->{$x};
- $sentdata.= "$x => $value\n";
- }
- WriteFile "$Prefix-sentdata.txt", $sentdata;
-}
-
-sub SendData {
- $host = $_[0];
- $file = $_[1];
- $variables = $_[2];
-
- if (!($SUBMITAUX eq "")) {
- system "$SUBMITAUX \"$Prefix-sentdata.txt\"";
- }
-
- if (!$SUBMIT) {
- return "Skipped standard submit.\n";
- }
-
- # Create the content to send to the server.
-
- my $content;
- foreach $key (keys (%$variables)){
- $value = $variables->{$key};
- $value =~ s/([^A-Za-z0-9])/sprintf("%%%02X", ord($1))/seg;
- $content .= "$key=$value&";
- }
-
- # Send the data to the server.
- #
- # FIXME: This code should be more robust?
-
- $port=80;
- $socketaddr= sockaddr_in $port, inet_aton $host or die "Bad hostname\n";
- socket SOCK, PF_INET, SOCK_STREAM, getprotobyname('tcp') or
- die "Bad socket\n";
- connect SOCK, $socketaddr or die "Bad connection\n";
- select((select(SOCK), $| = 1)[0]);
-
- $length = length($content);
-
- my $send= "POST $file HTTP/1.0\n";
- $send.= "Host: $host\n";
- $send.= "Content-Type: application/x-www-form-urlencoded\n";
- $send.= "Content-length: $length\n\n";
- $send.= "$content";
-
- print SOCK $send;
- my $result;
- while(<SOCK>){
- $result .= $_;
- }
- close(SOCK);
-
- return $result;
-}
-
-##############################################################
-#
-# Individual Build & Test Functions
-#
-##############################################################
-
-# Create the source repository directory.
-sub CheckoutSource {
- die "Invalid call!" unless $ConfigMode == 0;
- if (-d $BuildDir) {
- if (!$NOREMOVE) {
- if ( $VERBOSE ) {
- print "Build directory exists! Removing it\n";
- }
- system "rm -rf $BuildDir";
- mkdir $BuildDir or die "Could not create checkout directory $BuildDir!";
- } else {
- if ( $VERBOSE ) {
- print "Build directory exists!\n";
- }
- }
- } else {
- mkdir $BuildDir or die "Could not create checkout directory $BuildDir!";
- }
-
- ChangeDir( $BuildDir, "checkout directory" );
- my $SVNCMD = "$NICE svn co --non-interactive";
- RunLoggedCommand("( time -p $SVNCMD $SVNURL/llvm/trunk llvm; cd llvm/projects ; " .
- " $SVNCMD $TestSVNURL/test-suite/trunk llvm-test )", $COLog,
- "CHECKOUT LLVM");
- if ($WITHCLANG) {
- RunLoggedCommand("( cd llvm/tools ; " .
- " $SVNCMD $SVNURL/cfe/trunk clang )", $COLog,
- "CHECKOUT CLANG");
- }
-}
-
-# Build the entire tree, saving build messages to the build log. Returns false
-# on build failure.
-sub BuildLLVM {
- die "Invalid call!" unless $ConfigMode == 0;
- my $EXTRAFLAGS = "--enable-spec --with-objroot=.";
- RunLoggedCommand("(time -p $NICE ./configure $CONFIGUREARGS $EXTRAFLAGS) ",
- $ConfigureLog, "CONFIGURE");
- # Build the entire tree, capturing the output into $BuildLog
- if (!$NOCLEAN) {
- RunAppendingLoggedCommand("($NICE $MAKECMD $MAKEOPTS clean)", $BuildLog, "BUILD CLEAN");
- }
- RunAppendingLoggedCommand("(time -p $NICE $MAKECMD $MAKEOPTS)", $BuildLog, "BUILD");
-
- if (`grep -a '^$MAKECMD\[^:]*: .*Error' $BuildLog | wc -l` + 0 ||
- `grep -a '^$MAKECMD: \*\*\*.*Stop.' $BuildLog | wc -l` + 0) {
- return 0;
- }
-
- return 1;
-}
-
-# Run the named tests (i.e. "SingleSource" "MultiSource" "External")
-sub TestDirectory {
- my $SubDir = shift;
- ChangeDir( "$LLVMTestDir/$SubDir",
- "Programs Test Subdirectory" ) || return ("", "");
-
- my $ProgramTestLog = "$Prefix-$SubDir-ProgramTest.txt";
-
- # Make sure to clean the test results.
- RunLoggedCommand("$MAKECMD -k $MAKEOPTS $PROGTESTOPTS clean $TESTFLAGS",
- $ProgramTestLog, "TEST DIRECTORY $SubDir");
-
- # Run the programs tests... creating a report.nightly.csv file.
- my $LLCBetaOpts = "";
- RunLoggedCommand("$MAKECMD -k $MAKEOPTS $PROGTESTOPTS report.nightly.csv ".
- "$TESTFLAGS TEST=nightly",
- $ProgramTestLog, "TEST DIRECTORY $SubDir");
- $LLCBetaOpts = `$MAKECMD print-llcbeta-option`;
-
- my $ProgramsTable;
- if (`grep -a '^$MAKECMD\[^:]: .*Error' $ProgramTestLog | wc -l` + 0) {
- $ProgramsTable="Error running test $SubDir\n";
- print "ERROR TESTING\n";
- } elsif (`grep -a '^$MAKECMD\[^:]: .*No rule to make target' $ProgramTestLog | wc -l` + 0) {
- $ProgramsTable="Makefile error running tests $SubDir!\n";
- print "ERROR TESTING\n";
- } else {
- # Create a list of the tests which were run...
- system "egrep -a 'TEST-(PASS|FAIL)' < $ProgramTestLog ".
- "| sort > $Prefix-$SubDir-Tests.txt";
- }
- $ProgramsTable = ReadFile "report.nightly.csv";
-
- ChangeDir( "../../..", "Programs Test Parent Directory" );
- return ($ProgramsTable, $LLCBetaOpts);
-}
-
-# Run all the nightly tests and return the program tables and the list of tests,
-# passes, fails, and xfails.
-sub RunNightlyTest() {
- ($SSProgs, $llcbeta_options) = TestDirectory("SingleSource");
- WriteFile "$Prefix-SingleSource-Performance.txt", $SSProgs;
- ($MSProgs, $llcbeta_options) = TestDirectory("MultiSource");
- WriteFile "$Prefix-MultiSource-Performance.txt", $MSProgs;
- if ( ! $NOEXTERNALS ) {
- ($ExtProgs, $llcbeta_options) = TestDirectory("External");
- WriteFile "$Prefix-External-Performance.txt", $ExtProgs;
- system "cat $Prefix-SingleSource-Tests.txt " .
- "$Prefix-MultiSource-Tests.txt ".
- "$Prefix-External-Tests.txt | sort > $Prefix-Tests.txt";
- system "cat $Prefix-SingleSource-Performance.txt " .
- "$Prefix-MultiSource-Performance.txt ".
- "$Prefix-External-Performance.txt | sort > $Prefix-Performance.txt";
- } else {
- $ExtProgs = "External TEST STAGE SKIPPED\n";
- if ( $VERBOSE ) {
- print "External TEST STAGE SKIPPED\n";
- }
- system "cat $Prefix-SingleSource-Tests.txt " .
- "$Prefix-MultiSource-Tests.txt ".
- " | sort > $Prefix-Tests.txt";
- system "cat $Prefix-SingleSource-Performance.txt " .
- "$Prefix-MultiSource-Performance.txt ".
- " | sort > $Prefix-Performance.txt";
- }
-
- # Compile passes, fails, xfails.
- my $All = (ReadFile "$Prefix-Tests.txt");
- my @TestSuiteResultLines = split "\n", $All;
- my ($Passes, $Fails, $XFails) = "";
-
- for ($x=0; $x < @TestSuiteResultLines; $x++) {
- if (@TestSuiteResultLines[$x] =~ m/^PASS:/) {
- $Passes .= "$TestSuiteResultLines[$x]\n";
- }
- elsif (@TestSuiteResultLines[$x] =~ m/^FAIL:/) {
- $Fails .= "$TestSuiteResultLines[$x]\n";
- }
- elsif (@TestSuiteResultLines[$x] =~ m/^XFAIL:/) {
- $XFails .= "$TestSuiteResultLines[$x]\n";
- }
- }
-
- return ($SSProgs, $MSProgs, $ExtProgs, $All, $Passes, $Fails, $XFails);
-}
-
-##############################################################
-#
-# Initialize filenames
-#
-##############################################################
-
-if (! -d $WebDir) {
- mkdir $WebDir, 0777 or die "Unable to create web directory: '$WebDir'.";
- if($VERBOSE){
- warn "$WebDir did not exist; creating it.\n";
- }
-}
-
-if ($CONFIG_PATH ne "") {
- $ConfigMode = 1;
- $LLVMSrcDir = GetRegex "^(.*)\\s+", `$CONFIG_PATH --src-root`;
- $LLVMObjDir = GetRegex "^(.*)\\s+", `$CONFIG_PATH --obj-root`;
- # FIXME: Add llvm-config hook for this?
- $LLVMTestDir = $LLVMObjDir . "/projects/test-suite";
-} else {
- $ConfigureLog = "$Prefix-Configure-Log.txt";
- $BuildLog = "$Prefix-Build-Log.txt";
- $COLog = "$Prefix-CVS-Log.txt";
-}
-
-if ($VERBOSE) {
- if ($CONFIG_PATH ne "") {
- print "INITIALIZED (config mode)\n";
- print "WebDir = $WebDir\n";
- print "Prefix = $Prefix\n";
- print "LLVM Src = $LLVMSrcDir\n";
- print "LLVM Obj = $LLVMObjDir\n";
- print "LLVM Test = $LLVMTestDir\n";
- } else {
- print "INITIALIZED\n";
- print "SVN URL = $SVNURL\n";
- print "COLog = $COLog\n";
- print "BuildDir = $BuildDir\n";
- print "WebDir = $WebDir\n";
- print "Prefix = $Prefix\n";
- print "BuildLog = $BuildLog\n";
- }
-}
-
-##############################################################
-#
-# The actual NewNightlyTest logic.
-#
-##############################################################
-
-$starttime = `date "+20%y-%m-%d %H:%M:%S"`;
-
-my $BuildError = 0, $BuildStatus = "OK";
-if ($ConfigMode == 0) {
- if (!$NOCHECKOUT) {
- CheckoutSource();
- }
-
- # Build LLVM.
- ChangeDir( $LLVMSrcDir , "llvm source directory") ;
- if ($NOCHECKOUT || $NOBUILD) {
- $BuildStatus = "Skipped by user";
- } else {
- if (!BuildLLVM()) {
- if( $VERBOSE) { print "\n***ERROR BUILDING TREE\n\n"; }
- $BuildError = 1;
- $BuildStatus = "Error: compilation aborted";
- }
- }
-}
-
-# Run the llvm-test tests.
-my ($SingleSourceProgramsTable, $MultiSourceProgramsTable, $ExternalProgramsTable,
- $all_tests, $passes, $fails, $xfails) = "";
-if (!$NOTEST && !$BuildError) {
- ($SingleSourceProgramsTable, $MultiSourceProgramsTable, $ExternalProgramsTable,
- $all_tests, $passes, $fails, $xfails) = RunNightlyTest();
-}
-
-$endtime = `date "+20%y-%m-%d %H:%M:%S"`;
-
-# The last bit of logic is to remove the build and web dirs, after sending data
-# to the server.
-
-##############################################################
-#
-# Accumulate the information to send to the server.
-#
-##############################################################
-
-if ( $VERBOSE ) { print "PREPARING LOGS TO BE SENT TO SERVER\n"; }
-
-if ( ! $NOUNAME ) {
- $machine_data = "uname: ".`uname -a`.
- "hardware: ".`uname -m`.
- "os: ".`uname -sr`.
- "name: ".`uname -n`.
- "date: ".`date \"+20%y-%m-%d\"`.
- "time: ".`date +\"%H:%M:%S\"`;
-} else {
- $machine_data = "uname: (excluded)\n".
- "hardware: ".`uname -m`.
- "os: ".`uname -sr`.
- "name: $nickname\n".
- "date: ".`date \"+20%y-%m-%d\"`.
- "time: ".`date +\"%H:%M:%S\"`;
-}
-
-# Get gcc version.
-my $gcc_version_long = "";
-if ($GCCPATH ne "") {
- $gcc_version_long = `$GCCPATH/gcc --version`;
-} elsif ($ENV{"CC"}) {
- $gcc_version_long = `$ENV{"CC"} --version`;
-} else {
- $gcc_version_long = `gcc --version`;
-}
-my $gcc_version = (split '\n', $gcc_version_long)[0];
-
-# Get llvm-gcc target triple.
-#
-# FIXME: This shouldn't be hardwired to llvm-gcc.
-my $llvmgcc_version_long = "";
-if ($LLVMGCCPATH ne "") {
- $llvmgcc_version_long = `$LLVMGCCPATH/llvm-gcc -v 2>&1`;
-} else {
- $llvmgcc_version_long = `llvm-gcc -v 2>&1`;
-}
-(split '\n', $llvmgcc_version_long)[1] =~ /Target: (.+)/;
-my $targetTriple = $1;
-
-# Logs.
-my ($ConfigureLogData, $BuildLogData, $CheckoutLogData) = "";
-if ($ConfigMode == 0) {
- $ConfigureLogData = ReadFile $ConfigureLog;
- $BuildLogData = ReadFile $BuildLog;
- $CheckoutLogData = ReadFile $COLog;
-}
-
-# Checkout info.
-my $CheckoutTime_Wall = GetRegex "^real ([0-9.]+)", $CheckoutLogData;
-my $CheckoutTime_User = GetRegex "^user ([0-9.]+)", $CheckoutLogData;
-my $CheckoutTime_Sys = GetRegex "^sys ([0-9.]+)", $CheckoutLogData;
-my $CheckoutTime_CPU = $CVSCheckoutTime_User + $CVSCheckoutTime_Sys;
-
-# Configure info.
-my $ConfigTimeU = GetRegex "^user ([0-9.]+)", $ConfigureLogData;
-my $ConfigTimeS = GetRegex "^sys ([0-9.]+)", $ConfigureLogData;
-my $ConfigTime = $ConfigTimeU+$ConfigTimeS; # ConfigTime = User+System
-my $ConfigWallTime = GetRegex "^real ([0-9.]+)",$ConfigureLogData;
-$ConfigTime=-1 unless $ConfigTime;
-$ConfigWallTime=-1 unless $ConfigWallTime;
-
-# Build info.
-my $BuildTimeU = GetRegex "^user ([0-9.]+)", $BuildLogData;
-my $BuildTimeS = GetRegex "^sys ([0-9.]+)", $BuildLogData;
-my $BuildTime = $BuildTimeU+$BuildTimeS; # BuildTime = User+System
-my $BuildWallTime = GetRegex "^real ([0-9.]+)", $BuildLogData;
-$BuildTime=-1 unless $BuildTime;
-$BuildWallTime=-1 unless $BuildWallTime;
-
-if ( $VERBOSE ) { print "SEND THE DATA VIA THE POST REQUEST\n"; }
-
-my %hash_of_data = (
- 'machine_data' => $machine_data,
- 'build_data' => $ConfigureLogData . $BuildLogData,
- 'gcc_version' => $gcc_version,
- 'nickname' => $nickname,
- 'dejagnutime_wall' => "0.0",
- 'dejagnutime_cpu' => "0.0",
- 'cvscheckouttime_wall' => $CheckoutTime_Wall,
- 'cvscheckouttime_cpu' => $CheckoutTime_CPU,
- 'configtime_wall' => $ConfigWallTime,
- 'configtime_cpu'=> $ConfigTime,
- 'buildtime_wall' => $BuildWallTime,
- 'buildtime_cpu' => $BuildTime,
- 'buildstatus' => $BuildStatus,
- 'singlesource_programstable' => $SingleSourceProgramsTable,
- 'multisource_programstable' => $MultiSourceProgramsTable,
- 'externalsource_programstable' => $ExternalProgramsTable,
- 'llcbeta_options' => $llcbeta_options,
- 'passing_tests' => $passes,
- 'expfail_tests' => $xfails,
- 'unexpfail_tests' => $fails,
- 'all_tests' => $all_tests,
- 'dejagnutests_results' => "Dejagnu skipped by user choice.",
- 'dejagnutests_log' => "",
- 'starttime' => $starttime,
- 'endtime' => $endtime,
- 'target_triple' => $targetTriple,
-
- # Unused, but left around for backwards compatibility.
- 'warnings' => "",
- 'cvsusercommitlist' => "",
- 'cvsuserupdatelist' => "",
- 'cvsaddedfiles' => "",
- 'cvsmodifiedfiles' => "",
- 'cvsremovedfiles' => "",
- 'lines_of_code' => "",
- 'cvs_file_count' => 0,
- 'cvs_dir_count' => 0,
- 'warnings_removed' => "",
- 'warnings_added' => "",
- 'new_tests' => "",
- 'removed_tests' => "",
- 'o_file_sizes' => "",
- 'a_file_sizes' => ""
-);
-
-# Write out the "...-sentdata.txt" file.
-WriteSentData \%hash_of_data;
-
-if ($SUBMIT || !($SUBMITAUX eq "")) {
- my $response = SendData $SUBMITSERVER,$SUBMITSCRIPT,\%hash_of_data;
- if( $VERBOSE) { print "============================\n$response"; }
-} else {
- print "============================\n";
- foreach $x(keys %hash_of_data){
- print "$x => $hash_of_data{$x}\n";
- }
-}
-
-##############################################################
-#
-# Remove the source tree...
-#
-##############################################################
-system ( "$NICE rm -rf $BuildDir")
- if (!$NOCHECKOUT and !$NOREMOVE and !$NOREMOVEATEND);
-system ( "$NICE rm -rf $WebDir")
- if (!$NOCHECKOUT and !$NOREMOVE and !$NOREMOVERESULTS);
diff --git a/utils/NightlyTest.gnuplot b/utils/NightlyTest.gnuplot
deleted file mode 100644
index 514b72ab20ad..000000000000
--- a/utils/NightlyTest.gnuplot
+++ /dev/null
@@ -1,214 +0,0 @@
-set terminal png
-
-##------- Plot small Date vs LOC ----
-set output "running_loc.png"
-set xlabel "Date"
-set ylabel "Lines of Code"
-set xdata time
-set timefmt "%Y-%m-%d-%H:%M:%S:"
-set format x "%b %d, %Y"
-
-set size .75,.75
-set xtics rotate
-set xlabel 0,-1
-plot "running_loc.txt" using 1:2 title '' with lines, \
- "running_loc.txt" using 1:2 title "Date vs. Lines of Code" with lines
-
-##------- Plot large Date vs LOC ----
-set size 1.5,1.5
-set xtics norotate
-set xlabel 0,0
-set output "running_loc_large.png"
-plot "running_loc.txt" using 1:2 title '', \
- "running_loc.txt" using 1:2 title "Date vs. Lines of Code" with lines
-
-
-# Delete all labels...
-set nolabel
-
-##------- Olden CBE performance ----
-
-set size .75,.75
-set xtics rotate
-set xlabel 0,-1
-set output "running_Olden_cbe_time.png"
-set ylabel "CBE compiled execution time (s)"
-plot "running_Olden_cbe_time.txt" u 1:2 t '' with lines, \
- "running_Olden_cbe_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_cbe_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_cbe_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_cbe_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_cbe_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_cbe_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_cbe_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_cbe_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_cbe_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_cbe_time.txt" u 1:11 t "voronoi" \
- with lines
-
-set size 1.5,1.5
-set xtics norotate
-set xlabel 0,0
-set output "running_Olden_cbe_time_large.png"
-plot "running_Olden_cbe_time.txt" u 1:2 t '' with lines, \
- "running_Olden_cbe_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_cbe_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_cbe_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_cbe_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_cbe_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_cbe_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_cbe_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_cbe_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_cbe_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_cbe_time.txt" u 1:11 t "voronoi" \
- with lines
-
-##------- Olden JIT performance ----
-
-set size .75,.75
-set xtics rotate
-set xlabel 0,-1
-set output "running_Olden_jit_time.png"
-set ylabel "JIT execution time (s)"
-plot "running_Olden_jit_time.txt" u 1:2 t '' with lines, \
- "running_Olden_jit_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_jit_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_jit_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_jit_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_jit_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_jit_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_jit_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_jit_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_jit_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_jit_time.txt" u 1:11 t "voronoi" \
- with lines
-
-set size 1.5,1.5
-set xtics norotate
-set xlabel 0,0
-set output "running_Olden_jit_time_large.png"
-plot "running_Olden_jit_time.txt" u 1:2 t '' with lines, \
- "running_Olden_jit_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_jit_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_jit_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_jit_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_jit_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_jit_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_jit_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_jit_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_jit_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_jit_time.txt" u 1:11 t "voronoi" \
- with lines
-
-##------- Olden LLC performance ----
-
-set size .75,.75
-set xtics rotate
-set xlabel 0,-1
-set output "running_Olden_llc_time.png"
-set ylabel "LLC compiled execution time (s)"
-plot "running_Olden_llc_time.txt" u 1:2 t '' with lines, \
- "running_Olden_llc_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_llc_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_llc_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_llc_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_llc_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_llc_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_llc_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_llc_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_llc_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_llc_time.txt" u 1:11 t "voronoi" \
- with lines
-
-set size 1.5,1.5
-set xtics norotate
-set xlabel 0,0
-set output "running_Olden_llc_time_large.png"
-plot "running_Olden_llc_time.txt" u 1:2 t '' with lines, \
- "running_Olden_llc_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_llc_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_llc_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_llc_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_llc_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_llc_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_llc_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_llc_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_llc_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_llc_time.txt" u 1:11 t "voronoi" \
- with lines
-
-
-##------- Olden optimizer time ----
-
-set size .75,.75
-set xtics rotate
-set xlabel 0,-1
-set output "running_Olden_opt_time.png"
-set ylabel "Time to run the optimizer (s)"
-plot "running_Olden_opt_time.txt" u 1:2 t '' with lines, \
- "running_Olden_opt_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_opt_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_opt_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_opt_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_opt_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_opt_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_opt_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_opt_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_opt_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_opt_time.txt" u 1:11 t "voronoi" \
- with lines
-
-set size 1.5,1.5
-set xtics norotate
-set xlabel 0,0
-set output "running_Olden_opt_time_large.png"
-plot "running_Olden_opt_time.txt" u 1:2 t '' with lines, \
- "running_Olden_opt_time.txt" u 1:2 t "bh" with lines, \
- "running_Olden_opt_time.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_opt_time.txt" u 1:4 t "mst" with lines, \
- "running_Olden_opt_time.txt" u 1:5 t "power" with lines, \
- "running_Olden_opt_time.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_opt_time.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_opt_time.txt" u 1:8 t "health" with lines, \
- "running_Olden_opt_time.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_opt_time.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_opt_time.txt" u 1:11 t "voronoi" \
- with lines
-
-
-##------- Bytecode size ----
-
-set size .75,.75
-set xtics rotate
-set xlabel 0,-1
-set output "running_Olden_bytecode.png"
-set ylabel "Program bytecode size (bytes)"
-plot "running_Olden_bytecode.txt" u 1:2 t '' with lines, \
- "running_Olden_bytecode.txt" u 1:2 t "bh" with lines, \
- "running_Olden_bytecode.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_bytecode.txt" u 1:4 t "mst" with lines, \
- "running_Olden_bytecode.txt" u 1:5 t "power" with lines, \
- "running_Olden_bytecode.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_bytecode.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_bytecode.txt" u 1:8 t "health" with lines, \
- "running_Olden_bytecode.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_bytecode.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_bytecode.txt" u 1:11 t "voronoi" \
- with lines
-
-set size 1.5,1.5
-set xtics norotate
-set xlabel 0,0
-set output "running_Olden_bytecode_large.png"
-plot "running_Olden_bytecode.txt" u 1:2 t '' with lines, \
- "running_Olden_bytecode.txt" u 1:2 t "bh" with lines, \
- "running_Olden_bytecode.txt" u 1:3 t "em3d" with lines, \
- "running_Olden_bytecode.txt" u 1:4 t "mst" with lines, \
- "running_Olden_bytecode.txt" u 1:5 t "power" with lines, \
- "running_Olden_bytecode.txt" u 1:6 t "tsp" with lines, \
- "running_Olden_bytecode.txt" u 1:7 t "bisort" with lines, \
- "running_Olden_bytecode.txt" u 1:8 t "health" with lines, \
- "running_Olden_bytecode.txt" u 1:9 t "perimeter" with lines, \
- "running_Olden_bytecode.txt" u 1:10 t "treeadd" with lines, \
- "running_Olden_bytecode.txt" u 1:11 t "voronoi" \
- with lines
diff --git a/utils/NightlyTestTemplate.html b/utils/NightlyTestTemplate.html
deleted file mode 100644
index c38bb2e776bb..000000000000
--- a/utils/NightlyTestTemplate.html
+++ /dev/null
@@ -1,244 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html><head><title>LLVM Test Results for $DateString</title></head>
-
-<body bgcolor=white>
-<center><font size=+3 face=Verdana><b>LLVM Test Results for $DateString</b></font></center>
-<hr height=1>
-
-<table width=100%>
-<tr><td valign=top align=center>
-
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="5" cellspacing="0"><tr><td bgcolor="#DDAA77">
-<font size=+1><b>Sections:</b></font><br>
-</td></tr><tr><td bgcolor="#FFCC99" align=center>
-<a href="#Overview">Overview</a><br>
-<a href="#Changes">Changes</a><br>
-<a href="#Dejagnu">Dejagnu Tests</a><br>
-<a href="#Trends">Trends</a><br>
-<a href="#Programs">Programs</a><br>
-</td></tr></table></td></tr></table>
-
-<p>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="5" cellspacing="0"><tr><td bgcolor="#DDAA77"
-<font size=+1><b>Previous:</b></font><br>
-</td></tr><tr><td bgcolor="#FFCC99">
- $PrevDaysList
-</td></tr></table></td></tr></table>
-<p>
-
-<font size=+1><b>Back to:</b></font><br>
-<a href="http://llvm.org/testresults/">Test&nbsp;Results</a><br>
-<a href="http://llvm.org/">LLVM&nbsp;Page</a><p>
-
-</td><td valign=top>
-
-<center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="10" cellspacing="0"><tr><td bgcolor="#DDAA77"
-<font size=+2 face=Verdana><b><a name="Overview">Today's Test Results Overview</font></b>
-</td></tr></table></td></tr></table></center><p>
-
-<!-- Running LOC graph -->
-<table align=right>
-<tr><td>
-<a href="running_loc_large.png"
- ><img border=0 width=480 height=360 src="running_loc.png"></a>
-</td></tr>
-<tr><td align=center>Lines Of Code over Time<br>
-<font size=-1><a href="running_loc_large.png">Click for larger view</a></font>
-</td></tr>
-</table>
-
-<h2>Nightly Test Overview:</h2>
-<ul>
- <li>Start: <b>$TestStartTime</b></li>
- <li>Finish: <b>$TestFinishTime</b></li>
- <li>Platform: <b>$TestPlatform</b></li>
-</ul>
-<h2>CVS Tree Overview:</h2>
-<ul>
-<li><a href="$DATE-CVS-Log.txt">CVS Checkout Log</a>
-<ul>
- <b>$NumDirsInCVS</b> dirs, <b>$NumFilesInCVS</b> files, <b>$LOC</b>
- lines of code, checked out in <b>$CVSCheckoutTime</b> seconds<br></ul>
-<li><a href="$DATE-Build-Log.txt">Compilation Log</a>
-<table>
-<tr><td><b>Item</b></td><td><b>CPU Time</b></td><td><b>Wall Clock</b></td></tr>
-<tr><td>Configure CVS Tree</td><td>$ConfigTime</td><td>$ConfigWallTime</td></tr>
-<tr><td>Build CVS Tree</td><td>$BuildTime</td><td>$BuildWallTime</td></tr>
-<tr><td>Run Dejagnu Tests</td><td>$DejagnuTime</td><td>$DejagnuWallTime</td></tr>
-</table></li>
-<li>Number of object files compiled: <b>$NumObjects</b></li>
-<li>Number of libraries linked: <b>$NumLibraries</b></li>
-<li>Number of executables linked:<b> $NumExecutables</b></li>
-<li>Build Status: $BuildStatus</li>
-</ul>
-
-<h2>Warnings during the build:</h2>
-$WarningsList
-
-<br><br><center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="10" cellspacing="0"><tr><td bgcolor="#DDAA77"
-<font size=+2 face=Verdana><b><a name="Changes">Changes from Yesterday</font></b>
-</td></tr></table></td></tr></table></center><p>
-
-<h2>Changes to CVS:</h2>
-<ul>
-<li>Users who committed to CVS: <b>$UserCommitList</b>
-<li>Users who updated from CVS: <b>$UserUpdateList</b>
-<li>Added Files: $AddedFilesList
-<li>Modified Files: $ModifiedFilesList
-<li>Removed Files: $RemovedFilesList
-</ul><p>
-
-<h2>Changes to Warnings:</h2>
-<p>Warnings Added:</p>
-$WarningsAdded
-<p>Warnings Removed:</p>
-$WarningsRemoved
-
-<h2>Changes in the test suite:</h2>
-<ul>
-<li>New Tests: $TestsAdded
-<li>Removed Tests: $TestsRemoved
-<li>Newly passing tests: $TestsFixed
-<li>Newly failing tests: $TestsBroken
-</ul>
-</td></tr></table>
-
-
-<br/><br/><center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="10" cellspacing="0"><tr><td bgcolor="#DDAA77"
-<font size=+2 face=Verdana><b><a name="Dejagnu">Dejagnu Test Results</font></b>
-</td></tr></table></td></tr></table></center>
-<br/>
-$DejagnuTestResults
-<p>A complete log of the <a href="$DATE-Dejagnu-testrun.log">Feature and Regression</a> test run is available for further analysis.</p>
-
-<br><br><center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="10" cellspacing="0"><tr><td bgcolor="#DDAA77"
-<font size=+2 face=Verdana><b><a name="Trends">Changes Over Time</font></b>
-</td></tr></table></td></tr></table></center><p>
-
-
-Here are some charts showing how the LLVM optimizer and code generators are
-changing over time. For now we use the Olden benchmark suite to measure this,
-but eventually we will switch to using SPEC CPU2000. All programs are run with
-"LARGE_PROBLEM_SIZE" enabled. Click on any of the charts to get a larger
-version.<p>
-
-<h2>Compilation Measurements:</h2>
-
-<table border="0" align=center>
-<tr>
-<td width=50% align=center>
-<a href="running_Olden_bytecode_large.png"><img width=480 height=360 border=0 src="running_Olden_bytecode.png"></a><br>
-Size of LLVM bytecode files
-</td>
-<td width=50% align=center>
-<a href="running_Olden_opt_time_large.png"><img width=480 height=360 border=0 src="running_Olden_opt_time.png"></a><br>
-Time to run the LLVM optimizer on each program
-</td></tr>
-</table>
-
-<h2>Program Execution Measurements:</h2>
-
-<table border="0" align=center>
-<tr>
-<td width=50% align=center>
-<a href="running_Olden_cbe_time_large.png"><img width=480 height=360 border=0 src="running_Olden_cbe_time.png"></a><br>
-Execution time for CBE generated executable
-</td>
-<td width=50% align=center>
-<a href="running_Olden_llc_time_large.png"><img width=480 height=360 border=0 src="running_Olden_llc_time.png"></a><br>
-Execution time for the LLC generated executable
-</td></tr>
-
-<tr>
-<td align=center>
-<a href="running_Olden_jit_time_large.png"><img width=480 height=360 border=0 src="running_Olden_jit_time.png"></a><br>
-Execution time for program in the JIT
-</td>
-<td></td></tr>
-</table>
-
-
-
-
-<br><br><center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-<table border="0" cellpadding="10" cellspacing="0"><tr><td bgcolor="#DDAA77"
-<font size=+2 face=Verdana><b><a name="Programs">Program Tests</font></b>
-</td></tr></table></td></tr></table></center><p>
-
-This section tests LLVM on a variety of programs in the test suite. This
-includes benchmark suites such as Olden, McCat, Ptrdist, and SPEC, as
-well as a few random programs with test inputs. This section is meant to track
-how stable LLVM is as a whole. A failure in the execution of any test is marked
-with an asterisk: `*'. The columns of the tables are:<p>
-
-<ol>
-<li><a name="Program">Program</a> - The name of the program for that row.</li>
-<li><a name="GCCAS">GCCAS</a> - Time to run LLVM optimizers on the program.</li>
-<li><a name="Bytecode">Bytecode</a> - The size of the bytecode for the
- program</li>
-<li><a name="Instrs">Instrs</a> - The number of LLVM instructions in the
- compiled bytecode</li>
-<li><a name="LLC<br>compile">LLC compile</a> - The time taken compile with
- LLC (the static backend)</li>
-<li><a name="JIT<br>codegen">JIT codegen</a> - The amount of time spent in the
- JIT itself, instead of executing the program.</li>
-<li><a name="Machine<br>code">Machine code</a> - The number of bytes of machine
- code generated by the JIT.</li>
-<li><a name="GCC">GCC</a> - The time taken to execute the program when compiled
- with GCC -O2.</li>
-<li><a name="CBE">CBE</a> - The time taken to execute the program after
- compilation through the C backend, compiled with -O2.</li>
-<li><a name="LLC">LLC</a> - How long does the program generated by the static
- backend LLC take to execute </li>
-<li><a name="JIT">JIT</a> - The amount of time spent running the
- program with the JIT; this includes the code generation phase (listed above)
- and actually running the program.</li>
-<li><a name="GCC/LLC">GCC/LLC</a> - The speed-up of the LLC output vs the native
- GCC output: greater than 1 is a speedup, less than 1 is a slowdown.</li>
-<li><a name="GCC/CBE">GCC/CBE</a> - The speed-up of the CBE output vs the native
- GCC output: greater than 1 is a speedup, less than 1 is a slowdown.</li>
-<li><a name="LLC-BETA">LLC-BETA</a> - How long does the program generated by the static
- backend LLC take to execute the program, when compiled with new experimental
- features. This is temporary, for tuning.</li>
-</ol><p>
-
-A complete log of testing
-<a href="$DATE-SingleSource-ProgramTest.txt.gz">SingleSource</a>,
-<a href="$DATE-MultiSource-ProgramTest.txt.gz">MultiSource</a>, and
-<a href="$DATE-External-ProgramTest.txt.gz">External</a> programs is
-available for further analysis.
-
-<h2>Programs/External</h2>
-
-<center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-$ExternalProgramsTable
-</td></tr></table></center>
-
-<h2>Programs/MultiSource</h2>
-
-<center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-$MultiSourceProgramsTable
-</td></tr></table></center>
-
-<h2>Programs/SingleSource</h2>
-
-<center>
-<table border="0" cellspacing="0" cellpadding="2"><tr><td bgcolor="#000000">
-$SingleSourceProgramsTable
-</td></tr></table></center>
-
-</body></html>
-
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp
deleted file mode 100644
index 145b96df98e2..000000000000
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ /dev/null
@@ -1,1790 +0,0 @@
-//===------------ ARMDecoderEmitter.cpp - Decoder Generator ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains the tablegen backend that emits the decoder functions for ARM and
-// Thumb. The disassembler core includes the auto-generated file, invokes the
-// decoder functions, and builds up the MCInst based on the decoded Opcode.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm-decoder-emitter"
-
-#include "ARMDecoderEmitter.h"
-#include "CodeGenTarget.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/TableGen/Record.h"
-
-#include <vector>
-#include <map>
-#include <string>
-
-using namespace llvm;
-
-/////////////////////////////////////////////////////
-// //
-// Enums and Utilities for ARM Instruction Format //
-// //
-/////////////////////////////////////////////////////
-
-#define ARM_FORMATS \
- ENTRY(ARM_FORMAT_PSEUDO, 0) \
- ENTRY(ARM_FORMAT_MULFRM, 1) \
- ENTRY(ARM_FORMAT_BRFRM, 2) \
- ENTRY(ARM_FORMAT_BRMISCFRM, 3) \
- ENTRY(ARM_FORMAT_DPFRM, 4) \
- ENTRY(ARM_FORMAT_DPSOREGREGFRM, 5) \
- ENTRY(ARM_FORMAT_LDFRM, 6) \
- ENTRY(ARM_FORMAT_STFRM, 7) \
- ENTRY(ARM_FORMAT_LDMISCFRM, 8) \
- ENTRY(ARM_FORMAT_STMISCFRM, 9) \
- ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
- ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
- ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_SATFRM, 13) \
- ENTRY(ARM_FORMAT_EXTFRM, 14) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
- ENTRY(ARM_FORMAT_THUMBFRM, 25) \
- ENTRY(ARM_FORMAT_MISCFRM, 26) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
- ENTRY(ARM_FORMAT_NLdSt, 30) \
- ENTRY(ARM_FORMAT_N1RegModImm, 31) \
- ENTRY(ARM_FORMAT_N2Reg, 32) \
- ENTRY(ARM_FORMAT_NVCVT, 33) \
- ENTRY(ARM_FORMAT_NVecDupLn, 34) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
- ENTRY(ARM_FORMAT_N3Reg, 37) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
- ENTRY(ARM_FORMAT_NVecExtract, 39) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
- ENTRY(ARM_FORMAT_NVTBL, 41) \
- ENTRY(ARM_FORMAT_DPSOREGIMMFRM, 42)
-
-// ARM instruction format specifies the encoding used by the instruction.
-#define ENTRY(n, v) n = v,
-typedef enum {
- ARM_FORMATS
- ARM_FORMAT_NA
-} ARMFormat;
-#undef ENTRY
-
-// Converts enum to const char*.
-static const char *stringForARMFormat(ARMFormat form) {
-#define ENTRY(n, v) case n: return #n;
- switch(form) {
- ARM_FORMATS
- case ARM_FORMAT_NA:
- default:
- return "";
- }
-#undef ENTRY
-}
-
-enum {
- IndexModeNone = 0,
- IndexModePre = 1,
- IndexModePost = 2,
- IndexModeUpd = 3
-};
-
-/////////////////////////
-// //
-// Utility functions //
-// //
-/////////////////////////
-
-/// byteFromBitsInit - Return the byte value from a BitsInit.
-/// Called from getByteField().
-static uint8_t byteFromBitsInit(BitsInit &init) {
- int width = init.getNumBits();
-
- assert(width <= 8 && "Field is too large for uint8_t!");
-
- int index;
- uint8_t mask = 0x01;
-
- uint8_t ret = 0;
-
- for (index = 0; index < width; index++) {
- if (static_cast<BitInit*>(init.getBit(index))->getValue())
- ret |= mask;
-
- mask <<= 1;
- }
-
- return ret;
-}
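-// Illustrative example (not part of the original file): a BitsInit of
-// {1, 0, 1}, with bit 0 listed first, yields 0b101 == 5.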
-
-static uint8_t getByteField(const Record &def, const char *str) {
- BitsInit *bits = def.getValueAsBitsInit(str);
- return byteFromBitsInit(*bits);
-}
-
-static BitsInit &getBitsField(const Record &def, const char *str) {
- BitsInit *bits = def.getValueAsBitsInit(str);
- return *bits;
-}
-
-/// sameStringExceptSuffix - Return true if the two strings differ only in RHS's
-/// suffix. ("VST4d8", "VST4d8_UPD", "_UPD") as input returns true.
-static
-bool sameStringExceptSuffix(const StringRef LHS, const StringRef RHS,
- const StringRef Suffix) {
-
- if (RHS.startswith(LHS) && RHS.endswith(Suffix))
- return RHS.size() == LHS.size() + Suffix.size();
-
- return false;
-}
-
-/// thumbInstruction - Determine whether we have a Thumb instruction.
-/// See also ARMInstrFormats.td.
-static bool thumbInstruction(uint8_t Form) {
- return Form == ARM_FORMAT_THUMBFRM;
-}
-
-// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
-// for a bit value.
-//
-// BIT_UNFILTERED is used as the init value for a filter position. It is used
-// only during filter processing.
-typedef enum {
- BIT_TRUE, // '1'
- BIT_FALSE, // '0'
- BIT_UNSET, // '?'
- BIT_UNFILTERED // unfiltered
-} bit_value_t;
-
-static bool ValueSet(bit_value_t V) {
- return (V == BIT_TRUE || V == BIT_FALSE);
-}
-static bool ValueNotSet(bit_value_t V) {
- return (V == BIT_UNSET);
-}
-static int Value(bit_value_t V) {
- return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
-}
-static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
- if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
- return bit->getValue() ? BIT_TRUE : BIT_FALSE;
-
- // The bit is uninitialized.
- return BIT_UNSET;
-}
-// Prints the bit value for each position.
-static void dumpBits(raw_ostream &o, BitsInit &bits) {
- unsigned index;
-
- for (index = bits.getNumBits(); index > 0; index--) {
- switch (bitFromBits(bits, index - 1)) {
- case BIT_TRUE:
- o << "1";
- break;
- case BIT_FALSE:
- o << "0";
- break;
- case BIT_UNSET:
- o << "_";
- break;
- default:
- assert(0 && "unexpected return value from bitFromBits");
- }
- }
-}
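-// Illustrative example (not part of the original file): a 4-bit BitsInit with
-// {b3 = 1, b2 = ?, b1 = 0, b0 = 1} dumps as "1_01" (highest bit first).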
-
-// Enums for the available target names.
-typedef enum {
- TARGET_ARM = 0,
- TARGET_THUMB
-} TARGET_NAME_t;
-
-// FIXME: Possibly auto-detected?
-#define BIT_WIDTH 32
-
-// Forward declaration.
-class ARMFilterChooser;
-
-// Representation of the instruction to work on.
-typedef bit_value_t insn_t[BIT_WIDTH];
-
-/// Filter - Filter works with FilterChooser to produce the decoding tree for
-/// the ISA.
-///
-/// It is useful to think of a Filter as governing the switch stmts of the
-/// decoding tree at a certain level. Each case stmt delegates to an inferior
-/// FilterChooser to decide what further decoding logic to employ, or in other
-/// words, what other remaining bits to look at. The FilterChooser eventually
-/// chooses a best Filter to do its job.
-///
-/// This recursive scheme ends when the number of Opcodes assigned to the
-/// FilterChooser becomes 1 or if there is a conflict. A conflict happens when
-/// the Filter/FilterChooser combo does not know how to distinguish among the
-/// Opcodes assigned.
-///
-/// An example of a conflict is
-///
-/// Conflict:
-/// 111101000.00........00010000....
-/// 111101000.00........0001........
-/// 1111010...00........0001........
-/// 1111010...00....................
-/// 1111010.........................
-/// 1111............................
-/// ................................
-/// VST4q8a 111101000_00________00010000____
-/// VST4q8b 111101000_00________00010000____
-///
-/// The Debug output shows the path that the decoding tree follows to reach
-/// the conclusion that there is a conflict. VST4q8a is a vst4 to double-spaced
-/// even registers, while VST4q8b is a vst4 to double-spaced odd registers.
-///
-/// The encoding info in the .td files does not specify this meta information,
-/// which could have been used by the decoder to resolve the conflict. The
-/// decoder could try to decode the even/odd register numbering and assign to
-/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
-/// version and returns its Opcode, since the two have the same Asm format string.
-class ARMFilter {
-protected:
- ARMFilterChooser *Owner; // points to the FilterChooser who owns this filter
- unsigned StartBit; // the starting bit position
- unsigned NumBits; // number of bits to filter
- bool Mixed; // a mixed region contains both set and unset bits
-
- // Map of well-known segment value to the set of uid's with that value.
- std::map<uint64_t, std::vector<unsigned> > FilteredInstructions;
-
- // Set of uid's with non-constant segment values.
- std::vector<unsigned> VariableInstructions;
-
- // Map of well-known segment value to its delegate.
- std::map<unsigned, ARMFilterChooser*> FilterChooserMap;
-
- // Number of instructions which fall under FilteredInstructions category.
- unsigned NumFiltered;
-
- // Keeps track of the last opcode in the filtered bucket.
- unsigned LastOpcFiltered;
-
- // Number of instructions which fall under VariableInstructions category.
- unsigned NumVariable;
-
-public:
- unsigned getNumFiltered() { return NumFiltered; }
- unsigned getNumVariable() { return NumVariable; }
- unsigned getSingletonOpc() {
- assert(NumFiltered == 1);
- return LastOpcFiltered;
- }
- // Return the filter chooser for the group of instructions without constant
- // segment values.
- ARMFilterChooser &getVariableFC() {
- assert(NumFiltered == 1);
- assert(FilterChooserMap.size() == 1);
- return *(FilterChooserMap.find((unsigned)-1)->second);
- }
-
- ARMFilter(const ARMFilter &f);
- ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits,
- bool mixed);
-
- ~ARMFilter();
-
- // Divides the decoding task into sub-tasks and delegates them to the
- // inferior FilterChooser's.
- //
- // A special case arises when there's only one entry in the filtered
- // instructions. In order to unambiguously decode the singleton, we need to
- // match the remaining undecoded encoding bits against the singleton.
- void recurse();
-
- // Emit code to decode instructions given a segment or segments of bits.
- void emit(raw_ostream &o, unsigned &Indentation);
-
- // Returns the amount of fanout produced by the filter. More fanout implies
- // the filter distinguishes more categories of instructions.
- unsigned usefulness() const;
-}; // End of class Filter
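-// Illustrative sketch (not part of the original file): the overall shape of
-// the decoder that Filter/FilterChooser emit is a nest of switch statements
-// over extracted bit fields; the field positions and opcode numbers below
-// are hypothetical.
-//
-//   static uint16_t decodeInstruction(field_t insn) {
-//     switch (fieldFromInstruction(insn, 28, 4)) {   // top level Filter
-//     case 14:                                       // cond == 0b1110
-//       switch (fieldFromInstruction(insn, 20, 8)) { // inferior FilterChooser
-//       case 0x3A: return 42;                        // hypothetical opcode
-//       default: break;                              // fallthrough
-//       }
-//       break;
-//     default: break;
-//     }
-//     return 0;                                      // conflict/undecodable
-//   }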
-
-// These are states of our finite state machines used in FilterChooser's
-// filterProcessor() which produces the filter candidates to use.
-typedef enum {
- ATTR_NONE,
- ATTR_FILTERED,
- ATTR_ALL_SET,
- ATTR_ALL_UNSET,
- ATTR_MIXED
-} bitAttr_t;
-
-/// ARMFilterChooser - FilterChooser chooses the best filter among a set of Filters
-/// in order to perform the decoding of instructions at the current level.
-///
-/// Decoding proceeds from the top down. Based on the well-known encoding bits
-/// of instructions available, FilterChooser builds up the possible Filters that
-/// can further the task of decoding by distinguishing among the remaining
-/// candidate instructions.
-///
-/// Once a filter has been chosen, it is called upon to divide the decoding task
-/// into sub-tasks and delegate them to its inferior FilterChoosers for further
-/// processing.
-///
-/// It is useful to think of a Filter as governing the switch stmts of the
-/// decoding tree, and each case is delegated to an inferior FilterChooser to
-/// decide which further remaining bits to look at.
-class ARMFilterChooser {
- static TARGET_NAME_t TargetName;
-
-protected:
- friend class ARMFilter;
-
- // Vector of codegen instructions to choose our filter.
- const std::vector<const CodeGenInstruction*> &AllInstructions;
-
- // Vector of uid's for this filter chooser to work on.
- const std::vector<unsigned> Opcodes;
-
- // Vector of candidate filters.
- std::vector<ARMFilter> Filters;
-
- // Array of bit values passed down from our parent.
- // Set to all BIT_UNFILTERED's for Parent == NULL.
- bit_value_t FilterBitValues[BIT_WIDTH];
-
- // Links to the FilterChooser above us in the decoding tree.
- ARMFilterChooser *Parent;
-
- // Index of the best filter from Filters.
- int BestIndex;
-
-public:
- static void setTargetName(TARGET_NAME_t tn) { TargetName = tn; }
-
- ARMFilterChooser(const ARMFilterChooser &FC) :
- AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
- Filters(FC.Filters), Parent(FC.Parent), BestIndex(FC.BestIndex) {
- memcpy(FilterBitValues, FC.FilterBitValues, sizeof(FilterBitValues));
- }
-
- ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
- const std::vector<unsigned> &IDs) :
- AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(NULL),
- BestIndex(-1) {
- for (unsigned i = 0; i < BIT_WIDTH; ++i)
- FilterBitValues[i] = BIT_UNFILTERED;
-
- doFilter();
- }
-
- ARMFilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
- const std::vector<unsigned> &IDs,
- bit_value_t (&ParentFilterBitValues)[BIT_WIDTH],
- ARMFilterChooser &parent) :
- AllInstructions(Insts), Opcodes(IDs), Filters(), Parent(&parent),
- BestIndex(-1) {
- for (unsigned i = 0; i < BIT_WIDTH; ++i)
- FilterBitValues[i] = ParentFilterBitValues[i];
-
- doFilter();
- }
-
- // The top level filter chooser has NULL as its parent.
- bool isTopLevel() { return Parent == NULL; }
-
- // This provides an opportunity for target specific code emission.
- void emitTopHook(raw_ostream &o);
-
- // Emit the top level typedef and decodeInstruction() function.
- void emitTop(raw_ostream &o, unsigned &Indentation);
-
- // This provides an opportunity for target specific code emission after
- // emitTop().
- void emitBot(raw_ostream &o, unsigned &Indentation);
-
-protected:
- // Populates the insn given the uid.
- void insnWithID(insn_t &Insn, unsigned Opcode) const {
- if (AllInstructions[Opcode]->isPseudo)
- return;
-
- BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
-
- for (unsigned i = 0; i < BIT_WIDTH; ++i)
- Insn[i] = bitFromBits(Bits, i);
-
- // Set Inst{21} to 1 (wback) when IndexModeBits == IndexModeUpd.
- Record *R = AllInstructions[Opcode]->TheDef;
- if (R->getValue("IndexModeBits") &&
- getByteField(*R, "IndexModeBits") == IndexModeUpd)
- Insn[21] = BIT_TRUE;
- }
-
- // Returns the record name.
- const std::string &nameWithID(unsigned Opcode) const {
- return AllInstructions[Opcode]->TheDef->getName();
- }
-
- // Populates the field of the insn given the start position and the number of
- // consecutive bits to scan for.
- //
- // Returns false if there exists any uninitialized bit value in the range.
- // Returns true otherwise.
- bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
- unsigned NumBits) const;
-
- /// dumpFilterArray - dumpFilterArray prints out debugging info for the given
- /// filter array as a series of chars.
- void dumpFilterArray(raw_ostream &o, bit_value_t (&filter)[BIT_WIDTH]);
-
- /// dumpStack - dumpStack traverses the filter chooser chain and calls
- /// dumpFilterArray on each filter chooser up to the top level one.
- void dumpStack(raw_ostream &o, const char *prefix);
-
- ARMFilter &bestFilter() {
- assert(BestIndex != -1 && "BestIndex not set");
- return Filters[BestIndex];
- }
-
- // Called from Filter::recurse() when singleton exists. For debug purpose.
- void SingletonExists(unsigned Opc);
-
- bool PositionFiltered(unsigned i) {
- return ValueSet(FilterBitValues[i]);
- }
-
- // Calculates the island(s) needed to decode the instruction.
- // This returns a list of undecoded bits of an instruction; for example,
- // Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
- // decoded bits in order to verify that the instruction matches the Opcode.
- unsigned getIslands(std::vector<unsigned> &StartBits,
- std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
- insn_t &Insn);
-
- // The purpose of this function is for the API client to detect possible
- // Load/Store Coprocessor instructions. If the coprocessor number of
- // the instruction is either 10 or 11, the decoder should not report the
- // instruction as LDC/LDC2/STC/STC2, but should match against Advanced SIMD or
- // VFP instructions.
- bool LdStCopEncoding1(unsigned Opc) {
- const std::string &Name = nameWithID(Opc);
- if (Name == "LDC_OFFSET" || Name == "LDC_OPTION" ||
- Name == "LDC_POST" || Name == "LDC_PRE" ||
- Name == "LDCL_OFFSET" || Name == "LDCL_OPTION" ||
- Name == "LDCL_POST" || Name == "LDCL_PRE" ||
- Name == "STC_OFFSET" || Name == "STC_OPTION" ||
- Name == "STC_POST" || Name == "STC_PRE" ||
- Name == "STCL_OFFSET" || Name == "STCL_OPTION" ||
- Name == "STCL_POST" || Name == "STCL_PRE")
- return true;
- else
- return false;
- }
-
- // Emits code to decode the singleton. Return true if we have matched all the
- // well-known bits.
- bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc);
-
- // Emits code to decode the singleton, and then to decode the rest.
- void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
- ARMFilter &Best);
-
- // Assign a single filter and run with it.
- void runSingleFilter(ARMFilterChooser &owner, unsigned startBit,
- unsigned numBit, bool mixed);
-
- // reportRegion is a helper function for filterProcessor to mark a region as
- // eligible for use as a filter region.
- void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
- bool AllowMixed);
-
- // FilterProcessor scans the well-known encoding bits of the instructions and
- // builds up a list of candidate filters. It chooses the best filter and
- // recursively descends down the decoding tree.
- bool filterProcessor(bool AllowMixed, bool Greedy = true);
-
- // Decides on the best configuration of filter(s) to use in order to decode
- // the instructions. A conflict of instructions may occur, in which case we
- // dump the conflict set to the standard error.
- void doFilter();
-
- // Emits code to decode our share of instructions. Returns true if the
- // emitted code causes a return, which occurs if we know how to decode
- // the instruction at this level or the instruction is not decodeable.
- bool emit(raw_ostream &o, unsigned &Indentation);
-};
-
-///////////////////////////
-// //
-// Filter Implementation //
-// //
-///////////////////////////
-
-ARMFilter::ARMFilter(const ARMFilter &f) :
- Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
- FilteredInstructions(f.FilteredInstructions),
- VariableInstructions(f.VariableInstructions),
- FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
- LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
-}
-
-ARMFilter::ARMFilter(ARMFilterChooser &owner, unsigned startBit, unsigned numBits,
- bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
- Mixed(mixed) {
- assert(StartBit + NumBits - 1 < BIT_WIDTH);
-
- NumFiltered = 0;
- LastOpcFiltered = 0;
- NumVariable = 0;
-
- for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
- insn_t Insn;
-
- // Populates the insn given the uid.
- Owner->insnWithID(Insn, Owner->Opcodes[i]);
-
- uint64_t Field;
- // Scans the segment for possibly well-specified encoding bits.
- bool ok = Owner->fieldFromInsn(Field, Insn, StartBit, NumBits);
-
- if (ok) {
- // The encoding bits are well-known. Let's add the uid of the
- // instruction into the bucket keyed off the constant field value.
- LastOpcFiltered = Owner->Opcodes[i];
- FilteredInstructions[Field].push_back(LastOpcFiltered);
- ++NumFiltered;
- } else {
- // Some of the encoding bit(s) are unspecified. This contributes to
- // one additional member of "Variable" instructions.
- VariableInstructions.push_back(Owner->Opcodes[i]);
- ++NumVariable;
- }
- }
-
- assert((FilteredInstructions.size() + VariableInstructions.size() > 0)
- && "Filter returns no instruction categories");
-}
-
-ARMFilter::~ARMFilter() {
- std::map<unsigned, ARMFilterChooser*>::iterator filterIterator;
- for (filterIterator = FilterChooserMap.begin();
- filterIterator != FilterChooserMap.end();
- filterIterator++) {
- delete filterIterator->second;
- }
-}
-
-// Divides the decoding task into sub-tasks and delegates them to the
-// inferior FilterChooser's.
-//
-// A special case arises when there's only one entry in the filtered
-// instructions. In order to unambiguously decode the singleton, we need to
-// match the remaining undecoded encoding bits against the singleton.
-void ARMFilter::recurse() {
- std::map<uint64_t, std::vector<unsigned> >::const_iterator mapIterator;
-
- bit_value_t BitValueArray[BIT_WIDTH];
- // Starts by inheriting our parent filter chooser's filter bit values.
- memcpy(BitValueArray, Owner->FilterBitValues, sizeof(BitValueArray));
-
- unsigned bitIndex;
-
- if (VariableInstructions.size()) {
- // Conservatively marks each segment position as BIT_UNSET.
- for (bitIndex = 0; bitIndex < NumBits; bitIndex++)
- BitValueArray[StartBit + bitIndex] = BIT_UNSET;
-
- // Delegates to an inferior filter chooser for further processing on this
- // group of instructions whose segment values are variable.
- FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
- (unsigned)-1,
- new ARMFilterChooser(Owner->AllInstructions,
- VariableInstructions,
- BitValueArray,
- *Owner)
- ));
- }
-
- // No need to recurse for a singleton filtered instruction.
- // See also Filter::emit().
- if (getNumFiltered() == 1) {
- //Owner->SingletonExists(LastOpcFiltered);
- assert(FilterChooserMap.size() == 1);
- return;
- }
-
- // Otherwise, create sub choosers.
- for (mapIterator = FilteredInstructions.begin();
- mapIterator != FilteredInstructions.end();
- mapIterator++) {
-
- // Marks all the segment positions with either BIT_TRUE or BIT_FALSE.
- for (bitIndex = 0; bitIndex < NumBits; bitIndex++) {
- if (mapIterator->first & (1ULL << bitIndex))
- BitValueArray[StartBit + bitIndex] = BIT_TRUE;
- else
- BitValueArray[StartBit + bitIndex] = BIT_FALSE;
- }
-
- // Delegates to an inferior filter chooser for further processing on this
- // category of instructions.
- FilterChooserMap.insert(std::pair<unsigned, ARMFilterChooser*>(
- mapIterator->first,
- new ARMFilterChooser(Owner->AllInstructions,
- mapIterator->second,
- BitValueArray,
- *Owner)
- ));
- }
-}
-
-// Emit code to decode instructions given a segment or segments of bits.
-void ARMFilter::emit(raw_ostream &o, unsigned &Indentation) {
- o.indent(Indentation) << "// Check Inst{";
-
- if (NumBits > 1)
- o << (StartBit + NumBits - 1) << '-';
-
- o << StartBit << "} ...\n";
-
- o.indent(Indentation) << "switch (fieldFromInstruction(insn, "
- << StartBit << ", " << NumBits << ")) {\n";
-
- std::map<unsigned, ARMFilterChooser*>::iterator filterIterator;
-
- bool DefaultCase = false;
- for (filterIterator = FilterChooserMap.begin();
- filterIterator != FilterChooserMap.end();
- filterIterator++) {
-
- // Field value -1 implies a non-empty set of variable instructions.
- // See also recurse().
- if (filterIterator->first == (unsigned)-1) {
- DefaultCase = true;
-
- o.indent(Indentation) << "default:\n";
- o.indent(Indentation) << " break; // fallthrough\n";
-
- // Closing curly brace for the switch statement.
- // This is unconventional because we want the default processing to be
- // performed for the fallthrough cases as well, i.e., when the "cases"
- // did not produce a decoded instruction.
- o.indent(Indentation) << "}\n";
-
- } else
- o.indent(Indentation) << "case " << filterIterator->first << ":\n";
-
- // We arrive at a category of instructions with the same segment value.
- // Now delegate to the sub filter chooser for further decodings.
- // The case may fallthrough, which happens if the remaining well-known
- // encoding bits do not match exactly.
- if (!DefaultCase) { ++Indentation; ++Indentation; }
-
- bool finished = filterIterator->second->emit(o, Indentation);
- // For top level default case, there's no need for a break statement.
- if (Owner->isTopLevel() && DefaultCase)
- break;
- if (!finished)
- o.indent(Indentation) << "break;\n";
-
- if (!DefaultCase) { --Indentation; --Indentation; }
- }
-
- // If there is no default case, we still need to supply a closing brace.
- if (!DefaultCase) {
- // Closing curly brace for the switch statement.
- o.indent(Indentation) << "}\n";
- }
-}
-
-// Returns the amount of fanout produced by the filter. More fanout implies
-// the filter distinguishes more categories of instructions.
-unsigned ARMFilter::usefulness() const {
- if (VariableInstructions.size())
- return FilteredInstructions.size();
- else
- return FilteredInstructions.size() + 1;
-}
-
-//////////////////////////////////
-// //
-// FilterChooser Implementation //
-// //
-//////////////////////////////////
-
-// Define the symbol here.
-TARGET_NAME_t ARMFilterChooser::TargetName;
-
-// This provides an opportunity for target specific code emission.
-void ARMFilterChooser::emitTopHook(raw_ostream &o) {
- if (TargetName == TARGET_ARM) {
- // Emit code that references the ARMFormat data type.
- o << "static const ARMFormat ARMFormats[] = {\n";
- for (unsigned i = 0, e = AllInstructions.size(); i != e; ++i) {
- const Record &Def = *(AllInstructions[i]->TheDef);
- const std::string &Name = Def.getName();
- if (Def.isSubClassOf("InstARM") || Def.isSubClassOf("InstThumb"))
- o.indent(2) <<
- stringForARMFormat((ARMFormat)getByteField(Def, "Form"));
- else
- o << " ARM_FORMAT_NA";
-
- o << ",\t// Inst #" << i << " = " << Name << '\n';
- }
- o << " ARM_FORMAT_NA\t// Unreachable.\n";
- o << "};\n\n";
- }
-}
-
-// Emit the top level typedef and decodeInstruction() function.
-void ARMFilterChooser::emitTop(raw_ostream &o, unsigned &Indentation) {
- // Run the target specific emit hook.
- emitTopHook(o);
-
- switch (BIT_WIDTH) {
- case 8:
- o.indent(Indentation) << "typedef uint8_t field_t;\n";
- break;
- case 16:
- o.indent(Indentation) << "typedef uint16_t field_t;\n";
- break;
- case 32:
- o.indent(Indentation) << "typedef uint32_t field_t;\n";
- break;
- case 64:
- o.indent(Indentation) << "typedef uint64_t field_t;\n";
- break;
- default:
- assert(0 && "Unexpected instruction size!");
- }
-
- o << '\n';
-
- o.indent(Indentation) << "static field_t " <<
- "fieldFromInstruction(field_t insn, unsigned startBit, unsigned numBits)\n";
-
- o.indent(Indentation) << "{\n";
-
- ++Indentation; ++Indentation;
- o.indent(Indentation) << "assert(startBit + numBits <= " << BIT_WIDTH
- << " && \"Instruction field out of bounds!\");\n";
- o << '\n';
- o.indent(Indentation) << "field_t fieldMask;\n";
- o << '\n';
- o.indent(Indentation) << "if (numBits == " << BIT_WIDTH << ")\n";
-
- ++Indentation; ++Indentation;
- o.indent(Indentation) << "fieldMask = (field_t)-1;\n";
- --Indentation; --Indentation;
-
- o.indent(Indentation) << "else\n";
-
- ++Indentation; ++Indentation;
- o.indent(Indentation) << "fieldMask = ((1 << numBits) - 1) << startBit;\n";
- --Indentation; --Indentation;
-
- o << '\n';
- o.indent(Indentation) << "return (insn & fieldMask) >> startBit;\n";
- --Indentation; --Indentation;
-
- o.indent(Indentation) << "}\n";
-
- o << '\n';
-
- o.indent(Indentation) <<"static uint16_t decodeInstruction(field_t insn) {\n";
-
- ++Indentation; ++Indentation;
- // Emits code to decode the instructions.
- emit(o, Indentation);
-
- o << '\n';
- o.indent(Indentation) << "return 0;\n";
- --Indentation; --Indentation;
-
- o.indent(Indentation) << "}\n";
-
- o << '\n';
-}
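-// Illustrative sketch (not part of the original file): the generated
-// fieldFromInstruction() above compiles to the equivalent of this 32-bit
-// helper; the sample encoding in the comment below is hypothetical.
-static inline uint32_t fieldFromInstructionSketch(uint32_t insn,
-                                                  unsigned startBit,
-                                                  unsigned numBits) {
-  // Build a mask covering numBits bits starting at startBit, then shift the
-  // masked field down to bit 0.
-  uint32_t fieldMask = (numBits == 32) ? (uint32_t)-1
-                                       : ((1u << numBits) - 1) << startBit;
-  return (insn & fieldMask) >> startBit;
-}
-// e.g. fieldFromInstructionSketch(0xE59F1004, 28, 4) == 0xE, the ARM
-// condition field Inst{31-28}.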
-
-// This provides an opportunity for target specific code emission after
-// emitTop().
-void ARMFilterChooser::emitBot(raw_ostream &o, unsigned &Indentation) {
- if (TargetName != TARGET_THUMB) return;
-
- // Emit code that decodes the Thumb ISA.
- o.indent(Indentation)
- << "static uint16_t decodeThumbInstruction(field_t insn) {\n";
-
- ++Indentation; ++Indentation;
-
- // Emits code to decode the instructions.
- emit(o, Indentation);
-
- o << '\n';
- o.indent(Indentation) << "return 0;\n";
-
- --Indentation; --Indentation;
-
- o.indent(Indentation) << "}\n";
-}
-
-// Populates the field of the insn given the start position and the number of
-// consecutive bits to scan for.
-//
-// Returns false upon encountering the first uninitialized bit value.
-// Returns true otherwise.
-bool ARMFilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
- unsigned StartBit, unsigned NumBits) const {
- Field = 0;
-
- for (unsigned i = 0; i < NumBits; ++i) {
- if (Insn[StartBit + i] == BIT_UNSET)
- return false;
-
- if (Insn[StartBit + i] == BIT_TRUE)
- Field = Field | (1ULL << i);
- }
-
- return true;
-}
-
-/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
-/// filter array as a series of chars.
-void ARMFilterChooser::dumpFilterArray(raw_ostream &o,
- bit_value_t (&filter)[BIT_WIDTH]) {
- unsigned bitIndex;
-
- for (bitIndex = BIT_WIDTH; bitIndex > 0; bitIndex--) {
- switch (filter[bitIndex - 1]) {
- case BIT_UNFILTERED:
- o << ".";
- break;
- case BIT_UNSET:
- o << "_";
- break;
- case BIT_TRUE:
- o << "1";
- break;
- case BIT_FALSE:
- o << "0";
- break;
- }
- }
-}
-
-/// dumpStack - dumpStack traverses the filter chooser chain and calls
-/// dumpFilterArray on each filter chooser up to the top level one.
-void ARMFilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
- ARMFilterChooser *current = this;
-
- while (current) {
- o << prefix;
- dumpFilterArray(o, current->FilterBitValues);
- o << '\n';
- current = current->Parent;
- }
-}
-
-// Called from Filter::recurse() when singleton exists. For debug purpose.
-void ARMFilterChooser::SingletonExists(unsigned Opc) {
- insn_t Insn0;
- insnWithID(Insn0, Opc);
-
- errs() << "Singleton exists: " << nameWithID(Opc)
- << " with its decoding dominating ";
- for (unsigned i = 0; i < Opcodes.size(); ++i) {
- if (Opcodes[i] == Opc) continue;
- errs() << nameWithID(Opcodes[i]) << ' ';
- }
- errs() << '\n';
-
- dumpStack(errs(), "\t\t");
- for (unsigned i = 0; i < Opcodes.size(); i++) {
- const std::string &Name = nameWithID(Opcodes[i]);
-
- errs() << '\t' << Name << " ";
- dumpBits(errs(),
- getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
- errs() << '\n';
- }
-}
-
-// Calculates the island(s) needed to decode the instruction.
-// This returns a list of undecoded bits of an instruction; for example,
-// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
-// decoded bits in order to verify that the instruction matches the Opcode.
-unsigned ARMFilterChooser::getIslands(std::vector<unsigned> &StartBits,
- std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
- insn_t &Insn) {
- unsigned Num, BitNo;
- Num = BitNo = 0;
-
- uint64_t FieldVal = 0;
-
- // 0: Init
- // 1: Water (the bit value does not affect decoding)
- // 2: Island (well-known bit value needed for decoding)
- int State = 0;
- int Val = -1;
-
- for (unsigned i = 0; i < BIT_WIDTH; ++i) {
- Val = Value(Insn[i]);
- bool Filtered = PositionFiltered(i);
- switch (State) {
- default:
- assert(0 && "Unreachable code!");
- break;
- case 0:
- case 1:
- if (Filtered || Val == -1)
- State = 1; // Still in Water
- else {
- State = 2; // Into the Island
- BitNo = 0;
- StartBits.push_back(i);
- FieldVal = Val;
- }
- break;
- case 2:
- if (Filtered || Val == -1) {
- State = 1; // Into the Water
- EndBits.push_back(i - 1);
- FieldVals.push_back(FieldVal);
- ++Num;
- } else {
- State = 2; // Still in Island
- ++BitNo;
- FieldVal = FieldVal | Val << BitNo;
- }
- break;
- }
- }
- // If we are still in Island after the loop, do some housekeeping.
- if (State == 2) {
- EndBits.push_back(BIT_WIDTH - 1);
- FieldVals.push_back(FieldVal);
- ++Num;
- }
-
- assert(StartBits.size() == Num && EndBits.size() == Num &&
- FieldVals.size() == Num);
- return Num;
-}
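-// Worked example (hypothetical, using a toy 8-bit width): with no positions
-// filtered and Insn = {1, ?, ?, 1, 1, ?, 0, 1} (bit 0 first), getIslands()
-// finds three islands: Inst{0} == 1, Inst{4-3} == 0b11 and Inst{7-6} == 0b10,
-// i.e. StartBits = {0, 3, 6}, EndBits = {0, 4, 7}, FieldVals = {1, 3, 2}.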
-
-// Emits code to decode the singleton. Return true if we have matched all the
-// well-known bits.
-bool ARMFilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
- unsigned Opc) {
- std::vector<unsigned> StartBits;
- std::vector<unsigned> EndBits;
- std::vector<uint64_t> FieldVals;
- insn_t Insn;
- insnWithID(Insn, Opc);
-
- // This provides a good opportunity to check for a possible Ld/St Coprocessor
- // Opcode and escape if the coproc # is either 10 or 11, in which case it is
- // a NEON/VFP instruction in disguise.
- if (TargetName == TARGET_ARM && LdStCopEncoding1(Opc)) {
- o.indent(Indentation);
- // A8.6.51 & A8.6.188
- // If coproc = 0b101?, i.e., slice(insn, 11, 8) = 10 or 11, escape.
- o << "if (fieldFromInstruction(insn, 9, 3) == 5) break; // fallthrough\n";
- }
-
- // Look for islands of undecoded bits of the singleton.
- getIslands(StartBits, EndBits, FieldVals, Insn);
-
- unsigned Size = StartBits.size();
- unsigned I, NumBits;
-
- // If we have matched all the well-known bits, just issue a return.
- if (Size == 0) {
- o.indent(Indentation) << "return " << Opc << "; // " << nameWithID(Opc)
- << '\n';
- return true;
- }
-
- // Otherwise, there are more decodings to be done!
-
- // Emit code to match the island(s) for the singleton.
- o.indent(Indentation) << "// Check ";
-
- for (I = Size; I != 0; --I) {
- o << "Inst{" << EndBits[I-1] << '-' << StartBits[I-1] << "} ";
- if (I > 1)
- o << "&& ";
- else
- o << "for singleton decoding...\n";
- }
-
- o.indent(Indentation) << "if (";
-
- for (I = Size; I != 0; --I) {
- NumBits = EndBits[I-1] - StartBits[I-1] + 1;
- o << "fieldFromInstruction(insn, " << StartBits[I-1] << ", " << NumBits
- << ") == " << FieldVals[I-1];
- if (I > 1)
- o << " && ";
- else
- o << ")\n";
- }
-
- o.indent(Indentation) << " return " << Opc << "; // " << nameWithID(Opc)
- << '\n';
-
- return false;
-}
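-// Illustrative sketch (not part of the original file): for a hypothetical
-// singleton whose islands are Inst{24-21} == 8 and Inst{4-4} == 0, the code
-// emitted above looks like:
-//
-//   // Check Inst{24-21} && Inst{4-4} for singleton decoding...
-//   if (fieldFromInstruction(insn, 21, 4) == 8 &&
-//       fieldFromInstruction(insn, 4, 1) == 0)
-//     return 123; // HypotheticalOpc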
-
-// Emits code to decode the singleton, and then to decode the rest.
-void ARMFilterChooser::emitSingletonDecoder(raw_ostream &o,
- unsigned &Indentation,
- ARMFilter &Best) {
-
- unsigned Opc = Best.getSingletonOpc();
-
- emitSingletonDecoder(o, Indentation, Opc);
-
- // Emit code for the rest.
- o.indent(Indentation) << "else\n";
-
- Indentation += 2;
- Best.getVariableFC().emit(o, Indentation);
- Indentation -= 2;
-}
-
-// Assign a single filter and run with it. Top level API client can initialize
-// with a single filter to start the filtering process.
-void ARMFilterChooser::runSingleFilter(ARMFilterChooser &owner,
- unsigned startBit,
- unsigned numBit, bool mixed) {
- Filters.clear();
- ARMFilter F(*this, startBit, numBit, true);
- Filters.push_back(F);
- BestIndex = 0; // Sole Filter instance to choose from.
- bestFilter().recurse();
-}
-
-// reportRegion is a helper function for filterProcessor to mark a region as
-// eligible for use as a filter region.
-void ARMFilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
- unsigned BitIndex, bool AllowMixed) {
- if (RA == ATTR_MIXED && AllowMixed)
- Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, true));
- else if (RA == ATTR_ALL_SET && !AllowMixed)
- Filters.push_back(ARMFilter(*this, StartBit, BitIndex - StartBit, false));
-}
-
-// FilterProcessor scans the well-known encoding bits of the instructions and
-// builds up a list of candidate filters. It chooses the best filter and
-// recursively descends down the decoding tree.
-bool ARMFilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
- Filters.clear();
- BestIndex = -1;
- unsigned numInstructions = Opcodes.size();
-
- assert(numInstructions && "Filter created with no instructions");
-
- // No further filtering is necessary.
- if (numInstructions == 1)
- return true;
-
- // Heuristics. See also doFilter()'s "Heuristics" comment when num of
- // instructions is 3.
- if (AllowMixed && !Greedy) {
- assert(numInstructions == 3);
-
- for (unsigned i = 0; i < Opcodes.size(); ++i) {
- std::vector<unsigned> StartBits;
- std::vector<unsigned> EndBits;
- std::vector<uint64_t> FieldVals;
- insn_t Insn;
-
- insnWithID(Insn, Opcodes[i]);
-
- // Look for islands of undecoded bits of any instruction.
- if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
- // Found an instruction with island(s). Now just assign a filter.
- runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1,
- true);
- return true;
- }
- }
- }
-
- unsigned BitIndex, InsnIndex;
-
- // We maintain BIT_WIDTH copies of the bitAttrs automaton.
- // The automaton consumes the corresponding bit from each
- // instruction.
- //
- // Input symbols: 0, 1, and _ (unset).
- // States: NONE, FILTERED, ALL_SET, ALL_UNSET, and MIXED.
- // Initial state: NONE.
- //
- // (NONE) ------- [01] -> (ALL_SET)
- // (NONE) ------- _ ----> (ALL_UNSET)
- // (ALL_SET) ---- [01] -> (ALL_SET)
- // (ALL_SET) ---- _ ----> (MIXED)
- // (ALL_UNSET) -- [01] -> (MIXED)
- // (ALL_UNSET) -- _ ----> (ALL_UNSET)
- // (MIXED) ------ . ----> (MIXED)
- // (FILTERED)---- . ----> (FILTERED)
-
- bitAttr_t bitAttrs[BIT_WIDTH];
-
- // FILTERED bit positions provide no entropy and are not worthy of pursuing.
- // Filter::recurse() sets either BIT_TRUE or BIT_FALSE for each position.
- for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex)
- if (FilterBitValues[BitIndex] == BIT_TRUE ||
- FilterBitValues[BitIndex] == BIT_FALSE)
- bitAttrs[BitIndex] = ATTR_FILTERED;
- else
- bitAttrs[BitIndex] = ATTR_NONE;
-
- for (InsnIndex = 0; InsnIndex < numInstructions; ++InsnIndex) {
- insn_t insn;
-
- insnWithID(insn, Opcodes[InsnIndex]);
-
- for (BitIndex = 0; BitIndex < BIT_WIDTH; ++BitIndex) {
- switch (bitAttrs[BitIndex]) {
- case ATTR_NONE:
- if (insn[BitIndex] == BIT_UNSET)
- bitAttrs[BitIndex] = ATTR_ALL_UNSET;
- else
- bitAttrs[BitIndex] = ATTR_ALL_SET;
- break;
- case ATTR_ALL_SET:
- if (insn[BitIndex] == BIT_UNSET)
- bitAttrs[BitIndex] = ATTR_MIXED;
- break;
- case ATTR_ALL_UNSET:
- if (insn[BitIndex] != BIT_UNSET)
- bitAttrs[BitIndex] = ATTR_MIXED;
- break;
- case ATTR_MIXED:
- case ATTR_FILTERED:
- break;
- }
- }
- }
-
- // The regionAttr automaton consumes the bitAttrs automatons' state,
- // lowest-to-highest.
- //
- // Input symbols: F(iltered), (all_)S(et), (all_)U(nset), M(ixed)
- // States: NONE, ALL_SET, MIXED
- // Initial state: NONE
- //
- // (NONE) ----- F --> (NONE)
- // (NONE) ----- S --> (ALL_SET) ; and set region start
- // (NONE) ----- U --> (NONE)
- // (NONE) ----- M --> (MIXED) ; and set region start
- // (ALL_SET) -- F --> (NONE) ; and report an ALL_SET region
- // (ALL_SET) -- S --> (ALL_SET)
- // (ALL_SET) -- U --> (NONE) ; and report an ALL_SET region
- // (ALL_SET) -- M --> (MIXED) ; and report an ALL_SET region
- // (MIXED) ---- F --> (NONE) ; and report a MIXED region
- // (MIXED) ---- S --> (ALL_SET) ; and report a MIXED region
- // (MIXED) ---- U --> (NONE) ; and report a MIXED region
- // (MIXED) ---- M --> (MIXED)
-
- bitAttr_t RA = ATTR_NONE;
- unsigned StartBit = 0;
-
- for (BitIndex = 0; BitIndex < BIT_WIDTH; BitIndex++) {
- bitAttr_t bitAttr = bitAttrs[BitIndex];
-
- assert(bitAttr != ATTR_NONE && "Bit without attributes");
-
- switch (RA) {
- case ATTR_NONE:
- switch (bitAttr) {
- case ATTR_FILTERED:
- break;
- case ATTR_ALL_SET:
- StartBit = BitIndex;
- RA = ATTR_ALL_SET;
- break;
- case ATTR_ALL_UNSET:
- break;
- case ATTR_MIXED:
- StartBit = BitIndex;
- RA = ATTR_MIXED;
- break;
- default:
- assert(0 && "Unexpected bitAttr!");
- }
- break;
- case ATTR_ALL_SET:
- switch (bitAttr) {
- case ATTR_FILTERED:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- RA = ATTR_NONE;
- break;
- case ATTR_ALL_SET:
- break;
- case ATTR_ALL_UNSET:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- RA = ATTR_NONE;
- break;
- case ATTR_MIXED:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- StartBit = BitIndex;
- RA = ATTR_MIXED;
- break;
- default:
- assert(0 && "Unexpected bitAttr!");
- }
- break;
- case ATTR_MIXED:
- switch (bitAttr) {
- case ATTR_FILTERED:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- StartBit = BitIndex;
- RA = ATTR_NONE;
- break;
- case ATTR_ALL_SET:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- StartBit = BitIndex;
- RA = ATTR_ALL_SET;
- break;
- case ATTR_ALL_UNSET:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- RA = ATTR_NONE;
- break;
- case ATTR_MIXED:
- break;
- default:
- assert(0 && "Unexpected bitAttr!");
- }
- break;
- case ATTR_ALL_UNSET:
- assert(0 && "regionAttr state machine has no ATTR_UNSET state");
- case ATTR_FILTERED:
- assert(0 && "regionAttr state machine has no ATTR_FILTERED state");
- }
- }
-
- // At the end, if we're still in ALL_SET or MIXED states, report a region
- switch (RA) {
- case ATTR_NONE:
- break;
- case ATTR_FILTERED:
- break;
- case ATTR_ALL_SET:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- break;
- case ATTR_ALL_UNSET:
- break;
- case ATTR_MIXED:
- reportRegion(RA, StartBit, BitIndex, AllowMixed);
- break;
- }
-
- // We have finished with the filter processing. Now it's time to choose
- // the best performing filter.
- BestIndex = 0;
- bool AllUseless = true;
- unsigned BestScore = 0;
-
- for (unsigned i = 0, e = Filters.size(); i != e; ++i) {
- unsigned Usefulness = Filters[i].usefulness();
-
- if (Usefulness)
- AllUseless = false;
-
- if (Usefulness > BestScore) {
- BestIndex = i;
- BestScore = Usefulness;
- }
- }
-
- if (!AllUseless)
- bestFilter().recurse();
-
- return !AllUseless;
-} // end of FilterChooser::filterProcessor(bool)
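-// Worked example (hypothetical 8-bit encodings, printed high bit first, with
-// '_' meaning unset):
-//   InsnA = 111001__   InsnB = 111010__
-// Every position in Inst{7-2} is set in both instructions (ATTR_ALL_SET),
-// while Inst{1-0} is unset in both (ATTR_ALL_UNSET). The region automaton
-// therefore reports the single ALL_SET region Inst{7-2}, whose field values
-// 0b111001 and 0b111010 place the two instructions in distinct filtered
-// buckets.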
-
-// Decides on the best configuration of filter(s) to use in order to decode
-// the instructions. A conflict of instructions may occur, in which case we
-// dump the conflict set to the standard error.
-void ARMFilterChooser::doFilter() {
- unsigned Num = Opcodes.size();
- assert(Num && "FilterChooser created with no instructions");
-
- // Heuristics: Use Inst{31-28} as the top level filter for ARM ISA.
- if (TargetName == TARGET_ARM && Parent == NULL) {
- runSingleFilter(*this, 28, 4, false);
- return;
- }
-
- // Try regions of consecutive known bit values first.
- if (filterProcessor(false))
- return;
-
- // Then regions of mixed bits (both known and uninitialized bit values allowed).
- if (filterProcessor(true))
- return;
-
- // Heuristics to cope with conflict set {t2CMPrs, t2SUBSrr, t2SUBSrs} where
- // no single instruction for the maximum ATTR_MIXED region Inst{14-4} has a
- // well-known encoding pattern. In such a case, we backtrack and scan for
- // the very first consecutive ATTR_ALL_SET region and assign a filter to it.
- if (Num == 3 && filterProcessor(true, false))
- return;
-
- // If we come to here, the instruction decoding has failed.
- // Set the BestIndex to -1 to indicate so.
- BestIndex = -1;
-}
-
-// Emits code to decode our share of instructions. Returns true if the
-// emitted code causes a return, which occurs if we know how to decode
-// the instruction at this level or the instruction is not decodeable.
-bool ARMFilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
- if (Opcodes.size() == 1)
- // There is only one instruction in the set, which is great!
- // Call emitSingletonDecoder() to see whether there are any remaining
- // encoding bits.
- return emitSingletonDecoder(o, Indentation, Opcodes[0]);
-
- // Choose the best filter to do the decodings!
- if (BestIndex != -1) {
- ARMFilter &Best = bestFilter();
- if (Best.getNumFiltered() == 1)
- emitSingletonDecoder(o, Indentation, Best);
- else
- bestFilter().emit(o, Indentation);
- return false;
- }
-
- // If we reach here, there is a conflict in decoding. Let's resolve the known
- // conflicts!
- if ((TargetName == TARGET_ARM || TargetName == TARGET_THUMB) &&
- Opcodes.size() == 2) {
- // Resolve the known conflict sets:
- //
- // 1. source registers are identical => VMOVDneon; otherwise => VORRd
- // 2. source registers are identical => VMOVQ; otherwise => VORRq
- // 3. LDR, LDRcp => return LDR for now.
- // FIXME: How can we distinguish between LDR and LDRcp? Do we need to?
- // 4. tLDMIA, tLDMIA_UPD => Rn = Inst{10-8}, reglist = Inst{7-0},
- // wback = registers<Rn> = 0
- // NOTE: (tLDM, tLDM_UPD) resolution must come before Advanced SIMD
- // addressing mode resolution!!!
- // 5. VLD[234]LN*/VST[234]LN* vs. VLD[234]LN*_UPD/VST[234]LN*_UPD conflicts
- // are resolved returning the non-UPD versions of the instructions if the
- // Rm field, i.e., Inst{3-0} is 0b1111. This is specified in A7.7.1
- // Advanced SIMD addressing mode.
- const std::string &name1 = nameWithID(Opcodes[0]);
- const std::string &name2 = nameWithID(Opcodes[1]);
- if ((name1 == "VMOVDneon" && name2 == "VORRd") ||
- (name1 == "VMOVQ" && name2 == "VORRq")) {
- // Inserting the opening curly brace for this case block.
- --Indentation; --Indentation;
- o.indent(Indentation) << "{\n";
- ++Indentation; ++Indentation;
-
- o.indent(Indentation)
- << "field_t N = fieldFromInstruction(insn, 7, 1), "
- << "M = fieldFromInstruction(insn, 5, 1);\n";
- o.indent(Indentation)
- << "field_t Vn = fieldFromInstruction(insn, 16, 4), "
- << "Vm = fieldFromInstruction(insn, 0, 4);\n";
- o.indent(Indentation)
- << "return (N == M && Vn == Vm) ? "
- << Opcodes[0] << " /* " << name1 << " */ : "
- << Opcodes[1] << " /* " << name2 << " */ ;\n";
-
- // Inserting the closing curly brace for this case block.
- --Indentation; --Indentation;
- o.indent(Indentation) << "}\n";
- ++Indentation; ++Indentation;
-
- return true;
- }
- if (name1 == "LDR" && name2 == "LDRcp") {
- o.indent(Indentation)
- << "return " << Opcodes[0]
- << "; // Returning LDR for {LDR, LDRcp}\n";
- return true;
- }
- if (name1 == "tLDMIA" && name2 == "tLDMIA_UPD") {
- // Inserting the opening curly brace for this case block.
- --Indentation; --Indentation;
- o.indent(Indentation) << "{\n";
- ++Indentation; ++Indentation;
-
- o.indent(Indentation)
- << "unsigned Rn = fieldFromInstruction(insn, 8, 3), "
- << "list = fieldFromInstruction(insn, 0, 8);\n";
- o.indent(Indentation)
- << "return ((list >> Rn) & 1) == 0 ? "
- << Opcodes[1] << " /* " << name2 << " */ : "
- << Opcodes[0] << " /* " << name1 << " */ ;\n";
-
- // Inserting the closing curly brace for this case block.
- --Indentation; --Indentation;
- o.indent(Indentation) << "}\n";
- ++Indentation; ++Indentation;
-
- return true;
- }
- if (sameStringExceptSuffix(name1, name2, "_UPD")) {
- o.indent(Indentation)
- << "return fieldFromInstruction(insn, 0, 4) == 15 ? " << Opcodes[0]
- << " /* " << name1 << " */ : " << Opcodes[1] << "/* " << name2
- << " */ ; // Advanced SIMD addressing mode\n";
- return true;
- }
-
- // Otherwise, it does not belong to the known conflict sets.
- }
-
- // We don't know how to decode these instructions! Return 0 and dump the
- // conflict set!
- o.indent(Indentation) << "return 0;" << " // Conflict set: ";
- for (int i = 0, N = Opcodes.size(); i < N; ++i) {
- o << nameWithID(Opcodes[i]);
- if (i < (N - 1))
- o << ", ";
- else
- o << '\n';
- }
-
- // Print out useful conflict information for postmortem analysis.
- errs() << "Decoding Conflict:\n";
-
- dumpStack(errs(), "\t\t");
-
- for (unsigned i = 0; i < Opcodes.size(); i++) {
- const std::string &Name = nameWithID(Opcodes[i]);
-
- errs() << '\t' << Name << " ";
- dumpBits(errs(),
- getBitsField(*AllInstructions[Opcodes[i]]->TheDef, "Inst"));
- errs() << '\n';
- }
-
- return true;
-}
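Rendered as plain code, the VMOV/VORR case above emits roughly the following (a hedged reconstruction of the generated snippet; field_t, the helper, and the opcode parameters stand in for generated names):

#include <cstdint>

typedef uint32_t field_t;

static field_t fieldFromInstruction(uint32_t Insn, unsigned Start, unsigned Num) {
  return (Insn >> Start) & ((1u << Num) - 1); // assumes 0 < Num < 32
}

// VMOVDneon/VMOVQ are encoded as VORRd/VORRq with identical source registers.
static unsigned resolveVMOVvsVORR(uint32_t insn, unsigned VMOVOpc,
                                  unsigned VORROpc) {
  field_t N = fieldFromInstruction(insn, 7, 1);
  field_t M = fieldFromInstruction(insn, 5, 1);
  field_t Vn = fieldFromInstruction(insn, 16, 4);
  field_t Vm = fieldFromInstruction(insn, 0, 4);
  return (N == M && Vn == Vm) ? VMOVOpc : VORROpc;
}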
-
-
-////////////////////////////////////////////
-// //
-// ARMDEBackend //
-// (Helper class for ARMDecoderEmitter) //
-// //
-////////////////////////////////////////////
-
-class ARMDecoderEmitter::ARMDEBackend {
-public:
- ARMDEBackend(ARMDecoderEmitter &frontend, RecordKeeper &Records) :
- NumberedInstructions(),
- Opcodes(),
- Frontend(frontend),
- Target(Records),
- FC(NULL)
- {
- if (Target.getName() == "ARM")
- TargetName = TARGET_ARM;
- else {
- errs() << "Target name " << Target.getName() << " not recognized\n";
- assert(0 && "Unknown target");
- }
-
- // Populate the instructions for our TargetName.
- populateInstructions();
- }
-
- ~ARMDEBackend() {
- if (FC) {
- delete FC;
- FC = NULL;
- }
- }
-
- void getInstructionsByEnumValue(std::vector<const CodeGenInstruction*>
- &NumberedInstructions) {
- // We must emit the PHI opcode first...
- std::string Namespace = Target.getInstNamespace();
- assert(!Namespace.empty() && "No instructions defined.");
-
- NumberedInstructions = Target.getInstructionsByEnumValue();
- }
-
- bool populateInstruction(const CodeGenInstruction &CGI, TARGET_NAME_t TN);
-
- void populateInstructions();
-
- // Emits disassembler code for instruction decoding. This delegates to the
- // FilterChooser instance to do the heavy lifting.
- void emit(raw_ostream &o);
-
-protected:
- std::vector<const CodeGenInstruction*> NumberedInstructions;
- std::vector<unsigned> Opcodes;
- // Special case for the ARM chip, which supports ARM and Thumb ISAs.
- // Opcodes2 will be populated with the Thumb opcodes.
- std::vector<unsigned> Opcodes2;
- ARMDecoderEmitter &Frontend;
- CodeGenTarget Target;
- ARMFilterChooser *FC;
-
- TARGET_NAME_t TargetName;
-};
-
-bool ARMDecoderEmitter::
-ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
- TARGET_NAME_t TN) {
- const Record &Def = *CGI.TheDef;
- const StringRef Name = Def.getName();
- uint8_t Form = getByteField(Def, "Form");
-
- BitsInit &Bits = getBitsField(Def, "Inst");
-
- // If all the bit positions are not specified, do not decode this
- // instruction; we are bound to fail. For proper disassembly, the well-known
- // encoding bits of the instruction must be fully specified.
- //
- // This also removes pseudo instructions from consideration for disassembly,
- // which is a better design and less fragile than matching on names.
- if (Bits.allInComplete()) return false;
-
- // Ignore "asm parser only" instructions.
- if (Def.getValueAsBit("isAsmParserOnly"))
- return false;
-
- if (TN == TARGET_ARM) {
- if (Form == ARM_FORMAT_PSEUDO)
- return false;
- if (thumbInstruction(Form))
- return false;
-
- // Tail calls are additional patterns that map onto existing instructions.
- if (Name == "TCRETURNdi" || Name == "TCRETURNdiND" ||
- Name == "TCRETURNri" || Name == "TCRETURNriND" ||
- Name == "TAILJMPd" || Name == "TAILJMPdt" ||
- Name == "TAILJMPdND" || Name == "TAILJMPdNDt" ||
- Name == "TAILJMPr" || Name == "TAILJMPrND" ||
- Name == "MOVr_TC")
- return false;
-
- // Delegate ADR disassembly to the more generic ADDri/SUBri instructions.
- if (Name == "ADR")
- return false;
-
- //
- // The following special cases are for conflict resolution.
- //
-
- // A8-598: VEXT
- // Vector Extract extracts elements from the bottom end of the second
- // operand vector and the top end of the first, concatenates them and
- // places the result in the destination vector. The elements of the
- // vectors are treated as being 8-bit bitfields. There is no distinction
- // between data types. The size of the operation can be specified in
- // assembler as vext.size. If the value is 16, 32, or 64, the syntax is
- // a pseudo-instruction for a VEXT instruction specifying the equivalent
- // number of bytes.
- //
- // Variants VEXTd16, VEXTd32, VEXTd8, and VEXTdf are reduced to VEXTd8;
- // variants VEXTq16, VEXTq32, VEXTq8, and VEXTqf are reduced to VEXTq8.
- if (Name == "VEXTd16" || Name == "VEXTd32" || Name == "VEXTdf" ||
- Name == "VEXTq16" || Name == "VEXTq32" || Name == "VEXTqf")
- return false;
- } else if (TN == TARGET_THUMB) {
- if (!thumbInstruction(Form))
- return false;
-
- // A8.6.25 BX. Use the generic tBX_Rm, ignore tBX_RET and tBX_RET_vararg.
- if (Name == "tBX_RET" || Name == "tBX_RET_vararg")
- return false;
-
- // Ignore tADR, prefer tADDrPCi.
- if (Name == "tADR")
- return false;
-
- // Delegate t2ADR disassembly to the more generic t2ADDri12/t2SUBri12
- // instructions.
- if (Name == "t2ADR")
- return false;
-
- // Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr.
- // Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s].
- // Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s].
- if (Name == "tADDrSP" || Name == "tADDspr" || Name == "tPICADD" ||
- Name == "t2SUBrSPs" || Name == "t2ADDrSPs")
- return false;
-
- // FIXME: Use ldr.n to work around a Darwin assembler bug.
- // Introduce a workaround with tLDRpciDIS opcode.
- if (Name == "tLDRpci")
- return false;
-
- // Ignore t2LDRDpci, prefer the generic t2LDRDi8, t2LDRD_PRE, t2LDRD_POST.
- if (Name == "t2LDRDpci")
- return false;
-
- // Resolve conflicts:
- //
- // t2LDMIA_RET conflicts with t2LDM (ditto)
- // tMOVCCi conflicts with tMOVi8
- // tMOVCCr conflicts with tMOVgpr2gpr
- // tLDRcp conflicts with tLDRspi
- // t2MOVCCi16 conflicts with tMOVi16
- if (Name == "t2LDMIA_RET" ||
- Name == "tMOVCCi" || Name == "tMOVCCr" ||
- Name == "tLDRcp" ||
- Name == "t2MOVCCi16")
- return false;
- }
-
- DEBUG({
- // Dumps the instruction encoding format.
- switch (TargetName) {
- case TARGET_ARM:
- case TARGET_THUMB:
- errs() << Name << " " << stringForARMFormat((ARMFormat)Form);
- break;
- }
-
- errs() << " ";
-
- // Dumps the instruction encoding bits.
- dumpBits(errs(), Bits);
-
- errs() << '\n';
-
- // Dumps the list of operand info.
- for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
- const CGIOperandList::OperandInfo &Info = CGI.Operands[i];
- const std::string &OperandName = Info.Name;
- const Record &OperandDef = *Info.Rec;
-
- errs() << "\t" << OperandName << " (" << OperandDef.getName() << ")\n";
- }
- });
-
- return true;
-}
-
-void ARMDecoderEmitter::ARMDEBackend::populateInstructions() {
- getInstructionsByEnumValue(NumberedInstructions);
-
- unsigned numUIDs = NumberedInstructions.size();
- if (TargetName == TARGET_ARM) {
- for (unsigned uid = 0; uid < numUIDs; uid++) {
- // filter out intrinsics
- if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM"))
- continue;
-
- if (populateInstruction(*NumberedInstructions[uid], TargetName))
- Opcodes.push_back(uid);
- }
-
- // Special handling for the ARM chip, which supports two modes of execution.
- // This branch handles the Thumb opcodes.
- for (unsigned uid = 0; uid < numUIDs; uid++) {
- // filter out intrinsics
- if (!NumberedInstructions[uid]->TheDef->isSubClassOf("InstARM")
- && !NumberedInstructions[uid]->TheDef->isSubClassOf("InstThumb"))
- continue;
-
- if (populateInstruction(*NumberedInstructions[uid], TARGET_THUMB))
- Opcodes2.push_back(uid);
- }
-
- return;
- }
-
- // For other targets.
- for (unsigned uid = 0; uid < numUIDs; uid++) {
- Record *R = NumberedInstructions[uid]->TheDef;
- if (R->getValueAsString("Namespace") == "TargetOpcode")
- continue;
-
- if (populateInstruction(*NumberedInstructions[uid], TargetName))
- Opcodes.push_back(uid);
- }
-}
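The two passes above amount to partitioning the numbered instructions into per-ISA opcode lists. A hedged sketch of that shape (InstDesc and its flags are hypothetical stand-ins for the CodeGenInstruction queries in the real code):

#include <vector>

struct InstDesc {
  bool isARMClass;  // stands in for isSubClassOf("InstARM"/"InstThumb")
  bool isThumbForm; // stands in for thumbInstruction(Form)
  bool decodable;   // stands in for populateInstruction(...) succeeding
};

static void partitionOpcodes(const std::vector<InstDesc> &Insts,
                             std::vector<unsigned> &ARMOps,
                             std::vector<unsigned> &ThumbOps) {
  for (unsigned uid = 0, e = unsigned(Insts.size()); uid != e; ++uid) {
    if (!Insts[uid].isARMClass || !Insts[uid].decodable)
      continue; // filter out intrinsics and undecodable definitions
    (Insts[uid].isThumbForm ? ThumbOps : ARMOps).push_back(uid);
  }
}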
-
-// Emits disassembler code for instruction decoding. This delegates to the
-// FilterChooser instance to do the heavy lifting.
-void ARMDecoderEmitter::ARMDEBackend::emit(raw_ostream &o) {
- switch (TargetName) {
- case TARGET_ARM:
- Frontend.EmitSourceFileHeader("ARM/Thumb Decoders", o);
- break;
- default:
- assert(0 && "Unreachable code!");
- }
-
- o << "#include \"llvm/Support/DataTypes.h\"\n";
- o << "#include <assert.h>\n";
- o << '\n';
- o << "namespace llvm {\n\n";
-
- ARMFilterChooser::setTargetName(TargetName);
-
- switch (TargetName) {
- case TARGET_ARM: {
- // Emit common utility and ARM ISA decoder.
- FC = new ARMFilterChooser(NumberedInstructions, Opcodes);
- // Reset indentation level.
- unsigned Indentation = 0;
- FC->emitTop(o, Indentation);
- delete FC;
-
- // Emit Thumb ISA decoder as well.
- ARMFilterChooser::setTargetName(TARGET_THUMB);
- FC = new ARMFilterChooser(NumberedInstructions, Opcodes2);
- // Reset indentation level.
- Indentation = 0;
- FC->emitBot(o, Indentation);
- break;
- }
- default:
- assert(0 && "Unreachable code!");
- }
-
- o << "\n} // End llvm namespace \n";
-}
-
-/////////////////////////
-// Backend interface //
-/////////////////////////
-
-void ARMDecoderEmitter::initBackend()
-{
- Backend = new ARMDEBackend(*this, Records);
-}
-
-void ARMDecoderEmitter::run(raw_ostream &o)
-{
- Backend->emit(o);
-}
-
-void ARMDecoderEmitter::shutdownBackend()
-{
- delete Backend;
- Backend = NULL;
-}
diff --git a/utils/TableGen/ARMDecoderEmitter.h b/utils/TableGen/ARMDecoderEmitter.h
deleted file mode 100644
index 486f899354f4..000000000000
--- a/utils/TableGen/ARMDecoderEmitter.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===------------ ARMDecoderEmitter.h - Decoder Generator -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is part of the ARM Disassembler.
-// It contains the tablegen backend declaration ARMDecoderEmitter.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARMDECODEREMITTER_H
-#define ARMDECODEREMITTER_H
-
-#include "llvm/Support/DataTypes.h"
-#include "llvm/TableGen/TableGenBackend.h"
-
-namespace llvm {
-
-class ARMDecoderEmitter : public TableGenBackend {
- RecordKeeper &Records;
-public:
- ARMDecoderEmitter(RecordKeeper &R) : Records(R) {
- initBackend();
- }
-
- ~ARMDecoderEmitter() {
- shutdownBackend();
- }
-
- // run - Output the code emitter
- void run(raw_ostream &o);
-
-private:
- // Helper class for ARMDecoderEmitter.
- class ARMDEBackend;
-
- ARMDEBackend *Backend;
-
- void initBackend();
- void shutdownBackend();
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 8b86c23d0632..39a3c25d99da 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -99,6 +99,7 @@
#include "AsmMatcherEmitter.h"
#include "CodeGenTarget.h"
#include "StringMatcher.h"
+#include "StringToOffsetTable.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -107,6 +108,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <map>
@@ -251,12 +253,7 @@ public:
switch (Kind) {
case Invalid:
- assert(0 && "Invalid kind!");
- case Token:
- // Tokens are comparable by value.
- //
- // FIXME: Compare by enum value.
- return ValueName < RHS.ValueName;
+ llvm_unreachable("Invalid kind!");
default:
// This class precedes the RHS if it is a proper subset of the RHS.
@@ -287,7 +284,11 @@ struct MatchableInfo {
/// The suboperand index within SrcOpName, or -1 for the entire operand.
int SubOpIdx;
- explicit AsmOperand(StringRef T) : Token(T), Class(0), SubOpIdx(-1) {}
+ /// Register record if this token is singleton register.
+ Record *SingletonReg;
+
+ explicit AsmOperand(StringRef T) : Token(T), Class(0), SubOpIdx(-1),
+ SingletonReg(0) {}
};
/// ResOperand - This represents a single operand in the result instruction
@@ -366,6 +367,9 @@ struct MatchableInfo {
}
};
+ /// AsmVariantID - Target's assembly syntax variant number.
+ int AsmVariantID;
+
/// TheDef - This is the definition of the instruction or InstAlias that this
/// matchable came from.
Record *const TheDef;
@@ -406,24 +410,28 @@ struct MatchableInfo {
std::string ConversionFnKind;
MatchableInfo(const CodeGenInstruction &CGI)
- : TheDef(CGI.TheDef), DefRec(&CGI), AsmString(CGI.AsmString) {
+ : AsmVariantID(0), TheDef(CGI.TheDef), DefRec(&CGI),
+ AsmString(CGI.AsmString) {
}
MatchableInfo(const CodeGenInstAlias *Alias)
- : TheDef(Alias->TheDef), DefRec(Alias), AsmString(Alias->AsmString) {
+ : AsmVariantID(0), TheDef(Alias->TheDef), DefRec(Alias),
+ AsmString(Alias->AsmString) {
}
void Initialize(const AsmMatcherInfo &Info,
- SmallPtrSet<Record*, 16> &SingletonRegisters);
+ SmallPtrSet<Record*, 16> &SingletonRegisters,
+ int AsmVariantNo, std::string &RegisterPrefix);
/// Validate - Return true if this matchable is a valid thing to match against
/// and perform a number of validity checks.
bool Validate(StringRef CommentDelimiter, bool Hack) const;
- /// getSingletonRegisterForAsmOperand - If the specified token is a singleton
- /// register, return the Record for it, otherwise return null.
- Record *getSingletonRegisterForAsmOperand(unsigned i,
- const AsmMatcherInfo &Info) const;
+ /// extractSingletonRegisterForAsmOperand - Extract singleton register,
+ /// if present, from specified token.
+ void
+ extractSingletonRegisterForAsmOperand(unsigned i, const AsmMatcherInfo &Info,
+ std::string &RegisterPrefix);
/// FindAsmOperand - Find the AsmOperand with the specified name and
/// suboperand index.
@@ -557,9 +565,6 @@ public:
/// Target - The target information.
CodeGenTarget &Target;
- /// The AsmParser "RegisterPrefix" value.
- std::string RegisterPrefix;
-
/// The classes which are needed for matching.
std::vector<ClassInfo*> Classes;
@@ -591,7 +596,8 @@ private:
/// getOperandClass - Lookup or create the class for the given operand.
ClassInfo *getOperandClass(const CGIOperandList::OperandInfo &OI,
- int SubOpIdx = -1);
+ int SubOpIdx);
+ ClassInfo *getOperandClass(Record *Rec, int SubOpIdx);
/// BuildRegisterClasses - Build the ClassInfo* instances for register
/// classes.
@@ -645,9 +651,11 @@ void MatchableInfo::dump() {
}
void MatchableInfo::Initialize(const AsmMatcherInfo &Info,
- SmallPtrSet<Record*, 16> &SingletonRegisters) {
- // TODO: Eventually support asmparser for Variant != 0.
- AsmString = CodeGenInstruction::FlattenAsmStringVariants(AsmString, 0);
+ SmallPtrSet<Record*, 16> &SingletonRegisters,
+ int AsmVariantNo, std::string &RegisterPrefix) {
+ AsmVariantID = AsmVariantNo;
+ AsmString =
+ CodeGenInstruction::FlattenAsmStringVariants(AsmString, AsmVariantNo);
TokenizeAsmString(Info);
@@ -660,7 +668,8 @@ void MatchableInfo::Initialize(const AsmMatcherInfo &Info,
// Collect singleton registers, if used.
for (unsigned i = 0, e = AsmOperands.size(); i != e; ++i) {
- if (Record *Reg = getSingletonRegisterForAsmOperand(i, Info))
+ extractSingletonRegisterForAsmOperand(i, Info, RegisterPrefix);
+ if (Record *Reg = AsmOperands[i].SingletonReg)
SingletonRegisters.insert(Reg);
}
}
@@ -736,9 +745,12 @@ void MatchableInfo::TokenizeAsmString(const AsmMatcherInfo &Info) {
// The first token of the instruction is the mnemonic, which must be a
// simple string, not a $foo variable or a singleton register.
- assert(!AsmOperands.empty() && "Instruction has no tokens?");
+ if (AsmOperands.empty())
+ throw TGError(TheDef->getLoc(),
+ "Instruction '" + TheDef->getName() + "' has no tokens");
Mnemonic = AsmOperands[0].Token;
- if (Mnemonic[0] == '$' || getSingletonRegisterForAsmOperand(0, Info))
+ // FIXME: Check and raise an error if it is a register.
+ if (Mnemonic[0] == '$')
throw TGError(TheDef->getLoc(),
"Invalid instruction mnemonic '" + Mnemonic.str() + "'!");
@@ -801,28 +813,30 @@ bool MatchableInfo::Validate(StringRef CommentDelimiter, bool Hack) const {
return true;
}
-/// getSingletonRegisterForAsmOperand - If the specified token is a singleton
-/// register, return the register name, otherwise return a null StringRef.
-Record *MatchableInfo::
-getSingletonRegisterForAsmOperand(unsigned i, const AsmMatcherInfo &Info) const{
- StringRef Tok = AsmOperands[i].Token;
- if (!Tok.startswith(Info.RegisterPrefix))
- return 0;
+/// extractSingletonRegisterForAsmOperand - Extract singleton register,
+/// if present, from specified token.
+void MatchableInfo::
+extractSingletonRegisterForAsmOperand(unsigned OperandNo,
+ const AsmMatcherInfo &Info,
+ std::string &RegisterPrefix) {
+ StringRef Tok = AsmOperands[OperandNo].Token;
+ if (RegisterPrefix.empty()) {
+ std::string LoweredTok = Tok.lower();
+ if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(LoweredTok))
+ AsmOperands[OperandNo].SingletonReg = Reg->TheDef;
+ return;
+ }
+
+ if (!Tok.startswith(RegisterPrefix))
+ return;
- StringRef RegName = Tok.substr(Info.RegisterPrefix.size());
+ StringRef RegName = Tok.substr(RegisterPrefix.size());
if (const CodeGenRegister *Reg = Info.Target.getRegisterByName(RegName))
- return Reg->TheDef;
+ AsmOperands[OperandNo].SingletonReg = Reg->TheDef;
// If there is no register prefix (i.e. "%" in "%eax"), then this may
// be some random non-register token, just ignore it.
- if (Info.RegisterPrefix.empty())
- return 0;
-
- // Otherwise, we have something invalid prefixed with the register prefix,
- // such as %foo.
- std::string Err = "unable to find register for '" + RegName.str() +
- "' (which matches register prefix)";
- throw TGError(TheDef->getLoc(), Err);
+ return;
}
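The prefix handling above reduces to: with an empty RegisterPrefix, lowercase the token and look it up directly; otherwise strip the prefix first and silently ignore non-matching tokens. A runnable toy model (the register table and lookup function are hypothetical stand-ins for Target.getRegisterByName):

#include <algorithm>
#include <cctype>
#include <map>
#include <string>

static const std::map<std::string, int> Regs = {{"r0", 1}, {"eax", 2}}; // toy table

static int lookupRegister(const std::string &N) { // stands in for getRegisterByName
  auto It = Regs.find(N);
  return It == Regs.end() ? 0 : It->second; // 0 means "no such register"
}

static int singletonReg(std::string Tok, const std::string &Prefix) {
  if (Prefix.empty()) {
    std::transform(Tok.begin(), Tok.end(), Tok.begin(),
                   [](unsigned char C) { return char(std::tolower(C)); });
    return lookupRegister(Tok);
  }
  if (Tok.compare(0, Prefix.size(), Prefix) != 0)
    return 0; // not prefixed: may be an ordinary token, just ignore it
  return lookupRegister(Tok.substr(Prefix.size()));
}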
static std::string getEnumNameForToken(StringRef Str) {
@@ -870,7 +884,11 @@ AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
Record *Rec = OI.Rec;
if (SubOpIdx != -1)
Rec = dynamic_cast<DefInit*>(OI.MIOperandInfo->getArg(SubOpIdx))->getDef();
+ return getOperandClass(Rec, SubOpIdx);
+}
+ClassInfo *
+AsmMatcherInfo::getOperandClass(Record *Rec, int SubOpIdx) {
if (Rec->isSubClassOf("RegisterOperand")) {
// RegisterOperand may have an associated ParserMatchClass. If it does,
// use it, else just fall back to the underlying register class.
@@ -1102,8 +1120,7 @@ void AsmMatcherInfo::BuildOperandClasses() {
AsmMatcherInfo::AsmMatcherInfo(Record *asmParser,
CodeGenTarget &target,
RecordKeeper &records)
- : Records(records), AsmParser(asmParser), Target(target),
- RegisterPrefix(AsmParser->getValueAsString("RegisterPrefix")) {
+ : Records(records), AsmParser(asmParser), Target(target) {
}
/// BuildOperandMatchInfo - Build the necessary information to handle user
@@ -1158,86 +1175,92 @@ void AsmMatcherInfo::BuildInfo() {
assert(FeatureNo < 32 && "Too many subtarget features!");
}
- std::string CommentDelimiter = AsmParser->getValueAsString("CommentDelimiter");
-
// Parse the instructions; we need to do this first so that we can gather the
// singleton register classes.
SmallPtrSet<Record*, 16> SingletonRegisters;
- for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
- E = Target.inst_end(); I != E; ++I) {
- const CodeGenInstruction &CGI = **I;
-
- // If the tblgen -match-prefix option is specified (for tblgen hackers),
- // filter the set of instructions we consider.
- if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix))
- continue;
+ unsigned VariantCount = Target.getAsmParserVariantCount();
+ for (unsigned VC = 0; VC != VariantCount; ++VC) {
+ Record *AsmVariant = Target.getAsmParserVariant(VC);
+ std::string CommentDelimiter = AsmVariant->getValueAsString("CommentDelimiter");
+ std::string RegisterPrefix = AsmVariant->getValueAsString("RegisterPrefix");
+ int AsmVariantNo = AsmVariant->getValueAsInt("Variant");
+
+ for (CodeGenTarget::inst_iterator I = Target.inst_begin(),
+ E = Target.inst_end(); I != E; ++I) {
+ const CodeGenInstruction &CGI = **I;
+
+ // If the tblgen -match-prefix option is specified (for tblgen hackers),
+ // filter the set of instructions we consider.
+ if (!StringRef(CGI.TheDef->getName()).startswith(MatchPrefix))
+ continue;
- // Ignore "codegen only" instructions.
- if (CGI.TheDef->getValueAsBit("isCodeGenOnly"))
- continue;
+ // Ignore "codegen only" instructions.
+ if (CGI.TheDef->getValueAsBit("isCodeGenOnly"))
+ continue;
- // Validate the operand list to ensure we can handle this instruction.
- for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
- const CGIOperandList::OperandInfo &OI = CGI.Operands[i];
-
- // Validate tied operands.
- if (OI.getTiedRegister() != -1) {
- // If we have a tied operand that consists of multiple MCOperands,
- // reject it. We reject aliases and ignore instructions for now.
- if (OI.MINumOperands != 1) {
- // FIXME: Should reject these. The ARM backend hits this with $lane
- // in a bunch of instructions. It is unclear what the right answer is.
- DEBUG({
- errs() << "warning: '" << CGI.TheDef->getName() << "': "
- << "ignoring instruction with multi-operand tied operand '"
- << OI.Name << "'\n";
- });
- continue;
+ // Validate the operand list to ensure we can handle this instruction.
+ for (unsigned i = 0, e = CGI.Operands.size(); i != e; ++i) {
+ const CGIOperandList::OperandInfo &OI = CGI.Operands[i];
+
+ // Validate tied operands.
+ if (OI.getTiedRegister() != -1) {
+ // If we have a tied operand that consists of multiple MCOperands,
+ // reject it. We reject aliases and ignore instructions for now.
+ if (OI.MINumOperands != 1) {
+ // FIXME: Should reject these. The ARM backend hits this with $lane
+ // in a bunch of instructions. It is unclear what the right answer is.
+ DEBUG({
+ errs() << "warning: '" << CGI.TheDef->getName() << "': "
+ << "ignoring instruction with multi-operand tied operand '"
+ << OI.Name << "'\n";
+ });
+ continue;
+ }
}
}
- }
- OwningPtr<MatchableInfo> II(new MatchableInfo(CGI));
+ OwningPtr<MatchableInfo> II(new MatchableInfo(CGI));
- II->Initialize(*this, SingletonRegisters);
+ II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
- // Ignore instructions which shouldn't be matched and diagnose invalid
- // instruction definitions with an error.
- if (!II->Validate(CommentDelimiter, true))
- continue;
+ // Ignore instructions which shouldn't be matched and diagnose invalid
+ // instruction definitions with an error.
+ if (!II->Validate(CommentDelimiter, true))
+ continue;
- // Ignore "Int_*" and "*_Int" instructions, which are internal aliases.
- //
- // FIXME: This is a total hack.
- if (StringRef(II->TheDef->getName()).startswith("Int_") ||
- StringRef(II->TheDef->getName()).endswith("_Int"))
- continue;
+ // Ignore "Int_*" and "*_Int" instructions, which are internal aliases.
+ //
+ // FIXME: This is a total hack.
+ if (StringRef(II->TheDef->getName()).startswith("Int_") ||
+ StringRef(II->TheDef->getName()).endswith("_Int"))
+ continue;
- Matchables.push_back(II.take());
- }
+ Matchables.push_back(II.take());
+ }
- // Parse all of the InstAlias definitions and stick them in the list of
- // matchables.
- std::vector<Record*> AllInstAliases =
- Records.getAllDerivedDefinitions("InstAlias");
- for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) {
- CodeGenInstAlias *Alias = new CodeGenInstAlias(AllInstAliases[i], Target);
-
- // If the tblgen -match-prefix option is specified (for tblgen hackers),
- // filter the set of instruction aliases we consider, based on the target
- // instruction.
- if (!StringRef(Alias->ResultInst->TheDef->getName()).startswith(
- MatchPrefix))
- continue;
+ // Parse all of the InstAlias definitions and stick them in the list of
+ // matchables.
+ std::vector<Record*> AllInstAliases =
+ Records.getAllDerivedDefinitions("InstAlias");
+ for (unsigned i = 0, e = AllInstAliases.size(); i != e; ++i) {
+ CodeGenInstAlias *Alias = new CodeGenInstAlias(AllInstAliases[i], Target);
+
+ // If the tblgen -match-prefix option is specified (for tblgen hackers),
+ // filter the set of instruction aliases we consider, based on the target
+ // instruction.
+ if (!StringRef(Alias->ResultInst->TheDef->getName()).startswith(
+ MatchPrefix))
+ continue;
- OwningPtr<MatchableInfo> II(new MatchableInfo(Alias));
+ OwningPtr<MatchableInfo> II(new MatchableInfo(Alias));
- II->Initialize(*this, SingletonRegisters);
+ II->Initialize(*this, SingletonRegisters, AsmVariantNo, RegisterPrefix);
- // Validate the alias definitions.
- II->Validate(CommentDelimiter, false);
+ // Validate the alias definitions.
+ II->Validate(CommentDelimiter, false);
- Matchables.push_back(II.take());
+ Matchables.push_back(II.take());
+ }
}
// Build info for the register classes.
@@ -1260,7 +1283,7 @@ void AsmMatcherInfo::BuildInfo() {
StringRef Token = Op.Token;
// Check for singleton registers.
- if (Record *RegRecord = II->getSingletonRegisterForAsmOperand(i, *this)) {
+ if (Record *RegRecord = II->AsmOperands[i].SingletonReg) {
Op.Class = RegisterClasses[RegRecord];
assert(Op.Class && Op.Class->Registers.size() == 1 &&
"Unexpected class for singleton register");
@@ -1297,6 +1320,17 @@ void AsmMatcherInfo::BuildInfo() {
II->BuildAliasResultOperands();
}
+ // Process token alias definitions and set up the associated superclass
+ // information.
+ std::vector<Record*> AllTokenAliases =
+ Records.getAllDerivedDefinitions("TokenAlias");
+ for (unsigned i = 0, e = AllTokenAliases.size(); i != e; ++i) {
+ Record *Rec = AllTokenAliases[i];
+ ClassInfo *FromClass = getTokenClass(Rec->getValueAsString("FromToken"));
+ ClassInfo *ToClass = getTokenClass(Rec->getValueAsString("ToToken"));
+ FromClass->SuperClasses.push_back(ToClass);
+ }
+
// Reorder classes so that classes precede super classes.
std::sort(Classes.begin(), Classes.end(), less_ptr<ClassInfo>());
}
@@ -1375,9 +1409,11 @@ void AsmMatcherInfo::BuildAliasOperandReference(MatchableInfo *II,
CGA.ResultOperands[i].getName() == OperandName) {
// It's safe to go with the first one we find, because CodeGenInstAlias
// validates that all operands with the same name have the same record.
- unsigned ResultIdx = CGA.ResultInstOperandIndex[i].first;
Op.SubOpIdx = CGA.ResultInstOperandIndex[i].second;
- Op.Class = getOperandClass(CGA.ResultInst->Operands[ResultIdx],
+ // Use the match class from the Alias definition, not the
+ // destination instruction, as we may have an immediate that's
+ // being munged by the match class.
+ Op.Class = getOperandClass(CGA.ResultOperands[i].getRecord(),
Op.SubOpIdx);
Op.SrcOpName = OperandName;
return;
@@ -1453,7 +1489,6 @@ void MatchableInfo::BuildAliasResultOperands() {
// Find out what operand from the asmparser that this MCInst operand
// comes from.
switch (CGA.ResultOperands[AliasOpNo].Kind) {
- default: assert(0 && "unexpected InstAlias operand kind");
case CodeGenInstAlias::ResultOperand::K_Record: {
StringRef Name = CGA.ResultOperands[AliasOpNo].getName();
int SrcOperand = FindAsmOperand(Name, SubIdx);
@@ -1656,7 +1691,7 @@ static void EmitMatchClassEnumeration(CodeGenTarget &Target,
/// EmitValidateOperandClass - Emit the function to validate an operand class.
static void EmitValidateOperandClass(AsmMatcherInfo &Info,
raw_ostream &OS) {
- OS << "static bool ValidateOperandClass(MCParsedAsmOperand *GOp, "
+ OS << "static bool validateOperandClass(MCParsedAsmOperand *GOp, "
<< "MatchClassKind Kind) {\n";
OS << " " << Info.Target.getName() << "Operand &Operand = *("
<< Info.Target.getName() << "Operand*)GOp;\n";
@@ -1667,7 +1702,8 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
// Check for Token operands first.
OS << " if (Operand.isToken())\n";
- OS << " return MatchTokenString(Operand.getToken()) == Kind;\n\n";
+ OS << " return isSubclass(matchTokenString(Operand.getToken()), Kind);"
+ << "\n\n";
// Check for register operands, including sub-classes.
OS << " if (Operand.isReg()) {\n";
@@ -1681,7 +1717,7 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
<< it->first->getName() << ": OpKind = " << it->second->Name
<< "; break;\n";
OS << " }\n";
- OS << " return IsSubclass(OpKind, Kind);\n";
+ OS << " return isSubclass(OpKind, Kind);\n";
OS << " }\n\n";
// Check the user classes. We don't care what order since we're only
@@ -1708,8 +1744,8 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
static void EmitIsSubclass(CodeGenTarget &Target,
std::vector<ClassInfo*> &Infos,
raw_ostream &OS) {
- OS << "/// IsSubclass - Compute whether \\arg A is a subclass of \\arg B.\n";
- OS << "static bool IsSubclass(MatchClassKind A, MatchClassKind B) {\n";
+ OS << "/// isSubclass - Compute whether \\arg A is a subclass of \\arg B.\n";
+ OS << "static bool isSubclass(MatchClassKind A, MatchClassKind B) {\n";
OS << " if (A == B)\n";
OS << " return true;\n\n";
@@ -1720,32 +1756,30 @@ static void EmitIsSubclass(CodeGenTarget &Target,
ie = Infos.end(); it != ie; ++it) {
ClassInfo &A = **it;
- if (A.Kind != ClassInfo::Token) {
- std::vector<StringRef> SuperClasses;
- for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
- ie = Infos.end(); it != ie; ++it) {
- ClassInfo &B = **it;
-
- if (&A != &B && A.isSubsetOf(B))
- SuperClasses.push_back(B.Name);
- }
+ std::vector<StringRef> SuperClasses;
+ for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
+ ie = Infos.end(); it != ie; ++it) {
+ ClassInfo &B = **it;
- if (SuperClasses.empty())
- continue;
+ if (&A != &B && A.isSubsetOf(B))
+ SuperClasses.push_back(B.Name);
+ }
- OS << "\n case " << A.Name << ":\n";
+ if (SuperClasses.empty())
+ continue;
- if (SuperClasses.size() == 1) {
- OS << " return B == " << SuperClasses.back() << ";\n";
- continue;
- }
+ OS << "\n case " << A.Name << ":\n";
- OS << " switch (B) {\n";
- OS << " default: return false;\n";
- for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
- OS << " case " << SuperClasses[i] << ": return true;\n";
- OS << " }\n";
+ if (SuperClasses.size() == 1) {
+ OS << " return B == " << SuperClasses.back() << ";\n";
+ continue;
}
+
+ OS << " switch (B) {\n";
+ OS << " default: return false;\n";
+ for (unsigned i = 0, e = SuperClasses.size(); i != e; ++i)
+ OS << " case " << SuperClasses[i] << ": return true;\n";
+ OS << " }\n";
}
OS << " }\n";
OS << "}\n\n";
@@ -1767,7 +1801,7 @@ static void EmitMatchTokenString(CodeGenTarget &Target,
"return " + CI.Name + ";"));
}
- OS << "static MatchClassKind MatchTokenString(StringRef Name) {\n";
+ OS << "static MatchClassKind matchTokenString(StringRef Name) {\n";
StringMatcher("Name", Matches, OS).Emit();
@@ -1905,7 +1939,7 @@ static bool EmitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
Info.getRecords().getAllDerivedDefinitions("MnemonicAlias");
if (Aliases.empty()) return false;
- OS << "static void ApplyMnemonicAliases(StringRef &Mnemonic, "
+ OS << "static void applyMnemonicAliases(StringRef &Mnemonic, "
"unsigned Features) {\n";
// Keep track of all the aliases from a mnemonic. Use an std::map so that the
@@ -1975,45 +2009,62 @@ static bool EmitMnemonicAliases(raw_ostream &OS, const AsmMatcherInfo &Info) {
return true;
}
+static const char *getMinimalTypeForRange(uint64_t Range) {
+ assert(Range < 0xFFFFFFFFULL && "Enum too large");
+ if (Range > 0xFFFF)
+ return "uint32_t";
+ if (Range > 0xFF)
+ return "uint16_t";
+ return "uint8_t";
+}
+
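A quick boundary check of the helper just added (the function body is copied from the patch; the driver and asserts are ours):

#include <cassert>
#include <cstdint>
#include <cstring>

static const char *getMinimalTypeForRange(uint64_t Range) {
  assert(Range < 0xFFFFFFFFULL && "Enum too large");
  if (Range > 0xFFFF)
    return "uint32_t";
  if (Range > 0xFF)
    return "uint16_t";
  return "uint8_t";
}

int main() {
  assert(!strcmp(getMinimalTypeForRange(0xFF), "uint8_t"));     // still fits a byte
  assert(!strcmp(getMinimalTypeForRange(0x100), "uint16_t"));   // first 2-byte value
  assert(!strcmp(getMinimalTypeForRange(0x10000), "uint32_t")); // first 4-byte value
  return 0;
}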
static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
const AsmMatcherInfo &Info, StringRef ClassName) {
// Emit the static custom operand parsing table.
OS << "namespace {\n";
OS << " struct OperandMatchEntry {\n";
- OS << " const char *Mnemonic;\n";
- OS << " unsigned OperandMask;\n";
- OS << " MatchClassKind Class;\n";
- OS << " unsigned RequiredFeatures;\n";
+ OS << " static const char *const MnemonicTable;\n";
+ OS << " uint32_t OperandMask;\n";
+ OS << " uint32_t Mnemonic;\n";
+ OS << " " << getMinimalTypeForRange(1ULL << Info.SubtargetFeatures.size())
+ << " RequiredFeatures;\n";
+ OS << " " << getMinimalTypeForRange(Info.Classes.size())
+ << " Class;\n\n";
+ OS << " StringRef getMnemonic() const {\n";
+ OS << " return StringRef(MnemonicTable + Mnemonic + 1,\n";
+ OS << " MnemonicTable[Mnemonic]);\n";
+ OS << " }\n";
OS << " };\n\n";
OS << " // Predicate for searching for an opcode.\n";
OS << " struct LessOpcodeOperand {\n";
OS << " bool operator()(const OperandMatchEntry &LHS, StringRef RHS) {\n";
- OS << " return StringRef(LHS.Mnemonic) < RHS;\n";
+ OS << " return LHS.getMnemonic() < RHS;\n";
OS << " }\n";
OS << " bool operator()(StringRef LHS, const OperandMatchEntry &RHS) {\n";
- OS << " return LHS < StringRef(RHS.Mnemonic);\n";
+ OS << " return LHS < RHS.getMnemonic();\n";
OS << " }\n";
OS << " bool operator()(const OperandMatchEntry &LHS,";
OS << " const OperandMatchEntry &RHS) {\n";
- OS << " return StringRef(LHS.Mnemonic) < StringRef(RHS.Mnemonic);\n";
+ OS << " return LHS.getMnemonic() < RHS.getMnemonic();\n";
OS << " }\n";
OS << " };\n";
OS << "} // end anonymous namespace.\n\n";
+ StringToOffsetTable StringTable;
+
OS << "static const OperandMatchEntry OperandMatchTable["
<< Info.OperandMatchInfo.size() << "] = {\n";
- OS << " /* Mnemonic, Operand List Mask, Operand Class, Features */\n";
+ OS << " /* Operand List Mask, Mnemonic, Operand Class, Features */\n";
for (std::vector<OperandMatchEntry>::const_iterator it =
Info.OperandMatchInfo.begin(), ie = Info.OperandMatchInfo.end();
it != ie; ++it) {
const OperandMatchEntry &OMI = *it;
const MatchableInfo &II = *OMI.MI;
- OS << " { \"" << II.Mnemonic << "\""
- << ", " << OMI.OperandMask;
+ OS << " { " << OMI.OperandMask;
OS << " /* ";
bool printComma = false;
@@ -2026,8 +2077,10 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
}
OS << " */";
- OS << ", " << OMI.CI->Name
- << ", ";
+ // Store a pascal-style length byte in the mnemonic.
+ std::string LenMnemonic = char(II.Mnemonic.size()) + II.Mnemonic.str();
+ OS << ", " << StringTable.GetOrAddStringOffset(LenMnemonic, false)
+ << " /* " << II.Mnemonic << " */, ";
// Write the required features mask.
if (!II.RequiredFeatures.empty()) {
@@ -2037,15 +2090,22 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
}
} else
OS << "0";
+
+ OS << ", " << OMI.CI->Name;
+
OS << " },\n";
}
OS << "};\n\n";
+ OS << "const char *const OperandMatchEntry::MnemonicTable =\n";
+ StringTable.EmitString(OS);
+ OS << ";\n\n";
+
// Emit the operand class switch to call the correct custom parser for
// the found operand class.
OS << Target.getName() << ClassName << "::OperandMatchResultTy "
<< Target.getName() << ClassName << "::\n"
- << "TryCustomParseOperand(SmallVectorImpl<MCParsedAsmOperand*>"
+ << "tryCustomParseOperand(SmallVectorImpl<MCParsedAsmOperand*>"
<< " &Operands,\n unsigned MCK) {\n\n"
<< " switch(MCK) {\n";
@@ -2094,7 +2154,7 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
<< " *ie = MnemonicRange.second; it != ie; ++it) {\n";
OS << " // equal_range guarantees that instruction mnemonic matches.\n";
- OS << " assert(Mnemonic == it->Mnemonic);\n\n";
+ OS << " assert(Mnemonic == it->getMnemonic());\n\n";
// Emit check that the required features are available.
OS << " // check if the available features match\n";
@@ -2111,7 +2171,7 @@ static void EmitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit call to the custom parser method
OS << " // call custom parse method to handle the operand\n";
OS << " OperandMatchResultTy Result = ";
- OS << "TryCustomParseOperand(Operands, it->Class);\n";
+ OS << "tryCustomParseOperand(Operands, it->Class);\n";
OS << " if (Result != MatchOperand_NoMatch)\n";
OS << " return Result;\n";
OS << " }\n\n";
@@ -2186,7 +2246,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " bool MnemonicIsValid(StringRef Mnemonic);\n";
OS << " unsigned MatchInstructionImpl(\n";
OS << " const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
- OS << " MCInst &Inst, unsigned &ErrorInfo);\n";
+ OS << " MCInst &Inst, unsigned &ErrorInfo, unsigned VariantID = 0);\n";
if (Info.OperandMatchInfo.size()) {
OS << "\n enum OperandMatchResultTy {\n";
@@ -2198,7 +2258,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
OS << " StringRef Mnemonic);\n";
- OS << " OperandMatchResultTy TryCustomParseOperand(\n";
+ OS << " OperandMatchResultTy tryCustomParseOperand(\n";
OS << " SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
OS << " unsigned MCK);\n\n";
}
@@ -2260,28 +2320,39 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// following the mnemonic.
OS << "namespace {\n";
OS << " struct MatchEntry {\n";
- OS << " unsigned Opcode;\n";
- OS << " const char *Mnemonic;\n";
- OS << " ConversionKind ConvertFn;\n";
- OS << " MatchClassKind Classes[" << MaxNumOperands << "];\n";
- OS << " unsigned RequiredFeatures;\n";
+ OS << " static const char *const MnemonicTable;\n";
+ OS << " uint32_t Mnemonic;\n";
+ OS << " uint16_t Opcode;\n";
+ OS << " " << getMinimalTypeForRange(Info.Matchables.size())
+ << " ConvertFn;\n";
+ OS << " " << getMinimalTypeForRange(1ULL << Info.SubtargetFeatures.size())
+ << " RequiredFeatures;\n";
+ OS << " " << getMinimalTypeForRange(Info.Classes.size())
+ << " Classes[" << MaxNumOperands << "];\n";
+ OS << " uint8_t AsmVariantID;\n\n";
+ OS << " StringRef getMnemonic() const {\n";
+ OS << " return StringRef(MnemonicTable + Mnemonic + 1,\n";
+ OS << " MnemonicTable[Mnemonic]);\n";
+ OS << " }\n";
OS << " };\n\n";
OS << " // Predicate for searching for an opcode.\n";
OS << " struct LessOpcode {\n";
OS << " bool operator()(const MatchEntry &LHS, StringRef RHS) {\n";
- OS << " return StringRef(LHS.Mnemonic) < RHS;\n";
+ OS << " return LHS.getMnemonic() < RHS;\n";
OS << " }\n";
OS << " bool operator()(StringRef LHS, const MatchEntry &RHS) {\n";
- OS << " return LHS < StringRef(RHS.Mnemonic);\n";
+ OS << " return LHS < RHS.getMnemonic();\n";
OS << " }\n";
OS << " bool operator()(const MatchEntry &LHS, const MatchEntry &RHS) {\n";
- OS << " return StringRef(LHS.Mnemonic) < StringRef(RHS.Mnemonic);\n";
+ OS << " return LHS.getMnemonic() < RHS.getMnemonic();\n";
OS << " }\n";
OS << " };\n";
OS << "} // end anonymous namespace.\n\n";
+ StringToOffsetTable StringTable;
+
OS << "static const MatchEntry MatchTable["
<< Info.Matchables.size() << "] = {\n";
@@ -2290,16 +2361,13 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
it != ie; ++it) {
MatchableInfo &II = **it;
- OS << " { " << Target.getName() << "::"
- << II.getResultInst()->TheDef->getName() << ", \"" << II.Mnemonic << "\""
- << ", " << II.ConversionFnKind << ", { ";
- for (unsigned i = 0, e = II.AsmOperands.size(); i != e; ++i) {
- MatchableInfo::AsmOperand &Op = II.AsmOperands[i];
-
- if (i) OS << ", ";
- OS << Op.Class->Name;
- }
- OS << " }, ";
+ // Store a pascal-style length byte in the mnemonic.
+ std::string LenMnemonic = char(II.Mnemonic.size()) + II.Mnemonic.str();
+ OS << " { " << StringTable.GetOrAddStringOffset(LenMnemonic, false)
+ << " /* " << II.Mnemonic << " */, "
+ << Target.getName() << "::"
+ << II.getResultInst()->TheDef->getName() << ", "
+ << II.ConversionFnKind << ", ";
// Write the required features mask.
if (!II.RequiredFeatures.empty()) {
@@ -2310,11 +2378,23 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
} else
OS << "0";
+ OS << ", { ";
+ for (unsigned i = 0, e = II.AsmOperands.size(); i != e; ++i) {
+ MatchableInfo::AsmOperand &Op = II.AsmOperands[i];
+
+ if (i) OS << ", ";
+ OS << Op.Class->Name;
+ }
+ OS << " }, " << II.AsmVariantID;
OS << "},\n";
}
OS << "};\n\n";
+ OS << "const char *const MatchEntry::MnemonicTable =\n";
+ StringTable.EmitString(OS);
+ OS << ";\n\n";
+
// A method to determine if a mnemonic is in the list.
OS << "bool " << Target.getName() << ClassName << "::\n"
<< "MnemonicIsValid(StringRef Mnemonic) {\n";
@@ -2330,7 +2410,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< Target.getName() << ClassName << "::\n"
<< "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>"
<< " &Operands,\n";
- OS << " MCInst &Inst, unsigned &ErrorInfo) {\n";
+ OS << " MCInst &Inst, unsigned &ErrorInfo,\n";
+ OS << " unsigned VariantID) {\n";
// Emit code to get the available features.
OS << " // Get the current feature set.\n";
@@ -2342,7 +2423,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (HasMnemonicAliases) {
OS << " // Process all MnemonicAliases to remap the mnemonic.\n";
- OS << " ApplyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
+ OS << " // FIXME : Add an entry in AsmParserVariant to check this.\n";
+ OS << " if (!VariantID)\n";
+ OS << " applyMnemonicAliases(Mnemonic, AvailableFeatures);\n\n";
}
// Emit code to compute the class list for this operand vector.
@@ -2375,16 +2458,18 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " it != ie; ++it) {\n";
OS << " // equal_range guarantees that instruction mnemonic matches.\n";
- OS << " assert(Mnemonic == it->Mnemonic);\n";
+ OS << " assert(Mnemonic == it->getMnemonic());\n";
// Emit check that the subclasses match.
+ OS << " if (VariantID != it->AsmVariantID) continue;\n";
OS << " bool OperandsValid = true;\n";
OS << " for (unsigned i = 0; i != " << MaxNumOperands << "; ++i) {\n";
OS << " if (i + 1 >= Operands.size()) {\n";
OS << " OperandsValid = (it->Classes[i] == " <<"InvalidMatchClass);\n";
OS << " break;\n";
OS << " }\n";
- OS << " if (ValidateOperandClass(Operands[i+1], it->Classes[i]))\n";
+ OS << " if (validateOperandClass(Operands[i+1], "
+ "(MatchClassKind)it->Classes[i]))\n";
OS << " continue;\n";
OS << " // If this operand is broken for all of the instances of this\n";
OS << " // mnemonic, keep track of it so we can report loc info.\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index 3123e11f774f..e0b0aace33e6 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -16,6 +16,7 @@
#include "AsmWriterInst.h"
#include "CodeGenTarget.h"
#include "StringToOffsetTable.h"
+#include "SequenceToOffsetTable.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
@@ -277,12 +278,27 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i]));
// Build an aggregate string, and build a table of offsets into it.
- StringToOffsetTable StringTable;
+ SequenceToOffsetTable<std::string> StringTable;
/// OpcodeInfo - This encodes the index of the string to use for the first
/// chunk of the output as well as indices used for operand printing.
std::vector<unsigned> OpcodeInfo;
+ // Add all strings to the string table upfront so it can generate an optimized
+ // representation.
+ for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
+ AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]];
+ if (AWI != 0 &&
+ AWI->Operands[0].OperandType == AsmWriterOperand::isLiteralTextOperand &&
+ !AWI->Operands[0].Str.empty()) {
+ std::string Str = AWI->Operands[0].Str;
+ UnescapeString(Str);
+ StringTable.add(Str);
+ }
+ }
+
+ StringTable.layout();
+
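The new table follows a strict add-everything / layout / get protocol, since suffix sharing can only be computed once every string is known. A toy stand-in showing only the API shape (no actual suffix merging, unlike the real SequenceToOffsetTable):

#include <cassert>
#include <map>
#include <string>

class ToySeqTable {
  std::map<std::string, unsigned> Offsets;
  std::string Data;
  bool Laid = false;

public:
  void add(const std::string &S) { assert(!Laid); Offsets[S] = 0; }
  void layout() { // offsets become valid only after this point
    for (auto &E : Offsets) {
      E.second = unsigned(Data.size());
      Data += E.first;
      Data += '\0';
    }
    Laid = true;
  }
  unsigned get(const std::string &S) const { assert(Laid); return Offsets.at(S); }
};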
unsigned MaxStringIdx = 0;
for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions[i]];
@@ -294,11 +310,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
AsmWriterOperand::isLiteralTextOperand ||
AWI->Operands[0].Str.empty()) {
// Something handled by the asmwriter printer, but with no leading string.
- Idx = StringTable.GetOrAddStringOffset("");
+ Idx = StringTable.get("");
} else {
std::string Str = AWI->Operands[0].Str;
UnescapeString(Str);
- Idx = StringTable.GetOrAddStringOffset(Str);
+ Idx = StringTable.get(Str);
MaxStringIdx = std::max(MaxStringIdx, Idx);
// Nuke the string from the operand list. It is now handled!
@@ -373,9 +389,9 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
O << " };\n\n";
// Emit the string itself.
- O << " const char *AsmStrs = \n";
- StringTable.EmitString(O);
- O << ";\n\n";
+ O << " const char AsmStrs[] = {\n";
+ StringTable.emit(O, printChar);
+ O << " };\n\n";
O << " O << \"\\t\";\n\n";
@@ -461,13 +477,13 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
static void
emitRegisterNameString(raw_ostream &O, StringRef AltName,
- const std::vector<CodeGenRegister*> &Registers) {
- StringToOffsetTable StringTable;
- O << " static const unsigned RegAsmOffset" << AltName << "[] = {\n ";
+ const std::vector<CodeGenRegister*> &Registers) {
+ SequenceToOffsetTable<std::string> StringTable;
+ SmallVector<std::string, 4> AsmNames(Registers.size());
for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
const CodeGenRegister &Reg = *Registers[i];
+ std::string &AsmName = AsmNames[i];
- std::string AsmName;
// "NoRegAltName" is special. We don't need to do a lookup for that,
// as it's just a reference to the default register name.
if (AltName == "" || AltName == "NoRegAltName") {
@@ -495,21 +511,22 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
AsmName = AltNames[Idx];
}
}
+ StringTable.add(AsmName);
+ }
- O << StringTable.GetOrAddStringOffset(AsmName);
- if (((i + 1) % 14) == 0)
- O << ",\n ";
- else
- O << ", ";
+ StringTable.layout();
+ O << " static const char AsmStrs" << AltName << "[] = {\n";
+ StringTable.emit(O, printChar);
+ O << " };\n\n";
+ O << " static const unsigned RegAsmOffset" << AltName << "[] = {";
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ if ((i % 14) == 0)
+ O << "\n ";
+ O << StringTable.get(AsmNames[i]) << ", ";
}
- O << "0\n"
- << " };\n"
+ O << "\n };\n"
<< "\n";
-
- O << " const char *AsmStrs" << AltName << " =\n";
- StringTable.EmitString(O);
- O << ";\n";
}
void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
@@ -544,7 +561,7 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
O << " const unsigned *RegAsmOffset;\n"
<< " const char *AsmStrs;\n"
<< " switch(AltIdx) {\n"
- << " default: assert(0 && \"Invalid register alt name index!\");\n";
+ << " default: llvm_unreachable(\"Invalid register alt name index!\");\n";
for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) {
StringRef Namespace = AltNameIndices[1]->getValueAsString("Namespace");
StringRef AltName(AltNameIndices[i]->getName());
@@ -563,48 +580,6 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
<< "}\n";
}
-void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
- CodeGenTarget Target(Records);
- Record *AsmWriter = Target.getAsmWriter();
- std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
-
- const std::vector<const CodeGenInstruction*> &NumberedInstructions =
- Target.getInstructionsByEnumValue();
-
- StringToOffsetTable StringTable;
- O <<
-"\n\n#ifdef GET_INSTRUCTION_NAME\n"
-"#undef GET_INSTRUCTION_NAME\n\n"
-"/// getInstructionName: This method is automatically generated by tblgen\n"
-"/// from the instruction set description. This returns the enum name of the\n"
-"/// specified instruction.\n"
- "const char *" << Target.getName() << ClassName
- << "::getInstructionName(unsigned Opcode) {\n"
- << " assert(Opcode < " << NumberedInstructions.size()
- << " && \"Invalid instruction number!\");\n"
- << "\n"
- << " static const unsigned InstAsmOffset[] = {";
- for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
- const CodeGenInstruction &Inst = *NumberedInstructions[i];
-
- std::string AsmName = Inst.TheDef->getName();
- if ((i % 14) == 0)
- O << "\n ";
-
- O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
- }
- O << "0\n"
- << " };\n"
- << "\n";
-
- O << " const char *Strs =\n";
- StringTable.EmitString(O);
- O << ";\n";
-
- O << " return Strs+InstAsmOffset[Opcode];\n"
- << "}\n\n#endif\n";
-}
-
namespace {
// IAPrinter - Holds information about an InstAlias. Two InstAliases match if
// they both have the same conditionals, in which case we cannot print out the
@@ -694,70 +669,7 @@ static void EmitGetMapOperandNumber(raw_ostream &O) {
O << " I = OpMap.begin(), E = OpMap.end(); I != E; ++I)\n";
O << " if (I->first == Name)\n";
O << " return I->second;\n";
- O << " assert(false && \"Operand not in map!\");\n";
- O << " return 0;\n";
- O << "}\n\n";
-}
-
-void AsmWriterEmitter::EmitRegIsInRegClass(raw_ostream &O) {
- CodeGenTarget Target(Records);
-
- // Enumerate the register classes.
- ArrayRef<CodeGenRegisterClass*> RegisterClasses =
- Target.getRegBank().getRegClasses();
-
- O << "namespace { // Register classes\n";
- O << " enum RegClass {\n";
-
- // Emit the register enum value for each RegisterClass.
- for (unsigned I = 0, E = RegisterClasses.size(); I != E; ++I) {
- if (I != 0) O << ",\n";
- O << " RC_" << RegisterClasses[I]->getName();
- }
-
- O << "\n };\n";
- O << "} // end anonymous namespace\n\n";
-
- // Emit a function that returns 'true' if a register is part of a particular
- // register class. E.g., RAX is part of GR64 on X86.
- O << "static bool regIsInRegisterClass"
- << "(unsigned RegClass, unsigned Reg) {\n";
-
- // Emit the switch that checks if a register belongs to a particular register
- // class.
- O << " switch (RegClass) {\n";
- O << " default: break;\n";
-
- for (unsigned I = 0, E = RegisterClasses.size(); I != E; ++I) {
- const CodeGenRegisterClass &RC = *RegisterClasses[I];
-
- // Give the register class a legal C name if it's anonymous.
- std::string Name = RC.getName();
- O << " case RC_" << Name << ":\n";
-
- // Emit the register list now.
- unsigned IE = RC.getOrder().size();
- if (IE == 1) {
- O << " if (Reg == " << getQualifiedName(RC.getOrder()[0]) << ")\n";
- O << " return true;\n";
- } else {
- O << " switch (Reg) {\n";
- O << " default: break;\n";
-
- for (unsigned II = 0; II != IE; ++II) {
- Record *Reg = RC.getOrder()[II];
- O << " case " << getQualifiedName(Reg) << ":\n";
- }
-
- O << " return true;\n";
- O << " }\n";
- }
-
- O << " break;\n";
- }
-
- O << " }\n\n";
- O << " return false;\n";
+ O << " llvm_unreachable(\"Operand not in map!\");\n";
O << "}\n\n";
}
@@ -804,8 +716,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
O << "\n#ifdef PRINT_ALIAS_INSTR\n";
O << "#undef PRINT_ALIAS_INSTR\n\n";
- EmitRegIsInRegClass(O);
-
// Emit the method that prints the alias instruction.
std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
@@ -858,7 +768,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
const CodeGenInstAlias::ResultOperand &RO = CGA->ResultOperands[i];
switch (RO.Kind) {
- default: assert(0 && "unexpected InstAlias operand kind");
case CodeGenInstAlias::ResultOperand::K_Record: {
const Record *Rec = RO.getRecord();
StringRef ROName = RO.getName();
@@ -872,9 +781,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
if (!IAP->isOpMapped(ROName)) {
IAP->addOperand(ROName, i);
- Cond = std::string("regIsInRegisterClass(RC_") +
- CGA->ResultOperands[i].getRecord()->getName() +
- ", MI->getOperand(" + llvm::utostr(i) + ").getReg())";
+ Cond = std::string("MRI.getRegClass(") + Target.getName() + "::" +
+ CGA->ResultOperands[i].getRecord()->getName() + "RegClassID)"
+ ".contains(MI->getOperand(" + llvm::utostr(i) + ").getReg())";
IAP->addCond(Cond);
} else {
Cond = std::string("MI->getOperand(") +
@@ -900,6 +809,13 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
IAP->addCond(Cond);
break;
case CodeGenInstAlias::ResultOperand::K_Reg:
+ // If this is zero_reg, something's playing tricks we're not
+ // equipped to handle.
+ if (!CGA->ResultOperands[i].getRegister()) {
+ CantHandle = true;
+ break;
+ }
+
Cond = std::string("MI->getOperand(") +
llvm::utostr(i) + ").getReg() == " + Target.getName() +
"::" + CGA->ResultOperands[i].getRegister()->getName();
@@ -1015,7 +931,6 @@ void AsmWriterEmitter::run(raw_ostream &O) {
EmitPrintInstruction(O);
EmitGetRegisterName(O);
- EmitGetInstructionName(O);
EmitPrintAliasInstruction(O);
}
diff --git a/utils/TableGen/AsmWriterEmitter.h b/utils/TableGen/AsmWriterEmitter.h
index 731e31cc746e..9719b202faab 100644
--- a/utils/TableGen/AsmWriterEmitter.h
+++ b/utils/TableGen/AsmWriterEmitter.h
@@ -37,8 +37,6 @@ namespace llvm {
private:
void EmitPrintInstruction(raw_ostream &o);
void EmitGetRegisterName(raw_ostream &o);
- void EmitGetInstructionName(raw_ostream &o);
- void EmitRegIsInRegClass(raw_ostream &O);
void EmitPrintAliasInstruction(raw_ostream &O);
AsmWriterInst *getAsmWriterInstByID(unsigned ID) const {
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index 02ebd67ba662..2b70f1c52bd0 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -1,8 +1,8 @@
set(LLVM_REQUIRES_EH 1)
set(LLVM_REQUIRES_RTTI 1)
+set(LLVM_LINK_COMPONENTS Support)
add_tablegen(llvm-tblgen LLVM
- ARMDecoderEmitter.cpp
AsmMatcherEmitter.cpp
AsmWriterEmitter.cpp
AsmWriterInst.cpp
@@ -17,11 +17,11 @@ add_tablegen(llvm-tblgen LLVM
DAGISelMatcherGen.cpp
DAGISelMatcherOpt.cpp
DAGISelMatcher.cpp
+ DFAPacketizerEmitter.cpp
DisassemblerEmitter.cpp
EDEmitter.cpp
FastISelEmitter.cpp
FixedLenDecoderEmitter.cpp
- InstrEnumEmitter.cpp
InstrInfoEmitter.cpp
IntrinsicEmitter.cpp
PseudoLoweringEmitter.cpp
@@ -32,5 +32,6 @@ add_tablegen(llvm-tblgen LLVM
TGValueTypes.cpp
TableGen.cpp
X86DisassemblerTables.cpp
+ X86ModRMFilters.cpp
X86RecognizableInstr.cpp
)
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
index fcdaa082fb2a..afbb3a870894 100644
--- a/utils/TableGen/CallingConvEmitter.cpp
+++ b/utils/TableGen/CallingConvEmitter.cpp
@@ -96,7 +96,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << IndentStr << "if (unsigned Reg = State.AllocateReg(";
O << getQualifiedName(RegList->getElementAsRecord(0)) << ")) {\n";
} else {
- O << IndentStr << "static const unsigned RegList" << ++Counter
+ O << IndentStr << "static const uint16_t RegList" << ++Counter
<< "[] = {\n";
O << IndentStr << " ";
for (unsigned i = 0, e = RegList->getSize(); i != e; ++i) {
@@ -127,7 +127,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
unsigned RegListNumber = ++Counter;
unsigned ShadowRegListNumber = ++Counter;
- O << IndentStr << "static const unsigned RegList" << RegListNumber
+ O << IndentStr << "static const uint16_t RegList" << RegListNumber
<< "[] = {\n";
O << IndentStr << " ";
for (unsigned i = 0, e = RegList->getSize(); i != e; ++i) {
@@ -136,7 +136,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
}
O << "\n" << IndentStr << "};\n";
- O << IndentStr << "static const unsigned RegList"
+ O << IndentStr << "static const uint16_t RegList"
<< ShadowRegListNumber << "[] = {\n";
O << IndentStr << " ";
for (unsigned i = 0, e = ShadowRegList->getSize(); i != e; ++i) {
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index c5a152665b06..3943e8a40f87 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -163,19 +163,19 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
--bit;
}
- unsigned opMask = ~0U >> (32-N);
+ uint64_t opMask = ~(uint64_t)0 >> (64-N);
int opShift = beginVarBit - N + 1;
opMask <<= opShift;
opShift = beginInstBit - beginVarBit;
if (opShift > 0) {
- Case += " Value |= (op & " + utostr(opMask) + "U) << " +
+ Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) << " +
itostr(opShift) + ";\n";
} else if (opShift < 0) {
- Case += " Value |= (op & " + utostr(opMask) + "U) >> " +
+ Case += " Value |= (op & UINT64_C(" + utostr(opMask) + ")) >> " +
itostr(-opShift) + ";\n";
} else {
- Case += " Value |= op & " + utostr(opMask) + "U;\n";
+ Case += " Value |= op & UINT64_C(" + utostr(opMask) + ");\n";
}
}
}
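
The hunk above widens the operand mask math from 32 to 64 bits so fields in encodings wider than 32 bits are not truncated. Below is a standalone sketch of the same mask/shift computation with an assumed 40-bit operand field, which the old "~0U >> (32-N)" mask could not express:

    // Standalone sketch, assumed field positions; mirrors the emitted math.
    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned N = 40;                 // field width, > 32 bits
      const int beginVarBit = 39;            // top bit of the field in 'op'
      const int beginInstBit = 45;           // top bit of the field in 'Value'
      uint64_t opMask = ~(uint64_t)0 >> (64 - N);
      int opShift = beginVarBit - N + 1;     // low bit of the field in 'op'
      opMask <<= opShift;
      opShift = beginInstBit - beginVarBit;  // displacement into 'Value'
      const uint64_t op = 0x123456789AULL;
      uint64_t Value = 0;
      if (opShift > 0)
        Value |= (op & opMask) << opShift;
      else if (opShift < 0)
        Value |= (op & opMask) >> -opShift;
      else
        Value |= op & opMask;
      assert(Value == ((op & opMask) << 6)); // field lands 6 bits higher
      return 0;
    }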
@@ -220,7 +220,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
Target.getInstructionsByEnumValue();
// Emit function declaration
- o << "unsigned " << Target.getName();
+ o << "uint64_t " << Target.getName();
if (MCEmitter)
o << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
<< " SmallVectorImpl<MCFixup> &Fixups) const {\n";
@@ -228,7 +228,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
o << "CodeEmitter::getBinaryCodeForInstr(const MachineInstr &MI) const {\n";
// Emit instruction base values
- o << " static const unsigned InstBits[] = {\n";
+ o << " static const uint64_t InstBits[] = {\n";
for (std::vector<const CodeGenInstruction*>::const_iterator
IN = NumberedInstructions.begin(),
EN = NumberedInstructions.end();
@@ -238,21 +238,21 @@ void CodeEmitterGen::run(raw_ostream &o) {
if (R->getValueAsString("Namespace") == "TargetOpcode" ||
R->getValueAsBit("isPseudo")) {
- o << " 0U,\n";
+ o << " UINT64_C(0),\n";
continue;
}
BitsInit *BI = R->getValueAsBitsInit("Inst");
// Start by filling in fixed values.
- unsigned Value = 0;
+ uint64_t Value = 0;
for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {
if (BitInit *B = dynamic_cast<BitInit*>(BI->getBit(e-i-1)))
- Value |= B->getValue() << (e-i-1);
+ Value |= (uint64_t)B->getValue() << (e-i-1);
}
- o << " " << Value << "U," << '\t' << "// " << R->getName() << "\n";
+ o << " UINT64_C(" << Value << ")," << '\t' << "// " << R->getName() << "\n";
}
- o << " 0U\n };\n";
+ o << " UINT64_C(0)\n };\n";
// Map to accumulate all the cases.
std::map<std::string, std::vector<std::string> > CaseMap;
@@ -273,8 +273,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
// Emit initial function code
o << " const unsigned opcode = MI.getOpcode();\n"
- << " unsigned Value = InstBits[opcode];\n"
- << " unsigned op = 0;\n"
+ << " uint64_t Value = InstBits[opcode];\n"
+ << " uint64_t op = 0;\n"
<< " (void)op; // suppress warning\n"
<< " switch (opcode) {\n";
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index dbf166262bb6..d2ddf232b32a 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -18,8 +18,10 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
-#include <set>
+#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
+#include <cstdio>
+#include <set>
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -629,11 +631,11 @@ TreePredicateFn::TreePredicateFn(TreePattern *N) : PatFragRec(N) {
}
std::string TreePredicateFn::getPredCode() const {
- return PatFragRec->getRecord()->getValueAsCode("PredicateCode");
+ return PatFragRec->getRecord()->getValueAsString("PredicateCode");
}
std::string TreePredicateFn::getImmCode() const {
- return PatFragRec->getRecord()->getValueAsCode("ImmediateCode");
+ return PatFragRec->getRecord()->getValueAsString("ImmediateCode");
}
@@ -748,7 +750,7 @@ std::string PatternToMatch::getPredicateCheck() const {
#ifndef NDEBUG
Def->dump();
#endif
- assert(0 && "Unknown predicate type!");
+ llvm_unreachable("Unknown predicate type!");
}
if (!PredicateCheck.empty())
PredicateCheck += " && ";
@@ -839,7 +841,6 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
TreePatternNode *NodeToApply = getOperandNum(OperandNo, N, NodeInfo, ResNo);
switch (ConstraintType) {
- default: assert(0 && "Unknown constraint type!");
case SDTCisVT:
// Operand must be a particular type.
return NodeToApply->UpdateNodeType(ResNo, x.SDTCisVT_Info.VT, TP);
@@ -913,7 +914,7 @@ bool SDTypeConstraint::ApplyTypeConstraint(TreePatternNode *N,
EnforceVectorSubVectorTypeIs(NodeToApply->getExtType(ResNo), TP);
}
}
- return false;
+ llvm_unreachable("Invalid ConstraintType!");
}
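
The replacements above standardize on llvm_unreachable instead of assert(0) plus abort() or a dead return: it documents the invariant, stays effective in release builds, and silences missing-return warnings after a fully covered switch. A minimal sketch of the idiom (hypothetical function, not from this patch):

    #include "llvm/Support/ErrorHandling.h"

    static int classify(int Kind) {
      switch (Kind) {
      case 0: return 10;
      case 1: return 20;
      }
      // No default case and no trailing return needed; the point is marked
      // unreachable in all build modes, and traps if ever reached.
      llvm_unreachable("Unknown Kind!");
    }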
//===----------------------------------------------------------------------===//
@@ -1609,10 +1610,9 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
MadeChange |= Child->UpdateNodeType(ChildResNo, MVT::iPTR, TP);
} else if (OperandNode->getName() == "unknown") {
// Nothing to do.
- } else {
- assert(0 && "Unknown operand type!");
- abort();
- }
+ } else
+ llvm_unreachable("Unknown operand type!");
+
MadeChange |= Child->ApplyTypeConstraints(TP, NotRegisters);
}
@@ -2071,7 +2071,7 @@ void CodeGenDAGPatterns::ParseNodeTransforms() {
while (!Xforms.empty()) {
Record *XFormNode = Xforms.back();
Record *SDNode = XFormNode->getValueAsDef("Opcode");
- std::string Code = XFormNode->getValueAsCode("XFormFunction");
+ std::string Code = XFormNode->getValueAsString("XFormFunction");
SDNodeXForms.insert(std::make_pair(XFormNode, NodeXForm(SDNode, Code)));
Xforms.pop_back();
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 936fd0146455..5a2d40aa7c86 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -19,6 +19,7 @@
#include "CodeGenIntrinsics.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
#include <set>
#include <algorithm>
#include <vector>
@@ -723,8 +724,7 @@ public:
if (Intrinsics[i].TheDef == R) return Intrinsics[i];
for (unsigned i = 0, e = TgtIntrinsics.size(); i != e; ++i)
if (TgtIntrinsics[i].TheDef == R) return TgtIntrinsics[i];
- assert(0 && "Unknown intrinsic!");
- abort();
+ llvm_unreachable("Unknown intrinsic!");
}
const CodeGenIntrinsic &getIntrinsicInfo(unsigned IID) const {
@@ -732,8 +732,7 @@ public:
return Intrinsics[IID-1];
if (IID-Intrinsics.size()-1 < TgtIntrinsics.size())
return TgtIntrinsics[IID-Intrinsics.size()-1];
- assert(0 && "Bad intrinsic ID!");
- abort();
+ llvm_unreachable("Bad intrinsic ID!");
}
unsigned getIntrinsicID(Record *R) const {
@@ -741,8 +740,7 @@ public:
if (Intrinsics[i].TheDef == R) return i;
for (unsigned i = 0, e = TgtIntrinsics.size(); i != e; ++i)
if (TgtIntrinsics[i].TheDef == R) return i + Intrinsics.size();
- assert(0 && "Unknown intrinsic!");
- abort();
+ llvm_unreachable("Unknown intrinsic!");
}
const DAGDefaultOperand &getDefaultOperand(Record *R) const {
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 53d499f39553..fb9ad9371bea 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -245,7 +245,7 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
if (!Ops[DestOp.first].Constraints[DestOp.second].isNone())
throw "Operand '" + DestOpName + "' cannot have multiple constraints!";
Ops[DestOp.first].Constraints[DestOp.second] =
- CGIOperandList::ConstraintInfo::getTied(FlatOpNo);
+ CGIOperandList::ConstraintInfo::getTied(FlatOpNo);
}
static void ParseConstraints(const std::string &CStr, CGIOperandList &Ops) {
@@ -423,6 +423,18 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
return true;
}
+ // For register operands, the source register class can be a subclass
+ // of the instruction register class, not just an exact match.
+ if (ADI && ADI->getDef()->isSubClassOf("RegisterClass")) {
+ if (!InstOpRec->isSubClassOf("RegisterClass"))
+ return false;
+ if (!T.getRegisterClass(InstOpRec)
+ .hasSubClass(&T.getRegisterClass(ADI->getDef())))
+ return false;
+ ResOp = ResultOperand(Result->getArgName(AliasOpNo), ADI->getDef());
+ return true;
+ }
+
// Handle explicit registers.
if (ADI && ADI->getDef()->isSubClassOf("Register")) {
if (InstOpRec->isSubClassOf("OptionalDefOperand")) {
@@ -456,14 +468,19 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
if (ADI && ADI->getDef()->getName() == "zero_reg") {
// Check if this is an optional def.
- if (!InstOpRec->isSubClassOf("OptionalDefOperand"))
- throw TGError(Loc, "reg0 used for result that is not an "
- "OptionalDefOperand!");
+ // Tied operands where the source is a sub-operand of a complex operand
+ // need to represent both operands in the alias destination instruction.
+ // Allow zero_reg for the tied portion. This can and should go away once
+ // the MC representation of things doesn't use tied operands at all.
+ //if (!InstOpRec->isSubClassOf("OptionalDefOperand"))
+ // throw TGError(Loc, "reg0 used for result that is not an "
+ // "OptionalDefOperand!");
ResOp = ResultOperand(static_cast<Record*>(0));
return true;
}
+ // Literal integers.
if (IntInit *II = dynamic_cast<IntInit*>(Arg)) {
if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
return false;
@@ -475,6 +492,19 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
return true;
}
+ // If both are Operands with the same MVT, allow the conversion. It's
+ // up to the user to make sure the values are appropriate, just like
+ // for isel Pats.
+ if (InstOpRec->isSubClassOf("Operand") &&
+ ADI->getDef()->isSubClassOf("Operand")) {
+ // FIXME: What other attributes should we check here? Identical
+ // MIOperandInfo perhaps?
+ if (InstOpRec->getValueInit("Type") != ADI->getDef()->getValueInit("Type"))
+ return false;
+ ResOp = ResultOperand(Result->getArgName(AliasOpNo), ADI->getDef());
+ return true;
+ }
+
return false;
}
@@ -511,8 +541,11 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
unsigned AliasOpNo = 0;
for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
- // Tied registers don't have an entry in the result dag.
- if (ResultInst->Operands[i].getTiedRegister() != -1)
+ // Tied registers don't have an entry in the result dag unless they're part
+ // of a complex operand, in which case we include them anyway, as we
+ // don't have any other way to specify the whole operand.
+ if (ResultInst->Operands[i].MINumOperands == 1 &&
+ ResultInst->Operands[i].getTiedRegister() != -1)
continue;
if (AliasOpNo >= Result->getNumArgs())
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 8de461527955..7ce4f878a3e7 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -15,6 +15,7 @@
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/IntEqClasses.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
@@ -22,6 +23,57 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// CodeGenSubRegIndex
+//===----------------------------------------------------------------------===//
+
+CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
+ : TheDef(R),
+ EnumValue(Enum)
+{}
+
+std::string CodeGenSubRegIndex::getNamespace() const {
+ if (TheDef->getValue("Namespace"))
+ return TheDef->getValueAsString("Namespace");
+ else
+ return "";
+}
+
+const std::string &CodeGenSubRegIndex::getName() const {
+ return TheDef->getName();
+}
+
+std::string CodeGenSubRegIndex::getQualifiedName() const {
+ std::string N = getNamespace();
+ if (!N.empty())
+ N += "::";
+ N += getName();
+ return N;
+}
+
+void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) {
+ std::vector<Record*> Comps = TheDef->getValueAsListOfDefs("ComposedOf");
+ if (Comps.empty())
+ return;
+ if (Comps.size() != 2)
+ throw TGError(TheDef->getLoc(), "ComposedOf must have exactly two entries");
+ CodeGenSubRegIndex *A = RegBank.getSubRegIdx(Comps[0]);
+ CodeGenSubRegIndex *B = RegBank.getSubRegIdx(Comps[1]);
+ CodeGenSubRegIndex *X = A->addComposite(B, this);
+ if (X)
+ throw TGError(TheDef->getLoc(), "Ambiguous ComposedOf entries");
+}
+
+void CodeGenSubRegIndex::cleanComposites() {
+ // Clean out redundant mappings of the form this+X -> X.
+ for (CompMap::iterator i = Composed.begin(), e = Composed.end(); i != e;) {
+ CompMap::iterator j = i;
+ ++i;
+ if (j->first == j->second)
+ Composed.erase(j);
+ }
+}
+
+//===----------------------------------------------------------------------===//
// CodeGenRegister
//===----------------------------------------------------------------------===//
@@ -29,6 +81,7 @@ CodeGenRegister::CodeGenRegister(Record *R, unsigned Enum)
: TheDef(R),
EnumValue(Enum),
CostPerUse(R->getValueAsInt("CostPerUse")),
+ CoveredBySubRegs(R->getValueAsBit("CoveredBySubRegs")),
SubRegsComplete(false)
{}
@@ -37,12 +90,81 @@ const std::string &CodeGenRegister::getName() const {
}
namespace {
- struct Orphan {
- CodeGenRegister *SubReg;
- Record *First, *Second;
- Orphan(CodeGenRegister *r, Record *a, Record *b)
- : SubReg(r), First(a), Second(b) {}
- };
+// Iterate over all register units in a set of registers.
+class RegUnitIterator {
+ CodeGenRegister::Set::const_iterator RegI, RegE;
+ CodeGenRegister::RegUnitList::const_iterator UnitI, UnitE;
+
+public:
+ RegUnitIterator(const CodeGenRegister::Set &Regs):
+ RegI(Regs.begin()), RegE(Regs.end()), UnitI(), UnitE() {
+
+ if (RegI != RegE) {
+ UnitI = (*RegI)->getRegUnits().begin();
+ UnitE = (*RegI)->getRegUnits().end();
+ advance();
+ }
+ }
+
+ bool isValid() const { return UnitI != UnitE; }
+
+ unsigned operator* () const { assert(isValid()); return *UnitI; }
+
+ const CodeGenRegister *getReg() const { assert(isValid()); return *RegI; }
+
+ /// Preincrement. Move to the next unit.
+ void operator++() {
+ assert(isValid() && "Cannot advance beyond the last operand");
+ ++UnitI;
+ advance();
+ }
+
+protected:
+ void advance() {
+ while (UnitI == UnitE) {
+ if (++RegI == RegE)
+ break;
+ UnitI = (*RegI)->getRegUnits().begin();
+ UnitE = (*RegI)->getRegUnits().end();
+ }
+ }
+};
+} // namespace
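
RegUnitIterator flattens a set of registers into the stream of their register units, skipping empty unit lists via advance(). A hypothetical use, counting units over a member set (assumes the types above are in scope):

    static unsigned countUnits(const CodeGenRegister::Set &Regs) {
      unsigned N = 0;
      for (RegUnitIterator UnitI(Regs); UnitI.isValid(); ++UnitI) {
        (void)*UnitI;          // current unit number
        (void)UnitI.getReg();  // register that owns it
        ++N;
      }
      return N;
    }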
+
+// Merge two RegUnitLists maintaining the order and removing duplicates.
+// Overwrites MergedRU in the process.
+static void mergeRegUnits(CodeGenRegister::RegUnitList &MergedRU,
+ const CodeGenRegister::RegUnitList &RRU) {
+ CodeGenRegister::RegUnitList LRU = MergedRU;
+ MergedRU.clear();
+ std::set_union(LRU.begin(), LRU.end(), RRU.begin(), RRU.end(),
+ std::back_inserter(MergedRU));
+}
+
+// Return true if this unit appears in RegUnits.
+static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) {
+ return std::count(RegUnits.begin(), RegUnits.end(), Unit);
+}
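
Both helpers assume RegUnitLists are kept sorted and duplicate-free, which is exactly what std::set_union preserves. A small worked fragment on assumed unit IDs, with the helpers above in scope:

    CodeGenRegister::RegUnitList L, R;   // sorted lists: {1,3} and {2,3}
    L.push_back(1); L.push_back(3);
    R.push_back(2); R.push_back(3);
    mergeRegUnits(L, R);                 // L becomes {1, 2, 3}
    assert(L.size() == 3 && hasRegUnit(L, 2) && !hasRegUnit(L, 4));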
+
+// Inherit register units from subregisters.
+// Return true if the RegUnits changed.
+bool CodeGenRegister::inheritRegUnits(CodeGenRegBank &RegBank) {
+ unsigned OldNumUnits = RegUnits.size();
+ for (SubRegMap::const_iterator I = SubRegs.begin(), E = SubRegs.end();
+ I != E; ++I) {
+ // Strangely, a register may have itself as a subreg (self-cycle), e.g. XMM.
+ // Only create a unit if no other subregs have units.
+ CodeGenRegister *SR = I->second;
+ if (SR == this) {
+ // RegUnits are only empty during getSubRegs, prior to computing weight.
+ if (RegUnits.empty())
+ RegUnits.push_back(RegBank.newRegUnit(0));
+ continue;
+ }
+ // Merge the subregister's units into this register's RegUnits.
+ mergeRegUnits(RegUnits, SR->RegUnits);
+ }
+ return OldNumUnits != RegUnits.size();
}
const CodeGenRegister::SubRegMap &
@@ -53,23 +175,26 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
SubRegsComplete = true;
std::vector<Record*> SubList = TheDef->getValueAsListOfDefs("SubRegs");
- std::vector<Record*> Indices = TheDef->getValueAsListOfDefs("SubRegIndices");
- if (SubList.size() != Indices.size())
+ std::vector<Record*> IdxList = TheDef->getValueAsListOfDefs("SubRegIndices");
+ if (SubList.size() != IdxList.size())
throw TGError(TheDef->getLoc(), "Register " + getName() +
" SubRegIndices doesn't match SubRegs");
// First insert the direct subregs and make sure they are fully indexed.
+ SmallVector<CodeGenSubRegIndex*, 8> Indices;
for (unsigned i = 0, e = SubList.size(); i != e; ++i) {
CodeGenRegister *SR = RegBank.getReg(SubList[i]);
- if (!SubRegs.insert(std::make_pair(Indices[i], SR)).second)
- throw TGError(TheDef->getLoc(), "SubRegIndex " + Indices[i]->getName() +
+ CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxList[i]);
+ Indices.push_back(Idx);
+ if (!SubRegs.insert(std::make_pair(Idx, SR)).second)
+ throw TGError(TheDef->getLoc(), "SubRegIndex " + Idx->getName() +
" appears twice in Register " + getName());
}
// Keep track of inherited subregs and how they can be reached.
- SmallVector<Orphan, 8> Orphans;
+ SmallPtrSet<CodeGenRegister*, 8> Orphans;
- // Clone inherited subregs and place duplicate entries on Orphans.
+ // Clone inherited subregs and place duplicate entries in Orphans.
// Here the order is important - earlier subregs take precedence.
for (unsigned i = 0, e = SubList.size(); i != e; ++i) {
CodeGenRegister *SR = RegBank.getReg(SubList[i]);
@@ -83,7 +208,7 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE;
++SI) {
if (!SubRegs.insert(*SI).second)
- Orphans.push_back(Orphan(SI->second, Indices[i], SI->first));
+ Orphans.insert(SI->second);
// Noop sub-register indexes are possible, so avoid duplicates.
if (SI->second != SR)
@@ -91,6 +216,33 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
}
}
+ // Expand any composed subreg indices.
+ // If dsub_2 has ComposedOf = [qsub_1, dsub_0], and this register has a
+ // qsub_1 subreg, add a dsub_2 subreg. Keep growing Indices and process
+ // expanded subreg indices recursively.
+ for (unsigned i = 0; i != Indices.size(); ++i) {
+ CodeGenSubRegIndex *Idx = Indices[i];
+ const CodeGenSubRegIndex::CompMap &Comps = Idx->getComposites();
+ CodeGenRegister *SR = SubRegs[Idx];
+ const SubRegMap &Map = SR->getSubRegs(RegBank);
+
+ // Look at the possible compositions of Idx.
+ // They may not all be supported by SR.
+ for (CodeGenSubRegIndex::CompMap::const_iterator I = Comps.begin(),
+ E = Comps.end(); I != E; ++I) {
+ SubRegMap::const_iterator SRI = Map.find(I->first);
+ if (SRI == Map.end())
+ continue; // Idx + I->first doesn't exist in SR.
+ // Add I->second as a name for the subreg SRI->second, assuming it is
+ // orphaned, and the name isn't already used for something else.
+ if (SubRegs.count(I->second) || !Orphans.erase(SRI->second))
+ continue;
+ // We found a new name for the orphaned sub-register.
+ SubRegs.insert(std::make_pair(I->second, SRI->second));
+ Indices.push_back(I->second);
+ }
+ }
+
// Process the composites.
ListInit *Comps = TheDef->getValueAsListInit("CompositeIndices");
for (unsigned i = 0, e = Comps->size(); i != e; ++i) {
@@ -103,6 +255,7 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
if (!BaseIdxInit || !BaseIdxInit->getDef()->isSubClassOf("SubRegIndex"))
throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " +
Pat->getAsString());
+ CodeGenSubRegIndex *BaseIdx = RegBank.getSubRegIdx(BaseIdxInit->getDef());
// Resolve list of subreg indices into R2.
CodeGenRegister *R2 = this;
@@ -112,8 +265,9 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
if (!IdxInit || !IdxInit->getDef()->isSubClassOf("SubRegIndex"))
throw TGError(TheDef->getLoc(), "Invalid SubClassIndex in " +
Pat->getAsString());
+ CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(IdxInit->getDef());
const SubRegMap &R2Subs = R2->getSubRegs(RegBank);
- SubRegMap::const_iterator ni = R2Subs.find(IdxInit->getDef());
+ SubRegMap::const_iterator ni = R2Subs.find(Idx);
if (ni == R2Subs.end())
throw TGError(TheDef->getLoc(), "Composite " + Pat->getAsString() +
" refers to bad index in " + R2->getName());
@@ -121,35 +275,76 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
}
// Insert composite index. Allow overriding inherited indices etc.
- SubRegs[BaseIdxInit->getDef()] = R2;
+ SubRegs[BaseIdx] = R2;
// R2 is no longer an orphan.
- for (unsigned j = 0, je = Orphans.size(); j != je; ++j)
- if (Orphans[j].SubReg == R2)
- Orphans[j].SubReg = 0;
+ Orphans.erase(R2);
}
// Now Orphans contains the inherited subregisters without a direct index.
// Create inferred indexes for all missing entries.
- for (unsigned i = 0, e = Orphans.size(); i != e; ++i) {
- Orphan &O = Orphans[i];
- if (!O.SubReg)
- continue;
- SubRegs[RegBank.getCompositeSubRegIndex(O.First, O.Second, true)] =
- O.SubReg;
+ // Work backwards in the Indices vector in order to compose subregs bottom-up.
+ // Consider this subreg sequence:
+ //
+ // qsub_1 -> dsub_0 -> ssub_0
+ //
+ // The qsub_1 -> dsub_0 composition becomes dsub_2, so the ssub_0 register
+ // can be reached in two different ways:
+ //
+ // qsub_1 -> ssub_0
+ // dsub_2 -> ssub_0
+ //
+ // We pick the latter composition because another register may have [dsub_0,
+ // dsub_1, dsub_2] subregs without necessarily having a qsub_1 subreg. The
+ // dsub_2 -> ssub_0 composition can be shared.
+ while (!Indices.empty() && !Orphans.empty()) {
+ CodeGenSubRegIndex *Idx = Indices.pop_back_val();
+ CodeGenRegister *SR = SubRegs[Idx];
+ const SubRegMap &Map = SR->getSubRegs(RegBank);
+ for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE;
+ ++SI)
+ if (Orphans.erase(SI->second))
+ SubRegs[RegBank.getCompositeSubRegIndex(Idx, SI->first)] = SI->second;
}
+
+ // Initialize RegUnitList. A register with no subregisters creates its own
+ // unit. Otherwise, it inherits all its subregisters' units. Because
+ // getSubRegs is called recursively, this processes the register hierarchy in
+ // postorder.
+ //
+ // TODO: We currently assume all register units correspond to a named "leaf"
+ // register. We should also unify register units for ad-hoc register
+ // aliases. This can be done by iteratively merging units for aliasing
+ // registers using a worklist.
+ assert(RegUnits.empty() && "Should only initialize RegUnits once");
+ if (SubRegs.empty())
+ RegUnits.push_back(RegBank.newRegUnit(0));
+ else
+ inheritRegUnits(RegBank);
return SubRegs;
}
void
-CodeGenRegister::addSubRegsPreOrder(SetVector<CodeGenRegister*> &OSet) const {
+CodeGenRegister::addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet,
+ CodeGenRegBank &RegBank) const {
assert(SubRegsComplete && "Must precompute sub-registers");
std::vector<Record*> Indices = TheDef->getValueAsListOfDefs("SubRegIndices");
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
- CodeGenRegister *SR = SubRegs.find(Indices[i])->second;
+ CodeGenSubRegIndex *Idx = RegBank.getSubRegIdx(Indices[i]);
+ CodeGenRegister *SR = SubRegs.find(Idx)->second;
if (OSet.insert(SR))
- SR->addSubRegsPreOrder(OSet);
+ SR->addSubRegsPreOrder(OSet, RegBank);
+ }
+}
+
+// Get the sum of this register's unit weights.
+unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const {
+ unsigned Weight = 0;
+ for (RegUnitList::const_iterator I = RegUnits.begin(), E = RegUnits.end();
+ I != E; ++I) {
+ Weight += RegBank.getRegUnitWeight(*I);
}
+ return Weight;
}
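
So a register's weight is simply the sum over its units. An illustration with assumed names: if D0 has units {S0, S1} and each unit weighs 1, D0 weighs 2:

    #include <cassert>
    #include <vector>

    int main() {
      std::vector<unsigned> UnitWeight(2, 1); // assumed: S0 and S1 weigh 1
      unsigned D0Weight = 0;                  // hypothetical D0 = {S0, S1}
      for (unsigned u = 0; u != UnitWeight.size(); ++u)
        D0Weight += UnitWeight[u];
      assert(D0Weight == 2);                  // D0 weighs two S registers
      return 0;
    }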
//===----------------------------------------------------------------------===//
@@ -215,30 +410,40 @@ struct TupleExpander : SetTheory::Expander {
for (unsigned i = 0, e = Proto->getValues().size(); i != e; ++i) {
RecordVal RV = Proto->getValues()[i];
+ // Skip existing fields, like NAME.
+ if (NewReg->getValue(RV.getNameInit()))
+ continue;
+
+ StringRef Field = RV.getName();
+
// Replace the sub-register list with Tuple.
- if (RV.getName() == "SubRegs")
+ if (Field == "SubRegs")
RV.setValue(ListInit::get(Tuple, RegisterRecTy));
// Provide a blank AsmName. MC hacks are required anyway.
- if (RV.getName() == "AsmName")
+ if (Field == "AsmName")
RV.setValue(BlankName);
// CostPerUse is aggregated from all Tuple members.
- if (RV.getName() == "CostPerUse")
+ if (Field == "CostPerUse")
RV.setValue(IntInit::get(CostPerUse));
+ // Composite registers are always covered by sub-registers.
+ if (Field == "CoveredBySubRegs")
+ RV.setValue(BitInit::get(true));
+
// Copy fields from the RegisterTuples def.
- if (RV.getName() == "SubRegIndices" ||
- RV.getName() == "CompositeIndices") {
- NewReg->addValue(*Def->getValue(RV.getName()));
+ if (Field == "SubRegIndices" ||
+ Field == "CompositeIndices") {
+ NewReg->addValue(*Def->getValue(Field));
continue;
}
// Some fields get their default uninitialized value.
- if (RV.getName() == "DwarfNumbers" ||
- RV.getName() == "DwarfAlias" ||
- RV.getName() == "Aliases") {
- if (const RecordVal *DefRV = RegisterCl->getValue(RV.getName()))
+ if (Field == "DwarfNumbers" ||
+ Field == "DwarfAlias" ||
+ Field == "Aliases") {
+ if (const RecordVal *DefRV = RegisterCl->getValue(Field))
NewReg->addValue(*DefRV);
continue;
}
@@ -330,7 +535,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
SpillAlignment = R->getValueAsInt("Alignment");
CopyCost = R->getValueAsInt("CopyCost");
Allocatable = R->getValueAsBit("isAllocatable");
- AltOrderSelect = R->getValueAsCode("AltOrderSelect");
+ AltOrderSelect = R->getValueAsString("AltOrderSelect");
}
// Create an inferred register class that was missing from the .td files.
@@ -448,7 +653,7 @@ static int TopoOrderRC(const void *PA, const void *PB) {
return 1;
// Finally order by name as a tie breaker.
- return A->getName() < B->getName();
+ return StringRef(A->getName()).compare(B->getName());
}
std::string CodeGenRegisterClass::getQualifiedName() const {
@@ -504,6 +709,30 @@ void CodeGenRegisterClass::computeSubClasses(CodeGenRegBank &RegBank) {
RegClasses[rci]->inheritProperties(RegBank);
}
+void
+CodeGenRegisterClass::getSuperRegClasses(CodeGenSubRegIndex *SubIdx,
+ BitVector &Out) const {
+ DenseMap<CodeGenSubRegIndex*,
+ SmallPtrSet<CodeGenRegisterClass*, 8> >::const_iterator
+ FindI = SuperRegClasses.find(SubIdx);
+ if (FindI == SuperRegClasses.end())
+ return;
+ for (SmallPtrSet<CodeGenRegisterClass*, 8>::const_iterator I =
+ FindI->second.begin(), E = FindI->second.end(); I != E; ++I)
+ Out.set((*I)->EnumValue);
+}
+
+// Populate a unique sorted list of units from a register set.
+void CodeGenRegisterClass::buildRegUnitSet(
+ std::vector<unsigned> &RegUnits) const {
+ std::vector<unsigned> TmpUnits;
+ for (RegUnitIterator UnitI(Members); UnitI.isValid(); ++UnitI)
+ TmpUnits.push_back(*UnitI);
+ std::sort(TmpUnits.begin(), TmpUnits.end());
+ std::unique_copy(TmpUnits.begin(), TmpUnits.end(),
+ std::back_inserter(RegUnits));
+}
+
//===----------------------------------------------------------------------===//
// CodeGenRegBank
//===----------------------------------------------------------------------===//
@@ -511,13 +740,19 @@ void CodeGenRegisterClass::computeSubClasses(CodeGenRegBank &RegBank) {
CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
// Configure register Sets to understand register classes and tuples.
Sets.addFieldExpander("RegisterClass", "MemberList");
+ Sets.addFieldExpander("CalleeSavedRegs", "SaveList");
Sets.addExpander("RegisterTuples", new TupleExpander());
// Read in the user-defined (named) sub-register indices.
// More indices will be synthesized later.
- SubRegIndices = Records.getAllDerivedDefinitions("SubRegIndex");
- std::sort(SubRegIndices.begin(), SubRegIndices.end(), LessRecord());
- NumNamedIndices = SubRegIndices.size();
+ std::vector<Record*> SRIs = Records.getAllDerivedDefinitions("SubRegIndex");
+ std::sort(SRIs.begin(), SRIs.end(), LessRecord());
+ NumNamedIndices = SRIs.size();
+ for (unsigned i = 0, e = SRIs.size(); i != e; ++i)
+ getSubRegIdx(SRIs[i]);
+ // Build composite maps from ComposedOf fields.
+ for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
+ SubRegIndices[i]->updateComponents(*this);
// Read in the register definitions.
std::vector<Record*> Regs = Records.getAllDerivedDefinitions("Register");
@@ -538,9 +773,14 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
// Precompute all sub-register maps now all the registers are known.
// This will create Composite entries for all inferred sub-register indices.
+ NumRegUnits = 0;
for (unsigned i = 0, e = Registers.size(); i != e; ++i)
Registers[i]->getSubRegs(*this);
+ // Native register units are associated with a leaf register. They've all been
+ // discovered now.
+ NumNativeRegUnits = NumRegUnits;
+
// Read in register class definitions.
std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass");
if (RCs.empty())
@@ -561,6 +801,15 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
CodeGenRegisterClass::computeSubClasses(*this);
}
+CodeGenSubRegIndex *CodeGenRegBank::getSubRegIdx(Record *Def) {
+ CodeGenSubRegIndex *&Idx = Def2SubRegIdx[Def];
+ if (Idx)
+ return Idx;
+ Idx = new CodeGenSubRegIndex(Def, SubRegIndices.size() + 1);
+ SubRegIndices.push_back(Idx);
+ return Idx;
+}
+
CodeGenRegister *CodeGenRegBank::getReg(Record *Def) {
CodeGenRegister *&Reg = Def2Reg[Def];
if (Reg)
@@ -582,6 +831,23 @@ void CodeGenRegBank::addToMaps(CodeGenRegisterClass *RC) {
Key2RC.insert(std::make_pair(K, RC));
}
+// Create a synthetic sub-class if it is missing.
+CodeGenRegisterClass*
+CodeGenRegBank::getOrCreateSubClass(const CodeGenRegisterClass *RC,
+ const CodeGenRegister::Set *Members,
+ StringRef Name) {
+ // Synthetic sub-class has the same size and alignment as RC.
+ CodeGenRegisterClass::Key K(Members, RC->SpillSize, RC->SpillAlignment);
+ RCKeyMap::const_iterator FoundI = Key2RC.find(K);
+ if (FoundI != Key2RC.end())
+ return FoundI->second;
+
+ // Sub-class doesn't exist, create a new one.
+ CodeGenRegisterClass *NewRC = new CodeGenRegisterClass(Name, K);
+ addToMaps(NewRC);
+ return NewRC;
+}
+
CodeGenRegisterClass *CodeGenRegBank::getRegClass(Record *Def) {
if (CodeGenRegisterClass *RC = Def2RC[Def])
return RC;
@@ -589,34 +855,28 @@ CodeGenRegisterClass *CodeGenRegBank::getRegClass(Record *Def) {
throw TGError(Def->getLoc(), "Not a known RegisterClass!");
}
-Record *CodeGenRegBank::getCompositeSubRegIndex(Record *A, Record *B,
- bool create) {
+CodeGenSubRegIndex*
+CodeGenRegBank::getCompositeSubRegIndex(CodeGenSubRegIndex *A,
+ CodeGenSubRegIndex *B) {
// Look for an existing entry.
- Record *&Comp = Composite[std::make_pair(A, B)];
- if (Comp || !create)
+ CodeGenSubRegIndex *Comp = A->compose(B);
+ if (Comp)
return Comp;
// None exists, synthesize one.
std::string Name = A->getName() + "_then_" + B->getName();
- Comp = new Record(Name, SMLoc(), Records);
- SubRegIndices.push_back(Comp);
+ Comp = getSubRegIdx(new Record(Name, SMLoc(), Records));
+ A->addComposite(B, Comp);
return Comp;
}
-unsigned CodeGenRegBank::getSubRegIndexNo(Record *idx) {
- std::vector<Record*>::const_iterator i =
- std::find(SubRegIndices.begin(), SubRegIndices.end(), idx);
- assert(i != SubRegIndices.end() && "Not a SubRegIndex");
- return (i - SubRegIndices.begin()) + 1;
-}
-
void CodeGenRegBank::computeComposites() {
for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
CodeGenRegister *Reg1 = Registers[i];
const CodeGenRegister::SubRegMap &SRM1 = Reg1->getSubRegs();
for (CodeGenRegister::SubRegMap::const_iterator i1 = SRM1.begin(),
e1 = SRM1.end(); i1 != e1; ++i1) {
- Record *Idx1 = i1->first;
+ CodeGenSubRegIndex *Idx1 = i1->first;
CodeGenRegister *Reg2 = i1->second;
// Ignore identity compositions.
if (Reg1 == Reg2)
@@ -625,7 +885,7 @@ void CodeGenRegBank::computeComposites() {
// Try composing Idx1 with another SubRegIndex.
for (CodeGenRegister::SubRegMap::const_iterator i2 = SRM2.begin(),
e2 = SRM2.end(); i2 != e2; ++i2) {
- std::pair<Record*, Record*> IdxPair(Idx1, i2->first);
+ CodeGenSubRegIndex *Idx2 = i2->first;
CodeGenRegister *Reg3 = i2->second;
// Ignore identity compositions.
if (Reg2 == Reg3)
@@ -634,16 +894,13 @@ void CodeGenRegBank::computeComposites() {
for (CodeGenRegister::SubRegMap::const_iterator i1d = SRM1.begin(),
e1d = SRM1.end(); i1d != e1d; ++i1d) {
if (i1d->second == Reg3) {
- std::pair<CompositeMap::iterator, bool> Ins =
- Composite.insert(std::make_pair(IdxPair, i1d->first));
// Conflicting composition? Emit a warning but allow it.
- if (!Ins.second && Ins.first->second != i1d->first) {
- errs() << "Warning: SubRegIndex " << getQualifiedName(Idx1)
- << " and " << getQualifiedName(IdxPair.second)
+ if (CodeGenSubRegIndex *Prev = Idx1->addComposite(Idx2, i1d->first))
+ errs() << "Warning: SubRegIndex " << Idx1->getQualifiedName()
+ << " and " << Idx2->getQualifiedName()
<< " compose ambiguously as "
- << getQualifiedName(Ins.first->second) << " or "
- << getQualifiedName(i1d->first) << "\n";
- }
+ << Prev->getQualifiedName() << " or "
+ << i1d->first->getQualifiedName() << "\n";
}
}
}
@@ -652,12 +909,388 @@ void CodeGenRegBank::computeComposites() {
// We don't care about the difference between (Idx1, Idx2) -> Idx2 and invalid
// compositions, so remove any mappings of that form.
- for (CompositeMap::iterator i = Composite.begin(), e = Composite.end();
- i != e;) {
- CompositeMap::iterator j = i;
- ++i;
- if (j->first.second == j->second)
- Composite.erase(j);
+ for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i)
+ SubRegIndices[i]->cleanComposites();
+}
+
+namespace {
+// UberRegSet is a helper class for computeRegUnitWeights. Each UberRegSet is
+// the transitive closure of the union of overlapping register
+// classes. Together, the UberRegSets form a partition of the registers. If we
+// consider overlapping register classes to be connected, then each UberRegSet
+// is a set of connected components.
+//
+// An UberRegSet will likely be a horizontal slice of register names of
+// the same width. Nontrivial subregisters should then be in a separate
+// UberRegSet. But this property isn't required for valid computation of
+// register unit weights.
+//
+// A Weight field caches the max per-register unit weight in each UberRegSet.
+//
+// A set of SingularDeterminants flags single units of some register in this set
+// for which the unit weight equals the set weight. These units should not have
+// their weight increased.
+struct UberRegSet {
+ CodeGenRegister::Set Regs;
+ unsigned Weight;
+ CodeGenRegister::RegUnitList SingularDeterminants;
+
+ UberRegSet(): Weight(0) {}
+};
+} // namespace
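
computeUberSets (below) forms these sets with a union-find over register enum values. A tiny sketch of the IntEqClasses API it relies on, using made-up register numbers:

    #include "llvm/ADT/IntEqClasses.h"
    using namespace llvm;

    void demo() {
      IntEqClasses EC(5);   // ids 0..4; 0 reserved for non-allocatable
      EC.join(1, 2);        // regs 1 and 2 overlap -> same UberRegSet
      EC.join(2, 3);        // transitive closure pulls 3 in as well
      EC.compress();        // renumber the classes densely
      // Now EC[1] == EC[2] == EC[3], while EC[4] is a singleton set.
    }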
+
+// Partition registers into UberRegSets, where each set is the transitive
+// closure of the union of overlapping register classes.
+//
+// UberRegSets[0] is a special non-allocatable set.
+static void computeUberSets(std::vector<UberRegSet> &UberSets,
+ std::vector<UberRegSet*> &RegSets,
+ CodeGenRegBank &RegBank) {
+
+ const std::vector<CodeGenRegister*> &Registers = RegBank.getRegisters();
+
+ // The Register EnumValue is one greater than its index into Registers.
+ assert(Registers.size() == Registers[Registers.size()-1]->EnumValue &&
+ "register enum value mismatch");
+
+ // For simplicity, make the SetID the same as EnumValue.
+ IntEqClasses UberSetIDs(Registers.size()+1);
+ std::set<unsigned> AllocatableRegs;
+ for (unsigned i = 0, e = RegBank.getRegClasses().size(); i != e; ++i) {
+
+ CodeGenRegisterClass *RegClass = RegBank.getRegClasses()[i];
+ if (!RegClass->Allocatable)
+ continue;
+
+ const CodeGenRegister::Set &Regs = RegClass->getMembers();
+ if (Regs.empty())
+ continue;
+
+ unsigned USetID = UberSetIDs.findLeader((*Regs.begin())->EnumValue);
+ assert(USetID && "register number 0 is invalid");
+
+ AllocatableRegs.insert((*Regs.begin())->EnumValue);
+ for (CodeGenRegister::Set::const_iterator I = llvm::next(Regs.begin()),
+ E = Regs.end(); I != E; ++I) {
+ AllocatableRegs.insert((*I)->EnumValue);
+ UberSetIDs.join(USetID, (*I)->EnumValue);
+ }
+ }
+ // Combine non-allocatable regs.
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ unsigned RegNum = Registers[i]->EnumValue;
+ if (AllocatableRegs.count(RegNum))
+ continue;
+
+ UberSetIDs.join(0, RegNum);
+ }
+ UberSetIDs.compress();
+
+ // Make the first UberSet a special unallocatable set.
+ unsigned ZeroID = UberSetIDs[0];
+
+ // Insert Registers into the UberSets formed by union-find.
+ // Do not resize after this.
+ UberSets.resize(UberSetIDs.getNumClasses());
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ const CodeGenRegister *Reg = Registers[i];
+ unsigned USetID = UberSetIDs[Reg->EnumValue];
+ if (!USetID)
+ USetID = ZeroID;
+ else if (USetID == ZeroID)
+ USetID = 0;
+
+ UberRegSet *USet = &UberSets[USetID];
+ USet->Regs.insert(Reg);
+ RegSets[i] = USet;
+ }
+}
+
+// Recompute each UberSet weight after changing unit weights.
+static void computeUberWeights(std::vector<UberRegSet> &UberSets,
+ CodeGenRegBank &RegBank) {
+ // Skip the first unallocatable set.
+ for (std::vector<UberRegSet>::iterator I = llvm::next(UberSets.begin()),
+ E = UberSets.end(); I != E; ++I) {
+
+ // Initialize all unit weights in this set, and remember the max units/reg.
+ const CodeGenRegister *Reg = 0;
+ unsigned MaxWeight = 0, Weight = 0;
+ for (RegUnitIterator UnitI(I->Regs); UnitI.isValid(); ++UnitI) {
+ if (Reg != UnitI.getReg()) {
+ if (Weight > MaxWeight)
+ MaxWeight = Weight;
+ Reg = UnitI.getReg();
+ Weight = 0;
+ }
+ unsigned UWeight = RegBank.getRegUnitWeight(*UnitI);
+ if (!UWeight) {
+ UWeight = 1;
+ RegBank.increaseRegUnitWeight(*UnitI, UWeight);
+ }
+ Weight += UWeight;
+ }
+ if (Weight > MaxWeight)
+ MaxWeight = Weight;
+
+ // Update the set weight.
+ I->Weight = MaxWeight;
+
+ // Find singular determinants.
+ for (CodeGenRegister::Set::iterator RegI = I->Regs.begin(),
+ RegE = I->Regs.end(); RegI != RegE; ++RegI) {
+ if ((*RegI)->getRegUnits().size() == 1
+ && (*RegI)->getWeight(RegBank) == I->Weight)
+ mergeRegUnits(I->SingularDeterminants, (*RegI)->getRegUnits());
+ }
+ }
+}
+
+// normalizeWeight is a computeRegUnitWeights helper that adjusts the weight of
+// a register and its subregisters so that they have the same weight as their
+// UberSet. Self-recursion processes the subregister tree in postorder so
+// subregisters are normalized first.
+//
+// Side effects:
+// - creates new adopted register units
+// - causes superregisters to inherit adopted units
+// - increases the weight of "singular" units
+// - induces recomputation of UberWeights.
+static bool normalizeWeight(CodeGenRegister *Reg,
+ std::vector<UberRegSet> &UberSets,
+ std::vector<UberRegSet*> &RegSets,
+ CodeGenRegister::RegUnitList &NormalUnits,
+ CodeGenRegBank &RegBank) {
+ bool Changed = false;
+ const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();
+ for (CodeGenRegister::SubRegMap::const_iterator SRI = SRM.begin(),
+ SRE = SRM.end(); SRI != SRE; ++SRI) {
+ if (SRI->second == Reg)
+ continue; // self-cycles happen
+
+ Changed |=
+ normalizeWeight(SRI->second, UberSets, RegSets, NormalUnits, RegBank);
+ }
+ // Postorder register normalization.
+
+ // Inherit register units newly adopted by subregisters.
+ if (Reg->inheritRegUnits(RegBank))
+ computeUberWeights(UberSets, RegBank);
+
+ // Check if this register is too skinny for its UberRegSet.
+ UberRegSet *UberSet = RegSets[RegBank.getRegIndex(Reg)];
+
+ unsigned RegWeight = Reg->getWeight(RegBank);
+ if (UberSet->Weight > RegWeight) {
+ // A register unit's weight can be adjusted only if it is the singular unit
+ // for this register, has not been used to normalize a subregister's set,
+ // and has not already been used to singularly determine this UberRegSet.
+ unsigned AdjustUnit = Reg->getRegUnits().front();
+ if (Reg->getRegUnits().size() != 1
+ || hasRegUnit(NormalUnits, AdjustUnit)
+ || hasRegUnit(UberSet->SingularDeterminants, AdjustUnit)) {
+ // We don't have an adjustable unit, so adopt a new one.
+ AdjustUnit = RegBank.newRegUnit(UberSet->Weight - RegWeight);
+ Reg->adoptRegUnit(AdjustUnit);
+ // Adopting a unit does not immediately require recomputing set weights.
+ }
+ else {
+ // Adjust the existing single unit.
+ RegBank.increaseRegUnitWeight(AdjustUnit, UberSet->Weight - RegWeight);
+ // The unit may be shared among sets and registers within this set.
+ computeUberWeights(UberSets, RegBank);
+ }
+ Changed = true;
+ }
+
+ // Mark these units normalized so superregisters can't change their weights.
+ mergeRegUnits(NormalUnits, Reg->getRegUnits());
+
+ return Changed;
+}
+
+// Compute a weight for each register unit created during getSubRegs.
+//
+// The goal is that two registers in the same class will have the same weight,
+// where each register's weight is defined as sum of its units' weights.
+void CodeGenRegBank::computeRegUnitWeights() {
+ assert(RegUnitWeights.empty() && "Only initialize RegUnitWeights once");
+
+ // Only allocatable units will be initialized to nonzero weight.
+ RegUnitWeights.resize(NumRegUnits);
+
+ std::vector<UberRegSet> UberSets;
+ std::vector<UberRegSet*> RegSets(Registers.size());
+ computeUberSets(UberSets, RegSets, *this);
+ // UberSets and RegSets are now immutable.
+
+ computeUberWeights(UberSets, *this);
+
+ // Iterate over each Register, normalizing the unit weights until reaching
+ // a fix point.
+ unsigned NumIters = 0;
+ for (bool Changed = true; Changed; ++NumIters) {
+ assert(NumIters <= NumNativeRegUnits && "Runaway register unit weights");
+ Changed = false;
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ CodeGenRegister::RegUnitList NormalUnits;
+ Changed |=
+ normalizeWeight(Registers[i], UberSets, RegSets, NormalUnits, *this);
+ }
+ }
+}
+
+// Find a set in UniqueSets with the same elements as Set.
+// Return an iterator into UniqueSets.
+static std::vector<RegUnitSet>::const_iterator
+findRegUnitSet(const std::vector<RegUnitSet> &UniqueSets,
+ const RegUnitSet &Set) {
+ std::vector<RegUnitSet>::const_iterator
+ I = UniqueSets.begin(), E = UniqueSets.end();
+ for (; I != E; ++I) {
+ if (I->Units == Set.Units)
+ break;
+ }
+ return I;
+}
+
+// Return true if the RUSubSet is a subset of RUSuperSet.
+static bool isRegUnitSubSet(const std::vector<unsigned> &RUSubSet,
+ const std::vector<unsigned> &RUSuperSet) {
+ return std::includes(RUSuperSet.begin(), RUSuperSet.end(),
+ RUSubSet.begin(), RUSubSet.end());
+}
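
std::includes requires both ranges to be sorted, which buildRegUnitSet() guarantees. A quick check on assumed unit lists, with the helper above in scope:

    unsigned SubArr[] = {2, 5};
    unsigned SuperArr[] = {1, 2, 3, 5};
    std::vector<unsigned> Sub(SubArr, SubArr + 2);
    std::vector<unsigned> Super(SuperArr, SuperArr + 4);
    assert(isRegUnitSubSet(Sub, Super));   // {2,5} is contained in Super
    assert(!isRegUnitSubSet(Super, Sub));  // but not the other way round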
+
+// Iteratively prune unit sets.
+void CodeGenRegBank::pruneUnitSets() {
+ assert(RegClassUnitSets.empty() && "this invalidates RegClassUnitSets");
+
+ // Form an equivalence class of UnitSets with no significant difference.
+ std::vector<unsigned> SuperSetIDs;
+ for (unsigned SubIdx = 0, EndIdx = RegUnitSets.size();
+ SubIdx != EndIdx; ++SubIdx) {
+ const RegUnitSet &SubSet = RegUnitSets[SubIdx];
+ unsigned SuperIdx = 0;
+ for (; SuperIdx != EndIdx; ++SuperIdx) {
+ if (SuperIdx == SubIdx)
+ continue;
+
+ const RegUnitSet &SuperSet = RegUnitSets[SuperIdx];
+ if (isRegUnitSubSet(SubSet.Units, SuperSet.Units)
+ && (SubSet.Units.size() + 3 > SuperSet.Units.size())) {
+ break;
+ }
+ }
+ if (SuperIdx == EndIdx)
+ SuperSetIDs.push_back(SubIdx);
+ }
+ // Populate PrunedUnitSets with each equivalence class's superset.
+ std::vector<RegUnitSet> PrunedUnitSets(SuperSetIDs.size());
+ for (unsigned i = 0, e = SuperSetIDs.size(); i != e; ++i) {
+ unsigned SuperIdx = SuperSetIDs[i];
+ PrunedUnitSets[i].Name = RegUnitSets[SuperIdx].Name;
+ PrunedUnitSets[i].Units.swap(RegUnitSets[SuperIdx].Units);
+ }
+ RegUnitSets.swap(PrunedUnitSets);
+}
+
+// Create a RegUnitSet for each RegClass that contains all units in the class
+// including adopted units that are necessary to model register pressure. Then
+// iteratively compute RegUnitSets such that the union of any two overlapping
+// RegUnitSets is represented.
+//
+// RegisterInfoEmitter will map each RegClass to its RegUnitClass and any
+// RegUnitSet that is a superset of that RegUnitClass.
+void CodeGenRegBank::computeRegUnitSets() {
+
+ // Compute a unique RegUnitSet for each RegClass.
+ const ArrayRef<CodeGenRegisterClass*> &RegClasses = getRegClasses();
+ unsigned NumRegClasses = RegClasses.size();
+ for (unsigned RCIdx = 0, RCEnd = NumRegClasses; RCIdx != RCEnd; ++RCIdx) {
+ if (!RegClasses[RCIdx]->Allocatable)
+ continue;
+
+ // Speculatively grow the RegUnitSets to hold the new set.
+ RegUnitSets.resize(RegUnitSets.size() + 1);
+ RegUnitSets.back().Name = RegClasses[RCIdx]->getName();
+
+ // Compute a sorted list of units in this class.
+ RegClasses[RCIdx]->buildRegUnitSet(RegUnitSets.back().Units);
+
+ // Find an existing RegUnitSet.
+ std::vector<RegUnitSet>::const_iterator SetI =
+ findRegUnitSet(RegUnitSets, RegUnitSets.back());
+ if (SetI != llvm::prior(RegUnitSets.end()))
+ RegUnitSets.pop_back();
+ }
+
+ // Iteratively prune unit sets.
+ pruneUnitSets();
+
+ // Iterate over all unit sets, including new ones added by this loop.
+ unsigned NumRegUnitSubSets = RegUnitSets.size();
+ for (unsigned Idx = 0, EndIdx = RegUnitSets.size(); Idx != EndIdx; ++Idx) {
+ // In theory, this is combinatorial. In practice, it needs to be bounded
+ // by a small number of sets for register pressure to be efficient.
+ // If the assert is hit, we need to implement pruning.
+ assert(Idx < (2*NumRegUnitSubSets) && "runaway unit set inference");
+
+ // Compare new sets with all original classes.
+ for (unsigned SearchIdx = (Idx >= NumRegUnitSubSets) ? 0 : Idx+1;
+ SearchIdx != EndIdx; ++SearchIdx) {
+ std::set<unsigned> Intersection;
+ std::set_intersection(RegUnitSets[Idx].Units.begin(),
+ RegUnitSets[Idx].Units.end(),
+ RegUnitSets[SearchIdx].Units.begin(),
+ RegUnitSets[SearchIdx].Units.end(),
+ std::inserter(Intersection, Intersection.begin()));
+ if (Intersection.empty())
+ continue;
+
+ // Speculatively grow the RegUnitSets to hold the new set.
+ RegUnitSets.resize(RegUnitSets.size() + 1);
+ RegUnitSets.back().Name =
+ RegUnitSets[Idx].Name + "+" + RegUnitSets[SearchIdx].Name;
+
+ std::set_union(RegUnitSets[Idx].Units.begin(),
+ RegUnitSets[Idx].Units.end(),
+ RegUnitSets[SearchIdx].Units.begin(),
+ RegUnitSets[SearchIdx].Units.end(),
+ std::inserter(RegUnitSets.back().Units,
+ RegUnitSets.back().Units.begin()));
+
+ // Find an existing RegUnitSet, or add the union to the unique sets.
+ std::vector<RegUnitSet>::const_iterator SetI =
+ findRegUnitSet(RegUnitSets, RegUnitSets.back());
+ if (SetI != llvm::prior(RegUnitSets.end()))
+ RegUnitSets.pop_back();
+ }
+ }
+
+ // Iteratively prune unit sets after inferring supersets.
+ pruneUnitSets();
+
+ // For each register class, list the UnitSets that are supersets.
+ RegClassUnitSets.resize(NumRegClasses);
+ for (unsigned RCIdx = 0, RCEnd = NumRegClasses; RCIdx != RCEnd; ++RCIdx) {
+ if (!RegClasses[RCIdx]->Allocatable)
+ continue;
+
+ // Recompute the sorted list of units in this class.
+ std::vector<unsigned> RegUnits;
+ RegClasses[RCIdx]->buildRegUnitSet(RegUnits);
+
+ // Don't increase pressure for unallocatable regclasses.
+ if (RegUnits.empty())
+ continue;
+
+ // Find all supersets.
+ for (unsigned USIdx = 0, USEnd = RegUnitSets.size();
+ USIdx != USEnd; ++USIdx) {
+ if (isRegUnitSubSet(RegUnits, RegUnitSets[USIdx].Units))
+ RegClassUnitSets[RCIdx].push_back(USIdx);
+ }
+ assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass");
}
}
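
The net effect is closure under union for overlapping sets: assumed classes A = {1,2} and B = {2,3} overlap, so a synthetic set "A+B" = {1,2,3} is added unless an equal set already exists, and pruneUnitSets later drops sets that differ only insignificantly from a superset. A fragment showing the union step on those assumed sets:

    std::vector<unsigned> A, B, AplusB;
    A.push_back(1); A.push_back(2);
    B.push_back(2); B.push_back(3);
    std::set_union(A.begin(), A.end(), B.begin(), B.end(),
                   std::back_inserter(AplusB));
    assert(AplusB.size() == 3);   // {1, 2, 3}, duplicate 2 removed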
@@ -737,62 +1370,187 @@ computeOverlaps(std::map<const CodeGenRegister*, CodeGenRegister::Set> &Map) {
void CodeGenRegBank::computeDerivedInfo() {
computeComposites();
+
+ // Compute a weight for each register unit created during getSubRegs.
+ // This may create adopted register units (with unit # >= NumNativeRegUnits).
+ computeRegUnitWeights();
+
+ // Compute a unique set of RegUnitSets. One for each RegClass and inferred
+ // supersets for the union of overlapping sets.
+ computeRegUnitSets();
}
-// Infer missing register classes.
//
-// For every register class RC, make sure that the set of registers in RC with
-// a given SubIxx sub-register form a register class.
-void CodeGenRegBank::computeInferredRegisterClasses() {
- // When this function is called, the register classes have not been sorted
- // and assigned EnumValues yet. That means getSubClasses(),
- // getSuperClasses(), and hasSubClass() functions are defunct.
+// Synthesize missing register class intersections.
+//
+// Make sure that sub-classes of RC exists such that getCommonSubClass(RC, X)
+// returns a maximal register class for all X.
+//
+void CodeGenRegBank::inferCommonSubClass(CodeGenRegisterClass *RC) {
+ for (unsigned rci = 0, rce = RegClasses.size(); rci != rce; ++rci) {
+ CodeGenRegisterClass *RC1 = RC;
+ CodeGenRegisterClass *RC2 = RegClasses[rci];
+ if (RC1 == RC2)
+ continue;
- // Map SubRegIndex to register set.
- typedef std::map<Record*, CodeGenRegister::Set, LessRecord> SubReg2SetMap;
+ // Compute the set intersection of RC1 and RC2.
+ const CodeGenRegister::Set &Memb1 = RC1->getMembers();
+ const CodeGenRegister::Set &Memb2 = RC2->getMembers();
+ CodeGenRegister::Set Intersection;
+ std::set_intersection(Memb1.begin(), Memb1.end(),
+ Memb2.begin(), Memb2.end(),
+ std::inserter(Intersection, Intersection.begin()),
+ CodeGenRegister::Less());
+
+ // Skip disjoint class pairs.
+ if (Intersection.empty())
+ continue;
- // Visit all register classes, including the ones being added by the loop.
- for (unsigned rci = 0; rci != RegClasses.size(); ++rci) {
- CodeGenRegisterClass &RC = *RegClasses[rci];
+ // If RC1 and RC2 have different spill sizes or alignments, use the
+ // larger size for sub-classing. If they are equal, prefer RC1.
+ if (RC2->SpillSize > RC1->SpillSize ||
+ (RC2->SpillSize == RC1->SpillSize &&
+ RC2->SpillAlignment > RC1->SpillAlignment))
+ std::swap(RC1, RC2);
- // Compute the set of registers supporting each SubRegIndex.
- SubReg2SetMap SRSets;
- for (CodeGenRegister::Set::const_iterator RI = RC.getMembers().begin(),
- RE = RC.getMembers().end(); RI != RE; ++RI) {
- const CodeGenRegister::SubRegMap &SRM = (*RI)->getSubRegs();
- for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(),
- E = SRM.end(); I != E; ++I)
- SRSets[I->first].insert(*RI);
+ getOrCreateSubClass(RC1, &Intersection,
+ RC1->getName() + "_and_" + RC2->getName());
+ }
+}
+
+//
+// Synthesize missing sub-classes for getSubClassWithSubReg().
+//
+// Make sure that the set of registers in RC with a given SubIdx sub-register
+// form a register class. Update RC->SubClassWithSubReg.
+//
+void CodeGenRegBank::inferSubClassWithSubReg(CodeGenRegisterClass *RC) {
+ // Map SubRegIndex to set of registers in RC supporting that SubRegIndex.
+ typedef std::map<CodeGenSubRegIndex*, CodeGenRegister::Set,
+ CodeGenSubRegIndex::Less> SubReg2SetMap;
+
+ // Compute the set of registers supporting each SubRegIndex.
+ SubReg2SetMap SRSets;
+ for (CodeGenRegister::Set::const_iterator RI = RC->getMembers().begin(),
+ RE = RC->getMembers().end(); RI != RE; ++RI) {
+ const CodeGenRegister::SubRegMap &SRM = (*RI)->getSubRegs();
+ for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(),
+ E = SRM.end(); I != E; ++I)
+ SRSets[I->first].insert(*RI);
+ }
+
+ // Find matching classes for all SRSets entries. Iterate in SubRegIndex
+ // numerical order to visit synthetic indices last.
+ for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
+ CodeGenSubRegIndex *SubIdx = SubRegIndices[sri];
+ SubReg2SetMap::const_iterator I = SRSets.find(SubIdx);
+ // Unsupported SubRegIndex. Skip it.
+ if (I == SRSets.end())
+ continue;
+ // In most cases, all RC registers support the SubRegIndex.
+ if (I->second.size() == RC->getMembers().size()) {
+ RC->setSubClassWithSubReg(SubIdx, RC);
+ continue;
+ }
+ // This is a real subset. See if we have a matching class.
+ CodeGenRegisterClass *SubRC =
+ getOrCreateSubClass(RC, &I->second,
+ RC->getName() + "_with_" + I->first->getName());
+ RC->setSubClassWithSubReg(SubIdx, SubRC);
+ }
+}
+
+//
+// Synthesize missing sub-classes of RC for getMatchingSuperRegClass().
+//
+// Create sub-classes of RC such that getMatchingSuperRegClass(RC, SubIdx, X)
+// has a maximal result for any SubIdx and any X >= FirstSubRegRC.
+//
+
+void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC,
+ unsigned FirstSubRegRC) {
+ SmallVector<std::pair<const CodeGenRegister*,
+ const CodeGenRegister*>, 16> SSPairs;
+
+ // Iterate in SubRegIndex numerical order to visit synthetic indices last.
+ for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
+ CodeGenSubRegIndex *SubIdx = SubRegIndices[sri];
+ // Skip indexes that aren't fully supported by RC's registers. This was
+ // computed by inferSubClassWithSubReg() above which should have been
+ // called first.
+ if (RC->getSubClassWithSubReg(SubIdx) != RC)
+ continue;
+
+ // Build list of (Super, Sub) pairs for this SubIdx.
+ SSPairs.clear();
+ for (CodeGenRegister::Set::const_iterator RI = RC->getMembers().begin(),
+ RE = RC->getMembers().end(); RI != RE; ++RI) {
+ const CodeGenRegister *Super = *RI;
+ const CodeGenRegister *Sub = Super->getSubRegs().find(SubIdx)->second;
+ assert(Sub && "Missing sub-register");
+ SSPairs.push_back(std::make_pair(Super, Sub));
}
- // Find matching classes for all SRSets entries. Iterate in SubRegIndex
- // numerical order to visit synthetic indices last.
- for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
- Record *SubIdx = SubRegIndices[sri];
- SubReg2SetMap::const_iterator I = SRSets.find(SubIdx);
- // Unsupported SubRegIndex. Skip it.
- if (I == SRSets.end())
+ // Iterate over sub-register class candidates. Ignore classes created by
+ // this loop. They will never be useful.
+ for (unsigned rci = FirstSubRegRC, rce = RegClasses.size(); rci != rce;
+ ++rci) {
+ CodeGenRegisterClass *SubRC = RegClasses[rci];
+ // Compute the subset of RC that maps into SubRC.
+ CodeGenRegister::Set SubSet;
+ for (unsigned i = 0, e = SSPairs.size(); i != e; ++i)
+ if (SubRC->contains(SSPairs[i].second))
+ SubSet.insert(SSPairs[i].first);
+ if (SubSet.empty())
continue;
- // In most cases, all RC registers support the SubRegIndex.
- if (I->second.size() == RC.getMembers().size()) {
- RC.setSubClassWithSubReg(SubIdx, &RC);
+ // RC injects completely into SubRC.
+ if (SubSet.size() == SSPairs.size()) {
+ SubRC->addSuperRegClass(SubIdx, RC);
continue;
}
+ // Only a subset of RC maps into SubRC. Make sure it is represented by a
+ // class.
+ getOrCreateSubClass(RC, &SubSet, RC->getName() +
+ "_with_" + SubIdx->getName() +
+ "_in_" + SubRC->getName());
+ }
+ }
+}
- // This is a real subset. See if we have a matching class.
- CodeGenRegisterClass::Key K(&I->second, RC.SpillSize, RC.SpillAlignment);
- RCKeyMap::const_iterator FoundI = Key2RC.find(K);
- if (FoundI != Key2RC.end()) {
- RC.setSubClassWithSubReg(SubIdx, FoundI->second);
- continue;
- }
- // Class doesn't exist.
- CodeGenRegisterClass *NewRC =
- new CodeGenRegisterClass(RC.getName() + "_with_" +
- I->first->getName(), K);
- addToMaps(NewRC);
- RC.setSubClassWithSubReg(SubIdx, NewRC);
+//
+// Infer missing register classes.
+//
+void CodeGenRegBank::computeInferredRegisterClasses() {
+ // When this function is called, the register classes have not been sorted
+ // and assigned EnumValues yet. That means getSubClasses(),
+ // getSuperClasses(), and hasSubClass() functions are defunct.
+ unsigned FirstNewRC = RegClasses.size();
+
+ // Visit all register classes, including the ones being added by the loop.
+ for (unsigned rci = 0; rci != RegClasses.size(); ++rci) {
+ CodeGenRegisterClass *RC = RegClasses[rci];
+
+ // Synthesize answers for getSubClassWithSubReg().
+ inferSubClassWithSubReg(RC);
+
+ // Synthesize answers for getCommonSubClass().
+ inferCommonSubClass(RC);
+
+ // Synthesize answers for getMatchingSuperRegClass().
+ inferMatchingSuperRegClass(RC);
+
+ // New register classes are created while this loop is running, and we need
+ // to visit all of them. In particular, inferMatchingSuperRegClass needs
+ // to match old super-register classes with sub-register classes created
+ // after inferMatchingSuperRegClass was called. At this point,
+ // inferMatchingSuperRegClass has checked SuperRC = [0..rci] with SubRC =
+ // [0..FirstNewRC). We need to cover SubRC = [FirstNewRC..rci].
+ if (rci + 1 == FirstNewRC) {
+ unsigned NextNewRC = RegClasses.size();
+ for (unsigned rci2 = 0; rci2 != FirstNewRC; ++rci2)
+ inferMatchingSuperRegClass(RegClasses[rci2], FirstNewRC);
+ FirstNewRC = NextNewRC;
}
}
}
@@ -843,3 +1601,45 @@ CodeGenRegBank::getRegClassForRegister(Record *R) {
}
return FoundRC;
}
+
+BitVector CodeGenRegBank::computeCoveredRegisters(ArrayRef<Record*> Regs) {
+ SetVector<const CodeGenRegister*> Set;
+
+ // First add Regs with all sub-registers.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ CodeGenRegister *Reg = getReg(Regs[i]);
+ if (Set.insert(Reg))
+ // Reg is new, add all sub-registers.
+ // The pre-ordering is not important here.
+ Reg->addSubRegsPreOrder(Set, *this);
+ }
+
+ // Second, find all super-registers that are completely covered by the set.
+ for (unsigned i = 0; i != Set.size(); ++i) {
+ const CodeGenRegister::SuperRegList &SR = Set[i]->getSuperRegs();
+ for (unsigned j = 0, e = SR.size(); j != e; ++j) {
+ const CodeGenRegister *Super = SR[j];
+ if (!Super->CoveredBySubRegs || Set.count(Super))
+ continue;
+ // This new super-register is covered by its sub-registers.
+ bool AllSubsInSet = true;
+ const CodeGenRegister::SubRegMap &SRM = Super->getSubRegs();
+ for (CodeGenRegister::SubRegMap::const_iterator I = SRM.begin(),
+ E = SRM.end(); I != E; ++I)
+ if (!Set.count(I->second)) {
+ AllSubsInSet = false;
+ break;
+ }
+ // All sub-registers in Set, add Super as well.
+ // We will visit Super later to recheck its super-registers.
+ if (AllSubsInSet)
+ Set.insert(Super);
+ }
+ }
+
+ // Convert to BitVector.
+ BitVector BV(Registers.size() + 1);
+ for (unsigned i = 0, e = Set.size(); i != e; ++i)
+ BV.set(Set[i]->EnumValue);
+ return BV;
+}
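+// For illustration (a sketch with ARM-style names): if D2 is the register
+// pair (S4, S5) with CoveredBySubRegs = 1, then for a callee-saved list
+// containing S4 and S5:
+//
+//   BitVector Covered = RegBank.computeCoveredRegisters(CSRegs);
+//   // Bits for S4, S5, and the fully covered super-register D2 are set.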
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 4fc34b092260..232a6e71de2d 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/ErrorHandling.h"
#include <cstdlib>
#include <map>
#include <string>
@@ -31,14 +32,69 @@
namespace llvm {
class CodeGenRegBank;
+ /// CodeGenSubRegIndex - Represents a sub-register index.
+ class CodeGenSubRegIndex {
+ Record *const TheDef;
+ const unsigned EnumValue;
+
+ public:
+ CodeGenSubRegIndex(Record *R, unsigned Enum);
+
+ const std::string &getName() const;
+ std::string getNamespace() const;
+ std::string getQualifiedName() const;
+
+ // Order CodeGenSubRegIndex pointers by EnumValue.
+ struct Less {
+ bool operator()(const CodeGenSubRegIndex *A,
+ const CodeGenSubRegIndex *B) const {
+ assert(A && B);
+ return A->EnumValue < B->EnumValue;
+ }
+ };
+
+ // Map of composite subreg indices.
+ typedef std::map<CodeGenSubRegIndex*, CodeGenSubRegIndex*, Less> CompMap;
+
+ // Returns the subreg index that results from composing this with Idx.
+ // Returns NULL if this and Idx don't compose.
+ CodeGenSubRegIndex *compose(CodeGenSubRegIndex *Idx) const {
+ CompMap::const_iterator I = Composed.find(Idx);
+ return I == Composed.end() ? 0 : I->second;
+ }
+
+ // Add a composite subreg index: this+A = B.
+ // Return a conflicting composite, or NULL
+ CodeGenSubRegIndex *addComposite(CodeGenSubRegIndex *A,
+ CodeGenSubRegIndex *B) {
+ std::pair<CompMap::iterator, bool> Ins =
+ Composed.insert(std::make_pair(A, B));
+ return (Ins.second || Ins.first->second == B) ? 0 : Ins.first->second;
+ }
+
+ // Update the composite maps of components specified in 'ComposedOf'.
+ void updateComponents(CodeGenRegBank&);
+
+ // Clean out redundant composite mappings.
+ void cleanComposites();
+
+ // Return the map of composites.
+ const CompMap &getComposites() const { return Composed; }
+
+ private:
+ CompMap Composed;
+ };
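+ // For illustration (hypothetical indices): if the target defines the
+ // composition dsub_1 + ssub_0 = ssub_2, then after
+ // Dsub1->addComposite(Ssub0, Ssub2):
+ //
+ //   CodeGenSubRegIndex *Idx = Dsub1->compose(Ssub0); // yields ssub_2
+ //
+ // compose() returns NULL for any pair that was never registered.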
+
/// CodeGenRegister - Represents a register definition.
struct CodeGenRegister {
Record *TheDef;
unsigned EnumValue;
unsigned CostPerUse;
+ bool CoveredBySubRegs;
// Map SubRegIndex -> Register.
- typedef std::map<Record*, CodeGenRegister*, LessRecord> SubRegMap;
+ typedef std::map<CodeGenSubRegIndex*, CodeGenRegister*,
+ CodeGenSubRegIndex::Less> SubRegMap;
CodeGenRegister(Record *R, unsigned Enum);
@@ -54,18 +110,37 @@ namespace llvm {
}
// Add sub-registers to OSet following a pre-order defined by the .td file.
- void addSubRegsPreOrder(SetVector<CodeGenRegister*> &OSet) const;
+ void addSubRegsPreOrder(SetVector<const CodeGenRegister*> &OSet,
+ CodeGenRegBank&) const;
// List of super-registers in topological order, small to large.
- typedef std::vector<CodeGenRegister*> SuperRegList;
+ typedef std::vector<const CodeGenRegister*> SuperRegList;
- // Get the list of super-registers.
- // This is only valid after computeDerivedInfo has visited all registers.
+ // Get the list of super-registers. This is valid after getSubRegs()
+ // visits all registers during RegBank construction.
const SuperRegList &getSuperRegs() const {
assert(SubRegsComplete && "Must precompute sub-registers");
return SuperRegs;
}
+ // List of register units in ascending order.
+ typedef SmallVector<unsigned, 16> RegUnitList;
+
+ // Get the list of register units.
+ // This is only valid after getSubRegs() completes.
+ const RegUnitList &getRegUnits() const { return RegUnits; }
+
+ // Inherit register units from subregisters.
+ // Return true if the RegUnits changed.
+ bool inheritRegUnits(CodeGenRegBank &RegBank);
+
+ // Adopt a register unit for pressure tracking.
+ // A unit is adopted iff its unit number is >= NumNativeRegUnits.
+ void adoptRegUnit(unsigned RUID) { RegUnits.push_back(RUID); }
+
+ // Get the sum of this register's register unit weights.
+ unsigned getWeight(const CodeGenRegBank &RegBank) const;
+
// Order CodeGenRegister pointers by EnumValue.
struct Less {
bool operator()(const CodeGenRegister *A,
@@ -82,6 +157,7 @@ namespace llvm {
bool SubRegsComplete;
SubRegMap SubRegs;
SuperRegList SuperRegs;
+ RegUnitList RegUnits;
};
@@ -101,8 +177,17 @@ namespace llvm {
// super-class.
void inheritProperties(CodeGenRegBank&);
- // Map SubRegIndex -> sub-class
- DenseMap<Record*, CodeGenRegisterClass*> SubClassWithSubReg;
+ // Map SubRegIndex -> sub-class. This is the largest sub-class where all
+ // registers have a SubRegIndex sub-register.
+ DenseMap<CodeGenSubRegIndex*, CodeGenRegisterClass*> SubClassWithSubReg;
+
+ // Map SubRegIndex -> set of super-reg classes. This is all register
+ // classes SuperRC such that:
+ //
+ // R:SubRegIndex in this RC for all R in SuperRC.
+ //
+ DenseMap<CodeGenSubRegIndex*,
+ SmallPtrSet<CodeGenRegisterClass*, 8> > SuperRegClasses;
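+ // For example (hypothetical): if every register in QPR has a dsub_0
+ // sub-register lying in DPR, then QPR is a member of DPR's
+ // SuperRegClasses[dsub_0] set.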
public:
unsigned EnumValue;
@@ -128,8 +213,7 @@ namespace llvm {
MVT::SimpleValueType getValueTypeNum(unsigned VTNum) const {
if (VTNum < VTs.size())
return VTs[VTNum];
- assert(0 && "VTNum greater than number of ValueTypes in RegClass!");
- abort();
+ llvm_unreachable("VTNum greater than number of ValueTypes in RegClass!");
}
// Return true if this class contains the register.
@@ -150,14 +234,26 @@ namespace llvm {
// getSubClassWithSubReg - Returns the largest sub-class where all
// registers have a SubIdx sub-register.
- CodeGenRegisterClass *getSubClassWithSubReg(Record *SubIdx) const {
+ CodeGenRegisterClass*
+ getSubClassWithSubReg(CodeGenSubRegIndex *SubIdx) const {
return SubClassWithSubReg.lookup(SubIdx);
}
- void setSubClassWithSubReg(Record *SubIdx, CodeGenRegisterClass *SubRC) {
+ void setSubClassWithSubReg(CodeGenSubRegIndex *SubIdx,
+ CodeGenRegisterClass *SubRC) {
SubClassWithSubReg[SubIdx] = SubRC;
}
+ // getSuperRegClasses - Returns a bit vector of all register classes
+ // containing only SubIdx super-registers of this class.
+ void getSuperRegClasses(CodeGenSubRegIndex *SubIdx, BitVector &Out) const;
+
+ // addSuperRegClass - Add a class containing only SubIdx super-registers.
+ void addSuperRegClass(CodeGenSubRegIndex *SubIdx,
+ CodeGenRegisterClass *SuperRC) {
+ SuperRegClasses[SubIdx].insert(SuperRC);
+ }
+
// getSubClasses - Returns a constant BitVector of subclasses indexed by
// EnumValue.
// The SubClasses vector includes an entry for this class.
@@ -183,6 +279,9 @@ namespace llvm {
// getOrder(0).
const CodeGenRegister::Set &getMembers() const { return Members; }
+ // Populate a unique sorted list of units from a register set.
+ void buildRegUnitSet(std::vector<unsigned> &RegUnits) const;
+
CodeGenRegisterClass(CodeGenRegBank&, Record *R);
// A key representing the parts of a register class used for forming
@@ -217,16 +316,34 @@ namespace llvm {
static void computeSubClasses(CodeGenRegBank&);
};
+ // Each RegUnitSet is a sorted vector with a name.
+ struct RegUnitSet {
+ typedef std::vector<unsigned>::const_iterator iterator;
+
+ std::string Name;
+ std::vector<unsigned> Units;
+ };
+
// CodeGenRegBank - Represent a target's registers and the relations between
// them.
class CodeGenRegBank {
RecordKeeper &Records;
SetTheory Sets;
- std::vector<Record*> SubRegIndices;
+ // SubRegIndices.
+ std::vector<CodeGenSubRegIndex*> SubRegIndices;
+ DenseMap<Record*, CodeGenSubRegIndex*> Def2SubRegIdx;
unsigned NumNamedIndices;
+
+ // Registers.
std::vector<CodeGenRegister*> Registers;
DenseMap<Record*, CodeGenRegister*> Def2Reg;
+ unsigned NumNativeRegUnits;
+ unsigned NumRegUnits; // # native + adopted register units.
+
+ // Map each register unit to a weight (for register pressure).
+ // Includes native and adopted register units.
+ std::vector<unsigned> RegUnitWeights;
// Register classes.
std::vector<CodeGenRegisterClass*> RegClasses;
@@ -234,16 +351,38 @@ namespace llvm {
typedef std::map<CodeGenRegisterClass::Key, CodeGenRegisterClass*> RCKeyMap;
RCKeyMap Key2RC;
+ // Remember each unique set of register units. Initially, this contains a
+ // unique set for each register class. Similar sets are coalesced with
+ // pruneUnitSets and new supersets are inferred during computeRegUnitSets.
+ std::vector<RegUnitSet> RegUnitSets;
+
+ // Map RegisterClass index to the index of the RegUnitSet that contains the
+ // class's units and any inferred RegUnit supersets.
+ std::vector<std::vector<unsigned> > RegClassUnitSets;
+
// Add RC to *2RC maps.
void addToMaps(CodeGenRegisterClass*);
+ // Create a synthetic sub-class if it is missing.
+ CodeGenRegisterClass *getOrCreateSubClass(const CodeGenRegisterClass *RC,
+ const CodeGenRegister::Set *Membs,
+ StringRef Name);
+
// Infer missing register classes.
void computeInferredRegisterClasses();
+ void inferCommonSubClass(CodeGenRegisterClass *RC);
+ void inferSubClassWithSubReg(CodeGenRegisterClass *RC);
+ void inferMatchingSuperRegClass(CodeGenRegisterClass *RC,
+ unsigned FirstSubRegRC = 0);
+
+ // Iteratively prune unit sets.
+ void pruneUnitSets();
+
+ // Compute a weight for each register unit created during getSubRegs.
+ void computeRegUnitWeights();
- // Composite SubRegIndex instances.
- // Map (SubRegIndex, SubRegIndex) -> SubRegIndex.
- typedef DenseMap<std::pair<Record*, Record*>, Record*> CompositeMap;
- CompositeMap Composite;
+ // Create a RegUnitSet for each RegClass and infer superclasses.
+ void computeRegUnitSets();
// Populate the Composite map from sub-register relationships.
void computeComposites();
@@ -256,20 +395,44 @@ namespace llvm {
// Sub-register indices. The first NumNamedIndices are defined by the user
// in the .td files. The rest are synthesized such that all sub-registers
// have a unique name.
- const std::vector<Record*> &getSubRegIndices() { return SubRegIndices; }
+ ArrayRef<CodeGenSubRegIndex*> getSubRegIndices() { return SubRegIndices; }
unsigned getNumNamedIndices() { return NumNamedIndices; }
- // Map a SubRegIndex Record to its enum value.
- unsigned getSubRegIndexNo(Record *idx);
+ // Find a SubRegIndex from its Record def.
+ CodeGenSubRegIndex *getSubRegIdx(Record*);
// Find or create a sub-register index representing the A+B composition.
- Record *getCompositeSubRegIndex(Record *A, Record *B, bool create = false);
+ CodeGenSubRegIndex *getCompositeSubRegIndex(CodeGenSubRegIndex *A,
+ CodeGenSubRegIndex *B);
const std::vector<CodeGenRegister*> &getRegisters() { return Registers; }
// Find a register from its Record def.
CodeGenRegister *getReg(Record*);
+ // Get a Register's index into the Registers array.
+ unsigned getRegIndex(const CodeGenRegister *Reg) const {
+ return Reg->EnumValue - 1;
+ }
+
+ // Create a new non-native register unit that can be adopted by a register
+ // to increase its pressure. Note that NumNativeRegUnits is not increased.
+ unsigned newRegUnit(unsigned Weight) {
+ if (!RegUnitWeights.empty()) {
+ assert(Weight && "should only add allocatable units");
+ RegUnitWeights.resize(NumRegUnits+1);
+ RegUnitWeights[NumRegUnits] = Weight;
+ }
+ return NumRegUnits++;
+ }
+
+ // Native units are the singular unit of a leaf register. Register aliasing
+ // is completely characterized by native units. Adopted units exist to give
+ // a register additional weight but don't affect aliasing.
+ bool isNativeUnit(unsigned RUID) {
+ return RUID < NumNativeRegUnits;
+ }
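+ // For illustration (a sketch, not from any target): a register R with
+ // two native units of weight 1 can adopt a third unit to model extra
+ // register pressure:
+ //
+ //   unsigned RU = RegBank.newRegUnit(1); // RU >= NumNativeRegUnits
+ //   R->adoptRegUnit(RU);                 // R's weight now sums to 3
+ //   assert(!RegBank.isNativeUnit(RU));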
+
ArrayRef<CodeGenRegisterClass*> getRegClasses() const {
return RegClasses;
}
@@ -284,6 +447,41 @@ namespace llvm {
/// return the superclass. Otherwise return null.
const CodeGenRegisterClass* getRegClassForRegister(Record *R);
+ // Get a register unit's weight. Zero for unallocatable registers.
+ unsigned getRegUnitWeight(unsigned RUID) const {
+ return RegUnitWeights[RUID];
+ }
+
+ // Get the sum of unit weights.
+ unsigned getRegUnitSetWeight(const std::vector<unsigned> &Units) const {
+ unsigned Weight = 0;
+ for (std::vector<unsigned>::const_iterator
+ I = Units.begin(), E = Units.end(); I != E; ++I)
+ Weight += getRegUnitWeight(*I);
+ return Weight;
+ }
+
+ // Increase a RegUnitWeight.
+ void increaseRegUnitWeight(unsigned RUID, unsigned Inc) {
+ RegUnitWeights[RUID] += Inc;
+ }
+
+ // Get the number of register pressure dimensions.
+ unsigned getNumRegPressureSets() const { return RegUnitSets.size(); }
+
+ // Get a set of register unit IDs for a given dimension of pressure.
+ RegUnitSet getRegPressureSet(unsigned Idx) const {
+ return RegUnitSets[Idx];
+ }
+
+ // Get a list of pressure set IDs for a register class. Liveness of a
+ // register in this class impacts each pressure set in this list by the
+ // weight of the register. An exact solution requires all registers in a
+ // class to have the same weight, but that is not strictly guaranteed.
+ ArrayRef<unsigned> getRCPressureSetIDs(unsigned RCIdx) const {
+ return RegClassUnitSets[RCIdx];
+ }
+
// Compute derived records such as missing sub-register indices.
void computeDerivedInfo();
@@ -295,6 +493,15 @@ namespace llvm {
// If R1 is a sub-register of R2, Map[R1] is a subset of Map[R2].
void computeOverlaps(std::map<const CodeGenRegister*,
CodeGenRegister::Set> &Map);
+
+ // Compute the set of registers completely covered by the registers in Regs.
+ // The returned BitVector will have a bit set for each register in Regs,
+ // all sub-registers, and all super-registers that are covered by the
+ // registers in Regs.
+ //
+ // This is used to compute the mask of call-preserved registers from a list
+ // of callee-saves.
+ BitVector computeCoveredRegisters(ArrayRef<Record*> Regs);
};
}
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 4a7bad7e6d85..cf6793570a26 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -58,6 +58,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::iAny: return "MVT::iAny";
case MVT::fAny: return "MVT::fAny";
case MVT::vAny: return "MVT::vAny";
+ case MVT::f16: return "MVT::f16";
case MVT::f32: return "MVT::f32";
case MVT::f64: return "MVT::f64";
case MVT::f80: return "MVT::f80";
@@ -82,6 +83,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v2i64: return "MVT::v2i64";
case MVT::v4i64: return "MVT::v4i64";
case MVT::v8i64: return "MVT::v8i64";
+ case MVT::v2f16: return "MVT::v2f16";
case MVT::v2f32: return "MVT::v2f32";
case MVT::v4f32: return "MVT::v4f32";
case MVT::v8f32: return "MVT::v8f32";
@@ -90,8 +92,8 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::Metadata: return "MVT::Metadata";
case MVT::iPTR: return "MVT::iPTR";
case MVT::iPTRAny: return "MVT::iPTRAny";
- case MVT::untyped: return "MVT::untyped";
- default: assert(0 && "ILLEGAL VALUE TYPE!"); return "";
+ case MVT::Untyped: return "MVT::Untyped";
+ default: llvm_unreachable("ILLEGAL VALUE TYPE!");
}
}
@@ -149,6 +151,26 @@ Record *CodeGenTarget::getAsmParser() const {
return LI[AsmParserNum];
}
+/// getAsmParserVariant - Return the AssemblyParserVariant definition for
+/// this target.
+///
+Record *CodeGenTarget::getAsmParserVariant(unsigned i) const {
+ std::vector<Record*> LI =
+ TargetRec->getValueAsListOfDefs("AssemblyParserVariants");
+ if (i >= LI.size())
+ throw "Target does not have an AsmParserVariant #" + utostr(i) + "!";
+ return LI[i];
+}
+
+/// getAsmParserVariantCount - Return the number of AssemblyParserVariant
+/// definitions available for this target.
+///
+unsigned CodeGenTarget::getAsmParserVariantCount() const {
+ std::vector<Record*> LI =
+ TargetRec->getValueAsListOfDefs("AssemblyParserVariants");
+ return LI.size();
+}
+
/// getAsmWriter - Return the AssemblyWriter definition for this target.
///
Record *CodeGenTarget::getAsmWriter() const {
@@ -267,6 +289,7 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
"DBG_VALUE",
"REG_SEQUENCE",
"COPY",
+ "BUNDLE",
0
};
const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions();
@@ -492,7 +515,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
unsigned ArgNo = Property->getValueAsInt("ArgNo");
ArgumentAttributes.push_back(std::make_pair(ArgNo, NoCapture));
} else
- assert(0 && "Unknown property!");
+ llvm_unreachable("Unknown property!");
}
// Sort the argument attributes for later benefit.
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 730216c331b4..85463da59731 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -91,6 +91,16 @@ public:
///
Record *getAsmParser() const;
+ /// getAsmParserVariant - Return the AssemblyParserVariant definition for
+ /// this target.
+ ///
+ Record *getAsmParserVariant(unsigned i) const;
+
+ /// getAsmParserVariantCount - Return the number of AssemblyParserVariant
+ /// definitions available for this target.
+ ///
+ unsigned getAsmParserVariantCount() const;
+
/// getAsmWriter - Return the AssemblyWriter definition for this target.
///
Record *getAsmWriter() const;
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index 1367e8dd6e86..bd77907a9bd9 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -15,6 +15,8 @@
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
+void Matcher::anchor() { }
+
void Matcher::dump() const {
print(errs(), 0);
}
@@ -324,6 +326,10 @@ unsigned EmitNodeMatcherCommon::getHashImpl() const {
}
+void EmitNodeMatcher::anchor() { }
+
+void MorphNodeToMatcher::anchor() { }
+
unsigned MarkGlueResultsMatcher::getHashImpl() const {
return HashUnsigneds(GlueResultNodes.begin(), GlueResultNodes.end());
}
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index dcb8da71086e..99ebf98b1e4b 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -41,6 +41,7 @@ class Matcher {
// The next matcher node that is executed after this one. Null if this is the
// last stage of a match.
OwningPtr<Matcher> Next;
+ virtual void anchor();
public:
enum KindTy {
// Matcher state manipulation.
@@ -1011,6 +1012,7 @@ private:
/// EmitNodeMatcher - This signals a successful match and generates a node.
class EmitNodeMatcher : public EmitNodeMatcherCommon {
+ virtual void anchor();
unsigned FirstResultSlot;
public:
EmitNodeMatcher(const std::string &opcodeName,
@@ -1033,6 +1035,7 @@ public:
};
class MorphNodeToMatcher : public EmitNodeMatcherCommon {
+ virtual void anchor();
const PatternToMatch &Pattern;
public:
MorphNodeToMatcher(const std::string &opcodeName,
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 3b65b2a6de0c..bd425a9bc1a9 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -573,8 +573,7 @@ EmitMatcher(const Matcher *N, unsigned Indent, unsigned CurrentIdx,
return 2 + NumResultBytes;
}
}
- assert(0 && "Unreachable");
- return 0;
+ llvm_unreachable("Unreachable");
}
/// EmitMatcherList - Emit the bytes for the specified matcher subtree.
@@ -601,7 +600,7 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
if (!PatternPredicates.empty()) {
OS << "bool CheckPatternPredicate(unsigned PredNo) const {\n";
OS << " switch (PredNo) {\n";
- OS << " default: assert(0 && \"Invalid predicate in table?\");\n";
+ OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
for (unsigned i = 0, e = PatternPredicates.size(); i != e; ++i)
OS << " case " << i << ": return " << PatternPredicates[i] << ";\n";
OS << " }\n";
@@ -619,7 +618,7 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
if (!NodePredicates.empty()) {
OS << "bool CheckNodePredicate(SDNode *Node, unsigned PredNo) const {\n";
OS << " switch (PredNo) {\n";
- OS << " default: assert(0 && \"Invalid predicate in table?\");\n";
+ OS << " default: llvm_unreachable(\"Invalid predicate in table?\");\n";
for (unsigned i = 0, e = NodePredicates.size(); i != e; ++i) {
// Emit the predicate code corresponding to this pattern.
TreePredicateFn PredFn = NodePredicates[i];
@@ -641,7 +640,7 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
OS << " SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {\n";
OS << " unsigned NextRes = Result.size();\n";
OS << " switch (PatternNo) {\n";
- OS << " default: assert(0 && \"Invalid pattern # in table?\");\n";
+ OS << " default: llvm_unreachable(\"Invalid pattern # in table?\");\n";
for (unsigned i = 0, e = ComplexPatterns.size(); i != e; ++i) {
const ComplexPattern &P = *ComplexPatterns[i];
unsigned NumOps = P.getNumOperands();
@@ -679,7 +678,7 @@ void MatcherTableEmitter::EmitPredicateFunctions(formatted_raw_ostream &OS) {
if (!NodeXForms.empty()) {
OS << "SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) {\n";
OS << " switch (XFormNo) {\n";
- OS << " default: assert(0 && \"Invalid xform # in table?\");\n";
+ OS << " default: llvm_unreachable(\"Invalid xform # in table?\");\n";
// FIXME: The node xform could take SDValue's instead of SDNode*'s.
for (unsigned i = 0, e = NodeXForms.size(); i != e; ++i) {
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 49ad956f8866..2ac7b87e7010 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -217,7 +217,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue());
if (DI == 0) {
- errs() << "Unknown leaf kind: " << *DI << "\n";
+ errs() << "Unknown leaf kind: " << *N << "\n";
abort();
}
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
new file mode 100644
index 000000000000..4abf54ebae2e
--- /dev/null
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -0,0 +1,512 @@
+//===- DFAPacketizerEmitter.cpp - Packetization DFA for a VLIW machine-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class parses the Schedule.td file and produces an API that can be used
+// to reason about whether an instruction can be added to a packet on a VLIW
+// architecture. The class internally generates a deterministic finite
+// automaton (DFA) that models all possible mappings of machine instructions
+// to functional units as instructions are added to a packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Record.h"
+#include "CodeGenTarget.h"
+#include "DFAPacketizerEmitter.h"
+#include <list>
+
+using namespace llvm;
+
+//
+//
+// State represents the usage of machine resources if the packet contains
+// a set of instruction classes.
+//
+// Specifically, currentState is a set of bit-masks.
+// The nth bit in a bit-mask indicates whether the nth resource is being used
+// by this state. The set of bit-masks in a state represent the different
+// possible outcomes of transitioning to this state.
+// For example: consider a two resource architecture: resource L and resource M
+// with three instruction classes: L, M, and L_or_M.
+// From the initial state (currentState = 0x00), if we add instruction class
+// L_or_M we will transition to a state with currentState = [0x01, 0x10]. This
+// represents the possible resource states that can result from adding an
+// L_or_M instruction.
+//
+// Another way of thinking about this transition is that we are mapping an NDFA with
+// two states [0x01] and [0x10] into a DFA with a single state [0x01, 0x10].
+//
+//
+namespace {
+class State {
+ public:
+ static int currentStateNum;
+ int stateNum;
+ bool isInitial;
+ std::set<unsigned> stateInfo;
+
+ State();
+ State(const State &S);
+
+ //
+ // canAddInsnClass - Returns true if an instruction of type InsnClass is a
+ // valid transition from this state, i.e., can an instruction of type InsnClass
+ // be added to the packet represented by this state.
+ //
+ // PossibleStates is the set of valid resource states that ensue from valid
+ // transitions.
+ //
+ bool canAddInsnClass(unsigned InsnClass, std::set<unsigned> &PossibleStates);
+};
+} // End anonymous namespace.
+
+
+namespace {
+struct Transition {
+ public:
+ static int currentTransitionNum;
+ int transitionNum;
+ State *from;
+ unsigned input;
+ State *to;
+
+ Transition(State *from_, unsigned input_, State *to_);
+};
+} // End anonymous namespace.
+
+
+//
+// Comparators to keep set of states sorted.
+//
+namespace {
+struct ltState {
+ bool operator()(const State *s1, const State *s2) const;
+};
+} // End anonymous namespace.
+
+
+//
+// class DFA: deterministic finite automaton for processor resource tracking.
+//
+namespace {
+class DFA {
+public:
+ DFA();
+
+ // Set of states. Need to keep this sorted to emit the transition table.
+ std::set<State*, ltState> states;
+
+ // Map from a state to the list of transitions with that state as source.
+ std::map<State*, SmallVector<Transition*, 16>, ltState> stateTransitions;
+ State *currentState;
+
+ // Highest valued Input seen.
+ unsigned LargestInput;
+
+ //
+ // Modify the DFA.
+ //
+ void initialize();
+ void addState(State *);
+ void addTransition(Transition *);
+
+ //
+ // getTransition - Return the state when a transition is made from
+ // State From with Input I. If a transition is not found, return NULL.
+ //
+ State *getTransition(State *, unsigned);
+
+ //
+ // isValidTransition: Predicate that checks if there is a valid transition
+ // from state From on input InsnClass.
+ //
+ bool isValidTransition(State *From, unsigned InsnClass);
+
+ //
+ // writeTable: Print out a table representing the DFA.
+ //
+ void writeTableAndAPI(raw_ostream &OS, const std::string &ClassName);
+};
+} // End anonymous namespace.
+
+
+//
+// Constructors for State, Transition, and DFA
+//
+State::State() :
+ stateNum(currentStateNum++), isInitial(false) {}
+
+
+State::State(const State &S) :
+ stateNum(currentStateNum++), isInitial(S.isInitial),
+ stateInfo(S.stateInfo) {}
+
+
+Transition::Transition(State *from_, unsigned input_, State *to_) :
+ transitionNum(currentTransitionNum++), from(from_), input(input_),
+ to(to_) {}
+
+
+DFA::DFA() :
+ LargestInput(0) {}
+
+
+bool ltState::operator()(const State *s1, const State *s2) const {
+ return (s1->stateNum < s2->stateNum);
+}
+
+
+//
+// canAddInsnClass - Returns true if an instruction of type InsnClass is a
+// valid transition from this state i.e., can an instruction of type InsnClass
+// be added to the packet represented by this state.
+//
+// PossibleStates is the set of valid resource states that ensue from valid
+// transitions.
+//
+bool State::canAddInsnClass(unsigned InsnClass,
+ std::set<unsigned> &PossibleStates) {
+ //
+ // Iterate over all resource states in currentState.
+ //
+ bool AddedState = false;
+
+ for (std::set<unsigned>::iterator SI = stateInfo.begin();
+ SI != stateInfo.end(); ++SI) {
+ unsigned thisState = *SI;
+
+ //
+ // Iterate over all possible resources used in InsnClass.
+ // For example: for InsnClass = 0x11, all resources = {0x01, 0x10}.
+ //
+
+ DenseSet<unsigned> VisitedResourceStates;
+ for (unsigned int j = 0; j < sizeof(InsnClass) * 8; ++j) {
+ if ((0x1 << j) & InsnClass) {
+ //
+ // For each possible resource used in InsnClass, generate the
+ // resource state if that resource was used.
+ //
+ unsigned ResultingResourceState = thisState | (0x1 << j);
+ //
+ // Check if the resulting resource state can be accommodated in this
+ // packet.
+ // ResultingResourceState is thisState with resource j marked as used.
+ // If it differs from thisState, then resource j was still free, so
+ // there is at least one resource that can be used to schedule
+ // InsnClass in the current packet.
+ // Insert ResultingResourceState into PossibleStates only if we haven't
+ // processed ResultingResourceState before.
+ //
+ if ((ResultingResourceState != thisState) &&
+ (VisitedResourceStates.count(ResultingResourceState) == 0)) {
+ VisitedResourceStates.insert(ResultingResourceState);
+ PossibleStates.insert(ResultingResourceState);
+ AddedState = true;
+ }
+ }
+ }
+ }
+
+ return AddedState;
+}
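+// For example (continuing the L/M sketch from the header comment): with
+// thisState = 0x01 and InsnClass = 0x11, the candidates are
+// 0x01 | 0x01 == 0x01 (equal to thisState, rejected) and
+// 0x01 | 0x10 == 0x11 (inserted into PossibleStates), so canAddInsnClass
+// returns true.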
+
+
+void DFA::initialize() {
+ currentState->isInitial = true;
+}
+
+
+void DFA::addState(State *S) {
+ assert(!states.count(S) && "State already exists");
+ states.insert(S);
+}
+
+
+void DFA::addTransition(Transition *T) {
+ // Update LargestInput.
+ if (T->input > LargestInput)
+ LargestInput = T->input;
+
+ // Add the new transition.
+ stateTransitions[T->from].push_back(T);
+}
+
+
+//
+// getTransition - Return the state when a transition is made from
+// State From with Input I. If a transition is not found, return NULL.
+//
+State *DFA::getTransition(State *From, unsigned I) {
+ // Do we have a transition from state From?
+ if (!stateTransitions.count(From))
+ return NULL;
+
+ // Do we have a transition from state From with Input I?
+ for (SmallVector<Transition*, 16>::iterator VI =
+ stateTransitions[From].begin();
+ VI != stateTransitions[From].end(); ++VI)
+ if ((*VI)->input == I)
+ return (*VI)->to;
+
+ return NULL;
+}
+
+
+bool DFA::isValidTransition(State *From, unsigned InsnClass) {
+ return (getTransition(From, InsnClass) != NULL);
+}
+
+
+int State::currentStateNum = 0;
+int Transition::currentTransitionNum = 0;
+
+DFAGen::DFAGen(RecordKeeper &R):
+ TargetName(CodeGenTarget(R).getName()),
+ allInsnClasses(), Records(R) {}
+
+
+//
+// writeTableAndAPI - Print out a table representing the DFA and the
+// associated API to create a DFA packetizer.
+//
+// Format:
+// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
+// transitions.
+// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable for
+// the ith state.
+//
+//
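+// For example (a sketch for the two-resource L/M machine described at the
+// top of this file): from the initial state 0, inputs 0x01, 0x10, and 0x11
+// could reach states 1, 2, and 3, so the emitted tables would begin:
+//
+//   DFAStateInputTable[][2] = { {0x01,1}, {0x10,2}, {0x11,3}, ... };
+//   DFAStateEntryTable[]    = { 0, 3, ... };
+//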
+void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
+ std::set<State*, ltState>::iterator SI = states.begin();
+ // This table provides a map to the beginning of the transitions for State s
+ // in DFAStateInputTable.
+ std::vector<int> StateEntry(states.size());
+
+ OS << "namespace llvm {\n\n";
+ OS << "const int " << TargetName << "DFAStateInputTable[][2] = {\n";
+
+ // Tracks the total valid transitions encountered so far. It is used
+ // to construct the StateEntry table.
+ int ValidTransitions = 0;
+ for (unsigned i = 0; i < states.size(); ++i, ++SI) {
+ StateEntry[i] = ValidTransitions;
+ for (unsigned j = 0; j <= LargestInput; ++j) {
+ assert (((*SI)->stateNum == (int) i) && "Mismatch in state numbers");
+ if (!isValidTransition(*SI, j))
+ continue;
+
+ OS << "{" << j << ", "
+ << getTransition(*SI, j)->stateNum
+ << "}, ";
+ ++ValidTransitions;
+ }
+
+ // If there are no valid transitions from this state, we need a sentinel
+ // transition.
+ if (ValidTransitions == StateEntry[i]) {
+ OS << "{-1, -1},";
+ ++ValidTransitions;
+ }
+
+ OS << "\n";
+ }
+ OS << "};\n\n";
+ OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
+
+ // Each entry is the index of the first <Input, Transition> pair in
+ // DFAStateInputTable for the corresponding state.
+ for (unsigned i = 0; i < states.size(); ++i)
+ OS << StateEntry[i] << ", ";
+
+ OS << "\n};\n";
+ OS << "} // namespace\n";
+
+
+ //
+ // Emit DFA Packetizer tables if the target is a VLIW machine.
+ //
+ std::string SubTargetClassName = TargetName + "GenSubtargetInfo";
+ OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
+ OS << "namespace llvm {\n";
+ OS << "DFAPacketizer *" << SubTargetClassName << "::"
+ << "createDFAPacketizer(const InstrItineraryData *IID) const {\n"
+ << " return new DFAPacketizer(IID, " << TargetName
+ << "DFAStateInputTable, " << TargetName << "DFAStateEntryTable);\n}\n\n";
+ OS << "} // End llvm namespace \n";
+}
+
+
+//
+// collectAllInsnClasses - Populate allInsnClasses, the set of
+// functional-unit bitmasks used by the stages of each itinerary.
+//
+void DFAGen::collectAllInsnClasses(const std::string &Name,
+ Record *ItinData,
+ unsigned &NStages,
+ raw_ostream &OS) {
+ // Collect processor itineraries.
+ std::vector<Record*> ProcItinList =
+ Records.getAllDerivedDefinitions("ProcessorItineraries");
+
+ // If there are no itineraries other than the default, don't bother.
+ if (ProcItinList.size() < 2)
+ return;
+ std::map<std::string, unsigned> NameToBitsMap;
+
+ // Parse functional units for all the itineraries.
+ for (unsigned i = 0, N = ProcItinList.size(); i < N; ++i) {
+ Record *Proc = ProcItinList[i];
+ std::vector<Record*> FUs = Proc->getValueAsListOfDefs("FU");
+
+ // Convert macros to bits for each stage.
+ for (unsigned i = 0, N = FUs.size(); i < N; ++i)
+ NameToBitsMap[FUs[i]->getName()] = (unsigned) (1U << i);
+ }
+
+ const std::vector<Record*> &StageList =
+ ItinData->getValueAsListOfDefs("Stages");
+
+ // The number of stages.
+ NStages = StageList.size();
+
+ // For each unit.
+ unsigned UnitBitValue = 0;
+
+ // Compute the bitwise or of each unit used in this stage.
+ for (unsigned i = 0; i < NStages; ++i) {
+ const Record *Stage = StageList[i];
+
+ // Get unit list.
+ const std::vector<Record*> &UnitList =
+ Stage->getValueAsListOfDefs("Units");
+
+ for (unsigned j = 0, M = UnitList.size(); j < M; ++j) {
+ // Conduct bitwise or.
+ std::string UnitName = UnitList[j]->getName();
+ assert(NameToBitsMap.count(UnitName));
+ UnitBitValue |= NameToBitsMap[UnitName];
+ }
+
+ if (UnitBitValue != 0)
+ allInsnClasses.insert(UnitBitValue);
+ }
+}
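+// For example (hypothetical functional units): with FU = [SLOT0, SLOT1],
+// SLOT0 maps to bit 0x1 and SLOT1 to bit 0x2, so a stage with
+// Units = [SLOT0, SLOT1] contributes the instruction class 0x3.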
+
+
+//
+// Run the worklist algorithm to generate the DFA.
+//
+void DFAGen::run(raw_ostream &OS) {
+ EmitSourceFileHeader("Target DFA Packetizer Tables", OS);
+
+ // Collect processor itineraries.
+ std::vector<Record*> ProcItinList =
+ Records.getAllDerivedDefinitions("ProcessorItineraries");
+
+ //
+ // Collect the instruction classes.
+ //
+ for (unsigned i = 0, N = ProcItinList.size(); i < N; i++) {
+ Record *Proc = ProcItinList[i];
+
+ // Get processor itinerary name.
+ const std::string &Name = Proc->getName();
+
+ // Skip default.
+ if (Name == "NoItineraries")
+ continue;
+
+ // Sanity check for at least one instruction itinerary class.
+ unsigned NItinClasses =
+ Records.getAllDerivedDefinitions("InstrItinClass").size();
+ if (NItinClasses == 0)
+ return;
+
+ // Get itinerary data list.
+ std::vector<Record*> ItinDataList = Proc->getValueAsListOfDefs("IID");
+
+ // Collect instruction classes for all itinerary data.
+ for (unsigned j = 0, M = ItinDataList.size(); j < M; j++) {
+ Record *ItinData = ItinDataList[j];
+ unsigned NStages;
+ collectAllInsnClasses(Name, ItinData, NStages, OS);
+ }
+ }
+
+
+ //
+ // Run a worklist algorithm to generate the DFA.
+ //
+ DFA D;
+ State *Initial = new State;
+ Initial->isInitial = true;
+ Initial->stateInfo.insert(0x0);
+ D.addState(Initial);
+ SmallVector<State*, 32> WorkList;
+ std::map<std::set<unsigned>, State*> Visited;
+
+ WorkList.push_back(Initial);
+
+ //
+ // Worklist algorithm to create a DFA for processor resource tracking.
+ // C = {set of InsnClasses}
+ // Begin with initial node in worklist. Initial node does not have
+ // any consumed resources,
+ // ResourceState = 0x0
+ // Visited = {}
+ // While worklist != empty
+ // S = first element of worklist
+ // For every instruction class C
+ // if we can accommodate C in S:
+ // S' = state with resource states = {S Union C}
+ // Add a new transition: S x C -> S'
+ // If S' is not in Visited:
+ // Add S' to worklist
+ // Add S' to Visited
+ //
+ while (!WorkList.empty()) {
+ State *current = WorkList.pop_back_val();
+ for (DenseSet<unsigned>::iterator CI = allInsnClasses.begin(),
+ CE = allInsnClasses.end(); CI != CE; ++CI) {
+ unsigned InsnClass = *CI;
+
+ std::set<unsigned> NewStateResources;
+ //
+ // If we haven't already created a transition for this input
+ // and the state can accommodate this InsnClass, create a transition.
+ //
+ if (!D.getTransition(current, InsnClass) &&
+ current->canAddInsnClass(InsnClass, NewStateResources)) {
+ State *NewState = NULL;
+
+ //
+ // If we have seen this state before, then do not create a new state.
+ //
+ std::map<std::set<unsigned>, State*>::iterator VI;
+ if ((VI = Visited.find(NewStateResources)) != Visited.end())
+ NewState = VI->second;
+ else {
+ NewState = new State;
+ NewState->stateInfo = NewStateResources;
+ D.addState(NewState);
+ Visited[NewStateResources] = NewState;
+ WorkList.push_back(NewState);
+ }
+
+ Transition *NewTransition = new Transition(current, InsnClass,
+ NewState);
+ D.addTransition(NewTransition);
+ }
+ }
+ }
+
+ // Print out the table.
+ D.writeTableAndAPI(OS, TargetName);
+}
diff --git a/utils/TableGen/DFAPacketizerEmitter.h b/utils/TableGen/DFAPacketizerEmitter.h
new file mode 100644
index 000000000000..1727150ae926
--- /dev/null
+++ b/utils/TableGen/DFAPacketizerEmitter.h
@@ -0,0 +1,52 @@
+//===- DFAPacketizerEmitter.h - Packetization DFA for a VLIW machine-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class parses the Schedule.td file and produces an API that can be used
+// to reason about whether an instruction can be added to a packet on a VLIW
+// architecture. The class internally generates a deterministic finite
+// automaton (DFA) that models all possible mappings of machine instructions
+// to functional units as instructions are added to a packet.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <map>
+#include <string>
+
+namespace llvm {
+//
+// class DFAGen: class that generates and prints out the DFA for resource
+// tracking.
+//
+class DFAGen : public TableGenBackend {
+private:
+ std::string TargetName;
+ //
+ // allInsnClasses is the set of all possible resources consumed by an
+ // InstrStage.
+ //
+ DenseSet<unsigned> allInsnClasses;
+ RecordKeeper &Records;
+
+public:
+ DFAGen(RecordKeeper &R);
+
+ //
+ // collectAllInsnClasses: Populate allInsnClasses, the set of
+ // functional-unit bitmasks used by the stages of each itinerary.
+ //
+ void collectAllInsnClasses(const std::string &Name,
+ Record *ItinData,
+ unsigned &NStages,
+ raw_ostream &OS);
+
+ void run(raw_ostream &OS);
+};
+}
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index ff314e9c4f2f..4650197ae718 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -11,7 +11,6 @@
#include "CodeGenTarget.h"
#include "X86DisassemblerTables.h"
#include "X86RecognizableInstr.h"
-#include "ARMDecoderEmitter.h"
#include "FixedLenDecoderEmitter.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index abef70e31897..3809a4576a54 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -287,6 +287,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type,
IMM("i64i8imm");
IMM("i64i32imm");
IMM("SSECC");
+ IMM("AVXCC");
// all R, I, R, I, R
MEM("i8mem");
@@ -519,6 +520,8 @@ static void X86ExtractSemantics(
// ignore (doesn't go anywhere we know about)
} else if (name.find("VMCALL") != name.npos) {
// ignore (rather different semantics than a regular call)
+ } else if (name.find("VMMCALL") != name.npos) {
+ // ignore (rather different semantics than a regular call)
} else if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
CALL("off");
} else {
@@ -567,12 +570,23 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
REG("DPR");
REG("DPR_VFP2");
REG("DPR_8");
+ REG("DPair");
REG("SPR");
REG("QPR");
REG("QQPR");
REG("QQQQPR");
+ REG("VecListOneD");
+ REG("VecListDPair");
+ REG("VecListDPairSpaced");
+ REG("VecListThreeD");
+ REG("VecListFourD");
+ REG("VecListOneDAllLanes");
+ REG("VecListDPairAllLanes");
+ REG("VecListDPairSpacedAllLanes");
IMM("i32imm");
+ IMM("fbits16");
+ IMM("fbits32");
IMM("i32imm_hilo16");
IMM("bf_inv_mask_imm");
IMM("lsb_pos_imm");
@@ -597,6 +611,20 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
IMM("imm1_16");
IMM("imm1_32");
IMM("nModImm");
+ IMM("nImmSplatI8");
+ IMM("nImmSplatI16");
+ IMM("nImmSplatI32");
+ IMM("nImmSplatI64");
+ IMM("nImmVMOVI32");
+ IMM("nImmVMOVF32");
+ IMM("imm8");
+ IMM("imm16");
+ IMM("imm32");
+ IMM("imm1_7");
+ IMM("imm1_15");
+ IMM("imm1_31");
+ IMM("imm0_1");
+ IMM("imm0_3");
IMM("imm0_7");
IMM("imm0_15");
IMM("imm0_255");
@@ -735,7 +763,7 @@ static void ARMPopulateOperands(
errs() << "Operand type: " << rec.getName() << '\n';
errs() << "Operand name: " << operandInfo.Name << '\n';
errs() << "Instruction name: " << inst.TheDef->getName() << '\n';
- llvm_unreachable("Unhandled type");
+ throw("Unhandled type in EDEmitter");
}
}
}
@@ -956,11 +984,7 @@ void EDEmitter::run(raw_ostream &o) {
emitCommonEnums(o, i);
- o << "namespace {\n";
-
- o << "llvm::EDInstInfo instInfo" << target.getName().c_str() << "[] = ";
+ o << "static const llvm::EDInstInfo instInfo" << target.getName() << "[] = ";
infoArray.emit(o, i);
o << ";" << "\n";
-
- o << "}\n";
}
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 9fdc2e33a546..e8dad77302c3 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -21,7 +21,6 @@
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -280,7 +279,7 @@ struct OperandsSignature {
} else if (Operands[i].isImm()) {
OS << "uint64_t imm" << i;
} else if (Operands[i].isFP()) {
- OS << "ConstantFP *f" << i;
+ OS << "const ConstantFP *f" << i;
} else {
llvm_unreachable("Unknown operand kind!");
}
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 02b966a21431..9b676f21a1d3 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -17,6 +17,7 @@
#include "FixedLenDecoderEmitter.h"
#include "CodeGenTarget.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -48,7 +49,7 @@ static bool ValueNotSet(bit_value_t V) {
static int Value(bit_value_t V) {
return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
}
-static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
+static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
if (BitInit *bit = dynamic_cast<BitInit*>(bits.getBit(index)))
return bit->getValue() ? BIT_TRUE : BIT_FALSE;
@@ -56,7 +57,7 @@ static bit_value_t bitFromBits(BitsInit &bits, unsigned index) {
return BIT_UNSET;
}
// Prints the bit value for each position.
-static void dumpBits(raw_ostream &o, BitsInit &bits) {
+static void dumpBits(raw_ostream &o, const BitsInit &bits) {
unsigned index;
for (index = bits.getNumBits(); index > 0; index--) {
@@ -71,7 +72,7 @@ static void dumpBits(raw_ostream &o, BitsInit &bits) {
o << "_";
break;
default:
- assert(0 && "unexpected return value from bitFromBits");
+ llvm_unreachable("unexpected return value from bitFromBits");
}
}
}
@@ -125,7 +126,7 @@ typedef std::vector<bit_value_t> insn_t;
/// version and return the Opcode since the two have the same Asm format string.
class Filter {
protected:
- FilterChooser *Owner; // points to the FilterChooser who owns this filter
+ const FilterChooser *Owner;// points to the FilterChooser who owns this filter
unsigned StartBit; // the starting bit position
unsigned NumBits; // number of bits to filter
bool Mixed; // a mixed region contains both set and unset bits
@@ -137,7 +138,7 @@ protected:
std::vector<unsigned> VariableInstructions;
// Map of well-known segment value to its delegate.
- std::map<unsigned, FilterChooser*> FilterChooserMap;
+ std::map<unsigned, const FilterChooser*> FilterChooserMap;
// Number of instructions which fall under FilteredInstructions category.
unsigned NumFiltered;
@@ -145,19 +146,15 @@ protected:
// Keeps track of the last opcode in the filtered bucket.
unsigned LastOpcFiltered;
- // Number of instructions which fall under VariableInstructions category.
- unsigned NumVariable;
-
public:
- unsigned getNumFiltered() { return NumFiltered; }
- unsigned getNumVariable() { return NumVariable; }
- unsigned getSingletonOpc() {
+ unsigned getNumFiltered() const { return NumFiltered; }
+ unsigned getSingletonOpc() const {
assert(NumFiltered == 1);
return LastOpcFiltered;
}
// Return the filter chooser for the group of instructions without constant
// segment values.
- FilterChooser &getVariableFC() {
+ const FilterChooser &getVariableFC() const {
assert(NumFiltered == 1);
assert(FilterChooserMap.size() == 1);
return *(FilterChooserMap.find((unsigned)-1)->second);
@@ -177,7 +174,7 @@ public:
void recurse();
// Emit code to decode instructions given a segment or segments of bits.
- void emit(raw_ostream &o, unsigned &Indentation);
+ void emit(raw_ostream &o, unsigned &Indentation) const;
// Returns the number of fanout produced by the filter. More fanout implies
// the filter distinguishes more categories of instructions.
@@ -217,10 +214,10 @@ protected:
const std::vector<const CodeGenInstruction*> &AllInstructions;
// Vector of uid's for this filter chooser to work on.
- const std::vector<unsigned> Opcodes;
+ const std::vector<unsigned> &Opcodes;
// Lookup table for the operand decoding of instructions.
- std::map<unsigned, std::vector<OperandInfo> > &Operands;
+ const std::map<unsigned, std::vector<OperandInfo> > &Operands;
// Vector of candidate filters.
std::vector<Filter> Filters;
@@ -230,7 +227,7 @@ protected:
std::vector<bit_value_t> FilterBitValues;
// Links to the FilterChooser above us in the decoding tree.
- FilterChooser *Parent;
+ const FilterChooser *Parent;
// Index of the best filter from Filters.
int BestIndex;
@@ -242,19 +239,19 @@ protected:
const FixedLenDecoderEmitter *Emitter;
public:
- FilterChooser(const FilterChooser &FC) :
- AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
+ FilterChooser(const FilterChooser &FC)
+ : AllInstructions(FC.AllInstructions), Opcodes(FC.Opcodes),
Operands(FC.Operands), Filters(FC.Filters),
FilterBitValues(FC.FilterBitValues), Parent(FC.Parent),
- BestIndex(FC.BestIndex), BitWidth(FC.BitWidth),
- Emitter(FC.Emitter) { }
+ BestIndex(FC.BestIndex), BitWidth(FC.BitWidth),
+ Emitter(FC.Emitter) { }
FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
const std::vector<unsigned> &IDs,
- std::map<unsigned, std::vector<OperandInfo> > &Ops,
+ const std::map<unsigned, std::vector<OperandInfo> > &Ops,
unsigned BW,
- const FixedLenDecoderEmitter *E) :
- AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
+ const FixedLenDecoderEmitter *E)
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
Parent(NULL), BestIndex(-1), BitWidth(BW), Emitter(E) {
for (unsigned i = 0; i < BitWidth; ++i)
FilterBitValues.push_back(BIT_UNFILTERED);
@@ -264,10 +261,10 @@ public:
FilterChooser(const std::vector<const CodeGenInstruction*> &Insts,
const std::vector<unsigned> &IDs,
- std::map<unsigned, std::vector<OperandInfo> > &Ops,
- std::vector<bit_value_t> &ParentFilterBitValues,
- FilterChooser &parent) :
- AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
+ const std::map<unsigned, std::vector<OperandInfo> > &Ops,
+ const std::vector<bit_value_t> &ParentFilterBitValues,
+ const FilterChooser &parent)
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
Filters(), FilterBitValues(ParentFilterBitValues),
Parent(&parent), BestIndex(-1), BitWidth(parent.BitWidth),
Emitter(parent.Emitter) {
@@ -275,18 +272,31 @@ public:
}
// The top level filter chooser has NULL as its parent.
- bool isTopLevel() { return Parent == NULL; }
+ bool isTopLevel() const { return Parent == NULL; }
// Emit the top level typedef and decodeInstruction() function.
- void emitTop(raw_ostream &o, unsigned Indentation, std::string Namespace);
+ void emitTop(raw_ostream &o, unsigned Indentation,
+ const std::string &Namespace) const;
protected:
// Populates the insn given the uid.
void insnWithID(insn_t &Insn, unsigned Opcode) const {
BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
- for (unsigned i = 0; i < BitWidth; ++i)
- Insn.push_back(bitFromBits(Bits, i));
+ // We may have a SoftFail bitmask, which specifies a mask where an encoding
+ // may differ from the value in "Inst" and yet still be valid, but the
+ // disassembler should return SoftFail instead of Success.
+ //
+ // This is used for marking UNPREDICTABLE instructions in the ARM world.
+ BitsInit *SFBits =
+ AllInstructions[Opcode]->TheDef->getValueAsBitsInit("SoftFail");
+
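+ // For example (a sketch): if SoftFail{3-0} is all ones, those four bits
+ // are pushed as BIT_UNSET below, so any encoding matches here; the
+ // generated decoder then applies the separate check emitted by
+ // emitSoftFailCheck() to choose between Success and SoftFail.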
+ for (unsigned i = 0; i < BitWidth; ++i) {
+ if (SFBits && bitFromBits(*SFBits, i) == BIT_TRUE)
+ Insn.push_back(BIT_UNSET);
+ else
+ Insn.push_back(bitFromBits(Bits, i));
+ }
}
// Returns the record name.
@@ -300,15 +310,16 @@ protected:
// Returns false if there exists any uninitialized bit value in the range.
// Returns true otherwise.
bool fieldFromInsn(uint64_t &Field, insn_t &Insn, unsigned StartBit,
- unsigned NumBits) const;
+ unsigned NumBits) const;
/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
/// filter array as a series of chars.
- void dumpFilterArray(raw_ostream &o, std::vector<bit_value_t> & filter);
+ void dumpFilterArray(raw_ostream &o,
+ const std::vector<bit_value_t> & filter) const;
/// dumpStack - dumpStack traverses the filter chooser chain and calls
/// dumpFilterArray on each filter chooser up to the top level one.
- void dumpStack(raw_ostream &o, const char *prefix);
+ void dumpStack(raw_ostream &o, const char *prefix) const;
Filter &bestFilter() {
assert(BestIndex != -1 && "BestIndex not set");
@@ -316,9 +327,9 @@ protected:
}
// Called from Filter::recurse() when singleton exists. For debug purpose.
- void SingletonExists(unsigned Opc);
+ void SingletonExists(unsigned Opc) const;
- bool PositionFiltered(unsigned i) {
+ bool PositionFiltered(unsigned i) const {
return ValueSet(FilterBitValues[i]);
}
@@ -327,31 +338,37 @@ protected:
// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
// decoded bits in order to verify that the instruction matches the Opcode.
unsigned getIslands(std::vector<unsigned> &StartBits,
- std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
- insn_t &Insn);
+ std::vector<unsigned> &EndBits,
+ std::vector<uint64_t> &FieldVals,
+ const insn_t &Insn) const;
// Emits code to check the Predicates member of an instruction are true.
// Returns true if predicate matches were emitted, false otherwise.
- bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,unsigned Opc);
+ bool emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
+ unsigned Opc) const;
+
+ void emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
+ unsigned Opc) const;
// Emits code to decode the singleton. Return true if we have matched all the
// well-known bits.
- bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,unsigned Opc);
+ bool emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+ unsigned Opc) const;
// Emits code to decode the singleton, and then to decode the rest.
- void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,Filter &Best);
+ void emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
+ const Filter &Best) const;
void emitBinaryParser(raw_ostream &o , unsigned &Indentation,
- OperandInfo &OpInfo);
+ const OperandInfo &OpInfo) const;
// Assign a single filter and run with it.
- void runSingleFilter(FilterChooser &owner, unsigned startBit, unsigned numBit,
- bool mixed);
+ void runSingleFilter(unsigned startBit, unsigned numBit, bool mixed);
// reportRegion is a helper function for filterProcessor to mark a region as
// eligible for use as a filter region.
void reportRegion(bitAttr_t RA, unsigned StartBit, unsigned BitIndex,
- bool AllowMixed);
+ bool AllowMixed);
// FilterProcessor scans the well-known encoding bits of the instructions and
// builds up a list of candidate filters. It chooses the best filter and
@@ -366,31 +383,30 @@ protected:
// Emits code to decode our share of instructions. Returns true if the
// emitted code causes a return, which occurs if we know how to decode
// the instruction at this level or the instruction is not decodeable.
- bool emit(raw_ostream &o, unsigned &Indentation);
+ bool emit(raw_ostream &o, unsigned &Indentation) const;
};
///////////////////////////
// //
-// Filter Implmenetation //
+// Filter Implementation //
// //
///////////////////////////
-Filter::Filter(const Filter &f) :
- Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
- FilteredInstructions(f.FilteredInstructions),
- VariableInstructions(f.VariableInstructions),
- FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
- LastOpcFiltered(f.LastOpcFiltered), NumVariable(f.NumVariable) {
+Filter::Filter(const Filter &f)
+ : Owner(f.Owner), StartBit(f.StartBit), NumBits(f.NumBits), Mixed(f.Mixed),
+ FilteredInstructions(f.FilteredInstructions),
+ VariableInstructions(f.VariableInstructions),
+ FilterChooserMap(f.FilterChooserMap), NumFiltered(f.NumFiltered),
+ LastOpcFiltered(f.LastOpcFiltered) {
}
Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
- bool mixed) : Owner(&owner), StartBit(startBit), NumBits(numBits),
- Mixed(mixed) {
+ bool mixed)
+ : Owner(&owner), StartBit(startBit), NumBits(numBits), Mixed(mixed) {
assert(StartBit + NumBits - 1 < Owner->BitWidth);
NumFiltered = 0;
LastOpcFiltered = 0;
- NumVariable = 0;
for (unsigned i = 0, e = Owner->Opcodes.size(); i != e; ++i) {
insn_t Insn;
@@ -409,10 +425,9 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
FilteredInstructions[Field].push_back(LastOpcFiltered);
++NumFiltered;
} else {
- // Some of the encoding bit(s) are unspecfied. This contributes to
+ // Some of the encoding bit(s) are unspecified. This contributes to
// one additional member of "Variable" instructions.
VariableInstructions.push_back(Owner->Opcodes[i]);
- ++NumVariable;
}
}
@@ -421,7 +436,7 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
}
Filter::~Filter() {
- std::map<unsigned, FilterChooser*>::iterator filterIterator;
+ std::map<unsigned, const FilterChooser*>::iterator filterIterator;
for (filterIterator = FilterChooserMap.begin();
filterIterator != FilterChooserMap.end();
filterIterator++) {
@@ -450,7 +465,7 @@ void Filter::recurse() {
// Delegates to an inferior filter chooser for further processing on this
// group of instructions whose segment values are variable.
- FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+ FilterChooserMap.insert(std::pair<unsigned, const FilterChooser*>(
(unsigned)-1,
new FilterChooser(Owner->AllInstructions,
VariableInstructions,
@@ -483,7 +498,7 @@ void Filter::recurse() {
// Delegates to an inferior filter chooser for further processing on this
// category of instructions.
- FilterChooserMap.insert(std::pair<unsigned, FilterChooser*>(
+ FilterChooserMap.insert(std::pair<unsigned, const FilterChooser*>(
mapIterator->first,
new FilterChooser(Owner->AllInstructions,
mapIterator->second,
@@ -495,7 +510,7 @@ void Filter::recurse() {
}
// Emit code to decode instructions given a segment or segments of bits.
-void Filter::emit(raw_ostream &o, unsigned &Indentation) {
+void Filter::emit(raw_ostream &o, unsigned &Indentation) const {
o.indent(Indentation) << "// Check Inst{";
if (NumBits > 1)
@@ -507,7 +522,7 @@ void Filter::emit(raw_ostream &o, unsigned &Indentation) {
<< "(insn, " << StartBit << ", "
<< NumBits << ")) {\n";
- std::map<unsigned, FilterChooser*>::iterator filterIterator;
+ std::map<unsigned, const FilterChooser*>::const_iterator filterIterator;
bool DefaultCase = false;
for (filterIterator = FilterChooserMap.begin();
@@ -537,12 +552,12 @@ void Filter::emit(raw_ostream &o, unsigned &Indentation) {
// encoding bits do not match exactly.
if (!DefaultCase) { ++Indentation; ++Indentation; }
- bool finished = filterIterator->second->emit(o, Indentation);
+ filterIterator->second->emit(o, Indentation);
// For top level default case, there's no need for a break statement.
if (Owner->isTopLevel() && DefaultCase)
break;
- if (!finished)
- o.indent(Indentation) << "break;\n";
+
+ o.indent(Indentation) << "break;\n";
if (!DefaultCase) { --Indentation; --Indentation; }
}
@@ -571,13 +586,17 @@ unsigned Filter::usefulness() const {
// Emit the top level typedef and decodeInstruction() function.
void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
- std::string Namespace) {
+ const std::string &Namespace) const {
o.indent(Indentation) <<
- "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction" << BitWidth
- << "(MCInst &MI, uint" << BitWidth << "_t insn, uint64_t Address, "
+ "static MCDisassembler::DecodeStatus decode" << Namespace << "Instruction"
+ << BitWidth << "(MCInst &MI, uint" << BitWidth
+ << "_t insn, uint64_t Address, "
<< "const void *Decoder, const MCSubtargetInfo &STI) {\n";
- o.indent(Indentation) << " unsigned tmp = 0;\n (void)tmp;\n" << Emitter->Locals << "\n";
+ o.indent(Indentation) << " unsigned tmp = 0;\n";
+ o.indent(Indentation) << " (void)tmp;\n";
+ o.indent(Indentation) << Emitter->Locals << "\n";
o.indent(Indentation) << " uint64_t Bits = STI.getFeatureBits();\n";
+ o.indent(Indentation) << " (void)Bits;\n";
++Indentation; ++Indentation;
// Emits code to decode the instructions.
@@ -598,7 +617,7 @@ void FilterChooser::emitTop(raw_ostream &o, unsigned Indentation,
// Returns false upon encountering the first uninitialized bit value.
// Returns true otherwise.
bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
- unsigned StartBit, unsigned NumBits) const {
+ unsigned StartBit, unsigned NumBits) const {
Field = 0;
for (unsigned i = 0; i < NumBits; ++i) {
@@ -615,7 +634,7 @@ bool FilterChooser::fieldFromInsn(uint64_t &Field, insn_t &Insn,
/// dumpFilterArray - dumpFilterArray prints out debugging info for the given
/// filter array as a series of chars.
void FilterChooser::dumpFilterArray(raw_ostream &o,
- std::vector<bit_value_t> &filter) {
+ const std::vector<bit_value_t> &filter) const {
unsigned bitIndex;
for (bitIndex = BitWidth; bitIndex > 0; bitIndex--) {
@@ -638,8 +657,8 @@ void FilterChooser::dumpFilterArray(raw_ostream &o,
/// dumpStack - dumpStack traverses the filter chooser chain and calls
/// dumpFilterArray on each filter chooser up to the top level one.
-void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
- FilterChooser *current = this;
+void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) const {
+ const FilterChooser *current = this;
while (current) {
o << prefix;
@@ -650,7 +669,7 @@ void FilterChooser::dumpStack(raw_ostream &o, const char *prefix) {
}
// Called from Filter::recurse() when singleton exists. For debug purpose.
-void FilterChooser::SingletonExists(unsigned Opc) {
+void FilterChooser::SingletonExists(unsigned Opc) const {
insn_t Insn0;
insnWithID(Insn0, Opc);
@@ -663,7 +682,7 @@ void FilterChooser::SingletonExists(unsigned Opc) {
errs() << '\n';
dumpStack(errs(), "\t\t");
- for (unsigned i = 0; i < Opcodes.size(); i++) {
+ for (unsigned i = 0; i < Opcodes.size(); ++i) {
const std::string &Name = nameWithID(Opcodes[i]);
errs() << '\t' << Name << " ";
@@ -678,8 +697,9 @@ void FilterChooser::SingletonExists(unsigned Opc) {
// Inst{20} = 1 && Inst{3-0} == 0b1111 represents two islands of yet-to-be
// decoded bits in order to verify that the instruction matches the Opcode.
unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
- std::vector<unsigned> &EndBits, std::vector<uint64_t> &FieldVals,
- insn_t &Insn) {
+ std::vector<unsigned> &EndBits,
+ std::vector<uint64_t> &FieldVals,
+ const insn_t &Insn) const {
unsigned Num, BitNo;
Num = BitNo = 0;
@@ -695,9 +715,7 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
Val = Value(Insn[i]);
bool Filtered = PositionFiltered(i);
switch (State) {
- default:
- assert(0 && "Unreachable code!");
- break;
+ default: llvm_unreachable("Unreachable code!");
case 0:
case 1:
if (Filtered || Val == -1)
@@ -736,17 +754,17 @@ unsigned FilterChooser::getIslands(std::vector<unsigned> &StartBits,
}
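
The hunk above belongs to getIslands(), whose state machine records each maximal run of known, unfiltered bits as an island. A compact standalone model of that scan, using tri-state bits held in an int (-1 for unset):

  #include <cstdint>
  #include <vector>

  // Collect maximal runs of fully specified (0/1), not-yet-filtered bits.
  // Returns the island count; the parallel vectors give extents and values.
  unsigned collectIslands(const std::vector<int> &Bits,      // -1 = unset
                          const std::vector<bool> &Filtered, // already used
                          std::vector<unsigned> &StartBits,
                          std::vector<unsigned> &EndBits,
                          std::vector<uint64_t> &FieldVals) {
    unsigned Num = 0;
    uint64_t Val = 0;
    bool InIsland = false;
    for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
      bool Known = Bits[i] != -1 && !Filtered[i];
      if (Known) {
        if (!InIsland) { StartBits.push_back(i); Val = 0; InIsland = true; }
        if (Bits[i] == 1)
          Val |= 1ULL << (i - StartBits.back());
      } else if (InIsland) {
        EndBits.push_back(i - 1);
        FieldVals.push_back(Val);
        InIsland = false;
        ++Num;
      }
    }
    if (InIsland) {
      EndBits.push_back(Bits.size() - 1);
      FieldVals.push_back(Val);
      ++Num;
    }
    return Num;
  }
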
void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
- OperandInfo &OpInfo) {
- std::string &Decoder = OpInfo.Decoder;
+ const OperandInfo &OpInfo) const {
+ const std::string &Decoder = OpInfo.Decoder;
if (OpInfo.numFields() == 1) {
- OperandInfo::iterator OI = OpInfo.begin();
+ OperandInfo::const_iterator OI = OpInfo.begin();
o.indent(Indentation) << " tmp = fieldFromInstruction" << BitWidth
<< "(insn, " << OI->Base << ", " << OI->Width
<< ");\n";
} else {
o.indent(Indentation) << " tmp = 0;\n";
- for (OperandInfo::iterator OI = OpInfo.begin(), OE = OpInfo.end();
+ for (OperandInfo::const_iterator OI = OpInfo.begin(), OE = OpInfo.end();
OI != OE; ++OI) {
o.indent(Indentation) << " tmp |= (fieldFromInstruction" << BitWidth
<< "(insn, " << OI->Base << ", " << OI->Width
@@ -756,14 +774,15 @@ void FilterChooser::emitBinaryParser(raw_ostream &o, unsigned &Indentation,
if (Decoder != "")
o.indent(Indentation) << " " << Emitter->GuardPrefix << Decoder
- << "(MI, tmp, Address, Decoder)" << Emitter->GuardPostfix << "\n";
+ << "(MI, tmp, Address, Decoder)"
+ << Emitter->GuardPostfix << "\n";
else
o.indent(Indentation) << " MI.addOperand(MCOperand::CreateImm(tmp));\n";
}
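
emitBinaryParser prints a single field extraction when the operand occupies one contiguous range, and an OR of shifted extractions when its encoding is split. What the emitted code computes, modeled standalone with made-up bit positions:

  #include <cassert>
  #include <cstdint>

  // Stand-in for the emitted helper: extract NumBits starting at StartBit.
  static uint32_t fieldFromInstruction32(uint32_t insn, unsigned StartBit,
                                         unsigned NumBits) {
    assert(NumBits > 0 && NumBits < 32 && StartBit + NumBits <= 32);
    return (insn >> StartBit) & ((1u << NumBits) - 1);
  }

  int main() {
    // Hypothetical operand split across Inst{19-16} (value bits 3-0) and
    // Inst{3-0} (value bits 7-4), mirroring the multi-field path above.
    uint32_t insn = 0x000A0005; // Inst{19-16} = 0xA, Inst{3-0} = 0x5
    uint32_t tmp = 0;
    tmp |= fieldFromInstruction32(insn, 16, 4) << 0;
    tmp |= fieldFromInstruction32(insn, 0, 4) << 4;
    return tmp == 0x5A ? 0 : 1; // 0xA | (0x5 << 4)
  }
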
static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
- std::string PredicateNamespace) {
+ const std::string &PredicateNamespace) {
if (str[0] == '!')
o << "!(Bits & " << PredicateNamespace << "::"
<< str.slice(1,str.size()) << ")";
@@ -772,8 +791,9 @@ static void emitSinglePredicateMatch(raw_ostream &o, StringRef str,
}
bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
- unsigned Opc) {
- ListInit *Predicates = AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
+ unsigned Opc) const {
+ ListInit *Predicates =
+ AllInstructions[Opc]->TheDef->getValueAsListInit("Predicates");
for (unsigned i = 0; i < Predicates->getSize(); ++i) {
Record *Pred = Predicates->getElementAsRecord(i);
if (!Pred->getValue("AssemblerMatcherPredicate"))
@@ -799,10 +819,70 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
return Predicates->getSize() > 0;
}
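
Each AssemblerMatcherPredicate becomes a test against the subtarget feature bits, with a leading '!' negating the test and consecutive predicates joined by &&. A sketch of the emitted guard for a hypothetical namespace and predicate pair:

  #include <cstdint>

  // Hypothetical feature bits; the emitter uses PredicateNamespace.
  namespace Foo {
  enum { FeatureA = 1 << 0, ModeB = 1 << 1 };
  }

  // Shape of the guard emitted for predicates [HasFeatureA, !ModeB].
  bool predicatesMatch(uint64_t Bits) {
    return (Bits & Foo::FeatureA) && !(Bits & Foo::ModeB);
  }
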
+void FilterChooser::emitSoftFailCheck(raw_ostream &o, unsigned Indentation,
+ unsigned Opc) const {
+ BitsInit *SFBits =
+ AllInstructions[Opc]->TheDef->getValueAsBitsInit("SoftFail");
+ if (!SFBits) return;
+ BitsInit *InstBits = AllInstructions[Opc]->TheDef->getValueAsBitsInit("Inst");
+
+ APInt PositiveMask(BitWidth, 0ULL);
+ APInt NegativeMask(BitWidth, 0ULL);
+ for (unsigned i = 0; i < BitWidth; ++i) {
+ bit_value_t B = bitFromBits(*SFBits, i);
+ bit_value_t IB = bitFromBits(*InstBits, i);
+
+ if (B != BIT_TRUE) continue;
+
+ switch (IB) {
+ case BIT_FALSE:
+ // The bit is meant to be false, so emit a check to see if it is true.
+ PositiveMask.setBit(i);
+ break;
+ case BIT_TRUE:
+ // The bit is meant to be true, so emit a check to see if it is false.
+ NegativeMask.setBit(i);
+ break;
+ default:
+ // The bit is not set; this must be an error!
+ StringRef Name = AllInstructions[Opc]->TheDef->getName();
+ errs() << "SoftFail Conflict: bit SoftFail{" << i << "} in "
+ << Name
+ << " is set but Inst{" << i <<"} is unset!\n"
+ << " - You can only mark a bit as SoftFail if it is fully defined"
+ << " (1/0 - not '?') in Inst\n";
+ o << "#error SoftFail Conflict, " << Name << "::SoftFail{" << i
+ << "} set but Inst{" << i << "} undefined!\n";
+ }
+ }
+
+ bool NeedPositiveMask = PositiveMask.getBoolValue();
+ bool NeedNegativeMask = NegativeMask.getBoolValue();
+
+ if (!NeedPositiveMask && !NeedNegativeMask)
+ return;
+
+ std::string PositiveMaskStr = PositiveMask.toString(16, /*signed=*/false);
+ std::string NegativeMaskStr = NegativeMask.toString(16, /*signed=*/false);
+ StringRef BitExt = "";
+ if (BitWidth > 32)
+ BitExt = "ULL";
+
+ o.indent(Indentation) << "if (";
+ if (NeedPositiveMask)
+ o << "insn & 0x" << PositiveMaskStr << BitExt;
+ if (NeedPositiveMask && NeedNegativeMask)
+ o << " || ";
+ if (NeedNegativeMask)
+ o << "~insn & 0x" << NegativeMaskStr << BitExt;
+ o << ")\n";
+ o.indent(Indentation+2) << "S = MCDisassembler::SoftFail;\n";
+}
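
The new emitSoftFailCheck boils the SoftFail bits down to two masks: PositiveMask marks bits Inst defines as 0, NegativeMask marks bits Inst defines as 1; if any marked bit deviates, decoding still succeeds but is flagged. A runnable model of the emitted guard, with made-up masks:

  #include <cstdint>
  #include <cstdio>

  enum DecodeStatus { Success, SoftFail };

  DecodeStatus softFailCheck(uint32_t insn) {
    DecodeStatus S = Success;
    const uint32_t PositiveMask = 0x80000000; // SoftFail bit, Inst bit = 0
    const uint32_t NegativeMask = 0x00000001; // SoftFail bit, Inst bit = 1
    if (insn & PositiveMask || ~insn & NegativeMask)
      S = SoftFail; // decodes, but is not the canonical encoding
    return S;
  }

  int main() {
    printf("%d %d\n", softFailCheck(0x00000001),  // 0: Success
                      softFailCheck(0x80000001)); // 1: SoftFail
  }
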
+
// Emits code to decode the singleton. Returns true if we have matched all the
// well-known bits.
bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
- unsigned Opc) {
+ unsigned Opc) const {
std::vector<unsigned> StartBits;
std::vector<unsigned> EndBits;
std::vector<uint64_t> FieldVals;
@@ -821,22 +901,26 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
if (!emitPredicateMatch(o, Indentation, Opc))
o << "1";
o << ") {\n";
+ emitSoftFailCheck(o, Indentation+2, Opc);
o.indent(Indentation) << " MI.setOpcode(" << Opc << ");\n";
- std::vector<OperandInfo>& InsnOperands = Operands[Opc];
- for (std::vector<OperandInfo>::iterator
+ std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+ Operands.find(Opc);
+ const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+ for (std::vector<OperandInfo>::const_iterator
I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
// If a custom instruction decoder was specified, use that.
if (I->numFields() == 0 && I->Decoder.size()) {
o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder
- << "(MI, insn, Address, Decoder)" << Emitter->GuardPostfix << "\n";
+ << "(MI, insn, Address, Decoder)"
+ << Emitter->GuardPostfix << "\n";
break;
}
emitBinaryParser(o, Indentation, *I);
}
- o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " << nameWithID(Opc)
- << '\n';
+ o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // "
+ << nameWithID(Opc) << '\n';
o.indent(Indentation) << "}\n"; // Closing predicate block.
return true;
}
@@ -870,21 +954,25 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
else
o << ") {\n";
}
+ emitSoftFailCheck(o, Indentation+2, Opc);
o.indent(Indentation) << " MI.setOpcode(" << Opc << ");\n";
- std::vector<OperandInfo>& InsnOperands = Operands[Opc];
- for (std::vector<OperandInfo>::iterator
+ std::map<unsigned, std::vector<OperandInfo> >::const_iterator OpIter =
+ Operands.find(Opc);
+ const std::vector<OperandInfo>& InsnOperands = OpIter->second;
+ for (std::vector<OperandInfo>::const_iterator
I = InsnOperands.begin(), E = InsnOperands.end(); I != E; ++I) {
// If a custom instruction decoder was specified, use that.
if (I->numFields() == 0 && I->Decoder.size()) {
o.indent(Indentation) << " " << Emitter->GuardPrefix << I->Decoder
- << "(MI, insn, Address, Decoder)" << Emitter->GuardPostfix << "\n";
+ << "(MI, insn, Address, Decoder)"
+ << Emitter->GuardPostfix << "\n";
break;
}
emitBinaryParser(o, Indentation, *I);
}
- o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // " << nameWithID(Opc)
- << '\n';
+ o.indent(Indentation) << " return " << Emitter->ReturnOK << "; // "
+ << nameWithID(Opc) << '\n';
o.indent(Indentation) << "}\n";
return false;
@@ -892,7 +980,7 @@ bool FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
// Emits code to decode the singleton, and then to decode the rest.
void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
- Filter &Best) {
+ const Filter &Best) const {
unsigned Opc = Best.getSingletonOpc();
@@ -908,8 +996,8 @@ void FilterChooser::emitSingletonDecoder(raw_ostream &o, unsigned &Indentation,
// Assign a single filter and run with it. Top level API client can initialize
// with a single filter to start the filtering process.
-void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
- unsigned numBit, bool mixed) {
+void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
+ bool mixed) {
Filters.clear();
Filter F(*this, startBit, numBit, true);
Filters.push_back(F);
@@ -920,7 +1008,7 @@ void FilterChooser::runSingleFilter(FilterChooser &owner, unsigned startBit,
// reportRegion is a helper function for filterProcessor to mark a region as
// eligible for use as a filter region.
void FilterChooser::reportRegion(bitAttr_t RA, unsigned StartBit,
- unsigned BitIndex, bool AllowMixed) {
+ unsigned BitIndex, bool AllowMixed) {
if (RA == ATTR_MIXED && AllowMixed)
Filters.push_back(Filter(*this, StartBit, BitIndex - StartBit, true));
else if (RA == ATTR_ALL_SET && !AllowMixed)
@@ -957,8 +1045,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
// Look for islands of undecoded bits of any instruction.
if (getIslands(StartBits, EndBits, FieldVals, Insn) > 0) {
// Found an instruction with island(s). Now just assign a filter.
- runSingleFilter(*this, StartBits[0], EndBits[0] - StartBits[0] + 1,
- true);
+ runSingleFilter(StartBits[0], EndBits[0] - StartBits[0] + 1, true);
return true;
}
}
@@ -1066,7 +1153,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
RA = ATTR_MIXED;
break;
default:
- assert(0 && "Unexpected bitAttr!");
+ llvm_unreachable("Unexpected bitAttr!");
}
break;
case ATTR_ALL_SET:
@@ -1087,7 +1174,7 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
RA = ATTR_MIXED;
break;
default:
- assert(0 && "Unexpected bitAttr!");
+ llvm_unreachable("Unexpected bitAttr!");
}
break;
case ATTR_MIXED:
@@ -1109,13 +1196,13 @@ bool FilterChooser::filterProcessor(bool AllowMixed, bool Greedy) {
case ATTR_MIXED:
break;
default:
- assert(0 && "Unexpected bitAttr!");
+ llvm_unreachable("Unexpected bitAttr!");
}
break;
case ATTR_ALL_UNSET:
- assert(0 && "regionAttr state machine has no ATTR_UNSET state");
+ llvm_unreachable("regionAttr state machine has no ATTR_UNSET state");
case ATTR_FILTERED:
- assert(0 && "regionAttr state machine has no ATTR_FILTERED state");
+ llvm_unreachable("regionAttr state machine has no ATTR_FILTERED state");
}
}
@@ -1189,7 +1276,7 @@ void FilterChooser::doFilter() {
// Emits code to decode our share of instructions. Returns true if the
// emitted code causes a return, which occurs if we know how to decode
// the instruction at this level or the instruction is not decodeable.
-bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
+bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) const {
if (Opcodes.size() == 1)
// There is only one instruction in the set, which is great!
// Call emitSingletonDecoder() to see whether there are any remaining
@@ -1198,11 +1285,11 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
// Choose the best filter to do the decodings!
if (BestIndex != -1) {
- Filter &Best = bestFilter();
+ const Filter &Best = Filters[BestIndex];
if (Best.getNumFiltered() == 1)
emitSingletonDecoder(o, Indentation, Best);
else
- bestFilter().emit(o, Indentation);
+ Best.emit(o, Indentation);
return false;
}
@@ -1222,7 +1309,7 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
dumpStack(errs(), "\t\t");
- for (unsigned i = 0; i < Opcodes.size(); i++) {
+ for (unsigned i = 0; i < Opcodes.size(); ++i) {
const std::string &Name = nameWithID(Opcodes[i]);
errs() << '\t' << Name << " ";
@@ -1234,9 +1321,8 @@ bool FilterChooser::emit(raw_ostream &o, unsigned &Indentation) {
return true;
}
-static bool populateInstruction(const CodeGenInstruction &CGI,
- unsigned Opc,
- std::map<unsigned, std::vector<OperandInfo> >& Operands){
+static bool populateInstruction(const CodeGenInstruction &CGI, unsigned Opc,
+ std::map<unsigned, std::vector<OperandInfo> > &Operands){
const Record &Def = *CGI.TheDef;
// If not all of the bit positions are specified, do not decode this instruction.
// We are bound to fail! For proper disassembly, the well-known encoding bits
@@ -1290,7 +1376,7 @@ static bool populateInstruction(const CodeGenInstruction &CGI,
}
// For each operand, see if we can figure out where it is encoded.
- for (std::vector<std::pair<Init*, std::string> >::iterator
+ for (std::vector<std::pair<Init*, std::string> >::const_iterator
NI = InOutOperands.begin(), NE = InOutOperands.end(); NI != NE; ++NI) {
std::string Decoder = "";
@@ -1435,8 +1521,7 @@ static void emitHelper(llvm::raw_ostream &o, unsigned BitWidth) {
}
// Emits disassembler code for instruction decoding.
-void FixedLenDecoderEmitter::run(raw_ostream &o)
-{
+void FixedLenDecoderEmitter::run(raw_ostream &o) {
o << "#include \"llvm/MC/MCInst.h\"\n";
o << "#include \"llvm/Support/DataTypes.h\"\n";
o << "#include <assert.h>\n";
@@ -1444,14 +1529,15 @@ void FixedLenDecoderEmitter::run(raw_ostream &o)
o << "namespace llvm {\n\n";
// Parameterize the decoders based on namespace and instruction width.
- NumberedInstructions = Target.getInstructionsByEnumValue();
+ const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+ Target.getInstructionsByEnumValue();
std::map<std::pair<std::string, unsigned>,
std::vector<unsigned> > OpcMap;
std::map<unsigned, std::vector<OperandInfo> > Operands;
for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
const CodeGenInstruction *Inst = NumberedInstructions[i];
- Record *Def = Inst->TheDef;
+ const Record *Def = Inst->TheDef;
unsigned Size = Def->getValueAsInt("Size");
if (Def->getValueAsString("Namespace") == "TargetOpcode" ||
Def->getValueAsBit("isPseudo") ||
@@ -1470,7 +1556,7 @@ void FixedLenDecoderEmitter::run(raw_ostream &o)
std::set<unsigned> Sizes;
for (std::map<std::pair<std::string, unsigned>,
- std::vector<unsigned> >::iterator
+ std::vector<unsigned> >::const_iterator
I = OpcMap.begin(), E = OpcMap.end(); I != E; ++I) {
// If we haven't visited this instruction width before, emit the
// helper method to extract fields.
diff --git a/utils/TableGen/FixedLenDecoderEmitter.h b/utils/TableGen/FixedLenDecoderEmitter.h
index 2df5448aa8d1..195297c966dd 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.h
+++ b/utils/TableGen/FixedLenDecoderEmitter.h
@@ -39,12 +39,12 @@ struct OperandInfo {
Fields.push_back(EncodingField(Base, Width, Offset));
}
- unsigned numFields() { return Fields.size(); }
+ unsigned numFields() const { return Fields.size(); }
- typedef std::vector<EncodingField>::iterator iterator;
+ typedef std::vector<EncodingField>::const_iterator const_iterator;
- iterator begin() { return Fields.begin(); }
- iterator end() { return Fields.end(); }
+ const_iterator begin() const { return Fields.begin(); }
+ const_iterator end() const { return Fields.end(); }
};
class FixedLenDecoderEmitter : public TableGenBackend {
@@ -52,12 +52,12 @@ public:
FixedLenDecoderEmitter(RecordKeeper &R,
std::string PredicateNamespace,
std::string GPrefix = "if (",
- std::string GPostfix = " == MCDisassembler::Fail) return MCDisassembler::Fail;",
+ std::string GPostfix = " == MCDisassembler::Fail)"
+ " return MCDisassembler::Fail;",
std::string ROK = "MCDisassembler::Success",
std::string RFail = "MCDisassembler::Fail",
std::string L = "") :
- Records(R), Target(R),
- NumberedInstructions(Target.getInstructionsByEnumValue()),
+ Target(R),
PredicateNamespace(PredicateNamespace),
GuardPrefix(GPrefix), GuardPostfix(GPostfix),
ReturnOK(ROK), ReturnFail(RFail), Locals(L) {}
@@ -66,11 +66,7 @@ public:
void run(raw_ostream &o);
private:
- RecordKeeper &Records;
CodeGenTarget Target;
- std::vector<const CodeGenInstruction*> NumberedInstructions;
- std::vector<unsigned> Opcodes;
- std::map<unsigned, std::vector<OperandInfo> > Operands;
public:
std::string PredicateNamespace;
std::string GuardPrefix, GuardPostfix;
diff --git a/utils/TableGen/InstrEnumEmitter.cpp b/utils/TableGen/InstrEnumEmitter.cpp
deleted file mode 100644
index 5981afde0e7e..000000000000
--- a/utils/TableGen/InstrEnumEmitter.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-//===- InstrEnumEmitter.cpp - Generate Instruction Set Enums --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This tablegen backend is responsible for emitting enums for each machine
-// instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#include "InstrEnumEmitter.h"
-#include "CodeGenTarget.h"
-#include "llvm/TableGen/Record.h"
-#include <cstdio>
-using namespace llvm;
-
-// runEnums - Print out enum values for all of the instructions.
-void InstrEnumEmitter::run(raw_ostream &OS) {
- EmitSourceFileHeader("Target Instruction Enum Values", OS);
- OS << "namespace llvm {\n\n";
-
- CodeGenTarget Target(Records);
-
- // We must emit the PHI opcode first...
- std::string Namespace = Target.getInstNamespace();
-
- if (Namespace.empty()) {
- fprintf(stderr, "No instructions defined!\n");
- exit(1);
- }
-
- const std::vector<const CodeGenInstruction*> &NumberedInstructions =
- Target.getInstructionsByEnumValue();
-
- OS << "namespace " << Namespace << " {\n";
- OS << " enum {\n";
- for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
- OS << " " << NumberedInstructions[i]->TheDef->getName()
- << "\t= " << i << ",\n";
- }
- OS << " INSTRUCTION_LIST_END = " << NumberedInstructions.size() << "\n";
- OS << " };\n}\n";
- OS << "} // End llvm namespace \n";
-}
diff --git a/utils/TableGen/InstrEnumEmitter.h b/utils/TableGen/InstrEnumEmitter.h
deleted file mode 100644
index c29a30938d34..000000000000
--- a/utils/TableGen/InstrEnumEmitter.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===- InstrEnumEmitter.h - Generate Instruction Set Enums ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This tablegen backend is responsible for emitting enums for each machine
-// instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef INSTRENUM_EMITTER_H
-#define INSTRENUM_EMITTER_H
-
-#include "llvm/TableGen/TableGenBackend.h"
-
-namespace llvm {
-
-class InstrEnumEmitter : public TableGenBackend {
- RecordKeeper &Records;
-public:
- InstrEnumEmitter(RecordKeeper &R) : Records(R) {}
-
- // run - Output the instruction set description, returning true on failure.
- void run(raw_ostream &OS);
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 8341724a73e4..8b3efd33f4dc 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -14,14 +14,16 @@
#include "InstrInfoEmitter.h"
#include "CodeGenTarget.h"
+#include "SequenceToOffsetTable.h"
#include "llvm/TableGen/Record.h"
#include "llvm/ADT/StringExtras.h"
#include <algorithm>
+#include <cstdio>
using namespace llvm;
static void PrintDefList(const std::vector<Record*> &Uses,
unsigned Num, raw_ostream &OS) {
- OS << "static const unsigned ImplicitList" << Num << "[] = { ";
+ OS << "static const uint16_t ImplicitList" << Num << "[] = { ";
for (unsigned i = 0, e = Uses.size(); i != e; ++i)
OS << getQualifiedName(Uses[i]) << ", ";
OS << "0 };\n";
@@ -106,6 +108,11 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
if (Inst.Operands[i].Rec->isSubClassOf("OptionalDefOperand"))
Res += "|(1<<MCOI::OptionalDef)";
+ // Fill in operand type.
+ Res += ", MCOI::";
+ assert(!Inst.Operands[i].OperandType.empty() && "Invalid operand type.");
+ Res += Inst.Operands[i].OperandType;
+
// Fill in constraint info.
Res += ", ";
@@ -121,11 +128,6 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
" << 16) | (1 << MCOI::TIED_TO))";
}
- // Fill in operand type.
- Res += ", MCOI::";
- assert(!Inst.Operands[i].OperandType.empty() && "Invalid operand type.");
- Res += Inst.Operands[i].OperandType;
-
Result.push_back(Res);
}
}
@@ -203,7 +205,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
// Emit all of the MCInstrDesc records in their ENUM ordering.
//
- OS << "\nMCInstrDesc " << TargetName << "Insts[] = {\n";
+ OS << "\nextern const MCInstrDesc " << TargetName << "Insts[] = {\n";
const std::vector<const CodeGenInstruction*> &NumberedInstructions =
Target.getInstructionsByEnumValue();
@@ -212,10 +214,33 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OperandInfoIDs, OS);
OS << "};\n\n";
+ // Build an array of instruction names
+ SequenceToOffsetTable<std::string> InstrNames;
+ for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
+ const CodeGenInstruction *Instr = NumberedInstructions[i];
+ InstrNames.add(Instr->TheDef->getName());
+ }
+
+ InstrNames.layout();
+ OS << "extern const char " << TargetName << "InstrNameData[] = {\n";
+ InstrNames.emit(OS, printChar);
+ OS << "};\n\n";
+
+ OS << "extern const unsigned " << TargetName <<"InstrNameIndices[] = {";
+ for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
+ if (i % 8 == 0)
+ OS << "\n ";
+ const CodeGenInstruction *Instr = NumberedInstructions[i];
+ OS << InstrNames.get(Instr->TheDef->getName()) << "U, ";
+ }
+
+ OS << "\n};\n\n";
+
// MCInstrInfo initialization routine.
OS << "static inline void Init" << TargetName
<< "MCInstrInfo(MCInstrInfo *II) {\n";
OS << " II->InitMCInstrInfo(" << TargetName << "Insts, "
+ << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "
<< NumberedInstructions.size() << ");\n}\n\n";
OS << "} // End llvm namespace \n";
@@ -239,10 +264,13 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "#undef GET_INSTRINFO_CTOR\n";
OS << "namespace llvm {\n";
- OS << "extern MCInstrDesc " << TargetName << "Insts[];\n";
+ OS << "extern const MCInstrDesc " << TargetName << "Insts[];\n";
+ OS << "extern const unsigned " << TargetName << "InstrNameIndices[];\n";
+ OS << "extern const char " << TargetName << "InstrNameData[];\n";
OS << ClassName << "::" << ClassName << "(int SO, int DO)\n"
<< " : TargetInstrInfoImpl(SO, DO) {\n"
<< " InitMCInstrInfo(" << TargetName << "Insts, "
+ << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "
<< NumberedInstructions.size() << ");\n}\n";
OS << "} // End llvm namespace \n";
@@ -264,8 +292,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
OS << Num << ",\t" << MinOperands << ",\t"
<< Inst.Operands.NumDefs << ",\t"
<< getItinClassNumber(Inst.TheDef) << ",\t"
- << Inst.TheDef->getValueAsInt("Size") << ",\t\""
- << Inst.TheDef->getName() << "\", 0";
+ << Inst.TheDef->getValueAsInt("Size") << ",\t0";
// Emit all of the target independent flags...
if (Inst.isPseudo) OS << "|(1<<MCID::Pseudo)";
@@ -346,7 +373,7 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
// We must emit the PHI opcode first...
std::string Namespace = Target.getInstNamespace();
-
+
if (Namespace.empty()) {
fprintf(stderr, "No instructions defined!\n");
exit(1);
diff --git a/utils/TableGen/InstrInfoEmitter.h b/utils/TableGen/InstrInfoEmitter.h
index 1461e2c5f7cf..f8d3ea51c814 100644
--- a/utils/TableGen/InstrInfoEmitter.h
+++ b/utils/TableGen/InstrInfoEmitter.h
@@ -31,19 +31,19 @@ class InstrInfoEmitter : public TableGenBackend {
RecordKeeper &Records;
CodeGenDAGPatterns CDP;
std::map<std::string, unsigned> ItinClassMap;
-
+
public:
InstrInfoEmitter(RecordKeeper &R) : Records(R), CDP(R) { }
- // run - Output the instruction set description, returning true on failure.
+ // run - Output the instruction set description.
void run(raw_ostream &OS);
private:
void emitEnums(raw_ostream &OS);
- typedef std::map<std::vector<std::string>, unsigned> OperandInfoMapTy;
+ typedef std::map<std::vector<std::string>, unsigned> OperandInfoMapTy;
void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
- Record *InstrInfo,
+ Record *InstrInfo,
std::map<std::vector<Record*>, unsigned> &EL,
const OperandInfoMapTy &OpInfo,
raw_ostream &OS);
@@ -51,7 +51,7 @@ private:
// Itinerary information.
void GatherItinClasses();
unsigned getItinClassNumber(const Record *InstRec);
-
+
// Operand information.
void EmitOperandInfo(raw_ostream &OS, OperandInfoMapTy &OperandInfoIDs);
std::vector<std::string> GetOperandInfo(const CodeGenInstruction &Inst);
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 782b89ede2e7..8e1bae8c1f0f 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -57,9 +57,6 @@ void IntrinsicEmitter::run(raw_ostream &OS) {
// Emit intrinsic alias analysis mod/ref behavior.
EmitModRefBehavior(Ints, OS);
- // Emit a list of intrinsics with corresponding GCC builtins.
- EmitGCCBuiltinList(Ints, OS);
-
// Emit code to translate GCC builtins into LLVM intrinsics.
EmitIntrinsicToGCCBuiltinMap(Ints, OS);
@@ -160,17 +157,20 @@ EmitIntrinsicToNameTable(const std::vector<CodeGenIntrinsic> &Ints,
void IntrinsicEmitter::
EmitIntrinsicToOverloadTable(const std::vector<CodeGenIntrinsic> &Ints,
raw_ostream &OS) {
- OS << "// Intrinsic ID to overload table\n";
+ OS << "// Intrinsic ID to overload bitset\n";
OS << "#ifdef GET_INTRINSIC_OVERLOAD_TABLE\n";
- OS << " // Note that entry #0 is the invalid intrinsic!\n";
+ OS << "static const uint8_t OTable[] = {\n";
+ OS << " 0";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
- OS << " ";
+ // Add one to the index so we emit a null bit for the invalid #0 intrinsic.
+ if ((i+1)%8 == 0)
+ OS << ",\n 0";
if (Ints[i].isOverloaded)
- OS << "true";
- else
- OS << "false";
- OS << ",\n";
+ OS << " | (1<<" << (i+1)%8 << ')';
}
+ OS << "\n};\n\n";
+ // OTable has a set bit at position id if intrinsic id is overloaded.
+ OS << "return (OTable[id/8] & (1 << (id%8))) != 0;\n";
OS << "#endif\n\n";
}
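
The overload table shrinks from one bool per intrinsic to one bit, shifted up by one so bit 0 covers the invalid intrinsic. Both sides of that encoding, sketched:

  #include <cstdint>
  #include <vector>

  // Pack one flag per intrinsic; stored positions are i + 1 because
  // Intrinsic::ID 0 is the invalid intrinsic.
  std::vector<uint8_t> packOverloads(const std::vector<bool> &IsOverloaded) {
    std::vector<uint8_t> Table((IsOverloaded.size() + 8) / 8, 0);
    for (unsigned i = 0, e = IsOverloaded.size(); i != e; ++i)
      if (IsOverloaded[i])
        Table[(i + 1) / 8] |= 1 << ((i + 1) % 8);
    return Table;
  }

  // Mirrors the emitted lookup, indexed directly by Intrinsic::ID.
  bool isOverloaded(const std::vector<uint8_t> &Table, unsigned id) {
    return (Table[id / 8] & (1 << (id % 8))) != 0;
  }
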
@@ -181,6 +181,8 @@ static void EmitTypeForValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
} else if (VT == MVT::Other) {
// MVT::OtherVT is used to mean the empty struct type here.
OS << "StructType::get(Context)";
+ } else if (VT == MVT::f16) {
+ OS << "Type::getHalfTy(Context)";
} else if (VT == MVT::f32) {
OS << "Type::getFloatTy(Context)";
} else if (VT == MVT::f64) {
@@ -318,7 +320,7 @@ void IntrinsicEmitter::EmitVerifier(const std::vector<CodeGenIntrinsic> &Ints,
OS << "// Verifier::visitIntrinsicFunctionCall code.\n";
OS << "#ifdef GET_INTRINSIC_VERIFIER\n";
OS << " switch (ID) {\n";
- OS << " default: assert(0 && \"Invalid intrinsic!\");\n";
+ OS << " default: llvm_unreachable(\"Invalid intrinsic!\");\n";
// This checking can emit a lot of very common code. To reduce the amount of
// code that we emit, batch up cases that have identical types. This avoids
@@ -414,7 +416,7 @@ void IntrinsicEmitter::EmitGenerator(const std::vector<CodeGenIntrinsic> &Ints,
OS << "// Code for generating Intrinsic function declarations.\n";
OS << "#ifdef GET_INTRINSIC_GENERATOR\n";
OS << " switch (id) {\n";
- OS << " default: assert(0 && \"Invalid intrinsic!\");\n";
+ OS << " default: llvm_unreachable(\"Invalid intrinsic!\");\n";
// Similar to GET_INTRINSIC_VERIFIER, batch up cases that have identical
// types.
@@ -483,8 +485,7 @@ namespace {
case CodeGenIntrinsic::ReadWriteMem:
return MRK_none;
}
- assert(0 && "bad mod-ref kind");
- return MRK_none;
+ llvm_unreachable("bad mod-ref kind");
}
struct AttributeComparator {
@@ -516,37 +517,50 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
else
OS << "AttrListPtr Intrinsic::getAttributes(ID id) {\n";
- // Compute the maximum number of attribute arguments.
- std::vector<const CodeGenIntrinsic*> sortedIntrinsics(Ints.size());
+ // Compute the maximum number of attribute arguments and the map
+ typedef std::map<const CodeGenIntrinsic*, unsigned,
+ AttributeComparator> UniqAttrMapTy;
+ UniqAttrMapTy UniqAttributes;
unsigned maxArgAttrs = 0;
+ unsigned AttrNum = 0;
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
const CodeGenIntrinsic &intrinsic = Ints[i];
- sortedIntrinsics[i] = &intrinsic;
maxArgAttrs =
std::max(maxArgAttrs, unsigned(intrinsic.ArgumentAttributes.size()));
+ unsigned &N = UniqAttributes[&intrinsic];
+ if (N) continue;
+ assert(AttrNum < 256 && "Too many unique attributes for table!");
+ N = ++AttrNum;
}
// Emit an array of AttributeWithIndex. Most intrinsics will have
// at least one entry, for the function itself (index ~1), which is
// usually nounwind.
- OS << " AttributeWithIndex AWI[" << maxArgAttrs+1 << "];\n";
- OS << " unsigned NumAttrs = 0;\n";
- OS << " switch (id) {\n";
- OS << " default: break;\n";
+ OS << " static const uint8_t IntrinsicsToAttributesMap[] = {\n";
- AttributeComparator precedes;
+ for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
+ const CodeGenIntrinsic &intrinsic = Ints[i];
- std::stable_sort(sortedIntrinsics.begin(), sortedIntrinsics.end(), precedes);
+ OS << " " << UniqAttributes[&intrinsic] << ", // "
+ << intrinsic.Name << "\n";
+ }
+ OS << " };\n\n";
- for (unsigned i = 0, e = sortedIntrinsics.size(); i != e; ++i) {
- const CodeGenIntrinsic &intrinsic = *sortedIntrinsics[i];
- OS << " case " << TargetPrefix << "Intrinsic::"
- << intrinsic.EnumName << ":\n";
+ OS << " AttributeWithIndex AWI[" << maxArgAttrs+1 << "];\n";
+ OS << " unsigned NumAttrs = 0;\n";
+ OS << " if (id != 0) {\n";
+ OS << " switch(IntrinsicsToAttributesMap[id - ";
+ if (TargetOnly)
+ OS << "Intrinsic::num_intrinsics";
+ else
+ OS << "1";
+ OS << "]) {\n";
+ OS << " default: llvm_unreachable(\"Invalid attribute number\");\n";
+ for (UniqAttrMapTy::const_iterator I = UniqAttributes.begin(),
+ E = UniqAttributes.end(); I != E; ++I) {
+ OS << " case " << I->second << ":\n";
- // Fill out the case if this is the last case for this range of
- // intrinsics.
- if (i + 1 != e && !precedes(&intrinsic, sortedIntrinsics[i + 1]))
- continue;
+ const CodeGenIntrinsic &intrinsic = *(I->first);
// Keep track of the number of attributes we're writing out.
unsigned numAttrs = 0;
@@ -554,8 +568,8 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
// The argument attributes are already sorted by argument index.
for (unsigned ai = 0, ae = intrinsic.ArgumentAttributes.size(); ai != ae;) {
unsigned argNo = intrinsic.ArgumentAttributes[ai].first;
-
- OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get("
+
+ OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get("
<< argNo+1 << ", ";
bool moreThanOne = false;
@@ -579,7 +593,7 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
ModRefKind modRef = getModRefKind(intrinsic);
if (!intrinsic.canThrow || modRef) {
- OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(~0, ";
+ OS << " AWI[" << numAttrs++ << "] = AttributeWithIndex::get(~0, ";
if (!intrinsic.canThrow) {
OS << "Attribute::NoUnwind";
if (modRef) OS << '|';
@@ -593,13 +607,14 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
}
if (numAttrs) {
- OS << " NumAttrs = " << numAttrs << ";\n";
- OS << " break;\n";
+ OS << " NumAttrs = " << numAttrs << ";\n";
+ OS << " break;\n";
} else {
- OS << " return AttrListPtr();\n";
+ OS << " return AttrListPtr();\n";
}
}
+ OS << " }\n";
OS << " }\n";
OS << " return AttrListPtr::get(AWI, NumAttrs);\n";
OS << "}\n";
@@ -609,50 +624,36 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
/// EmitModRefBehavior - Determine intrinsic alias analysis mod/ref behavior.
void IntrinsicEmitter::
EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
- OS << "// Determine intrinsic alias analysis mod/ref behavior.\n";
- OS << "#ifdef GET_INTRINSIC_MODREF_BEHAVIOR\n";
- OS << "switch (iid) {\n";
- OS << "default:\n return UnknownModRefBehavior;\n";
+ OS << "// Determine intrinsic alias analysis mod/ref behavior.\n"
+ << "#ifdef GET_INTRINSIC_MODREF_BEHAVIOR\n"
+ << "assert(iid <= Intrinsic::" << Ints.back().EnumName << " && "
+ << "\"Unknown intrinsic.\");\n\n";
+
+ OS << "static const uint8_t IntrinsicModRefBehavior[] = {\n"
+ << " /* invalid */ UnknownModRefBehavior,\n";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
- if (Ints[i].ModRef == CodeGenIntrinsic::ReadWriteMem)
- continue;
- OS << "case " << TargetPrefix << "Intrinsic::" << Ints[i].EnumName
- << ":\n";
+ OS << " /* " << TargetPrefix << Ints[i].EnumName << " */ ";
switch (Ints[i].ModRef) {
- default:
- assert(false && "Unknown Mod/Ref type!");
case CodeGenIntrinsic::NoMem:
- OS << " return DoesNotAccessMemory;\n";
+ OS << "DoesNotAccessMemory,\n";
break;
case CodeGenIntrinsic::ReadArgMem:
- OS << " return OnlyReadsArgumentPointees;\n";
+ OS << "OnlyReadsArgumentPointees,\n";
break;
case CodeGenIntrinsic::ReadMem:
- OS << " return OnlyReadsMemory;\n";
+ OS << "OnlyReadsMemory,\n";
break;
case CodeGenIntrinsic::ReadWriteArgMem:
- OS << " return OnlyAccessesArgumentPointees;\n";
+ OS << "OnlyAccessesArgumentPointees,\n";
+ break;
+ case CodeGenIntrinsic::ReadWriteMem:
+ OS << "UnknownModRefBehavior,\n";
break;
}
}
- OS << "}\n";
- OS << "#endif // GET_INTRINSIC_MODREF_BEHAVIOR\n\n";
-}
-
-void IntrinsicEmitter::
-EmitGCCBuiltinList(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS){
- OS << "// Get the GCC builtin that corresponds to an LLVM intrinsic.\n";
- OS << "#ifdef GET_GCC_BUILTIN_NAME\n";
- OS << " switch (F->getIntrinsicID()) {\n";
- OS << " default: BuiltinName = \"\"; break;\n";
- for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
- if (!Ints[i].GCCBuiltinName.empty()) {
- OS << " case Intrinsic::" << Ints[i].EnumName << ": BuiltinName = \""
- << Ints[i].GCCBuiltinName << "\"; break;\n";
- }
- }
- OS << " }\n";
- OS << "#endif\n\n";
+ OS << "};\n\n"
+ << "return static_cast<ModRefBehavior>(IntrinsicModRefBehavior[iid]);\n"
+ << "#endif // GET_INTRINSIC_MODREF_BEHAVIOR\n\n";
}
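
EmitModRefBehavior likewise trades its switch for a dense table indexed by intrinsic ID, with ReadWriteMem folded into UnknownModRefBehavior. The generated lookup reduces to this shape (entries illustrative):

  #include <cstdint>

  enum ModRefBehavior {
    DoesNotAccessMemory,
    OnlyReadsArgumentPointees,
    OnlyReadsMemory,
    OnlyAccessesArgumentPointees,
    UnknownModRefBehavior
  };

  static const uint8_t IntrinsicModRefBehavior[] = {
    /* invalid */ UnknownModRefBehavior,
    /* id 1 */    DoesNotAccessMemory,
    /* id 2 */    OnlyReadsArgumentPointees,
  };

  ModRefBehavior getModRefBehavior(unsigned iid) {
    return static_cast<ModRefBehavior>(IntrinsicModRefBehavior[iid]);
  }
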
/// EmitTargetBuiltins - All of the builtins in the specified map are for the
diff --git a/utils/TableGen/IntrinsicEmitter.h b/utils/TableGen/IntrinsicEmitter.h
index eb6379cc7414..f9bcd5980226 100644
--- a/utils/TableGen/IntrinsicEmitter.h
+++ b/utils/TableGen/IntrinsicEmitter.h
@@ -48,8 +48,6 @@ namespace llvm {
raw_ostream &OS);
void EmitModRefBehavior(const std::vector<CodeGenIntrinsic> &Ints,
raw_ostream &OS);
- void EmitGCCBuiltinList(const std::vector<CodeGenIntrinsic> &Ints,
- raw_ostream &OS);
void EmitIntrinsicToGCCBuiltinMap(const std::vector<CodeGenIntrinsic> &Ints,
raw_ostream &OS);
void EmitSuffix(raw_ostream &OS);
diff --git a/utils/TableGen/LLVMBuild.txt b/utils/TableGen/LLVMBuild.txt
new file mode 100644
index 000000000000..b0081eb588d1
--- /dev/null
+++ b/utils/TableGen/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./utils/TableGen/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = BuildTool
+name = tblgen
+parent = BuildTools
+required_libraries = Support TableGen
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
index c685527a140c..802d112108fa 100644
--- a/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -67,7 +67,7 @@ addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
// Since we added more than one, we also need to adjust the base.
BaseIdx += NewOps - 1;
} else
- assert(0 && "Unhandled pseudo-expansion argument type!");
+ llvm_unreachable("Unhandled pseudo-expansion argument type!");
}
return OpsAdded;
}
@@ -100,8 +100,11 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
"' operand count mismatch");
+ unsigned NumMIOperands = 0;
+ for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i)
+ NumMIOperands += Insn.Operands[i].MINumOperands;
IndexedMap<OpData> OperandMap;
- OperandMap.grow(Insn.Operands.size());
+ OperandMap.grow(NumMIOperands);
addDagOperandMapping(Rec, Dag, Insn, OperandMap, 0);
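
The fix above sizes OperandMap by MI-level operands rather than named operands, since one named operand can expand into several MI operands. The counting step, isolated as a sketch:

  #include <vector>

  // One entry per named operand, giving its MINumOperands expansion.
  unsigned countMIOperands(const std::vector<unsigned> &MINumOperands) {
    unsigned N = 0;
    for (unsigned i = 0, e = MINumOperands.size(); i != e; ++i)
      N += MINumOperands[i];
    return N; // e.g. {1, 2, 1} needs 4 OperandMap slots, not 3
  }
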
@@ -176,8 +179,6 @@ void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
for (unsigned i = 0, e = Dest.Operands[OpNo].MINumOperands;
i != e; ++i) {
switch (Expansion.OperandMap[MIOpNo + i].Kind) {
- default:
- llvm_unreachable("Unknown operand type?!");
case OpData::Operand:
o << " lowerOperand(MI->getOperand("
<< Source.Operands[Expansion.OperandMap[MIOpNo].Data
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index b0f4ffc84e08..a2478a7330e8 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -16,6 +16,7 @@
#include "RegisterInfoEmitter.h"
#include "CodeGenTarget.h"
#include "CodeGenRegisters.h"
+#include "SequenceToOffsetTable.h"
#include "llvm/TableGen/Record.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -31,6 +32,9 @@ RegisterInfoEmitter::runEnums(raw_ostream &OS,
CodeGenTarget &Target, CodeGenRegBank &Bank) {
const std::vector<CodeGenRegister*> &Registers = Bank.getRegisters();
+ // Register enums are stored as uint16_t in the tables. Make sure we'll fit.
+ assert(Registers.size() <= 0xffff && "Too many regs to fit in tables");
+
std::string Namespace = Registers[0]->TheDef->getValueAsString("Namespace");
EmitSourceFileHeader("Target Register Enum Values", OS);
@@ -41,7 +45,8 @@ RegisterInfoEmitter::runEnums(raw_ostream &OS,
OS << "namespace llvm {\n\n";
OS << "class MCRegisterClass;\n"
- << "extern MCRegisterClass " << Namespace << "MCRegisterClasses[];\n\n";
+ << "extern const MCRegisterClass " << Namespace
+ << "MCRegisterClasses[];\n\n";
if (!Namespace.empty())
OS << "namespace " << Namespace << " {\n";
@@ -59,6 +64,11 @@ RegisterInfoEmitter::runEnums(raw_ostream &OS,
ArrayRef<CodeGenRegisterClass*> RegisterClasses = Bank.getRegClasses();
if (!RegisterClasses.empty()) {
+
+ // RegisterClass enums are stored as uint16_t in the tables.
+ assert(RegisterClasses.size() <= 0xffff &&
+ "Too many register classes to fit in tables");
+
OS << "\n// Register classes\n";
if (!Namespace.empty())
OS << "namespace " << Namespace << " {\n";
@@ -89,16 +99,104 @@ RegisterInfoEmitter::runEnums(raw_ostream &OS,
OS << "}\n";
}
+ ArrayRef<CodeGenSubRegIndex*> SubRegIndices = Bank.getSubRegIndices();
+ if (!SubRegIndices.empty()) {
+ OS << "\n// Subregister indices\n";
+ std::string Namespace =
+ SubRegIndices[0]->getNamespace();
+ if (!Namespace.empty())
+ OS << "namespace " << Namespace << " {\n";
+ OS << "enum {\n NoSubRegister,\n";
+ for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i)
+ OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
+ OS << " NUM_TARGET_NAMED_SUBREGS\n};\n";
+ if (!Namespace.empty())
+ OS << "}\n";
+ }
OS << "} // End llvm namespace \n";
OS << "#endif // GET_REGINFO_ENUM\n\n";
}
-void
-RegisterInfoEmitter::EmitRegMapping(raw_ostream &OS,
- const std::vector<CodeGenRegister*> &Regs,
- bool isCtor) {
+void RegisterInfoEmitter::
+EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
+ const std::string &ClassName) {
+ unsigned NumRCs = RegBank.getRegClasses().size();
+ unsigned NumSets = RegBank.getNumRegPressureSets();
+
+ OS << "/// Get the weight in units of pressure for this register class.\n"
+ << "const RegClassWeight &" << ClassName << "::\n"
+ << "getRegClassWeight(const TargetRegisterClass *RC) const {\n"
+ << " static const RegClassWeight RCWeightTable[] = {\n";
+ for (unsigned i = 0, e = NumRCs; i != e; ++i) {
+ const CodeGenRegisterClass &RC = *RegBank.getRegClasses()[i];
+ const CodeGenRegister::Set &Regs = RC.getMembers();
+ if (Regs.empty())
+ OS << " {0, 0";
+ else {
+ std::vector<unsigned> RegUnits;
+ RC.buildRegUnitSet(RegUnits);
+ OS << " {" << (*Regs.begin())->getWeight(RegBank)
+ << ", " << RegBank.getRegUnitSetWeight(RegUnits);
+ }
+ OS << "}, \t// " << RC.getName() << "\n";
+ }
+ OS << " {0, 0} };\n"
+ << " return RCWeightTable[RC->getID()];\n"
+ << "}\n\n";
+
+ OS << "\n"
+ << "// Get the number of dimensions of register pressure.\n"
+ << "unsigned " << ClassName << "::getNumRegPressureSets() const {\n"
+ << " return " << NumSets << ";\n}\n\n";
+
+ OS << "// Get the register unit pressure limit for this dimension.\n"
+ << "// This limit must be adjusted dynamically for reserved registers.\n"
+ << "unsigned " << ClassName << "::\n"
+ << "getRegPressureSetLimit(unsigned Idx) const {\n"
+ << " static const unsigned PressureLimitTable[] = {\n";
+ for (unsigned i = 0; i < NumSets; ++i ) {
+ const RegUnitSet &RegUnits = RegBank.getRegPressureSet(i);
+ OS << " " << RegBank.getRegUnitSetWeight(RegUnits.Units)
+ << ", \t// " << i << ": " << RegBank.getRegPressureSet(i).Name << "\n";
+ }
+ OS << " 0 };\n"
+ << " return PressureLimitTable[Idx];\n"
+ << "}\n\n";
+
+ OS << "/// Get the dimensions of register pressure "
+ << "impacted by this register class.\n"
+ << "/// Returns a -1 terminated array of pressure set IDs\n"
+ << "const int* " << ClassName << "::\n"
+ << "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n"
+ << " static const int RCSetsTable[] = {\n ";
+ std::vector<unsigned> RCSetStarts(NumRCs);
+ for (unsigned i = 0, StartIdx = 0, e = NumRCs; i != e; ++i) {
+ RCSetStarts[i] = StartIdx;
+ ArrayRef<unsigned> PSetIDs = RegBank.getRCPressureSetIDs(i);
+ for (ArrayRef<unsigned>::iterator PSetI = PSetIDs.begin(),
+ PSetE = PSetIDs.end(); PSetI != PSetE; ++PSetI) {
+ OS << *PSetI << ", ";
+ ++StartIdx;
+ }
+ OS << "-1, \t// " << RegBank.getRegClasses()[i]->getName() << "\n ";
+ ++StartIdx;
+ }
+ OS << "-1 };\n";
+ OS << " static const unsigned RCSetStartTable[] = {\n ";
+ for (unsigned i = 0, e = NumRCs; i != e; ++i) {
+ OS << RCSetStarts[i] << ",";
+ }
+ OS << "0 };\n"
+ << " unsigned SetListStart = RCSetStartTable[RC->getID()];\n"
+ << " return &RCSetsTable[SetListStart];\n"
+ << "}\n\n";
+}
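
getRegClassPressureSets flattens every class's pressure-set ID list into one array, each list terminated by -1, with a second table giving each class's start offset. In miniature:

  // Two register classes: class 0 in sets {0, 1}, class 1 in set {1}.
  static const int RCSetsTable[] = {
    0, 1, -1, // class 0's sets, -1 terminated
    1, -1     // class 1's sets
  };
  static const unsigned RCSetStartTable[] = {0, 3};

  const int *getRegClassPressureSets(unsigned RCId) {
    return &RCSetsTable[RCSetStartTable[RCId]];
  }
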
+void
+RegisterInfoEmitter::EmitRegMappingTables(raw_ostream &OS,
+ const std::vector<CodeGenRegister*> &Regs,
+ bool isCtor) {
// Collect all information about dwarf register numbers
typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy;
DwarfRegNumsMapTy DwarfRegNums;
@@ -124,6 +222,121 @@ RegisterInfoEmitter::EmitRegMapping(raw_ostream &OS,
for (unsigned i = I->second.size(), e = maxLength; i != e; ++i)
I->second.push_back(-1);
+ std::string Namespace = Regs[0]->TheDef->getValueAsString("Namespace");
+
+ OS << "// " << Namespace << " Dwarf<->LLVM register mappings.\n";
+
+ // Emit reverse information about the dwarf register numbers.
+ for (unsigned j = 0; j < 2; ++j) {
+ for (unsigned i = 0, e = maxLength; i != e; ++i) {
+ OS << "extern const MCRegisterInfo::DwarfLLVMRegPair " << Namespace;
+ OS << (j == 0 ? "DwarfFlavour" : "EHFlavour");
+ OS << i << "Dwarf2L[]";
+
+ if (!isCtor) {
+ OS << " = {\n";
+
+ // Store the mapping sorted by the LLVM reg num so lookup can be done
+ // with a binary search.
+ std::map<uint64_t, Record*> Dwarf2LMap;
+ for (DwarfRegNumsMapTy::iterator
+ I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
+ int DwarfRegNo = I->second[i];
+ if (DwarfRegNo < 0)
+ continue;
+ Dwarf2LMap[DwarfRegNo] = I->first;
+ }
+
+ for (std::map<uint64_t, Record*>::iterator
+ I = Dwarf2LMap.begin(), E = Dwarf2LMap.end(); I != E; ++I)
+ OS << " { " << I->first << "U, " << getQualifiedName(I->second)
+ << " },\n";
+
+ OS << "};\n";
+ } else {
+ OS << ";\n";
+ }
+
+ // We have to store the size in a const global; it's used in multiple
+ // places.
+ OS << "extern const unsigned " << Namespace
+ << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "Dwarf2LSize";
+ if (!isCtor)
+ OS << " = sizeof(" << Namespace
+ << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i
+ << "Dwarf2L)/sizeof(MCRegisterInfo::DwarfLLVMRegPair);\n\n";
+ else
+ OS << ";\n\n";
+ }
+ }
+
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ Record *Reg = Regs[i]->TheDef;
+ const RecordVal *V = Reg->getValue("DwarfAlias");
+ if (!V || !V->getValue())
+ continue;
+
+ DefInit *DI = dynamic_cast<DefInit*>(V->getValue());
+ Record *Alias = DI->getDef();
+ DwarfRegNums[Reg] = DwarfRegNums[Alias];
+ }
+
+ // Emit information about the dwarf register numbers.
+ for (unsigned j = 0; j < 2; ++j) {
+ for (unsigned i = 0, e = maxLength; i != e; ++i) {
+ OS << "extern const MCRegisterInfo::DwarfLLVMRegPair " << Namespace;
+ OS << (j == 0 ? "DwarfFlavour" : "EHFlavour");
+ OS << i << "L2Dwarf[]";
+ if (!isCtor) {
+ OS << " = {\n";
+ // Store the mapping sorted by the Dwarf reg num so lookup can be done
+ // with a binary search.
+ for (DwarfRegNumsMapTy::iterator
+ I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
+ int RegNo = I->second[i];
+ if (RegNo == -1) // -1 is the default value, don't emit a mapping.
+ continue;
+
+ OS << " { " << getQualifiedName(I->first) << ", " << RegNo
+ << "U },\n";
+ }
+ OS << "};\n";
+ } else {
+ OS << ";\n";
+ }
+
+ // We have to store the size in a const global; it's used in multiple
+ // places.
+ OS << "extern const unsigned " << Namespace
+ << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2DwarfSize";
+ if (!isCtor)
+ OS << " = sizeof(" << Namespace
+ << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i
+ << "L2Dwarf)/sizeof(MCRegisterInfo::DwarfLLVMRegPair);\n\n";
+ else
+ OS << ";\n\n";
+ }
+ }
+}
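
The pair tables are emitted sorted by the source register number precisely so the runtime can binary-search instead of switching. The lookup those tables support, sketched with fake register numbers:

  #include <algorithm>

  struct DwarfLLVMRegPair {
    unsigned FromReg;
    unsigned ToReg;
    bool operator<(const DwarfLLVMRegPair &RHS) const {
      return FromReg < RHS.FromReg;
    }
  };

  // Hypothetical Dwarf->LLVM table, sorted by Dwarf number.
  static const DwarfLLVMRegPair Dwarf2L[] = {{0, 50}, {1, 51}, {16, 70}};
  static const unsigned Dwarf2LSize = sizeof(Dwarf2L) / sizeof(Dwarf2L[0]);

  int getLLVMRegNum(unsigned DwarfReg) {
    const DwarfLLVMRegPair Key = {DwarfReg, 0};
    const DwarfLLVMRegPair *E = Dwarf2L + Dwarf2LSize;
    const DwarfLLVMRegPair *I = std::lower_bound(Dwarf2L, E, Key);
    if (I == E || I->FromReg != DwarfReg)
      return -1;
    return I->ToReg;
  }
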
+
+void
+RegisterInfoEmitter::EmitRegMapping(raw_ostream &OS,
+ const std::vector<CodeGenRegister*> &Regs,
+ bool isCtor) {
+ // Emit the initializer so the tables from EmitRegMappingTables get wired up
+ // to the MCRegisterInfo object.
+ unsigned maxLength = 0;
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ Record *Reg = Regs[i]->TheDef;
+ maxLength = std::max((size_t)maxLength,
+ Reg->getValueAsListOfInts("DwarfNumbers").size());
+ }
+
+ if (!maxLength)
+ return;
+
+ std::string Namespace = Regs[0]->TheDef->getValueAsString("Namespace");
+
// Emit reverse information about the dwarf register numbers.
for (unsigned j = 0; j < 2; ++j) {
OS << " switch (";
@@ -133,43 +346,28 @@ RegisterInfoEmitter::EmitRegMapping(raw_ostream &OS,
OS << "EHFlavour";
OS << ") {\n"
<< " default:\n"
- << " assert(0 && \"Unknown DWARF flavour\");\n"
- << " break;\n";
+ << " llvm_unreachable(\"Unknown DWARF flavour\");\n";
for (unsigned i = 0, e = maxLength; i != e; ++i) {
OS << " case " << i << ":\n";
- for (DwarfRegNumsMapTy::iterator
- I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
- int DwarfRegNo = I->second[i];
- if (DwarfRegNo < 0)
- continue;
- OS << " ";
- if (!isCtor)
- OS << "RI->";
- OS << "mapDwarfRegToLLVMReg(" << DwarfRegNo << ", "
- << getQualifiedName(I->first) << ", ";
- if (j == 0)
+ OS << " ";
+ if (!isCtor)
+ OS << "RI->";
+ std::string Tmp;
+ raw_string_ostream(Tmp) << Namespace
+ << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i
+ << "Dwarf2L";
+ OS << "mapDwarfRegsToLLVMRegs(" << Tmp << ", " << Tmp << "Size, ";
+ if (j == 0)
OS << "false";
else
OS << "true";
- OS << " );\n";
- }
+ OS << ");\n";
OS << " break;\n";
}
OS << " }\n";
}
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record *Reg = Regs[i]->TheDef;
- const RecordVal *V = Reg->getValue("DwarfAlias");
- if (!V || !V->getValue())
- continue;
-
- DefInit *DI = dynamic_cast<DefInit*>(V->getValue());
- Record *Alias = DI->getDef();
- DwarfRegNums[Reg] = DwarfRegNums[Alias];
- }
-
// Emit information about the dwarf register numbers.
for (unsigned j = 0; j < 2; ++j) {
OS << " switch (";
@@ -179,26 +377,23 @@ RegisterInfoEmitter::EmitRegMapping(raw_ostream &OS,
OS << "EHFlavour";
OS << ") {\n"
<< " default:\n"
- << " assert(0 && \"Unknown DWARF flavour\");\n"
- << " break;\n";
+ << " llvm_unreachable(\"Unknown DWARF flavour\");\n";
for (unsigned i = 0, e = maxLength; i != e; ++i) {
OS << " case " << i << ":\n";
- // Sort by name to get a stable order.
- for (DwarfRegNumsMapTy::iterator
- I = DwarfRegNums.begin(), E = DwarfRegNums.end(); I != E; ++I) {
- int RegNo = I->second[i];
- OS << " ";
- if (!isCtor)
- OS << "RI->";
- OS << "mapLLVMRegToDwarfReg(" << getQualifiedName(I->first) << ", "
- << RegNo << ", ";
- if (j == 0)
+ OS << " ";
+ if (!isCtor)
+ OS << "RI->";
+ std::string Tmp;
+ raw_string_ostream(Tmp) << Namespace
+ << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i
+ << "L2Dwarf";
+ OS << "mapLLVMRegsToDwarfRegs(" << Tmp << ", " << Tmp << "Size, ";
+ if (j == 0)
OS << "false";
else
OS << "true";
- OS << " );\n";
- }
+ OS << ");\n";
OS << " break;\n";
}
OS << " }\n";
@@ -235,6 +430,14 @@ public:
}
};
+static void printRegister(raw_ostream &OS, const CodeGenRegister *Reg) {
+ OS << getQualifiedName(Reg->TheDef);
+}
+
+static void printSimpleValueType(raw_ostream &OS, MVT::SimpleValueType VT) {
+ OS << getEnumName(VT);
+}
+
//
// runMCDesc - Print out MC register descriptions.
//
@@ -246,87 +449,79 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "\n#ifdef GET_REGINFO_MC_DESC\n";
OS << "#undef GET_REGINFO_MC_DESC\n";
+ const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();
std::map<const CodeGenRegister*, CodeGenRegister::Set> Overlaps;
RegBank.computeOverlaps(Overlaps);
- OS << "namespace llvm {\n\n";
-
- const std::string &TargetName = Target.getName();
- std::string ClassName = TargetName + "GenMCRegisterInfo";
- OS << "struct " << ClassName << " : public MCRegisterInfo {\n"
- << " explicit " << ClassName << "(const MCRegisterDesc *D);\n";
- OS << "};\n";
+ // The lists of sub-registers, super-registers, and overlaps all go in the
+ // same array. That allows us to share suffixes.
+ typedef std::vector<const CodeGenRegister*> RegVec;
+ SmallVector<RegVec, 4> SubRegLists(Regs.size());
+ SmallVector<RegVec, 4> OverlapLists(Regs.size());
+ SequenceToOffsetTable<RegVec, CodeGenRegister::Less> RegSeqs;
- OS << "\nnamespace {\n";
-
- const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();
-
- // Emit an overlap list for all registers.
+ // Precompute register lists for the SequenceToOffsetTable.
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
const CodeGenRegister *Reg = Regs[i];
- const CodeGenRegister::Set &O = Overlaps[Reg];
- // Move Reg to the front so TRI::getAliasSet can share the list.
- OS << " const unsigned " << Reg->getName() << "_Overlaps[] = { "
- << getQualifiedName(Reg->TheDef) << ", ";
- for (CodeGenRegister::Set::const_iterator I = O.begin(), E = O.end();
- I != E; ++I)
- if (*I != Reg)
- OS << getQualifiedName((*I)->TheDef) << ", ";
- OS << "0 };\n";
- }
- // Emit the empty sub-registers list
- OS << " const unsigned Empty_SubRegsSet[] = { 0 };\n";
- // Loop over all of the registers which have sub-registers, emitting the
- // sub-registers list to memory.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister &Reg = *Regs[i];
- if (Reg.getSubRegs().empty())
- continue;
- // getSubRegs() orders by SubRegIndex. We want a topological order.
- SetVector<CodeGenRegister*> SR;
- Reg.addSubRegsPreOrder(SR);
- OS << " const unsigned " << Reg.getName() << "_SubRegsSet[] = { ";
- for (unsigned j = 0, je = SR.size(); j != je; ++j)
- OS << getQualifiedName(SR[j]->TheDef) << ", ";
- OS << "0 };\n";
+ // Compute the ordered sub-register list.
+ SetVector<const CodeGenRegister*> SR;
+ Reg->addSubRegsPreOrder(SR, RegBank);
+ RegVec &SubRegList = SubRegLists[i];
+ SubRegList.assign(SR.begin(), SR.end());
+ RegSeqs.add(SubRegList);
+
+ // Super-registers are already computed.
+ const RegVec &SuperRegList = Reg->getSuperRegs();
+ RegSeqs.add(SuperRegList);
+
+ // The list of overlaps doesn't need to have any particular order, except
+ // Reg itself must be the first element. Pick an ordering that has one of
+ // the other lists as a suffix.
+ RegVec &OverlapList = OverlapLists[i];
+ const RegVec &Suffix = SubRegList.size() > SuperRegList.size() ?
+ SubRegList : SuperRegList;
+ CodeGenRegister::Set Omit(Suffix.begin(), Suffix.end());
+
+ // First element is Reg itself.
+ OverlapList.push_back(Reg);
+ Omit.insert(Reg);
+
+ // Any elements not in Suffix.
+ const CodeGenRegister::Set &OSet = Overlaps[Reg];
+ std::set_difference(OSet.begin(), OSet.end(),
+ Omit.begin(), Omit.end(),
+ std::back_inserter(OverlapList),
+ CodeGenRegister::Less());
+
+ // Finally, Suffix itself.
+ OverlapList.insert(OverlapList.end(), Suffix.begin(), Suffix.end());
+ RegSeqs.add(OverlapList);
}
- // Emit the empty super-registers list
- OS << " const unsigned Empty_SuperRegsSet[] = { 0 };\n";
- // Loop over all of the registers which have super-registers, emitting the
- // super-registers list to memory.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister &Reg = *Regs[i];
- const CodeGenRegister::SuperRegList &SR = Reg.getSuperRegs();
- if (SR.empty())
- continue;
- OS << " const unsigned " << Reg.getName() << "_SuperRegsSet[] = { ";
- for (unsigned j = 0, je = SR.size(); j != je; ++j)
- OS << getQualifiedName(SR[j]->TheDef) << ", ";
- OS << "0 };\n";
- }
- OS << "}\n"; // End of anonymous namespace...
+ // Compute the final layout of the sequence table.
+ RegSeqs.layout();
- OS << "\nMCRegisterDesc " << TargetName
+ OS << "namespace llvm {\n\n";
+
+ const std::string &TargetName = Target.getName();
+
+ // Emit the shared table of register lists.
+ OS << "extern const uint16_t " << TargetName << "RegLists[] = {\n";
+ RegSeqs.emit(OS, printRegister);
+ OS << "};\n\n";
+
+ OS << "extern const MCRegisterDesc " << TargetName
<< "RegDesc[] = { // Descriptors\n";
- OS << " { \"NOREG\",\t0,\t0,\t0 },\n";
+ OS << " { \"NOREG\", 0, 0, 0 },\n";
- // Now that register alias and sub-registers sets have been emitted, emit the
- // register descriptors now.
+ // Emit the register descriptors now.
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister &Reg = *Regs[i];
- OS << " { \"";
- OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t";
- if (!Reg.getSubRegs().empty())
- OS << Reg.getName() << "_SubRegsSet,\t";
- else
- OS << "Empty_SubRegsSet,\t";
- if (!Reg.getSuperRegs().empty())
- OS << Reg.getName() << "_SuperRegsSet";
- else
- OS << "Empty_SuperRegsSet";
- OS << " },\n";
+ const CodeGenRegister *Reg = Regs[i];
+ OS << " { \"" << Reg->getName() << "\", "
+ << RegSeqs.get(OverlapLists[i]) << ", "
+ << RegSeqs.get(SubRegLists[i]) << ", "
+ << RegSeqs.get(Reg->getSuperRegs()) << " },\n";
}
OS << "};\n\n"; // End of register descriptors...
@@ -345,7 +540,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the register list now.
OS << " // " << Name << " Register Class...\n"
- << " static const unsigned " << Name
+ << " const uint16_t " << Name
<< "[] = {\n ";
for (unsigned i = 0, e = Order.size(); i != e; ++i) {
Record *Reg = Order[i];
@@ -354,7 +549,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "\n };\n\n";
OS << " // " << Name << " Bit set.\n"
- << " static const unsigned char " << Name
+ << " const uint8_t " << Name
<< "Bits[] = {\n ";
BitVectorEmitter BVE;
for (unsigned i = 0, e = Order.size(); i != e; ++i) {
@@ -367,37 +562,81 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << "}\n\n";
- OS << "MCRegisterClass " << TargetName << "MCRegisterClasses[] = {\n";
+ OS << "extern const MCRegisterClass " << TargetName
+ << "MCRegisterClasses[] = {\n";
for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
const CodeGenRegisterClass &RC = *RegisterClasses[rc];
- OS << " MCRegisterClass(" << RC.getQualifiedName() + "RegClassID" << ", "
- << '\"' << RC.getName() << "\", "
+
+ // Assert that the values will fit in the table, given the types used in
+ // MCRegisterInfo.h.
+ assert((RC.SpillSize/8) <= 0xffff && "SpillSize too large.");
+ assert((RC.SpillAlignment/8) <= 0xffff && "SpillAlignment too large.");
+ assert(RC.CopyCost >= -128 && RC.CopyCost <= 127 && "Copy cost too large.");
+
+ OS << " { " << '\"' << RC.getName() << "\", "
+ << RC.getName() << ", " << RC.getName() << "Bits, "
+ << RC.getOrder().size() << ", sizeof(" << RC.getName() << "Bits), "
+ << RC.getQualifiedName() + "RegClassID" << ", "
<< RC.SpillSize/8 << ", "
<< RC.SpillAlignment/8 << ", "
<< RC.CopyCost << ", "
- << RC.Allocatable << ", "
- << RC.getName() << ", " << RC.getName() << " + "
- << RC.getOrder().size() << ", "
- << RC.getName() << "Bits, sizeof(" << RC.getName() << "Bits)"
- << "),\n";
+ << RC.Allocatable << " },\n";
}
OS << "};\n\n";
+ // Emit the data table for getSubReg().
+ ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices();
+ if (SubRegIndices.size()) {
+ OS << "const uint16_t " << TargetName << "SubRegTable[]["
+ << SubRegIndices.size() << "] = {\n";
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
+ OS << " /* " << Regs[i]->TheDef->getName() << " */\n";
+ if (SRM.empty()) {
+ OS << " {0},\n";
+ continue;
+ }
+ OS << " {";
+ for (unsigned j = 0, je = SubRegIndices.size(); j != je; ++j) {
+ // FIXME: We really should keep this to 80 columns...
+ CodeGenRegister::SubRegMap::const_iterator SubReg =
+ SRM.find(SubRegIndices[j]);
+ if (SubReg != SRM.end())
+ OS << getQualifiedName(SubReg->second->TheDef);
+ else
+ OS << "0";
+ if (j != je - 1)
+ OS << ", ";
+ }
+ OS << "}" << (i != e ? "," : "") << "\n";
+ }
+ OS << "};\n\n";
+ OS << "const uint16_t *get" << TargetName
+ << "SubRegTable() {\n return (const uint16_t *)" << TargetName
+ << "SubRegTable;\n}\n\n";
+ }
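On a hypothetical target with two sub-register indices, the emitted table has one row per register, with 0 for a missing mapping:

  const uint16_t XyzSubRegTable[][2] = {
    /* R0 */
    {0},
    /* D0 */
    {Xyz::R0, Xyz::R1},   // e.g. sub_lo -> R0, sub_hi -> R1
  };

getSubReg(Reg, Idx) then reduces to a single table load (the exact indexing lives in MCRegisterInfo), replacing the big generated switch removed further down.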
+
+ EmitRegMappingTables(OS, Regs, false);
+
// MCRegisterInfo initialization routine.
OS << "static inline void Init" << TargetName
<< "MCRegisterInfo(MCRegisterInfo *RI, unsigned RA, "
<< "unsigned DwarfFlavour = 0, unsigned EHFlavour = 0) {\n";
OS << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
<< Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, "
- << RegisterClasses.size() << ");\n\n";
+ << RegisterClasses.size() << ", " << TargetName << "RegLists, ";
+ if (SubRegIndices.size() != 0)
+ OS << "(uint16_t*)" << TargetName << "SubRegTable, "
+ << SubRegIndices.size() << ");\n\n";
+ else
+ OS << "NULL, 0);\n\n";
EmitRegMapping(OS, Regs, false);
OS << "}\n\n";
-
OS << "} // End llvm namespace \n";
OS << "#endif // GET_REGINFO_MC_DESC\n\n";
}
@@ -413,8 +652,7 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
const std::string &TargetName = Target.getName();
std::string ClassName = TargetName + "GenRegisterInfo";
- OS << "#include \"llvm/Target/TargetRegisterInfo.h\"\n";
- OS << "#include <string>\n\n";
+ OS << "#include \"llvm/Target/TargetRegisterInfo.h\"\n\n";
OS << "namespace llvm {\n\n";
@@ -423,28 +661,20 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
<< "(unsigned RA, unsigned D = 0, unsigned E = 0);\n"
<< " virtual bool needsStackRealignment(const MachineFunction &) const\n"
<< " { return false; }\n"
- << " unsigned getSubReg(unsigned RegNo, unsigned Index) const;\n"
- << " unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const;\n"
<< " unsigned composeSubRegIndices(unsigned, unsigned) const;\n"
<< " const TargetRegisterClass *"
"getSubClassWithSubReg(const TargetRegisterClass*, unsigned) const;\n"
+ << " const TargetRegisterClass *getMatchingSuperRegClass("
+ "const TargetRegisterClass*, const TargetRegisterClass*, "
+ "unsigned) const;\n"
+ << " const RegClassWeight &getRegClassWeight("
+ << "const TargetRegisterClass *RC) const;\n"
+ << " unsigned getNumRegPressureSets() const;\n"
+ << " unsigned getRegPressureSetLimit(unsigned Idx) const;\n"
+ << " const int *getRegClassPressureSets("
+ << "const TargetRegisterClass *RC) const;\n"
<< "};\n\n";
- const std::vector<Record*> &SubRegIndices = RegBank.getSubRegIndices();
- if (!SubRegIndices.empty()) {
- OS << "\n// Subregister indices\n";
- std::string Namespace = SubRegIndices[0]->getValueAsString("Namespace");
- if (!Namespace.empty())
- OS << "namespace " << Namespace << " {\n";
- OS << "enum {\n NoSubRegister,\n";
- for (unsigned i = 0, e = RegBank.getNumNamedIndices(); i != e; ++i)
- OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
- OS << " NUM_TARGET_NAMED_SUBREGS = " << SubRegIndices.size()+1 << "\n";
- OS << "};\n";
- if (!Namespace.empty())
- OS << "}\n";
- }
-
ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses();
if (!RegisterClasses.empty()) {
@@ -455,19 +685,11 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
const CodeGenRegisterClass &RC = *RegisterClasses[i];
const std::string &Name = RC.getName();
- // Output the register class definition.
- OS << " struct " << Name << "Class : public TargetRegisterClass {\n"
- << " " << Name << "Class();\n";
- if (!RC.AltOrderSelect.empty())
- OS << " ArrayRef<unsigned> "
- "getRawAllocationOrder(const MachineFunction&) const;\n";
- OS << " };\n";
-
// Output the extern for the instance.
- OS << " extern " << Name << "Class\t" << Name << "RegClass;\n";
+ OS << " extern const TargetRegisterClass " << Name << "RegClass;\n";
// Output the extern for the pointer to the instance (should remove).
- OS << " static TargetRegisterClass * const "<< Name <<"RegisterClass = &"
- << Name << "RegClass;\n";
+ OS << " static const TargetRegisterClass * const " << Name
+ << "RegisterClass = &" << Name << "RegClass;\n";
}
OS << "} // end of namespace " << TargetName << "\n\n";
}
@@ -489,8 +711,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "namespace llvm {\n\n";
// Get access to MCRegisterClass data.
- OS << "extern MCRegisterClass " << Target.getName()
- << "MCRegisterClasses[];\n";
+ OS << "extern const MCRegisterClass " << Target.getName()
+ << "MCRegisterClasses[];\n";
// Start out by emitting each of the register classes.
ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses();
@@ -507,38 +729,21 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
AllocatableRegs.insert(Order.begin(), Order.end());
}
- OS << "namespace { // Register classes...\n";
-
- // Emit the ValueType arrays for each RegisterClass
- for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
- const CodeGenRegisterClass &RC = *RegisterClasses[rc];
-
- // Give the register class a legal C name if it's anonymous.
- std::string Name = RC.getName() + "VTs";
-
- // Emit the register list now.
- OS << " // " << Name
- << " Register Class Value Types...\n"
- << " static const EVT " << Name
- << "[] = {\n ";
- for (unsigned i = 0, e = RC.VTs.size(); i != e; ++i)
- OS << getEnumName(RC.VTs[i]) << ", ";
- OS << "MVT::Other\n };\n\n";
- }
- OS << "} // end anonymous namespace\n\n";
+ // Build a shared array of value types.
+ SequenceToOffsetTable<std::vector<MVT::SimpleValueType> > VTSeqs;
+ for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc)
+ VTSeqs.add(RegisterClasses[rc]->VTs);
+ VTSeqs.layout();
+ OS << "\nstatic const MVT::SimpleValueType VTLists[] = {\n";
+ VTSeqs.emit(OS, printSimpleValueType, "MVT::Other");
+ OS << "};\n";
// Now that all of the structs have been emitted, emit the instances.
if (!RegisterClasses.empty()) {
- OS << "namespace " << RegisterClasses[0]->Namespace
- << " { // Register class instances\n";
- for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i)
- OS << " " << RegisterClasses[i]->getName() << "Class\t"
- << RegisterClasses[i]->getName() << "RegClass;\n";
-
std::map<unsigned, std::set<unsigned> > SuperRegClassMap;
- OS << "\n static const TargetRegisterClass* const "
- << "NullRegClasses[] = { NULL };\n\n";
+ OS << "\nstatic const TargetRegisterClass *const "
+ << "NullRegClasses[] = { NULL };\n\n";
unsigned NumSubRegIndices = RegBank.getSubRegIndices().size();
@@ -563,10 +768,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Give the register class a legal C name if it's anonymous.
std::string Name = RC.getName();
- OS << " // " << Name
+ OS << "// " << Name
<< " Super-register Classes...\n"
- << " static const TargetRegisterClass* const "
- << Name << "SuperRegClasses[] = {\n ";
+ << "static const TargetRegisterClass *const "
+ << Name << "SuperRegClasses[] = {\n ";
bool Empty = true;
std::map<unsigned, std::set<unsigned> >::iterator I =
@@ -583,7 +788,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << (!Empty ? ", " : "") << "NULL";
- OS << "\n };\n\n";
+ OS << "\n};\n\n";
}
}
@@ -594,9 +799,9 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Give the register class a legal C name if it's anonymous.
std::string Name = RC.getName();
- OS << " static const unsigned " << Name << "SubclassMask[] = { ";
+ OS << "static const uint32_t " << Name << "SubclassMask[] = {\n ";
printBitVectorAsHex(OS, RC.getSubClasses(), 32);
- OS << "};\n\n";
+ OS << "\n};\n\n";
}
// Emit NULL terminated super-class lists.
@@ -608,54 +813,71 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
if (Supers.empty())
continue;
- OS << " static const TargetRegisterClass* const "
+ OS << "static const TargetRegisterClass *const "
<< RC.getName() << "Superclasses[] = {\n";
for (unsigned i = 0; i != Supers.size(); ++i)
- OS << " &" << Supers[i]->getQualifiedName() << "RegClass,\n";
- OS << " NULL\n };\n\n";
+ OS << " &" << Supers[i]->getQualifiedName() << "RegClass,\n";
+ OS << " NULL\n};\n\n";
}
// Emit methods.
for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
const CodeGenRegisterClass &RC = *RegisterClasses[i];
- OS << RC.getName() << "Class::" << RC.getName()
- << "Class() : TargetRegisterClass(&"
- << Target.getName() << "MCRegisterClasses["
- << RC.getName() + "RegClassID" << "], "
- << RC.getName() + "VTs" << ", "
- << RC.getName() + "SubclassMask" << ", ";
- if (RC.getSuperClasses().empty())
- OS << "NullRegClasses, ";
- else
- OS << RC.getName() + "Superclasses, ";
- OS << (NumSubRegIndices ? RC.getName() + "Super" : std::string("Null"))
- << "RegClasses"
- << ") {}\n";
if (!RC.AltOrderSelect.empty()) {
OS << "\nstatic inline unsigned " << RC.getName()
<< "AltOrderSelect(const MachineFunction &MF) {"
- << RC.AltOrderSelect << "}\n\nArrayRef<unsigned> "
- << RC.getName() << "Class::"
- << "getRawAllocationOrder(const MachineFunction &MF) const {\n";
+ << RC.AltOrderSelect << "}\n\n"
+ << "static ArrayRef<uint16_t> " << RC.getName()
+ << "GetRawAllocationOrder(const MachineFunction &MF) {\n";
for (unsigned oi = 1 , oe = RC.getNumOrders(); oi != oe; ++oi) {
ArrayRef<Record*> Elems = RC.getOrder(oi);
- OS << " static const unsigned AltOrder" << oi << "[] = {";
- for (unsigned elem = 0; elem != Elems.size(); ++elem)
- OS << (elem ? ", " : " ") << getQualifiedName(Elems[elem]);
- OS << " };\n";
+ if (!Elems.empty()) {
+ OS << " static const uint16_t AltOrder" << oi << "[] = {";
+ for (unsigned elem = 0; elem != Elems.size(); ++elem)
+ OS << (elem ? ", " : " ") << getQualifiedName(Elems[elem]);
+ OS << " };\n";
+ }
}
OS << " const MCRegisterClass &MCR = " << Target.getName()
- << "MCRegisterClasses[" << RC.getQualifiedName() + "RegClassID];"
- << " static const ArrayRef<unsigned> Order[] = {\n"
+ << "MCRegisterClasses[" << RC.getQualifiedName() + "RegClassID];\n"
+ << " const ArrayRef<uint16_t> Order[] = {\n"
<< " makeArrayRef(MCR.begin(), MCR.getNumRegs()";
for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi)
- OS << "),\n makeArrayRef(AltOrder" << oi;
+ if (RC.getOrder(oi).empty())
+ OS << "),\n ArrayRef<uint16_t>(";
+ else
+ OS << "),\n makeArrayRef(AltOrder" << oi;
OS << ")\n };\n const unsigned Select = " << RC.getName()
<< "AltOrderSelect(MF);\n assert(Select < " << RC.getNumOrders()
<< ");\n return Order[Select];\n}\n";
}
}
+ // Now emit the actual value-initialized register class instances.
+ OS << "namespace " << RegisterClasses[0]->Namespace
+ << " { // Register class instances\n";
+
+ for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
+ const CodeGenRegisterClass &RC = *RegisterClasses[i];
+ OS << " extern const TargetRegisterClass "
+ << RegisterClasses[i]->getName() << "RegClass = {\n "
+ << '&' << Target.getName() << "MCRegisterClasses[" << RC.getName()
+ << "RegClassID],\n "
+ << "VTLists + " << VTSeqs.get(RC.VTs) << ",\n "
+ << RC.getName() << "SubclassMask,\n ";
+ if (RC.getSuperClasses().empty())
+ OS << "NullRegClasses,\n ";
+ else
+ OS << RC.getName() << "Superclasses,\n ";
+ OS << (NumSubRegIndices ? RC.getName() + "Super" : std::string("Null"))
+ << "RegClasses,\n ";
+ if (RC.AltOrderSelect.empty())
+ OS << "0\n";
+ else
+ OS << RC.getName() << "GetRawAllocationOrder\n";
+ OS << " };\n\n";
+ }
+
OS << "}\n";
}
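The instances emitted by the loop above are plain aggregates now, roughly (target and class names hypothetical):

  namespace Xyz { // Register class instances
    extern const TargetRegisterClass GPRRegClass = {
      &XyzMCRegisterClasses[GPRRegClassID], // shared MC class data
      VTLists + 0,                          // value types
      GPRSubclassMask,                      // sub-class bit mask
      NullRegClasses,                       // super-classes (none)
      GPRSuperRegClasses,                   // super-register classes
      0                                     // no alternative allocation order
    };
  }

Value-initialization replaces the per-class constructor bodies the removed code generated, so no static constructors need to run at load time.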
@@ -669,28 +891,27 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit extra information about registers.
const std::string &TargetName = Target.getName();
- OS << "\n static const TargetRegisterInfoDesc "
- << TargetName << "RegInfoDesc[] = "
- << "{ // Extra Descriptors\n";
- OS << " { 0, 0 },\n";
+ OS << "\nstatic const TargetRegisterInfoDesc "
+ << TargetName << "RegInfoDesc[] = { // Extra Descriptors\n";
+ OS << " { 0, 0 },\n";
const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
const CodeGenRegister &Reg = *Regs[i];
- OS << " { ";
+ OS << " { ";
OS << Reg.CostPerUse << ", "
<< int(AllocatableRegs.count(Reg.TheDef)) << " },\n";
}
- OS << " };\n"; // End of register descriptors...
+ OS << "};\n"; // End of register descriptors...
// Calculate the mapping of subregister+index pairs to physical registers.
- // This will also create further anonymous indexes.
+ // This will also create further anonymous indices.
unsigned NamedIndices = RegBank.getNumNamedIndices();
// Emit SubRegIndex names, skipping 0
- const std::vector<Record*> &SubRegIndices = RegBank.getSubRegIndices();
- OS << "\n static const char *const " << TargetName
+ ArrayRef<CodeGenSubRegIndex*> SubRegIndices = RegBank.getSubRegIndices();
+ OS << "\nstatic const char *const " << TargetName
<< "SubRegIndexTable[] = { \"";
for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) {
OS << SubRegIndices[i]->getName();
@@ -699,7 +920,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << "\" };\n\n";
- // Emit names of the anonymus subreg indexes.
+ // Emit names of the anonymous subreg indices.
if (SubRegIndices.size() > NamedIndices) {
OS << " enum {";
for (unsigned i = NamedIndices, e = SubRegIndices.size(); i != e; ++i) {
@@ -713,48 +934,6 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
std::string ClassName = Target.getName() + "GenRegisterInfo";
- // Emit the subregister + index mapping function based on the information
- // calculated above.
- OS << "unsigned " << ClassName
- << "::getSubReg(unsigned RegNo, unsigned Index) const {\n"
- << " switch (RegNo) {\n"
- << " default:\n return 0;\n";
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
- if (SRM.empty())
- continue;
- OS << " case " << getQualifiedName(Regs[i]->TheDef) << ":\n";
- OS << " switch (Index) {\n";
- OS << " default: return 0;\n";
- for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(),
- ie = SRM.end(); ii != ie; ++ii)
- OS << " case " << getQualifiedName(ii->first)
- << ": return " << getQualifiedName(ii->second->TheDef) << ";\n";
- OS << " };\n" << " break;\n";
- }
- OS << " };\n";
- OS << " return 0;\n";
- OS << "}\n\n";
-
- OS << "unsigned " << ClassName
- << "::getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const {\n"
- << " switch (RegNo) {\n"
- << " default:\n return 0;\n";
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
- if (SRM.empty())
- continue;
- OS << " case " << getQualifiedName(Regs[i]->TheDef) << ":\n";
- for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(),
- ie = SRM.end(); ii != ie; ++ii)
- OS << " if (SubRegNo == " << getQualifiedName(ii->second->TheDef)
- << ") return " << getQualifiedName(ii->first) << ";\n";
- OS << " return 0;\n";
- }
- OS << " };\n";
- OS << " return 0;\n";
- OS << "}\n\n";
-
// Emit composeSubRegIndices
OS << "unsigned " << ClassName
<< "::composeSubRegIndices(unsigned IdxA, unsigned IdxB) const {\n"
@@ -763,15 +942,15 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) {
bool Open = false;
for (unsigned j = 0; j != e; ++j) {
- if (Record *Comp = RegBank.getCompositeSubRegIndex(SubRegIndices[i],
- SubRegIndices[j])) {
+ if (CodeGenSubRegIndex *Comp =
+ SubRegIndices[i]->compose(SubRegIndices[j])) {
if (!Open) {
- OS << " case " << getQualifiedName(SubRegIndices[i])
+ OS << " case " << SubRegIndices[i]->getQualifiedName()
<< ": switch(IdxB) {\n default: return IdxB;\n";
Open = true;
}
- OS << " case " << getQualifiedName(SubRegIndices[j])
- << ": return " << getQualifiedName(Comp) << ";\n";
+ OS << " case " << SubRegIndices[j]->getQualifiedName()
+ << ": return " << Comp->getQualifiedName() << ";\n";
}
}
if (Open)
@@ -800,7 +979,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
const CodeGenRegisterClass &RC = *RegisterClasses[rci];
OS << " {\t// " << RC.getName() << "\n";
for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
- Record *Idx = SubRegIndices[sri];
+ CodeGenSubRegIndex *Idx = SubRegIndices[sri];
if (CodeGenRegisterClass *SRC = RC.getSubClassWithSubReg(Idx))
OS << " " << SRC->EnumValue + 1 << ",\t// " << Idx->getName()
<< " -> " << SRC->getName() << "\n";
@@ -817,22 +996,106 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
}
OS << "}\n\n";
+ // Emit getMatchingSuperRegClass.
+ OS << "const TargetRegisterClass *" << ClassName
+ << "::getMatchingSuperRegClass(const TargetRegisterClass *A,"
+ " const TargetRegisterClass *B, unsigned Idx) const {\n";
+ if (SubRegIndices.empty()) {
+ OS << " llvm_unreachable(\"Target has no sub-registers\");\n";
+ } else {
+ // We need to find the largest sub-class of A such that every register has
+ // an Idx sub-register in B. Map (B, Idx) to a bit-vector of
+ // super-register classes that map into B. Then compute the largest common
+ // sub-class with A by taking advantage of the register class ordering,
+ // like getCommonSubClass().
+
+ // Bitvector table is NumRCs x NumSubIndexes x BVWords, where BVWords is
+ // the number of 32-bit words required to represent all register classes.
+ const unsigned BVWords = (RegisterClasses.size()+31)/32;
+ BitVector BV(RegisterClasses.size());
+
+ OS << " static const uint32_t Table[" << RegisterClasses.size()
+ << "][" << SubRegIndices.size() << "][" << BVWords << "] = {\n";
+ for (unsigned rci = 0, rce = RegisterClasses.size(); rci != rce; ++rci) {
+ const CodeGenRegisterClass &RC = *RegisterClasses[rci];
+ OS << " {\t// " << RC.getName() << "\n";
+ for (unsigned sri = 0, sre = SubRegIndices.size(); sri != sre; ++sri) {
+ CodeGenSubRegIndex *Idx = SubRegIndices[sri];
+ BV.reset();
+ RC.getSuperRegClasses(Idx, BV);
+ OS << " { ";
+ printBitVectorAsHex(OS, BV, 32);
+ OS << "},\t// " << Idx->getName() << '\n';
+ }
+ OS << " },\n";
+ }
+ OS << " };\n assert(A && B && \"Missing regclass\");\n"
+ << " --Idx;\n"
+ << " assert(Idx < " << SubRegIndices.size() << " && \"Bad subreg\");\n"
+ << " const uint32_t *TV = Table[B->getID()][Idx];\n"
+ << " const uint32_t *SC = A->getSubClassMask();\n"
+ << " for (unsigned i = 0; i != " << BVWords << "; ++i)\n"
+ << " if (unsigned Common = TV[i] & SC[i])\n"
+ << " return getRegClass(32*i + CountTrailingZeros_32(Common));\n"
+ << " return 0;\n";
+ }
+ OS << "}\n\n";
+
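The generated method body is a masked scan for the lowest set bit common to both masks. A self-contained sketch of that scan, assuming BVWords == 2 (the real emitter inlines the target's constants):

  #include <cstdint>

  // Returns the ID of the largest common sub-class, or -1 if none.
  // TV is Table[B->getID()][Idx-1]; SC is A->getSubClassMask().
  int findCommonSubClass(const uint32_t *TV, const uint32_t *SC) {
    for (unsigned i = 0; i != 2; ++i)
      if (uint32_t Common = TV[i] & SC[i])
        return 32*i + __builtin_ctz(Common); // CountTrailingZeros_32 in LLVM
    return -1;
  }

Register class IDs are ordered so that the first common set bit identifies the largest common sub-class, mirroring getCommonSubClass().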
+ EmitRegUnitPressure(OS, RegBank, ClassName);
+
// Emit the constructor of the class...
- OS << "extern MCRegisterDesc " << TargetName << "RegDesc[];\n";
+ OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
+ OS << "extern const uint16_t " << TargetName << "RegLists[];\n";
+ if (SubRegIndices.size() != 0)
+ OS << "extern const uint16_t *get" << TargetName
+ << "SubRegTable();\n";
- OS << ClassName << "::" << ClassName
+ EmitRegMappingTables(OS, Regs, true);
+
+ OS << ClassName << "::\n" << ClassName
<< "(unsigned RA, unsigned DwarfFlavour, unsigned EHFlavour)\n"
<< " : TargetRegisterInfo(" << TargetName << "RegInfoDesc"
<< ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n"
- << " " << TargetName << "SubRegIndexTable) {\n"
+ << " " << TargetName << "SubRegIndexTable) {\n"
<< " InitMCRegisterInfo(" << TargetName << "RegDesc, "
- << Regs.size()+1 << ", RA, " << TargetName << "MCRegisterClasses, "
- << RegisterClasses.size() << ");\n\n";
+ << Regs.size()+1 << ", RA,\n " << TargetName
+ << "MCRegisterClasses, " << RegisterClasses.size() << ",\n"
+ << " " << TargetName << "RegLists,\n"
+ << " ";
+ if (SubRegIndices.size() != 0)
+ OS << "get" << TargetName << "SubRegTable(), "
+ << SubRegIndices.size() << ");\n\n";
+ else
+ OS << "NULL, 0);\n\n";
EmitRegMapping(OS, Regs, true);
OS << "}\n\n";
+
+ // Emit CalleeSavedRegs information.
+ std::vector<Record*> CSRSets =
+ Records.getAllDerivedDefinitions("CalleeSavedRegs");
+ for (unsigned i = 0, e = CSRSets.size(); i != e; ++i) {
+ Record *CSRSet = CSRSets[i];
+ const SetTheory::RecVec *Regs = RegBank.getSets().expand(CSRSet);
+ assert(Regs && "Cannot expand CalleeSavedRegs instance");
+
+ // Emit the *_SaveList list of callee-saved registers.
+ OS << "static const uint16_t " << CSRSet->getName()
+ << "_SaveList[] = { ";
+ for (unsigned r = 0, re = Regs->size(); r != re; ++r)
+ OS << getQualifiedName((*Regs)[r]) << ", ";
+ OS << "0 };\n";
+
+ // Emit the *_RegMask bit mask of call-preserved registers.
+ OS << "static const uint32_t " << CSRSet->getName()
+ << "_RegMask[] = { ";
+ printBitVectorAsHex(OS, RegBank.computeCoveredRegisters(*Regs), 32);
+ OS << "};\n";
+ }
+ OS << "\n\n";
+
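For a set covering two registers, the emitted pair looks roughly like this (names and mask bits hypothetical):

  static const uint16_t CSR_Xyz_SaveList[] = { Xyz::R4, Xyz::R5, 0 };
  static const uint32_t CSR_Xyz_RegMask[] = { 0x00000030, };

The save list is the null-terminated spill order; the mask has one bit per register, set when the register is preserved across a call, which makes call-clobber queries a single bit test.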
OS << "} // End llvm namespace \n";
OS << "#endif // GET_REGINFO_TARGET_DESC\n\n";
}
diff --git a/utils/TableGen/RegisterInfoEmitter.h b/utils/TableGen/RegisterInfoEmitter.h
index 0fd4d079ebc0..ee9903cac7b1 100644
--- a/utils/TableGen/RegisterInfoEmitter.h
+++ b/utils/TableGen/RegisterInfoEmitter.h
@@ -50,7 +50,13 @@ public:
private:
void EmitRegMapping(raw_ostream &o,
const std::vector<CodeGenRegister*> &Regs, bool isCtor);
+ void EmitRegMappingTables(raw_ostream &o,
+ const std::vector<CodeGenRegister*> &Regs,
+ bool isCtor);
void EmitRegClasses(raw_ostream &OS, CodeGenTarget &Target);
+
+ void EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
+ const std::string &ClassName);
};
} // End llvm namespace
diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h
new file mode 100644
index 000000000000..97c764e61d56
--- /dev/null
+++ b/utils/TableGen/SequenceToOffsetTable.h
@@ -0,0 +1,139 @@
+//===-- SequenceToOffsetTable.h - Compress similar sequences ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// SequenceToOffsetTable can be used to emit a number of null-terminated
+// sequences as one big array. Sequences that are suffixes of other sequences
+// share the same memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H
+#define TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H
+
+#include "llvm/Support/raw_ostream.h"
+#include <functional>
+#include <algorithm>
+#include <vector>
+#include <cassert>
+#include <cctype>
+
+namespace llvm {
+
+/// SequenceToOffsetTable - Collect a number of terminated sequences of T.
+/// Compute the layout of a table that contains all the sequences, possibly by
+/// reusing entries.
+///
+/// @param SeqT The sequence container. (vector or string).
+/// @param Less A stable comparator for SeqT elements.
+template<typename SeqT, typename Less = std::less<typename SeqT::value_type> >
+class SequenceToOffsetTable {
+ typedef typename SeqT::value_type ElemT;
+
+ // Define a comparator for SeqT that sorts a suffix immediately before a
+ // sequence with that suffix.
+ struct SeqLess : public std::binary_function<SeqT, SeqT, bool> {
+ Less L;
+ bool operator()(const SeqT &A, const SeqT &B) const {
+ return std::lexicographical_compare(A.rbegin(), A.rend(),
+ B.rbegin(), B.rend(), L);
+ }
+ };
+
+ // Keep sequences ordered according to SeqLess so suffixes are easy to find.
+ // Map each sequence to its offset in the table.
+ typedef std::map<SeqT, unsigned, SeqLess> SeqMap;
+
+ // Sequences added so far, with suffixes removed.
+ SeqMap Seqs;
+
+ // Entries in the final table, or 0 before layout was called.
+ unsigned Entries;
+
+ // isSuffix - Returns true if A is a suffix of B.
+ static bool isSuffix(const SeqT &A, const SeqT &B) {
+ return A.size() <= B.size() && std::equal(A.rbegin(), A.rend(), B.rbegin());
+ }
+
+public:
+ SequenceToOffsetTable() : Entries(0) {}
+
+ /// add - Add a sequence to the table.
+ /// This must be called before layout().
+ void add(const SeqT &Seq) {
+ assert(Entries == 0 && "Cannot call add() after layout()");
+ typename SeqMap::iterator I = Seqs.lower_bound(Seq);
+
+ // If SeqMap contains a sequence that has Seq as a suffix, I will be
+ // pointing to it.
+ if (I != Seqs.end() && isSuffix(Seq, I->first))
+ return;
+
+ I = Seqs.insert(I, std::make_pair(Seq, 0u));
+
+ // The entry before I may be a suffix of Seq that can now be erased.
+ if (I != Seqs.begin() && isSuffix((--I)->first, Seq))
+ Seqs.erase(I);
+ }
+
+ /// layout - Computes the final table layout.
+ void layout() {
+ assert(Entries == 0 && "Can only call layout() once");
+ // Lay out the table in Seqs iteration order.
+ for (typename SeqMap::iterator I = Seqs.begin(), E = Seqs.end(); I != E;
+ ++I) {
+ I->second = Entries;
+ // Include space for a terminator.
+ Entries += I->first.size() + 1;
+ }
+ }
+
+ /// get - Returns the offset of Seq in the final table.
+ unsigned get(const SeqT &Seq) const {
+ assert(Entries && "Call layout() before get()");
+ typename SeqMap::const_iterator I = Seqs.lower_bound(Seq);
+ assert(I != Seqs.end() && isSuffix(Seq, I->first) &&
+ "get() called with sequence that wasn't added first");
+ return I->second + (I->first.size() - Seq.size());
+ }
+
+ /// emit - Print out the table as the body of an array initializer.
+ /// Use the Print function to print elements.
+ void emit(raw_ostream &OS,
+ void (*Print)(raw_ostream&, ElemT),
+ const char *Term = "0") const {
+ assert(Entries && "Call layout() before emit()");
+ for (typename SeqMap::const_iterator I = Seqs.begin(), E = Seqs.end();
+ I != E; ++I) {
+ OS << " /* " << I->second << " */ ";
+ for (typename SeqT::const_iterator SI = I->first.begin(),
+ SE = I->first.end(); SI != SE; ++SI) {
+ Print(OS, *SI);
+ OS << ", ";
+ }
+ OS << Term << ",\n";
+ }
+ }
+};
+
+// Helper function for SequenceToOffsetTable<string>.
+static inline void printChar(raw_ostream &OS, char C) {
+ unsigned char UC(C);
+ if (isalnum(UC) || ispunct(UC)) {
+ OS << '\'';
+ if (C == '\\' || C == '\'')
+ OS << '\\';
+ OS << C << '\'';
+ } else {
+ OS << unsigned(UC);
+ }
+}
+
+} // end namespace llvm
+
+#endif
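A small end-to-end sketch of the header in use, with std::string sequences (offsets shown are what layout() assigns for these inputs):

  #include "SequenceToOffsetTable.h"
  #include <string>

  llvm::SequenceToOffsetTable<std::string> Table;
  Table.add("foo");
  Table.add("bar");    // absorbed later: it is a suffix of "foobar"
  Table.add("foobar");
  Table.layout();
  // Table.get("foo") == 0, Table.get("foobar") == 4, Table.get("bar") == 7;
  // "bar" is read out of the tail of "foobar".
  // Table.emit(OS, llvm::printChar) then prints:
  //   /* 0 */ 'f', 'o', 'o', 0,
  //   /* 4 */ 'f', 'o', 'o', 'b', 'a', 'r', 0,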
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index bef73f33effe..0649fd1cfaf9 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -139,6 +139,24 @@ struct DecimateOp : public SetIntBinOp {
}
};
+// (interleave S1, S2, ...) Interleave elements of the arguments.
+struct InterleaveOp : public SetTheory::Operator {
+ void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
+ // Evaluate the arguments individually.
+ SmallVector<RecSet, 4> Args(Expr->getNumArgs());
+ unsigned MaxSize = 0;
+ for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) {
+ ST.evaluate(Expr->getArg(i), Args[i]);
+ MaxSize = std::max(MaxSize, unsigned(Args[i].size()));
+ }
+ // Interleave arguments into Elts.
+ for (unsigned n = 0; n != MaxSize; ++n)
+ for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i)
+ if (n < Args[i].size())
+ Elts.insert(Args[i][n]);
+ }
+};
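The operator is a plain round-robin merge. The same logic on ordinary vectors (element names hypothetical):

  #include <algorithm>
  #include <string>
  #include <vector>

  std::vector<std::string> A = {"R0", "R1", "R2"}, B = {"S0", "S1"};
  std::vector<std::string> Out;
  for (size_t n = 0, m = std::max(A.size(), B.size()); n != m; ++n) {
    if (n < A.size()) Out.push_back(A[n]); // one element from each argument
    if (n < B.size()) Out.push_back(B[n]); // per round, skipping exhausted ones
  }
  // Out == {"R0", "S0", "R1", "S1", "R2"}

In a .td file this would be written as, e.g., (interleave (sequence "R%u", 0, 2), (sequence "S%u", 0, 1)); note that RecSet is a set-vector, so duplicates are dropped.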
+
// (sequence "Format", From, To) Generate a sequence of records by name.
struct SequenceOp : public SetTheory::Operator {
void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts) {
@@ -198,6 +216,10 @@ struct FieldExpander : public SetTheory::Expander {
};
} // end anonymous namespace
+void SetTheory::Operator::anchor() { }
+
+void SetTheory::Expander::anchor() { }
+
SetTheory::SetTheory() {
addOperator("add", new AddOp);
addOperator("sub", new SubOp);
@@ -207,6 +229,7 @@ SetTheory::SetTheory() {
addOperator("rotl", new RotOp(false));
addOperator("rotr", new RotOp(true));
addOperator("decimate", new DecimateOp);
+ addOperator("interleave", new InterleaveOp);
addOperator("sequence", new SequenceOp);
}
diff --git a/utils/TableGen/SetTheory.h b/utils/TableGen/SetTheory.h
index 6e8313be07a3..b394058f4c35 100644
--- a/utils/TableGen/SetTheory.h
+++ b/utils/TableGen/SetTheory.h
@@ -65,7 +65,9 @@ public:
typedef SmallSetVector<Record*, 16> RecSet;
/// Operator - A callback representing a DAG operator.
- struct Operator {
+ class Operator {
+ virtual void anchor();
+ public:
virtual ~Operator() {}
/// apply - Apply this operator to Expr's arguments and insert the result
@@ -76,7 +78,9 @@ public:
/// Expander - A callback function that can transform a Record representing a
/// set into a fully expanded list of elements. Expanders provide a way for
/// users to define named sets that can be used in DAG expressions.
- struct Expander {
+ class Expander {
+ virtual void anchor();
+ public:
virtual ~Expander() {}
virtual void expand(SetTheory&, Record*, RecSet &Elts) =0;
diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h
index ac9422c5d72d..803f5bd5cf01 100644
--- a/utils/TableGen/StringToOffsetTable.h
+++ b/utils/TableGen/StringToOffsetTable.h
@@ -26,16 +26,17 @@ class StringToOffsetTable {
std::string AggregateString;
public:
- unsigned GetOrAddStringOffset(StringRef Str) {
- unsigned &Entry = StringOffset[Str];
- if (Entry == 0) {
+ unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
+ StringMapEntry<unsigned> &Entry = StringOffset.GetOrCreateValue(Str, -1U);
+ if (Entry.getValue() == -1U) {
// Add the string to the aggregate if this is the first time found.
- Entry = AggregateString.size();
+ Entry.setValue(AggregateString.size());
AggregateString.append(Str.begin(), Str.end());
- AggregateString += '\0';
+ if (appendZero)
+ AggregateString += '\0';
}
- return Entry;
+ return Entry.getValue();
}
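A behavior sketch of the new flag (the offsets follow from the bytes appended):

  StringToOffsetTable Table;
  unsigned A = Table.GetOrAddStringOffset("add");        // stores "add\0", A == 0
  unsigned B = Table.GetOrAddStringOffset("sub", false); // stores "sub" only, B == 4
  unsigned C = Table.GetOrAddStringOffset("add");        // cached hit, C == A

Callers that pack fixed-width or length-prefixed data can now skip the terminator.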
void EmitString(raw_ostream &O) {
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 103a4032b02a..986c50f87865 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -39,28 +39,41 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS,
OS << "namespace " << Target << " {\n";
- // Open enumeration
- OS << "enum {\n";
+ // For bit flag enumerations with more than 32 items, emit constants.
+ // Emit an enum for everything else.
+ if (isBits && N > 32) {
+ // For each record
+ for (unsigned i = 0; i < N; i++) {
+ // Next record
+ Record *Def = DefList[i];
+
+ // Get and emit name and expression (1 << i)
+ OS << " const uint64_t " << Def->getName() << " = 1ULL << " << i << ";\n";
+ }
+ } else {
+ // Open enumeration
+ OS << "enum {\n";
- // For each record
- for (unsigned i = 0; i < N;) {
- // Next record
- Record *Def = DefList[i];
+ // For each record
+ for (unsigned i = 0; i < N;) {
+ // Next record
+ Record *Def = DefList[i];
- // Get and emit name
- OS << " " << Def->getName();
+ // Get and emit name
+ OS << " " << Def->getName();
- // If bit flags then emit expression (1 << i)
- if (isBits) OS << " = " << " 1ULL << " << i;
+ // If bit flags then emit expression (1 << i)
+ if (isBits) OS << " = " << " 1ULL << " << i;
- // Depending on 'if more in the list' emit comma
- if (++i < N) OS << ",";
+ // Depending on 'if more in the list' emit comma
+ if (++i < N) OS << ",";
- OS << "\n";
- }
+ OS << "\n";
+ }
- // Close enumeration
- OS << "};\n";
+ // Close enumeration
+ OS << "};\n";
+ }
OS << "}\n";
}
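With more than 32 bit-flag features, the output switches from an enum (whose underlying type may be too narrow for 1ULL << i) to explicit 64-bit constants, roughly (namespace and feature names hypothetical):

  namespace Xyz {
    const uint64_t FeatureA  = 1ULL << 0;
    const uint64_t FeatureB  = 1ULL << 1;
    // ...
    const uint64_t Feature33 = 1ULL << 33;
  }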
@@ -81,7 +94,8 @@ unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
// Begin feature table
OS << "// Sorted (by key) array of values for CPU features.\n"
- << "llvm::SubtargetFeatureKV " << Target << "FeatureKV[] = {\n";
+ << "extern const llvm::SubtargetFeatureKV " << Target
+ << "FeatureKV[] = {\n";
// For each feature
unsigned NumFeatures = 0;
@@ -140,7 +154,8 @@ unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// Begin processor table
OS << "// Sorted (by key) array of values for CPU subtype.\n"
- << "llvm::SubtargetFeatureKV " << Target << "SubTypeKV[] = {\n";
+ << "extern const llvm::SubtargetFeatureKV " << Target
+ << "SubTypeKV[] = {\n";
// For each processor
for (unsigned i = 0, N = ProcessorList.size(); i < N;) {
@@ -327,9 +342,9 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
OS << "\n// Pipeline forwarding pathes for itineraries \"" << Name
<< "\"\n" << "namespace " << Name << "Bypass {\n";
- OS << " unsigned NoBypass = 0;\n";
+ OS << " const unsigned NoBypass = 0;\n";
for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j)
- OS << " unsigned " << BPs[j]->getName()
+ OS << " const unsigned " << BPs[j]->getName()
<< " = 1 << " << j << ";\n";
OS << "}\n";
@@ -337,16 +352,17 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
}
// Begin stages table
- std::string StageTable = "\nllvm::InstrStage " + Target + "Stages[] = {\n";
+ std::string StageTable = "\nextern const llvm::InstrStage " + Target +
+ "Stages[] = {\n";
StageTable += " { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n";
// Begin operand cycle table
- std::string OperandCycleTable = "unsigned " + Target +
+ std::string OperandCycleTable = "extern const unsigned " + Target +
"OperandCycles[] = {\n";
OperandCycleTable += " 0, // No itinerary\n";
// Begin pipeline bypass table
- std::string BypassTable = "unsigned " + Target +
+ std::string BypassTable = "extern const unsigned " + Target +
"ForwardingPathes[] = {\n";
BypassTable += " 0, // No itinerary\n";
@@ -488,7 +504,7 @@ EmitProcessorData(raw_ostream &OS,
// Begin processor itinerary table
OS << "\n";
- OS << "llvm::InstrItinerary " << Name << "[] = {\n";
+ OS << "static const llvm::InstrItinerary " << Name << "[] = {\n";
// For each itinerary class
std::vector<InstrItinerary> &ItinList = *ProcListIter++;
@@ -530,7 +546,7 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// Begin processor table
OS << "\n";
OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"
- << "llvm::SubtargetInfoKV "
+ << "extern const llvm::SubtargetInfoKV "
<< Target << "ProcItinKV[] = {\n";
// For each processor
@@ -708,9 +724,13 @@ void SubtargetEmitter::run(raw_ostream &OS) {
std::string ClassName = Target + "GenSubtargetInfo";
OS << "namespace llvm {\n";
+ OS << "class DFAPacketizer;\n";
OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
<< " explicit " << ClassName << "(StringRef TT, StringRef CPU, "
<< "StringRef FS);\n"
+ << "public:\n"
+ << " DFAPacketizer *createDFAPacketizer(const InstrItineraryData *IID)"
+ << " const;\n"
<< "};\n";
OS << "} // End llvm namespace \n";
@@ -720,13 +740,13 @@ void SubtargetEmitter::run(raw_ostream &OS) {
OS << "#undef GET_SUBTARGETINFO_CTOR\n";
OS << "namespace llvm {\n";
- OS << "extern llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
- OS << "extern llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n";
+ OS << "extern const llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
+ OS << "extern const llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n";
if (HasItineraries) {
- OS << "extern llvm::SubtargetInfoKV " << Target << "ProcItinKV[];\n";
- OS << "extern llvm::InstrStage " << Target << "Stages[];\n";
- OS << "extern unsigned " << Target << "OperandCycles[];\n";
- OS << "extern unsigned " << Target << "ForwardingPathes[];\n";
+ OS << "extern const llvm::SubtargetInfoKV " << Target << "ProcItinKV[];\n";
+ OS << "extern const llvm::InstrStage " << Target << "Stages[];\n";
+ OS << "extern const unsigned " << Target << "OperandCycles[];\n";
+ OS << "extern const unsigned " << Target << "ForwardingPathes[];\n";
}
OS << ClassName << "::" << ClassName << "(StringRef TT, StringRef CPU, "
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index eacfdf6fed39..8c41358e3d85 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -16,6 +16,7 @@
#include "CallingConvEmitter.h"
#include "CodeEmitterGen.h"
#include "DAGISelEmitter.h"
+#include "DFAPacketizerEmitter.h"
#include "DisassemblerEmitter.h"
#include "EDEmitter.h"
#include "FastISelEmitter.h"
@@ -23,7 +24,6 @@
#include "IntrinsicEmitter.h"
#include "PseudoLoweringEmitter.h"
#include "RegisterInfoEmitter.h"
-#include "ARMDecoderEmitter.h"
#include "SubtargetEmitter.h"
#include "SetTheory.h"
@@ -44,11 +44,11 @@ enum ActionType {
GenInstrInfo,
GenAsmWriter,
GenAsmMatcher,
- GenARMDecoder,
GenDisassembler,
GenPseudoLowering,
GenCallingConv,
GenDAGISel,
+ GenDFAPacketizer,
GenFastISel,
GenSubtarget,
GenIntrinsic,
@@ -73,8 +73,6 @@ namespace {
"Generate calling convention descriptions"),
clEnumValN(GenAsmWriter, "gen-asm-writer",
"Generate assembly writer"),
- clEnumValN(GenARMDecoder, "gen-arm-decoder",
- "Generate decoders for ARM/Thumb"),
clEnumValN(GenDisassembler, "gen-disassembler",
"Generate disassembler"),
clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
@@ -83,6 +81,8 @@ namespace {
"Generate assembly instruction matcher"),
clEnumValN(GenDAGISel, "gen-dag-isel",
"Generate a DAG instruction selector"),
+ clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer",
+ "Generate DFA Packetizer for VLIW targets"),
clEnumValN(GenFastISel, "gen-fast-isel",
"Generate a \"fast\" instruction selector"),
clEnumValN(GenSubtarget, "gen-subtarget",
@@ -101,92 +101,89 @@ namespace {
cl::opt<std::string>
Class("class", cl::desc("Print Enum list for this class"),
- cl::value_desc("class name"));
-}
-
-class LLVMTableGenAction : public TableGenAction {
-public:
- bool operator()(raw_ostream &OS, RecordKeeper &Records) {
- switch (Action) {
- case PrintRecords:
- OS << Records; // No argument, dump all contents
- break;
- case GenEmitter:
- CodeEmitterGen(Records).run(OS);
- break;
- case GenRegisterInfo:
- RegisterInfoEmitter(Records).run(OS);
- break;
- case GenInstrInfo:
- InstrInfoEmitter(Records).run(OS);
- break;
- case GenCallingConv:
- CallingConvEmitter(Records).run(OS);
- break;
- case GenAsmWriter:
- AsmWriterEmitter(Records).run(OS);
- break;
- case GenARMDecoder:
- ARMDecoderEmitter(Records).run(OS);
- break;
- case GenAsmMatcher:
- AsmMatcherEmitter(Records).run(OS);
- break;
- case GenDisassembler:
- DisassemblerEmitter(Records).run(OS);
- break;
- case GenPseudoLowering:
- PseudoLoweringEmitter(Records).run(OS);
- break;
- case GenDAGISel:
- DAGISelEmitter(Records).run(OS);
- break;
- case GenFastISel:
- FastISelEmitter(Records).run(OS);
- break;
- case GenSubtarget:
- SubtargetEmitter(Records).run(OS);
- break;
- case GenIntrinsic:
- IntrinsicEmitter(Records).run(OS);
- break;
- case GenTgtIntrinsic:
- IntrinsicEmitter(Records, true).run(OS);
- break;
- case GenEDInfo:
- EDEmitter(Records).run(OS);
- break;
- case PrintEnums:
- {
- std::vector<Record*> Recs = Records.getAllDerivedDefinitions(Class);
- for (unsigned i = 0, e = Recs.size(); i != e; ++i)
- OS << Recs[i]->getName() << ", ";
- OS << "\n";
- break;
- }
- case PrintSets:
- {
- SetTheory Sets;
- Sets.addFieldExpander("Set", "Elements");
- std::vector<Record*> Recs = Records.getAllDerivedDefinitions("Set");
- for (unsigned i = 0, e = Recs.size(); i != e; ++i) {
- OS << Recs[i]->getName() << " = [";
- const std::vector<Record*> *Elts = Sets.expand(Recs[i]);
- assert(Elts && "Couldn't expand Set instance");
- for (unsigned ei = 0, ee = Elts->size(); ei != ee; ++ei)
- OS << ' ' << (*Elts)[ei]->getName();
- OS << " ]\n";
+ cl::value_desc("class name"));
+
+ class LLVMTableGenAction : public TableGenAction {
+ public:
+ bool operator()(raw_ostream &OS, RecordKeeper &Records) {
+ switch (Action) {
+ case PrintRecords:
+ OS << Records; // No argument, dump all contents
+ break;
+ case GenEmitter:
+ CodeEmitterGen(Records).run(OS);
+ break;
+ case GenRegisterInfo:
+ RegisterInfoEmitter(Records).run(OS);
+ break;
+ case GenInstrInfo:
+ InstrInfoEmitter(Records).run(OS);
+ break;
+ case GenCallingConv:
+ CallingConvEmitter(Records).run(OS);
+ break;
+ case GenAsmWriter:
+ AsmWriterEmitter(Records).run(OS);
+ break;
+ case GenAsmMatcher:
+ AsmMatcherEmitter(Records).run(OS);
+ break;
+ case GenDisassembler:
+ DisassemblerEmitter(Records).run(OS);
+ break;
+ case GenPseudoLowering:
+ PseudoLoweringEmitter(Records).run(OS);
+ break;
+ case GenDAGISel:
+ DAGISelEmitter(Records).run(OS);
+ break;
+ case GenDFAPacketizer:
+ DFAGen(Records).run(OS);
+ break;
+ case GenFastISel:
+ FastISelEmitter(Records).run(OS);
+ break;
+ case GenSubtarget:
+ SubtargetEmitter(Records).run(OS);
+ break;
+ case GenIntrinsic:
+ IntrinsicEmitter(Records).run(OS);
+ break;
+ case GenTgtIntrinsic:
+ IntrinsicEmitter(Records, true).run(OS);
+ break;
+ case GenEDInfo:
+ EDEmitter(Records).run(OS);
+ break;
+ case PrintEnums:
+ {
+ std::vector<Record*> Recs = Records.getAllDerivedDefinitions(Class);
+ for (unsigned i = 0, e = Recs.size(); i != e; ++i)
+ OS << Recs[i]->getName() << ", ";
+ OS << "\n";
+ break;
}
- break;
- }
- default:
- assert(1 && "Invalid Action");
- return true;
+ case PrintSets:
+ {
+ SetTheory Sets;
+ Sets.addFieldExpander("Set", "Elements");
+ std::vector<Record*> Recs = Records.getAllDerivedDefinitions("Set");
+ for (unsigned i = 0, e = Recs.size(); i != e; ++i) {
+ OS << Recs[i]->getName() << " = [";
+ const std::vector<Record*> *Elts = Sets.expand(Recs[i]);
+ assert(Elts && "Couldn't expand Set instance");
+ for (unsigned ei = 0, ee = Elts->size(); ei != ee; ++ei)
+ OS << ' ' << (*Elts)[ei]->getName();
+ OS << " ]\n";
+ }
+ break;
+ }
+ }
+
+ return false;
}
-
- return false;
- }
-};
+ };
+}
int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal();
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index e8c9a4897321..2875168a1083 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -41,15 +41,20 @@ static inline bool inheritsFrom(InstructionContext child,
case IC:
return(inheritsFrom(child, IC_64BIT) ||
inheritsFrom(child, IC_OPSIZE) ||
+ inheritsFrom(child, IC_ADSIZE) ||
inheritsFrom(child, IC_XD) ||
inheritsFrom(child, IC_XS));
case IC_64BIT:
return(inheritsFrom(child, IC_64BIT_REXW) ||
inheritsFrom(child, IC_64BIT_OPSIZE) ||
+ inheritsFrom(child, IC_64BIT_ADSIZE) ||
inheritsFrom(child, IC_64BIT_XD) ||
inheritsFrom(child, IC_64BIT_XS));
case IC_OPSIZE:
return inheritsFrom(child, IC_64BIT_OPSIZE);
+ case IC_ADSIZE:
+ case IC_64BIT_ADSIZE:
+ return false;
case IC_XD:
return inheritsFrom(child, IC_64BIT_XD);
case IC_XS:
@@ -95,11 +100,13 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_VEX_L:
case IC_VEX_L_XS:
case IC_VEX_L_XD:
+ return false;
case IC_VEX_L_OPSIZE:
+ return inheritsFrom(child, IC_VEX_L_W_OPSIZE);
+ case IC_VEX_L_W_OPSIZE:
return false;
default:
llvm_unreachable("Unknown instruction class");
- return false;
}
}
@@ -138,8 +145,6 @@ static inline const char* stringForContext(InstructionContext insnContext) {
INSTRUCTION_CONTEXTS
#undef ENUM_ENTRY
}
-
- return 0;
}
/// stringForOperandType - Like stringForContext, but for OperandTypes.
@@ -194,8 +199,7 @@ void DisassemblerTables::emitOneID(raw_ostream &o,
/// @param i - The indentation level for that output stream.
static void emitEmptyTable(raw_ostream &o, uint32_t &i)
{
- o.indent(i * 2) << "static const InstrUID modRMEmptyTable[1] = { 0 };\n";
- o << "\n";
+ o.indent(i * 2) << "0x0, /* EmptyTable */\n";
}
/// getDecisionType - Determines whether a ModRM decision with 255 entries can
@@ -207,28 +211,40 @@ static ModRMDecisionType getDecisionType(ModRMDecision &decision)
{
bool satisfiesOneEntry = true;
bool satisfiesSplitRM = true;
-
+ bool satisfiesSplitReg = true;
+
uint16_t index;
-
+
for (index = 0; index < 256; ++index) {
if (decision.instructionIDs[index] != decision.instructionIDs[0])
satisfiesOneEntry = false;
-
+
if (((index & 0xc0) == 0xc0) &&
(decision.instructionIDs[index] != decision.instructionIDs[0xc0]))
satisfiesSplitRM = false;
-
+
if (((index & 0xc0) != 0xc0) &&
(decision.instructionIDs[index] != decision.instructionIDs[0x00]))
satisfiesSplitRM = false;
+
+ if (((index & 0xc0) == 0xc0) &&
+ (decision.instructionIDs[index] != decision.instructionIDs[index&0xf8]))
+ satisfiesSplitReg = false;
+
+ if (((index & 0xc0) != 0xc0) &&
+ (decision.instructionIDs[index] != decision.instructionIDs[index&0x38]))
+ satisfiesSplitReg = false;
}
-
+
if (satisfiesOneEntry)
return MODRM_ONEENTRY;
-
+
if (satisfiesSplitRM)
return MODRM_SPLITRM;
-
+
+ if (satisfiesSplitReg)
+ return MODRM_SPLITREG;
+
return MODRM_FULL;
}
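A MODRM_SPLITREG decision stores 16 IDs: eight selected by the reg field for mod != 0b11, then eight for mod == 0b11. A hypothetical reader-side decode matching the emission order in emitModRMDecision below:

  #include <cstdint>
  typedef uint16_t InstrUID; // matches the emitted table element type

  InstrUID lookupSplitReg(const InstrUID *Tbl, uint8_t ModRM) {
    unsigned Reg = (ModRM & 0x38) >> 3;      // nnn field, bits 5-3
    if ((ModRM & 0xc0) == 0xc0)              // register-direct forms
      return Tbl[8 + Reg];
    return Tbl[Reg];
  }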
@@ -291,71 +307,76 @@ void DisassemblerTables::emitModRMDecision(raw_ostream &o1,
ModRMDecision &decision)
const {
static uint64_t sTableNumber = 0;
- uint64_t thisTableNumber = sTableNumber;
+ static uint64_t sEntryNumber = 1;
ModRMDecisionType dt = getDecisionType(decision);
uint16_t index;
-
+
if (dt == MODRM_ONEENTRY && decision.instructionIDs[0] == 0)
{
o2.indent(i2) << "{ /* ModRMDecision */" << "\n";
i2++;
-
+
o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
- o2.indent(i2) << "modRMEmptyTable";
-
+ o2.indent(i2) << 0 << " /* EmptyTable */\n";
+
i2--;
o2.indent(i2) << "}";
return;
}
-
- o1.indent(i1) << "static const InstrUID modRMTable" << thisTableNumber;
-
- switch (dt) {
- default:
- llvm_unreachable("Unknown decision type");
- case MODRM_ONEENTRY:
- o1 << "[1]";
- break;
- case MODRM_SPLITRM:
- o1 << "[2]";
- break;
- case MODRM_FULL:
- o1 << "[256]";
- break;
- }
- o1 << " = {" << "\n";
+ o1 << "/* Table" << sTableNumber << " */\n";
i1++;
-
+
switch (dt) {
default:
llvm_unreachable("Unknown decision type");
case MODRM_ONEENTRY:
- emitOneID(o1, i1, decision.instructionIDs[0], false);
+ emitOneID(o1, i1, decision.instructionIDs[0], true);
break;
case MODRM_SPLITRM:
emitOneID(o1, i1, decision.instructionIDs[0x00], true); // mod = 0b00
- emitOneID(o1, i1, decision.instructionIDs[0xc0], false); // mod = 0b11
+ emitOneID(o1, i1, decision.instructionIDs[0xc0], true); // mod = 0b11
+ break;
+ case MODRM_SPLITREG:
+ for (index = 0; index < 64; index += 8)
+ emitOneID(o1, i1, decision.instructionIDs[index], true);
+ for (index = 0xc0; index < 256; index += 8)
+ emitOneID(o1, i1, decision.instructionIDs[index], true);
break;
case MODRM_FULL:
for (index = 0; index < 256; ++index)
- emitOneID(o1, i1, decision.instructionIDs[index], index < 255);
+ emitOneID(o1, i1, decision.instructionIDs[index], true);
break;
}
-
+
i1--;
- o1.indent(i1) << "};" << "\n";
- o1 << "\n";
-
+
o2.indent(i2) << "{ /* struct ModRMDecision */" << "\n";
i2++;
-
+
o2.indent(i2) << stringForDecisionType(dt) << "," << "\n";
- o2.indent(i2) << "modRMTable" << sTableNumber << "\n";
-
+ o2.indent(i2) << sEntryNumber << " /* Table" << sTableNumber << " */\n";
+
i2--;
o2.indent(i2) << "}";
-
+
+ switch (dt) {
+ default:
+ llvm_unreachable("Unknown decision type");
+ case MODRM_ONEENTRY:
+ sEntryNumber += 1;
+ break;
+ case MODRM_SPLITRM:
+ sEntryNumber += 2;
+ break;
+ case MODRM_SPLITREG:
+ sEntryNumber += 16;
+ break;
+ case MODRM_FULL:
+ sEntryNumber += 256;
+ break;
+ }
+
++sTableNumber;
}
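All decisions now share one flat modRMTable, and a ModRMDecision records an offset instead of a pointer to a per-decision array. The emitted shape is roughly (IDs hypothetical):

  static const InstrUID modRMTable[] = {
    0x0, /* EmptyTable */
    /* Table0 */
    0x123,            /* a MODRM_ONEENTRY decision: a single ID */
    /* Table1 */
    0x200, 0x201,     /* a MODRM_SPLITRM decision: mod != 0b11, mod == 0b11 */
    0x0
  };
  /* ...and each decision refers to it by offset: { MODRM_ONEENTRY, 1 } */

The offsets replace hundreds of separately named static arrays with a single table.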
@@ -436,11 +457,11 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
for (index = 0; index < numInstructions; ++index) {
o.indent(i * 2) << "{ /* " << index << " */" << "\n";
i++;
-
- o.indent(i * 2) <<
- stringForModifierType(InstructionSpecifiers[index].modifierType);
+
+ o.indent(i * 2) << stringForModifierType(
+ (ModifierType)InstructionSpecifiers[index].modifierType);
o << "," << "\n";
-
+
o.indent(i * 2) << "0x";
o << format("%02hhx", (uint16_t)InstructionSpecifiers[index].modifierBase);
o << "," << "\n";
@@ -450,11 +471,11 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
for (operandIndex = 0; operandIndex < X86_MAX_OPERANDS; ++operandIndex) {
o.indent(i * 2) << "{ ";
- o << stringForOperandEncoding(InstructionSpecifiers[index]
- .operands[operandIndex]
- .encoding);
o << stringForOperandEncoding((OperandEncoding)InstructionSpecifiers[index]
+ .operands[operandIndex]
+ .encoding);
o << ", ";
- o << stringForOperandType(InstructionSpecifiers[index]
+ o << stringForOperandType((OperandType)InstructionSpecifiers[index]
.operands[operandIndex]
.type);
o << " }";
@@ -468,7 +489,7 @@ void DisassemblerTables::emitInstructionInfo(raw_ostream &o, uint32_t &i)
i--;
o.indent(i * 2) << "}," << "\n";
- o.indent(i * 2) << "\"" << InstructionSpecifiers[index].name << "\"";
+ o.indent(i * 2) << "/* " << InstructionSpecifiers[index].name << " */";
o << "\n";
i--;
@@ -494,7 +515,9 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
for (index = 0; index < 256; ++index) {
o.indent(i * 2);
- if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
+ if ((index & ATTR_VEXL) && (index & ATTR_REXW) && (index & ATTR_OPSIZE))
+ o << "IC_VEX_L_W_OPSIZE";
+ else if ((index & ATTR_VEXL) && (index & ATTR_OPSIZE))
o << "IC_VEX_L_OPSIZE";
else if ((index & ATTR_VEXL) && (index & ATTR_XD))
o << "IC_VEX_L_XD";
@@ -535,6 +558,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
o << "IC_64BIT_XD";
else if ((index & ATTR_64BIT) && (index & ATTR_OPSIZE))
o << "IC_64BIT_OPSIZE";
+ else if ((index & ATTR_64BIT) && (index & ATTR_ADSIZE))
+ o << "IC_64BIT_ADSIZE";
else if ((index & ATTR_64BIT) && (index & ATTR_REXW))
o << "IC_64BIT_REXW";
else if ((index & ATTR_64BIT))
@@ -549,6 +574,8 @@ void DisassemblerTables::emitContextTable(raw_ostream &o, uint32_t &i) const {
o << "IC_XD";
else if (index & ATTR_OPSIZE)
o << "IC_OPSIZE";
+ else if (index & ATTR_ADSIZE)
+ o << "IC_ADSIZE";
else
o << "IC";
@@ -594,11 +621,16 @@ void DisassemblerTables::emit(raw_ostream &o) const {
emitContextTable(o, i2);
o << "\n";
-
+
+ o << "static const InstrUID modRMTable[] = {\n";
+ i1++;
emitEmptyTable(o1, i1);
+ i1--;
emitContextDecisions(o1, o2, i1, i2);
-
+
o << o1.str();
+ o << " 0x0\n";
+ o << "};\n";
o << "\n";
o << o2.str();
o << "\n";
diff --git a/utils/TableGen/X86ModRMFilters.cpp b/utils/TableGen/X86ModRMFilters.cpp
new file mode 100644
index 000000000000..7166fe02d890
--- /dev/null
+++ b/utils/TableGen/X86ModRMFilters.cpp
@@ -0,0 +1,26 @@
+//===- X86ModRMFilters.cpp - Disassembler ModR/M filters --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86ModRMFilters.h"
+
+using namespace llvm::X86Disassembler;
+
+void ModRMFilter::anchor() { }
+
+void DumbFilter::anchor() { }
+
+void ModFilter::anchor() { }
+
+void EscapeFilter::anchor() { }
+
+void AddRegEscapeFilter::anchor() { }
+
+void ExtendedFilter::anchor() { }
+
+void ExactFilter::anchor() { }
diff --git a/utils/TableGen/X86ModRMFilters.h b/utils/TableGen/X86ModRMFilters.h
index 199040bad840..19fecbc3a0a5 100644
--- a/utils/TableGen/X86ModRMFilters.h
+++ b/utils/TableGen/X86ModRMFilters.h
@@ -27,6 +27,7 @@ namespace X86Disassembler {
/// ModRMFilter - Abstract base class for classes that recognize patterns in
/// ModR/M bytes.
class ModRMFilter {
+ virtual void anchor();
public:
/// Destructor - Override as necessary.
virtual ~ModRMFilter() { }
@@ -49,6 +50,7 @@ public:
/// require a ModR/M byte or instructions where the entire ModR/M byte is used
/// for operands.
class DumbFilter : public ModRMFilter {
+ virtual void anchor();
public:
bool isDumb() const {
return true;
@@ -63,7 +65,7 @@ public:
/// Some instructions are classified based on whether the ModR/M mod field is
/// 0b11 or anything else. This filter performs that classification.
class ModFilter : public ModRMFilter {
-private:
+ virtual void anchor();
bool R;
public:
/// Constructor
@@ -90,7 +92,7 @@ public:
/// possible value. Otherwise, there is one instruction for each value of the
/// nnn field [bits 5-3], known elsewhere as the reg field.
class EscapeFilter : public ModRMFilter {
-private:
+ virtual void anchor();
bool C0_FF;
uint8_t NNN_or_ModRM;
public:
@@ -121,7 +123,7 @@ public:
/// maps to a single instruction. Such instructions require the ModR/M byte
/// to fall between 0xc0 and 0xff.
class AddRegEscapeFilter : public ModRMFilter {
-private:
+ virtual void anchor();
uint8_t ModRM;
public:
/// Constructor
@@ -142,7 +144,7 @@ public:
/// ExtendedFilter - Extended opcodes are classified based on the value of the
/// mod field [bits 7-6] and the value of the nnn field [bits 5-3].
class ExtendedFilter : public ModRMFilter {
-private:
+ virtual void anchor();
bool R;
uint8_t NNN;
public:
@@ -169,9 +171,8 @@ public:
/// ExactFilter - The occasional extended opcode (such as VMCALL or MONITOR)
/// requires the ModR/M byte to have a specific value.
-class ExactFilter : public ModRMFilter
-{
-private:
+class ExactFilter : public ModRMFilter {
+ virtual void anchor();
uint8_t ModRM;
public:
/// Constructor
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index cae823749245..6a01cce637e4 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -36,7 +36,16 @@ using namespace llvm;
MAP(F8, 41) \
MAP(F9, 42) \
MAP(D0, 45) \
- MAP(D1, 46)
+ MAP(D1, 46) \
+ MAP(D4, 47) \
+ MAP(D8, 48) \
+ MAP(D9, 49) \
+ MAP(DA, 50) \
+ MAP(DB, 51) \
+ MAP(DC, 52) \
+ MAP(DD, 53) \
+ MAP(DE, 54) \
+ MAP(DF, 55)
// A clone of X86 since we can't depend on something that is generated.
namespace X86Local {
@@ -68,7 +77,7 @@ namespace X86Local {
DC = 7, DD = 8, DE = 9, DF = 10,
XD = 11, XS = 12,
T8 = 13, P_TA = 14,
- A6 = 15, A7 = 16, TF = 17
+ A6 = 15, A7 = 16, T8XD = 17, T8XS = 18, TAXD = 19
};
}
@@ -119,6 +128,9 @@ namespace X86Local {
EXTENSION_TABLE(ba) \
EXTENSION_TABLE(c7)
+#define THREE_BYTE_38_EXTENSION_TABLES \
+ EXTENSION_TABLE(F3)
+
using namespace X86Disassembler;
/// needsModRMForDecode - Indicates whether a particular instruction requires a
@@ -213,10 +225,13 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
SegOvr = byteFromRec(Rec, "SegOvrBits");
HasOpSizePrefix = Rec->getValueAsBit("hasOpSizePrefix");
+ HasAdSizePrefix = Rec->getValueAsBit("hasAdSizePrefix");
HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix");
HasVEXPrefix = Rec->getValueAsBit("hasVEXPrefix");
HasVEX_4VPrefix = Rec->getValueAsBit("hasVEX_4VPrefix");
+ HasVEX_4VOp3Prefix = Rec->getValueAsBit("hasVEX_4VOp3Prefix");
HasVEX_WPrefix = Rec->getValueAsBit("hasVEX_WPrefix");
+ HasMemOp4Prefix = Rec->getValueAsBit("hasMemOp4Prefix");
IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L");
HasLockPrefix = Rec->getValueAsBit("hasLockPrefix");
IsCodeGenOnly = Rec->getValueAsBit("isCodeGenOnly");
@@ -230,7 +245,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
(Name.find("CRC32") != Name.npos);
HasFROperands = hasFROperands();
HasVEX_LPrefix = has256BitOperands() || Rec->getValueAsBit("hasVEX_L");
-
+
// Check for 64-bit inst which does not require REX
Is32Bit = false;
Is64Bit = false;
@@ -254,10 +269,6 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
Rec->getName() == "PUSHFS64" ||
Rec->getName() == "PUSHGS64" ||
Rec->getName() == "REX64_PREFIX" ||
- Rec->getName().find("VMREAD64") != Name.npos ||
- Rec->getName().find("VMWRITE64") != Name.npos ||
- Rec->getName().find("INVEPT64") != Name.npos ||
- Rec->getName().find("INVVPID64") != Name.npos ||
Rec->getName().find("MOV64") != Name.npos ||
Rec->getName().find("PUSH64") != Name.npos ||
Rec->getName().find("POP64") != Name.npos;
@@ -284,67 +295,90 @@ void RecognizableInstr::processInstr(DisassemblerTables &tables,
InstructionContext RecognizableInstr::insnContext() const {
InstructionContext insnContext;
- if (HasVEX_4VPrefix || HasVEXPrefix) {
- if (HasVEX_LPrefix && HasVEX_WPrefix)
- llvm_unreachable("Don't support VEX.L and VEX.W together");
- else if (HasOpSizePrefix && HasVEX_LPrefix)
+ if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix|| HasVEXPrefix) {
+ if (HasVEX_LPrefix && HasVEX_WPrefix) {
+ if (HasOpSizePrefix)
+ insnContext = IC_VEX_L_W_OPSIZE;
+ else
+ llvm_unreachable("Don't support VEX.L and VEX.W together");
+ } else if (HasOpSizePrefix && HasVEX_LPrefix)
insnContext = IC_VEX_L_OPSIZE;
else if (HasOpSizePrefix && HasVEX_WPrefix)
insnContext = IC_VEX_W_OPSIZE;
else if (HasOpSizePrefix)
insnContext = IC_VEX_OPSIZE;
- else if (HasVEX_LPrefix && Prefix == X86Local::XS)
+ else if (HasVEX_LPrefix &&
+ (Prefix == X86Local::XS || Prefix == X86Local::T8XS))
insnContext = IC_VEX_L_XS;
- else if (HasVEX_LPrefix && Prefix == X86Local::XD)
+ else if (HasVEX_LPrefix && (Prefix == X86Local::XD ||
+ Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD))
insnContext = IC_VEX_L_XD;
- else if (HasVEX_WPrefix && Prefix == X86Local::XS)
+ else if (HasVEX_WPrefix &&
+ (Prefix == X86Local::XS || Prefix == X86Local::T8XS))
insnContext = IC_VEX_W_XS;
- else if (HasVEX_WPrefix && Prefix == X86Local::XD)
+ else if (HasVEX_WPrefix && (Prefix == X86Local::XD ||
+ Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD))
insnContext = IC_VEX_W_XD;
else if (HasVEX_WPrefix)
insnContext = IC_VEX_W;
else if (HasVEX_LPrefix)
insnContext = IC_VEX_L;
- else if (Prefix == X86Local::XD)
+ else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD)
insnContext = IC_VEX_XD;
- else if (Prefix == X86Local::XS)
+ else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS)
insnContext = IC_VEX_XS;
else
insnContext = IC_VEX;
} else if (Is64Bit || HasREX_WPrefix) {
if (HasREX_WPrefix && HasOpSizePrefix)
insnContext = IC_64BIT_REXW_OPSIZE;
- else if (HasOpSizePrefix &&
- (Prefix == X86Local::XD || Prefix == X86Local::TF))
+ else if (HasOpSizePrefix && (Prefix == X86Local::XD ||
+ Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD))
insnContext = IC_64BIT_XD_OPSIZE;
- else if (HasOpSizePrefix && Prefix == X86Local::XS)
+ else if (HasOpSizePrefix &&
+ (Prefix == X86Local::XS || Prefix == X86Local::T8XS))
insnContext = IC_64BIT_XS_OPSIZE;
else if (HasOpSizePrefix)
insnContext = IC_64BIT_OPSIZE;
- else if (HasREX_WPrefix && Prefix == X86Local::XS)
- insnContext = IC_64BIT_REXW_XS;
+ else if (HasAdSizePrefix)
+ insnContext = IC_64BIT_ADSIZE;
else if (HasREX_WPrefix &&
- (Prefix == X86Local::XD || Prefix == X86Local::TF))
+ (Prefix == X86Local::XS || Prefix == X86Local::T8XS))
+ insnContext = IC_64BIT_REXW_XS;
+ else if (HasREX_WPrefix && (Prefix == X86Local::XD ||
+ Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD))
insnContext = IC_64BIT_REXW_XD;
- else if (Prefix == X86Local::XD || Prefix == X86Local::TF)
+ else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD)
insnContext = IC_64BIT_XD;
- else if (Prefix == X86Local::XS)
+ else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS)
insnContext = IC_64BIT_XS;
else if (HasREX_WPrefix)
insnContext = IC_64BIT_REXW;
else
insnContext = IC_64BIT;
} else {
- if (HasOpSizePrefix &&
- (Prefix == X86Local::XD || Prefix == X86Local::TF))
+ if (HasOpSizePrefix && (Prefix == X86Local::XD ||
+ Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD))
insnContext = IC_XD_OPSIZE;
- else if (HasOpSizePrefix && Prefix == X86Local::XS)
+ else if (HasOpSizePrefix &&
+ (Prefix == X86Local::XS || Prefix == X86Local::T8XS))
insnContext = IC_XS_OPSIZE;
else if (HasOpSizePrefix)
insnContext = IC_OPSIZE;
- else if (Prefix == X86Local::XD || Prefix == X86Local::TF)
+ else if (HasAdSizePrefix)
+ insnContext = IC_ADSIZE;
+ else if (Prefix == X86Local::XD || Prefix == X86Local::T8XD ||
+ Prefix == X86Local::TAXD)
insnContext = IC_XD;
- else if (Prefix == X86Local::XS || Prefix == X86Local::REP)
+ else if (Prefix == X86Local::XS || Prefix == X86Local::T8XS ||
+ Prefix == X86Local::REP)
insnContext = IC_XS;
else
insnContext = IC;
@@ -371,19 +405,12 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
return FILTER_STRONG;
- // Filter out artificial instructions
+ // Filter out artificial instructions but leave in the LOCK_PREFIX so it is
+ // printed as a separate "instruction".
- if (Name.find("TAILJMP") != Name.npos ||
- Name.find("_Int") != Name.npos ||
- Name.find("_int") != Name.npos ||
+ if (Name.find("_Int") != Name.npos ||
Name.find("Int_") != Name.npos ||
Name.find("_NOREX") != Name.npos ||
- Name.find("_TC") != Name.npos ||
- Name.find("EH_RETURN") != Name.npos ||
- Name.find("V_SET") != Name.npos ||
- Name.find("LOCK_") != Name.npos ||
- Name.find("WIN") != Name.npos ||
- Name.find("_AVX") != Name.npos ||
Name.find("2SDL") != Name.npos)
return FILTER_STRONG;
@@ -421,12 +448,6 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
Name.find("Xrr") != Name.npos ||
Name.find("rr64") != Name.npos)
return FILTER_WEAK;
-
- if (Name == "VMASKMOVDQU64" ||
- Name == "VEXTRACTPSrr64" ||
- Name == "VMOVQd64rr" ||
- Name == "VMOVQs64rr")
- return FILTER_WEAK;
// Special cases.
@@ -441,29 +462,15 @@ RecognizableInstr::filter_ret RecognizableInstr::filter() const {
return FILTER_WEAK;
if (Name.find("Fs") != Name.npos)
return FILTER_WEAK;
- if (Name == "MOVLPDrr" ||
- Name == "MOVLPSrr" ||
- Name == "PUSHFQ" ||
- Name == "BSF16rr" ||
- Name == "BSF16rm" ||
- Name == "BSR16rr" ||
- Name == "BSR16rm" ||
- Name == "MOVSX16rm8" ||
- Name == "MOVSX16rr8" ||
- Name == "MOVZX16rm8" ||
- Name == "MOVZX16rr8" ||
- Name == "PUSH32i16" ||
- Name == "PUSH64i16" ||
+ if (Name == "PUSH64i16" ||
Name == "MOVPQI2QImr" ||
Name == "VMOVPQI2QImr" ||
- Name == "MOVSDmr" ||
- Name == "MOVSDrm" ||
- Name == "MOVSSmr" ||
- Name == "MOVSSrm" ||
Name == "MMX_MOVD64rrv164" ||
- Name == "CRC32m16" ||
Name == "MOV64ri64i32" ||
- Name == "CRC32r16")
+ Name == "VMASKMOVDQU64" ||
+ Name == "VEXTRACTPSrr64" ||
+ Name == "VMOVQd64rr" ||
+ Name == "VMOVQs64rr")
return FILTER_WEAK;
if (HasFROperands && Name.find("MOV") != Name.npos &&
@@ -566,7 +573,7 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
bool hasFROperands = false;
- assert(numOperands < X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
+ assert(numOperands <= X86_MAX_OPERANDS && "X86_MAX_OPERANDS is not large enough");
for (operandIndex = 0; operandIndex < numOperands; ++operandIndex) {
if (OperandList[operandIndex].Constraints.size()) {
@@ -684,31 +691,40 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
// - In AVX, there is a register operand in the VEX.vvvv field here -
// Operand 3 (optional) is an immediate.
- if (HasVEX_4VPrefix)
- assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 &&
+ if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix)
+ assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 &&
"Unexpected number of operands for MRMSrcRegFrm with VEX_4V");
else
assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
"Unexpected number of operands for MRMSrcRegFrm");
HANDLE_OPERAND(roRegister)
-
+
if (HasVEX_4VPrefix)
// FIXME: In AVX, the register below becomes the one encoded
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
-
+
+ if (HasMemOp4Prefix)
+ HANDLE_OPERAND(immediate)
+
HANDLE_OPERAND(rmRegister)
- HANDLE_OPTIONAL(immediate)
+
+ if (HasVEX_4VOp3Prefix)
+ HANDLE_OPERAND(vvvvRegister)
+
+ if (!HasMemOp4Prefix)
+ HANDLE_OPTIONAL(immediate)
+ HANDLE_OPTIONAL(immediate) // above might be a register in 7:4
break;
case X86Local::MRMSrcMem:
// Operand 1 is a register operand in the Reg/Opcode field.
// Operand 2 is a memory operand (possibly SIB-extended)
// - In AVX, there is a register operand in the VEX.vvvv field here -
// Operand 3 (optional) is an immediate.
-
- if (HasVEX_4VPrefix)
- assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 4 &&
+
+ if (HasVEX_4VPrefix || HasVEX_4VOp3Prefix)
+ assert(numPhysicalOperands >= 3 && numPhysicalOperands <= 5 &&
"Unexpected number of operands for MRMSrcMemFrm with VEX_4V");
else
assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
@@ -721,8 +737,17 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
+ if (HasMemOp4Prefix)
+ HANDLE_OPERAND(immediate)
+
HANDLE_OPERAND(memory)
- HANDLE_OPTIONAL(immediate)
+
+ if (HasVEX_4VOp3Prefix)
+ HANDLE_OPERAND(vvvvRegister)
+
+ if (!HasMemOp4Prefix)
+ HANDLE_OPTIONAL(immediate)
+ HANDLE_OPTIONAL(immediate) // above might be a register in 7:4
break;
case X86Local::MRM0r:
case X86Local::MRM1r:
@@ -736,12 +761,12 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
// Operand 2 (optional) is an immediate or relocation.
if (HasVEX_4VPrefix)
assert(numPhysicalOperands <= 3 &&
- "Unexpected number of operands for MRMSrcMemFrm with VEX_4V");
+ "Unexpected number of operands for MRMnRFrm with VEX_4V");
else
assert(numPhysicalOperands <= 2 &&
"Unexpected number of operands for MRMnRFrm");
if (HasVEX_4VPrefix)
- HANDLE_OPERAND(vvvvRegister);
+ HANDLE_OPERAND(vvvvRegister)
HANDLE_OPTIONAL(rmRegister)
HANDLE_OPTIONAL(relocation)
break;
@@ -755,8 +780,14 @@ void RecognizableInstr::emitInstructionSpecifier(DisassemblerTables &tables) {
case X86Local::MRM7m:
// Operand 1 is a memory operand (possibly SIB-extended)
// Operand 2 (optional) is an immediate or relocation.
- assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
- "Unexpected number of operands for MRMnMFrm");
+ if (HasVEX_4VPrefix)
+ assert(numPhysicalOperands >= 2 && numPhysicalOperands <= 3 &&
+ "Unexpected number of operands for MRMnMFrm");
+ else
+ assert(numPhysicalOperands >= 1 && numPhysicalOperands <= 2 &&
+ "Unexpected number of operands for MRMnMFrm");
+ if (HasVEX_4VPrefix)
+ HANDLE_OPERAND(vvvvRegister)
HANDLE_OPERAND(memory)
HANDLE_OPTIONAL(relocation)
break;
@@ -843,15 +874,50 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
opcodeToSet = Opcode;
break;
case X86Local::T8:
- case X86Local::TF:
+ case X86Local::T8XD:
+ case X86Local::T8XS:
opcodeType = THREEBYTE_38;
- if (needsModRMForDecode(Form))
- filter = new ModFilter(isRegFormat(Form));
- else
- filter = new DumbFilter();
+ switch (Opcode) {
+ default:
+ if (needsModRMForDecode(Form))
+ filter = new ModFilter(isRegFormat(Form));
+ else
+ filter = new DumbFilter();
+ break;
+#define EXTENSION_TABLE(n) case 0x##n:
+ THREE_BYTE_38_EXTENSION_TABLES
+#undef EXTENSION_TABLE
+ switch (Form) {
+ default:
+ llvm_unreachable("Unhandled three-byte extended opcode");
+ case X86Local::MRM0r:
+ case X86Local::MRM1r:
+ case X86Local::MRM2r:
+ case X86Local::MRM3r:
+ case X86Local::MRM4r:
+ case X86Local::MRM5r:
+ case X86Local::MRM6r:
+ case X86Local::MRM7r:
+ filter = new ExtendedFilter(true, Form - X86Local::MRM0r);
+ break;
+ case X86Local::MRM0m:
+ case X86Local::MRM1m:
+ case X86Local::MRM2m:
+ case X86Local::MRM3m:
+ case X86Local::MRM4m:
+ case X86Local::MRM5m:
+ case X86Local::MRM6m:
+ case X86Local::MRM7m:
+ filter = new ExtendedFilter(false, Form - X86Local::MRM0m);
+ break;
+ MRM_MAPPING
+ } // switch (Form)
+ break;
+ } // switch (Opcode)
opcodeToSet = Opcode;
break;
case X86Local::P_TA:
+ case X86Local::TAXD:
opcodeType = THREEBYTE_3A;
if (needsModRMForDecode(Form))
filter = new ModFilter(isRegFormat(Form));
@@ -1049,6 +1115,7 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("i16imm_pcrel", TYPE_REL16)
TYPE("i32imm_pcrel", TYPE_REL32)
TYPE("SSECC", TYPE_IMM3)
+ TYPE("AVXCC", TYPE_IMM5)
TYPE("brtarget", TYPE_RELv)
TYPE("uncondbrtarget", TYPE_RELv)
TYPE("brtarget8", TYPE_REL8)
@@ -1090,6 +1157,7 @@ OperandEncoding RecognizableInstr::immediateEncodingFromString
ENCODING("i32i8imm", ENCODING_IB)
ENCODING("u32u8imm", ENCODING_IB)
ENCODING("SSECC", ENCODING_IB)
+ ENCODING("AVXCC", ENCODING_IB)
ENCODING("i16imm", ENCODING_Iv)
ENCODING("i16i8imm", ENCODING_IB)
ENCODING("i32imm", ENCODING_Iv)
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index 44415978273f..6c0a234b5eff 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -50,17 +50,23 @@ private:
uint8_t SegOvr;
/// The hasOpSizePrefix field from the record
bool HasOpSizePrefix;
+ /// The hasAdSizePrefix field from the record
+ bool HasAdSizePrefix;
/// The hasREX_WPrefix field from the record
bool HasREX_WPrefix;
/// The hasVEXPrefix field from the record
bool HasVEXPrefix;
/// The hasVEX_4VPrefix field from the record
bool HasVEX_4VPrefix;
+ /// The hasVEX_4VOp3Prefix field from the record
+ bool HasVEX_4VOp3Prefix;
/// The hasVEX_WPrefix field from the record
bool HasVEX_WPrefix;
/// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
bool HasVEX_LPrefix;
- // The ignoreVEX_L field from the record
+ /// The hasMemOp4Prefix field from the record
+ bool HasMemOp4Prefix;
+ /// The ignoreVEX_L field from the record
bool IgnoresVEX_L;
/// The hasLockPrefix field from the record
bool HasLockPrefix;
@@ -70,7 +76,7 @@ private:
bool Is64Bit;
// Whether the instruction has the predicate "In32BitMode"
bool Is32Bit;
-
+
/// The instruction name as listed in the tables
std::string Name;
/// The AT&T AsmString for the instruction
diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile
index 470ee76b60fe..fc5578a68464 100644
--- a/utils/buildit/GNUmakefile
+++ b/utils/buildit/GNUmakefile
@@ -32,7 +32,7 @@ DSTROOT = $(OBJROOT)/../dst
#######################################################################
-PREFIX = /Developer/usr/local
+PREFIX = /usr/local
# Unless assertions are forced on in the GMAKE command line, disable them.
ifndef ENABLE_ASSERTIONS
@@ -46,9 +46,6 @@ else
LLVM_OPTIMIZED := yes
endif
-# Default to not install libLTO.dylib.
-INSTALL_LIBLTO := no
-
# Default to do a native build, not a cross-build for an ARM host or simulator.
ARM_HOSTED_BUILD := no
IOS_SIM_BUILD := no
@@ -66,7 +63,7 @@ install: $(OBJROOT) $(SYMROOT) $(DSTROOT)
cd $(OBJROOT) && \
$(SRC)/utils/buildit/build_llvm "$(RC_ARCHS)" "$(TARGETS)" \
$(SRC) $(PREFIX) $(DSTROOT) $(SYMROOT) \
- $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) $(INSTALL_LIBLTO) \
+ $(ENABLE_ASSERTIONS) $(LLVM_OPTIMIZED) \
$(ARM_HOSTED_BUILD) $(IOS_SIM_BUILD) \
$(RC_ProjectSourceVersion) $(RC_ProjectSourceSubversion)
@@ -82,7 +79,7 @@ EmbeddedSim:
Embedded:
ARM_PLATFORM=`xcodebuild -version -sdk iphoneos PlatformPath` && \
- $(MAKE) DSTROOT=$(DSTROOT)$$ARM_PLATFORM install
+ $(MAKE) DSTROOT=$(DSTROOT)$$ARM_PLATFORM/Developer install
# installhdrs does nothing, because the headers aren't useful until
# the compiler is installed.
diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm
index 0ffbc190d393..88a26d309552 100755
--- a/utils/buildit/build_llvm
+++ b/utils/buildit/build_llvm
@@ -42,21 +42,17 @@ LLVM_ASSERTIONS="$7"
# build.
LLVM_OPTIMIZED="$8"
-# The ninth parameter is a yes/no that indicates whether libLTO.dylib
-# should be installed.
-INSTALL_LIBLTO="$9"
-
# A yes/no parameter that controls whether to cross-build for an ARM host.
-ARM_HOSTED_BUILD="${10}"
+ARM_HOSTED_BUILD="$9"
# A yes/no parameter that controls whether to cross-build for the iOS simulator
-IOS_SIM_BUILD="${11}"
+IOS_SIM_BUILD="${10}"
# The version number of the submission, e.g. 1007.
-LLVM_SUBMIT_VERSION="${12}"
+LLVM_SUBMIT_VERSION="${11}"
# The subversion number of the submission, e.g. 03.
-LLVM_SUBMIT_SUBVERSION="${13}"
+LLVM_SUBMIT_SUBVERSION="${12}"
# The current working directory is where the build will happen. It may already
# contain a partial result of an interrupted build, in which case this script
@@ -117,7 +113,15 @@ elif [ "$IOS_SIM_BUILD" = yes ]; then
configure_opts="--enable-targets=x86 --host=i686-apple-darwin_sim \
--build=i686-apple-darwin10"
else
- configure_opts="--enable-targets=arm,x86,cbe"
+ configure_opts="--enable-targets=arm,x86"
+fi
+
+if [ "$ARM_HOSTED_BUILD" != yes ]; then
+ if [ $SDKROOT ]; then
+ CPPFLAGS="$CPPFLAGS -isysroot $SDKROOT"
+ fi
+ for host in $HOSTS; do :; done
+ CPPFLAGS="$CPPFLAGS -arch $host"
fi
if [ \! -f Makefile.config ]; then
@@ -125,6 +129,7 @@ if [ \! -f Makefile.config ]; then
--enable-assertions=$LLVM_ASSERTIONS \
--enable-optimized=$LLVM_OPTIMIZED \
--disable-bindings \
+ CPPFLAGS="$CPPFLAGS" \
|| exit 1
fi
@@ -156,6 +161,7 @@ make $JOBS_FLAG $OPTIMIZE_OPTS UNIVERSAL=1 UNIVERSAL_ARCH="$HOSTS" \
UNIVERSAL_SDK_PATH=$SDKROOT \
NO_RUNTIME_LIBS=1 \
DISABLE_EDIS=1 \
+ REQUIRES_RTTI=1 \
DEBUG_SYMBOLS=1 \
LLVM_SUBMIT_VERSION=$LLVM_SUBMIT_VERSION \
LLVM_SUBMIT_SUBVERSION=$LLVM_SUBMIT_SUBVERSION \
@@ -218,9 +224,6 @@ if [ "x$LLVM_DEBUG" != "x1" ]; then
done
fi
-# Copy over the tblgen utility.
-cp `find $DIR -name tblgen` $DEST_DIR$DEST_ROOT/bin
-
# Remove .dir files
cd $DEST_DIR$DEST_ROOT
rm -f bin/.dir etc/llvm/.dir lib/.dir
@@ -289,34 +292,11 @@ find obj-* -name \*.\[chy\] -o -name \*.cpp -print \
| cpio -pdml $SYM_DIR/src || exit 1
################################################################################
-# Install and strip libLTO.dylib
+# Remove libLTO.dylib and lto.h. Those are installed by clang.
cd $DEST_DIR$DEST_ROOT
-if [ "$INSTALL_LIBLTO" = "yes" ]; then
- DT_HOME="$DEST_DIR/Developer/usr"
- mkdir -p $DT_HOME/lib
- mv lib/libLTO.dylib $DT_HOME/lib/libLTO.dylib
-
- # Save a copy of the unstripped dylib
- mkdir -p $SYM_DIR/Developer/usr/lib
- cp $DT_HOME/lib/libLTO.dylib $SYM_DIR/Developer/usr/lib/libLTO.dylib
-
- # Use '-l' to strip i386 modules. N.B. that flag doesn't work with kext or
- # PPC objects!
- $STRIP -arch all -Sl $DT_HOME/lib/libLTO.dylib
-
- if [ "x$DISABLE_USR_LINKS" == "x" ]; then
- # Add a symlink in /usr/lib for B&I.
- mkdir -p $DEST_DIR/usr/lib/
- (cd $DEST_DIR/usr/lib && \
- ln -s ../../Developer/usr/lib/libLTO.dylib ./libLTO.dylib)
- fi
-else
- rm -f lib/libLTO.dylib
-fi
+rm -f lib/libLTO.dylib
rm -f lib/libLTO.a lib/libLTO.la
-
-# Omit lto.h from the result. Clang will supply.
find $DEST_DIR$DEST_ROOT -name lto.h -delete
################################################################################
diff --git a/utils/cgiplotNLT.pl b/utils/cgiplotNLT.pl
deleted file mode 100755
index 0360e4120d8c..000000000000
--- a/utils/cgiplotNLT.pl
+++ /dev/null
@@ -1,68 +0,0 @@
-#!/usr/bin/perl
-#takes a test and a program from a dp and produces a gnuplot script
-#use like perl plotNLT.pl password Programs/MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 llc
-
-use CGI;
-use DBI;
-my $q = new CGI;
-
-# database information
-$db="llvmalpha";
-$host="localhost";
-$userid="llvmdbuser";
-$passwd=$q->param('pwd');
-$connectionInfo="dbi:mysql:$db;$host";
-
-# make connection to database
-$dbh = DBI->connect($connectionInfo,$userid,$passwd) or die DBI->errstr;
-
-
-$count = 0;
-while ($q->param('n' . $count))
- {
- $count++;
- }
-
-$| = 1;
-print "Content-type: image/png", "\n\n";
-
-open CMDSTREAM, "|gnuplot";
-#open CMDSTREAM, "|echo";
-
-print CMDSTREAM "set terminal png\n";
-print CMDSTREAM "set output\n";
-print CMDSTREAM "set xdata time\n";
-print CMDSTREAM 'set timefmt "%Y-%m-%d"';
-print CMDSTREAM "\nplot";
-for ($iter = 0; $iter < $count; $iter++) {
- if ($iter)
- { print CMDSTREAM ","; }
- print CMDSTREAM " '-' using 1:2 title \"" . $q->param('t' . $iter) . "," . $q->param('n' . $iter) . "\"with lines";
-}
-
-print CMDSTREAM "\n";
-
-for ($iter = 0; $iter < $count; $iter++) {
-
- $prog = $q->param('n' . $iter);
- $test = $q->param('t' . $iter);
-
- $query = "Select RUN, VALUE from Tests where TEST = '$test' AND NAME = '$prog' ORDER BY RUN";
- #print "\n$query\n";
-
- my $sth = $dbh->prepare( $query) || die "Can't prepare statement: $DBI::errstr";;
-
- my $rc = $sth->execute or die DBI->errstr;
-
- while(($da,$v) = $sth->fetchrow_array)
- {
- print CMDSTREAM "$da $v\n";
- }
-
- print CMDSTREAM "e\n";
-}
-print CMDSTREAM "exit\n";
-close CMDSTREAM;
-
-# disconnect from database
-$dbh->disconnect;
diff --git a/utils/clang-parse-diagnostics-file b/utils/clang-parse-diagnostics-file
new file mode 100755
index 000000000000..b8ea8eae310f
--- /dev/null
+++ b/utils/clang-parse-diagnostics-file
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+import plistlib
+
+def main():
+ from optparse import OptionParser, OptionGroup
+ parser = OptionParser("""\
+Usage: %prog [options] <path>
+
+Utility for dumping Clang-style logged diagnostics.\
+""")
+ parser.add_option("-a", "--all", action="store_true", dest="all",
+ default=False, help="dump all messages.")
+ parser.add_option("-e", "--error", action="store_true", dest="error",
+ default=False, help="dump 'error' messages.")
+ parser.add_option("-f", "--fatal", action="store_true", dest="fatal",
+ default=False, help="dump 'fatal error' messages.")
+ parser.add_option("-i", "--ignored", action="store_true", dest="ignored",
+ default=False, help="dump 'ignored' messages.")
+ parser.add_option("-n", "--note", action="store_true", dest="note",
+ default=False, help="dump 'note' messages.")
+ parser.add_option("-w", "--warning", action="store_true", dest="warning",
+ default=False, help="dump 'warning' messages.")
+ (opts, args) = parser.parse_args()
+
+ if len(args) != 1:
+ parser.error("invalid number of arguments")
+
+ levels = {'error': False, 'fatal error': False, 'ignored': False,
+ 'note': False, 'warning': False}
+ if opts.error:
+ levels['error'] = True
+ if opts.fatal:
+ levels['fatal error'] = True
+ if opts.ignored:
+ levels['ignored'] = True
+ if opts.note:
+ levels['note'] = True
+ if opts.warning:
+ levels['warning'] = True
+
+ path, = args
+
+ # Read the diagnostics log.
+ f = open(path)
+ try:
+ data = f.read()
+ finally:
+ f.close()
+
+ # Complete the plist (the log itself is just the chunks).
+ data = """\
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" \
+ "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<array>
+%s
+</array>
+</plist>""" % data
+
+ # Load the diagnostics.
+ diags = plistlib.readPlistFromString(data)
+
+ # Print out the diagnostics.
+ print
+ print "**** BUILD DIAGNOSTICS ****"
+ for i, file_diags in enumerate(diags):
+ file = file_diags.get('main-file')
+ print "*** %s ***" % file
+ for d in file_diags.get('diagnostics', ()):
+ if levels[d.get('level')] or opts.all:
+ print " %s:%s:%s: %s: %s" % (
+ d.get('filename'), d.get('line'), d.get('column'),
+ d.get('level'), d.get('message'))
+
+if __name__ == "__main__":
+ main()
diff --git a/utils/emacs/tablegen-mode.el b/utils/emacs/tablegen-mode.el
index 3853ce66a285..e83a34ca1816 100644
--- a/utils/emacs/tablegen-mode.el
+++ b/utils/emacs/tablegen-mode.el
@@ -13,7 +13,7 @@
(defvar tablegen-font-lock-keywords
(let ((kw (regexp-opt '("class" "defm" "def" "field" "include" "in"
- "let" "multiclass")
+ "let" "multiclass" "foreach")
'words))
(type-kw (regexp-opt '("bit" "bits" "code" "dag" "int" "list" "string")
'words))
diff --git a/utils/importNLT.pl b/utils/importNLT.pl
deleted file mode 100644
index c1b950dc34d8..000000000000
--- a/utils/importNLT.pl
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/perl
-#take the output of parseNLT.pl and load it into a database
-# use like: cat file |perl parseNLT.pl |perl importNLT.pl password
-
-use DBI;
-
-# database information
-$db="llvmalpha";
-$host="localhost";
-$userid="llvmdbuser";
-$passwd=shift @ARGV;
-$connectionInfo="dbi:mysql:$db;$host";
-
-# make connection to database
-$dbh = DBI->connect($connectionInfo,$userid,$passwd) or die DBI->errstr;
-my $sth = $dbh->prepare( q{
- INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES (?, STR_TO_DATE(?, '\%d \%M \%Y'), ?, ?)
- }) || die "Can't prepare statement: $DBI::errstr";;
-
-while($d = <>)
-{
- chomp $d;
- if (18 == scalar split " ", $d)
- {
- ($day, $mon, $year, $prog, $gccas, $bc, $llccompile, $llcbetacompile, $jitcompile,
- $mc, $gcc, $cbe, $llc, $llcbeta, $jit, $foo1, $foo2, $foo3) = split " ", $d;
- if ($gccas =~ /\d+/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'gccas', $gccas)") || die DBI->errstr;
- }
- if ($bc =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'bytecode', $bc)") || die DBI->errstr;
- }
- if ($llccompile =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'llc-compile', $llccompile)") || die DBI->errstr;
- }
- if ($llcbetacompile =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'llc-beta-compile', $llcbetacompile)") || die DBI->errstr;
- }
- if ($jitcompile =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'jit-compile', $jitcompile)") || die DBI->errstr;
- }
- if ($mc =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'machine-code', $mc)") || die DBI->errstr;
- }
- if ($gcc =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'gcc', $gcc)") || die DBI->errstr;
- }
- if ($llc =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'llc', $llc)") || die DBI->errstr;
- }
- if ($llcbeta =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'llc-beta', $llcbeta)") || die DBI->errstr;
- }
- if ($jit =~ /\d/)
- {
- $dbh->do("INSERT INTO Tests (NAME, RUN, TEST, VALUE) VALUES
- ('$prog', STR_TO_DATE('$day $mon $year', '\%d \%M \%Y'), 'jit', $jit)") || die DBI->errstr;
- }
- print ".";
- }
- else
- {
- print "\nNO: $d\n";
- }
-}
-print "\n";
-# disconnect from database
-$dbh->disconnect;
diff --git a/utils/json-bench/CMakeLists.txt b/utils/json-bench/CMakeLists.txt
new file mode 100644
index 000000000000..03ac51ce64a9
--- /dev/null
+++ b/utils/json-bench/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_utility(json-bench
+ JSONBench.cpp
+ )
+
+target_link_libraries(json-bench LLVMSupport)
diff --git a/utils/json-bench/JSONBench.cpp b/utils/json-bench/JSONBench.cpp
new file mode 100644
index 000000000000..ca8a36a03ab0
--- /dev/null
+++ b/utils/json-bench/JSONBench.cpp
@@ -0,0 +1,85 @@
+//===- JSONBench - Benchmark the JSONParser implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program executes the JSONParser on differently sized JSON texts and
+// outputs the run time.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/JSONParser.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+
+static llvm::cl::opt<bool>
+Verify("verify", llvm::cl::desc(
+ "Run a quick verification useful for regression testing"),
+ llvm::cl::init(false));
+
+static llvm::cl::opt<unsigned>
+MemoryLimitMB("memory-limit", llvm::cl::desc(
+ "Do not use more megabytes of memory"),
+ llvm::cl::init(1000));
+
+void benchmark(llvm::TimerGroup &Group, llvm::StringRef Name,
+ llvm::StringRef JSONText) {
+ llvm::Timer BaseLine((Name + ": Loop").str(), Group);
+ BaseLine.startTimer();
+ char C = 0;
+ for (llvm::StringRef::iterator I = JSONText.begin(),
+ E = JSONText.end();
+ I != E; ++I) { C += *I; }
+ BaseLine.stopTimer();
+ volatile char DontOptimizeOut = C; (void)DontOptimizeOut;
+
+ llvm::Timer Parsing((Name + ": Parsing").str(), Group);
+ Parsing.startTimer();
+ llvm::SourceMgr SM;
+ llvm::JSONParser Parser(JSONText, &SM);
+ if (!Parser.validate()) {
+ llvm::errs() << "Parsing error in JSON parser benchmark.\n";
+ exit(1);
+ }
+ Parsing.stopTimer();
+}
+
+std::string createJSONText(size_t MemoryMB, unsigned ValueSize) {
+ std::string JSONText;
+ llvm::raw_string_ostream Stream(JSONText);
+ Stream << "[\n";
+ size_t MemoryBytes = MemoryMB * 1024 * 1024;
+ while (JSONText.size() < MemoryBytes) {
+ Stream << " {\n"
+ << " \"key1\": \"" << std::string(ValueSize, '*') << "\",\n"
+ << " \"key2\": \"" << std::string(ValueSize, '*') << "\",\n"
+ << " \"key3\": \"" << std::string(ValueSize, '*') << "\"\n"
+ << " }";
+ Stream.flush();
+ if (JSONText.size() < MemoryBytes) Stream << ",";
+ Stream << "\n";
+ }
+ Stream << "]\n";
+ Stream.flush();
+ return JSONText;
+}
+
+int main(int argc, char **argv) {
+ llvm::cl::ParseCommandLineOptions(argc, argv);
+ llvm::TimerGroup Group("JSON parser benchmark");
+ if (Verify) {
+ benchmark(Group, "Fast", createJSONText(10, 500));
+ } else {
+ benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
+ benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
+ benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
+ }
+ return 0;
+}
+
diff --git a/utils/json-bench/Makefile b/utils/json-bench/Makefile
new file mode 100644
index 000000000000..6651626f683b
--- /dev/null
+++ b/utils/json-bench/Makefile
@@ -0,0 +1,21 @@
+##===- utils/json-bench/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = json-bench
+USEDLIBS = LLVMSupport.a
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
index e7ef037663a3..e9df1e5b53bf 100644
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
@@ -75,16 +75,6 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
if m:
site_exp[m.group(1)] = m.group(2)
-# Add substitutions.
-for sub in ['prcontext', 'llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
- 'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
- 'bugpoint_topts']:
- if sub in ('llvmgcc', 'llvmgxx'):
- config.substitutions.append(('%' + sub,
- site_exp[sub] + ' -emit-llvm -w'))
- else:
- config.substitutions.append(('%' + sub, site_exp[sub]))
-
excludes = []
# Provide target_triple for use in XFAIL and XTARGET.
@@ -95,10 +85,6 @@ targets = set(site_exp["TARGETS_TO_BUILD"].split())
def llvm_supports_target(name):
return name in targets
-langs = set(site_exp['llvmgcc_langs'].split(','))
-def llvm_gcc_supports(name):
- return name in langs
-
# Provide on_clone hook for reading 'dg.exp'.
import os
simpleLibData = re.compile(r"""load_lib llvm.exp
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp b/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
index efa839e9ba0a..4bc58d757990 100644
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp
@@ -2,27 +2,9 @@
# Do not edit here. If you wish to override these values
# edit the last section
set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend"
-set llvmgcc_langs "c,c++,objc,obj-c++"
-set prcontext "/usr/bin/tclsh8.4 /Volumes/Data/ddunbar/llvm/test/Scripts/prcontext.tcl"
-set llvmtoolsdir "/Users/ddunbar/llvm.obj.64/Debug/bin"
-set llvmlibsdir "/Users/ddunbar/llvm.obj.64/Debug/lib"
+set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend"
set srcroot "/Volumes/Data/ddunbar/llvm"
set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
set srcdir "/Volumes/Data/ddunbar/llvm/test"
set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
-set gccpath "/usr/bin/gcc -arch x86_64"
-set gxxpath "/usr/bin/g++ -arch x86_64"
-set compile_c " /usr/bin/gcc -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set compile_cxx " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set link " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -g -L/Users/ddunbar/llvm.obj.64/Debug/lib -L/Volumes/Data/ddunbar/llvm.obj.64/Debug/lib "
-set llvmgcc "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set llvmgxx "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set bugpoint_topts "-gcc-tool-args -m64"
-set shlibext ".dylib"
-set ocamlopt "/sw/bin/ocamlopt -cc \"g++ -Wall -D_FILE_OFFSET_BITS=64 -D_REENTRANT\" -I /Users/ddunbar/llvm.obj.64/Debug/lib/ocaml"
-set valgrind ""
-set grep "/usr/bin/grep"
-set gas "/usr/bin/as"
-set llvmdsymutil "dsymutil"
## All variables above are generated by configure. Do Not Edit ##
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
index efa839e9ba0a..4bc58d757990 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp
@@ -2,27 +2,9 @@
# Do not edit here. If you wish to override these values
# edit the last section
set target_triplet "x86_64-apple-darwin10"
-set TARGETS_TO_BUILD "X86 Sparc PowerPC Alpha ARM Mips CellSPU PIC16 XCore MSP430 SystemZ Blackfin CBackend MSIL CppBackend"
-set llvmgcc_langs "c,c++,objc,obj-c++"
-set prcontext "/usr/bin/tclsh8.4 /Volumes/Data/ddunbar/llvm/test/Scripts/prcontext.tcl"
-set llvmtoolsdir "/Users/ddunbar/llvm.obj.64/Debug/bin"
-set llvmlibsdir "/Users/ddunbar/llvm.obj.64/Debug/lib"
+set TARGETS_TO_BUILD "X86 Sparc PowerPC ARM Mips CellSPU PIC16 XCore MSP430 Blackfin MSIL CppBackend"
set srcroot "/Volumes/Data/ddunbar/llvm"
set objroot "/Volumes/Data/ddunbar/llvm.obj.64"
set srcdir "/Volumes/Data/ddunbar/llvm/test"
set objdir "/Volumes/Data/ddunbar/llvm.obj.64/test"
-set gccpath "/usr/bin/gcc -arch x86_64"
-set gxxpath "/usr/bin/g++ -arch x86_64"
-set compile_c " /usr/bin/gcc -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set compile_cxx " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -c "
-set link " /usr/bin/g++ -arch x86_64 -I/Users/ddunbar/llvm.obj.64/include -I/Users/ddunbar/llvm.obj.64/test -I/Volumes/Data/ddunbar/llvm.obj.64/include -I/Volumes/Data/ddunbar/llvm/include -I/Volumes/Data/ddunbar/llvm/test -D_DEBUG -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS -g -fno-exceptions -fno-common -Woverloaded-virtual -m64 -pedantic -Wno-long-long -Wall -W -Wno-unused-parameter -Wwrite-strings -g -L/Users/ddunbar/llvm.obj.64/Debug/lib -L/Volumes/Data/ddunbar/llvm.obj.64/Debug/lib "
-set llvmgcc "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set llvmgxx "/Users/ddunbar/llvm-gcc/install/bin/llvm-gcc -m64 "
-set bugpoint_topts "-gcc-tool-args -m64"
-set shlibext ".dylib"
-set ocamlopt "/sw/bin/ocamlopt -cc \"g++ -Wall -D_FILE_OFFSET_BITS=64 -D_REENTRANT\" -I /Users/ddunbar/llvm.obj.64/Debug/lib/ocaml"
-set valgrind ""
-set grep "/usr/bin/grep"
-set gas "/usr/bin/as"
-set llvmdsymutil "dsymutil"
## All variables above are generated by configure. Do Not Edit ##
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
index e7ef037663a3..e9df1e5b53bf 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
@@ -75,16 +75,6 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
if m:
site_exp[m.group(1)] = m.group(2)
-# Add substitutions.
-for sub in ['prcontext', 'llvmgcc', 'llvmgxx', 'compile_cxx', 'compile_c',
- 'link', 'shlibext', 'ocamlopt', 'llvmdsymutil', 'llvmlibsdir',
- 'bugpoint_topts']:
- if sub in ('llvmgcc', 'llvmgxx'):
- config.substitutions.append(('%' + sub,
- site_exp[sub] + ' -emit-llvm -w'))
- else:
- config.substitutions.append(('%' + sub, site_exp[sub]))
-
excludes = []
# Provide target_triple for use in XFAIL and XTARGET.
@@ -95,10 +85,6 @@ targets = set(site_exp["TARGETS_TO_BUILD"].split())
def llvm_supports_target(name):
return name in targets
-langs = set(site_exp['llvmgcc_langs'].split(','))
-def llvm_gcc_supports(name):
- return name in langs
-
# Provide on_clone hook for reading 'dg.exp'.
import os
simpleLibData = re.compile(r"""load_lib llvm.exp
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index 2cc278111991..c71c0ccdea9b 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -61,6 +61,8 @@ class LitConfig:
"""load_config(config, path) - Load a config object from an alternate
path."""
from TestingConfig import TestingConfig
+ if self.debug:
+ self.note('load_config from %r' % path)
return TestingConfig.frompath(path, config.parent, self,
mustExist = True,
config = config)
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index f5f7c19891b3..b5f7986bfe7c 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -23,6 +23,56 @@ kUseCloseFDs = not kIsWindows
# Use temporary files to replace /dev/null on Windows.
kAvoidDevNull = kIsWindows
+# Cleared at run time if the win32file module turns out to be unavailable.
+kHaveWin32File = kIsWindows
+
+def RemoveForce(f):
+ try:
+ os.remove(f)
+ except OSError:
+ pass
+
+def WinWaitReleased(f):
+ global kHaveWin32File
+ if not kHaveWin32File:
+ return
+ try:
+ import time
+ import win32file, pywintypes
+ retry_cnt = 256
+ while True:
+ try:
+ h = win32file.CreateFile(
+ f,
+ win32file.GENERIC_READ,
+ 0, # Exclusive
+ None,
+ win32file.OPEN_EXISTING,
+ win32file.FILE_ATTRIBUTE_NORMAL,
+ None)
+ h.close()
+ return
+ except WindowsError, (winerror, strerror):
+ retry_cnt = retry_cnt - 1
+ if retry_cnt <= 0:
+ raise
+ elif winerror == 32: # ERROR_SHARING_VIOLATION
+ pass
+ else:
+ raise
+ except pywintypes.error, e:
+ retry_cnt = retry_cnt - 1
+ if retry_cnt <= 0:
+ raise
+ elif e[0]== 32: # ERROR_SHARING_VIOLATION
+ pass
+ else:
+ raise
+ time.sleep(0.01)
+ except ImportError, e:
+ kHaveWin32File = False
+ return
+
def executeCommand(command, cwd=None, env=None):
p = subprocess.Popen(command, cwd=cwd,
stdin=subprocess.PIPE,
@@ -115,6 +165,7 @@ def executeShCmd(cmd, cfg, cwd, results):
else:
if r[2] is None:
if kAvoidDevNull and r[0] == '/dev/null':
+ r[0] = None
r[2] = tempfile.TemporaryFile(mode=r[1])
else:
r[2] = open(r[0], r[1])
@@ -123,7 +174,7 @@ def executeShCmd(cmd, cfg, cwd, results):
# FIXME: Actually, this is probably an instance of PR6753.
if r[1] == 'a':
r[2].seek(0, 2)
- opened_files.append(r[2])
+ opened_files.append(r)
result = r[2]
final_redirects.append(result)
@@ -185,7 +236,7 @@ def executeShCmd(cmd, cfg, cwd, results):
# on Win32, for example). Since we have already spawned the subprocess, our
# handles have already been transferred so we do not need them anymore.
for f in opened_files:
- f.close()
+ f[2].close()
# FIXME: There is probably still deadlock potential here. Yawn.
procData = [None] * len(procs)
@@ -224,12 +275,15 @@ def executeShCmd(cmd, cfg, cwd, results):
else:
exitCode = res
+ # Make sure opened_files is released by other (child) processes.
+ if kIsWindows:
+ for f in opened_files:
+ if f[0] is not None:
+ WinWaitReleased(f[0])
+
# Remove any named temporary files we created.
for f in named_temp_files:
- try:
- os.remove(f)
- except OSError:
- pass
+ RemoveForce(f)
if cmd.negate:
exitCode = not exitCode
@@ -383,7 +437,8 @@ def isExpectedFail(xfails, xtargets, target_triple):
return True
-def parseIntegratedTestScript(test, normalize_slashes=False):
+def parseIntegratedTestScript(test, normalize_slashes=False,
+ extra_substitutions=[]):
"""parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
information. The RUN lines also will have variable substitution performed.
@@ -410,11 +465,13 @@ def parseIntegratedTestScript(test, normalize_slashes=False):
tmpBase = tmpBase.replace('\\', '/')
# We use #_MARKER_# to hide %% while we do the other substitutions.
- substitutions = [('%%', '#_MARKER_#')]
+ substitutions = list(extra_substitutions)
+ substitutions.extend([('%%', '#_MARKER_#')])
substitutions.extend(test.config.substitutions)
substitutions.extend([('%s', sourcepath),
('%S', sourcedir),
('%p', sourcedir),
+ ('%{pathsep}', os.pathsep),
('%t', tmpBase + '.tmp'),
('%T', tmpDir),
# FIXME: Remove this once we kill DejaGNU.
@@ -557,11 +614,12 @@ def executeTclTest(test, litConfig):
return formatTestOutput(status, out, err, exitCode, failDueToStderr, script)
-def executeShTest(test, litConfig, useExternalSh):
+def executeShTest(test, litConfig, useExternalSh,
+ extra_substitutions=[]):
if test.config.unsupported:
return (Test.UNSUPPORTED, 'Test is unsupported')
- res = parseIntegratedTestScript(test, useExternalSh)
+ res = parseIntegratedTestScript(test, useExternalSh, extra_substitutions)
if len(res) == 2:
return res
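
The WinWaitReleased helper above polls by opening the file with an exclusive share mode; the open keeps failing with ERROR_SHARING_VIOLATION for as long as a child process still holds the file. The same idea expressed directly against the Win32 API, as a sketch rather than anything this patch adds:

#include <windows.h>

static bool waitUntilReleased(const wchar_t *Path, int Retries = 256) {
  while (Retries-- > 0) {
    HANDLE H = CreateFileW(Path, GENERIC_READ, /*dwShareMode=*/0, nullptr,
                           OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
    if (H != INVALID_HANDLE_VALUE) {          // no one else has it open
      CloseHandle(H);
      return true;
    }
    if (GetLastError() != ERROR_SHARING_VIOLATION)
      return false;                           // unrelated failure; give up
    Sleep(10);                                // still held; retry shortly
  }
  return false;
}
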
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index a92dca8fb1b1..223120c4fe22 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -16,11 +16,12 @@ class TestingConfig:
'PATH' : os.pathsep.join(litConfig.path +
[os.environ.get('PATH','')]),
'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
+ 'LLVM_DISABLE_CRASH_REPORT' : '1',
}
if sys.platform == 'win32':
environment.update({
- 'LLVM_DISABLE_CRT_DEBUG' : '1',
+ 'INCLUDE' : os.environ.get('INCLUDE',''),
'PATHEXT' : os.environ.get('PATHEXT',''),
'PYTHONUNBUFFERED' : '1',
'TEMP' : os.environ.get('TEMP',''),
@@ -50,14 +51,19 @@ class TestingConfig:
cfg_globals['__file__'] = path
try:
exec f in cfg_globals
+ if litConfig.debug:
+ litConfig.note('... loaded config %r' % path)
except SystemExit,status:
# We allow normal system exit inside a config file to just
# return control without error.
if status.args:
raise
f.close()
- elif mustExist:
- litConfig.fatal('unable to load config from %r ' % path)
+ else:
+ if mustExist:
+ litConfig.fatal('unable to load config from %r ' % path)
+ elif litConfig.debug:
+ litConfig.note('... config not found - %r' %path)
config.finish(litConfig)
return config
@@ -108,3 +114,12 @@ class TestingConfig:
# files. Should we distinguish them?
self.test_source_root = str(self.test_source_root)
self.excludes = set(self.excludes)
+
+ @property
+ def root(self):
+ """root attribute - The root configuration for the test suite."""
+ if self.parent is None:
+ return self
+ else:
+ return self.parent.root
+
diff --git a/utils/lit/lit/main.py b/utils/lit/lit/main.py
index e1a380c3fcbc..039868da7860 100755
--- a/utils/lit/lit/main.py
+++ b/utils/lit/lit/main.py
@@ -429,6 +429,10 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
group.add_option("", "--shuffle", dest="shuffle",
help="Run tests in random order",
action="store_true", default=False)
+ group.add_option("", "--filter", dest="filter", metavar="EXPRESSION",
+ help=("Only run tests with paths matching the given "
+ "regular expression"),
+ action="store", default=None)
parser.add_option_group(group)
group = OptionGroup(parser, "Debug and Experimental Options")
@@ -452,9 +456,10 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
parser.error('No inputs specified')
if opts.configPrefix is not None:
- global gConfigName, gSiteConfigName
+ global gConfigName, gSiteConfigName, kLocalConfigName
gConfigName = '%s.cfg' % opts.configPrefix
gSiteConfigName = '%s.site.cfg' % opts.configPrefix
+ kLocalConfigName = '%s.local.cfg' % opts.configPrefix
if opts.numThreads is None:
# Python <2.5 has a race condition causing lit to always fail with numThreads>1
@@ -540,10 +545,24 @@ def main(builtinParameters = {}): # Bump the GIL check interval, its more imp
# Select and order the tests.
numTotalTests = len(tests)
+
+ # First, select based on the filter expression if given.
+ if opts.filter:
+ try:
+ rex = re.compile(opts.filter)
+ except:
+ parser.error("invalid regular expression for --filter: %r" % (
+ opts.filter))
+ tests = [t for t in tests
+ if rex.search(t.getFullName())]
+
+ # Then select the order.
if opts.shuffle:
random.shuffle(tests)
else:
tests.sort(key = lambda t: t.getFullName())
+
+ # Finally limit the number of tests, if desired.
if opts.maxTests is not None:
tests = tests[:opts.maxTests]
diff --git a/utils/lldbDataFormatters.py b/utils/lldbDataFormatters.py
new file mode 100644
index 000000000000..18b407a02a63
--- /dev/null
+++ b/utils/lldbDataFormatters.py
@@ -0,0 +1,53 @@
+"""
+Load into LLDB with:
+script import lldbDataFormatters
+type synthetic add -x "^llvm::SmallVectorImpl<.+>$" -l lldbDataFormatters.SmallVectorSynthProvider
+"""
+
+# Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl
+class SmallVectorSynthProvider:
+ def __init__(self, valobj, dict):
+ self.valobj = valobj;
+ self.update() # initialize this provider
+
+ def num_children(self):
+ begin = self.begin.GetValueAsUnsigned(0)
+ end = self.end.GetValueAsUnsigned(0)
+ return (end - begin)/self.type_size
+
+ def get_child_index(self, name):
+ try:
+ return int(name.lstrip('[').rstrip(']'))
+ except:
+ return -1;
+
+ def get_child_at_index(self, index):
+ # Do bounds checking.
+ if index < 0:
+ return None
+ if index >= self.num_children():
+ return None;
+
+ offset = index * self.type_size
+ return self.begin.CreateChildAtOffset('['+str(index)+']',
+ offset, self.data_type)
+
+ def get_type_from_name(self):
+ import re
+ name = self.valobj.GetType().GetName()
+ # This class works with both SmallVectors and SmallVectorImpls.
+ res = re.match("^(llvm::)?SmallVectorImpl<(.+)>$", name)
+ if res:
+ return res.group(2)
+ res = re.match("^(llvm::)?SmallVector<(.+), \d+>$", name)
+ if res:
+ return res.group(2)
+ return None
+
+ def update(self):
+ self.begin = self.valobj.GetChildMemberWithName('BeginX')
+ self.end = self.valobj.GetChildMemberWithName('EndX')
+ data_type = self.get_type_from_name()
+ # FIXME: this sometimes returns an invalid type.
+ self.data_type = self.valobj.GetTarget().FindFirstType(data_type)
+ self.type_size = self.data_type.GetByteSize()
diff --git a/utils/llvm-build/README.txt b/utils/llvm-build/README.txt
new file mode 100644
index 000000000000..b6bcaae0f1df
--- /dev/null
+++ b/utils/llvm-build/README.txt
@@ -0,0 +1,5 @@
+==============================
+ llvm-build - LLVM Build Tool
+==============================
+
+`llvm-build` is a tool for helping build the LLVM project.
diff --git a/utils/llvm-build/llvm-build b/utils/llvm-build/llvm-build
new file mode 100755
index 000000000000..7377e3d3fed7
--- /dev/null
+++ b/utils/llvm-build/llvm-build
@@ -0,0 +1,6 @@
+#!/usr/bin/env python
+
+import llvmbuild
+
+if __name__ == '__main__':
+ llvmbuild.main()
diff --git a/utils/llvm-build/llvmbuild/__init__.py b/utils/llvm-build/llvmbuild/__init__.py
new file mode 100644
index 000000000000..776021897337
--- /dev/null
+++ b/utils/llvm-build/llvmbuild/__init__.py
@@ -0,0 +1 @@
+from main import main
diff --git a/utils/llvm-build/llvmbuild/componentinfo.py b/utils/llvm-build/llvmbuild/componentinfo.py
new file mode 100644
index 000000000000..230ae219f2f2
--- /dev/null
+++ b/utils/llvm-build/llvmbuild/componentinfo.py
@@ -0,0 +1,428 @@
+"""
+Descriptor objects for entities that are part of the LLVM project.
+"""
+
+import ConfigParser
+import StringIO
+import sys
+
+from util import *
+
+class ParseError(Exception):
+ pass
+
+class ComponentInfo(object):
+ """
+ Base class for component descriptions.
+ """
+
+ type_name = None
+
+ @staticmethod
+ def parse_items(items, has_dependencies = True):
+ kwargs = {}
+ kwargs['name'] = items.get_string('name')
+ kwargs['parent'] = items.get_optional_string('parent')
+ if has_dependencies:
+ kwargs['dependencies'] = items.get_list('dependencies')
+ return kwargs
+
+ def __init__(self, subpath, name, dependencies, parent):
+ if not subpath.startswith('/'):
+ raise ValueError,"invalid subpath: %r" % subpath
+ self.subpath = subpath
+ self.name = name
+ self.dependencies = list(dependencies)
+
+ # The name of the parent component to logically group this component
+ # under.
+ self.parent = parent
+
+ # The parent instance, once loaded.
+ self.parent_instance = None
+ self.children = []
+
+ # The original source path.
+ self._source_path = None
+
+ # A flag to mark "special" components which have some amount of magic
+ # handling (generally based on command line options).
+ self._is_special_group = False
+
+ def set_parent_instance(self, parent):
+ assert parent.name == self.parent, "Unexpected parent!"
+ self.parent_instance = parent
+ self.parent_instance.children.append(self)
+
+ def get_component_references(self):
+ """get_component_references() -> iter
+
+ Return an iterator over the named references to other components from
+ this object. Items are of the form (reference-type, component-name).
+ """
+
+ # Parent references are handled specially.
+ for r in self.dependencies:
+ yield ('dependency', r)
+
+ def get_llvmbuild_fragment(self):
+ abstract
+
+class GroupComponentInfo(ComponentInfo):
+ """
+ Group components have no semantics as far as the build system is concerned,
+ but exist to help organize other components into a logical tree structure.
+ """
+
+ type_name = 'Group'
+
+ @staticmethod
+ def parse(subpath, items):
+ kwargs = ComponentInfo.parse_items(items, has_dependencies = False)
+ return GroupComponentInfo(subpath, **kwargs)
+
+ def __init__(self, subpath, name, parent):
+ ComponentInfo.__init__(self, subpath, name, [], parent)
+
+ def get_llvmbuild_fragment(self):
+ result = StringIO.StringIO()
+ print >>result, 'type = %s' % self.type_name
+ print >>result, 'name = %s' % self.name
+ print >>result, 'parent = %s' % self.parent
+ return result.getvalue()
+
+class LibraryComponentInfo(ComponentInfo):
+ type_name = 'Library'
+
+ @staticmethod
+ def parse(subpath, items):
+ kwargs = ComponentInfo.parse_items(items)
+ kwargs['library_name'] = items.get_optional_string('library_name')
+ kwargs['required_libraries'] = items.get_list('required_libraries')
+ kwargs['add_to_library_groups'] = items.get_list(
+ 'add_to_library_groups')
+ return LibraryComponentInfo(subpath, **kwargs)
+
+ def __init__(self, subpath, name, dependencies, parent, library_name,
+ required_libraries, add_to_library_groups):
+ ComponentInfo.__init__(self, subpath, name, dependencies, parent)
+
+ # If given, the name to use for the library instead of deriving it from
+ # the component name.
+ self.library_name = library_name
+
+ # The names of the library components which are required when linking
+ # with this component.
+ self.required_libraries = list(required_libraries)
+
+ # The names of the library group components this component should be
+ # considered part of.
+ self.add_to_library_groups = list(add_to_library_groups)
+
+ def get_component_references(self):
+ for r in ComponentInfo.get_component_references(self):
+ yield r
+ for r in self.required_libraries:
+ yield ('required library', r)
+ for r in self.add_to_library_groups:
+ yield ('library group', r)
+
+ def get_llvmbuild_fragment(self):
+ result = StringIO.StringIO()
+ print >>result, 'type = %s' % self.type_name
+ print >>result, 'name = %s' % self.name
+ print >>result, 'parent = %s' % self.parent
+ if self.library_name is not None:
+ print >>result, 'library_name = %s' % self.library_name
+ if self.required_libraries:
+ print >>result, 'required_libraries = %s' % ' '.join(
+ self.required_libraries)
+ if self.add_to_library_groups:
+ print >>result, 'add_to_library_groups = %s' % ' '.join(
+ self.add_to_library_groups)
+ return result.getvalue()
+
+ def get_library_name(self):
+ return self.library_name or self.name
+
+ def get_prefixed_library_name(self):
+ """
+ get_prefixed_library_name() -> str
+
+ Return the library name prefixed by the project name. This is generally
+ what the library name will be on disk.
+ """
+
+ basename = self.get_library_name()
+
+ # FIXME: We need to get the prefix information from an explicit project
+ # object, or something.
+ if basename in ('gtest', 'gtest_main'):
+ return basename
+
+ return 'LLVM%s' % basename
+
+ def get_llvmconfig_component_name(self):
+ return self.get_library_name().lower()
+
+class LibraryGroupComponentInfo(ComponentInfo):
+ type_name = 'LibraryGroup'
+
+ @staticmethod
+ def parse(subpath, items):
+ kwargs = ComponentInfo.parse_items(items, has_dependencies = False)
+ kwargs['required_libraries'] = items.get_list('required_libraries')
+ kwargs['add_to_library_groups'] = items.get_list(
+ 'add_to_library_groups')
+ return LibraryGroupComponentInfo(subpath, **kwargs)
+
+ def __init__(self, subpath, name, parent, required_libraries = [],
+ add_to_library_groups = []):
+ ComponentInfo.__init__(self, subpath, name, [], parent)
+
+ # The names of the library components which are required when linking
+ # with this component.
+ self.required_libraries = list(required_libraries)
+
+ # The names of the library group components this component should be
+ # considered part of.
+ self.add_to_library_groups = list(add_to_library_groups)
+
+ def get_component_references(self):
+ for r in ComponentInfo.get_component_references(self):
+ yield r
+ for r in self.required_libraries:
+ yield ('required library', r)
+ for r in self.add_to_library_groups:
+ yield ('library group', r)
+
+ def get_llvmbuild_fragment(self):
+ result = StringIO.StringIO()
+ print >>result, 'type = %s' % self.type_name
+ print >>result, 'name = %s' % self.name
+ print >>result, 'parent = %s' % self.parent
+ # Ordinary groups are never marked special, so default the flag to False.
+ if self.required_libraries and not getattr(self,
+ '_is_special_group', False):
+ print >>result, 'required_libraries = %s' % ' '.join(
+ self.required_libraries)
+ if self.add_to_library_groups:
+ print >>result, 'add_to_library_groups = %s' % ' '.join(
+ self.add_to_library_groups)
+ return result.getvalue()
+
+ def get_llvmconfig_component_name(self):
+ return self.name.lower()
+
+class TargetGroupComponentInfo(ComponentInfo):
+ type_name = 'TargetGroup'
+
+ @staticmethod
+ def parse(subpath, items):
+ kwargs = ComponentInfo.parse_items(items, has_dependencies = False)
+ kwargs['required_libraries'] = items.get_list('required_libraries')
+ kwargs['add_to_library_groups'] = items.get_list(
+ 'add_to_library_groups')
+ kwargs['has_jit'] = items.get_optional_bool('has_jit', False)
+ kwargs['has_asmprinter'] = items.get_optional_bool('has_asmprinter',
+ False)
+ kwargs['has_asmparser'] = items.get_optional_bool('has_asmparser',
+ False)
+ kwargs['has_disassembler'] = items.get_optional_bool('has_disassembler',
+ False)
+ return TargetGroupComponentInfo(subpath, **kwargs)
+
+ def __init__(self, subpath, name, parent, required_libraries = [],
+ add_to_library_groups = [], has_jit = False,
+ has_asmprinter = False, has_asmparser = False,
+ has_disassembler = False):
+ ComponentInfo.__init__(self, subpath, name, [], parent)
+
+ # The names of the library components which are required when linking
+ # with this component.
+ self.required_libraries = list(required_libraries)
+
+ # The names of the library group components this component should be
+ # considered part of.
+ self.add_to_library_groups = list(add_to_library_groups)
+
+ # Whether or not this target supports the JIT.
+ self.has_jit = bool(has_jit)
+
+ # Whether or not this target defines an assembly printer.
+ self.has_asmprinter = bool(has_asmprinter)
+
+ # Whether or not this target defines an assembly parser.
+ self.has_asmparser = bool(has_asmparser)
+
+ # Whether or not this target defines a disassembler.
+ self.has_disassembler = bool(has_disassembler)
+
+ # Whether or not this target is enabled. This is set in response to
+ # configuration parameters.
+ self.enabled = False
+
+ def get_component_references(self):
+ for r in ComponentInfo.get_component_references(self):
+ yield r
+ for r in self.required_libraries:
+ yield ('required library', r)
+ for r in self.add_to_library_groups:
+ yield ('library group', r)
+
+ def get_llvmbuild_fragment(self):
+ result = StringIO.StringIO()
+ print >>result, 'type = %s' % self.type_name
+ print >>result, 'name = %s' % self.name
+ print >>result, 'parent = %s' % self.parent
+ if self.required_libraries:
+ print >>result, 'required_libraries = %s' % ' '.join(
+ self.required_libraries)
+ if self.add_to_library_groups:
+ print >>result, 'add_to_library_groups = %s' % ' '.join(
+ self.add_to_library_groups)
+ for bool_key in ('has_asmparser', 'has_asmprinter', 'has_disassembler',
+ 'has_jit'):
+ if getattr(self, bool_key):
+ print >>result, '%s = 1' % (bool_key,)
+ return result.getvalue()
+
+ def get_llvmconfig_component_name(self):
+ return self.name.lower()
+
+class ToolComponentInfo(ComponentInfo):
+ type_name = 'Tool'
+
+ @staticmethod
+ def parse(subpath, items):
+ kwargs = ComponentInfo.parse_items(items)
+ kwargs['required_libraries'] = items.get_list('required_libraries')
+ return ToolComponentInfo(subpath, **kwargs)
+
+ def __init__(self, subpath, name, dependencies, parent,
+ required_libraries):
+ ComponentInfo.__init__(self, subpath, name, dependencies, parent)
+
+ # The names of the library components which are required to link this
+ # tool.
+ self.required_libraries = list(required_libraries)
+
+ def get_component_references(self):
+ for r in ComponentInfo.get_component_references(self):
+ yield r
+ for r in self.required_libraries:
+ yield ('required library', r)
+
+ def get_llvmbuild_fragment(self):
+ result = StringIO.StringIO()
+ print >>result, 'type = %s' % self.type_name
+ print >>result, 'name = %s' % self.name
+ print >>result, 'parent = %s' % self.parent
+ print >>result, 'required_libraries = %s' % ' '.join(
+ self.required_libraries)
+ return result.getvalue()
+
+class BuildToolComponentInfo(ToolComponentInfo):
+ type_name = 'BuildTool'
+
+ @staticmethod
+ def parse(subpath, items):
+ kwargs = ComponentInfo.parse_items(items)
+ kwargs['required_libraries'] = items.get_list('required_libraries')
+ return BuildToolComponentInfo(subpath, **kwargs)
+
+###
+
+class IniFormatParser(dict):
+ def get_list(self, key):
+ # Check if the value is defined.
+ value = self.get(key)
+ if value is None:
+ return []
+
+ # Lists are just whitespace separated strings.
+ return value.split()
+
+ def get_optional_string(self, key):
+ value = self.get_list(key)
+ if not value:
+ return None
+ if len(value) > 1:
+ raise ParseError("multiple values for scalar key: %r" % key)
+ return value[0]
+
+ def get_string(self, key):
+ value = self.get_optional_string(key)
+ if not value:
+ raise ParseError("missing value for required string: %r" % key)
+ return value
+
+ def get_optional_bool(self, key, default = None):
+ value = self.get_optional_string(key)
+ if not value:
+ return default
+ if value not in ('0', '1'):
+ raise ParseError("invalid value(%r) for boolean property: %r" % (
+ value, key))
+ return bool(int(value))
+
+ def get_bool(self, key):
+ value = self.get_optional_bool(key)
+ if value is None:
+ raise ParseError("missing value for required boolean: %r" % key)
+ return value
+
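+# A minimal usage sketch (the keys shown are hypothetical, not from any real
+# LLVMBuild.txt file): IniFormatParser is just a dict of raw option strings
+# with typed accessors layered on top.
+def _example_ini_format_parser():
+ items = IniFormatParser({'type' : 'Library',
+ 'required_libraries' : 'Core Support'})
+ assert items.get_string('type') == 'Library'
+ assert items.get_list('required_libraries') == ['Core', 'Support']
+ # Booleans must be spelled '0' or '1'; absent keys take the default.
+ assert items.get_optional_bool('installed', False) == False
+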
+_component_type_map = dict(
+ (t.type_name, t)
+ for t in (GroupComponentInfo,
+ LibraryComponentInfo, LibraryGroupComponentInfo,
+ ToolComponentInfo, BuildToolComponentInfo,
+ TargetGroupComponentInfo))
+def load_from_path(path, subpath):
+ # Load the LLVMBuild.txt file as an .ini format file.
+ parser = ConfigParser.RawConfigParser()
+ parser.read(path)
+
+ # Extract the common section.
+ if parser.has_section("common"):
+ common = IniFormatParser(parser.items("common"))
+ parser.remove_section("common")
+ else:
+ common = IniFormatParser({})
+
+ return common, _read_components_from_parser(parser, path, subpath)
+
+def _read_components_from_parser(parser, path, subpath):
+ # We load each section which starts with 'component' as a distinct component
+ # description (so multiple components can be described in one file).
+ for section in parser.sections():
+ if not section.startswith('component'):
+ # We don't currently expect arbitrary sections, so warn the user.
+ warning("ignoring unknown section %r in %r" % (section, path))
+ continue
+
+ # Determine the type of the component to instantiate.
+ if not parser.has_option(section, 'type'):
+ fatal("invalid component %r in %r: %s" % (
+ section, path, "no component type"))
+
+ type_name = parser.get(section, 'type')
+ type_class = _component_type_map.get(type_name)
+ if type_class is None:
+ fatal("invalid component %r in %r: %s" % (
+ section, path, "invalid component type: %r" % type_name))
+
+ # Instantiate the component based on the remaining values.
+ try:
+ info = type_class.parse(subpath,
+ IniFormatParser(parser.items(section)))
+ except TypeError:
+ print >>sys.stderr, "error: invalid component %r in %r: %s" % (
+ section, path, "unable to instantiate: %r" % type_name)
+ import traceback
+ traceback.print_exc()
+ raise SystemExit, 1
+ except ParseError, e:
+ fatal("unable to load component %r in %r: %s" % (
+ section, path, e.message))
+
+ info._source_path = path
+ yield info
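+
+# A minimal end-to-end sketch (the file contents and paths are hypothetical):
+# given an LLVMBuild.txt containing
+#
+# [component_0]
+# type = Library
+# name = Support
+# parent = Libraries
+#
+# a call such as
+#
+# common, infos = load_from_path('lib/Support/LLVMBuild.txt', '/lib/Support')
+#
+# yields a single LibraryComponentInfo whose get_llvmbuild_fragment()
+# round-trips the section shown above.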
diff --git a/utils/llvm-build/llvmbuild/configutil.py b/utils/llvm-build/llvmbuild/configutil.py
new file mode 100644
index 000000000000..b5582c34de46
--- /dev/null
+++ b/utils/llvm-build/llvmbuild/configutil.py
@@ -0,0 +1,66 @@
+"""
+Defines utilities useful for performing standard "configuration" style tasks.
+"""
+
+import re
+import os
+
+def configure_file(input_path, output_path, substitutions):
+ """configure_file(input_path, output_path, substitutions) -> bool
+
+ Given an input and output path, "configure" the file at the given input path
+ by replacing variables in the file with those given in the substitutions
+ list. Returns true if the output file was written.
+
+ The substitutions list should be given as a list of tuples (regex string,
+ replacement), where the regex and replacement will be used as in 're.sub' to
+ execute the variable replacement.
+
+ The output path's parent directory need not exist (it will be created).
+
+ If the output path already exists and the configured data matches its
+ current contents, the output file will not be modified. This is
+ designed to limit the impact of configured files on build dependencies.
+ """
+
+ # Read in the input data.
+ f = open(input_path, "rb")
+ try:
+ data = f.read()
+ finally:
+ f.close()
+
+ # Perform the substitutions.
+ for regex_string,replacement in substitutions:
+ regex = re.compile(regex_string)
+ data = regex.sub(replacement, data)
+
+ # Ensure the output parent directory exists.
+ output_parent_path = os.path.dirname(os.path.abspath(output_path))
+ if not os.path.exists(output_parent_path):
+ os.makedirs(output_parent_path)
+
+ # If the output path exists, load it and compare to the configured contents.
+ if os.path.exists(output_path):
+ current_data = None
+ try:
+ f = open(output_path, "rb")
+ try:
+ current_data = f.read()
+ except:
+ current_data = None
+ f.close()
+ except:
+ current_data = None
+
+ if current_data is not None and current_data == data:
+ return False
+
+ # Write the output contents.
+ f = open(output_path, "wb")
+ try:
+ f.write(data)
+ finally:
+ f.close()
+
+ return True
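+
+# A minimal usage sketch (the paths and variable names are hypothetical):
+# each substitution is an (uncompiled regex, replacement) pair applied with
+# re.sub over the entire file contents.
+def _example_configure_file():
+ wrote = configure_file('config.h.in', 'config.h',
+ [(r'@VERSION@', '3.1'),
+ (r'@HAVE_FOO@', '1')])
+ # False means the output already held the configured contents and was
+ # left untouched, so downstream build dependencies stay quiet.
+ return wrote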
diff --git a/utils/llvm-build/llvmbuild/main.py b/utils/llvm-build/llvmbuild/main.py
new file mode 100644
index 000000000000..36bca872e5f1
--- /dev/null
+++ b/utils/llvm-build/llvmbuild/main.py
@@ -0,0 +1,868 @@
+import StringIO
+import os
+import sys
+
+import componentinfo
+import configutil
+
+from util import *
+
+###
+
+def cmake_quote_string(value):
+ """
+ cmake_quote_string(value) -> str
+
+ Return a quoted form of the given value that is suitable for use in CMake
+ language files.
+ """
+
+ # Currently, we only handle escaping backslashes.
+ value = value.replace("\\", "\\\\")
+
+ return value
+
+def cmake_quote_path(value):
+ """
+ cmake_quote_path(value) -> str
+
+ Return a quoted form of the given value that is suitable for use in CMake
+ language files.
+ """
+
+ # CMake has a bug in its Makefile generator that doesn't properly quote
+ # strings it generates. So instead of using proper quoting, we just use "/"
+ # style paths. Currently, we only handle escaping backslashes.
+ value = value.replace("\\", "/")
+
+ return value
+
+def mk_quote_string_for_target(value):
+ """
+ mk_quote_string_for_target(target_name) -> str
+
+ Return a quoted form of the given target_name suitable for including in a
+ Makefile as a target name.
+ """
+
+ # The only quoting we currently perform is for ':', to support msys users.
+ return value.replace(":", "\\:")
+
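+# A small sketch of the quoting helpers above (the paths are hypothetical):
+#
+# cmake_quote_string('C:\\foo') == 'C:\\\\foo'
+# cmake_quote_path('C:\\foo') == 'C:/foo'
+# mk_quote_string_for_target('c:/tools') == 'c\\:/tools'
+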
+def make_install_dir(path):
+ """
+ make_install_dir(path) -> None
+
+ Create the given directory path for installation, including any parents.
+ """
+
+ # os.makedirs considers it an error to be called with an existing path.
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+###
+
+class LLVMProjectInfo(object):
+ @staticmethod
+ def load_infos_from_path(llvmbuild_source_root):
+ def recurse(subpath):
+ # Load the LLVMBuild file.
+ llvmbuild_path = os.path.join(llvmbuild_source_root + subpath,
+ 'LLVMBuild.txt')
+ if not os.path.exists(llvmbuild_path):
+ fatal("missing LLVMBuild.txt file at: %r" % (llvmbuild_path,))
+
+ # Parse the components from it.
+ common,info_iter = componentinfo.load_from_path(llvmbuild_path,
+ subpath)
+ for info in info_iter:
+ yield info
+
+ # Recurse into the specified subdirectories.
+ for subdir in common.get_list("subdirectories"):
+ for item in recurse(os.path.join(subpath, subdir)):
+ yield item
+
+ return recurse("/")
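+
+ # For example (hypothetical layout): if /LLVMBuild.txt declares
+ # "subdirectories = lib tools", recurse() descends into /lib and /tools
+ # and yields every component their LLVMBuild.txt files define.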
+
+ @staticmethod
+ def load_from_path(source_root, llvmbuild_source_root):
+ infos = list(
+ LLVMProjectInfo.load_infos_from_path(llvmbuild_source_root))
+
+ return LLVMProjectInfo(source_root, infos)
+
+ def __init__(self, source_root, component_infos):
+ # Store our simple ivars.
+ self.source_root = source_root
+ self.component_infos = list(component_infos)
+ self.component_info_map = None
+ self.ordered_component_infos = None
+
+ def validate_components(self):
+ """validate_components() -> None
+
+ Validate that the project components are well-defined. Among other
+ things, this checks that:
+ - Components have valid references.
+ - Component references do not form cycles.
+
+ We also construct the map from component names to info, and the
+ topological ordering of components.
+ """
+
+ # Create the component info map and validate that component names are
+ # unique.
+ self.component_info_map = {}
+ for ci in self.component_infos:
+ existing = self.component_info_map.get(ci.name)
+ if existing is not None:
+ # We found a duplicate component name, report it and error out.
+ fatal("found duplicate component %r (at %r and %r)" % (
+ ci.name, ci.subpath, existing.subpath))
+ self.component_info_map[ci.name] = ci
+
+ # Disallow 'all' as a component name, which is a special case.
+ if 'all' in self.component_info_map:
+ fatal("project is not allowed to define 'all' component")
+
+ # Add the root component.
+ if '$ROOT' in self.component_info_map:
+ fatal("project is not allowed to define $ROOT component")
+ self.component_info_map['$ROOT'] = componentinfo.GroupComponentInfo(
+ '/', '$ROOT', None)
+ self.component_infos.append(self.component_info_map['$ROOT'])
+
+ # Topologically order the component information according to their
+ # component references.
+ def visit_component_info(ci, current_stack, current_set):
+ # Check for cycles.
+ if ci in current_set:
+ # We found a cycle, report it and error out.
+ cycle_description = ' -> '.join(
+ '%r (%s)' % (ci.name, relation)
+ for relation,ci in current_stack)
+ fatal("found cycle to %r after following: %s -> %s" % (
+ ci.name, cycle_description, ci.name))
+
+ # If we have already visited this item, we are done.
+ if ci not in components_to_visit:
+ return
+
+ # Otherwise, mark the component info as visited and traverse.
+ components_to_visit.remove(ci)
+
+ # Validate the parent reference, which we treat specially.
+ if ci.parent is not None:
+ parent = self.component_info_map.get(ci.parent)
+ if parent is None:
+ fatal("component %r has invalid reference %r (via %r)" % (
+ ci.name, ci.parent, 'parent'))
+ ci.set_parent_instance(parent)
+
+ for relation,referent_name in ci.get_component_references():
+ # Validate that the reference is ok.
+ referent = self.component_info_map.get(referent_name)
+ if referent is None:
+ fatal("component %r has invalid reference %r (via %r)" % (
+ ci.name, referent_name, relation))
+
+ # Visit the reference.
+ current_stack.append((relation,ci))
+ current_set.add(ci)
+ visit_component_info(referent, current_stack, current_set)
+ current_set.remove(ci)
+ current_stack.pop()
+
+ # Finally, add the component info to the ordered list.
+ self.ordered_component_infos.append(ci)
+
+ # FIXME: We aren't actually correctly checking for cycles along the
+ # parent edges. Haven't decided how I want to handle this -- I thought
+ # about only checking cycles by relation type. If we do that, it falls
+ # out easily. If we don't, we should special case the check.
+
+ self.ordered_component_infos = []
+ components_to_visit = set(self.component_infos)
+ while components_to_visit:
+ visit_component_info(iter(components_to_visit).next(), [], set())
+
+ # Canonicalize children lists.
+ for c in self.ordered_component_infos:
+ c.children.sort(key = lambda c: c.name)
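+
+ # A tiny illustration (hypothetical components): if A lists B in its
+ # required_libraries and B lists A, the traversal above reports
+ # "found cycle to 'A' after following: 'A' (required library) ->
+ # 'B' (required library) -> 'A'" and exits via fatal().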
+
+ def print_tree(self):
+ def visit(node, depth = 0):
+ print '%s%-40s (%s)' % (' '*depth, node.name, node.type_name)
+ for c in node.children:
+ visit(c, depth + 1)
+ visit(self.component_info_map['$ROOT'])
+
+ def write_components(self, output_path):
+ # Organize all the components by the directory their LLVMBuild file
+ # should go in.
+ info_basedir = {}
+ for ci in self.component_infos:
+ # Ignore the $ROOT component.
+ if ci.parent is None:
+ continue
+
+ info_basedir[ci.subpath] = info_basedir.get(ci.subpath, []) + [ci]
+
+ # Compute the list of subdirectories to scan.
+ subpath_subdirs = {}
+ for ci in self.component_infos:
+ # Ignore root components.
+ if ci.subpath == '/':
+ continue
+
+ # Otherwise, append this subpath to the parent list.
+ parent_path = os.path.dirname(ci.subpath)
+ subpath_subdirs[parent_path] = parent_list = subpath_subdirs.get(
+ parent_path, set())
+ parent_list.add(os.path.basename(ci.subpath))
+
+ # Generate the build files.
+ for subpath, infos in info_basedir.items():
+ # Order the components by name to have a canonical ordering.
+ infos.sort(key = lambda ci: ci.name)
+
+ # Format the components into llvmbuild fragments.
+ fragments = []
+
+ # Add the common fragments.
+ subdirectories = subpath_subdirs.get(subpath)
+ if subdirectories:
+ fragment = """\
+subdirectories = %s
+""" % (" ".join(sorted(subdirectories)),)
+ fragments.append(("common", fragment))
+
+ # Add the component fragments.
+ num_common_fragments = len(fragments)
+ for ci in infos:
+ fragment = ci.get_llvmbuild_fragment()
+ if fragment is None:
+ continue
+
+ name = "component_%d" % (len(fragments) - num_common_fragments)
+ fragments.append((name, fragment))
+
+ if not fragments:
+ continue
+
+ assert subpath.startswith('/')
+ directory_path = os.path.join(output_path, subpath[1:])
+
+ # Create the directory if it does not already exist.
+ if not os.path.exists(directory_path):
+ os.makedirs(directory_path)
+
+ # In an effort to preserve comments (which aren't parsed), read in
+ # the original file and extract the comments. We only know how to
+ # associate comments that prefix a section name.
+ f = open(infos[0]._source_path)
+ comments_map = {}
+ comment_block = ""
+ for ln in f:
+ if ln.startswith(';'):
+ comment_block += ln
+ elif ln.startswith('[') and ln.endswith(']\n'):
+ comments_map[ln[1:-2]] = comment_block
+ else:
+ comment_block = ""
+ f.close()
+
+ # Create the LLVMBuild file.
+ file_path = os.path.join(directory_path, 'LLVMBuild.txt')
+ f = open(file_path, "w")
+
+ # Write the header.
+ header_fmt = ';===- %s %s-*- Conf -*--===;'
+ header_name = '.' + os.path.join(subpath, 'LLVMBuild.txt')
+ header_pad = '-' * (80 - len(header_fmt % (header_name, '')))
+ header_string = header_fmt % (header_name, header_pad)
+ print >>f, """\
+%s
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+""" % header_string
+
+ # Write out each component fragment.
+ for name,fragment in fragments:
+ comment = comments_map.get(name)
+ if comment is not None:
+ f.write(comment)
+ print >>f, "[%s]" % name
+ f.write(fragment)
+ if fragment is not fragments[-1][1]:
+ print >>f
+
+ f.close()
+
+ def write_library_table(self, output_path):
+ # Write out the mapping from component names to required libraries.
+ #
+ # We do this in topological order so that we know we can append the
+ # dependencies for added library groups.
+ entries = {}
+ for c in self.ordered_component_infos:
+ # Only certain components are in the table.
+ if c.type_name not in ('Library', 'LibraryGroup', 'TargetGroup'):
+ continue
+
+ # Compute the llvm-config "component name". For historical reasons,
+ # this is lowercased based on the library name.
+ llvmconfig_component_name = c.get_llvmconfig_component_name()
+
+ # Get the library name, or None for LibraryGroups.
+ if c.type_name == 'Library':
+ library_name = c.get_prefixed_library_name()
+ else:
+ library_name = None
+
+ # Get the component names of all the required libraries.
+ required_llvmconfig_component_names = [
+ self.component_info_map[dep].get_llvmconfig_component_name()
+ for dep in c.required_libraries]
+
+ # Insert the entries for library groups we should add to.
+ for dep in c.add_to_library_groups:
+ entries[dep][2].append(llvmconfig_component_name)
+
+ # Add the entry.
+ entries[c.name] = (llvmconfig_component_name, library_name,
+ required_llvmconfig_component_names)
+
+ # Convert to a list of entries and sort by name.
+ entries = entries.values()
+
+ # Create an 'all' pseudo component. We keep the dependency list small by
+ # only listing entries that have no other dependents.
+ root_entries = set(e[0] for e in entries)
+ for _,_,deps in entries:
+ root_entries -= set(deps)
+ entries.append(('all', None, root_entries))
+
+ entries.sort()
+
+ # Compute the maximum number of required libraries, plus one so there is
+ # always a sentinel.
+ max_required_libraries = max(len(deps)
+ for _,_,deps in entries) + 1
+
+ # Write out the library table.
+ make_install_dir(os.path.dirname(output_path))
+ f = open(output_path, 'w')
+ print >>f, """\
+//===- llvm-build generated file --------------------------------*- C++ -*-===//
+//
+// Component Library Dependency Table
+//
+// Automatically generated file, do not edit!
+//
+//===----------------------------------------------------------------------===//
+"""
+ print >>f, 'struct AvailableComponent {'
+ print >>f, ' /// The name of the component.'
+ print >>f, ' const char *Name;'
+ print >>f, ''
+ print >>f, ' /// The name of the library for this component (or NULL).'
+ print >>f, ' const char *Library;'
+ print >>f, ''
+ print >>f, '\
+ /// The list of libraries required when linking this component.'
+ print >>f, ' const char *RequiredLibraries[%d];' % (
+ max_required_libraries)
+ print >>f, '} AvailableComponents[%d] = {' % len(entries)
+ for name,library_name,required_names in entries:
+ if library_name is None:
+ library_name_as_cstr = '0'
+ else:
+ library_name_as_cstr = '"lib%s.a"' % library_name
+ print >>f, ' { "%s", %s, { %s } },' % (
+ name, library_name_as_cstr,
+ ', '.join('"%s"' % dep
+ for dep in required_names))
+ print >>f, '};'
+ f.close()
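+
+ # For illustration (hypothetical component): a Library named 'Support'
+ # with no required libraries becomes the table entry
+ # { "support", "libLLVMSupport.a", { } },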
+
+ def get_required_libraries_for_component(self, ci, traverse_groups = False):
+ """
+ get_required_libraries_for_component(component_info) -> iter
+
+ Given a Library component info descriptor, return an iterator over all
+ of the directly required libraries for linking with this component. If
+ traverse_groups is True, then library and target groups will be
+ traversed to include their required libraries.
+ """
+
+ assert ci.type_name in ('Library', 'LibraryGroup', 'TargetGroup')
+
+ for name in ci.required_libraries:
+ # Get the dependency info.
+ dep = self.component_info_map[name]
+
+ # If it is a library, yield it.
+ if dep.type_name == 'Library':
+ yield dep
+ continue
+
+ # Otherwise if it is a group, yield or traverse depending on what
+ # was requested.
+ if dep.type_name in ('LibraryGroup', 'TargetGroup'):
+ if not traverse_groups:
+ yield dep
+ continue
+
+ for res in self.get_required_libraries_for_component(dep, True):
+ yield res
+
+ def get_fragment_dependencies(self):
+ """
+ get_fragment_dependencies() -> iter
+
+ Compute the list of files (as absolute paths) on which the output
+ fragments depend (i.e., files for which a modification should trigger a
+ rebuild of the fragment).
+ """
+
+ # Construct a list of all the dependencies of the Makefile fragment
+ # itself. These include all the LLVMBuild files themselves, as well as
+ # all of our own sources.
+ #
+ # Many components may come from the same file, so we make sure to unique
+ # these.
+ build_paths = set()
+ for ci in self.component_infos:
+ p = os.path.join(self.source_root, ci.subpath[1:], 'LLVMBuild.txt')
+ if p not in build_paths:
+ yield p
+ build_paths.add(p)
+
+ # Gather the list of necessary sources by just finding all loaded
+ # modules that are inside the LLVM source tree.
+ for module in sys.modules.values():
+ # Find the module path.
+ if not hasattr(module, '__file__'):
+ continue
+ path = getattr(module, '__file__')
+ if not path:
+ continue
+
+ # Strip off any compiled suffix.
+ if os.path.splitext(path)[1] in ['.pyc', '.pyo', '.pyd']:
+ path = path[:-1]
+
+ # If the path exists and is in the source tree, consider it a
+ # dependency.
+ if (path.startswith(self.source_root) and os.path.exists(path)):
+ yield path
+
+ def write_cmake_fragment(self, output_path):
+ """
+ write_cmake_fragment(output_path) -> None
+
+ Generate a CMake fragment which includes all of the collated LLVMBuild
+ information in a format that is easily digestible by CMake. The exact
+ contents of this are closely tied to how the CMake configuration
+ integrates LLVMBuild, see CMakeLists.txt in the top-level.
+ """
+
+ dependencies = list(self.get_fragment_dependencies())
+
+ # Write out the CMake fragment.
+ make_install_dir(os.path.dirname(output_path))
+ f = open(output_path, 'w')
+
+ # Write the header.
+ header_fmt = '\
+#===-- %s - LLVMBuild Configuration for LLVM %s-*- CMake -*--===#'
+ header_name = os.path.basename(output_path)
+ header_pad = '-' * (80 - len(header_fmt % (header_name, '')))
+ header_string = header_fmt % (header_name, header_pad)
+ print >>f, """\
+%s
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# This file contains the LLVMBuild project information in a format easily
+# consumed by the CMake based build system.
+#
+# This file is autogenerated by llvm-build, do not edit!
+#
+#===------------------------------------------------------------------------===#
+""" % header_string
+
+ # Write the dependency information in the best way we can.
+ print >>f, """
+# LLVMBuild CMake fragment dependencies.
+#
+# CMake has no builtin way to declare that the configuration depends on
+# a particular file. However, a side effect of configure_file is to add
+# said input file to CMake's internal dependency list. So, we use that
+# and a dummy output file to communicate the dependency information to
+# CMake.
+#
+# FIXME: File a CMake RFE to get a properly supported version of this
+# feature."""
+ for dep in dependencies:
+ print >>f, """\
+configure_file(\"%s\"
+ ${CMAKE_CURRENT_BINARY_DIR}/DummyConfigureOutput)""" % (
+ cmake_quote_path(dep),)
+
+ # Write the properties we use to encode the required library dependency
+ # information in a form CMake can easily use directly.
+ print >>f, """
+# Explicit library dependency information.
+#
+# The following property assignments effectively create a map from component
+# names to required libraries, in a way that is easily accessed from CMake."""
+ for ci in self.ordered_component_infos:
+ # We only write the information for libraries currently.
+ if ci.type_name != 'Library':
+ continue
+
+ print >>f, """\
+set_property(GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_%s %s)""" % (
+ ci.get_prefixed_library_name(), " ".join(sorted(
+ dep.get_prefixed_library_name()
+ for dep in self.get_required_libraries_for_component(ci))))
+
+ f.close()
+
+ def write_make_fragment(self, output_path):
+ """
+ write_make_fragment(output_path) -> None
+
+ Generate a Makefile fragment which includes all of the collated
+ LLVMBuild information in a format that is easily digestible by a
+ Makefile. The exact contents of this are closely tied to how the LLVM
+ Makefiles integrate LLVMBuild, see Makefile.rules in the top-level.
+ """
+
+ dependencies = list(self.get_fragment_dependencies())
+
+ # Write out the Makefile fragment.
+ make_install_dir(os.path.dirname(output_path))
+ f = open(output_path, 'w')
+
+ # Write the header.
+ header_fmt = '\
+#===-- %s - LLVMBuild Configuration for LLVM %s-*- Makefile -*--===#'
+ header_name = os.path.basename(output_path)
+ header_pad = '-' * (80 - len(header_fmt % (header_name, '')))
+ header_string = header_fmt % (header_name, header_pad)
+ print >>f, """\
+%s
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+#
+# This file contains the LLVMBuild project information in a format easily
+# consumed by the Makefile based build system.
+#
+# This file is autogenerated by llvm-build, do not edit!
+#
+#===------------------------------------------------------------------------===#
+""" % header_string
+
+ # Write the dependencies for the fragment.
+ #
+ # FIXME: Technically, we need to properly quote for Make here.
+ print >>f, """\
+# Clients must explicitly enable LLVMBUILD_INCLUDE_DEPENDENCIES to get
+# these dependencies. This is a compromise to help improve the
+# performance of recursive Make systems."""
+ print >>f, 'ifeq ($(LLVMBUILD_INCLUDE_DEPENDENCIES),1)'
+ print >>f, "# The dependencies for this Makefile fragment itself."
+ print >>f, "%s: \\" % (mk_quote_string_for_target(output_path),)
+ for dep in dependencies:
+ print >>f, "\t%s \\" % (dep,)
+ print >>f
+
+ # Generate dummy rules for each of the dependencies, so that things
+ # continue to work correctly if any of those files are moved or removed.
+ print >>f, """\
+# The dummy targets to allow proper regeneration even when files are moved or
+# removed."""
+ for dep in dependencies:
+ print >>f, "%s:" % (mk_quote_string_for_target(dep),)
+ print >>f, 'endif'
+
+ f.close()
+
+def add_magic_target_components(parser, project, opts):
+ """add_magic_target_components(project, opts) -> None
+
+ Add the "magic" target based components to the project, which can only be
+ determined based on the target configuration options.
+
+ This currently is responsible for populating the required_libraries list of
+ the "all-targets", "Native", "NativeCodeGen", and "Engine" components.
+ """
+
+ # Determine the available targets.
+ available_targets = dict((ci.name,ci)
+ for ci in project.component_infos
+ if ci.type_name == 'TargetGroup')
+
+ # Find the configured native target.
+
+ # We handle a few special cases of target names here for historical
+ # reasons, as these are the names configure currently comes up with.
+ native_target_name = { 'x86' : 'X86',
+ 'x86_64' : 'X86',
+ 'Unknown' : None }.get(opts.native_target,
+ opts.native_target)
+ if native_target_name is None:
+ native_target = None
+ else:
+ native_target = available_targets.get(native_target_name)
+ if native_target is None:
+ parser.error("invalid native target: %r (not in project)" % (
+ opts.native_target,))
+ if native_target.type_name != 'TargetGroup':
+ parser.error("invalid native target: %r (not a target)" % (
+ opts.native_target,))
+
+ # Find the list of targets to enable.
+ if opts.enable_targets is None:
+ enable_targets = available_targets.values()
+ else:
+ # We support both space separated and semi-colon separated lists.
+ if ' ' in opts.enable_targets:
+ enable_target_names = opts.enable_targets.split()
+ else:
+ enable_target_names = opts.enable_targets.split(';')
+
+ enable_targets = []
+ for name in enable_target_names:
+ target = available_targets.get(name)
+ if target is None:
+ parser.error("invalid target to enable: %r (not in project)" % (
+ name,))
+ if target.type_name != 'TargetGroup':
+ parser.error("invalid target to enable: %r (not a target)" % (
+ name,))
+ enable_targets.append(target)
+
+ # Find the special library groups we are going to populate. We enforce that
+ # these appear in the project (instead of just adding them) so that they at
+ # least have an explicit representation in the project LLVMBuild files (and
+ # comments explaining how they are populated).
+ def find_special_group(name):
+ info = info_map.get(name)
+ if info is None:
+ fatal("expected project to contain special %r component" % (
+ name,))
+
+ if info.type_name != 'LibraryGroup':
+ fatal("special component %r should be a LibraryGroup" % (
+ name,))
+
+ if info.required_libraries:
+ fatal("special component %r must have empty %r list" % (
+ name, 'required_libraries'))
+ if info.add_to_library_groups:
+ fatal("special component %r must have empty %r list" % (
+ name, 'add_to_library_groups'))
+
+ info._is_special_group = True
+ return info
+
+ info_map = dict((ci.name, ci) for ci in project.component_infos)
+ all_targets = find_special_group('all-targets')
+ native_group = find_special_group('Native')
+ native_codegen_group = find_special_group('NativeCodeGen')
+ engine_group = find_special_group('Engine')
+
+ # Set the enabled bit in all the target groups, and append to the
+ # all-targets list.
+ for ci in enable_targets:
+ all_targets.required_libraries.append(ci.name)
+ ci.enabled = True
+
+ # If we have a native target, then that defines the native and
+ # native_codegen libraries.
+ if native_target and native_target.enabled:
+ native_group.required_libraries.append(native_target.name)
+ native_codegen_group.required_libraries.append(
+ '%sCodeGen' % native_target.name)
+
+ # If we have a native target with a JIT, use that for the engine. Otherwise,
+ # use the interpreter.
+ if native_target and native_target.enabled and native_target.has_jit:
+ engine_group.required_libraries.append('JIT')
+ engine_group.required_libraries.append(native_group.name)
+ else:
+ engine_group.required_libraries.append('Interpreter')
+
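+# For example (hypothetical configuration): with --native-target=x86_64 and
+# an X86 target group that sets has_jit = 1, this populates Native with X86,
+# NativeCodeGen with X86CodeGen, and Engine with JIT and Native; without a
+# JIT-capable native target, Engine falls back to the Interpreter.
+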
+def main():
+ from optparse import OptionParser, OptionGroup
+ parser = OptionParser("usage: %prog [options]")
+
+ group = OptionGroup(parser, "Input Options")
+ group.add_option("", "--source-root", dest="source_root", metavar="PATH",
+ help="Path to the LLVM source (inferred if not given)",
+ action="store", default=None)
+ group.add_option("", "--llvmbuild-source-root",
+ dest="llvmbuild_source_root",
+ help=(
+ "If given, an alternate path to search for LLVMBuild.txt files"),
+ action="store", default=None, metavar="PATH")
+ group.add_option("", "--build-root", dest="build_root", metavar="PATH",
+ help="Path to the build directory (if needed) [%default]",
+ action="store", default=None)
+ parser.add_option_group(group)
+
+ group = OptionGroup(parser, "Output Options")
+ group.add_option("", "--print-tree", dest="print_tree",
+ help="Print out the project component tree [%default]",
+ action="store_true", default=False)
+ group.add_option("", "--write-llvmbuild", dest="write_llvmbuild",
+ help="Write out the LLVMBuild.txt files to PATH",
+ action="store", default=None, metavar="PATH")
+ group.add_option("", "--write-library-table",
+ dest="write_library_table", metavar="PATH",
+ help="Write the C++ library dependency table to PATH",
+ action="store", default=None)
+ group.add_option("", "--write-cmake-fragment",
+ dest="write_cmake_fragment", metavar="PATH",
+ help="Write the CMake project information to PATH",
+ action="store", default=None)
+ group.add_option("", "--write-make-fragment",
+ dest="write_make_fragment", metavar="PATH",
+ help="Write the Makefile project information to PATH",
+ action="store", default=None)
+ group.add_option("", "--configure-target-def-file",
+ dest="configure_target_def_files",
+ help="""Configure the given file at SUBPATH (relative to
+the inferred or given source root, and with a '.in' suffix) by replacing certain
+substitution variables with lists of targets that support certain features (for
+example, targets with AsmPrinters) and write the result to the build root (as
+given by --build-root) at the same SUBPATH""",
+ metavar="SUBPATH", action="append", default=None)
+ parser.add_option_group(group)
+
+ group = OptionGroup(parser, "Configuration Options")
+ group.add_option("", "--native-target",
+ dest="native_target", metavar="NAME",
+ help=("Treat the named target as the 'native' one, if "
+ "given [%default]"),
+ action="store", default=None)
+ group.add_option("", "--enable-targets",
+ dest="enable_targets", metavar="NAMES",
+ help=("Enable the given space or semi-colon separated "
+ "list of targets, or all targets if not present"),
+ action="store", default=None)
+ parser.add_option_group(group)
+
+ (opts, args) = parser.parse_args()
+
+ # Determine the LLVM source path, if not given.
+ source_root = opts.source_root
+ if source_root:
+ if not os.path.exists(os.path.join(source_root, 'lib', 'VMCore',
+ 'Function.cpp')):
+ parser.error('invalid LLVM source root: %r' % source_root)
+ else:
+ llvmbuild_path = os.path.dirname(__file__)
+ llvm_build_path = os.path.dirname(llvmbuild_path)
+ utils_path = os.path.dirname(llvm_build_path)
+ source_root = os.path.dirname(utils_path)
+ if not os.path.exists(os.path.join(source_root, 'lib', 'VMCore',
+ 'Function.cpp')):
+ parser.error('unable to infer LLVM source root, please specify')
+
+ # Construct the LLVM project information.
+ llvmbuild_source_root = opts.llvmbuild_source_root or source_root
+ project_info = LLVMProjectInfo.load_from_path(
+ source_root, llvmbuild_source_root)
+
+ # Add the magic target based components.
+ add_magic_target_components(parser, project_info, opts)
+
+ # Validate the project component info.
+ project_info.validate_components()
+
+ # Print the component tree, if requested.
+ if opts.print_tree:
+ project_info.print_tree()
+
+ # Write out the components, if requested. This is useful for auto-upgrading
+ # the schema.
+ if opts.write_llvmbuild:
+ project_info.write_components(opts.write_llvmbuild)
+
+ # Write out the required library table, if requested.
+ if opts.write_library_table:
+ project_info.write_library_table(opts.write_library_table)
+
+ # Write out the make fragment, if requested.
+ if opts.write_make_fragment:
+ project_info.write_make_fragment(opts.write_make_fragment)
+
+ # Write out the cmake fragment, if requested.
+ if opts.write_cmake_fragment:
+ project_info.write_cmake_fragment(opts.write_cmake_fragment)
+
+ # Configure target definition files, if requested.
+ if opts.configure_target_def_files:
+ # Verify we were given a build root.
+ if not opts.build_root:
+ parser.error("must specify --build-root when using "
+ "--configure-target-def-file")
+
+ # Create the substitution list.
+ available_targets = [ci for ci in project_info.component_infos
+ if ci.type_name == 'TargetGroup']
+ substitutions = [
+ ("@LLVM_ENUM_TARGETS@",
+ ' '.join('LLVM_TARGET(%s)' % ci.name
+ for ci in available_targets)),
+ ("@LLVM_ENUM_ASM_PRINTERS@",
+ ' '.join('LLVM_ASM_PRINTER(%s)' % ci.name
+ for ci in available_targets
+ if ci.has_asmprinter)),
+ ("@LLVM_ENUM_ASM_PARSERS@",
+ ' '.join('LLVM_ASM_PARSER(%s)' % ci.name
+ for ci in available_targets
+ if ci.has_asmparser)),
+ ("@LLVM_ENUM_DISASSEMBLERS@",
+ ' '.join('LLVM_DISASSEMBLER(%s)' % ci.name
+ for ci in available_targets
+ if ci.has_disassembler))]
+
+ # Configure the given files.
+ for subpath in opts.configure_target_def_files:
+ inpath = os.path.join(source_root, subpath + '.in')
+ outpath = os.path.join(opts.build_root, subpath)
+ result = configutil.configure_file(inpath, outpath, substitutions)
+ if not result:
+ note("configured file %r hasn't changed" % outpath)
+
+if __name__ == '__main__':
+ main()
diff --git a/utils/llvm-build/llvmbuild/util.py b/utils/llvm-build/llvmbuild/util.py
new file mode 100644
index 000000000000..e581af23d452
--- /dev/null
+++ b/utils/llvm-build/llvmbuild/util.py
@@ -0,0 +1,13 @@
+import os
+import sys
+
+def _write_message(kind, message):
+ program = os.path.basename(sys.argv[0])
+ print >>sys.stderr, '%s: %s: %s' % (program, kind, message)
+
+note = lambda message: _write_message('note', message)
+warning = lambda message: _write_message('warning', message)
+error = lambda message: _write_message('error', message)
+fatal = lambda message: (_write_message('fatal error', message), sys.exit(1))
+
+__all__ = ['note', 'warning', 'error', 'fatal']
diff --git a/utils/llvm-compilers-check b/utils/llvm-compilers-check
new file mode 100755
index 000000000000..623ebc6a32cc
--- /dev/null
+++ b/utils/llvm-compilers-check
@@ -0,0 +1,577 @@
+#!/usr/bin/python3
+##===- utils/llvm-compilers-check - Build the LLVM project -----*-python-*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This script builds many different flavors of the LLVM ecosystem. It
+# will build LLVM, Clang and dragonegg as well as run tests on them.
+# This script is convenient to use to check builds and tests before
+# committing changes to the upstream repository
+#
+# A typical source setup uses three trees and looks like this:
+#
+# official
+# dragonegg
+# llvm
+# tools
+# clang
+# staging
+# dragonegg
+# llvm
+# tools
+# clang
+# commit
+# dragonegg
+# llvm
+# tools
+# clang
+#
+# In a typical workflow, the "official" tree always contains unchanged
+# sources from the main LLVM project repositories. The "staging" tree
+# is where local work is done. A set of changes resides there waiting
+# to be moved upstream. The "commit" tree is where changes from
+# "staging" make their way upstream. Individual incremental changes
+# from "staging" are applied to "commit" and committed upstream after
+# a successful build and test run. A successful build is one in which
+# testing results in no more failures than seen in the testing of the
+# "official" tree.
+#
+# A build may be invoked as follows:
+#
+# llvm-compilers-check --src=~/llvm/commit --src=~/llvm/staging --src=~/llvm/official
+# --build=debug --build=release --build=paranoid
+# --prefix=/home/greened/install --builddir=/home/greened/build
+#
+# This will build the LLVM ecosystem, including LLVM, Clang, and
+# dragonegg, putting build results in ~/build and installing tools in
+# ~/install. llvm-compilers-check creates separate build and install
+# directories for each source/build flavor. In the above example,
+# llvm-compilers-check will build debug, release, and paranoid (debug+checks)
+# flavors from each source tree (official, staging and commit) for a
+# total of nine builds. All builds will be run in parallel.
+#
+# The user may control parallelism via the --jobs and --threads
+# switches. --jobs tells llvm-compilers-check the maximum total
+# number of builds to activate in parallel. The user may think of it
+# as equivalent to the GNU make -j switch. --threads tells
+# llvm-compilers-check how many worker threads to use to accomplish
+# those builds. If --threads is less than --jobs, --threads workers
+# will be launched and each one will pick a source/flavor combination
+# to build. Then llvm-compilers-check will invoke GNU make with -j
+# (--jobs / --threads) to use up the remaining job capacity. Once a
+# worker is finished with a build, it will pick another combination
+# off the list and start building it.
+#
+##===----------------------------------------------------------------------===##
+
+import optparse
+import os
+import sys
+import threading
+import queue
+import logging
+import traceback
+import subprocess
+import re
+
+# TODO: Use shutil.which when it is available (3.2 or later)
+def find_executable(executable, path=None):
+ """Try to find 'executable' in the directories listed in 'path' (a
+ string listing directories separated by 'os.pathsep'; defaults to
+ os.environ['PATH']). Returns the complete filename or None if not
+ found.
+ """
+ if path is None:
+ path = os.environ['PATH']
+ paths = path.split(os.pathsep)
+ extlist = ['']
+ if os.name == 'os2':
+ (base, ext) = os.path.splitext(executable)
+ # executable files on OS/2 can have an arbitrary extension, but
+ # .exe is automatically appended if no dot is present in the name
+ if not ext:
+ executable = executable + ".exe"
+ elif sys.platform == 'win32':
+ pathext = os.environ['PATHEXT'].lower().split(os.pathsep)
+ (base, ext) = os.path.splitext(executable)
+ if ext.lower() not in pathext:
+ extlist = pathext
+ for ext in extlist:
+ execname = executable + ext
+ if os.path.isfile(execname):
+ return execname
+ else:
+ for p in paths:
+ f = os.path.join(p, execname)
+ if os.path.isfile(f):
+ return f
+ else:
+ return None
+
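+# A quick sketch (results depend on the local system):
+#
+# find_executable("make") # e.g. "/usr/bin/make"
+# find_executable("no-such-bin") # None
+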
+def is_executable(fpath):
+ return os.path.exists(fpath) and os.access(fpath, os.X_OK)
+
+def add_options(parser):
+ parser.add_option("-v", "--verbose", action="store_true",
+ default=False,
+ help=("Output informational messages"
+ " [default: %default]"))
+ parser.add_option("--src", action="append",
+ help=("Top-level source directory [default: %default]"))
+ parser.add_option("--build", action="append",
+ help=("Build types to run [default: %default]"))
+ parser.add_option("--cc", default=find_executable("cc"),
+ help=("The C compiler to use [default: %default]"))
+ parser.add_option("--cxx", default=find_executable("c++"),
+ help=("The C++ compiler to use [default: %default]"))
+ parser.add_option("--threads", default=4, type="int",
+ help=("The number of worker threads to use "
+ "[default: %default]"))
+ parser.add_option("--jobs", "-j", default=8, type="int",
+ help=("The number of simultaneous build jobs "
+ "[default: %default]"))
+ parser.add_option("--prefix",
+ help=("Root install directory [default: %default]"))
+ parser.add_option("--builddir",
+ help=("Root build directory [default: %default]"))
+ parser.add_option("--extra-llvm-config-flags", default="",
+ help=("Extra flags to pass to llvm configure [default: %default]"))
+ parser.add_option("--force-configure", default=False, action="store_true",
+ help=("Force reconfigure of all components"))
+ parser.add_option("--no-dragonegg", default=False, action="store_true",
+ help=("Do not build dragonegg"))
+ parser.add_option("--no-install", default=False, action="store_true",
+ help=("Do not do installs"))
+ return
+
+def check_options(parser, options, valid_builds):
+ # See if we're building valid flavors.
+ for build in options.build:
+ if (build not in valid_builds):
+ parser.error("'" + build + "' is not a valid build flavor "
+ + str(valid_builds))
+
+ # See if we can find source directories.
+ for src in options.src:
+ for component in components:
+ component = component.rstrip("2")
+ compsrc = src + "/" + component
+ if (not os.path.isdir(compsrc)):
+ parser.error("'" + compsrc + "' does not exist")
+
+ # See if we can find the compilers
+ options.cc = find_executable(options.cc)
+ options.cxx = find_executable(options.cxx)
+
+ return
+
+# Raised when no unique abbreviation exists for a set of names.
+class NotFoundException(Exception): pass
+
+# Find a unique short name for the given set of paths. This searches
+# back through path components until it finds unique component names
+# among all given paths.
+def get_path_abbrevs(paths):
+ unique_paths = list(paths)
+
+ # Find a unique component of each path.
+ unique_bases = unique_paths[:]
+ while len(unique_paths) > 0:
+ bases = [os.path.basename(src) for src in unique_paths]
+ components = { c for c in bases }
+ # Account for single entry in paths.
+ if len(components) > 1 or len(components) == len(bases):
+ # We found something unique.
+ for c in components:
+ if bases.count(c) == 1:
+ index = bases.index(c)
+ unique_bases[index] = c
+ # Remove the corresponding path from the set under
+ # consideration.
+ unique_paths[index] = None
+ unique_paths = [ p for p in unique_paths if p is not None ]
+ unique_paths = [os.path.dirname(src) for src in unique_paths]
+
+ if len(unique_paths) > 0:
+ raise NotFoundException()
+
+ abbrevs = dict(zip(paths, [base for base in unique_bases]))
+
+ return abbrevs
+
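+# For example (hypothetical paths): given {'/a/official/llvm',
+# '/a/staging/llvm'}, the final components collide on 'llvm', so the search
+# steps back one level and maps the paths to 'official' and 'staging'.
+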
+# Given a set of unique names, find a short character sequence that
+# uniquely identifies them.
+def get_short_abbrevs(unique_bases):
+ # Find a unique start character for each path base.
+ my_unique_bases = unique_bases[:]
+ unique_char_starts = unique_bases[:]
+ while len(my_unique_bases) > 0:
+ for start, char_tuple in enumerate(zip(*[base
+ for base in my_unique_bases])):
+ chars = { c for c in char_tuple }
+ # Account for single path.
+ if len(chars) > 1 or len(chars) == len(char_tuple):
+ # We found something unique.
+ for c in chars:
+ if char_tuple.count(c) == 1:
+ index = char_tuple.index(c)
+ unique_char_starts[index] = start
+ # Remove the corresponding path from the set under
+ # consideration.
+ my_unique_bases[index] = None
+ my_unique_bases = [ b for b in my_unique_bases
+ if b is not None ]
+ break
+
+ if len(my_unique_bases) > 0:
+ raise NotFoundException()
+
+ abbrevs = [abbrev[start_index:start_index+3]
+ for abbrev, start_index
+ in zip([base for base in unique_bases],
+ [index for index in unique_char_starts])]
+
+ abbrevs = dict(zip(unique_bases, abbrevs))
+
+ return abbrevs
+
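+# For example (hypothetical names): for ['official', 'staging', 'commit'],
+# the first character already distinguishes all three, so the abbreviations
+# are the three-character slices 'off', 'sta', and 'com'.
+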
+class Builder(threading.Thread):
+ class ExecutableNotFound(Exception): pass
+ class FileNotExecutable(Exception): pass
+
+ def __init__(self, work_queue, jobs,
+ build_abbrev, source_abbrev,
+ options):
+ super().__init__()
+ self.work_queue = work_queue
+ self.jobs = jobs
+ self.cc = options.cc
+ self.cxx = options.cxx
+ self.build_abbrev = build_abbrev
+ self.source_abbrev = source_abbrev
+ self.build_prefix = options.builddir
+ self.install_prefix = options.prefix
+ self.options = options
+ self.component_abbrev = dict(
+ llvm="llvm",
+ dragonegg="degg")
+ def run(self):
+ while True:
+ try:
+ source, build = self.work_queue.get()
+ self.dobuild(source, build)
+ except:
+ traceback.print_exc()
+ finally:
+ self.work_queue.task_done()
+
+ def execute(self, command, execdir, env, component):
+ prefix = self.component_abbrev[component.replace("-", "_")]
+ pwd = os.getcwd()
+ if not os.path.exists(execdir):
+ os.makedirs(execdir)
+
+ execenv = os.environ.copy()
+
+ for key, value in env.items():
+ execenv[key] = value
+
+ self.logger.debug("[" + prefix + "] " + "env " + str(env) + " "
+ + " ".join(command));
+
+ try:
+ proc = subprocess.Popen(command,
+ cwd=execdir,
+ env=execenv,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+
+ line = proc.stdout.readline()
+ while line:
+ self.logger.info("[" + prefix + "] "
+ + str(line, "utf-8").rstrip())
+ line = proc.stdout.readline()
+
+ except:
+ traceback.print_exc()
+
+ # Get a list of C++ include directories to pass to clang.
+ def get_includes(self):
+ # Assume we're building with g++ for now.
+ command = [self.cxx]
+ command += ["-v", "-x", "c++", "/dev/null", "-fsyntax-only"]
+ includes = []
+ self.logger.debug(command)
+ try:
+ proc = subprocess.Popen(command,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT)
+
+ gather = False
+ line = proc.stdout.readline()
+ while line:
+ self.logger.debug(line)
+ if re.search("End of search list", str(line)) is not None:
+ self.logger.debug("Stop Gather")
+ gather = False
+ if gather:
+ includes.append(str(line, "utf-8").strip())
+ if re.search("#include <...> search starts", str(line)) is not None:
+ self.logger.debug("Start Gather")
+ gather = True
+ line = proc.stdout.readline()
+ except:
+ traceback.print_exc()
+ self.logger.debug(includes)
+ return includes
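+
+ # For example (typical g++ -v output; the paths vary by system): the
+ # lines between "#include <...> search starts here:" and "End of search
+ # list.", such as /usr/include/c++/4.6, are what get collected here.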
+
+ def dobuild(self, source, build):
+ build_suffix = ""
+
+ ssabbrev = get_short_abbrevs([ab for ab in self.source_abbrev.values()])
+
+ prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + self.build_abbrev[build] + "]"
+ # Recompute from the base prefix each time; '+=' would accumulate
+ # path components across successive builds done by this worker.
+ self.install_prefix = (self.options.prefix + "/"
+ + self.source_abbrev[source] + "/" + build)
+ build_suffix += "/" + self.source_abbrev[source] + "/" + build
+
+ self.logger = logging.getLogger(prefix)
+
+ self.logger.debug(self.install_prefix)
+
+ # Assume we're building with gcc for now.
+ cxxincludes = self.get_includes()
+ cxxroot = os.path.dirname(cxxincludes[0]) # Remove the version
+ cxxroot = os.path.dirname(cxxroot) # Remove the c++
+ cxxroot = os.path.dirname(cxxroot) # Remove the include
+
+ configure_flags = dict(
+ llvm=dict(debug=["--prefix=" + self.install_prefix,
+ "--with-extra-options=-Werror",
+ "--enable-assertions",
+ "--disable-optimized",
+ "--with-gcc-toolchain=" + cxxroot],
+ release=["--prefix=" + self.install_prefix,
+ "--with-extra-options=-Werror",
+ "--enable-optimized",
+ "--with-gcc-toolchain=" + cxxroot],
+ paranoid=["--prefix=" + self.install_prefix,
+ "--with-extra-options=-Werror",
+ "--enable-assertions",
+ "--enable-expensive-checks",
+ "--disable-optimized",
+ "--with-gcc-toolchain=" + cxxroot]),
+ dragonegg=dict(debug=[],
+ release=[],
+ paranoid=[]))
+
+ configure_env = dict(
+ llvm=dict(debug=dict(CC=self.cc,
+ CXX=self.cxx),
+ release=dict(CC=self.cc,
+ CXX=self.cxx),
+ paranoid=dict(CC=self.cc,
+ CXX=self.cxx)),
+ dragonegg=dict(debug=dict(CC=self.cc,
+ CXX=self.cxx),
+ release=dict(CC=self.cc,
+ CXX=self.cxx),
+ paranoid=dict(CC=self.cc,
+ CXX=self.cxx)))
+
+ make_flags = dict(
+ llvm=dict(debug=["-j" + str(self.jobs)],
+ release=["-j" + str(self.jobs)],
+ paranoid=["-j" + str(self.jobs)]),
+ dragonegg=dict(debug=["-j" + str(self.jobs)],
+ release=["-j" + str(self.jobs)],
+ paranoid=["-j" + str(self.jobs)]))
+
+ make_env = dict(
+ llvm=dict(debug=dict(),
+ release=dict(),
+ paranoid=dict()),
+ dragonegg=dict(debug=dict(GCC=self.cc,
+ LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"),
+ release=dict(GCC=self.cc,
+ LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"),
+ paranoid=dict(GCC=self.cc,
+ LLVM_CONFIG=self.install_prefix + "/bin/llvm-config")))
+
+ make_install_flags = dict(
+ llvm=dict(debug=["install"],
+ release=["install"],
+ paranoid=["install"]),
+ dragonegg=dict(debug=["install"],
+ release=["install"],
+ paranoid=["install"]))
+
+ make_install_env = dict(
+ llvm=dict(debug=dict(),
+ release=dict(),
+ paranoid=dict()),
+ dragonegg=dict(debug=dict(),
+ release=dict(),
+ paranoid=dict()))
+
+ make_check_flags = dict(
+ llvm=dict(debug=["check"],
+ release=["check"],
+ paranoid=["check"]),
+ dragonegg=dict(debug=["check"],
+ release=["check"],
+ paranoid=["check"]))
+
+ make_check_env = dict(
+ llvm=dict(debug=dict(),
+ release=dict(),
+ paranoid=dict()),
+ dragonegg=dict(debug=dict(),
+ release=dict(),
+ paranoid=dict()))
+
+ for component in components:
+ comp = component[:]
+
+ if (self.options.no_dragonegg):
+ if (comp == 'dragonegg'):
+ # builddir is not computed until below, so only name the
+ # component being skipped here.
+ self.logger.info("Skipping " + component)
+ continue
+
+ srcdir = source + "/" + comp.rstrip("2")
+ builddir = self.build_prefix + "/" + comp + "/" + build_suffix
+ installdir = self.install_prefix
+
+ comp_key = comp.replace("-", "_")
+
+ config_args = configure_flags[comp_key][build][:]
+ config_args.extend(getattr(self.options,
+ "extra_" + comp_key.rstrip("2")
+ + "_config_flags",
+ "").split())
+
+ self.logger.info("Configuring " + component + " in " + builddir)
+ self.configure(component, srcdir, builddir,
+ config_args,
+ configure_env[comp_key][build])
+
+ self.logger.info("Building " + component + " in " + builddir)
+ self.logger.info("Build: make " + str(make_flags[comp_key][build]))
+ self.make(component, srcdir, builddir,
+ make_flags[comp_key][build],
+ make_env[comp_key][build])
+
+ if (not self.options.no_install):
+ self.logger.info("Installing " + component + " in " + installdir)
+ self.make(component, srcdir, builddir,
+ make_install_flags[comp_key][build],
+ make_install_env[comp_key][build])
+
+ self.logger.info("Testing " + component + " in " + builddir)
+ self.logger.info("Test: make "
+ + str(make_check_flags[comp_key][build]))
+ self.make(component, srcdir, builddir,
+ make_check_flags[comp_key][build],
+ make_check_env[comp_key][build])
+
+
+ def configure(self, component, srcdir, builddir, flags, env):
+ self.logger.debug("Configure " + str(flags) + " " + str(srcdir) + " -> "
+ + str(builddir))
+
+ configure_files = dict(
+ llvm=[(srcdir + "/configure", builddir + "/Makefile")],
+ dragonegg=[("","")])
+
+
+ doconfig = False
+ for conf, mf in configure_files[component.replace("-", "_")]:
+ if not os.path.exists(conf):
+ return
+ if os.path.exists(conf) and os.path.exists(mf):
+ confstat = os.stat(conf)
+ makestat = os.stat(mf)
+ if confstat.st_mtime > makestat.st_mtime:
+ doconfig = True
+ break
+ else:
+ doconfig = True
+ break
+
+ if not doconfig and not self.options.force_configure:
+ return
+
+ program = srcdir + "/configure"
+ if not is_executable(program):
+ return
+
+ args = [program]
+ args += ["--verbose"]
+ args += flags
+ self.execute(args, builddir, env, component)
+
+ def make(self, component, srcdir, builddir, flags, env):
+ program = find_executable("make")
+        if program is None:
+            raise self.ExecutableNotFound
+
+        if not is_executable(program):
+            raise self.FileNotExecutable
+
+ args = [program]
+ args += flags
+ self.execute(args, builddir, env, component)
+
+# Global constants
+build_abbrev = dict(debug="dbg", release="opt", paranoid="par")
+components = ["llvm", "dragonegg"]
+
+# Parse options
+parser = optparse.OptionParser(version="%prog 1.0")
+add_options(parser)
+(options, args) = parser.parse_args()
+check_options(parser, options, build_abbrev.keys())
+
+if options.verbose:
+ logging.basicConfig(level=logging.DEBUG,
+ format='%(name)-13s: %(message)s')
+else:
+ logging.basicConfig(level=logging.INFO,
+ format='%(name)-13s: %(message)s')
+
+source_abbrev = get_path_abbrevs(set(options.src))
+
+work_queue = queue.Queue()
+
+jobs = options.jobs // options.threads
+if jobs == 0:
+ jobs = 1
+
+numthreads = options.threads
+
+logging.getLogger().info("Building with " + str(options.jobs) + " jobs and "
+ + str(numthreads) + " threads using " + str(jobs)
+ + " make jobs")
+
+logging.getLogger().info("CC = " + str(options.cc))
+logging.getLogger().info("CXX = " + str(options.cxx))
+
+for t in range(numthreads):
+ builder = Builder(work_queue, jobs,
+ build_abbrev, source_abbrev,
+ options)
+ builder.daemon = True
+ builder.start()
+
+for build in set(options.build):
+ for source in set(options.src):
+ work_queue.put((source, build))
+
+work_queue.join()
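Aside: the scheduling model implemented above reduces to a small worker-pool
pattern. With --jobs=8 and --threads=4, each of the four worker threads drains
(source, build) pairs from the queue and invokes make with -j2, i.e.
jobs // threads floored to at least 1. A minimal, self-contained sketch of
that division of labor (the names and the print stand in for the real
Builder.make call):

    import queue
    import threading

    def schedule(pairs, total_jobs=8, threads=4):
        make_jobs = max(1, total_jobs // threads)  # -j passed to each make
        work = queue.Queue()
        for pair in pairs:
            work.put(pair)

        def worker():
            while True:
                source, build = work.get()
                try:
                    # The real script runs make -j<make_jobs> in the builddir.
                    print("make -j%d for %s/%s" % (make_jobs, source, build))
                finally:
                    work.task_done()

        for _ in range(threads):
            threading.Thread(target=worker, daemon=True).start()
        work.join()

    schedule([("official", "debug"), ("staging", "release")])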
diff --git a/utils/llvm.grm b/utils/llvm.grm
index fb26dbb66f60..322036b2c209 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -174,6 +174,7 @@ FuncAttr ::= noreturn
| sspreq
| returns_twice
| nonlazybind
+ | address_safety
;
OptFuncAttrs ::= + _ | OptFuncAttrs FuncAttr ;
diff --git a/utils/llvmbuild b/utils/llvmbuild
deleted file mode 100755
index b623d3202158..000000000000
--- a/utils/llvmbuild
+++ /dev/null
@@ -1,778 +0,0 @@
-#!/usr/bin/python3
-##===- utils/llvmbuild - Build the LLVM project ----------------*-python-*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-#
-# This script builds many different flavors of the LLVM ecosystem. It
-# will build LLVM, Clang, llvm-gcc, and dragonegg as well as run tests
-# on them. This script is convenient to use to check builds and tests
-# before committing changes to the upstream repository
-#
-# A typical source setup uses three trees and looks like this:
-#
-# official
-# dragonegg
-# trunk
-# gcc
-# trunk
-# llvm
-# trunk
-# tools
-# clang
-# tags
-# RELEASE_28
-# tools
-# clang
-# llvm-gcc
-# trunk
-# tags
-# RELEASE_28
-# staging
-# dragonegg
-# trunk
-# gcc
-# trunk
-# llvm
-# trunk
-# tools
-# clang
-# tags
-# RELEASE_28
-# tools
-# clang
-# llvm-gcc
-# trunk
-# tags
-# RELEASE_28
-# commit
-# dragonegg
-# trunk
-# gcc
-# trunk
-# llvm
-# trunk
-# tools
-# clang
-# tags
-# RELEASE_28
-# tools
-# clang
-# llvm-gcc
-# trunk
-# tags
-# RELEASE_28
-#
-# "gcc" above is the upstream FSF gcc and "gcc/trunk" refers to the
-# 4.5 branch as discussed in the dragonegg build guide.
-#
-# In a typical workflow, the "official" tree always contains unchanged
-# sources from the main LLVM project repositories. The "staging" tree
-# is where local work is done. A set of changes resides there waiting
-# to be moved upstream. The "commit" tree is where changes from
-# "staging" make their way upstream. Individual incremental changes
-# from "staging" are applied to "commit" and committed upstream after
-# a successful build and test run. A successful build is one in which
-# testing results in no more failures than seen in the testing of the
-# "official" tree.
-#
-# A build may be invoked as such:
-#
-# llvmbuild --src=~/llvm/commit --src=~/llvm/staging
-# --src=~/llvm/official --branch=trunk --branch=tags/RELEASE_28
-# --build=debug --build=release --build=paranoid
-# --prefix=/home/greened/install --builddir=/home/greened/build
-#
-# This will build the LLVM ecosystem, including LLVM, Clang, llvm-gcc,
-# gcc 4.5 and dragonegg, putting build results in ~/build and
-# installing tools in ~/install. llvmbuild creates separate build and
-# install directories for each source/branch/build flavor. In the
-# above example, llvmbuild will build debug, release and paranoid
-# (debug+checks) flavors of the trunk and RELEASE_28 branches from
-# each source tree (official, staging and commit) for a total of
-# eighteen builds. All builds will be run in parallel.
-#
-# The user may control parallelism via the --jobs and --threads
-# switches. --jobs tells llvmbuild the maximum total number of builds
-# to activate in parallel. The user may think of it as equivalent to
-# the GNU make -j switch. --threads tells llvmbuild how many worker
-# threads to use to accomplish those builds. If --threads is less
-# than --jobs, --threads workers will be launched and each one will
-# pick a source/branch/flavor combination to build. Then llvmbuild
-# will invoke GNU make with -j (--jobs / --threads) to use up the
-# remaining job capacity. Once a worker is finished with a build, it
-# will pick another combination off the list and start building it.
-#
-##===----------------------------------------------------------------------===##
-
-import optparse
-import os
-import sys
-import threading
-import queue
-import logging
-import traceback
-import subprocess
-import re
-
-# TODO: Use shutil.which when it is available (3.2 or later)
-def find_executable(executable, path=None):
- """Try to find 'executable' in the directories listed in 'path' (a
- string listing directories separated by 'os.pathsep'; defaults to
- os.environ['PATH']). Returns the complete filename or None if not
- found
- """
- if path is None:
- path = os.environ['PATH']
- paths = path.split(os.pathsep)
- extlist = ['']
- if os.name == 'os2':
- (base, ext) = os.path.splitext(executable)
- # executable files on OS/2 can have an arbitrary extension, but
- # .exe is automatically appended if no dot is present in the name
- if not ext:
- executable = executable + ".exe"
- elif sys.platform == 'win32':
- pathext = os.environ['PATHEXT'].lower().split(os.pathsep)
- (base, ext) = os.path.splitext(executable)
- if ext.lower() not in pathext:
- extlist = pathext
- for ext in extlist:
- execname = executable + ext
- if os.path.isfile(execname):
- return execname
- else:
- for p in paths:
- f = os.path.join(p, execname)
- if os.path.isfile(f):
- return f
- else:
- return None
-
-def is_executable(fpath):
- return os.path.exists(fpath) and os.access(fpath, os.X_OK)
-
-def add_options(parser):
- parser.add_option("-v", "--verbose", action="store_true",
- default=False,
- help=("Output informational messages"
- " [default: %default]"))
- parser.add_option("--src", action="append",
- help=("Top-level source directory [default: %default]"))
- parser.add_option("--build", action="append",
- help=("Build types to run [default: %default]"))
- parser.add_option("--branch", action="append",
- help=("Source branch to build [default: %default]"))
- parser.add_option("--cc", default=find_executable("cc"),
- help=("The C compiler to use [default: %default]"))
- parser.add_option("--cxx", default=find_executable("c++"),
- help=("The C++ compiler to use [default: %default]"))
- parser.add_option("--threads", default=4, type="int",
- help=("The number of worker threads to use "
- "[default: %default]"))
- parser.add_option("--jobs", "-j", default=8, type="int",
- help=("The number of simultaneous build jobs "
- "[default: %default]"))
- parser.add_option("--prefix",
- help=("Root install directory [default: %default]"))
- parser.add_option("--builddir",
- help=("Root build directory [default: %default]"))
- parser.add_option("--extra-llvm-config-flags", default="",
- help=("Extra flags to pass to llvm configure [default: %default]"))
- parser.add_option("--extra-llvm-gcc-config-flags", default="",
- help=("Extra flags to pass to llvm-gcc configure [default: %default]"))
- parser.add_option("--extra-gcc-config-flags", default="",
- help=("Extra flags to pass to gcc configure [default: %default]"))
- parser.add_option("--force-configure", default=False, action="store_true",
- help=("Force reconfigure of all components"))
- parser.add_option("--no-gcc", default=False, action="store_true",
- help=("Do not build dragonegg and gcc"))
- parser.add_option("--no-install", default=False, action="store_true",
- help=("Do not do installs"))
- return
-
-def check_options(parser, options, valid_builds):
- # See if we're building valid flavors.
- for build in options.build:
- if (build not in valid_builds):
- parser.error("'" + build + "' is not a valid build flavor "
- + str(valid_builds))
-
- # See if we can find source directories.
- for src in options.src:
- for component in components:
- component = component.rstrip("2")
- compsrc = src + "/" + component
- if (not os.path.isdir(compsrc)):
- parser.error("'" + compsrc + "' does not exist")
- if (options.branch is not None):
- for branch in options.branch:
- if (not os.path.isdir(os.path.join(compsrc, branch))):
- parser.error("'" + os.path.join(compsrc, branch)
- + "' does not exist")
-
- # See if we can find the compilers
- options.cc = find_executable(options.cc)
- options.cxx = find_executable(options.cxx)
-
- return
-
-# Find a unique short name for the given set of paths. This searches
-# back through path components until it finds unique component names
-# among all given paths.
-def get_path_abbrevs(paths):
- # Find the number of common starting characters in the last component
- # of the paths.
- unique_paths = list(paths)
-
- class NotFoundException(Exception): pass
-
- # Find a unique component of each path.
- unique_bases = unique_paths[:]
- found = 0
- while len(unique_paths) > 0:
- bases = [os.path.basename(src) for src in unique_paths]
- components = { c for c in bases }
- # Account for single entry in paths.
- if len(components) > 1 or len(components) == len(bases):
- # We found something unique.
- for c in components:
- if bases.count(c) == 1:
- index = bases.index(c)
- unique_bases[index] = c
- # Remove the corresponding path from the set under
- # consideration.
- unique_paths[index] = None
- unique_paths = [ p for p in unique_paths if p is not None ]
- unique_paths = [os.path.dirname(src) for src in unique_paths]
-
- if len(unique_paths) > 0:
- raise NotFoundException()
-
- abbrevs = dict(zip(paths, [base for base in unique_bases]))
-
- return abbrevs
-
-# Given a set of unique names, find a short character sequence that
-# uniquely identifies them.
-def get_short_abbrevs(unique_bases):
- # Find a unique start character for each path base.
- my_unique_bases = unique_bases[:]
- unique_char_starts = unique_bases[:]
- while len(my_unique_bases) > 0:
- for start, char_tuple in enumerate(zip(*[base
- for base in my_unique_bases])):
- chars = { c for c in char_tuple }
- # Account for single path.
- if len(chars) > 1 or len(chars) == len(char_tuple):
- # We found something unique.
- for c in chars:
- if char_tuple.count(c) == 1:
- index = char_tuple.index(c)
- unique_char_starts[index] = start
- # Remove the corresponding path from the set under
- # consideration.
- my_unique_bases[index] = None
- my_unique_bases = [ b for b in my_unique_bases
- if b is not None ]
- break
-
- if len(my_unique_bases) > 0:
- raise NotFoundException()
-
- abbrevs = [abbrev[start_index:start_index+3]
- for abbrev, start_index
- in zip([base for base in unique_bases],
- [index for index in unique_char_starts])]
-
- abbrevs = dict(zip(unique_bases, abbrevs))
-
- return abbrevs
-
-class Builder(threading.Thread):
- class ExecutableNotFound(Exception): pass
- class FileNotExecutable(Exception): pass
-
- def __init__(self, work_queue, jobs,
- build_abbrev, source_abbrev, branch_abbrev,
- options):
- super().__init__()
- self.work_queue = work_queue
- self.jobs = jobs
- self.cc = options.cc
- self.cxx = options.cxx
- self.build_abbrev = build_abbrev
- self.source_abbrev = source_abbrev
- self.branch_abbrev = branch_abbrev
- self.build_prefix = options.builddir
- self.install_prefix = options.prefix
- self.options = options
- self.component_abbrev = dict(
- llvm="llvm",
- llvm_gcc="lgcc",
- llvm2="llv2",
- gcc="ugcc",
- dagonegg="degg")
- def run(self):
- while True:
- try:
- source, branch, build = self.work_queue.get()
- self.dobuild(source, branch, build)
- except:
- traceback.print_exc()
- finally:
- self.work_queue.task_done()
-
- def execute(self, command, execdir, env, component):
- prefix = self.component_abbrev[component.replace("-", "_")]
- pwd = os.getcwd()
- if not os.path.exists(execdir):
- os.makedirs(execdir)
-
- execenv = os.environ.copy()
-
- for key, value in env.items():
- execenv[key] = value
-
- self.logger.debug("[" + prefix + "] " + "env " + str(env) + " "
- + " ".join(command));
-
- try:
- proc = subprocess.Popen(command,
- cwd=execdir,
- env=execenv,
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
-
- line = proc.stdout.readline()
- while line:
- self.logger.info("[" + prefix + "] "
- + str(line, "utf-8").rstrip())
- line = proc.stdout.readline()
-
- except:
- traceback.print_exc()
-
- # Get a list of C++ include directories to pass to clang.
- def get_includes(self):
- # Assume we're building with g++ for now.
- command = [self.cxx]
- command += ["-v", "-x", "c++", "/dev/null", "-fsyntax-only"]
- includes = []
- self.logger.debug(command)
- try:
- proc = subprocess.Popen(command,
- stdout=subprocess.PIPE,
- stderr=subprocess.STDOUT)
-
- gather = False
- line = proc.stdout.readline()
- while line:
- self.logger.debug(line)
- if re.search("End of search list", str(line)) is not None:
- self.logger.debug("Stop Gather")
- gather = False
- if gather:
- includes.append(str(line, "utf-8").strip())
- if re.search("#include <...> search starts", str(line)) is not None:
- self.logger.debug("Start Gather")
- gather = True
- line = proc.stdout.readline()
- except:
- traceback.print_exc()
- self.logger.debug(includes)
- return includes
-
- def dobuild(self, source, branch, build):
- build_suffix = ""
-
- ssabbrev = get_short_abbrevs([ab for ab in self.source_abbrev.values()])
-
- if branch is not None:
- sbabbrev = get_short_abbrevs([ab for ab in self.branch_abbrev.values()])
-
- prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + sbabbrev[self.branch_abbrev[branch]] + "-" + self.build_abbrev[build] + "]"
- self.install_prefix += "/" + self.source_abbrev[source] + "/" + branch + "/" + build
- build_suffix += self.source_abbrev[source] + "/" + branch + "/" + build
- else:
- prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + self.build_abbrev[build] + "]"
- self.install_prefix += "/" + self.source_abbrev[source] + "/" + build
- build_suffix += "/" + self.source_abbrev[source] + "/" + build
-
- self.logger = logging.getLogger(prefix)
-
- self.logger.debug(self.install_prefix)
-
- # Assume we're building with gcc for now.
- cxxincludes = self.get_includes()
- cxxroot = cxxincludes[0]
- cxxarch = os.path.basename(cxxincludes[1])
-
- configure_flags = dict(
- llvm=dict(debug=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
- "--enable-assertions",
- "--disable-optimized",
- "--with-cxx-include-root=" + cxxroot,
- "--with-cxx-include-arch=" + cxxarch],
- release=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
- "--enable-optimized",
- "--with-cxx-include-root=" + cxxroot,
- "--with-cxx-include-arch=" + cxxarch],
- paranoid=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
- "--enable-assertions",
- "--enable-expensive-checks",
- "--disable-optimized",
- "--with-cxx-include-root=" + cxxroot,
- "--with-cxx-include-arch=" + cxxarch]),
- llvm_gcc=dict(debug=["--prefix=" + self.install_prefix,
- "--enable-checking",
- "--program-prefix=llvm-",
- "--enable-llvm=" + self.build_prefix + "/llvm/" + build_suffix,
-# Fortran install seems to be broken.
-# "--enable-languages=c,c++,fortran"],
- "--enable-languages=c,c++"],
- release=["--prefix=" + self.install_prefix,
- "--program-prefix=llvm-",
- "--enable-llvm=" + self.build_prefix + "/llvm/" + build_suffix,
-# Fortran install seems to be broken.
-# "--enable-languages=c,c++,fortran"],
- "--enable-languages=c,c++"],
- paranoid=["--prefix=" + self.install_prefix,
- "--enable-checking",
- "--program-prefix=llvm-",
- "--enable-llvm=" + self.build_prefix + "/llvm/" + build_suffix,
-# Fortran install seems to be broken.
-# "--enable-languages=c,c++,fortran"]),
- "--enable-languages=c,c++"]),
- llvm2=dict(debug=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
- "--enable-assertions",
- "--disable-optimized",
- "--with-llvmgccdir=" + self.install_prefix + "/bin",
- "--with-cxx-include-root=" + cxxroot,
- "--with-cxx-include-arch=" + cxxarch],
- release=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
- "--enable-optimized",
- "--with-llvmgccdir=" + self.install_prefix + "/bin",
- "--with-cxx-include-root=" + cxxroot,
- "--with-cxx-include-arch=" + cxxarch],
- paranoid=["--prefix=" + self.install_prefix,
- "--with-extra-options=-Werror",
- "--enable-assertions",
- "--enable-expensive-checks",
- "--disable-optimized",
- "--with-llvmgccdir=" + self.install_prefix + "/bin",
- "--with-cxx-include-root=" + cxxroot,
- "--with-cxx-include-arch=" + cxxarch]),
- gcc=dict(debug=["--prefix=" + self.install_prefix,
- "--enable-checking"],
- release=["--prefix=" + self.install_prefix],
- paranoid=["--prefix=" + self.install_prefix,
- "--enable-checking"]),
- dragonegg=dict(debug=[],
- release=[],
- paranoid=[]))
-
- configure_env = dict(
- llvm=dict(debug=dict(CC=self.cc,
- CXX=self.cxx),
- release=dict(CC=self.cc,
- CXX=self.cxx),
- paranoid=dict(CC=self.cc,
- CXX=self.cxx)),
- llvm_gcc=dict(debug=dict(CC=self.cc,
- CXX=self.cxx),
- release=dict(CC=self.cc,
- CXX=self.cxx),
- paranoid=dict(CC=self.cc,
- CXX=self.cxx)),
- llvm2=dict(debug=dict(CC=self.cc,
- CXX=self.cxx),
- release=dict(CC=self.cc,
- CXX=self.cxx),
- paranoid=dict(CC=self.cc,
- CXX=self.cxx)),
- gcc=dict(debug=dict(CC=self.cc,
- CXX=self.cxx),
- release=dict(CC=self.cc,
- CXX=self.cxx),
- paranoid=dict(CC=self.cc,
- CXX=self.cxx)),
- dragonegg=dict(debug=dict(CC=self.cc,
- CXX=self.cxx),
- release=dict(CC=self.cc,
- CXX=self.cxx),
- paranoid=dict(CC=self.cc,
- CXX=self.cxx)))
-
- make_flags = dict(
- llvm=dict(debug=["-j" + str(self.jobs)],
- release=["-j" + str(self.jobs)],
- paranoid=["-j" + str(self.jobs)]),
- llvm_gcc=dict(debug=["-j" + str(self.jobs),
- "bootstrap"],
- release=["-j" + str(self.jobs),
- "bootstrap"],
- paranoid=["-j" + str(self.jobs),
- "bootstrap"]),
- llvm2=dict(debug=["-j" + str(self.jobs)],
- release=["-j" + str(self.jobs)],
- paranoid=["-j" + str(self.jobs)]),
- gcc=dict(debug=["-j" + str(self.jobs),
- "bootstrap"],
- release=["-j" + str(self.jobs),
- "bootstrap"],
- paranoid=["-j" + str(self.jobs),
- "bootstrap"]),
- dragonegg=dict(debug=["-j" + str(self.jobs)],
- release=["-j" + str(self.jobs)],
- paranoid=["-j" + str(self.jobs)]))
-
- make_env = dict(
- llvm=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- llvm_gcc=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- llvm2=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- gcc=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- dragonegg=dict(debug=dict(GCC=self.install_prefix + "/bin/gcc",
- LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"),
- release=dict(GCC=self.install_prefix + "/bin/gcc",
- LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"),
- paranoid=dict(GCC=self.install_prefix + "/bin/gcc",
- LLVM_CONFIG=self.install_prefix + "/bin/llvm-config")))
-
- make_install_flags = dict(
- llvm=dict(debug=["install"],
- release=["install"],
- paranoid=["install"]),
- llvm_gcc=dict(debug=["install"],
- release=["install"],
- paranoid=["install"]),
- llvm2=dict(debug=["install"],
- release=["install"],
- paranoid=["install"]),
- gcc=dict(debug=["install"],
- release=["install"],
- paranoid=["install"]),
- dragonegg=dict(debug=["install"],
- release=["install"],
- paranoid=["install"]))
-
- make_install_env = dict(
- llvm=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- llvm_gcc=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- llvm2=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- gcc=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- dragonegg=dict(debug=dict(),
- release=dict(),
- paranoid=dict()))
-
- make_check_flags = dict(
- llvm=dict(debug=["check"],
- release=["check"],
- paranoid=["check"]),
- llvm_gcc=dict(debug=["check"],
- release=["check"],
- paranoid=["check"]),
- llvm2=dict(debug=["check"],
- release=["check"],
- paranoid=["check"]),
- gcc=dict(debug=["check"],
- release=["check"],
- paranoid=["check"]),
- dragonegg=dict(debug=["check"],
- release=["check"],
- paranoid=["check"]))
-
- make_check_env = dict(
- llvm=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- llvm_gcc=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- llvm2=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- gcc=dict(debug=dict(),
- release=dict(),
- paranoid=dict()),
- dragonegg=dict(debug=dict(),
- release=dict(),
- paranoid=dict()))
-
- for component in components:
- comp = component[:]
-
- if (self.options.no_gcc):
- if (comp == 'gcc' or comp == 'dragonegg' or comp == 'llvm2'):
- self.logger.info("Skipping " + component + " in "
- + builddir)
- continue
-
- srcdir = source + "/" + comp.rstrip("2")
- builddir = self.build_prefix + "/" + comp + "/" + build_suffix
- installdir = self.install_prefix
-
- if (branch is not None):
- srcdir += "/" + branch
-
- comp_key = comp.replace("-", "_")
-
- config_args = configure_flags[comp_key][build][:]
- config_args.extend(getattr(self.options,
- "extra_" + comp_key.rstrip("2")
- + "_config_flags").split())
-
- self.logger.info("Configuring " + component + " in " + builddir)
- self.configure(component, srcdir, builddir,
- config_args,
- configure_env[comp_key][build])
-
- self.logger.info("Building " + component + " in " + builddir)
- self.logger.info("Build: make " + str(make_flags[comp_key][build]))
- self.make(component, srcdir, builddir,
- make_flags[comp_key][build],
- make_env[comp_key][build])
-
- if (not self.options.no_install):
- self.logger.info("Installing " + component + " in " + installdir)
- self.make(component, srcdir, builddir,
- make_install_flags[comp_key][build],
- make_install_env[comp_key][build])
-
- self.logger.info("Testing " + component + " in " + builddir)
- self.logger.info("Test: make "
- + str(make_check_flags[comp_key][build]))
- self.make(component, srcdir, builddir,
- make_check_flags[comp_key][build],
- make_check_env[comp_key][build])
-
-
- def configure(self, component, srcdir, builddir, flags, env):
- self.logger.debug("Configure " + str(flags) + " " + str(srcdir) + " -> "
- + str(builddir))
-
- configure_files = dict(
- llvm=[(srcdir + "/configure", builddir + "/Makefile")],
- llvm_gcc=[(srcdir + "/configure", builddir + "/Makefile"),
- (srcdir + "/gcc/configure", builddir + "/gcc/Makefile")],
- llvm2=[(srcdir + "/configure", builddir + "/Makefile")],
- gcc=[(srcdir + "/configure", builddir + "/Makefile"),
- (srcdir + "/gcc/configure", builddir + "/gcc/Makefile")],
- dragonegg=[()])
-
-
- doconfig = False
- for conf, mf in configure_files[component.replace("-", "_")]:
- if not os.path.exists(conf):
- return
- if os.path.exists(conf) and os.path.exists(mf):
- confstat = os.stat(conf)
- makestat = os.stat(mf)
- if confstat.st_mtime > makestat.st_mtime:
- doconfig = True
- break
- else:
- doconfig = True
- break
-
- if not doconfig and not self.options.force_configure:
- return
-
- program = srcdir + "/configure"
- if not is_executable(program):
- return
-
- args = [program]
- args += ["--verbose"]
- args += flags
- self.execute(args, builddir, env, component)
-
- def make(self, component, srcdir, builddir, flags, env):
- program = find_executable("make")
- if program is None:
- raise ExecutableNotFound
-
- if not is_executable(program):
- raise FileNotExecutable
-
- args = [program]
- args += flags
- self.execute(args, builddir, env, component)
-
-# Global constants
-build_abbrev = dict(debug="dbg", release="opt", paranoid="par")
-#components = ["llvm", "llvm-gcc", "llvm2", "gcc", "dragonegg"]
-components = ["llvm", "llvm2", "gcc", "dragonegg"]
-
-# Parse options
-parser = optparse.OptionParser(version="%prog 1.0")
-add_options(parser)
-(options, args) = parser.parse_args()
-check_options(parser, options, build_abbrev.keys());
-
-if options.verbose:
- logging.basicConfig(level=logging.DEBUG,
- format='%(name)-13s: %(message)s')
-else:
- logging.basicConfig(level=logging.INFO,
- format='%(name)-13s: %(message)s')
-
-source_abbrev = get_path_abbrevs(set(options.src))
-
-branch_abbrev = None
-if options.branch is not None:
- branch_abbrev = get_path_abbrevs(set(options.branch))
-
-work_queue = queue.Queue()
-
-jobs = options.jobs // options.threads
-if jobs == 0:
- jobs = 1
-
-numthreads = options.threads
-
-logging.getLogger().info("Building with " + str(options.jobs) + " jobs and "
- + str(numthreads) + " threads using " + str(jobs)
- + " make jobs")
-
-for t in range(numthreads):
- builder = Builder(work_queue, jobs,
- build_abbrev, source_abbrev, branch_abbrev,
- options)
- builder.daemon = True
- builder.start()
-
-for build in set(options.build):
- for source in set(options.src):
- if options.branch is not None:
- for branch in set(options.branch):
- work_queue.put((source, branch, build))
- else:
- work_queue.put((source, None, build))
-
-work_queue.join()
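Aside: the abbreviation helpers removed here (get_path_abbrevs,
get_short_abbrevs) survive in the replacement script and exist only to build
compact log prefixes: a worker building the debug flavor of ~/llvm/commit
logs as "[com-dbg]". A simplified sketch of the prefix construction; the real
code searches each name for a three-character slice that is unique across all
sources, while this sketch just assumes the first three characters suffice:

    build_abbrev = dict(debug="dbg", release="opt", paranoid="par")

    def log_prefix(source_base, build):
        # Simplification: assume the first three characters are unique.
        return "[" + source_base[:3] + "-" + build_abbrev[build] + "]"

    print(log_prefix("commit", "debug"))     # [com-dbg]
    print(log_prefix("staging", "release"))  # [sta-opt]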
diff --git a/utils/llvmgrep b/utils/llvmgrep
index 540f0598579d..dc15da4961ef 100755
--- a/utils/llvmgrep
+++ b/utils/llvmgrep
@@ -29,7 +29,7 @@ if test -d "$TOPDIR" ; then
cd $TOPDIR
case `uname -s` in
SunOS) grep_cmd="ggrep -H -n" ;;
- Linux) grep_cmd="egrep -H -n" ;;
+ Linux|Darwin) grep_cmd="egrep -H -n" ;;
*) grep_cmd="egrep -l -n" ;;
esac
./utils/llvmdo -topdir "$TOPDIR" \
diff --git a/utils/parseNLT.pl b/utils/parseNLT.pl
deleted file mode 100644
index 95afca73a132..000000000000
--- a/utils/parseNLT.pl
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/perl
-# a first attempt to parse the nightly tester pages into something
-# one can reason about, namely import into a database
-# USE: perl parseNLT.pl <2005-03-31.html
-# for example
-
-while(<>)
- {
- if (/LLVM Test Results for (\w+) (\d+), (\d+)</)
- {
- $mon = $1;
- $day = $2;
- $year = $3;
- }
- if (/<td>([^<]+)<\/td>/)
- {
- if ($prefix)
- { $output .= "$1 "; $count++; }
- }
- if (/<tr/)
- {
- if ($output and $count > 3)
- { print "\n$day $mon $year $prefix/$output"; }
- $output = "";
- $count = 0;
- }
- if (/<h2>(Programs.+)<\/h2>/)
- {
- $prefix = $1;
- }
- }
-
-if ($output)
- { print "\n$day $mon $year $prefix/$output"; $output = ""; }
diff --git a/utils/plotNLT.pl b/utils/plotNLT.pl
deleted file mode 100644
index 55d503d68933..000000000000
--- a/utils/plotNLT.pl
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/perl
-#takes a test and a program from a dp and produces a gnuplot script
-#use like perl plotNLT.pl password Programs/MultiSource/Benchmarks/ASCI_Purple/SMG2000/smg2000 llc
-
-use DBI;
-
-# database information
-$db="llvmalpha";
-$host="localhost";
-$userid="llvmdbuser";
-$passwd=shift @ARGV;
-$connectionInfo="dbi:mysql:$db;$host";
-
-# make connection to database
-$dbh = DBI->connect($connectionInfo,$userid,$passwd) or die DBI->errstr;
-
-
-$count = @ARGV / 2;
-
-print "set xdata time\n";
-print 'set timefmt "%Y-%m-%d"';
-print "\nplot";
-for ($iter = 0; $iter < $count; $iter++) {
- if ($iter)
- { print ","; }
- print " '-' using 1:2 with lines";
-}
-
-print "\n";
-
-for ($iter = 0; $iter < $count; $iter++) {
-
- $prog = shift @ARGV;
- $test = shift @ARGV;
-
- $query = "Select RUN, VALUE from Tests where TEST = '$test' AND NAME = '$prog' ORDER BY RUN";
- #print "\n$query\n";
-
- my $sth = $dbh->prepare( $query) || die "Can't prepare statement: $DBI::errstr";;
-
- my $rc = $sth->execute or die DBI->errstr;
-
- while(($da,$v) = $sth->fetchrow_array)
- {
- print "$da $v\n";
- }
-
- print "e\n";
-}
-
-
-# disconnect from database
-$dbh->disconnect;
diff --git a/utils/release/findRegressions-nightly.py b/utils/release/findRegressions-nightly.py
new file mode 100755
index 000000000000..e801dab4aba7
--- /dev/null
+++ b/utils/release/findRegressions-nightly.py
@@ -0,0 +1,130 @@
+#!/usr/bin/python
+import re, string, sys, os, time
+
+DEBUG = 0
+testDirName = 'llvm-test'
+testkinds = ['compile', 'llc', 'jit', 'cbe']
+exectime = ['llc-time', 'jit-time', 'cbe-time',]
+comptime = ['llc', 'jit-comptime', 'compile']
+
+(tp, exp) = ('compileTime_', 'executeTime_')
+
+def parse(file):
+  f = open(file, 'r')
+  d = f.read()
+
+  # Cleanup weird stuff
+  d = re.sub(r',\d+:\d', '', d)
+
+ r = re.findall(r'TEST-(PASS|FAIL|RESULT.*?):\s+(.*?)\s+(.*?)\r*\n', d)
+
+ test = {}
+ fname = ''
+ for t in r:
+ if DEBUG:
+ print t
+ if t[0] == 'PASS' or t[0] == 'FAIL' :
+ tmp = t[2].split(testDirName)
+
+ if DEBUG:
+ print tmp
+
+ if len(tmp) == 2:
+ fname = tmp[1].strip('\r\n')
+ else:
+ fname = tmp[0].strip('\r\n')
+
+      if not test.has_key(fname):
+        test[fname] = {}
+        for k in testkinds:
+          test[fname][k] = 'NA'
+
+      test[fname][t[1]] = t[0]
+      if DEBUG:
+        print test[fname][t[1]]
+ else :
+ try:
+ n = t[0].split('RESULT-')[1]
+
+ if DEBUG:
+          print n
+
+ if n == 'llc' or n == 'jit-comptime' or n == 'compile':
+ test[fname][tp + n] = float(t[2].split(' ')[2])
+ if DEBUG:
+ print test[fname][tp + n]
+
+ elif n.endswith('-time') :
+ test[fname][exp + n] = float(t[2].strip('\r\n'))
+ if DEBUG:
+ print test[fname][exp + n]
+
+ else :
+ print "ERROR!"
+ sys.exit(1)
+
+ except:
+ continue
+
+ return test
+
+# Diff results and look for regressions.
+def diffResults(d_old, d_new):
+
+ for t in sorted(d_old.keys()) :
+ if DEBUG:
+ print t
+
+ if d_new.has_key(t) :
+
+ # Check if the test passed or failed.
+      for x in testkinds:
+ if d_old[t].has_key(x):
+ if d_new[t].has_key(x):
+ if d_old[t][x] == 'PASS':
+ if d_new[t][x] != 'PASS':
+ print t + " *** REGRESSION (" + x + ")\n"
+ else:
+ if d_new[t][x] == 'PASS':
+ print t + " * NEW PASS (" + x + ")\n"
+
+ else :
+ print t + "*** REGRESSION (" + x + ")\n"
+
+      # For compile time, if there is no result, it's a fail.
+      for x in comptime:
+        if d_old[t].has_key(tp + x):
+          if not d_new[t].has_key(tp + x):
+            print t + " *** REGRESSION (" + tp + x + ")\n"
+        else:
+          if d_new[t].has_key(tp + x):
+            print t + " * NEW PASS (" + tp + x + ")\n"
+
+      # For execution time, if there is no result, it's a fail.
+      for x in exectime:
+        if d_old[t].has_key(exp + x):
+          if not d_new[t].has_key(exp + x):
+            print t + " *** REGRESSION (" + exp + x + ")\n"
+        else:
+          if d_new[t].has_key(exp + x):
+            print t + " * NEW PASS (" + exp + x + ")\n"
+
+ else :
+ print t + ": Removed from test-suite.\n"
+
+
+#Main
+if len(sys.argv) < 3 :
+ print 'Usage:', sys.argv[0], \
+ '<old log> <new log>'
+ sys.exit(-1)
+
+d_old = parse(sys.argv[1])
+d_new = parse(sys.argv[2])
+
+
+diffResults(d_old, d_new)
+
+
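Aside: everything this parser knows comes from the TEST-* lines in the
nightly tester log. The regular expression splits each line into a status
(PASS, FAIL, or RESULT-<metric>), the test kind, and the rest of the line
(typically the program path). A quick way to see the three capture groups;
the sample log line is invented for illustration:

    import re

    pat = r'TEST-(PASS|FAIL|RESULT.*?):\s+(.*?)\s+(.*?)\r*\n'
    sample = "TEST-PASS: llc /llvm-test/SingleSource/Benchmarks/foo\n"
    print(re.findall(pat, sample))
    # [('PASS', 'llc', '/llvm-test/SingleSource/Benchmarks/foo')]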
diff --git a/utils/release/findRegressions-simple.py b/utils/release/findRegressions-simple.py
new file mode 100755
index 000000000000..758623199ff9
--- /dev/null
+++ b/utils/release/findRegressions-simple.py
@@ -0,0 +1,158 @@
+#!/usr/bin/python
+import re, string, sys, os, time, math
+
+DEBUG = 0
+
+(tp, exp) = ('compile', 'exec')
+
+def parse(file):
+ f = open(file, 'r')
+ d = f.read()
+
+ # Cleanup weird stuff
+ d = re.sub(r',\d+:\d', '', d)
+
+ r = re.findall(r'TEST-(PASS|FAIL|RESULT.*?):\s+(.*?)\s+(.*?)\r*\n', d)
+
+ test = {}
+ fname = ''
+ for t in r:
+ if DEBUG:
+ print t
+
+ if t[0] == 'PASS' or t[0] == 'FAIL' :
+ tmp = t[2].split('llvm-test/')
+
+ if DEBUG:
+ print tmp
+
+ if len(tmp) == 2:
+ fname = tmp[1].strip('\r\n')
+ else:
+ fname = tmp[0].strip('\r\n')
+
+ if not test.has_key(fname):
+ test[fname] = {}
+
+ test[fname][t[1] + ' state'] = t[0]
+ test[fname][t[1] + ' time'] = float('nan')
+ else :
+ try:
+ n = t[0].split('RESULT-')[1]
+
+ if DEBUG:
+ print "n == ", n;
+
+ if n == 'compile-success':
+ test[fname]['compile time'] = float(t[2].split('program')[1].strip('\r\n'))
+
+ elif n == 'exec-success':
+ test[fname]['exec time'] = float(t[2].split('program')[1].strip('\r\n'))
+ if DEBUG:
+ print test[fname][string.replace(n, '-success', '')]
+
+ else :
+ # print "ERROR!"
+ sys.exit(1)
+
+ except:
+ continue
+
+ return test
+
+# Diff results and look for regressions.
+def diffResults(d_old, d_new):
+ regressions = {}
+ passes = {}
+ removed = ''
+
+ for x in ['compile state', 'compile time', 'exec state', 'exec time']:
+ regressions[x] = ''
+ passes[x] = ''
+
+ for t in sorted(d_old.keys()) :
+ if d_new.has_key(t):
+
+ # Check if the test passed or failed.
+ for x in ['compile state', 'compile time', 'exec state', 'exec time']:
+
+ if not d_old[t].has_key(x) and not d_new[t].has_key(x):
+ continue
+
+ if d_old[t].has_key(x):
+ if d_new[t].has_key(x):
+
+ if d_old[t][x] == 'PASS':
+ if d_new[t][x] != 'PASS':
+ regressions[x] += t + "\n"
+ else:
+ if d_new[t][x] == 'PASS':
+ passes[x] += t + "\n"
+
+ else :
+ regressions[x] += t + "\n"
+
+ if x == 'compile state' or x == 'exec state':
+ continue
+
+        # For timings, a missing result counts as a fail.
+ if not d_old[t].has_key(x) and not d_new[t].has_key(x):
+ continue
+ elif not d_new[t].has_key(x):
+ regressions[x] += t + "\n"
+ elif not d_old[t].has_key(x):
+ passes[x] += t + "\n"
+
+ if math.isnan(d_old[t][x]) and math.isnan(d_new[t][x]):
+ continue
+
+ elif math.isnan(d_old[t][x]) and not math.isnan(d_new[t][x]):
+ passes[x] += t + "\n"
+
+ elif not math.isnan(d_old[t][x]) and math.isnan(d_new[t][x]):
+ regressions[x] += t + ": NaN%\n"
+
+ if d_new[t][x] > d_old[t][x] and d_old[t][x] > 0.0 and \
+ (d_new[t][x] - d_old[t][x]) / d_old[t][x] > .05:
+ regressions[x] += t + ": " + "{0:.1f}".format(100 * (d_new[t][x] - d_old[t][x]) / d_old[t][x]) + "%\n"
+
+ else :
+ removed += t + "\n"
+
+ if len(regressions['compile state']) != 0:
+ print 'REGRESSION: Compilation Failed'
+ print regressions['compile state']
+
+ if len(regressions['exec state']) != 0:
+ print 'REGRESSION: Execution Failed'
+ print regressions['exec state']
+
+ if len(regressions['compile time']) != 0:
+ print 'REGRESSION: Compilation Time'
+ print regressions['compile time']
+
+ if len(regressions['exec time']) != 0:
+ print 'REGRESSION: Execution Time'
+ print regressions['exec time']
+
+ if len(passes['compile state']) != 0:
+ print 'NEW PASSES: Compilation'
+ print passes['compile state']
+
+ if len(passes['exec state']) != 0:
+ print 'NEW PASSES: Execution'
+ print passes['exec state']
+
+ if len(removed) != 0:
+ print 'REMOVED TESTS'
+ print removed
+
+# Main
+if len(sys.argv) < 3 :
+ print 'Usage:', sys.argv[0], '<old log> <new log>'
+ sys.exit(-1)
+
+d_old = parse(sys.argv[1])
+d_new = parse(sys.argv[2])
+
+diffResults(d_old, d_new)
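Aside: the timing comparison above flags a regression only when the old time
is positive and the slowdown exceeds the 5% threshold hard-coded in
diffResults. The same predicate extracted for clarity (the helper name is
illustrative; the threshold comes from the script):

    def is_time_regression(old, new, threshold=0.05):
        # Positive baseline, slower than before, and by more than 5%.
        return old > 0.0 and new > old and (new - old) / old > threshold

    assert is_time_regression(10.0, 10.6)      # +6.0% -> regression
    assert not is_time_regression(10.0, 10.4)  # +4.0% -> within tolerance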
diff --git a/utils/release/findRegressions.py b/utils/release/findRegressions.py
deleted file mode 100755
index 7629c8b4fbf1..000000000000
--- a/utils/release/findRegressions.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/python
-import re, string, sys, os, time, math
-
-DEBUG = 0
-
-(tp, exp) = ('compile', 'exec')
-
-def parse(file):
- f = open(file, 'r')
- d = f.read()
-
- # Cleanup weird stuff
- d = re.sub(r',\d+:\d', '', d)
-
- r = re.findall(r'TEST-(PASS|FAIL|RESULT.*?):\s+(.*?)\s+(.*?)\r*\n', d)
-
- test = {}
- fname = ''
- for t in r:
- if DEBUG:
- print t
-
- if t[0] == 'PASS' or t[0] == 'FAIL' :
- tmp = t[2].split('llvm-test/')
-
- if DEBUG:
- print tmp
-
- if len(tmp) == 2:
- fname = tmp[1].strip('\r\n')
- else:
- fname = tmp[0].strip('\r\n')
-
- if not test.has_key(fname):
- test[fname] = {}
-
- test[fname][t[1] + ' state'] = t[0]
- test[fname][t[1] + ' time'] = float('nan')
- else :
- try:
- n = t[0].split('RESULT-')[1]
-
- if DEBUG:
- print "n == ", n;
-
- if n == 'compile-success':
- test[fname]['compile time'] = float(t[2].split('program')[1].strip('\r\n'))
-
- elif n == 'exec-success':
- test[fname]['exec time'] = float(t[2].split('program')[1].strip('\r\n'))
- if DEBUG:
- print test[fname][string.replace(n, '-success', '')]
-
- else :
- # print "ERROR!"
- sys.exit(1)
-
- except:
- continue
-
- return test
-
-# Diff results and look for regressions.
-def diffResults(d_old, d_new):
-
- for t in sorted(d_old.keys()) :
- if d_new.has_key(t):
-
- # Check if the test passed or failed.
- for x in ['compile state', 'compile time', 'exec state', 'exec time']:
-
- if not d_old[t].has_key(x) and not d_new[t].has_key(x):
- continue
-
- if d_old[t].has_key(x):
- if d_new[t].has_key(x):
-
- if d_old[t][x] == 'PASS':
- if d_new[t][x] != 'PASS':
- print t + " *** REGRESSION (" + x + " now fails)"
- else:
- if d_new[t][x] == 'PASS':
- print t + " * NEW PASS (" + x + " now fails)"
-
- else :
- print t + "*** REGRESSION (" + x + " now fails)"
-
- if x == 'compile state' or x == 'exec state':
- continue
-
- # For execution time, if there is no result it's a fail.
- if not d_old[t].has_key(x) and not d_new[t].has_key(x):
- continue
- elif not d_new[t].has_key(x):
- print t + " *** REGRESSION (" + x + ")"
- elif not d_old[t].has_key(x):
- print t + " * NEW PASS (" + x + ")"
-
- if math.isnan(d_old[t][x]) and math.isnan(d_new[t][x]):
- continue
-
- elif math.isnan(d_old[t][x]) and not math.isnan(d_new[t][x]):
- print t + " * NEW PASS (" + x + ")"
-
- elif not math.isnan(d_old[t][x]) and math.isnan(d_new[t][x]):
- print t + " *** REGRESSION (" + x + ")"
-
- if d_new[t][x] > d_old[t][x] and \
- (d_new[t][x] - d_old[t][x]) / d_new[t][x] > .05:
- print t + " *** REGRESSION (" + x + ")"
-
- else :
- print t + ": Removed from test-suite."
-
-# Main
-if len(sys.argv) < 3 :
- print 'Usage:', sys.argv[0], '<old log> <new log>'
- sys.exit(-1)
-
-d_old = parse(sys.argv[1])
-d_new = parse(sys.argv[2])
-
-diffResults(d_old, d_new)
diff --git a/utils/release/merge.sh b/utils/release/merge.sh
new file mode 100755
index 000000000000..2cf39b282a71
--- /dev/null
+++ b/utils/release/merge.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+#===-- merge.sh - Test the LLVM release candidates -------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License.
+#
+#===------------------------------------------------------------------------===#
+#
+# Merge a revision into a project.
+#
+#===------------------------------------------------------------------------===#
+
+set -e
+
+rev=""
+proj=""
+
+function usage() {
+ echo "usage: `basename $0` [OPTIONS]"
+ echo " -proj PROJECT The project to merge the result into"
+ echo " -rev NUM The revision to merge into the project"
+}
+
+while [ $# -gt 0 ]; do
+ case $1 in
+ -rev | --rev | -r )
+ shift
+ rev=$1
+ ;;
+ -proj | --proj | -project | --project | -p )
+ shift
+ proj=$1
+ ;;
+    -h | -help | --help )
+      usage
+      exit 0
+      ;;
+ * )
+ echo "unknown option: $1"
+ echo ""
+ usage
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [ "x$rev" = "x" -o "x$proj" = "x" ]; then
+ echo "error: need to specify project and revision"
+ echo
+ usage
+ exit 1
+fi
+
+if ! svn ls http://llvm.org/svn/llvm-project/$proj/trunk > /dev/null 2>&1 ; then
+ echo "error: invalid project: $proj"
+ exit 1
+fi
+
+tempfile=`mktemp /tmp/merge.XXXXXX` || exit 1
+
+echo "Merging r$rev:" > $tempfile
+svn log -c $rev http://llvm.org/svn/llvm-project/$proj/trunk >> $tempfile 2>&1
+
+cd $proj.src
+echo "# Updating tree"
+svn up
+echo "# Merging r$rev into $proj"
+svn merge -c $rev https://llvm.org/svn/llvm-project/$proj/trunk . || exit 1
+echo "# Committing changes"
+svn commit -F $tempfile || exit 1
+rm -f $tempfile
+exit 0
diff --git a/utils/release/tag.sh b/utils/release/tag.sh
new file mode 100755
index 000000000000..80da47a4db0d
--- /dev/null
+++ b/utils/release/tag.sh
@@ -0,0 +1,99 @@
+#!/bin/sh
+#===-- tag.sh - Tag the LLVM release candidates ----------------------------===#
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License.
+#
+#===------------------------------------------------------------------------===#
+#
+# Create branches and release candidates for the LLVM release.
+#
+#===------------------------------------------------------------------------===#
+
+set -e
+
+release=""
+rc=""
+
+base_url="https://llvm.org/svn/llvm-project"
+
+function usage() {
+ echo "usage: `basename $0` -release <num>"
+ echo "usage: `basename $0` -release <num> -rc <num>"
+ echo " "
+ echo " -release <num> The version number of the release"
+ echo " -rc <num> The release candidate number"
+ echo " -final Tag final release candidate"
+}
+
+function tag_version() {
+ set -x
+ for proj in llvm cfe dragonegg test-suite compiler-rt libcxx libcxxabi ; do
+ if ! svn ls $base_url/$proj/branches/release_$release > /dev/null 2>&1 ; then
+ svn copy -m "Creating release_$release branch" \
+ $base_url/$proj/trunk \
+ $base_url/$proj/branches/release_$release
+ fi
+ done
+ set +x
+}
+
+function tag_release_candidate() {
+ set -x
+ for proj in llvm cfe dragonegg test-suite compiler-rt libcxx libcxxabi ; do
+ if ! svn ls $base_url/$proj/tags/RELEASE_$release > /dev/null 2>&1 ; then
+ svn mkdir -m "Creating release directory for release_$release." $base_url/$proj/tags/RELEASE_$release
+ fi
+ if ! svn ls $base_url/$proj/tags/RELEASE_$release/$rc > /dev/null 2>&1 ; then
+ svn copy -m "Creating release candidate $rc from release_$release branch" \
+ $base_url/$proj/branches/release_$release \
+ $base_url/$proj/tags/RELEASE_$release/$rc
+ fi
+ done
+ set +x
+}
+
+while [ $# -gt 0 ]; do
+ case $1 in
+ -release | --release )
+ shift
+ release=$1
+ ;;
+ -rc | --rc )
+ shift
+ rc="rc$1"
+ ;;
+ -final | --final )
+ rc="final"
+ ;;
+ -h | --help | -help )
+ usage
+ exit 0
+ ;;
+ * )
+ echo "unknown option: $1"
+ usage
+ exit 1
+ ;;
+ esac
+ shift
+done
+
+if [ "x$release" = "x" ]; then
+ echo "error: need to specify a release version"
+ echo
+ usage
+ exit 1
+fi
+
+release=`echo $release | sed -e 's,\.,,g'`
+
+if [ "x$rc" = "x" ]; then
+ tag_version
+else
+ tag_release_candidate
+fi
+
+exit 0
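Aside: merge.sh and tag.sh both assume the standard llvm-project Subversion
layout: trunk, branches/release_<num>, and tags/RELEASE_<num>/<rc>, with the
dots stripped from the release number. A tiny helper showing the URLs tag.sh
operates on (paths only, no network access; the version string is an example):

    BASE = "https://llvm.org/svn/llvm-project"

    def tag_url(proj, release, rc=None):
        release = release.replace(".", "")  # mirrors: sed -e 's,\.,,g'
        if rc is None:
            return "%s/%s/branches/release_%s" % (BASE, proj, release)
        return "%s/%s/tags/RELEASE_%s/%s" % (BASE, proj, release, rc)

    print(tag_url("llvm", "3.1"))         # .../llvm/branches/release_31
    print(tag_url("llvm", "3.1", "rc1"))  # .../llvm/tags/RELEASE_31/rc1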
diff --git a/utils/release/test-release.sh b/utils/release/test-release.sh
index 94217e568e27..ad1af5fef055 100755
--- a/utils/release/test-release.sh
+++ b/utils/release/test-release.sh
@@ -28,11 +28,14 @@ Release_no_dot=""
RC=""
do_checkout="yes"
do_ada="no"
-do_objc="yes"
+do_clang="yes"
+do_dragonegg="no"
do_fortran="no"
+do_objc="yes"
do_64bit="yes"
do_debug="no"
do_asserts="no"
+do_compare="yes"
BuildDir="`pwd`"
function usage() {
@@ -40,15 +43,19 @@ function usage() {
echo ""
echo " -release X.Y The release number to test."
echo " -rc NUM The pre-release candidate number."
+ echo " -final The final release candidate."
echo " -j NUM Number of compile jobs to run. [default: 3]"
echo " -build-dir DIR Directory to perform testing in. [default: pwd]"
echo " -no-checkout Don't checkout the sources from SVN."
echo " -no-64bit Don't test the 64-bit version. [default: yes]"
echo " -enable-ada Build Ada. [default: disable]"
+ echo " -disable-clang Do not test clang. [default: enable]"
+ echo " -enable-dragonegg Test dragonegg. [default: disable]"
echo " -enable-fortran Enable Fortran build. [default: disable]"
echo " -disable-objc Disable ObjC build. [default: enable]"
echo " -test-debug Test the debug build. [default: no]"
echo " -test-asserts Test with asserts on. [default: no]"
+ echo " -no-compare-files Don't test that phase 2 and 3 files are identical."
}
while [ $# -gt 0 ]; do
@@ -60,7 +67,10 @@ while [ $# -gt 0 ]; do
;;
-rc | --rc | -RC | --RC )
shift
- RC=$1
+ RC="rc$1"
+ ;;
+ -final | --final )
+ RC=final
;;
-j* )
NumJobs="`echo $1 | sed -e 's,-j\([0-9]*\),\1,g'`"
@@ -82,6 +92,12 @@ while [ $# -gt 0 ]; do
-enable-ada | --enable-ada )
do_ada="yes"
;;
+ -disable-clang | --disable-clang )
+ do_clang="no"
+ ;;
+ -enable-dragonegg | --enable-dragonegg )
+ do_dragonegg="yes"
+ ;;
-enable-fortran | --enable-fortran )
do_fortran="yes"
;;
@@ -94,6 +110,9 @@ while [ $# -gt 0 ]; do
-test-asserts | --test-asserts )
do_asserts="yes"
;;
+ -no-compare-files | --no-compare-files )
+ do_compare="no"
+ ;;
-help | --help | -h | --h | -\? )
usage
exit 0
@@ -132,7 +151,7 @@ if [ -z "$NumJobs" ]; then
fi
# Go to the build directory (may be different from CWD)
-BuildDir=$BuildDir/rc$RC
+BuildDir=$BuildDir/$RC
mkdir -p $BuildDir
cd $BuildDir
@@ -140,16 +159,34 @@ cd $BuildDir
LogDir=$BuildDir/logs
mkdir -p $LogDir
-# Find a compilers.
-c_compiler="$CC"
-cxx_compiler="$CXX"
+# Find compilers.
+if [ "$do_dragonegg" = "yes" ]; then
+ gcc_compiler="$GCC"
+ if [ -z "$gcc_compiler" ]; then
+ gcc_compiler="`which gcc`"
+ if [ -z "$gcc_compiler" ]; then
+ echo "error: cannot find gcc to use with dragonegg"
+ exit 1
+ fi
+ fi
+
+ gxx_compiler="$GXX"
+ if [ -z "$gxx_compiler" ]; then
+ gxx_compiler="`which g++`"
+ if [ -z "$gxx_compiler" ]; then
+ echo "error: cannot find g++ to use with dragonegg"
+ exit 1
+ fi
+ fi
+fi
+
# Make sure that the URLs are valid.
function check_valid_urls() {
for proj in $projects ; do
echo "# Validating $proj SVN URL"
- if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/rc$RC > /dev/null 2>&1 ; then
+ if ! svn ls $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC > /dev/null 2>&1 ; then
echo "llvm $Release release candidate $RC doesn't exist!"
exit 1
fi
@@ -162,7 +199,7 @@ function export_sources() {
for proj in $projects ; do
echo "# Exporting $proj $Release-RC$RC sources"
- if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/rc$RC $proj.src ; then
+ if ! svn export -q $Base_url/$proj/tags/RELEASE_$Release_no_dot/$RC $proj.src ; then
echo "error: failed to export $proj project"
exit 1
fi
@@ -210,14 +247,15 @@ function configure_llvmCore() {
echo "# Using C++ compiler: $cxx_compiler"
cd $ObjDir
- echo "# Configuring llvm $Release-rc$RC $Flavor"
+ echo "# Configuring llvm $Release-$RC $Flavor"
echo "# $BuildDir/llvm.src/configure --prefix=$InstallDir \
--enable-optimized=$Optimized \
--enable-assertions=$Assertions"
- env CC=$c_compiler CXX=$cxx_compiler \
+ env CC="$c_compiler" CXX="$cxx_compiler" \
$BuildDir/llvm.src/configure --prefix=$InstallDir \
--enable-optimized=$Optimized \
--enable-assertions=$Assertions \
+ --disable-timestamps \
2>&1 | tee $LogDir/llvm.configure-Phase$Phase-$Flavor.log
cd $BuildDir
}
@@ -233,18 +271,40 @@ function build_llvmCore() {
fi
cd $ObjDir
- echo "# Compiling llvm $Release-rc$RC $Flavor"
+ echo "# Compiling llvm $Release-$RC $Flavor"
echo "# ${MAKE} -j $NumJobs VERBOSE=1 $ExtraOpts"
${MAKE} -j $NumJobs VERBOSE=1 $ExtraOpts \
2>&1 | tee $LogDir/llvm.make-Phase$Phase-$Flavor.log
- echo "# Installing llvm $Release-rc$RC $Flavor"
+ echo "# Installing llvm $Release-$RC $Flavor"
echo "# ${MAKE} install"
${MAKE} install \
2>&1 | tee $LogDir/llvm.install-Phase$Phase-$Flavor.log
cd $BuildDir
}
+function build_dragonegg() {
+ Phase="$1"
+ Flavor="$2"
+ LLVMInstallDir="$3"
+ DragonEggObjDir="$4"
+ LLVM_CONFIG=$LLVMInstallDir/bin/llvm-config
+ TOP_DIR=$BuildDir/dragonegg.src
+
+ echo "# Targeted compiler: $gcc_compiler"
+
+ cd $DragonEggObjDir
+ echo "# Compiling phase $Phase dragonegg $Release-$RC $Flavor"
+ echo -n "# CXX=$cxx_compiler TOP_DIR=$TOP_DIR GCC=$gcc_compiler "
+ echo -n "LLVM_CONFIG=$LLVM_CONFIG ${MAKE} -f $TOP_DIR/Makefile "
+ echo "-j $NumJobs VERBOSE=1"
+ CXX="$cxx_compiler" TOP_DIR="$TOP_DIR" GCC="$gcc_compiler" \
+ LLVM_CONFIG="$LLVM_CONFIG" ${MAKE} -f $TOP_DIR/Makefile \
+ -j $NumJobs VERBOSE=1 \
+ 2>&1 | tee $LogDir/dragonegg-Phase$Phase-$Flavor.log
+ cd $BuildDir
+}
+
function test_llvmCore() {
Phase="$1"
Flavor="$2"
@@ -280,81 +340,173 @@ for Flavor in $Flavors ; do
echo ""
echo ""
echo "********************************************************************************"
- echo " Release: $Release-rc$RC"
+ echo " Release: $Release-$RC"
echo " Build: $Flavor"
echo " System Info: "
echo " `uname -a`"
echo "********************************************************************************"
echo ""
- llvmCore_phase1_objdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-rc$RC.obj
- llvmCore_phase1_installdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-rc$RC.install
+ c_compiler="$CC"
+ cxx_compiler="$CXX"
+
+ llvmCore_phase1_objdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-$RC.obj
+ llvmCore_phase1_installdir=$BuildDir/Phase1/$Flavor/llvmCore-$Release-$RC.install
+ dragonegg_phase1_objdir=$BuildDir/Phase1/$Flavor/DragonEgg-$Release-$RC.obj
- llvmCore_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-rc$RC.obj
- llvmCore_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-rc$RC.install
+ llvmCore_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-$RC.obj
+ llvmCore_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-$Release-$RC.install
+ llvmCore_de_phase2_objdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-$RC.obj
+ llvmCore_de_phase2_installdir=$BuildDir/Phase2/$Flavor/llvmCore-DragonEgg-$Release-$RC.install
+ dragonegg_phase2_objdir=$BuildDir/Phase2/$Flavor/DragonEgg-$Release-$RC.obj
- llvmCore_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-rc$RC.obj
- llvmCore_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-rc$RC.install
+ llvmCore_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-$RC.obj
+ llvmCore_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-$Release-$RC.install
+ llvmCore_de_phase3_objdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-$RC.obj
+ llvmCore_de_phase3_installdir=$BuildDir/Phase3/$Flavor/llvmCore-DragonEgg-$Release-$RC.install
+ dragonegg_phase3_objdir=$BuildDir/Phase3/$Flavor/DragonEgg-$Release-$RC.obj
rm -rf $llvmCore_phase1_objdir
rm -rf $llvmCore_phase1_installdir
+ rm -rf $dragonegg_phase1_objdir
+
rm -rf $llvmCore_phase2_objdir
rm -rf $llvmCore_phase2_installdir
+ rm -rf $llvmCore_de_phase2_objdir
+ rm -rf $llvmCore_de_phase2_installdir
+ rm -rf $dragonegg_phase2_objdir
+
rm -rf $llvmCore_phase3_objdir
rm -rf $llvmCore_phase3_installdir
+ rm -rf $llvmCore_de_phase3_objdir
+ rm -rf $llvmCore_de_phase3_installdir
+ rm -rf $dragonegg_phase3_objdir
mkdir -p $llvmCore_phase1_objdir
mkdir -p $llvmCore_phase1_installdir
+ mkdir -p $dragonegg_phase1_objdir
+
mkdir -p $llvmCore_phase2_objdir
mkdir -p $llvmCore_phase2_installdir
+ mkdir -p $llvmCore_de_phase2_objdir
+ mkdir -p $llvmCore_de_phase2_installdir
+ mkdir -p $dragonegg_phase2_objdir
+
mkdir -p $llvmCore_phase3_objdir
mkdir -p $llvmCore_phase3_installdir
+ mkdir -p $llvmCore_de_phase3_objdir
+ mkdir -p $llvmCore_de_phase3_installdir
+ mkdir -p $dragonegg_phase3_objdir
############################################################################
- # Phase 1: Build llvmCore and llvmgcc42
+ # Phase 1: Build llvmCore and clang
echo "# Phase 1: Building llvmCore"
configure_llvmCore 1 $Flavor \
$llvmCore_phase1_objdir $llvmCore_phase1_installdir
build_llvmCore 1 $Flavor \
$llvmCore_phase1_objdir
- ############################################################################
- # Phase 2: Build llvmCore with newly built clang from phase 1.
- c_compiler=$llvmCore_phase1_installdir/bin/clang
- cxx_compiler=$llvmCore_phase1_installdir/bin/clang++
- echo "# Phase 2: Building llvmCore"
- configure_llvmCore 2 $Flavor \
- $llvmCore_phase2_objdir $llvmCore_phase2_installdir
- build_llvmCore 2 $Flavor \
- $llvmCore_phase2_objdir
-
- ############################################################################
- # Phase 3: Build llvmCore with newly built clang from phase 2.
- c_compiler=$llvmCore_phase2_installdir/bin/clang
- cxx_compiler=$llvmCore_phase2_installdir/bin/clang++
- echo "# Phase 3: Building llvmCore"
- configure_llvmCore 3 $Flavor \
- $llvmCore_phase3_objdir $llvmCore_phase3_installdir
- build_llvmCore 3 $Flavor \
- $llvmCore_phase3_objdir
+ # Test clang
+ if [ "$do_clang" = "yes" ]; then
+ ########################################################################
+ # Phase 2: Build llvmCore with newly built clang from phase 1.
+ c_compiler=$llvmCore_phase1_installdir/bin/clang
+ cxx_compiler=$llvmCore_phase1_installdir/bin/clang++
+ echo "# Phase 2: Building llvmCore"
+ configure_llvmCore 2 $Flavor \
+ $llvmCore_phase2_objdir $llvmCore_phase2_installdir
+ build_llvmCore 2 $Flavor \
+ $llvmCore_phase2_objdir
+
+ ########################################################################
+ # Phase 3: Build llvmCore with newly built clang from phase 2.
+ c_compiler=$llvmCore_phase2_installdir/bin/clang
+ cxx_compiler=$llvmCore_phase2_installdir/bin/clang++
+ echo "# Phase 3: Building llvmCore"
+ configure_llvmCore 3 $Flavor \
+ $llvmCore_phase3_objdir $llvmCore_phase3_installdir
+ build_llvmCore 3 $Flavor \
+ $llvmCore_phase3_objdir
+
+ ########################################################################
+ # Testing: Test phase 3
+ echo "# Testing - built with clang"
+ test_llvmCore 3 $Flavor $llvmCore_phase3_objdir
+
+ ########################################################################
+ # Compare .o files between Phase2 and Phase3 and report which ones
+ # differ.
+ if [ "$do_compare" = "yes" ]; then
+ echo
+ echo "# Comparing Phase 2 and Phase 3 files"
+ for o in `find $llvmCore_phase2_objdir -name '*.o'` ; do
+ p3=`echo $o | sed -e 's,Phase2,Phase3,'`
+ if ! cmp --ignore-initial=16 $o $p3 > /dev/null 2>&1 ; then
+ echo "file `basename $o` differs between phase 2 and phase 3"
+ fi
+ done
+ fi
+ fi
- ############################################################################
- # Testing: Test phase 3
- echo "# Testing - built with clang"
- test_llvmCore 3 $Flavor $llvmCore_phase3_objdir
+ # Test dragonegg
+ if [ "$do_dragonegg" = "yes" ]; then
+    # Build dragonegg using the targeted gcc. This isn't strictly
+    # necessary, but it helps avoid using broken versions of gcc (which
+    # are legion), checks that the targeted gcc is basically sane, and
+    # keeps it consistent with the later phases, in which the targeted
+    # gcc + dragonegg combination is used.
+ c_compiler="$gcc_compiler"
+ cxx_compiler="$gxx_compiler"
+ build_dragonegg 1 $Flavor $llvmCore_phase1_installdir $dragonegg_phase1_objdir
+
+ ########################################################################
+ # Phase 2: Build llvmCore with newly built dragonegg from phase 1.
+ c_compiler="$gcc_compiler -fplugin=$dragonegg_phase1_objdir/dragonegg.so"
+ cxx_compiler="$gxx_compiler -fplugin=$dragonegg_phase1_objdir/dragonegg.so"
+ echo "# Phase 2: Building llvmCore with dragonegg"
+ configure_llvmCore 2 $Flavor \
+ $llvmCore_de_phase2_objdir $llvmCore_de_phase2_installdir
+ build_llvmCore 2 $Flavor \
+ $llvmCore_de_phase2_objdir
+ build_dragonegg 2 $Flavor $llvmCore_de_phase2_installdir $dragonegg_phase2_objdir
+
+ ########################################################################
+    # Phase 3: Build llvmCore with newly built dragonegg from phase 2.
+ c_compiler="$gcc_compiler -fplugin=$dragonegg_phase2_objdir/dragonegg.so"
+ cxx_compiler="$gxx_compiler -fplugin=$dragonegg_phase2_objdir/dragonegg.so"
+ echo "# Phase 3: Building llvmCore with dragonegg"
+ configure_llvmCore 3 $Flavor \
+ $llvmCore_de_phase3_objdir $llvmCore_de_phase3_installdir
+ build_llvmCore 3 $Flavor \
+ $llvmCore_de_phase3_objdir
+ build_dragonegg 3 $Flavor $llvmCore_de_phase3_installdir $dragonegg_phase3_objdir
+
+ ########################################################################
+ # Testing: Test phase 3
+ c_compiler="$gcc_compiler -fplugin=$dragonegg_phase3_objdir/dragonegg.so"
+ cxx_compiler="$gxx_compiler -fplugin=$dragonegg_phase3_objdir/dragonegg.so"
+ echo "# Testing - built with dragonegg"
+ test_llvmCore 3 $Flavor $llvmCore_de_phase3_objdir
+
+ ########################################################################
+ # Compare .o files between Phase2 and Phase3 and report which ones differ.
+ echo
+ echo "# Comparing Phase 2 and Phase 3 files"
+ for o in `find $llvmCore_de_phase2_objdir -name '*.o'` \
+ `find $dragonegg_phase2_objdir -name '*.o'` ; do
+ p3=`echo $o | sed -e 's,Phase2,Phase3,'`
+ if ! cmp --ignore-initial=16 $o $p3 > /dev/null 2>&1 ; then
+ echo "file `basename $o` differs between dragonegg phase 2 and phase 3"
+ fi
+ done
+ fi
- ############################################################################
- # Compare .o files between Phase2 and Phase3 and report which ones differ.
- echo
- echo "# Comparing Phase 2 and Phase 3 files"
- for o in `find $llvmCore_phase2_objdir -name '*.o'` ; do
- p3=`echo $o | sed -e 's,Phase2,Phase3,'`
- if ! cmp --ignore-initial=16 $o $p3 > /dev/null 2>&1 ; then
- echo "file `basename $o` differs between phase 2 and phase 3"
- fi
- done
+ # Otherwise just test the core.
+ if [ "$do_clang" != "yes" -a "$do_dragonegg" != "yes" ]; then
+ echo "# Testing - built with system compiler"
+ test_llvmCore 1 $Flavor $llvmCore_phase1_objdir
+ fi
done
-) 2>&1 | tee $LogDir/testing.$Release-rc$RC.log
+) 2>&1 | tee $LogDir/testing.$Release-$RC.log
set +e
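Aside: the phase-2 vs. phase-3 comparison added above is the heart of the
self-host check: a correctly bootstrapped compiler should reproduce itself,
so byte-identical object files are strong evidence of a sound build.
cmp --ignore-initial=16 skips the leading bytes of each object file, which
can differ for incidental reasons such as embedded timestamps. A rough
Python equivalent of that loop (the directory name is illustrative):

    import os

    def compare_phases(phase2_dir, skip=16):
        for root, _, files in os.walk(phase2_dir):
            for name in files:
                if not name.endswith(".o"):
                    continue
                p2 = os.path.join(root, name)
                p3 = p2.replace("Phase2", "Phase3", 1)
                try:
                    with open(p2, "rb") as a, open(p3, "rb") as b:
                        if a.read()[skip:] != b.read()[skip:]:
                            print("file %s differs between phase 2 and phase 3"
                                  % name)
                except IOError:
                    print("file %s missing from phase 3" % name)

    compare_phases("Phase2/Release/llvmCore-3.1-rc1.obj")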
diff --git a/utils/show-diagnostics b/utils/show-diagnostics
deleted file mode 100755
index 3a69793abc90..000000000000
--- a/utils/show-diagnostics
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env python
-
-import plistlib
-
-def main():
- from optparse import OptionParser, OptionGroup
- parser = OptionParser("""\
-usage: %prog [options] <path>
-
-Utility for dumping Clang-style logged diagnostics.\
-""")
- (opts, args) = parser.parse_args()
-
- if len(args) != 1:
- parser.error("invalid number of arguments")
-
- path, = args
-
- # Read the diagnostics log.
- f = open(path)
- try:
- data = f.read()
- finally:
- f.close()
-
- # Complete the plist (the log itself is just the chunks).
- data = """\
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" \
- "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<array>
-%s
-</array>
-</plist>""" % data
-
- # Load the diagnostics.
- diags = plistlib.readPlistFromString(data)
-
- # Print out the diagnostics.
- print
- print "**** BUILD DIAGNOSTICS ****"
- for i, file_diags in enumerate(diags):
- file = file_diags.get('main-file')
- print "*** %s ***" % file
- for d in file_diags.get('diagnostics', ()):
- print "%s:%s:%s: %s: %s" % (
- d.get('filename'), d.get('line'), d.get('column'),
- d.get('level'), d.get('message'))
-
-if __name__ == "__main__":
- main()
diff --git a/utils/unittest/LLVMBuild.txt b/utils/unittest/LLVMBuild.txt
new file mode 100644
index 000000000000..2810567f4ae5
--- /dev/null
+++ b/utils/unittest/LLVMBuild.txt
@@ -0,0 +1,30 @@
+;===- ./utils/unittest/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
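+; gtest is the GoogleTest library itself; gtest_main additionally provides
+; the stock main() for unit test executables that don't define their own.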
+[component_0]
+type = Library
+name = gtest
+parent = Libraries
+required_libraries = Support
+
+[component_1]
+type = Library
+name = gtest_main
+parent = Libraries
+required_libraries = gtest
diff --git a/utils/unittest/UnitTestMain/Makefile b/utils/unittest/UnitTestMain/Makefile
index 30827795aff9..7bcb72495049 100644
--- a/utils/unittest/UnitTestMain/Makefile
+++ b/utils/unittest/UnitTestMain/Makefile
@@ -11,7 +11,7 @@ LEVEL = ../../..
include $(LEVEL)/Makefile.config
-LIBRARYNAME = UnitTestMain
+LIBRARYNAME = gtest_main
BUILD_ARCHIVE = 1
REQUIRES_RTTI = 1
diff --git a/utils/unittest/googletest/Makefile b/utils/unittest/googletest/Makefile
index 21b29ffc2c3a..22c8f36fccb6 100644
--- a/utils/unittest/googletest/Makefile
+++ b/utils/unittest/googletest/Makefile
@@ -11,7 +11,7 @@ LEVEL := ../../..
include $(LEVEL)/Makefile.config
-LIBRARYNAME = GoogleTest
+LIBRARYNAME = gtest
BUILD_ARCHIVE = 1
REQUIRES_RTTI = 1
diff --git a/utils/unittest/googletest/gtest-death-test.cc b/utils/unittest/googletest/gtest-death-test.cc
index 65893851e2d3..bf7e32c23835 100644
--- a/utils/unittest/googletest/gtest-death-test.cc
+++ b/utils/unittest/googletest/gtest-death-test.cc
@@ -527,7 +527,6 @@ bool DeathTestImpl::Passed(bool status_ok) {
}
break;
case IN_PROGRESS:
- default:
GTEST_LOG_(FATAL)
<< "DeathTest::Passed somehow called before conclusion of test";
}
diff --git a/utils/unittest/googletest/gtest.cc b/utils/unittest/googletest/gtest.cc
index 76244974115e..3fdff0a9a355 100644
--- a/utils/unittest/googletest/gtest.cc
+++ b/utils/unittest/googletest/gtest.cc
@@ -2480,9 +2480,10 @@ static const char * TestPartResultTypeToString(TestPartResult::Type type) {
#else
return "Failure\n";
#endif
- default:
- return "Unknown result type";
}
+
+ // All cases return, so this is unreachable, but GCC doesn't know it.
+ abort();
}
// Prints a TestPartResult to a String.
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h b/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h
index 1d9f83b652b5..7bac2bd872bb 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h
@@ -207,8 +207,6 @@ GTEST_API_ bool ExitedUnsuccessfully(int exit_status);
gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \
break; \
} \
- default: \
- break; \
} \
} \
} else \
diff --git a/utils/vim/tablegen.vim b/utils/vim/tablegen.vim
index fed619a07add..40d8d78bba12 100644
--- a/utils/vim/tablegen.vim
+++ b/utils/vim/tablegen.vim
@@ -1,7 +1,7 @@
" Vim syntax file
" Language: TableGen
" Maintainer: The LLVM team, http://llvm.org/
-" Version: $Revision: 141378 $
+" Version: $Revision: 151164 $
if version < 600
syntax clear
@@ -14,7 +14,7 @@ syntax sync minlines=100
syn case match
-syn keyword tgKeyword def let in code dag field include defm
+syn keyword tgKeyword def let in code dag field include defm foreach
syn keyword tgType class int string list bit bits multiclass
syn match tgNumber /\<\d\+\>/
diff --git a/utils/webNLT.pl b/utils/webNLT.pl
deleted file mode 100755
index fb29fd292e2d..000000000000
--- a/utils/webNLT.pl
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/usr/bin/perl
-
-use DBI;
-use CGI;
-
-$q = new CGI;
-print $q->header();
-print $q->start_html(-title=>"Nightly Tester DB");
-
-unless($q->param('pwd'))
- {
- print $q->startform();
- print $q->password_field(-name=>"pwd", -size=>20, -maxlength=>20);
- print $q->submit();
- print $q->endform();
- }
-else
- {
- # database information
- $db="llvmalpha";
- $host="localhost";
- $userid="llvmdbuser";
- $passwd=$q->param('pwd');
- $connectionInfo="dbi:mysql:$db;$host";
-
- # make connection to database
- $dbh = DBI->connect($connectionInfo,$userid,$passwd) or die DBI->errstr;
- $query = "Select DISTINCT(NAME) from Tests";
- my $sth = $dbh->prepare($query) || die "Can't prepare statement: $DBI::errstr";
- my $rc = $sth->execute or die DBI->errstr;
- while (($n) = $sth->fetchrow_array)
- {
- push @names, ($n);
-# print "$n<P>";
- }
- $query = "Select DISTINCT(TEST) from Tests";
- my $sth = $dbh->prepare($query) || die "Can't prepare statement: $DBI::errstr";
- my $rc = $sth->execute or die DBI->errstr;
- while (($n) = $sth->fetchrow_array)
- {
- push @tests, ($n);
-# print "$n\n";
- }
-
-# print join "<BR>", @names;
-
- print $q->startform();
- print $q->scrolling_list(-name=>"test", -values=>\@tests, -multiple=>'true');
- print "<P>";
- print $q->scrolling_list(-name=>"name", -values=>\@names, -multiple=>'true');
- print "<P>";
- print $q->submit();
- print $q->hidden("pwd", $q->param('pwd'));
- print $q->endform();
-
- # disconnect from database
- $dbh->disconnect;
-
- #now generate the urls to the chart
- if ($q->param('test') && $q->param('name'))
- {
- my @names = $q->param('name');
- my @tests = $q->param('test');
- print "<P>";
- print join "<BR>", @names;
- print "<P>";
- print join "<BR>", @tests;
- print "<P>";
- $str = "pwd=" . $q->param('pwd');
- $count = 0;
- foreach $n (@names)
- {
- foreach $t (@tests)
- {
- $str = "$str&t$count=$t&n$count=$n";
- $count++;
- }
- }
- print "<img src=\"cgiplotNLT.pl?$str\">";
- }
- }
-
-print $q->end_html();
diff --git a/utils/yaml-bench/CMakeLists.txt b/utils/yaml-bench/CMakeLists.txt
new file mode 100644
index 000000000000..403182ceee2a
--- /dev/null
+++ b/utils/yaml-bench/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_utility(yaml-bench
+ YAMLBench.cpp
+ )
+
+target_link_libraries(yaml-bench LLVMSupport)
diff --git a/utils/yaml-bench/Makefile b/utils/yaml-bench/Makefile
new file mode 100644
index 000000000000..07e91226c7a9
--- /dev/null
+++ b/utils/yaml-bench/Makefile
@@ -0,0 +1,20 @@
+##===- utils/yaml-bench/Makefile ---------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = yaml-bench
+USEDLIBS = LLVMSupport.a
+
+# This tool has no plugins, so optimize startup time.
+TOOL_NO_EXPORTS = 1
+
+# Don't install this utility
+NO_INSTALL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
new file mode 100644
index 000000000000..e5ee52a16d96
--- /dev/null
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -0,0 +1,216 @@
+//===- YAMLBench - Benchmark the YAMLParser implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program executes the YAMLParser on differently sized YAML texts and
+// outputs the run time.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/YAMLParser.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ DumpTokens( "tokens"
+ , cl::desc("Print the tokenization of the file.")
+ , cl::init(false)
+ );
+
+static cl::opt<bool>
+ DumpCanonical( "canonical"
+ , cl::desc("Print the canonical YAML for this file.")
+ , cl::init(false)
+ );
+
+static cl::opt<std::string>
+ Input(cl::Positional, cl::desc("<input>"));
+
+static cl::opt<bool>
+ Verify( "verify"
+ , cl::desc(
+ "Run a quick verification useful for regression testing")
+ , cl::init(false)
+ );
+
+static cl::opt<unsigned>
+ MemoryLimitMB("memory-limit", cl::desc(
+ "Do not use more megabytes of memory"),
+ cl::init(1000));
+
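+// Helper that prints 'distance' spaces when streamed; used below to indent
+// the canonical dump.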
+struct indent {
+ unsigned distance;
+ indent(unsigned d) : distance(d) {}
+};
+
+static raw_ostream &operator <<(raw_ostream &os, const indent &in) {
+ for (unsigned i = 0; i < in.distance; ++i)
+ os << " ";
+ return os;
+}
+
+static void dumpNode( yaml::Node *n
+ , unsigned Indent = 0
+ , bool SuppressFirstIndent = false) {
+ if (!n)
+ return;
+ if (!SuppressFirstIndent)
+ outs() << indent(Indent);
+ StringRef Anchor = n->getAnchor();
+ if (!Anchor.empty())
+ outs() << "&" << Anchor << " ";
+ if (yaml::ScalarNode *sn = dyn_cast<yaml::ScalarNode>(n)) {
+ SmallString<32> Storage;
+ StringRef Val = sn->getValue(Storage);
+ outs() << "!!str \"" << yaml::escape(Val) << "\"";
+ } else if (yaml::SequenceNode *sn = dyn_cast<yaml::SequenceNode>(n)) {
+ outs() << "!!seq [\n";
+ ++Indent;
+ for (yaml::SequenceNode::iterator i = sn->begin(), e = sn->end();
+ i != e; ++i) {
+ dumpNode(i, Indent);
+ outs() << ",\n";
+ }
+ --Indent;
+ outs() << indent(Indent) << "]";
+ } else if (yaml::MappingNode *mn = dyn_cast<yaml::MappingNode>(n)) {
+ outs() << "!!map {\n";
+ ++Indent;
+ for (yaml::MappingNode::iterator i = mn->begin(), e = mn->end();
+ i != e; ++i) {
+ outs() << indent(Indent) << "? ";
+ dumpNode(i->getKey(), Indent, true);
+ outs() << "\n";
+ outs() << indent(Indent) << ": ";
+ dumpNode(i->getValue(), Indent, true);
+ outs() << ",\n";
+ }
+ --Indent;
+ outs() << indent(Indent) << "}";
+ } else if (yaml::AliasNode *an = dyn_cast<yaml::AliasNode>(n)){
+ outs() << "*" << an->getName();
+ } else if (dyn_cast<yaml::NullNode>(n)) {
+ outs() << "!!null null";
+ }
+}
+
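+// Dump every document in the stream, preceded by a %YAML 1.2 directive and
+// delimited by the standard ---/... document markers.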
+static void dumpStream(yaml::Stream &stream) {
+ for (yaml::document_iterator di = stream.begin(), de = stream.end(); di != de;
+ ++di) {
+ outs() << "%YAML 1.2\n"
+ << "---\n";
+ yaml::Node *n = di->getRoot();
+ if (n)
+ dumpNode(n);
+ else
+ break;
+ outs() << "\n...\n";
+ }
+}
+
+static void benchmark( llvm::TimerGroup &Group
+ , llvm::StringRef Name
+ , llvm::StringRef JSONText) {
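+ // Baseline: sweep the buffer once, byte by byte, so the tokenizing and
+ // parsing times below can be compared against raw traversal cost.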
+ llvm::Timer BaseLine((Name + ": Loop").str(), Group);
+ BaseLine.startTimer();
+ char C = 0;
+ for (llvm::StringRef::iterator I = JSONText.begin(),
+ E = JSONText.end();
+ I != E; ++I) { C += *I; }
+ BaseLine.stopTimer();
+ volatile char DontOptimizeOut = C; (void)DontOptimizeOut;
+
+ llvm::Timer Tokenizing((Name + ": Tokenizing").str(), Group);
+ Tokenizing.startTimer();
+ {
+ yaml::scanTokens(JSONText);
+ }
+ Tokenizing.stopTimer();
+
+ llvm::Timer Parsing((Name + ": Parsing").str(), Group);
+ Parsing.startTimer();
+ {
+ llvm::SourceMgr SM;
+ llvm::yaml::Stream stream(JSONText, SM);
+ stream.skip();
+ }
+ Parsing.stopTimer();
+}
+
+static std::string createJSONText(size_t MemoryMB, unsigned ValueSize) {
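+ // Build a JSON array of identical three-key objects until the text is at
+ // least MemoryMB megabytes; ValueSize is the length of each string value.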
+ std::string JSONText;
+ llvm::raw_string_ostream Stream(JSONText);
+ Stream << "[\n";
+ size_t MemoryBytes = MemoryMB * 1024 * 1024;
+ while (JSONText.size() < MemoryBytes) {
+ Stream << " {\n"
+ << " \"key1\": \"" << std::string(ValueSize, '*') << "\",\n"
+ << " \"key2\": \"" << std::string(ValueSize, '*') << "\",\n"
+ << " \"key3\": \"" << std::string(ValueSize, '*') << "\"\n"
+ << " }";
+ Stream.flush();
+ if (JSONText.size() < MemoryBytes) Stream << ",";
+ Stream << "\n";
+ }
+ Stream << "]\n";
+ Stream.flush();
+ return JSONText;
+}
+
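+// JSON is (almost entirely) a subset of YAML 1.2, so generated JSON makes a
+// convenient self-contained input for benchmarking the YAML parser.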
+int main(int argc, char **argv) {
+ llvm::cl::ParseCommandLineOptions(argc, argv);
+ if (Input.getNumOccurrences()) {
+ OwningPtr<MemoryBuffer> Buf;
+ if (MemoryBuffer::getFileOrSTDIN(Input, Buf))
+ return 1;
+
+ llvm::SourceMgr sm;
+ if (DumpTokens) {
+ yaml::dumpTokens(Buf->getBuffer(), outs());
+ }
+
+ if (DumpCanonical) {
+ yaml::Stream stream(Buf->getBuffer(), sm);
+ dumpStream(stream);
+ }
+ }
+
+ if (Verify) {
+ llvm::TimerGroup Group("YAML parser benchmark");
+ benchmark(Group, "Fast", createJSONText(10, 500));
+ } else if (!DumpCanonical && !DumpTokens) {
+ llvm::TimerGroup Group("YAML parser benchmark");
+ benchmark(Group, "Small Values", createJSONText(MemoryLimitMB, 5));
+ benchmark(Group, "Medium Values", createJSONText(MemoryLimitMB, 500));
+ benchmark(Group, "Large Values", createJSONText(MemoryLimitMB, 50000));
+ }
+
+ return 0;
+}